author:    Stephen Hines <srhines@google.com>  2014-05-29 02:49:00 -0700
committer: Stephen Hines <srhines@google.com>  2014-05-29 02:49:00 -0700
commit:    dce4a407a24b04eebc6a376f8e62b41aaa7b071f (patch)
tree:      dcebc53f2b182f145a2e659393bf9a0472cedf23 /lib
parent:    220b921aed042f9e520c26cffd8282a94c66c3d5 (diff)
download:  external_llvm-dce4a407a24b04eebc6a376f8e62b41aaa7b071f.zip
           external_llvm-dce4a407a24b04eebc6a376f8e62b41aaa7b071f.tar.gz
           external_llvm-dce4a407a24b04eebc6a376f8e62b41aaa7b071f.tar.bz2
Update LLVM for 3.5 rebase (r209712).
Change-Id: I149556c940fb7dc92d075273c87ff584f400941f
Diffstat (limited to 'lib')
-rw-r--r--  lib/Analysis/AliasAnalysis.cpp | 2
-rw-r--r--  lib/Analysis/AliasAnalysisCounter.cpp | 4
-rw-r--r--  lib/Analysis/AliasSetTracker.cpp | 18
-rw-r--r--  lib/Analysis/Analysis.cpp | 5
-rw-r--r--  lib/Analysis/Android.mk | 1
-rw-r--r--  lib/Analysis/BasicAliasAnalysis.cpp | 43
-rw-r--r--  lib/Analysis/BlockFrequencyInfo.cpp | 13
-rw-r--r--  lib/Analysis/BlockFrequencyInfoImpl.cpp | 995
-rw-r--r--  lib/Analysis/BranchProbabilityInfo.cpp | 18
-rw-r--r--  lib/Analysis/CFG.cpp | 8
-rw-r--r--  lib/Analysis/CFGPrinter.cpp | 13
-rw-r--r--  lib/Analysis/CGSCCPassManager.cpp | 167
-rw-r--r--  lib/Analysis/CMakeLists.txt | 2
-rw-r--r--  lib/Analysis/ConstantFolding.cpp | 160
-rw-r--r--  lib/Analysis/CostModel.cpp | 40
-rw-r--r--  lib/Analysis/Delinearization.cpp | 32
-rw-r--r--  lib/Analysis/DependenceAnalysis.cpp | 164
-rw-r--r--  lib/Analysis/DominanceFrontier.cpp | 4
-rw-r--r--  lib/Analysis/IPA/CallGraph.cpp | 12
-rw-r--r--  lib/Analysis/IPA/CallGraphSCCPass.cpp | 27
-rw-r--r--  lib/Analysis/IPA/GlobalsModRef.cpp | 21
-rw-r--r--  lib/Analysis/IPA/InlineCost.cpp | 64
-rw-r--r--  lib/Analysis/IVUsers.cpp | 13
-rw-r--r--  lib/Analysis/InstCount.cpp | 5
-rw-r--r--  lib/Analysis/InstructionSimplify.cpp | 203
-rw-r--r--  lib/Analysis/IntervalPartition.cpp | 2
-rw-r--r--  lib/Analysis/LazyCallGraph.cpp | 673
-rw-r--r--  lib/Analysis/LazyValueInfo.cpp | 21
-rw-r--r--  lib/Analysis/LibCallAliasAnalysis.cpp | 2
-rw-r--r--  lib/Analysis/LibCallSemantics.cpp | 4
-rw-r--r--  lib/Analysis/Lint.cpp | 40
-rw-r--r--  lib/Analysis/Loads.cpp | 10
-rw-r--r--  lib/Analysis/LoopInfo.cpp | 30
-rw-r--r--  lib/Analysis/LoopPass.cpp | 9
-rw-r--r--  lib/Analysis/MemDepPrinter.cpp | 14
-rw-r--r--  lib/Analysis/MemoryBuiltins.cpp | 57
-rw-r--r--  lib/Analysis/MemoryDependenceAnalysis.cpp | 31
-rw-r--r--  lib/Analysis/NoAliasAnalysis.cpp | 2
-rw-r--r--  lib/Analysis/PHITransAddr.cpp | 44
-rw-r--r--  lib/Analysis/PostDominators.cpp | 4
-rw-r--r--  lib/Analysis/RegionInfo.cpp | 85
-rw-r--r--  lib/Analysis/RegionPass.cpp | 18
-rw-r--r--  lib/Analysis/RegionPrinter.cpp | 22
-rw-r--r--  lib/Analysis/ScalarEvolution.cpp | 1167
-rw-r--r--  lib/Analysis/ScalarEvolutionAliasAnalysis.cpp | 8
-rw-r--r--  lib/Analysis/ScalarEvolutionExpander.cpp | 51
-rw-r--r--  lib/Analysis/ScalarEvolutionNormalization.cpp | 2
-rw-r--r--  lib/Analysis/SparsePropagation.cpp | 7
-rw-r--r--  lib/Analysis/TargetTransformInfo.cpp | 15
-rw-r--r--  lib/Analysis/TypeBasedAliasAnalysis.cpp | 30
-rw-r--r--  lib/Analysis/ValueTracking.cpp | 264
-rw-r--r--  lib/AsmParser/LLLexer.cpp | 19
-rw-r--r--  lib/AsmParser/LLLexer.h | 4
-rw-r--r--  lib/AsmParser/LLParser.cpp | 302
-rw-r--r--  lib/AsmParser/LLParser.h | 10
-rw-r--r--  lib/AsmParser/LLToken.h | 6
-rw-r--r--  lib/AsmParser/Parser.cpp | 8
-rw-r--r--  lib/AsmParser/module.modulemap | 1
-rw-r--r--  lib/Bitcode/Reader/BitReader.cpp | 4
-rw-r--r--  lib/Bitcode/Reader/BitcodeReader.cpp | 222
-rw-r--r--  lib/Bitcode/Reader/BitcodeReader.h | 20
-rw-r--r--  lib/Bitcode/Reader/BitstreamReader.cpp | 2
-rw-r--r--  lib/Bitcode/Writer/BitWriter.cpp | 1
-rw-r--r--  lib/Bitcode/Writer/BitcodeWriter.cpp | 118
-rw-r--r--  lib/Bitcode/module.modulemap | 1
-rw-r--r--  lib/CodeGen/AggressiveAntiDepBreaker.cpp | 52
-rw-r--r--  lib/CodeGen/AggressiveAntiDepBreaker.h | 3
-rw-r--r--  lib/CodeGen/AllocationOrder.cpp | 3
-rw-r--r--  lib/CodeGen/Analysis.cpp | 6
-rw-r--r--  lib/CodeGen/Android.mk | 1
-rw-r--r--  lib/CodeGen/AsmPrinter/ARMException.cpp | 23
-rw-r--r--  lib/CodeGen/AsmPrinter/AddressPool.cpp | 45
-rw-r--r--  lib/CodeGen/AsmPrinter/AddressPool.h | 52
-rw-r--r--  lib/CodeGen/AsmPrinter/Android.mk | 12
-rw-r--r--  lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 287
-rw-r--r--  lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp | 74
-rw-r--r--  lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp | 25
-rw-r--r--  lib/CodeGen/AsmPrinter/CMakeLists.txt | 4
-rw-r--r--  lib/CodeGen/AsmPrinter/DIE.cpp | 21
-rw-r--r--  lib/CodeGen/AsmPrinter/DIE.h | 37
-rw-r--r--  lib/CodeGen/AsmPrinter/DIEHash.cpp | 18
-rw-r--r--  lib/CodeGen/AsmPrinter/DIEHash.h | 2
-rw-r--r--  lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp | 175
-rw-r--r--  lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h | 54
-rw-r--r--  lib/CodeGen/AsmPrinter/DebugLocEntry.h | 138
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp | 39
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfAccelTable.h | 29
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 1609
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfDebug.h | 211
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfException.cpp | 28
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfFile.cpp | 156
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfFile.h | 84
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfStringPool.cpp | 74
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfStringPool.h | 55
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfUnit.cpp | 838
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfUnit.h | 174
-rw-r--r--  lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp | 27
-rw-r--r--  lib/CodeGen/AsmPrinter/WinCodeViewLineTables.h | 4
-rw-r--r--  lib/CodeGen/AtomicExpandLoadLinkedPass.cpp (renamed from lib/Target/ARM/ARMAtomicExpandPass.cpp) | 165
-rw-r--r--  lib/CodeGen/BasicTargetTransformInfo.cpp | 90
-rw-r--r--  lib/CodeGen/BranchFolding.cpp | 74
-rw-r--r--  lib/CodeGen/CMakeLists.txt | 1
-rw-r--r--  lib/CodeGen/CalcSpillWeights.cpp | 14
-rw-r--r--  lib/CodeGen/CallingConvLower.cpp | 12
-rw-r--r--  lib/CodeGen/CodeGen.cpp | 1
-rw-r--r--  lib/CodeGen/CodeGenPrepare.cpp | 457
-rw-r--r--  lib/CodeGen/CriticalAntiDepBreaker.cpp | 34
-rw-r--r--  lib/CodeGen/DFAPacketizer.cpp | 4
-rw-r--r--  lib/CodeGen/DeadMachineInstructionElim.cpp | 5
-rw-r--r--  lib/CodeGen/DwarfEHPrepare.cpp | 11
-rw-r--r--  lib/CodeGen/EarlyIfConversion.cpp | 19
-rw-r--r--  lib/CodeGen/EdgeBundles.cpp | 32
-rw-r--r--  lib/CodeGen/ExecutionDepsFix.cpp | 17
-rw-r--r--  lib/CodeGen/ExpandISelPseudos.cpp | 3
-rw-r--r--  lib/CodeGen/ExpandPostRAPseudos.cpp | 3
-rw-r--r--  lib/CodeGen/GCMetadata.cpp | 17
-rw-r--r--  lib/CodeGen/GCStrategy.cpp | 14
-rw-r--r--  lib/CodeGen/IfConversion.cpp | 44
-rw-r--r--  lib/CodeGen/InlineSpiller.cpp | 17
-rw-r--r--  lib/CodeGen/InterferenceCache.cpp | 3
-rw-r--r--  lib/CodeGen/InterferenceCache.h | 20
-rw-r--r--  lib/CodeGen/IntrinsicLowering.cpp | 6
-rw-r--r--  lib/CodeGen/LLVMTargetMachine.cpp | 63
-rw-r--r--  lib/CodeGen/LatencyPriorityQueue.cpp | 11
-rw-r--r--  lib/CodeGen/LexicalScopes.cpp | 142
-rw-r--r--  lib/CodeGen/LiveDebugVariables.cpp | 37
-rw-r--r--  lib/CodeGen/LiveInterval.cpp | 12
-rw-r--r--  lib/CodeGen/LiveIntervalAnalysis.cpp | 15
-rw-r--r--  lib/CodeGen/LiveIntervalUnion.cpp | 7
-rw-r--r--  lib/CodeGen/LiveRangeCalc.cpp | 11
-rw-r--r--  lib/CodeGen/LiveRangeCalc.h | 7
-rw-r--r--  lib/CodeGen/LiveRangeEdit.cpp | 11
-rw-r--r--  lib/CodeGen/LiveRegMatrix.cpp | 3
-rw-r--r--  lib/CodeGen/LiveStackAnalysis.cpp | 3
-rw-r--r--  lib/CodeGen/LiveVariables.cpp | 43
-rw-r--r--  lib/CodeGen/LocalStackSlotAllocation.cpp | 3
-rw-r--r--  lib/CodeGen/MachineBasicBlock.cpp | 52
-rw-r--r--  lib/CodeGen/MachineBlockFrequencyInfo.cpp | 17
-rw-r--r--  lib/CodeGen/MachineBlockPlacement.cpp | 37
-rw-r--r--  lib/CodeGen/MachineBranchProbabilityInfo.cpp | 4
-rw-r--r--  lib/CodeGen/MachineCSE.cpp | 3
-rw-r--r--  lib/CodeGen/MachineCopyPropagation.cpp | 3
-rw-r--r--  lib/CodeGen/MachineFunction.cpp | 35
-rw-r--r--  lib/CodeGen/MachineFunctionAnalysis.cpp | 4
-rw-r--r--  lib/CodeGen/MachineInstr.cpp | 79
-rw-r--r--  lib/CodeGen/MachineLICM.cpp | 42
-rw-r--r--  lib/CodeGen/MachineModuleInfo.cpp | 40
-rw-r--r--  lib/CodeGen/MachinePassRegistry.cpp | 2
-rw-r--r--  lib/CodeGen/MachineRegisterInfo.cpp | 18
-rw-r--r--  lib/CodeGen/MachineSSAUpdater.cpp | 10
-rw-r--r--  lib/CodeGen/MachineScheduler.cpp | 82
-rw-r--r--  lib/CodeGen/MachineSink.cpp | 37
-rw-r--r--  lib/CodeGen/MachineTraceMetrics.cpp | 78
-rw-r--r--  lib/CodeGen/MachineVerifier.cpp | 126
-rw-r--r--  lib/CodeGen/OptimizePHIs.cpp | 3
-rw-r--r--  lib/CodeGen/PHIElimination.cpp | 24
-rw-r--r--  lib/CodeGen/Passes.cpp | 24
-rw-r--r--  lib/CodeGen/PeepholeOptimizer.cpp | 15
-rw-r--r--  lib/CodeGen/PostRASchedulerList.cpp | 23
-rw-r--r--  lib/CodeGen/ProcessImplicitDefs.cpp | 4
-rw-r--r--  lib/CodeGen/PrologEpilogInserter.cpp | 20
-rw-r--r--  lib/CodeGen/PseudoSourceValue.cpp | 10
-rw-r--r--  lib/CodeGen/RegAllocBase.cpp | 5
-rw-r--r--  lib/CodeGen/RegAllocBase.h | 3
-rw-r--r--  lib/CodeGen/RegAllocBasic.cpp | 7
-rw-r--r--  lib/CodeGen/RegAllocFast.cpp | 7
-rw-r--r--  lib/CodeGen/RegAllocGreedy.cpp | 100
-rw-r--r--  lib/CodeGen/RegAllocPBQP.cpp | 25
-rw-r--r--  lib/CodeGen/RegisterClassInfo.cpp | 9
-rw-r--r--  lib/CodeGen/RegisterCoalescer.cpp | 50
-rw-r--r--  lib/CodeGen/RegisterCoalescer.h | 4
-rw-r--r--  lib/CodeGen/RegisterPressure.cpp | 4
-rw-r--r--  lib/CodeGen/RegisterScavenging.cpp | 15
-rw-r--r--  lib/CodeGen/ScheduleDAG.cpp | 5
-rw-r--r--  lib/CodeGen/ScheduleDAGInstrs.cpp | 134
-rw-r--r--  lib/CodeGen/ScoreboardHazardRecognizer.cpp | 5
-rw-r--r--  lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 612
-rw-r--r--  lib/CodeGen/SelectionDAG/FastISel.cpp | 78
-rw-r--r--  lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp | 7
-rw-r--r--  lib/CodeGen/SelectionDAG/InstrEmitter.cpp | 40
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 137
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp | 14
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp | 162
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeTypes.cpp | 35
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeTypes.h | 2
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp | 9
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp | 47
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp | 120
-rw-r--r--  lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp | 13
-rw-r--r--  lib/CodeGen/SelectionDAG/SDNodeDbgValue.h | 16
-rw-r--r--  lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp | 33
-rw-r--r--  lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp | 75
-rw-r--r--  lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp | 30
-rw-r--r--  lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h | 2
-rw-r--r--  lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp | 11
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 819
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 510
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h | 31
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp | 8
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 133
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp | 10
-rw-r--r--  lib/CodeGen/SelectionDAG/TargetLowering.cpp | 236
-rw-r--r--  lib/CodeGen/ShadowStackGC.cpp | 13
-rw-r--r--  lib/CodeGen/SjLjEHPrepare.cpp | 11
-rw-r--r--  lib/CodeGen/SlotIndexes.cpp | 12
-rw-r--r--  lib/CodeGen/SpillPlacement.cpp | 33
-rw-r--r--  lib/CodeGen/SpillPlacement.h | 2
-rw-r--r--  lib/CodeGen/Spiller.cpp | 4
-rw-r--r--  lib/CodeGen/SplitKit.cpp | 17
-rw-r--r--  lib/CodeGen/SplitKit.h | 4
-rw-r--r--  lib/CodeGen/StackColoring.cpp | 32
-rw-r--r--  lib/CodeGen/StackMapLivenessAnalysis.cpp | 5
-rw-r--r--  lib/CodeGen/StackMaps.cpp | 289
-rw-r--r--  lib/CodeGen/StackProtector.cpp | 23
-rw-r--r--  lib/CodeGen/StackSlotColoring.cpp | 20
-rw-r--r--  lib/CodeGen/TailDuplication.cpp | 29
-rw-r--r--  lib/CodeGen/TargetInstrInfo.cpp | 46
-rw-r--r--  lib/CodeGen/TargetLoweringBase.cpp | 38
-rw-r--r--  lib/CodeGen/TargetLoweringObjectFileImpl.cpp | 56
-rw-r--r--  lib/CodeGen/TargetRegisterInfo.cpp | 14
-rw-r--r--  lib/CodeGen/TwoAddressInstructionPass.cpp | 23
-rw-r--r--  lib/CodeGen/VirtRegMap.cpp | 3
-rw-r--r--  lib/CodeGen/module.modulemap | 1
-rw-r--r--  lib/DebugInfo/DWARFCompileUnit.h | 6
-rw-r--r--  lib/DebugInfo/DWARFContext.cpp | 246
-rw-r--r--  lib/DebugInfo/DWARFContext.h | 2
-rw-r--r--  lib/DebugInfo/DWARFDebugAbbrev.cpp | 112
-rw-r--r--  lib/DebugInfo/DWARFDebugAbbrev.h | 50
-rw-r--r--  lib/DebugInfo/DWARFDebugArangeSet.h | 1
-rw-r--r--  lib/DebugInfo/DWARFDebugAranges.cpp | 36
-rw-r--r--  lib/DebugInfo/DWARFDebugAranges.h | 17
-rw-r--r--  lib/DebugInfo/DWARFDebugFrame.cpp | 72
-rw-r--r--  lib/DebugInfo/DWARFDebugFrame.h | 6
-rw-r--r--  lib/DebugInfo/DWARFDebugInfoEntry.cpp | 95
-rw-r--r--  lib/DebugInfo/DWARFDebugInfoEntry.h | 57
-rw-r--r--  lib/DebugInfo/DWARFDebugLine.cpp | 348
-rw-r--r--  lib/DebugInfo/DWARFDebugLine.h | 109
-rw-r--r--  lib/DebugInfo/DWARFDebugRangeList.cpp | 15
-rw-r--r--  lib/DebugInfo/DWARFDebugRangeList.h | 15
-rw-r--r--  lib/DebugInfo/DWARFFormValue.cpp | 10
-rw-r--r--  lib/DebugInfo/DWARFTypeUnit.h | 10
-rw-r--r--  lib/DebugInfo/DWARFUnit.cpp | 145
-rw-r--r--  lib/DebugInfo/DWARFUnit.h | 24
-rw-r--r--  lib/DebugInfo/module.modulemap | 1
-rw-r--r--  lib/ExecutionEngine/ExecutionEngine.cpp | 146
-rw-r--r--  lib/ExecutionEngine/ExecutionEngineBindings.cpp | 15
-rw-r--r--  lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp | 10
-rw-r--r--  lib/ExecutionEngine/Interpreter/Execution.cpp | 67
-rw-r--r--  lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp | 6
-rw-r--r--  lib/ExecutionEngine/Interpreter/Interpreter.cpp | 2
-rw-r--r--  lib/ExecutionEngine/Interpreter/Interpreter.h | 4
-rw-r--r--  lib/ExecutionEngine/JIT/JIT.cpp | 26
-rw-r--r--  lib/ExecutionEngine/JIT/JIT.h | 2
-rw-r--r--  lib/ExecutionEngine/JIT/JITEmitter.cpp | 36
-rw-r--r--  lib/ExecutionEngine/JIT/JITMemoryManager.cpp | 57
-rw-r--r--  lib/ExecutionEngine/MCJIT/LLVMBuild.txt | 2
-rw-r--r--  lib/ExecutionEngine/MCJIT/MCJIT.cpp | 35
-rw-r--r--  lib/ExecutionEngine/MCJIT/MCJIT.h | 4
-rw-r--r--  lib/ExecutionEngine/MCJIT/SectionMemoryManager.cpp | 2
-rw-r--r--  lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp | 17
-rw-r--r--  lib/ExecutionEngine/OProfileJIT/OProfileWrapper.cpp | 3
-rw-r--r--  lib/ExecutionEngine/RuntimeDyld/GDBRegistrar.cpp | 13
-rw-r--r--  lib/ExecutionEngine/RuntimeDyld/ObjectImageCommon.h | 21
-rw-r--r--  lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp | 97
-rw-r--r--  lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp | 107
-rw-r--r--  lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h | 8
-rw-r--r--  lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h | 27
-rw-r--r--  lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp | 483
-rw-r--r--  lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h | 60
-rw-r--r--  lib/ExecutionEngine/TargetSelect.cpp | 8
-rw-r--r--  lib/IR/Android.mk | 1
-rw-r--r--  lib/IR/AsmWriter.cpp | 100
-rw-r--r--  lib/IR/Attributes.cpp | 22
-rw-r--r--  lib/IR/AutoUpgrade.cpp | 69
-rw-r--r--  lib/IR/BasicBlock.cpp | 20
-rw-r--r--  lib/IR/CMakeLists.txt | 5
-rw-r--r--  lib/IR/ConstantFold.cpp | 108
-rw-r--r--  lib/IR/Constants.cpp | 65
-rw-r--r--  lib/IR/ConstantsContext.h | 7
-rw-r--r--  lib/IR/Core.cpp | 140
-rw-r--r--  lib/IR/DIBuilder.cpp | 204
-rw-r--r--  lib/IR/DataLayout.cpp | 14
-rw-r--r--  lib/IR/DebugInfo.cpp | 69
-rw-r--r--  lib/IR/DebugLoc.cpp | 59
-rw-r--r--  lib/IR/DiagnosticInfo.cpp | 125
-rw-r--r--  lib/IR/Function.cpp | 24
-rw-r--r--  lib/IR/GCOV.cpp | 183
-rw-r--r--  lib/IR/Globals.cpp | 155
-rw-r--r--  lib/IR/IRPrintingPasses.cpp | 2
-rw-r--r--  lib/IR/InlineAsm.cpp | 2
-rw-r--r--  lib/IR/Instruction.cpp | 147
-rw-r--r--  lib/IR/Instructions.cpp | 91
-rw-r--r--  lib/IR/IntrinsicInst.cpp | 4
-rw-r--r--  lib/IR/LLVMContext.cpp | 36
-rw-r--r--  lib/IR/LLVMContextImpl.cpp | 27
-rw-r--r--  lib/IR/LLVMContextImpl.h | 10
-rw-r--r--  lib/IR/LeaksContext.h | 16
-rw-r--r--  lib/IR/LegacyPassManager.cpp | 67
-rw-r--r--  lib/IR/MDBuilder.cpp | 139
-rw-r--r--  lib/IR/Mangler.cpp | 2
-rw-r--r--  lib/IR/Metadata.cpp | 45
-rw-r--r--  lib/IR/Module.cpp | 20
-rw-r--r--  lib/IR/Pass.cpp | 12
-rw-r--r--  lib/IR/PassManager.cpp | 11
-rw-r--r--  lib/IR/PassRegistry.cpp | 22
-rw-r--r--  lib/IR/SymbolTableListTraitsImpl.h | 4
-rw-r--r--  lib/IR/Type.cpp | 28
-rw-r--r--  lib/IR/Use.cpp | 6
-rw-r--r--  lib/IR/Value.cpp | 86
-rw-r--r--  lib/IR/ValueSymbolTable.cpp | 9
-rw-r--r--  lib/IR/Verifier.cpp | 442
-rw-r--r--  lib/IR/module.modulemap | 1
-rw-r--r--  lib/IRReader/IRReader.cpp | 14
-rw-r--r--  lib/LTO/LTOCodeGenerator.cpp | 60
-rw-r--r--  lib/LTO/LTOModule.cpp | 54
-rw-r--r--  lib/Linker/LinkModules.cpp | 282
-rw-r--r--  lib/MC/Android.mk | 4
-rw-r--r--  lib/MC/CMakeLists.txt | 2
-rw-r--r--  lib/MC/ELFObjectWriter.cpp | 446
-rw-r--r--  lib/MC/MCAsmInfo.cpp | 6
-rw-r--r--  lib/MC/MCAsmStreamer.cpp | 193
-rw-r--r--  lib/MC/MCAssembler.cpp | 196
-rw-r--r--  lib/MC/MCContext.cpp | 102
-rw-r--r--  lib/MC/MCDisassembler.cpp | 14
-rw-r--r--  lib/MC/MCDisassembler/Disassembler.cpp | 27
-rw-r--r--  lib/MC/MCDwarf.cpp | 111
-rw-r--r--  lib/MC/MCELFStreamer.cpp | 11
-rw-r--r--  lib/MC/MCExpr.cpp | 69
-rw-r--r--  lib/MC/MCExternalSymbolizer.cpp | 14
-rw-r--r--  lib/MC/MCFixup.cpp | 37
-rw-r--r--  lib/MC/MCFunction.cpp | 15
-rw-r--r--  lib/MC/MCInst.cpp | 4
-rw-r--r--  lib/MC/MCMachOStreamer.cpp | 27
-rw-r--r--  lib/MC/MCModule.cpp | 16
-rw-r--r--  lib/MC/MCModuleYAML.cpp | 25
-rw-r--r--  lib/MC/MCNullStreamer.cpp | 10
-rw-r--r--  lib/MC/MCObjectDisassembler.cpp | 16
-rw-r--r--  lib/MC/MCObjectFileInfo.cpp | 111
-rw-r--r--  lib/MC/MCObjectStreamer.cpp | 40
-rw-r--r--  lib/MC/MCObjectSymbolizer.cpp | 42
-rw-r--r--  lib/MC/MCParser/AsmLexer.cpp | 8
-rw-r--r--  lib/MC/MCParser/AsmParser.cpp | 82
-rw-r--r--  lib/MC/MCParser/COFFAsmParser.cpp | 6
-rw-r--r--  lib/MC/MCParser/DarwinAsmParser.cpp | 9
-rw-r--r--  lib/MC/MCParser/ELFAsmParser.cpp | 8
-rw-r--r--  lib/MC/MCParser/MCAsmLexer.cpp | 2
-rw-r--r--  lib/MC/MCParser/MCAsmParser.cpp | 2
-rw-r--r--  lib/MC/MCRelocationInfo.cpp | 4
-rw-r--r--  lib/MC/MCSectionCOFF.cpp | 5
-rw-r--r--  lib/MC/MCSectionMachO.cpp | 16
-rw-r--r--  lib/MC/MCStreamer.cpp | 38
-rw-r--r--  lib/MC/MCSubtargetInfo.cpp | 17
-rw-r--r--  lib/MC/MCTargetOptions.cpp | 19
-rw-r--r--  lib/MC/MCValue.cpp | 20
-rw-r--r--  lib/MC/MachObjectWriter.cpp | 34
-rw-r--r--  lib/MC/SubtargetFeature.cpp | 154
-rw-r--r--  lib/MC/WinCOFFObjectWriter.cpp | 294
-rw-r--r--  lib/MC/WinCOFFStreamer.cpp | 313
-rw-r--r--  lib/Object/Android.mk | 1
-rw-r--r--  lib/Object/Archive.cpp | 21
-rw-r--r--  lib/Object/CMakeLists.txt | 1
-rw-r--r--  lib/Object/COFFObjectFile.cpp | 55
-rw-r--r--  lib/Object/COFFYAML.cpp | 33
-rw-r--r--  lib/Object/ELF.cpp | 10
-rw-r--r--  lib/Object/ELFYAML.cpp | 429
-rw-r--r--  lib/Object/LLVMBuild.txt | 2
-rw-r--r--  lib/Object/MachOObjectFile.cpp | 135
-rw-r--r--  lib/Object/MachOUniversal.cpp | 22
-rw-r--r--  lib/Object/Object.cpp | 9
-rw-r--r--  lib/Object/StringTableBuilder.cpp | 51
-rw-r--r--  lib/Option/ArgList.cpp | 87
-rw-r--r--  lib/Option/OptTable.cpp | 10
-rw-r--r--  lib/Option/Option.cpp | 20
-rw-r--r--  lib/ProfileData/Android.mk | 33
-rw-r--r--  lib/ProfileData/InstrProf.cpp | 4
-rw-r--r--  lib/ProfileData/InstrProfIndexed.h | 55
-rw-r--r--  lib/ProfileData/InstrProfReader.cpp | 165
-rw-r--r--  lib/ProfileData/InstrProfWriter.cpp | 90
-rw-r--r--  lib/Support/APFloat.cpp | 18
-rw-r--r--  lib/Support/APInt.cpp | 15
-rw-r--r--  lib/Support/Allocator.cpp | 33
-rw-r--r--  lib/Support/Atomic.cpp | 1
-rw-r--r--  lib/Support/BlockFrequency.cpp | 95
-rw-r--r--  lib/Support/BranchProbability.cpp | 55
-rw-r--r--  lib/Support/CommandLine.cpp | 75
-rw-r--r--  lib/Support/Compression.cpp | 35
-rw-r--r--  lib/Support/CrashRecoveryContext.cpp | 25
-rw-r--r--  lib/Support/DAGDeltaAlgorithm.cpp | 2
-rw-r--r--  lib/Support/DataExtractor.cpp | 4
-rw-r--r--  lib/Support/DataStream.cpp | 5
-rw-r--r--  lib/Support/Debug.cpp | 2
-rw-r--r--  lib/Support/Dwarf.cpp | 44
-rw-r--r--  lib/Support/DynamicLibrary.cpp | 12
-rw-r--r--  lib/Support/ErrorHandling.cpp | 6
-rw-r--r--  lib/Support/FileOutputBuffer.cpp | 13
-rw-r--r--  lib/Support/FoldingSet.cpp | 22
-rw-r--r--  lib/Support/FormattedStream.cpp | 2
-rw-r--r--  lib/Support/GraphWriter.cpp | 6
-rw-r--r--  lib/Support/Host.cpp | 45
-rw-r--r--  lib/Support/IntervalMap.cpp | 2
-rw-r--r--  lib/Support/LineIterator.cpp | 7
-rw-r--r--  lib/Support/LockFileManager.cpp | 18
-rw-r--r--  lib/Support/ManagedStatic.cpp | 17
-rw-r--r--  lib/Support/MemoryBuffer.cpp | 120
-rw-r--r--  lib/Support/Mutex.cpp | 10
-rw-r--r--  lib/Support/Path.cpp | 8
-rw-r--r--  lib/Support/PrettyStackTrace.cpp | 4
-rw-r--r--  lib/Support/RWMutex.cpp | 14
-rw-r--r--  lib/Support/Regex.cpp | 2
-rw-r--r--  lib/Support/SearchForAddressOfSpecialSymbol.cpp | 2
-rw-r--r--  lib/Support/SmallPtrSet.cpp | 2
-rw-r--r--  lib/Support/SourceMgr.cpp | 7
-rw-r--r--  lib/Support/StringMap.cpp | 10
-rw-r--r--  lib/Support/StringRef.cpp | 4
-rw-r--r--  lib/Support/TargetRegistry.cpp | 20
-rw-r--r--  lib/Support/ThreadLocal.cpp | 4
-rw-r--r--  lib/Support/Threading.cpp | 6
-rw-r--r--  lib/Support/Timer.cpp | 21
-rw-r--r--  lib/Support/Triple.cpp | 16
-rw-r--r--  lib/Support/Unix/Memory.inc | 14
-rw-r--r--  lib/Support/Unix/Path.inc | 22
-rw-r--r--  lib/Support/Unix/Process.inc | 4
-rw-r--r--  lib/Support/Unix/Program.inc | 23
-rw-r--r--  lib/Support/Unix/Signals.inc | 32
-rw-r--r--  lib/Support/Unix/TimeValue.inc | 10
-rw-r--r--  lib/Support/Windows/DynamicLibrary.inc | 2
-rw-r--r--  lib/Support/Windows/Process.inc | 14
-rw-r--r--  lib/Support/Windows/TimeValue.inc | 19
-rw-r--r--  lib/Support/YAMLParser.cpp | 44
-rw-r--r--  lib/Support/YAMLTraits.cpp | 58
-rw-r--r--  lib/Support/raw_ostream.cpp | 6
-rw-r--r--  lib/Support/regengine.inc | 2
-rw-r--r--  lib/TableGen/Main.cpp | 1
-rw-r--r--  lib/TableGen/Record.cpp | 190
-rw-r--r--  lib/TableGen/TGLexer.cpp | 11
-rw-r--r--  lib/TableGen/TGLexer.h | 2
-rw-r--r--  lib/TableGen/TGParser.cpp | 398
-rw-r--r--  lib/TableGen/TGParser.h | 12
-rw-r--r--  lib/TableGen/module.modulemap | 1
-rw-r--r--  lib/Target/AArch64/AArch64.h | 45
-rw-r--r--  lib/Target/AArch64/AArch64.td | 93
-rw-r--r--  lib/Target/AArch64/AArch64AddressTypePromotion.cpp (renamed from lib/Target/ARM64/ARM64AddressTypePromotion.cpp) | 84
-rw-r--r--  lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp (renamed from lib/Target/ARM64/ARM64AdvSIMDScalarPass.cpp) | 113
-rw-r--r--  lib/Target/AArch64/AArch64AsmPrinter.cpp | 652
-rw-r--r--  lib/Target/AArch64/AArch64AsmPrinter.h | 76
-rw-r--r--  lib/Target/AArch64/AArch64BranchFixupPass.cpp | 600
-rw-r--r--  lib/Target/AArch64/AArch64BranchRelaxation.cpp (renamed from lib/Target/ARM64/ARM64BranchRelaxation.cpp) | 217
-rw-r--r--  lib/Target/AArch64/AArch64CallingConv.td | 197
-rw-r--r--  lib/Target/AArch64/AArch64CallingConvention.td (renamed from lib/Target/ARM64/ARM64CallingConvention.td) | 76
-rw-r--r--  lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp (renamed from lib/Target/ARM64/ARM64CleanupLocalDynamicTLSPass.cpp) | 44
-rw-r--r--  lib/Target/AArch64/AArch64CollectLOH.cpp (renamed from lib/Target/ARM64/ARM64CollectLOH.cpp) | 442
-rw-r--r--  lib/Target/AArch64/AArch64ConditionalCompares.cpp (renamed from lib/Target/ARM64/ARM64ConditionalCompares.cpp) | 295
-rw-r--r--  lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp | 134
-rw-r--r--  lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp (renamed from lib/Target/ARM64/ARM64ExpandPseudoInsts.cpp) | 242
-rw-r--r--  lib/Target/AArch64/AArch64FastISel.cpp (renamed from lib/Target/ARM64/ARM64FastISel.cpp) | 524
-rw-r--r--  lib/Target/AArch64/AArch64FrameLowering.cpp | 1295
-rw-r--r--  lib/Target/AArch64/AArch64FrameLowering.h | 125
-rw-r--r--  lib/Target/AArch64/AArch64ISelDAGToDAG.cpp | 4068
-rw-r--r--  lib/Target/AArch64/AArch64ISelLowering.cpp | 11169
-rw-r--r--  lib/Target/AArch64/AArch64ISelLowering.h | 701
-rw-r--r--  lib/Target/AArch64/AArch64InstrAtomics.td | 364
-rw-r--r--  lib/Target/AArch64/AArch64InstrFormats.td | 9455
-rw-r--r--  lib/Target/AArch64/AArch64InstrInfo.cpp | 2646
-rw-r--r--  lib/Target/AArch64/AArch64InstrInfo.h | 236
-rw-r--r--  lib/Target/AArch64/AArch64InstrInfo.td | 10086
-rw-r--r--  lib/Target/AArch64/AArch64InstrNEON.td | 9476
-rw-r--r--  lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp (renamed from lib/Target/ARM64/ARM64LoadStoreOptimizer.cpp) | 377
-rw-r--r--  lib/Target/AArch64/AArch64MCInstLower.cpp | 243
-rw-r--r--  lib/Target/AArch64/AArch64MCInstLower.h (renamed from lib/Target/ARM64/ARM64MCInstLower.h) | 12
-rw-r--r--  lib/Target/AArch64/AArch64MachineFunctionInfo.cpp | 18
-rw-r--r--  lib/Target/AArch64/AArch64MachineFunctionInfo.h | 188
-rw-r--r--  lib/Target/AArch64/AArch64PerfectShuffle.h (renamed from lib/Target/ARM64/ARM64PerfectShuffle.h) | 2
-rw-r--r--  lib/Target/AArch64/AArch64PromoteConstant.cpp (renamed from lib/Target/ARM64/ARM64PromoteConstant.cpp) | 207
-rw-r--r--  lib/Target/AArch64/AArch64RegisterInfo.cpp | 452
-rw-r--r--  lib/Target/AArch64/AArch64RegisterInfo.h | 104
-rw-r--r--  lib/Target/AArch64/AArch64RegisterInfo.td | 733
-rw-r--r--  lib/Target/AArch64/AArch64SchedA53.td | 291
-rw-r--r--  lib/Target/AArch64/AArch64SchedCyclone.td (renamed from lib/Target/ARM64/ARM64SchedCyclone.td) | 25
-rw-r--r--  lib/Target/AArch64/AArch64Schedule.td | 168
-rw-r--r--  lib/Target/AArch64/AArch64ScheduleA53.td | 144
-rw-r--r--  lib/Target/AArch64/AArch64SelectionDAGInfo.cpp | 48
-rw-r--r--  lib/Target/AArch64/AArch64SelectionDAGInfo.h | 17
-rw-r--r--  lib/Target/AArch64/AArch64StorePairSuppress.cpp (renamed from lib/Target/ARM64/ARM64StorePairSuppress.cpp) | 49
-rw-r--r--  lib/Target/AArch64/AArch64Subtarget.cpp | 117
-rw-r--r--  lib/Target/AArch64/AArch64Subtarget.h | 91
-rw-r--r--  lib/Target/AArch64/AArch64TargetMachine.cpp | 170
-rw-r--r--  lib/Target/AArch64/AArch64TargetMachine.h | 74
-rw-r--r--  lib/Target/AArch64/AArch64TargetObjectFile.cpp | 46
-rw-r--r--  lib/Target/AArch64/AArch64TargetObjectFile.h | 35
-rw-r--r--  lib/Target/AArch64/AArch64TargetTransformInfo.cpp | 385
-rw-r--r--  lib/Target/AArch64/Android.mk | 34
-rw-r--r--  lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp | 5249
-rw-r--r--  lib/Target/AArch64/AsmParser/CMakeLists.txt | 3
-rw-r--r--  lib/Target/AArch64/AsmParser/LLVMBuild.txt | 2
-rw-r--r--  lib/Target/AArch64/AsmParser/Makefile | 2
-rw-r--r--  lib/Target/AArch64/CMakeLists.txt | 32
-rw-r--r--  lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp | 2556
-rw-r--r--  lib/Target/AArch64/Disassembler/AArch64Disassembler.h | 40
-rw-r--r--  lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp | 221
-rw-r--r--  lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.h | 38
-rw-r--r--  lib/Target/AArch64/Disassembler/Android.mk | 3
-rw-r--r--  lib/Target/AArch64/Disassembler/CMakeLists.txt | 11
-rw-r--r--  lib/Target/AArch64/Disassembler/LLVMBuild.txt | 2
-rw-r--r--  lib/Target/AArch64/Disassembler/Makefile | 2
-rw-r--r--  lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp | 1567
-rw-r--r--  lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h | 214
-rw-r--r--  lib/Target/AArch64/InstPrinter/Android.mk | 1
-rw-r--r--  lib/Target/AArch64/InstPrinter/CMakeLists.txt | 4
-rw-r--r--  lib/Target/AArch64/InstPrinter/LLVMBuild.txt | 2
-rw-r--r--  lib/Target/AArch64/InstPrinter/Makefile | 2
-rw-r--r--  lib/Target/AArch64/LLVMBuild.txt | 4
-rw-r--r--  lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h (renamed from lib/Target/ARM64/MCTargetDesc/ARM64AddressingModes.h) | 222
-rw-r--r--  lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp | 1009
-rw-r--r--  lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp | 428
-rw-r--r--  lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp | 49
-rw-r--r--  lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h | 7
-rw-r--r--  lib/Target/AArch64/MCTargetDesc/AArch64FixupKinds.h | 161
-rw-r--r--  lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp | 67
-rw-r--r--  lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h | 23
-rw-r--r--  lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp | 914
-rw-r--r--  lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp | 212
-rw-r--r--  lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h | 237
-rw-r--r--  lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp | 196
-rw-r--r--  lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h | 38
-rw-r--r--  lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp (renamed from lib/Target/ARM64/MCTargetDesc/ARM64MachObjectWriter.cpp) | 86
-rw-r--r--  lib/Target/AArch64/MCTargetDesc/Android.mk | 1
-rw-r--r--  lib/Target/AArch64/MCTargetDesc/CMakeLists.txt | 7
-rw-r--r--  lib/Target/AArch64/MCTargetDesc/LLVMBuild.txt | 2
-rw-r--r--  lib/Target/AArch64/Makefile | 21
-rw-r--r--  lib/Target/AArch64/README.txt | 2
-rw-r--r--  lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp | 30
-rw-r--r--  lib/Target/AArch64/TargetInfo/CMakeLists.txt | 4
-rw-r--r--  lib/Target/AArch64/TargetInfo/LLVMBuild.txt | 2
-rw-r--r--  lib/Target/AArch64/Utils/AArch64BaseInfo.cpp | 424
-rw-r--r--  lib/Target/AArch64/Utils/AArch64BaseInfo.h | 592
-rw-r--r--  lib/Target/AArch64/Utils/Android.mk | 15
-rw-r--r--  lib/Target/AArch64/Utils/LLVMBuild.txt | 2
-rw-r--r--  lib/Target/AArch64/Utils/Makefile | 5
-rw-r--r--  lib/Target/ARM/A15SDOptimizer.cpp | 17
-rw-r--r--  lib/Target/ARM/ARM.h | 2
-rw-r--r--  lib/Target/ARM/ARMAsmPrinter.cpp | 163
-rw-r--r--  lib/Target/ARM/ARMAsmPrinter.h | 9
-rw-r--r--  lib/Target/ARM/ARMBaseInstrInfo.cpp | 73
-rw-r--r--  lib/Target/ARM/ARMBaseInstrInfo.h | 2
-rw-r--r--  lib/Target/ARM/ARMBaseRegisterInfo.cpp | 18
-rw-r--r--  lib/Target/ARM/ARMBaseRegisterInfo.h | 6
-rw-r--r--  lib/Target/ARM/ARMCallingConv.h | 113
-rw-r--r--  lib/Target/ARM/ARMCallingConv.td | 3
-rw-r--r--  lib/Target/ARM/ARMCodeEmitter.cpp | 9
-rw-r--r--  lib/Target/ARM/ARMConstantIslandPass.cpp | 21
-rw-r--r--  lib/Target/ARM/ARMExpandPseudoInsts.cpp | 62
-rw-r--r--  lib/Target/ARM/ARMFastISel.cpp | 32
-rw-r--r--  lib/Target/ARM/ARMFeatures.h | 6
-rw-r--r--  lib/Target/ARM/ARMFrameLowering.cpp | 129
-rw-r--r--  lib/Target/ARM/ARMFrameLowering.h | 2
-rw-r--r--  lib/Target/ARM/ARMHazardRecognizer.cpp | 4
-rw-r--r--  lib/Target/ARM/ARMHazardRecognizer.h | 2
-rw-r--r--  lib/Target/ARM/ARMISelDAGToDAG.cpp | 126
-rw-r--r--  lib/Target/ARM/ARMISelLowering.cpp | 888
-rw-r--r--  lib/Target/ARM/ARMISelLowering.h | 28
-rw-r--r--  lib/Target/ARM/ARMInstrFormats.td | 5
-rw-r--r--  lib/Target/ARM/ARMInstrInfo.td | 178
-rw-r--r--  lib/Target/ARM/ARMInstrNEON.td | 1326
-rw-r--r--  lib/Target/ARM/ARMInstrThumb.td | 25
-rw-r--r--  lib/Target/ARM/ARMInstrThumb2.td | 30
-rw-r--r--  lib/Target/ARM/ARMJITInfo.cpp | 7
-rw-r--r--  lib/Target/ARM/ARMLoadStoreOptimizer.cpp | 305
-rw-r--r--  lib/Target/ARM/ARMOptimizeBarriersPass.cpp | 8
-rw-r--r--  lib/Target/ARM/ARMRegisterInfo.td | 8
-rw-r--r--  lib/Target/ARM/ARMScheduleV6.td | 4
-rw-r--r--  lib/Target/ARM/ARMSelectionDAGInfo.cpp | 51
-rw-r--r--  lib/Target/ARM/ARMSubtarget.cpp | 6
-rw-r--r--  lib/Target/ARM/ARMSubtarget.h | 8
-rw-r--r--  lib/Target/ARM/ARMTargetMachine.cpp | 13
-rw-r--r--  lib/Target/ARM/ARMTargetMachine.h | 17
-rw-r--r--  lib/Target/ARM/ARMTargetObjectFile.cpp | 7
-rw-r--r--  lib/Target/ARM/ARMTargetObjectFile.h | 2
-rw-r--r--  lib/Target/ARM/ARMTargetTransformInfo.cpp | 7
-rw-r--r--  lib/Target/ARM/Android.mk | 1
-rw-r--r--  lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 485
-rw-r--r--  lib/Target/ARM/CMakeLists.txt | 1
-rw-r--r--  lib/Target/ARM/Disassembler/ARMDisassembler.cpp | 24
-rw-r--r--  lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp | 3
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp | 129
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp | 8
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp | 92
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp | 14
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h | 4
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp | 14
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp | 3
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp | 29
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h | 10
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp | 14
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp | 6
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp | 82
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp | 46
-rw-r--r--  lib/Target/ARM/MCTargetDesc/Android.mk | 4
-rw-r--r--  lib/Target/ARM/MCTargetDesc/CMakeLists.txt | 8
-rw-r--r--  lib/Target/ARM/MLxExpansionPass.cpp | 9
-rw-r--r--  lib/Target/ARM/README-Thumb.txt | 4
-rw-r--r--  lib/Target/ARM/Thumb1FrameLowering.cpp | 4
-rw-r--r--  lib/Target/ARM/Thumb1RegisterInfo.h | 2
-rw-r--r--  lib/Target/ARM/Thumb2ITBlockPass.cpp | 3
-rw-r--r--  lib/Target/ARM/Thumb2SizeReduction.cpp | 14
-rw-r--r--  lib/Target/ARM64/ARM64.h | 48
-rw-r--r--  lib/Target/ARM64/ARM64.td | 95
-rw-r--r--  lib/Target/ARM64/ARM64AsmPrinter.cpp | 563
-rw-r--r--  lib/Target/ARM64/ARM64CallingConv.h | 94
-rw-r--r--  lib/Target/ARM64/ARM64DeadRegisterDefinitionsPass.cpp | 104
-rw-r--r--  lib/Target/ARM64/ARM64FrameLowering.cpp | 816
-rw-r--r--  lib/Target/ARM64/ARM64FrameLowering.h | 75
-rw-r--r--  lib/Target/ARM64/ARM64ISelDAGToDAG.cpp | 2381
-rw-r--r--  lib/Target/ARM64/ARM64ISelLowering.cpp | 7551
-rw-r--r--  lib/Target/ARM64/ARM64ISelLowering.h | 422
-rw-r--r--  lib/Target/ARM64/ARM64InstrAtomics.td | 293
-rw-r--r--  lib/Target/ARM64/ARM64InstrFormats.td | 8193
-rw-r--r--  lib/Target/ARM64/ARM64InstrInfo.cpp | 1864
-rw-r--r--  lib/Target/ARM64/ARM64InstrInfo.h | 219
-rw-r--r--  lib/Target/ARM64/ARM64InstrInfo.td | 4458
-rw-r--r--  lib/Target/ARM64/ARM64MCInstLower.cpp | 201
-rw-r--r--  lib/Target/ARM64/ARM64MachineFunctionInfo.h | 139
-rw-r--r--  lib/Target/ARM64/ARM64RegisterInfo.cpp | 400
-rw-r--r--  lib/Target/ARM64/ARM64RegisterInfo.h | 101
-rw-r--r--  lib/Target/ARM64/ARM64RegisterInfo.td | 561
-rw-r--r--  lib/Target/ARM64/ARM64Schedule.td | 92
-rw-r--r--  lib/Target/ARM64/ARM64SelectionDAGInfo.cpp | 57
-rw-r--r--  lib/Target/ARM64/ARM64SelectionDAGInfo.h | 37
-rw-r--r--  lib/Target/ARM64/ARM64Subtarget.cpp | 100
-rw-r--r--  lib/Target/ARM64/ARM64Subtarget.h | 87
-rw-r--r--  lib/Target/ARM64/ARM64TargetMachine.cpp | 157
-rw-r--r--  lib/Target/ARM64/ARM64TargetMachine.h | 69
-rw-r--r--  lib/Target/ARM64/ARM64TargetObjectFile.cpp | 52
-rw-r--r--  lib/Target/ARM64/ARM64TargetObjectFile.h | 40
-rw-r--r--  lib/Target/ARM64/ARM64TargetTransformInfo.cpp | 326
-rw-r--r--  lib/Target/ARM64/AsmParser/ARM64AsmParser.cpp | 4832
-rw-r--r--  lib/Target/ARM64/AsmParser/CMakeLists.txt | 6
-rw-r--r--  lib/Target/ARM64/AsmParser/LLVMBuild.txt | 24
-rw-r--r--  lib/Target/ARM64/AsmParser/Makefile | 15
-rw-r--r--  lib/Target/ARM64/CMakeLists.txt | 50
-rw-r--r--  lib/Target/ARM64/Disassembler/ARM64Disassembler.cpp | 2142
-rw-r--r--  lib/Target/ARM64/Disassembler/ARM64Disassembler.h | 54
-rw-r--r--  lib/Target/ARM64/Disassembler/CMakeLists.txt | 13
-rw-r--r--  lib/Target/ARM64/Disassembler/LLVMBuild.txt | 24
-rw-r--r--  lib/Target/ARM64/Disassembler/Makefile | 16
-rw-r--r--  lib/Target/ARM64/InstPrinter/ARM64InstPrinter.cpp | 1428
-rw-r--r--  lib/Target/ARM64/InstPrinter/ARM64InstPrinter.h | 157
-rw-r--r--  lib/Target/ARM64/InstPrinter/CMakeLists.txt | 7
-rw-r--r--  lib/Target/ARM64/InstPrinter/LLVMBuild.txt | 24
-rw-r--r--  lib/Target/ARM64/InstPrinter/Makefile | 15
-rw-r--r--  lib/Target/ARM64/LLVMBuild.txt | 36
-rw-r--r--  lib/Target/ARM64/MCTargetDesc/ARM64AsmBackend.cpp | 533
-rw-r--r--  lib/Target/ARM64/MCTargetDesc/ARM64BaseInfo.h | 998
-rw-r--r--  lib/Target/ARM64/MCTargetDesc/ARM64ELFObjectWriter.cpp | 237
-rw-r--r--  lib/Target/ARM64/MCTargetDesc/ARM64ELFStreamer.cpp | 158
-rw-r--r--  lib/Target/ARM64/MCTargetDesc/ARM64ELFStreamer.h | 26
-rw-r--r--  lib/Target/ARM64/MCTargetDesc/ARM64FixupKinds.h | 72
-rw-r--r--  lib/Target/ARM64/MCTargetDesc/ARM64MCAsmInfo.cpp | 92
-rw-r--r--  lib/Target/ARM64/MCTargetDesc/ARM64MCAsmInfo.h | 36
-rw-r--r--  lib/Target/ARM64/MCTargetDesc/ARM64MCCodeEmitter.cpp | 563
-rw-r--r--  lib/Target/ARM64/MCTargetDesc/ARM64MCExpr.cpp | 168
-rw-r--r--  lib/Target/ARM64/MCTargetDesc/ARM64MCExpr.h | 162
-rw-r--r--  lib/Target/ARM64/MCTargetDesc/ARM64MCTargetDesc.cpp | 167
-rw-r--r--  lib/Target/ARM64/MCTargetDesc/ARM64MCTargetDesc.h | 62
-rw-r--r--  lib/Target/ARM64/MCTargetDesc/CMakeLists.txt | 14
-rw-r--r--  lib/Target/ARM64/MCTargetDesc/LLVMBuild.txt | 24
-rw-r--r--  lib/Target/ARM64/MCTargetDesc/Makefile | 16
-rw-r--r--  lib/Target/ARM64/Makefile | 25
-rw-r--r--  lib/Target/ARM64/TargetInfo/ARM64TargetInfo.cpp | 21
-rw-r--r--  lib/Target/ARM64/TargetInfo/CMakeLists.txt | 7
-rw-r--r--  lib/Target/ARM64/TargetInfo/LLVMBuild.txt | 24
-rw-r--r--  lib/Target/ARM64/TargetInfo/Makefile | 15
-rw-r--r--  lib/Target/CppBackend/CPPBackend.cpp | 28
-rw-r--r--  lib/Target/CppBackend/CPPTargetMachine.h | 14
-rw-r--r--  lib/Target/Hexagon/Hexagon.td | 2
-rw-r--r--  lib/Target/Hexagon/HexagonAsmPrinter.cpp | 5
-rw-r--r--  lib/Target/Hexagon/HexagonAsmPrinter.h | 11
-rw-r--r--  lib/Target/Hexagon/HexagonCFGOptimizer.cpp | 15
-rw-r--r--  lib/Target/Hexagon/HexagonCopyToCombine.cpp | 18
-rw-r--r--  lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp | 6
-rw-r--r--  lib/Target/Hexagon/HexagonFixupHwLoops.cpp | 8
-rw-r--r--  lib/Target/Hexagon/HexagonFrameLowering.cpp | 4
-rw-r--r--  lib/Target/Hexagon/HexagonFrameLowering.h | 32
-rw-r--r--  lib/Target/Hexagon/HexagonHardwareLoops.cpp | 107
-rw-r--r--  lib/Target/Hexagon/HexagonISelDAGToDAG.cpp | 19
-rw-r--r--  lib/Target/Hexagon/HexagonISelLowering.cpp | 29
-rw-r--r--  lib/Target/Hexagon/HexagonISelLowering.h | 43
-rw-r--r--  lib/Target/Hexagon/HexagonInstrFormats.td | 167
-rw-r--r--  lib/Target/Hexagon/HexagonInstrFormatsV4.td | 31
-rw-r--r--  lib/Target/Hexagon/HexagonInstrInfo.cpp | 19
-rw-r--r--  lib/Target/Hexagon/HexagonInstrInfo.h | 197
-rw-r--r--  lib/Target/Hexagon/HexagonInstrInfo.td | 14
-rw-r--r--  lib/Target/Hexagon/HexagonInstrInfoV4.td | 35
-rw-r--r--  lib/Target/Hexagon/HexagonMachineScheduler.cpp | 20
-rw-r--r--  lib/Target/Hexagon/HexagonMachineScheduler.h | 19
-rw-r--r--  lib/Target/Hexagon/HexagonNewValueJump.cpp | 13
-rw-r--r--  lib/Target/Hexagon/HexagonPeephole.cpp | 9
-rw-r--r--  lib/Target/Hexagon/HexagonRegisterInfo.cpp | 9
-rw-r--r--  lib/Target/Hexagon/HexagonRegisterInfo.h | 17
-rw-r--r--  lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp | 6
-rw-r--r--  lib/Target/Hexagon/HexagonSchedule.td | 51
-rw-r--r--  lib/Target/Hexagon/HexagonScheduleV4.td | 165
-rw-r--r--  lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp | 3
-rw-r--r--  lib/Target/Hexagon/HexagonSelectionDAGInfo.h | 3
-rw-r--r--  lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp | 24
-rw-r--r--  lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp | 10
-rw-r--r--  lib/Target/Hexagon/HexagonSubtarget.cpp | 2
-rw-r--r--  lib/Target/Hexagon/HexagonTargetMachine.cpp | 38
-rw-r--r--  lib/Target/Hexagon/HexagonTargetMachine.h | 20
-rw-r--r--  lib/Target/Hexagon/HexagonVLIWPacketizer.cpp | 28
-rw-r--r--  lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.cpp | 3
-rw-r--r--  lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.h | 2
-rw-r--r--  lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h | 72
-rw-r--r--  lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp | 2
-rw-r--r--  lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h | 2
-rw-r--r--  lib/Target/Hexagon/MCTargetDesc/HexagonMCInst.h | 2
-rw-r--r--  lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp | 6
-rw-r--r--  lib/Target/LLVMBuild.txt | 2
-rw-r--r--  lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp | 5
-rw-r--r--  lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h | 6
-rw-r--r--  lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h | 2
-rw-r--r--  lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp | 6
-rw-r--r--  lib/Target/MSP430/MSP430AsmPrinter.cpp | 13
-rw-r--r--  lib/Target/MSP430/MSP430BranchSelector.cpp | 7
-rw-r--r--  lib/Target/MSP430/MSP430FrameLowering.cpp | 2
-rw-r--r--  lib/Target/MSP430/MSP430FrameLowering.h | 22
-rw-r--r--  lib/Target/MSP430/MSP430ISelDAGToDAG.cpp | 39
-rw-r--r--  lib/Target/MSP430/MSP430ISelLowering.cpp | 23
-rw-r--r--  lib/Target/MSP430/MSP430ISelLowering.h | 56
-rw-r--r--  lib/Target/MSP430/MSP430InstrInfo.cpp | 8
-rw-r--r--  lib/Target/MSP430/MSP430InstrInfo.h | 39
-rw-r--r--  lib/Target/MSP430/MSP430RegisterInfo.cpp | 18
-rw-r--r--  lib/Target/MSP430/MSP430RegisterInfo.h | 12
-rw-r--r--  lib/Target/MSP430/MSP430SelectionDAGInfo.cpp | 3
-rw-r--r--  lib/Target/MSP430/MSP430Subtarget.cpp | 6
-rw-r--r--  lib/Target/MSP430/MSP430TargetMachine.cpp | 4
-rw-r--r--  lib/Target/MSP430/MSP430TargetMachine.h | 16
-rw-r--r--  lib/Target/Mips/Android.mk | 1
-rw-r--r--  lib/Target/Mips/AsmParser/LLVMBuild.txt | 2
-rw-r--r--  lib/Target/Mips/AsmParser/MipsAsmParser.cpp | 273
-rw-r--r--  lib/Target/Mips/CMakeLists.txt | 2
-rw-r--r--  lib/Target/Mips/Disassembler/LLVMBuild.txt | 2
-rw-r--r--  lib/Target/Mips/Disassembler/MipsDisassembler.cpp | 370
-rw-r--r--  lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp | 5
-rw-r--r--  lib/Target/Mips/InstPrinter/MipsInstPrinter.h | 6
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp | 92
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h | 18
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp | 12
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h | 12
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.h | 2
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp | 85
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.h | 27
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp | 3
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsMCExpr.h | 10
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h | 2
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp | 13
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp | 7
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp | 153
-rw-r--r--  lib/Target/Mips/Makefile | 2
-rw-r--r--  lib/Target/Mips/MicroMipsInstrFPU.td | 14
-rw-r--r--  lib/Target/Mips/MicroMipsInstrInfo.td | 23
-rw-r--r--  lib/Target/Mips/Mips.td | 101
-rw-r--r--  lib/Target/Mips/Mips16FrameLowering.cpp | 2
-rw-r--r--  lib/Target/Mips/Mips16FrameLowering.h | 20
-rw-r--r--  lib/Target/Mips/Mips16HardFloat.cpp | 14
-rw-r--r--  lib/Target/Mips/Mips16HardFloat.h | 4
-rw-r--r--  lib/Target/Mips/Mips16HardFloatInfo.cpp | 4
-rw-r--r--  lib/Target/Mips/Mips16ISelDAGToDAG.cpp | 19
-rw-r--r--  lib/Target/Mips/Mips16ISelDAGToDAG.h | 10
-rw-r--r--  lib/Target/Mips/Mips16ISelLowering.cpp | 34
-rw-r--r--  lib/Target/Mips/Mips16ISelLowering.h | 21
-rw-r--r--  lib/Target/Mips/Mips16InstrInfo.cpp | 1
-rw-r--r--  lib/Target/Mips/Mips16InstrInfo.h | 50
-rw-r--r--  lib/Target/Mips/Mips16RegisterInfo.cpp | 2
-rw-r--r--  lib/Target/Mips/Mips16RegisterInfo.h | 16
-rw-r--r--  lib/Target/Mips/Mips32r6InstrFormats.td | 386
-rw-r--r--  lib/Target/Mips/Mips32r6InstrInfo.td | 583
-rw-r--r--  lib/Target/Mips/Mips64InstrInfo.td | 257
-rw-r--r--  lib/Target/Mips/Mips64r6InstrInfo.td | 88
-rw-r--r--  lib/Target/Mips/MipsAsmPrinter.cpp | 40
-rw-r--r--  lib/Target/Mips/MipsAsmPrinter.h | 32
-rw-r--r--  lib/Target/Mips/MipsCallingConv.td | 4
-rw-r--r--  lib/Target/Mips/MipsCodeEmitter.cpp | 46
-rw-r--r--  lib/Target/Mips/MipsCondMov.td | 138
-rw-r--r--  lib/Target/Mips/MipsConstantIslandPass.cpp | 20
-rw-r--r--  lib/Target/Mips/MipsDelaySlotFiller.cpp | 78
-rw-r--r--  lib/Target/Mips/MipsFastISel.cpp | 283
-rw-r--r--  lib/Target/Mips/MipsFrameLowering.cpp | 2
-rw-r--r--  lib/Target/Mips/MipsFrameLowering.h | 2
-rw-r--r--  lib/Target/Mips/MipsISelDAGToDAG.cpp | 12
-rw-r--r--  lib/Target/Mips/MipsISelDAGToDAG.h | 12
-rw-r--r--  lib/Target/Mips/MipsISelLowering.cpp | 296
-rw-r--r--  lib/Target/Mips/MipsISelLowering.h | 103
-rw-r--r--  lib/Target/Mips/MipsInstrFPU.td | 356
-rw-r--r--  lib/Target/Mips/MipsInstrFormats.td | 33
-rw-r--r--  lib/Target/Mips/MipsInstrInfo.cpp | 8
-rw-r--r--  lib/Target/Mips/MipsInstrInfo.h | 50
-rw-r--r--  lib/Target/Mips/MipsInstrInfo.td | 412
-rw-r--r--  lib/Target/Mips/MipsJITInfo.cpp | 3
-rw-r--r--  lib/Target/Mips/MipsJITInfo.h | 14
-rw-r--r--  lib/Target/Mips/MipsLongBranch.cpp | 96
-rw-r--r--  lib/Target/Mips/MipsMCInstLower.cpp | 68
-rw-r--r--  lib/Target/Mips/MipsMCInstLower.h | 8
-rw-r--r--  lib/Target/Mips/MipsMSAInstrInfo.td | 90
-rw-r--r--  lib/Target/Mips/MipsMachineFunction.cpp | 7
-rw-r--r--  lib/Target/Mips/MipsMachineFunction.h | 8
-rw-r--r--  lib/Target/Mips/MipsModuleISelDAGToDAG.cpp | 2
-rw-r--r--  lib/Target/Mips/MipsModuleISelDAGToDAG.h | 8
-rw-r--r--  lib/Target/Mips/MipsOptimizePICCall.cpp | 46
-rw-r--r--  lib/Target/Mips/MipsOs16.cpp | 3
-rw-r--r--  lib/Target/Mips/MipsOs16.h | 4
-rw-r--r--  lib/Target/Mips/MipsRegisterInfo.cpp | 16
-rw-r--r--  lib/Target/Mips/MipsRegisterInfo.h | 21
-rw-r--r--  lib/Target/Mips/MipsRegisterInfo.td | 16
-rw-r--r--  lib/Target/Mips/MipsSEFrameLowering.cpp | 3
-rw-r--r--  lib/Target/Mips/MipsSEFrameLowering.h | 14
-rw-r--r--  lib/Target/Mips/MipsSEISelDAGToDAG.cpp | 15
-rw-r--r--  lib/Target/Mips/MipsSEISelDAGToDAG.h | 66
-rw-r--r--  lib/Target/Mips/MipsSEISelLowering.cpp | 185
-rw-r--r--  lib/Target/Mips/MipsSEISelLowering.h | 36
-rw-r--r--  lib/Target/Mips/MipsSEInstrInfo.cpp | 2
-rw-r--r--  lib/Target/Mips/MipsSEInstrInfo.h | 48
-rw-r--r--  lib/Target/Mips/MipsSERegisterInfo.cpp | 4
-rw-r--r--  lib/Target/Mips/MipsSERegisterInfo.h | 12
-rw-r--r--  lib/Target/Mips/MipsSelectionDAGInfo.cpp | 3
-rw-r--r--  lib/Target/Mips/MipsSubtarget.cpp | 57
-rw-r--r--  lib/Target/Mips/MipsSubtarget.h | 61
-rw-r--r--  lib/Target/Mips/MipsTargetMachine.cpp | 12
-rw-r--r--  lib/Target/Mips/MipsTargetMachine.h | 46
-rw-r--r--  lib/Target/Mips/MipsTargetStreamer.h | 134
-rw-r--r--  lib/Target/NVPTX/CMakeLists.txt | 3
-rw-r--r--  lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp | 3
-rw-r--r--  lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h | 14
-rw-r--r--  lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h | 6
-rw-r--r--  lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp | 6
-rw-r--r--  lib/Target/NVPTX/NVPTX.h | 3
-rw-r--r--  lib/Target/NVPTX/NVPTXAllocaHoisting.h | 6
-rw-r--r--  lib/Target/NVPTX/NVPTXAsmPrinter.cpp | 382
-rw-r--r--  lib/Target/NVPTX/NVPTXAsmPrinter.h | 66
-rw-r--r--  lib/Target/NVPTX/NVPTXAssignValidGlobalNames.cpp | 2
-rw-r--r--  lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp | 195
-rw-r--r--  lib/Target/NVPTX/NVPTXFrameLowering.h | 10
-rw-r--r--  lib/Target/NVPTX/NVPTXGenericToNVVM.cpp | 10
-rw-r--r--  lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp | 826
-rw-r--r--  lib/Target/NVPTX/NVPTXISelDAGToDAG.h | 15
-rw-r--r--  lib/Target/NVPTX/NVPTXISelLowering.cpp | 552
-rw-r--r--  lib/Target/NVPTX/NVPTXISelLowering.h | 139
-rw-r--r--  lib/Target/NVPTX/NVPTXImageOptimizer.cpp | 178
-rw-r--r--  lib/Target/NVPTX/NVPTXInstrInfo.cpp | 7
-rw-r--r--  lib/Target/NVPTX/NVPTXInstrInfo.h | 16
-rw-r--r--  lib/Target/NVPTX/NVPTXIntrinsics.td | 1823
-rw-r--r--  lib/Target/NVPTX/NVPTXLowerAggrCopies.h | 6
-rw-r--r--  lib/Target/NVPTX/NVPTXMCExpr.cpp | 3
-rw-r--r--  lib/Target/NVPTX/NVPTXMCExpr.h | 12
-rw-r--r--  lib/Target/NVPTX/NVPTXMachineFunctionInfo.h | 46
-rw-r--r--  lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp | 6
-rw-r--r--  lib/Target/NVPTX/NVPTXRegisterInfo.cpp | 21
-rw-r--r--  lib/Target/NVPTX/NVPTXRegisterInfo.h | 23
-rw-r--r--  lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp | 357
-rw-r--r--  lib/Target/NVPTX/NVPTXSection.h | 16
-rw-r--r--  lib/Target/NVPTX/NVPTXSubtarget.cpp | 8
-rw-r--r--  lib/Target/NVPTX/NVPTXSubtarget.h | 7
-rw-r--r--  lib/Target/NVPTX/NVPTXTargetMachine.cpp | 47
-rw-r--r--  lib/Target/NVPTX/NVPTXTargetMachine.h | 24
-rw-r--r--  lib/Target/NVPTX/NVPTXTargetObjectFile.h | 38
-rw-r--r--  lib/Target/NVPTX/NVPTXUtilities.cpp | 67
-rw-r--r--  lib/Target/NVPTX/NVPTXUtilities.h | 4
-rw-r--r--  lib/Target/NVPTX/NVVMReflect.cpp | 14
-rw-r--r--  lib/Target/PowerPC/AsmParser/LLVMBuild.txt | 4
-rw-r--r--  lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp | 52
-rw-r--r--  lib/Target/PowerPC/Disassembler/LLVMBuild.txt | 2
-rw-r--r--  lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp | 11
-rw-r--r--  lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp | 3
-rw-r--r--  lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h | 6
-rw-r--r--  lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp | 20
-rw-r--r--  lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp | 7
-rw-r--r--  lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp | 4
-rw-r--r--  lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h | 4
-rw-r--r--  lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp | 5
-rw-r--r--  lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp | 3
-rw-r--r--  lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h | 10
-rw-r--r--  lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp | 25
-rw-r--r--  lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp | 8
-rw-r--r--  lib/Target/PowerPC/PPCAsmPrinter.cpp | 82
-rw-r--r--  lib/Target/PowerPC/PPCBranchSelector.cpp | 9
-rw-r--r--  lib/Target/PowerPC/PPCCTRLoops.cpp | 30
-rw-r--r--  lib/Target/PowerPC/PPCCodeEmitter.cpp | 10
-rw-r--r--  lib/Target/PowerPC/PPCFastISel.cpp | 71
-rw-r--r--  lib/Target/PowerPC/PPCFrameLowering.cpp | 102
-rw-r--r--  lib/Target/PowerPC/PPCFrameLowering.h | 28
-rw-r--r--  lib/Target/PowerPC/PPCHazardRecognizers.cpp | 5
-rw-r--r--  lib/Target/PowerPC/PPCHazardRecognizers.h | 24
-rw-r--r--  lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 164
-rw-r--r--  lib/Target/PowerPC/PPCISelLowering.cpp | 308
-rw-r--r--  lib/Target/PowerPC/PPCISelLowering.h | 120
-rw-r--r--  lib/Target/PowerPC/PPCInstrAltivec.td | 2
-rw-r--r--  lib/Target/PowerPC/PPCInstrInfo.cpp | 43
-rw-r--r--  lib/Target/PowerPC/PPCInstrInfo.h | 169
-rw-r--r--  lib/Target/PowerPC/PPCInstrInfo.td | 8
-rw-r--r--  lib/Target/PowerPC/PPCInstrVSX.td | 2
-rw-r--r--  lib/Target/PowerPC/PPCJITInfo.cpp | 3
-rw-r--r--  lib/Target/PowerPC/PPCJITInfo.h | 16
-rw-r--r--  lib/Target/PowerPC/PPCMCInstLower.cpp | 2
-rw-r--r--  lib/Target/PowerPC/PPCRegisterInfo.cpp | 9
-rw-r--r--  lib/Target/PowerPC/PPCRegisterInfo.h | 40
-rw-r--r--  lib/Target/PowerPC/PPCRegisterInfo.td | 12
-rw-r--r--  lib/Target/PowerPC/PPCSelectionDAGInfo.cpp | 3
-rw-r--r--  lib/Target/PowerPC/PPCSubtarget.cpp | 32
-rw-r--r--  lib/Target/PowerPC/PPCSubtarget.h | 15
-rw-r--r--  lib/Target/PowerPC/PPCTargetMachine.cpp | 36
-rw-r--r--  lib/Target/PowerPC/PPCTargetMachine.h | 26
-rw-r--r--  lib/Target/PowerPC/PPCTargetTransformInfo.cpp | 187
-rw-r--r--  lib/Target/R600/AMDGPU.h | 11
-rw-r--r--  lib/Target/R600/AMDGPU.td | 11
-rw-r--r--  lib/Target/R600/AMDGPUAsmPrinter.cpp | 25
-rw-r--r--  lib/Target/R600/AMDGPUAsmPrinter.h | 13
-rw-r--r--  lib/Target/R600/AMDGPUCallingConv.td | 2
-rw-r--r--  lib/Target/R600/AMDGPUConvertToISA.cpp | 4
-rw-r--r--  lib/Target/R600/AMDGPUFrameLowering.cpp | 2
-rw-r--r--  lib/Target/R600/AMDGPUFrameLowering.h | 13
-rw-r--r--  lib/Target/R600/AMDGPUISelDAGToDAG.cpp | 291
-rw-r--r--  lib/Target/R600/AMDGPUISelLowering.cpp | 744
-rw-r--r--  lib/Target/R600/AMDGPUISelLowering.h | 91
-rw-r--r--  lib/Target/R600/AMDGPUInstrInfo.cpp | 22
-rw-r--r--  lib/Target/R600/AMDGPUInstrInfo.h | 73
-rw-r--r--  lib/Target/R600/AMDGPUInstrInfo.td | 15
-rw-r--r--  lib/Target/R600/AMDGPUInstructions.td | 49
-rw-r--r--  lib/Target/R600/AMDGPUIntrinsics.td | 4
-rw-r--r--  lib/Target/R600/AMDGPUMCInstLower.cpp | 28
-rw-r--r--  lib/Target/R600/AMDGPUMCInstLower.h | 16
-rw-r--r--  lib/Target/R600/AMDGPURegisterInfo.cpp | 8
-rw-r--r--  lib/Target/R600/AMDGPURegisterInfo.h | 14
-rw-r--r--  lib/Target/R600/AMDGPUSubtarget.cpp | 13
-rw-r--r--  lib/Target/R600/AMDGPUSubtarget.h | 15
-rw-r--r--  lib/Target/R600/AMDGPUTargetMachine.cpp | 23
-rw-r--r--  lib/Target/R600/AMDGPUTargetMachine.h | 27
-rw-r--r--  lib/Target/R600/AMDGPUTargetTransformInfo.cpp | 26
-rw-r--r--  lib/Target/R600/AMDILCFGStructurizer.cpp | 58
-rw-r--r--  lib/Target/R600/AMDILISelLowering.cpp | 92
-rw-r--r--  lib/Target/R600/AMDILIntrinsicInfo.cpp | 2
-rw-r--r--  lib/Target/R600/AMDILIntrinsicInfo.h | 12
-rw-r--r--  lib/Target/R600/AMDILIntrinsics.td | 4
-rw-r--r--  lib/Target/R600/CMakeLists.txt | 1
-rw-r--r--  lib/Target/R600/CaymanInstructions.td | 7
-rw-r--r--  lib/Target/R600/EvergreenInstructions.td | 18
-rw-r--r--  lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp | 118
-rw-r--r--  lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h | 13
-rw-r--r--  lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp | 26
-rw-r--r--  lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp | 4
-rw-r--r--  lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.h | 2
-rw-r--r--  lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp | 4
-rw-r--r--  lib/Target/R600/MCTargetDesc/LLVMBuild.txt | 4
-rw-r--r--  lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp | 10
-rw-r--r--  lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp | 10
-rw-r--r--  lib/Target/R600/Processors.td | 2
-rw-r--r--  lib/Target/R600/R600ClauseMergePass.cpp | 7
-rw-r--r--  lib/Target/R600/R600ControlFlowFinalizer.cpp | 15
-rw-r--r--  lib/Target/R600/R600EmitClauseMarkers.cpp | 6
-rw-r--r--  lib/Target/R600/R600ExpandSpecialInstrs.cpp | 6
-rw-r--r--  lib/Target/R600/R600ISelLowering.cpp | 150
-rw-r--r--  lib/Target/R600/R600ISelLowering.h | 32
-rw-r--r--  lib/Target/R600/R600InstrInfo.cpp | 8
-rw-r--r--  lib/Target/R600/R600InstrInfo.h | 76
-rw-r--r--  lib/Target/R600/R600Instructions.td | 6
-rw-r--r--  lib/Target/R600/R600MachineFunctionInfo.h | 2
-rw-r--r--  lib/Target/R600/R600MachineScheduler.cpp | 14
-rw-r--r--  lib/Target/R600/R600MachineScheduler.h | 15
-rw-r--r--  lib/Target/R600/R600OptimizeVectorRegisters.cpp | 11
-rw-r--r--  lib/Target/R600/R600Packetizer.cpp | 24
-rw-r--r--  lib/Target/R600/R600RegisterInfo.h | 15
-rw-r--r--  lib/Target/R600/R600TextureIntrinsicsReplacer.cpp | 8
-rw-r--r--  lib/Target/R600/SIAnnotateControlFlow.cpp | 28
-rw-r--r--  lib/Target/R600/SIFixSGPRCopies.cpp | 23
-rw-r--r--  lib/Target/R600/SIISelLowering.cpp | 307
-rw-r--r--  lib/Target/R600/SIISelLowering.h | 36
-rw-r--r--  lib/Target/R600/SIInsertWaits.cpp | 8
-rw-r--r--  lib/Target/R600/SIInstrFormats.td | 23
-rw-r--r--  lib/Target/R600/SIInstrInfo.cpp | 379
-rw-r--r--  lib/Target/R600/SIInstrInfo.h | 66
-rw-r--r--  lib/Target/R600/SIInstrInfo.td | 146
-rw-r--r--  lib/Target/R600/SIInstructions.td | 1109
-rw-r--r--  lib/Target/R600/SILowerControlFlow.cpp | 10
-rw-r--r--  lib/Target/R600/SILowerI1Copies.cpp | 148
-rw-r--r--  lib/Target/R600/SIMachineFunctionInfo.cpp | 57
-rw-r--r--  lib/Target/R600/SIMachineFunctionInfo.h | 9
-rw-r--r--  lib/Target/R600/SIRegisterInfo.cpp | 18
-rw-r--r--  lib/Target/R600/SIRegisterInfo.h | 20
-rw-r--r--  lib/Target/R600/SIRegisterInfo.td | 14
-rw-r--r--  lib/Target/R600/SITypeRewriter.cpp | 24
-rw-r--r--  lib/Target/Sparc/AsmParser/LLVMBuild.txt | 2
-rw-r--r--  lib/Target/Sparc/AsmParser/SparcAsmParser.cpp | 51
-rw-r--r--  lib/Target/Sparc/DelaySlotFiller.cpp | 7
-rw-r--r--  lib/Target/Sparc/Disassembler/LLVMBuild.txt | 2
-rw-r--r--  lib/Target/Sparc/Disassembler/SparcDisassembler.cpp | 29
-rw-r--r--  lib/Target/Sparc/InstPrinter/SparcInstPrinter.cpp | 4
-rw-r--r--  lib/Target/Sparc/InstPrinter/SparcInstPrinter.h | 8
-rw-r--r--  lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp | 16
-rw-r--r--  lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp | 2
-rw-r--r--  lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h | 14
-rw-r--r--  lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp | 5
-rw-r--r--  lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp | 3
-rw-r--r--  lib/Target/Sparc/MCTargetDesc/SparcMCExpr.h | 10
-rw-r--r--  lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp | 16
-rw-r--r--  lib/Target/Sparc/SparcAsmPrinter.cpp | 17
-rw-r--r--  lib/Target/Sparc/SparcCodeEmitter.cpp | 13
-rw-r--r--  lib/Target/Sparc/SparcFrameLowering.cpp | 9
-rw-r--r--  lib/Target/Sparc/SparcFrameLowering.h | 17
-rw-r--r--  lib/Target/Sparc/SparcISelDAGToDAG.cpp | 12
-rw-r--r--  lib/Target/Sparc/SparcISelLowering.cpp | 110
-rw-r--r--  lib/Target/Sparc/SparcISelLowering.h | 52
-rw-r--r--  lib/Target/Sparc/SparcInstr64Bit.td | 4
-rw-r--r--  lib/Target/Sparc/SparcInstrAliases.td | 8
-rw-r--r--  lib/Target/Sparc/SparcInstrInfo.cpp | 15
-rw-r--r--  lib/Target/Sparc/SparcInstrInfo.h | 68
-rw-r--r--  lib/Target/Sparc/SparcJITInfo.cpp | 3
-rw-r--r--  lib/Target/Sparc/SparcJITInfo.h | 14
-rw-r--r--  lib/Target/Sparc/SparcMCInstLower.cpp | 2
-rw-r--r--  lib/Target/Sparc/SparcRegisterInfo.cpp | 8
-rw-r--r--  lib/Target/Sparc/SparcRegisterInfo.h | 15
-rw-r--r--  lib/Target/Sparc/SparcSelectionDAGInfo.cpp | 3
-rw-r--r--  lib/Target/Sparc/SparcSubtarget.cpp | 6
-rw-r--r--  lib/Target/Sparc/SparcTargetMachine.cpp | 4
-rw-r--r--  lib/Target/Sparc/SparcTargetMachine.h | 20
-rw-r--r--  lib/Target/Sparc/SparcTargetObjectFile.cpp | 2
-rw-r--r--  lib/Target/Sparc/SparcTargetStreamer.h | 8
-rw-r--r--  lib/Target/SystemZ/AsmParser/LLVMBuild.txt | 2
-rw-r--r--  lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp | 11
-rw-r--r--  lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp | 11
-rw-r--r--  lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp | 4
-rw-r--r--  lib/Target/SystemZ/MCTargetDesc/LLVMBuild.txt | 2
-rw-r--r--  lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp | 3
-rw-r--r--  lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp | 2
-rw-r--r--  lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp | 7
-rw-r--r--  lib/Target/SystemZ/SystemZElimCompare.cpp | 8
-rw-r--r--  lib/Target/SystemZ/SystemZFrameLowering.cpp | 2
-rw-r--r--  lib/Target/SystemZ/SystemZISelDAGToDAG.cpp | 32
-rw-r--r--  lib/Target/SystemZ/SystemZISelLowering.cpp | 54
-rw-r--r--  lib/Target/SystemZ/SystemZInstrFormats.td | 2
-rw-r--r--  lib/Target/SystemZ/SystemZInstrInfo.cpp | 24
-rw-r--r--  lib/Target/SystemZ/SystemZInstrInfo.h | 2
-rw-r--r--  lib/Target/SystemZ/SystemZLongBranch.cpp | 13
-rw-r--r--  lib/Target/SystemZ/SystemZRegisterInfo.cpp | 8
-rw-r--r--  lib/Target/SystemZ/SystemZRegisterInfo.h | 2
-rw-r--r--  lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp | 5
-rw-r--r--  lib/Target/SystemZ/SystemZShortenInst.cpp | 8
-rw-r--r--  lib/Target/SystemZ/SystemZSubtarget.cpp | 6
-rw-r--r--  lib/Target/Target.cpp | 8
-rw-r--r--  lib/Target/TargetLoweringObjectFile.cpp | 12
-rw-r--r--  lib/Target/TargetMachine.cpp | 69
-rw-r--r--  lib/Target/TargetMachineC.cpp | 22
-rw-r--r--  lib/Target/TargetSubtargetInfo.cpp | 11
-rw-r--r--  lib/Target/X86/Android.mk | 1
-rw-r--r--  lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp | 122
-rw-r--r--  lib/Target/X86/AsmParser/X86AsmInstrumentation.h | 17
-rw-r--r--  lib/Target/X86/AsmParser/X86AsmParser.cpp | 93
-rw-r--r--  lib/Target/X86/AsmParser/X86Operand.h | 6
-rw-r--r--  lib/Target/X86/CMakeLists.txt | 1
-rw-r--r--  lib/Target/X86/Disassembler/Android.mk | 3
-rw-r--r--  lib/Target/X86/Disassembler/CMakeLists.txt | 2
-rw-r--r--  lib/Target/X86/Disassembler/Makefile | 4
-rw-r--r--  lib/Target/X86/Disassembler/X86Disassembler.cpp | 51
-rw-r--r--  lib/Target/X86/Disassembler/X86Disassembler.h | 17
-rw-r--r--  lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp (renamed from lib/Target/X86/Disassembler/X86DisassemblerDecoder.c) | 181
-rw-r--r--  lib/Target/X86/Disassembler/X86DisassemblerDecoder.h | 362
-rw-r--r--  lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h | 221
-rw-r--r--  lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp | 3
-rw-r--r--  lib/Target/X86/InstPrinter/X86ATTInstPrinter.h | 2
-rw-r--r--  lib/Target/X86/InstPrinter/X86InstComments.cpp | 4
-rw-r--r--  lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp | 3
-rw-r--r--  lib/Target/X86/MCTargetDesc/Android.mk | 3
-rw-r--r--  lib/Target/X86/MCTargetDesc/CMakeLists.txt | 1
-rw-r--r--  lib/Target/X86/MCTargetDesc/LLVMBuild.txt | 2
-rw-r--r--  lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp | 16
-rw-r--r--  lib/Target/X86/MCTargetDesc/X86BaseInfo.h | 9
-rw-r--r--  lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp | 8
-rw-r--r--  lib/Target/X86/MCTargetDesc/X86ELFRelocationInfo.cpp | 2
-rw-r--r--  lib/Target/X86/MCTargetDesc/X86FixupKinds.h | 1
-rw-r--r--  lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp | 8
-rw-r--r--  lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp | 19
-rw-r--r--  lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp | 42
-rw-r--r--  lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h | 9
-rw-r--r--  lib/Target/X86/MCTargetDesc/X86MachORelocationInfo.cpp | 2
-rw-r--r--  lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp | 23
-rw-r--r--  lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp | 72
-rw-r--r--  lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp | 51
-rw-r--r--  lib/Target/X86/X86.h | 4
-rw-r--r--  lib/Target/X86/X86.td | 27
-rw-r--r--  lib/Target/X86/X86AsmPrinter.cpp | 192
-rw-r--r--  lib/Target/X86/X86AsmPrinter.h | 4
-rw-r--r--  lib/Target/X86/X86COFFMachineModuleInfo.cpp | 19
-rw-r--r--  lib/Target/X86/X86COFFMachineModuleInfo.h | 46
-rw-r--r--  lib/Target/X86/X86CallingConv.h | 27
-rw-r--r--  lib/Target/X86/X86CallingConv.td | 10
-rw-r--r--  lib/Target/X86/X86CodeEmitter.cpp | 9
-rw-r--r--  lib/Target/X86/X86FastISel.cpp | 36
-rw-r--r--  lib/Target/X86/X86FixupLEAs.cpp | 107
-rw-r--r--  lib/Target/X86/X86FloatingPoint.cpp | 5
-rw-r--r--  lib/Target/X86/X86FrameLowering.cpp | 91
-rw-r--r--  lib/Target/X86/X86FrameLowering.h | 2
-rw-r--r--  lib/Target/X86/X86ISelDAGToDAG.cpp | 98
-rw-r--r--  lib/Target/X86/X86ISelLowering.cpp | 2193
-rw-r--r--  lib/Target/X86/X86ISelLowering.h | 42
-rw-r--r--  lib/Target/X86/X86InstrAVX512.td | 206
-rw-r--r--  lib/Target/X86/X86InstrBuilder.h | 3
-rw-r--r--  lib/Target/X86/X86InstrCompiler.td | 4
-rw-r--r--  lib/Target/X86/X86InstrFMA.td | 46
-rw-r--r--  lib/Target/X86/X86InstrFragmentsSIMD.td | 5
-rw-r--r--  lib/Target/X86/X86InstrInfo.cpp | 262
-rw-r--r--  lib/Target/X86/X86InstrInfo.h | 9
-rw-r--r--  lib/Target/X86/X86InstrInfo.td | 200
-rw-r--r--  lib/Target/X86/X86InstrMMX.td | 8
-rw-r--r--  lib/Target/X86/X86InstrSSE.td | 147
-rw-r--r--  lib/Target/X86/X86InstrSystem.td | 2
-rw-r--r--  lib/Target/X86/X86JITInfo.cpp | 10
-rw-r--r--  lib/Target/X86/X86MCInstLower.cpp | 8
-rw-r--r--  lib/Target/X86/X86PadShortFunction.cpp | 9
-rw-r--r--  lib/Target/X86/X86RegisterInfo.cpp | 8
-rw-r--r--  lib/Target/X86/X86RegisterInfo.h | 4
-rw-r--r--  lib/Target/X86/X86SchedHaswell.td | 3
-rw-r--r--  lib/Target/X86/X86SchedSandyBridge.td | 3
-rw-r--r--  lib/Target/X86/X86ScheduleAtom.td | 4
-rw-r--r--  lib/Target/X86/X86ScheduleSLM.td | 849
-rw-r--r--  lib/Target/X86/X86SelectionDAGInfo.cpp | 36
-rw-r--r--  lib/Target/X86/X86Subtarget.cpp | 364
-rw-r--r--  lib/Target/X86/X86Subtarget.h | 11
-rw-r--r--  lib/Target/X86/X86TargetMachine.cpp | 33
-rw-r--r--  lib/Target/X86/X86TargetObjectFile.cpp | 14
-rw-r--r--  lib/Target/X86/X86TargetTransformInfo.cpp | 159
-rw-r--r--  lib/Target/X86/X86VZeroUpper.cpp | 6
-rw-r--r--  lib/Target/XCore/Disassembler/XCoreDisassembler.cpp | 19
-rw-r--r--  lib/Target/XCore/InstPrinter/XCoreInstPrinter.cpp | 3
-rw-r--r--  lib/Target/XCore/InstPrinter/XCoreInstPrinter.h | 4
-rw-r--r--  lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.cpp | 2
-rw-r--r--  lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.h | 2
-rw-r--r--  lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp | 13
-rw-r--r--  lib/Target/XCore/XCoreAsmPrinter.cpp | 17
-rw-r--r--  lib/Target/XCore/XCoreFrameLowering.cpp | 19
-rw-r--r--  lib/Target/XCore/XCoreFrameLowering.h | 27
-rw-r--r--  lib/Target/XCore/XCoreFrameToArgsOffsetElim.cpp | 4
-rw-r--r--  lib/Target/XCore/XCoreISelDAGToDAG.cpp | 17
-rw-r--r--  lib/Target/XCore/XCoreISelLowering.cpp | 125
-rw-r--r--  lib/Target/XCore/XCoreISelLowering.h | 51
-rw-r--r--  lib/Target/XCore/XCoreInstrInfo.cpp | 17
-rw-r--r--  lib/Target/XCore/XCoreInstrInfo.h | 76
-rw-r--r--  lib/Target/XCore/XCoreLowerThreadLocal.cpp | 7
-rw-r--r--  lib/Target/XCore/XCoreRegisterInfo.cpp | 12
-rw-r--r--  lib/Target/XCore/XCoreRegisterInfo.h | 15
-rw-r--r--  lib/Target/XCore/XCoreSelectionDAGInfo.cpp | 19
-rw-r--r--  lib/Target/XCore/XCoreSelectionDAGInfo.h | 4
-rw-r--r--  lib/Target/XCore/XCoreSubtarget.cpp | 6
-rw-r--r--  lib/Target/XCore/XCoreTargetMachine.cpp | 6
-rw-r--r--  lib/Target/XCore/XCoreTargetMachine.h | 18
-rw-r--r--  lib/Target/XCore/XCoreTargetObjectFile.h | 2
-rw-r--r--  lib/Target/XCore/XCoreTargetTransformInfo.cpp | 5
-rw-r--r--  lib/Transforms/Hello/Hello.cpp | 3
-rw-r--r--  lib/Transforms/IPO/ArgumentPromotion.cpp | 17
-rw-r--r--  lib/Transforms/IPO/ConstantMerge.cpp | 9
-rw-r--r--  lib/Transforms/IPO/DeadArgumentElimination.cpp | 8
-rw-r--r--  lib/Transforms/IPO/ExtractGV.cpp | 9
-rw-r--r--  lib/Transforms/IPO/FunctionAttrs.cpp | 28
-rw-r--r--  lib/Transforms/IPO/GlobalDCE.cpp | 23
-rw-r--r--  lib/Transforms/IPO/GlobalOpt.cpp | 269
-rw-r--r--  lib/Transforms/IPO/IPConstantPropagation.cpp | 11
-rw-r--r--  lib/Transforms/IPO/InlineAlways.cpp | 11
-rw-r--r--  lib/Transforms/IPO/InlineSimple.cpp | 7
-rw-r--r--  lib/Transforms/IPO/Inliner.cpp | 70
-rw-r--r--  lib/Transforms/IPO/Internalize.cpp | 10
-rw-r--r--  lib/Transforms/IPO/LoopExtractor.cpp | 7
-rw-r--r--  lib/Transforms/IPO/MergeFunctions.cpp | 734
-rw-r--r--  lib/Transforms/IPO/PartialInlining.cpp | 11
-rw-r--r--  lib/Transforms/IPO/PassManagerBuilder.cpp | 28
-rw-r--r--  lib/Transforms/IPO/PruneEH.cpp | 5
-rw-r--r--  lib/Transforms/IPO/StripDeadPrototypes.cpp | 3
-rw-r--r--  lib/Transforms/IPO/StripSymbols.cpp | 2
-rw-r--r--  lib/Transforms/InstCombine/InstCombine.h | 107
-rw-r--r--  lib/Transforms/InstCombine/InstCombineAddSub.cpp | 102
-rw-r--r--  lib/Transforms/InstCombine/InstCombineAndOrXor.cpp | 149
-rw-r--r--  lib/Transforms/InstCombine/InstCombineCalls.cpp | 305
-rw-r--r--  lib/Transforms/InstCombine/InstCombineCasts.cpp | 89
-rw-r--r--  lib/Transforms/InstCombine/InstCombineCompares.cpp | 390
-rw-r--r--  lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp | 51
-rw-r--r--  lib/Transforms/InstCombine/InstCombineMulDivRem.cpp | 128
-rw-r--r--  lib/Transforms/InstCombine/InstCombinePHI.cpp | 80
-rw-r--r--  lib/Transforms/InstCombine/InstCombineSelect.cpp | 92
-rw-r--r--  lib/Transforms/InstCombine/InstCombineShifts.cpp | 91
-rw-r--r--  lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp | 85
-rw-r--r--  lib/Transforms/InstCombine/InstCombineVectorOps.cpp | 115
-rw-r--r--  lib/Transforms/InstCombine/InstCombineWorklist.h | 7
-rw-r--r--lib/Transforms/InstCombine/InstructionCombining.cpp246
-rw-r--r--lib/Transforms/Instrumentation/AddressSanitizer.cpp310
-rw-r--r--lib/Transforms/Instrumentation/BoundsChecking.cpp9
-rw-r--r--lib/Transforms/Instrumentation/DataFlowSanitizer.cpp32
-rw-r--r--lib/Transforms/Instrumentation/DebugIR.cpp30
-rw-r--r--lib/Transforms/Instrumentation/DebugIR.h2
-rw-r--r--lib/Transforms/Instrumentation/GCOVProfiling.cpp45
-rw-r--r--lib/Transforms/Instrumentation/MemorySanitizer.cpp226
-rw-r--r--lib/Transforms/Instrumentation/ThreadSanitizer.cpp14
-rw-r--r--lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h40
-rw-r--r--lib/Transforms/ObjCARC/DependencyAnalysis.cpp5
-rw-r--r--lib/Transforms/ObjCARC/ObjCARCAPElim.cpp13
-rw-r--r--lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.cpp3
-rw-r--r--lib/Transforms/ObjCARC/ObjCARCContract.cpp9
-rw-r--r--lib/Transforms/ObjCARC/ObjCARCExpand.cpp4
-rw-r--r--lib/Transforms/ObjCARC/ObjCARCOpts.cpp49
-rw-r--r--lib/Transforms/Scalar/ADCE.cpp3
-rw-r--r--lib/Transforms/Scalar/Android.mk6
-rw-r--r--lib/Transforms/Scalar/CMakeLists.txt11
-rw-r--r--lib/Transforms/Scalar/ConstantHoisting.cpp24
-rw-r--r--lib/Transforms/Scalar/ConstantProp.cpp5
-rw-r--r--lib/Transforms/Scalar/CorrelatedValuePropagation.cpp5
-rw-r--r--lib/Transforms/Scalar/DCE.cpp3
-rw-r--r--lib/Transforms/Scalar/DeadStoreElimination.cpp33
-rw-r--r--lib/Transforms/Scalar/EarlyCSE.cpp23
-rw-r--r--lib/Transforms/Scalar/FlattenCFGPass.cpp3
-rw-r--r--lib/Transforms/Scalar/GVN.cpp115
-rw-r--r--lib/Transforms/Scalar/GlobalMerge.cpp8
-rw-r--r--lib/Transforms/Scalar/IndVarSimplify.cpp80
-rw-r--r--lib/Transforms/Scalar/JumpThreading.cpp39
-rw-r--r--lib/Transforms/Scalar/LICM.cpp15
-rw-r--r--lib/Transforms/Scalar/LoopDeletion.cpp3
-rw-r--r--lib/Transforms/Scalar/LoopIdiomRecognize.cpp65
-rw-r--r--lib/Transforms/Scalar/LoopInstSimplify.cpp17
-rw-r--r--lib/Transforms/Scalar/LoopRerollPass.cpp7
-rw-r--r--lib/Transforms/Scalar/LoopRotation.cpp36
-rw-r--r--lib/Transforms/Scalar/LoopStrengthReduce.cpp683
-rw-r--r--lib/Transforms/Scalar/LoopUnrollPass.cpp12
-rw-r--r--lib/Transforms/Scalar/LoopUnswitch.cpp62
-rw-r--r--lib/Transforms/Scalar/LowerAtomic.cpp5
-rw-r--r--lib/Transforms/Scalar/MemCpyOptimizer.cpp41
-rw-r--r--lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp3
-rw-r--r--lib/Transforms/Scalar/Reassociate.cpp43
-rw-r--r--lib/Transforms/Scalar/Reg2Mem.cpp3
-rw-r--r--lib/Transforms/Scalar/SCCP.cpp33
-rw-r--r--lib/Transforms/Scalar/SROA.cpp106
-rw-r--r--lib/Transforms/Scalar/SampleProfile.cpp10
-rw-r--r--lib/Transforms/Scalar/Scalar.cpp2
-rw-r--r--lib/Transforms/Scalar/ScalarReplAggregates.cpp79
-rw-r--r--lib/Transforms/Scalar/Scalarizer.cpp13
-rw-r--r--lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp623
-rw-r--r--lib/Transforms/Scalar/SimplifyCFGPass.cpp13
-rw-r--r--lib/Transforms/Scalar/Sink.cpp13
-rw-r--r--lib/Transforms/Scalar/StructurizeCFG.cpp27
-rw-r--r--lib/Transforms/Scalar/TailRecursionElimination.cpp380
-rw-r--r--lib/Transforms/Utils/AddDiscriminators.cpp15
-rw-r--r--lib/Transforms/Utils/Android.mk1
-rw-r--r--lib/Transforms/Utils/BasicBlockUtils.cpp117
-rw-r--r--lib/Transforms/Utils/BreakCriticalEdges.cpp15
-rw-r--r--lib/Transforms/Utils/BuildLibCalls.cpp31
-rw-r--r--lib/Transforms/Utils/BypassSlowDivision.cpp7
-rw-r--r--lib/Transforms/Utils/CMakeLists.txt1
-rw-r--r--lib/Transforms/Utils/CloneFunction.cpp8
-rw-r--r--lib/Transforms/Utils/CloneModule.cpp14
-rw-r--r--lib/Transforms/Utils/CmpInstAnalysis.cpp2
-rw-r--r--lib/Transforms/Utils/CodeExtractor.cpp20
-rw-r--r--lib/Transforms/Utils/CtorUtils.cpp183
-rw-r--r--lib/Transforms/Utils/DemoteRegToStack.cpp14
-rw-r--r--lib/Transforms/Utils/FlattenCFG.cpp14
-rw-r--r--lib/Transforms/Utils/GlobalStatus.cpp4
-rw-r--r--lib/Transforms/Utils/InlineFunction.cpp253
-rw-r--r--lib/Transforms/Utils/IntegerDivision.cpp3
-rw-r--r--lib/Transforms/Utils/LCSSA.cpp5
-rw-r--r--lib/Transforms/Utils/Local.cpp84
-rw-r--r--lib/Transforms/Utils/LoopSimplify.cpp42
-rw-r--r--lib/Transforms/Utils/LoopUnroll.cpp28
-rw-r--r--lib/Transforms/Utils/LoopUnrollRuntime.cpp13
-rw-r--r--lib/Transforms/Utils/LowerExpectIntrinsic.cpp3
-rw-r--r--lib/Transforms/Utils/LowerInvoke.cpp3
-rw-r--r--lib/Transforms/Utils/LowerSwitch.cpp7
-rw-r--r--lib/Transforms/Utils/Mem2Reg.cpp3
-rw-r--r--lib/Transforms/Utils/ModuleUtils.cpp29
-rw-r--r--lib/Transforms/Utils/PromoteMemoryToRegister.cpp29
-rw-r--r--lib/Transforms/Utils/SSAUpdater.cpp21
-rw-r--r--lib/Transforms/Utils/SimplifyCFG.cpp153
-rw-r--r--lib/Transforms/Utils/SimplifyIndVar.cpp30
-rw-r--r--lib/Transforms/Utils/SimplifyInstructions.cpp24
-rw-r--r--lib/Transforms/Utils/SimplifyLibCalls.cpp316
-rw-r--r--lib/Transforms/Utils/SpecialCaseList.cpp6
-rw-r--r--lib/Transforms/Utils/UnifyFunctionExitNodes.cpp8
-rw-r--r--lib/Transforms/Utils/ValueMapper.cpp20
-rw-r--r--lib/Transforms/Vectorize/BBVectorize.cpp94
-rw-r--r--lib/Transforms/Vectorize/LoopVectorize.cpp443
-rw-r--r--lib/Transforms/Vectorize/SLPVectorizer.cpp257
1281 files changed, 82909 insertions, 99167 deletions
diff --git a/lib/Analysis/AliasAnalysis.cpp b/lib/Analysis/AliasAnalysis.cpp
index 9583bbe..57237e5 100644
--- a/lib/Analysis/AliasAnalysis.cpp
+++ b/lib/Analysis/AliasAnalysis.cpp
@@ -473,7 +473,7 @@ AliasAnalysis::~AliasAnalysis() {}
///
void AliasAnalysis::InitializeAliasAnalysis(Pass *P) {
DataLayoutPass *DLP = P->getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : 0;
+ DL = DLP ? &DLP->getDataLayout() : nullptr;
TLI = P->getAnalysisIfAvailable<TargetLibraryInfo>();
AA = &P->getAnalysis<AliasAnalysis>();
}
diff --git a/lib/Analysis/AliasAnalysisCounter.cpp b/lib/Analysis/AliasAnalysisCounter.cpp
index 2e3bc55..b860914 100644
--- a/lib/Analysis/AliasAnalysisCounter.cpp
+++ b/lib/Analysis/AliasAnalysisCounter.cpp
@@ -126,7 +126,7 @@ AliasAnalysis::AliasResult
AliasAnalysisCounter::alias(const Location &LocA, const Location &LocB) {
AliasResult R = getAnalysis<AliasAnalysis>().alias(LocA, LocB);
- const char *AliasString = 0;
+ const char *AliasString = nullptr;
switch (R) {
case NoAlias: No++; AliasString = "No alias"; break;
case MayAlias: May++; AliasString = "May alias"; break;
@@ -152,7 +152,7 @@ AliasAnalysisCounter::getModRefInfo(ImmutableCallSite CS,
const Location &Loc) {
ModRefResult R = getAnalysis<AliasAnalysis>().getModRefInfo(CS, Loc);
- const char *MRString = 0;
+ const char *MRString = nullptr;
switch (R) {
case NoModRef: NoMR++; MRString = "NoModRef"; break;
case Ref: JustRef++; MRString = "JustRef"; break;
diff --git a/lib/Analysis/AliasSetTracker.cpp b/lib/Analysis/AliasSetTracker.cpp
index ab1005e..a45fe23 100644
--- a/lib/Analysis/AliasSetTracker.cpp
+++ b/lib/Analysis/AliasSetTracker.cpp
@@ -72,16 +72,16 @@ void AliasSet::mergeSetIn(AliasSet &AS, AliasSetTracker &AST) {
AS.PtrList->setPrevInList(PtrListEnd);
PtrListEnd = AS.PtrListEnd;
- AS.PtrList = 0;
+ AS.PtrList = nullptr;
AS.PtrListEnd = &AS.PtrList;
- assert(*AS.PtrListEnd == 0 && "End of list is not null?");
+ assert(*AS.PtrListEnd == nullptr && "End of list is not null?");
}
}
void AliasSetTracker::removeAliasSet(AliasSet *AS) {
if (AliasSet *Fwd = AS->Forward) {
Fwd->dropRef(*this);
- AS->Forward = 0;
+ AS->Forward = nullptr;
}
AliasSets.erase(AS);
}
@@ -115,10 +115,10 @@ void AliasSet::addPointer(AliasSetTracker &AST, PointerRec &Entry,
Entry.updateSizeAndTBAAInfo(Size, TBAAInfo);
// Add it to the end of the list...
- assert(*PtrListEnd == 0 && "End of list is not null?");
+ assert(*PtrListEnd == nullptr && "End of list is not null?");
*PtrListEnd = &Entry;
PtrListEnd = Entry.setPrevInList(PtrListEnd);
- assert(*PtrListEnd == 0 && "End of list is not null?");
+ assert(*PtrListEnd == nullptr && "End of list is not null?");
addRef(); // Entry points to alias set.
}
@@ -217,11 +217,11 @@ void AliasSetTracker::clear() {
AliasSet *AliasSetTracker::findAliasSetForPointer(const Value *Ptr,
uint64_t Size,
const MDNode *TBAAInfo) {
- AliasSet *FoundSet = 0;
+ AliasSet *FoundSet = nullptr;
for (iterator I = begin(), E = end(); I != E; ++I) {
if (I->Forward || !I->aliasesPointer(Ptr, Size, TBAAInfo, AA)) continue;
- if (FoundSet == 0) { // If this is the first alias set ptr can go into.
+ if (!FoundSet) { // If this is the first alias set ptr can go into.
FoundSet = I; // Remember it.
} else { // Otherwise, we must merge the sets.
FoundSet->mergeSetIn(*I, *this); // Merge in contents.
@@ -245,12 +245,12 @@ bool AliasSetTracker::containsPointer(Value *Ptr, uint64_t Size,
AliasSet *AliasSetTracker::findAliasSetForUnknownInst(Instruction *Inst) {
- AliasSet *FoundSet = 0;
+ AliasSet *FoundSet = nullptr;
for (iterator I = begin(), E = end(); I != E; ++I) {
if (I->Forward || !I->aliasesUnknownInst(Inst, AA))
continue;
- if (FoundSet == 0) // If this is the first alias set ptr can go into.
+ if (!FoundSet) // If this is the first alias set ptr can go into.
FoundSet = I; // Remember it.
else if (!I->Forward) // Otherwise, we must merge the sets.
FoundSet->mergeSetIn(*I, *this); // Merge in contents.
diff --git a/lib/Analysis/Analysis.cpp b/lib/Analysis/Analysis.cpp
index c960123..01c1c7e 100644
--- a/lib/Analysis/Analysis.cpp
+++ b/lib/Analysis/Analysis.cpp
@@ -73,7 +73,7 @@ void LLVMInitializeAnalysis(LLVMPassRegistryRef R) {
LLVMBool LLVMVerifyModule(LLVMModuleRef M, LLVMVerifierFailureAction Action,
char **OutMessages) {
- raw_ostream *DebugOS = Action != LLVMReturnStatusAction ? &errs() : 0;
+ raw_ostream *DebugOS = Action != LLVMReturnStatusAction ? &errs() : nullptr;
std::string Messages;
raw_string_ostream MsgsOS(Messages);
@@ -94,7 +94,8 @@ LLVMBool LLVMVerifyModule(LLVMModuleRef M, LLVMVerifierFailureAction Action,
LLVMBool LLVMVerifyFunction(LLVMValueRef Fn, LLVMVerifierFailureAction Action) {
LLVMBool Result = verifyFunction(
- *unwrap<Function>(Fn), Action != LLVMReturnStatusAction ? &errs() : 0);
+ *unwrap<Function>(Fn), Action != LLVMReturnStatusAction ? &errs()
+ : nullptr);
if (Action == LLVMAbortProcessAction && Result)
report_fatal_error("Broken function found, compilation aborted!");
diff --git a/lib/Analysis/Android.mk b/lib/Analysis/Android.mk
index 76eee74..a8fef77 100644
--- a/lib/Analysis/Android.mk
+++ b/lib/Analysis/Android.mk
@@ -9,6 +9,7 @@ analysis_SRC_FILES := \
Analysis.cpp \
BasicAliasAnalysis.cpp \
BlockFrequencyInfo.cpp \
+ BlockFrequencyInfoImpl.cpp \
BranchProbabilityInfo.cpp \
CFG.cpp \
CFGPrinter.cpp \
diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp
index e267374..fe90b84 100644
--- a/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/lib/Analysis/BasicAliasAnalysis.cpp
@@ -298,7 +298,7 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
do {
// See if this is a bitcast or GEP.
const Operator *Op = dyn_cast<Operator>(V);
- if (Op == 0) {
+ if (!Op) {
// The only non-operator case we can handle are GlobalAliases.
if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
if (!GA->mayBeOverridden()) {
@@ -315,7 +315,7 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
}
const GEPOperator *GEPOp = dyn_cast<GEPOperator>(Op);
- if (GEPOp == 0) {
+ if (!GEPOp) {
// If it's not a GEP, hand it off to SimplifyInstruction to see if it
// can come up with something. This matches what GetUnderlyingObject does.
if (const Instruction *I = dyn_cast<Instruction>(V))
@@ -336,7 +336,7 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
// If we are lacking DataLayout information, we can't compute the offsets of
// elements computed by GEPs. However, we can handle bitcast equivalent
// GEPs.
- if (DL == 0) {
+ if (!DL) {
if (!GEPOp->hasAllZeroIndices())
return V;
V = GEPOp->getOperand(0);
@@ -433,7 +433,7 @@ static const Function *getParent(const Value *V) {
if (const Argument *arg = dyn_cast<Argument>(V))
return arg->getParent();
- return NULL;
+ return nullptr;
}
static bool notDifferentParent(const Value *O1, const Value *O2) {
@@ -753,7 +753,7 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
// Finally, handle specific knowledge of intrinsics.
const IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction());
- if (II != 0)
+ if (II != nullptr)
switch (II->getIntrinsicID()) {
default: break;
case Intrinsic::memcpy:
@@ -868,21 +868,6 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
return ModRefResult(AliasAnalysis::getModRefInfo(CS, Loc) & Min);
}
-static bool areVarIndicesEqual(SmallVectorImpl<VariableGEPIndex> &Indices1,
- SmallVectorImpl<VariableGEPIndex> &Indices2) {
- unsigned Size1 = Indices1.size();
- unsigned Size2 = Indices2.size();
-
- if (Size1 != Size2)
- return false;
-
- for (unsigned I = 0; I != Size1; ++I)
- if (Indices1[I] != Indices2[I])
- return false;
-
- return true;
-}
-
/// aliasGEP - Provide a bunch of ad-hoc rules to disambiguate a GEP instruction
/// against another pointer. We know that V1 is a GEP, but we don't know
/// anything about V2. UnderlyingV1 is GetUnderlyingObject(GEP1, DL),
@@ -904,8 +889,8 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
// derived pointer.
if (const GEPOperator *GEP2 = dyn_cast<GEPOperator>(V2)) {
// Do the base pointers alias?
- AliasResult BaseAlias = aliasCheck(UnderlyingV1, UnknownSize, 0,
- UnderlyingV2, UnknownSize, 0);
+ AliasResult BaseAlias = aliasCheck(UnderlyingV1, UnknownSize, nullptr,
+ UnderlyingV2, UnknownSize, nullptr);
// Check for geps of non-aliasing underlying pointers where the offsets are
// identical.
@@ -929,8 +914,8 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
// DecomposeGEPExpression and GetUnderlyingObject should return the
// same result except when DecomposeGEPExpression has no DataLayout.
if (GEP1BasePtr != UnderlyingV1 || GEP2BasePtr != UnderlyingV2) {
- assert(DL == 0 &&
- "DecomposeGEPExpression and GetUnderlyingObject disagree!");
+ assert(!DL &&
+ "DecomposeGEPExpression and GetUnderlyingObject disagree!");
return MayAlias;
}
// If the max search depth is reached the result is undefined
@@ -939,7 +924,7 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
// Same offsets.
if (GEP1BaseOffset == GEP2BaseOffset &&
- areVarIndicesEqual(GEP1VariableIndices, GEP2VariableIndices))
+ GEP1VariableIndices == GEP2VariableIndices)
return NoAlias;
GEP1VariableIndices.clear();
}
@@ -966,7 +951,7 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
// DecomposeGEPExpression and GetUnderlyingObject should return the
// same result except when DecomposeGEPExpression has no DataLayout.
if (GEP1BasePtr != UnderlyingV1 || GEP2BasePtr != UnderlyingV2) {
- assert(DL == 0 &&
+ assert(!DL &&
"DecomposeGEPExpression and GetUnderlyingObject disagree!");
return MayAlias;
}
@@ -988,7 +973,7 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
if (V1Size == UnknownSize && V2Size == UnknownSize)
return MayAlias;
- AliasResult R = aliasCheck(UnderlyingV1, UnknownSize, 0,
+ AliasResult R = aliasCheck(UnderlyingV1, UnknownSize, nullptr,
V2, V2Size, V2TBAAInfo);
if (R != MustAlias)
// If V2 may alias GEP base pointer, conservatively returns MayAlias.
@@ -1005,7 +990,7 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
// DecomposeGEPExpression and GetUnderlyingObject should return the
// same result except when DecomposeGEPExpression has no DataLayout.
if (GEP1BasePtr != UnderlyingV1) {
- assert(DL == 0 &&
+ assert(!DL &&
"DecomposeGEPExpression and GetUnderlyingObject disagree!");
return MayAlias;
}
@@ -1371,7 +1356,7 @@ bool BasicAliasAnalysis::isValueEqualInPotentialCycles(const Value *V,
// Use dominance or loop info if available.
DominatorTreeWrapperPass *DTWP =
getAnalysisIfAvailable<DominatorTreeWrapperPass>();
- DominatorTree *DT = DTWP ? &DTWP->getDomTree() : 0;
+ DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr;
LoopInfo *LI = getAnalysisIfAvailable<LoopInfo>();
// Make sure that the visited phis cannot reach the Value. This ensures that
diff --git a/lib/Analysis/BlockFrequencyInfo.cpp b/lib/Analysis/BlockFrequencyInfo.cpp
index 63049a5..8ed8e3e 100644
--- a/lib/Analysis/BlockFrequencyInfo.cpp
+++ b/lib/Analysis/BlockFrequencyInfo.cpp
@@ -1,4 +1,4 @@
-//=======-------- BlockFrequencyInfo.cpp - Block Frequency Analysis -------===//
+//===- BlockFrequencyInfo.cpp - Block Frequency Analysis ------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -12,7 +12,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/BlockFrequencyInfo.h"
-#include "llvm/Analysis/BlockFrequencyImpl.h"
+#include "llvm/Analysis/BlockFrequencyInfoImpl.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/Passes.h"
@@ -24,6 +24,8 @@
using namespace llvm;
+#define DEBUG_TYPE "block-freq"
+
#ifndef NDEBUG
enum GVDAGType {
GVDT_None,
@@ -106,6 +108,7 @@ struct DOTGraphTraits<BlockFrequencyInfo*> : public DefaultDOTGraphTraits {
INITIALIZE_PASS_BEGIN(BlockFrequencyInfo, "block-freq",
"Block Frequency Analysis", true, true)
INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfo)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
INITIALIZE_PASS_END(BlockFrequencyInfo, "block-freq",
"Block Frequency Analysis", true, true)
@@ -120,14 +123,16 @@ BlockFrequencyInfo::~BlockFrequencyInfo() {}
void BlockFrequencyInfo::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<BranchProbabilityInfo>();
+ AU.addRequired<LoopInfo>();
AU.setPreservesAll();
}
bool BlockFrequencyInfo::runOnFunction(Function &F) {
BranchProbabilityInfo &BPI = getAnalysis<BranchProbabilityInfo>();
+ LoopInfo &LI = getAnalysis<LoopInfo>();
if (!BFI)
BFI.reset(new ImplType);
- BFI->doFunction(&F, &BPI);
+ BFI->doFunction(&F, &BPI, &LI);
#ifndef NDEBUG
if (ViewBlockFreqPropagationDAG != GVDT_None)
view();
@@ -158,7 +163,7 @@ void BlockFrequencyInfo::view() const {
}
const Function *BlockFrequencyInfo::getFunction() const {
- return BFI ? BFI->Fn : nullptr;
+ return BFI ? BFI->getFunction() : nullptr;
}
raw_ostream &BlockFrequencyInfo::
diff --git a/lib/Analysis/BlockFrequencyInfoImpl.cpp b/lib/Analysis/BlockFrequencyInfoImpl.cpp
new file mode 100644
index 0000000..87d93a4
--- /dev/null
+++ b/lib/Analysis/BlockFrequencyInfoImpl.cpp
@@ -0,0 +1,995 @@
+//===- BlockFrequencyInfoImpl.cpp - Block Frequency Info Implementation ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Loops should be simplified before this analysis.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/BlockFrequencyInfoImpl.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/SCCIterator.h"
+#include "llvm/Support/raw_ostream.h"
+#include <deque>
+
+using namespace llvm;
+using namespace llvm::bfi_detail;
+
+#define DEBUG_TYPE "block-freq"
+
+//===----------------------------------------------------------------------===//
+//
+// UnsignedFloat implementation.
+//
+//===----------------------------------------------------------------------===//
+#ifndef _MSC_VER
+const int32_t UnsignedFloatBase::MaxExponent;
+const int32_t UnsignedFloatBase::MinExponent;
+#endif
+
+static void appendDigit(std::string &Str, unsigned D) {
+ assert(D < 10);
+ Str += '0' + D % 10;
+}
+
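+// Appends the decimal digits of N least-significant-first; toString()
+// reverses the accumulated string afterwards.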
+static void appendNumber(std::string &Str, uint64_t N) {
+ while (N) {
+ appendDigit(Str, N % 10);
+ N /= 10;
+ }
+}
+
+static bool doesRoundUp(char Digit) {
+ switch (Digit) {
+ case '5':
+ case '6':
+ case '7':
+ case '8':
+ case '9':
+ return true;
+ default:
+ return false;
+ }
+}
+
+static std::string toStringAPFloat(uint64_t D, int E, unsigned Precision) {
+ assert(E >= UnsignedFloatBase::MinExponent);
+ assert(E <= UnsignedFloatBase::MaxExponent);
+
+ // Find a new E, but don't let it increase past MaxExponent.
+ int LeadingZeros = UnsignedFloatBase::countLeadingZeros64(D);
+ int NewE = std::min(UnsignedFloatBase::MaxExponent, E + 63 - LeadingZeros);
+ int Shift = 63 - (NewE - E);
+ assert(Shift <= LeadingZeros);
+ assert(Shift == LeadingZeros || NewE == UnsignedFloatBase::MaxExponent);
+ D <<= Shift;
+ E = NewE;
+
+ // Check for a denormal.
+ unsigned AdjustedE = E + 16383;
+ if (!(D >> 63)) {
+ assert(E == UnsignedFloatBase::MaxExponent);
+ AdjustedE = 0;
+ }
+
+ // Build the float and print it.
+ uint64_t RawBits[2] = {D, AdjustedE};
+ APFloat Float(APFloat::x87DoubleExtended, APInt(80, RawBits));
+ SmallVector<char, 24> Chars;
+ Float.toString(Chars, Precision, 0);
+ return std::string(Chars.begin(), Chars.end());
+}
+
+static std::string stripTrailingZeros(const std::string &Float) {
+ size_t NonZero = Float.find_last_not_of('0');
+ assert(NonZero != std::string::npos && "no . in floating point string");
+
+ if (Float[NonZero] == '.')
+ ++NonZero;
+
+ return Float.substr(0, NonZero + 1);
+}
+
+std::string UnsignedFloatBase::toString(uint64_t D, int16_t E, int Width,
+ unsigned Precision) {
+ if (!D)
+ return "0.0";
+
+ // Canonicalize exponent and digits.
+ uint64_t Above0 = 0;
+ uint64_t Below0 = 0;
+ uint64_t Extra = 0;
+ int ExtraShift = 0;
+ if (E == 0) {
+ Above0 = D;
+ } else if (E > 0) {
+ if (int Shift = std::min(int16_t(countLeadingZeros64(D)), E)) {
+ D <<= Shift;
+ E -= Shift;
+
+ if (!E)
+ Above0 = D;
+ }
+ } else if (E > -64) {
+ Above0 = D >> -E;
+ Below0 = D << (64 + E);
+ } else if (E > -120) {
+ Below0 = D >> (-E - 64);
+ Extra = D << (128 + E);
+ ExtraShift = -64 - E;
+ }
+
+ // Fall back on APFloat for very small and very large numbers.
+ if (!Above0 && !Below0)
+ return toStringAPFloat(D, E, Precision);
+
+ // Append the digits before the decimal.
+ std::string Str;
+ size_t DigitsOut = 0;
+ if (Above0) {
+ appendNumber(Str, Above0);
+ DigitsOut = Str.size();
+ } else
+ appendDigit(Str, 0);
+ std::reverse(Str.begin(), Str.end());
+
+ // Return early if there's nothing after the decimal.
+ if (!Below0)
+ return Str + ".0";
+
+ // Append the decimal and beyond.
+ Str += '.';
+ uint64_t Error = UINT64_C(1) << (64 - Width);
+
+ // We need to shift Below0 to the right to make space for calculating
+ // digits. Save the precision we're losing in Extra.
+ Extra = (Below0 & 0xf) << 56 | (Extra >> 8);
+ Below0 >>= 4;
+ size_t SinceDot = 0;
+ size_t AfterDot = Str.size();
+ do {
+ if (ExtraShift) {
+ --ExtraShift;
+ Error *= 5;
+ } else
+ Error *= 10;
+
+ Below0 *= 10;
+ Extra *= 10;
+ Below0 += (Extra >> 60);
+ Extra = Extra & (UINT64_MAX >> 4);
+ appendDigit(Str, Below0 >> 60);
+ Below0 = Below0 & (UINT64_MAX >> 4);
+ if (DigitsOut || Str.back() != '0')
+ ++DigitsOut;
+ ++SinceDot;
+ } while (Error && (Below0 << 4 | Extra >> 60) >= Error / 2 &&
+ (!Precision || DigitsOut <= Precision || SinceDot < 2));
+
+ // Return early for maximum precision.
+ if (!Precision || DigitsOut <= Precision)
+ return stripTrailingZeros(Str);
+
+ // Find where to truncate.
+ size_t Truncate =
+ std::max(Str.size() - (DigitsOut - Precision), AfterDot + 1);
+
+ // Check if there's anything to truncate.
+ if (Truncate >= Str.size())
+ return stripTrailingZeros(Str);
+
+ bool Carry = doesRoundUp(Str[Truncate]);
+ if (!Carry)
+ return stripTrailingZeros(Str.substr(0, Truncate));
+
+ // Round with the first truncated digit.
+ for (std::string::reverse_iterator I(Str.begin() + Truncate), E = Str.rend();
+ I != E; ++I) {
+ if (*I == '.')
+ continue;
+ if (*I == '9') {
+ *I = '0';
+ continue;
+ }
+
+ ++*I;
+ Carry = false;
+ break;
+ }
+
+ // Add "1" in front if we still need to carry.
+ return stripTrailingZeros(std::string(Carry, '1') + Str.substr(0, Truncate));
+}
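+
+// Worked example (illustrative): toString(3, -2) represents 3 * 2^-2 = 0.75.
+// Above0 = 3 >> 2 == 0 and Below0 = 3 << 62, so the integer part prints as
+// "0"; the fraction loop then emits '7' and '5' before the error bound stops
+// it, and stripTrailingZeros() leaves "0.75".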
+
+raw_ostream &UnsignedFloatBase::print(raw_ostream &OS, uint64_t D, int16_t E,
+ int Width, unsigned Precision) {
+ return OS << toString(D, E, Width, Precision);
+}
+
+void UnsignedFloatBase::dump(uint64_t D, int16_t E, int Width) {
+ print(dbgs(), D, E, Width, 0) << "[" << Width << ":" << D << "*2^" << E
+ << "]";
+}
+
+static std::pair<uint64_t, int16_t>
+getRoundedFloat(uint64_t N, bool ShouldRound, int64_t Shift) {
+ if (ShouldRound)
+ if (!++N)
+ // Rounding caused an overflow.
+ return std::make_pair(UINT64_C(1), Shift + 64);
+ return std::make_pair(N, Shift);
+}
+
+std::pair<uint64_t, int16_t> UnsignedFloatBase::divide64(uint64_t Dividend,
+ uint64_t Divisor) {
+ // Input should be sanitized.
+ assert(Divisor);
+ assert(Dividend);
+
+ // Minimize size of divisor.
+ int16_t Shift = 0;
+ if (int Zeros = countTrailingZeros(Divisor)) {
+ Shift -= Zeros;
+ Divisor >>= Zeros;
+ }
+
+ // Check for powers of two.
+ if (Divisor == 1)
+ return std::make_pair(Dividend, Shift);
+
+ // Maximize size of dividend.
+ if (int Zeros = countLeadingZeros64(Dividend)) {
+ Shift -= Zeros;
+ Dividend <<= Zeros;
+ }
+
+ // Start with the result of a divide.
+ uint64_t Quotient = Dividend / Divisor;
+ Dividend %= Divisor;
+
+ // Continue building the quotient with long division.
+ //
+  // TODO: continue with larger digits.
+ while (!(Quotient >> 63) && Dividend) {
+ // Shift Dividend, and check for overflow.
+ bool IsOverflow = Dividend >> 63;
+ Dividend <<= 1;
+ --Shift;
+
+ // Divide.
+ bool DoesDivide = IsOverflow || Divisor <= Dividend;
+ Quotient = (Quotient << 1) | uint64_t(DoesDivide);
+ Dividend -= DoesDivide ? Divisor : 0;
+ }
+
+ // Round.
+ if (Dividend >= getHalf(Divisor))
+ if (!++Quotient)
+ // Rounding caused an overflow in Quotient.
+ return std::make_pair(UINT64_C(1), Shift + 64);
+
+  return std::make_pair(Quotient, Shift);
+}
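+
+// Worked example (illustrative; assumes getHalf(N) == (N >> 1) + (N & 1)):
+// divide64(1, 3) maximizes the dividend (Dividend = UINT64_C(1) << 63,
+// Shift = -63), takes Quotient = 0x2aaaaaaaaaaaaaaa with remainder 2, and
+// long division appends two more bits (Quotient = 0xaaaaaaaaaaaaaaaa,
+// Shift = -65). The remainder 2 >= getHalf(3) rounds up to
+// 0xaaaaaaaaaaaaaaab, so the result 0xaaaaaaaaaaaaaaab * 2^-65 is ~1/3.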
+
+std::pair<uint64_t, int16_t> UnsignedFloatBase::multiply64(uint64_t L,
+ uint64_t R) {
+ // Separate into two 32-bit digits (U.L).
+ uint64_t UL = L >> 32, LL = L & UINT32_MAX, UR = R >> 32, LR = R & UINT32_MAX;
+
+ // Compute cross products.
+ uint64_t P1 = UL * UR, P2 = UL * LR, P3 = LL * UR, P4 = LL * LR;
+
+ // Sum into two 64-bit digits.
+ uint64_t Upper = P1, Lower = P4;
+ auto addWithCarry = [&](uint64_t N) {
+ uint64_t NewLower = Lower + (N << 32);
+ Upper += (N >> 32) + (NewLower < Lower);
+ Lower = NewLower;
+ };
+ addWithCarry(P2);
+ addWithCarry(P3);
+
+ // Check whether the upper digit is empty.
+ if (!Upper)
+ return std::make_pair(Lower, 0);
+
+ // Shift as little as possible to maximize precision.
+ unsigned LeadingZeros = countLeadingZeros64(Upper);
+ int16_t Shift = 64 - LeadingZeros;
+ if (LeadingZeros)
+ Upper = Upper << LeadingZeros | Lower >> Shift;
+ bool ShouldRound = Shift && (Lower & UINT64_C(1) << (Shift - 1));
+ return getRoundedFloat(Upper, ShouldRound, Shift);
+}
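+
+// Worked example (illustrative): multiply64(UINT64_C(1) << 32, UINT64_C(1)
+// << 32) yields cross products P1 = 1 and P2 = P3 = P4 = 0, so Upper = 1 and
+// Lower = 0. Shifting out the 63 leading zeros gives (0x8000000000000000,
+// Shift = 1), i.e. 2^63 * 2^1 == 2^64, as expected for (2^32)^2.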
+
+//===----------------------------------------------------------------------===//
+//
+// BlockMass implementation.
+//
+//===----------------------------------------------------------------------===//
+UnsignedFloat<uint64_t> BlockMass::toFloat() const {
+ if (isFull())
+ return UnsignedFloat<uint64_t>(1, 0);
+ return UnsignedFloat<uint64_t>(getMass() + 1, -64);
+}
+
+void BlockMass::dump() const { print(dbgs()); }
+
+static char getHexDigit(int N) {
+ assert(N < 16);
+ if (N < 10)
+ return '0' + N;
+ return 'a' + N - 10;
+}
+raw_ostream &BlockMass::print(raw_ostream &OS) const {
+ for (int Digits = 0; Digits < 16; ++Digits)
+ OS << getHexDigit(Mass >> (60 - Digits * 4) & 0xf);
+ return OS;
+}
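+
+// Illustrative: print() always emits 16 hex nibbles, most significant first,
+// so a full mass (UINT64_MAX) prints as "ffffffffffffffff" and a mass with
+// only the top bit set prints as "8000000000000000".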
+
+//===----------------------------------------------------------------------===//
+//
+// BlockFrequencyInfoImpl implementation.
+//
+//===----------------------------------------------------------------------===//
+namespace {
+
+typedef BlockFrequencyInfoImplBase::BlockNode BlockNode;
+typedef BlockFrequencyInfoImplBase::Distribution Distribution;
+typedef BlockFrequencyInfoImplBase::Distribution::WeightList WeightList;
+typedef BlockFrequencyInfoImplBase::Float Float;
+typedef BlockFrequencyInfoImplBase::LoopData LoopData;
+typedef BlockFrequencyInfoImplBase::Weight Weight;
+typedef BlockFrequencyInfoImplBase::FrequencyData FrequencyData;
+
+/// \brief Dithering mass distributer.
+///
+/// This class splits up a single mass into portions by weight, dithering to
+/// spread out error. No mass is lost. The dithering precision depends on the
+/// precision of the product of \a BlockMass and \a BranchProbability.
+///
+/// The distribution algorithm follows.
+///
+/// 1. Initialize by saving the sum of the weights in \a RemWeight and the
+/// mass to distribute in \a RemMass.
+///
+/// 2. For each portion:
+///
+/// 1. Construct a branch probability, P, as the portion's weight divided
+/// by the current value of \a RemWeight.
+/// 2. Calculate the portion's mass as \a RemMass times P.
+/// 3. Update \a RemWeight and \a RemMass at each portion by subtracting
+/// the current portion's weight and mass.
+struct DitheringDistributer {
+ uint32_t RemWeight;
+ BlockMass RemMass;
+
+ DitheringDistributer(Distribution &Dist, const BlockMass &Mass);
+
+ BlockMass takeMass(uint32_t Weight);
+};
+}
+
+DitheringDistributer::DitheringDistributer(Distribution &Dist,
+ const BlockMass &Mass) {
+ Dist.normalize();
+ RemWeight = Dist.Total;
+ RemMass = Mass;
+}
+
+BlockMass DitheringDistributer::takeMass(uint32_t Weight) {
+ assert(Weight && "invalid weight");
+ assert(Weight <= RemWeight);
+ BlockMass Mass = RemMass * BranchProbability(Weight, RemWeight);
+
+ // Decrement totals (dither).
+ RemWeight -= Weight;
+ RemMass -= Mass;
+ return Mass;
+}
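+
+// Illustrative: distributing a full mass over weights {1, 1, 1} takes about
+// a third for the first portion and half of the remainder for the second;
+// the final call has Weight == RemWeight, so P == 1 and the last portion is
+// exactly RemMass. As the class comment promises, no mass is lost.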
+
+void Distribution::add(const BlockNode &Node, uint64_t Amount,
+ Weight::DistType Type) {
+ assert(Amount && "invalid weight of 0");
+ uint64_t NewTotal = Total + Amount;
+
+ // Check for overflow. It should be impossible to overflow twice.
+ bool IsOverflow = NewTotal < Total;
+ assert(!(DidOverflow && IsOverflow) && "unexpected repeated overflow");
+ DidOverflow |= IsOverflow;
+
+ // Update the total.
+ Total = NewTotal;
+
+ // Save the weight.
+ Weight W;
+ W.TargetNode = Node;
+ W.Amount = Amount;
+ W.Type = Type;
+ Weights.push_back(W);
+}
+
+static void combineWeight(Weight &W, const Weight &OtherW) {
+ assert(OtherW.TargetNode.isValid());
+ if (!W.Amount) {
+ W = OtherW;
+ return;
+ }
+ assert(W.Type == OtherW.Type);
+ assert(W.TargetNode == OtherW.TargetNode);
+ assert(W.Amount < W.Amount + OtherW.Amount && "Unexpected overflow");
+ W.Amount += OtherW.Amount;
+}
+static void combineWeightsBySorting(WeightList &Weights) {
+ // Sort so edges to the same node are adjacent.
+ std::sort(Weights.begin(), Weights.end(),
+ [](const Weight &L,
+ const Weight &R) { return L.TargetNode < R.TargetNode; });
+
+ // Combine adjacent edges.
+ WeightList::iterator O = Weights.begin();
+ for (WeightList::const_iterator I = O, L = O, E = Weights.end(); I != E;
+ ++O, (I = L)) {
+ *O = *I;
+
+ // Find the adjacent weights to the same node.
+ for (++L; L != E && I->TargetNode == L->TargetNode; ++L)
+ combineWeight(*O, *L);
+ }
+
+ // Erase extra entries.
+ Weights.erase(O, Weights.end());
+}
+static void combineWeightsByHashing(WeightList &Weights) {
+ // Collect weights into a DenseMap.
+ typedef DenseMap<BlockNode::IndexType, Weight> HashTable;
+ HashTable Combined(NextPowerOf2(2 * Weights.size()));
+ for (const Weight &W : Weights)
+ combineWeight(Combined[W.TargetNode.Index], W);
+
+ // Check whether anything changed.
+ if (Weights.size() == Combined.size())
+ return;
+
+ // Fill in the new weights.
+ Weights.clear();
+ Weights.reserve(Combined.size());
+ for (const auto &I : Combined)
+ Weights.push_back(I.second);
+}
+static void combineWeights(WeightList &Weights) {
+ // Use a hash table for many successors to keep this linear.
+ if (Weights.size() > 128) {
+ combineWeightsByHashing(Weights);
+ return;
+ }
+
+ combineWeightsBySorting(Weights);
+}
+static uint64_t shiftRightAndRound(uint64_t N, int Shift) {
+ assert(Shift >= 0);
+ assert(Shift < 64);
+ if (!Shift)
+ return N;
+ return (N >> Shift) + (UINT64_C(1) & N >> (Shift - 1));
+}
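+
+// Illustrative: shiftRightAndRound(11, 1) == 6 (5.5 rounds up), while
+// shiftRightAndRound(12, 2) == 3 with no rounding needed.
+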
+void Distribution::normalize() {
+ // Early exit for termination nodes.
+ if (Weights.empty())
+ return;
+
+ // Only bother if there are multiple successors.
+ if (Weights.size() > 1)
+ combineWeights(Weights);
+
+ // Early exit when combined into a single successor.
+ if (Weights.size() == 1) {
+ Total = 1;
+ Weights.front().Amount = 1;
+ return;
+ }
+
+ // Determine how much to shift right so that the total fits into 32-bits.
+ //
+ // If we shift at all, shift by 1 extra. Otherwise, the lower limit of 1
+ // for each weight can cause a 32-bit overflow.
+ int Shift = 0;
+ if (DidOverflow)
+ Shift = 33;
+ else if (Total > UINT32_MAX)
+ Shift = 33 - countLeadingZeros(Total);
+
+ // Early exit if nothing needs to be scaled.
+ if (!Shift)
+ return;
+
+ // Recompute the total through accumulation (rather than shifting it) so that
+ // it's accurate after shifting.
+ Total = 0;
+
+ // Sum the weights to each node and shift right if necessary.
+ for (Weight &W : Weights) {
+ // Scale down below UINT32_MAX. Since Shift is larger than necessary, we
+ // can round here without concern about overflow.
+ assert(W.TargetNode.isValid());
+ W.Amount = std::max(UINT64_C(1), shiftRightAndRound(W.Amount, Shift));
+ assert(W.Amount <= UINT32_MAX);
+
+ // Update the total.
+ Total += W.Amount;
+ }
+ assert(Total <= UINT32_MAX);
+}
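+
+// Illustrative: if Total == UINT64_C(1) << 34 without overflow, Shift ==
+// 33 - countLeadingZeros(Total) == 4, so each weight is shifted right by
+// four bits (rounded, floored at 1) and the recomputed Total lands safely
+// under UINT32_MAX.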
+
+void BlockFrequencyInfoImplBase::clear() {
+  // Swap with a default-constructed std::vector, since std::vector<>::clear()
+  // does not actually release heap storage.
+ std::vector<FrequencyData>().swap(Freqs);
+ std::vector<WorkingData>().swap(Working);
+ Loops.clear();
+}
+
+/// \brief Clear all memory not needed downstream.
+///
+/// Releases all memory not used downstream. In particular, saves Freqs.
+static void cleanup(BlockFrequencyInfoImplBase &BFI) {
+ std::vector<FrequencyData> SavedFreqs(std::move(BFI.Freqs));
+ BFI.clear();
+ BFI.Freqs = std::move(SavedFreqs);
+}
+
+bool BlockFrequencyInfoImplBase::addToDist(Distribution &Dist,
+ const LoopData *OuterLoop,
+ const BlockNode &Pred,
+ const BlockNode &Succ,
+ uint64_t Weight) {
+ if (!Weight)
+ Weight = 1;
+
+ auto isLoopHeader = [&OuterLoop](const BlockNode &Node) {
+ return OuterLoop && OuterLoop->isHeader(Node);
+ };
+
+ BlockNode Resolved = Working[Succ.Index].getResolvedNode();
+
+#ifndef NDEBUG
+ auto debugSuccessor = [&](const char *Type) {
+ dbgs() << " =>"
+ << " [" << Type << "] weight = " << Weight;
+ if (!isLoopHeader(Resolved))
+ dbgs() << ", succ = " << getBlockName(Succ);
+ if (Resolved != Succ)
+ dbgs() << ", resolved = " << getBlockName(Resolved);
+ dbgs() << "\n";
+ };
+ (void)debugSuccessor;
+#endif
+
+ if (isLoopHeader(Resolved)) {
+ DEBUG(debugSuccessor("backedge"));
+ Dist.addBackedge(OuterLoop->getHeader(), Weight);
+ return true;
+ }
+
+ if (Working[Resolved.Index].getContainingLoop() != OuterLoop) {
+ DEBUG(debugSuccessor(" exit "));
+ Dist.addExit(Resolved, Weight);
+ return true;
+ }
+
+ if (Resolved < Pred) {
+ if (!isLoopHeader(Pred)) {
+ // If OuterLoop is an irreducible loop, we can't actually handle this.
+ assert((!OuterLoop || !OuterLoop->isIrreducible()) &&
+ "unhandled irreducible control flow");
+
+ // Irreducible backedge. Abort.
+ DEBUG(debugSuccessor("abort!!!"));
+ return false;
+ }
+
+ // If "Pred" is a loop header, then this isn't really a backedge; rather,
+ // OuterLoop must be irreducible. These false backedges can come only from
+ // secondary loop headers.
+ assert(OuterLoop && OuterLoop->isIrreducible() && !isLoopHeader(Resolved) &&
+ "unhandled irreducible control flow");
+ }
+
+ DEBUG(debugSuccessor(" local "));
+ Dist.addLocal(Resolved, Weight);
+ return true;
+}
+
+bool BlockFrequencyInfoImplBase::addLoopSuccessorsToDist(
+ const LoopData *OuterLoop, LoopData &Loop, Distribution &Dist) {
+ // Copy the exit map into Dist.
+ for (const auto &I : Loop.Exits)
+ if (!addToDist(Dist, OuterLoop, Loop.getHeader(), I.first,
+ I.second.getMass()))
+ // Irreducible backedge.
+ return false;
+
+ return true;
+}
+
+/// \brief Get the maximum allowed loop scale.
+///
+/// Gives the maximum number of estimated iterations allowed for a loop. Very
+/// large numbers cause problems downstream (even within 64-bits).
+static Float getMaxLoopScale() { return Float(1, 12); }
+
+/// \brief Compute the loop scale for a loop.
+void BlockFrequencyInfoImplBase::computeLoopScale(LoopData &Loop) {
+ // Compute loop scale.
+ DEBUG(dbgs() << "compute-loop-scale: " << getLoopName(Loop) << "\n");
+
+ // LoopScale == 1 / ExitMass
+ // ExitMass == HeadMass - BackedgeMass
+ BlockMass ExitMass = BlockMass::getFull() - Loop.BackedgeMass;
+
+ // Block scale stores the inverse of the scale.
+ Loop.Scale = ExitMass.toFloat().inverse();
+
+ DEBUG(dbgs() << " - exit-mass = " << ExitMass << " (" << BlockMass::getFull()
+ << " - " << Loop.BackedgeMass << ")\n"
+ << " - scale = " << Loop.Scale << "\n");
+
+ if (Loop.Scale > getMaxLoopScale()) {
+ Loop.Scale = getMaxLoopScale();
+ DEBUG(dbgs() << " - reduced-to-max-scale: " << getMaxLoopScale() << "\n");
+ }
+}
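+
+// Illustrative: a loop whose backedge receives 3/4 of the header's mass has
+// ExitMass == 1/4 and Scale == 4, modelling an average trip count of four;
+// getMaxLoopScale() == Float(1, 12) caps the estimate at 4096 iterations.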
+
+/// \brief Package up a loop.
+void BlockFrequencyInfoImplBase::packageLoop(LoopData &Loop) {
+ DEBUG(dbgs() << "packaging-loop: " << getLoopName(Loop) << "\n");
+
+ // Clear the subloop exits to prevent quadratic memory usage.
+ for (const BlockNode &M : Loop.Nodes) {
+ if (auto *Loop = Working[M.Index].getPackagedLoop())
+ Loop->Exits.clear();
+ DEBUG(dbgs() << " - node: " << getBlockName(M.Index) << "\n");
+ }
+ Loop.IsPackaged = true;
+}
+
+void BlockFrequencyInfoImplBase::distributeMass(const BlockNode &Source,
+ LoopData *OuterLoop,
+ Distribution &Dist) {
+ BlockMass Mass = Working[Source.Index].getMass();
+ DEBUG(dbgs() << " => mass: " << Mass << "\n");
+
+ // Distribute mass to successors as laid out in Dist.
+ DitheringDistributer D(Dist, Mass);
+
+#ifndef NDEBUG
+ auto debugAssign = [&](const BlockNode &T, const BlockMass &M,
+ const char *Desc) {
+ dbgs() << " => assign " << M << " (" << D.RemMass << ")";
+ if (Desc)
+ dbgs() << " [" << Desc << "]";
+ if (T.isValid())
+ dbgs() << " to " << getBlockName(T);
+ dbgs() << "\n";
+ };
+ (void)debugAssign;
+#endif
+
+ for (const Weight &W : Dist.Weights) {
+ // Check for a local edge (non-backedge and non-exit).
+ BlockMass Taken = D.takeMass(W.Amount);
+ if (W.Type == Weight::Local) {
+ Working[W.TargetNode.Index].getMass() += Taken;
+ DEBUG(debugAssign(W.TargetNode, Taken, nullptr));
+ continue;
+ }
+
+ // Backedges and exits only make sense if we're processing a loop.
+ assert(OuterLoop && "backedge or exit outside of loop");
+
+ // Check for a backedge.
+ if (W.Type == Weight::Backedge) {
+ OuterLoop->BackedgeMass += Taken;
+ DEBUG(debugAssign(BlockNode(), Taken, "back"));
+ continue;
+ }
+
+ // This must be an exit.
+ assert(W.Type == Weight::Exit);
+ OuterLoop->Exits.push_back(std::make_pair(W.TargetNode, Taken));
+ DEBUG(debugAssign(W.TargetNode, Taken, "exit"));
+ }
+}
+
+static void convertFloatingToInteger(BlockFrequencyInfoImplBase &BFI,
+ const Float &Min, const Float &Max) {
+ // Scale the Factor to a size that creates integers. Ideally, integers would
+ // be scaled so that Max == UINT64_MAX so that they can be best
+ // differentiated. However, the register allocator currently deals poorly
+ // with large numbers. Instead, push Min up a little from 1 to give some
+ // room to differentiate small, unequal numbers.
+ //
+ // TODO: fix issues downstream so that ScalingFactor can be Float(1,64)/Max.
+ Float ScalingFactor = Min.inverse();
+ if ((Max / Min).lg() < 60)
+ ScalingFactor <<= 3;
+
+ // Translate the floats to integers.
+ DEBUG(dbgs() << "float-to-int: min = " << Min << ", max = " << Max
+ << ", factor = " << ScalingFactor << "\n");
+ for (size_t Index = 0; Index < BFI.Freqs.size(); ++Index) {
+ Float Scaled = BFI.Freqs[Index].Floating * ScalingFactor;
+ BFI.Freqs[Index].Integer = std::max(UINT64_C(1), Scaled.toInt<uint64_t>());
+ DEBUG(dbgs() << " - " << BFI.getBlockName(Index) << ": float = "
+ << BFI.Freqs[Index].Floating << ", scaled = " << Scaled
+ << ", int = " << BFI.Freqs[Index].Integer << "\n");
+ }
+}
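+
+// Illustrative: with Min == 1/8 and Max == 1024, ScalingFactor starts as
+// Min.inverse() == 8; since (Max / Min).lg() == 13 < 60 it is shifted up to
+// 64, so the coldest block maps to 8 rather than 1, leaving room to
+// differentiate nearly-equal small frequencies.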
+
+/// \brief Unwrap a loop package.
+///
+/// Visits all the members of a loop, adjusting their BlockData according to
+/// the loop's pseudo-node.
+static void unwrapLoop(BlockFrequencyInfoImplBase &BFI, LoopData &Loop) {
+ DEBUG(dbgs() << "unwrap-loop-package: " << BFI.getLoopName(Loop)
+ << ": mass = " << Loop.Mass << ", scale = " << Loop.Scale
+ << "\n");
+ Loop.Scale *= Loop.Mass.toFloat();
+ Loop.IsPackaged = false;
+ DEBUG(dbgs() << " => combined-scale = " << Loop.Scale << "\n");
+
+ // Propagate the head scale through the loop. Since members are visited in
+ // RPO, the head scale will be updated by the loop scale first, and then the
+  // final head scale will be used to update the rest of the members.
+ for (const BlockNode &N : Loop.Nodes) {
+ const auto &Working = BFI.Working[N.Index];
+ Float &F = Working.isAPackage() ? Working.getPackagedLoop()->Scale
+ : BFI.Freqs[N.Index].Floating;
+ Float New = Loop.Scale * F;
+ DEBUG(dbgs() << " - " << BFI.getBlockName(N) << ": " << F << " => " << New
+ << "\n");
+ F = New;
+ }
+}
+
+void BlockFrequencyInfoImplBase::unwrapLoops() {
+ // Set initial frequencies from loop-local masses.
+ for (size_t Index = 0; Index < Working.size(); ++Index)
+ Freqs[Index].Floating = Working[Index].Mass.toFloat();
+
+ for (LoopData &Loop : Loops)
+ unwrapLoop(*this, Loop);
+}
+
+void BlockFrequencyInfoImplBase::finalizeMetrics() {
+ // Unwrap loop packages in reverse post-order, tracking min and max
+ // frequencies.
+ auto Min = Float::getLargest();
+ auto Max = Float::getZero();
+ for (size_t Index = 0; Index < Working.size(); ++Index) {
+ // Update min/max scale.
+ Min = std::min(Min, Freqs[Index].Floating);
+ Max = std::max(Max, Freqs[Index].Floating);
+ }
+
+ // Convert to integers.
+ convertFloatingToInteger(*this, Min, Max);
+
+ // Clean up data structures.
+ cleanup(*this);
+
+ // Print out the final stats.
+ DEBUG(dump());
+}
+
+BlockFrequency
+BlockFrequencyInfoImplBase::getBlockFreq(const BlockNode &Node) const {
+ if (!Node.isValid())
+ return 0;
+ return Freqs[Node.Index].Integer;
+}
+Float
+BlockFrequencyInfoImplBase::getFloatingBlockFreq(const BlockNode &Node) const {
+ if (!Node.isValid())
+ return Float::getZero();
+ return Freqs[Node.Index].Floating;
+}
+
+std::string
+BlockFrequencyInfoImplBase::getBlockName(const BlockNode &Node) const {
+ return std::string();
+}
+std::string
+BlockFrequencyInfoImplBase::getLoopName(const LoopData &Loop) const {
+ return getBlockName(Loop.getHeader()) + (Loop.isIrreducible() ? "**" : "*");
+}
+
+raw_ostream &
+BlockFrequencyInfoImplBase::printBlockFreq(raw_ostream &OS,
+ const BlockNode &Node) const {
+ return OS << getFloatingBlockFreq(Node);
+}
+
+raw_ostream &
+BlockFrequencyInfoImplBase::printBlockFreq(raw_ostream &OS,
+ const BlockFrequency &Freq) const {
+ Float Block(Freq.getFrequency(), 0);
+ Float Entry(getEntryFreq(), 0);
+
+ return OS << Block / Entry;
+}
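+
+// Illustrative: frequencies print relative to the entry block, so with an
+// entry frequency of 16 a block frequency of 48 prints as "3.0".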
+
+void IrreducibleGraph::addNodesInLoop(const BFIBase::LoopData &OuterLoop) {
+ Start = OuterLoop.getHeader();
+ Nodes.reserve(OuterLoop.Nodes.size());
+ for (auto N : OuterLoop.Nodes)
+ addNode(N);
+ indexNodes();
+}
+void IrreducibleGraph::addNodesInFunction() {
+ Start = 0;
+ for (uint32_t Index = 0; Index < BFI.Working.size(); ++Index)
+ if (!BFI.Working[Index].isPackaged())
+ addNode(Index);
+ indexNodes();
+}
+void IrreducibleGraph::indexNodes() {
+ for (auto &I : Nodes)
+ Lookup[I.Node.Index] = &I;
+}
+void IrreducibleGraph::addEdge(IrrNode &Irr, const BlockNode &Succ,
+ const BFIBase::LoopData *OuterLoop) {
+ if (OuterLoop && OuterLoop->isHeader(Succ))
+ return;
+ auto L = Lookup.find(Succ.Index);
+ if (L == Lookup.end())
+ return;
+ IrrNode &SuccIrr = *L->second;
+ Irr.Edges.push_back(&SuccIrr);
+ SuccIrr.Edges.push_front(&Irr);
+ ++SuccIrr.NumIn;
+}
+
+namespace llvm {
+template <> struct GraphTraits<IrreducibleGraph> {
+ typedef bfi_detail::IrreducibleGraph GraphT;
+
+ typedef const GraphT::IrrNode NodeType;
+ typedef GraphT::IrrNode::iterator ChildIteratorType;
+
+ static const NodeType *getEntryNode(const GraphT &G) {
+ return G.StartIrr;
+ }
+ static ChildIteratorType child_begin(NodeType *N) { return N->succ_begin(); }
+ static ChildIteratorType child_end(NodeType *N) { return N->succ_end(); }
+};
+}
+
+/// \brief Find extra irreducible headers.
+///
+/// Find entry blocks and other blocks with backedges, which exist when \c G
+/// contains irreducible sub-SCCs.
+static void findIrreducibleHeaders(
+ const BlockFrequencyInfoImplBase &BFI,
+ const IrreducibleGraph &G,
+ const std::vector<const IrreducibleGraph::IrrNode *> &SCC,
+ LoopData::NodeList &Headers, LoopData::NodeList &Others) {
+ // Map from nodes in the SCC to whether it's an entry block.
+ SmallDenseMap<const IrreducibleGraph::IrrNode *, bool, 8> InSCC;
+
+  // InSCC also acts as the set of nodes in the graph. Seed it.
+ for (const auto *I : SCC)
+ InSCC[I] = false;
+
+ for (auto I = InSCC.begin(), E = InSCC.end(); I != E; ++I) {
+ auto &Irr = *I->first;
+ for (const auto *P : make_range(Irr.pred_begin(), Irr.pred_end())) {
+ if (InSCC.count(P))
+ continue;
+
+ // This is an entry block.
+ I->second = true;
+ Headers.push_back(Irr.Node);
+ DEBUG(dbgs() << " => entry = " << BFI.getBlockName(Irr.Node) << "\n");
+ break;
+ }
+ }
+ assert(Headers.size() >= 2 && "Should be irreducible");
+ if (Headers.size() == InSCC.size()) {
+ // Every block is a header.
+ std::sort(Headers.begin(), Headers.end());
+ return;
+ }
+
+ // Look for extra headers from irreducible sub-SCCs.
+ for (const auto &I : InSCC) {
+ // Entry blocks are already headers.
+ if (I.second)
+ continue;
+
+ auto &Irr = *I.first;
+ for (const auto *P : make_range(Irr.pred_begin(), Irr.pred_end())) {
+ // Skip forward edges.
+ if (P->Node < Irr.Node)
+ continue;
+
+ // Skip predecessors from entry blocks. These can have inverted
+ // ordering.
+ if (InSCC.lookup(P))
+ continue;
+
+ // Store the extra header.
+ Headers.push_back(Irr.Node);
+ DEBUG(dbgs() << " => extra = " << BFI.getBlockName(Irr.Node) << "\n");
+ break;
+ }
+ if (Headers.back() == Irr.Node)
+ // Added this as a header.
+ continue;
+
+ // This is not a header.
+ Others.push_back(Irr.Node);
+ DEBUG(dbgs() << " => other = " << BFI.getBlockName(Irr.Node) << "\n");
+ }
+ std::sort(Headers.begin(), Headers.end());
+ std::sort(Others.begin(), Others.end());
+}
+
+static void createIrreducibleLoop(
+ BlockFrequencyInfoImplBase &BFI, const IrreducibleGraph &G,
+ LoopData *OuterLoop, std::list<LoopData>::iterator Insert,
+ const std::vector<const IrreducibleGraph::IrrNode *> &SCC) {
+ // Translate the SCC into RPO.
+ DEBUG(dbgs() << " - found-scc\n");
+
+ LoopData::NodeList Headers;
+ LoopData::NodeList Others;
+ findIrreducibleHeaders(BFI, G, SCC, Headers, Others);
+
+ auto Loop = BFI.Loops.emplace(Insert, OuterLoop, Headers.begin(),
+ Headers.end(), Others.begin(), Others.end());
+
+ // Update loop hierarchy.
+ for (const auto &N : Loop->Nodes)
+ if (BFI.Working[N.Index].isLoopHeader())
+ BFI.Working[N.Index].Loop->Parent = &*Loop;
+ else
+ BFI.Working[N.Index].Loop = &*Loop;
+}
+
+iterator_range<std::list<LoopData>::iterator>
+BlockFrequencyInfoImplBase::analyzeIrreducible(
+ const IrreducibleGraph &G, LoopData *OuterLoop,
+ std::list<LoopData>::iterator Insert) {
+ assert((OuterLoop == nullptr) == (Insert == Loops.begin()));
+ auto Prev = OuterLoop ? std::prev(Insert) : Loops.end();
+
+ for (auto I = scc_begin(G); !I.isAtEnd(); ++I) {
+ if (I->size() < 2)
+ continue;
+
+ // Translate the SCC into RPO.
+ createIrreducibleLoop(*this, G, OuterLoop, Insert, *I);
+ }
+
+ if (OuterLoop)
+ return make_range(std::next(Prev), Insert);
+ return make_range(Loops.begin(), Insert);
+}
+
+void
+BlockFrequencyInfoImplBase::updateLoopWithIrreducible(LoopData &OuterLoop) {
+ OuterLoop.Exits.clear();
+ OuterLoop.BackedgeMass = BlockMass::getEmpty();
+ auto O = OuterLoop.Nodes.begin() + 1;
+ for (auto I = O, E = OuterLoop.Nodes.end(); I != E; ++I)
+ if (!Working[I->Index].isPackaged())
+ *O++ = *I;
+ OuterLoop.Nodes.erase(O, OuterLoop.Nodes.end());
+}
diff --git a/lib/Analysis/BranchProbabilityInfo.cpp b/lib/Analysis/BranchProbabilityInfo.cpp
index b901c54..bbd8750 100644
--- a/lib/Analysis/BranchProbabilityInfo.cpp
+++ b/lib/Analysis/BranchProbabilityInfo.cpp
@@ -11,7 +11,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "branch-prob"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/Analysis/LoopInfo.h"
@@ -25,6 +24,8 @@
using namespace llvm;
+#define DEBUG_TYPE "branch-prob"
+
INITIALIZE_PASS_BEGIN(BranchProbabilityInfo, "branch-prob",
"Branch Probability Analysis", false, true)
INITIALIZE_PASS_DEPENDENCY(LoopInfo)
@@ -322,6 +323,9 @@ bool BranchProbabilityInfo::calcLoopBranchHeuristics(BasicBlock *BB) {
InEdges.push_back(I.getSuccessorIndex());
}
+ if (BackEdges.empty() && ExitingEdges.empty())
+ return false;
+
if (uint32_t numBackEdges = BackEdges.size()) {
uint32_t backWeight = LBH_TAKEN_WEIGHT / numBackEdges;
if (backWeight < NORMAL_WEIGHT)
@@ -557,7 +561,7 @@ isEdgeHot(const BasicBlock *Src, const BasicBlock *Dst) const {
BasicBlock *BranchProbabilityInfo::getHotSucc(BasicBlock *BB) const {
uint32_t Sum = 0;
uint32_t MaxWeight = 0;
- BasicBlock *MaxSucc = 0;
+ BasicBlock *MaxSucc = nullptr;
for (succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) {
BasicBlock *Succ = *I;
@@ -577,7 +581,7 @@ BasicBlock *BranchProbabilityInfo::getHotSucc(BasicBlock *BB) const {
if (BranchProbability(MaxWeight, Sum) > BranchProbability(4, 5))
return MaxSucc;
- return 0;
+ return nullptr;
}
/// Get the raw edge weight for the edge. If we can't find it, return
@@ -594,11 +598,9 @@ getEdgeWeight(const BasicBlock *Src, unsigned IndexInSuccessors) const {
return DEFAULT_WEIGHT;
}
-uint32_t
-BranchProbabilityInfo::
-getEdgeWeight(const BasicBlock *Src, succ_const_iterator Dst) const {
- size_t index = std::distance(succ_begin(Src), Dst);
- return getEdgeWeight(Src, index);
+uint32_t BranchProbabilityInfo::getEdgeWeight(const BasicBlock *Src,
+ succ_const_iterator Dst) const {
+ return getEdgeWeight(Src, Dst.getSuccessorIndex());
}
/// Get the raw edge weight calculated for the block pair. This returns the sum
diff --git a/lib/Analysis/CFG.cpp b/lib/Analysis/CFG.cpp
index 6963760..8ef5302 100644
--- a/lib/Analysis/CFG.cpp
+++ b/lib/Analysis/CFG.cpp
@@ -123,7 +123,7 @@ static bool loopContainsBoth(const LoopInfo *LI,
const BasicBlock *BB1, const BasicBlock *BB2) {
const Loop *L1 = getOutermostLoop(LI, BB1);
const Loop *L2 = getOutermostLoop(LI, BB2);
- return L1 != NULL && L1 == L2;
+ return L1 != nullptr && L1 == L2;
}
static bool isPotentiallyReachableInner(SmallVectorImpl<BasicBlock *> &Worklist,
@@ -133,7 +133,7 @@ static bool isPotentiallyReachableInner(SmallVectorImpl<BasicBlock *> &Worklist,
// When the stop block is unreachable, it's dominated from everywhere,
// regardless of whether there's a path between the two blocks.
if (DT && !DT->isReachableFromEntry(StopBB))
- DT = 0;
+ DT = nullptr;
// Limit the number of blocks we visit. The goal is to avoid run-away compile
// times on large CFGs without hampering sensible code. Arbitrarily chosen.
@@ -156,7 +156,7 @@ static bool isPotentiallyReachableInner(SmallVectorImpl<BasicBlock *> &Worklist,
return true;
}
- if (const Loop *Outer = LI ? getOutermostLoop(LI, BB) : 0) {
+ if (const Loop *Outer = LI ? getOutermostLoop(LI, BB) : nullptr) {
// All blocks in a single loop are reachable from all other blocks. From
// any of these blocks, we can skip directly to the exits of the loop,
// ignoring any other blocks inside the loop body.
@@ -200,7 +200,7 @@ bool llvm::isPotentiallyReachable(const Instruction *A, const Instruction *B,
// If the block is in a loop then we can reach any instruction in the block
// from any other instruction in the block by going around a backedge.
- if (LI && LI->getLoopFor(BB) != 0)
+ if (LI && LI->getLoopFor(BB) != nullptr)
return true;
// Linear scan, start at 'A', see whether we hit 'B' or the end first.
diff --git a/lib/Analysis/CFGPrinter.cpp b/lib/Analysis/CFGPrinter.cpp
index 537d6d1..c2c19d6 100644
--- a/lib/Analysis/CFGPrinter.cpp
+++ b/lib/Analysis/CFGPrinter.cpp
@@ -19,6 +19,7 @@
#include "llvm/Analysis/CFGPrinter.h"
#include "llvm/Pass.h"
+#include "llvm/Support/FileSystem.h"
using namespace llvm;
namespace {
@@ -33,7 +34,7 @@ namespace {
return false;
}
- void print(raw_ostream &OS, const Module* = 0) const override {}
+ void print(raw_ostream &OS, const Module* = nullptr) const override {}
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesAll();
@@ -56,7 +57,7 @@ namespace {
return false;
}
- void print(raw_ostream &OS, const Module* = 0) const override {}
+ void print(raw_ostream &OS, const Module* = nullptr) const override {}
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesAll();
@@ -90,7 +91,7 @@ namespace {
return false;
}
- void print(raw_ostream &OS, const Module* = 0) const override {}
+ void print(raw_ostream &OS, const Module* = nullptr) const override {}
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesAll();
@@ -123,7 +124,7 @@ namespace {
errs() << "\n";
return false;
}
- void print(raw_ostream &OS, const Module* = 0) const override {}
+ void print(raw_ostream &OS, const Module* = nullptr) const override {}
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesAll();
@@ -147,8 +148,8 @@ void Function::viewCFG() const {
/// viewCFGOnly - This function is meant for use from the debugger. It works
/// just like viewCFG, but it does not include the contents of basic blocks
-/// into the nodes, just the label. If you are only interested in the CFG t
-/// his can make the graph smaller.
+/// into the nodes, just the label. If you are only interested in the CFG
+/// this can make the graph smaller.
///
void Function::viewCFGOnly() const {
ViewGraph(this, "cfg" + getName(), true);
diff --git a/lib/Analysis/CGSCCPassManager.cpp b/lib/Analysis/CGSCCPassManager.cpp
new file mode 100644
index 0000000..5d1d8a9
--- /dev/null
+++ b/lib/Analysis/CGSCCPassManager.cpp
@@ -0,0 +1,167 @@
+//===- CGSCCPassManager.cpp - Managing & running CGSCC passes -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/CGSCCPassManager.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+static cl::opt<bool>
+DebugPM("debug-cgscc-pass-manager", cl::Hidden,
+ cl::desc("Print CGSCC pass management debugging information"));
+
+PreservedAnalyses CGSCCPassManager::run(LazyCallGraph::SCC *C,
+ CGSCCAnalysisManager *AM) {
+ PreservedAnalyses PA = PreservedAnalyses::all();
+
+ if (DebugPM)
+ dbgs() << "Starting CGSCC pass manager run.\n";
+
+ for (unsigned Idx = 0, Size = Passes.size(); Idx != Size; ++Idx) {
+ if (DebugPM)
+ dbgs() << "Running CGSCC pass: " << Passes[Idx]->name() << "\n";
+
+ PreservedAnalyses PassPA = Passes[Idx]->run(C, AM);
+ if (AM)
+ AM->invalidate(C, PassPA);
+ PA.intersect(std::move(PassPA));
+ }
+
+ if (DebugPM)
+ dbgs() << "Finished CGSCC pass manager run.\n";
+
+ return PA;
+}
+
+bool CGSCCAnalysisManager::empty() const {
+ assert(CGSCCAnalysisResults.empty() == CGSCCAnalysisResultLists.empty() &&
+ "The storage and index of analysis results disagree on how many there "
+ "are!");
+ return CGSCCAnalysisResults.empty();
+}
+
+void CGSCCAnalysisManager::clear() {
+ CGSCCAnalysisResults.clear();
+ CGSCCAnalysisResultLists.clear();
+}
+
+CGSCCAnalysisManager::ResultConceptT &
+CGSCCAnalysisManager::getResultImpl(void *PassID, LazyCallGraph::SCC *C) {
+ CGSCCAnalysisResultMapT::iterator RI;
+ bool Inserted;
+ std::tie(RI, Inserted) = CGSCCAnalysisResults.insert(std::make_pair(
+ std::make_pair(PassID, C), CGSCCAnalysisResultListT::iterator()));
+
+ // If we don't have a cached result for this SCC, look up the pass and

+ // run it to produce a result, which we then add to the cache.
+ if (Inserted) {
+ CGSCCAnalysisResultListT &ResultList = CGSCCAnalysisResultLists[C];
+ ResultList.emplace_back(PassID, lookupPass(PassID).run(C, this));
+ RI->second = std::prev(ResultList.end());
+ }
+
+ return *RI->second->second;
+}
+
+CGSCCAnalysisManager::ResultConceptT *
+CGSCCAnalysisManager::getCachedResultImpl(void *PassID,
+ LazyCallGraph::SCC *C) const {
+ CGSCCAnalysisResultMapT::const_iterator RI =
+ CGSCCAnalysisResults.find(std::make_pair(PassID, C));
+ return RI == CGSCCAnalysisResults.end() ? nullptr : &*RI->second->second;
+}
+
+void CGSCCAnalysisManager::invalidateImpl(void *PassID, LazyCallGraph::SCC *C) {
+ CGSCCAnalysisResultMapT::iterator RI =
+ CGSCCAnalysisResults.find(std::make_pair(PassID, C));
+ if (RI == CGSCCAnalysisResults.end())
+ return;
+
+ CGSCCAnalysisResultLists[C].erase(RI->second);
+}
+
+void CGSCCAnalysisManager::invalidateImpl(LazyCallGraph::SCC *C,
+ const PreservedAnalyses &PA) {
+ // Clear all the invalidated results associated specifically with this
+ // SCC.
+ SmallVector<void *, 8> InvalidatedPassIDs;
+ CGSCCAnalysisResultListT &ResultsList = CGSCCAnalysisResultLists[C];
+ for (CGSCCAnalysisResultListT::iterator I = ResultsList.begin(),
+ E = ResultsList.end();
+ I != E;)
+ if (I->second->invalidate(C, PA)) {
+ InvalidatedPassIDs.push_back(I->first);
+ I = ResultsList.erase(I);
+ } else {
+ ++I;
+ }
+ while (!InvalidatedPassIDs.empty())
+ CGSCCAnalysisResults.erase(
+ std::make_pair(InvalidatedPassIDs.pop_back_val(), C));
+ CGSCCAnalysisResultLists.erase(C);
+}
+
+char CGSCCAnalysisManagerModuleProxy::PassID;
+
+CGSCCAnalysisManagerModuleProxy::Result
+CGSCCAnalysisManagerModuleProxy::run(Module *M) {
+ assert(CGAM->empty() && "CGSCC analyses ran prior to the module proxy!");
+ return Result(*CGAM);
+}
+
+CGSCCAnalysisManagerModuleProxy::Result::~Result() {
+ // Clear out the analysis manager if we're being destroyed -- it means we
+ // didn't even see an invalidate call when we got invalidated.
+ CGAM->clear();
+}
+
+bool CGSCCAnalysisManagerModuleProxy::Result::invalidate(
+ Module *M, const PreservedAnalyses &PA) {
+ // If this proxy isn't marked as preserved, then we can't even invalidate
+ // individual CGSCC analyses; there may be an invalid set of SCC objects in
+ // the cache, making it impossible to incrementally preserve them.
+ // Just clear the entire manager.
+ if (!PA.preserved(ID()))
+ CGAM->clear();
+
+ // Return false to indicate that this result is still a valid proxy.
+ return false;
+}
+
+char ModuleAnalysisManagerCGSCCProxy::PassID;
+
+char FunctionAnalysisManagerCGSCCProxy::PassID;
+
+FunctionAnalysisManagerCGSCCProxy::Result
+FunctionAnalysisManagerCGSCCProxy::run(LazyCallGraph::SCC *C) {
+ assert(FAM->empty() && "Function analyses ran prior to the CGSCC proxy!");
+ return Result(*FAM);
+}
+
+FunctionAnalysisManagerCGSCCProxy::Result::~Result() {
+ // Clear out the analysis manager if we're being destroyed -- it means we
+ // didn't even see an invalidate call when we got invalidated.
+ FAM->clear();
+}
+
+bool FunctionAnalysisManagerCGSCCProxy::Result::invalidate(
+ LazyCallGraph::SCC *C, const PreservedAnalyses &PA) {
+ // If this proxy isn't marked as preserved, then we can't even invalidate
+ // individual function analyses; there may be an invalid set of Function
+ // objects in the cache, making it impossible to incrementally preserve them.
+ // Just clear the entire manager.
+ if (!PA.preserved(ID()))
+ FAM->clear();
+
+ // Return false to indicate that this result is still a valid proxy.
+ return false;
+}
+
+char CGSCCAnalysisManagerFunctionProxy::PassID;
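
The run loop and the two proxy Results in this new file follow one pattern: each pass reports a PreservedAnalyses set, the manager evicts anything not preserved, and the per-pass sets are intersected into the aggregate result; a proxy that is not explicitly preserved conservatively clears the whole inner manager. A compilable sketch of that protocol with toy types (not the real LLVM headers):

#include <memory>
#include <set>
#include <utility>
#include <vector>

struct PreservedAnalyses {
  bool All;
  std::set<int> IDs;                       // analysis IDs known preserved
  static PreservedAnalyses all() { return {true, {}}; }
  bool preserved(int ID) const { return All || IDs.count(ID) != 0; }
  void intersect(PreservedAnalyses Other) {
    if (Other.All)
      return;                              // other pass preserved everything
    if (All) {                             // we had "all": adopt other's set
      All = false;
      IDs = std::move(Other.IDs);
      return;
    }
    std::set<int> Keep;                    // keep only IDs both sets preserve
    for (int ID : IDs)
      if (Other.IDs.count(ID))
        Keep.insert(ID);
    IDs = std::move(Keep);
  }
};

struct SCC;                                // opaque IR unit
struct AnalysisManager {
  // Drop cached results the pass did not preserve (body elided).
  void invalidate(SCC *, const PreservedAnalyses &) {}
};
struct Pass {
  virtual ~Pass() {}
  virtual PreservedAnalyses run(SCC *C, AnalysisManager *AM) = 0;
};

PreservedAnalyses runAll(std::vector<std::unique_ptr<Pass>> &Passes, SCC *C,
                         AnalysisManager *AM) {
  PreservedAnalyses PA = PreservedAnalyses::all();
  for (auto &P : Passes) {
    PreservedAnalyses PassPA = P->run(C, AM);
    if (AM)
      AM->invalidate(C, PassPA);           // evict what this pass clobbered
    PA.intersect(std::move(PassPA));       // aggregate across the pipeline
  }
  return PA;
}
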
diff --git a/lib/Analysis/CMakeLists.txt b/lib/Analysis/CMakeLists.txt
index c6d4573..b546789 100644
--- a/lib/Analysis/CMakeLists.txt
+++ b/lib/Analysis/CMakeLists.txt
@@ -7,9 +7,11 @@ add_llvm_library(LLVMAnalysis
Analysis.cpp
BasicAliasAnalysis.cpp
BlockFrequencyInfo.cpp
+ BlockFrequencyInfoImpl.cpp
BranchProbabilityInfo.cpp
CFG.cpp
CFGPrinter.cpp
+ CGSCCPassManager.cpp
CaptureTracking.cpp
CostModel.cpp
CodeMetrics.cpp
diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp
index 782acfa..0ac1cb5 100644
--- a/lib/Analysis/ConstantFolding.cpp
+++ b/lib/Analysis/ConstantFolding.cpp
@@ -56,7 +56,7 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy,
// Handle a vector->integer cast.
if (IntegerType *IT = dyn_cast<IntegerType>(DestTy)) {
VectorType *VTy = dyn_cast<VectorType>(C->getType());
- if (VTy == 0)
+ if (!VTy)
return ConstantExpr::getBitCast(C, DestTy);
unsigned NumSrcElts = VTy->getNumElements();
@@ -73,7 +73,7 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy,
}
ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(C);
- if (CDV == 0)
+ if (!CDV)
return ConstantExpr::getBitCast(C, DestTy);
// Now that we know that the input value is a vector of integers, just shift
@@ -93,7 +93,7 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy,
// The code below only handles casts to vectors currently.
VectorType *DestVTy = dyn_cast<VectorType>(DestTy);
- if (DestVTy == 0)
+ if (!DestVTy)
return ConstantExpr::getBitCast(C, DestTy);
// If this is a scalar -> vector cast, convert the input into a <1 x scalar>
@@ -411,32 +411,32 @@ static Constant *FoldReinterpretLoadFromConstPtr(Constant *C,
TD.getTypeAllocSizeInBits(LoadTy),
AS);
} else
- return 0;
+ return nullptr;
C = FoldBitCast(C, MapTy, TD);
if (Constant *Res = FoldReinterpretLoadFromConstPtr(C, TD))
return FoldBitCast(Res, LoadTy, TD);
- return 0;
+ return nullptr;
}
unsigned BytesLoaded = (IntType->getBitWidth() + 7) / 8;
if (BytesLoaded > 32 || BytesLoaded == 0)
- return 0;
+ return nullptr;
GlobalValue *GVal;
APInt Offset;
if (!IsConstantOffsetFromGlobal(C, GVal, Offset, TD))
- return 0;
+ return nullptr;
GlobalVariable *GV = dyn_cast<GlobalVariable>(GVal);
if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer() ||
!GV->getInitializer()->getType()->isSized())
- return 0;
+ return nullptr;
// If we're loading off the beginning of the global, some bytes may be valid,
// but we don't try to handle this.
if (Offset.isNegative())
- return 0;
+ return nullptr;
// If we're not accessing anything in this constant, the result is undefined.
if (Offset.getZExtValue() >=
@@ -446,7 +446,7 @@ static Constant *FoldReinterpretLoadFromConstPtr(Constant *C,
unsigned char RawBytes[32] = {0};
if (!ReadDataFromGlobal(GV->getInitializer(), Offset.getZExtValue(), RawBytes,
BytesLoaded, TD))
- return 0;
+ return nullptr;
APInt ResultVal = APInt(IntType->getBitWidth(), 0);
if (TD.isLittleEndian()) {
@@ -466,6 +466,52 @@ static Constant *FoldReinterpretLoadFromConstPtr(Constant *C,
return ConstantInt::get(IntType->getContext(), ResultVal);
}
+static Constant *ConstantFoldLoadThroughBitcast(ConstantExpr *CE,
+ const DataLayout *DL) {
+ if (!DL)
+ return nullptr;
+ auto *DestPtrTy = dyn_cast<PointerType>(CE->getType());
+ if (!DestPtrTy)
+ return nullptr;
+ Type *DestTy = DestPtrTy->getElementType();
+
+ Constant *C = ConstantFoldLoadFromConstPtr(CE->getOperand(0), DL);
+ if (!C)
+ return nullptr;
+
+ do {
+ Type *SrcTy = C->getType();
+
+ // If the type sizes are the same and a cast is legal, just directly
+ // cast the constant.
+ if (DL->getTypeSizeInBits(DestTy) == DL->getTypeSizeInBits(SrcTy)) {
+ Instruction::CastOps Cast = Instruction::BitCast;
+ // If we are going from a pointer to int or vice versa, we spell the cast
+ // differently.
+ if (SrcTy->isIntegerTy() && DestTy->isPointerTy())
+ Cast = Instruction::IntToPtr;
+ else if (SrcTy->isPointerTy() && DestTy->isIntegerTy())
+ Cast = Instruction::PtrToInt;
+
+ if (CastInst::castIsValid(Cast, C, DestTy))
+ return ConstantExpr::getCast(Cast, C, DestTy);
+ }
+
+ // If this isn't an aggregate type, there is nothing we can do to drill down
+ // and find a bitcastable constant.
+ if (!SrcTy->isAggregateType())
+ return nullptr;
+
+ // We're simulating a load through a pointer that was bitcast to point to
+ // a different type, so we can try to walk down through the initial
+ // elements of an aggregate to see if some part of the aggregate is
+ // castable to implement the "load" semantic model.
+ C = C->getAggregateElement(0u);
+ } while (C);
+
+ return nullptr;
+}
+
/// ConstantFoldLoadFromConstPtr - Return the value that a load from C would
/// produce if it is constant and determinable. If this is not determinable,
/// return null.
@@ -479,7 +525,7 @@ Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C,
// If the loaded value isn't a constant expr, we can't handle it.
ConstantExpr *CE = dyn_cast<ConstantExpr>(C);
if (!CE)
- return 0;
+ return nullptr;
if (CE->getOpcode() == Instruction::GetElementPtr) {
if (GlobalVariable *GV = dyn_cast<GlobalVariable>(CE->getOperand(0))) {
@@ -491,6 +537,10 @@ Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C,
}
}
+ if (CE->getOpcode() == Instruction::BitCast)
+ if (Constant *LoadedC = ConstantFoldLoadThroughBitcast(CE, TD))
+ return LoadedC;
+
// Instead of loading constant c string, use corresponding integer value
// directly if string length is small enough.
StringRef Str;
@@ -542,16 +592,16 @@ Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C,
// Try hard to fold loads from bitcasted strange and non-type-safe things.
if (TD)
return FoldReinterpretLoadFromConstPtr(CE, *TD);
- return 0;
+ return nullptr;
}
static Constant *ConstantFoldLoadInst(const LoadInst *LI, const DataLayout *TD){
- if (LI->isVolatile()) return 0;
+ if (LI->isVolatile()) return nullptr;
if (Constant *C = dyn_cast<Constant>(LI->getOperand(0)))
return ConstantFoldLoadFromConstPtr(C, TD);
- return 0;
+ return nullptr;
}
/// SymbolicallyEvaluateBinop - One of Op0/Op1 is a constant expression.
@@ -571,8 +621,8 @@ static Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0,
unsigned BitWidth = DL->getTypeSizeInBits(Op0->getType()->getScalarType());
APInt KnownZero0(BitWidth, 0), KnownOne0(BitWidth, 0);
APInt KnownZero1(BitWidth, 0), KnownOne1(BitWidth, 0);
- ComputeMaskedBits(Op0, KnownZero0, KnownOne0, DL);
- ComputeMaskedBits(Op1, KnownZero1, KnownOne1, DL);
+ computeKnownBits(Op0, KnownZero0, KnownOne0, DL);
+ computeKnownBits(Op1, KnownZero1, KnownOne1, DL);
if ((KnownOne1 | KnownZero0).isAllOnesValue()) {
// All the bits of Op0 that the 'and' could be masking are already zero.
return Op0;
@@ -608,7 +658,7 @@ static Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0,
}
}
- return 0;
+ return nullptr;
}
/// CastGEPIndices - If array indices are not pointer-sized integers,
@@ -618,7 +668,7 @@ static Constant *CastGEPIndices(ArrayRef<Constant *> Ops,
Type *ResultTy, const DataLayout *TD,
const TargetLibraryInfo *TLI) {
if (!TD)
- return 0;
+ return nullptr;
Type *IntPtrTy = TD->getIntPtrType(ResultTy);
@@ -641,7 +691,7 @@ static Constant *CastGEPIndices(ArrayRef<Constant *> Ops,
}
if (!Any)
- return 0;
+ return nullptr;
Constant *C = ConstantExpr::getGetElementPtr(Ops[0], NewIdxs);
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
@@ -676,7 +726,7 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops,
Constant *Ptr = Ops[0];
if (!TD || !Ptr->getType()->getPointerElementType()->isSized() ||
!Ptr->getType()->isPointerTy())
- return 0;
+ return nullptr;
Type *IntPtrTy = TD->getIntPtrType(Ptr->getType());
Type *ResultElementTy = ResultTy->getPointerElementType();
@@ -690,7 +740,7 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops,
// "inttoptr (sub (ptrtoint Ptr), V)"
if (Ops.size() == 2 && ResultElementTy->isIntegerTy(8)) {
ConstantExpr *CE = dyn_cast<ConstantExpr>(Ops[1]);
- assert((CE == 0 || CE->getType() == IntPtrTy) &&
+ assert((!CE || CE->getType() == IntPtrTy) &&
"CastGEPIndices didn't canonicalize index types!");
if (CE && CE->getOpcode() == Instruction::Sub &&
CE->getOperand(0)->isNullValue()) {
@@ -702,7 +752,7 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops,
return Res;
}
}
- return 0;
+ return nullptr;
}
unsigned BitWidth = TD->getTypeSizeInBits(IntPtrTy);
@@ -765,7 +815,7 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops,
// Only handle pointers to sized types, not pointers to functions.
if (!ATy->getElementType()->isSized())
- return 0;
+ return nullptr;
}
// Determine which element of the array the offset points into.
@@ -810,7 +860,7 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef<Constant *> Ops,
// type, then the offset is pointing into the middle of an indivisible
// member, so we can't simplify it.
if (Offset != 0)
- return 0;
+ return nullptr;
// Create a GEP.
Constant *C = ConstantExpr::getGetElementPtr(Ptr, NewIdxs);
@@ -841,7 +891,7 @@ Constant *llvm::ConstantFoldInstruction(Instruction *I,
const TargetLibraryInfo *TLI) {
// Handle PHI nodes quickly here...
if (PHINode *PN = dyn_cast<PHINode>(I)) {
- Constant *CommonValue = 0;
+ Constant *CommonValue = nullptr;
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
Value *Incoming = PN->getIncomingValue(i);
@@ -854,14 +904,14 @@ Constant *llvm::ConstantFoldInstruction(Instruction *I,
// If the incoming value is not a constant, then give up.
Constant *C = dyn_cast<Constant>(Incoming);
if (!C)
- return 0;
+ return nullptr;
// Fold the PHI's operands.
if (ConstantExpr *NewC = dyn_cast<ConstantExpr>(C))
C = ConstantFoldConstantExpression(NewC, TD, TLI);
// If the incoming value is a different constant to
// the one we saw previously, then give up.
if (CommonValue && C != CommonValue)
- return 0;
+ return nullptr;
CommonValue = C;
}
@@ -876,7 +926,7 @@ Constant *llvm::ConstantFoldInstruction(Instruction *I,
for (User::op_iterator i = I->op_begin(), e = I->op_end(); i != e; ++i) {
Constant *Op = dyn_cast<Constant>(*i);
if (!Op)
- return 0; // All operands not constant!
+ return nullptr; // All operands not constant!
// Fold the Instruction's operands.
if (ConstantExpr *NewCE = dyn_cast<ConstantExpr>(Op))
@@ -966,14 +1016,14 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy,
}
switch (Opcode) {
- default: return 0;
+ default: return nullptr;
case Instruction::ICmp:
case Instruction::FCmp: llvm_unreachable("Invalid for compares");
case Instruction::Call:
if (Function *F = dyn_cast<Function>(Ops.back()))
if (canConstantFoldCallTo(F))
return ConstantFoldCall(F, Ops.slice(0, Ops.size() - 1), TLI);
- return 0;
+ return nullptr;
case Instruction::PtrToInt:
// If the input is an inttoptr, eliminate the pair. This requires knowing
// the width of a pointer, so it can't be done in ConstantExpr::getCast.
@@ -1142,14 +1192,14 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate,
Constant *llvm::ConstantFoldLoadThroughGEPConstantExpr(Constant *C,
ConstantExpr *CE) {
if (!CE->getOperand(1)->isNullValue())
- return 0; // Do not allow stepping over the value!
+ return nullptr; // Do not allow stepping over the value!
// Loop over all of the operands, tracking down which value we are
// addressing.
for (unsigned i = 2, e = CE->getNumOperands(); i != e; ++i) {
C = C->getAggregateElement(CE->getOperand(i));
- if (C == 0)
- return 0;
+ if (!C)
+ return nullptr;
}
return C;
}
@@ -1164,8 +1214,8 @@ Constant *llvm::ConstantFoldLoadThroughGEPIndices(Constant *C,
// addressing.
for (unsigned i = 0, e = Indices.size(); i != e; ++i) {
C = C->getAggregateElement(Indices[i]);
- if (C == 0)
- return 0;
+ if (!C)
+ return nullptr;
}
return C;
}
@@ -1270,7 +1320,7 @@ static Constant *ConstantFoldFP(double (*NativeFP)(double), double V,
V = NativeFP(V);
if (sys::llvm_fenv_testexcept()) {
sys::llvm_fenv_clearexcept();
- return 0;
+ return nullptr;
}
return GetConstantFoldFPValue(V, Ty);
@@ -1282,7 +1332,7 @@ static Constant *ConstantFoldBinaryFP(double (*NativeFP)(double, double),
V = NativeFP(V, W);
if (sys::llvm_fenv_testexcept()) {
sys::llvm_fenv_clearexcept();
- return 0;
+ return nullptr;
}
return GetConstantFoldFPValue(V, Ty);
@@ -1311,7 +1361,7 @@ static Constant *ConstantFoldConvertToInt(const APFloat &Val,
/*isSigned=*/true, mode,
&isExact);
if (status != APFloat::opOK && status != APFloat::opInexact)
- return 0;
+ return nullptr;
return ConstantInt::get(Ty, UIntVal, /*isSigned=*/true);
}
@@ -1345,7 +1395,7 @@ static Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID,
}
if (!Ty->isHalfTy() && !Ty->isFloatTy() && !Ty->isDoubleTy())
- return 0;
+ return nullptr;
if (IntrinsicID == Intrinsic::round) {
APFloat V = Op->getValueAPF();
@@ -1357,7 +1407,7 @@ static Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID,
/// likely to be aborted with an exception anyway, and some host libms
/// have known errors raising exceptions.
if (Op->getValueAPF().isNaN() || Op->getValueAPF().isInfinity())
- return 0;
+ return nullptr;
/// Currently APFloat versions of these functions do not exist, so we use
/// the host native double versions. Float versions are not called
@@ -1396,7 +1446,7 @@ static Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID,
}
if (!TLI)
- return 0;
+ return nullptr;
switch (Name[0]) {
case 'a':
@@ -1467,7 +1517,7 @@ static Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID,
default:
break;
}
- return 0;
+ return nullptr;
}
if (ConstantInt *Op = dyn_cast<ConstantInt>(Operands[0])) {
@@ -1491,7 +1541,7 @@ static Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID,
return ConstantFP::get(Ty->getContext(), Val);
}
default:
- return 0;
+ return nullptr;
}
}
@@ -1523,21 +1573,21 @@ static Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID,
if (isa<UndefValue>(Operands[0])) {
if (IntrinsicID == Intrinsic::bswap)
return Operands[0];
- return 0;
+ return nullptr;
}
- return 0;
+ return nullptr;
}
if (Operands.size() == 2) {
if (ConstantFP *Op1 = dyn_cast<ConstantFP>(Operands[0])) {
if (!Ty->isHalfTy() && !Ty->isFloatTy() && !Ty->isDoubleTy())
- return 0;
+ return nullptr;
double Op1V = getValueAsDouble(Op1);
if (ConstantFP *Op2 = dyn_cast<ConstantFP>(Operands[1])) {
if (Op2->getType() != Op1->getType())
- return 0;
+ return nullptr;
double Op2V = getValueAsDouble(Op2);
if (IntrinsicID == Intrinsic::pow) {
@@ -1550,7 +1600,7 @@ static Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID,
return ConstantFP::get(Ty->getContext(), V1);
}
if (!TLI)
- return 0;
+ return nullptr;
if (Name == "pow" && TLI->has(LibFunc::pow))
return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty);
if (Name == "fmod" && TLI->has(LibFunc::fmod))
@@ -1571,7 +1621,7 @@ static Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID,
APFloat((double)std::pow((double)Op1V,
(int)Op2C->getZExtValue())));
}
- return 0;
+ return nullptr;
}
if (ConstantInt *Op1 = dyn_cast<ConstantInt>(Operands[0])) {
@@ -1624,13 +1674,13 @@ static Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID,
}
}
- return 0;
+ return nullptr;
}
- return 0;
+ return nullptr;
}
if (Operands.size() != 3)
- return 0;
+ return nullptr;
if (const ConstantFP *Op1 = dyn_cast<ConstantFP>(Operands[0])) {
if (const ConstantFP *Op2 = dyn_cast<ConstantFP>(Operands[1])) {
@@ -1646,14 +1696,14 @@ static Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID,
if (s != APFloat::opInvalidOp)
return ConstantFP::get(Ty->getContext(), V);
- return 0;
+ return nullptr;
}
}
}
}
}
- return 0;
+ return nullptr;
}
static Constant *ConstantFoldVectorCall(StringRef Name, unsigned IntrinsicID,
@@ -1690,7 +1740,7 @@ Constant *
llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands,
const TargetLibraryInfo *TLI) {
if (!F->hasName())
- return 0;
+ return nullptr;
StringRef Name = F->getName();
Type *Ty = F->getReturnType();
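
The most interesting addition in this file is ConstantFoldLoadThroughBitcast: when a load reads through a pointer that was bitcast to a differently-typed pointer, the folder repeatedly takes element 0 of the source aggregate until it finds a leaf whose size matches the loaded type. A toy model of that drill-down, with a stand-in Constant type rather than llvm::Constant:

#include <vector>

// Toy stand-in for llvm::Constant: a leaf value of some bit width, or an
// aggregate (struct/array) of elements.
struct Constant {
  unsigned Bits;                       // nonzero for leaves, 0 for aggregates
  std::vector<Constant> Elements;      // nonempty for aggregates
};

// Mirror of the drill-down loop above: if the current constant is not
// directly castable to the destination width, descend into element 0 of the
// aggregate -- the part a load through the bitcast pointer would read first.
const Constant *foldLoadThroughBitcast(const Constant *C, unsigned DestBits) {
  while (C) {
    if (C->Elements.empty())
      return C->Bits == DestBits ? C : nullptr;
    C = &C->Elements.front();          // getAggregateElement(0u)
  }
  return nullptr;
}
// Example: loading an i32 through "bitcast {{i32, i8}}* to i32*" descends
// two levels and recovers the inner 32-bit leaf.
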
diff --git a/lib/Analysis/CostModel.cpp b/lib/Analysis/CostModel.cpp
index b49211d..780b1aa 100644
--- a/lib/Analysis/CostModel.cpp
+++ b/lib/Analysis/CostModel.cpp
@@ -17,8 +17,6 @@
//
//===----------------------------------------------------------------------===//
-#define CM_NAME "cost-model"
-#define DEBUG_TYPE CM_NAME
#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/Passes.h"
#include "llvm/Analysis/TargetTransformInfo.h"
@@ -32,6 +30,9 @@
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
+#define CM_NAME "cost-model"
+#define DEBUG_TYPE CM_NAME
+
static cl::opt<bool> EnableReduxCost("costmodel-reduxcost", cl::init(false),
cl::Hidden,
cl::desc("Recognize reduction patterns."));
@@ -41,7 +42,7 @@ namespace {
public:
static char ID; // Class identification, replacement for typeinfo
- CostModelAnalysis() : FunctionPass(ID), F(0), TTI(0) {
+ CostModelAnalysis() : FunctionPass(ID), F(nullptr), TTI(nullptr) {
initializeCostModelAnalysisPass(
*PassRegistry::getPassRegistry());
}
@@ -101,24 +102,13 @@ static TargetTransformInfo::OperandValueKind getOperandInfo(Value *V) {
// Check for a splat of a constant or for a non uniform vector of constants.
if (isa<ConstantVector>(V) || isa<ConstantDataVector>(V)) {
OpInfo = TargetTransformInfo::OK_NonUniformConstantValue;
- if (cast<Constant>(V)->getSplatValue() != NULL)
+ if (cast<Constant>(V)->getSplatValue() != nullptr)
OpInfo = TargetTransformInfo::OK_UniformConstantValue;
}
return OpInfo;
}
-static bool matchMask(SmallVectorImpl<int> &M1, SmallVectorImpl<int> &M2) {
- if (M1.size() != M2.size())
- return false;
-
- for (unsigned i = 0, e = M1.size(); i != e; ++i)
- if (M1[i] != M2[i])
- return false;
-
- return true;
-}
-
static bool matchPairwiseShuffleMask(ShuffleVectorInst *SI, bool IsLeft,
unsigned Level) {
// We don't need a shuffle if we just want to have element 0 in position 0 of
@@ -136,7 +126,7 @@ static bool matchPairwiseShuffleMask(ShuffleVectorInst *SI, bool IsLeft,
Mask[i] = val;
SmallVector<int, 16> ActualMask = SI->getShuffleMask();
- if (!matchMask(Mask, ActualMask))
+ if (Mask != ActualMask)
return false;
return true;
@@ -150,7 +140,7 @@ static bool matchPairwiseReductionAtLevel(const BinaryOperator *BinOp,
// %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef,
// <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
// %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1
- if (BinOp == 0)
+ if (BinOp == nullptr)
return false;
assert(BinOp->getType()->isVectorTy() && "Expecting a vector type");
@@ -171,9 +161,9 @@ static bool matchPairwiseReductionAtLevel(const BinaryOperator *BinOp,
return false;
// Shuffle inputs must match.
- Value *NextLevelOpL = LS ? LS->getOperand(0) : 0;
- Value *NextLevelOpR = RS ? RS->getOperand(0) : 0;
- Value *NextLevelOp = 0;
+ Value *NextLevelOpL = LS ? LS->getOperand(0) : nullptr;
+ Value *NextLevelOpR = RS ? RS->getOperand(0) : nullptr;
+ Value *NextLevelOp = nullptr;
if (NextLevelOpR && NextLevelOpL) {
// If we have two shuffles their operands must match.
if (NextLevelOpL != NextLevelOpR)
@@ -198,7 +188,7 @@ static bool matchPairwiseReductionAtLevel(const BinaryOperator *BinOp,
// Check that the next levels binary operation exists and matches with the
// current one.
- BinaryOperator *NextLevelBinOp = 0;
+ BinaryOperator *NextLevelBinOp = nullptr;
if (Level + 1 != NumLevels) {
if (!(NextLevelBinOp = dyn_cast<BinaryOperator>(NextLevelOp)))
return false;
@@ -277,7 +267,7 @@ getShuffleAndOtherOprd(BinaryOperator *B) {
Value *L = B->getOperand(0);
Value *R = B->getOperand(1);
- ShuffleVectorInst *S = 0;
+ ShuffleVectorInst *S = nullptr;
if ((S = dyn_cast<ShuffleVectorInst>(L)))
return std::make_pair(R, S);
@@ -337,7 +327,7 @@ static bool matchVectorSplittingReduction(const ExtractElementInst *ReduxRoot,
std::tie(NextRdxOp, Shuffle) = getShuffleAndOtherOprd(BinOp);
// Check the current reduction operation and the shuffle use the same value.
- if (Shuffle == 0)
+ if (Shuffle == nullptr)
return false;
if (Shuffle->getOperand(0) != NextRdxOp)
return false;
@@ -349,7 +339,7 @@ static bool matchVectorSplittingReduction(const ExtractElementInst *ReduxRoot,
std::fill(&ShuffleMask[MaskStart], ShuffleMask.end(), -1);
SmallVector<int, 16> Mask = Shuffle->getShuffleMask();
- if (!matchMask(ShuffleMask, Mask))
+ if (ShuffleMask != Mask)
return false;
RdxOp = NextRdxOp;
@@ -478,7 +468,7 @@ unsigned CostModelAnalysis::getInstructionCost(const Instruction *I) const {
if (NumVecElems == Mask.size() && isReverseVectorMask(Mask))
return TTI->getShuffleCost(TargetTransformInfo::SK_Reverse, VecTypOp0, 0,
- 0);
+ nullptr);
return -1;
}
case Instruction::Call:
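
Dropping matchMask in favor of a direct comparison works because SmallVector supplies element-wise operator== and operator!=. The mask being compared has a fixed shape per reduction level; the sketch below rebuilds it with plain std::vector, where the 2^Level loop bound is an assumption about code elided from the hunk above:

#include <vector>

// Expected pairwise-reduction mask: lanes 0..2^Level-1 read elements
// 0,2,4,... on the left and 1,3,5,... on the right; remaining lanes are
// undef (-1).
std::vector<int> expectedPairwiseMask(unsigned NumElts, unsigned Level,
                                      bool IsLeft) {
  std::vector<int> Mask(NumElts, -1);
  for (unsigned i = 0, e = 1u << Level, val = IsLeft ? 0 : 1; i != e;
       ++i, val += 2)
    Mask[i] = val;
  return Mask;
}
// expectedPairwiseMask(4, 1, true)  == {0, 2, -1, -1}
// expectedPairwiseMask(4, 1, false) == {1, 3, -1, -1}
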
diff --git a/lib/Analysis/Delinearization.cpp b/lib/Analysis/Delinearization.cpp
index fd4a2f0..9334ceb 100644
--- a/lib/Analysis/Delinearization.cpp
+++ b/lib/Analysis/Delinearization.cpp
@@ -14,8 +14,6 @@
//
//===----------------------------------------------------------------------===//
-#define DL_NAME "delinearize"
-#define DEBUG_TYPE DL_NAME
#include "llvm/IR/Constants.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/Passes.h"
@@ -34,6 +32,9 @@
using namespace llvm;
+#define DL_NAME "delinearize"
+#define DEBUG_TYPE DL_NAME
+
namespace {
class Delinearization : public FunctionPass {
@@ -51,7 +52,7 @@ public:
}
bool runOnFunction(Function &F) override;
void getAnalysisUsage(AnalysisUsage &AU) const override;
- void print(raw_ostream &O, const Module *M = 0) const override;
+ void print(raw_ostream &O, const Module *M = nullptr) const override;
};
} // end anonymous namespace
@@ -76,7 +77,7 @@ static Value *getPointerOperand(Instruction &Inst) {
return Store->getPointerOperand();
else if (GetElementPtrInst *Gep = dyn_cast<GetElementPtrInst>(&Inst))
return Gep->getPointerOperand();
- return NULL;
+ return nullptr;
}
void Delinearization::print(raw_ostream &O, const Module *) const {
@@ -92,25 +93,38 @@ void Delinearization::print(raw_ostream &O, const Module *) const {
const BasicBlock *BB = Inst->getParent();
// Delinearize the memory access as analyzed in all the surrounding loops.
// Do not analyze memory accesses outside loops.
- for (Loop *L = LI->getLoopFor(BB); L != NULL; L = L->getParentLoop()) {
+ for (Loop *L = LI->getLoopFor(BB); L != nullptr; L = L->getParentLoop()) {
const SCEV *AccessFn = SE->getSCEVAtScope(getPointerOperand(*Inst), L);
+
+ const SCEVUnknown *BasePointer =
+ dyn_cast<SCEVUnknown>(SE->getPointerBase(AccessFn));
+ // Do not delinearize if we cannot find the base pointer.
+ if (!BasePointer)
+ break;
+ AccessFn = SE->getMinusSCEV(AccessFn, BasePointer);
const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(AccessFn);
// Do not try to delinearize memory accesses that are not AddRecs.
if (!AR)
break;
+
+ O << "\n";
+ O << "Inst:" << *Inst << "\n";
+ O << "In Loop with Header: " << L->getHeader()->getName() << "\n";
O << "AddRec: " << *AR << "\n";
SmallVector<const SCEV *, 3> Subscripts, Sizes;
- const SCEV *Res = AR->delinearize(*SE, Subscripts, Sizes);
- int Size = Subscripts.size();
- if (Res == AR || Size == 0) {
+ AR->delinearize(*SE, Subscripts, Sizes, SE->getElementSize(Inst));
+ if (Subscripts.size() == 0 || Sizes.size() == 0 ||
+ Subscripts.size() != Sizes.size()) {
O << "failed to delinearize\n";
continue;
}
- O << "Base offset: " << *Res << "\n";
+
+ O << "Base offset: " << *BasePointer << "\n";
O << "ArrayDecl[UnknownSize]";
+ int Size = Subscripts.size();
for (int i = 0; i < Size - 1; i++)
O << "[" << *Sizes[i] << "]";
O << " with elements of " << *Sizes[Size - 1] << " bytes.\n";
diff --git a/lib/Analysis/DependenceAnalysis.cpp b/lib/Analysis/DependenceAnalysis.cpp
index ff98611..d0784f1 100644
--- a/lib/Analysis/DependenceAnalysis.cpp
+++ b/lib/Analysis/DependenceAnalysis.cpp
@@ -51,8 +51,6 @@
// //
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "da"
-
#include "llvm/Analysis/DependenceAnalysis.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
@@ -69,6 +67,8 @@
using namespace llvm;
+#define DEBUG_TYPE "da"
+
//===----------------------------------------------------------------------===//
// statistics
@@ -234,7 +234,7 @@ FullDependence::FullDependence(Instruction *Source,
Levels(CommonLevels),
LoopIndependent(PossiblyLoopIndependent) {
Consistent = true;
- DV = CommonLevels ? new DVEntry[CommonLevels] : NULL;
+ DV = CommonLevels ? new DVEntry[CommonLevels] : nullptr;
}
// The rest are simple getters that hide the implementation.
@@ -658,7 +658,7 @@ Value *getPointerOperand(Instruction *I) {
if (StoreInst *SI = dyn_cast<StoreInst>(I))
return SI->getPointerOperand();
llvm_unreachable("Value is not load or store instruction");
- return 0;
+ return nullptr;
}
@@ -932,7 +932,7 @@ const SCEV *DependenceAnalysis::collectUpperBound(const Loop *L,
const SCEV *UB = SE->getBackedgeTakenCount(L);
return SE->getNoopOrZeroExtend(UB, T);
}
- return NULL;
+ return nullptr;
}
@@ -943,7 +943,7 @@ const SCEVConstant *DependenceAnalysis::collectConstantUpperBound(const Loop *L,
) const {
if (const SCEV *UB = collectUpperBound(L, T))
return dyn_cast<SCEVConstant>(UB);
- return NULL;
+ return nullptr;
}
@@ -2194,7 +2194,7 @@ const SCEVConstant *getConstantPart(const SCEVMulExpr *Product) {
if (const SCEVConstant *Constant = dyn_cast<SCEVConstant>(Product->getOperand(Op)))
return Constant;
}
- return NULL;
+ return nullptr;
}
@@ -2646,8 +2646,8 @@ void DependenceAnalysis::findBoundsALL(CoefficientInfo *A,
CoefficientInfo *B,
BoundInfo *Bound,
unsigned K) const {
- Bound[K].Lower[Dependence::DVEntry::ALL] = NULL; // Default value = -infinity.
- Bound[K].Upper[Dependence::DVEntry::ALL] = NULL; // Default value = +infinity.
+ Bound[K].Lower[Dependence::DVEntry::ALL] = nullptr; // Default value = -infinity.
+ Bound[K].Upper[Dependence::DVEntry::ALL] = nullptr; // Default value = +infinity.
if (Bound[K].Iterations) {
Bound[K].Lower[Dependence::DVEntry::ALL] =
SE->getMulExpr(SE->getMinusSCEV(A[K].NegPart, B[K].PosPart),
@@ -2687,8 +2687,8 @@ void DependenceAnalysis::findBoundsEQ(CoefficientInfo *A,
CoefficientInfo *B,
BoundInfo *Bound,
unsigned K) const {
- Bound[K].Lower[Dependence::DVEntry::EQ] = NULL; // Default value = -infinity.
- Bound[K].Upper[Dependence::DVEntry::EQ] = NULL; // Default value = +infinity.
+ Bound[K].Lower[Dependence::DVEntry::EQ] = nullptr; // Default value = -infinity.
+ Bound[K].Upper[Dependence::DVEntry::EQ] = nullptr; // Default value = +infinity.
if (Bound[K].Iterations) {
const SCEV *Delta = SE->getMinusSCEV(A[K].Coeff, B[K].Coeff);
const SCEV *NegativePart = getNegativePart(Delta);
@@ -2729,8 +2729,8 @@ void DependenceAnalysis::findBoundsLT(CoefficientInfo *A,
CoefficientInfo *B,
BoundInfo *Bound,
unsigned K) const {
- Bound[K].Lower[Dependence::DVEntry::LT] = NULL; // Default value = -infinity.
- Bound[K].Upper[Dependence::DVEntry::LT] = NULL; // Default value = +infinity.
+ Bound[K].Lower[Dependence::DVEntry::LT] = nullptr; // Default value = -infinity.
+ Bound[K].Upper[Dependence::DVEntry::LT] = nullptr; // Default value = +infinity.
if (Bound[K].Iterations) {
const SCEV *Iter_1 =
SE->getMinusSCEV(Bound[K].Iterations,
@@ -2776,8 +2776,8 @@ void DependenceAnalysis::findBoundsGT(CoefficientInfo *A,
CoefficientInfo *B,
BoundInfo *Bound,
unsigned K) const {
- Bound[K].Lower[Dependence::DVEntry::GT] = NULL; // Default value = -infinity.
- Bound[K].Upper[Dependence::DVEntry::GT] = NULL; // Default value = +infinity.
+ Bound[K].Lower[Dependence::DVEntry::GT] = nullptr; // Default value = -infinity.
+ Bound[K].Upper[Dependence::DVEntry::GT] = nullptr; // Default value = +infinity.
if (Bound[K].Iterations) {
const SCEV *Iter_1 =
SE->getMinusSCEV(Bound[K].Iterations,
@@ -2829,7 +2829,7 @@ DependenceAnalysis::collectCoeffInfo(const SCEV *Subscript,
CI[K].Coeff = Zero;
CI[K].PosPart = Zero;
CI[K].NegPart = Zero;
- CI[K].Iterations = NULL;
+ CI[K].Iterations = nullptr;
}
while (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Subscript)) {
const Loop *L = AddRec->getLoop();
@@ -2872,7 +2872,7 @@ const SCEV *DependenceAnalysis::getLowerBound(BoundInfo *Bound) const {
if (Bound[K].Lower[Bound[K].Direction])
Sum = SE->getAddExpr(Sum, Bound[K].Lower[Bound[K].Direction]);
else
- Sum = NULL;
+ Sum = nullptr;
}
return Sum;
}
@@ -2888,7 +2888,7 @@ const SCEV *DependenceAnalysis::getUpperBound(BoundInfo *Bound) const {
if (Bound[K].Upper[Bound[K].Direction])
Sum = SE->getAddExpr(Sum, Bound[K].Upper[Bound[K].Direction]);
else
- Sum = NULL;
+ Sum = nullptr;
}
return Sum;
}
@@ -3148,12 +3148,12 @@ void DependenceAnalysis::updateDirection(Dependence::DVEntry &Level,
}
else if (CurConstraint.isLine()) {
Level.Scalar = false;
- Level.Distance = NULL;
+ Level.Distance = nullptr;
// direction should be accurate
}
else if (CurConstraint.isPoint()) {
Level.Scalar = false;
- Level.Distance = NULL;
+ Level.Distance = nullptr;
unsigned NewDirection = Dependence::DVEntry::NONE;
if (!isKnownPredicate(CmpInst::ICMP_NE,
CurConstraint.getY(),
@@ -3180,59 +3180,55 @@ void DependenceAnalysis::updateDirection(Dependence::DVEntry &Level,
/// source and destination array references are recurrences on a nested loop,
/// this function flattens the nested recurrences into separate recurrences
/// for each loop level.
-bool
-DependenceAnalysis::tryDelinearize(const SCEV *SrcSCEV, const SCEV *DstSCEV,
- SmallVectorImpl<Subscript> &Pair) const {
+bool DependenceAnalysis::tryDelinearize(const SCEV *SrcSCEV,
+ const SCEV *DstSCEV,
+ SmallVectorImpl<Subscript> &Pair,
+ const SCEV *ElementSize) const {
+ const SCEVUnknown *SrcBase =
+ dyn_cast<SCEVUnknown>(SE->getPointerBase(SrcSCEV));
+ const SCEVUnknown *DstBase =
+ dyn_cast<SCEVUnknown>(SE->getPointerBase(DstSCEV));
+
+ if (!SrcBase || !DstBase || SrcBase != DstBase)
+ return false;
+
+ SrcSCEV = SE->getMinusSCEV(SrcSCEV, SrcBase);
+ DstSCEV = SE->getMinusSCEV(DstSCEV, DstBase);
+
const SCEVAddRecExpr *SrcAR = dyn_cast<SCEVAddRecExpr>(SrcSCEV);
const SCEVAddRecExpr *DstAR = dyn_cast<SCEVAddRecExpr>(DstSCEV);
if (!SrcAR || !DstAR || !SrcAR->isAffine() || !DstAR->isAffine())
return false;
- SmallVector<const SCEV *, 4> SrcSubscripts, DstSubscripts, SrcSizes, DstSizes;
- const SCEV *RemainderS = SrcAR->delinearize(*SE, SrcSubscripts, SrcSizes);
- const SCEV *RemainderD = DstAR->delinearize(*SE, DstSubscripts, DstSizes);
+ // First step: collect parametric terms in both array references.
+ SmallVector<const SCEV *, 4> Terms;
+ SrcAR->collectParametricTerms(*SE, Terms);
+ DstAR->collectParametricTerms(*SE, Terms);
- int size = SrcSubscripts.size();
- // Fail when there is only a subscript: that's a linearized access function.
- if (size < 2)
- return false;
-
- int dstSize = DstSubscripts.size();
- // Fail when the number of subscripts in Src and Dst differ.
- if (size != dstSize)
- return false;
+ // Second step: find subscript sizes.
+ SmallVector<const SCEV *, 4> Sizes;
+ SE->findArrayDimensions(Terms, Sizes, ElementSize);
- // Fail when the size of any of the subscripts in Src and Dst differs: the
- // dependence analysis assumes that elements in the same array have same size.
- // SCEV delinearization does not have a context based on which it would decide
- // globally the size of subscripts that would best fit all the array accesses.
- for (int i = 0; i < size; ++i)
- if (SrcSizes[i] != DstSizes[i])
- return false;
+ // Third step: compute the access functions for each subscript.
+ SmallVector<const SCEV *, 4> SrcSubscripts, DstSubscripts;
+ SrcAR->computeAccessFunctions(*SE, SrcSubscripts, Sizes);
+ DstAR->computeAccessFunctions(*SE, DstSubscripts, Sizes);
- // When the difference in remainders is different than a constant it might be
- // that the base address of the arrays is not the same.
- const SCEV *DiffRemainders = SE->getMinusSCEV(RemainderS, RemainderD);
- if (!isa<SCEVConstant>(DiffRemainders))
+ // Fail when there is only a subscript: that's a linearized access function.
+ if (SrcSubscripts.size() < 2 || DstSubscripts.size() < 2 ||
+ SrcSubscripts.size() != DstSubscripts.size())
return false;
- // Normalize the last dimension: integrate the size of the "scalar dimension"
- // and the remainder of the delinearization.
- DstSubscripts[size-1] = SE->getMulExpr(DstSubscripts[size-1],
- DstSizes[size-1]);
- SrcSubscripts[size-1] = SE->getMulExpr(SrcSubscripts[size-1],
- SrcSizes[size-1]);
- SrcSubscripts[size-1] = SE->getAddExpr(SrcSubscripts[size-1], RemainderS);
- DstSubscripts[size-1] = SE->getAddExpr(DstSubscripts[size-1], RemainderD);
+ int size = SrcSubscripts.size();
-#ifndef NDEBUG
- DEBUG(errs() << "\nSrcSubscripts: ");
- for (int i = 0; i < size; i++)
- DEBUG(errs() << *SrcSubscripts[i]);
- DEBUG(errs() << "\nDstSubscripts: ");
- for (int i = 0; i < size; i++)
- DEBUG(errs() << *DstSubscripts[i]);
-#endif
+ DEBUG({
+ dbgs() << "\nSrcSubscripts: ";
+ for (int i = 0; i < size; i++)
+ dbgs() << *SrcSubscripts[i];
+ dbgs() << "\nDstSubscripts: ";
+ for (int i = 0; i < size; i++)
+ dbgs() << *DstSubscripts[i];
+ });
// The delinearization transforms a single-subscript MIV dependence test into
// a multi-subscript SIV dependence test that is easier to compute. So we
@@ -3290,7 +3286,7 @@ Dependence *DependenceAnalysis::depends(Instruction *Src,
if ((!Src->mayReadFromMemory() && !Src->mayWriteToMemory()) ||
(!Dst->mayReadFromMemory() && !Dst->mayWriteToMemory()))
// if both instructions don't reference memory, there's no dependence
- return NULL;
+ return nullptr;
if (!isLoadOrStore(Src) || !isLoadOrStore(Dst)) {
// can only analyze simple loads and stores, i.e., no calls, invokes, etc.
@@ -3310,7 +3306,7 @@ Dependence *DependenceAnalysis::depends(Instruction *Src,
case AliasAnalysis::NoAlias:
// If the objects noalias, they are distinct, accesses are independent.
DEBUG(dbgs() << "no alias\n");
- return NULL;
+ return nullptr;
case AliasAnalysis::MustAlias:
break; // The underlying objects alias; test accesses for dependence.
}
@@ -3363,7 +3359,7 @@ Dependence *DependenceAnalysis::depends(Instruction *Src,
}
if (Delinearize && Pairs == 1 && CommonLevels > 1 &&
- tryDelinearize(Pair[0].Src, Pair[0].Dst, Pair)) {
+ tryDelinearize(Pair[0].Src, Pair[0].Dst, Pair, SE->getElementSize(Src))) {
DEBUG(dbgs() << " delinerized GEP\n");
Pairs = Pair.size();
}
@@ -3505,26 +3501,26 @@ Dependence *DependenceAnalysis::depends(Instruction *Src,
case Subscript::ZIV:
DEBUG(dbgs() << ", ZIV\n");
if (testZIV(Pair[SI].Src, Pair[SI].Dst, Result))
- return NULL;
+ return nullptr;
break;
case Subscript::SIV: {
DEBUG(dbgs() << ", SIV\n");
unsigned Level;
- const SCEV *SplitIter = NULL;
+ const SCEV *SplitIter = nullptr;
if (testSIV(Pair[SI].Src, Pair[SI].Dst, Level,
Result, NewConstraint, SplitIter))
- return NULL;
+ return nullptr;
break;
}
case Subscript::RDIV:
DEBUG(dbgs() << ", RDIV\n");
if (testRDIV(Pair[SI].Src, Pair[SI].Dst, Result))
- return NULL;
+ return nullptr;
break;
case Subscript::MIV:
DEBUG(dbgs() << ", MIV\n");
if (testMIV(Pair[SI].Src, Pair[SI].Dst, Pair[SI].Loops, Result))
- return NULL;
+ return nullptr;
break;
default:
llvm_unreachable("subscript has unexpected classification");
@@ -3558,16 +3554,16 @@ Dependence *DependenceAnalysis::depends(Instruction *Src,
DEBUG(dbgs() << "testing subscript " << SJ << ", SIV\n");
// SJ is an SIV subscript that's part of the current coupled group
unsigned Level;
- const SCEV *SplitIter = NULL;
+ const SCEV *SplitIter = nullptr;
DEBUG(dbgs() << "SIV\n");
if (testSIV(Pair[SJ].Src, Pair[SJ].Dst, Level,
Result, NewConstraint, SplitIter))
- return NULL;
+ return nullptr;
ConstrainedLevels.set(Level);
if (intersectConstraints(&Constraints[Level], &NewConstraint)) {
if (Constraints[Level].isEmpty()) {
++DeltaIndependence;
- return NULL;
+ return nullptr;
}
Changed = true;
}
@@ -3593,7 +3589,7 @@ Dependence *DependenceAnalysis::depends(Instruction *Src,
case Subscript::ZIV:
DEBUG(dbgs() << "ZIV\n");
if (testZIV(Pair[SJ].Src, Pair[SJ].Dst, Result))
- return NULL;
+ return nullptr;
Mivs.reset(SJ);
break;
case Subscript::SIV:
@@ -3616,7 +3612,7 @@ Dependence *DependenceAnalysis::depends(Instruction *Src,
if (Pair[SJ].Classification == Subscript::RDIV) {
DEBUG(dbgs() << "RDIV test\n");
if (testRDIV(Pair[SJ].Src, Pair[SJ].Dst, Result))
- return NULL;
+ return nullptr;
// I don't yet understand how to propagate RDIV results
Mivs.reset(SJ);
}
@@ -3629,7 +3625,7 @@ Dependence *DependenceAnalysis::depends(Instruction *Src,
if (Pair[SJ].Classification == Subscript::MIV) {
DEBUG(dbgs() << "MIV test\n");
if (testMIV(Pair[SJ].Src, Pair[SJ].Dst, Pair[SJ].Loops, Result))
- return NULL;
+ return nullptr;
}
else
llvm_unreachable("expected only MIV subscripts at this point");
@@ -3641,7 +3637,7 @@ Dependence *DependenceAnalysis::depends(Instruction *Src,
SJ >= 0; SJ = ConstrainedLevels.find_next(SJ)) {
updateDirection(Result.DV[SJ - 1], Constraints[SJ]);
if (Result.DV[SJ - 1].Direction == Dependence::DVEntry::NONE)
- return NULL;
+ return nullptr;
}
}
}
@@ -3676,11 +3672,11 @@ Dependence *DependenceAnalysis::depends(Instruction *Src,
}
}
if (AllEqual)
- return NULL;
+ return nullptr;
}
FullDependence *Final = new FullDependence(Result);
- Result.DV = NULL;
+ Result.DV = nullptr;
return Final;
}
@@ -3787,7 +3783,7 @@ const SCEV *DependenceAnalysis::getSplitIteration(const Dependence *Dep,
}
if (Delinearize && Pairs == 1 && CommonLevels > 1 &&
- tryDelinearize(Pair[0].Src, Pair[0].Dst, Pair)) {
+ tryDelinearize(Pair[0].Src, Pair[0].Dst, Pair, SE->getElementSize(Src))) {
DEBUG(dbgs() << " delinerized GEP\n");
Pairs = Pair.size();
}
@@ -3853,11 +3849,11 @@ const SCEV *DependenceAnalysis::getSplitIteration(const Dependence *Dep,
switch (Pair[SI].Classification) {
case Subscript::SIV: {
unsigned Level;
- const SCEV *SplitIter = NULL;
+ const SCEV *SplitIter = nullptr;
(void) testSIV(Pair[SI].Src, Pair[SI].Dst, Level,
Result, NewConstraint, SplitIter);
if (Level == SplitLevel) {
- assert(SplitIter != NULL);
+ assert(SplitIter != nullptr);
return SplitIter;
}
break;
@@ -3892,7 +3888,7 @@ const SCEV *DependenceAnalysis::getSplitIteration(const Dependence *Dep,
for (int SJ = Sivs.find_first(); SJ >= 0; SJ = Sivs.find_next(SJ)) {
// SJ is an SIV subscript that's part of the current coupled group
unsigned Level;
- const SCEV *SplitIter = NULL;
+ const SCEV *SplitIter = nullptr;
(void) testSIV(Pair[SJ].Src, Pair[SJ].Dst, Level,
Result, NewConstraint, SplitIter);
if (Level == SplitLevel && SplitIter)
@@ -3933,5 +3929,5 @@ const SCEV *DependenceAnalysis::getSplitIteration(const Dependence *Dep,
}
}
llvm_unreachable("somehow reached end of routine");
- return NULL;
+ return nullptr;
}
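
After delinearization, each recovered dimension is fed to the per-subscript tests seen above (testZIV/testSIV/testMIV). As a reminder of why the SIV case is cheap, here is the classic strong-SIV check in standalone form, for the subscript pair a*i + c1 (source) and a*i' + c2 (destination). This is a textbook sketch, not the testSIV implementation, which also builds constraints and direction vectors:

#include <cstdio>
#include <cstdlib>

// The two references touch the same element when i' - i == (c1 - c2) / a,
// so a dependence exists iff that quotient is an integer that fits inside
// the loop's iteration space.
bool strongSIV(long a, long c1, long c2, long tripCount, long &distance) {
  long delta = c1 - c2;
  if (a == 0 || delta % a != 0)
    return false;                         // no integer solution: independent
  distance = delta / a;
  return std::labs(distance) < tripCount; // solution must stay in the loop
}

int main() {
  long d;
  if (strongSIV(/*a=*/2, /*c1=*/4, /*c2=*/0, /*tripCount=*/100, d))
    std::printf("dependence, distance = %ld\n", d);   // prints distance = 2
}
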
diff --git a/lib/Analysis/DominanceFrontier.cpp b/lib/Analysis/DominanceFrontier.cpp
index f0787f1..74594f8 100644
--- a/lib/Analysis/DominanceFrontier.cpp
+++ b/lib/Analysis/DominanceFrontier.cpp
@@ -40,12 +40,12 @@ const DominanceFrontier::DomSetType &
DominanceFrontier::calculate(const DominatorTree &DT,
const DomTreeNode *Node) {
BasicBlock *BB = Node->getBlock();
- DomSetType *Result = NULL;
+ DomSetType *Result = nullptr;
std::vector<DFCalculateWorkObject> workList;
SmallPtrSet<BasicBlock *, 32> visited;
- workList.push_back(DFCalculateWorkObject(BB, NULL, Node, NULL));
+ workList.push_back(DFCalculateWorkObject(BB, nullptr, Node, nullptr));
do {
DFCalculateWorkObject *currentW = &workList.back();
assert (currentW && "Missing work object.");
diff --git a/lib/Analysis/IPA/CallGraph.cpp b/lib/Analysis/IPA/CallGraph.cpp
index f43675b..caec253 100644
--- a/lib/Analysis/IPA/CallGraph.cpp
+++ b/lib/Analysis/IPA/CallGraph.cpp
@@ -21,14 +21,14 @@ using namespace llvm;
//
CallGraph::CallGraph(Module &M)
- : M(M), Root(0), ExternalCallingNode(getOrInsertFunction(0)),
- CallsExternalNode(new CallGraphNode(0)) {
+ : M(M), Root(nullptr), ExternalCallingNode(getOrInsertFunction(nullptr)),
+ CallsExternalNode(new CallGraphNode(nullptr)) {
// Add every function to the call graph.
for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
addToCallGraph(I);
// If we didn't find a main function, use the external call graph node
- if (Root == 0)
+ if (!Root)
Root = ExternalCallingNode;
}
@@ -210,7 +210,7 @@ void CallGraphNode::removeOneAbstractEdgeTo(CallGraphNode *Callee) {
for (CalledFunctionsVector::iterator I = CalledFunctions.begin(); ; ++I) {
assert(I != CalledFunctions.end() && "Cannot find callee to remove!");
CallRecord &CR = *I;
- if (CR.second == Callee && CR.first == 0) {
+ if (CR.second == Callee && CR.first == nullptr) {
Callee->DropRef();
*I = CalledFunctions.back();
CalledFunctions.pop_back();
@@ -267,7 +267,7 @@ INITIALIZE_PASS(CallGraphWrapperPass, "basiccg", "CallGraph Construction",
char CallGraphWrapperPass::ID = 0;
-void CallGraphWrapperPass::releaseMemory() { G.reset(0); }
+void CallGraphWrapperPass::releaseMemory() { G.reset(nullptr); }
void CallGraphWrapperPass::print(raw_ostream &OS, const Module *) const {
if (!G) {
@@ -280,7 +280,7 @@ void CallGraphWrapperPass::print(raw_ostream &OS, const Module *) const {
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-void CallGraphWrapperPass::dump() const { print(dbgs(), 0); }
+void CallGraphWrapperPass::dump() const { print(dbgs(), nullptr); }
#endif
// Ensure that users of CallGraph.h also link with this file
diff --git a/lib/Analysis/IPA/CallGraphSCCPass.cpp b/lib/Analysis/IPA/CallGraphSCCPass.cpp
index aafc085..bfab744 100644
--- a/lib/Analysis/IPA/CallGraphSCCPass.cpp
+++ b/lib/Analysis/IPA/CallGraphSCCPass.cpp
@@ -15,7 +15,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "cgscc-passmgr"
#include "llvm/Analysis/CallGraphSCCPass.h"
#include "llvm/ADT/SCCIterator.h"
#include "llvm/ADT/Statistic.h"
@@ -23,12 +22,15 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LegacyPassManagers.h"
+#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Timer.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
+#define DEBUG_TYPE "cgscc-passmgr"
+
static cl::opt<unsigned>
MaxIterations("max-cg-scc-iterations", cl::ReallyHidden, cl::init(4));
@@ -112,7 +114,7 @@ bool CGPassManager::RunPassOnSCC(Pass *P, CallGraphSCC &CurSCC,
bool Changed = false;
PMDataManager *PM = P->getAsPMDataManager();
- if (PM == 0) {
+ if (!PM) {
CallGraphSCCPass *CGSP = (CallGraphSCCPass*)P;
if (!CallGraphUpToDate) {
DevirtualizedCall |= RefreshCallGraph(CurSCC, CG, false);
@@ -144,8 +146,11 @@ bool CGPassManager::RunPassOnSCC(Pass *P, CallGraphSCC &CurSCC,
I != E; ++I) {
if (Function *F = (*I)->getFunction()) {
dumpPassInfo(P, EXECUTION_MSG, ON_FUNCTION_MSG, F->getName());
- TimeRegion PassTimer(getPassTimer(FPP));
- Changed |= FPP->runOnFunction(*F);
+ {
+ TimeRegion PassTimer(getPassTimer(FPP));
+ Changed |= FPP->runOnFunction(*F);
+ }
+ F->getContext().yield();
}
}
@@ -190,7 +195,7 @@ bool CGPassManager::RefreshCallGraph(CallGraphSCC &CurSCC,
SCCIdx != E; ++SCCIdx, ++FunctionNo) {
CallGraphNode *CGN = *SCCIdx;
Function *F = CGN->getFunction();
- if (F == 0 || F->isDeclaration()) continue;
+ if (!F || F->isDeclaration()) continue;
// Walk the function body looking for call sites. Sync up the call sites in
// CGN with those actually in the function.
@@ -203,7 +208,7 @@ bool CGPassManager::RefreshCallGraph(CallGraphSCC &CurSCC,
for (CallGraphNode::iterator I = CGN->begin(), E = CGN->end(); I != E; ) {
// If this call site is null, then the function pass deleted the call
// entirely and the WeakVH nulled it out.
- if (I->first == 0 ||
+ if (!I->first ||
// If we've already seen this call site, then the FunctionPass RAUW'd
// one call with another, which resulted in two "uses" in the edge
// list of the same call.
@@ -217,7 +222,7 @@ bool CGPassManager::RefreshCallGraph(CallGraphSCC &CurSCC,
"CallGraphSCCPass did not update the CallGraph correctly!");
// If this was an indirect call site, count it.
- if (I->second->getFunction() == 0)
+ if (!I->second->getFunction())
++NumIndirectRemoved;
else
++NumDirectRemoved;
@@ -273,7 +278,7 @@ bool CGPassManager::RefreshCallGraph(CallGraphSCC &CurSCC,
// site could be turned direct), don't reject it in checking mode, and
// don't tweak it to be more precise.
if (CheckingMode && CS.getCalledFunction() &&
- ExistingNode->getFunction() == 0)
+ ExistingNode->getFunction() == nullptr)
continue;
assert(!CheckingMode &&
@@ -286,7 +291,7 @@ bool CGPassManager::RefreshCallGraph(CallGraphSCC &CurSCC,
CalleeNode = CG.getOrInsertFunction(Callee);
// Keep track of whether we turned an indirect call into a direct
// one.
- if (ExistingNode->getFunction() == 0) {
+ if (!ExistingNode->getFunction()) {
DevirtualizedCall = true;
DEBUG(dbgs() << " CGSCCPASSMGR: Devirtualized call to '"
<< Callee->getName() << "'\n");
@@ -434,8 +439,8 @@ bool CGPassManager::runOnModule(Module &M) {
while (!CGI.isAtEnd()) {
// Copy the current SCC and increment past it so that the pass can hack
// on the SCC if it wants to without invalidating our iterator.
- std::vector<CallGraphNode*> &NodeVec = *CGI;
- CurSCC.initialize(&NodeVec[0], &NodeVec[0]+NodeVec.size());
+ const std::vector<CallGraphNode *> &NodeVec = *CGI;
+ CurSCC.initialize(NodeVec.data(), NodeVec.data() + NodeVec.size());
++CGI;
// At the top level, we run all the passes in this pass manager on the
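
The new braces around TimeRegion are not cosmetic: the timer is an RAII object, so the extra scope stops it before getContext().yield() hands control back to the host, keeping yield time out of the pass's measurement. A toy illustration of the pattern (simple timer, not llvm::TimeRegion):

#include <chrono>
#include <cstdio>

struct ScopedTimer {
  const char *Name;
  std::chrono::steady_clock::time_point Start;
  explicit ScopedTimer(const char *N)
      : Name(N), Start(std::chrono::steady_clock::now()) {}
  ~ScopedTimer() {
    auto Ms = std::chrono::duration_cast<std::chrono::milliseconds>(
                  std::chrono::steady_clock::now() - Start).count();
    std::printf("%s: %lld ms\n", Name, (long long)Ms);
  }
};

void yieldToHost() { /* e.g. let an embedding client regain control */ }

void runPassOnFunction() {
  {
    ScopedTimer T{"runOnFunction"};   // destructor fires at the brace...
    // ... do the actual pass work here ...
  }
  yieldToHost();                      // ...so the yield isn't billed to it
}
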
diff --git a/lib/Analysis/IPA/GlobalsModRef.cpp b/lib/Analysis/IPA/GlobalsModRef.cpp
index f4097e4..607c068 100644
--- a/lib/Analysis/IPA/GlobalsModRef.cpp
+++ b/lib/Analysis/IPA/GlobalsModRef.cpp
@@ -14,7 +14,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "globalsmodref-aa"
#include "llvm/Analysis/Passes.h"
#include "llvm/ADT/SCCIterator.h"
#include "llvm/ADT/Statistic.h"
@@ -33,6 +32,8 @@
#include <set>
using namespace llvm;
+#define DEBUG_TYPE "globalsmodref-aa"
+
STATISTIC(NumNonAddrTakenGlobalVars,
"Number of global vars without address taken");
STATISTIC(NumNonAddrTakenFunctions,"Number of functions without address taken");
@@ -177,14 +178,14 @@ namespace {
FunctionInfo.find(F);
if (I != FunctionInfo.end())
return &I->second;
- return 0;
+ return nullptr;
}
void AnalyzeGlobals(Module &M);
void AnalyzeCallGraph(CallGraph &CG, Module &M);
bool AnalyzeUsesOfPointer(Value *V, std::vector<Function*> &Readers,
std::vector<Function*> &Writers,
- GlobalValue *OkayStoreDest = 0);
+ GlobalValue *OkayStoreDest = nullptr);
bool AnalyzeIndirectGlobalMemory(GlobalValue *GV);
};
}
@@ -358,7 +359,7 @@ void GlobalsModRef::AnalyzeCallGraph(CallGraph &CG, Module &M) {
// We do a bottom-up SCC traversal of the call graph. In other words, we
// visit all callees before callers (leaf-first).
for (scc_iterator<CallGraph*> I = scc_begin(&CG); !I.isAtEnd(); ++I) {
- std::vector<CallGraphNode *> &SCC = *I;
+ const std::vector<CallGraphNode *> &SCC = *I;
assert(!SCC.empty() && "SCC with no functions?");
if (!SCC[0]->getFunction()) {
@@ -410,10 +411,8 @@ void GlobalsModRef::AnalyzeCallGraph(CallGraph &CG, Module &M) {
FunctionEffect |= CalleeFR->FunctionEffect;
// Incorporate callee's effects on globals into our info.
- for (std::map<const GlobalValue*, unsigned>::iterator GI =
- CalleeFR->GlobalInfo.begin(), E = CalleeFR->GlobalInfo.end();
- GI != E; ++GI)
- FR.GlobalInfo[GI->first] |= GI->second;
+ for (const auto &G : CalleeFR->GlobalInfo)
+ FR.GlobalInfo[G.first] |= G.second;
FR.MayReadAnyGlobal |= CalleeFR->MayReadAnyGlobal;
} else {
// Can't say anything about it. However, if it is inside our SCC,
@@ -492,8 +491,8 @@ GlobalsModRef::alias(const Location &LocA,
if (GV1 || GV2) {
// If the global's address is taken, pretend we don't know it's a pointer to
// the global.
- if (GV1 && !NonAddressTakenGlobals.count(GV1)) GV1 = 0;
- if (GV2 && !NonAddressTakenGlobals.count(GV2)) GV2 = 0;
+ if (GV1 && !NonAddressTakenGlobals.count(GV1)) GV1 = nullptr;
+ if (GV2 && !NonAddressTakenGlobals.count(GV2)) GV2 = nullptr;
// If the two pointers are derived from two different non-addr-taken
// globals, or if one is and the other isn't, we know these can't alias.
@@ -507,7 +506,7 @@ GlobalsModRef::alias(const Location &LocA,
// These pointers may be based on the memory owned by an indirect global. If
// so, we may be able to handle this. First check to see if the base pointer
// is a direct load from an indirect global.
- GV1 = GV2 = 0;
+ GV1 = GV2 = nullptr;
if (const LoadInst *LI = dyn_cast<LoadInst>(UV1))
if (GlobalVariable *GV = dyn_cast<GlobalVariable>(LI->getOperand(0)))
if (IndirectGlobals.count(GV))
diff --git a/lib/Analysis/IPA/InlineCost.cpp b/lib/Analysis/IPA/InlineCost.cpp
index 8dafc1c..66f3f8e 100644
--- a/lib/Analysis/IPA/InlineCost.cpp
+++ b/lib/Analysis/IPA/InlineCost.cpp
@@ -11,7 +11,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "inline-cost"
#include "llvm/Analysis/InlineCost.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
@@ -34,6 +33,8 @@
using namespace llvm;
+#define DEBUG_TYPE "inline-cost"
+
STATISTIC(NumCallsAnalyzed, "Number of call sites analyzed");
namespace {
@@ -97,9 +98,6 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
void disableSROA(Value *V);
void accumulateSROACost(DenseMap<Value *, int>::iterator CostIt,
int InstructionCost);
- bool handleSROACandidate(bool IsSROAValid,
- DenseMap<Value *, int>::iterator CostIt,
- int InstructionCost);
bool isGEPOffsetConstant(GetElementPtrInst &GEP);
bool accumulateGEPOffset(GEPOperator &GEP, APInt &Offset);
bool simplifyCallSite(Function *F, CallSite CS);
@@ -225,21 +223,6 @@ void CallAnalyzer::accumulateSROACost(DenseMap<Value *, int>::iterator CostIt,
SROACostSavings += InstructionCost;
}
-/// \brief Helper for the common pattern of handling a SROA candidate.
-/// Either accumulates the cost savings if the SROA remains valid, or disables
-/// SROA for the candidate.
-bool CallAnalyzer::handleSROACandidate(bool IsSROAValid,
- DenseMap<Value *, int>::iterator CostIt,
- int InstructionCost) {
- if (IsSROAValid) {
- accumulateSROACost(CostIt, InstructionCost);
- return true;
- }
-
- disableSROA(CostIt);
- return false;
-}
-
/// \brief Check whether a GEP's indices are all constant.
///
/// Respects any simplified values known during the analysis of this callsite.
@@ -287,8 +270,17 @@ bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) {
}
bool CallAnalyzer::visitAlloca(AllocaInst &I) {
- // FIXME: Check whether inlining will turn a dynamic alloca into a static
+ // Check whether inlining will turn a dynamic alloca into a static
// alloca, and handle that case.
+ if (I.isArrayAllocation()) {
+ if (Constant *Size = SimplifiedValues.lookup(I.getArraySize())) {
+ ConstantInt *AllocSize = dyn_cast<ConstantInt>(Size);
+ assert(AllocSize && "Allocation size not a constant int?");
+ Type *Ty = I.getAllocatedType();
+ AllocatedSize += Ty->getPrimitiveSizeInBits() * AllocSize->getZExtValue();
+ return Base::visitAlloca(I);
+ }
+ }
// Accumulate the allocated size.
if (I.isStaticAlloca()) {
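The hunk above lets visitAlloca treat a dynamic alloca whose element count has already been simplified to a constant, charging the cost model with the element size in bits times that count. A standalone sketch of the arithmetic (the variable names are illustrative, not patch code):

    #include <cassert>
    #include <cstdint>

    int main() {
      // Mirrors the new accumulation for 'alloca i32, i32 %n' once %n
      // has been simplified to the constant 8.
      uint64_t PrimitiveSizeInBits = 32; // Ty->getPrimitiveSizeInBits()
      uint64_t ArraySize = 8;            // AllocSize->getZExtValue()
      uint64_t AllocatedSize = 0;
      AllocatedSize += PrimitiveSizeInBits * ArraySize;
      assert(AllocatedSize == 256);      // note: accumulated in bits
      return 0;
    }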
@@ -816,9 +808,29 @@ bool CallAnalyzer::visitBranchInst(BranchInst &BI) {
bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) {
// We model unconditional switches as free, see the comments on handling
// branches.
- return isa<ConstantInt>(SI.getCondition()) ||
- dyn_cast_or_null<ConstantInt>(
- SimplifiedValues.lookup(SI.getCondition()));
+ if (isa<ConstantInt>(SI.getCondition()))
+ return true;
+ if (Value *V = SimplifiedValues.lookup(SI.getCondition()))
+ if (isa<ConstantInt>(V))
+ return true;
+
+ // Otherwise, we need to accumulate a cost proportional to the number of
+ // distinct successor blocks. This fan-out in the CFG cannot be represented
+ // for free even if we can represent the core switch as a jumptable that
+ // takes a single instruction.
+ //
+ // NB: We convert large switches which are just used to initialize large phi
+ // nodes to lookup tables instead in simplify-cfg, so this shouldn't prevent
+ // inlining those. It will prevent inlining in cases where the optimization
+ // does not (yet) fire.
+ SmallPtrSet<BasicBlock *, 8> SuccessorBlocks;
+ SuccessorBlocks.insert(SI.getDefaultDest());
+ for (auto I = SI.case_begin(), E = SI.case_end(); I != E; ++I)
+ SuccessorBlocks.insert(I.getCaseSuccessor());
+ // Add cost corresponding to the number of distinct destinations. The first
+ // destination is modeled as free because of fallthrough.
+ Cost += (SuccessorBlocks.size() - 1) * InlineConstants::InstrCost;
+ return false;
}
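The rewritten visitSwitchInst pays one instruction cost for every distinct destination after the first, which falls through for free. A minimal standalone check of that accounting (the cost of 5 is an assumption about InlineConstants::InstrCost, and the block names are invented):

    #include <cassert>
    #include <set>
    #include <string>

    int main() {
      const int InstrCost = 5; // assumed InlineConstants::InstrCost
      // Default destination plus case targets; the set stands in for
      // SuccessorBlocks, so the duplicated "bb2" is counted once.
      std::set<std::string> SuccessorBlocks = {"default", "bb1", "bb2", "bb2"};
      int Cost = int(SuccessorBlocks.size() - 1) * InstrCost;
      assert(Cost == 10); // three distinct destinations, the first is free
      return 0;
    }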
bool CallAnalyzer::visitIndirectBrInst(IndirectBrInst &IBI) {
@@ -934,7 +946,7 @@ bool CallAnalyzer::analyzeBlock(BasicBlock *BB) {
/// no constant offsets applied.
ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) {
if (!DL || !V->getType()->isPointerTy())
- return 0;
+ return nullptr;
unsigned IntPtrWidth = DL->getPointerSizeInBits();
APInt Offset = APInt::getNullValue(IntPtrWidth);
@@ -946,7 +958,7 @@ ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) {
do {
if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
if (!GEP->isInBounds() || !accumulateGEPOffset(*GEP, Offset))
- return 0;
+ return nullptr;
V = GEP->getPointerOperand();
} else if (Operator::getOpcode(V) == Instruction::BitCast) {
V = cast<Operator>(V)->getOperand(0);
@@ -1247,7 +1259,7 @@ InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, Function *Callee,
// Calls to functions with always-inline attributes should be inlined
// whenever possible.
- if (Callee->hasFnAttribute(Attribute::AlwaysInline)) {
+ if (CS.hasFnAttr(Attribute::AlwaysInline)) {
if (isInlineViable(*Callee))
return llvm::InlineCost::getAlways();
return llvm::InlineCost::getNever();
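The hunk above consults CS.hasFnAttr instead of the callee alone; CallSite::hasFnAttr honors an attribute placed on the call instruction itself before falling back to the callee. A toy model of that lookup order (purely illustrative types, not the LLVM API):

    #include <cassert>

    struct Fn { bool AlwaysInline; };
    struct Call { bool AlwaysInline; const Fn *Callee; };

    // Honored if present on the call site or, failing that, the callee,
    // mirroring the fallback the new check relies on.
    bool hasFnAttr(const Call &CS) {
      return CS.AlwaysInline || CS.Callee->AlwaysInline;
    }

    int main() {
      Fn F{false};
      Call CS{true, &F};     // attribute only on the call site
      assert(hasFnAttr(CS)); // fires where the old callee-only test did not
      return 0;
    }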
diff --git a/lib/Analysis/IVUsers.cpp b/lib/Analysis/IVUsers.cpp
index 5317a47..c819bd3 100644
--- a/lib/Analysis/IVUsers.cpp
+++ b/lib/Analysis/IVUsers.cpp
@@ -12,7 +12,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "iv-users"
#include "llvm/Analysis/IVUsers.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/LoopPass.h"
@@ -29,6 +28,8 @@
#include <algorithm>
using namespace llvm;
+#define DEBUG_TYPE "iv-users"
+
char IVUsers::ID = 0;
INITIALIZE_PASS_BEGIN(IVUsers, "iv-users",
"Induction Variable Users", false, true)
@@ -84,7 +85,7 @@ static bool isInteresting(const SCEV *S, const Instruction *I, const Loop *L,
static bool isSimplifiedLoopNest(BasicBlock *BB, const DominatorTree *DT,
const LoopInfo *LI,
SmallPtrSet<Loop*,16> &SimpleLoopNests) {
- Loop *NearestLoop = 0;
+ Loop *NearestLoop = nullptr;
for (DomTreeNode *Rung = DT->getNode(BB);
Rung; Rung = Rung->getIDom()) {
BasicBlock *DomBB = Rung->getBlock();
@@ -253,7 +254,7 @@ bool IVUsers::runOnLoop(Loop *l, LPPassManager &LPM) {
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
SE = &getAnalysis<ScalarEvolution>();
DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : 0;
+ DL = DLP ? &DLP->getDataLayout() : nullptr;
// Find all uses of induction variables in this loop, and categorize
// them by stride. Start by finding all of the PHI nodes in the header for
@@ -329,16 +330,16 @@ static const SCEVAddRecExpr *findAddRecForLoop(const SCEV *S, const Loop *L) {
I != E; ++I)
if (const SCEVAddRecExpr *AR = findAddRecForLoop(*I, L))
return AR;
- return 0;
+ return nullptr;
}
- return 0;
+ return nullptr;
}
const SCEV *IVUsers::getStride(const IVStrideUse &IU, const Loop *L) const {
if (const SCEVAddRecExpr *AR = findAddRecForLoop(getExpr(IU), L))
return AR->getStepRecurrence(*SE);
- return 0;
+ return nullptr;
}
void IVStrideUse::transformToPostInc(const Loop *L) {
diff --git a/lib/Analysis/InstCount.cpp b/lib/Analysis/InstCount.cpp
index 3d05556..de2b9c0 100644
--- a/lib/Analysis/InstCount.cpp
+++ b/lib/Analysis/InstCount.cpp
@@ -11,7 +11,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "instcount"
#include "llvm/Analysis/Passes.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/IR/Function.h"
@@ -22,6 +21,8 @@
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
+#define DEBUG_TYPE "instcount"
+
STATISTIC(TotalInsts , "Number of instructions (of all types)");
STATISTIC(TotalBlocks, "Number of basic blocks");
STATISTIC(TotalFuncs , "Number of non-external functions");
@@ -47,7 +48,7 @@ namespace {
void visitInstruction(Instruction &I) {
errs() << "Instruction Count does not know about " << I;
- llvm_unreachable(0);
+ llvm_unreachable(nullptr);
}
public:
static char ID; // Pass identification, replacement for typeid
diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp
index d8d8a09..3684fda 100644
--- a/lib/Analysis/InstructionSimplify.cpp
+++ b/lib/Analysis/InstructionSimplify.cpp
@@ -17,7 +17,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "instsimplify"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
@@ -35,6 +34,8 @@
using namespace llvm;
using namespace llvm::PatternMatch;
+#define DEBUG_TYPE "instsimplify"
+
enum { RecursionLimit = 3 };
STATISTIC(NumExpand, "Number of expansions");
@@ -131,7 +132,7 @@ static Value *ExpandBinOp(unsigned Opcode, Value *LHS, Value *RHS,
Instruction::BinaryOps OpcodeToExpand = (Instruction::BinaryOps)OpcToExpand;
// Recursion is always used, so bail out at once if we already hit the limit.
if (!MaxRecurse--)
- return 0;
+ return nullptr;
// Check whether the expression has the form "(A op' B) op C".
if (BinaryOperator *Op0 = dyn_cast<BinaryOperator>(LHS))
@@ -179,7 +180,7 @@ static Value *ExpandBinOp(unsigned Opcode, Value *LHS, Value *RHS,
}
}
- return 0;
+ return nullptr;
}
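ExpandBinOp rewrites "(A op' B) op C" as "(A op C) op' (B op C)" and keeps the result only when both halves simplify. The distribution itself is an identity, which a brief exhaustive check confirms for op = or, op' = and:

    #include <cassert>
    #include <cstdint>

    int main() {
      // "(A & B) | C" equals "(A | C) & (B | C)" for all values.
      for (uint8_t A = 0; A < 8; ++A)
        for (uint8_t B = 0; B < 8; ++B)
          for (uint8_t C = 0; C < 8; ++C)
            assert(((A & B) | C) == ((A | C) & (B | C)));
      return 0;
    }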
/// FactorizeBinOp - Simplify "LHS Opcode RHS" by factorizing out a common term
@@ -192,14 +193,14 @@ static Value *FactorizeBinOp(unsigned Opcode, Value *LHS, Value *RHS,
Instruction::BinaryOps OpcodeToExtract = (Instruction::BinaryOps)OpcToExtract;
// Recursion is always used, so bail out at once if we already hit the limit.
if (!MaxRecurse--)
- return 0;
+ return nullptr;
BinaryOperator *Op0 = dyn_cast<BinaryOperator>(LHS);
BinaryOperator *Op1 = dyn_cast<BinaryOperator>(RHS);
if (!Op0 || Op0->getOpcode() != OpcodeToExtract ||
!Op1 || Op1->getOpcode() != OpcodeToExtract)
- return 0;
+ return nullptr;
// The expression has the form "(A op' B) op (C op' D)".
Value *A = Op0->getOperand(0), *B = Op0->getOperand(1);
@@ -251,7 +252,7 @@ static Value *FactorizeBinOp(unsigned Opcode, Value *LHS, Value *RHS,
}
}
- return 0;
+ return nullptr;
}
/// SimplifyAssociativeBinOp - Generic simplifications for associative binary
@@ -263,7 +264,7 @@ static Value *SimplifyAssociativeBinOp(unsigned Opc, Value *LHS, Value *RHS,
// Recursion is always used, so bail out at once if we already hit the limit.
if (!MaxRecurse--)
- return 0;
+ return nullptr;
BinaryOperator *Op0 = dyn_cast<BinaryOperator>(LHS);
BinaryOperator *Op1 = dyn_cast<BinaryOperator>(RHS);
@@ -308,7 +309,7 @@ static Value *SimplifyAssociativeBinOp(unsigned Opc, Value *LHS, Value *RHS,
// The remaining transforms require commutativity as well as associativity.
if (!Instruction::isCommutative(Opcode))
- return 0;
+ return nullptr;
// Transform: "(A op B) op C" ==> "(C op A) op B" if it simplifies completely.
if (Op0 && Op0->getOpcode() == Opcode) {
@@ -348,7 +349,7 @@ static Value *SimplifyAssociativeBinOp(unsigned Opc, Value *LHS, Value *RHS,
}
}
- return 0;
+ return nullptr;
}
/// ThreadBinOpOverSelect - In the case of a binary operation with a select
@@ -359,7 +360,7 @@ static Value *ThreadBinOpOverSelect(unsigned Opcode, Value *LHS, Value *RHS,
const Query &Q, unsigned MaxRecurse) {
// Recursion is always used, so bail out at once if we already hit the limit.
if (!MaxRecurse--)
- return 0;
+ return nullptr;
SelectInst *SI;
if (isa<SelectInst>(LHS)) {
@@ -420,7 +421,7 @@ static Value *ThreadBinOpOverSelect(unsigned Opcode, Value *LHS, Value *RHS,
}
}
- return 0;
+ return nullptr;
}
/// ThreadCmpOverSelect - In the case of a comparison with a select instruction,
@@ -432,7 +433,7 @@ static Value *ThreadCmpOverSelect(CmpInst::Predicate Pred, Value *LHS,
unsigned MaxRecurse) {
// Recursion is always used, so bail out at once if we already hit the limit.
if (!MaxRecurse--)
- return 0;
+ return nullptr;
// Make sure the select is on the LHS.
if (!isa<SelectInst>(LHS)) {
@@ -456,7 +457,7 @@ static Value *ThreadCmpOverSelect(CmpInst::Predicate Pred, Value *LHS,
// It didn't simplify. However if "cmp TV, RHS" is equal to the select
// condition then we can replace it with 'true'. Otherwise give up.
if (!isSameCompare(Cond, Pred, TV, RHS))
- return 0;
+ return nullptr;
TCmp = getTrue(Cond->getType());
}
@@ -470,7 +471,7 @@ static Value *ThreadCmpOverSelect(CmpInst::Predicate Pred, Value *LHS,
// It didn't simplify. However if "cmp FV, RHS" is equal to the select
// condition then we can replace it with 'false'. Otherwise give up.
if (!isSameCompare(Cond, Pred, FV, RHS))
- return 0;
+ return nullptr;
FCmp = getFalse(Cond->getType());
}
@@ -482,7 +483,7 @@ static Value *ThreadCmpOverSelect(CmpInst::Predicate Pred, Value *LHS,
// The remaining cases only make sense if the select condition has the same
// type as the result of the comparison, so bail out if this is not so.
if (Cond->getType()->isVectorTy() != RHS->getType()->isVectorTy())
- return 0;
+ return nullptr;
// If the false value simplified to false, then the result of the compare
// is equal to "Cond && TCmp". This also catches the case when the false
// value simplified to false and the true value to true, returning "Cond".
@@ -502,7 +503,7 @@ static Value *ThreadCmpOverSelect(CmpInst::Predicate Pred, Value *LHS,
Q, MaxRecurse))
return V;
- return 0;
+ return nullptr;
}
/// ThreadBinOpOverPHI - In the case of a binary operation with an operand that
@@ -513,24 +514,24 @@ static Value *ThreadBinOpOverPHI(unsigned Opcode, Value *LHS, Value *RHS,
const Query &Q, unsigned MaxRecurse) {
// Recursion is always used, so bail out at once if we already hit the limit.
if (!MaxRecurse--)
- return 0;
+ return nullptr;
PHINode *PI;
if (isa<PHINode>(LHS)) {
PI = cast<PHINode>(LHS);
// Bail out if RHS and the phi may be mutually interdependent due to a loop.
if (!ValueDominatesPHI(RHS, PI, Q.DT))
- return 0;
+ return nullptr;
} else {
assert(isa<PHINode>(RHS) && "No PHI instruction operand!");
PI = cast<PHINode>(RHS);
// Bail out if LHS and the phi may be mutually interdependent due to a loop.
if (!ValueDominatesPHI(LHS, PI, Q.DT))
- return 0;
+ return nullptr;
}
// Evaluate the BinOp on the incoming phi values.
- Value *CommonValue = 0;
+ Value *CommonValue = nullptr;
for (unsigned i = 0, e = PI->getNumIncomingValues(); i != e; ++i) {
Value *Incoming = PI->getIncomingValue(i);
// If the incoming value is the phi node itself, it can safely be skipped.
@@ -541,7 +542,7 @@ static Value *ThreadBinOpOverPHI(unsigned Opcode, Value *LHS, Value *RHS,
// If the operation failed to simplify, or simplified to a different value
// to previously, then give up.
if (!V || (CommonValue && V != CommonValue))
- return 0;
+ return nullptr;
CommonValue = V;
}
@@ -556,7 +557,7 @@ static Value *ThreadCmpOverPHI(CmpInst::Predicate Pred, Value *LHS, Value *RHS,
const Query &Q, unsigned MaxRecurse) {
// Recursion is always used, so bail out at once if we already hit the limit.
if (!MaxRecurse--)
- return 0;
+ return nullptr;
// Make sure the phi is on the LHS.
if (!isa<PHINode>(LHS)) {
@@ -568,10 +569,10 @@ static Value *ThreadCmpOverPHI(CmpInst::Predicate Pred, Value *LHS, Value *RHS,
// Bail out if RHS and the phi may be mutually interdependent due to a loop.
if (!ValueDominatesPHI(RHS, PI, Q.DT))
- return 0;
+ return nullptr;
// Evaluate the BinOp on the incoming phi values.
- Value *CommonValue = 0;
+ Value *CommonValue = nullptr;
for (unsigned i = 0, e = PI->getNumIncomingValues(); i != e; ++i) {
Value *Incoming = PI->getIncomingValue(i);
// If the incoming value is the phi node itself, it can safely be skipped.
@@ -580,7 +581,7 @@ static Value *ThreadCmpOverPHI(CmpInst::Predicate Pred, Value *LHS, Value *RHS,
// If the operation failed to simplify, or simplified to a different value
// to previously, then give up.
if (!V || (CommonValue && V != CommonValue))
- return 0;
+ return nullptr;
CommonValue = V;
}
@@ -613,7 +614,7 @@ static Value *SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
// X + (Y - X) -> Y
// (Y - X) + X -> Y
// Eg: X + -X -> 0
- Value *Y = 0;
+ Value *Y = nullptr;
if (match(Op1, m_Sub(m_Value(Y), m_Specific(Op0))) ||
match(Op0, m_Sub(m_Value(Y), m_Specific(Op1))))
return Y;
@@ -647,7 +648,7 @@ static Value *SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
// "A+B" and "A+C" thus gains nothing, but costs compile time. Similarly
// for threading over phi nodes.
- return 0;
+ return nullptr;
}
Value *llvm::SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
@@ -720,7 +721,7 @@ static Constant *computePointerDifference(const DataLayout *DL,
// If LHS and RHS are not related via constant offsets to the same base
// value, there is nothing we can do here.
if (LHS != RHS)
- return 0;
+ return nullptr;
// Otherwise, the difference of LHS - RHS can be computed as:
// LHS - RHS
@@ -755,14 +756,14 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
// (X*2) - X -> X
// (X<<1) - X -> X
- Value *X = 0;
+ Value *X = nullptr;
if (match(Op0, m_Mul(m_Specific(Op1), m_ConstantInt<2>())) ||
match(Op0, m_Shl(m_Specific(Op1), m_One())))
return Op1;
// (X + Y) - Z -> X + (Y - Z) or Y + (X - Z) if everything simplifies.
// For example, (X + Y) - Y -> X; (Y + X) - Y -> X
- Value *Y = 0, *Z = Op1;
+ Value *Y = nullptr, *Z = Op1;
if (MaxRecurse && match(Op0, m_Add(m_Value(X), m_Value(Y)))) { // (X + Y) - Z
// See if "V === Y - Z" simplifies.
if (Value *V = SimplifyBinOp(Instruction::Sub, Y, Z, Q, MaxRecurse-1))
@@ -853,7 +854,7 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
// "A-B" and "A-C" thus gains nothing, but costs compile time. Similarly
// for threading over phi nodes.
- return 0;
+ return nullptr;
}
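Two of the identities above, "(X*2) - X -> X" and "(X<<1) - X -> X", hold even under wraparound, which a quick standalone check confirms:

    #include <cassert>
    #include <cstdint>

    int main() {
      for (uint32_t X : {0u, 1u, 7u, 0x80000000u, 0xffffffffu}) {
        assert(X * 2 - X == X);    // (X*2) - X -> X, wrapping included
        assert((X << 1) - X == X); // (X<<1) - X -> X
      }
      return 0;
    }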
Value *llvm::SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
@@ -890,7 +891,7 @@ static Value *SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF,
// fadd [nnan ninf] X, (fsub [nnan ninf] 0, X) ==> 0
// where nnan and ninf have to occur at least once somewhere in this
// expression
- Value *SubOp = 0;
+ Value *SubOp = nullptr;
if (match(Op1, m_FSub(m_AnyZero(), m_Specific(Op0))))
SubOp = Op1;
else if (match(Op0, m_FSub(m_AnyZero(), m_Specific(Op1))))
@@ -902,7 +903,7 @@ static Value *SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF,
return Constant::getNullValue(Op0->getType());
}
- return 0;
+ return nullptr;
}
/// Given operands for an FSub, see if we can fold the result. If not, this
@@ -939,7 +940,7 @@ static Value *SimplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF,
if (FMF.noNaNs() && FMF.noInfs() && Op0 == Op1)
return Constant::getNullValue(Op0->getType());
- return 0;
+ return nullptr;
}
/// Given the operands for an FMul, see if we can fold the result
@@ -966,7 +967,7 @@ static Value *SimplifyFMulInst(Value *Op0, Value *Op1,
if (FMF.noNaNs() && FMF.noSignedZeros() && match(Op1, m_AnyZero()))
return Op1;
- return 0;
+ return nullptr;
}
/// SimplifyMulInst - Given operands for a Mul, see if we can
@@ -997,7 +998,7 @@ static Value *SimplifyMulInst(Value *Op0, Value *Op1, const Query &Q,
return Op0;
// (X / Y) * Y -> X if the division is exact.
- Value *X = 0;
+ Value *X = nullptr;
if (match(Op0, m_Exact(m_IDiv(m_Value(X), m_Specific(Op1)))) || // (X / Y) * Y
match(Op1, m_Exact(m_IDiv(m_Value(X), m_Specific(Op0))))) // Y * (X / Y)
return X;
@@ -1031,7 +1032,7 @@ static Value *SimplifyMulInst(Value *Op0, Value *Op1, const Query &Q,
MaxRecurse))
return V;
- return 0;
+ return nullptr;
}
Value *llvm::SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF,
@@ -1098,7 +1099,7 @@ static Value *SimplifyDiv(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
return ConstantInt::get(Op0->getType(), 1);
// (X * Y) / Y -> X if the multiplication does not overflow.
- Value *X = 0, *Y = 0;
+ Value *X = nullptr, *Y = nullptr;
if (match(Op0, m_Mul(m_Value(X), m_Value(Y))) && (X == Op1 || Y == Op1)) {
if (Y != Op1) std::swap(X, Y); // Ensure expression is (X * Y) / Y, Y = Op1
OverflowingBinaryOperator *Mul = cast<OverflowingBinaryOperator>(Op0);
@@ -1129,7 +1130,7 @@ static Value *SimplifyDiv(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, Q, MaxRecurse))
return V;
- return 0;
+ return nullptr;
}
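The "(X * Y) / Y -> X" fold above insists on the no-wrap flags because the identity breaks once the multiply overflows. An 8-bit counterexample, standalone:

    #include <cassert>
    #include <cstdint>

    int main() {
      uint8_t X = 200, Y = 2;
      uint8_t Product = uint8_t(X * Y); // 400 wraps to 144 in 8 bits
      assert(Product / Y != X);         // 72 != 200: the fold would lie
      uint8_t X2 = 40;                  // no wrap: the fold is exact
      assert(uint8_t(X2 * Y) / Y == X2);
      return 0;
    }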
/// SimplifySDivInst - Given operands for an SDiv, see if we can
@@ -1139,7 +1140,7 @@ static Value *SimplifySDivInst(Value *Op0, Value *Op1, const Query &Q,
if (Value *V = SimplifyDiv(Instruction::SDiv, Op0, Op1, Q, MaxRecurse))
return V;
- return 0;
+ return nullptr;
}
Value *llvm::SimplifySDivInst(Value *Op0, Value *Op1, const DataLayout *DL,
@@ -1155,7 +1156,7 @@ static Value *SimplifyUDivInst(Value *Op0, Value *Op1, const Query &Q,
if (Value *V = SimplifyDiv(Instruction::UDiv, Op0, Op1, Q, MaxRecurse))
return V;
- return 0;
+ return nullptr;
}
Value *llvm::SimplifyUDivInst(Value *Op0, Value *Op1, const DataLayout *DL,
@@ -1174,7 +1175,7 @@ static Value *SimplifyFDivInst(Value *Op0, Value *Op1, const Query &Q,
if (match(Op1, m_Undef()))
return Op1;
- return 0;
+ return nullptr;
}
Value *llvm::SimplifyFDivInst(Value *Op0, Value *Op1, const DataLayout *DL,
@@ -1234,7 +1235,7 @@ static Value *SimplifyRem(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, Q, MaxRecurse))
return V;
- return 0;
+ return nullptr;
}
/// SimplifySRemInst - Given operands for an SRem, see if we can
@@ -1244,7 +1245,7 @@ static Value *SimplifySRemInst(Value *Op0, Value *Op1, const Query &Q,
if (Value *V = SimplifyRem(Instruction::SRem, Op0, Op1, Q, MaxRecurse))
return V;
- return 0;
+ return nullptr;
}
Value *llvm::SimplifySRemInst(Value *Op0, Value *Op1, const DataLayout *DL,
@@ -1260,7 +1261,7 @@ static Value *SimplifyURemInst(Value *Op0, Value *Op1, const Query &Q,
if (Value *V = SimplifyRem(Instruction::URem, Op0, Op1, Q, MaxRecurse))
return V;
- return 0;
+ return nullptr;
}
Value *llvm::SimplifyURemInst(Value *Op0, Value *Op1, const DataLayout *DL,
@@ -1279,7 +1280,7 @@ static Value *SimplifyFRemInst(Value *Op0, Value *Op1, const Query &,
if (match(Op1, m_Undef()))
return Op1;
- return 0;
+ return nullptr;
}
Value *llvm::SimplifyFRemInst(Value *Op0, Value *Op1, const DataLayout *DL,
@@ -1350,7 +1351,7 @@ static Value *SimplifyShift(unsigned Opcode, Value *Op0, Value *Op1,
if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, Q, MaxRecurse))
return V;
- return 0;
+ return nullptr;
}
/// SimplifyShlInst - Given operands for an Shl, see if we can
@@ -1368,7 +1369,7 @@ static Value *SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
Value *X;
if (match(Op0, m_Exact(m_Shr(m_Value(X), m_Specific(Op1)))))
return X;
- return 0;
+ return nullptr;
}
Value *llvm::SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
@@ -1399,7 +1400,7 @@ static Value *SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact,
cast<OverflowingBinaryOperator>(Op0)->hasNoUnsignedWrap())
return X;
- return 0;
+ return nullptr;
}
Value *llvm::SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact,
@@ -1435,7 +1436,7 @@ static Value *SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact,
cast<OverflowingBinaryOperator>(Op0)->hasNoSignedWrap())
return X;
- return 0;
+ return nullptr;
}
Value *llvm::SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact,
@@ -1483,7 +1484,7 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const Query &Q,
return Constant::getNullValue(Op0->getType());
// (A | ?) & A = A
- Value *A = 0, *B = 0;
+ Value *A = nullptr, *B = nullptr;
if (match(Op0, m_Or(m_Value(A), m_Value(B))) &&
(A == Op1 || B == Op1))
return Op1;
@@ -1536,7 +1537,7 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const Query &Q,
MaxRecurse))
return V;
- return 0;
+ return nullptr;
}
Value *llvm::SimplifyAndInst(Value *Op0, Value *Op1, const DataLayout *DL,
@@ -1582,7 +1583,7 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const Query &Q,
return Constant::getAllOnesValue(Op0->getType());
// (A & ?) | A = A
- Value *A = 0, *B = 0;
+ Value *A = nullptr, *B = nullptr;
if (match(Op0, m_And(m_Value(A), m_Value(B))) &&
(A == Op1 || B == Op1))
return Op1;
@@ -1630,7 +1631,7 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const Query &Q,
if (Value *V = ThreadBinOpOverPHI(Instruction::Or, Op0, Op1, Q, MaxRecurse))
return V;
- return 0;
+ return nullptr;
}
Value *llvm::SimplifyOrInst(Value *Op0, Value *Op1, const DataLayout *DL,
@@ -1690,7 +1691,7 @@ static Value *SimplifyXorInst(Value *Op0, Value *Op1, const Query &Q,
// "A^B" and "A^C" thus gains nothing, but costs compile time. Similarly
// for threading over phi nodes.
- return 0;
+ return nullptr;
}
Value *llvm::SimplifyXorInst(Value *Op0, Value *Op1, const DataLayout *DL,
@@ -1710,17 +1711,17 @@ static Value *ExtractEquivalentCondition(Value *V, CmpInst::Predicate Pred,
Value *LHS, Value *RHS) {
SelectInst *SI = dyn_cast<SelectInst>(V);
if (!SI)
- return 0;
+ return nullptr;
CmpInst *Cmp = dyn_cast<CmpInst>(SI->getCondition());
if (!Cmp)
- return 0;
+ return nullptr;
Value *CmpLHS = Cmp->getOperand(0), *CmpRHS = Cmp->getOperand(1);
if (Pred == Cmp->getPredicate() && LHS == CmpLHS && RHS == CmpRHS)
return Cmp;
if (Pred == CmpInst::getSwappedPredicate(Cmp->getPredicate()) &&
LHS == CmpRHS && RHS == CmpLHS)
return Cmp;
- return 0;
+ return nullptr;
}
// A significant optimization not implemented here is assuming that alloca
@@ -1768,7 +1769,7 @@ static Constant *computePointerICmp(const DataLayout *DL,
// We can only fold certain predicates on pointer comparisons.
switch (Pred) {
default:
- return 0;
+ return nullptr;
// Equality comparisons are easy to fold.
case CmpInst::ICMP_EQ:
@@ -1874,7 +1875,7 @@ static Constant *computePointerICmp(const DataLayout *DL,
}
// Otherwise, fail.
- return 0;
+ return nullptr;
}
/// SimplifyICmpInst - Given operands for an ICmpInst, see if we can
@@ -2000,7 +2001,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
// Many binary operators with constant RHS have easy to compute constant
// range. Use them to check whether the comparison is a tautology.
- uint32_t Width = CI->getBitWidth();
+ unsigned Width = CI->getBitWidth();
APInt Lower = APInt(Width, 0);
APInt Upper = APInt(Width, 0);
ConstantInt *CI2;
@@ -2019,6 +2020,10 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
APInt NegOne = APInt::getAllOnesValue(Width);
if (!CI2->isZero())
Upper = NegOne.udiv(CI2->getValue()) + 1;
+ } else if (match(LHS, m_SDiv(m_ConstantInt(CI2), m_Value()))) {
+ // 'sdiv CI2, x' produces [-|CI2|, |CI2|].
+ Upper = CI2->getValue().abs() + 1;
+ Lower = (-Upper) + 1;
} else if (match(LHS, m_SDiv(m_Value(), m_ConstantInt(CI2)))) {
// 'sdiv x, CI2' produces [INT_MIN / CI2, INT_MAX / CI2].
APInt IntMin = APInt::getSignedMinValue(Width);
@@ -2033,6 +2038,13 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
APInt NegOne = APInt::getAllOnesValue(Width);
if (CI2->getValue().ult(Width))
Upper = NegOne.lshr(CI2->getValue()) + 1;
+ } else if (match(LHS, m_LShr(m_ConstantInt(CI2), m_Value()))) {
+ // 'lshr CI2, x' produces [CI2 >> (Width-1), CI2].
+ unsigned ShiftAmount = Width - 1;
+ if (!CI2->isZero() && cast<BinaryOperator>(LHS)->isExact())
+ ShiftAmount = CI2->getValue().countTrailingZeros();
+ Lower = CI2->getValue().lshr(ShiftAmount);
+ Upper = CI2->getValue() + 1;
} else if (match(LHS, m_AShr(m_Value(), m_ConstantInt(CI2)))) {
// 'ashr x, CI2' produces [INT_MIN >> CI2, INT_MAX >> CI2].
APInt IntMin = APInt::getSignedMinValue(Width);
@@ -2041,6 +2053,19 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
Lower = IntMin.ashr(CI2->getValue());
Upper = IntMax.ashr(CI2->getValue()) + 1;
}
+ } else if (match(LHS, m_AShr(m_ConstantInt(CI2), m_Value()))) {
+ unsigned ShiftAmount = Width - 1;
+ if (!CI2->isZero() && cast<BinaryOperator>(LHS)->isExact())
+ ShiftAmount = CI2->getValue().countTrailingZeros();
+ if (CI2->isNegative()) {
+ // 'ashr CI2, x' produces [CI2, CI2 >> (Width-1)]
+ Lower = CI2->getValue();
+ Upper = CI2->getValue().ashr(ShiftAmount) + 1;
+ } else {
+ // 'ashr CI2, x' produces [CI2 >> (Width-1), CI2]
+ Lower = CI2->getValue().ashr(ShiftAmount);
+ Upper = CI2->getValue() + 1;
+ }
} else if (match(LHS, m_Or(m_Value(), m_ConstantInt(CI2)))) {
// 'or x, CI2' produces [CI2, UINT_MAX].
Lower = CI2->getValue();
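The new 'lshr CI2, x' case derives the range [CI2 >> (Width-1), CI2], letting comparisons against constants outside it fold away. A standalone spot check in plain C++ (the patch itself computes this with APInt):

    #include <cassert>
    #include <cstdint>

    int main() {
      // For CI2 = 8 and Width = 32, every value of 'lshr 8, x' lies in
      // [8 >> 31, 8] = [0, 8], so 'icmp ugt (lshr 8, x), 8' is false.
      const uint32_t CI2 = 8;
      const uint32_t Lower = CI2 >> 31; // 0
      const uint32_t Upper = CI2 + 1;   // exclusive bound: 9
      for (uint32_t x = 0; x < 32; ++x) {
        uint32_t V = CI2 >> x;
        assert(V >= Lower && V < Upper);
      }
      return 0;
    }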
@@ -2221,7 +2246,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
BinaryOperator *RBO = dyn_cast<BinaryOperator>(RHS);
if (MaxRecurse && (LBO || RBO)) {
// Analyze the case when either LHS or RHS is an add instruction.
- Value *A = 0, *B = 0, *C = 0, *D = 0;
+ Value *A = nullptr, *B = nullptr, *C = nullptr, *D = nullptr;
// LHS = A + B (or A and B are null); RHS = C + D (or C and D are null).
bool NoLHSWrapProblem = false, NoRHSWrapProblem = false;
if (LBO && LBO->getOpcode() == Instruction::Add) {
@@ -2279,6 +2304,28 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
}
}
+ // 0 - (zext X) pred C
+ if (!CmpInst::isUnsigned(Pred) && match(LHS, m_Neg(m_ZExt(m_Value())))) {
+ if (ConstantInt *RHSC = dyn_cast<ConstantInt>(RHS)) {
+ if (RHSC->getValue().isStrictlyPositive()) {
+ if (Pred == ICmpInst::ICMP_SLT)
+ return ConstantInt::getTrue(RHSC->getContext());
+ if (Pred == ICmpInst::ICMP_SGE)
+ return ConstantInt::getFalse(RHSC->getContext());
+ if (Pred == ICmpInst::ICMP_EQ)
+ return ConstantInt::getFalse(RHSC->getContext());
+ if (Pred == ICmpInst::ICMP_NE)
+ return ConstantInt::getTrue(RHSC->getContext());
+ }
+ if (RHSC->getValue().isNonNegative()) {
+ if (Pred == ICmpInst::ICMP_SLE)
+ return ConstantInt::getTrue(RHSC->getContext());
+ if (Pred == ICmpInst::ICMP_SGT)
+ return ConstantInt::getFalse(RHSC->getContext());
+ }
+ }
+ }
+
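The block just added folds signed comparisons of "0 - (zext X)" against a constant, since negating a zero-extended value can never yield a positive number. A standalone check of the folds for a strictly positive RHS:

    #include <cassert>
    #include <cstdint>

    int main() {
      const int32_t C = 42; // strictly positive RHS constant
      for (int Xi = 0; Xi <= 255; ++Xi) {
        int32_t LHS = -Xi;   // 0 - (zext i8 X to i32), always in [-255, 0]
        assert(LHS < C);     // ICMP_SLT folds to true
        assert(!(LHS >= C)); // ICMP_SGE folds to false
        assert(LHS != C);    // ICMP_EQ to false, ICMP_NE to true
      }
      return 0;
    }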
// icmp pred (urem X, Y), Y
if (LBO && match(LBO, m_URem(m_Value(), m_Specific(RHS)))) {
bool KnownNonNegative, KnownNegative;
@@ -2605,7 +2652,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
if (Value *V = ThreadCmpOverPHI(Pred, LHS, RHS, Q, MaxRecurse))
return V;
- return 0;
+ return nullptr;
}
Value *llvm::SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
@@ -2702,7 +2749,7 @@ static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
if (Value *V = ThreadCmpOverPHI(Pred, LHS, RHS, Q, MaxRecurse))
return V;
- return 0;
+ return nullptr;
}
Value *llvm::SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
@@ -2741,7 +2788,7 @@ static Value *SimplifySelectInst(Value *CondVal, Value *TrueVal,
if (isa<UndefValue>(FalseVal)) // select C, X, undef -> X
return TrueVal;
- return 0;
+ return nullptr;
}
Value *llvm::SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal,
@@ -2786,7 +2833,7 @@ static Value *SimplifyGEPInst(ArrayRef<Value *> Ops, const Query &Q, unsigned) {
// Check to see if this is constant foldable.
for (unsigned i = 0, e = Ops.size(); i != e; ++i)
if (!isa<Constant>(Ops[i]))
- return 0;
+ return nullptr;
return ConstantExpr::getGetElementPtr(cast<Constant>(Ops[0]), Ops.slice(1));
}
@@ -2823,7 +2870,7 @@ static Value *SimplifyInsertValueInst(Value *Agg, Value *Val,
return Agg;
}
- return 0;
+ return nullptr;
}
Value *llvm::SimplifyInsertValueInst(Value *Agg, Value *Val,
@@ -2839,7 +2886,7 @@ Value *llvm::SimplifyInsertValueInst(Value *Agg, Value *Val,
static Value *SimplifyPHINode(PHINode *PN, const Query &Q) {
// If all of the PHI's incoming values are the same then replace the PHI node
// with the common value.
- Value *CommonValue = 0;
+ Value *CommonValue = nullptr;
bool HasUndefInput = false;
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
Value *Incoming = PN->getIncomingValue(i);
@@ -2851,7 +2898,7 @@ static Value *SimplifyPHINode(PHINode *PN, const Query &Q) {
continue;
}
if (CommonValue && Incoming != CommonValue)
- return 0; // Not the same, bail out.
+ return nullptr; // Not the same, bail out.
CommonValue = Incoming;
}
@@ -2864,7 +2911,7 @@ static Value *SimplifyPHINode(PHINode *PN, const Query &Q) {
// instruction, we cannot return X as the result of the PHI node unless it
// dominates the PHI block.
if (HasUndefInput)
- return ValueDominatesPHI(CommonValue, PN, Q.DT) ? CommonValue : 0;
+ return ValueDominatesPHI(CommonValue, PN, Q.DT) ? CommonValue : nullptr;
return CommonValue;
}
@@ -2873,7 +2920,7 @@ static Value *SimplifyTruncInst(Value *Op, Type *Ty, const Query &Q, unsigned) {
if (Constant *C = dyn_cast<Constant>(Op))
return ConstantFoldInstOperands(Instruction::Trunc, Ty, C, Q.DL, Q.TLI);
- return 0;
+ return nullptr;
}
Value *llvm::SimplifyTruncInst(Value *Op, Type *Ty, const DataLayout *DL,
@@ -2945,7 +2992,7 @@ static Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
if (Value *V = ThreadBinOpOverPHI(Opcode, LHS, RHS, Q, MaxRecurse))
return V;
- return 0;
+ return nullptr;
}
}
@@ -2992,7 +3039,7 @@ static Value *SimplifyIntrinsic(Intrinsic::ID IID, IterTy ArgBegin, IterTy ArgEn
const Query &Q, unsigned MaxRecurse) {
// Perform idempotent optimizations
if (!IsIdempotent(IID))
- return 0;
+ return nullptr;
// Unary Ops
if (std::distance(ArgBegin, ArgEnd) == 1)
@@ -3000,7 +3047,7 @@ static Value *SimplifyIntrinsic(Intrinsic::ID IID, IterTy ArgBegin, IterTy ArgEn
if (II->getIntrinsicID() == IID)
return II;
- return 0;
+ return nullptr;
}
template <typename IterTy>
@@ -3017,7 +3064,7 @@ static Value *SimplifyCall(Value *V, IterTy ArgBegin, IterTy ArgEnd,
Function *F = dyn_cast<Function>(V);
if (!F)
- return 0;
+ return nullptr;
if (unsigned IID = F->getIntrinsicID())
if (Value *Ret =
@@ -3025,14 +3072,14 @@ static Value *SimplifyCall(Value *V, IterTy ArgBegin, IterTy ArgEnd,
return Ret;
if (!canConstantFoldCallTo(F))
- return 0;
+ return nullptr;
SmallVector<Constant *, 4> ConstantArgs;
ConstantArgs.reserve(ArgEnd - ArgBegin);
for (IterTy I = ArgBegin, E = ArgEnd; I != E; ++I) {
Constant *C = dyn_cast<Constant>(*I);
if (!C)
- return 0;
+ return nullptr;
ConstantArgs.push_back(C);
}
@@ -3247,7 +3294,7 @@ bool llvm::recursivelySimplifyInstruction(Instruction *I,
const DataLayout *DL,
const TargetLibraryInfo *TLI,
const DominatorTree *DT) {
- return replaceAndRecursivelySimplifyImpl(I, 0, DL, TLI, DT);
+ return replaceAndRecursivelySimplifyImpl(I, nullptr, DL, TLI, DT);
}
bool llvm::replaceAndRecursivelySimplify(Instruction *I, Value *SimpleV,
diff --git a/lib/Analysis/IntervalPartition.cpp b/lib/Analysis/IntervalPartition.cpp
index 2e259b1..a0583e8 100644
--- a/lib/Analysis/IntervalPartition.cpp
+++ b/lib/Analysis/IntervalPartition.cpp
@@ -29,7 +29,7 @@ void IntervalPartition::releaseMemory() {
delete Intervals[i];
IntervalMap.clear();
Intervals.clear();
- RootInterval = 0;
+ RootInterval = nullptr;
}
void IntervalPartition::print(raw_ostream &O, const Module*) const {
diff --git a/lib/Analysis/LazyCallGraph.cpp b/lib/Analysis/LazyCallGraph.cpp
index ea213f2..e073616 100644
--- a/lib/Analysis/LazyCallGraph.cpp
+++ b/lib/Analysis/LazyCallGraph.cpp
@@ -8,19 +8,22 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/LazyCallGraph.h"
-#include "llvm/ADT/SCCIterator.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/PassManager.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
+#define DEBUG_TYPE "lcg"
+
static void findCallees(
SmallVectorImpl<Constant *> &Worklist, SmallPtrSetImpl<Constant *> &Visited,
SmallVectorImpl<PointerUnion<Function *, LazyCallGraph::Node *>> &Callees,
- SmallPtrSetImpl<Function *> &CalleeSet) {
+ DenseMap<Function *, size_t> &CalleeIndexMap) {
while (!Worklist.empty()) {
Constant *C = Worklist.pop_back_val();
@@ -35,8 +38,12 @@ static void findCallees(
// alias. Then a test of the address of the weak function against the new
// strong definition's address would be an effective way to determine the
// safety of optimizing a direct call edge.
- if (!F->isDeclaration() && CalleeSet.insert(F))
+ if (!F->isDeclaration() &&
+ CalleeIndexMap.insert(std::make_pair(F, Callees.size())).second) {
+ DEBUG(dbgs() << " Added callable function: " << F->getName()
+ << "\n");
Callees.push_back(F);
+ }
continue;
}
@@ -46,7 +53,11 @@ static void findCallees(
}
}
-LazyCallGraph::Node::Node(LazyCallGraph &G, Function &F) : G(G), F(F) {
+LazyCallGraph::Node::Node(LazyCallGraph &G, Function &F)
+ : G(&G), F(F), DFSNumber(0), LowLink(0) {
+ DEBUG(dbgs() << " Adding functions called by '" << F.getName()
+ << "' to the graph.\n");
+
SmallVector<Constant *, 16> Worklist;
SmallPtrSet<Constant *, 16> Visited;
// Find all the potential callees in this function. First walk the
@@ -61,36 +72,41 @@ LazyCallGraph::Node::Node(LazyCallGraph &G, Function &F) : G(G), F(F) {
// We've collected all the constant (and thus potentially function or
// function containing) operands to all of the instructions in the function.
// Process them (recursively) collecting every function found.
- findCallees(Worklist, Visited, Callees, CalleeSet);
+ findCallees(Worklist, Visited, Callees, CalleeIndexMap);
}
-LazyCallGraph::Node::Node(LazyCallGraph &G, const Node &OtherN)
- : G(G), F(OtherN.F), CalleeSet(OtherN.CalleeSet) {
- // Loop over the other node's callees, adding the Function*s to our list
- // directly, and recursing to add the Node*s.
- Callees.reserve(OtherN.Callees.size());
- for (const auto &OtherCallee : OtherN.Callees)
- if (Function *Callee = OtherCallee.dyn_cast<Function *>())
- Callees.push_back(Callee);
- else
- Callees.push_back(G.copyInto(*OtherCallee.get<Node *>()));
+void LazyCallGraph::Node::insertEdgeInternal(Function &Callee) {
+ if (Node *N = G->lookup(Callee))
+ return insertEdgeInternal(*N);
+
+ CalleeIndexMap.insert(std::make_pair(&Callee, Callees.size()));
+ Callees.push_back(&Callee);
}
-LazyCallGraph::Node::Node(LazyCallGraph &G, Node &&OtherN)
- : G(G), F(OtherN.F), Callees(std::move(OtherN.Callees)),
- CalleeSet(std::move(OtherN.CalleeSet)) {
- // Loop over our Callees. They've been moved from another node, but we need
- // to move the Node*s to live under our bump ptr allocator.
- for (auto &Callee : Callees)
- if (Node *ChildN = Callee.dyn_cast<Node *>())
- Callee = G.moveInto(std::move(*ChildN));
+void LazyCallGraph::Node::insertEdgeInternal(Node &CalleeN) {
+ CalleeIndexMap.insert(std::make_pair(&CalleeN.getFunction(), Callees.size()));
+ Callees.push_back(&CalleeN);
}
-LazyCallGraph::LazyCallGraph(Module &M) : M(M) {
+void LazyCallGraph::Node::removeEdgeInternal(Function &Callee) {
+ auto IndexMapI = CalleeIndexMap.find(&Callee);
+ assert(IndexMapI != CalleeIndexMap.end() &&
+ "Callee not in the callee set for this caller?");
+
+ Callees[IndexMapI->second] = nullptr;
+ CalleeIndexMap.erase(IndexMapI);
+}
+
+LazyCallGraph::LazyCallGraph(Module &M) : NextDFSNumber(0) {
+ DEBUG(dbgs() << "Building CG for module: " << M.getModuleIdentifier()
+ << "\n");
for (Function &F : M)
if (!F.isDeclaration() && !F.hasLocalLinkage())
- if (EntryNodeSet.insert(&F))
+ if (EntryIndexMap.insert(std::make_pair(&F, EntryNodes.size())).second) {
+ DEBUG(dbgs() << " Adding '" << F.getName()
+ << "' to entry set of the graph.\n");
EntryNodes.push_back(&F);
+ }
// Now add entry nodes for functions reachable via initializers to globals.
SmallVector<Constant *, 16> Worklist;
@@ -100,51 +116,568 @@ LazyCallGraph::LazyCallGraph(Module &M) : M(M) {
if (Visited.insert(GV.getInitializer()))
Worklist.push_back(GV.getInitializer());
- findCallees(Worklist, Visited, EntryNodes, EntryNodeSet);
-}
+ DEBUG(dbgs() << " Adding functions referenced by global initializers to the "
+ "entry set.\n");
+ findCallees(Worklist, Visited, EntryNodes, EntryIndexMap);
-LazyCallGraph::LazyCallGraph(const LazyCallGraph &G)
- : M(G.M), EntryNodeSet(G.EntryNodeSet) {
- EntryNodes.reserve(G.EntryNodes.size());
- for (const auto &EntryNode : G.EntryNodes)
- if (Function *Callee = EntryNode.dyn_cast<Function *>())
- EntryNodes.push_back(Callee);
+ for (auto &Entry : EntryNodes) {
+ assert(!Entry.isNull() &&
+ "We can't have removed edges before we finish the constructor!");
+ if (Function *F = Entry.dyn_cast<Function *>())
+ SCCEntryNodes.push_back(F);
else
- EntryNodes.push_back(copyInto(*EntryNode.get<Node *>()));
+ SCCEntryNodes.push_back(&Entry.get<Node *>()->getFunction());
+ }
}
-// FIXME: This would be crazy simpler if BumpPtrAllocator were movable without
-// invalidating any of the allocated memory. We should make that be the case at
-// some point and delete this.
LazyCallGraph::LazyCallGraph(LazyCallGraph &&G)
- : M(G.M), EntryNodes(std::move(G.EntryNodes)),
- EntryNodeSet(std::move(G.EntryNodeSet)) {
- // Loop over our EntryNodes. They've been moved from another graph, so we
- // need to move the Node*s to live under our bump ptr allocator. We can just
- // do this in-place.
- for (auto &Entry : EntryNodes)
- if (Node *EntryN = Entry.dyn_cast<Node *>())
- Entry = moveInto(std::move(*EntryN));
+ : BPA(std::move(G.BPA)), NodeMap(std::move(G.NodeMap)),
+ EntryNodes(std::move(G.EntryNodes)),
+ EntryIndexMap(std::move(G.EntryIndexMap)), SCCBPA(std::move(G.SCCBPA)),
+ SCCMap(std::move(G.SCCMap)), LeafSCCs(std::move(G.LeafSCCs)),
+ DFSStack(std::move(G.DFSStack)),
+ SCCEntryNodes(std::move(G.SCCEntryNodes)),
+ NextDFSNumber(G.NextDFSNumber) {
+ updateGraphPtrs();
+}
+
+LazyCallGraph &LazyCallGraph::operator=(LazyCallGraph &&G) {
+ BPA = std::move(G.BPA);
+ NodeMap = std::move(G.NodeMap);
+ EntryNodes = std::move(G.EntryNodes);
+ EntryIndexMap = std::move(G.EntryIndexMap);
+ SCCBPA = std::move(G.SCCBPA);
+ SCCMap = std::move(G.SCCMap);
+ LeafSCCs = std::move(G.LeafSCCs);
+ DFSStack = std::move(G.DFSStack);
+ SCCEntryNodes = std::move(G.SCCEntryNodes);
+ NextDFSNumber = G.NextDFSNumber;
+ updateGraphPtrs();
+ return *this;
}
-LazyCallGraph::Node *LazyCallGraph::insertInto(Function &F, Node *&MappedN) {
- return new (MappedN = BPA.Allocate()) Node(*this, F);
+void LazyCallGraph::SCC::insert(Node &N) {
+ N.DFSNumber = N.LowLink = -1;
+ Nodes.push_back(&N);
+ G->SCCMap[&N] = this;
}
-LazyCallGraph::Node *LazyCallGraph::copyInto(const Node &OtherN) {
- Node *&N = NodeMap[&OtherN.F];
- if (N)
- return N;
+bool LazyCallGraph::SCC::isDescendantOf(const SCC &C) const {
+ // Walk up the parents of this SCC and verify that we eventually find C.
+ SmallVector<const SCC *, 4> AncestorWorklist;
+ AncestorWorklist.push_back(this);
+ do {
+ const SCC *AncestorC = AncestorWorklist.pop_back_val();
+ if (AncestorC->isChildOf(C))
+ return true;
+ for (const SCC *ParentC : AncestorC->ParentSCCs)
+ AncestorWorklist.push_back(ParentC);
+ } while (!AncestorWorklist.empty());
- return new (N = BPA.Allocate()) Node(*this, OtherN);
+ return false;
}
-LazyCallGraph::Node *LazyCallGraph::moveInto(Node &&OtherN) {
- Node *&N = NodeMap[&OtherN.F];
- if (N)
- return N;
+void LazyCallGraph::SCC::insertIntraSCCEdge(Node &CallerN, Node &CalleeN) {
+ // First insert it into the caller.
+ CallerN.insertEdgeInternal(CalleeN);
+
+ assert(G->SCCMap.lookup(&CallerN) == this && "Caller must be in this SCC.");
+ assert(G->SCCMap.lookup(&CalleeN) == this && "Callee must be in this SCC.");
- return new (N = BPA.Allocate()) Node(*this, std::move(OtherN));
+ // Nothing changes about this SCC or any other.
+}
+
+void LazyCallGraph::SCC::insertOutgoingEdge(Node &CallerN, Node &CalleeN) {
+ // First insert it into the caller.
+ CallerN.insertEdgeInternal(CalleeN);
+
+ assert(G->SCCMap.lookup(&CallerN) == this && "Caller must be in this SCC.");
+
+ SCC &CalleeC = *G->SCCMap.lookup(&CalleeN);
+ assert(&CalleeC != this && "Callee must not be in this SCC.");
+ assert(CalleeC.isDescendantOf(*this) &&
+ "Callee must be a descendant of the Caller.");
+
+ // The only change required is to add this SCC to the parent set of the callee.
+ CalleeC.ParentSCCs.insert(this);
+}
+
+SmallVector<LazyCallGraph::SCC *, 1>
+LazyCallGraph::SCC::insertIncomingEdge(Node &CallerN, Node &CalleeN) {
+ // First insert it into the caller.
+ CallerN.insertEdgeInternal(CalleeN);
+
+ assert(G->SCCMap.lookup(&CalleeN) == this && "Callee must be in this SCC.");
+
+ SCC &CallerC = *G->SCCMap.lookup(&CallerN);
+ assert(&CallerC != this && "Caller must not be in this SCC.");
+ assert(CallerC.isDescendantOf(*this) &&
+ "Caller must be a descendant of the Callee.");
+
+ // The algorithm we use for merging SCCs based on the cycle introduced here
+ // is to walk the SCC inverted DAG formed by the parent SCC sets. The inverse
+ // graph has the same cycle properties as the actual DAG of the SCCs, and
+ // when forming SCCs lazily by a DFS, the bottom of the graph won't exist in
+ // many cases which should prune the search space.
+ //
+ // FIXME: We can get this pruning behavior even after the incremental SCC
+ // formation by leaving behind (conservative) DFS numberings in the nodes,
+ // and pruning the search with them. These would need to be cleverly updated
+ // during the removal of intra-SCC edges, but could be preserved
+ // conservatively.
+
+ // The set of SCCs that are connected to the caller, and thus will
+ // participate in the merged connected component.
+ SmallPtrSet<SCC *, 8> ConnectedSCCs;
+ ConnectedSCCs.insert(this);
+ ConnectedSCCs.insert(&CallerC);
+
+ // We build up a DFS stack of the parents chains.
+ SmallVector<std::pair<SCC *, SCC::parent_iterator>, 8> DFSSCCs;
+ SmallPtrSet<SCC *, 8> VisitedSCCs;
+ int ConnectedDepth = -1;
+ SCC *C = this;
+ parent_iterator I = parent_begin(), E = parent_end();
+ for (;;) {
+ while (I != E) {
+ SCC &ParentSCC = *I++;
+
+ // If we have already processed this parent SCC, skip it, and remember
+ // whether it was connected so we don't have to check the rest of the
+ // stack. This also handles when we reach a child of the 'this' SCC (the
+ // callee) which terminates the search.
+ if (ConnectedSCCs.count(&ParentSCC)) {
+ ConnectedDepth = std::max<int>(ConnectedDepth, DFSSCCs.size());
+ continue;
+ }
+ if (VisitedSCCs.count(&ParentSCC))
+ continue;
+
+ // We fully explore the depth-first space, adding nodes to the connected
+ // set only as we pop them off, so "recurse" by rotating to the parent.
+ DFSSCCs.push_back(std::make_pair(C, I));
+ C = &ParentSCC;
+ I = ParentSCC.parent_begin();
+ E = ParentSCC.parent_end();
+ }
+
+ // If we've found a connection anywhere below this point on the stack (and
+ // thus up the parent graph from the caller), the current node needs to be
+ // added to the connected set now that we've processed all of its parents.
+ if ((int)DFSSCCs.size() == ConnectedDepth) {
+ --ConnectedDepth; // We're finished with this connection.
+ ConnectedSCCs.insert(C);
+ } else {
+ // Otherwise remember that its parents don't ever connect.
+ assert(ConnectedDepth < (int)DFSSCCs.size() &&
+ "Cannot have a connected depth greater than the DFS depth!");
+ VisitedSCCs.insert(C);
+ }
+
+ if (DFSSCCs.empty())
+ break; // We've walked all the parents of the caller transitively.
+
+ // Pop off the prior node and position to unwind the depth first recursion.
+ std::tie(C, I) = DFSSCCs.pop_back_val();
+ E = C->parent_end();
+ }
+
+ // Now that we have identified all of the SCCs which need to be merged into
+ // a connected set with the inserted edge, merge all of them into this SCC.
+ // FIXME: This operation currently creates ordering stability problems
+ // because we don't use stably ordered containers for the parent SCCs or the
+ // connected SCCs.
+ unsigned NewNodeBeginIdx = Nodes.size();
+ for (SCC *C : ConnectedSCCs) {
+ if (C == this)
+ continue;
+ for (SCC *ParentC : C->ParentSCCs)
+ if (!ConnectedSCCs.count(ParentC))
+ ParentSCCs.insert(ParentC);
+ C->ParentSCCs.clear();
+
+ for (Node *N : *C) {
+ for (Node &ChildN : *N) {
+ SCC &ChildC = *G->SCCMap.lookup(&ChildN);
+ if (&ChildC != C)
+ ChildC.ParentSCCs.erase(C);
+ }
+ G->SCCMap[N] = this;
+ Nodes.push_back(N);
+ }
+ C->Nodes.clear();
+ }
+ for (auto I = Nodes.begin() + NewNodeBeginIdx, E = Nodes.end(); I != E; ++I)
+ for (Node &ChildN : **I) {
+ SCC &ChildC = *G->SCCMap.lookup(&ChildN);
+ if (&ChildC != this)
+ ChildC.ParentSCCs.insert(this);
+ }
+
+ // We return the list of SCCs which were merged so that callers can
+ // invalidate any data they have associated with those SCCs. Note that these
+ // SCCs are no longer in an interesting state (they are totally empty) but
+ // the pointers will remain stable for the life of the graph itself.
+ return SmallVector<SCC *, 1>(ConnectedSCCs.begin(), ConnectedSCCs.end());
+}
+
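insertIncomingEdge hands back the SCCs that were folded together so a caller can invalidate anything keyed on them. A hedged usage fragment; it assumes an existing graph with nodes CallerN and CalleeN, the callee's SCC C, and a hypothetical SCCState map, none of which come from the patch:

    // DenseMap<LazyCallGraph::SCC *, PerSCCState> SCCState; // hypothetical
    auto MergedSCCs = C.insertIncomingEdge(CallerN, CalleeN);
    for (LazyCallGraph::SCC *DeadC : MergedSCCs) {
      if (DeadC == &C)
        continue;            // the callee's SCC absorbed the others
      SCCState.erase(DeadC); // merged SCCs empty out; pointers stay stable
    }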
+void LazyCallGraph::SCC::removeInterSCCEdge(Node &CallerN, Node &CalleeN) {
+ // First remove it from the node.
+ CallerN.removeEdgeInternal(CalleeN.getFunction());
+
+ assert(G->SCCMap.lookup(&CallerN) == this &&
+ "The caller must be a member of this SCC.");
+
+ SCC &CalleeC = *G->SCCMap.lookup(&CalleeN);
+ assert(&CalleeC != this &&
+ "This API only supports the rmoval of inter-SCC edges.");
+
+ assert(std::find(G->LeafSCCs.begin(), G->LeafSCCs.end(), this) ==
+ G->LeafSCCs.end() &&
+ "Cannot have a leaf SCC caller with a different SCC callee.");
+
+ bool HasOtherCallToCalleeC = false;
+ bool HasOtherCallOutsideSCC = false;
+ for (Node *N : *this) {
+ for (Node &OtherCalleeN : *N) {
+ SCC &OtherCalleeC = *G->SCCMap.lookup(&OtherCalleeN);
+ if (&OtherCalleeC == &CalleeC) {
+ HasOtherCallToCalleeC = true;
+ break;
+ }
+ if (&OtherCalleeC != this)
+ HasOtherCallOutsideSCC = true;
+ }
+ if (HasOtherCallToCalleeC)
+ break;
+ }
+ // Because the SCCs form a DAG, deleting such an edge cannot change the set
+ // of SCCs in the graph. However, it may cut an edge of the SCC DAG, making
+ // the caller no longer a parent of the callee. Walk the other call edges
+ // in the caller to tell.
+ if (!HasOtherCallToCalleeC) {
+ bool Removed = CalleeC.ParentSCCs.erase(this);
+ (void)Removed;
+ assert(Removed &&
+ "Did not find the caller SCC in the callee SCC's parent list!");
+
+ // It may orphan an SCC if it is the last edge reaching it, but that does
+ // not violate any invariants of the graph.
+ if (CalleeC.ParentSCCs.empty())
+ DEBUG(dbgs() << "LCG: Update removing " << CallerN.getFunction().getName()
+ << " -> " << CalleeN.getFunction().getName()
+ << " edge orphaned the callee's SCC!\n");
+ }
+
+ // It may make the Caller SCC a leaf SCC.
+ if (!HasOtherCallOutsideSCC)
+ G->LeafSCCs.push_back(this);
+}
+
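removeInterSCCEdge never changes the set of SCCs; as the walk above determines, it only trims a parent link and possibly adds the caller's SCC to the leaf list. A hedged fragment under the same assumed context as above:

    C.removeInterSCCEdge(CallerN, CalleeN); // no SCCs created or merged
    // Afterwards C may appear in the leaf list, and the callee's SCC may
    // have lost C as a parent; every SCC pointer remains valid.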
+void LazyCallGraph::SCC::internalDFS(
+ SmallVectorImpl<std::pair<Node *, Node::iterator>> &DFSStack,
+ SmallVectorImpl<Node *> &PendingSCCStack, Node *N,
+ SmallVectorImpl<SCC *> &ResultSCCs) {
+ Node::iterator I = N->begin();
+ N->LowLink = N->DFSNumber = 1;
+ int NextDFSNumber = 2;
+ for (;;) {
+ assert(N->DFSNumber != 0 && "We should always assign a DFS number "
+ "before processing a node.");
+
+ // We simulate recursion by popping out of the nested loop and continuing.
+ Node::iterator E = N->end();
+ while (I != E) {
+ Node &ChildN = *I;
+ if (SCC *ChildSCC = G->SCCMap.lookup(&ChildN)) {
+ // Check if we have reached a node in the new (known connected) set of
+ // this SCC. If so, the entire stack is necessarily in that set and we
+ // can re-start.
+ if (ChildSCC == this) {
+ insert(*N);
+ while (!PendingSCCStack.empty())
+ insert(*PendingSCCStack.pop_back_val());
+ while (!DFSStack.empty())
+ insert(*DFSStack.pop_back_val().first);
+ return;
+ }
+
+ // If this child isn't currently in this SCC, no need to process it.
+ // However, we do need to remove this SCC from its SCC's parent set.
+ ChildSCC->ParentSCCs.erase(this);
+ ++I;
+ continue;
+ }
+
+ if (ChildN.DFSNumber == 0) {
+ // Mark that we should start at this child when next this node is the
+ // top of the stack. We don't start at the next child to ensure this
+ // child's lowlink is reflected.
+ DFSStack.push_back(std::make_pair(N, I));
+
+ // Continue, resetting to the child node.
+ ChildN.LowLink = ChildN.DFSNumber = NextDFSNumber++;
+ N = &ChildN;
+ I = ChildN.begin();
+ E = ChildN.end();
+ continue;
+ }
+
+ // Track the lowest link of the children, if any are still in the stack.
+ // Any child not on the stack will have a LowLink of -1.
+ assert(ChildN.LowLink != 0 &&
+ "Low-link must not be zero with a non-zero DFS number.");
+ if (ChildN.LowLink >= 0 && ChildN.LowLink < N->LowLink)
+ N->LowLink = ChildN.LowLink;
+ ++I;
+ }
+
+ if (N->LowLink == N->DFSNumber) {
+ ResultSCCs.push_back(G->formSCC(N, PendingSCCStack));
+ if (DFSStack.empty())
+ return;
+ } else {
+ // At this point we know that N cannot ever be an SCC root. Its low-link
+ // is not its dfs-number, and we've processed all of its children. It is
+ // just sitting here waiting until some node further down the stack gets
+ // low-link == dfs-number and pops it off as well. Move it to the pending
+ // stack which is pulled into the next SCC to be formed.
+ PendingSCCStack.push_back(N);
+
+ assert(!DFSStack.empty() && "We shouldn't have an empty stack!");
+ }
+
+ N = DFSStack.back().first;
+ I = DFSStack.back().second;
+ DFSStack.pop_back();
+ }
+}
+
+SmallVector<LazyCallGraph::SCC *, 1>
+LazyCallGraph::SCC::removeIntraSCCEdge(Node &CallerN,
+ Node &CalleeN) {
+ // First remove it from the node.
+ CallerN.removeEdgeInternal(CalleeN.getFunction());
+
+ // We return a list of the resulting *new* SCCs in postorder.
+ SmallVector<SCC *, 1> ResultSCCs;
+
+ // Direct recursion doesn't impact the SCC graph at all.
+ if (&CallerN == &CalleeN)
+ return ResultSCCs;
+
+ // The worklist is every node in the original SCC.
+ SmallVector<Node *, 1> Worklist;
+ Worklist.swap(Nodes);
+ for (Node *N : Worklist) {
+ // The nodes formerly in this SCC are no longer in any SCC.
+ N->DFSNumber = 0;
+ N->LowLink = 0;
+ G->SCCMap.erase(N);
+ }
+ assert(Worklist.size() > 1 && "We have to have at least two nodes to have an "
+ "edge between them that is within the SCC.");
+
+ // The callee can already reach every node in this SCC (by definition). It is
+ // the only node we know will stay inside this SCC. Everything which
+ // transitively reaches Callee will also remain in the SCC. To model this we
+ // incrementally add any chain of nodes which reaches something in the new
+ // node set to the new node set. This short circuits one side of the Tarjan's
+ // walk.
+ insert(CalleeN);
+
+ // We're going to do a full mini-Tarjan's walk using a local stack here.
+ SmallVector<std::pair<Node *, Node::iterator>, 4> DFSStack;
+ SmallVector<Node *, 4> PendingSCCStack;
+ do {
+ Node *N = Worklist.pop_back_val();
+ if (N->DFSNumber == 0)
+ internalDFS(DFSStack, PendingSCCStack, N, ResultSCCs);
+
+ assert(DFSStack.empty() && "Didn't flush the entire DFS stack!");
+ assert(PendingSCCStack.empty() && "Didn't flush all pending SCC nodes!");
+ } while (!Worklist.empty());
+
+ // Now we need to reconnect the current SCC to the graph.
+ bool IsLeafSCC = true;
+ for (Node *N : Nodes) {
+ for (Node &ChildN : *N) {
+ SCC &ChildSCC = *G->SCCMap.lookup(&ChildN);
+ if (&ChildSCC == this)
+ continue;
+ ChildSCC.ParentSCCs.insert(this);
+ IsLeafSCC = false;
+ }
+ }
+#ifndef NDEBUG
+ if (!ResultSCCs.empty())
+ assert(!IsLeafSCC && "This SCC cannot be a leaf as we have split out new "
+ "SCCs by removing this edge.");
+ if (!std::any_of(G->LeafSCCs.begin(), G->LeafSCCs.end(),
+ [&](SCC *C) { return C == this; }))
+ assert(!IsLeafSCC && "This SCC cannot be a leaf as it already had child "
+ "SCCs before we removed this edge.");
+#endif
+ // If this SCC stopped being a leaf through this edge removal, remove it from
+ // the leaf SCC list.
+ if (!IsLeafSCC && !ResultSCCs.empty())
+ G->LeafSCCs.erase(std::remove(G->LeafSCCs.begin(), G->LeafSCCs.end(), this),
+ G->LeafSCCs.end());
+
+ // Return the new list of SCCs.
+ return ResultSCCs;
+}
+
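A matching usage fragment for the intra-SCC removal path (again assuming nodes CallerN and CalleeN inside SCC C, and a hypothetical per-SCC revisit hook): the edge removal may split C, and the split-off pieces come back in postorder while C itself is reused for the callee's component.

    auto NewSCCs = C.removeIntraSCCEdge(CallerN, CalleeN);
    for (LazyCallGraph::SCC *NewC : NewSCCs)
      revisitSCC(*NewC); // hypothetical hook; C is revisited separately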
+void LazyCallGraph::insertEdge(Node &CallerN, Function &Callee) {
+ assert(SCCMap.empty() && DFSStack.empty() &&
+ "This method cannot be called after SCCs have been formed!");
+
+ return CallerN.insertEdgeInternal(Callee);
+}
+
+void LazyCallGraph::removeEdge(Node &CallerN, Function &Callee) {
+ assert(SCCMap.empty() && DFSStack.empty() &&
+ "This method cannot be called after SCCs have been formed!");
+
+ return CallerN.removeEdgeInternal(Callee);
+}
+
+LazyCallGraph::Node &LazyCallGraph::insertInto(Function &F, Node *&MappedN) {
+ return *new (MappedN = BPA.Allocate()) Node(*this, F);
+}
+
+void LazyCallGraph::updateGraphPtrs() {
+ // Process all nodes updating the graph pointers.
+ {
+ SmallVector<Node *, 16> Worklist;
+ for (auto &Entry : EntryNodes)
+ if (Node *EntryN = Entry.dyn_cast<Node *>())
+ Worklist.push_back(EntryN);
+
+ while (!Worklist.empty()) {
+ Node *N = Worklist.pop_back_val();
+ N->G = this;
+ for (auto &Callee : N->Callees)
+ if (!Callee.isNull())
+ if (Node *CalleeN = Callee.dyn_cast<Node *>())
+ Worklist.push_back(CalleeN);
+ }
+ }
+
+ // Process all SCCs updating the graph pointers.
+ {
+ SmallVector<SCC *, 16> Worklist(LeafSCCs.begin(), LeafSCCs.end());
+
+ while (!Worklist.empty()) {
+ SCC *C = Worklist.pop_back_val();
+ C->G = this;
+ Worklist.insert(Worklist.end(), C->ParentSCCs.begin(),
+ C->ParentSCCs.end());
+ }
+ }
+}
+
+LazyCallGraph::SCC *LazyCallGraph::formSCC(Node *RootN,
+ SmallVectorImpl<Node *> &NodeStack) {
+ // The tail of the stack is the new SCC. Allocate the SCC and pop the stack
+ // into it.
+ SCC *NewSCC = new (SCCBPA.Allocate()) SCC(*this);
+
+ while (!NodeStack.empty() && NodeStack.back()->DFSNumber > RootN->DFSNumber) {
+ assert(NodeStack.back()->LowLink >= RootN->LowLink &&
+ "We cannot have a low link in an SCC lower than its root on the "
+ "stack!");
+ NewSCC->insert(*NodeStack.pop_back_val());
+ }
+ NewSCC->insert(*RootN);
+
+ // A final pass over all edges in the SCC (this remains linear as we only
+ // do this once when we build the SCC) to connect it to the parent sets of
+ // its children.
+ bool IsLeafSCC = true;
+ for (Node *SCCN : NewSCC->Nodes)
+ for (Node &SCCChildN : *SCCN) {
+ SCC &ChildSCC = *SCCMap.lookup(&SCCChildN);
+ if (&ChildSCC == NewSCC)
+ continue;
+ ChildSCC.ParentSCCs.insert(NewSCC);
+ IsLeafSCC = false;
+ }
+
+ // For the SCCs where we find no child SCCs, add them to the leaf list.
+ if (IsLeafSCC)
+ LeafSCCs.push_back(NewSCC);
+
+ return NewSCC;
+}
+
+LazyCallGraph::SCC *LazyCallGraph::getNextSCCInPostOrder() {
+ Node *N;
+ Node::iterator I;
+ if (!DFSStack.empty()) {
+ N = DFSStack.back().first;
+ I = DFSStack.back().second;
+ DFSStack.pop_back();
+ } else {
+ // If we've handled all candidate entry nodes to the SCC forest, we're done.
+ do {
+ if (SCCEntryNodes.empty())
+ return nullptr;
+
+ N = &get(*SCCEntryNodes.pop_back_val());
+ } while (N->DFSNumber != 0);
+ I = N->begin();
+ N->LowLink = N->DFSNumber = 1;
+ NextDFSNumber = 2;
+ }
+
+ for (;;) {
+ assert(N->DFSNumber != 0 && "We should always assign a DFS number "
+ "before placing a node onto the stack.");
+
+ Node::iterator E = N->end();
+ while (I != E) {
+ Node &ChildN = *I;
+ if (ChildN.DFSNumber == 0) {
+ // Mark that we should start at this child when next this node is the
+ // top of the stack. We don't start at the next child to ensure this
+ // child's lowlink is reflected.
+ DFSStack.push_back(std::make_pair(N, N->begin()));
+
+ // Recurse onto this node via a tail call.
+ assert(!SCCMap.count(&ChildN) &&
+ "Found a node with 0 DFS number but already in an SCC!");
+ ChildN.LowLink = ChildN.DFSNumber = NextDFSNumber++;
+ N = &ChildN;
+ I = ChildN.begin();
+ E = ChildN.end();
+ continue;
+ }
+
+ // Track the lowest link of the children, if any are still in the stack.
+ assert(ChildN.LowLink != 0 &&
+ "Low-link must not be zero with a non-zero DFS number.");
+ if (ChildN.LowLink >= 0 && ChildN.LowLink < N->LowLink)
+ N->LowLink = ChildN.LowLink;
+ ++I;
+ }
+
+ if (N->LowLink == N->DFSNumber)
+ // Form the new SCC out of the top of the DFS stack.
+ return formSCC(N, PendingSCCStack);
+
+ // At this point we know that N cannot ever be an SCC root. Its low-link
+ // is not its dfs-number, and we've processed all of its children. It is
+ // just sitting here waiting until some node further down the stack gets
+ // low-link == dfs-number and pops it off as well. Move it to the pending
+ // stack which is pulled into the next SCC to be formed.
+ PendingSCCStack.push_back(N);
+
+ assert(!DFSStack.empty() && "We never found a viable root!");
+ N = DFSStack.back().first;
+ I = DFSStack.back().second;
+ DFSStack.pop_back();
+ }
}
char LazyCallGraphAnalysis::PassID;
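getNextSCCInPostOrder is an iterative rendering of Tarjan's SCC algorithm: DFS numbers record discovery order, each node's low-link tracks the smallest DFS number reachable through nodes still on the stack, a node whose low-link equals its own DFS number roots a finished SCC, and a negative low-link marks a node already swept into one. Below is a minimal recursive sketch of the same bookkeeping over a plain adjacency list; the code is illustrative and standalone, not the LazyCallGraph API:

    #include <algorithm>
    #include <vector>

    // Standalone Tarjan SCC mirroring the DFSNumber/LowLink bookkeeping
    // above; Adj is an adjacency list over nodes 0..N-1.
    struct TarjanSCC {
      const std::vector<std::vector<int>> &Adj;
      std::vector<int> DFSNumber, LowLink;
      std::vector<int> Stack;
      std::vector<std::vector<int>> SCCs;
      int NextDFSNumber = 1;

      TarjanSCC(const std::vector<std::vector<int>> &Adj)
          : Adj(Adj), DFSNumber(Adj.size(), 0), LowLink(Adj.size(), 0) {}

      void visit(int N) {
        DFSNumber[N] = LowLink[N] = NextDFSNumber++;
        Stack.push_back(N);
        for (int Child : Adj[N]) {
          if (DFSNumber[Child] == 0)
            visit(Child);                  // unvisited: recurse
          if (LowLink[Child] >= 0)         // child still on the stack
            LowLink[N] = std::min(LowLink[N], LowLink[Child]);
        }
        if (LowLink[N] == DFSNumber[N]) {  // N roots a finished SCC
          std::vector<int> SCC;
          int M;
          do {
            M = Stack.back();
            Stack.pop_back();
            LowLink[M] = -1;               // mark "already in an SCC"
            SCC.push_back(M);
          } while (M != N);
          SCCs.push_back(SCC);             // completed in post-order
        }
      }

      void run() {
        for (int N = 0, E = (int)Adj.size(); N != E; ++N)
          if (DFSNumber[N] == 0)
            visit(N);
      }
    };

On the graph with edges 0->1, 1->0 and 1->2, run() emits {2} before {0, 1}: an SCC is completed only after every SCC it calls into, which is precisely the post-order the DFSStack-based iterator above hands out one SCC at a time.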
@@ -154,9 +687,9 @@ LazyCallGraphPrinterPass::LazyCallGraphPrinterPass(raw_ostream &OS) : OS(OS) {}
static void printNodes(raw_ostream &OS, LazyCallGraph::Node &N,
SmallPtrSetImpl<LazyCallGraph::Node *> &Printed) {
// Recurse depth first through the nodes.
- for (LazyCallGraph::Node *ChildN : N)
- if (Printed.insert(ChildN))
- printNodes(OS, *ChildN, Printed);
+ for (LazyCallGraph::Node &ChildN : N)
+ if (Printed.insert(&ChildN))
+ printNodes(OS, ChildN, Printed);
OS << " Call edges in function: " << N.getFunction().getName() << "\n";
for (LazyCallGraph::iterator I = N.begin(), E = N.end(); I != E; ++I)
@@ -165,6 +698,16 @@ static void printNodes(raw_ostream &OS, LazyCallGraph::Node &N,
OS << "\n";
}
+static void printSCC(raw_ostream &OS, LazyCallGraph::SCC &SCC) {
+ ptrdiff_t SCCSize = std::distance(SCC.begin(), SCC.end());
+ OS << " SCC with " << SCCSize << " functions:\n";
+
+ for (LazyCallGraph::Node *N : SCC)
+ OS << " " << N->getFunction().getName() << "\n";
+
+ OS << "\n";
+}
+
PreservedAnalyses LazyCallGraphPrinterPass::run(Module *M,
ModuleAnalysisManager *AM) {
LazyCallGraph &G = AM->getResult<LazyCallGraphAnalysis>(M);
@@ -173,9 +716,13 @@ PreservedAnalyses LazyCallGraphPrinterPass::run(Module *M,
<< "\n\n";
SmallPtrSet<LazyCallGraph::Node *, 16> Printed;
- for (LazyCallGraph::Node *N : G)
- if (Printed.insert(N))
- printNodes(OS, *N, Printed);
+ for (LazyCallGraph::Node &N : G)
+ if (Printed.insert(&N))
+ printNodes(OS, N, Printed);
+
+ for (LazyCallGraph::SCC &SCC : G.postorder_sccs())
+ printSCC(OS, SCC);
return PreservedAnalyses::all();
+
}
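With the printSCC hook added to run(), the printer's output gains a post-order SCC section after the per-function edge dump; for a module whose only cycle is mutual recursion between two functions (names hypothetical), the new section would read roughly:

    SCC with 2 functions:
      foo
      bar

followed by a one-function SCC for every other function.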
diff --git a/lib/Analysis/LazyValueInfo.cpp b/lib/Analysis/LazyValueInfo.cpp
index 3d6c583..9f919f7 100644
--- a/lib/Analysis/LazyValueInfo.cpp
+++ b/lib/Analysis/LazyValueInfo.cpp
@@ -12,7 +12,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "lazy-value-info"
#include "llvm/Analysis/LazyValueInfo.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/STLExtras.h"
@@ -34,6 +33,8 @@
using namespace llvm;
using namespace PatternMatch;
+#define DEBUG_TYPE "lazy-value-info"
+
char LazyValueInfo::ID = 0;
INITIALIZE_PASS_BEGIN(LazyValueInfo, "lazy-value-info",
"Lazy Value Information Analysis", false, true)
@@ -82,7 +83,7 @@ class LVILatticeVal {
ConstantRange Range;
public:
- LVILatticeVal() : Tag(undefined), Val(0), Range(1, true) {}
+ LVILatticeVal() : Tag(undefined), Val(nullptr), Range(1, true) {}
static LVILatticeVal get(Constant *C) {
LVILatticeVal Res;
@@ -516,7 +517,7 @@ bool LazyValueInfoCache::solveBlockValue(Value *Val, BasicBlock *BB) {
BBLV.markOverdefined();
Instruction *BBI = dyn_cast<Instruction>(Val);
- if (BBI == 0 || BBI->getParent() != BB) {
+ if (!BBI || BBI->getParent() != BB) {
return ODCacheUpdater.markResult(solveBlockValueNonLocal(BBLV, Val, BB));
}
@@ -595,7 +596,7 @@ bool LazyValueInfoCache::solveBlockValueNonLocal(LVILatticeVal &BBLV,
Value *UnderlyingVal = GetUnderlyingObject(Val);
// If 'GetUnderlyingObject' didn't converge, skip it. It won't converge
// inside InstructionDereferencesPointer either.
- if (UnderlyingVal == GetUnderlyingObject(UnderlyingVal, NULL, 1)) {
+ if (UnderlyingVal == GetUnderlyingObject(UnderlyingVal, nullptr, 1)) {
for (BasicBlock::iterator BI = BB->begin(), BE = BB->end();
BI != BE; ++BI) {
if (InstructionDereferencesPointer(BI, UnderlyingVal)) {
@@ -813,7 +814,7 @@ static bool getEdgeValueLocal(Value *Val, BasicBlock *BBFrom,
// Recognize the range checking idiom that InstCombine produces.
// (X-C1) u< C2 --> [C1, C1+C2)
- ConstantInt *NegOffset = 0;
+ ConstantInt *NegOffset = nullptr;
if (ICI->getPredicate() == ICmpInst::ICMP_ULT)
match(ICI->getOperand(0), m_Add(m_Specific(Val),
m_ConstantInt(NegOffset)));
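A worked instance of the idiom: InstCombine canonicalizes X - 8 into X + (-8), so for (X - 8) u< 4 the matcher binds NegOffset to -8, and the (elided) follow-on code concludes that X lies in the half-open range [8, 12) on the true edge. The comment's general form [C1, C1+C2) falls out the same way with C1 = -NegOffset and C2 the constant compared against.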
@@ -1014,7 +1015,7 @@ bool LazyValueInfo::runOnFunction(Function &F) {
getCache(PImpl).clear();
DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : 0;
+ DL = DLP ? &DLP->getDataLayout() : nullptr;
TLI = &getAnalysis<TargetLibraryInfo>();
// Fully lazy.
@@ -1030,7 +1031,7 @@ void LazyValueInfo::releaseMemory() {
// If the cache was allocated, free it.
if (PImpl) {
delete &getCache(PImpl);
- PImpl = 0;
+ PImpl = nullptr;
}
}
@@ -1044,7 +1045,7 @@ Constant *LazyValueInfo::getConstant(Value *V, BasicBlock *BB) {
if (const APInt *SingleVal = CR.getSingleElement())
return ConstantInt::get(V->getContext(), *SingleVal);
}
- return 0;
+ return nullptr;
}
/// getConstantOnEdge - Determine whether the specified value is known to be a
@@ -1060,7 +1061,7 @@ Constant *LazyValueInfo::getConstantOnEdge(Value *V, BasicBlock *FromBB,
if (const APInt *SingleVal = CR.getSingleElement())
return ConstantInt::get(V->getContext(), *SingleVal);
}
- return 0;
+ return nullptr;
}
/// getPredicateOnEdge - Determine whether the specified value comparison
@@ -1072,7 +1073,7 @@ LazyValueInfo::getPredicateOnEdge(unsigned Pred, Value *V, Constant *C,
LVILatticeVal Result = getCache(PImpl).getValueOnEdge(V, FromBB, ToBB);
// If we know the value is a constant, evaluate the conditional.
- Constant *Res = 0;
+ Constant *Res = nullptr;
if (Result.isConstant()) {
Res = ConstantFoldCompareInstOperands(Pred, Result.getConstant(), C, DL,
TLI);
diff --git a/lib/Analysis/LibCallAliasAnalysis.cpp b/lib/Analysis/LibCallAliasAnalysis.cpp
index fefa516..016f8c5 100644
--- a/lib/Analysis/LibCallAliasAnalysis.cpp
+++ b/lib/Analysis/LibCallAliasAnalysis.cpp
@@ -54,7 +54,7 @@ LibCallAliasAnalysis::AnalyzeLibCallDetails(const LibCallFunctionInfo *FI,
// if we have detailed info and if 'P' is any of the locations we know
// about.
const LibCallFunctionInfo::LocationMRInfo *Details = FI->LocationDetails;
- if (Details == 0)
+ if (Details == nullptr)
return MRInfo;
// If the details array is of the 'DoesNot' kind, we only know something if
diff --git a/lib/Analysis/LibCallSemantics.cpp b/lib/Analysis/LibCallSemantics.cpp
index 0592ccb..7d4e254 100644
--- a/lib/Analysis/LibCallSemantics.cpp
+++ b/lib/Analysis/LibCallSemantics.cpp
@@ -46,11 +46,11 @@ LibCallInfo::getFunctionInfo(const Function *F) const {
/// If this is the first time we are querying for this info, lazily construct
/// the StringMap to index it.
- if (Map == 0) {
+ if (!Map) {
Impl = Map = new StringMap<const LibCallFunctionInfo*>();
const LibCallFunctionInfo *Array = getFunctionInfoArray();
- if (Array == 0) return 0;
+ if (!Array) return nullptr;
// We now have the array of entries. Populate the StringMap.
for (unsigned i = 0; Array[i].Name; ++i)
diff --git a/lib/Analysis/Lint.cpp b/lib/Analysis/Lint.cpp
index b2182b1..b14f329 100644
--- a/lib/Analysis/Lint.cpp
+++ b/lib/Analysis/Lint.cpp
@@ -137,8 +137,8 @@ namespace {
// that failed. This provides a nice place to put a breakpoint if you want
// to see why something is not correct.
void CheckFailed(const Twine &Message,
- const Value *V1 = 0, const Value *V2 = 0,
- const Value *V3 = 0, const Value *V4 = 0) {
+ const Value *V1 = nullptr, const Value *V2 = nullptr,
+ const Value *V3 = nullptr, const Value *V4 = nullptr) {
MessagesStr << Message.str() << "\n";
WriteValue(V1);
WriteValue(V2);
@@ -177,7 +177,7 @@ bool Lint::runOnFunction(Function &F) {
AA = &getAnalysis<AliasAnalysis>();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : 0;
+ DL = DLP ? &DLP->getDataLayout() : nullptr;
TLI = &getAnalysis<TargetLibraryInfo>();
visit(F);
dbgs() << MessagesStr.str();
@@ -199,7 +199,7 @@ void Lint::visitCallSite(CallSite CS) {
Value *Callee = CS.getCalledValue();
visitMemoryReference(I, Callee, AliasAnalysis::UnknownSize,
- 0, 0, MemRef::Callee);
+ 0, nullptr, MemRef::Callee);
if (Function *F = dyn_cast<Function>(findValue(Callee, /*OffsetOk=*/false))) {
Assert1(CS.getCallingConv() == F->getCallingConv(),
@@ -275,10 +275,10 @@ void Lint::visitCallSite(CallSite CS) {
MemCpyInst *MCI = cast<MemCpyInst>(&I);
// TODO: If the size is known, use it.
visitMemoryReference(I, MCI->getDest(), AliasAnalysis::UnknownSize,
- MCI->getAlignment(), 0,
+ MCI->getAlignment(), nullptr,
MemRef::Write);
visitMemoryReference(I, MCI->getSource(), AliasAnalysis::UnknownSize,
- MCI->getAlignment(), 0,
+ MCI->getAlignment(), nullptr,
MemRef::Read);
// Check that the memcpy arguments don't overlap. The AliasAnalysis API
@@ -299,10 +299,10 @@ void Lint::visitCallSite(CallSite CS) {
MemMoveInst *MMI = cast<MemMoveInst>(&I);
// TODO: If the size is known, use it.
visitMemoryReference(I, MMI->getDest(), AliasAnalysis::UnknownSize,
- MMI->getAlignment(), 0,
+ MMI->getAlignment(), nullptr,
MemRef::Write);
visitMemoryReference(I, MMI->getSource(), AliasAnalysis::UnknownSize,
- MMI->getAlignment(), 0,
+ MMI->getAlignment(), nullptr,
MemRef::Read);
break;
}
@@ -310,7 +310,7 @@ void Lint::visitCallSite(CallSite CS) {
MemSetInst *MSI = cast<MemSetInst>(&I);
// TODO: If the size is known, use it.
visitMemoryReference(I, MSI->getDest(), AliasAnalysis::UnknownSize,
- MSI->getAlignment(), 0,
+ MSI->getAlignment(), nullptr,
MemRef::Write);
break;
}
@@ -321,17 +321,17 @@ void Lint::visitCallSite(CallSite CS) {
&I);
visitMemoryReference(I, CS.getArgument(0), AliasAnalysis::UnknownSize,
- 0, 0, MemRef::Read | MemRef::Write);
+ 0, nullptr, MemRef::Read | MemRef::Write);
break;
case Intrinsic::vacopy:
visitMemoryReference(I, CS.getArgument(0), AliasAnalysis::UnknownSize,
- 0, 0, MemRef::Write);
+ 0, nullptr, MemRef::Write);
visitMemoryReference(I, CS.getArgument(1), AliasAnalysis::UnknownSize,
- 0, 0, MemRef::Read);
+ 0, nullptr, MemRef::Read);
break;
case Intrinsic::vaend:
visitMemoryReference(I, CS.getArgument(0), AliasAnalysis::UnknownSize,
- 0, 0, MemRef::Read | MemRef::Write);
+ 0, nullptr, MemRef::Read | MemRef::Write);
break;
case Intrinsic::stackrestore:
@@ -339,7 +339,7 @@ void Lint::visitCallSite(CallSite CS) {
// stack pointer, which the compiler may read from or write to
// at any time, so check it for both readability and writeability.
visitMemoryReference(I, CS.getArgument(0), AliasAnalysis::UnknownSize,
- 0, 0, MemRef::Read | MemRef::Write);
+ 0, nullptr, MemRef::Read | MemRef::Write);
break;
}
}
@@ -513,7 +513,7 @@ static bool isZero(Value *V, const DataLayout *DL) {
if (!VecTy) {
unsigned BitWidth = V->getType()->getIntegerBitWidth();
APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
- ComputeMaskedBits(V, KnownZero, KnownOne, DL);
+ computeKnownBits(V, KnownZero, KnownOne, DL);
return KnownZero.isAllOnesValue();
}
@@ -534,7 +534,7 @@ static bool isZero(Value *V, const DataLayout *DL) {
return true;
APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
- ComputeMaskedBits(Elem, KnownZero, KnownOne, DL);
+ computeKnownBits(Elem, KnownZero, KnownOne, DL);
if (KnownZero.isAllOnesValue())
return true;
}
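Beyond the nullptr sweep, these two hunks pick up the contemporaneous rename of ComputeMaskedBits to computeKnownBits; the contract is unchanged: after the call, a set bit in KnownZero (respectively KnownOne) means that bit of V is proven 0 (respectively 1), so isZero's KnownZero.isAllOnesValue() test asks whether every bit is proven zero. A minimal sketch of the call, assuming the 3.5-era signature from llvm/Analysis/ValueTracking.h:

    #include "llvm/ADT/APInt.h"
    #include "llvm/Analysis/ValueTracking.h"
    #include "llvm/IR/Type.h"
    #include "llvm/IR/Value.h"
    using namespace llvm;

    // True when every bit of the integer value V is proven zero.
    static bool provablyZero(Value *V, const DataLayout *DL) {
      unsigned BitWidth = V->getType()->getIntegerBitWidth();
      APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
      computeKnownBits(V, KnownZero, KnownOne, DL); // fills both masks
      return KnownZero.isAllOnesValue();
    }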
@@ -572,13 +572,13 @@ void Lint::visitAllocaInst(AllocaInst &I) {
}
void Lint::visitVAArgInst(VAArgInst &I) {
- visitMemoryReference(I, I.getOperand(0), AliasAnalysis::UnknownSize, 0, 0,
- MemRef::Read | MemRef::Write);
+ visitMemoryReference(I, I.getOperand(0), AliasAnalysis::UnknownSize, 0,
+ nullptr, MemRef::Read | MemRef::Write);
}
void Lint::visitIndirectBrInst(IndirectBrInst &I) {
- visitMemoryReference(I, I.getAddress(), AliasAnalysis::UnknownSize, 0, 0,
- MemRef::Branchee);
+ visitMemoryReference(I, I.getAddress(), AliasAnalysis::UnknownSize, 0,
+ nullptr, MemRef::Branchee);
Assert1(I.getNumDestinations() != 0,
"Undefined behavior: indirectbr with no destinations", &I);
diff --git a/lib/Analysis/Loads.cpp b/lib/Analysis/Loads.cpp
index 0902a39..005d309 100644
--- a/lib/Analysis/Loads.cpp
+++ b/lib/Analysis/Loads.cpp
@@ -62,7 +62,7 @@ bool llvm::isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom,
if (ByteOffset < 0) // out of bounds
return false;
- Type *BaseType = 0;
+ Type *BaseType = nullptr;
unsigned BaseAlign = 0;
if (const AllocaInst *AI = dyn_cast<AllocaInst>(Base)) {
// An alloca is safe to load from as long as it is suitably aligned.
@@ -161,7 +161,7 @@ Value *llvm::FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB,
ScanFrom++;
// Don't scan huge blocks.
- if (MaxInstsToScan-- == 0) return 0;
+ if (MaxInstsToScan-- == 0) return nullptr;
--ScanFrom;
// If this is a load of Ptr, the loaded value is available.
@@ -198,7 +198,7 @@ Value *llvm::FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB,
// Otherwise the store may or may not alias the pointer; bail out.
++ScanFrom;
- return 0;
+ return nullptr;
}
// If this is some other instruction that may clobber Ptr, bail out.
@@ -211,11 +211,11 @@ Value *llvm::FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB,
// May modify the pointer, bail out.
++ScanFrom;
- return 0;
+ return nullptr;
}
}
// We got to the start of the block without finding the value, but we are
// done for this block.
- return 0;
+ return nullptr;
}
diff --git a/lib/Analysis/LoopInfo.cpp b/lib/Analysis/LoopInfo.cpp
index b38672e..46c0eaa 100644
--- a/lib/Analysis/LoopInfo.cpp
+++ b/lib/Analysis/LoopInfo.cpp
@@ -141,21 +141,21 @@ bool Loop::makeLoopInvariant(Instruction *I, bool &Changed,
PHINode *Loop::getCanonicalInductionVariable() const {
BasicBlock *H = getHeader();
- BasicBlock *Incoming = 0, *Backedge = 0;
+ BasicBlock *Incoming = nullptr, *Backedge = nullptr;
pred_iterator PI = pred_begin(H);
assert(PI != pred_end(H) &&
"Loop must have at least one backedge!");
Backedge = *PI++;
- if (PI == pred_end(H)) return 0; // dead loop
+ if (PI == pred_end(H)) return nullptr; // dead loop
Incoming = *PI++;
- if (PI != pred_end(H)) return 0; // multiple backedges?
+ if (PI != pred_end(H)) return nullptr; // multiple backedges?
if (contains(Incoming)) {
if (contains(Backedge))
- return 0;
+ return nullptr;
std::swap(Incoming, Backedge);
} else if (!contains(Backedge))
- return 0;
+ return nullptr;
// Loop over all of the PHI nodes, looking for a canonical indvar.
for (BasicBlock::iterator I = H->begin(); isa<PHINode>(I); ++I) {
@@ -171,7 +171,7 @@ PHINode *Loop::getCanonicalInductionVariable() const {
if (CI->equalsInt(1))
return PN;
}
- return 0;
+ return nullptr;
}
/// isLCSSAForm - Return true if the Loop is in LCSSA form
@@ -232,7 +232,7 @@ bool Loop::isSafeToClone() const {
}
MDNode *Loop::getLoopID() const {
- MDNode *LoopID = 0;
+ MDNode *LoopID = nullptr;
if (isLoopSimplifyForm()) {
LoopID = getLoopLatch()->getTerminator()->getMetadata(LoopMDName);
} else {
@@ -241,7 +241,7 @@ MDNode *Loop::getLoopID() const {
BasicBlock *H = getHeader();
for (block_iterator I = block_begin(), IE = block_end(); I != IE; ++I) {
TerminatorInst *TI = (*I)->getTerminator();
- MDNode *MD = 0;
+ MDNode *MD = nullptr;
// Check if this terminator branches to the loop header.
for (unsigned i = 0, ie = TI->getNumSuccessors(); i != ie; ++i) {
@@ -251,17 +251,17 @@ MDNode *Loop::getLoopID() const {
}
}
if (!MD)
- return 0;
+ return nullptr;
if (!LoopID)
LoopID = MD;
else if (MD != LoopID)
- return 0;
+ return nullptr;
}
}
if (!LoopID || LoopID->getNumOperands() == 0 ||
LoopID->getOperand(0) != LoopID)
- return 0;
+ return nullptr;
return LoopID;
}
@@ -402,7 +402,7 @@ BasicBlock *Loop::getUniqueExitBlock() const {
getUniqueExitBlocks(UniqueExitBlocks);
if (UniqueExitBlocks.size() == 1)
return UniqueExitBlocks[0];
- return 0;
+ return nullptr;
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -548,7 +548,7 @@ Loop *UnloopUpdater::getNearestLoop(BasicBlock *BB, Loop *BBLoop) {
// is considered uninitialized.
Loop *NearLoop = BBLoop;
- Loop *Subloop = 0;
+ Loop *Subloop = nullptr;
if (NearLoop != Unloop && Unloop->contains(NearLoop)) {
Subloop = NearLoop;
// Find the subloop ancestor that is directly contained within Unloop.
@@ -564,7 +564,7 @@ Loop *UnloopUpdater::getNearestLoop(BasicBlock *BB, Loop *BBLoop) {
succ_iterator I = succ_begin(BB), E = succ_end(BB);
if (I == E) {
assert(!Subloop && "subloop blocks must have a successor");
- NearLoop = 0; // unloop blocks may now exit the function.
+ NearLoop = nullptr; // unloop blocks may now exit the function.
}
for (; I != E; ++I) {
if (*I == BB)
@@ -637,7 +637,7 @@ void LoopInfo::updateUnloop(Loop *Unloop) {
// Blocks no longer have a parent but are still referenced by Unloop until
// the Unloop object is deleted.
- LI.changeLoopFor(*I, 0);
+ LI.changeLoopFor(*I, nullptr);
}
// Remove the loop from the top-level LoopInfo object.
diff --git a/lib/Analysis/LoopPass.cpp b/lib/Analysis/LoopPass.cpp
index 38e753f..8df18e7 100644
--- a/lib/Analysis/LoopPass.cpp
+++ b/lib/Analysis/LoopPass.cpp
@@ -15,10 +15,13 @@
#include "llvm/Analysis/LoopPass.h"
#include "llvm/IR/IRPrintingPasses.h"
+#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Timer.h"
using namespace llvm;
+#define DEBUG_TYPE "loop-pass-manager"
+
namespace {
/// PrintLoopPass - Print a Function corresponding to a Loop.
@@ -61,8 +64,8 @@ LPPassManager::LPPassManager()
: FunctionPass(ID), PMDataManager() {
skipThisLoop = false;
redoThisLoop = false;
- LI = NULL;
- CurrentLoop = NULL;
+ LI = nullptr;
+ CurrentLoop = nullptr;
}
/// Delete loop from the loop queue and loop hierarchy (LoopInfo).
@@ -251,6 +254,8 @@ bool LPPassManager::runOnFunction(Function &F) {
// Then call the regular verifyAnalysis functions.
verifyPreservedAnalysis(P);
+
+ F.getContext().yield();
}
removeNotPreservedAnalysis(P);
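The newly added F.getContext().yield() gives embedders a cooperative scheduling point between loop passes: LLVMContext::yield() invokes whatever handler was registered through setYieldCallback, and does nothing if none was. A minimal sketch of the host side, with an illustrative handler body:

    #include "llvm/IR/LLVMContext.h"

    // Called by LLVM at safe points such as the one added above, on the
    // same thread that is running the passes.
    static void handleYield(llvm::LLVMContext *Ctx, void *OpaqueHandle) {
      // e.g. pump a UI event loop or check a cancellation flag here.
    }

    void installYieldHandler(llvm::LLVMContext &Ctx) {
      Ctx.setYieldCallback(handleYield, /*OpaqueHandle=*/nullptr);
    }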
diff --git a/lib/Analysis/MemDepPrinter.cpp b/lib/Analysis/MemDepPrinter.cpp
index bc1dc69..10da3d5 100644
--- a/lib/Analysis/MemDepPrinter.cpp
+++ b/lib/Analysis/MemDepPrinter.cpp
@@ -46,7 +46,7 @@ namespace {
bool runOnFunction(Function &F) override;
- void print(raw_ostream &OS, const Module * = 0) const override;
+ void print(raw_ostream &OS, const Module * = nullptr) const override;
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequiredTransitive<AliasAnalysis>();
@@ -56,7 +56,7 @@ namespace {
void releaseMemory() override {
Deps.clear();
- F = 0;
+ F = nullptr;
}
private:
@@ -106,7 +106,7 @@ bool MemDepPrinter::runOnFunction(Function &F) {
MemDepResult Res = MDA.getDependency(Inst);
if (!Res.isNonLocal()) {
Deps[Inst].insert(std::make_pair(getInstTypePair(Res),
- static_cast<BasicBlock *>(0)));
+ static_cast<BasicBlock *>(nullptr)));
} else if (CallSite CS = cast<Value>(Inst)) {
const MemoryDependenceAnalysis::NonLocalDepInfo &NLDI =
MDA.getNonLocalCallDependency(CS);
@@ -122,8 +122,8 @@ bool MemDepPrinter::runOnFunction(Function &F) {
if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
if (!LI->isUnordered()) {
// FIXME: Handle atomic/volatile loads.
- Deps[Inst].insert(std::make_pair(getInstTypePair(0, Unknown),
- static_cast<BasicBlock *>(0)));
+ Deps[Inst].insert(std::make_pair(getInstTypePair(nullptr, Unknown),
+ static_cast<BasicBlock *>(nullptr)));
continue;
}
AliasAnalysis::Location Loc = AA.getLocation(LI);
@@ -131,8 +131,8 @@ bool MemDepPrinter::runOnFunction(Function &F) {
} else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
if (!SI->isUnordered()) {
// FIXME: Handle atomic/volatile stores.
- Deps[Inst].insert(std::make_pair(getInstTypePair(0, Unknown),
- static_cast<BasicBlock *>(0)));
+ Deps[Inst].insert(std::make_pair(getInstTypePair(nullptr, Unknown),
+ static_cast<BasicBlock *>(nullptr)));
continue;
}
AliasAnalysis::Location Loc = AA.getLocation(SI);
diff --git a/lib/Analysis/MemoryBuiltins.cpp b/lib/Analysis/MemoryBuiltins.cpp
index 1dba323..64d339f 100644
--- a/lib/Analysis/MemoryBuiltins.cpp
+++ b/lib/Analysis/MemoryBuiltins.cpp
@@ -12,7 +12,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "memory-builtins"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
@@ -30,6 +29,8 @@
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
+#define DEBUG_TYPE "memory-builtins"
+
enum AllocType {
OpNewLike = 1<<0, // allocates; never returns null
MallocLike = 1<<1 | OpNewLike, // allocates; may return null
@@ -76,14 +77,14 @@ static Function *getCalledFunction(const Value *V, bool LookThroughBitCast) {
CallSite CS(const_cast<Value*>(V));
if (!CS.getInstruction())
- return 0;
+ return nullptr;
if (CS.isNoBuiltin())
- return 0;
+ return nullptr;
Function *Callee = CS.getCalledFunction();
if (!Callee || !Callee->isDeclaration())
- return 0;
+ return nullptr;
return Callee;
}
@@ -94,17 +95,17 @@ static const AllocFnsTy *getAllocationData(const Value *V, AllocType AllocTy,
bool LookThroughBitCast = false) {
// Skip intrinsics
if (isa<IntrinsicInst>(V))
- return 0;
+ return nullptr;
Function *Callee = getCalledFunction(V, LookThroughBitCast);
if (!Callee)
- return 0;
+ return nullptr;
// Make sure that the function is available.
StringRef FnName = Callee->getName();
LibFunc::Func TLIFn;
if (!TLI || !TLI->getLibFunc(FnName, TLIFn) || !TLI->has(TLIFn))
- return 0;
+ return nullptr;
unsigned i = 0;
bool found = false;
@@ -115,11 +116,11 @@ static const AllocFnsTy *getAllocationData(const Value *V, AllocType AllocTy,
}
}
if (!found)
- return 0;
+ return nullptr;
const AllocFnsTy *FnData = &AllocationFnData[i];
if ((FnData->AllocTy & AllocTy) != FnData->AllocTy)
- return 0;
+ return nullptr;
// Check function prototype.
int FstParam = FnData->FstParam;
@@ -135,7 +136,7 @@ static const AllocFnsTy *getAllocationData(const Value *V, AllocType AllocTy,
FTy->getParamType(SndParam)->isIntegerTy(32) ||
FTy->getParamType(SndParam)->isIntegerTy(64)))
return FnData;
- return 0;
+ return nullptr;
}
static bool hasNoAliasAttr(const Value *V, bool LookThroughBitCast) {
@@ -202,19 +203,19 @@ bool llvm::isOperatorNewLikeFn(const Value *V, const TargetLibraryInfo *TLI,
/// ignore InvokeInst here.
const CallInst *llvm::extractMallocCall(const Value *I,
const TargetLibraryInfo *TLI) {
- return isMallocLikeFn(I, TLI) ? dyn_cast<CallInst>(I) : 0;
+ return isMallocLikeFn(I, TLI) ? dyn_cast<CallInst>(I) : nullptr;
}
static Value *computeArraySize(const CallInst *CI, const DataLayout *DL,
const TargetLibraryInfo *TLI,
bool LookThroughSExt = false) {
if (!CI)
- return 0;
+ return nullptr;
// The size of the malloc's result type must be known to determine array size.
Type *T = getMallocAllocatedType(CI, TLI);
if (!T || !T->isSized() || !DL)
- return 0;
+ return nullptr;
unsigned ElementSize = DL->getTypeAllocSize(T);
if (StructType *ST = dyn_cast<StructType>(T))
@@ -223,12 +224,12 @@ static Value *computeArraySize(const CallInst *CI, const DataLayout *DL,
// If the malloc call's argument can be determined to be a multiple of
// ElementSize, return the multiple. Otherwise, return nullptr.
Value *MallocArg = CI->getArgOperand(0);
- Value *Multiple = 0;
+ Value *Multiple = nullptr;
if (ComputeMultiple(MallocArg, ElementSize, Multiple,
LookThroughSExt))
return Multiple;
- return 0;
+ return nullptr;
}
/// isArrayMalloc - Returns the corresponding CallInst if the instruction
@@ -245,7 +246,7 @@ const CallInst *llvm::isArrayMalloc(const Value *I,
return CI;
// CI is a non-array malloc or we can't figure out that it is an array malloc.
- return 0;
+ return nullptr;
}
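To make the helpers concrete: for a call malloc(48) whose result is bitcast only to i32*, getMallocType reports i32*, the element size is 4, and ComputeMultiple proves the argument is 12 * 4, so getMallocArraySize answers 12 and isArrayMalloc returns the call. When no such multiple can be established (say, an opaque runtime argument), computeArraySize returns nullptr and the call is treated as a non-array malloc.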
/// getMallocType - Returns the PointerType resulting from the malloc call.
@@ -257,7 +258,7 @@ PointerType *llvm::getMallocType(const CallInst *CI,
const TargetLibraryInfo *TLI) {
assert(isMallocLikeFn(CI, TLI) && "getMallocType and not malloc call");
- PointerType *MallocType = 0;
+ PointerType *MallocType = nullptr;
unsigned NumOfBitCastUses = 0;
// Determine if CallInst has a bitcast use.
@@ -277,7 +278,7 @@ PointerType *llvm::getMallocType(const CallInst *CI,
return cast<PointerType>(CI->getType());
// Type could not be determined.
- return 0;
+ return nullptr;
}
/// getMallocAllocatedType - Returns the Type allocated by malloc call.
@@ -288,7 +289,7 @@ PointerType *llvm::getMallocType(const CallInst *CI,
Type *llvm::getMallocAllocatedType(const CallInst *CI,
const TargetLibraryInfo *TLI) {
PointerType *PT = getMallocType(CI, TLI);
- return PT ? PT->getElementType() : 0;
+ return PT ? PT->getElementType() : nullptr;
}
/// getMallocArraySize - Returns the array size of a malloc call. If the
@@ -308,7 +309,7 @@ Value *llvm::getMallocArraySize(CallInst *CI, const DataLayout *DL,
/// is a calloc call.
const CallInst *llvm::extractCallocCall(const Value *I,
const TargetLibraryInfo *TLI) {
- return isCallocLikeFn(I, TLI) ? cast<CallInst>(I) : 0;
+ return isCallocLikeFn(I, TLI) ? cast<CallInst>(I) : nullptr;
}
@@ -316,15 +317,15 @@ const CallInst *llvm::extractCallocCall(const Value *I,
const CallInst *llvm::isFreeCall(const Value *I, const TargetLibraryInfo *TLI) {
const CallInst *CI = dyn_cast<CallInst>(I);
if (!CI || isa<IntrinsicInst>(CI))
- return 0;
+ return nullptr;
Function *Callee = CI->getCalledFunction();
- if (Callee == 0 || !Callee->isDeclaration())
- return 0;
+ if (Callee == nullptr || !Callee->isDeclaration())
+ return nullptr;
StringRef FnName = Callee->getName();
LibFunc::Func TLIFn;
if (!TLI || !TLI->getLibFunc(FnName, TLIFn) || !TLI->has(TLIFn))
- return 0;
+ return nullptr;
unsigned ExpectedNumParams;
if (TLIFn == LibFunc::free ||
@@ -335,18 +336,18 @@ const CallInst *llvm::isFreeCall(const Value *I, const TargetLibraryInfo *TLI) {
TLIFn == LibFunc::ZdaPvRKSt9nothrow_t) // delete[](void*, nothrow)
ExpectedNumParams = 2;
else
- return 0;
+ return nullptr;
// Check free prototype.
// FIXME: workaround for PR5130; this will be obsolete once a nobuiltin
// attribute exists.
FunctionType *FTy = Callee->getFunctionType();
if (!FTy->getReturnType()->isVoidTy())
- return 0;
+ return nullptr;
if (FTy->getNumParams() != ExpectedNumParams)
- return 0;
+ return nullptr;
if (FTy->getParamType(0) != Type::getInt8PtrTy(Callee->getContext()))
- return 0;
+ return nullptr;
return CI;
}
diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp
index 015ded1..9eaf109 100644
--- a/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -14,7 +14,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "memdep"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
@@ -33,6 +32,8 @@
#include "llvm/Support/Debug.h"
using namespace llvm;
+#define DEBUG_TYPE "memdep"
+
STATISTIC(NumCacheNonLocal, "Number of fully cached non-local responses");
STATISTIC(NumCacheDirtyNonLocal, "Number of dirty cached non-local responses");
STATISTIC(NumUncacheNonLocal, "Number of uncached non-local responses");
@@ -88,10 +89,10 @@ void MemoryDependenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
bool MemoryDependenceAnalysis::runOnFunction(Function &) {
AA = &getAnalysis<AliasAnalysis>();
DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : 0;
+ DL = DLP ? &DLP->getDataLayout() : nullptr;
DominatorTreeWrapperPass *DTWP =
getAnalysisIfAvailable<DominatorTreeWrapperPass>();
- DT = DTWP ? &DTWP->getDomTree() : 0;
+ DT = DTWP ? &DTWP->getDomTree() : nullptr;
if (!PredCache)
PredCache.reset(new PredIteratorCache());
return false;
@@ -261,10 +262,10 @@ isLoadLoadClobberIfExtendedToFullWidth(const AliasAnalysis::Location &MemLoc,
const LoadInst *LI,
const DataLayout *DL) {
// If we have no target data, we can't do this.
- if (DL == 0) return false;
+ if (!DL) return false;
// If we haven't already computed the base/offset of MemLoc, do so now.
- if (MemLocBase == 0)
+ if (!MemLocBase)
MemLocBase = GetPointerBaseWithConstantOffset(MemLoc.Ptr, MemLocOffs, DL);
unsigned Size = MemoryDependenceAnalysis::
@@ -362,13 +363,13 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
BasicBlock::iterator ScanIt, BasicBlock *BB,
Instruction *QueryInst) {
- const Value *MemLocBase = 0;
+ const Value *MemLocBase = nullptr;
int64_t MemLocOffset = 0;
unsigned Limit = BlockScanLimit;
bool isInvariantLoad = false;
if (isLoad && QueryInst) {
LoadInst *LI = dyn_cast<LoadInst>(QueryInst);
- if (LI && LI->getMetadata(LLVMContext::MD_invariant_load) != 0)
+ if (LI && LI->getMetadata(LLVMContext::MD_invariant_load) != nullptr)
isInvariantLoad = true;
}
@@ -696,7 +697,7 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) {
if (Entry != Cache.begin() && std::prev(Entry)->getBB() == DirtyBB)
--Entry;
- NonLocalDepEntry *ExistingResult = 0;
+ NonLocalDepEntry *ExistingResult = nullptr;
if (Entry != Cache.begin()+NumSortedEntries &&
Entry->getBB() == DirtyBB) {
// If we already have an entry, and if it isn't already dirty, the block
@@ -807,7 +808,7 @@ GetNonLocalInfoForBlock(const AliasAnalysis::Location &Loc,
if (Entry != Cache->begin() && (Entry-1)->getBB() == BB)
--Entry;
- NonLocalDepEntry *ExistingResult = 0;
+ NonLocalDepEntry *ExistingResult = nullptr;
if (Entry != Cache->begin()+NumSortedEntries && Entry->getBB() == BB)
ExistingResult = &*Entry;
@@ -960,7 +961,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
if (CacheInfo->TBAATag != Loc.TBAATag) {
if (CacheInfo->TBAATag) {
CacheInfo->Pair = BBSkipFirstBlockPair();
- CacheInfo->TBAATag = 0;
+ CacheInfo->TBAATag = nullptr;
for (NonLocalDepInfo::iterator DI = CacheInfo->NonLocalDeps.begin(),
DE = CacheInfo->NonLocalDeps.end(); DI != DE; ++DI)
if (Instruction *Inst = DI->getResult().getInst())
@@ -1116,7 +1117,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
SortNonLocalDepInfoCache(*Cache, NumSortedEntries);
NumSortedEntries = Cache->size();
}
- Cache = 0;
+ Cache = nullptr;
PredList.clear();
for (BasicBlock **PI = PredCache->GetPreds(BB); *PI; ++PI) {
@@ -1126,7 +1127,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
// Get the PHI translated pointer in this predecessor. This can fail if
// not translatable, in which case the getAddr() returns null.
PHITransAddr &PredPointer = PredList.back().second;
- PredPointer.PHITranslateValue(BB, Pred, 0);
+ PredPointer.PHITranslateValue(BB, Pred, nullptr);
Value *PredPtrVal = PredPointer.getAddr();
@@ -1175,7 +1176,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
// predecessor, then we have to assume that the pointer is clobbered in
// that predecessor. We can still do PRE of the load, which would insert
// a computation of the pointer in this predecessor.
- if (PredPtrVal == 0)
+ if (!PredPtrVal)
CanTranslate = false;
// FIXME: it is entirely possible that PHI translating will end up with
@@ -1224,7 +1225,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
// for the given block. It assumes that we haven't modified any of
// our data structures while processing the current block.
- if (Cache == 0) {
+ if (!Cache) {
// Refresh the CacheInfo/Cache pointer if it got invalidated.
CacheInfo = &NonLocalPointerDeps[CacheKey];
Cache = &CacheInfo->NonLocalDeps;
@@ -1279,7 +1280,7 @@ RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair P) {
for (unsigned i = 0, e = PInfo.size(); i != e; ++i) {
Instruction *Target = PInfo[i].getResult().getInst();
- if (Target == 0) continue; // Ignore non-local dep results.
+ if (!Target) continue; // Ignore non-local dep results.
assert(Target->getParent() == PInfo[i].getBB());
// Eliminating the dirty entry from 'Cache', so update the reverse info.
diff --git a/lib/Analysis/NoAliasAnalysis.cpp b/lib/Analysis/NoAliasAnalysis.cpp
index 0c119d6..4e11e50 100644
--- a/lib/Analysis/NoAliasAnalysis.cpp
+++ b/lib/Analysis/NoAliasAnalysis.cpp
@@ -36,7 +36,7 @@ namespace {
// Note: NoAA does not call InitializeAliasAnalysis because it's
// special and does not support chaining.
DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : 0;
+ DL = DLP ? &DLP->getDataLayout() : nullptr;
}
AliasResult alias(const Location &LocA, const Location &LocB) override {
diff --git a/lib/Analysis/PHITransAddr.cpp b/lib/Analysis/PHITransAddr.cpp
index ad3685a..bfe8642 100644
--- a/lib/Analysis/PHITransAddr.cpp
+++ b/lib/Analysis/PHITransAddr.cpp
@@ -43,7 +43,7 @@ static bool CanPHITrans(Instruction *Inst) {
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void PHITransAddr::dump() const {
- if (Addr == 0) {
+ if (!Addr) {
dbgs() << "PHITransAddr: null\n";
return;
}
@@ -58,7 +58,7 @@ static bool VerifySubExpr(Value *Expr,
SmallVectorImpl<Instruction*> &InstInputs) {
// If this is a non-instruction value, there is nothing to do.
Instruction *I = dyn_cast<Instruction>(Expr);
- if (I == 0) return true;
+ if (!I) return true;
// If it's an instruction, it is either in Tmp or its operands recursively
// are.
@@ -90,7 +90,7 @@ static bool VerifySubExpr(Value *Expr,
/// structure is valid, it returns true. If invalid, it prints errors and
/// returns false.
bool PHITransAddr::Verify() const {
- if (Addr == 0) return true;
+ if (!Addr) return true;
SmallVector<Instruction*, 8> Tmp(InstInputs.begin(), InstInputs.end());
@@ -116,14 +116,14 @@ bool PHITransAddr::IsPotentiallyPHITranslatable() const {
// If the input value is not an instruction, or if it is not defined in CurBB,
// then we don't need to phi translate it.
Instruction *Inst = dyn_cast<Instruction>(Addr);
- return Inst == 0 || CanPHITrans(Inst);
+ return !Inst || CanPHITrans(Inst);
}
static void RemoveInstInputs(Value *V,
SmallVectorImpl<Instruction*> &InstInputs) {
Instruction *I = dyn_cast<Instruction>(V);
- if (I == 0) return;
+ if (!I) return;
// If the instruction is in the InstInputs list, remove it.
SmallVectorImpl<Instruction*>::iterator Entry =
@@ -147,7 +147,7 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB,
const DominatorTree *DT) {
// If this is a non-instruction value, it can't require PHI translation.
Instruction *Inst = dyn_cast<Instruction>(V);
- if (Inst == 0) return V;
+ if (!Inst) return V;
// Determine whether 'Inst' is an input to our PHI translatable expression.
bool isInput = std::count(InstInputs.begin(), InstInputs.end(), Inst);
@@ -173,7 +173,7 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB,
// If this is a non-phi value, and it is analyzable, we can incorporate it
// into the expression by making all instruction operands be inputs.
if (!CanPHITrans(Inst))
- return 0;
+ return nullptr;
// All instruction operands are now inputs (and of course, they may also be
// defined in this block, so they may need to be phi translated themselves).
@@ -187,9 +187,9 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB,
// operands need to be phi translated, and if so, reconstruct it.
if (CastInst *Cast = dyn_cast<CastInst>(Inst)) {
- if (!isSafeToSpeculativelyExecute(Cast)) return 0;
+ if (!isSafeToSpeculativelyExecute(Cast)) return nullptr;
Value *PHIIn = PHITranslateSubExpr(Cast->getOperand(0), CurBB, PredBB, DT);
- if (PHIIn == 0) return 0;
+ if (!PHIIn) return nullptr;
if (PHIIn == Cast->getOperand(0))
return Cast;
@@ -209,7 +209,7 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB,
(!DT || DT->dominates(CastI->getParent(), PredBB)))
return CastI;
}
- return 0;
+ return nullptr;
}
// Handle getelementptr with at least one PHI translatable operand.
@@ -218,7 +218,7 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB,
bool AnyChanged = false;
for (unsigned i = 0, e = GEP->getNumOperands(); i != e; ++i) {
Value *GEPOp = PHITranslateSubExpr(GEP->getOperand(i), CurBB, PredBB, DT);
- if (GEPOp == 0) return 0;
+ if (!GEPOp) return nullptr;
AnyChanged |= GEPOp != GEP->getOperand(i);
GEPOps.push_back(GEPOp);
@@ -253,7 +253,7 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB,
return GEPI;
}
}
- return 0;
+ return nullptr;
}
// Handle add with a constant RHS.
@@ -265,7 +265,7 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB,
bool isNUW = cast<BinaryOperator>(Inst)->hasNoUnsignedWrap();
Value *LHS = PHITranslateSubExpr(Inst->getOperand(0), CurBB, PredBB, DT);
- if (LHS == 0) return 0;
+ if (!LHS) return nullptr;
// If the PHI translated LHS is an add of a constant, fold the immediates.
if (BinaryOperator *BOp = dyn_cast<BinaryOperator>(LHS))
@@ -304,11 +304,11 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB,
return BO;
}
- return 0;
+ return nullptr;
}
// Otherwise, we failed.
- return 0;
+ return nullptr;
}
@@ -326,10 +326,10 @@ bool PHITransAddr::PHITranslateValue(BasicBlock *CurBB, BasicBlock *PredBB,
// Make sure the value is live in the predecessor.
if (Instruction *Inst = dyn_cast_or_null<Instruction>(Addr))
if (!DT->dominates(Inst->getParent(), PredBB))
- Addr = 0;
+ Addr = nullptr;
}
- return Addr == 0;
+ return Addr == nullptr;
}
/// PHITranslateWithInsertion - PHI translate this value into the specified
@@ -354,7 +354,7 @@ PHITranslateWithInsertion(BasicBlock *CurBB, BasicBlock *PredBB,
// If not, destroy any intermediate instructions inserted.
while (NewInsts.size() != NISize)
NewInsts.pop_back_val()->eraseFromParent();
- return 0;
+ return nullptr;
}
@@ -379,10 +379,10 @@ InsertPHITranslatedSubExpr(Value *InVal, BasicBlock *CurBB,
// Handle cast of PHI translatable value.
if (CastInst *Cast = dyn_cast<CastInst>(Inst)) {
- if (!isSafeToSpeculativelyExecute(Cast)) return 0;
+ if (!isSafeToSpeculativelyExecute(Cast)) return nullptr;
Value *OpVal = InsertPHITranslatedSubExpr(Cast->getOperand(0),
CurBB, PredBB, DT, NewInsts);
- if (OpVal == 0) return 0;
+ if (!OpVal) return nullptr;
// Otherwise insert a cast at the end of PredBB.
CastInst *New = CastInst::Create(Cast->getOpcode(),
@@ -400,7 +400,7 @@ InsertPHITranslatedSubExpr(Value *InVal, BasicBlock *CurBB,
for (unsigned i = 0, e = GEP->getNumOperands(); i != e; ++i) {
Value *OpVal = InsertPHITranslatedSubExpr(GEP->getOperand(i),
CurBB, PredBB, DT, NewInsts);
- if (OpVal == 0) return 0;
+ if (!OpVal) return nullptr;
GEPOps.push_back(OpVal);
}
@@ -436,5 +436,5 @@ InsertPHITranslatedSubExpr(Value *InVal, BasicBlock *CurBB,
}
#endif
- return 0;
+ return nullptr;
}
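All of these nullptr returns are "cannot translate" exits from a single operation: rewriting an address expression used in CurBB into an equivalent one available in PredBB. Concretely, if the tracked address is a GEP on %p and %p = phi [ %a, %Pred1 ], [ %b, %Pred2 ], translating into %Pred1 rewrites the GEP onto %a, preferring an identical GEP that already exists and dominates per the checks above; the InsertPHITranslatedSubExpr path instead materializes the missing instructions, rolling them back through NewInsts when a later operand fails.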
diff --git a/lib/Analysis/PostDominators.cpp b/lib/Analysis/PostDominators.cpp
index f23833a..6d92909 100644
--- a/lib/Analysis/PostDominators.cpp
+++ b/lib/Analysis/PostDominators.cpp
@@ -11,8 +11,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "postdomtree"
-
#include "llvm/Analysis/PostDominators.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SetOperations.h"
@@ -22,6 +20,8 @@
#include "llvm/Support/GenericDomTreeConstruction.h"
using namespace llvm;
+#define DEBUG_TYPE "postdomtree"
+
//===----------------------------------------------------------------------===//
// PostDominatorTree Implementation
//===----------------------------------------------------------------------===//
diff --git a/lib/Analysis/RegionInfo.cpp b/lib/Analysis/RegionInfo.cpp
index f4da598..7f88ae1 100644
--- a/lib/Analysis/RegionInfo.cpp
+++ b/lib/Analysis/RegionInfo.cpp
@@ -9,7 +9,6 @@
// Detects single entry single exit regions in the control flow graph.
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "region"
#include "llvm/Analysis/RegionInfo.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/Statistic.h"
@@ -19,10 +18,13 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include <algorithm>
+#include <iterator>
#include <set>
using namespace llvm;
+#define DEBUG_TYPE "region"
+
// Always verify if expensive checking is enabled.
#ifdef XDEBUG
static bool VerifyRegionInfo = true;
@@ -62,9 +64,6 @@ Region::~Region() {
// Only clean the cache for this Region. Caches of child Regions will be
// cleaned when the child Regions are deleted.
BBNodeMap.clear();
-
- for (iterator I = begin(), E = end(); I != E; ++I)
- delete *I;
}
void Region::replaceEntry(BasicBlock *BB) {
@@ -88,7 +87,7 @@ void Region::replaceEntryRecursive(BasicBlock *NewEntry) {
R->replaceEntry(NewEntry);
for (Region::const_iterator RI = R->begin(), RE = R->end(); RI != RE; ++RI)
if ((*RI)->getEntry() == OldEntry)
- RegionQueue.push_back(*RI);
+ RegionQueue.push_back(RI->get());
}
}
@@ -104,7 +103,7 @@ void Region::replaceExitRecursive(BasicBlock *NewExit) {
R->replaceExit(NewExit);
for (Region::const_iterator RI = R->begin(), RE = R->end(); RI != RE; ++RI)
if ((*RI)->getExit() == OldExit)
- RegionQueue.push_back(*RI);
+ RegionQueue.push_back(RI->get());
}
}
@@ -128,8 +127,8 @@ bool Region::contains(const Loop *L) const {
// BBs that are not part of any loop are element of the Loop
// described by the NULL pointer. This loop is not part of any region,
// except if the region describes the whole function.
- if (L == 0)
- return getExit() == 0;
+ if (!L)
+ return getExit() == nullptr;
if (!contains(L->getHeader()))
return false;
@@ -147,7 +146,7 @@ bool Region::contains(const Loop *L) const {
Loop *Region::outermostLoopInRegion(Loop *L) const {
if (!contains(L))
- return 0;
+ return nullptr;
while (L && contains(L->getParentLoop())) {
L = L->getParentLoop();
@@ -165,14 +164,14 @@ Loop *Region::outermostLoopInRegion(LoopInfo *LI, BasicBlock* BB) const {
BasicBlock *Region::getEnteringBlock() const {
BasicBlock *entry = getEntry();
BasicBlock *Pred;
- BasicBlock *enteringBlock = 0;
+ BasicBlock *enteringBlock = nullptr;
for (pred_iterator PI = pred_begin(entry), PE = pred_end(entry); PI != PE;
++PI) {
Pred = *PI;
if (DT->getNode(Pred) && !contains(Pred)) {
if (enteringBlock)
- return 0;
+ return nullptr;
enteringBlock = Pred;
}
@@ -184,17 +183,17 @@ BasicBlock *Region::getEnteringBlock() const {
BasicBlock *Region::getExitingBlock() const {
BasicBlock *exit = getExit();
BasicBlock *Pred;
- BasicBlock *exitingBlock = 0;
+ BasicBlock *exitingBlock = nullptr;
if (!exit)
- return 0;
+ return nullptr;
for (pred_iterator PI = pred_begin(exit), PE = pred_end(exit); PI != PE;
++PI) {
Pred = *PI;
if (contains(Pred)) {
if (exitingBlock)
- return 0;
+ return nullptr;
exitingBlock = Pred;
}
@@ -295,7 +294,7 @@ Region* Region::getSubRegionNode(BasicBlock *BB) const {
Region *R = RI->getRegionFor(BB);
if (!R || R == this)
- return 0;
+ return nullptr;
// If we pass the BB out of this region, that means our code is broken.
assert(contains(R) && "BB not in current region!");
@@ -304,7 +303,7 @@ Region* Region::getSubRegionNode(BasicBlock *BB) const {
R = R->getParent();
if (R->getEntry() != BB)
- return 0;
+ return nullptr;
return R;
}
@@ -333,18 +332,20 @@ RegionNode* Region::getNode(BasicBlock *BB) const {
void Region::transferChildrenTo(Region *To) {
for (iterator I = begin(), E = end(); I != E; ++I) {
(*I)->parent = To;
- To->children.push_back(*I);
+ To->children.push_back(std::move(*I));
}
children.clear();
}
void Region::addSubRegion(Region *SubRegion, bool moveChildren) {
- assert(SubRegion->parent == 0 && "SubRegion already has a parent!");
- assert(std::find(begin(), end(), SubRegion) == children.end()
- && "Subregion already exists!");
+ assert(!SubRegion->parent && "SubRegion already has a parent!");
+ assert(std::find_if(begin(), end(), [&](const std::unique_ptr<Region> &R) {
+ return R.get() == SubRegion;
+ }) == children.end() &&
+ "Subregion already exists!");
SubRegion->parent = this;
- children.push_back(SubRegion);
+ children.push_back(std::unique_ptr<Region>(SubRegion));
if (!moveChildren)
return;
@@ -360,23 +361,27 @@ void Region::addSubRegion(Region *SubRegion, bool moveChildren) {
RI->setRegionFor(BB, SubRegion);
}
- std::vector<Region*> Keep;
+ std::vector<std::unique_ptr<Region>> Keep;
for (iterator I = begin(), E = end(); I != E; ++I)
- if (SubRegion->contains(*I) && *I != SubRegion) {
- SubRegion->children.push_back(*I);
+ if (SubRegion->contains(I->get()) && I->get() != SubRegion) {
(*I)->parent = SubRegion;
+ SubRegion->children.push_back(std::move(*I));
} else
- Keep.push_back(*I);
+ Keep.push_back(std::move(*I));
children.clear();
- children.insert(children.begin(), Keep.begin(), Keep.end());
+ children.insert(children.begin(),
+ std::move_iterator<RegionSet::iterator>(Keep.begin()),
+ std::move_iterator<RegionSet::iterator>(Keep.end()));
}
Region *Region::removeSubRegion(Region *Child) {
assert(Child->parent == this && "Child is not a child of this region!");
- Child->parent = 0;
- RegionSet::iterator I = std::find(children.begin(), children.end(), Child);
+ Child->parent = nullptr;
+ RegionSet::iterator I = std::find_if(
+ children.begin(), children.end(),
+ [&](const std::unique_ptr<Region> &R) { return R.get() == Child; });
assert(I != children.end() && "Region does not exist. Unable to remove.");
children.erase(children.begin()+(I-begin()));
return Child;
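These RegionInfo hunks are the mechanical fallout of the children container switching to std::unique_ptr<Region>: raw-pointer std::find and push_back become std::find_if over .get() and push_back(std::move(...)), and the hand-written delete loop disappears from ~Region. The ownership pattern in reduced form (type and member names here are illustrative, not the RegionInfo API):

    #include <algorithm>
    #include <memory>
    #include <vector>

    struct TreeNode {
      TreeNode *Parent = nullptr;
      std::vector<std::unique_ptr<TreeNode>> Children; // owns the subtree

      void adopt(TreeNode *N) { // takes ownership, like addSubRegion
        N->Parent = this;
        Children.push_back(std::unique_ptr<TreeNode>(N));
      }

      bool hasChild(const TreeNode *N) const { // raw pointers stay for queries
        return std::find_if(Children.begin(), Children.end(),
                            [&](const std::unique_ptr<TreeNode> &C) {
                              return C.get() == N;
                            }) != Children.end();
      }
    }; // no user-written destructor needed; unique_ptr frees the children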
@@ -385,7 +390,7 @@ Region *Region::removeSubRegion(Region *Child) {
unsigned Region::getDepth() const {
unsigned Depth = 0;
- for (Region *R = parent; R != 0; R = R->parent)
+ for (Region *R = parent; R != nullptr; R = R->parent)
++Depth;
return Depth;
@@ -395,12 +400,12 @@ Region *Region::getExpandedRegion() const {
unsigned NumSuccessors = exit->getTerminator()->getNumSuccessors();
if (NumSuccessors == 0)
- return NULL;
+ return nullptr;
for (pred_iterator PI = pred_begin(getExit()), PE = pred_end(getExit());
PI != PE; ++PI)
if (!DT->dominates(getEntry(), *PI))
- return NULL;
+ return nullptr;
Region *R = RI->getRegionFor(exit);
@@ -408,7 +413,7 @@ Region *Region::getExpandedRegion() const {
if (exit->getTerminator()->getNumSuccessors() == 1)
return new Region(getEntry(), *succ_begin(exit), RI, DT);
else
- return NULL;
+ return nullptr;
}
while (R->getParent() && R->getParent()->getEntry() == exit)
@@ -418,7 +423,7 @@ Region *Region::getExpandedRegion() const {
for (pred_iterator PI = pred_begin(getExit()), PE = pred_end(getExit());
PI != PE; ++PI)
if (!DT->dominates(R->getExit(), *PI))
- return NULL;
+ return nullptr;
return new Region(getEntry(), R->getExit(), RI, DT);
}
@@ -577,7 +582,7 @@ Region *RegionInfo::createRegion(BasicBlock *entry, BasicBlock *exit) {
assert(entry && exit && "entry and exit must not be null!");
if (isTrivialRegion(entry, exit))
- return 0;
+ return nullptr;
Region *region = new Region(entry, exit, this, DT);
BBtoRegion.insert(std::make_pair(entry, region));
@@ -600,7 +605,7 @@ void RegionInfo::findRegionsWithEntry(BasicBlock *entry, BBtoBBMap *ShortCut) {
if (!N)
return;
- Region *lastRegion= 0;
+ Region *lastRegion= nullptr;
BasicBlock *lastExit = entry;
// As only a BasicBlock that postdominates entry can finish a region, walk the
@@ -680,12 +685,12 @@ void RegionInfo::releaseMemory() {
BBtoRegion.clear();
if (TopLevelRegion)
delete TopLevelRegion;
- TopLevelRegion = 0;
+ TopLevelRegion = nullptr;
}
RegionInfo::RegionInfo() : FunctionPass(ID) {
initializeRegionInfoPass(*PassRegistry::getPassRegistry());
- TopLevelRegion = 0;
+ TopLevelRegion = nullptr;
}
RegionInfo::~RegionInfo() {
@@ -710,7 +715,7 @@ bool RegionInfo::runOnFunction(Function &F) {
PDT = &getAnalysis<PostDominatorTree>();
DF = &getAnalysis<DominanceFrontier>();
- TopLevelRegion = new Region(&F.getEntryBlock(), 0, this, DT, 0);
+ TopLevelRegion = new Region(&F.getEntryBlock(), nullptr, this, DT, nullptr);
updateStatistics(TopLevelRegion);
Calculate(F);
@@ -744,7 +749,7 @@ void RegionInfo::verifyAnalysis() const {
Region *RegionInfo::getRegionFor(BasicBlock *BB) const {
BBtoRegionMap::const_iterator I=
BBtoRegion.find(BB);
- return I != BBtoRegion.end() ? I->second : 0;
+ return I != BBtoRegion.end() ? I->second : nullptr;
}
void RegionInfo::setRegionFor(BasicBlock *BB, Region *R) {
@@ -756,7 +761,7 @@ Region *RegionInfo::operator[](BasicBlock *BB) const {
}
BasicBlock *RegionInfo::getMaxRegionExit(BasicBlock *BB) const {
- BasicBlock *Exit = NULL;
+ BasicBlock *Exit = nullptr;
while (true) {
// Get largest region that starts at BB.
diff --git a/lib/Analysis/RegionPass.cpp b/lib/Analysis/RegionPass.cpp
index 12d7ca3..3c7798f 100644
--- a/lib/Analysis/RegionPass.cpp
+++ b/lib/Analysis/RegionPass.cpp
@@ -17,10 +17,11 @@
#include "llvm/Analysis/RegionIterator.h"
#include "llvm/Support/Timer.h"
-#define DEBUG_TYPE "regionpassmgr"
#include "llvm/Support/Debug.h"
using namespace llvm;
+#define DEBUG_TYPE "regionpassmgr"
+
//===----------------------------------------------------------------------===//
// RGPassManager
//
@@ -31,15 +32,15 @@ RGPassManager::RGPassManager()
: FunctionPass(ID), PMDataManager() {
skipThisRegion = false;
redoThisRegion = false;
- RI = NULL;
- CurrentRegion = NULL;
+ RI = nullptr;
+ CurrentRegion = nullptr;
}
// Recurse through all subregions and all regions into RQ.
-static void addRegionIntoQueue(Region *R, std::deque<Region *> &RQ) {
- RQ.push_back(R);
- for (Region::iterator I = R->begin(), E = R->end(); I != E; ++I)
- addRegionIntoQueue(*I, RQ);
+static void addRegionIntoQueue(Region &R, std::deque<Region *> &RQ) {
+ RQ.push_back(&R);
+ for (const auto &E : R)
+ addRegionIntoQueue(*E, RQ);
}
/// Pass Manager itself does not invalidate any analysis info.
@@ -57,7 +58,7 @@ bool RGPassManager::runOnFunction(Function &F) {
// Collect inherited analysis from Module level pass manager.
populateInheritedAnalysis(TPM->activeStack);
- addRegionIntoQueue(RI->getTopLevelRegion(), RQ);
+ addRegionIntoQueue(*RI->getTopLevelRegion(), RQ);
if (RQ.empty()) // No regions, skip calling finalizers
return false;
@@ -185,7 +186,6 @@ private:
public:
static char ID;
- PrintRegionPass() : RegionPass(ID), Out(dbgs()) {}
PrintRegionPass(const std::string &B, raw_ostream &o)
: RegionPass(ID), Banner(B), Out(o) {}
diff --git a/lib/Analysis/RegionPrinter.cpp b/lib/Analysis/RegionPrinter.cpp
index 6467f47..893210a 100644
--- a/lib/Analysis/RegionPrinter.cpp
+++ b/lib/Analysis/RegionPrinter.cpp
@@ -98,31 +98,31 @@ struct DOTGraphTraits<RegionInfo*> : public DOTGraphTraits<RegionNode*> {
// Print the cluster of the subregions. This groups the single basic blocks
// and adds a different background color for each group.
- static void printRegionCluster(const Region *R, GraphWriter<RegionInfo*> &GW,
+ static void printRegionCluster(const Region &R, GraphWriter<RegionInfo*> &GW,
unsigned depth = 0) {
raw_ostream &O = GW.getOStream();
- O.indent(2 * depth) << "subgraph cluster_" << static_cast<const void*>(R)
+ O.indent(2 * depth) << "subgraph cluster_" << static_cast<const void*>(&R)
<< " {\n";
O.indent(2 * (depth + 1)) << "label = \"\";\n";
- if (!onlySimpleRegions || R->isSimple()) {
+ if (!onlySimpleRegions || R.isSimple()) {
O.indent(2 * (depth + 1)) << "style = filled;\n";
O.indent(2 * (depth + 1)) << "color = "
- << ((R->getDepth() * 2 % 12) + 1) << "\n";
+ << ((R.getDepth() * 2 % 12) + 1) << "\n";
} else {
O.indent(2 * (depth + 1)) << "style = solid;\n";
O.indent(2 * (depth + 1)) << "color = "
- << ((R->getDepth() * 2 % 12) + 2) << "\n";
+ << ((R.getDepth() * 2 % 12) + 2) << "\n";
}
- for (Region::const_iterator RI = R->begin(), RE = R->end(); RI != RE; ++RI)
- printRegionCluster(*RI, GW, depth + 1);
+ for (Region::const_iterator RI = R.begin(), RE = R.end(); RI != RE; ++RI)
+ printRegionCluster(**RI, GW, depth + 1);
- RegionInfo *RI = R->getRegionInfo();
+ RegionInfo *RI = R.getRegionInfo();
- for (const auto &BB : R->blocks())
- if (RI->getRegionFor(BB) == R)
+ for (const auto &BB : R.blocks())
+ if (RI->getRegionFor(BB) == &R)
O.indent(2 * (depth + 1)) << "Node"
<< static_cast<const void*>(RI->getTopLevelRegion()->getBBNode(BB))
<< ";\n";
@@ -134,7 +134,7 @@ struct DOTGraphTraits<RegionInfo*> : public DOTGraphTraits<RegionNode*> {
GraphWriter<RegionInfo*> &GW) {
raw_ostream &O = GW.getOStream();
O << "\tcolorscheme = \"paired12\"\n";
- printRegionCluster(RI->getTopLevelRegion(), GW, 4);
+ printRegionCluster(*RI->getTopLevelRegion(), GW, 4);
}
};
} //end namespace llvm
diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp
index 08de621..42a7aa2 100644
--- a/lib/Analysis/ScalarEvolution.cpp
+++ b/lib/Analysis/ScalarEvolution.cpp
@@ -58,7 +58,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "scalar-evolution"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
@@ -89,6 +88,8 @@
#include <algorithm>
using namespace llvm;
+#define DEBUG_TYPE "scalar-evolution"
+
STATISTIC(NumArrayLenItCounts,
"Number of trip counts computed with array length");
STATISTIC(NumTripCountsComputed,
@@ -182,7 +183,7 @@ void SCEV::print(raw_ostream &OS) const {
case scUMaxExpr:
case scSMaxExpr: {
const SCEVNAryExpr *NAry = cast<SCEVNAryExpr>(this);
- const char *OpStr = 0;
+ const char *OpStr = nullptr;
switch (NAry->getSCEVType()) {
case scAddExpr: OpStr = " + "; break;
case scMulExpr: OpStr = " * "; break;
@@ -312,7 +313,7 @@ const SCEV *ScalarEvolution::getConstant(ConstantInt *V) {
FoldingSetNodeID ID;
ID.AddInteger(scConstant);
ID.AddPointer(V);
- void *IP = 0;
+ void *IP = nullptr;
if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
SCEV *S = new (SCEVAllocator) SCEVConstant(ID.Intern(SCEVAllocator), V);
UniqueSCEVs.InsertNode(S, IP);
@@ -365,7 +366,7 @@ void SCEVUnknown::deleted() {
SE->UniqueSCEVs.RemoveNode(this);
// Release the value.
- setValPtr(0);
+ setValPtr(nullptr);
}
void SCEVUnknown::allUsesReplacedWith(Value *New) {
@@ -829,7 +830,7 @@ const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op,
ID.AddInteger(scTruncate);
ID.AddPointer(Op);
ID.AddPointer(Ty);
- void *IP = 0;
+ void *IP = nullptr;
if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
// Fold if the operand is constant.
@@ -919,7 +920,7 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op,
ID.AddInteger(scZeroExtend);
ID.AddPointer(Op);
ID.AddPointer(Ty);
- void *IP = 0;
+ void *IP = nullptr;
if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
// zext(trunc(x)) --> zext(x) or x or trunc(x)
@@ -1072,7 +1073,7 @@ static const SCEV *getOverflowLimitForStep(const SCEV *Step,
return SE->getConstant(APInt::getSignedMaxValue(BitWidth) -
SE->getSignedRange(Step).getSignedMin());
}
- return 0;
+ return nullptr;
}
// The recurrence AR has been shown to have no signed wrap. Typically, if we can
@@ -1091,19 +1092,18 @@ static const SCEV *getPreStartForSignExtend(const SCEVAddRecExpr *AR,
// Check for a simple looking step prior to loop entry.
const SCEVAddExpr *SA = dyn_cast<SCEVAddExpr>(Start);
if (!SA)
- return 0;
+ return nullptr;
// Create an AddExpr for "PreStart" after subtracting Step. Full SCEV
// subtraction is expensive. For this purpose, perform a quick and dirty
// difference, by checking for Step in the operand list.
SmallVector<const SCEV *, 4> DiffOps;
- for (SCEVAddExpr::op_iterator I = SA->op_begin(), E = SA->op_end();
- I != E; ++I) {
- if (*I != Step)
- DiffOps.push_back(*I);
- }
+ for (const SCEV *Op : SA->operands())
+ if (Op != Step)
+ DiffOps.push_back(Op);
+
if (DiffOps.size() == SA->getNumOperands())
- return 0;
+ return nullptr;
// This is a postinc AR. Check for overflow on the preinc recurrence using the
// same three conditions that getSignExtendedExpr checks.
@@ -1139,7 +1139,7 @@ static const SCEV *getPreStartForSignExtend(const SCEVAddRecExpr *AR,
SE->isLoopEntryGuardedByCond(L, Pred, PreStart, OverflowLimit)) {
return PreStart;
}
- return 0;
+ return nullptr;
}
// Get the normalized sign-extended expression for this AddRec's Start.
@@ -1181,7 +1181,7 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
ID.AddInteger(scSignExtend);
ID.AddPointer(Op);
ID.AddPointer(Ty);
- void *IP = 0;
+ void *IP = nullptr;
if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
// If the input value is provably positive, build a zext instead.
@@ -1201,6 +1201,23 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
return getTruncateOrSignExtend(X, Ty);
}
+ // sext(C1 + (C2 * x)) --> C1 + sext(C2 * x) if 0 < C1 < C2 and C2 is a power of 2
+ if (auto SA = dyn_cast<SCEVAddExpr>(Op)) {
+ if (SA->getNumOperands() == 2) {
+ auto SC1 = dyn_cast<SCEVConstant>(SA->getOperand(0));
+ auto SMul = dyn_cast<SCEVMulExpr>(SA->getOperand(1));
+ if (SMul && SC1) {
+ if (auto SC2 = dyn_cast<SCEVConstant>(SMul->getOperand(0))) {
+ const APInt &C1 = SC1->getValue()->getValue();
+ const APInt &C2 = SC2->getValue()->getValue();
+ if (C1.isStrictlyPositive() && C2.isStrictlyPositive() &&
+ C2.ugt(C1) && C2.isPowerOf2())
+ return getAddExpr(getSignExtendExpr(SC1, Ty),
+ getSignExtendExpr(SMul, Ty));
+ }
+ }
+ }
+ }
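// [Editor's note: illustrative walkthrough, not part of this commit. With
// i8 operands, take C1 = 3 and C2 = 4, so Op = (3 + (4 * %x)): both
// constants are positive, 4 > 3, and 4 is a power of 2. The multiply
// (4 * %x) leaves the low two bits zero, and adding 3 < 4 only fills those
// low bits, so no carry reaches the sign bit; the sign of the sum equals
// the sign of (4 * %x), making sext(3 + (4 * %x)) --> 3 + sext(4 * %x) safe.]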
// If the input value is a chrec scev, and we can prove that the value
// did not overflow the old, smaller, value, we can sign extend all of the
// operands (often constants). This allows analysis of something like
@@ -1292,6 +1309,22 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
L, AR->getNoWrapFlags());
}
}
+ // If Start and Step are constants, check if we can apply this
+ // transformation:
+ // sext{C1,+,C2} --> C1 + sext{0,+,C2}
+ // if 0 < C1 < C2 and C2 is a power of 2.
+ auto SC1 = dyn_cast<SCEVConstant>(Start);
+ auto SC2 = dyn_cast<SCEVConstant>(Step);
+ if (SC1 && SC2) {
+ const APInt &C1 = SC1->getValue()->getValue();
+ const APInt &C2 = SC2->getValue()->getValue();
+ if (C1.isStrictlyPositive() && C2.isStrictlyPositive() && C2.ugt(C1) &&
+ C2.isPowerOf2()) {
+ Start = getSignExtendExpr(Start, Ty);
+ const SCEV *NewAR = getAddRecExpr(getConstant(AR->getType(), 0), Step,
+ L, AR->getNoWrapFlags());
+ return getAddExpr(Start, getSignExtendExpr(NewAR, Ty));
+ }
+ }
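// [Editor's note: illustrative example, not part of this commit. The same
// reasoning applied to a recurrence: with i8 constants C1 = 1 and C2 = 4,
// sext{1,+,4} --> 1 + sext{0,+,4}. Every value of {1,+,4} is 1 + 4*k, and
// since 1 < 4 with 4 a power of 2, the added start never carries into the
// bits produced by the step, so it can be sign-extended separately.]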
}
// The cast wasn't folded; create an explicit cast node.
@@ -1340,9 +1373,8 @@ const SCEV *ScalarEvolution::getAnyExtendExpr(const SCEV *Op,
// Force the cast to be folded into the operands of an addrec.
if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Op)) {
SmallVector<const SCEV *, 4> Ops;
- for (SCEVAddRecExpr::op_iterator I = AR->op_begin(), E = AR->op_end();
- I != E; ++I)
- Ops.push_back(getAnyExtendExpr(*I, Ty));
+ for (const SCEV *Op : AR->operands())
+ Ops.push_back(getAnyExtendExpr(Op, Ty));
return getAddRecExpr(Ops, AR->getLoop(), SCEV::FlagNW);
}
@@ -1811,7 +1843,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
ID.AddInteger(scAddExpr);
for (unsigned i = 0, e = Ops.size(); i != e; ++i)
ID.AddPointer(Ops[i]);
- void *IP = 0;
+ void *IP = nullptr;
SCEVAddExpr *S =
static_cast<SCEVAddExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
if (!S) {
@@ -2105,7 +2137,7 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
ID.AddInteger(scMulExpr);
for (unsigned i = 0, e = Ops.size(); i != e; ++i)
ID.AddPointer(Ops[i]);
- void *IP = 0;
+ void *IP = nullptr;
SCEVMulExpr *S =
static_cast<SCEVMulExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
if (!S) {
@@ -2230,7 +2262,7 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS,
ID.AddInteger(scUDivExpr);
ID.AddPointer(LHS);
ID.AddPointer(RHS);
- void *IP = 0;
+ void *IP = nullptr;
if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
SCEV *S = new (SCEVAllocator) SCEVUDivExpr(ID.Intern(SCEVAllocator),
LHS, RHS);
@@ -2425,7 +2457,7 @@ ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands,
for (unsigned i = 0, e = Operands.size(); i != e; ++i)
ID.AddPointer(Operands[i]);
ID.AddPointer(L);
- void *IP = 0;
+ void *IP = nullptr;
SCEVAddRecExpr *S =
static_cast<SCEVAddRecExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
if (!S) {
@@ -2533,7 +2565,7 @@ ScalarEvolution::getSMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {
ID.AddInteger(scSMaxExpr);
for (unsigned i = 0, e = Ops.size(); i != e; ++i)
ID.AddPointer(Ops[i]);
- void *IP = 0;
+ void *IP = nullptr;
if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
std::uninitialized_copy(Ops.begin(), Ops.end(), O);
@@ -2637,7 +2669,7 @@ ScalarEvolution::getUMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {
ID.AddInteger(scUMaxExpr);
for (unsigned i = 0, e = Ops.size(); i != e; ++i)
ID.AddPointer(Ops[i]);
- void *IP = 0;
+ void *IP = nullptr;
if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
std::uninitialized_copy(Ops.begin(), Ops.end(), O);
@@ -2704,7 +2736,7 @@ const SCEV *ScalarEvolution::getUnknown(Value *V) {
FoldingSetNodeID ID;
ID.AddInteger(scUnknown);
ID.AddPointer(V);
- void *IP = 0;
+ void *IP = nullptr;
if (SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) {
assert(cast<SCEVUnknown>(S)->getValue() == V &&
"Stale SCEVUnknown in uniquing map!");
@@ -3010,7 +3042,7 @@ const SCEV *ScalarEvolution::getPointerBase(const SCEV *V) {
return getPointerBase(Cast->getOperand());
}
else if (const SCEVNAryExpr *NAry = dyn_cast<SCEVNAryExpr>(V)) {
- const SCEV *PtrOp = 0;
+ const SCEV *PtrOp = nullptr;
for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end();
I != E; ++I) {
if ((*I)->getType()->isPointerTy()) {
@@ -3090,20 +3122,20 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) {
// The loop may have multiple entrances or multiple exits; we can analyze
// this phi as an addrec if it has a unique entry value and a unique
// backedge value.
- Value *BEValueV = 0, *StartValueV = 0;
+ Value *BEValueV = nullptr, *StartValueV = nullptr;
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
Value *V = PN->getIncomingValue(i);
if (L->contains(PN->getIncomingBlock(i))) {
if (!BEValueV) {
BEValueV = V;
} else if (BEValueV != V) {
- BEValueV = 0;
+ BEValueV = nullptr;
break;
}
} else if (!StartValueV) {
StartValueV = V;
} else if (StartValueV != V) {
- StartValueV = 0;
+ StartValueV = nullptr;
break;
}
}
@@ -3363,7 +3395,7 @@ ScalarEvolution::GetMinTrailingZeros(const SCEV *S) {
// For a SCEVUnknown, ask ValueTracking.
unsigned BitWidth = getTypeSizeInBits(U->getType());
APInt Zeros(BitWidth, 0), Ones(BitWidth, 0);
- ComputeMaskedBits(U->getValue(), Zeros, Ones);
+ computeKnownBits(U->getValue(), Zeros, Ones);
return Zeros.countTrailingOnes();
}
@@ -3502,7 +3534,7 @@ ScalarEvolution::getUnsignedRange(const SCEV *S) {
if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
// For a SCEVUnknown, ask ValueTracking.
APInt Zeros(BitWidth, 0), Ones(BitWidth, 0);
- ComputeMaskedBits(U->getValue(), Zeros, Ones, DL);
+ computeKnownBits(U->getValue(), Zeros, Ones, DL);
if (Ones == ~Zeros + 1)
return setUnsignedRange(U, ConservativeResult);
return setUnsignedRange(U,
@@ -3755,13 +3787,13 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
// Instcombine's ShrinkDemandedConstant may strip bits out of
// constants, obscuring what would otherwise be a low-bits mask.
- // Use ComputeMaskedBits to compute what ShrinkDemandedConstant
+ // Use computeKnownBits to compute what ShrinkDemandedConstant
// knew about to reconstruct a low-bits mask value.
unsigned LZ = A.countLeadingZeros();
unsigned TZ = A.countTrailingZeros();
unsigned BitWidth = A.getBitWidth();
APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
- ComputeMaskedBits(U->getOperand(0), KnownZero, KnownOne, DL);
+ computeKnownBits(U->getOperand(0), KnownZero, KnownOne, DL);
APInt EffectiveMask =
APInt::getLowBitsSet(BitWidth, BitWidth - LZ - TZ).shl(TZ);
@@ -4316,9 +4348,9 @@ ScalarEvolution::BackedgeTakenInfo::getExact(ScalarEvolution *SE) const {
if (!ExitNotTaken.ExitingBlock) return SE->getCouldNotCompute();
assert(ExitNotTaken.ExactNotTaken && "uninitialized not-taken info");
- const SCEV *BECount = 0;
+ const SCEV *BECount = nullptr;
for (const ExitNotTakenInfo *ENT = &ExitNotTaken;
- ENT != 0; ENT = ENT->getNextExit()) {
+ ENT != nullptr; ENT = ENT->getNextExit()) {
assert(ENT->ExactNotTaken != SE->getCouldNotCompute() && "bad exit SCEV");
@@ -4336,7 +4368,7 @@ const SCEV *
ScalarEvolution::BackedgeTakenInfo::getExact(BasicBlock *ExitingBlock,
ScalarEvolution *SE) const {
for (const ExitNotTakenInfo *ENT = &ExitNotTaken;
- ENT != 0; ENT = ENT->getNextExit()) {
+ ENT != nullptr; ENT = ENT->getNextExit()) {
if (ENT->ExitingBlock == ExitingBlock)
return ENT->ExactNotTaken;
@@ -4359,7 +4391,7 @@ bool ScalarEvolution::BackedgeTakenInfo::hasOperand(const SCEV *S,
return false;
for (const ExitNotTakenInfo *ENT = &ExitNotTaken;
- ENT != 0; ENT = ENT->getNextExit()) {
+ ENT != nullptr; ENT = ENT->getNextExit()) {
if (ENT->ExactNotTaken != SE->getCouldNotCompute()
&& SE->hasOperand(ENT->ExactNotTaken, S)) {
@@ -4398,8 +4430,8 @@ ScalarEvolution::BackedgeTakenInfo::BackedgeTakenInfo(
/// clear - Invalidate this result and free the ExitNotTakenInfo array.
void ScalarEvolution::BackedgeTakenInfo::clear() {
- ExitNotTaken.ExitingBlock = 0;
- ExitNotTaken.ExactNotTaken = 0;
+ ExitNotTaken.ExitingBlock = nullptr;
+ ExitNotTaken.ExactNotTaken = nullptr;
delete[] ExitNotTaken.getNextExit();
}
@@ -4410,38 +4442,63 @@ ScalarEvolution::ComputeBackedgeTakenCount(const Loop *L) {
SmallVector<BasicBlock *, 8> ExitingBlocks;
L->getExitingBlocks(ExitingBlocks);
- // Examine all exits and pick the most conservative values.
- const SCEV *MaxBECount = getCouldNotCompute();
+ SmallVector<std::pair<BasicBlock *, const SCEV *>, 4> ExitCounts;
bool CouldComputeBECount = true;
BasicBlock *Latch = L->getLoopLatch(); // may be NULL.
- const SCEV *LatchMaxCount = 0;
- SmallVector<std::pair<BasicBlock *, const SCEV *>, 4> ExitCounts;
+ const SCEV *MustExitMaxBECount = nullptr;
+ const SCEV *MayExitMaxBECount = nullptr;
+
+ // Compute the ExitLimit for each loop exit. Use this to populate ExitCounts
+ // and compute maxBECount.
for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
- ExitLimit EL = ComputeExitLimit(L, ExitingBlocks[i]);
+ BasicBlock *ExitBB = ExitingBlocks[i];
+ ExitLimit EL = ComputeExitLimit(L, ExitBB);
+
+ // 1. For each exit that can be computed, add an entry to ExitCounts.
+ // CouldComputeBECount is true only if all exits can be computed.
if (EL.Exact == getCouldNotCompute())
// We couldn't compute an exact value for this exit, so
// we won't be able to compute an exact value for the loop.
CouldComputeBECount = false;
else
- ExitCounts.push_back(std::make_pair(ExitingBlocks[i], EL.Exact));
-
- if (MaxBECount == getCouldNotCompute())
- MaxBECount = EL.Max;
- else if (EL.Max != getCouldNotCompute()) {
- // We cannot take the "min" MaxBECount, because non-unit stride loops may
- // skip some loop tests. Taking the max over the exits is sufficiently
- // conservative. TODO: We could do better taking into consideration
- // non-latch exits that dominate the latch.
- if (EL.MustExit && ExitingBlocks[i] == Latch)
- LatchMaxCount = EL.Max;
- else
- MaxBECount = getUMaxFromMismatchedTypes(MaxBECount, EL.Max);
+ ExitCounts.push_back(std::make_pair(ExitBB, EL.Exact));
+
+ // 2. Derive the loop's MaxBECount from each exit's max number of
+ // non-exiting iterations. Partition the loop exits into two kinds:
+ // LoopMustExits and LoopMayExits.
+ //
+ // A LoopMustExit meets two requirements:
+ //
+ // (a) Its ExitLimit.MustExit flag must be set, which indicates that the
+ // exit test condition cannot be skipped (the tested variable has unit
+ // stride, or the test is a less-than or greater-than comparison rather
+ // than an inequality (!=) test, which a non-unit stride could step over).
+ //
+ // (b) It must dominate the loop latch, hence must be tested on every loop
+ // iteration.
+ //
+ // If any computable LoopMustExit is found, then MaxBECount is the minimum
+ // EL.Max of computable LoopMustExits. Otherwise, MaxBECount is
+ // conservatively the maximum EL.Max, where CouldNotCompute is considered
+ // greater than any computable EL.Max.
+ if (EL.MustExit && EL.Max != getCouldNotCompute() && Latch &&
+ DT->dominates(ExitBB, Latch)) {
+ if (!MustExitMaxBECount)
+ MustExitMaxBECount = EL.Max;
+ else {
+ MustExitMaxBECount =
+ getUMinFromMismatchedTypes(MustExitMaxBECount, EL.Max);
+ }
+ } else if (MayExitMaxBECount != getCouldNotCompute()) {
+ if (!MayExitMaxBECount || EL.Max == getCouldNotCompute())
+ MayExitMaxBECount = EL.Max;
+ else {
+ MayExitMaxBECount =
+ getUMaxFromMismatchedTypes(MayExitMaxBECount, EL.Max);
+ }
}
}
- // Be more precise in the easy case of a loop latch that must exit.
- if (LatchMaxCount) {
- MaxBECount = getUMinFromMismatchedTypes(MaxBECount, LatchMaxCount);
- }
+ const SCEV *MaxBECount = MustExitMaxBECount ? MustExitMaxBECount :
+ (MayExitMaxBECount ? MayExitMaxBECount : getCouldNotCompute());
return BackedgeTakenInfo(ExitCounts, CouldComputeBECount, MaxBECount);
}
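// [Editor's note: illustrative example, not part of this commit. Suppose a
// loop has two exits that both dominate the latch and so are tested every
// iteration, with EL.Max = 100 and EL.Max = 10: the loop leaves through
// whichever fires first, so min(100, 10) = 10 is a sound MaxBECount. If
// instead an exit sits under a condition that some iterations skip, only
// max() is sound, with CouldNotCompute acting as plus-infinity in that max.]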
@@ -4454,7 +4511,7 @@ ScalarEvolution::ComputeExitLimit(const Loop *L, BasicBlock *ExitingBlock) {
// exit at this block and remember the exit block and whether all other targets
// lead to the loop header.
bool MustExecuteLoopHeader = true;
- BasicBlock *Exit = 0;
+ BasicBlock *Exit = nullptr;
for (succ_iterator SI = succ_begin(ExitingBlock), SE = succ_end(ExitingBlock);
SI != SE; ++SI)
if (!L->contains(*SI)) {
@@ -4800,7 +4857,7 @@ ScalarEvolution::ComputeLoadConstantCompareExitLimit(
return getCouldNotCompute();
// Okay, we allow one non-constant index into the GEP instruction.
- Value *VarIdx = 0;
+ Value *VarIdx = nullptr;
std::vector<Constant*> Indexes;
unsigned VarIdxNum = 0;
for (unsigned i = 2, e = GEP->getNumOperands(); i != e; ++i)
@@ -4810,7 +4867,7 @@ ScalarEvolution::ComputeLoadConstantCompareExitLimit(
if (VarIdx) return getCouldNotCompute(); // Multiple non-constant idx's.
VarIdx = GEP->getOperand(i);
VarIdxNum = i-2;
- Indexes.push_back(0);
+ Indexes.push_back(nullptr);
}
// Loop-invariant loads may be a byproduct of loop optimization. Skip them.
@@ -4841,7 +4898,7 @@ ScalarEvolution::ComputeLoadConstantCompareExitLimit(
Constant *Result = ConstantFoldLoadThroughGEPIndices(GV->getInitializer(),
Indexes);
- if (Result == 0) break; // Cannot compute!
+ if (!Result) break; // Cannot compute!
// Evaluate the condition for this iteration.
Result = ConstantExpr::getICmp(predicate, Result, RHS);
@@ -4902,14 +4959,14 @@ getConstantEvolvingPHIOperands(Instruction *UseInst, const Loop *L,
// Otherwise, we can evaluate this instruction if all of its operands are
// constant or derived from a PHI node themselves.
- PHINode *PHI = 0;
+ PHINode *PHI = nullptr;
for (Instruction::op_iterator OpI = UseInst->op_begin(),
OpE = UseInst->op_end(); OpI != OpE; ++OpI) {
if (isa<Constant>(*OpI)) continue;
Instruction *OpInst = dyn_cast<Instruction>(*OpI);
- if (!OpInst || !canConstantEvolve(OpInst, L)) return 0;
+ if (!OpInst || !canConstantEvolve(OpInst, L)) return nullptr;
PHINode *P = dyn_cast<PHINode>(OpInst);
if (!P)
@@ -4923,8 +4980,10 @@ getConstantEvolvingPHIOperands(Instruction *UseInst, const Loop *L,
P = getConstantEvolvingPHIOperands(OpInst, L, PHIMap);
PHIMap[OpInst] = P;
}
- if (P == 0) return 0; // Not evolving from PHI
- if (PHI && PHI != P) return 0; // Evolving from multiple different PHIs.
+ if (!P)
+ return nullptr; // Not evolving from PHI
+ if (PHI && PHI != P)
+ return nullptr; // Evolving from multiple different PHIs.
PHI = P;
}
// This is an expression evolving from a constant PHI!
@@ -4938,7 +4997,7 @@ getConstantEvolvingPHIOperands(Instruction *UseInst, const Loop *L,
/// constraints, return null.
static PHINode *getConstantEvolvingPHI(Value *V, const Loop *L) {
Instruction *I = dyn_cast<Instruction>(V);
- if (I == 0 || !canConstantEvolve(I, L)) return 0;
+ if (!I || !canConstantEvolve(I, L)) return nullptr;
if (PHINode *PN = dyn_cast<PHINode>(I)) {
return PN;
@@ -4960,18 +5019,18 @@ static Constant *EvaluateExpression(Value *V, const Loop *L,
// Convenient constant check, but redundant for recursive calls.
if (Constant *C = dyn_cast<Constant>(V)) return C;
Instruction *I = dyn_cast<Instruction>(V);
- if (!I) return 0;
+ if (!I) return nullptr;
if (Constant *C = Vals.lookup(I)) return C;
// An instruction inside the loop depends on a value outside the loop that we
// weren't given a mapping for, or a value such as a call inside the loop.
- if (!canConstantEvolve(I, L)) return 0;
+ if (!canConstantEvolve(I, L)) return nullptr;
// An unmapped PHI can be due to a branch or another loop inside this loop,
// or due to this not being the initial iteration through a loop where we
// couldn't compute the evolution of this particular PHI last time.
- if (isa<PHINode>(I)) return 0;
+ if (isa<PHINode>(I)) return nullptr;
std::vector<Constant*> Operands(I->getNumOperands());
@@ -4979,12 +5038,12 @@ static Constant *EvaluateExpression(Value *V, const Loop *L,
Instruction *Operand = dyn_cast<Instruction>(I->getOperand(i));
if (!Operand) {
Operands[i] = dyn_cast<Constant>(I->getOperand(i));
- if (!Operands[i]) return 0;
+ if (!Operands[i]) return nullptr;
continue;
}
Constant *C = EvaluateExpression(Operand, L, Vals, DL, TLI);
Vals[Operand] = C;
- if (!C) return 0;
+ if (!C) return nullptr;
Operands[i] = C;
}
@@ -5013,7 +5072,7 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN,
return I->second;
if (BEs.ugt(MaxBruteForceIterations))
- return ConstantEvolutionLoopExitValue[PN] = 0; // Not going to evaluate it.
+ return ConstantEvolutionLoopExitValue[PN] = nullptr; // Not going to evaluate it.
Constant *&RetVal = ConstantEvolutionLoopExitValue[PN];
@@ -5025,22 +5084,22 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN,
// entry must be a constant (coming in from outside of the loop), and the
// second must be derived from the same PHI.
bool SecondIsBackedge = L->contains(PN->getIncomingBlock(1));
- PHINode *PHI = 0;
+ PHINode *PHI = nullptr;
for (BasicBlock::iterator I = Header->begin();
(PHI = dyn_cast<PHINode>(I)); ++I) {
Constant *StartCST =
dyn_cast<Constant>(PHI->getIncomingValue(!SecondIsBackedge));
- if (StartCST == 0) continue;
+ if (!StartCST) continue;
CurrentIterVals[PHI] = StartCST;
}
if (!CurrentIterVals.count(PN))
- return RetVal = 0;
+ return RetVal = nullptr;
Value *BEValue = PN->getIncomingValue(SecondIsBackedge);
// Execute the loop symbolically to determine the exit value.
if (BEs.getActiveBits() >= 32)
- return RetVal = 0; // More than 2^32-1 iterations?? Not doing it!
+ return RetVal = nullptr; // More than 2^32-1 iterations?? Not doing it!
unsigned NumIterations = BEs.getZExtValue(); // must be in range
unsigned IterationNum = 0;
@@ -5053,8 +5112,8 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN,
DenseMap<Instruction *, Constant *> NextIterVals;
Constant *NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, DL,
TLI);
- if (NextPHI == 0)
- return 0; // Couldn't evaluate!
+ if (!NextPHI)
+ return nullptr; // Couldn't evaluate!
NextIterVals[PN] = NextPHI;
bool StoppedEvolving = NextPHI == CurrentIterVals[PN];
@@ -5101,7 +5160,7 @@ const SCEV *ScalarEvolution::ComputeExitCountExhaustively(const Loop *L,
Value *Cond,
bool ExitWhen) {
PHINode *PN = getConstantEvolvingPHI(Cond, L);
- if (PN == 0) return getCouldNotCompute();
+ if (!PN) return getCouldNotCompute();
// If the loop is canonicalized, the PHI will have exactly two entries.
// That's the only form we support here.
@@ -5114,12 +5173,12 @@ const SCEV *ScalarEvolution::ComputeExitCountExhaustively(const Loop *L,
// One entry must be a constant (coming in from outside of the loop), and the
// second must be derived from the same PHI.
bool SecondIsBackedge = L->contains(PN->getIncomingBlock(1));
- PHINode *PHI = 0;
+ PHINode *PHI = nullptr;
for (BasicBlock::iterator I = Header->begin();
(PHI = dyn_cast<PHINode>(I)); ++I) {
Constant *StartCST =
dyn_cast<Constant>(PHI->getIncomingValue(!SecondIsBackedge));
- if (StartCST == 0) continue;
+ if (!StartCST) continue;
CurrentIterVals[PHI] = StartCST;
}
if (!CurrentIterVals.count(PN))
@@ -5189,7 +5248,7 @@ const SCEV *ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) {
if (Values[u].first == L)
return Values[u].second ? Values[u].second : V;
}
- Values.push_back(std::make_pair(L, static_cast<const SCEV *>(0)));
+ Values.push_back(std::make_pair(L, static_cast<const SCEV *>(nullptr)));
// Otherwise compute it.
const SCEV *C = computeSCEVAtScope(V, L);
SmallVector<std::pair<const Loop *, const SCEV *>, 2> &Values2 = ValuesAtScopes[V];
@@ -5243,7 +5302,7 @@ static Constant *BuildConstantFromSCEV(const SCEV *V) {
}
for (unsigned i = 1, e = SA->getNumOperands(); i != e; ++i) {
Constant *C2 = BuildConstantFromSCEV(SA->getOperand(i));
- if (!C2) return 0;
+ if (!C2) return nullptr;
// First pointer!
if (!C->getType()->isPointerTy() && C2->getType()->isPointerTy()) {
@@ -5258,7 +5317,7 @@ static Constant *BuildConstantFromSCEV(const SCEV *V) {
// Don't bother trying to sum two pointers. We probably can't
// statically compute a load that results from it anyway.
if (C2->getType()->isPointerTy())
- return 0;
+ return nullptr;
if (PointerType *PTy = dyn_cast<PointerType>(C->getType())) {
if (PTy->getElementType()->isStructTy())
@@ -5276,10 +5335,10 @@ static Constant *BuildConstantFromSCEV(const SCEV *V) {
const SCEVMulExpr *SM = cast<SCEVMulExpr>(V);
if (Constant *C = BuildConstantFromSCEV(SM->getOperand(0))) {
// Don't bother with pointers at all.
- if (C->getType()->isPointerTy()) return 0;
+ if (C->getType()->isPointerTy()) return nullptr;
for (unsigned i = 1, e = SM->getNumOperands(); i != e; ++i) {
Constant *C2 = BuildConstantFromSCEV(SM->getOperand(i));
- if (!C2 || C2->getType()->isPointerTy()) return 0;
+ if (!C2 || C2->getType()->isPointerTy()) return nullptr;
C = ConstantExpr::getMul(C, C2);
}
return C;
@@ -5298,7 +5357,7 @@ static Constant *BuildConstantFromSCEV(const SCEV *V) {
case scUMaxExpr:
break; // TODO: smax, umax.
}
- return 0;
+ return nullptr;
}
const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) {
@@ -5365,7 +5424,7 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) {
// Check to see if getSCEVAtScope actually made an improvement.
if (MadeImprovement) {
- Constant *C = 0;
+ Constant *C = nullptr;
if (const CmpInst *CI = dyn_cast<CmpInst>(I))
C = ConstantFoldCompareInstOperands(CI->getPredicate(),
Operands[0], Operands[1], DL,
@@ -5697,7 +5756,7 @@ ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L, bool IsSubExpr) {
// to 0, it must be counting down to equal 0. Consequently, N = Start / -Step.
// We have not yet seen any such cases.
const SCEVConstant *StepC = dyn_cast<SCEVConstant>(Step);
- if (StepC == 0 || StepC->getValue()->equalsInt(0))
+ if (!StepC || StepC->getValue()->equalsInt(0))
return getCouldNotCompute();
// For positive steps (counting up until unsigned overflow):
@@ -6136,18 +6195,30 @@ bool ScalarEvolution::isKnownPredicate(ICmpInst::Predicate Pred,
// If LHS or RHS is an addrec, check to see if the condition is true in
// every iteration of the loop.
- if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LHS))
- if (isLoopEntryGuardedByCond(
- AR->getLoop(), Pred, AR->getStart(), RHS) &&
- isLoopBackedgeGuardedByCond(
- AR->getLoop(), Pred, AR->getPostIncExpr(*this), RHS))
- return true;
- if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(RHS))
- if (isLoopEntryGuardedByCond(
- AR->getLoop(), Pred, LHS, AR->getStart()) &&
- isLoopBackedgeGuardedByCond(
- AR->getLoop(), Pred, LHS, AR->getPostIncExpr(*this)))
- return true;
+ // If LHS and RHS are both addrec, both conditions must be true in
+ // every iteration of the loop.
+ const SCEVAddRecExpr *LAR = dyn_cast<SCEVAddRecExpr>(LHS);
+ const SCEVAddRecExpr *RAR = dyn_cast<SCEVAddRecExpr>(RHS);
+ bool LeftGuarded = false;
+ bool RightGuarded = false;
+ if (LAR) {
+ const Loop *L = LAR->getLoop();
+ if (isLoopEntryGuardedByCond(L, Pred, LAR->getStart(), RHS) &&
+ isLoopBackedgeGuardedByCond(L, Pred, LAR->getPostIncExpr(*this), RHS)) {
+ if (!RAR) return true;
+ LeftGuarded = true;
+ }
+ }
+ if (RAR) {
+ const Loop *L = RAR->getLoop();
+ if (isLoopEntryGuardedByCond(L, Pred, LHS, RAR->getStart()) &&
+ isLoopBackedgeGuardedByCond(L, Pred, LHS, RAR->getPostIncExpr(*this))) {
+ if (!LAR) return true;
+ RightGuarded = true;
+ }
+ }
+ if (LeftGuarded && RightGuarded)
+ return true;
// Otherwise see what can be done with known constant ranges.
return isKnownPredicateWithRanges(Pred, LHS, RHS);
@@ -6814,6 +6885,105 @@ const SCEV *SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range,
return SE.getCouldNotCompute();
}
+namespace {
+struct FindUndefs {
+ bool Found;
+ FindUndefs() : Found(false) {}
+
+ bool follow(const SCEV *S) {
+ if (const SCEVUnknown *C = dyn_cast<SCEVUnknown>(S)) {
+ if (isa<UndefValue>(C->getValue()))
+ Found = true;
+ } else if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) {
+ if (isa<UndefValue>(C->getValue()))
+ Found = true;
+ }
+
+ // Keep looking if we haven't found it yet.
+ return !Found;
+ }
+ bool isDone() const {
+ // Stop recursion if we have found an undef.
+ return Found;
+ }
+};
+}
+
+// Return true when S contains at least one undef value.
+static inline bool
+containsUndefs(const SCEV *S) {
+ FindUndefs F;
+ SCEVTraversal<FindUndefs> ST(F);
+ ST.visitAll(S);
+
+ return F.Found;
+}
+
+namespace {
+// Collect all steps of SCEV expressions.
+struct SCEVCollectStrides {
+ ScalarEvolution &SE;
+ SmallVectorImpl<const SCEV *> &Strides;
+
+ SCEVCollectStrides(ScalarEvolution &SE, SmallVectorImpl<const SCEV *> &S)
+ : SE(SE), Strides(S) {}
+
+ bool follow(const SCEV *S) {
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S))
+ Strides.push_back(AR->getStepRecurrence(SE));
+ return true;
+ }
+ bool isDone() const { return false; }
+};
+
+// Collect all SCEVUnknown and SCEVMulExpr expressions.
+struct SCEVCollectTerms {
+ SmallVectorImpl<const SCEV *> &Terms;
+
+ SCEVCollectTerms(SmallVectorImpl<const SCEV *> &T)
+ : Terms(T) {}
+
+ bool follow(const SCEV *S) {
+ if (isa<SCEVUnknown>(S) || isa<SCEVMulExpr>(S)) {
+ if (!containsUndefs(S))
+ Terms.push_back(S);
+
+ // Stop recursion: once we have collected a term, do not walk its operands.
+ return false;
+ }
+
+ // Keep looking.
+ return true;
+ }
+ bool isDone() const { return false; }
+};
+}
+
+/// Find parametric terms in this SCEVAddRecExpr.
+void SCEVAddRecExpr::collectParametricTerms(
+ ScalarEvolution &SE, SmallVectorImpl<const SCEV *> &Terms) const {
+ SmallVector<const SCEV *, 4> Strides;
+ SCEVCollectStrides StrideCollector(SE, Strides);
+ visitAll(this, StrideCollector);
+
+ DEBUG({
+ dbgs() << "Strides:\n";
+ for (const SCEV *S : Strides)
+ dbgs() << *S << "\n";
+ });
+
+ for (const SCEV *S : Strides) {
+ SCEVCollectTerms TermCollector(Terms);
+ visitAll(S, TermCollector);
+ }
+
+ DEBUG({
+ dbgs() << "Terms:\n";
+ for (const SCEV *T : Terms)
+ dbgs() << *T << "\n";
+ });
+}
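// [Editor's note: illustrative example, not part of this commit. For a
// hypothetical row-major access A[i][j] with 4-byte elements, i.e. the
// offset SCEV {{0,+,(4 * %m)}<L1>,+,4}<L2>, the collected strides are
// (4 * %m) and 4; only the parametric product (4 * %m) survives as a term,
// since the constant stride 4 is neither a SCEVUnknown nor a SCEVMulExpr.]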
+
static const APInt srem(const SCEVConstant *C1, const SCEVConstant *C2) {
APInt A = C1->getValue()->getValue();
APInt B = C2->getValue()->getValue();
@@ -6843,351 +7013,479 @@ static const APInt sdiv(const SCEVConstant *C1, const SCEVConstant *C2) {
}
namespace {
-struct SCEVGCD : public SCEVVisitor<SCEVGCD, const SCEV *> {
-public:
- // Pattern match Step into Start. When Step is a multiply expression, find
- // the largest subexpression of Step that appears in Start. When Start is an
- // add expression, try to match Step in the subexpressions of Start, non
- // matching subexpressions are returned under Remainder.
- static const SCEV *findGCD(ScalarEvolution &SE, const SCEV *Start,
- const SCEV *Step, const SCEV **Remainder) {
- assert(Remainder && "Remainder should not be NULL");
- SCEVGCD R(SE, Step, SE.getConstant(Step->getType(), 0));
- const SCEV *Res = R.visit(Start);
- *Remainder = R.Remainder;
- return Res;
- }
+struct FindSCEVSize {
+ int Size;
+ FindSCEVSize() : Size(0) {}
- SCEVGCD(ScalarEvolution &S, const SCEV *G, const SCEV *R)
- : SE(S), GCD(G), Remainder(R) {
- Zero = SE.getConstant(GCD->getType(), 0);
- One = SE.getConstant(GCD->getType(), 1);
+ bool follow(const SCEV *S) {
+ ++Size;
+ // Keep looking at all operands of S.
+ return true;
}
+ bool isDone() const {
+ return false;
+ }
+};
+}
- const SCEV *visitConstant(const SCEVConstant *Constant) {
- if (GCD == Constant || Constant == Zero)
- return GCD;
+// Returns the size of the SCEV S.
+static inline int sizeOfSCEV(const SCEV *S) {
+ FindSCEVSize F;
+ SCEVTraversal<FindSCEVSize> ST(F);
+ ST.visitAll(S);
+ return F.Size;
+}
- if (const SCEVConstant *CGCD = dyn_cast<SCEVConstant>(GCD)) {
- const SCEV *Res = SE.getConstant(gcd(Constant, CGCD));
- if (Res != One)
- return Res;
+namespace {
- Remainder = SE.getConstant(srem(Constant, CGCD));
- Constant = cast<SCEVConstant>(SE.getMinusSCEV(Constant, Remainder));
- Res = SE.getConstant(gcd(Constant, CGCD));
- return Res;
+struct SCEVDivision : public SCEVVisitor<SCEVDivision, void> {
+public:
+ // Computes the Quotient and Remainder of the division of Numerator by
+ // Denominator.
+ static void divide(ScalarEvolution &SE, const SCEV *Numerator,
+ const SCEV *Denominator, const SCEV **Quotient,
+ const SCEV **Remainder) {
+ assert(Numerator && Denominator && "Uninitialized SCEV");
+
+ SCEVDivision D(SE, Numerator, Denominator);
+
+ // Check for the trivial case here to avoid having to check for it in the
+ // rest of the code.
+ if (Numerator == Denominator) {
+ *Quotient = D.One;
+ *Remainder = D.Zero;
+ return;
}
- // When GCD is not a constant, it could be that the GCD is an Add, Mul,
- // AddRec, etc., in which case we want to find out how many times the
- // Constant divides the GCD: we then return that as the new GCD.
- const SCEV *Rem = Zero;
- const SCEV *Res = findGCD(SE, GCD, Constant, &Rem);
+ if (Numerator->isZero()) {
+ *Quotient = D.Zero;
+ *Remainder = D.Zero;
+ return;
+ }
- if (Res == One || Rem != Zero) {
- Remainder = Constant;
- return One;
+ // Split the Denominator when it is a product.
+ if (const SCEVMulExpr *T = dyn_cast<const SCEVMulExpr>(Denominator)) {
+ const SCEV *Q, *R;
+ *Quotient = Numerator;
+ for (const SCEV *Op : T->operands()) {
+ divide(SE, *Quotient, Op, &Q, &R);
+ *Quotient = Q;
+
+ // Bail out when the Numerator is not divisible by one of the terms of
+ // the Denominator.
+ if (!R->isZero()) {
+ *Quotient = D.Zero;
+ *Remainder = Numerator;
+ return;
+ }
+ }
+ *Remainder = D.Zero;
+ return;
}
- assert(isa<SCEVConstant>(Res) && "Res should be a constant");
- Remainder = SE.getConstant(srem(Constant, cast<SCEVConstant>(Res)));
- return Res;
+ D.visit(Numerator);
+ *Quotient = D.Quotient;
+ *Remainder = D.Remainder;
+ }
+
+ SCEVDivision(ScalarEvolution &S, const SCEV *Numerator, const SCEV *Denominator)
+ : SE(S), Denominator(Denominator) {
+ Zero = SE.getConstant(Denominator->getType(), 0);
+ One = SE.getConstant(Denominator->getType(), 1);
+
+ // By default, we don't know how to divide Expr by Denominator.
+ // Providing the default here simplifies the rest of the code.
+ Quotient = Zero;
+ Remainder = Numerator;
+ }
+
+ // Except in the trivial case described above, we do not know how to divide
+ // Expr by Denominator for the following kinds of expressions. Their empty
+ // visitors keep the defaults set in the constructor: Quotient = Zero and
+ // Remainder = Numerator.
+ void visitTruncateExpr(const SCEVTruncateExpr *Numerator) {}
+ void visitZeroExtendExpr(const SCEVZeroExtendExpr *Numerator) {}
+ void visitSignExtendExpr(const SCEVSignExtendExpr *Numerator) {}
+ void visitUDivExpr(const SCEVUDivExpr *Numerator) {}
+ void visitSMaxExpr(const SCEVSMaxExpr *Numerator) {}
+ void visitUMaxExpr(const SCEVUMaxExpr *Numerator) {}
+ void visitUnknown(const SCEVUnknown *Numerator) {}
+ void visitCouldNotCompute(const SCEVCouldNotCompute *Numerator) {}
+
+ void visitConstant(const SCEVConstant *Numerator) {
+ if (const SCEVConstant *D = dyn_cast<SCEVConstant>(Denominator)) {
+ Quotient = SE.getConstant(sdiv(Numerator, D));
+ Remainder = SE.getConstant(srem(Numerator, D));
+ return;
+ }
}
- const SCEV *visitTruncateExpr(const SCEVTruncateExpr *Expr) {
- if (GCD != Expr)
- Remainder = Expr;
- return GCD;
+ void visitAddRecExpr(const SCEVAddRecExpr *Numerator) {
+ const SCEV *StartQ, *StartR, *StepQ, *StepR;
+ assert(Numerator->isAffine() && "Numerator should be affine");
+ divide(SE, Numerator->getStart(), Denominator, &StartQ, &StartR);
+ divide(SE, Numerator->getStepRecurrence(SE), Denominator, &StepQ, &StepR);
+ Quotient = SE.getAddRecExpr(StartQ, StepQ, Numerator->getLoop(),
+ Numerator->getNoWrapFlags());
+ Remainder = SE.getAddRecExpr(StartR, StepR, Numerator->getLoop(),
+ Numerator->getNoWrapFlags());
}
- const SCEV *visitZeroExtendExpr(const SCEVZeroExtendExpr *Expr) {
- if (GCD != Expr)
- Remainder = Expr;
- return GCD;
- }
+ void visitAddExpr(const SCEVAddExpr *Numerator) {
+ SmallVector<const SCEV *, 2> Qs, Rs;
+ Type *Ty = Denominator->getType();
- const SCEV *visitSignExtendExpr(const SCEVSignExtendExpr *Expr) {
- if (GCD != Expr)
- Remainder = Expr;
- return GCD;
- }
+ for (const SCEV *Op : Numerator->operands()) {
+ const SCEV *Q, *R;
+ divide(SE, Op, Denominator, &Q, &R);
- const SCEV *visitAddExpr(const SCEVAddExpr *Expr) {
- if (GCD == Expr)
- return GCD;
+ // Bail out if types do not match.
+ if (Ty != Q->getType() || Ty != R->getType()) {
+ Quotient = Zero;
+ Remainder = Numerator;
+ return;
+ }
- for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) {
- const SCEV *Rem = Zero;
- const SCEV *Res = findGCD(SE, Expr->getOperand(e - 1 - i), GCD, &Rem);
+ Qs.push_back(Q);
+ Rs.push_back(R);
+ }
- // FIXME: There may be ambiguous situations: for instance,
- // GCD(-4 + (3 * %m), 2 * %m) where 2 divides -4 and %m divides (3 * %m).
- // The order in which the AddExpr is traversed computes a different GCD
- // and Remainder.
- if (Res != One)
- GCD = Res;
- if (Rem != Zero)
- Remainder = SE.getAddExpr(Remainder, Rem);
+ if (Qs.size() == 1) {
+ Quotient = Qs[0];
+ Remainder = Rs[0];
+ return;
}
- return GCD;
+ Quotient = SE.getAddExpr(Qs);
+ Remainder = SE.getAddExpr(Rs);
}
- const SCEV *visitMulExpr(const SCEVMulExpr *Expr) {
- if (GCD == Expr)
- return GCD;
+ void visitMulExpr(const SCEVMulExpr *Numerator) {
+ SmallVector<const SCEV *, 2> Qs;
+ Type *Ty = Denominator->getType();
- for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) {
- if (Expr->getOperand(i) == GCD)
- return GCD;
- }
+ bool FoundDenominatorTerm = false;
+ for (const SCEV *Op : Numerator->operands()) {
+ // Bail out if types do not match.
+ if (Ty != Op->getType()) {
+ Quotient = Zero;
+ Remainder = Numerator;
+ return;
+ }
+
+ if (FoundDenominatorTerm) {
+ Qs.push_back(Op);
+ continue;
+ }
- // If we have not returned yet, it means that GCD is not part of Expr.
- const SCEV *PartialGCD = One;
- for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) {
- const SCEV *Rem = Zero;
- const SCEV *Res = findGCD(SE, Expr->getOperand(i), GCD, &Rem);
- if (Rem != Zero)
- // GCD does not divide Expr->getOperand(i).
+ // Check whether Denominator divides one of the product operands.
+ const SCEV *Q, *R;
+ divide(SE, Op, Denominator, &Q, &R);
+ if (!R->isZero()) {
+ Qs.push_back(Op);
continue;
+ }
- if (Res == GCD)
- return GCD;
- PartialGCD = SE.getMulExpr(PartialGCD, Res);
- if (PartialGCD == GCD)
- return GCD;
- }
-
- if (PartialGCD != One)
- return PartialGCD;
-
- Remainder = Expr;
- const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(GCD);
- if (!Mul)
- return PartialGCD;
-
- // When the GCD is a multiply expression, try to decompose it:
- // this occurs when Step does not divide the Start expression
- // as in: {(-4 + (3 * %m)),+,(2 * %m)}
- for (int i = 0, e = Mul->getNumOperands(); i < e; ++i) {
- const SCEV *Rem = Zero;
- const SCEV *Res = findGCD(SE, Expr, Mul->getOperand(i), &Rem);
- if (Rem == Zero) {
- Remainder = Rem;
- return Res;
+ // Bail out if types do not match.
+ if (Ty != Q->getType()) {
+ Quotient = Zero;
+ Remainder = Numerator;
+ return;
}
+
+ FoundDenominatorTerm = true;
+ Qs.push_back(Q);
}
- return PartialGCD;
- }
+ if (FoundDenominatorTerm) {
+ Remainder = Zero;
+ if (Qs.size() == 1)
+ Quotient = Qs[0];
+ else
+ Quotient = SE.getMulExpr(Qs);
+ return;
+ }
- const SCEV *visitUDivExpr(const SCEVUDivExpr *Expr) {
- if (GCD != Expr)
- Remainder = Expr;
- return GCD;
+ if (!isa<SCEVUnknown>(Denominator)) {
+ Quotient = Zero;
+ Remainder = Numerator;
+ return;
+ }
+
+ // The Remainder is obtained by replacing Denominator by 0 in Numerator.
+ ValueToValueMap RewriteMap;
+ RewriteMap[cast<SCEVUnknown>(Denominator)->getValue()] =
+ cast<SCEVConstant>(Zero)->getValue();
+ Remainder = SCEVParameterRewriter::rewrite(Numerator, SE, RewriteMap, true);
+
+ // Quotient is (Numerator - Remainder) divided by Denominator.
+ const SCEV *Q, *R;
+ const SCEV *Diff = SE.getMinusSCEV(Numerator, Remainder);
+ if (sizeOfSCEV(Diff) > sizeOfSCEV(Numerator)) {
+ // This SCEV does not seem to simplify: fail the division here.
+ Quotient = Zero;
+ Remainder = Numerator;
+ return;
+ }
+ divide(SE, Diff, Denominator, &Q, &R);
+ assert(R == Zero &&
+ "Denominator should evenly divide (Numerator - Remainder)");
+ Quotient = Q;
}
- const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) {
- if (GCD == Expr)
- return GCD;
+private:
+ ScalarEvolution &SE;
+ const SCEV *Denominator, *Quotient, *Remainder, *Zero, *One;
+};
+}
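// [Editor's note: illustrative examples, not part of this commit.
//   divide((4 * %m * %i), %m)  --> Quotient = (4 * %i), Remainder = 0,
//     since visitMulExpr finds %m among the product's operands.
//   divide((5 + (4 * %m)), %m) --> Quotient = 4, Remainder = 5, since
//     visitAddExpr divides operand-wise: 5/%m yields (0, 5) by the default,
//     and (4 * %m)/%m yields (4, 0).]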
- if (!Expr->isAffine()) {
- Remainder = Expr;
- return GCD;
- }
+static bool findArrayDimensionsRec(ScalarEvolution &SE,
+ SmallVectorImpl<const SCEV *> &Terms,
+ SmallVectorImpl<const SCEV *> &Sizes) {
+ int Last = Terms.size() - 1;
+ const SCEV *Step = Terms[Last];
- const SCEV *Rem = Zero;
- const SCEV *Res = findGCD(SE, Expr->getOperand(0), GCD, &Rem);
- if (Rem != Zero)
- Remainder = SE.getAddExpr(Remainder, Rem);
+ // End of recursion.
+ if (Last == 0) {
+ if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(Step)) {
+ SmallVector<const SCEV *, 2> Qs;
+ for (const SCEV *Op : M->operands())
+ if (!isa<SCEVConstant>(Op))
+ Qs.push_back(Op);
- Rem = Zero;
- Res = findGCD(SE, Expr->getOperand(1), Res, &Rem);
- if (Rem != Zero) {
- Remainder = Expr;
- return GCD;
+ Step = SE.getMulExpr(Qs);
}
- return Res;
+ Sizes.push_back(Step);
+ return true;
}
- const SCEV *visitSMaxExpr(const SCEVSMaxExpr *Expr) {
- if (GCD != Expr)
- Remainder = Expr;
- return GCD;
- }
+ for (const SCEV *&Term : Terms) {
+ // Normalize the terms before the next call to findArrayDimensionsRec.
+ const SCEV *Q, *R;
+ SCEVDivision::divide(SE, Term, Step, &Q, &R);
- const SCEV *visitUMaxExpr(const SCEVUMaxExpr *Expr) {
- if (GCD != Expr)
- Remainder = Expr;
- return GCD;
- }
+ // Bail out when GCD does not evenly divide one of the terms.
+ if (!R->isZero())
+ return false;
- const SCEV *visitUnknown(const SCEVUnknown *Expr) {
- if (GCD != Expr)
- Remainder = Expr;
- return GCD;
+ Term = Q;
}
- const SCEV *visitCouldNotCompute(const SCEVCouldNotCompute *Expr) {
- return One;
- }
+ // Remove all SCEVConstants.
+ Terms.erase(std::remove_if(Terms.begin(), Terms.end(), [](const SCEV *E) {
+ return isa<SCEVConstant>(E);
+ }),
+ Terms.end());
-private:
- ScalarEvolution &SE;
- const SCEV *GCD, *Remainder, *Zero, *One;
-};
+ if (Terms.size() > 0)
+ if (!findArrayDimensionsRec(SE, Terms, Sizes))
+ return false;
-struct SCEVDivision : public SCEVVisitor<SCEVDivision, const SCEV *> {
-public:
- // Remove from Start all multiples of Step.
- static const SCEV *divide(ScalarEvolution &SE, const SCEV *Start,
- const SCEV *Step) {
- SCEVDivision D(SE, Step);
- const SCEV *Rem = D.Zero;
- (void)Rem;
- // The division is guaranteed to succeed: Step should divide Start with no
- // remainder.
- assert(Step == SCEVGCD::findGCD(SE, Start, Step, &Rem) && Rem == D.Zero &&
- "Step should divide Start with no remainder.");
- return D.visit(Start);
- }
+ Sizes.push_back(Step);
+ return true;
+}
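// [Editor's note: illustrative example, not part of this commit. With
// Terms = {(%m * %o), %o} (larger products first), the last term %o becomes
// the innermost Step; dividing both terms by it leaves Terms = {%m}, and the
// recursion then pushes %m followed by %o, so Sizes = {%m, %o}.]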
+
+namespace {
+struct FindParameter {
+ bool FoundParameter;
+ FindParameter() : FoundParameter(false) {}
- SCEVDivision(ScalarEvolution &S, const SCEV *G) : SE(S), GCD(G) {
- Zero = SE.getConstant(GCD->getType(), 0);
- One = SE.getConstant(GCD->getType(), 1);
+ bool follow(const SCEV *S) {
+ if (isa<SCEVUnknown>(S)) {
+ FoundParameter = true;
+ // Stop recursion: we found a parameter.
+ return false;
+ }
+ // Keep looking.
+ return true;
}
+ bool isDone() const {
+ // Stop recursion if we have found a parameter.
+ return FoundParameter;
+ }
+};
+}
- const SCEV *visitConstant(const SCEVConstant *Constant) {
- if (GCD == Constant)
- return One;
+// Returns true when S contains at least one SCEVUnknown parameter.
+static inline bool
+containsParameters(const SCEV *S) {
+ FindParameter F;
+ SCEVTraversal<FindParameter> ST(F);
+ ST.visitAll(S);
- if (const SCEVConstant *CGCD = dyn_cast<SCEVConstant>(GCD))
- return SE.getConstant(sdiv(Constant, CGCD));
- return Constant;
- }
+ return F.FoundParameter;
+}
- const SCEV *visitTruncateExpr(const SCEVTruncateExpr *Expr) {
- if (GCD == Expr)
- return One;
- return Expr;
- }
+// Returns true when any SCEV in Terms contains a SCEVUnknown parameter.
+static inline bool
+containsParameters(SmallVectorImpl<const SCEV *> &Terms) {
+ for (const SCEV *T : Terms)
+ if (containsParameters(T))
+ return true;
+ return false;
+}
- const SCEV *visitZeroExtendExpr(const SCEVZeroExtendExpr *Expr) {
- if (GCD == Expr)
- return One;
- return Expr;
- }
+// Return the number of product terms in S.
+static inline int numberOfTerms(const SCEV *S) {
+ if (const SCEVMulExpr *Expr = dyn_cast<SCEVMulExpr>(S))
+ return Expr->getNumOperands();
+ return 1;
+}
- const SCEV *visitSignExtendExpr(const SCEVSignExtendExpr *Expr) {
- if (GCD == Expr)
- return One;
- return Expr;
- }
+static const SCEV *removeConstantFactors(ScalarEvolution &SE, const SCEV *T) {
+ if (isa<SCEVConstant>(T))
+ return nullptr;
- const SCEV *visitAddExpr(const SCEVAddExpr *Expr) {
- if (GCD == Expr)
- return One;
+ if (isa<SCEVUnknown>(T))
+ return T;
- SmallVector<const SCEV *, 2> Operands;
- for (int i = 0, e = Expr->getNumOperands(); i < e; ++i)
- Operands.push_back(divide(SE, Expr->getOperand(i), GCD));
+ if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(T)) {
+ SmallVector<const SCEV *, 2> Factors;
+ for (const SCEV *Op : M->operands())
+ if (!isa<SCEVConstant>(Op))
+ Factors.push_back(Op);
- if (Operands.size() == 1)
- return Operands[0];
- return SE.getAddExpr(Operands);
+ return SE.getMulExpr(Factors);
}
- const SCEV *visitMulExpr(const SCEVMulExpr *Expr) {
- if (GCD == Expr)
- return One;
+ return T;
+}
- bool FoundGCDTerm = false;
- for (int i = 0, e = Expr->getNumOperands(); i < e; ++i)
- if (Expr->getOperand(i) == GCD)
- FoundGCDTerm = true;
+/// Return the size of an element read or written by Inst.
+const SCEV *ScalarEvolution::getElementSize(Instruction *Inst) {
+ Type *Ty;
+ if (StoreInst *Store = dyn_cast<StoreInst>(Inst))
+ Ty = Store->getValueOperand()->getType();
+ else if (LoadInst *Load = dyn_cast<LoadInst>(Inst))
+ Ty = Load->getPointerOperand()->getType();
+ else
+ return nullptr;
- SmallVector<const SCEV *, 2> Operands;
- if (FoundGCDTerm) {
- FoundGCDTerm = false;
- for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) {
- if (FoundGCDTerm)
- Operands.push_back(Expr->getOperand(i));
- else if (Expr->getOperand(i) == GCD)
- FoundGCDTerm = true;
- else
- Operands.push_back(Expr->getOperand(i));
- }
- } else {
- const SCEV *PartialGCD = One;
- for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) {
- if (PartialGCD == GCD) {
- Operands.push_back(Expr->getOperand(i));
- continue;
- }
+ Type *ETy = getEffectiveSCEVType(PointerType::getUnqual(Ty));
+ return getSizeOfExpr(ETy, Ty);
+}
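// [Editor's note: illustrative example, not part of this commit. For
// "store i32 %v, i32* %p" the computed element size is sizeof(i32), i.e.
// the SCEV constant 4 in the effective pointer-width type.]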
- const SCEV *Rem = Zero;
- const SCEV *Res = SCEVGCD::findGCD(SE, Expr->getOperand(i), GCD, &Rem);
- if (Rem == Zero) {
- PartialGCD = SE.getMulExpr(PartialGCD, Res);
- Operands.push_back(divide(SE, Expr->getOperand(i), GCD));
- } else {
- Operands.push_back(Expr->getOperand(i));
- }
- }
- }
+/// Second step of delinearization: compute the array dimensions Sizes from the
+/// set of Terms extracted from the memory access function of this SCEVAddRec.
+void ScalarEvolution::findArrayDimensions(SmallVectorImpl<const SCEV *> &Terms,
+ SmallVectorImpl<const SCEV *> &Sizes,
+ const SCEV *ElementSize) const {
- if (Operands.size() == 1)
- return Operands[0];
- return SE.getMulExpr(Operands);
- }
+ if (Terms.size() < 1)
+ return;
- const SCEV *visitUDivExpr(const SCEVUDivExpr *Expr) {
- if (GCD == Expr)
- return One;
- return Expr;
- }
+ // Early return when Terms do not contain parameters: we do not delinearize
+ // non-parametric SCEVs.
+ if (!containsParameters(Terms))
+ return;
- const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) {
- if (GCD == Expr)
- return One;
+ DEBUG({
+ dbgs() << "Terms:\n";
+ for (const SCEV *T : Terms)
+ dbgs() << *T << "\n";
+ });
- assert(Expr->isAffine() && "Expr should be affine");
+ // Remove duplicates.
+ std::sort(Terms.begin(), Terms.end());
+ Terms.erase(std::unique(Terms.begin(), Terms.end()), Terms.end());
- const SCEV *Start = divide(SE, Expr->getStart(), GCD);
- const SCEV *Step = divide(SE, Expr->getStepRecurrence(SE), GCD);
+ // Put larger terms first.
+ std::sort(Terms.begin(), Terms.end(), [](const SCEV *LHS, const SCEV *RHS) {
+ return numberOfTerms(LHS) > numberOfTerms(RHS);
+ });
- return SE.getAddRecExpr(Start, Step, Expr->getLoop(),
- Expr->getNoWrapFlags());
- }
+ ScalarEvolution &SE = *const_cast<ScalarEvolution *>(this);
- const SCEV *visitSMaxExpr(const SCEVSMaxExpr *Expr) {
- if (GCD == Expr)
- return One;
- return Expr;
+ // Divide all terms by the element size.
+ for (const SCEV *&Term : Terms) {
+ const SCEV *Q, *R;
+ SCEVDivision::divide(SE, Term, ElementSize, &Q, &R);
+ Term = Q;
}
- const SCEV *visitUMaxExpr(const SCEVUMaxExpr *Expr) {
- if (GCD == Expr)
- return One;
- return Expr;
- }
+ SmallVector<const SCEV *, 4> NewTerms;
- const SCEV *visitUnknown(const SCEVUnknown *Expr) {
- if (GCD == Expr)
- return One;
- return Expr;
+ // Remove constant factors.
+ for (const SCEV *T : Terms)
+ if (const SCEV *NewT = removeConstantFactors(SE, T))
+ NewTerms.push_back(NewT);
+
+ DEBUG({
+ dbgs() << "Terms after sorting:\n";
+ for (const SCEV *T : NewTerms)
+ dbgs() << *T << "\n";
+ });
+
+ if (NewTerms.empty() ||
+ !findArrayDimensionsRec(SE, NewTerms, Sizes)) {
+ Sizes.clear();
+ return;
}
- const SCEV *visitCouldNotCompute(const SCEVCouldNotCompute *Expr) {
- return Expr;
+ // The last element to be pushed into Sizes is the size of an element.
+ Sizes.push_back(ElementSize);
+
+ DEBUG({
+ dbgs() << "Sizes:\n";
+ for (const SCEV *S : Sizes)
+ dbgs() << *S << "\n";
+ });
+}
+
+/// Third step of delinearization: compute the access functions for the
+/// Subscripts based on the dimensions in Sizes.
+void SCEVAddRecExpr::computeAccessFunctions(
+ ScalarEvolution &SE, SmallVectorImpl<const SCEV *> &Subscripts,
+ SmallVectorImpl<const SCEV *> &Sizes) const {
+
+ // Early exit in case this SCEV is not an affine multivariate function.
+ if (Sizes.empty() || !this->isAffine())
+ return;
+
+ const SCEV *Res = this;
+ int Last = Sizes.size() - 1;
+ for (int i = Last; i >= 0; i--) {
+ const SCEV *Q, *R;
+ SCEVDivision::divide(SE, Res, Sizes[i], &Q, &R);
+
+ DEBUG({
+ dbgs() << "Res: " << *Res << "\n";
+ dbgs() << "Sizes[i]: " << *Sizes[i] << "\n";
+ dbgs() << "Res divided by Sizes[i]:\n";
+ dbgs() << "Quotient: " << *Q << "\n";
+ dbgs() << "Remainder: " << *R << "\n";
+ });
+
+ Res = Q;
+
+ // Do not record the last subscript corresponding to the size of elements in
+ // the array.
+ if (i == Last) {
+
+ // Bail out if the remainder is too complex.
+ if (isa<SCEVAddRecExpr>(R)) {
+ Subscripts.clear();
+ Sizes.clear();
+ return;
+ }
+
+ continue;
+ }
+
+ // Record the access function for the current subscript.
+ Subscripts.push_back(R);
}
-private:
- ScalarEvolution &SE;
- const SCEV *GCD, *Zero, *One;
-};
+ // Also push the remainder of the last division in the last position: it is
+ // the access function of the innermost dimension.
+ Subscripts.push_back(Res);
+
+ std::reverse(Subscripts.begin(), Subscripts.end());
+
+ DEBUG({
+ dbgs() << "Subscripts:\n";
+ for (const SCEV *S : Subscripts)
+ dbgs() << *S << "\n";
+ });
}
/// Splits the SCEV into two vectors of SCEVs representing the subscripts and
@@ -7239,84 +7537,40 @@ private:
/// asking for the SCEV of the memory access with respect to all enclosing
/// loops, calling SCEV->delinearize on that and printing the results.
-const SCEV *
-SCEVAddRecExpr::delinearize(ScalarEvolution &SE,
- SmallVectorImpl<const SCEV *> &Subscripts,
- SmallVectorImpl<const SCEV *> &Sizes) const {
- // Early exit in case this SCEV is not an affine multivariate function.
- if (!this->isAffine())
- return this;
-
- const SCEV *Start = this->getStart();
- const SCEV *Step = this->getStepRecurrence(SE);
-
- // Build the SCEV representation of the canonical induction variable in the
- // loop of this SCEV.
- const SCEV *Zero = SE.getConstant(this->getType(), 0);
- const SCEV *One = SE.getConstant(this->getType(), 1);
- const SCEV *IV =
- SE.getAddRecExpr(Zero, One, this->getLoop(), this->getNoWrapFlags());
-
- DEBUG(dbgs() << "(delinearize: " << *this << "\n");
-
- // When the stride of this SCEV is 1, do not compute the GCD: the size of this
- // subscript is 1, and this same SCEV for the access function.
- const SCEV *Remainder = Zero;
- const SCEV *GCD = One;
-
- // Find the GCD and Remainder of the Start and Step coefficients of this SCEV.
- if (Step != One && !Step->isAllOnesValue())
- GCD = SCEVGCD::findGCD(SE, Start, Step, &Remainder);
-
- DEBUG(dbgs() << "GCD: " << *GCD << "\n");
- DEBUG(dbgs() << "Remainder: " << *Remainder << "\n");
-
- const SCEV *Quotient = Start;
- if (GCD != One && !GCD->isAllOnesValue())
- // As findGCD computed Remainder, GCD divides "Start - Remainder." The
- // Quotient is then this SCEV without Remainder, scaled down by the GCD. The
- // Quotient is what will be used in the next subscript delinearization.
- Quotient = SCEVDivision::divide(SE, SE.getMinusSCEV(Start, Remainder), GCD);
-
- DEBUG(dbgs() << "Quotient: " << *Quotient << "\n");
-
- const SCEV *Rem = Quotient;
- if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Quotient))
- // Recursively call delinearize on the Quotient until there are no more
- // multiples that can be recognized.
- Rem = AR->delinearize(SE, Subscripts, Sizes);
-
- // Scale up the canonical induction variable IV by whatever remains from the
- // Step after division by the GCD: the GCD is the size of all the sub-array.
- if (Step != One && !Step->isAllOnesValue() && GCD != One &&
- !GCD->isAllOnesValue() && Step != GCD) {
- Step = SCEVDivision::divide(SE, Step, GCD);
- IV = SE.getMulExpr(IV, Step);
- }
- // The access function in the current subscript is computed as the canonical
- // induction variable IV (potentially scaled up by the step) and offset by
- // Rem, the offset of delinearization in the sub-array.
- const SCEV *Index = SE.getAddExpr(IV, Rem);
-
- // Record the access function and the size of the current subscript.
- Subscripts.push_back(Index);
- Sizes.push_back(GCD);
+void SCEVAddRecExpr::delinearize(ScalarEvolution &SE,
+ SmallVectorImpl<const SCEV *> &Subscripts,
+ SmallVectorImpl<const SCEV *> &Sizes,
+ const SCEV *ElementSize) const {
+ // First step: collect parametric terms.
+ SmallVector<const SCEV *, 4> Terms;
+ collectParametricTerms(SE, Terms);
-#ifndef NDEBUG
- int Size = Sizes.size();
- DEBUG(dbgs() << "succeeded to delinearize " << *this << "\n");
- DEBUG(dbgs() << "ArrayDecl[UnknownSize]");
- for (int i = 0; i < Size - 1; i++)
- DEBUG(dbgs() << "[" << *Sizes[i] << "]");
- DEBUG(dbgs() << " with elements of " << *Sizes[Size - 1] << " bytes.\n");
-
- DEBUG(dbgs() << "ArrayRef");
- for (int i = 0; i < Size; i++)
- DEBUG(dbgs() << "[" << *Subscripts[i] << "]");
- DEBUG(dbgs() << "\n)\n");
-#endif
+ if (Terms.empty())
+ return;
+
+ // Second step: find subscript sizes.
+ SE.findArrayDimensions(Terms, Sizes, ElementSize);
+
+ if (Sizes.empty())
+ return;
+
+ // Third step: compute the access functions for each subscript.
+ computeAccessFunctions(SE, Subscripts, Sizes);
+
+ if (Subscripts.empty())
+ return;
+
+ DEBUG({
+ dbgs() << "succeeded to delinearize " << *this << "\n";
+ dbgs() << "ArrayDecl[UnknownSize]";
+ for (const SCEV *S : Sizes)
+ dbgs() << "[" << *S << "]";
- return Remainder;
+ dbgs() << "\nArrayRef";
+ for (const SCEV *S : Subscripts)
+ dbgs() << "[" << *S << "]";
+ dbgs() << "\n";
+ });
}
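// [Editor's note: illustrative end-to-end example, not part of this commit;
// the names %m, L1, L2 are hypothetical. For a row-major access A[i][j]
// with 8-byte elements, the offset SCEV {{0,+,(8 * %m)}<L1>,+,8}<L2>
// delinearizes in three steps:
//   1. collectParametricTerms:   Terms      = {(8 * %m)}
//   2. findArrayDimensions:      Sizes      = {%m, 8}
//   3. computeAccessFunctions:   Subscripts = {{0,+,1}<L1>, {0,+,1}<L2>}
// recovering the subscripts i and j, with the debug output printing
// ArrayDecl[UnknownSize][%m] with elements of 8 bytes, and
// ArrayRef[{0,+,1}<L1>][{0,+,1}<L2>].]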
//===----------------------------------------------------------------------===//
@@ -7368,7 +7622,8 @@ ScalarEvolution::SCEVCallbackVH::SCEVCallbackVH(Value *V, ScalarEvolution *se)
//===----------------------------------------------------------------------===//
ScalarEvolution::ScalarEvolution()
- : FunctionPass(ID), ValuesAtScopes(64), LoopDispositions(64), BlockDispositions(64), FirstUnknown(0) {
+ : FunctionPass(ID), ValuesAtScopes(64), LoopDispositions(64),
+ BlockDispositions(64), FirstUnknown(nullptr) {
initializeScalarEvolutionPass(*PassRegistry::getPassRegistry());
}
@@ -7376,7 +7631,7 @@ bool ScalarEvolution::runOnFunction(Function &F) {
this->F = &F;
LI = &getAnalysis<LoopInfo>();
DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : 0;
+ DL = DLP ? &DLP->getDataLayout() : nullptr;
TLI = &getAnalysis<TargetLibraryInfo>();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
return false;
@@ -7387,7 +7642,7 @@ void ScalarEvolution::releaseMemory() {
// destructors, so that they release their references to their values.
for (SCEVUnknown *U = FirstUnknown; U; U = U->Next)
U->~SCEVUnknown();
- FirstUnknown = 0;
+ FirstUnknown = nullptr;
ValueExprMap.clear();
diff --git a/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp b/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
index 7be6aca..6933f74 100644
--- a/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
+++ b/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
@@ -34,7 +34,7 @@ namespace {
public:
static char ID; // Class identification, replacement for typeinfo
- ScalarEvolutionAliasAnalysis() : FunctionPass(ID), SE(0) {
+ ScalarEvolutionAliasAnalysis() : FunctionPass(ID), SE(nullptr) {
initializeScalarEvolutionAliasAnalysisPass(
*PassRegistry::getPassRegistry());
}
@@ -102,7 +102,7 @@ ScalarEvolutionAliasAnalysis::GetBaseValue(const SCEV *S) {
return U->getValue();
}
// No Identified object found.
- return 0;
+ return nullptr;
}
AliasAnalysis::AliasResult
@@ -162,10 +162,10 @@ ScalarEvolutionAliasAnalysis::alias(const Location &LocA,
if ((AO && AO != LocA.Ptr) || (BO && BO != LocB.Ptr))
if (alias(Location(AO ? AO : LocA.Ptr,
AO ? +UnknownSize : LocA.Size,
- AO ? 0 : LocA.TBAATag),
+ AO ? nullptr : LocA.TBAATag),
Location(BO ? BO : LocB.Ptr,
BO ? +UnknownSize : LocB.Size,
- BO ? 0 : LocB.TBAATag)) == NoAlias)
+ BO ? nullptr : LocB.TBAATag)) == NoAlias)
return NoAlias;
// Forward the query to the next analysis.
diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp
index fb3d595..b507043 100644
--- a/lib/Analysis/ScalarEvolutionExpander.cpp
+++ b/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -44,7 +44,7 @@ Value *SCEVExpander::ReuseOrCreateCast(Value *V, Type *Ty,
// not allowed to move it.
BasicBlock::iterator BIP = Builder.GetInsertPoint();
- Instruction *Ret = NULL;
+ Instruction *Ret = nullptr;
// Check to see if there is already a cast!
for (User *U : V->users())
@@ -627,21 +627,21 @@ static const Loop *PickMostRelevantLoop(const Loop *A, const Loop *B,
const Loop *SCEVExpander::getRelevantLoop(const SCEV *S) {
// Test whether we've already computed the most relevant loop for this SCEV.
std::pair<DenseMap<const SCEV *, const Loop *>::iterator, bool> Pair =
- RelevantLoops.insert(std::make_pair(S, static_cast<const Loop *>(0)));
+ RelevantLoops.insert(std::make_pair(S, nullptr));
if (!Pair.second)
return Pair.first->second;
if (isa<SCEVConstant>(S))
// A constant has no relevant loops.
- return 0;
+ return nullptr;
if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
if (const Instruction *I = dyn_cast<Instruction>(U->getValue()))
return Pair.first->second = SE.LI->getLoopFor(I->getParent());
// A non-instruction has no relevant loops.
- return 0;
+ return nullptr;
}
if (const SCEVNAryExpr *N = dyn_cast<SCEVNAryExpr>(S)) {
- const Loop *L = 0;
+ const Loop *L = nullptr;
if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S))
L = AR->getLoop();
for (SCEVNAryExpr::op_iterator I = N->op_begin(), E = N->op_end();
@@ -716,7 +716,7 @@ Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) {
// Emit instructions to add all the operands. Hoist as much as possible
// out of loops, and form meaningful getelementptrs where possible.
- Value *Sum = 0;
+ Value *Sum = nullptr;
for (SmallVectorImpl<std::pair<const Loop *, const SCEV *> >::iterator
I = OpsAndLoops.begin(), E = OpsAndLoops.end(); I != E; ) {
const Loop *CurLoop = I->first;
@@ -784,7 +784,7 @@ Value *SCEVExpander::visitMulExpr(const SCEVMulExpr *S) {
// Emit instructions to mul all the operands. Hoist as much as possible
// out of loops.
- Value *Prod = 0;
+ Value *Prod = nullptr;
for (SmallVectorImpl<std::pair<const Loop *, const SCEV *> >::iterator
I = OpsAndLoops.begin(), E = OpsAndLoops.end(); I != E; ) {
const SCEV *Op = I->second;
@@ -892,18 +892,18 @@ Instruction *SCEVExpander::getIVIncOperand(Instruction *IncV,
Instruction *InsertPos,
bool allowScale) {
if (IncV == InsertPos)
- return NULL;
+ return nullptr;
switch (IncV->getOpcode()) {
default:
- return NULL;
+ return nullptr;
// Check for a simple Add/Sub or GEP of a loop invariant step.
case Instruction::Add:
case Instruction::Sub: {
Instruction *OInst = dyn_cast<Instruction>(IncV->getOperand(1));
if (!OInst || SE.DT->dominates(OInst, InsertPos))
return dyn_cast<Instruction>(IncV->getOperand(0));
- return NULL;
+ return nullptr;
}
case Instruction::BitCast:
return dyn_cast<Instruction>(IncV->getOperand(0));
@@ -914,7 +914,7 @@ Instruction *SCEVExpander::getIVIncOperand(Instruction *IncV,
continue;
if (Instruction *OInst = dyn_cast<Instruction>(*I)) {
if (!SE.DT->dominates(OInst, InsertPos))
- return NULL;
+ return nullptr;
}
if (allowScale) {
// allow any kind of GEP as long as it can be hoisted.
@@ -925,11 +925,11 @@ Instruction *SCEVExpander::getIVIncOperand(Instruction *IncV,
// have 2 operands. i1* is used by the expander to represent an
// address-size element.
if (IncV->getNumOperands() != 2)
- return NULL;
+ return nullptr;
unsigned AS = cast<PointerType>(IncV->getType())->getAddressSpace();
if (IncV->getType() != Type::getInt1PtrTy(SE.getContext(), AS)
&& IncV->getType() != Type::getInt8PtrTy(SE.getContext(), AS))
- return NULL;
+ return nullptr;
break;
}
return dyn_cast<Instruction>(IncV->getOperand(0));
@@ -1077,9 +1077,9 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
// Reuse a previously-inserted PHI, if present.
BasicBlock *LatchBlock = L->getLoopLatch();
if (LatchBlock) {
- PHINode *AddRecPhiMatch = 0;
- Instruction *IncV = 0;
- TruncTy = 0;
+ PHINode *AddRecPhiMatch = nullptr;
+ Instruction *IncV = nullptr;
+ TruncTy = nullptr;
InvertStep = false;
// Only try partially matching scevs that need truncation and/or
@@ -1120,7 +1120,7 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
// Stop if we have found an exact match SCEV.
if (IsMatchingSCEV) {
IncV = TempIncV;
- TruncTy = 0;
+ TruncTy = nullptr;
InvertStep = false;
AddRecPhiMatch = PN;
break;
@@ -1243,13 +1243,13 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {
PostIncLoopSet Loops;
Loops.insert(L);
Normalized =
- cast<SCEVAddRecExpr>(TransformForPostIncUse(Normalize, S, 0, 0,
- Loops, SE, *SE.DT));
+ cast<SCEVAddRecExpr>(TransformForPostIncUse(Normalize, S, nullptr,
+ nullptr, Loops, SE, *SE.DT));
}
// Strip off any non-loop-dominating component from the addrec start.
const SCEV *Start = Normalized->getStart();
- const SCEV *PostLoopOffset = 0;
+ const SCEV *PostLoopOffset = nullptr;
if (!SE.properlyDominates(Start, L->getHeader())) {
PostLoopOffset = Start;
Start = SE.getConstant(Normalized->getType(), 0);
@@ -1261,7 +1261,7 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {
// Strip off any non-loop-dominating component from the addrec step.
const SCEV *Step = Normalized->getStepRecurrence(SE);
- const SCEV *PostLoopScale = 0;
+ const SCEV *PostLoopScale = nullptr;
if (!SE.dominates(Step, L->getHeader())) {
PostLoopScale = Step;
Step = SE.getConstant(Normalized->getType(), 1);
@@ -1276,7 +1276,7 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {
Type *ExpandTy = PostLoopScale ? IntTy : STy;
// In some cases, we decide to reuse an existing phi node but need to truncate
// it and/or invert the step.
- Type *TruncTy = 0;
+ Type *TruncTy = nullptr;
bool InvertStep = false;
PHINode *PN = getAddRecExprPHILiterally(Normalized, L, ExpandTy, IntTy,
TruncTy, InvertStep);
@@ -1372,7 +1372,7 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
const Loop *L = S->getLoop();
// First check for an existing canonical IV in a suitable type.
- PHINode *CanonicalIV = 0;
+ PHINode *CanonicalIV = nullptr;
if (PHINode *PN = L->getCanonicalInductionVariable())
if (SE.getTypeSizeInBits(PN->getType()) >= SE.getTypeSizeInBits(Ty))
CanonicalIV = PN;
@@ -1393,7 +1393,7 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
while (isa<PHINode>(NewInsertPt) || isa<DbgInfoIntrinsic>(NewInsertPt) ||
isa<LandingPadInst>(NewInsertPt))
++NewInsertPt;
- V = expandCodeFor(SE.getTruncateExpr(SE.getUnknown(V), Ty), 0,
+ V = expandCodeFor(SE.getTruncateExpr(SE.getUnknown(V), Ty), nullptr,
NewInsertPt);
return V;
}
@@ -1666,7 +1666,8 @@ SCEVExpander::getOrInsertCanonicalInductionVariable(const Loop *L,
// Emit code for it.
BuilderType::InsertPointGuard Guard(Builder);
- PHINode *V = cast<PHINode>(expandCodeFor(H, 0, L->getHeader()->begin()));
+ PHINode *V = cast<PHINode>(expandCodeFor(H, nullptr,
+ L->getHeader()->begin()));
return V;
}
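The ScalarEvolutionExpander hunks apply the same modernization throughout: NULL and 0 become nullptr in initializers and returns, and null tests are spelled with operator!. A hedged reduction of the recurring shape (Node and firstTarget are made-up stand-ins):

    struct Node { Node *Next = nullptr; };

    Node *firstTarget(Node *Head) {
      Node *Ret = nullptr;        // was: Instruction *Ret = NULL;
      if (Head)
        Ret = Head->Next;
      if (!Ret)                   // was: if (Ret == 0)
        return nullptr;
      return Ret;
    }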
diff --git a/lib/Analysis/ScalarEvolutionNormalization.cpp b/lib/Analysis/ScalarEvolutionNormalization.cpp
index 1e4c0bd..e9db295 100644
--- a/lib/Analysis/ScalarEvolutionNormalization.cpp
+++ b/lib/Analysis/ScalarEvolutionNormalization.cpp
@@ -113,7 +113,7 @@ TransformImpl(const SCEV *S, Instruction *User, Value *OperandValToReplace) {
// Transform each operand.
for (SCEVNAryExpr::op_iterator I = AR->op_begin(), E = AR->op_end();
I != E; ++I) {
- Operands.push_back(TransformSubExpr(*I, LUser, 0));
+ Operands.push_back(TransformSubExpr(*I, LUser, nullptr));
}
// Conservatively use AnyWrap until/unless we need FlagNW.
const SCEV *Result = SE.getAddRecExpr(Operands, L, SCEV::FlagAnyWrap);
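The lone change here passes nullptr where TransformSubExpr has no operand value to replace. Spelled this way, the "nothing to replace" sentinel can no longer be misread as an integer argument; a tiny hypothetical analogue:

    #include <cstdio>

    // nullptr as an explicit "nothing to replace" sentinel argument.
    void transform(const char *Expr, const char *ReplaceWith = nullptr) {
      std::printf("%s -> %s\n", Expr, ReplaceWith ? ReplaceWith : "<as-is>");
    }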
diff --git a/lib/Analysis/SparsePropagation.cpp b/lib/Analysis/SparsePropagation.cpp
index 87a4fa4..edd82f5 100644
--- a/lib/Analysis/SparsePropagation.cpp
+++ b/lib/Analysis/SparsePropagation.cpp
@@ -12,7 +12,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "sparseprop"
#include "llvm/Analysis/SparsePropagation.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
@@ -21,6 +20,8 @@
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
+#define DEBUG_TYPE "sparseprop"
+
//===----------------------------------------------------------------------===//
// AbstractLatticeFunction Implementation
//===----------------------------------------------------------------------===//
@@ -147,7 +148,7 @@ void SparseSolver::getFeasibleSuccessors(TerminatorInst &TI,
return;
Constant *C = LatticeFunc->GetConstant(BCValue, BI->getCondition(), *this);
- if (C == 0 || !isa<ConstantInt>(C)) {
+ if (!C || !isa<ConstantInt>(C)) {
// Non-constant values can go either way.
Succs[0] = Succs[1] = true;
return;
@@ -189,7 +190,7 @@ void SparseSolver::getFeasibleSuccessors(TerminatorInst &TI,
return;
Constant *C = LatticeFunc->GetConstant(SCValue, SI.getCondition(), *this);
- if (C == 0 || !isa<ConstantInt>(C)) {
+ if (!C || !isa<ConstantInt>(C)) {
// All destinations are executable!
Succs.assign(TI.getNumSuccessors(), true);
return;
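The DEBUG_TYPE hunk is part of a tree-wide move in this rebase: the macro is now defined after all #includes, so no header is ever preprocessed with a file's DEBUG_TYPE in scope (a prerequisite for modules-friendly headers). The before/after shape:

    // Before: every transitively included header saw the macro.
    //   #define DEBUG_TYPE "sparseprop"
    //   #include "llvm/Analysis/SparsePropagation.h"

    // After: includes first, then the file-local debug type.
    #include "llvm/Analysis/SparsePropagation.h"
    #define DEBUG_TYPE "sparseprop"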
diff --git a/lib/Analysis/TargetTransformInfo.cpp b/lib/Analysis/TargetTransformInfo.cpp
index 04d09f1..cdb0b79 100644
--- a/lib/Analysis/TargetTransformInfo.cpp
+++ b/lib/Analysis/TargetTransformInfo.cpp
@@ -7,7 +7,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "tti"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/DataLayout.h"
@@ -19,6 +18,8 @@
using namespace llvm;
+#define DEBUG_TYPE "tti"
+
// Setup the analysis group to manage the TargetTransformInfo passes.
INITIALIZE_ANALYSIS_GROUP(TargetTransformInfo, "Target Information", NoTTI)
char TargetTransformInfo::ID = 0;
@@ -234,7 +235,7 @@ namespace {
struct NoTTI final : ImmutablePass, TargetTransformInfo {
const DataLayout *DL;
- NoTTI() : ImmutablePass(ID), DL(0) {
+ NoTTI() : ImmutablePass(ID), DL(nullptr) {
initializeNoTTIPass(*PassRegistry::getPassRegistry());
}
@@ -242,9 +243,9 @@ struct NoTTI final : ImmutablePass, TargetTransformInfo {
// Note that this subclass is special, and must *not* call initializeTTI as
// it does not chain.
TopTTI = this;
- PrevTTI = 0;
+ PrevTTI = nullptr;
DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : 0;
+ DL = DLP ? &DLP->getDataLayout() : nullptr;
}
virtual void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -443,7 +444,7 @@ struct NoTTI final : ImmutablePass, TargetTransformInfo {
// Otherwise delegate to the fully generic implementations.
return getOperationCost(Operator::getOpcode(U), U->getType(),
U->getNumOperands() == 1 ?
- U->getOperand(0)->getType() : 0);
+ U->getOperand(0)->getType() : nullptr);
}
bool hasBranchDivergence() const override { return false; }
@@ -567,7 +568,7 @@ struct NoTTI final : ImmutablePass, TargetTransformInfo {
}
unsigned getShuffleCost(ShuffleKind Kind, Type *Ty,
- int Index = 0, Type *SubTp = 0) const override {
+ int Index = 0, Type *SubTp = nullptr) const override {
return 1;
}
@@ -581,7 +582,7 @@ struct NoTTI final : ImmutablePass, TargetTransformInfo {
}
unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
- Type *CondTy = 0) const override {
+ Type *CondTy = nullptr) const override {
return 1;
}
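Note the asymmetry these TargetTransformInfo hunks preserve: integer defaults such as Index = 0 keep the literal, while pointer defaults become nullptr. A reduced sketch (shuffleCost is hypothetical, loosely modeled on getShuffleCost above):

    // The integer default stays 0; the pointer default is spelled nullptr.
    unsigned shuffleCost(int Index = 0, const int *SubTp = nullptr) {
      return SubTp ? 2u : 1u;
    }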
diff --git a/lib/Analysis/TypeBasedAliasAnalysis.cpp b/lib/Analysis/TypeBasedAliasAnalysis.cpp
index 05daf18..f36f6f8 100644
--- a/lib/Analysis/TypeBasedAliasAnalysis.cpp
+++ b/lib/Analysis/TypeBasedAliasAnalysis.cpp
@@ -144,7 +144,7 @@ namespace {
const MDNode *Node;
public:
- TBAANode() : Node(0) {}
+ TBAANode() : Node(nullptr) {}
explicit TBAANode(const MDNode *N) : Node(N) {}
/// getNode - Get the MDNode for this TBAANode.
@@ -182,7 +182,6 @@ namespace {
const MDNode *Node;
public:
- TBAAStructTagNode() : Node(0) {}
explicit TBAAStructTagNode(const MDNode *N) : Node(N) {}
/// Get the MDNode for this TBAAStructTagNode.
@@ -218,7 +217,7 @@ namespace {
const MDNode *Node;
public:
- TBAAStructTypeNode() : Node(0) {}
+ TBAAStructTypeNode() : Node(nullptr) {}
explicit TBAAStructTypeNode(const MDNode *N) : Node(N) {}
/// Get the MDNode for this TBAAStructTypeNode.
@@ -340,7 +339,8 @@ static bool isStructPathTBAA(const MDNode *MD) {
bool
TypeBasedAliasAnalysis::Aliases(const MDNode *A,
const MDNode *B) const {
- if (isStructPathTBAA(A))
+ // Make sure that both MDNodes are struct-path aware.
+ if (isStructPathTBAA(A) && isStructPathTBAA(B))
return PathAliases(A, B);
// Keep track of the root node for A and B.
@@ -386,6 +386,10 @@ TypeBasedAliasAnalysis::Aliases(const MDNode *A,
bool
TypeBasedAliasAnalysis::PathAliases(const MDNode *A,
const MDNode *B) const {
+ // Verify that both input nodes are struct-path aware.
+ assert(isStructPathTBAA(A) && "MDNode A is not struct-path aware.");
+ assert(isStructPathTBAA(B) && "MDNode B is not struct-path aware.");
+
// Keep track of the root node for A and B.
TBAAStructTypeNode RootA, RootB;
TBAAStructTagNode TagA(A), TagB(B);
@@ -555,38 +559,40 @@ bool MDNode::isTBAAVtableAccess() const {
MDNode *MDNode::getMostGenericTBAA(MDNode *A, MDNode *B) {
if (!A || !B)
- return NULL;
+ return nullptr;
if (A == B)
return A;
// For struct-path aware TBAA, we use the access type of the tag.
- bool StructPath = isStructPathTBAA(A);
+ bool StructPath = isStructPathTBAA(A) && isStructPathTBAA(B);
if (StructPath) {
A = cast_or_null<MDNode>(A->getOperand(1));
- if (!A) return 0;
+ if (!A) return nullptr;
B = cast_or_null<MDNode>(B->getOperand(1));
- if (!B) return 0;
+ if (!B) return nullptr;
}
SmallVector<MDNode *, 4> PathA;
MDNode *T = A;
while (T) {
PathA.push_back(T);
- T = T->getNumOperands() >= 2 ? cast_or_null<MDNode>(T->getOperand(1)) : 0;
+ T = T->getNumOperands() >= 2 ? cast_or_null<MDNode>(T->getOperand(1))
+ : nullptr;
}
SmallVector<MDNode *, 4> PathB;
T = B;
while (T) {
PathB.push_back(T);
- T = T->getNumOperands() >= 2 ? cast_or_null<MDNode>(T->getOperand(1)) : 0;
+ T = T->getNumOperands() >= 2 ? cast_or_null<MDNode>(T->getOperand(1))
+ : nullptr;
}
int IA = PathA.size() - 1;
int IB = PathB.size() - 1;
- MDNode *Ret = 0;
+ MDNode *Ret = nullptr;
while (IA >= 0 && IB >=0) {
if (PathA[IA] == PathB[IB])
Ret = PathA[IA];
@@ -599,7 +605,7 @@ MDNode *MDNode::getMostGenericTBAA(MDNode *A, MDNode *B) {
return Ret;
if (!Ret)
- return 0;
+ return nullptr;
// We need to convert from a type node to a tag node.
Type *Int64 = IntegerType::get(A->getContext(), 64);
Value *Ops[3] = { Ret, Ret, ConstantInt::get(Int64, 0) };
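Unlike the mechanical hunks around it, this TypeBasedAliasAnalysis change fixes behavior: Aliases() and getMostGenericTBAA() used to branch on isStructPathTBAA(A) alone, which misfires when A and B carry different TBAA formats. Both tags must now be struct-path before that route is taken, and PathAliases() asserts the invariant. The guard as landed:

    // Take the struct-path route only when both tags use the new format;
    // mixed-format queries fall through to the conservative walk below.
    if (isStructPathTBAA(A) && isStructPathTBAA(B))
      return PathAliases(A, B);   // asserts both operands are struct-path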
diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp
index 72617a0..4f48753 100644
--- a/lib/Analysis/ValueTracking.cpp
+++ b/lib/Analysis/ValueTracking.cpp
@@ -16,6 +16,7 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/IR/CallSite.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
@@ -44,10 +45,10 @@ static unsigned getBitWidth(Type *Ty, const DataLayout *TD) {
return TD ? TD->getPointerTypeSizeInBits(Ty) : 0;
}
-static void ComputeMaskedBitsAddSub(bool Add, Value *Op0, Value *Op1, bool NSW,
- APInt &KnownZero, APInt &KnownOne,
- APInt &KnownZero2, APInt &KnownOne2,
- const DataLayout *TD, unsigned Depth) {
+static void computeKnownBitsAddSub(bool Add, Value *Op0, Value *Op1, bool NSW,
+ APInt &KnownZero, APInt &KnownOne,
+ APInt &KnownZero2, APInt &KnownOne2,
+ const DataLayout *TD, unsigned Depth) {
if (!Add) {
if (ConstantInt *CLHS = dyn_cast<ConstantInt>(Op0)) {
// We know that the top bits of C-X are clear if X contains less bits
@@ -58,7 +59,7 @@ static void ComputeMaskedBitsAddSub(bool Add, Value *Op0, Value *Op1, bool NSW,
unsigned NLZ = (CLHS->getValue()+1).countLeadingZeros();
// NLZ can't be BitWidth with no sign bit
APInt MaskV = APInt::getHighBitsSet(BitWidth, NLZ+1);
- llvm::ComputeMaskedBits(Op1, KnownZero2, KnownOne2, TD, Depth+1);
+ llvm::computeKnownBits(Op1, KnownZero2, KnownOne2, TD, Depth+1);
// If all of the MaskV bits are known to be zero, then we know the
// output top bits are zero, because we now know that the output is
@@ -79,13 +80,10 @@ static void ComputeMaskedBitsAddSub(bool Add, Value *Op0, Value *Op1, bool NSW,
// result. For an add, this works with either operand. For a subtract,
// this only works if the known zeros are in the right operand.
APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0);
- llvm::ComputeMaskedBits(Op0, LHSKnownZero, LHSKnownOne, TD, Depth+1);
- assert((LHSKnownZero & LHSKnownOne) == 0 &&
- "Bits known to be one AND zero?");
+ llvm::computeKnownBits(Op0, LHSKnownZero, LHSKnownOne, TD, Depth+1);
unsigned LHSKnownZeroOut = LHSKnownZero.countTrailingOnes();
- llvm::ComputeMaskedBits(Op1, KnownZero2, KnownOne2, TD, Depth+1);
- assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+ llvm::computeKnownBits(Op1, KnownZero2, KnownOne2, TD, Depth+1);
unsigned RHSKnownZeroOut = KnownZero2.countTrailingOnes();
// Determine which operand has more trailing zeros, and use that
@@ -130,15 +128,13 @@ static void ComputeMaskedBitsAddSub(bool Add, Value *Op0, Value *Op1, bool NSW,
}
}
-static void ComputeMaskedBitsMul(Value *Op0, Value *Op1, bool NSW,
- APInt &KnownZero, APInt &KnownOne,
- APInt &KnownZero2, APInt &KnownOne2,
- const DataLayout *TD, unsigned Depth) {
+static void computeKnownBitsMul(Value *Op0, Value *Op1, bool NSW,
+ APInt &KnownZero, APInt &KnownOne,
+ APInt &KnownZero2, APInt &KnownOne2,
+ const DataLayout *TD, unsigned Depth) {
unsigned BitWidth = KnownZero.getBitWidth();
- ComputeMaskedBits(Op1, KnownZero, KnownOne, TD, Depth+1);
- ComputeMaskedBits(Op0, KnownZero2, KnownOne2, TD, Depth+1);
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
- assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+ computeKnownBits(Op1, KnownZero, KnownOne, TD, Depth+1);
+ computeKnownBits(Op0, KnownZero2, KnownOne2, TD, Depth+1);
bool isKnownNegative = false;
bool isKnownNonNegative = false;
@@ -192,7 +188,7 @@ static void ComputeMaskedBitsMul(Value *Op0, Value *Op1, bool NSW,
KnownOne.setBit(BitWidth - 1);
}
-void llvm::computeMaskedBitsLoad(const MDNode &Ranges, APInt &KnownZero) {
+void llvm::computeKnownBitsLoad(const MDNode &Ranges, APInt &KnownZero) {
unsigned BitWidth = KnownZero.getBitWidth();
unsigned NumRanges = Ranges.getNumOperands() / 2;
assert(NumRanges >= 1);
@@ -211,8 +207,9 @@ void llvm::computeMaskedBitsLoad(const MDNode &Ranges, APInt &KnownZero) {
KnownZero = APInt::getHighBitsSet(BitWidth, MinLeadingZeros);
}
-/// ComputeMaskedBits - Determine which of the bits are known to be either zero
-/// or one and return them in the KnownZero/KnownOne bit sets.
+
+/// Determine which bits of V are known to be either zero or one and return
+/// them in the KnownZero/KnownOne bit sets.
///
/// NOTE: we cannot consider 'undef' to be "IsZero" here. The problem is that
/// we cannot optimize based on the assumption that it is zero without changing
@@ -226,8 +223,8 @@ void llvm::computeMaskedBitsLoad(const MDNode &Ranges, APInt &KnownZero) {
/// where V is a vector, known zero, and known one values are the
/// same width as the vector element, and the bit is set only if it is true
/// for all of the elements in the vector.
-void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne,
- const DataLayout *TD, unsigned Depth) {
+void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
+ const DataLayout *TD, unsigned Depth) {
assert(V && "No Value?");
assert(Depth <= MaxDepth && "Limit Search Depth");
unsigned BitWidth = KnownZero.getBitWidth();
@@ -241,7 +238,7 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne,
V->getType()->getScalarSizeInBits() == BitWidth) &&
KnownZero.getBitWidth() == BitWidth &&
KnownOne.getBitWidth() == BitWidth &&
- "V, Mask, KnownOne and KnownZero should have same BitWidth");
+ "V, KnownOne and KnownZero should have same BitWidth");
if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
// We know all of the bits for a constant!
@@ -303,7 +300,7 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne,
if (GA->mayBeOverridden()) {
KnownZero.clearAllBits(); KnownOne.clearAllBits();
} else {
- ComputeMaskedBits(GA->getAliasee(), KnownZero, KnownOne, TD, Depth+1);
+ computeKnownBits(GA->getAliasee(), KnownZero, KnownOne, TD, Depth+1);
}
return;
}
@@ -341,49 +338,43 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne,
default: break;
case Instruction::Load:
if (MDNode *MD = cast<LoadInst>(I)->getMetadata(LLVMContext::MD_range))
- computeMaskedBitsLoad(*MD, KnownZero);
- return;
+ computeKnownBitsLoad(*MD, KnownZero);
+ break;
case Instruction::And: {
// If either the LHS or the RHS are Zero, the result is zero.
- ComputeMaskedBits(I->getOperand(1), KnownZero, KnownOne, TD, Depth+1);
- ComputeMaskedBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1);
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
- assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+ computeKnownBits(I->getOperand(1), KnownZero, KnownOne, TD, Depth+1);
+ computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1);
// Output known-1 bits are only known if set in both the LHS & RHS.
KnownOne &= KnownOne2;
// Output known-0 are known to be clear if zero in either the LHS | RHS.
KnownZero |= KnownZero2;
- return;
+ break;
}
case Instruction::Or: {
- ComputeMaskedBits(I->getOperand(1), KnownZero, KnownOne, TD, Depth+1);
- ComputeMaskedBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1);
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
- assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+ computeKnownBits(I->getOperand(1), KnownZero, KnownOne, TD, Depth+1);
+ computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1);
// Output known-0 bits are only known if clear in both the LHS & RHS.
KnownZero &= KnownZero2;
// Output known-1 are known to be set if set in either the LHS | RHS.
KnownOne |= KnownOne2;
- return;
+ break;
}
case Instruction::Xor: {
- ComputeMaskedBits(I->getOperand(1), KnownZero, KnownOne, TD, Depth+1);
- ComputeMaskedBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1);
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
- assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+ computeKnownBits(I->getOperand(1), KnownZero, KnownOne, TD, Depth+1);
+ computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1);
// Output known-0 bits are known if clear or set in both the LHS & RHS.
APInt KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2);
// Output known-1 are known to be set if set in only one of the LHS, RHS.
KnownOne = (KnownZero & KnownOne2) | (KnownOne & KnownZero2);
KnownZero = KnownZeroOut;
- return;
+ break;
}
case Instruction::Mul: {
bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
- ComputeMaskedBitsMul(I->getOperand(0), I->getOperand(1), NSW,
+ computeKnownBitsMul(I->getOperand(0), I->getOperand(1), NSW,
KnownZero, KnownOne, KnownZero2, KnownOne2, TD, Depth);
break;
}
@@ -391,42 +382,40 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne,
// For the purposes of computing leading zeros we can conservatively
// treat a udiv as a logical right shift by the power of 2 known to
// be less than the denominator.
- ComputeMaskedBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1);
+ computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1);
unsigned LeadZ = KnownZero2.countLeadingOnes();
KnownOne2.clearAllBits();
KnownZero2.clearAllBits();
- ComputeMaskedBits(I->getOperand(1), KnownZero2, KnownOne2, TD, Depth+1);
+ computeKnownBits(I->getOperand(1), KnownZero2, KnownOne2, TD, Depth+1);
unsigned RHSUnknownLeadingOnes = KnownOne2.countLeadingZeros();
if (RHSUnknownLeadingOnes != BitWidth)
LeadZ = std::min(BitWidth,
LeadZ + BitWidth - RHSUnknownLeadingOnes - 1);
KnownZero = APInt::getHighBitsSet(BitWidth, LeadZ);
- return;
+ break;
}
case Instruction::Select:
- ComputeMaskedBits(I->getOperand(2), KnownZero, KnownOne, TD, Depth+1);
- ComputeMaskedBits(I->getOperand(1), KnownZero2, KnownOne2, TD,
+ computeKnownBits(I->getOperand(2), KnownZero, KnownOne, TD, Depth+1);
+ computeKnownBits(I->getOperand(1), KnownZero2, KnownOne2, TD,
Depth+1);
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
- assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
// Only known if known in both the LHS and RHS.
KnownOne &= KnownOne2;
KnownZero &= KnownZero2;
- return;
+ break;
case Instruction::FPTrunc:
case Instruction::FPExt:
case Instruction::FPToUI:
case Instruction::FPToSI:
case Instruction::SIToFP:
case Instruction::UIToFP:
- return; // Can't work with floating point.
+ break; // Can't work with floating point.
case Instruction::PtrToInt:
case Instruction::IntToPtr:
// We can't handle these if we don't know the pointer size.
- if (!TD) return;
+ if (!TD) break;
// FALL THROUGH and handle them the same as zext/trunc.
case Instruction::ZExt:
case Instruction::Trunc: {
@@ -439,19 +428,19 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne,
SrcBitWidth = TD->getTypeSizeInBits(SrcTy->getScalarType());
} else {
SrcBitWidth = SrcTy->getScalarSizeInBits();
- if (!SrcBitWidth) return;
+ if (!SrcBitWidth) break;
}
assert(SrcBitWidth && "SrcBitWidth can't be zero");
KnownZero = KnownZero.zextOrTrunc(SrcBitWidth);
KnownOne = KnownOne.zextOrTrunc(SrcBitWidth);
- ComputeMaskedBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1);
+ computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1);
KnownZero = KnownZero.zextOrTrunc(BitWidth);
KnownOne = KnownOne.zextOrTrunc(BitWidth);
// Any top bits are known to be zero.
if (BitWidth > SrcBitWidth)
KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth);
- return;
+ break;
}
case Instruction::BitCast: {
Type *SrcTy = I->getOperand(0)->getType();
@@ -459,8 +448,8 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne,
// TODO: For now, not handling conversions like:
// (bitcast i64 %x to <2 x i32>)
!I->getType()->isVectorTy()) {
- ComputeMaskedBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1);
- return;
+ computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1);
+ break;
}
break;
}
@@ -470,8 +459,7 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne,
KnownZero = KnownZero.trunc(SrcBitWidth);
KnownOne = KnownOne.trunc(SrcBitWidth);
- ComputeMaskedBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1);
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1);
KnownZero = KnownZero.zext(BitWidth);
KnownOne = KnownOne.zext(BitWidth);
@@ -481,18 +469,17 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne,
KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth);
else if (KnownOne[SrcBitWidth-1]) // Input sign bit known set
KnownOne |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth);
- return;
+ break;
}
case Instruction::Shl:
// (shl X, C1) & C2 == 0 iff (X & C2 >>u C1) == 0
if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
uint64_t ShiftAmt = SA->getLimitedValue(BitWidth);
- ComputeMaskedBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1);
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1);
KnownZero <<= ShiftAmt;
KnownOne <<= ShiftAmt;
KnownZero |= APInt::getLowBitsSet(BitWidth, ShiftAmt); // low bits known 0
- return;
+ break;
}
break;
case Instruction::LShr:
@@ -502,13 +489,12 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne,
uint64_t ShiftAmt = SA->getLimitedValue(BitWidth);
// Unsigned shift right.
- ComputeMaskedBits(I->getOperand(0), KnownZero,KnownOne, TD, Depth+1);
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ computeKnownBits(I->getOperand(0), KnownZero,KnownOne, TD, Depth+1);
KnownZero = APIntOps::lshr(KnownZero, ShiftAmt);
KnownOne = APIntOps::lshr(KnownOne, ShiftAmt);
// high bits known zero.
KnownZero |= APInt::getHighBitsSet(BitWidth, ShiftAmt);
- return;
+ break;
}
break;
case Instruction::AShr:
@@ -518,8 +504,7 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne,
uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1);
// Signed shift right.
- ComputeMaskedBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1);
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1);
KnownZero = APIntOps::lshr(KnownZero, ShiftAmt);
KnownOne = APIntOps::lshr(KnownOne, ShiftAmt);
@@ -528,19 +513,19 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne,
KnownZero |= HighBits;
else if (KnownOne[BitWidth-ShiftAmt-1]) // New bits are known one.
KnownOne |= HighBits;
- return;
+ break;
}
break;
case Instruction::Sub: {
bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
- ComputeMaskedBitsAddSub(false, I->getOperand(0), I->getOperand(1), NSW,
+ computeKnownBitsAddSub(false, I->getOperand(0), I->getOperand(1), NSW,
KnownZero, KnownOne, KnownZero2, KnownOne2, TD,
Depth);
break;
}
case Instruction::Add: {
bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
- ComputeMaskedBitsAddSub(true, I->getOperand(0), I->getOperand(1), NSW,
+ computeKnownBitsAddSub(true, I->getOperand(0), I->getOperand(1), NSW,
KnownZero, KnownOne, KnownZero2, KnownOne2, TD,
Depth);
break;
@@ -550,7 +535,7 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne,
APInt RA = Rem->getValue().abs();
if (RA.isPowerOf2()) {
APInt LowBits = RA - 1;
- ComputeMaskedBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1);
+ computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1);
// The low bits of the first operand are unchanged by the srem.
KnownZero = KnownZero2 & LowBits;
@@ -574,8 +559,8 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne,
// remainder is zero.
if (KnownZero.isNonNegative()) {
APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0);
- ComputeMaskedBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, TD,
- Depth+1);
+ computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, TD,
+ Depth+1);
// If it's known zero, our sign bit is also zero.
if (LHSKnownZero.isNegative())
KnownZero.setBit(BitWidth - 1);
@@ -587,9 +572,8 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne,
APInt RA = Rem->getValue();
if (RA.isPowerOf2()) {
APInt LowBits = (RA - 1);
- ComputeMaskedBits(I->getOperand(0), KnownZero, KnownOne, TD,
- Depth+1);
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD,
+ Depth+1);
KnownZero |= ~LowBits;
KnownOne &= LowBits;
break;
@@ -598,8 +582,8 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne,
// Since the result is less than or equal to either operand, any leading
// zero bits in either operand must also exist in the result.
- ComputeMaskedBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1);
- ComputeMaskedBits(I->getOperand(1), KnownZero2, KnownOne2, TD, Depth+1);
+ computeKnownBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1);
+ computeKnownBits(I->getOperand(1), KnownZero2, KnownOne2, TD, Depth+1);
unsigned Leaders = std::max(KnownZero.countLeadingOnes(),
KnownZero2.countLeadingOnes());
@@ -622,8 +606,8 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne,
// Analyze all of the subscripts of this getelementptr instruction
// to determine if we can prove known low zero bits.
APInt LocalKnownZero(BitWidth, 0), LocalKnownOne(BitWidth, 0);
- ComputeMaskedBits(I->getOperand(0), LocalKnownZero, LocalKnownOne, TD,
- Depth+1);
+ computeKnownBits(I->getOperand(0), LocalKnownZero, LocalKnownOne, TD,
+ Depth+1);
unsigned TrailZ = LocalKnownZero.countTrailingOnes();
gep_type_iterator GTI = gep_type_begin(I);
@@ -631,8 +615,10 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne,
Value *Index = I->getOperand(i);
if (StructType *STy = dyn_cast<StructType>(*GTI)) {
// Handle struct member offset arithmetic.
- if (!TD)
- return;
+ if (!TD) {
+ TrailZ = 0;
+ break;
+ }
// Handle case when index is vector zeroinitializer
Constant *CIndex = cast<Constant>(Index);
@@ -650,11 +636,14 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne,
} else {
// Handle array index arithmetic.
Type *IndexedTy = GTI.getIndexedType();
- if (!IndexedTy->isSized()) return;
+ if (!IndexedTy->isSized()) {
+ TrailZ = 0;
+ break;
+ }
unsigned GEPOpiBits = Index->getType()->getScalarSizeInBits();
uint64_t TypeSize = TD ? TD->getTypeAllocSize(IndexedTy) : 1;
LocalKnownZero = LocalKnownOne = APInt(GEPOpiBits, 0);
- ComputeMaskedBits(Index, LocalKnownZero, LocalKnownOne, TD, Depth+1);
+ computeKnownBits(Index, LocalKnownZero, LocalKnownOne, TD, Depth+1);
TrailZ = std::min(TrailZ,
unsigned(countTrailingZeros(TypeSize) +
LocalKnownZero.countTrailingOnes()));
@@ -696,11 +685,11 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne,
break;
// Ok, we have a PHI of the form L op= R. Check for low
// zero bits.
- ComputeMaskedBits(R, KnownZero2, KnownOne2, TD, Depth+1);
+ computeKnownBits(R, KnownZero2, KnownOne2, TD, Depth+1);
// We need to take the minimum number of known bits
APInt KnownZero3(KnownZero), KnownOne3(KnownOne);
- ComputeMaskedBits(L, KnownZero3, KnownOne3, TD, Depth+1);
+ computeKnownBits(L, KnownZero3, KnownOne3, TD, Depth+1);
KnownZero = APInt::getLowBitsSet(BitWidth,
std::min(KnownZero2.countTrailingOnes(),
@@ -712,7 +701,7 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne,
// Unreachable blocks may have zero-operand PHI nodes.
if (P->getNumIncomingValues() == 0)
- return;
+ break;
// Otherwise take the unions of the known bit sets of the operands,
// taking conservative care to avoid excessive recursion.
@@ -731,8 +720,8 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne,
KnownOne2 = APInt(BitWidth, 0);
// Recurse, but cap the recursion to one level, because we don't
// want to waste time spinning around in loops.
- ComputeMaskedBits(P->getIncomingValue(i), KnownZero2, KnownOne2, TD,
- MaxDepth-1);
+ computeKnownBits(P->getIncomingValue(i), KnownZero2, KnownOne2, TD,
+ MaxDepth-1);
KnownZero &= KnownZero2;
KnownOne &= KnownOne2;
// If all bits have been ruled out, there's no need to check
@@ -776,30 +765,32 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne,
default: break;
case Intrinsic::uadd_with_overflow:
case Intrinsic::sadd_with_overflow:
- ComputeMaskedBitsAddSub(true, II->getArgOperand(0),
- II->getArgOperand(1), false, KnownZero,
- KnownOne, KnownZero2, KnownOne2, TD, Depth);
+ computeKnownBitsAddSub(true, II->getArgOperand(0),
+ II->getArgOperand(1), false, KnownZero,
+ KnownOne, KnownZero2, KnownOne2, TD, Depth);
break;
case Intrinsic::usub_with_overflow:
case Intrinsic::ssub_with_overflow:
- ComputeMaskedBitsAddSub(false, II->getArgOperand(0),
- II->getArgOperand(1), false, KnownZero,
- KnownOne, KnownZero2, KnownOne2, TD, Depth);
+ computeKnownBitsAddSub(false, II->getArgOperand(0),
+ II->getArgOperand(1), false, KnownZero,
+ KnownOne, KnownZero2, KnownOne2, TD, Depth);
break;
case Intrinsic::umul_with_overflow:
case Intrinsic::smul_with_overflow:
- ComputeMaskedBitsMul(II->getArgOperand(0), II->getArgOperand(1),
- false, KnownZero, KnownOne,
- KnownZero2, KnownOne2, TD, Depth);
+ computeKnownBitsMul(II->getArgOperand(0), II->getArgOperand(1),
+ false, KnownZero, KnownOne,
+ KnownZero2, KnownOne2, TD, Depth);
break;
}
}
}
}
+
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
}
/// ComputeSignBit - Determine whether the sign bit is known to be zero or
-/// one. Convenience wrapper around ComputeMaskedBits.
+/// one. Convenience wrapper around computeKnownBits.
void llvm::ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne,
const DataLayout *TD, unsigned Depth) {
unsigned BitWidth = getBitWidth(V->getType(), TD);
@@ -810,7 +801,7 @@ void llvm::ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne,
}
APInt ZeroBits(BitWidth, 0);
APInt OneBits(BitWidth, 0);
- ComputeMaskedBits(V, ZeroBits, OneBits, TD, Depth);
+ computeKnownBits(V, ZeroBits, OneBits, TD, Depth);
KnownOne = OneBits[BitWidth - 1];
KnownZero = ZeroBits[BitWidth - 1];
}
@@ -842,7 +833,7 @@ bool llvm::isKnownToBeAPowerOfTwo(Value *V, bool OrZero, unsigned Depth) {
if (Depth++ == MaxDepth)
return false;
- Value *X = 0, *Y = 0;
+ Value *X = nullptr, *Y = nullptr;
// A shift of a power of two is a power of two or zero.
if (OrZero && (match(V, m_Shl(m_Value(X), m_Value())) ||
match(V, m_Shr(m_Value(X), m_Value()))))
@@ -882,10 +873,10 @@ bool llvm::isKnownToBeAPowerOfTwo(Value *V, bool OrZero, unsigned Depth) {
unsigned BitWidth = V->getType()->getScalarSizeInBits();
APInt LHSZeroBits(BitWidth, 0), LHSOneBits(BitWidth, 0);
- ComputeMaskedBits(X, LHSZeroBits, LHSOneBits, 0, Depth);
+ computeKnownBits(X, LHSZeroBits, LHSOneBits, nullptr, Depth);
APInt RHSZeroBits(BitWidth, 0), RHSOneBits(BitWidth, 0);
- ComputeMaskedBits(Y, RHSZeroBits, RHSOneBits, 0, Depth);
+ computeKnownBits(Y, RHSZeroBits, RHSOneBits, nullptr, Depth);
// If i8 V is a power of two or zero:
// ZeroBits: 1 1 1 0 1 1 1 1
// ~ZeroBits: 0 0 0 1 0 0 0 0
@@ -1005,7 +996,7 @@ bool llvm::isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth) {
unsigned BitWidth = getBitWidth(V->getType()->getScalarType(), TD);
// X | Y != 0 if X != 0 or Y != 0.
- Value *X = 0, *Y = 0;
+ Value *X = nullptr, *Y = nullptr;
if (match(V, m_Or(m_Value(X), m_Value(Y))))
return isKnownNonZero(X, TD, Depth) || isKnownNonZero(Y, TD, Depth);
@@ -1023,7 +1014,7 @@ bool llvm::isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth) {
APInt KnownZero(BitWidth, 0);
APInt KnownOne(BitWidth, 0);
- ComputeMaskedBits(X, KnownZero, KnownOne, TD, Depth);
+ computeKnownBits(X, KnownZero, KnownOne, TD, Depth);
if (KnownOne[0])
return true;
}
@@ -1065,12 +1056,12 @@ bool llvm::isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth) {
APInt Mask = APInt::getSignedMaxValue(BitWidth);
// The sign bit of X is set. If some other bit is set then X is not equal
// to INT_MIN.
- ComputeMaskedBits(X, KnownZero, KnownOne, TD, Depth);
+ computeKnownBits(X, KnownZero, KnownOne, TD, Depth);
if ((KnownOne & Mask) != 0)
return true;
// The sign bit of Y is set. If some other bit is set then Y is not equal
// to INT_MIN.
- ComputeMaskedBits(Y, KnownZero, KnownOne, TD, Depth);
+ computeKnownBits(Y, KnownZero, KnownOne, TD, Depth);
if ((KnownOne & Mask) != 0)
return true;
}
@@ -1100,7 +1091,7 @@ bool llvm::isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth) {
if (!BitWidth) return false;
APInt KnownZero(BitWidth, 0);
APInt KnownOne(BitWidth, 0);
- ComputeMaskedBits(V, KnownZero, KnownOne, TD, Depth);
+ computeKnownBits(V, KnownZero, KnownOne, TD, Depth);
return KnownOne != 0;
}
@@ -1116,8 +1107,7 @@ bool llvm::isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth) {
bool llvm::MaskedValueIsZero(Value *V, const APInt &Mask,
const DataLayout *TD, unsigned Depth) {
APInt KnownZero(Mask.getBitWidth(), 0), KnownOne(Mask.getBitWidth(), 0);
- ComputeMaskedBits(V, KnownZero, KnownOne, TD, Depth);
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ computeKnownBits(V, KnownZero, KnownOne, TD, Depth);
return (KnownZero & Mask) == Mask;
}
@@ -1142,7 +1132,7 @@ unsigned llvm::ComputeNumSignBits(Value *V, const DataLayout *TD,
unsigned Tmp, Tmp2;
unsigned FirstAnswer = 1;
- // Note that ConstantInt is handled by the general ComputeMaskedBits case
+ // Note that ConstantInt is handled by the general computeKnownBits case
// below.
if (Depth == 6)
@@ -1187,7 +1177,7 @@ unsigned llvm::ComputeNumSignBits(Value *V, const DataLayout *TD,
FirstAnswer = std::min(Tmp, Tmp2);
// We computed what we know about the sign bits as our first
// answer. Now proceed to the generic code that uses
- // ComputeMaskedBits, and pick whichever answer is better.
+ // computeKnownBits, and pick whichever answer is better.
}
break;
@@ -1207,7 +1197,7 @@ unsigned llvm::ComputeNumSignBits(Value *V, const DataLayout *TD,
if (ConstantInt *CRHS = dyn_cast<ConstantInt>(U->getOperand(1)))
if (CRHS->isAllOnesValue()) {
APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0);
- ComputeMaskedBits(U->getOperand(0), KnownZero, KnownOne, TD, Depth+1);
+ computeKnownBits(U->getOperand(0), KnownZero, KnownOne, TD, Depth+1);
// If the input is known to be 0 or 1, the output is 0/-1, which is all
// sign bits set.
@@ -1232,7 +1222,7 @@ unsigned llvm::ComputeNumSignBits(Value *V, const DataLayout *TD,
if (ConstantInt *CLHS = dyn_cast<ConstantInt>(U->getOperand(0)))
if (CLHS->isNullValue()) {
APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0);
- ComputeMaskedBits(U->getOperand(1), KnownZero, KnownOne, TD, Depth+1);
+ computeKnownBits(U->getOperand(1), KnownZero, KnownOne, TD, Depth+1);
// If the input is known to be 0 or 1, the output is 0/-1, which is all
// sign bits set.
if ((KnownZero | APInt(TyBits, 1)).isAllOnesValue())
@@ -1278,7 +1268,7 @@ unsigned llvm::ComputeNumSignBits(Value *V, const DataLayout *TD,
// use this information.
APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0);
APInt Mask;
- ComputeMaskedBits(V, KnownZero, KnownOne, TD, Depth);
+ computeKnownBits(V, KnownZero, KnownOne, TD, Depth);
if (KnownZero.isNegative()) { // sign bit is 0
Mask = KnownZero;
@@ -1364,7 +1354,7 @@ bool llvm::ComputeMultiple(Value *V, unsigned Base, Value *&Multiple,
Op1 = ConstantInt::get(V->getContext(), API);
}
- Value *Mul0 = NULL;
+ Value *Mul0 = nullptr;
if (ComputeMultiple(Op0, Base, Mul0, LookThroughSExt, Depth+1)) {
if (Constant *Op1C = dyn_cast<Constant>(Op1))
if (Constant *MulC = dyn_cast<Constant>(Mul0)) {
@@ -1388,7 +1378,7 @@ bool llvm::ComputeMultiple(Value *V, unsigned Base, Value *&Multiple,
}
}
- Value *Mul1 = NULL;
+ Value *Mul1 = nullptr;
if (ComputeMultiple(Op1, Base, Mul1, LookThroughSExt, Depth+1)) {
if (Constant *Op0C = dyn_cast<Constant>(Op0))
if (Constant *MulC = dyn_cast<Constant>(Mul1)) {
@@ -1432,7 +1422,7 @@ bool llvm::CannotBeNegativeZero(const Value *V, unsigned Depth) {
return 1; // Limit search depth.
const Operator *I = dyn_cast<Operator>(V);
- if (I == 0) return false;
+ if (!I) return false;
// Check if the nsz fast-math flag is set
if (const FPMathOperator *FPO = dyn_cast<FPMathOperator>(I))
@@ -1513,7 +1503,7 @@ Value *llvm::isBytewiseValue(Value *V) {
// If the top/bottom halves aren't the same, reject it.
if (Val != Val2)
- return 0;
+ return nullptr;
}
return ConstantInt::get(V->getContext(), Val);
}
@@ -1525,11 +1515,11 @@ Value *llvm::isBytewiseValue(Value *V) {
Value *Elt = CA->getElementAsConstant(0);
Value *Val = isBytewiseValue(Elt);
if (!Val)
- return 0;
+ return nullptr;
for (unsigned I = 1, E = CA->getNumElements(); I != E; ++I)
if (CA->getElementAsConstant(I) != Elt)
- return 0;
+ return nullptr;
return Val;
}
@@ -1540,7 +1530,7 @@ Value *llvm::isBytewiseValue(Value *V) {
// %c = or i16 %a, %b
// but until there is an example that actually needs this, it doesn't seem
// worth worrying about.
- return 0;
+ return nullptr;
}
@@ -1590,7 +1580,7 @@ static Value *BuildSubAggregate(Value *From, Value* To, Type *IndexedType,
Value *V = FindInsertedValue(From, Idxs);
if (!V)
- return NULL;
+ return nullptr;
// Insert the value in the new (sub) aggregrate
return llvm::InsertValueInst::Create(To, V, makeArrayRef(Idxs).slice(IdxSkip),
@@ -1641,7 +1631,7 @@ Value *llvm::FindInsertedValue(Value *V, ArrayRef<unsigned> idx_range,
if (Constant *C = dyn_cast<Constant>(V)) {
C = C->getAggregateElement(idx_range[0]);
- if (C == 0) return 0;
+ if (!C) return nullptr;
return FindInsertedValue(C, idx_range.slice(1), InsertBefore);
}
@@ -1654,7 +1644,7 @@ Value *llvm::FindInsertedValue(Value *V, ArrayRef<unsigned> idx_range,
if (req_idx == idx_range.end()) {
// We can't handle this without inserting insertvalues
if (!InsertBefore)
- return 0;
+ return nullptr;
// The requested index identifies a part of a nested aggregate. Handle
// this specially. For example,
@@ -1708,7 +1698,7 @@ Value *llvm::FindInsertedValue(Value *V, ArrayRef<unsigned> idx_range,
}
// Otherwise, we don't know (such as, extracting from a function return value
// or load instruction)
- return 0;
+ return nullptr;
}
/// GetPointerBaseWithConstantOffset - Analyze the specified pointer to see if
@@ -1769,13 +1759,13 @@ bool llvm::getConstantStringInfo(const Value *V, StringRef &Str,
// Make sure the index-ee is a pointer to array of i8.
PointerType *PT = cast<PointerType>(GEP->getOperand(0)->getType());
ArrayType *AT = dyn_cast<ArrayType>(PT->getElementType());
- if (AT == 0 || !AT->getElementType()->isIntegerTy(8))
+ if (!AT || !AT->getElementType()->isIntegerTy(8))
return false;
// Check to make sure that the first operand of the GEP is an integer and
// has value 0 so that we are sure we're indexing into the initializer.
const ConstantInt *FirstIdx = dyn_cast<ConstantInt>(GEP->getOperand(1));
- if (FirstIdx == 0 || !FirstIdx->isZero())
+ if (!FirstIdx || !FirstIdx->isZero())
return false;
// If the second index isn't a ConstantInt, then this is a variable index
@@ -1807,7 +1797,7 @@ bool llvm::getConstantStringInfo(const Value *V, StringRef &Str,
// Must be a Constant Array
const ConstantDataArray *Array =
dyn_cast<ConstantDataArray>(GV->getInitializer());
- if (Array == 0 || !Array->isString())
+ if (!Array || !Array->isString())
return false;
// Get the number of elements in the array
@@ -1913,7 +1903,7 @@ llvm::GetUnderlyingObject(Value *V, const DataLayout *TD, unsigned MaxLookup) {
// See if InstructionSimplify knows any relevant tricks.
if (Instruction *I = dyn_cast<Instruction>(V))
// TODO: Acquire a DominatorTree and use it.
- if (Value *Simplified = SimplifyInstruction(I, TD, 0)) {
+ if (Value *Simplified = SimplifyInstruction(I, TD, nullptr)) {
V = Simplified;
continue;
}
@@ -2001,7 +1991,7 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V,
return false;
APInt KnownZero(BitWidth, 0);
APInt KnownOne(BitWidth, 0);
- ComputeMaskedBits(Op, KnownZero, KnownOne, TD);
+ computeKnownBits(Op, KnownZero, KnownOne, TD);
return !!KnownZero;
}
case Instruction::Load: {
@@ -2076,14 +2066,18 @@ bool llvm::isKnownNonNull(const Value *V, const TargetLibraryInfo *TLI) {
// Alloca never returns null, malloc might.
if (isa<AllocaInst>(V)) return true;
- // A byval or inalloca argument is never null.
+ // A byval, inalloca, or nonnull argument is never null.
if (const Argument *A = dyn_cast<Argument>(V))
- return A->hasByValOrInAllocaAttr();
+ return A->hasByValOrInAllocaAttr() || A->hasNonNullAttr();
// Global values are not null unless extern weak.
if (const GlobalValue *GV = dyn_cast<GlobalValue>(V))
return !GV->hasExternalWeakLinkage();
+ if (ImmutableCallSite CS = V)
+ if (CS.paramHasAttr(0, Attribute::NonNull))
+ return true;
+
// operator new never returns null.
if (isOperatorNewLikeFn(V, TLI, /*LookThroughBitCast=*/true))
return true;
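Three things happen in ValueTracking.cpp: ComputeMaskedBits and its helpers are renamed to computeKnownBits* (the "masked" name was apparently a leftover from when the API took a mask parameter); the per-case returns in the big switch become breaks, so a single assert at the end checks the KnownZero/KnownOne sets on every path instead of being duplicated per case; and isKnownNonNull() learns that nonnull arguments and nonnull call results cannot be null. A compilable miniature of the switch refactor (knownBits and its opcodes are hypothetical):

    #include <cassert>
    #include <cstdint>

    // Cases break instead of returning, so one trailing assert now
    // guards every exit of the analysis.
    void knownBits(int Opcode, uint8_t &KnownZero, uint8_t &KnownOne) {
      switch (Opcode) {
      case 0: KnownZero |= 0x0F; break;   // was: ...; return;
      case 1: KnownOne  |= 0x30; break;   // was: ...; return;
      default: break;
      }
      assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
    }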
diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp
index 1a5eec3..44a3412 100644
--- a/lib/AsmParser/LLLexer.cpp
+++ b/lib/AsmParser/LLLexer.cpp
@@ -34,6 +34,10 @@ bool LLLexer::Error(LocTy ErrorLoc, const Twine &Msg) const {
return true;
}
+void LLLexer::Warning(LocTy WarningLoc, const Twine &Msg) const {
+ SM.PrintMessage(WarningLoc, SourceMgr::DK_Warning, Msg);
+}
+
//===----------------------------------------------------------------------===//
// Helper functions.
//===----------------------------------------------------------------------===//
@@ -146,7 +150,7 @@ static bool isLabelChar(char C) {
static const char *isLabelTail(const char *CurPtr) {
while (1) {
if (CurPtr[0] == ':') return CurPtr+1;
- if (!isLabelChar(CurPtr[0])) return 0;
+ if (!isLabelChar(CurPtr[0])) return nullptr;
++CurPtr;
}
}
@@ -431,8 +435,8 @@ lltok::Kind LLLexer::LexHash() {
/// HexIntConstant [us]0x[0-9A-Fa-f]+
lltok::Kind LLLexer::LexIdentifier() {
const char *StartChar = CurPtr;
- const char *IntEnd = CurPtr[-1] == 'i' ? 0 : StartChar;
- const char *KeywordEnd = 0;
+ const char *IntEnd = CurPtr[-1] == 'i' ? nullptr : StartChar;
+ const char *KeywordEnd = nullptr;
for (; isLabelChar(*CurPtr); ++CurPtr) {
// If we decide this is an integer, remember the end of the sequence.
@@ -451,7 +455,7 @@ lltok::Kind LLLexer::LexIdentifier() {
// Otherwise, this wasn't a label. If this was valid as an integer type,
// return it.
- if (IntEnd == 0) IntEnd = CurPtr;
+ if (!IntEnd) IntEnd = CurPtr;
if (IntEnd != StartChar) {
CurPtr = IntEnd;
uint64_t NumBits = atoull(StartChar, CurPtr);
@@ -465,7 +469,7 @@ lltok::Kind LLLexer::LexIdentifier() {
}
// Otherwise, this was a letter sequence. See which keyword this is.
- if (KeywordEnd == 0) KeywordEnd = CurPtr;
+ if (!KeywordEnd) KeywordEnd = CurPtr;
CurPtr = KeywordEnd;
--StartChar;
unsigned Len = CurPtr-StartChar;
@@ -481,6 +485,8 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(private);
KEYWORD(internal);
+ KEYWORD(linker_private); // NOTE: deprecated, for parser compatibility
+ KEYWORD(linker_private_weak); // NOTE: deprecated, for parser compatibility
KEYWORD(available_externally);
KEYWORD(linkonce);
KEYWORD(linkonce_odr);
@@ -506,6 +512,7 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(null);
KEYWORD(to);
KEYWORD(tail);
+ KEYWORD(musttail);
KEYWORD(target);
KEYWORD(triple);
KEYWORD(unwind);
@@ -548,7 +555,6 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(x86_stdcallcc);
KEYWORD(x86_fastcallcc);
KEYWORD(x86_thiscallcc);
- KEYWORD(x86_cdeclmethodcc);
KEYWORD(arm_apcscc);
KEYWORD(arm_aapcscc);
KEYWORD(arm_aapcs_vfpcc);
@@ -587,6 +593,7 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(noimplicitfloat);
KEYWORD(noinline);
KEYWORD(nonlazybind);
+ KEYWORD(nonnull);
KEYWORD(noredzone);
KEYWORD(noreturn);
KEYWORD(nounwind);
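Beyond the nullptr cleanups, the lexer gains a Warning() counterpart to Error(), keeps linker_private and linker_private_weak as parseable-but-deprecated keywords, adds musttail and the nonnull attribute keyword, and drops x86_cdeclmethodcc. The warning hook is presumably what lets the parser accept the deprecated linkages loudly rather than silently; a hypothetical call site (the message text is illustrative only):

    // Deprecated keyword accepted for compatibility, with a diagnostic:
    Lex.Warning("'linker_private' is deprecated; treating as 'private'");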
diff --git a/lib/AsmParser/LLLexer.h b/lib/AsmParser/LLLexer.h
index 85703c7..ad11d49 100644
--- a/lib/AsmParser/LLLexer.h
+++ b/lib/AsmParser/LLLexer.h
@@ -63,6 +63,10 @@ namespace llvm {
bool Error(LocTy L, const Twine &Msg) const;
bool Error(const Twine &Msg) const { return Error(getLoc(), Msg); }
+
+ void Warning(LocTy WarningLoc, const Twine &Msg) const;
+ void Warning(const Twine &Msg) const { return Warning(getLoc(), Msg); }
+
std::string getFilename() const;
private:
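The header-side overload mirrors the existing Error() pair, so callers can warn at the current token without supplying a location. Returning the void call is legal C++ and keeps the one-liner symmetric with Error(), which returns bool:

    void Warning(LocTy WarningLoc, const Twine &Msg) const;
    void Warning(const Twine &Msg) const { return Warning(getLoc(), Msg); }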
diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp
index 37151e6..3282e8a 100644
--- a/lib/AsmParser/LLParser.cpp
+++ b/lib/AsmParser/LLParser.cpp
@@ -57,7 +57,8 @@ bool LLParser::ValidateEndOfModule() {
for (unsigned i = 0, e = MDList.size(); i != e; ++i) {
unsigned SlotNo = MDList[i].MDSlot;
- if (SlotNo >= NumberedMetadata.size() || NumberedMetadata[SlotNo] == 0)
+ if (SlotNo >= NumberedMetadata.size() ||
+ NumberedMetadata[SlotNo] == nullptr)
return Error(MDList[i].Loc, "use of undefined metadata '!" +
Twine(SlotNo) + "'");
Inst->setMetadata(MDList[i].MDKind, NumberedMetadata[SlotNo]);
@@ -132,20 +133,20 @@ bool LLParser::ValidateEndOfModule() {
// references after the function was defined. Resolve those now.
while (!ForwardRefBlockAddresses.empty()) {
// Okay, we are referencing an already-parsed function, resolve them now.
- Function *TheFn = 0;
+ Function *TheFn = nullptr;
const ValID &Fn = ForwardRefBlockAddresses.begin()->first;
if (Fn.Kind == ValID::t_GlobalName)
TheFn = M->getFunction(Fn.StrVal);
else if (Fn.UIntVal < NumberedVals.size())
TheFn = dyn_cast<Function>(NumberedVals[Fn.UIntVal]);
- if (TheFn == 0)
+ if (!TheFn)
return Error(Fn.Loc, "unknown function referenced by blockaddress");
// Resolve all these references.
if (ResolveForwardRefBlockAddresses(TheFn,
ForwardRefBlockAddresses.begin()->second,
- 0))
+ nullptr))
return true;
ForwardRefBlockAddresses.erase(ForwardRefBlockAddresses.begin());
@@ -206,7 +207,7 @@ bool LLParser::ResolveForwardRefBlockAddresses(Function *TheFn,
TheFn->getValueSymbolTable().lookup(Refs[i].first.StrVal));
}
- if (Res == 0)
+ if (!Res)
return Error(Refs[i].first.Loc,
"referenced value is not a basic block");
@@ -247,6 +248,8 @@ bool LLParser::ParseTopLevelEntities() {
// ('constant'|'global') ...
case lltok::kw_private: // OptionalLinkage
case lltok::kw_internal: // OptionalLinkage
+ case lltok::kw_linker_private: // Obsolete OptionalLinkage
+ case lltok::kw_linker_private_weak: // Obsolete OptionalLinkage
case lltok::kw_weak: // OptionalLinkage
case lltok::kw_weak_odr: // OptionalLinkage
case lltok::kw_linkonce: // OptionalLinkage
@@ -362,7 +365,7 @@ bool LLParser::ParseUnnamedType() {
if (TypeID >= NumberedTypes.size())
NumberedTypes.resize(TypeID+1);
- Type *Result = 0;
+ Type *Result = nullptr;
if (ParseStructDefinition(TypeLoc, "",
NumberedTypes[TypeID], Result)) return true;
@@ -389,7 +392,7 @@ bool LLParser::ParseNamedType() {
ParseToken(lltok::kw_type, "expected 'type' after name"))
return true;
- Type *Result = 0;
+ Type *Result = nullptr;
if (ParseStructDefinition(NameLoc, Name,
NamedTypes[Name], Result)) return true;
@@ -521,10 +524,10 @@ bool LLParser::ParseMDNodeID(MDNode *&Result, unsigned &SlotNo) {
if (ParseUInt32(SlotNo)) return true;
// Check existing MDNode.
- if (SlotNo < NumberedMetadata.size() && NumberedMetadata[SlotNo] != 0)
+ if (SlotNo < NumberedMetadata.size() && NumberedMetadata[SlotNo] != nullptr)
Result = NumberedMetadata[SlotNo];
else
- Result = 0;
+ Result = nullptr;
return false;
}
@@ -565,7 +568,7 @@ bool LLParser::ParseNamedMetadata() {
if (ParseToken(lltok::exclaim, "Expected '!' here"))
return true;
- MDNode *N = 0;
+ MDNode *N = nullptr;
if (ParseMDNodeID(N)) return true;
NMD->addOperand(N);
} while (EatIfPresent(lltok::comma));
@@ -584,14 +587,14 @@ bool LLParser::ParseStandaloneMetadata() {
unsigned MetadataID = 0;
LocTy TyLoc;
- Type *Ty = 0;
+ Type *Ty = nullptr;
SmallVector<Value *, 16> Elts;
if (ParseUInt32(MetadataID) ||
ParseToken(lltok::equal, "expected '=' here") ||
ParseType(Ty, TyLoc) ||
ParseToken(lltok::exclaim, "Expected '!' here") ||
ParseToken(lltok::lbrace, "Expected '{' here") ||
- ParseMDNodeVector(Elts, NULL) ||
+ ParseMDNodeVector(Elts, nullptr) ||
ParseToken(lltok::rbrace, "expected end of metadata node"))
return true;
@@ -611,7 +614,7 @@ bool LLParser::ParseStandaloneMetadata() {
if (MetadataID >= NumberedMetadata.size())
NumberedMetadata.resize(MetadataID+1);
- if (NumberedMetadata[MetadataID] != 0)
+ if (NumberedMetadata[MetadataID] != nullptr)
return TokError("Metadata id is already used");
NumberedMetadata[MetadataID] = Init;
}
@@ -619,13 +622,19 @@ bool LLParser::ParseStandaloneMetadata() {
return false;
}
+static bool isValidVisibilityForLinkage(unsigned V, unsigned L) {
+ return !GlobalValue::isLocalLinkage((GlobalValue::LinkageTypes)L) ||
+ (GlobalValue::VisibilityTypes)V == GlobalValue::DefaultVisibility;
+}
+
/// ParseAlias:
/// ::= GlobalVar '=' OptionalVisibility OptionalDLLStorageClass 'alias'
/// OptionalLinkage Aliasee
+/// ::= GlobalVar '=' OptionalVisibility OptionalDLLStorageClass 'alias'
+/// OptionalLinkage OptionalAddrSpace Type, Aliasee
+///
/// Aliasee
/// ::= TypeAndValue
-/// ::= 'bitcast' '(' TypeAndValue 'to' Type ')'
-/// ::= 'getelementptr' 'inbounds'? '(' ... ')'
///
/// Everything through DLL storage class has already been parsed.
///
@@ -643,27 +652,53 @@ bool LLParser::ParseAlias(const std::string &Name, LocTy NameLoc,
if(!GlobalAlias::isValidLinkage(Linkage))
return Error(LinkageLoc, "invalid linkage type for alias");
- Constant *Aliasee;
- LocTy AliaseeLoc = Lex.getLoc();
- if (Lex.getKind() != lltok::kw_bitcast &&
- Lex.getKind() != lltok::kw_getelementptr) {
- if (ParseGlobalTypeAndValue(Aliasee)) return true;
+ if (!isValidVisibilityForLinkage(Visibility, L))
+ return Error(LinkageLoc,
+ "symbol with local linkage must have default visibility");
+
+ bool HasAddrSpace = Lex.getKind() == lltok::kw_addrspace;
+ unsigned AddrSpace;
+ LocTy AddrSpaceLoc = Lex.getLoc();
+ if (ParseOptionalAddrSpace(AddrSpace))
+ return true;
+
+ LocTy TyLoc = Lex.getLoc();
+ Type *Ty = nullptr;
+ if (ParseType(Ty))
+ return true;
+
+ bool DifferentType = EatIfPresent(lltok::comma);
+ if (HasAddrSpace && !DifferentType)
+ return Error(AddrSpaceLoc, "A type is required if addrspace is given");
+
+ Type *AliaseeType = nullptr;
+ if (DifferentType) {
+ if (ParseType(AliaseeType))
+ return true;
} else {
- // The bitcast dest type is not present, it is implied by the dest type.
- ValID ID;
- if (ParseValID(ID)) return true;
- if (ID.Kind != ValID::t_Constant)
- return Error(AliaseeLoc, "invalid aliasee");
- Aliasee = ID.ConstantVal;
+ AliaseeType = Ty;
+ auto *PTy = dyn_cast<PointerType>(Ty);
+ if (!PTy)
+ return Error(TyLoc, "An alias must have pointer type");
+ Ty = PTy->getElementType();
+ AddrSpace = PTy->getAddressSpace();
}
- if (!Aliasee->getType()->isPointerTy())
- return Error(AliaseeLoc, "alias must have pointer type");
+ LocTy AliaseeLoc = Lex.getLoc();
+ Constant *C;
+ if (ParseGlobalValue(AliaseeType, C))
+ return true;
+
+ auto *Aliasee = dyn_cast<GlobalObject>(C);
+ if (!Aliasee)
+ return Error(AliaseeLoc, "Alias must point to function or variable");
+
+ assert(Aliasee->getType()->isPointerTy());
// Okay, create the alias but do not insert it into the module yet.
- GlobalAlias* GA = new GlobalAlias(Aliasee->getType(),
- (GlobalValue::LinkageTypes)Linkage, Name,
- Aliasee);
+ std::unique_ptr<GlobalAlias> GA(
+ GlobalAlias::create(Ty, AddrSpace, (GlobalValue::LinkageTypes)Linkage,
+ Name, Aliasee, /*Parent*/ nullptr));
GA->setVisibility((GlobalValue::VisibilityTypes)Visibility);
GA->setDLLStorageClass((GlobalValue::DLLStorageClassTypes)DLLStorageClass);
@@ -685,15 +720,23 @@ bool LLParser::ParseAlias(const std::string &Name, LocTy NameLoc,
// If they agree, just RAUW the old value with the alias and remove the
// forward ref info.
- Val->replaceAllUsesWith(GA);
+ for (auto *User : Val->users()) {
+ if (auto *GA = dyn_cast<GlobalAlias>(User))
+ return Error(NameLoc, "Alias is pointed by alias " + GA->getName());
+ }
+
+ Val->replaceAllUsesWith(GA.get());
Val->eraseFromParent();
ForwardRefVals.erase(I);
}
// Insert into the module, we know its name won't collide now.
- M->getAliasList().push_back(GA);
+ M->getAliasList().push_back(GA.get());
assert(GA->getName() == Name && "Should not be a name conflict!");
+ // The module owns this now
+ GA.release();
+
return false;
}
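The ParseAlias rewrite above changes both the accepted grammar and the error handling. Grammar: an explicit type, optionally with an address space, may now precede the aliasee; the old bitcast/getelementptr aliasee forms are gone, the aliasee must resolve to a function or variable, and aliases of aliases are rejected. Error handling: the alias is built under a unique_ptr so the many early error returns cannot leak it, and local linkage now requires default visibility, enforced by the new isValidVisibilityForLinkage() helper. The ownership shape, reduced to a compilable sketch (Module and Alias are stand-ins, not the LLVM classes):

    #include <memory>

    struct Alias {};
    struct Module { void adopt(Alias *) { /* takes ownership */ } };

    // Build under unique_ptr so early error returns clean up; release
    // only once the module has taken ownership.
    bool buildAlias(Module &M, bool ParseFailed) {
      std::unique_ptr<Alias> GA(new Alias());
      if (ParseFailed)
        return true;          // GA destroyed here; nothing leaks
      M.adopt(GA.get());
      GA.release();           // the module owns it now
      return false;
    }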
@@ -711,6 +754,10 @@ bool LLParser::ParseAlias(const std::string &Name, LocTy NameLoc,
bool LLParser::ParseGlobal(const std::string &Name, LocTy NameLoc,
unsigned Linkage, bool HasLinkage,
unsigned Visibility, unsigned DLLStorageClass) {
+ if (!isValidVisibilityForLinkage(Visibility, Linkage))
+ return Error(NameLoc,
+ "symbol with local linkage must have default visibility");
+
unsigned AddrSpace;
bool IsConstant, UnnamedAddr, IsExternallyInitialized;
GlobalVariable::ThreadLocalMode TLM;
@@ -718,7 +765,7 @@ bool LLParser::ParseGlobal(const std::string &Name, LocTy NameLoc,
LocTy IsExternallyInitializedLoc;
LocTy TyLoc;
- Type *Ty = 0;
+ Type *Ty = nullptr;
if (ParseOptionalThreadLocal(TLM) ||
ParseOptionalAddrSpace(AddrSpace) ||
ParseOptionalToken(lltok::kw_unnamed_addr, UnnamedAddr,
@@ -732,7 +779,7 @@ bool LLParser::ParseGlobal(const std::string &Name, LocTy NameLoc,
// If the linkage is specified and is external, then no initializer is
// present.
- Constant *Init = 0;
+ Constant *Init = nullptr;
if (!HasLinkage || (Linkage != GlobalValue::ExternalWeakLinkage &&
Linkage != GlobalValue::ExternalLinkage)) {
if (ParseGlobalValue(Ty, Init))
@@ -742,7 +789,7 @@ bool LLParser::ParseGlobal(const std::string &Name, LocTy NameLoc,
if (Ty->isFunctionTy() || Ty->isLabelTy())
return Error(TyLoc, "invalid type for global variable");
- GlobalVariable *GV = 0;
+ GlobalVariable *GV = nullptr;
// See if the global was forward referenced, if so, use the global.
if (!Name.empty()) {
@@ -760,9 +807,9 @@ bool LLParser::ParseGlobal(const std::string &Name, LocTy NameLoc,
}
}
- if (GV == 0) {
- GV = new GlobalVariable(*M, Ty, false, GlobalValue::ExternalLinkage, 0,
- Name, 0, GlobalVariable::NotThreadLocal,
+ if (!GV) {
+ GV = new GlobalVariable(*M, Ty, false, GlobalValue::ExternalLinkage, nullptr,
+ Name, nullptr, GlobalVariable::NotThreadLocal,
AddrSpace);
} else {
if (GV->getType()->getElementType() != Ty)
@@ -956,6 +1003,7 @@ bool LLParser::ParseFnAttributeValuePairs(AttrBuilder &B,
case lltok::kw_nest:
case lltok::kw_noalias:
case lltok::kw_nocapture:
+ case lltok::kw_nonnull:
case lltok::kw_returned:
case lltok::kw_sret:
HaveError |=
@@ -978,9 +1026,9 @@ bool LLParser::ParseFnAttributeValuePairs(AttrBuilder &B,
GlobalValue *LLParser::GetGlobalVal(const std::string &Name, Type *Ty,
LocTy Loc) {
PointerType *PTy = dyn_cast<PointerType>(Ty);
- if (PTy == 0) {
+ if (!PTy) {
Error(Loc, "global variable reference must have pointer type");
- return 0;
+ return nullptr;
}
// Look this name up in the normal function symbol table.
@@ -989,7 +1037,7 @@ GlobalValue *LLParser::GetGlobalVal(const std::string &Name, Type *Ty,
// If this is a forward reference for the value, see if we already created a
// forward ref record.
- if (Val == 0) {
+ if (!Val) {
std::map<std::string, std::pair<GlobalValue*, LocTy> >::iterator
I = ForwardRefVals.find(Name);
if (I != ForwardRefVals.end())
@@ -1001,7 +1049,7 @@ GlobalValue *LLParser::GetGlobalVal(const std::string &Name, Type *Ty,
if (Val->getType() == Ty) return Val;
Error(Loc, "'@" + Name + "' defined with type '" +
getTypeString(Val->getType()) + "'");
- return 0;
+ return nullptr;
}
// Otherwise, create a new forward reference for this value and remember it.
@@ -1010,8 +1058,8 @@ GlobalValue *LLParser::GetGlobalVal(const std::string &Name, Type *Ty,
FwdVal = Function::Create(FT, GlobalValue::ExternalWeakLinkage, Name, M);
else
FwdVal = new GlobalVariable(*M, PTy->getElementType(), false,
- GlobalValue::ExternalWeakLinkage, 0, Name,
- 0, GlobalVariable::NotThreadLocal,
+ GlobalValue::ExternalWeakLinkage, nullptr, Name,
+ nullptr, GlobalVariable::NotThreadLocal,
PTy->getAddressSpace());
ForwardRefVals[Name] = std::make_pair(FwdVal, Loc);
@@ -1020,16 +1068,16 @@ GlobalValue *LLParser::GetGlobalVal(const std::string &Name, Type *Ty,
GlobalValue *LLParser::GetGlobalVal(unsigned ID, Type *Ty, LocTy Loc) {
PointerType *PTy = dyn_cast<PointerType>(Ty);
- if (PTy == 0) {
+ if (!PTy) {
Error(Loc, "global variable reference must have pointer type");
- return 0;
+ return nullptr;
}
- GlobalValue *Val = ID < NumberedVals.size() ? NumberedVals[ID] : 0;
+ GlobalValue *Val = ID < NumberedVals.size() ? NumberedVals[ID] : nullptr;
// If this is a forward reference for the value, see if we already created a
// forward ref record.
- if (Val == 0) {
+ if (!Val) {
std::map<unsigned, std::pair<GlobalValue*, LocTy> >::iterator
I = ForwardRefValIDs.find(ID);
if (I != ForwardRefValIDs.end())
@@ -1041,7 +1089,7 @@ GlobalValue *LLParser::GetGlobalVal(unsigned ID, Type *Ty, LocTy Loc) {
if (Val->getType() == Ty) return Val;
Error(Loc, "'@" + Twine(ID) + "' defined with type '" +
getTypeString(Val->getType()) + "'");
- return 0;
+ return nullptr;
}
// Otherwise, create a new forward reference for this value and remember it.
@@ -1050,7 +1098,7 @@ GlobalValue *LLParser::GetGlobalVal(unsigned ID, Type *Ty, LocTy Loc) {
FwdVal = Function::Create(FT, GlobalValue::ExternalWeakLinkage, "", M);
else
FwdVal = new GlobalVariable(*M, PTy->getElementType(), false,
- GlobalValue::ExternalWeakLinkage, 0, "");
+ GlobalValue::ExternalWeakLinkage, nullptr, "");
ForwardRefValIDs[ID] = std::make_pair(FwdVal, Loc);
return FwdVal;
@@ -1170,6 +1218,7 @@ bool LLParser::ParseOptionalParamAttrs(AttrBuilder &B) {
case lltok::kw_nest: B.addAttribute(Attribute::Nest); break;
case lltok::kw_noalias: B.addAttribute(Attribute::NoAlias); break;
case lltok::kw_nocapture: B.addAttribute(Attribute::NoCapture); break;
+ case lltok::kw_nonnull: B.addAttribute(Attribute::NonNull); break;
case lltok::kw_readnone: B.addAttribute(Attribute::ReadNone); break;
case lltok::kw_readonly: B.addAttribute(Attribute::ReadOnly); break;
case lltok::kw_returned: B.addAttribute(Attribute::Returned); break;
@@ -1222,6 +1271,7 @@ bool LLParser::ParseOptionalReturnAttrs(AttrBuilder &B) {
return HaveError;
case lltok::kw_inreg: B.addAttribute(Attribute::InReg); break;
case lltok::kw_noalias: B.addAttribute(Attribute::NoAlias); break;
+ case lltok::kw_nonnull: B.addAttribute(Attribute::NonNull); break;
case lltok::kw_signext: B.addAttribute(Attribute::SExt); break;
case lltok::kw_zeroext: B.addAttribute(Attribute::ZExt); break;
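
With the two cases above, nonnull becomes usable on both parameters and return
values. A small sketch of producing the same attribute programmatically
through the AttrBuilder used by these parsers (standalone helper, name is
illustrative):

    #include "llvm/IR/Attributes.h"

    // Sketch: the textual marker in, e.g., 'declare i8* @f(i8* nonnull)'
    // corresponds to Attribute::NonNull.
    llvm::AttrBuilder buildNonNullAttr() {
      llvm::AttrBuilder B;
      B.addAttribute(llvm::Attribute::NonNull);
      return B;
    }
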
@@ -1286,6 +1336,10 @@ bool LLParser::ParseOptionalReturnAttrs(AttrBuilder &B) {
/// ::= 'common'
/// ::= 'extern_weak'
/// ::= 'external'
+///
+/// Deprecated Values:
+/// ::= 'linker_private'
+/// ::= 'linker_private_weak'
bool LLParser::ParseOptionalLinkage(unsigned &Res, bool &HasLinkage) {
HasLinkage = false;
switch (Lex.getKind()) {
@@ -1303,6 +1357,15 @@ bool LLParser::ParseOptionalLinkage(unsigned &Res, bool &HasLinkage) {
case lltok::kw_common: Res = GlobalValue::CommonLinkage; break;
case lltok::kw_extern_weak: Res = GlobalValue::ExternalWeakLinkage; break;
case lltok::kw_external: Res = GlobalValue::ExternalLinkage; break;
+
+ case lltok::kw_linker_private:
+ case lltok::kw_linker_private_weak:
+ Lex.Warning("'" + Lex.getStrVal() + "' is deprecated, treating as"
+ " PrivateLinkage");
+ Lex.Lex();
+ // treat linker_private and linker_private_weak as PrivateLinkage
+ Res = GlobalValue::PrivateLinkage;
+ return false;
}
Lex.Lex();
HasLinkage = true;
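
The new cases above keep old IR readable: linker_private and
linker_private_weak parse with a deprecation warning and are downgraded to
PrivateLinkage. A sketch of the observable behavior, again through
ParseAssemblyString (helper and header path are assumptions):

    #include "llvm/AsmParser/Parser.h"   // assumed 3.5-era header path
    #include "llvm/IR/LLVMContext.h"
    #include "llvm/IR/Module.h"
    #include "llvm/Support/SourceMgr.h"

    // Sketch: 'linker_private' still parses, but the resulting global
    // carries plain PrivateLinkage.
    bool parsesAsPrivate(llvm::LLVMContext &Ctx) {
      llvm::SMDiagnostic Err;
      llvm::Module *M = llvm::ParseAssemblyString(
          "@g = linker_private global i32 0\n", nullptr, Err, Ctx);
      return M && M->getNamedGlobal("g")->hasPrivateLinkage();
    }
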
@@ -1350,7 +1413,6 @@ bool LLParser::ParseOptionalDLLStorageClass(unsigned &Res) {
/// ::= 'x86_stdcallcc'
/// ::= 'x86_fastcallcc'
/// ::= 'x86_thiscallcc'
-/// ::= 'x86_cdeclmethodcc'
/// ::= 'arm_apcscc'
/// ::= 'arm_aapcscc'
/// ::= 'arm_aapcs_vfpcc'
@@ -1376,7 +1438,6 @@ bool LLParser::ParseOptionalCallingConv(CallingConv::ID &CC) {
case lltok::kw_x86_stdcallcc: CC = CallingConv::X86_StdCall; break;
case lltok::kw_x86_fastcallcc: CC = CallingConv::X86_FastCall; break;
case lltok::kw_x86_thiscallcc: CC = CallingConv::X86_ThisCall; break;
- case lltok::kw_x86_cdeclmethodcc:CC = CallingConv::X86_CDeclMethod; break;
case lltok::kw_arm_apcscc: CC = CallingConv::ARM_APCS; break;
case lltok::kw_arm_aapcscc: CC = CallingConv::ARM_AAPCS; break;
case lltok::kw_arm_aapcs_vfpcc:CC = CallingConv::ARM_AAPCS_VFP; break;
@@ -1623,7 +1684,7 @@ bool LLParser::ParseType(Type *&Result, bool AllowVoid) {
// If the type hasn't been defined yet, create a forward definition and
// remember where that forward def'n was seen (in case it never is defined).
- if (Entry.first == 0) {
+ if (!Entry.first) {
Entry.first = StructType::create(Context, Lex.getStrVal());
Entry.second = Lex.getLoc();
}
@@ -1640,7 +1701,7 @@ bool LLParser::ParseType(Type *&Result, bool AllowVoid) {
// If the type hasn't been defined yet, create a forward definition and
// remember where that forward def'n was seen (in case it never is defined).
- if (Entry.first == 0) {
+ if (!Entry.first) {
Entry.first = StructType::create(Context);
Entry.second = Lex.getLoc();
}
@@ -1716,7 +1777,7 @@ bool LLParser::ParseParameterList(SmallVectorImpl<ParamInfo> &ArgList,
// Parse the argument.
LocTy ArgLoc;
- Type *ArgTy = 0;
+ Type *ArgTy = nullptr;
AttrBuilder ArgAttrs;
Value *V;
if (ParseType(ArgTy, ArgLoc))
@@ -1758,7 +1819,7 @@ bool LLParser::ParseArgumentList(SmallVectorImpl<ArgInfo> &ArgList,
Lex.Lex();
} else {
LocTy TypeLoc = Lex.getLoc();
- Type *ArgTy = 0;
+ Type *ArgTy = nullptr;
AttrBuilder Attrs;
std::string Name;
@@ -1870,7 +1931,7 @@ bool LLParser::ParseStructDefinition(SMLoc TypeLoc, StringRef Name,
Entry.second = SMLoc();
// If this type number has never been uttered, create it.
- if (Entry.first == 0)
+ if (!Entry.first)
Entry.first = StructType::create(Context, Name);
ResultTy = Entry.first;
return false;
@@ -1886,7 +1947,7 @@ bool LLParser::ParseStructDefinition(SMLoc TypeLoc, StringRef Name,
if (Entry.first)
return Error(TypeLoc, "forward references to non-struct type");
- ResultTy = 0;
+ ResultTy = nullptr;
if (isPacked)
return ParseArrayVectorType(ResultTy, true);
return ParseType(ResultTy);
@@ -1896,7 +1957,7 @@ bool LLParser::ParseStructDefinition(SMLoc TypeLoc, StringRef Name,
Entry.second = SMLoc();
// If this type number has never been uttered, create it.
- if (Entry.first == 0)
+ if (!Entry.first)
Entry.first = StructType::create(Context, Name);
StructType *STy = cast<StructType>(Entry.first);
@@ -1927,7 +1988,7 @@ bool LLParser::ParseStructBody(SmallVectorImpl<Type*> &Body) {
return false;
LocTy EltTyLoc = Lex.getLoc();
- Type *Ty = 0;
+ Type *Ty = nullptr;
if (ParseType(Ty)) return true;
Body.push_back(Ty);
@@ -1965,7 +2026,7 @@ bool LLParser::ParseArrayVectorType(Type *&Result, bool isVector) {
return true;
LocTy TypeLoc = Lex.getLoc();
- Type *EltTy = 0;
+ Type *EltTy = nullptr;
if (ParseType(EltTy)) return true;
if (ParseToken(isVector ? lltok::greater : lltok::rsquare,
@@ -2011,7 +2072,7 @@ LLParser::PerFunctionState::~PerFunctionState() {
I->second.first->replaceAllUsesWith(
UndefValue::get(I->second.first->getType()));
delete I->second.first;
- I->second.first = 0;
+ I->second.first = nullptr;
}
for (std::map<unsigned, std::pair<Value*, LocTy> >::iterator
@@ -2020,7 +2081,7 @@ LLParser::PerFunctionState::~PerFunctionState() {
I->second.first->replaceAllUsesWith(
UndefValue::get(I->second.first->getType()));
delete I->second.first;
- I->second.first = 0;
+ I->second.first = nullptr;
}
}
@@ -2069,7 +2130,7 @@ Value *LLParser::PerFunctionState::GetVal(const std::string &Name,
// If this is a forward reference for the value, see if we already created a
// forward ref record.
- if (Val == 0) {
+ if (!Val) {
std::map<std::string, std::pair<Value*, LocTy> >::iterator
I = ForwardRefVals.find(Name);
if (I != ForwardRefVals.end())
@@ -2084,13 +2145,13 @@ Value *LLParser::PerFunctionState::GetVal(const std::string &Name,
else
P.Error(Loc, "'%" + Name + "' defined with type '" +
getTypeString(Val->getType()) + "'");
- return 0;
+ return nullptr;
}
// Don't make placeholders with invalid type.
if (!Ty->isFirstClassType() && !Ty->isLabelTy()) {
P.Error(Loc, "invalid use of a non-first-class type");
- return 0;
+ return nullptr;
}
// Otherwise, create a new forward reference for this value and remember it.
@@ -2107,11 +2168,11 @@ Value *LLParser::PerFunctionState::GetVal(const std::string &Name,
Value *LLParser::PerFunctionState::GetVal(unsigned ID, Type *Ty,
LocTy Loc) {
// Look this name up in the normal function symbol table.
- Value *Val = ID < NumberedVals.size() ? NumberedVals[ID] : 0;
+ Value *Val = ID < NumberedVals.size() ? NumberedVals[ID] : nullptr;
// If this is a forward reference for the value, see if we already created a
// forward ref record.
- if (Val == 0) {
+ if (!Val) {
std::map<unsigned, std::pair<Value*, LocTy> >::iterator
I = ForwardRefValIDs.find(ID);
if (I != ForwardRefValIDs.end())
@@ -2126,12 +2187,12 @@ Value *LLParser::PerFunctionState::GetVal(unsigned ID, Type *Ty,
else
P.Error(Loc, "'%" + Twine(ID) + "' defined with type '" +
getTypeString(Val->getType()) + "'");
- return 0;
+ return nullptr;
}
if (!Ty->isFirstClassType() && !Ty->isLabelTy()) {
P.Error(Loc, "invalid use of a non-first-class type");
- return 0;
+ return nullptr;
}
// Otherwise, create a new forward reference for this value and remember it.
@@ -2227,7 +2288,7 @@ BasicBlock *LLParser::PerFunctionState::DefineBB(const std::string &Name,
BB = GetBB(NumberedVals.size(), Loc);
else
BB = GetBB(Name, Loc);
- if (BB == 0) return 0; // Already diagnosed error.
+ if (!BB) return nullptr; // Already diagnosed error.
// Move the block to the end of the function. Forward ref'd blocks are
// inserted wherever they happen to be referenced.
@@ -2435,7 +2496,7 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
// Make a global variable as a placeholder for this reference.
GlobalVariable *FwdRef = new GlobalVariable(*M, Type::getInt8Ty(Context),
false, GlobalValue::InternalLinkage,
- 0, "");
+ nullptr, "");
ForwardRefBlockAddresses[Fn].push_back(std::make_pair(Label, FwdRef));
ID.ConstantVal = FwdRef;
ID.Kind = ValID::t_Constant;
@@ -2456,7 +2517,7 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
case lltok::kw_inttoptr:
case lltok::kw_ptrtoint: {
unsigned Opc = Lex.getUIntVal();
- Type *DestTy = 0;
+ Type *DestTy = nullptr;
Constant *SrcVal;
Lex.Lex();
if (ParseToken(lltok::lparen, "expected '(' after constantexpr cast") ||
@@ -2720,18 +2781,18 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
/// ParseGlobalValue - Parse a global value with the specified type.
bool LLParser::ParseGlobalValue(Type *Ty, Constant *&C) {
- C = 0;
+ C = nullptr;
ValID ID;
- Value *V = NULL;
+ Value *V = nullptr;
bool Parsed = ParseValID(ID) ||
- ConvertValIDToValue(Ty, ID, V, NULL);
+ ConvertValIDToValue(Ty, ID, V, nullptr);
if (V && !(C = dyn_cast<Constant>(V)))
return Error(ID.Loc, "global values must be constants");
return Parsed;
}
bool LLParser::ParseGlobalTypeAndValue(Constant *&V) {
- Type *Ty = 0;
+ Type *Ty = nullptr;
return ParseType(Ty) ||
ParseGlobalValue(Ty, V);
}
@@ -2815,15 +2876,15 @@ bool LLParser::ConvertValIDToValue(Type *Ty, ValID &ID, Value *&V,
case ValID::t_LocalID:
if (!PFS) return Error(ID.Loc, "invalid use of function-local name");
V = PFS->GetVal(ID.UIntVal, Ty, ID.Loc);
- return (V == 0);
+ return V == nullptr;
case ValID::t_LocalName:
if (!PFS) return Error(ID.Loc, "invalid use of function-local name");
V = PFS->GetVal(ID.StrVal, Ty, ID.Loc);
- return (V == 0);
+ return V == nullptr;
case ValID::t_InlineAsm: {
PointerType *PTy = dyn_cast<PointerType>(Ty);
FunctionType *FTy =
- PTy ? dyn_cast<FunctionType>(PTy->getElementType()) : 0;
+ PTy ? dyn_cast<FunctionType>(PTy->getElementType()) : nullptr;
if (!FTy || !InlineAsm::Verify(FTy, ID.StrVal2))
return Error(ID.Loc, "invalid type for inline asm constraint string");
V = InlineAsm::get(FTy, ID.StrVal, ID.StrVal2, ID.UIntVal&1,
@@ -2842,10 +2903,10 @@ bool LLParser::ConvertValIDToValue(Type *Ty, ValID &ID, Value *&V,
return false;
case ValID::t_GlobalName:
V = GetGlobalVal(ID.StrVal, Ty, ID.Loc);
- return V == 0;
+ return V == nullptr;
case ValID::t_GlobalID:
V = GetGlobalVal(ID.UIntVal, Ty, ID.Loc);
- return V == 0;
+ return V == nullptr;
case ValID::t_APSInt:
if (!Ty->isIntegerTy())
return Error(ID.Loc, "integer constant must have integer type");
@@ -2928,14 +2989,14 @@ bool LLParser::ConvertValIDToValue(Type *Ty, ValID &ID, Value *&V,
}
bool LLParser::ParseValue(Type *Ty, Value *&V, PerFunctionState *PFS) {
- V = 0;
+ V = nullptr;
ValID ID;
return ParseValID(ID, PFS) ||
ConvertValIDToValue(Ty, ID, V, PFS);
}
bool LLParser::ParseTypeAndValue(Value *&V, PerFunctionState *PFS) {
- Type *Ty = 0;
+ Type *Ty = nullptr;
return ParseType(Ty) ||
ParseValue(Ty, V, PFS);
}
@@ -2965,7 +3026,7 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
unsigned DLLStorageClass;
AttrBuilder RetAttrs;
CallingConv::ID CC;
- Type *RetType = 0;
+ Type *RetType = nullptr;
LocTy RetTypeLoc = Lex.getLoc();
if (ParseOptionalLinkage(Linkage) ||
ParseOptionalVisibility(Visibility) ||
@@ -2998,6 +3059,10 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
return Error(LinkageLoc, "invalid function linkage type");
}
+ if (!isValidVisibilityForLinkage(Visibility, Linkage))
+ return Error(LinkageLoc,
+ "symbol with local linkage must have default visibility");
+
if (!FunctionType::isValidReturnType(RetType))
return Error(RetTypeLoc, "invalid function return type");
@@ -3031,7 +3096,7 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
std::string GC;
bool UnnamedAddr;
LocTy UnnamedAddrLoc;
- Constant *Prefix = 0;
+ Constant *Prefix = nullptr;
if (ParseArgumentList(ArgList, isVarArg) ||
ParseOptionalToken(lltok::kw_unnamed_addr, UnnamedAddr,
@@ -3088,7 +3153,7 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
FunctionType::get(RetType, ParamTypeList, isVarArg);
PointerType *PFT = PointerType::getUnqual(FT);
- Fn = 0;
+ Fn = nullptr;
if (!FunctionName.empty()) {
// If this was a definition of a forward reference, remove the definition
// from the forward reference table and fill in the forward ref.
@@ -3126,7 +3191,7 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
}
}
- if (Fn == 0)
+ if (!Fn)
Fn = Function::Create(FT, GlobalValue::ExternalLinkage, FunctionName, M);
else // Move the forward-reference to the correct spot in the module.
M->getFunctionList().splice(M->end(), M->getFunctionList(), Fn);
@@ -3203,7 +3268,7 @@ bool LLParser::ParseBasicBlock(PerFunctionState &PFS) {
}
BasicBlock *BB = PFS.DefineBB(Name, NameLoc);
- if (BB == 0) return true;
+ if (!BB) return true;
std::string NameStr;
@@ -3351,8 +3416,10 @@ int LLParser::ParseInstruction(Instruction *&Inst, BasicBlock *BB,
case lltok::kw_shufflevector: return ParseShuffleVector(Inst, PFS);
case lltok::kw_phi: return ParsePHI(Inst, PFS);
case lltok::kw_landingpad: return ParseLandingPad(Inst, PFS);
- case lltok::kw_call: return ParseCall(Inst, PFS, false);
- case lltok::kw_tail: return ParseCall(Inst, PFS, true);
+ // Call.
+ case lltok::kw_call: return ParseCall(Inst, PFS, CallInst::TCK_None);
+ case lltok::kw_tail: return ParseCall(Inst, PFS, CallInst::TCK_Tail);
+ case lltok::kw_musttail: return ParseCall(Inst, PFS, CallInst::TCK_MustTail);
// Memory.
case lltok::kw_alloca: return ParseAlloc(Inst, PFS);
case lltok::kw_load: return ParseLoad(Inst, PFS);
@@ -3417,7 +3484,7 @@ bool LLParser::ParseCmpPredicate(unsigned &P, unsigned Opc) {
bool LLParser::ParseRet(Instruction *&Inst, BasicBlock *BB,
PerFunctionState &PFS) {
SMLoc TypeLoc = Lex.getLoc();
- Type *Ty = 0;
+ Type *Ty = nullptr;
if (ParseType(Ty, true /*void allowed*/)) return true;
Type *ResType = PFS.getFunction().getReturnType();
@@ -3567,7 +3634,7 @@ bool LLParser::ParseInvoke(Instruction *&Inst, PerFunctionState &PFS) {
std::vector<unsigned> FwdRefAttrGrps;
LocTy NoBuiltinLoc;
CallingConv::ID CC;
- Type *RetType = 0;
+ Type *RetType = nullptr;
LocTy RetTypeLoc;
ValID CalleeID;
SmallVector<ParamInfo, 16> ArgList;
@@ -3589,8 +3656,8 @@ bool LLParser::ParseInvoke(Instruction *&Inst, PerFunctionState &PFS) {
// If RetType is a non-function pointer type, then this is the short syntax
// for the call, which means that RetType is just the return type. Infer the
// rest of the function argument types from the arguments that are present.
- PointerType *PFTy = 0;
- FunctionType *Ty = 0;
+ PointerType *PFTy = nullptr;
+ FunctionType *Ty = nullptr;
if (!(PFTy = dyn_cast<PointerType>(RetType)) ||
!(Ty = dyn_cast<FunctionType>(PFTy->getElementType()))) {
// Pull out the types of all of the arguments...
@@ -3623,7 +3690,7 @@ bool LLParser::ParseInvoke(Instruction *&Inst, PerFunctionState &PFS) {
FunctionType::param_iterator I = Ty->param_begin();
FunctionType::param_iterator E = Ty->param_end();
for (unsigned i = 0, e = ArgList.size(); i != e; ++i) {
- Type *ExpectedTy = 0;
+ Type *ExpectedTy = nullptr;
if (I != E) {
ExpectedTy = *I++;
} else if (!Ty->isVarArg()) {
@@ -3764,7 +3831,7 @@ bool LLParser::ParseCast(Instruction *&Inst, PerFunctionState &PFS,
unsigned Opc) {
LocTy Loc;
Value *Op;
- Type *DestTy = 0;
+ Type *DestTy = nullptr;
if (ParseTypeAndValue(Op, Loc, PFS) ||
ParseToken(lltok::kw_to, "expected 'to' after cast value") ||
ParseType(DestTy))
@@ -3803,7 +3870,7 @@ bool LLParser::ParseSelect(Instruction *&Inst, PerFunctionState &PFS) {
/// ::= 'va_arg' TypeAndValue ',' Type
bool LLParser::ParseVA_Arg(Instruction *&Inst, PerFunctionState &PFS) {
Value *Op;
- Type *EltTy = 0;
+ Type *EltTy = nullptr;
LocTy TypeLoc;
if (ParseTypeAndValue(Op, PFS) ||
ParseToken(lltok::comma, "expected ',' after vaarg operand") ||
@@ -3875,7 +3942,7 @@ bool LLParser::ParseShuffleVector(Instruction *&Inst, PerFunctionState &PFS) {
/// ParsePHI
/// ::= 'phi' Type '[' Value ',' Value ']' (',' '[' Value ',' Value ']')*
int LLParser::ParsePHI(Instruction *&Inst, PerFunctionState &PFS) {
- Type *Ty = 0; LocTy TypeLoc;
+ Type *Ty = nullptr; LocTy TypeLoc;
Value *Op0, *Op1;
if (ParseType(Ty, TypeLoc) ||
@@ -3924,7 +3991,7 @@ int LLParser::ParsePHI(Instruction *&Inst, PerFunctionState &PFS) {
/// ::= 'filter'
/// ::= 'filter' TypeAndValue ( ',' TypeAndValue )*
bool LLParser::ParseLandingPad(Instruction *&Inst, PerFunctionState &PFS) {
- Type *Ty = 0; LocTy TyLoc;
+ Type *Ty = nullptr; LocTy TyLoc;
Value *PersFn; LocTy PersFnLoc;
if (ParseType(Ty, TyLoc) ||
@@ -3968,21 +4035,26 @@ bool LLParser::ParseLandingPad(Instruction *&Inst, PerFunctionState &PFS) {
}
/// ParseCall
-/// ::= 'tail'? 'call' OptionalCallingConv OptionalAttrs Type Value
+/// ::= 'call' OptionalCallingConv OptionalAttrs Type Value
+/// ParameterList OptionalAttrs
+/// ::= 'tail' 'call' OptionalCallingConv OptionalAttrs Type Value
+/// ParameterList OptionalAttrs
+/// ::= 'musttail' 'call' OptionalCallingConv OptionalAttrs Type Value
/// ParameterList OptionalAttrs
bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS,
- bool isTail) {
+ CallInst::TailCallKind TCK) {
AttrBuilder RetAttrs, FnAttrs;
std::vector<unsigned> FwdRefAttrGrps;
LocTy BuiltinLoc;
CallingConv::ID CC;
- Type *RetType = 0;
+ Type *RetType = nullptr;
LocTy RetTypeLoc;
ValID CalleeID;
SmallVector<ParamInfo, 16> ArgList;
LocTy CallLoc = Lex.getLoc();
- if ((isTail && ParseToken(lltok::kw_call, "expected 'tail call'")) ||
+ if ((TCK != CallInst::TCK_None &&
+ ParseToken(lltok::kw_call, "expected 'tail call'")) ||
ParseOptionalCallingConv(CC) ||
ParseOptionalReturnAttrs(RetAttrs) ||
ParseType(RetType, RetTypeLoc, true /*void allowed*/) ||
@@ -3995,8 +4067,8 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS,
// If RetType is a non-function pointer type, then this is the short syntax
// for the call, which means that RetType is just the return type. Infer the
// rest of the function argument types from the arguments that are present.
- PointerType *PFTy = 0;
- FunctionType *Ty = 0;
+ PointerType *PFTy = nullptr;
+ FunctionType *Ty = nullptr;
if (!(PFTy = dyn_cast<PointerType>(RetType)) ||
!(Ty = dyn_cast<FunctionType>(PFTy->getElementType()))) {
// Pull out the types of all of the arguments...
@@ -4029,7 +4101,7 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS,
FunctionType::param_iterator I = Ty->param_begin();
FunctionType::param_iterator E = Ty->param_end();
for (unsigned i = 0, e = ArgList.size(); i != e; ++i) {
- Type *ExpectedTy = 0;
+ Type *ExpectedTy = nullptr;
if (I != E) {
ExpectedTy = *I++;
} else if (!Ty->isVarArg()) {
@@ -4058,7 +4130,7 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS,
AttributeSet PAL = AttributeSet::get(Context, Attrs);
CallInst *CI = CallInst::Create(Callee, Args);
- CI->setTailCall(isTail);
+ CI->setTailCallKind(TCK);
CI->setCallingConv(CC);
CI->setAttributes(PAL);
ForwardRefAttrGroups[CI] = FwdRefAttrGrps;
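
ParseCall now threads a three-valued CallInst::TailCallKind instead of a bool,
and the built instruction records it via setTailCallKind. The same API works
when constructing calls directly; a sketch, with Callee and Args supplied by
the caller (wrapper name is illustrative):

    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/IR/Instructions.h"

    // Sketch: programmatic equivalent of a 'musttail call'.
    llvm::CallInst *makeMustTailCall(llvm::Value *Callee,
                                     llvm::ArrayRef<llvm::Value *> Args) {
      llvm::CallInst *CI = llvm::CallInst::Create(Callee, Args);
      CI->setTailCallKind(llvm::CallInst::TCK_MustTail);
      return CI;  // both isTailCall() and isMustTailCall() now hold
    }
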
@@ -4073,10 +4145,10 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS,
/// ParseAlloc
/// ::= 'alloca' 'inalloca'? Type (',' TypeAndValue)? (',' 'align' i32)?
int LLParser::ParseAlloc(Instruction *&Inst, PerFunctionState &PFS) {
- Value *Size = 0;
+ Value *Size = nullptr;
LocTy SizeLoc;
unsigned Alignment = 0;
- Type *Ty = 0;
+ Type *Ty = nullptr;
bool IsInAlloca = EatIfPresent(lltok::kw_inalloca);
@@ -4315,8 +4387,8 @@ int LLParser::ParseFence(Instruction *&Inst, PerFunctionState &PFS) {
/// ParseGetElementPtr
/// ::= 'getelementptr' 'inbounds'? TypeAndValue (',' TypeAndValue)*
int LLParser::ParseGetElementPtr(Instruction *&Inst, PerFunctionState &PFS) {
- Value *Ptr = 0;
- Value *Val = 0;
+ Value *Ptr = nullptr;
+ Value *Val = nullptr;
LocTy Loc, EltLoc;
bool InBounds = EatIfPresent(lltok::kw_inbounds);
@@ -4418,11 +4490,11 @@ bool LLParser::ParseMDNodeVector(SmallVectorImpl<Value*> &Elts,
do {
// Null is a special case since it is typeless.
if (EatIfPresent(lltok::kw_null)) {
- Elts.push_back(0);
+ Elts.push_back(nullptr);
continue;
}
- Value *V = 0;
+ Value *V = nullptr;
if (ParseTypeAndValue(V, PFS)) return true;
Elts.push_back(V);
} while (EatIfPresent(lltok::comma));
diff --git a/lib/AsmParser/LLParser.h b/lib/AsmParser/LLParser.h
index 294a1e1..e2bf462 100644
--- a/lib/AsmParser/LLParser.h
+++ b/lib/AsmParser/LLParser.h
@@ -176,7 +176,8 @@ namespace llvm {
return FMF;
}
- bool ParseOptionalToken(lltok::Kind T, bool &Present, LocTy *Loc = 0) {
+ bool ParseOptionalToken(lltok::Kind T, bool &Present,
+ LocTy *Loc = nullptr) {
if (Lex.getKind() != T) {
Present = false;
} else {
@@ -348,7 +349,7 @@ namespace llvm {
PerFunctionState &PFS);
// Constant Parsing.
- bool ParseValID(ValID &ID, PerFunctionState *PFS = NULL);
+ bool ParseValID(ValID &ID, PerFunctionState *PFS = nullptr);
bool ParseGlobalValue(Type *Ty, Constant *&V);
bool ParseGlobalTypeAndValue(Constant *&V);
bool ParseGlobalValueVector(SmallVectorImpl<Constant*> &Elts);
@@ -371,6 +372,8 @@ namespace llvm {
bool ParseFunctionBody(Function &Fn);
bool ParseBasicBlock(PerFunctionState &PFS);
+ enum TailCallType { TCT_None, TCT_Tail, TCT_MustTail };
+
// Instruction Parsing. Each instruction parsing routine can return with a
// normal result, an error result, or return having eaten an extra comma.
enum InstResult { InstNormal = 0, InstError = 1, InstExtraComma = 2 };
@@ -397,7 +400,8 @@ namespace llvm {
bool ParseShuffleVector(Instruction *&I, PerFunctionState &PFS);
int ParsePHI(Instruction *&I, PerFunctionState &PFS);
bool ParseLandingPad(Instruction *&I, PerFunctionState &PFS);
- bool ParseCall(Instruction *&I, PerFunctionState &PFS, bool isTail);
+ bool ParseCall(Instruction *&I, PerFunctionState &PFS,
+ CallInst::TailCallKind TCK);
int ParseAlloc(Instruction *&I, PerFunctionState &PFS);
int ParseLoad(Instruction *&I, PerFunctionState &PFS);
int ParseStore(Instruction *&I, PerFunctionState &PFS);
diff --git a/lib/AsmParser/LLToken.h b/lib/AsmParser/LLToken.h
index 532e896..b6b7d82 100644
--- a/lib/AsmParser/LLToken.h
+++ b/lib/AsmParser/LLToken.h
@@ -39,6 +39,8 @@ namespace lltok {
kw_private,
kw_internal,
+ kw_linker_private, // NOTE: deprecated, for parser compatibility
+ kw_linker_private_weak, // NOTE: deprecated, for parser compatibility
kw_linkonce, kw_linkonce_odr,
kw_weak, kw_weak_odr, kw_appending,
kw_dllimport, kw_dllexport, kw_common, kw_available_externally,
@@ -52,6 +54,7 @@ namespace lltok {
kw_undef, kw_null,
kw_to,
kw_tail,
+ kw_musttail,
kw_target,
kw_triple,
kw_unwind,
@@ -85,7 +88,7 @@ namespace lltok {
kw_cc, kw_ccc, kw_fastcc, kw_coldcc,
kw_intel_ocl_bicc,
- kw_x86_stdcallcc, kw_x86_fastcallcc, kw_x86_thiscallcc, kw_x86_cdeclmethodcc,
+ kw_x86_stdcallcc, kw_x86_fastcallcc, kw_x86_thiscallcc,
kw_arm_apcscc, kw_arm_aapcscc, kw_arm_aapcs_vfpcc,
kw_msp430_intrcc,
kw_ptx_kernel, kw_ptx_device,
@@ -114,6 +117,7 @@ namespace lltok {
kw_noimplicitfloat,
kw_noinline,
kw_nonlazybind,
+ kw_nonnull,
kw_noredzone,
kw_noreturn,
kw_nounwind,
diff --git a/lib/AsmParser/Parser.cpp b/lib/AsmParser/Parser.cpp
index a1da5e1..2606bc2 100644
--- a/lib/AsmParser/Parser.cpp
+++ b/lib/AsmParser/Parser.cpp
@@ -30,12 +30,12 @@ Module *llvm::ParseAssembly(MemoryBuffer *F,
// If we are parsing into an existing module, do it.
if (M)
- return LLParser(F, SM, Err, M).Run() ? 0 : M;
+ return LLParser(F, SM, Err, M).Run() ? nullptr : M;
// Otherwise create a new module.
std::unique_ptr<Module> M2(new Module(F->getBufferIdentifier(), Context));
if (LLParser(F, SM, Err, M2.get()).Run())
- return 0;
+ return nullptr;
return M2.release();
}
@@ -45,10 +45,10 @@ Module *llvm::ParseAssemblyFile(const std::string &Filename, SMDiagnostic &Err,
if (error_code ec = MemoryBuffer::getFileOrSTDIN(Filename, File)) {
Err = SMDiagnostic(Filename, SourceMgr::DK_Error,
"Could not open input file: " + ec.message());
- return 0;
+ return nullptr;
}
- return ParseAssembly(File.release(), 0, Err, Context);
+ return ParseAssembly(File.release(), nullptr, Err, Context);
}
Module *llvm::ParseAssemblyString(const char *AsmString, Module *M,
diff --git a/lib/AsmParser/module.modulemap b/lib/AsmParser/module.modulemap
new file mode 100644
index 0000000..cc300060
--- /dev/null
+++ b/lib/AsmParser/module.modulemap
@@ -0,0 +1 @@
+module AsmParser { requires cplusplus umbrella "." module * { export * } }
diff --git a/lib/Bitcode/Reader/BitReader.cpp b/lib/Bitcode/Reader/BitReader.cpp
index 3e360a8..716299f 100644
--- a/lib/Bitcode/Reader/BitReader.cpp
+++ b/lib/Bitcode/Reader/BitReader.cpp
@@ -35,7 +35,7 @@ LLVMBool LLVMParseBitcodeInContext(LLVMContextRef ContextRef,
if (error_code EC = ModuleOrErr.getError()) {
if (OutMessage)
*OutMessage = strdup(EC.message().c_str());
- *OutModule = wrap((Module*)0);
+ *OutModule = wrap((Module*)nullptr);
return 1;
}
@@ -55,7 +55,7 @@ LLVMBool LLVMGetBitcodeModuleInContext(LLVMContextRef ContextRef,
getLazyBitcodeModule(unwrap(MemBuf), *unwrap(ContextRef));
if (error_code EC = ModuleOrErr.getError()) {
- *OutM = wrap((Module *)NULL);
+ *OutM = wrap((Module *)nullptr);
if (OutMessage)
*OutMessage = strdup(EC.message().c_str());
return 1;
diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp
index f712d9d..4170f98 100644
--- a/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -41,7 +41,7 @@ void BitcodeReader::materializeForwardReferencedFunctions() {
void BitcodeReader::FreeState() {
if (BufferOwned)
delete Buffer;
- Buffer = 0;
+ Buffer = nullptr;
std::vector<Type*>().swap(TypeList);
ValueList.clear();
MDValueList.clear();
@@ -258,7 +258,7 @@ void BitcodeReaderValueList::AssignValue(Value *V, unsigned Idx) {
resize(Idx+1);
WeakVH &OldV = ValuePtrs[Idx];
- if (OldV == 0) {
+ if (!OldV) {
OldV = V;
return;
}
@@ -298,12 +298,12 @@ Value *BitcodeReaderValueList::getValueFwdRef(unsigned Idx, Type *Ty) {
resize(Idx + 1);
if (Value *V = ValuePtrs[Idx]) {
- assert((Ty == 0 || Ty == V->getType()) && "Type mismatch in value table!");
+ assert((!Ty || Ty == V->getType()) && "Type mismatch in value table!");
return V;
}
// No type specified, must be invalid reference.
- if (Ty == 0) return 0;
+ if (!Ty) return nullptr;
// Create and return a placeholder, which will later be RAUW'd.
Value *V = new Argument(Ty);
@@ -403,7 +403,7 @@ void BitcodeReaderMDValueList::AssignValue(Value *V, unsigned Idx) {
resize(Idx+1);
WeakVH &OldV = MDValuePtrs[Idx];
- if (OldV == 0) {
+ if (!OldV) {
OldV = V;
return;
}
@@ -435,7 +435,7 @@ Value *BitcodeReaderMDValueList::getValueFwdRef(unsigned Idx) {
Type *BitcodeReader::getTypeByID(unsigned ID) {
// The type table size is always specified correctly.
if (ID >= TypeList.size())
- return 0;
+ return nullptr;
if (Type *Ty = TypeList[ID])
return Ty;
@@ -569,6 +569,8 @@ static Attribute::AttrKind GetAttrFromCode(uint64_t Code) {
return Attribute::NoInline;
case bitc::ATTR_KIND_NON_LAZY_BIND:
return Attribute::NonLazyBind;
+ case bitc::ATTR_KIND_NON_NULL:
+ return Attribute::NonNull;
case bitc::ATTR_KIND_NO_RED_ZONE:
return Attribute::NoRedZone;
case bitc::ATTR_KIND_NO_RETURN:
@@ -737,7 +739,7 @@ error_code BitcodeReader::ParseTypeTableBody() {
// Read a record.
Record.clear();
- Type *ResultTy = 0;
+ Type *ResultTy = nullptr;
switch (Stream.readRecord(Entry.ID, Record)) {
default:
return Error(InvalidValue);
@@ -792,7 +794,7 @@ error_code BitcodeReader::ParseTypeTableBody() {
if (Record.size() == 2)
AddressSpace = Record[1];
ResultTy = getTypeByID(Record[0]);
- if (ResultTy == 0)
+ if (!ResultTy)
return Error(InvalidType);
ResultTy = PointerType::get(ResultTy, AddressSpace);
break;
@@ -811,7 +813,7 @@ error_code BitcodeReader::ParseTypeTableBody() {
}
ResultTy = getTypeByID(Record[2]);
- if (ResultTy == 0 || ArgTys.size() < Record.size()-3)
+ if (!ResultTy || ArgTys.size() < Record.size()-3)
return Error(InvalidType);
ResultTy = FunctionType::get(ResultTy, ArgTys, Record[0]);
@@ -830,7 +832,7 @@ error_code BitcodeReader::ParseTypeTableBody() {
}
ResultTy = getTypeByID(Record[1]);
- if (ResultTy == 0 || ArgTys.size() < Record.size()-2)
+ if (!ResultTy || ArgTys.size() < Record.size()-2)
return Error(InvalidType);
ResultTy = FunctionType::get(ResultTy, ArgTys, Record[0]);
@@ -867,7 +869,7 @@ error_code BitcodeReader::ParseTypeTableBody() {
StructType *Res = cast_or_null<StructType>(TypeList[NumRecords]);
if (Res) {
Res->setName(TypeName);
- TypeList[NumRecords] = 0;
+ TypeList[NumRecords] = nullptr;
} else // Otherwise, create a new struct.
Res = StructType::create(Context, TypeName);
TypeName.clear();
@@ -896,7 +898,7 @@ error_code BitcodeReader::ParseTypeTableBody() {
StructType *Res = cast_or_null<StructType>(TypeList[NumRecords]);
if (Res) {
Res->setName(TypeName);
- TypeList[NumRecords] = 0;
+ TypeList[NumRecords] = nullptr;
} else // Otherwise, create a new struct with no body.
Res = StructType::create(Context, TypeName);
TypeName.clear();
@@ -924,7 +926,7 @@ error_code BitcodeReader::ParseTypeTableBody() {
if (NumRecords >= TypeList.size())
return Error(InvalidTYPETable);
assert(ResultTy && "Didn't read a type?");
- assert(TypeList[NumRecords] == 0 && "Already read type?");
+ assert(!TypeList[NumRecords] && "Already read type?");
TypeList[NumRecords++] = ResultTy;
}
}
@@ -972,7 +974,7 @@ error_code BitcodeReader::ParseValueSymbolTable() {
if (ConvertToString(Record, 1, ValueName))
return Error(InvalidRecord);
BasicBlock *BB = getBasicBlock(Record[0]);
- if (BB == 0)
+ if (!BB)
return Error(InvalidRecord);
BB->setName(StringRef(ValueName.data(), ValueName.size()));
@@ -1028,7 +1030,7 @@ error_code BitcodeReader::ParseMetadata() {
NamedMDNode *NMD = TheModule->getOrInsertNamedMetadata(Name);
for (unsigned i = 0; i != Size; ++i) {
MDNode *MD = dyn_cast_or_null<MDNode>(MDValueList.getValueFwdRef(Record[i]));
- if (MD == 0)
+ if (!MD)
return Error(InvalidRecord);
NMD->addOperand(MD);
}
@@ -1052,7 +1054,7 @@ error_code BitcodeReader::ParseMetadata() {
else if (!Ty->isVoidTy())
Elts.push_back(ValueList.getValueFwdRef(Record[i+1], Ty));
else
- Elts.push_back(NULL);
+ Elts.push_back(nullptr);
}
Value *V = MDNode::getWhenValsUnresolved(Context, Elts, IsFunctionLocal);
IsFunctionLocal = false;
@@ -1092,6 +1094,28 @@ uint64_t BitcodeReader::decodeSignRotatedValue(uint64_t V) {
return 1ULL << 63;
}
+// FIXME: Delete this in LLVM 4.0 and just assert that the aliasee is a
+// GlobalObject.
+static GlobalObject &
+getGlobalObjectInExpr(const DenseMap<GlobalAlias *, Constant *> &Map,
+ Constant &C) {
+ auto *GO = dyn_cast<GlobalObject>(&C);
+ if (GO)
+ return *GO;
+
+ auto *GA = dyn_cast<GlobalAlias>(&C);
+ if (GA)
+ return getGlobalObjectInExpr(Map, *Map.find(GA)->second);
+
+ auto &CE = cast<ConstantExpr>(C);
+ assert(CE.getOpcode() == Instruction::BitCast ||
+ CE.getOpcode() == Instruction::GetElementPtr ||
+ CE.getOpcode() == Instruction::AddrSpaceCast);
+ if (CE.getOpcode() == Instruction::GetElementPtr)
+ assert(cast<GEPOperator>(CE).hasAllZeroIndices());
+ return getGlobalObjectInExpr(Map, *CE.getOperand(0));
+}
+
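
getGlobalObjectInExpr walks alias-to-alias chains from old bitcode down to the
underlying function or variable, and the loop added to
ResolveGlobalAndAliasInits below applies it to every collected alias. A
standalone analogue of the chain walk, using string keys in place of
GlobalAlias pointers (purely illustrative):

    #include <map>
    #include <string>

    // With {"a" -> "b", "b" -> "g"}, both aliases resolve to the object "g".
    std::string resolveChain(const std::map<std::string, std::string> &Aliases,
                             const std::string &Name) {
      auto It = Aliases.find(Name);
      return It == Aliases.end() ? Name : resolveChain(Aliases, It->second);
    }
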
/// ResolveGlobalAndAliasInits - Resolve all of the initializers for global
/// values and aliases that we can.
error_code BitcodeReader::ResolveGlobalAndAliasInits() {
@@ -1117,19 +1141,30 @@ error_code BitcodeReader::ResolveGlobalAndAliasInits() {
GlobalInitWorklist.pop_back();
}
+ // FIXME: Delete this in LLVM 4.0
+ // Older versions of llvm could write an alias pointing to another. We cannot
+ // construct those aliases, so we first collect an alias to aliasee expression
+ // and then compute the actual aliasee.
+ DenseMap<GlobalAlias *, Constant *> AliasInit;
+
while (!AliasInitWorklist.empty()) {
unsigned ValID = AliasInitWorklist.back().second;
if (ValID >= ValueList.size()) {
AliasInits.push_back(AliasInitWorklist.back());
} else {
if (Constant *C = dyn_cast_or_null<Constant>(ValueList[ValID]))
- AliasInitWorklist.back().first->setAliasee(C);
+ AliasInit.insert(std::make_pair(AliasInitWorklist.back().first, C));
else
return Error(ExpectedConstant);
}
AliasInitWorklist.pop_back();
}
+ for (auto &Pair : AliasInit) {
+ auto &GO = getGlobalObjectInExpr(AliasInit, *Pair.second);
+ Pair.first->setAliasee(&GO);
+ }
+
while (!FunctionPrefixWorklist.empty()) {
unsigned ValID = FunctionPrefixWorklist.back().second;
if (ValID >= ValueList.size()) {
@@ -1185,7 +1220,7 @@ error_code BitcodeReader::ParseConstants() {
// Read a record.
Record.clear();
- Value *V = 0;
+ Value *V = nullptr;
unsigned BitCode = Stream.readRecord(Entry.ID, Record);
switch (BitCode) {
default: // Default behavior: unknown constant
@@ -1418,34 +1453,52 @@ error_code BitcodeReader::ParseConstants() {
ValueList.getConstantFwdRef(Record[2],CurTy));
break;
}
- case bitc::CST_CODE_CE_EXTRACTELT: { // CE_EXTRACTELT: [opty, opval, opval]
+ case bitc::CST_CODE_CE_EXTRACTELT: { // CE_EXTRACTELT: [opty, opval, opty, opval]
if (Record.size() < 3)
return Error(InvalidRecord);
VectorType *OpTy =
dyn_cast_or_null<VectorType>(getTypeByID(Record[0]));
- if (OpTy == 0)
+ if (!OpTy)
return Error(InvalidRecord);
Constant *Op0 = ValueList.getConstantFwdRef(Record[1], OpTy);
- Constant *Op1 = ValueList.getConstantFwdRef(Record[2],
- Type::getInt32Ty(Context));
+ Constant *Op1 = nullptr;
+ if (Record.size() == 4) {
+ Type *IdxTy = getTypeByID(Record[2]);
+ if (!IdxTy)
+ return Error(InvalidRecord);
+ Op1 = ValueList.getConstantFwdRef(Record[3], IdxTy);
+ } else // TODO: Remove with LLVM 4.0
+ Op1 = ValueList.getConstantFwdRef(Record[2], Type::getInt32Ty(Context));
+ if (!Op1)
+ return Error(InvalidRecord);
V = ConstantExpr::getExtractElement(Op0, Op1);
break;
}
- case bitc::CST_CODE_CE_INSERTELT: { // CE_INSERTELT: [opval, opval, opval]
+ case bitc::CST_CODE_CE_INSERTELT: { // CE_INSERTELT: [opval, opval, opty, opval]
VectorType *OpTy = dyn_cast<VectorType>(CurTy);
- if (Record.size() < 3 || OpTy == 0)
+ if (Record.size() < 3 || !OpTy)
return Error(InvalidRecord);
Constant *Op0 = ValueList.getConstantFwdRef(Record[0], OpTy);
Constant *Op1 = ValueList.getConstantFwdRef(Record[1],
OpTy->getElementType());
- Constant *Op2 = ValueList.getConstantFwdRef(Record[2],
- Type::getInt32Ty(Context));
+ Constant *Op2 = nullptr;
+ if (Record.size() == 4) {
+ Type *IdxTy = getTypeByID(Record[2]);
+ if (!IdxTy)
+ return Error(InvalidRecord);
+ Op2 = ValueList.getConstantFwdRef(Record[3], IdxTy);
+ } else // TODO: Remove with LLVM 4.0
+ Op2 = ValueList.getConstantFwdRef(Record[2], Type::getInt32Ty(Context));
+ if (!Op2)
+ return Error(InvalidRecord);
V = ConstantExpr::getInsertElement(Op0, Op1, Op2);
break;
}
case bitc::CST_CODE_CE_SHUFFLEVEC: { // CE_SHUFFLEVEC: [opval, opval, opval]
VectorType *OpTy = dyn_cast<VectorType>(CurTy);
- if (Record.size() < 3 || OpTy == 0)
+ if (Record.size() < 3 || !OpTy)
return Error(InvalidRecord);
Constant *Op0 = ValueList.getConstantFwdRef(Record[0], OpTy);
Constant *Op1 = ValueList.getConstantFwdRef(Record[1], OpTy);
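
The two element-constexpr cases above now accept an explicit type for the
index operand: four-operand CE_EXTRACTELT/CE_INSERTELT records carry an opty
before the index value, while three-operand records remain the legacy layout
with an implied i32 index. A standalone analogue of that dispatch
(illustrative struct and names):

    #include <cstdint>
    #include <vector>

    struct ExtractEltIdx { bool ExplicitType; size_t TypeSlot, ValSlot; };

    // Four operands: [opty, opval, opty, opval]; three: legacy, index is i32.
    ExtractEltIdx classifyExtractElt(const std::vector<uint64_t> &Record) {
      if (Record.size() == 4)
        return {true, 2, 3};
      return {false, 0, 2};
    }
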
@@ -1459,7 +1512,7 @@ error_code BitcodeReader::ParseConstants() {
VectorType *RTy = dyn_cast<VectorType>(CurTy);
VectorType *OpTy =
dyn_cast_or_null<VectorType>(getTypeByID(Record[0]));
- if (Record.size() < 4 || RTy == 0 || OpTy == 0)
+ if (Record.size() < 4 || !RTy || !OpTy)
return Error(InvalidRecord);
Constant *Op0 = ValueList.getConstantFwdRef(Record[1], OpTy);
Constant *Op1 = ValueList.getConstantFwdRef(Record[2], OpTy);
@@ -1473,7 +1526,7 @@ error_code BitcodeReader::ParseConstants() {
if (Record.size() < 4)
return Error(InvalidRecord);
Type *OpTy = getTypeByID(Record[0]);
- if (OpTy == 0)
+ if (!OpTy)
return Error(InvalidRecord);
Constant *Op0 = ValueList.getConstantFwdRef(Record[1], OpTy);
Constant *Op1 = ValueList.getConstantFwdRef(Record[2], OpTy);
@@ -1538,11 +1591,11 @@ error_code BitcodeReader::ParseConstants() {
if (Record.size() < 3)
return Error(InvalidRecord);
Type *FnTy = getTypeByID(Record[0]);
- if (FnTy == 0)
+ if (!FnTy)
return Error(InvalidRecord);
Function *Fn =
dyn_cast_or_null<Function>(ValueList.getConstantFwdRef(Record[1],FnTy));
- if (Fn == 0)
+ if (!Fn)
return Error(InvalidRecord);
// If the function is already parsed we can insert the block address right
@@ -1561,7 +1614,7 @@ error_code BitcodeReader::ParseConstants() {
GlobalVariable *FwdRef = new GlobalVariable(*Fn->getParent(),
Type::getInt8Ty(Context),
false, GlobalValue::InternalLinkage,
- 0, "");
+ nullptr, "");
BlockAddrFwdRefs[Fn].push_back(std::make_pair(Record[2], FwdRef));
V = FwdRef;
}
@@ -1649,8 +1702,11 @@ error_code BitcodeReader::GlobalCleanup() {
// Look for global variables which need to be renamed.
for (Module::global_iterator
GI = TheModule->global_begin(), GE = TheModule->global_end();
- GI != GE; ++GI)
- UpgradeGlobalVariable(GI);
+ GI != GE;) {
+ GlobalVariable *GV = GI++;
+ UpgradeGlobalVariable(GV);
+ }
+
// Force deallocation of memory for these vectors to favor clients that
// want lazy deserialization.
std::vector<std::pair<GlobalVariable*, unsigned> >().swap(GlobalInits);
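
The rewritten loop above guards against iterator invalidation:
UpgradeGlobalVariable may delete the global it is handed, so the iterator is
advanced before the call. The same advance-before-mutate pattern in a
standalone form (illustrative):

    #include <list>

    // Copy the iterator, step past the element, then mutate via the copy;
    // erasing the current element cannot invalidate the loop this way.
    void eraseEvens(std::list<int> &L) {
      for (auto It = L.begin(), E = L.end(); It != E;) {
        auto Cur = It++;
        if (*Cur % 2 == 0)
          L.erase(Cur);
      }
    }
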
@@ -1838,7 +1894,9 @@ error_code BitcodeReader::ParseModule(bool Resume) {
Section = SectionTable[Record[5]-1];
}
GlobalValue::VisibilityTypes Visibility = GlobalValue::DefaultVisibility;
- if (Record.size() > 6)
+ // Local linkage must have default visibility.
+ if (Record.size() > 6 && !GlobalValue::isLocalLinkage(Linkage))
+ // FIXME: Change to an error if non-default in 4.0.
Visibility = GetDecodedVisibility(Record[6]);
GlobalVariable::ThreadLocalMode TLM = GlobalVariable::NotThreadLocal;
@@ -1854,7 +1912,7 @@ error_code BitcodeReader::ParseModule(bool Resume) {
ExternallyInitialized = Record[9];
GlobalVariable *NewGV =
- new GlobalVariable(*TheModule, Ty, isConstant, Linkage, 0, "", 0,
+ new GlobalVariable(*TheModule, Ty, isConstant, Linkage, nullptr, "", nullptr,
TLM, AddressSpace, ExternallyInitialized);
NewGV->setAlignment(Alignment);
if (!Section.empty())
@@ -1904,7 +1962,10 @@ error_code BitcodeReader::ParseModule(bool Resume) {
return Error(InvalidID);
Func->setSection(SectionTable[Record[6]-1]);
}
- Func->setVisibility(GetDecodedVisibility(Record[7]));
+ // Local linkage must have default visibility.
+ if (!Func->hasLocalLinkage())
+ // FIXME: Change to an error if non-default in 4.0.
+ Func->setVisibility(GetDecodedVisibility(Record[7]));
if (Record.size() > 8 && Record[8]) {
if (Record[8]-1 > GCTable.size())
return Error(InvalidID);
@@ -1940,13 +2001,17 @@ error_code BitcodeReader::ParseModule(bool Resume) {
Type *Ty = getTypeByID(Record[0]);
if (!Ty)
return Error(InvalidRecord);
- if (!Ty->isPointerTy())
+ auto *PTy = dyn_cast<PointerType>(Ty);
+ if (!PTy)
return Error(InvalidTypeForValue);
- GlobalAlias *NewGA = new GlobalAlias(Ty, GetDecodedLinkage(Record[2]),
- "", 0, TheModule);
+ auto *NewGA =
+ GlobalAlias::create(PTy->getElementType(), PTy->getAddressSpace(),
+ GetDecodedLinkage(Record[2]), "", TheModule);
// Old bitcode files didn't have visibility field.
- if (Record.size() > 3)
+ // Local linkage must have default visibility.
+ if (Record.size() > 3 && !NewGA->hasLocalLinkage())
+ // FIXME: Change to an error if non-default in 4.0.
NewGA->setVisibility(GetDecodedVisibility(Record[3]));
if (Record.size() > 4)
NewGA->setDLLStorageClass(GetDecodedDLLStorageClass(Record[4]));
@@ -1969,7 +2034,7 @@ error_code BitcodeReader::ParseModule(bool Resume) {
}
error_code BitcodeReader::ParseBitcodeInto(Module *M) {
- TheModule = 0;
+ TheModule = nullptr;
if (error_code EC = InitStream())
return EC;
@@ -2173,7 +2238,7 @@ error_code BitcodeReader::ParseFunctionBody(Function *F) {
ValueList.push_back(I);
unsigned NextValueNo = ValueList.size();
- BasicBlock *CurBB = 0;
+ BasicBlock *CurBB = nullptr;
unsigned CurBBNo = 0;
DebugLoc LastLoc;
@@ -2222,7 +2287,7 @@ error_code BitcodeReader::ParseFunctionBody(Function *F) {
// Read a record.
Record.clear();
- Instruction *I = 0;
+ Instruction *I = nullptr;
unsigned BitCode = Stream.readRecord(Entry.ID, Record);
switch (BitCode) {
default: // Default behavior: reject
@@ -2240,7 +2305,7 @@ error_code BitcodeReader::ParseFunctionBody(Function *F) {
case bitc::FUNC_CODE_DEBUG_LOC_AGAIN: // DEBUG_LOC_AGAIN
// This record indicates that the last instruction is at the same
// location as the previous instruction with a location.
- I = 0;
+ I = nullptr;
// Get the last instruction emitted.
if (CurBB && !CurBB->empty())
@@ -2249,31 +2314,31 @@ error_code BitcodeReader::ParseFunctionBody(Function *F) {
!FunctionBBs[CurBBNo-1]->empty())
I = &FunctionBBs[CurBBNo-1]->back();
- if (I == 0)
+ if (!I)
return Error(InvalidRecord);
I->setDebugLoc(LastLoc);
- I = 0;
+ I = nullptr;
continue;
case bitc::FUNC_CODE_DEBUG_LOC: { // DEBUG_LOC: [line, col, scope, ia]
- I = 0; // Get the last instruction emitted.
+ I = nullptr; // Get the last instruction emitted.
if (CurBB && !CurBB->empty())
I = &CurBB->back();
else if (CurBBNo && FunctionBBs[CurBBNo-1] &&
!FunctionBBs[CurBBNo-1]->empty())
I = &FunctionBBs[CurBBNo-1]->back();
- if (I == 0 || Record.size() < 4)
+ if (!I || Record.size() < 4)
return Error(InvalidRecord);
unsigned Line = Record[0], Col = Record[1];
unsigned ScopeID = Record[2], IAID = Record[3];
- MDNode *Scope = 0, *IA = 0;
+ MDNode *Scope = nullptr, *IA = nullptr;
if (ScopeID) Scope = cast<MDNode>(MDValueList.getValueFwdRef(ScopeID-1));
if (IAID) IA = cast<MDNode>(MDValueList.getValueFwdRef(IAID-1));
LastLoc = DebugLoc::get(Line, Col, Scope, IA);
I->setDebugLoc(LastLoc);
- I = 0;
+ I = nullptr;
continue;
}
@@ -2333,9 +2398,9 @@ error_code BitcodeReader::ParseFunctionBody(Function *F) {
Type *ResTy = getTypeByID(Record[OpNum]);
int Opc = GetDecodedCastOpcode(Record[OpNum+1]);
- if (Opc == -1 || ResTy == 0)
+ if (Opc == -1 || !ResTy)
return Error(InvalidRecord);
- Instruction *Temp = 0;
+ Instruction *Temp = nullptr;
if ((I = UpgradeBitCastInst(Opc, Op, ResTy, Temp))) {
if (Temp) {
InstructionList.push_back(Temp);
@@ -2460,7 +2525,7 @@ error_code BitcodeReader::ParseFunctionBody(Function *F) {
unsigned OpNum = 0;
Value *Vec, *Idx;
if (getValueTypePair(Record, OpNum, NextValueNo, Vec) ||
- popValue(Record, OpNum, NextValueNo, Type::getInt32Ty(Context), Idx))
+ getValueTypePair(Record, OpNum, NextValueNo, Idx))
return Error(InvalidRecord);
I = ExtractElementInst::Create(Vec, Idx);
InstructionList.push_back(I);
@@ -2473,7 +2538,7 @@ error_code BitcodeReader::ParseFunctionBody(Function *F) {
if (getValueTypePair(Record, OpNum, NextValueNo, Vec) ||
popValue(Record, OpNum, NextValueNo,
cast<VectorType>(Vec->getType())->getElementType(), Elt) ||
- popValue(Record, OpNum, NextValueNo, Type::getInt32Ty(Context), Idx))
+ getValueTypePair(Record, OpNum, NextValueNo, Idx))
return Error(InvalidRecord);
I = InsertElementInst::Create(Vec, Elt, Idx);
InstructionList.push_back(I);
@@ -2526,7 +2591,7 @@ error_code BitcodeReader::ParseFunctionBody(Function *F) {
}
unsigned OpNum = 0;
- Value *Op = NULL;
+ Value *Op = nullptr;
if (getValueTypePair(Record, OpNum, NextValueNo, Op))
return Error(InvalidRecord);
if (OpNum != Record.size())
@@ -2540,7 +2605,7 @@ error_code BitcodeReader::ParseFunctionBody(Function *F) {
if (Record.size() != 1 && Record.size() != 3)
return Error(InvalidRecord);
BasicBlock *TrueDest = getBasicBlock(Record[0]);
- if (TrueDest == 0)
+ if (!TrueDest)
return Error(InvalidRecord);
if (Record.size() == 1) {
@@ -2551,7 +2616,7 @@ error_code BitcodeReader::ParseFunctionBody(Function *F) {
BasicBlock *FalseDest = getBasicBlock(Record[1]);
Value *Cond = getValue(Record, 2, NextValueNo,
Type::getInt1Ty(Context));
- if (FalseDest == 0 || Cond == 0)
+ if (!FalseDest || !Cond)
return Error(InvalidRecord);
I = BranchInst::Create(TrueDest, FalseDest, Cond);
InstructionList.push_back(I);
@@ -2571,7 +2636,7 @@ error_code BitcodeReader::ParseFunctionBody(Function *F) {
Value *Cond = getValue(Record, 2, NextValueNo, OpTy);
BasicBlock *Default = getBasicBlock(Record[3]);
- if (OpTy == 0 || Cond == 0 || Default == 0)
+ if (!OpTy || !Cond || !Default)
return Error(InvalidRecord);
unsigned NumCases = Record[4];
@@ -2628,7 +2693,7 @@ error_code BitcodeReader::ParseFunctionBody(Function *F) {
Type *OpTy = getTypeByID(Record[0]);
Value *Cond = getValue(Record, 1, NextValueNo, OpTy);
BasicBlock *Default = getBasicBlock(Record[2]);
- if (OpTy == 0 || Cond == 0 || Default == 0)
+ if (!OpTy || !Cond || !Default)
return Error(InvalidRecord);
unsigned NumCases = (Record.size()-3)/2;
SwitchInst *SI = SwitchInst::Create(Cond, Default, NumCases);
@@ -2637,7 +2702,7 @@ error_code BitcodeReader::ParseFunctionBody(Function *F) {
ConstantInt *CaseVal =
dyn_cast_or_null<ConstantInt>(getFnValueByID(Record[3+i*2], OpTy));
BasicBlock *DestBB = getBasicBlock(Record[1+3+i*2]);
- if (CaseVal == 0 || DestBB == 0) {
+ if (!CaseVal || !DestBB) {
delete SI;
return Error(InvalidRecord);
}
@@ -2651,7 +2716,7 @@ error_code BitcodeReader::ParseFunctionBody(Function *F) {
return Error(InvalidRecord);
Type *OpTy = getTypeByID(Record[0]);
Value *Address = getValue(Record, 1, NextValueNo, OpTy);
- if (OpTy == 0 || Address == 0)
+ if (!OpTy || !Address)
return Error(InvalidRecord);
unsigned NumDests = Record.size()-2;
IndirectBrInst *IBI = IndirectBrInst::Create(Address, NumDests);
@@ -2683,11 +2748,11 @@ error_code BitcodeReader::ParseFunctionBody(Function *F) {
return Error(InvalidRecord);
PointerType *CalleeTy = dyn_cast<PointerType>(Callee->getType());
- FunctionType *FTy = !CalleeTy ? 0 :
+ FunctionType *FTy = !CalleeTy ? nullptr :
dyn_cast<FunctionType>(CalleeTy->getElementType());
// Check that the right number of fixed parameters are here.
- if (FTy == 0 || NormalBB == 0 || UnwindBB == 0 ||
+ if (!FTy || !NormalBB || !UnwindBB ||
Record.size() < OpNum+FTy->getNumParams())
return Error(InvalidRecord);
@@ -2695,7 +2760,7 @@ error_code BitcodeReader::ParseFunctionBody(Function *F) {
for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i, ++OpNum) {
Ops.push_back(getValue(Record, OpNum, NextValueNo,
FTy->getParamType(i)));
- if (Ops.back() == 0)
+ if (!Ops.back())
return Error(InvalidRecord);
}
@@ -2721,7 +2786,7 @@ error_code BitcodeReader::ParseFunctionBody(Function *F) {
}
case bitc::FUNC_CODE_INST_RESUME: { // RESUME: [opval]
unsigned Idx = 0;
- Value *Val = 0;
+ Value *Val = nullptr;
if (getValueTypePair(Record, Idx, NextValueNo, Val))
return Error(InvalidRecord);
I = ResumeInst::Create(Val);
@@ -2768,7 +2833,7 @@ error_code BitcodeReader::ParseFunctionBody(Function *F) {
Type *Ty = getTypeByID(Record[Idx++]);
if (!Ty)
return Error(InvalidRecord);
- Value *PersFn = 0;
+ Value *PersFn = nullptr;
if (getValueTypePair(Record, Idx, NextValueNo, PersFn))
return Error(InvalidRecord);
@@ -2961,7 +3026,7 @@ error_code BitcodeReader::ParseFunctionBody(Function *F) {
return Error(InvalidRecord);
PointerType *OpTy = dyn_cast<PointerType>(Callee->getType());
- FunctionType *FTy = 0;
+ FunctionType *FTy = nullptr;
if (OpTy) FTy = dyn_cast<FunctionType>(OpTy->getElementType());
if (!FTy || Record.size() < FTy->getNumParams()+OpNum)
return Error(InvalidRecord);
@@ -2974,7 +3039,7 @@ error_code BitcodeReader::ParseFunctionBody(Function *F) {
else
Args.push_back(getValue(Record, OpNum, NextValueNo,
FTy->getParamType(i)));
- if (Args.back() == 0)
+ if (!Args.back())
return Error(InvalidRecord);
}
@@ -2994,8 +3059,13 @@ error_code BitcodeReader::ParseFunctionBody(Function *F) {
I = CallInst::Create(Callee, Args);
InstructionList.push_back(I);
cast<CallInst>(I)->setCallingConv(
- static_cast<CallingConv::ID>(CCInfo>>1));
- cast<CallInst>(I)->setTailCall(CCInfo & 1);
+ static_cast<CallingConv::ID>((~(1U << 14) & CCInfo) >> 1));
+ CallInst::TailCallKind TCK = CallInst::TCK_None;
+ if (CCInfo & 1)
+ TCK = CallInst::TCK_Tail;
+ if (CCInfo & (1 << 14))
+ TCK = CallInst::TCK_MustTail;
+ cast<CallInst>(I)->setTailCallKind(TCK);
cast<CallInst>(I)->setAttributes(PAL);
break;
}
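
The decode above packs three facts into one CCInfo operand: bit 0 marks
'tail', the new bit 14 marks 'musttail', and the remaining bits hold the
calling convention shifted left by one. A worked standalone decode
(illustrative struct and name):

    struct CallFlags { bool Tail, MustTail; unsigned CC; };

    // E.g. (1u << 14) | (13u << 1) decodes to musttail with CC 13.
    CallFlags decodeCCInfo(unsigned CCInfo) {
      return { (CCInfo & 1) != 0,
               (CCInfo & (1u << 14)) != 0,
               (~(1u << 14) & CCInfo) >> 1 };
    }
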
@@ -3015,7 +3085,7 @@ error_code BitcodeReader::ParseFunctionBody(Function *F) {
// Add instruction to end of current BB. If there is no current BB, reject
// this file.
- if (CurBB == 0) {
+ if (!CurBB) {
delete I;
return Error(InvalidInstructionWithNoBB);
}
@@ -3024,7 +3094,7 @@ error_code BitcodeReader::ParseFunctionBody(Function *F) {
// If this was a terminator instruction, move to the next block.
if (isa<TerminatorInst>(I)) {
++CurBBNo;
- CurBB = CurBBNo < FunctionBBs.size() ? FunctionBBs[CurBBNo] : 0;
+ CurBB = CurBBNo < FunctionBBs.size() ? FunctionBBs[CurBBNo] : nullptr;
}
// Non-void values get registered in the value table for future use.
@@ -3036,10 +3106,10 @@ OutOfRecordLoop:
// Check the function list for unresolved values.
if (Argument *A = dyn_cast<Argument>(ValueList.back())) {
- if (A->getParent() == 0) {
+ if (!A->getParent()) {
// We found at least one unresolved value. Nuke them all to avoid leaks.
for (unsigned i = ModuleValueListSize, e = ValueList.size(); i != e; ++i){
- if ((A = dyn_cast_or_null<Argument>(ValueList[i])) && A->getParent() == 0) {
+ if ((A = dyn_cast_or_null<Argument>(ValueList[i])) && !A->getParent()) {
A->replaceAllUsesWith(UndefValue::get(A->getType()));
delete A;
}
@@ -3348,7 +3418,7 @@ Module *llvm::getStreamedBitcodeModule(const std::string &name,
if (ErrMsg)
*ErrMsg = EC.message();
delete M; // Also deletes R.
- return 0;
+ return nullptr;
}
R->setBufferOwned(false); // no buffer to delete
return M;
diff --git a/lib/Bitcode/Reader/BitcodeReader.h b/lib/Bitcode/Reader/BitcodeReader.h
index 15be31f..593d8f9 100644
--- a/lib/Bitcode/Reader/BitcodeReader.h
+++ b/lib/Bitcode/Reader/BitcodeReader.h
@@ -224,13 +224,13 @@ public:
}
explicit BitcodeReader(MemoryBuffer *buffer, LLVMContext &C)
- : Context(C), TheModule(0), Buffer(buffer), BufferOwned(false),
- LazyStreamer(0), NextUnreadBit(0), SeenValueSymbolTable(false),
+ : Context(C), TheModule(nullptr), Buffer(buffer), BufferOwned(false),
+ LazyStreamer(nullptr), NextUnreadBit(0), SeenValueSymbolTable(false),
ValueList(C), MDValueList(C),
SeenFirstFunctionBody(false), UseRelativeIDs(false) {
}
explicit BitcodeReader(DataStreamer *streamer, LLVMContext &C)
- : Context(C), TheModule(0), Buffer(0), BufferOwned(false),
+ : Context(C), TheModule(nullptr), Buffer(nullptr), BufferOwned(false),
LazyStreamer(streamer), NextUnreadBit(0), SeenValueSymbolTable(false),
ValueList(C), MDValueList(C),
SeenFirstFunctionBody(false), UseRelativeIDs(false) {
@@ -271,7 +271,7 @@ private:
return ValueList.getValueFwdRef(ID, Ty);
}
BasicBlock *getBasicBlock(unsigned ID) const {
- if (ID >= FunctionBBs.size()) return 0; // Invalid ID
+ if (ID >= FunctionBBs.size()) return nullptr; // Invalid ID
return FunctionBBs[ID];
}
AttributeSet getAttributes(unsigned i) const {
@@ -293,15 +293,15 @@ private:
if (ValNo < InstNum) {
// If this is not a forward reference, just return the value we already
// have.
- ResVal = getFnValueByID(ValNo, 0);
- return ResVal == 0;
+ ResVal = getFnValueByID(ValNo, nullptr);
+ return ResVal == nullptr;
} else if (Slot == Record.size()) {
return true;
}
unsigned TypeNo = (unsigned)Record[Slot++];
ResVal = getFnValueByID(ValNo, getTypeByID(TypeNo));
- return ResVal == 0;
+ return ResVal == nullptr;
}
/// popValue - Read a value out of the specified record from slot 'Slot'.
@@ -320,14 +320,14 @@ private:
bool getValue(SmallVectorImpl<uint64_t> &Record, unsigned Slot,
unsigned InstNum, Type *Ty, Value *&ResVal) {
ResVal = getValue(Record, Slot, InstNum, Ty);
- return ResVal == 0;
+ return ResVal == nullptr;
}
/// getValue -- Version of getValue that returns ResVal directly,
/// or 0 if there is an error.
Value *getValue(SmallVectorImpl<uint64_t> &Record, unsigned Slot,
unsigned InstNum, Type *Ty) {
- if (Slot == Record.size()) return 0;
+ if (Slot == Record.size()) return nullptr;
unsigned ValNo = (unsigned)Record[Slot];
// Adjust the ValNo, if it was encoded relative to the InstNum.
if (UseRelativeIDs)
@@ -338,7 +338,7 @@ private:
/// getValueSigned -- Like getValue, but decodes signed VBRs.
Value *getValueSigned(SmallVectorImpl<uint64_t> &Record, unsigned Slot,
unsigned InstNum, Type *Ty) {
- if (Slot == Record.size()) return 0;
+ if (Slot == Record.size()) return nullptr;
unsigned ValNo = (unsigned)decodeSignRotatedValue(Record[Slot]);
// Adjust the ValNo, if it was encoded relative to the InstNum.
if (UseRelativeIDs)
diff --git a/lib/Bitcode/Reader/BitstreamReader.cpp b/lib/Bitcode/Reader/BitstreamReader.cpp
index 1fd9abd..f31e1fa 100644
--- a/lib/Bitcode/Reader/BitstreamReader.cpp
+++ b/lib/Bitcode/Reader/BitstreamReader.cpp
@@ -315,7 +315,7 @@ bool BitstreamCursor::ReadBlockInfoBlock() {
if (EnterSubBlock(bitc::BLOCKINFO_BLOCK_ID)) return true;
SmallVector<uint64_t, 64> Record;
- BitstreamReader::BlockInfo *CurBlockInfo = 0;
+ BitstreamReader::BlockInfo *CurBlockInfo = nullptr;
// Read all the records for this module.
while (1) {
diff --git a/lib/Bitcode/Writer/BitWriter.cpp b/lib/Bitcode/Writer/BitWriter.cpp
index 0275f96..3747122 100644
--- a/lib/Bitcode/Writer/BitWriter.cpp
+++ b/lib/Bitcode/Writer/BitWriter.cpp
@@ -10,6 +10,7 @@
#include "llvm-c/BitWriter.h"
#include "llvm/Bitcode/ReaderWriter.h"
#include "llvm/IR/Module.h"
+#include "llvm/Support/FileSystem.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp
index 5d1dac1..cc73b84 100644
--- a/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -197,6 +197,8 @@ static uint64_t getAttrKindEncoding(Attribute::AttrKind Kind) {
return bitc::ATTR_KIND_NO_INLINE;
case Attribute::NonLazyBind:
return bitc::ATTR_KIND_NON_LAZY_BIND;
+ case Attribute::NonNull:
+ return bitc::ATTR_KIND_NON_NULL;
case Attribute::NoRedZone:
return bitc::ATTR_KIND_NO_RED_ZONE;
case Attribute::NoReturn:
@@ -474,8 +476,8 @@ static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) {
Stream.ExitBlock();
}
-static unsigned getEncodedLinkage(const GlobalValue *GV) {
- switch (GV->getLinkage()) {
+static unsigned getEncodedLinkage(const GlobalValue &GV) {
+ switch (GV.getLinkage()) {
case GlobalValue::ExternalLinkage: return 0;
case GlobalValue::WeakAnyLinkage: return 1;
case GlobalValue::AppendingLinkage: return 2;
@@ -491,8 +493,8 @@ static unsigned getEncodedLinkage(const GlobalValue *GV) {
llvm_unreachable("Invalid linkage");
}
-static unsigned getEncodedVisibility(const GlobalValue *GV) {
- switch (GV->getVisibility()) {
+static unsigned getEncodedVisibility(const GlobalValue &GV) {
+ switch (GV.getVisibility()) {
case GlobalValue::DefaultVisibility: return 0;
case GlobalValue::HiddenVisibility: return 1;
case GlobalValue::ProtectedVisibility: return 2;
@@ -500,8 +502,8 @@ static unsigned getEncodedVisibility(const GlobalValue *GV) {
llvm_unreachable("Invalid visibility");
}
-static unsigned getEncodedDLLStorageClass(const GlobalValue *GV) {
- switch (GV->getDLLStorageClass()) {
+static unsigned getEncodedDLLStorageClass(const GlobalValue &GV) {
+ switch (GV.getDLLStorageClass()) {
case GlobalValue::DefaultStorageClass: return 0;
case GlobalValue::DLLImportStorageClass: return 1;
case GlobalValue::DLLExportStorageClass: return 2;
@@ -509,8 +511,8 @@ static unsigned getEncodedDLLStorageClass(const GlobalValue *GV) {
llvm_unreachable("Invalid DLL storage class");
}
-static unsigned getEncodedThreadLocalMode(const GlobalVariable *GV) {
- switch (GV->getThreadLocalMode()) {
+static unsigned getEncodedThreadLocalMode(const GlobalVariable &GV) {
+ switch (GV.getThreadLocalMode()) {
case GlobalVariable::NotThreadLocal: return 0;
case GlobalVariable::GeneralDynamicTLSModel: return 1;
case GlobalVariable::LocalDynamicTLSModel: return 2;
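
The getEncoded* helpers above switch from `const GlobalValue *` to `const GlobalValue &`, putting the never-null precondition into the type and letting the range-based loops below pass elements directly. A toy version of the same refactor, with an invented `Widget` type:

    #include <cassert>

    struct Widget { int Linkage; };

    // Before: pointer parameter. Callers can pass null, so the callee
    // must assert or risk dereferencing a bad pointer.
    static unsigned encodeLinkagePtr(const Widget *W) {
      assert(W && "null Widget");
      return static_cast<unsigned>(W->Linkage);
    }

    // After: reference parameter. A null argument cannot be expressed
    // at the call site, so the precondition lives in the type.
    static unsigned encodeLinkageRef(const Widget &W) {
      return static_cast<unsigned>(W.Linkage);
    }

    int main() {
      Widget W{2};
      return encodeLinkagePtr(&W) == encodeLinkageRef(W) ? 0 : 1;
    }
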
@@ -541,36 +543,35 @@ static void WriteModuleInfo(const Module *M, const ValueEnumerator &VE,
std::map<std::string, unsigned> GCMap;
unsigned MaxAlignment = 0;
unsigned MaxGlobalType = 0;
- for (Module::const_global_iterator GV = M->global_begin(),E = M->global_end();
- GV != E; ++GV) {
- MaxAlignment = std::max(MaxAlignment, GV->getAlignment());
- MaxGlobalType = std::max(MaxGlobalType, VE.getTypeID(GV->getType()));
- if (GV->hasSection()) {
+ for (const GlobalValue &GV : M->globals()) {
+ MaxAlignment = std::max(MaxAlignment, GV.getAlignment());
+ MaxGlobalType = std::max(MaxGlobalType, VE.getTypeID(GV.getType()));
+ if (GV.hasSection()) {
// Give section names unique ID's.
- unsigned &Entry = SectionMap[GV->getSection()];
+ unsigned &Entry = SectionMap[GV.getSection()];
if (!Entry) {
- WriteStringRecord(bitc::MODULE_CODE_SECTIONNAME, GV->getSection(),
+ WriteStringRecord(bitc::MODULE_CODE_SECTIONNAME, GV.getSection(),
0/*TODO*/, Stream);
Entry = SectionMap.size();
}
}
}
- for (Module::const_iterator F = M->begin(), E = M->end(); F != E; ++F) {
- MaxAlignment = std::max(MaxAlignment, F->getAlignment());
- if (F->hasSection()) {
+ for (const Function &F : *M) {
+ MaxAlignment = std::max(MaxAlignment, F.getAlignment());
+ if (F.hasSection()) {
// Give section names unique ID's.
- unsigned &Entry = SectionMap[F->getSection()];
+ unsigned &Entry = SectionMap[F.getSection()];
if (!Entry) {
- WriteStringRecord(bitc::MODULE_CODE_SECTIONNAME, F->getSection(),
+ WriteStringRecord(bitc::MODULE_CODE_SECTIONNAME, F.getSection(),
0/*TODO*/, Stream);
Entry = SectionMap.size();
}
}
- if (F->hasGC()) {
+ if (F.hasGC()) {
// Same for GC names.
- unsigned &Entry = GCMap[F->getGC()];
+ unsigned &Entry = GCMap[F.getGC()];
if (!Entry) {
- WriteStringRecord(bitc::MODULE_CODE_GCNAME, F->getGC(),
+ WriteStringRecord(bitc::MODULE_CODE_GCNAME, F.getGC(),
0/*TODO*/, Stream);
Entry = GCMap.size();
}
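
The iterator-pair loops over globals and functions become range-based for over `M->globals()` and `*M`. The same shape with stand-in types (the `globals()` accessor below only mimics the Module API, it is not LLVM code):

    #include <algorithm>
    #include <iostream>
    #include <string>
    #include <vector>

    struct Global { std::string Section; unsigned Alignment; };

    struct FakeModule {
      std::vector<Global> Globals;
      // Range accessor in the spirit of Module::globals().
      const std::vector<Global> &globals() const { return Globals; }
    };

    int main() {
      FakeModule M{{{"text", 4}, {"data", 16}}};
      unsigned MaxAlign = 0;
      // The iterator-pair loop and the range-for are equivalent; the
      // latter is shorter and cannot get the end() bound wrong.
      for (const Global &GV : M.globals())
        MaxAlign = std::max(MaxAlign, GV.Alignment);
      std::cout << MaxAlign << "\n"; // prints 16
    }
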
@@ -606,28 +607,27 @@ static void WriteModuleInfo(const Module *M, const ValueEnumerator &VE,
// Emit the global variable information.
SmallVector<unsigned, 64> Vals;
- for (Module::const_global_iterator GV = M->global_begin(),E = M->global_end();
- GV != E; ++GV) {
+ for (const GlobalVariable &GV : M->globals()) {
unsigned AbbrevToUse = 0;
// GLOBALVAR: [type, isconst, initid,
// linkage, alignment, section, visibility, threadlocal,
// unnamed_addr, externally_initialized, dllstorageclass]
- Vals.push_back(VE.getTypeID(GV->getType()));
- Vals.push_back(GV->isConstant());
- Vals.push_back(GV->isDeclaration() ? 0 :
- (VE.getValueID(GV->getInitializer()) + 1));
+ Vals.push_back(VE.getTypeID(GV.getType()));
+ Vals.push_back(GV.isConstant());
+ Vals.push_back(GV.isDeclaration() ? 0 :
+ (VE.getValueID(GV.getInitializer()) + 1));
Vals.push_back(getEncodedLinkage(GV));
- Vals.push_back(Log2_32(GV->getAlignment())+1);
- Vals.push_back(GV->hasSection() ? SectionMap[GV->getSection()] : 0);
- if (GV->isThreadLocal() ||
- GV->getVisibility() != GlobalValue::DefaultVisibility ||
- GV->hasUnnamedAddr() || GV->isExternallyInitialized() ||
- GV->getDLLStorageClass() != GlobalValue::DefaultStorageClass) {
+ Vals.push_back(Log2_32(GV.getAlignment())+1);
+ Vals.push_back(GV.hasSection() ? SectionMap[GV.getSection()] : 0);
+ if (GV.isThreadLocal() ||
+ GV.getVisibility() != GlobalValue::DefaultVisibility ||
+ GV.hasUnnamedAddr() || GV.isExternallyInitialized() ||
+ GV.getDLLStorageClass() != GlobalValue::DefaultStorageClass) {
Vals.push_back(getEncodedVisibility(GV));
Vals.push_back(getEncodedThreadLocalMode(GV));
- Vals.push_back(GV->hasUnnamedAddr());
- Vals.push_back(GV->isExternallyInitialized());
+ Vals.push_back(GV.hasUnnamedAddr());
+ Vals.push_back(GV.isExternallyInitialized());
Vals.push_back(getEncodedDLLStorageClass(GV));
} else {
AbbrevToUse = SimpleGVarAbbrev;
@@ -638,20 +638,20 @@ static void WriteModuleInfo(const Module *M, const ValueEnumerator &VE,
}
// Emit the function proto information.
- for (Module::const_iterator F = M->begin(), E = M->end(); F != E; ++F) {
+ for (const Function &F : *M) {
// FUNCTION: [type, callingconv, isproto, linkage, paramattrs, alignment,
// section, visibility, gc, unnamed_addr, prefix]
- Vals.push_back(VE.getTypeID(F->getType()));
- Vals.push_back(F->getCallingConv());
- Vals.push_back(F->isDeclaration());
+ Vals.push_back(VE.getTypeID(F.getType()));
+ Vals.push_back(F.getCallingConv());
+ Vals.push_back(F.isDeclaration());
Vals.push_back(getEncodedLinkage(F));
- Vals.push_back(VE.getAttributeID(F->getAttributes()));
- Vals.push_back(Log2_32(F->getAlignment())+1);
- Vals.push_back(F->hasSection() ? SectionMap[F->getSection()] : 0);
+ Vals.push_back(VE.getAttributeID(F.getAttributes()));
+ Vals.push_back(Log2_32(F.getAlignment())+1);
+ Vals.push_back(F.hasSection() ? SectionMap[F.getSection()] : 0);
Vals.push_back(getEncodedVisibility(F));
- Vals.push_back(F->hasGC() ? GCMap[F->getGC()] : 0);
- Vals.push_back(F->hasUnnamedAddr());
- Vals.push_back(F->hasPrefixData() ? (VE.getValueID(F->getPrefixData()) + 1)
+ Vals.push_back(F.hasGC() ? GCMap[F.getGC()] : 0);
+ Vals.push_back(F.hasUnnamedAddr());
+ Vals.push_back(F.hasPrefixData() ? (VE.getValueID(F.getPrefixData()) + 1)
: 0);
Vals.push_back(getEncodedDLLStorageClass(F));
@@ -661,14 +661,13 @@ static void WriteModuleInfo(const Module *M, const ValueEnumerator &VE,
}
// Emit the alias information.
- for (Module::const_alias_iterator AI = M->alias_begin(), E = M->alias_end();
- AI != E; ++AI) {
+ for (const GlobalAlias &A : M->aliases()) {
// ALIAS: [alias type, aliasee val#, linkage, visibility]
- Vals.push_back(VE.getTypeID(AI->getType()));
- Vals.push_back(VE.getValueID(AI->getAliasee()));
- Vals.push_back(getEncodedLinkage(AI));
- Vals.push_back(getEncodedVisibility(AI));
- Vals.push_back(getEncodedDLLStorageClass(AI));
+ Vals.push_back(VE.getTypeID(A.getType()));
+ Vals.push_back(VE.getValueID(A.getAliasee()));
+ Vals.push_back(getEncodedLinkage(A));
+ Vals.push_back(getEncodedVisibility(A));
+ Vals.push_back(getEncodedDLLStorageClass(A));
unsigned AbbrevToUse = 0;
Stream.EmitRecord(bitc::MODULE_CODE_ALIAS, Vals, AbbrevToUse);
Vals.clear();
@@ -917,7 +916,7 @@ static void WriteConstants(unsigned FirstVal, unsigned LastVal,
SmallVector<uint64_t, 64> Record;
const ValueEnumerator::ValueList &Vals = VE.getValues();
- Type *LastTy = 0;
+ Type *LastTy = nullptr;
for (unsigned i = FirstVal; i != LastVal; ++i) {
const Value *V = Vals[i].first;
// If we need to switch types, do so now.
@@ -1087,12 +1086,14 @@ static void WriteConstants(unsigned FirstVal, unsigned LastVal,
Code = bitc::CST_CODE_CE_EXTRACTELT;
Record.push_back(VE.getTypeID(C->getOperand(0)->getType()));
Record.push_back(VE.getValueID(C->getOperand(0)));
+ Record.push_back(VE.getTypeID(C->getOperand(1)->getType()));
Record.push_back(VE.getValueID(C->getOperand(1)));
break;
case Instruction::InsertElement:
Code = bitc::CST_CODE_CE_INSERTELT;
Record.push_back(VE.getValueID(C->getOperand(0)));
Record.push_back(VE.getValueID(C->getOperand(1)));
+ Record.push_back(VE.getTypeID(C->getOperand(2)->getType()));
Record.push_back(VE.getValueID(C->getOperand(2)));
break;
case Instruction::ShuffleVector:
@@ -1253,13 +1254,13 @@ static void WriteInstruction(const Instruction &I, unsigned InstID,
case Instruction::ExtractElement:
Code = bitc::FUNC_CODE_INST_EXTRACTELT;
PushValueAndType(I.getOperand(0), InstID, Vals, VE);
- pushValue(I.getOperand(1), InstID, Vals, VE);
+ PushValueAndType(I.getOperand(1), InstID, Vals, VE);
break;
case Instruction::InsertElement:
Code = bitc::FUNC_CODE_INST_INSERTELT;
PushValueAndType(I.getOperand(0), InstID, Vals, VE);
pushValue(I.getOperand(1), InstID, Vals, VE);
- pushValue(I.getOperand(2), InstID, Vals, VE);
+ PushValueAndType(I.getOperand(2), InstID, Vals, VE);
break;
case Instruction::ShuffleVector:
Code = bitc::FUNC_CODE_INST_SHUFFLEVEC;
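
Both here and in the constant-expression writer above, the index operand of extractelement/insertelement is now written together with its type (PushValueAndType rather than pushValue), so readers no longer have to assume the index is i32. A hedged sketch of the record-shape difference, with invented field names:

    #include <cstdint>
    #include <vector>

    // Hypothetical flattened records (field names invented for
    // illustration, not the actual bitcode layout):
    //   old EXTRACTELT: [vecval, idxval]        -- index type implied i32
    //   new EXTRACTELT: [vecval, idxty, idxval] -- index type explicit
    struct Rec { std::vector<uint64_t> Fields; };

    static Rec writeExtractEltOld(uint64_t Vec, uint64_t Idx) {
      return Rec{{Vec, Idx}};
    }

    static Rec writeExtractEltNew(uint64_t Vec, uint64_t IdxTy, uint64_t Idx) {
      // With the type field present, an i64 (or any integer) index
      // survives a round-trip through the writer and reader.
      return Rec{{Vec, IdxTy, Idx}};
    }

    int main() {
      return writeExtractEltNew(7, /*IdxTy=*/3, 9).Fields.size() == 3 ? 0 : 1;
    }
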
@@ -1469,7 +1470,8 @@ static void WriteInstruction(const Instruction &I, unsigned InstID,
Code = bitc::FUNC_CODE_INST_CALL;
Vals.push_back(VE.getAttributeID(CI.getAttributes()));
- Vals.push_back((CI.getCallingConv() << 1) | unsigned(CI.isTailCall()));
+ Vals.push_back((CI.getCallingConv() << 1) | unsigned(CI.isTailCall()) |
+ unsigned(CI.isMustTailCall()) << 14);
PushValueAndType(CI.getCalledValue(), InstID, Vals, VE); // Callee
// Emit value #'s for the fixed parameters.
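
The call record now packs three things into one word: the tail-call flag in bit 0, the calling convention starting at bit 1, and the new musttail marker at bit 14. The pack arithmetic restated standalone (bit positions taken from the hunk above; the helper name is invented):

    #include <cassert>
    #include <cstdint>

    // Pack: (CC << 1) | isTailCall | (isMustTail << 14), as in the
    // writer above.
    static uint64_t packCallFlags(unsigned CC, bool Tail, bool MustTail) {
      return (uint64_t(CC) << 1) | uint64_t(Tail) | (uint64_t(MustTail) << 14);
    }

    int main() {
      uint64_t W = packCallFlags(/*CC=*/8, /*Tail=*/true, /*MustTail=*/true);
      assert((W & 1) != 0);             // tail-call flag in bit 0
      assert(((W >> 14) & 1) != 0);     // musttail flag in bit 14
      assert(((W >> 1) & 0x1FFF) == 8); // calling conv sits in between
      return 0;
    }
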
diff --git a/lib/Bitcode/module.modulemap b/lib/Bitcode/module.modulemap
new file mode 100644
index 0000000..7df1a0a
--- /dev/null
+++ b/lib/Bitcode/module.modulemap
@@ -0,0 +1 @@
+module Bitcode { requires cplusplus umbrella "." module * { export * } }
diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
index 25c438c..0f38c64 100644
--- a/lib/CodeGen/AggressiveAntiDepBreaker.cpp
+++ b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
@@ -14,7 +14,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "post-RA-sched"
#include "AggressiveAntiDepBreaker.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -29,6 +28,8 @@
#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
+#define DEBUG_TYPE "post-RA-sched"
+
// If DebugDiv > 0 then only break antidep with (ID % DebugDiv) == DebugMod
static cl::opt<int>
DebugDiv("agg-antidep-debugdiv",
@@ -121,7 +122,7 @@ AggressiveAntiDepBreaker(MachineFunction& MFi,
TII(MF.getTarget().getInstrInfo()),
TRI(MF.getTarget().getRegisterInfo()),
RegClassInfo(RCI),
- State(NULL) {
+ State(nullptr) {
/* Collect a bitset of all registers that are only broken if they
are on the critical path. */
for (unsigned i = 0, e = CriticalPathRCs.size(); i < e; ++i) {
@@ -144,7 +145,7 @@ AggressiveAntiDepBreaker::~AggressiveAntiDepBreaker() {
}
void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
- assert(State == NULL);
+ assert(!State);
State = new AggressiveAntiDepState(TRI->getNumRegs(), BB);
bool IsReturnBlock = (!BB->empty() && BB->back().isReturn());
@@ -169,7 +170,7 @@ void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
// callee-saved register that is not saved in the prolog.
const MachineFrameInfo *MFI = MF.getFrameInfo();
BitVector Pristine = MFI->getPristineRegs(BB);
- for (const uint16_t *I = TRI->getCalleeSavedRegs(&MF); *I; ++I) {
+ for (const MCPhysReg *I = TRI->getCalleeSavedRegs(&MF); *I; ++I) {
unsigned Reg = *I;
if (!IsReturnBlock && !Pristine.test(Reg)) continue;
for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) {
@@ -183,7 +184,7 @@ void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
void AggressiveAntiDepBreaker::FinishBlock() {
delete State;
- State = NULL;
+ State = nullptr;
}
void AggressiveAntiDepBreaker::Observe(MachineInstr *MI, unsigned Count,
@@ -230,13 +231,13 @@ bool AggressiveAntiDepBreaker::IsImplicitDefUse(MachineInstr *MI,
if (Reg == 0)
return false;
- MachineOperand *Op = NULL;
+ MachineOperand *Op = nullptr;
if (MO.isDef())
Op = MI->findRegisterUseOperand(Reg, true);
else
Op = MI->findRegisterDefOperand(Reg);
- return((Op != NULL) && Op->isImplicit());
+ return(Op && Op->isImplicit());
}
void AggressiveAntiDepBreaker::GetPassthruRegs(MachineInstr *MI,
@@ -273,10 +274,10 @@ static void AntiDepEdges(const SUnit *SU, std::vector<const SDep*>& Edges) {
/// CriticalPathStep - Return the next SUnit after SU on the bottom-up
/// critical path.
static const SUnit *CriticalPathStep(const SUnit *SU) {
- const SDep *Next = 0;
+ const SDep *Next = nullptr;
unsigned NextDepth = 0;
// Find the predecessor edge with the greatest depth.
- if (SU != 0) {
+ if (SU) {
for (SUnit::const_pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end();
P != PE; ++P) {
const SUnit *PredSU = P->getSUnit();
@@ -292,7 +293,7 @@ static const SUnit *CriticalPathStep(const SUnit *SU) {
}
}
- return (Next) ? Next->getSUnit() : 0;
+ return (Next) ? Next->getSUnit() : nullptr;
}
void AggressiveAntiDepBreaker::HandleLastUse(unsigned Reg, unsigned KillIdx,
@@ -309,8 +310,8 @@ void AggressiveAntiDepBreaker::HandleLastUse(unsigned Reg, unsigned KillIdx,
DefIndices[Reg] = ~0u;
RegRefs.erase(Reg);
State->LeaveGroup(Reg);
- DEBUG(if (header != NULL) {
- dbgs() << header << TRI->getName(Reg); header = NULL; });
+ DEBUG(if (header) {
+ dbgs() << header << TRI->getName(Reg); header = nullptr; });
DEBUG(dbgs() << "->g" << State->GetGroup(Reg) << tag);
}
// Repeat for subregisters.
@@ -321,14 +322,14 @@ void AggressiveAntiDepBreaker::HandleLastUse(unsigned Reg, unsigned KillIdx,
DefIndices[SubregReg] = ~0u;
RegRefs.erase(SubregReg);
State->LeaveGroup(SubregReg);
- DEBUG(if (header != NULL) {
- dbgs() << header << TRI->getName(Reg); header = NULL; });
+ DEBUG(if (header) {
+ dbgs() << header << TRI->getName(Reg); header = nullptr; });
DEBUG(dbgs() << " " << TRI->getName(SubregReg) << "->g" <<
State->GetGroup(SubregReg) << tag);
}
}
- DEBUG(if ((header == NULL) && (footer != NULL)) dbgs() << footer);
+ DEBUG(if (!header && footer) dbgs() << footer);
}
void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI,
@@ -382,7 +383,7 @@ void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI,
}
// Note register reference...
- const TargetRegisterClass *RC = NULL;
+ const TargetRegisterClass *RC = nullptr;
if (i < MI->getDesc().getNumOperands())
RC = TII->getRegClass(MI->getDesc(), i, TRI, MF);
AggressiveAntiDepState::RegisterReference RR = { &MO, RC };
@@ -466,7 +467,7 @@ void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr *MI,
}
// Note register reference...
- const TargetRegisterClass *RC = NULL;
+ const TargetRegisterClass *RC = nullptr;
if (i < MI->getDesc().getNumOperands())
RC = TII->getRegClass(MI->getDesc(), i, TRI, MF);
AggressiveAntiDepState::RegisterReference RR = { &MO, RC };
@@ -516,7 +517,7 @@ BitVector AggressiveAntiDepBreaker::GetRenameRegisters(unsigned Reg) {
AggressiveAntiDepState::RegisterReference>::iterator Q = Range.first,
QE = Range.second; Q != QE; ++Q) {
const TargetRegisterClass *RC = Q->second.RC;
- if (RC == NULL) continue;
+ if (!RC) continue;
BitVector RCBV = TRI->getAllocatableSet(MF, RC);
if (first) {
@@ -734,8 +735,8 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
// Track progress along the critical path through the SUnit graph as
// we walk the instructions. This is needed for regclasses that only
// break critical-path anti-dependencies.
- const SUnit *CriticalPathSU = 0;
- MachineInstr *CriticalPathMI = 0;
+ const SUnit *CriticalPathSU = nullptr;
+ MachineInstr *CriticalPathMI = nullptr;
if (CriticalPathSet.any()) {
for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
const SUnit *SU = &SUnits[i];
@@ -788,10 +789,10 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
// If MI is not on the critical path, then we don't rename
// registers in the CriticalPathSet.
- BitVector *ExcludeRegs = NULL;
+ BitVector *ExcludeRegs = nullptr;
if (MI == CriticalPathMI) {
CriticalPathSU = CriticalPathStep(CriticalPathSU);
- CriticalPathMI = (CriticalPathSU) ? CriticalPathSU->getInstr() : 0;
+ CriticalPathMI = (CriticalPathSU) ? CriticalPathSU->getInstr() : nullptr;
} else if (CriticalPathSet.any()) {
ExcludeRegs = &CriticalPathSet;
}
@@ -815,7 +816,7 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
// Don't break anti-dependencies on non-allocatable registers.
DEBUG(dbgs() << " (non-allocatable)\n");
continue;
- } else if ((ExcludeRegs != NULL) && ExcludeRegs->test(AntiDepReg)) {
+ } else if (ExcludeRegs && ExcludeRegs->test(AntiDepReg)) {
// Don't break anti-dependencies for critical path registers
// if not on the critical path
DEBUG(dbgs() << " (not critical-path)\n");
@@ -829,9 +830,8 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
} else {
// No anti-dep breaking for implicit deps
MachineOperand *AntiDepOp = MI->findRegisterDefOperand(AntiDepReg);
- assert(AntiDepOp != NULL &&
- "Can't find index for defined register operand");
- if ((AntiDepOp == NULL) || AntiDepOp->isImplicit()) {
+ assert(AntiDepOp && "Can't find index for defined register operand");
+ if (!AntiDepOp || AntiDepOp->isImplicit()) {
DEBUG(dbgs() << " (implicit)\n");
continue;
}
diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.h b/lib/CodeGen/AggressiveAntiDepBreaker.h
index 29b6a10..2ab9d89 100644
--- a/lib/CodeGen/AggressiveAntiDepBreaker.h
+++ b/lib/CodeGen/AggressiveAntiDepBreaker.h
@@ -170,7 +170,8 @@ class RegisterClassInfo;
void GetPassthruRegs(MachineInstr *MI, std::set<unsigned>& PassthruRegs);
void HandleLastUse(unsigned Reg, unsigned KillIdx, const char *tag,
- const char *header =NULL, const char *footer =NULL);
+ const char *header = nullptr,
+ const char *footer = nullptr);
void PrescanInstruction(MachineInstr *MI, unsigned Count,
std::set<unsigned>& PassthruRegs);
diff --git a/lib/CodeGen/AllocationOrder.cpp b/lib/CodeGen/AllocationOrder.cpp
index 3fa1f8f..dc9bcff 100644
--- a/lib/CodeGen/AllocationOrder.cpp
+++ b/lib/CodeGen/AllocationOrder.cpp
@@ -14,7 +14,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "regalloc"
#include "AllocationOrder.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -25,6 +24,8 @@
using namespace llvm;
+#define DEBUG_TYPE "regalloc"
+
// Compare VirtRegMap::getRegAllocPref().
AllocationOrder::AllocationOrder(unsigned VirtReg,
const VirtRegMap &VRM,
diff --git a/lib/CodeGen/Analysis.cpp b/lib/CodeGen/Analysis.cpp
index 6ac5de2..6fc83a2 100644
--- a/lib/CodeGen/Analysis.cpp
+++ b/lib/CodeGen/Analysis.cpp
@@ -46,7 +46,7 @@ unsigned llvm::ComputeLinearIndex(Type *Ty,
EI != EE; ++EI) {
if (Indices && *Indices == unsigned(EI - EB))
return ComputeLinearIndex(*EI, Indices+1, IndicesEnd, CurIndex);
- CurIndex = ComputeLinearIndex(*EI, 0, 0, CurIndex);
+ CurIndex = ComputeLinearIndex(*EI, nullptr, nullptr, CurIndex);
}
return CurIndex;
}
@@ -56,7 +56,7 @@ unsigned llvm::ComputeLinearIndex(Type *Ty,
for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i) {
if (Indices && *Indices == i)
return ComputeLinearIndex(EltTy, Indices+1, IndicesEnd, CurIndex);
- CurIndex = ComputeLinearIndex(EltTy, 0, 0, CurIndex);
+ CurIndex = ComputeLinearIndex(EltTy, nullptr, nullptr, CurIndex);
}
return CurIndex;
}
@@ -228,7 +228,7 @@ static const Value *getNoopInput(const Value *V,
// through.
const Instruction *I = dyn_cast<Instruction>(V);
if (!I || I->getNumOperands() == 0) return V;
- const Value *NoopInput = 0;
+ const Value *NoopInput = nullptr;
Value *Op = I->getOperand(0);
if (isa<BitCastInst>(I)) {
diff --git a/lib/CodeGen/Android.mk b/lib/CodeGen/Android.mk
index 26f04d0..7feb42c 100644
--- a/lib/CodeGen/Android.mk
+++ b/lib/CodeGen/Android.mk
@@ -4,6 +4,7 @@ codegen_SRC_FILES := \
AggressiveAntiDepBreaker.cpp \
AllocationOrder.cpp \
Analysis.cpp \
+ AtomicExpandLoadLinkedPass.cpp \
BasicTargetTransformInfo.cpp \
BranchFolding.cpp \
CalcSpillWeights.cpp \
diff --git a/lib/CodeGen/AsmPrinter/ARMException.cpp b/lib/CodeGen/AsmPrinter/ARMException.cpp
index 403feb4..1cb0159 100644
--- a/lib/CodeGen/AsmPrinter/ARMException.cpp
+++ b/lib/CodeGen/AsmPrinter/ARMException.cpp
@@ -57,10 +57,10 @@ void ARMException::endModule() {
/// beginFunction - Gather pre-function exception information. Assumes it's
/// being emitted immediately after the function entry point.
void ARMException::beginFunction(const MachineFunction *MF) {
- getTargetStreamer().emitFnStart();
- if (Asm->MF->getFunction()->needsUnwindTableEntry())
- Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_begin",
- Asm->getFunctionNumber()));
+ if (Asm->MAI->getExceptionHandlingType() == ExceptionHandling::ARM)
+ getTargetStreamer().emitFnStart();
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_begin",
+ Asm->getFunctionNumber()));
// See if we need call frame info.
AsmPrinter::CFIMoveType MoveType = Asm->needsCFIMoves();
assert(MoveType != AsmPrinter::CFI_M_EH &&
@@ -77,16 +77,16 @@ void ARMException::endFunction(const MachineFunction *) {
if (shouldEmitCFI)
Asm->OutStreamer.EmitCFIEndProc();
+ // Map all labels and get rid of any dead landing pads.
+ MMI->TidyLandingPads();
+
ARMTargetStreamer &ATS = getTargetStreamer();
- if (!Asm->MF->getFunction()->needsUnwindTableEntry())
+ if (!Asm->MF->getFunction()->needsUnwindTableEntry() &&
+ MMI->getLandingPads().empty())
ATS.emitCantUnwind();
else {
Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_end",
Asm->getFunctionNumber()));
-
- // Map all labels and get rid of any dead landing pads.
- MMI->TidyLandingPads();
-
if (!MMI->getLandingPads().empty()) {
// Emit references to personality.
if (const Function * Personality =
@@ -104,7 +104,8 @@ void ARMException::endFunction(const MachineFunction *) {
}
}
- ATS.emitFnEnd();
+ if (Asm->MAI->getExceptionHandlingType() == ExceptionHandling::ARM)
+ ATS.emitFnEnd();
}
void ARMException::EmitTypeInfos(unsigned TTypeEncoding) {
@@ -144,7 +145,7 @@ void ARMException::EmitTypeInfos(unsigned TTypeEncoding) {
Asm->OutStreamer.AddComment("FilterInfo " + Twine(Entry));
}
- Asm->EmitTTypeReference((TypeID == 0 ? 0 : TypeInfos[TypeID - 1]),
+ Asm->EmitTTypeReference((TypeID == 0 ? nullptr : TypeInfos[TypeID - 1]),
TTypeEncoding);
}
}
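
The rewritten endFunction tightens when `.cantunwind` may be emitted: only when the function needs no unwind table entry and, after TidyLandingPads has run, no landing pads remain. The condition restated as a standalone predicate (a paraphrase of the diff, not LLVM code):

    #include <cassert>

    // True iff the ARM EH directives may mark a function .cantunwind.
    static bool canMarkCantUnwind(bool NeedsUnwindTableEntry,
                                  bool HasLandingPads) {
      return !NeedsUnwindTableEntry && !HasLandingPads;
    }

    int main() {
      assert(canMarkCantUnwind(false, false));  // no EH state at all
      assert(!canMarkCantUnwind(true, false));  // unwind table requested
      assert(!canMarkCantUnwind(false, true));  // live landing pads
      return 0;
    }
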
diff --git a/lib/CodeGen/AsmPrinter/AddressPool.cpp b/lib/CodeGen/AsmPrinter/AddressPool.cpp
new file mode 100644
index 0000000..8dab5e5
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/AddressPool.cpp
@@ -0,0 +1,45 @@
+//===-- llvm/CodeGen/AddressPool.cpp - Dwarf Debug Framework ---*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AddressPool.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+
+using namespace llvm;
+
+class MCExpr;
+
+unsigned AddressPool::getIndex(const MCSymbol *Sym, bool TLS) {
+ HasBeenUsed = true;
+ auto IterBool =
+ Pool.insert(std::make_pair(Sym, AddressPoolEntry(Pool.size(), TLS)));
+ return IterBool.first->second.Number;
+}
+
+// Emit addresses into the section given.
+void AddressPool::emit(AsmPrinter &Asm, const MCSection *AddrSection) {
+ if (Pool.empty())
+ return;
+
+ // Start the dwarf addr section.
+ Asm.OutStreamer.SwitchSection(AddrSection);
+
+ // Order the address pool entries by ID
+ SmallVector<const MCExpr *, 64> Entries(Pool.size());
+
+ for (const auto &I : Pool)
+ Entries[I.second.Number] =
+ I.second.TLS
+ ? Asm.getObjFileLowering().getDebugThreadLocalSymbol(I.first)
+ : MCSymbolRefExpr::Create(I.first, Asm.OutContext);
+
+ for (const MCExpr *Entry : Entries)
+ Asm.OutStreamer.EmitValue(Entry, Asm.getDataLayout().getPointerSize());
+}
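
getIndex leans on the pair<iterator, bool> that DenseMap::insert returns: insert with Pool.size() as the tentative index and, hit or miss, read the final index back through the iterator. The same idiom over std::unordered_map (std types standing in for DenseMap and MCSymbol):

    #include <cassert>
    #include <string>
    #include <unordered_map>

    class IndexPool {
      std::unordered_map<std::string, unsigned> Pool;

    public:
      // Returns a stable, dense index for Key, allocating on first use.
      unsigned getIndex(const std::string &Key) {
        // If Key is new it gets the next index (the current size); if
        // it already exists, insert() is a no-op and the iterator
        // points at the previously assigned index.
        auto IterBool = Pool.insert({Key, unsigned(Pool.size())});
        return IterBool.first->second;
      }
    };

    int main() {
      IndexPool P;
      assert(P.getIndex("a") == 0);
      assert(P.getIndex("b") == 1);
      assert(P.getIndex("a") == 0); // repeated query reuses the index
      return 0;
    }
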
diff --git a/lib/CodeGen/AsmPrinter/AddressPool.h b/lib/CodeGen/AsmPrinter/AddressPool.h
new file mode 100644
index 0000000..42757d7
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/AddressPool.h
@@ -0,0 +1,52 @@
+//===-- llvm/CodeGen/AddressPool.h - Dwarf Debug Framework -----*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CODEGEN_ASMPRINTER_ADDRESSPOOL_H__
+#define CODEGEN_ASMPRINTER_ADDRESSPOOL_H__
+
+#include "llvm/ADT/DenseMap.h"
+
+namespace llvm {
+class MCSection;
+class MCSymbol;
+class AsmPrinter;
+// Collection of addresses for this unit and assorted labels.
+// A Symbol->unsigned mapping of addresses used by indirect
+// references.
+class AddressPool {
+ struct AddressPoolEntry {
+ unsigned Number;
+ bool TLS;
+ AddressPoolEntry(unsigned Number, bool TLS) : Number(Number), TLS(TLS) {}
+ };
+ DenseMap<const MCSymbol *, AddressPoolEntry> Pool;
+
+ /// Record whether the AddressPool has been queried for an address index since
+ /// the last "resetUsedFlag" call. Used to implement type unit fallback - a
+ /// type that references addresses cannot be placed in a type unit when using
+ /// fission.
+ bool HasBeenUsed;
+
+public:
+ AddressPool() : HasBeenUsed(false) {}
+
+ /// \brief Returns the index into the address pool with the given
+ /// label/symbol.
+ unsigned getIndex(const MCSymbol *Sym, bool TLS = false);
+
+ void emit(AsmPrinter &Asm, const MCSection *AddrSection);
+
+ bool isEmpty() { return Pool.empty(); }
+
+ bool hasBeenUsed() const { return HasBeenUsed; }
+
+ void resetUsedFlag() { HasBeenUsed = false; }
+};
+}
+#endif
diff --git a/lib/CodeGen/AsmPrinter/Android.mk b/lib/CodeGen/AsmPrinter/Android.mk
index a725fba..f56eb6e 100644
--- a/lib/CodeGen/AsmPrinter/Android.mk
+++ b/lib/CodeGen/AsmPrinter/Android.mk
@@ -8,17 +8,21 @@ codegen_asmprinter_SRC_FILES := \
include $(CLEAR_VARS)
LOCAL_SRC_FILES := \
+ AddressPool.cpp \
AsmPrinter.cpp \
AsmPrinterDwarf.cpp \
AsmPrinterInlineAsm.cpp \
ARMException.cpp \
+ DbgValueHistoryCalculator.cpp \
DIE.cpp \
DIEHash.cpp \
DwarfAccelTable.cpp \
DwarfCFIException.cpp \
DwarfDebug.cpp \
DwarfException.cpp \
- DwarfUnit.cpp \
+ DwarfFile.cpp \
+ DwarfStringPool.cpp \
+ DwarfUnit.cpp \
ErlangGCPrinter.cpp \
OcamlGCPrinter.cpp \
Win64Exception.cpp \
@@ -38,17 +42,21 @@ ifneq (true,$(DISABLE_LLVM_DEVICE_BUILDS))
include $(CLEAR_VARS)
LOCAL_SRC_FILES := \
+ AddressPool.cpp \
AsmPrinter.cpp \
AsmPrinterDwarf.cpp \
AsmPrinterInlineAsm.cpp \
ARMException.cpp \
+ DbgValueHistoryCalculator.cpp \
DIE.cpp \
DIEHash.cpp \
DwarfAccelTable.cpp \
DwarfCFIException.cpp \
DwarfDebug.cpp \
DwarfException.cpp \
- DwarfUnit.cpp \
+ DwarfFile.cpp \
+ DwarfStringPool.cpp \
+ DwarfUnit.cpp \
ErlangGCPrinter.cpp \
OcamlGCPrinter.cpp \
Win64Exception.cpp \
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index c3afc8b..7de9c6d 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -11,7 +11,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "asm-printer"
#include "llvm/CodeGen/AsmPrinter.h"
#include "DwarfDebug.h"
#include "DwarfException.h"
@@ -53,6 +52,8 @@
#include "llvm/Transforms/Utils/GlobalStatus.h"
using namespace llvm;
+#define DEBUG_TYPE "asm-printer"
+
static const char *const DWARFGroupName = "DWARF Emission";
static const char *const DbgTimerName = "Debug Info Emission";
static const char *const EHTimerName = "DWARF Exception Writer";
@@ -62,9 +63,9 @@ STATISTIC(EmittedInsts, "Number of machine instrs printed");
char AsmPrinter::ID = 0;
-typedef DenseMap<GCStrategy*,GCMetadataPrinter*> gcp_map_type;
+typedef DenseMap<GCStrategy*, std::unique_ptr<GCMetadataPrinter>> gcp_map_type;
static gcp_map_type &getGCMap(void *&P) {
- if (P == 0)
+ if (!P)
P = new gcp_map_type();
return *(gcp_map_type*)P;
}
@@ -101,23 +102,21 @@ AsmPrinter::AsmPrinter(TargetMachine &tm, MCStreamer &Streamer)
TM(tm), MAI(tm.getMCAsmInfo()), MII(tm.getInstrInfo()),
OutContext(Streamer.getContext()),
OutStreamer(Streamer),
- LastMI(0), LastFn(0), Counter(~0U), SetCounter(0) {
- DD = 0; MMI = 0; LI = 0; MF = 0;
- CurrentFnSym = CurrentFnSymForSize = 0;
- GCMetadataPrinters = 0;
+ LastMI(nullptr), LastFn(0), Counter(~0U), SetCounter(0) {
+ DD = nullptr; MMI = nullptr; LI = nullptr; MF = nullptr;
+ CurrentFnSym = CurrentFnSymForSize = nullptr;
+ GCMetadataPrinters = nullptr;
VerboseAsm = Streamer.isVerboseAsm();
}
AsmPrinter::~AsmPrinter() {
- assert(DD == 0 && Handlers.empty() && "Debug/EH info didn't get finalized");
+ assert(!DD && Handlers.empty() && "Debug/EH info didn't get finalized");
- if (GCMetadataPrinters != 0) {
+ if (GCMetadataPrinters) {
gcp_map_type &GCMap = getGCMap(GCMetadataPrinters);
- for (gcp_map_type::iterator I = GCMap.begin(), E = GCMap.end(); I != E; ++I)
- delete I->second;
delete &GCMap;
- GCMetadataPrinters = 0;
+ GCMetadataPrinters = nullptr;
}
delete &OutStreamer;
@@ -209,7 +208,7 @@ bool AsmPrinter::doInitialization(Module &M) {
GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>();
assert(MI && "AsmPrinter didn't require GCModuleInfo?");
- for (GCModuleInfo::iterator I = MI->begin(), E = MI->end(); I != E; ++I)
+ for (auto &I : *MI)
if (GCMetadataPrinter *MP = GetOrCreateGCPrinter(*I))
MP->beginAssembly(*this);
@@ -233,7 +232,7 @@ bool AsmPrinter::doInitialization(Module &M) {
}
}
- DwarfException *DE = 0;
+ DwarfException *DE = nullptr;
switch (MAI->getExceptionHandlingType()) {
case ExceptionHandling::None:
break;
@@ -370,10 +369,9 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
// sections and expected to be contiguous (e.g. ObjC metadata).
unsigned AlignLog = getGVAlignmentLog2(GV, *DL);
- for (unsigned I = 0, E = Handlers.size(); I != E; ++I) {
- const HandlerInfo &OI = Handlers[I];
- NamedRegionTimer T(OI.TimerName, OI.TimerGroupName, TimePassesIsEnabled);
- OI.Handler->setSymbolSize(GVSym, Size);
+ for (const HandlerInfo &HI : Handlers) {
+ NamedRegionTimer T(HI.TimerName, HI.TimerGroupName, TimePassesIsEnabled);
+ HI.Handler->setSymbolSize(GVSym, Size);
}
// Handle common and BSS local symbols (.lcomm).
@@ -545,10 +543,9 @@ void AsmPrinter::EmitFunctionHeader() {
}
// Emit pre-function debug and/or EH information.
- for (unsigned I = 0, E = Handlers.size(); I != E; ++I) {
- const HandlerInfo &OI = Handlers[I];
- NamedRegionTimer T(OI.TimerName, OI.TimerGroupName, TimePassesIsEnabled);
- OI.Handler->beginFunction(MF);
+ for (const HandlerInfo &HI : Handlers) {
+ NamedRegionTimer T(HI.TimerName, HI.TimerGroupName, TimePassesIsEnabled);
+ HI.Handler->beginFunction(MF);
}
// Emit the prefix data.
@@ -746,69 +743,65 @@ void AsmPrinter::EmitFunctionBody() {
// Print out code for the function.
bool HasAnyRealCode = false;
- const MachineInstr *LastMI = 0;
- for (MachineFunction::const_iterator I = MF->begin(), E = MF->end();
- I != E; ++I) {
+ const MachineInstr *LastMI = nullptr;
+ for (auto &MBB : *MF) {
// Print a label for the basic block.
- EmitBasicBlockStart(I);
- for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end();
- II != IE; ++II) {
- LastMI = II;
+ EmitBasicBlockStart(MBB);
+ for (auto &MI : MBB) {
+ LastMI = &MI;
// Print the assembly for the instruction.
- if (!II->isPosition() && !II->isImplicitDef() && !II->isKill() &&
- !II->isDebugValue()) {
+ if (!MI.isPosition() && !MI.isImplicitDef() && !MI.isKill() &&
+ !MI.isDebugValue()) {
HasAnyRealCode = true;
++EmittedInsts;
}
if (ShouldPrintDebugScopes) {
- for (unsigned III = 0, EEE = Handlers.size(); III != EEE; ++III) {
- const HandlerInfo &OI = Handlers[III];
- NamedRegionTimer T(OI.TimerName, OI.TimerGroupName,
+ for (const HandlerInfo &HI : Handlers) {
+ NamedRegionTimer T(HI.TimerName, HI.TimerGroupName,
TimePassesIsEnabled);
- OI.Handler->beginInstruction(II);
+ HI.Handler->beginInstruction(&MI);
}
}
if (isVerbose())
- emitComments(*II, OutStreamer.GetCommentOS());
+ emitComments(MI, OutStreamer.GetCommentOS());
- switch (II->getOpcode()) {
+ switch (MI.getOpcode()) {
case TargetOpcode::CFI_INSTRUCTION:
- emitCFIInstruction(*II);
+ emitCFIInstruction(MI);
break;
case TargetOpcode::EH_LABEL:
case TargetOpcode::GC_LABEL:
- OutStreamer.EmitLabel(II->getOperand(0).getMCSymbol());
+ OutStreamer.EmitLabel(MI.getOperand(0).getMCSymbol());
break;
case TargetOpcode::INLINEASM:
- EmitInlineAsm(II);
+ EmitInlineAsm(&MI);
break;
case TargetOpcode::DBG_VALUE:
if (isVerbose()) {
- if (!emitDebugValueComment(II, *this))
- EmitInstruction(II);
+ if (!emitDebugValueComment(&MI, *this))
+ EmitInstruction(&MI);
}
break;
case TargetOpcode::IMPLICIT_DEF:
- if (isVerbose()) emitImplicitDef(II);
+ if (isVerbose()) emitImplicitDef(&MI);
break;
case TargetOpcode::KILL:
- if (isVerbose()) emitKill(II, *this);
+ if (isVerbose()) emitKill(&MI, *this);
break;
default:
- EmitInstruction(II);
+ EmitInstruction(&MI);
break;
}
if (ShouldPrintDebugScopes) {
- for (unsigned III = 0, EEE = Handlers.size(); III != EEE; ++III) {
- const HandlerInfo &OI = Handlers[III];
- NamedRegionTimer T(OI.TimerName, OI.TimerGroupName,
+ for (const HandlerInfo &HI : Handlers) {
+ NamedRegionTimer T(HI.TimerName, HI.TimerGroupName,
TimePassesIsEnabled);
- OI.Handler->endInstruction();
+ HI.Handler->endInstruction();
}
}
}
@@ -835,11 +828,10 @@ void AsmPrinter::EmitFunctionBody() {
}
const Function *F = MF->getFunction();
- for (Function::const_iterator i = F->begin(), e = F->end(); i != e; ++i) {
- const BasicBlock *BB = i;
- if (!BB->hasAddressTaken())
+ for (const auto &BB : *F) {
+ if (!BB.hasAddressTaken())
continue;
- MCSymbol *Sym = GetBlockAddressSymbol(BB);
+ MCSymbol *Sym = GetBlockAddressSymbol(&BB);
if (Sym->isDefined())
continue;
OutStreamer.AddComment("Address of block that was removed by CodeGen");
@@ -866,10 +858,9 @@ void AsmPrinter::EmitFunctionBody() {
}
// Emit post-function debug and/or EH information.
- for (unsigned I = 0, E = Handlers.size(); I != E; ++I) {
- const HandlerInfo &OI = Handlers[I];
- NamedRegionTimer T(OI.TimerName, OI.TimerGroupName, TimePassesIsEnabled);
- OI.Handler->endFunction(MF);
+ for (const HandlerInfo &HI : Handlers) {
+ NamedRegionTimer T(HI.TimerName, HI.TimerGroupName, TimePassesIsEnabled);
+ HI.Handler->endFunction(MF);
}
MMI->EndFunction();
@@ -881,13 +872,11 @@ void AsmPrinter::EmitFunctionBody() {
bool AsmPrinter::doFinalization(Module &M) {
// Emit global variables.
- for (Module::const_global_iterator I = M.global_begin(), E = M.global_end();
- I != E; ++I)
- EmitGlobalVariable(I);
+ for (const auto &G : M.globals())
+ EmitGlobalVariable(&G);
// Emit visibility info for declarations
- for (Module::const_iterator I = M.begin(), E = M.end(); I != E; ++I) {
- const Function &F = *I;
+ for (const Function &F : M) {
if (!F.isDeclaration())
continue;
GlobalValue::VisibilityTypes V = F.getVisibility();
@@ -908,15 +897,14 @@ bool AsmPrinter::doFinalization(Module &M) {
OutStreamer.Flush();
// Finalize debug and EH information.
- for (unsigned I = 0, E = Handlers.size(); I != E; ++I) {
- const HandlerInfo &OI = Handlers[I];
- NamedRegionTimer T(OI.TimerName, OI.TimerGroupName,
+ for (const HandlerInfo &HI : Handlers) {
+ NamedRegionTimer T(HI.TimerName, HI.TimerGroupName,
TimePassesIsEnabled);
- OI.Handler->endModule();
- delete OI.Handler;
+ HI.Handler->endModule();
+ delete HI.Handler;
}
Handlers.clear();
- DD = 0;
+ DD = nullptr;
// If the target wants to know about weak references, print them all.
if (MAI->getWeakRefDirective()) {
@@ -926,36 +914,36 @@ bool AsmPrinter::doFinalization(Module &M) {
// happen with the MC stuff eventually.
// Print out module-level global variables here.
- for (Module::const_global_iterator I = M.global_begin(), E = M.global_end();
- I != E; ++I) {
- if (!I->hasExternalWeakLinkage()) continue;
- OutStreamer.EmitSymbolAttribute(getSymbol(I), MCSA_WeakReference);
+ for (const auto &G : M.globals()) {
+ if (!G.hasExternalWeakLinkage())
+ continue;
+ OutStreamer.EmitSymbolAttribute(getSymbol(&G), MCSA_WeakReference);
}
- for (Module::const_iterator I = M.begin(), E = M.end(); I != E; ++I) {
- if (!I->hasExternalWeakLinkage()) continue;
- OutStreamer.EmitSymbolAttribute(getSymbol(I), MCSA_WeakReference);
+ for (const auto &F : M) {
+ if (!F.hasExternalWeakLinkage())
+ continue;
+ OutStreamer.EmitSymbolAttribute(getSymbol(&F), MCSA_WeakReference);
}
}
if (MAI->hasSetDirective()) {
OutStreamer.AddBlankLine();
- for (Module::const_alias_iterator I = M.alias_begin(), E = M.alias_end();
- I != E; ++I) {
- MCSymbol *Name = getSymbol(I);
+ for (const auto &Alias : M.aliases()) {
+ MCSymbol *Name = getSymbol(&Alias);
- const GlobalValue *GV = I->getAliasedGlobal();
+ const GlobalValue *GV = Alias.getAliasee();
assert(!GV->isDeclaration());
MCSymbol *Target = getSymbol(GV);
- if (I->hasExternalLinkage() || !MAI->getWeakRefDirective())
+ if (Alias.hasExternalLinkage() || !MAI->getWeakRefDirective())
OutStreamer.EmitSymbolAttribute(Name, MCSA_Global);
- else if (I->hasWeakLinkage() || I->hasLinkOnceLinkage())
+ else if (Alias.hasWeakLinkage() || Alias.hasLinkOnceLinkage())
OutStreamer.EmitSymbolAttribute(Name, MCSA_WeakReference);
else
- assert(I->hasLocalLinkage() && "Invalid alias linkage");
+ assert(Alias.hasLocalLinkage() && "Invalid alias linkage");
- EmitVisibility(Name, I->getVisibility());
+ EmitVisibility(Name, Alias.getVisibility());
// Emit the directives as assignments aka .set:
OutStreamer.EmitAssignment(Name,
@@ -966,7 +954,7 @@ bool AsmPrinter::doFinalization(Module &M) {
GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>();
assert(MI && "AsmPrinter didn't require GCModuleInfo?");
for (GCModuleInfo::iterator I = MI->end(), E = MI->begin(); I != E; )
- if (GCMetadataPrinter *MP = GetOrCreateGCPrinter(*--I))
+ if (GCMetadataPrinter *MP = GetOrCreateGCPrinter(**--I))
MP->finishAssembly(*this);
// Emit llvm.ident metadata in an '.ident' directive.
@@ -983,8 +971,8 @@ bool AsmPrinter::doFinalization(Module &M) {
// after everything else has gone out.
EmitEndOfAsmFile(M);
- delete Mang; Mang = 0;
- MMI = 0;
+ delete Mang; Mang = nullptr;
+ MMI = nullptr;
OutStreamer.Finish();
OutStreamer.reset();
@@ -1100,7 +1088,7 @@ void AsmPrinter::EmitConstantPool() {
void AsmPrinter::EmitJumpTableInfo() {
const DataLayout *DL = MF->getTarget().getDataLayout();
const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
- if (MJTI == 0) return;
+ if (!MJTI) return;
if (MJTI->getEntryKind() == MachineJumpTableInfo::EK_Inline) return;
const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
if (JT.empty()) return;
@@ -1185,7 +1173,7 @@ void AsmPrinter::EmitJumpTableEntry(const MachineJumpTableInfo *MJTI,
const MachineBasicBlock *MBB,
unsigned UID) const {
assert(MBB && MBB->getNumber() >= 0 && "Invalid basic block");
- const MCExpr *Value = 0;
+ const MCExpr *Value = nullptr;
switch (MJTI->getEntryKind()) {
case MachineJumpTableInfo::EK_Inline:
llvm_unreachable("Cannot emit EK_Inline jump table entry");
@@ -1308,6 +1296,15 @@ void AsmPrinter::EmitLLVMUsedList(const ConstantArray *InitList) {
}
}
+namespace {
+struct Structor {
+ Structor() : Priority(0), Func(nullptr), ComdatKey(nullptr) {}
+ int Priority;
+ llvm::Constant *Func;
+ llvm::GlobalValue *ComdatKey;
+};
+} // end namespace
+
/// EmitXXStructorList - Emit the ctor or dtor list taking into account the init
/// priority.
void AsmPrinter::EmitXXStructorList(const Constant *List, bool isCtor) {
@@ -1319,37 +1316,52 @@ void AsmPrinter::EmitXXStructorList(const Constant *List, bool isCtor) {
const ConstantArray *InitList = dyn_cast<ConstantArray>(List);
if (!InitList) return; // Not an array!
StructType *ETy = dyn_cast<StructType>(InitList->getType()->getElementType());
- if (!ETy || ETy->getNumElements() != 2) return; // Not an array of pairs!
+ // FIXME: Only allow the 3-field form in LLVM 4.0.
+ if (!ETy || ETy->getNumElements() < 2 || ETy->getNumElements() > 3)
+ return; // Not an array of two or three elements!
if (!isa<IntegerType>(ETy->getTypeAtIndex(0U)) ||
!isa<PointerType>(ETy->getTypeAtIndex(1U))) return; // Not (int, ptr).
+ if (ETy->getNumElements() == 3 && !isa<PointerType>(ETy->getTypeAtIndex(2U)))
+ return; // Not (int, ptr, ptr).
// Gather the structors in a form that's convenient for sorting by priority.
- typedef std::pair<unsigned, Constant *> Structor;
SmallVector<Structor, 8> Structors;
- for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) {
- ConstantStruct *CS = dyn_cast<ConstantStruct>(InitList->getOperand(i));
+ for (Value *O : InitList->operands()) {
+ ConstantStruct *CS = dyn_cast<ConstantStruct>(O);
if (!CS) continue; // Malformed.
if (CS->getOperand(1)->isNullValue())
break; // Found a null terminator, skip the rest.
ConstantInt *Priority = dyn_cast<ConstantInt>(CS->getOperand(0));
if (!Priority) continue; // Malformed.
- Structors.push_back(std::make_pair(Priority->getLimitedValue(65535),
- CS->getOperand(1)));
+ Structors.push_back(Structor());
+ Structor &S = Structors.back();
+ S.Priority = Priority->getLimitedValue(65535);
+ S.Func = CS->getOperand(1);
+ if (ETy->getNumElements() == 3 && !CS->getOperand(2)->isNullValue())
+ S.ComdatKey = dyn_cast<GlobalValue>(CS->getOperand(2)->stripPointerCasts());
}
// Emit the function pointers in the target-specific order
const DataLayout *DL = TM.getDataLayout();
unsigned Align = Log2_32(DL->getPointerPrefAlignment());
- std::stable_sort(Structors.begin(), Structors.end(), less_first());
- for (unsigned i = 0, e = Structors.size(); i != e; ++i) {
+ std::stable_sort(Structors.begin(), Structors.end(),
+ [](const Structor &L,
+ const Structor &R) { return L.Priority < R.Priority; });
+ for (Structor &S : Structors) {
+ const TargetLoweringObjectFile &Obj = getObjFileLowering();
+ const MCSymbol *KeySym = nullptr;
+ const MCSection *KeySec = nullptr;
+ if (S.ComdatKey) {
+ KeySym = getSymbol(S.ComdatKey);
+ KeySec = getObjFileLowering().SectionForGlobal(S.ComdatKey, *Mang, TM);
+ }
const MCSection *OutputSection =
- (isCtor ?
- getObjFileLowering().getStaticCtorSection(Structors[i].first) :
- getObjFileLowering().getStaticDtorSection(Structors[i].first));
+ (isCtor ? Obj.getStaticCtorSection(S.Priority, KeySym, KeySec)
+ : Obj.getStaticDtorSection(S.Priority, KeySym, KeySec));
OutStreamer.SwitchSection(OutputSection);
if (OutStreamer.getCurrentSection() != OutStreamer.getPreviousSection())
EmitAlignment(Align);
- EmitXXStructor(Structors[i].second);
+ EmitXXStructor(S.Func);
}
}
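
The structor list moves from sorted std::pairs to a named struct ordered by a lambda; std::stable_sort is the important detail, since entries with equal priority must keep the order the frontend emitted. A minimal repro of that sort (Structor trimmed to the two fields used here):

    #include <algorithm>
    #include <iostream>
    #include <vector>

    struct Structor {
      int Priority;
      const char *Name;
    };

    int main() {
      std::vector<Structor> S = {{65535, "ctor_a"},
                                 {101, "ctor_b"},
                                 {65535, "ctor_c"},
                                 {101, "ctor_d"}};
      // stable_sort keeps ctor_b before ctor_d and ctor_a before
      // ctor_c: ties preserve source order.
      std::stable_sort(S.begin(), S.end(),
                       [](const Structor &L, const Structor &R) {
                         return L.Priority < R.Priority;
                       });
      for (const Structor &X : S)
        std::cout << X.Priority << " " << X.Name << "\n";
    }
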
@@ -1470,7 +1482,7 @@ void AsmPrinter::EmitLabelPlusOffset(const MCSymbol *Label, uint64_t Offset,
// an explicit alignment requested, it will override the alignment request
// if required for correctness.
//
-void AsmPrinter::EmitAlignment(unsigned NumBits, const GlobalValue *GV) const {
+void AsmPrinter::EmitAlignment(unsigned NumBits, const GlobalObject *GV) const {
if (GV) NumBits = getGVAlignmentLog2(GV, *TM.getDataLayout(), NumBits);
if (NumBits == 0) return; // 1-byte aligned: no need to emit alignment.
@@ -1503,7 +1515,7 @@ static const MCExpr *lowerConstant(const Constant *CV, AsmPrinter &AP) {
return MCSymbolRefExpr::Create(AP.GetBlockAddressSymbol(BA), Ctx);
const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV);
- if (CE == 0) {
+ if (!CE) {
llvm_unreachable("Unknown constant value to lower!");
}
@@ -1528,7 +1540,7 @@ static const MCExpr *lowerConstant(const Constant *CV, AsmPrinter &AP) {
raw_string_ostream OS(S);
OS << "Unsupported expression in static initializer: ";
CE->printAsOperand(OS, /*PrintType=*/false,
- !AP.MF ? 0 : AP.MF->getFunction()->getParent());
+ !AP.MF ? nullptr : AP.MF->getFunction()->getParent());
report_fatal_error(OS.str());
}
case Instruction::GetElementPtr: {
@@ -2055,7 +2067,7 @@ MCSymbol *AsmPrinter::GetExternalSymbolSymbol(StringRef Sym) const {
/// PrintParentLoopComment - Print comments about parent loops of this one.
static void PrintParentLoopComment(raw_ostream &OS, const MachineLoop *Loop,
unsigned FunctionNumber) {
- if (Loop == 0) return;
+ if (!Loop) return;
PrintParentLoopComment(OS, Loop->getParentLoop(), FunctionNumber);
OS.indent(Loop->getLoopDepth()*2)
<< "Parent Loop BB" << FunctionNumber << "_"
@@ -2069,12 +2081,12 @@ static void PrintParentLoopComment(raw_ostream &OS, const MachineLoop *Loop,
static void PrintChildLoopComment(raw_ostream &OS, const MachineLoop *Loop,
unsigned FunctionNumber) {
// Add child loop information
- for (MachineLoop::iterator CL = Loop->begin(), E = Loop->end();CL != E; ++CL){
- OS.indent((*CL)->getLoopDepth()*2)
+ for (const MachineLoop *CL : *Loop) {
+ OS.indent(CL->getLoopDepth()*2)
<< "Child Loop BB" << FunctionNumber << "_"
- << (*CL)->getHeader()->getNumber() << " Depth " << (*CL)->getLoopDepth()
+ << CL->getHeader()->getNumber() << " Depth " << CL->getLoopDepth()
<< '\n';
- PrintChildLoopComment(OS, *CL, FunctionNumber);
+ PrintChildLoopComment(OS, CL, FunctionNumber);
}
}
@@ -2084,7 +2096,7 @@ static void emitBasicBlockLoopComments(const MachineBasicBlock &MBB,
const AsmPrinter &AP) {
// Add loop depth information
const MachineLoop *Loop = LI->getLoopFor(&MBB);
- if (Loop == 0) return;
+ if (!Loop) return;
MachineBasicBlock *Header = Loop->getHeader();
assert(Header && "No header for loop");
@@ -2120,42 +2132,41 @@ static void emitBasicBlockLoopComments(const MachineBasicBlock &MBB,
/// EmitBasicBlockStart - This method prints the label for the specified
/// MachineBasicBlock, an alignment (if present) and a comment describing
/// it if appropriate.
-void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock *MBB) const {
+void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock &MBB) const {
// Emit an alignment directive for this block, if needed.
- if (unsigned Align = MBB->getAlignment())
+ if (unsigned Align = MBB.getAlignment())
EmitAlignment(Align);
// If the block has its address taken, emit any labels that were used to
// reference the block. It is possible that there is more than one label
// here, because multiple LLVM BB's may have been RAUW'd to this block after
// the references were generated.
- if (MBB->hasAddressTaken()) {
- const BasicBlock *BB = MBB->getBasicBlock();
+ if (MBB.hasAddressTaken()) {
+ const BasicBlock *BB = MBB.getBasicBlock();
if (isVerbose())
OutStreamer.AddComment("Block address taken");
- std::vector<MCSymbol*> Syms = MMI->getAddrLabelSymbolToEmit(BB);
-
- for (unsigned i = 0, e = Syms.size(); i != e; ++i)
- OutStreamer.EmitLabel(Syms[i]);
+ std::vector<MCSymbol*> Symbols = MMI->getAddrLabelSymbolToEmit(BB);
+ for (auto *Sym : Symbols)
+ OutStreamer.EmitLabel(Sym);
}
// Print some verbose block comments.
if (isVerbose()) {
- if (const BasicBlock *BB = MBB->getBasicBlock())
+ if (const BasicBlock *BB = MBB.getBasicBlock())
if (BB->hasName())
OutStreamer.AddComment("%" + BB->getName());
- emitBasicBlockLoopComments(*MBB, LI, *this);
+ emitBasicBlockLoopComments(MBB, LI, *this);
}
// Print the main label for the block.
- if (MBB->pred_empty() || isBlockOnlyReachableByFallthrough(MBB)) {
+ if (MBB.pred_empty() || isBlockOnlyReachableByFallthrough(&MBB)) {
if (isVerbose()) {
// NOTE: Want this comment at start of line, don't emit with AddComment.
- OutStreamer.emitRawComment(" BB#" + Twine(MBB->getNumber()) + ":", false);
+ OutStreamer.emitRawComment(" BB#" + Twine(MBB.getNumber()) + ":", false);
}
} else {
- OutStreamer.EmitLabel(MBB->getSymbol());
+ OutStreamer.EmitLabel(MBB.getSymbol());
}
}
@@ -2191,14 +2202,11 @@ isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const {
return false;
// If there isn't exactly one predecessor, it can't be a fall through.
- MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(), PI2 = PI;
- ++PI2;
- if (PI2 != MBB->pred_end())
+ if (MBB->pred_size() > 1)
return false;
// The predecessor has to be immediately before this block.
- MachineBasicBlock *Pred = *PI;
-
+ MachineBasicBlock *Pred = *MBB->pred_begin();
if (!Pred->isLayoutSuccessor(MBB))
return false;
@@ -2207,10 +2215,7 @@ isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const {
return true;
// Check the terminators in the previous blocks
- for (MachineBasicBlock::iterator II = Pred->getFirstTerminator(),
- IE = Pred->end(); II != IE; ++II) {
- MachineInstr &MI = *II;
-
+ for (const auto &MI : Pred->terminators()) {
// If it is not a simple branch, we are in a table somewhere.
if (!MI.isBranch() || MI.isIndirectBranch())
return false;
@@ -2231,25 +2236,25 @@ isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const {
-GCMetadataPrinter *AsmPrinter::GetOrCreateGCPrinter(GCStrategy *S) {
- if (!S->usesMetadata())
- return 0;
+GCMetadataPrinter *AsmPrinter::GetOrCreateGCPrinter(GCStrategy &S) {
+ if (!S.usesMetadata())
+ return nullptr;
gcp_map_type &GCMap = getGCMap(GCMetadataPrinters);
- gcp_map_type::iterator GCPI = GCMap.find(S);
+ gcp_map_type::iterator GCPI = GCMap.find(&S);
if (GCPI != GCMap.end())
- return GCPI->second;
+ return GCPI->second.get();
- const char *Name = S->getName().c_str();
+ const char *Name = S.getName().c_str();
for (GCMetadataPrinterRegistry::iterator
I = GCMetadataPrinterRegistry::begin(),
E = GCMetadataPrinterRegistry::end(); I != E; ++I)
if (strcmp(Name, I->getName()) == 0) {
- GCMetadataPrinter *GMP = I->instantiate();
- GMP->S = S;
- GCMap.insert(std::make_pair(S, GMP));
- return GMP;
+ std::unique_ptr<GCMetadataPrinter> GMP = I->instantiate();
+ GMP->S = &S;
+ auto IterBool = GCMap.insert(std::make_pair(&S, std::move(GMP)));
+ return IterBool.first->second.get();
}
report_fatal_error("no GCMetadataPrinter registered for GC: " + Twine(Name));
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
index b696069..02cd12b 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
@@ -11,7 +11,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "asm-printer"
#include "ByteStreamer.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/ADT/SmallBitVector.h"
@@ -30,6 +29,8 @@
#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
+#define DEBUG_TYPE "asm-printer"
+
//===----------------------------------------------------------------------===//
// Dwarf Emission Helper Routines
//===----------------------------------------------------------------------===//
@@ -216,30 +217,48 @@ static void emitDwarfRegOpIndirect(ByteStreamer &Streamer, int Reg, int Offset,
/// Emit a dwarf register operation for describing
/// - a small value occupying only part of a register or
/// - a small register representing only part of a value.
-static void emitDwarfOpPiece(ByteStreamer &Streamer, unsigned Size,
- unsigned Offset) {
- assert(Size > 0);
- if (Offset > 0) {
+static void emitDwarfOpPiece(ByteStreamer &Streamer, unsigned SizeInBits,
+ unsigned OffsetInBits) {
+ assert(SizeInBits > 0 && "zero-sized piece");
+ unsigned SizeOfByte = 8;
+ if (OffsetInBits > 0 || SizeInBits % SizeOfByte) {
Streamer.EmitInt8(dwarf::DW_OP_bit_piece, "DW_OP_bit_piece");
- Streamer.EmitULEB128(Size, Twine(Size));
- Streamer.EmitULEB128(Offset, Twine(Offset));
+ Streamer.EmitULEB128(SizeInBits, Twine(SizeInBits));
+ Streamer.EmitULEB128(OffsetInBits, Twine(OffsetInBits));
} else {
Streamer.EmitInt8(dwarf::DW_OP_piece, "DW_OP_piece");
- unsigned ByteSize = Size / 8; // Assuming 8 bits per byte.
+ unsigned ByteSize = SizeInBits / SizeOfByte;
Streamer.EmitULEB128(ByteSize, Twine(ByteSize));
}
}
-/// Some targets do not provide a DWARF register number for every
-/// register. This function attempts to emit a dwarf register by
-/// emitting a piece of a super-register or by piecing together
-/// multiple subregisters that alias the register.
-static void EmitDwarfRegOpPiece(ByteStreamer &Streamer, const AsmPrinter &AP,
- const MachineLocation &MLoc) {
- assert(!MLoc.isIndirect());
- const TargetRegisterInfo *TRI = AP.TM.getRegisterInfo();
+/// Emit a shift-right dwarf expression.
+static void emitDwarfOpShr(ByteStreamer &Streamer,
+ unsigned ShiftBy) {
+ Streamer.EmitInt8(dwarf::DW_OP_constu, "DW_OP_constu");
+ Streamer.EmitULEB128(ShiftBy);
+ Streamer.EmitInt8(dwarf::DW_OP_shr, "DW_OP_shr");
+}
+
+// Some targets do not provide a DWARF register number for every
+// register. This function attempts to emit a DWARF register by
+// emitting a piece of a super-register or by piecing together
+// multiple subregisters that alias the register.
+void AsmPrinter::EmitDwarfRegOpPiece(ByteStreamer &Streamer,
+ const MachineLocation &MLoc,
+ unsigned PieceSizeInBits,
+ unsigned PieceOffsetInBits) const {
+ assert(MLoc.isReg() && "MLoc must be a register");
+ const TargetRegisterInfo *TRI = TM.getRegisterInfo();
int Reg = TRI->getDwarfRegNum(MLoc.getReg(), false);
+ // If this is a valid register number, emit it.
+ if (Reg >= 0) {
+ emitDwarfRegOp(Streamer, Reg);
+ emitDwarfOpPiece(Streamer, PieceSizeInBits, PieceOffsetInBits);
+ return;
+ }
+
// Walk up the super-register chain until we find a valid number.
// For example, EAX on x86_64 is a 32-bit piece of RAX with offset 0.
for (MCSuperRegIterator SR(MLoc.getReg(), TRI); SR.isValid(); ++SR) {
@@ -248,9 +267,19 @@ static void EmitDwarfRegOpPiece(ByteStreamer &Streamer, const AsmPrinter &AP,
unsigned Idx = TRI->getSubRegIndex(*SR, MLoc.getReg());
unsigned Size = TRI->getSubRegIdxSize(Idx);
unsigned Offset = TRI->getSubRegIdxOffset(Idx);
- AP.OutStreamer.AddComment("super-register");
+ OutStreamer.AddComment("super-register");
emitDwarfRegOp(Streamer, Reg);
- emitDwarfOpPiece(Streamer, Size, Offset);
+ if (PieceOffsetInBits == Offset) {
+ emitDwarfOpPiece(Streamer, Size, Offset);
+ } else {
+ // If this is part of a variable in a sub-register at a
+ // non-zero offset, we need to manually shift the value into
+ // place, since the DW_OP_piece describes the part of the
+ // variable, not the position of the subregister.
+ emitDwarfOpPiece(Streamer, Size, PieceOffsetInBits);
+ if (Offset)
+ emitDwarfOpShr(Streamer, Offset);
+ }
return;
}
}
@@ -260,7 +289,7 @@ static void EmitDwarfRegOpPiece(ByteStreamer &Streamer, const AsmPrinter &AP,
//
// Keep track of the current position so we can emit the more
// efficient DW_OP_piece.
- unsigned CurPos = 0;
+ unsigned CurPos = PieceOffsetInBits;
// The size of the register in bits, assuming 8 bits per byte.
unsigned RegSize = TRI->getMinimalPhysRegClass(MLoc.getReg())->getSize() * 8;
// Keep track of the bits in the register we already emitted, so we
@@ -281,7 +310,7 @@ static void EmitDwarfRegOpPiece(ByteStreamer &Streamer, const AsmPrinter &AP,
// If this sub-register has a DWARF number and we haven't covered
// its range, emit a DWARF piece for it.
if (Reg >= 0 && Intersection.any()) {
- AP.OutStreamer.AddComment("sub-register");
+ OutStreamer.AddComment("sub-register");
emitDwarfRegOp(Streamer, Reg);
emitDwarfOpPiece(Streamer, Size, Offset == CurPos ? 0 : Offset);
CurPos = Offset + Size;
@@ -291,7 +320,7 @@ static void EmitDwarfRegOpPiece(ByteStreamer &Streamer, const AsmPrinter &AP,
}
}
- if (CurPos == 0) {
+ if (CurPos == PieceOffsetInBits) {
// FIXME: We have no reasonable way of handling errors in here.
Streamer.EmitInt8(dwarf::DW_OP_nop,
"nop (could not find a dwarf register number)");
@@ -317,8 +346,7 @@ void AsmPrinter::EmitDwarfRegOp(ByteStreamer &Streamer,
}
// Attempt to find a valid super- or sub-register.
- if (!Indirect && !MLoc.isIndirect())
- return EmitDwarfRegOpPiece(Streamer, *this, MLoc);
+ return EmitDwarfRegOpPiece(Streamer, MLoc);
}
if (MLoc.isIndirect())
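The super-register path above combines a register op, a DW_OP_piece, and (for sub-registers at a non-zero offset) the DW_OP_constu/DW_OP_shr pair from emitDwarfOpShr. The following is a minimal standalone sketch of that byte-level encoding using raw DWARF 4 opcode values; the helper names and the byte-vector output are stand-ins for the real ByteStreamer machinery, and the sketch ignores the bit-granular DW_OP_bit_piece form the real helper may choose for odd offsets.

#include <cstdint>
#include <cstdio>
#include <vector>

namespace {
const uint8_t DW_OP_reg0   = 0x50; // DW_OP_reg0..DW_OP_reg31 are 0x50..0x6f
const uint8_t DW_OP_constu = 0x10;
const uint8_t DW_OP_shr    = 0x25;
const uint8_t DW_OP_piece  = 0x93;

// Standard unsigned LEB128 encoding, as EmitULEB128 does above.
void emitULEB128(std::vector<uint8_t> &Out, uint64_t V) {
  do {
    uint8_t Byte = V & 0x7f;
    V >>= 7;
    if (V)
      Byte |= 0x80;
    Out.push_back(Byte);
  } while (V);
}

// Mirrors the emission order in the patch: register op, DW_OP_piece for the
// sub-range, then the shift when the sub-register sits at a non-zero offset.
void emitSuperRegPiece(std::vector<uint8_t> &Out, unsigned DwarfReg,
                       unsigned SizeInBits, unsigned OffsetInBits) {
  Out.push_back(DW_OP_reg0 + DwarfReg); // assumes DwarfReg < 32 for brevity
  Out.push_back(DW_OP_piece);
  emitULEB128(Out, SizeInBits / 8);     // DW_OP_piece takes a byte count
  if (OffsetInBits) {
    Out.push_back(DW_OP_constu);
    emitULEB128(Out, OffsetInBits);
    Out.push_back(DW_OP_shr);
  }
}
} // namespace

int main() {
  std::vector<uint8_t> Expr;
  // e.g. 16 bits of DWARF register 0, taken from bit offset 16.
  emitSuperRegPiece(Expr, 0, 16, 16);
  for (uint8_t B : Expr)
    std::printf("0x%02x ", B);
  std::printf("\n");
}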
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
index 567b6e3..46ee0c8 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
@@ -11,11 +11,11 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "asm-printer"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/Twine.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
@@ -36,6 +36,8 @@
#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
+#define DEBUG_TYPE "asm-printer"
+
namespace {
struct SrcMgrDiagInfo {
const MDNode *LocInfo;
@@ -88,7 +90,7 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MDNode *LocMDNode,
if (!MCAI->useIntegratedAssembler() &&
!OutStreamer.isIntegratedAssemblerRequired()) {
OutStreamer.EmitRawText(Str);
- emitInlineAsmEnd(TM.getSubtarget<MCSubtargetInfo>(), 0);
+ emitInlineAsmEnd(TM.getSubtarget<MCSubtargetInfo>(), nullptr);
return;
}
@@ -98,7 +100,7 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MDNode *LocMDNode,
// If the current LLVMContext has an inline asm handler, set it in SourceMgr.
LLVMContext &LLVMCtx = MMI->getModule()->getContext();
bool HasDiagHandler = false;
- if (LLVMCtx.getInlineAsmDiagnosticHandler() != 0) {
+ if (LLVMCtx.getInlineAsmDiagnosticHandler() != nullptr) {
// If the source manager has an issue, we arrange for srcMgrDiagHandler
// to be invoked, getting DiagInfo passed into it.
DiagInfo.LocInfo = LocMDNode;
@@ -134,8 +136,11 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MDNode *LocMDNode,
// emitInlineAsmEnd().
MCSubtargetInfo STIOrig = *STI;
+ MCTargetOptions MCOptions;
+ if (MF)
+ MCOptions = MF->getTarget().Options.MCOptions;
std::unique_ptr<MCTargetAsmParser> TAP(
- TM.getTarget().createMCAsmParser(*STI, *Parser, *MII));
+ TM.getTarget().createMCAsmParser(*STI, *Parser, *MII, MCOptions));
if (!TAP)
report_fatal_error("Inline asm not supported by this streamer because"
" we don't have an asm parser for this target\n");
@@ -229,10 +234,10 @@ static void EmitMSInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
if (InlineAsm::isMemKind(OpFlags)) {
Error = AP->PrintAsmMemoryOperand(MI, OpNo, InlineAsmVariant,
- /*Modifier*/ 0, OS);
+ /*Modifier*/ nullptr, OS);
} else {
Error = AP->PrintAsmOperand(MI, OpNo, InlineAsmVariant,
- /*Modifier*/ 0, OS);
+ /*Modifier*/ nullptr, OS);
}
}
if (Error) {
@@ -324,7 +329,7 @@ static void EmitGCCInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
++LastEmitted;
const char *StrStart = LastEmitted;
const char *StrEnd = strchr(StrStart, '}');
- if (StrEnd == 0)
+ if (!StrEnd)
report_fatal_error("Unterminated ${:foo} operand in inline asm"
" string: '" + Twine(AsmStr) + "'");
@@ -399,11 +404,11 @@ static void EmitGCCInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
else {
if (InlineAsm::isMemKind(OpFlags)) {
Error = AP->PrintAsmMemoryOperand(MI, OpNo, InlineAsmVariant,
- Modifier[0] ? Modifier : 0,
+ Modifier[0] ? Modifier : nullptr,
OS);
} else {
Error = AP->PrintAsmOperand(MI, OpNo, InlineAsmVariant,
- Modifier[0] ? Modifier : 0, OS);
+ Modifier[0] ? Modifier : nullptr, OS);
}
}
}
@@ -452,7 +457,7 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const {
// Get the !srcloc metadata node if we have it, and decode the loc cookie from
// it.
unsigned LocCookie = 0;
- const MDNode *LocMD = 0;
+ const MDNode *LocMD = nullptr;
for (unsigned i = MI->getNumOperands(); i != 0; --i) {
if (MI->getOperand(i-1).isMetadata() &&
(LocMD = MI->getOperand(i-1).getMetadata()) &&
diff --git a/lib/CodeGen/AsmPrinter/CMakeLists.txt b/lib/CodeGen/AsmPrinter/CMakeLists.txt
index b3eddac..b4ef185 100644
--- a/lib/CodeGen/AsmPrinter/CMakeLists.txt
+++ b/lib/CodeGen/AsmPrinter/CMakeLists.txt
@@ -1,14 +1,18 @@
add_llvm_library(LLVMAsmPrinter
+ AddressPool.cpp
ARMException.cpp
AsmPrinter.cpp
AsmPrinterDwarf.cpp
AsmPrinterInlineAsm.cpp
+ DbgValueHistoryCalculator.cpp
DIE.cpp
DIEHash.cpp
DwarfAccelTable.cpp
DwarfCFIException.cpp
DwarfDebug.cpp
DwarfException.cpp
+ DwarfFile.cpp
+ DwarfStringPool.cpp
DwarfUnit.cpp
ErlangGCPrinter.cpp
OcamlGCPrinter.cpp
diff --git a/lib/CodeGen/AsmPrinter/DIE.cpp b/lib/CodeGen/AsmPrinter/DIE.cpp
index 26e8f2d..c3dcd9c 100644
--- a/lib/CodeGen/AsmPrinter/DIE.cpp
+++ b/lib/CodeGen/AsmPrinter/DIE.cpp
@@ -104,15 +104,6 @@ void DIEAbbrev::print(raw_ostream &O) {
void DIEAbbrev::dump() { print(dbgs()); }
#endif
-//===----------------------------------------------------------------------===//
-// DIE Implementation
-//===----------------------------------------------------------------------===//
-
-DIE::~DIE() {
- for (unsigned i = 0, N = Children.size(); i < N; ++i)
- delete Children[i];
-}
-
/// Climb up the parent chain to get the unit DIE to which this DIE
/// belongs.
const DIE *DIE::getUnit() const {
@@ -131,7 +122,7 @@ const DIE *DIE::getUnitOrNull() const {
return p;
p = p->getParent();
}
- return NULL;
+ return nullptr;
}
DIEValue *DIE::findAttribute(dwarf::Attribute Attribute) const {
@@ -143,7 +134,7 @@ DIEValue *DIE::findAttribute(dwarf::Attribute Attribute) const {
for (size_t i = 0; i < Values.size(); ++i)
if (Abbrevs.getData()[i].getAttribute() == Attribute)
return Values[i];
- return NULL;
+ return nullptr;
}
#ifndef NDEBUG
@@ -385,12 +376,12 @@ void DIEEntry::EmitValue(AsmPrinter *AP, dwarf::Form Form) const {
if (Form == dwarf::DW_FORM_ref_addr) {
const DwarfDebug *DD = AP->getDwarfDebug();
- unsigned Addr = Entry->getOffset();
+ unsigned Addr = Entry.getOffset();
assert(!DD->useSplitDwarf() && "TODO: dwo files can't have relocations.");
// For DW_FORM_ref_addr, output the offset from beginning of debug info
// section. Entry->getOffset() returns the offset from start of the
// compile unit.
- DwarfCompileUnit *CU = DD->lookupUnit(Entry->getUnit());
+ DwarfCompileUnit *CU = DD->lookupUnit(Entry.getUnit());
assert(CU && "CUDie should belong to a CU.");
Addr += CU->getDebugInfoOffset();
if (AP->MAI->doesDwarfUseRelocationsAcrossSections())
@@ -401,7 +392,7 @@ void DIEEntry::EmitValue(AsmPrinter *AP, dwarf::Form Form) const {
CU->getSectionSym(),
DIEEntry::getRefAddrSize(AP));
} else
- AP->EmitInt32(Entry->getOffset());
+ AP->EmitInt32(Entry.getOffset());
}
unsigned DIEEntry::getRefAddrSize(AsmPrinter *AP) {
@@ -418,7 +409,7 @@ unsigned DIEEntry::getRefAddrSize(AsmPrinter *AP) {
#ifndef NDEBUG
void DIEEntry::print(raw_ostream &O) const {
- O << format("Die: 0x%lx", (long)(intptr_t)Entry);
+ O << format("Die: 0x%lx", (long)(intptr_t)&Entry);
}
#endif
diff --git a/lib/CodeGen/AsmPrinter/DIE.h b/lib/CodeGen/AsmPrinter/DIE.h
index 7fefd4f..ef05f17 100644
--- a/lib/CodeGen/AsmPrinter/DIE.h
+++ b/lib/CodeGen/AsmPrinter/DIE.h
@@ -124,7 +124,13 @@ protected:
/// Children DIEs.
///
- std::vector<DIE *> Children;
+ // This can't be a vector<DIE> because pointer validity is required for the
+ // Parent pointer and DIEEntry.
+ // It can't be a list<DIE> because some clients need pointer validity before
+ // the object has been added to any child list
+ // (e.g. DwarfUnit::constructVariableDIE). These aren't insurmountable, but may
+ // be more convoluted than beneficial.
+ std::vector<std::unique_ptr<DIE>> Children;
DIE *Parent;
@@ -132,11 +138,15 @@ protected:
///
SmallVector<DIEValue *, 12> Values;
+protected:
+ DIE()
+ : Offset(0), Size(0), Abbrev((dwarf::Tag)0, dwarf::DW_CHILDREN_no),
+ Parent(nullptr) {}
+
public:
- explicit DIE(unsigned Tag)
+ explicit DIE(dwarf::Tag Tag)
: Offset(0), Size(0), Abbrev((dwarf::Tag)Tag, dwarf::DW_CHILDREN_no),
- Parent(0) {}
- ~DIE();
+ Parent(nullptr) {}
// Accessors.
DIEAbbrev &getAbbrev() { return Abbrev; }
@@ -145,7 +155,9 @@ public:
dwarf::Tag getTag() const { return Abbrev.getTag(); }
unsigned getOffset() const { return Offset; }
unsigned getSize() const { return Size; }
- const std::vector<DIE *> &getChildren() const { return Children; }
+ const std::vector<std::unique_ptr<DIE>> &getChildren() const {
+ return Children;
+ }
const SmallVectorImpl<DIEValue *> &getValues() const { return Values; }
DIE *getParent() const { return Parent; }
/// Climb up the parent chain to get the compile or type unit DIE this DIE
@@ -166,11 +178,11 @@ public:
/// addChild - Add a child to the DIE.
///
- void addChild(DIE *Child) {
+ void addChild(std::unique_ptr<DIE> Child) {
assert(!Child->getParent());
Abbrev.setChildrenFlag(dwarf::DW_CHILDREN_yes);
- Children.push_back(Child);
Child->Parent = this;
+ Children.push_back(std::move(Child));
}
/// findAttribute - Find a value in the DIE with the attribute given,
@@ -399,14 +411,13 @@ public:
/// this class can also be used as a proxy for a debug information entry not
/// yet defined (ie. types.)
class DIEEntry : public DIEValue {
- DIE *const Entry;
+ DIE &Entry;
public:
- explicit DIEEntry(DIE *E) : DIEValue(isEntry), Entry(E) {
- assert(E && "Cannot construct a DIEEntry with a null DIE");
+ explicit DIEEntry(DIE &E) : DIEValue(isEntry), Entry(E) {
}
- DIE *getEntry() const { return Entry; }
+ DIE &getEntry() const { return Entry; }
/// EmitValue - Emit debug information entry offset.
///
@@ -464,7 +475,7 @@ public:
class DIELoc : public DIEValue, public DIE {
mutable unsigned Size; // Size in bytes excluding size header.
public:
- DIELoc() : DIEValue(isLoc), DIE(0), Size(0) {}
+ DIELoc() : DIEValue(isLoc), Size(0) {}
/// ComputeSize - Calculate the size of the location expression.
///
@@ -507,7 +518,7 @@ public:
class DIEBlock : public DIEValue, public DIE {
mutable unsigned Size; // Size in bytes excluding size header.
public:
- DIEBlock() : DIEValue(isBlock), DIE(0), Size(0) {}
+ DIEBlock() : DIEValue(isBlock), Size(0) {}
/// ComputeSize - Calculate the size of the location expression.
///
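The ownership change in DIE.h above (Children held as std::vector<std::unique_ptr<DIE>>, addChild taking a unique_ptr by value) follows a common C++ idiom: the parent owns each child, yet raw pointers handed out earlier stay valid because the heap objects never move when the vector reallocates. A minimal sketch of the idiom, independent of the DIE class; Node and its members are hypothetical.

#include <cassert>
#include <memory>
#include <utility>
#include <vector>

// Hypothetical stand-in for DIE: owning children through unique_ptr while
// exposing stable raw pointers.
struct Node {
  Node *Parent = nullptr;
  std::vector<std::unique_ptr<Node>> Children;

  Node *addChild(std::unique_ptr<Node> Child) {
    assert(!Child->Parent && "child already attached");
    Child->Parent = this;
    Children.push_back(std::move(Child));
    return Children.back().get(); // raw pointer remains valid
  }
};

int main() {
  Node Root;
  // As with DwarfUnit::constructVariableDIE, a caller can keep the raw
  // pointer before the child is attached to any parent.
  auto Child = std::make_unique<Node>();
  Node *Raw = Child.get();
  Root.addChild(std::move(Child));
  assert(Raw->Parent == &Root);
}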
diff --git a/lib/CodeGen/AsmPrinter/DIEHash.cpp b/lib/CodeGen/AsmPrinter/DIEHash.cpp
index 74beec1..c2fad59 100644
--- a/lib/CodeGen/AsmPrinter/DIEHash.cpp
+++ b/lib/CodeGen/AsmPrinter/DIEHash.cpp
@@ -11,8 +11,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "dwarfdebug"
-
#include "ByteStreamer.h"
#include "DIEHash.h"
#include "DIE.h"
@@ -28,6 +26,8 @@
using namespace llvm;
+#define DEBUG_TYPE "dwarfdebug"
+
/// \brief Grabs the string in whichever attribute is passed in and returns
/// a reference to it.
static StringRef getDIEStringAttr(const DIE &Die, uint16_t Attr) {
@@ -309,7 +309,7 @@ void DIEHash::hashAttribute(AttrEntry Attr, dwarf::Tag Tag) {
// ... An attribute that refers to another type entry T is processed as
// follows:
case DIEValue::isEntry:
- hashDIEEntry(Attribute, Tag, *cast<DIEEntry>(Value)->getEntry());
+ hashDIEEntry(Attribute, Tag, cast<DIEEntry>(Value)->getEntry());
break;
case DIEValue::isInteger: {
addULEB128('A');
@@ -463,20 +463,18 @@ void DIEHash::computeHash(const DIE &Die) {
addAttributes(Die);
// Then hash each of the children of the DIE.
- for (std::vector<DIE *>::const_iterator I = Die.getChildren().begin(),
- E = Die.getChildren().end();
- I != E; ++I) {
+ for (auto &C : Die.getChildren()) {
// 7.27 Step 7
// If C is a nested type entry or a member function entry, ...
- if (isType((*I)->getTag()) || (*I)->getTag() == dwarf::DW_TAG_subprogram) {
- StringRef Name = getDIEStringAttr(**I, dwarf::DW_AT_name);
+ if (isType(C->getTag()) || C->getTag() == dwarf::DW_TAG_subprogram) {
+ StringRef Name = getDIEStringAttr(*C, dwarf::DW_AT_name);
// ... and has a DW_AT_name attribute
if (!Name.empty()) {
- hashNestedType(**I, Name);
+ hashNestedType(*C, Name);
continue;
}
}
- computeHash(**I);
+ computeHash(*C);
}
// Following the last (or if there are no children), append a zero byte.
diff --git a/lib/CodeGen/AsmPrinter/DIEHash.h b/lib/CodeGen/AsmPrinter/DIEHash.h
index 48f1601..175d660 100644
--- a/lib/CodeGen/AsmPrinter/DIEHash.h
+++ b/lib/CodeGen/AsmPrinter/DIEHash.h
@@ -89,7 +89,7 @@ class DIEHash {
};
public:
- DIEHash(AsmPrinter *A = NULL) : AP(A) {}
+ DIEHash(AsmPrinter *A = nullptr) : AP(A) {}
/// \brief Computes the ODR signature.
uint64_t computeDIEODRSignature(const DIE &Die);
diff --git a/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp b/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp
new file mode 100644
index 0000000..6103254
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp
@@ -0,0 +1,175 @@
+//===-- llvm/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DbgValueHistoryCalculator.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <algorithm>
+#include <map>
+
+#define DEBUG_TYPE "dwarfdebug"
+
+namespace llvm {
+
+// \brief If @MI is a DBG_VALUE whose value is described by a
+// defined register, returns the number of that register.
+// Otherwise, returns 0.
+static unsigned isDescribedByReg(const MachineInstr &MI) {
+ assert(MI.isDebugValue());
+ assert(MI.getNumOperands() == 3);
+ // If the location of the variable is described using a register (directly
+ // or indirectly), that register is always the first operand.
+ return MI.getOperand(0).isReg() ? MI.getOperand(0).getReg() : 0;
+}
+
+void DbgValueHistoryMap::startInstrRange(const MDNode *Var,
+ const MachineInstr &MI) {
+ // Instruction range should start with a DBG_VALUE instruction for the
+ // variable.
+ assert(MI.isDebugValue() && MI.getDebugVariable() == Var);
+ auto &Ranges = VarInstrRanges[Var];
+ if (!Ranges.empty() && Ranges.back().second == nullptr &&
+ Ranges.back().first->isIdenticalTo(&MI)) {
+ DEBUG(dbgs() << "Coalescing identical DBG_VALUE entries:\n"
+ << "\t" << Ranges.back().first << "\t" << MI << "\n");
+ return;
+ }
+ Ranges.push_back(std::make_pair(&MI, nullptr));
+}
+
+void DbgValueHistoryMap::endInstrRange(const MDNode *Var,
+ const MachineInstr &MI) {
+ auto &Ranges = VarInstrRanges[Var];
+ // Verify that the current instruction range is not yet closed.
+ assert(!Ranges.empty() && Ranges.back().second == nullptr);
+ // For now, instruction ranges are not allowed to cross basic block
+ // boundaries.
+ assert(Ranges.back().first->getParent() == MI.getParent());
+ Ranges.back().second = &MI;
+}
+
+unsigned DbgValueHistoryMap::getRegisterForVar(const MDNode *Var) const {
+ const auto &I = VarInstrRanges.find(Var);
+ if (I == VarInstrRanges.end())
+ return 0;
+ const auto &Ranges = I->second;
+ if (Ranges.empty() || Ranges.back().second != nullptr)
+ return 0;
+ return isDescribedByReg(*Ranges.back().first);
+}
+
+namespace {
+// Maps physreg numbers to the variables they describe.
+typedef std::map<unsigned, SmallVector<const MDNode *, 1>> RegDescribedVarsMap;
+}
+
+// \brief Claim that @Var is not described by @RegNo anymore.
+static void dropRegDescribedVar(RegDescribedVarsMap &RegVars,
+ unsigned RegNo, const MDNode *Var) {
+ const auto &I = RegVars.find(RegNo);
+ assert(RegNo != 0U && I != RegVars.end());
+ auto &VarSet = I->second;
+ const auto &VarPos = std::find(VarSet.begin(), VarSet.end(), Var);
+ assert(VarPos != VarSet.end());
+ VarSet.erase(VarPos);
+ // Don't keep empty sets in a map to keep it as small as possible.
+ if (VarSet.empty())
+ RegVars.erase(I);
+}
+
+// \brief Claim that @Var is now described by @RegNo.
+static void addRegDescribedVar(RegDescribedVarsMap &RegVars,
+ unsigned RegNo, const MDNode *Var) {
+ assert(RegNo != 0U);
+ auto &VarSet = RegVars[RegNo];
+ assert(std::find(VarSet.begin(), VarSet.end(), Var) == VarSet.end());
+ VarSet.push_back(Var);
+}
+
+// \brief Terminate the location range for variables described by register
+// @RegNo by inserting @ClobberingInstr into their history.
+static void clobberRegisterUses(RegDescribedVarsMap &RegVars, unsigned RegNo,
+ DbgValueHistoryMap &HistMap,
+ const MachineInstr &ClobberingInstr) {
+ const auto &I = RegVars.find(RegNo);
+ if (I == RegVars.end())
+ return;
+ // Iterate over all variables described by this register and add this
+ // instruction to their history, clobbering it.
+ for (const auto &Var : I->second)
+ HistMap.endInstrRange(Var, ClobberingInstr);
+ RegVars.erase(I);
+}
+
+// \brief Terminate location ranges for all variables described by registers
+// clobbered by @MI.
+static void clobberRegisterUses(RegDescribedVarsMap &RegVars,
+ const MachineInstr &MI,
+ const TargetRegisterInfo *TRI,
+ DbgValueHistoryMap &HistMap) {
+ for (const MachineOperand &MO : MI.operands()) {
+ if (!MO.isReg() || !MO.isDef() || !MO.getReg())
+ continue;
+ for (MCRegAliasIterator AI(MO.getReg(), TRI, true); AI.isValid();
+ ++AI) {
+ unsigned RegNo = *AI;
+ clobberRegisterUses(RegVars, RegNo, HistMap, MI);
+ }
+ }
+}
+
+// \brief Terminate the location range for all register-described variables
+// by inserting @ClobberingInstr into their history.
+static void clobberAllRegistersUses(RegDescribedVarsMap &RegVars,
+ DbgValueHistoryMap &HistMap,
+ const MachineInstr &ClobberingInstr) {
+ for (const auto &I : RegVars)
+ for (const auto &Var : I.second)
+ HistMap.endInstrRange(Var, ClobberingInstr);
+ RegVars.clear();
+}
+
+void calculateDbgValueHistory(const MachineFunction *MF,
+ const TargetRegisterInfo *TRI,
+ DbgValueHistoryMap &Result) {
+ RegDescribedVarsMap RegVars;
+
+ for (const auto &MBB : *MF) {
+ for (const auto &MI : MBB) {
+ if (!MI.isDebugValue()) {
+ // Not a DBG_VALUE instruction. It may clobber registers which describe
+ // some variables.
+ clobberRegisterUses(RegVars, MI, TRI, Result);
+ continue;
+ }
+
+ assert(MI.getNumOperands() > 1 && "Invalid DBG_VALUE instruction!");
+ const MDNode *Var = MI.getDebugVariable();
+
+ if (unsigned PrevReg = Result.getRegisterForVar(Var))
+ dropRegDescribedVar(RegVars, PrevReg, Var);
+
+ Result.startInstrRange(Var, MI);
+
+ if (unsigned NewReg = isDescribedByReg(MI))
+ addRegDescribedVar(RegVars, NewReg, Var);
+ }
+
+ // Make sure locations for register-described variables are valid only
+ // until the end of the basic block (unless it's the last basic block, in
+ // which case let their liveness run off to the end of the function).
+ if (!MBB.empty() && &MBB != &MF->back())
+ clobberAllRegistersUses(RegVars, Result, MBB.back());
+ }
+}
+
+}
diff --git a/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h b/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h
new file mode 100644
index 0000000..b9177f0
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h
@@ -0,0 +1,54 @@
+//===-- llvm/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h ----*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CODEGEN_ASMPRINTER_DBGVALUEHISTORYCALCULATOR_H_
+#define CODEGEN_ASMPRINTER_DBGVALUEHISTORYCALCULATOR_H_
+
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/SmallVector.h"
+
+namespace llvm {
+
+class MachineFunction;
+class MachineInstr;
+class MDNode;
+class TargetRegisterInfo;
+
+// For each user variable, keep a list of instruction ranges where this variable
+// is accessible. The variables are listed in order of appearance.
+class DbgValueHistoryMap {
+ // Each instruction range starts with a DBG_VALUE instruction, specifying the
+ // location of a variable, which is assumed to be valid until the end of the
+ // range. If end is not specified, location is valid until the start
+ // instruction of the next instruction range, or until the end of the
+ // function.
+ typedef std::pair<const MachineInstr *, const MachineInstr *> InstrRange;
+ typedef SmallVector<InstrRange, 4> InstrRanges;
+ typedef MapVector<const MDNode *, InstrRanges> InstrRangesMap;
+ InstrRangesMap VarInstrRanges;
+
+public:
+ void startInstrRange(const MDNode *Var, const MachineInstr &MI);
+ void endInstrRange(const MDNode *Var, const MachineInstr &MI);
+ // Returns the register currently describing @Var. If @Var is currently
+ // inaccessible or is not described by a register, returns 0.
+ unsigned getRegisterForVar(const MDNode *Var) const;
+
+ bool empty() const { return VarInstrRanges.empty(); }
+ void clear() { VarInstrRanges.clear(); }
+ InstrRangesMap::const_iterator begin() const { return VarInstrRanges.begin(); }
+ InstrRangesMap::const_iterator end() const { return VarInstrRanges.end(); }
+};
+
+void calculateDbgValueHistory(const MachineFunction *MF,
+ const TargetRegisterInfo *TRI,
+ DbgValueHistoryMap &Result);
+}
+
+#endif
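Taken together, the two new files give a small per-function API: populate a DbgValueHistoryMap once, then walk the per-variable instruction ranges. A usage sketch against the header above, assuming a MachineFunction *MF and TargetRegisterInfo *TRI are in scope (as they are in DwarfDebug when a function begins); only interfaces declared in DbgValueHistoryCalculator.h are used.

DbgValueHistoryMap DbgValues;
calculateDbgValueHistory(MF, TRI, DbgValues);

for (const auto &Entry : DbgValues) {
  const MDNode *Var = Entry.first;            // the user variable
  for (const auto &Range : Entry.second) {    // [DBG_VALUE, end) pairs
    const MachineInstr *Begin = Range.first;  // always a DBG_VALUE
    const MachineInstr *End = Range.second;   // null: open to function end
    (void)Var; (void)Begin; (void)End;        // consume as needed
  }
}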
diff --git a/lib/CodeGen/AsmPrinter/DebugLocEntry.h b/lib/CodeGen/AsmPrinter/DebugLocEntry.h
index 470453f..3beb799 100644
--- a/lib/CodeGen/AsmPrinter/DebugLocEntry.h
+++ b/lib/CodeGen/AsmPrinter/DebugLocEntry.h
@@ -23,75 +23,82 @@ class DebugLocEntry {
const MCSymbol *Begin;
const MCSymbol *End;
- // Type of entry that this represents.
- enum EntryType { E_Location, E_Integer, E_ConstantFP, E_ConstantInt };
- enum EntryType EntryKind;
+public:
+ /// A single location or constant.
+ struct Value {
+ Value(const MDNode *Var, int64_t i)
+ : Variable(Var), EntryKind(E_Integer) {
+ Constant.Int = i;
+ }
+ Value(const MDNode *Var, const ConstantFP *CFP)
+ : Variable(Var), EntryKind(E_ConstantFP) {
+ Constant.CFP = CFP;
+ }
+ Value(const MDNode *Var, const ConstantInt *CIP)
+ : Variable(Var), EntryKind(E_ConstantInt) {
+ Constant.CIP = CIP;
+ }
+ Value(const MDNode *Var, MachineLocation Loc)
+ : Variable(Var), EntryKind(E_Location), Loc(Loc) {
+ }
- union {
- int64_t Int;
- const ConstantFP *CFP;
- const ConstantInt *CIP;
- } Constants;
+ // The variable to which this location entry corresponds.
+ const MDNode *Variable;
- // The location in the machine frame.
- MachineLocation Loc;
+ // Type of entry that this represents.
+ enum EntryType { E_Location, E_Integer, E_ConstantFP, E_ConstantInt };
+ enum EntryType EntryKind;
- // The variable to which this location entry corresponds.
- const MDNode *Variable;
+ // Either a constant,
+ union {
+ int64_t Int;
+ const ConstantFP *CFP;
+ const ConstantInt *CIP;
+ } Constant;
- // The compile unit to which this location entry is referenced by.
- const DwarfCompileUnit *Unit;
+ // Or a location in the machine frame.
+ MachineLocation Loc;
- bool hasSameValueOrLocation(const DebugLocEntry &Next) {
- if (EntryKind != Next.EntryKind)
- return false;
+ bool operator==(const Value &other) const {
+ if (EntryKind != other.EntryKind)
+ return false;
- bool EqualValues;
- switch (EntryKind) {
- case E_Location:
- EqualValues = Loc == Next.Loc;
- break;
- case E_Integer:
- EqualValues = Constants.Int == Next.Constants.Int;
- break;
- case E_ConstantFP:
- EqualValues = Constants.CFP == Next.Constants.CFP;
- break;
- case E_ConstantInt:
- EqualValues = Constants.CIP == Next.Constants.CIP;
- break;
+ switch (EntryKind) {
+ case E_Location:
+ return Loc == other.Loc;
+ case E_Integer:
+ return Constant.Int == other.Constant.Int;
+ case E_ConstantFP:
+ return Constant.CFP == other.Constant.CFP;
+ case E_ConstantInt:
+ return Constant.CIP == other.Constant.CIP;
+ }
+ llvm_unreachable("unhandled EntryKind");
}
- return EqualValues;
- }
+ bool isLocation() const { return EntryKind == E_Location; }
+ bool isInt() const { return EntryKind == E_Integer; }
+ bool isConstantFP() const { return EntryKind == E_ConstantFP; }
+ bool isConstantInt() const { return EntryKind == E_ConstantInt; }
+ int64_t getInt() const { return Constant.Int; }
+ const ConstantFP *getConstantFP() const { return Constant.CFP; }
+ const ConstantInt *getConstantInt() const { return Constant.CIP; }
+ MachineLocation getLoc() const { return Loc; }
+ const MDNode *getVariable() const { return Variable; }
+ };
+private:
+ /// A list of locations/constants belonging to this entry.
+ SmallVector<Value, 1> Values;
+
+ /// The compile unit that this location entry is referenced by.
+ const DwarfCompileUnit *Unit;
public:
- DebugLocEntry() : Begin(0), End(0), Variable(0), Unit(0) {
- Constants.Int = 0;
- }
- DebugLocEntry(const MCSymbol *B, const MCSymbol *E, MachineLocation &L,
- const MDNode *V, const DwarfCompileUnit *U)
- : Begin(B), End(E), Loc(L), Variable(V), Unit(U) {
- Constants.Int = 0;
- EntryKind = E_Location;
- }
- DebugLocEntry(const MCSymbol *B, const MCSymbol *E, int64_t i,
- const DwarfCompileUnit *U)
- : Begin(B), End(E), Variable(0), Unit(U) {
- Constants.Int = i;
- EntryKind = E_Integer;
- }
- DebugLocEntry(const MCSymbol *B, const MCSymbol *E, const ConstantFP *FPtr,
- const DwarfCompileUnit *U)
- : Begin(B), End(E), Variable(0), Unit(U) {
- Constants.CFP = FPtr;
- EntryKind = E_ConstantFP;
- }
- DebugLocEntry(const MCSymbol *B, const MCSymbol *E, const ConstantInt *IPtr,
- const DwarfCompileUnit *U)
- : Begin(B), End(E), Variable(0), Unit(U) {
- Constants.CIP = IPtr;
- EntryKind = E_ConstantInt;
+ DebugLocEntry() : Begin(nullptr), End(nullptr), Unit(nullptr) {}
+ DebugLocEntry(const MCSymbol *B, const MCSymbol *E,
+ Value Val, const DwarfCompileUnit *U)
+ : Begin(B), End(E), Unit(U) {
+ Values.push_back(std::move(Val));
}
/// \brief Attempt to merge this DebugLocEntry with Next and return
@@ -99,24 +106,17 @@ public:
/// share the same Loc/Constant and if Next immediately follows this
/// Entry.
bool Merge(const DebugLocEntry &Next) {
- if (End == Next.Begin && hasSameValueOrLocation(Next)) {
+ if ((End == Next.Begin && Values == Next.Values)) {
End = Next.End;
return true;
}
return false;
}
- bool isLocation() const { return EntryKind == E_Location; }
- bool isInt() const { return EntryKind == E_Integer; }
- bool isConstantFP() const { return EntryKind == E_ConstantFP; }
- bool isConstantInt() const { return EntryKind == E_ConstantInt; }
- int64_t getInt() const { return Constants.Int; }
- const ConstantFP *getConstantFP() const { return Constants.CFP; }
- const ConstantInt *getConstantInt() const { return Constants.CIP; }
- const MDNode *getVariable() const { return Variable; }
const MCSymbol *getBeginSym() const { return Begin; }
const MCSymbol *getEndSym() const { return End; }
const DwarfCompileUnit *getCU() const { return Unit; }
- MachineLocation getLoc() const { return Loc; }
+ const ArrayRef<Value> getValues() const { return Values; }
+ void addValue(Value Val) { Values.push_back(Val); }
};
}
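The reworked Merge above coalesces adjacent location entries only when the end symbol of one entry is the begin symbol of the next and the two value lists compare equal. The same rule in a standalone, runnable sketch over a simplified entry type; the string symbols and integer values are hypothetical stand-ins for MCSymbols and DebugLocEntry::Value.

#include <string>
#include <vector>

// Simplified model of DebugLocEntry::Merge: two adjacent entries fuse when
// their ranges abut and they carry equal value lists.
struct Entry {
  std::string Begin, End;
  std::vector<int> Values;

  bool merge(const Entry &Next) {
    if (End == Next.Begin && Values == Next.Values) {
      End = Next.End; // extend this entry over Next's range
      return true;
    }
    return false;
  }
};

int main() {
  Entry A{"L0", "L1", {42}};
  Entry B{"L1", "L2", {42}};
  Entry C{"L2", "L3", {7}};
  bool AB = A.merge(B); // true: ranges abut, same value
  bool AC = A.merge(C); // false: values differ
  return (AB && !AC) ? 0 : 1;
}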
diff --git a/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp b/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp
index bcbb6c8..e9527c4 100644
--- a/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp
@@ -29,14 +29,15 @@ DwarfAccelTable::DwarfAccelTable(ArrayRef<DwarfAccelTable::Atom> atomList)
: Header(8 + (atomList.size() * 4)), HeaderData(atomList),
Entries(Allocator) {}
-DwarfAccelTable::~DwarfAccelTable() {}
-
-void DwarfAccelTable::AddName(StringRef Name, const DIE *die, char Flags) {
+void DwarfAccelTable::AddName(StringRef Name, MCSymbol *StrSym, const DIE *die,
+ char Flags) {
assert(Data.empty() && "Already finalized!");
// If the string is in the list already then add this die to the list
// otherwise add a new one.
DataArray &DIEs = Entries[Name];
- DIEs.push_back(new (Allocator) HashDataContents(die, Flags));
+ assert(!DIEs.StrSym || DIEs.StrSym == StrSym);
+ DIEs.StrSym = StrSym;
+ DIEs.Values.push_back(new (Allocator) HashDataContents(die, Flags));
}
void DwarfAccelTable::ComputeBucketCount(void) {
@@ -72,9 +73,10 @@ void DwarfAccelTable::FinalizeTable(AsmPrinter *Asm, StringRef Prefix) {
EI != EE; ++EI) {
// Unique the entries.
- std::stable_sort(EI->second.begin(), EI->second.end(), compareDIEs);
- EI->second.erase(std::unique(EI->second.begin(), EI->second.end()),
- EI->second.end());
+ std::stable_sort(EI->second.Values.begin(), EI->second.Values.end(), compareDIEs);
+ EI->second.Values.erase(
+ std::unique(EI->second.Values.begin(), EI->second.Values.end()),
+ EI->second.Values.end());
HashData *Entry = new (Allocator) HashData(EI->getKey(), EI->second);
Data.push_back(Entry);
@@ -181,21 +183,18 @@ void DwarfAccelTable::EmitData(AsmPrinter *Asm, DwarfFile *D) {
// Remember to emit the label for our offset.
Asm->OutStreamer.EmitLabel((*HI)->Sym);
Asm->OutStreamer.AddComment((*HI)->Str);
- Asm->EmitSectionOffset(D->getStringPoolEntry((*HI)->Str),
- D->getStringPoolSym());
+ Asm->EmitSectionOffset((*HI)->Data.StrSym,
+ D->getStringPool().getSectionSymbol());
Asm->OutStreamer.AddComment("Num DIEs");
- Asm->EmitInt32((*HI)->Data.size());
- for (ArrayRef<HashDataContents *>::const_iterator
- DI = (*HI)->Data.begin(),
- DE = (*HI)->Data.end();
- DI != DE; ++DI) {
+ Asm->EmitInt32((*HI)->Data.Values.size());
+ for (HashDataContents *HD : (*HI)->Data.Values) {
// Emit the DIE offset
- Asm->EmitInt32((*DI)->Die->getOffset());
+ Asm->EmitInt32(HD->Die->getOffset());
// If we have multiple Atoms emit that info too.
// FIXME: A bit of a hack, we either emit only one atom or all info.
if (HeaderData.Atoms.size() > 1) {
- Asm->EmitInt16((*DI)->Die->getTag());
- Asm->EmitInt8((*DI)->Flags);
+ Asm->EmitInt16(HD->Die->getTag());
+ Asm->EmitInt8(HD->Flags);
}
}
// Emit a 0 to terminate the data unless we have a hash collision.
@@ -235,10 +234,8 @@ void DwarfAccelTable::print(raw_ostream &O) {
EE = Entries.end();
EI != EE; ++EI) {
O << "Name: " << EI->getKeyData() << "\n";
- for (DataArray::const_iterator DI = EI->second.begin(),
- DE = EI->second.end();
- DI != DE; ++DI)
- (*DI)->print(O);
+ for (HashDataContents *HD : EI->second.Values)
+ HD->print(O);
}
O << "Buckets and Hashes: \n";
diff --git a/lib/CodeGen/AsmPrinter/DwarfAccelTable.h b/lib/CodeGen/AsmPrinter/DwarfAccelTable.h
index 4a14497..a3cc95f 100644
--- a/lib/CodeGen/AsmPrinter/DwarfAccelTable.h
+++ b/lib/CodeGen/AsmPrinter/DwarfAccelTable.h
@@ -18,6 +18,7 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/DataTypes.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Dwarf.h"
@@ -125,7 +126,8 @@ public:
uint16_t type; // enum AtomType
uint16_t form; // DWARF DW_FORM_ defines
- Atom(uint16_t type, uint16_t form) : type(type), form(form) {}
+ LLVM_CONSTEXPR Atom(uint16_t type, uint16_t form)
+ : type(type), form(form) {}
#ifndef NDEBUG
void print(raw_ostream &O) {
O << "Type: " << dwarf::AtomTypeString(type) << "\n"
@@ -177,12 +179,19 @@ public:
};
private:
+ // String Data
+ struct DataArray {
+ MCSymbol *StrSym;
+ std::vector<HashDataContents *> Values;
+ DataArray() : StrSym(nullptr) {}
+ };
+ friend struct HashData;
struct HashData {
StringRef Str;
uint32_t HashValue;
MCSymbol *Sym;
- ArrayRef<HashDataContents *> Data; // offsets
- HashData(StringRef S, ArrayRef<HashDataContents *> Data)
+ DwarfAccelTable::DataArray &Data; // offsets
+ HashData(StringRef S, DwarfAccelTable::DataArray &Data)
: Str(S), Data(Data) {
HashValue = DwarfAccelTable::HashDJB(S);
}
@@ -196,10 +205,10 @@ private:
else
O << "<none>";
O << "\n";
- for (size_t i = 0; i < Data.size(); i++) {
- O << " Offset: " << Data[i]->Die->getOffset() << "\n";
- O << " Tag: " << dwarf::TagString(Data[i]->Die->getTag()) << "\n";
- O << " Flags: " << Data[i]->Flags << "\n";
+ for (HashDataContents *C : Data.Values) {
+ O << " Offset: " << C->Die->getOffset() << "\n";
+ O << " Tag: " << dwarf::TagString(C->Die->getTag()) << "\n";
+ O << " Flags: " << C->Flags << "\n";
}
}
void dump() { print(dbgs()); }
@@ -224,8 +233,6 @@ private:
TableHeaderData HeaderData;
std::vector<HashData *> Data;
- // String Data
- typedef std::vector<HashDataContents *> DataArray;
typedef StringMap<DataArray, BumpPtrAllocator &> StringEntries;
StringEntries Entries;
@@ -238,8 +245,8 @@ private:
// Public Implementation
public:
DwarfAccelTable(ArrayRef<DwarfAccelTable::Atom>);
- ~DwarfAccelTable();
- void AddName(StringRef, const DIE *, char = 0);
+ void AddName(StringRef Name, MCSymbol *StrSym, const DIE *Die,
+ char Flags = 0);
void FinalizeTable(AsmPrinter *, StringRef);
void Emit(AsmPrinter *, MCSymbol *, DwarfFile *);
#ifndef NDEBUG
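The table keys its entries through DwarfAccelTable::HashDJB, the Bernstein "times 33" string hash used by the Apple accelerator-table format. A standalone sketch of that hash (the function name and the bucket count below are our own, not LLVM's):

#include <cstdint>
#include <cstdio>
#include <string>

// Bernstein DJB hash: start at 5381, multiply by 33 and add each byte.
static uint32_t hashDJB(const std::string &S) {
  uint32_t H = 5381;
  for (unsigned char C : S)
    H = H * 33 + C;
  return H;
}

int main() {
  // Bucket selection is typically hash modulo the bucket count.
  const unsigned NumBuckets = 16; // hypothetical bucket count
  std::printf("%u\n", hashDJB("main") % NumBuckets);
}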
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 11345eb..2a0615d 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -11,12 +11,10 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "dwarfdebug"
#include "ByteStreamer.h"
#include "DwarfDebug.h"
#include "DIE.h"
#include "DIEHash.h"
-#include "DwarfAccelTable.h"
#include "DwarfUnit.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
@@ -51,6 +49,8 @@
#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
+#define DEBUG_TYPE "dwarfdebug"
+
static cl::opt<bool>
DisableDebugInfoPrinting("disable-debug-info-print", cl::Hidden,
cl::desc("Disable debug info printing"));
@@ -107,8 +107,6 @@ static const char *const DbgTimerName = "DWARF Debug Writer";
//===----------------------------------------------------------------------===//
-namespace llvm {
-
/// resolve - Look in the DwarfDebug map for the MDNode that
/// corresponds to the reference.
template <typename T> T DbgVariable::resolve(DIRef<T> Ref) const {
@@ -120,7 +118,6 @@ bool DbgVariable::isBlockByrefVariable() const {
return Var.isBlockByrefVariable(DD->getTypeIdentifierMap());
}
-
DIType DbgVariable::getType() const {
DIType Ty = Var.getType().resolve(DD->getTypeIdentifierMap());
// FIXME: isBlockByrefVariable should be reformulated in terms of complex
@@ -166,29 +163,32 @@ DIType DbgVariable::getType() const {
return Ty;
}
-} // end llvm namespace
-
-/// Return Dwarf Version by checking module flags.
-static unsigned getDwarfVersionFromModule(const Module *M) {
- Value *Val = M->getModuleFlag("Dwarf Version");
- if (!Val)
- return dwarf::DWARF_VERSION;
- return cast<ConstantInt>(Val)->getZExtValue();
-}
+static LLVM_CONSTEXPR DwarfAccelTable::Atom TypeAtoms[] = {
+ DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4),
+ DwarfAccelTable::Atom(dwarf::DW_ATOM_die_tag, dwarf::DW_FORM_data2),
+ DwarfAccelTable::Atom(dwarf::DW_ATOM_type_flags, dwarf::DW_FORM_data1)};
DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
- : Asm(A), MMI(Asm->MMI), FirstCU(0), PrevLabel(NULL), GlobalRangeCount(0),
- InfoHolder(A, "info_string", DIEValueAllocator),
+ : Asm(A), MMI(Asm->MMI), FirstCU(nullptr), PrevLabel(nullptr),
+ GlobalRangeCount(0), InfoHolder(A, "info_string", DIEValueAllocator),
UsedNonDefaultText(false),
- SkeletonHolder(A, "skel_string", DIEValueAllocator) {
-
- DwarfInfoSectionSym = DwarfAbbrevSectionSym = DwarfStrSectionSym = 0;
- DwarfDebugRangeSectionSym = DwarfDebugLocSectionSym = DwarfLineSectionSym = 0;
- DwarfAddrSectionSym = 0;
- DwarfAbbrevDWOSectionSym = DwarfStrDWOSectionSym = 0;
- FunctionBeginSym = FunctionEndSym = 0;
- CurFn = 0;
- CurMI = 0;
+ SkeletonHolder(A, "skel_string", DIEValueAllocator),
+ AccelNames(DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset,
+ dwarf::DW_FORM_data4)),
+ AccelObjC(DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset,
+ dwarf::DW_FORM_data4)),
+ AccelNamespace(DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset,
+ dwarf::DW_FORM_data4)),
+ AccelTypes(TypeAtoms) {
+
+ DwarfInfoSectionSym = DwarfAbbrevSectionSym = DwarfStrSectionSym = nullptr;
+ DwarfDebugRangeSectionSym = DwarfDebugLocSectionSym = nullptr;
+ DwarfLineSectionSym = nullptr;
+ DwarfAddrSectionSym = nullptr;
+ DwarfAbbrevDWOSectionSym = DwarfStrDWOSectionSym = nullptr;
+ FunctionBeginSym = FunctionEndSym = nullptr;
+ CurFn = nullptr;
+ CurMI = nullptr;
// Turn on accelerator tables for Darwin by default, pubnames by
// default for non-Darwin, and handle split dwarf.
@@ -209,9 +209,8 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
else
HasDwarfPubSections = DwarfPubSections == Enable;
- DwarfVersion = DwarfVersionNumber
- ? DwarfVersionNumber
- : getDwarfVersionFromModule(MMI->getModule());
+ DwarfVersion = DwarfVersionNumber ? DwarfVersionNumber
+ : MMI->getModule()->getDwarfVersion();
{
NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled);
@@ -219,76 +218,22 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
}
}
+// Define out of line so we don't have to include DwarfUnit.h in DwarfDebug.h.
+DwarfDebug::~DwarfDebug() { }
+
// Switch to the specified MCSection and emit an assembler
// temporary label to it if SymbolStem is specified.
static MCSymbol *emitSectionSym(AsmPrinter *Asm, const MCSection *Section,
- const char *SymbolStem = 0) {
+ const char *SymbolStem = nullptr) {
Asm->OutStreamer.SwitchSection(Section);
if (!SymbolStem)
- return 0;
+ return nullptr;
MCSymbol *TmpSym = Asm->GetTempSymbol(SymbolStem);
Asm->OutStreamer.EmitLabel(TmpSym);
return TmpSym;
}
-DwarfFile::~DwarfFile() {
- for (DwarfUnit *DU : CUs)
- delete DU;
-}
-
-MCSymbol *DwarfFile::getStringPoolSym() {
- return Asm->GetTempSymbol(StringPref);
-}
-
-MCSymbol *DwarfFile::getStringPoolEntry(StringRef Str) {
- std::pair<MCSymbol *, unsigned> &Entry =
- StringPool.GetOrCreateValue(Str).getValue();
- if (Entry.first)
- return Entry.first;
-
- Entry.second = NextStringPoolNumber++;
- return Entry.first = Asm->GetTempSymbol(StringPref, Entry.second);
-}
-
-unsigned DwarfFile::getStringPoolIndex(StringRef Str) {
- std::pair<MCSymbol *, unsigned> &Entry =
- StringPool.GetOrCreateValue(Str).getValue();
- if (Entry.first)
- return Entry.second;
-
- Entry.second = NextStringPoolNumber++;
- Entry.first = Asm->GetTempSymbol(StringPref, Entry.second);
- return Entry.second;
-}
-
-unsigned DwarfFile::getAddrPoolIndex(const MCSymbol *Sym, bool TLS) {
- std::pair<AddrPool::iterator, bool> P = AddressPool.insert(
- std::make_pair(Sym, AddressPoolEntry(NextAddrPoolNumber, TLS)));
- if (P.second)
- ++NextAddrPoolNumber;
- return P.first->second.Number;
-}
-
-// Define a unique number for the abbreviation.
-//
-void DwarfFile::assignAbbrevNumber(DIEAbbrev &Abbrev) {
- // Check the set for priors.
- DIEAbbrev *InSet = AbbreviationsSet.GetOrInsertNode(&Abbrev);
-
- // If it's newly added.
- if (InSet == &Abbrev) {
- // Add to abbreviation list.
- Abbreviations.push_back(&Abbrev);
-
- // Assign the vector position + 1 as its number.
- Abbrev.setNumber(Abbreviations.size());
- } else {
- // Assign existing abbreviation number.
- Abbrev.setNumber(InSet->getNumber());
- }
-}
-
static bool isObjCClass(StringRef Name) {
return Name.startswith("+") || Name.startswith("-");
}
@@ -328,26 +273,26 @@ static bool SectionSort(const MCSection *A, const MCSection *B) {
// TODO: Determine whether or not we should add names for programs
// that do not have a DW_AT_name or DW_AT_linkage_name field - this
// is only slightly different than the lookup of non-standard ObjC names.
-static void addSubprogramNames(DwarfUnit *TheU, DISubprogram SP, DIE *Die) {
+void DwarfDebug::addSubprogramNames(DISubprogram SP, DIE &Die) {
if (!SP.isDefinition())
return;
- TheU->addAccelName(SP.getName(), Die);
+ addAccelName(SP.getName(), Die);
// If the linkage name is different than the name, go ahead and output
// that as well into the name table.
if (SP.getLinkageName() != "" && SP.getName() != SP.getLinkageName())
- TheU->addAccelName(SP.getLinkageName(), Die);
+ addAccelName(SP.getLinkageName(), Die);
// If this is an Objective-C selector name add it to the ObjC accelerator
// too.
if (isObjCClass(SP.getName())) {
StringRef Class, Category;
getObjCClassCategory(SP.getName(), Class, Category);
- TheU->addAccelObjC(Class, Die);
+ addAccelObjC(Class, Die);
if (Category != "")
- TheU->addAccelObjC(Category, Die);
+ addAccelObjC(Category, Die);
// Also add the base method name to the name table.
- TheU->addAccelName(getObjCMethodName(SP.getName()), Die);
+ addAccelName(getObjCMethodName(SP.getName()), Die);
}
}
@@ -367,58 +312,21 @@ bool DwarfDebug::isSubprogramContext(const MDNode *Context) {
// Find DIE for the given subprogram and attach appropriate DW_AT_low_pc
// and DW_AT_high_pc attributes. If there are global variables in this
// scope then create and insert DIEs for these variables.
-DIE *DwarfDebug::updateSubprogramScopeDIE(DwarfCompileUnit *SPCU,
+DIE &DwarfDebug::updateSubprogramScopeDIE(DwarfCompileUnit &SPCU,
DISubprogram SP) {
- DIE *SPDie = SPCU->getDIE(SP);
-
- assert(SPDie && "Unable to find subprogram DIE!");
-
- // If we're updating an abstract DIE, then we will be adding the children and
- // object pointer later on. But what we don't want to do is process the
- // concrete DIE twice.
- if (DIE *AbsSPDIE = AbstractSPDies.lookup(SP)) {
- // Pick up abstract subprogram DIE.
- SPDie =
- SPCU->createAndAddDIE(dwarf::DW_TAG_subprogram, *SPCU->getUnitDie());
- SPCU->addDIEEntry(SPDie, dwarf::DW_AT_abstract_origin, AbsSPDIE);
- } else {
- DISubprogram SPDecl = SP.getFunctionDeclaration();
- if (!SPDecl.isSubprogram()) {
- // There is not any need to generate specification DIE for a function
- // defined at compile unit level. If a function is defined inside another
- // function then gdb prefers the definition at top level and but does not
- // expect specification DIE in parent function. So avoid creating
- // specification DIE for a function defined inside a function.
- DIScope SPContext = resolve(SP.getContext());
- if (SP.isDefinition() && !SPContext.isCompileUnit() &&
- !SPContext.isFile() && !isSubprogramContext(SPContext)) {
- SPCU->addFlag(SPDie, dwarf::DW_AT_declaration);
-
- // Add arguments.
- DICompositeType SPTy = SP.getType();
- DIArray Args = SPTy.getTypeArray();
- uint16_t SPTag = SPTy.getTag();
- if (SPTag == dwarf::DW_TAG_subroutine_type)
- SPCU->constructSubprogramArguments(*SPDie, Args);
- DIE *SPDeclDie = SPDie;
- SPDie = SPCU->createAndAddDIE(dwarf::DW_TAG_subprogram,
- *SPCU->getUnitDie());
- SPCU->addDIEEntry(SPDie, dwarf::DW_AT_specification, SPDeclDie);
- }
- }
- }
+ DIE *SPDie = SPCU.getOrCreateSubprogramDIE(SP);
- attachLowHighPC(SPCU, SPDie, FunctionBeginSym, FunctionEndSym);
+ attachLowHighPC(SPCU, *SPDie, FunctionBeginSym, FunctionEndSym);
const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo();
MachineLocation Location(RI->getFrameRegister(*Asm->MF));
- SPCU->addAddress(SPDie, dwarf::DW_AT_frame_base, Location);
+ SPCU.addAddress(*SPDie, dwarf::DW_AT_frame_base, Location);
// Add name to the name table, we do this here because we're guaranteed
// to have concrete versions of our DW_TAG_subprogram nodes.
- addSubprogramNames(SPCU, SP, SPDie);
+ addSubprogramNames(SP, *SPDie);
- return SPDie;
+ return *SPDie;
}
/// Check whether we should create a DIE for the given Scope, return true
@@ -442,16 +350,16 @@ bool DwarfDebug::isLexicalScopeDIENull(LexicalScope *Scope) {
return !End;
}
-static void addSectionLabel(AsmPrinter *Asm, DwarfUnit *U, DIE *D,
+static void addSectionLabel(AsmPrinter &Asm, DwarfUnit &U, DIE &D,
dwarf::Attribute A, const MCSymbol *L,
const MCSymbol *Sec) {
- if (Asm->MAI->doesDwarfUseRelocationsAcrossSections())
- U->addSectionLabel(D, A, L);
+ if (Asm.MAI->doesDwarfUseRelocationsAcrossSections())
+ U.addSectionLabel(D, A, L);
else
- U->addSectionDelta(D, A, L, Sec);
+ U.addSectionDelta(D, A, L, Sec);
}
-void DwarfDebug::addScopeRangeList(DwarfCompileUnit *TheCU, DIE *ScopeDIE,
+void DwarfDebug::addScopeRangeList(DwarfCompileUnit &TheCU, DIE &ScopeDIE,
const SmallVectorImpl<InsnRange> &Range) {
// Emit offset in .debug_range as a relocatable label. emitDIE will handle
// emitting it appropriately.
@@ -460,10 +368,10 @@ void DwarfDebug::addScopeRangeList(DwarfCompileUnit *TheCU, DIE *ScopeDIE,
// Under fission, ranges are specified by constant offsets relative to the
// CU's DW_AT_GNU_ranges_base.
if (useSplitDwarf())
- TheCU->addSectionDelta(ScopeDIE, dwarf::DW_AT_ranges, RangeSym,
- DwarfDebugRangeSectionSym);
+ TheCU.addSectionDelta(ScopeDIE, dwarf::DW_AT_ranges, RangeSym,
+ DwarfDebugRangeSectionSym);
else
- addSectionLabel(Asm, TheCU, ScopeDIE, dwarf::DW_AT_ranges, RangeSym,
+ addSectionLabel(*Asm, TheCU, ScopeDIE, dwarf::DW_AT_ranges, RangeSym,
DwarfDebugRangeSectionSym);
RangeSpanList List(RangeSym);
@@ -473,227 +381,256 @@ void DwarfDebug::addScopeRangeList(DwarfCompileUnit *TheCU, DIE *ScopeDIE,
}
// Add the range list to the set of ranges to be emitted.
- TheCU->addRangeList(std::move(List));
+ TheCU.addRangeList(std::move(List));
+}
+
+void DwarfDebug::attachRangesOrLowHighPC(DwarfCompileUnit &TheCU, DIE &Die,
+ const SmallVectorImpl<InsnRange> &Ranges) {
+ assert(!Ranges.empty());
+ if (Ranges.size() == 1)
+ attachLowHighPC(TheCU, Die, getLabelBeforeInsn(Ranges.front().first),
+ getLabelAfterInsn(Ranges.front().second));
+ else
+ addScopeRangeList(TheCU, Die, Ranges);
}
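The new attachRangesOrLowHighPC above picks between the two DWARF encodings: a single contiguous range becomes a DW_AT_low_pc/DW_AT_high_pc pair, while anything else goes through a .debug_ranges list. The same decision in a standalone sketch; Range and the emit helpers are hypothetical, printing instead of emitting.

#include <cstdio>
#include <vector>

// Hypothetical model of an instruction range and the two emission paths.
struct Range { unsigned Lo, Hi; };

static void emitLowHighPC(const Range &R) {
  std::printf("DW_AT_low_pc=%u DW_AT_high_pc=%u\n", R.Lo, R.Hi);
}

static void emitRangeList(const std::vector<Range> &Rs) {
  std::printf("DW_AT_ranges -> .debug_ranges with %zu entries\n", Rs.size());
}

// Mirrors attachRangesOrLowHighPC: one range gets the cheap attribute pair,
// multiple ranges get a range list.
static void attachRangesOrLowHighPC(const std::vector<Range> &Rs) {
  if (Rs.size() == 1)
    emitLowHighPC(Rs.front());
  else
    emitRangeList(Rs);
}

int main() {
  attachRangesOrLowHighPC({{0x10, 0x40}});
  attachRangesOrLowHighPC({{0x10, 0x40}, {0x80, 0x90}});
}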
// Construct new DW_TAG_lexical_block for this scope and attach
// DW_AT_low_pc/DW_AT_high_pc labels.
-DIE *DwarfDebug::constructLexicalScopeDIE(DwarfCompileUnit *TheCU,
- LexicalScope *Scope) {
+std::unique_ptr<DIE>
+DwarfDebug::constructLexicalScopeDIE(DwarfCompileUnit &TheCU,
+ LexicalScope *Scope) {
if (isLexicalScopeDIENull(Scope))
- return 0;
+ return nullptr;
- DIE *ScopeDIE = new DIE(dwarf::DW_TAG_lexical_block);
+ auto ScopeDIE = make_unique<DIE>(dwarf::DW_TAG_lexical_block);
if (Scope->isAbstractScope())
return ScopeDIE;
- const SmallVectorImpl<InsnRange> &ScopeRanges = Scope->getRanges();
-
- // If we have multiple ranges, emit them into the range section.
- if (ScopeRanges.size() > 1) {
- addScopeRangeList(TheCU, ScopeDIE, ScopeRanges);
- return ScopeDIE;
- }
-
- // Construct the address range for this DIE.
- SmallVectorImpl<InsnRange>::const_iterator RI = ScopeRanges.begin();
- MCSymbol *Start = getLabelBeforeInsn(RI->first);
- MCSymbol *End = getLabelAfterInsn(RI->second);
- assert(End && "End label should not be null!");
-
- assert(Start->isDefined() && "Invalid starting label for an inlined scope!");
- assert(End->isDefined() && "Invalid end label for an inlined scope!");
-
- attachLowHighPC(TheCU, ScopeDIE, Start, End);
+ attachRangesOrLowHighPC(TheCU, *ScopeDIE, Scope->getRanges());
return ScopeDIE;
}
// This scope represents inlined body of a function. Construct DIE to
// represent this concrete inlined copy of the function.
-DIE *DwarfDebug::constructInlinedScopeDIE(DwarfCompileUnit *TheCU,
- LexicalScope *Scope) {
- const SmallVectorImpl<InsnRange> &ScopeRanges = Scope->getRanges();
- assert(!ScopeRanges.empty() &&
- "LexicalScope does not have instruction markers!");
-
- if (!Scope->getScopeNode())
- return NULL;
+std::unique_ptr<DIE>
+DwarfDebug::constructInlinedScopeDIE(DwarfCompileUnit &TheCU,
+ LexicalScope *Scope) {
+ assert(Scope->getScopeNode());
DIScope DS(Scope->getScopeNode());
DISubprogram InlinedSP = getDISubprogram(DS);
- DIE *OriginDIE = TheCU->getDIE(InlinedSP);
- if (!OriginDIE) {
- DEBUG(dbgs() << "Unable to find original DIE for an inlined subprogram.");
- return NULL;
- }
-
- DIE *ScopeDIE = new DIE(dwarf::DW_TAG_inlined_subroutine);
- TheCU->addDIEEntry(ScopeDIE, dwarf::DW_AT_abstract_origin, OriginDIE);
+ // Find the subprogram's DwarfCompileUnit in the SPMap in case the subprogram
+ // was inlined from another compile unit.
+ DIE *OriginDIE = AbstractSPDies[InlinedSP];
+ assert(OriginDIE && "Unable to find original DIE for an inlined subprogram.");
- // If we have multiple ranges, emit them into the range section.
- if (ScopeRanges.size() > 1)
- addScopeRangeList(TheCU, ScopeDIE, ScopeRanges);
- else {
- SmallVectorImpl<InsnRange>::const_iterator RI = ScopeRanges.begin();
- MCSymbol *StartLabel = getLabelBeforeInsn(RI->first);
- MCSymbol *EndLabel = getLabelAfterInsn(RI->second);
+ auto ScopeDIE = make_unique<DIE>(dwarf::DW_TAG_inlined_subroutine);
+ TheCU.addDIEEntry(*ScopeDIE, dwarf::DW_AT_abstract_origin, *OriginDIE);
- if (StartLabel == 0 || EndLabel == 0)
- llvm_unreachable("Unexpected Start and End labels for an inlined scope!");
-
- assert(StartLabel->isDefined() &&
- "Invalid starting label for an inlined scope!");
- assert(EndLabel->isDefined() && "Invalid end label for an inlined scope!");
-
- attachLowHighPC(TheCU, ScopeDIE, StartLabel, EndLabel);
- }
+ attachRangesOrLowHighPC(TheCU, *ScopeDIE, Scope->getRanges());
InlinedSubprogramDIEs.insert(OriginDIE);
// Add the call site information to the DIE.
DILocation DL(Scope->getInlinedAt());
- TheCU->addUInt(
- ScopeDIE, dwarf::DW_AT_call_file, None,
- TheCU->getOrCreateSourceID(DL.getFilename(), DL.getDirectory()));
- TheCU->addUInt(ScopeDIE, dwarf::DW_AT_call_line, None, DL.getLineNumber());
+ TheCU.addUInt(*ScopeDIE, dwarf::DW_AT_call_file, None,
+ TheCU.getOrCreateSourceID(DL.getFilename(), DL.getDirectory()));
+ TheCU.addUInt(*ScopeDIE, dwarf::DW_AT_call_line, None, DL.getLineNumber());
// Add name to the name table, we do this here because we're guaranteed
// to have concrete versions of our DW_TAG_inlined_subprogram nodes.
- addSubprogramNames(TheCU, InlinedSP, ScopeDIE);
+ addSubprogramNames(InlinedSP, *ScopeDIE);
return ScopeDIE;
}
-DIE *DwarfDebug::createScopeChildrenDIE(DwarfCompileUnit *TheCU,
- LexicalScope *Scope,
- SmallVectorImpl<DIE *> &Children) {
- DIE *ObjectPointer = NULL;
+static std::unique_ptr<DIE> constructVariableDIE(DwarfCompileUnit &TheCU,
+ DbgVariable &DV,
+ const LexicalScope &Scope,
+ DIE *&ObjectPointer) {
+ auto Var = TheCU.constructVariableDIE(DV, Scope.isAbstractScope());
+ if (DV.isObjectPointer())
+ ObjectPointer = Var.get();
+ return Var;
+}
+
+DIE *DwarfDebug::createScopeChildrenDIE(
+ DwarfCompileUnit &TheCU, LexicalScope *Scope,
+ SmallVectorImpl<std::unique_ptr<DIE>> &Children) {
+ DIE *ObjectPointer = nullptr;
// Collect arguments for current function.
if (LScopes.isCurrentFunctionScope(Scope)) {
for (DbgVariable *ArgDV : CurrentFnArguments)
if (ArgDV)
- if (DIE *Arg =
- TheCU->constructVariableDIE(*ArgDV, Scope->isAbstractScope())) {
- Children.push_back(Arg);
- if (ArgDV->isObjectPointer())
- ObjectPointer = Arg;
- }
+ Children.push_back(
+ constructVariableDIE(TheCU, *ArgDV, *Scope, ObjectPointer));
// If this is a variadic function, add an unspecified parameter.
DISubprogram SP(Scope->getScopeNode());
DIArray FnArgs = SP.getType().getTypeArray();
if (FnArgs.getElement(FnArgs.getNumElements() - 1)
.isUnspecifiedParameter()) {
- DIE *Ellipsis = new DIE(dwarf::DW_TAG_unspecified_parameters);
- Children.push_back(Ellipsis);
+ Children.push_back(
+ make_unique<DIE>(dwarf::DW_TAG_unspecified_parameters));
}
}
// Collect lexical scope children first.
for (DbgVariable *DV : ScopeVariables.lookup(Scope))
- if (DIE *Variable = TheCU->constructVariableDIE(*DV,
- Scope->isAbstractScope())) {
- Children.push_back(Variable);
- if (DV->isObjectPointer())
- ObjectPointer = Variable;
- }
+ Children.push_back(constructVariableDIE(TheCU, *DV, *Scope, ObjectPointer));
+
for (LexicalScope *LS : Scope->getChildren())
- if (DIE *Nested = constructScopeDIE(TheCU, LS))
- Children.push_back(Nested);
+ if (std::unique_ptr<DIE> Nested = constructScopeDIE(TheCU, LS))
+ Children.push_back(std::move(Nested));
return ObjectPointer;
}
+void DwarfDebug::createAndAddScopeChildren(DwarfCompileUnit &TheCU,
+ LexicalScope *Scope, DIE &ScopeDIE) {
+ // We create children when the scope DIE is not null.
+ SmallVector<std::unique_ptr<DIE>, 8> Children;
+ if (DIE *ObjectPointer = createScopeChildrenDIE(TheCU, Scope, Children))
+ TheCU.addDIEEntry(ScopeDIE, dwarf::DW_AT_object_pointer, *ObjectPointer);
+
+ // Add children
+ for (auto &I : Children)
+ ScopeDIE.addChild(std::move(I));
+}
+
+void DwarfDebug::constructAbstractSubprogramScopeDIE(DwarfCompileUnit &TheCU,
+ LexicalScope *Scope) {
+ assert(Scope && Scope->getScopeNode());
+ assert(Scope->isAbstractScope());
+ assert(!Scope->getInlinedAt());
+
+ DISubprogram SP(Scope->getScopeNode());
+
+ ProcessedSPNodes.insert(SP);
+
+ DIE *&AbsDef = AbstractSPDies[SP];
+ if (AbsDef)
+ return;
+
+ // Find the subprogram's DwarfCompileUnit in the SPMap in case the subprogram
+ // was inlined from another compile unit.
+ DwarfCompileUnit &SPCU = *SPMap[SP];
+ DIE *ContextDIE;
+
+ // Some of this is duplicated from DwarfUnit::getOrCreateSubprogramDIE, with
+ // the important distinction that the DIDescriptor is not associated with the
+ // DIE (since the DIDescriptor will be associated with the concrete DIE, if
+ // any). It could be refactored to some common utility function.
+ if (DISubprogram SPDecl = SP.getFunctionDeclaration()) {
+ ContextDIE = &SPCU.getUnitDie();
+ SPCU.getOrCreateSubprogramDIE(SPDecl);
+ } else
+ ContextDIE = SPCU.getOrCreateContextDIE(resolve(SP.getContext()));
+
+ // Passing null as the associated DIDescriptor because the abstract definition
+ // shouldn't be found by lookup.
+ AbsDef = &SPCU.createAndAddDIE(dwarf::DW_TAG_subprogram, *ContextDIE,
+ DIDescriptor());
+ SPCU.applySubprogramAttributes(SP, *AbsDef);
+ SPCU.addGlobalName(SP.getName(), *AbsDef, resolve(SP.getContext()));
+
+ SPCU.addUInt(*AbsDef, dwarf::DW_AT_inline, None, dwarf::DW_INL_inlined);
+ createAndAddScopeChildren(SPCU, Scope, *AbsDef);
+}
+
+DIE &DwarfDebug::constructSubprogramScopeDIE(DwarfCompileUnit &TheCU,
+ LexicalScope *Scope) {
+ assert(Scope && Scope->getScopeNode());
+ assert(!Scope->getInlinedAt());
+ assert(!Scope->isAbstractScope());
+ DISubprogram Sub(Scope->getScopeNode());
+
+ assert(Sub.isSubprogram());
+
+ ProcessedSPNodes.insert(Sub);
+
+ DIE &ScopeDIE = updateSubprogramScopeDIE(TheCU, Sub);
+
+ createAndAddScopeChildren(TheCU, Scope, ScopeDIE);
+
+ return ScopeDIE;
+}
+
// Construct a DIE for this scope.
-DIE *DwarfDebug::constructScopeDIE(DwarfCompileUnit *TheCU,
- LexicalScope *Scope) {
+std::unique_ptr<DIE> DwarfDebug::constructScopeDIE(DwarfCompileUnit &TheCU,
+ LexicalScope *Scope) {
if (!Scope || !Scope->getScopeNode())
- return NULL;
+ return nullptr;
DIScope DS(Scope->getScopeNode());
- SmallVector<DIE *, 8> Children;
- DIE *ObjectPointer = NULL;
- bool ChildrenCreated = false;
+ assert((Scope->getInlinedAt() || !DS.isSubprogram()) &&
+ "Only handle inlined subprograms here, use "
+ "constructSubprogramScopeDIE for non-inlined "
+ "subprograms");
+
+ SmallVector<std::unique_ptr<DIE>, 8> Children;
// We try to create the scope DIE first, then the children DIEs. This will
// avoid creating un-used children then removing them later when we find out
// the scope DIE is null.
- DIE *ScopeDIE = NULL;
- if (Scope->getInlinedAt())
+ std::unique_ptr<DIE> ScopeDIE;
+ if (Scope->getParent() && DS.isSubprogram()) {
ScopeDIE = constructInlinedScopeDIE(TheCU, Scope);
- else if (DS.isSubprogram()) {
- ProcessedSPNodes.insert(DS);
- if (Scope->isAbstractScope()) {
- ScopeDIE = TheCU->getDIE(DS);
- // Note down abstract DIE.
- if (ScopeDIE)
- AbstractSPDies.insert(std::make_pair(DS, ScopeDIE));
- } else
- ScopeDIE = updateSubprogramScopeDIE(TheCU, DISubprogram(DS));
+ if (!ScopeDIE)
+ return nullptr;
+ // We create children when the scope DIE is not null.
+ createScopeChildrenDIE(TheCU, Scope, Children);
} else {
// Early exit when we know the scope DIE is going to be null.
if (isLexicalScopeDIENull(Scope))
- return NULL;
+ return nullptr;
// We create children here when we know the scope DIE is not going to be
// null and the children will be added to the scope DIE.
- ObjectPointer = createScopeChildrenDIE(TheCU, Scope, Children);
- ChildrenCreated = true;
+ createScopeChildrenDIE(TheCU, Scope, Children);
// There is no need to emit empty lexical block DIE.
std::pair<ImportedEntityMap::const_iterator,
ImportedEntityMap::const_iterator> Range =
- std::equal_range(
- ScopesWithImportedEntities.begin(),
- ScopesWithImportedEntities.end(),
- std::pair<const MDNode *, const MDNode *>(DS, (const MDNode *)0),
- less_first());
+ std::equal_range(ScopesWithImportedEntities.begin(),
+ ScopesWithImportedEntities.end(),
+ std::pair<const MDNode *, const MDNode *>(DS, nullptr),
+ less_first());
if (Children.empty() && Range.first == Range.second)
- return NULL;
+ return nullptr;
ScopeDIE = constructLexicalScopeDIE(TheCU, Scope);
assert(ScopeDIE && "Scope DIE should not be null.");
for (ImportedEntityMap::const_iterator i = Range.first; i != Range.second;
++i)
- constructImportedEntityDIE(TheCU, i->second, ScopeDIE);
+ constructImportedEntityDIE(TheCU, i->second, *ScopeDIE);
}
- if (!ScopeDIE) {
- assert(Children.empty() &&
- "We create children only when the scope DIE is not null.");
- return NULL;
- }
- if (!ChildrenCreated)
- // We create children when the scope DIE is not null.
- ObjectPointer = createScopeChildrenDIE(TheCU, Scope, Children);
-
// Add children
- for (DIE *I : Children)
- ScopeDIE->addChild(I);
-
- if (DS.isSubprogram() && ObjectPointer != NULL)
- TheCU->addDIEEntry(ScopeDIE, dwarf::DW_AT_object_pointer, ObjectPointer);
+ for (auto &I : Children)
+ ScopeDIE->addChild(std::move(I));
return ScopeDIE;
}
-void DwarfDebug::addGnuPubAttributes(DwarfUnit *U, DIE *D) const {
+void DwarfDebug::addGnuPubAttributes(DwarfUnit &U, DIE &D) const {
if (!GenerateGnuPubSections)
return;
- U->addFlag(D, dwarf::DW_AT_GNU_pubnames);
+ U.addFlag(D, dwarf::DW_AT_GNU_pubnames);
}
// Create a new DwarfCompileUnit for the given metadata node with tag
// DW_TAG_compile_unit.
-DwarfCompileUnit *DwarfDebug::constructDwarfCompileUnit(DICompileUnit DIUnit) {
+DwarfCompileUnit &DwarfDebug::constructDwarfCompileUnit(DICompileUnit DIUnit) {
StringRef FN = DIUnit.getFilename();
CompilationDir = DIUnit.getDirectory();
- DIE *Die = new DIE(dwarf::DW_TAG_compile_unit);
- DwarfCompileUnit *NewCU = new DwarfCompileUnit(
- InfoHolder.getUnits().size(), Die, DIUnit, Asm, this, &InfoHolder);
- InfoHolder.addUnit(NewCU);
+ auto OwnedUnit = make_unique<DwarfCompileUnit>(
+ InfoHolder.getUnits().size(), DIUnit, Asm, this, &InfoHolder);
+ DwarfCompileUnit &NewCU = *OwnedUnit;
+ DIE &Die = NewCU.getUnitDie();
+ InfoHolder.addUnit(std::move(OwnedUnit));
// LTO with assembly output shares a single line table amongst multiple CUs.
// To avoid the compilation directory being ambiguous, let the line table
@@ -701,116 +638,89 @@ DwarfCompileUnit *DwarfDebug::constructDwarfCompileUnit(DICompileUnit DIUnit) {
// compilation directory.
if (!Asm->OutStreamer.hasRawTextSupport() || SingleCU)
Asm->OutStreamer.getContext().setMCLineTableCompilationDir(
- NewCU->getUniqueID(), CompilationDir);
+ NewCU.getUniqueID(), CompilationDir);
- NewCU->addString(Die, dwarf::DW_AT_producer, DIUnit.getProducer());
- NewCU->addUInt(Die, dwarf::DW_AT_language, dwarf::DW_FORM_data2,
- DIUnit.getLanguage());
- NewCU->addString(Die, dwarf::DW_AT_name, FN);
+ NewCU.addString(Die, dwarf::DW_AT_producer, DIUnit.getProducer());
+ NewCU.addUInt(Die, dwarf::DW_AT_language, dwarf::DW_FORM_data2,
+ DIUnit.getLanguage());
+ NewCU.addString(Die, dwarf::DW_AT_name, FN);
if (!useSplitDwarf()) {
- NewCU->initStmtList(DwarfLineSectionSym);
+ NewCU.initStmtList(DwarfLineSectionSym);
    // If we're using split dwarf, the compilation dir is going to be in the
    // skeleton CU, so we don't need to duplicate it here.
if (!CompilationDir.empty())
- NewCU->addString(Die, dwarf::DW_AT_comp_dir, CompilationDir);
+ NewCU.addString(Die, dwarf::DW_AT_comp_dir, CompilationDir);
addGnuPubAttributes(NewCU, Die);
}
if (DIUnit.isOptimized())
- NewCU->addFlag(Die, dwarf::DW_AT_APPLE_optimized);
+ NewCU.addFlag(Die, dwarf::DW_AT_APPLE_optimized);
StringRef Flags = DIUnit.getFlags();
if (!Flags.empty())
- NewCU->addString(Die, dwarf::DW_AT_APPLE_flags, Flags);
+ NewCU.addString(Die, dwarf::DW_AT_APPLE_flags, Flags);
if (unsigned RVer = DIUnit.getRunTimeVersion())
- NewCU->addUInt(Die, dwarf::DW_AT_APPLE_major_runtime_vers,
- dwarf::DW_FORM_data1, RVer);
+ NewCU.addUInt(Die, dwarf::DW_AT_APPLE_major_runtime_vers,
+ dwarf::DW_FORM_data1, RVer);
if (!FirstCU)
- FirstCU = NewCU;
+ FirstCU = &NewCU;
if (useSplitDwarf()) {
- NewCU->initSection(Asm->getObjFileLowering().getDwarfInfoDWOSection(),
- DwarfInfoDWOSectionSym);
- NewCU->setSkeleton(constructSkeletonCU(NewCU));
+ NewCU.initSection(Asm->getObjFileLowering().getDwarfInfoDWOSection(),
+ DwarfInfoDWOSectionSym);
+ NewCU.setSkeleton(constructSkeletonCU(NewCU));
} else
- NewCU->initSection(Asm->getObjFileLowering().getDwarfInfoSection(),
- DwarfInfoSectionSym);
+ NewCU.initSection(Asm->getObjFileLowering().getDwarfInfoSection(),
+ DwarfInfoSectionSym);
- CUMap.insert(std::make_pair(DIUnit, NewCU));
- CUDieMap.insert(std::make_pair(Die, NewCU));
+ CUMap.insert(std::make_pair(DIUnit, &NewCU));
+ CUDieMap.insert(std::make_pair(&Die, &NewCU));
return NewCU;
}
-// Construct subprogram DIE.
-void DwarfDebug::constructSubprogramDIE(DwarfCompileUnit *TheCU,
- const MDNode *N) {
- // FIXME: We should only call this routine once, however, during LTO if a
- // program is defined in multiple CUs we could end up calling it out of
- // beginModule as we walk the CUs.
-
- DwarfCompileUnit *&CURef = SPMap[N];
- if (CURef)
- return;
- CURef = TheCU;
-
- DISubprogram SP(N);
- if (!SP.isDefinition())
- // This is a method declaration which will be handled while constructing
- // class type.
- return;
-
- DIE *SubprogramDie = TheCU->getOrCreateSubprogramDIE(SP);
-
- // Expose as a global name.
- TheCU->addGlobalName(SP.getName(), SubprogramDie, resolve(SP.getContext()));
-}
-
-void DwarfDebug::constructImportedEntityDIE(DwarfCompileUnit *TheCU,
+void DwarfDebug::constructImportedEntityDIE(DwarfCompileUnit &TheCU,
const MDNode *N) {
DIImportedEntity Module(N);
assert(Module.Verify());
- if (DIE *D = TheCU->getOrCreateContextDIE(Module.getContext()))
- constructImportedEntityDIE(TheCU, Module, D);
+ if (DIE *D = TheCU.getOrCreateContextDIE(Module.getContext()))
+ constructImportedEntityDIE(TheCU, Module, *D);
}
-void DwarfDebug::constructImportedEntityDIE(DwarfCompileUnit *TheCU,
- const MDNode *N, DIE *Context) {
+void DwarfDebug::constructImportedEntityDIE(DwarfCompileUnit &TheCU,
+ const MDNode *N, DIE &Context) {
DIImportedEntity Module(N);
assert(Module.Verify());
return constructImportedEntityDIE(TheCU, Module, Context);
}
-void DwarfDebug::constructImportedEntityDIE(DwarfCompileUnit *TheCU,
+void DwarfDebug::constructImportedEntityDIE(DwarfCompileUnit &TheCU,
const DIImportedEntity &Module,
- DIE *Context) {
+ DIE &Context) {
assert(Module.Verify() &&
"Use one of the MDNode * overloads to handle invalid metadata");
- assert(Context && "Should always have a context for an imported_module");
- DIE *IMDie = new DIE(Module.getTag());
- TheCU->insertDIE(Module, IMDie);
+ DIE &IMDie = TheCU.createAndAddDIE(Module.getTag(), Context, Module);
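+  // createAndAddDIE parents the new DIE under Context and registers it for
+  // lookup by the imported entity's MDNode.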
DIE *EntityDie;
DIDescriptor Entity = resolve(Module.getEntity());
if (Entity.isNameSpace())
- EntityDie = TheCU->getOrCreateNameSpace(DINameSpace(Entity));
+ EntityDie = TheCU.getOrCreateNameSpace(DINameSpace(Entity));
else if (Entity.isSubprogram())
- EntityDie = TheCU->getOrCreateSubprogramDIE(DISubprogram(Entity));
+ EntityDie = TheCU.getOrCreateSubprogramDIE(DISubprogram(Entity));
else if (Entity.isType())
- EntityDie = TheCU->getOrCreateTypeDIE(DIType(Entity));
+ EntityDie = TheCU.getOrCreateTypeDIE(DIType(Entity));
else
- EntityDie = TheCU->getDIE(Entity);
- TheCU->addSourceLine(IMDie, Module.getLineNumber(),
- Module.getContext().getFilename(),
- Module.getContext().getDirectory());
- TheCU->addDIEEntry(IMDie, dwarf::DW_AT_import, EntityDie);
+ EntityDie = TheCU.getDIE(Entity);
+ TheCU.addSourceLine(IMDie, Module.getLineNumber(),
+ Module.getContext().getFilename(),
+ Module.getContext().getDirectory());
+ TheCU.addDIEEntry(IMDie, dwarf::DW_AT_import, *EntityDie);
StringRef Name = Module.getName();
if (!Name.empty())
- TheCU->addString(IMDie, dwarf::DW_AT_name, Name);
- Context->addChild(IMDie);
+ TheCU.addString(IMDie, dwarf::DW_AT_name, Name);
}
// Emit all Dwarf sections that should come prior to the content. Create
@@ -836,7 +746,7 @@ void DwarfDebug::beginModule() {
for (MDNode *N : CU_Nodes->operands()) {
DICompileUnit CUNode(N);
- DwarfCompileUnit *CU = constructDwarfCompileUnit(CUNode);
+ DwarfCompileUnit &CU = constructDwarfCompileUnit(CUNode);
DIArray ImportedEntities = CUNode.getImportedEntities();
for (unsigned i = 0, e = ImportedEntities.getNumElements(); i != e; ++i)
ScopesWithImportedEntities.push_back(std::make_pair(
@@ -846,20 +756,20 @@ void DwarfDebug::beginModule() {
ScopesWithImportedEntities.end(), less_first());
DIArray GVs = CUNode.getGlobalVariables();
for (unsigned i = 0, e = GVs.getNumElements(); i != e; ++i)
- CU->createGlobalVariableDIE(DIGlobalVariable(GVs.getElement(i)));
+ CU.createGlobalVariableDIE(DIGlobalVariable(GVs.getElement(i)));
DIArray SPs = CUNode.getSubprograms();
for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i)
- constructSubprogramDIE(CU, SPs.getElement(i));
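+      // Only record the owning CU here; the subprogram DIE itself is built
+      // lazily in finishSubprogramDefinitions().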
+ SPMap.insert(std::make_pair(SPs.getElement(i), &CU));
DIArray EnumTypes = CUNode.getEnumTypes();
for (unsigned i = 0, e = EnumTypes.getNumElements(); i != e; ++i)
- CU->getOrCreateTypeDIE(EnumTypes.getElement(i));
+ CU.getOrCreateTypeDIE(EnumTypes.getElement(i));
DIArray RetainedTypes = CUNode.getRetainedTypes();
for (unsigned i = 0, e = RetainedTypes.getNumElements(); i != e; ++i) {
DIType Ty(RetainedTypes.getElement(i));
// The retained types array by design contains pointers to
// MDNodes rather than DIRefs. Unique them here.
DIType UniqueTy(resolve(Ty.getRef()));
- CU->getOrCreateTypeDIE(UniqueTy);
+ CU.getOrCreateTypeDIE(UniqueTy);
}
// Emit imported_modules last so that the relevant context is already
// available.
@@ -874,20 +784,41 @@ void DwarfDebug::beginModule() {
SectionMap[Asm->getObjFileLowering().getTextSection()];
}
-// Attach DW_AT_inline attribute with inlined subprogram DIEs.
-void DwarfDebug::computeInlinedDIEs() {
- // Attach DW_AT_inline attribute with inlined subprogram DIEs.
- for (DIE *ISP : InlinedSubprogramDIEs)
- FirstCU->addUInt(ISP, dwarf::DW_AT_inline, None, dwarf::DW_INL_inlined);
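+// Point each concrete subprogram DIE at its abstract origin, if it has one,
+// and lazily construct DIEs for subprograms that were never seen during
+// codegen.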
+void DwarfDebug::finishSubprogramDefinitions() {
+ const Module *M = MMI->getModule();
- for (const auto &AI : AbstractSPDies) {
- DIE *ISP = AI.second;
- if (InlinedSubprogramDIEs.count(ISP))
- continue;
- FirstCU->addUInt(ISP, dwarf::DW_AT_inline, None, dwarf::DW_INL_inlined);
+ NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu");
+ for (MDNode *N : CU_Nodes->operands()) {
+ DICompileUnit TheCU(N);
+    // Construct subprogram DIEs and add variable DIEs.
+ DwarfCompileUnit *SPCU =
+ static_cast<DwarfCompileUnit *>(CUMap.lookup(TheCU));
+ DIArray Subprograms = TheCU.getSubprograms();
+ for (unsigned i = 0, e = Subprograms.getNumElements(); i != e; ++i) {
+ DISubprogram SP(Subprograms.getElement(i));
+      // The subprogram may be in another CU (e.g. due to comdat folding), in
+      // which case ignore it here.
+ if (SPMap[SP] != SPCU)
+ continue;
+ DIE *D = SPCU->getDIE(SP);
+ if (DIE *AbsSPDIE = AbstractSPDies.lookup(SP)) {
+ if (D)
+          // If this subprogram has an abstract definition, reference it.
+ SPCU->addDIEEntry(*D, dwarf::DW_AT_abstract_origin, *AbsSPDIE);
+ } else {
+ if (!D)
+ // Lazily construct the subprogram if we didn't see either concrete or
+ // inlined versions during codegen.
+ D = SPCU->getOrCreateSubprogramDIE(SP);
+        // Attach the subprogram attributes.
+ SPCU->applySubprogramAttributes(SP, *D);
+ SPCU->addGlobalName(SP.getName(), *D, resolve(SP.getContext()));
+ }
+ }
}
}
+
// Collect info for variables that were optimized out.
void DwarfDebug::collectDeadVariables() {
const Module *M = MMI->getModule();
@@ -895,34 +826,32 @@ void DwarfDebug::collectDeadVariables() {
if (NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu")) {
for (MDNode *N : CU_Nodes->operands()) {
DICompileUnit TheCU(N);
+      // Construct subprogram DIEs and add variable DIEs.
+ DwarfCompileUnit *SPCU =
+ static_cast<DwarfCompileUnit *>(CUMap.lookup(TheCU));
+ assert(SPCU && "Unable to find Compile Unit!");
DIArray Subprograms = TheCU.getSubprograms();
for (unsigned i = 0, e = Subprograms.getNumElements(); i != e; ++i) {
DISubprogram SP(Subprograms.getElement(i));
if (ProcessedSPNodes.count(SP) != 0)
continue;
- if (!SP.isSubprogram())
- continue;
- if (!SP.isDefinition())
- continue;
+ assert(SP.isSubprogram() &&
+ "CU's subprogram list contains a non-subprogram");
+ assert(SP.isDefinition() &&
+ "CU's subprogram list contains a subprogram declaration");
DIArray Variables = SP.getVariables();
if (Variables.getNumElements() == 0)
continue;
- // Construct subprogram DIE and add variables DIEs.
- DwarfCompileUnit *SPCU =
- static_cast<DwarfCompileUnit *>(CUMap.lookup(TheCU));
- assert(SPCU && "Unable to find Compile Unit!");
- // FIXME: See the comment in constructSubprogramDIE about duplicate
- // subprogram DIEs.
- constructSubprogramDIE(SPCU, SP);
- DIE *SPDIE = SPCU->getDIE(SP);
+ DIE *SPDIE = AbstractSPDies.lookup(SP);
+ if (!SPDIE)
+ SPDIE = SPCU->getDIE(SP);
+ assert(SPDIE);
for (unsigned vi = 0, ve = Variables.getNumElements(); vi != ve; ++vi) {
DIVariable DV(Variables.getElement(vi));
- if (!DV.isVariable())
- continue;
- DbgVariable NewVar(DV, NULL, this);
- if (DIE *VariableDIE = SPCU->constructVariableDIE(NewVar, false))
- SPDIE->addChild(VariableDIE);
+ assert(DV.isVariable());
+ DbgVariable NewVar(DV, nullptr, this);
+ SPDIE->addChild(SPCU->constructVariableDIE(NewVar));
}
}
}
@@ -930,28 +859,27 @@ void DwarfDebug::collectDeadVariables() {
}
void DwarfDebug::finalizeModuleInfo() {
+ finishSubprogramDefinitions();
+
// Collect info for variables that were optimized out.
collectDeadVariables();
- // Attach DW_AT_inline attribute with inlined subprogram DIEs.
- computeInlinedDIEs();
-
// Handle anything that needs to be done on a per-unit basis after
// all other generation.
- for (DwarfUnit *TheU : getUnits()) {
+ for (const auto &TheU : getUnits()) {
// Emit DW_AT_containing_type attribute to connect types with their
// vtable holding type.
TheU->constructContainingTypeDIEs();
// Add CU specific attributes if we need to add any.
- if (TheU->getUnitDie()->getTag() == dwarf::DW_TAG_compile_unit) {
+ if (TheU->getUnitDie().getTag() == dwarf::DW_TAG_compile_unit) {
// If we're splitting the dwarf out now that we've got the entire
// CU then add the dwo id to it.
DwarfCompileUnit *SkCU =
static_cast<DwarfCompileUnit *>(TheU->getSkeleton());
if (useSplitDwarf()) {
// Emit a unique identifier for this CU.
- uint64_t ID = DIEHash(Asm).computeCUSignature(*TheU->getUnitDie());
+ uint64_t ID = DIEHash(Asm).computeCUSignature(TheU->getUnitDie());
TheU->addUInt(TheU->getUnitDie(), dwarf::DW_AT_GNU_dwo_id,
dwarf::DW_FORM_data8, ID);
SkCU->addUInt(SkCU->getUnitDie(), dwarf::DW_AT_GNU_dwo_id,
@@ -959,12 +887,12 @@ void DwarfDebug::finalizeModuleInfo() {
// We don't keep track of which addresses are used in which CU so this
// is a bit pessimistic under LTO.
- if (!InfoHolder.getAddrPool()->empty())
- addSectionLabel(Asm, SkCU, SkCU->getUnitDie(),
+ if (!AddrPool.isEmpty())
+ addSectionLabel(*Asm, *SkCU, SkCU->getUnitDie(),
dwarf::DW_AT_GNU_addr_base, DwarfAddrSectionSym,
DwarfAddrSectionSym);
if (!TheU->getRangeLists().empty())
- addSectionLabel(Asm, SkCU, SkCU->getUnitDie(),
+ addSectionLabel(*Asm, *SkCU, SkCU->getUnitDie(),
dwarf::DW_AT_GNU_ranges_base,
DwarfDebugRangeSectionSym, DwarfDebugRangeSectionSym);
}
@@ -975,26 +903,27 @@ void DwarfDebug::finalizeModuleInfo() {
      // FIXME: We should use ranges to allow reordering of code ala
// .subsections_via_symbols in mach-o. This would mean turning on
// ranges for all subprogram DIEs for mach-o.
- DwarfCompileUnit *U = SkCU ? SkCU : static_cast<DwarfCompileUnit *>(TheU);
+ DwarfCompileUnit &U =
+ SkCU ? *SkCU : static_cast<DwarfCompileUnit &>(*TheU);
unsigned NumRanges = TheU->getRanges().size();
if (NumRanges) {
if (NumRanges > 1) {
- addSectionLabel(Asm, U, U->getUnitDie(), dwarf::DW_AT_ranges,
- Asm->GetTempSymbol("cu_ranges", U->getUniqueID()),
+ addSectionLabel(*Asm, U, U.getUnitDie(), dwarf::DW_AT_ranges,
+ Asm->GetTempSymbol("cu_ranges", U.getUniqueID()),
DwarfDebugRangeSectionSym);
// A DW_AT_low_pc attribute may also be specified in combination with
// DW_AT_ranges to specify the default base address for use in
// location lists (see Section 2.6.2) and range lists (see Section
// 2.17.3).
- U->addUInt(U->getUnitDie(), dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr,
- 0);
+ U.addUInt(U.getUnitDie(), dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr,
+ 0);
} else {
RangeSpan &Range = TheU->getRanges().back();
- U->addLocalLabelAddress(U->getUnitDie(), dwarf::DW_AT_low_pc,
- Range.getStart());
- U->addLabelDelta(U->getUnitDie(), dwarf::DW_AT_high_pc,
- Range.getEnd(), Range.getStart());
+ U.addLocalLabelAddress(U.getUnitDie(), dwarf::DW_AT_low_pc,
+ Range.getStart());
+ U.addLabelDelta(U.getUnitDie(), dwarf::DW_AT_high_pc, Range.getEnd(),
+ Range.getStart());
}
}
}
@@ -1018,7 +947,7 @@ void DwarfDebug::endSections() {
// Some symbols (e.g. common/bss on mach-o) can have no section but still
// appear in the output. This sucks as we rely on sections to build
// arange spans. We can do it without, but it's icky.
- SectionMap[NULL].push_back(SCU);
+ SectionMap[nullptr].push_back(SCU);
}
}
@@ -1036,7 +965,7 @@ void DwarfDebug::endSections() {
// Add terminating symbols for each section.
for (unsigned ID = 0, E = Sections.size(); ID != E; ID++) {
const MCSection *Section = Sections[ID];
- MCSymbol *Sym = NULL;
+ MCSymbol *Sym = nullptr;
if (Section) {
// We can't call MCSection::getLabelEndName, as it's only safe to do so
@@ -1049,14 +978,14 @@ void DwarfDebug::endSections() {
}
// Insert a final terminator.
- SectionMap[Section].push_back(SymbolCU(NULL, Sym));
+ SectionMap[Section].push_back(SymbolCU(nullptr, Sym));
}
}
// Emit all Dwarf sections that should come after the content.
void DwarfDebug::endModule() {
- assert(CurFn == 0);
- assert(CurMI == 0);
+ assert(CurFn == nullptr);
+ assert(CurMI == nullptr);
if (!FirstCU)
return;
@@ -1089,7 +1018,7 @@ void DwarfDebug::endModule() {
emitDebugAbbrevDWO();
emitDebugLineDWO();
// Emit DWO addresses.
- InfoHolder.emitAddresses(Asm->getObjFileLowering().getDwarfAddrSection());
+ AddrPool.emit(*Asm, Asm->getObjFileLowering().getDwarfAddrSection());
emitDebugLocDWO();
} else
// Emit info into a debug loc section.
@@ -1111,29 +1040,34 @@ void DwarfDebug::endModule() {
// clean up.
SPMap.clear();
+ AbstractVariables.clear();
// Reset these for the next Module if we have one.
- FirstCU = NULL;
+ FirstCU = nullptr;
}
// Find abstract variable, if any, associated with Var.
DbgVariable *DwarfDebug::findAbstractVariable(DIVariable &DV,
DebugLoc ScopeLoc) {
+ return findAbstractVariable(DV, ScopeLoc.getScope(DV->getContext()));
+}
+
+DbgVariable *DwarfDebug::findAbstractVariable(DIVariable &DV,
+ const MDNode *ScopeNode) {
LLVMContext &Ctx = DV->getContext();
  // More than one inlined variable corresponds to one abstract variable.
DIVariable Var = cleanseInlinedVariable(DV, Ctx);
- DbgVariable *AbsDbgVariable = AbstractVariables.lookup(Var);
- if (AbsDbgVariable)
- return AbsDbgVariable;
+ auto I = AbstractVariables.find(Var);
+ if (I != AbstractVariables.end())
+ return I->second.get();
- LexicalScope *Scope = LScopes.findAbstractScope(ScopeLoc.getScope(Ctx));
+ LexicalScope *Scope = LScopes.findAbstractScope(ScopeNode);
if (!Scope)
- return NULL;
+ return nullptr;
- AbsDbgVariable = new DbgVariable(Var, NULL, this);
- addScopeVariable(Scope, AbsDbgVariable);
- AbstractVariables[Var] = AbsDbgVariable;
- return AbsDbgVariable;
+ auto AbsDbgVariable = make_unique<DbgVariable>(Var, nullptr, this);
+ addScopeVariable(Scope, AbsDbgVariable.get());
+ return (AbstractVariables[Var] = std::move(AbsDbgVariable)).get();
}
// If Var is a current function argument, add it to the CurrentFnArguments
// list.
@@ -1169,7 +1103,7 @@ void DwarfDebug::collectVariableInfoFromMMITable(
LexicalScope *Scope = LScopes.findLexicalScope(VI.Loc);
// If variable scope is not found then skip this variable.
- if (Scope == 0)
+ if (!Scope)
continue;
DbgVariable *AbsDbgVariable = findAbstractVariable(DV, VI.Loc);
@@ -1177,28 +1111,12 @@ void DwarfDebug::collectVariableInfoFromMMITable(
RegVar->setFrameIndex(VI.Slot);
if (!addCurrentFnArgument(RegVar, Scope))
addScopeVariable(Scope, RegVar);
- if (AbsDbgVariable)
- AbsDbgVariable->setFrameIndex(VI.Slot);
}
}
-// Return true if debug value, encoded by DBG_VALUE instruction, is in a
-// defined reg.
-static bool isDbgValueInDefinedReg(const MachineInstr *MI) {
- assert(MI->isDebugValue() && "Invalid DBG_VALUE machine instruction!");
- return MI->getNumOperands() == 3 && MI->getOperand(0).isReg() &&
- MI->getOperand(0).getReg() &&
- (MI->getOperand(1).isImm() ||
- (MI->getOperand(1).isReg() && MI->getOperand(1).getReg() == 0U));
-}
-
// Get the .debug_loc value described by the DBG_VALUE instruction MI.
-static DebugLocEntry getDebugLocEntry(AsmPrinter *Asm,
- const MCSymbol *FLabel,
- const MCSymbol *SLabel,
- const MachineInstr *MI,
- DwarfCompileUnit *Unit) {
- const MDNode *Var = MI->getOperand(MI->getNumOperands() - 1).getMetadata();
+static DebugLocEntry::Value getDebugLocValue(const MachineInstr *MI) {
+ const MDNode *Var = MI->getDebugVariable();
assert(MI->getNumOperands() == 3);
if (MI->getOperand(0).isReg()) {
@@ -1209,14 +1127,14 @@ static DebugLocEntry getDebugLocEntry(AsmPrinter *Asm,
MLoc.set(MI->getOperand(0).getReg());
else
MLoc.set(MI->getOperand(0).getReg(), MI->getOperand(1).getImm());
- return DebugLocEntry(FLabel, SLabel, MLoc, Var, Unit);
+ return DebugLocEntry::Value(Var, MLoc);
}
if (MI->getOperand(0).isImm())
- return DebugLocEntry(FLabel, SLabel, MI->getOperand(0).getImm(), Unit);
+ return DebugLocEntry::Value(Var, MI->getOperand(0).getImm());
if (MI->getOperand(0).isFPImm())
- return DebugLocEntry(FLabel, SLabel, MI->getOperand(0).getFPImm(), Unit);
+ return DebugLocEntry::Value(Var, MI->getOperand(0).getFPImm());
if (MI->getOperand(0).isCImm())
- return DebugLocEntry(FLabel, SLabel, MI->getOperand(0).getCImm(), Unit);
+ return DebugLocEntry::Value(Var, MI->getOperand(0).getCImm());
llvm_unreachable("Unexpected 3 operand DBG_VALUE instruction!");
}
@@ -1224,35 +1142,38 @@ static DebugLocEntry getDebugLocEntry(AsmPrinter *Asm,
// Find variables for each lexical scope.
void
DwarfDebug::collectVariableInfo(SmallPtrSet<const MDNode *, 16> &Processed) {
+ LexicalScope *FnScope = LScopes.getCurrentFunctionScope();
+ DwarfCompileUnit *TheCU = SPMap.lookup(FnScope->getScopeNode());
// Grab the variable info that was squirreled away in the MMI side-table.
collectVariableInfoFromMMITable(Processed);
- for (const MDNode *Var : UserVariables) {
- if (Processed.count(Var))
+ for (const auto &I : DbgValues) {
+ DIVariable DV(I.first);
+ if (Processed.count(DV))
continue;
- // History contains relevant DBG_VALUE instructions for Var and instructions
- // clobbering it.
- SmallVectorImpl<const MachineInstr *> &History = DbgValues[Var];
- if (History.empty())
+ // Instruction ranges, specifying where DV is accessible.
+ const auto &Ranges = I.second;
+ if (Ranges.empty())
continue;
- const MachineInstr *MInsn = History.front();
- DIVariable DV(Var);
- LexicalScope *Scope = NULL;
+ LexicalScope *Scope = nullptr;
if (DV.getTag() == dwarf::DW_TAG_arg_variable &&
DISubprogram(DV.getContext()).describes(CurFn->getFunction()))
Scope = LScopes.getCurrentFunctionScope();
- else if (MDNode *IA = DV.getInlinedAt())
- Scope = LScopes.findInlinedScope(DebugLoc::getFromDILocation(IA));
- else
- Scope = LScopes.findLexicalScope(cast<MDNode>(DV->getOperand(1)));
+ else if (MDNode *IA = DV.getInlinedAt()) {
+ DebugLoc DL = DebugLoc::getFromDILocation(IA);
+ Scope = LScopes.findInlinedScope(DebugLoc::get(
+ DL.getLine(), DL.getCol(), DV.getContext(), IA));
+ } else
+ Scope = LScopes.findLexicalScope(DV.getContext());
// If variable scope is not found then skip this variable.
if (!Scope)
continue;
Processed.insert(DV);
+ const MachineInstr *MInsn = Ranges.front().first;
assert(MInsn->isDebugValue() && "History must begin with debug value");
DbgVariable *AbsVar = findAbstractVariable(DV, MInsn->getDebugLoc());
DbgVariable *RegVar = new DbgVariable(DV, AbsVar, this);
@@ -1261,9 +1182,8 @@ DwarfDebug::collectVariableInfo(SmallPtrSet<const MDNode *, 16> &Processed) {
if (AbsVar)
AbsVar->setMInsn(MInsn);
- // Simplify ranges that are fully coalesced.
- if (History.size() <= 1 ||
- (History.size() == 2 && MInsn->isIdenticalTo(History.back()))) {
+ // Check if the first DBG_VALUE is valid for the rest of the function.
+ if (Ranges.size() == 1 && Ranges.front().second == nullptr) {
RegVar->setMInsn(MInsn);
continue;
}
@@ -1276,58 +1196,48 @@ DwarfDebug::collectVariableInfo(SmallPtrSet<const MDNode *, 16> &Processed) {
LocList.Label =
Asm->GetTempSymbol("debug_loc", DotDebugLocEntries.size() - 1);
SmallVector<DebugLocEntry, 4> &DebugLoc = LocList.List;
- for (SmallVectorImpl<const MachineInstr *>::const_iterator
- HI = History.begin(),
- HE = History.end();
- HI != HE; ++HI) {
- const MachineInstr *Begin = *HI;
+ for (auto I = Ranges.begin(), E = Ranges.end(); I != E; ++I) {
+ const MachineInstr *Begin = I->first;
+ const MachineInstr *End = I->second;
assert(Begin->isDebugValue() && "Invalid History entry");
- // Check if DBG_VALUE is truncating a range.
+      // Check if the variable is inaccessible in this range.
if (Begin->getNumOperands() > 1 && Begin->getOperand(0).isReg() &&
!Begin->getOperand(0).getReg())
continue;
- // Compute the range for a register location.
- const MCSymbol *FLabel = getLabelBeforeInsn(Begin);
- const MCSymbol *SLabel = 0;
-
- if (HI + 1 == HE)
- // If Begin is the last instruction in History then its value is valid
- // until the end of the function.
- SLabel = FunctionEndSym;
- else {
- const MachineInstr *End = HI[1];
- DEBUG(dbgs() << "DotDebugLoc Pair:\n"
- << "\t" << *Begin << "\t" << *End << "\n");
- if (End->isDebugValue())
- SLabel = getLabelBeforeInsn(End);
- else {
- // End is a normal instruction clobbering the range.
- SLabel = getLabelAfterInsn(End);
- assert(SLabel && "Forgot label after clobber instruction");
- ++HI;
- }
- }
+ const MCSymbol *StartLabel = getLabelBeforeInsn(Begin);
+ assert(StartLabel && "Forgot label before DBG_VALUE starting a range!");
+
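+      // A clobbered location ends right after the clobbering instruction; an
+      // open-ended location (null End) runs to the start of the next range,
+      // or to the end of the function for the last range.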
+ const MCSymbol *EndLabel;
+ if (End != nullptr)
+ EndLabel = getLabelAfterInsn(End);
+ else if (std::next(I) == Ranges.end())
+ EndLabel = FunctionEndSym;
+ else
+ EndLabel = getLabelBeforeInsn(std::next(I)->first);
+ assert(EndLabel && "Forgot label after instruction ending a range!");
- // The value is valid until the next DBG_VALUE or clobber.
- LexicalScope *FnScope = LScopes.getCurrentFunctionScope();
- DwarfCompileUnit *TheCU = SPMap.lookup(FnScope->getScopeNode());
- DebugLocEntry Loc = getDebugLocEntry(Asm, FLabel, SLabel, Begin, TheCU);
+      DEBUG(dbgs() << "DotDebugLoc Pair:\n" << "\t" << *Begin);
+      // End may be null for an open-ended range; only print it when present.
+      DEBUG(if (End) dbgs() << "\t" << *End);
+      DEBUG(dbgs() << "\n");
+ DebugLocEntry Loc(StartLabel, EndLabel, getDebugLocValue(Begin), TheCU);
if (DebugLoc.empty() || !DebugLoc.back().Merge(Loc))
DebugLoc.push_back(std::move(Loc));
}
}
// Collect info for variables that were optimized out.
- LexicalScope *FnScope = LScopes.getCurrentFunctionScope();
DIArray Variables = DISubprogram(FnScope->getScopeNode()).getVariables();
for (unsigned i = 0, e = Variables.getNumElements(); i != e; ++i) {
DIVariable DV(Variables.getElement(i));
- if (!DV || !DV.isVariable() || !Processed.insert(DV))
+ assert(DV.isVariable());
+ if (!Processed.insert(DV))
continue;
if (LexicalScope *Scope = LScopes.findLexicalScope(DV.getContext()))
- addScopeVariable(Scope, new DbgVariable(DV, NULL, this));
+ addScopeVariable(
+ Scope,
+ new DbgVariable(DV, findAbstractVariable(DV, Scope->getScopeNode()),
+ this));
}
}
@@ -1345,7 +1255,7 @@ MCSymbol *DwarfDebug::getLabelAfterInsn(const MachineInstr *MI) {
// Process beginning of an instruction.
void DwarfDebug::beginInstruction(const MachineInstr *MI) {
- assert(CurMI == 0);
+ assert(CurMI == nullptr);
CurMI = MI;
// Check if source location changes, but ignore DBG_VALUE locations.
if (!MI->isDebugValue()) {
@@ -1364,7 +1274,7 @@ void DwarfDebug::beginInstruction(const MachineInstr *MI) {
const MDNode *Scope = DL.getScope(Asm->MF->getFunction()->getContext());
recordSourceLine(DL.getLine(), DL.getCol(), Scope, Flags);
} else
- recordSourceLine(0, 0, 0, 0);
+ recordSourceLine(0, 0, nullptr, 0);
}
}
@@ -1389,15 +1299,15 @@ void DwarfDebug::beginInstruction(const MachineInstr *MI) {
// Process end of an instruction.
void DwarfDebug::endInstruction() {
- assert(CurMI != 0);
+ assert(CurMI != nullptr);
// Don't create a new label after DBG_VALUE instructions.
// They don't generate code.
if (!CurMI->isDebugValue())
- PrevLabel = 0;
+ PrevLabel = nullptr;
DenseMap<const MachineInstr *, MCSymbol *>::iterator I =
LabelsAfterInsn.find(CurMI);
- CurMI = 0;
+ CurMI = nullptr;
// No label needed.
if (I == LabelsAfterInsn.end())
@@ -1441,6 +1351,17 @@ void DwarfDebug::identifyScopeMarkers() {
}
}
+static DebugLoc findPrologueEndLoc(const MachineFunction *MF) {
+ // First known non-DBG_VALUE and non-frame setup location marks
+ // the beginning of the function body.
+ for (const auto &MBB : *MF)
+ for (const auto &MI : MBB)
+ if (!MI.isDebugValue() && !MI.getFlag(MachineInstr::FrameSetup) &&
+ !MI.getDebugLoc().isUnknown())
+ return MI.getDebugLoc();
+ return DebugLoc();
+}
+
// Gather pre-function debug information. Assumed to be called immediately
// after the function entry point has been emitted.
void DwarfDebug::beginFunction(const MachineFunction *MF) {
@@ -1456,7 +1377,7 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
if (LScopes.empty())
return;
- assert(UserVariables.empty() && DbgValues.empty() && "Maps weren't cleaned");
+ assert(DbgValues.empty() && "DbgValues map wasn't cleaned!");
// Make sure that each lexical scope will have a begin/end label.
identifyScopeMarkers();
@@ -1478,144 +1399,26 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
// Assumes in correct section after the entry point.
Asm->OutStreamer.EmitLabel(FunctionBeginSym);
- const TargetRegisterInfo *TRI = Asm->TM.getRegisterInfo();
- // LiveUserVar - Map physreg numbers to the MDNode they contain.
- std::vector<const MDNode *> LiveUserVar(TRI->getNumRegs());
-
- for (MachineFunction::const_iterator I = MF->begin(), E = MF->end(); I != E;
- ++I) {
- bool AtBlockEntry = true;
- for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end();
- II != IE; ++II) {
- const MachineInstr *MI = II;
-
- if (MI->isDebugValue()) {
- assert(MI->getNumOperands() > 1 && "Invalid machine instruction!");
-
- // Keep track of user variables.
- const MDNode *Var =
- MI->getOperand(MI->getNumOperands() - 1).getMetadata();
-
- // Variable is in a register, we need to check for clobbers.
- if (isDbgValueInDefinedReg(MI))
- LiveUserVar[MI->getOperand(0).getReg()] = Var;
-
- // Check the history of this variable.
- SmallVectorImpl<const MachineInstr *> &History = DbgValues[Var];
- if (History.empty()) {
- UserVariables.push_back(Var);
- // The first mention of a function argument gets the FunctionBeginSym
- // label, so arguments are visible when breaking at function entry.
- DIVariable DV(Var);
- if (DV.isVariable() && DV.getTag() == dwarf::DW_TAG_arg_variable &&
- getDISubprogram(DV.getContext()).describes(MF->getFunction()))
- LabelsBeforeInsn[MI] = FunctionBeginSym;
- } else {
- // We have seen this variable before. Try to coalesce DBG_VALUEs.
- const MachineInstr *Prev = History.back();
- if (Prev->isDebugValue()) {
- // Coalesce identical entries at the end of History.
- if (History.size() >= 2 &&
- Prev->isIdenticalTo(History[History.size() - 2])) {
- DEBUG(dbgs() << "Coalescing identical DBG_VALUE entries:\n"
- << "\t" << *Prev << "\t"
- << *History[History.size() - 2] << "\n");
- History.pop_back();
- }
-
- // Terminate old register assignments that don't reach MI;
- MachineFunction::const_iterator PrevMBB = Prev->getParent();
- if (PrevMBB != I && (!AtBlockEntry || std::next(PrevMBB) != I) &&
- isDbgValueInDefinedReg(Prev)) {
- // Previous register assignment needs to terminate at the end of
- // its basic block.
- MachineBasicBlock::const_iterator LastMI =
- PrevMBB->getLastNonDebugInstr();
- if (LastMI == PrevMBB->end()) {
- // Drop DBG_VALUE for empty range.
- DEBUG(dbgs() << "Dropping DBG_VALUE for empty range:\n"
- << "\t" << *Prev << "\n");
- History.pop_back();
- } else if (std::next(PrevMBB) != PrevMBB->getParent()->end())
- // Terminate after LastMI.
- History.push_back(LastMI);
- }
- }
- }
- History.push_back(MI);
- } else {
- // Not a DBG_VALUE instruction.
- if (!MI->isPosition())
- AtBlockEntry = false;
-
- // First known non-DBG_VALUE and non-frame setup location marks
- // the beginning of the function body.
- if (!MI->getFlag(MachineInstr::FrameSetup) &&
- (PrologEndLoc.isUnknown() && !MI->getDebugLoc().isUnknown()))
- PrologEndLoc = MI->getDebugLoc();
-
- // Check if the instruction clobbers any registers with debug vars.
- for (const MachineOperand &MO : MI->operands()) {
- if (!MO.isReg() || !MO.isDef() || !MO.getReg())
- continue;
- for (MCRegAliasIterator AI(MO.getReg(), TRI, true); AI.isValid();
- ++AI) {
- unsigned Reg = *AI;
- const MDNode *Var = LiveUserVar[Reg];
- if (!Var)
- continue;
- // Reg is now clobbered.
- LiveUserVar[Reg] = 0;
-
- // Was MD last defined by a DBG_VALUE referring to Reg?
- DbgValueHistoryMap::iterator HistI = DbgValues.find(Var);
- if (HistI == DbgValues.end())
- continue;
- SmallVectorImpl<const MachineInstr *> &History = HistI->second;
- if (History.empty())
- continue;
- const MachineInstr *Prev = History.back();
- // Sanity-check: Register assignments are terminated at the end of
- // their block.
- if (!Prev->isDebugValue() || Prev->getParent() != MI->getParent())
- continue;
- // Is the variable still in Reg?
- if (!isDbgValueInDefinedReg(Prev) ||
- Prev->getOperand(0).getReg() != Reg)
- continue;
- // Var is clobbered. Make sure the next instruction gets a label.
- History.push_back(MI);
- }
- }
- }
- }
- }
+ // Calculate history for local variables.
+ calculateDbgValueHistory(MF, Asm->TM.getRegisterInfo(), DbgValues);
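+  // DbgValues maps each variable to a list of (begin, end) instruction
+  // ranges over which its DBG_VALUE location holds; a null end means the
+  // location holds until the next range begins or the function ends.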
- for (auto &I : DbgValues) {
- SmallVectorImpl<const MachineInstr *> &History = I.second;
- if (History.empty())
+ // Request labels for the full history.
+ for (const auto &I : DbgValues) {
+ const auto &Ranges = I.second;
+ if (Ranges.empty())
continue;
- // Make sure the final register assignments are terminated.
- const MachineInstr *Prev = History.back();
- if (Prev->isDebugValue() && isDbgValueInDefinedReg(Prev)) {
- const MachineBasicBlock *PrevMBB = Prev->getParent();
- MachineBasicBlock::const_iterator LastMI =
- PrevMBB->getLastNonDebugInstr();
- if (LastMI == PrevMBB->end())
- // Drop DBG_VALUE for empty range.
- History.pop_back();
- else if (PrevMBB != &PrevMBB->getParent()->back()) {
- // Terminate after LastMI.
- History.push_back(LastMI);
- }
- }
- // Request labels for the full history.
- for (const MachineInstr *MI : History) {
- if (MI->isDebugValue())
- requestLabelBeforeInsn(MI);
- else
- requestLabelAfterInsn(MI);
+ // The first mention of a function argument gets the FunctionBeginSym
+ // label, so arguments are visible when breaking at function entry.
+ DIVariable DV(I.first);
+ if (DV.isVariable() && DV.getTag() == dwarf::DW_TAG_arg_variable &&
+ getDISubprogram(DV.getContext()).describes(MF->getFunction()))
+ LabelsBeforeInsn[Ranges.front().first] = FunctionBeginSym;
+
+ for (const auto &Range : Ranges) {
+ requestLabelBeforeInsn(Range.first);
+ if (Range.second)
+ requestLabelAfterInsn(Range.second);
}
}
@@ -1623,6 +1426,7 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
PrevLabel = FunctionBeginSym;
// Record beginning of function.
+ PrologEndLoc = findPrologueEndLoc(MF);
if (!PrologEndLoc.isUnknown()) {
DebugLoc FnStartDL =
PrologEndLoc.getFnDebugLoc(MF->getFunction()->getContext());
@@ -1671,11 +1475,11 @@ void DwarfDebug::endFunction(const MachineFunction *MF) {
  // Every beginFunction(MF) call should be followed by an endFunction(MF)
  // call; however, endFunction may also be called without a preceding
  // beginFunction. We should handle both cases.
- if (CurFn == 0)
+ if (!CurFn)
CurFn = MF;
else
assert(CurFn == MF);
- assert(CurFn != 0);
+ assert(CurFn != nullptr);
if (!MMI->hasDebugInfo() || LScopes.empty()) {
// If we don't have a lexical scope for this function then there will
@@ -1683,7 +1487,7 @@ void DwarfDebug::endFunction(const MachineFunction *MF) {
// previously used section to nullptr.
PrevSection = nullptr;
PrevCU = nullptr;
- CurFn = 0;
+ CurFn = nullptr;
return;
}
@@ -1699,55 +1503,50 @@ void DwarfDebug::endFunction(const MachineFunction *MF) {
collectVariableInfo(ProcessedVars);
LexicalScope *FnScope = LScopes.getCurrentFunctionScope();
- DwarfCompileUnit *TheCU = SPMap.lookup(FnScope->getScopeNode());
- assert(TheCU && "Unable to find compile unit!");
+ DwarfCompileUnit &TheCU = *SPMap.lookup(FnScope->getScopeNode());
// Construct abstract scopes.
for (LexicalScope *AScope : LScopes.getAbstractScopesList()) {
DISubprogram SP(AScope->getScopeNode());
- if (SP.isSubprogram()) {
- // Collect info for variables that were optimized out.
- DIArray Variables = SP.getVariables();
- for (unsigned i = 0, e = Variables.getNumElements(); i != e; ++i) {
- DIVariable DV(Variables.getElement(i));
- if (!DV || !DV.isVariable() || !ProcessedVars.insert(DV))
- continue;
- // Check that DbgVariable for DV wasn't created earlier, when
- // findAbstractVariable() was called for inlined instance of DV.
- LLVMContext &Ctx = DV->getContext();
- DIVariable CleanDV = cleanseInlinedVariable(DV, Ctx);
- if (AbstractVariables.lookup(CleanDV))
- continue;
- if (LexicalScope *Scope = LScopes.findAbstractScope(DV.getContext()))
- addScopeVariable(Scope, new DbgVariable(DV, NULL, this));
- }
+ if (!SP.isSubprogram())
+ continue;
+ // Collect info for variables that were optimized out.
+ DIArray Variables = SP.getVariables();
+ for (unsigned i = 0, e = Variables.getNumElements(); i != e; ++i) {
+ DIVariable DV(Variables.getElement(i));
+ assert(DV && DV.isVariable());
+ if (!ProcessedVars.insert(DV))
+ continue;
+ findAbstractVariable(DV, DV.getContext());
}
- if (ProcessedSPNodes.count(AScope->getScopeNode()) == 0)
- constructScopeDIE(TheCU, AScope);
+ constructAbstractSubprogramScopeDIE(TheCU, AScope);
}
- DIE *CurFnDIE = constructScopeDIE(TheCU, FnScope);
+ DIE &CurFnDIE = constructSubprogramScopeDIE(TheCU, FnScope);
if (!CurFn->getTarget().Options.DisableFramePointerElim(*CurFn))
- TheCU->addFlag(CurFnDIE, dwarf::DW_AT_APPLE_omit_frame_ptr);
+ TheCU.addFlag(CurFnDIE, dwarf::DW_AT_APPLE_omit_frame_ptr);
// Add the range of this function to the list of ranges for the CU.
RangeSpan Span(FunctionBeginSym, FunctionEndSym);
- TheCU->addRange(std::move(Span));
+ TheCU.addRange(std::move(Span));
PrevSection = Asm->getCurrentSection();
- PrevCU = TheCU;
+ PrevCU = &TheCU;
// Clear debug info
- for (auto &I : ScopeVariables)
- DeleteContainerPointers(I.second);
+  // Ownership of DbgVariables is a bit subtle: ScopeVariables owns all the
+  // DbgVariables except those that are also in AbstractVariables (since they
+  // can be used across functions).
+ for (const auto &I : ScopeVariables)
+ for (const auto *Var : I.second)
+      if (!AbstractVariables.count(Var->getVariable()) ||
+          Var->getAbstractVariable())
+ delete Var;
ScopeVariables.clear();
DeleteContainerPointers(CurrentFnArguments);
- UserVariables.clear();
DbgValues.clear();
- AbstractVariables.clear();
LabelsBeforeInsn.clear();
LabelsAfterInsn.clear();
- PrevLabel = NULL;
- CurFn = 0;
+ PrevLabel = nullptr;
+ CurFn = nullptr;
}
// Register a source line with debug info. Returns the unique label that was
@@ -1758,36 +1557,16 @@ void DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, const MDNode *S,
StringRef Dir;
unsigned Src = 1;
unsigned Discriminator = 0;
- if (S) {
- DIDescriptor Scope(S);
-
- if (Scope.isCompileUnit()) {
- DICompileUnit CU(S);
- Fn = CU.getFilename();
- Dir = CU.getDirectory();
- } else if (Scope.isFile()) {
- DIFile F(S);
- Fn = F.getFilename();
- Dir = F.getDirectory();
- } else if (Scope.isSubprogram()) {
- DISubprogram SP(S);
- Fn = SP.getFilename();
- Dir = SP.getDirectory();
- } else if (Scope.isLexicalBlockFile()) {
- DILexicalBlockFile DBF(S);
- Fn = DBF.getFilename();
- Dir = DBF.getDirectory();
- } else if (Scope.isLexicalBlock()) {
- DILexicalBlock DB(S);
- Fn = DB.getFilename();
- Dir = DB.getDirectory();
- Discriminator = DB.getDiscriminator();
- } else
- llvm_unreachable("Unexpected scope info");
+ if (DIScope Scope = DIScope(S)) {
+ assert(Scope.isScope());
+ Fn = Scope.getFilename();
+ Dir = Scope.getDirectory();
+ if (Scope.isLexicalBlock())
+ Discriminator = DILexicalBlock(S).getDiscriminator();
unsigned CUID = Asm->OutStreamer.getContext().getDwarfCompileUnitID();
- Src = static_cast<DwarfCompileUnit *>(InfoHolder.getUnits()[CUID])
- ->getOrCreateSourceID(Fn, Dir);
+ Src = static_cast<DwarfCompileUnit &>(*InfoHolder.getUnits()[CUID])
+ .getOrCreateSourceID(Fn, Dir);
}
Asm->OutStreamer.EmitDwarfLocDirective(Src, Line, Col, Flags, 0,
Discriminator, Fn);
@@ -1797,68 +1576,6 @@ void DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, const MDNode *S,
// Emit Methods
//===----------------------------------------------------------------------===//
-// Compute the size and offset of a DIE. The offset is relative to start of the
-// CU. It returns the offset after laying out the DIE.
-unsigned DwarfFile::computeSizeAndOffset(DIE *Die, unsigned Offset) {
- // Record the abbreviation.
- assignAbbrevNumber(Die->getAbbrev());
-
- // Get the abbreviation for this DIE.
- const DIEAbbrev &Abbrev = Die->getAbbrev();
-
- // Set DIE offset
- Die->setOffset(Offset);
-
- // Start the size with the size of abbreviation code.
- Offset += getULEB128Size(Die->getAbbrevNumber());
-
- const SmallVectorImpl<DIEValue *> &Values = Die->getValues();
- const SmallVectorImpl<DIEAbbrevData> &AbbrevData = Abbrev.getData();
-
- // Size the DIE attribute values.
- for (unsigned i = 0, N = Values.size(); i < N; ++i)
- // Size attribute value.
- Offset += Values[i]->SizeOf(Asm, AbbrevData[i].getForm());
-
- // Get the children.
- const std::vector<DIE *> &Children = Die->getChildren();
-
- // Size the DIE children if any.
- if (!Children.empty()) {
- assert(Abbrev.hasChildren() && "Children flag not set");
-
- for (DIE *Child : Children)
- Offset = computeSizeAndOffset(Child, Offset);
-
- // End of children marker.
- Offset += sizeof(int8_t);
- }
-
- Die->setSize(Offset - Die->getOffset());
- return Offset;
-}
-
-// Compute the size and offset for each DIE.
-void DwarfFile::computeSizeAndOffsets() {
- // Offset from the first CU in the debug info section is 0 initially.
- unsigned SecOffset = 0;
-
- // Iterate over each compile unit and set the size and offsets for each
- // DIE within each compile unit. All offsets are CU relative.
- for (DwarfUnit *TheU : CUs) {
- TheU->setDebugInfoOffset(SecOffset);
-
- // CU-relative offset is reset to 0 here.
- unsigned Offset = sizeof(int32_t) + // Length of Unit Info
- TheU->getHeaderSize(); // Unit-specific headers
-
- // EndOffset here is CU-relative, after laying out
- // all of the CU DIE.
- unsigned EndOffset = computeSizeAndOffset(TheU->getUnitDie(), Offset);
- SecOffset += EndOffset;
- }
-}
-
// Emit initial Dwarf sections with a label at the start of each one.
void DwarfDebug::emitSectionLabels() {
const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
@@ -1906,19 +1623,19 @@ void DwarfDebug::emitSectionLabels() {
}
// Recursively emits a debug information entry.
-void DwarfDebug::emitDIE(DIE *Die) {
+void DwarfDebug::emitDIE(DIE &Die) {
// Get the abbreviation for this DIE.
- const DIEAbbrev &Abbrev = Die->getAbbrev();
+ const DIEAbbrev &Abbrev = Die.getAbbrev();
// Emit the code (index) for the abbreviation.
if (Asm->isVerbose())
Asm->OutStreamer.AddComment("Abbrev [" + Twine(Abbrev.getNumber()) +
- "] 0x" + Twine::utohexstr(Die->getOffset()) +
- ":0x" + Twine::utohexstr(Die->getSize()) + " " +
+ "] 0x" + Twine::utohexstr(Die.getOffset()) +
+ ":0x" + Twine::utohexstr(Die.getSize()) + " " +
dwarf::TagString(Abbrev.getTag()));
Asm->EmitULEB128(Abbrev.getNumber());
- const SmallVectorImpl<DIEValue *> &Values = Die->getValues();
+ const SmallVectorImpl<DIEValue *> &Values = Die.getValues();
const SmallVectorImpl<DIEAbbrevData> &AbbrevData = Abbrev.getData();
// Emit the DIE attribute values.
@@ -1940,38 +1657,14 @@ void DwarfDebug::emitDIE(DIE *Die) {
// Emit the DIE children if any.
if (Abbrev.hasChildren()) {
- const std::vector<DIE *> &Children = Die->getChildren();
-
- for (DIE *Child : Children)
- emitDIE(Child);
+ for (auto &Child : Die.getChildren())
+ emitDIE(*Child);
Asm->OutStreamer.AddComment("End Of Children Mark");
Asm->EmitInt8(0);
}
}
-// Emit the various dwarf units to the unit section USection with
-// the abbreviations going into ASection.
-void DwarfFile::emitUnits(DwarfDebug *DD, const MCSymbol *ASectionSym) {
- for (DwarfUnit *TheU : CUs) {
- DIE *Die = TheU->getUnitDie();
- const MCSection *USection = TheU->getSection();
- Asm->OutStreamer.SwitchSection(USection);
-
- // Emit the compile units header.
- Asm->OutStreamer.EmitLabel(TheU->getLabelBegin());
-
- // Emit size of content not including length itself
- Asm->OutStreamer.AddComment("Length of Unit");
- Asm->EmitInt32(TheU->getHeaderSize() + Die->getSize());
-
- TheU->emitHeader(ASectionSym);
-
- DD->emitDIE(Die);
- Asm->OutStreamer.EmitLabel(TheU->getLabelEnd());
- }
-}
-
// Emit the debug info section.
void DwarfDebug::emitDebugInfo() {
DwarfFile &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder;
@@ -1986,26 +1679,6 @@ void DwarfDebug::emitAbbreviations() {
Holder.emitAbbrevs(Asm->getObjFileLowering().getDwarfAbbrevSection());
}
-void DwarfFile::emitAbbrevs(const MCSection *Section) {
- // Check to see if it is worth the effort.
- if (!Abbreviations.empty()) {
- // Start the debug abbrev section.
- Asm->OutStreamer.SwitchSection(Section);
-
- // For each abbrevation.
- for (const DIEAbbrev *Abbrev : Abbreviations) {
- // Emit the abbrevations code (base 1 index.)
- Asm->EmitULEB128(Abbrev->getNumber(), "Abbreviation Code");
-
- // Emit the abbreviations data.
- Abbrev->Emit(Asm);
- }
-
- // Mark end of abbreviations.
- Asm->EmitULEB128(0, "EOM(3)");
- }
-}
-
// Emit the last address of the section and the end of the line matrix.
void DwarfDebug::emitEndOfLineMatrix(unsigned SectionEnd) {
// Define last address of section.
@@ -2032,97 +1705,52 @@ void DwarfDebug::emitEndOfLineMatrix(unsigned SectionEnd) {
// Emit visible names into a hashed accelerator table section.
void DwarfDebug::emitAccelNames() {
- DwarfAccelTable AT(
- DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4));
- for (DwarfUnit *TheU : getUnits()) {
- for (const auto &GI : TheU->getAccelNames()) {
- StringRef Name = GI.getKey();
- for (const DIE *D : GI.second)
- AT.AddName(Name, D);
- }
- }
-
- AT.FinalizeTable(Asm, "Names");
+ AccelNames.FinalizeTable(Asm, "Names");
Asm->OutStreamer.SwitchSection(
Asm->getObjFileLowering().getDwarfAccelNamesSection());
MCSymbol *SectionBegin = Asm->GetTempSymbol("names_begin");
Asm->OutStreamer.EmitLabel(SectionBegin);
// Emit the full data.
- AT.Emit(Asm, SectionBegin, &InfoHolder);
+ AccelNames.Emit(Asm, SectionBegin, &InfoHolder);
}
// Emit Objective-C classes and categories into a hashed accelerator table
// section.
void DwarfDebug::emitAccelObjC() {
- DwarfAccelTable AT(
- DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4));
- for (DwarfUnit *TheU : getUnits()) {
- for (const auto &GI : TheU->getAccelObjC()) {
- StringRef Name = GI.getKey();
- for (const DIE *D : GI.second)
- AT.AddName(Name, D);
- }
- }
-
- AT.FinalizeTable(Asm, "ObjC");
+ AccelObjC.FinalizeTable(Asm, "ObjC");
Asm->OutStreamer.SwitchSection(
Asm->getObjFileLowering().getDwarfAccelObjCSection());
MCSymbol *SectionBegin = Asm->GetTempSymbol("objc_begin");
Asm->OutStreamer.EmitLabel(SectionBegin);
// Emit the full data.
- AT.Emit(Asm, SectionBegin, &InfoHolder);
+ AccelObjC.Emit(Asm, SectionBegin, &InfoHolder);
}
// Emit namespace dies into a hashed accelerator table.
void DwarfDebug::emitAccelNamespaces() {
- DwarfAccelTable AT(
- DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4));
- for (DwarfUnit *TheU : getUnits()) {
- for (const auto &GI : TheU->getAccelNamespace()) {
- StringRef Name = GI.getKey();
- for (const DIE *D : GI.second)
- AT.AddName(Name, D);
- }
- }
-
- AT.FinalizeTable(Asm, "namespac");
+ AccelNamespace.FinalizeTable(Asm, "namespac");
Asm->OutStreamer.SwitchSection(
Asm->getObjFileLowering().getDwarfAccelNamespaceSection());
MCSymbol *SectionBegin = Asm->GetTempSymbol("namespac_begin");
Asm->OutStreamer.EmitLabel(SectionBegin);
// Emit the full data.
- AT.Emit(Asm, SectionBegin, &InfoHolder);
+ AccelNamespace.Emit(Asm, SectionBegin, &InfoHolder);
}
// Emit type dies into a hashed accelerator table.
void DwarfDebug::emitAccelTypes() {
- std::vector<DwarfAccelTable::Atom> Atoms;
- Atoms.push_back(
- DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4));
- Atoms.push_back(
- DwarfAccelTable::Atom(dwarf::DW_ATOM_die_tag, dwarf::DW_FORM_data2));
- Atoms.push_back(
- DwarfAccelTable::Atom(dwarf::DW_ATOM_type_flags, dwarf::DW_FORM_data1));
- DwarfAccelTable AT(Atoms);
- for (DwarfUnit *TheU : getUnits()) {
- for (const auto &GI : TheU->getAccelTypes()) {
- StringRef Name = GI.getKey();
- for (const auto &DI : GI.second)
- AT.AddName(Name, DI.first, DI.second);
- }
- }
- AT.FinalizeTable(Asm, "types");
+ AccelTypes.FinalizeTable(Asm, "types");
Asm->OutStreamer.SwitchSection(
Asm->getObjFileLowering().getDwarfAccelTypesSection());
MCSymbol *SectionBegin = Asm->GetTempSymbol("types_begin");
Asm->OutStreamer.EmitLabel(SectionBegin);
// Emit the full data.
- AT.Emit(Asm, SectionBegin, &InfoHolder);
+ AccelTypes.Emit(Asm, SectionBegin, &InfoHolder);
}
// Public name handling.
@@ -2148,8 +1776,8 @@ static dwarf::PubIndexEntryDescriptor computeIndexValue(DwarfUnit *CU,
// look for that now.
DIEValue *SpecVal = Die->findAttribute(dwarf::DW_AT_specification);
if (SpecVal) {
- DIE *SpecDIE = cast<DIEEntry>(SpecVal)->getEntry();
- if (SpecDIE->findAttribute(dwarf::DW_AT_external))
+ DIE &SpecDIE = cast<DIEEntry>(SpecVal)->getEntry();
+ if (SpecDIE.findAttribute(dwarf::DW_AT_external))
Linkage = dwarf::GIEL_EXTERNAL;
} else if (Die->findAttribute(dwarf::DW_AT_external))
Linkage = dwarf::GIEL_EXTERNAL;
@@ -2261,69 +1889,6 @@ void DwarfDebug::emitDebugPubTypes(bool GnuStyle) {
emitDebugPubSection(GnuStyle, PSec, "Types", &DwarfUnit::getGlobalTypes);
}
-// Emit strings into a string section.
-void DwarfFile::emitStrings(const MCSection *StrSection,
- const MCSection *OffsetSection = NULL,
- const MCSymbol *StrSecSym = NULL) {
-
- if (StringPool.empty())
- return;
-
- // Start the dwarf str section.
- Asm->OutStreamer.SwitchSection(StrSection);
-
- // Get all of the string pool entries and put them in an array by their ID so
- // we can sort them.
- SmallVector<std::pair<unsigned, const StrPool::value_type *>, 64 > Entries;
-
- for (const auto &I : StringPool)
- Entries.push_back(std::make_pair(I.second.second, &I));
-
- array_pod_sort(Entries.begin(), Entries.end());
-
- for (const auto &Entry : Entries) {
- // Emit a label for reference from debug information entries.
- Asm->OutStreamer.EmitLabel(Entry.second->getValue().first);
-
- // Emit the string itself with a terminating null byte.
- Asm->OutStreamer.EmitBytes(StringRef(Entry.second->getKeyData(),
- Entry.second->getKeyLength() + 1));
- }
-
- // If we've got an offset section go ahead and emit that now as well.
- if (OffsetSection) {
- Asm->OutStreamer.SwitchSection(OffsetSection);
- unsigned offset = 0;
- unsigned size = 4; // FIXME: DWARF64 is 8.
- for (const auto &Entry : Entries) {
- Asm->OutStreamer.EmitIntValue(offset, size);
- offset += Entry.second->getKeyLength() + 1;
- }
- }
-}
-
-// Emit addresses into the section given.
-void DwarfFile::emitAddresses(const MCSection *AddrSection) {
-
- if (AddressPool.empty())
- return;
-
- // Start the dwarf addr section.
- Asm->OutStreamer.SwitchSection(AddrSection);
-
- // Order the address pool entries by ID
- SmallVector<const MCExpr *, 64> Entries(AddressPool.size());
-
- for (const auto &I : AddressPool)
- Entries[I.second.Number] =
- I.second.TLS
- ? Asm->getObjFileLowering().getDebugThreadLocalSymbol(I.first)
- : MCSymbolRefExpr::Create(I.first, Asm->OutContext);
-
- for (const MCExpr *Entry : Entries)
- Asm->OutStreamer.EmitValue(Entry, Asm->getDataLayout().getPointerSize());
-}
-
// Emit visible names into a debug str section.
void DwarfDebug::emitDebugStr() {
DwarfFile &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder;
@@ -2332,19 +1897,22 @@ void DwarfDebug::emitDebugStr() {
void DwarfDebug::emitDebugLocEntry(ByteStreamer &Streamer,
const DebugLocEntry &Entry) {
- DIVariable DV(Entry.getVariable());
- if (Entry.isInt()) {
+ assert(Entry.getValues().size() == 1 &&
+ "multi-value entries are not supported yet.");
+ const DebugLocEntry::Value Value = Entry.getValues()[0];
+ DIVariable DV(Value.getVariable());
+ if (Value.isInt()) {
DIBasicType BTy(resolve(DV.getType()));
if (BTy.Verify() && (BTy.getEncoding() == dwarf::DW_ATE_signed ||
BTy.getEncoding() == dwarf::DW_ATE_signed_char)) {
Streamer.EmitInt8(dwarf::DW_OP_consts, "DW_OP_consts");
- Streamer.EmitSLEB128(Entry.getInt());
+ Streamer.EmitSLEB128(Value.getInt());
} else {
Streamer.EmitInt8(dwarf::DW_OP_constu, "DW_OP_constu");
- Streamer.EmitULEB128(Entry.getInt());
+ Streamer.EmitULEB128(Value.getInt());
}
- } else if (Entry.isLocation()) {
- MachineLocation Loc = Entry.getLoc();
+ } else if (Value.isLocation()) {
+ MachineLocation Loc = Value.getLoc();
if (!DV.hasComplexAddress())
// Regular entry.
Asm->EmitDwarfRegOp(Streamer, Loc, DV.isIndirect());
@@ -2443,7 +2011,7 @@ void DwarfDebug::emitDebugLocDWO() {
// address we know we've emitted elsewhere (the start of the function?
// The start of the CU or CU subrange that encloses this range?)
Asm->EmitInt8(dwarf::DW_LLE_start_length_entry);
- unsigned idx = InfoHolder.getAddrPoolIndex(Entry.getBeginSym());
+ unsigned idx = AddrPool.getIndex(Entry.getBeginSym());
Asm->EmitULEB128(idx);
Asm->EmitLabelDifference(Entry.getEndSym(), Entry.getBeginSym(), 4);
@@ -2464,7 +2032,7 @@ void DwarfDebug::emitDebugARanges() {
Asm->OutStreamer.SwitchSection(
Asm->getObjFileLowering().getDwarfARangesSection());
- typedef DenseMap<DwarfCompileUnit *, std::vector<ArangeSpan> > SpansType;
+ typedef DenseMap<DwarfCompileUnit *, std::vector<ArangeSpan>> SpansType;
SpansType Spans;
@@ -2502,11 +2070,11 @@ void DwarfDebug::emitDebugARanges() {
// If we have no section (e.g. common), just write out
// individual spans for each symbol.
- if (Section == NULL) {
+ if (!Section) {
for (const SymbolCU &Cur : List) {
ArangeSpan Span;
Span.Start = Cur.Sym;
- Span.End = NULL;
+ Span.End = nullptr;
if (Cur.CU)
Spans[Cur.CU].push_back(Span);
}
@@ -2613,9 +2181,6 @@ void DwarfDebug::emitDebugRanges() {
for (const auto &I : CUMap) {
DwarfCompileUnit *TheCU = I.second;
- // Emit a symbol so we can find the beginning of our ranges.
- Asm->OutStreamer.EmitLabel(TheCU->getLabelRange());
-
// Iterate over the misc ranges for the compile units in the module.
for (const RangeSpanList &List : TheCU->getRangeLists()) {
// Emit our symbol so we can find the beginning of the range.
@@ -2626,8 +2191,15 @@ void DwarfDebug::emitDebugRanges() {
const MCSymbol *End = Range.getEnd();
assert(Begin && "Range without a begin symbol?");
assert(End && "Range without an end symbol?");
- Asm->OutStreamer.EmitSymbolValue(Begin, Size);
- Asm->OutStreamer.EmitSymbolValue(End, Size);
+ if (TheCU->getRanges().size() == 1) {
+ // Grab the begin symbol from the first range as our base.
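+            // Emitting differences from a CU-local base rather than absolute
+            // symbol values should avoid a relocation per range endpoint.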
+ const MCSymbol *Base = TheCU->getRanges()[0].getStart();
+ Asm->EmitLabelDifference(Begin, Base, Size);
+ Asm->EmitLabelDifference(End, Base, Size);
+ } else {
+ Asm->OutStreamer.EmitSymbolValue(Begin, Size);
+ Asm->OutStreamer.EmitSymbolValue(End, Size);
+ }
}
// And terminate the list with two 0 values.
@@ -2656,52 +2228,52 @@ void DwarfDebug::emitDebugRanges() {
// DWARF5 Experimental Separate Dwarf emitters.
-void DwarfDebug::initSkeletonUnit(const DwarfUnit *U, DIE *Die,
- DwarfUnit *NewU) {
+void DwarfDebug::initSkeletonUnit(const DwarfUnit &U, DIE &Die,
+ std::unique_ptr<DwarfUnit> NewU) {
NewU->addLocalString(Die, dwarf::DW_AT_GNU_dwo_name,
- U->getCUNode().getSplitDebugFilename());
+ U.getCUNode().getSplitDebugFilename());
if (!CompilationDir.empty())
NewU->addLocalString(Die, dwarf::DW_AT_comp_dir, CompilationDir);
- addGnuPubAttributes(NewU, Die);
+ addGnuPubAttributes(*NewU, Die);
- SkeletonHolder.addUnit(NewU);
+ SkeletonHolder.addUnit(std::move(NewU));
}
// This DIE has the following attributes: DW_AT_comp_dir, DW_AT_stmt_list,
// DW_AT_low_pc, DW_AT_high_pc, DW_AT_ranges, DW_AT_dwo_name, DW_AT_dwo_id,
// DW_AT_addr_base, DW_AT_ranges_base.
-DwarfCompileUnit *DwarfDebug::constructSkeletonCU(const DwarfCompileUnit *CU) {
+DwarfCompileUnit &DwarfDebug::constructSkeletonCU(const DwarfCompileUnit &CU) {
- DIE *Die = new DIE(dwarf::DW_TAG_compile_unit);
- DwarfCompileUnit *NewCU = new DwarfCompileUnit(
- CU->getUniqueID(), Die, CU->getCUNode(), Asm, this, &SkeletonHolder);
- NewCU->initSection(Asm->getObjFileLowering().getDwarfInfoSection(),
- DwarfInfoSectionSym);
+ auto OwnedUnit = make_unique<DwarfCompileUnit>(
+ CU.getUniqueID(), CU.getCUNode(), Asm, this, &SkeletonHolder);
+ DwarfCompileUnit &NewCU = *OwnedUnit;
+ NewCU.initSection(Asm->getObjFileLowering().getDwarfInfoSection(),
+ DwarfInfoSectionSym);
- NewCU->initStmtList(DwarfLineSectionSym);
+ NewCU.initStmtList(DwarfLineSectionSym);
- initSkeletonUnit(CU, Die, NewCU);
+ initSkeletonUnit(CU, NewCU.getUnitDie(), std::move(OwnedUnit));
return NewCU;
}
// This DIE has the following attributes: DW_AT_comp_dir, DW_AT_dwo_name,
// DW_AT_addr_base.
-DwarfTypeUnit *DwarfDebug::constructSkeletonTU(DwarfTypeUnit *TU) {
+DwarfTypeUnit &DwarfDebug::constructSkeletonTU(DwarfTypeUnit &TU) {
DwarfCompileUnit &CU = static_cast<DwarfCompileUnit &>(
- *SkeletonHolder.getUnits()[TU->getCU().getUniqueID()]);
+ *SkeletonHolder.getUnits()[TU.getCU().getUniqueID()]);
- DIE *Die = new DIE(dwarf::DW_TAG_type_unit);
- DwarfTypeUnit *NewTU =
- new DwarfTypeUnit(TU->getUniqueID(), Die, CU, Asm, this, &SkeletonHolder);
- NewTU->setTypeSignature(TU->getTypeSignature());
- NewTU->setType(NULL);
- NewTU->initSection(
- Asm->getObjFileLowering().getDwarfTypesSection(TU->getTypeSignature()));
+ auto OwnedUnit = make_unique<DwarfTypeUnit>(TU.getUniqueID(), CU, Asm, this,
+ &SkeletonHolder);
+ DwarfTypeUnit &NewTU = *OwnedUnit;
+ NewTU.setTypeSignature(TU.getTypeSignature());
+ NewTU.setType(nullptr);
+ NewTU.initSection(
+ Asm->getObjFileLowering().getDwarfTypesSection(TU.getTypeSignature()));
- initSkeletonUnit(TU, Die, NewTU);
+ initSkeletonUnit(TU, NewTU.getUnitDie(), std::move(OwnedUnit));
return NewTU;
}
@@ -2711,7 +2283,7 @@ void DwarfDebug::emitDebugInfoDWO() {
assert(useSplitDwarf() && "No split dwarf debug info?");
// Don't pass an abbrev symbol, using a constant zero instead so as not to
// emit relocations into the dwo file.
- InfoHolder.emitUnits(this, /* AbbrevSymbol */nullptr);
+ InfoHolder.emitUnits(this, /* AbbrevSymbol */ nullptr);
}
// Emit the .debug_abbrev.dwo section for separated dwarf. This contains the
@@ -2748,14 +2320,25 @@ MCDwarfDwoLineTable *DwarfDebug::getDwoLineTable(const DwarfCompileUnit &CU) {
return &SplitTypeUnitFileTable;
}
+static uint64_t makeTypeSignature(StringRef Identifier) {
+ MD5 Hash;
+ Hash.update(Identifier);
+ // Take the least significant 8 bytes of the hash and return those. Our MD5
+ // implementation always returns its results in little endian, so the value
+ // is read through a little-endian view of the buffer.
+ MD5::MD5Result Result;
+ Hash.final(Result);
+ return *reinterpret_cast<support::ulittle64_t *>(Result + 8);
+}
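For reference, a minimal standalone sketch of the same derivation, assuming only a raw 16-byte MD5 digest (the helper name and byte-wise assembly are illustrative, not part of this patch):

    #include <cstdint>

    // Interpret bytes 8..15 of a 16-byte MD5 digest as a little-endian
    // 64-bit value, mirroring the ulittle64_t read in makeTypeSignature.
    static uint64_t signatureFromDigest(const uint8_t Digest[16]) {
      uint64_t Sig = 0;
      // Assemble from the most significant byte down so the result does
      // not depend on host endianness.
      for (int I = 7; I >= 0; --I)
        Sig = (Sig << 8) | Digest[8 + I];
      return Sig;
    }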
+
void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU,
- StringRef Identifier, DIE *RefDie,
+ StringRef Identifier, DIE &RefDie,
DICompositeType CTy) {
- // Flag the type unit reference as a declaration so that if it contains
- // members (implicit special members, static data member definitions, member
- // declarations for definitions in this CU, etc) consumers don't get confused
- // and think this is a full definition.
- CU.addFlag(RefDie, dwarf::DW_AT_declaration);
+ // Fast path: if we're building some type units and one has already used the
+ // address pool, we know we're going to throw away all this work anyway, so
+ // don't bother building dependent types.
+ if (!TypeUnitsUnderConstruction.empty() && AddrPool.hasBeenUsed())
+ return;
const DwarfTypeUnit *&TU = DwarfTypeUnits[CTy];
if (TU) {
@@ -2763,45 +2346,111 @@ void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU,
return;
}
- DIE *UnitDie = new DIE(dwarf::DW_TAG_type_unit);
- DwarfTypeUnit *NewTU =
- new DwarfTypeUnit(InfoHolder.getUnits().size(), UnitDie, CU, Asm, this,
- &InfoHolder, getDwoLineTable(CU));
- TU = NewTU;
- InfoHolder.addUnit(NewTU);
+ bool TopLevelType = TypeUnitsUnderConstruction.empty();
+ AddrPool.resetUsedFlag();
- NewTU->addUInt(UnitDie, dwarf::DW_AT_language, dwarf::DW_FORM_data2,
- CU.getLanguage());
+ auto OwnedUnit =
+ make_unique<DwarfTypeUnit>(InfoHolder.getUnits().size(), CU, Asm, this,
+ &InfoHolder, getDwoLineTable(CU));
+ DwarfTypeUnit &NewTU = *OwnedUnit;
+ DIE &UnitDie = NewTU.getUnitDie();
+ TU = &NewTU;
+ TypeUnitsUnderConstruction.push_back(
+ std::make_pair(std::move(OwnedUnit), CTy));
- MD5 Hash;
- Hash.update(Identifier);
- // ... take the least significant 8 bytes and return those. Our MD5
- // implementation always returns its results in little endian, swap bytes
- // appropriately.
- MD5::MD5Result Result;
- Hash.final(Result);
- uint64_t Signature = *reinterpret_cast<support::ulittle64_t *>(Result + 8);
- NewTU->setTypeSignature(Signature);
- if (useSplitDwarf())
- NewTU->setSkeleton(constructSkeletonTU(NewTU));
- else
- CU.applyStmtList(*UnitDie);
+ NewTU.addUInt(UnitDie, dwarf::DW_AT_language, dwarf::DW_FORM_data2,
+ CU.getLanguage());
- NewTU->setType(NewTU->createTypeDIE(CTy));
+ uint64_t Signature = makeTypeSignature(Identifier);
+ NewTU.setTypeSignature(Signature);
- NewTU->initSection(
+ if (!useSplitDwarf())
+ CU.applyStmtList(UnitDie);
+
+ // FIXME: Skip using COMDAT groups for type units in the .dwo file once tools
+ // such as DWP (http://gcc.gnu.org/wiki/DebugFissionDWP) can cope with it.
+ NewTU.initSection(
useSplitDwarf()
? Asm->getObjFileLowering().getDwarfTypesDWOSection(Signature)
: Asm->getObjFileLowering().getDwarfTypesSection(Signature));
- CU.addDIETypeSignature(RefDie, *NewTU);
+ NewTU.setType(NewTU.createTypeDIE(CTy));
+
+ if (TopLevelType) {
+ auto TypeUnitsToAdd = std::move(TypeUnitsUnderConstruction);
+ TypeUnitsUnderConstruction.clear();
+
+ // Types referencing entries in the address table cannot be placed in type
+ // units.
+ if (AddrPool.hasBeenUsed()) {
+
+ // This is pessimistic as some of these types might not be dependent on
+ // the type that used an address.
+ for (const auto &TU : TypeUnitsToAdd)
+ DwarfTypeUnits.erase(TU.second);
+
+ // Construct this type in the CU directly.
+ // This is inefficient because all the dependent types will be rebuilt
+ // from scratch, including building them in type units, discovering that
+ // they depend on addresses, throwing them out and rebuilding them.
+ CU.constructTypeDIE(RefDie, CTy);
+ return;
+ }
+
+ // If the type wasn't dependent on fission addresses, finish adding the type
+ // and all its dependent types.
+ for (auto &TU : TypeUnitsToAdd) {
+ if (useSplitDwarf())
+ TU.first->setSkeleton(constructSkeletonTU(*TU.first));
+ InfoHolder.addUnit(std::move(TU.first));
+ }
+ }
+ CU.addDIETypeSignature(RefDie, NewTU);
}
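The construction above is effectively transactional: units accumulate in TypeUnitsUnderConstruction and, once the top-level type finishes, are either all committed or all discarded depending on whether the address pool was touched. A generic sketch of that commit-or-rollback shape, with hypothetical types rather than LLVM API:

    #include <memory>
    #include <utility>
    #include <vector>

    template <typename Unit, typename Key> struct StagedUnits {
      std::vector<std::pair<std::unique_ptr<Unit>, Key>> Staged;

      template <typename CommitFn, typename RollbackFn>
      void finish(bool Poisoned, CommitFn Commit, RollbackFn Rollback) {
        auto Units = std::move(Staged);
        Staged.clear();
        if (Poisoned) {
          // Roll back: forget every key cached during this build.
          for (auto &U : Units)
            Rollback(U.second);
          return;
        }
        // Commit: hand ownership of each finished unit to its holder.
        for (auto &U : Units)
          Commit(std::move(U.first));
      }
    };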
-void DwarfDebug::attachLowHighPC(DwarfCompileUnit *Unit, DIE *D,
+void DwarfDebug::attachLowHighPC(DwarfCompileUnit &Unit, DIE &D,
MCSymbol *Begin, MCSymbol *End) {
- Unit->addLabelAddress(D, dwarf::DW_AT_low_pc, Begin);
+ assert(Begin && "Begin label should not be null!");
+ assert(End && "End label should not be null!");
+ assert(Begin->isDefined() && "Invalid starting label");
+ assert(End->isDefined() && "Invalid end label");
+
+ Unit.addLabelAddress(D, dwarf::DW_AT_low_pc, Begin);
if (DwarfVersion < 4)
- Unit->addLabelAddress(D, dwarf::DW_AT_high_pc, End);
+ Unit.addLabelAddress(D, dwarf::DW_AT_high_pc, End);
else
- Unit->addLabelDelta(D, dwarf::DW_AT_high_pc, End, Begin);
+ Unit.addLabelDelta(D, dwarf::DW_AT_high_pc, End, Begin);
+}
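The version check above reflects a DWARF encoding detail: before DWARF 4, DW_AT_high_pc must hold an absolute end address, while DWARF 4 and later allow it to be a length relative to DW_AT_low_pc, which can typically be emitted without a relocation. A tiny sketch of that selection (names are illustrative):

    enum class HighPCKind { AbsoluteAddress, OffsetFromLowPC };

    // Mirror of the branch in attachLowHighPC.
    static HighPCKind highPCKindFor(unsigned DwarfVersion) {
      return DwarfVersion < 4 ? HighPCKind::AbsoluteAddress
                              : HighPCKind::OffsetFromLowPC;
    }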
+
+// Accelerator table mutators - add each name along with its companion
+// DIE to the proper table while ensuring that the name that we're going
+// to reference is in the string table. We do this since the names we
+// add may not always be identical to the names in the DIE.
+void DwarfDebug::addAccelName(StringRef Name, const DIE &Die) {
+ if (!useDwarfAccelTables())
+ return;
+ AccelNames.AddName(Name, InfoHolder.getStringPool().getSymbol(*Asm, Name),
+ &Die);
+}
+
+void DwarfDebug::addAccelObjC(StringRef Name, const DIE &Die) {
+ if (!useDwarfAccelTables())
+ return;
+ AccelObjC.AddName(Name, InfoHolder.getStringPool().getSymbol(*Asm, Name),
+ &Die);
+}
+
+void DwarfDebug::addAccelNamespace(StringRef Name, const DIE &Die) {
+ if (!useDwarfAccelTables())
+ return;
+ AccelNamespace.AddName(Name, InfoHolder.getStringPool().getSymbol(*Asm, Name),
+ &Die);
+}
+
+void DwarfDebug::addAccelType(StringRef Name, const DIE &Die, char Flags) {
+ if (!useDwarfAccelTables())
+ return;
+ AccelTypes.AddName(Name, InfoHolder.getStringPool().getSymbol(*Asm, Name),
+ &Die);
}
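The four mutators share one shape: check the flag, look up (or create) the string-pool symbol, then record the name/DIE pair. If factored out, a hypothetical common helper could look like this (not part of the patch; each public mutator would forward to it with its own table):

    static void addAccelEntry(DwarfAccelTable &Table, bool Enabled,
                              MCSymbol *StrSym, StringRef Name,
                              const DIE &Die) {
      if (!Enabled)
        return;
      Table.AddName(Name, StrSym, &Die);
    }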
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h
index da708f5..2f5abc8 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -14,10 +14,13 @@
#ifndef CODEGEN_ASMPRINTER_DWARFDEBUG_H__
#define CODEGEN_ASMPRINTER_DWARFDEBUG_H__
+#include "DwarfFile.h"
#include "AsmPrinterHandler.h"
#include "DIE.h"
+#include "DbgValueHistoryCalculator.h"
#include "DebugLocEntry.h"
#include "DebugLocList.h"
+#include "DwarfAccelTable.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallPtrSet.h"
@@ -30,6 +33,8 @@
#include "llvm/MC/MCDwarf.h"
#include "llvm/Support/Allocator.h"
+#include <memory>
+
namespace llvm {
class AsmPrinter;
@@ -74,12 +79,12 @@ class DbgVariable {
public:
// AbsVar may be NULL.
DbgVariable(DIVariable V, DbgVariable *AV, DwarfDebug *DD)
- : Var(V), TheDIE(0), DotDebugLocOffset(~0U), AbsVar(AV), MInsn(0),
- FrameIndex(~0), DD(DD) {}
+ : Var(V), TheDIE(nullptr), DotDebugLocOffset(~0U), AbsVar(AV),
+ MInsn(nullptr), FrameIndex(~0), DD(DD) {}
// Accessors.
DIVariable getVariable() const { return Var; }
- void setDIE(DIE *D) { TheDIE = D; }
+ void setDIE(DIE &D) { TheDIE = &D; }
DIE *getDIE() const { return TheDIE; }
void setDotDebugLocOffset(unsigned O) { DotDebugLocOffset = O; }
unsigned getDotDebugLocOffset() const { return DotDebugLocOffset; }
@@ -90,7 +95,7 @@ public:
int getFrameIndex() const { return FrameIndex; }
void setFrameIndex(int FI) { FrameIndex = FI; }
// Translate tag to proper Dwarf tag.
- uint16_t getTag() const {
+ dwarf::Tag getTag() const {
if (Var.getTag() == dwarf::DW_TAG_arg_variable)
return dwarf::DW_TAG_formal_parameter;
@@ -131,99 +136,6 @@ private:
template <typename T> T resolve(DIRef<T> Ref) const;
};
-/// \brief Collects and handles information specific to a particular
-/// collection of units. This collection represents all of the units
-/// that will be ultimately output into a single object file.
-class DwarfFile {
- // Target of Dwarf emission, used for sizing of abbreviations.
- AsmPrinter *Asm;
-
- // Used to uniquely define abbreviations.
- FoldingSet<DIEAbbrev> AbbreviationsSet;
-
- // A list of all the unique abbreviations in use.
- std::vector<DIEAbbrev *> Abbreviations;
-
- // A pointer to all units in the section.
- SmallVector<DwarfUnit *, 1> CUs;
-
- // Collection of strings for this unit and assorted symbols.
- // A String->Symbol mapping of strings used by indirect
- // references.
- typedef StringMap<std::pair<MCSymbol *, unsigned>, BumpPtrAllocator &>
- StrPool;
- StrPool StringPool;
- unsigned NextStringPoolNumber;
- std::string StringPref;
-
- struct AddressPoolEntry {
- unsigned Number;
- bool TLS;
- AddressPoolEntry(unsigned Number, bool TLS) : Number(Number), TLS(TLS) {}
- };
- // Collection of addresses for this unit and assorted labels.
- // A Symbol->unsigned mapping of addresses used by indirect
- // references.
- typedef DenseMap<const MCSymbol *, AddressPoolEntry> AddrPool;
- AddrPool AddressPool;
- unsigned NextAddrPoolNumber;
-
-public:
- DwarfFile(AsmPrinter *AP, const char *Pref, BumpPtrAllocator &DA)
- : Asm(AP), StringPool(DA), NextStringPoolNumber(0), StringPref(Pref),
- AddressPool(), NextAddrPoolNumber(0) {}
-
- ~DwarfFile();
-
- const SmallVectorImpl<DwarfUnit *> &getUnits() { return CUs; }
-
- /// \brief Compute the size and offset of a DIE given an incoming Offset.
- unsigned computeSizeAndOffset(DIE *Die, unsigned Offset);
-
- /// \brief Compute the size and offset of all the DIEs.
- void computeSizeAndOffsets();
-
- /// \brief Define a unique number for the abbreviation.
- void assignAbbrevNumber(DIEAbbrev &Abbrev);
-
- /// \brief Add a unit to the list of CUs.
- void addUnit(DwarfUnit *CU) { CUs.push_back(CU); }
-
- /// \brief Emit all of the units to the section listed with the given
- /// abbreviation section.
- void emitUnits(DwarfDebug *DD, const MCSymbol *ASectionSym);
-
- /// \brief Emit a set of abbreviations to the specific section.
- void emitAbbrevs(const MCSection *);
-
- /// \brief Emit all of the strings to the section given.
- void emitStrings(const MCSection *StrSection, const MCSection *OffsetSection,
- const MCSymbol *StrSecSym);
-
- /// \brief Emit all of the addresses to the section given.
- void emitAddresses(const MCSection *AddrSection);
-
- /// \brief Returns the entry into the start of the pool.
- MCSymbol *getStringPoolSym();
-
- /// \brief Returns an entry into the string pool with the given
- /// string text.
- MCSymbol *getStringPoolEntry(StringRef Str);
-
- /// \brief Returns the index into the string pool with the given
- /// string text.
- unsigned getStringPoolIndex(StringRef Str);
-
- /// \brief Returns the string pool.
- StrPool *getStringPool() { return &StringPool; }
-
- /// \brief Returns the index into the address pool with the given
- /// label/symbol.
- unsigned getAddrPoolIndex(const MCSymbol *Sym, bool TLS = false);
-
- /// \brief Returns the address pool.
- AddrPool *getAddrPool() { return &AddressPool; }
-};
/// \brief Helper used to pair up a symbol and its DWARF compile unit.
struct SymbolCU {
@@ -287,7 +199,7 @@ class DwarfDebug : public AsmPrinterHandler {
ScopeVariablesMap ScopeVariables;
// Collection of abstract variables.
- DenseMap<const MDNode *, DbgVariable *> AbstractVariables;
+ DenseMap<const MDNode *, std::unique_ptr<DbgVariable>> AbstractVariables;
// Collection of DebugLocEntry. Stored in a linked list so that DIELocLists
// can refer to them in spite of insertions into this list.
@@ -307,15 +219,8 @@ class DwarfDebug : public AsmPrinterHandler {
// Maps instruction with label emitted after instruction.
DenseMap<const MachineInstr *, MCSymbol *> LabelsAfterInsn;
- // Every user variable mentioned by a DBG_VALUE instruction in order of
- // appearance.
- SmallVector<const MDNode *, 8> UserVariables;
-
- // For each user variable, keep a list of DBG_VALUE instructions in order.
- // The list can also contain normal instructions that clobber the previous
- // DBG_VALUE.
- typedef DenseMap<const MDNode *, SmallVector<const MachineInstr *, 4> >
- DbgValueHistoryMap;
+ // History of DBG_VALUE and clobber instructions for each user variable.
+ // Variables are listed in order of appearance.
DbgValueHistoryMap DbgValues;
// Previous instruction's location information. This is used to determine
@@ -373,6 +278,8 @@ class DwarfDebug : public AsmPrinterHandler {
// them.
DenseMap<const MDNode *, const DwarfTypeUnit *> DwarfTypeUnits;
+ SmallVector<std::pair<std::unique_ptr<DwarfTypeUnit>, DICompositeType>, 1>
+ TypeUnitsUnderConstruction;
+
// Whether to emit the pubnames/pubtypes sections.
bool HasDwarfPubSections;
@@ -411,22 +318,30 @@ class DwarfDebug : public AsmPrinterHandler {
// True iff there are multiple CUs in this module.
bool SingleCU;
+ AddressPool AddrPool;
+
+ DwarfAccelTable AccelNames;
+ DwarfAccelTable AccelObjC;
+ DwarfAccelTable AccelNamespace;
+ DwarfAccelTable AccelTypes;
+
MCDwarfDwoLineTable *getDwoLineTable(const DwarfCompileUnit &);
void addScopeVariable(LexicalScope *LS, DbgVariable *Var);
- const SmallVectorImpl<DwarfUnit *> &getUnits() {
+ const SmallVectorImpl<std::unique_ptr<DwarfUnit>> &getUnits() {
return InfoHolder.getUnits();
}
/// \brief Find abstract variable associated with Var.
DbgVariable *findAbstractVariable(DIVariable &Var, DebugLoc Loc);
+ DbgVariable *findAbstractVariable(DIVariable &Var, const MDNode *Scope);
/// \brief Find DIE for the given subprogram and attach appropriate
/// DW_AT_low_pc and DW_AT_high_pc attributes. If there are global
/// variables in this scope then create and insert DIEs for these
/// variables.
- DIE *updateSubprogramScopeDIE(DwarfCompileUnit *SPCU, DISubprogram SP);
+ DIE &updateSubprogramScopeDIE(DwarfCompileUnit &SPCU, DISubprogram SP);
/// \brief A helper function to check whether the DIE for a given Scope is
/// going to be null.
@@ -434,22 +349,33 @@ class DwarfDebug : public AsmPrinterHandler {
/// \brief A helper function to construct a RangeSpanList for a given
/// lexical scope.
- void addScopeRangeList(DwarfCompileUnit *TheCU, DIE *ScopeDIE,
+ void addScopeRangeList(DwarfCompileUnit &TheCU, DIE &ScopeDIE,
const SmallVectorImpl<InsnRange> &Range);
/// \brief Construct new DW_TAG_lexical_block for this scope and
/// attach DW_AT_low_pc/DW_AT_high_pc labels.
- DIE *constructLexicalScopeDIE(DwarfCompileUnit *TheCU, LexicalScope *Scope);
+ std::unique_ptr<DIE> constructLexicalScopeDIE(DwarfCompileUnit &TheCU,
+ LexicalScope *Scope);
/// \brief This scope represents inlined body of a function. Construct
/// DIE to represent this concrete inlined copy of the function.
- DIE *constructInlinedScopeDIE(DwarfCompileUnit *TheCU, LexicalScope *Scope);
+ std::unique_ptr<DIE> constructInlinedScopeDIE(DwarfCompileUnit &TheCU,
+ LexicalScope *Scope);
/// \brief Construct a DIE for this scope.
- DIE *constructScopeDIE(DwarfCompileUnit *TheCU, LexicalScope *Scope);
+ std::unique_ptr<DIE> constructScopeDIE(DwarfCompileUnit &TheCU,
+ LexicalScope *Scope);
+ void createAndAddScopeChildren(DwarfCompileUnit &TheCU, LexicalScope *Scope,
+ DIE &ScopeDIE);
+ /// \brief Construct a DIE for this abstract scope.
+ void constructAbstractSubprogramScopeDIE(DwarfCompileUnit &TheCU,
+ LexicalScope *Scope);
+ /// \brief Construct a DIE for this subprogram scope.
+ DIE &constructSubprogramScopeDIE(DwarfCompileUnit &TheCU,
+ LexicalScope *Scope);
/// A helper function to create children of a Scope DIE.
- DIE *createScopeChildrenDIE(DwarfCompileUnit *TheCU, LexicalScope *Scope,
- SmallVectorImpl<DIE *> &Children);
+ DIE *createScopeChildrenDIE(DwarfCompileUnit &TheCU, LexicalScope *Scope,
+ SmallVectorImpl<std::unique_ptr<DIE>> &Children);
/// \brief Emit initial Dwarf sections with a label at the start of each one.
void emitSectionLabels();
@@ -460,12 +386,11 @@ class DwarfDebug : public AsmPrinterHandler {
/// \brief Compute the size and offset of all the DIEs.
void computeSizeAndOffsets();
- /// \brief Attach DW_AT_inline attribute with inlined subprogram DIEs.
- void computeInlinedDIEs();
-
/// \brief Collect info for variables that were optimized out.
void collectDeadVariables();
+ void finishSubprogramDefinitions();
+
/// \brief Finish off debug information after all functions have been
/// processed.
void finalizeModuleInfo();
@@ -535,15 +460,16 @@ class DwarfDebug : public AsmPrinterHandler {
/// DWARF 5 Experimental Split Dwarf Emitters
/// \brief Initialize common features of skeleton units.
- void initSkeletonUnit(const DwarfUnit *U, DIE *Die, DwarfUnit *NewU);
+ void initSkeletonUnit(const DwarfUnit &U, DIE &Die,
+ std::unique_ptr<DwarfUnit> NewU);
/// \brief Construct the split debug info compile unit for the debug info
/// section.
- DwarfCompileUnit *constructSkeletonCU(const DwarfCompileUnit *CU);
+ DwarfCompileUnit &constructSkeletonCU(const DwarfCompileUnit &CU);
/// \brief Construct the split debug info compile unit for the debug info
/// section.
- DwarfTypeUnit *constructSkeletonTU(DwarfTypeUnit *TU);
+ DwarfTypeUnit &constructSkeletonTU(DwarfTypeUnit &TU);
/// \brief Emit the debug info dwo section.
void emitDebugInfoDWO();
@@ -559,25 +485,22 @@ class DwarfDebug : public AsmPrinterHandler {
/// Flags to let the linker know we have emitted new style pubnames. Only
/// emit it here if we don't have a skeleton CU for split dwarf.
- void addGnuPubAttributes(DwarfUnit *U, DIE *D) const;
+ void addGnuPubAttributes(DwarfUnit &U, DIE &D) const;
/// \brief Create new DwarfCompileUnit for the given metadata node with tag
/// DW_TAG_compile_unit.
- DwarfCompileUnit *constructDwarfCompileUnit(DICompileUnit DIUnit);
-
- /// \brief Construct subprogram DIE.
- void constructSubprogramDIE(DwarfCompileUnit *TheCU, const MDNode *N);
+ DwarfCompileUnit &constructDwarfCompileUnit(DICompileUnit DIUnit);
/// \brief Construct imported_module or imported_declaration DIE.
- void constructImportedEntityDIE(DwarfCompileUnit *TheCU, const MDNode *N);
+ void constructImportedEntityDIE(DwarfCompileUnit &TheCU, const MDNode *N);
/// \brief Construct import_module DIE.
- void constructImportedEntityDIE(DwarfCompileUnit *TheCU, const MDNode *N,
- DIE *Context);
+ void constructImportedEntityDIE(DwarfCompileUnit &TheCU, const MDNode *N,
+ DIE &Context);
/// \brief Construct import_module DIE.
- void constructImportedEntityDIE(DwarfCompileUnit *TheCU,
- const DIImportedEntity &Module, DIE *Context);
+ void constructImportedEntityDIE(DwarfCompileUnit &TheCU,
+ const DIImportedEntity &Module, DIE &Context);
/// \brief Register a source line with debug info. Returns the unique
/// label that was emitted and which provides correspondence to the
@@ -602,7 +525,7 @@ class DwarfDebug : public AsmPrinterHandler {
/// \brief Ensure that a label will be emitted before MI.
void requestLabelBeforeInsn(const MachineInstr *MI) {
- LabelsBeforeInsn.insert(std::make_pair(MI, (MCSymbol *)0));
+ LabelsBeforeInsn.insert(std::make_pair(MI, nullptr));
}
/// \brief Return Label preceding the instruction.
@@ -610,13 +533,15 @@ class DwarfDebug : public AsmPrinterHandler {
/// \brief Ensure that a label will be emitted after MI.
void requestLabelAfterInsn(const MachineInstr *MI) {
- LabelsAfterInsn.insert(std::make_pair(MI, (MCSymbol *)0));
+ LabelsAfterInsn.insert(std::make_pair(MI, nullptr));
}
/// \brief Return Label immediately following the instruction.
MCSymbol *getLabelAfterInsn(const MachineInstr *MI);
- void attachLowHighPC(DwarfCompileUnit *Unit, DIE *D, MCSymbol *Begin,
+ void attachRangesOrLowHighPC(DwarfCompileUnit &Unit, DIE &D,
+ const SmallVectorImpl<InsnRange> &Ranges);
+ void attachLowHighPC(DwarfCompileUnit &Unit, DIE &D, MCSymbol *Begin,
MCSymbol *End);
public:
@@ -625,6 +550,8 @@ public:
//
DwarfDebug(AsmPrinter *A, Module *M);
+ ~DwarfDebug() override;
+
void insertDIE(const MDNode *TypeMD, DIE *Die) {
MDTypeNodeToDieMap.insert(std::make_pair(TypeMD, Die));
}
@@ -654,7 +581,7 @@ public:
/// \brief Add a DIE to the set of types that we're going to pull into
/// type units.
void addDwarfTypeUnitType(DwarfCompileUnit &CU, StringRef Identifier,
- DIE *Die, DICompositeType CTy);
+ DIE &Die, DICompositeType CTy);
/// \brief Add a label so that arange data can be generated for it.
void addArangeLabel(SymbolCU SCU) { ArangeLabels.push_back(SCU); }
@@ -666,7 +593,7 @@ public:
}
/// \brief Recursively Emits a debug information entry.
- void emitDIE(DIE *Die);
+ void emitDIE(DIE &Die);
// Experimental DWARF5 features.
@@ -720,6 +647,18 @@ public:
/// isSubprogramContext - Return true if Context is either a subprogram
/// or another context nested inside a subprogram.
bool isSubprogramContext(const MDNode *Context);
+
+ void addSubprogramNames(DISubprogram SP, DIE &Die);
+
+ AddressPool &getAddressPool() { return AddrPool; }
+
+ void addAccelName(StringRef Name, const DIE &Die);
+
+ void addAccelObjC(StringRef Name, const DIE &Die);
+
+ void addAccelNamespace(StringRef Name, const DIE &Die);
+
+ void addAccelType(StringRef Name, const DIE &Die, char Flags);
};
} // End of namespace llvm
diff --git a/lib/CodeGen/AsmPrinter/DwarfException.cpp b/lib/CodeGen/AsmPrinter/DwarfException.cpp
index 113a9e4..3a12c73 100644
--- a/lib/CodeGen/AsmPrinter/DwarfException.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfException.cpp
@@ -103,7 +103,7 @@ ComputeActionsTable(const SmallVectorImpl<const LandingPadInfo*> &LandingPads,
int FirstAction = 0;
unsigned SizeActions = 0;
- const LandingPadInfo *PrevLPI = 0;
+ const LandingPadInfo *PrevLPI = nullptr;
for (SmallVectorImpl<const LandingPadInfo *>::const_iterator
I = LandingPads.begin(), E = LandingPads.end(); I != E; ++I) {
@@ -181,7 +181,7 @@ bool DwarfException::CallToNoUnwindFunction(const MachineInstr *MI) {
if (!MO.isGlobal()) continue;
const Function *F = dyn_cast<Function>(MO.getGlobal());
- if (F == 0) continue;
+ if (!F) continue;
if (SawFunc) {
// Be conservative. If we have more than one function operand for this
@@ -214,7 +214,7 @@ ComputeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
const SmallVectorImpl<const LandingPadInfo *> &LandingPads,
const SmallVectorImpl<unsigned> &FirstActions) {
// The end label of the previous invoke or nounwind try-range.
- MCSymbol *LastLabel = 0;
+ MCSymbol *LastLabel = nullptr;
// Whether there is a potentially throwing instruction (currently this means
// an ordinary call) between the end of the previous try-range and now.
@@ -224,18 +224,16 @@ ComputeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
bool PreviousIsInvoke = false;
// Visit all instructions in order of address.
- for (MachineFunction::const_iterator I = Asm->MF->begin(), E = Asm->MF->end();
- I != E; ++I) {
- for (MachineBasicBlock::const_iterator MI = I->begin(), E = I->end();
- MI != E; ++MI) {
- if (!MI->isEHLabel()) {
- if (MI->isCall())
- SawPotentiallyThrowing |= !CallToNoUnwindFunction(MI);
+ for (const auto &MBB : *Asm->MF) {
+ for (const auto &MI : MBB) {
+ if (!MI.isEHLabel()) {
+ if (MI.isCall())
+ SawPotentiallyThrowing |= !CallToNoUnwindFunction(&MI);
continue;
}
// End of the previous try-range?
- MCSymbol *BeginLabel = MI->getOperand(0).getMCSymbol();
+ MCSymbol *BeginLabel = MI.getOperand(0).getMCSymbol();
if (BeginLabel == LastLabel)
SawPotentiallyThrowing = false;
@@ -255,7 +253,7 @@ ComputeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
// create a call-site entry with no landing pad for the region between the
// try-ranges.
if (SawPotentiallyThrowing && Asm->MAI->isExceptionHandlingDwarf()) {
- CallSiteEntry Site = { LastLabel, BeginLabel, 0, 0 };
+ CallSiteEntry Site = { LastLabel, BeginLabel, nullptr, 0 };
CallSites.push_back(Site);
PreviousIsInvoke = false;
}
@@ -305,7 +303,7 @@ ComputeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
// function may throw, create a call-site entry with no landing pad for the
// region following the try-range.
if (SawPotentiallyThrowing && Asm->MAI->isExceptionHandlingDwarf()) {
- CallSiteEntry Site = { LastLabel, 0, 0, 0 };
+ CallSiteEntry Site = { LastLabel, nullptr, nullptr, 0 };
CallSites.push_back(Site);
}
}
@@ -571,10 +569,10 @@ void DwarfException::EmitExceptionTable() {
Asm->GetTempSymbol("eh_func_begin", Asm->getFunctionNumber());
MCSymbol *BeginLabel = S.BeginLabel;
- if (BeginLabel == 0)
+ if (!BeginLabel)
BeginLabel = EHFuncBeginSym;
MCSymbol *EndLabel = S.EndLabel;
- if (EndLabel == 0)
+ if (!EndLabel)
EndLabel = Asm->GetTempSymbol("eh_func_end", Asm->getFunctionNumber());
diff --git a/lib/CodeGen/AsmPrinter/DwarfFile.cpp b/lib/CodeGen/AsmPrinter/DwarfFile.cpp
new file mode 100644
index 0000000..737ee54
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/DwarfFile.cpp
@@ -0,0 +1,156 @@
+//===-- llvm/CodeGen/DwarfFile.cpp - Dwarf Debug Framework ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DwarfFile.h"
+
+#include "DwarfDebug.h"
+#include "DwarfUnit.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/Support/LEB128.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+
+namespace llvm {
+DwarfFile::DwarfFile(AsmPrinter *AP, StringRef Pref, BumpPtrAllocator &DA)
+ : Asm(AP), StrPool(DA, *Asm, Pref) {}
+
+DwarfFile::~DwarfFile() {}
+
+// Define a unique number for the abbreviation.
+//
+void DwarfFile::assignAbbrevNumber(DIEAbbrev &Abbrev) {
+ // Check the set for priors.
+ DIEAbbrev *InSet = AbbreviationsSet.GetOrInsertNode(&Abbrev);
+
+ // If it's newly added.
+ if (InSet == &Abbrev) {
+ // Add to abbreviation list.
+ Abbreviations.push_back(&Abbrev);
+
+ // Assign the vector position + 1 as its number.
+ Abbrev.setNumber(Abbreviations.size());
+ } else {
+ // Assign existing abbreviation number.
+ Abbrev.setNumber(InSet->getNumber());
+ }
+}
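A minimal model of the uniquing invariant above, using std::map in place of the FoldingSet (the string keys stand in for the abbreviation profile; purely illustrative):

    #include <map>
    #include <string>
    #include <vector>

    // Equal profiles share one 1-based number; new profiles are appended
    // to the emission list and numbered position + 1.
    static unsigned assignNumber(std::map<std::string, unsigned> &Seen,
                                 std::vector<std::string> &List,
                                 const std::string &Profile) {
      auto It = Seen.find(Profile);
      if (It != Seen.end())
        return It->second;
      List.push_back(Profile);
      return Seen[Profile] = List.size();
    }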
+
+void DwarfFile::addUnit(std::unique_ptr<DwarfUnit> U) {
+ CUs.push_back(std::move(U));
+}
+
+// Emit the various dwarf units to their unit sections, referencing the
+// abbreviations via ASectionSym.
+void DwarfFile::emitUnits(DwarfDebug *DD, const MCSymbol *ASectionSym) {
+ for (const auto &TheU : CUs) {
+ DIE &Die = TheU->getUnitDie();
+ const MCSection *USection = TheU->getSection();
+ Asm->OutStreamer.SwitchSection(USection);
+
+ // Emit the compile unit's header.
+ Asm->OutStreamer.EmitLabel(TheU->getLabelBegin());
+
+ // Emit the size of the content, not including the length field itself.
+ Asm->OutStreamer.AddComment("Length of Unit");
+ Asm->EmitInt32(TheU->getHeaderSize() + Die.getSize());
+
+ TheU->emitHeader(ASectionSym);
+
+ DD->emitDIE(Die);
+ Asm->OutStreamer.EmitLabel(TheU->getLabelEnd());
+ }
+}
+
+// Compute the size and offset for each DIE.
+void DwarfFile::computeSizeAndOffsets() {
+ // Offset from the first CU in the debug info section is 0 initially.
+ unsigned SecOffset = 0;
+
+ // Iterate over each compile unit and set the size and offsets for each
+ // DIE within each compile unit. All offsets are CU relative.
+ for (const auto &TheU : CUs) {
+ TheU->setDebugInfoOffset(SecOffset);
+
+ // CU-relative offset is reset to 0 here.
+ unsigned Offset = sizeof(int32_t) + // Length of Unit Info
+ TheU->getHeaderSize(); // Unit-specific headers
+
+ // EndOffset here is CU-relative, after laying out
+ // all of the CU's DIEs.
+ unsigned EndOffset = computeSizeAndOffset(TheU->getUnitDie(), Offset);
+ SecOffset += EndOffset;
+ }
+}
+
+// Compute the size and offset of a DIE. The offset is relative to the start
+// of the CU. It returns the offset after laying out the DIE.
+unsigned DwarfFile::computeSizeAndOffset(DIE &Die, unsigned Offset) {
+ // Record the abbreviation.
+ assignAbbrevNumber(Die.getAbbrev());
+
+ // Get the abbreviation for this DIE.
+ const DIEAbbrev &Abbrev = Die.getAbbrev();
+
+ // Set DIE offset
+ Die.setOffset(Offset);
+
+ // Start the size with the size of the abbreviation code.
+ Offset += getULEB128Size(Die.getAbbrevNumber());
+
+ const SmallVectorImpl<DIEValue *> &Values = Die.getValues();
+ const SmallVectorImpl<DIEAbbrevData> &AbbrevData = Abbrev.getData();
+
+ // Size the DIE attribute values.
+ for (unsigned i = 0, N = Values.size(); i < N; ++i)
+ // Size attribute value.
+ Offset += Values[i]->SizeOf(Asm, AbbrevData[i].getForm());
+
+ // Get the children.
+ const auto &Children = Die.getChildren();
+
+ // Size the DIE children if any.
+ if (!Children.empty()) {
+ assert(Abbrev.hasChildren() && "Children flag not set");
+
+ for (auto &Child : Children)
+ Offset = computeSizeAndOffset(*Child, Offset);
+
+ // End of children marker.
+ Offset += sizeof(int8_t);
+ }
+
+ Die.setSize(Offset - Die.getOffset());
+ return Offset;
+}
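The recursion lays DIEs out in pre-order: a node's offset is fixed before its children are visited, and its size covers the abbrev code, the attribute values, all children, and a one-byte end-of-children marker when children exist. A stripped-down model over a generic tree (hypothetical types):

    #include <vector>

    struct Node {
      unsigned Offset = 0, Size = 0;
      unsigned PayloadSize = 0; // abbrev code + attribute values
      std::vector<Node> Children;
    };

    static unsigned layout(Node &N, unsigned Offset) {
      N.Offset = Offset;
      Offset += N.PayloadSize;
      if (!N.Children.empty()) {
        for (Node &C : N.Children)
          Offset = layout(C, Offset);
        Offset += 1; // end-of-children marker
      }
      N.Size = Offset - N.Offset;
      return Offset;
    }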
+void DwarfFile::emitAbbrevs(const MCSection *Section) {
+ // Check to see if it is worth the effort.
+ if (!Abbreviations.empty()) {
+ // Start the debug abbrev section.
+ Asm->OutStreamer.SwitchSection(Section);
+
+ // For each abbreviation.
+ for (const DIEAbbrev *Abbrev : Abbreviations) {
+ // Emit the abbreviation's code (base-1 index).
+ Asm->EmitULEB128(Abbrev->getNumber(), "Abbreviation Code");
+
+ // Emit the abbreviations data.
+ Abbrev->Emit(Asm);
+ }
+
+ // Mark end of abbreviations.
+ Asm->EmitULEB128(0, "EOM(3)");
+ }
+}
+
+// Emit strings into a string section.
+void DwarfFile::emitStrings(const MCSection *StrSection,
+ const MCSection *OffsetSection,
+ const MCSymbol *StrSecSym) {
+ StrPool.emit(*Asm, StrSection, OffsetSection, StrSecSym);
+}
+} // End of namespace llvm
diff --git a/lib/CodeGen/AsmPrinter/DwarfFile.h b/lib/CodeGen/AsmPrinter/DwarfFile.h
new file mode 100644
index 0000000..3985eb2
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/DwarfFile.h
@@ -0,0 +1,84 @@
+//===-- llvm/CodeGen/DwarfFile.h - Dwarf Debug Framework -------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CODEGEN_ASMPRINTER_DWARFFILE_H__
+#define CODEGEN_ASMPRINTER_DWARFFILE_H__
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/Support/Allocator.h"
+#include "AddressPool.h"
+#include "DwarfStringPool.h"
+
+#include <vector>
+#include <string>
+#include <memory>
+
+namespace llvm {
+class AsmPrinter;
+class DwarfUnit;
+class DIEAbbrev;
+class MCSymbol;
+class DIE;
+class StringRef;
+class DwarfDebug;
+class MCSection;
+
+class DwarfFile {
+ // Target of Dwarf emission, used for sizing of abbreviations.
+ AsmPrinter *Asm;
+
+ // Used to uniquely define abbreviations.
+ FoldingSet<DIEAbbrev> AbbreviationsSet;
+
+ // A list of all the unique abbreviations in use.
+ std::vector<DIEAbbrev *> Abbreviations;
+
+ // A pointer to all units in the section.
+ SmallVector<std::unique_ptr<DwarfUnit>, 1> CUs;
+
+ DwarfStringPool StrPool;
+
+public:
+ DwarfFile(AsmPrinter *AP, StringRef Pref, BumpPtrAllocator &DA);
+
+ ~DwarfFile();
+
+ const SmallVectorImpl<std::unique_ptr<DwarfUnit>> &getUnits() { return CUs; }
+
+ /// \brief Compute the size and offset of a DIE given an incoming Offset.
+ unsigned computeSizeAndOffset(DIE &Die, unsigned Offset);
+
+ /// \brief Compute the size and offset of all the DIEs.
+ void computeSizeAndOffsets();
+
+ /// \brief Define a unique number for the abbreviation.
+ void assignAbbrevNumber(DIEAbbrev &Abbrev);
+
+ /// \brief Add a unit to the list of CUs.
+ void addUnit(std::unique_ptr<DwarfUnit> U);
+
+ /// \brief Emit all of the units to the section listed with the given
+ /// abbreviation section.
+ void emitUnits(DwarfDebug *DD, const MCSymbol *ASectionSym);
+
+ /// \brief Emit a set of abbreviations to the specific section.
+ void emitAbbrevs(const MCSection *);
+
+ /// \brief Emit all of the strings to the section given.
+ void emitStrings(const MCSection *StrSection,
+ const MCSection *OffsetSection = nullptr,
+ const MCSymbol *StrSecSym = nullptr);
+
+ /// \brief Returns the string pool.
+ DwarfStringPool &getStringPool() { return StrPool; }
+};
+} // End of namespace llvm
+#endif
diff --git a/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp b/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp
new file mode 100644
index 0000000..72cab60
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp
@@ -0,0 +1,74 @@
+//===-- llvm/CodeGen/DwarfStringPool.cpp - Dwarf Debug Framework ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DwarfStringPool.h"
+#include "llvm/MC/MCStreamer.h"
+
+using namespace llvm;
+
+MCSymbol *DwarfStringPool::getSectionSymbol() { return SectionSymbol; }
+
+static std::pair<MCSymbol *, unsigned> &
+getEntry(AsmPrinter &Asm,
+ StringMap<std::pair<MCSymbol *, unsigned>, BumpPtrAllocator &> &Pool,
+ StringRef Prefix, StringRef Str) {
+ std::pair<MCSymbol *, unsigned> &Entry =
+ Pool.GetOrCreateValue(Str).getValue();
+ if (!Entry.first) {
+ Entry.second = Pool.size() - 1;
+ Entry.first = Asm.GetTempSymbol(Prefix, Entry.second);
+ }
+ return Entry;
+}
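Note the lazy-create idiom here: GetOrCreateValue default-constructs the pair, so a null symbol marks a fresh entry, and the index is simply the pool size minus one at insertion time, i.e. first-insertion order. The same shape with std::unordered_map (a sketch; symbol creation stubbed out):

    #include <string>
    #include <unordered_map>

    struct PoolEntry { unsigned Index = 0; bool Created = false; };

    // Indices are assigned exactly once, in first-insertion order.
    static PoolEntry &
    getOrCreate(std::unordered_map<std::string, PoolEntry> &Pool,
                const std::string &Str) {
      PoolEntry &E = Pool[Str]; // default-constructed on first use
      if (!E.Created) {
        E.Index = Pool.size() - 1;
        E.Created = true;
      }
      return E;
    }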
+
+MCSymbol *DwarfStringPool::getSymbol(AsmPrinter &Asm, StringRef Str) {
+ return getEntry(Asm, Pool, Prefix, Str).first;
+}
+
+unsigned DwarfStringPool::getIndex(AsmPrinter &Asm, StringRef Str) {
+ return getEntry(Asm, Pool, Prefix, Str).second;
+}
+
+void DwarfStringPool::emit(AsmPrinter &Asm, const MCSection *StrSection,
+ const MCSection *OffsetSection,
+ const MCSymbol *StrSecSym) {
+ if (Pool.empty())
+ return;
+
+ // Start the dwarf str section.
+ Asm.OutStreamer.SwitchSection(StrSection);
+
+ // Get all of the string pool entries and put them in an array indexed by
+ // their ID so we can emit them in order.
+ SmallVector<const StringMapEntry<std::pair<MCSymbol *, unsigned>> *, 64>
+ Entries(Pool.size());
+
+ for (const auto &E : Pool)
+ Entries[E.getValue().second] = &E;
+
+ for (const auto &Entry : Entries) {
+ // Emit a label for reference from debug information entries.
+ Asm.OutStreamer.EmitLabel(Entry->getValue().first);
+
+ // Emit the string itself with a terminating null byte.
+ Asm.OutStreamer.EmitBytes(
+ StringRef(Entry->getKeyData(), Entry->getKeyLength() + 1));
+ }
+
+ // If we've got an offset section, go ahead and emit that now as well.
+ if (OffsetSection) {
+ Asm.OutStreamer.SwitchSection(OffsetSection);
+ unsigned offset = 0;
+ unsigned size = 4; // FIXME: DWARF64 is 8.
+ for (const auto &Entry : Entries) {
+ Asm.OutStreamer.EmitIntValue(offset, size);
+ offset += Entry->getKeyLength() + 1;
+ }
+ }
+}
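A worked example of the offset section above, assuming the 4-byte DWARF32 offsets noted in the FIXME: each offset advances by the key length plus the terminating NUL.

    // Cumulative offsets for pool entries "foo" then "bar"
    // (string section holds "foo\0bar\0"):
    //   offset("foo") = 0
    //   offset("bar") = 0 + (3 + 1) = 4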
diff --git a/lib/CodeGen/AsmPrinter/DwarfStringPool.h b/lib/CodeGen/AsmPrinter/DwarfStringPool.h
new file mode 100644
index 0000000..c1615fb
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/DwarfStringPool.h
@@ -0,0 +1,55 @@
+//===-- llvm/CodeGen/DwarfStringPool.h - Dwarf Debug Framework -*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CODEGEN_ASMPRINTER_DWARFSTRINGPOOL_H__
+#define CODEGEN_ASMPRINTER_DWARFSTRINGPOOL_H__
+
+#include "llvm/ADT/StringMap.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/Support/Allocator.h"
+
+#include <utility>
+
+namespace llvm {
+
+class MCSymbol;
+class MCSection;
+class StringRef;
+
+// Collection of strings used by a unit, together with a String -> Symbol
+// mapping for strings that are referenced indirectly.
+class DwarfStringPool {
+ StringMap<std::pair<MCSymbol *, unsigned>, BumpPtrAllocator &> Pool;
+ StringRef Prefix;
+ MCSymbol *SectionSymbol;
+
+public:
+ DwarfStringPool(BumpPtrAllocator &A, AsmPrinter &Asm, StringRef Prefix)
+ : Pool(A), Prefix(Prefix), SectionSymbol(Asm.GetTempSymbol(Prefix)) {}
+
+ void emit(AsmPrinter &Asm, const MCSection *StrSection,
+ const MCSection *OffsetSection = nullptr,
+ const MCSymbol *StrSecSym = nullptr);
+
+ /// \brief Returns the entry into the start of the pool.
+ MCSymbol *getSectionSymbol();
+
+ /// \brief Returns an entry into the string pool with the given
+ /// string text.
+ MCSymbol *getSymbol(AsmPrinter &Asm, StringRef Str);
+
+ /// \brief Returns the index into the string pool with the given
+ /// string text.
+ unsigned getIndex(AsmPrinter &Asm, StringRef Str);
+
+ bool empty() const { return Pool.empty(); }
+};
+} // End of namespace llvm
+#endif
diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
index 82e9bb0..a70c0f7 100644
--- a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
@@ -11,8 +11,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "dwarfdebug"
-
#include "DwarfUnit.h"
#include "DwarfAccelTable.h"
#include "DwarfDebug.h"
@@ -35,33 +33,38 @@
using namespace llvm;
+#define DEBUG_TYPE "dwarfdebug"
+
static cl::opt<bool>
GenerateDwarfTypeUnits("generate-type-units", cl::Hidden,
cl::desc("Generate DWARF4 type units."),
cl::init(false));
/// Unit - Unit constructor.
-DwarfUnit::DwarfUnit(unsigned UID, DIE *D, DICompileUnit Node, AsmPrinter *A,
- DwarfDebug *DW, DwarfFile *DWU)
- : UniqueID(UID), CUNode(Node), UnitDie(D), DebugInfoOffset(0), Asm(A),
- DD(DW), DU(DWU), IndexTyDie(0), Section(0), Skeleton(0) {
+DwarfUnit::DwarfUnit(unsigned UID, dwarf::Tag UnitTag, DICompileUnit Node,
+ AsmPrinter *A, DwarfDebug *DW, DwarfFile *DWU)
+ : UniqueID(UID), CUNode(Node), UnitDie(UnitTag), DebugInfoOffset(0), Asm(A),
+ DD(DW), DU(DWU), IndexTyDie(nullptr), Section(nullptr),
+ Skeleton(nullptr) {
+ assert(UnitTag == dwarf::DW_TAG_compile_unit ||
+ UnitTag == dwarf::DW_TAG_type_unit);
DIEIntegerOne = new (DIEValueAllocator) DIEInteger(1);
}
-DwarfCompileUnit::DwarfCompileUnit(unsigned UID, DIE *D, DICompileUnit Node,
+DwarfCompileUnit::DwarfCompileUnit(unsigned UID, DICompileUnit Node,
AsmPrinter *A, DwarfDebug *DW,
DwarfFile *DWU)
- : DwarfUnit(UID, D, Node, A, DW, DWU) {
- insertDIE(Node, D);
+ : DwarfUnit(UID, dwarf::DW_TAG_compile_unit, Node, A, DW, DWU) {
+ insertDIE(Node, &getUnitDie());
}
-DwarfTypeUnit::DwarfTypeUnit(unsigned UID, DIE *D, DwarfCompileUnit &CU,
- AsmPrinter *A, DwarfDebug *DW, DwarfFile *DWU,
+DwarfTypeUnit::DwarfTypeUnit(unsigned UID, DwarfCompileUnit &CU, AsmPrinter *A,
+ DwarfDebug *DW, DwarfFile *DWU,
MCDwarfDwoLineTable *SplitLineTable)
- : DwarfUnit(UID, D, CU.getCUNode(), A, DW, DWU), CU(CU),
- SplitLineTable(SplitLineTable) {
+ : DwarfUnit(UID, dwarf::DW_TAG_type_unit, CU.getCUNode(), A, DW, DWU),
+ CU(CU), SplitLineTable(SplitLineTable) {
if (SplitLineTable)
- addSectionOffset(UnitDie.get(), dwarf::DW_AT_stmt_list, 0);
+ addSectionOffset(UnitDie, dwarf::DW_AT_stmt_list, 0);
}
/// ~Unit - Destructor for compile unit.
@@ -74,7 +77,7 @@ DwarfUnit::~DwarfUnit() {
/// createDIEEntry - Creates a new DIEEntry to be a proxy for a debug
/// information entry.
-DIEEntry *DwarfUnit::createDIEEntry(DIE *Entry) {
+DIEEntry *DwarfUnit::createDIEEntry(DIE &Entry) {
DIEEntry *Value = new (DIEValueAllocator) DIEEntry(Entry);
return Value;
}
@@ -159,39 +162,39 @@ void DwarfUnit::insertDIE(DIDescriptor Desc, DIE *D) {
}
/// addFlag - Add a flag that is true.
-void DwarfUnit::addFlag(DIE *Die, dwarf::Attribute Attribute) {
+void DwarfUnit::addFlag(DIE &Die, dwarf::Attribute Attribute) {
if (DD->getDwarfVersion() >= 4)
- Die->addValue(Attribute, dwarf::DW_FORM_flag_present, DIEIntegerOne);
+ Die.addValue(Attribute, dwarf::DW_FORM_flag_present, DIEIntegerOne);
else
- Die->addValue(Attribute, dwarf::DW_FORM_flag, DIEIntegerOne);
+ Die.addValue(Attribute, dwarf::DW_FORM_flag, DIEIntegerOne);
}
/// addUInt - Add an unsigned integer attribute data and value.
///
-void DwarfUnit::addUInt(DIE *Die, dwarf::Attribute Attribute,
+void DwarfUnit::addUInt(DIE &Die, dwarf::Attribute Attribute,
Optional<dwarf::Form> Form, uint64_t Integer) {
if (!Form)
Form = DIEInteger::BestForm(false, Integer);
DIEValue *Value = Integer == 1 ? DIEIntegerOne : new (DIEValueAllocator)
DIEInteger(Integer);
- Die->addValue(Attribute, *Form, Value);
+ Die.addValue(Attribute, *Form, Value);
}
-void DwarfUnit::addUInt(DIE *Block, dwarf::Form Form, uint64_t Integer) {
+void DwarfUnit::addUInt(DIE &Block, dwarf::Form Form, uint64_t Integer) {
addUInt(Block, (dwarf::Attribute)0, Form, Integer);
}
/// addSInt - Add a signed integer attribute data and value.
///
-void DwarfUnit::addSInt(DIE *Die, dwarf::Attribute Attribute,
+void DwarfUnit::addSInt(DIE &Die, dwarf::Attribute Attribute,
Optional<dwarf::Form> Form, int64_t Integer) {
if (!Form)
Form = DIEInteger::BestForm(true, Integer);
DIEValue *Value = new (DIEValueAllocator) DIEInteger(Integer);
- Die->addValue(Attribute, *Form, Value);
+ Die.addValue(Attribute, *Form, Value);
}
-void DwarfUnit::addSInt(DIELoc *Die, Optional<dwarf::Form> Form,
+void DwarfUnit::addSInt(DIELoc &Die, Optional<dwarf::Form> Form,
int64_t Integer) {
addSInt(Die, (dwarf::Attribute)0, Form, Integer);
}
@@ -201,66 +204,66 @@ void DwarfUnit::addSInt(DIELoc *Die, Optional<dwarf::Form> Form,
/// more predictable sizes. In the case of split dwarf we emit an index
/// into another table which gets us the static offset into the string
/// table.
-void DwarfUnit::addString(DIE *Die, dwarf::Attribute Attribute,
+void DwarfUnit::addString(DIE &Die, dwarf::Attribute Attribute,
StringRef String) {
if (!DD->useSplitDwarf())
return addLocalString(Die, Attribute, String);
- unsigned idx = DU->getStringPoolIndex(String);
+ unsigned idx = DU->getStringPool().getIndex(*Asm, String);
DIEValue *Value = new (DIEValueAllocator) DIEInteger(idx);
DIEValue *Str = new (DIEValueAllocator) DIEString(Value, String);
- Die->addValue(Attribute, dwarf::DW_FORM_GNU_str_index, Str);
+ Die.addValue(Attribute, dwarf::DW_FORM_GNU_str_index, Str);
}
/// addLocalString - Add a string attribute data and value. This is guaranteed
/// to be in the local string pool instead of indirected.
-void DwarfUnit::addLocalString(DIE *Die, dwarf::Attribute Attribute,
+void DwarfUnit::addLocalString(DIE &Die, dwarf::Attribute Attribute,
StringRef String) {
- MCSymbol *Symb = DU->getStringPoolEntry(String);
+ MCSymbol *Symb = DU->getStringPool().getSymbol(*Asm, String);
DIEValue *Value;
if (Asm->MAI->doesDwarfUseRelocationsAcrossSections())
Value = new (DIEValueAllocator) DIELabel(Symb);
else {
- MCSymbol *StringPool = DU->getStringPoolSym();
+ MCSymbol *StringPool = DU->getStringPool().getSectionSymbol();
Value = new (DIEValueAllocator) DIEDelta(Symb, StringPool);
}
DIEValue *Str = new (DIEValueAllocator) DIEString(Value, String);
- Die->addValue(Attribute, dwarf::DW_FORM_strp, Str);
+ Die.addValue(Attribute, dwarf::DW_FORM_strp, Str);
}
/// addExpr - Add a Dwarf expression attribute data and value.
///
-void DwarfUnit::addExpr(DIELoc *Die, dwarf::Form Form, const MCExpr *Expr) {
+void DwarfUnit::addExpr(DIELoc &Die, dwarf::Form Form, const MCExpr *Expr) {
DIEValue *Value = new (DIEValueAllocator) DIEExpr(Expr);
- Die->addValue((dwarf::Attribute)0, Form, Value);
+ Die.addValue((dwarf::Attribute)0, Form, Value);
}
/// addLocationList - Add a Dwarf loclistptr attribute data and value.
///
-void DwarfUnit::addLocationList(DIE *Die, dwarf::Attribute Attribute,
+void DwarfUnit::addLocationList(DIE &Die, dwarf::Attribute Attribute,
unsigned Index) {
DIEValue *Value = new (DIEValueAllocator) DIELocList(Index);
dwarf::Form Form = DD->getDwarfVersion() >= 4 ? dwarf::DW_FORM_sec_offset
: dwarf::DW_FORM_data4;
- Die->addValue(Attribute, Form, Value);
+ Die.addValue(Attribute, Form, Value);
}
/// addLabel - Add a Dwarf label attribute data and value.
///
-void DwarfUnit::addLabel(DIE *Die, dwarf::Attribute Attribute, dwarf::Form Form,
+void DwarfUnit::addLabel(DIE &Die, dwarf::Attribute Attribute, dwarf::Form Form,
const MCSymbol *Label) {
DIEValue *Value = new (DIEValueAllocator) DIELabel(Label);
- Die->addValue(Attribute, Form, Value);
+ Die.addValue(Attribute, Form, Value);
}
-void DwarfUnit::addLabel(DIELoc *Die, dwarf::Form Form, const MCSymbol *Label) {
+void DwarfUnit::addLabel(DIELoc &Die, dwarf::Form Form, const MCSymbol *Label) {
addLabel(Die, (dwarf::Attribute)0, Form, Label);
}
/// addSectionLabel - Add a Dwarf section label attribute data and value.
///
-void DwarfUnit::addSectionLabel(DIE *Die, dwarf::Attribute Attribute,
+void DwarfUnit::addSectionLabel(DIE &Die, dwarf::Attribute Attribute,
const MCSymbol *Label) {
if (DD->getDwarfVersion() >= 4)
addLabel(Die, Attribute, dwarf::DW_FORM_sec_offset, Label);
@@ -270,7 +273,7 @@ void DwarfUnit::addSectionLabel(DIE *Die, dwarf::Attribute Attribute,
/// addSectionOffset - Add an offset into a section attribute data and value.
///
-void DwarfUnit::addSectionOffset(DIE *Die, dwarf::Attribute Attribute,
+void DwarfUnit::addSectionOffset(DIE &Die, dwarf::Attribute Attribute,
uint64_t Integer) {
if (DD->getDwarfVersion() >= 4)
addUInt(Die, Attribute, dwarf::DW_FORM_sec_offset, Integer);
@@ -281,7 +284,7 @@ void DwarfUnit::addSectionOffset(DIE *Die, dwarf::Attribute Attribute,
/// addLabelAddress - Add a dwarf label attribute data and value using
/// DW_FORM_addr or DW_FORM_GNU_addr_index.
///
-void DwarfCompileUnit::addLabelAddress(DIE *Die, dwarf::Attribute Attribute,
+void DwarfCompileUnit::addLabelAddress(DIE &Die, dwarf::Attribute Attribute,
const MCSymbol *Label) {
if (!DD->useSplitDwarf())
@@ -290,24 +293,20 @@ void DwarfCompileUnit::addLabelAddress(DIE *Die, dwarf::Attribute Attribute,
if (Label)
DD->addArangeLabel(SymbolCU(this, Label));
- unsigned idx = DU->getAddrPoolIndex(Label);
+ unsigned idx = DD->getAddressPool().getIndex(Label);
DIEValue *Value = new (DIEValueAllocator) DIEInteger(idx);
- Die->addValue(Attribute, dwarf::DW_FORM_GNU_addr_index, Value);
+ Die.addValue(Attribute, dwarf::DW_FORM_GNU_addr_index, Value);
}
-void DwarfCompileUnit::addLocalLabelAddress(DIE *Die,
+void DwarfCompileUnit::addLocalLabelAddress(DIE &Die,
dwarf::Attribute Attribute,
const MCSymbol *Label) {
if (Label)
DD->addArangeLabel(SymbolCU(this, Label));
- if (Label) {
- DIEValue *Value = new (DIEValueAllocator) DIELabel(Label);
- Die->addValue(Attribute, dwarf::DW_FORM_addr, Value);
- } else {
- DIEValue *Value = new (DIEValueAllocator) DIEInteger(0);
- Die->addValue(Attribute, dwarf::DW_FORM_addr, Value);
- }
+ Die.addValue(Attribute, dwarf::DW_FORM_addr,
+ Label ? (DIEValue *)new (DIEValueAllocator) DIELabel(Label)
+ : new (DIEValueAllocator) DIEInteger(0));
}
unsigned DwarfCompileUnit::getOrCreateSourceID(StringRef FileName, StringRef DirName) {
@@ -329,86 +328,94 @@ unsigned DwarfTypeUnit::getOrCreateSourceID(StringRef FileName, StringRef DirNam
/// addOpAddress - Add a dwarf op address data and value using the
/// form given and an op of either DW_FORM_addr or DW_FORM_GNU_addr_index.
///
-void DwarfUnit::addOpAddress(DIELoc *Die, const MCSymbol *Sym) {
+void DwarfUnit::addOpAddress(DIELoc &Die, const MCSymbol *Sym) {
if (!DD->useSplitDwarf()) {
addUInt(Die, dwarf::DW_FORM_data1, dwarf::DW_OP_addr);
addLabel(Die, dwarf::DW_FORM_udata, Sym);
} else {
addUInt(Die, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_addr_index);
- addUInt(Die, dwarf::DW_FORM_GNU_addr_index, DU->getAddrPoolIndex(Sym));
+ addUInt(Die, dwarf::DW_FORM_GNU_addr_index,
+ DD->getAddressPool().getIndex(Sym));
}
}
/// addSectionDelta - Add a section label delta attribute data and value.
///
-void DwarfUnit::addSectionDelta(DIE *Die, dwarf::Attribute Attribute,
+void DwarfUnit::addSectionDelta(DIE &Die, dwarf::Attribute Attribute,
const MCSymbol *Hi, const MCSymbol *Lo) {
DIEValue *Value = new (DIEValueAllocator) DIEDelta(Hi, Lo);
- if (DD->getDwarfVersion() >= 4)
- Die->addValue(Attribute, dwarf::DW_FORM_sec_offset, Value);
- else
- Die->addValue(Attribute, dwarf::DW_FORM_data4, Value);
+ Die.addValue(Attribute, DD->getDwarfVersion() >= 4 ? dwarf::DW_FORM_sec_offset
+ : dwarf::DW_FORM_data4,
+ Value);
}
-void DwarfUnit::addLabelDelta(DIE *Die, dwarf::Attribute Attribute,
+void DwarfUnit::addLabelDelta(DIE &Die, dwarf::Attribute Attribute,
const MCSymbol *Hi, const MCSymbol *Lo) {
DIEValue *Value = new (DIEValueAllocator) DIEDelta(Hi, Lo);
- Die->addValue(Attribute, dwarf::DW_FORM_data4, Value);
+ Die.addValue(Attribute, dwarf::DW_FORM_data4, Value);
}
/// addDIEEntry - Add a DIE attribute data and value.
///
-void DwarfUnit::addDIEEntry(DIE *Die, dwarf::Attribute Attribute, DIE *Entry) {
+void DwarfUnit::addDIEEntry(DIE &Die, dwarf::Attribute Attribute, DIE &Entry) {
addDIEEntry(Die, Attribute, createDIEEntry(Entry));
}
-void DwarfUnit::addDIETypeSignature(DIE *Die, const DwarfTypeUnit &Type) {
- Die->addValue(dwarf::DW_AT_signature, dwarf::DW_FORM_ref_sig8,
- new (DIEValueAllocator) DIETypeSignature(Type));
+void DwarfUnit::addDIETypeSignature(DIE &Die, const DwarfTypeUnit &Type) {
+ // Flag the type unit reference as a declaration so that if it contains
+ // members (implicit special members, static data member definitions, member
+ // declarations for definitions in this CU, etc) consumers don't get confused
+ // and think this is a full definition.
+ addFlag(Die, dwarf::DW_AT_declaration);
+
+ Die.addValue(dwarf::DW_AT_signature, dwarf::DW_FORM_ref_sig8,
+ new (DIEValueAllocator) DIETypeSignature(Type));
}
-void DwarfUnit::addDIEEntry(DIE *Die, dwarf::Attribute Attribute,
+void DwarfUnit::addDIEEntry(DIE &Die, dwarf::Attribute Attribute,
DIEEntry *Entry) {
- const DIE *DieCU = Die->getUnitOrNull();
- const DIE *EntryCU = Entry->getEntry()->getUnitOrNull();
+ const DIE *DieCU = Die.getUnitOrNull();
+ const DIE *EntryCU = Entry->getEntry().getUnitOrNull();
if (!DieCU)
// We assume that Die belongs to this CU, if it is not linked to any CU yet.
- DieCU = getUnitDie();
+ DieCU = &getUnitDie();
if (!EntryCU)
- EntryCU = getUnitDie();
- Die->addValue(Attribute, EntryCU == DieCU ? dwarf::DW_FORM_ref4
- : dwarf::DW_FORM_ref_addr,
- Entry);
+ EntryCU = &getUnitDie();
+ Die.addValue(Attribute,
+ EntryCU == DieCU ? dwarf::DW_FORM_ref4 : dwarf::DW_FORM_ref_addr,
+ Entry);
}
/// Create a DIE with the given Tag, add the DIE to its parent, and
/// call insertDIE if MD is not null.
-DIE *DwarfUnit::createAndAddDIE(unsigned Tag, DIE &Parent, DIDescriptor N) {
- DIE *Die = new DIE(Tag);
- Parent.addChild(Die);
+DIE &DwarfUnit::createAndAddDIE(unsigned Tag, DIE &Parent, DIDescriptor N) {
+ assert(Tag != dwarf::DW_TAG_auto_variable &&
+ Tag != dwarf::DW_TAG_arg_variable);
+ Parent.addChild(make_unique<DIE>((dwarf::Tag)Tag));
+ DIE &Die = *Parent.getChildren().back();
if (N)
- insertDIE(N, Die);
+ insertDIE(N, &Die);
return Die;
}
/// addBlock - Add block data.
///
-void DwarfUnit::addBlock(DIE *Die, dwarf::Attribute Attribute, DIELoc *Loc) {
+void DwarfUnit::addBlock(DIE &Die, dwarf::Attribute Attribute, DIELoc *Loc) {
Loc->ComputeSize(Asm);
DIELocs.push_back(Loc); // Memoize so we can call the destructor later on.
- Die->addValue(Attribute, Loc->BestForm(DD->getDwarfVersion()), Loc);
+ Die.addValue(Attribute, Loc->BestForm(DD->getDwarfVersion()), Loc);
}
-void DwarfUnit::addBlock(DIE *Die, dwarf::Attribute Attribute,
+void DwarfUnit::addBlock(DIE &Die, dwarf::Attribute Attribute,
DIEBlock *Block) {
Block->ComputeSize(Asm);
DIEBlocks.push_back(Block); // Memoize so we can call the destructor later on.
- Die->addValue(Attribute, Block->BestForm(), Block);
+ Die.addValue(Attribute, Block->BestForm(), Block);
}
/// addSourceLine - Add location information to specified debug information
/// entry.
-void DwarfUnit::addSourceLine(DIE *Die, unsigned Line, StringRef File,
+void DwarfUnit::addSourceLine(DIE &Die, unsigned Line, StringRef File,
StringRef Directory) {
if (Line == 0)
return;
@@ -421,7 +428,7 @@ void DwarfUnit::addSourceLine(DIE *Die, unsigned Line, StringRef File,
/// addSourceLine - Add location information to specified debug information
/// entry.
-void DwarfUnit::addSourceLine(DIE *Die, DIVariable V) {
+void DwarfUnit::addSourceLine(DIE &Die, DIVariable V) {
assert(V.isVariable());
addSourceLine(Die, V.getLineNumber(), V.getContext().getFilename(),
@@ -430,7 +437,7 @@ void DwarfUnit::addSourceLine(DIE *Die, DIVariable V) {
/// addSourceLine - Add location information to specified debug information
/// entry.
-void DwarfUnit::addSourceLine(DIE *Die, DIGlobalVariable G) {
+void DwarfUnit::addSourceLine(DIE &Die, DIGlobalVariable G) {
assert(G.isGlobalVariable());
addSourceLine(Die, G.getLineNumber(), G.getFilename(), G.getDirectory());
@@ -438,7 +445,7 @@ void DwarfUnit::addSourceLine(DIE *Die, DIGlobalVariable G) {
/// addSourceLine - Add location information to specified debug information
/// entry.
-void DwarfUnit::addSourceLine(DIE *Die, DISubprogram SP) {
+void DwarfUnit::addSourceLine(DIE &Die, DISubprogram SP) {
assert(SP.isSubprogram());
addSourceLine(Die, SP.getLineNumber(), SP.getFilename(), SP.getDirectory());
@@ -446,7 +453,7 @@ void DwarfUnit::addSourceLine(DIE *Die, DISubprogram SP) {
/// addSourceLine - Add location information to specified debug information
/// entry.
-void DwarfUnit::addSourceLine(DIE *Die, DIType Ty) {
+void DwarfUnit::addSourceLine(DIE &Die, DIType Ty) {
assert(Ty.isType());
addSourceLine(Die, Ty.getLineNumber(), Ty.getFilename(), Ty.getDirectory());
@@ -454,7 +461,7 @@ void DwarfUnit::addSourceLine(DIE *Die, DIType Ty) {
/// addSourceLine - Add location information to specified debug information
/// entry.
-void DwarfUnit::addSourceLine(DIE *Die, DIObjCProperty Ty) {
+void DwarfUnit::addSourceLine(DIE &Die, DIObjCProperty Ty) {
assert(Ty.isObjCProperty());
DIFile File = Ty.getFile();
@@ -464,7 +471,7 @@ void DwarfUnit::addSourceLine(DIE *Die, DIObjCProperty Ty) {
/// addSourceLine - Add location information to specified debug information
/// entry.
-void DwarfUnit::addSourceLine(DIE *Die, DINameSpace NS) {
+void DwarfUnit::addSourceLine(DIE &Die, DINameSpace NS) {
assert(NS.Verify());
addSourceLine(Die, NS.getLineNumber(), NS.getFilename(), NS.getDirectory());
@@ -472,7 +479,7 @@ void DwarfUnit::addSourceLine(DIE *Die, DINameSpace NS) {
/// addVariableAddress - Add DW_AT_location attribute for a
/// DbgVariable based on provided MachineLocation.
-void DwarfUnit::addVariableAddress(const DbgVariable &DV, DIE *Die,
+void DwarfUnit::addVariableAddress(const DbgVariable &DV, DIE &Die,
MachineLocation Location) {
if (DV.variableHasComplexAddress())
addComplexAddress(DV, Die, dwarf::DW_AT_location, Location);
@@ -484,7 +491,7 @@ void DwarfUnit::addVariableAddress(const DbgVariable &DV, DIE *Die,
}
/// addRegisterOp - Add register operand.
-void DwarfUnit::addRegisterOp(DIELoc *TheDie, unsigned Reg) {
+void DwarfUnit::addRegisterOp(DIELoc &TheDie, unsigned Reg) {
const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo();
int DWReg = RI->getDwarfRegNum(Reg, false);
bool isSubRegister = DWReg < 0;
@@ -529,7 +536,7 @@ void DwarfUnit::addRegisterOp(DIELoc *TheDie, unsigned Reg) {
}
/// addRegisterOffset - Add register offset.
-void DwarfUnit::addRegisterOffset(DIELoc *TheDie, unsigned Reg,
+void DwarfUnit::addRegisterOffset(DIELoc &TheDie, unsigned Reg,
int64_t Offset) {
const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo();
unsigned DWReg = RI->getDwarfRegNum(Reg, false);
@@ -548,16 +555,16 @@ void DwarfUnit::addRegisterOffset(DIELoc *TheDie, unsigned Reg,
/// addAddress - Add an address attribute to a die based on the location
/// provided.
-void DwarfUnit::addAddress(DIE *Die, dwarf::Attribute Attribute,
+void DwarfUnit::addAddress(DIE &Die, dwarf::Attribute Attribute,
const MachineLocation &Location, bool Indirect) {
DIELoc *Loc = new (DIEValueAllocator) DIELoc();
if (Location.isReg() && !Indirect)
- addRegisterOp(Loc, Location.getReg());
+ addRegisterOp(*Loc, Location.getReg());
else {
- addRegisterOffset(Loc, Location.getReg(), Location.getOffset());
+ addRegisterOffset(*Loc, Location.getReg(), Location.getOffset());
if (Indirect && !Location.isReg()) {
- addUInt(Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
+ addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
}
}
@@ -570,7 +577,7 @@ void DwarfUnit::addAddress(DIE *Die, dwarf::Attribute Attribute,
/// given the extra address information encoded in the DbgVariable, starting
/// from the starting location. Add the DWARF information to the die.
///
-void DwarfUnit::addComplexAddress(const DbgVariable &DV, DIE *Die,
+void DwarfUnit::addComplexAddress(const DbgVariable &DV, DIE &Die,
dwarf::Attribute Attribute,
const MachineLocation &Location) {
DIELoc *Loc = new (DIEValueAllocator) DIELoc();
@@ -580,21 +587,21 @@ void DwarfUnit::addComplexAddress(const DbgVariable &DV, DIE *Die,
if (N >= 2 && DV.getAddrElement(0) == DIBuilder::OpPlus) {
// If first address element is OpPlus then emit
// DW_OP_breg + Offset instead of DW_OP_reg + Offset.
- addRegisterOffset(Loc, Location.getReg(), DV.getAddrElement(1));
+ addRegisterOffset(*Loc, Location.getReg(), DV.getAddrElement(1));
i = 2;
} else
- addRegisterOp(Loc, Location.getReg());
+ addRegisterOp(*Loc, Location.getReg());
} else
- addRegisterOffset(Loc, Location.getReg(), Location.getOffset());
+ addRegisterOffset(*Loc, Location.getReg(), Location.getOffset());
for (; i < N; ++i) {
uint64_t Element = DV.getAddrElement(i);
if (Element == DIBuilder::OpPlus) {
- addUInt(Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst);
- addUInt(Loc, dwarf::DW_FORM_udata, DV.getAddrElement(++i));
+ addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst);
+ addUInt(*Loc, dwarf::DW_FORM_udata, DV.getAddrElement(++i));
} else if (Element == DIBuilder::OpDeref) {
if (!Location.isReg())
- addUInt(Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
+ addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
} else
llvm_unreachable("unknown DIBuilder Opcode");
}
@@ -663,7 +670,7 @@ void DwarfUnit::addComplexAddress(const DbgVariable &DV, DIE *Die,
/// starting location. Add the DWARF information to the die. For
/// more information, read the large comment just above.
///
-void DwarfUnit::addBlockByrefAddress(const DbgVariable &DV, DIE *Die,
+void DwarfUnit::addBlockByrefAddress(const DbgVariable &DV, DIE &Die,
dwarf::Attribute Attribute,
const MachineLocation &Location) {
DIType Ty = DV.getType();
@@ -705,68 +712,78 @@ void DwarfUnit::addBlockByrefAddress(const DbgVariable &DV, DIE *Die,
DIELoc *Loc = new (DIEValueAllocator) DIELoc();
if (Location.isReg())
- addRegisterOp(Loc, Location.getReg());
+ addRegisterOp(*Loc, Location.getReg());
else
- addRegisterOffset(Loc, Location.getReg(), Location.getOffset());
+ addRegisterOffset(*Loc, Location.getReg(), Location.getOffset());
// If we started with a pointer to the __Block_byref... struct, then
// the first thing we need to do is dereference the pointer (DW_OP_deref).
if (isPointer)
- addUInt(Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
+ addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
// Next add the offset for the '__forwarding' field:
// DW_OP_plus_uconst ForwardingFieldOffset. Note there's no point in
// adding the offset if it's 0.
if (forwardingFieldOffset > 0) {
- addUInt(Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst);
- addUInt(Loc, dwarf::DW_FORM_udata, forwardingFieldOffset);
+ addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst);
+ addUInt(*Loc, dwarf::DW_FORM_udata, forwardingFieldOffset);
}
// Now dereference the __forwarding field to get to the real __Block_byref
// struct: DW_OP_deref.
- addUInt(Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
+ addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
// Now that we've got the real __Block_byref... struct, add the offset
// for the variable's field to get to the location of the actual variable:
// DW_OP_plus_uconst varFieldOffset. Again, don't add if it's 0.
if (varFieldOffset > 0) {
- addUInt(Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst);
- addUInt(Loc, dwarf::DW_FORM_udata, varFieldOffset);
+ addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst);
+ addUInt(*Loc, dwarf::DW_FORM_udata, varFieldOffset);
}
// Now attach the location information to the DIE.
addBlock(Die, Attribute, Loc);
}
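
The access path assembled above is easier to follow as raw DWARF expression
bytes. Below is a minimal, self-contained C++ sketch using the standard
DW_OP_* values from DWARF v4 (section 7.7.1); the register/frame prefix is
omitted and the offsets are hypothetical inputs, so this illustrates the
shape of the expression rather than the emitter itself:

    #include <cstdint>
    #include <vector>

    constexpr uint8_t DW_OP_deref       = 0x06;
    constexpr uint8_t DW_OP_plus_uconst = 0x23;

    static void appendULEB128(uint64_t V, std::vector<uint8_t> &Out) {
      do {
        uint8_t Byte = V & 0x7f;
        V >>= 7;
        if (V)
          Byte |= 0x80; // more bytes follow
        Out.push_back(Byte);
      } while (V);
    }

    // Mirrors addBlockByrefAddress: maybe deref the base pointer, hop to the
    // __forwarding field, deref it, then hop to the variable's own field.
    std::vector<uint8_t> byrefPath(bool IsPointer, uint64_t ForwardingOffset,
                                   uint64_t VarFieldOffset) {
      std::vector<uint8_t> Expr;
      if (IsPointer)
        Expr.push_back(DW_OP_deref);
      if (ForwardingOffset > 0) {
        Expr.push_back(DW_OP_plus_uconst);
        appendULEB128(ForwardingOffset, Expr);
      }
      Expr.push_back(DW_OP_deref);
      if (VarFieldOffset > 0) {
        Expr.push_back(DW_OP_plus_uconst);
        appendULEB128(VarFieldOffset, Expr);
      }
      return Expr;
    }
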
-/// isTypeSigned - Return true if the type is signed.
-static bool isTypeSigned(DwarfDebug *DD, DIType Ty, int *SizeInBits) {
- if (Ty.isDerivedType())
- return isTypeSigned(DD, DD->resolve(DIDerivedType(Ty).getTypeDerivedFrom()),
- SizeInBits);
- if (Ty.isBasicType())
- if (DIBasicType(Ty).getEncoding() == dwarf::DW_ATE_signed ||
- DIBasicType(Ty).getEncoding() == dwarf::DW_ATE_signed_char) {
- *SizeInBits = Ty.getSizeInBits();
- return true;
- }
- return false;
-}
-
/// Return true if type encoding is unsigned.
static bool isUnsignedDIType(DwarfDebug *DD, DIType Ty) {
DIDerivedType DTy(Ty);
- if (DTy.isDerivedType())
- return isUnsignedDIType(DD, DD->resolve(DTy.getTypeDerivedFrom()));
-
- DIBasicType BTy(Ty);
- if (BTy.isBasicType()) {
- unsigned Encoding = BTy.getEncoding();
- if (Encoding == dwarf::DW_ATE_unsigned ||
- Encoding == dwarf::DW_ATE_unsigned_char ||
- Encoding == dwarf::DW_ATE_boolean)
+ if (DTy.isDerivedType()) {
+ dwarf::Tag T = (dwarf::Tag)Ty.getTag();
+ // Encode pointer constants as unsigned bytes. This is used at least for
+ // null pointer constant emission.
+ // FIXME: reference and rvalue_reference /probably/ shouldn't be allowed
+ // here, but accept them for now due to a bug in SROA producing bogus
+ // dbg.values.
+ if (T == dwarf::DW_TAG_pointer_type ||
+ T == dwarf::DW_TAG_ptr_to_member_type ||
+ T == dwarf::DW_TAG_reference_type ||
+ T == dwarf::DW_TAG_rvalue_reference_type)
return true;
+ assert(T == dwarf::DW_TAG_typedef || T == dwarf::DW_TAG_const_type ||
+ T == dwarf::DW_TAG_volatile_type ||
+ T == dwarf::DW_TAG_restrict_type ||
+ T == dwarf::DW_TAG_enumeration_type);
+ if (DITypeRef Deriv = DTy.getTypeDerivedFrom())
+ return isUnsignedDIType(DD, DD->resolve(Deriv));
+ // FIXME: Enums without a fixed underlying type have unknown signedness
+ // here, leading to incorrectly emitted constants.
+ assert(DTy.getTag() == dwarf::DW_TAG_enumeration_type);
+ return false;
}
- return false;
+
+ DIBasicType BTy(Ty);
+ assert(BTy.isBasicType());
+ unsigned Encoding = BTy.getEncoding();
+ assert((Encoding == dwarf::DW_ATE_unsigned ||
+ Encoding == dwarf::DW_ATE_unsigned_char ||
+ Encoding == dwarf::DW_ATE_signed ||
+ Encoding == dwarf::DW_ATE_signed_char ||
+ Encoding == dwarf::DW_ATE_UTF || Encoding == dwarf::DW_ATE_boolean) &&
+ "Unsupported encoding");
+ return (Encoding == dwarf::DW_ATE_unsigned ||
+ Encoding == dwarf::DW_ATE_unsigned_char ||
+ Encoding == dwarf::DW_ATE_UTF || Encoding == dwarf::DW_ATE_boolean);
}
/// If this type is derived from a base type then return base type size.
@@ -798,47 +815,8 @@ static uint64_t getBaseTypeSize(DwarfDebug *DD, DIDerivedType Ty) {
return BaseType.getSizeInBits();
}
-/// addConstantValue - Add constant value entry in variable DIE.
-void DwarfUnit::addConstantValue(DIE *Die, const MachineOperand &MO,
- DIType Ty) {
- // FIXME: This is a bit conservative/simple - it emits negative values at
- // their maximum bit width which is a bit unfortunate (& doesn't prefer
- // udata/sdata over dataN as suggested by the DWARF spec)
- assert(MO.isImm() && "Invalid machine operand!");
- int SizeInBits = -1;
- bool SignedConstant = isTypeSigned(DD, Ty, &SizeInBits);
- dwarf::Form Form;
-
- // If we're a signed constant definitely use sdata.
- if (SignedConstant) {
- addSInt(Die, dwarf::DW_AT_const_value, dwarf::DW_FORM_sdata, MO.getImm());
- return;
- }
-
- // Else use data for now unless it's larger than we can deal with.
- switch (SizeInBits) {
- case 8:
- Form = dwarf::DW_FORM_data1;
- break;
- case 16:
- Form = dwarf::DW_FORM_data2;
- break;
- case 32:
- Form = dwarf::DW_FORM_data4;
- break;
- case 64:
- Form = dwarf::DW_FORM_data8;
- break;
- default:
- Form = dwarf::DW_FORM_udata;
- addUInt(Die, dwarf::DW_AT_const_value, Form, MO.getImm());
- return;
- }
- addUInt(Die, dwarf::DW_AT_const_value, Form, MO.getImm());
-}
-
/// addConstantFPValue - Add constant value entry in variable DIE.
-void DwarfUnit::addConstantFPValue(DIE *Die, const MachineOperand &MO) {
+void DwarfUnit::addConstantFPValue(DIE &Die, const MachineOperand &MO) {
assert(MO.isFPImm() && "Invalid machine operand!");
DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
APFloat FPImm = MO.getFPImm()->getValueAPF();
@@ -855,55 +833,47 @@ void DwarfUnit::addConstantFPValue(DIE *Die, const MachineOperand &MO) {
// Output the constant to DWARF one byte at a time.
for (; Start != Stop; Start += Incr)
- addUInt(Block, dwarf::DW_FORM_data1, (unsigned char)0xFF & FltPtr[Start]);
+ addUInt(*Block, dwarf::DW_FORM_data1, (unsigned char)0xFF & FltPtr[Start]);
addBlock(Die, dwarf::DW_AT_const_value, Block);
}
/// addConstantFPValue - Add constant value entry in variable DIE.
-void DwarfUnit::addConstantFPValue(DIE *Die, const ConstantFP *CFP) {
+void DwarfUnit::addConstantFPValue(DIE &Die, const ConstantFP *CFP) {
// Pass this down to addConstantValue as an unsigned bag of bits.
addConstantValue(Die, CFP->getValueAPF().bitcastToAPInt(), true);
}
/// addConstantValue - Add constant value entry in variable DIE.
-void DwarfUnit::addConstantValue(DIE *Die, const ConstantInt *CI,
- bool Unsigned) {
- addConstantValue(Die, CI->getValue(), Unsigned);
+void DwarfUnit::addConstantValue(DIE &Die, const ConstantInt *CI, DIType Ty) {
+ addConstantValue(Die, CI->getValue(), Ty);
+}
+
+/// addConstantValue - Add constant value entry in variable DIE.
+void DwarfUnit::addConstantValue(DIE &Die, const MachineOperand &MO,
+ DIType Ty) {
+ assert(MO.isImm() && "Invalid machine operand!");
+
+ addConstantValue(Die, isUnsignedDIType(DD, Ty), MO.getImm());
+}
+
+void DwarfUnit::addConstantValue(DIE &Die, bool Unsigned, uint64_t Val) {
+ // FIXME: This is a bit conservative/simple - it always emits negative
+ // values sign-extended to 64 bits rather than minimizing the number of bytes.
+ addUInt(Die, dwarf::DW_AT_const_value,
+ Unsigned ? dwarf::DW_FORM_udata : dwarf::DW_FORM_sdata, Val);
+}
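
DW_FORM_udata and DW_FORM_sdata are the variable-length LEB128 forms, so the
simplified path above still stays compact for small magnitudes; what the
FIXME gives up is choosing a fixed-width dataN form when that would be
smaller still. A self-contained sketch of both encodings, assuming an
arithmetic right shift on signed values (true on mainstream compilers):

    #include <cstdint>
    #include <vector>

    std::vector<uint8_t> encodeULEB128(uint64_t V) {
      std::vector<uint8_t> Out;
      do {
        uint8_t Byte = V & 0x7f;
        V >>= 7;
        if (V)
          Byte |= 0x80;
        Out.push_back(Byte);
      } while (V);
      return Out;
    }

    std::vector<uint8_t> encodeSLEB128(int64_t V) {
      std::vector<uint8_t> Out;
      bool More;
      do {
        uint8_t Byte = V & 0x7f;
        V >>= 7; // assumes arithmetic shift for negative V
        // Stop once the remaining bits agree with this byte's sign bit.
        More = !((V == 0 && !(Byte & 0x40)) || (V == -1 && (Byte & 0x40)));
        if (More)
          Byte |= 0x80;
        Out.push_back(Byte);
      } while (More);
      return Out;
    }

    // encodeSLEB128(-1) is one byte (0x7f); encodeULEB128(UINT64_MAX) is ten.
    // Choosing sdata vs. udata by signedness is what keeps constants small.
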
+
+void DwarfUnit::addConstantValue(DIE &Die, const APInt &Val, DIType Ty) {
+ addConstantValue(Die, Val, isUnsignedDIType(DD, Ty));
}
// addConstantValue - Add constant value entry in variable DIE.
-void DwarfUnit::addConstantValue(DIE *Die, const APInt &Val, bool Unsigned) {
+void DwarfUnit::addConstantValue(DIE &Die, const APInt &Val, bool Unsigned) {
unsigned CIBitWidth = Val.getBitWidth();
if (CIBitWidth <= 64) {
- // If we're a signed constant definitely use sdata.
- if (!Unsigned) {
- addSInt(Die, dwarf::DW_AT_const_value, dwarf::DW_FORM_sdata,
- Val.getSExtValue());
- return;
- }
-
- // Else use data for now unless it's larger than we can deal with.
- dwarf::Form Form;
- switch (CIBitWidth) {
- case 8:
- Form = dwarf::DW_FORM_data1;
- break;
- case 16:
- Form = dwarf::DW_FORM_data2;
- break;
- case 32:
- Form = dwarf::DW_FORM_data4;
- break;
- case 64:
- Form = dwarf::DW_FORM_data8;
- break;
- default:
- addUInt(Die, dwarf::DW_AT_const_value, dwarf::DW_FORM_udata,
- Val.getZExtValue());
- return;
- }
- addUInt(Die, dwarf::DW_AT_const_value, Form, Val.getZExtValue());
+ addConstantValue(Die, Unsigned,
+ Unsigned ? Val.getZExtValue() : Val.getSExtValue());
return;
}
@@ -922,7 +892,7 @@ void DwarfUnit::addConstantValue(DIE *Die, const APInt &Val, bool Unsigned) {
c = Ptr64[i / 8] >> (8 * (i & 7));
else
c = Ptr64[(NumBytes - 1 - i) / 8] >> (8 * ((NumBytes - 1 - i) & 7));
- addUInt(Block, dwarf::DW_FORM_data1, c);
+ addUInt(*Block, dwarf::DW_FORM_data1, c);
}
addBlock(Die, dwarf::DW_AT_const_value, Block);
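
For constants wider than 64 bits, the value is streamed out one
DW_FORM_data1 byte at a time, and the indexing above flips the byte order on
big-endian targets so the emitted bytes match target memory order. The same
indexing as a standalone sketch, where the word array is a stand-in for the
APInt storage (64-bit words, least significant word first):

    #include <cstdint>
    #include <vector>

    std::vector<uint8_t> rawBytes(const std::vector<uint64_t> &Words,
                                  unsigned NumBytes, bool LittleEndian) {
      std::vector<uint8_t> Out;
      for (unsigned i = 0; i != NumBytes; ++i) {
        unsigned Idx = LittleEndian ? i : NumBytes - 1 - i;
        Out.push_back(uint8_t(Words[Idx / 8] >> (8 * (Idx % 8))));
      }
      return Out;
    }
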
@@ -945,7 +915,7 @@ void DwarfUnit::addTemplateParams(DIE &Buffer, DIArray TParams) {
/// getOrCreateContextDIE - Get context owner's DIE.
DIE *DwarfUnit::getOrCreateContextDIE(DIScope Context) {
if (!Context || Context.isFile())
- return getUnitDie();
+ return &getUnitDie();
if (Context.isType())
return getOrCreateTypeDIE(DIType(Context));
if (Context.isNameSpace())
@@ -959,66 +929,68 @@ DIE *DwarfUnit::createTypeDIE(DICompositeType Ty) {
DIScope Context = resolve(Ty.getContext());
DIE *ContextDIE = getOrCreateContextDIE(Context);
- DIE *TyDIE = getDIE(Ty);
- if (TyDIE)
+ if (DIE *TyDIE = getDIE(Ty))
return TyDIE;
// Create new type.
- TyDIE = createAndAddDIE(Ty.getTag(), *ContextDIE, Ty);
+ DIE &TyDIE = createAndAddDIE(Ty.getTag(), *ContextDIE, Ty);
- constructTypeDIE(*TyDIE, Ty);
+ constructTypeDIE(TyDIE, Ty);
updateAcceleratorTables(Context, Ty, TyDIE);
- return TyDIE;
+ return &TyDIE;
}
/// getOrCreateTypeDIE - Find existing DIE or create new DIE for the
/// given DIType.
DIE *DwarfUnit::getOrCreateTypeDIE(const MDNode *TyNode) {
if (!TyNode)
- return NULL;
+ return nullptr;
DIType Ty(TyNode);
assert(Ty.isType());
assert(Ty == resolve(Ty.getRef()) &&
"type was not uniqued, possible ODR violation.");
+ // DW_TAG_restrict_type is not supported in DWARF2
+ if (Ty.getTag() == dwarf::DW_TAG_restrict_type && DD->getDwarfVersion() <= 2)
+ return getOrCreateTypeDIE(resolve(DIDerivedType(Ty).getTypeDerivedFrom()));
+
// Construct the context before querying for the existence of the DIE in case
// such construction creates the DIE.
DIScope Context = resolve(Ty.getContext());
DIE *ContextDIE = getOrCreateContextDIE(Context);
assert(ContextDIE);
- DIE *TyDIE = getDIE(Ty);
- if (TyDIE)
+ if (DIE *TyDIE = getDIE(Ty))
return TyDIE;
// Create new type.
- TyDIE = createAndAddDIE(Ty.getTag(), *ContextDIE, Ty);
+ DIE &TyDIE = createAndAddDIE(Ty.getTag(), *ContextDIE, Ty);
updateAcceleratorTables(Context, Ty, TyDIE);
if (Ty.isBasicType())
- constructTypeDIE(*TyDIE, DIBasicType(Ty));
+ constructTypeDIE(TyDIE, DIBasicType(Ty));
else if (Ty.isCompositeType()) {
DICompositeType CTy(Ty);
if (GenerateDwarfTypeUnits && !Ty.isForwardDecl())
if (MDString *TypeId = CTy.getIdentifier()) {
DD->addDwarfTypeUnitType(getCU(), TypeId->getString(), TyDIE, CTy);
// Skip updating the accelerator tables since this is not the full type.
- return TyDIE;
+ return &TyDIE;
}
- constructTypeDIE(*TyDIE, CTy);
+ constructTypeDIE(TyDIE, CTy);
} else {
assert(Ty.isDerivedType() && "Unknown kind of DIType");
- constructTypeDIE(*TyDIE, DIDerivedType(Ty));
+ constructTypeDIE(TyDIE, DIDerivedType(Ty));
}
- return TyDIE;
+ return &TyDIE;
}
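
The "construct the context first" comment above is load-bearing:
getOrCreateContextDIE can recurse back into this function and create the
requested DIE as a side effect, so the cache lookup is only meaningful once
the context exists. The shape of that pattern in a reduced, self-contained
form (the integer keys and map are hypothetical stand-ins for the
MDNode-to-DIE mapping):

    #include <memory>
    #include <unordered_map>

    struct Node {};

    struct DieCache {
      std::unordered_map<int, std::unique_ptr<Node>> Entries;

      Node &getOrCreate(int Key, int ParentKey) {
        // Build the parent first; doing so may recursively insert Key.
        if (ParentKey >= 0)
          getOrCreate(ParentKey, /*ParentKey=*/-1);
        // Only now is it safe to consult the cache.
        auto It = Entries.find(Key);
        if (It != Entries.end())
          return *It->second;
        std::unique_ptr<Node> &Slot = Entries[Key];
        Slot.reset(new Node());
        return *Slot;
      }
    };
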
void DwarfUnit::updateAcceleratorTables(DIScope Context, DIType Ty,
- const DIE *TyDIE) {
+ const DIE &TyDIE) {
if (!Ty.getName().empty() && !Ty.isForwardDecl()) {
bool IsImplementation = 0;
if (Ty.isCompositeType()) {
@@ -1028,17 +1000,18 @@ void DwarfUnit::updateAcceleratorTables(DIScope Context, DIType Ty,
IsImplementation = (CT.getRunTimeLang() == 0) || CT.isObjcClassComplete();
}
unsigned Flags = IsImplementation ? dwarf::DW_FLAG_type_implementation : 0;
- addAccelType(Ty.getName(), std::make_pair(TyDIE, Flags));
+ DD->addAccelType(Ty.getName(), TyDIE, Flags);
if ((!Context || Context.isCompileUnit() || Context.isFile() ||
Context.isNameSpace()) &&
getCUNode().getEmissionKind() != DIBuilder::LineTablesOnly)
- GlobalTypes[getParentContextString(Context) + Ty.getName().str()] = TyDIE;
+ GlobalTypes[getParentContextString(Context) + Ty.getName().str()] =
+ &TyDIE;
}
}
/// addType - Add a new type attribute to the specified entity.
-void DwarfUnit::addType(DIE *Entity, DIType Ty, dwarf::Attribute Attribute) {
+void DwarfUnit::addType(DIE &Entity, DIType Ty, dwarf::Attribute Attribute) {
assert(Ty && "Trying to add a type that doesn't exist?");
// Check for pre-existence.
@@ -1053,54 +1026,17 @@ void DwarfUnit::addType(DIE *Entity, DIType Ty, dwarf::Attribute Attribute) {
DIE *Buffer = getOrCreateTypeDIE(Ty);
// Set up proxy.
- Entry = createDIEEntry(Buffer);
+ Entry = createDIEEntry(*Buffer);
insertDIEEntry(Ty, Entry);
addDIEEntry(Entity, Attribute, Entry);
}
-// Accelerator table mutators - add each name along with its companion
-// DIE to the proper table while ensuring that the name that we're going
-// to reference is in the string table. We do this since the names we
-// add may not only be identical to the names in the DIE.
-void DwarfUnit::addAccelName(StringRef Name, const DIE *Die) {
- if (!DD->useDwarfAccelTables())
- return;
- DU->getStringPoolEntry(Name);
- std::vector<const DIE *> &DIEs = AccelNames[Name];
- DIEs.push_back(Die);
-}
-
-void DwarfUnit::addAccelObjC(StringRef Name, const DIE *Die) {
- if (!DD->useDwarfAccelTables())
- return;
- DU->getStringPoolEntry(Name);
- std::vector<const DIE *> &DIEs = AccelObjC[Name];
- DIEs.push_back(Die);
-}
-
-void DwarfUnit::addAccelNamespace(StringRef Name, const DIE *Die) {
- if (!DD->useDwarfAccelTables())
- return;
- DU->getStringPoolEntry(Name);
- std::vector<const DIE *> &DIEs = AccelNamespace[Name];
- DIEs.push_back(Die);
-}
-
-void DwarfUnit::addAccelType(StringRef Name,
- std::pair<const DIE *, unsigned> Die) {
- if (!DD->useDwarfAccelTables())
- return;
- DU->getStringPoolEntry(Name);
- std::vector<std::pair<const DIE *, unsigned> > &DIEs = AccelTypes[Name];
- DIEs.push_back(Die);
-}
-
/// addGlobalName - Add a new global name to the compile unit.
-void DwarfUnit::addGlobalName(StringRef Name, DIE *Die, DIScope Context) {
+void DwarfUnit::addGlobalName(StringRef Name, DIE &Die, DIScope Context) {
if (getCUNode().getEmissionKind() == DIBuilder::LineTablesOnly)
return;
std::string FullName = getParentContextString(Context) + Name.str();
- GlobalNames[FullName] = Die;
+ GlobalNames[FullName] = &Die;
}
/// getParentContextString - Walks the metadata parent chain in a language
@@ -1149,17 +1085,17 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, DIBasicType BTy) {
StringRef Name = BTy.getName();
// Add name if not anonymous or intermediate type.
if (!Name.empty())
- addString(&Buffer, dwarf::DW_AT_name, Name);
+ addString(Buffer, dwarf::DW_AT_name, Name);
// An unspecified type only has a name attribute.
if (BTy.getTag() == dwarf::DW_TAG_unspecified_type)
return;
- addUInt(&Buffer, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1,
+ addUInt(Buffer, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1,
BTy.getEncoding());
uint64_t Size = BTy.getSizeInBits() >> 3;
- addUInt(&Buffer, dwarf::DW_AT_byte_size, None, Size);
+ addUInt(Buffer, dwarf::DW_AT_byte_size, None, Size);
}
/// constructTypeDIE - Construct derived type die from DIDerivedType.
@@ -1172,22 +1108,22 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, DIDerivedType DTy) {
// Map to main type, void will not have a type.
DIType FromTy = resolve(DTy.getTypeDerivedFrom());
if (FromTy)
- addType(&Buffer, FromTy);
+ addType(Buffer, FromTy);
// Add name if not anonymous or intermediate type.
if (!Name.empty())
- addString(&Buffer, dwarf::DW_AT_name, Name);
+ addString(Buffer, dwarf::DW_AT_name, Name);
// Add size if non-zero (derived types might be zero-sized.)
if (Size && Tag != dwarf::DW_TAG_pointer_type)
- addUInt(&Buffer, dwarf::DW_AT_byte_size, None, Size);
+ addUInt(Buffer, dwarf::DW_AT_byte_size, None, Size);
if (Tag == dwarf::DW_TAG_ptr_to_member_type)
- addDIEEntry(&Buffer, dwarf::DW_AT_containing_type,
- getOrCreateTypeDIE(resolve(DTy.getClassType())));
+ addDIEEntry(Buffer, dwarf::DW_AT_containing_type,
+ *getOrCreateTypeDIE(resolve(DTy.getClassType())));
// Add source line info if available and TyDesc is not a forward declaration.
if (!DTy.isForwardDecl())
- addSourceLine(&Buffer, DTy);
+ addSourceLine(Buffer, DTy);
}
/// constructSubprogramArguments - Construct function argument DIEs.
@@ -1198,7 +1134,7 @@ void DwarfUnit::constructSubprogramArguments(DIE &Buffer, DIArray Args) {
assert(i == N-1 && "Unspecified parameter must be the last argument");
createAndAddDIE(dwarf::DW_TAG_unspecified_parameters, Buffer);
} else {
- DIE *Arg = createAndAddDIE(dwarf::DW_TAG_formal_parameter, Buffer);
+ DIE &Arg = createAndAddDIE(dwarf::DW_TAG_formal_parameter, Buffer);
addType(Arg, DIType(Ty));
if (DIType(Ty).isArtificial())
addFlag(Arg, dwarf::DW_AT_artificial);
@@ -1226,7 +1162,7 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
DIArray Elements = CTy.getTypeArray();
DIType RTy(Elements.getElement(0));
if (RTy)
- addType(&Buffer, RTy);
+ addType(Buffer, RTy);
bool isPrototyped = true;
if (Elements.getNumElements() == 2 &&
@@ -1241,13 +1177,13 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
if (isPrototyped &&
(Language == dwarf::DW_LANG_C89 || Language == dwarf::DW_LANG_C99 ||
Language == dwarf::DW_LANG_ObjC))
- addFlag(&Buffer, dwarf::DW_AT_prototyped);
+ addFlag(Buffer, dwarf::DW_AT_prototyped);
if (CTy.isLValueReference())
- addFlag(&Buffer, dwarf::DW_AT_reference);
+ addFlag(Buffer, dwarf::DW_AT_reference);
if (CTy.isRValueReference())
- addFlag(&Buffer, dwarf::DW_AT_rvalue_reference);
+ addFlag(Buffer, dwarf::DW_AT_rvalue_reference);
} break;
case dwarf::DW_TAG_structure_type:
case dwarf::DW_TAG_union_type:
@@ -1256,13 +1192,12 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
DIArray Elements = CTy.getTypeArray();
for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) {
DIDescriptor Element = Elements.getElement(i);
- DIE *ElemDie = NULL;
if (Element.isSubprogram())
- ElemDie = getOrCreateSubprogramDIE(DISubprogram(Element));
+ getOrCreateSubprogramDIE(DISubprogram(Element));
else if (Element.isDerivedType()) {
DIDerivedType DDTy(Element);
if (DDTy.getTag() == dwarf::DW_TAG_friend) {
- ElemDie = createAndAddDIE(dwarf::DW_TAG_friend, Buffer);
+ DIE &ElemDie = createAndAddDIE(dwarf::DW_TAG_friend, Buffer);
addType(ElemDie, resolve(DDTy.getTypeDerivedFrom()),
dwarf::DW_AT_friend);
} else if (DDTy.isStaticMember()) {
@@ -1272,7 +1207,7 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
}
} else if (Element.isObjCProperty()) {
DIObjCProperty Property(Element);
- ElemDie = createAndAddDIE(Property.getTag(), Buffer);
+ DIE &ElemDie = createAndAddDIE(Property.getTag(), Buffer);
StringRef PropertyName = Property.getObjCPropertyName();
addString(ElemDie, dwarf::DW_AT_APPLE_property_name, PropertyName);
if (Property.getType())
@@ -1311,15 +1246,15 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
}
if (CTy.isAppleBlockExtension())
- addFlag(&Buffer, dwarf::DW_AT_APPLE_block);
+ addFlag(Buffer, dwarf::DW_AT_APPLE_block);
DICompositeType ContainingType(resolve(CTy.getContainingType()));
if (ContainingType)
- addDIEEntry(&Buffer, dwarf::DW_AT_containing_type,
- getOrCreateTypeDIE(ContainingType));
+ addDIEEntry(Buffer, dwarf::DW_AT_containing_type,
+ *getOrCreateTypeDIE(ContainingType));
if (CTy.isObjcClassComplete())
- addFlag(&Buffer, dwarf::DW_AT_APPLE_objc_complete_type);
+ addFlag(Buffer, dwarf::DW_AT_APPLE_objc_complete_type);
// Add template parameters to a class, structure or union types.
// FIXME: The support isn't in the metadata for this yet.
@@ -1335,7 +1270,7 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
// Add name if not anonymous or intermediate type.
if (!Name.empty())
- addString(&Buffer, dwarf::DW_AT_name, Name);
+ addString(Buffer, dwarf::DW_AT_name, Name);
if (Tag == dwarf::DW_TAG_enumeration_type ||
Tag == dwarf::DW_TAG_class_type || Tag == dwarf::DW_TAG_structure_type ||
@@ -1343,23 +1278,23 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
// Add size if non-zero (derived types might be zero-sized.)
// TODO: Do we care about size for enum forward declarations?
if (Size)
- addUInt(&Buffer, dwarf::DW_AT_byte_size, None, Size);
+ addUInt(Buffer, dwarf::DW_AT_byte_size, None, Size);
else if (!CTy.isForwardDecl())
// Add zero size if it is not a forward declaration.
- addUInt(&Buffer, dwarf::DW_AT_byte_size, None, 0);
+ addUInt(Buffer, dwarf::DW_AT_byte_size, None, 0);
// If we're a forward decl, say so.
if (CTy.isForwardDecl())
- addFlag(&Buffer, dwarf::DW_AT_declaration);
+ addFlag(Buffer, dwarf::DW_AT_declaration);
// Add source line info if available.
if (!CTy.isForwardDecl())
- addSourceLine(&Buffer, CTy);
+ addSourceLine(Buffer, CTy);
// No harm in adding the runtime language to the declaration.
unsigned RLang = CTy.getRunTimeLang();
if (RLang)
- addUInt(&Buffer, dwarf::DW_AT_APPLE_runtime_class, dwarf::DW_FORM_data1,
+ addUInt(Buffer, dwarf::DW_AT_APPLE_runtime_class, dwarf::DW_FORM_data1,
RLang);
}
}
@@ -1368,7 +1303,7 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
/// DITemplateTypeParameter.
void DwarfUnit::constructTemplateTypeParameterDIE(DIE &Buffer,
DITemplateTypeParameter TP) {
- DIE *ParamDIE =
+ DIE &ParamDIE =
createAndAddDIE(dwarf::DW_TAG_template_type_parameter, Buffer);
// Add the type if it exists; it could be void, in which case there is no type.
if (TP.getType())
@@ -1382,7 +1317,7 @@ void DwarfUnit::constructTemplateTypeParameterDIE(DIE &Buffer,
void
DwarfUnit::constructTemplateValueParameterDIE(DIE &Buffer,
DITemplateValueParameter VP) {
- DIE *ParamDIE = createAndAddDIE(VP.getTag(), Buffer);
+ DIE &ParamDIE = createAndAddDIE(VP.getTag(), Buffer);
// Add the type if there is one; template template parameters and template
// parameter packs will not have a type.
@@ -1392,16 +1327,15 @@ DwarfUnit::constructTemplateValueParameterDIE(DIE &Buffer,
addString(ParamDIE, dwarf::DW_AT_name, VP.getName());
if (Value *Val = VP.getValue()) {
if (ConstantInt *CI = dyn_cast<ConstantInt>(Val))
- addConstantValue(ParamDIE, CI,
- isUnsignedDIType(DD, resolve(VP.getType())));
+ addConstantValue(ParamDIE, CI, resolve(VP.getType()));
else if (GlobalValue *GV = dyn_cast<GlobalValue>(Val)) {
// For declaration non-type template parameters (such as global values and
// functions)
DIELoc *Loc = new (DIEValueAllocator) DIELoc();
- addOpAddress(Loc, Asm->getSymbol(GV));
+ addOpAddress(*Loc, Asm->getSymbol(GV));
// Emit DW_OP_stack_value to use the address as the immediate value of the
// parameter, rather than a pointer to it.
- addUInt(Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_stack_value);
+ addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_stack_value);
addBlock(ParamDIE, dwarf::DW_AT_location, Loc);
} else if (VP.getTag() == dwarf::DW_TAG_GNU_template_template_param) {
assert(isa<MDString>(Val));
@@ -1410,7 +1344,7 @@ DwarfUnit::constructTemplateValueParameterDIE(DIE &Buffer,
} else if (VP.getTag() == dwarf::DW_TAG_GNU_template_parameter_pack) {
assert(isa<MDNode>(Val));
DIArray A(cast<MDNode>(Val));
- addTemplateParams(*ParamDIE, A);
+ addTemplateParams(ParamDIE, A);
}
}
}
@@ -1421,19 +1355,18 @@ DIE *DwarfUnit::getOrCreateNameSpace(DINameSpace NS) {
// such construction creates the DIE.
DIE *ContextDIE = getOrCreateContextDIE(NS.getContext());
- DIE *NDie = getDIE(NS);
- if (NDie)
+ if (DIE *NDie = getDIE(NS))
return NDie;
- NDie = createAndAddDIE(dwarf::DW_TAG_namespace, *ContextDIE, NS);
+ DIE &NDie = createAndAddDIE(dwarf::DW_TAG_namespace, *ContextDIE, NS);
if (!NS.getName().empty()) {
addString(NDie, dwarf::DW_AT_name, NS.getName());
- addAccelNamespace(NS.getName(), NDie);
+ DD->addAccelNamespace(NS.getName(), NDie);
addGlobalName(NS.getName(), NDie, NS.getContext());
} else
- addAccelNamespace("(anonymous namespace)", NDie);
+ DD->addAccelNamespace("(anonymous namespace)", NDie);
addSourceLine(NDie, NS);
- return NDie;
+ return &NDie;
}
/// getOrCreateSubprogramDIE - Create new DIE using SP.
@@ -1441,47 +1374,58 @@ DIE *DwarfUnit::getOrCreateSubprogramDIE(DISubprogram SP) {
// Construct the context before querying for the existence of the DIE in case
// such construction creates the DIE (as is the case for member function
// declarations).
- DIScope Context = resolve(SP.getContext());
- DIE *ContextDIE = getOrCreateContextDIE(Context);
+ DIE *ContextDIE = getOrCreateContextDIE(resolve(SP.getContext()));
- // Unique declarations based on the ODR, where applicable.
- SP = DISubprogram(DD->resolve(SP.getRef()));
- assert(SP.Verify());
-
- DIE *SPDie = getDIE(SP);
- if (SPDie)
+ if (DIE *SPDie = getDIE(SP))
return SPDie;
- DISubprogram SPDecl = SP.getFunctionDeclaration();
- if (SPDecl.isSubprogram())
+ if (DISubprogram SPDecl = SP.getFunctionDeclaration()) {
// Add subprogram definitions to the CU die directly.
- ContextDIE = UnitDie.get();
+ ContextDIE = &getUnitDie();
+ // Build the decl now to ensure it precedes the definition.
+ getOrCreateSubprogramDIE(SPDecl);
+ }
// DW_TAG_inlined_subroutine may refer to this DIE.
- SPDie = createAndAddDIE(dwarf::DW_TAG_subprogram, *ContextDIE, SP);
-
- DIE *DeclDie = NULL;
- if (SPDecl.isSubprogram())
- DeclDie = getOrCreateSubprogramDIE(SPDecl);
+ DIE &SPDie = createAndAddDIE(dwarf::DW_TAG_subprogram, *ContextDIE, SP);
- // Add function template parameters.
- addTemplateParams(*SPDie, SP.getTemplateParams());
+ // Stop here and fill this in later, depending on whether or not this
+ // subprogram turns out to have inlined instances.
+ if (SP.isDefinition())
+ return &SPDie;
- // If this DIE is going to refer declaration info using AT_specification
- // then there is no need to add other attributes.
- if (DeclDie) {
- // Refer function declaration directly.
- addDIEEntry(SPDie, dwarf::DW_AT_specification, DeclDie);
+ applySubprogramAttributes(SP, SPDie);
+ return &SPDie;
+}
- return SPDie;
+void DwarfUnit::applySubprogramAttributes(DISubprogram SP, DIE &SPDie) {
+ DIE *DeclDie = nullptr;
+ StringRef DeclLinkageName;
+ if (DISubprogram SPDecl = SP.getFunctionDeclaration()) {
+ DeclDie = getDIE(SPDecl);
+ assert(DeclDie);
+ DeclLinkageName = SPDecl.getLinkageName();
}
- // Add the linkage name if we have one.
+ // Add function template parameters.
+ addTemplateParams(SPDie, SP.getTemplateParams());
+
+ // Add the linkage name if we have one and it isn't in the Decl.
StringRef LinkageName = SP.getLinkageName();
- if (!LinkageName.empty())
+ assert(((LinkageName.empty() || DeclLinkageName.empty()) ||
+ LinkageName == DeclLinkageName) &&
+ "decl has a linkage name and it is different");
+ if (!LinkageName.empty() && DeclLinkageName.empty())
addString(SPDie, dwarf::DW_AT_MIPS_linkage_name,
GlobalValue::getRealLinkageName(LinkageName));
+ if (DeclDie) {
+ // Refer to the function declaration where all the other attributes will be
+ // found.
+ addDIEEntry(SPDie, dwarf::DW_AT_specification, *DeclDie);
+ return;
+ }
+
// Constructors and operators for anonymous aggregates do not have names.
if (!SP.getName().empty())
addString(SPDie, dwarf::DW_AT_name, SP.getName());
@@ -1510,11 +1454,11 @@ DIE *DwarfUnit::getOrCreateSubprogramDIE(DISubprogram SP) {
if (VK) {
addUInt(SPDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_data1, VK);
DIELoc *Block = getDIELoc();
- addUInt(Block, dwarf::DW_FORM_data1, dwarf::DW_OP_constu);
- addUInt(Block, dwarf::DW_FORM_udata, SP.getVirtualIndex());
+ addUInt(*Block, dwarf::DW_FORM_data1, dwarf::DW_OP_constu);
+ addUInt(*Block, dwarf::DW_FORM_udata, SP.getVirtualIndex());
addBlock(SPDie, dwarf::DW_AT_vtable_elem_location, Block);
ContainingTypeMap.insert(
- std::make_pair(SPDie, resolve(SP.getContainingType())));
+ std::make_pair(&SPDie, resolve(SP.getContainingType())));
}
if (!SP.isDefinition()) {
@@ -1522,7 +1466,7 @@ DIE *DwarfUnit::getOrCreateSubprogramDIE(DISubprogram SP) {
// Add arguments. Do not add arguments for subprogram definition. They will
// be handled while processing variables.
- constructSubprogramArguments(*SPDie, Args);
+ constructSubprogramArguments(SPDie, Args);
}
if (SP.isArtificial())
@@ -1556,8 +1500,6 @@ DIE *DwarfUnit::getOrCreateSubprogramDIE(DISubprogram SP) {
if (SP.isExplicit())
addFlag(SPDie, dwarf::DW_AT_explicit);
-
- return SPDie;
}
// Return const expression if value is a GEP to access merged global
@@ -1567,22 +1509,22 @@ static const ConstantExpr *getMergedGlobalExpr(const Value *V) {
const ConstantExpr *CE = dyn_cast_or_null<ConstantExpr>(V);
if (!CE || CE->getNumOperands() != 3 ||
CE->getOpcode() != Instruction::GetElementPtr)
- return NULL;
+ return nullptr;
// First operand points to a global struct.
Value *Ptr = CE->getOperand(0);
if (!isa<GlobalValue>(Ptr) ||
!isa<StructType>(cast<PointerType>(Ptr->getType())->getElementType()))
- return NULL;
+ return nullptr;
// Second operand is zero.
const ConstantInt *CI = dyn_cast_or_null<ConstantInt>(CE->getOperand(1));
if (!CI || !CI->isZero())
- return NULL;
+ return nullptr;
// Third operand is offset.
if (!isa<ConstantInt>(CE->getOperand(2)))
- return NULL;
+ return nullptr;
return CE;
}
@@ -1600,7 +1542,7 @@ void DwarfCompileUnit::createGlobalVariableDIE(DIGlobalVariable GV) {
// If this is a static data member definition, some attributes belong
// to the declaration DIE.
- DIE *VariableDIE = NULL;
+ DIE *VariableDIE = nullptr;
bool IsStaticMember = false;
DIDerivedType SDMDecl = GV.getStaticDataMemberDeclaration();
if (SDMDecl.Verify()) {
@@ -1618,24 +1560,24 @@ void DwarfCompileUnit::createGlobalVariableDIE(DIGlobalVariable GV) {
DIE *ContextDIE = getOrCreateContextDIE(GVContext);
// Add to map.
- VariableDIE = createAndAddDIE(GV.getTag(), *ContextDIE, GV);
+ VariableDIE = &createAndAddDIE(GV.getTag(), *ContextDIE, GV);
// Add name and type.
- addString(VariableDIE, dwarf::DW_AT_name, GV.getDisplayName());
- addType(VariableDIE, GTy);
+ addString(*VariableDIE, dwarf::DW_AT_name, GV.getDisplayName());
+ addType(*VariableDIE, GTy);
// Add scoping info.
if (!GV.isLocalToUnit())
- addFlag(VariableDIE, dwarf::DW_AT_external);
+ addFlag(*VariableDIE, dwarf::DW_AT_external);
// Add line number info.
- addSourceLine(VariableDIE, GV);
+ addSourceLine(*VariableDIE, GV);
}
// Add location.
bool addToAccelTable = false;
- DIE *VariableSpecDIE = NULL;
- bool isGlobalVariable = GV.getGlobal() != NULL;
+ DIE *VariableSpecDIE = nullptr;
+ bool isGlobalVariable = GV.getGlobal() != nullptr;
if (isGlobalVariable) {
addToAccelTable = true;
DIELoc *Loc = new (DIEValueAllocator) DIELoc();
@@ -1648,36 +1590,36 @@ void DwarfCompileUnit::createGlobalVariableDIE(DIGlobalVariable GV) {
// Based on GCC's support for TLS:
if (!DD->useSplitDwarf()) {
// 1) Start with a constNu of the appropriate pointer size
- addUInt(Loc, dwarf::DW_FORM_data1,
+ addUInt(*Loc, dwarf::DW_FORM_data1,
PointerSize == 4 ? dwarf::DW_OP_const4u : dwarf::DW_OP_const8u);
// 2) containing the (relocated) offset of the TLS variable
// within the module's TLS block.
- addExpr(Loc, dwarf::DW_FORM_udata,
+ addExpr(*Loc, dwarf::DW_FORM_udata,
Asm->getObjFileLowering().getDebugThreadLocalSymbol(Sym));
} else {
- addUInt(Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_const_index);
- addUInt(Loc, dwarf::DW_FORM_udata,
- DU->getAddrPoolIndex(Sym, /* TLS */ true));
+ addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_const_index);
+ addUInt(*Loc, dwarf::DW_FORM_udata,
+ DD->getAddressPool().getIndex(Sym, /* TLS */ true));
}
// 3) followed by a custom OP to make the debugger do a TLS lookup.
- addUInt(Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_push_tls_address);
+ addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_push_tls_address);
} else {
DD->addArangeLabel(SymbolCU(this, Sym));
- addOpAddress(Loc, Sym);
+ addOpAddress(*Loc, Sym);
}
// Do not create specification DIE if context is either compile unit
// or a subprogram.
if (GVContext && GV.isDefinition() && !GVContext.isCompileUnit() &&
!GVContext.isFile() && !DD->isSubprogramContext(GVContext)) {
// Create specification DIE.
- VariableSpecDIE = createAndAddDIE(dwarf::DW_TAG_variable, *UnitDie);
- addDIEEntry(VariableSpecDIE, dwarf::DW_AT_specification, VariableDIE);
- addBlock(VariableSpecDIE, dwarf::DW_AT_location, Loc);
+ VariableSpecDIE = &createAndAddDIE(dwarf::DW_TAG_variable, UnitDie);
+ addDIEEntry(*VariableSpecDIE, dwarf::DW_AT_specification, *VariableDIE);
+ addBlock(*VariableSpecDIE, dwarf::DW_AT_location, Loc);
// A static member's declaration is already flagged as such.
if (!SDMDecl.Verify())
- addFlag(VariableDIE, dwarf::DW_AT_declaration);
+ addFlag(*VariableDIE, dwarf::DW_AT_declaration);
} else {
- addBlock(VariableDIE, dwarf::DW_AT_location, Loc);
+ addBlock(*VariableDIE, dwarf::DW_AT_location, Loc);
}
// Add the linkage name.
StringRef LinkageName = GV.getLinkageName();
@@ -1685,8 +1627,8 @@ void DwarfCompileUnit::createGlobalVariableDIE(DIGlobalVariable GV) {
// From DWARF4: DIEs to which DW_AT_linkage_name may apply include:
// TAG_common_block, TAG_constant, TAG_entry_point, TAG_subprogram and
// TAG_variable.
- addString(IsStaticMember && VariableSpecDIE ? VariableSpecDIE
- : VariableDIE,
+ addString(IsStaticMember && VariableSpecDIE ? *VariableSpecDIE
+ : *VariableDIE,
DD->getDwarfVersion() >= 4 ? dwarf::DW_AT_linkage_name
: dwarf::DW_AT_MIPS_linkage_name,
GlobalValue::getRealLinkageName(LinkageName));
@@ -1696,7 +1638,7 @@ void DwarfCompileUnit::createGlobalVariableDIE(DIGlobalVariable GV) {
// emitting AT_const_value multiple times, we only add AT_const_value when
// it is not a static member.
if (!IsStaticMember)
- addConstantValue(VariableDIE, CI, isUnsignedDIType(DD, GTy));
+ addConstantValue(*VariableDIE, CI, GTy);
} else if (const ConstantExpr *CE = getMergedGlobalExpr(GV->getOperand(11))) {
addToAccelTable = true;
// GV is a merged global.
@@ -1704,34 +1646,35 @@ void DwarfCompileUnit::createGlobalVariableDIE(DIGlobalVariable GV) {
Value *Ptr = CE->getOperand(0);
MCSymbol *Sym = Asm->getSymbol(cast<GlobalValue>(Ptr));
DD->addArangeLabel(SymbolCU(this, Sym));
- addOpAddress(Loc, Sym);
- addUInt(Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_constu);
+ addOpAddress(*Loc, Sym);
+ addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_constu);
SmallVector<Value *, 3> Idx(CE->op_begin() + 1, CE->op_end());
- addUInt(Loc, dwarf::DW_FORM_udata,
+ addUInt(*Loc, dwarf::DW_FORM_udata,
Asm->getDataLayout().getIndexedOffset(Ptr->getType(), Idx));
- addUInt(Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_plus);
- addBlock(VariableDIE, dwarf::DW_AT_location, Loc);
+ addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_plus);
+ addBlock(*VariableDIE, dwarf::DW_AT_location, Loc);
}
if (addToAccelTable) {
- DIE *AddrDIE = VariableSpecDIE ? VariableSpecDIE : VariableDIE;
- addAccelName(GV.getName(), AddrDIE);
+ DIE &AddrDIE = VariableSpecDIE ? *VariableSpecDIE : *VariableDIE;
+ DD->addAccelName(GV.getName(), AddrDIE);
// If the linkage name is different than the name, go ahead and output
// that as well into the name table.
if (GV.getLinkageName() != "" && GV.getName() != GV.getLinkageName())
- addAccelName(GV.getLinkageName(), AddrDIE);
+ DD->addAccelName(GV.getLinkageName(), AddrDIE);
}
if (!GV.isLocalToUnit())
- addGlobalName(GV.getName(), VariableSpecDIE ? VariableSpecDIE : VariableDIE,
+ addGlobalName(GV.getName(),
+ VariableSpecDIE ? *VariableSpecDIE : *VariableDIE,
GV.getContext());
}
/// constructSubrangeDIE - Construct subrange DIE from DISubrange.
void DwarfUnit::constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy) {
- DIE *DW_Subrange = createAndAddDIE(dwarf::DW_TAG_subrange_type, Buffer);
- addDIEEntry(DW_Subrange, dwarf::DW_AT_type, IndexTy);
+ DIE &DW_Subrange = createAndAddDIE(dwarf::DW_TAG_subrange_type, Buffer);
+ addDIEEntry(DW_Subrange, dwarf::DW_AT_type, *IndexTy);
// The LowerBound value defines the lower bound, which is typically zero for
// C/C++. The Count value is the number of elements. Values are 64 bit. If
@@ -1756,10 +1699,10 @@ void DwarfUnit::constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy) {
/// constructArrayTypeDIE - Construct array type DIE from DICompositeType.
void DwarfUnit::constructArrayTypeDIE(DIE &Buffer, DICompositeType CTy) {
if (CTy.isVector())
- addFlag(&Buffer, dwarf::DW_AT_GNU_vector);
+ addFlag(Buffer, dwarf::DW_AT_GNU_vector);
// Emit the element type.
- addType(&Buffer, resolve(CTy.getTypeDerivedFrom()));
+ addType(Buffer, resolve(CTy.getTypeDerivedFrom()));
// Get an anonymous type for index type.
// FIXME: This type should be passed down from the front end
@@ -1767,10 +1710,10 @@ void DwarfUnit::constructArrayTypeDIE(DIE &Buffer, DICompositeType CTy) {
DIE *IdxTy = getIndexTyDie();
if (!IdxTy) {
// Construct an integer type to use for indexes.
- IdxTy = createAndAddDIE(dwarf::DW_TAG_base_type, *UnitDie);
- addString(IdxTy, dwarf::DW_AT_name, "sizetype");
- addUInt(IdxTy, dwarf::DW_AT_byte_size, None, sizeof(int64_t));
- addUInt(IdxTy, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1,
+ IdxTy = &createAndAddDIE(dwarf::DW_TAG_base_type, UnitDie);
+ addString(*IdxTy, dwarf::DW_AT_name, "sizetype");
+ addUInt(*IdxTy, dwarf::DW_AT_byte_size, None, sizeof(int64_t));
+ addUInt(*IdxTy, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1,
dwarf::DW_ATE_unsigned);
setIndexTyDie(IdxTy);
}
@@ -1792,7 +1735,7 @@ void DwarfUnit::constructEnumTypeDIE(DIE &Buffer, DICompositeType CTy) {
for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) {
DIEnumerator Enum(Elements.getElement(i));
if (Enum.isEnumerator()) {
- DIE *Enumerator = createAndAddDIE(dwarf::DW_TAG_enumerator, Buffer);
+ DIE &Enumerator = createAndAddDIE(dwarf::DW_TAG_enumerator, Buffer);
StringRef Name = Enum.getName();
addString(Enumerator, dwarf::DW_AT_name, Name);
int64_t Value = Enum.getEnumValue();
@@ -1802,8 +1745,8 @@ void DwarfUnit::constructEnumTypeDIE(DIE &Buffer, DICompositeType CTy) {
}
DIType DTy = resolve(CTy.getTypeDerivedFrom());
if (DTy) {
- addType(&Buffer, DTy);
- addFlag(&Buffer, dwarf::DW_AT_enum_class);
+ addType(Buffer, DTy);
+ addFlag(Buffer, dwarf::DW_AT_enum_class);
}
}
@@ -1813,48 +1756,51 @@ void DwarfUnit::constructContainingTypeDIEs() {
for (DenseMap<DIE *, const MDNode *>::iterator CI = ContainingTypeMap.begin(),
CE = ContainingTypeMap.end();
CI != CE; ++CI) {
- DIE *SPDie = CI->first;
+ DIE &SPDie = *CI->first;
DIDescriptor D(CI->second);
if (!D)
continue;
DIE *NDie = getDIE(D);
if (!NDie)
continue;
- addDIEEntry(SPDie, dwarf::DW_AT_containing_type, NDie);
+ addDIEEntry(SPDie, dwarf::DW_AT_containing_type, *NDie);
}
}
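
DW_AT_containing_type can name a type whose DIE does not exist yet when the
subprogram is built, so the pairs are parked in ContainingTypeMap and
resolved here in a second pass, after every DIE that will ever exist has
been created. The two-phase idea in a reduced sketch (the string keys are
hypothetical stand-ins for the MDNode keys used above):

    #include <string>
    #include <unordered_map>
    #include <utility>
    #include <vector>

    struct Entry {
      Entry *ContainingType = nullptr; // filled in by the second pass
    };

    void resolveContainingTypes(
        const std::vector<std::pair<Entry *, std::string>> &Pending,
        const std::unordered_map<std::string, Entry *> &ByName) {
      for (const auto &P : Pending) {
        auto It = ByName.find(P.second);
        if (It == ByName.end())
          continue; // referenced type was never emitted; skip, as above
        P.first->ContainingType = It->second;
      }
    }
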
/// constructVariableDIE - Construct a DIE for the given DbgVariable.
-DIE *DwarfUnit::constructVariableDIE(DbgVariable &DV, bool isScopeAbstract) {
+std::unique_ptr<DIE> DwarfUnit::constructVariableDIE(DbgVariable &DV,
+ bool Abstract) {
+ auto D = constructVariableDIEImpl(DV, Abstract);
+ DV.setDIE(*D);
+ return D;
+}
+
+std::unique_ptr<DIE> DwarfUnit::constructVariableDIEImpl(const DbgVariable &DV,
+ bool Abstract) {
StringRef Name = DV.getName();
// Define variable debug information entry.
- DIE *VariableDie = new DIE(DV.getTag());
+ auto VariableDie = make_unique<DIE>(DV.getTag());
DbgVariable *AbsVar = DV.getAbstractVariable();
- DIE *AbsDIE = AbsVar ? AbsVar->getDIE() : NULL;
- if (AbsDIE)
- addDIEEntry(VariableDie, dwarf::DW_AT_abstract_origin, AbsDIE);
+ if (AbsVar && AbsVar->getDIE())
+ addDIEEntry(*VariableDie, dwarf::DW_AT_abstract_origin, *AbsVar->getDIE());
else {
if (!Name.empty())
- addString(VariableDie, dwarf::DW_AT_name, Name);
- addSourceLine(VariableDie, DV.getVariable());
- addType(VariableDie, DV.getType());
+ addString(*VariableDie, dwarf::DW_AT_name, Name);
+ addSourceLine(*VariableDie, DV.getVariable());
+ addType(*VariableDie, DV.getType());
+ if (DV.isArtificial())
+ addFlag(*VariableDie, dwarf::DW_AT_artificial);
}
- if (DV.isArtificial())
- addFlag(VariableDie, dwarf::DW_AT_artificial);
-
- if (isScopeAbstract) {
- DV.setDIE(VariableDie);
+ if (Abstract)
return VariableDie;
- }
// Add variable address.
unsigned Offset = DV.getDotDebugLocOffset();
if (Offset != ~0U) {
- addLocationList(VariableDie, dwarf::DW_AT_location, Offset);
- DV.setDIE(VariableDie);
+ addLocationList(*VariableDie, dwarf::DW_AT_location, Offset);
return VariableDie;
}
@@ -1867,38 +1813,36 @@ DIE *DwarfUnit::constructVariableDIE(DbgVariable &DV, bool isScopeAbstract) {
if (DVInsn->getOperand(1).isImm()) {
MachineLocation Location(RegOp.getReg(),
DVInsn->getOperand(1).getImm());
- addVariableAddress(DV, VariableDie, Location);
+ addVariableAddress(DV, *VariableDie, Location);
} else if (RegOp.getReg())
- addVariableAddress(DV, VariableDie, MachineLocation(RegOp.getReg()));
+ addVariableAddress(DV, *VariableDie, MachineLocation(RegOp.getReg()));
} else if (DVInsn->getOperand(0).isImm())
- addConstantValue(VariableDie, DVInsn->getOperand(0), DV.getType());
+ addConstantValue(*VariableDie, DVInsn->getOperand(0), DV.getType());
else if (DVInsn->getOperand(0).isFPImm())
- addConstantFPValue(VariableDie, DVInsn->getOperand(0));
+ addConstantFPValue(*VariableDie, DVInsn->getOperand(0));
else if (DVInsn->getOperand(0).isCImm())
- addConstantValue(VariableDie, DVInsn->getOperand(0).getCImm(),
- isUnsignedDIType(DD, DV.getType()));
+ addConstantValue(*VariableDie, DVInsn->getOperand(0).getCImm(),
+ DV.getType());
- DV.setDIE(VariableDie);
return VariableDie;
- } else {
- // .. else use frame index.
- int FI = DV.getFrameIndex();
- if (FI != ~0) {
- unsigned FrameReg = 0;
- const TargetFrameLowering *TFI = Asm->TM.getFrameLowering();
- int Offset = TFI->getFrameIndexReference(*Asm->MF, FI, FrameReg);
- MachineLocation Location(FrameReg, Offset);
- addVariableAddress(DV, VariableDie, Location);
- }
}
- DV.setDIE(VariableDie);
+ // .. else use frame index.
+ int FI = DV.getFrameIndex();
+ if (FI != ~0) {
+ unsigned FrameReg = 0;
+ const TargetFrameLowering *TFI = Asm->TM.getFrameLowering();
+ int Offset = TFI->getFrameIndexReference(*Asm->MF, FI, FrameReg);
+ MachineLocation Location(FrameReg, Offset);
+ addVariableAddress(DV, *VariableDie, Location);
+ }
+
return VariableDie;
}
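
constructVariableDIE now hands back a std::unique_ptr<DIE> instead of a raw
pointer, so ownership of the freshly built DIE is explicit until a scope
adopts it. The idiom in a reduced, self-contained form (Node is a stand-in,
not the real DIE class):

    #include <memory>
    #include <utility>
    #include <vector>

    struct Node {
      std::vector<std::unique_ptr<Node>> Children;
      // Adopting a child transfers ownership; the returned reference is
      // non-owning and stays valid for the parent's lifetime.
      Node &addChild(std::unique_ptr<Node> Child) {
        Children.push_back(std::move(Child));
        return *Children.back();
      }
    };

    std::unique_ptr<Node> constructVariable() {
      std::unique_ptr<Node> D(new Node()); // pre-C++14 make_unique stand-in
      // ... populate attributes here ...
      return D; // the caller decides where, and whether, to attach it
    }

    void attachTo(Node &Scope) {
      Node &Attached = Scope.addChild(constructVariable());
      (void)Attached;
    }
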
/// constructMemberDIE - Construct member DIE from DIDerivedType.
void DwarfUnit::constructMemberDIE(DIE &Buffer, DIDerivedType DT) {
- DIE *MemberDie = createAndAddDIE(DT.getTag(), Buffer);
+ DIE &MemberDie = createAndAddDIE(DT.getTag(), Buffer);
StringRef Name = DT.getName();
if (!Name.empty())
addString(MemberDie, dwarf::DW_AT_name, Name);
@@ -1914,13 +1858,13 @@ void DwarfUnit::constructMemberDIE(DIE &Buffer, DIDerivedType DT) {
// BaseAddr = ObAddr + *((*ObAddr) - Offset)
DIELoc *VBaseLocationDie = new (DIEValueAllocator) DIELoc();
- addUInt(VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_dup);
- addUInt(VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
- addUInt(VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_constu);
- addUInt(VBaseLocationDie, dwarf::DW_FORM_udata, DT.getOffsetInBits());
- addUInt(VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_minus);
- addUInt(VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
- addUInt(VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_plus);
+ addUInt(*VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_dup);
+ addUInt(*VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
+ addUInt(*VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_constu);
+ addUInt(*VBaseLocationDie, dwarf::DW_FORM_udata, DT.getOffsetInBits());
+ addUInt(*VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_minus);
+ addUInt(*VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
+ addUInt(*VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_plus);
addBlock(MemberDie, dwarf::DW_AT_data_member_location, VBaseLocationDie);
} else {
@@ -1953,8 +1897,8 @@ void DwarfUnit::constructMemberDIE(DIE &Buffer, DIDerivedType DT) {
if (DD->getDwarfVersion() <= 2) {
DIELoc *MemLocationDie = new (DIEValueAllocator) DIELoc();
- addUInt(MemLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst);
- addUInt(MemLocationDie, dwarf::DW_FORM_udata, OffsetInBytes);
+ addUInt(*MemLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst);
+ addUInt(*MemLocationDie, dwarf::DW_FORM_udata, OffsetInBytes);
addBlock(MemberDie, dwarf::DW_AT_data_member_location, MemLocationDie);
} else
addUInt(MemberDie, dwarf::DW_AT_data_member_location, None,
@@ -1978,8 +1922,8 @@ void DwarfUnit::constructMemberDIE(DIE &Buffer, DIDerivedType DT) {
// Objective-C properties.
if (MDNode *PNode = DT.getObjCProperty())
if (DIEEntry *PropertyDie = getDIEEntry(PNode))
- MemberDie->addValue(dwarf::DW_AT_APPLE_property, dwarf::DW_FORM_ref4,
- PropertyDie);
+ MemberDie.addValue(dwarf::DW_AT_APPLE_property, dwarf::DW_FORM_ref4,
+ PropertyDie);
if (DT.isArtificial())
addFlag(MemberDie, dwarf::DW_AT_artificial);
@@ -1988,7 +1932,7 @@ void DwarfUnit::constructMemberDIE(DIE &Buffer, DIDerivedType DT) {
/// getOrCreateStaticMemberDIE - Create new DIE for C++ static member.
DIE *DwarfUnit::getOrCreateStaticMemberDIE(DIDerivedType DT) {
if (!DT.Verify())
- return NULL;
+ return nullptr;
// Construct the context before querying for the existence of the DIE in case
// such construction creates the DIE.
@@ -1996,11 +1940,10 @@ DIE *DwarfUnit::getOrCreateStaticMemberDIE(DIDerivedType DT) {
assert(dwarf::isType(ContextDIE->getTag()) &&
"Static member should belong to a type.");
- DIE *StaticMemberDIE = getDIE(DT);
- if (StaticMemberDIE)
+ if (DIE *StaticMemberDIE = getDIE(DT))
return StaticMemberDIE;
- StaticMemberDIE = createAndAddDIE(DT.getTag(), *ContextDIE, DT);
+ DIE &StaticMemberDIE = createAndAddDIE(DT.getTag(), *ContextDIE, DT);
DIType Ty = resolve(DT.getTypeDerivedFrom());
@@ -2023,11 +1966,11 @@ DIE *DwarfUnit::getOrCreateStaticMemberDIE(DIDerivedType DT) {
dwarf::DW_ACCESS_public);
if (const ConstantInt *CI = dyn_cast_or_null<ConstantInt>(DT.getConstant()))
- addConstantValue(StaticMemberDIE, CI, isUnsignedDIType(DD, Ty));
+ addConstantValue(StaticMemberDIE, CI, Ty);
if (const ConstantFP *CFP = dyn_cast_or_null<ConstantFP>(DT.getConstant()))
addConstantFPValue(StaticMemberDIE, CFP);
- return StaticMemberDIE;
+ return &StaticMemberDIE;
}
void DwarfUnit::emitHeader(const MCSymbol *ASectionSym) const {
@@ -2072,7 +2015,7 @@ void DwarfCompileUnit::initStmtList(MCSymbol *DwarfLineSectionSym) {
MCSymbol *LineTableStartSym =
Asm->OutStreamer.getDwarfLineTableSymbol(getUniqueID());
- stmtListIndex = UnitDie->getValues().size();
+ stmtListIndex = UnitDie.getValues().size();
// DW_AT_stmt_list is an offset of line number information for this
// compile unit in debug_line section. For split dwarf this is
@@ -2080,16 +2023,16 @@ void DwarfCompileUnit::initStmtList(MCSymbol *DwarfLineSectionSym) {
// The line table entries are not always emitted in assembly, so it
// is not okay to use line_table_start here.
if (Asm->MAI->doesDwarfUseRelocationsAcrossSections())
- addSectionLabel(UnitDie.get(), dwarf::DW_AT_stmt_list, LineTableStartSym);
+ addSectionLabel(UnitDie, dwarf::DW_AT_stmt_list, LineTableStartSym);
else
- addSectionDelta(UnitDie.get(), dwarf::DW_AT_stmt_list, LineTableStartSym,
+ addSectionDelta(UnitDie, dwarf::DW_AT_stmt_list, LineTableStartSym,
DwarfLineSectionSym);
}
void DwarfCompileUnit::applyStmtList(DIE &D) {
D.addValue(dwarf::DW_AT_stmt_list,
- UnitDie->getAbbrev().getData()[stmtListIndex].getForm(),
- UnitDie->getValues()[stmtListIndex]);
+ UnitDie.getAbbrev().getData()[stmtListIndex].getForm(),
+ UnitDie.getValues()[stmtListIndex]);
}
void DwarfTypeUnit::emitHeader(const MCSymbol *ASectionSym) const {
@@ -2114,5 +2057,4 @@ void DwarfTypeUnit::initSection(const MCSection *Section) {
Asm->GetTempSymbol(Section->getLabelBeginName(), getUniqueID());
this->LabelEnd =
Asm->GetTempSymbol(Section->getLabelEndName(), getUniqueID());
- this->LabelRange = Asm->GetTempSymbol("gnu_ranges", getUniqueID());
}
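
Before the header changes below, note the pattern that dominates this patch:
DIE parameters that can never be null move from DIE* to DIE&, so the
non-null contract lives in the type system instead of in asserts or
convention. A minimal before/after sketch (Node and the attribute values are
hypothetical):

    struct Node {
      void set(int Attr, int Val) { (void)Attr; (void)Val; }
    };

    // Before: nullptr is representable, so every callee either asserts or
    // crashes on a bad call.
    void addFlagOld(Node *N, int Attr) { N->set(Attr, 1); }

    // After: the signature enforces non-null at every call site.
    void addFlagNew(Node &N, int Attr) { N.set(Attr, 1); }

    void caller(Node &N) {
      addFlagOld(&N, 0x3c); // extra '&' noise, no safety
      addFlagNew(N, 0x3c);
    }
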
diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.h b/lib/CodeGen/AsmPrinter/DwarfUnit.h
index ef713f7..acb7528 100644
--- a/lib/CodeGen/AsmPrinter/DwarfUnit.h
+++ b/lib/CodeGen/AsmPrinter/DwarfUnit.h
@@ -73,7 +73,7 @@ protected:
DICompileUnit CUNode;
/// Unit debug information entry.
- const std::unique_ptr<DIE> UnitDie;
+ DIE UnitDie;
/// Offset of the UnitDie from beginning of debug info section.
unsigned DebugInfoOffset;
@@ -102,18 +102,6 @@ protected:
/// GlobalTypes - A map of globally visible types for this unit.
StringMap<const DIE *> GlobalTypes;
- /// AccelNames - A map of names for the name accelerator table.
- StringMap<std::vector<const DIE *> > AccelNames;
-
- /// AccelObjC - A map of objc spec for the objc accelerator table.
- StringMap<std::vector<const DIE *> > AccelObjC;
-
- /// AccelNamespace - A map of names for the namespace accelerator table.
- StringMap<std::vector<const DIE *> > AccelNamespace;
-
- /// AccelTypes - A map of names for the type accelerator table.
- StringMap<std::vector<std::pair<const DIE *, unsigned> > > AccelTypes;
-
/// DIEBlocks - A list of all the DIEBlocks in use.
std::vector<DIEBlock *> DIEBlocks;
@@ -150,20 +138,17 @@ protected:
/// The end of the unit within its section.
MCSymbol *LabelEnd;
- /// The label for the start of the range sets for the elements of this unit.
- MCSymbol *LabelRange;
-
/// Skeleton unit associated with this unit.
DwarfUnit *Skeleton;
- DwarfUnit(unsigned UID, DIE *D, DICompileUnit CU, AsmPrinter *A,
+ DwarfUnit(unsigned UID, dwarf::Tag, DICompileUnit CU, AsmPrinter *A,
DwarfDebug *DW, DwarfFile *DWU);
public:
virtual ~DwarfUnit();
/// Set the skeleton unit associated with this unit.
- void setSkeleton(DwarfUnit *Skel) { Skeleton = Skel; }
+ void setSkeleton(DwarfUnit &Skel) { Skeleton = &Skel; }
/// Get the skeleton unit associated with this unit.
DwarfUnit *getSkeleton() const { return Skeleton; }
@@ -179,7 +164,6 @@ public:
Asm->GetTempSymbol(Section->getLabelBeginName(), getUniqueID());
this->LabelEnd =
Asm->GetTempSymbol(Section->getLabelEndName(), getUniqueID());
- this->LabelRange = Asm->GetTempSymbol("gnu_ranges", getUniqueID());
}
const MCSection *getSection() const {
@@ -218,38 +202,19 @@ public:
return LabelEnd;
}
- MCSymbol *getLabelRange() const {
- assert(Section);
- return LabelRange;
- }
-
// Accessors.
unsigned getUniqueID() const { return UniqueID; }
uint16_t getLanguage() const { return CUNode.getLanguage(); }
DICompileUnit getCUNode() const { return CUNode; }
- DIE *getUnitDie() const { return UnitDie.get(); }
+ DIE &getUnitDie() { return UnitDie; }
const StringMap<const DIE *> &getGlobalNames() const { return GlobalNames; }
const StringMap<const DIE *> &getGlobalTypes() const { return GlobalTypes; }
- const StringMap<std::vector<const DIE *> > &getAccelNames() const {
- return AccelNames;
- }
- const StringMap<std::vector<const DIE *> > &getAccelObjC() const {
- return AccelObjC;
- }
- const StringMap<std::vector<const DIE *> > &getAccelNamespace() const {
- return AccelNamespace;
- }
- const StringMap<std::vector<std::pair<const DIE *, unsigned> > > &
- getAccelTypes() const {
- return AccelTypes;
- }
-
unsigned getDebugInfoOffset() const { return DebugInfoOffset; }
void setDebugInfoOffset(unsigned DbgInfoOff) { DebugInfoOffset = DbgInfoOff; }
/// hasContent - Return true if this compile unit has something to write out.
- bool hasContent() const { return !UnitDie->getChildren().empty(); }
+ bool hasContent() const { return !UnitDie.getChildren().empty(); }
/// addRange - Add an address range to the list of ranges for this unit.
void addRange(RangeSpan Range);
@@ -273,19 +238,10 @@ public:
/// addGlobalName - Add a new global entity to the compile unit.
///
- void addGlobalName(StringRef Name, DIE *Die, DIScope Context);
-
- /// addAccelName - Add a new name to the name accelerator table.
- void addAccelName(StringRef Name, const DIE *Die);
-
- /// addAccelObjC - Add a new name to the ObjC accelerator table.
- void addAccelObjC(StringRef Name, const DIE *Die);
+ void addGlobalName(StringRef Name, DIE &Die, DIScope Context);
/// addAccelNamespace - Add a new name to the namespace accelerator table.
- void addAccelNamespace(StringRef Name, const DIE *Die);
-
- /// addAccelType - Add a new type to the type accelerator table.
- void addAccelType(StringRef Name, std::pair<const DIE *, unsigned> Die);
+ void addAccelNamespace(StringRef Name, const DIE &Die);
/// getDIE - Returns the debug information entry map slot for the
/// specified debug variable. We delegate the request to DwarfDebug
@@ -303,118 +259,116 @@ public:
/// kept in DwarfDebug.
void insertDIE(DIDescriptor Desc, DIE *D);
- /// addDie - Adds or interns the DIE to the compile unit.
- ///
- void addDie(DIE *Buffer) { UnitDie->addChild(Buffer); }
-
/// addFlag - Add a flag that is true to the DIE.
- void addFlag(DIE *Die, dwarf::Attribute Attribute);
+ void addFlag(DIE &Die, dwarf::Attribute Attribute);
/// addUInt - Add an unsigned integer attribute data and value.
- void addUInt(DIE *Die, dwarf::Attribute Attribute, Optional<dwarf::Form> Form,
+ void addUInt(DIE &Die, dwarf::Attribute Attribute, Optional<dwarf::Form> Form,
uint64_t Integer);
- void addUInt(DIE *Block, dwarf::Form Form, uint64_t Integer);
+ void addUInt(DIE &Block, dwarf::Form Form, uint64_t Integer);
/// addSInt - Add a signed integer attribute data and value.
- void addSInt(DIE *Die, dwarf::Attribute Attribute, Optional<dwarf::Form> Form,
+ void addSInt(DIE &Die, dwarf::Attribute Attribute, Optional<dwarf::Form> Form,
int64_t Integer);
- void addSInt(DIELoc *Die, Optional<dwarf::Form> Form, int64_t Integer);
+ void addSInt(DIELoc &Die, Optional<dwarf::Form> Form, int64_t Integer);
/// addString - Add a string attribute data and value.
- void addString(DIE *Die, dwarf::Attribute Attribute, const StringRef Str);
+ void addString(DIE &Die, dwarf::Attribute Attribute, const StringRef Str);
/// addLocalString - Add a string attribute data and value.
- void addLocalString(DIE *Die, dwarf::Attribute Attribute,
+ void addLocalString(DIE &Die, dwarf::Attribute Attribute,
const StringRef Str);
/// addExpr - Add a Dwarf expression attribute data and value.
- void addExpr(DIELoc *Die, dwarf::Form Form, const MCExpr *Expr);
+ void addExpr(DIELoc &Die, dwarf::Form Form, const MCExpr *Expr);
/// addLabel - Add a Dwarf label attribute data and value.
- void addLabel(DIE *Die, dwarf::Attribute Attribute, dwarf::Form Form,
+ void addLabel(DIE &Die, dwarf::Attribute Attribute, dwarf::Form Form,
const MCSymbol *Label);
- void addLabel(DIELoc *Die, dwarf::Form Form, const MCSymbol *Label);
+ void addLabel(DIELoc &Die, dwarf::Form Form, const MCSymbol *Label);
/// addLocationList - Add a Dwarf loclistptr attribute data and value.
- void addLocationList(DIE *Die, dwarf::Attribute Attribute, unsigned Index);
+ void addLocationList(DIE &Die, dwarf::Attribute Attribute, unsigned Index);
/// addSectionLabel - Add a Dwarf section label attribute data and value.
///
- void addSectionLabel(DIE *Die, dwarf::Attribute Attribute,
+ void addSectionLabel(DIE &Die, dwarf::Attribute Attribute,
const MCSymbol *Label);
/// addSectionOffset - Add an offset into a section attribute data and value.
///
- void addSectionOffset(DIE *Die, dwarf::Attribute Attribute, uint64_t Integer);
+ void addSectionOffset(DIE &Die, dwarf::Attribute Attribute, uint64_t Integer);
/// addOpAddress - Add a dwarf op address data and value using the
/// form given and an op of either DW_FORM_addr or DW_FORM_GNU_addr_index.
- void addOpAddress(DIELoc *Die, const MCSymbol *Label);
+ void addOpAddress(DIELoc &Die, const MCSymbol *Label);
/// addSectionDelta - Add a label delta attribute data and value.
- void addSectionDelta(DIE *Die, dwarf::Attribute Attribute, const MCSymbol *Hi,
+ void addSectionDelta(DIE &Die, dwarf::Attribute Attribute, const MCSymbol *Hi,
const MCSymbol *Lo);
/// addLabelDelta - Add a label delta attribute data and value.
- void addLabelDelta(DIE *Die, dwarf::Attribute Attribute, const MCSymbol *Hi,
+ void addLabelDelta(DIE &Die, dwarf::Attribute Attribute, const MCSymbol *Hi,
const MCSymbol *Lo);
/// addDIEEntry - Add a DIE attribute data and value.
- void addDIEEntry(DIE *Die, dwarf::Attribute Attribute, DIE *Entry);
+ void addDIEEntry(DIE &Die, dwarf::Attribute Attribute, DIE &Entry);
/// addDIEEntry - Add a DIE attribute data and value.
- void addDIEEntry(DIE *Die, dwarf::Attribute Attribute, DIEEntry *Entry);
+ void addDIEEntry(DIE &Die, dwarf::Attribute Attribute, DIEEntry *Entry);
- void addDIETypeSignature(DIE *Die, const DwarfTypeUnit &Type);
+ void addDIETypeSignature(DIE &Die, const DwarfTypeUnit &Type);
/// addBlock - Add block data.
- void addBlock(DIE *Die, dwarf::Attribute Attribute, DIELoc *Block);
+ void addBlock(DIE &Die, dwarf::Attribute Attribute, DIELoc *Block);
/// addBlock - Add block data.
- void addBlock(DIE *Die, dwarf::Attribute Attribute, DIEBlock *Block);
+ void addBlock(DIE &Die, dwarf::Attribute Attribute, DIEBlock *Block);
/// addSourceLine - Add location information to specified debug information
/// entry.
- void addSourceLine(DIE *Die, unsigned Line, StringRef File,
+ void addSourceLine(DIE &Die, unsigned Line, StringRef File,
StringRef Directory);
- void addSourceLine(DIE *Die, DIVariable V);
- void addSourceLine(DIE *Die, DIGlobalVariable G);
- void addSourceLine(DIE *Die, DISubprogram SP);
- void addSourceLine(DIE *Die, DIType Ty);
- void addSourceLine(DIE *Die, DINameSpace NS);
- void addSourceLine(DIE *Die, DIObjCProperty Ty);
+ void addSourceLine(DIE &Die, DIVariable V);
+ void addSourceLine(DIE &Die, DIGlobalVariable G);
+ void addSourceLine(DIE &Die, DISubprogram SP);
+ void addSourceLine(DIE &Die, DIType Ty);
+ void addSourceLine(DIE &Die, DINameSpace NS);
+ void addSourceLine(DIE &Die, DIObjCProperty Ty);
/// addAddress - Add an address attribute to a die based on the location
/// provided.
- void addAddress(DIE *Die, dwarf::Attribute Attribute,
+ void addAddress(DIE &Die, dwarf::Attribute Attribute,
const MachineLocation &Location, bool Indirect = false);
/// addConstantValue - Add constant value entry in variable DIE.
- void addConstantValue(DIE *Die, const MachineOperand &MO, DIType Ty);
- void addConstantValue(DIE *Die, const ConstantInt *CI, bool Unsigned);
- void addConstantValue(DIE *Die, const APInt &Val, bool Unsigned);
+ void addConstantValue(DIE &Die, const MachineOperand &MO, DIType Ty);
+ void addConstantValue(DIE &Die, const ConstantInt *CI, DIType Ty);
+ void addConstantValue(DIE &Die, const APInt &Val, DIType Ty);
+ void addConstantValue(DIE &Die, const APInt &Val, bool Unsigned);
+ void addConstantValue(DIE &Die, bool Unsigned, uint64_t Val);
/// addConstantFPValue - Add constant value entry in variable DIE.
- void addConstantFPValue(DIE *Die, const MachineOperand &MO);
- void addConstantFPValue(DIE *Die, const ConstantFP *CFP);
+ void addConstantFPValue(DIE &Die, const MachineOperand &MO);
+ void addConstantFPValue(DIE &Die, const ConstantFP *CFP);
/// addTemplateParams - Add template parameters in buffer.
void addTemplateParams(DIE &Buffer, DIArray TParams);
/// addRegisterOp - Add register operand.
- void addRegisterOp(DIELoc *TheDie, unsigned Reg);
+ void addRegisterOp(DIELoc &TheDie, unsigned Reg);
/// addRegisterOffset - Add register offset.
- void addRegisterOffset(DIELoc *TheDie, unsigned Reg, int64_t Offset);
+ void addRegisterOffset(DIELoc &TheDie, unsigned Reg, int64_t Offset);
/// addComplexAddress - Start with the address based on the location provided,
/// and generate the DWARF information necessary to find the actual variable
/// (navigating the extra location information encoded in the type) based on
/// the starting location. Add the DWARF information to the die.
- void addComplexAddress(const DbgVariable &DV, DIE *Die,
+ void addComplexAddress(const DbgVariable &DV, DIE &Die,
dwarf::Attribute Attribute,
const MachineLocation &Location);
@@ -424,19 +378,19 @@ public:
/// actual Block variable (navigating the Block struct) based on the
/// starting location. Add the DWARF information to the die. Obsolete,
/// please use addComplexAddress instead.
- void addBlockByrefAddress(const DbgVariable &DV, DIE *Die,
+ void addBlockByrefAddress(const DbgVariable &DV, DIE &Die,
dwarf::Attribute Attribute,
const MachineLocation &Location);
/// addVariableAddress - Add DW_AT_location attribute for a
/// DbgVariable based on provided MachineLocation.
- void addVariableAddress(const DbgVariable &DV, DIE *Die,
+ void addVariableAddress(const DbgVariable &DV, DIE &Die,
MachineLocation Location);
/// addType - Add a new type attribute to the specified entity. This takes
/// an attribute parameter because DW_AT_friend attributes are also
/// type references.
- void addType(DIE *Entity, DIType Ty,
+ void addType(DIE &Entity, DIType Ty,
dwarf::Attribute Attribute = dwarf::DW_AT_type);
/// getOrCreateNameSpace - Create a DIE for DINameSpace.
@@ -445,6 +399,8 @@ public:
/// getOrCreateSubprogramDIE - Create new DIE using SP.
DIE *getOrCreateSubprogramDIE(DISubprogram SP);
+ void applySubprogramAttributes(DISubprogram SP, DIE &SPDie);
+
/// getOrCreateTypeDIE - Find existing DIE or create new DIE for the
/// given DIType.
DIE *getOrCreateTypeDIE(const MDNode *N);
@@ -460,14 +416,15 @@ public:
void constructContainingTypeDIEs();
/// constructVariableDIE - Construct a DIE for the given DbgVariable.
- DIE *constructVariableDIE(DbgVariable &DV, bool isScopeAbstract);
+ std::unique_ptr<DIE> constructVariableDIE(DbgVariable &DV,
+ bool Abstract = false);
/// constructSubprogramArguments - Construct function argument DIEs.
void constructSubprogramArguments(DIE &Buffer, DIArray Args);
/// Create a DIE with the given Tag, add the DIE to its parent, and
/// call insertDIE if MD is not null.
- DIE *createAndAddDIE(unsigned Tag, DIE &Parent,
+ DIE &createAndAddDIE(unsigned Tag, DIE &Parent,
DIDescriptor N = DIDescriptor());
/// Compute the size of a header for this unit, not including the initial
@@ -483,6 +440,9 @@ public:
virtual DwarfCompileUnit &getCU() = 0;
+ /// constructTypeDIE - Construct type DIE from DICompositeType.
+ void constructTypeDIE(DIE &Buffer, DICompositeType CTy);
+
protected:
/// getOrCreateStaticMemberDIE - Create new static data member DIE.
DIE *getOrCreateStaticMemberDIE(DIDerivedType DT);
@@ -492,15 +452,17 @@ protected:
virtual unsigned getOrCreateSourceID(StringRef File, StringRef Directory) = 0;
private:
+ /// \brief Construct a DIE for the given DbgVariable without initializing the
+ /// DbgVariable's DIE reference.
+ std::unique_ptr<DIE> constructVariableDIEImpl(const DbgVariable &DV,
+ bool Abstract);
+
/// constructTypeDIE - Construct basic type die from DIBasicType.
void constructTypeDIE(DIE &Buffer, DIBasicType BTy);
/// constructTypeDIE - Construct derived type die from DIDerivedType.
void constructTypeDIE(DIE &Buffer, DIDerivedType DTy);
- /// constructTypeDIE - Construct type DIE from DICompositeType.
- void constructTypeDIE(DIE &Buffer, DICompositeType CTy);
-
/// constructSubrangeDIE - Construct subrange DIE from DISubrange.
void constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy);
@@ -547,7 +509,7 @@ private:
/// createDIEEntry - Creates a new DIEEntry to be a proxy for a debug
/// information entry.
- DIEEntry *createDIEEntry(DIE *Entry);
+ DIEEntry *createDIEEntry(DIE &Entry);
/// resolve - Look in the DwarfDebug map for the MDNode that
/// corresponds to the reference.
@@ -557,7 +519,7 @@ private:
/// If this is a named finished type then include it in the list of types for
/// the accelerator tables.
- void updateAcceleratorTables(DIScope Context, DIType Ty, const DIE *TyDIE);
+ void updateAcceleratorTables(DIScope Context, DIType Ty, const DIE &TyDIE);
};
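Most of the hunks above migrate the DwarfUnit helpers from DIE* to DIE& parameters; a minimal before/after sketch of a call site (emitMemberSize and its arguments are hypothetical, for illustration only):

    // Before: pointer parameters made null representable but never valid.
    void emitMemberSize(DwarfUnit &CU, DIE *MemberDie, uint64_t Size) {
      CU.addUInt(MemberDie, dwarf::DW_AT_byte_size, None, Size);
    }
    // After: references encode the non-null contract in the type itself.
    void emitMemberSize(DwarfUnit &CU, DIE &MemberDie, uint64_t Size) {
      CU.addUInt(MemberDie, dwarf::DW_AT_byte_size, None, Size);
    }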
class DwarfCompileUnit : public DwarfUnit {
@@ -566,7 +528,7 @@ class DwarfCompileUnit : public DwarfUnit {
unsigned stmtListIndex;
public:
- DwarfCompileUnit(unsigned UID, DIE *D, DICompileUnit Node, AsmPrinter *A,
+ DwarfCompileUnit(unsigned UID, DICompileUnit Node, AsmPrinter *A,
DwarfDebug *DW, DwarfFile *DWU);
void initStmtList(MCSymbol *DwarfLineSectionSym);
@@ -579,12 +541,12 @@ public:
/// addLabelAddress - Add a dwarf label attribute data and value using
/// either DW_FORM_addr or DW_FORM_GNU_addr_index.
- void addLabelAddress(DIE *Die, dwarf::Attribute Attribute,
+ void addLabelAddress(DIE &Die, dwarf::Attribute Attribute,
const MCSymbol *Label);
/// addLocalLabelAddress - Add a dwarf label attribute data and value using
/// DW_FORM_addr only.
- void addLocalLabelAddress(DIE *Die, dwarf::Attribute Attribute,
+ void addLocalLabelAddress(DIE &Die, dwarf::Attribute Attribute,
const MCSymbol *Label);
DwarfCompileUnit &getCU() override { return *this; }
@@ -600,7 +562,7 @@ private:
MCDwarfDwoLineTable *SplitLineTable;
public:
- DwarfTypeUnit(unsigned UID, DIE *D, DwarfCompileUnit &CU, AsmPrinter *A,
+ DwarfTypeUnit(unsigned UID, DwarfCompileUnit &CU, AsmPrinter *A,
DwarfDebug *DW, DwarfFile *DWU,
MCDwarfDwoLineTable *SplitLineTable = nullptr);
diff --git a/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp b/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp
index 50b2ca8..2212941 100644
--- a/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp
+++ b/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp
@@ -29,7 +29,7 @@ StringRef WinCodeViewLineTables::getFullFilepath(const MDNode *S) {
StringRef Dir = Scope.getDirectory(),
Filename = Scope.getFilename();
char *&Result = DirAndFilenameToFilepathMap[std::make_pair(Dir, Filename)];
- if (Result != 0)
+ if (Result)
return Result;
// Clang emits directory and relative filename info into the IR, but CodeView
@@ -102,7 +102,7 @@ void WinCodeViewLineTables::maybeRecordLocation(DebugLoc DL,
}
WinCodeViewLineTables::WinCodeViewLineTables(AsmPrinter *AP)
- : Asm(0), CurFn(0) {
+ : Asm(nullptr), CurFn(nullptr) {
MachineModuleInfo *MMI = AP->MMI;
// If module doesn't have named metadata anchors or COFF debug section
@@ -171,7 +171,7 @@ void WinCodeViewLineTables::emitDebugInfoForFunction(const Function *GV) {
EmitLabelDiff(Asm->OutStreamer, Fn, FI.End);
// PC-to-linenumber lookup table:
- MCSymbol *FileSegmentEnd = 0;
+ MCSymbol *FileSegmentEnd = nullptr;
for (size_t J = 0, F = FI.Instrs.size(); J != F; ++J) {
MCSymbol *Instr = FI.Instrs[J];
assert(InstrInfo.count(Instr));
@@ -216,7 +216,7 @@ void WinCodeViewLineTables::endModule() {
if (FnDebugInfo.empty())
return;
- assert(Asm != 0);
+ assert(Asm != nullptr);
Asm->OutStreamer.SwitchSection(
Asm->getObjFileLowering().getCOFFDebugSymbolsSection());
Asm->EmitInt32(COFF::DEBUG_SECTION_MAGIC);
@@ -277,20 +277,19 @@ void WinCodeViewLineTables::beginFunction(const MachineFunction *MF) {
// for the first instruction of the function, not the last of the prolog?
DebugLoc PrologEndLoc;
bool EmptyPrologue = true;
- for (MachineFunction::const_iterator I = MF->begin(), E = MF->end();
- I != E && PrologEndLoc.isUnknown(); ++I) {
- for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end();
- II != IE; ++II) {
- const MachineInstr *MI = II;
- if (MI->isDebugValue())
+ for (const auto &MBB : *MF) {
+ if (!PrologEndLoc.isUnknown())
+ break;
+ for (const auto &MI : MBB) {
+ if (MI.isDebugValue())
continue;
// First known non-DBG_VALUE and non-frame setup location marks
// the beginning of the function body.
// FIXME: do we need the first subcondition?
- if (!MI->getFlag(MachineInstr::FrameSetup) &&
- (!MI->getDebugLoc().isUnknown())) {
- PrologEndLoc = MI->getDebugLoc();
+ if (!MI.getFlag(MachineInstr::FrameSetup) &&
+ (!MI.getDebugLoc().isUnknown())) {
+ PrologEndLoc = MI.getDebugLoc();
break;
}
EmptyPrologue = false;
@@ -321,7 +320,7 @@ void WinCodeViewLineTables::endFunction(const MachineFunction *MF) {
Asm->OutStreamer.EmitLabel(FunctionEndSym);
CurFn->End = FunctionEndSym;
}
- CurFn = 0;
+ CurFn = nullptr;
}
void WinCodeViewLineTables::beginInstruction(const MachineInstr *MI) {
diff --git a/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.h b/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.h
index a7a6205..0734d97 100644
--- a/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.h
+++ b/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.h
@@ -38,7 +38,7 @@ class WinCodeViewLineTables : public AsmPrinterHandler {
struct FunctionInfo {
SmallVector<MCSymbol *, 10> Instrs;
MCSymbol *End;
- FunctionInfo() : End(0) {}
+ FunctionInfo() : End(nullptr) {}
} *CurFn;
typedef DenseMap<const Function *, FunctionInfo> FnDebugInfoTy;
@@ -104,7 +104,7 @@ class WinCodeViewLineTables : public AsmPrinterHandler {
void maybeRecordLocation(DebugLoc DL, const MachineFunction *MF);
void clear() {
- assert(CurFn == 0);
+ assert(CurFn == nullptr);
FileNameRegistry.clear();
InstrInfo.clear();
}
diff --git a/lib/Target/ARM/ARMAtomicExpandPass.cpp b/lib/CodeGen/AtomicExpandLoadLinkedPass.cpp
index 18e0783..d995333 100644
--- a/lib/Target/ARM/ARMAtomicExpandPass.cpp
+++ b/lib/CodeGen/AtomicExpandLoadLinkedPass.cpp
@@ -1,4 +1,4 @@
-//===-- ARMAtomicExpandPass.cpp - Expand atomic instructions --------------===//
+//===-- AtomicExpandLoadLinkedPass.cpp - Expand atomic instructions -------===//
//
// The LLVM Compiler Infrastructure
//
@@ -12,8 +12,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "arm-atomic-expand"
-#include "ARM.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
@@ -25,13 +23,17 @@
#include "llvm/Target/TargetMachine.h"
using namespace llvm;
+#define DEBUG_TYPE "arm-atomic-expand"
+
namespace {
- class ARMAtomicExpandPass : public FunctionPass {
+ class AtomicExpandLoadLinked : public FunctionPass {
const TargetLowering *TLI;
public:
static char ID; // Pass identification, replacement for typeid
- explicit ARMAtomicExpandPass(const TargetMachine *TM = 0)
- : FunctionPass(ID), TLI(TM->getTargetLowering()) {}
+ explicit AtomicExpandLoadLinked(const TargetMachine *TM = nullptr)
+ : FunctionPass(ID), TLI(TM ? TM->getTargetLowering() : nullptr) {
+ initializeAtomicExpandLoadLinkedPass(*PassRegistry::getPassRegistry());
+ }
bool runOnFunction(Function &F) override;
bool expandAtomicInsts(Function &F);
@@ -43,30 +45,36 @@ namespace {
AtomicOrdering insertLeadingFence(IRBuilder<> &Builder, AtomicOrdering Ord);
void insertTrailingFence(IRBuilder<> &Builder, AtomicOrdering Ord);
+ };
+}
- /// Perform a load-linked operation on Addr, returning a "Value *" with the
- /// corresponding pointee type. This may entail some non-trivial operations
- /// to truncate or reconstruct illegal types since intrinsics must be legal
- Value *loadLinked(IRBuilder<> &Builder, Value *Addr, AtomicOrdering Ord);
-
- /// Perform a store-conditional operation to Addr. Return the status of the
- /// store: 0 if it succeeded, non-zero otherwise.
- Value *storeConditional(IRBuilder<> &Builder, Value *Val, Value *Addr,
- AtomicOrdering Ord);
+char AtomicExpandLoadLinked::ID = 0;
+char &llvm::AtomicExpandLoadLinkedID = AtomicExpandLoadLinked::ID;
+
+static void *initializeAtomicExpandLoadLinkedPassOnce(PassRegistry &Registry) {
+ PassInfo *PI = new PassInfo(
+ "Expand Atomic calls in terms of load-linked & store-conditional",
+ "atomic-ll-sc", &AtomicExpandLoadLinked::ID,
+ PassInfo::NormalCtor_t(callDefaultCtor<AtomicExpandLoadLinked>), false,
+ false, PassInfo::TargetMachineCtor_t(
+ callTargetMachineCtor<AtomicExpandLoadLinked>));
+ Registry.registerPass(*PI, true);
+ return PI;
+}
- /// Return true if the given (atomic) instruction should be expanded by this
- /// pass.
- bool shouldExpandAtomic(Instruction *Inst);
- };
+void llvm::initializeAtomicExpandLoadLinkedPass(PassRegistry &Registry) {
+ CALL_ONCE_INITIALIZATION(initializeAtomicExpandLoadLinkedPassOnce)
}
-char ARMAtomicExpandPass::ID = 0;
-FunctionPass *llvm::createARMAtomicExpandPass(const TargetMachine *TM) {
- return new ARMAtomicExpandPass(TM);
+FunctionPass *llvm::createAtomicExpandLoadLinkedPass(const TargetMachine *TM) {
+ return new AtomicExpandLoadLinked(TM);
}
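For context, a backend schedules the renamed pass from its TargetPassConfig; a hedged usage sketch (MyTargetPassConfig is hypothetical; only createAtomicExpandLoadLinkedPass comes from this patch):

    void MyTargetPassConfig::addIRPasses() {
      // Expand atomics into LL/SC loops early so the rest of the IR
      // pipeline sees ordinary loads, stores and branches.
      addPass(createAtomicExpandLoadLinkedPass(TM));
      TargetPassConfig::addIRPasses();
    }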
-bool ARMAtomicExpandPass::runOnFunction(Function &F) {
+bool AtomicExpandLoadLinked::runOnFunction(Function &F) {
+ if (!TLI)
+ return false;
+
SmallVector<Instruction *, 1> AtomicInsts;
// Changing control-flow while iterating through it is a bad idea, so gather a
@@ -81,7 +89,7 @@ bool ARMAtomicExpandPass::runOnFunction(Function &F) {
bool MadeChange = false;
for (Instruction *Inst : AtomicInsts) {
- if (!shouldExpandAtomic(Inst))
+ if (!TLI->shouldExpandAtomicInIR(Inst))
continue;
if (AtomicRMWInst *AI = dyn_cast<AtomicRMWInst>(Inst))
@@ -99,7 +107,7 @@ bool ARMAtomicExpandPass::runOnFunction(Function &F) {
return MadeChange;
}
-bool ARMAtomicExpandPass::expandAtomicLoad(LoadInst *LI) {
+bool AtomicExpandLoadLinked::expandAtomicLoad(LoadInst *LI) {
// Load instructions don't actually need a leading fence, even in the
// SequentiallyConsistent case.
AtomicOrdering MemOpOrder =
@@ -108,7 +116,8 @@ bool ARMAtomicExpandPass::expandAtomicLoad(LoadInst *LI) {
// The only 64-bit load guaranteed to be single-copy atomic by the ARM ARM is
// an ldrexd (A3.5.3).
IRBuilder<> Builder(LI);
- Value *Val = loadLinked(Builder, LI->getPointerOperand(), MemOpOrder);
+ Value *Val =
+ TLI->emitLoadLinked(Builder, LI->getPointerOperand(), MemOpOrder);
insertTrailingFence(Builder, LI->getOrdering());
@@ -118,7 +127,7 @@ bool ARMAtomicExpandPass::expandAtomicLoad(LoadInst *LI) {
return true;
}
-bool ARMAtomicExpandPass::expandAtomicStore(StoreInst *SI) {
+bool AtomicExpandLoadLinked::expandAtomicStore(StoreInst *SI) {
// The only atomic 64-bit store on ARM is an strexd that succeeds, which means
// we need a loop and the entire instruction is essentially an "atomicrmw
// xchg" that ignores the value loaded.
@@ -132,7 +141,7 @@ bool ARMAtomicExpandPass::expandAtomicStore(StoreInst *SI) {
return expandAtomicRMW(AI);
}
-bool ARMAtomicExpandPass::expandAtomicRMW(AtomicRMWInst *AI) {
+bool AtomicExpandLoadLinked::expandAtomicRMW(AtomicRMWInst *AI) {
AtomicOrdering Order = AI->getOrdering();
Value *Addr = AI->getPointerOperand();
BasicBlock *BB = AI->getParent();
@@ -169,7 +178,7 @@ bool ARMAtomicExpandPass::expandAtomicRMW(AtomicRMWInst *AI) {
// Start the main loop block now that we've taken care of the preliminaries.
Builder.SetInsertPoint(LoopBB);
- Value *Loaded = loadLinked(Builder, Addr, MemOpOrder);
+ Value *Loaded = TLI->emitLoadLinked(Builder, Addr, MemOpOrder);
Value *NewVal;
switch (AI->getOperation()) {
@@ -215,7 +224,8 @@ bool ARMAtomicExpandPass::expandAtomicRMW(AtomicRMWInst *AI) {
llvm_unreachable("Unknown atomic op");
}
- Value *StoreSuccess = storeConditional(Builder, NewVal, Addr, MemOpOrder);
+ Value *StoreSuccess =
+ TLI->emitStoreConditional(Builder, NewVal, Addr, MemOpOrder);
Value *TryAgain = Builder.CreateICmpNE(
StoreSuccess, ConstantInt::get(IntegerType::get(Ctx, 32), 0), "tryagain");
Builder.CreateCondBr(TryAgain, LoopBB, ExitBB);
@@ -229,7 +239,7 @@ bool ARMAtomicExpandPass::expandAtomicRMW(AtomicRMWInst *AI) {
return true;
}
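For orientation, the RMW expansion builds a retry loop around the two new TargetLowering hooks; on an ARM-flavoured target the result looks roughly like this (block names and intrinsics indicative only, not emitted verbatim):

    //   atomicrmw.start:
    //     %loaded   = call i32 @llvm.arm.ldrex.p0i32(i32* %addr)
    //     %new      = add i32 %loaded, %val
    //     %status   = call i32 @llvm.arm.strex.p0i32(i32 %new, i32* %addr)
    //     %tryagain = icmp ne i32 %status, 0
    //     br i1 %tryagain, label %atomicrmw.start, label %atomicrmw.end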
-bool ARMAtomicExpandPass::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
+bool AtomicExpandLoadLinked::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
AtomicOrdering SuccessOrder = CI->getSuccessOrdering();
AtomicOrdering FailureOrder = CI->getFailureOrdering();
Value *Addr = CI->getPointerOperand();
@@ -257,8 +267,8 @@ bool ARMAtomicExpandPass::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
// cmpxchg.end:
// [...]
BasicBlock *ExitBB = BB->splitBasicBlock(CI, "cmpxchg.end");
- auto BarrierBB = BasicBlock::Create(Ctx, "cmpxchg.trystore", F, ExitBB);
- auto TryStoreBB = BasicBlock::Create(Ctx, "cmpxchg.barrier", F, BarrierBB);
+ auto BarrierBB = BasicBlock::Create(Ctx, "cmpxchg.barrier", F, ExitBB);
+ auto TryStoreBB = BasicBlock::Create(Ctx, "cmpxchg.trystore", F, BarrierBB);
auto LoopBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, TryStoreBB);
// This grabs the DebugLoc from CI
@@ -274,7 +284,7 @@ bool ARMAtomicExpandPass::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
// Start the main loop block now that we've taken care of the preliminaries.
Builder.SetInsertPoint(LoopBB);
- Value *Loaded = loadLinked(Builder, Addr, MemOpOrder);
+ Value *Loaded = TLI->emitLoadLinked(Builder, Addr, MemOpOrder);
Value *ShouldStore =
Builder.CreateICmpEQ(Loaded, CI->getCompareOperand(), "should_store");
@@ -284,8 +294,8 @@ bool ARMAtomicExpandPass::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
Builder.CreateCondBr(ShouldStore, TryStoreBB, FailureBB);
Builder.SetInsertPoint(TryStoreBB);
- Value *StoreSuccess =
- storeConditional(Builder, CI->getNewValOperand(), Addr, MemOpOrder);
+ Value *StoreSuccess = TLI->emitStoreConditional(
+ Builder, CI->getNewValOperand(), Addr, MemOpOrder);
Value *TryAgain = Builder.CreateICmpNE(
StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success");
Builder.CreateCondBr(TryAgain, LoopBB, BarrierBB);
@@ -302,73 +312,7 @@ bool ARMAtomicExpandPass::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
return true;
}
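With the block names fixed to match the comment above them, the expanded cmpxchg has roughly this shape (sketch; in the simple case the failure edge also lands in the barrier block):

    //   cmpxchg.start:    %loaded = emitLoadLinked(Addr)
    //                     %should_store = icmp eq %loaded, %cmp
    //                     br %should_store -> cmpxchg.trystore, else fail
    //   cmpxchg.trystore: %status = emitStoreConditional(%new, Addr)
    //                     br %status != 0 -> cmpxchg.start, else barrier
    //   cmpxchg.barrier:  trailing fence, if the target requires one
    //                     br cmpxchg.end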
-Value *ARMAtomicExpandPass::loadLinked(IRBuilder<> &Builder, Value *Addr,
- AtomicOrdering Ord) {
- Module *M = Builder.GetInsertBlock()->getParent()->getParent();
- Type *ValTy = cast<PointerType>(Addr->getType())->getElementType();
- bool IsAcquire =
- Ord == Acquire || Ord == AcquireRelease || Ord == SequentiallyConsistent;
-
- // Since i64 isn't legal and intrinsics don't get type-lowered, the ldrexd
- // intrinsic must return {i32, i32} and we have to recombine them into a
- // single i64 here.
- if (ValTy->getPrimitiveSizeInBits() == 64) {
- Intrinsic::ID Int =
- IsAcquire ? Intrinsic::arm_ldaexd : Intrinsic::arm_ldrexd;
- Function *Ldrex = llvm::Intrinsic::getDeclaration(M, Int);
-
- Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext()));
- Value *LoHi = Builder.CreateCall(Ldrex, Addr, "lohi");
-
- Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
- Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
- Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
- Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
- return Builder.CreateOr(
- Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 32)), "val64");
- }
-
- Type *Tys[] = { Addr->getType() };
- Intrinsic::ID Int = IsAcquire ? Intrinsic::arm_ldaex : Intrinsic::arm_ldrex;
- Function *Ldrex = llvm::Intrinsic::getDeclaration(M, Int, Tys);
-
- return Builder.CreateTruncOrBitCast(
- Builder.CreateCall(Ldrex, Addr),
- cast<PointerType>(Addr->getType())->getElementType());
-}
-
-Value *ARMAtomicExpandPass::storeConditional(IRBuilder<> &Builder, Value *Val,
- Value *Addr, AtomicOrdering Ord) {
- Module *M = Builder.GetInsertBlock()->getParent()->getParent();
- bool IsRelease =
- Ord == Release || Ord == AcquireRelease || Ord == SequentiallyConsistent;
-
- // Since the intrinsics must have legal type, the i64 intrinsics take two
- // parameters: "i32, i32". We must marshal Val into the appropriate form
- // before the call.
- if (Val->getType()->getPrimitiveSizeInBits() == 64) {
- Intrinsic::ID Int =
- IsRelease ? Intrinsic::arm_stlexd : Intrinsic::arm_strexd;
- Function *Strex = Intrinsic::getDeclaration(M, Int);
- Type *Int32Ty = Type::getInt32Ty(M->getContext());
-
- Value *Lo = Builder.CreateTrunc(Val, Int32Ty, "lo");
- Value *Hi = Builder.CreateTrunc(Builder.CreateLShr(Val, 32), Int32Ty, "hi");
- Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext()));
- return Builder.CreateCall3(Strex, Lo, Hi, Addr);
- }
-
- Intrinsic::ID Int = IsRelease ? Intrinsic::arm_stlex : Intrinsic::arm_strex;
- Type *Tys[] = { Addr->getType() };
- Function *Strex = Intrinsic::getDeclaration(M, Int, Tys);
-
- return Builder.CreateCall2(
- Strex, Builder.CreateZExtOrBitCast(
- Val, Strex->getFunctionType()->getParamType(0)),
- Addr);
-}
-
-AtomicOrdering ARMAtomicExpandPass::insertLeadingFence(IRBuilder<> &Builder,
+AtomicOrdering AtomicExpandLoadLinked::insertLeadingFence(IRBuilder<> &Builder,
AtomicOrdering Ord) {
if (!TLI->getInsertFencesForAtomic())
return Ord;
@@ -381,7 +325,7 @@ AtomicOrdering ARMAtomicExpandPass::insertLeadingFence(IRBuilder<> &Builder,
return Monotonic;
}
-void ARMAtomicExpandPass::insertTrailingFence(IRBuilder<> &Builder,
+void AtomicExpandLoadLinked::insertTrailingFence(IRBuilder<> &Builder,
AtomicOrdering Ord) {
if (!TLI->getInsertFencesForAtomic())
return;
@@ -391,16 +335,3 @@ void ARMAtomicExpandPass::insertTrailingFence(IRBuilder<> &Builder,
else if (Ord == SequentiallyConsistent)
Builder.CreateFence(SequentiallyConsistent);
}
-
-bool ARMAtomicExpandPass::shouldExpandAtomic(Instruction *Inst) {
- // Loads and stores less than 64-bits are already atomic; ones above that
- // are doomed anyway, so defer to the default libcall and blame the OS when
- // things go wrong:
- if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
- return SI->getValueOperand()->getType()->getPrimitiveSizeInBits() == 64;
- else if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
- return LI->getType()->getPrimitiveSizeInBits() == 64;
-
- // For the real atomic operations, we have ldrex/strex up to 64 bits.
- return Inst->getType()->getPrimitiveSizeInBits() <= 64;
-}
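The deleted predicate survives as the TargetLowering hook the pass now queries (TLI->shouldExpandAtomicInIR(Inst)); a target override restating the old ARM policy would look roughly like this (placement in ARMISelLowering is an assumption, not shown in this diff):

    bool ARMTargetLowering::shouldExpandAtomicInIR(Instruction *Inst) const {
      // 64-bit loads/stores need an LL/SC loop; smaller ones are atomic.
      if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
        return SI->getValueOperand()->getType()->getPrimitiveSizeInBits() == 64;
      if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
        return LI->getType()->getPrimitiveSizeInBits() == 64;
      // ldrex/strex cover the real atomic operations up to 64 bits.
      return Inst->getType()->getPrimitiveSizeInBits() <= 64;
    }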
diff --git a/lib/CodeGen/BasicTargetTransformInfo.cpp b/lib/CodeGen/BasicTargetTransformInfo.cpp
index c6654ec2..7f31b1a 100644
--- a/lib/CodeGen/BasicTargetTransformInfo.cpp
+++ b/lib/CodeGen/BasicTargetTransformInfo.cpp
@@ -15,13 +15,21 @@
///
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "basictti"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#include <utility>
using namespace llvm;
+static cl::opt<unsigned>
+PartialUnrollingThreshold("partial-unrolling-threshold", cl::init(0),
+ cl::desc("Threshold for partial unrolling"), cl::Hidden);
+
+#define DEBUG_TYPE "basictti"
+
namespace {
class BasicTTI final : public ImmutablePass, public TargetTransformInfo {
@@ -34,7 +42,7 @@ class BasicTTI final : public ImmutablePass, public TargetTransformInfo {
const TargetLoweringBase *getTLI() const { return TM->getTargetLowering(); }
public:
- BasicTTI() : ImmutablePass(ID), TM(0) {
+ BasicTTI() : ImmutablePass(ID), TM(nullptr) {
llvm_unreachable("This pass cannot be directly constructed");
}
@@ -186,7 +194,61 @@ bool BasicTTI::haveFastSqrt(Type *Ty) const {
return TLI->isTypeLegal(VT) && TLI->isOperationLegalOrCustom(ISD::FSQRT, VT);
}
-void BasicTTI::getUnrollingPreferences(Loop *, UnrollingPreferences &) const { }
+void BasicTTI::getUnrollingPreferences(Loop *L,
+ UnrollingPreferences &UP) const {
+ // This unrolling functionality is target independent, but to provide some
+ // motivation for its intended use, for x86:
+
+ // According to the Intel 64 and IA-32 Architectures Optimization Reference
+ // Manual, Intel Core models and later have a loop stream detector
+ // (and associated uop queue) that can benefit from partial unrolling.
+ // The relevant requirements are:
+ // - The loop must have no more than 4 (8 for Nehalem and later) branches
+ // taken, and none of them may be calls.
+ // - The loop can have no more than 18 (28 for Nehalem and later) uops.
+
+ // According to the Software Optimization Guide for AMD Family 15h Processors,
+ // models 30h-4fh (Steamroller and later) have a loop predictor and loop
+ // buffer which can benefit from partial unrolling.
+ // The relevant requirements are:
+ // - The loop must have fewer than 16 branches
+ // - The loop must have less than 40 uops in all executed loop branches
+
+ // The number of taken branches in a loop is hard to estimate here, and
+ // benchmarking has revealed that it is better not to be conservative when
+ // estimating the branch count. As a result, we'll ignore the branch limits
+ // until someone finds a case where it matters in practice.
+
+ unsigned MaxOps;
+ const TargetSubtargetInfo *ST = &TM->getSubtarget<TargetSubtargetInfo>();
+ if (PartialUnrollingThreshold.getNumOccurrences() > 0)
+ MaxOps = PartialUnrollingThreshold;
+ else if (ST->getSchedModel()->LoopMicroOpBufferSize > 0)
+ MaxOps = ST->getSchedModel()->LoopMicroOpBufferSize;
+ else
+ return;
+
+ // Scan the loop: don't unroll loops with calls.
+ for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
+ I != E; ++I) {
+ BasicBlock *BB = *I;
+
+ for (BasicBlock::iterator J = BB->begin(), JE = BB->end(); J != JE; ++J)
+ if (isa<CallInst>(J) || isa<InvokeInst>(J)) {
+ ImmutableCallSite CS(J);
+ if (const Function *F = CS.getCalledFunction()) {
+ if (!TopTTI->isLoweredToCall(F))
+ continue;
+ }
+
+ return;
+ }
+ }
+
+ // Enable runtime and partial unrolling up to the specified size.
+ UP.Partial = UP.Runtime = true;
+ UP.PartialThreshold = UP.PartialOptSizeThreshold = MaxOps;
+}
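The MaxOps value consulted above comes from the target's scheduling model (the same MCSchedModel the code dereferences via getSchedModel()); a hedged sketch of how a subtarget surfaces it, with an illustrative Nehalem-class value:

    // Illustrative only: populating the field the heuristic reads.
    MCSchedModel Model;
    Model.LoopMicroOpBufferSize = 28;  // loop buffer holds ~28 micro-ops
    // For call-free loops, getUnrollingPreferences() then enables partial
    // and runtime unrolling with UP.PartialThreshold = 28, unless the
    // -partial-unrolling-threshold flag above overrides it.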
//===----------------------------------------------------------------------===//
//
@@ -424,12 +486,14 @@ unsigned BasicTTI::getMemoryOpCost(unsigned Opcode, Type *Src,
// This is a vector load that legalizes to a larger type than the vector
// itself. Unless the corresponding extending load or truncating store is
// legal, then this will scalarize.
- TargetLowering::LegalizeAction LA;
- MVT MemVT = getTLI()->getSimpleValueType(Src, true);
- if (Opcode == Instruction::Store)
- LA = getTLI()->getTruncStoreAction(LT.second, MemVT);
- else
- LA = getTLI()->getLoadExtAction(ISD::EXTLOAD, MemVT);
+ TargetLowering::LegalizeAction LA = TargetLowering::Expand;
+ EVT MemVT = getTLI()->getValueType(Src, true);
+ if (MemVT.isSimple() && MemVT != MVT::Other) {
+ if (Opcode == Instruction::Store)
+ LA = getTLI()->getTruncStoreAction(LT.second, MemVT.getSimpleVT());
+ else
+ LA = getTLI()->getLoadExtAction(ISD::EXTLOAD, MemVT.getSimpleVT());
+ }
if (LA != TargetLowering::Legal && LA != TargetLowering::Custom) {
// This is a vector load/store for some illegal type that is scalarized.
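The new isSimple() guard matters because converting to a simple MVT asserts when a type has none; a sketch of the kind of operand that used to trip the old path (type chosen for illustration):

    // e.g. a load of an awkward vector type such as <5 x i13> has no
    // simple machine value type, so the old getSimpleValueType(Src, true)
    // call would assert; defaulting LA to Expand sends those types to the
    // scalarization cost path below instead.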
@@ -484,7 +548,7 @@ unsigned BasicTTI::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
case Intrinsic::round: ISD = ISD::FROUND; break;
case Intrinsic::pow: ISD = ISD::FPOW; break;
case Intrinsic::fma: ISD = ISD::FMA; break;
- case Intrinsic::fmuladd: ISD = ISD::FMA; break; // FIXME: mul + add?
+ case Intrinsic::fmuladd: ISD = ISD::FMA; break;
case Intrinsic::lifetime_start:
case Intrinsic::lifetime_end:
return 0;
@@ -509,6 +573,12 @@ unsigned BasicTTI::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
return LT.first * 2;
}
+ // If we can't lower fmuladd into an FMA estimate the cost as a floating
+ // point mul followed by an add.
+ if (IID == Intrinsic::fmuladd)
+ return TopTTI->getArithmeticInstrCost(BinaryOperator::FMul, RetTy) +
+ TopTTI->getArithmeticInstrCost(BinaryOperator::FAdd, RetTy);
+
// Else, assume that we need to scalarize this intrinsic. For math builtins
// this will emit a costly libcall, adding call overhead and spills. Make it
// very expensive.
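A worked example of the new fmuladd fallback (all costs illustrative): on a target where FMA is not legal for the given type but FMul and FAdd each cost 2, the estimate becomes

    unsigned Cost =
        TopTTI->getArithmeticInstrCost(BinaryOperator::FMul, RetTy)   // = 2
      + TopTTI->getArithmeticInstrCost(BinaryOperator::FAdd, RetTy);  // = 2
    // => 4, rather than the deliberately punitive scalarized-libcall
    // estimate applied to other unmatched intrinsics.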
diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp
index b39777e..f623a48 100644
--- a/lib/CodeGen/BranchFolding.cpp
+++ b/lib/CodeGen/BranchFolding.cpp
@@ -16,7 +16,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "branchfolding"
#include "BranchFolding.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
@@ -38,6 +37,8 @@
#include <algorithm>
using namespace llvm;
+#define DEBUG_TYPE "branchfolding"
+
STATISTIC(NumDeadBlocks, "Number of dead blocks removed");
STATISTIC(NumBranchOpts, "Number of branches optimized");
STATISTIC(NumTailMerge , "Number of block tails merged");
@@ -189,7 +190,7 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF,
TII = tii;
TRI = tri;
MMI = mmi;
- RS = NULL;
+ RS = nullptr;
// Use a RegScavenger to help update liveness when required.
MachineRegisterInfo &MRI = MF.getRegInfo();
@@ -201,7 +202,7 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF,
// Fix CFG. The later algorithms expect it to be right.
bool MadeChange = false;
for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; I++) {
- MachineBasicBlock *MBB = I, *TBB = 0, *FBB = 0;
+ MachineBasicBlock *MBB = I, *TBB = nullptr, *FBB = nullptr;
SmallVector<MachineOperand, 4> Cond;
if (!TII->AnalyzeBranch(*MBB, TBB, FBB, Cond, true))
MadeChange |= MBB->CorrectExtraCFGEdges(TBB, FBB, !Cond.empty());
@@ -220,7 +221,7 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF,
// See if any jump tables have become dead as the code generator
// did its thing.
MachineJumpTableInfo *JTI = MF.getJumpTableInfo();
- if (JTI == 0) {
+ if (!JTI) {
delete RS;
return MadeChange;
}
@@ -416,7 +417,7 @@ MachineBasicBlock *BranchFolder::SplitMBBAt(MachineBasicBlock &CurMBB,
MachineBasicBlock::iterator BBI1,
const BasicBlock *BB) {
if (!TII->isLegalToSplitMBBAt(CurMBB, BBI1))
- return 0;
+ return nullptr;
MachineFunction &MF = *CurMBB.getParent();
@@ -466,7 +467,7 @@ static void FixTail(MachineBasicBlock *CurMBB, MachineBasicBlock *SuccBB,
const TargetInstrInfo *TII) {
MachineFunction *MF = CurMBB->getParent();
MachineFunction::iterator I = std::next(MachineFunction::iterator(CurMBB));
- MachineBasicBlock *TBB = 0, *FBB = 0;
+ MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
SmallVector<MachineOperand, 4> Cond;
DebugLoc dl; // FIXME: this is nowhere
if (I != MF->end() &&
@@ -475,12 +476,12 @@ static void FixTail(MachineBasicBlock *CurMBB, MachineBasicBlock *SuccBB,
if (TBB == NextBB && !Cond.empty() && !FBB) {
if (!TII->ReverseBranchCondition(Cond)) {
TII->RemoveBranch(*CurMBB);
- TII->InsertBranch(*CurMBB, SuccBB, NULL, Cond, dl);
+ TII->InsertBranch(*CurMBB, SuccBB, nullptr, Cond, dl);
return;
}
}
}
- TII->InsertBranch(*CurMBB, SuccBB, NULL,
+ TII->InsertBranch(*CurMBB, SuccBB, nullptr,
SmallVector<MachineOperand, 0>(), dl);
}
@@ -849,7 +850,7 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
// See if we can do any tail merging on those.
if (MergePotentials.size() >= 2)
- MadeChange |= TryTailMergeBlocks(NULL, NULL);
+ MadeChange |= TryTailMergeBlocks(nullptr, nullptr);
// Look at blocks (IBB) with multiple predecessors (PBB).
// We change each predecessor to a canonical form, by
@@ -896,7 +897,7 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
if (PBB->getLandingPadSuccessor())
continue;
- MachineBasicBlock *TBB = 0, *FBB = 0;
+ MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
SmallVector<MachineOperand, 4> Cond;
if (!TII->AnalyzeBranch(*PBB, TBB, FBB, Cond, true)) {
// Failing case: IBB is the target of a cbr, and we cannot reverse the
@@ -915,10 +916,10 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
// a bit in the edge so we didn't have to do all this.
if (IBB->isLandingPad()) {
MachineFunction::iterator IP = PBB; IP++;
- MachineBasicBlock *PredNextBB = NULL;
+ MachineBasicBlock *PredNextBB = nullptr;
if (IP != MF.end())
PredNextBB = IP;
- if (TBB == NULL) {
+ if (!TBB) {
if (IBB != PredNextBB) // fallthrough
continue;
} else if (FBB) {
@@ -939,7 +940,8 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
TII->RemoveBranch(*PBB);
if (!Cond.empty())
// reinsert conditional branch only, for now
- TII->InsertBranch(*PBB, (TBB == IBB) ? FBB : TBB, 0, NewCond, dl);
+ TII->InsertBranch(*PBB, (TBB == IBB) ? FBB : TBB, nullptr,
+ NewCond, dl);
}
MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(PBB), *P));
@@ -1099,7 +1101,7 @@ ReoptimizeBlock:
// one.
MachineBasicBlock &PrevBB = *std::prev(MachineFunction::iterator(MBB));
- MachineBasicBlock *PriorTBB = 0, *PriorFBB = 0;
+ MachineBasicBlock *PriorTBB = nullptr, *PriorFBB = nullptr;
SmallVector<MachineOperand, 4> PriorCond;
bool PriorUnAnalyzable =
TII->AnalyzeBranch(PrevBB, PriorTBB, PriorFBB, PriorCond, true);
@@ -1116,7 +1118,7 @@ ReoptimizeBlock:
TII->RemoveBranch(PrevBB);
PriorCond.clear();
if (PriorTBB != MBB)
- TII->InsertBranch(PrevBB, PriorTBB, 0, PriorCond, dl);
+ TII->InsertBranch(PrevBB, PriorTBB, nullptr, PriorCond, dl);
MadeChange = true;
++NumBranchOpts;
goto ReoptimizeBlock;
@@ -1160,7 +1162,7 @@ ReoptimizeBlock:
// If the previous branch *only* branches to *this* block (conditional or
// not) remove the branch.
- if (PriorTBB == MBB && PriorFBB == 0) {
+ if (PriorTBB == MBB && !PriorFBB) {
TII->RemoveBranch(PrevBB);
MadeChange = true;
++NumBranchOpts;
@@ -1172,7 +1174,7 @@ ReoptimizeBlock:
if (PriorFBB == MBB) {
DebugLoc dl = getBranchDebugLoc(PrevBB);
TII->RemoveBranch(PrevBB);
- TII->InsertBranch(PrevBB, PriorTBB, 0, PriorCond, dl);
+ TII->InsertBranch(PrevBB, PriorTBB, nullptr, PriorCond, dl);
MadeChange = true;
++NumBranchOpts;
goto ReoptimizeBlock;
@@ -1186,7 +1188,7 @@ ReoptimizeBlock:
if (!TII->ReverseBranchCondition(NewPriorCond)) {
DebugLoc dl = getBranchDebugLoc(PrevBB);
TII->RemoveBranch(PrevBB);
- TII->InsertBranch(PrevBB, PriorFBB, 0, NewPriorCond, dl);
+ TII->InsertBranch(PrevBB, PriorFBB, nullptr, NewPriorCond, dl);
MadeChange = true;
++NumBranchOpts;
goto ReoptimizeBlock;
@@ -1201,7 +1203,7 @@ ReoptimizeBlock:
// We consider it more likely that execution will stay in the function (e.g.
// due to loops) than it is to exit it. This asserts in loops etc, moving
// the assert condition out of the loop body.
- if (MBB->succ_empty() && !PriorCond.empty() && PriorFBB == 0 &&
+ if (MBB->succ_empty() && !PriorCond.empty() && !PriorFBB &&
MachineFunction::iterator(PriorTBB) == FallThrough &&
!MBB->canFallThrough()) {
bool DoTransform = true;
@@ -1224,7 +1226,7 @@ ReoptimizeBlock:
DebugLoc dl = getBranchDebugLoc(PrevBB);
TII->RemoveBranch(PrevBB);
- TII->InsertBranch(PrevBB, MBB, 0, NewPriorCond, dl);
+ TII->InsertBranch(PrevBB, MBB, nullptr, NewPriorCond, dl);
// Move this block to the end of the function.
MBB->moveAfter(--MF.end());
@@ -1237,7 +1239,7 @@ ReoptimizeBlock:
}
// Analyze the branch in the current block.
- MachineBasicBlock *CurTBB = 0, *CurFBB = 0;
+ MachineBasicBlock *CurTBB = nullptr, *CurFBB = nullptr;
SmallVector<MachineOperand, 4> CurCond;
bool CurUnAnalyzable= TII->AnalyzeBranch(*MBB, CurTBB, CurFBB, CurCond, true);
if (!CurUnAnalyzable) {
@@ -1263,7 +1265,7 @@ ReoptimizeBlock:
// If this branch is the only thing in its block, see if we can forward
// other blocks across it.
- if (CurTBB && CurCond.empty() && CurFBB == 0 &&
+ if (CurTBB && CurCond.empty() && !CurFBB &&
IsBranchOnlyBlock(MBB) && CurTBB != MBB &&
!MBB->hasAddressTaken()) {
DebugLoc dl = getBranchDebugLoc(*MBB);
@@ -1301,12 +1303,12 @@ ReoptimizeBlock:
// explicit branch to us to make updates simpler.
if (!PredHasNoFallThrough && PrevBB.isSuccessor(MBB) &&
PriorTBB != MBB && PriorFBB != MBB) {
- if (PriorTBB == 0) {
- assert(PriorCond.empty() && PriorFBB == 0 &&
+ if (!PriorTBB) {
+ assert(PriorCond.empty() && !PriorFBB &&
"Bad branch analysis");
PriorTBB = MBB;
} else {
- assert(PriorFBB == 0 && "Machine CFG out of date!");
+ assert(!PriorFBB && "Machine CFG out of date!");
PriorFBB = MBB;
}
DebugLoc pdl = getBranchDebugLoc(PrevBB);
@@ -1330,7 +1332,7 @@ ReoptimizeBlock:
// If this change resulted in PMBB ending in a conditional
// branch where both conditions go to the same destination,
// change this to an unconditional branch (and fix the CFG).
- MachineBasicBlock *NewCurTBB = 0, *NewCurFBB = 0;
+ MachineBasicBlock *NewCurTBB = nullptr, *NewCurFBB = nullptr;
SmallVector<MachineOperand, 4> NewCurCond;
bool NewCurUnAnalyzable = TII->AnalyzeBranch(*PMBB, NewCurTBB,
NewCurFBB, NewCurCond, true);
@@ -1338,10 +1340,10 @@ ReoptimizeBlock:
DebugLoc pdl = getBranchDebugLoc(*PMBB);
TII->RemoveBranch(*PMBB);
NewCurCond.clear();
- TII->InsertBranch(*PMBB, NewCurTBB, 0, NewCurCond, pdl);
+ TII->InsertBranch(*PMBB, NewCurTBB, nullptr, NewCurCond, pdl);
MadeChange = true;
++NumBranchOpts;
- PMBB->CorrectExtraCFGEdges(NewCurTBB, 0, false);
+ PMBB->CorrectExtraCFGEdges(NewCurTBB, nullptr, false);
}
}
}
@@ -1358,7 +1360,7 @@ ReoptimizeBlock:
}
// Add the branch back if the block is more than just an uncond branch.
- TII->InsertBranch(*MBB, CurTBB, 0, CurCond, dl);
+ TII->InsertBranch(*MBB, CurTBB, nullptr, CurCond, dl);
}
}
@@ -1379,7 +1381,7 @@ ReoptimizeBlock:
// Analyze the branch at the end of the pred.
MachineBasicBlock *PredBB = *PI;
MachineFunction::iterator PredFallthrough = PredBB; ++PredFallthrough;
- MachineBasicBlock *PredTBB = 0, *PredFBB = 0;
+ MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr;
SmallVector<MachineOperand, 4> PredCond;
if (PredBB != MBB && !PredBB->canFallThrough() &&
!TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true)
@@ -1399,7 +1401,7 @@ ReoptimizeBlock:
MachineBasicBlock *NextBB =
std::next(MachineFunction::iterator(MBB));
CurCond.clear();
- TII->InsertBranch(*MBB, NextBB, 0, CurCond, DebugLoc());
+ TII->InsertBranch(*MBB, NextBB, nullptr, CurCond, DebugLoc());
}
MBB->moveAfter(PredBB);
MadeChange = true;
@@ -1432,7 +1434,7 @@ ReoptimizeBlock:
// Okay, there is no really great place to put this block. If, however,
// the block before this one would be a fall-through if this block were
// removed, move this block to the end of the function.
- MachineBasicBlock *PrevTBB = 0, *PrevFBB = 0;
+ MachineBasicBlock *PrevTBB = nullptr, *PrevFBB = nullptr;
SmallVector<MachineOperand, 4> PrevCond;
if (FallThrough != MF.end() &&
!TII->AnalyzeBranch(PrevBB, PrevTBB, PrevFBB, PrevCond, true) &&
@@ -1473,7 +1475,7 @@ static MachineBasicBlock *findFalseBlock(MachineBasicBlock *BB,
if (SuccBB != TrueBB)
return SuccBB;
}
- return NULL;
+ return nullptr;
}
/// findHoistingInsertPosAndDeps - Find the location to move common instructions
@@ -1547,7 +1549,7 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB,
// Also avoid moving code above predicated instruction since it's hard to
// reason about register liveness with predicated instruction.
bool DontMoveAcrossStore = true;
- if (!PI->isSafeToMove(TII, 0, DontMoveAcrossStore) ||
+ if (!PI->isSafeToMove(TII, nullptr, DontMoveAcrossStore) ||
TII->isPredicated(PI))
return MBB->end();
@@ -1581,7 +1583,7 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB,
/// sequence at the start of the function, move the instructions before MBB
/// terminator if it's legal.
bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) {
- MachineBasicBlock *TBB = 0, *FBB = 0;
+ MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
SmallVector<MachineOperand, 4> Cond;
if (TII->AnalyzeBranch(*MBB, TBB, FBB, Cond, true) || !TBB || Cond.empty())
return false;
@@ -1686,7 +1688,7 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) {
break;
bool DontMoveAcrossStore = true;
- if (!TIB->isSafeToMove(TII, 0, DontMoveAcrossStore))
+ if (!TIB->isSafeToMove(TII, nullptr, DontMoveAcrossStore))
break;
// Remove kills from LocalDefsSet, these registers had short live ranges.
diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt
index 8943cb1..0b492a9 100644
--- a/lib/CodeGen/CMakeLists.txt
+++ b/lib/CodeGen/CMakeLists.txt
@@ -2,6 +2,7 @@ add_llvm_library(LLVMCodeGen
AggressiveAntiDepBreaker.cpp
AllocationOrder.cpp
Analysis.cpp
+ AtomicExpandLoadLinkedPass.cpp
BasicTargetTransformInfo.cpp
BranchFolding.cpp
CalcSpillWeights.cpp
diff --git a/lib/CodeGen/CalcSpillWeights.cpp b/lib/CodeGen/CalcSpillWeights.cpp
index 4833731..bc033f9 100644
--- a/lib/CodeGen/CalcSpillWeights.cpp
+++ b/lib/CodeGen/CalcSpillWeights.cpp
@@ -7,8 +7,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "calcspillweights"
-
#include "llvm/CodeGen/CalcSpillWeights.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
@@ -22,6 +20,8 @@
#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
+#define DEBUG_TYPE "calcspillweights"
+
void llvm::calculateSpillWeightsAndHints(LiveIntervals &LIS,
MachineFunction &MF,
const MachineLoopInfo &MLI,
@@ -96,8 +96,8 @@ void
VirtRegAuxInfo::calculateSpillWeightAndHint(LiveInterval &li) {
MachineRegisterInfo &mri = MF.getRegInfo();
const TargetRegisterInfo &tri = *MF.getTarget().getRegisterInfo();
- MachineBasicBlock *mbb = 0;
- MachineLoop *loop = 0;
+ MachineBasicBlock *mbb = nullptr;
+ MachineLoop *loop = nullptr;
bool isExiting = false;
float totalWeight = 0;
SmallPtrSet<MachineInstr*, 8> visited;
@@ -149,7 +149,11 @@ VirtRegAuxInfo::calculateSpillWeightAndHint(LiveInterval &li) {
unsigned hint = copyHint(mi, li.reg, tri, mri);
if (!hint)
continue;
- float hweight = Hint[hint] += weight;
+ // Force hweight onto the stack so that x86 doesn't add hidden precision,
+ // making the comparison incorrectly pass (i.e., 1 > 1 == true??).
+ //
+ // FIXME: we probably shouldn't use floats at all.
+ volatile float hweight = Hint[hint] += weight;
if (TargetRegisterInfo::isPhysicalRegister(hint)) {
if (hweight > bestPhys && mri.isAllocatable(hint))
bestPhys = hweight, hintPhys = hint;
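For background, the hazard the volatile works around can be reproduced in isolation on 32-bit x86 when floats flow through the 80-bit x87 stack (sketch; whether it triggers depends on compiler flags, e.g. building without SSE):

    float sum(const float *p, int n) {
      float s = 0;
      for (int i = 0; i < n; ++i) s += p[i];
      return s;                  // may be returned at 80-bit precision
    }
    // ...
    float a = sum(v, n);         // possibly kept in an x87 register
    volatile float b = a;        // forced through a 32-bit memory slot
    // Without the volatile, (a > b) can evaluate to true even though both
    // should denote the same float value.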
diff --git a/lib/CodeGen/CallingConvLower.cpp b/lib/CodeGen/CallingConvLower.cpp
index fcfc9dc..add861a 100644
--- a/lib/CodeGen/CallingConvLower.cpp
+++ b/lib/CodeGen/CallingConvLower.cpp
@@ -76,7 +76,7 @@ CCState::AnalyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> &Ins,
dbgs() << "Formal argument #" << i << " has unhandled type "
<< EVT(ArgVT).getEVTString() << '\n';
#endif
- llvm_unreachable(0);
+ llvm_unreachable(nullptr);
}
}
}
@@ -108,7 +108,7 @@ void CCState::AnalyzeReturn(const SmallVectorImpl<ISD::OutputArg> &Outs,
dbgs() << "Return operand #" << i << " has unhandled type "
<< EVT(VT).getEVTString() << '\n';
#endif
- llvm_unreachable(0);
+ llvm_unreachable(nullptr);
}
}
}
@@ -126,7 +126,7 @@ void CCState::AnalyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Outs,
dbgs() << "Call operand #" << i << " has unhandled type "
<< EVT(ArgVT).getEVTString() << '\n';
#endif
- llvm_unreachable(0);
+ llvm_unreachable(nullptr);
}
}
}
@@ -145,7 +145,7 @@ void CCState::AnalyzeCallOperands(SmallVectorImpl<MVT> &ArgVTs,
dbgs() << "Call operand #" << i << " has unhandled type "
<< EVT(ArgVT).getEVTString() << '\n';
#endif
- llvm_unreachable(0);
+ llvm_unreachable(nullptr);
}
}
}
@@ -162,7 +162,7 @@ void CCState::AnalyzeCallResult(const SmallVectorImpl<ISD::InputArg> &Ins,
dbgs() << "Call result #" << i << " has unhandled type "
<< EVT(VT).getEVTString() << '\n';
#endif
- llvm_unreachable(0);
+ llvm_unreachable(nullptr);
}
}
}
@@ -175,6 +175,6 @@ void CCState::AnalyzeCallResult(MVT VT, CCAssignFn Fn) {
dbgs() << "Call result has unhandled type "
<< EVT(VT).getEVTString() << '\n';
#endif
- llvm_unreachable(0);
+ llvm_unreachable(nullptr);
}
}
diff --git a/lib/CodeGen/CodeGen.cpp b/lib/CodeGen/CodeGen.cpp
index 17402f0..b3beac3 100644
--- a/lib/CodeGen/CodeGen.cpp
+++ b/lib/CodeGen/CodeGen.cpp
@@ -20,6 +20,7 @@ using namespace llvm;
/// initializeCodeGen - Initialize all passes linked into the CodeGen library.
void llvm::initializeCodeGen(PassRegistry &Registry) {
+ initializeAtomicExpandLoadLinkedPass(Registry);
initializeBasicTTIPass(Registry);
initializeBranchFolderPassPass(Registry);
initializeCodeGenPreparePass(Registry);
diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp
index e82a306..6aa60c6 100644
--- a/lib/CodeGen/CodeGenPrepare.cpp
+++ b/lib/CodeGen/CodeGenPrepare.cpp
@@ -13,7 +13,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "codegenprepare"
#include "llvm/CodeGen/Passes.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallSet.h"
@@ -39,6 +38,7 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
#include "llvm/Transforms/Utils/BypassSlowDivision.h"
@@ -46,6 +46,8 @@
using namespace llvm;
using namespace llvm::PatternMatch;
+#define DEBUG_TYPE "codegenprepare"
+
STATISTIC(NumBlocksElim, "Number of blocks eliminated");
STATISTIC(NumPHIsElim, "Number of trivial PHIs eliminated");
STATISTIC(NumGEPsElim, "Number of GEPs converted to casts");
@@ -70,6 +72,10 @@ static cl::opt<bool> DisableSelectToBranch(
"disable-cgp-select2branch", cl::Hidden, cl::init(false),
cl::desc("Disable select to branch conversion."));
+static cl::opt<bool> AddrSinkUsingGEPs(
+ "addr-sink-using-gep", cl::Hidden, cl::init(false),
+ cl::desc("Address sinking in CGP using GEPs."));
+
static cl::opt<bool> EnableAndCmpSinking(
"enable-andcmp-sinking", cl::Hidden, cl::init(true),
cl::desc("Enable sinkinig and/cmp into branches."));
@@ -111,8 +117,8 @@ typedef DenseMap<Instruction *, Type *> InstrToOrigTy;
public:
static char ID; // Pass identification, replacement for typeid
- explicit CodeGenPrepare(const TargetMachine *TM = 0)
- : FunctionPass(ID), TM(TM), TLI(0) {
+ explicit CodeGenPrepare(const TargetMachine *TM = nullptr)
+ : FunctionPass(ID), TM(TM), TLI(nullptr) {
initializeCodeGenPreparePass(*PassRegistry::getPassRegistry());
}
bool runOnFunction(Function &F) override;
@@ -177,7 +183,7 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
TLInfo = &getAnalysis<TargetLibraryInfo>();
DominatorTreeWrapperPass *DTWP =
getAnalysisIfAvailable<DominatorTreeWrapperPass>();
- DT = DTWP ? &DTWP->getDomTree() : 0;
+ DT = DTWP ? &DTWP->getDomTree() : nullptr;
OptSize = F.getAttributes().hasAttribute(AttributeSet::FunctionIndex,
Attribute::OptimizeForSize);
@@ -623,6 +629,187 @@ static bool OptimizeCmpExpression(CmpInst *CI) {
return MadeChange;
}
+/// isExtractBitsCandidateUse - Check if a candidate user of the shift could
+/// be combined with the shift instruction. Qualifying users are:
+/// 1. A truncate instruction
+/// 2. An 'and' instruction whose immediate is a mask of the low bits:
+/// imm & (imm+1) == 0
+static bool isExtractBitsCandidateUse(Instruction *User) {
+ if (!isa<TruncInst>(User)) {
+ if (User->getOpcode() != Instruction::And ||
+ !isa<ConstantInt>(User->getOperand(1)))
+ return false;
+
+ const APInt &Cimm = cast<ConstantInt>(User->getOperand(1))->getValue();
+
+ if ((Cimm & (Cimm + 1)).getBoolValue())
+ return false;
+ }
+ return true;
+}
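The imm & (imm+1) == 0 test accepts exactly the contiguous low-bit masks (those where imm+1 is a power of two); two quick checks:

    // imm = 0x00FF: 0x00FF & 0x0100 == 0      -> 'and %x, 255' qualifies
    // imm = 0x00F0: 0x00F0 & 0x00F1 == 0x00F0 -> not a low-bit mask, rejected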
+
+/// SinkShiftAndTruncate - sink both the shift and the truncate instruction
+/// into the BB of the truncate's user.
+static bool
+SinkShiftAndTruncate(BinaryOperator *ShiftI, Instruction *User, ConstantInt *CI,
+ DenseMap<BasicBlock *, BinaryOperator *> &InsertedShifts,
+ const TargetLowering &TLI) {
+ BasicBlock *UserBB = User->getParent();
+ DenseMap<BasicBlock *, CastInst *> InsertedTruncs;
+ TruncInst *TruncI = dyn_cast<TruncInst>(User);
+ bool MadeChange = false;
+
+ for (Value::user_iterator TruncUI = TruncI->user_begin(),
+ TruncE = TruncI->user_end();
+ TruncUI != TruncE;) {
+
+ Use &TruncTheUse = TruncUI.getUse();
+ Instruction *TruncUser = cast<Instruction>(*TruncUI);
+ // Preincrement use iterator so we don't invalidate it.
+
+ ++TruncUI;
+
+ int ISDOpcode = TLI.InstructionOpcodeToISD(TruncUser->getOpcode());
+ if (!ISDOpcode)
+ continue;
+
+ // If the use is actually a legal node, there will not be an implicit
+ // truncate.
+ if (TLI.isOperationLegalOrCustom(ISDOpcode,
+ EVT::getEVT(TruncUser->getType())))
+ continue;
+
+ // Don't bother for PHI nodes.
+ if (isa<PHINode>(TruncUser))
+ continue;
+
+ BasicBlock *TruncUserBB = TruncUser->getParent();
+
+ if (UserBB == TruncUserBB)
+ continue;
+
+ BinaryOperator *&InsertedShift = InsertedShifts[TruncUserBB];
+ CastInst *&InsertedTrunc = InsertedTruncs[TruncUserBB];
+
+ if (!InsertedShift && !InsertedTrunc) {
+ BasicBlock::iterator InsertPt = TruncUserBB->getFirstInsertionPt();
+ // Sink the shift
+ if (ShiftI->getOpcode() == Instruction::AShr)
+ InsertedShift =
+ BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI, "", InsertPt);
+ else
+ InsertedShift =
+ BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI, "", InsertPt);
+
+ // Sink the trunc
+ BasicBlock::iterator TruncInsertPt = TruncUserBB->getFirstInsertionPt();
+ TruncInsertPt++;
+
+ InsertedTrunc = CastInst::Create(TruncI->getOpcode(), InsertedShift,
+ TruncI->getType(), "", TruncInsertPt);
+
+ MadeChange = true;
+
+ TruncTheUse = InsertedTrunc;
+ }
+ }
+ return MadeChange;
+}
+
+/// OptimizeExtractBits - sink the shift *right* instruction into user blocks
+/// if its uses could potentially be combined with the shift to form a
+/// BitExtract instruction. It is only applied if the architecture supports a
+/// BitExtract instruction. Here is an example:
+/// BB1:
+/// %x.extract.shift = lshr i64 %arg1, 32
+/// BB2:
+/// %x.extract.trunc = trunc i64 %x.extract.shift to i16
+/// ==>
+///
+/// BB2:
+/// %x.extract.shift.1 = lshr i64 %arg1, 32
+/// %x.extract.trunc = trunc i64 %x.extract.shift.1 to i16
+///
+/// CodeGen will recognize the pattern in BB2 and generate a BitExtract
+/// instruction.
+/// Return true if any changes are made.
+static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI,
+ const TargetLowering &TLI) {
+ BasicBlock *DefBB = ShiftI->getParent();
+
+ /// Only insert instructions in each block once.
+ DenseMap<BasicBlock *, BinaryOperator *> InsertedShifts;
+
+ bool shiftIsLegal = TLI.isTypeLegal(TLI.getValueType(ShiftI->getType()));
+
+ bool MadeChange = false;
+ for (Value::user_iterator UI = ShiftI->user_begin(), E = ShiftI->user_end();
+ UI != E;) {
+ Use &TheUse = UI.getUse();
+ Instruction *User = cast<Instruction>(*UI);
+ // Preincrement use iterator so we don't invalidate it.
+ ++UI;
+
+ // Don't bother for PHI nodes.
+ if (isa<PHINode>(User))
+ continue;
+
+ if (!isExtractBitsCandidateUse(User))
+ continue;
+
+ BasicBlock *UserBB = User->getParent();
+
+ if (UserBB == DefBB) {
+ // If the shift and truncate instructions are in the same BB, the use of
+ // the truncate (TruncUse) may still introduce another truncate if its
+ // type is not legal. In this case, we would like to sink both the shift
+ // and the truncate instruction to the BB of TruncUse.
+ // for example:
+ // BB1:
+ // i64 shift.result = lshr i64 opnd, imm
+ // trunc.result = trunc shift.result to i16
+ //
+ // BB2:
+ // ----> We will have an implicit truncate here if the architecture does
+ // not have i16 compare.
+ // cmp i16 trunc.result, opnd2
+ //
+ if (isa<TruncInst>(User) && shiftIsLegal
+ // If the type of the truncate is legal, no truncate will be
+ // introduced in other basic blocks.
+ && (!TLI.isTypeLegal(TLI.getValueType(User->getType()))))
+ MadeChange =
+ SinkShiftAndTruncate(ShiftI, User, CI, InsertedShifts, TLI);
+
+ continue;
+ }
+ // If we have already inserted a shift into this block, use it.
+ BinaryOperator *&InsertedShift = InsertedShifts[UserBB];
+
+ if (!InsertedShift) {
+ BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
+
+ if (ShiftI->getOpcode() == Instruction::AShr)
+ InsertedShift =
+ BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI, "", InsertPt);
+ else
+ InsertedShift =
+ BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI, "", InsertPt);
+
+ MadeChange = true;
+ }
+
+ // Replace a use of the shift with a use of the new shift.
+ TheUse = InsertedShift;
+ }
+
+ // If we removed all uses, nuke the shift.
+ if (ShiftI->use_empty())
+ ShiftI->eraseFromParent();
+
+ return MadeChange;
+}
+
namespace {
class CodeGenPrepareFortifiedLibCalls : public SimplifyFortifiedLibCalls {
protected:
@@ -671,8 +858,9 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) {
// happens.
WeakVH IterHandle(CurInstIterator);
- replaceAndRecursivelySimplify(CI, RetVal, TLI ? TLI->getDataLayout() : 0,
- TLInfo, ModifiedDT ? 0 : DT);
+ replaceAndRecursivelySimplify(CI, RetVal,
+ TLI ? TLI->getDataLayout() : nullptr,
+ TLInfo, ModifiedDT ? nullptr : DT);
// If the iterator instruction was recursively deleted, start over at the
// start of the block.
@@ -693,10 +881,10 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) {
}
// From here on out we're working with named functions.
- if (CI->getCalledFunction() == 0) return false;
+ if (!CI->getCalledFunction()) return false;
// We'll need DataLayout from here on out.
- const DataLayout *TD = TLI ? TLI->getDataLayout() : 0;
+ const DataLayout *TD = TLI ? TLI->getDataLayout() : nullptr;
if (!TD) return false;
// Lower all default uses of _chk calls. This is very similar
@@ -746,8 +934,8 @@ bool CodeGenPrepare::DupRetToEnableTailCallOpts(BasicBlock *BB) {
if (!RI)
return false;
- PHINode *PN = 0;
- BitCastInst *BCI = 0;
+ PHINode *PN = nullptr;
+ BitCastInst *BCI = nullptr;
Value *V = RI->getReturnValue();
if (V) {
BCI = dyn_cast<BitCastInst>(V);
@@ -862,7 +1050,7 @@ namespace {
struct ExtAddrMode : public TargetLowering::AddrMode {
Value *BaseReg;
Value *ScaledReg;
- ExtAddrMode() : BaseReg(0), ScaledReg(0) {}
+ ExtAddrMode() : BaseReg(nullptr), ScaledReg(nullptr) {}
void print(raw_ostream &OS) const;
void dump() const;
@@ -1189,10 +1377,10 @@ class TypePromotionTransaction {
public:
/// \brief Remove all references to \p Inst and optionally replace all its
/// uses with New.
- /// \pre If !Inst->use_empty(), then New != NULL
- InstructionRemover(Instruction *Inst, Value *New = NULL)
+ /// \pre If !Inst->use_empty(), then New != nullptr
+ InstructionRemover(Instruction *Inst, Value *New = nullptr)
: TypePromotionAction(Inst), Inserter(Inst), Hider(Inst),
- Replacer(NULL) {
+ Replacer(nullptr) {
if (New)
Replacer = new UsesReplacer(Inst, New);
DEBUG(dbgs() << "Do: InstructionRemover: " << *Inst << "\n");
@@ -1232,7 +1420,7 @@ public:
/// Same as Instruction::setOperand.
void setOperand(Instruction *Inst, unsigned Idx, Value *NewVal);
/// Same as Instruction::eraseFromParent.
- void eraseInstruction(Instruction *Inst, Value *NewVal = NULL);
+ void eraseInstruction(Instruction *Inst, Value *NewVal = nullptr);
/// Same as Value::replaceAllUsesWith.
void replaceAllUsesWith(Instruction *Inst, Value *New);
/// Same as Value::mutateType.
@@ -1245,84 +1433,75 @@ public:
void moveBefore(Instruction *Inst, Instruction *Before);
/// @}
- ~TypePromotionTransaction();
-
private:
/// The ordered list of actions made so far.
- SmallVector<TypePromotionAction *, 16> Actions;
- typedef SmallVectorImpl<TypePromotionAction *>::iterator CommitPt;
+ SmallVector<std::unique_ptr<TypePromotionAction>, 16> Actions;
+ typedef SmallVectorImpl<std::unique_ptr<TypePromotionAction>>::iterator CommitPt;
};
void TypePromotionTransaction::setOperand(Instruction *Inst, unsigned Idx,
Value *NewVal) {
Actions.push_back(
- new TypePromotionTransaction::OperandSetter(Inst, Idx, NewVal));
+ make_unique<TypePromotionTransaction::OperandSetter>(Inst, Idx, NewVal));
}
void TypePromotionTransaction::eraseInstruction(Instruction *Inst,
Value *NewVal) {
Actions.push_back(
- new TypePromotionTransaction::InstructionRemover(Inst, NewVal));
+ make_unique<TypePromotionTransaction::InstructionRemover>(Inst, NewVal));
}
void TypePromotionTransaction::replaceAllUsesWith(Instruction *Inst,
Value *New) {
- Actions.push_back(new TypePromotionTransaction::UsesReplacer(Inst, New));
+ Actions.push_back(
+ make_unique<TypePromotionTransaction::UsesReplacer>(Inst, New));
}
void TypePromotionTransaction::mutateType(Instruction *Inst, Type *NewTy) {
- Actions.push_back(new TypePromotionTransaction::TypeMutator(Inst, NewTy));
+ Actions.push_back(
+ make_unique<TypePromotionTransaction::TypeMutator>(Inst, NewTy));
}
Instruction *TypePromotionTransaction::createTrunc(Instruction *Opnd,
Type *Ty) {
- TruncBuilder *TB = new TruncBuilder(Opnd, Ty);
- Actions.push_back(TB);
- return TB->getBuiltInstruction();
+ std::unique_ptr<TruncBuilder> Ptr(new TruncBuilder(Opnd, Ty));
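+ // Grab the built instruction before std::move hands ownership of the
+ // action to the Actions list.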
+ Instruction *I = Ptr->getBuiltInstruction();
+ Actions.push_back(std::move(Ptr));
+ return I;
}
Instruction *TypePromotionTransaction::createSExt(Instruction *Inst,
Value *Opnd, Type *Ty) {
- SExtBuilder *SB = new SExtBuilder(Inst, Opnd, Ty);
- Actions.push_back(SB);
- return SB->getBuiltInstruction();
+ std::unique_ptr<SExtBuilder> Ptr(new SExtBuilder(Inst, Opnd, Ty));
+ Instruction *I = Ptr->getBuiltInstruction();
+ Actions.push_back(std::move(Ptr));
+ return I;
}
void TypePromotionTransaction::moveBefore(Instruction *Inst,
Instruction *Before) {
Actions.push_back(
- new TypePromotionTransaction::InstructionMoveBefore(Inst, Before));
+ make_unique<TypePromotionTransaction::InstructionMoveBefore>(Inst, Before));
}
TypePromotionTransaction::ConstRestorationPt
TypePromotionTransaction::getRestorationPoint() const {
- return Actions.rbegin() != Actions.rend() ? *Actions.rbegin() : NULL;
+ return !Actions.empty() ? Actions.back().get() : nullptr;
}
void TypePromotionTransaction::commit() {
for (CommitPt It = Actions.begin(), EndIt = Actions.end(); It != EndIt;
- ++It) {
+ ++It)
(*It)->commit();
- delete *It;
- }
Actions.clear();
}
void TypePromotionTransaction::rollback(
TypePromotionTransaction::ConstRestorationPt Point) {
- while (!Actions.empty() && Point != (*Actions.rbegin())) {
- TypePromotionAction *Curr = Actions.pop_back_val();
+ while (!Actions.empty() && Point != Actions.back().get()) {
+ std::unique_ptr<TypePromotionAction> Curr = Actions.pop_back_val();
Curr->undo();
- delete Curr;
}
}
-TypePromotionTransaction::~TypePromotionTransaction() {
- for (CommitPt It = Actions.begin(), EndIt = Actions.end(); It != EndIt; ++It)
- delete *It;
- Actions.clear();
-}
-
/// \brief A helper class for matching addressing modes.
///
/// This encapsulates the logic for matching the target-legal addressing modes.
@@ -1390,7 +1569,7 @@ private:
bool MatchScaledValue(Value *ScaleReg, int64_t Scale, unsigned Depth);
bool MatchAddr(Value *V, unsigned Depth);
bool MatchOperationAddr(User *Operation, unsigned Opcode, unsigned Depth,
- bool *MovedAway = NULL);
+ bool *MovedAway = nullptr);
bool IsProfitableToFoldIntoAddressingMode(Instruction *I,
ExtAddrMode &AMBefore,
ExtAddrMode &AMAfter);
@@ -1435,7 +1614,7 @@ bool AddressingModeMatcher::MatchScaledValue(Value *ScaleReg, int64_t Scale,
// Okay, we decided that we can add ScaleReg+Scale to AddrMode. Check now
// to see if ScaleReg is actually X+C. If so, we can turn this into adding
// X*Scale + C*Scale to addr mode.
- ConstantInt *CI = 0; Value *AddLHS = 0;
+ ConstantInt *CI = nullptr; Value *AddLHS = nullptr;
if (isa<Instruction>(ScaleReg) && // not a constant expr.
match(ScaleReg, m_Add(m_Value(AddLHS), m_ConstantInt(CI)))) {
TestAddrMode.ScaledReg = AddLHS;
@@ -1461,6 +1640,7 @@ bool AddressingModeMatcher::MatchScaledValue(Value *ScaleReg, int64_t Scale,
static bool MightBeFoldableInst(Instruction *I) {
switch (I->getOpcode()) {
case Instruction::BitCast:
+ case Instruction::AddrSpaceCast:
// Don't touch identity bitcasts.
if (I->getType() == I->getOperand(0)->getType())
return false;
@@ -1612,13 +1792,13 @@ TypePromotionHelper::Action TypePromotionHelper::getAction(
// get through.
// If it is, check whether we can get through.
if (!SExtOpnd || !canGetThrough(SExtOpnd, SExtTy, PromotedInsts))
- return NULL;
+ return nullptr;
// Do not promote if the operand has been added by codegenprepare.
// Otherwise, it means we are undoing an optimization that is likely to be
// redone, thus causing a potential infinite loop.
if (isa<TruncInst>(SExtOpnd) && InsertedTruncs.count(SExtOpnd))
- return NULL;
+ return nullptr;
// SExt or Trunc instructions.
// Return the related handler.
@@ -1629,7 +1809,7 @@ TypePromotionHelper::Action TypePromotionHelper::getAction(
// Abort early if we will have to insert non-free instructions.
if (!SExtOpnd->hasOneUse() &&
!TLI.isTruncateFree(SExtTy, SExtOpnd->getType()))
- return NULL;
+ return nullptr;
return promoteOperandForOther;
}
@@ -1740,7 +1920,7 @@ TypePromotionHelper::promoteOperandForOther(Instruction *SExt,
TPT.moveBefore(SExtForOpnd, SExtOpnd);
TPT.setOperand(SExtOpnd, OpIdx, SExtForOpnd);
// If more sext are required, new instructions will have to be created.
- SExtForOpnd = NULL;
+ SExtForOpnd = nullptr;
}
if (SExtForOpnd == SExt) {
DEBUG(dbgs() << "Sign extension is useless now\n");
@@ -1815,6 +1995,7 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode,
return MatchAddr(AddrInst->getOperand(0), Depth);
return false;
case Instruction::BitCast:
+ case Instruction::AddrSpaceCast:
// BitCast is always a noop, and we can handle it as long as it is
// int->int or pointer->pointer (we don't want int<->fp or something).
if ((AddrInst->getOperand(0)->getType()->isPointerTy() ||
@@ -2022,11 +2203,11 @@ bool AddressingModeMatcher::MatchAddr(Value *Addr, unsigned Depth) {
AddrMode.BaseOffs -= CI->getSExtValue();
} else if (GlobalValue *GV = dyn_cast<GlobalValue>(Addr)) {
// If this is a global variable, try to fold it into the addressing mode.
- if (AddrMode.BaseGV == 0) {
+ if (!AddrMode.BaseGV) {
AddrMode.BaseGV = GV;
if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
return true;
- AddrMode.BaseGV = 0;
+ AddrMode.BaseGV = nullptr;
}
} else if (Instruction *I = dyn_cast<Instruction>(Addr)) {
ExtAddrMode BackupAddrMode = AddrMode;
@@ -2071,7 +2252,7 @@ bool AddressingModeMatcher::MatchAddr(Value *Addr, unsigned Depth) {
if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
return true;
AddrMode.HasBaseReg = false;
- AddrMode.BaseReg = 0;
+ AddrMode.BaseReg = nullptr;
}
// If the base register is already taken, see if we can do [r+r].
@@ -2081,7 +2262,7 @@ bool AddressingModeMatcher::MatchAddr(Value *Addr, unsigned Depth) {
if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
return true;
AddrMode.Scale = 0;
- AddrMode.ScaledReg = 0;
+ AddrMode.ScaledReg = nullptr;
}
// Couldn't match.
TPT.rollback(LastKnownGood);
@@ -2166,7 +2347,7 @@ static bool FindAllMemoryUses(Instruction *I,
bool AddressingModeMatcher::ValueAlreadyLiveAtInst(Value *Val,Value *KnownLive1,
Value *KnownLive2) {
// If Val is either of the known-live values, we know it is live!
- if (Val == 0 || Val == KnownLive1 || Val == KnownLive2)
+ if (Val == nullptr || Val == KnownLive1 || Val == KnownLive2)
return true;
// All values other than instructions and arguments (e.g. constants) are live.
@@ -2225,13 +2406,13 @@ IsProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
// If the BaseReg or ScaledReg was referenced by the previous addrmode, their
// lifetime wasn't extended by adding this instruction.
if (ValueAlreadyLiveAtInst(BaseReg, AMBefore.BaseReg, AMBefore.ScaledReg))
- BaseReg = 0;
+ BaseReg = nullptr;
if (ValueAlreadyLiveAtInst(ScaledReg, AMBefore.BaseReg, AMBefore.ScaledReg))
- ScaledReg = 0;
+ ScaledReg = nullptr;
// If folding this instruction (and its subexprs) didn't extend any live
// ranges, we're ok with it.
- if (BaseReg == 0 && ScaledReg == 0)
+ if (!BaseReg && !ScaledReg)
return true;
// If all uses of this instruction are ultimately load/store/inlineasm's,
@@ -2320,7 +2501,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
// Use a worklist to iteratively look through PHI nodes, and ensure that
// the addressing mode obtained from the non-PHI roots of the graph
// are equivalent.
- Value *Consensus = 0;
+ Value *Consensus = nullptr;
unsigned NumUsesConsensus = 0;
bool IsNumUsesConsensusValid = false;
SmallVector<Instruction*, 16> AddrModeInsts;
@@ -2334,7 +2515,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
// Break use-def graph loops.
if (!Visited.insert(V)) {
- Consensus = 0;
+ Consensus = nullptr;
break;
}
@@ -2380,7 +2561,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
continue;
}
- Consensus = 0;
+ Consensus = nullptr;
break;
}
@@ -2420,14 +2601,135 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
Value *&SunkAddr = SunkAddrs[Addr];
if (SunkAddr) {
DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode << " for "
- << *MemoryInst);
+ << *MemoryInst << "\n");
if (SunkAddr->getType() != Addr->getType())
SunkAddr = Builder.CreateBitCast(SunkAddr, Addr->getType());
+ } else if (AddrSinkUsingGEPs || (!AddrSinkUsingGEPs.getNumOccurrences() &&
+ TM && TM->getSubtarget<TargetSubtargetInfo>().useAA())) {
+ // By default, we use the GEP-based method when AA is used later. This
+ // prevents new inttoptr/ptrtoint pairs from degrading AA capabilities.
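+ // As an illustrative sketch (value names are hypothetical), instead of
+ // computing the address as
+ //   %off = add i64 %base.int, 8
+ //   %sunkaddr = inttoptr i64 %off to i32*
+ // this path emits
+ //   %sunkaddr = getelementptr i8* %base, i64 8
+ // so the base pointer stays visible to alias analysis.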
+ DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for "
+ << *MemoryInst << "\n");
+ Type *IntPtrTy = TLI->getDataLayout()->getIntPtrType(Addr->getType());
+ Value *ResultPtr = nullptr, *ResultIndex = nullptr;
+
+ // First, find the pointer.
+ if (AddrMode.BaseReg && AddrMode.BaseReg->getType()->isPointerTy()) {
+ ResultPtr = AddrMode.BaseReg;
+ AddrMode.BaseReg = nullptr;
+ }
+
+ if (AddrMode.Scale && AddrMode.ScaledReg->getType()->isPointerTy()) {
+ // We can't add more than one pointer together, nor can we scale a
+ // pointer (both of which seem meaningless).
+ if (ResultPtr || AddrMode.Scale != 1)
+ return false;
+
+ ResultPtr = AddrMode.ScaledReg;
+ AddrMode.Scale = 0;
+ }
+
+ if (AddrMode.BaseGV) {
+ if (ResultPtr)
+ return false;
+
+ ResultPtr = AddrMode.BaseGV;
+ }
+
+ // If the real base value actually came from an inttoptr, then the matcher
+ // will look through it and provide only the integer value. In that case,
+ // use it here.
+ if (!ResultPtr && AddrMode.BaseReg) {
+ ResultPtr =
+ Builder.CreateIntToPtr(AddrMode.BaseReg, Addr->getType(), "sunkaddr");
+ AddrMode.BaseReg = nullptr;
+ } else if (!ResultPtr && AddrMode.Scale == 1) {
+ ResultPtr =
+ Builder.CreateIntToPtr(AddrMode.ScaledReg, Addr->getType(), "sunkaddr");
+ AddrMode.Scale = 0;
+ }
+
+ if (!ResultPtr &&
+ !AddrMode.BaseReg && !AddrMode.Scale && !AddrMode.BaseOffs) {
+ SunkAddr = Constant::getNullValue(Addr->getType());
+ } else if (!ResultPtr) {
+ return false;
+ } else {
+ Type *I8PtrTy =
+ Builder.getInt8PtrTy(Addr->getType()->getPointerAddressSpace());
+
+ // Start with the base register. Do this first so that subsequent address
+ // matching finds it last, which will prevent it from trying to match it
+ // as the scaled value in case it happens to be a mul. That would be
+ // problematic if we've sunk a different mul for the scale, because then
+ // we'd end up sinking both muls.
+ if (AddrMode.BaseReg) {
+ Value *V = AddrMode.BaseReg;
+ if (V->getType() != IntPtrTy)
+ V = Builder.CreateIntCast(V, IntPtrTy, /*isSigned=*/true, "sunkaddr");
+
+ ResultIndex = V;
+ }
+
+ // Add the scale value.
+ if (AddrMode.Scale) {
+ Value *V = AddrMode.ScaledReg;
+ if (V->getType() == IntPtrTy) {
+ // done.
+ } else if (cast<IntegerType>(IntPtrTy)->getBitWidth() <
+ cast<IntegerType>(V->getType())->getBitWidth()) {
+ V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr");
+ } else {
+ // It is only safe to sign extend the BaseReg if we know that the math
+ // required to create it did not overflow before we extend it. Since
+ // the original IR value was tossed in favor of a constant back when
+ // the AddrMode was created we need to bail out gracefully if widths
+ // do not match instead of extending it.
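+ // For example (hypothetical widths): if ResultIndex came from i32
+ // arithmetic that wrapped, sign extending it to an i64 IntPtrTy would
+ // compute a different address, so bailing out is the safe choice.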
+ Instruction *I = dyn_cast_or_null<Instruction>(ResultIndex);
+ if (I && (ResultIndex != AddrMode.BaseReg))
+ I->eraseFromParent();
+ return false;
+ }
+
+ if (AddrMode.Scale != 1)
+ V = Builder.CreateMul(V, ConstantInt::get(IntPtrTy, AddrMode.Scale),
+ "sunkaddr");
+ if (ResultIndex)
+ ResultIndex = Builder.CreateAdd(ResultIndex, V, "sunkaddr");
+ else
+ ResultIndex = V;
+ }
+
+ // Add in the Base Offset if present.
+ if (AddrMode.BaseOffs) {
+ Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs);
+ if (ResultIndex) {
+ // We need to add this separately from the scale above to help with
+ // SDAG consecutive load/store merging.
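+ // (Illustrative: keeping the constant offset as the final GEP index
+ // lets SelectionDAG see that addresses such as base+4 and base+8 are
+ // consecutive.)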
+ if (ResultPtr->getType() != I8PtrTy)
+ ResultPtr = Builder.CreateBitCast(ResultPtr, I8PtrTy);
+ ResultPtr = Builder.CreateGEP(ResultPtr, ResultIndex, "sunkaddr");
+ }
+
+ ResultIndex = V;
+ }
+
+ if (!ResultIndex) {
+ SunkAddr = ResultPtr;
+ } else {
+ if (ResultPtr->getType() != I8PtrTy)
+ ResultPtr = Builder.CreateBitCast(ResultPtr, I8PtrTy);
+ SunkAddr = Builder.CreateGEP(ResultPtr, ResultIndex, "sunkaddr");
+ }
+
+ if (SunkAddr->getType() != Addr->getType())
+ SunkAddr = Builder.CreateBitCast(SunkAddr, Addr->getType());
+ }
} else {
DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for "
- << *MemoryInst);
+ << *MemoryInst << "\n");
Type *IntPtrTy = TLI->getDataLayout()->getIntPtrType(Addr->getType());
- Value *Result = 0;
+ Value *Result = nullptr;
// Start with the base register. Do this first so that subsequent address
// matching finds it last, which will prevent it from trying to match it
@@ -2459,8 +2761,9 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
// the original IR value was tossed in favor of a constant back when
// the AddrMode was created we need to bail out gracefully if widths
// do not match instead of extending it.
- if (Result != AddrMode.BaseReg)
- cast<Instruction>(Result)->eraseFromParent();
+ Instruction *I = dyn_cast_or_null<Instruction>(Result);
+ if (I && (Result != AddrMode.BaseReg))
+ I->eraseFromParent();
return false;
}
if (AddrMode.Scale != 1)
@@ -2490,7 +2793,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
Result = V;
}
- if (Result == 0)
+ if (!Result)
SunkAddr = Constant::getNullValue(Addr->getType());
else
SunkAddr = Builder.CreateIntToPtr(Result, Addr->getType(), "sunkaddr");
@@ -2815,7 +3118,7 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I) {
// It is possible for very late stage optimizations (such as SimplifyCFG)
// to introduce PHI nodes too late to be cleaned up. If we detect such a
// trivial PHI, go ahead and zap it here.
- if (Value *V = SimplifyInstruction(P, TLI ? TLI->getDataLayout() : 0,
+ if (Value *V = SimplifyInstruction(P, TLI ? TLI->getDataLayout() : nullptr,
TLInfo, DT)) {
P->replaceAllUsesWith(V);
P->eraseFromParent();
@@ -2870,6 +3173,17 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I) {
return false;
}
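+ // If this is a constant shift right, see whether OptimizeExtractBits can
+ // sink it into user blocks to form a target bit-extract.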
+ BinaryOperator *BinOp = dyn_cast<BinaryOperator>(I);
+
+ if (BinOp && (BinOp->getOpcode() == Instruction::AShr ||
+ BinOp->getOpcode() == Instruction::LShr)) {
+ ConstantInt *CI = dyn_cast<ConstantInt>(BinOp->getOperand(1));
+ if (TLI && CI && TLI->hasExtractBitsInsn())
+ return OptimizeExtractBits(BinOp, CI, *TLI);
+
+ return false;
+ }
+
if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
if (GEPI->hasAllZeroIndices()) {
/// The GEP operand must be a pointer, so must its result -> BitCast
@@ -2918,11 +3232,16 @@ bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB) {
bool CodeGenPrepare::PlaceDbgValues(Function &F) {
bool MadeChange = false;
for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) {
- Instruction *PrevNonDbgInst = NULL;
+ Instruction *PrevNonDbgInst = nullptr;
for (BasicBlock::iterator BI = I->begin(), BE = I->end(); BI != BE;) {
Instruction *Insn = BI; ++BI;
DbgValueInst *DVI = dyn_cast<DbgValueInst>(Insn);
- if (!DVI) {
+ // Leave dbg.values that refer to an alloca alone. These
+ // intrinsics describe the address of a variable (= the alloca)
+ // being taken. They should not be moved next to the alloca
+ // (and to the beginning of the scope), but rather stay close to
+ // where said address is used.
+ if (!DVI || (DVI->getValue() && isa<AllocaInst>(DVI->getValue()))) {
PrevNonDbgInst = Insn;
continue;
}
diff --git a/lib/CodeGen/CriticalAntiDepBreaker.cpp b/lib/CodeGen/CriticalAntiDepBreaker.cpp
index 463eb86..822636f 100644
--- a/lib/CodeGen/CriticalAntiDepBreaker.cpp
+++ b/lib/CodeGen/CriticalAntiDepBreaker.cpp
@@ -13,7 +13,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "post-RA-sched"
#include "CriticalAntiDepBreaker.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -26,6 +25,8 @@
using namespace llvm;
+#define DEBUG_TYPE "post-RA-sched"
+
CriticalAntiDepBreaker::
CriticalAntiDepBreaker(MachineFunction& MFi, const RegisterClassInfo &RCI) :
AntiDepBreaker(), MF(MFi),
@@ -33,7 +34,7 @@ CriticalAntiDepBreaker(MachineFunction& MFi, const RegisterClassInfo &RCI) :
TII(MF.getTarget().getInstrInfo()),
TRI(MF.getTarget().getRegisterInfo()),
RegClassInfo(RCI),
- Classes(TRI->getNumRegs(), static_cast<const TargetRegisterClass *>(0)),
+ Classes(TRI->getNumRegs(), nullptr),
KillIndices(TRI->getNumRegs(), 0),
DefIndices(TRI->getNumRegs(), 0),
KeepRegs(TRI->getNumRegs(), false) {}
@@ -45,7 +46,7 @@ void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
const unsigned BBSize = BB->size();
for (unsigned i = 0, e = TRI->getNumRegs(); i != e; ++i) {
// Clear out the register class data.
- Classes[i] = static_cast<const TargetRegisterClass *>(0);
+ Classes[i] = nullptr;
// Initialize the indices to indicate that no registers are live.
KillIndices[i] = ~0u;
@@ -75,7 +76,7 @@ void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
// callee-saved register that is not saved in the prolog.
const MachineFrameInfo *MFI = MF.getFrameInfo();
BitVector Pristine = MFI->getPristineRegs(BB);
- for (const uint16_t *I = TRI->getCalleeSavedRegs(&MF); *I; ++I) {
+ for (const MCPhysReg *I = TRI->getCalleeSavedRegs(&MF); *I; ++I) {
if (!IsReturnBlock && !Pristine.test(*I)) continue;
for (MCRegAliasIterator AI(*I, TRI, true); AI.isValid(); ++AI) {
unsigned Reg = *AI;
@@ -124,7 +125,7 @@ void CriticalAntiDepBreaker::Observe(MachineInstr *MI, unsigned Count,
/// CriticalPathStep - Return the next SUnit after SU on the bottom-up
/// critical path.
static const SDep *CriticalPathStep(const SUnit *SU) {
- const SDep *Next = 0;
+ const SDep *Next = nullptr;
unsigned NextDepth = 0;
// Find the predecessor edge with the greatest depth.
for (SUnit::const_pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end();
@@ -171,7 +172,7 @@ void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr *MI) {
if (!MO.isReg()) continue;
unsigned Reg = MO.getReg();
if (Reg == 0) continue;
- const TargetRegisterClass *NewRC = 0;
+ const TargetRegisterClass *NewRC = nullptr;
if (i < MI->getDesc().getNumOperands())
NewRC = TII->getRegClass(MI->getDesc(), i, TRI, MF);
@@ -227,7 +228,7 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI,
DefIndices[i] = Count;
KillIndices[i] = ~0u;
KeepRegs.reset(i);
- Classes[i] = 0;
+ Classes[i] = nullptr;
RegRefs.erase(i);
}
@@ -244,7 +245,7 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI,
(DefIndices[Reg] == ~0u)) &&
"Kill and Def maps aren't consistent for Reg!");
KeepRegs.reset(Reg);
- Classes[Reg] = 0;
+ Classes[Reg] = nullptr;
RegRefs.erase(Reg);
// Repeat, for all subregs.
for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
@@ -252,7 +253,7 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI,
DefIndices[SubregReg] = Count;
KillIndices[SubregReg] = ~0u;
KeepRegs.reset(SubregReg);
- Classes[SubregReg] = 0;
+ Classes[SubregReg] = nullptr;
RegRefs.erase(SubregReg);
}
// Conservatively mark super-registers as unusable.
@@ -267,7 +268,7 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI,
if (Reg == 0) continue;
if (!MO.isUse()) continue;
- const TargetRegisterClass *NewRC = 0;
+ const TargetRegisterClass *NewRC = nullptr;
if (i < MI->getDesc().getNumOperands())
NewRC = TII->getRegClass(MI->getDesc(), i, TRI, MF);
@@ -419,7 +420,7 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits,
DenseMap<MachineInstr*,const SUnit*> MISUnitMap;
// Find the node at the bottom of the critical path.
- const SUnit *Max = 0;
+ const SUnit *Max = nullptr;
for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
const SUnit *SU = &SUnits[i];
MISUnitMap[SU->getInstr()] = SU;
@@ -551,8 +552,8 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits,
CriticalPathMI = CriticalPathSU->getInstr();
} else {
// We've reached the end of the critical path.
- CriticalPathSU = 0;
- CriticalPathMI = 0;
+ CriticalPathSU = nullptr;
+ CriticalPathMI = nullptr;
}
}
@@ -589,8 +590,9 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits,
// Determine AntiDepReg's register class, if it is live and is
// consistently used within a single class.
- const TargetRegisterClass *RC = AntiDepReg != 0 ? Classes[AntiDepReg] : 0;
- assert((AntiDepReg == 0 || RC != NULL) &&
+ const TargetRegisterClass *RC = AntiDepReg != 0 ? Classes[AntiDepReg]
+ : nullptr;
+ assert((AntiDepReg == 0 || RC != nullptr) &&
"Register should be live if it's causing an anti-dependence!");
if (RC == reinterpret_cast<TargetRegisterClass *>(-1))
AntiDepReg = 0;
@@ -638,7 +640,7 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits,
(DefIndices[NewReg] == ~0u)) &&
"Kill and Def maps aren't consistent for NewReg!");
- Classes[AntiDepReg] = 0;
+ Classes[AntiDepReg] = nullptr;
DefIndices[AntiDepReg] = KillIndices[AntiDepReg];
KillIndices[AntiDepReg] = ~0u;
assert(((KillIndices[AntiDepReg] == ~0u) !=
diff --git a/lib/CodeGen/DFAPacketizer.cpp b/lib/CodeGen/DFAPacketizer.cpp
index 5b40ae1..bc6e9dc 100644
--- a/lib/CodeGen/DFAPacketizer.cpp
+++ b/lib/CodeGen/DFAPacketizer.cpp
@@ -121,7 +121,7 @@ DefaultVLIWScheduler::DefaultVLIWScheduler(
void DefaultVLIWScheduler::schedule() {
// Build the scheduling graph.
- buildSchedGraph(0);
+ buildSchedGraph(nullptr);
}
// VLIWPacketizerList Ctor
@@ -129,7 +129,7 @@ VLIWPacketizerList::VLIWPacketizerList(
MachineFunction &MF, MachineLoopInfo &MLI, MachineDominatorTree &MDT,
bool IsPostRA) : TM(MF.getTarget()), MF(MF) {
TII = TM.getInstrInfo();
- ResourceTracker = TII->CreateTargetScheduleState(&TM, 0);
+ ResourceTracker = TII->CreateTargetScheduleState(&TM, nullptr);
VLIWScheduler = new DefaultVLIWScheduler(MF, MLI, MDT, IsPostRA);
}
diff --git a/lib/CodeGen/DeadMachineInstructionElim.cpp b/lib/CodeGen/DeadMachineInstructionElim.cpp
index aa03e77..2b144d8 100644
--- a/lib/CodeGen/DeadMachineInstructionElim.cpp
+++ b/lib/CodeGen/DeadMachineInstructionElim.cpp
@@ -11,7 +11,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "codegen-dce"
#include "llvm/CodeGen/Passes.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -23,6 +22,8 @@
#include "llvm/Target/TargetMachine.h"
using namespace llvm;
+#define DEBUG_TYPE "codegen-dce"
+
STATISTIC(NumDeletes, "Number of dead instructions deleted");
namespace {
@@ -59,7 +60,7 @@ bool DeadMachineInstructionElim::isDead(const MachineInstr *MI) const {
// Don't delete instructions with side effects.
bool SawStore = false;
- if (!MI->isSafeToMove(TII, 0, SawStore) && !MI->isPHI())
+ if (!MI->isSafeToMove(TII, nullptr, SawStore) && !MI->isPHI())
return false;
// Examine each operand.
diff --git a/lib/CodeGen/DwarfEHPrepare.cpp b/lib/CodeGen/DwarfEHPrepare.cpp
index d543baf..a195586 100644
--- a/lib/CodeGen/DwarfEHPrepare.cpp
+++ b/lib/CodeGen/DwarfEHPrepare.cpp
@@ -12,7 +12,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "dwarfehprepare"
#include "llvm/CodeGen/Passes.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/IR/CallSite.h"
@@ -28,6 +27,8 @@
#include "llvm/Transforms/Utils/SSAUpdater.h"
using namespace llvm;
+#define DEBUG_TYPE "dwarfehprepare"
+
STATISTIC(NumResumesLowered, "Number of resume calls lowered");
namespace {
@@ -43,7 +44,7 @@ namespace {
public:
static char ID; // Pass identification, replacement for typeid.
DwarfEHPrepare(const TargetMachine *TM)
- : FunctionPass(ID), TM(TM), RewindFunction(0) {
+ : FunctionPass(ID), TM(TM), RewindFunction(nullptr) {
initializeDominatorTreeWrapperPassPass(*PassRegistry::getPassRegistry());
}
@@ -68,10 +69,10 @@ FunctionPass *llvm::createDwarfEHPass(const TargetMachine *TM) {
/// instructions, including the 'resume' instruction.
Value *DwarfEHPrepare::GetExceptionObject(ResumeInst *RI) {
Value *V = RI->getOperand(0);
- Value *ExnObj = 0;
+ Value *ExnObj = nullptr;
InsertValueInst *SelIVI = dyn_cast<InsertValueInst>(V);
- LoadInst *SelLoad = 0;
- InsertValueInst *ExcIVI = 0;
+ LoadInst *SelLoad = nullptr;
+ InsertValueInst *ExcIVI = nullptr;
bool EraseIVIs = false;
if (SelIVI) {
diff --git a/lib/CodeGen/EarlyIfConversion.cpp b/lib/CodeGen/EarlyIfConversion.cpp
index f8887ef..c470632 100644
--- a/lib/CodeGen/EarlyIfConversion.cpp
+++ b/lib/CodeGen/EarlyIfConversion.cpp
@@ -16,7 +16,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "early-ifcvt"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SetVector.h"
@@ -40,6 +39,8 @@
using namespace llvm;
+#define DEBUG_TYPE "early-ifcvt"
+
// Absolute maximum number of instructions allowed per speculated block.
// This bypasses all other heuristics, so it should be set fairly high.
static cl::opt<unsigned>
@@ -219,7 +220,7 @@ bool SSAIfConv::canSpeculateInstrs(MachineBasicBlock *MBB) {
// We never speculate stores, so an AA pointer isn't necessary.
bool DontMoveAcrossStore = true;
- if (!I->isSafeToMove(TII, 0, DontMoveAcrossStore)) {
+ if (!I->isSafeToMove(TII, nullptr, DontMoveAcrossStore)) {
DEBUG(dbgs() << "Can't speculate: " << *I);
return false;
}
@@ -338,7 +339,7 @@ bool SSAIfConv::findInsertionPoint() {
///
bool SSAIfConv::canConvertIf(MachineBasicBlock *MBB) {
Head = MBB;
- TBB = FBB = Tail = 0;
+ TBB = FBB = Tail = nullptr;
if (Head->succ_size() != 2)
return false;
@@ -463,7 +464,7 @@ void SSAIfConv::replacePHIInstrs() {
TII->insertSelect(*Head, FirstTerm, HeadDL, DstReg, Cond, PI.TReg, PI.FReg);
DEBUG(dbgs() << " --> " << *std::prev(FirstTerm));
PI.PHI->eraseFromParent();
- PI.PHI = 0;
+ PI.PHI = nullptr;
}
}
@@ -564,7 +565,7 @@ void SSAIfConv::convertIf(SmallVectorImpl<MachineBasicBlock*> &RemovedBlocks) {
// We need a branch to Tail, let code placement work it out later.
DEBUG(dbgs() << "Converting to unconditional branch.\n");
SmallVector<MachineOperand, 0> EmptyCond;
- TII->InsertBranch(*Head, Tail, 0, EmptyCond, HeadDL);
+ TII->InsertBranch(*Head, Tail, nullptr, EmptyCond, HeadDL);
Head->addSuccessor(Tail);
}
DEBUG(dbgs() << *Head);
@@ -775,6 +776,12 @@ bool EarlyIfConverter::tryConvertIf(MachineBasicBlock *MBB) {
bool EarlyIfConverter::runOnMachineFunction(MachineFunction &MF) {
DEBUG(dbgs() << "********** EARLY IF-CONVERSION **********\n"
<< "********** Function: " << MF.getName() << '\n');
+ // Only run if-conversion if the target wants it.
+ if (!MF.getTarget()
+ .getSubtarget<TargetSubtargetInfo>()
+ .enableEarlyIfConversion())
+ return false;
+
TII = MF.getTarget().getInstrInfo();
TRI = MF.getTarget().getRegisterInfo();
SchedModel =
@@ -783,7 +790,7 @@ bool EarlyIfConverter::runOnMachineFunction(MachineFunction &MF) {
DomTree = &getAnalysis<MachineDominatorTree>();
Loops = getAnalysisIfAvailable<MachineLoopInfo>();
Traces = &getAnalysis<MachineTraceMetrics>();
- MinInstr = 0;
+ MinInstr = nullptr;
bool Changed = false;
IfConv.runOnMachineFunction(MF);
diff --git a/lib/CodeGen/EdgeBundles.cpp b/lib/CodeGen/EdgeBundles.cpp
index 3bb0465..aea7c31 100644
--- a/lib/CodeGen/EdgeBundles.cpp
+++ b/lib/CodeGen/EdgeBundles.cpp
@@ -41,9 +41,7 @@ bool EdgeBundles::runOnMachineFunction(MachineFunction &mf) {
EC.clear();
EC.grow(2 * MF->getNumBlockIDs());
- for (MachineFunction::const_iterator I = MF->begin(), E = MF->end(); I != E;
- ++I) {
- const MachineBasicBlock &MBB = *I;
+ for (const auto &MBB : *MF) {
unsigned OutE = 2 * MBB.getNumber() + 1;
// Join the outgoing bundle with the ingoing bundles of all successors.
for (MachineBasicBlock::const_succ_iterator SI = MBB.succ_begin(),
@@ -69,29 +67,31 @@ bool EdgeBundles::runOnMachineFunction(MachineFunction &mf) {
return false;
}
-/// view - Visualize the annotated bipartite CFG with Graphviz.
-void EdgeBundles::view() const {
- ViewGraph(*this, "EdgeBundles");
-}
-
/// Specialize WriteGraph, the standard implementation won't work.
-raw_ostream &llvm::WriteGraph(raw_ostream &O, const EdgeBundles &G,
- bool ShortNames,
- const Twine &Title) {
+namespace llvm {
+template<>
+raw_ostream &WriteGraph<>(raw_ostream &O, const EdgeBundles &G,
+ bool ShortNames,
+ const Twine &Title) {
const MachineFunction *MF = G.getMachineFunction();
O << "digraph {\n";
- for (MachineFunction::const_iterator I = MF->begin(), E = MF->end();
- I != E; ++I) {
- unsigned BB = I->getNumber();
+ for (const auto &MBB : *MF) {
+ unsigned BB = MBB.getNumber();
O << "\t\"BB#" << BB << "\" [ shape=box ]\n"
<< '\t' << G.getBundle(BB, false) << " -> \"BB#" << BB << "\"\n"
<< "\t\"BB#" << BB << "\" -> " << G.getBundle(BB, true) << '\n';
- for (MachineBasicBlock::const_succ_iterator SI = I->succ_begin(),
- SE = I->succ_end(); SI != SE; ++SI)
+ for (MachineBasicBlock::const_succ_iterator SI = MBB.succ_begin(),
+ SE = MBB.succ_end(); SI != SE; ++SI)
O << "\t\"BB#" << BB << "\" -> \"BB#" << (*SI)->getNumber()
<< "\" [ color=lightgray ]\n";
}
O << "}\n";
return O;
}
+}
+
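+// (view() is defined after the WriteGraph specialization above so that its
+// call to ViewGraph instantiates the specialized version.)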
+/// view - Visualize the annotated bipartite CFG with Graphviz.
+void EdgeBundles::view() const {
+ ViewGraph(*this, "EdgeBundles");
+}
diff --git a/lib/CodeGen/ExecutionDepsFix.cpp b/lib/CodeGen/ExecutionDepsFix.cpp
index a08eb6b..cf55b68 100644
--- a/lib/CodeGen/ExecutionDepsFix.cpp
+++ b/lib/CodeGen/ExecutionDepsFix.cpp
@@ -20,7 +20,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "execution-fix"
#include "llvm/CodeGen/Passes.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/CodeGen/LivePhysRegs.h"
@@ -33,6 +32,8 @@
#include "llvm/Target/TargetMachine.h"
using namespace llvm;
+#define DEBUG_TYPE "execution-fix"
+
/// A DomainValue is a bit like LiveIntervals' ValNo, but it also keeps track
/// of execution domains.
///
@@ -100,7 +101,7 @@ struct DomainValue {
// Clear this DomainValue and point to next which has all its data.
void clear() {
AvailableDomains = 0;
- Next = 0;
+ Next = nullptr;
Instrs.clear();
}
};
@@ -275,7 +276,7 @@ void ExeDepsFix::kill(int rx) {
return;
release(LiveRegs[rx].Value);
- LiveRegs[rx].Value = 0;
+ LiveRegs[rx].Value = nullptr;
}
/// Force register rx into domain.
@@ -360,7 +361,7 @@ void ExeDepsFix::enterBasicBlock(MachineBasicBlock *MBB) {
// Default values are 'nothing happened a long time ago'.
for (unsigned rx = 0; rx != NumRegs; ++rx) {
- LiveRegs[rx].Value = 0;
+ LiveRegs[rx].Value = nullptr;
LiveRegs[rx].Def = -(1 << 20);
}
@@ -404,7 +405,7 @@ void ExeDepsFix::enterBasicBlock(MachineBasicBlock *MBB) {
// We have a live DomainValue from more than one predecessor.
if (LiveRegs[rx].Value->isCollapsed()) {
- // We are already collapsed, but predecessor is not. Force him.
+ // We are already collapsed, but predecessor is not. Force it.
unsigned Domain = LiveRegs[rx].Value->getFirstDomain();
if (!pdv->isCollapsed() && pdv->hasDomain(Domain))
collapse(pdv, Domain);
@@ -440,7 +441,7 @@ void ExeDepsFix::leaveBasicBlock(MachineBasicBlock *MBB) {
release(LiveRegs[i].Value);
delete[] LiveRegs;
}
- LiveRegs = 0;
+ LiveRegs = nullptr;
}
void ExeDepsFix::visitInstr(MachineInstr *MI) {
@@ -664,7 +665,7 @@ void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) {
// doms are now sorted in order of appearance. Try to merge them all, giving
// priority to the latest ones.
- DomainValue *dv = 0;
+ DomainValue *dv = nullptr;
while (!Regs.empty()) {
if (!dv) {
dv = Regs.pop_back_val().Value;
@@ -714,7 +715,7 @@ bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) {
MF = &mf;
TII = MF->getTarget().getInstrInfo();
TRI = MF->getTarget().getRegisterInfo();
- LiveRegs = 0;
+ LiveRegs = nullptr;
assert(NumRegs == RC->getNumRegs() && "Bad regclass");
DEBUG(dbgs() << "********** FIX EXECUTION DEPENDENCIES: "
diff --git a/lib/CodeGen/ExpandISelPseudos.cpp b/lib/CodeGen/ExpandISelPseudos.cpp
index fb2e446..90b62b5 100644
--- a/lib/CodeGen/ExpandISelPseudos.cpp
+++ b/lib/CodeGen/ExpandISelPseudos.cpp
@@ -14,7 +14,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "expand-isel-pseudos"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -23,6 +22,8 @@
#include "llvm/Target/TargetMachine.h"
using namespace llvm;
+#define DEBUG_TYPE "expand-isel-pseudos"
+
namespace {
class ExpandISelPseudos : public MachineFunctionPass {
public:
diff --git a/lib/CodeGen/ExpandPostRAPseudos.cpp b/lib/CodeGen/ExpandPostRAPseudos.cpp
index 1b0315a..8969bcc 100644
--- a/lib/CodeGen/ExpandPostRAPseudos.cpp
+++ b/lib/CodeGen/ExpandPostRAPseudos.cpp
@@ -12,7 +12,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "postrapseudos"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
@@ -25,6 +24,8 @@
#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
+#define DEBUG_TYPE "postrapseudos"
+
namespace {
struct ExpandPostRA : public MachineFunctionPass {
private:
diff --git a/lib/CodeGen/GCMetadata.cpp b/lib/CodeGen/GCMetadata.cpp
index 54b047b..c3e4f3e 100644
--- a/lib/CodeGen/GCMetadata.cpp
+++ b/lib/CodeGen/GCMetadata.cpp
@@ -61,10 +61,6 @@ GCModuleInfo::GCModuleInfo()
initializeGCModuleInfoPass(*PassRegistry::getPassRegistry());
}
-GCModuleInfo::~GCModuleInfo() {
- clear();
-}
-
GCStrategy *GCModuleInfo::getOrCreateStrategy(const Module *M,
const std::string &Name) {
strategy_map_type::iterator NMI = StrategyMap.find(Name);
@@ -74,17 +70,17 @@ GCStrategy *GCModuleInfo::getOrCreateStrategy(const Module *M,
for (GCRegistry::iterator I = GCRegistry::begin(),
E = GCRegistry::end(); I != E; ++I) {
if (Name == I->getName()) {
- GCStrategy *S = I->instantiate();
+ std::unique_ptr<GCStrategy> S = I->instantiate();
S->M = M;
S->Name = Name;
- StrategyMap.GetOrCreateValue(Name).setValue(S);
- StrategyList.push_back(S);
- return S;
+ StrategyMap.GetOrCreateValue(Name).setValue(S.get());
+ StrategyList.push_back(std::move(S));
+ return StrategyList.back().get();
}
}
dbgs() << "unsupported GC: " << Name << "\n";
- llvm_unreachable(0);
+ llvm_unreachable(nullptr);
}
GCFunctionInfo &GCModuleInfo::getFunctionInfo(const Function &F) {
@@ -104,9 +100,6 @@ GCFunctionInfo &GCModuleInfo::getFunctionInfo(const Function &F) {
void GCModuleInfo::clear() {
FInfoMap.clear();
StrategyMap.clear();
-
- for (iterator I = begin(), E = end(); I != E; ++I)
- delete *I;
StrategyList.clear();
}
diff --git a/lib/CodeGen/GCStrategy.cpp b/lib/CodeGen/GCStrategy.cpp
index b31a0f2..1fdff6b 100644
--- a/lib/CodeGen/GCStrategy.cpp
+++ b/lib/CodeGen/GCStrategy.cpp
@@ -101,13 +101,6 @@ GCStrategy::GCStrategy() :
UsesMetadata(false)
{}
-GCStrategy::~GCStrategy() {
- for (iterator I = begin(), E = end(); I != E; ++I)
- delete *I;
-
- Functions.clear();
-}
-
bool GCStrategy::initializeCustomLowering(Module &M) { return false; }
bool GCStrategy::performCustomLowering(Function &F) {
@@ -118,14 +111,13 @@ bool GCStrategy::performCustomLowering(Function &F) {
bool GCStrategy::findCustomSafePoints(GCFunctionInfo& FI, MachineFunction &F) {
dbgs() << "gc " << getName() << " must override findCustomSafePoints.\n";
- llvm_unreachable(0);
+ llvm_unreachable(nullptr);
}
GCFunctionInfo *GCStrategy::insertFunctionInfo(const Function &F) {
- GCFunctionInfo *FI = new GCFunctionInfo(F, *this);
- Functions.push_back(FI);
- return FI;
+ Functions.push_back(make_unique<GCFunctionInfo>(F, *this));
+ return Functions.back().get();
}
// -----------------------------------------------------------------------------
diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp
index 1a18b1a..1502d5f 100644
--- a/lib/CodeGen/IfConversion.cpp
+++ b/lib/CodeGen/IfConversion.cpp
@@ -11,7 +11,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "ifcvt"
#include "llvm/CodeGen/Passes.h"
#include "BranchFolding.h"
#include "llvm/ADT/STLExtras.h"
@@ -37,6 +36,8 @@
using namespace llvm;
+#define DEBUG_TYPE "ifcvt"
+
// Hidden options to help with debugging.
static cl::opt<int> IfCvtFnStart("ifcvt-fn-start", cl::init(-1), cl::Hidden);
static cl::opt<int> IfCvtFnStop("ifcvt-fn-stop", cl::init(-1), cl::Hidden);
@@ -127,7 +128,8 @@ namespace {
IsAnalyzed(false), IsEnqueued(false), IsBrAnalyzable(false),
HasFallThrough(false), IsUnpredicable(false),
CannotBeCopied(false), ClobbersPred(false), NonPredSize(0),
- ExtraCost(0), ExtraCost2(0), BB(0), TrueBB(0), FalseBB(0) {}
+ ExtraCost(0), ExtraCost2(0), BB(nullptr), TrueBB(nullptr),
+ FalseBB(nullptr) {}
};
/// IfcvtToken - Record information about pending if-conversions to attempt:
@@ -205,7 +207,7 @@ namespace {
void PredicateBlock(BBInfo &BBI,
MachineBasicBlock::iterator E,
SmallVectorImpl<MachineOperand> &Cond,
- SmallSet<unsigned, 4> *LaterRedefs = 0);
+ SmallSet<unsigned, 4> *LaterRedefs = nullptr);
void CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI,
SmallVectorImpl<MachineOperand> &Cond,
bool IgnoreBr = false);
@@ -230,7 +232,7 @@ namespace {
// blockAlwaysFallThrough - Block ends without a terminator.
bool blockAlwaysFallThrough(BBInfo &BBI) const {
- return BBI.IsBrAnalyzable && BBI.TrueBB == NULL;
+ return BBI.IsBrAnalyzable && BBI.TrueBB == nullptr;
}
// IfcvtTokenCmp - Used to sort if-conversion candidates.
@@ -438,7 +440,7 @@ static MachineBasicBlock *findFalseBlock(MachineBasicBlock *BB,
if (SuccBB != TrueBB)
return SuccBB;
}
- return NULL;
+ return nullptr;
}
/// ReverseBranchCondition - Reverse the condition of the end of the block
@@ -460,7 +462,7 @@ static inline MachineBasicBlock *getNextBlock(MachineBasicBlock *BB) {
MachineFunction::iterator I = BB;
MachineFunction::iterator E = BB->getParent()->end();
if (++I == E)
- return NULL;
+ return nullptr;
return I;
}
@@ -551,7 +553,7 @@ bool IfConverter::ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI,
FT = getNextBlock(FalseBBI.BB);
if (TT != FT)
return false;
- if (TT == NULL && (TrueBBI.IsBrAnalyzable || FalseBBI.IsBrAnalyzable))
+ if (!TT && (TrueBBI.IsBrAnalyzable || FalseBBI.IsBrAnalyzable))
return false;
if (TrueBBI.BB->pred_size() > 1 || FalseBBI.BB->pred_size() > 1)
return false;
@@ -641,11 +643,11 @@ void IfConverter::ScanInstructions(BBInfo &BBI) {
bool AlreadyPredicated = !BBI.Predicate.empty();
// First analyze the end of BB branches.
- BBI.TrueBB = BBI.FalseBB = NULL;
+ BBI.TrueBB = BBI.FalseBB = nullptr;
BBI.BrCond.clear();
BBI.IsBrAnalyzable =
!TII->AnalyzeBranch(*BBI.BB, BBI.TrueBB, BBI.FalseBB, BBI.BrCond);
- BBI.HasFallThrough = BBI.IsBrAnalyzable && BBI.FalseBB == NULL;
+ BBI.HasFallThrough = BBI.IsBrAnalyzable && BBI.FalseBB == nullptr;
if (BBI.BrCond.size()) {
// No false branch. This BB must end with a conditional branch and a
@@ -954,13 +956,13 @@ static void InsertUncondBranch(MachineBasicBlock *BB, MachineBasicBlock *ToBB,
const TargetInstrInfo *TII) {
DebugLoc dl; // FIXME: this is nowhere
SmallVector<MachineOperand, 0> NoCond;
- TII->InsertBranch(*BB, ToBB, NULL, NoCond, dl);
+ TII->InsertBranch(*BB, ToBB, nullptr, NoCond, dl);
}
/// RemoveExtraEdges - Remove true / false edges if either / both are no longer
/// successors.
void IfConverter::RemoveExtraEdges(BBInfo &BBI) {
- MachineBasicBlock *TBB = NULL, *FBB = NULL;
+ MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
SmallVector<MachineOperand, 4> Cond;
if (!TII->AnalyzeBranch(*BBI.BB, TBB, FBB, Cond))
BBI.BB->CorrectExtraCFGEdges(TBB, FBB, !Cond.empty());
@@ -1179,7 +1181,7 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) {
DontKill.clear();
- bool HasEarlyExit = CvtBBI->FalseBB != NULL;
+ bool HasEarlyExit = CvtBBI->FalseBB != nullptr;
uint64_t CvtNext = 0, CvtFalse = 0, BBNext = 0, BBCvt = 0, SumWeight = 0;
uint32_t WeightScale = 0;
if (HasEarlyExit) {
@@ -1215,7 +1217,7 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) {
CvtBBI->BrCond.end());
if (TII->ReverseBranchCondition(RevCond))
llvm_unreachable("Unable to reverse branch condition!");
- TII->InsertBranch(*BBI.BB, CvtBBI->FalseBB, NULL, RevCond, dl);
+ TII->InsertBranch(*BBI.BB, CvtBBI->FalseBB, nullptr, RevCond, dl);
BBI.BB->addSuccessor(CvtBBI->FalseBB);
// Update the edge weight for both CvtBBI->FalseBB and NextBBI.
// New_Weight(BBI.BB, NextBBI->BB) =
@@ -1453,8 +1455,8 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
PredicateBlock(*BBI2, DI2, *Cond2);
// Merge the true block into the entry of the diamond.
- MergeBlocks(BBI, *BBI1, TailBB == 0);
- MergeBlocks(BBI, *BBI2, TailBB == 0);
+ MergeBlocks(BBI, *BBI1, TailBB == nullptr);
+ MergeBlocks(BBI, *BBI2, TailBB == nullptr);
// If the if-converted block falls through or unconditionally branches into
// the tail block, and the tail block does not have other predecessors, then
@@ -1503,7 +1505,7 @@ static bool MaySpeculate(const MachineInstr *MI,
SmallSet<unsigned, 4> &LaterRedefs,
const TargetInstrInfo *TII) {
bool SawStore = true;
- if (!MI->isSafeToMove(TII, 0, SawStore))
+ if (!MI->isSafeToMove(TII, nullptr, SawStore))
return false;
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
@@ -1527,7 +1529,7 @@ void IfConverter::PredicateBlock(BBInfo &BBI,
SmallVectorImpl<MachineOperand> &Cond,
SmallSet<unsigned, 4> *LaterRedefs) {
bool AnyUnpred = false;
- bool MaySpec = LaterRedefs != 0;
+ bool MaySpec = LaterRedefs != nullptr;
for (MachineBasicBlock::iterator I = BBI.BB->begin(); I != E; ++I) {
if (I->isDebugValue() || TII->isPredicated(I))
continue;
@@ -1545,7 +1547,7 @@ void IfConverter::PredicateBlock(BBInfo &BBI,
#ifndef NDEBUG
dbgs() << "Unable to predicate " << *I << "!\n";
#endif
- llvm_unreachable(0);
+ llvm_unreachable(nullptr);
}
// If the predicated instruction now redefines a register as the result of
@@ -1590,7 +1592,7 @@ void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI,
#ifndef NDEBUG
dbgs() << "Unable to predicate " << *I << "!\n";
#endif
- llvm_unreachable(0);
+ llvm_unreachable(nullptr);
}
}
@@ -1607,7 +1609,7 @@ void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI,
std::vector<MachineBasicBlock *> Succs(FromBBI.BB->succ_begin(),
FromBBI.BB->succ_end());
MachineBasicBlock *NBB = getNextBlock(FromBBI.BB);
- MachineBasicBlock *FallThrough = FromBBI.HasFallThrough ? NBB : NULL;
+ MachineBasicBlock *FallThrough = FromBBI.HasFallThrough ? NBB : nullptr;
for (unsigned i = 0, e = Succs.size(); i != e; ++i) {
MachineBasicBlock *Succ = Succs[i];
@@ -1643,7 +1645,7 @@ void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges) {
std::vector<MachineBasicBlock *> Succs(FromBBI.BB->succ_begin(),
FromBBI.BB->succ_end());
MachineBasicBlock *NBB = getNextBlock(FromBBI.BB);
- MachineBasicBlock *FallThrough = FromBBI.HasFallThrough ? NBB : NULL;
+ MachineBasicBlock *FallThrough = FromBBI.HasFallThrough ? NBB : nullptr;
for (unsigned i = 0, e = Succs.size(); i != e; ++i) {
MachineBasicBlock *Succ = Succs[i];
diff --git a/lib/CodeGen/InlineSpiller.cpp b/lib/CodeGen/InlineSpiller.cpp
index 0f7ba8e..f3c8d3d 100644
--- a/lib/CodeGen/InlineSpiller.cpp
+++ b/lib/CodeGen/InlineSpiller.cpp
@@ -12,7 +12,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "regalloc"
#include "Spiller.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
@@ -39,6 +38,8 @@
using namespace llvm;
+#define DEBUG_TYPE "regalloc"
+
STATISTIC(NumSpilledRanges, "Number of spilled live ranges");
STATISTIC(NumSnippets, "Number of spilled snippets");
STATISTIC(NumSpills, "Number of spills inserted");
@@ -121,7 +122,7 @@ public:
SibValueInfo(unsigned Reg, VNInfo *VNI)
: AllDefsAreReloads(true), DefByOrigPHI(false), KillsSource(false),
- SpillReg(Reg), SpillVNI(VNI), SpillMBB(0), DefMI(0) {}
+ SpillReg(Reg), SpillVNI(VNI), SpillMBB(nullptr), DefMI(nullptr) {}
// Returns true when a def has been found.
bool hasDef() const { return DefByOrigPHI || DefMI; }
@@ -167,7 +168,7 @@ private:
bool isSibling(unsigned Reg);
MachineInstr *traceSiblingValue(unsigned, VNInfo*, VNInfo*);
- void propagateSiblingValue(SibValueMap::iterator, VNInfo *VNI = 0);
+ void propagateSiblingValue(SibValueMap::iterator, VNInfo *VNI = nullptr);
void analyzeSiblingValues();
bool hoistSpill(LiveInterval &SpillLI, MachineInstr *CopyMI);
@@ -179,7 +180,7 @@ private:
bool coalesceStackAccess(MachineInstr *MI, unsigned Reg);
bool foldMemoryOperand(ArrayRef<std::pair<MachineInstr*, unsigned> >,
- MachineInstr *LoadMI = 0);
+ MachineInstr *LoadMI = nullptr);
void insertReload(unsigned VReg, SlotIndex, MachineBasicBlock::iterator MI);
void insertSpill(unsigned VReg, bool isKill, MachineBasicBlock::iterator MI);
@@ -236,7 +237,7 @@ bool InlineSpiller::isSnippet(const LiveInterval &SnipLI) {
if (SnipLI.getNumValNums() > 2 || !LIS.intervalIsInOneMBB(SnipLI))
return false;
- MachineInstr *UseMI = 0;
+ MachineInstr *UseMI = nullptr;
// Check that all uses satisfy our criteria.
for (MachineRegisterInfo::reg_instr_nodbg_iterator
@@ -367,7 +368,7 @@ void InlineSpiller::propagateSiblingValue(SibValueMap::iterator SVIIter,
do {
SVI = WorkList.pop_back_val();
TinyPtrVector<VNInfo*> *Deps = VNI ? &FirstDeps : &SVI->second.Deps;
- VNI = 0;
+ VNI = nullptr;
SibValueInfo &SV = SVI->second;
if (!SV.SpillMBB)
@@ -659,7 +660,7 @@ void InlineSpiller::analyzeSiblingValues() {
VNInfo *VNI = *VI;
if (VNI->isUnused())
continue;
- MachineInstr *DefMI = 0;
+ MachineInstr *DefMI = nullptr;
if (!VNI->isPHIDef()) {
DefMI = LIS.getInstructionFromIndex(VNI->def);
assert(DefMI && "No defining instruction");
@@ -1359,7 +1360,7 @@ void InlineSpiller::spill(LiveRangeEdit &edit) {
// Share a stack slot among all descendants of Original.
Original = VRM.getOriginal(edit.getReg());
StackSlot = VRM.getStackSlot(Original);
- StackInt = 0;
+ StackInt = nullptr;
DEBUG(dbgs() << "Inline spilling "
<< MRI.getRegClass(edit.getReg())->getName()
diff --git a/lib/CodeGen/InterferenceCache.cpp b/lib/CodeGen/InterferenceCache.cpp
index 61d065a..187e015 100644
--- a/lib/CodeGen/InterferenceCache.cpp
+++ b/lib/CodeGen/InterferenceCache.cpp
@@ -11,7 +11,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "regalloc"
#include "InterferenceCache.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/Support/ErrorHandling.h"
@@ -19,6 +18,8 @@
using namespace llvm;
+#define DEBUG_TYPE "regalloc"
+
// Static member used for null interference cursors.
InterferenceCache::BlockInterference InterferenceCache::Cursor::NoInterference;
diff --git a/lib/CodeGen/InterferenceCache.h b/lib/CodeGen/InterferenceCache.h
index d3482d0..91a1da9 100644
--- a/lib/CodeGen/InterferenceCache.h
+++ b/lib/CodeGen/InterferenceCache.h
@@ -77,7 +77,8 @@ class InterferenceCache {
/// Iterator pointing into the fixed RegUnit interference.
LiveInterval::iterator FixedI;
- RegUnitInfo(LiveIntervalUnion &LIU) : VirtTag(LIU.getTag()), Fixed(0) {
+ RegUnitInfo(LiveIntervalUnion &LIU)
+ : VirtTag(LIU.getTag()), Fixed(nullptr) {
VirtI.setMap(LIU.getMap());
}
};
@@ -93,7 +94,7 @@ class InterferenceCache {
void update(unsigned MBBNum);
public:
- Entry() : PhysReg(0), Tag(0), RefCount(0), Indexes(0), LIS(0) {}
+ Entry() : PhysReg(0), Tag(0), RefCount(0), Indexes(nullptr), LIS(nullptr) {}
void clear(MachineFunction *mf, SlotIndexes *indexes, LiveIntervals *lis) {
assert(!hasRefs() && "Cannot clear cache entry with references");
@@ -148,8 +149,9 @@ class InterferenceCache {
Entry *get(unsigned PhysReg);
public:
- InterferenceCache() : TRI(0), LIUArray(0), MF(0), PhysRegEntries(NULL),
- PhysRegEntriesCount(0), RoundRobin(0) {}
+ InterferenceCache()
+ : TRI(nullptr), LIUArray(nullptr), MF(nullptr), PhysRegEntries(nullptr),
+ PhysRegEntriesCount(0), RoundRobin(0) {}
~InterferenceCache() {
free(PhysRegEntries);
@@ -172,7 +174,7 @@ public:
static BlockInterference NoInterference;
void setEntry(Entry *E) {
- Current = 0;
+ Current = nullptr;
// Update reference counts. Nothing happens when RefCount reaches 0, so
// we don't have to check for E == CacheEntry etc.
if (CacheEntry)
@@ -184,10 +186,10 @@ public:
public:
/// Cursor - Create a dangling cursor.
- Cursor() : CacheEntry(0), Current(0) {}
- ~Cursor() { setEntry(0); }
+ Cursor() : CacheEntry(nullptr), Current(nullptr) {}
+ ~Cursor() { setEntry(nullptr); }
- Cursor(const Cursor &O) : CacheEntry(0), Current(0) {
+ Cursor(const Cursor &O) : CacheEntry(nullptr), Current(nullptr) {
setEntry(O.CacheEntry);
}
@@ -200,7 +202,7 @@ public:
void setPhysReg(InterferenceCache &Cache, unsigned PhysReg) {
// Release reference before getting a new one. That guarantees we can
// actually have CacheEntries live cursors.
- setEntry(0);
+ setEntry(nullptr);
if (PhysReg)
setEntry(Cache.get(PhysReg));
}
diff --git a/lib/CodeGen/IntrinsicLowering.cpp b/lib/CodeGen/IntrinsicLowering.cpp
index 9977c6b..a8b8600 100644
--- a/lib/CodeGen/IntrinsicLowering.cpp
+++ b/lib/CodeGen/IntrinsicLowering.cpp
@@ -115,21 +115,21 @@ void IntrinsicLowering::AddPrototypes(Module &M) {
Type::getInt8PtrTy(Context),
Type::getInt8PtrTy(Context),
Type::getInt8PtrTy(Context),
- DL.getIntPtrType(Context), (Type *)0);
+ DL.getIntPtrType(Context), nullptr);
break;
case Intrinsic::memmove:
M.getOrInsertFunction("memmove",
Type::getInt8PtrTy(Context),
Type::getInt8PtrTy(Context),
Type::getInt8PtrTy(Context),
- DL.getIntPtrType(Context), (Type *)0);
+ DL.getIntPtrType(Context), nullptr);
break;
case Intrinsic::memset:
M.getOrInsertFunction("memset",
Type::getInt8PtrTy(Context),
Type::getInt8PtrTy(Context),
Type::getInt32Ty(M.getContext()),
- DL.getIntPtrType(Context), (Type *)0);
+ DL.getIntPtrType(Context), nullptr);
break;
case Intrinsic::sqrt:
EnsureFPIntrinsicsExist(M, I, "sqrtf", "sqrt", "sqrtl");
diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp
index 9c2718b..a5ac057 100644
--- a/lib/CodeGen/LLVMTargetMachine.cpp
+++ b/lib/CodeGen/LLVMTargetMachine.cpp
@@ -43,24 +43,6 @@ static cl::opt<cl::boolOrDefault>
EnableFastISelOption("fast-isel", cl::Hidden,
cl::desc("Enable the \"fast\" instruction selector"));
-static cl::opt<bool> ShowMCEncoding("show-mc-encoding", cl::Hidden,
- cl::desc("Show encoding in .s output"));
-static cl::opt<bool> ShowMCInst("show-mc-inst", cl::Hidden,
- cl::desc("Show instruction structure in .s output"));
-
-static cl::opt<cl::boolOrDefault>
-AsmVerbose("asm-verbose", cl::desc("Add comments to directives."),
- cl::init(cl::BOU_UNSET));
-
-static bool getVerboseAsm() {
- switch (AsmVerbose) {
- case cl::BOU_UNSET: return TargetMachine::getAsmVerbosityDefault();
- case cl::BOU_TRUE: return true;
- case cl::BOU_FALSE: return false;
- }
- llvm_unreachable("Invalid verbose asm state");
-}
-
void LLVMTargetMachine::initAsmInfo() {
MCAsmInfo *TmpAsmInfo = TheTarget.createMCAsmInfo(*getRegisterInfo(),
TargetTriple);
@@ -103,7 +85,8 @@ static MCContext *addPassesToGenerateCode(LLVMTargetMachine *TM,
// Add internal analysis passes from the target machine.
TM->addAnalysisPasses(PM);
- // Targets may override createPassConfig to provide a target-specific sublass.
+ // Targets may override createPassConfig to provide a target-specific
+ // subclass.
TargetPassConfig *PassConfig = TM->createPassConfig(PM);
PassConfig->setStartStopPasses(StartAfter, StopAfter);
@@ -138,7 +121,7 @@ static MCContext *addPassesToGenerateCode(LLVMTargetMachine *TM,
// Ask the target for an isel.
if (PassConfig->addInstSelector())
- return NULL;
+ return nullptr;
PassConfig->addMachinePasses();
@@ -169,7 +152,7 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
return false;
}
- if (hasMCSaveTempLabels())
+ if (Options.MCOptions.MCSaveTempLabels)
Context->setAllowTemporaryLabels(false);
const MCAsmInfo &MAI = *getMCAsmInfo();
@@ -185,19 +168,16 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
MII, MRI, STI);
// Create a code emitter if asked to show the encoding.
- MCCodeEmitter *MCE = 0;
- if (ShowMCEncoding)
+ MCCodeEmitter *MCE = nullptr;
+ if (Options.MCOptions.ShowMCEncoding)
MCE = getTarget().createMCCodeEmitter(MII, MRI, STI, *Context);
MCAsmBackend *MAB = getTarget().createMCAsmBackend(MRI, getTargetTriple(),
TargetCPU);
- MCStreamer *S = getTarget().createAsmStreamer(*Context, Out,
- getVerboseAsm(),
- hasMCUseCFI(),
- hasMCUseDwarfDirectory(),
- InstPrinter,
- MCE, MAB,
- ShowMCInst);
+ MCStreamer *S = getTarget().createAsmStreamer(
+ *Context, Out, Options.MCOptions.AsmVerbose,
+ Options.MCOptions.MCUseDwarfDirectory, InstPrinter, MCE, MAB,
+ Options.MCOptions.ShowMCInst);
AsmStreamer.reset(S);
break;
}
@@ -208,12 +188,12 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
*Context);
MCAsmBackend *MAB = getTarget().createMCAsmBackend(MRI, getTargetTriple(),
TargetCPU);
- if (MCE == 0 || MAB == 0)
+ if (!MCE || !MAB)
return true;
AsmStreamer.reset(getTarget().createMCObjectStreamer(
- getTargetTriple(), *Context, *MAB, Out, MCE, STI, hasMCRelaxAll(),
- hasMCNoExecStack()));
+ getTargetTriple(), *Context, *MAB, Out, MCE, STI,
+ Options.MCOptions.MCRelaxAll, Options.MCOptions.MCNoExecStack));
break;
}
case CGFT_Null:
@@ -225,7 +205,7 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
// Create the AsmPrinter, which takes ownership of AsmStreamer if successful.
FunctionPass *Printer = getTarget().createAsmPrinter(*this, *AsmStreamer);
- if (Printer == 0)
+ if (!Printer)
return true;
// If successful, createAsmPrinter took ownership of AsmStreamer.
@@ -246,7 +226,8 @@ bool LLVMTargetMachine::addPassesToEmitMachineCode(PassManagerBase &PM,
JITCodeEmitter &JCE,
bool DisableVerify) {
// Add common CodeGen passes.
- MCContext *Context = addPassesToGenerateCode(this, PM, DisableVerify, 0, 0);
+ MCContext *Context = addPassesToGenerateCode(this, PM, DisableVerify, nullptr,
+ nullptr);
if (!Context)
return true;
@@ -265,11 +246,11 @@ bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM,
raw_ostream &Out,
bool DisableVerify) {
// Add common CodeGen passes.
- Ctx = addPassesToGenerateCode(this, PM, DisableVerify, 0, 0);
+ Ctx = addPassesToGenerateCode(this, PM, DisableVerify, nullptr, nullptr);
if (!Ctx)
return true;
- if (hasMCSaveTempLabels())
+ if (Options.MCOptions.MCSaveTempLabels)
Ctx->setAllowTemporaryLabels(false);
// Create the code emitter for the target if it exists. If not, .o file
@@ -280,17 +261,17 @@ bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM,
STI, *Ctx);
MCAsmBackend *MAB = getTarget().createMCAsmBackend(MRI, getTargetTriple(),
TargetCPU);
- if (MCE == 0 || MAB == 0)
+ if (!MCE || !MAB)
return true;
std::unique_ptr<MCStreamer> AsmStreamer;
AsmStreamer.reset(getTarget().createMCObjectStreamer(
- getTargetTriple(), *Ctx, *MAB, Out, MCE, STI, hasMCRelaxAll(),
- hasMCNoExecStack()));
+ getTargetTriple(), *Ctx, *MAB, Out, MCE, STI,
+ Options.MCOptions.MCRelaxAll, Options.MCOptions.MCNoExecStack));
// Create the AsmPrinter, which takes ownership of AsmStreamer if successful.
FunctionPass *Printer = getTarget().createAsmPrinter(*this, *AsmStreamer);
- if (Printer == 0)
+ if (!Printer)
return true;
// If successful, createAsmPrinter took ownership of AsmStreamer.
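The LLVMTargetMachine hunks retire the file-local cl::opt globals (show-mc-encoding, show-mc-inst, asm-verbose) and read the equivalent fields from Options.MCOptions, so MC-level settings travel with the TargetMachine instead of living in per-file statics. A rough sketch of that shape, with illustrative names rather than the real LLVM classes:

// Illustrative shapes only; the real TargetOptions/MCTargetOptions carry
// many more fields.
struct MCTargetOptionsSketch {
  bool ShowMCEncoding = false;
  bool ShowMCInst = false;
  bool AsmVerbose = false;
  bool MCRelaxAll = false;
};

struct TargetOptionsSketch {
  MCTargetOptionsSketch MCOptions;
};

class TargetMachineSketch {
  TargetOptionsSketch Options;

public:
  void emitAsm() const {
    // Read the field carried by this machine's options instead of
    // consulting a file-local cl::opt global.
    if (Options.MCOptions.ShowMCEncoding) {
      // ... create the MC code emitter ...
    }
  }
};

int main() {
  TargetMachineSketch TM;
  TM.emitAsm();
}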
diff --git a/lib/CodeGen/LatencyPriorityQueue.cpp b/lib/CodeGen/LatencyPriorityQueue.cpp
index e88d537..cdf505e 100644
--- a/lib/CodeGen/LatencyPriorityQueue.cpp
+++ b/lib/CodeGen/LatencyPriorityQueue.cpp
@@ -13,12 +13,13 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "scheduler"
#include "llvm/CodeGen/LatencyPriorityQueue.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
+#define DEBUG_TYPE "scheduler"
+
bool latency_sort::operator()(const SUnit *LHS, const SUnit *RHS) const {
// The isScheduleHigh flag allows nodes with wraparound dependencies that
// cannot easily be modeled as edges with latencies to be scheduled as
@@ -53,7 +54,7 @@ bool latency_sort::operator()(const SUnit *LHS, const SUnit *RHS) const {
/// getSingleUnscheduledPred - If there is exactly one unscheduled predecessor
/// of SU, return it, otherwise return null.
SUnit *LatencyPriorityQueue::getSingleUnscheduledPred(SUnit *SU) {
- SUnit *OnlyAvailablePred = 0;
+ SUnit *OnlyAvailablePred = nullptr;
for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
I != E; ++I) {
SUnit &Pred = *I->getSUnit();
@@ -61,7 +62,7 @@ SUnit *LatencyPriorityQueue::getSingleUnscheduledPred(SUnit *SU) {
// We found an available, but not scheduled, predecessor. If it's the
// only one we have found, keep track of it... otherwise give up.
if (OnlyAvailablePred && OnlyAvailablePred != &Pred)
- return 0;
+ return nullptr;
OnlyAvailablePred = &Pred;
}
}
@@ -105,7 +106,7 @@ void LatencyPriorityQueue::AdjustPriorityOfUnscheduledPreds(SUnit *SU) {
if (SU->isAvailable) return; // All preds scheduled.
SUnit *OnlyAvailablePred = getSingleUnscheduledPred(SU);
- if (OnlyAvailablePred == 0 || !OnlyAvailablePred->isAvailable) return;
+ if (!OnlyAvailablePred || !OnlyAvailablePred->isAvailable) return;
// Okay, we found a single predecessor that is available, but not scheduled.
// Since it is available, it must be in the priority queue. First remove it.
@@ -117,7 +118,7 @@ void LatencyPriorityQueue::AdjustPriorityOfUnscheduledPreds(SUnit *SU) {
}
SUnit *LatencyPriorityQueue::pop() {
- if (empty()) return NULL;
+ if (empty()) return nullptr;
std::vector<SUnit *>::iterator Best = Queue.begin();
for (std::vector<SUnit *>::iterator I = std::next(Queue.begin()),
E = Queue.end(); I != E; ++I)
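getSingleUnscheduledPred above is the classic "unique match or null" scan: remember the first element that qualifies, and bail out with nullptr as soon as a second one appears. A generic standalone version of the same idiom, with findUnique as a made-up name:

#include <vector>

template <typename T, typename Predicate>
T *findUnique(std::vector<T> &Range, Predicate Pred) {
  T *Only = nullptr;
  for (T &Elem : Range) {
    if (!Pred(Elem))
      continue;
    if (Only) // a second match: the answer is no longer unique
      return nullptr;
    Only = &Elem;
  }
  return Only; // null when nothing matched
}

int main() {
  std::vector<int> V{1, 2, 3, 4};
  // Two even elements, so the result is nullptr.
  return findUnique(V, [](int N) { return N % 2 == 0; }) ? 1 : 0;
}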
diff --git a/lib/CodeGen/LexicalScopes.cpp b/lib/CodeGen/LexicalScopes.cpp
index c22ab11..d12c234 100644
--- a/lib/CodeGen/LexicalScopes.cpp
+++ b/lib/CodeGen/LexicalScopes.cpp
@@ -14,7 +14,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "lexicalscopes"
#include "llvm/CodeGen/LexicalScopes.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
@@ -25,15 +24,14 @@
#include "llvm/Support/FormattedStream.h"
using namespace llvm;
-/// ~LexicalScopes - final cleanup after ourselves.
-LexicalScopes::~LexicalScopes() { reset(); }
+#define DEBUG_TYPE "lexicalscopes"
/// reset - Reset the instance so that it's prepared for another function.
void LexicalScopes::reset() {
- MF = NULL;
- CurrentFnLexicalScope = NULL;
- DeleteContainerSeconds(LexicalScopeMap);
- DeleteContainerSeconds(AbstractScopeMap);
+ MF = nullptr;
+ CurrentFnLexicalScope = nullptr;
+ LexicalScopeMap.clear();
+ AbstractScopeMap.clear();
InlinedLexicalScopeMap.clear();
AbstractScopesList.clear();
}
@@ -58,30 +56,26 @@ void LexicalScopes::extractLexicalScopes(
DenseMap<const MachineInstr *, LexicalScope *> &MI2ScopeMap) {
// Scan each instruction and create scopes. First build working set of scopes.
- for (MachineFunction::const_iterator I = MF->begin(), E = MF->end(); I != E;
- ++I) {
- const MachineInstr *RangeBeginMI = NULL;
- const MachineInstr *PrevMI = NULL;
+ for (const auto &MBB : *MF) {
+ const MachineInstr *RangeBeginMI = nullptr;
+ const MachineInstr *PrevMI = nullptr;
DebugLoc PrevDL;
- for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end();
- II != IE; ++II) {
- const MachineInstr *MInsn = II;
-
+ for (const auto &MInsn : MBB) {
// Check if instruction has valid location information.
- const DebugLoc MIDL = MInsn->getDebugLoc();
+ const DebugLoc MIDL = MInsn.getDebugLoc();
if (MIDL.isUnknown()) {
- PrevMI = MInsn;
+ PrevMI = &MInsn;
continue;
}
// If scope has not changed then skip this instruction.
if (MIDL == PrevDL) {
- PrevMI = MInsn;
+ PrevMI = &MInsn;
continue;
}
// Ignore DBG_VALUE. It does not contribute to any instruction in output.
- if (MInsn->isDebugValue())
+ if (MInsn.isDebugValue())
continue;
if (RangeBeginMI) {
@@ -94,10 +88,10 @@ void LexicalScopes::extractLexicalScopes(
}
// This is the beginning of a new instruction range.
- RangeBeginMI = MInsn;
+ RangeBeginMI = &MInsn;
// Reset previous markers.
- PrevMI = MInsn;
+ PrevMI = &MInsn;
PrevDL = MIDL;
}
@@ -110,14 +104,22 @@ void LexicalScopes::extractLexicalScopes(
}
}
+LexicalScope *LexicalScopes::findInlinedScope(DebugLoc DL) {
+ MDNode *Scope = nullptr;
+ MDNode *IA = nullptr;
+ DL.getScopeAndInlinedAt(Scope, IA, MF->getFunction()->getContext());
+ auto I = InlinedLexicalScopeMap.find(std::make_pair(Scope, IA));
+ return I != InlinedLexicalScopeMap.end() ? &I->second : nullptr;
+}
+
/// findLexicalScope - Find lexical scope, either regular or inlined, for the
/// given DebugLoc. Return NULL if not found.
LexicalScope *LexicalScopes::findLexicalScope(DebugLoc DL) {
- MDNode *Scope = NULL;
- MDNode *IA = NULL;
+ MDNode *Scope = nullptr;
+ MDNode *IA = nullptr;
DL.getScopeAndInlinedAt(Scope, IA, MF->getFunction()->getContext());
if (!Scope)
- return NULL;
+ return nullptr;
// The scope that we were created with could have an extra file - which
// isn't what we care about in this case.
@@ -125,16 +127,18 @@ LexicalScope *LexicalScopes::findLexicalScope(DebugLoc DL) {
if (D.isLexicalBlockFile())
Scope = DILexicalBlockFile(Scope).getScope();
- if (IA)
- return InlinedLexicalScopeMap.lookup(DebugLoc::getFromDILocation(IA));
- return LexicalScopeMap.lookup(Scope);
+ if (IA) {
+ auto I = InlinedLexicalScopeMap.find(std::make_pair(Scope, IA));
+ return I != InlinedLexicalScopeMap.end() ? &I->second : nullptr;
+ }
+ return findLexicalScope(Scope);
}
/// getOrCreateLexicalScope - Find lexical scope for the given DebugLoc. If
/// not available then create new lexical scope.
LexicalScope *LexicalScopes::getOrCreateLexicalScope(DebugLoc DL) {
- MDNode *Scope = NULL;
- MDNode *InlinedAt = NULL;
+ MDNode *Scope = nullptr;
+ MDNode *InlinedAt = nullptr;
DL.getScopeAndInlinedAt(Scope, InlinedAt, MF->getFunction()->getContext());
if (InlinedAt) {
@@ -155,35 +159,48 @@ LexicalScope *LexicalScopes::getOrCreateRegularScope(MDNode *Scope) {
D = DIDescriptor(Scope);
}
- LexicalScope *WScope = LexicalScopeMap.lookup(Scope);
- if (WScope)
- return WScope;
+ auto I = LexicalScopeMap.find(Scope);
+ if (I != LexicalScopeMap.end())
+ return &I->second;
- LexicalScope *Parent = NULL;
+ LexicalScope *Parent = nullptr;
if (D.isLexicalBlock())
Parent = getOrCreateLexicalScope(DebugLoc::getFromDILexicalBlock(Scope));
- WScope = new LexicalScope(Parent, DIDescriptor(Scope), NULL, false);
- LexicalScopeMap.insert(std::make_pair(Scope, WScope));
+ // FIXME: Use forward_as_tuple instead of make_tuple, once MSVC2012
+ // compatibility is no longer required.
+ I = LexicalScopeMap.emplace(std::piecewise_construct, std::make_tuple(Scope),
+ std::make_tuple(Parent, DIDescriptor(Scope),
+ nullptr, false)).first;
+
if (!Parent && DIDescriptor(Scope).isSubprogram() &&
DISubprogram(Scope).describes(MF->getFunction()))
- CurrentFnLexicalScope = WScope;
+ CurrentFnLexicalScope = &I->second;
- return WScope;
+ return &I->second;
}
/// getOrCreateInlinedScope - Find or create an inlined lexical scope.
-LexicalScope *LexicalScopes::getOrCreateInlinedScope(MDNode *Scope,
+LexicalScope *LexicalScopes::getOrCreateInlinedScope(MDNode *ScopeNode,
MDNode *InlinedAt) {
- LexicalScope *InlinedScope = LexicalScopeMap.lookup(InlinedAt);
- if (InlinedScope)
- return InlinedScope;
-
- DebugLoc InlinedLoc = DebugLoc::getFromDILocation(InlinedAt);
- InlinedScope = new LexicalScope(getOrCreateLexicalScope(InlinedLoc),
- DIDescriptor(Scope), InlinedAt, false);
- InlinedLexicalScopeMap[InlinedLoc] = InlinedScope;
- LexicalScopeMap[InlinedAt] = InlinedScope;
- return InlinedScope;
+ std::pair<const MDNode*, const MDNode*> P(ScopeNode, InlinedAt);
+ auto I = InlinedLexicalScopeMap.find(P);
+ if (I != InlinedLexicalScopeMap.end())
+ return &I->second;
+
+ LexicalScope *Parent;
+ DILexicalBlock Scope(ScopeNode);
+ if (Scope.isSubprogram())
+ Parent = getOrCreateLexicalScope(DebugLoc::getFromDILocation(InlinedAt));
+ else
+ Parent = getOrCreateInlinedScope(Scope.getContext(), InlinedAt);
+
+ // FIXME: Use forward_as_tuple instead of make_tuple, once MSVC2012
+ // compatibility is no longer required.
+ I = InlinedLexicalScopeMap.emplace(std::piecewise_construct,
+ std::make_tuple(P),
+ std::make_tuple(Parent, Scope, InlinedAt,
+ false)).first;
+ return &I->second;
}
/// getOrCreateAbstractScope - Find or create an abstract lexical scope.
@@ -193,21 +210,23 @@ LexicalScope *LexicalScopes::getOrCreateAbstractScope(const MDNode *N) {
DIDescriptor Scope(N);
if (Scope.isLexicalBlockFile())
Scope = DILexicalBlockFile(Scope).getScope();
- LexicalScope *AScope = AbstractScopeMap.lookup(N);
- if (AScope)
- return AScope;
+ auto I = AbstractScopeMap.find(Scope);
+ if (I != AbstractScopeMap.end())
+ return &I->second;
- LexicalScope *Parent = NULL;
+ LexicalScope *Parent = nullptr;
if (Scope.isLexicalBlock()) {
- DILexicalBlock DB(N);
+ DILexicalBlock DB(Scope);
DIDescriptor ParentDesc = DB.getContext();
Parent = getOrCreateAbstractScope(ParentDesc);
}
- AScope = new LexicalScope(Parent, DIDescriptor(N), NULL, true);
- AbstractScopeMap[N] = AScope;
- if (DIDescriptor(N).isSubprogram())
- AbstractScopesList.push_back(AScope);
- return AScope;
+ I = AbstractScopeMap.emplace(std::piecewise_construct,
+ std::forward_as_tuple(Scope),
+ std::forward_as_tuple(Parent, Scope,
+ nullptr, true)).first;
+ if (Scope.isSubprogram())
+ AbstractScopesList.push_back(&I->second);
+ return &I->second;
}
/// constructScopeNest
@@ -244,7 +263,7 @@ void LexicalScopes::assignInstructionRanges(
SmallVectorImpl<InsnRange> &MIRanges,
DenseMap<const MachineInstr *, LexicalScope *> &MI2ScopeMap) {
- LexicalScope *PrevLexicalScope = NULL;
+ LexicalScope *PrevLexicalScope = nullptr;
for (SmallVectorImpl<InsnRange>::const_iterator RI = MIRanges.begin(),
RE = MIRanges.end();
RI != RE; ++RI) {
@@ -273,9 +292,8 @@ void LexicalScopes::getMachineBasicBlocks(
return;
if (Scope == CurrentFnLexicalScope) {
- for (MachineFunction::const_iterator I = MF->begin(), E = MF->end(); I != E;
- ++I)
- MBBs.insert(I);
+ for (const auto &MBB : *MF)
+ MBBs.insert(&MBB);
return;
}
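The LexicalScopes hunks replace maps of heap-allocated LexicalScope pointers, previously torn down with DeleteContainerSeconds, with maps that own LexicalScope by value; entries are built in place via emplace(std::piecewise_construct, ...) because the value type is constructed from several arguments rather than copied in (the patch's make_tuple is only an MSVC2012 workaround, per its own FIXME). A minimal sketch of the pattern with a stand-in Scope type:

#include <map>
#include <string>
#include <tuple>
#include <utility>

struct Scope {
  Scope(Scope *Parent, std::string Name)
      : Parent(Parent), Name(std::move(Name)) {}
  Scope(const Scope &) = delete; // non-copyable, like LexicalScope
  Scope *Parent;
  std::string Name;
};

int main() {
  std::map<int, Scope> Scopes;
  // piecewise_construct builds the key and the value from separate
  // argument tuples, so no temporary Scope is copied or moved.
  auto I = Scopes.emplace(std::piecewise_construct,
                          std::forward_as_tuple(42),
                          std::forward_as_tuple(nullptr, "outer")).first;
  Scope *S = &I->second; // node-based map: address stays stable
  (void)S;
  Scopes.clear(); // destroys values; no explicit delete needed
}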
diff --git a/lib/CodeGen/LiveDebugVariables.cpp b/lib/CodeGen/LiveDebugVariables.cpp
index bef4156..388f58f 100644
--- a/lib/CodeGen/LiveDebugVariables.cpp
+++ b/lib/CodeGen/LiveDebugVariables.cpp
@@ -19,7 +19,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "livedebug"
#include "LiveDebugVariables.h"
#include "llvm/ADT/IntervalMap.h"
#include "llvm/ADT/Statistic.h"
@@ -41,8 +40,12 @@
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include <memory>
+
using namespace llvm;
+#define DEBUG_TYPE "livedebug"
+
static cl::opt<bool>
EnableLDV("live-debug-variables", cl::init(true),
cl::desc("Enable the live debug variables pass"), cl::Hidden);
@@ -64,7 +67,7 @@ void LiveDebugVariables::getAnalysisUsage(AnalysisUsage &AU) const {
MachineFunctionPass::getAnalysisUsage(AU);
}
-LiveDebugVariables::LiveDebugVariables() : MachineFunctionPass(ID), pImpl(0) {
+LiveDebugVariables::LiveDebugVariables() : MachineFunctionPass(ID), pImpl(nullptr) {
initializeLiveDebugVariablesPass(*PassRegistry::getPassRegistry());
}
@@ -139,7 +142,7 @@ public:
UserValue(const MDNode *var, unsigned o, bool i, DebugLoc L,
LocMap::Allocator &alloc)
: variable(var), offset(o), IsIndirect(i), dl(L), leader(this),
- next(0), locInts(alloc)
+ next(nullptr), locInts(alloc)
{}
/// getLeader - Get the leader of this value's equivalence class.
@@ -154,8 +157,8 @@ public:
UserValue *getNext() const { return next; }
/// match - Does this UserValue match the parameters?
- bool match(const MDNode *Var, unsigned Offset) const {
- return Var == variable && Offset == offset;
+ bool match(const MDNode *Var, unsigned Offset, bool indirect) const {
+ return Var == variable && Offset == offset && indirect == IsIndirect;
}
/// merge - Merge equivalence classes.
@@ -292,7 +295,7 @@ class LDVImpl {
bool ModifiedMF;
/// userValues - All allocated UserValue instances.
- SmallVector<UserValue*, 8> userValues;
+ SmallVector<std::unique_ptr<UserValue>, 8> userValues;
/// Map virtual register to eq class leader.
typedef DenseMap<unsigned, UserValue*> VRMap;
@@ -332,7 +335,6 @@ public:
/// clear - Release all memory.
void clear() {
- DeleteContainerPointers(userValues);
userValues.clear();
virtRegToEqClass.clear();
userVarMap.clear();
@@ -425,12 +427,13 @@ UserValue *LDVImpl::getUserValue(const MDNode *Var, unsigned Offset,
UserValue *UV = Leader->getLeader();
Leader = UV;
for (; UV; UV = UV->getNext())
- if (UV->match(Var, Offset))
+ if (UV->match(Var, Offset, IsIndirect))
return UV;
}
- UserValue *UV = new UserValue(Var, Offset, IsIndirect, DL, allocator);
- userValues.push_back(UV);
+ userValues.push_back(
+ make_unique<UserValue>(Var, Offset, IsIndirect, DL, allocator));
+ UserValue *UV = userValues.back().get();
Leader = UserValue::merge(Leader, UV);
return UV;
}
@@ -444,7 +447,7 @@ void LDVImpl::mapVirtReg(unsigned VirtReg, UserValue *EC) {
UserValue *LDVImpl::lookupVirtReg(unsigned VirtReg) {
if (UserValue *UV = virtRegToEqClass.lookup(VirtReg))
return UV->getLeader();
- return 0;
+ return nullptr;
}
bool LDVImpl::handleDebugValue(MachineInstr *MI, SlotIndex Idx) {
@@ -646,14 +649,14 @@ UserValue::computeIntervals(MachineRegisterInfo &MRI,
const MachineOperand &Loc = locations[LocNo];
if (!Loc.isReg()) {
- extendDef(Idx, LocNo, 0, 0, 0, LIS, MDT, UVS);
+ extendDef(Idx, LocNo, nullptr, nullptr, nullptr, LIS, MDT, UVS);
continue;
}
// Register locations are constrained to where the register value is live.
if (TargetRegisterInfo::isVirtualRegister(Loc.getReg())) {
- LiveInterval *LI = 0;
- const VNInfo *VNI = 0;
+ LiveInterval *LI = nullptr;
+ const VNInfo *VNI = nullptr;
if (LIS.hasInterval(Loc.getReg())) {
LI = &LIS.getInterval(Loc.getReg());
VNI = LI->getVNInfoAt(Idx);
@@ -670,7 +673,7 @@ UserValue::computeIntervals(MachineRegisterInfo &MRI,
LiveRange *LR = &LIS.getRegUnit(Unit);
const VNInfo *VNI = LR->getVNInfoAt(Idx);
// Don't track copies from physregs; it is too expensive.
- extendDef(Idx, LocNo, LR, VNI, 0, LIS, MDT, UVS);
+ extendDef(Idx, LocNo, LR, VNI, nullptr, LIS, MDT, UVS);
}
// Finally, erase all the undefs.
@@ -733,7 +736,7 @@ UserValue::splitLocation(unsigned OldLocNo, ArrayRef<unsigned> NewRegs,
LiveIntervals& LIS) {
DEBUG({
dbgs() << "Splitting Loc" << OldLocNo << '\t';
- print(dbgs(), 0);
+ print(dbgs(), nullptr);
});
bool DidChange = false;
LocMap::iterator LocMapI;
@@ -823,7 +826,7 @@ UserValue::splitLocation(unsigned OldLocNo, ArrayRef<unsigned> NewRegs,
}
}
- DEBUG({dbgs() << "Split result: \t"; print(dbgs(), 0);});
+ DEBUG({dbgs() << "Split result: \t"; print(dbgs(), nullptr);});
return DidChange;
}
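Here ownership of the UserValue objects moves from raw pointers plus a manual DeleteContainerPointers call into SmallVector<std::unique_ptr<UserValue>, 8>, so clear() alone releases everything. The patch uses LLVM's make_unique because std::make_unique only arrived in C++14; the sketch below, with a stand-in type, uses the standard one:

#include <memory>
#include <vector>

struct UserValueSketch {
  int Offset;
  explicit UserValueSketch(int Offset) : Offset(Offset) {}
};

int main() {
  std::vector<std::unique_ptr<UserValueSketch>> Values;
  Values.push_back(std::make_unique<UserValueSketch>(8));
  UserValueSketch *Raw = Values.back().get(); // non-owning view, as in the patch
  (void)Raw;
  Values.clear(); // destroys every UserValueSketch; no manual delete loop
}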
diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp
index 3a7ac11..ce8ce96 100644
--- a/lib/CodeGen/LiveInterval.cpp
+++ b/lib/CodeGen/LiveInterval.cpp
@@ -331,13 +331,13 @@ LiveRange::iterator LiveRange::addSegmentFrom(Segment S, iterator From) {
/// the value. If there is no live range before Kill, return NULL.
VNInfo *LiveRange::extendInBlock(SlotIndex StartIdx, SlotIndex Kill) {
if (empty())
- return 0;
+ return nullptr;
iterator I = std::upper_bound(begin(), end(), Kill.getPrevSlot());
if (I == begin())
- return 0;
+ return nullptr;
--I;
if (I->end <= StartIdx)
- return 0;
+ return nullptr;
if (I->end < Kill)
extendSegmentEndTo(I, Kill);
return I->valno;
@@ -435,7 +435,7 @@ void LiveRange::join(LiveRange &Other,
OutIt->valno = NewVNInfo[LHSValNoAssignments[OutIt->valno->id]];
for (iterator I = std::next(OutIt), E = end(); I != E; ++I) {
VNInfo* nextValNo = NewVNInfo[LHSValNoAssignments[I->valno->id]];
- assert(nextValNo != 0 && "Huh?");
+ assert(nextValNo && "Huh?");
// If this live range has the same value # as its immediate predecessor,
// and if they are neighbors, remove one Segment. This happens when we
@@ -638,7 +638,7 @@ void LiveRange::verify() const {
assert(I->start.isValid());
assert(I->end.isValid());
assert(I->start < I->end);
- assert(I->valno != 0);
+ assert(I->valno != nullptr);
assert(I->valno->id < valnos.size());
assert(I->valno == valnos[I->valno->id]);
if (std::next(I) != E) {
@@ -857,7 +857,7 @@ unsigned ConnectedVNInfoEqClasses::Classify(const LiveInterval *LI) {
EqClass.clear();
EqClass.grow(LI->getNumValNums());
- const VNInfo *used = 0, *unused = 0;
+ const VNInfo *used = nullptr, *unused = nullptr;
// Determine connections.
for (LiveInterval::const_vni_iterator I = LI->vni_begin(), E = LI->vni_end();
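extendInBlock above uses the standard sorted-range trick: std::upper_bound finds the first segment starting after the query point, and stepping back one iterator yields the only segment that could still cover it. A simplified standalone version over half-open [Start, End) segments, with Seg and segmentBefore as illustrative stand-ins:

#include <algorithm>
#include <vector>

struct Seg { int Start, End; }; // half-open [Start, End)

static const Seg *segmentBefore(const std::vector<Seg> &Segs, int Idx) {
  // First segment starting strictly after Idx...
  auto I = std::upper_bound(Segs.begin(), Segs.end(), Idx,
                            [](int V, const Seg &S) { return V < S.Start; });
  if (I == Segs.begin())
    return nullptr; // every segment starts after Idx
  --I;              // ...so this one is the last starting at or before Idx
  return I->End > Idx ? &*I : nullptr; // it may still end too early
}

int main() {
  std::vector<Seg> Segs = {{0, 4}, {10, 14}};
  return segmentBefore(Segs, 12) ? 0 : 1; // 12 lies inside [10, 14)
}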
diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp
index fdc673f..3563f8e 100644
--- a/lib/CodeGen/LiveIntervalAnalysis.cpp
+++ b/lib/CodeGen/LiveIntervalAnalysis.cpp
@@ -15,7 +15,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "regalloc"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "LiveRangeCalc.h"
#include "llvm/ADT/DenseSet.h"
@@ -42,6 +41,8 @@
#include <limits>
using namespace llvm;
+#define DEBUG_TYPE "regalloc"
+
char LiveIntervals::ID = 0;
char &llvm::LiveIntervalsID = LiveIntervals::ID;
INITIALIZE_PASS_BEGIN(LiveIntervals, "liveintervals",
@@ -79,7 +80,7 @@ void LiveIntervals::getAnalysisUsage(AnalysisUsage &AU) const {
}
LiveIntervals::LiveIntervals() : MachineFunctionPass(ID),
- DomTree(0), LRCalc(0) {
+ DomTree(nullptr), LRCalc(nullptr) {
initializeLiveIntervalsPass(*PassRegistry::getPassRegistry());
}
@@ -572,9 +573,9 @@ void LiveIntervals::addKillFlags(const VirtRegMap *VRM) {
break;
}
if (CancelKill)
- MI->clearRegisterKills(Reg, NULL);
+ MI->clearRegisterKills(Reg, nullptr);
else
- MI->addRegisterKilled(Reg, NULL);
+ MI->addRegisterKilled(Reg, nullptr);
}
}
}
@@ -590,17 +591,17 @@ LiveIntervals::intervalIsInOneMBB(const LiveInterval &LI) const {
SlotIndex Start = LI.beginIndex();
if (Start.isBlock())
- return NULL;
+ return nullptr;
SlotIndex Stop = LI.endIndex();
if (Stop.isBlock())
- return NULL;
+ return nullptr;
// getMBBFromIndex doesn't need to search the MBB table when both indexes
// belong to proper instructions.
MachineBasicBlock *MBB1 = Indexes->getMBBFromIndex(Start);
MachineBasicBlock *MBB2 = Indexes->getMBBFromIndex(Stop);
- return MBB1 == MBB2 ? MBB1 : NULL;
+ return MBB1 == MBB2 ? MBB1 : nullptr;
}
bool
diff --git a/lib/CodeGen/LiveIntervalUnion.cpp b/lib/CodeGen/LiveIntervalUnion.cpp
index d5a81a3..d81221b 100644
--- a/lib/CodeGen/LiveIntervalUnion.cpp
+++ b/lib/CodeGen/LiveIntervalUnion.cpp
@@ -13,7 +13,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "regalloc"
#include "llvm/CodeGen/LiveIntervalUnion.h"
#include "llvm/ADT/SparseBitVector.h"
#include "llvm/Support/Debug.h"
@@ -23,6 +22,8 @@
using namespace llvm;
+#define DEBUG_TYPE "regalloc"
+
// Merge a LiveInterval's segments. Guarantee no overlaps.
void LiveIntervalUnion::unify(LiveInterval &VirtReg) {
@@ -138,7 +139,7 @@ collectInterferingVRegs(unsigned MaxInterferingRegs) {
}
LiveInterval::iterator VirtRegEnd = VirtReg->end();
- LiveInterval *RecentReg = 0;
+ LiveInterval *RecentReg = nullptr;
while (LiveUnionI.valid()) {
assert(VirtRegI != VirtRegEnd && "Reached end of VirtReg");
@@ -200,5 +201,5 @@ void LiveIntervalUnion::Array::clear() {
LIUs[i].~LiveIntervalUnion();
free(LIUs);
Size = 0;
- LIUs = 0;
+ LIUs = nullptr;
}
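Array::clear above shows the matching teardown for storage obtained with malloc and populated with placement new: each element's destructor must be invoked explicitly before free(), and the patch's only change is writing the final reset as nullptr. A self-contained sketch of that lifecycle:

#include <cstdlib>
#include <new>

struct Widget {
  Widget() {}
  ~Widget() {}
};

int main() {
  const unsigned Size = 4;
  Widget *W = static_cast<Widget *>(std::malloc(sizeof(Widget) * Size));
  if (!W)
    return 1;
  for (unsigned i = 0; i != Size; ++i)
    new (static_cast<void *>(W + i)) Widget(); // construct into raw storage
  for (unsigned i = 0; i != Size; ++i)
    W[i].~Widget(); // explicit destructor call, as in Array::clear
  std::free(W);
  W = nullptr; // mirrors the 0 -> nullptr change in the hunk
  (void)W;
}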
diff --git a/lib/CodeGen/LiveRangeCalc.cpp b/lib/CodeGen/LiveRangeCalc.cpp
index ecd75b4..a558e14 100644
--- a/lib/CodeGen/LiveRangeCalc.cpp
+++ b/lib/CodeGen/LiveRangeCalc.cpp
@@ -11,13 +11,14 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "regalloc"
#include "LiveRangeCalc.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
using namespace llvm;
+#define DEBUG_TYPE "regalloc"
+
void LiveRangeCalc::reset(const MachineFunction *mf,
SlotIndexes *SI,
MachineDominatorTree *MDT,
@@ -121,7 +122,7 @@ void LiveRangeCalc::updateLiveIns() {
// The value is live-through, update LiveOut as well.
// Defer the Domtree lookup until it is needed.
assert(Seen.test(MBB->getNumber()));
- LiveOut[MBB] = LiveOutPair(I->Value, (MachineDomTreeNode *)0);
+ LiveOut[MBB] = LiveOutPair(I->Value, (MachineDomTreeNode *)nullptr);
}
Updater.setDest(&I->LR);
Updater.add(Start, End, I->Value);
@@ -174,7 +175,7 @@ bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &KillMBB,
// Remember if we have seen more than one value.
bool UniqueVNI = true;
- VNInfo *TheVNI = 0;
+ VNInfo *TheVNI = nullptr;
// Using Seen as a visited set, perform a BFS for all reaching defs.
for (unsigned i = 0; i != WorkList.size(); ++i) {
@@ -251,7 +252,7 @@ bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &KillMBB,
End = Kill;
else
LiveOut[MF->getBlockNumbered(*I)] =
- LiveOutPair(TheVNI, (MachineDomTreeNode *)0);
+ LiveOutPair(TheVNI, nullptr);
Updater.add(Start, End, TheVNI);
}
return true;
@@ -345,7 +346,7 @@ void LiveRangeCalc::updateSSA() {
VNInfo *VNI = LR.getNextValue(Start, *Alloc);
I->Value = VNI;
// This block is done, we know the final value.
- I->DomNode = 0;
+ I->DomNode = nullptr;
// Add liveness since updateLiveIns now skips this node.
if (I->Kill.isValid())
diff --git a/lib/CodeGen/LiveRangeCalc.h b/lib/CodeGen/LiveRangeCalc.h
index a3a3fbb..67ab559 100644
--- a/lib/CodeGen/LiveRangeCalc.h
+++ b/lib/CodeGen/LiveRangeCalc.h
@@ -92,7 +92,7 @@ class LiveRangeCalc {
VNInfo *Value;
LiveInBlock(LiveRange &LR, MachineDomTreeNode *node, SlotIndex kill)
- : LR(LR), DomNode(node), Kill(kill), Value(0) {}
+ : LR(LR), DomNode(node), Kill(kill), Value(nullptr) {}
};
/// LiveIn - Work list of blocks where the live-in value has yet to be
@@ -125,7 +125,8 @@ class LiveRangeCalc {
void updateLiveIns();
public:
- LiveRangeCalc() : MF(0), MRI(0), Indexes(0), DomTree(0), Alloc(0) {}
+ LiveRangeCalc() : MF(nullptr), MRI(nullptr), Indexes(nullptr),
+ DomTree(nullptr), Alloc(nullptr) {}
//===--------------------------------------------------------------------===//
// High-level interface.
@@ -203,7 +204,7 @@ public:
/// addLiveInBlock().
void setLiveOutValue(MachineBasicBlock *MBB, VNInfo *VNI) {
Seen.set(MBB->getNumber());
- LiveOut[MBB] = LiveOutPair(VNI, (MachineDomTreeNode *)0);
+ LiveOut[MBB] = LiveOutPair(VNI, nullptr);
}
/// addLiveInBlock - Add a block with an unknown live-in value. This
diff --git a/lib/CodeGen/LiveRangeEdit.cpp b/lib/CodeGen/LiveRangeEdit.cpp
index 891eaab..431241f 100644
--- a/lib/CodeGen/LiveRangeEdit.cpp
+++ b/lib/CodeGen/LiveRangeEdit.cpp
@@ -11,7 +11,6 @@
// is spilled or split.
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "regalloc"
#include "llvm/CodeGen/LiveRangeEdit.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/CalcSpillWeights.h"
@@ -24,6 +23,8 @@
using namespace llvm;
+#define DEBUG_TYPE "regalloc"
+
STATISTIC(NumDCEDeleted, "Number of instructions deleted by DCE");
STATISTIC(NumDCEFoldedLoads, "Number of single use loads folded after DCE");
STATISTIC(NumFracRanges, "Number of live ranges fractured by DCE");
@@ -164,7 +165,7 @@ void LiveRangeEdit::eraseVirtReg(unsigned Reg) {
bool LiveRangeEdit::foldAsLoad(LiveInterval *LI,
SmallVectorImpl<MachineInstr*> &Dead) {
- MachineInstr *DefMI = 0, *UseMI = 0;
+ MachineInstr *DefMI = nullptr, *UseMI = nullptr;
// Check that there is a single def and a single use.
for (MachineOperand &MO : MRI.reg_nodbg_operands(LI->reg)) {
@@ -197,7 +198,7 @@ bool LiveRangeEdit::foldAsLoad(LiveInterval *LI,
// We also need to make sure it is safe to move the load.
// Assume there are stores between DefMI and UseMI.
bool SawStore = true;
- if (!DefMI->isSafeToMove(&TII, 0, SawStore))
+ if (!DefMI->isSafeToMove(&TII, nullptr, SawStore))
return false;
DEBUG(dbgs() << "Try to fold single def: " << *DefMI
@@ -213,7 +214,7 @@ bool LiveRangeEdit::foldAsLoad(LiveInterval *LI,
DEBUG(dbgs() << " folded: " << *FoldMI);
LIS.ReplaceMachineInstrInMaps(UseMI, FoldMI);
UseMI->eraseFromParent();
- DefMI->addRegisterDead(LI->reg, 0);
+ DefMI->addRegisterDead(LI->reg, nullptr);
Dead.push_back(DefMI);
++NumDCEFoldedLoads;
return true;
@@ -236,7 +237,7 @@ void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink) {
// Use the same criteria as DeadMachineInstructionElim.
bool SawStore = false;
- if (!MI->isSafeToMove(&TII, 0, SawStore)) {
+ if (!MI->isSafeToMove(&TII, nullptr, SawStore)) {
DEBUG(dbgs() << "Can't delete: " << Idx << '\t' << *MI);
return;
}
diff --git a/lib/CodeGen/LiveRegMatrix.cpp b/lib/CodeGen/LiveRegMatrix.cpp
index 7f797be..de2ce22 100644
--- a/lib/CodeGen/LiveRegMatrix.cpp
+++ b/lib/CodeGen/LiveRegMatrix.cpp
@@ -11,7 +11,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "regalloc"
#include "llvm/CodeGen/LiveRegMatrix.h"
#include "RegisterCoalescer.h"
#include "llvm/ADT/Statistic.h"
@@ -25,6 +24,8 @@
using namespace llvm;
+#define DEBUG_TYPE "regalloc"
+
STATISTIC(NumAssigned , "Number of registers assigned");
STATISTIC(NumUnassigned , "Number of registers unassigned");
diff --git a/lib/CodeGen/LiveStackAnalysis.cpp b/lib/CodeGen/LiveStackAnalysis.cpp
index be11a8f..b3161a4 100644
--- a/lib/CodeGen/LiveStackAnalysis.cpp
+++ b/lib/CodeGen/LiveStackAnalysis.cpp
@@ -13,7 +13,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "livestacks"
#include "llvm/CodeGen/LiveStackAnalysis.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
@@ -24,6 +23,8 @@
#include <limits>
using namespace llvm;
+#define DEBUG_TYPE "livestacks"
+
char LiveStacks::ID = 0;
INITIALIZE_PASS_BEGIN(LiveStacks, "livestacks",
"Live Stack Slot Analysis", false, false)
diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp
index ed55d7a..758b216 100644
--- a/lib/CodeGen/LiveVariables.cpp
+++ b/lib/CodeGen/LiveVariables.cpp
@@ -61,7 +61,7 @@ LiveVariables::VarInfo::findKill(const MachineBasicBlock *MBB) const {
for (unsigned i = 0, e = Kills.size(); i != e; ++i)
if (Kills[i]->getParent() == MBB)
return Kills[i];
- return NULL;
+ return nullptr;
}
void LiveVariables::VarInfo::dump() const {
@@ -193,7 +193,7 @@ MachineInstr *LiveVariables::FindLastPartialDef(unsigned Reg,
SmallSet<unsigned,4> &PartDefRegs) {
unsigned LastDefReg = 0;
unsigned LastDefDist = 0;
- MachineInstr *LastDef = NULL;
+ MachineInstr *LastDef = nullptr;
for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
unsigned SubReg = *SubRegs;
MachineInstr *Def = PhysRegDef[SubReg];
@@ -208,7 +208,7 @@ MachineInstr *LiveVariables::FindLastPartialDef(unsigned Reg,
}
if (!LastDef)
- return 0;
+ return nullptr;
PartDefRegs.insert(LastDefReg);
for (unsigned i = 0, e = LastDef->getNumOperands(); i != e; ++i) {
@@ -282,7 +282,7 @@ MachineInstr *LiveVariables::FindLastRefOrPartRef(unsigned Reg) {
MachineInstr *LastDef = PhysRegDef[Reg];
MachineInstr *LastUse = PhysRegUse[Reg];
if (!LastDef && !LastUse)
- return 0;
+ return nullptr;
MachineInstr *LastRefOrPartRef = LastUse ? LastUse : LastDef;
unsigned LastRefOrPartRefDist = DistanceMap[LastRefOrPartRef];
@@ -333,7 +333,7 @@ bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *MI) {
// AX<dead> = AL<imp-def>
// = AL<kill>
// AX =
- MachineInstr *LastPartDef = 0;
+ MachineInstr *LastPartDef = nullptr;
unsigned LastPartDefDist = 0;
SmallSet<unsigned, 8> PartUses;
for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
@@ -436,7 +436,7 @@ void LiveVariables::HandleRegMask(const MachineOperand &MO) {
for (MCSuperRegIterator SR(Reg, TRI); SR.isValid(); ++SR)
if ((PhysRegDef[*SR] || PhysRegUse[*SR]) && MO.clobbersPhysReg(*SR))
Super = *SR;
- HandlePhysRegKill(Super, 0);
+ HandlePhysRegKill(Super, nullptr);
}
}
@@ -492,7 +492,7 @@ void LiveVariables::UpdatePhysRegDefs(MachineInstr *MI,
SubRegs.isValid(); ++SubRegs) {
unsigned SubReg = *SubRegs;
PhysRegDef[SubReg] = MI;
- PhysRegUse[SubReg] = NULL;
+ PhysRegUse[SubReg] = nullptr;
}
}
}
@@ -506,8 +506,8 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) {
PhysRegDef = new MachineInstr*[NumRegs];
PhysRegUse = new MachineInstr*[NumRegs];
PHIVarInfo = new SmallVector<unsigned, 4>[MF->getNumBlockIDs()];
- std::fill(PhysRegDef, PhysRegDef + NumRegs, (MachineInstr*)0);
- std::fill(PhysRegUse, PhysRegUse + NumRegs, (MachineInstr*)0);
+ std::fill(PhysRegDef, PhysRegDef + NumRegs, nullptr);
+ std::fill(PhysRegUse, PhysRegUse + NumRegs, nullptr);
PHIJoins.clear();
// FIXME: LiveIntervals will be updated to remove its dependence on
@@ -536,7 +536,7 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) {
EE = MBB->livein_end(); II != EE; ++II) {
assert(TargetRegisterInfo::isPhysicalRegister(*II) &&
"Cannot have a live-in virtual register!");
- HandlePhysRegDef(*II, 0, Defs);
+ HandlePhysRegDef(*II, nullptr, Defs);
}
// Loop over all of the instructions, processing them.
@@ -639,10 +639,10 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) {
// available at the end of the basic block.
for (unsigned i = 0; i != NumRegs; ++i)
if ((PhysRegDef[i] || PhysRegUse[i]) && !LiveOuts.count(i))
- HandlePhysRegDef(i, 0, Defs);
+ HandlePhysRegDef(i, nullptr, Defs);
- std::fill(PhysRegDef, PhysRegDef + NumRegs, (MachineInstr*)0);
- std::fill(PhysRegUse, PhysRegUse + NumRegs, (MachineInstr*)0);
+ std::fill(PhysRegDef, PhysRegDef + NumRegs, nullptr);
+ std::fill(PhysRegUse, PhysRegUse + NumRegs, nullptr);
}
// Convert and transfer the dead / killed information we have gathered into
@@ -701,14 +701,15 @@ void LiveVariables::removeVirtualRegistersKilled(MachineInstr *MI) {
/// which is used in a PHI node. We map that to the BB the vreg is coming from.
///
void LiveVariables::analyzePHINodes(const MachineFunction& Fn) {
- for (MachineFunction::const_iterator I = Fn.begin(), E = Fn.end();
- I != E; ++I)
- for (MachineBasicBlock::const_iterator BBI = I->begin(), BBE = I->end();
- BBI != BBE && BBI->isPHI(); ++BBI)
- for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2)
- if (BBI->getOperand(i).readsReg())
- PHIVarInfo[BBI->getOperand(i + 1).getMBB()->getNumber()]
- .push_back(BBI->getOperand(i).getReg());
+ for (const auto &MBB : Fn)
+ for (const auto &BBI : MBB) {
+ if (!BBI.isPHI())
+ break;
+ for (unsigned i = 1, e = BBI.getNumOperands(); i != e; i += 2)
+ if (BBI.getOperand(i).readsReg())
+ PHIVarInfo[BBI.getOperand(i + 1).getMBB()->getNumber()]
+ .push_back(BBI.getOperand(i).getReg());
+ }
}
bool LiveVariables::VarInfo::isLiveIn(const MachineBasicBlock &MBB,
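analyzePHINodes is one of several loops this commit converts to range-based for. Note the subtle restructuring: the old loop condition BBI != BBE && BBI->isPHI() becomes an early break inside the range-for, since PHIs are grouped at the start of a block. A compact stand-in version, where Insn, Block, and Func are placeholders for the machine IR types:

#include <vector>

struct Insn { bool IsPHI; };
using Block = std::vector<Insn>;
using Func = std::vector<Block>;

static int countLeadingPHIs(const Func &Fn) {
  int N = 0;
  for (const auto &MBB : Fn)     // was: explicit const_iterator loop
    for (const auto &MI : MBB) { // the && BBI->isPHI() condition...
      if (!MI.IsPHI)
        break;                   // ...becomes an early break
      ++N;
    }
  return N;
}

int main() {
  Func Fn = {{{true}, {true}, {false}}, {{false}}};
  return countLeadingPHIs(Fn) == 2 ? 0 : 1;
}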
diff --git a/lib/CodeGen/LocalStackSlotAllocation.cpp b/lib/CodeGen/LocalStackSlotAllocation.cpp
index 122d467..36885e8 100644
--- a/lib/CodeGen/LocalStackSlotAllocation.cpp
+++ b/lib/CodeGen/LocalStackSlotAllocation.cpp
@@ -14,7 +14,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "localstackalloc"
#include "llvm/CodeGen/Passes.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
@@ -40,6 +39,8 @@
using namespace llvm;
+#define DEBUG_TYPE "localstackalloc"
+
STATISTIC(NumAllocations, "Number of frame indices allocated into local block");
STATISTIC(NumBaseRegisters, "Number of virtual frame base registers allocated");
STATISTIC(NumReplacements, "Number of frame indices references replaced");
diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp
index 888c20e..0ec5c33 100644
--- a/lib/CodeGen/MachineBasicBlock.cpp
+++ b/lib/CodeGen/MachineBasicBlock.cpp
@@ -35,9 +35,11 @@
#include <algorithm>
using namespace llvm;
+#define DEBUG_TYPE "codegen"
+
MachineBasicBlock::MachineBasicBlock(MachineFunction &mf, const BasicBlock *bb)
: BB(bb), Number(-1), xParent(&mf), Alignment(0), IsLandingPad(false),
- AddressTaken(false), CachedMCSymbol(NULL) {
+ AddressTaken(false), CachedMCSymbol(nullptr) {
Insts.Parent = this;
}
@@ -98,7 +100,7 @@ void ilist_traits<MachineBasicBlock>::removeNodeFromList(MachineBasicBlock *N) {
/// list, we update its parent pointer and add its operands from reg use/def
/// lists if appropriate.
void ilist_traits<MachineInstr>::addNodeToList(MachineInstr *N) {
- assert(N->getParent() == 0 && "machine instruction already in a basic block");
+ assert(!N->getParent() && "machine instruction already in a basic block");
N->setParent(Parent);
// Add the instruction's register operands to their corresponding
@@ -113,13 +115,13 @@ void ilist_traits<MachineInstr>::addNodeToList(MachineInstr *N) {
/// list, we update its parent pointer and remove its operands from reg use/def
/// lists if appropriate.
void ilist_traits<MachineInstr>::removeNodeFromList(MachineInstr *N) {
- assert(N->getParent() != 0 && "machine instruction not in a basic block");
+ assert(N->getParent() && "machine instruction not in a basic block");
// Remove from the use/def lists.
if (MachineFunction *MF = N->getParent()->getParent())
N->RemoveRegOperandsFromUseLists(MF->getRegInfo());
- N->setParent(0);
+ N->setParent(nullptr);
LeakDetector::addGarbageObject(N);
}
@@ -229,11 +231,11 @@ MachineBasicBlock::getLastNonDebugInstr() const {
const MachineBasicBlock *MachineBasicBlock::getLandingPadSuccessor() const {
// A block with a landing pad successor only has one other successor.
if (succ_size() > 2)
- return 0;
+ return nullptr;
for (const_succ_iterator I = succ_begin(), E = succ_end(); I != E; ++I)
if ((*I)->isLandingPad())
return *I;
- return 0;
+ return nullptr;
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -392,7 +394,7 @@ void MachineBasicBlock::updateTerminator() {
// A block with no successors has no concerns with fall-through edges.
if (this->succ_empty()) return;
- MachineBasicBlock *TBB = 0, *FBB = 0;
+ MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
SmallVector<MachineOperand, 4> Cond;
DebugLoc dl; // FIXME: this is nowhere
bool B = TII->AnalyzeBranch(*this, TBB, FBB, Cond);
@@ -423,7 +425,7 @@ void MachineBasicBlock::updateTerminator() {
// Finally update the unconditional successor to be reached via a branch
// if it would not be reached by fallthrough.
if (!isLayoutSuccessor(TBB))
- TII->InsertBranch(*this, TBB, 0, Cond, dl);
+ TII->InsertBranch(*this, TBB, nullptr, Cond, dl);
}
} else {
if (FBB) {
@@ -434,16 +436,16 @@ void MachineBasicBlock::updateTerminator() {
if (TII->ReverseBranchCondition(Cond))
return;
TII->RemoveBranch(*this);
- TII->InsertBranch(*this, FBB, 0, Cond, dl);
+ TII->InsertBranch(*this, FBB, nullptr, Cond, dl);
} else if (isLayoutSuccessor(FBB)) {
TII->RemoveBranch(*this);
- TII->InsertBranch(*this, TBB, 0, Cond, dl);
+ TII->InsertBranch(*this, TBB, nullptr, Cond, dl);
}
} else {
// Walk through the successors and find the successor which is not
// a landing pad and is not the conditional branch destination (in TBB)
// as the fallthrough successor.
- MachineBasicBlock *FallthroughBB = 0;
+ MachineBasicBlock *FallthroughBB = nullptr;
for (succ_iterator SI = succ_begin(), SE = succ_end(); SI != SE; ++SI) {
if ((*SI)->isLandingPad() || *SI == TBB)
continue;
@@ -461,7 +463,7 @@ void MachineBasicBlock::updateTerminator() {
// Finally update the unconditional successor to be reached via a branch
// if it would not be reached by fallthrough.
if (!isLayoutSuccessor(TBB))
- TII->InsertBranch(*this, TBB, 0, Cond, dl);
+ TII->InsertBranch(*this, TBB, nullptr, Cond, dl);
return;
}
@@ -470,11 +472,11 @@ void MachineBasicBlock::updateTerminator() {
if (TII->ReverseBranchCondition(Cond)) {
// We can't reverse the condition, add an unconditional branch.
Cond.clear();
- TII->InsertBranch(*this, FallthroughBB, 0, Cond, dl);
+ TII->InsertBranch(*this, FallthroughBB, nullptr, Cond, dl);
return;
}
TII->RemoveBranch(*this);
- TII->InsertBranch(*this, FallthroughBB, 0, Cond, dl);
+ TII->InsertBranch(*this, FallthroughBB, nullptr, Cond, dl);
} else if (!isLayoutSuccessor(FallthroughBB)) {
TII->RemoveBranch(*this);
TII->InsertBranch(*this, TBB, FallthroughBB, Cond, dl);
@@ -641,7 +643,7 @@ bool MachineBasicBlock::canFallThrough() {
return false;
// Analyze the branches, if any, at the end of the block.
- MachineBasicBlock *TBB = 0, *FBB = 0;
+ MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
SmallVector<MachineOperand, 4> Cond;
const TargetInstrInfo *TII = getParent()->getTarget().getInstrInfo();
if (TII->AnalyzeBranch(*this, TBB, FBB, Cond)) {
@@ -654,7 +656,7 @@ bool MachineBasicBlock::canFallThrough() {
}
// If there is no branch, control always falls through.
- if (TBB == 0) return true;
+ if (!TBB) return true;
// If there is some explicit branch to the fallthrough block, it can obviously
// reach, even though the branch should get folded to fall through implicitly.
@@ -668,7 +670,7 @@ bool MachineBasicBlock::canFallThrough() {
// Otherwise, if it is conditional and has no explicit false block, it falls
// through.
- return FBB == 0;
+ return FBB == nullptr;
}
MachineBasicBlock *
@@ -676,7 +678,7 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) {
// Splitting the critical edge to a landing pad block is non-trivial. Don't do
// it in this generic function.
if (Succ->isLandingPad())
- return NULL;
+ return nullptr;
MachineFunction *MF = getParent();
DebugLoc dl; // FIXME: this is nowhere
@@ -684,15 +686,15 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) {
// Performance might be harmed on HW that implements branching using exec mask
// where both sides of the branches are always executed.
if (MF->getTarget().requiresStructuredCFG())
- return NULL;
+ return nullptr;
// We may need to update this's terminator, but we can't do that if
// AnalyzeBranch fails. If this uses a jump table, we won't touch it.
const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
- MachineBasicBlock *TBB = 0, *FBB = 0;
+ MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
SmallVector<MachineOperand, 4> Cond;
if (TII->AnalyzeBranch(*this, TBB, FBB, Cond))
- return NULL;
+ return nullptr;
// Avoid bugpoint weirdness: A block may end with a conditional branch but
// jumps to the same MBB in either case. We have duplicate CFG edges in that
@@ -701,7 +703,7 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) {
if (TBB && TBB == FBB) {
DEBUG(dbgs() << "Won't split critical edge after degenerate BB#"
<< getNumber() << '\n');
- return NULL;
+ return nullptr;
}
MachineBasicBlock *NMBB = MF->CreateMachineBasicBlock();
@@ -793,7 +795,7 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) {
NMBB->addSuccessor(Succ);
if (!NMBB->isLayoutSuccessor(Succ)) {
Cond.clear();
- MF->getTarget().getInstrInfo()->InsertBranch(*NMBB, Succ, NULL, Cond, dl);
+ MF->getTarget().getInstrInfo()->InsertBranch(*NMBB, Succ, nullptr, Cond, dl);
if (Indexes) {
for (instr_iterator I = NMBB->instr_begin(), E = NMBB->instr_end();
@@ -1065,11 +1067,11 @@ bool MachineBasicBlock::CorrectExtraCFGEdges(MachineBasicBlock *DestA,
MachineFunction::iterator FallThru =
std::next(MachineFunction::iterator(this));
- if (DestA == 0 && DestB == 0) {
+ if (!DestA && !DestB) {
// Block falls through to successor.
DestA = FallThru;
DestB = FallThru;
- } else if (DestA != 0 && DestB == 0) {
+ } else if (DestA && !DestB) {
if (isCond)
// Block ends in conditional jump that falls through to successor.
DestB = FallThru;
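Many hunks in MachineBasicBlock.cpp seed TBB and FBB with nullptr before calling TII->AnalyzeBranch, which returns true when the terminator cannot be analyzed and otherwise fills in the taken and fallthrough targets (a null FBB meaning fallthrough). A self-contained analogue of that out-parameter protocol; Block and analyzeBranch below are stand-ins, not the LLVM API:

#include <iostream>

struct Block { int Id; };

// Returns true when the terminator cannot be analyzed; on success it
// fills Taken, and leaves NotTaken null to signal fallthrough.
static bool analyzeBranch(const Block &B, Block *&Taken, Block *&NotTaken) {
  static Block Then{1}, Else{2};
  if (B.Id < 0)
    return true; // unanalyzable: caller must not trust the outputs
  Taken = &Then;
  NotTaken = (B.Id % 2) ? &Else : nullptr;
  return false;
}

int main() {
  Block B{3};
  Block *TBB = nullptr, *FBB = nullptr; // seeded with nullptr, as in the patch
  if (analyzeBranch(B, TBB, FBB))
    return 1;
  std::cout << "taken=" << TBB->Id
            << " fallthrough=" << (FBB ? "no" : "yes") << '\n';
}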
diff --git a/lib/CodeGen/MachineBlockFrequencyInfo.cpp b/lib/CodeGen/MachineBlockFrequencyInfo.cpp
index 13203d5..9151d99 100644
--- a/lib/CodeGen/MachineBlockFrequencyInfo.cpp
+++ b/lib/CodeGen/MachineBlockFrequencyInfo.cpp
@@ -1,4 +1,4 @@
-//====------ MachineBlockFrequencyInfo.cpp - MBB Frequency Analysis ------====//
+//===- MachineBlockFrequencyInfo.cpp - MBB Frequency Analysis -------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -12,8 +12,10 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
-#include "llvm/Analysis/BlockFrequencyImpl.h"
+#include "llvm/Analysis/BlockFrequencyInfoImpl.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/CommandLine.h"
@@ -22,6 +24,8 @@
using namespace llvm;
+#define DEBUG_TYPE "block-freq"
+
#ifndef NDEBUG
enum GVDAGType {
GVDT_None,
@@ -112,6 +116,7 @@ struct DOTGraphTraits<MachineBlockFrequencyInfo*> :
INITIALIZE_PASS_BEGIN(MachineBlockFrequencyInfo, "machine-block-freq",
"Machine Block Frequency Analysis", true, true)
INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_PASS_END(MachineBlockFrequencyInfo, "machine-block-freq",
"Machine Block Frequency Analysis", true, true)
@@ -127,16 +132,18 @@ MachineBlockFrequencyInfo::~MachineBlockFrequencyInfo() {}
void MachineBlockFrequencyInfo::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<MachineBranchProbabilityInfo>();
+ AU.addRequired<MachineLoopInfo>();
AU.setPreservesAll();
MachineFunctionPass::getAnalysisUsage(AU);
}
bool MachineBlockFrequencyInfo::runOnMachineFunction(MachineFunction &F) {
MachineBranchProbabilityInfo &MBPI =
- getAnalysis<MachineBranchProbabilityInfo>();
+ getAnalysis<MachineBranchProbabilityInfo>();
+ MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
if (!MBFI)
MBFI.reset(new ImplType);
- MBFI->doFunction(&F, &MBPI);
+ MBFI->doFunction(&F, &MBPI, &MLI);
#ifndef NDEBUG
if (ViewMachineBlockFreqPropagationDAG != GVDT_None) {
view();
@@ -166,7 +173,7 @@ getBlockFreq(const MachineBasicBlock *MBB) const {
}
const MachineFunction *MachineBlockFrequencyInfo::getFunction() const {
- return MBFI ? MBFI->Fn : nullptr;
+ return MBFI ? MBFI->getFunction() : nullptr;
}
raw_ostream &
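The MachineBlockFrequencyInfo hunks wire in a new analysis dependency: INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) registers it, getAnalysisUsage requests it, and runOnMachineFunction fetches it and hands it to doFunction. A toy analogue of declaring and consuming such a dependency, with stand-in types in place of LLVM's pass-manager machinery:

#include <iostream>

struct LoopInfoSketch {
  int loopCount() const { return 2; }
};

struct AnalysisUsageSketch {
  bool RequiresLoopInfo = false;
};

struct BlockFreqPassSketch {
  void getAnalysisUsage(AnalysisUsageSketch &AU) const {
    AU.RequiresLoopInfo = true; // mirrors AU.addRequired<MachineLoopInfo>()
  }
  void run(const LoopInfoSketch &MLI) {
    // The frequency computation now consumes loop structure, mirroring
    // MBFI->doFunction(&F, &MBPI, &MLI).
    std::cout << "loops seen: " << MLI.loopCount() << '\n';
  }
};

int main() {
  AnalysisUsageSketch AU;
  BlockFreqPassSketch P;
  P.getAnalysisUsage(AU); // the "manager" asks what the pass needs
  if (AU.RequiresLoopInfo) {
    LoopInfoSketch MLI;   // ...then provides it before running the pass
    P.run(MLI);
  }
}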
diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp
index 771e7ce..74af1e2 100644
--- a/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/lib/CodeGen/MachineBlockPlacement.cpp
@@ -25,7 +25,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "block-placement2"
#include "llvm/CodeGen/Passes.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
@@ -46,6 +45,8 @@
#include <algorithm>
using namespace llvm;
+#define DEBUG_TYPE "block-placement2"
+
STATISTIC(NumCondBranches, "Number of conditional branches");
STATISTIC(NumUncondBranches, "Number of unconditional branches");
STATISTIC(CondBranchTakenFreq,
@@ -206,7 +207,7 @@ class MachineBlockPlacement : public MachineFunctionPass {
void markChainSuccessors(BlockChain &Chain,
MachineBasicBlock *LoopHeaderBB,
SmallVectorImpl<MachineBasicBlock *> &BlockWorkList,
- const BlockFilterSet *BlockFilter = 0);
+ const BlockFilterSet *BlockFilter = nullptr);
MachineBasicBlock *selectBestSuccessor(MachineBasicBlock *BB,
BlockChain &Chain,
const BlockFilterSet *BlockFilter);
@@ -220,7 +221,7 @@ class MachineBlockPlacement : public MachineFunctionPass {
const BlockFilterSet *BlockFilter);
void buildChain(MachineBasicBlock *BB, BlockChain &Chain,
SmallVectorImpl<MachineBasicBlock *> &BlockWorkList,
- const BlockFilterSet *BlockFilter = 0);
+ const BlockFilterSet *BlockFilter = nullptr);
MachineBasicBlock *findBestLoopTop(MachineLoop &L,
const BlockFilterSet &LoopBlockSet);
MachineBasicBlock *findBestLoopExit(MachineFunction &F,
@@ -334,7 +335,7 @@ MachineBasicBlock *MachineBlockPlacement::selectBestSuccessor(
const BlockFilterSet *BlockFilter) {
const BranchProbability HotProb(4, 5); // 80%
- MachineBasicBlock *BestSucc = 0;
+ MachineBasicBlock *BestSucc = nullptr;
// FIXME: Due to the performance of the probability and weight routines in
// the MBPI analysis, we manually compute probabilities using the edge
// weights. This is suboptimal as it means that the somewhat subtle
@@ -432,7 +433,7 @@ MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock(
}),
WorkList.end());
- MachineBasicBlock *BestBlock = 0;
+ MachineBasicBlock *BestBlock = nullptr;
BlockFrequency BestFreq;
for (SmallVectorImpl<MachineBasicBlock *>::iterator WBI = WorkList.begin(),
WBE = WorkList.end();
@@ -479,7 +480,7 @@ MachineBasicBlock *MachineBlockPlacement::getFirstUnplacedBlock(
return *BlockToChain[I]->begin();
}
}
- return 0;
+ return nullptr;
}
void MachineBlockPlacement::buildChain(
@@ -560,7 +561,7 @@ MachineBlockPlacement::findBestLoopTop(MachineLoop &L,
<< getBlockName(L.getHeader()) << "\n");
BlockFrequency BestPredFreq;
- MachineBasicBlock *BestPred = 0;
+ MachineBasicBlock *BestPred = nullptr;
for (MachineBasicBlock::pred_iterator PI = L.getHeader()->pred_begin(),
PE = L.getHeader()->pred_end();
PI != PE; ++PI) {
@@ -616,11 +617,11 @@ MachineBlockPlacement::findBestLoopExit(MachineFunction &F,
// header and only rotate if safe.
BlockChain &HeaderChain = *BlockToChain[L.getHeader()];
if (!LoopBlockSet.count(*HeaderChain.begin()))
- return 0;
+ return nullptr;
BlockFrequency BestExitEdgeFreq;
unsigned BestExitLoopDepth = 0;
- MachineBasicBlock *ExitingBB = 0;
+ MachineBasicBlock *ExitingBB = nullptr;
// If there are exits to outer loops, loop rotation can severely limit
// fallthrough opportunities unless it selects such an exit. Keep a set of
// blocks where rotating to exit with that block will reach an outer loop.
@@ -709,14 +710,14 @@ MachineBlockPlacement::findBestLoopExit(MachineFunction &F,
// Without a candidate exiting block or with only a single block in the
// loop, just use the loop header to layout the loop.
if (!ExitingBB || L.getNumBlocks() == 1)
- return 0;
+ return nullptr;
// Also, if we have exit blocks which lead to outer loops but didn't select
// one of them as the exiting block we are rotating toward, disable loop
// rotation altogether.
if (!BlocksExitingToOuterLoop.empty() &&
!BlocksExitingToOuterLoop.count(ExitingBB))
- return 0;
+ return nullptr;
DEBUG(dbgs() << " Best exiting block: " << getBlockName(ExitingBB) << "\n");
return ExitingBB;
@@ -795,7 +796,7 @@ void MachineBlockPlacement::buildLoopChains(MachineFunction &F,
// If we selected just the header for the loop top, look for a potentially
// profitable exit block in the event that rotating the loop can eliminate
// branches by placing an exit edge at the bottom.
- MachineBasicBlock *ExitingBB = 0;
+ MachineBasicBlock *ExitingBB = nullptr;
if (LoopTop == L.getHeader())
ExitingBB = findBestLoopExit(F, L, LoopBlockSet);
@@ -883,7 +884,7 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) {
// the exact fallthrough behavior for.
for (;;) {
Cond.clear();
- MachineBasicBlock *TBB = 0, *FBB = 0; // For AnalyzeBranch.
+ MachineBasicBlock *TBB = nullptr, *FBB = nullptr; // For AnalyzeBranch.
if (!TII->AnalyzeBranch(*BB, TBB, FBB, Cond) || !FI->canFallThrough())
break;
@@ -895,7 +896,7 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) {
DEBUG(dbgs() << "Pre-merging due to unanalyzable fallthrough: "
<< getBlockName(BB) << " -> " << getBlockName(NextBB)
<< "\n");
- Chain->merge(NextBB, 0);
+ Chain->merge(NextBB, nullptr);
FI = NextFI;
BB = NextBB;
}
@@ -987,7 +988,7 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) {
// than assert when the branch cannot be analyzed in order to remove this
// boiler plate.
Cond.clear();
- MachineBasicBlock *TBB = 0, *FBB = 0; // For AnalyzeBranch.
+ MachineBasicBlock *TBB = nullptr, *FBB = nullptr; // For AnalyzeBranch.
if (!TII->AnalyzeBranch(*PrevBB, TBB, FBB, Cond)) {
// The "PrevBB" is not yet updated to reflect current code layout, so,
// o. it may fall-through to a block without an explicit "goto" instruction
@@ -1004,10 +1005,10 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) {
PrevBB->updateTerminator();
needUpdateBr = false;
Cond.clear();
- TBB = FBB = 0;
+ TBB = FBB = nullptr;
if (TII->AnalyzeBranch(*PrevBB, TBB, FBB, Cond)) {
// FIXME: This should never take place.
- TBB = FBB = 0;
+ TBB = FBB = nullptr;
}
}
@@ -1032,7 +1033,7 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) {
// Fixup the last block.
Cond.clear();
- MachineBasicBlock *TBB = 0, *FBB = 0; // For AnalyzeBranch.
+ MachineBasicBlock *TBB = nullptr, *FBB = nullptr; // For AnalyzeBranch.
if (!TII->AnalyzeBranch(F.back(), TBB, FBB, Cond))
F.back().updateTerminator();
diff --git a/lib/CodeGen/MachineBranchProbabilityInfo.cpp b/lib/CodeGen/MachineBranchProbabilityInfo.cpp
index 1d6879b..6fbc2be 100644
--- a/lib/CodeGen/MachineBranchProbabilityInfo.cpp
+++ b/lib/CodeGen/MachineBranchProbabilityInfo.cpp
@@ -88,7 +88,7 @@ MachineBranchProbabilityInfo::isEdgeHot(const MachineBasicBlock *Src,
MachineBasicBlock *
MachineBranchProbabilityInfo::getHotSucc(MachineBasicBlock *MBB) const {
uint32_t MaxWeight = 0;
- MachineBasicBlock *MaxSucc = 0;
+ MachineBasicBlock *MaxSucc = nullptr;
for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(),
E = MBB->succ_end(); I != E; ++I) {
uint32_t Weight = getEdgeWeight(MBB, I);
@@ -101,7 +101,7 @@ MachineBranchProbabilityInfo::getHotSucc(MachineBasicBlock *MBB) const {
if (getEdgeProbability(MBB, MaxSucc) >= BranchProbability(4, 5))
return MaxSucc;
- return 0;
+ return nullptr;
}
BranchProbability MachineBranchProbabilityInfo::getEdgeProbability(
diff --git a/lib/CodeGen/MachineCSE.cpp b/lib/CodeGen/MachineCSE.cpp
index 9c3bcc4..7da439c 100644
--- a/lib/CodeGen/MachineCSE.cpp
+++ b/lib/CodeGen/MachineCSE.cpp
@@ -13,7 +13,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "machine-cse"
#include "llvm/CodeGen/Passes.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/ScopedHashTable.h"
@@ -28,6 +27,8 @@
#include "llvm/Target/TargetInstrInfo.h"
using namespace llvm;
+#define DEBUG_TYPE "machine-cse"
+
STATISTIC(NumCoalesces, "Number of copies coalesced");
STATISTIC(NumCSEs, "Number of common subexpression eliminated");
STATISTIC(NumPhysCSEs,
diff --git a/lib/CodeGen/MachineCopyPropagation.cpp b/lib/CodeGen/MachineCopyPropagation.cpp
index 7e1970c..3119a35 100644
--- a/lib/CodeGen/MachineCopyPropagation.cpp
+++ b/lib/CodeGen/MachineCopyPropagation.cpp
@@ -11,7 +11,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "codegen-cp"
#include "llvm/CodeGen/Passes.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SetVector.h"
@@ -28,6 +27,8 @@
#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
+#define DEBUG_TYPE "codegen-cp"
+
STATISTIC(NumDeletes, "Number of dead copies deleted");
namespace {
diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp
index 061efdb..eb3d71f 100644
--- a/lib/CodeGen/MachineFunction.cpp
+++ b/lib/CodeGen/MachineFunction.cpp
@@ -38,6 +38,8 @@
#include "llvm/Target/TargetMachine.h"
using namespace llvm;
+#define DEBUG_TYPE "codegen"
+
//===----------------------------------------------------------------------===//
// MachineFunction implementation
//===----------------------------------------------------------------------===//
@@ -56,9 +58,9 @@ MachineFunction::MachineFunction(const Function *F, const TargetMachine &TM,
if (TM.getRegisterInfo())
RegInfo = new (Allocator) MachineRegisterInfo(TM);
else
- RegInfo = 0;
+ RegInfo = nullptr;
- MFInfo = 0;
+ MFInfo = nullptr;
FrameInfo =
new (Allocator) MachineFrameInfo(TM,!F->hasFnAttribute("no-realign-stack"));
@@ -77,7 +79,7 @@ MachineFunction::MachineFunction(const Function *F, const TargetMachine &TM,
TM.getTargetLowering()->getPrefFunctionAlignment());
FunctionNumber = FunctionNum;
- JumpTableInfo = 0;
+ JumpTableInfo = nullptr;
}
MachineFunction::~MachineFunction() {
@@ -123,6 +125,11 @@ getOrCreateJumpTableInfo(unsigned EntryKind) {
return JumpTableInfo;
}
+/// Should we be emitting segmented-stack code for this function?
+bool MachineFunction::shouldSplitStack() {
+ return getFunction()->hasFnAttribute("split-stack");
+}
+
/// RenumberBlocks - This discards all of the MachineBasicBlock numbers and
/// recomputes them. This guarantees that the MBB numbers are sequential,
/// dense, and match the ordering of the blocks within the function. If a
@@ -131,7 +138,7 @@ getOrCreateJumpTableInfo(unsigned EntryKind) {
void MachineFunction::RenumberBlocks(MachineBasicBlock *MBB) {
if (empty()) { MBBNumbering.clear(); return; }
MachineFunction::iterator MBBI, E = end();
- if (MBB == 0)
+ if (MBB == nullptr)
MBBI = begin();
else
MBBI = MBB;
@@ -147,7 +154,7 @@ void MachineFunction::RenumberBlocks(MachineBasicBlock *MBB) {
if (MBBI->getNumber() != -1) {
assert(MBBNumbering[MBBI->getNumber()] == &*MBBI &&
"MBB number mismatch!");
- MBBNumbering[MBBI->getNumber()] = 0;
+ MBBNumbering[MBBI->getNumber()] = nullptr;
}
// If BlockNo is already taken, set that block's number to -1.
@@ -231,11 +238,17 @@ MachineFunction::getMachineMemOperand(MachinePointerInfo PtrInfo, unsigned f,
MachineMemOperand *
MachineFunction::getMachineMemOperand(const MachineMemOperand *MMO,
int64_t Offset, uint64_t Size) {
+ if (MMO->getValue())
+ return new (Allocator)
+ MachineMemOperand(MachinePointerInfo(MMO->getValue(),
+ MMO->getOffset()+Offset),
+ MMO->getFlags(), Size,
+ MMO->getBaseAlignment(), nullptr);
return new (Allocator)
- MachineMemOperand(MachinePointerInfo(MMO->getValue(),
+ MachineMemOperand(MachinePointerInfo(MMO->getPseudoValue(),
MMO->getOffset()+Offset),
MMO->getFlags(), Size,
- MMO->getBaseAlignment(), 0);
+ MMO->getBaseAlignment(), nullptr);
}
MachineInstr::mmo_iterator
@@ -352,9 +365,9 @@ void MachineFunction::print(raw_ostream &OS, SlotIndexes *Indexes) const {
OS << '\n';
}
- for (const_iterator BB = begin(), E = end(); BB != E; ++BB) {
+ for (const auto &BB : *this) {
OS << '\n';
- BB->print(OS, Indexes);
+ BB.print(OS, Indexes);
}
OS << "\n# End machine code for function " << getName() << ".\n\n";
@@ -564,7 +577,7 @@ int MachineFrameInfo::CreateFixedObject(uint64_t Size, int64_t SPOffset,
Align, getFrameLowering()->getStackAlignment());
Objects.insert(Objects.begin(), StackObject(Size, Align, SPOffset, Immutable,
/*isSS*/ false,
- /*Alloca*/ 0));
+ /*Alloca*/ nullptr));
return -++NumFixedObjects;
}
@@ -583,7 +596,7 @@ MachineFrameInfo::getPristineRegs(const MachineBasicBlock *MBB) const {
if (!isCalleeSavedInfoValid())
return BV;
- for (const uint16_t *CSR = TRI->getCalleeSavedRegs(MF); CSR && *CSR; ++CSR)
+ for (const MCPhysReg *CSR = TRI->getCalleeSavedRegs(MF); CSR && *CSR; ++CSR)
BV.set(*CSR);
// The entry MBB always has all CSRs pristine.
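Editor's note: besides the nullptr sweep, MachineFunction gains shouldSplitStack(), which keys off a string attribute on the IR function. A hedged sketch of the same query from outside MachineFunction (the free-function name is illustrative):

    #include "llvm/IR/Function.h"

    // True when the IR function carries the "split-stack" string attribute,
    // mirroring the check shouldSplitStack() performs above.
    static bool wantsSegmentedStacks(const llvm::Function &F) {
      return F.hasFnAttribute("split-stack");
    }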
diff --git a/lib/CodeGen/MachineFunctionAnalysis.cpp b/lib/CodeGen/MachineFunctionAnalysis.cpp
index 35591e1..46cd60a 100644
--- a/lib/CodeGen/MachineFunctionAnalysis.cpp
+++ b/lib/CodeGen/MachineFunctionAnalysis.cpp
@@ -20,7 +20,7 @@ using namespace llvm;
char MachineFunctionAnalysis::ID = 0;
MachineFunctionAnalysis::MachineFunctionAnalysis(const TargetMachine &tm) :
- FunctionPass(ID), TM(tm), MF(0) {
+ FunctionPass(ID), TM(tm), MF(nullptr) {
initializeMachineModuleInfoPass(*PassRegistry::getPassRegistry());
}
@@ -53,5 +53,5 @@ bool MachineFunctionAnalysis::runOnFunction(Function &F) {
void MachineFunctionAnalysis::releaseMemory() {
delete MF;
- MF = 0;
+ MF = nullptr;
}
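Editor's note: hunks like this one are pure 0/NULL-to-nullptr conversions. Why nullptr is worth the churn, in a self-contained C++11 example (not from the patch):

    #include <cstdio>

    static void f(int)    { std::puts("int overload"); }
    static void f(char *) { std::puts("pointer overload"); }

    int main() {
      f(0);        // a literal 0 is an int first: silently picks the int overload
      f(nullptr);  // nullptr converts only to pointer types: unambiguous
      return 0;
    }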
diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp
index d102794..5122165 100644
--- a/lib/CodeGen/MachineInstr.cpp
+++ b/lib/CodeGen/MachineInstr.cpp
@@ -128,7 +128,7 @@ void MachineOperand::ChangeToImmediate(int64_t ImmVal) {
void MachineOperand::ChangeToRegister(unsigned Reg, bool isDef, bool isImp,
bool isKill, bool isDead, bool isUndef,
bool isDebug) {
- MachineRegisterInfo *RegInfo = 0;
+ MachineRegisterInfo *RegInfo = nullptr;
if (MachineInstr *MI = getParent())
if (MachineBasicBlock *MBB = MI->getParent())
if (MachineFunction *MF = MBB->getParent())
@@ -152,7 +152,7 @@ void MachineOperand::ChangeToRegister(unsigned Reg, bool isDef, bool isImp,
IsEarlyClobber = false;
IsDebug = isDebug;
// Ensure isOnRegUseList() returns false.
- Contents.Reg.Prev = 0;
+ Contents.Reg.Prev = nullptr;
// Preserve the tie when the operand was already a register.
if (!WasReg)
TiedTo = 0;
@@ -265,7 +265,7 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const {
if (const MachineBasicBlock *MBB = MI->getParent())
if (const MachineFunction *MF = MBB->getParent())
TM = &MF->getTarget();
- const TargetRegisterInfo *TRI = TM ? TM->getRegisterInfo() : 0;
+ const TargetRegisterInfo *TRI = TM ? TM->getRegisterInfo() : nullptr;
switch (getType()) {
case MachineOperand::MO_Register:
@@ -399,8 +399,8 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const {
/// getAddrSpace - Return the LLVM IR address space number that this pointer
/// points into.
unsigned MachinePointerInfo::getAddrSpace() const {
- if (V == 0) return 0;
- return cast<PointerType>(V->getType())->getAddressSpace();
+ if (V.isNull() || V.is<const PseudoSourceValue*>()) return 0;
+ return cast<PointerType>(V.get<const Value*>()->getType())->getAddressSpace();
}
/// getConstantPool - Return a MachinePointerInfo record that refers to the
@@ -434,7 +434,8 @@ MachineMemOperand::MachineMemOperand(MachinePointerInfo ptrinfo, unsigned f,
: PtrInfo(ptrinfo), Size(s),
Flags((f & ((1 << MOMaxBits) - 1)) | ((Log2_32(a) + 1) << MOMaxBits)),
TBAAInfo(TBAAInfo), Ranges(Ranges) {
- assert((PtrInfo.V == 0 || isa<PointerType>(PtrInfo.V->getType())) &&
+ assert((PtrInfo.V.isNull() || PtrInfo.V.is<const PseudoSourceValue*>() ||
+ isa<PointerType>(PtrInfo.V.get<const Value*>()->getType())) &&
"invalid pointer value");
assert(getBaseAlignment() == a && "Alignment is not a power of 2!");
assert((isLoad() || isStore()) && "Not a load/store!");
@@ -445,7 +446,7 @@ MachineMemOperand::MachineMemOperand(MachinePointerInfo ptrinfo, unsigned f,
void MachineMemOperand::Profile(FoldingSetNodeID &ID) const {
ID.AddInteger(getOffset());
ID.AddInteger(Size);
- ID.AddPointer(getValue());
+ ID.AddPointer(getOpaqueValue());
ID.AddInteger(Flags);
}
@@ -486,10 +487,12 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineMemOperand &MMO) {
// Print the address information.
OS << "[";
- if (!MMO.getValue())
- OS << "<unknown>";
+ if (const Value *V = MMO.getValue())
+ V->printAsOperand(OS, /*PrintType=*/false);
+ else if (const PseudoSourceValue *PSV = MMO.getPseudoValue())
+ PSV->printCustom(OS);
else
- MMO.getValue()->printAsOperand(OS, /*PrintType=*/false);
+ OS << "<unknown>";
unsigned AS = MMO.getAddrSpace();
if (AS != 0)
@@ -545,9 +548,9 @@ void MachineInstr::addImplicitDefUseOperands(MachineFunction &MF) {
/// the MCInstrDesc.
MachineInstr::MachineInstr(MachineFunction &MF, const MCInstrDesc &tid,
const DebugLoc dl, bool NoImp)
- : MCID(&tid), Parent(0), Operands(0), NumOperands(0),
+ : MCID(&tid), Parent(nullptr), Operands(nullptr), NumOperands(0),
Flags(0), AsmPrinterFlags(0),
- NumMemRefs(0), MemRefs(0), debugLoc(dl) {
+ NumMemRefs(0), MemRefs(nullptr), debugLoc(dl) {
// Reserve space for the expected number of operands.
if (unsigned NumOps = MCID->getNumOperands() +
MCID->getNumImplicitDefs() + MCID->getNumImplicitUses()) {
@@ -562,7 +565,7 @@ MachineInstr::MachineInstr(MachineFunction &MF, const MCInstrDesc &tid,
/// MachineInstr ctor - Copies MachineInstr arg exactly
///
MachineInstr::MachineInstr(MachineFunction &MF, const MachineInstr &MI)
- : MCID(&MI.getDesc()), Parent(0), Operands(0), NumOperands(0),
+ : MCID(&MI.getDesc()), Parent(nullptr), Operands(nullptr), NumOperands(0),
Flags(0), AsmPrinterFlags(0),
NumMemRefs(MI.NumMemRefs), MemRefs(MI.MemRefs),
debugLoc(MI.getDebugLoc()) {
@@ -583,7 +586,7 @@ MachineInstr::MachineInstr(MachineFunction &MF, const MachineInstr &MI)
MachineRegisterInfo *MachineInstr::getRegInfo() {
if (MachineBasicBlock *MBB = getParent())
return &MBB->getParent()->getRegInfo();
- return 0;
+ return nullptr;
}
/// RemoveRegOperandsFromUseLists - Unlink all of the register operands in
@@ -702,7 +705,7 @@ void MachineInstr::addOperand(MachineFunction &MF, const MachineOperand &Op) {
// When adding a register operand, tell MRI about it.
if (NewMO->isReg()) {
// Ensure isOnRegUseList() returns false, regardless of Op's status.
- NewMO->Contents.Reg.Prev = 0;
+ NewMO->Contents.Reg.Prev = nullptr;
// Ignore existing ties. This is not a property that can be copied.
NewMO->TiedTo = 0;
// Add the new operand to MRI, but only for instructions in an MBB.
@@ -974,7 +977,7 @@ MachineInstr::getRegClassConstraint(unsigned OpIdx,
return TII->getRegClass(getDesc(), OpIdx, TRI, MF);
if (!getOperand(OpIdx).isReg())
- return NULL;
+ return nullptr;
// For tied uses on inline asm, get the constraint from the def.
unsigned DefIdx;
@@ -984,7 +987,7 @@ MachineInstr::getRegClassConstraint(unsigned OpIdx,
// Inline asm stores register class constraints in the flag word.
int FlagIdx = findInlineAsmFlagIdx(OpIdx);
if (FlagIdx < 0)
- return NULL;
+ return nullptr;
unsigned Flag = getOperand(FlagIdx).getImm();
unsigned RCID;
@@ -995,7 +998,7 @@ MachineInstr::getRegClassConstraint(unsigned OpIdx,
if (InlineAsm::getKind(Flag) == InlineAsm::Kind_Mem)
return TRI->getPointerRegClass(MF);
- return NULL;
+ return nullptr;
}
const TargetRegisterClass *MachineInstr::getRegClassConstraintEffectForVReg(
@@ -1366,11 +1369,13 @@ bool MachineInstr::isInvariantLoad(AliasAnalysis *AA) const {
if ((*I)->isStore()) return false;
if ((*I)->isInvariant()) return true;
+
+ // A load from a constant PseudoSourceValue is invariant.
+ if (const PseudoSourceValue *PSV = (*I)->getPseudoValue())
+ if (PSV->isConstant(MFI))
+ continue;
+
if (const Value *V = (*I)->getValue()) {
- // A load from a constant PseudoSourceValue is invariant.
- if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V))
- if (PSV->isConstant(MFI))
- continue;
// If we have an AliasAnalysis, ask it whether the memory is constant.
if (AA && AA->pointsToConstantMemory(
AliasAnalysis::Location(V, (*I)->getSize(),
@@ -1448,32 +1453,14 @@ void MachineInstr::dump() const {
static void printDebugLoc(DebugLoc DL, const MachineFunction *MF,
raw_ostream &CommentOS) {
const LLVMContext &Ctx = MF->getFunction()->getContext();
- if (!DL.isUnknown()) { // Print source line info.
- DIScope Scope(DL.getScope(Ctx));
- assert((!Scope || Scope.isScope()) &&
- "Scope of a DebugLoc should be null or a DIScope.");
- // Omit the directory, because it's likely to be long and uninteresting.
- if (Scope)
- CommentOS << Scope.getFilename();
- else
- CommentOS << "<unknown>";
- CommentOS << ':' << DL.getLine();
- if (DL.getCol() != 0)
- CommentOS << ':' << DL.getCol();
- DebugLoc InlinedAtDL = DebugLoc::getFromDILocation(DL.getInlinedAt(Ctx));
- if (!InlinedAtDL.isUnknown()) {
- CommentOS << " @[ ";
- printDebugLoc(InlinedAtDL, MF, CommentOS);
- CommentOS << " ]";
- }
- }
+ DL.print(Ctx, CommentOS);
}
void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM,
bool SkipOpers) const {
// We can be a bit tidier if we know the TargetMachine and/or MachineFunction.
- const MachineFunction *MF = 0;
- const MachineRegisterInfo *MRI = 0;
+ const MachineFunction *MF = nullptr;
+ const MachineRegisterInfo *MRI = nullptr;
if (const MachineBasicBlock *MBB = getParent()) {
MF = MBB->getParent();
if (!TM && MF)
@@ -1679,7 +1666,7 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM,
OS << " line no:" << DV.getLineNumber();
if (MDNode *InlinedAt = DV.getInlinedAt()) {
DebugLoc InlinedAtDL = DebugLoc::getFromDILocation(InlinedAt);
- if (!InlinedAtDL.isUnknown()) {
+ if (!InlinedAtDL.isUnknown() && MF) {
OS << " inlined @[ ";
printDebugLoc(InlinedAtDL, MF, OS);
OS << " ]";
@@ -1756,7 +1743,7 @@ bool MachineInstr::addRegisterKilled(unsigned IncomingReg,
void MachineInstr::clearRegisterKills(unsigned Reg,
const TargetRegisterInfo *RegInfo) {
if (!TargetRegisterInfo::isPhysicalRegister(Reg))
- RegInfo = 0;
+ RegInfo = nullptr;
for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
MachineOperand &MO = getOperand(i);
if (!MO.isReg() || !MO.isUse() || !MO.isKill())
@@ -1889,7 +1876,7 @@ MachineInstrExpressionTrait::getHashValue(const MachineInstr* const &MI) {
void MachineInstr::emitError(StringRef Msg) const {
// Find the source location cookie.
unsigned LocCookie = 0;
- const MDNode *LocMD = 0;
+ const MDNode *LocMD = nullptr;
for (unsigned i = getNumOperands(); i != 0; --i) {
if (getOperand(i-1).isMetadata() &&
(LocMD = getOperand(i-1).getMetadata()) &&
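Editor's note: the MachineInstr.cpp hunks follow from MachinePointerInfo::V becoming a PointerUnion of const Value* and const PseudoSourceValue*, hence the new isNull()/is<T>()/get<T>()/getOpaqueValue() calls. A reduced sketch of that pattern with stand-in types (assuming only the 3.5-era ADT header):

    #include "llvm/ADT/PointerUnion.h"

    struct IRValue     { void *P; };  // stand-in for llvm::Value
    struct PseudoValue { void *P; };  // stand-in for PseudoSourceValue

    typedef llvm::PointerUnion<const IRValue *, const PseudoValue *> PtrUnion;

    // Mirrors the guard getAddrSpace() now applies before touching the IR type.
    static bool hasIRPointer(PtrUnion V) {
      return !V.isNull() && V.is<const IRValue *>();
    }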
diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp
index d3a1ee7..68d2efd 100644
--- a/lib/CodeGen/MachineLICM.cpp
+++ b/lib/CodeGen/MachineLICM.cpp
@@ -20,7 +20,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "machine-licm"
#include "llvm/CodeGen/Passes.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallSet.h"
@@ -42,6 +41,8 @@
#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
+#define DEBUG_TYPE "machine-licm"
+
static cl::opt<bool>
AvoidSpeculation("avoid-speculation",
cl::desc("MachineLICM should avoid speculation"),
@@ -358,7 +359,7 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) {
SmallVector<MachineLoop *, 8> Worklist(MLI->begin(), MLI->end());
while (!Worklist.empty()) {
CurLoop = Worklist.pop_back_val();
- CurPreheader = 0;
+ CurPreheader = nullptr;
ExitBlocks.clear();
// If this is done before regalloc, only visit outer-most preheader-sporting
@@ -390,10 +391,10 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) {
static bool InstructionStoresToFI(const MachineInstr *MI, int FI) {
for (MachineInstr::mmo_iterator o = MI->memoperands_begin(),
oe = MI->memoperands_end(); o != oe; ++o) {
- if (!(*o)->isStore() || !(*o)->getValue())
+ if (!(*o)->isStore() || !(*o)->getPseudoValue())
continue;
if (const FixedStackPseudoSourceValue *Value =
- dyn_cast<const FixedStackPseudoSourceValue>((*o)->getValue())) {
+ dyn_cast<FixedStackPseudoSourceValue>((*o)->getPseudoValue())) {
if (Value->getFrameIndex() == FI)
return true;
}
@@ -700,7 +701,7 @@ void MachineLICM::HoistOutOfLoop(MachineDomTreeNode *HeaderN) {
WorkList.push_back(HeaderN);
do {
MachineDomTreeNode *Node = WorkList.pop_back_val();
- assert(Node != 0 && "Null dominator tree node?");
+ assert(Node && "Null dominator tree node?");
MachineBasicBlock *BB = Node->getBlock();
// If the header of the loop containing this basic block is a landing pad,
@@ -804,7 +805,7 @@ void MachineLICM::InitRegPressure(MachineBasicBlock *BB) {
// defs as well. This happens whenever the preheader is created by splitting
// the critical edge from the loop predecessor to the loop header.
if (BB->pred_size() == 1) {
- MachineBasicBlock *TBB = 0, *FBB = 0;
+ MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
SmallVector<MachineOperand, 4> Cond;
if (!TII->AnalyzeBranch(*BB, TBB, FBB, Cond, false) && Cond.empty())
InitRegPressure(*BB->pred_begin());
@@ -882,10 +883,9 @@ static bool isLoadFromGOTOrConstantPool(MachineInstr &MI) {
assert (MI.mayLoad() && "Expected MI that loads!");
for (MachineInstr::mmo_iterator I = MI.memoperands_begin(),
E = MI.memoperands_end(); I != E; ++I) {
- if (const Value *V = (*I)->getValue()) {
- if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V))
- if (PSV == PSV->getGOT() || PSV == PSV->getConstantPool())
- return true;
+ if (const PseudoSourceValue *PSV = (*I)->getPseudoValue()) {
+ if (PSV == PSV->getGOT() || PSV == PSV->getConstantPool())
+ return true;
}
}
return false;
@@ -1241,13 +1241,13 @@ bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) {
MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) {
// Don't unfold simple loads.
if (MI->canFoldAsLoad())
- return 0;
+ return nullptr;
// If not, we may be able to unfold a load and hoist that.
// First test whether the instruction is loading from an amenable
// memory location.
if (!MI->isInvariantLoad(AA))
- return 0;
+ return nullptr;
// Next determine the register class for a temporary register.
unsigned LoadRegIndex;
@@ -1256,9 +1256,9 @@ MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) {
/*UnfoldLoad=*/true,
/*UnfoldStore=*/false,
&LoadRegIndex);
- if (NewOpc == 0) return 0;
+ if (NewOpc == 0) return nullptr;
const MCInstrDesc &MID = TII->get(NewOpc);
- if (MID.getNumDefs() != 1) return 0;
+ if (MID.getNumDefs() != 1) return nullptr;
MachineFunction &MF = *MI->getParent()->getParent();
const TargetRegisterClass *RC = TII->getRegClass(MID, LoadRegIndex, TRI, MF);
// Ok, we're unfolding. Create a temporary register and do the unfold.
@@ -1284,7 +1284,7 @@ MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) {
if (!IsLoopInvariantInst(*NewMIs[0]) || !IsProfitableToHoist(*NewMIs[0])) {
NewMIs[0]->eraseFromParent();
NewMIs[1]->eraseFromParent();
- return 0;
+ return nullptr;
}
// Update register pressure for the unfolded instruction.
@@ -1316,10 +1316,10 @@ MachineLICM::LookForDuplicate(const MachineInstr *MI,
std::vector<const MachineInstr*> &PrevMIs) {
for (unsigned i = 0, e = PrevMIs.size(); i != e; ++i) {
const MachineInstr *PrevMI = PrevMIs[i];
- if (TII->produceSameValue(MI, PrevMI, (PreRegAlloc ? MRI : 0)))
+ if (TII->produceSameValue(MI, PrevMI, (PreRegAlloc ? MRI : nullptr)))
return PrevMI;
}
- return 0;
+ return nullptr;
}
bool MachineLICM::EliminateCSE(MachineInstr *MI,
@@ -1390,7 +1390,7 @@ bool MachineLICM::MayCSE(MachineInstr *MI) {
if (CI == CSEMap.end() || MI->isImplicitDef())
return false;
- return LookForDuplicate(MI, CI->second) != 0;
+ return LookForDuplicate(MI, CI->second) != nullptr;
}
/// Hoist - When an instruction is found to use only loop invariant operands
@@ -1466,7 +1466,7 @@ MachineBasicBlock *MachineLICM::getCurPreheader() {
// If we've tried to get a preheader and failed, don't try again.
if (CurPreheader == reinterpret_cast<MachineBasicBlock *>(-1))
- return 0;
+ return nullptr;
if (!CurPreheader) {
CurPreheader = CurLoop->getLoopPreheader();
@@ -1474,13 +1474,13 @@ MachineBasicBlock *MachineLICM::getCurPreheader() {
MachineBasicBlock *Pred = CurLoop->getLoopPredecessor();
if (!Pred) {
CurPreheader = reinterpret_cast<MachineBasicBlock *>(-1);
- return 0;
+ return nullptr;
}
CurPreheader = Pred->SplitCriticalEdge(CurLoop->getHeader(), this);
if (!CurPreheader) {
CurPreheader = reinterpret_cast<MachineBasicBlock *>(-1);
- return 0;
+ return nullptr;
}
}
}
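Editor's note: the recurring rewrite in MachineLICM — dyn_cast<PseudoSourceValue>((*o)->getValue()) becoming a direct (*o)->getPseudoValue() — is the consumer side of that PointerUnion split. A hedged sketch of the new-style memoperand query (the helper name is illustrative):

    #include "llvm/CodeGen/MachineInstr.h"
    #include "llvm/CodeGen/PseudoSourceValue.h"
    using namespace llvm;

    // True if some memoperand of MI refers to a constant pseudo source
    // (e.g. the constant pool, or an immutable fixed-stack slot).
    static bool touchesConstantPseudo(const MachineInstr &MI,
                                      const MachineFrameInfo *MFI) {
      for (MachineInstr::mmo_iterator I = MI.memoperands_begin(),
                                      E = MI.memoperands_end(); I != E; ++I)
        if (const PseudoSourceValue *PSV = (*I)->getPseudoValue())
          if (PSV->isConstant(MFI))
            return true;
      return false;
    }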
diff --git a/lib/CodeGen/MachineModuleInfo.cpp b/lib/CodeGen/MachineModuleInfo.cpp
index 7181025..4976e35 100644
--- a/lib/CodeGen/MachineModuleInfo.cpp
+++ b/lib/CodeGen/MachineModuleInfo.cpp
@@ -36,8 +36,8 @@ namespace llvm {
class MMIAddrLabelMapCallbackPtr : CallbackVH {
MMIAddrLabelMap *Map;
public:
- MMIAddrLabelMapCallbackPtr() : Map(0) {}
- MMIAddrLabelMapCallbackPtr(Value *V) : CallbackVH(V), Map(0) {}
+ MMIAddrLabelMapCallbackPtr() : Map(nullptr) {}
+ MMIAddrLabelMapCallbackPtr(Value *V) : CallbackVH(V), Map(nullptr) {}
void setPtr(BasicBlock *BB) {
ValueHandleBase::operator=(BB);
@@ -163,9 +163,9 @@ void MMIAddrLabelMap::UpdateForDeletedBlock(BasicBlock *BB) {
AddrLabelSymEntry Entry = AddrLabelSymbols[BB];
AddrLabelSymbols.erase(BB);
assert(!Entry.Symbols.isNull() && "Didn't have a symbol, why a callback?");
- BBCallbacks[Entry.Index] = 0; // Clear the callback.
+ BBCallbacks[Entry.Index] = nullptr; // Clear the callback.
- assert((BB->getParent() == 0 || BB->getParent() == Entry.Fn) &&
+ assert((BB->getParent() == nullptr || BB->getParent() == Entry.Fn) &&
"Block/parent mismatch");
// Handle both the single and the multiple symbols cases.
@@ -213,7 +213,7 @@ void MMIAddrLabelMap::UpdateForRAUWBlock(BasicBlock *Old, BasicBlock *New) {
return;
}
- BBCallbacks[OldEntry.Index] = 0; // Update the callback.
+ BBCallbacks[OldEntry.Index] = nullptr; // Update the callback.
// Otherwise, we need to add the old symbol to the new block's set. If it is
// just a single entry, upgrade it to a symbol list.
@@ -253,12 +253,12 @@ void MMIAddrLabelMapCallbackPtr::allUsesReplacedWith(Value *V2) {
MachineModuleInfo::MachineModuleInfo(const MCAsmInfo &MAI,
const MCRegisterInfo &MRI,
const MCObjectFileInfo *MOFI)
- : ImmutablePass(ID), Context(&MAI, &MRI, MOFI, 0, false) {
+ : ImmutablePass(ID), Context(&MAI, &MRI, MOFI, nullptr, false) {
initializeMachineModuleInfoPass(*PassRegistry::getPassRegistry());
}
MachineModuleInfo::MachineModuleInfo()
- : ImmutablePass(ID), Context(0, 0, 0) {
+ : ImmutablePass(ID), Context(nullptr, nullptr, nullptr) {
llvm_unreachable("This MachineModuleInfo constructor should never be called, "
"MMI should always be explicitly constructed by "
"LLVMTargetMachine");
@@ -269,16 +269,16 @@ MachineModuleInfo::~MachineModuleInfo() {
bool MachineModuleInfo::doInitialization(Module &M) {
- ObjFileMMI = 0;
+ ObjFileMMI = nullptr;
CompactUnwindEncoding = 0;
CurCallSite = 0;
CallsEHReturn = 0;
CallsUnwindInit = 0;
DbgInfoAvailable = UsesVAFloatArgument = false;
// Always emit some info, by default "no personality" info.
- Personalities.push_back(NULL);
- AddrLabelSymbols = 0;
- TheModule = 0;
+ Personalities.push_back(nullptr);
+ AddrLabelSymbols = nullptr;
+ TheModule = nullptr;
return false;
}
@@ -288,12 +288,12 @@ bool MachineModuleInfo::doFinalization(Module &M) {
Personalities.clear();
delete AddrLabelSymbols;
- AddrLabelSymbols = 0;
+ AddrLabelSymbols = nullptr;
Context.reset();
delete ObjFileMMI;
- ObjFileMMI = 0;
+ ObjFileMMI = nullptr;
return false;
}
@@ -341,7 +341,7 @@ void MachineModuleInfo::AnalyzeModule(const Module &M) {
/// because the block may be accessed outside its containing function.
MCSymbol *MachineModuleInfo::getAddrLabelSymbol(const BasicBlock *BB) {
// Lazily create AddrLabelSymbols.
- if (AddrLabelSymbols == 0)
+ if (!AddrLabelSymbols)
AddrLabelSymbols = new MMIAddrLabelMap(Context);
return AddrLabelSymbols->getAddrLabelSymbol(const_cast<BasicBlock*>(BB));
}
@@ -352,7 +352,7 @@ MCSymbol *MachineModuleInfo::getAddrLabelSymbol(const BasicBlock *BB) {
std::vector<MCSymbol*> MachineModuleInfo::
getAddrLabelSymbolToEmit(const BasicBlock *BB) {
// Lazily create AddrLabelSymbols.
- if (AddrLabelSymbols == 0)
+ if (!AddrLabelSymbols)
AddrLabelSymbols = new MMIAddrLabelMap(Context);
return AddrLabelSymbols->getAddrLabelSymbolToEmit(const_cast<BasicBlock*>(BB));
}
@@ -366,7 +366,7 @@ void MachineModuleInfo::
takeDeletedSymbolsForFunction(const Function *F,
std::vector<MCSymbol*> &Result) {
// If no blocks have had their addresses taken, we're done.
- if (AddrLabelSymbols == 0) return;
+ if (!AddrLabelSymbols) return;
return AddrLabelSymbols->
takeDeletedSymbolsForFunction(const_cast<Function*>(F), Result);
}
@@ -419,7 +419,7 @@ void MachineModuleInfo::addPersonality(MachineBasicBlock *LandingPad,
// If this is the first personality we're adding go
// ahead and add it at the beginning.
- if (Personalities[0] == NULL)
+ if (!Personalities[0])
Personalities[0] = Personality;
else
Personalities.push_back(Personality);
@@ -462,7 +462,7 @@ void MachineModuleInfo::TidyLandingPads(DenseMap<MCSymbol*, uintptr_t> *LPMap) {
if (LandingPad.LandingPadLabel &&
!LandingPad.LandingPadLabel->isDefined() &&
(!LPMap || (*LPMap)[LandingPad.LandingPadLabel] == 0))
- LandingPad.LandingPadLabel = 0;
+ LandingPad.LandingPadLabel = nullptr;
// Special case: we *should* emit LPs with null LP MBB. This indicates
// the "nounwind" case.
@@ -550,13 +550,13 @@ try_next:;
const Function *MachineModuleInfo::getPersonality() const {
// FIXME: Until PR1414 is fixed, we're using one personality function per
// function
- return !LandingPads.empty() ? LandingPads[0].Personality : NULL;
+ return !LandingPads.empty() ? LandingPads[0].Personality : nullptr;
}
/// getPersonalityIndex - Return unique index for current personality
/// function. NULL/first personality function should always get zero index.
unsigned MachineModuleInfo::getPersonalityIndex() const {
- const Function* Personality = NULL;
+ const Function* Personality = nullptr;
// Scan landing pads. If there is at least one non-NULL personality - use it.
for (unsigned i = 0, e = LandingPads.size(); i != e; ++i)
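Editor's note: beyond the conversions, these MachineModuleInfo hunks settle on the if (!P) lazy-creation idiom for AddrLabelSymbols. Its shape, reduced to generic names (not from the patch):

    struct Map { /* ... */ };

    struct Owner {
      Map *Lazy = nullptr;
      Map &get() {
        if (!Lazy)          // previously spelled "if (Lazy == 0)"
          Lazy = new Map();
        return *Lazy;
      }
      ~Owner() { delete Lazy; }
    };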
diff --git a/lib/CodeGen/MachinePassRegistry.cpp b/lib/CodeGen/MachinePassRegistry.cpp
index cb204fd..3ee3e40 100644
--- a/lib/CodeGen/MachinePassRegistry.cpp
+++ b/lib/CodeGen/MachinePassRegistry.cpp
@@ -20,7 +20,7 @@ void MachinePassRegistryListener::anchor() { }
/// setDefault - Set the default constructor by name.
void MachinePassRegistry::setDefault(StringRef Name) {
- MachinePassCtor Ctor = 0;
+ MachinePassCtor Ctor = nullptr;
for(MachinePassRegistryNode *R = getList(); R; R = R->getNext()) {
if (R->getName() == Name) {
Ctor = R->getCtor();
diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp
index db3eec3..f560259 100644
--- a/lib/CodeGen/MachineRegisterInfo.cpp
+++ b/lib/CodeGen/MachineRegisterInfo.cpp
@@ -23,7 +23,7 @@ using namespace llvm;
void MachineRegisterInfo::Delegate::anchor() {}
MachineRegisterInfo::MachineRegisterInfo(const TargetMachine &TM)
- : TM(TM), TheDelegate(0), IsSSA(true), TracksLiveness(true) {
+ : TM(TM), TheDelegate(nullptr), IsSSA(true), TracksLiveness(true) {
VRegInfo.reserve(256);
RegAllocHints.reserve(256);
UsedRegUnits.resize(getTargetRegisterInfo()->getNumRegUnits());
@@ -60,7 +60,7 @@ MachineRegisterInfo::constrainRegClass(unsigned Reg,
if (!NewRC || NewRC == OldRC)
return NewRC;
if (NewRC->getNumRegs() < MinNumRegs)
- return 0;
+ return nullptr;
setRegClass(Reg, NewRC);
return NewRC;
}
@@ -182,7 +182,7 @@ void MachineRegisterInfo::addRegOperandToUseList(MachineOperand *MO) {
// Head is NULL for an empty list.
if (!Head) {
MO->Contents.Reg.Prev = MO;
- MO->Contents.Reg.Next = 0;
+ MO->Contents.Reg.Next = nullptr;
HeadRef = MO;
return;
}
@@ -203,7 +203,7 @@ void MachineRegisterInfo::addRegOperandToUseList(MachineOperand *MO) {
HeadRef = MO;
} else {
// Insert use at the end.
- MO->Contents.Reg.Next = 0;
+ MO->Contents.Reg.Next = nullptr;
Last->Contents.Reg.Next = MO;
}
}
@@ -227,8 +227,8 @@ void MachineRegisterInfo::removeRegOperandFromUseList(MachineOperand *MO) {
(Next ? Next : Head)->Contents.Reg.Prev = Prev;
- MO->Contents.Reg.Prev = 0;
- MO->Contents.Reg.Next = 0;
+ MO->Contents.Reg.Prev = nullptr;
+ MO->Contents.Reg.Next = nullptr;
}
/// Move NumOps operands from Src to Dst, updating use-def lists as needed.
@@ -303,17 +303,17 @@ MachineInstr *MachineRegisterInfo::getVRegDef(unsigned Reg) const {
def_instr_iterator I = def_instr_begin(Reg);
assert((I.atEnd() || std::next(I) == def_instr_end()) &&
"getVRegDef assumes a single definition or no definition");
- return !I.atEnd() ? &*I : 0;
+ return !I.atEnd() ? &*I : nullptr;
}
/// getUniqueVRegDef - Return the unique machine instr that defines the
/// specified virtual register or null if none is found. If there are
/// multiple definitions or no definition, return null.
MachineInstr *MachineRegisterInfo::getUniqueVRegDef(unsigned Reg) const {
- if (def_empty(Reg)) return 0;
+ if (def_empty(Reg)) return nullptr;
def_instr_iterator I = def_instr_begin(Reg);
if (std::next(I) != def_instr_end())
- return 0;
+ return nullptr;
return &*I;
}
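Editor's note: the Prev/Next stores nulled out here maintain MachineRegisterInfo's intrusive use lists, in which Next is null-terminated while the head's Prev points at the last node. The empty-list insertion, in miniature (simplified node type, not the real MachineOperand):

    struct Node {
      Node *Prev = nullptr;
      Node *Next = nullptr;
    };

    // What addRegOperandToUseList does for an empty list: the lone node is
    // its own "last" via Prev, and the Next chain ends in nullptr.
    static void insertIntoEmptyList(Node *&Head, Node *N) {
      N->Prev = N;
      N->Next = nullptr;
      Head = N;
    }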
diff --git a/lib/CodeGen/MachineSSAUpdater.cpp b/lib/CodeGen/MachineSSAUpdater.cpp
index 77496ad..d9173a2 100644
--- a/lib/CodeGen/MachineSSAUpdater.cpp
+++ b/lib/CodeGen/MachineSSAUpdater.cpp
@@ -29,6 +29,8 @@
#include "llvm/Transforms/Utils/SSAUpdaterImpl.h"
using namespace llvm;
+#define DEBUG_TYPE "machine-ssaupdater"
+
typedef DenseMap<MachineBasicBlock*, unsigned> AvailableValsTy;
static AvailableValsTy &getAvailableVals(void *AV) {
return *static_cast<AvailableValsTy*>(AV);
@@ -36,7 +38,7 @@ static AvailableValsTy &getAvailableVals(void *AV) {
MachineSSAUpdater::MachineSSAUpdater(MachineFunction &MF,
SmallVectorImpl<MachineInstr*> *NewPHI)
- : AV(0), InsertedPHIs(NewPHI) {
+ : AV(nullptr), InsertedPHIs(NewPHI) {
TII = MF.getTarget().getInstrInfo();
MRI = &MF.getRegInfo();
}
@@ -48,7 +50,7 @@ MachineSSAUpdater::~MachineSSAUpdater() {
/// Initialize - Reset this object to get ready for a new set of SSA
/// updates. ProtoValue is the value used to name PHI nodes.
void MachineSSAUpdater::Initialize(unsigned V) {
- if (AV == 0)
+ if (!AV)
AV = new AvailableValsTy();
else
getAvailableVals(AV).clear();
@@ -313,7 +315,7 @@ public:
static MachineInstr *InstrIsPHI(MachineInstr *I) {
if (I && I->isPHI())
return I;
- return 0;
+ return nullptr;
}
/// ValueIsPHI - Check if the instruction that defines the specified register
@@ -328,7 +330,7 @@ public:
MachineInstr *PHI = ValueIsPHI(Val, Updater);
if (PHI && PHI->getNumOperands() <= 1)
return PHI;
- return 0;
+ return nullptr;
}
/// GetPHIValue - For the specified PHI instruction, return the register
diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp
index d90cd23..23847d6 100644
--- a/lib/CodeGen/MachineScheduler.cpp
+++ b/lib/CodeGen/MachineScheduler.cpp
@@ -12,8 +12,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "misched"
-
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/ADT/PriorityQueue.h"
#include "llvm/Analysis/AliasAnalysis.h"
@@ -35,6 +33,8 @@
using namespace llvm;
+#define DEBUG_TYPE "misched"
+
namespace llvm {
cl::opt<bool> ForceTopDown("misched-topdown", cl::Hidden,
cl::desc("Force top-down list scheduling"));
@@ -85,7 +85,7 @@ void ScheduleDAGMutation::anchor() {}
//===----------------------------------------------------------------------===//
MachineSchedContext::MachineSchedContext():
- MF(0), MLI(0), MDT(0), PassConfig(0), AA(0), LIS(0) {
+ MF(nullptr), MLI(nullptr), MDT(nullptr), PassConfig(nullptr), AA(nullptr), LIS(nullptr) {
RegClassInfo = new RegisterClassInfo();
}
@@ -100,7 +100,7 @@ class MachineSchedulerBase : public MachineSchedContext,
public:
MachineSchedulerBase(char &ID): MachineFunctionPass(ID) {}
- void print(raw_ostream &O, const Module* = 0) const override;
+ void print(raw_ostream &O, const Module* = nullptr) const override;
protected:
void scheduleRegions(ScheduleDAGInstrs &Scheduler);
@@ -192,7 +192,7 @@ MachinePassRegistry MachineSchedRegistry::Registry;
/// A dummy default scheduler factory indicates whether the scheduler
/// is overridden on the command line.
static ScheduleDAGInstrs *useDefaultMachineSched(MachineSchedContext *C) {
- return 0;
+ return nullptr;
}
/// MachineSchedOpt allows command line selection of the scheduler.
@@ -487,9 +487,8 @@ void ReadyQueue::dump() {
// virtual registers.
// ===----------------------------------------------------------------------===/
+// Provide a vtable anchor.
ScheduleDAGMI::~ScheduleDAGMI() {
- DeleteContainerPointers(Mutations);
- delete SchedImpl;
}
bool ScheduleDAGMI::canAddEdge(SUnit *SuccSU, SUnit *PredSU) {
@@ -527,7 +526,7 @@ void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) {
dbgs() << "*** Scheduling failed! ***\n";
SuccSU->dump(this);
dbgs() << " has been released too many times!\n";
- llvm_unreachable(0);
+ llvm_unreachable(nullptr);
}
#endif
--SuccSU->NumPredsLeft;
@@ -561,7 +560,7 @@ void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) {
dbgs() << "*** Scheduling failed! ***\n";
PredSU->dump(this);
dbgs() << " has been released too many times!\n";
- llvm_unreachable(0);
+ llvm_unreachable(nullptr);
}
#endif
--PredSU->NumSuccsLeft;
@@ -723,8 +722,8 @@ findRootsAndBiasEdges(SmallVectorImpl<SUnit*> &TopRoots,
/// Identify DAG roots and setup scheduler queues.
void ScheduleDAGMI::initQueues(ArrayRef<SUnit*> TopRoots,
ArrayRef<SUnit*> BotRoots) {
- NextClusterSucc = NULL;
- NextClusterPred = NULL;
+ NextClusterSucc = nullptr;
+ NextClusterPred = nullptr;
// Release all DAG roots for scheduling, not including EntrySU/ExitSU.
//
@@ -782,7 +781,7 @@ void ScheduleDAGMI::placeDebugValues() {
RegionEnd = DbgValue;
}
DbgValues.clear();
- FirstDbgValue = NULL;
+ FirstDbgValue = nullptr;
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -1549,7 +1548,7 @@ void SchedBoundary::reset() {
// invalid, placeholder HazardRecs.
if (HazardRec && HazardRec->isEnabled()) {
delete HazardRec;
- HazardRec = 0;
+ HazardRec = nullptr;
}
Available.clear();
Pending.clear();
@@ -1679,7 +1678,7 @@ bool SchedBoundary::checkHazard(SUnit *SU) {
// Find the unscheduled node in ReadySUs with the highest latency.
unsigned SchedBoundary::
findMaxLatency(ArrayRef<SUnit*> ReadySUs) {
- SUnit *LateSU = 0;
+ SUnit *LateSU = nullptr;
unsigned RemLatency = 0;
for (ArrayRef<SUnit*>::iterator I = ReadySUs.begin(), E = ReadySUs.end();
I != E; ++I) {
@@ -2057,7 +2056,7 @@ SUnit *SchedBoundary::pickOnlyChoice() {
}
if (Available.size() == 1)
return *Available.begin();
- return NULL;
+ return nullptr;
}
#ifndef NDEBUG
@@ -2157,7 +2156,7 @@ public:
SchedResourceDelta ResDelta;
SchedCandidate(const CandPolicy &policy)
- : Policy(policy), SU(NULL), Reason(NoCand), RepeatReasonSet(0) {}
+ : Policy(policy), SU(nullptr), Reason(NoCand), RepeatReasonSet(0) {}
bool isValid() const { return SU; }
@@ -2185,7 +2184,7 @@ protected:
SchedRemainder Rem;
protected:
GenericSchedulerBase(const MachineSchedContext *C):
- Context(C), SchedModel(0), TRI(0) {}
+ Context(C), SchedModel(nullptr), TRI(nullptr) {}
void setPolicy(CandPolicy &Policy, bool IsPostRA, SchedBoundary &CurrZone,
SchedBoundary *OtherZone);
@@ -2444,7 +2443,7 @@ class GenericScheduler : public GenericSchedulerBase {
MachineSchedPolicy RegionPolicy;
public:
GenericScheduler(const MachineSchedContext *C):
- GenericSchedulerBase(C), DAG(0), Top(SchedBoundary::TopQID, "TopQ"),
+ GenericSchedulerBase(C), DAG(nullptr), Top(SchedBoundary::TopQID, "TopQ"),
Bot(SchedBoundary::BotQID, "BotQ") {}
void initPolicy(MachineBasicBlock::iterator Begin,
@@ -2910,7 +2909,7 @@ SUnit *GenericScheduler::pickNode(bool &IsTopNode) {
if (DAG->top() == DAG->bottom()) {
assert(Top.Available.empty() && Top.Pending.empty() &&
Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
- return NULL;
+ return nullptr;
}
SUnit *SU;
do {
@@ -3002,17 +3001,17 @@ void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
/// Create the standard converging machine scheduler. This will be used as the
/// default scheduler if the target does not set a default.
static ScheduleDAGInstrs *createGenericSchedLive(MachineSchedContext *C) {
- ScheduleDAGMILive *DAG = new ScheduleDAGMILive(C, new GenericScheduler(C));
+ ScheduleDAGMILive *DAG = new ScheduleDAGMILive(C, make_unique<GenericScheduler>(C));
// Register DAG post-processors.
//
// FIXME: extend the mutation API to allow earlier mutations to instantiate
// data and pass it to later mutations. Have a single mutation that gathers
// the interesting nodes in one pass.
- DAG->addMutation(new CopyConstrain(DAG->TII, DAG->TRI));
+ DAG->addMutation(make_unique<CopyConstrain>(DAG->TII, DAG->TRI));
if (EnableLoadCluster && DAG->TII->enableClusterLoads())
- DAG->addMutation(new LoadClusterMutation(DAG->TII, DAG->TRI));
+ DAG->addMutation(make_unique<LoadClusterMutation>(DAG->TII, DAG->TRI));
if (EnableMacroFusion)
- DAG->addMutation(new MacroFusion(DAG->TII));
+ DAG->addMutation(make_unique<MacroFusion>(DAG->TII));
return DAG;
}
@@ -3164,7 +3163,7 @@ void PostGenericScheduler::pickNodeFromQueue(SchedCandidate &Cand) {
SUnit *PostGenericScheduler::pickNode(bool &IsTopNode) {
if (DAG->top() == DAG->bottom()) {
assert(Top.Available.empty() && Top.Pending.empty() && "ReadyQ garbage");
- return NULL;
+ return nullptr;
}
SUnit *SU;
do {
@@ -3174,7 +3173,7 @@ SUnit *PostGenericScheduler::pickNode(bool &IsTopNode) {
SchedCandidate TopCand(NoPolicy);
// Set the top-down policy based on the state of the current top zone and
// the instructions outside the zone, including the bottom zone.
- setPolicy(TopCand.Policy, /*IsPostRA=*/true, Top, NULL);
+ setPolicy(TopCand.Policy, /*IsPostRA=*/true, Top, nullptr);
pickNodeFromQueue(TopCand);
assert(TopCand.Reason != NoCand && "failed to find a candidate");
tracePick(TopCand, true);
@@ -3198,7 +3197,7 @@ void PostGenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
/// Create a generic scheduler with no vreg liveness or DAG mutation passes.
static ScheduleDAGInstrs *createGenericSchedPostRA(MachineSchedContext *C) {
- return new ScheduleDAGMI(C, new PostGenericScheduler(C), /*IsPostRA=*/true);
+ return new ScheduleDAGMI(C, make_unique<PostGenericScheduler>(C), /*IsPostRA=*/true);
}
//===----------------------------------------------------------------------===//
@@ -3212,7 +3211,8 @@ struct ILPOrder {
const BitVector *ScheduledTrees;
bool MaximizeILP;
- ILPOrder(bool MaxILP): DFSResult(0), ScheduledTrees(0), MaximizeILP(MaxILP) {}
+ ILPOrder(bool MaxILP)
+ : DFSResult(nullptr), ScheduledTrees(nullptr), MaximizeILP(MaxILP) {}
/// \brief Apply a less-than relation on node priority.
///
@@ -3246,7 +3246,7 @@ class ILPScheduler : public MachineSchedStrategy {
std::vector<SUnit*> ReadyQ;
public:
- ILPScheduler(bool MaximizeILP): DAG(0), Cmp(MaximizeILP) {}
+ ILPScheduler(bool MaximizeILP): DAG(nullptr), Cmp(MaximizeILP) {}
void initialize(ScheduleDAGMI *dag) override {
assert(dag->hasVRegLiveness() && "ILPScheduler needs vreg liveness");
@@ -3267,7 +3267,7 @@ public:
/// Callback to select the highest priority node from the ready Q.
SUnit *pickNode(bool &IsTopNode) override {
- if (ReadyQ.empty()) return NULL;
+ if (ReadyQ.empty()) return nullptr;
std::pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
SUnit *SU = ReadyQ.back();
ReadyQ.pop_back();
@@ -3302,10 +3302,10 @@ public:
} // namespace
static ScheduleDAGInstrs *createILPMaxScheduler(MachineSchedContext *C) {
- return new ScheduleDAGMILive(C, new ILPScheduler(true));
+ return new ScheduleDAGMILive(C, make_unique<ILPScheduler>(true));
}
static ScheduleDAGInstrs *createILPMinScheduler(MachineSchedContext *C) {
- return new ScheduleDAGMILive(C, new ILPScheduler(false));
+ return new ScheduleDAGMILive(C, make_unique<ILPScheduler>(false));
}
static MachineSchedRegistry ILPMaxRegistry(
"ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler);
@@ -3347,7 +3347,7 @@ public:
InstructionShuffler(bool alternate, bool topdown)
: IsAlternating(alternate), IsTopDown(topdown) {}
- virtual void initialize(ScheduleDAGMI*) {
+ void initialize(ScheduleDAGMI*) override {
TopQ.clear();
BottomQ.clear();
}
@@ -3355,11 +3355,11 @@ public:
/// Implement MachineSchedStrategy interface.
/// -----------------------------------------
- virtual SUnit *pickNode(bool &IsTopNode) {
+ SUnit *pickNode(bool &IsTopNode) override {
SUnit *SU;
if (IsTopDown) {
do {
- if (TopQ.empty()) return NULL;
+ if (TopQ.empty()) return nullptr;
SU = TopQ.top();
TopQ.pop();
} while (SU->isScheduled);
@@ -3367,7 +3367,7 @@ public:
}
else {
do {
- if (BottomQ.empty()) return NULL;
+ if (BottomQ.empty()) return nullptr;
SU = BottomQ.top();
BottomQ.pop();
} while (SU->isScheduled);
@@ -3378,12 +3378,12 @@ public:
return SU;
}
- virtual void schedNode(SUnit *SU, bool IsTopNode) {}
+ void schedNode(SUnit *SU, bool IsTopNode) override {}
- virtual void releaseTopNode(SUnit *SU) {
+ void releaseTopNode(SUnit *SU) override {
TopQ.push(SU);
}
- virtual void releaseBottomNode(SUnit *SU) {
+ void releaseBottomNode(SUnit *SU) override {
BottomQ.push(SU);
}
};
@@ -3394,7 +3394,7 @@ static ScheduleDAGInstrs *createInstructionShuffler(MachineSchedContext *C) {
bool TopDown = !ForceBottomUp;
assert((TopDown || !ForceTopDown) &&
"-misched-topdown incompatible with -misched-bottomup");
- return new ScheduleDAGMILive(C, new InstructionShuffler(Alternate, TopDown));
+ return new ScheduleDAGMILive(C, make_unique<InstructionShuffler>(Alternate, TopDown));
}
static MachineSchedRegistry ShufflerRegistry(
"shuffle", "Shuffle machine instructions alternating directions",
@@ -3450,7 +3450,7 @@ struct DOTGraphTraits<ScheduleDAGMI*> : public DefaultDOTGraphTraits {
raw_string_ostream SS(Str);
const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G);
const SchedDFSResult *DFS = DAG->hasVRegLiveness() ?
- static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : 0;
+ static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr;
SS << "SU:" << SU->NodeNum;
if (DFS)
SS << " I:" << DFS->getNumInstrs(SU);
@@ -3464,7 +3464,7 @@ struct DOTGraphTraits<ScheduleDAGMI*> : public DefaultDOTGraphTraits {
std::string Str("shape=Mrecord");
const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G);
const SchedDFSResult *DFS = DAG->hasVRegLiveness() ?
- static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : 0;
+ static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr;
if (DFS) {
Str += ",style=filled,fillcolor=\"#";
Str += DOT::getColorString(DFS->getSubtreeID(N));
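Editor's note: the substantive change in MachineScheduler.cpp is ownership. ScheduleDAGMI now receives its strategy and mutations as owned smart pointers built with llvm::make_unique (the 3.5-era stand-in for std::make_unique), so the destructor's manual deletes disappear and it survives only as a vtable anchor. The handoff, reduced to standard C++ with generic names:

    #include <memory>
    #include <utility>
    #include <vector>

    struct Strategy { virtual ~Strategy() {} };
    struct Mutation { virtual ~Mutation() {} };

    struct DAG {
      std::unique_ptr<Strategy> SchedImpl;
      std::vector<std::unique_ptr<Mutation>> Mutations;

      explicit DAG(std::unique_ptr<Strategy> S) : SchedImpl(std::move(S)) {}

      void addMutation(std::unique_ptr<Mutation> M) {
        Mutations.push_back(std::move(M));  // the container owns M now
      }
      // No explicit destructor needed: unique_ptr releases everything.
    };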
diff --git a/lib/CodeGen/MachineSink.cpp b/lib/CodeGen/MachineSink.cpp
index dbff1f6..f44e4d1 100644
--- a/lib/CodeGen/MachineSink.cpp
+++ b/lib/CodeGen/MachineSink.cpp
@@ -16,7 +16,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "machine-sink"
#include "llvm/CodeGen/Passes.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
@@ -32,6 +31,8 @@
#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
+#define DEBUG_TYPE "machine-sink"
+
static cl::opt<bool>
SplitEdges("machine-sink-split",
cl::desc("Split critical edges during machine sinking"),
@@ -332,16 +333,16 @@ MachineBasicBlock *MachineSinking::SplitCriticalEdge(MachineInstr *MI,
MachineBasicBlock *ToBB,
bool BreakPHIEdge) {
if (!isWorthBreakingCriticalEdge(MI, FromBB, ToBB))
- return 0;
+ return nullptr;
// Avoid breaking back edge. From == To means backedge for single BB loop.
if (!SplitEdges || FromBB == ToBB)
- return 0;
+ return nullptr;
// Check for backedges of more "complex" loops.
if (LI->getLoopFor(FromBB) == LI->getLoopFor(ToBB) &&
LI->isLoopHeader(ToBB))
- return 0;
+ return nullptr;
// It's not always legal to break critical edges and sink the computation
// to the edge.
@@ -388,7 +389,7 @@ MachineBasicBlock *MachineSinking::SplitCriticalEdge(MachineInstr *MI,
if (*PI == FromBB)
continue;
if (!DT->dominates(ToBB, *PI))
- return 0;
+ return nullptr;
}
}
@@ -484,7 +485,7 @@ MachineBasicBlock *MachineSinking::FindSuccToSinkTo(MachineInstr *MI,
// SuccToSinkTo - This is the successor to sink this instruction to, once we
// decide.
- MachineBasicBlock *SuccToSinkTo = 0;
+ MachineBasicBlock *SuccToSinkTo = nullptr;
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
if (!MO.isReg()) continue; // Ignore non-register operands.
@@ -498,10 +499,10 @@ MachineBasicBlock *MachineSinking::FindSuccToSinkTo(MachineInstr *MI,
// and we can freely move its uses. Alternatively, if it's allocatable,
// it could get allocated to something with a def during allocation.
if (!MRI->isConstantPhysReg(Reg, *MBB->getParent()))
- return NULL;
+ return nullptr;
} else if (!MO.isDead()) {
// A def that isn't dead. We can't move it.
- return NULL;
+ return nullptr;
}
} else {
// Virtual register uses are always safe to sink.
@@ -509,7 +510,7 @@ MachineBasicBlock *MachineSinking::FindSuccToSinkTo(MachineInstr *MI,
// If it's not safe to move defs of the register class, then abort.
if (!TII->isSafeToMoveRegClassDefs(MRI->getRegClass(Reg)))
- return NULL;
+ return nullptr;
// FIXME: This picks a successor to sink into based on having one
// successor that dominates all the uses. However, there are cases where
@@ -532,7 +533,7 @@ MachineBasicBlock *MachineSinking::FindSuccToSinkTo(MachineInstr *MI,
bool LocalUse = false;
if (!AllUsesDominatedByBlock(Reg, SuccToSinkTo, MBB,
BreakPHIEdge, LocalUse))
- return NULL;
+ return nullptr;
continue;
}
@@ -558,26 +559,26 @@ MachineBasicBlock *MachineSinking::FindSuccToSinkTo(MachineInstr *MI,
}
if (LocalUse)
// Def is used locally, it's never safe to move this def.
- return NULL;
+ return nullptr;
}
// If we couldn't find a block to sink to, ignore this instruction.
- if (SuccToSinkTo == 0)
- return NULL;
- else if (!isProfitableToSinkTo(Reg, MI, MBB, SuccToSinkTo))
- return NULL;
+ if (!SuccToSinkTo)
+ return nullptr;
+ if (!isProfitableToSinkTo(Reg, MI, MBB, SuccToSinkTo))
+ return nullptr;
}
}
// It is not possible to sink an instruction into its own block. This can
// happen with loops.
if (MBB == SuccToSinkTo)
- return NULL;
+ return nullptr;
// It's not safe to sink instructions to EH landing pad. Control flow into
// landing pad is implicitly defined.
if (SuccToSinkTo && SuccToSinkTo->isLandingPad())
- return NULL;
+ return nullptr;
return SuccToSinkTo;
}
@@ -607,7 +608,7 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) {
MachineBasicBlock *SuccToSinkTo = FindSuccToSinkTo(MI, ParentBlock, BreakPHIEdge);
// If there are no outputs, it must have side-effects.
- if (SuccToSinkTo == 0)
+ if (!SuccToSinkTo)
return false;
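Editor's note: alongside the conversions, FindSuccToSinkTo drops a redundant else after a return, leaving two parallel guard clauses. In miniature:

    static int *pickTarget(int *Succ, bool Profitable) {
      if (!Succ)         // was: if (Succ == 0) return NULL;
        return nullptr;
      if (!Profitable)   // was an "else if"; the else is dead after a return
        return nullptr;
      return Succ;
    }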
diff --git a/lib/CodeGen/MachineTraceMetrics.cpp b/lib/CodeGen/MachineTraceMetrics.cpp
index d07178e..1bbf0ad 100644
--- a/lib/CodeGen/MachineTraceMetrics.cpp
+++ b/lib/CodeGen/MachineTraceMetrics.cpp
@@ -7,7 +7,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "machine-trace-metrics"
#include "llvm/CodeGen/MachineTraceMetrics.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SparseSet.h"
@@ -26,6 +25,8 @@
using namespace llvm;
+#define DEBUG_TYPE "machine-trace-metrics"
+
char MachineTraceMetrics::ID = 0;
char &llvm::MachineTraceMetricsID = MachineTraceMetrics::ID;
@@ -37,8 +38,9 @@ INITIALIZE_PASS_END(MachineTraceMetrics,
"machine-trace-metrics", "Machine Trace Metrics", false, true)
MachineTraceMetrics::MachineTraceMetrics()
- : MachineFunctionPass(ID), MF(0), TII(0), TRI(0), MRI(0), Loops(0) {
- std::fill(Ensembles, array_endof(Ensembles), (Ensemble*)0);
+ : MachineFunctionPass(ID), MF(nullptr), TII(nullptr), TRI(nullptr),
+ MRI(nullptr), Loops(nullptr) {
+ std::fill(std::begin(Ensembles), std::end(Ensembles), nullptr);
}
void MachineTraceMetrics::getAnalysisUsage(AnalysisUsage &AU) const {
@@ -64,11 +66,11 @@ bool MachineTraceMetrics::runOnMachineFunction(MachineFunction &Func) {
}
void MachineTraceMetrics::releaseMemory() {
- MF = 0;
+ MF = nullptr;
BlockInfo.clear();
for (unsigned i = 0; i != TS_NumStrategies; ++i) {
delete Ensembles[i];
- Ensembles[i] = 0;
+ Ensembles[i] = nullptr;
}
}
@@ -95,19 +97,17 @@ MachineTraceMetrics::getResources(const MachineBasicBlock *MBB) {
unsigned PRKinds = SchedModel.getNumProcResourceKinds();
SmallVector<unsigned, 32> PRCycles(PRKinds);
- for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end();
- I != E; ++I) {
- const MachineInstr *MI = I;
- if (MI->isTransient())
+ for (const auto &MI : *MBB) {
+ if (MI.isTransient())
continue;
++InstrCount;
- if (MI->isCall())
+ if (MI.isCall())
FBI->HasCalls = true;
// Count processor resources used.
if (!SchedModel.hasInstrSchedModel())
continue;
- const MCSchedClassDesc *SC = SchedModel.resolveSchedClass(MI);
+ const MCSchedClassDesc *SC = SchedModel.resolveSchedClass(&MI);
if (!SC->isValid())
continue;
@@ -233,7 +233,7 @@ const MachineTraceMetrics::TraceBlockInfo*
MachineTraceMetrics::Ensemble::
getDepthResources(const MachineBasicBlock *MBB) const {
const TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
- return TBI->hasValidDepth() ? TBI : 0;
+ return TBI->hasValidDepth() ? TBI : nullptr;
}
// Check if height resources for MBB are valid and return the TBI.
@@ -242,7 +242,7 @@ const MachineTraceMetrics::TraceBlockInfo*
MachineTraceMetrics::Ensemble::
getHeightResources(const MachineBasicBlock *MBB) const {
const TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()];
- return TBI->hasValidHeight() ? TBI : 0;
+ return TBI->hasValidHeight() ? TBI : nullptr;
}
/// Get an array of processor resource depths for MBB. Indexed by processor
@@ -316,13 +316,13 @@ public:
const MachineBasicBlock*
MinInstrCountEnsemble::pickTracePred(const MachineBasicBlock *MBB) {
if (MBB->pred_empty())
- return 0;
+ return nullptr;
const MachineLoop *CurLoop = getLoopFor(MBB);
// Don't leave loops, and never follow back-edges.
if (CurLoop && MBB == CurLoop->getHeader())
- return 0;
+ return nullptr;
unsigned CurCount = MTM.getResources(MBB)->InstrCount;
- const MachineBasicBlock *Best = 0;
+ const MachineBasicBlock *Best = nullptr;
unsigned BestDepth = 0;
for (MachineBasicBlock::const_pred_iterator
I = MBB->pred_begin(), E = MBB->pred_end(); I != E; ++I) {
@@ -344,9 +344,9 @@ MinInstrCountEnsemble::pickTracePred(const MachineBasicBlock *MBB) {
const MachineBasicBlock*
MinInstrCountEnsemble::pickTraceSucc(const MachineBasicBlock *MBB) {
if (MBB->pred_empty())
- return 0;
+ return nullptr;
const MachineLoop *CurLoop = getLoopFor(MBB);
- const MachineBasicBlock *Best = 0;
+ const MachineBasicBlock *Best = nullptr;
unsigned BestHeight = 0;
for (MachineBasicBlock::const_succ_iterator
I = MBB->succ_begin(), E = MBB->succ_end(); I != E; ++I) {
@@ -568,9 +568,8 @@ MachineTraceMetrics::Ensemble::invalidate(const MachineBasicBlock *BadMBB) {
// invalidated, but their instructions will stay the same, so there is no
// need to erase the Cycle entries. They will be overwritten when we
// recompute.
- for (MachineBasicBlock::const_iterator I = BadMBB->begin(), E = BadMBB->end();
- I != E; ++I)
- Cycles.erase(I);
+ for (const auto &I : *BadMBB)
+ Cycles.erase(&I);
}
void MachineTraceMetrics::Ensemble::verify() const {
@@ -690,7 +689,7 @@ struct LiveRegUnit {
unsigned getSparseSetIndex() const { return RegUnit; }
- LiveRegUnit(unsigned RU) : RegUnit(RU), Cycle(0), MI(0), Op(0) {}
+ LiveRegUnit(unsigned RU) : RegUnit(RU), Cycle(0), MI(nullptr), Op(0) {}
};
}
@@ -828,16 +827,13 @@ computeInstrDepths(const MachineBasicBlock *MBB) {
if (TBI.HasValidInstrHeights)
TBI.CriticalPath = computeCrossBlockCriticalPath(TBI);
- for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end();
- I != E; ++I) {
- const MachineInstr *UseMI = I;
-
+ for (const auto &UseMI : *MBB) {
// Collect all data dependencies.
Deps.clear();
- if (UseMI->isPHI())
- getPHIDeps(UseMI, Deps, TBI.Pred, MTM.MRI);
- else if (getDataDeps(UseMI, Deps, MTM.MRI))
- updatePhysDepsDownwards(UseMI, Deps, RegUnits, MTM.TRI);
+ if (UseMI.isPHI())
+ getPHIDeps(&UseMI, Deps, TBI.Pred, MTM.MRI);
+ else if (getDataDeps(&UseMI, Deps, MTM.MRI))
+ updatePhysDepsDownwards(&UseMI, Deps, RegUnits, MTM.TRI);
// Filter and process dependencies, computing the earliest issue cycle.
unsigned Cycle = 0;
@@ -853,20 +849,20 @@ computeInstrDepths(const MachineBasicBlock *MBB) {
// Add latency if DefMI is a real instruction. Transients get latency 0.
if (!Dep.DefMI->isTransient())
DepCycle += MTM.SchedModel
- .computeOperandLatency(Dep.DefMI, Dep.DefOp, UseMI, Dep.UseOp);
+ .computeOperandLatency(Dep.DefMI, Dep.DefOp, &UseMI, Dep.UseOp);
Cycle = std::max(Cycle, DepCycle);
}
// Remember the instruction depth.
- InstrCycles &MICycles = Cycles[UseMI];
+ InstrCycles &MICycles = Cycles[&UseMI];
MICycles.Depth = Cycle;
if (!TBI.HasValidInstrHeights) {
- DEBUG(dbgs() << Cycle << '\t' << *UseMI);
+ DEBUG(dbgs() << Cycle << '\t' << UseMI);
continue;
}
// Update critical path length.
TBI.CriticalPath = std::max(TBI.CriticalPath, Cycle + MICycles.Height);
- DEBUG(dbgs() << TBI.CriticalPath << '\t' << Cycle << '\t' << *UseMI);
+ DEBUG(dbgs() << TBI.CriticalPath << '\t' << Cycle << '\t' << UseMI);
}
}
}
@@ -1055,16 +1051,16 @@ computeInstrHeights(const MachineBasicBlock *MBB) {
Succ = Loop->getHeader();
if (Succ) {
- for (MachineBasicBlock::const_iterator I = Succ->begin(), E = Succ->end();
- I != E && I->isPHI(); ++I) {
- const MachineInstr *PHI = I;
+ for (const auto &PHI : *Succ) {
+ if (!PHI.isPHI())
+ break;
Deps.clear();
- getPHIDeps(PHI, Deps, MBB, MTM.MRI);
+ getPHIDeps(&PHI, Deps, MBB, MTM.MRI);
if (!Deps.empty()) {
// Loop header PHI heights are all 0.
- unsigned Height = TBI.Succ ? Cycles.lookup(PHI).Height : 0;
- DEBUG(dbgs() << "pred\t" << Height << '\t' << *PHI);
- if (pushDepHeight(Deps.front(), PHI, Height,
+ unsigned Height = TBI.Succ ? Cycles.lookup(&PHI).Height : 0;
+ DEBUG(dbgs() << "pred\t" << Height << '\t' << PHI);
+ if (pushDepHeight(Deps.front(), &PHI, Height,
Heights, MTM.SchedModel, MTM.TII))
addLiveIns(Deps.front().DefMI, Deps.front().DefOp, Stack);
}
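Editor's note: two idioms recur in MachineTraceMetrics — std::begin/std::end replacing the old array_endof helper, and range-based for loops whose element is a reference, so call sites now take &MI where an API still wants a pointer. Both in one self-contained example (not from the patch):

    #include <algorithm>
    #include <iterator>
    #include <vector>

    static void takesPointer(const int *) {}

    int main() {
      int *Slots[4];
      std::fill(std::begin(Slots), std::end(Slots), nullptr);  // replaces array_endof

      std::vector<int> Block(3);
      for (const auto &I : Block)  // I is a reference, not an iterator
        takesPointer(&I);          // so &I stands in for the old iterator decay
      return 0;
    }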
diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp
index 1bd75f7..8515b0f 100644
--- a/lib/CodeGen/MachineVerifier.cpp
+++ b/lib/CodeGen/MachineVerifier.cpp
@@ -33,7 +33,6 @@
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBundle.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/BasicBlock.h"
@@ -42,6 +41,7 @@
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FileSystem.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
@@ -241,7 +241,7 @@ namespace {
static char ID; // Pass ID, replacement for typeid
const char *const Banner;
- MachineVerifierPass(const char *b = 0)
+ MachineVerifierPass(const char *b = nullptr)
: MachineFunctionPass(ID), Banner(b) {
initializeMachineVerifierPassPass(*PassRegistry::getPassRegistry());
}
@@ -273,7 +273,7 @@ void MachineFunction::verify(Pass *p, const char *Banner) const {
}
bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) {
- raw_ostream *OutFile = 0;
+ raw_ostream *OutFile = nullptr;
if (OutFileName) {
std::string ErrorInfo;
OutFile = new raw_fd_ostream(OutFileName, ErrorInfo,
@@ -296,10 +296,10 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) {
TRI = TM->getRegisterInfo();
MRI = &MF.getRegInfo();
- LiveVars = NULL;
- LiveInts = NULL;
- LiveStks = NULL;
- Indexes = NULL;
+ LiveVars = nullptr;
+ LiveInts = nullptr;
+ LiveStks = nullptr;
+ Indexes = nullptr;
if (PASS) {
LiveInts = PASS->getAnalysisIfAvailable<LiveIntervals>();
// We don't want to verify LiveVariables if LiveIntervals is available.
@@ -314,7 +314,7 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) {
MFI!=MFE; ++MFI) {
visitMachineBasicBlockBefore(MFI);
// Keep track of the current bundle header.
- const MachineInstr *CurBundle = 0;
+ const MachineInstr *CurBundle = nullptr;
// Do we expect the next instruction to be part of the same bundle?
bool InBundle = false;
@@ -469,18 +469,17 @@ void MachineVerifier::visitMachineFunctionBefore() {
// Build a set of the basic blocks in the function.
FunctionBlocks.clear();
- for (MachineFunction::const_iterator
- I = MF->begin(), E = MF->end(); I != E; ++I) {
- FunctionBlocks.insert(I);
- BBInfo &MInfo = MBBInfoMap[I];
-
- MInfo.Preds.insert(I->pred_begin(), I->pred_end());
- if (MInfo.Preds.size() != I->pred_size())
- report("MBB has duplicate entries in its predecessor list.", I);
-
- MInfo.Succs.insert(I->succ_begin(), I->succ_end());
- if (MInfo.Succs.size() != I->succ_size())
- report("MBB has duplicate entries in its successor list.", I);
+ for (const auto &MBB : *MF) {
+ FunctionBlocks.insert(&MBB);
+ BBInfo &MInfo = MBBInfoMap[&MBB];
+
+ MInfo.Preds.insert(MBB.pred_begin(), MBB.pred_end());
+ if (MInfo.Preds.size() != MBB.pred_size())
+ report("MBB has duplicate entries in its predecessor list.", &MBB);
+
+ MInfo.Succs.insert(MBB.succ_begin(), MBB.succ_end());
+ if (MInfo.Succs.size() != MBB.succ_size())
+ report("MBB has duplicate entries in its successor list.", &MBB);
}
// Check that the register use lists are sane.
@@ -501,7 +500,7 @@ static bool matchPair(MachineBasicBlock::const_succ_iterator i,
void
MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
- FirstTerminator = 0;
+ FirstTerminator = nullptr;
if (MRI->isSSA()) {
// If this block has allocatable physical registers live-in, check that
@@ -553,7 +552,7 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
report("MBB has more than one landing pad successor", MBB);
// Call AnalyzeBranch. If it succeeds, there are several more conditions to check.
- MachineBasicBlock *TBB = 0, *FBB = 0;
+ MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
SmallVector<MachineOperand, 4> Cond;
if (!TII->AnalyzeBranch(*const_cast<MachineBasicBlock *>(MBB),
TBB, FBB, Cond)) {
@@ -578,8 +577,8 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
report("MBB exits via unconditional fall-through but its successor "
"differs from its CFG successor!", MBB);
}
- if (!MBB->empty() && getBundleStart(&MBB->back())->isBarrier() &&
- !TII->isPredicated(getBundleStart(&MBB->back()))) {
+ if (!MBB->empty() && MBB->back().isBarrier() &&
+ !TII->isPredicated(&MBB->back())) {
report("MBB exits via unconditional fall-through but ends with a "
"barrier instruction!", MBB);
}
@@ -599,10 +598,10 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
if (MBB->empty()) {
report("MBB exits via unconditional branch but doesn't contain "
"any instructions!", MBB);
- } else if (!getBundleStart(&MBB->back())->isBarrier()) {
+ } else if (!MBB->back().isBarrier()) {
report("MBB exits via unconditional branch but doesn't end with a "
"barrier instruction!", MBB);
- } else if (!getBundleStart(&MBB->back())->isTerminator()) {
+ } else if (!MBB->back().isTerminator()) {
report("MBB exits via unconditional branch but the branch isn't a "
"terminator instruction!", MBB);
}
@@ -630,10 +629,10 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
if (MBB->empty()) {
report("MBB exits via conditional branch/fall-through but doesn't "
"contain any instructions!", MBB);
- } else if (getBundleStart(&MBB->back())->isBarrier()) {
+ } else if (MBB->back().isBarrier()) {
report("MBB exits via conditional branch/fall-through but ends with a "
"barrier instruction!", MBB);
- } else if (!getBundleStart(&MBB->back())->isTerminator()) {
+ } else if (!MBB->back().isTerminator()) {
report("MBB exits via conditional branch/fall-through but the branch "
"isn't a terminator instruction!", MBB);
}
@@ -658,10 +657,10 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
if (MBB->empty()) {
report("MBB exits via conditional branch/branch but doesn't "
"contain any instructions!", MBB);
- } else if (!getBundleStart(&MBB->back())->isBarrier()) {
+ } else if (!MBB->back().isBarrier()) {
report("MBB exits via conditional branch/branch but doesn't end with a "
"barrier instruction!", MBB);
- } else if (!getBundleStart(&MBB->back())->isTerminator()) {
+ } else if (!MBB->back().isTerminator()) {
report("MBB exits via conditional branch/branch but the branch "
"isn't a terminator instruction!", MBB);
}
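Each arm above validates one of the four exit shapes AnalyzeBranch can report through its TBB/FBB/Cond outputs. A hedged, dependency-free sketch of that classification (void pointers stand in for MachineBasicBlock*; the real verifier also cross-checks the CFG successor sets):

    #include <cstdio>

    enum ExitKind { FallThrough, UncondBranch, CondBranchFallThrough, CondBranchBranch };

    // TBB is the taken target, FBB the explicit false target, and CondEmpty
    // says whether a branch condition was found.
    static ExitKind classifyExit(const void *TBB, const void *FBB, bool CondEmpty) {
      if (!TBB && !FBB)
        return FallThrough;            // falls into its layout successor
      if (CondEmpty)
        return UncondBranch;           // always jumps to TBB
      if (!FBB)
        return CondBranchFallThrough;  // conditional jump, else fall through
      return CondBranchBranch;         // conditional jump, else jump to FBB
    }

    int main() {
      int T;
      std::printf("%d\n", classifyExit(&T, nullptr, /*CondEmpty=*/true)); // 1
      return 0;
    }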
@@ -1158,9 +1157,7 @@ void MachineVerifier::calcRegsPassed() {
// First push live-out regs to successors' vregsPassed. Remember the MBBs that
// have any vregsPassed.
SmallPtrSet<const MachineBasicBlock*, 8> todo;
- for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end();
- MFI != MFE; ++MFI) {
- const MachineBasicBlock &MBB(*MFI);
+ for (const auto &MBB : *MF) {
BBInfo &MInfo = MBBInfoMap[&MBB];
if (!MInfo.reachable)
continue;
@@ -1195,9 +1192,7 @@ void MachineVerifier::calcRegsPassed() {
void MachineVerifier::calcRegsRequired() {
// First push live-in regs to predecessors' vregsRequired.
SmallPtrSet<const MachineBasicBlock*, 8> todo;
- for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end();
- MFI != MFE; ++MFI) {
- const MachineBasicBlock &MBB(*MFI);
+ for (const auto &MBB : *MF) {
BBInfo &MInfo = MBBInfoMap[&MBB];
for (MachineBasicBlock::const_pred_iterator PrI = MBB.pred_begin(),
PrE = MBB.pred_end(); PrI != PrE; ++PrI) {
@@ -1228,27 +1223,28 @@ void MachineVerifier::calcRegsRequired() {
// calcRegsPassed has been run so BBInfo::isLiveOut is valid.
void MachineVerifier::checkPHIOps(const MachineBasicBlock *MBB) {
SmallPtrSet<const MachineBasicBlock*, 8> seen;
- for (MachineBasicBlock::const_iterator BBI = MBB->begin(), BBE = MBB->end();
- BBI != BBE && BBI->isPHI(); ++BBI) {
+ for (const auto &BBI : *MBB) {
+ if (!BBI.isPHI())
+ break;
seen.clear();
- for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2) {
- unsigned Reg = BBI->getOperand(i).getReg();
- const MachineBasicBlock *Pre = BBI->getOperand(i + 1).getMBB();
+ for (unsigned i = 1, e = BBI.getNumOperands(); i != e; i += 2) {
+ unsigned Reg = BBI.getOperand(i).getReg();
+ const MachineBasicBlock *Pre = BBI.getOperand(i + 1).getMBB();
if (!Pre->isSuccessor(MBB))
continue;
seen.insert(Pre);
BBInfo &PrInfo = MBBInfoMap[Pre];
if (PrInfo.reachable && !PrInfo.isLiveOut(Reg))
report("PHI operand is not live-out from predecessor",
- &BBI->getOperand(i), i);
+ &BBI.getOperand(i), i);
}
// Did we see all predecessors?
for (MachineBasicBlock::const_pred_iterator PrI = MBB->pred_begin(),
PrE = MBB->pred_end(); PrI != PrE; ++PrI) {
if (!seen.count(*PrI)) {
- report("Missing PHI operand", BBI);
+ report("Missing PHI operand", &BBI);
*OS << "BB#" << (*PrI)->getNumber()
<< " is a predecessor according to the CFG.\n";
}
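The i += 2 stride works because a machine PHI's operands come as a def followed by (value, predecessor) pairs. A tiny standalone model of that layout and the walk the verifier performs over it:

    #include <cstdio>
    #include <string>
    #include <vector>

    int main() {
      // Operand layout of a machine PHI: def, then (value, pred-block) pairs.
      std::vector<std::string> Ops = {"%dst", "%a", "BB#0", "%b", "BB#1"};
      for (unsigned i = 1, e = (unsigned)Ops.size(); i != e; i += 2)
        std::printf("value %s flows in from %s\n",
                    Ops[i].c_str(), Ops[i + 1].c_str());
      return 0;
    }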
@@ -1259,29 +1255,27 @@ void MachineVerifier::checkPHIOps(const MachineBasicBlock *MBB) {
void MachineVerifier::visitMachineFunctionAfter() {
calcRegsPassed();
- for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end();
- MFI != MFE; ++MFI) {
- BBInfo &MInfo = MBBInfoMap[MFI];
+ for (const auto &MBB : *MF) {
+ BBInfo &MInfo = MBBInfoMap[&MBB];
// Skip unreachable MBBs.
if (!MInfo.reachable)
continue;
- checkPHIOps(MFI);
+ checkPHIOps(&MBB);
}
// Now check liveness info if available
calcRegsRequired();
// Check for killed virtual registers that should be live out.
- for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end();
- MFI != MFE; ++MFI) {
- BBInfo &MInfo = MBBInfoMap[MFI];
+ for (const auto &MBB : *MF) {
+ BBInfo &MInfo = MBBInfoMap[&MBB];
for (RegSet::iterator
I = MInfo.vregsRequired.begin(), E = MInfo.vregsRequired.end(); I != E;
++I)
if (MInfo.regsKilled.count(*I)) {
- report("Virtual register killed in block, but needed live out.", MFI);
+ report("Virtual register killed in block, but needed live out.", &MBB);
*OS << "Virtual register " << PrintReg(*I)
<< " is used after the block.\n";
}
@@ -1307,20 +1301,19 @@ void MachineVerifier::verifyLiveVariables() {
for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
LiveVariables::VarInfo &VI = LiveVars->getVarInfo(Reg);
- for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end();
- MFI != MFE; ++MFI) {
- BBInfo &MInfo = MBBInfoMap[MFI];
+ for (const auto &MBB : *MF) {
+ BBInfo &MInfo = MBBInfoMap[&MBB];
// Our vregsRequired should be identical to LiveVariables' AliveBlocks
if (MInfo.vregsRequired.count(Reg)) {
- if (!VI.AliveBlocks.test(MFI->getNumber())) {
- report("LiveVariables: Block missing from AliveBlocks", MFI);
+ if (!VI.AliveBlocks.test(MBB.getNumber())) {
+ report("LiveVariables: Block missing from AliveBlocks", &MBB);
*OS << "Virtual register " << PrintReg(Reg)
<< " must be live through the block.\n";
}
} else {
- if (VI.AliveBlocks.test(MFI->getNumber())) {
- report("LiveVariables: Block should not be in AliveBlocks", MFI);
+ if (VI.AliveBlocks.test(MBB.getNumber())) {
+ report("LiveVariables: Block should not be in AliveBlocks", &MBB);
*OS << "Virtual register " << PrintReg(Reg)
<< " is not needed live through the block.\n";
}
@@ -1675,32 +1668,31 @@ void MachineVerifier::verifyStackFrame() {
}
// Update stack state by checking contents of MBB.
- for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end();
- I != E; ++I) {
- if (I->getOpcode() == FrameSetupOpcode) {
+ for (const auto &I : *MBB) {
+ if (I.getOpcode() == FrameSetupOpcode) {
// The first operand of a FrameOpcode should be i32.
- int Size = I->getOperand(0).getImm();
+ int Size = I.getOperand(0).getImm();
assert(Size >= 0 &&
"Value should be non-negative in FrameSetup and FrameDestroy.\n");
if (BBState.ExitIsSetup)
- report("FrameSetup is after another FrameSetup", I);
+ report("FrameSetup is after another FrameSetup", &I);
BBState.ExitValue -= Size;
BBState.ExitIsSetup = true;
}
- if (I->getOpcode() == FrameDestroyOpcode) {
+ if (I.getOpcode() == FrameDestroyOpcode) {
// The first operand of a FrameOpcode should be i32.
- int Size = I->getOperand(0).getImm();
+ int Size = I.getOperand(0).getImm();
assert(Size >= 0 &&
"Value should be non-negative in FrameSetup and FrameDestroy.\n");
if (!BBState.ExitIsSetup)
- report("FrameDestroy is not after a FrameSetup", I);
+ report("FrameDestroy is not after a FrameSetup", &I);
int AbsSPAdj = BBState.ExitValue < 0 ? -BBState.ExitValue :
BBState.ExitValue;
if (BBState.ExitIsSetup && AbsSPAdj != Size) {
- report("FrameDestroy <n> is after FrameSetup <m>", I);
+ report("FrameDestroy <n> is after FrameSetup <m>", &I);
*OS << "FrameDestroy <" << Size << "> is after FrameSetup <"
<< AbsSPAdj << ">.\n";
}
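verifyStackFrame treats FrameSetup/FrameDestroy like brackets: a setup records a downward SP adjustment and flips ExitIsSetup, and the matching destroy must undo exactly that amount. A minimal sketch of the bookkeeping, simplified to a single block (the real code also propagates this state across CFG edges):

    #include <cstdio>
    #include <cstdlib>

    struct BlockState {
      int ExitValue = 0;        // cumulative SP adjustment at this point
      bool ExitIsSetup = false; // inside a setup/destroy pair?
    };

    static void frameSetup(BlockState &S, int Size) {
      if (S.ExitIsSetup)
        std::printf("FrameSetup is after another FrameSetup\n");
      S.ExitValue -= Size;
      S.ExitIsSetup = true;
    }

    static void frameDestroy(BlockState &S, int Size) {
      if (!S.ExitIsSetup)
        std::printf("FrameDestroy is not after a FrameSetup\n");
      else if (std::abs(S.ExitValue) != Size)
        std::printf("FrameDestroy <%d> is after FrameSetup <%d>\n",
                    Size, std::abs(S.ExitValue));
      S.ExitValue += Size;
      S.ExitIsSetup = false;
    }

    int main() {
      BlockState S;
      frameSetup(S, 16);
      frameDestroy(S, 8); // mismatch: FrameDestroy <8> is after FrameSetup <16>
      return 0;
    }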
diff --git a/lib/CodeGen/OptimizePHIs.cpp b/lib/CodeGen/OptimizePHIs.cpp
index 56cb673..95a2934 100644
--- a/lib/CodeGen/OptimizePHIs.cpp
+++ b/lib/CodeGen/OptimizePHIs.cpp
@@ -12,7 +12,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "phi-opt"
#include "llvm/CodeGen/Passes.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
@@ -23,6 +22,8 @@
#include "llvm/Target/TargetInstrInfo.h"
using namespace llvm;
+#define DEBUG_TYPE "phi-opt"
+
STATISTIC(NumPHICycles, "Number of PHI cycles replaced");
STATISTIC(NumDeadPHICycles, "Number of dead PHI cycles");
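This hunk is the template for a change repeated across the whole rebase: #define DEBUG_TYPE moves from before the includes to after them, so the macro can no longer leak into headers or collide with a DEBUG_TYPE a header defines itself. The resulting file shape, sketched assuming an LLVM 3.5-era tree where the DEBUG macro and dbgs() live in llvm/Support/Debug.h:

    // Includes first: headers are compiled with no DEBUG_TYPE in scope.
    #include "llvm/Support/Debug.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    // Only now is the tag defined, scoped to this file's own code.
    #define DEBUG_TYPE "phi-opt"

    void noteProgress() {
      DEBUG(dbgs() << "tagged 'phi-opt' only from this point on\n");
    }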
diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp
index 0e9df58..c8d0819 100644
--- a/lib/CodeGen/PHIElimination.cpp
+++ b/lib/CodeGen/PHIElimination.cpp
@@ -13,7 +13,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "phielim"
#include "llvm/CodeGen/Passes.h"
#include "PHIEliminationUtils.h"
#include "llvm/ADT/STLExtras.h"
@@ -35,6 +34,8 @@
#include <algorithm>
using namespace llvm;
+#define DEBUG_TYPE "phielim"
+
static cl::opt<bool>
DisableEdgeSplitting("disable-phi-elim-edge-splitting", cl::init(false),
cl::Hidden, cl::desc("Disable critical edge splitting "
@@ -377,7 +378,7 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
findPHICopyInsertPoint(&opBlock, &MBB, SrcReg);
// Insert the copy.
- MachineInstr *NewSrcInstr = 0;
+ MachineInstr *NewSrcInstr = nullptr;
if (!reusedIncoming && IncomingReg) {
if (SrcUndef) {
// The source register is undefined, so there is no need for a real
@@ -531,13 +532,14 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
/// used later to determine when the vreg is killed in the BB.
///
void PHIElimination::analyzePHINodes(const MachineFunction& MF) {
- for (MachineFunction::const_iterator I = MF.begin(), E = MF.end();
- I != E; ++I)
- for (MachineBasicBlock::const_iterator BBI = I->begin(), BBE = I->end();
- BBI != BBE && BBI->isPHI(); ++BBI)
- for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2)
- ++VRegPHIUseCount[BBVRegPair(BBI->getOperand(i+1).getMBB()->getNumber(),
- BBI->getOperand(i).getReg())];
+ for (const auto &MBB : MF)
+ for (const auto &BBI : MBB) {
+ if (!BBI.isPHI())
+ break;
+ for (unsigned i = 1, e = BBI.getNumOperands(); i != e; i += 2)
+ ++VRegPHIUseCount[BBVRegPair(BBI.getOperand(i+1).getMBB()->getNumber(),
+ BBI.getOperand(i).getReg())];
+ }
}
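The rewritten analyzePHINodes counts, for every (predecessor block, virtual register) pair, how many PHI operands will read that register out of that block; per the comment above, the counts later determine where the vreg is killed. A self-contained model of the same tally, using std::map in place of the pass's map type:

    #include <cstdio>
    #include <map>
    #include <utility>

    typedef std::pair<unsigned, unsigned> BBVRegPair; // (block number, vreg)

    int main() {
      std::map<BBVRegPair, unsigned> VRegPHIUseCount;

      // Two PHIs reading vreg 5 out of BB#1, one reading vreg 6 out of BB#2.
      ++VRegPHIUseCount[BBVRegPair(1, 5)];
      ++VRegPHIUseCount[BBVRegPair(1, 5)];
      ++VRegPHIUseCount[BBVRegPair(2, 6)];

      for (const auto &KV : VRegPHIUseCount)
        std::printf("BB#%u, vreg %u: %u PHI use(s)\n",
                    KV.first.first, KV.first.second, KV.second);
      return 0;
    }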
bool PHIElimination::SplitPHIEdges(MachineFunction &MF,
@@ -546,7 +548,7 @@ bool PHIElimination::SplitPHIEdges(MachineFunction &MF,
if (MBB.empty() || !MBB.front().isPHI() || MBB.isLandingPad())
return false; // Quick exit for basic blocks without PHIs.
- const MachineLoop *CurLoop = MLI ? MLI->getLoopFor(&MBB) : 0;
+ const MachineLoop *CurLoop = MLI ? MLI->getLoopFor(&MBB) : nullptr;
bool IsLoopHeader = CurLoop && &MBB == CurLoop->getHeader();
bool Changed = false;
@@ -563,7 +565,7 @@ bool PHIElimination::SplitPHIEdges(MachineFunction &MF,
// out-of-line blocks into the loop which is very bad for code placement.
if (PreMBB == &MBB && !SplitAllCriticalEdges)
continue;
- const MachineLoop *PreLoop = MLI ? MLI->getLoopFor(PreMBB) : 0;
+ const MachineLoop *PreLoop = MLI ? MLI->getLoopFor(PreMBB) : nullptr;
if (IsLoopHeader && PreLoop == CurLoop && !SplitAllCriticalEdges)
continue;
diff --git a/lib/CodeGen/Passes.cpp b/lib/CodeGen/Passes.cpp
index 080b20d..b3f7198 100644
--- a/lib/CodeGen/Passes.cpp
+++ b/lib/CodeGen/Passes.cpp
@@ -84,7 +84,7 @@ static cl::opt<bool> PrintGCInfo("print-gc", cl::Hidden,
cl::desc("Dump garbage collector data"));
static cl::opt<bool> VerifyMachineCode("verify-machineinstrs", cl::Hidden,
cl::desc("Verify generated machine code"),
- cl::init(getenv("LLVM_VERIFY_MACHINEINSTRS")!=NULL));
+ cl::init(getenv("LLVM_VERIFY_MACHINEINSTRS")!=nullptr));
static cl::opt<std::string>
PrintMachineInstrs("print-machineinstrs", cl::ValueOptional,
cl::desc("Print machine instrs"),
@@ -126,7 +126,7 @@ static IdentifyingPassPtr applyOverride(IdentifyingPassPtr TargetID,
case cl::BOU_TRUE:
if (TargetID.isValid())
return TargetID;
- if (StandardID == 0)
+ if (StandardID == nullptr)
report_fatal_error("Target cannot enable pass");
return StandardID;
case cl::BOU_FALSE:
@@ -232,8 +232,8 @@ TargetPassConfig::~TargetPassConfig() {
// Out of line constructor provides default values for pass options and
// registers all common codegen passes.
TargetPassConfig::TargetPassConfig(TargetMachine *tm, PassManagerBase &pm)
- : ImmutablePass(ID), PM(&pm), StartAfter(0), StopAfter(0),
- Started(true), Stopped(false), TM(tm), Impl(0), Initialized(false),
+ : ImmutablePass(ID), PM(&pm), StartAfter(nullptr), StopAfter(nullptr),
+ Started(true), Stopped(false), TM(tm), Impl(nullptr), Initialized(false),
DisableVerify(false),
EnableTailMerge(true) {
@@ -274,7 +274,7 @@ TargetPassConfig *LLVMTargetMachine::createPassConfig(PassManagerBase &PM) {
}
TargetPassConfig::TargetPassConfig()
- : ImmutablePass(ID), PM(0) {
+ : ImmutablePass(ID), PM(nullptr) {
llvm_unreachable("TargetPassConfig should not be constructed on-the-fly");
}
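The 0/NULL-to-nullptr sweep that dominates this rebase is not cosmetic: nullptr has its own type (std::nullptr_t) and never converts to an integer, so it cannot be captured by the wrong overload the way a literal 0 can. A short demonstration:

    #include <cstdio>

    static void take(int)    { std::printf("int overload\n"); }
    static void take(char *) { std::printf("pointer overload\n"); }

    int main() {
      take(0);       // literal 0 is an int first: picks the int overload
      take(nullptr); // unambiguously the pointer overload
      return 0;
    }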
@@ -332,7 +332,7 @@ AnalysisID TargetPassConfig::addPass(AnalysisID PassID) {
IdentifyingPassPtr TargetID = getPassSubstitution(PassID);
IdentifyingPassPtr FinalPtr = overridePass(PassID, TargetID);
if (!FinalPtr.isValid())
- return 0;
+ return nullptr;
Pass *P;
if (FinalPtr.isInstance())
@@ -384,8 +384,10 @@ void TargetPassConfig::addIRPasses() {
// Before running any passes, run the verifier to determine if the input
// coming from the front-end and/or optimizer is valid.
- if (!DisableVerify)
+ if (!DisableVerify) {
addPass(createVerifierPass());
+ addPass(createDebugInfoVerifierPass());
+ }
// Run loop strength reduction before anything else.
if (getOptLevel() != CodeGenOpt::None && !DisableLSR) {
@@ -443,6 +445,12 @@ void TargetPassConfig::addCodeGenPrepare() {
void TargetPassConfig::addISelPrepare() {
addPreISel();
+ // Need to verify DebugInfo *before* creating the stack protector analysis.
+ // It's a function pass, and verifying between it and its users causes a
+ // crash.
+ if (!DisableVerify)
+ addPass(createDebugInfoVerifierPass());
+
addPass(createStackProtectorPass(TM));
if (PrintISelInput)
@@ -620,7 +628,7 @@ MachinePassRegistry RegisterRegAlloc::Registry;
/// A dummy default pass factory indicates whether the register allocator is
/// overridden on the command line.
-static FunctionPass *useDefaultRegisterAllocator() { return 0; }
+static FunctionPass *useDefaultRegisterAllocator() { return nullptr; }
static RegisterRegAlloc
defaultRegAlloc("default",
"pick register allocator based on -O option",
diff --git a/lib/CodeGen/PeepholeOptimizer.cpp b/lib/CodeGen/PeepholeOptimizer.cpp
index e18d9635..eeee93a 100644
--- a/lib/CodeGen/PeepholeOptimizer.cpp
+++ b/lib/CodeGen/PeepholeOptimizer.cpp
@@ -66,7 +66,6 @@
// C = copy A <-- same-bank copy
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "peephole-opt"
#include "llvm/CodeGen/Passes.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
@@ -81,6 +80,8 @@
#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
+#define DEBUG_TYPE "peephole-opt"
+
// Optimize Extensions
static cl::opt<bool>
Aggressive("aggressive-ext-opt", cl::Hidden,
@@ -183,7 +184,7 @@ optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
// If UseSrcSubIdx is Set, SubIdx also applies to SrcReg, and only uses of
// SrcReg:SubIdx should be replaced.
bool UseSrcSubIdx = TM->getRegisterInfo()->
- getSubClassWithSubReg(MRI->getRegClass(SrcReg), SubIdx) != 0;
+ getSubClassWithSubReg(MRI->getRegClass(SrcReg), SubIdx) != nullptr;
// The source has other uses. See if we can replace the other uses with use of
// the result of the extension.
@@ -358,7 +359,7 @@ static bool shareSameRegisterFile(const TargetRegisterInfo &TRI,
unsigned SrcIdx, DefIdx;
if (SrcSubReg && DefSubReg)
return TRI.getCommonSuperRegClass(SrcRC, SrcSubReg, DefRC, DefSubReg,
- SrcIdx, DefIdx) != NULL;
+ SrcIdx, DefIdx) != nullptr;
// At most one of the register is a sub register, make it Src to avoid
// duplicating the test.
if (!SrcSubReg) {
@@ -368,9 +369,9 @@ static bool shareSameRegisterFile(const TargetRegisterInfo &TRI,
// One of the register is a sub register, check if we can get a superclass.
if (SrcSubReg)
- return TRI.getMatchingSuperRegClass(SrcRC, DefRC, SrcSubReg) != NULL;
+ return TRI.getMatchingSuperRegClass(SrcRC, DefRC, SrcSubReg) != nullptr;
// Plain copy.
- return TRI.getCommonSubClass(DefRC, SrcRC) != NULL;
+ return TRI.getCommonSubClass(DefRC, SrcRC) != nullptr;
}
/// \brief Get the index of the definition and source for \p Copy
@@ -568,7 +569,7 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
TM = &MF.getTarget();
TII = TM->getInstrInfo();
MRI = &MF.getRegInfo();
- DT = Aggressive ? &getAnalysis<MachineDominatorTree>() : 0;
+ DT = Aggressive ? &getAnalysis<MachineDominatorTree>() : nullptr;
bool Changed = false;
@@ -643,7 +644,7 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
// Save FoldAsLoadDefReg because optimizeLoadInstr() resets it and
// we need it for markUsesInDebugValueAsUndef().
unsigned FoldedReg = FoldAsLoadDefReg;
- MachineInstr *DefMI = 0;
+ MachineInstr *DefMI = nullptr;
MachineInstr *FoldMI = TII->optimizeLoadInstr(MI, MRI,
FoldAsLoadDefReg,
DefMI);
diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp
index a13e51f..db3933e 100644
--- a/lib/CodeGen/PostRASchedulerList.cpp
+++ b/lib/CodeGen/PostRASchedulerList.cpp
@@ -18,7 +18,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "post-RA-sched"
#include "llvm/CodeGen/Passes.h"
#include "AggressiveAntiDepBreaker.h"
#include "AntiDepBreaker.h"
@@ -47,6 +46,8 @@
#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
+#define DEBUG_TYPE "post-RA-sched"
+
STATISTIC(NumNoops, "Number of noops inserted");
STATISTIC(NumStalls, "Number of pipeline stalls");
STATISTIC(NumFixedAnti, "Number of fixed anti-dependencies");
@@ -205,7 +206,7 @@ SchedulePostRATDList::SchedulePostRATDList(
((AntiDepMode == TargetSubtargetInfo::ANTIDEP_ALL) ?
(AntiDepBreaker *)new AggressiveAntiDepBreaker(MF, RCI, CriticalPathRCs) :
((AntiDepMode == TargetSubtargetInfo::ANTIDEP_CRITICAL) ?
- (AntiDepBreaker *)new CriticalAntiDepBreaker(MF, RCI) : NULL));
+ (AntiDepBreaker *)new CriticalAntiDepBreaker(MF, RCI) : nullptr));
}
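The nested ternary above picks the anti-dependency breaker by mode: ANTIDEP_ALL gets the aggressive breaker, ANTIDEP_CRITICAL the critical-path one, and anything else none at all, which is why the rest of the file can simply test the pointer for null. The same selection written as a small factory, with stub types standing in for the real breaker classes:

    #include <cstdio>
    #include <memory>

    enum AntiDepMode { ANTIDEP_NONE, ANTIDEP_CRITICAL, ANTIDEP_ALL };

    struct AntiDepBreaker { virtual ~AntiDepBreaker() {} };
    struct AggressiveBreaker : AntiDepBreaker {};
    struct CriticalBreaker : AntiDepBreaker {};

    static std::unique_ptr<AntiDepBreaker> makeBreaker(AntiDepMode M) {
      if (M == ANTIDEP_ALL)
        return std::unique_ptr<AntiDepBreaker>(new AggressiveBreaker());
      if (M == ANTIDEP_CRITICAL)
        return std::unique_ptr<AntiDepBreaker>(new CriticalBreaker());
      return nullptr; // scheduling proceeds without breaking anti-dependencies
    }

    int main() {
      std::printf("breaker installed? %s\n",
                  makeBreaker(ANTIDEP_NONE) ? "yes" : "no");
      return 0;
    }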
SchedulePostRATDList::~SchedulePostRATDList() {
@@ -355,7 +356,7 @@ void SchedulePostRATDList::startBlock(MachineBasicBlock *BB) {
// Reset the hazard recognizer and anti-dep breaker.
HazardRec->Reset();
- if (AntiDepBreak != NULL)
+ if (AntiDepBreak)
AntiDepBreak->StartBlock(BB);
}
@@ -365,7 +366,7 @@ void SchedulePostRATDList::schedule() {
// Build the scheduling graph.
buildSchedGraph(AA);
- if (AntiDepBreak != NULL) {
+ if (AntiDepBreak) {
unsigned Broken =
AntiDepBreak->BreakAntiDependencies(SUnits, RegionBegin, RegionEnd,
EndIndex, DbgValues);
@@ -397,14 +398,14 @@ void SchedulePostRATDList::schedule() {
/// instruction, which will not be scheduled.
///
void SchedulePostRATDList::Observe(MachineInstr *MI, unsigned Count) {
- if (AntiDepBreak != NULL)
+ if (AntiDepBreak)
AntiDepBreak->Observe(MI, Count, EndIndex);
}
/// FinishBlock - Clean up register live-range state.
///
void SchedulePostRATDList::finishBlock() {
- if (AntiDepBreak != NULL)
+ if (AntiDepBreak)
AntiDepBreak->FinishBlock();
// Call the superclass.
@@ -429,7 +430,7 @@ void SchedulePostRATDList::ReleaseSucc(SUnit *SU, SDep *SuccEdge) {
dbgs() << "*** Scheduling failed! ***\n";
SuccSU->dump(this);
dbgs() << " has been released too many times!\n";
- llvm_unreachable(0);
+ llvm_unreachable(nullptr);
}
#endif
--SuccSU->NumPredsLeft;
@@ -480,7 +481,7 @@ void SchedulePostRATDList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
void SchedulePostRATDList::emitNoop(unsigned CurCycle) {
DEBUG(dbgs() << "*** Emitting noop in cycle " << CurCycle << '\n');
HazardRec->EmitNoop();
- Sequence.push_back(0); // NULL here means noop
+ Sequence.push_back(nullptr); // NULL here means noop
++NumNoops;
}
@@ -532,7 +533,7 @@ void SchedulePostRATDList::ListScheduleTopDown() {
DEBUG(dbgs() << "\n*** Examining Available\n"; AvailableQueue.dump(this));
- SUnit *FoundSUnit = 0, *NotPreferredSUnit = 0;
+ SUnit *FoundSUnit = nullptr, *NotPreferredSUnit = nullptr;
bool HasNoopHazards = false;
while (!AvailableQueue.empty()) {
SUnit *CurSUnit = AvailableQueue.pop();
@@ -572,7 +573,7 @@ void SchedulePostRATDList::ListScheduleTopDown() {
AvailableQueue.push(NotPreferredSUnit);
}
- NotPreferredSUnit = 0;
+ NotPreferredSUnit = nullptr;
}
// Add the nodes that aren't ready back onto the available list.
@@ -662,5 +663,5 @@ void SchedulePostRATDList::EmitSchedule() {
BB->splice(++OrigPrivMI, BB, DbgValue);
}
DbgValues.clear();
- FirstDbgValue = NULL;
+ FirstDbgValue = nullptr;
}
diff --git a/lib/CodeGen/ProcessImplicitDefs.cpp b/lib/CodeGen/ProcessImplicitDefs.cpp
index 360e8d7..3129927 100644
--- a/lib/CodeGen/ProcessImplicitDefs.cpp
+++ b/lib/CodeGen/ProcessImplicitDefs.cpp
@@ -7,8 +7,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "processimplicitdefs"
-
#include "llvm/ADT/SetVector.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -21,6 +19,8 @@
using namespace llvm;
+#define DEBUG_TYPE "processimplicitdefs"
+
namespace {
/// Process IMPLICIT_DEF instructions and make sure there is one implicit_def
/// for each use. Add isUndef marker to implicit_def defs and their uses.
diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp
index 136b1ed..c74a42f 100644
--- a/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/lib/CodeGen/PrologEpilogInserter.cpp
@@ -16,7 +16,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "pei"
#include "PrologEpilogInserter.h"
#include "llvm/ADT/IndexedMap.h"
#include "llvm/ADT/STLExtras.h"
@@ -46,6 +45,8 @@
using namespace llvm;
+#define DEBUG_TYPE "pei"
+
char PEI::ID = 0;
char &llvm::PrologEpilogCodeInserterID = PEI::ID;
@@ -114,7 +115,7 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) {
assert(!Fn.getRegInfo().getNumVirtRegs() && "Regalloc must assign all vregs");
- RS = TRI->requiresRegisterScavenging(Fn) ? new RegScavenger() : NULL;
+ RS = TRI->requiresRegisterScavenging(Fn) ? new RegScavenger() : nullptr;
FrameIndexVirtualScavenging = TRI->requiresFrameIndexScavenging(Fn);
// Calculate the MaxCallFrameSize and AdjustsStack variables for the
@@ -243,14 +244,14 @@ void PEI::calculateCalleeSavedRegisters(MachineFunction &F) {
MachineFrameInfo *MFI = F.getFrameInfo();
// Get the callee saved register list...
- const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs(&F);
+ const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&F);
// These are used to keep track the callee-save area. Initialize them.
MinCSFrameIndex = INT_MAX;
MaxCSFrameIndex = 0;
// Early exit for targets which have no callee saved registers.
- if (CSRegs == 0 || CSRegs[0] == 0)
+ if (!CSRegs || CSRegs[0] == 0)
return;
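getCalleeSavedRegs() hands back a zero-terminated register list, so the early exit above and every consumer of CSRegs key off a 0 sentinel rather than a length. A standalone sketch of that walk — the register numbers are made up, and MCPhysReg is the uint16_t typedef this rebase migrates the code to:

    #include <cstdint>
    #include <cstdio>

    typedef uint16_t MCPhysReg;

    int main() {
      // Hypothetical callee-saved list, terminated by 0 like the real tables.
      static const MCPhysReg SavedRegs[] = {4, 5, 6, 7, 0};
      const MCPhysReg *CSRegs = SavedRegs;

      if (!CSRegs || CSRegs[0] == 0)
        return 0; // target with no callee-saved registers
      for (unsigned i = 0; CSRegs[i]; ++i)
        std::printf("callee-saved register %u\n", CSRegs[i]);
      return 0;
    }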
// In Naked functions we aren't going to save any registers.
@@ -680,7 +681,7 @@ void PEI::insertPrologEpilogCode(MachineFunction &Fn) {
// we've been asked for it. This, when linked with a runtime with support
// for segmented stacks (libgcc is one), will result in allocating stack
// space in small chunks instead of one large contiguous block.
- if (Fn.getTarget().Options.EnableSegmentedStacks)
+ if (Fn.shouldSplitStack())
TFI.adjustForSegmentedStacks(Fn);
// Emit additional code that is required to explicitly handle the stack in
@@ -805,7 +806,7 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn,
// use that target machine register info object to eliminate
// it.
TRI.eliminateFrameIndex(MI, SPAdj, i,
- FrameIndexVirtualScavenging ? NULL : RS);
+ FrameIndexVirtualScavenging ? nullptr : RS);
// Reset the iterator if we were at the beginning of the BB.
if (AtBeginning) {
@@ -813,7 +814,7 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn,
DoIncr = false;
}
- MI = 0;
+ MI = nullptr;
break;
}
@@ -845,13 +846,14 @@ void PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) {
// We might end up here again with a NULL iterator if we scavenged a
// register for which we inserted spill code for definition by what was
// originally the first instruction in BB.
- if (I == MachineBasicBlock::iterator(NULL))
+ if (I == MachineBasicBlock::iterator(nullptr))
I = BB->begin();
MachineInstr *MI = I;
MachineBasicBlock::iterator J = std::next(I);
MachineBasicBlock::iterator P =
- I == BB->begin() ? MachineBasicBlock::iterator(NULL) : std::prev(I);
+ I == BB->begin() ? MachineBasicBlock::iterator(nullptr)
+ : std::prev(I);
// RS should process this instruction before we might scavenge at this
// location. This is because we might be replacing a virtual register
diff --git a/lib/CodeGen/PseudoSourceValue.cpp b/lib/CodeGen/PseudoSourceValue.cpp
index 8564911..12b2c90 100644
--- a/lib/CodeGen/PseudoSourceValue.cpp
+++ b/lib/CodeGen/PseudoSourceValue.cpp
@@ -58,13 +58,9 @@ static const char *const PSVNames[] = {
"ConstantPool"
};
-// FIXME: THIS IS A HACK!!!!
-// Eventually these should be uniqued on LLVMContext rather than in a managed
-// static. For now, we can safely use the global context for the time being to
-// squeak by.
-PseudoSourceValue::PseudoSourceValue(enum ValueTy Subclass) :
- Value(Type::getInt8PtrTy(getGlobalContext()),
- Subclass) {}
+PseudoSourceValue::PseudoSourceValue(bool isFixed) : isFixed(isFixed) {}
+
+PseudoSourceValue::~PseudoSourceValue() {}
void PseudoSourceValue::printCustom(raw_ostream &O) const {
O << PSVNames[this - PSVGlobals->PSVs];
diff --git a/lib/CodeGen/RegAllocBase.cpp b/lib/CodeGen/RegAllocBase.cpp
index 33584f8..894aee7 100644
--- a/lib/CodeGen/RegAllocBase.cpp
+++ b/lib/CodeGen/RegAllocBase.cpp
@@ -12,7 +12,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "regalloc"
#include "RegAllocBase.h"
#include "Spiller.h"
#include "llvm/ADT/Statistic.h"
@@ -35,6 +34,8 @@
using namespace llvm;
+#define DEBUG_TYPE "regalloc"
+
STATISTIC(NumNewQueued , "Number of new live ranges queued");
// Temporary verification option until we can put verification inside
@@ -110,7 +111,7 @@ void RegAllocBase::allocatePhysRegs() {
if (AvailablePhysReg == ~0u) {
// selectOrSplit failed to find a register!
// Probably caused by an inline asm.
- MachineInstr *MI = 0;
+ MachineInstr *MI = nullptr;
for (MachineRegisterInfo::reg_instr_iterator
I = MRI->reg_instr_begin(VirtReg->reg), E = MRI->reg_instr_end();
I != E; ) {
diff --git a/lib/CodeGen/RegAllocBase.h b/lib/CodeGen/RegAllocBase.h
index 68bd4b5..b333c36 100644
--- a/lib/CodeGen/RegAllocBase.h
+++ b/lib/CodeGen/RegAllocBase.h
@@ -65,7 +65,8 @@ protected:
LiveRegMatrix *Matrix;
RegisterClassInfo RegClassInfo;
- RegAllocBase(): TRI(0), MRI(0), VRM(0), LIS(0), Matrix(0) {}
+ RegAllocBase()
+ : TRI(nullptr), MRI(nullptr), VRM(nullptr), LIS(nullptr), Matrix(nullptr) {}
virtual ~RegAllocBase() {}
diff --git a/lib/CodeGen/RegAllocBasic.cpp b/lib/CodeGen/RegAllocBasic.cpp
index b8c04fc..b722098 100644
--- a/lib/CodeGen/RegAllocBasic.cpp
+++ b/lib/CodeGen/RegAllocBasic.cpp
@@ -12,7 +12,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "regalloc"
#include "llvm/CodeGen/Passes.h"
#include "AllocationOrder.h"
#include "LiveDebugVariables.h"
@@ -41,6 +40,8 @@
using namespace llvm;
+#define DEBUG_TYPE "regalloc"
+
static RegisterRegAlloc basicRegAlloc("basic", "basic register allocator",
createBasicRegisterAllocator);
@@ -93,7 +94,7 @@ public:
LiveInterval *dequeue() override {
if (Queue.empty())
- return 0;
+ return nullptr;
LiveInterval *LI = Queue.top();
Queue.pop();
return LI;
@@ -156,7 +157,7 @@ void RABasic::getAnalysisUsage(AnalysisUsage &AU) const {
}
void RABasic::releaseMemory() {
- SpillerInstance.reset(0);
+ SpillerInstance.reset(nullptr);
}
diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp
index 8dc44f5..97b9f76 100644
--- a/lib/CodeGen/RegAllocFast.cpp
+++ b/lib/CodeGen/RegAllocFast.cpp
@@ -12,7 +12,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "regalloc"
#include "llvm/CodeGen/Passes.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/IndexedMap.h"
@@ -38,6 +37,8 @@
#include <algorithm>
using namespace llvm;
+#define DEBUG_TYPE "regalloc"
+
STATISTIC(NumStores, "Number of stores added");
STATISTIC(NumLoads , "Number of loads added");
STATISTIC(NumCopies, "Number of copies coalesced");
@@ -75,7 +76,7 @@ namespace {
bool Dirty; // Register needs spill.
explicit LiveReg(unsigned v)
- : LastUse(0), VirtReg(v), PhysReg(0), LastOpNum(0), Dirty(false) {}
+ : LastUse(nullptr), VirtReg(v), PhysReg(0), LastOpNum(0), Dirty(false){}
unsigned getSparseSetIndex() const {
return TargetRegisterInfo::virtReg2Index(VirtReg);
@@ -319,7 +320,7 @@ void RAFast::spillVirtReg(MachineBasicBlock::iterator MI,
// now.
LRIDbgValues.clear();
if (SpillKill)
- LR.LastUse = 0; // Don't kill register again
+ LR.LastUse = nullptr; // Don't kill register again
}
killVirtReg(LRI);
}
diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp
index 6a623b8..aa7c178 100644
--- a/lib/CodeGen/RegAllocGreedy.cpp
+++ b/lib/CodeGen/RegAllocGreedy.cpp
@@ -12,7 +12,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "regalloc"
#include "llvm/CodeGen/Passes.h"
#include "AllocationOrder.h"
#include "InterferenceCache.h"
@@ -37,7 +36,9 @@
#include "llvm/CodeGen/RegAllocRegistry.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/IR/LLVMContext.h"
#include "llvm/PassAnalysisSupport.h"
+#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -47,6 +48,8 @@
using namespace llvm;
+#define DEBUG_TYPE "regalloc"
+
STATISTIC(NumGlobalSplits, "Number of split global live ranges");
STATISTIC(NumLocalSplits, "Number of split local live ranges");
STATISTIC(NumEvicted, "Number of interferences evicted");
@@ -71,6 +74,11 @@ static cl::opt<unsigned> LastChanceRecoloringMaxInterference(
" interference at a time"),
cl::init(8));
+static cl::opt<bool>
+ExhaustiveSearch("exhaustive-register-search", cl::NotHidden,
+ cl::desc("Exhaustive Search for registers bypassing the depth "
+ "and interference cutoffs of last chance recoloring"));
+
// FIXME: Find a good default for this flag and remove the flag.
static cl::opt<unsigned>
CSRFirstTimeCost("regalloc-csr-first-time-cost",
@@ -147,6 +155,22 @@ class RAGreedy : public MachineFunctionPass,
RS_Done
};
+ // Enum CutOffStage to keep track of whether the register allocation failed
+ // because of the cutoffs encountered in last chance recoloring.
+ // Note: This is used as a bitmask. Each new value should be the next power of 2.
+ enum CutOffStage {
+ // No cutoffs encountered
+ CO_None = 0,
+
+ // lcr-max-depth cutoff encountered
+ CO_Depth = 1,
+
+ // lcr-max-interf cutoff encountered
+ CO_Interf = 2
+ };
+
+ uint8_t CutOffInfo;
+
#ifndef NDEBUG
static const char *const StageName[];
#endif
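CutOffInfo accumulates these values as bits, so one pass through last-chance recoloring can record that both cutoffs fired; selectOrSplit (below) masks the field to choose among three diagnostics, each pointing the user at the new -exhaustive-register-search option. A compact sketch of the encode/decode round trip:

    #include <cstdint>
    #include <cstdio>

    enum CutOffStage : uint8_t { CO_None = 0, CO_Depth = 1, CO_Interf = 2 };

    int main() {
      uint8_t CutOffInfo = CO_None;
      CutOffInfo |= CO_Depth;  // lcr-max-depth cutoff encountered
      CutOffInfo |= CO_Interf; // lcr-max-interf cutoff encountered

      uint8_t Hit = CutOffInfo & (CO_Depth | CO_Interf);
      if (Hit == (CO_Depth | CO_Interf))
        std::printf("both depth and interference cutoffs reached\n");
      else if (Hit == CO_Depth)
        std::printf("depth cutoff reached\n");
      else if (Hit == CO_Interf)
        std::printf("interference cutoff reached\n");
      return 0;
    }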
@@ -258,6 +282,9 @@ class RAGreedy : public MachineFunctionPass,
/// NoCand which indicates the stack interval.
SmallVector<unsigned, 32> BundleCand;
+ /// Callee-save register cost, calculated once per machine function.
+ BlockFrequency CSRCost;
+
public:
RAGreedy();
@@ -326,6 +353,7 @@ private:
unsigned tryAssignCSRFirstTime(LiveInterval &VirtReg, AllocationOrder &Order,
unsigned PhysReg, unsigned &CostPerUseLimit,
SmallVectorImpl<unsigned> &NewVRegs);
+ void initializeCSRCost();
unsigned tryBlockSplit(LiveInterval&, AllocationOrder&,
SmallVectorImpl<unsigned>&);
unsigned tryInstructionSplit(LiveInterval&, AllocationOrder&,
@@ -447,7 +475,7 @@ void RAGreedy::LRE_DidCloneVirtReg(unsigned New, unsigned Old) {
}
void RAGreedy::releaseMemory() {
- SpillerInstance.reset(0);
+ SpillerInstance.reset(nullptr);
ExtraRegInfo.clear();
GlobalCand.clear();
}
@@ -514,7 +542,7 @@ LiveInterval *RAGreedy::dequeue() { return dequeue(Queue); }
LiveInterval *RAGreedy::dequeue(PQueue &CurQueue) {
if (CurQueue.empty())
- return 0;
+ return nullptr;
LiveInterval *LI = &LIS->getInterval(~CurQueue.top().second);
CurQueue.pop();
return LI;
@@ -1910,8 +1938,9 @@ RAGreedy::mayRecolorAllInterferences(unsigned PhysReg, LiveInterval &VirtReg,
// If there is LastChanceRecoloringMaxInterference or more interferences,
// chances are one would not be recolorable.
if (Q.collectInterferingVRegs(LastChanceRecoloringMaxInterference) >=
- LastChanceRecoloringMaxInterference) {
+ LastChanceRecoloringMaxInterference && !ExhaustiveSearch) {
DEBUG(dbgs() << "Early abort: too many interferences.\n");
+ CutOffInfo |= CO_Interf;
return false;
}
for (unsigned i = Q.interferingVRegs().size(); i; --i) {
@@ -1982,8 +2011,9 @@ unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg,
// We may want to reconsider that if we end up with a too large search space
// for target with hundreds of registers.
// Indeed, in that case we may want to cut the search space earlier.
- if (Depth >= LastChanceRecoloringMaxDepth) {
+ if (Depth >= LastChanceRecoloringMaxDepth && !ExhaustiveSearch) {
DEBUG(dbgs() << "Abort because max depth has been reached.\n");
+ CutOffInfo |= CO_Depth;
return ~0u;
}
@@ -2108,8 +2138,26 @@ bool RAGreedy::tryRecoloringCandidates(PQueue &RecoloringQueue,
unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg,
SmallVectorImpl<unsigned> &NewVRegs) {
+ CutOffInfo = CO_None;
+ LLVMContext &Ctx = MF->getFunction()->getContext();
SmallVirtRegSet FixedRegisters;
- return selectOrSplitImpl(VirtReg, NewVRegs, FixedRegisters);
+ unsigned Reg = selectOrSplitImpl(VirtReg, NewVRegs, FixedRegisters);
+ if (Reg == ~0U && (CutOffInfo != CO_None)) {
+ uint8_t CutOffEncountered = CutOffInfo & (CO_Depth | CO_Interf);
+ if (CutOffEncountered == CO_Depth)
+ Ctx.emitError("register allocation failed: maximum depth for recoloring "
+ "reached. Use -fexhaustive-register-search to skip "
+ "cutoffs");
+ else if (CutOffEncountered == CO_Interf)
+ Ctx.emitError("register allocation failed: maximum interference for "
+ "recoloring reached. Use -fexhaustive-register-search "
+ "to skip cutoffs");
+ else if (CutOffEncountered == (CO_Depth | CO_Interf))
+ Ctx.emitError("register allocation failed: maximum interference and "
+ "depth for recoloring reached. Use "
+ "-fexhaustive-register-search to skip cutoffs");
+ }
+ return Reg;
}
/// Using a CSR for the first time has a cost because it causes push|pop
@@ -2123,10 +2171,6 @@ unsigned RAGreedy::tryAssignCSRFirstTime(LiveInterval &VirtReg,
unsigned PhysReg,
unsigned &CostPerUseLimit,
SmallVectorImpl<unsigned> &NewVRegs) {
- // We use the larger one out of the command-line option and the value reported
- // by TRI.
- BlockFrequency CSRCost(std::max((unsigned)CSRFirstTimeCost,
- TRI->getCSRFirstUseCost()));
if (getStage(VirtReg) == RS_Spill && VirtReg.isSpillable()) {
// We choose spill over using the CSR for the first time if the spill cost
// is lower than CSRCost.
@@ -2144,9 +2188,9 @@ unsigned RAGreedy::tryAssignCSRFirstTime(LiveInterval &VirtReg,
// the cost of splitting is lower than CSRCost.
SA->analyze(&VirtReg);
unsigned NumCands = 0;
- unsigned BestCand =
- calculateRegionSplitCost(VirtReg, Order, CSRCost, NumCands,
- true/*IgnoreCSR*/);
+ BlockFrequency BestCost = CSRCost; // Don't modify CSRCost.
+ unsigned BestCand = calculateRegionSplitCost(VirtReg, Order, BestCost,
+ NumCands, true /*IgnoreCSR*/);
if (BestCand == NoCand)
// Use the CSR if we can't find a region split below CSRCost.
return PhysReg;
@@ -2158,6 +2202,31 @@ unsigned RAGreedy::tryAssignCSRFirstTime(LiveInterval &VirtReg,
return PhysReg;
}
+void RAGreedy::initializeCSRCost() {
+ // We use the larger one out of the command-line option and the value reported
+ // by TRI.
+ CSRCost = BlockFrequency(
+ std::max((unsigned)CSRFirstTimeCost, TRI->getCSRFirstUseCost()));
+ if (!CSRCost.getFrequency())
+ return;
+
+ // Raw cost is relative to Entry == 2^14; scale it appropriately.
+ uint64_t ActualEntry = MBFI->getEntryFreq();
+ if (!ActualEntry) {
+ CSRCost = 0;
+ return;
+ }
+ uint64_t FixedEntry = 1 << 14;
+ if (ActualEntry < FixedEntry)
+ CSRCost *= BranchProbability(ActualEntry, FixedEntry);
+ else if (ActualEntry <= UINT32_MAX)
+ // Invert the fraction and divide.
+ CSRCost /= BranchProbability(FixedEntry, ActualEntry);
+ else
+ // Can't use BranchProbability in general, since it takes 32-bit numbers.
+ CSRCost = CSRCost.getFrequency() * (ActualEntry / FixedEntry);
+}
+
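The scaling in initializeCSRCost assumes the raw cost was calibrated against a fixed entry frequency of 2^14 = 16384 and rescales it by the ratio ActualEntry/16384; BranchProbability only appears so that multiplication can be done without 32-bit overflow. A worked version of the arithmetic with plain 64-bit math and made-up numbers:

    #include <cstdint>
    #include <cstdio>

    int main() {
      const uint64_t FixedEntry = 1 << 14; // 16384, the calibration frequency
      uint64_t RawCost = 5;                // hypothetical CSRFirstTimeCost
      uint64_t ActualEntry = 4096;         // hypothetical MBFI->getEntryFreq()

      // Same ratio the pass computes via BranchProbability:
      uint64_t Scaled = RawCost * ActualEntry / FixedEntry;
      std::printf("CSR cost %llu -> %llu\n",
                  (unsigned long long)RawCost, (unsigned long long)Scaled);
      // 5 * 4096 / 16384 = 1: a colder entry block shrinks the CSR penalty.
      return 0;
    }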
unsigned RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg,
SmallVectorImpl<unsigned> &NewVRegs,
SmallVirtRegSet &FixedRegisters,
@@ -2175,8 +2244,7 @@ unsigned RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg,
// When NewVRegs is not empty, we may have made decisions such as evicting
// a virtual register, go with the earlier decisions and use the physical
// register.
- if ((CSRFirstTimeCost || TRI->getCSRFirstUseCost()) &&
- CSRFirstUse && NewVRegs.empty()) {
+ if (CSRCost.getFrequency() && CSRFirstUse && NewVRegs.empty()) {
unsigned CSRReg = tryAssignCSRFirstTime(VirtReg, Order, PhysReg,
CostPerUseLimit, NewVRegs);
if (CSRReg || !NewVRegs.empty())
@@ -2258,6 +2326,8 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
SpillPlacer = &getAnalysis<SpillPlacement>();
DebugVars = &getAnalysis<LiveDebugVariables>();
+ initializeCSRCost();
+
calculateSpillWeightsAndHints(*LIS, mf, *Loops, *MBFI);
DEBUG(LIS->dump());
diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp
index 96dbd9a..b8d2325 100644
--- a/lib/CodeGen/RegAllocPBQP.cpp
+++ b/lib/CodeGen/RegAllocPBQP.cpp
@@ -29,8 +29,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "regalloc"
-
#include "llvm/CodeGen/RegAllocPBQP.h"
#include "RegisterCoalescer.h"
#include "Spiller.h"
@@ -48,6 +46,7 @@
#include "llvm/CodeGen/VirtRegMap.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/FileSystem.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
@@ -59,6 +58,8 @@
using namespace llvm;
+#define DEBUG_TYPE "regalloc"
+
static RegisterRegAlloc
registerPBQPRepAlloc("pbqp", "PBQP register allocator",
createDefaultPBQPRegisterAllocator);
@@ -87,7 +88,7 @@ public:
static char ID;
/// Construct a PBQP register allocator.
- RegAllocPBQP(std::unique_ptr<PBQPBuilder> &b, char *cPassID=0)
+ RegAllocPBQP(std::unique_ptr<PBQPBuilder> &b, char *cPassID=nullptr)
: MachineFunctionPass(ID), builder(b.release()), customPassID(cPassID) {
initializeSlotIndexesPass(*PassRegistry::getPassRegistry());
initializeLiveIntervalsPass(*PassRegistry::getPassRegistry());
@@ -215,7 +216,7 @@ PBQPRAProblem *PBQPBuilder::build(MachineFunction *mf, const LiveIntervals *lis,
// Compute an initial allowed set for the current vreg.
typedef std::vector<unsigned> VRAllowed;
VRAllowed vrAllowed;
- ArrayRef<uint16_t> rawOrder = trc->getRawAllocationOrder(*mf);
+ ArrayRef<MCPhysReg> rawOrder = trc->getRawAllocationOrder(*mf);
for (unsigned i = 0; i != rawOrder.size(); ++i) {
unsigned preg = rawOrder[i];
if (mri->isReserved(preg))
@@ -320,17 +321,9 @@ PBQPRAProblem *PBQPBuilderWithCoalescing::build(MachineFunction *mf,
// Scan the machine function and add a coalescing cost whenever CoalescerPair
// gives the OK.
- for (MachineFunction::const_iterator mbbItr = mf->begin(),
- mbbEnd = mf->end();
- mbbItr != mbbEnd; ++mbbItr) {
- const MachineBasicBlock *mbb = &*mbbItr;
-
- for (MachineBasicBlock::const_iterator miItr = mbb->begin(),
- miEnd = mbb->end();
- miItr != miEnd; ++miItr) {
- const MachineInstr *mi = &*miItr;
-
- if (!cp.setRegisters(mi)) {
+ for (const auto &mbb : *mf) {
+ for (const auto &mi : mbb) {
+ if (!cp.setRegisters(&mi)) {
continue; // Not coalescable.
}
@@ -345,7 +338,7 @@ PBQPRAProblem *PBQPBuilderWithCoalescing::build(MachineFunction *mf,
// value plucked randomly out of the air.
PBQP::PBQPNum cBenefit =
- copyFactor * LiveIntervals::getSpillWeight(false, true, mbfi, mi);
+ copyFactor * LiveIntervals::getSpillWeight(false, true, mbfi, &mi);
if (cp.isPhys()) {
if (!mf->getRegInfo().isAllocatable(dst)) {
diff --git a/lib/CodeGen/RegisterClassInfo.cpp b/lib/CodeGen/RegisterClassInfo.cpp
index aa84446..8b5445c 100644
--- a/lib/CodeGen/RegisterClassInfo.cpp
+++ b/lib/CodeGen/RegisterClassInfo.cpp
@@ -14,7 +14,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "regalloc"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -25,12 +24,14 @@
using namespace llvm;
+#define DEBUG_TYPE "regalloc"
+
static cl::opt<unsigned>
StressRA("stress-regalloc", cl::Hidden, cl::init(0), cl::value_desc("N"),
cl::desc("Limit all regclasses to N registers"));
-RegisterClassInfo::RegisterClassInfo() : Tag(0), MF(0), TRI(0), CalleeSaved(0)
-{}
+RegisterClassInfo::RegisterClassInfo()
+ : Tag(0), MF(nullptr), TRI(nullptr), CalleeSaved(nullptr) {}
void RegisterClassInfo::runOnMachineFunction(const MachineFunction &mf) {
bool Update = false;
@@ -151,7 +152,7 @@ void RegisterClassInfo::compute(const TargetRegisterClass *RC) const {
/// nonoverlapping reserved registers. However, computing the allocation order
/// for all register classes would be too expensive.
unsigned RegisterClassInfo::computePSetLimit(unsigned Idx) const {
- const TargetRegisterClass *RC = 0;
+ const TargetRegisterClass *RC = nullptr;
unsigned NumRCUnits = 0;
for (TargetRegisterInfo::regclass_iterator
RI = TRI->regclass_begin(), RE = TRI->regclass_end(); RI != RE; ++RI) {
diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp
index 682c26c..5aaeb87 100644
--- a/lib/CodeGen/RegisterCoalescer.cpp
+++ b/lib/CodeGen/RegisterCoalescer.cpp
@@ -13,7 +13,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "regalloc"
#include "RegisterCoalescer.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
@@ -42,6 +41,8 @@
#include <cmath>
using namespace llvm;
+#define DEBUG_TYPE "regalloc"
+
STATISTIC(numJoins , "Number of interval joins performed");
STATISTIC(numCrossRCs , "Number of cross class joins performed");
STATISTIC(numCommutes , "Number of instruction commuting performed");
@@ -195,7 +196,7 @@ namespace {
bool runOnMachineFunction(MachineFunction&) override;
/// print - Implement the dump method.
- void print(raw_ostream &O, const Module* = 0) const override;
+ void print(raw_ostream &O, const Module* = nullptr) const override;
};
} /// end anonymous namespace
@@ -240,9 +241,8 @@ static bool isSplitEdge(const MachineBasicBlock *MBB) {
if (MBB->pred_size() != 1 || MBB->succ_size() != 1)
return false;
- for (MachineBasicBlock::const_iterator MII = MBB->begin(), E = MBB->end();
- MII != E; ++MII) {
- if (!MII->isCopyLike() && !MII->isUnconditionalBranch())
+ for (const auto &MI : *MBB) {
+ if (!MI.isCopyLike() && !MI.isUnconditionalBranch())
return false;
}
return true;
@@ -251,7 +251,7 @@ static bool isSplitEdge(const MachineBasicBlock *MBB) {
bool CoalescerPair::setRegisters(const MachineInstr *MI) {
SrcReg = DstReg = 0;
SrcIdx = DstIdx = 0;
- NewRC = 0;
+ NewRC = nullptr;
Flipped = CrossClass = false;
unsigned Src, Dst, SrcSub, DstSub;
@@ -397,7 +397,8 @@ void RegisterCoalescer::getAnalysisUsage(AnalysisUsage &AU) const {
void RegisterCoalescer::eliminateDeadDefs() {
SmallVector<unsigned, 8> NewRegs;
- LiveRangeEdit(0, NewRegs, *MF, *LIS, 0, this).eliminateDeadDefs(DeadDefs);
+ LiveRangeEdit(nullptr, NewRegs, *MF, *LIS,
+ nullptr, this).eliminateDeadDefs(DeadDefs);
}
// Callback from eliminateDeadDefs().
@@ -844,6 +845,27 @@ bool RegisterCoalescer::reMaterializeTrivialDef(CoalescerPair &CP,
true /*IsDef*/,
true /*IsImp*/,
false /*IsKill*/));
+ // Record small dead def live-ranges for all the subregisters
+ // of the destination register.
+ // Otherwise, variables that live through may miss some
+ // interferences, thus creating invalid allocation.
+ // E.g., i386 code:
+ // vreg1 = somedef ; vreg1 GR8
+ // vreg2 = remat ; vreg2 GR32
+ // CL = COPY vreg2.sub_8bit
+ // = somedef vreg1 ; vreg1 GR8
+ // =>
+ // vreg1 = somedef ; vreg1 GR8
+ // ECX<def, dead> = remat ; CL<imp-def>
+ // = somedef vreg1 ; vreg1 GR8
+ // vreg1 will see the interferences with CL but not with CH since
+ // no live-ranges would have been created for ECX.
+ // Fix that!
+ SlotIndex NewMIIdx = LIS->getInstructionIndex(NewMI);
+ for (MCRegUnitIterator Units(NewMI->getOperand(0).getReg(), TRI);
+ Units.isValid(); ++Units)
+ if (LiveRange *LR = LIS->getCachedRegUnit(*Units))
+ LR->createDeadDef(NewMIIdx.getRegSlot(), LIS->getVNInfoAllocator());
}
if (NewMI->getOperand(0).getSubReg())
@@ -902,7 +924,7 @@ bool RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI,
// No intervals are live-in to CopyMI - it is undef.
if (CP.isFlipped())
DstInt = SrcInt;
- SrcInt = 0;
+ SrcInt = nullptr;
VNInfo *DeadVNI = DstInt->getVNInfoAt(Idx.getRegSlot());
assert(DeadVNI && "No value defined in DstInt");
@@ -931,7 +953,7 @@ void RegisterCoalescer::updateRegDefsUses(unsigned SrcReg,
unsigned DstReg,
unsigned SubIdx) {
bool DstIsPhys = TargetRegisterInfo::isPhysicalRegister(DstReg);
- LiveInterval *DstInt = DstIsPhys ? 0 : &LIS->getInterval(DstReg);
+ LiveInterval *DstInt = DstIsPhys ? nullptr : &LIS->getInterval(DstReg);
SmallPtrSet<MachineInstr*, 8> Visited;
for (MachineRegisterInfo::reg_instr_iterator
@@ -1355,7 +1377,7 @@ class JoinVals {
bool PrunedComputed;
Val() : Resolution(CR_Keep), WriteLanes(0), ValidLanes(0),
- RedefVNI(0), OtherVNI(0), ErasableImplicitDef(false),
+ RedefVNI(nullptr), OtherVNI(nullptr), ErasableImplicitDef(false),
Pruned(false), PrunedComputed(false) {}
bool isAnalyzed() const { return WriteLanes != 0; }
@@ -1461,7 +1483,7 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) {
}
// Get the instruction defining this value, compute the lanes written.
- const MachineInstr *DefMI = 0;
+ const MachineInstr *DefMI = nullptr;
if (VNI->isPHIDef()) {
// Conservatively assume that all lanes in a PHI are valid.
V.ValidLanes = V.WriteLanes = TRI->getSubRegIndexLaneMask(SubIdx);
@@ -2085,14 +2107,14 @@ copyCoalesceWorkList(MutableArrayRef<MachineInstr*> CurrList) {
// Skip instruction pointers that have already been erased, for example by
// dead code elimination.
if (ErasedInstrs.erase(CurrList[i])) {
- CurrList[i] = 0;
+ CurrList[i] = nullptr;
continue;
}
bool Again = false;
bool Success = joinCopy(CurrList[i], Again);
Progress |= Success;
if (Success || !Again)
- CurrList[i] = 0;
+ CurrList[i] = nullptr;
}
return Progress;
}
@@ -2132,7 +2154,7 @@ RegisterCoalescer::copyCoalesceInMBB(MachineBasicBlock *MBB) {
CurrList(WorkList.begin() + PrevSize, WorkList.end());
if (copyCoalesceWorkList(CurrList))
WorkList.erase(std::remove(WorkList.begin() + PrevSize, WorkList.end(),
- (MachineInstr*)0), WorkList.end());
+ (MachineInstr*)nullptr), WorkList.end());
}
void RegisterCoalescer::coalesceLocals() {
diff --git a/lib/CodeGen/RegisterCoalescer.h b/lib/CodeGen/RegisterCoalescer.h
index 47c3df1..e57ceab 100644
--- a/lib/CodeGen/RegisterCoalescer.h
+++ b/lib/CodeGen/RegisterCoalescer.h
@@ -61,14 +61,14 @@ namespace llvm {
public:
CoalescerPair(const TargetRegisterInfo &tri)
: TRI(tri), DstReg(0), SrcReg(0), DstIdx(0), SrcIdx(0),
- Partial(false), CrossClass(false), Flipped(false), NewRC(0) {}
+ Partial(false), CrossClass(false), Flipped(false), NewRC(nullptr) {}
/// Create a CoalescerPair representing a virtreg-to-physreg copy.
/// No need to call setRegisters().
CoalescerPair(unsigned VirtReg, unsigned PhysReg,
const TargetRegisterInfo &tri)
: TRI(tri), DstReg(PhysReg), SrcReg(VirtReg), DstIdx(0), SrcIdx(0),
- Partial(false), CrossClass(false), Flipped(false), NewRC(0) {}
+ Partial(false), CrossClass(false), Flipped(false), NewRC(nullptr) {}
/// setRegisters - set registers to match the copy instruction MI. Return
/// false if MI is not a coalescable copy instruction.
diff --git a/lib/CodeGen/RegisterPressure.cpp b/lib/CodeGen/RegisterPressure.cpp
index 97817da..b2909e0 100644
--- a/lib/CodeGen/RegisterPressure.cpp
+++ b/lib/CodeGen/RegisterPressure.cpp
@@ -154,8 +154,8 @@ const LiveRange *RegPressureTracker::getLiveRange(unsigned Reg) const {
}
void RegPressureTracker::reset() {
- MBB = 0;
- LIS = 0;
+ MBB = nullptr;
+ LIS = nullptr;
CurrSetPressure.clear();
LiveThruPressure.clear();
diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp
index bfd26dc..72b6285 100644
--- a/lib/CodeGen/RegisterScavenging.cpp
+++ b/lib/CodeGen/RegisterScavenging.cpp
@@ -14,7 +14,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "reg-scavenging"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -29,6 +28,8 @@
#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
+#define DEBUG_TYPE "reg-scavenging"
+
/// setUsed - Set the register and its sub-registers as being used.
void RegScavenger::setUsed(unsigned Reg) {
for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
@@ -47,7 +48,7 @@ void RegScavenger::initRegState() {
for (SmallVectorImpl<ScavengedInfo>::iterator I = Scavenged.begin(),
IE = Scavenged.end(); I != IE; ++I) {
I->Reg = 0;
- I->Restore = NULL;
+ I->Restore = nullptr;
}
// All registers started out unused.
@@ -91,8 +92,8 @@ void RegScavenger::enterBasicBlock(MachineBasicBlock *mbb) {
// Create callee-saved registers bitvector.
CalleeSavedRegs.resize(NumPhysRegs);
- const uint16_t *CSRegs = TRI->getCalleeSavedRegs(&MF);
- if (CSRegs != NULL)
+ const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF);
+ if (CSRegs != nullptr)
for (unsigned i = 0; CSRegs[i]; ++i)
CalleeSavedRegs.set(CSRegs[i]);
}
@@ -162,7 +163,7 @@ void RegScavenger::unprocess() {
}
if (MBBI == MBB->begin()) {
- MBBI = MachineBasicBlock::iterator(NULL);
+ MBBI = MachineBasicBlock::iterator(nullptr);
Tracking = false;
} else
--MBBI;
@@ -187,7 +188,7 @@ void RegScavenger::forward() {
continue;
I->Reg = 0;
- I->Restore = NULL;
+ I->Restore = nullptr;
}
if (MI->isDebugValue())
@@ -223,7 +224,7 @@ void RegScavenger::forward() {
break;
}
if (!SubUsed) {
- MBB->getParent()->verify(NULL, "In Register Scavenger");
+ MBB->getParent()->verify(nullptr, "In Register Scavenger");
llvm_unreachable("Using an undefined register!");
}
(void)SubUsed;
diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp
index d08eb65..6a2a080 100644
--- a/lib/CodeGen/ScheduleDAG.cpp
+++ b/lib/CodeGen/ScheduleDAG.cpp
@@ -12,7 +12,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "pre-RA-sched"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
@@ -25,6 +24,8 @@
#include <climits>
using namespace llvm;
+#define DEBUG_TYPE "pre-RA-sched"
+
#ifndef NDEBUG
static cl::opt<bool> StressSchedOpt(
"stress-sched", cl::Hidden, cl::init(false),
@@ -55,7 +56,7 @@ void ScheduleDAG::clearDAG() {
/// getInstrDesc helper to handle SDNodes.
const MCInstrDesc *ScheduleDAG::getNodeDesc(const SDNode *Node) const {
- if (!Node || !Node->isMachineOpcode()) return NULL;
+ if (!Node || !Node->isMachineOpcode()) return nullptr;
return &TII->get(Node->getMachineOpcode());
}
diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp
index c8328ad..92a9a30 100644
--- a/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -12,7 +12,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "misched"
#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallPtrSet.h"
@@ -41,18 +40,14 @@
using namespace llvm;
+#define DEBUG_TYPE "misched"
+
static cl::opt<bool> EnableAASchedMI("enable-aa-sched-mi", cl::Hidden,
cl::ZeroOrMore, cl::init(false),
cl::desc("Enable use of AA during MI GAD construction"));
-// FIXME: Enable the use of TBAA. There are two known issues preventing this:
-// 1. Stack coloring does not update TBAA when merging allocas
-// 2. CGP inserts ptrtoint/inttoptr pairs when sinking address computations.
-// Because BasicAA does not handle inttoptr, we'll often miss basic type
-// punning idioms that we need to catch so we don't miscompile real-world
-// code.
static cl::opt<bool> UseTBAA("use-tbaa-in-sched-mi", cl::Hidden,
- cl::init(false), cl::desc("Enable use of TBAA during MI GAD construction"));
+ cl::init(true), cl::desc("Enable use of TBAA during MI GAD construction"));
ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf,
const MachineLoopInfo &mli,
@@ -62,7 +57,7 @@ ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf,
LiveIntervals *lis)
: ScheduleDAG(mf), MLI(mli), MDT(mdt), MFI(mf.getFrameInfo()), LIS(lis),
IsPostRA(IsPostRAFlag), RemoveKillFlags(RemoveKillFlags),
- CanHandleTerminators(false), FirstDbgValue(0) {
+ CanHandleTerminators(false), FirstDbgValue(nullptr) {
assert((IsPostRA || LIS) && "PreRA scheduling requires LiveIntervals");
DbgValues.clear();
assert(!(IsPostRA && MRI.getNumVirtRegs()) &&
@@ -104,7 +99,7 @@ static const Value *getUnderlyingObjectFromInt(const Value *V) {
/// and adds support for basic ptrtoint+arithmetic+inttoptr sequences.
static void getUnderlyingObjects(const Value *V,
SmallVectorImpl<Value *> &Objects) {
- SmallPtrSet<const Value*, 16> Visited;
+ SmallPtrSet<const Value *, 16> Visited;
SmallVector<const Value *, 4> Working(1, V);
do {
V = Working.pop_back_val();
@@ -130,7 +125,8 @@ static void getUnderlyingObjects(const Value *V,
} while (!Working.empty());
}
-typedef SmallVector<PointerIntPair<const Value *, 1, bool>, 4>
+typedef PointerUnion<const Value *, const PseudoSourceValue *> ValueType;
+typedef SmallVector<PointerIntPair<ValueType, 1, bool>, 4>
UnderlyingObjectsVector;
/// getUnderlyingObjectsForInstr - If this machine instr has memory reference
@@ -140,25 +136,27 @@ static void getUnderlyingObjectsForInstr(const MachineInstr *MI,
const MachineFrameInfo *MFI,
UnderlyingObjectsVector &Objects) {
if (!MI->hasOneMemOperand() ||
- !(*MI->memoperands_begin())->getValue() ||
+ (!(*MI->memoperands_begin())->getValue() &&
+ !(*MI->memoperands_begin())->getPseudoValue()) ||
(*MI->memoperands_begin())->isVolatile())
return;
- const Value *V = (*MI->memoperands_begin())->getValue();
- if (!V)
- return;
-
- if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V)) {
+ if (const PseudoSourceValue *PSV =
+ (*MI->memoperands_begin())->getPseudoValue()) {
// For now, ignore PseudoSourceValues which may alias LLVM IR values
// because the code that uses this function has no way to cope with
// such aliases.
if (!PSV->isAliased(MFI)) {
bool MayAlias = PSV->mayAlias(MFI);
- Objects.push_back(UnderlyingObjectsVector::value_type(V, MayAlias));
+ Objects.push_back(UnderlyingObjectsVector::value_type(PSV, MayAlias));
}
return;
}
+ const Value *V = (*MI->memoperands_begin())->getValue();
+ if (!V)
+ return;
+
SmallVector<Value *, 4> Objs;
getUnderlyingObjects(V, Objs);
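ValueType is a PointerUnion: one pointer-sized handle that is either an IR Value or a PseudoSourceValue, with the discriminator packed into the pointer's spare low bits. A dependency-free analogue that spends an explicit bool on the tag instead (the Value and PseudoSourceValue here are stubs, not LLVM's classes):

    #include <cstdio>

    struct Value {};             // stand-in for llvm::Value
    struct PseudoSourceValue {}; // stand-in for llvm::PseudoSourceValue

    class ValueType {
      const void *Ptr;
      bool IsPSV;
    public:
      ValueType(const Value *V) : Ptr(V), IsPSV(false) {}
      ValueType(const PseudoSourceValue *P) : Ptr(P), IsPSV(true) {}
      bool isPSV() const { return IsPSV; }
    };

    int main() {
      PseudoSourceValue StackSlot;
      ValueType V(&StackSlot);
      std::printf("underlying object: %s\n",
                  V.isPSV() ? "pseudo source value (e.g. a stack slot)"
                            : "IR value");
      return 0;
    }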
@@ -166,8 +164,6 @@ static void getUnderlyingObjectsForInstr(const MachineInstr *MI,
I != IE; ++I) {
V = *I;
- assert(!isa<PseudoSourceValue>(V) && "Underlying value is a stack slot!");
-
if (!isIdentifiedObject(V)) {
Objects.clear();
return;
@@ -183,7 +179,7 @@ void ScheduleDAGInstrs::startBlock(MachineBasicBlock *bb) {
void ScheduleDAGInstrs::finishBlock() {
// Subclasses should no longer refer to the old block.
- BB = 0;
+ BB = nullptr;
}
/// Initialize the DAG and common scheduler state for the current scheduling
@@ -215,7 +211,7 @@ void ScheduleDAGInstrs::exitRegion() {
/// are too high to be hidden by the branch or when the liveout registers
/// used by instructions in the fallthrough block.
void ScheduleDAGInstrs::addSchedBarrierDeps() {
- MachineInstr *ExitMI = RegionEnd != BB->end() ? &*RegionEnd : 0;
+ MachineInstr *ExitMI = RegionEnd != BB->end() ? &*RegionEnd : nullptr;
ExitSU.setInstr(ExitMI);
bool AllDepKnown = ExitMI &&
(ExitMI->isCall() || ExitMI->isBarrier());
@@ -272,7 +268,7 @@ void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) {
// Adjust the dependence latency using operand def/use information,
// then allow the target to perform its own adjustments.
int UseOp = I->OpIdx;
- MachineInstr *RegUse = 0;
+ MachineInstr *RegUse = nullptr;
SDep Dep;
if (UseOp < 0)
Dep = SDep(SU, SDep::Artificial);
@@ -483,6 +479,15 @@ static inline bool isUnsafeMemoryObject(MachineInstr *MI,
if ((*MI->memoperands_begin())->isVolatile() ||
MI->hasUnmodeledSideEffects())
return true;
+
+ if ((*MI->memoperands_begin())->getPseudoValue()) {
+ // Similarly to getUnderlyingObjectForInstr:
+ // For now, ignore PseudoSourceValues which may alias LLVM IR values
+ // because the code that uses this function has no way to cope with
+ // such aliases.
+ return true;
+ }
+
const Value *V = (*MI->memoperands_begin())->getValue();
if (!V)
return true;
@@ -491,19 +496,8 @@ static inline bool isUnsafeMemoryObject(MachineInstr *MI,
getUnderlyingObjects(V, Objs);
for (SmallVectorImpl<Value *>::iterator I = Objs.begin(),
IE = Objs.end(); I != IE; ++I) {
- V = *I;
-
- if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V)) {
- // Similarly to getUnderlyingObjectForInstr:
- // For now, ignore PseudoSourceValues which may alias LLVM IR values
- // because the code that uses this function has no way to cope with
- // such aliases.
- if (PSV->isAliased(MFI))
- return true;
- }
-
// Does this pointer refer to a distinct and identifiable object?
- if (!isIdentifiedObject(V))
+ if (!isIdentifiedObject(*I))
return true;
}
@@ -541,6 +535,9 @@ static bool MIsNeedChainEdge(AliasAnalysis *AA, const MachineFrameInfo *MFI,
MachineMemOperand *MMOa = *MIa->memoperands_begin();
MachineMemOperand *MMOb = *MIb->memoperands_begin();
+ if (!MMOa->getValue() || !MMOb->getValue())
+ return true;
+
// The following interface to AA is fashioned after DAGCombiner::isAlias
// and operates with MachineMemOperand offset with some important
// assumptions:
@@ -566,9 +563,9 @@ static bool MIsNeedChainEdge(AliasAnalysis *AA, const MachineFrameInfo *MFI,
AliasAnalysis::AliasResult AAResult = AA->alias(
AliasAnalysis::Location(MMOa->getValue(), Overlapa,
- UseTBAA ? MMOa->getTBAAInfo() : 0),
+ UseTBAA ? MMOa->getTBAAInfo() : nullptr),
AliasAnalysis::Location(MMOb->getValue(), Overlapb,
- UseTBAA ? MMOb->getTBAAInfo() : 0));
+ UseTBAA ? MMOb->getTBAAInfo() : nullptr));
return (AAResult != AliasAnalysis::NoAlias);
}
@@ -703,10 +700,14 @@ void ScheduleDAGInstrs::initSUnits() {
// Assign the Latency field of SU using target-provided information.
SU->Latency = SchedModel.computeInstrLatency(SU->getInstr());
- // If this SUnit uses an unbuffered resource, mark it as such.
- // These resources are used for in-order execution pipelines within an
- // out-of-order core and are identified by BufferSize=1. BufferSize=0 is
- // used for dispatch/issue groups and is not considered here.
+ // If this SUnit uses a reserved or unbuffered resource, mark it as such.
+ //
+ // Reserved resources block an instruction from issuing and stall the
+ // entire pipeline. These are identified by BufferSize=0.
+ //
+ // Unbuffered resources prevent execution of subsequent instructions that
+ // require the same resources. This is used for in-order execution pipelines
+ // within an out-of-order core. These are identified by BufferSize=1.
if (SchedModel.hasInstrSchedModel()) {
const MCSchedClassDesc *SC = getSchedClass(SU);
for (TargetSchedModel::ProcResIter
@@ -736,7 +737,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
bool UseAA = EnableAASchedMI.getNumOccurrences() > 0 ? EnableAASchedMI
: ST.useAA();
- AliasAnalysis *AAForDep = UseAA ? AA : 0;
+ AliasAnalysis *AAForDep = UseAA ? AA : nullptr;
MISUnitMap.clear();
ScheduleDAG::clearDAG();
@@ -751,20 +752,20 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
// to top.
// Remember where a generic side-effecting instruction is as we proceed.
- SUnit *BarrierChain = 0, *AliasChain = 0;
+ SUnit *BarrierChain = nullptr, *AliasChain = nullptr;
// Memory references to specific known memory locations are tracked
// so that they can be given more precise dependencies. We track
// separately the known memory locations that may alias and those
// that are known not to alias.
- MapVector<const Value *, std::vector<SUnit *> > AliasMemDefs, NonAliasMemDefs;
- MapVector<const Value *, std::vector<SUnit *> > AliasMemUses, NonAliasMemUses;
+ MapVector<ValueType, std::vector<SUnit *> > AliasMemDefs, NonAliasMemDefs;
+ MapVector<ValueType, std::vector<SUnit *> > AliasMemUses, NonAliasMemUses;
std::set<SUnit*> RejectMemNodes;
// Remove any stale debug info; sometimes BuildSchedGraph is called again
// without emitting the info from the previous call.
DbgValues.clear();
- FirstDbgValue = NULL;
+ FirstDbgValue = nullptr;
assert(Defs.empty() && Uses.empty() &&
"Only BuildGraph should update Defs/Uses");
@@ -781,13 +782,13 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
addSchedBarrierDeps();
// Walk the list of instructions, from bottom moving up.
- MachineInstr *DbgMI = NULL;
+ MachineInstr *DbgMI = nullptr;
for (MachineBasicBlock::iterator MII = RegionEnd, MIE = RegionBegin;
MII != MIE; --MII) {
MachineInstr *MI = std::prev(MII);
if (MI && DbgMI) {
DbgValues.push_back(std::make_pair(DbgMI, MI));
- DbgMI = NULL;
+ DbgMI = nullptr;
}
if (MI->isDebugValue()) {
@@ -798,8 +799,8 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
assert(SU && "No SUnit mapped to this MI");
if (RPTracker) {
- PressureDiff *PDiff = PDiffs ? &(*PDiffs)[SU->NodeNum] : 0;
- RPTracker->recede(/*LiveUses=*/0, PDiff);
+ PressureDiff *PDiff = PDiffs ? &(*PDiffs)[SU->NodeNum] : nullptr;
+ RPTracker->recede(/*LiveUses=*/nullptr, PDiff);
assert(RPTracker->getPos() == std::prev(MII) &&
"RPTracker can't find MI");
}
@@ -854,13 +855,13 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
if (isGlobalMemoryObject(AA, MI)) {
// Be conservative with these and add dependencies on all memory
// references, even those that are known to not alias.
- for (MapVector<const Value *, std::vector<SUnit *> >::iterator I =
+ for (MapVector<ValueType, std::vector<SUnit *> >::iterator I =
NonAliasMemDefs.begin(), E = NonAliasMemDefs.end(); I != E; ++I) {
for (unsigned i = 0, e = I->second.size(); i != e; ++i) {
I->second[i]->addPred(SDep(SU, SDep::Barrier));
}
}
- for (MapVector<const Value *, std::vector<SUnit *> >::iterator I =
+ for (MapVector<ValueType, std::vector<SUnit *> >::iterator I =
NonAliasMemUses.begin(), E = NonAliasMemUses.end(); I != E; ++I) {
for (unsigned i = 0, e = I->second.size(); i != e; ++i) {
SDep Dep(SU, SDep::Barrier);
@@ -894,12 +895,12 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
addChainDependency(AAForDep, MFI, SU, PendingLoads[k], RejectMemNodes,
TrueMemOrderLatency);
- for (MapVector<const Value *, std::vector<SUnit *> >::iterator I =
+ for (MapVector<ValueType, std::vector<SUnit *> >::iterator I =
AliasMemDefs.begin(), E = AliasMemDefs.end(); I != E; ++I) {
for (unsigned i = 0, e = I->second.size(); i != e; ++i)
addChainDependency(AAForDep, MFI, SU, I->second[i], RejectMemNodes);
}
- for (MapVector<const Value *, std::vector<SUnit *> >::iterator I =
+ for (MapVector<ValueType, std::vector<SUnit *> >::iterator I =
AliasMemUses.begin(), E = AliasMemUses.end(); I != E; ++I) {
for (unsigned i = 0, e = I->second.size(); i != e; ++i)
addChainDependency(AAForDep, MFI, SU, I->second[i], RejectMemNodes,
@@ -922,7 +923,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
bool MayAlias = false;
for (UnderlyingObjectsVector::iterator K = Objs.begin(), KE = Objs.end();
K != KE; ++K) {
- const Value *V = K->getPointer();
+ ValueType V = K->getPointer();
bool ThisMayAlias = K->getInt();
if (ThisMayAlias)
MayAlias = true;
@@ -930,9 +931,9 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
// A store to a specific PseudoSourceValue. Add precise dependencies.
// Record the def in MemDefs, first adding a dep if there is
// an existing def.
- MapVector<const Value *, std::vector<SUnit *> >::iterator I =
+ MapVector<ValueType, std::vector<SUnit *> >::iterator I =
((ThisMayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V));
- MapVector<const Value *, std::vector<SUnit *> >::iterator IE =
+ MapVector<ValueType, std::vector<SUnit *> >::iterator IE =
((ThisMayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end());
if (I != IE) {
for (unsigned i = 0, e = I->second.size(); i != e; ++i)
@@ -955,9 +956,9 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
}
}
// Handle the uses in MemUses, if there are any.
- MapVector<const Value *, std::vector<SUnit *> >::iterator J =
+ MapVector<ValueType, std::vector<SUnit *> >::iterator J =
((ThisMayAlias) ? AliasMemUses.find(V) : NonAliasMemUses.find(V));
- MapVector<const Value *, std::vector<SUnit *> >::iterator JE =
+ MapVector<ValueType, std::vector<SUnit *> >::iterator JE =
((ThisMayAlias) ? AliasMemUses.end() : NonAliasMemUses.end());
if (J != JE) {
for (unsigned i = 0, e = J->second.size(); i != e; ++i)
@@ -986,11 +987,6 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
// we have lost all RejectMemNodes below barrier.
if (BarrierChain)
BarrierChain->addPred(SDep(SU, SDep::Barrier));
-
- if (!ExitSU.isPred(SU))
- // Push store's up a bit to avoid them getting in between cmp
- // and branches.
- ExitSU.addPred(SDep(SU, SDep::Artificial));
} else if (MI->mayLoad()) {
bool MayAlias = true;
if (MI->isInvariantLoad(AA)) {
@@ -1002,7 +998,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
if (Objs.empty()) {
// A load with no underlying object. Depend on all
// potentially aliasing stores.
- for (MapVector<const Value *, std::vector<SUnit *> >::iterator I =
+ for (MapVector<ValueType, std::vector<SUnit *> >::iterator I =
AliasMemDefs.begin(), E = AliasMemDefs.end(); I != E; ++I)
for (unsigned i = 0, e = I->second.size(); i != e; ++i)
addChainDependency(AAForDep, MFI, SU, I->second[i],
@@ -1016,16 +1012,16 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
for (UnderlyingObjectsVector::iterator
J = Objs.begin(), JE = Objs.end(); J != JE; ++J) {
- const Value *V = J->getPointer();
+ ValueType V = J->getPointer();
bool ThisMayAlias = J->getInt();
if (ThisMayAlias)
MayAlias = true;
// A load from a specific PseudoSourceValue. Add precise dependencies.
- MapVector<const Value *, std::vector<SUnit *> >::iterator I =
+ MapVector<ValueType, std::vector<SUnit *> >::iterator I =
((ThisMayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V));
- MapVector<const Value *, std::vector<SUnit *> >::iterator IE =
+ MapVector<ValueType, std::vector<SUnit *> >::iterator IE =
((ThisMayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end());
if (I != IE)
for (unsigned i = 0, e = I->second.size(); i != e; ++i)
@@ -1429,7 +1425,7 @@ public:
const SDep *backtrack() {
DFSStack.pop_back();
- return DFSStack.empty() ? 0 : std::prev(DFSStack.back().second);
+ return DFSStack.empty() ? nullptr : std::prev(DFSStack.back().second);
}
const SUnit *getCurr() const { return DFSStack.back().first; }
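
backtrack() pops the innermost DFS frame and resumes at the parent's saved edge position. A self-contained sketch of that explicit-stack traversal pattern in plain C++ (a generic adjacency list, not the SUnit/SDep types):

    #include <cstdio>
    #include <utility>
    #include <vector>

    using Graph = std::vector<std::vector<int>>;

    // Each frame pairs a node with the index of the edge being explored;
    // backtracking pops a frame and continues from the parent's next edge.
    void dfs(const Graph &G, int Root) {
      std::vector<std::pair<int, size_t>> Stack{{Root, 0}};
      while (!Stack.empty()) {
        auto &[Node, Edge] = Stack.back();
        if (Edge < G[Node].size()) {
          int Next = G[Node][Edge++];
          std::printf("visit %d\n", Next);
          Stack.push_back({Next, 0});
        } else {
          Stack.pop_back(); // backtrack to the parent frame
        }
      }
    }

    int main() {
      Graph G = {{1, 2}, {}, {}}; // 0 -> 1, 0 -> 2
      dfs(G, 0);                  // prints: visit 1, visit 2
    }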
diff --git a/lib/CodeGen/ScoreboardHazardRecognizer.cpp b/lib/CodeGen/ScoreboardHazardRecognizer.cpp
index 2cd84d6..004c685 100644
--- a/lib/CodeGen/ScoreboardHazardRecognizer.cpp
+++ b/lib/CodeGen/ScoreboardHazardRecognizer.cpp
@@ -13,7 +13,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE ::llvm::ScoreboardHazardRecognizer::DebugType
#include "llvm/CodeGen/ScoreboardHazardRecognizer.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/MC/MCInstrItineraries.h"
@@ -24,6 +23,8 @@
using namespace llvm;
+#define DEBUG_TYPE ::llvm::ScoreboardHazardRecognizer::DebugType
+
#ifndef NDEBUG
const char *ScoreboardHazardRecognizer::DebugType = "";
#endif
@@ -126,7 +127,7 @@ ScoreboardHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
// free FU's in the scoreboard at the appropriate future cycles.
const MCInstrDesc *MCID = DAG->getInstrDesc(SU);
- if (MCID == NULL) {
+ if (!MCID) {
// Don't check hazards for non-machineinstr Nodes.
return NoHazard;
}
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index cc0c5fa..2d2fd53 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -16,7 +16,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "dagcombine"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
@@ -40,6 +39,8 @@
#include <algorithm>
using namespace llvm;
+#define DEBUG_TYPE "dagcombine"
+
STATISTIC(NodesCombined , "Number of dag nodes combined");
STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
@@ -56,14 +57,8 @@ namespace {
CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
cl::desc("Enable DAG combiner's use of IR alias analysis"));
-// FIXME: Enable the use of TBAA. There are two known issues preventing this:
-// 1. Stack coloring does not update TBAA when merging allocas
-// 2. CGP inserts ptrtoint/inttoptr pairs when sinking address computations.
-// Because BasicAA does not handle inttoptr, we'll often miss basic type
-// punning idioms that we need to catch so we don't miscompile real-world
-// code.
static cl::opt<bool>
- UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(false),
+ UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
cl::desc("Enable DAG combiner's use of TBAA"));
#ifndef NDEBUG
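
With combiner-use-tbaa now defaulting to true, the combiner can use type-based alias information when deciding whether memory nodes may be reordered. The premise TBAA encodes, phrased in C++ source terms rather than combiner code — purely an illustration:

    #include <cstdio>

    // Under type-based aliasing rules, the store through ip is assumed
    // not to modify *fp, so the final load can be folded to 1.0f.
    float reorderable(float *fp, int *ip) {
      *fp = 1.0f;
      *ip = 2;
      return *fp;
    }

    int main() {
      float f; int i;
      std::printf("%f\n", reorderable(&f, &i)); // distinct objects: prints 1.0
    }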
@@ -120,9 +115,8 @@ namespace {
/// now.
///
void AddUsersToWorkList(SDNode *N) {
- for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
- UI != UE; ++UI)
- AddToWorkList(*UI);
+ for (SDNode *Node : N->uses())
+ AddToWorkList(Node);
}
/// visit - call the node-specific routine that knows how to fold each
@@ -173,6 +167,7 @@ namespace {
bool CombineToPreIndexedLoadStore(SDNode *N);
bool CombineToPostIndexedLoadStore(SDNode *N);
+ SDValue SplitIndexingFromLoad(LoadSDNode *LD);
bool SliceUpLoad(SDNode *N);
void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
@@ -324,26 +319,7 @@ namespace {
/// isAlias - Return true if there is any possibility that the two addresses
/// overlap.
- bool isAlias(SDValue Ptr1, int64_t Size1, bool IsVolatile1,
- const Value *SrcValue1, int SrcValueOffset1,
- unsigned SrcValueAlign1,
- const MDNode *TBAAInfo1,
- SDValue Ptr2, int64_t Size2, bool IsVolatile2,
- const Value *SrcValue2, int SrcValueOffset2,
- unsigned SrcValueAlign2,
- const MDNode *TBAAInfo2) const;
-
- /// isAlias - Return true if there is any possibility that the two addresses
- /// overlap.
- bool isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1);
-
- /// FindAliasInfo - Extracts the relevant alias information from the memory
- /// node. Returns true if the operand was a load.
- bool FindAliasInfo(SDNode *N,
- SDValue &Ptr, int64_t &Size, bool &IsVolatile,
- const Value *&SrcValue, int &SrcValueOffset,
- unsigned &SrcValueAlignment,
- const MDNode *&TBAAInfo) const;
+ bool isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const;
/// FindBetterChain - Walk up chain skipping non-aliasing memory nodes,
/// looking for a better chain (aliasing node.)
@@ -660,7 +636,7 @@ static SDNode *isConstantBuildVectorOrConstantInt(SDValue N) {
BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N);
if(BV && BV->isConstant())
return BV;
- return NULL;
+ return nullptr;
}
// \brief Returns the SDNode if it is a constant splat BuildVector or constant
@@ -669,8 +645,13 @@ static ConstantSDNode *isConstOrConstSplat(SDValue N) {
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N))
return CN;
- if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N))
- return BV->getConstantSplatValue();
+ if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) {
+ ConstantSDNode *CN = BV->getConstantSplatValue();
+
+ // BuildVectors can truncate their operands. Ignore that case here.
+ if (CN && CN->getValueType(0) == N.getValueType().getScalarType())
+ return CN;
+ }
return nullptr;
}
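
The truncation guard above exists because a BUILD_VECTOR's scalar operands may be wider than the vector's element type; the lane value is the truncated operand, not the operand itself. A small demonstration of the mismatch being guarded against:

    #include <cassert>
    #include <cstdint>

    int main() {
      // An i32 splat operand feeding a v*i16 vector: each lane keeps
      // only the low 16 bits, so a fold must not treat the wide
      // operand as the lane value.
      uint32_t SplatOperand = 0x1FFFF; // wider than the element type
      uint16_t LaneValue = (uint16_t)SplatOperand;
      assert(LaneValue == 0xFFFF);     // not 0x1FFFF
      assert((uint32_t)LaneValue != SplatOperand);
    }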
@@ -781,10 +762,14 @@ CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
// If the operands of this node are only used by the node, they will now
// be dead. Make sure to visit them first to delete dead nodes early.
- for (unsigned i = 0, e = TLO.Old.getNode()->getNumOperands(); i != e; ++i)
- if (TLO.Old.getNode()->getOperand(i).getNode()->hasOneUse())
- AddToWorkList(TLO.Old.getNode()->getOperand(i).getNode());
-
+ for (unsigned i = 0, e = TLO.Old.getNode()->getNumOperands(); i != e; ++i) {
+ SDNode *Op = TLO.Old.getNode()->getOperand(i).getNode();
+ // For an operand generating multiple values, one of the values may
+ // become dead allowing further simplification (e.g. split index
+ // arithmetic from an indexed load).
+ if (Op->hasOneUse() || Op->getNumValues() > 1)
+ AddToWorkList(Op);
+ }
DAG.DeleteNode(TLO.Old.getNode());
}
}
@@ -876,7 +861,7 @@ SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
SDLoc dl(Op);
bool Replace = false;
SDValue NewOp = PromoteOperand(Op, PVT, Replace);
- if (NewOp.getNode() == 0)
+ if (!NewOp.getNode())
return SDValue();
AddToWorkList(NewOp.getNode());
@@ -891,7 +876,7 @@ SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
SDLoc dl(Op);
bool Replace = false;
SDValue NewOp = PromoteOperand(Op, PVT, Replace);
- if (NewOp.getNode() == 0)
+ if (!NewOp.getNode())
return SDValue();
AddToWorkList(NewOp.getNode());
@@ -926,7 +911,7 @@ SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
bool Replace0 = false;
SDValue N0 = Op.getOperand(0);
SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
- if (NN0.getNode() == 0)
+ if (!NN0.getNode())
return SDValue();
bool Replace1 = false;
@@ -936,7 +921,7 @@ SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
NN1 = NN0;
else {
NN1 = PromoteOperand(N1, PVT, Replace1);
- if (NN1.getNode() == 0)
+ if (!NN1.getNode())
return SDValue();
}
@@ -989,7 +974,7 @@ SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
N0 = ZExtPromoteOperand(Op.getOperand(0), PVT);
else
N0 = PromoteOperand(N0, PVT, Replace);
- if (N0.getNode() == 0)
+ if (!N0.getNode())
return SDValue();
AddToWorkList(N0.getNode());
@@ -1134,7 +1119,7 @@ void DAGCombiner::Run(CombineLevel AtLevel) {
SDValue RV = combine(N);
- if (RV.getNode() == 0)
+ if (!RV.getNode())
continue;
++NodesCombined;
@@ -1282,7 +1267,7 @@ SDValue DAGCombiner::combine(SDNode *N) {
SDValue RV = visit(N);
// If nothing happened, try a target-specific DAG combine.
- if (RV.getNode() == 0) {
+ if (!RV.getNode()) {
assert(N->getOpcode() != ISD::DELETED_NODE &&
"Node was deleted but visit returned NULL!");
@@ -1298,7 +1283,7 @@ SDValue DAGCombiner::combine(SDNode *N) {
}
// If nothing happened still, try promoting the operation.
- if (RV.getNode() == 0) {
+ if (!RV.getNode()) {
switch (N->getOpcode()) {
default: break;
case ISD::ADD:
@@ -1328,8 +1313,7 @@ SDValue DAGCombiner::combine(SDNode *N) {
// If N is a commutative binary node, try commuting it to enable more
// sdisel CSE.
- if (RV.getNode() == 0 &&
- SelectionDAG::isCommutativeBinOp(N->getOpcode()) &&
+ if (!RV.getNode() && SelectionDAG::isCommutativeBinOp(N->getOpcode()) &&
N->getNumValues() == 1) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -1338,7 +1322,7 @@ SDValue DAGCombiner::combine(SDNode *N) {
if (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1)) {
SDValue Ops[] = { N1, N0 };
SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(),
- Ops, 2);
+ Ops);
if (CSENode)
return SDValue(CSENode, 0);
}
@@ -1428,8 +1412,7 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
Result = DAG.getEntryNode();
} else {
// New and improved token factor.
- Result = DAG.getNode(ISD::TokenFactor, SDLoc(N),
- MVT::Other, &Ops[0], Ops.size());
+ Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Ops);
}
// Don't add users to work list.
@@ -1528,7 +1511,7 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
N0.getOperand(1));
// reassociate add
SDValue RADD = ReassociateOps(ISD::ADD, SDLoc(N), N0, N1);
- if (RADD.getNode() != 0)
+ if (RADD.getNode())
return RADD;
// fold ((0-A) + B) -> B-A
if (N0.getOpcode() == ISD::SUB && isa<ConstantSDNode>(N0.getOperand(0)) &&
@@ -1581,10 +1564,10 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
if (VT.isInteger() && !VT.isVector()) {
APInt LHSZero, LHSOne;
APInt RHSZero, RHSOne;
- DAG.ComputeMaskedBits(N0, LHSZero, LHSOne);
+ DAG.computeKnownBits(N0, LHSZero, LHSOne);
if (LHSZero.getBoolValue()) {
- DAG.ComputeMaskedBits(N1, RHSZero, RHSOne);
+ DAG.computeKnownBits(N1, RHSZero, RHSOne);
// If all possibly-set bits on the LHS are clear on the RHS, return an OR.
// If all possibly-set bits on the RHS are clear on the LHS, return an OR.
@@ -1676,10 +1659,10 @@ SDValue DAGCombiner::visitADDC(SDNode *N) {
// fold (addc a, b) -> (or a, b), CARRY_FALSE iff a and b share no bits.
APInt LHSZero, LHSOne;
APInt RHSZero, RHSOne;
- DAG.ComputeMaskedBits(N0, LHSZero, LHSOne);
+ DAG.computeKnownBits(N0, LHSZero, LHSOne);
if (LHSZero.getBoolValue()) {
- DAG.ComputeMaskedBits(N1, RHSZero, RHSOne);
+ DAG.computeKnownBits(N1, RHSZero, RHSOne);
// If all possibly-set bits on the LHS are clear on the RHS, return an OR.
// If all possibly-set bits on the RHS are clear on the LHS, return an OR.
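
This fold is carry-freedom in disguise: when known-bits analysis proves that no bit position can be set in both operands, the addition can never generate a carry, so it is equivalent to an OR. The identity checked directly:

    #include <cassert>
    #include <cstdint>

    // If (a & b) == 0, no column of the addition has two 1s, so there
    // are no carries and a + b == a | b.
    bool addIsOr(uint32_t a, uint32_t b) { return (a & b) == 0; }

    int main() {
      uint32_t a = 0xFF00, b = 0x00FF;
      assert(addIsOr(a, b));
      assert(a + b == (a | b));
    }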
@@ -1728,7 +1711,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
SDValue N1 = N->getOperand(1);
ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode());
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
- ConstantSDNode *N1C1 = N1.getOpcode() != ISD::ADD ? 0 :
+ ConstantSDNode *N1C1 = N1.getOpcode() != ISD::ADD ? nullptr :
dyn_cast<ConstantSDNode>(N1.getOperand(1).getNode());
EVT VT = N0.getValueType();
@@ -1881,10 +1864,10 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
N0IsConst = isConstantSplatVector(N0.getNode(), ConstValue0);
N1IsConst = isConstantSplatVector(N1.getNode(), ConstValue1);
} else {
- N0IsConst = dyn_cast<ConstantSDNode>(N0) != 0;
+ N0IsConst = dyn_cast<ConstantSDNode>(N0) != nullptr;
ConstValue0 = N0IsConst ? (dyn_cast<ConstantSDNode>(N0))->getAPIntValue()
: APInt();
- N1IsConst = dyn_cast<ConstantSDNode>(N1) != 0;
+ N1IsConst = dyn_cast<ConstantSDNode>(N1) != nullptr;
ConstValue1 = N1IsConst ? (dyn_cast<ConstantSDNode>(N1))->getAPIntValue()
: APInt();
}
@@ -1942,7 +1925,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
// Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
// use.
{
- SDValue Sh(0,0), Y(0,0);
+ SDValue Sh(nullptr,0), Y(nullptr,0);
// Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)).
if (N0.getOpcode() == ISD::SHL &&
(isConstantSplatVector(N0.getOperand(1).getNode(), Val) ||
@@ -1975,7 +1958,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
// reassociate mul
SDValue RMUL = ReassociateOps(ISD::MUL, SDLoc(N), N0, N1);
- if (RMUL.getNode() != 0)
+ if (RMUL.getNode())
return RMUL;
return SDValue();
@@ -1984,8 +1967,8 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
SDValue DAGCombiner::visitSDIV(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode());
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
+ ConstantSDNode *N0C = isConstOrConstSplat(N0);
+ ConstantSDNode *N1C = isConstOrConstSplat(N1);
EVT VT = N->getValueType(0);
// fold vector ops
@@ -2011,10 +1994,10 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
return DAG.getNode(ISD::UDIV, SDLoc(N), N1.getValueType(),
N0, N1);
}
+
// fold (sdiv X, pow2) -> simple ops after legalize
- if (N1C && !N1C->isNullValue() &&
- (N1C->getAPIntValue().isPowerOf2() ||
- (-N1C->getAPIntValue()).isPowerOf2())) {
+ if (N1C && !N1C->isNullValue() && (N1C->getAPIntValue().isPowerOf2() ||
+ (-N1C->getAPIntValue()).isPowerOf2())) {
// If dividing by powers of two is cheap, then don't perform the following
// fold.
if (TLI.isPow2DivCheap())
@@ -2023,15 +2006,17 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
unsigned lg2 = N1C->getAPIntValue().countTrailingZeros();
// Splat the sign bit into the register
- SDValue SGN = DAG.getNode(ISD::SRA, SDLoc(N), VT, N0,
- DAG.getConstant(VT.getSizeInBits()-1,
- getShiftAmountTy(N0.getValueType())));
+ SDValue SGN =
+ DAG.getNode(ISD::SRA, SDLoc(N), VT, N0,
+ DAG.getConstant(VT.getScalarSizeInBits() - 1,
+ getShiftAmountTy(N0.getValueType())));
AddToWorkList(SGN.getNode());
// Add (N0 < 0) ? abs2 - 1 : 0;
- SDValue SRL = DAG.getNode(ISD::SRL, SDLoc(N), VT, SGN,
- DAG.getConstant(VT.getSizeInBits() - lg2,
- getShiftAmountTy(SGN.getValueType())));
+ SDValue SRL =
+ DAG.getNode(ISD::SRL, SDLoc(N), VT, SGN,
+ DAG.getConstant(VT.getScalarSizeInBits() - lg2,
+ getShiftAmountTy(SGN.getValueType())));
SDValue ADD = DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, SRL);
AddToWorkList(SRL.getNode());
AddToWorkList(ADD.getNode()); // Divide by pow2
@@ -2044,13 +2029,12 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
return SRA;
AddToWorkList(SRA.getNode());
- return DAG.getNode(ISD::SUB, SDLoc(N), VT,
- DAG.getConstant(0, VT), SRA);
+ return DAG.getNode(ISD::SUB, SDLoc(N), VT, DAG.getConstant(0, VT), SRA);
}
// if integer divide is expensive and we satisfy the requirements, emit an
// alternate sequence.
- if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap()) {
+ if (N1C && !TLI.isIntDivCheap()) {
SDValue Op = BuildSDIV(N);
if (Op.getNode()) return Op;
}
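
The expansion above implements truncating (round-toward-zero) signed division by 2^k: splat the sign bit (SRA), shift it down to form a 2^k - 1 bias that applies only to negative inputs (SRL), add it in, then arithmetic-shift right by k — the change here is only that the shift amounts use the scalar size so vector splats work too. The same arithmetic as a runnable check (assumes arithmetic >> on signed values, matching the SRA node):

    #include <cassert>
    #include <cstdint>

    int32_t sdivPow2(int32_t x, unsigned k) {
      int32_t sgn = x >> 31;                     // SRA: splat the sign bit
      uint32_t bias = (uint32_t)sgn >> (32 - k); // SRL: (x < 0) ? 2^k - 1 : 0
      return (x + (int32_t)bias) >> k;           // ADD, then SRA by k
    }

    int main() {
      assert(sdivPow2(-7, 2) == -7 / 4); // -1, not the flooring -2
      assert(sdivPow2(7, 2) == 7 / 4);
      for (int32_t x = -1000; x <= 1000; ++x)
        assert(sdivPow2(x, 3) == x / 8);
    }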
@@ -2068,8 +2052,8 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
SDValue DAGCombiner::visitUDIV(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode());
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
+ ConstantSDNode *N0C = isConstOrConstSplat(N0);
+ ConstantSDNode *N1C = isConstOrConstSplat(N1);
EVT VT = N->getValueType(0);
// fold vector ops
@@ -2102,7 +2086,7 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) {
}
}
// fold (udiv x, c) -> alternate
- if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap()) {
+ if (N1C && !TLI.isIntDivCheap()) {
SDValue Op = BuildUDIV(N);
if (Op.getNode()) return Op;
}
@@ -2120,8 +2104,8 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) {
SDValue DAGCombiner::visitSREM(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ ConstantSDNode *N0C = isConstOrConstSplat(N0);
+ ConstantSDNode *N1C = isConstOrConstSplat(N1);
EVT VT = N->getValueType(0);
// fold (srem c1, c2) -> c1%c2
@@ -2162,8 +2146,8 @@ SDValue DAGCombiner::visitSREM(SDNode *N) {
SDValue DAGCombiner::visitUREM(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ ConstantSDNode *N0C = isConstOrConstSplat(N0);
+ ConstantSDNode *N1C = isConstOrConstSplat(N1);
EVT VT = N->getValueType(0);
// fold (urem c1, c2) -> c1%c2
@@ -2298,7 +2282,7 @@ SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
(!LegalOperations ||
TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0),
- N->op_begin(), N->getNumOperands());
+ ArrayRef<SDUse>(N->op_begin(), N->op_end()));
return CombineTo(N, Res, Res);
}
@@ -2308,7 +2292,7 @@ SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
(!LegalOperations ||
TLI.isOperationLegal(HiOp, N->getValueType(1)))) {
SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1),
- N->op_begin(), N->getNumOperands());
+ ArrayRef<SDUse>(N->op_begin(), N->op_end()));
return CombineTo(N, Res, Res);
}
@@ -2319,7 +2303,7 @@ SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
// If the two computed results can be simplified separately, separate them.
if (LoExists) {
SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0),
- N->op_begin(), N->getNumOperands());
+ ArrayRef<SDUse>(N->op_begin(), N->op_end()));
AddToWorkList(Lo.getNode());
SDValue LoOpt = combine(Lo.getNode());
if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
@@ -2330,7 +2314,7 @@ SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
if (HiExists) {
SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1),
- N->op_begin(), N->getNumOperands());
+ ArrayRef<SDUse>(N->op_begin(), N->op_end()));
AddToWorkList(Hi.getNode());
SDValue HiOpt = combine(Hi.getNode());
if (HiOpt.getNode() && HiOpt != Hi &&
@@ -2532,7 +2516,7 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
assert(N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() &&
"Inputs to shuffles are not the same type");
-
+
// Check that both shuffles use the same mask. The masks are known to be of
// the same length because the result vector type is the same.
// Check also that shuffles have only one use to avoid introducing extra
@@ -2632,7 +2616,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
return DAG.getConstant(0, VT);
// reassociate and
SDValue RAND = ReassociateOps(ISD::AND, SDLoc(N), N0, N1);
- if (RAND.getNode() != 0)
+ if (RAND.getNode())
return RAND;
// fold (and (or x, C), D) -> D if (C & D) == D
if (N1C && N0.getOpcode() == ISD::OR)
@@ -3165,7 +3149,7 @@ SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
if (!TLI.isOperationLegal(ISD::BSWAP, VT))
return SDValue();
- SmallVector<SDNode*,4> Parts(4, (SDNode*)0);
+ SmallVector<SDNode*,4> Parts(4, (SDNode*)nullptr);
// Look for either
// (or (or (and), (and)), (or (and), (and)))
// (or (or (or (and), (and)), (and)), (and))
@@ -3270,11 +3254,11 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
// two ways to fold this node into a shuffle.
SmallVector<int,4> Mask1;
SmallVector<int,4> Mask2;
-
+
for (unsigned i = 0; i != NumElts && CanFold; ++i) {
int M0 = SV0->getMaskElt(i);
int M1 = SV1->getMaskElt(i);
-
+
// Both shuffle indexes are undef. Propagate Undef.
if (M0 < 0 && M1 < 0) {
Mask1.push_back(M0);
@@ -3288,7 +3272,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
CanFold = false;
break;
}
-
+
Mask1.push_back(M0 < (int)NumElts ? M0 : M1 + NumElts);
Mask2.push_back(M1 < (int)NumElts ? M1 : M0 + NumElts);
}
@@ -3329,15 +3313,15 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
// Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
SDValue BSwap = MatchBSwapHWord(N, N0, N1);
- if (BSwap.getNode() != 0)
+ if (BSwap.getNode())
return BSwap;
BSwap = MatchBSwapHWordLow(N, N0, N1);
- if (BSwap.getNode() != 0)
+ if (BSwap.getNode())
return BSwap;
// reassociate or
SDValue ROR = ReassociateOps(ISD::OR, SDLoc(N), N0, N1);
- if (ROR.getNode() != 0)
+ if (ROR.getNode())
return ROR;
// Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
// iff (c1 & c2) == 0.
@@ -3582,28 +3566,7 @@ SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
HasPos ? Pos : Neg).getNode();
}
- // fold (or (shl (*ext x), (*ext y)),
- // (srl (*ext x), (*ext (sub 32, y)))) ->
- // (*ext (rotl x, y)) or (*ext (rotr x, (sub 32, y)))
- //
- // fold (or (shl (*ext x), (*ext (sub 32, y))),
- // (srl (*ext x), (*ext y))) ->
- // (*ext (rotr x, y)) or (*ext (rotl x, (sub 32, y)))
- if (Shifted.getOpcode() == ISD::ZERO_EXTEND ||
- Shifted.getOpcode() == ISD::ANY_EXTEND) {
- SDValue InnerShifted = Shifted.getOperand(0);
- EVT InnerVT = InnerShifted.getValueType();
- bool HasPosInner = TLI.isOperationLegalOrCustom(PosOpcode, InnerVT);
- if (HasPosInner || TLI.isOperationLegalOrCustom(NegOpcode, InnerVT)) {
- if (matchRotateSub(InnerPos, InnerNeg, InnerVT.getSizeInBits())) {
- SDValue V = DAG.getNode(HasPosInner ? PosOpcode : NegOpcode, DL,
- InnerVT, InnerShifted, HasPosInner ? Pos : Neg);
- return DAG.getNode(Shifted.getOpcode(), DL, VT, V).getNode();
- }
- }
- }
-
- return 0;
+ return nullptr;
}
// MatchRotate - Handle an 'or' of two operands. If this is one of the many
@@ -3612,29 +3575,29 @@ SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) {
// Must be a legal type. Expanded 'n promoted things won't work with rotates.
EVT VT = LHS.getValueType();
- if (!TLI.isTypeLegal(VT)) return 0;
+ if (!TLI.isTypeLegal(VT)) return nullptr;
// The target must have at least one rotate flavor.
bool HasROTL = TLI.isOperationLegalOrCustom(ISD::ROTL, VT);
bool HasROTR = TLI.isOperationLegalOrCustom(ISD::ROTR, VT);
- if (!HasROTL && !HasROTR) return 0;
+ if (!HasROTL && !HasROTR) return nullptr;
// Match "(X shl/srl V1) & V2" where V2 may not be present.
SDValue LHSShift; // The shift.
SDValue LHSMask; // AND value if any.
if (!MatchRotateHalf(LHS, LHSShift, LHSMask))
- return 0; // Not part of a rotate.
+ return nullptr; // Not part of a rotate.
SDValue RHSShift; // The shift.
SDValue RHSMask; // AND value if any.
if (!MatchRotateHalf(RHS, RHSShift, RHSMask))
- return 0; // Not part of a rotate.
+ return nullptr; // Not part of a rotate.
if (LHSShift.getOperand(0) != RHSShift.getOperand(0))
- return 0; // Not shifting the same value.
+ return nullptr; // Not shifting the same value.
if (LHSShift.getOpcode() == RHSShift.getOpcode())
- return 0; // Shifts must disagree.
+ return nullptr; // Shifts must disagree.
// Canonicalize shl to left side in a shl/srl pair.
if (RHSShift.getOpcode() == ISD::SHL) {
@@ -3656,7 +3619,7 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) {
uint64_t LShVal = cast<ConstantSDNode>(LHSShiftAmt)->getZExtValue();
uint64_t RShVal = cast<ConstantSDNode>(RHSShiftAmt)->getZExtValue();
if ((LShVal + RShVal) != OpSizeInBits)
- return 0;
+ return nullptr;
SDValue Rot = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT,
LHSShiftArg, HasROTL ? LHSShiftAmt : RHSShiftAmt);
@@ -3683,7 +3646,7 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) {
// If there is a mask here, and we have a variable shift, we can't be sure
// that we're masking out the right stuff.
if (LHSMask.getNode() || RHSMask.getNode())
- return 0;
+ return nullptr;
// If the shift amount is sign/zext/any-extended just peel it off.
SDValue LExtOp0 = LHSShiftAmt;
@@ -3710,7 +3673,7 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) {
if (TryR)
return TryR;
- return 0;
+ return nullptr;
}
SDValue DAGCombiner::visitXOR(SDNode *N) {
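
For reference, the shape MatchRotate is reducing: when an shl and an srl of the same value have amounts summing to the bit width, their OR is a rotate. A standalone check of the identity:

    #include <cassert>
    #include <cstdint>

    // (x << c) | (x >> (32 - c)) is a left rotate by c; masking the
    // amounts keeps the shifts defined when c == 0.
    uint32_t rotl32(uint32_t x, unsigned c) {
      c &= 31;
      return (x << c) | (x >> ((32 - c) & 31));
    }

    int main() {
      uint32_t x = 0x12345678;
      assert(rotl32(x, 8) == 0x34567812);
      assert(((x << 8) | (x >> 24)) == rotl32(x, 8)); // the matched form
    }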
@@ -3752,7 +3715,7 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
return N0;
// reassociate xor
SDValue RXOR = ReassociateOps(ISD::XOR, SDLoc(N), N0, N1);
- if (RXOR.getNode() != 0)
+ if (RXOR.getNode())
return RXOR;
// fold !(x cc y) -> (x !cc y)
@@ -3909,6 +3872,9 @@ SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) {
return SDValue();
}
+ if (!TLI.isDesirableToCommuteWithShift(LHS))
+ return SDValue();
+
// Fold the constants, shifting the binop RHS by the shift amount.
SDValue NewRHS = DAG.getNode(N->getOpcode(), SDLoc(LHS->getOperand(1)),
N->getValueType(0),
@@ -4382,7 +4348,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
if (N1C && N0.getOpcode() == ISD::CTLZ &&
N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
APInt KnownZero, KnownOne;
- DAG.ComputeMaskedBits(N0.getOperand(0), KnownZero, KnownOne);
+ DAG.computeKnownBits(N0.getOperand(0), KnownZero, KnownOne);
// If any of the input bits are KnownOne, then the input couldn't be all
// zeros, thus the result of the srl will always be zero.
@@ -4745,7 +4711,7 @@ SDValue DAGCombiner::visitSETCC(SDNode *N) {
// tryToFoldExtendOfConstant - Try to fold a sext/zext/aext
// dag node into a ConstantSDNode or a build_vector of constants.
// This function is called by the DAGCombiner when visiting sext/zext/aext
-// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
+// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
// Vector extends are not folded if operations are legal; this is to
// avoid introducing illegal build_vector dag nodes.
static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
@@ -4771,8 +4737,8 @@ static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
if (!(VT.isVector() &&
(!LegalTypes || (!LegalOperations && TLI.isTypeLegal(SVT))) &&
ISD::isBuildVectorOfConstantSDNodes(N0.getNode())))
- return 0;
-
+ return nullptr;
+
// We can fold this node into a build_vector.
unsigned VTBits = SVT.getSizeInBits();
unsigned EVTBits = N0->getValueType(0).getScalarType().getSizeInBits();
@@ -4798,7 +4764,7 @@ static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
SVT));
}
- return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, &Elts[0], NumElts).getNode();
+ return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Elts).getNode();
}
// ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this:
@@ -4882,8 +4848,7 @@ void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
}
Ops.push_back(SetCC->getOperand(2));
- CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0),
- &Ops[0], Ops.size()));
+ CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops));
}
}
@@ -4957,6 +4922,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
// on vectors in one instruction. We only perform this transformation on
// scalars.
if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
+ ISD::isUNINDEXEDLoad(N0.getNode()) &&
((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
TLI.isLoadExtLegal(ISD::SEXTLOAD, N0.getValueType()))) {
bool DoXform = true;
@@ -5009,7 +4975,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
TLI.isLoadExtLegal(ISD::SEXTLOAD, N0.getValueType()) &&
(!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0));
- if (LN0->getExtensionType() != ISD::ZEXTLOAD) {
+ if (LN0->getExtensionType() != ISD::ZEXTLOAD && LN0->isUnindexed()) {
bool DoXform = true;
SmallVector<SDNode*, 4> SetCCs;
if (!N0.hasOneUse())
@@ -5108,13 +5074,13 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
// isTruncateOf - If N is a truncate of some other value, return true, record
// the value being truncated in Op and which of Op's bits are zero in KnownZero.
// This function computes KnownZero to avoid a duplicated call to
-// ComputeMaskedBits in the caller.
+// computeKnownBits in the caller.
static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
APInt &KnownZero) {
APInt KnownOne;
if (N->getOpcode() == ISD::TRUNCATE) {
Op = N->getOperand(0);
- DAG.ComputeMaskedBits(Op, KnownZero, KnownOne);
+ DAG.computeKnownBits(Op, KnownZero, KnownOne);
return true;
}
@@ -5135,7 +5101,7 @@ static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
else
return false;
- DAG.ComputeMaskedBits(Op, KnownZero, KnownOne);
+ DAG.computeKnownBits(Op, KnownZero, KnownOne);
if (!(KnownZero | APInt(Op.getValueSizeInBits(), 1)).isAllOnesValue())
return false;
@@ -5250,6 +5216,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
// on vectors in one instruction. We only perform this transformation on
// scalars.
if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
+ ISD::isUNINDEXEDLoad(N0.getNode()) &&
((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
TLI.isLoadExtLegal(ISD::ZEXTLOAD, N0.getValueType()))) {
bool DoXform = true;
@@ -5282,7 +5249,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
TLI.isLoadExtLegal(ISD::ZEXTLOAD, N0.getValueType()) &&
(!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0));
- if (LN0->getExtensionType() != ISD::SEXTLOAD) {
+ if (LN0->getExtensionType() != ISD::SEXTLOAD && LN0->isUnindexed()) {
bool DoXform = true;
SmallVector<SDNode*, 4> SetCCs;
if (!N0.hasOneUse())
@@ -5353,7 +5320,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
N0.getOperand(1),
cast<CondCodeSDNode>(N0.getOperand(2))->get()),
DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT,
- &OneOps[0], OneOps.size()));
+ OneOps));
// If the desired elements are smaller or larger than the source
// elements we can use a matching integer vector type and then
@@ -5370,8 +5337,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
cast<CondCodeSDNode>(N0.getOperand(2))->get());
return DAG.getNode(ISD::AND, SDLoc(N), VT,
DAG.getSExtOrTrunc(VsetCC, SDLoc(N), VT),
- DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT,
- &OneOps[0], OneOps.size()));
+ DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, OneOps));
}
// zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
@@ -5478,6 +5444,7 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
// on vectors in one instruction. We only perform this transformation on
// scalars.
if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
+ ISD::isUNINDEXEDLoad(N0.getNode()) &&
((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType()))) {
bool DoXform = true;
@@ -5507,20 +5474,26 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
!ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
N0.hasOneUse()) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ ISD::LoadExtType ExtType = LN0->getExtensionType();
EVT MemVT = LN0->getMemoryVT();
- SDValue ExtLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(N),
- VT, LN0->getChain(), LN0->getBasePtr(),
- MemVT, LN0->getMemOperand());
- CombineTo(N, ExtLoad);
- CombineTo(N0.getNode(),
- DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
- N0.getValueType(), ExtLoad),
- ExtLoad.getValue(1));
- return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ if (!LegalOperations || TLI.isLoadExtLegal(ExtType, MemVT)) {
+ SDValue ExtLoad = DAG.getExtLoad(ExtType, SDLoc(N),
+ VT, LN0->getChain(), LN0->getBasePtr(),
+ MemVT, LN0->getMemOperand());
+ CombineTo(N, ExtLoad);
+ CombineTo(N0.getNode(),
+ DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
+ N0.getValueType(), ExtLoad),
+ ExtLoad.getValue(1));
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
}
if (N0.getOpcode() == ISD::SETCC) {
- // aext(setcc) -> sext_in_reg(vsetcc) for vectors.
+ // For vectors:
+ // aext(setcc) -> vsetcc
+ // aext(setcc) -> truncate(vsetcc)
+ // aext(setcc) -> aext(vsetcc)
// Only do this before legalize for now.
if (VT.isVector() && !LegalOperations) {
EVT N0VT = N0.getOperand(0).getValueType();
@@ -5535,19 +5508,14 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
cast<CondCodeSDNode>(N0.getOperand(2))->get());
// If the desired elements are smaller or larger than the source
// elements we can use a matching integer vector type and then
- // truncate/sign extend
+ // truncate/any extend
else {
- EVT MatchingElementType =
- EVT::getIntegerVT(*DAG.getContext(),
- N0VT.getScalarType().getSizeInBits());
- EVT MatchingVectorType =
- EVT::getVectorVT(*DAG.getContext(), MatchingElementType,
- N0VT.getVectorNumElements());
+ EVT MatchingVectorType = N0VT.changeVectorElementTypeToInteger();
SDValue VsetCC =
DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0),
N0.getOperand(1),
cast<CondCodeSDNode>(N0.getOperand(2))->get());
- return DAG.getSExtOrTrunc(VsetCC, SDLoc(N), VT);
+ return DAG.getAnyExtOrTrunc(VsetCC, SDLoc(N), VT);
}
}
@@ -5571,7 +5539,7 @@ SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) {
default: break;
case ISD::Constant: {
const ConstantSDNode *CV = cast<ConstantSDNode>(V.getNode());
- assert(CV != 0 && "Const value should be ConstSDNode.");
+ assert(CV && "Const value should be ConstSDNode.");
const APInt &CVal = CV->getAPIntValue();
APInt NewVal = CVal & Mask;
if (NewVal != CVal)
@@ -5872,7 +5840,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
if (EVTBits <= 16 && N0.getOpcode() == ISD::OR) {
SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
N0.getOperand(1), false);
- if (BSwap.getNode() != 0)
+ if (BSwap.getNode())
return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
BSwap, N1);
}
@@ -5897,7 +5865,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
Op.getValueType()));
}
- return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, &Elts[0], NumElts);
+ return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Elts);
}
return SDValue();
@@ -5998,8 +5966,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset)
Opnds.push_back(BuildVect.getOperand(i));
- return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, &Opnds[0],
- Opnds.size());
+ return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Opnds);
}
}
@@ -6074,8 +6041,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
AddToWorkList(NV.getNode());
Opnds.push_back(NV);
}
- return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT,
- &Opnds[0], Opnds.size());
+ return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Opnds);
}
}
@@ -6313,8 +6279,7 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
DstEltVT, Op));
AddToWorkList(Ops.back().getNode());
}
- return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BV), VT,
- &Ops[0], Ops.size());
+ return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BV), VT, Ops);
}
// Otherwise, we're growing or shrinking the elements. To avoid having to
@@ -6370,8 +6335,7 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
}
EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
- return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BV), VT,
- &Ops[0], Ops.size());
+ return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BV), VT, Ops);
}
// Finally, this must be the case where we are shrinking elements: each input
@@ -6407,8 +6371,7 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
std::reverse(Ops.end()-NumOutputsPerInput, Ops.end());
}
- return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BV), VT,
- &Ops[0], Ops.size());
+ return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BV), VT, Ops);
}
SDValue DAGCombiner::visitFADD(SDNode *N) {
@@ -7006,7 +6969,7 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
{ N0.getOperand(0), N0.getOperand(1),
DAG.getConstantFP(-1.0, VT) , DAG.getConstantFP(0.0, VT),
N0.getOperand(2) };
- return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, Ops, 5);
+ return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, Ops);
}
// fold (sint_to_fp (zext (setcc x, y, cc))) ->
@@ -7019,7 +6982,7 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
{ N0.getOperand(0).getOperand(0), N0.getOperand(0).getOperand(1),
DAG.getConstantFP(1.0, VT) , DAG.getConstantFP(0.0, VT),
N0.getOperand(0).getOperand(2) };
- return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, Ops, 5);
+ return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, Ops);
}
}
@@ -7063,7 +7026,7 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
{ N0.getOperand(0), N0.getOperand(1),
DAG.getConstantFP(1.0, VT), DAG.getConstantFP(0.0, VT),
N0.getOperand(2) };
- return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, Ops, 5);
+ return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, Ops);
}
}
@@ -7223,11 +7186,16 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) {
// (fneg (fmul c, x)) -> (fmul -c, x)
if (N0.getOpcode() == ISD::FMUL) {
ConstantFPSDNode *CFP1 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1));
- if (CFP1)
- return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
- N0.getOperand(0),
- DAG.getNode(ISD::FNEG, SDLoc(N), VT,
- N0.getOperand(1)));
+ if (CFP1) {
+ APFloat CVal = CFP1->getValueAPF();
+ CVal.changeSign();
+ if (Level >= AfterLegalizeDAG &&
+ (TLI.isFPImmLegal(CVal, N->getValueType(0)) ||
+ TLI.isOperationLegal(ISD::ConstantFP, N->getValueType(0))))
+ return DAG.getNode(
+ ISD::FMUL, SDLoc(N), VT, N0.getOperand(0),
+ DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0.getOperand(1)));
+ }
}
return SDValue();
@@ -7335,7 +7303,7 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {
((N1.getOpcode() == ISD::TRUNCATE && N1.hasOneUse()) &&
(N1.getOperand(0).hasOneUse() &&
N1.getOperand(0).getOpcode() == ISD::SRL))) {
- SDNode *Trunc = 0;
+ SDNode *Trunc = nullptr;
if (N1.getOpcode() == ISD::TRUNCATE) {
// Look past the truncate.
Trunc = N1.getNode();
@@ -7616,9 +7584,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
// a copy of the original base pointer.
SmallVector<SDNode *, 16> OtherUses;
if (isa<ConstantSDNode>(Offset))
- for (SDNode::use_iterator I = BasePtr.getNode()->use_begin(),
- E = BasePtr.getNode()->use_end(); I != E; ++I) {
- SDNode *Use = *I;
+ for (SDNode *Use : BasePtr.getNode()->uses()) {
if (Use == Ptr.getNode())
continue;
@@ -7660,9 +7626,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
SmallPtrSet<const SDNode *, 32> Visited;
SmallVector<const SDNode *, 16> Worklist;
- for (SDNode::use_iterator I = Ptr.getNode()->use_begin(),
- E = Ptr.getNode()->use_end(); I != E; ++I) {
- SDNode *Use = *I;
+ for (SDNode *Use : Ptr.getNode()->uses()) {
if (Use == N)
continue;
if (N->hasPredecessorHelper(Use, Visited, Worklist))
@@ -7798,9 +7762,7 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
if (Ptr.getNode()->hasOneUse())
return false;
- for (SDNode::use_iterator I = Ptr.getNode()->use_begin(),
- E = Ptr.getNode()->use_end(); I != E; ++I) {
- SDNode *Op = *I;
+ for (SDNode *Op : Ptr.getNode()->uses()) {
if (Op == N ||
(Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB))
continue;
@@ -7826,9 +7788,7 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
// Check for #1.
bool TryNext = false;
- for (SDNode::use_iterator II = BasePtr.getNode()->use_begin(),
- EE = BasePtr.getNode()->use_end(); II != EE; ++II) {
- SDNode *Use = *II;
+ for (SDNode *Use : BasePtr.getNode()->uses()) {
if (Use == Ptr.getNode())
continue;
@@ -7836,9 +7796,7 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
// transformation.
if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB){
bool RealUse = false;
- for (SDNode::use_iterator III = Use->use_begin(),
- EEE = Use->use_end(); III != EEE; ++III) {
- SDNode *UseUse = *III;
+ for (SDNode *UseUse : Use->uses()) {
if (!canFoldInAddressingMode(Use, UseUse, DAG, TLI))
RealUse = true;
}
@@ -7891,6 +7849,17 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
return false;
}
+/// \brief Return the base-pointer arithmetic from an indexed \p LD.
+SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
+ ISD::MemIndexedMode AM = LD->getAddressingMode();
+ assert(AM != ISD::UNINDEXED);
+ SDValue BP = LD->getOperand(1);
+ SDValue Inc = LD->getOperand(2);
+ unsigned Opc =
+ (AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB);
+ return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc);
+}
+
SDValue DAGCombiner::visitLOAD(SDNode *N) {
LoadSDNode *LD = cast<LoadSDNode>(N);
SDValue Chain = LD->getChain();
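
SplitIndexingFromLoad recovers just the pointer-update half of an indexed load: the increment modes become an ADD of base and increment, the decrement modes a SUB, which is how visitLOAD below can keep the updated pointer alive even when the loaded value is dead. The arithmetic modeled standalone (plain types, not the SDNode API):

    #include <cassert>

    enum MemIndexedMode { PRE_INC, PRE_DEC, POST_INC, POST_DEC };

    // The pointer result of an indexed load is ordinary base +/- inc.
    long splitIndexing(MemIndexedMode AM, long Base, long Inc) {
      bool IsAdd = (AM == PRE_INC || AM == POST_INC);
      return IsAdd ? Base + Inc : Base - Inc;
    }

    int main() {
      assert(splitIndexing(PRE_INC, 0x1000, 8) == 0x1008);
      assert(splitIndexing(POST_DEC, 0x1000, 8) == 0xFF8);
    }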
@@ -7927,8 +7896,16 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
} else {
// Indexed loads.
assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
- if (!N->hasAnyUseOfValue(0) && !N->hasAnyUseOfValue(1)) {
+ if (!N->hasAnyUseOfValue(0)) {
SDValue Undef = DAG.getUNDEF(N->getValueType(0));
+ SDValue Index;
+ if (N->hasAnyUseOfValue(1)) {
+ Index = SplitIndexingFromLoad(LD);
+ // Try to fold the base pointer arithmetic into subsequent loads and
+ // stores.
+ AddUsersToWorkList(N);
+ } else
+ Index = DAG.getUNDEF(N->getValueType(1));
DEBUG(dbgs() << "\nReplacing.7 ";
N->dump(&DAG);
dbgs() << "\nWith: ";
@@ -7936,8 +7913,7 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
dbgs() << " and 2 other values\n");
WorkListRemover DeadNodes(*this);
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
- DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1),
- DAG.getUNDEF(N->getValueType(1)));
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index);
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
removeFromWorkList(N);
DAG.DeleteNode(N);
@@ -8131,8 +8107,8 @@ struct LoadedSlice {
// This is used to get some contextual information about legal types, etc.
SelectionDAG *DAG;
- LoadedSlice(SDNode *Inst = NULL, LoadSDNode *Origin = NULL,
- unsigned Shift = 0, SelectionDAG *DAG = NULL)
+ LoadedSlice(SDNode *Inst = nullptr, LoadSDNode *Origin = nullptr,
+ unsigned Shift = 0, SelectionDAG *DAG = nullptr)
: Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}
LoadedSlice(const LoadedSlice &LS)
@@ -8228,7 +8204,7 @@ struct LoadedSlice {
/// \brief Get the offset in bytes of this slice in the original chunk of
/// bits.
- /// \pre DAG != NULL.
+ /// \pre DAG != nullptr.
uint64_t getOffsetFromBase() const {
assert(DAG && "Missing context.");
bool IsBigEndian =
@@ -8384,8 +8360,8 @@ static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo();
// First (resp. Second) is the first (resp. Second) potentially candidate
// to be placed in a paired load.
- const LoadedSlice *First = NULL;
- const LoadedSlice *Second = NULL;
+ const LoadedSlice *First = nullptr;
+ const LoadedSlice *Second = nullptr;
for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice,
// Set the beginning of the pair.
First = Second) {
@@ -8407,7 +8383,7 @@ static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
unsigned RequiredAlignment = 0;
if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) {
// move to the next pair, this type is hopeless.
- Second = NULL;
+ Second = nullptr;
continue;
}
// Check if we meet the alignment requirement.
@@ -8421,7 +8397,7 @@ static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!");
--GlobalLSCost.Loads;
// Move to the next pair.
- Second = NULL;
+ Second = nullptr;
}
}
@@ -8565,7 +8541,7 @@ bool DAGCombiner::SliceUpLoad(SDNode *N) {
}
SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other,
- &ArgChains[0], ArgChains.size());
+ ArgChains);
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
return true;
}
@@ -8660,14 +8636,14 @@ ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
// that uses this. If not, this is not a replacement.
APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
ByteShift*8, (ByteShift+NumBytes)*8);
- if (!DAG.MaskedValueIsZero(IVal, Mask)) return 0;
+ if (!DAG.MaskedValueIsZero(IVal, Mask)) return nullptr;
// Check that it is legal on the target to do this. It is legal if the new
// VT we're shrinking to (i8/i16/i32) is legal or we're still before type
// legalization.
MVT VT = MVT::getIntegerVT(NumBytes*8);
if (!DC->isTypeLegal(VT))
- return 0;
+ return nullptr;
// Okay, we can do this! Replace the 'St' store with a store of IVal that is
// shifted by ByteShift and truncated down to NumBytes.
@@ -9081,7 +9057,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
break;
} else if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(NextInChain)) {
if (Ldn->isVolatile()) {
- Index = NULL;
+ Index = nullptr;
break;
}
@@ -9090,7 +9066,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
NextInChain = Ldn->getChain().getNode();
continue;
} else {
- Index = NULL;
+ Index = nullptr;
break;
}
}
@@ -9719,8 +9695,7 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
}
// Return the new vector
- return DAG.getNode(ISD::BUILD_VECTOR, dl,
- VT, &Ops[0], Ops.size());
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
}
SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
@@ -9826,8 +9801,8 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
NewLoad = true;
}
- LoadSDNode *LN0 = NULL;
- const ShuffleVectorSDNode *SVN = NULL;
+ LoadSDNode *LN0 = nullptr;
+ const ShuffleVectorSDNode *SVN = nullptr;
if (ISD::isNormalLoad(InVec.getNode())) {
LN0 = cast<LoadSDNode>(InVec);
} else if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR &&
@@ -10052,7 +10027,7 @@ SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
if (!isTypeLegal(VecVT)) return SDValue();
// Make the new BUILD_VECTOR.
- SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, &Ops[0], Ops.size());
+ SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, Ops);
// The new BUILD_VECTOR node has the potential to be further optimized.
AddToWorkList(BV.getNode());
@@ -10120,8 +10095,7 @@ SDValue DAGCombiner::reduceBuildVecConvertToConvertBuildVec(SDNode *N) {
else
Opnds.push_back(In.getOperand(0));
}
- SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, NVT,
- &Opnds[0], Opnds.size());
+ SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, Opnds);
AddToWorkList(BV.getNode());
return DAG.getNode(Opcode, dl, VT, BV);
@@ -10162,7 +10136,7 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
// constant index, bail out.
if (N->getOperand(i).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
!isa<ConstantSDNode>(N->getOperand(i).getOperand(1))) {
- VecIn1 = VecIn2 = SDValue(0, 0);
+ VecIn1 = VecIn2 = SDValue(nullptr, 0);
break;
}
@@ -10171,18 +10145,18 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
if (ExtractedFromVec == VecIn1 || ExtractedFromVec == VecIn2)
continue;
- if (VecIn1.getNode() == 0) {
+ if (!VecIn1.getNode()) {
VecIn1 = ExtractedFromVec;
- } else if (VecIn2.getNode() == 0) {
+ } else if (!VecIn2.getNode()) {
VecIn2 = ExtractedFromVec;
} else {
// Too many inputs.
- VecIn1 = VecIn2 = SDValue(0, 0);
+ VecIn1 = VecIn2 = SDValue(nullptr, 0);
break;
}
}
- // If everything is good, we can make a shuffle operation.
+ // If everything is good, we can make a shuffle operation.
if (VecIn1.getNode()) {
SmallVector<int, 8> Mask;
for (unsigned i = 0; i != NumInScalars; ++i) {
@@ -10212,7 +10186,7 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
// Attempt to transform a single input vector to the correct type.
if ((VT != VecIn1.getValueType())) {
// We don't support shuffling between TWO values of different types.
- if (VecIn2.getNode() != 0)
+ if (VecIn2.getNode())
return SDValue();
// We only support widening of vectors which are half the size of the
@@ -10311,8 +10285,7 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
for (unsigned i = 0; i != BuildVecNumElts; ++i)
Opnds.push_back(N1.getOperand(i));
- return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, &Opnds[0],
- Opnds.size());
+ return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Opnds);
}
// Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
@@ -10469,8 +10442,7 @@ static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
}
}
- return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops.data(),
- Ops.size());
+ return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
}
SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
@@ -10685,8 +10657,7 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
EVT EltVT = RVT.getVectorElementType();
SmallVector<SDValue,8> ZeroOps(RVT.getVectorNumElements(),
DAG.getConstant(0, EltVT));
- SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N),
- RVT, &ZeroOps[0], ZeroOps.size());
+ SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), RVT, ZeroOps);
LHS = DAG.getNode(ISD::BITCAST, dl, RVT, LHS);
SDValue Shuf = DAG.getVectorShuffle(RVT, dl, LHS, Zero, &Indices[0]);
return DAG.getNode(ISD::BITCAST, dl, VT, Shuf);
@@ -10755,8 +10726,7 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
}
if (Ops.size() == LHS.getNumOperands())
- return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N),
- LHS.getValueType(), &Ops[0], Ops.size());
+ return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), LHS.getValueType(), Ops);
}
return SDValue();
@@ -10791,8 +10761,7 @@ SDValue DAGCombiner::SimplifyVUnaryOp(SDNode *N) {
if (Ops.size() != N0.getNumOperands())
return SDValue();
- return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N),
- N0.getValueType(), &Ops[0], Ops.size());
+ return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), N0.getValueType(), Ops);
}
SDValue DAGCombiner::SimplifySelect(SDLoc DL, SDValue N0,
@@ -10994,7 +10963,9 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1,
if (ConstantFPSDNode *FV = dyn_cast<ConstantFPSDNode>(N3)) {
if (TLI.isTypeLegal(N2.getValueType()) &&
(TLI.getOperationAction(ISD::ConstantFP, N2.getValueType()) !=
- TargetLowering::Legal) &&
+ TargetLowering::Legal &&
+ !TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0)) &&
+ !TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0))) &&
// If both constants have multiple uses, then we won't need to do an
// extra load; they are likely around in registers for other users.
(TV->hasOneUse() || FV->hasOneUse())) {
@@ -11201,7 +11172,7 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1,
// select_cc setlt X, 1, -X, X ->
// Y = sra (X, size(X)-1); xor (add (X, Y), Y)
if (N1C) {
- ConstantSDNode *SubC = NULL;
+ ConstantSDNode *SubC = nullptr;
if (((N1C->isNullValue() && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
(N1C->isAllOnesValue() && CC == ISD::SETGT)) &&
N0 == N2 && N3.getOpcode() == ISD::SUB && N0 == N3.getOperand(1))
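
The select_cc pattern named in the comment above is the classic branch-free absolute value. A standalone sketch, assuming 32-bit int with arithmetic right shift (true on mainstream targets):

#include <cassert>
#include <cstdint>

int32_t absNoBranch(int32_t X) {
  int32_t Y = X >> 31;  // sra: 0 if X >= 0, -1 (all ones) if X < 0
  return (X + Y) ^ Y;   // when Y == -1 this negates X via two's complement
}

int main() {
  assert(absNoBranch(5) == 5);
  assert(absNoBranch(-7) == 7);
  assert(absNoBranch(0) == 0);
  return 0;
}
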
@@ -11242,26 +11213,42 @@ SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0,
/// multiplying by a magic number. See:
/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
SDValue DAGCombiner::BuildSDIV(SDNode *N) {
+ ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
+ if (!C)
+ return SDValue();
+
+ // Avoid division by zero.
+ if (!C->getAPIntValue())
+ return SDValue();
+
std::vector<SDNode*> Built;
- SDValue S = TLI.BuildSDIV(N, DAG, LegalOperations, &Built);
+ SDValue S =
+ TLI.BuildSDIV(N, C->getAPIntValue(), DAG, LegalOperations, &Built);
- for (std::vector<SDNode*>::iterator ii = Built.begin(), ee = Built.end();
- ii != ee; ++ii)
- AddToWorkList(*ii);
+ for (SDNode *N : Built)
+ AddToWorkList(N);
return S;
}
-/// BuildUDIVSequence - Given an ISD::UDIV node expressing a divide by constant,
+/// BuildUDIV - Given an ISD::UDIV node expressing a divide by constant,
/// return a DAG expression to select that will generate the same value by
/// multiplying by a magic number. See:
/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
SDValue DAGCombiner::BuildUDIV(SDNode *N) {
+ ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
+ if (!C)
+ return SDValue();
+
+ // Avoid division by zero.
+ if (!C->getAPIntValue())
+ return SDValue();
+
std::vector<SDNode*> Built;
- SDValue S = TLI.BuildUDIV(N, DAG, LegalOperations, &Built);
+ SDValue S =
+ TLI.BuildUDIV(N, C->getAPIntValue(), DAG, LegalOperations, &Built);
- for (std::vector<SDNode*>::iterator ii = Built.begin(), ee = Built.end();
- ii != ee; ++ii)
- AddToWorkList(*ii);
+ for (SDNode *N : Built)
+ AddToWorkList(N);
return S;
}
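
Both builders now bail out unless the divisor is a (splat of a) nonzero constant, then hand the APInt to the TargetLowering helpers that emit the multiply-by-magic-number sequence. A standalone sketch of that sequence for divisor 10, using the well-known 32-bit magic constant 0x66666667 and post-shift 2 (illustrative, not the LLVM helper):

#include <cassert>
#include <cstdint>

int32_t sdiv10(int32_t X) {
  int64_t Prod = (int64_t)X * 0x66666667LL; // widening multiply by the magic
  int32_t Hi = (int32_t)(Prod >> 32);       // MULHS: high half of the product
  int32_t Q = Hi >> 2;                      // post-shift chosen for divisor 10
  Q += (uint32_t)Q >> 31;                   // add the sign bit to round toward zero
  return Q;
}

int main() {
  const int32_t Tests[] = {-1000000, -10, -7, -1, 0, 1, 9, 10, 99, 1000000};
  for (int32_t X : Tests)
    assert(sdiv10(X) == X / 10);
  return 0;
}
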
@@ -11271,7 +11258,7 @@ SDValue DAGCombiner::BuildUDIV(SDNode *N) {
static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset,
const GlobalValue *&GV, const void *&CV) {
// Assume it is a primitive operation.
- Base = Ptr; Offset = 0; GV = 0; CV = 0;
+ Base = Ptr; Offset = 0; GV = nullptr; CV = nullptr;
// If it's adding a simple constant then integrate the offset.
if (Base.getOpcode() == ISD::ADD) {
@@ -11305,31 +11292,27 @@ static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset,
/// isAlias - Return true if there is any possibility that the two addresses
/// overlap.
-bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1, bool IsVolatile1,
- const Value *SrcValue1, int SrcValueOffset1,
- unsigned SrcValueAlign1,
- const MDNode *TBAAInfo1,
- SDValue Ptr2, int64_t Size2, bool IsVolatile2,
- const Value *SrcValue2, int SrcValueOffset2,
- unsigned SrcValueAlign2,
- const MDNode *TBAAInfo2) const {
+bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const {
// If they are the same then they must be aliases.
- if (Ptr1 == Ptr2) return true;
+ if (Op0->getBasePtr() == Op1->getBasePtr()) return true;
// If they are both volatile then they cannot be reordered.
- if (IsVolatile1 && IsVolatile2) return true;
+ if (Op0->isVolatile() && Op1->isVolatile()) return true;
// Gather base node and offset information.
SDValue Base1, Base2;
int64_t Offset1, Offset2;
const GlobalValue *GV1, *GV2;
const void *CV1, *CV2;
- bool isFrameIndex1 = FindBaseOffset(Ptr1, Base1, Offset1, GV1, CV1);
- bool isFrameIndex2 = FindBaseOffset(Ptr2, Base2, Offset2, GV2, CV2);
+ bool isFrameIndex1 = FindBaseOffset(Op0->getBasePtr(),
+ Base1, Offset1, GV1, CV1);
+ bool isFrameIndex2 = FindBaseOffset(Op1->getBasePtr(),
+ Base2, Offset2, GV2, CV2);
// If they have a same base address then check to see if they overlap.
if (Base1 == Base2 || (GV1 && (GV1 == GV2)) || (CV1 && (CV1 == CV2)))
- return !((Offset1 + Size1) <= Offset2 || (Offset2 + Size2) <= Offset1);
+ return !((Offset1 + (Op0->getMemoryVT().getSizeInBits() >> 3)) <= Offset2 ||
+ (Offset2 + (Op1->getMemoryVT().getSizeInBits() >> 3)) <= Offset1);
// It is possible for different frame indices to alias each other, mostly
// when tail call optimization reuses return address slots for arguments.
@@ -11339,7 +11322,8 @@ bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1, bool IsVolatile1,
MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
Offset1 += MFI->getObjectOffset(cast<FrameIndexSDNode>(Base1)->getIndex());
Offset2 += MFI->getObjectOffset(cast<FrameIndexSDNode>(Base2)->getIndex());
- return !((Offset1 + Size1) <= Offset2 || (Offset2 + Size2) <= Offset1);
+ return !((Offset1 + (Op0->getMemoryVT().getSizeInBits() >> 3)) <= Offset2 ||
+ (Offset2 + (Op1->getMemoryVT().getSizeInBits() >> 3)) <= Offset1);
}
// Otherwise, if we know what the bases are, and they aren't identical, then
@@ -11351,15 +11335,18 @@ bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1, bool IsVolatile1,
// compared to the size and offset of the access, we may be able to prove they
// do not alias. This check is conservative for now to catch cases created by
// splitting vector types.
- if ((SrcValueAlign1 == SrcValueAlign2) &&
- (SrcValueOffset1 != SrcValueOffset2) &&
- (Size1 == Size2) && (SrcValueAlign1 > Size1)) {
- int64_t OffAlign1 = SrcValueOffset1 % SrcValueAlign1;
- int64_t OffAlign2 = SrcValueOffset2 % SrcValueAlign1;
+ if ((Op0->getOriginalAlignment() == Op1->getOriginalAlignment()) &&
+ (Op0->getSrcValueOffset() != Op1->getSrcValueOffset()) &&
+ (Op0->getMemoryVT().getSizeInBits() >> 3 ==
+ Op1->getMemoryVT().getSizeInBits() >> 3) &&
+      (Op0->getOriginalAlignment() > (Op0->getMemoryVT().getSizeInBits() >> 3))) {
+ int64_t OffAlign1 = Op0->getSrcValueOffset() % Op0->getOriginalAlignment();
+ int64_t OffAlign2 = Op1->getSrcValueOffset() % Op1->getOriginalAlignment();
// There is no overlap between these relatively aligned accesses of similar
// size; return no alias.
- if ((OffAlign1 + Size1) <= OffAlign2 || (OffAlign2 + Size2) <= OffAlign1)
+ if ((OffAlign1 + (Op0->getMemoryVT().getSizeInBits() >> 3)) <= OffAlign2 ||
+ (OffAlign2 + (Op1->getMemoryVT().getSizeInBits() >> 3)) <= OffAlign1)
return false;
}
@@ -11370,16 +11357,22 @@ bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1, bool IsVolatile1,
CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
UseAA = false;
#endif
- if (UseAA && SrcValue1 && SrcValue2) {
+ if (UseAA &&
+ Op0->getMemOperand()->getValue() && Op1->getMemOperand()->getValue()) {
// Use alias analysis information.
- int64_t MinOffset = std::min(SrcValueOffset1, SrcValueOffset2);
- int64_t Overlap1 = Size1 + SrcValueOffset1 - MinOffset;
- int64_t Overlap2 = Size2 + SrcValueOffset2 - MinOffset;
+ int64_t MinOffset = std::min(Op0->getSrcValueOffset(),
+ Op1->getSrcValueOffset());
+ int64_t Overlap1 = (Op0->getMemoryVT().getSizeInBits() >> 3) +
+ Op0->getSrcValueOffset() - MinOffset;
+ int64_t Overlap2 = (Op1->getMemoryVT().getSizeInBits() >> 3) +
+ Op1->getSrcValueOffset() - MinOffset;
AliasAnalysis::AliasResult AAResult =
- AA.alias(AliasAnalysis::Location(SrcValue1, Overlap1,
- UseTBAA ? TBAAInfo1 : 0),
- AliasAnalysis::Location(SrcValue2, Overlap2,
- UseTBAA ? TBAAInfo2 : 0));
+ AA.alias(AliasAnalysis::Location(Op0->getMemOperand()->getValue(),
+ Overlap1,
+ UseTBAA ? Op0->getTBAAInfo() : nullptr),
+ AliasAnalysis::Location(Op1->getMemOperand()->getValue(),
+ Overlap2,
+ UseTBAA ? Op1->getTBAAInfo() : nullptr));
if (AAResult == AliasAnalysis::NoAlias)
return false;
}
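
The refactored isAlias reads sizes straight off the memory operands (SizeInBits >> 3 converts bits to bytes) and keeps the same half-open interval-overlap test throughout. A standalone sketch of that test (illustrative):

#include <cassert>
#include <cstdint>

// Accesses [Off1, Off1+Size1) and [Off2, Off2+Size2) may alias unless one
// ends at or before the point where the other begins.
bool mayOverlap(int64_t Off1, int64_t Size1, int64_t Off2, int64_t Size2) {
  return !((Off1 + Size1) <= Off2 || (Off2 + Size2) <= Off1);
}

int main() {
  assert(mayOverlap(0, 4, 2, 4));  // [0,4) and [2,6) share bytes 2..3
  assert(!mayOverlap(0, 4, 4, 4)); // [0,4) and [4,8) are adjacent, disjoint
  return 0;
}
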
@@ -11388,44 +11381,6 @@ bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1, bool IsVolatile1,
return true;
}
-bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) {
- SDValue Ptr0, Ptr1;
- int64_t Size0, Size1;
- bool IsVolatile0, IsVolatile1;
- const Value *SrcValue0, *SrcValue1;
- int SrcValueOffset0, SrcValueOffset1;
- unsigned SrcValueAlign0, SrcValueAlign1;
- const MDNode *SrcTBAAInfo0, *SrcTBAAInfo1;
- FindAliasInfo(Op0, Ptr0, Size0, IsVolatile0, SrcValue0, SrcValueOffset0,
- SrcValueAlign0, SrcTBAAInfo0);
- FindAliasInfo(Op1, Ptr1, Size1, IsVolatile1, SrcValue1, SrcValueOffset1,
- SrcValueAlign1, SrcTBAAInfo1);
- return isAlias(Ptr0, Size0, IsVolatile0, SrcValue0, SrcValueOffset0,
- SrcValueAlign0, SrcTBAAInfo0,
- Ptr1, Size1, IsVolatile1, SrcValue1, SrcValueOffset1,
- SrcValueAlign1, SrcTBAAInfo1);
-}
-
-/// FindAliasInfo - Extracts the relevant alias information from the memory
-/// node. Returns true if the operand was a nonvolatile load.
-bool DAGCombiner::FindAliasInfo(SDNode *N,
- SDValue &Ptr, int64_t &Size, bool &IsVolatile,
- const Value *&SrcValue,
- int &SrcValueOffset,
- unsigned &SrcValueAlign,
- const MDNode *&TBAAInfo) const {
- LSBaseSDNode *LS = cast<LSBaseSDNode>(N);
-
- Ptr = LS->getBasePtr();
- Size = LS->getMemoryVT().getSizeInBits() >> 3;
- IsVolatile = LS->isVolatile();
- SrcValue = LS->getSrcValue();
- SrcValueOffset = LS->getSrcValueOffset();
- SrcValueAlign = LS->getOriginalAlignment();
- TBAAInfo = LS->getTBAAInfo();
- return isa<LoadSDNode>(LS) && !IsVolatile;
-}
-
/// GatherAllAliases - Walk up chain skipping non-aliasing memory nodes,
/// looking for aliasing nodes and adding them to the Aliases vector.
void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
@@ -11434,15 +11389,7 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
SmallPtrSet<SDNode *, 16> Visited; // Visited node set.
// Get alias information for node.
- SDValue Ptr;
- int64_t Size;
- bool IsVolatile;
- const Value *SrcValue;
- int SrcValueOffset;
- unsigned SrcValueAlign;
- const MDNode *SrcTBAAInfo;
- bool IsLoad = FindAliasInfo(N, Ptr, Size, IsVolatile, SrcValue,
- SrcValueOffset, SrcValueAlign, SrcTBAAInfo);
+ bool IsLoad = isa<LoadSDNode>(N) && !cast<LSBaseSDNode>(N)->isVolatile();
// Starting off.
Chains.push_back(OriginalChain);
@@ -11481,24 +11428,12 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
case ISD::LOAD:
case ISD::STORE: {
// Get alias information for Chain.
- SDValue OpPtr;
- int64_t OpSize;
- bool OpIsVolatile;
- const Value *OpSrcValue;
- int OpSrcValueOffset;
- unsigned OpSrcValueAlign;
- const MDNode *OpSrcTBAAInfo;
- bool IsOpLoad = FindAliasInfo(Chain.getNode(), OpPtr, OpSize,
- OpIsVolatile, OpSrcValue, OpSrcValueOffset,
- OpSrcValueAlign,
- OpSrcTBAAInfo);
+ bool IsOpLoad = isa<LoadSDNode>(Chain.getNode()) &&
+ !cast<LSBaseSDNode>(Chain.getNode())->isVolatile();
// If chain is alias then stop here.
if (!(IsLoad && IsOpLoad) &&
- isAlias(Ptr, Size, IsVolatile, SrcValue, SrcValueOffset,
- SrcValueAlign, SrcTBAAInfo,
- OpPtr, OpSize, OpIsVolatile, OpSrcValue, OpSrcValueOffset,
- OpSrcValueAlign, OpSrcTBAAInfo)) {
+ isAlias(cast<LSBaseSDNode>(N), cast<LSBaseSDNode>(Chain.getNode()))) {
Aliases.push_back(Chain);
} else {
// Look further up the chain.
@@ -11604,8 +11539,7 @@ SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
return Aliases[0];
// Construct a custom tailored token factor.
- return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other,
- &Aliases[0], Aliases.size());
+ return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Aliases);
}
// SelectionDAG::Combine - This is the entry point for the file.
diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp
index baba51e..99931c1 100644
--- a/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -39,7 +39,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "isel"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/Statistic.h"
@@ -64,6 +63,8 @@
#include "llvm/Target/TargetMachine.h"
using namespace llvm;
+#define DEBUG_TYPE "isel"
+
STATISTIC(NumFastIselSuccessIndependent, "Number of insts selected by "
"target-independent selector");
STATISTIC(NumFastIselSuccessTarget, "Number of insts selected by "
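
This DEBUG_TYPE move repeats across the files below: the macro is now defined after the includes, so it only applies to this translation unit's code and cannot leak into, or collide with, headers that use DEBUG_TYPE themselves. A trivial sketch of the ordering:

#include <cstdio>  // headers are processed before the macro exists

#define DEBUG_TYPE "isel"  // visible only to code below this point

int main() {
  std::printf("debug type: %s\n", DEBUG_TYPE);
  return 0;
}
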
@@ -79,7 +80,7 @@ void FastISel::startNewBlock() {
// Instructions are appended to FuncInfo.MBB. If the basic block already
// contains labels or copies, use the last instruction as the last local
// value.
- EmitStartPt = 0;
+ EmitStartPt = nullptr;
if (!FuncInfo.MBB->empty())
EmitStartPt = &FuncInfo.MBB->back();
LastLocalValue = EmitStartPt;
@@ -826,15 +827,21 @@ FastISel::SelectInstruction(const Instruction *I) {
MachineBasicBlock::iterator SavedInsertPt = FuncInfo.InsertPt;
- // As a special case, don't handle calls to builtin library functions that
- // may be translated directly to target instructions.
if (const CallInst *Call = dyn_cast<CallInst>(I)) {
const Function *F = Call->getCalledFunction();
LibFunc::Func Func;
+
+ // As a special case, don't handle calls to builtin library functions that
+ // may be translated directly to target instructions.
if (F && !F->hasLocalLinkage() && F->hasName() &&
LibInfo->getLibFunc(F->getName(), Func) &&
LibInfo->hasOptimizedCodeGen(Func))
return false;
+
+      // Don't handle Intrinsic::trap if a trap function is specified.
+ if (F && F->getIntrinsicID() == Intrinsic::trap &&
+ !TM.Options.getTrapFunctionName().empty())
+ return false;
}
// First, try doing target-independent selection.
@@ -880,7 +887,7 @@ FastISel::FastEmitBranch(MachineBasicBlock *MSucc, DebugLoc DbgLoc) {
// fall-through case, which needs no instructions.
} else {
// The unconditional branch case.
- TII.InsertBranch(*FuncInfo.MBB, MSucc, NULL,
+ TII.InsertBranch(*FuncInfo.MBB, MSucc, nullptr,
SmallVector<MachineOperand, 0>(), DbgLoc);
}
FuncInfo.MBB->addSuccessor(MSucc);
@@ -1035,8 +1042,10 @@ FastISel::SelectOperator(const User *I, unsigned Opcode) {
}
case Instruction::Unreachable:
- // Nothing to emit.
- return true;
+ if (TM.Options.TrapUnreachable)
+ return FastEmit_(MVT::Other, MVT::Other, ISD::TRAP) != 0;
+ else
+ return true;
case Instruction::Alloca:
// FunctionLowering has the static-sized case covered.
@@ -1204,6 +1213,23 @@ unsigned FastISel::createResultReg(const TargetRegisterClass* RC) {
return MRI.createVirtualRegister(RC);
}
+unsigned FastISel::constrainOperandRegClass(const MCInstrDesc &II,
+ unsigned Op, unsigned OpNum) {
+ if (TargetRegisterInfo::isVirtualRegister(Op)) {
+ const TargetRegisterClass *RegClass =
+ TII.getRegClass(II, OpNum, &TRI, *FuncInfo.MF);
+ if (!MRI.constrainRegClass(Op, RegClass)) {
+ // If it's not legal to COPY between the register classes, something
+ // has gone very wrong before we got here.
+ unsigned NewOp = createResultReg(RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), NewOp).addReg(Op);
+ return NewOp;
+ }
+ }
+ return Op;
+}
+
unsigned FastISel::FastEmitInst_(unsigned MachineInstOpcode,
const TargetRegisterClass* RC) {
unsigned ResultReg = createResultReg(RC);
@@ -1216,9 +1242,11 @@ unsigned FastISel::FastEmitInst_(unsigned MachineInstOpcode,
unsigned FastISel::FastEmitInst_r(unsigned MachineInstOpcode,
const TargetRegisterClass *RC,
unsigned Op0, bool Op0IsKill) {
- unsigned ResultReg = createResultReg(RC);
const MCInstrDesc &II = TII.get(MachineInstOpcode);
+ unsigned ResultReg = createResultReg(RC);
+ Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs());
+
if (II.getNumDefs() >= 1)
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
.addReg(Op0, Op0IsKill * RegState::Kill);
@@ -1236,9 +1264,12 @@ unsigned FastISel::FastEmitInst_rr(unsigned MachineInstOpcode,
const TargetRegisterClass *RC,
unsigned Op0, bool Op0IsKill,
unsigned Op1, bool Op1IsKill) {
- unsigned ResultReg = createResultReg(RC);
const MCInstrDesc &II = TII.get(MachineInstOpcode);
+ unsigned ResultReg = createResultReg(RC);
+ Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs());
+ Op1 = constrainOperandRegClass(II, Op1, II.getNumDefs() + 1);
+
if (II.getNumDefs() >= 1)
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
.addReg(Op0, Op0IsKill * RegState::Kill)
@@ -1258,9 +1289,13 @@ unsigned FastISel::FastEmitInst_rrr(unsigned MachineInstOpcode,
unsigned Op0, bool Op0IsKill,
unsigned Op1, bool Op1IsKill,
unsigned Op2, bool Op2IsKill) {
- unsigned ResultReg = createResultReg(RC);
const MCInstrDesc &II = TII.get(MachineInstOpcode);
+ unsigned ResultReg = createResultReg(RC);
+ Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs());
+ Op1 = constrainOperandRegClass(II, Op1, II.getNumDefs() + 1);
+ Op2 = constrainOperandRegClass(II, Op2, II.getNumDefs() + 2);
+
if (II.getNumDefs() >= 1)
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
.addReg(Op0, Op0IsKill * RegState::Kill)
@@ -1281,9 +1316,12 @@ unsigned FastISel::FastEmitInst_ri(unsigned MachineInstOpcode,
const TargetRegisterClass *RC,
unsigned Op0, bool Op0IsKill,
uint64_t Imm) {
- unsigned ResultReg = createResultReg(RC);
const MCInstrDesc &II = TII.get(MachineInstOpcode);
+ unsigned ResultReg = createResultReg(RC);
+ RC = TII.getRegClass(II, II.getNumDefs(), &TRI, *FuncInfo.MF);
+ MRI.constrainRegClass(Op0, RC);
+
if (II.getNumDefs() >= 1)
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
.addReg(Op0, Op0IsKill * RegState::Kill)
@@ -1302,9 +1340,11 @@ unsigned FastISel::FastEmitInst_rii(unsigned MachineInstOpcode,
const TargetRegisterClass *RC,
unsigned Op0, bool Op0IsKill,
uint64_t Imm1, uint64_t Imm2) {
- unsigned ResultReg = createResultReg(RC);
const MCInstrDesc &II = TII.get(MachineInstOpcode);
+ unsigned ResultReg = createResultReg(RC);
+ Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs());
+
if (II.getNumDefs() >= 1)
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
.addReg(Op0, Op0IsKill * RegState::Kill)
@@ -1325,9 +1365,11 @@ unsigned FastISel::FastEmitInst_rf(unsigned MachineInstOpcode,
const TargetRegisterClass *RC,
unsigned Op0, bool Op0IsKill,
const ConstantFP *FPImm) {
- unsigned ResultReg = createResultReg(RC);
const MCInstrDesc &II = TII.get(MachineInstOpcode);
+ unsigned ResultReg = createResultReg(RC);
+ Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs());
+
if (II.getNumDefs() >= 1)
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
.addReg(Op0, Op0IsKill * RegState::Kill)
@@ -1347,9 +1389,12 @@ unsigned FastISel::FastEmitInst_rri(unsigned MachineInstOpcode,
unsigned Op0, bool Op0IsKill,
unsigned Op1, bool Op1IsKill,
uint64_t Imm) {
- unsigned ResultReg = createResultReg(RC);
const MCInstrDesc &II = TII.get(MachineInstOpcode);
+ unsigned ResultReg = createResultReg(RC);
+ Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs());
+ Op1 = constrainOperandRegClass(II, Op1, II.getNumDefs() + 1);
+
if (II.getNumDefs() >= 1)
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
.addReg(Op0, Op0IsKill * RegState::Kill)
@@ -1371,9 +1416,12 @@ unsigned FastISel::FastEmitInst_rrii(unsigned MachineInstOpcode,
unsigned Op0, bool Op0IsKill,
unsigned Op1, bool Op1IsKill,
uint64_t Imm1, uint64_t Imm2) {
- unsigned ResultReg = createResultReg(RC);
const MCInstrDesc &II = TII.get(MachineInstOpcode);
+ unsigned ResultReg = createResultReg(RC);
+ Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs());
+ Op1 = constrainOperandRegClass(II, Op1, II.getNumDefs() + 1);
+
if (II.getNumDefs() >= 1)
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
.addReg(Op0, Op0IsKill * RegState::Kill)
diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
index 5f0006e..ae124e8 100644
--- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
+++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -12,7 +12,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "function-lowering-info"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/CodeGen/Analysis.h"
@@ -40,6 +39,8 @@
#include <algorithm>
using namespace llvm;
+#define DEBUG_TYPE "function-lowering-info"
+
/// isUsedOutsideOfDefiningBlock - Return true if this instruction is used by
/// PHI nodes or outside of the basic block that defines it, or used by a
/// switch or atomic instruction, which may expand to multiple basic blocks.
@@ -283,11 +284,11 @@ unsigned FunctionLoweringInfo::CreateRegs(Type *Ty) {
const FunctionLoweringInfo::LiveOutInfo *
FunctionLoweringInfo::GetLiveOutRegInfo(unsigned Reg, unsigned BitWidth) {
if (!LiveOutRegInfo.inBounds(Reg))
- return NULL;
+ return nullptr;
LiveOutInfo *LOI = &LiveOutRegInfo[Reg];
if (!LOI->IsValid)
- return NULL;
+ return nullptr;
if (BitWidth > LOI->KnownZero.getBitWidth()) {
LOI->NumSignBits = 1;
diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index 1c596b8..7c124b8 100644
--- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -13,7 +13,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "instr-emitter"
#include "InstrEmitter.h"
#include "SDNodeDbgValue.h"
#include "llvm/ADT/Statistic.h"
@@ -31,6 +30,8 @@
#include "llvm/Target/TargetMachine.h"
using namespace llvm;
+#define DEBUG_TYPE "instr-emitter"
+
/// MinRCSize - Smallest register class we allow when constraining virtual
/// registers. If satisfying all register class constraints would require
/// using a smaller register class, emit a COPY to a new virtual register
@@ -99,7 +100,7 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned,
// If the node is only used by a CopyToReg and the dest reg is a vreg, use
// the CopyToReg'd destination register instead of creating a new vreg.
bool MatchReg = true;
- const TargetRegisterClass *UseRC = NULL;
+ const TargetRegisterClass *UseRC = nullptr;
MVT VT = Node->getSimpleValueType(ResNo);
// Stick to the preferred register classes for legal types.
@@ -107,9 +108,7 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned,
UseRC = TLI->getRegClassFor(VT);
if (!IsClone && !IsCloned)
- for (SDNode::use_iterator UI = Node->use_begin(), E = Node->use_end();
- UI != E; ++UI) {
- SDNode *User = *UI;
+ for (SDNode *User : Node->uses()) {
bool Match = true;
if (User->getOpcode() == ISD::CopyToReg &&
User->getOperand(2).getNode() == Node &&
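
Another recurring cleanup: explicit use_iterator loops become range-based for over Node->uses(). Any type exposing begin()/end() works the same way; a generic sketch:

#include <cassert>
#include <vector>

int sumUses(const std::vector<int> &Uses) {
  int Total = 0;
  // Replaces: for (auto UI = Uses.begin(), E = Uses.end(); UI != E; ++UI)
  for (int U : Uses)
    Total += U;
  return Total;
}

int main() {
  assert(sumUses({1, 2, 3}) == 6);
  return 0;
}
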
@@ -131,7 +130,7 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned,
Match = false;
if (User->isMachineOpcode()) {
const MCInstrDesc &II = TII->get(User->getMachineOpcode());
- const TargetRegisterClass *RC = 0;
+ const TargetRegisterClass *RC = nullptr;
if (i+II.getNumDefs() < II.getNumOperands()) {
RC = TRI->getAllocatableClass(
TII->getRegClass(II, i+II.getNumDefs(), TRI, *MF));
@@ -154,7 +153,7 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned,
break;
}
- const TargetRegisterClass *SrcRC = 0, *DstRC = 0;
+ const TargetRegisterClass *SrcRC = nullptr, *DstRC = nullptr;
SrcRC = TRI->getMinimalPhysRegClass(SrcReg, VT);
// Figure out the register class to create for the destreg.
@@ -242,9 +241,7 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node,
}
if (!VRBase && !IsClone && !IsCloned)
- for (SDNode::use_iterator UI = Node->use_begin(), E = Node->use_end();
- UI != E; ++UI) {
- SDNode *User = *UI;
+ for (SDNode *User : Node->uses()) {
if (User->getOpcode() == ISD::CopyToReg &&
User->getOperand(2).getNode() == Node &&
User->getOperand(2).getResNo() == i) {
@@ -329,7 +326,7 @@ InstrEmitter::AddRegisterOperand(MachineInstrBuilder &MIB,
// shrink VReg's register class within reason. For example, if VReg == GR32
// and II requires a GR32_NOSP, just constrain VReg to GR32_NOSP.
if (II) {
- const TargetRegisterClass *DstRC = 0;
+ const TargetRegisterClass *DstRC = nullptr;
if (IIOpNum < II->getNumOperands())
DstRC = TRI->getAllocatableClass(TII->getRegClass(*II,IIOpNum,TRI,*MF));
if (DstRC && !MRI->constrainRegClass(VReg, DstRC, MinRCSize)) {
@@ -470,9 +467,7 @@ void InstrEmitter::EmitSubregNode(SDNode *Node,
// If the node is only used by a CopyToReg and the dest reg is a vreg, use
// the CopyToReg'd destination register instead of creating a new vreg.
- for (SDNode::use_iterator UI = Node->use_begin(), E = Node->use_end();
- UI != E; ++UI) {
- SDNode *User = *UI;
+ for (SDNode *User : Node->uses()) {
if (User->getOpcode() == ISD::CopyToReg &&
User->getOperand(2).getNode() == Node) {
unsigned DestReg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
@@ -561,10 +556,10 @@ void InstrEmitter::EmitSubregNode(SDNode *Node,
const ConstantSDNode *SD = cast<ConstantSDNode>(N0);
MIB.addImm(SD->getZExtValue());
} else
- AddOperand(MIB, N0, 0, 0, VRBaseMap, /*IsDebug=*/false,
+ AddOperand(MIB, N0, 0, nullptr, VRBaseMap, /*IsDebug=*/false,
IsClone, IsCloned);
// Add the subregister being inserted
- AddOperand(MIB, N1, 0, 0, VRBaseMap, /*IsDebug=*/false,
+ AddOperand(MIB, N1, 0, nullptr, VRBaseMap, /*IsDebug=*/false,
IsClone, IsCloned);
MIB.addImm(SubIdx);
MBB->insert(InsertPos, MIB);
@@ -693,10 +688,13 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD,
MIB.addReg(0U);
}
- if (Offset != 0) // Indirect addressing.
+ // Indirect addressing is indicated by an Imm as the second parameter.
+ if (SD->isIndirect())
MIB.addImm(Offset);
- else
+ else {
+ assert(Offset == 0 && "direct value cannot have an offset");
MIB.addReg(0U, RegState::Debug);
+ }
MIB.addMetadata(MDPtr);
@@ -738,7 +736,7 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
const MCInstrDesc &II = TII->get(Opc);
unsigned NumResults = CountResults(Node);
unsigned NumDefs = II.getNumDefs();
- const uint16_t *ScratchRegs = NULL;
+ const MCPhysReg *ScratchRegs = nullptr;
// Handle STACKMAP and PATCHPOINT specially and then use the generic code.
if (Opc == TargetOpcode::STACKMAP || Opc == TargetOpcode::PATCHPOINT) {
@@ -756,7 +754,7 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
unsigned NumImpUses = 0;
unsigned NodeOperands =
countOperands(Node, II.getNumOperands() - NumDefs, NumImpUses);
- bool HasPhysRegOuts = NumResults > NumDefs && II.getImplicitDefs()!=0;
+ bool HasPhysRegOuts = NumResults > NumDefs && II.getImplicitDefs()!=nullptr;
#ifndef NDEBUG
unsigned NumMIOperands = NodeOperands + NumResults;
if (II.isVariadic())
@@ -982,7 +980,7 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
// The addressing mode has been selected, just add all of the
// operands to the machine instruction.
for (unsigned j = 0; j != NumVals; ++j, ++i)
- AddOperand(MIB, Node->getOperand(i), 0, 0, VRBaseMap,
+ AddOperand(MIB, Node->getOperand(i), 0, nullptr, VRBaseMap,
/*IsDebug=*/false, IsClone, IsCloned);
// Manually set isTied bits.
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 20afb3d..a59e895 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -387,9 +387,7 @@ static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG,
MinAlign(ST->getAlignment(), Offset),
ST->getTBAAInfo()));
// The order of the stores doesn't matter - say it with a TokenFactor.
- SDValue Result =
- DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Stores[0],
- Stores.size());
+ SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
DAGLegalize->ReplaceNode(SDValue(ST, 0), Result);
return;
}
@@ -506,8 +504,7 @@ ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG,
false, false, 0));
// The order of the stores doesn't matter - say it with a TokenFactor.
- SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Stores[0],
- Stores.size());
+ SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
// Finally, perform the original load only redirected to the stack slot.
Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase,
@@ -705,7 +702,7 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) {
}
}
}
- return SDValue(0, 0);
+ return SDValue(nullptr, 0);
}
void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
@@ -1268,6 +1265,13 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
if (Action == TargetLowering::Legal)
Action = TargetLowering::Custom;
break;
+ case ISD::READ_REGISTER:
+ case ISD::WRITE_REGISTER:
+      // Named registers are legal in the DAG, but blocked by register-name
+      // selection if not implemented by the target (to choose the correct
+      // register). They'll be converted to Copy(To/From)Reg.
+ Action = TargetLowering::Legal;
+ break;
case ISD::DEBUGTRAP:
Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
if (Action == TargetLowering::Expand) {
@@ -1528,8 +1532,7 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) {
SDValue StoreChain;
if (!Stores.empty()) // Not all undef elements?
- StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- &Stores[0], Stores.size());
+ StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
else
StoreChain = DAG.getEntryNode();
@@ -1649,8 +1652,8 @@ void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node,
/// If the SETCC has been legalized using the inverse condcode, then LHS and
/// RHS will be unchanged, CC will set to the inverted condcode, and NeedInvert
/// will be set to true. The caller must invert the result of the SETCC with
-/// SelectionDAG::getNOT() or take equivalent action to swap the effect of a
-/// true/false result.
+/// SelectionDAG::getLogicalNOT() or take equivalent action to swap the effect
+/// of a true/false result.
///
/// \returns true if the SetCC has been legalized, false if it hasn't.
bool SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT,
@@ -2055,13 +2058,12 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node,
if (isTailCall)
InChain = TCChain;
- TargetLowering::
- CallLoweringInfo CLI(InChain, RetTy, isSigned, !isSigned, false, false,
- 0, TLI.getLibcallCallingConv(LC), isTailCall,
- /*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
- Callee, Args, DAG, SDLoc(Node));
- std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(SDLoc(Node)).setChain(InChain)
+ .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, &Args, 0)
+ .setTailCall(isTailCall).setSExtResult(isSigned).setZExtResult(!isSigned);
+ std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);
if (!CallInfo.second.getNode())
// It's a tailcall, return the chain (which is the DAG root).
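
This CallLoweringInfo rewrite recurs throughout the legalizer changes below: the old dozen-positional-argument constructor becomes a fluent builder where each setter returns *this. A shape sketch with illustrative names (not the real class):

#include <string>
#include <utility>

struct CallInfoSketch {
  std::string Callee;
  bool Tail = false, SExt = false, ZExt = false;
  CallInfoSketch &setCallee(std::string C) { Callee = std::move(C); return *this; }
  CallInfoSketch &setTailCall(bool V = true) { Tail = V; return *this; }
  CallInfoSketch &setSExtResult(bool V = true) { SExt = V; return *this; }
  CallInfoSketch &setZExtResult(bool V = true) { ZExt = V; return *this; }
};

int main() {
  CallInfoSketch CLI;
  // Chained configuration reads top-to-bottom; unset fields keep defaults.
  CLI.setCallee("__sync_synchronize").setTailCall(false).setSExtResult(true);
  return CLI.SExt ? 0 : 1;
}
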
@@ -2090,12 +2092,12 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, EVT RetVT,
TLI.getPointerTy());
Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
- TargetLowering::
- CallLoweringInfo CLI(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false,
- false, 0, TLI.getLibcallCallingConv(LC),
- /*isTailCall=*/false,
- /*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
- Callee, Args, DAG, dl);
+
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(dl).setChain(DAG.getEntryNode())
+ .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, &Args, 0)
+ .setSExtResult(isSigned).setZExtResult(!isSigned);
+
std::pair<SDValue,SDValue> CallInfo = TLI.LowerCallTo(CLI);
return CallInfo.first;
@@ -2124,11 +2126,12 @@ SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC,
TLI.getPointerTy());
Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext());
- TargetLowering::
- CallLoweringInfo CLI(InChain, RetTy, isSigned, !isSigned, false, false,
- 0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false,
- /*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
- Callee, Args, DAG, SDLoc(Node));
+
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(SDLoc(Node)).setChain(InChain)
+ .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, &Args, 0)
+ .setSExtResult(isSigned).setZExtResult(!isSigned);
+
std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);
return CallInfo;
@@ -2183,7 +2186,7 @@ static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
}
- return TLI.getLibcallName(LC) != 0;
+ return TLI.getLibcallName(LC) != nullptr;
}
/// useDivRem - Only issue divrem libcall if both quotient and remainder are
@@ -2261,11 +2264,11 @@ SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node,
TLI.getPointerTy());
SDLoc dl(Node);
- TargetLowering::
- CallLoweringInfo CLI(InChain, RetTy, isSigned, !isSigned, false, false,
- 0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false,
- /*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
- Callee, Args, DAG, dl);
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(dl).setChain(InChain)
+ .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, &Args, 0)
+ .setSExtResult(isSigned).setZExtResult(!isSigned);
+
std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);
// Remainder is loaded back from the stack frame.
@@ -2286,7 +2289,7 @@ static bool isSinCosLibcallAvailable(SDNode *Node, const TargetLowering &TLI) {
case MVT::f128: LC = RTLIB::SINCOS_F128; break;
case MVT::ppcf128: LC = RTLIB::SINCOS_PPCF128; break;
}
- return TLI.getLibcallName(LC) != 0;
+ return TLI.getLibcallName(LC) != nullptr;
}
/// canCombineSinCosLibcall - Return true if sincos libcall is available and
@@ -2375,12 +2378,11 @@ SelectionDAGLegalize::ExpandSinCosLibCall(SDNode *Node,
TLI.getPointerTy());
SDLoc dl(Node);
- TargetLowering::
- CallLoweringInfo CLI(InChain, Type::getVoidTy(*DAG.getContext()),
- false, false, false, false,
- 0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false,
- /*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
- Callee, Args, DAG, dl);
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(dl).setChain(InChain)
+ .setCallee(TLI.getLibcallCallingConv(LC),
+ Type::getVoidTy(*DAG.getContext()), Callee, &Args, 0);
+
std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);
Results.push_back(DAG.getLoad(RetVT, dl, CallInfo.second, SinPtr,
@@ -2990,15 +2992,13 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
// If the target didn't lower this, lower it to '__sync_synchronize()' call
// FIXME: handle "fence singlethread" more efficiently.
TargetLowering::ArgListTy Args;
- TargetLowering::
- CallLoweringInfo CLI(Node->getOperand(0),
- Type::getVoidTy(*DAG.getContext()),
- false, false, false, false, 0, CallingConv::C,
- /*isTailCall=*/false,
- /*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
- DAG.getExternalSymbol("__sync_synchronize",
- TLI.getPointerTy()),
- Args, DAG, dl);
+
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(dl).setChain(Node->getOperand(0))
+ .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()),
+ DAG.getExternalSymbol("__sync_synchronize", TLI.getPointerTy()),
+ &Args, 0);
+
std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
Results.push_back(CallResult.second);
@@ -3071,14 +3071,10 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
case ISD::TRAP: {
// If this operation is not supported, lower it to 'abort()' call
TargetLowering::ArgListTy Args;
- TargetLowering::
- CallLoweringInfo CLI(Node->getOperand(0),
- Type::getVoidTy(*DAG.getContext()),
- false, false, false, false, 0, CallingConv::C,
- /*isTailCall=*/false,
- /*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
- DAG.getExternalSymbol("abort", TLI.getPointerTy()),
- Args, DAG, dl);
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(dl).setChain(Node->getOperand(0))
+ .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()),
+ DAG.getExternalSymbol("abort", TLI.getPointerTy()), &Args, 0);
std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
Results.push_back(CallResult.second);
@@ -3304,7 +3300,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
TLI.getVectorIdxTy())));
}
- Tmp1 = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], Ops.size());
+ Tmp1 = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
// We may have changed the BUILD_VECTOR type. Cast it back to the Node type.
Tmp1 = DAG.getNode(ISD::BITCAST, dl, Node->getValueType(0), Tmp1);
Results.push_back(Tmp1);
@@ -3625,6 +3621,23 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Node->getOperand(1)));
break;
}
+
+ SDValue Lo, Hi;
+ EVT HalfType = VT.getHalfSizedIntegerVT(*DAG.getContext());
+ if (TLI.isOperationLegalOrCustom(ISD::ZERO_EXTEND, VT) &&
+ TLI.isOperationLegalOrCustom(ISD::ANY_EXTEND, VT) &&
+ TLI.isOperationLegalOrCustom(ISD::SHL, VT) &&
+ TLI.isOperationLegalOrCustom(ISD::OR, VT) &&
+ TLI.expandMUL(Node, Lo, Hi, HalfType, DAG)) {
+ Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Lo);
+ Hi = DAG.getNode(ISD::ANY_EXTEND, dl, VT, Hi);
+ SDValue Shift = DAG.getConstant(HalfType.getSizeInBits(),
+ TLI.getShiftAmountTy(HalfType));
+ Hi = DAG.getNode(ISD::SHL, dl, VT, Hi, Shift);
+ Results.push_back(DAG.getNode(ISD::OR, dl, VT, Lo, Hi));
+ break;
+ }
+
Tmp1 = ExpandIntLibCall(Node, false,
RTLIB::MUL_I8,
RTLIB::MUL_I16, RTLIB::MUL_I32,
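
The new fast path above asks TLI.expandMUL for half-width Lo/Hi parts and reassembles the full-width product instead of always falling through to a libcall. The reassembly step, sketched with plain integers standing in for SDValues (assuming a 64-bit VT with 32-bit halves):

#include <cassert>
#include <cstdint>

uint64_t combineHalves(uint32_t Lo, uint32_t Hi) {
  uint64_t WideLo = (uint64_t)Lo;        // ZERO_EXTEND of the low half
  uint64_t WideHi = (uint64_t)Hi << 32;  // ANY_EXTEND then SHL by half width
  return WideHi | WideLo;                // OR merges the two halves
}

int main() {
  assert(combineHalves(0xDEADBEEFu, 0x12345678u) == 0x12345678DEADBEEFull);
  return 0;
}
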
@@ -3698,8 +3711,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
BottomHalf = DAG.getNode(Ops[isSigned][1], dl, DAG.getVTList(VT, VT), LHS,
RHS);
TopHalf = BottomHalf.getValue(1);
- } else if (TLI.isTypeLegal(EVT::getIntegerVT(*DAG.getContext(),
- VT.getSizeInBits() * 2))) {
+ } else if (TLI.isTypeLegal(WideVT)) {
LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS);
RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS);
Tmp1 = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS);
@@ -3857,7 +3869,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
// If we expanded the SETCC by inverting the condition code, then wrap
// the existing SETCC in a NOT to restore the intended condition.
if (NeedInvert)
- Tmp1 = DAG.getNOT(dl, Tmp1, Tmp1->getValueType(0));
+ Tmp1 = DAG.getLogicalNOT(dl, Tmp1, Tmp1->getValueType(0));
Results.push_back(Tmp1);
break;
@@ -3994,8 +4006,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
VT.getScalarType(), Ex, Sh));
}
SDValue Result =
- DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0),
- &Scalars[0], Scalars.size());
+ DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0), Scalars);
ReplaceNode(SDValue(Node, 0), Result);
break;
}
diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index ecf4c5d..6b8fec6 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -24,6 +24,8 @@
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
+#define DEBUG_TYPE "legalize-types"
+
/// GetFPLibCall - Return the right libcall for the given floating point type.
static RTLIB::Libcall GetFPLibCall(EVT VT,
RTLIB::Libcall Call_F32,
@@ -674,7 +676,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_BR_CC(SDNode *N) {
// If softenSetCCOperands returned a scalar, we need to compare the result
// against zero to select between true and false values.
- if (NewRHS.getNode() == 0) {
+ if (!NewRHS.getNode()) {
NewRHS = DAG.getConstant(0, NewLHS.getValueType());
CCCode = ISD::SETNE;
}
@@ -720,7 +722,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_SELECT_CC(SDNode *N) {
// If softenSetCCOperands returned a scalar, we need to compare the result
// against zero to select between true and false values.
- if (NewRHS.getNode() == 0) {
+ if (!NewRHS.getNode()) {
NewRHS = DAG.getConstant(0, NewLHS.getValueType());
CCCode = ISD::SETNE;
}
@@ -742,7 +744,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_SETCC(SDNode *N) {
TLI.softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, SDLoc(N));
// If softenSetCCOperands returned a scalar, use it.
- if (NewRHS.getNode() == 0) {
+ if (!NewRHS.getNode()) {
assert(NewLHS.getValueType() == N->getValueType(0) &&
"Unexpected setcc expansion!");
return NewLHS;
@@ -1340,7 +1342,7 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_BR_CC(SDNode *N) {
// If ExpandSetCCOperands returned a scalar, we need to compare the result
// against zero to select between true and false values.
- if (NewRHS.getNode() == 0) {
+ if (!NewRHS.getNode()) {
NewRHS = DAG.getConstant(0, NewLHS.getValueType());
CCCode = ISD::SETNE;
}
@@ -1433,7 +1435,7 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_SELECT_CC(SDNode *N) {
// If ExpandSetCCOperands returned a scalar, we need to compare the result
// against zero to select between true and false values.
- if (NewRHS.getNode() == 0) {
+ if (!NewRHS.getNode()) {
NewRHS = DAG.getConstant(0, NewLHS.getValueType());
CCCode = ISD::SETNE;
}
@@ -1450,7 +1452,7 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_SETCC(SDNode *N) {
FloatExpandSetCCOperands(NewLHS, NewRHS, CCCode, SDLoc(N));
// If ExpandSetCCOperands returned a scalar, use it.
- if (NewRHS.getNode() == 0) {
+ if (!NewRHS.getNode()) {
assert(NewLHS.getValueType() == N->getValueType(0) &&
"Unexpected setcc expansion!");
return NewLHS;
diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 18b2376..2483184 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -24,6 +24,8 @@
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
+#define DEBUG_TYPE "legalize-types"
+
//===----------------------------------------------------------------------===//
// Integer Result Promotion
//===----------------------------------------------------------------------===//
@@ -266,9 +268,9 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BSWAP(SDNode *N) {
EVT NVT = Op.getValueType();
SDLoc dl(N);
- unsigned DiffBits = NVT.getSizeInBits() - OVT.getSizeInBits();
+ unsigned DiffBits = NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits();
return DAG.getNode(ISD::SRL, dl, NVT, DAG.getNode(ISD::BSWAP, dl, NVT, Op),
- DAG.getConstant(DiffBits, TLI.getPointerTy()));
+ DAG.getConstant(DiffBits, TLI.getShiftAmountTy(NVT)));
}
SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_PAIR(SDNode *N) {
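
Promoting a BSWAP leaves the swapped bytes in the high end of the wider register, hence the SRL by the width difference; using per-scalar sizes and the proper shift-amount type makes this hold for vector types as well. A scalar sketch, assuming Clang/GCC's __builtin_bswap32:

#include <cassert>
#include <cstdint>

uint16_t bswap16Via32(uint16_t X) {
  uint32_t Wide = X;                           // promoted operand, high bytes zero
  uint32_t Swapped = __builtin_bswap32(Wide);  // bytes land in the top half
  return (uint16_t)(Swapped >> 16);            // SRL by DiffBits = 32 - 16
}

int main() {
  assert(bswap16Via32(0x1234) == 0x3412);
  return 0;
}
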
@@ -432,7 +434,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Overflow(SDNode *N) {
EVT ValueVTs[] = { N->getValueType(0), NVT };
SDValue Ops[] = { N->getOperand(0), N->getOperand(1) };
SDValue Res = DAG.getNode(N->getOpcode(), SDLoc(N),
- DAG.getVTList(ValueVTs, 2), Ops, 2);
+ DAG.getVTList(ValueVTs), Ops);
// Modified the sum result - switch anything that used the old sum to use
// the new one.
@@ -931,7 +933,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_BUILD_VECTOR(SDNode *N) {
for (unsigned i = 0; i < NumElts; ++i)
NewOps.push_back(GetPromotedInteger(N->getOperand(i)));
- return SDValue(DAG.UpdateNodeOperands(N, &NewOps[0], NumElts), 0);
+ return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);
}
SDValue DAGTypeLegalizer::PromoteIntOp_CONVERT_RNDSAT(SDNode *N) {
@@ -1270,6 +1272,7 @@ std::pair <SDValue, SDValue> DAGTypeLegalizer::ExpandAtomic(SDNode *Node) {
/// and the shift amount is a constant 'Amt'. Expand the operation.
void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, unsigned Amt,
SDValue &Lo, SDValue &Hi) {
+ assert(Amt && "Expected zero shifts to be already optimized away.");
SDLoc DL(N);
// Expand the incoming operand to be shifted, so that we have its parts
SDValue InL, InH;
@@ -1296,9 +1299,9 @@ void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, unsigned Amt,
// Emit this X << 1 as X+X.
SDVTList VTList = DAG.getVTList(NVT, MVT::Glue);
SDValue LoOps[2] = { InL, InL };
- Lo = DAG.getNode(ISD::ADDC, DL, VTList, LoOps, 2);
+ Lo = DAG.getNode(ISD::ADDC, DL, VTList, LoOps);
SDValue HiOps[3] = { InH, InH, Lo.getValue(1) };
- Hi = DAG.getNode(ISD::ADDE, DL, VTList, HiOps, 3);
+ Hi = DAG.getNode(ISD::ADDE, DL, VTList, HiOps);
} else {
Lo = DAG.getNode(ISD::SHL, DL, NVT, InL, DAG.getConstant(Amt, ShTy));
Hi = DAG.getNode(ISD::OR, DL, NVT,
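
The X << 1 as X+X case above is the double-word shift by one: adding the low half to itself produces the carry that the high-half add consumes. A sketch with explicit carry handling (assuming 32-bit halves):

#include <cassert>
#include <cstdint>

void shiftLeftOne(uint32_t &Lo, uint32_t &Hi) {
  uint32_t NewLo = Lo + Lo;      // ADDC: may carry out of the low half
  uint32_t Carry = NewLo < Lo;   // recover the carry bit
  Hi = Hi + Hi + Carry;          // ADDE: high halves plus carry in
  Lo = NewLo;
}

int main() {
  uint32_t Lo = 0x80000001u, Hi = 0x00000001u; // value 0x0000000180000001
  shiftLeftOne(Lo, Hi);
  assert(Lo == 0x00000002u && Hi == 0x00000003u); // 0x0000000300000002
  return 0;
}
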
@@ -1372,7 +1375,7 @@ ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) {
APInt HighBitMask = APInt::getHighBitsSet(ShBits, ShBits - Log2_32(NVTBits));
APInt KnownZero, KnownOne;
- DAG.ComputeMaskedBits(N->getOperand(1), KnownZero, KnownOne);
+ DAG.computeKnownBits(N->getOperand(1), KnownZero, KnownOne);
// If we don't know anything about the high bits, exit.
if (((KnownZero|KnownOne) & HighBitMask) == 0)
@@ -1547,20 +1550,20 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N,
if (hasCarry) {
SDVTList VTList = DAG.getVTList(NVT, MVT::Glue);
if (N->getOpcode() == ISD::ADD) {
- Lo = DAG.getNode(ISD::ADDC, dl, VTList, LoOps, 2);
+ Lo = DAG.getNode(ISD::ADDC, dl, VTList, LoOps);
HiOps[2] = Lo.getValue(1);
- Hi = DAG.getNode(ISD::ADDE, dl, VTList, HiOps, 3);
+ Hi = DAG.getNode(ISD::ADDE, dl, VTList, HiOps);
} else {
- Lo = DAG.getNode(ISD::SUBC, dl, VTList, LoOps, 2);
+ Lo = DAG.getNode(ISD::SUBC, dl, VTList, LoOps);
HiOps[2] = Lo.getValue(1);
- Hi = DAG.getNode(ISD::SUBE, dl, VTList, HiOps, 3);
+ Hi = DAG.getNode(ISD::SUBE, dl, VTList, HiOps);
}
return;
}
if (N->getOpcode() == ISD::ADD) {
- Lo = DAG.getNode(ISD::ADD, dl, NVT, LoOps, 2);
- Hi = DAG.getNode(ISD::ADD, dl, NVT, HiOps, 2);
+ Lo = DAG.getNode(ISD::ADD, dl, NVT, LoOps);
+ Hi = DAG.getNode(ISD::ADD, dl, NVT, makeArrayRef(HiOps, 2));
SDValue Cmp1 = DAG.getSetCC(dl, getSetCCResultType(NVT), Lo, LoOps[0],
ISD::SETULT);
SDValue Carry1 = DAG.getSelect(dl, NVT, Cmp1,
@@ -1572,8 +1575,8 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N,
DAG.getConstant(1, NVT), Carry1);
Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, Carry2);
} else {
- Lo = DAG.getNode(ISD::SUB, dl, NVT, LoOps, 2);
- Hi = DAG.getNode(ISD::SUB, dl, NVT, HiOps, 2);
+ Lo = DAG.getNode(ISD::SUB, dl, NVT, LoOps);
+ Hi = DAG.getNode(ISD::SUB, dl, NVT, makeArrayRef(HiOps, 2));
SDValue Cmp =
DAG.getSetCC(dl, getSetCCResultType(LoOps[0].getValueType()),
LoOps[0], LoOps[1], ISD::SETULT);
@@ -1596,13 +1599,13 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUBC(SDNode *N,
SDValue HiOps[3] = { LHSH, RHSH };
if (N->getOpcode() == ISD::ADDC) {
- Lo = DAG.getNode(ISD::ADDC, dl, VTList, LoOps, 2);
+ Lo = DAG.getNode(ISD::ADDC, dl, VTList, LoOps);
HiOps[2] = Lo.getValue(1);
- Hi = DAG.getNode(ISD::ADDE, dl, VTList, HiOps, 3);
+ Hi = DAG.getNode(ISD::ADDE, dl, VTList, HiOps);
} else {
- Lo = DAG.getNode(ISD::SUBC, dl, VTList, LoOps, 2);
+ Lo = DAG.getNode(ISD::SUBC, dl, VTList, LoOps);
HiOps[2] = Lo.getValue(1);
- Hi = DAG.getNode(ISD::SUBE, dl, VTList, HiOps, 3);
+ Hi = DAG.getNode(ISD::SUBE, dl, VTList, HiOps);
}
// Legalized the flag result - switch anything that used the old flag to
@@ -1621,9 +1624,9 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUBE(SDNode *N,
SDValue LoOps[3] = { LHSL, RHSL, N->getOperand(2) };
SDValue HiOps[3] = { LHSH, RHSH };
- Lo = DAG.getNode(N->getOpcode(), dl, VTList, LoOps, 3);
+ Lo = DAG.getNode(N->getOpcode(), dl, VTList, LoOps);
HiOps[2] = Lo.getValue(1);
- Hi = DAG.getNode(N->getOpcode(), dl, VTList, HiOps, 3);
+ Hi = DAG.getNode(N->getOpcode(), dl, VTList, HiOps);
// Legalized the flag result - switch anything that used the old flag to
// use the new one.
@@ -1712,9 +1715,13 @@ void DAGTypeLegalizer::ExpandIntRes_Constant(SDNode *N,
SDValue &Lo, SDValue &Hi) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
unsigned NBitWidth = NVT.getSizeInBits();
- const APInt &Cst = cast<ConstantSDNode>(N)->getAPIntValue();
- Lo = DAG.getConstant(Cst.trunc(NBitWidth), NVT);
- Hi = DAG.getConstant(Cst.lshr(NBitWidth).trunc(NBitWidth), NVT);
+ auto Constant = cast<ConstantSDNode>(N);
+ const APInt &Cst = Constant->getAPIntValue();
+ bool IsTarget = Constant->isTargetOpcode();
+ bool IsOpaque = Constant->isOpaque();
+ Lo = DAG.getConstant(Cst.trunc(NBitWidth), NVT, IsTarget, IsOpaque);
+ Hi = DAG.getConstant(Cst.lshr(NBitWidth).trunc(NBitWidth), NVT, IsTarget,
+ IsOpaque);
}
void DAGTypeLegalizer::ExpandIntRes_CTLZ(SDNode *N,
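
Besides threading the target/opaque flags through, the Lo/Hi split itself is unchanged: truncate for the low part, logical-shift-right then truncate for the high part. Sketched on plain integers:

#include <cassert>
#include <cstdint>

void splitConstant(uint64_t Cst, uint32_t &Lo, uint32_t &Hi) {
  Lo = (uint32_t)Cst;          // Cst.trunc(NBitWidth)
  Hi = (uint32_t)(Cst >> 32);  // Cst.lshr(NBitWidth).trunc(NBitWidth)
}

int main() {
  uint32_t Lo, Hi;
  splitConstant(0xCAFEBABE12345678ull, Lo, Hi);
  assert(Lo == 0x12345678u && Hi == 0xCAFEBABEu);
  return 0;
}
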
@@ -1923,73 +1930,12 @@ void DAGTypeLegalizer::ExpandIntRes_MUL(SDNode *N,
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
SDLoc dl(N);
- bool HasMULHS = TLI.isOperationLegalOrCustom(ISD::MULHS, NVT);
- bool HasMULHU = TLI.isOperationLegalOrCustom(ISD::MULHU, NVT);
- bool HasSMUL_LOHI = TLI.isOperationLegalOrCustom(ISD::SMUL_LOHI, NVT);
- bool HasUMUL_LOHI = TLI.isOperationLegalOrCustom(ISD::UMUL_LOHI, NVT);
- if (HasMULHU || HasMULHS || HasUMUL_LOHI || HasSMUL_LOHI) {
- SDValue LL, LH, RL, RH;
- GetExpandedInteger(N->getOperand(0), LL, LH);
- GetExpandedInteger(N->getOperand(1), RL, RH);
- unsigned OuterBitSize = VT.getSizeInBits();
- unsigned InnerBitSize = NVT.getSizeInBits();
- unsigned LHSSB = DAG.ComputeNumSignBits(N->getOperand(0));
- unsigned RHSSB = DAG.ComputeNumSignBits(N->getOperand(1));
-
- APInt HighMask = APInt::getHighBitsSet(OuterBitSize, InnerBitSize);
- if (DAG.MaskedValueIsZero(N->getOperand(0), HighMask) &&
- DAG.MaskedValueIsZero(N->getOperand(1), HighMask)) {
- // The inputs are both zero-extended.
- if (HasUMUL_LOHI) {
- // We can emit a umul_lohi.
- Lo = DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(NVT, NVT), LL, RL);
- Hi = SDValue(Lo.getNode(), 1);
- return;
- }
- if (HasMULHU) {
- // We can emit a mulhu+mul.
- Lo = DAG.getNode(ISD::MUL, dl, NVT, LL, RL);
- Hi = DAG.getNode(ISD::MULHU, dl, NVT, LL, RL);
- return;
- }
- }
- if (LHSSB > InnerBitSize && RHSSB > InnerBitSize) {
- // The input values are both sign-extended.
- if (HasSMUL_LOHI) {
- // We can emit a smul_lohi.
- Lo = DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(NVT, NVT), LL, RL);
- Hi = SDValue(Lo.getNode(), 1);
- return;
- }
- if (HasMULHS) {
- // We can emit a mulhs+mul.
- Lo = DAG.getNode(ISD::MUL, dl, NVT, LL, RL);
- Hi = DAG.getNode(ISD::MULHS, dl, NVT, LL, RL);
- return;
- }
- }
- if (HasUMUL_LOHI) {
- // Lo,Hi = umul LHS, RHS.
- SDValue UMulLOHI = DAG.getNode(ISD::UMUL_LOHI, dl,
- DAG.getVTList(NVT, NVT), LL, RL);
- Lo = UMulLOHI;
- Hi = UMulLOHI.getValue(1);
- RH = DAG.getNode(ISD::MUL, dl, NVT, LL, RH);
- LH = DAG.getNode(ISD::MUL, dl, NVT, LH, RL);
- Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, RH);
- Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, LH);
- return;
- }
- if (HasMULHU) {
- Lo = DAG.getNode(ISD::MUL, dl, NVT, LL, RL);
- Hi = DAG.getNode(ISD::MULHU, dl, NVT, LL, RL);
- RH = DAG.getNode(ISD::MUL, dl, NVT, LL, RH);
- LH = DAG.getNode(ISD::MUL, dl, NVT, LH, RL);
- Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, RH);
- Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, LH);
- return;
- }
- }
+ SDValue LL, LH, RL, RH;
+ GetExpandedInteger(N->getOperand(0), LL, LH);
+ GetExpandedInteger(N->getOperand(1), RL, RH);
+
+ if (TLI.expandMUL(N, Lo, Hi, NVT, DAG, LL, LH, RL, RH))
+ return;
// If nothing else, we can make a libcall.
RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
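
The deleted block is what TLI.expandMUL now centralizes: prefer UMUL_LOHI/SMUL_LOHI or MULH*+MUL when legal, else fall back to the schoolbook decomposition of a double-width multiply from half-width parts. That decomposition, sketched for 64-bit from 32-bit halves (illustrative, not the helper itself):

#include <cassert>
#include <cstdint>

// Hi:Lo = (LH:LL) * (RH:RL) mod 2^64. The LH*RH term shifts out entirely.
void mul64From32(uint32_t LL, uint32_t LH, uint32_t RL, uint32_t RH,
                 uint32_t &Lo, uint32_t &Hi) {
  uint64_t LowProd = (uint64_t)LL * RL;  // UMUL_LOHI of the low halves
  Lo = (uint32_t)LowProd;
  Hi = (uint32_t)(LowProd >> 32) + LL * RH + LH * RL; // cross terms wrap mod 2^32
}

int main() {
  uint64_t A = 0x1234567890ABCDEFull, B = 0x0FEDCBA987654321ull;
  uint32_t Lo, Hi;
  mul64From32((uint32_t)A, (uint32_t)(A >> 32),
              (uint32_t)B, (uint32_t)(B >> 32), Lo, Hi);
  uint64_t Prod = A * B; // truncating 64-bit product
  assert(Lo == (uint32_t)Prod && Hi == (uint32_t)(Prod >> 32));
  return 0;
}
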
@@ -2120,7 +2066,7 @@ void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N,
ShiftOp = DAG.getZExtOrTrunc(ShiftOp, dl, ShiftTy);
SDValue Ops[] = { LHSL, LHSH, ShiftOp };
- Lo = DAG.getNode(PartsOpc, dl, DAG.getVTList(VT, VT), Ops, 3);
+ Lo = DAG.getNode(PartsOpc, dl, DAG.getVTList(VT, VT), Ops);
Hi = Lo.getValue(1);
return;
}
@@ -2352,12 +2298,12 @@ void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N,
Args.push_back(Entry);
SDValue Func = DAG.getExternalSymbol(TLI.getLibcallName(LC), PtrVT);
- TargetLowering::
- CallLoweringInfo CLI(Chain, RetTy, true, false, false, false,
- 0, TLI.getLibcallCallingConv(LC),
- /*isTailCall=*/false,
- /*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
- Func, Args, DAG, dl);
+
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(dl).setChain(Chain)
+ .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Func, &Args, 0)
+ .setSExtResult();
+
std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);
SplitInteger(CallInfo.first, Lo, Hi);
@@ -2576,7 +2522,8 @@ void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS,
// NOTE: on targets without efficient SELECT of bools, we can always use
// this identity: (B1 ? B2 : B3) --> (B1 & B2)|(!B1&B3)
- TargetLowering::DAGCombinerInfo DagCombineInfo(DAG, AfterLegalizeTypes, true, NULL);
+ TargetLowering::DAGCombinerInfo DagCombineInfo(DAG, AfterLegalizeTypes, true,
+ nullptr);
SDValue Tmp1, Tmp2;
if (TLI.isTypeLegal(LHSLo.getValueType()) &&
TLI.isTypeLegal(RHSLo.getValueType()))
@@ -2629,7 +2576,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_BR_CC(SDNode *N) {
// If ExpandSetCCOperands returned a scalar, we need to compare the result
// against zero to select between true and false values.
- if (NewRHS.getNode() == 0) {
+ if (!NewRHS.getNode()) {
NewRHS = DAG.getConstant(0, NewLHS.getValueType());
CCCode = ISD::SETNE;
}
@@ -2647,7 +2594,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_SELECT_CC(SDNode *N) {
// If ExpandSetCCOperands returned a scalar, we need to compare the result
// against zero to select between true and false values.
- if (NewRHS.getNode() == 0) {
+ if (!NewRHS.getNode()) {
NewRHS = DAG.getConstant(0, NewLHS.getValueType());
CCCode = ISD::SETNE;
}
@@ -2664,7 +2611,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_SETCC(SDNode *N) {
IntegerExpandSetCCOperands(NewLHS, NewRHS, CCCode, SDLoc(N));
// If ExpandSetCCOperands returned a scalar, use it.
- if (NewRHS.getNode() == 0) {
+ if (!NewRHS.getNode()) {
assert(NewLHS.getValueType() == N->getValueType(0) &&
"Unexpected setcc expansion!");
return NewLHS;
@@ -2912,7 +2859,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N) {
Ops.push_back(Op);
}
- return DAG.getNode(ISD::BUILD_VECTOR, dl, NOutVT, &Ops[0], Ops.size());
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, NOutVT, Ops);
}
@@ -2959,7 +2906,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_VECTOR(SDNode *N) {
Ops.push_back(Op);
}
- return DAG.getNode(ISD::BUILD_VECTOR, dl, NOutVT, &Ops[0], Ops.size());
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, NOutVT, Ops);
}
SDValue DAGTypeLegalizer::PromoteIntRes_SCALAR_TO_VECTOR(SDNode *N) {
@@ -3007,7 +2954,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CONCAT_VECTORS(SDNode *N) {
}
}
- return DAG.getNode(ISD::BUILD_VECTOR, dl, NOutVT, &Ops[0], Ops.size());
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, NOutVT, Ops);
}
SDValue DAGTypeLegalizer::PromoteIntRes_INSERT_VECTOR_ELT(SDNode *N) {
@@ -3063,6 +3010,5 @@ SDValue DAGTypeLegalizer::PromoteIntOp_CONCAT_VECTORS(SDNode *N) {
}
}
- return DAG.getNode(ISD::BUILD_VECTOR, dl, N->getValueType(0),
- &NewOps[0], NewOps.size());
- }
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, N->getValueType(0), NewOps);
+}
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
index e141883..3971fc3 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -22,6 +22,8 @@
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
+#define DEBUG_TYPE "legalize-types"
+
static cl::opt<bool>
EnableExpensiveChecks("enable-legalize-types-checking", cl::Hidden);
@@ -159,7 +161,7 @@ void DAGTypeLegalizer::PerformExpensiveChecks() {
if (Mapped & 128)
dbgs() << " WidenedVectors";
dbgs() << "\n";
- llvm_unreachable(0);
+ llvm_unreachable(nullptr);
}
}
}
@@ -433,7 +435,7 @@ NodeDone:
if (Failed) {
I->dump(&DAG); dbgs() << "\n";
- llvm_unreachable(0);
+ llvm_unreachable(nullptr);
}
}
#endif
@@ -488,7 +490,7 @@ SDNode *DAGTypeLegalizer::AnalyzeNewNode(SDNode *N) {
// Some operands changed - update the node.
if (!NewOps.empty()) {
- SDNode *M = DAG.UpdateNodeOperands(N, &NewOps[0], NewOps.size());
+ SDNode *M = DAG.UpdateNodeOperands(N, NewOps);
if (M != N) {
// The node morphed into a different node. Normally for this to happen
// the original node would have to be marked NewNode. However this can
@@ -736,7 +738,7 @@ void DAGTypeLegalizer::SetPromotedInteger(SDValue Op, SDValue Result) {
AnalyzeNewValue(Result);
SDValue &OpEntry = PromotedIntegers[Op];
- assert(OpEntry.getNode() == 0 && "Node is already promoted!");
+ assert(!OpEntry.getNode() && "Node is already promoted!");
OpEntry = Result;
}
@@ -747,7 +749,7 @@ void DAGTypeLegalizer::SetSoftenedFloat(SDValue Op, SDValue Result) {
AnalyzeNewValue(Result);
SDValue &OpEntry = SoftenedFloats[Op];
- assert(OpEntry.getNode() == 0 && "Node is already converted to integer!");
+ assert(!OpEntry.getNode() && "Node is already converted to integer!");
OpEntry = Result;
}
@@ -761,7 +763,7 @@ void DAGTypeLegalizer::SetScalarizedVector(SDValue Op, SDValue Result) {
AnalyzeNewValue(Result);
SDValue &OpEntry = ScalarizedVectors[Op];
- assert(OpEntry.getNode() == 0 && "Node is already scalarized!");
+ assert(!OpEntry.getNode() && "Node is already scalarized!");
OpEntry = Result;
}
@@ -787,7 +789,7 @@ void DAGTypeLegalizer::SetExpandedInteger(SDValue Op, SDValue Lo,
// Remember that this is the result of the node.
std::pair<SDValue, SDValue> &Entry = ExpandedIntegers[Op];
- assert(Entry.first.getNode() == 0 && "Node already expanded");
+ assert(!Entry.first.getNode() && "Node already expanded");
Entry.first = Lo;
Entry.second = Hi;
}
@@ -814,7 +816,7 @@ void DAGTypeLegalizer::SetExpandedFloat(SDValue Op, SDValue Lo,
// Remember that this is the result of the node.
std::pair<SDValue, SDValue> &Entry = ExpandedFloats[Op];
- assert(Entry.first.getNode() == 0 && "Node already expanded");
+ assert(!Entry.first.getNode() && "Node already expanded");
Entry.first = Lo;
Entry.second = Hi;
}
@@ -843,7 +845,7 @@ void DAGTypeLegalizer::SetSplitVector(SDValue Op, SDValue Lo,
// Remember that this is the result of the node.
std::pair<SDValue, SDValue> &Entry = SplitVectors[Op];
- assert(Entry.first.getNode() == 0 && "Node already split");
+ assert(!Entry.first.getNode() && "Node already split");
Entry.first = Lo;
Entry.second = Hi;
}
@@ -855,7 +857,7 @@ void DAGTypeLegalizer::SetWidenedVector(SDValue Op, SDValue Result) {
AnalyzeNewValue(Result);
SDValue &OpEntry = WidenedVectors[Op];
- assert(OpEntry.getNode() == 0 && "Node already widened!");
+ assert(!OpEntry.getNode() && "Node already widened!");
OpEntry = Result;
}
@@ -1007,7 +1009,7 @@ SDValue DAGTypeLegalizer::LibCallify(RTLIB::Libcall LC, SDNode *N,
unsigned NumOps = N->getNumOperands();
SDLoc dl(N);
if (NumOps == 0) {
- return TLI.makeLibCall(DAG, LC, N->getValueType(0), 0, 0, isSigned,
+ return TLI.makeLibCall(DAG, LC, N->getValueType(0), nullptr, 0, isSigned,
dl).first;
} else if (NumOps == 1) {
SDValue Op = N->getOperand(0);
@@ -1049,11 +1051,12 @@ DAGTypeLegalizer::ExpandChainLibCall(RTLIB::Libcall LC,
TLI.getPointerTy());
Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext());
- TargetLowering::
- CallLoweringInfo CLI(InChain, RetTy, isSigned, !isSigned, false, false,
- 0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false,
- /*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
- Callee, Args, DAG, SDLoc(Node));
+
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(SDLoc(Node)).setChain(InChain)
+ .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, &Args, 0)
+ .setSExtResult(isSigned).setZExtResult(!isSigned);
+
std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);
return CallInfo;
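The hunk above swaps the long positional CallLoweringInfo constructor for chained setters that each return *this. A self-contained sketch of that idiom, using illustrative names rather than the real LLVM class:

    // Chained setters (the "named parameter" idiom): every setter returns
    // *this, so a call site reads as a list of named options instead of a
    // thirteen-argument constructor call.
    struct CallInfoSketch {
      bool SExt = false, ZExt = false;
      int DebugLoc = 0;
      CallInfoSketch &setDebugLoc(int DL) { DebugLoc = DL; return *this; }
      CallInfoSketch &setSExtResult(bool V) { SExt = V; return *this; }
      CallInfoSketch &setZExtResult(bool V) { ZExt = V; return *this; }
    };

    int main(int argc, char **) {
      bool isSigned = argc > 1; // stand-in for the real signedness flag
      CallInfoSketch CLI;
      CLI.setDebugLoc(0).setSExtResult(isSigned).setZExtResult(!isSigned);
    }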
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 947ea10..e4bbc78 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -16,7 +16,6 @@
#ifndef SELECTIONDAG_LEGALIZETYPES_H
#define SELECTIONDAG_LEGALIZETYPES_H
-#define DEBUG_TYPE "legalize-types"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/CodeGen/SelectionDAG.h"
@@ -540,6 +539,7 @@ private:
SDValue ScalarizeVecOp_UnaryOp(SDNode *N);
SDValue ScalarizeVecOp_CONCAT_VECTORS(SDNode *N);
SDValue ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
+ SDValue ScalarizeVecOp_VSELECT(SDNode *N);
SDValue ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo);
SDValue ScalarizeVecOp_FP_ROUND(SDNode *N, unsigned OpNo);
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
index e9424f2..f40ed76 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
@@ -23,6 +23,8 @@
#include "llvm/IR/DataLayout.h"
using namespace llvm;
+#define DEBUG_TYPE "legalize-types"
+
//===----------------------------------------------------------------------===//
// Generic Result Expansion.
//===----------------------------------------------------------------------===//
@@ -352,7 +354,8 @@ SDValue DAGTypeLegalizer::ExpandOp_BITCAST(SDNode *N) {
SmallVector<SDValue, 8> Ops;
IntegerToVector(N->getOperand(0), NumElts, Ops, NVT.getVectorElementType());
- SDValue Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, &Ops[0], NumElts);
+ SDValue Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, NVT,
+ makeArrayRef(Ops.data(), NumElts));
return DAG.getNode(ISD::BITCAST, dl, N->getValueType(0), Vec);
}
@@ -388,7 +391,7 @@ SDValue DAGTypeLegalizer::ExpandOp_BUILD_VECTOR(SDNode *N) {
SDValue NewVec = DAG.getNode(ISD::BUILD_VECTOR, dl,
EVT::getVectorVT(*DAG.getContext(),
NewVT, NewElts.size()),
- &NewElts[0], NewElts.size());
+ NewElts);
// Convert the new vector to the old vector type.
return DAG.getNode(ISD::BITCAST, dl, VecVT, NewVec);
@@ -447,7 +450,7 @@ SDValue DAGTypeLegalizer::ExpandOp_SCALAR_TO_VECTOR(SDNode *N) {
SDValue UndefVal = DAG.getUNDEF(Ops[0].getValueType());
for (unsigned i = 1; i < NumElts; ++i)
Ops[i] = UndefVal;
- return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], NumElts);
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
}
SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) {
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 551d054..898cd29 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -63,6 +63,8 @@ class VectorLegalizer {
SDValue ExpandUINT_TO_FLOAT(SDValue Op);
// Implement expansion for SIGN_EXTEND_INREG using SRL and SRA.
SDValue ExpandSEXTINREG(SDValue Op);
+ // Expand bswap of vectors into a shuffle if legal.
+ SDValue ExpandBSWAP(SDValue Op);
// Implement vselect in terms of XOR, AND, OR when blend is not supported
// by the target.
SDValue ExpandVSELECT(SDValue Op);
@@ -152,8 +154,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i)
Ops.push_back(LegalizeOp(Node->getOperand(i)));
- SDValue Result =
- SDValue(DAG.UpdateNodeOperands(Op.getNode(), Ops.data(), Ops.size()), 0);
+ SDValue Result = SDValue(DAG.UpdateNodeOperands(Op.getNode(), Ops), 0);
if (Op.getOpcode() == ISD::LOAD) {
LoadSDNode *LD = cast<LoadSDNode>(Op.getNode());
@@ -298,6 +299,8 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case TargetLowering::Expand:
if (Node->getOpcode() == ISD::SIGN_EXTEND_INREG)
Result = ExpandSEXTINREG(Op);
+ else if (Node->getOpcode() == ISD::BSWAP)
+ Result = ExpandBSWAP(Op);
else if (Node->getOpcode() == ISD::VSELECT)
Result = ExpandVSELECT(Op);
else if (Node->getOpcode() == ISD::SELECT)
@@ -343,7 +346,7 @@ SDValue VectorLegalizer::PromoteVectorOp(SDValue Op) {
Operands[j] = Op.getOperand(j);
}
- Op = DAG.getNode(Op.getOpcode(), dl, NVT, &Operands[0], Operands.size());
+ Op = DAG.getNode(Op.getOpcode(), dl, NVT, Operands);
return DAG.getNode(ISD::BITCAST, dl, VT, Op);
}
@@ -377,8 +380,7 @@ SDValue VectorLegalizer::PromoteVectorOpINT_TO_FP(SDValue Op) {
Operands[j] = Op.getOperand(j);
}
- return DAG.getNode(Op.getOpcode(), dl, Op.getValueType(), &Operands[0],
- Operands.size());
+ return DAG.getNode(Op.getOpcode(), dl, Op.getValueType(), Operands);
}
// For FP_TO_INT we promote the result type to a vector type with wider
@@ -546,10 +548,9 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) {
}
}
- SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- &LoadChains[0], LoadChains.size());
+ SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
SDValue Value = DAG.getNode(ISD::BUILD_VECTOR, dl,
- Op.getNode()->getValueType(0), &Vals[0], Vals.size());
+ Op.getNode()->getValueType(0), Vals);
AddLegalizedOperand(Op.getValue(0), Value);
AddLegalizedOperand(Op.getValue(1), NewChain);
@@ -603,8 +604,7 @@ SDValue VectorLegalizer::ExpandStore(SDValue Op) {
Stores.push_back(Store);
}
- SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- &Stores[0], Stores.size());
+ SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
AddLegalizedOperand(Op, TF);
return TF;
}
@@ -648,7 +648,7 @@ SDValue VectorLegalizer::ExpandSELECT(SDValue Op) {
// Broadcast the mask so that the entire vector is all-one or all zero.
SmallVector<SDValue, 8> Ops(NumElem, Mask);
- Mask = DAG.getNode(ISD::BUILD_VECTOR, DL, MaskTy, &Ops[0], Ops.size());
+ Mask = DAG.getNode(ISD::BUILD_VECTOR, DL, MaskTy, Ops);
// Bitcast the operands to be the same type as the mask.
// This is needed when we select between FP types because
@@ -686,6 +686,29 @@ SDValue VectorLegalizer::ExpandSEXTINREG(SDValue Op) {
return DAG.getNode(ISD::SRA, DL, VT, Op, ShiftSz);
}
+SDValue VectorLegalizer::ExpandBSWAP(SDValue Op) {
+ EVT VT = Op.getValueType();
+
+ // Generate a byte wise shuffle mask for the BSWAP.
+ SmallVector<int, 16> ShuffleMask;
+ int ScalarSizeInBytes = VT.getScalarSizeInBits() / 8;
+ for (int I = 0, E = VT.getVectorNumElements(); I != E; ++I)
+ for (int J = ScalarSizeInBytes - 1; J >= 0; --J)
+ ShuffleMask.push_back((I * ScalarSizeInBytes) + J);
+
+ EVT ByteVT = EVT::getVectorVT(*DAG.getContext(), MVT::i8, ShuffleMask.size());
+
+ // Only emit a shuffle if the mask is legal.
+ if (!TLI.isShuffleMaskLegal(ShuffleMask, ByteVT))
+ return DAG.UnrollVectorOp(Op.getNode());
+
+ SDLoc DL(Op);
+ Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Op.getOperand(0));
+ Op = DAG.getVectorShuffle(ByteVT, DL, Op, DAG.getUNDEF(ByteVT),
+ ShuffleMask.data());
+ return DAG.getNode(ISD::BITCAST, DL, VT, Op);
+}
+
SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) {
// Implement VSELECT in terms of XOR, AND, OR
// on platforms which do not support blend natively.
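The new ExpandBSWAP above reduces a vector byte swap to a single byte-wise shuffle: each element contributes its byte indices in reverse order. Extracting just the mask loop into a standalone sketch makes the ordering concrete; for a <2 x i32> input it yields 3 2 1 0 7 6 5 4:

    #include <cstdio>
    #include <vector>

    // The mask loop from ExpandBSWAP, extracted: element I contributes its
    // ScalarSizeInBytes byte indices highest-first.
    static std::vector<int> bswapMask(int NumElts, int ScalarSizeInBytes) {
      std::vector<int> Mask;
      for (int I = 0; I != NumElts; ++I)
        for (int J = ScalarSizeInBytes - 1; J >= 0; --J)
          Mask.push_back(I * ScalarSizeInBytes + J);
      return Mask;
    }

    int main() {
      for (int M : bswapMask(2, 4)) // <2 x i32>
        std::printf("%d ", M);      // prints: 3 2 1 0 7 6 5 4
      std::printf("\n");
    }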
@@ -803,7 +826,7 @@ SDValue VectorLegalizer::UnrollVSETCC(SDValue Op) {
(EltVT.getSizeInBits()), EltVT),
DAG.getConstant(0, EltVT));
}
- return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], NumElems);
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
}
}
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 940a9c9..368eba3 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -26,6 +26,8 @@
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
+#define DEBUG_TYPE "legalize-types"
+
//===----------------------------------------------------------------------===//
// Result Vector Scalarization: <1 x ty> -> ty.
//===----------------------------------------------------------------------===//
@@ -331,12 +333,24 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_VSETCC(SDNode *N) {
assert(N->getValueType(0).isVector() &&
N->getOperand(0).getValueType().isVector() &&
"Operand types must be vectors");
-
- SDValue LHS = GetScalarizedVector(N->getOperand(0));
- SDValue RHS = GetScalarizedVector(N->getOperand(1));
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ EVT OpVT = LHS.getValueType();
EVT NVT = N->getValueType(0).getVectorElementType();
SDLoc DL(N);
+ // The result needs scalarizing, but it's not a given that the source does.
+ if (getTypeAction(OpVT) == TargetLowering::TypeScalarizeVector) {
+ LHS = GetScalarizedVector(LHS);
+ RHS = GetScalarizedVector(RHS);
+ } else {
+ EVT VT = OpVT.getVectorElementType();
+ LHS = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, LHS,
+ DAG.getConstant(0, TLI.getVectorIdxTy()));
+ RHS = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, RHS,
+ DAG.getConstant(0, TLI.getVectorIdxTy()));
+ }
+
// Turn it into a scalar SETCC.
SDValue Res = DAG.getNode(ISD::SETCC, DL, MVT::i1, LHS, RHS,
N->getOperand(2));
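The rewritten ScalarizeVecRes_VSETCC covers the case where only the compare's result scalarizes while its sources stay wide and legal, so it extracts lane 0 of each operand rather than asking for a scalarized copy that was never made. The same shape in miniature, with std::array standing in for the DAG vector types:

    #include <array>
    #include <cstddef>

    // Only the result is a one-lane vector; the sources may be wide. Take
    // lane 0 of each operand and do an ordinary scalar compare.
    template <typename T, std::size_t N>
    bool setccLane0(const std::array<T, N> &LHS, const std::array<T, N> &RHS) {
      return LHS[0] < RHS[0]; // "<" stands in for the SETCC condition code
    }

    int main() {
      std::array<int, 4> A{1, 2, 3, 4}, B{2, 2, 2, 2};
      return setccLane0(A, B) ? 0 : 1; // 1 < 2, so returns 0
    }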
@@ -358,7 +372,7 @@ bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) {
dbgs() << "\n");
SDValue Res = SDValue();
- if (Res.getNode() == 0) {
+ if (!Res.getNode()) {
switch (N->getOpcode()) {
default:
#ifndef NDEBUG
@@ -382,6 +396,9 @@ bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::EXTRACT_VECTOR_ELT:
Res = ScalarizeVecOp_EXTRACT_VECTOR_ELT(N);
break;
+ case ISD::VSELECT:
+ Res = ScalarizeVecOp_VSELECT(N);
+ break;
case ISD::STORE:
Res = ScalarizeVecOp_STORE(cast<StoreSDNode>(N), OpNo);
break;
@@ -420,13 +437,11 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_UnaryOp(SDNode *N) {
assert(N->getValueType(0).getVectorNumElements() == 1 &&
"Unexected vector type!");
SDValue Elt = GetScalarizedVector(N->getOperand(0));
- SmallVector<SDValue, 1> Ops(1);
- Ops[0] = DAG.getNode(N->getOpcode(), SDLoc(N),
- N->getValueType(0).getScalarType(), Elt);
+ SDValue Op = DAG.getNode(N->getOpcode(), SDLoc(N),
+ N->getValueType(0).getScalarType(), Elt);
// Revectorize the result so the types line up with what the uses of this
// expression expect.
- return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), N->getValueType(0),
- &Ops[0], 1);
+ return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), N->getValueType(0), Op);
}
/// ScalarizeVecOp_CONCAT_VECTORS - The vectors to concatenate have length one -
@@ -435,8 +450,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_CONCAT_VECTORS(SDNode *N) {
SmallVector<SDValue, 8> Ops(N->getNumOperands());
for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i)
Ops[i] = GetScalarizedVector(N->getOperand(i));
- return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), N->getValueType(0),
- &Ops[0], Ops.size());
+ return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), N->getValueType(0), Ops);
}
/// ScalarizeVecOp_EXTRACT_VECTOR_ELT - If the input is a vector that needs to
@@ -450,6 +464,18 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
return Res;
}
+
+/// ScalarizeVecOp_VSELECT - If the input condition is a vector that needs to be
+/// scalarized, it must be <1 x i1>, so just convert to a normal ISD::SELECT
+/// (still with vector output type since that was acceptable if we got here).
+SDValue DAGTypeLegalizer::ScalarizeVecOp_VSELECT(SDNode *N) {
+ SDValue ScalarCond = GetScalarizedVector(N->getOperand(0));
+ EVT VT = N->getValueType(0);
+
+ return DAG.getNode(ISD::SELECT, SDLoc(N), VT, ScalarCond, N->getOperand(1),
+ N->getOperand(2));
+}
+
/// ScalarizeVecOp_STORE - If the value to store is a vector that needs to be
/// scalarized, it must be <1 x ty>. Just store the element.
SDValue DAGTypeLegalizer::ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo){
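The new ScalarizeVecOp_VSELECT above relies on the condition having been scalarized to a single boolean, at which point a vector select degenerates into an ordinary select. In miniature, again with std::array as a stand-in:

    #include <array>

    // A <1 x bool> condition selects whole values, so it reduces to a plain
    // select on lane 0, which is the scalar ISD::SELECT emitted above.
    template <typename T>
    std::array<T, 1> vselect1(bool Cond, std::array<T, 1> A, std::array<T, 1> B) {
      return Cond ? A : B;
    }

    int main() {
      std::array<int, 1> A{10}, B{20};
      return vselect1(true, A, B)[0] == 10 ? 0 : 1;
    }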
@@ -696,10 +722,10 @@ void DAGTypeLegalizer::SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo,
std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
unsigned LoNumElts = LoVT.getVectorNumElements();
SmallVector<SDValue, 8> LoOps(N->op_begin(), N->op_begin()+LoNumElts);
- Lo = DAG.getNode(ISD::BUILD_VECTOR, dl, LoVT, &LoOps[0], LoOps.size());
+ Lo = DAG.getNode(ISD::BUILD_VECTOR, dl, LoVT, LoOps);
SmallVector<SDValue, 8> HiOps(N->op_begin()+LoNumElts, N->op_end());
- Hi = DAG.getNode(ISD::BUILD_VECTOR, dl, HiVT, &HiOps[0], HiOps.size());
+ Hi = DAG.getNode(ISD::BUILD_VECTOR, dl, HiVT, HiOps);
}
void DAGTypeLegalizer::SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo,
@@ -717,10 +743,10 @@ void DAGTypeLegalizer::SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo,
std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
SmallVector<SDValue, 8> LoOps(N->op_begin(), N->op_begin()+NumSubvectors);
- Lo = DAG.getNode(ISD::CONCAT_VECTORS, dl, LoVT, &LoOps[0], LoOps.size());
+ Lo = DAG.getNode(ISD::CONCAT_VECTORS, dl, LoVT, LoOps);
SmallVector<SDValue, 8> HiOps(N->op_begin()+NumSubvectors, N->op_end());
- Hi = DAG.getNode(ISD::CONCAT_VECTORS, dl, HiVT, &HiOps[0], HiOps.size());
+ Hi = DAG.getNode(ISD::CONCAT_VECTORS, dl, HiVT, HiOps);
}
void DAGTypeLegalizer::SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo,
@@ -1064,7 +1090,7 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N,
}
// Construct the Lo/Hi output using a BUILD_VECTOR.
- Output = DAG.getNode(ISD::BUILD_VECTOR,dl,NewVT, &SVOps[0], SVOps.size());
+ Output = DAG.getNode(ISD::BUILD_VECTOR, dl, NewVT, SVOps);
} else if (InputUsed[0] == -1U) {
// No input vectors were used! The result is undefined.
Output = DAG.getUNDEF(NewVT);
@@ -1100,7 +1126,7 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false))
return false;
- if (Res.getNode() == 0) {
+ if (!Res.getNode()) {
switch (N->getOpcode()) {
default:
#ifndef NDEBUG
@@ -1342,8 +1368,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_CONCAT_VECTORS(SDNode *N) {
}
}
- return DAG.getNode(ISD::BUILD_VECTOR, DL, N->getValueType(0),
- &Elts[0], Elts.size());
+ return DAG.getNode(ISD::BUILD_VECTOR, DL, N->getValueType(0), Elts);
}
SDValue DAGTypeLegalizer::SplitVecOp_TRUNCATE(SDNode *N) {
@@ -1700,8 +1725,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) {
while (SubConcatEnd < OpsToConcat)
SubConcatOps[SubConcatEnd++] = undefVec;
ConcatOps[SubConcatIdx] = DAG.getNode(ISD::CONCAT_VECTORS, dl,
- NextVT, &SubConcatOps[0],
- OpsToConcat);
+ NextVT, SubConcatOps);
ConcatEnd = SubConcatIdx + 1;
}
}
@@ -1720,7 +1744,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) {
for (unsigned j = ConcatEnd; j < NumOps; ++j)
ConcatOps[j] = UndefVal;
}
- return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, &ConcatOps[0], NumOps);
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT,
+ makeArrayRef(ConcatOps.data(), NumOps));
}
SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
@@ -1762,8 +1787,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
SDValue UndefVal = DAG.getUNDEF(InVT);
for (unsigned i = 1; i != NumConcat; ++i)
Ops[i] = UndefVal;
- SDValue InVec = DAG.getNode(ISD::CONCAT_VECTORS, DL, InWidenVT,
- &Ops[0], NumConcat);
+ SDValue InVec = DAG.getNode(ISD::CONCAT_VECTORS, DL, InWidenVT, Ops);
if (N->getNumOperands() == 1)
return DAG.getNode(Opcode, DL, WidenVT, InVec);
return DAG.getNode(Opcode, DL, WidenVT, InVec, N->getOperand(1));
@@ -1798,7 +1822,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
for (; i < WidenNumElts; ++i)
Ops[i] = UndefVal;
- return DAG.getNode(ISD::BUILD_VECTOR, DL, WidenVT, &Ops[0], WidenNumElts);
+ return DAG.getNode(ISD::BUILD_VECTOR, DL, WidenVT, Ops);
}
SDValue DAGTypeLegalizer::WidenVecRes_POWI(SDNode *N) {
@@ -1922,11 +1946,9 @@ SDValue DAGTypeLegalizer::WidenVecRes_BITCAST(SDNode *N) {
SDValue NewVec;
if (InVT.isVector())
- NewVec = DAG.getNode(ISD::CONCAT_VECTORS, dl,
- NewInVT, &Ops[0], NewNumElts);
+ NewVec = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewInVT, Ops);
else
- NewVec = DAG.getNode(ISD::BUILD_VECTOR, dl,
- NewInVT, &Ops[0], NewNumElts);
+ NewVec = DAG.getNode(ISD::BUILD_VECTOR, dl, NewInVT, Ops);
return DAG.getNode(ISD::BITCAST, dl, WidenVT, NewVec);
}
}
@@ -1951,7 +1973,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BUILD_VECTOR(SDNode *N) {
assert(WidenNumElts >= NumElts && "Shrinking vector instead of widening!");
NewOps.append(WidenNumElts - NumElts, DAG.getUNDEF(EltVT));
- return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &NewOps[0], NewOps.size());
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, NewOps);
}
SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) {
@@ -1974,7 +1996,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) {
Ops[i] = N->getOperand(i);
for (unsigned i = NumOperands; i != NumConcat; ++i)
Ops[i] = UndefVal;
- return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, &Ops[0], NumConcat);
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, Ops);
}
} else {
InputWidened = true;
@@ -2020,7 +2042,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) {
SDValue UndefVal = DAG.getUNDEF(EltVT);
for (; Idx < WidenNumElts; ++Idx)
Ops[Idx] = UndefVal;
- return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &Ops[0], WidenNumElts);
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, Ops);
}
SDValue DAGTypeLegalizer::WidenVecRes_CONVERT_RNDSAT(SDNode *N) {
@@ -2065,7 +2087,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONVERT_RNDSAT(SDNode *N) {
for (unsigned i = 1; i != NumConcat; ++i)
Ops[i] = UndefVal;
- InOp = DAG.getNode(ISD::CONCAT_VECTORS, dl, InWidenVT, &Ops[0],NumConcat);
+ InOp = DAG.getNode(ISD::CONCAT_VECTORS, dl, InWidenVT, Ops);
return DAG.getConvertRndSat(WidenVT, dl, InOp, DTyOp, STyOp, RndOp,
SatOp, CvtCode);
}
@@ -2098,7 +2120,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONVERT_RNDSAT(SDNode *N) {
for (; i < WidenNumElts; ++i)
Ops[i] = UndefVal;
- return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &Ops[0], WidenNumElts);
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, Ops);
}
SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) {
@@ -2137,7 +2159,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) {
SDValue UndefVal = DAG.getUNDEF(EltVT);
for (; i < WidenNumElts; ++i)
Ops[i] = UndefVal;
- return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &Ops[0], WidenNumElts);
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, Ops);
}
SDValue DAGTypeLegalizer::WidenVecRes_INSERT_VECTOR_ELT(SDNode *N) {
@@ -2165,8 +2187,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_LOAD(SDNode *N) {
if (LdChain.size() == 1)
NewChain = LdChain[0];
else
- NewChain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other,
- &LdChain[0], LdChain.size());
+ NewChain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other, LdChain);
// Modified the chain - switch anything that used the old chain to use
// the new one.
@@ -2372,7 +2393,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) {
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp,
DAG.getConstant(i, TLI.getVectorIdxTy())));
- return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], NumElts);
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
}
SDValue DAGTypeLegalizer::WidenVecOp_BITCAST(SDNode *N) {
@@ -2421,7 +2442,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_CONCAT_VECTORS(SDNode *N) {
Ops[Idx++] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
DAG.getConstant(j, TLI.getVectorIdxTy()));
}
- return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], NumElts);
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
}
SDValue DAGTypeLegalizer::WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N) {
@@ -2450,8 +2471,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) {
if (StChain.size() == 1)
return StChain[0];
else
- return DAG.getNode(ISD::TokenFactor, SDLoc(ST),
- MVT::Other,&StChain[0],StChain.size());
+ return DAG.getNode(ISD::TokenFactor, SDLoc(ST), MVT::Other, StChain);
}
SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) {
@@ -2626,8 +2646,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
ConcatOps[0] = LdOp;
for (unsigned i = 1; i != NumConcat; ++i)
ConcatOps[i] = UndefVal;
- return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, &ConcatOps[0],
- NumConcat);
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, ConcatOps);
}
// Load vector by using multiple loads from largest vector to scalar
@@ -2661,8 +2680,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
Loads.push_back(DAG.getUNDEF(L->getValueType(0)));
size += L->getValueSizeInBits(0);
}
- L = DAG.getNode(ISD::CONCAT_VECTORS, dl, LdOp->getValueType(0),
- &Loads[0], Loads.size());
+ L = DAG.getNode(ISD::CONCAT_VECTORS, dl, LdOp->getValueType(0), Loads);
}
} else {
L = DAG.getLoad(NewVT, dl, Chain, BasePtr,
@@ -2706,7 +2724,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
if (NewLdTy != LdTy) {
// Create a larger vector
ConcatOps[End-1] = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewLdTy,
- &ConcatOps[Idx], End - Idx);
+ makeArrayRef(&ConcatOps[Idx], End - Idx));
Idx = End - 1;
LdTy = NewLdTy;
}
@@ -2715,7 +2733,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
if (WidenWidth == LdTy.getSizeInBits()*(End - Idx))
return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT,
- &ConcatOps[Idx], End - Idx);
+ makeArrayRef(&ConcatOps[Idx], End - Idx));
// We need to fill the rest with undefs to build the vector
unsigned NumOps = WidenWidth / LdTy.getSizeInBits();
@@ -2728,7 +2746,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
for (; i != NumOps; ++i)
WidenOps[i] = UndefVal;
}
- return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, &WidenOps[0],NumOps);
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, WidenOps);
}
SDValue
@@ -2779,7 +2797,7 @@ DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVectorImpl<SDValue> &LdChain,
for (; i != WidenNumElts; ++i)
Ops[i] = UndefVal;
- return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &Ops[0], Ops.size());
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, Ops);
}
@@ -2925,7 +2943,7 @@ SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT) {
for (unsigned i = 1; i != NumConcat; ++i)
Ops[i] = UndefVal;
- return DAG.getNode(ISD::CONCAT_VECTORS, dl, NVT, &Ops[0], NumConcat);
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, NVT, Ops);
}
if (WidenNumElts < InNumElts && InNumElts % WidenNumElts)
@@ -2944,5 +2962,5 @@ SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT) {
SDValue UndefVal = DAG.getUNDEF(EltVT);
for ( ; Idx < WidenNumElts; ++Idx)
Ops[Idx] = UndefVal;
- return DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, &Ops[0], WidenNumElts);
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, Ops);
}
diff --git a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
index 3b3424d..f92230c 100644
--- a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
+++ b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
@@ -19,7 +19,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "scheduler"
#include "llvm/CodeGen/ResourcePriorityQueue.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
@@ -31,6 +30,8 @@
using namespace llvm;
+#define DEBUG_TYPE "scheduler"
+
static cl::opt<bool> DisableDFASched("disable-dfa-sched", cl::Hidden,
cl::ZeroOrMore, cl::init(false),
cl::desc("Disable use of DFA during scheduling"));
@@ -49,7 +50,7 @@ ResourcePriorityQueue::ResourcePriorityQueue(SelectionDAGISel *IS) :
TLI = IS->getTargetLowering();
const TargetMachine &tm = (*IS->MF).getTarget();
- ResourcesModel = tm.getInstrInfo()->CreateTargetScheduleState(&tm,NULL);
+ ResourcesModel = tm.getInstrInfo()->CreateTargetScheduleState(&tm,nullptr);
// This hard requirement could be relaxed, but for now
// do not let it proceed.
assert (ResourcesModel && "Unimplemented CreateTargetScheduleState.");
@@ -214,7 +215,7 @@ bool resource_sort::operator()(const SUnit *LHS, const SUnit *RHS) const {
/// getSingleUnscheduledPred - If there is exactly one unscheduled predecessor
/// of SU, return it, otherwise return null.
SUnit *ResourcePriorityQueue::getSingleUnscheduledPred(SUnit *SU) {
- SUnit *OnlyAvailablePred = 0;
+ SUnit *OnlyAvailablePred = nullptr;
for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
I != E; ++I) {
SUnit &Pred = *I->getSUnit();
@@ -222,7 +223,7 @@ SUnit *ResourcePriorityQueue::getSingleUnscheduledPred(SUnit *SU) {
// We found an available, but not scheduled, predecessor. If it's the
// only one we have found, keep track of it... otherwise give up.
if (OnlyAvailablePred && OnlyAvailablePred != &Pred)
- return 0;
+ return nullptr;
OnlyAvailablePred = &Pred;
}
}
@@ -581,7 +582,7 @@ void ResourcePriorityQueue::adjustPriorityOfUnscheduledPreds(SUnit *SU) {
if (SU->isAvailable) return; // All preds scheduled.
SUnit *OnlyAvailablePred = getSingleUnscheduledPred(SU);
- if (OnlyAvailablePred == 0 || !OnlyAvailablePred->isAvailable)
+ if (!OnlyAvailablePred || !OnlyAvailablePred->isAvailable)
return;
// Okay, we found a single predecessor that is available, but not scheduled.
@@ -598,7 +599,7 @@ void ResourcePriorityQueue::adjustPriorityOfUnscheduledPreds(SUnit *SU) {
/// to be placed in scheduling sequence.
SUnit *ResourcePriorityQueue::pop() {
if (empty())
- return 0;
+ return nullptr;
std::vector<SUnit *>::iterator Best = Queue.begin();
if (!DisableDFASched) {
diff --git a/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h b/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
index b62bd62..ee54292 100644
--- a/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
+++ b/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
@@ -45,14 +45,17 @@ private:
unsigned FrameIx; // valid for stack objects
} u;
MDNode *mdPtr;
+ bool IsIndirect;
uint64_t Offset;
DebugLoc DL;
unsigned Order;
bool Invalid;
public:
// Constructor for non-constants.
- SDDbgValue(MDNode *mdP, SDNode *N, unsigned R, uint64_t off, DebugLoc dl,
- unsigned O) : mdPtr(mdP), Offset(off), DL(dl), Order(O),
+ SDDbgValue(MDNode *mdP, SDNode *N, unsigned R,
+ bool indir, uint64_t off, DebugLoc dl,
+ unsigned O) : mdPtr(mdP), IsIndirect(indir),
+ Offset(off), DL(dl), Order(O),
Invalid(false) {
kind = SDNODE;
u.s.Node = N;
@@ -62,14 +65,16 @@ public:
// Constructor for constants.
SDDbgValue(MDNode *mdP, const Value *C, uint64_t off, DebugLoc dl,
unsigned O) :
- mdPtr(mdP), Offset(off), DL(dl), Order(O), Invalid(false) {
+ mdPtr(mdP), IsIndirect(false), Offset(off), DL(dl), Order(O),
+ Invalid(false) {
kind = CONST;
u.Const = C;
}
// Constructor for frame indices.
SDDbgValue(MDNode *mdP, unsigned FI, uint64_t off, DebugLoc dl, unsigned O) :
- mdPtr(mdP), Offset(off), DL(dl), Order(O), Invalid(false) {
+ mdPtr(mdP), IsIndirect(false), Offset(off), DL(dl), Order(O),
+ Invalid(false) {
kind = FRAMEIX;
u.FrameIx = FI;
}
@@ -92,6 +97,9 @@ public:
// Returns the FrameIx for a stack object
unsigned getFrameIx() { assert (kind==FRAMEIX); return u.FrameIx; }
+ // Returns whether this is an indirect value.
+ bool isIndirect() { return IsIndirect; }
+
// Returns the offset.
uint64_t getOffset() { return Offset; }
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
index 0687392..4d8c2c7 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
@@ -11,7 +11,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "pre-RA-sched"
#include "llvm/CodeGen/SchedulerRegistry.h"
#include "InstrEmitter.h"
#include "ScheduleDAGSDNodes.h"
@@ -28,6 +27,8 @@
#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
+#define DEBUG_TYPE "pre-RA-sched"
+
STATISTIC(NumUnfolds, "Number of nodes unfolded");
STATISTIC(NumDups, "Number of duplicated nodes");
STATISTIC(NumPRCopies, "Number of physical copies");
@@ -54,7 +55,7 @@ namespace {
}
SUnit *pop() {
- if (empty()) return NULL;
+ if (empty()) return nullptr;
SUnit *V = Queue.back();
Queue.pop_back();
return V;
@@ -117,11 +118,11 @@ void ScheduleDAGFast::Schedule() {
DEBUG(dbgs() << "********** List Scheduling **********\n");
NumLiveRegs = 0;
- LiveRegDefs.resize(TRI->getNumRegs(), NULL);
+ LiveRegDefs.resize(TRI->getNumRegs(), nullptr);
LiveRegCycles.resize(TRI->getNumRegs(), 0);
// Build the scheduling graph.
- BuildSchedGraph(NULL);
+ BuildSchedGraph(nullptr);
DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
SUnits[su].dumpAll(this));
@@ -144,7 +145,7 @@ void ScheduleDAGFast::ReleasePred(SUnit *SU, SDep *PredEdge) {
dbgs() << "*** Scheduling failed! ***\n";
PredSU->dump(this);
dbgs() << " has been released too many times!\n";
- llvm_unreachable(0);
+ llvm_unreachable(nullptr);
}
#endif
--PredSU->NumSuccsLeft;
@@ -198,7 +199,7 @@ void ScheduleDAGFast::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) {
assert(LiveRegDefs[I->getReg()] == SU &&
"Physical register dependency violated?");
--NumLiveRegs;
- LiveRegDefs[I->getReg()] = NULL;
+ LiveRegDefs[I->getReg()] = nullptr;
LiveRegCycles[I->getReg()] = 0;
}
}
@@ -211,18 +212,18 @@ void ScheduleDAGFast::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) {
/// successors to the newly created node.
SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) {
if (SU->getNode()->getGluedNode())
- return NULL;
+ return nullptr;
SDNode *N = SU->getNode();
if (!N)
- return NULL;
+ return nullptr;
SUnit *NewSU;
bool TryUnfold = false;
for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
EVT VT = N->getValueType(i);
if (VT == MVT::Glue)
- return NULL;
+ return nullptr;
else if (VT == MVT::Other)
TryUnfold = true;
}
@@ -230,13 +231,13 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) {
const SDValue &Op = N->getOperand(i);
EVT VT = Op.getNode()->getValueType(Op.getResNo());
if (VT == MVT::Glue)
- return NULL;
+ return nullptr;
}
if (TryUnfold) {
SmallVector<SDNode*, 2> NewNodes;
if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
- return NULL;
+ return nullptr;
DEBUG(dbgs() << "Unfolding SU # " << SU->NodeNum << "\n");
assert(NewNodes.size() == 2 && "Expected a load folding node!");
@@ -388,11 +389,11 @@ void ScheduleDAGFast::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
const TargetRegisterClass *DestRC,
const TargetRegisterClass *SrcRC,
SmallVectorImpl<SUnit*> &Copies) {
- SUnit *CopyFromSU = newSUnit(static_cast<SDNode *>(NULL));
+ SUnit *CopyFromSU = newSUnit(static_cast<SDNode *>(nullptr));
CopyFromSU->CopySrcRC = SrcRC;
CopyFromSU->CopyDstRC = DestRC;
- SUnit *CopyToSU = newSUnit(static_cast<SDNode *>(NULL));
+ SUnit *CopyToSU = newSUnit(static_cast<SDNode *>(nullptr));
CopyToSU->CopySrcRC = DestRC;
CopyToSU->CopyDstRC = SrcRC;
@@ -583,7 +584,7 @@ void ScheduleDAGFast::ListScheduleBottomUp() {
// and it is expensive.
// If cross copy register class is null, then it's not possible to copy
// the value at all.
- SUnit *NewDef = 0;
+ SUnit *NewDef = nullptr;
if (DestRC != RC) {
NewDef = CopyAndMoveSuccessors(LRDef);
if (!DestRC && !NewDef)
@@ -661,7 +662,7 @@ private:
void ScheduleDAGLinearize::ScheduleNode(SDNode *N) {
if (N->getNodeId() != 0)
- llvm_unreachable(0);
+ llvm_unreachable(nullptr);
if (!N->isMachineOpcode() &&
(N->getOpcode() == ISD::EntryToken || isPassiveNode(N)))
@@ -674,7 +675,7 @@ void ScheduleDAGLinearize::ScheduleNode(SDNode *N) {
unsigned NumOps = N->getNumOperands();
if (unsigned NumLeft = NumOps) {
- SDNode *GluedOpN = 0;
+ SDNode *GluedOpN = nullptr;
do {
const SDValue &Op = N->getOperand(NumLeft-1);
SDNode *OpN = Op.getNode();
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index c283664..78ec4df 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -15,7 +15,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "pre-RA-sched"
#include "llvm/CodeGen/SchedulerRegistry.h"
#include "ScheduleDAGSDNodes.h"
#include "llvm/ADT/STLExtras.h"
@@ -36,6 +35,8 @@
#include <climits>
using namespace llvm;
+#define DEBUG_TYPE "pre-RA-sched"
+
STATISTIC(NumBacktracks, "Number of times scheduler backtracked");
STATISTIC(NumUnfolds, "Number of nodes unfolded");
STATISTIC(NumDups, "Number of duplicated nodes");
@@ -163,7 +164,7 @@ public:
CodeGenOpt::Level OptLevel)
: ScheduleDAGSDNodes(mf),
NeedLatency(needlatency), AvailableQueue(availqueue), CurCycle(0),
- Topo(SUnits, NULL) {
+ Topo(SUnits, nullptr) {
const TargetMachine &tm = mf.getTarget();
if (DisableSchedCycles || !NeedLatency)
@@ -327,13 +328,13 @@ void ScheduleDAGRRList::Schedule() {
NumLiveRegs = 0;
// Allocate slots for each physical register, plus one for a special register
// to track the virtual resource of a calling sequence.
- LiveRegDefs.resize(TRI->getNumRegs() + 1, NULL);
- LiveRegGens.resize(TRI->getNumRegs() + 1, NULL);
+ LiveRegDefs.resize(TRI->getNumRegs() + 1, nullptr);
+ LiveRegGens.resize(TRI->getNumRegs() + 1, nullptr);
CallSeqEndForStart.clear();
assert(Interferences.empty() && LRegsMap.empty() && "stale Interferences");
// Build the scheduling graph.
- BuildSchedGraph(NULL);
+ BuildSchedGraph(nullptr);
DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
SUnits[su].dumpAll(this));
@@ -369,7 +370,7 @@ void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) {
dbgs() << "*** Scheduling failed! ***\n";
PredSU->dump(this);
dbgs() << " has been released too many times!\n";
- llvm_unreachable(0);
+ llvm_unreachable(nullptr);
}
#endif
--PredSU->NumSuccsLeft;
@@ -461,7 +462,7 @@ FindCallSeqStart(SDNode *N, unsigned &NestLevel, unsigned &MaxNest,
// to get to the CALLSEQ_BEGIN, but we need to find the path with the
// most nesting in order to ensure that we find the corresponding match.
if (N->getOpcode() == ISD::TokenFactor) {
- SDNode *Best = 0;
+ SDNode *Best = nullptr;
unsigned BestMaxNest = MaxNest;
for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
unsigned MyNestLevel = NestLevel;
@@ -497,10 +498,10 @@ FindCallSeqStart(SDNode *N, unsigned &NestLevel, unsigned &MaxNest,
N = N->getOperand(i).getNode();
goto found_chain_operand;
}
- return 0;
+ return nullptr;
found_chain_operand:;
if (N->getOpcode() == ISD::EntryToken)
- return 0;
+ return nullptr;
}
}
@@ -742,8 +743,8 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
if (I->isAssignedRegDep() && LiveRegDefs[I->getReg()] == SU) {
assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
--NumLiveRegs;
- LiveRegDefs[I->getReg()] = NULL;
- LiveRegGens[I->getReg()] = NULL;
+ LiveRegDefs[I->getReg()] = nullptr;
+ LiveRegGens[I->getReg()] = nullptr;
releaseInterferences(I->getReg());
}
}
@@ -757,8 +758,8 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
SUNode->getMachineOpcode() == (unsigned)TII->getCallFrameSetupOpcode()) {
assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
--NumLiveRegs;
- LiveRegDefs[CallResource] = NULL;
- LiveRegGens[CallResource] = NULL;
+ LiveRegDefs[CallResource] = nullptr;
+ LiveRegGens[CallResource] = nullptr;
releaseInterferences(CallResource);
}
}
@@ -813,8 +814,8 @@ void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
assert(LiveRegDefs[I->getReg()] == I->getSUnit() &&
"Physical register dependency violated?");
--NumLiveRegs;
- LiveRegDefs[I->getReg()] = NULL;
- LiveRegGens[I->getReg()] = NULL;
+ LiveRegDefs[I->getReg()] = nullptr;
+ LiveRegGens[I->getReg()] = nullptr;
releaseInterferences(I->getReg());
}
}
@@ -841,8 +842,8 @@ void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
SUNode->getMachineOpcode() == (unsigned)TII->getCallFrameDestroyOpcode()) {
assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
--NumLiveRegs;
- LiveRegDefs[CallResource] = NULL;
- LiveRegGens[CallResource] = NULL;
+ LiveRegDefs[CallResource] = nullptr;
+ LiveRegGens[CallResource] = nullptr;
releaseInterferences(CallResource);
}
}
@@ -855,7 +856,7 @@ void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
// This becomes the nearest def. Note that an earlier def may still be
// pending if this is a two-address node.
LiveRegDefs[I->getReg()] = SU;
- if (LiveRegGens[I->getReg()] == NULL ||
+ if (LiveRegGens[I->getReg()] == nullptr ||
I->getSUnit()->getHeight() < LiveRegGens[I->getReg()]->getHeight())
LiveRegGens[I->getReg()] = I->getSUnit();
}
@@ -936,17 +937,17 @@ static bool isOperandOf(const SUnit *SU, SDNode *N) {
SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
SDNode *N = SU->getNode();
if (!N)
- return NULL;
+ return nullptr;
if (SU->getNode()->getGluedNode())
- return NULL;
+ return nullptr;
SUnit *NewSU;
bool TryUnfold = false;
for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
EVT VT = N->getValueType(i);
if (VT == MVT::Glue)
- return NULL;
+ return nullptr;
else if (VT == MVT::Other)
TryUnfold = true;
}
@@ -954,18 +955,18 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
const SDValue &Op = N->getOperand(i);
EVT VT = Op.getNode()->getValueType(Op.getResNo());
if (VT == MVT::Glue)
- return NULL;
+ return nullptr;
}
if (TryUnfold) {
SmallVector<SDNode*, 2> NewNodes;
if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
- return NULL;
+ return nullptr;
// unfolding an x86 DEC64m operation results in store, dec, load which
// can't be handled here so quit
if (NewNodes.size() == 3)
- return NULL;
+ return nullptr;
DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n");
assert(NewNodes.size() == 2 && "Expected a load folding node!");
@@ -1136,11 +1137,11 @@ void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
const TargetRegisterClass *DestRC,
const TargetRegisterClass *SrcRC,
SmallVectorImpl<SUnit*> &Copies) {
- SUnit *CopyFromSU = CreateNewSUnit(NULL);
+ SUnit *CopyFromSU = CreateNewSUnit(nullptr);
CopyFromSU->CopySrcRC = SrcRC;
CopyFromSU->CopyDstRC = DestRC;
- SUnit *CopyToSU = CreateNewSUnit(NULL);
+ SUnit *CopyToSU = CreateNewSUnit(nullptr);
CopyToSU->CopySrcRC = DestRC;
CopyToSU->CopyDstRC = SrcRC;
@@ -1244,7 +1245,7 @@ static const uint32_t *getNodeRegMask(const SDNode *N) {
if (const RegisterMaskSDNode *Op =
dyn_cast<RegisterMaskSDNode>(N->getOperand(i).getNode()))
return Op->getRegMask();
- return NULL;
+ return nullptr;
}
/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
@@ -1355,7 +1356,7 @@ void ScheduleDAGRRList::releaseInterferences(unsigned Reg) {
/// (2) No Hazards: resources are available
/// (3) No Interferences: may unschedule to break register interferences.
SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
- SUnit *CurSU = AvailableQueue->empty() ? 0 : AvailableQueue->pop();
+ SUnit *CurSU = AvailableQueue->empty() ? nullptr : AvailableQueue->pop();
while (CurSU) {
SmallVector<unsigned, 4> LRegs;
if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
@@ -1389,7 +1390,7 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
// Try unscheduling up to the point where it's safe to schedule
// this node.
- SUnit *BtSU = NULL;
+ SUnit *BtSU = nullptr;
unsigned LiveCycle = UINT_MAX;
for (unsigned j = 0, ee = LRegs.size(); j != ee; ++j) {
unsigned Reg = LRegs[j];
@@ -1449,7 +1450,7 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
// expensive.
// If cross copy register class is null, then it's not possible to copy
// the value at all.
- SUnit *NewDef = 0;
+ SUnit *NewDef = nullptr;
if (DestRC != RC) {
NewDef = CopyAndMoveSuccessors(LRDef);
if (!DestRC && !NewDef)
@@ -1646,7 +1647,7 @@ public:
const TargetLowering *tli)
: SchedulingPriorityQueue(hasReadyFilter),
CurQueueId(0), TracksRegPressure(tracksrp), SrcOrder(srcorder),
- MF(mf), TII(tii), TRI(tri), TLI(tli), scheduleDAG(NULL) {
+ MF(mf), TII(tii), TRI(tri), TLI(tli), scheduleDAG(nullptr) {
if (TracksRegPressure) {
unsigned NumRC = TRI->getNumRegClasses();
RegLimit.resize(NumRC);
@@ -1674,7 +1675,7 @@ public:
void updateNode(const SUnit *SU) override;
void releaseState() override {
- SUnits = 0;
+ SUnits = nullptr;
SethiUllmanNumbers.clear();
std::fill(RegPressure.begin(), RegPressure.end(), 0);
}
@@ -1775,7 +1776,7 @@ public:
}
SUnit *pop() override {
- if (Queue.empty()) return NULL;
+ if (Queue.empty()) return nullptr;
SUnit *V = popFromQueue(Queue, Picker, scheduleDAG);
V->NodeQueueId = 0;
@@ -1783,7 +1784,7 @@ public:
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
- void dump(ScheduleDAG *DAG) const {
+ void dump(ScheduleDAG *DAG) const override {
// Emulate pop() without clobbering NodeQueueIds.
std::vector<SUnit*> DumpQueue = Queue;
SF DumpPicker = Picker;
@@ -2824,7 +2825,7 @@ void RegReductionPQBase::PrescheduleNodesWithMultipleUses() {
continue;
// Locate the single data predecessor.
- SUnit *PredSU = 0;
+ SUnit *PredSU = nullptr;
for (SUnit::const_pred_iterator II = SU->Preds.begin(),
EE = SU->Preds.end(); II != EE; ++II)
if (!II->isCtrl()) {
@@ -2980,7 +2981,7 @@ llvm::createBURRListDAGScheduler(SelectionDAGISel *IS,
const TargetRegisterInfo *TRI = TM.getRegisterInfo();
BURegReductionPriorityQueue *PQ =
- new BURegReductionPriorityQueue(*IS->MF, false, false, TII, TRI, 0);
+ new BURegReductionPriorityQueue(*IS->MF, false, false, TII, TRI, nullptr);
ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
PQ->setScheduleDAG(SD);
return SD;
@@ -2994,7 +2995,7 @@ llvm::createSourceListDAGScheduler(SelectionDAGISel *IS,
const TargetRegisterInfo *TRI = TM.getRegisterInfo();
SrcRegReductionPriorityQueue *PQ =
- new SrcRegReductionPriorityQueue(*IS->MF, false, true, TII, TRI, 0);
+ new SrcRegReductionPriorityQueue(*IS->MF, false, true, TII, TRI, nullptr);
ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
PQ->setScheduleDAG(SD);
return SD;
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index 5639894..de910b7 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -12,7 +12,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "pre-RA-sched"
#include "ScheduleDAGSDNodes.h"
#include "InstrEmitter.h"
#include "SDNodeDbgValue.h"
@@ -35,6 +34,8 @@
#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
+#define DEBUG_TYPE "pre-RA-sched"
+
STATISTIC(LoadsClustered, "Number of loads clustered together");
// This allows the latency-based scheduler to notice high-latency instructions
@@ -46,7 +47,7 @@ static cl::opt<int> HighLatencyCycles(
"instructions take for targets with no itinerary"));
ScheduleDAGSDNodes::ScheduleDAGSDNodes(MachineFunction &mf)
- : ScheduleDAG(mf), BB(0), DAG(0),
+ : ScheduleDAG(mf), BB(nullptr), DAG(nullptr),
InstrItins(mf.getTarget().getInstrItineraryData()) {}
/// Run - perform scheduling.
@@ -67,12 +68,12 @@ void ScheduleDAGSDNodes::Run(SelectionDAG *dag, MachineBasicBlock *bb) {
///
SUnit *ScheduleDAGSDNodes::newSUnit(SDNode *N) {
#ifndef NDEBUG
- const SUnit *Addr = 0;
+ const SUnit *Addr = nullptr;
if (!SUnits.empty())
Addr = &SUnits[0];
#endif
SUnits.push_back(SUnit(N, (unsigned)SUnits.size()));
- assert((Addr == 0 || Addr == &SUnits[0]) &&
+ assert((Addr == nullptr || Addr == &SUnits[0]) &&
"SUnits std::vector reallocated on the fly!");
SUnits.back().OrigNode = &SUnits.back();
SUnit *SU = &SUnits.back();
@@ -142,8 +143,8 @@ static void CloneNodeWithValues(SDNode *N, SelectionDAG *DAG,
if (ExtraOper.getNode())
Ops.push_back(ExtraOper);
- SDVTList VTList = DAG->getVTList(&VTs[0], VTs.size());
- MachineSDNode::mmo_iterator Begin = 0, End = 0;
+ SDVTList VTList = DAG->getVTList(VTs);
+ MachineSDNode::mmo_iterator Begin = nullptr, End = nullptr;
MachineSDNode *MN = dyn_cast<MachineSDNode>(N);
// Store memory references.
@@ -152,7 +153,7 @@ static void CloneNodeWithValues(SDNode *N, SelectionDAG *DAG,
End = MN->memoperands_end();
}
- DAG->MorphNodeTo(N, N->getOpcode(), VTList, &Ops[0], Ops.size());
+ DAG->MorphNodeTo(N, N->getOpcode(), VTList, Ops);
// Reset the memory references
if (MN)
@@ -205,7 +206,7 @@ static void RemoveUnusedGlue(SDNode *N, SelectionDAG *DAG) {
/// outputs to ensure they are scheduled together and in order. This
/// optimization may benefit some targets by improving cache locality.
void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode *Node) {
- SDNode *Chain = 0;
+ SDNode *Chain = nullptr;
unsigned NumOps = Node->getNumOperands();
if (Node->getOperand(NumOps-1).getValueType() == MVT::Other)
Chain = Node->getOperand(NumOps-1).getNode();
@@ -219,8 +220,11 @@ void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode *Node) {
DenseMap<long long, SDNode*> O2SMap; // Map from offset to SDNode.
bool Cluster = false;
SDNode *Base = Node;
+ // This algorithm requires a reasonably low use count before finding a match
+ // to avoid uselessly blowing up compile time in large blocks.
+ unsigned UseCount = 0;
for (SDNode::use_iterator I = Chain->use_begin(), E = Chain->use_end();
- I != E; ++I) {
+ I != E && UseCount < 100; ++I, ++UseCount) {
SDNode *User = *I;
if (User == Node || !Visited.insert(User))
continue;
@@ -237,6 +241,8 @@ void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode *Node) {
if (Offset2 < Offset1)
Base = User;
Cluster = true;
+ // Reset UseCount to allow more matches.
+ UseCount = 0;
}
if (!Cluster)
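The UseCount cap above bounds the walk over the chain's users: the counter advances on every use visited and drops back to zero whenever a load joins the cluster, so the scan only gives up after roughly 100 consecutive non-matches. The same budgeted-scan shape, extracted into a generic sketch (the predicate and the value type are placeholders):

    #include <cstddef>
    #include <vector>

    // Budgeted scan mirroring the UseCount logic in the hunk above: the
    // counter ticks each iteration and a hit restores the full budget.
    template <typename T, typename Pred>
    std::vector<T> scanWithBudget(const std::vector<T> &Uses, Pred IsMatch,
                                  unsigned Cap = 100) {
      std::vector<T> Hits;
      unsigned Count = 0;
      for (std::size_t I = 0; I != Uses.size() && Count < Cap; ++I, ++Count) {
        if (IsMatch(Uses[I])) {
          Hits.push_back(Uses[I]);
          Count = 0; // a match resets the budget, as in the patch
        }
      }
      return Hits;
    }

    int main() {
      std::vector<int> Uses{1, 0, 1, 0};
      std::vector<int> Hits =
          scanWithBudget(Uses, [](int U) { return U == 1; }, 3);
      return Hits.size() == 2 ? 0 : 1;
    }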
@@ -266,7 +272,7 @@ void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode *Node) {
// Cluster loads by adding MVT::Glue outputs and inputs. This also
// ensures they are scheduled in order of increasing addresses.
SDNode *Lead = Loads[0];
- SDValue InGlue = SDValue(0, 0);
+ SDValue InGlue = SDValue(nullptr, 0);
if (AddGlue(Lead, InGlue, true, DAG))
InGlue = SDValue(Lead, Lead->getNumValues() - 1);
for (unsigned I = 1, E = Loads.size(); I != E; ++I) {
@@ -567,7 +573,7 @@ void ScheduleDAGSDNodes::RegDefIter::Advance() {
return; // Found a normal regdef.
}
Node = Node->getGluedNode();
- if (Node == NULL) {
+ if (!Node) {
return; // No values left to visit.
}
InitNodeNumDefs();
@@ -740,7 +746,7 @@ ProcessSourceNode(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter,
// BB->back().isPHI() test will not fire when we want it to.
std::prev(Emitter.getInsertPos())->isPHI()) {
// Did not insert any instruction.
- Orders.push_back(std::make_pair(Order, (MachineInstr*)0));
+ Orders.push_back(std::make_pair(Order, (MachineInstr*)nullptr));
return;
}
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
index 5e11dbb..39ebadf 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
@@ -139,7 +139,7 @@ namespace llvm {
public:
RegDefIter(const SUnit *SU, const ScheduleDAGSDNodes *SD);
- bool IsValid() const { return Node != NULL; }
+ bool IsValid() const { return Node != nullptr; }
MVT GetValue() const {
assert(IsValid() && "bad iterator");
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
index fb86103..51c51d6 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
@@ -18,7 +18,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "pre-RA-sched"
#include "llvm/CodeGen/SchedulerRegistry.h"
#include "ScheduleDAGSDNodes.h"
#include "llvm/ADT/Statistic.h"
@@ -35,6 +34,8 @@
#include <climits>
using namespace llvm;
+#define DEBUG_TYPE "pre-RA-sched"
+
STATISTIC(NumNoops , "Number of noops inserted");
STATISTIC(NumStalls, "Number of pipeline stalls");
@@ -120,7 +121,7 @@ void ScheduleDAGVLIW::releaseSucc(SUnit *SU, const SDep &D) {
dbgs() << "*** Scheduling failed! ***\n";
SuccSU->dump(this);
dbgs() << " has been released too many times!\n";
- llvm_unreachable(0);
+ llvm_unreachable(nullptr);
}
#endif
assert(!D.isWeak() && "unexpected artificial DAG edge");
@@ -204,12 +205,12 @@ void ScheduleDAGVLIW::listScheduleTopDown() {
// don't advance the hazard recognizer.
if (AvailableQueue->empty()) {
// Reset DFA state.
- AvailableQueue->scheduledNode(0);
+ AvailableQueue->scheduledNode(nullptr);
++CurCycle;
continue;
}
- SUnit *FoundSUnit = 0;
+ SUnit *FoundSUnit = nullptr;
bool HasNoopHazards = false;
while (!AvailableQueue->empty()) {
@@ -256,7 +257,7 @@ void ScheduleDAGVLIW::listScheduleTopDown() {
// processors without pipeline interlocks and other cases.
DEBUG(dbgs() << "*** Emitting noop\n");
HazardRec->EmitNoop();
- Sequence.push_back(0); // NULL here means noop
+ Sequence.push_back(nullptr); // NULL here means noop
++NumNoops;
++CurCycle;
}
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index d11ce80..b1b8035 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -364,29 +364,28 @@ static void AddNodeIDValueTypes(FoldingSetNodeID &ID, SDVTList VTList) {
/// AddNodeIDOperands - Various routines for adding operands to the NodeID data.
///
static void AddNodeIDOperands(FoldingSetNodeID &ID,
- const SDValue *Ops, unsigned NumOps) {
- for (; NumOps; --NumOps, ++Ops) {
- ID.AddPointer(Ops->getNode());
- ID.AddInteger(Ops->getResNo());
+ ArrayRef<SDValue> Ops) {
+ for (auto& Op : Ops) {
+ ID.AddPointer(Op.getNode());
+ ID.AddInteger(Op.getResNo());
}
}
/// AddNodeIDOperands - Various routines for adding operands to the NodeID data.
///
static void AddNodeIDOperands(FoldingSetNodeID &ID,
- const SDUse *Ops, unsigned NumOps) {
- for (; NumOps; --NumOps, ++Ops) {
- ID.AddPointer(Ops->getNode());
- ID.AddInteger(Ops->getResNo());
+ ArrayRef<SDUse> Ops) {
+ for (auto& Op : Ops) {
+ ID.AddPointer(Op.getNode());
+ ID.AddInteger(Op.getResNo());
}
}
-static void AddNodeIDNode(FoldingSetNodeID &ID,
- unsigned short OpC, SDVTList VTList,
- const SDValue *OpList, unsigned N) {
+static void AddNodeIDNode(FoldingSetNodeID &ID, unsigned short OpC,
+ SDVTList VTList, ArrayRef<SDValue> OpList) {
AddNodeIDOpcode(ID, OpC);
AddNodeIDValueTypes(ID, VTList);
- AddNodeIDOperands(ID, OpList, N);
+ AddNodeIDOperands(ID, OpList);
}
/// AddNodeIDCustom - If this is an SDNode with special info, add this info to
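The (pointer, count) to ArrayRef rewrite in this hunk is the mechanical change behind most of this diff: ArrayRef is a non-owning view that converts implicitly from SmallVector, std::vector, C arrays, and single values, which is why call sites shrink from &Ops[0], Ops.size() to plain Ops. A toy view type showing just that conversion behavior, assuming nothing beyond the standard library:

    #include <cstddef>
    #include <vector>

    // Minimal non-owning view in the spirit of llvm::ArrayRef; the implicit
    // constructors are what let both old- and new-style call sites compile.
    template <typename T> class ViewRef {
      const T *Data = nullptr;
      std::size_t Len = 0;
    public:
      ViewRef(const T *D, std::size_t N) : Data(D), Len(N) {}
      ViewRef(const std::vector<T> &V) : Data(V.data()), Len(V.size()) {}
      const T *begin() const { return Data; }
      const T *end() const { return Data + Len; }
    };

    static int sum(ViewRef<int> Ops) {
      int S = 0;
      for (int V : Ops)
        S += V;
      return S;
    }

    int main() {
      std::vector<int> Ops{1, 2, 3};
      // Old style (pointer + count) and new style (whole container) both work.
      return sum({Ops.data(), Ops.size()}) - sum(Ops); // 0
    }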
@@ -528,7 +527,7 @@ static void AddNodeIDNode(FoldingSetNodeID &ID, const SDNode *N) {
// Add the return value info.
AddNodeIDValueTypes(ID, N->getVTList());
// Add the operand info.
- AddNodeIDOperands(ID, N->op_begin(), N->getNumOperands());
+ AddNodeIDOperands(ID, makeArrayRef(N->op_begin(), N->op_end()));
// Handle SDNode leafs with special info.
AddNodeIDCustom(ID, N);
@@ -606,7 +605,7 @@ void SelectionDAG::RemoveDeadNodes(SmallVectorImpl<SDNode *> &DeadNodes) {
SDNode *N = DeadNodes.pop_back_val();
for (DAGUpdateListener *DUL = UpdateListeners; DUL; DUL = DUL->Next)
- DUL->NodeDeleted(N, 0);
+ DUL->NodeDeleted(N, nullptr);
// Take the node out of the appropriate CSE map.
RemoveNodeFromCSEMaps(N);
@@ -684,8 +683,8 @@ bool SelectionDAG::RemoveNodeFromCSEMaps(SDNode *N) {
case ISD::CONDCODE:
assert(CondCodeNodes[cast<CondCodeSDNode>(N)->get()] &&
"Cond code doesn't exist!");
- Erased = CondCodeNodes[cast<CondCodeSDNode>(N)->get()] != 0;
- CondCodeNodes[cast<CondCodeSDNode>(N)->get()] = 0;
+ Erased = CondCodeNodes[cast<CondCodeSDNode>(N)->get()] != nullptr;
+ CondCodeNodes[cast<CondCodeSDNode>(N)->get()] = nullptr;
break;
case ISD::ExternalSymbol:
Erased = ExternalSymbols.erase(cast<ExternalSymbolSDNode>(N)->getSymbol());
@@ -702,8 +701,8 @@ bool SelectionDAG::RemoveNodeFromCSEMaps(SDNode *N) {
if (VT.isExtended()) {
Erased = ExtendedValueTypeNodes.erase(VT);
} else {
- Erased = ValueTypeNodes[VT.getSimpleVT().SimpleTy] != 0;
- ValueTypeNodes[VT.getSimpleVT().SimpleTy] = 0;
+ Erased = ValueTypeNodes[VT.getSimpleVT().SimpleTy] != nullptr;
+ ValueTypeNodes[VT.getSimpleVT().SimpleTy] = nullptr;
}
break;
}
@@ -765,11 +764,11 @@ SelectionDAG::AddModifiedNodeToCSEMaps(SDNode *N) {
SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, SDValue Op,
void *&InsertPos) {
if (doNotCSE(N))
- return 0;
+ return nullptr;
SDValue Ops[] = { Op };
FoldingSetNodeID ID;
- AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops, 1);
+ AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops);
AddNodeIDCustom(ID, N);
SDNode *Node = CSEMap.FindNodeOrInsertPos(ID, InsertPos);
return Node;
@@ -783,11 +782,11 @@ SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N,
SDValue Op1, SDValue Op2,
void *&InsertPos) {
if (doNotCSE(N))
- return 0;
+ return nullptr;
SDValue Ops[] = { Op1, Op2 };
FoldingSetNodeID ID;
- AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops, 2);
+ AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops);
AddNodeIDCustom(ID, N);
SDNode *Node = CSEMap.FindNodeOrInsertPos(ID, InsertPos);
return Node;
@@ -798,14 +797,13 @@ SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N,
/// were replaced with those specified. If this node is never memoized,
/// return null, otherwise return a pointer to the slot it would take. If a
/// node already exists with these operands, the slot will be non-null.
-SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N,
- const SDValue *Ops,unsigned NumOps,
+SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, ArrayRef<SDValue> Ops,
void *&InsertPos) {
if (doNotCSE(N))
- return 0;
+ return nullptr;
FoldingSetNodeID ID;
- AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops, NumOps);
+ AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops);
AddNodeIDCustom(ID, N);
SDNode *Node = CSEMap.FindNodeOrInsertPos(ID, InsertPos);
return Node;
@@ -901,10 +899,10 @@ unsigned SelectionDAG::getEVTAlignment(EVT VT) const {
// EntryNode could meaningfully have debug info if we can find it...
SelectionDAG::SelectionDAG(const TargetMachine &tm, CodeGenOpt::Level OL)
- : TM(tm), TSI(*tm.getSelectionDAGInfo()), TLI(0), OptLevel(OL),
+ : TM(tm), TSI(*tm.getSelectionDAGInfo()), TLI(nullptr), OptLevel(OL),
EntryNode(ISD::EntryToken, 0, DebugLoc(), getVTList(MVT::Other)),
Root(getEntryNode()), NewNodesMustHaveLegalTypes(false),
- UpdateListeners(0) {
+ UpdateListeners(nullptr) {
AllNodes.push_back(&EntryNode);
DbgInfo = new SDDbgInfo();
}
@@ -937,11 +935,11 @@ void SelectionDAG::clear() {
ExternalSymbols.clear();
TargetExternalSymbols.clear();
std::fill(CondCodeNodes.begin(), CondCodeNodes.end(),
- static_cast<CondCodeSDNode*>(0));
+ static_cast<CondCodeSDNode*>(nullptr));
std::fill(ValueTypeNodes.begin(), ValueTypeNodes.end(),
- static_cast<SDNode*>(0));
+ static_cast<SDNode*>(nullptr));
- EntryNode.UseList = 0;
+ EntryNode.UseList = nullptr;
AllNodes.push_back(&EntryNode);
Root = getEntryNode();
DbgInfo->clear();
@@ -965,6 +963,14 @@ SDValue SelectionDAG::getZExtOrTrunc(SDValue Op, SDLoc DL, EVT VT) {
getNode(ISD::TRUNCATE, DL, VT, Op);
}
+SDValue SelectionDAG::getBoolExtOrTrunc(SDValue Op, SDLoc SL, EVT VT) {
+ if (VT.bitsLE(Op.getValueType()))
+ return getNode(ISD::TRUNCATE, SL, VT, Op);
+
+ TargetLowering::BooleanContent BType = TLI->getBooleanContents(VT.isVector());
+ return getNode(TLI->getExtendForContent(BType), SL, VT, Op);
+}
+
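getBoolExtOrTrunc is new in this change: it resizes a boolean value to VT, truncating when narrowing and otherwise choosing the extension that matches the target's boolean representation via getBooleanContents. A standalone model of that decision (the enum and function names here are illustrative, not LLVM's):

enum class BoolContent { ZeroOrOne, ZeroOrNegOne, Undefined };
enum class ResizeOp { Truncate, ZeroExtend, SignExtend, AnyExtend };

static ResizeOp pickBoolResize(unsigned FromBits, unsigned ToBits,
                               BoolContent BC) {
  if (ToBits <= FromBits)
    return ResizeOp::Truncate; // narrowing never consults the content rule
  switch (BC) {
  case BoolContent::ZeroOrOne:    return ResizeOp::ZeroExtend; // keep 0/1
  case BoolContent::ZeroOrNegOne: return ResizeOp::SignExtend; // keep 0/-1
  case BoolContent::Undefined:    return ResizeOp::AnyExtend;  // high bits free
  }
  return ResizeOp::AnyExtend; // unreachable; silences -Wreturn-type
}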
SDValue SelectionDAG::getZeroExtendInReg(SDValue Op, SDLoc DL, EVT VT) {
assert(!VT.isVector() &&
"getZeroExtendInReg should use the vector element type instead of "
@@ -986,6 +992,22 @@ SDValue SelectionDAG::getNOT(SDLoc DL, SDValue Val, EVT VT) {
return getNode(ISD::XOR, DL, VT, Val, NegOne);
}
+SDValue SelectionDAG::getLogicalNOT(SDLoc DL, SDValue Val, EVT VT) {
+ EVT EltVT = VT.getScalarType();
+ SDValue TrueValue;
+ switch (TLI->getBooleanContents(VT.isVector())) {
+ case TargetLowering::ZeroOrOneBooleanContent:
+ case TargetLowering::UndefinedBooleanContent:
+ TrueValue = getConstant(1, VT);
+ break;
+ case TargetLowering::ZeroOrNegativeOneBooleanContent:
+ TrueValue = getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()),
+ VT);
+ break;
+ }
+ return getNode(ISD::XOR, DL, VT, Val, TrueValue);
+}
+
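getLogicalNOT, also new here, complements the bitwise getNOT above: it inverts a boolean by XOR-ing it with the target's canonical true value, 1 for zero-or-one booleans and all-ones for zero-or-negative-one booleans. In scalar miniature:

#include <cstdint>

// 0^1 = 1 and 1^1 = 0 for zero-or-one targets;
// 0^-1 = -1 and -1^-1 = 0 for zero-or-negative-one targets.
static int64_t logicalNot(int64_t Val, bool ZeroOrNegOneBooleans) {
  int64_t TrueValue = ZeroOrNegOneBooleans ? -1 : 1;
  return Val ^ TrueValue;
}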
SDValue SelectionDAG::getConstant(uint64_t Val, EVT VT, bool isT, bool isO) {
EVT EltVT = VT.getScalarType();
assert((EltVT.getSizeInBits() >= 64 ||
@@ -1063,7 +1085,7 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, EVT VT, bool isT,
SDValue Result = getNode(ISD::BITCAST, SDLoc(), VT,
getNode(ISD::BUILD_VECTOR, SDLoc(), ViaVecVT,
- &Ops[0], Ops.size()));
+ Ops));
return Result;
}
@@ -1071,11 +1093,11 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, EVT VT, bool isT,
"APInt size does not match type size!");
unsigned Opc = isT ? ISD::TargetConstant : ISD::Constant;
FoldingSetNodeID ID;
- AddNodeIDNode(ID, Opc, getVTList(EltVT), 0, 0);
+ AddNodeIDNode(ID, Opc, getVTList(EltVT), None);
ID.AddPointer(Elt);
ID.AddBoolean(isO);
- void *IP = 0;
- SDNode *N = NULL;
+ void *IP = nullptr;
+ SDNode *N = nullptr;
if ((N = CSEMap.FindNodeOrInsertPos(ID, IP)))
if (!VT.isVector())
return SDValue(N, 0);
@@ -1090,7 +1112,7 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, EVT VT, bool isT,
if (VT.isVector()) {
SmallVector<SDValue, 8> Ops;
Ops.assign(VT.getVectorNumElements(), Result);
- Result = getNode(ISD::BUILD_VECTOR, SDLoc(), VT, &Ops[0], Ops.size());
+ Result = getNode(ISD::BUILD_VECTOR, SDLoc(), VT, Ops);
}
return Result;
}
@@ -1114,10 +1136,10 @@ SDValue SelectionDAG::getConstantFP(const ConstantFP& V, EVT VT, bool isTarget){
// we don't have issues with SNANs.
unsigned Opc = isTarget ? ISD::TargetConstantFP : ISD::ConstantFP;
FoldingSetNodeID ID;
- AddNodeIDNode(ID, Opc, getVTList(EltVT), 0, 0);
+ AddNodeIDNode(ID, Opc, getVTList(EltVT), None);
ID.AddPointer(&V);
- void *IP = 0;
- SDNode *N = NULL;
+ void *IP = nullptr;
+ SDNode *N = nullptr;
if ((N = CSEMap.FindNodeOrInsertPos(ID, IP)))
if (!VT.isVector())
return SDValue(N, 0);
@@ -1133,7 +1155,7 @@ SDValue SelectionDAG::getConstantFP(const ConstantFP& V, EVT VT, bool isTarget){
SmallVector<SDValue, 8> Ops;
Ops.assign(VT.getVectorNumElements(), Result);
// FIXME SDLoc info might be appropriate here
- Result = getNode(ISD::BUILD_VECTOR, SDLoc(), VT, &Ops[0], Ops.size());
+ Result = getNode(ISD::BUILD_VECTOR, SDLoc(), VT, Ops);
}
return Result;
}
@@ -1172,7 +1194,7 @@ SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, SDLoc DL,
if (!GVar) {
// If GV is an alias then use the aliasee for determining thread-localness.
if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
- GVar = dyn_cast_or_null<GlobalVariable>(GA->getAliasedGlobal());
+ GVar = dyn_cast_or_null<GlobalVariable>(GA->getAliasee());
}
unsigned Opc;
@@ -1182,12 +1204,12 @@ SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, SDLoc DL,
Opc = isTargetGA ? ISD::TargetGlobalAddress : ISD::GlobalAddress;
FoldingSetNodeID ID;
- AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0);
+ AddNodeIDNode(ID, Opc, getVTList(VT), None);
ID.AddPointer(GV);
ID.AddInteger(Offset);
ID.AddInteger(TargetFlags);
ID.AddInteger(GV->getType()->getAddressSpace());
- void *IP = 0;
+ void *IP = nullptr;
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
@@ -1202,9 +1224,9 @@ SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, SDLoc DL,
SDValue SelectionDAG::getFrameIndex(int FI, EVT VT, bool isTarget) {
unsigned Opc = isTarget ? ISD::TargetFrameIndex : ISD::FrameIndex;
FoldingSetNodeID ID;
- AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0);
+ AddNodeIDNode(ID, Opc, getVTList(VT), None);
ID.AddInteger(FI);
- void *IP = 0;
+ void *IP = nullptr;
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
@@ -1220,10 +1242,10 @@ SDValue SelectionDAG::getJumpTable(int JTI, EVT VT, bool isTarget,
"Cannot set target flags on target-independent jump tables");
unsigned Opc = isTarget ? ISD::TargetJumpTable : ISD::JumpTable;
FoldingSetNodeID ID;
- AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0);
+ AddNodeIDNode(ID, Opc, getVTList(VT), None);
ID.AddInteger(JTI);
ID.AddInteger(TargetFlags);
- void *IP = 0;
+ void *IP = nullptr;
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
@@ -1245,12 +1267,12 @@ SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT,
TM.getTargetLowering()->getDataLayout()->getPrefTypeAlignment(C->getType());
unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool;
FoldingSetNodeID ID;
- AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0);
+ AddNodeIDNode(ID, Opc, getVTList(VT), None);
ID.AddInteger(Alignment);
ID.AddInteger(Offset);
ID.AddPointer(C);
ID.AddInteger(TargetFlags);
- void *IP = 0;
+ void *IP = nullptr;
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
@@ -1273,12 +1295,12 @@ SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT,
TM.getTargetLowering()->getDataLayout()->getPrefTypeAlignment(C->getType());
unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool;
FoldingSetNodeID ID;
- AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0);
+ AddNodeIDNode(ID, Opc, getVTList(VT), None);
ID.AddInteger(Alignment);
ID.AddInteger(Offset);
C->addSelectionDAGCSEId(ID);
ID.AddInteger(TargetFlags);
- void *IP = 0;
+ void *IP = nullptr;
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
@@ -1292,11 +1314,11 @@ SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT,
SDValue SelectionDAG::getTargetIndex(int Index, EVT VT, int64_t Offset,
unsigned char TargetFlags) {
FoldingSetNodeID ID;
- AddNodeIDNode(ID, ISD::TargetIndex, getVTList(VT), 0, 0);
+ AddNodeIDNode(ID, ISD::TargetIndex, getVTList(VT), None);
ID.AddInteger(Index);
ID.AddInteger(Offset);
ID.AddInteger(TargetFlags);
- void *IP = 0;
+ void *IP = nullptr;
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
@@ -1309,9 +1331,9 @@ SDValue SelectionDAG::getTargetIndex(int Index, EVT VT, int64_t Offset,
SDValue SelectionDAG::getBasicBlock(MachineBasicBlock *MBB) {
FoldingSetNodeID ID;
- AddNodeIDNode(ID, ISD::BasicBlock, getVTList(MVT::Other), 0, 0);
+ AddNodeIDNode(ID, ISD::BasicBlock, getVTList(MVT::Other), None);
ID.AddPointer(MBB);
- void *IP = 0;
+ void *IP = nullptr;
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
@@ -1358,7 +1380,7 @@ SDValue SelectionDAG::getCondCode(ISD::CondCode Cond) {
if ((unsigned)Cond >= CondCodeNodes.size())
CondCodeNodes.resize(Cond+1);
- if (CondCodeNodes[Cond] == 0) {
+ if (!CondCodeNodes[Cond]) {
CondCodeSDNode *N = new (NodeAllocator) CondCodeSDNode(Cond);
CondCodeNodes[Cond] = N;
AllNodes.push_back(N);
@@ -1441,13 +1463,18 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, SDLoc dl, SDValue N1,
if (Identity && NElts)
return N1;
+ // Shuffling a constant splat doesn't change the result.
+ if (N2Undef && N1.getOpcode() == ISD::BUILD_VECTOR)
+ if (cast<BuildVectorSDNode>(N1)->getConstantSplatValue())
+ return N1;
+
FoldingSetNodeID ID;
SDValue Ops[2] = { N1, N2 };
- AddNodeIDNode(ID, ISD::VECTOR_SHUFFLE, getVTList(VT), Ops, 2);
+ AddNodeIDNode(ID, ISD::VECTOR_SHUFFLE, getVTList(VT), Ops);
for (unsigned i = 0; i != NElts; ++i)
ID.AddInteger(MaskVec[i]);
- void* IP = 0;
+ void* IP = nullptr;
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
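The fold added at the top of this hunk says that shuffling a constant splat is a no-op: with the second operand undef, every mask entry selects some lane of the splat, and all of those lanes hold the same value. A scalar demonstration of why any in-range mask reproduces the original vector:

#include <array>
#include <cassert>

int main() {
  std::array<int, 4> Splat = {7, 7, 7, 7};
  std::array<int, 4> Mask  = {3, 0, 2, 1}; // any permutation of lanes 0..3
  std::array<int, 4> Out{};
  for (int i = 0; i < 4; ++i)
    Out[i] = Splat[Mask[i]];
  assert(Out == Splat); // holds for every in-range mask
  return 0;
}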
@@ -1478,14 +1505,14 @@ SDValue SelectionDAG::getConvertRndSat(EVT VT, SDLoc dl,
FoldingSetNodeID ID;
SDValue Ops[] = { Val, DTy, STy, Rnd, Sat };
- AddNodeIDNode(ID, ISD::CONVERT_RNDSAT, getVTList(VT), &Ops[0], 5);
- void* IP = 0;
+ AddNodeIDNode(ID, ISD::CONVERT_RNDSAT, getVTList(VT), Ops);
+ void* IP = nullptr;
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
CvtRndSatSDNode *N = new (NodeAllocator) CvtRndSatSDNode(VT, dl.getIROrder(),
dl.getDebugLoc(),
- Ops, 5, Code);
+ Ops, Code);
CSEMap.InsertNode(N, IP);
AllNodes.push_back(N);
return SDValue(N, 0);
@@ -1493,9 +1520,9 @@ SDValue SelectionDAG::getConvertRndSat(EVT VT, SDLoc dl,
SDValue SelectionDAG::getRegister(unsigned RegNo, EVT VT) {
FoldingSetNodeID ID;
- AddNodeIDNode(ID, ISD::Register, getVTList(VT), 0, 0);
+ AddNodeIDNode(ID, ISD::Register, getVTList(VT), None);
ID.AddInteger(RegNo);
- void *IP = 0;
+ void *IP = nullptr;
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
@@ -1507,9 +1534,9 @@ SDValue SelectionDAG::getRegister(unsigned RegNo, EVT VT) {
SDValue SelectionDAG::getRegisterMask(const uint32_t *RegMask) {
FoldingSetNodeID ID;
- AddNodeIDNode(ID, ISD::RegisterMask, getVTList(MVT::Untyped), 0, 0);
+ AddNodeIDNode(ID, ISD::RegisterMask, getVTList(MVT::Untyped), None);
ID.AddPointer(RegMask);
- void *IP = 0;
+ void *IP = nullptr;
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
@@ -1522,9 +1549,9 @@ SDValue SelectionDAG::getRegisterMask(const uint32_t *RegMask) {
SDValue SelectionDAG::getEHLabel(SDLoc dl, SDValue Root, MCSymbol *Label) {
FoldingSetNodeID ID;
SDValue Ops[] = { Root };
- AddNodeIDNode(ID, ISD::EH_LABEL, getVTList(MVT::Other), &Ops[0], 1);
+ AddNodeIDNode(ID, ISD::EH_LABEL, getVTList(MVT::Other), Ops);
ID.AddPointer(Label);
- void *IP = 0;
+ void *IP = nullptr;
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
@@ -1543,11 +1570,11 @@ SDValue SelectionDAG::getBlockAddress(const BlockAddress *BA, EVT VT,
unsigned Opc = isTarget ? ISD::TargetBlockAddress : ISD::BlockAddress;
FoldingSetNodeID ID;
- AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0);
+ AddNodeIDNode(ID, Opc, getVTList(VT), None);
ID.AddPointer(BA);
ID.AddInteger(Offset);
ID.AddInteger(TargetFlags);
- void *IP = 0;
+ void *IP = nullptr;
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
@@ -1563,10 +1590,10 @@ SDValue SelectionDAG::getSrcValue(const Value *V) {
"SrcValue is not a pointer?");
FoldingSetNodeID ID;
- AddNodeIDNode(ID, ISD::SRCVALUE, getVTList(MVT::Other), 0, 0);
+ AddNodeIDNode(ID, ISD::SRCVALUE, getVTList(MVT::Other), None);
ID.AddPointer(V);
- void *IP = 0;
+ void *IP = nullptr;
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
@@ -1579,10 +1606,10 @@ SDValue SelectionDAG::getSrcValue(const Value *V) {
/// getMDNode - Return an MDNodeSDNode which holds an MDNode.
SDValue SelectionDAG::getMDNode(const MDNode *MD) {
FoldingSetNodeID ID;
- AddNodeIDNode(ID, ISD::MDNODE_SDNODE, getVTList(MVT::Other), 0, 0);
+ AddNodeIDNode(ID, ISD::MDNODE_SDNODE, getVTList(MVT::Other), None);
ID.AddPointer(MD);
- void *IP = 0;
+ void *IP = nullptr;
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
@@ -1597,11 +1624,11 @@ SDValue SelectionDAG::getAddrSpaceCast(SDLoc dl, EVT VT, SDValue Ptr,
unsigned SrcAS, unsigned DestAS) {
SDValue Ops[] = {Ptr};
FoldingSetNodeID ID;
- AddNodeIDNode(ID, ISD::ADDRSPACECAST, getVTList(VT), &Ops[0], 1);
+ AddNodeIDNode(ID, ISD::ADDRSPACECAST, getVTList(VT), Ops);
ID.AddInteger(SrcAS);
ID.AddInteger(DestAS);
- void *IP = 0;
+ void *IP = nullptr;
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
@@ -1780,17 +1807,14 @@ bool SelectionDAG::SignBitIsZero(SDValue Op, unsigned Depth) const {
bool SelectionDAG::MaskedValueIsZero(SDValue Op, const APInt &Mask,
unsigned Depth) const {
APInt KnownZero, KnownOne;
- ComputeMaskedBits(Op, KnownZero, KnownOne, Depth);
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ computeKnownBits(Op, KnownZero, KnownOne, Depth);
return (KnownZero & Mask) == Mask;
}
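With the rename, MaskedValueIsZero reads as a thin wrapper over computeKnownBits: ask which bits are provably zero, then test that the mask is contained in them. The containment check in isolation:

#include <cstdint>

// True when every bit selected by Mask is among the bits known to be zero.
static bool maskedValueIsZero(std::uint64_t KnownZero, std::uint64_t Mask) {
  return (KnownZero & Mask) == Mask;
}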
-/// ComputeMaskedBits - Determine which of the bits specified in Mask are
-/// known to be either zero or one and return them in the KnownZero/KnownOne
-/// bitsets. This code only analyzes bits in Mask, in order to short-circuit
-/// processing.
-void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero,
- APInt &KnownOne, unsigned Depth) const {
+/// Determine which bits of Op are known to be either zero or one and return
+/// them in the KnownZero/KnownOne bitsets.
+void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
+ APInt &KnownOne, unsigned Depth) const {
const TargetLowering *TLI = TM.getTargetLowering();
unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits();
@@ -1805,48 +1829,40 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero,
// We know all of the bits for a constant!
KnownOne = cast<ConstantSDNode>(Op)->getAPIntValue();
KnownZero = ~KnownOne;
- return;
+ break;
case ISD::AND:
// If either the LHS or the RHS are Zero, the result is zero.
- ComputeMaskedBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1);
- ComputeMaskedBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1);
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
- assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+ computeKnownBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1);
+ computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1);
// Output known-1 bits are only known if set in both the LHS & RHS.
KnownOne &= KnownOne2;
// Output known-0 are known to be clear if zero in either the LHS | RHS.
KnownZero |= KnownZero2;
- return;
+ break;
case ISD::OR:
- ComputeMaskedBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1);
- ComputeMaskedBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1);
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
- assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+ computeKnownBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1);
+ computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1);
// Output known-0 bits are only known if clear in both the LHS & RHS.
KnownZero &= KnownZero2;
// Output known-1 are known to be set if set in either the LHS | RHS.
KnownOne |= KnownOne2;
- return;
+ break;
case ISD::XOR: {
- ComputeMaskedBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1);
- ComputeMaskedBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1);
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
- assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+ computeKnownBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1);
+ computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1);
// Output known-0 bits are known if clear or set in both the LHS & RHS.
APInt KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2);
// Output known-1 are known to be set if set in only one of the LHS, RHS.
KnownOne = (KnownZero & KnownOne2) | (KnownOne & KnownZero2);
KnownZero = KnownZeroOut;
- return;
+ break;
}
case ISD::MUL: {
- ComputeMaskedBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1);
- ComputeMaskedBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1);
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
- assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+ computeKnownBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1);
+ computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1);
// If low bits are zero in either operand, output low known-0 bits.
// Also compute a conservative estimate for high known-0 bits.
@@ -1863,46 +1879,42 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero,
LeadZ = std::min(LeadZ, BitWidth);
KnownZero = APInt::getLowBitsSet(BitWidth, TrailZ) |
APInt::getHighBitsSet(BitWidth, LeadZ);
- return;
+ break;
}
case ISD::UDIV: {
// For the purposes of computing leading zeros we can conservatively
// treat a udiv as a logical right shift by the power of 2 known to
// be less than the denominator.
- ComputeMaskedBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1);
+ computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1);
unsigned LeadZ = KnownZero2.countLeadingOnes();
KnownOne2.clearAllBits();
KnownZero2.clearAllBits();
- ComputeMaskedBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1);
+ computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1);
unsigned RHSUnknownLeadingOnes = KnownOne2.countLeadingZeros();
if (RHSUnknownLeadingOnes != BitWidth)
LeadZ = std::min(BitWidth,
LeadZ + BitWidth - RHSUnknownLeadingOnes - 1);
KnownZero = APInt::getHighBitsSet(BitWidth, LeadZ);
- return;
+ break;
}
case ISD::SELECT:
- ComputeMaskedBits(Op.getOperand(2), KnownZero, KnownOne, Depth+1);
- ComputeMaskedBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1);
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
- assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+ computeKnownBits(Op.getOperand(2), KnownZero, KnownOne, Depth+1);
+ computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1);
// Only known if known in both the LHS and RHS.
KnownOne &= KnownOne2;
KnownZero &= KnownZero2;
- return;
+ break;
case ISD::SELECT_CC:
- ComputeMaskedBits(Op.getOperand(3), KnownZero, KnownOne, Depth+1);
- ComputeMaskedBits(Op.getOperand(2), KnownZero2, KnownOne2, Depth+1);
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
- assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+ computeKnownBits(Op.getOperand(3), KnownZero, KnownOne, Depth+1);
+ computeKnownBits(Op.getOperand(2), KnownZero2, KnownOne2, Depth+1);
// Only known if known in both the LHS and RHS.
KnownOne &= KnownOne2;
KnownZero &= KnownZero2;
- return;
+ break;
case ISD::SADDO:
case ISD::UADDO:
case ISD::SSUBO:
@@ -1910,14 +1922,14 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero,
case ISD::SMULO:
case ISD::UMULO:
if (Op.getResNo() != 1)
- return;
+ break;
// The boolean result conforms to getBooleanContents. Fall through.
case ISD::SETCC:
// If we know the result of a setcc has the top bits zero, use this info.
if (TLI->getBooleanContents(Op.getValueType().isVector()) ==
TargetLowering::ZeroOrOneBooleanContent && BitWidth > 1)
KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1);
- return;
+ break;
case ISD::SHL:
// (shl X, C1) & C2 == 0 iff (X & C2 >>u C1) == 0
if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
@@ -1925,16 +1937,15 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero,
// If the shift count is an invalid immediate, don't do anything.
if (ShAmt >= BitWidth)
- return;
+ break;
- ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
KnownZero <<= ShAmt;
KnownOne <<= ShAmt;
// Low bits known zero.
KnownZero |= APInt::getLowBitsSet(BitWidth, ShAmt);
}
- return;
+ break;
case ISD::SRL:
// (ushr X, C1) & C2 == 0 iff (-1 >> C1) & C2 == 0
if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
@@ -1942,31 +1953,29 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero,
// If the shift count is an invalid immediate, don't do anything.
if (ShAmt >= BitWidth)
- return;
+ break;
- ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
KnownZero = KnownZero.lshr(ShAmt);
KnownOne = KnownOne.lshr(ShAmt);
APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt);
KnownZero |= HighBits; // High bits known zero.
}
- return;
+ break;
case ISD::SRA:
if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
unsigned ShAmt = SA->getZExtValue();
// If the shift count is an invalid immediate, don't do anything.
if (ShAmt >= BitWidth)
- return;
+ break;
// If any of the demanded bits are produced by the sign extension, we also
// demand the input sign bit.
APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt);
- ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
KnownZero = KnownZero.lshr(ShAmt);
KnownOne = KnownOne.lshr(ShAmt);
@@ -1980,7 +1989,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero,
KnownOne |= HighBits; // New bits are known one.
}
}
- return;
+ break;
case ISD::SIGN_EXTEND_INREG: {
EVT EVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
unsigned EBits = EVT.getScalarType().getSizeInBits();
@@ -1998,10 +2007,9 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero,
if (NewBits.getBoolValue())
InputDemandedBits |= InSignBit;
- ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
+ computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
KnownOne &= InputDemandedBits;
KnownZero &= InputDemandedBits;
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
// If the sign bit of the input is known set or clear, then we know the
// top bits of the result.
@@ -2015,7 +2023,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero,
KnownZero &= ~NewBits;
KnownOne &= ~NewBits;
}
- return;
+ break;
}
case ISD::CTTZ:
case ISD::CTTZ_ZERO_UNDEF:
@@ -2025,7 +2033,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero,
unsigned LowBits = Log2_32(BitWidth)+1;
KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - LowBits);
KnownOne.clearAllBits();
- return;
+ break;
}
case ISD::LOAD: {
LoadSDNode *LD = cast<LoadSDNode>(Op);
@@ -2035,9 +2043,9 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero,
unsigned MemBits = VT.getScalarType().getSizeInBits();
KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits);
} else if (const MDNode *Ranges = LD->getRanges()) {
- computeMaskedBitsLoad(*Ranges, KnownZero);
+ computeKnownBitsLoad(*Ranges, KnownZero);
}
- return;
+ break;
}
case ISD::ZERO_EXTEND: {
EVT InVT = Op.getOperand(0).getValueType();
@@ -2045,11 +2053,11 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero,
APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - InBits);
KnownZero = KnownZero.trunc(InBits);
KnownOne = KnownOne.trunc(InBits);
- ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
+ computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
KnownZero = KnownZero.zext(BitWidth);
KnownOne = KnownOne.zext(BitWidth);
KnownZero |= NewBits;
- return;
+ break;
}
case ISD::SIGN_EXTEND: {
EVT InVT = Op.getOperand(0).getValueType();
@@ -2058,13 +2066,11 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero,
KnownZero = KnownZero.trunc(InBits);
KnownOne = KnownOne.trunc(InBits);
- ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
+ computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
// Note if the sign bit is known to be zero or one.
bool SignBitKnownZero = KnownZero.isNegative();
bool SignBitKnownOne = KnownOne.isNegative();
- assert(!(SignBitKnownZero && SignBitKnownOne) &&
- "Sign bit can't be known to be both zero and one!");
KnownZero = KnownZero.zext(BitWidth);
KnownOne = KnownOne.zext(BitWidth);
@@ -2074,25 +2080,24 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero,
KnownZero |= NewBits;
else if (SignBitKnownOne)
KnownOne |= NewBits;
- return;
+ break;
}
case ISD::ANY_EXTEND: {
EVT InVT = Op.getOperand(0).getValueType();
unsigned InBits = InVT.getScalarType().getSizeInBits();
KnownZero = KnownZero.trunc(InBits);
KnownOne = KnownOne.trunc(InBits);
- ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
+ computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
KnownZero = KnownZero.zext(BitWidth);
KnownOne = KnownOne.zext(BitWidth);
- return;
+ break;
}
case ISD::TRUNCATE: {
EVT InVT = Op.getOperand(0).getValueType();
unsigned InBits = InVT.getScalarType().getSizeInBits();
KnownZero = KnownZero.zext(InBits);
KnownOne = KnownOne.zext(InBits);
- ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
KnownZero = KnownZero.trunc(BitWidth);
KnownOne = KnownOne.trunc(BitWidth);
break;
@@ -2100,15 +2105,15 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero,
case ISD::AssertZext: {
EVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT();
APInt InMask = APInt::getLowBitsSet(BitWidth, VT.getSizeInBits());
- ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
+ computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
KnownZero |= (~InMask);
KnownOne &= (~KnownZero);
- return;
+ break;
}
case ISD::FGETSIGN:
// All bits are zero except the low bit.
KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - 1);
- return;
+ break;
case ISD::SUB: {
if (ConstantSDNode *CLHS = dyn_cast<ConstantSDNode>(Op.getOperand(0))) {
@@ -2119,7 +2124,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero,
unsigned NLZ = (CLHS->getAPIntValue()+1).countLeadingZeros();
// NLZ can't be BitWidth with no sign bit
APInt MaskV = APInt::getHighBitsSet(BitWidth, NLZ+1);
- ComputeMaskedBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1);
+ computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1);
// If all of the MaskV bits are known to be zero, then we know the
// output top bits are zero, because we now know that the output is
@@ -2138,18 +2143,16 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero,
// Output known-0 bits are known if clear or set in both the low clear bits
// common to both LHS & RHS. For example, 8+(X<<3) is known to have the
// low 3 bits clear.
- ComputeMaskedBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1);
- assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+ computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1);
unsigned KnownZeroOut = KnownZero2.countTrailingOnes();
- ComputeMaskedBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1);
- assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+ computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1);
KnownZeroOut = std::min(KnownZeroOut,
KnownZero2.countTrailingOnes());
if (Op.getOpcode() == ISD::ADD) {
KnownZero |= APInt::getLowBitsSet(BitWidth, KnownZeroOut);
- return;
+ break;
}
// With ADDE, a carry bit may be added in, so we can only use this
@@ -2158,14 +2161,14 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero,
// are known zero.
if (KnownZeroOut >= 2) // ADDE
KnownZero |= APInt::getBitsSet(BitWidth, 1, KnownZeroOut);
- return;
+ break;
}
case ISD::SREM:
if (ConstantSDNode *Rem = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
const APInt &RA = Rem->getAPIntValue().abs();
if (RA.isPowerOf2()) {
APInt LowBits = RA - 1;
- ComputeMaskedBits(Op.getOperand(0), KnownZero2,KnownOne2,Depth+1);
+ computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1);
// The low bits of the first operand are unchanged by the srem.
KnownZero = KnownZero2 & LowBits;
@@ -2183,36 +2186,35 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero,
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
}
}
- return;
+ break;
case ISD::UREM: {
if (ConstantSDNode *Rem = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
const APInt &RA = Rem->getAPIntValue();
if (RA.isPowerOf2()) {
APInt LowBits = (RA - 1);
KnownZero |= ~LowBits;
- ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne,Depth+1);
- assert((KnownZero & KnownOne) == 0&&"Bits known to be one AND zero?");
+ computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
break;
}
}
// Since the result is less than or equal to either operand, any leading
// zero bits in either operand must also exist in the result.
- ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
- ComputeMaskedBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1);
+ computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
+ computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1);
uint32_t Leaders = std::max(KnownZero.countLeadingOnes(),
KnownZero2.countLeadingOnes());
KnownOne.clearAllBits();
KnownZero = APInt::getHighBitsSet(BitWidth, Leaders);
- return;
+ break;
}
case ISD::FrameIndex:
case ISD::TargetFrameIndex:
if (unsigned Align = InferPtrAlignment(Op)) {
// The low bits are known zero if the pointer is aligned.
KnownZero = APInt::getLowBitsSet(BitWidth, Log2_32(Align));
- return;
+ break;
}
break;
@@ -2224,9 +2226,11 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero,
case ISD::INTRINSIC_W_CHAIN:
case ISD::INTRINSIC_VOID:
// Allow the target to implement this method for its nodes.
- TLI->computeMaskedBitsForTargetNode(Op, KnownZero, KnownOne, *this, Depth);
- return;
+ TLI->computeKnownBitsForTargetNode(Op, KnownZero, KnownOne, *this, Depth);
+ break;
}
+
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
}
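Beyond the rename, the refactor above converts each case's early return into a break so that control always reaches the end of the switch, where the per-case "known one AND zero" asserts are consolidated into a single postcondition. The shape of that change, in miniature:

#include <cassert>
#include <cstdint>

static void compute(int Opcode, std::uint64_t &KnownZero,
                    std::uint64_t &KnownOne) {
  switch (Opcode) {
  case 0:  KnownZero = ~KnownOne;          break; // was: return;
  case 1:  KnownZero &= ~KnownOne;         break; // was: return;
  default: KnownZero = 0; KnownOne = 0;    break;
  }
  // One shared postcondition replaces a copy of this assert in every case.
  assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
}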
/// ComputeNumSignBits - Return the number of times the sign bit of the
@@ -2300,7 +2304,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{
FirstAnswer = std::min(Tmp, Tmp2);
// We computed what we know about the sign bits as our first
// answer. Now proceed to the generic code that uses
- // ComputeMaskedBits, and pick whichever answer is better.
+ // computeKnownBits, and pick whichever answer is better.
}
break;
@@ -2350,7 +2354,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{
if (ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
if (CRHS->isAllOnesValue()) {
APInt KnownZero, KnownOne;
- ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
+ computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
// If the input is known to be 0 or 1, the output is 0/-1, which is all
// sign bits set.
@@ -2375,7 +2379,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{
if (ConstantSDNode *CLHS = dyn_cast<ConstantSDNode>(Op.getOperand(0)))
if (CLHS->isNullValue()) {
APInt KnownZero, KnownOne;
- ComputeMaskedBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1);
+ computeKnownBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1);
// If the input is known to be 0 or 1, the output is 0/-1, which is all
// sign bits set.
if ((KnownZero | APInt(VTBits, 1)).isAllOnesValue())
@@ -2422,14 +2426,14 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{
Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
Op.getOpcode() == ISD::INTRINSIC_VOID) {
- unsigned NumBits = TLI->ComputeNumSignBitsForTargetNode(Op, Depth);
+ unsigned NumBits = TLI->ComputeNumSignBitsForTargetNode(Op, *this, Depth);
if (NumBits > 1) FirstAnswer = std::max(FirstAnswer, NumBits);
}
// Finally, if we can prove that the top bits of the result are 0's or 1's,
// use this information.
APInt KnownZero, KnownOne;
- ComputeMaskedBits(Op, KnownZero, KnownOne, Depth);
+ computeKnownBits(Op, KnownZero, KnownOne, Depth);
APInt Mask;
if (KnownZero.isNegative()) { // sign bit is 0
@@ -2517,8 +2521,8 @@ bool SelectionDAG::isEqualTo(SDValue A, SDValue B) const {
///
SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT) {
FoldingSetNodeID ID;
- AddNodeIDNode(ID, Opcode, getVTList(VT), 0, 0);
- void *IP = 0;
+ AddNodeIDNode(ID, Opcode, getVTList(VT), None);
+ void *IP = nullptr;
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
@@ -2789,8 +2793,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL,
if (VT != MVT::Glue) { // Don't CSE flag producing nodes
FoldingSetNodeID ID;
SDValue Ops[1] = { Operand };
- AddNodeIDNode(ID, Opcode, VTs, Ops, 1);
- void *IP = 0;
+ AddNodeIDNode(ID, Opcode, VTs, Ops);
+ void *IP = nullptr;
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
@@ -2811,6 +2815,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL,
SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, EVT VT,
SDNode *Cst1, SDNode *Cst2) {
+ // If the opcode is a target-specific ISD node, there's nothing we can
+ // do here and the operand rules may not line up with the below, so
+ // bail early.
+ if (Opcode >= ISD::BUILTIN_OP_END)
+ return SDValue();
+
SmallVector<std::pair<ConstantSDNode *, ConstantSDNode *>, 4> Inputs;
SmallVector<SDValue, 4> Outputs;
EVT SVT = VT.getScalarType();
@@ -2915,13 +2925,18 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, EVT VT,
}
}
+ assert(((Scalar1 && Scalar2) || VT.getVectorNumElements() == Outputs.size()) &&
+ "Expected a scalar or vector!");
+
// Handle the scalar case first.
- if (Scalar1 && Scalar2)
+ if (!VT.isVector())
return Outputs.back();
- // Otherwise build a big vector out of the scalar elements we generated.
- return getNode(ISD::BUILD_VECTOR, SDLoc(), VT, Outputs.data(),
- Outputs.size());
+ // We may have a vector type but a scalar result. Create a splat.
+ Outputs.resize(VT.getVectorNumElements(), Outputs.back());
+
+ // Build a big vector out of the scalar elements we generated.
+ return getNode(ISD::BUILD_VECTOR, SDLoc(), VT, Outputs);
}
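FoldConstantArithmetic gains two guards here: target-specific opcodes (>= ISD::BUILTIN_OP_END) bail out early because their operand rules are unknown to generic code, and a scalar fold requested at a vector type is splatted across every lane before the BUILD_VECTOR. The splat step, sketched in plain C++ with addition standing in for whatever constant opcode was actually folded:

#include <vector>

static std::vector<long> splatFold(long LHS, long RHS, unsigned NumElts) {
  long Folded = LHS + RHS;                   // the scalar fold
  return std::vector<long>(NumElts, Folded); // one copy per lane
}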
SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1,
@@ -2951,7 +2966,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1,
SmallVector<SDValue, 16> Elts(N1.getNode()->op_begin(),
N1.getNode()->op_end());
Elts.append(N2.getNode()->op_begin(), N2.getNode()->op_end());
- return getNode(ISD::BUILD_VECTOR, DL, VT, &Elts[0], Elts.size());
+ return getNode(ISD::BUILD_VECTOR, DL, VT, Elts);
}
break;
case ISD::AND:
@@ -3370,8 +3385,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1,
if (VT != MVT::Glue) {
SDValue Ops[] = { N1, N2 };
FoldingSetNodeID ID;
- AddNodeIDNode(ID, Opcode, VTs, Ops, 2);
- void *IP = 0;
+ AddNodeIDNode(ID, Opcode, VTs, Ops);
+ void *IP = nullptr;
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
@@ -3420,7 +3435,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT,
N1.getNode()->op_end());
Elts.append(N2.getNode()->op_begin(), N2.getNode()->op_end());
Elts.append(N3.getNode()->op_begin(), N3.getNode()->op_end());
- return getNode(ISD::BUILD_VECTOR, DL, VT, &Elts[0], Elts.size());
+ return getNode(ISD::BUILD_VECTOR, DL, VT, Elts);
}
break;
case ISD::SETCC: {
@@ -3477,8 +3492,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT,
if (VT != MVT::Glue) {
SDValue Ops[] = { N1, N2, N3 };
FoldingSetNodeID ID;
- AddNodeIDNode(ID, Opcode, VTs, Ops, 3);
- void *IP = 0;
+ AddNodeIDNode(ID, Opcode, VTs, Ops);
+ void *IP = nullptr;
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
@@ -3501,14 +3516,14 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT,
SDValue N1, SDValue N2, SDValue N3,
SDValue N4) {
SDValue Ops[] = { N1, N2, N3, N4 };
- return getNode(Opcode, DL, VT, Ops, 4);
+ return getNode(Opcode, DL, VT, Ops);
}
SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT,
SDValue N1, SDValue N2, SDValue N3,
SDValue N4, SDValue N5) {
SDValue Ops[] = { N1, N2, N3, N4, N5 };
- return getNode(Opcode, DL, VT, Ops, 5);
+ return getNode(Opcode, DL, VT, Ops);
}
/// getStackArgumentTokenFactor - Compute a TokenFactor to force all
@@ -3530,8 +3545,7 @@ SDValue SelectionDAG::getStackArgumentTokenFactor(SDValue Chain) {
ArgChains.push_back(SDValue(L, 1));
// Build a tokenfactor for all the chains.
- return getNode(ISD::TokenFactor, SDLoc(Chain), MVT::Other,
- &ArgChains[0], ArgChains.size());
+ return getNode(ISD::TokenFactor, SDLoc(Chain), MVT::Other, ArgChains);
}
/// getMemsetValue - Vectorized representation of the memset value
@@ -3600,7 +3614,7 @@ static SDValue getMemsetStringVal(EVT VT, SDLoc dl, SelectionDAG &DAG,
Type *Ty = VT.getTypeForEVT(*DAG.getContext());
if (TLI.shouldConvertConstantLoadToIntImm(Val, Ty))
return DAG.getConstant(Val, VT);
- return SDValue(0, 0);
+ return SDValue(nullptr, 0);
}
/// getMemBasePlusOffset - Returns base and offset node for the
@@ -3616,7 +3630,7 @@ static SDValue getMemBasePlusOffset(SDValue Base, unsigned Offset, SDLoc dl,
///
static bool isMemSrcFromString(SDValue Src, StringRef &Str) {
unsigned SrcDelta = 0;
- GlobalAddressSDNode *G = NULL;
+ GlobalAddressSDNode *G = nullptr;
if (Src.getOpcode() == ISD::GlobalAddress)
G = cast<GlobalAddressSDNode>(Src);
else if (Src.getOpcode() == ISD::ADD &&
@@ -3852,8 +3866,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, SDLoc dl,
Size -= VTSize;
}
- return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- &OutChains[0], OutChains.size());
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
}
static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, SDLoc dl,
@@ -3918,8 +3931,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, SDLoc dl,
LoadChains.push_back(Value.getValue(1));
SrcOff += VTSize;
}
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- &LoadChains[0], LoadChains.size());
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
OutChains.clear();
for (unsigned i = 0; i < NumMemOps; i++) {
EVT VT = MemOps[i];
@@ -3933,8 +3945,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, SDLoc dl,
DstOff += VTSize;
}
- return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- &OutChains[0], OutChains.size());
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
}
/// \brief Lower the call to 'memset' intrinsic function into a series of store
@@ -4035,8 +4046,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, SDLoc dl,
Size -= VTSize;
}
- return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- &OutChains[0], OutChains.size());
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
}
SDValue SelectionDAG::getMemcpy(SDValue Chain, SDLoc dl, SDValue Dst,
@@ -4095,15 +4105,13 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, SDLoc dl, SDValue Dst,
Entry.Node = Src; Args.push_back(Entry);
Entry.Node = Size; Args.push_back(Entry);
// FIXME: pass in SDLoc
- TargetLowering::
- CallLoweringInfo CLI(Chain, Type::getVoidTy(*getContext()),
- false, false, false, false, 0,
- TLI->getLibcallCallingConv(RTLIB::MEMCPY),
- /*isTailCall=*/false,
- /*doesNotReturn=*/false, /*isReturnValueUsed=*/false,
- getExternalSymbol(TLI->getLibcallName(RTLIB::MEMCPY),
- TLI->getPointerTy()),
- Args, *this, dl);
+ TargetLowering::CallLoweringInfo CLI(*this);
+ CLI.setDebugLoc(dl).setChain(Chain)
+ .setCallee(TLI->getLibcallCallingConv(RTLIB::MEMCPY),
+ Type::getVoidTy(*getContext()),
+ getExternalSymbol(TLI->getLibcallName(RTLIB::MEMCPY),
+ TLI->getPointerTy()), &Args, 0)
+ .setDiscardResult();
std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI);
return CallResult.second;
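The memcpy, memmove, and memset lowerings all switch from one constructor taking a dozen positional arguments to CallLoweringInfo's chained setters, where each setter returns *this and any knob left unset keeps its default. A minimal standalone version of the pattern (class and member names are illustrative, not LLVM's):

#include <string>
#include <utility>

class CallInfo {
  std::string Callee;
  int DebugLine = 0;          // optional knobs keep their defaults...
  bool DiscardResult = false; // ...unless a setter is chained in

public:
  CallInfo &setCallee(std::string C) { Callee = std::move(C); return *this; }
  CallInfo &setDebugLoc(int Line)    { DebugLine = Line;      return *this; }
  CallInfo &setDiscardResult()       { DiscardResult = true;  return *this; }
};

// Each setter returns *this, so a call site reads like named arguments:
//   CallInfo().setDebugLoc(42).setCallee("memcpy").setDiscardResult();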
@@ -4153,15 +4161,13 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, SDLoc dl, SDValue Dst,
Entry.Node = Src; Args.push_back(Entry);
Entry.Node = Size; Args.push_back(Entry);
// FIXME: pass in SDLoc
- TargetLowering::
- CallLoweringInfo CLI(Chain, Type::getVoidTy(*getContext()),
- false, false, false, false, 0,
- TLI->getLibcallCallingConv(RTLIB::MEMMOVE),
- /*isTailCall=*/false,
- /*doesNotReturn=*/false, /*isReturnValueUsed=*/false,
- getExternalSymbol(TLI->getLibcallName(RTLIB::MEMMOVE),
- TLI->getPointerTy()),
- Args, *this, dl);
+ TargetLowering::CallLoweringInfo CLI(*this);
+ CLI.setDebugLoc(dl).setChain(Chain)
+ .setCallee(TLI->getLibcallCallingConv(RTLIB::MEMMOVE),
+ Type::getVoidTy(*getContext()),
+ getExternalSymbol(TLI->getLibcallName(RTLIB::MEMMOVE),
+ TLI->getPointerTy()), &Args, 0)
+ .setDiscardResult();
std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI);
return CallResult.second;
@@ -4217,32 +4223,31 @@ SDValue SelectionDAG::getMemset(SDValue Chain, SDLoc dl, SDValue Dst,
Entry.Ty = IntPtrTy;
Entry.isSExt = false;
Args.push_back(Entry);
+
// FIXME: pass in SDLoc
- TargetLowering::
- CallLoweringInfo CLI(Chain, Type::getVoidTy(*getContext()),
- false, false, false, false, 0,
- TLI->getLibcallCallingConv(RTLIB::MEMSET),
- /*isTailCall=*/false,
- /*doesNotReturn*/false, /*isReturnValueUsed=*/false,
- getExternalSymbol(TLI->getLibcallName(RTLIB::MEMSET),
- TLI->getPointerTy()),
- Args, *this, dl);
- std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI);
+ TargetLowering::CallLoweringInfo CLI(*this);
+ CLI.setDebugLoc(dl).setChain(Chain)
+ .setCallee(TLI->getLibcallCallingConv(RTLIB::MEMSET),
+ Type::getVoidTy(*getContext()),
+ getExternalSymbol(TLI->getLibcallName(RTLIB::MEMSET),
+ TLI->getPointerTy()), &Args, 0)
+ .setDiscardResult();
+ std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI);
return CallResult.second;
}
SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT,
- SDVTList VTList, SDValue *Ops, unsigned NumOps,
+ SDVTList VTList, ArrayRef<SDValue> Ops,
MachineMemOperand *MMO,
AtomicOrdering SuccessOrdering,
AtomicOrdering FailureOrdering,
SynchronizationScope SynchScope) {
FoldingSetNodeID ID;
ID.AddInteger(MemVT.getRawBits());
- AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps);
+ AddNodeIDNode(ID, Opcode, VTList, Ops);
ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
- void* IP = 0;
+ void* IP = nullptr;
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
cast<AtomicSDNode>(E)->refineAlignment(MMO);
return SDValue(E, 0);
@@ -4253,11 +4258,13 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT,
// the node is deallocated, but recovered when the allocator is released.
// If the number of operands is less than 5 we use AtomicSDNode's internal
// storage.
- SDUse *DynOps = NumOps > 4 ? OperandAllocator.Allocate<SDUse>(NumOps) : 0;
+ unsigned NumOps = Ops.size();
+ SDUse *DynOps = NumOps > 4 ? OperandAllocator.Allocate<SDUse>(NumOps)
+ : nullptr;
SDNode *N = new (NodeAllocator) AtomicSDNode(Opcode, dl.getIROrder(),
dl.getDebugLoc(), VTList, MemVT,
- Ops, DynOps, NumOps, MMO,
+ Ops.data(), DynOps, NumOps, MMO,
SuccessOrdering, FailureOrdering,
SynchScope);
CSEMap.InsertNode(N, IP);
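Taking ArrayRef<SDValue> lets getAtomic recover the operand count with Ops.size(), while the storage strategy is unchanged: up to four operands live in AtomicSDNode's inline slots, larger lists come from the operand allocator. The small-size idea in a standalone sketch (plain new/delete instead of LLVM's bump allocator):

#include <cstddef>

template <typename T, std::size_t N> class SmallStorage {
  T Inline[N];      // inline slots: no heap traffic for small operand lists
  T *Dyn = nullptr; // heap fallback for large ones

public:
  // Returns storage for Count elements; intended to be called once.
  T *get(std::size_t Count) {
    return Count <= N ? Inline : (Dyn = new T[Count]);
  }
  ~SmallStorage() { delete[] Dyn; }
};
// SmallStorage<int, 4> S; int *Ops = S.get(NumOps);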
@@ -4266,11 +4273,11 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT,
}
SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT,
- SDVTList VTList, SDValue *Ops, unsigned NumOps,
+ SDVTList VTList, ArrayRef<SDValue> Ops,
MachineMemOperand *MMO,
AtomicOrdering Ordering,
SynchronizationScope SynchScope) {
- return getAtomic(Opcode, dl, MemVT, VTList, Ops, NumOps, MMO, Ordering,
+ return getAtomic(Opcode, dl, MemVT, VTList, Ops, MMO, Ordering,
Ordering, SynchScope);
}
@@ -4317,7 +4324,7 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT,
SDVTList VTs = getVTList(VT, MVT::Other);
SDValue Ops[] = {Chain, Ptr, Cmp, Swp};
- return getAtomic(Opcode, dl, MemVT, VTs, Ops, 4, MMO, SuccessOrdering,
+ return getAtomic(Opcode, dl, MemVT, VTs, Ops, MMO, SuccessOrdering,
FailureOrdering, SynchScope);
}
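This overload builds the compare-and-swap node, and it now threads two orderings through to the AtomicSDNode: one for the success path and one for failure, mirroring C++11's compare_exchange API. For reference, the same split at the std::atomic level:

#include <atomic>

static bool tryLock(std::atomic<int> &Flag) {
  int Expected = 0;
  // Acquire on success, relaxed on failure: the two orderings are
  // independent, just like SuccessOrdering/FailureOrdering above.
  return Flag.compare_exchange_strong(Expected, 1,
                                      std::memory_order_acquire,
                                      std::memory_order_relaxed);
}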
@@ -4377,38 +4384,7 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT,
SDVTList VTs = Opcode == ISD::ATOMIC_STORE ? getVTList(MVT::Other) :
getVTList(VT, MVT::Other);
SDValue Ops[] = {Chain, Ptr, Val};
- return getAtomic(Opcode, dl, MemVT, VTs, Ops, 3, MMO, Ordering, SynchScope);
-}
-
-SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT,
- EVT VT, SDValue Chain,
- SDValue Ptr,
- const Value* PtrVal,
- unsigned Alignment,
- AtomicOrdering Ordering,
- SynchronizationScope SynchScope) {
- if (Alignment == 0) // Ensure that codegen never sees alignment 0
- Alignment = getEVTAlignment(MemVT);
-
- MachineFunction &MF = getMachineFunction();
- // An atomic store does not load. An atomic load does not store.
- // (An atomicrmw obviously both loads and stores.)
- // For now, atomics are considered to be volatile always, and they are
- // chained as such.
- // FIXME: Volatile isn't really correct; we should keep track of atomic
- // orderings in the memoperand.
- unsigned Flags = MachineMemOperand::MOVolatile;
- if (Opcode != ISD::ATOMIC_STORE)
- Flags |= MachineMemOperand::MOLoad;
- if (Opcode != ISD::ATOMIC_LOAD)
- Flags |= MachineMemOperand::MOStore;
-
- MachineMemOperand *MMO =
- MF.getMachineMemOperand(MachinePointerInfo(PtrVal), Flags,
- MemVT.getStoreSize(), Alignment);
-
- return getAtomic(Opcode, dl, MemVT, VT, Chain, Ptr, MMO,
- Ordering, SynchScope);
+ return getAtomic(Opcode, dl, MemVT, VTs, Ops, MMO, Ordering, SynchScope);
}
SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT,
@@ -4421,38 +4397,24 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT,
SDVTList VTs = getVTList(VT, MVT::Other);
SDValue Ops[] = {Chain, Ptr};
- return getAtomic(Opcode, dl, MemVT, VTs, Ops, 2, MMO, Ordering, SynchScope);
+ return getAtomic(Opcode, dl, MemVT, VTs, Ops, MMO, Ordering, SynchScope);
}
/// getMergeValues - Create a MERGE_VALUES node from the given operands.
-SDValue SelectionDAG::getMergeValues(const SDValue *Ops, unsigned NumOps,
- SDLoc dl) {
- if (NumOps == 1)
+SDValue SelectionDAG::getMergeValues(ArrayRef<SDValue> Ops, SDLoc dl) {
+ if (Ops.size() == 1)
return Ops[0];
SmallVector<EVT, 4> VTs;
- VTs.reserve(NumOps);
- for (unsigned i = 0; i < NumOps; ++i)
+ VTs.reserve(Ops.size());
+ for (unsigned i = 0; i < Ops.size(); ++i)
VTs.push_back(Ops[i].getValueType());
- return getNode(ISD::MERGE_VALUES, dl, getVTList(&VTs[0], NumOps),
- Ops, NumOps);
-}
-
-SDValue
-SelectionDAG::getMemIntrinsicNode(unsigned Opcode, SDLoc dl,
- const EVT *VTs, unsigned NumVTs,
- const SDValue *Ops, unsigned NumOps,
- EVT MemVT, MachinePointerInfo PtrInfo,
- unsigned Align, bool Vol,
- bool ReadMem, bool WriteMem) {
- return getMemIntrinsicNode(Opcode, dl, makeVTList(VTs, NumVTs), Ops, NumOps,
- MemVT, PtrInfo, Align, Vol,
- ReadMem, WriteMem);
+ return getNode(ISD::MERGE_VALUES, dl, getVTList(VTs), Ops);
}
SDValue
SelectionDAG::getMemIntrinsicNode(unsigned Opcode, SDLoc dl, SDVTList VTList,
- const SDValue *Ops, unsigned NumOps,
+ ArrayRef<SDValue> Ops,
EVT MemVT, MachinePointerInfo PtrInfo,
unsigned Align, bool Vol,
bool ReadMem, bool WriteMem) {
@@ -4470,13 +4432,13 @@ SelectionDAG::getMemIntrinsicNode(unsigned Opcode, SDLoc dl, SDVTList VTList,
MachineMemOperand *MMO =
MF.getMachineMemOperand(PtrInfo, Flags, MemVT.getStoreSize(), Align);
- return getMemIntrinsicNode(Opcode, dl, VTList, Ops, NumOps, MemVT, MMO);
+ return getMemIntrinsicNode(Opcode, dl, VTList, Ops, MemVT, MMO);
}
SDValue
SelectionDAG::getMemIntrinsicNode(unsigned Opcode, SDLoc dl, SDVTList VTList,
- const SDValue *Ops, unsigned NumOps,
- EVT MemVT, MachineMemOperand *MMO) {
+ ArrayRef<SDValue> Ops, EVT MemVT,
+ MachineMemOperand *MMO) {
assert((Opcode == ISD::INTRINSIC_VOID ||
Opcode == ISD::INTRINSIC_W_CHAIN ||
Opcode == ISD::PREFETCH ||
@@ -4490,9 +4452,9 @@ SelectionDAG::getMemIntrinsicNode(unsigned Opcode, SDLoc dl, SDVTList VTList,
MemIntrinsicSDNode *N;
if (VTList.VTs[VTList.NumVTs-1] != MVT::Glue) {
FoldingSetNodeID ID;
- AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps);
+ AddNodeIDNode(ID, Opcode, VTList, Ops);
ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
- void *IP = 0;
+ void *IP = nullptr;
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
cast<MemIntrinsicSDNode>(E)->refineAlignment(MMO);
return SDValue(E, 0);
@@ -4500,12 +4462,12 @@ SelectionDAG::getMemIntrinsicNode(unsigned Opcode, SDLoc dl, SDVTList VTList,
N = new (NodeAllocator) MemIntrinsicSDNode(Opcode, dl.getIROrder(),
dl.getDebugLoc(), VTList, Ops,
- NumOps, MemVT, MMO);
+ MemVT, MMO);
CSEMap.InsertNode(N, IP);
} else {
N = new (NodeAllocator) MemIntrinsicSDNode(Opcode, dl.getIROrder(),
dl.getDebugLoc(), VTList, Ops,
- NumOps, MemVT, MMO);
+ MemVT, MMO);
}
AllNodes.push_back(N);
return SDValue(N, 0);
@@ -4568,7 +4530,7 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
// If we don't have a PtrInfo, infer the trivial frame index case to simplify
// clients.
- if (PtrInfo.V == 0)
+ if (PtrInfo.V.isNull())
PtrInfo = InferPointerInfo(Ptr, Offset);
MachineFunction &MF = getMachineFunction();
@@ -4608,13 +4570,13 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
getVTList(VT, Ptr.getValueType(), MVT::Other) : getVTList(VT, MVT::Other);
SDValue Ops[] = { Chain, Ptr, Offset };
FoldingSetNodeID ID;
- AddNodeIDNode(ID, ISD::LOAD, VTs, Ops, 3);
+ AddNodeIDNode(ID, ISD::LOAD, VTs, Ops);
ID.AddInteger(MemVT.getRawBits());
ID.AddInteger(encodeMemSDNodeFlags(ExtType, AM, MMO->isVolatile(),
MMO->isNonTemporal(),
MMO->isInvariant()));
ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
- void *IP = 0;
+ void *IP = nullptr;
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
cast<LoadSDNode>(E)->refineAlignment(MMO);
return SDValue(E, 0);
@@ -4695,7 +4657,7 @@ SDValue SelectionDAG::getStore(SDValue Chain, SDLoc dl, SDValue Val,
if (isNonTemporal)
Flags |= MachineMemOperand::MONonTemporal;
- if (PtrInfo.V == 0)
+ if (PtrInfo.V.isNull())
PtrInfo = InferPointerInfo(Ptr);
MachineFunction &MF = getMachineFunction();
@@ -4716,12 +4678,12 @@ SDValue SelectionDAG::getStore(SDValue Chain, SDLoc dl, SDValue Val,
SDValue Undef = getUNDEF(Ptr.getValueType());
SDValue Ops[] = { Chain, Val, Ptr, Undef };
FoldingSetNodeID ID;
- AddNodeIDNode(ID, ISD::STORE, VTs, Ops, 4);
+ AddNodeIDNode(ID, ISD::STORE, VTs, Ops);
ID.AddInteger(VT.getRawBits());
ID.AddInteger(encodeMemSDNodeFlags(false, ISD::UNINDEXED, MMO->isVolatile(),
MMO->isNonTemporal(), MMO->isInvariant()));
ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
- void *IP = 0;
+ void *IP = nullptr;
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
cast<StoreSDNode>(E)->refineAlignment(MMO);
return SDValue(E, 0);
@@ -4750,7 +4712,7 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, SDLoc dl, SDValue Val,
if (isNonTemporal)
Flags |= MachineMemOperand::MONonTemporal;
- if (PtrInfo.V == 0)
+ if (PtrInfo.V.isNull())
PtrInfo = InferPointerInfo(Ptr);
MachineFunction &MF = getMachineFunction();
@@ -4785,12 +4747,12 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, SDLoc dl, SDValue Val,
SDValue Undef = getUNDEF(Ptr.getValueType());
SDValue Ops[] = { Chain, Val, Ptr, Undef };
FoldingSetNodeID ID;
- AddNodeIDNode(ID, ISD::STORE, VTs, Ops, 4);
+ AddNodeIDNode(ID, ISD::STORE, VTs, Ops);
ID.AddInteger(SVT.getRawBits());
ID.AddInteger(encodeMemSDNodeFlags(true, ISD::UNINDEXED, MMO->isVolatile(),
MMO->isNonTemporal(), MMO->isInvariant()));
ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
- void *IP = 0;
+ void *IP = nullptr;
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
cast<StoreSDNode>(E)->refineAlignment(MMO);
return SDValue(E, 0);
@@ -4812,11 +4774,11 @@ SelectionDAG::getIndexedStore(SDValue OrigStore, SDLoc dl, SDValue Base,
SDVTList VTs = getVTList(Base.getValueType(), MVT::Other);
SDValue Ops[] = { ST->getChain(), ST->getValue(), Base, Offset };
FoldingSetNodeID ID;
- AddNodeIDNode(ID, ISD::STORE, VTs, Ops, 4);
+ AddNodeIDNode(ID, ISD::STORE, VTs, Ops);
ID.AddInteger(ST->getMemoryVT().getRawBits());
ID.AddInteger(ST->getRawSubclassData());
ID.AddInteger(ST->getPointerInfo().getAddrSpace());
- void *IP = 0;
+ void *IP = nullptr;
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
@@ -4835,14 +4797,14 @@ SDValue SelectionDAG::getVAArg(EVT VT, SDLoc dl,
SDValue SV,
unsigned Align) {
SDValue Ops[] = { Chain, Ptr, SV, getTargetConstant(Align, MVT::i32) };
- return getNode(ISD::VAARG, dl, getVTList(VT, MVT::Other), Ops, 4);
+ return getNode(ISD::VAARG, dl, getVTList(VT, MVT::Other), Ops);
}
SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT,
- const SDUse *Ops, unsigned NumOps) {
- switch (NumOps) {
+ ArrayRef<SDUse> Ops) {
+ switch (Ops.size()) {
case 0: return getNode(Opcode, DL, VT);
- case 1: return getNode(Opcode, DL, VT, Ops[0]);
+ case 1: return getNode(Opcode, DL, VT, static_cast<const SDValue>(Ops[0]));
case 2: return getNode(Opcode, DL, VT, Ops[0], Ops[1]);
case 3: return getNode(Opcode, DL, VT, Ops[0], Ops[1], Ops[2]);
default: break;
@@ -4850,12 +4812,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT,
// Copy from an SDUse array into an SDValue array for use with
// the regular getNode logic.
- SmallVector<SDValue, 8> NewOps(Ops, Ops + NumOps);
- return getNode(Opcode, DL, VT, &NewOps[0], NumOps);
+ SmallVector<SDValue, 8> NewOps(Ops.begin(), Ops.end());
+ return getNode(Opcode, DL, VT, NewOps);
}
SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT,
- const SDValue *Ops, unsigned NumOps) {
+ ArrayRef<SDValue> Ops) {
+ unsigned NumOps = Ops.size();
switch (NumOps) {
case 0: return getNode(Opcode, DL, VT);
case 1: return getNode(Opcode, DL, VT, Ops[0]);
@@ -4890,18 +4853,18 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT,
if (VT != MVT::Glue) {
FoldingSetNodeID ID;
- AddNodeIDNode(ID, Opcode, VTs, Ops, NumOps);
- void *IP = 0;
+ AddNodeIDNode(ID, Opcode, VTs, Ops);
+ void *IP = nullptr;
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
N = new (NodeAllocator) SDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(),
- VTs, Ops, NumOps);
+ VTs, Ops);
CSEMap.InsertNode(N, IP);
} else {
N = new (NodeAllocator) SDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(),
- VTs, Ops, NumOps);
+ VTs, Ops);
}
AllNodes.push_back(N);
@@ -4912,24 +4875,14 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT,
}
SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL,
- ArrayRef<EVT> ResultTys,
- const SDValue *Ops, unsigned NumOps) {
- return getNode(Opcode, DL, getVTList(&ResultTys[0], ResultTys.size()),
- Ops, NumOps);
-}
-
-SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL,
- const EVT *VTs, unsigned NumVTs,
- const SDValue *Ops, unsigned NumOps) {
- if (NumVTs == 1)
- return getNode(Opcode, DL, VTs[0], Ops, NumOps);
- return getNode(Opcode, DL, makeVTList(VTs, NumVTs), Ops, NumOps);
+ ArrayRef<EVT> ResultTys, ArrayRef<SDValue> Ops) {
+ return getNode(Opcode, DL, getVTList(ResultTys), Ops);
}
SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDVTList VTList,
- const SDValue *Ops, unsigned NumOps) {
+ ArrayRef<SDValue> Ops) {
if (VTList.NumVTs == 1)
- return getNode(Opcode, DL, VTList.VTs[0], Ops, NumOps);
+ return getNode(Opcode, DL, VTList.VTs[0], Ops);
#if 0
switch (Opcode) {
@@ -4956,10 +4909,11 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDVTList VTList,
// Memoize the node unless it returns a flag.
SDNode *N;
+ unsigned NumOps = Ops.size();
if (VTList.VTs[VTList.NumVTs-1] != MVT::Glue) {
FoldingSetNodeID ID;
- AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps);
- void *IP = 0;
+ AddNodeIDNode(ID, Opcode, VTList, Ops);
+ void *IP = nullptr;
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
@@ -4976,7 +4930,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDVTList VTList,
Ops[1], Ops[2]);
} else {
N = new (NodeAllocator) SDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(),
- VTList, Ops, NumOps);
+ VTList, Ops);
}
CSEMap.InsertNode(N, IP);
} else {
@@ -4993,7 +4947,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDVTList VTList,
Ops[1], Ops[2]);
} else {
N = new (NodeAllocator) SDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(),
- VTList, Ops, NumOps);
+ VTList, Ops);
}
}
AllNodes.push_back(N);
@@ -5004,39 +4958,39 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDVTList VTList,
}
SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDVTList VTList) {
- return getNode(Opcode, DL, VTList, 0, 0);
+ return getNode(Opcode, DL, VTList, ArrayRef<SDValue>());
}
SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDVTList VTList,
SDValue N1) {
SDValue Ops[] = { N1 };
- return getNode(Opcode, DL, VTList, Ops, 1);
+ return getNode(Opcode, DL, VTList, Ops);
}
SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDVTList VTList,
SDValue N1, SDValue N2) {
SDValue Ops[] = { N1, N2 };
- return getNode(Opcode, DL, VTList, Ops, 2);
+ return getNode(Opcode, DL, VTList, Ops);
}
SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDVTList VTList,
SDValue N1, SDValue N2, SDValue N3) {
SDValue Ops[] = { N1, N2, N3 };
- return getNode(Opcode, DL, VTList, Ops, 3);
+ return getNode(Opcode, DL, VTList, Ops);
}
SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDVTList VTList,
SDValue N1, SDValue N2, SDValue N3,
SDValue N4) {
SDValue Ops[] = { N1, N2, N3, N4 };
- return getNode(Opcode, DL, VTList, Ops, 4);
+ return getNode(Opcode, DL, VTList, Ops);
}
SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDVTList VTList,
SDValue N1, SDValue N2, SDValue N3,
SDValue N4, SDValue N5) {
SDValue Ops[] = { N1, N2, N3, N4, N5 };
- return getNode(Opcode, DL, VTList, Ops, 5);
+ return getNode(Opcode, DL, VTList, Ops);
}
SDVTList SelectionDAG::getVTList(EVT VT) {
@@ -5049,9 +5003,9 @@ SDVTList SelectionDAG::getVTList(EVT VT1, EVT VT2) {
ID.AddInteger(VT1.getRawBits());
ID.AddInteger(VT2.getRawBits());
- void *IP = 0;
+ void *IP = nullptr;
SDVTListNode *Result = VTListMap.FindNodeOrInsertPos(ID, IP);
- if (Result == NULL) {
+ if (!Result) {
EVT *Array = Allocator.Allocate<EVT>(2);
Array[0] = VT1;
Array[1] = VT2;
@@ -5068,9 +5022,9 @@ SDVTList SelectionDAG::getVTList(EVT VT1, EVT VT2, EVT VT3) {
ID.AddInteger(VT2.getRawBits());
ID.AddInteger(VT3.getRawBits());
- void *IP = 0;
+ void *IP = nullptr;
SDVTListNode *Result = VTListMap.FindNodeOrInsertPos(ID, IP);
- if (Result == NULL) {
+ if (!Result) {
EVT *Array = Allocator.Allocate<EVT>(3);
Array[0] = VT1;
Array[1] = VT2;
@@ -5089,9 +5043,9 @@ SDVTList SelectionDAG::getVTList(EVT VT1, EVT VT2, EVT VT3, EVT VT4) {
ID.AddInteger(VT3.getRawBits());
ID.AddInteger(VT4.getRawBits());
- void *IP = 0;
+ void *IP = nullptr;
SDVTListNode *Result = VTListMap.FindNodeOrInsertPos(ID, IP);
- if (Result == NULL) {
+ if (!Result) {
EVT *Array = Allocator.Allocate<EVT>(4);
Array[0] = VT1;
Array[1] = VT2;
@@ -5103,18 +5057,19 @@ SDVTList SelectionDAG::getVTList(EVT VT1, EVT VT2, EVT VT3, EVT VT4) {
return Result->getSDVTList();
}
-SDVTList SelectionDAG::getVTList(const EVT *VTs, unsigned NumVTs) {
+SDVTList SelectionDAG::getVTList(ArrayRef<EVT> VTs) {
+ unsigned NumVTs = VTs.size();
FoldingSetNodeID ID;
ID.AddInteger(NumVTs);
for (unsigned index = 0; index < NumVTs; index++) {
ID.AddInteger(VTs[index].getRawBits());
}
- void *IP = 0;
+ void *IP = nullptr;
SDVTListNode *Result = VTListMap.FindNodeOrInsertPos(ID, IP);
- if (Result == NULL) {
+ if (!Result) {
EVT *Array = Allocator.Allocate<EVT>(NumVTs);
- std::copy(VTs, VTs + NumVTs, Array);
+ std::copy(VTs.begin(), VTs.end(), Array);
Result = new (Allocator) SDVTListNode(ID.Intern(Allocator), Array, NumVTs);
VTListMap.InsertNode(Result, IP);
}
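getVTList keeps the FoldingSet uniquing idiom while switching to ArrayRef<EVT>. A self-contained sketch of that idiom, mirroring the FindNodeOrInsertPos/InsertNode pairing above (the real code interns the ID and allocates from a BumpPtrAllocator; this version uses plain new for brevity):

#include "llvm/ADT/FoldingSet.h"
using namespace llvm;

struct VTListLike : FoldingSetNode {
  unsigned A, B;
  VTListLike(unsigned A, unsigned B) : A(A), B(B) {}
  void Profile(FoldingSetNodeID &ID) const {
    ID.AddInteger(A);
    ID.AddInteger(B);
  }
};

VTListLike *getOrCreate(FoldingSet<VTListLike> &Map, unsigned A, unsigned B) {
  FoldingSetNodeID ID;
  ID.AddInteger(A);
  ID.AddInteger(B);
  void *IP = nullptr;                   // insertion hint, as in the hunk
  if (VTListLike *N = Map.FindNodeOrInsertPos(ID, IP))
    return N;                           // cache hit: reuse the uniqued node
  VTListLike *N = new VTListLike(A, B); // cache miss: build and remember it
  Map.InsertNode(N, IP);
  return N;
}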
@@ -5135,14 +5090,14 @@ SDNode *SelectionDAG::UpdateNodeOperands(SDNode *N, SDValue Op) {
if (Op == N->getOperand(0)) return N;
// See if the modified node already exists.
- void *InsertPos = 0;
+ void *InsertPos = nullptr;
if (SDNode *Existing = FindModifiedNodeSlot(N, Op, InsertPos))
return Existing;
// Nope it doesn't. Remove the node from its current place in the maps.
if (InsertPos)
if (!RemoveNodeFromCSEMaps(N))
- InsertPos = 0;
+ InsertPos = nullptr;
// Now we update the operands.
N->OperandList[0].set(Op);
@@ -5160,14 +5115,14 @@ SDNode *SelectionDAG::UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2) {
return N; // No operands changed, just return the input node.
// See if the modified node already exists.
- void *InsertPos = 0;
+ void *InsertPos = nullptr;
if (SDNode *Existing = FindModifiedNodeSlot(N, Op1, Op2, InsertPos))
return Existing;
// Nope it doesn't. Remove the node from its current place in the maps.
if (InsertPos)
if (!RemoveNodeFromCSEMaps(N))
- InsertPos = 0;
+ InsertPos = nullptr;
// Now we update the operands.
if (N->OperandList[0] != Op1)
@@ -5183,25 +5138,26 @@ SDNode *SelectionDAG::UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2) {
SDNode *SelectionDAG::
UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2, SDValue Op3) {
SDValue Ops[] = { Op1, Op2, Op3 };
- return UpdateNodeOperands(N, Ops, 3);
+ return UpdateNodeOperands(N, Ops);
}
SDNode *SelectionDAG::
UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2,
SDValue Op3, SDValue Op4) {
SDValue Ops[] = { Op1, Op2, Op3, Op4 };
- return UpdateNodeOperands(N, Ops, 4);
+ return UpdateNodeOperands(N, Ops);
}
SDNode *SelectionDAG::
UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2,
SDValue Op3, SDValue Op4, SDValue Op5) {
SDValue Ops[] = { Op1, Op2, Op3, Op4, Op5 };
- return UpdateNodeOperands(N, Ops, 5);
+ return UpdateNodeOperands(N, Ops);
}
SDNode *SelectionDAG::
-UpdateNodeOperands(SDNode *N, const SDValue *Ops, unsigned NumOps) {
+UpdateNodeOperands(SDNode *N, ArrayRef<SDValue> Ops) {
+ unsigned NumOps = Ops.size();
assert(N->getNumOperands() == NumOps &&
"Update with wrong number of operands");
@@ -5218,14 +5174,14 @@ UpdateNodeOperands(SDNode *N, const SDValue *Ops, unsigned NumOps) {
if (!AnyChange) return N;
// See if the modified node already exists.
- void *InsertPos = 0;
- if (SDNode *Existing = FindModifiedNodeSlot(N, Ops, NumOps, InsertPos))
+ void *InsertPos = nullptr;
+ if (SDNode *Existing = FindModifiedNodeSlot(N, Ops, InsertPos))
return Existing;
// Nope it doesn't. Remove the node from its current place in the maps.
if (InsertPos)
if (!RemoveNodeFromCSEMaps(N))
- InsertPos = 0;
+ InsertPos = nullptr;
// Now we update the operands.
for (unsigned i = 0; i != NumOps; ++i)
@@ -5254,14 +5210,14 @@ void SDNode::DropOperands() {
SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
EVT VT) {
SDVTList VTs = getVTList(VT);
- return SelectNodeTo(N, MachineOpc, VTs, 0, 0);
+ return SelectNodeTo(N, MachineOpc, VTs, None);
}
SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
EVT VT, SDValue Op1) {
SDVTList VTs = getVTList(VT);
SDValue Ops[] = { Op1 };
- return SelectNodeTo(N, MachineOpc, VTs, Ops, 1);
+ return SelectNodeTo(N, MachineOpc, VTs, Ops);
}
SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
@@ -5269,7 +5225,7 @@ SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
SDValue Op2) {
SDVTList VTs = getVTList(VT);
SDValue Ops[] = { Op1, Op2 };
- return SelectNodeTo(N, MachineOpc, VTs, Ops, 2);
+ return SelectNodeTo(N, MachineOpc, VTs, Ops);
}
SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
@@ -5277,41 +5233,39 @@ SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
SDValue Op2, SDValue Op3) {
SDVTList VTs = getVTList(VT);
SDValue Ops[] = { Op1, Op2, Op3 };
- return SelectNodeTo(N, MachineOpc, VTs, Ops, 3);
+ return SelectNodeTo(N, MachineOpc, VTs, Ops);
}
SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
- EVT VT, const SDValue *Ops,
- unsigned NumOps) {
+ EVT VT, ArrayRef<SDValue> Ops) {
SDVTList VTs = getVTList(VT);
- return SelectNodeTo(N, MachineOpc, VTs, Ops, NumOps);
+ return SelectNodeTo(N, MachineOpc, VTs, Ops);
}
SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
- EVT VT1, EVT VT2, const SDValue *Ops,
- unsigned NumOps) {
+ EVT VT1, EVT VT2, ArrayRef<SDValue> Ops) {
SDVTList VTs = getVTList(VT1, VT2);
- return SelectNodeTo(N, MachineOpc, VTs, Ops, NumOps);
+ return SelectNodeTo(N, MachineOpc, VTs, Ops);
}
SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
EVT VT1, EVT VT2) {
SDVTList VTs = getVTList(VT1, VT2);
- return SelectNodeTo(N, MachineOpc, VTs, (SDValue *)0, 0);
+ return SelectNodeTo(N, MachineOpc, VTs, None);
}
SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
EVT VT1, EVT VT2, EVT VT3,
- const SDValue *Ops, unsigned NumOps) {
+ ArrayRef<SDValue> Ops) {
SDVTList VTs = getVTList(VT1, VT2, VT3);
- return SelectNodeTo(N, MachineOpc, VTs, Ops, NumOps);
+ return SelectNodeTo(N, MachineOpc, VTs, Ops);
}
SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
EVT VT1, EVT VT2, EVT VT3, EVT VT4,
- const SDValue *Ops, unsigned NumOps) {
+ ArrayRef<SDValue> Ops) {
SDVTList VTs = getVTList(VT1, VT2, VT3, VT4);
- return SelectNodeTo(N, MachineOpc, VTs, Ops, NumOps);
+ return SelectNodeTo(N, MachineOpc, VTs, Ops);
}
SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
@@ -5319,7 +5273,7 @@ SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
SDValue Op1) {
SDVTList VTs = getVTList(VT1, VT2);
SDValue Ops[] = { Op1 };
- return SelectNodeTo(N, MachineOpc, VTs, Ops, 1);
+ return SelectNodeTo(N, MachineOpc, VTs, Ops);
}
SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
@@ -5327,7 +5281,7 @@ SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
SDValue Op1, SDValue Op2) {
SDVTList VTs = getVTList(VT1, VT2);
SDValue Ops[] = { Op1, Op2 };
- return SelectNodeTo(N, MachineOpc, VTs, Ops, 2);
+ return SelectNodeTo(N, MachineOpc, VTs, Ops);
}
SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
@@ -5336,7 +5290,7 @@ SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
SDValue Op3) {
SDVTList VTs = getVTList(VT1, VT2);
SDValue Ops[] = { Op1, Op2, Op3 };
- return SelectNodeTo(N, MachineOpc, VTs, Ops, 3);
+ return SelectNodeTo(N, MachineOpc, VTs, Ops);
}
SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
@@ -5345,13 +5299,12 @@ SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
SDValue Op3) {
SDVTList VTs = getVTList(VT1, VT2, VT3);
SDValue Ops[] = { Op1, Op2, Op3 };
- return SelectNodeTo(N, MachineOpc, VTs, Ops, 3);
+ return SelectNodeTo(N, MachineOpc, VTs, Ops);
}
SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
- SDVTList VTs, const SDValue *Ops,
- unsigned NumOps) {
- N = MorphNodeTo(N, ~MachineOpc, VTs, Ops, NumOps);
+ SDVTList VTs, ArrayRef<SDValue> Ops) {
+ N = MorphNodeTo(N, ~MachineOpc, VTs, Ops);
// Reset the NodeID to -1.
N->setNodeId(-1);
return N;
@@ -5388,19 +5341,19 @@ SDNode *SelectionDAG::UpdadeSDLocOnMergedSDNode(SDNode *N, SDLoc OLoc) {
/// the node's users.
///
SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
- SDVTList VTs, const SDValue *Ops,
- unsigned NumOps) {
+ SDVTList VTs, ArrayRef<SDValue> Ops) {
+ unsigned NumOps = Ops.size();
// If an identical node already exists, use it.
- void *IP = 0;
+ void *IP = nullptr;
if (VTs.VTs[VTs.NumVTs-1] != MVT::Glue) {
FoldingSetNodeID ID;
- AddNodeIDNode(ID, Opc, VTs, Ops, NumOps);
+ AddNodeIDNode(ID, Opc, VTs, Ops);
if (SDNode *ON = CSEMap.FindNodeOrInsertPos(ID, IP))
return UpdadeSDLocOnMergedSDNode(ON, SDLoc(N));
}
if (!RemoveNodeFromCSEMaps(N))
- IP = 0;
+ IP = nullptr;
// Start the morphing.
N->NodeType = Opc;
@@ -5420,7 +5373,7 @@ SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
if (MachineSDNode *MN = dyn_cast<MachineSDNode>(N)) {
// Initialize the memory references information.
- MN->setMemRefs(0, 0);
+ MN->setMemRefs(nullptr, nullptr);
// If NumOps is larger than the # of operands we can have in a
// MachineSDNode, reallocate the operand list.
if (NumOps > MN->NumOperands || !MN->OperandsNeedDelete) {
@@ -5431,22 +5384,22 @@ SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
// remainder of the current SelectionDAG iteration, so we can allocate
// the operands directly out of a pool with no recycling metadata.
MN->InitOperands(OperandAllocator.Allocate<SDUse>(NumOps),
- Ops, NumOps);
+ Ops.data(), NumOps);
else
- MN->InitOperands(MN->LocalOperands, Ops, NumOps);
+ MN->InitOperands(MN->LocalOperands, Ops.data(), NumOps);
MN->OperandsNeedDelete = false;
} else
- MN->InitOperands(MN->OperandList, Ops, NumOps);
+ MN->InitOperands(MN->OperandList, Ops.data(), NumOps);
} else {
// If NumOps is larger than the # of operands we currently have, reallocate
// the operand list.
if (NumOps > N->NumOperands) {
if (N->OperandsNeedDelete)
delete[] N->OperandList;
- N->InitOperands(new SDUse[NumOps], Ops, NumOps);
+ N->InitOperands(new SDUse[NumOps], Ops.data(), NumOps);
N->OperandsNeedDelete = true;
} else
- N->InitOperands(N->OperandList, Ops, NumOps);
+ N->InitOperands(N->OperandList, Ops.data(), NumOps);
}
// Delete any nodes that are still dead after adding the uses for the
@@ -5585,7 +5538,7 @@ MachineSDNode *
SelectionDAG::getMachineNode(unsigned Opcode, SDLoc dl,
ArrayRef<EVT> ResultTys,
ArrayRef<SDValue> Ops) {
- SDVTList VTs = getVTList(&ResultTys[0], ResultTys.size());
+ SDVTList VTs = getVTList(ResultTys);
return getMachineNode(Opcode, dl, VTs, Ops);
}
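Beyond brevity, replacing getVTList(&ResultTys[0], ResultTys.size()) with getVTList(ResultTys) removes a latent bug class: &V[0] on an empty vector is undefined behavior, while ArrayRef's container constructors handle the empty case. A short sketch:

#include "llvm/ADT/ArrayRef.h"
#include <vector>

static unsigned total(llvm::ArrayRef<int> Xs) {
  unsigned Sum = 0;
  for (unsigned i = 0, e = Xs.size(); i != e; ++i)
    Sum += Xs[i];
  return Sum;
}

void demo() {
  std::vector<int> Empty;
  (void)total(Empty); // fine: the ArrayRef is (data, 0), nothing dereferenced
  // The old two-argument style would have evaluated &Empty[0] here, which
  // is undefined behavior before the callee even runs.
}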
@@ -5594,14 +5547,14 @@ SelectionDAG::getMachineNode(unsigned Opcode, SDLoc DL, SDVTList VTs,
ArrayRef<SDValue> OpsArray) {
bool DoCSE = VTs.VTs[VTs.NumVTs-1] != MVT::Glue;
MachineSDNode *N;
- void *IP = 0;
+ void *IP = nullptr;
const SDValue *Ops = OpsArray.data();
unsigned NumOps = OpsArray.size();
if (DoCSE) {
FoldingSetNodeID ID;
- AddNodeIDNode(ID, ~Opcode, VTs, Ops, NumOps);
- IP = 0;
+ AddNodeIDNode(ID, ~Opcode, VTs, OpsArray);
+ IP = nullptr;
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
return cast<MachineSDNode>(UpdadeSDLocOnMergedSDNode(E, DL));
}
@@ -5657,34 +5610,39 @@ SelectionDAG::getTargetInsertSubreg(int SRIdx, SDLoc DL, EVT VT,
/// getNodeIfExists - Get the specified node if it's already available, or
/// else return nullptr.
SDNode *SelectionDAG::getNodeIfExists(unsigned Opcode, SDVTList VTList,
- const SDValue *Ops, unsigned NumOps) {
+ ArrayRef<SDValue> Ops) {
if (VTList.VTs[VTList.NumVTs-1] != MVT::Glue) {
FoldingSetNodeID ID;
- AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps);
- void *IP = 0;
+ AddNodeIDNode(ID, Opcode, VTList, Ops);
+ void *IP = nullptr;
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
return E;
}
- return NULL;
+ return nullptr;
}
/// getDbgValue - Creates a SDDbgValue node.
///
+/// SDNode
SDDbgValue *
-SelectionDAG::getDbgValue(MDNode *MDPtr, SDNode *N, unsigned R, uint64_t Off,
+SelectionDAG::getDbgValue(MDNode *MDPtr, SDNode *N, unsigned R,
+ bool IsIndirect, uint64_t Off,
DebugLoc DL, unsigned O) {
- return new (Allocator) SDDbgValue(MDPtr, N, R, Off, DL, O);
+ return new (Allocator) SDDbgValue(MDPtr, N, R, IsIndirect, Off, DL, O);
}
+/// Constant
SDDbgValue *
-SelectionDAG::getDbgValue(MDNode *MDPtr, const Value *C, uint64_t Off,
- DebugLoc DL, unsigned O) {
+SelectionDAG::getConstantDbgValue(MDNode *MDPtr, const Value *C,
+ uint64_t Off,
+ DebugLoc DL, unsigned O) {
return new (Allocator) SDDbgValue(MDPtr, C, Off, DL, O);
}
+/// FrameIndex
SDDbgValue *
-SelectionDAG::getDbgValue(MDNode *MDPtr, unsigned FI, uint64_t Off,
- DebugLoc DL, unsigned O) {
+SelectionDAG::getFrameIndexDbgValue(MDNode *MDPtr, unsigned FI, uint64_t Off,
+ DebugLoc DL, unsigned O) {
return new (Allocator) SDDbgValue(MDPtr, FI, Off, DL, O);
}
@@ -6049,7 +6007,7 @@ unsigned SelectionDAG::AssignTopologicalOrder() {
dbgs() << "Overran sorted position:\n";
S->dumprFull();
#endif
- llvm_unreachable(0);
+ llvm_unreachable(nullptr);
}
}
@@ -6090,6 +6048,7 @@ void SelectionDAG::TransferDbgValues(SDValue From, SDValue To) {
SDDbgValue *Dbg = *I;
if (Dbg->getKind() == SDDbgValue::SDNODE) {
SDDbgValue *Clone = getDbgValue(Dbg->getMDPtr(), ToNode, To.getResNo(),
+ Dbg->isIndirect(),
Dbg->getOffset(), Dbg->getDebugLoc(),
Dbg->getOrder());
ClonedDVs.push_back(Clone);
@@ -6133,9 +6092,8 @@ MemSDNode::MemSDNode(unsigned Opc, unsigned Order, DebugLoc dl, SDVTList VTs,
}
MemSDNode::MemSDNode(unsigned Opc, unsigned Order, DebugLoc dl, SDVTList VTs,
- const SDValue *Ops, unsigned NumOps, EVT memvt,
- MachineMemOperand *mmo)
- : SDNode(Opc, Order, dl, VTs, Ops, NumOps),
+ ArrayRef<SDValue> Ops, EVT memvt, MachineMemOperand *mmo)
+ : SDNode(Opc, Order, dl, VTs, Ops),
MemoryVT(memvt), MMO(mmo) {
SubclassData = encodeMemSDNodeFlags(0, ISD::UNINDEXED, MMO->isVolatile(),
MMO->isNonTemporal(), MMO->isInvariant());
@@ -6354,12 +6312,10 @@ SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) {
switch (N->getOpcode()) {
default:
- Scalars.push_back(getNode(N->getOpcode(), dl, EltVT,
- &Operands[0], Operands.size()));
+ Scalars.push_back(getNode(N->getOpcode(), dl, EltVT, Operands));
break;
case ISD::VSELECT:
- Scalars.push_back(getNode(ISD::SELECT, dl, EltVT,
- &Operands[0], Operands.size()));
+ Scalars.push_back(getNode(ISD::SELECT, dl, EltVT, Operands));
break;
case ISD::SHL:
case ISD::SRA:
@@ -6384,8 +6340,7 @@ SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) {
Scalars.push_back(getUNDEF(EltVT));
return getNode(ISD::BUILD_VECTOR, dl,
- EVT::getVectorVT(*getContext(), EltVT, ResNE),
- &Scalars[0], Scalars.size());
+ EVT::getVectorVT(*getContext(), EltVT, ResNE), Scalars);
}
@@ -6419,8 +6374,8 @@ bool SelectionDAG::isConsecutiveLoad(LoadSDNode *LD, LoadSDNode *Base,
cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue() == Dist*Bytes)
return true;
- const GlobalValue *GV1 = NULL;
- const GlobalValue *GV2 = NULL;
+ const GlobalValue *GV1 = nullptr;
+ const GlobalValue *GV2 = nullptr;
int64_t Offset1 = 0;
int64_t Offset2 = 0;
const TargetLowering *TLI = TM.getTargetLowering();
@@ -6442,8 +6397,8 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const {
if (TLI->isGAPlusOffset(Ptr.getNode(), GV, GVOffset)) {
unsigned PtrWidth = TLI->getPointerTypeSizeInBits(GV->getType());
APInt KnownZero(PtrWidth, 0), KnownOne(PtrWidth, 0);
- llvm::ComputeMaskedBits(const_cast<GlobalValue*>(GV), KnownZero, KnownOne,
- TLI->getDataLayout());
+ llvm::computeKnownBits(const_cast<GlobalValue*>(GV), KnownZero, KnownOne,
+ TLI->getDataLayout());
unsigned AlignBits = KnownZero.countTrailingOnes();
unsigned Align = AlignBits ? 1 << std::min(31U, AlignBits) : 0;
if (Align)
@@ -6505,6 +6460,22 @@ SelectionDAG::SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT,
return std::make_pair(Lo, Hi);
}
+void SelectionDAG::ExtractVectorElements(SDValue Op,
+ SmallVectorImpl<SDValue> &Args,
+ unsigned Start, unsigned Count) {
+ EVT VT = Op.getValueType();
+ if (Count == 0)
+ Count = VT.getVectorNumElements();
+
+ EVT EltVT = VT.getVectorElementType();
+ EVT IdxTy = TLI->getVectorIdxTy();
+ SDLoc SL(Op);
+ for (unsigned i = Start, e = Start + Count; i != e; ++i) {
+ Args.push_back(getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT,
+ Op, getConstant(i, IdxTy)));
+ }
+}
+
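ExtractVectorElements is a new helper. A hedged usage sketch follows (hypothetical call site inside a lowering routine with a SelectionDAG &DAG and a vector-typed SDValue Vec; it assumes the declaration defaults Start and Count to 0, which the Count == 0 handling above implies):

SmallVector<SDValue, 8> AllElts;
DAG.ExtractVectorElements(Vec, AllElts);  // every lane of Vec
SmallVector<SDValue, 2> MidElts;
DAG.ExtractVectorElements(Vec, MidElts, /*Start=*/2, /*Count=*/2); // lanes [2,4)
// Each entry is an ISD::EXTRACT_VECTOR_ELT node; callers can recombine
// them, e.g. with ISD::BUILD_VECTOR over a narrower vector type.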
// getAddressSpace - Return the address space this GlobalAddress belongs to.
unsigned GlobalAddressSDNode::getAddressSpace() const {
return getGlobal()->getType()->getAddressSpace();
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 4a6e5cf..070e929 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -11,7 +11,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "isel"
#include "SelectionDAGBuilder.h"
#include "SDNodeDbgValue.h"
#include "llvm/ADT/BitVector.h"
@@ -62,6 +61,8 @@
#include <algorithm>
using namespace llvm;
+#define DEBUG_TYPE "isel"
+
/// LimitFloatPrecision - Generate low-precision inline sequences for
/// some float libcalls (6, 8 or 12 bits).
static unsigned LimitFloatPrecision;
@@ -276,9 +277,9 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, SDLoc DL,
// Build a vector with BUILD_VECTOR or CONCAT_VECTORS from the
// intermediate operands.
- Val = DAG.getNode(IntermediateVT.isVector() ?
- ISD::CONCAT_VECTORS : ISD::BUILD_VECTOR, DL,
- ValueVT, &Ops[0], NumIntermediates);
+ Val = DAG.getNode(IntermediateVT.isVector() ? ISD::CONCAT_VECTORS
+ : ISD::BUILD_VECTOR,
+ DL, ValueVT, Ops);
}
// There is now one part, held in Val. Correct it to match ValueVT.
@@ -495,7 +496,7 @@ static void getCopyToPartsVector(SelectionDAG &DAG, SDLoc DL,
e = PartVT.getVectorNumElements(); i != e; ++i)
Ops.push_back(DAG.getUNDEF(ElementVT));
- Val = DAG.getNode(ISD::BUILD_VECTOR, DL, PartVT, &Ops[0], Ops.size());
+ Val = DAG.getNode(ISD::BUILD_VECTOR, DL, PartVT, Ops);
// FIXME: Use CONCAT for 2x -> 4x.
@@ -638,7 +639,7 @@ namespace {
SDValue getCopyFromRegs(SelectionDAG &DAG, FunctionLoweringInfo &FuncInfo,
SDLoc dl,
SDValue &Chain, SDValue *Flag,
- const Value *V = 0) const;
+ const Value *V = nullptr) const;
/// getCopyToRegs - Emit a series of CopyToReg nodes that copies the
/// specified value into the registers specified by this object. This uses
@@ -684,7 +685,7 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
Parts.resize(NumRegs);
for (unsigned i = 0; i != NumRegs; ++i) {
SDValue P;
- if (Flag == 0) {
+ if (!Flag) {
P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT);
} else {
P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT, *Flag);
@@ -752,9 +753,7 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
Parts.clear();
}
- return DAG.getNode(ISD::MERGE_VALUES, dl,
- DAG.getVTList(&ValueVTs[0], ValueVTs.size()),
- &Values[0], ValueVTs.size());
+ return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(ValueVTs), Values);
}
/// getCopyToRegs - Emit a series of CopyToReg nodes that copies the
@@ -785,7 +784,7 @@ void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, SDLoc dl,
SmallVector<SDValue, 8> Chains(NumRegs);
for (unsigned i = 0; i != NumRegs; ++i) {
SDValue Part;
- if (Flag == 0) {
+ if (!Flag) {
Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i]);
} else {
Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i], *Flag);
@@ -808,7 +807,7 @@ void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, SDLoc dl,
// = op c3, ..., f2
Chain = Chains[NumRegs-1];
else
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Chains[0], NumRegs);
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
}
/// AddInlineAsmOperands - Add this value to the specified inlineasm node
@@ -877,7 +876,7 @@ void SelectionDAGBuilder::clear() {
UnusedArgNodeMap.clear();
PendingLoads.clear();
PendingExports.clear();
- CurInst = NULL;
+ CurInst = nullptr;
HasTailCall = false;
SDNodeOrder = LowestSDNodeOrder;
}
@@ -910,7 +909,7 @@ SDValue SelectionDAGBuilder::getRoot() {
// Otherwise, we have to make a token factor node.
SDValue Root = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other,
- &PendingLoads[0], PendingLoads.size());
+ PendingLoads);
PendingLoads.clear();
DAG.setRoot(Root);
return Root;
@@ -940,8 +939,7 @@ SDValue SelectionDAGBuilder::getControlRoot() {
}
Root = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other,
- &PendingExports[0],
- PendingExports.size());
+ PendingExports);
PendingExports.clear();
DAG.setRoot(Root);
return Root;
@@ -961,7 +959,7 @@ void SelectionDAGBuilder::visit(const Instruction &I) {
if (!isa<TerminatorInst>(&I) && !HasTailCall)
CopyToExportRegsIfNeeded(&I);
- CurInst = NULL;
+ CurInst = nullptr;
}
void SelectionDAGBuilder::visitPHI(const PHINode &) {
@@ -991,11 +989,14 @@ void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V,
unsigned DbgSDNodeOrder = DDI.getSDNodeOrder();
MDNode *Variable = DI->getVariable();
uint64_t Offset = DI->getOffset();
+ // A dbg.value for an alloca is always indirect.
+ bool IsIndirect = isa<AllocaInst>(V) || Offset != 0;
SDDbgValue *SDV;
if (Val.getNode()) {
- if (!EmitFuncArgumentDbgValue(V, Variable, Offset, Val)) {
+ if (!EmitFuncArgumentDbgValue(V, Variable, Offset, IsIndirect, Val)) {
SDV = DAG.getDbgValue(Variable, Val.getNode(),
- Val.getResNo(), Offset, dl, DbgSDNodeOrder);
+ Val.getResNo(), IsIndirect,
+ Offset, dl, DbgSDNodeOrder);
DAG.AddDbgValue(SDV, Val.getNode(), false);
}
} else
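The new IsIndirect flag records whether the SDValue holds the variable's value or its address. For an alloca, getValue(V) yields the frame address, so the eventual DBG_VALUE must be indirect; a nonzero dbg.value offset forces the same. The rule in isolation (names as in the hunk above):

// Direct:   DBG_VALUE vreg          — the register holds the value itself.
// Indirect: DBG_VALUE vreg, offset  — the register (plus offset) is the
//                                     value's address in memory.
bool IsIndirect = isa<AllocaInst>(V) || Offset != 0;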
@@ -1020,7 +1021,7 @@ SDValue SelectionDAGBuilder::getValue(const Value *V) {
RegsForValue RFV(*DAG.getContext(), *TM.getTargetLowering(),
InReg, V->getType());
SDValue Chain = DAG.getEntryNode();
- N = RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, NULL, V);
+ N = RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V);
resolveDanglingDebugInfo(V, N);
return N;
}
@@ -1091,8 +1092,7 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
Constants.push_back(SDValue(Val, i));
}
- return DAG.getMergeValues(&Constants[0], Constants.size(),
- getCurSDLoc());
+ return DAG.getMergeValues(Constants, getCurSDLoc());
}
if (const ConstantDataSequential *CDS =
@@ -1107,9 +1107,9 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
}
if (isa<ArrayType>(CDS->getType()))
- return DAG.getMergeValues(&Ops[0], Ops.size(), getCurSDLoc());
+ return DAG.getMergeValues(Ops, getCurSDLoc());
return NodeMap[V] = DAG.getNode(ISD::BUILD_VECTOR, getCurSDLoc(),
- VT, &Ops[0], Ops.size());
+ VT, Ops);
}
if (C->getType()->isStructTy() || C->getType()->isArrayTy()) {
@@ -1132,8 +1132,7 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
Constants[i] = DAG.getConstant(0, EltVT);
}
- return DAG.getMergeValues(&Constants[0], NumElts,
- getCurSDLoc());
+ return DAG.getMergeValues(Constants, getCurSDLoc());
}
if (const BlockAddress *BA = dyn_cast<BlockAddress>(C))
@@ -1161,8 +1160,7 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
}
// Create a BUILD_VECTOR node.
- return NodeMap[V] = DAG.getNode(ISD::BUILD_VECTOR, getCurSDLoc(),
- VT, &Ops[0], Ops.size());
+ return NodeMap[V] = DAG.getNode(ISD::BUILD_VECTOR, getCurSDLoc(), VT, Ops);
}
// If this is a static alloca, generate it as the frameindex instead of
@@ -1179,7 +1177,7 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
unsigned InReg = FuncInfo.InitializeRegForValue(Inst);
RegsForValue RFV(*DAG.getContext(), *TLI, InReg, Inst->getType());
SDValue Chain = DAG.getEntryNode();
- return RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, NULL, V);
+ return RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V);
}
llvm_unreachable("Can't get register for value!");
@@ -1223,7 +1221,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
}
Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(),
- MVT::Other, &Chains[0], NumValues);
+ MVT::Other, Chains);
} else if (I.getNumOperands() != 0) {
SmallVector<EVT, 4> ValueVTs;
ComputeValueVTs(*TLI, I.getOperand(0)->getType(), ValueVTs);
@@ -1406,8 +1404,8 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond,
llvm_unreachable("Unknown compare instruction");
}
- CaseBlock CB(Condition, BOp->getOperand(0),
- BOp->getOperand(1), NULL, TBB, FBB, CurBB, TWeight, FWeight);
+ CaseBlock CB(Condition, BOp->getOperand(0), BOp->getOperand(1), nullptr,
+ TBB, FBB, CurBB, TWeight, FWeight);
SwitchCases.push_back(CB);
return;
}
@@ -1415,7 +1413,7 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond,
// Create a CaseBlock record representing this branch.
CaseBlock CB(ISD::SETEQ, Cond, ConstantInt::getTrue(*DAG.getContext()),
- NULL, TBB, FBB, CurBB, TWeight, FWeight);
+ nullptr, TBB, FBB, CurBB, TWeight, FWeight);
SwitchCases.push_back(CB);
}
@@ -1562,7 +1560,7 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) {
MachineBasicBlock *Succ0MBB = FuncInfo.MBBMap[I.getSuccessor(0)];
// Figure out which block is immediately after the current one.
- MachineBasicBlock *NextBlock = 0;
+ MachineBasicBlock *NextBlock = nullptr;
MachineFunction::iterator BBI = BrMBB;
if (++BBI != FuncInfo.MF->end())
NextBlock = BBI;
@@ -1639,7 +1637,7 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) {
// Create a CaseBlock record representing this branch.
CaseBlock CB(ISD::SETEQ, CondVal, ConstantInt::getTrue(*DAG.getContext()),
- NULL, Succ0MBB, Succ1MBB, BrMBB);
+ nullptr, Succ0MBB, Succ1MBB, BrMBB);
// Use visitSwitchCase to actually insert the fast branch sequence for this
// cond branch.
@@ -1655,7 +1653,7 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB,
SDLoc dl = getCurSDLoc();
// Build the setcc now.
- if (CB.CmpMHS == NULL) {
+ if (!CB.CmpMHS) {
// Fold "(X == true)" to X and "(X == false)" to !X to
// handle common cases produced by branch lowering.
if (CB.CmpRHS == ConstantInt::getTrue(*DAG.getContext()) &&
@@ -1696,7 +1694,7 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB,
// Set NextBlock to be the MBB immediately after the current one, if any.
// This is used to avoid emitting unnecessary branches to the next block.
- MachineBasicBlock *NextBlock = 0;
+ MachineBasicBlock *NextBlock = nullptr;
MachineFunction::iterator BBI = SwitchBB;
if (++BBI != FuncInfo.MF->end())
NextBlock = BBI;
@@ -1774,7 +1772,7 @@ void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT,
// Set NextBlock to be the MBB immediately after the current one, if any.
// This is used to avoid emitting unnecessary branches to the next block.
- MachineBasicBlock *NextBlock = 0;
+ MachineBasicBlock *NextBlock = nullptr;
MachineFunction::iterator BBI = SwitchBB;
if (++BBI != FuncInfo.MF->end())
@@ -1857,8 +1855,8 @@ void
SelectionDAGBuilder::visitSPDescriptorFailure(StackProtectorDescriptor &SPD) {
const TargetLowering *TLI = TM.getTargetLowering();
SDValue Chain = TLI->makeLibCall(DAG, RTLIB::STACKPROTECTOR_CHECK_FAIL,
- MVT::isVoid, 0, 0, false, getCurSDLoc(),
- false, false).second;
+ MVT::isVoid, nullptr, 0, false,
+ getCurSDLoc(), false, false).second;
DAG.setRoot(Chain);
}
@@ -1905,7 +1903,7 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B,
// Set NextBlock to be the MBB immediately after the current one, if any.
// This is used to avoid emitting unnecessary branches to the next block.
- MachineBasicBlock *NextBlock = 0;
+ MachineBasicBlock *NextBlock = nullptr;
MachineFunction::iterator BBI = SwitchBB;
if (++BBI != FuncInfo.MF->end())
NextBlock = BBI;
@@ -1979,7 +1977,7 @@ void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB,
// Set NextBlock to be the MBB immediately after the current one, if any.
// This is used to avoid emitting unnecessary branches to the next block.
- MachineBasicBlock *NextBlock = 0;
+ MachineBasicBlock *NextBlock = nullptr;
MachineFunction::iterator BBI = SwitchBB;
if (++BBI != FuncInfo.MF->end())
NextBlock = BBI;
@@ -2059,8 +2057,7 @@ void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) {
// Merge into one.
SDValue Res = DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
- DAG.getVTList(&ValueVTs[0], ValueVTs.size()),
- &Ops[0], 2);
+ DAG.getVTList(ValueVTs), Ops);
setValue(&LP, Res);
}
@@ -2081,7 +2078,7 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR,
MachineFunction *CurMF = FuncInfo.MF;
// Figure out which block is immediately after the current one.
- MachineBasicBlock *NextBlock = 0;
+ MachineBasicBlock *NextBlock = nullptr;
MachineFunction::iterator BBI = CR.CaseBB;
if (++BBI != FuncInfo.MF->end())
@@ -2192,7 +2189,7 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR,
if (I->High == I->Low) {
// This is just a small case range :) containing exactly 1 case
CC = ISD::SETEQ;
- LHS = SV; RHS = I->High; MHS = NULL;
+ LHS = SV; RHS = I->High; MHS = nullptr;
} else {
CC = ISD::SETLE;
LHS = I->Low; MHS = SV; RHS = I->High;
@@ -2427,7 +2424,7 @@ bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR,
CaseRange LHSR(CR.Range.first, Pivot);
CaseRange RHSR(Pivot, CR.Range.second);
const Constant *C = Pivot->Low;
- MachineBasicBlock *FalseBB = 0, *TrueBB = 0;
+ MachineBasicBlock *FalseBB = nullptr, *TrueBB = nullptr;
// We know that we branch to the LHS if the Value being switched on is
// less than the Pivot value, C. We use this to optimize our binary
@@ -2469,7 +2466,7 @@ bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR,
// Create a CaseBlock record representing a conditional branch to
// the LHS node if the value being switched on SV is less than C.
// Otherwise, branch to LHS.
- CaseBlock CB(ISD::SETLT, SV, C, NULL, TrueBB, FalseBB, CR.CaseBB);
+ CaseBlock CB(ISD::SETLT, SV, C, nullptr, TrueBB, FalseBB, CR.CaseBB);
if (CR.CaseBB == SwitchBB)
visitSwitchCase(CB, SwitchBB);
@@ -2682,7 +2679,7 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
MachineBasicBlock *SwitchMBB = FuncInfo.MBB;
// Figure out which block is immediately after the current one.
- MachineBasicBlock *NextBlock = 0;
+ MachineBasicBlock *NextBlock = nullptr;
MachineBasicBlock *Default = FuncInfo.MBBMap[SI.getDefaultDest()];
// If there is only the default destination, branch to it if it is not the
@@ -2716,7 +2713,7 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
// Push the initial CaseRec onto the worklist
CaseRecVector WorkList;
- WorkList.push_back(CaseRec(SwitchMBB,0,0,
+ WorkList.push_back(CaseRec(SwitchMBB, nullptr, nullptr,
CaseRange(Cases.begin(),Cases.end())));
while (!WorkList.empty()) {
@@ -2765,6 +2762,11 @@ void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) {
getValue(I.getAddress())));
}
+void SelectionDAGBuilder::visitUnreachable(const UnreachableInst &I) {
+ if (DAG.getTarget().Options.TrapUnreachable)
+ DAG.setRoot(DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, DAG.getRoot()));
+}
+
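visitUnreachable is new: an IR unreachable terminator now lowers to ISD::TRAP, but only when the TrapUnreachable target option is set. A hedged sketch of opting in from embedder code (assumed setup, not part of this patch):

#include "llvm/Target/TargetOptions.h"

llvm::TargetOptions Opts;
Opts.TrapUnreachable = true; // lower `unreachable` terminators to traps
// Opts is then handed to Target::createTargetMachine(...). With the flag
// clear (the default), visitUnreachable emits nothing, as before.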
void SelectionDAGBuilder::visitFSub(const User &I) {
// -0.0 - X --> fneg
Type *Ty = I.getType();
@@ -2887,8 +2889,7 @@ void SelectionDAGBuilder::visitSelect(const User &I) {
FalseVal.getResNo() + i));
setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
- DAG.getVTList(&ValueVTs[0], NumValues),
- &Values[0], NumValues));
+ DAG.getVTList(ValueVTs), Values));
}
void SelectionDAGBuilder::visitTrunc(const User &I) {
@@ -3097,11 +3098,9 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
MOps2[0] = Src2;
Src1 = Src1U ? DAG.getUNDEF(VT) : DAG.getNode(ISD::CONCAT_VECTORS,
- getCurSDLoc(), VT,
- &MOps1[0], NumConcat);
+ getCurSDLoc(), VT, MOps1);
Src2 = Src2U ? DAG.getUNDEF(VT) : DAG.getNode(ISD::CONCAT_VECTORS,
- getCurSDLoc(), VT,
- &MOps2[0], NumConcat);
+ getCurSDLoc(), VT, MOps2);
// Readjust mask for new input vector length.
SmallVector<int, 8> MappedOps;
@@ -3219,8 +3218,7 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
Ops.push_back(Res);
}
- setValue(&I, DAG.getNode(ISD::BUILD_VECTOR, getCurSDLoc(),
- VT, &Ops[0], Ops.size()));
+ setValue(&I, DAG.getNode(ISD::BUILD_VECTOR, getCurSDLoc(), VT, Ops));
}
void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) {
@@ -3262,8 +3260,7 @@ void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) {
SDValue(Agg.getNode(), Agg.getResNo() + i);
setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
- DAG.getVTList(&AggValueVTs[0], NumAggValues),
- &Values[0], NumAggValues));
+ DAG.getVTList(AggValueVTs), Values));
}
void SelectionDAGBuilder::visitExtractValue(const ExtractValueInst &I) {
@@ -3297,8 +3294,7 @@ void SelectionDAGBuilder::visitExtractValue(const ExtractValueInst &I) {
SDValue(Agg.getNode(), Agg.getResNo() + i);
setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
- DAG.getVTList(&ValValueVTs[0], NumValValues),
- &Values[0], NumValValues));
+ DAG.getVTList(ValValueVTs), Values));
}
void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
@@ -3420,8 +3416,7 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) {
SDValue Ops[] = { getRoot(), AllocSize, DAG.getIntPtrConstant(Align) };
SDVTList VTs = DAG.getVTList(AllocSize.getValueType(), MVT::Other);
- SDValue DSA = DAG.getNode(ISD::DYNAMIC_STACKALLOC, getCurSDLoc(),
- VTs, Ops, 3);
+ SDValue DSA = DAG.getNode(ISD::DYNAMIC_STACKALLOC, getCurSDLoc(), VTs, Ops);
setValue(&I, DSA);
DAG.setRoot(DSA.getValue(1));
@@ -3438,8 +3433,8 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
Type *Ty = I.getType();
bool isVolatile = I.isVolatile();
- bool isNonTemporal = I.getMetadata("nontemporal") != 0;
- bool isInvariant = I.getMetadata("invariant.load") != 0;
+ bool isNonTemporal = I.getMetadata("nontemporal") != nullptr;
+ bool isInvariant = I.getMetadata("invariant.load") != nullptr;
unsigned Alignment = I.getAlignment();
const MDNode *TBAAInfo = I.getMetadata(LLVMContext::MD_tbaa);
const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);
@@ -3484,8 +3479,8 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
// (MaxParallelChains should always remain as failsafe).
if (ChainI == MaxParallelChains) {
assert(PendingLoads.empty() && "PendingLoads must be serialized first");
- SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(),
- MVT::Other, &Chains[0], ChainI);
+ SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other,
+ makeArrayRef(Chains.data(), ChainI));
Root = Chain;
ChainI = 0;
}
@@ -3502,8 +3497,8 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
}
if (!ConstantMemory) {
- SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(),
- MVT::Other, &Chains[0], ChainI);
+ SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other,
+ makeArrayRef(Chains.data(), ChainI));
if (isVolatile)
DAG.setRoot(Chain);
else
@@ -3511,8 +3506,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
}
setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
- DAG.getVTList(&ValueVTs[0], NumValues),
- &Values[0], NumValues));
+ DAG.getVTList(ValueVTs), Values));
}
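visitLoad (and visitStore below) fill a preallocated Chains buffer, and only the first ChainI slots are live when the TokenFactor is built, hence makeArrayRef(Chains.data(), ChainI) rather than passing the whole container. A short sketch of that prefix-view idiom:

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"

void demo() {
  llvm::SmallVector<int, 8> Chains(8);  // preallocated to the maximum size
  unsigned ChainI = 3;                  // only the first three slots are live
  llvm::ArrayRef<int> Live = llvm::makeArrayRef(Chains.data(), ChainI);
  // Live.size() == 3; passing Chains itself would include stale slots.
  (void)Live;
}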
void SelectionDAGBuilder::visitStore(const StoreInst &I) {
@@ -3540,7 +3534,7 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) {
NumValues));
EVT PtrVT = Ptr.getValueType();
bool isVolatile = I.isVolatile();
- bool isNonTemporal = I.getMetadata("nontemporal") != 0;
+ bool isNonTemporal = I.getMetadata("nontemporal") != nullptr;
unsigned Alignment = I.getAlignment();
const MDNode *TBAAInfo = I.getMetadata(LLVMContext::MD_tbaa);
@@ -3548,8 +3542,8 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) {
for (unsigned i = 0; i != NumValues; ++i, ++ChainI) {
// See visitLoad comments.
if (ChainI == MaxParallelChains) {
- SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(),
- MVT::Other, &Chains[0], ChainI);
+ SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other,
+ makeArrayRef(Chains.data(), ChainI));
Root = Chain;
ChainI = 0;
}
@@ -3562,8 +3556,8 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) {
Chains[ChainI] = St;
}
- SDValue StoreNode = DAG.getNode(ISD::TokenFactor, getCurSDLoc(),
- MVT::Other, &Chains[0], ChainI);
+ SDValue StoreNode = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other,
+ makeArrayRef(Chains.data(), ChainI));
DAG.setRoot(StoreNode);
}
@@ -3588,7 +3582,7 @@ static SDValue InsertFenceForAtomic(SDValue Chain, AtomicOrdering Order,
Ops[0] = Chain;
Ops[1] = DAG.getConstant(Order, TLI.getPointerTy());
Ops[2] = DAG.getConstant(Scope, TLI.getPointerTy());
- return DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops, 3);
+ return DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops);
}
void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) {
@@ -3680,7 +3674,7 @@ void SelectionDAGBuilder::visitFence(const FenceInst &I) {
Ops[0] = getRoot();
Ops[1] = DAG.getConstant(I.getOrdering(), TLI->getPointerTy());
Ops[2] = DAG.getConstant(I.getSynchScope(), TLI->getPointerTy());
- DAG.setRoot(DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops, 3));
+ DAG.setRoot(DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops));
}
void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) {
@@ -3696,13 +3690,21 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) {
if (I.getAlignment() < VT.getSizeInBits() / 8)
report_fatal_error("Cannot generate unaligned atomic load");
+ MachineMemOperand *MMO =
+ DAG.getMachineFunction().
+ getMachineMemOperand(MachinePointerInfo(I.getPointerOperand()),
+ MachineMemOperand::MOVolatile |
+ MachineMemOperand::MOLoad,
+ VT.getStoreSize(),
+ I.getAlignment() ? I.getAlignment() :
+ DAG.getEVTAlignment(VT));
+
InChain = TLI->prepareVolatileOrAtomicLoad(InChain, dl, DAG);
SDValue L =
- DAG.getAtomic(ISD::ATOMIC_LOAD, dl, VT, VT, InChain,
- getValue(I.getPointerOperand()),
- I.getPointerOperand(), I.getAlignment(),
- TLI->getInsertFencesForAtomic() ? Monotonic : Order,
- Scope);
+ DAG.getAtomic(ISD::ATOMIC_LOAD, dl, VT, VT, InChain,
+ getValue(I.getPointerOperand()), MMO,
+ TLI->getInsertFencesForAtomic() ? Monotonic : Order,
+ Scope);
SDValue OutChain = L.getValue(1);
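Atomic loads now build their MachineMemOperand up front instead of passing a raw pointer and alignment for getAtomic to wrap, so volatility, size, and alignment travel in one object. The construction above, reduced to its shape (illustrative; the flag set and sizes are exactly those computed in the hunk):

MachineFunction &MF = DAG.getMachineFunction();
MachineMemOperand *MMO = MF.getMachineMemOperand(
    MachinePointerInfo(I.getPointerOperand()),
    MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad,
    VT.getStoreSize(),
    I.getAlignment() ? I.getAlignment() : DAG.getEVTAlignment(VT));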
@@ -3788,27 +3790,23 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
if (HasChain)
ValueVTs.push_back(MVT::Other);
- SDVTList VTs = DAG.getVTList(ValueVTs.data(), ValueVTs.size());
+ SDVTList VTs = DAG.getVTList(ValueVTs);
// Create the node.
SDValue Result;
if (IsTgtIntrinsic) {
// This is target intrinsic that touches memory
Result = DAG.getMemIntrinsicNode(Info.opc, getCurSDLoc(),
- VTs, &Ops[0], Ops.size(),
- Info.memVT,
+ VTs, Ops, Info.memVT,
MachinePointerInfo(Info.ptrVal, Info.offset),
Info.align, Info.vol,
Info.readMem, Info.writeMem);
} else if (!HasChain) {
- Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurSDLoc(),
- VTs, &Ops[0], Ops.size());
+ Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurSDLoc(), VTs, Ops);
} else if (!I.getType()->isVoidTy()) {
- Result = DAG.getNode(ISD::INTRINSIC_W_CHAIN, getCurSDLoc(),
- VTs, &Ops[0], Ops.size());
+ Result = DAG.getNode(ISD::INTRINSIC_W_CHAIN, getCurSDLoc(), VTs, Ops);
} else {
- Result = DAG.getNode(ISD::INTRINSIC_VOID, getCurSDLoc(),
- VTs, &Ops[0], Ops.size());
+ Result = DAG.getNode(ISD::INTRINSIC_VOID, getCurSDLoc(), VTs, Ops);
}
if (HasChain) {
@@ -4530,7 +4528,7 @@ static unsigned getTruncatedArgReg(const SDValue &N) {
/// At the end of instruction selection, they will be inserted to the entry BB.
bool
SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable,
- int64_t Offset,
+ int64_t Offset, bool IsIndirect,
const SDValue &N) {
const Argument *Arg = dyn_cast<Argument>(V);
if (!Arg)
@@ -4582,8 +4580,6 @@ SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable,
if (!Op)
return false;
- // FIXME: This does not handle register-indirect values at offset 0.
- bool IsIndirect = Offset != 0;
if (Op->isReg())
FuncInfo.ArgDbgValues.push_back(BuildMI(MF, getCurDebugLoc(),
TII->get(TargetOpcode::DBG_VALUE),
@@ -4619,18 +4615,34 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
default:
// By default, turn this into a target intrinsic node.
visitTargetIntrinsic(I, Intrinsic);
- return 0;
- case Intrinsic::vastart: visitVAStart(I); return 0;
- case Intrinsic::vaend: visitVAEnd(I); return 0;
- case Intrinsic::vacopy: visitVACopy(I); return 0;
+ return nullptr;
+ case Intrinsic::vastart: visitVAStart(I); return nullptr;
+ case Intrinsic::vaend: visitVAEnd(I); return nullptr;
+ case Intrinsic::vacopy: visitVACopy(I); return nullptr;
case Intrinsic::returnaddress:
setValue(&I, DAG.getNode(ISD::RETURNADDR, sdl, TLI->getPointerTy(),
getValue(I.getArgOperand(0))));
- return 0;
+ return nullptr;
case Intrinsic::frameaddress:
setValue(&I, DAG.getNode(ISD::FRAMEADDR, sdl, TLI->getPointerTy(),
getValue(I.getArgOperand(0))));
- return 0;
+ return nullptr;
+ case Intrinsic::read_register: {
+ Value *Reg = I.getArgOperand(0);
+ SDValue RegName = DAG.getMDNode(cast<MDNode>(Reg));
+ EVT VT = TM.getTargetLowering()->getValueType(I.getType());
+ setValue(&I, DAG.getNode(ISD::READ_REGISTER, sdl, VT, RegName));
+ return nullptr;
+ }
+ case Intrinsic::write_register: {
+ Value *Reg = I.getArgOperand(0);
+ Value *RegValue = I.getArgOperand(1);
+ SDValue Chain = getValue(RegValue).getOperand(0);
+ SDValue RegName = DAG.getMDNode(cast<MDNode>(Reg));
+ DAG.setRoot(DAG.getNode(ISD::WRITE_REGISTER, sdl, MVT::Other, Chain,
+ RegName, getValue(RegValue)));
+ return nullptr;
+ }
case Intrinsic::setjmp:
return &"_setjmp"[!TLI->usesUnderscoreSetJmp()];
case Intrinsic::longjmp:
@@ -4653,7 +4665,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
DAG.setRoot(DAG.getMemcpy(getRoot(), sdl, Op1, Op2, Op3, Align, isVol, false,
MachinePointerInfo(I.getArgOperand(0)),
MachinePointerInfo(I.getArgOperand(1))));
- return 0;
+ return nullptr;
}
case Intrinsic::memset: {
// Assert for address < 256 since we support only user defined address
@@ -4670,7 +4682,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue();
DAG.setRoot(DAG.getMemset(getRoot(), sdl, Op1, Op2, Op3, Align, isVol,
MachinePointerInfo(I.getArgOperand(0))));
- return 0;
+ return nullptr;
}
case Intrinsic::memmove: {
// Assert for address < 256 since we support only user defined address
@@ -4690,7 +4702,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
DAG.setRoot(DAG.getMemmove(getRoot(), sdl, Op1, Op2, Op3, Align, isVol,
MachinePointerInfo(I.getArgOperand(0)),
MachinePointerInfo(I.getArgOperand(1))));
- return 0;
+ return nullptr;
}
case Intrinsic::dbg_declare: {
const DbgDeclareInst &DI = cast<DbgDeclareInst>(I);
@@ -4701,14 +4713,14 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
"Variable in DbgDeclareInst should be either null or a DIVariable.");
if (!Address || !DIVar) {
DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
- return 0;
+ return nullptr;
}
// Check if address has undef value.
if (isa<UndefValue>(Address) ||
(Address->use_empty() && !isa<Argument>(Address))) {
DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
- return 0;
+ return nullptr;
}
SDValue &N = NodeMap[Address];
@@ -4730,29 +4742,29 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(N.getNode());
if (FINode)
// Byval parameter. We have a frame index at this point.
- SDV = DAG.getDbgValue(Variable, FINode->getIndex(),
- 0, dl, SDNodeOrder);
+ SDV = DAG.getFrameIndexDbgValue(Variable, FINode->getIndex(),
+ 0, dl, SDNodeOrder);
else {
// Address is an argument, so try to emit its dbg value using
// virtual register info from the FuncInfo.ValueMap.
- EmitFuncArgumentDbgValue(Address, Variable, 0, N);
- return 0;
+ EmitFuncArgumentDbgValue(Address, Variable, 0, false, N);
+ return nullptr;
}
} else if (AI)
SDV = DAG.getDbgValue(Variable, N.getNode(), N.getResNo(),
- 0, dl, SDNodeOrder);
+ true, 0, dl, SDNodeOrder);
else {
// Can't do anything with other non-AI cases yet.
DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
DEBUG(dbgs() << "non-AllocaInst issue for Address: \n\t");
DEBUG(Address->dump());
- return 0;
+ return nullptr;
}
DAG.AddDbgValue(SDV, N.getNode(), isParameter);
} else {
// If Address is an argument then try to emit its dbg value using
// virtual register info from the FuncInfo.ValueMap.
- if (!EmitFuncArgumentDbgValue(Address, Variable, 0, N)) {
+ if (!EmitFuncArgumentDbgValue(Address, Variable, 0, false, N)) {
// If the variable is pinned by an alloca in a dominating bb then
// use StaticAllocaMap.
if (const AllocaInst *AI = dyn_cast<AllocaInst>(Address)) {
@@ -4760,17 +4772,17 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
DenseMap<const AllocaInst*, int>::iterator SI =
FuncInfo.StaticAllocaMap.find(AI);
if (SI != FuncInfo.StaticAllocaMap.end()) {
- SDV = DAG.getDbgValue(Variable, SI->second,
- 0, dl, SDNodeOrder);
- DAG.AddDbgValue(SDV, 0, false);
- return 0;
+ SDV = DAG.getFrameIndexDbgValue(Variable, SI->second,
+ 0, dl, SDNodeOrder);
+ DAG.AddDbgValue(SDV, nullptr, false);
+ return nullptr;
}
}
}
DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
}
}
- return 0;
+ return nullptr;
}
case Intrinsic::dbg_value: {
const DbgValueInst &DI = cast<DbgValueInst>(I);
@@ -4778,18 +4790,18 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
assert((!DIVar || DIVar.isVariable()) &&
"Variable in DbgValueInst should be either null or a DIVariable.");
if (!DIVar)
- return 0;
+ return nullptr;
MDNode *Variable = DI.getVariable();
uint64_t Offset = DI.getOffset();
const Value *V = DI.getValue();
if (!V)
- return 0;
+ return nullptr;
SDDbgValue *SDV;
if (isa<ConstantInt>(V) || isa<ConstantFP>(V) || isa<UndefValue>(V)) {
- SDV = DAG.getDbgValue(Variable, V, Offset, dl, SDNodeOrder);
- DAG.AddDbgValue(SDV, 0, false);
+ SDV = DAG.getConstantDbgValue(Variable, V, Offset, dl, SDNodeOrder);
+ DAG.AddDbgValue(SDV, nullptr, false);
} else {
// Do not use getValue() in here; we don't want to generate code at
// this point if it hasn't been done yet.
@@ -4798,9 +4810,12 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
// Check unused arguments map.
N = UnusedArgNodeMap[V];
if (N.getNode()) {
- if (!EmitFuncArgumentDbgValue(V, Variable, Offset, N)) {
+ // A dbg.value for an alloca is always indirect.
+ bool IsIndirect = isa<AllocaInst>(V) || Offset != 0;
+ if (!EmitFuncArgumentDbgValue(V, Variable, Offset, IsIndirect, N)) {
SDV = DAG.getDbgValue(Variable, N.getNode(),
- N.getResNo(), Offset, dl, SDNodeOrder);
+ N.getResNo(), IsIndirect,
+ Offset, dl, SDNodeOrder);
DAG.AddDbgValue(SDV, N.getNode(), false);
}
} else if (!V->use_empty()) {
@@ -4823,18 +4838,13 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
if (!AI) {
DEBUG(dbgs() << "Dropping debug location info for:\n " << DI << "\n");
DEBUG(dbgs() << " Last seen at:\n " << *V << "\n");
- return 0;
+ return nullptr;
}
DenseMap<const AllocaInst*, int>::iterator SI =
FuncInfo.StaticAllocaMap.find(AI);
if (SI == FuncInfo.StaticAllocaMap.end())
- return 0; // VLAs.
- int FI = SI->second;
-
- MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
- if (!DI.getDebugLoc().isUnknown() && MMI.hasDebugInfo())
- MMI.setVariableDbgInfo(Variable, FI, DI.getDebugLoc());
- return 0;
+ return nullptr; // VLAs.
+ return nullptr;
}
case Intrinsic::eh_typeid_for: {
@@ -4843,7 +4853,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
unsigned TypeID = DAG.getMachineFunction().getMMI().getTypeIDFor(GV);
Res = DAG.getConstant(TypeID, MVT::i32);
setValue(&I, Res);
- return 0;
+ return nullptr;
}
case Intrinsic::eh_return_i32:
@@ -4854,10 +4864,10 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
getControlRoot(),
getValue(I.getArgOperand(0)),
getValue(I.getArgOperand(1))));
- return 0;
+ return nullptr;
case Intrinsic::eh_unwind_init:
DAG.getMachineFunction().getMMI().setCallsUnwindInit(true);
- return 0;
+ return nullptr;
case Intrinsic::eh_dwarf_cfa: {
SDValue CfaArg = DAG.getSExtOrTrunc(getValue(I.getArgOperand(0)), sdl,
TLI->getPointerTy());
@@ -4871,7 +4881,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
DAG.getConstant(0, TLI->getPointerTy()));
setValue(&I, DAG.getNode(ISD::ADD, sdl, FA.getValueType(),
FA, Offset));
- return 0;
+ return nullptr;
}
case Intrinsic::eh_sjlj_callsite: {
MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
@@ -4880,7 +4890,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
assert(MMI.getCurrentCallSite() == 0 && "Overlapping call sites!");
MMI.setCurrentCallSite(CI->getZExtValue());
- return 0;
+ return nullptr;
}
case Intrinsic::eh_sjlj_functioncontext: {
// Get and store the index of the function context.
@@ -4889,23 +4899,22 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
cast<AllocaInst>(I.getArgOperand(0)->stripPointerCasts());
int FI = FuncInfo.StaticAllocaMap[FnCtx];
MFI->setFunctionContextIndex(FI);
- return 0;
+ return nullptr;
}
case Intrinsic::eh_sjlj_setjmp: {
SDValue Ops[2];
Ops[0] = getRoot();
Ops[1] = getValue(I.getArgOperand(0));
SDValue Op = DAG.getNode(ISD::EH_SJLJ_SETJMP, sdl,
- DAG.getVTList(MVT::i32, MVT::Other),
- Ops, 2);
+ DAG.getVTList(MVT::i32, MVT::Other), Ops);
setValue(&I, Op.getValue(0));
DAG.setRoot(Op.getValue(1));
- return 0;
+ return nullptr;
}
case Intrinsic::eh_sjlj_longjmp: {
DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_LONGJMP, sdl, MVT::Other,
getRoot(), getValue(I.getArgOperand(0))));
- return 0;
+ return nullptr;
}
case Intrinsic::x86_mmx_pslli_w:
@@ -4919,7 +4928,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
SDValue ShAmt = getValue(I.getArgOperand(1));
if (isa<ConstantSDNode>(ShAmt)) {
visitTargetIntrinsic(I, Intrinsic);
- return 0;
+ return nullptr;
}
unsigned NewIntrinsic = 0;
EVT ShAmtVT = MVT::v2i32;
@@ -4958,14 +4967,14 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
SDValue ShOps[2];
ShOps[0] = ShAmt;
ShOps[1] = DAG.getConstant(0, MVT::i32);
- ShAmt = DAG.getNode(ISD::BUILD_VECTOR, sdl, ShAmtVT, &ShOps[0], 2);
+ ShAmt = DAG.getNode(ISD::BUILD_VECTOR, sdl, ShAmtVT, ShOps);
EVT DestVT = TLI->getValueType(I.getType());
ShAmt = DAG.getNode(ISD::BITCAST, sdl, DestVT, ShAmt);
Res = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, sdl, DestVT,
DAG.getConstant(NewIntrinsic, MVT::i32),
getValue(I.getArgOperand(0)), ShAmt);
setValue(&I, Res);
- return 0;
+ return nullptr;
}
case Intrinsic::x86_avx_vinsertf128_pd_256:
case Intrinsic::x86_avx_vinsertf128_ps_256:
@@ -4980,7 +4989,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
getValue(I.getArgOperand(1)),
DAG.getConstant(Idx, TLI->getVectorIdxTy()));
setValue(&I, Res);
- return 0;
+ return nullptr;
}
case Intrinsic::x86_avx_vextractf128_pd_256:
case Intrinsic::x86_avx_vextractf128_ps_256:
@@ -4993,7 +5002,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
getValue(I.getArgOperand(0)),
DAG.getConstant(Idx, TLI->getVectorIdxTy()));
setValue(&I, Res);
- return 0;
+ return nullptr;
}
case Intrinsic::convertff:
case Intrinsic::convertfsi:
@@ -5026,31 +5035,31 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
getValue(I.getArgOperand(2)),
Code);
setValue(&I, Res);
- return 0;
+ return nullptr;
}
case Intrinsic::powi:
setValue(&I, ExpandPowI(sdl, getValue(I.getArgOperand(0)),
getValue(I.getArgOperand(1)), DAG));
- return 0;
+ return nullptr;
case Intrinsic::log:
setValue(&I, expandLog(sdl, getValue(I.getArgOperand(0)), DAG, *TLI));
- return 0;
+ return nullptr;
case Intrinsic::log2:
setValue(&I, expandLog2(sdl, getValue(I.getArgOperand(0)), DAG, *TLI));
- return 0;
+ return nullptr;
case Intrinsic::log10:
setValue(&I, expandLog10(sdl, getValue(I.getArgOperand(0)), DAG, *TLI));
- return 0;
+ return nullptr;
case Intrinsic::exp:
setValue(&I, expandExp(sdl, getValue(I.getArgOperand(0)), DAG, *TLI));
- return 0;
+ return nullptr;
case Intrinsic::exp2:
setValue(&I, expandExp2(sdl, getValue(I.getArgOperand(0)), DAG, *TLI));
- return 0;
+ return nullptr;
case Intrinsic::pow:
setValue(&I, expandPow(sdl, getValue(I.getArgOperand(0)),
getValue(I.getArgOperand(1)), DAG, *TLI));
- return 0;
+ return nullptr;
case Intrinsic::sqrt:
case Intrinsic::fabs:
case Intrinsic::sin:
@@ -5079,21 +5088,21 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
setValue(&I, DAG.getNode(Opcode, sdl,
getValue(I.getArgOperand(0)).getValueType(),
getValue(I.getArgOperand(0))));
- return 0;
+ return nullptr;
}
case Intrinsic::copysign:
setValue(&I, DAG.getNode(ISD::FCOPYSIGN, sdl,
getValue(I.getArgOperand(0)).getValueType(),
getValue(I.getArgOperand(0)),
getValue(I.getArgOperand(1))));
- return 0;
+ return nullptr;
case Intrinsic::fma:
setValue(&I, DAG.getNode(ISD::FMA, sdl,
getValue(I.getArgOperand(0)).getValueType(),
getValue(I.getArgOperand(0)),
getValue(I.getArgOperand(1)),
getValue(I.getArgOperand(2))));
- return 0;
+ return nullptr;
case Intrinsic::fmuladd: {
EVT VT = TLI->getValueType(I.getType());
if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict &&
@@ -5114,42 +5123,41 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
getValue(I.getArgOperand(2)));
setValue(&I, Add);
}
- return 0;
+ return nullptr;
}
case Intrinsic::convert_to_fp16:
setValue(&I, DAG.getNode(ISD::FP32_TO_FP16, sdl,
MVT::i16, getValue(I.getArgOperand(0))));
- return 0;
+ return nullptr;
case Intrinsic::convert_from_fp16:
setValue(&I, DAG.getNode(ISD::FP16_TO_FP32, sdl,
MVT::f32, getValue(I.getArgOperand(0))));
- return 0;
+ return nullptr;
case Intrinsic::pcmarker: {
SDValue Tmp = getValue(I.getArgOperand(0));
DAG.setRoot(DAG.getNode(ISD::PCMARKER, sdl, MVT::Other, getRoot(), Tmp));
- return 0;
+ return nullptr;
}
case Intrinsic::readcyclecounter: {
SDValue Op = getRoot();
Res = DAG.getNode(ISD::READCYCLECOUNTER, sdl,
- DAG.getVTList(MVT::i64, MVT::Other),
- &Op, 1);
+ DAG.getVTList(MVT::i64, MVT::Other), Op);
setValue(&I, Res);
DAG.setRoot(Res.getValue(1));
- return 0;
+ return nullptr;
}
case Intrinsic::bswap:
setValue(&I, DAG.getNode(ISD::BSWAP, sdl,
getValue(I.getArgOperand(0)).getValueType(),
getValue(I.getArgOperand(0))));
- return 0;
+ return nullptr;
case Intrinsic::cttz: {
SDValue Arg = getValue(I.getArgOperand(0));
ConstantInt *CI = cast<ConstantInt>(I.getArgOperand(1));
EVT Ty = Arg.getValueType();
setValue(&I, DAG.getNode(CI->isZero() ? ISD::CTTZ : ISD::CTTZ_ZERO_UNDEF,
sdl, Ty, Arg));
- return 0;
+ return nullptr;
}
case Intrinsic::ctlz: {
SDValue Arg = getValue(I.getArgOperand(0));
@@ -5157,26 +5165,26 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
EVT Ty = Arg.getValueType();
setValue(&I, DAG.getNode(CI->isZero() ? ISD::CTLZ : ISD::CTLZ_ZERO_UNDEF,
sdl, Ty, Arg));
- return 0;
+ return nullptr;
}
case Intrinsic::ctpop: {
SDValue Arg = getValue(I.getArgOperand(0));
EVT Ty = Arg.getValueType();
setValue(&I, DAG.getNode(ISD::CTPOP, sdl, Ty, Arg));
- return 0;
+ return nullptr;
}
case Intrinsic::stacksave: {
SDValue Op = getRoot();
Res = DAG.getNode(ISD::STACKSAVE, sdl,
- DAG.getVTList(TLI->getPointerTy(), MVT::Other), &Op, 1);
+ DAG.getVTList(TLI->getPointerTy(), MVT::Other), Op);
setValue(&I, Res);
DAG.setRoot(Res.getValue(1));
- return 0;
+ return nullptr;
}
case Intrinsic::stackrestore: {
Res = getValue(I.getArgOperand(0));
DAG.setRoot(DAG.getNode(ISD::STACKRESTORE, sdl, MVT::Other, getRoot(), Res));
- return 0;
+ return nullptr;
}
case Intrinsic::stackprotector: {
// Emit code into the DAG to store the stack guard onto the stack.
@@ -5198,7 +5206,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
true, false, 0);
setValue(&I, Res);
DAG.setRoot(Res);
- return 0;
+ return nullptr;
}
case Intrinsic::objectsize: {
// If we don't know by now, we're never going to know.
@@ -5215,16 +5223,16 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
Res = DAG.getConstant(0, Ty);
setValue(&I, Res);
- return 0;
+ return nullptr;
}
case Intrinsic::annotation:
case Intrinsic::ptr_annotation:
// Drop the intrinsic, but forward the value
setValue(&I, getValue(I.getOperand(0)));
- return 0;
+ return nullptr;
case Intrinsic::var_annotation:
// Discard annotate attributes
- return 0;
+ return nullptr;
case Intrinsic::init_trampoline: {
const Function *F = cast<Function>(I.getArgOperand(1)->stripPointerCasts());
@@ -5237,16 +5245,16 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
Ops[4] = DAG.getSrcValue(I.getArgOperand(0));
Ops[5] = DAG.getSrcValue(F);
- Res = DAG.getNode(ISD::INIT_TRAMPOLINE, sdl, MVT::Other, Ops, 6);
+ Res = DAG.getNode(ISD::INIT_TRAMPOLINE, sdl, MVT::Other, Ops);
DAG.setRoot(Res);
- return 0;
+ return nullptr;
}
case Intrinsic::adjust_trampoline: {
setValue(&I, DAG.getNode(ISD::ADJUST_TRAMPOLINE, sdl,
TLI->getPointerTy(),
getValue(I.getArgOperand(0))));
- return 0;
+ return nullptr;
}
case Intrinsic::gcroot:
if (GFI) {
@@ -5256,18 +5264,18 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
FrameIndexSDNode *FI = cast<FrameIndexSDNode>(getValue(Alloca).getNode());
GFI->addStackRoot(FI->getIndex(), TypeMap);
}
- return 0;
+ return nullptr;
case Intrinsic::gcread:
case Intrinsic::gcwrite:
llvm_unreachable("GC failed to lower gcread/gcwrite intrinsics!");
case Intrinsic::flt_rounds:
setValue(&I, DAG.getNode(ISD::FLT_ROUNDS_, sdl, MVT::i32));
- return 0;
+ return nullptr;
case Intrinsic::expect: {
// Just replace __builtin_expect(exp, c) with EXP.
setValue(&I, getValue(I.getArgOperand(0)));
- return 0;
+ return nullptr;
}
case Intrinsic::debugtrap:
@@ -5277,20 +5285,19 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
ISD::NodeType Op = (Intrinsic == Intrinsic::trap) ?
ISD::TRAP : ISD::DEBUGTRAP;
DAG.setRoot(DAG.getNode(Op, sdl,MVT::Other, getRoot()));
- return 0;
+ return nullptr;
}
TargetLowering::ArgListTy Args;
- TargetLowering::
- CallLoweringInfo CLI(getRoot(), I.getType(),
- false, false, false, false, 0, CallingConv::C,
- /*isTailCall=*/false,
- /*doesNotRet=*/false, /*isReturnValueUsed=*/true,
- DAG.getExternalSymbol(TrapFuncName.data(),
- TLI->getPointerTy()),
- Args, DAG, sdl);
+
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(sdl).setChain(getRoot())
+ .setCallee(CallingConv::C, I.getType(),
+ DAG.getExternalSymbol(TrapFuncName.data(), TLI->getPointerTy()),
+ &Args, 0);
+
std::pair<SDValue, SDValue> Result = TLI->LowerCallTo(CLI);
DAG.setRoot(Result.second);
- return 0;
+ return nullptr;
}
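This trap lowering is the first of several call sites in the patch converted from the monolithic CallLoweringInfo constructor to a chained builder. A minimal sketch of the new pattern, using only setters that appear in these hunks (the final setCallee argument is the fixed-argument count, as the LowerCallOperands hunk below makes explicit); other converted sites also chain setTailCall, setNoReturn, setDiscardResult, and setSExtResult/setZExtResult:

  // Sketch: builder-style call lowering as introduced by this patch.
  TargetLowering::ArgListTy Args;              // no arguments for the trap call
  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(sdl)
     .setChain(getRoot())
     .setCallee(CallingConv::C, I.getType(),
                DAG.getExternalSymbol(TrapFuncName.data(), TLI->getPointerTy()),
                &Args, /*FixedArgs=*/0);
  std::pair<SDValue, SDValue> Result = TLI->LowerCallTo(CLI);
  DAG.setRoot(Result.second);                  // thread the new chain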
case Intrinsic::uadd_with_overflow:
@@ -5314,7 +5321,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
SDVTList VTs = DAG.getVTList(Op1.getValueType(), MVT::i1);
setValue(&I, DAG.getNode(Op, sdl, VTs, Op1, Op2));
- return 0;
+ return nullptr;
}
case Intrinsic::prefetch: {
SDValue Ops[5];
@@ -5325,22 +5332,21 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
Ops[3] = getValue(I.getArgOperand(2));
Ops[4] = getValue(I.getArgOperand(3));
DAG.setRoot(DAG.getMemIntrinsicNode(ISD::PREFETCH, sdl,
- DAG.getVTList(MVT::Other),
- &Ops[0], 5,
+ DAG.getVTList(MVT::Other), Ops,
EVT::getIntegerVT(*Context, 8),
MachinePointerInfo(I.getArgOperand(0)),
0, /* align */
false, /* volatile */
rw==0, /* read */
rw==1)); /* write */
- return 0;
+ return nullptr;
}
case Intrinsic::lifetime_start:
case Intrinsic::lifetime_end: {
bool IsStart = (Intrinsic == Intrinsic::lifetime_start);
// Stack coloring is not enabled in O0, discard region information.
if (TM.getOptLevel() == CodeGenOpt::None)
- return 0;
+ return nullptr;
SmallVector<Value *, 4> Allocas;
GetUnderlyingObjects(I.getArgOperand(1), Allocas, DL);
@@ -5360,18 +5366,18 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
Ops[1] = DAG.getFrameIndex(FI, TLI->getPointerTy(), true);
unsigned Opcode = (IsStart ? ISD::LIFETIME_START : ISD::LIFETIME_END);
- Res = DAG.getNode(Opcode, sdl, MVT::Other, Ops, 2);
+ Res = DAG.getNode(Opcode, sdl, MVT::Other, Ops);
DAG.setRoot(Res);
}
- return 0;
+ return nullptr;
}
case Intrinsic::invariant_start:
// Discard region information.
setValue(&I, DAG.getUNDEF(TLI->getPointerTy()));
- return 0;
+ return nullptr;
case Intrinsic::invariant_end:
// Discard region information.
- return 0;
+ return nullptr;
case Intrinsic::stackprotectorcheck: {
// Do not actually emit anything for this basic block. Instead we initialize
// the stack protector descriptor and export the guard variable so we can
@@ -5382,21 +5388,21 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
// Flush our exports since we are going to process a terminator.
(void)getControlRoot();
- return 0;
+ return nullptr;
}
case Intrinsic::clear_cache:
return TLI->getClearCacheBuiltinName();
case Intrinsic::donothing:
// ignore
- return 0;
+ return nullptr;
case Intrinsic::experimental_stackmap: {
visitStackmap(I);
- return 0;
+ return nullptr;
}
case Intrinsic::experimental_patchpoint_void:
case Intrinsic::experimental_patchpoint_i64: {
visitPatchpoint(I);
- return 0;
+ return nullptr;
}
}
}
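Most of the churn in visitIntrinsicCall above is the mechanical 0 -> nullptr conversion of its pointer return; the contract itself is unchanged. The function returns nullptr when the intrinsic has been lowered directly into the DAG, or a library-function name when the call should instead be re-emitted under that name (clear_cache is the one case above that takes the second path). A sketch of the caller side, mirroring the RenameFn handling in visitCall further down; the surrounding control flow is abbreviated:

  if (const char *RenameFn = visitIntrinsicCall(I, IID)) {
    // Not lowered in place: emit an ordinary call, but to RenameFn
    // instead of the original callee.
  }
  // nullptr: the intrinsic was fully lowered; nothing more to emit.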
@@ -5408,7 +5414,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
FunctionType *FTy = cast<FunctionType>(PT->getElementType());
Type *RetTy = FTy->getReturnType();
MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
- MCSymbol *BeginLabel = 0;
+ MCSymbol *BeginLabel = nullptr;
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
@@ -5496,9 +5502,10 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
if (isTailCall && !isInTailCallPosition(CS, *TLI))
isTailCall = false;
- TargetLowering::
- CallLoweringInfo CLI(getRoot(), RetTy, FTy, isTailCall, Callee, Args, DAG,
- getCurSDLoc(), CS);
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(getCurSDLoc()).setChain(getRoot())
+ .setCallee(RetTy, FTy, Callee, &Args, CS).setTailCall(isTailCall);
+
std::pair<SDValue,SDValue> Result = TLI->LowerCallTo(CLI);
assert((isTailCall || Result.second.getNode()) &&
"Non-null chain expected with non-tail call!");
@@ -5537,13 +5544,12 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
}
SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(),
- MVT::Other, &Chains[0], NumValues);
+ MVT::Other, Chains);
PendingLoads.push_back(Chain);
setValue(CS.getInstruction(),
DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
- DAG.getVTList(&RetTys[0], RetTys.size()),
- &Values[0], Values.size()));
+ DAG.getVTList(RetTys), Values));
}
if (!Result.second.getNode()) {
@@ -5683,7 +5689,7 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) {
switch (CSize->getZExtValue()) {
default:
LoadVT = MVT::Other;
- LoadTy = 0;
+ LoadTy = nullptr;
ActuallyDoIt = false;
break;
case 2:
@@ -5910,7 +5916,7 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
ComputeUsesVAFloatArgument(I, &MMI);
- const char *RenameFn = 0;
+ const char *RenameFn = nullptr;
if (Function *F = I.getCalledFunction()) {
if (F->isDeclaration()) {
if (const TargetIntrinsicInfo *II = TM.getIntrinsicInfo()) {
@@ -6085,7 +6091,7 @@ public:
RegsForValue AssignedRegs;
explicit SDISelAsmOperandInfo(const TargetLowering::AsmOperandInfo &info)
- : TargetLowering::AsmOperandInfo(info), CallOperand(0,0) {
+ : TargetLowering::AsmOperandInfo(info), CallOperand(nullptr,0) {
}
/// getCallOperandValEVT - Return the EVT of the Value* that this operand
@@ -6094,7 +6100,7 @@ public:
EVT getCallOperandValEVT(LLVMContext &Context,
const TargetLowering &TLI,
const DataLayout *DL) const {
- if (CallOperandVal == 0) return MVT::Other;
+ if (!CallOperandVal) return MVT::Other;
if (isa<BasicBlock>(CallOperandVal))
return TLI.getPointerTy();
@@ -6415,7 +6421,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
}
// There is no longer a Value* corresponding to this operand.
- OpInfo.CallOperandVal = 0;
+ OpInfo.CallOperandVal = nullptr;
// It is now an indirect operand.
OpInfo.isIndirect = true;
@@ -6704,8 +6710,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
if (Flag.getNode()) AsmNodeOperands.push_back(Flag);
Chain = DAG.getNode(ISD::INLINEASM, getCurSDLoc(),
- DAG.getVTList(MVT::Other, MVT::Glue),
- &AsmNodeOperands[0], AsmNodeOperands.size());
+ DAG.getVTList(MVT::Other, MVT::Glue), AsmNodeOperands);
Flag = Chain.getValue(1);
// If this asm returns a register value, copy the result from that register
@@ -6768,8 +6773,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
}
if (!OutChains.empty())
- Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other,
- &OutChains[0], OutChains.size());
+ Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, OutChains);
DAG.setRoot(Chain);
}
@@ -6839,10 +6843,10 @@ SelectionDAGBuilder::LowerCallOperands(const CallInst &CI, unsigned ArgIdx,
}
Type *retTy = useVoidTy ? Type::getVoidTy(*DAG.getContext()) : CI.getType();
- TargetLowering::CallLoweringInfo CLI(getRoot(), retTy, /*retSExt*/ false,
- /*retZExt*/ false, /*isVarArg*/ false, /*isInReg*/ false, NumArgs,
- CI.getCallingConv(), /*isTailCall*/ false, /*doesNotReturn*/ false,
- /*isReturnValueUsed*/ CI.use_empty(), Callee, Args, DAG, getCurSDLoc());
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(getCurSDLoc()).setChain(getRoot())
+ .setCallee(CI.getCallingConv(), retTy, Callee, &Args, NumArgs)
+ .setDiscardResult(!CI.use_empty());
const TargetLowering *TLI = TM.getTargetLowering();
return TLI->LowerCallTo(CLI);
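One subtlety in this hunk: the removed constructor passed /*isReturnValueUsed*/ CI.use_empty(), and the builder now passes setDiscardResult(!CI.use_empty()). Discarding is the negation of being used, so the two encode the same predicate and the rewrite preserves the existing behavior exactly, odd as the polarity reads in both forms:

  // Equivalence of the old and new flags at this call site:
  //   IsReturnValueUsed == CI.use_empty()     (removed form)
  //   DiscardResult     == !CI.use_empty()    (new form)
  // DiscardResult == !IsReturnValueUsed, so nothing changes.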
@@ -7056,7 +7060,7 @@ void SelectionDAGBuilder::visitPatchpoint(const CallInst &CI) {
// There is always a chain and a glue type at the end
ValueVTs.push_back(MVT::Other);
ValueVTs.push_back(MVT::Glue);
- NodeTys = DAG.getVTList(ValueVTs.data(), ValueVTs.size());
+ NodeTys = DAG.getVTList(ValueVTs);
} else
NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
@@ -7120,19 +7124,23 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
// Handle all of the outgoing arguments.
CLI.Outs.clear();
CLI.OutVals.clear();
- ArgListTy &Args = CLI.Args;
+ ArgListTy &Args = CLI.getArgs();
for (unsigned i = 0, e = Args.size(); i != e; ++i) {
SmallVector<EVT, 4> ValueVTs;
ComputeValueVTs(*this, Args[i].Ty, ValueVTs);
- for (unsigned Value = 0, NumValues = ValueVTs.size();
- Value != NumValues; ++Value) {
+ Type *FinalType = Args[i].Ty;
+ if (Args[i].isByVal)
+ FinalType = cast<PointerType>(Args[i].Ty)->getElementType();
+ bool NeedsRegBlock = functionArgumentNeedsConsecutiveRegisters(
+ FinalType, CLI.CallConv, CLI.IsVarArg);
+ for (unsigned Value = 0, NumValues = ValueVTs.size(); Value != NumValues;
+ ++Value) {
EVT VT = ValueVTs[Value];
Type *ArgTy = VT.getTypeForEVT(CLI.RetTy->getContext());
SDValue Op = SDValue(Args[i].Node.getNode(),
Args[i].Node.getResNo() + Value);
ISD::ArgFlagsTy Flags;
- unsigned OriginalAlignment =
- getDataLayout()->getABITypeAlignment(ArgTy);
+ unsigned OriginalAlignment = getDataLayout()->getABITypeAlignment(ArgTy);
if (Args[i].isZExt)
Flags.setZExt();
@@ -7168,6 +7176,8 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
}
if (Args[i].isNest)
Flags.setNest();
+ if (NeedsRegBlock)
+ Flags.setInConsecutiveRegs();
Flags.setOrigAlign(OriginalAlignment);
MVT PartVT = getRegisterType(CLI.RetTy->getContext(), VT);
@@ -7200,8 +7210,8 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
Flags.setReturned();
}
- getCopyToParts(CLI.DAG, CLI.DL, Op, &Parts[0], NumParts,
- PartVT, CLI.CS ? CLI.CS->getInstruction() : 0, ExtendKind);
+ getCopyToParts(CLI.DAG, CLI.DL, Op, &Parts[0], NumParts, PartVT,
+ CLI.CS ? CLI.CS->getInstruction() : nullptr, ExtendKind);
for (unsigned j = 0; j != NumParts; ++j) {
// if it isn't first piece, alignment must be 1
@@ -7213,6 +7223,10 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
else if (j != 0)
MyFlags.Flags.setOrigAlign(1);
+ // Only mark the end at the last register of the last value.
+ if (NeedsRegBlock && Value == NumValues - 1 && j == NumParts - 1)
+ MyFlags.Flags.setInConsecutiveRegsLast();
+
CLI.Outs.push_back(MyFlags);
CLI.OutVals.push_back(Parts[j]);
}
@@ -7261,7 +7275,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT);
ReturnValues.push_back(getCopyFromParts(CLI.DAG, CLI.DL, &InVals[CurReg],
- NumRegs, RegisterVT, VT, NULL,
+ NumRegs, RegisterVT, VT, nullptr,
AssertOp));
CurReg += NumRegs;
}
@@ -7273,8 +7287,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
return std::make_pair(SDValue(), CLI.Chain);
SDValue Res = CLI.DAG.getNode(ISD::MERGE_VALUES, CLI.DL,
- CLI.DAG.getVTList(&RetTys[0], RetTys.size()),
- &ReturnValues[0], ReturnValues.size());
+ CLI.DAG.getVTList(RetTys), ReturnValues);
return std::make_pair(Res, CLI.Chain);
}
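The substantive addition to LowerCallTo above is the consecutive-register bookkeeping: when functionArgumentNeedsConsecutiveRegisters returns true for an argument (queried on the pointee type for byval arguments), every part of every value of that argument is flagged, and the last part of the last value additionally carries the end-of-block marker. A condensed sketch of the protocol, with the loop structure abbreviated and names taken from the hunks above:

  for (unsigned Value = 0; Value != NumValues; ++Value)
    for (unsigned j = 0; j != NumParts; ++j) {
      ISD::ArgFlagsTy Flags;
      Flags.setInConsecutiveRegs();
      // Only the last register of the last value closes the block:
      if (Value == NumValues - 1 && j == NumParts - 1)
        Flags.setInConsecutiveRegsLast();
    }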
@@ -7301,7 +7314,7 @@ SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) {
const TargetLowering *TLI = TM.getTargetLowering();
RegsForValue RFV(V->getContext(), *TLI, Reg, V->getType());
SDValue Chain = DAG.getEntryNode();
- RFV.getCopyToRegs(Op, DAG, getCurSDLoc(), Chain, 0, V);
+ RFV.getCopyToRegs(Op, DAG, getCurSDLoc(), Chain, nullptr, V);
PendingExports.push_back(Chain);
}
@@ -7354,13 +7367,17 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
ComputeValueVTs(*TLI, I->getType(), ValueVTs);
bool isArgValueUsed = !I->use_empty();
unsigned PartBase = 0;
+ Type *FinalType = I->getType();
+ if (F.getAttributes().hasAttribute(Idx, Attribute::ByVal))
+ FinalType = cast<PointerType>(FinalType)->getElementType();
+ bool NeedsRegBlock = TLI->functionArgumentNeedsConsecutiveRegisters(
+ FinalType, F.getCallingConv(), F.isVarArg());
for (unsigned Value = 0, NumValues = ValueVTs.size();
Value != NumValues; ++Value) {
EVT VT = ValueVTs[Value];
Type *ArgTy = VT.getTypeForEVT(*DAG.getContext());
ISD::ArgFlagsTy Flags;
- unsigned OriginalAlignment =
- DL->getABITypeAlignment(ArgTy);
+ unsigned OriginalAlignment = DL->getABITypeAlignment(ArgTy);
if (F.getAttributes().hasAttribute(Idx, Attribute::ZExt))
Flags.setZExt();
@@ -7396,6 +7413,8 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
}
if (F.getAttributes().hasAttribute(Idx, Attribute::Nest))
Flags.setNest();
+ if (NeedsRegBlock)
+ Flags.setInConsecutiveRegs();
Flags.setOrigAlign(OriginalAlignment);
MVT RegisterVT = TLI->getRegisterType(*CurDAG->getContext(), VT);
@@ -7408,6 +7427,11 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
// if it isn't first piece, alignment must be 1
else if (i > 0)
MyFlags.Flags.setOrigAlign(1);
+
+ // Only mark the end at the last register of the last value.
+ if (NeedsRegBlock && Value == NumValues - 1 && i == NumRegs - 1)
+ MyFlags.Flags.setInConsecutiveRegsLast();
+
Ins.push_back(MyFlags);
}
PartBase += VT.getStoreSize();
@@ -7449,7 +7473,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
MVT RegVT = TLI->getRegisterType(*CurDAG->getContext(), VT);
ISD::NodeType AssertOp = ISD::DELETED_NODE;
SDValue ArgValue = getCopyFromParts(DAG, dl, &InVals[0], 1,
- RegVT, VT, NULL, AssertOp);
+ RegVT, VT, nullptr, AssertOp);
MachineFunction& MF = SDB->DAG.getMachineFunction();
MachineRegisterInfo& RegInfo = MF.getRegInfo();
@@ -7496,7 +7520,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
ArgValues.push_back(getCopyFromParts(DAG, dl, &InVals[i],
NumParts, PartVT, VT,
- NULL, AssertOp));
+ nullptr, AssertOp));
}
i += NumParts;
@@ -7511,7 +7535,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
dyn_cast<FrameIndexSDNode>(ArgValues[0].getNode()))
FuncInfo->setArgumentFrameIndex(I, FI->getIndex());
- SDValue Res = DAG.getMergeValues(&ArgValues[0], NumValues,
+ SDValue Res = DAG.getMergeValues(makeArrayRef(ArgValues.data(), NumValues),
SDB->getCurSDLoc());
SDB->setValue(I, Res);
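Alongside the nullptr sweep, the other mechanical change running through this file (and through SelectionDAGISel.cpp and TargetLowering.cpp below) is the ArrayRef migration: SelectionDAG entry points such as getNode, getVTList, getMergeValues, and MorphNodeTo drop their pointer-plus-count parameter pairs. A before/after sketch:

  // Containers now convert implicitly to ArrayRef<SDValue>:
  SmallVector<SDValue, 8> Ops;
  // old: DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Ops[0], Ops.size());
  SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Ops);
  // A prefix of a vector still works via makeArrayRef, as in the
  // getMergeValues call in LowerArguments above:
  unsigned NumValues = Ops.size();
  SDValue Res = DAG.getMergeValues(makeArrayRef(Ops.data(), NumValues), dl);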
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index 66835bf..fb29691 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -96,7 +96,7 @@ class SelectionDAGBuilder {
DebugLoc dl;
unsigned SDNodeOrder;
public:
- DanglingDebugInfo() : DI(0), dl(DebugLoc()), SDNodeOrder(0) { }
+ DanglingDebugInfo() : DI(nullptr), dl(DebugLoc()), SDNodeOrder(0) { }
DanglingDebugInfo(const DbgValueInst *di, DebugLoc DL, unsigned SDNO) :
DI(di), dl(DL), SDNodeOrder(SDNO) { }
const DbgValueInst* getDI() { return DI; }
@@ -135,7 +135,7 @@ private:
MachineBasicBlock* BB;
uint32_t ExtraWeight;
- Case() : Low(0), High(0), BB(0), ExtraWeight(0) { }
+ Case() : Low(nullptr), High(nullptr), BB(nullptr), ExtraWeight(0) { }
Case(const Constant *low, const Constant *high, MachineBasicBlock *bb,
uint32_t extraweight) : Low(low), High(high), BB(bb),
ExtraWeight(extraweight) { }
@@ -396,8 +396,8 @@ private:
/// the same function, use the same failure basic block).
class StackProtectorDescriptor {
public:
- StackProtectorDescriptor() : ParentMBB(0), SuccessMBB(0), FailureMBB(0),
- Guard(0) { }
+ StackProtectorDescriptor() : ParentMBB(nullptr), SuccessMBB(nullptr),
+ FailureMBB(nullptr), Guard(nullptr) { }
~StackProtectorDescriptor() { }
/// Returns true if all fields of the stack protector descriptor are
@@ -432,8 +432,8 @@ private:
/// parent mbb after we create the stack protector check (SuccessMBB). This
/// BB is visited only on stack protector check success.
void resetPerBBState() {
- ParentMBB = 0;
- SuccessMBB = 0;
+ ParentMBB = nullptr;
+ SuccessMBB = nullptr;
}
/// Reset state that only changes when we switch functions.
@@ -446,8 +446,8 @@ private:
/// 2.The guard variable since the guard variable we are checking against is
/// always the same.
void resetPerFunctionState() {
- FailureMBB = 0;
- Guard = 0;
+ FailureMBB = nullptr;
+ Guard = nullptr;
}
MachineBasicBlock *getParentMBB() { return ParentMBB; }
@@ -482,7 +482,7 @@ private:
/// block will be created.
MachineBasicBlock *AddSuccessorMBB(const BasicBlock *BB,
MachineBasicBlock *ParentMBB,
- MachineBasicBlock *SuccMBB = 0);
+ MachineBasicBlock *SuccMBB = nullptr);
};
private:
@@ -538,7 +538,7 @@ public:
SelectionDAGBuilder(SelectionDAG &dag, FunctionLoweringInfo &funcinfo,
CodeGenOpt::Level ol)
- : CurInst(NULL), SDNodeOrder(LowestSDNodeOrder), TM(dag.getTarget()),
+ : CurInst(nullptr), SDNodeOrder(LowestSDNodeOrder), TM(dag.getTarget()),
DAG(dag), FuncInfo(funcinfo), OptLevel(ol),
HasTailCall(false) {
}
@@ -600,13 +600,13 @@ public:
void setValue(const Value *V, SDValue NewN) {
SDValue &N = NodeMap[V];
- assert(N.getNode() == 0 && "Already set a value for this node!");
+ assert(!N.getNode() && "Already set a value for this node!");
N = NewN;
}
void setUnusedArgValue(const Value *V, SDValue NewN) {
SDValue &N = UnusedArgNodeMap[V];
- assert(N.getNode() == 0 && "Already set a value for this node!");
+ assert(!N.getNode() && "Already set a value for this node!");
N = NewN;
}
@@ -624,7 +624,7 @@ public:
void CopyToExportRegsIfNeeded(const Value *V);
void ExportFromCurrentBlock(const Value *V);
void LowerCallTo(ImmutableCallSite CS, SDValue Callee, bool IsTailCall,
- MachineBasicBlock *LandingPad = NULL);
+ MachineBasicBlock *LandingPad = nullptr);
std::pair<SDValue, SDValue> LowerCallOperands(const CallInst &CI,
unsigned ArgIdx,
@@ -642,7 +642,7 @@ private:
void visitBr(const BranchInst &I);
void visitSwitch(const SwitchInst &I);
void visitIndirectBr(const IndirectBrInst &I);
- void visitUnreachable(const UnreachableInst &I) { /* noop */ }
+ void visitUnreachable(const UnreachableInst &I);
// Helpers for visitSwitch
bool handleSmallSwitchRange(CaseRec& CR,
@@ -785,7 +785,8 @@ private:
/// corresponding DBG_VALUE machine instruction for it now. At the end of
/// instruction selection, they will be inserted to the entry BB.
bool EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable,
- int64_t Offset, const SDValue &N);
+ int64_t Offset, bool IsIndirect,
+ const SDValue &N);
};
} // end namespace llvm
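Two of the header changes above are more than the nullptr sweep: visitUnreachable loses its inline no-op body (its real definition lands in the .cpp, outside the hunks shown), and EmitFuncArgumentDbgValue grows an IsIndirect parameter so an argument DBG_VALUE can record whether N holds the variable's value or its address. A hedged sketch of a call under the new signature; the argument values are illustrative, not taken from this diff:

  // IsIndirect == true: N is the address of the variable, not its value.
  bool Handled = EmitFuncArgumentDbgValue(V, Variable, /*Offset=*/0,
                                          /*IsIndirect=*/true, N);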
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index 535feba..d6b5255 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -93,6 +93,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::GLOBAL_OFFSET_TABLE: return "GLOBAL_OFFSET_TABLE";
case ISD::RETURNADDR: return "RETURNADDR";
case ISD::FRAMEADDR: return "FRAMEADDR";
+ case ISD::READ_REGISTER: return "READ_REGISTER";
+ case ISD::WRITE_REGISTER: return "WRITE_REGISTER";
case ISD::FRAME_TO_ARGS_OFFSET: return "FRAME_TO_ARGS_OFFSET";
case ISD::EH_RETURN: return "EH_RETURN";
case ISD::EH_SJLJ_SETJMP: return "EH_SJLJ_SETJMP";
@@ -330,7 +332,7 @@ const char *SDNode::getIndexedModeName(ISD::MemIndexedMode AM) {
}
}
-void SDNode::dump() const { dump(0); }
+void SDNode::dump() const { dump(nullptr); }
void SDNode::dump(const SelectionDAG *G) const {
print(dbgs(), G);
dbgs() << '\n';
@@ -427,7 +429,7 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
OS << LBB->getName() << " ";
OS << (const void*)BBDN->getBasicBlock() << ">";
} else if (const RegisterSDNode *R = dyn_cast<RegisterSDNode>(this)) {
- OS << ' ' << PrintReg(R->getReg(), G ? G->getTarget().getRegisterInfo() :0);
+    OS << ' ' << PrintReg(R->getReg(), G ? G->getTarget().getRegisterInfo() : nullptr);
} else if (const ExternalSymbolSDNode *ES =
dyn_cast<ExternalSymbolSDNode>(this)) {
OS << "'" << ES->getSymbol() << "'";
@@ -595,7 +597,7 @@ static void DumpNodesr(raw_ostream &OS, const SDNode *N, unsigned indent,
void SDNode::dumpr() const {
VisitedSDNodeSet once;
- DumpNodesr(dbgs(), this, 0, 0, once);
+ DumpNodesr(dbgs(), this, 0, nullptr, once);
}
void SDNode::dumpr(const SelectionDAG *G) const {
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 5d0e2b9..472fc9c 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -11,7 +11,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "isel"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "ScheduleDAGSDNodes.h"
#include "SelectionDAGBuilder.h"
@@ -58,6 +57,8 @@
#include <algorithm>
using namespace llvm;
+#define DEBUG_TYPE "isel"
+
STATISTIC(NumFastIselFailures, "Number of instructions fast isel failed on");
STATISTIC(NumFastIselSuccess, "Number of instructions fast isel selected");
STATISTIC(NumFastIselBlocks, "Number of blocks selected entirely by fast isel");
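The DEBUG_TYPE definition moves from above the includes to below them, a relocation repeated in SelectionDAGPrinter.cpp, ShadowStackGC.cpp, and SjLjEHPrepare.cpp later in the patch. The point of the convention is that no header should be preprocessed with the including file's DEBUG_TYPE already defined, so the macro now follows the last #include:

  // The convention applied across these files: includes first, then the
  // debug type, so headers never see (or collide with) this definition.
  #include "llvm/CodeGen/SelectionDAGISel.h"
  using namespace llvm;

  #define DEBUG_TYPE "isel"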
@@ -299,7 +300,7 @@ TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
"'usesCustomInserter', it must implement "
"TargetLowering::EmitInstrWithCustomInserter!";
#endif
- llvm_unreachable(0);
+ llvm_unreachable(nullptr);
}
void TargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
@@ -356,7 +357,7 @@ static void SplitCriticalSideEffectEdges(Function &Fn, Pass *SDISel) {
// Loop for blocks with phi nodes.
for (Function::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) {
PHINode *PN = dyn_cast<PHINode>(BB->begin());
- if (PN == 0) continue;
+ if (!PN) continue;
ReprocessBlock:
// For each block with a PHI node, check to see if any of the input values
@@ -366,7 +367,7 @@ static void SplitCriticalSideEffectEdges(Function &Fn, Pass *SDISel) {
for (BasicBlock::iterator I = BB->begin(); (PN = dyn_cast<PHINode>(I)); ++I)
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
ConstantExpr *CE = dyn_cast<ConstantExpr>(PN->getIncomingValue(i));
- if (CE == 0 || !CE->canTrap()) continue;
+ if (!CE || !CE->canTrap()) continue;
// The only case we have to worry about is when the edge is critical.
// Since this block has a PHI Node, we assume it has multiple input
@@ -399,7 +400,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
RegInfo = &MF->getRegInfo();
AA = &getAnalysis<AliasAnalysis>();
LibInfo = &getAnalysis<TargetLibraryInfo>();
- GFI = Fn.hasGC() ? &getAnalysis<GCModuleInfo>().getFunctionInfo(Fn) : 0;
+ GFI = Fn.hasGC() ? &getAnalysis<GCModuleInfo>().getFunctionInfo(Fn) : nullptr;
TargetSubtargetInfo &ST =
const_cast<TargetSubtargetInfo&>(TM.getSubtarget<TargetSubtargetInfo>());
@@ -422,7 +423,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
if (UseMBPI && OptLevel != CodeGenOpt::None)
FuncInfo->BPI = &getAnalysis<BranchProbabilityInfo>();
else
- FuncInfo->BPI = 0;
+ FuncInfo->BPI = nullptr;
SDB->init(GFI, *AA, LibInfo);
@@ -482,7 +483,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
// If this vreg is directly copied into an exported register then
// that COPY instructions also need DBG_VALUE, if it is the only
// user of LDI->second.
- MachineInstr *CopyUseMI = NULL;
+ MachineInstr *CopyUseMI = nullptr;
for (MachineRegisterInfo::use_instr_iterator
UI = RegInfo->use_instr_begin(LDI->second),
E = RegInfo->use_instr_end(); UI != E; ) {
@@ -492,7 +493,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
CopyUseMI = UseMI; continue;
}
// Otherwise this is another use or second copy use.
- CopyUseMI = NULL; break;
+ CopyUseMI = nullptr; break;
}
if (CopyUseMI) {
MachineInstr *NewMI =
@@ -509,21 +510,17 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
// Determine if there are any calls in this machine function.
MachineFrameInfo *MFI = MF->getFrameInfo();
- for (MachineFunction::const_iterator I = MF->begin(), E = MF->end(); I != E;
- ++I) {
-
+ for (const auto &MBB : *MF) {
if (MFI->hasCalls() && MF->hasInlineAsm())
break;
- const MachineBasicBlock *MBB = I;
- for (MachineBasicBlock::const_iterator II = MBB->begin(), IE = MBB->end();
- II != IE; ++II) {
- const MCInstrDesc &MCID = TM.getInstrInfo()->get(II->getOpcode());
+ for (const auto &MI : MBB) {
+ const MCInstrDesc &MCID = TM.getInstrInfo()->get(MI.getOpcode());
if ((MCID.isCall() && !MCID.isReturn()) ||
- II->isStackAligningInlineAsm()) {
+ MI.isStackAligningInlineAsm()) {
MFI->setHasCalls(true);
}
- if (II->isInlineAsm()) {
+ if (MI.isInlineAsm()) {
MF->setHasInlineAsm(true);
}
}
@@ -624,7 +621,7 @@ void SelectionDAGISel::ComputeLiveOutVRegInfo() {
continue;
unsigned NumSignBits = CurDAG->ComputeNumSignBits(Src);
- CurDAG->ComputeMaskedBits(Src, KnownZero, KnownOne);
+ CurDAG->computeKnownBits(Src, KnownZero, KnownOne);
FuncInfo->AddLiveOutRegInfo(DestReg, NumSignBits, KnownZero, KnownOne);
} while (!Worklist.empty());
}
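This is the first ComputeMaskedBits -> computeKnownBits rename in the patch; the doc comments and the computeKnownBitsForTargetNode hook are renamed to match in TargetLowering.cpp below. Only the name changes; the signature and usage are untouched:

  // Usage is identical before and after the rename:
  APInt KnownZero, KnownOne;
  CurDAG->computeKnownBits(Src, KnownZero, KnownOne);
  // was: CurDAG->ComputeMaskedBits(Src, KnownZero, KnownOne);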
@@ -994,7 +991,7 @@ static void collectFailStats(const Instruction *I) {
void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
// Initialize the Fast-ISel state, if needed.
- FastISel *FastIS = 0;
+ FastISel *FastIS = nullptr;
if (TM.Options.EnableFastISel)
FastIS = getTargetLowering()->createFastISel(*FuncInfo, LibInfo);
@@ -1069,7 +1066,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
if (FuncInfo->InsertPt != FuncInfo->MBB->begin())
FastIS->setLastLocalValue(std::prev(FuncInfo->InsertPt));
else
- FastIS->setLastLocalValue(0);
+ FastIS->setLastLocalValue(nullptr);
}
unsigned NumFastIselRemaining = std::distance(Begin, End);
@@ -1607,7 +1604,7 @@ bool SelectionDAGISel::CheckOrMask(SDValue LHS, ConstantSDNode *RHS,
APInt NeededMask = DesiredMask & ~ActualMask;
APInt KnownZero, KnownOne;
- CurDAG->ComputeMaskedBits(LHS, KnownZero, KnownOne);
+ CurDAG->computeKnownBits(LHS, KnownZero, KnownOne);
// If all the missing bits in the or are already known to be set, match!
if ((NeededMask & KnownOne) == NeededMask)
@@ -1676,7 +1673,7 @@ static SDNode *findGlueUse(SDNode *N) {
if (Use.getResNo() == FlagResNo)
return Use.getUser();
}
- return NULL;
+ return nullptr;
}
/// findNonImmUse - Return true if "Use" is a non-immediate use of "Def".
@@ -1783,7 +1780,7 @@ bool SelectionDAGISel::IsLegalToFold(SDValue N, SDNode *U, SDNode *Root,
EVT VT = Root->getValueType(Root->getNumValues()-1);
while (VT == MVT::Glue) {
SDNode *GU = findGlueUse(Root);
- if (GU == NULL)
+ if (!GU)
break;
Root = GU;
VT = Root->getValueType(Root->getNumValues()-1);
@@ -1805,12 +1802,39 @@ SDNode *SelectionDAGISel::Select_INLINEASM(SDNode *N) {
SelectInlineAsmMemoryOperands(Ops);
EVT VTs[] = { MVT::Other, MVT::Glue };
- SDValue New = CurDAG->getNode(ISD::INLINEASM, SDLoc(N),
- VTs, &Ops[0], Ops.size());
+ SDValue New = CurDAG->getNode(ISD::INLINEASM, SDLoc(N), VTs, Ops);
+ New->setNodeId(-1);
+ return New.getNode();
+}
+
+SDNode *SelectionDAGISel::Select_READ_REGISTER(SDNode *Op) {
+ SDLoc dl(Op);
+ MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(Op->getOperand(0));
+ const MDString *RegStr = dyn_cast<MDString>(MD->getMD()->getOperand(0));
+ unsigned Reg = getTargetLowering()->getRegisterByName(
+ RegStr->getString().data(), Op->getValueType(0));
+ SDValue New = CurDAG->getCopyFromReg(
+ CurDAG->getEntryNode(), dl, Reg, Op->getValueType(0));
New->setNodeId(-1);
return New.getNode();
}
+SDNode *SelectionDAGISel::Select_WRITE_REGISTER(SDNode *Op) {
+ SDLoc dl(Op);
+ MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(Op->getOperand(1));
+ const MDString *RegStr = dyn_cast<MDString>(MD->getMD()->getOperand(0));
+ unsigned Reg = getTargetLowering()->getRegisterByName(
+ RegStr->getString().data(), Op->getOperand(2).getValueType());
+ SDValue New = CurDAG->getCopyToReg(
+ CurDAG->getEntryNode(), dl, Reg, Op->getOperand(2));
+ New->setNodeId(-1);
+ return New.getNode();
+}
+
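The two selectors added above implement the new read_register/write_register lowering: the register is named by an MDString operand, resolved to a physical register through the target's getRegisterByName hook, and turned into a plain CopyFromReg or CopyToReg anchored at the entry node. A condensed sketch of the read path; the register name is illustrative:

  // Metadata-named register -> physical register -> CopyFromReg.
  MDNodeSDNode *MD = cast<MDNodeSDNode>(Op->getOperand(0));
  const MDString *RegStr = cast<MDString>(MD->getMD()->getOperand(0));
  unsigned Reg = getTargetLowering()->getRegisterByName(
      RegStr->getString().data(), Op->getValueType(0)); // e.g. "sp"
  SDValue V = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), SDLoc(Op),
                                     Reg, Op->getValueType(0));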
SDNode *SelectionDAGISel::Select_UNDEF(SDNode *N) {
return CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF,N->getValueType(0));
}
@@ -1846,7 +1870,7 @@ UpdateChainsAndGlue(SDNode *NodeToMatch, SDValue InputChain,
// Now that all the normal results are replaced, we replace the chain and
// glue results if present.
if (!ChainNodesMatched.empty()) {
- assert(InputChain.getNode() != 0 &&
+ assert(InputChain.getNode() &&
"Matched input chains but didn't produce a chain");
// Loop over all of the nodes we matched that produced a chain result.
// Replace all the chain results with the final chain we ended up with.
@@ -1877,7 +1901,7 @@ UpdateChainsAndGlue(SDNode *NodeToMatch, SDValue InputChain,
// If the result produces glue, update any glue results in the matched
// pattern with the glue result.
- if (InputGlue.getNode() != 0) {
+ if (InputGlue.getNode()) {
// Handle any interior nodes explicitly marked.
for (unsigned i = 0, e = GlueResultNodesMatched.size(); i != e; ++i) {
SDNode *FRN = GlueResultNodesMatched[i];
@@ -2080,13 +2104,13 @@ HandleMergeInputChains(SmallVectorImpl<SDNode*> &ChainNodesMatched,
if (InputChains.size() == 1)
return InputChains[0];
return CurDAG->getNode(ISD::TokenFactor, SDLoc(ChainNodesMatched[0]),
- MVT::Other, &InputChains[0], InputChains.size());
+ MVT::Other, InputChains);
}
/// MorphNode - Handle morphing a node in place for the selector.
SDNode *SelectionDAGISel::
MorphNode(SDNode *Node, unsigned TargetOpc, SDVTList VTList,
- const SDValue *Ops, unsigned NumOps, unsigned EmitNodeInfo) {
+ ArrayRef<SDValue> Ops, unsigned EmitNodeInfo) {
// It is possible we're using MorphNodeTo to replace a node with no
// normal results with one that has a normal result (or we could be
// adding a chain) and the input could have glue and chains as well.
@@ -2106,7 +2130,7 @@ MorphNode(SDNode *Node, unsigned TargetOpc, SDVTList VTList,
// Call the underlying SelectionDAG routine to do the transmogrification. Note
// that this deletes operands of the old node that become dead.
- SDNode *Res = CurDAG->MorphNodeTo(Node, ~TargetOpc, VTList, Ops, NumOps);
+ SDNode *Res = CurDAG->MorphNodeTo(Node, ~TargetOpc, VTList, Ops);
// MorphNodeTo can operate in two ways: if an existing node with the
// specified operands exists, it can just return it. Otherwise, it
@@ -2230,7 +2254,7 @@ CheckInteger(const unsigned char *MatcherTable, unsigned &MatcherIndex,
Val = GetVBR(Val, MatcherTable, MatcherIndex);
ConstantSDNode *C = dyn_cast<ConstantSDNode>(N);
- return C != 0 && C->getSExtValue() == Val;
+ return C && C->getSExtValue() == Val;
}
LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
@@ -2251,7 +2275,7 @@ CheckAndImm(const unsigned char *MatcherTable, unsigned &MatcherIndex,
if (N->getOpcode() != ISD::AND) return false;
ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
- return C != 0 && SDISel.CheckAndMask(N.getOperand(0), C, Val);
+ return C && SDISel.CheckAndMask(N.getOperand(0), C, Val);
}
LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
@@ -2264,7 +2288,7 @@ CheckOrImm(const unsigned char *MatcherTable, unsigned &MatcherIndex,
if (N->getOpcode() != ISD::OR) return false;
ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
- return C != 0 && SDISel.CheckOrMask(N.getOperand(0), C, Val);
+ return C && SDISel.CheckOrMask(N.getOperand(0), C, Val);
}
/// IsPredicateKnownToFail - If we know how and can do so without pushing a
@@ -2396,13 +2420,15 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
case ISD::LIFETIME_START:
case ISD::LIFETIME_END:
NodeToMatch->setNodeId(-1); // Mark selected.
- return 0;
+ return nullptr;
case ISD::AssertSext:
case ISD::AssertZext:
CurDAG->ReplaceAllUsesOfValueWith(SDValue(NodeToMatch, 0),
NodeToMatch->getOperand(0));
- return 0;
+ return nullptr;
case ISD::INLINEASM: return Select_INLINEASM(NodeToMatch);
+ case ISD::READ_REGISTER: return Select_READ_REGISTER(NodeToMatch);
+ case ISD::WRITE_REGISTER: return Select_WRITE_REGISTER(NodeToMatch);
case ISD::UNDEF: return Select_UNDEF(NodeToMatch);
}
@@ -2548,7 +2574,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
}
case OPC_RecordNode: {
// Remember this node, it may end up being an operand in the pattern.
- SDNode *Parent = 0;
+ SDNode *Parent = nullptr;
if (NodeStack.size() > 1)
Parent = NodeStack[NodeStack.size()-2].getNode();
RecordedNodes.push_back(std::make_pair(N, Parent));
@@ -2755,7 +2781,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
if (Val & 128)
Val = GetVBR(Val, MatcherTable, MatcherIndex);
RecordedNodes.push_back(std::pair<SDValue, SDNode*>(
- CurDAG->getTargetConstant(Val, VT), (SDNode*)0));
+ CurDAG->getTargetConstant(Val, VT), nullptr));
continue;
}
case OPC_EmitRegister: {
@@ -2763,7 +2789,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
(MVT::SimpleValueType)MatcherTable[MatcherIndex++];
unsigned RegNo = MatcherTable[MatcherIndex++];
RecordedNodes.push_back(std::pair<SDValue, SDNode*>(
- CurDAG->getRegister(RegNo, VT), (SDNode*)0));
+ CurDAG->getRegister(RegNo, VT), nullptr));
continue;
}
case OPC_EmitRegister2: {
@@ -2775,7 +2801,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
unsigned RegNo = MatcherTable[MatcherIndex++];
RegNo |= MatcherTable[MatcherIndex++] << 8;
RecordedNodes.push_back(std::pair<SDValue, SDNode*>(
- CurDAG->getRegister(RegNo, VT), (SDNode*)0));
+ CurDAG->getRegister(RegNo, VT), nullptr));
continue;
}
@@ -2800,7 +2826,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
case OPC_EmitMergeInputChains1_0: // OPC_EmitMergeInputChains, 1, 0
case OPC_EmitMergeInputChains1_1: { // OPC_EmitMergeInputChains, 1, 1
// These are space-optimized forms of OPC_EmitMergeInputChains.
- assert(InputChain.getNode() == 0 &&
+ assert(!InputChain.getNode() &&
"EmitMergeInputChains should be the first chain producing node");
assert(ChainNodesMatched.empty() &&
"Should only have one EmitMergeInputChains per match");
@@ -2821,13 +2847,13 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
// Merge the input chains if they are not intra-pattern references.
InputChain = HandleMergeInputChains(ChainNodesMatched, CurDAG);
- if (InputChain.getNode() == 0)
+ if (!InputChain.getNode())
break; // Failed to merge.
continue;
}
case OPC_EmitMergeInputChains: {
- assert(InputChain.getNode() == 0 &&
+ assert(!InputChain.getNode() &&
"EmitMergeInputChains should be the first chain producing node");
// This node gets a list of nodes we matched in the input that have
// chains. We want to token factor all of the input chains to these nodes
@@ -2863,7 +2889,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
// Merge the input chains if they are not intra-pattern references.
InputChain = HandleMergeInputChains(ChainNodesMatched, CurDAG);
- if (InputChain.getNode() == 0)
+ if (!InputChain.getNode())
break; // Failed to merge.
continue;
@@ -2874,7 +2900,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
assert(RecNo < RecordedNodes.size() && "Invalid EmitCopyToReg");
unsigned DestPhysReg = MatcherTable[MatcherIndex++];
- if (InputChain.getNode() == 0)
+ if (!InputChain.getNode())
InputChain = CurDAG->getEntryNode();
InputChain = CurDAG->getCopyToReg(InputChain, SDLoc(NodeToMatch),
@@ -2890,7 +2916,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
unsigned RecNo = MatcherTable[MatcherIndex++];
assert(RecNo < RecordedNodes.size() && "Invalid EmitNodeXForm");
SDValue Res = RunSDNodeXForm(RecordedNodes[RecNo].first, XFormNo);
- RecordedNodes.push_back(std::pair<SDValue,SDNode*>(Res, (SDNode*) 0));
+ RecordedNodes.push_back(std::pair<SDValue,SDNode*>(Res, nullptr));
continue;
}
@@ -2922,7 +2948,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
else if (VTs.size() == 2)
VTList = CurDAG->getVTList(VTs[0], VTs[1]);
else
- VTList = CurDAG->getVTList(VTs.data(), VTs.size());
+ VTList = CurDAG->getVTList(VTs);
// Get the operand list.
unsigned NumOps = MatcherTable[MatcherIndex++];
@@ -2956,11 +2982,11 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
// If this has chain/glue inputs, add them.
if (EmitNodeInfo & OPFL_Chain)
Ops.push_back(InputChain);
- if ((EmitNodeInfo & OPFL_GlueInput) && InputGlue.getNode() != 0)
+ if ((EmitNodeInfo & OPFL_GlueInput) && InputGlue.getNode() != nullptr)
Ops.push_back(InputGlue);
// Create the node.
- SDNode *Res = 0;
+ SDNode *Res = nullptr;
if (Opcode != OPC_MorphNodeTo) {
// If this is a normal EmitNode command, just create the new node and
// add the results to the RecordedNodes list.
@@ -2971,17 +2997,16 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
if (VTs[i] == MVT::Other || VTs[i] == MVT::Glue) break;
RecordedNodes.push_back(std::pair<SDValue,SDNode*>(SDValue(Res, i),
- (SDNode*) 0));
+ nullptr));
}
} else if (NodeToMatch->getOpcode() != ISD::DELETED_NODE) {
- Res = MorphNode(NodeToMatch, TargetOpc, VTList, Ops.data(), Ops.size(),
- EmitNodeInfo);
+ Res = MorphNode(NodeToMatch, TargetOpc, VTList, Ops, EmitNodeInfo);
} else {
// NodeToMatch was eliminated by CSE when the target changed the DAG.
// We will visit the equivalent node later.
DEBUG(dbgs() << "Node was eliminated by CSE\n");
- return 0;
+ return nullptr;
}
// If the node had chain/glue results, update our notion of the current
@@ -3111,7 +3136,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
// FIXME: We just return here, which interacts correctly with SelectRoot
// above. We should fix this to not return an SDNode* anymore.
- return 0;
+ return nullptr;
}
}
@@ -3123,7 +3148,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
while (1) {
if (MatchScopes.empty()) {
CannotYetSelect(NodeToMatch);
- return 0;
+ return nullptr;
}
// Restore the interpreter state back to the point where the scope was
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
index 1483fdd..4df5ede 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
@@ -27,6 +27,8 @@
#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
+#define DEBUG_TYPE "dag-printer"
+
namespace llvm {
template<>
struct DOTGraphTraits<SelectionDAG*> : public DefaultDOTGraphTraits {
@@ -124,9 +126,9 @@ namespace llvm {
static void addCustomGraphFeatures(SelectionDAG *G,
GraphWriter<SelectionDAG*> &GW) {
- GW.emitSimpleNode(0, "plaintext=circle", "GraphRoot");
+ GW.emitSimpleNode(nullptr, "plaintext=circle", "GraphRoot");
if (G->getRoot().getNode())
- GW.emitEdge(0, -1, G->getRoot().getNode(), G->getRoot().getResNo(),
+ GW.emitEdge(nullptr, -1, G->getRoot().getNode(), G->getRoot().getResNo(),
"color=blue,style=dashed");
}
};
@@ -289,10 +291,10 @@ std::string ScheduleDAGSDNodes::getGraphNodeLabel(const SUnit *SU) const {
void ScheduleDAGSDNodes::getCustomGraphFeatures(GraphWriter<ScheduleDAG*> &GW) const {
if (DAG) {
// Draw a special "GraphRoot" node to indicate the root of the graph.
- GW.emitSimpleNode(0, "plaintext=circle", "GraphRoot");
+ GW.emitSimpleNode(nullptr, "plaintext=circle", "GraphRoot");
const SDNode *N = DAG->getRoot().getNode();
if (N && N->getNodeId() != -1)
- GW.emitEdge(0, -1, &SUnits[N->getNodeId()], -1,
+ GW.emitEdge(nullptr, -1, &SUnits[N->getNodeId()], -1,
"color=blue,style=dashed");
}
}
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 5de0b03..b75d805 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -40,7 +40,7 @@ TargetLowering::TargetLowering(const TargetMachine &tm,
: TargetLoweringBase(tm, tlof) {}
const char *TargetLowering::getTargetNodeName(unsigned Opcode) const {
- return NULL;
+ return nullptr;
}
/// Check whether a given call node is in tail position within its function. If
@@ -103,12 +103,11 @@ TargetLowering::makeLibCall(SelectionDAG &DAG,
SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC), getPointerTy());
Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
- TargetLowering::
- CallLoweringInfo CLI(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false,
- false, 0, getLibcallCallingConv(LC),
- /*isTailCall=*/false,
- doesNotReturn, isReturnValueUsed, Callee, Args,
- DAG, dl);
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(dl).setChain(DAG.getEntryNode())
+ .setCallee(getLibcallCallingConv(LC), RetTy, Callee, &Args, 0)
+ .setNoReturn(doesNotReturn).setDiscardResult(!isReturnValueUsed)
+ .setSExtResult(isSigned).setZExtResult(!isSigned);
return LowerCallTo(CLI);
}
@@ -226,7 +225,7 @@ unsigned TargetLowering::getJumpTableEncoding() const {
return MachineJumpTableInfo::EK_BlockAddress;
// In PIC mode, if the target supports a GPRel32 directive, use it.
- if (getTargetMachine().getMCAsmInfo()->getGPRel32Directive() != 0)
+ if (getTargetMachine().getMCAsmInfo()->getGPRel32Directive() != nullptr)
return MachineJumpTableInfo::EK_GPRel32BlockAddress;
// Otherwise, use a label difference.
@@ -386,7 +385,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
if (Depth != 0) {
// If not at the root, Just compute the KnownZero/KnownOne bits to
// simplify things downstream.
- TLO.DAG.ComputeMaskedBits(Op, KnownZero, KnownOne, Depth);
+ TLO.DAG.computeKnownBits(Op, KnownZero, KnownOne, Depth);
return false;
}
// If this is the root being simplified, allow it to have multiple uses,
@@ -416,7 +415,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
APInt LHSZero, LHSOne;
// Do not increment Depth here; that can cause an infinite loop.
- TLO.DAG.ComputeMaskedBits(Op.getOperand(0), LHSZero, LHSOne, Depth);
+ TLO.DAG.computeKnownBits(Op.getOperand(0), LHSZero, LHSOne, Depth);
// If the LHS already has zeros where RHSC does, this and is dead.
if ((LHSZero & NewMask) == (~RHSC->getAPIntValue() & NewMask))
return TLO.CombineTo(Op, Op.getOperand(0));
@@ -848,6 +847,31 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
}
break;
}
+ case ISD::BUILD_PAIR: {
+ EVT HalfVT = Op.getOperand(0).getValueType();
+ unsigned HalfBitWidth = HalfVT.getScalarSizeInBits();
+
+ APInt MaskLo = NewMask.getLoBits(HalfBitWidth).trunc(HalfBitWidth);
+ APInt MaskHi = NewMask.getHiBits(HalfBitWidth).trunc(HalfBitWidth);
+
+ APInt KnownZeroLo, KnownOneLo;
+ APInt KnownZeroHi, KnownOneHi;
+
+ if (SimplifyDemandedBits(Op.getOperand(0), MaskLo, KnownZeroLo,
+ KnownOneLo, TLO, Depth + 1))
+ return true;
+
+ if (SimplifyDemandedBits(Op.getOperand(1), MaskHi, KnownZeroHi,
+ KnownOneHi, TLO, Depth + 1))
+ return true;
+
+ KnownZero = KnownZeroLo.zext(BitWidth) |
+ KnownZeroHi.zext(BitWidth).shl(HalfBitWidth);
+
+ KnownOne = KnownOneLo.zext(BitWidth) |
+ KnownOneHi.zext(BitWidth).shl(HalfBitWidth);
+ break;
+ }
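The new BUILD_PAIR case splits the demanded mask across the two half-width operands and reassembles the known bits, shifting the high half back into position. A worked instance for an i64 pair built from two i32 halves:

  // BitWidth = 64, HalfBitWidth = 32:
  APInt NewMask = APInt::getAllOnesValue(64);
  APInt MaskLo = NewMask.getLoBits(32).trunc(32); // demanded bits of Op0
  APInt MaskHi = NewMask.getHiBits(32).trunc(32); // demanded bits of Op1
  // After the two recursive calls, the halves recombine as above:
  //   KnownZero = KnownZeroLo.zext(64) | KnownZeroHi.zext(64).shl(32);
  //   KnownOne  = KnownOneLo.zext(64)  | KnownOneHi.zext(64).shl(32);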
case ISD::ZERO_EXTEND: {
unsigned OperandBitWidth =
Op.getOperand(0).getValueType().getScalarType().getSizeInBits();
@@ -1040,8 +1064,8 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
}
// FALL THROUGH
default:
- // Just use ComputeMaskedBits to compute output bits.
- TLO.DAG.ComputeMaskedBits(Op, KnownZero, KnownOne, Depth);
+ // Just use computeKnownBits to compute output bits.
+ TLO.DAG.computeKnownBits(Op, KnownZero, KnownOne, Depth);
break;
}
@@ -1053,14 +1077,14 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
return false;
}
-/// computeMaskedBitsForTargetNode - Determine which of the bits specified
+/// computeKnownBitsForTargetNode - Determine which of the bits specified
/// in Mask are known to be either zero or one and return them in the
/// KnownZero/KnownOne bitsets.
-void TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
- APInt &KnownZero,
- APInt &KnownOne,
- const SelectionDAG &DAG,
- unsigned Depth) const {
+void TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth) const {
assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
@@ -1074,6 +1098,7 @@ void TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
/// targets that want to expose additional information about sign bits to the
/// DAG Combiner.
unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
+ const SelectionDAG &,
unsigned Depth) const {
assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
@@ -1085,7 +1110,7 @@ unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
}
/// ValueHasExactlyOneBitSet - Test if the given value is known to have exactly
-/// one bit set. This differs from ComputeMaskedBits in that it doesn't need to
+/// one bit set. This differs from computeKnownBits in that it doesn't need to
/// determine which bit is set.
///
static bool ValueHasExactlyOneBitSet(SDValue Val, const SelectionDAG &DAG) {
@@ -1108,11 +1133,11 @@ static bool ValueHasExactlyOneBitSet(SDValue Val, const SelectionDAG &DAG) {
// More could be done here, though the above checks are enough
// to handle some common cases.
- // Fall back to ComputeMaskedBits to catch other known cases.
+ // Fall back to computeKnownBits to catch other known cases.
EVT OpVT = Val.getValueType();
unsigned BitWidth = OpVT.getScalarType().getSizeInBits();
APInt KnownZero, KnownOne;
- DAG.ComputeMaskedBits(Val, KnownZero, KnownOne);
+ DAG.computeKnownBits(Val, KnownZero, KnownOne);
return (KnownZero.countPopulation() == BitWidth - 1) &&
(KnownOne.countPopulation() == 1);
}
@@ -1381,10 +1406,14 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
EVT newVT = N0.getOperand(0).getValueType();
if (DCI.isBeforeLegalizeOps() ||
(isOperationLegal(ISD::SETCC, newVT) &&
- getCondCodeAction(Cond, newVT.getSimpleVT())==Legal))
- return DAG.getSetCC(dl, VT, N0.getOperand(0),
- DAG.getConstant(C1.trunc(InSize), newVT),
- Cond);
+ getCondCodeAction(Cond, newVT.getSimpleVT()) == Legal)) {
+ EVT NewSetCCVT = getSetCCResultType(*DAG.getContext(), newVT);
+ SDValue NewConst = DAG.getConstant(C1.trunc(InSize), newVT);
+
+ SDValue NewSetCC = DAG.getSetCC(dl, NewSetCCVT, N0.getOperand(0),
+ NewConst, Cond);
+ return DAG.getBoolExtOrTrunc(NewSetCC, dl, VT);
+ }
break;
}
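This hunk is a functional fix rather than part of the rename sweep: the truncated compare used to be created directly with the original result type VT, which is wrong whenever the target's setcc result type for newVT differs (vector compares are the usual case). The fix builds the setcc in getSetCCResultType(newVT) and converts back with getBoolExtOrTrunc, which extends or truncates according to the target's boolean contents. The shape of the fix, condensed:

  EVT NewSetCCVT = getSetCCResultType(*DAG.getContext(), newVT);
  SDValue Cmp = DAG.getSetCC(dl, NewSetCCVT, N0.getOperand(0),
                             DAG.getConstant(C1.trunc(InSize), newVT), Cond);
  return DAG.getBoolExtOrTrunc(Cmp, dl, VT); // back to the caller's VT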
default:
@@ -2052,7 +2081,7 @@ const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const{
return "r";
if (ConstraintVT.isFloatingPoint())
return "f"; // works for many targets
- return 0;
+ return nullptr;
}
/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
@@ -2086,12 +2115,12 @@ void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
if (Op.getOpcode() == ISD::ADD) {
C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(0));
- if (C == 0 || GA == 0) {
+ if (!C || !GA) {
C = dyn_cast<ConstantSDNode>(Op.getOperand(0));
GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(1));
}
- if (C == 0 || GA == 0)
- C = 0, GA = 0;
+ if (!C || !GA)
+ C = nullptr, GA = nullptr;
}
// If we find a valid operand, map to the TargetXXX version so that the
@@ -2126,14 +2155,14 @@ std::pair<unsigned, const TargetRegisterClass*> TargetLowering::
getRegForInlineAsmConstraint(const std::string &Constraint,
MVT VT) const {
if (Constraint.empty() || Constraint[0] != '{')
- return std::make_pair(0u, static_cast<TargetRegisterClass*>(0));
+ return std::make_pair(0u, static_cast<TargetRegisterClass*>(nullptr));
assert(*(Constraint.end()-1) == '}' && "Not a brace enclosed constraint?");
// Remove the braces from around the name.
StringRef RegName(Constraint.data()+1, Constraint.size()-2);
std::pair<unsigned, const TargetRegisterClass*> R =
- std::make_pair(0u, static_cast<const TargetRegisterClass*>(0));
+ std::make_pair(0u, static_cast<const TargetRegisterClass*>(nullptr));
// Figure out which register class contains this reg.
const TargetRegisterInfo *RI = getTargetMachine().getRegisterInfo();
@@ -2428,7 +2457,7 @@ TargetLowering::ConstraintWeight
Value *CallOperandVal = info.CallOperandVal;
// If we don't have a value, we can't do a match,
// but allow it at the lowest weight.
- if (CallOperandVal == NULL)
+ if (!CallOperandVal)
return CW_Default;
// Look at the constraint type.
switch (*constraint) {
@@ -2601,9 +2630,9 @@ SDValue TargetLowering::BuildExactSDIV(SDValue Op1, SDValue Op2, SDLoc dl,
/// return a DAG expression to select that will generate the same value by
/// multiplying by a magic number. See:
/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
-SDValue TargetLowering::
-BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization,
- std::vector<SDNode*> *Created) const {
+SDValue TargetLowering::BuildSDIV(SDNode *N, const APInt &Divisor,
+ SelectionDAG &DAG, bool IsAfterLegalization,
+ std::vector<SDNode *> *Created) const {
EVT VT = N->getValueType(0);
SDLoc dl(N);
@@ -2612,8 +2641,7 @@ BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization,
if (!isTypeLegal(VT))
return SDValue();
- APInt d = cast<ConstantSDNode>(N->getOperand(1))->getAPIntValue();
- APInt::ms magics = d.magic();
+ APInt::ms magics = Divisor.magic();
// Multiply the numerator (operand 0) by the magic value
// FIXME: We should support doing a MUL in a wider type
@@ -2630,13 +2658,13 @@ BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization,
else
      return SDValue(); // No mulhs or equivalent
// If d > 0 and m < 0, add the numerator
- if (d.isStrictlyPositive() && magics.m.isNegative()) {
+ if (Divisor.isStrictlyPositive() && magics.m.isNegative()) {
Q = DAG.getNode(ISD::ADD, dl, VT, Q, N->getOperand(0));
if (Created)
Created->push_back(Q.getNode());
}
// If d < 0 and m > 0, subtract the numerator.
- if (d.isNegative() && magics.m.isStrictlyPositive()) {
+ if (Divisor.isNegative() && magics.m.isStrictlyPositive()) {
Q = DAG.getNode(ISD::SUB, dl, VT, Q, N->getOperand(0));
if (Created)
Created->push_back(Q.getNode());
@@ -2649,9 +2677,9 @@ BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization,
Created->push_back(Q.getNode());
}
// Extract the sign bit and add it to the quotient
- SDValue T =
- DAG.getNode(ISD::SRL, dl, VT, Q, DAG.getConstant(VT.getSizeInBits()-1,
- getShiftAmountTy(Q.getValueType())));
+ SDValue T = DAG.getNode(ISD::SRL, dl, VT, Q,
+ DAG.getConstant(VT.getScalarSizeInBits() - 1,
+ getShiftAmountTy(Q.getValueType())));
if (Created)
Created->push_back(T.getNode());
return DAG.getNode(ISD::ADD, dl, VT, Q, T);
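BuildSDIV now receives the divisor as an APInt instead of extracting it from operand 1, which lets callers supply the value themselves (the companion switch from getSizeInBits to getScalarSizeInBits in the sign-bit extraction suggests vector-splat callers are the intended beneficiary). A concrete scalar instance of the sequence above, for signed i32 division by 7: the magic constants are the ones APInt::magic produces for 7, and since d > 0 with m < 0 the numerator is added back.

  #include <cstdint>

  // i32 n / 7 via the magic-number sequence above:
  // magics.m = 0x92492493 (negative as i32), magics.s = 2.
  int32_t sdiv7(int32_t n) {
    const int64_t M = INT64_C(-1840700269);   // 0x92492493 as signed i32
    int32_t q = (int32_t)((M * n) >> 32);     // MULHS(n, m)
    q += n;                                   // d > 0, m < 0: add numerator
    q >>= 2;                                  // SRA by magics.s
    q += (int32_t)((uint32_t)q >> 31);        // add the extracted sign bit
    return q;
  }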
@@ -2661,9 +2689,9 @@ BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization,
/// return a DAG expression to select that will generate the same value by
/// multiplying by a magic number. See:
/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
-SDValue TargetLowering::
-BuildUDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization,
- std::vector<SDNode*> *Created) const {
+SDValue TargetLowering::BuildUDIV(SDNode *N, const APInt &Divisor,
+ SelectionDAG &DAG, bool IsAfterLegalization,
+ std::vector<SDNode *> *Created) const {
EVT VT = N->getValueType(0);
SDLoc dl(N);
@@ -2674,22 +2702,21 @@ BuildUDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization,
// FIXME: We should use a narrower constant when the upper
// bits are known to be zero.
- const APInt &N1C = cast<ConstantSDNode>(N->getOperand(1))->getAPIntValue();
- APInt::mu magics = N1C.magicu();
+ APInt::mu magics = Divisor.magicu();
SDValue Q = N->getOperand(0);
// If the divisor is even, we can avoid using the expensive fixup by shifting
// the divided value upfront.
- if (magics.a != 0 && !N1C[0]) {
- unsigned Shift = N1C.countTrailingZeros();
+ if (magics.a != 0 && !Divisor[0]) {
+ unsigned Shift = Divisor.countTrailingZeros();
Q = DAG.getNode(ISD::SRL, dl, VT, Q,
DAG.getConstant(Shift, getShiftAmountTy(Q.getValueType())));
if (Created)
Created->push_back(Q.getNode());
// Get magic number for the shifted divisor.
- magics = N1C.lshr(Shift).magicu(Shift);
+ magics = Divisor.lshr(Shift).magicu(Shift);
assert(magics.a == 0 && "Should use cheap fixup now");
}
@@ -2708,7 +2735,7 @@ BuildUDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization,
Created->push_back(Q.getNode());
if (magics.a == 0) {
- assert(magics.s < N1C.getBitWidth() &&
+ assert(magics.s < Divisor.getBitWidth() &&
"We shouldn't generate an undefined shift!");
return DAG.getNode(ISD::SRL, dl, VT, Q,
DAG.getConstant(magics.s, getShiftAmountTy(Q.getValueType())));
@@ -2738,3 +2765,110 @@ verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const {
return false;
}
+
+//===----------------------------------------------------------------------===//
+// Legalization Utilities
+//===----------------------------------------------------------------------===//
+
+bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT,
+ SelectionDAG &DAG, SDValue LL, SDValue LH,
+ SDValue RL, SDValue RH) const {
+ EVT VT = N->getValueType(0);
+ SDLoc dl(N);
+
+ bool HasMULHS = isOperationLegalOrCustom(ISD::MULHS, HiLoVT);
+ bool HasMULHU = isOperationLegalOrCustom(ISD::MULHU, HiLoVT);
+ bool HasSMUL_LOHI = isOperationLegalOrCustom(ISD::SMUL_LOHI, HiLoVT);
+ bool HasUMUL_LOHI = isOperationLegalOrCustom(ISD::UMUL_LOHI, HiLoVT);
+ if (HasMULHU || HasMULHS || HasUMUL_LOHI || HasSMUL_LOHI) {
+ unsigned OuterBitSize = VT.getSizeInBits();
+ unsigned InnerBitSize = HiLoVT.getSizeInBits();
+ unsigned LHSSB = DAG.ComputeNumSignBits(N->getOperand(0));
+ unsigned RHSSB = DAG.ComputeNumSignBits(N->getOperand(1));
+
+ // LL, LH, RL, and RH must be either all NULL or all set to a value.
+ assert((LL.getNode() && LH.getNode() && RL.getNode() && RH.getNode()) ||
+ (!LL.getNode() && !LH.getNode() && !RL.getNode() && !RH.getNode()));
+
+ if (!LL.getNode() && !RL.getNode() &&
+ isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) {
+ LL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, N->getOperand(0));
+ RL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, N->getOperand(1));
+ }
+
+ if (!LL.getNode())
+ return false;
+
+ APInt HighMask = APInt::getHighBitsSet(OuterBitSize, InnerBitSize);
+ if (DAG.MaskedValueIsZero(N->getOperand(0), HighMask) &&
+ DAG.MaskedValueIsZero(N->getOperand(1), HighMask)) {
+ // The inputs are both zero-extended.
+ if (HasUMUL_LOHI) {
+ // We can emit a umul_lohi.
+ Lo = DAG.getNode(ISD::UMUL_LOHI, dl,
+ DAG.getVTList(HiLoVT, HiLoVT), LL, RL);
+ Hi = SDValue(Lo.getNode(), 1);
+ return true;
+ }
+ if (HasMULHU) {
+ // We can emit a mulhu+mul.
+ Lo = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RL);
+ Hi = DAG.getNode(ISD::MULHU, dl, HiLoVT, LL, RL);
+ return true;
+ }
+ }
+ if (LHSSB > InnerBitSize && RHSSB > InnerBitSize) {
+ // The input values are both sign-extended.
+ if (HasSMUL_LOHI) {
+ // We can emit a smul_lohi.
+ Lo = DAG.getNode(ISD::SMUL_LOHI, dl,
+ DAG.getVTList(HiLoVT, HiLoVT), LL, RL);
+ Hi = SDValue(Lo.getNode(), 1);
+ return true;
+ }
+ if (HasMULHS) {
+ // We can emit a mulhs+mul.
+ Lo = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RL);
+ Hi = DAG.getNode(ISD::MULHS, dl, HiLoVT, LL, RL);
+ return true;
+ }
+ }
+
+ if (!LH.getNode() && !RH.getNode() &&
+ isOperationLegalOrCustom(ISD::SRL, VT) &&
+ isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) {
+ unsigned ShiftAmt = VT.getSizeInBits() - HiLoVT.getSizeInBits();
+ SDValue Shift = DAG.getConstant(ShiftAmt, getShiftAmountTy(VT));
+ LH = DAG.getNode(ISD::SRL, dl, VT, N->getOperand(0), Shift);
+ LH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LH);
+ RH = DAG.getNode(ISD::SRL, dl, VT, N->getOperand(1), Shift);
+ RH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RH);
+ }
+
+ if (!LH.getNode())
+ return false;
+
+ if (HasUMUL_LOHI) {
+ // Lo,Hi = umul LHS, RHS.
+ SDValue UMulLOHI = DAG.getNode(ISD::UMUL_LOHI, dl,
+ DAG.getVTList(HiLoVT, HiLoVT), LL, RL);
+ Lo = UMulLOHI;
+ Hi = UMulLOHI.getValue(1);
+ RH = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RH);
+ LH = DAG.getNode(ISD::MUL, dl, HiLoVT, LH, RL);
+ Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, RH);
+ Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, LH);
+ return true;
+ }
+ if (HasMULHU) {
+ Lo = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RL);
+ Hi = DAG.getNode(ISD::MULHU, dl, HiLoVT, LL, RL);
+ RH = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RH);
+ LH = DAG.getNode(ISD::MUL, dl, HiLoVT, LH, RL);
+ Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, RH);
+ Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, LH);
+ return true;
+ }
+ }
+ return false;
+}
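
For reference, the MULHU path of expandMUL computes, with L = LH*2^b + LL and R = RH*2^b + RL, the low 2b bits of L*R as LL*RL + ((LL*RH + LH*RL) << b); the LH*RH term overflows entirely. A self-contained model of that arithmetic at 32-bit granularity (illustrative only, not the commit's code):

#include <cassert>
#include <cstdint>

// High 32 bits of a 32x32 multiply -- the role ISD::MULHU plays above.
static uint32_t mulhu32(uint32_t A, uint32_t B) {
  return uint32_t((uint64_t(A) * B) >> 32);
}

// 64-bit multiply from 32-bit halves: Lo = LL*RL, and the high word is
// mulhu(LL,RL) plus the two cross terms (each taken mod 2^32).
static uint64_t mul64(uint64_t L, uint64_t R) {
  uint32_t LL = uint32_t(L), LH = uint32_t(L >> 32);
  uint32_t RL = uint32_t(R), RH = uint32_t(R >> 32);
  uint32_t Lo = LL * RL;          // ISD::MUL on the low halves
  uint32_t Hi = mulhu32(LL, RL);  // carry of LL*RL into the top word
  Hi += LL * RH;                  // cross terms; LH*RH overflows entirely
  Hi += LH * RL;
  return (uint64_t(Hi) << 32) | Lo;
}

int main() {
  uint64_t L = 0x123456789ABCDEF0ull, R = 0x0FEDCBA987654321ull;
  assert(mul64(L, R) == L * R);
  return 0;
}
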
diff --git a/lib/CodeGen/ShadowStackGC.cpp b/lib/CodeGen/ShadowStackGC.cpp
index adb3ef9..f7c64da 100644
--- a/lib/CodeGen/ShadowStackGC.cpp
+++ b/lib/CodeGen/ShadowStackGC.cpp
@@ -25,7 +25,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "shadowstackgc"
#include "llvm/CodeGen/GCs.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/CodeGen/GCStrategy.h"
@@ -36,6 +35,8 @@
using namespace llvm;
+#define DEBUG_TYPE "shadowstackgc"
+
namespace {
class ShadowStackGC : public GCStrategy {
@@ -101,7 +102,7 @@ namespace {
IRBuilder<> *Next() {
switch (State) {
default:
- return 0;
+ return nullptr;
case 0:
StateBB = F.begin();
@@ -137,7 +138,7 @@ namespace {
Calls.push_back(CI);
if (Calls.empty())
- return 0;
+ return nullptr;
// Create a cleanup block.
LLVMContext &C = F.getContext();
@@ -194,7 +195,7 @@ namespace {
void llvm::linkShadowStackGC() { }
-ShadowStackGC::ShadowStackGC() : Head(0), StackEntryTy(0) {
+ShadowStackGC::ShadowStackGC() : Head(nullptr), StackEntryTy(nullptr) {
InitRoots = true;
CustomRoots = true;
}
@@ -390,8 +391,8 @@ bool ShadowStackGC::performCustomLowering(Function &F) {
BasicBlock::iterator IP = F.getEntryBlock().begin();
IRBuilder<> AtEntry(IP->getParent(), IP);
- Instruction *StackEntry = AtEntry.CreateAlloca(ConcreteStackEntryTy, 0,
- "gc_frame");
+ Instruction *StackEntry = AtEntry.CreateAlloca(ConcreteStackEntryTy, nullptr,
+ "gc_frame");
while (isa<AllocaInst>(IP)) ++IP;
AtEntry.SetInsertPoint(IP->getParent(), IP);
diff --git a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp
index dc7ca2b..d2f3955 100644
--- a/lib/CodeGen/SjLjEHPrepare.cpp
+++ b/lib/CodeGen/SjLjEHPrepare.cpp
@@ -12,7 +12,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "sjljehprepare"
#include "llvm/CodeGen/Passes.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SetVector.h"
@@ -38,6 +37,8 @@
#include <set>
using namespace llvm;
+#define DEBUG_TYPE "sjljehprepare"
+
STATISTIC(NumInvokes, "Number of invokes replaced");
STATISTIC(NumSpilled, "Number of registers live across unwind edges");
@@ -100,10 +101,10 @@ bool SjLjEHPrepare::doInitialization(Module &M) {
NULL);
RegisterFn = M.getOrInsertFunction(
"_Unwind_SjLj_Register", Type::getVoidTy(M.getContext()),
- PointerType::getUnqual(FunctionContextTy), (Type *)0);
+ PointerType::getUnqual(FunctionContextTy), (Type *)nullptr);
UnregisterFn = M.getOrInsertFunction(
"_Unwind_SjLj_Unregister", Type::getVoidTy(M.getContext()),
- PointerType::getUnqual(FunctionContextTy), (Type *)0);
+ PointerType::getUnqual(FunctionContextTy), (Type *)nullptr);
FrameAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::frameaddress);
StackAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::stacksave);
StackRestoreFn = Intrinsic::getDeclaration(&M, Intrinsic::stackrestore);
@@ -111,7 +112,7 @@ bool SjLjEHPrepare::doInitialization(Module &M) {
LSDAAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_lsda);
CallSiteFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_callsite);
FuncCtxFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_functioncontext);
- PersonalityFn = 0;
+ PersonalityFn = nullptr;
return true;
}
@@ -192,7 +193,7 @@ Value *SjLjEHPrepare::setupFunctionContext(Function &F,
const TargetLowering *TLI = TM->getTargetLowering();
unsigned Align =
TLI->getDataLayout()->getPrefTypeAlignment(FunctionContextTy);
- FuncCtx = new AllocaInst(FunctionContextTy, 0, Align, "fn_context",
+ FuncCtx = new AllocaInst(FunctionContextTy, nullptr, Align, "fn_context",
EntryBB->begin());
// Fill in the function context structure.
diff --git a/lib/CodeGen/SlotIndexes.cpp b/lib/CodeGen/SlotIndexes.cpp
index a6c6261..d46621d 100644
--- a/lib/CodeGen/SlotIndexes.cpp
+++ b/lib/CodeGen/SlotIndexes.cpp
@@ -7,8 +7,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "slotindexes"
-
#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -18,6 +16,8 @@
using namespace llvm;
+#define DEBUG_TYPE "slotindexes"
+
char SlotIndexes::ID = 0;
INITIALIZE_PASS(SlotIndexes, "slotindexes",
"Slot index numbering", false, false)
@@ -66,7 +66,7 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) {
MBBRanges.resize(mf->getNumBlockIDs());
idx2MBBMap.reserve(mf->size());
- indexList.push_back(createEntry(0, index));
+ indexList.push_back(createEntry(nullptr, index));
// Iterate over the function.
for (MachineFunction::iterator mbbItr = mf->begin(), mbbEnd = mf->end();
@@ -91,7 +91,7 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) {
}
// We insert one blank instruction between basic blocks.
- indexList.push_back(createEntry(0, index += SlotIndex::InstrDist));
+ indexList.push_back(createEntry(nullptr, index += SlotIndex::InstrDist));
MBBRanges[mbb->getNumber()].first = blockStartIndex;
MBBRanges[mbb->getNumber()].second = SlotIndex(&indexList.back(),
@@ -182,7 +182,7 @@ void SlotIndexes::repairIndexesInRange(MachineBasicBlock *MBB,
"Decremented past the beginning of region to repair.");
MachineInstr *SlotMI = ListI->getInstr();
- MachineInstr *MI = (MBBI != MBB->end() && !pastStart) ? MBBI : 0;
+ MachineInstr *MI = (MBBI != MBB->end() && !pastStart) ? MBBI : nullptr;
bool MBBIAtBegin = MBBI == Begin && (!includeStart || pastStart);
if (SlotMI == MI && !MBBIAtBegin) {
@@ -219,7 +219,7 @@ void SlotIndexes::dump() const {
itr != indexList.end(); ++itr) {
dbgs() << itr->getIndex() << " ";
- if (itr->getInstr() != 0) {
+ if (itr->getInstr()) {
dbgs() << *itr->getInstr();
} else {
dbgs() << "\n";
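
The blank entries above are what keep SlotIndex renumbering cheap: consecutive entries are spaced SlotIndex::InstrDist apart, leaving gaps that later insertions can consume without renumbering the whole function. A toy model of the numbering, using an illustrative stride (the real constant lives in SlotIndex):

#include <cstdint>
#include <vector>

static const uint32_t InstrDist = 16; // illustrative; see SlotIndex::InstrDist

// Number NumInstrs entries with a fixed gap between neighbours, plus one
// leading blank entry, mirroring the loop above.
std::vector<uint32_t> numberEntries(unsigned NumInstrs) {
  std::vector<uint32_t> Indexes;
  uint32_t Index = 0;
  for (unsigned I = 0; I <= NumInstrs; ++I) {
    Indexes.push_back(Index);
    Index += InstrDist;
  }
  return Indexes;
}
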
diff --git a/lib/CodeGen/SpillPlacement.cpp b/lib/CodeGen/SpillPlacement.cpp
index 5f73469..24e94d1 100644
--- a/lib/CodeGen/SpillPlacement.cpp
+++ b/lib/CodeGen/SpillPlacement.cpp
@@ -27,7 +27,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "spillplacement"
#include "SpillPlacement.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/CodeGen/EdgeBundles.h"
@@ -41,6 +40,8 @@
using namespace llvm;
+#define DEBUG_TYPE "spillplacement"
+
char SpillPlacement::ID = 0;
INITIALIZE_PASS_BEGIN(SpillPlacement, "spill-code-placement",
"Spill Code Placement Analysis", true, true)
@@ -59,9 +60,26 @@ void SpillPlacement::getAnalysisUsage(AnalysisUsage &AU) const {
MachineFunctionPass::getAnalysisUsage(AU);
}
+namespace {
+static BlockFrequency Threshold;
+}
+
/// Decision threshold. A node gets the output value 0 if the weighted sum of
/// its inputs falls in the open interval (-Threshold;Threshold).
-static const BlockFrequency Threshold = 2;
+static BlockFrequency getThreshold() { return Threshold; }
+
+/// \brief Set the threshold for a given entry frequency.
+///
+/// Set the threshold relative to \c Entry. Since the threshold is used as a
+/// bound on the open interval (-Threshold;Threshold), 1 is the minimum
+/// threshold.
+static void setThreshold(const BlockFrequency &Entry) {
+ // Apparently 2 is a good threshold when Entry==2^14, but we need to scale
+ // it. Divide by 2^13, rounding as appropriate.
+ uint64_t Freq = Entry.getFrequency();
+ uint64_t Scaled = (Freq >> 13) + bool(Freq & (1 << 12));
+ Threshold = std::max(UINT64_C(1), Scaled);
+}
/// Node - Each edge bundle corresponds to a Hopfield node.
///
@@ -110,7 +128,7 @@ struct SpillPlacement::Node {
// the CFG.
void clear() {
BiasN = BiasP = Value = 0;
- SumLinkWeights = Threshold;
+ SumLinkWeights = getThreshold();
Links.clear();
}
@@ -168,9 +186,9 @@ struct SpillPlacement::Node {
// 2. It helps tame rounding errors when the links nominally sum to 0.
//
bool Before = preferReg();
- if (SumN >= SumP + Threshold)
+ if (SumN >= SumP + getThreshold())
Value = -1;
- else if (SumP >= SumN + Threshold)
+ else if (SumP >= SumN + getThreshold())
Value = 1;
else
Value = 0;
@@ -189,6 +207,7 @@ bool SpillPlacement::runOnMachineFunction(MachineFunction &mf) {
// Compute total ingoing and outgoing block frequencies for all bundles.
BlockFrequencies.resize(mf.getNumBlockIDs());
MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
+ setThreshold(MBFI->getEntryFreq());
for (MachineFunction::iterator I = mf.begin(), E = mf.end(); I != E; ++I) {
unsigned Num = I->getNumber();
BlockFrequencies[Num] = MBFI->getBlockFreq(I);
@@ -200,7 +219,7 @@ bool SpillPlacement::runOnMachineFunction(MachineFunction &mf) {
void SpillPlacement::releaseMemory() {
delete[] nodes;
- nodes = 0;
+ nodes = nullptr;
}
/// activate - mark node n as active if it wasn't already.
@@ -375,6 +394,6 @@ SpillPlacement::finish() {
ActiveNodes->reset(n);
Perfect = false;
}
- ActiveNodes = 0;
+ ActiveNodes = nullptr;
return Perfect;
}
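
The scaling in setThreshold divides the entry frequency by 2^13 with round-to-nearest and clamps to at least 1, so Entry == 2^14 reproduces the old fixed threshold of 2. A standalone sketch of that arithmetic, mirroring the hunk above:

#include <algorithm>
#include <cstdint>

static uint64_t scaledThreshold(uint64_t EntryFreq) {
  // Divide by 2^13, rounding to nearest (bit 12 is the rounding bit).
  uint64_t Scaled = (EntryFreq >> 13) + ((EntryFreq >> 12) & 1);
  // 1 is the minimum useful bound for the open interval (-T;T).
  return std::max(UINT64_C(1), Scaled);
}
// scaledThreshold(1 << 14) == 2; scaledThreshold(1) == 1 (clamped).
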
diff --git a/lib/CodeGen/SpillPlacement.h b/lib/CodeGen/SpillPlacement.h
index a88d7ac..43fc7f5 100644
--- a/lib/CodeGen/SpillPlacement.h
+++ b/lib/CodeGen/SpillPlacement.h
@@ -65,7 +65,7 @@ class SpillPlacement : public MachineFunctionPass {
public:
static char ID; // Pass identification, replacement for typeid.
- SpillPlacement() : MachineFunctionPass(ID), nodes(0) {}
+ SpillPlacement() : MachineFunctionPass(ID), nodes(nullptr) {}
~SpillPlacement() { releaseMemory(); }
/// BorderConstraint - A basic block has separate constraints for entry and
diff --git a/lib/CodeGen/Spiller.cpp b/lib/CodeGen/Spiller.cpp
index 094641c..0649448 100644
--- a/lib/CodeGen/Spiller.cpp
+++ b/lib/CodeGen/Spiller.cpp
@@ -7,8 +7,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "spiller"
-
#include "Spiller.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/LiveRangeEdit.h"
@@ -28,6 +26,8 @@
using namespace llvm;
+#define DEBUG_TYPE "spiller"
+
namespace {
enum SpillerName { trivial, inline_ };
}
diff --git a/lib/CodeGen/SplitKit.cpp b/lib/CodeGen/SplitKit.cpp
index 16fe979..7d4f568 100644
--- a/lib/CodeGen/SplitKit.cpp
+++ b/lib/CodeGen/SplitKit.cpp
@@ -12,7 +12,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "regalloc"
#include "SplitKit.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
@@ -29,6 +28,8 @@
using namespace llvm;
+#define DEBUG_TYPE "regalloc"
+
STATISTIC(NumFinished, "Number of splits finished");
STATISTIC(NumSimple, "Number of splits that were simple");
STATISTIC(NumCopies, "Number of copies inserted for splitting");
@@ -47,14 +48,14 @@ SplitAnalysis::SplitAnalysis(const VirtRegMap &vrm,
LIS(lis),
Loops(mli),
TII(*MF.getTarget().getInstrInfo()),
- CurLI(0),
+ CurLI(nullptr),
LastSplitPoint(MF.getNumBlockIDs()) {}
void SplitAnalysis::clear() {
UseSlots.clear();
UseBlocks.clear();
ThroughBlocks.clear();
- CurLI = 0;
+ CurLI = nullptr;
DidRepairRange = false;
}
@@ -331,7 +332,7 @@ SplitEditor::SplitEditor(SplitAnalysis &sa,
TII(*vrm.getMachineFunction().getTarget().getInstrInfo()),
TRI(*vrm.getMachineFunction().getTarget().getRegisterInfo()),
MBFI(mbfi),
- Edit(0),
+ Edit(nullptr),
OpenIdx(0),
SpillMode(SM_Partition),
RegAssign(Allocator)
@@ -353,7 +354,7 @@ void SplitEditor::reset(LiveRangeEdit &LRE, ComplementSpillMode SM) {
// We don't need an AliasAnalysis since we will only be performing
// cheap-as-a-copy remats anyway.
- Edit->anyRematerializable(0);
+ Edit->anyRematerializable(nullptr);
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -423,7 +424,7 @@ void SplitEditor::forceRecompute(unsigned RegIdx, const VNInfo *ParentVNI) {
LiveInterval *LI = &LIS.getInterval(Edit->get(RegIdx));
LI->addSegment(LiveInterval::Segment(Def, Def.getDeadSlot(), VNI));
// Mark as complex mapped, forced.
- VFP = ValueForcePair(0, true);
+ VFP = ValueForcePair(nullptr, true);
}
VNInfo *SplitEditor::defFromParent(unsigned RegIdx,
@@ -431,7 +432,7 @@ VNInfo *SplitEditor::defFromParent(unsigned RegIdx,
SlotIndex UseIdx,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) {
- MachineInstr *CopyMI = 0;
+ MachineInstr *CopyMI = nullptr;
SlotIndex Def;
LiveInterval *LI = &LIS.getInterval(Edit->get(RegIdx));
@@ -922,7 +923,7 @@ bool SplitEditor::transferValues() {
else {
// Live-through, and we don't know the value.
LRC.addLiveInBlock(LR, MDT[MBB]);
- LRC.setLiveOutValue(MBB, 0);
+ LRC.setLiveOutValue(MBB, nullptr);
}
}
BlockStart = BlockEnd;
diff --git a/lib/CodeGen/SplitKit.h b/lib/CodeGen/SplitKit.h
index f029c73..7048ee3 100644
--- a/lib/CodeGen/SplitKit.h
+++ b/lib/CodeGen/SplitKit.h
@@ -377,7 +377,7 @@ public:
SlotIndex enterIntvAfter(SlotIndex Idx);
/// enterIntvAtEnd - Enter the open interval at the end of MBB.
- /// Use the open interval from he inserted copy to the MBB end.
+ /// Use the open interval from the inserted copy to the MBB end.
/// Return the beginning of the new live range.
SlotIndex enterIntvAtEnd(MachineBasicBlock &MBB);
@@ -417,7 +417,7 @@ public:
/// @param LRMap When not null, this vector will map each live range in Edit
/// back to the indices returned by openIntv.
/// There may be extra indices created by dead code elimination.
- void finish(SmallVectorImpl<unsigned> *LRMap = 0);
+ void finish(SmallVectorImpl<unsigned> *LRMap = nullptr);
/// dump - print the current interval mapping to dbgs().
void dump() const;
diff --git a/lib/CodeGen/StackColoring.cpp b/lib/CodeGen/StackColoring.cpp
index 7b1de85..370430c 100644
--- a/lib/CodeGen/StackColoring.cpp
+++ b/lib/CodeGen/StackColoring.cpp
@@ -21,7 +21,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "stackcoloring"
#include "llvm/CodeGen/Passes.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DepthFirstIterator.h"
@@ -58,6 +57,8 @@
using namespace llvm;
+#define DEBUG_TYPE "stackcoloring"
+
static cl::opt<bool>
DisableColoring("no-stack-coloring",
cl::init(false), cl::Hidden,
@@ -193,12 +194,11 @@ void StackColoring::getAnalysisUsage(AnalysisUsage &AU) const {
}
void StackColoring::dump() const {
- for (df_iterator<MachineFunction*> FI = df_begin(MF), FE = df_end(MF);
- FI != FE; ++FI) {
- DEBUG(dbgs()<<"Inspecting block #"<<BasicBlocks.lookup(*FI)<<
- " ["<<FI->getName()<<"]\n");
+ for (MachineBasicBlock *MBB : depth_first(MF)) {
+ DEBUG(dbgs() << "Inspecting block #" << BasicBlocks.lookup(MBB) << " ["
+ << MBB->getName() << "]\n");
- LivenessMap::const_iterator BI = BlockLiveness.find(*FI);
+ LivenessMap::const_iterator BI = BlockLiveness.find(MBB);
assert(BI != BlockLiveness.end() && "Block not found");
const BlockLifetimeInfo &BlockInfo = BI->second;
@@ -231,20 +231,19 @@ unsigned StackColoring::collectMarkers(unsigned NumSlot) {
// NOTE: We use a reverse-post-order iteration to ensure that we obtain a
// deterministic numbering, and because we'll need a post-order iteration
// later for solving the liveness dataflow problem.
- for (df_iterator<MachineFunction*> FI = df_begin(MF), FE = df_end(MF);
- FI != FE; ++FI) {
+ for (MachineBasicBlock *MBB : depth_first(MF)) {
// Assign a serial number to this basic block.
- BasicBlocks[*FI] = BasicBlockNumbering.size();
- BasicBlockNumbering.push_back(*FI);
+ BasicBlocks[MBB] = BasicBlockNumbering.size();
+ BasicBlockNumbering.push_back(MBB);
// Keep a reference to avoid repeated lookups.
- BlockLifetimeInfo &BlockInfo = BlockLiveness[*FI];
+ BlockLifetimeInfo &BlockInfo = BlockLiveness[MBB];
BlockInfo.Begin.resize(NumSlot);
BlockInfo.End.resize(NumSlot);
- for (MachineInstr &MI : **FI) {
+ for (MachineInstr &MI : *MBB) {
if (MI.getOpcode() != TargetOpcode::LIFETIME_START &&
MI.getOpcode() != TargetOpcode::LIFETIME_END)
continue;
@@ -511,11 +510,6 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) {
// Update the MachineMemOperand to use the new alloca.
for (MachineMemOperand *MMO : I.memoperands()) {
- const Value *V = MMO->getValue();
-
- if (!V)
- continue;
-
// FIXME: In order to enable the use of TBAA when using AA in CodeGen,
// we'll also need to update the TBAA nodes in MMOs with values
// derived from the merged allocas. When doing this, we'll need to use
@@ -525,10 +519,10 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) {
// We've replaced IR-level uses of the remapped allocas, so we only
// need to replace direct uses here.
- if (!isa<AllocaInst>(V))
+ const AllocaInst *AI = dyn_cast_or_null<AllocaInst>(MMO->getValue());
+ if (!AI)
continue;
- const AllocaInst *AI= cast<AllocaInst>(V);
if (!Allocas.count(AI))
continue;
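
The loops above switch from explicit df_iterator begin/end pairs to the range form; both spell the same depth-first traversal. A minimal usage sketch, assuming the LLVM ADT and CodeGen headers:

#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/CodeGen/MachineFunction.h"

// Both the old and new loop forms visit blocks in this same order.
static unsigned countBlocks(llvm::MachineFunction *MF) {
  unsigned N = 0;
  for (llvm::MachineBasicBlock *MBB : llvm::depth_first(MF)) {
    (void)MBB;
    ++N;
  }
  return N;
}
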
diff --git a/lib/CodeGen/StackMapLivenessAnalysis.cpp b/lib/CodeGen/StackMapLivenessAnalysis.cpp
index a374417..4dd87dd 100644
--- a/lib/CodeGen/StackMapLivenessAnalysis.cpp
+++ b/lib/CodeGen/StackMapLivenessAnalysis.cpp
@@ -13,7 +13,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "stackmaps"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -26,6 +25,8 @@
using namespace llvm;
+#define DEBUG_TYPE "stackmaps"
+
namespace llvm {
cl::opt<bool> EnableStackMapLiveness("enable-stackmap-liveness",
cl::Hidden, cl::desc("Enable StackMap Liveness Analysis Pass"));
@@ -99,7 +100,7 @@ bool StackMapLiveness::calculateLiveness() {
HasStackMap = true;
++NumStackMaps;
}
- DEBUG(dbgs() << " " << *I << " " << LiveRegs);
+ DEBUG(dbgs() << " " << LiveRegs << " " << *I);
LiveRegs.stepBackward(*I);
}
++NumBBsVisited;
diff --git a/lib/CodeGen/StackMaps.cpp b/lib/CodeGen/StackMaps.cpp
index a6522dc..1473fc1 100644
--- a/lib/CodeGen/StackMaps.cpp
+++ b/lib/CodeGen/StackMaps.cpp
@@ -7,8 +7,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "stackmaps"
-
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -20,6 +18,7 @@
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCStreamer.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
@@ -29,6 +28,13 @@
using namespace llvm;
+#define DEBUG_TYPE "stackmaps"
+
+static cl::opt<int> StackMapVersion("stackmap-version", cl::init(1),
+ cl::desc("Specify the stackmap encoding version (default = 1)"));
+
+const char *StackMaps::WSMP = "Stack Maps: ";
+
PatchPointOpers::PatchPointOpers(const MachineInstr *MI)
: MI(MI),
HasDef(MI->getOperand(0).isReg() && MI->getOperand(0).isDef() &&
@@ -64,6 +70,11 @@ unsigned PatchPointOpers::getNextScratchIdx(unsigned StartIdx) const {
return ScratchIdx;
}
+StackMaps::StackMaps(AsmPrinter &AP) : AP(AP) {
+ if (StackMapVersion != 1)
+ llvm_unreachable("Unsupported stackmap version!");
+}
+
MachineInstr::const_mop_iterator
StackMaps::parseOperand(MachineInstr::const_mop_iterator MOI,
MachineInstr::const_mop_iterator MOE,
@@ -209,7 +220,8 @@ void StackMaps::recordStackMapOpers(const MachineInstr &MI, uint64_t ID,
if (I->LocType == Location::Constant &&
((I->Offset + (int64_t(1)<<31)) >> 32) != 0) {
I->LocType = Location::ConstantIndex;
- I->Offset = ConstPool.getConstantIndex(I->Offset);
+ auto Result = ConstPool.insert(std::make_pair(I->Offset, I->Offset));
+ I->Offset = Result.first - ConstPool.begin();
}
}
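
The constant-pool hunk above replaces an index lookup with a deduplicating insert: MapVector-style containers keep insertion order in a vector, so the iterator distance from begin() doubles as a stable serialization index. A sketch assuming ConstPool is an llvm::MapVector keyed and valued by the constant itself:

#include "llvm/ADT/MapVector.h"

#include <cstddef>
#include <cstdint>
#include <utility>

// insert() is a no-op for a duplicate key, and MapVector iterators are
// random-access over the insertion-ordered vector, so the distance from
// begin() is a stable index into the serialized pool.
static size_t internConstant(llvm::MapVector<uint64_t, uint64_t> &Pool,
                             uint64_t Value) {
  auto Result = Pool.insert(std::make_pair(Value, Value));
  return Result.first - Pool.begin();
}
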
@@ -259,7 +271,7 @@ void StackMaps::recordPatchPoint(const MachineInstr &MI) {
#endif
}
-/// serializeToStackMapSection conceptually populates the following fields:
+/// Emit the stackmap header.
///
/// Header {
/// uint8 : Stack Map Version (currently 1)
@@ -269,11 +281,54 @@ void StackMaps::recordPatchPoint(const MachineInstr &MI) {
/// uint32 : NumFunctions
/// uint32 : NumConstants
/// uint32 : NumRecords
+void StackMaps::emitStackmapHeader(MCStreamer &OS) {
+ // Header.
+ OS.EmitIntValue(StackMapVersion, 1); // Version.
+ OS.EmitIntValue(0, 1); // Reserved.
+ OS.EmitIntValue(0, 2); // Reserved.
+
+ // Num functions.
+ DEBUG(dbgs() << WSMP << "#functions = " << FnStackSize.size() << '\n');
+ OS.EmitIntValue(FnStackSize.size(), 4);
+ // Num constants.
+ DEBUG(dbgs() << WSMP << "#constants = " << ConstPool.size() << '\n');
+ OS.EmitIntValue(ConstPool.size(), 4);
+ // Num callsites.
+ DEBUG(dbgs() << WSMP << "#callsites = " << CSInfos.size() << '\n');
+ OS.EmitIntValue(CSInfos.size(), 4);
+}
+
+/// Emit the function frame record for each function.
+///
/// StkSizeRecord[NumFunctions] {
/// uint64 : Function Address
/// uint64 : Stack Size
/// }
+void StackMaps::emitFunctionFrameRecords(MCStreamer &OS) {
+ // Function Frame records.
+ DEBUG(dbgs() << WSMP << "functions:\n");
+ for (auto const &FR : FnStackSize) {
+ DEBUG(dbgs() << WSMP << "function addr: " << FR.first
+ << " frame size: " << FR.second);
+ OS.EmitSymbolValue(FR.first, 8);
+ OS.EmitIntValue(FR.second, 8);
+ }
+}
+
+/// Emit the constant pool.
+///
/// int64 : Constants[NumConstants]
+void StackMaps::emitConstantPoolEntries(MCStreamer &OS) {
+ // Constant pool entries.
+ DEBUG(dbgs() << WSMP << "constants:\n");
+ for (auto ConstEntry : ConstPool) {
+ DEBUG(dbgs() << WSMP << ConstEntry.second << '\n');
+ OS.EmitIntValue(ConstEntry.second, 8);
+ }
+}
+
+/// Emit the callsite info for each callsite.
+///
/// StkMapRecord[NumRecords] {
/// uint64 : PatchPoint ID
/// uint32 : Instruction Offset
@@ -301,95 +356,43 @@ void StackMaps::recordPatchPoint(const MachineInstr &MI) {
/// 0x3, Indirect, [Reg + Offset] (spilled value)
/// 0x4, Constant, Offset (small constant)
/// 0x5, ConstIndex, Constants[Offset] (large constant)
-///
-void StackMaps::serializeToStackMapSection() {
- // Bail out if there's no stack map data.
- if (CSInfos.empty())
- return;
-
- MCContext &OutContext = AP.OutStreamer.getContext();
- const TargetRegisterInfo *TRI = AP.TM.getRegisterInfo();
-
- // Create the section.
- const MCSection *StackMapSection =
- OutContext.getObjectFileInfo()->getStackMapSection();
- AP.OutStreamer.SwitchSection(StackMapSection);
-
- // Emit a dummy symbol to force section inclusion.
- AP.OutStreamer.EmitLabel(
- OutContext.GetOrCreateSymbol(Twine("__LLVM_StackMaps")));
-
- // Serialize data.
- const char *WSMP = "Stack Maps: ";
- (void)WSMP;
-
- DEBUG(dbgs() << "********** Stack Map Output **********\n");
-
- // Header.
- AP.OutStreamer.EmitIntValue(1, 1); // Version.
- AP.OutStreamer.EmitIntValue(0, 1); // Reserved.
- AP.OutStreamer.EmitIntValue(0, 2); // Reserved.
-
- // Num functions.
- DEBUG(dbgs() << WSMP << "#functions = " << FnStackSize.size() << '\n');
- AP.OutStreamer.EmitIntValue(FnStackSize.size(), 4);
- // Num constants.
- DEBUG(dbgs() << WSMP << "#constants = " << ConstPool.getNumConstants()
- << '\n');
- AP.OutStreamer.EmitIntValue(ConstPool.getNumConstants(), 4);
- // Num callsites.
- DEBUG(dbgs() << WSMP << "#callsites = " << CSInfos.size() << '\n');
- AP.OutStreamer.EmitIntValue(CSInfos.size(), 4);
-
- // Function stack size entries.
- for (FnStackSizeMap::iterator I = FnStackSize.begin(), E = FnStackSize.end();
- I != E; ++I) {
- AP.OutStreamer.EmitSymbolValue(I->first, 8);
- AP.OutStreamer.EmitIntValue(I->second, 8);
- }
-
- // Constant pool entries.
- for (unsigned i = 0; i < ConstPool.getNumConstants(); ++i)
- AP.OutStreamer.EmitIntValue(ConstPool.getConstant(i), 8);
-
+void StackMaps::emitCallsiteEntries(MCStreamer &OS,
+ const TargetRegisterInfo *TRI) {
// Callsite entries.
- for (CallsiteInfoList::const_iterator CSII = CSInfos.begin(),
- CSIE = CSInfos.end(); CSII != CSIE; ++CSII) {
- uint64_t CallsiteID = CSII->ID;
- const LocationVec &CSLocs = CSII->Locations;
- const LiveOutVec &LiveOuts = CSII->LiveOuts;
+ DEBUG(dbgs() << WSMP << "callsites:\n");
+ for (const auto &CSI : CSInfos) {
+ const LocationVec &CSLocs = CSI.Locations;
+ const LiveOutVec &LiveOuts = CSI.LiveOuts;
- DEBUG(dbgs() << WSMP << "callsite " << CallsiteID << "\n");
+ DEBUG(dbgs() << WSMP << "callsite " << CSI.ID << "\n");
// Verify stack map entry. It's better to communicate a problem to the
// runtime than crash in case of in-process compilation. Currently, we do
// simple overflow checks, but we may eventually communicate other
// compilation errors this way.
if (CSLocs.size() > UINT16_MAX || LiveOuts.size() > UINT16_MAX) {
- AP.OutStreamer.EmitIntValue(UINT64_MAX, 8); // Invalid ID.
- AP.OutStreamer.EmitValue(CSII->CSOffsetExpr, 4);
- AP.OutStreamer.EmitIntValue(0, 2); // Reserved.
- AP.OutStreamer.EmitIntValue(0, 2); // 0 locations.
- AP.OutStreamer.EmitIntValue(0, 2); // padding.
- AP.OutStreamer.EmitIntValue(0, 2); // 0 live-out registers.
- AP.OutStreamer.EmitIntValue(0, 4); // padding.
+ OS.EmitIntValue(UINT64_MAX, 8); // Invalid ID.
+ OS.EmitValue(CSI.CSOffsetExpr, 4);
+ OS.EmitIntValue(0, 2); // Reserved.
+ OS.EmitIntValue(0, 2); // 0 locations.
+ OS.EmitIntValue(0, 2); // padding.
+ OS.EmitIntValue(0, 2); // 0 live-out registers.
+ OS.EmitIntValue(0, 4); // padding.
continue;
}
- AP.OutStreamer.EmitIntValue(CallsiteID, 8);
- AP.OutStreamer.EmitValue(CSII->CSOffsetExpr, 4);
+ OS.EmitIntValue(CSI.ID, 8);
+ OS.EmitValue(CSI.CSOffsetExpr, 4);
// Reserved for flags.
- AP.OutStreamer.EmitIntValue(0, 2);
+ OS.EmitIntValue(0, 2);
DEBUG(dbgs() << WSMP << " has " << CSLocs.size() << " locations\n");
- AP.OutStreamer.EmitIntValue(CSLocs.size(), 2);
+ OS.EmitIntValue(CSLocs.size(), 2);
- unsigned operIdx = 0;
- for (LocationVec::const_iterator LocI = CSLocs.begin(), LocE = CSLocs.end();
- LocI != LocE; ++LocI, ++operIdx) {
- const Location &Loc = *LocI;
+ unsigned OperIdx = 0;
+ for (const auto &Loc : CSLocs) {
unsigned RegNo = 0;
int Offset = Loc.Offset;
if(Loc.Reg) {
@@ -410,67 +413,97 @@ void StackMaps::serializeToStackMapSection() {
"Missing location register");
}
- DEBUG(
- dbgs() << WSMP << " Loc " << operIdx << ": ";
- switch (Loc.LocType) {
- case Location::Unprocessed:
- dbgs() << "<Unprocessed operand>";
- break;
- case Location::Register:
- dbgs() << "Register " << TRI->getName(Loc.Reg);
- break;
- case Location::Direct:
- dbgs() << "Direct " << TRI->getName(Loc.Reg);
- if (Loc.Offset)
- dbgs() << " + " << Loc.Offset;
- break;
- case Location::Indirect:
- dbgs() << "Indirect " << TRI->getName(Loc.Reg)
- << " + " << Loc.Offset;
- break;
- case Location::Constant:
- dbgs() << "Constant " << Loc.Offset;
- break;
- case Location::ConstantIndex:
- dbgs() << "Constant Index " << Loc.Offset;
- break;
- }
- dbgs() << " [encoding: .byte " << Loc.LocType
- << ", .byte " << Loc.Size
- << ", .short " << RegNo
- << ", .int " << Offset << "]\n";
- );
-
- AP.OutStreamer.EmitIntValue(Loc.LocType, 1);
- AP.OutStreamer.EmitIntValue(Loc.Size, 1);
- AP.OutStreamer.EmitIntValue(RegNo, 2);
- AP.OutStreamer.EmitIntValue(Offset, 4);
+ DEBUG(dbgs() << WSMP << " Loc " << OperIdx << ": ";
+ switch (Loc.LocType) {
+ case Location::Unprocessed:
+ dbgs() << "<Unprocessed operand>";
+ break;
+ case Location::Register:
+ dbgs() << "Register " << TRI->getName(Loc.Reg);
+ break;
+ case Location::Direct:
+ dbgs() << "Direct " << TRI->getName(Loc.Reg);
+ if (Loc.Offset)
+ dbgs() << " + " << Loc.Offset;
+ break;
+ case Location::Indirect:
+ dbgs() << "Indirect " << TRI->getName(Loc.Reg)
+ << " + " << Loc.Offset;
+ break;
+ case Location::Constant:
+ dbgs() << "Constant " << Loc.Offset;
+ break;
+ case Location::ConstantIndex:
+ dbgs() << "Constant Index " << Loc.Offset;
+ break;
+ }
+ dbgs() << " [encoding: .byte " << Loc.LocType
+ << ", .byte " << Loc.Size
+ << ", .short " << RegNo
+ << ", .int " << Offset << "]\n";
+ );
+
+ OS.EmitIntValue(Loc.LocType, 1);
+ OS.EmitIntValue(Loc.Size, 1);
+ OS.EmitIntValue(RegNo, 2);
+ OS.EmitIntValue(Offset, 4);
+ OperIdx++;
}
DEBUG(dbgs() << WSMP << " has " << LiveOuts.size()
- << " live-out registers\n");
+ << " live-out registers\n");
// Num live-out registers and padding to align to 4 byte.
- AP.OutStreamer.EmitIntValue(0, 2);
- AP.OutStreamer.EmitIntValue(LiveOuts.size(), 2);
-
- operIdx = 0;
- for (LiveOutVec::const_iterator LI = LiveOuts.begin(), LE = LiveOuts.end();
- LI != LE; ++LI, ++operIdx) {
- DEBUG(dbgs() << WSMP << " LO " << operIdx << ": "
- << TRI->getName(LI->Reg)
- << " [encoding: .short " << LI->RegNo
- << ", .byte 0, .byte " << LI->Size << "]\n");
-
- AP.OutStreamer.EmitIntValue(LI->RegNo, 2);
- AP.OutStreamer.EmitIntValue(0, 1);
- AP.OutStreamer.EmitIntValue(LI->Size, 1);
+ OS.EmitIntValue(0, 2);
+ OS.EmitIntValue(LiveOuts.size(), 2);
+
+ OperIdx = 0;
+ for (const auto &LO : LiveOuts) {
+ DEBUG(dbgs() << WSMP << " LO " << OperIdx << ": "
+ << TRI->getName(LO.Reg)
+ << " [encoding: .short " << LO.RegNo
+ << ", .byte 0, .byte " << LO.Size << "]\n");
+ OS.EmitIntValue(LO.RegNo, 2);
+ OS.EmitIntValue(0, 1);
+ OS.EmitIntValue(LO.Size, 1);
}
// Emit alignment to 8 byte.
- AP.OutStreamer.EmitValueToAlignment(8);
+ OS.EmitValueToAlignment(8);
}
+}
+
+/// Serialize the stackmap data.
+void StackMaps::serializeToStackMapSection() {
+ (void) WSMP;
+ // Bail out if there's no stack map data.
+ assert((!CSInfos.empty() || (CSInfos.empty() && ConstPool.empty())) &&
+ "Expected empty constant pool too!");
+ assert((!CSInfos.empty() || (CSInfos.empty() && FnStackSize.empty())) &&
+ "Expected empty function record too!");
+ if (CSInfos.empty())
+ return;
- AP.OutStreamer.AddBlankLine();
+ MCContext &OutContext = AP.OutStreamer.getContext();
+ MCStreamer &OS = AP.OutStreamer;
+ const TargetRegisterInfo *TRI = AP.TM.getRegisterInfo();
+
+ // Create the section.
+ const MCSection *StackMapSection =
+ OutContext.getObjectFileInfo()->getStackMapSection();
+ OS.SwitchSection(StackMapSection);
+
+ // Emit a dummy symbol to force section inclusion.
+ OS.EmitLabel(OutContext.GetOrCreateSymbol(Twine("__LLVM_StackMaps")));
+
+ // Serialize data.
+ DEBUG(dbgs() << "********** Stack Map Output **********\n");
+ emitStackmapHeader(OS);
+ emitFunctionFrameRecords(OS);
+ emitConstantPoolEntries(OS);
+ emitCallsiteEntries(OS, TRI);
+ OS.AddBlankLine();
+ // Clean up.
CSInfos.clear();
+ ConstPool.clear();
}
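
For readers of the emitted blob, the version-1 layout documented above starts with a 4-byte version/reserved prefix followed by three uint32 counts. A hypothetical reader-side sketch (it assumes host and target share endianness, which a real parser must not):

#include <cstdint>
#include <cstring>

struct StackMapHeaderV1 {
  uint8_t Version;     // currently 1
  uint8_t Reserved8;   // 0
  uint16_t Reserved16; // 0
  uint32_t NumFunctions;
  uint32_t NumConstants;
  uint32_t NumRecords;
};

static StackMapHeaderV1 parseHeader(const uint8_t *Data) {
  StackMapHeaderV1 H;               // 16 bytes, naturally packed
  std::memcpy(&H, Data, sizeof(H));
  return H;
}
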
diff --git a/lib/CodeGen/StackProtector.cpp b/lib/CodeGen/StackProtector.cpp
index f3749e5..accfe7b 100644
--- a/lib/CodeGen/StackProtector.cpp
+++ b/lib/CodeGen/StackProtector.cpp
@@ -14,7 +14,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "stack-protector"
#include "llvm/CodeGen/StackProtector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
@@ -37,6 +36,8 @@
#include <cstdlib>
using namespace llvm;
+#define DEBUG_TYPE "stack-protector"
+
STATISTIC(NumFunProtected, "Number of functions protected");
STATISTIC(NumAddrTaken, "Number of local variables that have their address"
" taken.");
@@ -83,18 +84,18 @@ bool StackProtector::runOnFunction(Function &Fn) {
M = F->getParent();
DominatorTreeWrapperPass *DTWP =
getAnalysisIfAvailable<DominatorTreeWrapperPass>();
- DT = DTWP ? &DTWP->getDomTree() : 0;
+ DT = DTWP ? &DTWP->getDomTree() : nullptr;
TLI = TM->getTargetLowering();
- if (!RequiresStackProtector())
- return false;
-
Attribute Attr = Fn.getAttributes().getAttribute(
AttributeSet::FunctionIndex, "stack-protector-buffer-size");
if (Attr.isStringAttribute() &&
Attr.getValueAsString().getAsInteger(10, SSPBufferSize))
return false; // Invalid integer string
+ if (!RequiresStackProtector())
+ return false;
+
++NumFunProtected;
return InsertStackProtectors();
}
@@ -319,7 +320,7 @@ static CallInst *FindPotentialTailCall(BasicBlock *BB, ReturnInst *RI,
SearchCounter++;
}
- return 0;
+ return nullptr;
}
/// Insert code into the entry block that stores the __stack_chk_guard
@@ -354,7 +355,7 @@ static bool CreatePrologue(Function *F, Module *M, ReturnInst *RI,
}
IRBuilder<> B(&F->getEntryBlock().front());
- AI = B.CreateAlloca(PtrTy, 0, "StackGuardSlot");
+ AI = B.CreateAlloca(PtrTy, nullptr, "StackGuardSlot");
LoadInst *LI = B.CreateLoad(StackGuardVar, "StackGuard");
B.CreateCall2(Intrinsic::getDeclaration(M, Intrinsic::stackprotector), LI,
AI);
@@ -372,8 +373,8 @@ bool StackProtector::InsertStackProtectors() {
bool HasPrologue = false;
bool SupportsSelectionDAGSP =
EnableSelectionDAGSP && !TM->Options.EnableFastISel;
- AllocaInst *AI = 0; // Place on stack that stores the stack guard.
- Value *StackGuardVar = 0; // The stack guard variable.
+ AllocaInst *AI = nullptr; // Place on stack that stores the stack guard.
+ Value *StackGuardVar = nullptr; // The stack guard variable.
for (Function::iterator I = F->begin(), E = F->end(); I != E;) {
BasicBlock *BB = I++;
@@ -390,14 +391,14 @@ bool StackProtector::InsertStackProtectors() {
if (SupportsSelectionDAGSP) {
// Since we have a potential tail call, insert the special stack check
// intrinsic.
- Instruction *InsertionPt = 0;
+ Instruction *InsertionPt = nullptr;
if (CallInst *CI = FindPotentialTailCall(BB, RI, TLI)) {
InsertionPt = CI;
} else {
InsertionPt = RI;
// At this point we know that BB has a return statement so it *DOES*
// have a terminator.
- assert(InsertionPt != 0 && "BB must have a terminator instruction at "
+ assert(InsertionPt != nullptr && "BB must have a terminator instruction at "
"this point.");
}
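
As background, the guard scheme this pass lowers to copies the stack guard into a stack slot in the prologue and rechecks it before returning, aborting on a mismatch. A plain-C++ sketch with stand-in names (the real pass emits IR against __stack_chk_guard / __stack_chk_fail):

#include <cstdint>
#include <cstdlib>

uintptr_t GuardValue = 0xDEADBEEF; // stand-in for __stack_chk_guard

void protectedFunction() {
  volatile uintptr_t Slot = GuardValue; // prologue: spill guard to the stack
  char Buffer[64];
  Buffer[0] = 0;                        // ... code that might clobber Buffer
  if (Slot != GuardValue)               // epilogue: recheck before returning
    std::abort();                       // stand-in for __stack_chk_fail
}
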
diff --git a/lib/CodeGen/StackSlotColoring.cpp b/lib/CodeGen/StackSlotColoring.cpp
index 2717f4c..791168f 100644
--- a/lib/CodeGen/StackSlotColoring.cpp
+++ b/lib/CodeGen/StackSlotColoring.cpp
@@ -11,7 +11,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "stackslotcoloring"
#include "llvm/CodeGen/Passes.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/SmallVector.h"
@@ -33,6 +32,8 @@
#include <vector>
using namespace llvm;
+#define DEBUG_TYPE "stackslotcoloring"
+
static cl::opt<bool>
DisableSharing("no-stack-slot-sharing",
cl::init(false), cl::Hidden,
@@ -161,13 +162,12 @@ void StackSlotColoring::ScanForSpillSlotRefs(MachineFunction &MF) {
for (MachineInstr::mmo_iterator MMOI = MI->memoperands_begin(),
EE = MI->memoperands_end(); MMOI != EE; ++MMOI) {
MachineMemOperand *MMO = *MMOI;
- if (const Value *V = MMO->getValue()) {
- if (const FixedStackPseudoSourceValue *FSV =
- dyn_cast<FixedStackPseudoSourceValue>(V)) {
- int FI = FSV->getFrameIndex();
- if (FI >= 0)
- SSRefs[FI].push_back(MMO);
- }
+ if (const FixedStackPseudoSourceValue *FSV =
+ dyn_cast_or_null<FixedStackPseudoSourceValue>(
+ MMO->getPseudoValue())) {
+ int FI = FSV->getFrameIndex();
+ if (FI >= 0)
+ SSRefs[FI].push_back(MMO);
}
}
}
@@ -310,7 +310,7 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) {
if (NewFI == -1 || (NewFI == (int)SS))
continue;
- const Value *NewSV = PseudoSourceValue::getFixedStack(NewFI);
+ const PseudoSourceValue *NewSV = PseudoSourceValue::getFixedStack(NewFI);
SmallVectorImpl<MachineMemOperand *> &RefMMOs = SSRefs[SS];
for (unsigned i = 0, e = RefMMOs.size(); i != e; ++i)
RefMMOs[i]->setValue(NewSV);
@@ -398,7 +398,7 @@ bool StackSlotColoring::RemoveDeadStores(MachineBasicBlock* MBB) {
++NumDead;
changed = true;
- if (NextMI->findRegisterUseOperandIdx(LoadReg, true, 0) != -1) {
+ if (NextMI->findRegisterUseOperandIdx(LoadReg, true, nullptr) != -1) {
++NumDead;
toErase.push_back(I);
}
diff --git a/lib/CodeGen/TailDuplication.cpp b/lib/CodeGen/TailDuplication.cpp
index 3b7a04c..723a629 100644
--- a/lib/CodeGen/TailDuplication.cpp
+++ b/lib/CodeGen/TailDuplication.cpp
@@ -12,7 +12,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "tailduplication"
#include "llvm/CodeGen/Passes.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/SetVector.h"
@@ -34,6 +33,8 @@
#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
+#define DEBUG_TYPE "tailduplication"
+
STATISTIC(NumTails , "Number of tails duplicated");
STATISTIC(NumTailDups , "Number of tail duplicated blocks");
STATISTIC(NumInstrDups , "Additional instructions due to tail duplication");
@@ -181,7 +182,7 @@ static void VerifyPHIs(MachineFunction &MF, bool CheckExtra) {
dbgs() << "Malformed PHI in BB#" << MBB->getNumber() << ": " << *MI;
dbgs() << " missing input from predecessor BB#"
<< PredBB->getNumber() << '\n';
- llvm_unreachable(0);
+ llvm_unreachable(nullptr);
}
}
@@ -192,12 +193,12 @@ static void VerifyPHIs(MachineFunction &MF, bool CheckExtra) {
<< ": " << *MI;
dbgs() << " extra input from predecessor BB#"
<< PHIBB->getNumber() << '\n';
- llvm_unreachable(0);
+ llvm_unreachable(nullptr);
}
if (PHIBB->getNumber() < 0) {
dbgs() << "Malformed PHI in BB#" << MBB->getNumber() << ": " << *MI;
dbgs() << " non-existing BB#" << PHIBB->getNumber() << '\n';
- llvm_unreachable(0);
+ llvm_unreachable(nullptr);
}
}
++MI;
@@ -247,7 +248,7 @@ TailDuplicatePass::TailDuplicateAndUpdate(MachineBasicBlock *MBB,
// If the original definition is still around, add it as an available
// value.
MachineInstr *DefMI = MRI->getVRegDef(VReg);
- MachineBasicBlock *DefBB = 0;
+ MachineBasicBlock *DefBB = nullptr;
if (DefMI) {
DefBB = DefMI->getParent();
SSAUpdate.AddAvailableValue(DefBB, VReg);
@@ -363,9 +364,7 @@ static unsigned getPHISrcRegOpIdx(MachineInstr *MI, MachineBasicBlock *SrcBB) {
// block (which is why we need to copy the information).
static void getRegsUsedByPHIs(const MachineBasicBlock &BB,
DenseSet<unsigned> *UsedByPhi) {
- for(MachineBasicBlock::const_iterator I = BB.begin(), E = BB.end();
- I != E; ++I) {
- const MachineInstr &MI = *I;
+ for (const auto &MI : BB) {
if (!MI.isPHI())
break;
for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2) {
@@ -656,7 +655,7 @@ TailDuplicatePass::canCompletelyDuplicateBB(MachineBasicBlock &BB) {
if (PredBB->succ_size() > 1)
return false;
- MachineBasicBlock *PredTBB = NULL, *PredFBB = NULL;
+ MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr;
SmallVector<MachineOperand, 4> PredCond;
if (TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true))
return false;
@@ -687,7 +686,7 @@ TailDuplicatePass::duplicateSimpleBB(MachineBasicBlock *TailBB,
if (bothUsedInPHI(*PredBB, Succs))
continue;
- MachineBasicBlock *PredTBB = NULL, *PredFBB = NULL;
+ MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr;
SmallVector<MachineOperand, 4> PredCond;
if (TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true))
continue;
@@ -718,14 +717,14 @@ TailDuplicatePass::duplicateSimpleBB(MachineBasicBlock *TailBB,
// Make the branch unconditional if possible
if (PredTBB == PredFBB) {
PredCond.clear();
- PredFBB = NULL;
+ PredFBB = nullptr;
}
// Avoid adding fall through branches.
if (PredFBB == NextBB)
- PredFBB = NULL;
- if (PredTBB == NextBB && PredFBB == NULL)
- PredTBB = NULL;
+ PredFBB = nullptr;
+ if (PredTBB == NextBB && PredFBB == nullptr)
+ PredTBB = nullptr;
TII->RemoveBranch(*PredBB);
@@ -858,7 +857,7 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB,
// block, which falls through unconditionally, move the contents of this
// block into the prior block.
MachineBasicBlock *PrevBB = std::prev(MachineFunction::iterator(TailBB));
- MachineBasicBlock *PriorTBB = 0, *PriorFBB = 0;
+ MachineBasicBlock *PriorTBB = nullptr, *PriorFBB = nullptr;
SmallVector<MachineOperand, 4> PriorCond;
// This has to check PrevBB->succ_size() because EH edges are ignored by
// AnalyzeBranch.
diff --git a/lib/CodeGen/TargetInstrInfo.cpp b/lib/CodeGen/TargetInstrInfo.cpp
index cae3ccd..c3f84c6 100644
--- a/lib/CodeGen/TargetInstrInfo.cpp
+++ b/lib/CodeGen/TargetInstrInfo.cpp
@@ -43,7 +43,7 @@ TargetInstrInfo::getRegClass(const MCInstrDesc &MCID, unsigned OpNum,
const TargetRegisterInfo *TRI,
const MachineFunction &MF) const {
if (OpNum >= MCID.getNumOperands())
- return 0;
+ return nullptr;
short RegClass = MCID.OpInfo[OpNum].RegClass;
if (MCID.OpInfo[OpNum].isLookupPtrRegClass())
@@ -51,7 +51,7 @@ TargetInstrInfo::getRegClass(const MCInstrDesc &MCID, unsigned OpNum,
// Instructions like INSERT_SUBREG do not have fixed register classes.
if (RegClass < 0)
- return 0;
+ return nullptr;
// Otherwise just look it up normally.
return TRI->getRegClass(RegClass);
@@ -111,7 +111,7 @@ TargetInstrInfo::ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail,
// If NewDest isn't immediately after MBB, insert a branch to it.
if (++MachineFunction::iterator(MBB) != MachineFunction::iterator(NewDest))
- InsertBranch(*MBB, NewDest, 0, SmallVector<MachineOperand, 0>(),
+ InsertBranch(*MBB, NewDest, nullptr, SmallVector<MachineOperand, 0>(),
Tail->getDebugLoc());
MBB->addSuccessor(NewDest);
}
@@ -124,13 +124,11 @@ MachineInstr *TargetInstrInfo::commuteInstruction(MachineInstr *MI,
bool HasDef = MCID.getNumDefs();
if (HasDef && !MI->getOperand(0).isReg())
// No idea how to commute this instruction. Target should implement its own.
- return 0;
+ return nullptr;
unsigned Idx1, Idx2;
if (!findCommutedOpIndices(MI, Idx1, Idx2)) {
- std::string msg;
- raw_string_ostream Msg(msg);
- Msg << "Don't know how to commute: " << *MI;
- report_fatal_error(Msg.str());
+ assert(MI->isCommutable() && "Precondition violation: MI must be commutable.");
+ return nullptr;
}
assert(MI->getOperand(Idx1).isReg() && MI->getOperand(Idx2).isReg() &&
@@ -250,13 +248,15 @@ bool TargetInstrInfo::hasLoadFromStackSlot(const MachineInstr *MI,
oe = MI->memoperands_end();
o != oe;
++o) {
- if ((*o)->isLoad() && (*o)->getValue())
+ if ((*o)->isLoad()) {
if (const FixedStackPseudoSourceValue *Value =
- dyn_cast<const FixedStackPseudoSourceValue>((*o)->getValue())) {
+ dyn_cast_or_null<FixedStackPseudoSourceValue>(
+ (*o)->getPseudoValue())) {
FrameIndex = Value->getFrameIndex();
MMO = *o;
return true;
}
+ }
}
return false;
}
@@ -268,13 +268,15 @@ bool TargetInstrInfo::hasStoreToStackSlot(const MachineInstr *MI,
oe = MI->memoperands_end();
o != oe;
++o) {
- if ((*o)->isStore() && (*o)->getValue())
+ if ((*o)->isStore()) {
if (const FixedStackPseudoSourceValue *Value =
- dyn_cast<const FixedStackPseudoSourceValue>((*o)->getValue())) {
+ dyn_cast_or_null<FixedStackPseudoSourceValue>(
+ (*o)->getPseudoValue())) {
FrameIndex = Value->getFrameIndex();
MMO = *o;
return true;
}
+ }
}
return false;
}
@@ -340,14 +342,14 @@ static const TargetRegisterClass *canFoldCopy(const MachineInstr *MI,
unsigned FoldIdx) {
assert(MI->isCopy() && "MI must be a COPY instruction");
if (MI->getNumOperands() != 2)
- return 0;
+ return nullptr;
assert(FoldIdx < 2 && "FoldIdx refers to a nonexistent operand");
const MachineOperand &FoldOp = MI->getOperand(FoldIdx);
const MachineOperand &LiveOp = MI->getOperand(1-FoldIdx);
if (FoldOp.getSubReg() || LiveOp.getSubReg())
- return 0;
+ return nullptr;
unsigned FoldReg = FoldOp.getReg();
unsigned LiveReg = LiveOp.getReg();
@@ -359,13 +361,13 @@ static const TargetRegisterClass *canFoldCopy(const MachineInstr *MI,
const TargetRegisterClass *RC = MRI.getRegClass(FoldReg);
if (TargetRegisterInfo::isPhysicalRegister(LiveOp.getReg()))
- return RC->contains(LiveOp.getReg()) ? RC : 0;
+ return RC->contains(LiveOp.getReg()) ? RC : nullptr;
if (RC->hasSubClassEq(MRI.getRegClass(LiveReg)))
return RC;
// FIXME: Allow folding when register classes are memory compatible.
- return 0;
+ return nullptr;
}
bool TargetInstrInfo::
@@ -399,7 +401,7 @@ static MachineInstr* foldPatchpoint(MachineFunction &MF,
for (SmallVectorImpl<unsigned>::const_iterator I = Ops.begin(), E = Ops.end();
I != E; ++I) {
if (*I < StartIdx)
- return 0;
+ return nullptr;
}
MachineInstr *NewMI =
@@ -454,7 +456,7 @@ TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI,
assert(MBB && "foldMemoryOperand needs an inserted instruction");
MachineFunction &MF = *MBB->getParent();
- MachineInstr *NewMI = 0;
+ MachineInstr *NewMI = nullptr;
if (MI->getOpcode() == TargetOpcode::STACKMAP ||
MI->getOpcode() == TargetOpcode::PATCHPOINT) {
@@ -488,11 +490,11 @@ TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI,
// Straight COPY may fold as load/store.
if (!MI->isCopy() || Ops.size() != 1)
- return 0;
+ return nullptr;
const TargetRegisterClass *RC = canFoldCopy(MI, Ops[0]);
if (!RC)
- return 0;
+ return nullptr;
const MachineOperand &MO = MI->getOperand(1-Ops[0]);
MachineBasicBlock::iterator Pos = MI;
@@ -521,7 +523,7 @@ TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI,
MachineFunction &MF = *MBB.getParent();
// Ask the target to do the actual folding.
- MachineInstr *NewMI = 0;
+ MachineInstr *NewMI = nullptr;
int FrameIndex = 0;
if ((MI->getOpcode() == TargetOpcode::STACKMAP ||
@@ -534,7 +536,7 @@ TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI,
NewMI = foldMemoryOperandImpl(MF, MI, Ops, LoadMI);
}
- if (!NewMI) return 0;
+ if (!NewMI) return nullptr;
NewMI = MBB.insert(MI, NewMI);
diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp
index 870370b..2634d71 100644
--- a/lib/CodeGen/TargetLoweringBase.cpp
+++ b/lib/CodeGen/TargetLoweringBase.cpp
@@ -82,16 +82,16 @@ static void InitLibcallNames(const char **Names, const TargetMachine &TM) {
Names[RTLIB::UREM_I128] = "__umodti3";
// These are generally not available.
- Names[RTLIB::SDIVREM_I8] = 0;
- Names[RTLIB::SDIVREM_I16] = 0;
- Names[RTLIB::SDIVREM_I32] = 0;
- Names[RTLIB::SDIVREM_I64] = 0;
- Names[RTLIB::SDIVREM_I128] = 0;
- Names[RTLIB::UDIVREM_I8] = 0;
- Names[RTLIB::UDIVREM_I16] = 0;
- Names[RTLIB::UDIVREM_I32] = 0;
- Names[RTLIB::UDIVREM_I64] = 0;
- Names[RTLIB::UDIVREM_I128] = 0;
+ Names[RTLIB::SDIVREM_I8] = nullptr;
+ Names[RTLIB::SDIVREM_I16] = nullptr;
+ Names[RTLIB::SDIVREM_I32] = nullptr;
+ Names[RTLIB::SDIVREM_I64] = nullptr;
+ Names[RTLIB::SDIVREM_I128] = nullptr;
+ Names[RTLIB::UDIVREM_I8] = nullptr;
+ Names[RTLIB::UDIVREM_I16] = nullptr;
+ Names[RTLIB::UDIVREM_I32] = nullptr;
+ Names[RTLIB::UDIVREM_I64] = nullptr;
+ Names[RTLIB::UDIVREM_I128] = nullptr;
Names[RTLIB::NEG_I32] = "__negsi2";
Names[RTLIB::NEG_I64] = "__negdi2";
@@ -392,18 +392,18 @@ static void InitLibcallNames(const char **Names, const TargetMachine &TM) {
Names[RTLIB::SINCOS_PPCF128] = "sincosl";
} else {
// These are generally not available.
- Names[RTLIB::SINCOS_F32] = 0;
- Names[RTLIB::SINCOS_F64] = 0;
- Names[RTLIB::SINCOS_F80] = 0;
- Names[RTLIB::SINCOS_F128] = 0;
- Names[RTLIB::SINCOS_PPCF128] = 0;
+ Names[RTLIB::SINCOS_F32] = nullptr;
+ Names[RTLIB::SINCOS_F64] = nullptr;
+ Names[RTLIB::SINCOS_F80] = nullptr;
+ Names[RTLIB::SINCOS_F128] = nullptr;
+ Names[RTLIB::SINCOS_PPCF128] = nullptr;
}
if (Triple(TM.getTargetTriple()).getOS() != Triple::OpenBSD) {
Names[RTLIB::STACKPROTECTOR_CHECK_FAIL] = "__stack_chk_fail";
} else {
// These are generally not available.
- Names[RTLIB::STACKPROTECTOR_CHECK_FAIL] = 0;
+ Names[RTLIB::STACKPROTECTOR_CHECK_FAIL] = nullptr;
}
}
@@ -680,6 +680,7 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm,
UseUnderscoreLongJmp = false;
SelectIsExpensive = false;
HasMultipleConditionRegisters = false;
+ HasExtractBitsInsn = false;
IntDivIsCheap = false;
Pow2DivIsCheap = false;
JumpIsExpensive = false;
@@ -914,7 +915,6 @@ bool TargetLoweringBase::isLegalRC(const TargetRegisterClass *RC) const {
MachineBasicBlock*
TargetLoweringBase::emitPatchPoint(MachineInstr *MI,
MachineBasicBlock *MBB) const {
- const TargetMachine &TM = getTargetMachine();
MachineFunction &MF = *MI->getParent()->getParent();
// MI changes inside this loop as we grow operands.
@@ -1006,7 +1006,7 @@ void TargetLoweringBase::computeRegisterProperties() {
// Find the largest integer register class.
unsigned LargestIntReg = MVT::LAST_INTEGER_VALUETYPE;
- for (; RegClassForVT[LargestIntReg] == 0; --LargestIntReg)
+ for (; RegClassForVT[LargestIntReg] == nullptr; --LargestIntReg)
assert(LargestIntReg != MVT::i1 && "No integer registers defined!");
// Every integer value type larger than this largest register takes twice as
@@ -1326,7 +1326,7 @@ int TargetLoweringBase::InstructionOpcodeToISD(unsigned Opcode) const {
case Mul: return ISD::MUL;
case FMul: return ISD::FMUL;
case UDiv: return ISD::UDIV;
- case SDiv: return ISD::UDIV;
+ case SDiv: return ISD::SDIV;
case FDiv: return ISD::FDIV;
case URem: return ISD::UREM;
case SRem: return ISD::SREM;
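
The SDiv fix above matters because signed and unsigned division diverge as soon as an operand is negative; mapping SDiv to ISD::UDIV silently miscompiled such cases. A two-assert demonstration:

#include <cassert>
#include <cstdint>

int main() {
  int32_t A = -8, B = 2;
  assert(A / B == -4);                              // what SDIV computes
  assert(uint32_t(A) / uint32_t(B) == 0x7FFFFFFCu); // what UDIV would compute
  return 0;
}
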
diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index e41fbfc..dda2259 100644
--- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -100,7 +100,7 @@ const MCExpr *TargetLoweringObjectFileELF::getTTypeGlobalReference(
// Add information about the stub reference to ELFMMI so that the stub
// gets emitted by the asmprinter.
MachineModuleInfoImpl::StubValueTy &StubSym = ELFMMI.getGVStubEntry(SSym);
- if (StubSym.getPointer() == 0) {
+ if (!StubSym.getPointer()) {
MCSymbol *Sym = TM.getSymbol(GV, Mang);
StubSym = MachineModuleInfoImpl::StubValueTy(Sym, !GV->hasLocalLinkage());
}
@@ -339,8 +339,8 @@ getSectionForConstant(SectionKind Kind) const {
return DataRelROSection;
}
-const MCSection *
-TargetLoweringObjectFileELF::getStaticCtorSection(unsigned Priority) const {
+const MCSection *TargetLoweringObjectFileELF::getStaticCtorSection(
+ unsigned Priority, const MCSymbol *KeySym, const MCSection *KeySec) const {
// The default scheme is .ctor / .dtor, so we have to invert the priority
// numbering.
if (Priority == 65535)
@@ -359,8 +359,8 @@ TargetLoweringObjectFileELF::getStaticCtorSection(unsigned Priority) const {
}
}
-const MCSection *
-TargetLoweringObjectFileELF::getStaticDtorSection(unsigned Priority) const {
+const MCSection *TargetLoweringObjectFileELF::getStaticDtorSection(
+ unsigned Priority, const MCSymbol *KeySym, const MCSection *KeySec) const {
// The default scheme is .ctor / .dtor, so we have to invert the priority
// numbering.
if (Priority == 65535)
@@ -418,7 +418,7 @@ emitModuleFlags(MCStreamer &Streamer,
Mangler &Mang, const TargetMachine &TM) const {
unsigned VersionVal = 0;
unsigned ImageInfoFlags = 0;
- MDNode *LinkerOptions = 0;
+ MDNode *LinkerOptions = nullptr;
StringRef SectionVal;
for (ArrayRef<Module::ModuleFlagEntry>::iterator
@@ -659,7 +659,7 @@ const MCExpr *TargetLoweringObjectFileMachO::getTTypeGlobalReference(
MachineModuleInfoImpl::StubValueTy &StubSym =
GV->hasHiddenVisibility() ? MachOMMI.getHiddenGVStubEntry(SSym) :
MachOMMI.getGVStubEntry(SSym);
- if (StubSym.getPointer() == 0) {
+ if (!StubSym.getPointer()) {
MCSymbol *Sym = TM.getSymbol(GV, Mang);
StubSym = MachineModuleInfoImpl::StubValueTy(Sym, !GV->hasLocalLinkage());
}
@@ -685,7 +685,7 @@ MCSymbol *TargetLoweringObjectFileMachO::getCFIPersonalitySymbol(
// Add information about the stub reference to MachOMMI so that the stub
// gets emitted by the asmprinter.
MachineModuleInfoImpl::StubValueTy &StubSym = MachOMMI.getGVStubEntry(SSym);
- if (StubSym.getPointer() == 0) {
+ if (!StubSym.getPointer()) {
MCSymbol *Sym = TM.getSymbol(GV, Mang);
StubSym = MachineModuleInfoImpl::StubValueTy(Sym, !GV->hasLocalLinkage());
}
@@ -755,7 +755,7 @@ const MCSection *TargetLoweringObjectFileCOFF::getExplicitSectionGlobal(
static const char *getCOFFSectionNameForUniqueGlobal(SectionKind Kind) {
if (Kind.isText())
return ".text";
- if (Kind.isBSS ())
+ if (Kind.isBSS())
return ".bss";
if (Kind.isThreadLocal())
return ".tls$";
@@ -781,7 +781,7 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
// Section names depend on the name of the symbol which is not feasible if the
// symbol has private linkage.
if ((GV->isWeakForLinker() || EmitUniquedSection) &&
- !GV->hasPrivateLinkage()) {
+ !GV->hasPrivateLinkage() && !Kind.isCommon()) {
const char *Name = getCOFFSectionNameForUniqueGlobal(Kind);
unsigned Characteristics = getCOFFSectionFlags(Kind);
@@ -802,7 +802,10 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
if (Kind.isReadOnly())
return ReadOnlySection;
- if (Kind.isBSS())
+ // Note: we claim that common symbols are put in BSSSection, but they are
+ // really emitted with the magic .comm directive, which creates a symbol table
+ // entry but not a section.
+ if (Kind.isBSS() || Kind.isCommon())
return BSSSection;
return DataSection;
@@ -820,7 +823,7 @@ void TargetLoweringObjectFileCOFF::
emitModuleFlags(MCStreamer &Streamer,
ArrayRef<Module::ModuleFlagEntry> ModuleFlags,
Mangler &Mang, const TargetMachine &TM) const {
- MDNode *LinkerOptions = 0;
+ MDNode *LinkerOptions = nullptr;
// Look for the "Linker Options" flag, since it's the only one we support.
for (ArrayRef<Module::ModuleFlagEntry>::iterator
@@ -862,3 +865,32 @@ emitModuleFlags(MCStreamer &Streamer,
}
}
}
+
+static const MCSection *getAssociativeCOFFSection(MCContext &Ctx,
+ const MCSection *Sec,
+ const MCSymbol *KeySym,
+ const MCSection *KeySec) {
+ // Return the normal section if we don't have to be associative.
+ if (!KeySym)
+ return Sec;
+
+ // Make an associative section with the same name and kind as the normal
+ // section.
+ const MCSectionCOFF *SecCOFF = cast<MCSectionCOFF>(Sec);
+ const MCSectionCOFF *KeySecCOFF = cast<MCSectionCOFF>(KeySec);
+ unsigned Characteristics =
+ SecCOFF->getCharacteristics() | COFF::IMAGE_SCN_LNK_COMDAT;
+ return Ctx.getCOFFSection(SecCOFF->getSectionName(), Characteristics,
+ SecCOFF->getKind(), KeySym->getName(),
+ COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE, KeySecCOFF);
+}
+
+const MCSection *TargetLoweringObjectFileCOFF::getStaticCtorSection(
+ unsigned Priority, const MCSymbol *KeySym, const MCSection *KeySec) const {
+ return getAssociativeCOFFSection(getContext(), StaticCtorSection, KeySym, KeySec);
+}
+
+const MCSection *TargetLoweringObjectFileCOFF::getStaticDtorSection(
+ unsigned Priority, const MCSymbol *KeySym, const MCSection *KeySec) const {
+ return getAssociativeCOFFSection(getContext(), StaticDtorSection, KeySym, KeySec);
+}
diff --git a/lib/CodeGen/TargetRegisterInfo.cpp b/lib/CodeGen/TargetRegisterInfo.cpp
index 5a15243..a3a4fb3 100644
--- a/lib/CodeGen/TargetRegisterInfo.cpp
+++ b/lib/CodeGen/TargetRegisterInfo.cpp
@@ -101,7 +101,7 @@ TargetRegisterInfo::getAllocatableClass(const TargetRegisterClass *RC) const {
Idx += Offset + 1;
}
}
- return NULL;
+ return nullptr;
}
/// getMinimalPhysRegClass - Returns the Register Class of a physical
@@ -113,7 +113,7 @@ TargetRegisterInfo::getMinimalPhysRegClass(unsigned reg, EVT VT) const {
// Pick the most sub register class of the right type that contains
// this physreg.
- const TargetRegisterClass* BestRC = 0;
+ const TargetRegisterClass* BestRC = nullptr;
for (regclass_iterator I = regclass_begin(), E = regclass_end(); I != E; ++I){
const TargetRegisterClass* RC = *I;
if ((VT == MVT::Other || RC->hasType(VT)) && RC->contains(reg) &&
@@ -130,7 +130,7 @@ TargetRegisterInfo::getMinimalPhysRegClass(unsigned reg, EVT VT) const {
static void getAllocatableSetForRC(const MachineFunction &MF,
const TargetRegisterClass *RC, BitVector &R){
assert(RC->isAllocatable() && "invalid for nonallocatable sets");
- ArrayRef<uint16_t> Order = RC->getRawAllocationOrder(MF);
+ ArrayRef<MCPhysReg> Order = RC->getRawAllocationOrder(MF);
for (unsigned i = 0; i != Order.size(); ++i)
R.set(Order[i]);
}
@@ -164,7 +164,7 @@ const TargetRegisterClass *firstCommonClass(const uint32_t *A,
for (unsigned I = 0, E = TRI->getNumRegClasses(); I < E; I += 32)
if (unsigned Common = *A++ & *B++)
return TRI->getRegClass(I + countTrailingZeros(Common));
- return 0;
+ return nullptr;
}
const TargetRegisterClass *
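
firstCommonClass above scans the two register-class bit masks one 32-bit word at a time and converts the first common set bit back to a class ID via countTrailingZeros. A standalone sketch of the same scan, assuming GCC/Clang's __builtin_ctz:

#include <cstdint>

// Returns the index of the first bit set in both masks, or -1 if disjoint.
static int firstCommonSetBit(const uint32_t *A, const uint32_t *B,
                             unsigned NumBits) {
  for (unsigned I = 0; I < NumBits; I += 32)
    if (uint32_t Common = *A++ & *B++)
      return int(I) + __builtin_ctz(Common);
  return -1;
}
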
@@ -174,7 +174,7 @@ TargetRegisterInfo::getCommonSubClass(const TargetRegisterClass *A,
if (A == B)
return A;
if (!A || !B)
- return 0;
+ return nullptr;
// Register classes are ordered topologically, so the largest common
// sub-class is the common sub-class with the smallest ID.
@@ -194,7 +194,7 @@ TargetRegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
// The bit mask contains all register classes that are projected into B
// by Idx. Find a class that is also a sub-class of A.
return firstCommonClass(RCI.getMask(), A->getSubClassMask(), this);
- return 0;
+ return nullptr;
}
const TargetRegisterClass *TargetRegisterInfo::
@@ -215,7 +215,7 @@ getCommonSuperRegClass(const TargetRegisterClass *RCA, unsigned SubA,
// Arrange for RCA to be the larger register so the answer will be found in
// the first iteration. This makes the search linear for the most common
// case.
- const TargetRegisterClass *BestRC = 0;
+ const TargetRegisterClass *BestRC = nullptr;
unsigned *BestPreA = &PreA;
unsigned *BestPreB = &PreB;
if (RCA->getSize() < RCB->getSize()) {
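
Most of this file is mechanical 0-to-nullptr conversion, but the firstCommonClass hunk is worth unpacking: register-class membership is a bit vector chunked into 32-bit words, and the first common class is the index of the first set bit of the word-wise AND. A self-contained sketch of that scan, with a portable stand-in for llvm::countTrailingZeros:

    #include <cstdint>
    #include <iostream>
    #include <vector>

    // Portable stand-in for llvm::countTrailingZeros on a nonzero word.
    static unsigned countTrailingZeros(uint32_t V) {
      unsigned N = 0;
      while (!(V & 1)) { V >>= 1; ++N; }
      return N;
    }

    // Index of the first bit set in both masks, or -1 if the intersection
    // is empty. NumBits plays the role of getNumRegClasses().
    int firstCommonBit(const uint32_t *A, const uint32_t *B, unsigned NumBits) {
      for (unsigned I = 0; I < NumBits; I += 32)
        if (uint32_t Common = *A++ & *B++)
          return I + countTrailingZeros(Common);
      return -1;
    }

    int main() {
      std::vector<uint32_t> A = {0x0, 0x140}; // bits 38 and 40 set
      std::vector<uint32_t> B = {0x0, 0x100}; // bit 40 set
      std::cout << firstCommonBit(A.data(), B.data(), 64) << "\n"; // prints 40
    }
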
diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp
index d9e5aae..f42d47b 100644
--- a/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -27,7 +27,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "twoaddrinstr"
#include "llvm/CodeGen/Passes.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseMap.h"
@@ -51,6 +50,8 @@
#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
+#define DEBUG_TYPE "twoaddrinstr"
+
STATISTIC(NumTwoAddressInstrs, "Number of two-address instructions");
STATISTIC(NumCommuted , "Number of instructions commuted to coalesce");
STATISTIC(NumAggrCommuted , "Number of instructions aggressively commuted");
@@ -211,7 +212,7 @@ sink3AddrInstruction(MachineInstr *MI, unsigned SavedReg,
}
// Find the instruction that kills SavedReg.
- MachineInstr *KillMI = NULL;
+ MachineInstr *KillMI = nullptr;
if (LIS) {
LiveInterval &LI = LIS->getInterval(SavedReg);
assert(LI.end() != LI.begin() &&
@@ -250,7 +251,7 @@ sink3AddrInstruction(MachineInstr *MI, unsigned SavedReg,
// FIXME: This can be sped up if there is an easy way to query whether an
// instruction is before or after another instruction. Then we can use
// MachineRegisterInfo def / use instead.
- MachineOperand *KillMO = NULL;
+ MachineOperand *KillMO = nullptr;
MachineBasicBlock::iterator KillPos = KillMI;
++KillPos;
@@ -454,10 +455,10 @@ MachineInstr *findOnlyInterestingUse(unsigned Reg, MachineBasicBlock *MBB,
unsigned &DstReg, bool &IsDstPhys) {
if (!MRI->hasOneNonDBGUse(Reg))
// None or more than one use.
- return 0;
+ return nullptr;
MachineInstr &UseMI = *MRI->use_instr_nodbg_begin(Reg);
if (UseMI.getParent() != MBB)
- return 0;
+ return nullptr;
unsigned SrcReg;
bool IsSrcPhys;
if (isCopyToReg(UseMI, TII, SrcReg, DstReg, IsSrcPhys, IsDstPhys)) {
@@ -469,7 +470,7 @@ MachineInstr *findOnlyInterestingUse(unsigned Reg, MachineBasicBlock *MBB,
IsDstPhys = TargetRegisterInfo::isPhysicalRegister(DstReg);
return &UseMI;
}
- return 0;
+ return nullptr;
}
/// getMappedReg - Return the physical register the specified virtual register
@@ -576,7 +577,7 @@ commuteInstruction(MachineBasicBlock::iterator &mi,
DEBUG(dbgs() << "2addr: COMMUTING : " << *MI);
MachineInstr *NewMI = TII->commuteInstruction(MI);
- if (NewMI == 0) {
+ if (NewMI == nullptr) {
DEBUG(dbgs() << "2addr: COMMUTING FAILED!\n");
return false;
}
@@ -755,7 +756,7 @@ rescheduleMIBelowKill(MachineBasicBlock::iterator &mi,
// Must be created from unfolded load. Don't waste time trying this.
return false;
- MachineInstr *KillMI = 0;
+ MachineInstr *KillMI = nullptr;
if (LIS) {
LiveInterval &LI = LIS->getInterval(Reg);
assert(LI.end() != LI.begin() &&
@@ -947,7 +948,7 @@ rescheduleKillAboveMI(MachineBasicBlock::iterator &mi,
// Must be created from unfolded load. Don't waste time trying this.
return false;
- MachineInstr *KillMI = 0;
+ MachineInstr *KillMI = nullptr;
if (LIS) {
LiveInterval &LI = LIS->getInterval(Reg);
assert(LI.end() != LI.begin() &&
@@ -1394,7 +1395,7 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI,
SubRegB) &&
"tied subregister must be a truncation");
// The superreg class will not be used to constrain the subreg class.
- RC = 0;
+ RC = nullptr;
}
else {
assert(TRI->getMatchingSuperReg(RegA, SubRegB, MRI->getRegClass(RegB))
@@ -1631,7 +1632,7 @@ eliminateRegSequence(MachineBasicBlock::iterator &MBBI) {
TargetRegisterInfo::isPhysicalRegister(DstReg) ||
!(MI->getNumOperands() & 1)) {
DEBUG(dbgs() << "Illegal REG_SEQUENCE instruction:" << *MI);
- llvm_unreachable(0);
+ llvm_unreachable(nullptr);
}
SmallVector<unsigned, 4> OrigRegs;
diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp
index f892e94..704736f 100644
--- a/lib/CodeGen/VirtRegMap.cpp
+++ b/lib/CodeGen/VirtRegMap.cpp
@@ -16,7 +16,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "regalloc"
#include "llvm/CodeGen/VirtRegMap.h"
#include "LiveDebugVariables.h"
#include "llvm/ADT/STLExtras.h"
@@ -40,6 +39,8 @@
#include <algorithm>
using namespace llvm;
+#define DEBUG_TYPE "regalloc"
+
STATISTIC(NumSpillSlots, "Number of spill slots allocated");
STATISTIC(NumIdCopies, "Number of identity moves eliminated after rewriting");
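
Both this hunk and the TwoAddressInstructionPass one above apply the same tree-wide cleanup: #define DEBUG_TYPE moves from before the includes, where it could collide with headers that also define or test DEBUG_TYPE, to just after the last #include and immediately before the STATISTIC macros that consume it. A toy illustration of the ordering, with simplified stand-ins for the real LLVM macros:

    // Headers come first, so they cannot see (or clobber) our DEBUG_TYPE.
    #include <iostream>

    #define DEBUG_TYPE "regalloc" // defined only after all includes

    // Minimal stand-in: the real DEBUG/STATISTIC macros consume DEBUG_TYPE
    // in the same position.
    #define DEBUG(X)                                                          \
      do { std::cerr << "[" << DEBUG_TYPE << "] "; X; } while (0)

    int main() {
      DEBUG(std::cerr << "spill slot allocated\n");
    }
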
diff --git a/lib/CodeGen/module.modulemap b/lib/CodeGen/module.modulemap
new file mode 100644
index 0000000..d4f68bc
--- /dev/null
+++ b/lib/CodeGen/module.modulemap
@@ -0,0 +1 @@
+module CodeGen { requires cplusplus umbrella "." module * { export * } }
diff --git a/lib/DebugInfo/DWARFCompileUnit.h b/lib/DebugInfo/DWARFCompileUnit.h
index d1853d8..2ed188e 100644
--- a/lib/DebugInfo/DWARFCompileUnit.h
+++ b/lib/DebugInfo/DWARFCompileUnit.h
@@ -16,10 +16,10 @@ namespace llvm {
class DWARFCompileUnit : public DWARFUnit {
public:
- DWARFCompileUnit(const DWARFDebugAbbrev *DA, StringRef IS, StringRef AS,
- StringRef RS, StringRef SS, StringRef SOS, StringRef AOS,
+ DWARFCompileUnit(const DWARFDebugAbbrev *DA, StringRef IS, StringRef RS,
+ StringRef SS, StringRef SOS, StringRef AOS,
const RelocAddrMap *M, bool LE)
- : DWARFUnit(DA, IS, AS, RS, SS, SOS, AOS, M, LE) {}
+ : DWARFUnit(DA, IS, RS, SS, SOS, AOS, M, LE) {}
void dump(raw_ostream &OS);
// VTable anchor.
~DWARFCompileUnit() override;
diff --git a/lib/DebugInfo/DWARFContext.cpp b/lib/DebugInfo/DWARFContext.cpp
index 60c5f6a..e52e8af 100644
--- a/lib/DebugInfo/DWARFContext.cpp
+++ b/lib/DebugInfo/DWARFContext.cpp
@@ -8,6 +8,8 @@
//===----------------------------------------------------------------------===//
#include "DWARFContext.h"
+#include "DWARFDebugArangeSet.h"
+
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/Support/Compression.h"
@@ -20,7 +22,11 @@ using namespace llvm;
using namespace dwarf;
using namespace object;
+#define DEBUG_TYPE "dwarf"
+
typedef DWARFDebugLine::LineTable DWARFLineTable;
+typedef DILineInfoSpecifier::FileLineInfoKind FileLineInfoKind;
+typedef DILineInfoSpecifier::FunctionNameKind FunctionNameKind;
static void dumpPubSection(raw_ostream &OS, StringRef Name, StringRef Data,
bool LittleEndian, bool GnuStyle) {
@@ -126,8 +132,9 @@ void DWARFContext::dump(raw_ostream &OS, DIDumpType DumpType) {
if (stmtOffset != -1U) {
DataExtractor lineData(getLineSection().Data, isLittleEndian(),
savedAddressByteSize);
- DWARFDebugLine::DumpingState state(OS);
- DWARFDebugLine::parseStatementTable(lineData, &getLineSection().Relocs, &stmtOffset, state);
+ DWARFDebugLine::LineTable LineTable;
+ LineTable.parse(lineData, &getLineSection().Relocs, &stmtOffset);
+ LineTable.dump(OS);
}
}
}
@@ -137,9 +144,11 @@ void DWARFContext::dump(raw_ostream &OS, DIDumpType DumpType) {
unsigned stmtOffset = 0;
DataExtractor lineData(getLineDWOSection().Data, isLittleEndian(),
savedAddressByteSize);
- DWARFDebugLine::DumpingState state(OS);
- while (DWARFDebugLine::parsePrologue(lineData, &stmtOffset, &state.Prologue))
- state.finalize();
+ DWARFDebugLine::LineTable LineTable;
+ while (LineTable.Prologue.parse(lineData, &stmtOffset)) {
+ LineTable.dump(OS);
+ LineTable.clear();
+ }
}
if (DumpType == DIDT_All || DumpType == DIDT_Str) {
@@ -216,7 +225,7 @@ const DWARFDebugAbbrev *DWARFContext::getDebugAbbrev() {
DataExtractor abbrData(getAbbrevSection(), isLittleEndian(), 0);
Abbrev.reset(new DWARFDebugAbbrev());
- Abbrev->parse(abbrData);
+ Abbrev->extract(abbrData);
return Abbrev.get();
}
@@ -226,7 +235,7 @@ const DWARFDebugAbbrev *DWARFContext::getDebugAbbrevDWO() {
DataExtractor abbrData(getAbbrevDWOSection(), isLittleEndian(), 0);
AbbrevDWO.reset(new DWARFDebugAbbrev());
- AbbrevDWO->parse(abbrData);
+ AbbrevDWO->extract(abbrData);
return AbbrevDWO.get();
}
@@ -290,7 +299,7 @@ DWARFContext::getLineTableForCompileUnit(DWARFCompileUnit *cu) {
cu->getCompileUnitDIE()->getAttributeValueAsSectionOffset(
cu, DW_AT_stmt_list, -1U);
if (stmtOffset == -1U)
- return 0; // No line table for this compile unit.
+ return nullptr; // No line table for this compile unit.
// See if the line table is cached.
if (const DWARFLineTable *lt = Line->getLineTable(stmtOffset))
@@ -310,8 +319,8 @@ void DWARFContext::parseCompileUnits() {
isLittleEndian(), 0);
while (DIData.isValidOffset(offset)) {
std::unique_ptr<DWARFCompileUnit> CU(new DWARFCompileUnit(
- getDebugAbbrev(), getInfoSection().Data, getAbbrevSection(),
- getRangeSection(), getStringSection(), StringRef(), getAddrSection(),
+ getDebugAbbrev(), getInfoSection().Data, getRangeSection(),
+ getStringSection(), StringRef(), getAddrSection(),
&getInfoSection().Relocs, isLittleEndian()));
if (!CU->extract(DIData, &offset)) {
break;
@@ -329,10 +338,10 @@ void DWARFContext::parseTypeUnits() {
const DataExtractor &DIData =
DataExtractor(I.second.Data, isLittleEndian(), 0);
while (DIData.isValidOffset(offset)) {
- std::unique_ptr<DWARFTypeUnit> TU(new DWARFTypeUnit(
- getDebugAbbrev(), I.second.Data, getAbbrevSection(),
- getRangeSection(), getStringSection(), StringRef(), getAddrSection(),
- &I.second.Relocs, isLittleEndian()));
+ std::unique_ptr<DWARFTypeUnit> TU(
+ new DWARFTypeUnit(getDebugAbbrev(), I.second.Data, getRangeSection(),
+ getStringSection(), StringRef(), getAddrSection(),
+ &I.second.Relocs, isLittleEndian()));
if (!TU->extract(DIData, &offset))
break;
TUs.push_back(std::move(TU));
@@ -349,9 +358,8 @@ void DWARFContext::parseDWOCompileUnits() {
DataExtractor(getInfoDWOSection().Data, isLittleEndian(), 0);
while (DIData.isValidOffset(offset)) {
std::unique_ptr<DWARFCompileUnit> DWOCU(new DWARFCompileUnit(
- getDebugAbbrevDWO(), getInfoDWOSection().Data, getAbbrevDWOSection(),
- getRangeDWOSection(), getStringDWOSection(),
- getStringOffsetDWOSection(), getAddrSection(),
+ getDebugAbbrevDWO(), getInfoDWOSection().Data, getRangeDWOSection(),
+ getStringDWOSection(), getStringOffsetDWOSection(), getAddrSection(),
&getInfoDWOSection().Relocs, isLittleEndian()));
if (!DWOCU->extract(DIData, &offset)) {
break;
@@ -370,10 +378,9 @@ void DWARFContext::parseDWOTypeUnits() {
DataExtractor(I.second.Data, isLittleEndian(), 0);
while (DIData.isValidOffset(offset)) {
std::unique_ptr<DWARFTypeUnit> TU(new DWARFTypeUnit(
- getDebugAbbrevDWO(), I.second.Data, getAbbrevDWOSection(),
- getRangeDWOSection(), getStringDWOSection(),
- getStringOffsetDWOSection(), getAddrSection(), &I.second.Relocs,
- isLittleEndian()));
+ getDebugAbbrevDWO(), I.second.Data, getRangeDWOSection(),
+ getStringDWOSection(), getStringOffsetDWOSection(), getAddrSection(),
+ &I.second.Relocs, isLittleEndian()));
if (!TU->extract(DIData, &offset))
break;
DWOTUs.push_back(std::move(TU));
@@ -408,7 +415,7 @@ DWARFCompileUnit *DWARFContext::getCompileUnitForOffset(uint32_t Offset) {
if (CU != CUs.end()) {
return CU->get();
}
- return 0;
+ return nullptr;
}
DWARFCompileUnit *DWARFContext::getCompileUnitForAddress(uint64_t Address) {
@@ -420,15 +427,13 @@ DWARFCompileUnit *DWARFContext::getCompileUnitForAddress(uint64_t Address) {
static bool getFileNameForCompileUnit(DWARFCompileUnit *CU,
const DWARFLineTable *LineTable,
- uint64_t FileIndex,
- bool NeedsAbsoluteFilePath,
+ uint64_t FileIndex, FileLineInfoKind Kind,
std::string &FileName) {
- if (CU == 0 ||
- LineTable == 0 ||
- !LineTable->getFileNameByIndex(FileIndex, NeedsAbsoluteFilePath,
- FileName))
+ if (!CU || !LineTable || Kind == FileLineInfoKind::None ||
+ !LineTable->getFileNameByIndex(FileIndex, Kind, FileName))
return false;
- if (NeedsAbsoluteFilePath && sys::path::is_relative(FileName)) {
+ if (Kind == FileLineInfoKind::AbsoluteFilePath &&
+ sys::path::is_relative(FileName)) {
// We may still need to append compilation directory of compile unit.
SmallString<16> AbsolutePath;
if (const char *CompilationDir = CU->getCompilationDir()) {
@@ -443,10 +448,9 @@ static bool getFileNameForCompileUnit(DWARFCompileUnit *CU,
static bool getFileLineInfoForCompileUnit(DWARFCompileUnit *CU,
const DWARFLineTable *LineTable,
uint64_t Address,
- bool NeedsAbsoluteFilePath,
- std::string &FileName,
- uint32_t &Line, uint32_t &Column) {
- if (CU == 0 || LineTable == 0)
+ FileLineInfoKind Kind,
+ DILineInfo &Result) {
+ if (!CU || !LineTable)
return false;
// Get the index of row we're looking for in the line table.
uint32_t RowIndex = LineTable->lookupAddress(Address);
@@ -454,80 +458,71 @@ static bool getFileLineInfoForCompileUnit(DWARFCompileUnit *CU,
return false;
// Take file number and line/column from the row.
const DWARFDebugLine::Row &Row = LineTable->Rows[RowIndex];
- if (!getFileNameForCompileUnit(CU, LineTable, Row.File,
- NeedsAbsoluteFilePath, FileName))
+ if (!getFileNameForCompileUnit(CU, LineTable, Row.File, Kind,
+ Result.FileName))
return false;
- Line = Row.Line;
- Column = Row.Column;
+ Result.Line = Row.Line;
+ Result.Column = Row.Column;
return true;
}
+static bool getFunctionNameForAddress(DWARFCompileUnit *CU, uint64_t Address,
+ FunctionNameKind Kind,
+ std::string &FunctionName) {
+ if (Kind == FunctionNameKind::None)
+ return false;
+ // The address may correspond to an instruction in some inlined function,
+ // so we have to build the chain of inlined functions and take the
+ // name of the topmost function in it.
+ const DWARFDebugInfoEntryInlinedChain &InlinedChain =
+ CU->getInlinedChainForAddress(Address);
+ if (InlinedChain.DIEs.size() == 0)
+ return false;
+ const DWARFDebugInfoEntryMinimal &TopFunctionDIE = InlinedChain.DIEs[0];
+ if (const char *Name =
+ TopFunctionDIE.getSubroutineName(InlinedChain.U, Kind)) {
+ FunctionName = Name;
+ return true;
+ }
+ return false;
+}
+
DILineInfo DWARFContext::getLineInfoForAddress(uint64_t Address,
- DILineInfoSpecifier Specifier) {
+ DILineInfoSpecifier Spec) {
+ DILineInfo Result;
+
DWARFCompileUnit *CU = getCompileUnitForAddress(Address);
if (!CU)
- return DILineInfo();
- std::string FileName = "<invalid>";
- std::string FunctionName = "<invalid>";
- uint32_t Line = 0;
- uint32_t Column = 0;
- if (Specifier.needs(DILineInfoSpecifier::FunctionName)) {
- // The address may correspond to instruction in some inlined function,
- // so we have to build the chain of inlined functions and take the
- // name of the topmost function in it.
- const DWARFDebugInfoEntryInlinedChain &InlinedChain =
- CU->getInlinedChainForAddress(Address);
- if (InlinedChain.DIEs.size() > 0) {
- const DWARFDebugInfoEntryMinimal &TopFunctionDIE = InlinedChain.DIEs[0];
- if (const char *Name = TopFunctionDIE.getSubroutineName(InlinedChain.U))
- FunctionName = Name;
- }
- }
- if (Specifier.needs(DILineInfoSpecifier::FileLineInfo)) {
+ return Result;
+ getFunctionNameForAddress(CU, Address, Spec.FNKind, Result.FunctionName);
+ if (Spec.FLIKind != FileLineInfoKind::None) {
const DWARFLineTable *LineTable = getLineTableForCompileUnit(CU);
- const bool NeedsAbsoluteFilePath =
- Specifier.needs(DILineInfoSpecifier::AbsoluteFilePath);
- getFileLineInfoForCompileUnit(CU, LineTable, Address,
- NeedsAbsoluteFilePath,
- FileName, Line, Column);
+ getFileLineInfoForCompileUnit(CU, LineTable, Address, Spec.FLIKind, Result);
}
- return DILineInfo(StringRef(FileName), StringRef(FunctionName),
- Line, Column);
+ return Result;
}
-DILineInfoTable DWARFContext::getLineInfoForAddressRange(uint64_t Address,
- uint64_t Size,
- DILineInfoSpecifier Specifier) {
+DILineInfoTable
+DWARFContext::getLineInfoForAddressRange(uint64_t Address, uint64_t Size,
+ DILineInfoSpecifier Spec) {
DILineInfoTable Lines;
DWARFCompileUnit *CU = getCompileUnitForAddress(Address);
if (!CU)
return Lines;
std::string FunctionName = "<invalid>";
- if (Specifier.needs(DILineInfoSpecifier::FunctionName)) {
- // The address may correspond to instruction in some inlined function,
- // so we have to build the chain of inlined functions and take the
- // name of the topmost function in it.
- const DWARFDebugInfoEntryInlinedChain &InlinedChain =
- CU->getInlinedChainForAddress(Address);
- if (InlinedChain.DIEs.size() > 0) {
- const DWARFDebugInfoEntryMinimal &TopFunctionDIE = InlinedChain.DIEs[0];
- if (const char *Name = TopFunctionDIE.getSubroutineName(InlinedChain.U))
- FunctionName = Name;
- }
- }
+ getFunctionNameForAddress(CU, Address, Spec.FNKind, FunctionName);
// If the Specifier says we don't need FileLineInfo, just
// return the top-most function at the starting address.
- if (!Specifier.needs(DILineInfoSpecifier::FileLineInfo)) {
- Lines.push_back(
- std::make_pair(Address, DILineInfo("<invalid>", FunctionName, 0, 0)));
+ if (Spec.FLIKind == FileLineInfoKind::None) {
+ DILineInfo Result;
+ Result.FunctionName = FunctionName;
+ Lines.push_back(std::make_pair(Address, Result));
return Lines;
}
const DWARFLineTable *LineTable = getLineTableForCompileUnit(CU);
- const bool NeedsAbsoluteFilePath =
- Specifier.needs(DILineInfoSpecifier::AbsoluteFilePath);
// Get the index of row we're looking for in the line table.
std::vector<uint32_t> RowVector;
@@ -537,59 +532,67 @@ DILineInfoTable DWARFContext::getLineInfoForAddressRange(uint64_t Address,
for (uint32_t RowIndex : RowVector) {
// Take file number and line/column from the row.
const DWARFDebugLine::Row &Row = LineTable->Rows[RowIndex];
- std::string FileName = "<invalid>";
- getFileNameForCompileUnit(CU, LineTable, Row.File,
- NeedsAbsoluteFilePath, FileName);
- Lines.push_back(std::make_pair(
- Row.Address, DILineInfo(FileName, FunctionName, Row.Line, Row.Column)));
+ DILineInfo Result;
+ getFileNameForCompileUnit(CU, LineTable, Row.File, Spec.FLIKind,
+ Result.FileName);
+ Result.FunctionName = FunctionName;
+ Result.Line = Row.Line;
+ Result.Column = Row.Column;
+ Lines.push_back(std::make_pair(Row.Address, Result));
}
return Lines;
}
-DIInliningInfo DWARFContext::getInliningInfoForAddress(uint64_t Address,
- DILineInfoSpecifier Specifier) {
+DIInliningInfo
+DWARFContext::getInliningInfoForAddress(uint64_t Address,
+ DILineInfoSpecifier Spec) {
+ DIInliningInfo InliningInfo;
+
DWARFCompileUnit *CU = getCompileUnitForAddress(Address);
if (!CU)
- return DIInliningInfo();
+ return InliningInfo;
+ const DWARFLineTable *LineTable = nullptr;
const DWARFDebugInfoEntryInlinedChain &InlinedChain =
CU->getInlinedChainForAddress(Address);
- if (InlinedChain.DIEs.size() == 0)
- return DIInliningInfo();
+ if (InlinedChain.DIEs.size() == 0) {
+ // If there is no DIE for the address (e.g. it is in an unavailable .dwo
+ // file), try to at least get file/line info from the symbol table.
+ if (Spec.FLIKind != FileLineInfoKind::None) {
+ DILineInfo Frame;
+ LineTable = getLineTableForCompileUnit(CU);
+ if (getFileLineInfoForCompileUnit(CU, LineTable, Address, Spec.FLIKind,
+ Frame)) {
+ InliningInfo.addFrame(Frame);
+ }
+ }
+ return InliningInfo;
+ }
- DIInliningInfo InliningInfo;
uint32_t CallFile = 0, CallLine = 0, CallColumn = 0;
- const DWARFLineTable *LineTable = 0;
for (uint32_t i = 0, n = InlinedChain.DIEs.size(); i != n; i++) {
const DWARFDebugInfoEntryMinimal &FunctionDIE = InlinedChain.DIEs[i];
- std::string FileName = "<invalid>";
- std::string FunctionName = "<invalid>";
- uint32_t Line = 0;
- uint32_t Column = 0;
+ DILineInfo Frame;
// Get function name if necessary.
- if (Specifier.needs(DILineInfoSpecifier::FunctionName)) {
- if (const char *Name = FunctionDIE.getSubroutineName(InlinedChain.U))
- FunctionName = Name;
- }
- if (Specifier.needs(DILineInfoSpecifier::FileLineInfo)) {
- const bool NeedsAbsoluteFilePath =
- Specifier.needs(DILineInfoSpecifier::AbsoluteFilePath);
+ if (const char *Name =
+ FunctionDIE.getSubroutineName(InlinedChain.U, Spec.FNKind))
+ Frame.FunctionName = Name;
+ if (Spec.FLIKind != FileLineInfoKind::None) {
if (i == 0) {
// For the topmost frame, initialize the line table of this
// compile unit and fetch file/line info from it.
LineTable = getLineTableForCompileUnit(CU);
// For the topmost routine, get file/line info from line table.
- getFileLineInfoForCompileUnit(CU, LineTable, Address,
- NeedsAbsoluteFilePath,
- FileName, Line, Column);
+ getFileLineInfoForCompileUnit(CU, LineTable, Address, Spec.FLIKind,
+ Frame);
} else {
// Otherwise, use call file, call line and call column from
// previous DIE in inlined chain.
- getFileNameForCompileUnit(CU, LineTable, CallFile,
- NeedsAbsoluteFilePath, FileName);
- Line = CallLine;
- Column = CallColumn;
+ getFileNameForCompileUnit(CU, LineTable, CallFile, Spec.FLIKind,
+ Frame.FileName);
+ Frame.Line = CallLine;
+ Frame.Column = CallColumn;
}
// Get call file/line/column of the current DIE.
if (i + 1 < n) {
@@ -597,8 +600,6 @@ DIInliningInfo DWARFContext::getInliningInfoForAddress(uint64_t Address,
CallColumn);
}
}
- DILineInfo Frame(StringRef(FileName), StringRef(FunctionName),
- Line, Column);
InliningInfo.addFrame(Frame);
}
return InliningInfo;
@@ -637,14 +638,15 @@ DWARFContextInMemory::DWARFContextInMemory(object::ObjectFile *Obj)
if (!zlib::isAvailable() ||
!consumeCompressedDebugSectionHeader(data, OriginalSize))
continue;
- std::unique_ptr<MemoryBuffer> UncompressedSection;
- if (zlib::uncompress(data, UncompressedSection, OriginalSize) !=
- zlib::StatusOK)
+ UncompressedSections.resize(UncompressedSections.size() + 1);
+ if (zlib::uncompress(data, UncompressedSections.back(), OriginalSize) !=
+ zlib::StatusOK) {
+ UncompressedSections.pop_back();
continue;
+ }
// Make data point to uncompressed section contents and save its contents.
name = name.substr(1);
- data = UncompressedSection->getBuffer();
- UncompressedSections.push_back(std::move(UncompressedSection));
+ data = UncompressedSections.back();
}
StringRef *SectionData =
@@ -669,7 +671,7 @@ DWARFContextInMemory::DWARFContextInMemory(object::ObjectFile *Obj)
.Case("debug_str_offsets.dwo", &StringOffsetDWOSection)
.Case("debug_addr", &AddrSection)
// Any more debug info sections go here.
- .Default(0);
+ .Default(nullptr);
if (SectionData) {
*SectionData = data;
if (name == "debug_ranges") {
@@ -700,7 +702,7 @@ DWARFContextInMemory::DWARFContextInMemory(object::ObjectFile *Obj)
.Case("debug_loc", &LocSection.Relocs)
.Case("debug_info.dwo", &InfoDWOSection.Relocs)
.Case("debug_line", &LineSection.Relocs)
- .Default(0);
+ .Default(nullptr);
if (!Map) {
// Find debug_types relocs by section rather than name as there are
// multiple, comdat grouped, debug_types sections.
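
The recurring change in this file is replacing DILineInfoSpecifier's boolean needs(...) queries with the typed FileLineInfoKind/FunctionNameKind enums, so callers state which flavor of information they want instead of stacking flags. A sketch of the pattern with stand-in types — only the enumerator names mirror the diff; the struct and functions are invented for illustration:

    #include <iostream>
    #include <string>

    enum class FileLineInfoKind { None, Default, AbsoluteFilePath };
    enum class FunctionNameKind { None, ShortName, LinkageName };

    // Stand-in for DILineInfoSpecifier.
    struct LineInfoSpec {
      FileLineInfoKind FLIKind = FileLineInfoKind::Default;
      FunctionNameKind FNKind = FunctionNameKind::LinkageName;
    };

    std::string fileNameFor(const LineInfoSpec &Spec) {
      if (Spec.FLIKind == FileLineInfoKind::None)
        return ""; // caller asked for no file info at all
      std::string Name = "a.cpp";
      if (Spec.FLIKind == FileLineInfoKind::AbsoluteFilePath)
        Name = "/src/" + Name; // prepend the compilation directory
      return Name;
    }

    int main() {
      LineInfoSpec Spec;
      Spec.FLIKind = FileLineInfoKind::AbsoluteFilePath;
      std::cout << fileNameFor(Spec) << "\n"; // prints /src/a.cpp
    }
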
diff --git a/lib/DebugInfo/DWARFContext.h b/lib/DebugInfo/DWARFContext.h
index ad6841a..6d1ae92 100644
--- a/lib/DebugInfo/DWARFContext.h
+++ b/lib/DebugInfo/DWARFContext.h
@@ -242,7 +242,7 @@ class DWARFContextInMemory : public DWARFContext {
StringRef RangeDWOSection;
StringRef AddrSection;
- SmallVector<std::unique_ptr<MemoryBuffer>, 4> UncompressedSections;
+ SmallVector<SmallString<32>, 4> UncompressedSections;
public:
DWARFContextInMemory(object::ObjectFile *);
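
Storing SmallString values instead of MemoryBuffer pointers lets the context own decompressed section bytes directly, using the resize-then-pop idiom from the .cpp hunk above: grow the container, decompress straight into the new slot, and pop it again if decompression fails, so only successful sections stay owned. A stand-alone rendering with std::string and a fake decompressor:

    #include <iostream>
    #include <string>
    #include <vector>

    // Stand-in for zlib::uncompress: fails on odd-sized input, for the demo.
    static bool uncompress(const std::string &In, std::string &Out) {
      if (In.size() % 2)
        return false;
      Out = In; // pretend this inflated the data
      return true;
    }

    int main() {
      std::vector<std::string> UncompressedSections;
      for (const std::string &Raw : {std::string("abcd"), std::string("xyz")}) {
        UncompressedSections.resize(UncompressedSections.size() + 1);
        if (!uncompress(Raw, UncompressedSections.back())) {
          UncompressedSections.pop_back(); // failed: drop the empty slot
          continue;
        }
        std::cout << "kept " << UncompressedSections.back() << "\n";
      }
    }
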
diff --git a/lib/DebugInfo/DWARFDebugAbbrev.cpp b/lib/DebugInfo/DWARFDebugAbbrev.cpp
index fd5f5e9..8426bf9 100644
--- a/lib/DebugInfo/DWARFDebugAbbrev.cpp
+++ b/lib/DebugInfo/DWARFDebugAbbrev.cpp
@@ -12,24 +12,36 @@
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
-bool DWARFAbbreviationDeclarationSet::extract(DataExtractor data,
- uint32_t* offset_ptr) {
- const uint32_t beginOffset = *offset_ptr;
- Offset = beginOffset;
+DWARFAbbreviationDeclarationSet::DWARFAbbreviationDeclarationSet() {
clear();
- DWARFAbbreviationDeclaration abbrevDeclaration;
- uint32_t prevAbbrAode = 0;
- while (abbrevDeclaration.extract(data, offset_ptr)) {
- Decls.push_back(abbrevDeclaration);
- if (IdxOffset == 0) {
- IdxOffset = abbrevDeclaration.getCode();
+}
+
+void DWARFAbbreviationDeclarationSet::clear() {
+ Offset = 0;
+ FirstAbbrCode = 0;
+ Decls.clear();
+}
+
+bool DWARFAbbreviationDeclarationSet::extract(DataExtractor Data,
+ uint32_t *OffsetPtr) {
+ clear();
+ const uint32_t BeginOffset = *OffsetPtr;
+ Offset = BeginOffset;
+ DWARFAbbreviationDeclaration AbbrDecl;
+ uint32_t PrevAbbrCode = 0;
+ while (AbbrDecl.extract(Data, OffsetPtr)) {
+ Decls.push_back(AbbrDecl);
+ if (FirstAbbrCode == 0) {
+ FirstAbbrCode = AbbrDecl.getCode();
} else {
- if (prevAbbrAode + 1 != abbrevDeclaration.getCode())
- IdxOffset = UINT32_MAX;// Out of order indexes, we can't do O(1) lookups
+ if (PrevAbbrCode + 1 != AbbrDecl.getCode()) {
+ // Codes are not consecutive, can't do O(1) lookups.
+ FirstAbbrCode = UINT32_MAX;
+ }
}
- prevAbbrAode = abbrevDeclaration.getCode();
+ PrevAbbrCode = AbbrDecl.getCode();
}
- return beginOffset != *offset_ptr;
+ return BeginOffset != *OffsetPtr;
}
void DWARFAbbreviationDeclarationSet::dump(raw_ostream &OS) const {
@@ -37,67 +49,67 @@ void DWARFAbbreviationDeclarationSet::dump(raw_ostream &OS) const {
Decl.dump(OS);
}
-const DWARFAbbreviationDeclaration*
-DWARFAbbreviationDeclarationSet::getAbbreviationDeclaration(uint32_t abbrCode)
- const {
- if (IdxOffset == UINT32_MAX) {
+const DWARFAbbreviationDeclaration *
+DWARFAbbreviationDeclarationSet::getAbbreviationDeclaration(
+ uint32_t AbbrCode) const {
+ if (FirstAbbrCode == UINT32_MAX) {
for (const auto &Decl : Decls) {
- if (Decl.getCode() == abbrCode)
+ if (Decl.getCode() == AbbrCode)
return &Decl;
}
- } else {
- uint32_t idx = abbrCode - IdxOffset;
- if (idx < Decls.size())
- return &Decls[idx];
+ return nullptr;
}
- return NULL;
+ if (AbbrCode < FirstAbbrCode || AbbrCode >= FirstAbbrCode + Decls.size())
+ return nullptr;
+ return &Decls[AbbrCode - FirstAbbrCode];
}
-DWARFDebugAbbrev::DWARFDebugAbbrev() :
- AbbrevCollMap(),
- PrevAbbrOffsetPos(AbbrevCollMap.end()) {}
-
+DWARFDebugAbbrev::DWARFDebugAbbrev() {
+ clear();
+}
-void DWARFDebugAbbrev::parse(DataExtractor data) {
- uint32_t offset = 0;
+void DWARFDebugAbbrev::clear() {
+ AbbrDeclSets.clear();
+ PrevAbbrOffsetPos = AbbrDeclSets.end();
+}
- while (data.isValidOffset(offset)) {
- uint32_t initial_cu_offset = offset;
- DWARFAbbreviationDeclarationSet abbrevDeclSet;
+void DWARFDebugAbbrev::extract(DataExtractor Data) {
+ clear();
- if (abbrevDeclSet.extract(data, &offset))
- AbbrevCollMap[initial_cu_offset] = abbrevDeclSet;
- else
+ uint32_t Offset = 0;
+ DWARFAbbreviationDeclarationSet AbbrDecls;
+ while (Data.isValidOffset(Offset)) {
+ uint32_t CUAbbrOffset = Offset;
+ if (!AbbrDecls.extract(Data, &Offset))
break;
+ AbbrDeclSets[CUAbbrOffset] = AbbrDecls;
}
- PrevAbbrOffsetPos = AbbrevCollMap.end();
}
void DWARFDebugAbbrev::dump(raw_ostream &OS) const {
- if (AbbrevCollMap.empty()) {
+ if (AbbrDeclSets.empty()) {
OS << "< EMPTY >\n";
return;
}
- for (const auto &I : AbbrevCollMap) {
+ for (const auto &I : AbbrDeclSets) {
OS << format("Abbrev table for offset: 0x%8.8" PRIx64 "\n", I.first);
I.second.dump(OS);
}
}
const DWARFAbbreviationDeclarationSet*
-DWARFDebugAbbrev::getAbbreviationDeclarationSet(uint64_t cu_abbr_offset) const {
- DWARFAbbreviationDeclarationCollMapConstIter end = AbbrevCollMap.end();
- DWARFAbbreviationDeclarationCollMapConstIter pos;
- if (PrevAbbrOffsetPos != end &&
- PrevAbbrOffsetPos->first == cu_abbr_offset) {
+DWARFDebugAbbrev::getAbbreviationDeclarationSet(uint64_t CUAbbrOffset) const {
+ const auto End = AbbrDeclSets.end();
+ if (PrevAbbrOffsetPos != End && PrevAbbrOffsetPos->first == CUAbbrOffset) {
return &(PrevAbbrOffsetPos->second);
- } else {
- pos = AbbrevCollMap.find(cu_abbr_offset);
- PrevAbbrOffsetPos = pos;
}
- if (pos != AbbrevCollMap.end())
- return &(pos->second);
- return NULL;
+ const auto Pos = AbbrDeclSets.find(CUAbbrOffset);
+ if (Pos != End) {
+ PrevAbbrOffsetPos = Pos;
+ return &(Pos->second);
+ }
+
+ return nullptr;
}
diff --git a/lib/DebugInfo/DWARFDebugAbbrev.h b/lib/DebugInfo/DWARFDebugAbbrev.h
index c7c0436..3a9adba 100644
--- a/lib/DebugInfo/DWARFDebugAbbrev.h
+++ b/lib/DebugInfo/DWARFDebugAbbrev.h
@@ -17,55 +17,45 @@
namespace llvm {
-typedef std::vector<DWARFAbbreviationDeclaration>
- DWARFAbbreviationDeclarationColl;
-typedef DWARFAbbreviationDeclarationColl::iterator
- DWARFAbbreviationDeclarationCollIter;
-typedef DWARFAbbreviationDeclarationColl::const_iterator
- DWARFAbbreviationDeclarationCollConstIter;
-
class DWARFAbbreviationDeclarationSet {
uint32_t Offset;
- uint32_t IdxOffset;
+ /// Code of the first abbreviation, if all abbreviations in the set have
+ /// consecutive codes. UINT32_MAX otherwise.
+ uint32_t FirstAbbrCode;
std::vector<DWARFAbbreviationDeclaration> Decls;
- public:
- DWARFAbbreviationDeclarationSet()
- : Offset(0), IdxOffset(0) {}
- DWARFAbbreviationDeclarationSet(uint32_t offset, uint32_t idxOffset)
- : Offset(offset), IdxOffset(idxOffset) {}
+public:
+ DWARFAbbreviationDeclarationSet();
- void clear() {
- IdxOffset = 0;
- Decls.clear();
- }
uint32_t getOffset() const { return Offset; }
void dump(raw_ostream &OS) const;
- bool extract(DataExtractor data, uint32_t* offset_ptr);
+ bool extract(DataExtractor Data, uint32_t *OffsetPtr);
const DWARFAbbreviationDeclaration *
- getAbbreviationDeclaration(uint32_t abbrCode) const;
+ getAbbreviationDeclaration(uint32_t AbbrCode) const;
+
+private:
+ void clear();
};
class DWARFDebugAbbrev {
-public:
typedef std::map<uint64_t, DWARFAbbreviationDeclarationSet>
- DWARFAbbreviationDeclarationCollMap;
- typedef DWARFAbbreviationDeclarationCollMap::iterator
- DWARFAbbreviationDeclarationCollMapIter;
- typedef DWARFAbbreviationDeclarationCollMap::const_iterator
- DWARFAbbreviationDeclarationCollMapConstIter;
+ DWARFAbbreviationDeclarationSetMap;
-private:
- DWARFAbbreviationDeclarationCollMap AbbrevCollMap;
- mutable DWARFAbbreviationDeclarationCollMapConstIter PrevAbbrOffsetPos;
+ DWARFAbbreviationDeclarationSetMap AbbrDeclSets;
+ mutable DWARFAbbreviationDeclarationSetMap::const_iterator PrevAbbrOffsetPos;
public:
DWARFDebugAbbrev();
+
const DWARFAbbreviationDeclarationSet *
- getAbbreviationDeclarationSet(uint64_t cu_abbr_offset) const;
+ getAbbreviationDeclarationSet(uint64_t CUAbbrOffset) const;
+
void dump(raw_ostream &OS) const;
- void parse(DataExtractor data);
+ void extract(DataExtractor Data);
+
+private:
+ void clear();
};
}
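
The mutable PrevAbbrOffsetPos iterator kept in this header is a one-entry cache: consecutive lookups for the same CU abbreviation offset skip the map search entirely. A stand-alone model of that memoized accessor — the names are illustrative, not LLVM's:

    #include <cstdint>
    #include <iostream>
    #include <map>
    #include <string>

    class SetCache {
      std::map<uint64_t, std::string> Sets;
      mutable std::map<uint64_t, std::string>::const_iterator Prev;

    public:
      SetCache() : Prev(Sets.end()) {}
      void insert(uint64_t Off, std::string S) { Sets[Off] = std::move(S); }

      const std::string *get(uint64_t Off) const {
        if (Prev != Sets.end() && Prev->first == Off)
          return &Prev->second; // cache hit: no find() needed
        auto Pos = Sets.find(Off);
        if (Pos != Sets.end()) {
          Prev = Pos;           // remember for the next lookup
          return &Pos->second;
        }
        return nullptr;         // a miss leaves the cache untouched
      }
    };

    int main() {
      SetCache C;
      C.insert(0x40, "abbrevs@0x40");
      std::cout << *C.get(0x40) << "\n"; // populates the cache
      std::cout << *C.get(0x40) << "\n"; // served from the cached iterator
    }
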
diff --git a/lib/DebugInfo/DWARFDebugArangeSet.h b/lib/DebugInfo/DWARFDebugArangeSet.h
index c18b3c5..d6c2d8b 100644
--- a/lib/DebugInfo/DWARFDebugArangeSet.h
+++ b/lib/DebugInfo/DWARFDebugArangeSet.h
@@ -63,7 +63,6 @@ public:
return desc_iterator_range(ArangeDescriptors.begin(),
ArangeDescriptors.end());
}
- uint32_t getNumDescriptors() const { return ArangeDescriptors.size(); }
};
}
diff --git a/lib/DebugInfo/DWARFDebugAranges.cpp b/lib/DebugInfo/DWARFDebugAranges.cpp
index dfab788..2524adc 100644
--- a/lib/DebugInfo/DWARFDebugAranges.cpp
+++ b/lib/DebugInfo/DWARFDebugAranges.cpp
@@ -10,6 +10,7 @@
#include "DWARFDebugAranges.h"
#include "DWARFCompileUnit.h"
#include "DWARFContext.h"
+#include "DWARFDebugArangeSet.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
@@ -20,23 +21,11 @@ void DWARFDebugAranges::extract(DataExtractor DebugArangesData) {
if (!DebugArangesData.isValidOffset(0))
return;
uint32_t Offset = 0;
- typedef std::vector<DWARFDebugArangeSet> RangeSetColl;
- RangeSetColl Sets;
DWARFDebugArangeSet Set;
- uint32_t TotalRanges = 0;
while (Set.extract(DebugArangesData, &Offset)) {
- Sets.push_back(Set);
- TotalRanges += Set.getNumDescriptors();
- }
- if (TotalRanges == 0)
- return;
-
- Aranges.reserve(TotalRanges);
- for (const auto &I : Sets) {
- uint32_t CUOffset = I.getCompileUnitDIEOffset();
-
- for (const auto &Desc : I.descriptors()) {
+ uint32_t CUOffset = Set.getCompileUnitDIEOffset();
+ for (const auto &Desc : Set.descriptors()) {
uint64_t LowPC = Desc.Address;
uint64_t HighPC = Desc.getEndAddress();
appendRange(CUOffset, LowPC, HighPC);
@@ -58,13 +47,23 @@ void DWARFDebugAranges::generate(DWARFContext *CTX) {
// manually build aranges for the rest of them.
for (const auto &CU : CTX->compile_units()) {
uint32_t CUOffset = CU->getOffset();
- if (ParsedCUOffsets.insert(CUOffset).second)
- CU->buildAddressRangeTable(this, true, CUOffset);
+ if (ParsedCUOffsets.insert(CUOffset).second) {
+ DWARFAddressRangesVector CURanges;
+ CU->collectAddressRanges(CURanges);
+ for (const auto &R : CURanges) {
+ appendRange(CUOffset, R.first, R.second);
+ }
+ }
}
sortAndMinimize();
}
+void DWARFDebugAranges::clear() {
+ Aranges.clear();
+ ParsedCUOffsets.clear();
+}
+
void DWARFDebugAranges::appendRange(uint32_t CUOffset, uint64_t LowPC,
uint64_t HighPC) {
if (!Aranges.empty()) {
@@ -101,11 +100,6 @@ void DWARFDebugAranges::sortAndMinimize() {
++minimal_size;
}
- // If the sizes are the same, then no consecutive aranges can be
- // combined, we are done.
- if (minimal_size == orig_arange_size)
- return;
-
// Else, make a new RangeColl that _only_ contains what we need.
RangeColl minimal_aranges;
minimal_aranges.resize(minimal_size);
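
appendRange plus sortAndMinimize implement a collect-then-coalesce scheme: ranges are appended per arange descriptor, then sorted by start address and merged when they touch and belong to the same compile unit. A compact sketch of the minimize step under those assumptions:

    #include <algorithm>
    #include <cstdint>
    #include <iostream>
    #include <vector>

    struct Range { uint64_t LowPC, HighPC; uint32_t CUOffset; };

    // Sort by LowPC, then fold each range into its predecessor when they
    // touch and share a compile unit.
    std::vector<Range> minimize(std::vector<Range> R) {
      std::sort(R.begin(), R.end(),
                [](const Range &A, const Range &B) { return A.LowPC < B.LowPC; });
      std::vector<Range> Out;
      for (const Range &Cur : R) {
        if (!Out.empty() && Out.back().HighPC >= Cur.LowPC &&
            Out.back().CUOffset == Cur.CUOffset)
          Out.back().HighPC = std::max(Out.back().HighPC, Cur.HighPC);
        else
          Out.push_back(Cur);
      }
      return Out;
    }

    int main() {
      std::vector<Range> R = {{0x40, 0x50, 1}, {0x10, 0x20, 1}, {0x20, 0x30, 1}};
      for (const Range &X : minimize(R)) // prints [0x10,0x30) then [0x40,0x50)
        std::cout << std::hex << "[0x" << X.LowPC << ",0x" << X.HighPC << ")\n";
    }
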
diff --git a/lib/DebugInfo/DWARFDebugAranges.h b/lib/DebugInfo/DWARFDebugAranges.h
index 35ad8e5..de96d7f 100644
--- a/lib/DebugInfo/DWARFDebugAranges.h
+++ b/lib/DebugInfo/DWARFDebugAranges.h
@@ -10,9 +10,9 @@
#ifndef LLVM_DEBUGINFO_DWARFDEBUGARANGES_H
#define LLVM_DEBUGINFO_DWARFDEBUGARANGES_H
-#include "DWARFDebugArangeSet.h"
#include "llvm/ADT/DenseSet.h"
-#include <list>
+#include "llvm/Support/DataExtractor.h"
+#include <vector>
namespace llvm {
@@ -20,20 +20,15 @@ class DWARFContext;
class DWARFDebugAranges {
public:
- void clear() {
- Aranges.clear();
- ParsedCUOffsets.clear();
- }
-
void generate(DWARFContext *CTX);
-
- // Use appendRange multiple times and then call sortAndMinimize.
- void appendRange(uint32_t CUOffset, uint64_t LowPC, uint64_t HighPC);
-
uint32_t findAddress(uint64_t Address) const;
private:
+ void clear();
void extract(DataExtractor DebugArangesData);
+
+ // Use appendRange multiple times and then call sortAndMinimize.
+ void appendRange(uint32_t CUOffset, uint64_t LowPC, uint64_t HighPC);
void sortAndMinimize();
struct Range {
diff --git a/lib/DebugInfo/DWARFDebugFrame.cpp b/lib/DebugInfo/DWARFDebugFrame.cpp
index 5bf7b07..a33548e 100644
--- a/lib/DebugInfo/DWARFDebugFrame.cpp
+++ b/lib/DebugInfo/DWARFDebugFrame.cpp
@@ -26,8 +26,8 @@ using namespace dwarf;
class llvm::FrameEntry {
public:
enum FrameKind {FK_CIE, FK_FDE};
- FrameEntry(FrameKind K, DataExtractor D, uint64_t Offset, uint64_t Length)
- : Kind(K), Data(D), Offset(Offset), Length(Length) {}
+ FrameEntry(FrameKind K, uint64_t Offset, uint64_t Length)
+ : Kind(K), Offset(Offset), Length(Length) {}
virtual ~FrameEntry() {
}
@@ -35,11 +35,12 @@ public:
FrameKind getKind() const { return Kind; }
virtual uint64_t getOffset() const { return Offset; }
- /// \brief Parse and store a sequence of CFI instructions from our data
- /// stream, starting at *Offset and ending at EndOffset. If everything
+ /// \brief Parse and store a sequence of CFI instructions from Data,
+ /// starting at *Offset and ending at EndOffset. If everything
/// goes well, *Offset should be equal to EndOffset when this method
/// returns. Otherwise, an error occurred.
- virtual void parseInstructions(uint32_t *Offset, uint32_t EndOffset);
+ virtual void parseInstructions(DataExtractor Data, uint32_t *Offset,
+ uint32_t EndOffset);
/// \brief Dump the entry header to the given output stream.
virtual void dumpHeader(raw_ostream &OS) const = 0;
@@ -50,10 +51,6 @@ public:
protected:
const FrameKind Kind;
- /// \brief The data stream holding the section from which the entry was
- /// parsed.
- DataExtractor Data;
-
/// \brief Offset of this entry in the section.
uint64_t Offset;
@@ -97,8 +94,8 @@ protected:
const uint8_t DWARF_CFI_PRIMARY_OPCODE_MASK = 0xc0;
const uint8_t DWARF_CFI_PRIMARY_OPERAND_MASK = 0x3f;
-
-void FrameEntry::parseInstructions(uint32_t *Offset, uint32_t EndOffset) {
+void FrameEntry::parseInstructions(DataExtractor Data, uint32_t *Offset,
+ uint32_t EndOffset) {
while (*Offset < EndOffset) {
uint8_t Opcode = Data.getU8(Offset);
// Some instructions have a primary opcode encoded in the top bits.
@@ -201,13 +198,13 @@ class CIE : public FrameEntry {
public:
// CIEs (and FDEs) are simply container classes, so the only sensible way to
// create them is by providing the full parsed contents in the constructor.
- CIE(DataExtractor D, uint64_t Offset, uint64_t Length, uint8_t Version,
+ CIE(uint64_t Offset, uint64_t Length, uint8_t Version,
SmallString<8> Augmentation, uint64_t CodeAlignmentFactor,
int64_t DataAlignmentFactor, uint64_t ReturnAddressRegister)
- : FrameEntry(FK_CIE, D, Offset, Length), Version(Version),
- Augmentation(Augmentation), CodeAlignmentFactor(CodeAlignmentFactor),
- DataAlignmentFactor(DataAlignmentFactor),
- ReturnAddressRegister(ReturnAddressRegister) {}
+ : FrameEntry(FK_CIE, Offset, Length), Version(Version),
+ Augmentation(Augmentation), CodeAlignmentFactor(CodeAlignmentFactor),
+ DataAlignmentFactor(DataAlignmentFactor),
+ ReturnAddressRegister(ReturnAddressRegister) {}
~CIE() {
}
@@ -229,7 +226,7 @@ public:
static bool classof(const FrameEntry *FE) {
return FE->getKind() == FK_CIE;
- }
+ }
private:
/// The following fields are defined in section 6.4.1 of the DWARF standard v3
@@ -247,11 +244,11 @@ public:
// Each FDE has a CIE it's "linked to". Our FDE contains is constructed with
// an offset to the CIE (provided by parsing the FDE header). The CIE itself
// is obtained lazily once it's actually required.
- FDE(DataExtractor D, uint64_t Offset, uint64_t Length,
- int64_t LinkedCIEOffset, uint64_t InitialLocation, uint64_t AddressRange)
- : FrameEntry(FK_FDE, D, Offset, Length), LinkedCIEOffset(LinkedCIEOffset),
- InitialLocation(InitialLocation), AddressRange(AddressRange),
- LinkedCIE(NULL) {}
+ FDE(uint64_t Offset, uint64_t Length, int64_t LinkedCIEOffset,
+ uint64_t InitialLocation, uint64_t AddressRange)
+ : FrameEntry(FK_FDE, Offset, Length), LinkedCIEOffset(LinkedCIEOffset),
+ InitialLocation(InitialLocation), AddressRange(AddressRange),
+ LinkedCIE(nullptr) {}
~FDE() {
}
@@ -270,9 +267,9 @@ public:
static bool classof(const FrameEntry *FE) {
return FE->getKind() == FK_FDE;
- }
-private:
+ }
+private:
/// The following fields are defined in section 6.4.1 of the DWARF standard v3
uint64_t LinkedCIEOffset;
uint64_t InitialLocation;
@@ -285,14 +282,9 @@ private:
DWARFDebugFrame::DWARFDebugFrame() {
}
-
DWARFDebugFrame::~DWARFDebugFrame() {
- for (const auto &Entry : Entries) {
- delete Entry;
- }
}
-
static void LLVM_ATTRIBUTE_UNUSED dumpDataAux(DataExtractor Data,
uint32_t Offset, int Length) {
errs() << "DUMP: ";
@@ -334,7 +326,6 @@ void DWARFDebugFrame::parse(DataExtractor Data) {
Id = Data.getUnsigned(&Offset, IsDWARF64 ? 8 : 4);
bool IsCIE = ((IsDWARF64 && Id == DW64_CIE_ID) || Id == DW_CIE_ID);
- FrameEntry *Entry = 0;
if (IsCIE) {
// Note: this is specifically DWARFv3 CIE header structure. It was
// changed in DWARFv4. We currently don't support reading DWARFv4
@@ -346,30 +337,25 @@ void DWARFDebugFrame::parse(DataExtractor Data) {
int64_t DataAlignmentFactor = Data.getSLEB128(&Offset);
uint64_t ReturnAddressRegister = Data.getULEB128(&Offset);
- Entry = new CIE(Data, StartOffset, Length, Version,
- StringRef(Augmentation), CodeAlignmentFactor,
- DataAlignmentFactor, ReturnAddressRegister);
+ Entries.emplace_back(new CIE(StartOffset, Length, Version,
+ StringRef(Augmentation), CodeAlignmentFactor,
+ DataAlignmentFactor, ReturnAddressRegister));
} else {
// FDE
uint64_t CIEPointer = Id;
uint64_t InitialLocation = Data.getAddress(&Offset);
uint64_t AddressRange = Data.getAddress(&Offset);
- Entry = new FDE(Data, StartOffset, Length, CIEPointer,
- InitialLocation, AddressRange);
+ Entries.emplace_back(new FDE(StartOffset, Length, CIEPointer,
+ InitialLocation, AddressRange));
}
- assert(Entry && "Expected Entry to be populated with CIE or FDE");
- Entry->parseInstructions(&Offset, EndStructureOffset);
+ Entries.back()->parseInstructions(Data, &Offset, EndStructureOffset);
- if (Offset == EndStructureOffset) {
- // Entry instrucitons parsed successfully.
- Entries.push_back(Entry);
- } else {
+ if (Offset != EndStructureOffset) {
std::string Str;
raw_string_ostream OS(Str);
- OS << format("Parsing entry instructions at %lx failed",
- Entry->getOffset());
+ OS << format("Parsing entry instructions at %lx failed", StartOffset);
report_fatal_error(Str);
}
}
diff --git a/lib/DebugInfo/DWARFDebugFrame.h b/lib/DebugInfo/DWARFDebugFrame.h
index 7683849..bd4ef45 100644
--- a/lib/DebugInfo/DWARFDebugFrame.h
+++ b/lib/DebugInfo/DWARFDebugFrame.h
@@ -12,14 +12,13 @@
#include "llvm/Support/DataExtractor.h"
#include "llvm/Support/raw_ostream.h"
+#include <memory>
#include <vector>
-
namespace llvm {
class FrameEntry;
-
/// \brief A parsed .debug_frame section
///
class DWARFDebugFrame {
@@ -35,8 +34,7 @@ public:
void parse(DataExtractor Data);
private:
- typedef std::vector<FrameEntry *> EntryVector;
- EntryVector Entries;
+ std::vector<std::unique_ptr<FrameEntry>> Entries;
};
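
Switching Entries to std::vector<std::unique_ptr<FrameEntry>> retires the hand-written loop of deletes in ~DWARFDebugFrame: element destruction now rides on the vector itself. The same ownership move in miniature:

    #include <iostream>
    #include <memory>
    #include <vector>

    // Heap-allocated polymorphic entries, as in the .cpp hunk above.
    struct FrameEntry {
      virtual ~FrameEntry() { std::cout << "destroyed\n"; }
      virtual const char *kind() const = 0;
    };
    struct CIE : FrameEntry { const char *kind() const override { return "CIE"; } };
    struct FDE : FrameEntry { const char *kind() const override { return "FDE"; } };

    int main() {
      std::vector<std::unique_ptr<FrameEntry>> Entries;
      Entries.emplace_back(new CIE());
      Entries.emplace_back(new FDE());
      for (const auto &E : Entries)
        std::cout << E->kind() << "\n";
      // No explicit cleanup: the vector's destructor deletes both entries.
    }
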
diff --git a/lib/DebugInfo/DWARFDebugInfoEntry.cpp b/lib/DebugInfo/DWARFDebugInfoEntry.cpp
index bde25ec..b811ed7 100644
--- a/lib/DebugInfo/DWARFDebugInfoEntry.cpp
+++ b/lib/DebugInfo/DWARFDebugInfoEntry.cpp
@@ -18,6 +18,7 @@
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
using namespace dwarf;
+typedef DILineInfoSpecifier::FunctionNameKind FunctionNameKind;
void DWARFDebugInfoEntryMinimal::dump(raw_ostream &OS, const DWARFUnit *u,
unsigned recurseDepth,
@@ -99,11 +100,11 @@ bool DWARFDebugInfoEntryMinimal::extractFast(const DWARFUnit *U,
uint64_t AbbrCode = DebugInfoData.getULEB128(OffsetPtr);
if (0 == AbbrCode) {
// NULL debug tag entry.
- AbbrevDecl = NULL;
+ AbbrevDecl = nullptr;
return true;
}
AbbrevDecl = U->getAbbreviations()->getAbbreviationDeclaration(AbbrCode);
- if (0 == AbbrevDecl) {
+ if (nullptr == AbbrevDecl) {
// Restore the original offset.
*OffsetPtr = Offset;
return false;
@@ -226,54 +227,66 @@ bool DWARFDebugInfoEntryMinimal::getLowAndHighPC(const DWARFUnit *U,
return (HighPC != -1ULL);
}
-void DWARFDebugInfoEntryMinimal::buildAddressRangeTable(
- const DWARFUnit *U, DWARFDebugAranges *DebugAranges,
- uint32_t UOffsetInAranges) const {
- if (AbbrevDecl) {
- if (isSubprogramDIE()) {
- uint64_t LowPC, HighPC;
- if (getLowAndHighPC(U, LowPC, HighPC))
- DebugAranges->appendRange(UOffsetInAranges, LowPC, HighPC);
- // FIXME: try to append ranges from .debug_ranges section.
- }
-
- const DWARFDebugInfoEntryMinimal *Child = getFirstChild();
- while (Child) {
- Child->buildAddressRangeTable(U, DebugAranges, UOffsetInAranges);
- Child = Child->getSibling();
- }
- }
-}
-
-bool DWARFDebugInfoEntryMinimal::addressRangeContainsAddress(
- const DWARFUnit *U, const uint64_t Address) const {
+DWARFAddressRangesVector
+DWARFDebugInfoEntryMinimal::getAddressRanges(const DWARFUnit *U) const {
if (isNULL())
- return false;
+ return DWARFAddressRangesVector();
+ // Single range specified by low/high PC.
uint64_t LowPC, HighPC;
- if (getLowAndHighPC(U, LowPC, HighPC))
- return (LowPC <= Address && Address <= HighPC);
- // Try to get address ranges from .debug_ranges section.
+ if (getLowAndHighPC(U, LowPC, HighPC)) {
+ return DWARFAddressRangesVector(1, std::make_pair(LowPC, HighPC));
+ }
+ // Multiple ranges from .debug_ranges section.
uint32_t RangesOffset =
getAttributeValueAsSectionOffset(U, DW_AT_ranges, -1U);
if (RangesOffset != -1U) {
DWARFDebugRangeList RangeList;
if (U->extractRangeList(RangesOffset, RangeList))
- return RangeList.containsAddress(U->getBaseAddress(), Address);
+ return RangeList.getAbsoluteRanges(U->getBaseAddress());
+ }
+ return DWARFAddressRangesVector();
+}
+
+void DWARFDebugInfoEntryMinimal::collectChildrenAddressRanges(
+ const DWARFUnit *U, DWARFAddressRangesVector& Ranges) const {
+ if (isNULL())
+ return;
+ if (isSubprogramDIE()) {
+ const auto &DIERanges = getAddressRanges(U);
+ Ranges.insert(Ranges.end(), DIERanges.begin(), DIERanges.end());
+ }
+
+ const DWARFDebugInfoEntryMinimal *Child = getFirstChild();
+ while (Child) {
+ Child->collectChildrenAddressRanges(U, Ranges);
+ Child = Child->getSibling();
+ }
+}
+
+bool DWARFDebugInfoEntryMinimal::addressRangeContainsAddress(
+ const DWARFUnit *U, const uint64_t Address) const {
+ for (const auto& R : getAddressRanges(U)) {
+ if (R.first <= Address && Address < R.second)
+ return true;
}
return false;
}
const char *
-DWARFDebugInfoEntryMinimal::getSubroutineName(const DWARFUnit *U) const {
- if (!isSubroutineDIE())
- return 0;
- // Try to get mangled name if possible.
- if (const char *name =
- getAttributeValueAsString(U, DW_AT_MIPS_linkage_name, 0))
- return name;
- if (const char *name = getAttributeValueAsString(U, DW_AT_linkage_name, 0))
- return name;
- if (const char *name = getAttributeValueAsString(U, DW_AT_name, 0))
+DWARFDebugInfoEntryMinimal::getSubroutineName(const DWARFUnit *U,
+ FunctionNameKind Kind) const {
+ if (!isSubroutineDIE() || Kind == FunctionNameKind::None)
+ return nullptr;
+ // Try to get mangled name only if it was asked for.
+ if (Kind == FunctionNameKind::LinkageName) {
+ if (const char *name =
+ getAttributeValueAsString(U, DW_AT_MIPS_linkage_name, nullptr))
+ return name;
+ if (const char *name =
+ getAttributeValueAsString(U, DW_AT_linkage_name, nullptr))
+ return name;
+ }
+ if (const char *name = getAttributeValueAsString(U, DW_AT_name, nullptr))
return name;
// Try to get name from specification DIE.
uint32_t spec_ref =
@@ -281,7 +294,7 @@ DWARFDebugInfoEntryMinimal::getSubroutineName(const DWARFUnit *U) const {
if (spec_ref != -1U) {
DWARFDebugInfoEntryMinimal spec_die;
if (spec_die.extractFast(U, &spec_ref)) {
- if (const char *name = spec_die.getSubroutineName(U))
+ if (const char *name = spec_die.getSubroutineName(U, Kind))
return name;
}
}
@@ -291,11 +304,11 @@ DWARFDebugInfoEntryMinimal::getSubroutineName(const DWARFUnit *U) const {
if (abs_origin_ref != -1U) {
DWARFDebugInfoEntryMinimal abs_origin_die;
if (abs_origin_die.extractFast(U, &abs_origin_ref)) {
- if (const char *name = abs_origin_die.getSubroutineName(U))
+ if (const char *name = abs_origin_die.getSubroutineName(U, Kind))
return name;
}
}
- return 0;
+ return nullptr;
}
void DWARFDebugInfoEntryMinimal::getCallerFrame(const DWARFUnit *U,
diff --git a/lib/DebugInfo/DWARFDebugInfoEntry.h b/lib/DebugInfo/DWARFDebugInfoEntry.h
index f30e531..916e1ed 100644
--- a/lib/DebugInfo/DWARFDebugInfoEntry.h
+++ b/lib/DebugInfo/DWARFDebugInfoEntry.h
@@ -11,7 +11,9 @@
#define LLVM_DEBUGINFO_DWARFDEBUGINFOENTRY_H
#include "DWARFAbbreviationDeclaration.h"
+#include "DWARFDebugRangeList.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/DebugInfo/DIContext.h"
#include "llvm/Support/DataTypes.h"
namespace llvm {
@@ -28,17 +30,13 @@ class DWARFDebugInfoEntryMinimal {
/// Offset within the .debug_info of the start of this entry.
uint32_t Offset;
- /// How many to subtract from "this" to get the parent.
- /// If zero this die has no parent.
- uint32_t ParentIdx;
-
/// How many to add to "this" to get the sibling.
uint32_t SiblingIdx;
const DWARFAbbreviationDeclaration *AbbrevDecl;
public:
DWARFDebugInfoEntryMinimal()
- : Offset(0), ParentIdx(0), SiblingIdx(0), AbbrevDecl(0) {}
+ : Offset(0), SiblingIdx(0), AbbrevDecl(nullptr) {}
void dump(raw_ostream &OS, const DWARFUnit *u, unsigned recurseDepth,
unsigned indent = 0) const;
@@ -51,7 +49,7 @@ public:
bool extractFast(const DWARFUnit *U, uint32_t *OffsetPtr);
uint32_t getTag() const { return AbbrevDecl ? AbbrevDecl->getTag() : 0; }
- bool isNULL() const { return AbbrevDecl == 0; }
+ bool isNULL() const { return AbbrevDecl == nullptr; }
/// Returns true if DIE represents a subprogram (not inlined).
bool isSubprogramDIE() const;
@@ -63,45 +61,23 @@ public:
bool hasChildren() const { return !isNULL() && AbbrevDecl->hasChildren(); }
// We know we are kept in a vector of contiguous entries, so we know
- // our parent will be some index behind "this".
- DWARFDebugInfoEntryMinimal *getParent() {
- return ParentIdx > 0 ? this - ParentIdx : 0;
- }
- const DWARFDebugInfoEntryMinimal *getParent() const {
- return ParentIdx > 0 ? this - ParentIdx : 0;
- }
- // We know we are kept in a vector of contiguous entries, so we know
// our sibling will be some index after "this".
- DWARFDebugInfoEntryMinimal *getSibling() {
- return SiblingIdx > 0 ? this + SiblingIdx : 0;
- }
const DWARFDebugInfoEntryMinimal *getSibling() const {
- return SiblingIdx > 0 ? this + SiblingIdx : 0;
+ return SiblingIdx > 0 ? this + SiblingIdx : nullptr;
}
+
// We know we are kept in a vector of contiguous entries, so we know
// we don't need to store our child pointer, if we have a child it will
// be the next entry in the list...
- DWARFDebugInfoEntryMinimal *getFirstChild() {
- return hasChildren() ? this + 1 : 0;
- }
const DWARFDebugInfoEntryMinimal *getFirstChild() const {
- return hasChildren() ? this + 1 : 0;
+ return hasChildren() ? this + 1 : nullptr;
}
- void setParent(DWARFDebugInfoEntryMinimal *parent) {
- if (parent) {
- // We know we are kept in a vector of contiguous entries, so we know
- // our parent will be some index behind "this".
- ParentIdx = this - parent;
- } else
- ParentIdx = 0;
- }
- void setSibling(DWARFDebugInfoEntryMinimal *sibling) {
- if (sibling) {
+ void setSibling(const DWARFDebugInfoEntryMinimal *Sibling) {
+ if (Sibling) {
// We know we are kept in a vector of contiguous entries, so we know
// our sibling will be some index after "this".
- SiblingIdx = sibling - this;
- sibling->setParent(getParent());
+ SiblingIdx = Sibling - this;
} else
SiblingIdx = 0;
}
@@ -135,9 +111,10 @@ public:
bool getLowAndHighPC(const DWARFUnit *U, uint64_t &LowPC,
uint64_t &HighPC) const;
- void buildAddressRangeTable(const DWARFUnit *U,
- DWARFDebugAranges *DebugAranges,
- uint32_t CUOffsetInAranges) const;
+ DWARFAddressRangesVector getAddressRanges(const DWARFUnit *U) const;
+
+ void collectChildrenAddressRanges(const DWARFUnit *U,
+ DWARFAddressRangesVector &Ranges) const;
bool addressRangeContainsAddress(const DWARFUnit *U,
const uint64_t Address) const;
@@ -146,7 +123,9 @@ public:
/// returns its mangled name (or short name, if mangled is missing).
/// This name may be fetched from specification or abstract origin
/// for this subprogram. Returns null if no name is found.
- const char *getSubroutineName(const DWARFUnit *U) const;
+ const char *
+ getSubroutineName(const DWARFUnit *U,
+ DILineInfoSpecifier::FunctionNameKind Kind) const;
/// Retrieves values of DW_AT_call_file, DW_AT_call_line and
/// DW_AT_call_column from DIE (or zeroes if they are missing).
@@ -166,7 +145,7 @@ public:
/// (except the last DIE) in this chain is contained in address
/// range for next DIE in the chain.
struct DWARFDebugInfoEntryInlinedChain {
- DWARFDebugInfoEntryInlinedChain() : U(0) {}
+ DWARFDebugInfoEntryInlinedChain() : U(nullptr) {}
SmallVector<DWARFDebugInfoEntryMinimal, 4> DIEs;
const DWARFUnit *U;
};
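
Because DIEs are stored in one contiguous vector, a sibling can be reached as this + SiblingIdx and no pointer needs storing; deleting the now-unused ParentIdx drops a field from every entry. A stand-alone model of the relative-index trick (DIE here is a toy, not the real DWARFDebugInfoEntryMinimal):

    #include <iostream>
    #include <vector>

    struct DIE {
      unsigned Offset = 0;
      unsigned SiblingIdx = 0; // 0 means "no sibling"

      const DIE *getSibling() const {
        // Valid because all DIEs live in the same contiguous array.
        return SiblingIdx > 0 ? this + SiblingIdx : nullptr;
      }
      void setSibling(const DIE *Sibling) {
        SiblingIdx = Sibling ? static_cast<unsigned>(Sibling - this) : 0;
      }
    };

    int main() {
      std::vector<DIE> DIEs(3);
      DIEs[0].Offset = 0x10; DIEs[1].Offset = 0x18; DIEs[2].Offset = 0x20;
      DIEs[0].setSibling(&DIEs[2]); // DIEs[1] is a child of DIEs[0]
      std::cout << std::hex << "0x" << DIEs[0].getSibling()->Offset << "\n";
    }
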
diff --git a/lib/DebugInfo/DWARFDebugLine.cpp b/lib/DebugInfo/DWARFDebugLine.cpp
index 43d9764..ce87635 100644
--- a/lib/DebugInfo/DWARFDebugLine.cpp
+++ b/lib/DebugInfo/DWARFDebugLine.cpp
@@ -15,6 +15,20 @@
#include <algorithm>
using namespace llvm;
using namespace dwarf;
+typedef DILineInfoSpecifier::FileLineInfoKind FileLineInfoKind;
+
+DWARFDebugLine::Prologue::Prologue() {
+ clear();
+}
+
+void DWARFDebugLine::Prologue::clear() {
+ TotalLength = Version = PrologueLength = 0;
+ MinInstLength = MaxOpsPerInst = DefaultIsStmt = LineBase = LineRange = 0;
+ OpcodeBase = 0;
+ StandardOpcodeLengths.clear();
+ IncludeDirectories.clear();
+ FileNames.clear();
+}
void DWARFDebugLine::Prologue::dump(raw_ostream &OS) const {
OS << "Line table prologue:\n"
@@ -51,6 +65,67 @@ void DWARFDebugLine::Prologue::dump(raw_ostream &OS) const {
}
}
+bool DWARFDebugLine::Prologue::parse(DataExtractor debug_line_data,
+ uint32_t *offset_ptr) {
+ const uint32_t prologue_offset = *offset_ptr;
+
+ clear();
+ TotalLength = debug_line_data.getU32(offset_ptr);
+ Version = debug_line_data.getU16(offset_ptr);
+ if (Version < 2)
+ return false;
+
+ PrologueLength = debug_line_data.getU32(offset_ptr);
+ const uint32_t end_prologue_offset = PrologueLength + *offset_ptr;
+ MinInstLength = debug_line_data.getU8(offset_ptr);
+ if (Version >= 4)
+ MaxOpsPerInst = debug_line_data.getU8(offset_ptr);
+ DefaultIsStmt = debug_line_data.getU8(offset_ptr);
+ LineBase = debug_line_data.getU8(offset_ptr);
+ LineRange = debug_line_data.getU8(offset_ptr);
+ OpcodeBase = debug_line_data.getU8(offset_ptr);
+
+ StandardOpcodeLengths.reserve(OpcodeBase - 1);
+ for (uint32_t i = 1; i < OpcodeBase; ++i) {
+ uint8_t op_len = debug_line_data.getU8(offset_ptr);
+ StandardOpcodeLengths.push_back(op_len);
+ }
+
+ while (*offset_ptr < end_prologue_offset) {
+ const char *s = debug_line_data.getCStr(offset_ptr);
+ if (s && s[0])
+ IncludeDirectories.push_back(s);
+ else
+ break;
+ }
+
+ while (*offset_ptr < end_prologue_offset) {
+ const char *name = debug_line_data.getCStr(offset_ptr);
+ if (name && name[0]) {
+ FileNameEntry fileEntry;
+ fileEntry.Name = name;
+ fileEntry.DirIdx = debug_line_data.getULEB128(offset_ptr);
+ fileEntry.ModTime = debug_line_data.getULEB128(offset_ptr);
+ fileEntry.Length = debug_line_data.getULEB128(offset_ptr);
+ FileNames.push_back(fileEntry);
+ } else {
+ break;
+ }
+ }
+
+ if (*offset_ptr != end_prologue_offset) {
+ fprintf(stderr, "warning: parsing line table prologue at 0x%8.8x should"
+ " have ended at 0x%8.8x but it ended at 0x%8.8x\n",
+ prologue_offset, end_prologue_offset, *offset_ptr);
+ return false;
+ }
+ return true;
+}
+
+DWARFDebugLine::Row::Row(bool default_is_stmt) {
+ reset(default_is_stmt);
+}
+
void DWARFDebugLine::Row::postAppend() {
BasicBlock = false;
PrologueEnd = false;
@@ -82,6 +157,22 @@ void DWARFDebugLine::Row::dump(raw_ostream &OS) const {
<< '\n';
}
+DWARFDebugLine::Sequence::Sequence() {
+ reset();
+}
+
+void DWARFDebugLine::Sequence::reset() {
+ LowPC = 0;
+ HighPC = 0;
+ FirstRowIndex = 0;
+ LastRowIndex = 0;
+ Empty = true;
+}
+
+DWARFDebugLine::LineTable::LineTable() {
+ clear();
+}
+
void DWARFDebugLine::LineTable::dump(raw_ostream &OS) const {
Prologue.dump(OS);
OS << '\n';
@@ -96,50 +187,40 @@ void DWARFDebugLine::LineTable::dump(raw_ostream &OS) const {
}
}
-DWARFDebugLine::State::~State() {}
-
-void DWARFDebugLine::State::appendRowToMatrix(uint32_t offset) {
- if (Sequence::Empty) {
- // Record the beginning of instruction sequence.
- Sequence::Empty = false;
- Sequence::LowPC = Address;
- Sequence::FirstRowIndex = row;
- }
- ++row; // Increase the row number.
- LineTable::appendRow(*this);
- if (EndSequence) {
- // Record the end of instruction sequence.
- Sequence::HighPC = Address;
- Sequence::LastRowIndex = row;
- if (Sequence::isValid())
- LineTable::appendSequence(*this);
- Sequence::reset();
- }
- Row::postAppend();
+void DWARFDebugLine::LineTable::clear() {
+ Prologue.clear();
+ Rows.clear();
+ Sequences.clear();
}
-void DWARFDebugLine::State::finalize() {
- row = DoneParsingLineTable;
- if (!Sequence::Empty) {
- fprintf(stderr, "warning: last sequence in debug line table is not"
- "terminated!\n");
- }
- // Sort all sequences so that address lookup will work faster.
- if (!Sequences.empty()) {
- std::sort(Sequences.begin(), Sequences.end(), Sequence::orderByLowPC);
- // Note: actually, instruction address ranges of sequences should not
- // overlap (in shared objects and executables). If they do, the address
- // lookup would still work, though, but result would be ambiguous.
- // We don't report warning in this case. For example,
- // sometimes .so compiled from multiple object files contains a few
- // rudimentary sequences for address ranges [0x0, 0xsomething).
- }
+DWARFDebugLine::ParsingState::ParsingState(struct LineTable *LT)
+ : LineTable(LT), RowNumber(0) {
+ resetRowAndSequence();
}
-DWARFDebugLine::DumpingState::~DumpingState() {}
+void DWARFDebugLine::ParsingState::resetRowAndSequence() {
+ Row.reset(LineTable->Prologue.DefaultIsStmt);
+ Sequence.reset();
+}
-void DWARFDebugLine::DumpingState::finalize() {
- LineTable::dump(OS);
+void DWARFDebugLine::ParsingState::appendRowToMatrix(uint32_t offset) {
+ if (Sequence.Empty) {
+ // Record the beginning of instruction sequence.
+ Sequence.Empty = false;
+ Sequence.LowPC = Row.Address;
+ Sequence.FirstRowIndex = RowNumber;
+ }
+ ++RowNumber;
+ LineTable->appendRow(Row);
+ if (Row.EndSequence) {
+ // Record the end of instruction sequence.
+ Sequence.HighPC = Row.Address;
+ Sequence.LastRowIndex = RowNumber;
+ if (Sequence.isValid())
+ LineTable->appendSequence(Sequence);
+ Sequence.reset();
+ }
+ Row.postAppend();
}
const DWARFDebugLine::LineTable *
@@ -147,7 +228,7 @@ DWARFDebugLine::getLineTable(uint32_t offset) const {
LineTableConstIter pos = LineTableMap.find(offset);
if (pos != LineTableMap.end())
return &pos->second;
- return 0;
+ return nullptr;
}
const DWARFDebugLine::LineTable *
@@ -155,91 +236,31 @@ DWARFDebugLine::getOrParseLineTable(DataExtractor debug_line_data,
uint32_t offset) {
std::pair<LineTableIter, bool> pos =
LineTableMap.insert(LineTableMapTy::value_type(offset, LineTable()));
+ LineTable *LT = &pos.first->second;
if (pos.second) {
- // Parse and cache the line table for at this offset.
- State state;
- if (!parseStatementTable(debug_line_data, RelocMap, &offset, state))
- return 0;
- pos.first->second = state;
+ if (!LT->parse(debug_line_data, RelocMap, &offset))
+ return nullptr;
}
- return &pos.first->second;
+ return LT;
}
-bool
-DWARFDebugLine::parsePrologue(DataExtractor debug_line_data,
- uint32_t *offset_ptr, Prologue *prologue) {
- const uint32_t prologue_offset = *offset_ptr;
-
- prologue->clear();
- prologue->TotalLength = debug_line_data.getU32(offset_ptr);
- prologue->Version = debug_line_data.getU16(offset_ptr);
- if (prologue->Version < 2)
- return false;
-
- prologue->PrologueLength = debug_line_data.getU32(offset_ptr);
- const uint32_t end_prologue_offset = prologue->PrologueLength + *offset_ptr;
- prologue->MinInstLength = debug_line_data.getU8(offset_ptr);
- if (prologue->Version >= 4)
- prologue->MaxOpsPerInst = debug_line_data.getU8(offset_ptr);
- prologue->DefaultIsStmt = debug_line_data.getU8(offset_ptr);
- prologue->LineBase = debug_line_data.getU8(offset_ptr);
- prologue->LineRange = debug_line_data.getU8(offset_ptr);
- prologue->OpcodeBase = debug_line_data.getU8(offset_ptr);
-
- prologue->StandardOpcodeLengths.reserve(prologue->OpcodeBase-1);
- for (uint32_t i = 1; i < prologue->OpcodeBase; ++i) {
- uint8_t op_len = debug_line_data.getU8(offset_ptr);
- prologue->StandardOpcodeLengths.push_back(op_len);
- }
-
- while (*offset_ptr < end_prologue_offset) {
- const char *s = debug_line_data.getCStr(offset_ptr);
- if (s && s[0])
- prologue->IncludeDirectories.push_back(s);
- else
- break;
- }
-
- while (*offset_ptr < end_prologue_offset) {
- const char *name = debug_line_data.getCStr(offset_ptr);
- if (name && name[0]) {
- FileNameEntry fileEntry;
- fileEntry.Name = name;
- fileEntry.DirIdx = debug_line_data.getULEB128(offset_ptr);
- fileEntry.ModTime = debug_line_data.getULEB128(offset_ptr);
- fileEntry.Length = debug_line_data.getULEB128(offset_ptr);
- prologue->FileNames.push_back(fileEntry);
- } else {
- break;
- }
- }
-
- if (*offset_ptr != end_prologue_offset) {
- fprintf(stderr, "warning: parsing line table prologue at 0x%8.8x should"
- " have ended at 0x%8.8x but it ended at 0x%8.8x\n",
- prologue_offset, end_prologue_offset, *offset_ptr);
- return false;
- }
- return true;
-}
-
-bool DWARFDebugLine::parseStatementTable(DataExtractor debug_line_data,
- const RelocAddrMap *RMap,
- uint32_t *offset_ptr, State &state) {
+bool DWARFDebugLine::LineTable::parse(DataExtractor debug_line_data,
+ const RelocAddrMap *RMap,
+ uint32_t *offset_ptr) {
const uint32_t debug_line_offset = *offset_ptr;
- Prologue *prologue = &state.Prologue;
+ clear();
- if (!parsePrologue(debug_line_data, offset_ptr, prologue)) {
+ if (!Prologue.parse(debug_line_data, offset_ptr)) {
// Restore our offset and return false to indicate failure!
*offset_ptr = debug_line_offset;
return false;
}
- const uint32_t end_offset = debug_line_offset + prologue->TotalLength +
- sizeof(prologue->TotalLength);
+ const uint32_t end_offset = debug_line_offset + Prologue.TotalLength +
+ sizeof(Prologue.TotalLength);
- state.reset();
+ ParsingState State(this);
while (*offset_ptr < end_offset) {
uint8_t opcode = debug_line_data.getU8(offset_ptr);
@@ -261,9 +282,9 @@ bool DWARFDebugLine::parseStatementTable(DataExtractor debug_line_data,
// with a DW_LNE_end_sequence instruction which creates a row whose
// address is that of the byte after the last target machine instruction
// of the sequence.
- state.EndSequence = true;
- state.appendRowToMatrix(*offset_ptr);
- state.reset();
+ State.Row.EndSequence = true;
+ State.appendRowToMatrix(*offset_ptr);
+ State.resetRowAndSequence();
break;
case DW_LNE_set_address:
@@ -278,9 +299,10 @@ bool DWARFDebugLine::parseStatementTable(DataExtractor debug_line_data,
RelocAddrMap::const_iterator AI = RMap->find(*offset_ptr);
if (AI != RMap->end()) {
const std::pair<uint8_t, int64_t> &R = AI->second;
- state.Address = debug_line_data.getAddress(offset_ptr) + R.second;
+ State.Row.Address =
+ debug_line_data.getAddress(offset_ptr) + R.second;
} else
- state.Address = debug_line_data.getAddress(offset_ptr);
+ State.Row.Address = debug_line_data.getAddress(offset_ptr);
}
break;
@@ -311,12 +333,12 @@ bool DWARFDebugLine::parseStatementTable(DataExtractor debug_line_data,
fileEntry.DirIdx = debug_line_data.getULEB128(offset_ptr);
fileEntry.ModTime = debug_line_data.getULEB128(offset_ptr);
fileEntry.Length = debug_line_data.getULEB128(offset_ptr);
- prologue->FileNames.push_back(fileEntry);
+ Prologue.FileNames.push_back(fileEntry);
}
break;
case DW_LNE_set_discriminator:
- state.Discriminator = debug_line_data.getULEB128(offset_ptr);
+ State.Row.Discriminator = debug_line_data.getULEB128(offset_ptr);
break;
default:
@@ -325,52 +347,52 @@ bool DWARFDebugLine::parseStatementTable(DataExtractor debug_line_data,
(*offset_ptr) += arg_size;
break;
}
- } else if (opcode < prologue->OpcodeBase) {
+ } else if (opcode < Prologue.OpcodeBase) {
switch (opcode) {
// Standard Opcodes
case DW_LNS_copy:
// Takes no arguments. Append a row to the matrix using the
// current values of the state-machine registers. Then set
// the basic_block register to false.
- state.appendRowToMatrix(*offset_ptr);
+ State.appendRowToMatrix(*offset_ptr);
break;
case DW_LNS_advance_pc:
// Takes a single unsigned LEB128 operand, multiplies it by the
// min_inst_length field of the prologue, and adds the
// result to the address register of the state machine.
- state.Address += debug_line_data.getULEB128(offset_ptr) *
- prologue->MinInstLength;
+ State.Row.Address +=
+ debug_line_data.getULEB128(offset_ptr) * Prologue.MinInstLength;
break;
case DW_LNS_advance_line:
// Takes a single signed LEB128 operand and adds that value to
// the line register of the state machine.
- state.Line += debug_line_data.getSLEB128(offset_ptr);
+ State.Row.Line += debug_line_data.getSLEB128(offset_ptr);
break;
case DW_LNS_set_file:
// Takes a single unsigned LEB128 operand and stores it in the file
// register of the state machine.
- state.File = debug_line_data.getULEB128(offset_ptr);
+ State.Row.File = debug_line_data.getULEB128(offset_ptr);
break;
case DW_LNS_set_column:
// Takes a single unsigned LEB128 operand and stores it in the
// column register of the state machine.
- state.Column = debug_line_data.getULEB128(offset_ptr);
+ State.Row.Column = debug_line_data.getULEB128(offset_ptr);
break;
case DW_LNS_negate_stmt:
// Takes no arguments. Set the is_stmt register of the state
// machine to the logical negation of its current value.
- state.IsStmt = !state.IsStmt;
+ State.Row.IsStmt = !State.Row.IsStmt;
break;
case DW_LNS_set_basic_block:
// Takes no arguments. Set the basic_block register of the
// state machine to true
- state.BasicBlock = true;
+ State.Row.BasicBlock = true;
break;
case DW_LNS_const_add_pc:
@@ -386,10 +408,10 @@ bool DWARFDebugLine::parseStatementTable(DataExtractor debug_line_data,
// than twice that range will it need to use both DW_LNS_advance_pc
// and a special opcode, requiring three or more bytes.
{
- uint8_t adjust_opcode = 255 - prologue->OpcodeBase;
- uint64_t addr_offset = (adjust_opcode / prologue->LineRange) *
- prologue->MinInstLength;
- state.Address += addr_offset;
+ uint8_t adjust_opcode = 255 - Prologue.OpcodeBase;
+ uint64_t addr_offset =
+ (adjust_opcode / Prologue.LineRange) * Prologue.MinInstLength;
+ State.Row.Address += addr_offset;
}
break;
@@ -403,25 +425,25 @@ bool DWARFDebugLine::parseStatementTable(DataExtractor debug_line_data,
// judge when the computation of a special opcode overflows and
// requires the use of DW_LNS_advance_pc. Such assemblers, however,
// can use DW_LNS_fixed_advance_pc instead, sacrificing compression.
- state.Address += debug_line_data.getU16(offset_ptr);
+ State.Row.Address += debug_line_data.getU16(offset_ptr);
break;
case DW_LNS_set_prologue_end:
// Takes no arguments. Set the prologue_end register of the
// state machine to true
- state.PrologueEnd = true;
+ State.Row.PrologueEnd = true;
break;
case DW_LNS_set_epilogue_begin:
// Takes no arguments. Set the epilogue_begin register of the
// state machine to true.
- state.EpilogueBegin = true;
+ State.Row.EpilogueBegin = true;
break;
case DW_LNS_set_isa:
// Takes a single unsigned LEB128 operand and stores it in the
// isa register of the state machine.
- state.Isa = debug_line_data.getULEB128(offset_ptr);
+ State.Row.Isa = debug_line_data.getULEB128(offset_ptr);
break;
default:
@@ -429,9 +451,9 @@ bool DWARFDebugLine::parseStatementTable(DataExtractor debug_line_data,
// of such opcodes because they are specified in the prologue
// as a multiple of LEB128 operands for each opcode.
{
- assert(opcode - 1U < prologue->StandardOpcodeLengths.size());
- uint8_t opcode_length = prologue->StandardOpcodeLengths[opcode - 1];
- for (uint8_t i=0; i<opcode_length; ++i)
+ assert(opcode - 1U < Prologue.StandardOpcodeLengths.size());
+ uint8_t opcode_length = Prologue.StandardOpcodeLengths[opcode - 1];
+ for (uint8_t i = 0; i < opcode_length; ++i)
debug_line_data.getULEB128(offset_ptr);
}
break;
@@ -470,24 +492,37 @@ bool DWARFDebugLine::parseStatementTable(DataExtractor debug_line_data,
//
// line increment = line_base + (adjusted opcode % line_range)
- uint8_t adjust_opcode = opcode - prologue->OpcodeBase;
- uint64_t addr_offset = (adjust_opcode / prologue->LineRange) *
- prologue->MinInstLength;
- int32_t line_offset = prologue->LineBase +
- (adjust_opcode % prologue->LineRange);
- state.Line += line_offset;
- state.Address += addr_offset;
- state.appendRowToMatrix(*offset_ptr);
+ uint8_t adjust_opcode = opcode - Prologue.OpcodeBase;
+ uint64_t addr_offset =
+ (adjust_opcode / Prologue.LineRange) * Prologue.MinInstLength;
+ int32_t line_offset =
+ Prologue.LineBase + (adjust_opcode % Prologue.LineRange);
+ State.Row.Line += line_offset;
+ State.Row.Address += addr_offset;
+ State.appendRowToMatrix(*offset_ptr);
}
}
- state.finalize();
+ if (!State.Sequence.Empty) {
+ fprintf(stderr, "warning: last sequence in debug line table is not"
+ "terminated!\n");
+ }
+
+ // Sort all sequences so that address lookup will work faster.
+ if (!Sequences.empty()) {
+ std::sort(Sequences.begin(), Sequences.end(), Sequence::orderByLowPC);
+ // Note: instruction address ranges of sequences should not overlap in
+ // shared objects and executables. If they do, the address lookup still
+ // works, but the result may be ambiguous. We don't report a warning in
+ // that case: for example, a .so built from multiple object files
+ // sometimes contains a few rudimentary sequences for address ranges
+ // [0x0, 0xsomething).
+ }
return end_offset;
}
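
To make the special-opcode arithmetic used in parse() concrete, here is a
worked example; the constants are common producer defaults, assumed for
illustration rather than taken from this patch:

#include <cassert>
#include <cstdint>

int main() {
  const uint8_t OpcodeBase = 13, LineRange = 14, MinInstLength = 1;
  const int8_t LineBase = -5;
  const uint8_t Opcode = 0x4b;                  // 75, i.e. a special opcode
  const uint8_t Adjusted = Opcode - OpcodeBase; // 62
  // Same formulas as the decoder above:
  const uint64_t AddrAdvance = (Adjusted / LineRange) * MinInstLength; // 4
  const int32_t LineAdvance = LineBase + (Adjusted % LineRange);       // +1
  assert(AddrAdvance == 4 && LineAdvance == 1);
  return 0;
}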
-uint32_t
-DWARFDebugLine::LineTable::lookupAddress(uint64_t address) const {
+uint32_t DWARFDebugLine::LineTable::lookupAddress(uint64_t address) const {
uint32_t unknown_index = UINT32_MAX;
if (Sequences.empty())
return unknown_index;
@@ -532,10 +567,8 @@ DWARFDebugLine::LineTable::lookupAddress(uint64_t address) const {
return index;
}
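
A hedged usage sketch of the lookup path (DL, Data and Off are assumed to
exist in the caller; UINT32_MAX is the miss sentinel visible above, and the
sort performed at the end of parse() is what keeps this lookup fast):

if (const DWARFDebugLine::LineTable *LT = DL.getOrParseLineTable(Data, Off)) {
  uint32_t RowIndex = LT->lookupAddress(0x400123);
  if (RowIndex != UINT32_MAX)
    LT->Rows[RowIndex].dump(llvm::outs()); // file/line info for the address
}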
-bool
-DWARFDebugLine::LineTable::lookupAddressRange(uint64_t address,
- uint64_t size,
- std::vector<uint32_t>& result) const {
+bool DWARFDebugLine::LineTable::lookupAddressRange(
+ uint64_t address, uint64_t size, std::vector<uint32_t> &result) const {
if (Sequences.empty())
return false;
uint64_t end_addr = address + size;
@@ -611,13 +644,14 @@ DWARFDebugLine::LineTable::lookupAddressRange(uint64_t address,
bool
DWARFDebugLine::LineTable::getFileNameByIndex(uint64_t FileIndex,
- bool NeedsAbsoluteFilePath,
+ FileLineInfoKind Kind,
std::string &Result) const {
- if (FileIndex == 0 || FileIndex > Prologue.FileNames.size())
+ if (FileIndex == 0 || FileIndex > Prologue.FileNames.size() ||
+ Kind == FileLineInfoKind::None)
return false;
const FileNameEntry &Entry = Prologue.FileNames[FileIndex - 1];
const char *FileName = Entry.Name;
- if (!NeedsAbsoluteFilePath ||
+ if (Kind != FileLineInfoKind::AbsoluteFilePath ||
sys::path::is_absolute(FileName)) {
Result = FileName;
return true;
diff --git a/lib/DebugInfo/DWARFDebugLine.h b/lib/DebugInfo/DWARFDebugLine.h
index a336f49..c7b7ec2 100644
--- a/lib/DebugInfo/DWARFDebugLine.h
+++ b/lib/DebugInfo/DWARFDebugLine.h
@@ -11,6 +11,7 @@
#define LLVM_DEBUGINFO_DWARFDEBUGLINE_H
#include "DWARFRelocMap.h"
+#include "llvm/DebugInfo/DIContext.h"
#include "llvm/Support/DataExtractor.h"
#include <map>
#include <string>
@@ -24,7 +25,7 @@ class DWARFDebugLine {
public:
DWARFDebugLine(const RelocAddrMap* LineInfoRelocMap) : RelocMap(LineInfoRelocMap) {}
struct FileNameEntry {
- FileNameEntry() : Name(0), DirIdx(0), ModTime(0), Length(0) {}
+ FileNameEntry() : Name(nullptr), DirIdx(0), ModTime(0), Length(0) {}
const char *Name;
uint64_t DirIdx;
@@ -33,10 +34,7 @@ public:
};
struct Prologue {
- Prologue()
- : TotalLength(0), Version(0), PrologueLength(0), MinInstLength(0),
- MaxOpsPerInst(0), DefaultIsStmt(0), LineBase(0), LineRange(0),
- OpcodeBase(0) {}
+ Prologue();
// The size in bytes of the statement information for this compilation unit
// (not including the total_length field itself).
@@ -77,19 +75,16 @@ public:
int32_t getMaxLineIncrementForSpecialOpcode() const {
return LineBase + (int8_t)LineRange - 1;
}
+
+ void clear();
void dump(raw_ostream &OS) const;
- void clear() {
- TotalLength = Version = PrologueLength = 0;
- MinInstLength = LineBase = LineRange = OpcodeBase = 0;
- StandardOpcodeLengths.clear();
- IncludeDirectories.clear();
- FileNames.clear();
- }
+ bool parse(DataExtractor debug_line_data, uint32_t *offset_ptr);
};
// Standard .debug_line state machine structure.
struct Row {
- Row(bool default_is_stmt = false) { reset(default_is_stmt); }
+ explicit Row(bool default_is_stmt = false);
+
/// Called after a row is appended to the matrix.
void postAppend();
void reset(bool default_is_stmt);
@@ -151,14 +146,9 @@ public:
unsigned LastRowIndex;
bool Empty;
- Sequence() { reset(); }
- void reset() {
- LowPC = 0;
- HighPC = 0;
- FirstRowIndex = 0;
- LastRowIndex = 0;
- Empty = true;
- }
+ Sequence();
+ void reset();
+
static bool orderByLowPC(const Sequence& LHS, const Sequence& RHS) {
return LHS.LowPC < RHS.LowPC;
}
@@ -171,31 +161,34 @@ public:
};
struct LineTable {
- void appendRow(const DWARFDebugLine::Row &state) { Rows.push_back(state); }
- void appendSequence(const DWARFDebugLine::Sequence &sequence) {
- Sequences.push_back(sequence);
+ LineTable();
+
+ void appendRow(const DWARFDebugLine::Row &R) {
+ Rows.push_back(R);
}
- void clear() {
- Prologue.clear();
- Rows.clear();
- Sequences.clear();
+ void appendSequence(const DWARFDebugLine::Sequence &S) {
+ Sequences.push_back(S);
}
// Returns the index of the row with file/line info for a given address,
// or -1 if there is no such row.
uint32_t lookupAddress(uint64_t address) const;
- bool lookupAddressRange(uint64_t address,
- uint64_t size,
- std::vector<uint32_t>& result) const;
+ bool lookupAddressRange(uint64_t address, uint64_t size,
+ std::vector<uint32_t> &result) const;
// Extracts filename by its index in filename table in prologue.
// Returns true on success.
bool getFileNameByIndex(uint64_t FileIndex,
- bool NeedsAbsoluteFilePath,
+ DILineInfoSpecifier::FileLineInfoKind Kind,
std::string &Result) const;
void dump(raw_ostream &OS) const;
+ void clear();
+
+ /// Parse prologue and all rows.
+ bool parse(DataExtractor debug_line_data, const RelocAddrMap *RMap,
+ uint32_t *offset_ptr);
struct Prologue Prologue;
typedef std::vector<Row> RowVector;
@@ -206,48 +199,26 @@ public:
SequenceVector Sequences;
};
- struct State : public Row, public Sequence, public LineTable {
- // Special row codes.
- enum {
- StartParsingLineTable = 0,
- DoneParsingLineTable = -1
- };
-
- State() : row(StartParsingLineTable) {}
- virtual ~State();
-
- virtual void appendRowToMatrix(uint32_t offset);
- virtual void finalize();
- virtual void reset() {
- Row::reset(Prologue.DefaultIsStmt);
- Sequence::reset();
- }
-
- // The row number that starts at zero for the prologue, and increases for
- // each row added to the matrix.
- unsigned row;
- };
-
- struct DumpingState : public State {
- DumpingState(raw_ostream &OS) : OS(OS) {}
- virtual ~DumpingState();
- void finalize() override;
- private:
- raw_ostream &OS;
- };
-
- static bool parsePrologue(DataExtractor debug_line_data, uint32_t *offset_ptr,
- Prologue *prologue);
- /// Parse a single line table (prologue and all rows).
- static bool parseStatementTable(DataExtractor debug_line_data,
- const RelocAddrMap *RMap,
- uint32_t *offset_ptr, State &state);
-
const LineTable *getLineTable(uint32_t offset) const;
const LineTable *getOrParseLineTable(DataExtractor debug_line_data,
uint32_t offset);
private:
+ struct ParsingState {
+ ParsingState(struct LineTable *LT);
+
+ void resetRowAndSequence();
+ void appendRowToMatrix(uint32_t offset);
+
+ // Line table we're currently parsing.
+ struct LineTable *LineTable;
+ // The row number that starts at zero for the prologue, and increases for
+ // each row added to the matrix.
+ unsigned RowNumber;
+ struct Row Row;
+ struct Sequence Sequence;
+ };
+
typedef std::map<uint32_t, LineTable> LineTableMapTy;
typedef LineTableMapTy::iterator LineTableIter;
typedef LineTableMapTy::const_iterator LineTableConstIter;
diff --git a/lib/DebugInfo/DWARFDebugRangeList.cpp b/lib/DebugInfo/DWARFDebugRangeList.cpp
index aa2a2be..07b23b3 100644
--- a/lib/DebugInfo/DWARFDebugRangeList.cpp
+++ b/lib/DebugInfo/DWARFDebugRangeList.cpp
@@ -54,13 +54,16 @@ void DWARFDebugRangeList::dump(raw_ostream &OS) const {
OS << format("%08x <End of list>\n", Offset);
}
-bool DWARFDebugRangeList::containsAddress(uint64_t BaseAddress,
- uint64_t Address) const {
+DWARFAddressRangesVector
+DWARFDebugRangeList::getAbsoluteRanges(uint64_t BaseAddress) const {
+ DWARFAddressRangesVector Res;
for (const RangeListEntry &RLE : Entries) {
- if (RLE.isBaseAddressSelectionEntry(AddressSize))
+ if (RLE.isBaseAddressSelectionEntry(AddressSize)) {
BaseAddress = RLE.EndAddress;
- else if (RLE.containsAddress(BaseAddress, Address))
- return true;
+ } else {
+ Res.push_back(std::make_pair(BaseAddress + RLE.StartAddress,
+ BaseAddress + RLE.EndAddress));
+ }
}
- return false;
+ return Res;
}
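
To illustrate the rebasing above with invented numbers: a base-address
selection entry swaps in a new base, and every ordinary entry is emitted
relative to whatever base is current.

// Invented range list, AddressSize = 8:
//   {StartAddress = -1ULL, EndAddress = 0x400000} -> BaseAddress = 0x400000
//   {StartAddress = 0x10,  EndAddress = 0x20}     -> Res += [0x400010, 0x400020)
//   {StartAddress = 0x40,  EndAddress = 0x58}     -> Res += [0x400040, 0x400058)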
diff --git a/lib/DebugInfo/DWARFDebugRangeList.h b/lib/DebugInfo/DWARFDebugRangeList.h
index 4e34a91..587b550 100644
--- a/lib/DebugInfo/DWARFDebugRangeList.h
+++ b/lib/DebugInfo/DWARFDebugRangeList.h
@@ -17,6 +17,9 @@ namespace llvm {
class raw_ostream;
+/// DWARFAddressRangesVector - represents a set of absolute address ranges.
+typedef std::vector<std::pair<uint64_t, uint64_t>> DWARFAddressRangesVector;
+
class DWARFDebugRangeList {
public:
struct RangeListEntry {
@@ -50,10 +53,6 @@ public:
else
return StartAddress == -1ULL;
}
- bool containsAddress(uint64_t BaseAddress, uint64_t Address) const {
- return (BaseAddress + StartAddress <= Address) &&
- (Address < BaseAddress + EndAddress);
- }
};
private:
@@ -67,10 +66,10 @@ public:
void clear();
void dump(raw_ostream &OS) const;
bool extract(DataExtractor data, uint32_t *offset_ptr);
- /// containsAddress - Returns true if range list contains the given
- /// address. Has to be passed base address of the compile unit that
- /// references this range list.
- bool containsAddress(uint64_t BaseAddress, uint64_t Address) const;
+ /// getAbsoluteRanges - Returns absolute address ranges defined by this range
+ /// list. Has to be passed base address of the compile unit referencing this
+ /// range list.
+ DWARFAddressRangesVector getAbsoluteRanges(uint64_t BaseAddress) const;
};
} // namespace llvm
diff --git a/lib/DebugInfo/DWARFFormValue.cpp b/lib/DebugInfo/DWARFFormValue.cpp
index da71fb3..8d0f966 100644
--- a/lib/DebugInfo/DWARFFormValue.cpp
+++ b/lib/DebugInfo/DWARFFormValue.cpp
@@ -131,7 +131,7 @@ bool DWARFFormValue::extractValue(DataExtractor data, uint32_t *offset_ptr,
const DWARFUnit *cu) {
bool indirect = false;
bool is_block = false;
- Value.data = NULL;
+ Value.data = nullptr;
// Read the value for the form into value and follow and DW_FORM_indirect
// instances we run into
do {
@@ -241,7 +241,7 @@ bool DWARFFormValue::extractValue(DataExtractor data, uint32_t *offset_ptr,
if (is_block) {
StringRef str = data.getData().substr(*offset_ptr, Value.uval);
- Value.data = NULL;
+ Value.data = nullptr;
if (!str.empty()) {
Value.data = reinterpret_cast<const uint8_t *>(str.data());
*offset_ptr += Value.uval;
@@ -488,7 +488,7 @@ Optional<const char *> DWARFFormValue::getAsCString(const DWARFUnit *U) const {
return None;
if (Form == DW_FORM_string)
return Value.cstr;
- if (U == 0)
+ if (!U)
return None;
uint32_t Offset = Value.uval;
if (Form == DW_FORM_GNU_str_index) {
@@ -509,7 +509,7 @@ Optional<uint64_t> DWARFFormValue::getAsAddress(const DWARFUnit *U) const {
if (Form == DW_FORM_GNU_addr_index) {
uint32_t Index = Value.uval;
uint64_t Result;
- if (U == 0 || !U->getAddrOffsetSectionItem(Index, Result))
+ if (!U || !U->getAddrOffsetSectionItem(Index, Result))
return None;
return Result;
}
@@ -525,7 +525,7 @@ Optional<uint64_t> DWARFFormValue::getAsReference(const DWARFUnit *U) const {
case DW_FORM_ref4:
case DW_FORM_ref8:
case DW_FORM_ref_udata:
- if (U == 0)
+ if (!U)
return None;
return Value.uval + U->getOffset();
case DW_FORM_ref_addr:
diff --git a/lib/DebugInfo/DWARFTypeUnit.h b/lib/DebugInfo/DWARFTypeUnit.h
index 05e13ff..cf773b8 100644
--- a/lib/DebugInfo/DWARFTypeUnit.h
+++ b/lib/DebugInfo/DWARFTypeUnit.h
@@ -19,11 +19,13 @@ private:
uint64_t TypeHash;
uint32_t TypeOffset;
public:
- DWARFTypeUnit(const DWARFDebugAbbrev *DA, StringRef IS, StringRef AS,
- StringRef RS, StringRef SS, StringRef SOS, StringRef AOS,
+ DWARFTypeUnit(const DWARFDebugAbbrev *DA, StringRef IS, StringRef RS,
+ StringRef SS, StringRef SOS, StringRef AOS,
const RelocAddrMap *M, bool LE)
- : DWARFUnit(DA, IS, AS, RS, SS, SOS, AOS, M, LE) {}
- uint32_t getSize() const override { return DWARFUnit::getSize() + 12; }
+ : DWARFUnit(DA, IS, RS, SS, SOS, AOS, M, LE) {}
+ uint32_t getHeaderSize() const override {
+ return DWARFUnit::getHeaderSize() + 12;
+ }
void dump(raw_ostream &OS);
protected:
bool extractImpl(DataExtractor debug_info, uint32_t *offset_ptr) override;
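
The +12 above is the DWARF4 type-unit extension on top of the common 11-byte
header; spelled out as a compile-time check (the field sizes come from the
DWARF4 layout, an assumption rather than something this patch states):

static_assert(11 + /*type signature*/ 8 + /*type offset*/ 4 == 23,
              "DWARF4 type unit header is 23 bytes");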
diff --git a/lib/DebugInfo/DWARFUnit.cpp b/lib/DebugInfo/DWARFUnit.cpp
index 316c208..f5f5072 100644
--- a/lib/DebugInfo/DWARFUnit.cpp
+++ b/lib/DebugInfo/DWARFUnit.cpp
@@ -17,12 +17,12 @@
using namespace llvm;
using namespace dwarf;
-DWARFUnit::DWARFUnit(const DWARFDebugAbbrev *DA, StringRef IS, StringRef AS,
- StringRef RS, StringRef SS, StringRef SOS, StringRef AOS,
+DWARFUnit::DWARFUnit(const DWARFDebugAbbrev *DA, StringRef IS, StringRef RS,
+ StringRef SS, StringRef SOS, StringRef AOS,
const RelocAddrMap *M, bool LE)
- : Abbrev(DA), InfoSection(IS), AbbrevSection(AS), RangeSection(RS),
- StringSection(SS), StringOffsetSection(SOS), AddrOffsetSection(AOS),
- RelocMap(M), isLittleEndian(LE) {
+ : Abbrev(DA), InfoSection(IS), RangeSection(RS), StringSection(SS),
+ StringOffsetSection(SOS), AddrOffsetSection(AOS), RelocMap(M),
+ isLittleEndian(LE) {
clear();
}
@@ -54,18 +54,20 @@ bool DWARFUnit::getStringOffsetSectionItem(uint32_t Index,
bool DWARFUnit::extractImpl(DataExtractor debug_info, uint32_t *offset_ptr) {
Length = debug_info.getU32(offset_ptr);
Version = debug_info.getU16(offset_ptr);
- uint64_t abbrOffset = debug_info.getU32(offset_ptr);
+ uint64_t AbbrOffset = debug_info.getU32(offset_ptr);
AddrSize = debug_info.getU8(offset_ptr);
- bool lengthOK = debug_info.isValidOffset(getNextUnitOffset() - 1);
- bool versionOK = DWARFContext::isSupportedVersion(Version);
- bool abbrOffsetOK = AbbrevSection.size() > abbrOffset;
- bool addrSizeOK = AddrSize == 4 || AddrSize == 8;
+ bool LengthOK = debug_info.isValidOffset(getNextUnitOffset() - 1);
+ bool VersionOK = DWARFContext::isSupportedVersion(Version);
+ bool AddrSizeOK = AddrSize == 4 || AddrSize == 8;
- if (!lengthOK || !versionOK || !addrSizeOK || !abbrOffsetOK)
+ if (!LengthOK || !VersionOK || !AddrSizeOK)
+ return false;
+
+ Abbrevs = Abbrev->getAbbreviationDeclarationSet(AbbrOffset);
+ if (Abbrevs == nullptr)
return false;
- Abbrevs = Abbrev->getAbbreviationDeclarationSet(abbrOffset);
return true;
}
@@ -98,7 +100,7 @@ void DWARFUnit::clear() {
Offset = 0;
Length = 0;
Version = 0;
- Abbrevs = 0;
+ Abbrevs = nullptr;
AddrSize = 0;
BaseAddr = 0;
RangeSectionBase = 0;
@@ -110,8 +112,8 @@ void DWARFUnit::clear() {
const char *DWARFUnit::getCompilationDir() {
extractDIEsIfNeeded(true);
if (DieArray.empty())
- return 0;
- return DieArray[0].getAttributeValueAsString(this, DW_AT_comp_dir, 0);
+ return nullptr;
+ return DieArray[0].getAttributeValueAsString(this, DW_AT_comp_dir, nullptr);
}
uint64_t DWARFUnit::getDWOId() {
@@ -124,38 +126,32 @@ uint64_t DWARFUnit::getDWOId() {
}
void DWARFUnit::setDIERelations() {
- if (DieArray.empty())
+ if (DieArray.size() <= 1)
return;
- DWARFDebugInfoEntryMinimal *die_array_begin = &DieArray.front();
- DWARFDebugInfoEntryMinimal *die_array_end = &DieArray.back();
- DWARFDebugInfoEntryMinimal *curr_die;
- // We purposely are skipping the last element in the array in the loop below
- // so that we can always have a valid next item
- for (curr_die = die_array_begin; curr_die < die_array_end; ++curr_die) {
- // Since our loop doesn't include the last element, we can always
- // safely access the next die in the array.
- DWARFDebugInfoEntryMinimal *next_die = curr_die + 1;
-
- const DWARFAbbreviationDeclaration *curr_die_abbrev =
- curr_die->getAbbreviationDeclarationPtr();
-
- if (curr_die_abbrev) {
- // Normal DIE
- if (curr_die_abbrev->hasChildren())
- next_die->setParent(curr_die);
- else
- curr_die->setSibling(next_die);
+
+ std::vector<DWARFDebugInfoEntryMinimal *> ParentChain;
+ DWARFDebugInfoEntryMinimal *SiblingChain = nullptr;
+ for (auto &DIE : DieArray) {
+ if (SiblingChain) {
+ SiblingChain->setSibling(&DIE);
+ }
+ if (const DWARFAbbreviationDeclaration *AbbrDecl =
+ DIE.getAbbreviationDeclarationPtr()) {
+ // Normal DIE.
+ if (AbbrDecl->hasChildren()) {
+ ParentChain.push_back(&DIE);
+ SiblingChain = nullptr;
+ } else {
+ SiblingChain = &DIE;
+ }
} else {
- // NULL DIE that terminates a sibling chain
- DWARFDebugInfoEntryMinimal *parent = curr_die->getParent();
- if (parent)
- parent->setSibling(next_die);
+ // NULL entry terminates the sibling chain.
+ SiblingChain = ParentChain.back();
+ ParentChain.pop_back();
}
}
-
- // Since we skipped the last element, we need to fix it up!
- if (die_array_begin < die_array_end)
- curr_die->setParent(die_array_begin);
+ assert(SiblingChain == nullptr || SiblingChain == &DieArray[0]);
+ assert(ParentChain.empty());
}
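
Tracing the ParentChain/SiblingChain loop above on an invented flattened DIE
stream shows how parents get popped and siblings linked:

// Stream (one entry per line of DieArray): CU { A { B } NUL C } NUL
//   CU: has children -> push CU on ParentChain
//   A : has children -> push A
//   B : leaf         -> B becomes the open sibling chain
//   NUL: B->setSibling(NUL); pop A, which becomes the open chain
//   C : A->setSibling(C); C becomes the open chain
//   NUL: C->setSibling(NUL); pop CU -- both final asserts hold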
void DWARFUnit::extractDIEsToVector(
@@ -166,13 +162,13 @@ void DWARFUnit::extractDIEsToVector(
// Set the offset to that of the first DIE and calculate the start of the
// next compilation unit header.
- uint32_t Offset = getFirstDIEOffset();
+ uint32_t DIEOffset = Offset + getHeaderSize();
uint32_t NextCUOffset = getNextUnitOffset();
DWARFDebugInfoEntryMinimal DIE;
uint32_t Depth = 0;
bool IsCUDie = true;
- while (Offset < NextCUOffset && DIE.extractFast(this, &Offset)) {
+ while (DIEOffset < NextCUOffset && DIE.extractFast(this, &DIEOffset)) {
if (IsCUDie) {
if (AppendCUDie)
Dies.push_back(DIE);
@@ -187,9 +183,8 @@ void DWARFUnit::extractDIEsToVector(
Dies.push_back(DIE);
}
- const DWARFAbbreviationDeclaration *AbbrDecl =
- DIE.getAbbreviationDeclarationPtr();
- if (AbbrDecl) {
+ if (const DWARFAbbreviationDeclaration *AbbrDecl =
+ DIE.getAbbreviationDeclarationPtr()) {
// Normal DIE
if (AbbrDecl->hasChildren())
++Depth;
@@ -205,9 +200,9 @@ void DWARFUnit::extractDIEsToVector(
// Give a little bit of info if we encounter corrupt DWARF (our offset
// should always terminate at or before the start of the next compilation
// unit header).
- if (Offset > NextCUOffset)
+ if (DIEOffset > NextCUOffset)
fprintf(stderr, "warning: DWARF compile unit extends beyond its "
- "bounds cu 0x%8.8x at 0x%8.8x'\n", getOffset(), Offset);
+ "bounds cu 0x%8.8x at 0x%8.8x'\n", getOffset(), DIEOffset);
}
size_t DWARFUnit::extractDIEsIfNeeded(bool CUDieOnly) {
@@ -241,25 +236,25 @@ size_t DWARFUnit::extractDIEsIfNeeded(bool CUDieOnly) {
DWARFUnit::DWOHolder::DWOHolder(object::ObjectFile *DWOFile)
: DWOFile(DWOFile),
DWOContext(cast<DWARFContext>(DIContext::getDWARFContext(DWOFile))),
- DWOU(0) {
+ DWOU(nullptr) {
if (DWOContext->getNumDWOCompileUnits() > 0)
DWOU = DWOContext->getDWOCompileUnitAtIndex(0);
}
bool DWARFUnit::parseDWO() {
- if (DWO.get() != 0)
+ if (DWO.get())
return false;
extractDIEsIfNeeded(true);
if (DieArray.empty())
return false;
const char *DWOFileName =
- DieArray[0].getAttributeValueAsString(this, DW_AT_GNU_dwo_name, 0);
- if (DWOFileName == 0)
+ DieArray[0].getAttributeValueAsString(this, DW_AT_GNU_dwo_name, nullptr);
+ if (!DWOFileName)
return false;
const char *CompilationDir =
- DieArray[0].getAttributeValueAsString(this, DW_AT_comp_dir, 0);
+ DieArray[0].getAttributeValueAsString(this, DW_AT_comp_dir, nullptr);
SmallString<16> AbsolutePath;
- if (sys::path::is_relative(DWOFileName) && CompilationDir != 0) {
+ if (sys::path::is_relative(DWOFileName) && CompilationDir != nullptr) {
sys::path::append(AbsolutePath, CompilationDir);
}
sys::path::append(AbsolutePath, DWOFileName);
@@ -271,7 +266,7 @@ bool DWARFUnit::parseDWO() {
DWO.reset(new DWOHolder(DWOFile.get()));
DWARFUnit *DWOCU = DWO->getUnit();
// Verify that compile unit in .dwo file is valid.
- if (DWOCU == 0 || DWOCU->getDWOId() != getDWOId()) {
+ if (!DWOCU || DWOCU->getDWOId() != getDWOId()) {
DWO.reset();
return false;
}
@@ -298,33 +293,33 @@ void DWARFUnit::clearDIEs(bool KeepCUDie) {
}
}
-void
-DWARFUnit::buildAddressRangeTable(DWARFDebugAranges *debug_aranges,
- bool clear_dies_if_already_not_parsed,
- uint32_t CUOffsetInAranges) {
+void DWARFUnit::collectAddressRanges(DWARFAddressRangesVector &CURanges) {
+ // First, check if CU DIE describes address ranges for the unit.
+ const auto &CUDIERanges = getCompileUnitDIE()->getAddressRanges(this);
+ if (!CUDIERanges.empty()) {
+ CURanges.insert(CURanges.end(), CUDIERanges.begin(), CUDIERanges.end());
+ return;
+ }
+
// This function is usually called when there is no .debug_aranges section
// in order to produce a compile-unit-level set of address ranges that is
// accurate. If the DIEs weren't already parsed, we don't want the DIEs for
// all compile units to stay loaded when they aren't needed, so we may end
// up parsing the DWARF and then throwing it all away to keep memory usage
// down.
- const bool clear_dies = extractDIEsIfNeeded(false) > 1 &&
- clear_dies_if_already_not_parsed;
- DieArray[0].buildAddressRangeTable(this, debug_aranges, CUOffsetInAranges);
+ const bool ClearDIEs = extractDIEsIfNeeded(false) > 1;
+ DieArray[0].collectChildrenAddressRanges(this, CURanges);
+
+ // Collect address ranges from DIEs in .dwo if necessary.
bool DWOCreated = parseDWO();
- if (DWO.get()) {
- // If there is a .dwo file for this compile unit, then skeleton CU DIE
- // doesn't have children, and we should instead build address range table
- // from DIEs in the .debug_info.dwo section of .dwo file.
- DWO->getUnit()->buildAddressRangeTable(
- debug_aranges, clear_dies_if_already_not_parsed, CUOffsetInAranges);
- }
- if (DWOCreated && clear_dies_if_already_not_parsed)
+ if (DWO.get())
+ DWO->getUnit()->collectAddressRanges(CURanges);
+ if (DWOCreated)
DWO.reset();
// Keep memory usage down by clearing DIEs if this function caused
// them to be parsed.
- if (clear_dies)
+ if (ClearDIEs)
clearDIEs(true);
}
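
A hedged usage sketch of the new interface (CU is assumed to be an
already-extracted DWARFUnit): the caller owns the vector and units append
into it, which is what lets a skeleton unit forward to its .dwo twin above.

DWARFAddressRangesVector CURanges;
CU->collectAddressRanges(CURanges); // appends; may parse and then drop DIEs
for (const auto &R : CURanges)
  llvm::outs() << "[" << R.first << ", " << R.second << ")\n";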
@@ -337,14 +332,14 @@ DWARFUnit::getSubprogramForAddress(uint64_t Address) {
return &DIE;
}
}
- return 0;
+ return nullptr;
}
DWARFDebugInfoEntryInlinedChain
DWARFUnit::getInlinedChainForAddress(uint64_t Address) {
// First, find a subprogram that contains the given address (the root
// of inlined chain).
- const DWARFUnit *ChainCU = 0;
+ const DWARFUnit *ChainCU = nullptr;
const DWARFDebugInfoEntryMinimal *SubprogramDIE =
getSubprogramForAddress(Address);
if (SubprogramDIE) {
diff --git a/lib/DebugInfo/DWARFUnit.h b/lib/DebugInfo/DWARFUnit.h
index 5b4cf09..471da36 100644
--- a/lib/DebugInfo/DWARFUnit.h
+++ b/lib/DebugInfo/DWARFUnit.h
@@ -29,7 +29,6 @@ class raw_ostream;
class DWARFUnit {
const DWARFDebugAbbrev *Abbrev;
StringRef InfoSection;
- StringRef AbbrevSection;
StringRef RangeSection;
uint32_t RangeSectionBase;
StringRef StringSection;
@@ -60,12 +59,13 @@ class DWARFUnit {
protected:
virtual bool extractImpl(DataExtractor debug_info, uint32_t *offset_ptr);
+ /// Size in bytes of the unit header.
+ virtual uint32_t getHeaderSize() const { return 11; }
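+ // For clarity (annotation, not part of the original patch): the 11 bytes
+ // are 4 (unit_length) + 2 (version) + 4 (debug_abbrev_offset) +
+ // 1 (address_size) in DWARF32, matching the reads in extractImpl().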
public:
-
- DWARFUnit(const DWARFDebugAbbrev *DA, StringRef IS, StringRef AS,
- StringRef RS, StringRef SS, StringRef SOS, StringRef AOS,
- const RelocAddrMap *M, bool LE);
+ DWARFUnit(const DWARFDebugAbbrev *DA, StringRef IS, StringRef RS,
+ StringRef SS, StringRef SOS, StringRef AOS, const RelocAddrMap *M,
+ bool LE);
virtual ~DWARFUnit();
@@ -102,12 +102,7 @@ public:
DWARFDebugRangeList &RangeList) const;
void clear();
uint32_t getOffset() const { return Offset; }
- /// Size in bytes of the compile unit header.
- virtual uint32_t getSize() const { return 11; }
- uint32_t getFirstDIEOffset() const { return Offset + getSize(); }
uint32_t getNextUnitOffset() const { return Offset + Length + 4; }
- /// Size in bytes of the .debug_info data associated with this compile unit.
- size_t getDebugInfoSize() const { return Length + 4 - getSize(); }
uint32_t getLength() const { return Length; }
uint16_t getVersion() const { return Version; }
const DWARFAbbreviationDeclarationSet *getAbbreviations() const {
@@ -123,15 +118,13 @@ public:
const DWARFDebugInfoEntryMinimal *
getCompileUnitDIE(bool extract_cu_die_only = true) {
extractDIEsIfNeeded(extract_cu_die_only);
- return DieArray.empty() ? NULL : &DieArray[0];
+ return DieArray.empty() ? nullptr : &DieArray[0];
}
const char *getCompilationDir();
uint64_t getDWOId();
- void buildAddressRangeTable(DWARFDebugAranges *debug_aranges,
- bool clear_dies_if_already_not_parsed,
- uint32_t CUOffsetInAranges);
+ void collectAddressRanges(DWARFAddressRangesVector &CURanges);
/// getInlinedChainForAddress - fetches inlined chain for a given address.
/// Returns empty chain if there is no subprogram containing address. The
@@ -139,6 +132,9 @@ public:
DWARFDebugInfoEntryInlinedChain getInlinedChainForAddress(uint64_t Address);
private:
+ /// Size in bytes of the .debug_info data associated with this compile unit.
+ size_t getDebugInfoSize() const { return Length + 4 - getHeaderSize(); }
+
/// extractDIEsIfNeeded - Parses a compile unit and indexes its DIEs if it
/// hasn't already been done. Returns the number of DIEs parsed at this call.
size_t extractDIEsIfNeeded(bool CUDieOnly);
diff --git a/lib/DebugInfo/module.modulemap b/lib/DebugInfo/module.modulemap
new file mode 100644
index 0000000..1fe5ab1
--- /dev/null
+++ b/lib/DebugInfo/module.modulemap
@@ -0,0 +1 @@
+module DebugInfo { requires cplusplus umbrella "." module * { export * } }
diff --git a/lib/ExecutionEngine/ExecutionEngine.cpp b/lib/ExecutionEngine/ExecutionEngine.cpp
index 4768e67..6766ef1 100644
--- a/lib/ExecutionEngine/ExecutionEngine.cpp
+++ b/lib/ExecutionEngine/ExecutionEngine.cpp
@@ -12,7 +12,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "jit"
#include "llvm/ExecutionEngine/ExecutionEngine.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/Statistic.h"
@@ -25,6 +24,7 @@
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/ValueHandle.h"
+#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/DynamicLibrary.h"
#include "llvm/Support/ErrorHandling.h"
@@ -37,6 +37,8 @@
#include <cstring>
using namespace llvm;
+#define DEBUG_TYPE "jit"
+
STATISTIC(NumInitBytes, "Number of bytes of global vars initialized");
STATISTIC(NumGlobals , "Number of global vars initialized");
@@ -50,22 +52,31 @@ ExecutionEngine *(*ExecutionEngine::JITCtor)(
std::string *ErrorStr,
JITMemoryManager *JMM,
bool GVsWithCode,
- TargetMachine *TM) = 0;
+ TargetMachine *TM) = nullptr;
ExecutionEngine *(*ExecutionEngine::MCJITCtor)(
Module *M,
std::string *ErrorStr,
RTDyldMemoryManager *MCJMM,
bool GVsWithCode,
- TargetMachine *TM) = 0;
+ TargetMachine *TM) = nullptr;
ExecutionEngine *(*ExecutionEngine::InterpCtor)(Module *M,
- std::string *ErrorStr) = 0;
+ std::string *ErrorStr) = nullptr;
ExecutionEngine::ExecutionEngine(Module *M)
: EEState(*this),
- LazyFunctionCreator(0) {
+ LazyFunctionCreator(nullptr) {
CompilingLazily = false;
GVCompilationDisabled = false;
SymbolSearchingDisabled = false;
+
+ // IR module verification is enabled by default in debug builds, and disabled
+ // by default in release builds.
+#ifndef NDEBUG
+ VerifyModules = true;
+#else
+ VerifyModules = false;
+#endif
+
Modules.push_back(M);
assert(M && "Module is null?");
}
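
A minimal usage sketch for overriding that default (EB is an assumed,
already-configured EngineBuilder; setVerifyModules is the same accessor used
further down in EngineBuilder::create):

if (ExecutionEngine *EE = EB.create()) {
  EE->setVerifyModules(true); // force IR verification even in release builds
  // ... use the engine ...
}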
@@ -111,6 +122,10 @@ char *ExecutionEngine::getMemoryForGV(const GlobalVariable *GV) {
return GVMemoryBlock::Create(GV, *getDataLayout());
}
+void ExecutionEngine::addObjectFile(std::unique_ptr<object::ObjectFile> O) {
+ llvm_unreachable("ExecutionEngine subclass doesn't implement addObjectFile.");
+}
+
bool ExecutionEngine::removeModule(Module *M) {
for(SmallVectorImpl<Module *>::iterator I = Modules.begin(),
E = Modules.end(); I != E; ++I) {
@@ -129,7 +144,7 @@ Function *ExecutionEngine::FindFunctionNamed(const char *FnName) {
if (Function *F = Modules[i]->getFunction(FnName))
return F;
}
- return 0;
+ return nullptr;
}
@@ -141,7 +156,7 @@ void *ExecutionEngineState::RemoveMapping(const MutexGuard &,
// FIXME: This is silly, we shouldn't end up with a mapping -> 0 in the
// GlobalAddressMap.
if (I == GlobalAddressMap.end())
- OldVal = 0;
+ OldVal = nullptr;
else {
OldVal = I->second;
GlobalAddressMap.erase(I);
@@ -157,14 +172,14 @@ void ExecutionEngine::addGlobalMapping(const GlobalValue *GV, void *Addr) {
DEBUG(dbgs() << "JIT: Map \'" << GV->getName()
<< "\' to [" << Addr << "]\n";);
void *&CurVal = EEState.getGlobalAddressMap(locked)[GV];
- assert((CurVal == 0 || Addr == 0) && "GlobalMapping already established!");
+ assert((!CurVal || !Addr) && "GlobalMapping already established!");
CurVal = Addr;
// If we are using the reverse mapping, add it too.
if (!EEState.getGlobalAddressReverseMap(locked).empty()) {
AssertingVH<const GlobalValue> &V =
EEState.getGlobalAddressReverseMap(locked)[Addr];
- assert((V == 0 || GV == 0) && "GlobalMapping already established!");
+ assert((!V || !GV) && "GlobalMapping already established!");
V = GV;
}
}
@@ -193,7 +208,7 @@ void *ExecutionEngine::updateGlobalMapping(const GlobalValue *GV, void *Addr) {
EEState.getGlobalAddressMap(locked);
// Deleting from the mapping?
- if (Addr == 0)
+ if (!Addr)
return EEState.RemoveMapping(locked, GV);
void *&CurVal = Map[GV];
@@ -207,7 +222,7 @@ void *ExecutionEngine::updateGlobalMapping(const GlobalValue *GV, void *Addr) {
if (!EEState.getGlobalAddressReverseMap(locked).empty()) {
AssertingVH<const GlobalValue> &V =
EEState.getGlobalAddressReverseMap(locked)[Addr];
- assert((V == 0 || GV == 0) && "GlobalMapping already established!");
+ assert((!V || !GV) && "GlobalMapping already established!");
V = GV;
}
return OldVal;
@@ -218,7 +233,7 @@ void *ExecutionEngine::getPointerToGlobalIfAvailable(const GlobalValue *GV) {
ExecutionEngineState::GlobalAddressMapTy::iterator I =
EEState.getGlobalAddressMap(locked).find(GV);
- return I != EEState.getGlobalAddressMap(locked).end() ? I->second : 0;
+ return I != EEState.getGlobalAddressMap(locked).end() ? I->second : nullptr;
}
const GlobalValue *ExecutionEngine::getGlobalValueAtAddress(void *Addr) {
@@ -235,7 +250,7 @@ const GlobalValue *ExecutionEngine::getGlobalValueAtAddress(void *Addr) {
std::map<void *, AssertingVH<const GlobalValue> >::iterator I =
EEState.getGlobalAddressReverseMap(locked).find(Addr);
- return I != EEState.getGlobalAddressReverseMap(locked).end() ? I->second : 0;
+ return I != EEState.getGlobalAddressReverseMap(locked).end() ? I->second : nullptr;
}
namespace {
@@ -243,11 +258,11 @@ class ArgvArray {
char *Array;
std::vector<char*> Values;
public:
- ArgvArray() : Array(NULL) {}
+ ArgvArray() : Array(nullptr) {}
~ArgvArray() { clear(); }
void clear() {
delete[] Array;
- Array = NULL;
+ Array = nullptr;
for (size_t I = 0, E = Values.size(); I != E; ++I) {
delete[] Values[I];
}
@@ -283,7 +298,7 @@ void *ArgvArray::reset(LLVMContext &C, ExecutionEngine *EE,
}
// Null terminate it
- EE->StoreValueToMemory(PTOGV(0),
+ EE->StoreValueToMemory(PTOGV(nullptr),
(GenericValue*)(Array+InputArgv.size()*PtrSize),
SBytePtr);
return Array;
@@ -303,11 +318,11 @@ void ExecutionEngine::runStaticConstructorsDestructors(Module *module,
// Should be an array of '{ i32, void ()* }' structs. The first value is
// the init priority, which we ignore.
ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer());
- if (InitList == 0)
+ if (!InitList)
return;
for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) {
ConstantStruct *CS = dyn_cast<ConstantStruct>(InitList->getOperand(i));
- if (CS == 0) continue;
+ if (!CS) continue;
Constant *FP = CS->getOperand(1);
if (FP->isNullValue())
@@ -418,10 +433,10 @@ ExecutionEngine *ExecutionEngine::createJIT(Module *M,
bool GVsWithCode,
Reloc::Model RM,
CodeModel::Model CMM) {
- if (ExecutionEngine::JITCtor == 0) {
+ if (!ExecutionEngine::JITCtor) {
if (ErrorStr)
*ErrorStr = "JIT has not been linked in.";
- return 0;
+ return nullptr;
}
// Use the defaults for extra parameters. Users can use EngineBuilder to
@@ -437,7 +452,7 @@ ExecutionEngine *ExecutionEngine::createJIT(Module *M,
// TODO: permit custom TargetOptions here
TargetMachine *TM = EB.selectTarget();
- if (!TM || (ErrorStr && ErrorStr->length() > 0)) return 0;
+ if (!TM || (ErrorStr && ErrorStr->length() > 0)) return nullptr;
return ExecutionEngine::JITCtor(M, ErrorStr, JMM, GVsWithCode, TM);
}
@@ -447,8 +462,8 @@ ExecutionEngine *EngineBuilder::create(TargetMachine *TM) {
// Make sure we can resolve symbols in the program as well. The null arg
// to the function tells DynamicLibrary to load the program, not a library.
- if (sys::DynamicLibrary::LoadLibraryPermanently(0, ErrorStr))
- return 0;
+ if (sys::DynamicLibrary::LoadLibraryPermanently(nullptr, ErrorStr))
+ return nullptr;
assert(!(JMM && MCJMM));
@@ -461,7 +476,7 @@ ExecutionEngine *EngineBuilder::create(TargetMachine *TM) {
else {
if (ErrorStr)
*ErrorStr = "Cannot create an interpreter with a memory manager.";
- return 0;
+ return nullptr;
}
}
@@ -470,7 +485,7 @@ ExecutionEngine *EngineBuilder::create(TargetMachine *TM) {
*ErrorStr =
"Cannot create a legacy JIT with a runtime dyld memory "
"manager.";
- return 0;
+ return nullptr;
}
// Unless the interpreter was explicitly selected or the JIT is not linked,
@@ -483,16 +498,17 @@ ExecutionEngine *EngineBuilder::create(TargetMachine *TM) {
<< " a different -march switch.\n";
}
- if (UseMCJIT && ExecutionEngine::MCJITCtor) {
- ExecutionEngine *EE =
- ExecutionEngine::MCJITCtor(M, ErrorStr, MCJMM ? MCJMM : JMM,
- AllocateGVsWithCode, TheTM.release());
- if (EE) return EE;
- } else if (ExecutionEngine::JITCtor) {
- ExecutionEngine *EE =
- ExecutionEngine::JITCtor(M, ErrorStr, JMM,
- AllocateGVsWithCode, TheTM.release());
- if (EE) return EE;
+ ExecutionEngine *EE = nullptr;
+ if (UseMCJIT && ExecutionEngine::MCJITCtor)
+ EE = ExecutionEngine::MCJITCtor(M, ErrorStr, MCJMM ? MCJMM : JMM,
+ AllocateGVsWithCode, TheTM.release());
+ else if (ExecutionEngine::JITCtor)
+ EE = ExecutionEngine::JITCtor(M, ErrorStr, JMM,
+ AllocateGVsWithCode, TheTM.release());
+
+ if (EE) {
+ EE->setVerifyModules(VerifyModules);
+ return EE;
}
}
@@ -503,16 +519,16 @@ ExecutionEngine *EngineBuilder::create(TargetMachine *TM) {
return ExecutionEngine::InterpCtor(M, ErrorStr);
if (ErrorStr)
*ErrorStr = "Interpreter has not been linked in.";
- return 0;
+ return nullptr;
}
- if ((WhichEngine & EngineKind::JIT) && ExecutionEngine::JITCtor == 0 &&
- ExecutionEngine::MCJITCtor == 0) {
+ if ((WhichEngine & EngineKind::JIT) && !ExecutionEngine::JITCtor &&
+ !ExecutionEngine::MCJITCtor) {
if (ErrorStr)
*ErrorStr = "JIT has not been linked in.";
}
- return 0;
+ return nullptr;
}
void *ExecutionEngine::getPointerToGlobal(const GlobalValue *GV) {
@@ -848,7 +864,7 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) {
break;
case Type::PointerTyID:
if (isa<ConstantPointerNull>(C))
- Result.PointerVal = 0;
+ Result.PointerVal = nullptr;
else if (const Function *F = dyn_cast<Function>(C))
Result = PTOGV(getPointerToFunctionOrStub(const_cast<Function*>(F)));
else if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(C))
@@ -1193,20 +1209,18 @@ void ExecutionEngine::emitGlobals() {
if (Modules.size() != 1) {
for (unsigned m = 0, e = Modules.size(); m != e; ++m) {
Module &M = *Modules[m];
- for (Module::const_global_iterator I = M.global_begin(),
- E = M.global_end(); I != E; ++I) {
- const GlobalValue *GV = I;
- if (GV->hasLocalLinkage() || GV->isDeclaration() ||
- GV->hasAppendingLinkage() || !GV->hasName())
+ for (const auto &GV : M.globals()) {
+ if (GV.hasLocalLinkage() || GV.isDeclaration() ||
+ GV.hasAppendingLinkage() || !GV.hasName())
continue;// Ignore external globals and globals with internal linkage.
const GlobalValue *&GVEntry =
- LinkedGlobalsMap[std::make_pair(GV->getName(), GV->getType())];
+ LinkedGlobalsMap[std::make_pair(GV.getName(), GV.getType())];
// If this is the first time we've seen this global, it is the canonical
// version.
if (!GVEntry) {
- GVEntry = GV;
+ GVEntry = &GV;
continue;
}
@@ -1216,8 +1230,8 @@ void ExecutionEngine::emitGlobals() {
// Otherwise, we know it's linkonce/weak, replace it if this is a strong
// symbol. FIXME is this right for common?
- if (GV->hasExternalLinkage() || GVEntry->hasExternalWeakLinkage())
- GVEntry = GV;
+ if (GV.hasExternalLinkage() || GVEntry->hasExternalWeakLinkage())
+ GVEntry = &GV;
}
}
}
@@ -1225,31 +1239,30 @@ void ExecutionEngine::emitGlobals() {
std::vector<const GlobalValue*> NonCanonicalGlobals;
for (unsigned m = 0, e = Modules.size(); m != e; ++m) {
Module &M = *Modules[m];
- for (Module::const_global_iterator I = M.global_begin(), E = M.global_end();
- I != E; ++I) {
+ for (const auto &GV : M.globals()) {
// In the multi-module case, see what this global maps to.
if (!LinkedGlobalsMap.empty()) {
if (const GlobalValue *GVEntry =
- LinkedGlobalsMap[std::make_pair(I->getName(), I->getType())]) {
+ LinkedGlobalsMap[std::make_pair(GV.getName(), GV.getType())]) {
// If something else is the canonical global, ignore this one.
- if (GVEntry != &*I) {
- NonCanonicalGlobals.push_back(I);
+ if (GVEntry != &GV) {
+ NonCanonicalGlobals.push_back(&GV);
continue;
}
}
}
- if (!I->isDeclaration()) {
- addGlobalMapping(I, getMemoryForGV(I));
+ if (!GV.isDeclaration()) {
+ addGlobalMapping(&GV, getMemoryForGV(&GV));
} else {
// External variable reference. Try to use the dynamic loader to
// get a pointer to it.
if (void *SymAddr =
- sys::DynamicLibrary::SearchForAddressOfSymbol(I->getName()))
- addGlobalMapping(I, SymAddr);
+ sys::DynamicLibrary::SearchForAddressOfSymbol(GV.getName()))
+ addGlobalMapping(&GV, SymAddr);
else {
report_fatal_error("Could not resolve external global address: "
- +I->getName());
+ +GV.getName());
}
}
}
@@ -1269,16 +1282,15 @@ void ExecutionEngine::emitGlobals() {
// Now that all of the globals are set up in memory, loop through them all
// and initialize their contents.
- for (Module::const_global_iterator I = M.global_begin(), E = M.global_end();
- I != E; ++I) {
- if (!I->isDeclaration()) {
+ for (const auto &GV : M.globals()) {
+ if (!GV.isDeclaration()) {
if (!LinkedGlobalsMap.empty()) {
if (const GlobalValue *GVEntry =
- LinkedGlobalsMap[std::make_pair(I->getName(), I->getType())])
- if (GVEntry != &*I) // Not the canonical variable.
+ LinkedGlobalsMap[std::make_pair(GV.getName(), GV.getType())])
+ if (GVEntry != &GV) // Not the canonical variable.
continue;
}
- EmitGlobalVariable(I);
+ EmitGlobalVariable(&GV);
}
}
}
@@ -1290,12 +1302,12 @@ void ExecutionEngine::emitGlobals() {
void ExecutionEngine::EmitGlobalVariable(const GlobalVariable *GV) {
void *GA = getPointerToGlobalIfAvailable(GV);
- if (GA == 0) {
+ if (!GA) {
// If it's not already specified, allocate memory for the global.
GA = getMemoryForGV(GV);
// If we failed to allocate memory for this global, return.
- if (GA == 0) return;
+ if (!GA) return;
addGlobalMapping(GV, GA);
}
diff --git a/lib/ExecutionEngine/ExecutionEngineBindings.cpp b/lib/ExecutionEngine/ExecutionEngineBindings.cpp
index db3dead..6ff1e7a 100644
--- a/lib/ExecutionEngine/ExecutionEngineBindings.cpp
+++ b/lib/ExecutionEngine/ExecutionEngineBindings.cpp
@@ -11,7 +11,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "jit"
#include "llvm-c/ExecutionEngine.h"
#include "llvm/ExecutionEngine/ExecutionEngine.h"
#include "llvm/ExecutionEngine/GenericValue.h"
@@ -23,17 +22,11 @@
using namespace llvm;
+#define DEBUG_TYPE "jit"
+
// Wrapping the C bindings types.
DEFINE_SIMPLE_CONVERSION_FUNCTIONS(GenericValue, LLVMGenericValueRef)
-inline DataLayout *unwrap(LLVMTargetDataRef P) {
- return reinterpret_cast<DataLayout*>(P);
-}
-
-inline LLVMTargetDataRef wrap(const DataLayout *P) {
- return reinterpret_cast<LLVMTargetDataRef>(const_cast<DataLayout*>(P));
-}
-
inline TargetLibraryInfo *unwrap(LLVMTargetLibraryInfoRef P) {
return reinterpret_cast<TargetLibraryInfo*>(P);
}
@@ -410,7 +403,7 @@ uint8_t *SimpleBindingMemoryManager::allocateDataSection(
}
bool SimpleBindingMemoryManager::finalizeMemory(std::string *ErrMsg) {
- char *errMsgCString = 0;
+ char *errMsgCString = nullptr;
bool result = Functions.FinalizeMemory(Opaque, &errMsgCString);
assert((result || !errMsgCString) &&
"Did not expect an error message if FinalizeMemory succeeded");
@@ -433,7 +426,7 @@ LLVMMCJITMemoryManagerRef LLVMCreateSimpleMCJITMemoryManager(
if (!AllocateCodeSection || !AllocateDataSection || !FinalizeMemory ||
!Destroy)
- return NULL;
+ return nullptr;
SimpleBindingMMFunctions functions;
functions.AllocateCodeSection = AllocateCodeSection;
diff --git a/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp b/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp
index 2ca4e3e..9a65fa0 100644
--- a/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp
+++ b/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp
@@ -15,7 +15,6 @@
#include "llvm/Config/config.h"
#include "llvm/ExecutionEngine/JITEventListener.h"
-#define DEBUG_TYPE "amplifier-jit-event-listener"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Metadata.h"
@@ -34,6 +33,8 @@
using namespace llvm;
using namespace llvm::jitprofiling;
+#define DEBUG_TYPE "amplifier-jit-event-listener"
+
namespace {
class IntelJITEventListener : public JITEventListener {
@@ -193,11 +194,10 @@ void IntelJITEventListener::NotifyObjectEmitted(const ObjectImage &Obj) {
MethodAddressVector Functions;
// Use symbol info to iterate functions in the object.
- error_code ec;
for (object::symbol_iterator I = Obj.begin_symbols(),
E = Obj.end_symbols();
- I != E && !ec;
- I.increment(ec)) {
+ I != E;
+ ++I) {
std::vector<LineNumberInfo> LineInfo;
std::string SourceFileName;
@@ -234,7 +234,7 @@ void IntelJITEventListener::NotifyObjectEmitted(const ObjectImage &Obj) {
FunctionMessage.line_number_table = 0;
} else {
SourceFileName = Lines.front().second.getFileName();
- FunctionMessage.source_file_name = (char *)SourceFileName.c_str();
+ FunctionMessage.source_file_name = const_cast<char *>(SourceFileName.c_str());
FunctionMessage.line_number_size = LineInfo.size();
FunctionMessage.line_number_table = &*LineInfo.begin();
}
diff --git a/lib/ExecutionEngine/Interpreter/Execution.cpp b/lib/ExecutionEngine/Interpreter/Execution.cpp
index 8a80285..93bb2d1 100644
--- a/lib/ExecutionEngine/Interpreter/Execution.cpp
+++ b/lib/ExecutionEngine/Interpreter/Execution.cpp
@@ -11,7 +11,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "interpreter"
#include "Interpreter.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/Statistic.h"
@@ -28,6 +27,8 @@
#include <cmath>
using namespace llvm;
+#define DEBUG_TYPE "interpreter"
+
STATISTIC(NumDynamicInsts, "Number of dynamic instructions executed");
static cl::opt<bool> PrintVolatile("interpreter-print-volatile", cl::Hidden,
@@ -57,7 +58,7 @@ static void executeFAddInst(GenericValue &Dest, GenericValue Src1,
IMPLEMENT_BINARY_OPERATOR(+, Double);
default:
dbgs() << "Unhandled type for FAdd instruction: " << *Ty << "\n";
- llvm_unreachable(0);
+ llvm_unreachable(nullptr);
}
}
@@ -68,7 +69,7 @@ static void executeFSubInst(GenericValue &Dest, GenericValue Src1,
IMPLEMENT_BINARY_OPERATOR(-, Double);
default:
dbgs() << "Unhandled type for FSub instruction: " << *Ty << "\n";
- llvm_unreachable(0);
+ llvm_unreachable(nullptr);
}
}
@@ -79,7 +80,7 @@ static void executeFMulInst(GenericValue &Dest, GenericValue Src1,
IMPLEMENT_BINARY_OPERATOR(*, Double);
default:
dbgs() << "Unhandled type for FMul instruction: " << *Ty << "\n";
- llvm_unreachable(0);
+ llvm_unreachable(nullptr);
}
}
@@ -90,7 +91,7 @@ static void executeFDivInst(GenericValue &Dest, GenericValue Src1,
IMPLEMENT_BINARY_OPERATOR(/, Double);
default:
dbgs() << "Unhandled type for FDiv instruction: " << *Ty << "\n";
- llvm_unreachable(0);
+ llvm_unreachable(nullptr);
}
}
@@ -105,7 +106,7 @@ static void executeFRemInst(GenericValue &Dest, GenericValue Src1,
break;
default:
dbgs() << "Unhandled type for Rem instruction: " << *Ty << "\n";
- llvm_unreachable(0);
+ llvm_unreachable(nullptr);
}
}
@@ -142,7 +143,7 @@ static GenericValue executeICMP_EQ(GenericValue Src1, GenericValue Src2,
IMPLEMENT_POINTER_ICMP(==);
default:
dbgs() << "Unhandled type for ICMP_EQ predicate: " << *Ty << "\n";
- llvm_unreachable(0);
+ llvm_unreachable(nullptr);
}
return Dest;
}
@@ -156,7 +157,7 @@ static GenericValue executeICMP_NE(GenericValue Src1, GenericValue Src2,
IMPLEMENT_POINTER_ICMP(!=);
default:
dbgs() << "Unhandled type for ICMP_NE predicate: " << *Ty << "\n";
- llvm_unreachable(0);
+ llvm_unreachable(nullptr);
}
return Dest;
}
@@ -170,7 +171,7 @@ static GenericValue executeICMP_ULT(GenericValue Src1, GenericValue Src2,
IMPLEMENT_POINTER_ICMP(<);
default:
dbgs() << "Unhandled type for ICMP_ULT predicate: " << *Ty << "\n";
- llvm_unreachable(0);
+ llvm_unreachable(nullptr);
}
return Dest;
}
@@ -184,7 +185,7 @@ static GenericValue executeICMP_SLT(GenericValue Src1, GenericValue Src2,
IMPLEMENT_POINTER_ICMP(<);
default:
dbgs() << "Unhandled type for ICMP_SLT predicate: " << *Ty << "\n";
- llvm_unreachable(0);
+ llvm_unreachable(nullptr);
}
return Dest;
}
@@ -198,7 +199,7 @@ static GenericValue executeICMP_UGT(GenericValue Src1, GenericValue Src2,
IMPLEMENT_POINTER_ICMP(>);
default:
dbgs() << "Unhandled type for ICMP_UGT predicate: " << *Ty << "\n";
- llvm_unreachable(0);
+ llvm_unreachable(nullptr);
}
return Dest;
}
@@ -212,7 +213,7 @@ static GenericValue executeICMP_SGT(GenericValue Src1, GenericValue Src2,
IMPLEMENT_POINTER_ICMP(>);
default:
dbgs() << "Unhandled type for ICMP_SGT predicate: " << *Ty << "\n";
- llvm_unreachable(0);
+ llvm_unreachable(nullptr);
}
return Dest;
}
@@ -226,7 +227,7 @@ static GenericValue executeICMP_ULE(GenericValue Src1, GenericValue Src2,
IMPLEMENT_POINTER_ICMP(<=);
default:
dbgs() << "Unhandled type for ICMP_ULE predicate: " << *Ty << "\n";
- llvm_unreachable(0);
+ llvm_unreachable(nullptr);
}
return Dest;
}
@@ -240,7 +241,7 @@ static GenericValue executeICMP_SLE(GenericValue Src1, GenericValue Src2,
IMPLEMENT_POINTER_ICMP(<=);
default:
dbgs() << "Unhandled type for ICMP_SLE predicate: " << *Ty << "\n";
- llvm_unreachable(0);
+ llvm_unreachable(nullptr);
}
return Dest;
}
@@ -254,7 +255,7 @@ static GenericValue executeICMP_UGE(GenericValue Src1, GenericValue Src2,
IMPLEMENT_POINTER_ICMP(>=);
default:
dbgs() << "Unhandled type for ICMP_UGE predicate: " << *Ty << "\n";
- llvm_unreachable(0);
+ llvm_unreachable(nullptr);
}
return Dest;
}
@@ -268,7 +269,7 @@ static GenericValue executeICMP_SGE(GenericValue Src1, GenericValue Src2,
IMPLEMENT_POINTER_ICMP(>=);
default:
dbgs() << "Unhandled type for ICMP_SGE predicate: " << *Ty << "\n";
- llvm_unreachable(0);
+ llvm_unreachable(nullptr);
}
return Dest;
}
@@ -293,7 +294,7 @@ void Interpreter::visitICmpInst(ICmpInst &I) {
case ICmpInst::ICMP_SGE: R = executeICMP_SGE(Src1, Src2, Ty); break;
default:
dbgs() << "Don't know how to handle this ICmp predicate!\n-->" << I;
- llvm_unreachable(0);
+ llvm_unreachable(nullptr);
}
SetValue(&I, R, SF);
@@ -329,7 +330,7 @@ static GenericValue executeFCMP_OEQ(GenericValue Src1, GenericValue Src2,
IMPLEMENT_VECTOR_FCMP(==);
default:
dbgs() << "Unhandled type for FCmp EQ instruction: " << *Ty << "\n";
- llvm_unreachable(0);
+ llvm_unreachable(nullptr);
}
return Dest;
}
@@ -385,7 +386,7 @@ static GenericValue executeFCMP_ONE(GenericValue Src1, GenericValue Src2,
IMPLEMENT_VECTOR_FCMP(!=);
default:
dbgs() << "Unhandled type for FCmp NE instruction: " << *Ty << "\n";
- llvm_unreachable(0);
+ llvm_unreachable(nullptr);
}
// In the vector case, mask out the NaN elements.
if (Ty->isVectorTy())
@@ -405,7 +406,7 @@ static GenericValue executeFCMP_OLE(GenericValue Src1, GenericValue Src2,
IMPLEMENT_VECTOR_FCMP(<=);
default:
dbgs() << "Unhandled type for FCmp LE instruction: " << *Ty << "\n";
- llvm_unreachable(0);
+ llvm_unreachable(nullptr);
}
return Dest;
}
@@ -419,7 +420,7 @@ static GenericValue executeFCMP_OGE(GenericValue Src1, GenericValue Src2,
IMPLEMENT_VECTOR_FCMP(>=);
default:
dbgs() << "Unhandled type for FCmp GE instruction: " << *Ty << "\n";
- llvm_unreachable(0);
+ llvm_unreachable(nullptr);
}
return Dest;
}
@@ -433,7 +434,7 @@ static GenericValue executeFCMP_OLT(GenericValue Src1, GenericValue Src2,
IMPLEMENT_VECTOR_FCMP(<);
default:
dbgs() << "Unhandled type for FCmp LT instruction: " << *Ty << "\n";
- llvm_unreachable(0);
+ llvm_unreachable(nullptr);
}
return Dest;
}
@@ -447,7 +448,7 @@ static GenericValue executeFCMP_OGT(GenericValue Src1, GenericValue Src2,
IMPLEMENT_VECTOR_FCMP(>);
default:
dbgs() << "Unhandled type for FCmp GT instruction: " << *Ty << "\n";
- llvm_unreachable(0);
+ llvm_unreachable(nullptr);
}
return Dest;
}
@@ -615,7 +616,7 @@ void Interpreter::visitFCmpInst(FCmpInst &I) {
switch (I.getPredicate()) {
default:
dbgs() << "Don't know how to handle this FCmp predicate!\n-->" << I;
- llvm_unreachable(0);
+ llvm_unreachable(nullptr);
break;
case FCmpInst::FCMP_FALSE: R = executeFCMP_BOOL(Src1, Src2, Ty, false);
break;
@@ -672,7 +673,7 @@ static GenericValue executeCmpInst(unsigned predicate, GenericValue Src1,
case FCmpInst::FCMP_TRUE: return executeFCMP_BOOL(Src1, Src2, Ty, true);
default:
dbgs() << "Unhandled Cmp predicate\n";
- llvm_unreachable(0);
+ llvm_unreachable(nullptr);
}
}
@@ -726,7 +727,7 @@ void Interpreter::visitBinaryOperator(BinaryOperator &I) {
switch(I.getOpcode()){
default:
dbgs() << "Don't know how to handle this binary operator!\n-->" << I;
- llvm_unreachable(0);
+ llvm_unreachable(nullptr);
break;
case Instruction::Add: INTEGER_VECTOR_OPERATION(+) break;
case Instruction::Sub: INTEGER_VECTOR_OPERATION(-) break;
@@ -754,7 +755,7 @@ void Interpreter::visitBinaryOperator(BinaryOperator &I) {
fmod(Src1.AggregateVal[i].DoubleVal, Src2.AggregateVal[i].DoubleVal);
else {
dbgs() << "Unhandled type for Rem instruction: " << *Ty << "\n";
- llvm_unreachable(0);
+ llvm_unreachable(nullptr);
}
}
break;
@@ -763,7 +764,7 @@ void Interpreter::visitBinaryOperator(BinaryOperator &I) {
switch (I.getOpcode()) {
default:
dbgs() << "Don't know how to handle this binary operator!\n-->" << I;
- llvm_unreachable(0);
+ llvm_unreachable(nullptr);
break;
case Instruction::Add: R.IntVal = Src1.IntVal + Src2.IntVal; break;
case Instruction::Sub: R.IntVal = Src1.IntVal - Src2.IntVal; break;
@@ -896,7 +897,7 @@ void Interpreter::visitSwitchInst(SwitchInst &I) {
GenericValue CondVal = getOperandValue(Cond, SF);
// Check to see if any of the cases match...
- BasicBlock *Dest = 0;
+ BasicBlock *Dest = nullptr;
for (SwitchInst::CaseIt i = I.case_begin(), e = I.case_end(); i != e; ++i) {
GenericValue CaseVal = getOperandValue(i.getCaseValue(), SF);
if (executeICMP_EQ(CondVal, CaseVal, ElTy).IntVal != 0) {
@@ -979,7 +980,7 @@ void Interpreter::visitAllocaInst(AllocaInst &I) {
<< uintptr_t(Memory) << '\n');
GenericValue Result = PTOGV(Memory);
- assert(Result.PointerVal != 0 && "Null pointer returned by malloc!");
+ assert(Result.PointerVal && "Null pointer returned by malloc!");
SetValue(&I, Result, SF);
if (I.getOpcode() == Instruction::Alloca)
@@ -1732,7 +1733,7 @@ void Interpreter::visitVAArgInst(VAArgInst &I) {
IMPLEMENT_VAARG(Double);
default:
dbgs() << "Unhandled dest type for vaarg instruction: " << *Ty << "\n";
- llvm_unreachable(0);
+ llvm_unreachable(nullptr);
}
// Set the Value of this Instruction.
@@ -1756,7 +1757,7 @@ void Interpreter::visitExtractElementInst(ExtractElementInst &I) {
default:
dbgs() << "Unhandled destination type for extractelement instruction: "
<< *Ty << "\n";
- llvm_unreachable(0);
+ llvm_unreachable(nullptr);
break;
case Type::IntegerTyID:
Dest.IntVal = Src1.AggregateVal[indx].IntVal;
@@ -2073,7 +2074,7 @@ GenericValue Interpreter::getOperandValue(Value *V, ExecutionContext &SF) {
//
void Interpreter::callFunction(Function *F,
const std::vector<GenericValue> &ArgVals) {
- assert((ECStack.empty() || ECStack.back().Caller.getInstruction() == 0 ||
+ assert((ECStack.empty() || !ECStack.back().Caller.getInstruction() ||
ECStack.back().Caller.arg_size() == ArgVals.size()) &&
"Incorrect number of arguments passed into function call!");
// Make a new stack frame... and fill it in.
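
The Execution.cpp hunks above are almost entirely mechanical: literal 0 and NULL become nullptr, and pointer comparisons against 0 become boolean tests. A minimal standalone sketch (not from this commit) of why nullptr is the safer spelling — it cannot silently select an integer overload the way a literal 0 can:

#include <iostream>

static void f(int)    { std::cout << "f(int)\n"; }
static void f(char *) { std::cout << "f(char *)\n"; }

int main() {
  f(0);       // binds to f(int): 0 is an integer literal first
  f(nullptr); // unambiguously binds to f(char *)
  return 0;
}
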
diff --git a/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp b/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp
index a03c7f5..671bbee 100644
--- a/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp
+++ b/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp
@@ -98,13 +98,13 @@ static ExFunc lookupFunction(const Function *F) {
sys::ScopedLock Writer(*FunctionsLock);
ExFunc FnPtr = FuncNames[ExtName];
- if (FnPtr == 0)
+ if (!FnPtr)
FnPtr = FuncNames["lle_X_" + F->getName().str()];
- if (FnPtr == 0) // Try calling a generic function... if it exists...
+ if (!FnPtr) // Try calling a generic function... if it exists...
FnPtr = (ExFunc)(intptr_t)
sys::DynamicLibrary::SearchForAddressOfSymbol("lle_X_" +
F->getName().str());
- if (FnPtr != 0)
+ if (FnPtr)
ExportedFunctions->insert(std::make_pair(F, FnPtr)); // Cache for later
return FnPtr;
}
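
The null checks above tighten into boolean tests, but lookupFunction's shape is unchanged: try the registered name, then an "lle_X_"-prefixed generic entry point, then the process symbol table, and cache any hit. A reduced, self-contained sketch of that layered, memoized lookup (ExFunc, FuncNames, and searchProcess are stand-ins here, not the LLVM types):

#include <map>
#include <string>

using ExFunc = void (*)();

std::map<std::string, ExFunc> FuncNames;          // statically registered
std::map<std::string, ExFunc> ExportedFunctions;  // cache of resolved hits

// Stand-in for the dynamic-library symbol search (always misses here).
ExFunc searchProcess(const std::string &) { return nullptr; }

ExFunc lookupFunction(const std::string &Name) {
  ExFunc Fn = FuncNames[Name];
  if (!Fn)
    Fn = FuncNames["lle_X_" + Name];   // generic fallback entry point
  if (!Fn)
    Fn = searchProcess("lle_X_" + Name);
  if (Fn)
    ExportedFunctions[Name] = Fn;      // cache for later calls
  return Fn;
}
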
diff --git a/lib/ExecutionEngine/Interpreter/Interpreter.cpp b/lib/ExecutionEngine/Interpreter/Interpreter.cpp
index 6d4f6f7..c589457 100644
--- a/lib/ExecutionEngine/Interpreter/Interpreter.cpp
+++ b/lib/ExecutionEngine/Interpreter/Interpreter.cpp
@@ -38,7 +38,7 @@ ExecutionEngine *Interpreter::create(Module *M, std::string* ErrStr) {
if (ErrStr)
*ErrStr = EC.message();
// We got an error, just return null
- return 0;
+ return nullptr;
}
return new Interpreter(M);
diff --git a/lib/ExecutionEngine/Interpreter/Interpreter.h b/lib/ExecutionEngine/Interpreter/Interpreter.h
index 2e93cae..2145cde 100644
--- a/lib/ExecutionEngine/Interpreter/Interpreter.h
+++ b/lib/ExecutionEngine/Interpreter/Interpreter.h
@@ -108,7 +108,7 @@ public:
/// create - Create an interpreter ExecutionEngine. This can never fail.
///
- static ExecutionEngine *create(Module *M, std::string *ErrorStr = 0);
+ static ExecutionEngine *create(Module *M, std::string *ErrorStr = nullptr);
/// run - Start execution with the specified function and arguments.
///
@@ -118,7 +118,7 @@ public:
void *getPointerToNamedFunction(const std::string &Name,
bool AbortOnFailure = true) override {
// FIXME: not implemented.
- return 0;
+ return nullptr;
}
/// recompileAndRelinkFunction - For the interpreter, functions are always
diff --git a/lib/ExecutionEngine/JIT/JIT.cpp b/lib/ExecutionEngine/JIT/JIT.cpp
index d3ad77b..f8b2827 100644
--- a/lib/ExecutionEngine/JIT/JIT.cpp
+++ b/lib/ExecutionEngine/JIT/JIT.cpp
@@ -79,7 +79,7 @@ ExecutionEngine *JIT::createJIT(Module *M,
// Try to register the program as a source of symbols to resolve against.
//
// FIXME: Don't do this here.
- sys::DynamicLibrary::LoadLibraryPermanently(0, NULL);
+ sys::DynamicLibrary::LoadLibraryPermanently(nullptr, nullptr);
// If the target supports JIT code generation, create the JIT.
if (TargetJITInfo *TJ = TM->getJITInfo()) {
@@ -87,7 +87,7 @@ ExecutionEngine *JIT::createJIT(Module *M,
} else {
if (ErrorStr)
*ErrorStr = "target does not support JIT code generation";
- return 0;
+ return nullptr;
}
}
@@ -157,7 +157,7 @@ JIT::JIT(Module *M, TargetMachine &tm, TargetJITInfo &tji,
// Turn the machine code intermediate representation into bytes in memory that
// may be executed.
- if (TM.addPassesToEmitMachineCode(PM, *JCE)) {
+ if (TM.addPassesToEmitMachineCode(PM, *JCE, !getVerifyModules())) {
report_fatal_error("Target does not support machine code emission!");
}
@@ -190,7 +190,7 @@ void JIT::addModule(Module *M) {
// Turn the machine code intermediate representation into bytes in memory
// that may be executed.
- if (TM.addPassesToEmitMachineCode(PM, *JCE)) {
+ if (TM.addPassesToEmitMachineCode(PM, *JCE, !getVerifyModules())) {
report_fatal_error("Target does not support machine code emission!");
}
@@ -210,7 +210,7 @@ bool JIT::removeModule(Module *M) {
if (jitstate && jitstate->getModule() == M) {
delete jitstate;
- jitstate = 0;
+ jitstate = nullptr;
}
if (!jitstate && !Modules.empty()) {
@@ -222,7 +222,7 @@ bool JIT::removeModule(Module *M) {
// Turn the machine code intermediate representation into bytes in memory
// that may be executed.
- if (TM.addPassesToEmitMachineCode(PM, *JCE)) {
+ if (TM.addPassesToEmitMachineCode(PM, *JCE, !getVerifyModules())) {
report_fatal_error("Target does not support machine code emission!");
}
@@ -353,7 +353,7 @@ GenericValue JIT::runFunction(Function *F,
// currently don't support varargs.
SmallVector<Value*, 8> Args;
for (unsigned i = 0, e = ArgValues.size(); i != e; ++i) {
- Constant *C = 0;
+ Constant *C = nullptr;
Type *ArgTy = FTy->getParamType(i);
const GenericValue &AV = ArgValues[i];
switch (ArgTy->getTypeID()) {
@@ -406,13 +406,13 @@ GenericValue JIT::runFunction(Function *F,
}
void JIT::RegisterJITEventListener(JITEventListener *L) {
- if (L == NULL)
+ if (!L)
return;
MutexGuard locked(lock);
EventListeners.push_back(L);
}
void JIT::UnregisterJITEventListener(JITEventListener *L) {
- if (L == NULL)
+ if (!L)
return;
MutexGuard locked(lock);
std::vector<JITEventListener*>::reverse_iterator I=
@@ -584,7 +584,7 @@ void *JIT::getPointerToNamedFunction(const std::string &Name,
report_fatal_error("Program used external function '"+Name+
"' which could not be resolved!");
}
- return 0;
+ return nullptr;
}
@@ -604,7 +604,7 @@ void *JIT::getOrEmitGlobalVariable(const GlobalVariable *GV) {
return (void*)&__dso_handle;
#endif
Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(GV->getName());
- if (Ptr == 0) {
+ if (!Ptr) {
report_fatal_error("Could not resolve external global address: "
+GV->getName());
}
@@ -629,10 +629,10 @@ void *JIT::recompileAndRelinkFunction(Function *F) {
void *OldAddr = getPointerToGlobalIfAvailable(F);
// If it's not already compiled there is no reason to patch it up.
- if (OldAddr == 0) { return getPointerToFunction(F); }
+ if (!OldAddr) return getPointerToFunction(F);
// Delete the old function mapping.
- addGlobalMapping(F, 0);
+ addGlobalMapping(F, nullptr);
// Recodegen the function
runJITOnFunction(F);
diff --git a/lib/ExecutionEngine/JIT/JIT.h b/lib/ExecutionEngine/JIT/JIT.h
index b1b0768..d2bd508 100644
--- a/lib/ExecutionEngine/JIT/JIT.h
+++ b/lib/ExecutionEngine/JIT/JIT.h
@@ -189,7 +189,7 @@ public:
TargetMachine *TM);
// Run the JIT on F and return information about the generated code
- void runJITOnFunction(Function *F, MachineCodeInfo *MCI = 0) override;
+ void runJITOnFunction(Function *F, MachineCodeInfo *MCI = nullptr) override;
void RegisterJITEventListener(JITEventListener *L) override;
void UnregisterJITEventListener(JITEventListener *L) override;
diff --git a/lib/ExecutionEngine/JIT/JITEmitter.cpp b/lib/ExecutionEngine/JIT/JITEmitter.cpp
index 9d215ec..cd7a500 100644
--- a/lib/ExecutionEngine/JIT/JITEmitter.cpp
+++ b/lib/ExecutionEngine/JIT/JITEmitter.cpp
@@ -12,7 +12,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "jit"
#include "JIT.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
@@ -52,6 +51,8 @@
#endif
using namespace llvm;
+#define DEBUG_TYPE "jit"
+
STATISTIC(NumBytes, "Number of bytes of machine code compiled");
STATISTIC(NumRelos, "Number of relocations applied");
STATISTIC(NumRetries, "Number of retries with more memory");
@@ -343,7 +344,8 @@ namespace {
void *FunctionBody; // Beginning of the function's allocation.
void *Code; // The address the function's code actually starts at.
void *ExceptionTable;
- EmittedCode() : FunctionBody(0), Code(0), ExceptionTable(0) {}
+ EmittedCode() : FunctionBody(nullptr), Code(nullptr),
+ ExceptionTable(nullptr) {}
};
struct EmittedFunctionConfig : public ValueMapConfig<const Function*> {
typedef JITEmitter *ExtraData;
@@ -360,7 +362,7 @@ namespace {
public:
JITEmitter(JIT &jit, JITMemoryManager *JMM, TargetMachine &TM)
- : SizeEstimate(0), Resolver(jit, *this), MMI(0), CurFn(0),
+ : SizeEstimate(0), Resolver(jit, *this), MMI(nullptr), CurFn(nullptr),
EmittedFunctions(this), TheJIT(&jit) {
MemMgr = JMM ? JMM : JITMemoryManager::CreateDefaultMemManager();
if (jit.getJITInfo().needsGOT()) {
@@ -516,7 +518,7 @@ void *JITResolver::getLazyFunctionStub(Function *F) {
// Call the lazy resolver function if we are JIT'ing lazily. Otherwise we
// must resolve the symbol now.
void *Actual = TheJIT->isCompilingLazily()
- ? (void *)(intptr_t)LazyResolverFn : (void *)0;
+ ? (void *)(intptr_t)LazyResolverFn : (void *)nullptr;
// If this is an external declaration, attempt to resolve the address now
// to place in the stub.
@@ -525,7 +527,7 @@ void *JITResolver::getLazyFunctionStub(Function *F) {
// If we resolved the symbol to a null address (e.g., a weak external)
// don't emit a stub. Return a null pointer to the application.
- if (!Actual) return 0;
+ if (!Actual) return nullptr;
}
TargetJITInfo::StubLayout SL = TheJIT->getJITInfo().getStubLayout();
@@ -592,8 +594,8 @@ void *JITResolver::getExternalFunctionStub(void *FnAddr) {
if (Stub) return Stub;
TargetJITInfo::StubLayout SL = TheJIT->getJITInfo().getStubLayout();
- JE.startGVStub(0, SL.Size, SL.Alignment);
- Stub = TheJIT->getJITInfo().emitFunctionStub(0, FnAddr, JE);
+ JE.startGVStub(nullptr, SL.Size, SL.Alignment);
+ Stub = TheJIT->getJITInfo().emitFunctionStub(nullptr, FnAddr, JE);
JE.finishGVStub();
DEBUG(dbgs() << "JIT: Stub emitted at [" << Stub
@@ -619,8 +621,8 @@ void *JITResolver::JITCompilerFn(void *Stub) {
JITResolver *JR = StubToResolverMap->getResolverFromStub(Stub);
assert(JR && "Unable to find the corresponding JITResolver to the call site");
- Function* F = 0;
- void* ActualPtr = 0;
+ Function* F = nullptr;
+ void* ActualPtr = nullptr;
{
// Only lock for getting the Function. The call getPointerToFunction made
@@ -688,7 +690,7 @@ void *JITEmitter::getPointerToGlobal(GlobalValue *V, void *Reference,
return TheJIT->getOrEmitGlobalVariable(GV);
if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V))
- return TheJIT->getPointerToGlobal(GA->getAliasedGlobal());
+ return TheJIT->getPointerToGlobal(GA->getAliasee());
// If we have already compiled the function, return a pointer to its body.
Function *F = cast<Function>(V);
@@ -735,7 +737,7 @@ void JITEmitter::processDebugLoc(DebugLoc DL, bool BeforePrintingInsn) {
const LLVMContext &Context = EmissionDetails.MF->getFunction()->getContext();
- if (DL.getScope(Context) != 0 && PrevDL != DL) {
+ if (DL.getScope(Context) != nullptr && PrevDL != DL) {
JITEvent_EmittedFunctionDetails::LineStart NextLine;
NextLine.Address = getCurrentPCValue();
NextLine.Loc = DL;
@@ -824,7 +826,7 @@ bool JITEmitter::finishFunction(MachineFunction &F) {
// Resolve the relocations to concrete pointers.
for (unsigned i = 0, e = Relocations.size(); i != e; ++i) {
MachineRelocation &MR = Relocations[i];
- void *ResultPtr = 0;
+ void *ResultPtr = nullptr;
if (!MR.letTargetResolve()) {
if (MR.isExternalSymbol()) {
ResultPtr = TheJIT->getPointerToNamedFunction(MR.getExternalSymbol(),
@@ -870,7 +872,7 @@ bool JITEmitter::finishFunction(MachineFunction &F) {
}
}
- CurFn = 0;
+ CurFn = nullptr;
TheJIT->getJITInfo().relocate(BufferBegin, &Relocations[0],
Relocations.size(), MemMgr->getGOTBase());
}
@@ -899,7 +901,7 @@ bool JITEmitter::finishFunction(MachineFunction &F) {
SizeEstimate = 0;
}
- BufferBegin = CurBufferPtr = 0;
+ BufferBegin = CurBufferPtr = nullptr;
NumBytes += FnEnd-FnStart;
// Invalidate the icache if necessary.
@@ -1017,7 +1019,7 @@ void JITEmitter::emitConstantPool(MachineConstantPool *MCP) {
ConstantPoolBase = allocateSpace(Size, Align);
ConstantPool = MCP;
- if (ConstantPoolBase == 0) return; // Buffer overflow.
+ if (!ConstantPoolBase) return; // Buffer overflow.
DEBUG(dbgs() << "JIT: Emitted constant pool at [" << ConstantPoolBase
<< "] (size: " << Size << ", alignment: " << Align << ")\n");
@@ -1073,7 +1075,7 @@ void JITEmitter::emitJumpTableInfo(MachineJumpTableInfo *MJTI) {
return;
const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
- if (JT.empty() || JumpTableBase == 0) return;
+ if (JT.empty() || !JumpTableBase) return;
switch (MJTI->getEntryKind()) {
@@ -1243,7 +1245,7 @@ void JIT::updateFunctionStub(Function *F) {
void JIT::freeMachineCodeForFunction(Function *F) {
// Delete translation for this from the ExecutionEngine, so it will get
// retranslated next time it is used.
- updateGlobalMapping(F, 0);
+ updateGlobalMapping(F, nullptr);
// Free the actual memory for the function body and related stuff.
static_cast<JITEmitter*>(JCE)->deallocateMemForFunction(F);
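
JITEmitter.cpp (and several files below) move #define DEBUG_TYPE from above the #include block to below it. The convention this rebase adopts: the macro must not be in scope while headers are being included, since a header may use or define DEBUG_TYPE itself. A sketch of the resulting file layout (emitSomething is a hypothetical function, not from this commit):

// Includes first, with no DEBUG_TYPE in scope...
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

// ...then scope the debug channel to this translation unit.
#define DEBUG_TYPE "jit"

using namespace llvm;

void emitSomething() {
  DEBUG(dbgs() << "visible only under -debug-only=" DEBUG_TYPE "\n");
}
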
diff --git a/lib/ExecutionEngine/JIT/JITMemoryManager.cpp b/lib/ExecutionEngine/JIT/JITMemoryManager.cpp
index 0d1ea02..584b93f 100644
--- a/lib/ExecutionEngine/JIT/JITMemoryManager.cpp
+++ b/lib/ExecutionEngine/JIT/JITMemoryManager.cpp
@@ -11,7 +11,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "jit"
#include "llvm/ExecutionEngine/JITMemoryManager.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
@@ -40,6 +39,8 @@
using namespace llvm;
+#define DEBUG_TYPE "jit"
+
STATISTIC(NumSlabs, "Number of slabs of memory allocated by the JIT");
JITMemoryManager::~JITMemoryManager() {}
@@ -80,7 +81,7 @@ namespace {
/// getFreeBlockBefore - If the block before this one is free, return it,
/// otherwise return null.
FreeRangeHeader *getFreeBlockBefore() const {
- if (PrevAllocated) return 0;
+ if (PrevAllocated) return nullptr;
intptr_t PrevSize = reinterpret_cast<intptr_t *>(
const_cast<MemoryRangeHeader *>(this))[-1];
return reinterpret_cast<FreeRangeHeader *>(
@@ -174,7 +175,7 @@ FreeRangeHeader *MemoryRangeHeader::FreeBlock(FreeRangeHeader *FreeList) {
// coalesce with it, update our notion of what the free list is.
if (&FollowingFreeBlock == FreeList) {
FreeList = FollowingFreeBlock.Next;
- FreeListToReturn = 0;
+ FreeListToReturn = nullptr;
assert(&FollowingFreeBlock != FreeList && "No tombstone block?");
}
FollowingFreeBlock.RemoveFromFreeList();
@@ -269,13 +270,12 @@ namespace {
class DefaultJITMemoryManager;
- class JITSlabAllocator : public SlabAllocator {
+ class JITAllocator {
DefaultJITMemoryManager &JMM;
public:
- JITSlabAllocator(DefaultJITMemoryManager &jmm) : JMM(jmm) { }
- virtual ~JITSlabAllocator() { }
- MemSlab *Allocate(size_t Size) override;
- void Deallocate(MemSlab *Slab) override;
+ JITAllocator(DefaultJITMemoryManager &jmm) : JMM(jmm) { }
+ void *Allocate(size_t Size, size_t /*Alignment*/);
+ void Deallocate(void *Slab, size_t Size);
};
/// DefaultJITMemoryManager - Manage memory for the JIT code generation.
@@ -313,9 +313,10 @@ namespace {
// Memory slabs allocated by the JIT. We refer to them as slabs so we don't
// confuse them with the blocks of memory described above.
std::vector<sys::MemoryBlock> CodeSlabs;
- JITSlabAllocator BumpSlabAllocator;
- BumpPtrAllocatorImpl<DefaultSlabSize, DefaultSizeThreshold> StubAllocator;
- BumpPtrAllocatorImpl<DefaultSlabSize, DefaultSizeThreshold> DataAllocator;
+ BumpPtrAllocatorImpl<JITAllocator, DefaultSlabSize,
+ DefaultSizeThreshold> StubAllocator;
+ BumpPtrAllocatorImpl<JITAllocator, DefaultSlabSize,
+ DefaultSizeThreshold> DataAllocator;
// Circular list of free blocks.
FreeRangeHeader *FreeMemoryList;
@@ -568,30 +569,24 @@ namespace {
};
}
-MemSlab *JITSlabAllocator::Allocate(size_t Size) {
+void *JITAllocator::Allocate(size_t Size, size_t /*Alignment*/) {
sys::MemoryBlock B = JMM.allocateNewSlab(Size);
- MemSlab *Slab = (MemSlab*)B.base();
- Slab->Size = B.size();
- Slab->NextPtr = 0;
- return Slab;
+ return B.base();
}
-void JITSlabAllocator::Deallocate(MemSlab *Slab) {
- sys::MemoryBlock B(Slab, Slab->Size);
+void JITAllocator::Deallocate(void *Slab, size_t Size) {
+ sys::MemoryBlock B(Slab, Size);
sys::Memory::ReleaseRWX(B);
}
DefaultJITMemoryManager::DefaultJITMemoryManager()
- :
+ :
#ifdef NDEBUG
- PoisonMemory(false),
+ PoisonMemory(false),
#else
- PoisonMemory(true),
+ PoisonMemory(true),
#endif
- LastSlab(0, 0),
- BumpSlabAllocator(*this),
- StubAllocator(BumpSlabAllocator),
- DataAllocator(BumpSlabAllocator) {
+ LastSlab(nullptr, 0), StubAllocator(*this), DataAllocator(*this) {
// Allocate space for code.
sys::MemoryBlock MemBlock = allocateNewSlab(DefaultCodeSlabSize);
@@ -644,11 +639,11 @@ DefaultJITMemoryManager::DefaultJITMemoryManager()
// Start out with the freelist pointing to Mem0.
FreeMemoryList = Mem0;
- GOTBase = NULL;
+ GOTBase = nullptr;
}
void DefaultJITMemoryManager::AllocateGOT() {
- assert(GOTBase == 0 && "Cannot allocate the GOT multiple times");
+ assert(!GOTBase && "Cannot allocate the GOT multiple times");
GOTBase = new uint8_t[sizeof(void*) * 8192];
HasGOT = true;
}
@@ -663,9 +658,9 @@ DefaultJITMemoryManager::~DefaultJITMemoryManager() {
sys::MemoryBlock DefaultJITMemoryManager::allocateNewSlab(size_t size) {
// Allocate a new block close to the last one.
std::string ErrMsg;
- sys::MemoryBlock *LastSlabPtr = LastSlab.base() ? &LastSlab : 0;
+ sys::MemoryBlock *LastSlabPtr = LastSlab.base() ? &LastSlab : nullptr;
sys::MemoryBlock B = sys::Memory::AllocateRWX(size, LastSlabPtr, &ErrMsg);
- if (B.base() == 0) {
+ if (!B.base()) {
report_fatal_error("Allocation failed when allocating new memory in the"
" JIT\n" + Twine(ErrMsg));
}
@@ -726,7 +721,7 @@ bool DefaultJITMemoryManager::CheckInvariants(std::string &ErrorStr) {
char *End = Start + I->size();
// Check each memory range.
- for (MemoryRangeHeader *Hdr = (MemoryRangeHeader*)Start, *LastHdr = NULL;
+ for (MemoryRangeHeader *Hdr = (MemoryRangeHeader*)Start, *LastHdr = nullptr;
Start <= (char*)Hdr && (char*)Hdr < End;
Hdr = &Hdr->getBlockAfter()) {
if (Hdr->ThisAllocated == 0) {
@@ -895,7 +890,7 @@ void *DefaultJITMemoryManager::getPointerToNamedFunction(const std::string &Name
report_fatal_error("Program used external function '"+Name+
"' which could not be resolved!");
}
- return 0;
+ return nullptr;
}
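
The JITMemoryManager rework above replaces the virtual SlabAllocator interface with a template parameter: BumpPtrAllocatorImpl now calls Allocate(Size, Alignment) and Deallocate(Ptr, Size) directly on the policy type, so slab allocation costs no virtual dispatch. A self-contained sketch of that duck-typed allocator-policy pattern (hypothetical classes, not the LLVM ones):

#include <cstddef>
#include <cstdlib>
#include <utility>
#include <vector>

// Any type with these two members satisfies the policy; no base class.
struct MallocSlabPolicy {
  void *Allocate(size_t Size, size_t /*Alignment*/) { return std::malloc(Size); }
  void Deallocate(void *Slab, size_t /*Size*/) { std::free(Slab); }
};

template <typename AllocatorT, size_t SlabSize = 4096>
class BumpAllocator {
  AllocatorT &Policy;
  char *Cur = nullptr;
  char *End = nullptr;
  std::vector<std::pair<void *, size_t>> Slabs;

public:
  explicit BumpAllocator(AllocatorT &P) : Policy(P) {}
  ~BumpAllocator() {
    for (auto &S : Slabs)
      Policy.Deallocate(S.first, S.second);
  }

  void *allocate(size_t Size) {
    if (!Cur || size_t(End - Cur) < Size) {   // current slab exhausted
      size_t N = Size > SlabSize ? Size : SlabSize;
      void *Slab = Policy.Allocate(N, alignof(std::max_align_t));
      Slabs.push_back(std::make_pair(Slab, N));
      Cur = static_cast<char *>(Slab);
      End = Cur + N;
    }
    void *P = Cur;
    Cur += Size;              // bump the pointer; no per-object bookkeeping
    return P;
  }
};

int main() {
  MallocSlabPolicy Policy;
  BumpAllocator<MallocSlabPolicy> A(Policy);
  void *P = A.allocate(128);
  return P ? 0 : 1;
}
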
diff --git a/lib/ExecutionEngine/MCJIT/LLVMBuild.txt b/lib/ExecutionEngine/MCJIT/LLVMBuild.txt
index 90f4d2f..922cd0d 100644
--- a/lib/ExecutionEngine/MCJIT/LLVMBuild.txt
+++ b/lib/ExecutionEngine/MCJIT/LLVMBuild.txt
@@ -19,4 +19,4 @@
type = Library
name = MCJIT
parent = ExecutionEngine
-required_libraries = Core ExecutionEngine RuntimeDyld Support Target
+required_libraries = Core ExecutionEngine Object RuntimeDyld Support Target
diff --git a/lib/ExecutionEngine/MCJIT/MCJIT.cpp b/lib/ExecutionEngine/MCJIT/MCJIT.cpp
index 49b6727..42cb4ea 100644
--- a/lib/ExecutionEngine/MCJIT/MCJIT.cpp
+++ b/lib/ExecutionEngine/MCJIT/MCJIT.cpp
@@ -50,7 +50,7 @@ ExecutionEngine *MCJIT::createJIT(Module *M,
// Try to register the program as a source of symbols to resolve against.
//
// FIXME: Don't do this here.
- sys::DynamicLibrary::LoadLibraryPermanently(0, NULL);
+ sys::DynamicLibrary::LoadLibraryPermanently(nullptr, nullptr);
return new MCJIT(M, TM, MemMgr ? MemMgr : new SectionMemoryManager(),
GVsWithCode);
@@ -58,8 +58,8 @@ ExecutionEngine *MCJIT::createJIT(Module *M,
MCJIT::MCJIT(Module *m, TargetMachine *tm, RTDyldMemoryManager *MM,
bool AllocateGVsWithCode)
- : ExecutionEngine(m), TM(tm), Ctx(0), MemMgr(this, MM), Dyld(&MemMgr),
- ObjCache(0) {
+ : ExecutionEngine(m), TM(tm), Ctx(nullptr), MemMgr(this, MM), Dyld(&MemMgr),
+ ObjCache(nullptr) {
OwnedModules.addModule(m);
setDataLayout(TM->getDataLayout());
@@ -113,8 +113,8 @@ bool MCJIT::removeModule(Module *M) {
-void MCJIT::addObjectFile(object::ObjectFile *Obj) {
- ObjectImage *LoadedObject = Dyld.loadObject(Obj);
+void MCJIT::addObjectFile(std::unique_ptr<object::ObjectFile> Obj) {
+ ObjectImage *LoadedObject = Dyld.loadObject(std::move(Obj));
if (!LoadedObject || Dyld.hasError())
report_fatal_error(Dyld.getErrorString());
@@ -150,7 +150,8 @@ ObjectBufferStream* MCJIT::emitObject(Module *M) {
// Turn the machine code intermediate representation into bytes in memory
// that may be executed.
- if (TM->addPassesToEmitMC(PM, Ctx, CompiledObject->getOStream(), false)) {
+ if (TM->addPassesToEmitMC(PM, Ctx, CompiledObject->getOStream(),
+ !getVerifyModules())) {
report_fatal_error("Target does not support MC emission!");
}
@@ -185,9 +186,9 @@ void MCJIT::generateCodeForModule(Module *M) {
std::unique_ptr<ObjectBuffer> ObjectToLoad;
// Try to load the pre-compiled object from cache if possible
- if (0 != ObjCache) {
+ if (ObjCache) {
std::unique_ptr<MemoryBuffer> PreCompiledObject(ObjCache->getObject(M));
- if (0 != PreCompiledObject.get())
+ if (PreCompiledObject.get())
ObjectToLoad.reset(new ObjectBuffer(PreCompiledObject.release()));
}
@@ -285,7 +286,7 @@ Module *MCJIT::findModuleForSymbol(const std::string &Name,
}
}
// We didn't find the symbol in any of our modules.
- return NULL;
+ return nullptr;
}
uint64_t MCJIT::getSymbolAddress(const std::string &Name,
@@ -307,10 +308,10 @@ uint64_t MCJIT::getSymbolAddress(const std::string &Name,
std::unique_ptr<object::Binary> ChildBin;
// FIXME: Support nested archives?
if (!ChildIt->getAsBinary(ChildBin) && ChildBin->isObject()) {
- object::ObjectFile *OF = reinterpret_cast<object::ObjectFile *>(
- ChildBin.release());
+ std::unique_ptr<object::ObjectFile> OF(
+ static_cast<object::ObjectFile *>(ChildBin.release()));
// This causes the object file to be loaded.
- addObjectFile(OF);
+ addObjectFile(std::move(OF));
// The address should be here now.
Addr = getExistingSymbolAddress(Name);
if (Addr)
@@ -365,7 +366,7 @@ void *MCJIT::getPointerToFunction(Function *F) {
generateCodeForModule(M);
else if (!OwnedModules.hasModuleBeenLoaded(M))
// If this function doesn't belong to one of our modules, we're done.
- return NULL;
+ return nullptr;
// FIXME: Should the Dyld be retaining module information? Probably not.
//
@@ -409,7 +410,7 @@ Function *MCJIT::FindFunctionNamedInModulePtrSet(const char *FnName,
if (Function *F = (*I)->getFunction(FnName))
return F;
}
- return 0;
+ return nullptr;
}
Function *MCJIT::FindFunctionNamed(const char *FnName) {
@@ -541,17 +542,17 @@ void *MCJIT::getPointerToNamedFunction(const std::string &Name,
report_fatal_error("Program used external function '"+Name+
"' which could not be resolved!");
}
- return 0;
+ return nullptr;
}
void MCJIT::RegisterJITEventListener(JITEventListener *L) {
- if (L == NULL)
+ if (!L)
return;
MutexGuard locked(lock);
EventListeners.push_back(L);
}
void MCJIT::UnregisterJITEventListener(JITEventListener *L) {
- if (L == NULL)
+ if (!L)
return;
MutexGuard locked(lock);
SmallVector<JITEventListener*, 2>::reverse_iterator I=
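
MCJIT::addObjectFile now takes its ObjectFile by std::unique_ptr, so ownership transfer is visible at every call site instead of being an undocumented convention. A generic sketch of the sink-parameter idiom (Engine and ObjectFile are placeholders, not the LLVM classes):

#include <memory>
#include <utility>
#include <vector>

struct ObjectFile {};

class Engine {
  std::vector<std::unique_ptr<ObjectFile>> Loaded;

public:
  // Sink parameter: the engine takes ownership; callers must std::move.
  void addObjectFile(std::unique_ptr<ObjectFile> Obj) {
    Loaded.push_back(std::move(Obj));
  }
};

int main() {
  Engine E;
  std::unique_ptr<ObjectFile> Obj(new ObjectFile());
  E.addObjectFile(std::move(Obj)); // Obj is null from here on; E owns it
  return 0;
}
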
diff --git a/lib/ExecutionEngine/MCJIT/MCJIT.h b/lib/ExecutionEngine/MCJIT/MCJIT.h
index 066eceb..100e9a2 100644
--- a/lib/ExecutionEngine/MCJIT/MCJIT.h
+++ b/lib/ExecutionEngine/MCJIT/MCJIT.h
@@ -71,7 +71,7 @@ public:
ClientMM->deregisterEHFrames(Addr, LoadAddr, Size);
}
- bool finalizeMemory(std::string *ErrMsg = 0) override {
+ bool finalizeMemory(std::string *ErrMsg = nullptr) override {
return ClientMM->finalizeMemory(ErrMsg);
}
@@ -239,7 +239,7 @@ public:
/// @name ExecutionEngine interface implementation
/// @{
void addModule(Module *M) override;
- void addObjectFile(object::ObjectFile *O) override;
+ void addObjectFile(std::unique_ptr<object::ObjectFile> O) override;
void addArchive(object::Archive *O) override;
bool removeModule(Module *M) override;
diff --git a/lib/ExecutionEngine/MCJIT/SectionMemoryManager.cpp b/lib/ExecutionEngine/MCJIT/SectionMemoryManager.cpp
index f1dd5a6..9ceaa90 100644
--- a/lib/ExecutionEngine/MCJIT/SectionMemoryManager.cpp
+++ b/lib/ExecutionEngine/MCJIT/SectionMemoryManager.cpp
@@ -79,7 +79,7 @@ uint8_t *SectionMemoryManager::allocateSection(MemoryGroup &MemGroup,
ec);
if (ec) {
// FIXME: Add error propagation to the interface.
- return NULL;
+ return nullptr;
}
// Save this address as the basis for our next request
diff --git a/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp b/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp
index 87cef2e..fd37a13 100644
--- a/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp
+++ b/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp
@@ -15,7 +15,6 @@
#include "llvm/Config/config.h"
#include "llvm/ExecutionEngine/JITEventListener.h"
-#define DEBUG_TYPE "oprofile-jit-event-listener"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -33,6 +32,8 @@
using namespace llvm;
using namespace llvm::jitprofiling;
+#define DEBUG_TYPE "oprofile-jit-event-listener"
+
namespace {
class OProfileJITEventListener : public JITEventListener {
@@ -170,11 +171,8 @@ void OProfileJITEventListener::NotifyObjectEmitted(const ObjectImage &Obj) {
}
// Use symbol info to iterate functions in the object.
- error_code ec;
- for (object::symbol_iterator I = Obj.begin_symbols(),
- E = Obj.end_symbols();
- I != E && !ec;
- I.increment(ec)) {
+ for (object::symbol_iterator I = Obj.begin_symbols(), E = Obj.end_symbols();
+ I != E; ++I) {
object::SymbolRef::Type SymType;
if (I->getType(SymType)) continue;
if (SymType == object::SymbolRef::ST_Function) {
@@ -203,11 +201,8 @@ void OProfileJITEventListener::NotifyFreeingObject(const ObjectImage &Obj) {
}
// Use symbol info to iterate functions in the object.
- error_code ec;
- for (object::symbol_iterator I = Obj.begin_symbols(),
- E = Obj.end_symbols();
- I != E && !ec;
- I.increment(ec)) {
+ for (object::symbol_iterator I = Obj.begin_symbols(), E = Obj.end_symbols();
+ I != E; ++I) {
object::SymbolRef::Type SymType;
if (I->getType(SymType)) continue;
if (SymType == object::SymbolRef::ST_Function) {
diff --git a/lib/ExecutionEngine/OProfileJIT/OProfileWrapper.cpp b/lib/ExecutionEngine/OProfileJIT/OProfileWrapper.cpp
index 6702e20..04edbd2 100644
--- a/lib/ExecutionEngine/OProfileJIT/OProfileWrapper.cpp
+++ b/lib/ExecutionEngine/OProfileJIT/OProfileWrapper.cpp
@@ -13,7 +13,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "oprofile-wrapper"
#include "llvm/ExecutionEngine/OProfileWrapper.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/Support/Debug.h"
@@ -29,6 +28,8 @@
#include <sys/stat.h>
#include <unistd.h>
+#define DEBUG_TYPE "oprofile-wrapper"
+
namespace {
// Global mutex to ensure a single thread initializes oprofile agent.
diff --git a/lib/ExecutionEngine/RuntimeDyld/GDBRegistrar.cpp b/lib/ExecutionEngine/RuntimeDyld/GDBRegistrar.cpp
index 1d0e9b3..8546571 100644
--- a/lib/ExecutionEngine/RuntimeDyld/GDBRegistrar.cpp
+++ b/lib/ExecutionEngine/RuntimeDyld/GDBRegistrar.cpp
@@ -45,7 +45,7 @@ extern "C" {
// We put information about the JITed function in this global, which the
// debugger reads. Make sure to specify the version statically, because the
// debugger checks the version before we can set it during runtime.
- struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
+ struct jit_descriptor __jit_debug_descriptor = { 1, 0, nullptr, nullptr };
// Debuggers put a breakpoint in this function.
LLVM_ATTRIBUTE_NOINLINE void __jit_debug_register_code() {
@@ -108,10 +108,10 @@ void NotifyDebugger(jit_code_entry* JITCodeEntry) {
__jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
// Insert this entry at the head of the list.
- JITCodeEntry->prev_entry = NULL;
+ JITCodeEntry->prev_entry = nullptr;
jit_code_entry* NextEntry = __jit_debug_descriptor.first_entry;
JITCodeEntry->next_entry = NextEntry;
- if (NextEntry != NULL) {
+ if (NextEntry) {
NextEntry->prev_entry = JITCodeEntry;
}
__jit_debug_descriptor.first_entry = JITCodeEntry;
@@ -142,11 +142,10 @@ void GDBJITRegistrar::registerObject(const ObjectBuffer &Object) {
"Second attempt to perform debug registration.");
jit_code_entry* JITCodeEntry = new jit_code_entry();
- if (JITCodeEntry == 0) {
+ if (!JITCodeEntry) {
llvm::report_fatal_error(
"Allocation failed when registering a JIT entry!\n");
- }
- else {
+ } else {
JITCodeEntry->symfile_addr = Buffer;
JITCodeEntry->symfile_size = Size;
@@ -198,7 +197,7 @@ void GDBJITRegistrar::deregisterObjectInternal(
}
delete JITCodeEntry;
- JITCodeEntry = NULL;
+ JITCodeEntry = nullptr;
}
llvm::ManagedStatic<GDBJITRegistrar> TheRegistrar;
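
GDBRegistrar speaks the GDB JIT interface: a global jit_descriptor whose first_entry heads an intrusive doubly linked list of jit_code_entry records, which the debugger walks after hitting the breakpoint it placed in __jit_debug_register_code(). A reduced sketch of the head insertion performed in NotifyDebugger (the struct layout follows the published GDB protocol; linking this alongside a real JIT runtime would clash on the global):

#include <cstdint>

struct jit_code_entry {
  jit_code_entry *next_entry;
  jit_code_entry *prev_entry;
  const char *symfile_addr;
  uint64_t symfile_size;
};

struct jit_descriptor {
  uint32_t version;
  uint32_t action_flag;
  jit_code_entry *relevant_entry;
  jit_code_entry *first_entry;
};

jit_descriptor __jit_debug_descriptor = {1, 0, nullptr, nullptr};

// Splice a new entry in at the head, fixing up both directions of the list.
void insertAtHead(jit_code_entry *Entry) {
  Entry->prev_entry = nullptr;
  Entry->next_entry = __jit_debug_descriptor.first_entry;
  if (Entry->next_entry)
    Entry->next_entry->prev_entry = Entry;
  __jit_debug_descriptor.first_entry = Entry;
  __jit_debug_descriptor.relevant_entry = Entry;
}
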
diff --git a/lib/ExecutionEngine/RuntimeDyld/ObjectImageCommon.h b/lib/ExecutionEngine/RuntimeDyld/ObjectImageCommon.h
index 3693c69..4917b93 100644
--- a/lib/ExecutionEngine/RuntimeDyld/ObjectImageCommon.h
+++ b/lib/ExecutionEngine/RuntimeDyld/ObjectImageCommon.h
@@ -18,6 +18,8 @@
#include "llvm/ExecutionEngine/ObjectImage.h"
#include "llvm/Object/ObjectFile.h"
+#include <memory>
+
namespace llvm {
namespace object {
@@ -30,13 +32,13 @@ class ObjectImageCommon : public ObjectImage {
void anchor() override;
protected:
- object::ObjectFile *ObjFile;
+ std::unique_ptr<object::ObjectFile> ObjFile;
// This form of the constructor allows subclasses to use
// format-specific subclasses of ObjectFile directly
- ObjectImageCommon(ObjectBuffer *Input, object::ObjectFile *Obj)
+ ObjectImageCommon(ObjectBuffer *Input, std::unique_ptr<object::ObjectFile> Obj)
: ObjectImage(Input), // saves Input as Buffer and takes ownership
- ObjFile(Obj)
+ ObjFile(std::move(Obj))
{
}
@@ -44,12 +46,13 @@ public:
ObjectImageCommon(ObjectBuffer* Input)
: ObjectImage(Input) // saves Input as Buffer and takes ownership
{
- ObjFile =
- object::ObjectFile::createObjectFile(Buffer->getMemBuffer()).get();
+ // FIXME: error checking? createObjectFile returns an ErrorOr<ObjectFile*>
+ // and should probably be checked for failure.
+ ObjFile.reset(object::ObjectFile::createObjectFile(Buffer->getMemBuffer()).get());
}
- ObjectImageCommon(object::ObjectFile* Input)
- : ObjectImage(NULL), ObjFile(Input) {}
- virtual ~ObjectImageCommon() { delete ObjFile; }
+ ObjectImageCommon(std::unique_ptr<object::ObjectFile> Input)
+ : ObjectImage(nullptr), ObjFile(std::move(Input)) {}
+ virtual ~ObjectImageCommon() { }
object::symbol_iterator begin_symbols() const override
{ return ObjFile->symbol_begin(); }
@@ -66,7 +69,7 @@ public:
StringRef getData() const override { return ObjFile->getData(); }
- object::ObjectFile* getObjectFile() const override { return ObjFile; }
+ object::ObjectFile* getObjectFile() const override { return ObjFile.get(); }
// Subclasses can override these methods to update the image with loaded
// addresses for sections and common symbols
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
index 986d3a0..c1eb0fd 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
@@ -11,7 +11,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "dyld"
#include "llvm/ExecutionEngine/RuntimeDyld.h"
#include "JITRegistrar.h"
#include "ObjectImageCommon.h"
@@ -25,6 +24,8 @@
using namespace llvm;
using namespace llvm::object;
+#define DEBUG_TYPE "dyld"
+
// Empty out-of-line virtual destructor as the key function.
RuntimeDyldImpl::~RuntimeDyldImpl() {}
@@ -72,12 +73,40 @@ void RuntimeDyldImpl::mapSectionAddress(const void *LocalAddress,
llvm_unreachable("Attempting to remap address of unknown section!");
}
+static error_code getOffset(const SymbolRef &Sym, uint64_t &Result) {
+ uint64_t Address;
+ if (error_code EC = Sym.getAddress(Address))
+ return EC;
+
+ if (Address == UnknownAddressOrSize) {
+ Result = UnknownAddressOrSize;
+ return object_error::success;
+ }
+
+ const ObjectFile *Obj = Sym.getObject();
+ section_iterator SecI(Obj->section_begin());
+ if (error_code EC = Sym.getSection(SecI))
+ return EC;
+
+ if (SecI == Obj->section_end()) {
+ Result = UnknownAddressOrSize;
+ return object_error::success;
+ }
+
+ uint64_t SectionAddress;
+ if (error_code EC = SecI->getAddress(SectionAddress))
+ return EC;
+
+ Result = Address - SectionAddress;
+ return object_error::success;
+}
+
ObjectImage *RuntimeDyldImpl::loadObject(ObjectImage *InputObject) {
MutexGuard locked(lock);
std::unique_ptr<ObjectImage> Obj(InputObject);
if (!Obj)
- return NULL;
+ return nullptr;
// Save information about our target
Arch = (Triple::ArchType)Obj->getArch();
@@ -115,36 +144,33 @@ ObjectImage *RuntimeDyldImpl::loadObject(ObjectImage *InputObject) {
bool IsCommon = Flags & SymbolRef::SF_Common;
if (IsCommon) {
// Add the common symbols to a list. We'll allocate them all below.
- uint32_t Align;
- Check(I->getAlignment(Align));
- uint64_t Size = 0;
- Check(I->getSize(Size));
- CommonSize += Size + Align;
- CommonSymbols[*I] = CommonSymbolInfo(Size, Align);
+ if (!GlobalSymbolTable.count(Name)) {
+ uint32_t Align;
+ Check(I->getAlignment(Align));
+ uint64_t Size = 0;
+ Check(I->getSize(Size));
+ CommonSize += Size + Align;
+ CommonSymbols[*I] = CommonSymbolInfo(Size, Align);
+ }
} else {
if (SymType == object::SymbolRef::ST_Function ||
SymType == object::SymbolRef::ST_Data ||
SymType == object::SymbolRef::ST_Unknown) {
- uint64_t FileOffset;
+ uint64_t SectOffset;
StringRef SectionData;
bool IsCode;
section_iterator SI = Obj->end_sections();
- Check(I->getFileOffset(FileOffset));
+ Check(getOffset(*I, SectOffset));
Check(I->getSection(SI));
if (SI == Obj->end_sections())
continue;
Check(SI->getContents(SectionData));
Check(SI->isText(IsCode));
- const uint8_t *SymPtr =
- (const uint8_t *)Obj->getData().data() + (uintptr_t)FileOffset;
- uintptr_t SectOffset =
- (uintptr_t)(SymPtr - (const uint8_t *)SectionData.begin());
unsigned SectionID =
findOrEmitSection(*Obj, *SI, IsCode, LocalSections);
LocalSymbols[Name.data()] = SymbolLoc(SectionID, SectOffset);
- DEBUG(dbgs() << "\tFileOffset: " << format("%p", (uintptr_t)FileOffset)
- << " flags: " << Flags << " SID: " << SectionID
- << " Offset: " << format("%p", SectOffset));
+ DEBUG(dbgs() << "\tOffset: " << format("%p", (uintptr_t)SectOffset)
+ << " flags: " << Flags << " SID: " << SectionID);
GlobalSymbolTable[Name] = SymbolLoc(SectionID, SectOffset);
}
}
@@ -153,7 +179,7 @@ ObjectImage *RuntimeDyldImpl::loadObject(ObjectImage *InputObject) {
// Allocate common symbols
if (CommonSize != 0)
- emitCommonSymbols(*Obj, CommonSymbols, CommonSize, LocalSymbols);
+ emitCommonSymbols(*Obj, CommonSymbols, CommonSize, GlobalSymbolTable);
// Parse and process relocations
DEBUG(dbgs() << "Parse relocations:\n");
@@ -163,7 +189,10 @@ ObjectImage *RuntimeDyldImpl::loadObject(ObjectImage *InputObject) {
StubMap Stubs;
section_iterator RelocatedSection = SI->getRelocatedSection();
- if (SI->relocation_empty() && !ProcessAllSections)
+ relocation_iterator I = SI->relocation_begin();
+ relocation_iterator E = SI->relocation_end();
+
+ if (I == E && !ProcessAllSections)
continue;
bool IsCode = false;
@@ -172,14 +201,13 @@ ObjectImage *RuntimeDyldImpl::loadObject(ObjectImage *InputObject) {
findOrEmitSection(*Obj, *RelocatedSection, IsCode, LocalSections);
DEBUG(dbgs() << "\tSectionID: " << SectionID << "\n");
- for (relocation_iterator I = SI->relocation_begin(),
- E = SI->relocation_end(); I != E;)
+ for (; I != E;)
I = processRelocationRef(SectionID, I, *Obj, LocalSections, LocalSymbols,
Stubs);
}
// Give the subclasses a chance to tie-up any loose ends.
- finalizeLoad(LocalSections);
+ finalizeLoad(*Obj, LocalSections);
return Obj.release();
}
@@ -400,7 +428,7 @@ unsigned RuntimeDyldImpl::emitSection(ObjectImage &Obj,
uintptr_t Allocate;
unsigned SectionID = Sections.size();
uint8_t *Addr;
- const char *pData = 0;
+ const char *pData = nullptr;
// Some sections, such as debug info, don't need to be loaded for execution.
// Leave those where they are.
@@ -441,7 +469,7 @@ unsigned RuntimeDyldImpl::emitSection(ObjectImage &Obj,
// to handle later processing (and by 'handle' I mean don't do anything
// with these sections).
Allocate = 0;
- Addr = 0;
+ Addr = nullptr;
DEBUG(dbgs() << "emitSection SectionID: " << SectionID << " Name: " << Name
<< " obj addr: " << format("%p", data.data()) << " new addr: 0"
<< " DataSize: " << DataSize << " StubBufSize: " << StubBufSize
@@ -490,7 +518,8 @@ void RuntimeDyldImpl::addRelocationForSymbol(const RelocationEntry &RE,
}
uint8_t *RuntimeDyldImpl::createStubFunction(uint8_t *Addr) {
- if (Arch == Triple::aarch64 || Arch == Triple::aarch64_be) {
+ if (Arch == Triple::aarch64 || Arch == Triple::aarch64_be ||
+ Arch == Triple::arm64 || Arch == Triple::arm64_be) {
// This stub has to be able to access the full address space,
// since symbol lookup won't necessarily find a handy, in-range,
// PLT stub for functions which could be anywhere.
@@ -560,6 +589,8 @@ uint8_t *RuntimeDyldImpl::createStubFunction(uint8_t *Addr) {
*Addr = 0xFF; // jmp
*(Addr+1) = 0x25; // rip
// 32-bit PC-relative address of the GOT entry will be stored at Addr+2
+ } else if (Arch == Triple::x86) {
+ *Addr = 0xE9; // 32-bit pc-relative jump.
}
return Addr;
}
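
The new Triple::x86 case above emits the shortest possible stub, a single jmp rel32, where the 64-bit targets need the full movz/movk sequence to span the address space. A sketch of encoding that 5-byte jump into a code buffer (hypothetical helper, not from this commit; the host is assumed little-endian, as on x86 itself):

#include <cstdint>
#include <cstring>

void emitX86JumpStub(uint8_t *Stub, uint64_t StubAddr, uint64_t TargetAddr) {
  Stub[0] = 0xE9;                            // jmp rel32 opcode
  // The displacement is measured from the end of the 5-byte instruction.
  int32_t Rel = static_cast<int32_t>(TargetAddr - (StubAddr + 5));
  std::memcpy(Stub + 1, &Rel, sizeof(Rel));  // x86 immediates are little-endian
}
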
@@ -586,7 +617,7 @@ void RuntimeDyldImpl::resolveRelocationList(const RelocationList &Relocs,
for (unsigned i = 0, e = Relocs.size(); i != e; ++i) {
const RelocationEntry &RE = Relocs[i];
// Ignore relocations for sections that were not loaded
- if (Sections[RE.SectionID].Address == 0)
+ if (Sections[RE.SectionID].Address == nullptr)
continue;
resolveRelocation(RE, Value);
}
@@ -651,7 +682,7 @@ RuntimeDyld::RuntimeDyld(RTDyldMemoryManager *mm) {
// though the public class spawns a new 'impl' instance for each load,
// they share a single memory manager. This can become a problem when page
// permissions are applied.
- Dyld = 0;
+ Dyld = nullptr;
MM = mm;
ProcessAllSections = false;
}
@@ -672,21 +703,23 @@ createRuntimeDyldMachO(RTDyldMemoryManager *MM, bool ProcessAllSections) {
return Dyld;
}
-ObjectImage *RuntimeDyld::loadObject(ObjectFile *InputObject) {
+ObjectImage *RuntimeDyld::loadObject(std::unique_ptr<ObjectFile> InputObject) {
std::unique_ptr<ObjectImage> InputImage;
+ ObjectFile &Obj = *InputObject;
+
if (InputObject->isELF()) {
- InputImage.reset(RuntimeDyldELF::createObjectImageFromFile(InputObject));
+ InputImage.reset(RuntimeDyldELF::createObjectImageFromFile(std::move(InputObject)));
if (!Dyld)
Dyld = createRuntimeDyldELF(MM, ProcessAllSections).release();
} else if (InputObject->isMachO()) {
- InputImage.reset(RuntimeDyldMachO::createObjectImageFromFile(InputObject));
+ InputImage.reset(RuntimeDyldMachO::createObjectImageFromFile(std::move(InputObject)));
if (!Dyld)
Dyld = createRuntimeDyldMachO(MM, ProcessAllSections).release();
} else
report_fatal_error("Incompatible object format!");
- if (!Dyld->isCompatibleFile(InputObject))
+ if (!Dyld->isCompatibleFile(&Obj))
report_fatal_error("Incompatible object format!");
Dyld->loadObject(InputImage.get());
@@ -740,7 +773,7 @@ ObjectImage *RuntimeDyld::loadObject(ObjectBuffer *InputBuffer) {
void *RuntimeDyld::getSymbolAddress(StringRef Name) {
if (!Dyld)
- return NULL;
+ return nullptr;
return Dyld->getSymbolAddress(Name);
}
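
The new getOffset helper above replaces file-offset arithmetic with a section-relative computation: a defined symbol's offset is its address minus its containing section's address, and absolute or undefined symbols report "unknown". The arithmetic, reduced to plain data (hypothetical types, error paths elided):

#include <cstdint>

struct Section { uint64_t Address; };
struct Symbol  { uint64_t Address; const Section *Sec; }; // Sec null if none

const uint64_t UnknownAddressOrSize = ~0ULL;

uint64_t sectionRelativeOffset(const Symbol &Sym) {
  if (Sym.Address == UnknownAddressOrSize || !Sym.Sec)
    return UnknownAddressOrSize;              // absolute or undefined symbol
  return Sym.Address - Sym.Sec->Address;      // offset within the section
}
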
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
index 3204b81..6ba24b9 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
@@ -11,7 +11,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "dyld"
#include "RuntimeDyldELF.h"
#include "JITRegistrar.h"
#include "ObjectImageCommon.h"
@@ -29,6 +28,8 @@
using namespace llvm;
using namespace llvm::object;
+#define DEBUG_TYPE "dyld"
+
namespace {
static inline error_code check(error_code Err) {
@@ -50,7 +51,12 @@ template <class ELFT> class DyldELFObject : public ELFObjectFile<ELFT> {
typedef typename ELFDataTypeTypedefHelper<ELFT>::value_type addr_type;
+ std::unique_ptr<ObjectFile> UnderlyingFile;
+
public:
+ DyldELFObject(std::unique_ptr<ObjectFile> UnderlyingFile,
+ MemoryBuffer *Wrapper, error_code &ec);
+
DyldELFObject(MemoryBuffer *Wrapper, error_code &ec);
void updateSectionAddress(const SectionRef &Sec, uint64_t Addr);
@@ -67,13 +73,11 @@ public:
};
template <class ELFT> class ELFObjectImage : public ObjectImageCommon {
-protected:
- DyldELFObject<ELFT> *DyldObj;
bool Registered;
public:
- ELFObjectImage(ObjectBuffer *Input, DyldELFObject<ELFT> *Obj)
- : ObjectImageCommon(Input, Obj), DyldObj(Obj), Registered(false) {}
+ ELFObjectImage(ObjectBuffer *Input, std::unique_ptr<DyldELFObject<ELFT>> Obj)
+ : ObjectImageCommon(Input, std::move(Obj)), Registered(false) {}
virtual ~ELFObjectImage() {
if (Registered)
@@ -83,11 +87,13 @@ public:
// Subclasses can override these methods to update the image with loaded
// addresses for sections and common symbols
void updateSectionAddress(const SectionRef &Sec, uint64_t Addr) override {
- DyldObj->updateSectionAddress(Sec, Addr);
+ static_cast<DyldELFObject<ELFT>*>(getObjectFile())
+ ->updateSectionAddress(Sec, Addr);
}
void updateSymbolAddress(const SymbolRef &Sym, uint64_t Addr) override {
- DyldObj->updateSymbolAddress(Sym, Addr);
+ static_cast<DyldELFObject<ELFT>*>(getObjectFile())
+ ->updateSymbolAddress(Sym, Addr);
}
void registerWithDebugger() override {
@@ -109,6 +115,14 @@ DyldELFObject<ELFT>::DyldELFObject(MemoryBuffer *Wrapper, error_code &ec)
}
template <class ELFT>
+DyldELFObject<ELFT>::DyldELFObject(std::unique_ptr<ObjectFile> UnderlyingFile,
+ MemoryBuffer *Wrapper, error_code &ec)
+ : ELFObjectFile<ELFT>(Wrapper, ec),
+ UnderlyingFile(std::move(UnderlyingFile)) {
+ this->isDyldELFObject = true;
+}
+
+template <class ELFT>
void DyldELFObject<ELFT>::updateSectionAddress(const SectionRef &Sec,
uint64_t Addr) {
DataRefImpl ShdrRef = Sec.getRawDataRefImpl();
@@ -164,30 +178,36 @@ void RuntimeDyldELF::deregisterEHFrames() {
}
ObjectImage *
-RuntimeDyldELF::createObjectImageFromFile(object::ObjectFile *ObjFile) {
+RuntimeDyldELF::createObjectImageFromFile(std::unique_ptr<object::ObjectFile> ObjFile) {
if (!ObjFile)
- return NULL;
+ return nullptr;
error_code ec;
MemoryBuffer *Buffer =
MemoryBuffer::getMemBuffer(ObjFile->getData(), "", false);
if (ObjFile->getBytesInAddress() == 4 && ObjFile->isLittleEndian()) {
- DyldELFObject<ELFType<support::little, 2, false>> *Obj =
- new DyldELFObject<ELFType<support::little, 2, false>>(Buffer, ec);
- return new ELFObjectImage<ELFType<support::little, 2, false>>(NULL, Obj);
+ auto Obj =
+ llvm::make_unique<DyldELFObject<ELFType<support::little, 2, false>>>(
+ std::move(ObjFile), Buffer, ec);
+ return new ELFObjectImage<ELFType<support::little, 2, false>>(
+ nullptr, std::move(Obj));
} else if (ObjFile->getBytesInAddress() == 4 && !ObjFile->isLittleEndian()) {
- DyldELFObject<ELFType<support::big, 2, false>> *Obj =
- new DyldELFObject<ELFType<support::big, 2, false>>(Buffer, ec);
- return new ELFObjectImage<ELFType<support::big, 2, false>>(NULL, Obj);
+ auto Obj =
+ llvm::make_unique<DyldELFObject<ELFType<support::big, 2, false>>>(
+ std::move(ObjFile), Buffer, ec);
+ return new ELFObjectImage<ELFType<support::big, 2, false>>(nullptr, std::move(Obj));
} else if (ObjFile->getBytesInAddress() == 8 && !ObjFile->isLittleEndian()) {
- DyldELFObject<ELFType<support::big, 2, true>> *Obj =
- new DyldELFObject<ELFType<support::big, 2, true>>(Buffer, ec);
- return new ELFObjectImage<ELFType<support::big, 2, true>>(NULL, Obj);
+ auto Obj = llvm::make_unique<DyldELFObject<ELFType<support::big, 2, true>>>(
+ std::move(ObjFile), Buffer, ec);
+ return new ELFObjectImage<ELFType<support::big, 2, true>>(nullptr,
+ std::move(Obj));
} else if (ObjFile->getBytesInAddress() == 8 && ObjFile->isLittleEndian()) {
- DyldELFObject<ELFType<support::little, 2, true>> *Obj =
- new DyldELFObject<ELFType<support::little, 2, true>>(Buffer, ec);
- return new ELFObjectImage<ELFType<support::little, 2, true>>(NULL, Obj);
+ auto Obj =
+ llvm::make_unique<DyldELFObject<ELFType<support::little, 2, true>>>(
+ std::move(ObjFile), Buffer, ec);
+ return new ELFObjectImage<ELFType<support::little, 2, true>>(
+ nullptr, std::move(Obj));
} else
llvm_unreachable("Unexpected ELF format");
}
@@ -201,28 +221,29 @@ ObjectImage *RuntimeDyldELF::createObjectImage(ObjectBuffer *Buffer) {
error_code ec;
if (Ident.first == ELF::ELFCLASS32 && Ident.second == ELF::ELFDATA2LSB) {
- DyldELFObject<ELFType<support::little, 4, false>> *Obj =
- new DyldELFObject<ELFType<support::little, 4, false>>(
+ auto Obj =
+ llvm::make_unique<DyldELFObject<ELFType<support::little, 4, false>>>(
Buffer->getMemBuffer(), ec);
- return new ELFObjectImage<ELFType<support::little, 4, false>>(Buffer, Obj);
+ return new ELFObjectImage<ELFType<support::little, 4, false>>(
+ Buffer, std::move(Obj));
} else if (Ident.first == ELF::ELFCLASS32 &&
Ident.second == ELF::ELFDATA2MSB) {
- DyldELFObject<ELFType<support::big, 4, false>> *Obj =
- new DyldELFObject<ELFType<support::big, 4, false>>(
+ auto Obj =
+ llvm::make_unique<DyldELFObject<ELFType<support::big, 4, false>>>(
Buffer->getMemBuffer(), ec);
- return new ELFObjectImage<ELFType<support::big, 4, false>>(Buffer, Obj);
+ return new ELFObjectImage<ELFType<support::big, 4, false>>(Buffer,
+ std::move(Obj));
} else if (Ident.first == ELF::ELFCLASS64 &&
Ident.second == ELF::ELFDATA2MSB) {
- DyldELFObject<ELFType<support::big, 8, true>> *Obj =
- new DyldELFObject<ELFType<support::big, 8, true>>(
- Buffer->getMemBuffer(), ec);
- return new ELFObjectImage<ELFType<support::big, 8, true>>(Buffer, Obj);
+ auto Obj = llvm::make_unique<DyldELFObject<ELFType<support::big, 8, true>>>(
+ Buffer->getMemBuffer(), ec);
+ return new ELFObjectImage<ELFType<support::big, 8, true>>(Buffer, std::move(Obj));
} else if (Ident.first == ELF::ELFCLASS64 &&
Ident.second == ELF::ELFDATA2LSB) {
- DyldELFObject<ELFType<support::little, 8, true>> *Obj =
- new DyldELFObject<ELFType<support::little, 8, true>>(
+ auto Obj =
+ llvm::make_unique<DyldELFObject<ELFType<support::little, 8, true>>>(
Buffer->getMemBuffer(), ec);
- return new ELFObjectImage<ELFType<support::little, 8, true>>(Buffer, Obj);
+ return new ELFObjectImage<ELFType<support::little, 8, true>>(Buffer, std::move(Obj));
} else
llvm_unreachable("Unexpected ELF format");
}
@@ -845,6 +866,8 @@ void RuntimeDyldELF::resolveRelocation(const SectionEntry &Section,
break;
case Triple::aarch64:
case Triple::aarch64_be:
+ case Triple::arm64:
+ case Triple::arm64_be:
resolveAArch64Relocation(Section, Offset, Value, Type, Addend);
break;
case Triple::arm: // Fall through.
@@ -950,7 +973,8 @@ relocation_iterator RuntimeDyldELF::processRelocationRef(
DEBUG(dbgs() << "\t\tSectionID: " << SectionID << " Offset: " << Offset
<< "\n");
- if (Arch == Triple::aarch64 &&
+ if ((Arch == Triple::aarch64 || Arch == Triple::aarch64_be ||
+ Arch == Triple::arm64 || Arch == Triple::arm64_be) &&
(RelType == ELF::R_AARCH64_CALL26 || RelType == ELF::R_AARCH64_JUMP26)) {
// This is an AArch64 branch relocation, need to use a stub function.
DEBUG(dbgs() << "\t\tThis is an AArch64 branch relocation.");
@@ -1151,7 +1175,7 @@ relocation_iterator RuntimeDyldELF::processRelocationRef(
// Extra check to avoid relocation against empty symbols (usually
// the R_PPC64_TOC).
if (SymType != SymbolRef::ST_Unknown && TargetName.empty())
- Value.SymbolName = NULL;
+ Value.SymbolName = nullptr;
if (Value.SymbolName)
addRelocationForSymbol(RE, Value.SymbolName);
@@ -1283,7 +1307,8 @@ void RuntimeDyldELF::updateGOTEntries(StringRef Name, uint64_t Addr) {
for (it = GOTs.begin(); it != end; ++it) {
GOTRelocations &GOTEntries = it->second;
for (int i = 0, e = GOTEntries.size(); i != e; ++i) {
- if (GOTEntries[i].SymbolName != 0 && GOTEntries[i].SymbolName == Name) {
+ if (GOTEntries[i].SymbolName != nullptr &&
+ GOTEntries[i].SymbolName == Name) {
GOTEntries[i].Offset = Addr;
}
}
@@ -1297,6 +1322,9 @@ size_t RuntimeDyldELF::getGOTEntrySize() {
switch (Arch) {
case Triple::x86_64:
case Triple::aarch64:
+ case Triple::aarch64_be:
+ case Triple::arm64:
+ case Triple::arm64_be:
case Triple::ppc64:
case Triple::ppc64le:
case Triple::systemz:
@@ -1331,7 +1359,7 @@ uint64_t RuntimeDyldELF::findGOTEntry(uint64_t LoadAddress, uint64_t Offset) {
// Find the matching entry in our vector.
uint64_t SymbolOffset = 0;
for (int i = 0, e = GOTEntries.size(); i != e; ++i) {
- if (GOTEntries[i].SymbolName == 0) {
+ if (!GOTEntries[i].SymbolName) {
if (getSectionLoadAddress(GOTEntries[i].SectionID) == LoadAddress &&
GOTEntries[i].Offset == Offset) {
GOTIndex = i;
@@ -1369,7 +1397,8 @@ uint64_t RuntimeDyldELF::findGOTEntry(uint64_t LoadAddress, uint64_t Offset) {
return 0;
}
-void RuntimeDyldELF::finalizeLoad(ObjSectionToIDMap &SectionMap) {
+void RuntimeDyldELF::finalizeLoad(ObjectImage &ObjImg,
+ ObjSectionToIDMap &SectionMap) {
// If necessary, allocate the global offset table
if (MemMgr) {
// Allocate the GOT if necessary
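
The factory rewrites above lean on llvm::make_unique, LLVM's stand-in for the std::make_unique that only arrives with C++14. Its essence, as a self-contained sketch:

#include <memory>
#include <utility>

template <typename T, typename... Args>
std::unique_ptr<T> make_unique(Args &&... A) {
  return std::unique_ptr<T>(new T(std::forward<Args>(A)...));
}

struct Widget {
  int X;
  explicit Widget(int X) : X(X) {}
};

int main() {
  auto W = make_unique<Widget>(42); // no raw new at the call site
  return W->X == 42 ? 0 : 1;
}
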
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h
index 27db5cd..a526073 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h
@@ -59,7 +59,8 @@ class RuntimeDyldELF : public RuntimeDyldImpl {
uint64_t Value, uint32_t Type, int64_t Addend);
unsigned getMaxStubSize() override {
- if (Arch == Triple::aarch64)
+ if (Arch == Triple::aarch64 || Arch == Triple::arm64 ||
+ Arch == Triple::aarch64_be || Arch == Triple::arm64_be)
return 20; // movz; movk; movk; movk; br
if (Arch == Triple::arm || Arch == Triple::thumb)
return 8; // 32-bit instruction and 32-bit address
@@ -115,11 +116,12 @@ public:
bool isCompatibleFile(const object::ObjectFile *Buffer) const override;
void registerEHFrames() override;
void deregisterEHFrames() override;
- void finalizeLoad(ObjSectionToIDMap &SectionMap) override;
+ void finalizeLoad(ObjectImage &ObjImg,
+ ObjSectionToIDMap &SectionMap) override;
virtual ~RuntimeDyldELF();
static ObjectImage *createObjectImage(ObjectBuffer *InputBuffer);
- static ObjectImage *createObjectImageFromFile(object::ObjectFile *Obj);
+ static ObjectImage *createObjectImageFromFile(std::unique_ptr<object::ObjectFile> Obj);
};
} // end namespace llvm
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h
index c153ee1..412cf20 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h
@@ -90,9 +90,17 @@ public:
/// used to make a relocation section relative instead of symbol relative.
int64_t Addend;
+ struct SectionPair {
+ uint32_t SectionA;
+ uint32_t SectionB;
+ };
+
/// SymOffset - Section offset of the relocation entry's symbol (used for GOT
/// lookup).
- uint64_t SymOffset;
+ union {
+ uint64_t SymOffset;
+ SectionPair Sections;
+ };
/// True if this is a PCRel relocation (MachO specific).
bool IsPCRel;
@@ -113,6 +121,16 @@ public:
bool IsPCRel, unsigned Size)
: SectionID(id), Offset(offset), RelType(type), Addend(addend),
SymOffset(0), IsPCRel(IsPCRel), Size(Size) {}
+
+ RelocationEntry(unsigned id, uint64_t offset, uint32_t type, int64_t addend,
+ unsigned SectionA, uint64_t SectionAOffset, unsigned SectionB,
+ uint64_t SectionBOffset, bool IsPCRel, unsigned Size)
+ : SectionID(id), Offset(offset), RelType(type),
+ Addend(SectionAOffset - SectionBOffset + addend), IsPCRel(IsPCRel),
+ Size(Size) {
+ Sections.SectionA = SectionA;
+ Sections.SectionB = SectionB;
+ }
};
class RelocationValueRef {
@@ -121,7 +139,8 @@ public:
uint64_t Offset;
int64_t Addend;
const char *SymbolName;
- RelocationValueRef() : SectionID(0), Offset(0), Addend(0), SymbolName(0) {}
+ RelocationValueRef() : SectionID(0), Offset(0), Addend(0),
+ SymbolName(nullptr) {}
inline bool operator==(const RelocationValueRef &Other) const {
return SectionID == Other.SectionID && Offset == Other.Offset &&
@@ -335,7 +354,7 @@ public:
// Work in progress.
SymbolTableMap::const_iterator pos = GlobalSymbolTable.find(Name);
if (pos == GlobalSymbolTable.end())
- return 0;
+ return nullptr;
SymbolLoc Loc = pos->second;
return getSectionAddress(Loc.first) + Loc.second;
}
@@ -372,7 +391,7 @@ public:
virtual void deregisterEHFrames();
- virtual void finalizeLoad(ObjSectionToIDMap &SectionMap) {}
+ virtual void finalizeLoad(ObjectImage &ObjImg, ObjSectionToIDMap &SectionMap) {}
};
} // end namespace llvm
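
The RelocationEntry change above overlays SymOffset with a SectionPair in a union: a given relocation carries either a symbol offset (for GOT lookup) or a pair of section IDs (for MachO section-difference relocations), never both, so the two views share storage. A reduced sketch:

#include <cstdint>
#include <iostream>

struct RelocationEntry {
  struct SectionPair {
    uint32_t SectionA;
    uint32_t SectionB;
  };

  // Exactly one view is active per entry; the relocation type (not
  // modeled here) tells the resolver which member is meaningful.
  union {
    uint64_t SymOffset;
    SectionPair Sections;
  };

  RelocationEntry() : SymOffset(0) {}
};

int main() {
  RelocationEntry RE;
  RE.Sections = {1, 2};   // activate the section-pair view
  std::cout << RE.Sections.SectionA << " " << RE.Sections.SectionB << "\n";
  return 0;
}
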
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp
index 7eae9c2..2b425fb 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp
@@ -11,17 +11,20 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "dyld"
#include "RuntimeDyldMachO.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringRef.h"
using namespace llvm;
using namespace llvm::object;
+#define DEBUG_TYPE "dyld"
+
namespace llvm {
static unsigned char *processFDE(unsigned char *P, intptr_t DeltaForText,
intptr_t DeltaForEH) {
+ DEBUG(dbgs() << "Processing FDE: Delta for text: " << DeltaForText
+ << ", Delta for EH: " << DeltaForEH << "\n");
uint32_t Length = *((uint32_t *)P);
P += 4;
unsigned char *Ret = P + Length;
@@ -66,7 +69,7 @@ void RuntimeDyldMachO::registerEHFrames() {
continue;
SectionEntry *Text = &Sections[SectionInfo.TextSID];
SectionEntry *EHFrame = &Sections[SectionInfo.EHFrameSID];
- SectionEntry *ExceptTab = NULL;
+ SectionEntry *ExceptTab = nullptr;
if (SectionInfo.ExceptTabSID != RTDYLD_INVALID_SECTION_ID)
ExceptTab = &Sections[SectionInfo.ExceptTabSID];
@@ -87,7 +90,8 @@ void RuntimeDyldMachO::registerEHFrames() {
UnregisteredEHFrameSections.clear();
}
-void RuntimeDyldMachO::finalizeLoad(ObjSectionToIDMap &SectionMap) {
+void RuntimeDyldMachO::finalizeLoad(ObjectImage &ObjImg,
+ ObjSectionToIDMap &SectionMap) {
unsigned EHFrameSID = RTDYLD_INVALID_SECTION_ID;
unsigned TextSID = RTDYLD_INVALID_SECTION_ID;
unsigned ExceptTabSID = RTDYLD_INVALID_SECTION_ID;
@@ -102,6 +106,12 @@ void RuntimeDyldMachO::finalizeLoad(ObjSectionToIDMap &SectionMap) {
TextSID = i->second;
else if (Name == "__gcc_except_tab")
ExceptTabSID = i->second;
+ else if (Name == "__jump_table")
+ populateJumpTable(cast<MachOObjectFile>(*ObjImg.getObjectFile()),
+ Section, i->second);
+ else if (Name == "__pointers")
+ populatePointersSection(cast<MachOObjectFile>(*ObjImg.getObjectFile()),
+ Section, i->second);
}
UnregisteredEHFrameSections.push_back(
EHFrameRelatedSections(EHFrameSID, TextSID, ExceptTabSID));
@@ -129,91 +139,87 @@ void RuntimeDyldMachO::finalizeLoad(ObjSectionToIDMap &SectionMap) {
// symbol in the target address space.
void RuntimeDyldMachO::resolveRelocation(const RelocationEntry &RE,
uint64_t Value) {
- const SectionEntry &Section = Sections[RE.SectionID];
- return resolveRelocation(Section, RE.Offset, Value, RE.RelType, RE.Addend,
- RE.IsPCRel, RE.Size);
-}
-
-void RuntimeDyldMachO::resolveRelocation(const SectionEntry &Section,
- uint64_t Offset, uint64_t Value,
- uint32_t Type, int64_t Addend,
- bool isPCRel, unsigned LogSize) {
- uint8_t *LocalAddress = Section.Address + Offset;
- uint64_t FinalAddress = Section.LoadAddress + Offset;
- unsigned MachoType = Type;
- unsigned Size = 1 << LogSize;
-
- DEBUG(dbgs() << "resolveRelocation LocalAddress: "
- << format("%p", LocalAddress)
- << " FinalAddress: " << format("%p", FinalAddress)
- << " Value: " << format("%p", Value) << " Addend: " << Addend
- << " isPCRel: " << isPCRel << " MachoType: " << MachoType
- << " Size: " << Size << "\n");
+ DEBUG(
+ const SectionEntry &Section = Sections[RE.SectionID];
+ uint8_t* LocalAddress = Section.Address + RE.Offset;
+ uint64_t FinalAddress = Section.LoadAddress + RE.Offset;
+
+ dbgs() << "resolveRelocation Section: " << RE.SectionID
+ << " LocalAddress: " << format("%p", LocalAddress)
+ << " FinalAddress: " << format("%p", FinalAddress)
+ << " Value: " << format("%p", Value)
+ << " Addend: " << RE.Addend
+ << " isPCRel: " << RE.IsPCRel
+ << " MachoType: " << RE.RelType
+ << " Size: " << (1 << RE.Size) << "\n";
+ );
// This just dispatches to the proper target specific routine.
switch (Arch) {
default:
llvm_unreachable("Unsupported CPU type!");
case Triple::x86_64:
- resolveX86_64Relocation(LocalAddress, FinalAddress, (uintptr_t)Value,
- isPCRel, MachoType, Size, Addend);
+ resolveX86_64Relocation(RE, Value);
break;
case Triple::x86:
- resolveI386Relocation(LocalAddress, FinalAddress, (uintptr_t)Value, isPCRel,
- MachoType, Size, Addend);
+ resolveI386Relocation(RE, Value);
break;
case Triple::arm: // Fall through.
case Triple::thumb:
- resolveARMRelocation(LocalAddress, FinalAddress, (uintptr_t)Value, isPCRel,
- MachoType, Size, Addend);
+ resolveARMRelocation(RE, Value);
break;
+ case Triple::aarch64:
case Triple::arm64:
- resolveARM64Relocation(LocalAddress, FinalAddress, (uintptr_t)Value,
- isPCRel, MachoType, Size, Addend);
+ resolveAArch64Relocation(RE, Value);
break;
}
}
-bool RuntimeDyldMachO::resolveI386Relocation(uint8_t *LocalAddress,
- uint64_t FinalAddress,
- uint64_t Value, bool isPCRel,
- unsigned Type, unsigned Size,
- int64_t Addend) {
- if (isPCRel)
- Value -= FinalAddress + 4; // see resolveX86_64Relocation
+bool RuntimeDyldMachO::resolveI386Relocation(const RelocationEntry &RE,
+ uint64_t Value) {
+ const SectionEntry &Section = Sections[RE.SectionID];
+ uint8_t* LocalAddress = Section.Address + RE.Offset;
- switch (Type) {
- default:
- llvm_unreachable("Invalid relocation type!");
- case MachO::GENERIC_RELOC_VANILLA: {
- uint8_t *p = LocalAddress;
- uint64_t ValueToWrite = Value + Addend;
- for (unsigned i = 0; i < Size; ++i) {
- *p++ = (uint8_t)(ValueToWrite & 0xff);
- ValueToWrite >>= 8;
- }
- return false;
+ if (RE.IsPCRel) {
+ uint64_t FinalAddress = Section.LoadAddress + RE.Offset;
+ Value -= FinalAddress + 4; // see MachOX86_64::resolveRelocation.
}
- case MachO::GENERIC_RELOC_SECTDIFF:
- case MachO::GENERIC_RELOC_LOCAL_SECTDIFF:
- case MachO::GENERIC_RELOC_PB_LA_PTR:
- return Error("Relocation type not implemented yet!");
+
+ switch (RE.RelType) {
+ default:
+ llvm_unreachable("Invalid relocation type!");
+ case MachO::GENERIC_RELOC_VANILLA:
+ return applyRelocationValue(LocalAddress, Value + RE.Addend,
+ 1 << RE.Size);
+ case MachO::GENERIC_RELOC_SECTDIFF:
+ case MachO::GENERIC_RELOC_LOCAL_SECTDIFF: {
+ uint64_t SectionABase = Sections[RE.Sections.SectionA].LoadAddress;
+ uint64_t SectionBBase = Sections[RE.Sections.SectionB].LoadAddress;
+ assert((Value == SectionABase || Value == SectionBBase) &&
+ "Unexpected SECTDIFF relocation value.");
+ Value = SectionABase - SectionBBase + RE.Addend;
+ return applyRelocationValue(LocalAddress, Value, 1 << RE.Size);
+ }
+ case MachO::GENERIC_RELOC_PB_LA_PTR:
+ return Error("Relocation type not implemented yet!");
}
}
-bool RuntimeDyldMachO::resolveX86_64Relocation(uint8_t *LocalAddress,
- uint64_t FinalAddress,
- uint64_t Value, bool isPCRel,
- unsigned Type, unsigned Size,
- int64_t Addend) {
+bool RuntimeDyldMachO::resolveX86_64Relocation(const RelocationEntry &RE,
+ uint64_t Value) {
+ const SectionEntry &Section = Sections[RE.SectionID];
+ uint8_t* LocalAddress = Section.Address + RE.Offset;
+
// If the relocation is PC-relative, the value to be encoded is the
// pointer difference.
- if (isPCRel)
+ if (RE.IsPCRel) {
// FIXME: It seems this value needs to be adjusted by 4 for an effective PC
// address. Is that expected? Only for branches, perhaps?
- Value -= FinalAddress + 4;
+ uint64_t FinalAddress = Section.LoadAddress + RE.Offset;
+ Value -= FinalAddress + 4; // see MachOX86_64::resolveRelocation.
+ }
- switch (Type) {
+ switch (RE.RelType) {
default:
llvm_unreachable("Invalid relocation type!");
case MachO::X86_64_RELOC_SIGNED_1:
@@ -221,17 +227,8 @@ bool RuntimeDyldMachO::resolveX86_64Relocation(uint8_t *LocalAddress,
case MachO::X86_64_RELOC_SIGNED_4:
case MachO::X86_64_RELOC_SIGNED:
case MachO::X86_64_RELOC_UNSIGNED:
- case MachO::X86_64_RELOC_BRANCH: {
- Value += Addend;
- // Mask in the target value a byte at a time (we don't have an alignment
- // guarantee for the target address, so this is safest).
- uint8_t *p = (uint8_t *)LocalAddress;
- for (unsigned i = 0; i < Size; ++i) {
- *p++ = (uint8_t)Value;
- Value >>= 8;
- }
- return false;
- }
+ case MachO::X86_64_RELOC_BRANCH:
+ return applyRelocationValue(LocalAddress, Value + RE.Addend, 1 << RE.Size);
case MachO::X86_64_RELOC_GOT_LOAD:
case MachO::X86_64_RELOC_GOT:
case MachO::X86_64_RELOC_SUBTRACTOR:
@@ -240,14 +237,15 @@ bool RuntimeDyldMachO::resolveX86_64Relocation(uint8_t *LocalAddress,
}
}
-bool RuntimeDyldMachO::resolveARMRelocation(uint8_t *LocalAddress,
- uint64_t FinalAddress,
- uint64_t Value, bool isPCRel,
- unsigned Type, unsigned Size,
- int64_t Addend) {
+bool RuntimeDyldMachO::resolveARMRelocation(const RelocationEntry &RE,
+ uint64_t Value) {
+ const SectionEntry &Section = Sections[RE.SectionID];
+ uint8_t* LocalAddress = Section.Address + RE.Offset;
+
// If the relocation is PC-relative, the value to be encoded is the
// pointer difference.
- if (isPCRel) {
+ if (RE.IsPCRel) {
+ uint64_t FinalAddress = Section.LoadAddress + RE.Offset;
Value -= FinalAddress;
// ARM PCRel relocations have an effective-PC offset of two instructions
// (four bytes in Thumb mode, 8 bytes in ARM mode).
@@ -255,19 +253,11 @@ bool RuntimeDyldMachO::resolveARMRelocation(uint8_t *LocalAddress,
Value -= 8;
}
- switch (Type) {
+ switch (RE.RelType) {
default:
llvm_unreachable("Invalid relocation type!");
- case MachO::ARM_RELOC_VANILLA: {
- // Mask in the target value a byte at a time (we don't have an alignment
- // guarantee for the target address, so this is safest).
- uint8_t *p = (uint8_t *)LocalAddress;
- for (unsigned i = 0; i < Size; ++i) {
- *p++ = (uint8_t)Value;
- Value >>= 8;
- }
- break;
- }
+ case MachO::ARM_RELOC_VANILLA:
+ return applyRelocationValue(LocalAddress, Value, 1 << RE.Size);
case MachO::ARM_RELOC_BR24: {
// Mask the value into the target address. We know instructions are
// 32-bit aligned, so we can do it all at once.
@@ -275,13 +265,16 @@ bool RuntimeDyldMachO::resolveARMRelocation(uint8_t *LocalAddress,
// The low two bits of the value are not encoded.
Value >>= 2;
// Mask the value to 24 bits.
- Value &= 0xffffff;
+ uint64_t FinalValue = Value & 0xffffff;
+ // Check for overflow.
+ if (Value != FinalValue)
+ return Error("ARM BR24 relocation out of range.");
// FIXME: If the destination is a Thumb function (and the instruction
// is a non-predicated BL instruction), we need to change it to a BLX
// instruction instead.
// Insert the value into the instruction.
- *p = (*p & ~0xffffff) | Value;
+ *p = (*p & ~0xffffff) | FinalValue;
break;
}
case MachO::ARM_THUMB_RELOC_BR22:
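Aside: the overflow check introduced in the hunk above rejects branch targets whose word offset does not fit BR24's 24-bit immediate. A standalone restatement of the same arithmetic (the helper name is mine):

    #include <cstdint>

    // True iff the PC-relative value survives BR24 encoding: the low two
    // bits are dropped, and the remaining word offset must fit in 24 bits.
    static bool fitsBR24(uint64_t PCRelValue) {
      uint64_t Imm = PCRelValue >> 2;
      return (Imm & 0xffffff) == Imm;
    }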
@@ -297,29 +290,23 @@ bool RuntimeDyldMachO::resolveARMRelocation(uint8_t *LocalAddress,
return false;
}
-bool RuntimeDyldMachO::resolveARM64Relocation(uint8_t *LocalAddress,
- uint64_t FinalAddress,
- uint64_t Value, bool isPCRel,
- unsigned Type, unsigned Size,
- int64_t Addend) {
+bool RuntimeDyldMachO::resolveAArch64Relocation(const RelocationEntry &RE,
+ uint64_t Value) {
+ const SectionEntry &Section = Sections[RE.SectionID];
+ uint8_t* LocalAddress = Section.Address + RE.Offset;
+
// If the relocation is PC-relative, the value to be encoded is the
// pointer difference.
- if (isPCRel)
+ if (RE.IsPCRel) {
+ uint64_t FinalAddress = Section.LoadAddress + RE.Offset;
Value -= FinalAddress;
+ }
- switch (Type) {
+ switch (RE.RelType) {
default:
llvm_unreachable("Invalid relocation type!");
- case MachO::ARM64_RELOC_UNSIGNED: {
- // Mask in the target value a byte at a time (we don't have an alignment
- // guarantee for the target address, so this is safest).
- uint8_t *p = (uint8_t *)LocalAddress;
- for (unsigned i = 0; i < Size; ++i) {
- *p++ = (uint8_t)Value;
- Value >>= 8;
- }
- break;
- }
+ case MachO::ARM64_RELOC_UNSIGNED:
+ return applyRelocationValue(LocalAddress, Value, 1 << RE.Size);
case MachO::ARM64_RELOC_BRANCH26: {
// Mask the value into the target address. We know instructions are
// 32-bit aligned, so we can do it all at once.
@@ -327,9 +314,12 @@ bool RuntimeDyldMachO::resolveARM64Relocation(uint8_t *LocalAddress,
// The low two bits of the value are not encoded.
Value >>= 2;
// Mask the value to 26 bits.
- Value &= 0x3ffffff;
+ uint64_t FinalValue = Value & 0x3ffffff;
+ // Check for overflow.
+ if (FinalValue != Value)
+ return Error("ARM64 BRANCH26 relocation out of range.");
// Insert the value into the instruction.
- *p = (*p & ~0x3ffffff) | Value;
+ *p = (*p & ~0x3ffffff) | FinalValue;
break;
}
case MachO::ARM64_RELOC_SUBTRACTOR:
@@ -346,6 +336,198 @@ bool RuntimeDyldMachO::resolveARM64Relocation(uint8_t *LocalAddress,
return false;
}
+void RuntimeDyldMachO::populateJumpTable(MachOObjectFile &Obj,
+ const SectionRef &JTSection,
+ unsigned JTSectionID) {
+ assert(!Obj.is64Bit() &&
+ "__jump_table section not supported in 64-bit MachO.");
+
+ MachO::dysymtab_command DySymTabCmd = Obj.getDysymtabLoadCommand();
+ MachO::section Sec32 = Obj.getSection(JTSection.getRawDataRefImpl());
+ uint32_t JTSectionSize = Sec32.size;
+ unsigned FirstIndirectSymbol = Sec32.reserved1;
+ unsigned JTEntrySize = Sec32.reserved2;
+ unsigned NumJTEntries = JTSectionSize / JTEntrySize;
+ uint8_t* JTSectionAddr = getSectionAddress(JTSectionID);
+ unsigned JTEntryOffset = 0;
+
+ assert((JTSectionSize % JTEntrySize) == 0 &&
+ "Jump-table section does not contain a whole number of stubs?");
+
+ for (unsigned i = 0; i < NumJTEntries; ++i) {
+ unsigned SymbolIndex =
+ Obj.getIndirectSymbolTableEntry(DySymTabCmd, FirstIndirectSymbol + i);
+ symbol_iterator SI = Obj.getSymbolByIndex(SymbolIndex);
+ StringRef IndirectSymbolName;
+ SI->getName(IndirectSymbolName);
+ uint8_t* JTEntryAddr = JTSectionAddr + JTEntryOffset;
+ createStubFunction(JTEntryAddr);
+ RelocationEntry RE(JTSectionID, JTEntryOffset + 1,
+ MachO::GENERIC_RELOC_VANILLA, 0, true, 2);
+ addRelocationForSymbol(RE, IndirectSymbolName);
+ JTEntryOffset += JTEntrySize;
+ }
+}
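Aside: the relocation above is registered at JTEntryOffset + 1, one byte into the stub. A hedged reading, assuming createStubFunction emits an i386 'jmp rel32' stub (this patch does not show that routine): the extra byte skips the 0xE9 opcode so the VANILLA fixup lands on the 32-bit displacement.

    #include <cstdint>
    #include <cstring>

    // Hypothetical i386 jump-table entry layout assumed above:
    //   byte 0     : 0xE9 (jmp rel32 opcode)
    //   bytes 1..4 : 32-bit PC-relative displacement (patched by the reloc)
    static void writeJumpStub(uint8_t *Entry, int32_t Displacement) {
      Entry[0] = 0xE9;
      std::memcpy(Entry + 1, &Displacement, sizeof(Displacement));
    }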
+
+void RuntimeDyldMachO::populatePointersSection(MachOObjectFile &Obj,
+ const SectionRef &PTSection,
+ unsigned PTSectionID) {
+ assert(!Obj.is64Bit() &&
+ "__pointers section not supported in 64-bit MachO.");
+
+ MachO::dysymtab_command DySymTabCmd = Obj.getDysymtabLoadCommand();
+ MachO::section Sec32 = Obj.getSection(PTSection.getRawDataRefImpl());
+ uint32_t PTSectionSize = Sec32.size;
+ unsigned FirstIndirectSymbol = Sec32.reserved1;
+ const unsigned PTEntrySize = 4;
+ unsigned NumPTEntries = PTSectionSize / PTEntrySize;
+ unsigned PTEntryOffset = 0;
+
+ assert((PTSectionSize % PTEntrySize) == 0 &&
+ "Pointers section does not contain a whole number of stubs?");
+
+ DEBUG(dbgs() << "Populating __pointers, Section ID " << PTSectionID
+ << ", " << NumPTEntries << " entries, "
+ << PTEntrySize << " bytes each:\n");
+
+ for (unsigned i = 0; i < NumPTEntries; ++i) {
+ unsigned SymbolIndex =
+ Obj.getIndirectSymbolTableEntry(DySymTabCmd, FirstIndirectSymbol + i);
+ symbol_iterator SI = Obj.getSymbolByIndex(SymbolIndex);
+ StringRef IndirectSymbolName;
+ SI->getName(IndirectSymbolName);
+ DEBUG(dbgs() << " " << IndirectSymbolName << ": index " << SymbolIndex
+ << ", PT offset: " << PTEntryOffset << "\n");
+ RelocationEntry RE(PTSectionID, PTEntryOffset,
+ MachO::GENERIC_RELOC_VANILLA, 0, false, 2);
+ addRelocationForSymbol(RE, IndirectSymbolName);
+ PTEntryOffset += PTEntrySize;
+ }
+}
+
+
+section_iterator getSectionByAddress(const MachOObjectFile &Obj,
+ uint64_t Addr) {
+ section_iterator SI = Obj.section_begin();
+ section_iterator SE = Obj.section_end();
+
+ for (; SI != SE; ++SI) {
+ uint64_t SAddr, SSize;
+ SI->getAddress(SAddr);
+ SI->getSize(SSize);
+ if ((Addr >= SAddr) && (Addr < SAddr + SSize))
+ return SI;
+ }
+
+ return SE;
+}
+
+relocation_iterator RuntimeDyldMachO::processSECTDIFFRelocation(
+ unsigned SectionID,
+ relocation_iterator RelI,
+ ObjectImage &Obj,
+ ObjSectionToIDMap &ObjSectionToID) {
+ const MachOObjectFile *MachO =
+ static_cast<const MachOObjectFile*>(Obj.getObjectFile());
+ MachO::any_relocation_info RE =
+ MachO->getRelocation(RelI->getRawDataRefImpl());
+
+ SectionEntry &Section = Sections[SectionID];
+ uint32_t RelocType = MachO->getAnyRelocationType(RE);
+ bool IsPCRel = MachO->getAnyRelocationPCRel(RE);
+ unsigned Size = MachO->getAnyRelocationLength(RE);
+ uint64_t Offset;
+ RelI->getOffset(Offset);
+ uint8_t *LocalAddress = Section.Address + Offset;
+ unsigned NumBytes = 1 << Size;
+ int64_t Addend = 0;
+ memcpy(&Addend, LocalAddress, NumBytes);
+
+ ++RelI;
+ MachO::any_relocation_info RE2 =
+ MachO->getRelocation(RelI->getRawDataRefImpl());
+
+ uint32_t AddrA = MachO->getScatteredRelocationValue(RE);
+ section_iterator SAI = getSectionByAddress(*MachO, AddrA);
+ assert(SAI != MachO->section_end() && "Can't find section for address A");
+ uint64_t SectionABase;
+ SAI->getAddress(SectionABase);
+ uint64_t SectionAOffset = AddrA - SectionABase;
+ SectionRef SectionA = *SAI;
+ bool IsCode;
+ SectionA.isText(IsCode);
+ uint32_t SectionAID = findOrEmitSection(Obj, SectionA, IsCode,
+ ObjSectionToID);
+
+ uint32_t AddrB = MachO->getScatteredRelocationValue(RE2);
+ section_iterator SBI = getSectionByAddress(*MachO, AddrB);
+ assert(SBI != MachO->section_end() && "Can't find section for address B");
+ uint64_t SectionBBase;
+ SBI->getAddress(SectionBBase);
+ uint64_t SectionBOffset = AddrB - SectionBBase;
+ SectionRef SectionB = *SBI;
+ uint32_t SectionBID = findOrEmitSection(Obj, SectionB, IsCode,
+ ObjSectionToID);
+
+ if (Addend != AddrA - AddrB)
+ Error("Unexpected SECTDIFF relocation addend.");
+
+ DEBUG(dbgs() << "Found SECTDIFF: AddrA: " << AddrA << ", AddrB: " << AddrB
+ << ", Addend: " << Addend << ", SectionA ID: "
+ << SectionAID << ", SectionAOffset: " << SectionAOffset
+ << ", SectionB ID: " << SectionBID << ", SectionBOffset: "
+ << SectionBOffset << "\n");
+ RelocationEntry R(SectionID, Offset, RelocType, 0,
+ SectionAID, SectionAOffset, SectionBID, SectionBOffset,
+ IsPCRel, Size);
+
+ addRelocationForSection(R, SectionAID);
+ addRelocationForSection(R, SectionBID);
+
+ return ++RelI;
+}
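Aside: a small self-contained check of the SECTDIFF bookkeeping above, with illustrative addresses of my own. The compiler pre-fills the fixup with AddrA - AddrB, which the patch only verifies; the RelocationEntry instead keeps the two section offsets, so the difference can be rebuilt against the loaded section bases:

    #include <cassert>
    #include <cstdint>

    int main() {
      // Object-file view: the two symbols in their sections.
      uint64_t AddrA = 0x1010, ObjABase = 0x1000;  // offset 0x10 in A
      uint64_t AddrB = 0x2020, ObjBBase = 0x2000;  // offset 0x20 in B
      // What processSECTDIFFRelocation folds into RE.Addend (addend = 0):
      int64_t Addend = int64_t(AddrA - ObjABase) - int64_t(AddrB - ObjBBase);
      // JIT view: the sections land at new base addresses.
      uint64_t LoadA = 0x900000, LoadB = 0x700000;
      // resolveI386Relocation's SECTDIFF case then computes:
      uint64_t Value = LoadA - LoadB + Addend;
      // ...which equals the difference of the relocated symbol addresses.
      assert(Value == (LoadA + 0x10) - (LoadB + 0x20));
      return 0;
    }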
+
+relocation_iterator RuntimeDyldMachO::processI386ScatteredVANILLA(
+ unsigned SectionID,
+ relocation_iterator RelI,
+ ObjectImage &Obj,
+ ObjSectionToIDMap &ObjSectionToID) {
+ const MachOObjectFile *MachO =
+ static_cast<const MachOObjectFile*>(Obj.getObjectFile());
+ MachO::any_relocation_info RE =
+ MachO->getRelocation(RelI->getRawDataRefImpl());
+
+ SectionEntry &Section = Sections[SectionID];
+ uint32_t RelocType = MachO->getAnyRelocationType(RE);
+ bool IsPCRel = MachO->getAnyRelocationPCRel(RE);
+ unsigned Size = MachO->getAnyRelocationLength(RE);
+ uint64_t Offset;
+ RelI->getOffset(Offset);
+ uint8_t *LocalAddress = Section.Address + Offset;
+ unsigned NumBytes = 1 << Size;
+ int64_t Addend = 0;
+ memcpy(&Addend, LocalAddress, NumBytes);
+
+ unsigned SymbolBaseAddr = MachO->getScatteredRelocationValue(RE);
+ section_iterator TargetSI = getSectionByAddress(*MachO, SymbolBaseAddr);
+ assert(TargetSI != MachO->section_end() && "Can't find section for symbol");
+ uint64_t SectionBaseAddr;
+ TargetSI->getAddress(SectionBaseAddr);
+ SectionRef TargetSection = *TargetSI;
+ bool IsCode;
+ TargetSection.isText(IsCode);
+ uint32_t TargetSectionID = findOrEmitSection(Obj, TargetSection, IsCode,
+ ObjSectionToID);
+
+ Addend -= SectionBaseAddr;
+ RelocationEntry R(SectionID, Offset, RelocType, Addend,
+ IsPCRel, Size);
+
+ addRelocationForSection(R, TargetSectionID);
+
+ return ++RelI;
+}
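Aside: the key step above is Addend -= SectionBaseAddr. The bytes at the fixup initially hold an absolute address inside the target section; subtracting the section's object-file base leaves a section-relative addend that can be re-applied at whatever address the section finally loads. Restated (the helper name is mine):

    #include <cstdint>

    // Convert an absolute in-place value into a section-relative addend.
    static int64_t toSectionRelative(int64_t InPlaceValue,
                                     uint64_t SectionBaseAddr) {
      return InPlaceValue - int64_t(SectionBaseAddr);
    }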
+
relocation_iterator RuntimeDyldMachO::processRelocationRef(
unsigned SectionID, relocation_iterator RelI, ObjectImage &Obj,
ObjSectionToIDMap &ObjSectionToID, const SymbolTableMap &Symbols,
@@ -358,18 +540,28 @@ relocation_iterator RuntimeDyldMachO::processRelocationRef(
uint32_t RelType = MachO->getAnyRelocationType(RE);
// FIXME: Properly handle scattered relocations.
- // For now, optimistically skip these: they can often be ignored, as
- // the static linker will already have applied the relocation, and it
- // only needs to be reapplied if symbols move relative to one another.
- // Note: This will fail horribly where the relocations *do* need to be
- // applied, but that was already the case.
- if (MachO->isRelocationScattered(RE))
- return ++RelI;
+ // Special case the couple of scattered relocations that we know how
+ // to handle: SECTDIFF relocations, and scattered VANILLA relocations
+ // on I386.
+ // For all other scattered relocations, just bail out and hope for the
+ // best, since the offsets computed by scattered relocations have often
+ // been optimistically filled in by the compiler. This will fail
+ // horribly where the relocations *do* need to be applied, but that was
+ // already the case.
+ if (MachO->isRelocationScattered(RE)) {
+ if (RelType == MachO::GENERIC_RELOC_SECTDIFF ||
+ RelType == MachO::GENERIC_RELOC_LOCAL_SECTDIFF)
+ return processSECTDIFFRelocation(SectionID, RelI, Obj, ObjSectionToID);
+ else if (Arch == Triple::x86 && RelType == MachO::GENERIC_RELOC_VANILLA)
+ return processI386ScatteredVANILLA(SectionID, RelI, Obj, ObjSectionToID);
+ else
+ return ++RelI;
+ }
RelocationValueRef Value;
SectionEntry &Section = Sections[SectionID];
- bool isExtern = MachO->getPlainRelocationExternal(RE);
+ bool IsExtern = MachO->getPlainRelocationExternal(RE);
bool IsPCRel = MachO->getAnyRelocationPCRel(RE);
unsigned Size = MachO->getAnyRelocationLength(RE);
uint64_t Offset;
@@ -379,7 +571,7 @@ relocation_iterator RuntimeDyldMachO::processRelocationRef(
uint64_t Addend = 0;
memcpy(&Addend, LocalAddress, NumBytes);
- if (isExtern) {
+ if (IsExtern) {
// Obtain the symbol name which is referenced in the relocation
symbol_iterator Symbol = RelI->getSymbol();
StringRef TargetName;
@@ -401,6 +593,17 @@ relocation_iterator RuntimeDyldMachO::processRelocationRef(
Value.Addend = Addend;
}
}
+
+ // Addends for external, PC-rel relocations on i386 point back to the zero
+ // offset. Calculate the final offset from the relocation target instead.
+ // This allows us to use the same logic for both external and internal
+ // relocations in resolveI386RelocationRef.
+ if (Arch == Triple::x86 && IsPCRel) {
+ uint64_t RelocAddr = 0;
+ RelI->getAddress(RelocAddr);
+ Value.Addend += RelocAddr + 4;
+ }
+
} else {
SectionRef Sec = MachO->getRelocationSection(RE);
bool IsCode = false;
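Aside: per the comment in the hunk above, an external PC-rel addend on i386 is measured from address zero, while internal relocations are measured from the end of the 4-byte fixup. Adding RelocAddr + 4 rebases the external form so both flavors reach resolveI386Relocation in the same shape, where FinalAddress + 4 is subtracted once. A one-line restatement (the helper name is mine):

    #include <cstdint>

    // Rebase an external i386 PC-rel addend to the convention used for
    // internal relocations (relative to the end of the 4-byte fixup).
    static int64_t rebaseExternalAddend(int64_t StoredAddend, uint64_t RelocAddr) {
      return StoredAddend + int64_t(RelocAddr) + 4;
    }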
@@ -417,6 +620,10 @@ relocation_iterator RuntimeDyldMachO::processRelocationRef(
RelType == MachO::X86_64_RELOC_GOT_LOAD)) {
assert(IsPCRel);
assert(Size == 2);
+
+ // FIXME: Teach the generic code above not to prematurely conflate
+ // relocation addends and symbol offsets.
+ Value.Addend -= Addend;
StubMap::const_iterator i = Stubs.find(Value);
uint8_t *Addr;
if (i != Stubs.end()) {
@@ -424,41 +631,45 @@ relocation_iterator RuntimeDyldMachO::processRelocationRef(
} else {
Stubs[Value] = Section.StubOffset;
uint8_t *GOTEntry = Section.Address + Section.StubOffset;
- RelocationEntry RE(SectionID, Section.StubOffset,
- MachO::X86_64_RELOC_UNSIGNED, 0, false, 3);
+ RelocationEntry GOTRE(SectionID, Section.StubOffset,
+ MachO::X86_64_RELOC_UNSIGNED, Value.Addend, false,
+ 3);
if (Value.SymbolName)
- addRelocationForSymbol(RE, Value.SymbolName);
+ addRelocationForSymbol(GOTRE, Value.SymbolName);
else
- addRelocationForSection(RE, Value.SectionID);
+ addRelocationForSection(GOTRE, Value.SectionID);
Section.StubOffset += 8;
Addr = GOTEntry;
}
- resolveRelocation(Section, Offset, (uint64_t)Addr,
- MachO::X86_64_RELOC_UNSIGNED, Value.Addend, true, 2);
+ RelocationEntry TargetRE(SectionID, Offset,
+ MachO::X86_64_RELOC_UNSIGNED, Addend, true,
+ 2);
+ resolveRelocation(TargetRE, (uint64_t)Addr);
} else if (Arch == Triple::arm && (RelType & 0xf) == MachO::ARM_RELOC_BR24) {
// This is an ARM branch relocation, need to use a stub function.
// Look up for existing stub.
StubMap::const_iterator i = Stubs.find(Value);
- if (i != Stubs.end())
- resolveRelocation(Section, Offset, (uint64_t)Section.Address + i->second,
- RelType, 0, IsPCRel, Size);
- else {
+ uint8_t *Addr;
+ if (i != Stubs.end()) {
+ Addr = Section.Address + i->second;
+ } else {
// Create a new stub function.
Stubs[Value] = Section.StubOffset;
uint8_t *StubTargetAddr =
createStubFunction(Section.Address + Section.StubOffset);
- RelocationEntry RE(SectionID, StubTargetAddr - Section.Address,
- MachO::GENERIC_RELOC_VANILLA, Value.Addend);
+ RelocationEntry StubRE(SectionID, StubTargetAddr - Section.Address,
+ MachO::GENERIC_RELOC_VANILLA, Value.Addend);
if (Value.SymbolName)
- addRelocationForSymbol(RE, Value.SymbolName);
+ addRelocationForSymbol(StubRE, Value.SymbolName);
else
- addRelocationForSection(RE, Value.SectionID);
- resolveRelocation(Section, Offset,
- (uint64_t)Section.Address + Section.StubOffset, RelType,
- 0, IsPCRel, Size);
+ addRelocationForSection(StubRE, Value.SectionID);
+ Addr = Section.Address + Section.StubOffset;
Section.StubOffset += getMaxStubSize();
}
+ RelocationEntry TargetRE(Value.SectionID, Offset, RelType, 0, IsPCRel,
+ Size);
+ resolveRelocation(TargetRE, (uint64_t)Addr);
} else {
RelocationEntry RE(SectionID, Offset, RelType, Value.Addend, IsPCRel, Size);
if (Value.SymbolName)
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h
index 1006176..060eb8c 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h
@@ -25,22 +25,31 @@ using namespace llvm::object;
namespace llvm {
class RuntimeDyldMachO : public RuntimeDyldImpl {
- bool resolveI386Relocation(uint8_t *LocalAddress, uint64_t FinalAddress,
- uint64_t Value, bool isPCRel, unsigned Type,
- unsigned Size, int64_t Addend);
- bool resolveX86_64Relocation(uint8_t *LocalAddress, uint64_t FinalAddress,
- uint64_t Value, bool isPCRel, unsigned Type,
- unsigned Size, int64_t Addend);
- bool resolveARMRelocation(uint8_t *LocalAddress, uint64_t FinalAddress,
- uint64_t Value, bool isPCRel, unsigned Type,
- unsigned Size, int64_t Addend);
- bool resolveARM64Relocation(uint8_t *LocalAddress, uint64_t FinalAddress,
- uint64_t Value, bool IsPCRel, unsigned Type,
- unsigned Size, int64_t Addend);
-
- void resolveRelocation(const SectionEntry &Section, uint64_t Offset,
- uint64_t Value, uint32_t Type, int64_t Addend,
- bool isPCRel, unsigned Size);
+private:
+
+ /// Write the least significant 'Size' bytes in 'Value' out at the address
+ /// pointed to by Addr.
+ bool applyRelocationValue(uint8_t *Addr, uint64_t Value, unsigned Size) {
+ for (unsigned i = 0; i < Size; ++i) {
+ *Addr++ = (uint8_t)Value;
+ Value >>= 8;
+ }
+
+ return false;
+ }
+
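Aside: applyRelocationValue above writes the low bytes one at a time because the fixup address carries no alignment guarantee, and RelocationEntry's Size field holds log2 of the byte count, hence the 1 << RE.Size at every call site. A standalone sketch of the same store:

    #include <cstdint>

    static void writeBytesLE(uint8_t *Addr, uint64_t Value, unsigned Size) {
      for (unsigned i = 0; i < Size; ++i) {
        *Addr++ = uint8_t(Value);  // little-endian, byte at a time
        Value >>= 8;
      }
    }

    int main() {
      uint8_t Buf[4] = {};
      writeBytesLE(Buf, 0x12345678, 1u << 2);  // Size field 2 -> 4 bytes
      return (Buf[0] == 0x78 && Buf[3] == 0x12) ? 0 : 1;
    }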
+ bool resolveI386Relocation(const RelocationEntry &RE, uint64_t Value);
+ bool resolveX86_64Relocation(const RelocationEntry &RE, uint64_t Value);
+ bool resolveARMRelocation(const RelocationEntry &RE, uint64_t Value);
+ bool resolveAArch64Relocation(const RelocationEntry &RE, uint64_t Value);
+
+ // Populate stubs in __jump_table section.
+ void populateJumpTable(MachOObjectFile &Obj, const SectionRef &JTSection,
+ unsigned JTSectionID);
+
+ // Populate __pointers section.
+ void populatePointersSection(MachOObjectFile &Obj, const SectionRef &PTSection,
+ unsigned PTSectionID);
unsigned getMaxStubSize() override {
if (Arch == Triple::arm || Arch == Triple::thumb)
@@ -53,6 +62,18 @@ class RuntimeDyldMachO : public RuntimeDyldImpl {
unsigned getStubAlignment() override { return 1; }
+ relocation_iterator processSECTDIFFRelocation(
+ unsigned SectionID,
+ relocation_iterator RelI,
+ ObjectImage &ObjImg,
+ ObjSectionToIDMap &ObjSectionToID);
+
+ relocation_iterator processI386ScatteredVANILLA(
+ unsigned SectionID,
+ relocation_iterator RelI,
+ ObjectImage &ObjImg,
+ ObjSectionToIDMap &ObjSectionToID);
+
struct EHFrameRelatedSections {
EHFrameRelatedSections()
: EHFrameSID(RTDYLD_INVALID_SECTION_ID),
@@ -81,15 +102,16 @@ public:
bool isCompatibleFormat(const ObjectBuffer *Buffer) const override;
bool isCompatibleFile(const object::ObjectFile *Obj) const override;
void registerEHFrames() override;
- void finalizeLoad(ObjSectionToIDMap &SectionMap) override;
+ void finalizeLoad(ObjectImage &ObjImg,
+ ObjSectionToIDMap &SectionMap) override;
static ObjectImage *createObjectImage(ObjectBuffer *InputBuffer) {
return new ObjectImageCommon(InputBuffer);
}
static ObjectImage *
- createObjectImageFromFile(object::ObjectFile *InputObject) {
- return new ObjectImageCommon(InputObject);
+ createObjectImageFromFile(std::unique_ptr<object::ObjectFile> InputObject) {
+ return new ObjectImageCommon(std::move(InputObject));
}
};
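Aside: createObjectImageFromFile now takes the ObjectFile by std::unique_ptr, so ownership transfer is explicit at the call site. A generic illustration of the idiom (my own example, not RuntimeDyld code):

    #include <memory>
    #include <utility>

    struct Resource {};

    struct Owner {
      explicit Owner(std::unique_ptr<Resource> R) : R(std::move(R)) {}
      std::unique_ptr<Resource> R;
    };

    int main() {
      auto R = std::make_unique<Resource>();
      Owner O(std::move(R));  // R is now empty; O is the sole owner
      return R == nullptr ? 0 : 1;
    }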
diff --git a/lib/ExecutionEngine/TargetSelect.cpp b/lib/ExecutionEngine/TargetSelect.cpp
index 9b7d348..b10d51f 100644
--- a/lib/ExecutionEngine/TargetSelect.cpp
+++ b/lib/ExecutionEngine/TargetSelect.cpp
@@ -47,7 +47,7 @@ TargetMachine *EngineBuilder::selectTarget(const Triple &TargetTriple,
TheTriple.setTriple(sys::getProcessTriple());
// Adjust the triple to match what the user requested.
- const Target *TheTarget = 0;
+ const Target *TheTarget = nullptr;
if (!MArch.empty()) {
for (TargetRegistry::iterator it = TargetRegistry::begin(),
ie = TargetRegistry::end(); it != ie; ++it) {
@@ -61,7 +61,7 @@ TargetMachine *EngineBuilder::selectTarget(const Triple &TargetTriple,
if (ErrorStr)
*ErrorStr = "No available targets are compatible with this -march, "
"see -version for the available targets.\n";
- return 0;
+ return nullptr;
}
// Adjust the triple to match (if known), otherwise stick with the
@@ -72,10 +72,10 @@ TargetMachine *EngineBuilder::selectTarget(const Triple &TargetTriple,
} else {
std::string Error;
TheTarget = TargetRegistry::lookupTarget(TheTriple.getTriple(), Error);
- if (TheTarget == 0) {
+ if (!TheTarget) {
if (ErrorStr)
*ErrorStr = Error;
- return 0;
+ return nullptr;
}
}
diff --git a/lib/IR/Android.mk b/lib/IR/Android.mk
index 071bb04..dd95703 100644
--- a/lib/IR/Android.mk
+++ b/lib/IR/Android.mk
@@ -30,6 +30,7 @@ vmcore_SRC_FILES := \
LeakDetector.cpp \
LegacyPassManager.cpp \
Mangler.cpp \
+ MDBuilder.cpp \
Metadata.cpp \
Module.cpp \
Pass.cpp \
diff --git a/lib/IR/AsmWriter.cpp b/lib/IR/AsmWriter.cpp
index d4670e4..0fef0d0 100644
--- a/lib/IR/AsmWriter.cpp
+++ b/lib/IR/AsmWriter.cpp
@@ -51,19 +51,19 @@ AssemblyAnnotationWriter::~AssemblyAnnotationWriter() {}
static const Module *getModuleFromVal(const Value *V) {
if (const Argument *MA = dyn_cast<Argument>(V))
- return MA->getParent() ? MA->getParent()->getParent() : 0;
+ return MA->getParent() ? MA->getParent()->getParent() : nullptr;
if (const BasicBlock *BB = dyn_cast<BasicBlock>(V))
- return BB->getParent() ? BB->getParent()->getParent() : 0;
+ return BB->getParent() ? BB->getParent()->getParent() : nullptr;
if (const Instruction *I = dyn_cast<Instruction>(V)) {
- const Function *M = I->getParent() ? I->getParent()->getParent() : 0;
- return M ? M->getParent() : 0;
+ const Function *M = I->getParent() ? I->getParent()->getParent() : nullptr;
+ return M ? M->getParent() : nullptr;
}
if (const GlobalValue *GV = dyn_cast<GlobalValue>(V))
return GV->getParent();
- return 0;
+ return nullptr;
}
static void PrintCallingConv(unsigned cc, raw_ostream &Out) {
@@ -78,7 +78,6 @@ static void PrintCallingConv(unsigned cc, raw_ostream &Out) {
case CallingConv::X86_StdCall: Out << "x86_stdcallcc"; break;
case CallingConv::X86_FastCall: Out << "x86_fastcallcc"; break;
case CallingConv::X86_ThisCall: Out << "x86_thiscallcc"; break;
- case CallingConv::X86_CDeclMethod:Out << "x86_cdeclmethodcc"; break;
case CallingConv::Intel_OCL_BI: Out << "intel_ocl_bicc"; break;
case CallingConv::ARM_APCS: Out << "arm_apcscc"; break;
case CallingConv::ARM_AAPCS: Out << "arm_aapcscc"; break;
@@ -421,10 +420,10 @@ static SlotTracker *createSlotTracker(const Value *V) {
if (!MD->isFunctionLocal())
return new SlotTracker(MD->getFunction());
- return new SlotTracker((Function *)0);
+ return new SlotTracker((Function *)nullptr);
}
- return 0;
+ return nullptr;
}
#if 0
@@ -436,21 +435,21 @@ static SlotTracker *createSlotTracker(const Value *V) {
// Module level constructor. Causes the contents of the Module (sans functions)
// to be added to the slot table.
SlotTracker::SlotTracker(const Module *M)
- : TheModule(M), TheFunction(0), FunctionProcessed(false),
+ : TheModule(M), TheFunction(nullptr), FunctionProcessed(false),
mNext(0), fNext(0), mdnNext(0), asNext(0) {
}
// Function level constructor. Causes the contents of the Module and the one
// function provided to be added to the slot table.
SlotTracker::SlotTracker(const Function *F)
- : TheModule(F ? F->getParent() : 0), TheFunction(F), FunctionProcessed(false),
- mNext(0), fNext(0), mdnNext(0), asNext(0) {
+ : TheModule(F ? F->getParent() : nullptr), TheFunction(F),
+ FunctionProcessed(false), mNext(0), fNext(0), mdnNext(0), asNext(0) {
}
inline void SlotTracker::initialize() {
if (TheModule) {
processModule();
- TheModule = 0; ///< Prevent re-processing next time we're called.
+ TheModule = nullptr; ///< Prevent re-processing next time we're called.
}
if (TheFunction && !FunctionProcessed)
@@ -560,7 +559,7 @@ void SlotTracker::processFunction() {
void SlotTracker::purgeFunction() {
ST_DEBUG("begin purgeFunction!\n");
fMap.clear(); // Simply discard the function level map
- TheFunction = 0;
+ TheFunction = nullptr;
FunctionProcessed = false;
ST_DEBUG("end purgeFunction!\n");
}
@@ -1048,7 +1047,7 @@ static void WriteMDNodeBodyInternal(raw_ostream &Out, const MDNode *Node,
Out << "!{";
for (unsigned mi = 0, me = Node->getNumOperands(); mi != me; ++mi) {
const Value *V = Node->getOperand(mi);
- if (V == 0)
+ if (!V)
Out << "null";
else {
TypePrinter->print(V->getType(), Out);
@@ -1126,12 +1125,6 @@ static void WriteAsOperandInternal(raw_ostream &Out, const Value *V,
return;
}
- if (V->getValueID() == Value::PseudoSourceValueVal ||
- V->getValueID() == Value::FixedStackPseudoSourceValueVal) {
- V->print(Out);
- return;
- }
-
char Prefix = '%';
int Slot;
// If we have a SlotTracker, use it.
@@ -1160,7 +1153,7 @@ static void WriteAsOperandInternal(raw_ostream &Out, const Value *V,
Slot = Machine->getLocalSlot(V);
}
delete Machine;
- Machine = 0;
+ Machine = nullptr;
} else {
Slot = -1;
}
@@ -1194,7 +1187,7 @@ AssemblyWriter::AssemblyWriter(formatted_raw_ostream &o, const Module *M,
AssemblyWriter::~AssemblyWriter() { }
void AssemblyWriter::writeOperand(const Value *Operand, bool PrintType) {
- if (Operand == 0) {
+ if (!Operand) {
Out << "<null operand!>";
return;
}
@@ -1259,7 +1252,7 @@ void AssemblyWriter::writeAtomicCmpXchg(AtomicOrdering SuccessOrdering,
void AssemblyWriter::writeParamOperand(const Value *Operand,
AttributeSet Attrs, unsigned Idx) {
- if (Operand == 0) {
+ if (!Operand) {
Out << "<null operand!>";
return;
}
@@ -1500,10 +1493,16 @@ void AssemblyWriter::printAlias(const GlobalAlias *GA) {
PrintLinkage(GA->getLinkage(), Out);
+ PointerType *Ty = GA->getType();
const Constant *Aliasee = GA->getAliasee();
+ if (!Aliasee || Ty != Aliasee->getType()) {
+ if (unsigned AddressSpace = Ty->getAddressSpace())
+ Out << "addrspace(" << AddressSpace << ") ";
+ TypePrinter.print(Ty->getElementType(), Out);
+ Out << ", ";
+ }
- if (Aliasee == 0) {
- TypePrinter.print(GA->getType(), Out);
+ if (!Aliasee) {
Out << " <<NULL ALIASEE>>";
} else {
writeOperand(Aliasee, !isa<ConstantExpr>(Aliasee));
@@ -1707,7 +1706,7 @@ void AssemblyWriter::printBasicBlock(const BasicBlock *BB) {
Out << "<badref>";
}
- if (BB->getParent() == 0) {
+ if (!BB->getParent()) {
Out.PadToColumn(50);
Out << "; Error: Block without parent!";
} else if (BB != &BB->getParent()->getEntryBlock()) { // Not the entry block?
@@ -1774,8 +1773,12 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
Out << '%' << SlotNum << " = ";
}
- if (isa<CallInst>(I) && cast<CallInst>(I).isTailCall())
- Out << "tail ";
+ if (const CallInst *CI = dyn_cast<CallInst>(&I)) {
+ if (CI->isMustTailCall())
+ Out << "musttail ";
+ else if (CI->isTailCall())
+ Out << "tail ";
+ }
// Print out the opcode...
Out << I.getOpcodeName();
@@ -1804,7 +1807,7 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
writeAtomicRMWOperation(Out, RMWI->getOperation());
// Print out the type of the operands...
- const Value *Operand = I.getNumOperands() ? I.getOperand(0) : 0;
+ const Value *Operand = I.getNumOperands() ? I.getOperand(0) : nullptr;
// Special case conditional branches to swizzle the condition out to the front
if (isa<BranchInst>(I) && cast<BranchInst>(I).isConditional()) {
@@ -2147,15 +2150,15 @@ void Module::print(raw_ostream &ROS, AssemblyAnnotationWriter *AAW) const {
W.printModule(this);
}
-void NamedMDNode::print(raw_ostream &ROS, AssemblyAnnotationWriter *AAW) const {
+void NamedMDNode::print(raw_ostream &ROS) const {
SlotTracker SlotTable(getParent());
formatted_raw_ostream OS(ROS);
- AssemblyWriter W(OS, SlotTable, getParent(), AAW);
+ AssemblyWriter W(OS, SlotTable, getParent(), nullptr);
W.printNamedMDNode(this);
}
void Type::print(raw_ostream &OS) const {
- if (this == 0) {
+ if (!this) {
OS << "<null Type>";
return;
}
@@ -2170,24 +2173,24 @@ void Type::print(raw_ostream &OS) const {
}
}
-void Value::print(raw_ostream &ROS, AssemblyAnnotationWriter *AAW) const {
- if (this == 0) {
+void Value::print(raw_ostream &ROS) const {
+ if (!this) {
ROS << "printing a <null> value\n";
return;
}
formatted_raw_ostream OS(ROS);
if (const Instruction *I = dyn_cast<Instruction>(this)) {
- const Function *F = I->getParent() ? I->getParent()->getParent() : 0;
+ const Function *F = I->getParent() ? I->getParent()->getParent() : nullptr;
SlotTracker SlotTable(F);
- AssemblyWriter W(OS, SlotTable, getModuleFromVal(I), AAW);
+ AssemblyWriter W(OS, SlotTable, getModuleFromVal(I), nullptr);
W.printInstruction(*I);
} else if (const BasicBlock *BB = dyn_cast<BasicBlock>(this)) {
SlotTracker SlotTable(BB->getParent());
- AssemblyWriter W(OS, SlotTable, getModuleFromVal(BB), AAW);
+ AssemblyWriter W(OS, SlotTable, getModuleFromVal(BB), nullptr);
W.printBasicBlock(BB);
} else if (const GlobalValue *GV = dyn_cast<GlobalValue>(this)) {
SlotTracker SlotTable(GV->getParent());
- AssemblyWriter W(OS, SlotTable, GV->getParent(), AAW);
+ AssemblyWriter W(OS, SlotTable, GV->getParent(), nullptr);
if (const GlobalVariable *V = dyn_cast<GlobalVariable>(GV))
W.printGlobal(V);
else if (const Function *F = dyn_cast<Function>(GV))
@@ -2197,20 +2200,18 @@ void Value::print(raw_ostream &ROS, AssemblyAnnotationWriter *AAW) const {
} else if (const MDNode *N = dyn_cast<MDNode>(this)) {
const Function *F = N->getFunction();
SlotTracker SlotTable(F);
- AssemblyWriter W(OS, SlotTable, F ? F->getParent() : 0, AAW);
+ AssemblyWriter W(OS, SlotTable, F ? F->getParent() : nullptr, nullptr);
W.printMDNodeBody(N);
} else if (const Constant *C = dyn_cast<Constant>(this)) {
TypePrinting TypePrinter;
TypePrinter.print(C->getType(), OS);
OS << ' ';
- WriteConstantInternal(OS, C, TypePrinter, 0, 0);
+ WriteConstantInternal(OS, C, TypePrinter, nullptr, nullptr);
} else if (isa<InlineAsm>(this) || isa<MDString>(this) ||
isa<Argument>(this)) {
this->printAsOperand(OS);
} else {
- // Otherwise we don't know what it is. Call the virtual function to
- // allow a subclass to print itself.
- printCustom(OS);
+ llvm_unreachable("Unknown value to print out!");
}
}
@@ -2220,7 +2221,7 @@ void Value::printAsOperand(raw_ostream &O, bool PrintType, const Module *M) cons
if (!PrintType &&
((!isa<Constant>(this) && !isa<MDNode>(this)) ||
hasName() || isa<GlobalValue>(this))) {
- WriteAsOperandInternal(O, this, 0, 0, M);
+ WriteAsOperandInternal(O, this, nullptr, nullptr, M);
return;
}
@@ -2235,12 +2236,7 @@ void Value::printAsOperand(raw_ostream &O, bool PrintType, const Module *M) cons
O << ' ';
}
- WriteAsOperandInternal(O, this, &TypePrinter, 0, M);
-}
-
-// Value::printCustom - subclasses should override this to implement printing.
-void Value::printCustom(raw_ostream &OS) const {
- llvm_unreachable("Unknown value to print out!");
+ WriteAsOperandInternal(O, this, &TypePrinter, nullptr, M);
}
// Value::dump - allow easy printing of Values from the debugger.
@@ -2250,7 +2246,7 @@ void Value::dump() const { print(dbgs()); dbgs() << '\n'; }
void Type::dump() const { print(dbgs()); }
// Module::dump() - Allow printing of Modules from the debugger.
-void Module::dump() const { print(dbgs(), 0); }
+void Module::dump() const { print(dbgs(), nullptr); }
// NamedMDNode::dump() - Allow printing of NamedMDNodes from the debugger.
-void NamedMDNode::dump() const { print(dbgs(), 0); }
+void NamedMDNode::dump() const { print(dbgs()); }
diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp
index 9d9d948..a9074bb 100644
--- a/lib/IR/Attributes.cpp
+++ b/lib/IR/Attributes.cpp
@@ -16,6 +16,7 @@
#include "llvm/IR/Attributes.h"
#include "AttributeImpl.h"
#include "LLVMContextImpl.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/Atomic.h"
@@ -192,6 +193,8 @@ std::string Attribute::getAsString(bool InAttrGrp) const {
return "noinline";
if (hasAttribute(Attribute::NonLazyBind))
return "nonlazybind";
+ if (hasAttribute(Attribute::NonNull))
+ return "nonnull";
if (hasAttribute(Attribute::NoRedZone))
return "noredzone";
if (hasAttribute(Attribute::NoReturn))
@@ -391,6 +394,7 @@ uint64_t AttributeImpl::getAttrMask(Attribute::AttrKind Val) {
case Attribute::Builtin: return 1ULL << 41;
case Attribute::OptimizeNone: return 1ULL << 42;
case Attribute::InAlloca: return 1ULL << 43;
+ case Attribute::NonNull: return 1ULL << 44;
}
llvm_unreachable("Unsupported attribute type");
}
@@ -402,7 +406,7 @@ uint64_t AttributeImpl::getAttrMask(Attribute::AttrKind Val) {
AttributeSetNode *AttributeSetNode::get(LLVMContext &C,
ArrayRef<Attribute> Attrs) {
if (Attrs.empty())
- return 0;
+ return nullptr;
// Otherwise, build a key to look up the existing attributes.
LLVMContextImpl *pImpl = C.pImpl;
@@ -595,7 +599,8 @@ AttributeSet AttributeSet::get(LLVMContext &C,
return getImpl(C, Attrs);
}
-AttributeSet AttributeSet::get(LLVMContext &C, unsigned Index, AttrBuilder &B) {
+AttributeSet AttributeSet::get(LLVMContext &C, unsigned Index,
+ const AttrBuilder &B) {
if (!B.hasAttributes())
return AttributeSet();
@@ -617,9 +622,9 @@ AttributeSet AttributeSet::get(LLVMContext &C, unsigned Index, AttrBuilder &B) {
}
// Add target-dependent (string) attributes.
- for (AttrBuilder::td_iterator I = B.td_begin(), E = B.td_end();
- I != E; ++I)
- Attrs.push_back(std::make_pair(Index, Attribute::get(C, I->first,I->second)));
+ for (const AttrBuilder::td_type &TDA : B.td_attrs())
+ Attrs.push_back(
+ std::make_pair(Index, Attribute::get(C, TDA.first, TDA.second)));
return get(C, Attrs);
}
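Aside: the hunk above lets AttributeSet::get take the builder by const reference and walks its target-dependent attributes with a range-based loop. A hedged usage sketch against this API, also exercising the new nonnull attribute added earlier in this file (header paths as in the LLVM tree; linking against LLVMCore assumed):

    #include "llvm/IR/Attributes.h"
    #include "llvm/IR/LLVMContext.h"
    using namespace llvm;

    int main() {
      LLVMContext Ctx;
      AttrBuilder B;
      B.addAttribute(Attribute::NonNull);
      // The builder may now be passed as a const reference.
      AttributeSet AS = AttributeSet::get(Ctx, /*Index=*/1, B);
      return AS.hasAttribute(1, Attribute::NonNull) ? 0 : 1;
    }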
@@ -836,7 +841,7 @@ bool AttributeSet::hasAttributes(unsigned Index) const {
/// \brief Return true if the specified attribute is set for at least one
/// parameter or for the return value.
bool AttributeSet::hasAttrSomewhere(Attribute::AttrKind Attr) const {
- if (pImpl == 0) return false;
+ if (!pImpl) return false;
for (unsigned I = 0, E = pImpl->getNumAttributes(); I != E; ++I)
for (AttributeSetImpl::iterator II = pImpl->begin(I),
@@ -877,14 +882,14 @@ std::string AttributeSet::getAsString(unsigned Index,
/// \brief The attributes for the specified index are returned.
AttributeSetNode *AttributeSet::getAttributes(unsigned Index) const {
- if (!pImpl) return 0;
+ if (!pImpl) return nullptr;
// Loop through to find the attribute node we want.
for (unsigned I = 0, E = pImpl->getNumAttributes(); I != E; ++I)
if (pImpl->getSlotIndex(I) == Index)
return pImpl->getSlotNode(I);
- return 0;
+ return nullptr;
}
AttributeSet::iterator AttributeSet::begin(unsigned Slot) const {
@@ -1175,6 +1180,7 @@ AttributeSet AttributeFuncs::typeIncompatible(Type *Ty, uint64_t Index) {
.addAttribute(Attribute::Nest)
.addAttribute(Attribute::NoAlias)
.addAttribute(Attribute::NoCapture)
+ .addAttribute(Attribute::NonNull)
.addAttribute(Attribute::ReadNone)
.addAttribute(Attribute::ReadOnly)
.addAttribute(Attribute::StructRet)
diff --git a/lib/IR/AutoUpgrade.cpp b/lib/IR/AutoUpgrade.cpp
index b7429b3..e255113 100644
--- a/lib/IR/AutoUpgrade.cpp
+++ b/lib/IR/AutoUpgrade.cpp
@@ -115,7 +115,7 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
Name == "x86.avx.movnt.ps.256" ||
Name == "x86.sse42.crc32.64.8" ||
(Name.startswith("x86.xop.vpcom") && F->arg_size() == 2)) {
- NewFn = 0;
+ NewFn = nullptr;
return true;
}
// SSE4.1 ptest functions may have an old signature.
@@ -158,7 +158,7 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
}
bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn) {
- NewFn = 0;
+ NewFn = nullptr;
bool Upgraded = UpgradeIntrinsicFunction1(F, NewFn);
// Upgrade intrinsic attributes. This does not change the function.
@@ -170,7 +170,62 @@ bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn) {
return Upgraded;
}
+static bool UpgradeGlobalStructors(GlobalVariable *GV) {
+ ArrayType *ATy = dyn_cast<ArrayType>(GV->getType()->getElementType());
+ StructType *OldTy =
+ ATy ? dyn_cast<StructType>(ATy->getElementType()) : nullptr;
+
+ // Only upgrade an array of a two field struct with the appropriate field
+ // types.
+ if (!OldTy || OldTy->getNumElements() != 2)
+ return false;
+
+ // Get the upgraded 3 element type.
+ PointerType *VoidPtrTy = Type::getInt8Ty(GV->getContext())->getPointerTo();
+ Type *Tys[3] = {
+ OldTy->getElementType(0),
+ OldTy->getElementType(1),
+ VoidPtrTy
+ };
+ StructType *NewTy =
+ StructType::get(GV->getContext(), Tys, /*isPacked=*/false);
+
+ // Build new constants with a null third field filled in.
+ Constant *OldInitC = GV->getInitializer();
+ ConstantArray *OldInit = dyn_cast<ConstantArray>(OldInitC);
+ if (!OldInit && !isa<ConstantAggregateZero>(OldInitC))
+ return false;
+ std::vector<Constant *> Initializers;
+ if (OldInit) {
+ for (Use &U : OldInit->operands()) {
+ ConstantStruct *Init = cast<ConstantStruct>(&U);
+ Constant *NewInit =
+ ConstantStruct::get(NewTy, Init->getOperand(0), Init->getOperand(1),
+ Constant::getNullValue(VoidPtrTy), nullptr);
+ Initializers.push_back(NewInit);
+ }
+ }
+ assert(Initializers.size() == ATy->getNumElements());
+
+ // Replace the old GV with a new one.
+ ATy = ArrayType::get(NewTy, Initializers.size());
+ Constant *NewInit = ConstantArray::get(ATy, Initializers);
+ GlobalVariable *NewGV = new GlobalVariable(
+ *GV->getParent(), ATy, GV->isConstant(), GV->getLinkage(), NewInit, "",
+ GV, GV->getThreadLocalMode(), GV->getType()->getAddressSpace(),
+ GV->isExternallyInitialized());
+ NewGV->copyAttributesFrom(GV);
+ NewGV->takeName(GV);
+ assert(GV->use_empty() && "program cannot use initializer list");
+ GV->eraseFromParent();
+ return true;
+}
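Aside: UpgradeGlobalStructors above widens each llvm.global_ctors / llvm.global_dtors element from two fields to three, filling the new pointer slot with null. Illustratively (IR shown in C++ comments; the example is mine, not text from the patch):

    // Before the upgrade:
    //   @llvm.global_ctors = appending global [1 x { i32, void ()* }]
    //       [{ i32, void ()* } { i32 65535, void ()* @ctor }]
    // After UpgradeGlobalStructors:
    //   @llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }]
    //       [{ i32, void ()*, i8* } { i32 65535, void ()* @ctor, i8* null }]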
+
bool llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
+ if (GV->getName() == "llvm.global_ctors" ||
+ GV->getName() == "llvm.global_dtors")
+ return UpgradeGlobalStructors(GV);
+
// Nothing to do yet.
return false;
}
@@ -453,9 +508,9 @@ void llvm::UpgradeInstWithTBAATag(Instruction *I) {
Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy,
Instruction *&Temp) {
if (Opc != Instruction::BitCast)
- return 0;
+ return nullptr;
- Temp = 0;
+ Temp = nullptr;
Type *SrcTy = V->getType();
if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
@@ -469,12 +524,12 @@ Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy,
return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
}
- return 0;
+ return nullptr;
}
Value *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) {
if (Opc != Instruction::BitCast)
- return 0;
+ return nullptr;
Type *SrcTy = C->getType();
if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
@@ -489,7 +544,7 @@ Value *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) {
DestTy);
}
- return 0;
+ return nullptr;
}
/// Check the debug info version number, if it is out-dated, drop the debug
diff --git a/lib/IR/BasicBlock.cpp b/lib/IR/BasicBlock.cpp
index 3079f0a..ba07433 100644
--- a/lib/IR/BasicBlock.cpp
+++ b/lib/IR/BasicBlock.cpp
@@ -27,7 +27,7 @@ using namespace llvm;
ValueSymbolTable *BasicBlock::getValueSymbolTable() {
if (Function *F = getParent())
return &F->getValueSymbolTable();
- return 0;
+ return nullptr;
}
const DataLayout *BasicBlock::getDataLayout() const {
@@ -45,7 +45,7 @@ template class llvm::SymbolTableListTraits<Instruction, BasicBlock>;
BasicBlock::BasicBlock(LLVMContext &C, const Twine &Name, Function *NewParent,
BasicBlock *InsertBefore)
- : Value(Type::getLabelTy(C), Value::BasicBlockVal), Parent(0) {
+ : Value(Type::getLabelTy(C), Value::BasicBlockVal), Parent(nullptr) {
// Make sure that we get added to a function
LeakDetector::addGarbageObject(this);
@@ -81,7 +81,7 @@ BasicBlock::~BasicBlock() {
}
}
- assert(getParent() == 0 && "BasicBlock still linked into the program!");
+ assert(getParent() == nullptr && "BasicBlock still linked into the program!");
dropAllReferences();
InstList.clear();
}
@@ -122,12 +122,12 @@ void BasicBlock::moveAfter(BasicBlock *MovePos) {
TerminatorInst *BasicBlock::getTerminator() {
- if (InstList.empty()) return 0;
+ if (InstList.empty()) return nullptr;
return dyn_cast<TerminatorInst>(&InstList.back());
}
const TerminatorInst *BasicBlock::getTerminator() const {
- if (InstList.empty()) return 0;
+ if (InstList.empty()) return nullptr;
return dyn_cast<TerminatorInst>(&InstList.back());
}
@@ -186,10 +186,10 @@ void BasicBlock::dropAllReferences() {
/// return the block, otherwise return a null pointer.
BasicBlock *BasicBlock::getSinglePredecessor() {
pred_iterator PI = pred_begin(this), E = pred_end(this);
- if (PI == E) return 0; // No preds.
+ if (PI == E) return nullptr; // No preds.
BasicBlock *ThePred = *PI;
++PI;
- return (PI == E) ? ThePred : 0 /*multiple preds*/;
+ return (PI == E) ? ThePred : nullptr /*multiple preds*/;
}
/// getUniquePredecessor - If this basic block has a unique predecessor block,
@@ -199,12 +199,12 @@ BasicBlock *BasicBlock::getSinglePredecessor() {
/// a switch statement with multiple cases having the same destination).
BasicBlock *BasicBlock::getUniquePredecessor() {
pred_iterator PI = pred_begin(this), E = pred_end(this);
- if (PI == E) return 0; // No preds.
+ if (PI == E) return nullptr; // No preds.
BasicBlock *PredBB = *PI;
++PI;
for (;PI != E; ++PI) {
if (*PI != PredBB)
- return 0;
+ return nullptr;
// The same predecessor appears multiple times in the predecessor list.
// This is OK.
}
@@ -277,7 +277,7 @@ void BasicBlock::removePredecessor(BasicBlock *Pred,
PN->removeIncomingValue(Pred, false);
// If all incoming values to the Phi are the same, we can replace the Phi
// with that value.
- Value* PNV = 0;
+ Value* PNV = nullptr;
if (!DontDeleteUselessPHIs && (PNV = PN->hasConstantValue()))
if (PNV != PN) {
PN->replaceAllUsesWith(PNV);
diff --git a/lib/IR/CMakeLists.txt b/lib/IR/CMakeLists.txt
index 09117aa..b027ae5 100644
--- a/lib/IR/CMakeLists.txt
+++ b/lib/IR/CMakeLists.txt
@@ -7,12 +7,12 @@ add_llvm_library(LLVMCore
ConstantRange.cpp
Constants.cpp
Core.cpp
- DiagnosticInfo.cpp
- DiagnosticPrinter.cpp
DIBuilder.cpp
DataLayout.cpp
DebugInfo.cpp
DebugLoc.cpp
+ DiagnosticInfo.cpp
+ DiagnosticPrinter.cpp
Dominators.cpp
Function.cpp
GCOV.cpp
@@ -28,6 +28,7 @@ add_llvm_library(LLVMCore
LLVMContextImpl.cpp
LeakDetector.cpp
LegacyPassManager.cpp
+ MDBuilder.cpp
Mangler.cpp
Metadata.cpp
Module.cpp
diff --git a/lib/IR/ConstantFold.cpp b/lib/IR/ConstantFold.cpp
index 612aba0..706e66f 100644
--- a/lib/IR/ConstantFold.cpp
+++ b/lib/IR/ConstantFold.cpp
@@ -51,7 +51,7 @@ static Constant *BitCastConstantVector(Constant *CV, VectorType *DstTy) {
// Analysis/ConstantFolding.cpp
unsigned NumElts = DstTy->getNumElements();
if (NumElts != CV->getType()->getVectorNumElements())
- return 0;
+ return nullptr;
Type *DstEltTy = DstTy->getElementType();
@@ -94,7 +94,7 @@ foldConstantCastPair(
// Let CastInst::isEliminableCastPair do the heavy lifting.
return CastInst::isEliminableCastPair(firstOp, secondOp, SrcTy, MidTy, DstTy,
- 0, FakeIntPtrTy, 0);
+ nullptr, FakeIntPtrTy, nullptr);
}
static Constant *FoldBitCast(Constant *V, Type *DestTy) {
@@ -139,7 +139,7 @@ static Constant *FoldBitCast(Constant *V, Type *DestTy) {
if (VectorType *SrcTy = dyn_cast<VectorType>(V->getType())) {
assert(DestPTy->getBitWidth() == SrcTy->getBitWidth() &&
"Not cast between same sized vectors!");
- SrcTy = NULL;
+ SrcTy = nullptr;
// First, check for null. Undef is already handled.
if (isa<ConstantAggregateZero>(V))
return Constant::getNullValue(DestTy);
@@ -173,7 +173,7 @@ static Constant *FoldBitCast(Constant *V, Type *DestTy) {
CI->getValue()));
// Otherwise, can't fold this (vector?)
- return 0;
+ return nullptr;
}
// Handle ConstantFP input: FP -> Integral.
@@ -181,7 +181,7 @@ static Constant *FoldBitCast(Constant *V, Type *DestTy) {
return ConstantInt::get(FP->getContext(),
FP->getValueAPF().bitcastToAPInt());
- return 0;
+ return nullptr;
}
@@ -216,14 +216,14 @@ static Constant *ExtractConstantBytes(Constant *C, unsigned ByteStart,
// In the input is a constant expr, we might be able to recursively simplify.
// If not, we definitely can't do anything.
ConstantExpr *CE = dyn_cast<ConstantExpr>(C);
- if (CE == 0) return 0;
-
+ if (!CE) return nullptr;
+
switch (CE->getOpcode()) {
- default: return 0;
+ default: return nullptr;
case Instruction::Or: {
Constant *RHS = ExtractConstantBytes(CE->getOperand(1), ByteStart,ByteSize);
- if (RHS == 0)
- return 0;
+ if (!RHS)
+ return nullptr;
// X | -1 -> -1.
if (ConstantInt *RHSC = dyn_cast<ConstantInt>(RHS))
@@ -231,32 +231,32 @@ static Constant *ExtractConstantBytes(Constant *C, unsigned ByteStart,
return RHSC;
Constant *LHS = ExtractConstantBytes(CE->getOperand(0), ByteStart,ByteSize);
- if (LHS == 0)
- return 0;
+ if (!LHS)
+ return nullptr;
return ConstantExpr::getOr(LHS, RHS);
}
case Instruction::And: {
Constant *RHS = ExtractConstantBytes(CE->getOperand(1), ByteStart,ByteSize);
- if (RHS == 0)
- return 0;
+ if (!RHS)
+ return nullptr;
// X & 0 -> 0.
if (RHS->isNullValue())
return RHS;
Constant *LHS = ExtractConstantBytes(CE->getOperand(0), ByteStart,ByteSize);
- if (LHS == 0)
- return 0;
+ if (!LHS)
+ return nullptr;
return ConstantExpr::getAnd(LHS, RHS);
}
case Instruction::LShr: {
ConstantInt *Amt = dyn_cast<ConstantInt>(CE->getOperand(1));
- if (Amt == 0)
- return 0;
+ if (!Amt)
+ return nullptr;
unsigned ShAmt = Amt->getZExtValue();
// Cannot analyze non-byte shifts.
if ((ShAmt & 7) != 0)
- return 0;
+ return nullptr;
ShAmt >>= 3;
// If the extract is known to be all zeros, return zero.
@@ -268,17 +268,17 @@ static Constant *ExtractConstantBytes(Constant *C, unsigned ByteStart,
return ExtractConstantBytes(CE->getOperand(0), ByteStart+ShAmt, ByteSize);
// TODO: Handle the 'partially zero' case.
- return 0;
+ return nullptr;
}
case Instruction::Shl: {
ConstantInt *Amt = dyn_cast<ConstantInt>(CE->getOperand(1));
- if (Amt == 0)
- return 0;
+ if (!Amt)
+ return nullptr;
unsigned ShAmt = Amt->getZExtValue();
// Cannot analyze non-byte shifts.
if ((ShAmt & 7) != 0)
- return 0;
+ return nullptr;
ShAmt >>= 3;
// If the extract is known to be all zeros, return zero.
@@ -290,7 +290,7 @@ static Constant *ExtractConstantBytes(Constant *C, unsigned ByteStart,
return ExtractConstantBytes(CE->getOperand(0), ByteStart-ShAmt, ByteSize);
// TODO: Handle the 'partially zero' case.
- return 0;
+ return nullptr;
}
case Instruction::ZExt: {
@@ -324,7 +324,7 @@ static Constant *ExtractConstantBytes(Constant *C, unsigned ByteStart,
}
// TODO: Handle the 'partially zero' case.
- return 0;
+ return nullptr;
}
}
}
@@ -376,7 +376,7 @@ static Constant *getFoldedSizeOf(Type *Ty, Type *DestTy,
// If there's no interesting folding happening, bail so that we don't create
// a constant that looks like it needs folding but really doesn't.
if (!Folded)
- return 0;
+ return nullptr;
// Base case: Get a regular sizeof expression.
Constant *C = ConstantExpr::getSizeOf(Ty);
@@ -442,7 +442,7 @@ static Constant *getFoldedAlignOf(Type *Ty, Type *DestTy,
// If there's no interesting folding happening, bail so that we don't create
// a constant that looks like it needs folding but really doesn't.
if (!Folded)
- return 0;
+ return nullptr;
// Base case: Get a regular alignof expression.
Constant *C = ConstantExpr::getAlignOf(Ty);
@@ -473,7 +473,7 @@ static Constant *getFoldedOffsetOf(Type *Ty, Constant *FieldNo,
unsigned NumElems = STy->getNumElements();
// An empty struct has no members.
if (NumElems == 0)
- return 0;
+ return nullptr;
// Check for a struct with all members having the same size.
Constant *MemberSize =
getFoldedSizeOf(STy->getElementType(0), DestTy, true);
@@ -497,7 +497,7 @@ static Constant *getFoldedOffsetOf(Type *Ty, Constant *FieldNo,
// If there's no interesting folding happening, bail so that we don't create
// a constant that looks like it needs folding but really doesn't.
if (!Folded)
- return 0;
+ return nullptr;
// Base case: Get a regular offsetof expression.
Constant *C = ConstantExpr::getOffsetOf(Ty, FieldNo);
@@ -582,7 +582,7 @@ Constant *llvm::ConstantFoldCastInstruction(unsigned opc, Constant *V,
APFloat::rmNearestTiesToEven, &ignored);
return ConstantFP::get(V->getContext(), Val);
}
- return 0; // Can't fold.
+ return nullptr; // Can't fold.
case Instruction::FPToUI:
case Instruction::FPToSI:
if (ConstantFP *FPC = dyn_cast<ConstantFP>(V)) {
@@ -595,11 +595,11 @@ Constant *llvm::ConstantFoldCastInstruction(unsigned opc, Constant *V,
APInt Val(DestBitWidth, x);
return ConstantInt::get(FPC->getContext(), Val);
}
- return 0; // Can't fold.
+ return nullptr; // Can't fold.
case Instruction::IntToPtr: //always treated as unsigned
if (V->isNullValue()) // Is it an integral null value?
return ConstantPointerNull::get(cast<PointerType>(DestTy));
- return 0; // Other pointer types cannot be casted
+ return nullptr; // Other pointer types cannot be casted
case Instruction::PtrToInt: // always treated as unsigned
// Is it a null pointer value?
if (V->isNullValue())
@@ -643,7 +643,7 @@ Constant *llvm::ConstantFoldCastInstruction(unsigned opc, Constant *V,
}
}
// Other pointer types cannot be casted
- return 0;
+ return nullptr;
case Instruction::UIToFP:
case Instruction::SIToFP:
if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
@@ -655,21 +655,21 @@ Constant *llvm::ConstantFoldCastInstruction(unsigned opc, Constant *V,
APFloat::rmNearestTiesToEven);
return ConstantFP::get(V->getContext(), apf);
}
- return 0;
+ return nullptr;
case Instruction::ZExt:
if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
uint32_t BitWidth = cast<IntegerType>(DestTy)->getBitWidth();
return ConstantInt::get(V->getContext(),
CI->getValue().zext(BitWidth));
}
- return 0;
+ return nullptr;
case Instruction::SExt:
if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
uint32_t BitWidth = cast<IntegerType>(DestTy)->getBitWidth();
return ConstantInt::get(V->getContext(),
CI->getValue().sext(BitWidth));
}
- return 0;
+ return nullptr;
case Instruction::Trunc: {
uint32_t DestBitWidth = cast<IntegerType>(DestTy)->getBitWidth();
if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
@@ -685,12 +685,12 @@ Constant *llvm::ConstantFoldCastInstruction(unsigned opc, Constant *V,
if (Constant *Res = ExtractConstantBytes(V, 0, DestBitWidth / 8))
return Res;
- return 0;
+ return nullptr;
}
case Instruction::BitCast:
return FoldBitCast(V, DestTy);
case Instruction::AddrSpaceCast:
- return 0;
+ return nullptr;
}
}
@@ -746,7 +746,7 @@ Constant *llvm::ConstantFoldSelectInstruction(Constant *Cond,
return ConstantExpr::getSelect(Cond, V1, FalseVal->getOperand(2));
}
- return 0;
+ return nullptr;
}
Constant *llvm::ConstantFoldExtractElementInstruction(Constant *Val,
@@ -766,14 +766,14 @@ Constant *llvm::ConstantFoldExtractElementInstruction(Constant *Val,
return UndefValue::get(Val->getType()->getVectorElementType());
return Val->getAggregateElement(Index);
}
- return 0;
+ return nullptr;
}
Constant *llvm::ConstantFoldInsertElementInstruction(Constant *Val,
Constant *Elt,
Constant *Idx) {
ConstantInt *CIdx = dyn_cast<ConstantInt>(Idx);
- if (!CIdx) return 0;
+ if (!CIdx) return nullptr;
const APInt &IdxVal = CIdx->getValue();
SmallVector<Constant*, 16> Result;
@@ -803,7 +803,7 @@ Constant *llvm::ConstantFoldShuffleVectorInstruction(Constant *V1,
return UndefValue::get(VectorType::get(EltTy, MaskNumElts));
// Don't break the bitcode reader hack.
- if (isa<ConstantExpr>(Mask)) return 0;
+ if (isa<ConstantExpr>(Mask)) return nullptr;
unsigned SrcNumElts = V1->getType()->getVectorNumElements();
@@ -842,7 +842,7 @@ Constant *llvm::ConstantFoldExtractValueInstruction(Constant *Agg,
if (Constant *C = Agg->getAggregateElement(Idxs[0]))
return ConstantFoldExtractValueInstruction(C, Idxs.slice(1));
- return 0;
+ return nullptr;
}
Constant *llvm::ConstantFoldInsertValueInstruction(Constant *Agg,
@@ -863,8 +863,8 @@ Constant *llvm::ConstantFoldInsertValueInstruction(Constant *Agg,
SmallVector<Constant*, 32> Result;
for (unsigned i = 0; i != NumElts; ++i) {
Constant *C = Agg->getAggregateElement(i);
- if (C == 0) return 0;
-
+ if (!C) return nullptr;
+
if (Idxs[0] == i)
C = ConstantFoldInsertValueInstruction(C, Val, Idxs.slice(1));
@@ -1209,7 +1209,7 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode,
}
// We don't know how to fold this.
- return 0;
+ return nullptr;
}
/// isZeroSizedType - This type is zero sized if its an array or structure of
@@ -1289,7 +1289,7 @@ static FCmpInst::Predicate evaluateFCmpRelation(Constant *V1, Constant *V2) {
if (!isa<ConstantExpr>(V1)) {
if (!isa<ConstantExpr>(V2)) {
      // We distilled this down to a simple case; use the standard constant folder.
- ConstantInt *R = 0;
+ ConstantInt *R = nullptr;
R = dyn_cast<ConstantInt>(
ConstantExpr::getFCmp(FCmpInst::FCMP_OEQ, V1, V2));
if (R && !R->isZero())
@@ -1355,7 +1355,7 @@ static ICmpInst::Predicate evaluateICmpRelation(Constant *V1, Constant *V2,
!isa<BlockAddress>(V2)) {
// We distilled this down to a simple case, use the standard constant
// folder.
- ConstantInt *R = 0;
+ ConstantInt *R = nullptr;
ICmpInst::Predicate pred = ICmpInst::ICMP_EQ;
R = dyn_cast<ConstantInt>(ConstantExpr::getICmp(pred, V1, V2));
if (R && !R->isZero())
@@ -1885,7 +1885,7 @@ Constant *llvm::ConstantFoldCompareInstruction(unsigned short pred,
return ConstantExpr::getICmp(pred, C2, C1);
}
}
- return 0;
+ return nullptr;
}
/// isInBoundsIndices - Test whether the given sequence of *normalized* indices
@@ -1951,7 +1951,7 @@ static Constant *ConstantFoldGetElementPtrImpl(Constant *C,
if (isa<UndefValue>(C)) {
PointerType *Ptr = cast<PointerType>(C->getType());
Type *Ty = GetElementPtrInst::getIndexedType(Ptr, Idxs);
- assert(Ty != 0 && "Invalid indices for GEP!");
+ assert(Ty && "Invalid indices for GEP!");
return UndefValue::get(PointerType::get(Ty, Ptr->getAddressSpace()));
}
@@ -1965,7 +1965,7 @@ static Constant *ConstantFoldGetElementPtrImpl(Constant *C,
if (isNull) {
PointerType *Ptr = cast<PointerType>(C->getType());
Type *Ty = GetElementPtrInst::getIndexedType(Ptr, Idxs);
- assert(Ty != 0 && "Invalid indices for GEP!");
+ assert(Ty && "Invalid indices for GEP!");
return ConstantPointerNull::get(PointerType::get(Ty,
Ptr->getAddressSpace()));
}
@@ -1977,7 +1977,7 @@ static Constant *ConstantFoldGetElementPtrImpl(Constant *C,
// getelementptr instructions into a single instruction.
//
if (CE->getOpcode() == Instruction::GetElementPtr) {
- Type *LastTy = 0;
+ Type *LastTy = nullptr;
for (gep_type_iterator I = gep_type_begin(CE), E = gep_type_end(CE);
I != E; ++I)
LastTy = *I;
@@ -2072,7 +2072,7 @@ static Constant *ConstantFoldGetElementPtrImpl(Constant *C,
bool Unknown = false;
SmallVector<Constant *, 8> NewIdxs;
Type *Ty = C->getType();
- Type *Prev = 0;
+ Type *Prev = nullptr;
for (unsigned i = 0, e = Idxs.size(); i != e;
Prev = Ty, Ty = cast<CompositeType>(Ty)->getTypeAtIndex(Idxs[i]), ++i) {
if (ConstantInt *CI = dyn_cast<ConstantInt>(Idxs[i])) {
@@ -2130,7 +2130,7 @@ static Constant *ConstantFoldGetElementPtrImpl(Constant *C,
isa<GlobalVariable>(C) && isInBoundsIndices(Idxs))
return ConstantExpr::getInBoundsGetElementPtr(C, Idxs);
- return 0;
+ return nullptr;
}
Constant *llvm::ConstantFoldGetElementPtr(Constant *C,
diff --git a/lib/IR/Constants.cpp b/lib/IR/Constants.cpp
index 2a3a5fd..bb8d60b 100644
--- a/lib/IR/Constants.cpp
+++ b/lib/IR/Constants.cpp
@@ -182,13 +182,13 @@ Constant *Constant::getAllOnesValue(Type *Ty) {
/// 'this' is a constant expr.
Constant *Constant::getAggregateElement(unsigned Elt) const {
if (const ConstantStruct *CS = dyn_cast<ConstantStruct>(this))
- return Elt < CS->getNumOperands() ? CS->getOperand(Elt) : 0;
+ return Elt < CS->getNumOperands() ? CS->getOperand(Elt) : nullptr;
if (const ConstantArray *CA = dyn_cast<ConstantArray>(this))
- return Elt < CA->getNumOperands() ? CA->getOperand(Elt) : 0;
+ return Elt < CA->getNumOperands() ? CA->getOperand(Elt) : nullptr;
if (const ConstantVector *CV = dyn_cast<ConstantVector>(this))
- return Elt < CV->getNumOperands() ? CV->getOperand(Elt) : 0;
+ return Elt < CV->getNumOperands() ? CV->getOperand(Elt) : nullptr;
if (const ConstantAggregateZero *CAZ =dyn_cast<ConstantAggregateZero>(this))
return CAZ->getElementValue(Elt);
@@ -197,15 +197,16 @@ Constant *Constant::getAggregateElement(unsigned Elt) const {
return UV->getElementValue(Elt);
if (const ConstantDataSequential *CDS =dyn_cast<ConstantDataSequential>(this))
- return Elt < CDS->getNumElements() ? CDS->getElementAsConstant(Elt) : 0;
- return 0;
+ return Elt < CDS->getNumElements() ? CDS->getElementAsConstant(Elt)
+ : nullptr;
+ return nullptr;
}
Constant *Constant::getAggregateElement(Constant *Elt) const {
assert(isa<IntegerType>(Elt->getType()) && "Index must be an integer");
if (ConstantInt *CI = dyn_cast<ConstantInt>(Elt))
return getAggregateElement(CI->getZExtValue());
- return 0;
+ return nullptr;
}
@@ -309,7 +310,7 @@ bool Constant::isThreadDependent() const {
bool Constant::isConstantUsed() const {
for (const User *U : users()) {
const Constant *UC = dyn_cast<Constant>(U);
- if (UC == 0 || isa<GlobalValue>(UC))
+ if (!UC || isa<GlobalValue>(UC))
return true;
if (UC->isConstantUsed())
@@ -397,7 +398,7 @@ void Constant::removeDeadConstantUsers() const {
Value::const_user_iterator LastNonDeadUser = E;
while (I != E) {
const Constant *User = dyn_cast<Constant>(*I);
- if (User == 0) {
+ if (!User) {
LastNonDeadUser = I;
++I;
continue;
@@ -431,7 +432,7 @@ void Constant::removeDeadConstantUsers() const {
void ConstantInt::anchor() { }
ConstantInt::ConstantInt(IntegerType *Ty, const APInt& V)
- : Constant(Ty, ConstantIntVal, 0, 0), Val(V) {
+ : Constant(Ty, ConstantIntVal, nullptr, 0), Val(V) {
assert(V.getBitWidth() == Ty->getBitWidth() && "Invalid constant for type");
}
@@ -644,7 +645,7 @@ Constant *ConstantFP::getInfinity(Type *Ty, bool Negative) {
}
ConstantFP::ConstantFP(Type *Ty, const APFloat& V)
- : Constant(Ty, ConstantFPVal, 0, 0), Val(V) {
+ : Constant(Ty, ConstantFPVal, nullptr, 0), Val(V) {
assert(&V.getSemantics() == TypeToFloatSemantics(Ty) &&
"FP type Mismatch");
}
@@ -1235,7 +1236,7 @@ ConstantAggregateZero *ConstantAggregateZero::get(Type *Ty) {
"Cannot create an aggregate zero of non-aggregate type!");
ConstantAggregateZero *&Entry = Ty->getContext().pImpl->CAZConstants[Ty];
- if (Entry == 0)
+ if (!Entry)
Entry = new ConstantAggregateZero(Ty);
return Entry;
@@ -1283,7 +1284,7 @@ Constant *Constant::getSplatValue() const {
return CV->getSplatValue();
if (const ConstantVector *CV = dyn_cast<ConstantVector>(this))
return CV->getSplatValue();
- return 0;
+ return nullptr;
}
/// getSplatValue - If this is a splat constant, where all of the
@@ -1294,7 +1295,7 @@ Constant *ConstantVector::getSplatValue() const {
// Then make sure all remaining elements point to the same value.
for (unsigned I = 1, E = getNumOperands(); I < E; ++I)
if (getOperand(I) != Elt)
- return 0;
+ return nullptr;
return Elt;
}
@@ -1315,7 +1316,7 @@ const APInt &Constant::getUniqueInteger() const {
ConstantPointerNull *ConstantPointerNull::get(PointerType *Ty) {
ConstantPointerNull *&Entry = Ty->getContext().pImpl->CPNConstants[Ty];
- if (Entry == 0)
+ if (!Entry)
Entry = new ConstantPointerNull(Ty);
return Entry;
@@ -1335,7 +1336,7 @@ void ConstantPointerNull::destroyConstant() {
UndefValue *UndefValue::get(Type *Ty) {
UndefValue *&Entry = Ty->getContext().pImpl->UVConstants[Ty];
- if (Entry == 0)
+ if (!Entry)
Entry = new UndefValue(Ty);
return Entry;
@@ -1353,14 +1354,14 @@ void UndefValue::destroyConstant() {
//
BlockAddress *BlockAddress::get(BasicBlock *BB) {
- assert(BB->getParent() != 0 && "Block must have a parent");
+ assert(BB->getParent() && "Block must have a parent");
return get(BB->getParent(), BB);
}
BlockAddress *BlockAddress::get(Function *F, BasicBlock *BB) {
BlockAddress *&BA =
F->getContext().pImpl->BlockAddresses[std::make_pair(F, BB)];
- if (BA == 0)
+ if (!BA)
BA = new BlockAddress(F, BB);
assert(BA->getFunction() == F && "Basic block moved between functions");
@@ -1377,10 +1378,10 @@ BlockAddress::BlockAddress(Function *F, BasicBlock *BB)
BlockAddress *BlockAddress::lookup(const BasicBlock *BB) {
if (!BB->hasAddressTaken())
- return 0;
+ return nullptr;
const Function *F = BB->getParent();
- assert(F != 0 && "Block must have a parent");
+ assert(F && "Block must have a parent");
BlockAddress *BA =
F->getContext().pImpl->BlockAddresses.lookup(std::make_pair(F, BB));
assert(BA && "Refcount and block address map disagree!");
@@ -1411,7 +1412,7 @@ void BlockAddress::replaceUsesOfWithOnConstant(Value *From, Value *To, Use *U) {
// and return early.
BlockAddress *&NewBA =
getContext().pImpl->BlockAddresses[std::make_pair(NewF, NewBB)];
- if (NewBA == 0) {
+ if (!NewBA) {
getBasicBlock()->AdjustBlockAddressRefCount(-1);
// Remove the old entry, this can't cause the map to rehash (just a
@@ -1792,7 +1793,7 @@ Constant *ConstantExpr::getAlignOf(Type* Ty) {
// Note that a non-inbounds gep is used, as null isn't within any object.
Type *AligningTy =
StructType::get(Type::getInt1Ty(Ty->getContext()), Ty, NULL);
- Constant *NullPtr = Constant::getNullValue(AligningTy->getPointerTo());
+ Constant *NullPtr = Constant::getNullValue(AligningTy->getPointerTo(0));
Constant *Zero = ConstantInt::get(Type::getInt64Ty(Ty->getContext()), 0);
Constant *One = ConstantInt::get(Type::getInt32Ty(Ty->getContext()), 1);
Constant *Indices[2] = { Zero, One };
@@ -1936,8 +1937,8 @@ ConstantExpr::getFCmp(unsigned short pred, Constant *LHS, Constant *RHS) {
Constant *ConstantExpr::getExtractElement(Constant *Val, Constant *Idx) {
assert(Val->getType()->isVectorTy() &&
"Tried to create extractelement operation on non-vector type!");
- assert(Idx->getType()->isIntegerTy(32) &&
- "Extractelement index must be i32 type!");
+ assert(Idx->getType()->isIntegerTy() &&
+ "Extractelement index must be an integer type!");
if (Constant *FC = ConstantFoldExtractElementInstruction(Val, Idx))
return FC; // Fold a few common cases.
@@ -1957,7 +1958,7 @@ Constant *ConstantExpr::getInsertElement(Constant *Val, Constant *Elt,
"Tried to create insertelement operation on non-vector type!");
assert(Elt->getType() == Val->getType()->getVectorElementType() &&
"Insertelement types must match!");
- assert(Idx->getType()->isIntegerTy(32) &&
+ assert(Idx->getType()->isIntegerTy() &&
"Insertelement index must be i32 type!");
if (Constant *FC = ConstantFoldInsertElementInstruction(Val, Elt, Idx))
@@ -2145,7 +2146,7 @@ Constant *ConstantExpr::getBinOpIdentity(unsigned Opcode, Type *Ty) {
switch (Opcode) {
default:
// Doesn't have an identity.
- return 0;
+ return nullptr;
case Instruction::Add:
case Instruction::Or:
@@ -2168,7 +2169,7 @@ Constant *ConstantExpr::getBinOpAbsorber(unsigned Opcode, Type *Ty) {
switch (Opcode) {
default:
// Doesn't have an absorber.
- return 0;
+ return nullptr;
case Instruction::Or:
return Constant::getAllOnesValue(Ty);
@@ -2285,7 +2286,7 @@ Constant *ConstantDataSequential::getImpl(StringRef Elements, Type *Ty) {
// of i8, or a 1-element array of i32. They'll both end up in the same
  // StringMap bucket, linked up by their Next pointers. Walk the list.
ConstantDataSequential **Entry = &Slot.getValue();
- for (ConstantDataSequential *Node = *Entry; Node != 0;
+ for (ConstantDataSequential *Node = *Entry; Node;
Entry = &Node->Next, Node = *Entry)
if (Node->getType() == Ty)
return Node;
@@ -2312,7 +2313,7 @@ void ConstantDataSequential::destroyConstant() {
ConstantDataSequential **Entry = &Slot->getValue();
// Remove the entry from the hash table.
- if ((*Entry)->Next == 0) {
+ if (!(*Entry)->Next) {
// If there is only one value in the bucket (common case) it must be this
// entry, and removing the entry should remove the bucket completely.
assert((*Entry) == this && "Hash mismatch in ConstantDataSequential");
@@ -2333,7 +2334,7 @@ void ConstantDataSequential::destroyConstant() {
// If we were part of a list, make sure that we don't delete the list that is
// still owned by the uniquing map.
- Next = 0;
+ Next = nullptr;
// Finally, actually delete it.
destroyConstantImpl();
@@ -2561,7 +2562,7 @@ Constant *ConstantDataVector::getSplatValue() const {
unsigned EltSize = getElementByteSize();
for (unsigned i = 1, e = getNumElements(); i != e; ++i)
if (memcmp(Base, Base+i*EltSize, EltSize))
- return 0;
+ return nullptr;
// If they're all the same, return the 0th one as a representative.
return getElementAsConstant(0);
@@ -2609,7 +2610,7 @@ void ConstantArray::replaceUsesOfWithOnConstant(Value *From, Value *To,
AllSame &= Val == ToC;
}
- Constant *Replacement = 0;
+ Constant *Replacement = nullptr;
if (AllSame && ToC->isNullValue()) {
Replacement = ConstantAggregateZero::get(getType());
} else if (AllSame && isa<UndefValue>(ToC)) {
@@ -2695,7 +2696,7 @@ void ConstantStruct::replaceUsesOfWithOnConstant(Value *From, Value *To,
LLVMContextImpl *pImpl = getContext().pImpl;
- Constant *Replacement = 0;
+ Constant *Replacement = nullptr;
if (isAllZeros) {
Replacement = ConstantAggregateZero::get(getType());
} else if (isAllUndef) {
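
As context for the getAggregateElement changes in the Constants.cpp hunks above, a minimal caller-side sketch (the helper and its argument are illustrative, not part of the patch): out-of-range or non-aggregate queries now yield nullptr rather than the old literal 0, so a plain pointer test is the whole check.

#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
using namespace llvm;

// Sketch only: 'CS' is assumed to be a ConstantStruct taken from a global
// initializer; each in-range field comes back as a non-null Constant*.
static void dumpFields(const ConstantStruct *CS) {
  for (unsigned i = 0, e = CS->getType()->getNumElements(); i != e; ++i)
    if (Constant *Field = CS->getAggregateElement(i))
      Field->dump(); // inspect each field constant
}
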
diff --git a/lib/IR/ConstantsContext.h b/lib/IR/ConstantsContext.h
index 59b9d4d..f06509f 100644
--- a/lib/IR/ConstantsContext.h
+++ b/lib/IR/ConstantsContext.h
@@ -24,6 +24,9 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <map>
+#include <tuple>
+
+#define DEBUG_TYPE "ir"
namespace llvm {
template<class ValType>
@@ -584,7 +587,7 @@ public:
/// necessary.
ConstantClass *getOrCreate(TypeClass *Ty, ValRefType V) {
MapKey Lookup(Ty, V);
- ConstantClass* Result = 0;
+ ConstantClass* Result = nullptr;
typename MapTy::iterator I = Map.find(Lookup);
// Is it in the map?
@@ -720,7 +723,7 @@ public:
/// necessary.
ConstantClass *getOrCreate(TypeClass *Ty, Operands V) {
LookupKey Lookup(Ty, V);
- ConstantClass* Result = 0;
+ ConstantClass* Result = nullptr;
typename MapTy::iterator I = Map.find_as(Lookup);
// Is it in the map?
diff --git a/lib/IR/Core.cpp b/lib/IR/Core.cpp
index f52f466..27ce503 100644
--- a/lib/IR/Core.cpp
+++ b/lib/IR/Core.cpp
@@ -17,6 +17,8 @@
#include "llvm/IR/Attributes.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalVariable.h"
@@ -28,6 +30,7 @@
#include "llvm/PassManager.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FileSystem.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Threading.h"
@@ -39,6 +42,8 @@
using namespace llvm;
+#define DEBUG_TYPE "ir"
+
void llvm::initializeCore(PassRegistry &Registry) {
initializeDominatorTreeWrapperPassPass(Registry);
initializePrintModulePassWrapperPass(Registry);
@@ -76,6 +81,21 @@ LLVMContextRef LLVMGetGlobalContext() {
return wrap(&getGlobalContext());
}
+void LLVMContextSetDiagnosticHandler(LLVMContextRef C,
+ LLVMDiagnosticHandler Handler,
+ void *DiagnosticContext) {
+ unwrap(C)->setDiagnosticHandler(
+ LLVM_EXTENSION reinterpret_cast<LLVMContext::DiagnosticHandlerTy>(Handler),
+ DiagnosticContext);
+}
+
+void LLVMContextSetYieldCallback(LLVMContextRef C, LLVMYieldCallback Callback,
+ void *OpaqueHandle) {
+ auto YieldCallback =
+ LLVM_EXTENSION reinterpret_cast<LLVMContext::YieldCallbackTy>(Callback);
+ unwrap(C)->setYieldCallback(YieldCallback, OpaqueHandle);
+}
+
void LLVMContextDispose(LLVMContextRef C) {
delete unwrap(C);
}
@@ -89,6 +109,40 @@ unsigned LLVMGetMDKindID(const char* Name, unsigned SLen) {
return LLVMGetMDKindIDInContext(LLVMGetGlobalContext(), Name, SLen);
}
+char *LLVMGetDiagInfoDescription(LLVMDiagnosticInfoRef DI) {
+ std::string MsgStorage;
+ raw_string_ostream Stream(MsgStorage);
+ DiagnosticPrinterRawOStream DP(Stream);
+
+ unwrap(DI)->print(DP);
+ Stream.flush();
+
+ return LLVMCreateMessage(MsgStorage.c_str());
+}
+
+LLVMDiagnosticSeverity LLVMGetDiagInfoSeverity(LLVMDiagnosticInfoRef DI){
+ LLVMDiagnosticSeverity severity;
+
+ switch(unwrap(DI)->getSeverity()) {
+ default:
+ severity = LLVMDSError;
+ break;
+ case DS_Warning:
+ severity = LLVMDSWarning;
+ break;
+ case DS_Remark:
+ severity = LLVMDSRemark;
+ break;
+ case DS_Note:
+ severity = LLVMDSNote;
+ break;
+ }
+
+ return severity;
+}
+
+
+
/*===-- Operations on modules ---------------------------------------------===*/
@@ -136,7 +190,7 @@ LLVMBool LLVMPrintModuleToFile(LLVMModuleRef M, const char *Filename,
return true;
}
- unwrap(M)->print(dest, NULL);
+ unwrap(M)->print(dest, nullptr);
if (!error.empty()) {
*ErrorMessage = strdup(error.c_str());
@@ -150,7 +204,7 @@ char *LLVMPrintModuleToString(LLVMModuleRef M) {
std::string buf;
raw_string_ostream os(buf);
- unwrap(M)->print(os, NULL);
+ unwrap(M)->print(os, nullptr);
os.flush();
return strdup(buf.c_str());
@@ -374,7 +428,7 @@ const char *LLVMGetStructName(LLVMTypeRef Ty)
{
StructType *Type = unwrap<StructType>(Ty);
if (!Type->hasName())
- return 0;
+ return nullptr;
return Type->getName().data();
}
@@ -496,7 +550,8 @@ LLVMValueRef LLVMGetMetadata(LLVMValueRef Inst, unsigned KindID) {
}
void LLVMSetMetadata(LLVMValueRef Inst, unsigned KindID, LLVMValueRef MD) {
- unwrap<Instruction>(Inst)->setMetadata(KindID, MD? unwrap<MDNode>(MD) : NULL);
+ unwrap<Instruction>(Inst)->setMetadata(KindID,
+ MD ? unwrap<MDNode>(MD) : nullptr);
}
/*--.. Conversion functions ................................................--*/
@@ -513,7 +568,7 @@ LLVMUseRef LLVMGetFirstUse(LLVMValueRef Val) {
Value *V = unwrap(Val);
Value::use_iterator I = V->use_begin();
if (I == V->use_end())
- return 0;
+ return nullptr;
return wrap(&*I);
}
@@ -521,7 +576,7 @@ LLVMUseRef LLVMGetNextUse(LLVMUseRef U) {
Use *Next = unwrap(U)->getNext();
if (Next)
return wrap(Next);
- return 0;
+ return nullptr;
}
LLVMValueRef LLVMGetUser(LLVMUseRef U) {
@@ -611,7 +666,7 @@ const char *LLVMGetMDString(LLVMValueRef V, unsigned* Len) {
return S->getString().data();
}
*Len = 0;
- return 0;
+ return nullptr;
}
unsigned LLVMGetMDNodeNumOperands(LLVMValueRef V)
@@ -650,7 +705,7 @@ void LLVMAddNamedMetadataOperand(LLVMModuleRef M, const char* name,
NamedMDNode *N = unwrap(M)->getOrInsertNamedMetadata(name);
if (!N)
return;
- MDNode *Op = Val ? unwrap<MDNode>(Val) : NULL;
+ MDNode *Op = Val ? unwrap<MDNode>(Val) : nullptr;
if (Op)
N->addOperand(Op);
}
@@ -1235,7 +1290,7 @@ const char *LLVMGetSection(LLVMValueRef Global) {
}
void LLVMSetSection(LLVMValueRef Global, const char *Section) {
- unwrap<GlobalValue>(Global)->setSection(Section);
+ unwrap<GlobalObject>(Global)->setSection(Section);
}
LLVMVisibility LLVMGetVisibility(LLVMValueRef Global) {
@@ -1285,7 +1340,7 @@ unsigned LLVMGetAlignment(LLVMValueRef V) {
void LLVMSetAlignment(LLVMValueRef V, unsigned Bytes) {
Value *P = unwrap<Value>(V);
- if (GlobalValue *GV = dyn_cast<GlobalValue>(P))
+ if (GlobalObject *GV = dyn_cast<GlobalObject>(P))
GV->setAlignment(Bytes);
else if (AllocaInst *AI = dyn_cast<AllocaInst>(P))
AI->setAlignment(Bytes);
@@ -1302,15 +1357,16 @@ void LLVMSetAlignment(LLVMValueRef V, unsigned Bytes) {
LLVMValueRef LLVMAddGlobal(LLVMModuleRef M, LLVMTypeRef Ty, const char *Name) {
return wrap(new GlobalVariable(*unwrap(M), unwrap(Ty), false,
- GlobalValue::ExternalLinkage, 0, Name));
+ GlobalValue::ExternalLinkage, nullptr, Name));
}
LLVMValueRef LLVMAddGlobalInAddressSpace(LLVMModuleRef M, LLVMTypeRef Ty,
const char *Name,
unsigned AddressSpace) {
return wrap(new GlobalVariable(*unwrap(M), unwrap(Ty), false,
- GlobalValue::ExternalLinkage, 0, Name, 0,
- GlobalVariable::NotThreadLocal, AddressSpace));
+ GlobalValue::ExternalLinkage, nullptr, Name,
+ nullptr, GlobalVariable::NotThreadLocal,
+ AddressSpace));
}
LLVMValueRef LLVMGetNamedGlobal(LLVMModuleRef M, const char *Name) {
@@ -1321,7 +1377,7 @@ LLVMValueRef LLVMGetFirstGlobal(LLVMModuleRef M) {
Module *Mod = unwrap(M);
Module::global_iterator I = Mod->global_begin();
if (I == Mod->global_end())
- return 0;
+ return nullptr;
return wrap(I);
}
@@ -1329,7 +1385,7 @@ LLVMValueRef LLVMGetLastGlobal(LLVMModuleRef M) {
Module *Mod = unwrap(M);
Module::global_iterator I = Mod->global_end();
if (I == Mod->global_begin())
- return 0;
+ return nullptr;
return wrap(--I);
}
@@ -1337,7 +1393,7 @@ LLVMValueRef LLVMGetNextGlobal(LLVMValueRef GlobalVar) {
GlobalVariable *GV = unwrap<GlobalVariable>(GlobalVar);
Module::global_iterator I = GV;
if (++I == GV->getParent()->global_end())
- return 0;
+ return nullptr;
return wrap(I);
}
@@ -1345,7 +1401,7 @@ LLVMValueRef LLVMGetPreviousGlobal(LLVMValueRef GlobalVar) {
GlobalVariable *GV = unwrap<GlobalVariable>(GlobalVar);
Module::global_iterator I = GV;
if (I == GV->getParent()->global_begin())
- return 0;
+ return nullptr;
return wrap(--I);
}
@@ -1356,7 +1412,7 @@ void LLVMDeleteGlobal(LLVMValueRef GlobalVar) {
LLVMValueRef LLVMGetInitializer(LLVMValueRef GlobalVar) {
GlobalVariable* GV = unwrap<GlobalVariable>(GlobalVar);
if ( !GV->hasInitializer() )
- return 0;
+ return nullptr;
return wrap(GV->getInitializer());
}
@@ -1432,8 +1488,10 @@ void LLVMSetExternallyInitialized(LLVMValueRef GlobalVar, LLVMBool IsExtInit) {
LLVMValueRef LLVMAddAlias(LLVMModuleRef M, LLVMTypeRef Ty, LLVMValueRef Aliasee,
const char *Name) {
- return wrap(new GlobalAlias(unwrap(Ty), GlobalValue::ExternalLinkage, Name,
- unwrap<Constant>(Aliasee), unwrap (M)));
+ auto *PTy = cast<PointerType>(unwrap(Ty));
+ return wrap(GlobalAlias::create(PTy->getElementType(), PTy->getAddressSpace(),
+ GlobalValue::ExternalLinkage, Name,
+ unwrap<GlobalObject>(Aliasee), unwrap(M)));
}
/*--.. Operations on functions .............................................--*/
@@ -1452,7 +1510,7 @@ LLVMValueRef LLVMGetFirstFunction(LLVMModuleRef M) {
Module *Mod = unwrap(M);
Module::iterator I = Mod->begin();
if (I == Mod->end())
- return 0;
+ return nullptr;
return wrap(I);
}
@@ -1460,7 +1518,7 @@ LLVMValueRef LLVMGetLastFunction(LLVMModuleRef M) {
Module *Mod = unwrap(M);
Module::iterator I = Mod->end();
if (I == Mod->begin())
- return 0;
+ return nullptr;
return wrap(--I);
}
@@ -1468,7 +1526,7 @@ LLVMValueRef LLVMGetNextFunction(LLVMValueRef Fn) {
Function *Func = unwrap<Function>(Fn);
Module::iterator I = Func;
if (++I == Func->getParent()->end())
- return 0;
+ return nullptr;
return wrap(I);
}
@@ -1476,7 +1534,7 @@ LLVMValueRef LLVMGetPreviousFunction(LLVMValueRef Fn) {
Function *Func = unwrap<Function>(Fn);
Module::iterator I = Func;
if (I == Func->getParent()->begin())
- return 0;
+ return nullptr;
return wrap(--I);
}
@@ -1501,7 +1559,7 @@ void LLVMSetFunctionCallConv(LLVMValueRef Fn, unsigned CC) {
const char *LLVMGetGC(LLVMValueRef Fn) {
Function *F = unwrap<Function>(Fn);
- return F->hasGC()? F->getGC() : 0;
+ return F->hasGC()? F->getGC() : nullptr;
}
void LLVMSetGC(LLVMValueRef Fn, const char *GC) {
@@ -1582,7 +1640,7 @@ LLVMValueRef LLVMGetFirstParam(LLVMValueRef Fn) {
Function *Func = unwrap<Function>(Fn);
Function::arg_iterator I = Func->arg_begin();
if (I == Func->arg_end())
- return 0;
+ return nullptr;
return wrap(I);
}
@@ -1590,7 +1648,7 @@ LLVMValueRef LLVMGetLastParam(LLVMValueRef Fn) {
Function *Func = unwrap<Function>(Fn);
Function::arg_iterator I = Func->arg_end();
if (I == Func->arg_begin())
- return 0;
+ return nullptr;
return wrap(--I);
}
@@ -1598,7 +1656,7 @@ LLVMValueRef LLVMGetNextParam(LLVMValueRef Arg) {
Argument *A = unwrap<Argument>(Arg);
Function::arg_iterator I = A;
if (++I == A->getParent()->arg_end())
- return 0;
+ return nullptr;
return wrap(I);
}
@@ -1606,7 +1664,7 @@ LLVMValueRef LLVMGetPreviousParam(LLVMValueRef Arg) {
Argument *A = unwrap<Argument>(Arg);
Function::arg_iterator I = A;
if (I == A->getParent()->arg_begin())
- return 0;
+ return nullptr;
return wrap(--I);
}
@@ -1676,7 +1734,7 @@ LLVMBasicBlockRef LLVMGetFirstBasicBlock(LLVMValueRef Fn) {
Function *Func = unwrap<Function>(Fn);
Function::iterator I = Func->begin();
if (I == Func->end())
- return 0;
+ return nullptr;
return wrap(I);
}
@@ -1684,7 +1742,7 @@ LLVMBasicBlockRef LLVMGetLastBasicBlock(LLVMValueRef Fn) {
Function *Func = unwrap<Function>(Fn);
Function::iterator I = Func->end();
if (I == Func->begin())
- return 0;
+ return nullptr;
return wrap(--I);
}
@@ -1692,7 +1750,7 @@ LLVMBasicBlockRef LLVMGetNextBasicBlock(LLVMBasicBlockRef BB) {
BasicBlock *Block = unwrap(BB);
Function::iterator I = Block;
if (++I == Block->getParent()->end())
- return 0;
+ return nullptr;
return wrap(I);
}
@@ -1700,7 +1758,7 @@ LLVMBasicBlockRef LLVMGetPreviousBasicBlock(LLVMBasicBlockRef BB) {
BasicBlock *Block = unwrap(BB);
Function::iterator I = Block;
if (I == Block->getParent()->begin())
- return 0;
+ return nullptr;
return wrap(--I);
}
@@ -1752,7 +1810,7 @@ LLVMValueRef LLVMGetFirstInstruction(LLVMBasicBlockRef BB) {
BasicBlock *Block = unwrap(BB);
BasicBlock::iterator I = Block->begin();
if (I == Block->end())
- return 0;
+ return nullptr;
return wrap(I);
}
@@ -1760,7 +1818,7 @@ LLVMValueRef LLVMGetLastInstruction(LLVMBasicBlockRef BB) {
BasicBlock *Block = unwrap(BB);
BasicBlock::iterator I = Block->end();
if (I == Block->begin())
- return 0;
+ return nullptr;
return wrap(--I);
}
@@ -1768,7 +1826,7 @@ LLVMValueRef LLVMGetNextInstruction(LLVMValueRef Inst) {
Instruction *Instr = unwrap<Instruction>(Inst);
BasicBlock::iterator I = Instr;
if (++I == Instr->getParent()->end())
- return 0;
+ return nullptr;
return wrap(I);
}
@@ -1776,7 +1834,7 @@ LLVMValueRef LLVMGetPreviousInstruction(LLVMValueRef Inst) {
Instruction *Instr = unwrap<Instruction>(Inst);
BasicBlock::iterator I = Instr;
if (I == Instr->getParent()->begin())
- return 0;
+ return nullptr;
return wrap(--I);
}
@@ -1939,7 +1997,7 @@ void LLVMDisposeBuilder(LLVMBuilderRef Builder) {
/*--.. Metadata builders ...................................................--*/
void LLVMSetCurrentDebugLocation(LLVMBuilderRef Builder, LLVMValueRef L) {
- MDNode *Loc = L ? unwrap<MDNode>(L) : NULL;
+ MDNode *Loc = L ? unwrap<MDNode>(L) : nullptr;
unwrap(Builder)->SetCurrentDebugLocation(DebugLoc::getFromDILocation(Loc));
}
@@ -2195,7 +2253,7 @@ LLVMValueRef LLVMBuildMalloc(LLVMBuilderRef B, LLVMTypeRef Ty,
AllocSize = ConstantExpr::getTruncOrBitCast(AllocSize, ITy);
Instruction* Malloc = CallInst::CreateMalloc(unwrap(B)->GetInsertBlock(),
ITy, unwrap(Ty), AllocSize,
- 0, 0, "");
+ nullptr, nullptr, "");
return wrap(unwrap(B)->Insert(Malloc, Twine(Name)));
}
@@ -2206,13 +2264,13 @@ LLVMValueRef LLVMBuildArrayMalloc(LLVMBuilderRef B, LLVMTypeRef Ty,
AllocSize = ConstantExpr::getTruncOrBitCast(AllocSize, ITy);
Instruction* Malloc = CallInst::CreateMalloc(unwrap(B)->GetInsertBlock(),
ITy, unwrap(Ty), AllocSize,
- unwrap(Val), 0, "");
+ unwrap(Val), nullptr, "");
return wrap(unwrap(B)->Insert(Malloc, Twine(Name)));
}
LLVMValueRef LLVMBuildAlloca(LLVMBuilderRef B, LLVMTypeRef Ty,
const char *Name) {
- return wrap(unwrap(B)->CreateAlloca(unwrap(Ty), 0, Name));
+ return wrap(unwrap(B)->CreateAlloca(unwrap(Ty), nullptr, Name));
}
LLVMValueRef LLVMBuildArrayAlloca(LLVMBuilderRef B, LLVMTypeRef Ty,
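
The new diagnostic C API added to Core.cpp above composes as follows. A minimal sketch, assuming only the llvm-c declarations this patch introduces (the callback body and message prefixes are invented for illustration):

#include "llvm-c/Core.h"
#include <cstdio>

// Sketch: LLVMDiagnosticHandler is assumed to be
// void(*)(LLVMDiagnosticInfoRef, void*), matching the cast in the patch.
static void onDiag(LLVMDiagnosticInfoRef DI, void *) {
  char *Msg = LLVMGetDiagInfoDescription(DI); // heap-allocated message
  const char *Kind =
      LLVMGetDiagInfoSeverity(DI) == LLVMDSError ? "error" : "note";
  std::fprintf(stderr, "%s: %s\n", Kind, Msg);
  LLVMDisposeMessage(Msg); // the caller must free the message
}

int main() {
  LLVMContextRef C = LLVMContextCreate();
  LLVMContextSetDiagnosticHandler(C, onDiag, nullptr);
  // ... build or parse IR against C; diagnostics now reach onDiag ...
  LLVMContextDispose(C);
  return 0;
}
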
diff --git a/lib/IR/DIBuilder.cpp b/lib/IR/DIBuilder.cpp
index 1ea381a..92edacc 100644
--- a/lib/IR/DIBuilder.cpp
+++ b/lib/IR/DIBuilder.cpp
@@ -30,8 +30,9 @@ static Constant *GetTagConstant(LLVMContext &VMContext, unsigned Tag) {
}
DIBuilder::DIBuilder(Module &m)
- : M(m), VMContext(M.getContext()), TempEnumTypes(0), TempRetainTypes(0),
- TempSubprograms(0), TempGVs(0), DeclareFn(0), ValueFn(0) {}
+ : M(m), VMContext(M.getContext()), TempEnumTypes(nullptr),
+ TempRetainTypes(nullptr), TempSubprograms(nullptr), TempGVs(nullptr),
+ DeclareFn(nullptr), ValueFn(nullptr) {}
/// finalize - Construct any deferred debug info descriptors.
void DIBuilder::finalize() {
@@ -80,7 +81,7 @@ void DIBuilder::finalize() {
/// N.
static MDNode *getNonCompileUnitScope(MDNode *N) {
if (DIDescriptor(N).isCompileUnit())
- return NULL;
+ return nullptr;
return N;
}
@@ -103,7 +104,7 @@ DICompileUnit DIBuilder::createCompileUnit(unsigned Lang, StringRef Filename,
StringRef SplitName,
DebugEmissionKind Kind) {
- assert(((Lang <= dwarf::DW_LANG_Python && Lang >= dwarf::DW_LANG_C89) ||
+ assert(((Lang <= dwarf::DW_LANG_OCaml && Lang >= dwarf::DW_LANG_C89) ||
(Lang <= dwarf::DW_LANG_hi_user && Lang >= dwarf::DW_LANG_lo_user)) &&
"Invalid Language tag");
assert(!Filename.empty() &&
@@ -146,13 +147,13 @@ DICompileUnit DIBuilder::createCompileUnit(unsigned Lang, StringRef Filename,
}
static DIImportedEntity
-createImportedModule(LLVMContext &C, DIScope Context, DIDescriptor NS,
- unsigned Line, StringRef Name,
- SmallVectorImpl<TrackingVH<MDNode> > &AllImportedModules) {
+createImportedModule(LLVMContext &C, dwarf::Tag Tag, DIScope Context,
+ Value *NS, unsigned Line, StringRef Name,
+ SmallVectorImpl<TrackingVH<MDNode>> &AllImportedModules) {
const MDNode *R;
if (Name.empty()) {
Value *Elts[] = {
- GetTagConstant(C, dwarf::DW_TAG_imported_module),
+ GetTagConstant(C, Tag),
Context,
NS,
ConstantInt::get(Type::getInt32Ty(C), Line),
@@ -160,7 +161,7 @@ createImportedModule(LLVMContext &C, DIScope Context, DIDescriptor NS,
R = MDNode::get(C, Elts);
} else {
Value *Elts[] = {
- GetTagConstant(C, dwarf::DW_TAG_imported_module),
+ GetTagConstant(C, Tag),
Context,
NS,
ConstantInt::get(Type::getInt32Ty(C), Line),
@@ -175,33 +176,32 @@ createImportedModule(LLVMContext &C, DIScope Context, DIDescriptor NS,
}
DIImportedEntity DIBuilder::createImportedModule(DIScope Context,
- DINameSpace NS, unsigned Line,
- StringRef Name) {
- return ::createImportedModule(VMContext, Context, NS, Line, Name,
- AllImportedModules);
+ DINameSpace NS,
+ unsigned Line) {
+ return ::createImportedModule(VMContext, dwarf::DW_TAG_imported_module,
+ Context, NS, Line, StringRef(), AllImportedModules);
}
DIImportedEntity DIBuilder::createImportedModule(DIScope Context,
DIImportedEntity NS,
- unsigned Line,
- StringRef Name) {
- return ::createImportedModule(VMContext, Context, NS, Line, Name,
- AllImportedModules);
+ unsigned Line) {
+ return ::createImportedModule(VMContext, dwarf::DW_TAG_imported_module,
+ Context, NS, Line, StringRef(), AllImportedModules);
}
DIImportedEntity DIBuilder::createImportedDeclaration(DIScope Context,
DIScope Decl,
- unsigned Line) {
- Value *Elts[] = {
- GetTagConstant(VMContext, dwarf::DW_TAG_imported_declaration),
- Context,
- Decl.getRef(),
- ConstantInt::get(Type::getInt32Ty(VMContext), Line),
- };
- DIImportedEntity M(MDNode::get(VMContext, Elts));
- assert(M.Verify() && "Imported module should be valid");
- AllImportedModules.push_back(TrackingVH<MDNode>(M));
- return M;
+ unsigned Line, StringRef Name) {
+ return ::createImportedModule(VMContext, dwarf::DW_TAG_imported_declaration,
+ Context, Decl.getRef(), Line, Name,
+ AllImportedModules);
+}
+
+DIImportedEntity DIBuilder::createImportedDeclaration(DIScope Context,
+ DIImportedEntity Imp,
+ unsigned Line, StringRef Name) {
+ return ::createImportedModule(VMContext, dwarf::DW_TAG_imported_declaration,
+ Context, Imp, Line, Name, AllImportedModules);
}
/// createFile - Create a file descriptor to hold debugging information
@@ -232,8 +232,8 @@ DIBasicType DIBuilder::createUnspecifiedType(StringRef Name) {
// size, alignment, offset and flags are always empty here.
Value *Elts[] = {
GetTagConstant(VMContext, dwarf::DW_TAG_unspecified_type),
- NULL, // Filename
- NULL, // Unused
+ nullptr, // Filename
+ nullptr, // Unused
MDString::get(VMContext, Name),
ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line
ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size
@@ -260,8 +260,8 @@ DIBuilder::createBasicType(StringRef Name, uint64_t SizeInBits,
// offset and flags are always empty here.
Value *Elts[] = {
GetTagConstant(VMContext, dwarf::DW_TAG_base_type),
- NULL, // File/directory name
- NULL, // Unused
+ nullptr, // File/directory name
+ nullptr, // Unused
MDString::get(VMContext, Name),
ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line
ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
@@ -279,8 +279,8 @@ DIDerivedType DIBuilder::createQualifiedType(unsigned Tag, DIType FromTy) {
// Qualified types are encoded in DIDerivedType format.
Value *Elts[] = {
GetTagConstant(VMContext, Tag),
- NULL, // Filename
- NULL, // Unused
+ nullptr, // Filename
+ nullptr, // Unused
MDString::get(VMContext, StringRef()), // Empty name.
ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line
ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size
@@ -299,8 +299,8 @@ DIBuilder::createPointerType(DIType PointeeTy, uint64_t SizeInBits,
// Pointer types are encoded in DIDerivedType format.
Value *Elts[] = {
GetTagConstant(VMContext, dwarf::DW_TAG_pointer_type),
- NULL, // Filename
- NULL, // Unused
+ nullptr, // Filename
+ nullptr, // Unused
MDString::get(VMContext, Name),
ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line
ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
@@ -317,9 +317,9 @@ DIDerivedType DIBuilder::createMemberPointerType(DIType PointeeTy,
// Pointer types are encoded in DIDerivedType format.
Value *Elts[] = {
GetTagConstant(VMContext, dwarf::DW_TAG_ptr_to_member_type),
- NULL, // Filename
- NULL, // Unused
- NULL,
+ nullptr, // Filename
+ nullptr, // Unused
+ nullptr,
ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line
ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size
ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align
@@ -338,9 +338,9 @@ DIDerivedType DIBuilder::createReferenceType(unsigned Tag, DIType RTy) {
// References are encoded in DIDerivedType format.
Value *Elts[] = {
GetTagConstant(VMContext, Tag),
- NULL, // Filename
- NULL, // TheCU,
- NULL, // Name
+ nullptr, // Filename
+ nullptr, // TheCU,
+ nullptr, // Name
ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line
ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size
ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align
@@ -355,7 +355,6 @@ DIDerivedType DIBuilder::createReferenceType(unsigned Tag, DIType RTy) {
DIDerivedType DIBuilder::createTypedef(DIType Ty, StringRef Name, DIFile File,
unsigned LineNo, DIDescriptor Context) {
// typedefs are encoded in DIDerivedType format.
- assert(Ty.isType() && "Invalid typedef type!");
Value *Elts[] = {
GetTagConstant(VMContext, dwarf::DW_TAG_typedef),
File.getFileNode(),
@@ -378,9 +377,9 @@ DIDerivedType DIBuilder::createFriend(DIType Ty, DIType FriendTy) {
assert(FriendTy.isType() && "Invalid friend type!");
Value *Elts[] = {
GetTagConstant(VMContext, dwarf::DW_TAG_friend),
- NULL,
+ nullptr,
Ty.getRef(),
- NULL, // Name
+ nullptr, // Name
ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line
ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size
ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align
@@ -400,9 +399,9 @@ DIDerivedType DIBuilder::createInheritance(DIType Ty, DIType BaseTy,
// TAG_inheritance is encoded in DIDerivedType format.
Value *Elts[] = {
GetTagConstant(VMContext, dwarf::DW_TAG_inheritance),
- NULL,
+ nullptr,
Ty.getRef(),
- NULL, // Name
+ nullptr, // Name
ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line
ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size
ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align
@@ -631,7 +630,8 @@ DICompositeType DIBuilder::createClassType(DIDescriptor Context, StringRef Name,
ConstantInt::get(Type::getInt32Ty(VMContext), 0),
VTableHolder.getRef(),
TemplateParams,
- UniqueIdentifier.empty() ? NULL : MDString::get(VMContext, UniqueIdentifier)
+ UniqueIdentifier.empty() ? nullptr
+ : MDString::get(VMContext, UniqueIdentifier)
};
DICompositeType R(MDNode::get(VMContext, Elts));
assert(R.isCompositeType() &&
@@ -667,8 +667,9 @@ DICompositeType DIBuilder::createStructType(DIDescriptor Context,
Elements,
ConstantInt::get(Type::getInt32Ty(VMContext), RunTimeLang),
VTableHolder.getRef(),
- NULL,
- UniqueIdentifier.empty() ? NULL : MDString::get(VMContext, UniqueIdentifier)
+ nullptr,
+ UniqueIdentifier.empty() ? nullptr
+ : MDString::get(VMContext, UniqueIdentifier)
};
DICompositeType R(MDNode::get(VMContext, Elts));
assert(R.isCompositeType() &&
@@ -697,12 +698,13 @@ DICompositeType DIBuilder::createUnionType(DIDescriptor Scope, StringRef Name,
ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Offset
ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
- NULL,
+ nullptr,
Elements,
ConstantInt::get(Type::getInt32Ty(VMContext), RunTimeLang),
- NULL,
- NULL,
- UniqueIdentifier.empty() ? NULL : MDString::get(VMContext, UniqueIdentifier)
+ nullptr,
+ nullptr,
+ UniqueIdentifier.empty() ? nullptr
+ : MDString::get(VMContext, UniqueIdentifier)
};
DICompositeType R(MDNode::get(VMContext, Elts));
if (!UniqueIdentifier.empty())
@@ -718,19 +720,19 @@ DICompositeType DIBuilder::createSubroutineType(DIFile File,
Value *Elts[] = {
GetTagConstant(VMContext, dwarf::DW_TAG_subroutine_type),
Constant::getNullValue(Type::getInt32Ty(VMContext)),
- NULL,
+ nullptr,
MDString::get(VMContext, ""),
ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line
ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size
ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align
ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Offset
ConstantInt::get(Type::getInt32Ty(VMContext), Flags), // Flags
- NULL,
+ nullptr,
ParameterTypes,
ConstantInt::get(Type::getInt32Ty(VMContext), 0),
- NULL,
- NULL,
- NULL // Type Identifer
+ nullptr,
+ nullptr,
+    nullptr // Type Identifier
};
return DICompositeType(MDNode::get(VMContext, Elts));
}
@@ -755,9 +757,10 @@ DICompositeType DIBuilder::createEnumerationType(
UnderlyingType.getRef(),
Elements,
ConstantInt::get(Type::getInt32Ty(VMContext), 0),
- NULL,
- NULL,
- UniqueIdentifier.empty() ? NULL : MDString::get(VMContext, UniqueIdentifier)
+ nullptr,
+ nullptr,
+ UniqueIdentifier.empty() ? nullptr
+ : MDString::get(VMContext, UniqueIdentifier)
};
DICompositeType CTy(MDNode::get(VMContext, Elts));
AllEnumTypes.push_back(CTy);
@@ -772,8 +775,8 @@ DICompositeType DIBuilder::createArrayType(uint64_t Size, uint64_t AlignInBits,
// TAG_array_type is encoded in DICompositeType format.
Value *Elts[] = {
GetTagConstant(VMContext, dwarf::DW_TAG_array_type),
- NULL, // Filename/Directory,
- NULL, // Unused
+ nullptr, // Filename/Directory,
+ nullptr, // Unused
MDString::get(VMContext, ""),
ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line
ConstantInt::get(Type::getInt64Ty(VMContext), Size),
@@ -783,9 +786,9 @@ DICompositeType DIBuilder::createArrayType(uint64_t Size, uint64_t AlignInBits,
Ty.getRef(),
Subscripts,
ConstantInt::get(Type::getInt32Ty(VMContext), 0),
- NULL,
- NULL,
- NULL // Type Identifer
+ nullptr,
+ nullptr,
+    nullptr // Type Identifier
};
return DICompositeType(MDNode::get(VMContext, Elts));
}
@@ -796,8 +799,8 @@ DICompositeType DIBuilder::createVectorType(uint64_t Size, uint64_t AlignInBits,
// A vector is an array type with the FlagVector flag applied.
Value *Elts[] = {
GetTagConstant(VMContext, dwarf::DW_TAG_array_type),
- NULL, // Filename/Directory,
- NULL, // Unused
+ nullptr, // Filename/Directory,
+ nullptr, // Unused
MDString::get(VMContext, ""),
ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line
ConstantInt::get(Type::getInt64Ty(VMContext), Size),
@@ -807,9 +810,9 @@ DICompositeType DIBuilder::createVectorType(uint64_t Size, uint64_t AlignInBits,
Ty.getRef(),
Subscripts,
ConstantInt::get(Type::getInt32Ty(VMContext), 0),
- NULL,
- NULL,
- NULL // Type Identifer
+ nullptr,
+ nullptr,
+    nullptr // Type Identifier
};
return DICompositeType(MDNode::get(VMContext, Elts));
}
@@ -890,12 +893,47 @@ DIBuilder::createForwardDecl(unsigned Tag, StringRef Name, DIDescriptor Scope,
ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Offset
ConstantInt::get(Type::getInt32Ty(VMContext), DIDescriptor::FlagFwdDecl),
- NULL,
+ nullptr,
+ DIArray(),
+ ConstantInt::get(Type::getInt32Ty(VMContext), RuntimeLang),
+ nullptr,
+ nullptr, //TemplateParams
+ UniqueIdentifier.empty() ? nullptr
+ : MDString::get(VMContext, UniqueIdentifier)
+ };
+ MDNode *Node = MDNode::get(VMContext, Elts);
+ DICompositeType RetTy(Node);
+ assert(RetTy.isCompositeType() &&
+ "createForwardDecl result should be a DIType");
+ if (!UniqueIdentifier.empty())
+ retainType(RetTy);
+ return RetTy;
+}
+
+/// createReplaceableForwardDecl - Create a temporary forward-declared type
+/// that can be RAUW'd if the full type is seen.
+DICompositeType DIBuilder::createReplaceableForwardDecl(
+ unsigned Tag, StringRef Name, DIDescriptor Scope, DIFile F, unsigned Line,
+ unsigned RuntimeLang, uint64_t SizeInBits, uint64_t AlignInBits,
+ StringRef UniqueIdentifier) {
+ // Create a temporary MDNode.
+ Value *Elts[] = {
+ GetTagConstant(VMContext, Tag),
+ F.getFileNode(),
+ DIScope(getNonCompileUnitScope(Scope)).getRef(),
+ MDString::get(VMContext, Name),
+ ConstantInt::get(Type::getInt32Ty(VMContext), Line),
+ ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
+ ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
+ ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Offset
+ ConstantInt::get(Type::getInt32Ty(VMContext), DIDescriptor::FlagFwdDecl),
+ nullptr,
DIArray(),
ConstantInt::get(Type::getInt32Ty(VMContext), RuntimeLang),
- NULL,
- NULL, //TemplateParams
- UniqueIdentifier.empty() ? NULL : MDString::get(VMContext, UniqueIdentifier)
+ nullptr,
+ nullptr, //TemplateParams
+ UniqueIdentifier.empty() ? nullptr
+ : MDString::get(VMContext, UniqueIdentifier)
};
MDNode *Node = MDNode::getTemporary(VMContext, Elts);
DICompositeType RetTy(Node);
@@ -932,7 +970,7 @@ DIGlobalVariable DIBuilder::createGlobalVariable(StringRef Name,
Value *Elts[] = {
GetTagConstant(VMContext, dwarf::DW_TAG_variable),
Constant::getNullValue(Type::getInt32Ty(VMContext)),
- NULL, // TheCU,
+ nullptr, // TheCU,
MDString::get(VMContext, Name),
MDString::get(VMContext, Name),
MDString::get(VMContext, LinkageName),
@@ -1087,7 +1125,7 @@ DISubprogram DIBuilder::createFunction(DIDescriptor Context, StringRef Name,
ConstantInt::get(Type::getInt1Ty(VMContext), isDefinition),
ConstantInt::get(Type::getInt32Ty(VMContext), 0),
ConstantInt::get(Type::getInt32Ty(VMContext), 0),
- NULL,
+ nullptr,
ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
ConstantInt::get(Type::getInt1Ty(VMContext), isOptimized),
Fn,
@@ -1121,7 +1159,6 @@ DISubprogram DIBuilder::createMethod(DIDescriptor Context, StringRef Name,
assert(getNonCompileUnitScope(Context) &&
"Methods should have both a Context and a context that isn't "
"the compile unit.");
- Value *TElts[] = { GetTagConstant(VMContext, DW_TAG_base_type) };
Value *Elts[] = {
GetTagConstant(VMContext, dwarf::DW_TAG_subprogram),
F.getFileNode(),
@@ -1141,7 +1178,7 @@ DISubprogram DIBuilder::createMethod(DIDescriptor Context, StringRef Name,
Fn,
TParam,
Constant::getNullValue(Type::getInt32Ty(VMContext)),
- MDNode::getTemporary(VMContext, TElts),
+ nullptr,
// FIXME: Do we want to use different scope/lines?
ConstantInt::get(Type::getInt32Ty(VMContext), LineNo)
};
@@ -1189,6 +1226,13 @@ DILexicalBlockFile DIBuilder::createLexicalBlockFile(DIDescriptor Scope,
DILexicalBlock DIBuilder::createLexicalBlock(DIDescriptor Scope, DIFile File,
unsigned Line, unsigned Col,
unsigned Discriminator) {
+ // FIXME: This isn't thread safe nor the right way to defeat MDNode uniquing.
+ // I believe the right way is to have a self-referential element in the node.
+ // Also: why do we bother with line/column - they're not used and the
+ // documentation (SourceLevelDebugging.rst) claims the line/col are necessary
+ // for uniquing, yet then we have this other solution (because line/col were
+ // inadequate) anyway. Remove all 3 and replace them with a self-reference.
+
// Defeat MDNode uniquing for lexical blocks by using unique id.
static unsigned int unique_id = 0;
Value *Elts[] = {
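
To see the reshuffled imported-entity overloads above in one place, a hedged frontend-side sketch (DIB and the descriptors are placeholders; the header paths follow this era's tree, where DIBuilder.h still lives at the top level): the Name argument moved off createImportedModule and onto createImportedDeclaration.

#include "llvm/DIBuilder.h"
#include "llvm/DebugInfo.h"
using namespace llvm;

// Sketch: plain module imports no longer take a name; a named alias goes
// through the declaration overload instead.
static void emitImports(DIBuilder &DIB, DIScope Scope, DINameSpace NS,
                        DIScope Decl, unsigned Line) {
  DIB.createImportedModule(Scope, NS, Line);                 // no Name anymore
  DIB.createImportedDeclaration(Scope, Decl, Line, "Alias"); // named decl
}
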
diff --git a/lib/IR/DataLayout.cpp b/lib/IR/DataLayout.cpp
index 6c18387..dea05fb 100644
--- a/lib/IR/DataLayout.cpp
+++ b/lib/IR/DataLayout.cpp
@@ -178,7 +178,7 @@ static const LayoutAlignElem DefaultAlignments[] = {
void DataLayout::reset(StringRef Desc) {
clear();
- LayoutMap = 0;
+ LayoutMap = nullptr;
LittleEndian = false;
StackNaturalAlign = 0;
ManglingMode = MM_None;
@@ -344,7 +344,7 @@ void DataLayout::parseSpecifier(StringRef Desc) {
}
}
-DataLayout::DataLayout(const Module *M) : LayoutMap(0) {
+DataLayout::DataLayout(const Module *M) : LayoutMap(nullptr) {
const DataLayout *Other = M->getDataLayout();
if (Other)
*this = *Other;
@@ -357,7 +357,7 @@ bool DataLayout::operator==(const DataLayout &Other) const {
StackNaturalAlign == Other.StackNaturalAlign &&
ManglingMode == Other.ManglingMode &&
LegalIntWidths == Other.LegalIntWidths &&
- Alignments == Other.Alignments && Pointers == Pointers;
+ Alignments == Other.Alignments && Pointers == Other.Pointers;
assert(Ret == (getStringRepresentation() == Other.getStringRepresentation()));
return Ret;
}
@@ -488,7 +488,7 @@ void DataLayout::clear() {
Alignments.clear();
Pointers.clear();
delete static_cast<StructLayoutMap *>(LayoutMap);
- LayoutMap = 0;
+ LayoutMap = nullptr;
}
DataLayout::~DataLayout() {
@@ -687,7 +687,7 @@ unsigned DataLayout::getABITypeAlignment(Type *Ty) const {
/// getABIIntegerTypeAlignment - Return the minimum ABI-required alignment for
/// an integer type of the specified bitwidth.
unsigned DataLayout::getABIIntegerTypeAlignment(unsigned BitWidth) const {
- return getAlignmentInfo(INTEGER_ALIGN, BitWidth, true, 0);
+ return getAlignmentInfo(INTEGER_ALIGN, BitWidth, true, nullptr);
}
unsigned DataLayout::getPrefTypeAlignment(Type *Ty) const {
@@ -708,7 +708,7 @@ IntegerType *DataLayout::getIntPtrType(LLVMContext &C,
Type *DataLayout::getIntPtrType(Type *Ty) const {
assert(Ty->isPtrOrPtrVectorTy() &&
"Expected a pointer or pointer vector type.");
- unsigned NumBits = getTypeSizeInBits(Ty->getScalarType());
+ unsigned NumBits = getPointerTypeSizeInBits(Ty);
IntegerType *IntTy = IntegerType::get(Ty->getContext(), NumBits);
if (VectorType *VecTy = dyn_cast<VectorType>(Ty))
return VectorType::get(IntTy, VecTy->getNumElements());
@@ -719,7 +719,7 @@ Type *DataLayout::getSmallestLegalIntType(LLVMContext &C, unsigned Width) const
for (unsigned LegalIntWidth : LegalIntWidths)
if (Width <= LegalIntWidth)
return Type::getIntNTy(C, LegalIntWidth);
- return 0;
+ return nullptr;
}
unsigned DataLayout::getLargestLegalIntTypeSize() const {
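
The getIntPtrType correction above is easiest to see with a vector of pointers; a small sketch of the intended behavior (the four-element vector is illustrative):

#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"
using namespace llvm;

// Sketch: after the fix, a <4 x i8*> maps to <4 x iN> where N is the pointer
// width for that address space (e.g. i64 on a 64-bit target), rather than
// whatever getTypeSizeInBits happened to report for the scalar type.
static Type *intPtrVecFor(LLVMContext &Ctx, const DataLayout &DL) {
  Type *PtrVec = VectorType::get(Type::getInt8PtrTy(Ctx), 4);
  return DL.getIntPtrType(PtrVec);
}
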
diff --git a/lib/IR/DebugInfo.cpp b/lib/IR/DebugInfo.cpp
index c9d68af..db9e56d 100644
--- a/lib/IR/DebugInfo.cpp
+++ b/lib/IR/DebugInfo.cpp
@@ -53,8 +53,8 @@ bool DIDescriptor::Verify() const {
}
static Value *getField(const MDNode *DbgNode, unsigned Elt) {
- if (DbgNode == 0 || Elt >= DbgNode->getNumOperands())
- return 0;
+ if (!DbgNode || Elt >= DbgNode->getNumOperands())
+ return nullptr;
return DbgNode->getOperand(Elt);
}
@@ -73,7 +73,7 @@ StringRef DIDescriptor::getStringField(unsigned Elt) const {
}
uint64_t DIDescriptor::getUInt64Field(unsigned Elt) const {
- if (DbgNode == 0)
+ if (!DbgNode)
return 0;
if (Elt < DbgNode->getNumOperands())
@@ -85,7 +85,7 @@ uint64_t DIDescriptor::getUInt64Field(unsigned Elt) const {
}
int64_t DIDescriptor::getInt64Field(unsigned Elt) const {
- if (DbgNode == 0)
+ if (!DbgNode)
return 0;
if (Elt < DbgNode->getNumOperands())
@@ -102,34 +102,34 @@ DIDescriptor DIDescriptor::getDescriptorField(unsigned Elt) const {
}
GlobalVariable *DIDescriptor::getGlobalVariableField(unsigned Elt) const {
- if (DbgNode == 0)
- return 0;
+ if (!DbgNode)
+ return nullptr;
if (Elt < DbgNode->getNumOperands())
return dyn_cast_or_null<GlobalVariable>(DbgNode->getOperand(Elt));
- return 0;
+ return nullptr;
}
Constant *DIDescriptor::getConstantField(unsigned Elt) const {
- if (DbgNode == 0)
- return 0;
+ if (!DbgNode)
+ return nullptr;
if (Elt < DbgNode->getNumOperands())
return dyn_cast_or_null<Constant>(DbgNode->getOperand(Elt));
- return 0;
+ return nullptr;
}
Function *DIDescriptor::getFunctionField(unsigned Elt) const {
- if (DbgNode == 0)
- return 0;
+ if (!DbgNode)
+ return nullptr;
if (Elt < DbgNode->getNumOperands())
return dyn_cast_or_null<Function>(DbgNode->getOperand(Elt));
- return 0;
+ return nullptr;
}
void DIDescriptor::replaceFunctionField(unsigned Elt, Function *F) {
- if (DbgNode == 0)
+ if (!DbgNode)
return;
if (Elt < DbgNode->getNumOperands()) {
@@ -335,7 +335,7 @@ unsigned DIArray::getNumElements() const {
/// replaceAllUsesWith - Replace all uses of the MDNode used by this
/// type with the one in the passed descriptor.
-void DIType::replaceAllUsesWith(DIDescriptor &D) {
+void DIType::replaceAllUsesWith(LLVMContext &VMContext, DIDescriptor D) {
assert(DbgNode && "Trying to replace an unverified type!");
@@ -344,13 +344,19 @@ void DIType::replaceAllUsesWith(DIDescriptor &D) {
// which, due to uniquing, has merged with the source. We shield clients from
// this detail by allowing a value to be replaced with replaceAllUsesWith()
// itself.
- if (DbgNode != D) {
- MDNode *Node = const_cast<MDNode *>(DbgNode);
- const MDNode *DN = D;
- const Value *V = cast_or_null<Value>(DN);
- Node->replaceAllUsesWith(const_cast<Value *>(V));
- MDNode::deleteTemporary(Node);
+ const MDNode *DN = D;
+ if (DbgNode == DN) {
+ SmallVector<Value*, 10> Ops(DbgNode->getNumOperands());
+ for (size_t i = 0; i != Ops.size(); ++i)
+ Ops[i] = DbgNode->getOperand(i);
+ DN = MDNode::get(VMContext, Ops);
}
+
+ MDNode *Node = const_cast<MDNode *>(DbgNode);
+ const Value *V = cast_or_null<Value>(DN);
+ Node->replaceAllUsesWith(const_cast<Value *>(V));
+ MDNode::deleteTemporary(Node);
+ DbgNode = D;
}
/// replaceAllUsesWith - Replace all uses of the MDNode used by this
@@ -358,19 +364,12 @@ void DIType::replaceAllUsesWith(DIDescriptor &D) {
void DIType::replaceAllUsesWith(MDNode *D) {
assert(DbgNode && "Trying to replace an unverified type!");
-
- // Since we use a TrackingVH for the node, its easy for clients to manufacture
- // legitimate situations where they want to replaceAllUsesWith() on something
- // which, due to uniquing, has merged with the source. We shield clients from
- // this detail by allowing a value to be replaced with replaceAllUsesWith()
- // itself.
- if (DbgNode != D) {
- MDNode *Node = const_cast<MDNode *>(DbgNode);
- const MDNode *DN = D;
- const Value *V = cast_or_null<Value>(DN);
- Node->replaceAllUsesWith(const_cast<Value *>(V));
- MDNode::deleteTemporary(Node);
- }
+ assert(DbgNode != D && "This replacement should always happen");
+ MDNode *Node = const_cast<MDNode *>(DbgNode);
+ const MDNode *DN = D;
+ const Value *V = cast_or_null<Value>(DN);
+ Node->replaceAllUsesWith(const_cast<Value *>(V));
+ MDNode::deleteTemporary(Node);
}
/// Verify - Verify that a compile unit is well formed.
@@ -759,7 +758,7 @@ DIScopeRef DIScope::getContext() const {
return DIScopeRef(DINameSpace(DbgNode).getContext());
assert((isFile() || isCompileUnit()) && "Unhandled type of scope.");
- return DIScopeRef(NULL);
+ return DIScopeRef(nullptr);
}
// If the scope node has a name, return that, else return an empty string.
diff --git a/lib/IR/DebugLoc.cpp b/lib/IR/DebugLoc.cpp
index 1a2521e..43360d3 100644
--- a/lib/IR/DebugLoc.cpp
+++ b/lib/IR/DebugLoc.cpp
@@ -18,7 +18,7 @@ using namespace llvm;
//===----------------------------------------------------------------------===//
MDNode *DebugLoc::getScope(const LLVMContext &Ctx) const {
- if (ScopeIdx == 0) return 0;
+ if (ScopeIdx == 0) return nullptr;
if (ScopeIdx > 0) {
// Positive ScopeIdx is an index into ScopeRecords, which has no inlined-at
@@ -37,7 +37,7 @@ MDNode *DebugLoc::getScope(const LLVMContext &Ctx) const {
MDNode *DebugLoc::getInlinedAt(const LLVMContext &Ctx) const {
// Positive ScopeIdx is an index into ScopeRecords, which has no inlined-at
// position specified. Zero is invalid.
- if (ScopeIdx >= 0) return 0;
+ if (ScopeIdx >= 0) return nullptr;
// Otherwise, the index is in the ScopeInlinedAtRecords array.
assert(unsigned(-ScopeIdx) <= Ctx.pImpl->ScopeInlinedAtRecords.size() &&
@@ -49,7 +49,7 @@ MDNode *DebugLoc::getInlinedAt(const LLVMContext &Ctx) const {
void DebugLoc::getScopeAndInlinedAt(MDNode *&Scope, MDNode *&IA,
const LLVMContext &Ctx) const {
if (ScopeIdx == 0) {
- Scope = IA = 0;
+ Scope = IA = nullptr;
return;
}
@@ -59,7 +59,7 @@ void DebugLoc::getScopeAndInlinedAt(MDNode *&Scope, MDNode *&IA,
assert(unsigned(ScopeIdx) <= Ctx.pImpl->ScopeRecords.size() &&
"Invalid ScopeIdx!");
Scope = Ctx.pImpl->ScopeRecords[ScopeIdx-1].get();
- IA = 0;
+ IA = nullptr;
return;
}
@@ -96,8 +96,8 @@ DebugLoc DebugLoc::get(unsigned Line, unsigned Col,
DebugLoc Result;
// If no scope is available, this is an unknown location.
- if (Scope == 0) return Result;
-
+ if (!Scope) return Result;
+
// Saturate line and col to "unknown".
if (Col > 255) Col = 0;
if (Line >= (1 << 24)) Line = 0;
@@ -106,7 +106,7 @@ DebugLoc DebugLoc::get(unsigned Line, unsigned Col,
LLVMContext &Ctx = Scope->getContext();
// If there is no inlined-at location, use the ScopeRecords array.
- if (InlinedAt == 0)
+ if (!InlinedAt)
Result.ScopeIdx = Ctx.pImpl->getOrAddScopeRecordIdxEntry(Scope, 0);
else
Result.ScopeIdx = Ctx.pImpl->getOrAddScopeInlinedAtIdxEntry(Scope,
@@ -118,7 +118,7 @@ DebugLoc DebugLoc::get(unsigned Line, unsigned Col,
/// getAsMDNode - This method converts the compressed DebugLoc node into a
/// DILocation-compatible MDNode.
MDNode *DebugLoc::getAsMDNode(const LLVMContext &Ctx) const {
- if (isUnknown()) return 0;
+ if (isUnknown()) return nullptr;
MDNode *Scope, *IA;
getScopeAndInlinedAt(Scope, IA, Ctx);
@@ -137,7 +137,7 @@ MDNode *DebugLoc::getAsMDNode(const LLVMContext &Ctx) const {
DebugLoc DebugLoc::getFromDILocation(MDNode *N) {
DILocation Loc(N);
MDNode *Scope = Loc.getScope();
- if (Scope == 0) return DebugLoc();
+ if (!Scope) return DebugLoc();
return get(Loc.getLineNumber(), Loc.getColumnNumber(), Scope,
Loc.getOrigLocation());
}
@@ -146,8 +146,9 @@ DebugLoc DebugLoc::getFromDILocation(MDNode *N) {
DebugLoc DebugLoc::getFromDILexicalBlock(MDNode *N) {
DILexicalBlock LexBlock(N);
MDNode *Scope = LexBlock.getContext();
- if (Scope == 0) return DebugLoc();
- return get(LexBlock.getLineNumber(), LexBlock.getColumnNumber(), Scope, NULL);
+ if (!Scope) return DebugLoc();
+ return get(LexBlock.getLineNumber(), LexBlock.getColumnNumber(), Scope,
+ nullptr);
}
void DebugLoc::dump(const LLVMContext &Ctx) const {
@@ -166,6 +167,28 @@ void DebugLoc::dump(const LLVMContext &Ctx) const {
#endif
}
+void DebugLoc::print(const LLVMContext &Ctx, raw_ostream &OS) const {
+ if (!isUnknown()) {
+ // Print source line info.
+ DIScope Scope(getScope(Ctx));
+ assert((!Scope || Scope.isScope()) &&
+ "Scope of a DebugLoc should be null or a DIScope.");
+ if (Scope)
+ OS << Scope.getFilename();
+ else
+ OS << "<unknown>";
+ OS << ':' << getLine();
+ if (getCol() != 0)
+ OS << ':' << getCol();
+ DebugLoc InlinedAtDL = DebugLoc::getFromDILocation(getInlinedAt(Ctx));
+ if (!InlinedAtDL.isUnknown()) {
+ OS << " @[ ";
+ InlinedAtDL.print(Ctx, OS);
+ OS << " ]";
+ }
+ }
+}
+
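For reference, print above renders inlined-at locations recursively. A minimal sketch of driving it, with made-up file names and assuming an Instruction *I whose location is one inline level deep:

    // Hypothetical output: "inlined.c:10:4 @[ caller.c:42:7 ]"
    std::string Buf;
    llvm::raw_string_ostream OS(Buf);
    I->getDebugLoc().print(I->getContext(), OS);
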
//===----------------------------------------------------------------------===//
// DenseMap specialization
//===----------------------------------------------------------------------===//
@@ -234,7 +257,7 @@ void DebugRecVH::deleted() {
// If this is a non-canonical reference, just drop the value to null, we know
// it doesn't have a map entry.
if (Idx == 0) {
- setValPtr(0);
+ setValPtr(nullptr);
return;
}
@@ -245,7 +268,7 @@ void DebugRecVH::deleted() {
assert(Ctx->ScopeRecordIdx[Cur] == Idx && "Mapping out of date!");
Ctx->ScopeRecordIdx.erase(Cur);
// Reset this VH to null and we're done.
- setValPtr(0);
+ setValPtr(nullptr);
Idx = 0;
return;
}
@@ -259,7 +282,7 @@ void DebugRecVH::deleted() {
MDNode *OldScope = Entry.first.get();
MDNode *OldInlinedAt = Entry.second.get();
- assert(OldScope != 0 && OldInlinedAt != 0 &&
+ assert(OldScope && OldInlinedAt &&
"Entry should be non-canonical if either val dropped to null");
// Otherwise, we do have an entry in it, nuke it and we're done.
@@ -269,7 +292,7 @@ void DebugRecVH::deleted() {
// Reset this VH to null. Drop both 'Idx' values to null to indicate that
// we're in non-canonical form now.
- setValPtr(0);
+ setValPtr(nullptr);
Entry.first.Idx = Entry.second.Idx = 0;
}
@@ -277,8 +300,8 @@ void DebugRecVH::allUsesReplacedWith(Value *NewVa) {
// If being replaced with a non-mdnode value (e.g. undef) handle this as if
// the mdnode got deleted.
MDNode *NewVal = dyn_cast<MDNode>(NewVa);
- if (NewVal == 0) return deleted();
-
+ if (!NewVal) return deleted();
+
// If this is a non-canonical reference, just change it, we know it already
// doesn't have a map entry.
if (Idx == 0) {
@@ -313,7 +336,7 @@ void DebugRecVH::allUsesReplacedWith(Value *NewVa) {
MDNode *OldScope = Entry.first.get();
MDNode *OldInlinedAt = Entry.second.get();
- assert(OldScope != 0 && OldInlinedAt != 0 &&
+ assert(OldScope && OldInlinedAt &&
"Entry should be non-canonical if either val dropped to null");
// Otherwise, we do have an entry in it, nuke it and we're done.
diff --git a/lib/IR/DiagnosticInfo.cpp b/lib/IR/DiagnosticInfo.cpp
index d59d4cf..6eeb162 100644
--- a/lib/IR/DiagnosticInfo.cpp
+++ b/lib/IR/DiagnosticInfo.cpp
@@ -12,18 +12,80 @@
// Diagnostics reporting is still done as part of the LLVMContext.
//===----------------------------------------------------------------------===//
+#include "LLVMContextImpl.h"
#include "llvm/ADT/Twine.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/Atomic.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Regex.h"
#include <string>
using namespace llvm;
+namespace {
+
+/// \brief Regular expression corresponding to the value given in one of the
+/// -pass-remarks* command line flags. Passes whose name matches this regexp
+/// will emit a diagnostic when calling the associated diagnostic function
+/// (emitOptimizationRemark, emitOptimizationRemarkMissed or
+/// emitOptimizationRemarkAnalysis).
+struct PassRemarksOpt {
+ std::shared_ptr<Regex> Pattern;
+
+ void operator=(const std::string &Val) {
+ // Create a regexp object to match pass names for emitOptimizationRemark.
+ if (!Val.empty()) {
+ Pattern = std::make_shared<Regex>(Val);
+ std::string RegexError;
+ if (!Pattern->isValid(RegexError))
+ report_fatal_error("Invalid regular expression '" + Val +
+ "' in -pass-remarks: " + RegexError,
+ false);
+ }
+ }
+};
+
+static PassRemarksOpt PassRemarksOptLoc;
+static PassRemarksOpt PassRemarksMissedOptLoc;
+static PassRemarksOpt PassRemarksAnalysisOptLoc;
+
+// -pass-remarks
+// Command line flag to enable emitOptimizationRemark()
+static cl::opt<PassRemarksOpt, true, cl::parser<std::string>>
+PassRemarks("pass-remarks", cl::value_desc("pattern"),
+ cl::desc("Enable optimization remarks from passes whose name match "
+ "the given regular expression"),
+ cl::Hidden, cl::location(PassRemarksOptLoc), cl::ValueRequired,
+ cl::ZeroOrMore);
+
+// -pass-remarks-missed
+// Command line flag to enable emitOptimizationRemarkMissed()
+static cl::opt<PassRemarksOpt, true, cl::parser<std::string>> PassRemarksMissed(
+ "pass-remarks-missed", cl::value_desc("pattern"),
+ cl::desc("Enable missed optimization remarks from passes whose name match "
+ "the given regular expression"),
+ cl::Hidden, cl::location(PassRemarksMissedOptLoc), cl::ValueRequired,
+ cl::ZeroOrMore);
+
+// -pass-remarks-analysis
+// Command line flag to enable emitOptimizationRemarkAnalysis()
+static cl::opt<PassRemarksOpt, true, cl::parser<std::string>>
+PassRemarksAnalysis(
+ "pass-remarks-analysis", cl::value_desc("pattern"),
+ cl::desc(
+ "Enable optimization analysis remarks from passes whose name match "
+ "the given regular expression"),
+ cl::Hidden, cl::location(PassRemarksAnalysisOptLoc), cl::ValueRequired,
+ cl::ZeroOrMore);
+}
+
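All three flags take a regular expression over pass names and may be repeated (cl::ZeroOrMore). A couple of hypothetical invocations through the opt driver; the pass names are illustrative, only the flag spellings come from the code above:

    opt -pass-remarks='inline' -O2 foo.ll -S -o -
    opt -pass-remarks-missed='loop-.*' -O2 foo.ll -S -o -
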
int llvm::getNextAvailablePluginDiagnosticKind() {
static sys::cas_flag PluginKindID = DK_FirstPluginKind;
return (int)sys::AtomicIncrement(&PluginKindID);
@@ -64,3 +126,66 @@ void DiagnosticInfoSampleProfile::print(DiagnosticPrinter &DP) const {
DP << getFileName() << ": ";
DP << getMsg();
}
+
+bool DiagnosticInfoOptimizationRemarkBase::isLocationAvailable() const {
+ return getFunction().getParent()->getNamedMetadata("llvm.dbg.cu") != nullptr;
+}
+
+void DiagnosticInfoOptimizationRemarkBase::getLocation(StringRef *Filename,
+ unsigned *Line,
+ unsigned *Column) const {
+ DILocation DIL(getDebugLoc().getAsMDNode(getFunction().getContext()));
+ *Filename = DIL.getFilename();
+ *Line = DIL.getLineNumber();
+ *Column = DIL.getColumnNumber();
+}
+
+const std::string DiagnosticInfoOptimizationRemarkBase::getLocationStr() const {
+ StringRef Filename("<unknown>");
+ unsigned Line = 0;
+ unsigned Column = 0;
+ if (isLocationAvailable())
+ getLocation(&Filename, &Line, &Column);
+ return Twine(Filename + ":" + Twine(Line) + ":" + Twine(Column)).str();
+}
+
+void DiagnosticInfoOptimizationRemarkBase::print(DiagnosticPrinter &DP) const {
+ DP << getLocationStr() << ": " << getMsg();
+}
+
+bool DiagnosticInfoOptimizationRemark::isEnabled() const {
+ return PassRemarksOptLoc.Pattern &&
+ PassRemarksOptLoc.Pattern->match(getPassName());
+}
+
+bool DiagnosticInfoOptimizationRemarkMissed::isEnabled() const {
+ return PassRemarksMissedOptLoc.Pattern &&
+ PassRemarksMissedOptLoc.Pattern->match(getPassName());
+}
+
+bool DiagnosticInfoOptimizationRemarkAnalysis::isEnabled() const {
+ return PassRemarksAnalysisOptLoc.Pattern &&
+ PassRemarksAnalysisOptLoc.Pattern->match(getPassName());
+}
+
+void llvm::emitOptimizationRemark(LLVMContext &Ctx, const char *PassName,
+ const Function &Fn, const DebugLoc &DLoc,
+ const Twine &Msg) {
+ Ctx.diagnose(DiagnosticInfoOptimizationRemark(PassName, Fn, DLoc, Msg));
+}
+
+void llvm::emitOptimizationRemarkMissed(LLVMContext &Ctx, const char *PassName,
+ const Function &Fn,
+ const DebugLoc &DLoc,
+ const Twine &Msg) {
+ Ctx.diagnose(DiagnosticInfoOptimizationRemarkMissed(PassName, Fn, DLoc, Msg));
+}
+
+void llvm::emitOptimizationRemarkAnalysis(LLVMContext &Ctx,
+ const char *PassName,
+ const Function &Fn,
+ const DebugLoc &DLoc,
+ const Twine &Msg) {
+ Ctx.diagnose(
+ DiagnosticInfoOptimizationRemarkAnalysis(PassName, Fn, DLoc, Msg));
+}
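A minimal sketch of calling one of these helpers from a pass; the pass name, wrapper function, and message are illustrative, only the helper's signature comes from the code above:

    // Hypothetical reporting hook inside a transform, after a change at DLoc:
    void reportVectorized(llvm::Function &F, const llvm::DebugLoc &DLoc) {
      llvm::emitOptimizationRemark(F.getContext(), "my-vectorizer", F, DLoc,
                                   "vectorized loop");
    }
    // The remark is printed only when -pass-remarks matches "my-vectorizer".
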
diff --git a/lib/IR/Function.cpp b/lib/IR/Function.cpp
index c2ea0e1..fe32c46 100644
--- a/lib/IR/Function.cpp
+++ b/lib/IR/Function.cpp
@@ -44,7 +44,7 @@ void Argument::anchor() { }
Argument::Argument(Type *Ty, const Twine &Name, Function *Par)
: Value(Ty, Value::ArgumentVal) {
- Parent = 0;
+ Parent = nullptr;
// Make sure that we get added to a function
LeakDetector::addGarbageObject(this);
@@ -76,6 +76,14 @@ unsigned Argument::getArgNo() const {
return ArgIdx;
}
+/// hasNonNullAttr - Return true if this argument has the nonnull attribute on
+/// it in its containing function.
+bool Argument::hasNonNullAttr() const {
+ if (!getType()->isPointerTy()) return false;
+ return getParent()->getAttributes().
+ hasAttribute(getArgNo()+1, Attribute::NonNull);
+}
+
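Note the getArgNo()+1 in the lookup above: in this attribute numbering, index 0 refers to the return value, so argument attributes are looked up 1-based. A hedged sketch of the same query done by hand for a function's first parameter:

    // Assumes F has at least one parameter:
    bool firstParamNonNull(const llvm::Function &F) {
      return F.getAttributes().hasAttribute(1, llvm::Attribute::NonNull);
    }
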
/// hasByValAttr - Return true if this argument has the byval attribute on it
/// in its containing function.
bool Argument::hasByValAttr() const {
@@ -209,8 +217,8 @@ void Function::eraseFromParent() {
Function::Function(FunctionType *Ty, LinkageTypes Linkage,
const Twine &name, Module *ParentModule)
- : GlobalValue(PointerType::getUnqual(Ty),
- Value::FunctionVal, 0, 0, Linkage, name) {
+ : GlobalObject(PointerType::getUnqual(Ty),
+ Value::FunctionVal, nullptr, 0, Linkage, name) {
assert(FunctionType::isValidReturnType(getReturnType()) &&
"invalid return type");
SymTab = new ValueSymbolTable();
@@ -293,7 +301,7 @@ void Function::dropAllReferences() {
BasicBlocks.begin()->eraseFromParent();
// Prefix data is stored in a side table.
- setPrefixData(0);
+ setPrefixData(nullptr);
}
void Function::addAttribute(unsigned i, Attribute::AttrKind attr) {
@@ -348,10 +356,10 @@ void Function::clearGC() {
GCNames->erase(this);
if (GCNames->empty()) {
delete GCNames;
- GCNames = 0;
+ GCNames = nullptr;
if (GCNamePool->empty()) {
delete GCNamePool;
- GCNamePool = 0;
+ GCNamePool = nullptr;
}
}
}
@@ -361,7 +369,7 @@ void Function::clearGC() {
/// create a Function) from the Function Src to this one.
void Function::copyAttributesFrom(const GlobalValue *Src) {
assert(isa<Function>(Src) && "Expected a Function!");
- GlobalValue::copyAttributesFrom(Src);
+ GlobalObject::copyAttributesFrom(Src);
const Function *SrcF = cast<Function>(Src);
setCallingConv(SrcF->getCallingConv());
setAttributes(SrcF->getAttributes());
@@ -372,7 +380,7 @@ void Function::copyAttributesFrom(const GlobalValue *Src) {
if (SrcF->hasPrefixData())
setPrefixData(SrcF->getPrefixData());
else
- setPrefixData(0);
+ setPrefixData(nullptr);
}
/// getIntrinsicID - This method returns the ID number of the specified
diff --git a/lib/IR/GCOV.cpp b/lib/IR/GCOV.cpp
index f69bdc4..f2099d6 100644
--- a/lib/IR/GCOV.cpp
+++ b/lib/IR/GCOV.cpp
@@ -26,11 +26,6 @@ using namespace llvm;
//===----------------------------------------------------------------------===//
// GCOVFile implementation.
-/// ~GCOVFile - Delete GCOVFile and its content.
-GCOVFile::~GCOVFile() {
- DeleteContainerPointers(Functions);
-}
-
/// readGCNO - Read GCNO buffer.
bool GCOVFile::readGCNO(GCOVBuffer &Buffer) {
if (!Buffer.readGCNOFormat()) return false;
@@ -39,10 +34,10 @@ bool GCOVFile::readGCNO(GCOVBuffer &Buffer) {
if (!Buffer.readInt(Checksum)) return false;
while (true) {
if (!Buffer.readFunctionTag()) break;
- GCOVFunction *GFun = new GCOVFunction(*this);
+ auto GFun = make_unique<GCOVFunction>(*this);
if (!GFun->readGCNO(Buffer, Version))
return false;
- Functions.push_back(GFun);
+ Functions.push_back(std::move(GFun));
}
GCNOInitialized = true;
@@ -97,17 +92,15 @@ bool GCOVFile::readGCDA(GCOVBuffer &Buffer) {
/// dump - Dump GCOVFile content to dbgs() for debugging purposes.
void GCOVFile::dump() const {
- for (SmallVectorImpl<GCOVFunction *>::const_iterator I = Functions.begin(),
- E = Functions.end(); I != E; ++I)
- (*I)->dump();
+ for (const auto &FPtr : Functions)
+ FPtr->dump();
}
/// collectLineCounts - Collect line counts. This must be used after
/// reading .gcno and .gcda files.
void GCOVFile::collectLineCounts(FileInfo &FI) {
- for (SmallVectorImpl<GCOVFunction *>::iterator I = Functions.begin(),
- E = Functions.end(); I != E; ++I)
- (*I)->collectLineCounts(FI);
+ for (const auto &FPtr : Functions)
+ FPtr->collectLineCounts(FI);
FI.setRunCount(RunCount);
FI.setProgramCount(ProgramCount);
}
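The deleted ~GCOVFile (and ~GCOVFunction below) become unnecessary because the containers now hold std::unique_ptr, so element destruction is automatic. A self-contained sketch of the pattern, with a hypothetical element type:

    #include <memory>
    #include <vector>

    struct Widget { int Id; };

    int main() {
      std::vector<std::unique_ptr<Widget>> Items;
      Items.push_back(std::unique_ptr<Widget>(new Widget())); // vector owns it
      // No DeleteContainerPointers-style loop needed: ~vector runs
      // ~unique_ptr for every element.
    }
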
@@ -115,12 +108,6 @@ void GCOVFile::collectLineCounts(FileInfo &FI) {
//===----------------------------------------------------------------------===//
// GCOVFunction implementation.
-/// ~GCOVFunction - Delete GCOVFunction and its content.
-GCOVFunction::~GCOVFunction() {
- DeleteContainerPointers(Blocks);
- DeleteContainerPointers(Edges);
-}
-
/// readGCNO - Read a function from the GCNO buffer. Return false if an error
/// occurs.
bool GCOVFunction::readGCNO(GCOVBuffer &Buff, GCOV::GCOVVersion Version) {
@@ -150,7 +137,7 @@ bool GCOVFunction::readGCNO(GCOVBuffer &Buff, GCOV::GCOVVersion Version) {
if (!Buff.readInt(BlockCount)) return false;
for (uint32_t i = 0, e = BlockCount; i != e; ++i) {
if (!Buff.readInt(Dummy)) return false; // Block flags.
- Blocks.push_back(new GCOVBlock(*this, i));
+ Blocks.push_back(make_unique<GCOVBlock>(*this, i));
}
// read edges.
@@ -168,8 +155,8 @@ bool GCOVFunction::readGCNO(GCOVBuffer &Buff, GCOV::GCOVVersion Version) {
for (uint32_t i = 0, e = EdgeCount; i != e; ++i) {
uint32_t Dst;
if (!Buff.readInt(Dst)) return false;
- GCOVEdge *Edge = new GCOVEdge(Blocks[BlockNo], Blocks[Dst]);
- Edges.push_back(Edge);
+ Edges.push_back(make_unique<GCOVEdge>(*Blocks[BlockNo], *Blocks[Dst]));
+ GCOVEdge *Edge = Edges.back().get();
Blocks[BlockNo]->addDstEdge(Edge);
Blocks[Dst]->addSrcEdge(Edge);
if (!Buff.readInt(Dummy)) return false; // Edge flag
@@ -179,34 +166,46 @@ bool GCOVFunction::readGCNO(GCOVBuffer &Buff, GCOV::GCOVVersion Version) {
// read line table.
while (Buff.readLineTag()) {
uint32_t LineTableLength;
+ // Read the length of this line table.
if (!Buff.readInt(LineTableLength)) return false;
uint32_t EndPos = Buff.getCursor() + LineTableLength*4;
uint32_t BlockNo;
+ // Read the block number this table is associated with.
if (!Buff.readInt(BlockNo)) return false;
if (BlockNo >= BlockCount) {
errs() << "Unexpected block number: " << BlockNo << " (in " << Name
<< ").\n";
return false;
}
- GCOVBlock *Block = Blocks[BlockNo];
- if (!Buff.readInt(Dummy)) return false; // flag
- while (Buff.getCursor() != (EndPos - 4)) {
+ GCOVBlock &Block = *Blocks[BlockNo];
+ // Read the word that pads the beginning of the line table. This may be a
+ // flag of some sort, but seems to always be zero.
+ if (!Buff.readInt(Dummy)) return false;
+
+ // Line information starts here and continues up until the last word.
+ if (Buff.getCursor() != (EndPos - sizeof(uint32_t))) {
StringRef F;
+ // Read the source file name.
if (!Buff.readString(F)) return false;
if (Filename != F) {
errs() << "Multiple sources for a single basic block: " << Filename
<< " != " << F << " (in " << Name << ").\n";
return false;
}
- if (Buff.getCursor() == (EndPos - 4)) break;
- while (true) {
+ // Read lines up to, but not including, the null terminator.
+ while (Buff.getCursor() < (EndPos - 2 * sizeof(uint32_t))) {
uint32_t Line;
if (!Buff.readInt(Line)) return false;
- if (!Line) break;
- Block->addLine(Line);
+ // Line 0 means this instruction was injected by the compiler. Skip it.
+ if (!Line) continue;
+ Block.addLine(Line);
}
+ // Read the null terminator.
+ if (!Buff.readInt(Dummy)) return false;
}
- if (!Buff.readInt(Dummy)) return false; // flag
+ // The last word is either a flag or padding; it isn't clear which. Skip
+ // over it.
+ if (!Buff.readInt(Dummy)) return false;
}
return true;
}
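Pieced together from the reads above (hedged accordingly, since the code itself calls two of the words unclear), one GCNO line-table record appears to be laid out as:

    // word 0          record length, in 4-byte words
    // word 1          block number the lines belong to
    // word 2          padding/flag word, observed to be zero
    // ...             source file name (NUL-padded string)
    // ...             one line number per word; 0 = compiler-injected
    // second-to-last  zero word terminating the line list
    // last            trailing flag/padding word of unclear meaning
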
@@ -300,9 +299,8 @@ uint64_t GCOVFunction::getExitCount() const {
/// dump - Dump GCOVFunction content to dbgs() for debugging purposes.
void GCOVFunction::dump() const {
dbgs() << "===== " << Name << " @ " << Filename << ":" << LineNumber << "\n";
- for (SmallVectorImpl<GCOVBlock *>::const_iterator I = Blocks.begin(),
- E = Blocks.end(); I != E; ++I)
- (*I)->dump();
+ for (const auto &Block : Blocks)
+ Block->dump();
}
/// collectLineCounts - Collect line counts. This must be used after
@@ -313,9 +311,8 @@ void GCOVFunction::collectLineCounts(FileInfo &FI) {
if (LineNumber == 0)
return;
- for (SmallVectorImpl<GCOVBlock *>::iterator I = Blocks.begin(),
- E = Blocks.end(); I != E; ++I)
- (*I)->collectLineCounts(FI);
+ for (const auto &Block : Blocks)
+ Block->collectLineCounts(FI);
FI.addFunctionLine(Filename, LineNumber, this);
}
@@ -335,8 +332,8 @@ void GCOVBlock::addCount(size_t DstEdgeNo, uint64_t N) {
assert(DstEdgeNo < DstEdges.size()); // up to caller to ensure EdgeNo is valid
DstEdges[DstEdgeNo]->Count = N;
Counter += N;
- if (!DstEdges[DstEdgeNo]->Dst->getNumDstEdges())
- DstEdges[DstEdgeNo]->Dst->Counter += N;
+ if (!DstEdges[DstEdgeNo]->Dst.getNumDstEdges())
+ DstEdges[DstEdgeNo]->Dst.Counter += N;
}
/// sortDstEdges - Sort destination edges by block number, nop if already
@@ -363,7 +360,7 @@ void GCOVBlock::dump() const {
dbgs() << "\tSource Edges : ";
for (EdgeIterator I = SrcEdges.begin(), E = SrcEdges.end(); I != E; ++I) {
const GCOVEdge *Edge = *I;
- dbgs() << Edge->Src->Number << " (" << Edge->Count << "), ";
+ dbgs() << Edge->Src.Number << " (" << Edge->Count << "), ";
}
dbgs() << "\n";
}
@@ -371,7 +368,7 @@ void GCOVBlock::dump() const {
dbgs() << "\tDestination Edges : ";
for (EdgeIterator I = DstEdges.begin(), E = DstEdges.end(); I != E; ++I) {
const GCOVEdge *Edge = *I;
- dbgs() << Edge->Dst->Number << " (" << Edge->Count << "), ";
+ dbgs() << Edge->Dst.Number << " (" << Edge->Count << "), ";
}
dbgs() << "\n";
}
@@ -435,11 +432,35 @@ static raw_ostream &operator<<(raw_ostream &OS, const formatBranchInfo &FBI) {
return OS;
}
+namespace {
+class LineConsumer {
+ std::unique_ptr<MemoryBuffer> Buffer;
+ StringRef Remaining;
+public:
+ LineConsumer(StringRef Filename) {
+ if (error_code EC = MemoryBuffer::getFileOrSTDIN(Filename, Buffer)) {
+ errs() << Filename << ": " << EC.message() << "\n";
+ Remaining = "";
+ } else
+ Remaining = Buffer->getBuffer();
+ }
+ bool empty() { return Remaining.empty(); }
+ void printNext(raw_ostream &OS, uint32_t LineNum) {
+ StringRef Line;
+ if (empty())
+ Line = "/*EOF*/";
+ else
+ std::tie(Line, Remaining) = Remaining.split("\n");
+ OS << format("%5u:", LineNum) << Line << "\n";
+ }
+};
+}
+
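A sketch of how LineConsumer is driven (mirroring its use in FileInfo::print further down): construct one per source file and pull lines in order; past end-of-file it keeps emitting a /*EOF*/ marker rather than failing:

    // Hypothetical driver:
    LineConsumer Lines("foo.c");
    for (uint32_t LineNum = 1; !Lines.empty(); ++LineNum)
      Lines.printNext(llvm::outs(), LineNum); // "%5u:" followed by the line
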
/// Convert a path to a gcov filename. If PreservePaths is true, this
/// translates "/" to "#", ".." to "^", and drops ".", to match gcov.
static std::string mangleCoveragePath(StringRef Filename, bool PreservePaths) {
if (!PreservePaths)
- return (sys::path::filename(Filename) + ".gcov").str();
+ return sys::path::filename(Filename).str();
// This behaviour is defined by gcov in terms of text replacements, so it's
// not likely to do anything useful on filesystems with different textual
@@ -467,28 +488,52 @@ static std::string mangleCoveragePath(StringRef Filename, bool PreservePaths) {
if (S < I)
Result.append(S, I);
- Result.append(".gcov");
return Result.str();
}
+std::string FileInfo::getCoveragePath(StringRef Filename,
+ StringRef MainFilename) {
+ if (Options.NoOutput)
+ // This is probably a bug in gcov, but when -n is specified, paths aren't
+ // mangled at all, and the -l and -p options are ignored. Here, we do the
+ // same.
+ return Filename;
+
+ std::string CoveragePath;
+ if (Options.LongFileNames && !Filename.equals(MainFilename))
+ CoveragePath =
+ mangleCoveragePath(MainFilename, Options.PreservePaths) + "##";
+ CoveragePath +=
+ mangleCoveragePath(Filename, Options.PreservePaths) + ".gcov";
+ return CoveragePath;
+}
+
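Worked examples of the names this produces, following directly from mangleCoveragePath plus the "##" join above (the option fields correspond to gcov's -l/-p/-n flags):

    // defaults:           getCoveragePath("foo.c", "foo.c") == "foo.c.gcov"
    // -l (LongFileNames): getCoveragePath("/usr/include/x.h", "a.c")
    //                         == "a.c##x.h.gcov"
    // -p (PreservePaths): "/usr/include/x.h" -> "#usr#include#x.h.gcov"
    // -n (NoOutput):      the filename is returned unmangled
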
+std::unique_ptr<raw_ostream>
+FileInfo::openCoveragePath(StringRef CoveragePath) {
+ if (Options.NoOutput)
+ return llvm::make_unique<raw_null_ostream>();
+
+ std::string ErrorInfo;
+ auto OS = llvm::make_unique<raw_fd_ostream>(CoveragePath.str().c_str(),
+ ErrorInfo, sys::fs::F_Text);
+ if (!ErrorInfo.empty()) {
+ errs() << ErrorInfo << "\n";
+ return llvm::make_unique<raw_null_ostream>();
+ }
+ return std::move(OS);
+}
+
/// print - Print source files with collected line count information.
-void FileInfo::print(StringRef GCNOFile, StringRef GCDAFile) {
+void FileInfo::print(StringRef MainFilename, StringRef GCNOFile,
+ StringRef GCDAFile) {
for (StringMap<LineData>::const_iterator I = LineInfo.begin(),
E = LineInfo.end(); I != E; ++I) {
StringRef Filename = I->first();
- std::unique_ptr<MemoryBuffer> Buff;
- if (error_code ec = MemoryBuffer::getFileOrSTDIN(Filename, Buff)) {
- errs() << Filename << ": " << ec.message() << "\n";
- return;
- }
- StringRef AllLines = Buff->getBuffer();
+ auto AllLines = LineConsumer(Filename);
- std::string CoveragePath = mangleCoveragePath(Filename,
- Options.PreservePaths);
- std::string ErrorInfo;
- raw_fd_ostream OS(CoveragePath.c_str(), ErrorInfo, sys::fs::F_Text);
- if (!ErrorInfo.empty())
- errs() << ErrorInfo << "\n";
+ std::string CoveragePath = getCoveragePath(Filename, MainFilename);
+ std::unique_ptr<raw_ostream> S = openCoveragePath(CoveragePath);
+ raw_ostream &OS = *S;
OS << " -: 0:Source:" << Filename << "\n";
OS << " -: 0:Graph:" << GCNOFile << "\n";
@@ -498,7 +543,8 @@ void FileInfo::print(StringRef GCNOFile, StringRef GCDAFile) {
const LineData &Line = I->second;
GCOVCoverage FileCoverage(Filename);
- for (uint32_t LineIndex = 0; !AllLines.empty(); ++LineIndex) {
+ for (uint32_t LineIndex = 0;
+ LineIndex < Line.LastLine || !AllLines.empty(); ++LineIndex) {
if (Options.BranchInfo) {
FunctionLines::const_iterator FuncsIt = Line.Functions.find(LineIndex);
if (FuncsIt != Line.Functions.end())
@@ -509,9 +555,7 @@ void FileInfo::print(StringRef GCNOFile, StringRef GCDAFile) {
if (BlocksIt == Line.Blocks.end()) {
// No basic blocks are on this line. Not an executable line of code.
OS << " -:";
- std::pair<StringRef, StringRef> P = AllLines.split('\n');
- OS << format("%5u:", LineIndex+1) << P.first << "\n";
- AllLines = P.second;
+ AllLines.printNext(OS, LineIndex + 1);
} else {
const BlockVector &Blocks = BlocksIt->second;
@@ -573,9 +617,7 @@ void FileInfo::print(StringRef GCNOFile, StringRef GCDAFile) {
}
++FileCoverage.LogicalLines;
- std::pair<StringRef, StringRef> P = AllLines.split('\n');
- OS << format("%5u:", LineIndex+1) << P.first << "\n";
- AllLines = P.second;
+ AllLines.printNext(OS, LineIndex + 1);
uint32_t BlockNo = 0;
uint32_t EdgeNo = 0;
@@ -605,10 +647,11 @@ void FileInfo::print(StringRef GCNOFile, StringRef GCDAFile) {
if (Options.FuncCoverage)
printFuncCoverage();
printFileCoverage();
+ return;
}
/// printFunctionSummary - Print function and block summary.
-void FileInfo::printFunctionSummary(raw_fd_ostream &OS,
+void FileInfo::printFunctionSummary(raw_ostream &OS,
const FunctionVector &Funcs) const {
for (FunctionVector::const_iterator I = Funcs.begin(), E = Funcs.end();
I != E; ++I) {
@@ -617,8 +660,8 @@ void FileInfo::printFunctionSummary(raw_fd_ostream &OS,
uint32_t BlocksExec = 0;
for (GCOVFunction::BlockIterator I = Func->block_begin(),
E = Func->block_end(); I != E; ++I) {
- const GCOVBlock *Block = *I;
- if (Block->getNumDstEdges() && Block->getCount())
+ const GCOVBlock &Block = **I;
+ if (Block.getNumDstEdges() && Block.getCount())
++BlocksExec;
}
@@ -630,7 +673,7 @@ void FileInfo::printFunctionSummary(raw_fd_ostream &OS,
}
/// printBlockInfo - Output counts for each block.
-void FileInfo::printBlockInfo(raw_fd_ostream &OS, const GCOVBlock &Block,
+void FileInfo::printBlockInfo(raw_ostream &OS, const GCOVBlock &Block,
uint32_t LineIndex, uint32_t &BlockNo) const {
if (Block.getCount() == 0)
OS << " $$$$$:";
@@ -640,7 +683,7 @@ void FileInfo::printBlockInfo(raw_fd_ostream &OS, const GCOVBlock &Block,
}
/// printBranchInfo - Print conditional branch probabilities.
-void FileInfo::printBranchInfo(raw_fd_ostream &OS, const GCOVBlock &Block,
+void FileInfo::printBranchInfo(raw_ostream &OS, const GCOVBlock &Block,
GCOVCoverage &Coverage, uint32_t &EdgeNo) {
SmallVector<uint64_t, 16> BranchCounts;
uint64_t TotalCounts = 0;
@@ -670,7 +713,7 @@ void FileInfo::printBranchInfo(raw_fd_ostream &OS, const GCOVBlock &Block,
}
/// printUncondBranchInfo - Print unconditional branch probabilities.
-void FileInfo::printUncondBranchInfo(raw_fd_ostream &OS, uint32_t &EdgeNo,
+void FileInfo::printUncondBranchInfo(raw_ostream &OS, uint32_t &EdgeNo,
uint64_t Count) const {
OS << format("unconditional %2u ", EdgeNo++)
<< formatBranchInfo(Options, Count, Count) << "\n";
@@ -716,6 +759,8 @@ void FileInfo::printFileCoverage() const {
const GCOVCoverage &Coverage = I->second;
outs() << "File '" << Coverage.Name << "'\n";
printCoverage(Coverage);
- outs() << Coverage.Name << ":creating '" << Filename << "'\n\n";
+ if (!Options.NoOutput)
+ outs() << Coverage.Name << ":creating '" << Filename << "'\n";
+ outs() << "\n";
}
}
diff --git a/lib/IR/Globals.cpp b/lib/IR/Globals.cpp
index f338dd7..c905cfe 100644
--- a/lib/IR/Globals.cpp
+++ b/lib/IR/Globals.cpp
@@ -53,23 +53,41 @@ void GlobalValue::destroyConstant() {
/// copyAttributesFrom - copy all additional attributes (those not needed to
/// create a GlobalValue) from the GlobalValue Src to this one.
void GlobalValue::copyAttributesFrom(const GlobalValue *Src) {
- setAlignment(Src->getAlignment());
- setSection(Src->getSection());
setVisibility(Src->getVisibility());
setUnnamedAddr(Src->hasUnnamedAddr());
setDLLStorageClass(Src->getDLLStorageClass());
}
-void GlobalValue::setAlignment(unsigned Align) {
- assert((!isa<GlobalAlias>(this) || !Align) &&
- "GlobalAlias should not have an alignment!");
+unsigned GlobalValue::getAlignment() const {
+ if (auto *GA = dyn_cast<GlobalAlias>(this))
+ return GA->getAliasee()->getAlignment();
+
+ return cast<GlobalObject>(this)->getAlignment();
+}
+
+void GlobalObject::setAlignment(unsigned Align) {
assert((Align & (Align-1)) == 0 && "Alignment is not a power of 2!");
assert(Align <= MaximumAlignment &&
"Alignment is greater than MaximumAlignment!");
- Alignment = Log2_32(Align) + 1;
+ setGlobalValueSubClassData(Log2_32(Align) + 1);
assert(getAlignment() == Align && "Alignment representation error!");
}
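The encoding above stores Log2_32(Align)+1 in the subclass data so that zero can mean "no explicit alignment". A quick worked example of the round trip (the decoding direction is inferred from the assert):

    // setAlignment(16): stored = Log2_32(16) + 1 = 5
    // getAlignment():   1 << (5 - 1) = 16
    // stored == 0   ->  no explicit alignment recorded
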
+void GlobalObject::copyAttributesFrom(const GlobalValue *Src) {
+ const auto *GV = cast<GlobalObject>(Src);
+ GlobalValue::copyAttributesFrom(GV);
+ setAlignment(GV->getAlignment());
+ setSection(GV->getSection());
+}
+
+const std::string &GlobalValue::getSection() const {
+ if (auto *GA = dyn_cast<GlobalAlias>(this))
+ return GA->getAliasee()->getSection();
+ return cast<GlobalObject>(this)->getSection();
+}
+
+void GlobalObject::setSection(StringRef S) { Section = S; }
+
bool GlobalValue::isDeclaration() const {
// Globals are definitions if they have an initializer.
if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(this))
@@ -83,22 +101,20 @@ bool GlobalValue::isDeclaration() const {
assert(isa<GlobalAlias>(this));
return false;
}
-
+
//===----------------------------------------------------------------------===//
// GlobalVariable Implementation
//===----------------------------------------------------------------------===//
GlobalVariable::GlobalVariable(Type *Ty, bool constant, LinkageTypes Link,
- Constant *InitVal,
- const Twine &Name, ThreadLocalMode TLMode,
- unsigned AddressSpace,
+ Constant *InitVal, const Twine &Name,
+ ThreadLocalMode TLMode, unsigned AddressSpace,
bool isExternallyInitialized)
- : GlobalValue(PointerType::get(Ty, AddressSpace),
- Value::GlobalVariableVal,
- OperandTraits<GlobalVariable>::op_begin(this),
- InitVal != 0, Link, Name),
- isConstantGlobal(constant), threadLocalMode(TLMode),
- isExternallyInitializedConstant(isExternallyInitialized) {
+ : GlobalObject(PointerType::get(Ty, AddressSpace), Value::GlobalVariableVal,
+ OperandTraits<GlobalVariable>::op_begin(this),
+ InitVal != nullptr, Link, Name),
+ isConstantGlobal(constant), threadLocalMode(TLMode),
+ isExternallyInitializedConstant(isExternallyInitialized) {
if (InitVal) {
assert(InitVal->getType() == Ty &&
"Initializer should be the same type as the GlobalVariable!");
@@ -110,24 +126,22 @@ GlobalVariable::GlobalVariable(Type *Ty, bool constant, LinkageTypes Link,
GlobalVariable::GlobalVariable(Module &M, Type *Ty, bool constant,
LinkageTypes Link, Constant *InitVal,
- const Twine &Name,
- GlobalVariable *Before, ThreadLocalMode TLMode,
- unsigned AddressSpace,
+ const Twine &Name, GlobalVariable *Before,
+ ThreadLocalMode TLMode, unsigned AddressSpace,
bool isExternallyInitialized)
- : GlobalValue(PointerType::get(Ty, AddressSpace),
- Value::GlobalVariableVal,
- OperandTraits<GlobalVariable>::op_begin(this),
- InitVal != 0, Link, Name),
- isConstantGlobal(constant), threadLocalMode(TLMode),
- isExternallyInitializedConstant(isExternallyInitialized) {
+ : GlobalObject(PointerType::get(Ty, AddressSpace), Value::GlobalVariableVal,
+ OperandTraits<GlobalVariable>::op_begin(this),
+ InitVal != nullptr, Link, Name),
+ isConstantGlobal(constant), threadLocalMode(TLMode),
+ isExternallyInitializedConstant(isExternallyInitialized) {
if (InitVal) {
assert(InitVal->getType() == Ty &&
"Initializer should be the same type as the GlobalVariable!");
Op<0>() = InitVal;
}
-
+
LeakDetector::addGarbageObject(this);
-
+
if (Before)
Before->getParent()->getGlobalList().insert(Before, this);
else
@@ -171,9 +185,9 @@ void GlobalVariable::replaceUsesOfWithOnConstant(Value *From, Value *To,
}
void GlobalVariable::setInitializer(Constant *InitVal) {
- if (InitVal == 0) {
+ if (!InitVal) {
if (hasInitializer()) {
- Op<0>().set(0);
+ Op<0>().set(nullptr);
NumOperands = 0;
}
} else {
@@ -189,7 +203,7 @@ void GlobalVariable::setInitializer(Constant *InitVal) {
/// create a GlobalVariable) from the GlobalVariable Src to this one.
void GlobalVariable::copyAttributesFrom(const GlobalValue *Src) {
assert(isa<GlobalVariable>(Src) && "Expected a GlobalVariable!");
- GlobalValue::copyAttributesFrom(Src);
+ GlobalObject::copyAttributesFrom(Src);
const GlobalVariable *SrcVar = cast<GlobalVariable>(Src);
setThreadLocalMode(SrcVar->getThreadLocalMode());
}
@@ -199,20 +213,47 @@ void GlobalVariable::copyAttributesFrom(const GlobalValue *Src) {
// GlobalAlias Implementation
//===----------------------------------------------------------------------===//
-GlobalAlias::GlobalAlias(Type *Ty, LinkageTypes Link,
- const Twine &Name, Constant* aliasee,
+GlobalAlias::GlobalAlias(Type *Ty, unsigned AddressSpace, LinkageTypes Link,
+ const Twine &Name, GlobalObject *Aliasee,
Module *ParentModule)
- : GlobalValue(Ty, Value::GlobalAliasVal, &Op<0>(), 1, Link, Name) {
+ : GlobalValue(PointerType::get(Ty, AddressSpace), Value::GlobalAliasVal,
+ &Op<0>(), 1, Link, Name) {
LeakDetector::addGarbageObject(this);
-
- if (aliasee)
- assert(aliasee->getType() == Ty && "Alias and aliasee types should match!");
- Op<0>() = aliasee;
+ Op<0>() = Aliasee;
if (ParentModule)
ParentModule->getAliasList().push_back(this);
}
+GlobalAlias *GlobalAlias::create(Type *Ty, unsigned AddressSpace,
+ LinkageTypes Link, const Twine &Name,
+ GlobalObject *Aliasee, Module *ParentModule) {
+ return new GlobalAlias(Ty, AddressSpace, Link, Name, Aliasee, ParentModule);
+}
+
+GlobalAlias *GlobalAlias::create(Type *Ty, unsigned AddressSpace,
+ LinkageTypes Linkage, const Twine &Name,
+ Module *Parent) {
+ return create(Ty, AddressSpace, Linkage, Name, nullptr, Parent);
+}
+
+GlobalAlias *GlobalAlias::create(Type *Ty, unsigned AddressSpace,
+ LinkageTypes Linkage, const Twine &Name,
+ GlobalObject *Aliasee) {
+ return create(Ty, AddressSpace, Linkage, Name, Aliasee, Aliasee->getParent());
+}
+
+GlobalAlias *GlobalAlias::create(LinkageTypes Link, const Twine &Name,
+ GlobalObject *Aliasee) {
+ PointerType *PTy = Aliasee->getType();
+ return create(PTy->getElementType(), PTy->getAddressSpace(), Link, Name,
+ Aliasee);
+}
+
+GlobalAlias *GlobalAlias::create(const Twine &Name, GlobalObject *Aliasee) {
+ return create(Aliasee->getLinkage(), Name, Aliasee);
+}
+
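A sketch of the new overloads in use; the shortest form pulls the type, address space, linkage, and parent module from the aliasee. Here F is a hypothetical Function* (a GlobalObject after this change):

    llvm::GlobalAlias *GA = llvm::GlobalAlias::create("my_alias", F);
    // Equivalent to spelling it out:
    llvm::PointerType *PTy = F->getType();
    llvm::GlobalAlias *GA2 = llvm::GlobalAlias::create(
        PTy->getElementType(), PTy->getAddressSpace(), F->getLinkage(),
        "my_alias2", F);
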
void GlobalAlias::setParent(Module *parent) {
if (getParent())
LeakDetector::addGarbageObject(this);
@@ -229,42 +270,4 @@ void GlobalAlias::eraseFromParent() {
getParent()->getAliasList().erase(this);
}
-void GlobalAlias::setAliasee(Constant *Aliasee) {
- assert((!Aliasee || Aliasee->getType() == getType()) &&
- "Alias and aliasee types should match!");
-
- setOperand(0, Aliasee);
-}
-
-static GlobalValue *getAliaseeGV(GlobalAlias *GA) {
- Constant *C = GA->getAliasee();
- assert(C && "Must alias something");
-
- if (GlobalValue *GV = dyn_cast<GlobalValue>(C))
- return GV;
-
- ConstantExpr *CE = cast<ConstantExpr>(C);
- assert((CE->getOpcode() == Instruction::BitCast ||
- CE->getOpcode() == Instruction::AddrSpaceCast ||
- CE->getOpcode() == Instruction::GetElementPtr) &&
- "Unsupported aliasee");
-
- return cast<GlobalValue>(CE->getOperand(0));
-}
-
-GlobalValue *GlobalAlias::getAliasedGlobal() {
- SmallPtrSet<GlobalValue*, 3> Visited;
-
- GlobalAlias *GA = this;
-
- for (;;) {
- GlobalValue *GV = getAliaseeGV(GA);
- if (!Visited.insert(GV))
- return 0;
-
- // Iterate over aliasing chain.
- GA = dyn_cast<GlobalAlias>(GV);
- if (!GA)
- return GV;
- }
-}
+void GlobalAlias::setAliasee(GlobalObject *Aliasee) { setOperand(0, Aliasee); }
diff --git a/lib/IR/IRPrintingPasses.cpp b/lib/IR/IRPrintingPasses.cpp
index 099c27c..c8a1747 100644
--- a/lib/IR/IRPrintingPasses.cpp
+++ b/lib/IR/IRPrintingPasses.cpp
@@ -94,7 +94,7 @@ public:
return false;
}
- void getAnalysisUsage(AnalysisUsage &AU) const override{
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesAll();
}
};
diff --git a/lib/IR/InlineAsm.cpp b/lib/IR/InlineAsm.cpp
index 62d191d..a3e1da3b1 100644
--- a/lib/IR/InlineAsm.cpp
+++ b/lib/IR/InlineAsm.cpp
@@ -274,7 +274,7 @@ bool InlineAsm::Verify(FunctionType *Ty, StringRef ConstStr) {
break;
default:
StructType *STy = dyn_cast<StructType>(Ty->getReturnType());
- if (STy == 0 || STy->getNumElements() != NumOutputs)
+ if (!STy || STy->getNumElements() != NumOutputs)
return false;
break;
}
diff --git a/lib/IR/Instruction.cpp b/lib/IR/Instruction.cpp
index d31a92e..28cc4cb 100644
--- a/lib/IR/Instruction.cpp
+++ b/lib/IR/Instruction.cpp
@@ -23,7 +23,7 @@ using namespace llvm;
Instruction::Instruction(Type *ty, unsigned it, Use *Ops, unsigned NumOps,
Instruction *InsertBefore)
- : User(ty, Value::InstructionVal + it, Ops, NumOps), Parent(0) {
+ : User(ty, Value::InstructionVal + it, Ops, NumOps), Parent(nullptr) {
// Make sure that we get added to a basicblock
LeakDetector::addGarbageObject(this);
@@ -41,7 +41,7 @@ const DataLayout *Instruction::getDataLayout() const {
Instruction::Instruction(Type *ty, unsigned it, Use *Ops, unsigned NumOps,
BasicBlock *InsertAtEnd)
- : User(ty, Value::InstructionVal + it, Ops, NumOps), Parent(0) {
+ : User(ty, Value::InstructionVal + it, Ops, NumOps), Parent(nullptr) {
// Make sure that we get added to a basicblock
LeakDetector::addGarbageObject(this);
@@ -53,7 +53,7 @@ Instruction::Instruction(Type *ty, unsigned it, Use *Ops, unsigned NumOps,
// Out of line virtual method, so the vtable, etc has a home.
Instruction::~Instruction() {
- assert(Parent == 0 && "Instruction still linked in the program!");
+ assert(!Parent && "Instruction still linked in the program!");
if (hasMetadataHashEntry())
clearMetadataHashEntries();
}
@@ -262,6 +262,58 @@ const char *Instruction::getOpcodeName(unsigned OpCode) {
}
}
+/// Return true if both instructions have the same special state.
+/// This must be kept in sync with lib/Transforms/IPO/MergeFunctions.cpp.
+static bool haveSameSpecialState(const Instruction *I1, const Instruction *I2,
+ bool IgnoreAlignment = false) {
+ assert(I1->getOpcode() == I2->getOpcode() &&
+ "Can not compare special state of different instructions");
+
+ if (const LoadInst *LI = dyn_cast<LoadInst>(I1))
+ return LI->isVolatile() == cast<LoadInst>(I2)->isVolatile() &&
+ (LI->getAlignment() == cast<LoadInst>(I2)->getAlignment() ||
+ IgnoreAlignment) &&
+ LI->getOrdering() == cast<LoadInst>(I2)->getOrdering() &&
+ LI->getSynchScope() == cast<LoadInst>(I2)->getSynchScope();
+ if (const StoreInst *SI = dyn_cast<StoreInst>(I1))
+ return SI->isVolatile() == cast<StoreInst>(I2)->isVolatile() &&
+ (SI->getAlignment() == cast<StoreInst>(I2)->getAlignment() ||
+ IgnoreAlignment) &&
+ SI->getOrdering() == cast<StoreInst>(I2)->getOrdering() &&
+ SI->getSynchScope() == cast<StoreInst>(I2)->getSynchScope();
+ if (const CmpInst *CI = dyn_cast<CmpInst>(I1))
+ return CI->getPredicate() == cast<CmpInst>(I2)->getPredicate();
+ if (const CallInst *CI = dyn_cast<CallInst>(I1))
+ return CI->isTailCall() == cast<CallInst>(I2)->isTailCall() &&
+ CI->getCallingConv() == cast<CallInst>(I2)->getCallingConv() &&
+ CI->getAttributes() == cast<CallInst>(I2)->getAttributes();
+ if (const InvokeInst *CI = dyn_cast<InvokeInst>(I1))
+ return CI->getCallingConv() == cast<InvokeInst>(I2)->getCallingConv() &&
+ CI->getAttributes() ==
+ cast<InvokeInst>(I2)->getAttributes();
+ if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(I1))
+ return IVI->getIndices() == cast<InsertValueInst>(I2)->getIndices();
+ if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(I1))
+ return EVI->getIndices() == cast<ExtractValueInst>(I2)->getIndices();
+ if (const FenceInst *FI = dyn_cast<FenceInst>(I1))
+ return FI->getOrdering() == cast<FenceInst>(I2)->getOrdering() &&
+ FI->getSynchScope() == cast<FenceInst>(I2)->getSynchScope();
+ if (const AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(I1))
+ return CXI->isVolatile() == cast<AtomicCmpXchgInst>(I2)->isVolatile() &&
+ CXI->getSuccessOrdering() ==
+ cast<AtomicCmpXchgInst>(I2)->getSuccessOrdering() &&
+ CXI->getFailureOrdering() ==
+ cast<AtomicCmpXchgInst>(I2)->getFailureOrdering() &&
+ CXI->getSynchScope() == cast<AtomicCmpXchgInst>(I2)->getSynchScope();
+ if (const AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(I1))
+ return RMWI->getOperation() == cast<AtomicRMWInst>(I2)->getOperation() &&
+ RMWI->isVolatile() == cast<AtomicRMWInst>(I2)->isVolatile() &&
+ RMWI->getOrdering() == cast<AtomicRMWInst>(I2)->getOrdering() &&
+ RMWI->getSynchScope() == cast<AtomicRMWInst>(I2)->getSynchScope();
+
+ return true;
+}
+
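This factors the duplicated per-opcode checks out of the two predicates below; the only behavioural knob is IgnoreAlignment, and the call sites reduce to:

    // isIdenticalToWhenDefined: haveSameSpecialState(this, I);
    // isSameOperationAs:        haveSameSpecialState(this, I, IgnoreAlignment);

Incidentally, the FenceInst check removed from isIdenticalToWhenDefined below compared the fence against itself (cast<FenceInst>(FI)); the helper compares against I2, which fixes that.
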
/// isIdenticalTo - Return true if the specified instruction is exactly
/// identical to the current one. This means that all operands match and any
/// extra information (e.g. load is volatile) agree.
@@ -284,51 +336,13 @@ bool Instruction::isIdenticalToWhenDefined(const Instruction *I) const {
if (!std::equal(op_begin(), op_end(), I->op_begin()))
return false;
- // Check special state that is a part of some instructions.
- if (const LoadInst *LI = dyn_cast<LoadInst>(this))
- return LI->isVolatile() == cast<LoadInst>(I)->isVolatile() &&
- LI->getAlignment() == cast<LoadInst>(I)->getAlignment() &&
- LI->getOrdering() == cast<LoadInst>(I)->getOrdering() &&
- LI->getSynchScope() == cast<LoadInst>(I)->getSynchScope();
- if (const StoreInst *SI = dyn_cast<StoreInst>(this))
- return SI->isVolatile() == cast<StoreInst>(I)->isVolatile() &&
- SI->getAlignment() == cast<StoreInst>(I)->getAlignment() &&
- SI->getOrdering() == cast<StoreInst>(I)->getOrdering() &&
- SI->getSynchScope() == cast<StoreInst>(I)->getSynchScope();
- if (const CmpInst *CI = dyn_cast<CmpInst>(this))
- return CI->getPredicate() == cast<CmpInst>(I)->getPredicate();
- if (const CallInst *CI = dyn_cast<CallInst>(this))
- return CI->isTailCall() == cast<CallInst>(I)->isTailCall() &&
- CI->getCallingConv() == cast<CallInst>(I)->getCallingConv() &&
- CI->getAttributes() == cast<CallInst>(I)->getAttributes();
- if (const InvokeInst *CI = dyn_cast<InvokeInst>(this))
- return CI->getCallingConv() == cast<InvokeInst>(I)->getCallingConv() &&
- CI->getAttributes() == cast<InvokeInst>(I)->getAttributes();
- if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(this))
- return IVI->getIndices() == cast<InsertValueInst>(I)->getIndices();
- if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(this))
- return EVI->getIndices() == cast<ExtractValueInst>(I)->getIndices();
- if (const FenceInst *FI = dyn_cast<FenceInst>(this))
- return FI->getOrdering() == cast<FenceInst>(FI)->getOrdering() &&
- FI->getSynchScope() == cast<FenceInst>(FI)->getSynchScope();
- if (const AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(this))
- return CXI->isVolatile() == cast<AtomicCmpXchgInst>(I)->isVolatile() &&
- CXI->getSuccessOrdering() ==
- cast<AtomicCmpXchgInst>(I)->getSuccessOrdering() &&
- CXI->getFailureOrdering() ==
- cast<AtomicCmpXchgInst>(I)->getFailureOrdering() &&
- CXI->getSynchScope() == cast<AtomicCmpXchgInst>(I)->getSynchScope();
- if (const AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(this))
- return RMWI->getOperation() == cast<AtomicRMWInst>(I)->getOperation() &&
- RMWI->isVolatile() == cast<AtomicRMWInst>(I)->isVolatile() &&
- RMWI->getOrdering() == cast<AtomicRMWInst>(I)->getOrdering() &&
- RMWI->getSynchScope() == cast<AtomicRMWInst>(I)->getSynchScope();
if (const PHINode *thisPHI = dyn_cast<PHINode>(this)) {
const PHINode *otherPHI = cast<PHINode>(I);
return std::equal(thisPHI->block_begin(), thisPHI->block_end(),
otherPHI->block_begin());
}
- return true;
+
+ return haveSameSpecialState(this, I);
}
// isSameOperationAs
@@ -355,50 +369,7 @@ bool Instruction::isSameOperationAs(const Instruction *I,
getOperand(i)->getType() != I->getOperand(i)->getType())
return false;
- // Check special state that is a part of some instructions.
- if (const LoadInst *LI = dyn_cast<LoadInst>(this))
- return LI->isVolatile() == cast<LoadInst>(I)->isVolatile() &&
- (LI->getAlignment() == cast<LoadInst>(I)->getAlignment() ||
- IgnoreAlignment) &&
- LI->getOrdering() == cast<LoadInst>(I)->getOrdering() &&
- LI->getSynchScope() == cast<LoadInst>(I)->getSynchScope();
- if (const StoreInst *SI = dyn_cast<StoreInst>(this))
- return SI->isVolatile() == cast<StoreInst>(I)->isVolatile() &&
- (SI->getAlignment() == cast<StoreInst>(I)->getAlignment() ||
- IgnoreAlignment) &&
- SI->getOrdering() == cast<StoreInst>(I)->getOrdering() &&
- SI->getSynchScope() == cast<StoreInst>(I)->getSynchScope();
- if (const CmpInst *CI = dyn_cast<CmpInst>(this))
- return CI->getPredicate() == cast<CmpInst>(I)->getPredicate();
- if (const CallInst *CI = dyn_cast<CallInst>(this))
- return CI->isTailCall() == cast<CallInst>(I)->isTailCall() &&
- CI->getCallingConv() == cast<CallInst>(I)->getCallingConv() &&
- CI->getAttributes() == cast<CallInst>(I)->getAttributes();
- if (const InvokeInst *CI = dyn_cast<InvokeInst>(this))
- return CI->getCallingConv() == cast<InvokeInst>(I)->getCallingConv() &&
- CI->getAttributes() ==
- cast<InvokeInst>(I)->getAttributes();
- if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(this))
- return IVI->getIndices() == cast<InsertValueInst>(I)->getIndices();
- if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(this))
- return EVI->getIndices() == cast<ExtractValueInst>(I)->getIndices();
- if (const FenceInst *FI = dyn_cast<FenceInst>(this))
- return FI->getOrdering() == cast<FenceInst>(I)->getOrdering() &&
- FI->getSynchScope() == cast<FenceInst>(I)->getSynchScope();
- if (const AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(this))
- return CXI->isVolatile() == cast<AtomicCmpXchgInst>(I)->isVolatile() &&
- CXI->getSuccessOrdering() ==
- cast<AtomicCmpXchgInst>(I)->getSuccessOrdering() &&
- CXI->getFailureOrdering() ==
- cast<AtomicCmpXchgInst>(I)->getFailureOrdering() &&
- CXI->getSynchScope() == cast<AtomicCmpXchgInst>(I)->getSynchScope();
- if (const AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(this))
- return RMWI->getOperation() == cast<AtomicRMWInst>(I)->getOperation() &&
- RMWI->isVolatile() == cast<AtomicRMWInst>(I)->isVolatile() &&
- RMWI->getOrdering() == cast<AtomicRMWInst>(I)->getOrdering() &&
- RMWI->getSynchScope() == cast<AtomicRMWInst>(I)->getSynchScope();
-
- return true;
+ return haveSameSpecialState(this, I, IgnoreAlignment);
}
/// isUsedOutsideOfBlock - Return true if there are any uses of I outside of the
@@ -410,7 +381,7 @@ bool Instruction::isUsedOutsideOfBlock(const BasicBlock *BB) const {
// instructions, just check to see whether the parent of the use matches up.
const Instruction *I = cast<Instruction>(U.getUser());
const PHINode *PN = dyn_cast<PHINode>(I);
- if (PN == 0) {
+ if (!PN) {
if (I->getParent() != BB)
return true;
continue;
diff --git a/lib/IR/Instructions.cpp b/lib/IR/Instructions.cpp
index 3aa8413..13c51b8 100644
--- a/lib/IR/Instructions.cpp
+++ b/lib/IR/Instructions.cpp
@@ -68,7 +68,7 @@ const char *SelectInst::areInvalidOperands(Value *Op0, Value *Op1, Value *Op2) {
if (VT->getElementType() != Type::getInt1Ty(Op0->getContext()))
return "vector select condition element type must be i1";
VectorType *ET = dyn_cast<VectorType>(Op1->getType());
- if (ET == 0)
+ if (!ET)
return "selected values for vector select must be vectors";
if (ET->getNumElements() != VT->getNumElements())
return "vector select requires selected vectors to have "
@@ -76,7 +76,7 @@ const char *SelectInst::areInvalidOperands(Value *Op0, Value *Op1, Value *Op2) {
} else if (Op0->getType() != Type::getInt1Ty(Op0->getContext())) {
return "select condition must be i1 or <n x i1>";
}
- return 0;
+ return nullptr;
}
@@ -123,7 +123,7 @@ Value *PHINode::removeIncomingValue(unsigned Idx, bool DeletePHIIfEmpty) {
std::copy(block_begin() + Idx + 1, block_end(), block_begin() + Idx);
// Nuke the last value.
- Op<-1>().set(0);
+ Op<-1>().set(nullptr);
--NumOperands;
// If the PHI node is dead, because it has zero entries, nuke it now.
@@ -164,7 +164,7 @@ Value *PHINode::hasConstantValue() const {
for (unsigned i = 1, e = getNumIncomingValues(); i != e; ++i)
if (getIncomingValue(i) != ConstantValue && getIncomingValue(i) != this) {
if (ConstantValue != this)
- return 0; // Incoming values not all the same.
+ return nullptr; // Incoming values not all the same.
// The case where the first value is this PHI.
ConstantValue = getIncomingValue(i);
}
@@ -180,14 +180,14 @@ Value *PHINode::hasConstantValue() const {
LandingPadInst::LandingPadInst(Type *RetTy, Value *PersonalityFn,
unsigned NumReservedValues, const Twine &NameStr,
Instruction *InsertBefore)
- : Instruction(RetTy, Instruction::LandingPad, 0, 0, InsertBefore) {
+ : Instruction(RetTy, Instruction::LandingPad, nullptr, 0, InsertBefore) {
init(PersonalityFn, 1 + NumReservedValues, NameStr);
}
LandingPadInst::LandingPadInst(Type *RetTy, Value *PersonalityFn,
unsigned NumReservedValues, const Twine &NameStr,
BasicBlock *InsertAtEnd)
- : Instruction(RetTy, Instruction::LandingPad, 0, 0, InsertAtEnd) {
+ : Instruction(RetTy, Instruction::LandingPad, nullptr, 0, InsertAtEnd) {
init(PersonalityFn, 1 + NumReservedValues, NameStr);
}
@@ -324,7 +324,7 @@ CallInst::CallInst(const CallInst &CI)
OperandTraits<CallInst>::op_end(this) - CI.getNumOperands(),
CI.getNumOperands()) {
setAttributes(CI.getAttributes());
- setTailCall(CI.isTailCall());
+ setTailCallKind(CI.getTailCallKind());
setCallingConv(CI.getCallingConv());
std::copy(CI.op_begin(), CI.op_end(), op_begin());
@@ -420,8 +420,8 @@ static Instruction *createMalloc(Instruction *InsertBefore,
// prototype malloc as "void *malloc(size_t)"
MallocFunc = M->getOrInsertFunction("malloc", BPTy, IntPtrTy, NULL);
PointerType *AllocPtrType = PointerType::getUnqual(AllocTy);
- CallInst *MCall = NULL;
- Instruction *Result = NULL;
+ CallInst *MCall = nullptr;
+ Instruction *Result = nullptr;
if (InsertBefore) {
MCall = CallInst::Create(MallocFunc, AllocSize, "malloccall", InsertBefore);
Result = MCall;
@@ -458,7 +458,7 @@ Instruction *CallInst::CreateMalloc(Instruction *InsertBefore,
Value *AllocSize, Value *ArraySize,
Function * MallocF,
const Twine &Name) {
- return createMalloc(InsertBefore, NULL, IntPtrTy, AllocTy, AllocSize,
+ return createMalloc(InsertBefore, nullptr, IntPtrTy, AllocTy, AllocSize,
ArraySize, MallocF, Name);
}
@@ -474,7 +474,7 @@ Instruction *CallInst::CreateMalloc(BasicBlock *InsertAtEnd,
Type *IntPtrTy, Type *AllocTy,
Value *AllocSize, Value *ArraySize,
Function *MallocF, const Twine &Name) {
- return createMalloc(NULL, InsertAtEnd, IntPtrTy, AllocTy, AllocSize,
+ return createMalloc(nullptr, InsertAtEnd, IntPtrTy, AllocTy, AllocSize,
ArraySize, MallocF, Name);
}
@@ -492,7 +492,7 @@ static Instruction* createFree(Value* Source, Instruction *InsertBefore,
Type *IntPtrTy = Type::getInt8PtrTy(M->getContext());
// prototype free as "void free(void*)"
Value *FreeFunc = M->getOrInsertFunction("free", VoidTy, IntPtrTy, NULL);
- CallInst* Result = NULL;
+ CallInst* Result = nullptr;
Value *PtrCast = Source;
if (InsertBefore) {
if (Source->getType() != IntPtrTy)
@@ -512,14 +512,14 @@ static Instruction* createFree(Value* Source, Instruction *InsertBefore,
/// CreateFree - Generate the IR for a call to the builtin free function.
Instruction * CallInst::CreateFree(Value* Source, Instruction *InsertBefore) {
- return createFree(Source, InsertBefore, NULL);
+ return createFree(Source, InsertBefore, nullptr);
}
/// CreateFree - Generate the IR for a call to the builtin free function.
/// Note: This function does not add the call to the basic block, that is the
/// responsibility of the caller.
Instruction* CallInst::CreateFree(Value* Source, BasicBlock *InsertAtEnd) {
- Instruction* FreeCall = createFree(Source, NULL, InsertAtEnd);
+ Instruction* FreeCall = createFree(Source, nullptr, InsertAtEnd);
assert(FreeCall && "CreateFree did not create a CallInst");
return FreeCall;
}
@@ -699,11 +699,11 @@ BasicBlock *ResumeInst::getSuccessorV(unsigned idx) const {
UnreachableInst::UnreachableInst(LLVMContext &Context,
Instruction *InsertBefore)
: TerminatorInst(Type::getVoidTy(Context), Instruction::Unreachable,
- 0, 0, InsertBefore) {
+ nullptr, 0, InsertBefore) {
}
UnreachableInst::UnreachableInst(LLVMContext &Context, BasicBlock *InsertAtEnd)
: TerminatorInst(Type::getVoidTy(Context), Instruction::Unreachable,
- 0, 0, InsertAtEnd) {
+ nullptr, 0, InsertAtEnd) {
}
unsigned UnreachableInst::getNumSuccessorsV() const {
@@ -732,7 +732,7 @@ BranchInst::BranchInst(BasicBlock *IfTrue, Instruction *InsertBefore)
: TerminatorInst(Type::getVoidTy(IfTrue->getContext()), Instruction::Br,
OperandTraits<BranchInst>::op_end(this) - 1,
1, InsertBefore) {
- assert(IfTrue != 0 && "Branch destination may not be null!");
+ assert(IfTrue && "Branch destination may not be null!");
Op<-1>() = IfTrue;
}
BranchInst::BranchInst(BasicBlock *IfTrue, BasicBlock *IfFalse, Value *Cond,
@@ -752,7 +752,7 @@ BranchInst::BranchInst(BasicBlock *IfTrue, BasicBlock *InsertAtEnd)
: TerminatorInst(Type::getVoidTy(IfTrue->getContext()), Instruction::Br,
OperandTraits<BranchInst>::op_end(this) - 1,
1, InsertAtEnd) {
- assert(IfTrue != 0 && "Branch destination may not be null!");
+ assert(IfTrue && "Branch destination may not be null!");
Op<-1>() = IfTrue;
}
@@ -852,7 +852,7 @@ AllocaInst::AllocaInst(Type *Ty, Value *ArraySize,
AllocaInst::AllocaInst(Type *Ty, const Twine &Name,
Instruction *InsertBefore)
: UnaryInstruction(PointerType::getUnqual(Ty), Alloca,
- getAISize(Ty->getContext(), 0), InsertBefore) {
+ getAISize(Ty->getContext(), nullptr), InsertBefore) {
setAlignment(0);
assert(!Ty->isVoidTy() && "Cannot allocate void!");
setName(Name);
@@ -861,7 +861,7 @@ AllocaInst::AllocaInst(Type *Ty, const Twine &Name,
AllocaInst::AllocaInst(Type *Ty, const Twine &Name,
BasicBlock *InsertAtEnd)
: UnaryInstruction(PointerType::getUnqual(Ty), Alloca,
- getAISize(Ty->getContext(), 0), InsertAtEnd) {
+ getAISize(Ty->getContext(), nullptr), InsertAtEnd) {
setAlignment(0);
assert(!Ty->isVoidTy() && "Cannot allocate void!");
setName(Name);
@@ -1323,7 +1323,7 @@ AtomicRMWInst::AtomicRMWInst(BinOp Operation, Value *Ptr, Value *Val,
FenceInst::FenceInst(LLVMContext &C, AtomicOrdering Ordering,
SynchronizationScope SynchScope,
Instruction *InsertBefore)
- : Instruction(Type::getVoidTy(C), Fence, 0, 0, InsertBefore) {
+ : Instruction(Type::getVoidTy(C), Fence, nullptr, 0, InsertBefore) {
setOrdering(Ordering);
setSynchScope(SynchScope);
}
@@ -1331,7 +1331,7 @@ FenceInst::FenceInst(LLVMContext &C, AtomicOrdering Ordering,
FenceInst::FenceInst(LLVMContext &C, AtomicOrdering Ordering,
SynchronizationScope SynchScope,
BasicBlock *InsertAtEnd)
- : Instruction(Type::getVoidTy(C), Fence, 0, 0, InsertAtEnd) {
+ : Instruction(Type::getVoidTy(C), Fence, nullptr, 0, InsertAtEnd) {
setOrdering(Ordering);
setSynchScope(SynchScope);
}
@@ -1369,7 +1369,7 @@ GetElementPtrInst::GetElementPtrInst(const GetElementPtrInst &GEPI)
template <typename IndexTy>
static Type *getIndexedTypeInternal(Type *Ptr, ArrayRef<IndexTy> IdxList) {
PointerType *PTy = dyn_cast<PointerType>(Ptr->getScalarType());
- if (!PTy) return 0; // Type isn't a pointer type!
+ if (!PTy) return nullptr; // Type isn't a pointer type!
Type *Agg = PTy->getElementType();
// Handle the special case of the empty set index set, which is always valid.
@@ -1379,17 +1379,17 @@ static Type *getIndexedTypeInternal(Type *Ptr, ArrayRef<IndexTy> IdxList) {
// If there is at least one index, the top level type must be sized, otherwise
// it cannot be 'stepped over'.
if (!Agg->isSized())
- return 0;
+ return nullptr;
unsigned CurIdx = 1;
for (; CurIdx != IdxList.size(); ++CurIdx) {
CompositeType *CT = dyn_cast<CompositeType>(Agg);
- if (!CT || CT->isPointerTy()) return 0;
+ if (!CT || CT->isPointerTy()) return nullptr;
IndexTy Index = IdxList[CurIdx];
- if (!CT->indexValid(Index)) return 0;
+ if (!CT->indexValid(Index)) return nullptr;
Agg = CT->getTypeAtIndex(Index);
}
- return CurIdx == IdxList.size() ? Agg : 0;
+ return CurIdx == IdxList.size() ? Agg : nullptr;
}
Type *GetElementPtrInst::getIndexedType(Type *Ptr, ArrayRef<Value *> IdxList) {
@@ -1479,7 +1479,7 @@ ExtractElementInst::ExtractElementInst(Value *Val, Value *Index,
bool ExtractElementInst::isValidOperands(const Value *Val, const Value *Index) {
- if (!Val->getType()->isVectorTy() || !Index->getType()->isIntegerTy(32))
+ if (!Val->getType()->isVectorTy() || !Index->getType()->isIntegerTy())
return false;
return true;
}
@@ -1526,7 +1526,7 @@ bool InsertElementInst::isValidOperands(const Value *Vec, const Value *Elt,
if (Elt->getType() != cast<VectorType>(Vec->getType())->getElementType())
return false;// Second operand of insertelement must be vector element type.
- if (!Index->getType()->isIntegerTy(32))
+ if (!Index->getType()->isIntegerTy())
return false; // Third operand of insertelement must be an integer.
return true;
}
@@ -1579,7 +1579,7 @@ bool ShuffleVectorInst::isValidOperands(const Value *V1, const Value *V2,
// Mask must be vector of i32.
VectorType *MaskTy = dyn_cast<VectorType>(Mask->getType());
- if (MaskTy == 0 || !MaskTy->getElementType()->isIntegerTy(32))
+ if (!MaskTy || !MaskTy->getElementType()->isIntegerTy(32))
return false;
// Check to see if Mask is valid.
@@ -1721,13 +1721,13 @@ Type *ExtractValueInst::getIndexedType(Type *Agg,
// as easy to check those manually as well.
if (ArrayType *AT = dyn_cast<ArrayType>(Agg)) {
if (Index >= AT->getNumElements())
- return 0;
+ return nullptr;
} else if (StructType *ST = dyn_cast<StructType>(Agg)) {
if (Index >= ST->getNumElements())
- return 0;
+ return nullptr;
} else {
// Not a valid type to index into.
- return 0;
+ return nullptr;
}
Agg = cast<CompositeType>(Agg)->getTypeAtIndex(Index);
@@ -2130,7 +2130,7 @@ bool CastInst::isNoopCast(const DataLayout *DL) const {
return isNoopCast(Type::getInt64Ty(getContext()));
}
- Type *PtrOpTy = 0;
+ Type *PtrOpTy = nullptr;
if (getOpcode() == Instruction::PtrToInt)
PtrOpTy = getOperand(0)->getType();
else if (getOpcode() == Instruction::IntToPtr)
@@ -3361,7 +3361,7 @@ void SwitchInst::init(Value *Value, BasicBlock *Default, unsigned NumReserved) {
SwitchInst::SwitchInst(Value *Value, BasicBlock *Default, unsigned NumCases,
Instruction *InsertBefore)
: TerminatorInst(Type::getVoidTy(Value->getContext()), Instruction::Switch,
- 0, 0, InsertBefore) {
+ nullptr, 0, InsertBefore) {
init(Value, Default, 2+NumCases*2);
}
@@ -3372,12 +3372,12 @@ SwitchInst::SwitchInst(Value *Value, BasicBlock *Default, unsigned NumCases,
SwitchInst::SwitchInst(Value *Value, BasicBlock *Default, unsigned NumCases,
BasicBlock *InsertAtEnd)
: TerminatorInst(Type::getVoidTy(Value->getContext()), Instruction::Switch,
- 0, 0, InsertAtEnd) {
+ nullptr, 0, InsertAtEnd) {
init(Value, Default, 2+NumCases*2);
}
SwitchInst::SwitchInst(const SwitchInst &SI)
- : TerminatorInst(SI.getType(), Instruction::Switch, 0, 0) {
+ : TerminatorInst(SI.getType(), Instruction::Switch, nullptr, 0) {
init(SI.getCondition(), SI.getDefaultDest(), SI.getNumOperands());
NumOperands = SI.getNumOperands();
Use *OL = OperandList, *InOL = SI.OperandList;
@@ -3425,8 +3425,8 @@ void SwitchInst::removeCase(CaseIt i) {
}
// Nuke the last value.
- OL[NumOps-2].set(0);
- OL[NumOps-2+1].set(0);
+ OL[NumOps-2].set(nullptr);
+ OL[NumOps-2+1].set(nullptr);
NumOperands = NumOps-2;
}
@@ -3492,14 +3492,14 @@ void IndirectBrInst::growOperands() {
IndirectBrInst::IndirectBrInst(Value *Address, unsigned NumCases,
Instruction *InsertBefore)
: TerminatorInst(Type::getVoidTy(Address->getContext()),Instruction::IndirectBr,
- 0, 0, InsertBefore) {
+ nullptr, 0, InsertBefore) {
init(Address, NumCases);
}
IndirectBrInst::IndirectBrInst(Value *Address, unsigned NumCases,
BasicBlock *InsertAtEnd)
: TerminatorInst(Type::getVoidTy(Address->getContext()),Instruction::IndirectBr,
- 0, 0, InsertAtEnd) {
+ nullptr, 0, InsertAtEnd) {
init(Address, NumCases);
}
@@ -3541,7 +3541,7 @@ void IndirectBrInst::removeDestination(unsigned idx) {
OL[idx+1] = OL[NumOps-1];
// Nuke the last value.
- OL[NumOps-1].set(0);
+ OL[NumOps-1].set(nullptr);
NumOperands = NumOps-1;
}
@@ -3587,9 +3587,10 @@ InsertValueInst *InsertValueInst::clone_impl() const {
}
AllocaInst *AllocaInst::clone_impl() const {
- return new AllocaInst(getAllocatedType(),
- (Value*)getOperand(0),
- getAlignment());
+ AllocaInst *Result = new AllocaInst(getAllocatedType(),
+ (Value *)getOperand(0), getAlignment());
+ Result->setUsedWithInAlloca(isUsedWithInAlloca());
+ return Result;
}
LoadInst *LoadInst::clone_impl() const {
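The relaxed operand checks above mean extractelement/insertelement indices are no longer restricted to i32. A minimal sketch of what now verifies, assuming an IRBuilder positioned inside a function (buildExtract is an illustrative name, not part of the patch):

#include "llvm/IR/IRBuilder.h"
#include <cassert>
using namespace llvm;

// With the change above, a 64-bit index now satisfies isValidOperands;
// previously only i32 indices were accepted.
static Value *buildExtract(IRBuilder<> &B, Value *Vec) {
  Value *Idx = B.getInt64(0); // i64 index: rejected before, valid after
  assert(ExtractElementInst::isValidOperands(Vec, Idx) &&
         "any integer width is now a valid index type");
  return B.CreateExtractElement(Vec, Idx);
}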
diff --git a/lib/IR/IntrinsicInst.cpp b/lib/IR/IntrinsicInst.cpp
index 554f2be..5725284 100644
--- a/lib/IR/IntrinsicInst.cpp
+++ b/lib/IR/IntrinsicInst.cpp
@@ -35,7 +35,7 @@ static Value *CastOperand(Value *C) {
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
if (CE->isCast())
return CE->getOperand(0);
- return NULL;
+ return nullptr;
}
Value *DbgInfoIntrinsic::StripCast(Value *C) {
@@ -57,7 +57,7 @@ Value *DbgDeclareInst::getAddress() const {
if (MDNode* MD = cast_or_null<MDNode>(getArgOperand(0)))
return MD->getOperand(0);
else
- return NULL;
+ return nullptr;
}
//===----------------------------------------------------------------------===//
diff --git a/lib/IR/LLVMContext.cpp b/lib/IR/LLVMContext.cpp
index 1bfc515..de825f0 100644
--- a/lib/IR/LLVMContext.cpp
+++ b/lib/IR/LLVMContext.cpp
@@ -15,6 +15,7 @@
#include "llvm/IR/LLVMContext.h"
#include "LLVMContextImpl.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/IR/Instruction.h"
@@ -114,6 +115,17 @@ void *LLVMContext::getDiagnosticContext() const {
return pImpl->DiagnosticContext;
}
+void LLVMContext::setYieldCallback(YieldCallbackTy Callback, void *OpaqueHandle)
+{
+ pImpl->YieldCallback = Callback;
+ pImpl->YieldOpaqueHandle = OpaqueHandle;
+}
+
+void LLVMContext::yield() {
+ if (pImpl->YieldCallback)
+ pImpl->YieldCallback(this, pImpl->YieldOpaqueHandle);
+}
+
void LLVMContext::emitError(const Twine &ErrorStr) {
diagnose(DiagnosticInfoInlineAsm(ErrorStr));
}
@@ -125,10 +137,32 @@ void LLVMContext::emitError(const Instruction *I, const Twine &ErrorStr) {
void LLVMContext::diagnose(const DiagnosticInfo &DI) {
// If there is a report handler, use it.
- if (pImpl->DiagnosticHandler != 0) {
+ if (pImpl->DiagnosticHandler) {
pImpl->DiagnosticHandler(DI, pImpl->DiagnosticContext);
return;
}
+
+ // Optimization remarks are selective. They need to check whether the regexp
+ // pattern, passed via one of the -pass-remarks* flags, matches the name of
+ // the pass that is emitting the diagnostic. If there is no match, ignore the
+ // diagnostic and return.
+ switch (DI.getKind()) {
+ case llvm::DK_OptimizationRemark:
+ if (!cast<DiagnosticInfoOptimizationRemark>(DI).isEnabled())
+ return;
+ break;
+ case llvm::DK_OptimizationRemarkMissed:
+ if (!cast<DiagnosticInfoOptimizationRemarkMissed>(DI).isEnabled())
+ return;
+ break;
+ case llvm::DK_OptimizationRemarkAnalysis:
+ if (!cast<DiagnosticInfoOptimizationRemarkAnalysis>(DI).isEnabled())
+ return;
+ break;
+ default:
+ break;
+ }
+
// Otherwise, print the message with a prefix based on the severity.
std::string MsgStorage;
raw_string_ostream Stream(MsgStorage);
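The new yield hook gives embedding applications a chance to run between pass executions. A minimal sketch of wiring it up, assuming a host-defined handler (onYield and installYieldHook are illustrative names):

#include "llvm/IR/LLVMContext.h"
using namespace llvm;

// Invoked by LLVMContext::yield() between pass executions; the opaque
// handle registered below is passed back verbatim.
static void onYield(LLVMContext *Ctx, void *OpaqueHandle) {
  (void)Ctx;
  (void)OpaqueHandle; // e.g. pump an event loop or poll a cancel flag here
}

static void installYieldHook(LLVMContext &Ctx, void *Handle) {
  Ctx.setYieldCallback(onYield, Handle);
}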
diff --git a/lib/IR/LLVMContextImpl.cpp b/lib/IR/LLVMContextImpl.cpp
index ebff9d3..4c2791f 100644
--- a/lib/IR/LLVMContextImpl.cpp
+++ b/lib/IR/LLVMContextImpl.cpp
@@ -14,12 +14,13 @@
#include "LLVMContextImpl.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/Attributes.h"
+#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Module.h"
#include <algorithm>
using namespace llvm;
LLVMContextImpl::LLVMContextImpl(LLVMContext &C)
- : TheTrueVal(0), TheFalseVal(0),
+ : TheTrueVal(nullptr), TheFalseVal(nullptr),
VoidTy(C, Type::VoidTyID),
LabelTy(C, Type::LabelTyID),
HalfTy(C, Type::HalfTyID),
@@ -35,10 +36,12 @@ LLVMContextImpl::LLVMContextImpl(LLVMContext &C)
Int16Ty(C, 16),
Int32Ty(C, 32),
Int64Ty(C, 64) {
- InlineAsmDiagHandler = 0;
- InlineAsmDiagContext = 0;
- DiagnosticHandler = 0;
- DiagnosticContext = 0;
+ InlineAsmDiagHandler = nullptr;
+ InlineAsmDiagContext = nullptr;
+ DiagnosticHandler = nullptr;
+ DiagnosticContext = nullptr;
+ YieldCallback = nullptr;
+ YieldOpaqueHandle = nullptr;
NamedStructTypesUniqueID = 0;
}
@@ -46,8 +49,7 @@ namespace {
struct DropReferences {
// Takes the value_type of a ConstantUniqueMap's internal map, whose 'second'
// is a Constant*.
- template<typename PairT>
- void operator()(const PairT &P) {
+ template <typename PairT> void operator()(const PairT &P) {
P.second->dropAllReferences();
}
};
@@ -64,12 +66,11 @@ struct DropFirst {
}
LLVMContextImpl::~LLVMContextImpl() {
- // NOTE: We need to delete the contents of OwnedModules, but we have to
- // duplicate it into a temporary vector, because the destructor of Module
- // will try to remove itself from OwnedModules set. This would cause
- // iterator invalidation if we iterated on the set directly.
- std::vector<Module*> Modules(OwnedModules.begin(), OwnedModules.end());
- DeleteContainerPointers(Modules);
+ // NOTE: We need to delete the contents of OwnedModules, but Module's dtor
+ // will call LLVMContextImpl::removeModule, thus invalidating iterators into
+ // the container. Avoid iterators during this operation:
+ while (!OwnedModules.empty())
+ delete *OwnedModules.begin();
// Free the constants. This is important to do here to ensure that they are
// freed before the LeakDetector is torn down.
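The destructor rewrite above is an instance of a general idiom for deleting elements whose destructors remove themselves from the owning container. A reduced model, with T standing in for Module:

#include <set>

// Erasing from the front avoids holding any iterator across a delete;
// ~T is assumed to remove the object from Owned, as ~Module does here.
template <typename T> static void deleteAll(std::set<T *> &Owned) {
  while (!Owned.empty())
    delete *Owned.begin();
}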
diff --git a/lib/IR/LLVMContextImpl.h b/lib/IR/LLVMContextImpl.h
index dc77d29..808c239 100644
--- a/lib/IR/LLVMContextImpl.h
+++ b/lib/IR/LLVMContextImpl.h
@@ -37,6 +37,9 @@ namespace llvm {
class ConstantInt;
class ConstantFP;
+class DiagnosticInfoOptimizationRemark;
+class DiagnosticInfoOptimizationRemarkMissed;
+class DiagnosticInfoOptimizationRemarkAnalysis;
class LLVMContext;
class Type;
class Value;
@@ -56,8 +59,8 @@ struct DenseMapAPIntKeyInfo {
return hash_combine(Key.type, Key.val);
}
};
- static inline KeyTy getEmptyKey() { return KeyTy(APInt(1,0), 0); }
- static inline KeyTy getTombstoneKey() { return KeyTy(APInt(1,1), 0); }
+ static inline KeyTy getEmptyKey() { return KeyTy(APInt(1,0), nullptr); }
+ static inline KeyTy getTombstoneKey() { return KeyTy(APInt(1,1), nullptr); }
static unsigned getHashValue(const KeyTy &Key) {
return static_cast<unsigned>(hash_value(Key));
}
@@ -242,6 +245,9 @@ public:
LLVMContext::DiagnosticHandlerTy DiagnosticHandler;
void *DiagnosticContext;
+ LLVMContext::YieldCallbackTy YieldCallback;
+ void *YieldOpaqueHandle;
+
typedef DenseMap<DenseMapAPIntKeyInfo::KeyTy, ConstantInt *,
DenseMapAPIntKeyInfo> IntMapTy;
IntMapTy IntConstants;
diff --git a/lib/IR/LeaksContext.h b/lib/IR/LeaksContext.h
index 5038dc9..52ac170 100644
--- a/lib/IR/LeaksContext.h
+++ b/lib/IR/LeaksContext.h
@@ -12,8 +12,12 @@
//
//===----------------------------------------------------------------------===//
+#ifndef LLVM_IR_LEAKSCONTEXT_H
+#define LLVM_IR_LEAKSCONTEXT_H
+
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/IR/Value.h"
+#include "llvm/Support/raw_ostream.h"
namespace llvm {
@@ -30,10 +34,10 @@ struct PrinterTrait<Value> {
template <typename T>
struct LeakDetectorImpl {
explicit LeakDetectorImpl(const char* const name = "") :
- Cache(0), Name(name) { }
+ Cache(nullptr), Name(name) { }
void clear() {
- Cache = 0;
+ Cache = nullptr;
Ts.clear();
}
@@ -57,15 +61,15 @@ struct LeakDetectorImpl {
void removeGarbage(const T* o) {
if (o == Cache)
- Cache = 0; // Cache hit
+ Cache = nullptr; // Cache hit
else
Ts.erase(o);
}
bool hasGarbage(const std::string& Message) {
- addGarbage(0); // Flush the Cache
+ addGarbage(nullptr); // Flush the Cache
- assert(Cache == 0 && "No value should be cached anymore!");
+ assert(!Cache && "No value should be cached anymore!");
if (!Ts.empty()) {
errs() << "Leaked " << Name << " objects found: " << Message << ":\n";
@@ -90,3 +94,5 @@ private:
};
}
+
+#endif // LLVM_IR_LEAKSCONTEXT_H
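For reference, the one-element cache that addGarbage(nullptr) flushes above follows a simple spill-on-next-add pattern; a reduced model of it (CachedSet is a stand-in, not part of the header):

#include <set>

// The most recent addition is held in Cache and only spilled into the set
// on the next add; passing nullptr therefore acts as a pure flush.
template <typename T> struct CachedSet {
  const T *Cache = nullptr;
  std::set<const T *> Ts;
  void add(const T *O) {
    if (Cache)
      Ts.insert(Cache);
    Cache = O;
  }
};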
diff --git a/lib/IR/LegacyPassManager.cpp b/lib/IR/LegacyPassManager.cpp
index 7c5cc68..d3f3482 100644
--- a/lib/IR/LegacyPassManager.cpp
+++ b/lib/IR/LegacyPassManager.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
+#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/IRPrintingPasses.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/LegacyPassManagers.h"
@@ -22,6 +23,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/Mutex.h"
+#include "llvm/Support/TimeValue.h"
#include "llvm/Support/Timer.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
@@ -118,7 +120,7 @@ bool PMDataManager::isPassDebuggingExecutionsOrMore() const {
void PassManagerPrettyStackEntry::print(raw_ostream &OS) const {
- if (V == 0 && M == 0)
+ if (!V && !M)
OS << "Releasing pass '";
else
OS << "Running pass '";
@@ -129,7 +131,7 @@ void PassManagerPrettyStackEntry::print(raw_ostream &OS) const {
OS << " on module '" << M->getModuleIdentifier() << "'.\n";
return;
}
- if (V == 0) {
+ if (!V) {
OS << '\n';
return;
}
@@ -484,11 +486,11 @@ public:
/// getPassTimer - Return the timer for the specified pass if it exists.
Timer *getPassTimer(Pass *P) {
if (P->getAsPMDataManager())
- return 0;
+ return nullptr;
sys::SmartScopedLock<true> Lock(*TimingInfoMutex);
Timer *&T = TimingData[P];
- if (T == 0)
+ if (!T)
T = new Timer(P->getPassName(), TG);
return T;
}
@@ -579,7 +581,7 @@ void PMTopLevelManager::collectLastUses(SmallVectorImpl<Pass *> &LastUses,
}
AnalysisUsage *PMTopLevelManager::findAnalysisUsage(Pass *P) {
- AnalysisUsage *AnUsage = NULL;
+ AnalysisUsage *AnUsage = nullptr;
DenseMap<Pass *, AnalysisUsage *>::iterator DMI = AnUsageMap.find(P);
if (DMI != AnUsageMap.end())
AnUsage = DMI->second;
@@ -626,7 +628,7 @@ void PMTopLevelManager::schedulePass(Pass *P) {
if (!AnalysisPass) {
const PassInfo *PI = PassRegistry::getPassRegistry()->getPassInfo(*I);
- if (PI == NULL) {
+ if (!PI) {
// Pass P is not in the global PassRegistry
dbgs() << "Pass '" << P->getPassName() << "' is not initialized." << "\n";
dbgs() << "Verify if there is a pass dependency cycle." << "\n";
@@ -733,7 +735,7 @@ Pass *PMTopLevelManager::findAnalysisPass(AnalysisID AID) {
}
}
- return 0;
+ return nullptr;
}
// Print passes managed by this top level manager.
@@ -830,7 +832,7 @@ void PMDataManager::recordAvailableAnalysis(Pass *P) {
// This pass is the current implementation of all of the interfaces it
// implements as well.
const PassInfo *PInf = PassRegistry::getPassRegistry()->getPassInfo(PI);
- if (PInf == 0) return;
+ if (!PInf) return;
const std::vector<const PassInfo*> &II = PInf->getInterfacesImplemented();
for (unsigned i = 0, e = II.size(); i != e; ++i)
AvailableAnalysis[II[i]->getTypeInfo()] = P;
@@ -847,7 +849,7 @@ bool PMDataManager::preserveHigherLevelAnalysis(Pass *P) {
for (SmallVectorImpl<Pass *>::iterator I = HigherLevelAnalysis.begin(),
E = HigherLevelAnalysis.end(); I != E; ++I) {
Pass *P1 = *I;
- if (P1->getAsImmutablePass() == 0 &&
+ if (P1->getAsImmutablePass() == nullptr &&
std::find(PreservedSet.begin(), PreservedSet.end(),
P1->getPassID()) ==
PreservedSet.end())
@@ -887,7 +889,7 @@ void PMDataManager::removeNotPreservedAnalysis(Pass *P) {
for (DenseMap<AnalysisID, Pass*>::iterator I = AvailableAnalysis.begin(),
E = AvailableAnalysis.end(); I != E; ) {
DenseMap<AnalysisID, Pass*>::iterator Info = I++;
- if (Info->second->getAsImmutablePass() == 0 &&
+ if (Info->second->getAsImmutablePass() == nullptr &&
std::find(PreservedSet.begin(), PreservedSet.end(), Info->first) ==
PreservedSet.end()) {
// Remove this analysis
@@ -911,7 +913,7 @@ void PMDataManager::removeNotPreservedAnalysis(Pass *P) {
I = InheritedAnalysis[Index]->begin(),
E = InheritedAnalysis[Index]->end(); I != E; ) {
DenseMap<AnalysisID, Pass *>::iterator Info = I++;
- if (Info->second->getAsImmutablePass() == 0 &&
+ if (Info->second->getAsImmutablePass() == nullptr &&
std::find(PreservedSet.begin(), PreservedSet.end(), Info->first) ==
PreservedSet.end()) {
// Remove this analysis
@@ -1028,7 +1030,7 @@ void PMDataManager::add(Pass *P, bool ProcessAnalysis) {
// Set P as P's last user until someone starts using P.
// However, if P is a Pass Manager then it does not need
// to record its last user.
- if (P->getAsPMDataManager() == 0)
+ if (!P->getAsPMDataManager())
LastUses.push_back(P);
TPM->setLastUser(LastUses, P);
@@ -1095,7 +1097,7 @@ void PMDataManager::initializeAnalysisImpl(Pass *P) {
I = AnUsage->getRequiredSet().begin(),
E = AnUsage->getRequiredSet().end(); I != E; ++I) {
Pass *Impl = findAnalysisPass(*I, true);
- if (Impl == 0)
+ if (!Impl)
// This may be analysis pass that is initialized on the fly.
// If that is not the case then it will raise an assert when it is used.
continue;
@@ -1119,7 +1121,7 @@ Pass *PMDataManager::findAnalysisPass(AnalysisID AID, bool SearchParent) {
if (SearchParent)
return TPM->findAnalysisPass(AID);
- return NULL;
+ return nullptr;
}
// Print list of passes that are last used by P.
@@ -1158,7 +1160,8 @@ void PMDataManager::dumpPassInfo(Pass *P, enum PassDebuggingString S1,
StringRef Msg) {
if (PassDebugging < Executions)
return;
- dbgs() << (void*)this << std::string(getDepth()*2+1, ' ');
+ dbgs() << "[" << sys::TimeValue::now().str() << "] " << (void *)this
+ << std::string(getDepth() * 2 + 1, ' ');
switch (S1) {
case EXECUTION_MSG:
dbgs() << "Executing Pass '" << P->getPassName();
@@ -1487,8 +1490,10 @@ bool FunctionPassManagerImpl::run(Function &F) {
TimingInfo::createTheTimeInfo();
initializeAllAnalysisInfo();
- for (unsigned Index = 0; Index < getNumContainedManagers(); ++Index)
+ for (unsigned Index = 0; Index < getNumContainedManagers(); ++Index) {
Changed |= getContainedManager(Index)->runOnFunction(F);
+ F.getContext().yield();
+ }
for (unsigned Index = 0; Index < getNumContainedManagers(); ++Index)
getContainedManager(Index)->cleanup();
@@ -1657,6 +1662,8 @@ void MPPassManager::addLowerLevelRequiredPass(Pass *P, Pass *RequiredPass) {
assert((P->getPotentialPassManagerType() <
RequiredPass->getPotentialPassManagerType()) &&
"Unable to handle Pass that requires lower level Analysis pass");
+ if (!RequiredPass)
+ return;
FunctionPassManagerImpl *FPP = OnTheFlyManagers[P];
if (!FPP) {
@@ -1666,14 +1673,24 @@ void MPPassManager::addLowerLevelRequiredPass(Pass *P, Pass *RequiredPass) {
OnTheFlyManagers[P] = FPP;
}
- FPP->add(RequiredPass);
+ const PassInfo *RequiredPassPI =
+ PassRegistry::getPassRegistry()->getPassInfo(RequiredPass->getPassID());
- // Register P as the last user of RequiredPass.
- if (RequiredPass) {
- SmallVector<Pass *, 1> LU;
- LU.push_back(RequiredPass);
- FPP->setLastUser(LU, P);
+ Pass *FoundPass = nullptr;
+ if (RequiredPassPI && RequiredPassPI->isAnalysis()) {
+ FoundPass =
+ ((PMTopLevelManager*)FPP)->findAnalysisPass(RequiredPass->getPassID());
}
+ if (!FoundPass) {
+ FoundPass = RequiredPass;
+ // This should be guaranteed to add RequiredPass to the pass manager given
+ // that we checked for an available analysis above.
+ FPP->add(RequiredPass);
+ }
+ // Register P as the last user of FoundPass or RequiredPass.
+ SmallVector<Pass *, 1> LU;
+ LU.push_back(FoundPass);
+ FPP->setLastUser(LU, P);
}
/// Return function pass corresponding to PassInfo PI, that is
@@ -1709,8 +1726,10 @@ bool PassManagerImpl::run(Module &M) {
}
initializeAllAnalysisInfo();
- for (unsigned Index = 0; Index < getNumContainedManagers(); ++Index)
+ for (unsigned Index = 0; Index < getNumContainedManagers(); ++Index) {
Changed |= getContainedManager(Index)->runOnModule(M);
+ M.getContext().yield();
+ }
for (SmallVectorImpl<ImmutablePass *>::const_iterator I = IPV.begin(),
E = IPV.end(); I != E; ++I) {
@@ -1773,7 +1792,7 @@ void TimingInfo::createTheTimeInfo() {
Timer *llvm::getPassTimer(Pass *P) {
if (TheTimeInfo)
return TheTimeInfo->getPassTimer(P);
- return 0;
+ return nullptr;
}
//===----------------------------------------------------------------------===//
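The dumpPassInfo change above prefixes -debug-pass=Executions output with a wall-clock stamp. The same formatting in isolation, as a sketch (printStamped is an illustrative name):

#include "llvm/Support/TimeValue.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

// Emits "[<timestamp>] <Msg>", matching the new dumpPassInfo prefix.
static void printStamped(raw_ostream &OS, const char *Msg) {
  OS << "[" << sys::TimeValue::now().str() << "] " << Msg << "\n";
}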
diff --git a/lib/IR/MDBuilder.cpp b/lib/IR/MDBuilder.cpp
new file mode 100644
index 0000000..65cdf38
--- /dev/null
+++ b/lib/IR/MDBuilder.cpp
@@ -0,0 +1,139 @@
+//===---- llvm/MDBuilder.cpp - Builder for LLVM metadata ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the MDBuilder class, which is used as a convenient way to
+// create LLVM metadata with a consistent and simplified interface.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Metadata.h"
+using namespace llvm;
+
+MDString *MDBuilder::createString(StringRef Str) {
+ return MDString::get(Context, Str);
+}
+
+MDNode *MDBuilder::createFPMath(float Accuracy) {
+ if (Accuracy == 0.0)
+ return nullptr;
+ assert(Accuracy > 0.0 && "Invalid fpmath accuracy!");
+ Value *Op = ConstantFP::get(Type::getFloatTy(Context), Accuracy);
+ return MDNode::get(Context, Op);
+}
+
+MDNode *MDBuilder::createBranchWeights(uint32_t TrueWeight,
+ uint32_t FalseWeight) {
+ uint32_t Weights[] = {TrueWeight, FalseWeight};
+ return createBranchWeights(Weights);
+}
+
+MDNode *MDBuilder::createBranchWeights(ArrayRef<uint32_t> Weights) {
+ assert(Weights.size() >= 2 && "Need at least two branch weights!");
+
+ SmallVector<Value *, 4> Vals(Weights.size() + 1);
+ Vals[0] = createString("branch_weights");
+
+ Type *Int32Ty = Type::getInt32Ty(Context);
+ for (unsigned i = 0, e = Weights.size(); i != e; ++i)
+ Vals[i + 1] = ConstantInt::get(Int32Ty, Weights[i]);
+
+ return MDNode::get(Context, Vals);
+}
+
+MDNode *MDBuilder::createRange(const APInt &Lo, const APInt &Hi) {
+ assert(Lo.getBitWidth() == Hi.getBitWidth() && "Mismatched bitwidths!");
+ // If the range is everything then it is useless.
+ if (Hi == Lo)
+ return nullptr;
+
+ // Return the range [Lo, Hi).
+ Type *Ty = IntegerType::get(Context, Lo.getBitWidth());
+ Value *Range[2] = {ConstantInt::get(Ty, Lo), ConstantInt::get(Ty, Hi)};
+ return MDNode::get(Context, Range);
+}
+
+MDNode *MDBuilder::createAnonymousTBAARoot() {
+ // To ensure uniqueness the root node is self-referential.
+ MDNode *Dummy = MDNode::getTemporary(Context, ArrayRef<Value *>());
+ MDNode *Root = MDNode::get(Context, Dummy);
+ // At this point we have
+ // !0 = metadata !{} <- dummy
+ // !1 = metadata !{metadata !0} <- root
+ // Replace the dummy operand with the root node itself and delete the dummy.
+ Root->replaceOperandWith(0, Root);
+ MDNode::deleteTemporary(Dummy);
+ // We now have
+ // !1 = metadata !{metadata !1} <- self-referential root
+ return Root;
+}
+
+MDNode *MDBuilder::createTBAARoot(StringRef Name) {
+ return MDNode::get(Context, createString(Name));
+}
+
+/// \brief Return metadata for a non-root TBAA node with the given name,
+/// parent in the TBAA tree, and value for 'pointsToConstantMemory'.
+MDNode *MDBuilder::createTBAANode(StringRef Name, MDNode *Parent,
+ bool isConstant) {
+ if (isConstant) {
+ Constant *Flags = ConstantInt::get(Type::getInt64Ty(Context), 1);
+ Value *Ops[3] = {createString(Name), Parent, Flags};
+ return MDNode::get(Context, Ops);
+ } else {
+ Value *Ops[2] = {createString(Name), Parent};
+ return MDNode::get(Context, Ops);
+ }
+}
+
+/// \brief Return metadata for a tbaa.struct node with the given
+/// struct field descriptions.
+MDNode *MDBuilder::createTBAAStructNode(ArrayRef<TBAAStructField> Fields) {
+ SmallVector<Value *, 4> Vals(Fields.size() * 3);
+ Type *Int64 = Type::getInt64Ty(Context);
+ for (unsigned i = 0, e = Fields.size(); i != e; ++i) {
+ Vals[i * 3 + 0] = ConstantInt::get(Int64, Fields[i].Offset);
+ Vals[i * 3 + 1] = ConstantInt::get(Int64, Fields[i].Size);
+ Vals[i * 3 + 2] = Fields[i].TBAA;
+ }
+ return MDNode::get(Context, Vals);
+}
+
+/// \brief Return metadata for a TBAA struct node in the type DAG
+/// with the given name, a list of pairs (offset, field type in the type DAG).
+MDNode *MDBuilder::createTBAAStructTypeNode(
+ StringRef Name, ArrayRef<std::pair<MDNode *, uint64_t>> Fields) {
+ SmallVector<Value *, 4> Ops(Fields.size() * 2 + 1);
+ Type *Int64 = Type::getInt64Ty(Context);
+ Ops[0] = createString(Name);
+ for (unsigned i = 0, e = Fields.size(); i != e; ++i) {
+ Ops[i * 2 + 1] = Fields[i].first;
+ Ops[i * 2 + 2] = ConstantInt::get(Int64, Fields[i].second);
+ }
+ return MDNode::get(Context, Ops);
+}
+
+/// \brief Return metadata for a TBAA scalar type node with the
+/// given name, an offset and a parent in the TBAA type DAG.
+MDNode *MDBuilder::createTBAAScalarTypeNode(StringRef Name, MDNode *Parent,
+ uint64_t Offset) {
+ ConstantInt *Off = ConstantInt::get(Type::getInt64Ty(Context), Offset);
+ Value *Ops[3] = {createString(Name), Parent, Off};
+ return MDNode::get(Context, Ops);
+}
+
+/// \brief Return metadata for a TBAA tag node with the given
+/// base type, access type and offset relative to the base type.
+MDNode *MDBuilder::createTBAAStructTagNode(MDNode *BaseType, MDNode *AccessType,
+ uint64_t Offset) {
+ Type *Int64 = Type::getInt64Ty(Context);
+ Value *Ops[3] = {BaseType, AccessType, ConstantInt::get(Int64, Offset)};
+ return MDNode::get(Context, Ops);
+}
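A short sketch of the MDBuilder helpers defined in this new file, assuming an existing LLVMContext (exampleMetadata is an illustrative name):

#include "llvm/ADT/APInt.h"
#include "llvm/IR/MDBuilder.h"
using namespace llvm;

static void exampleMetadata(LLVMContext &Ctx) {
  MDBuilder MDB(Ctx);
  // Produces !{!"branch_weights", i32 90, i32 10}.
  MDNode *Weights = MDB.createBranchWeights(90, 10);
  // Half-open range [0, 256) at bit width 32.
  MDNode *Range = MDB.createRange(APInt(32, 0), APInt(32, 256));
  (void)Weights;
  (void)Range;
}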
diff --git a/lib/IR/Mangler.cpp b/lib/IR/Mangler.cpp
index d82388f..27d973b 100644
--- a/lib/IR/Mangler.cpp
+++ b/lib/IR/Mangler.cpp
@@ -108,7 +108,7 @@ void Mangler::getNameWithPrefix(raw_ostream &OS, const GlobalValue *GV,
}
bool UseAt = false;
- const Function *MSFunc = NULL;
+ const Function *MSFunc = nullptr;
CallingConv::ID CC;
if (DL->hasMicrosoftFastStdCallMangling()) {
if ((MSFunc = dyn_cast<Function>(GV))) {
diff --git a/lib/IR/Metadata.cpp b/lib/IR/Metadata.cpp
index ba39334..4d932d0 100644
--- a/lib/IR/Metadata.cpp
+++ b/lib/IR/Metadata.cpp
@@ -87,7 +87,7 @@ public:
MDNodeOperand::~MDNodeOperand() {}
void MDNodeOperand::deleted() {
- getParent()->replaceOperand(this, 0);
+ getParent()->replaceOperand(this, nullptr);
}
void MDNodeOperand::allUsesReplacedWith(Value *NV) {
@@ -148,10 +148,10 @@ MDNode::~MDNode() {
}
static const Function *getFunctionForValue(Value *V) {
- if (!V) return NULL;
+ if (!V) return nullptr;
if (Instruction *I = dyn_cast<Instruction>(V)) {
BasicBlock *BB = I->getParent();
- return BB ? BB->getParent() : 0;
+ return BB ? BB->getParent() : nullptr;
}
if (Argument *A = dyn_cast<Argument>(V))
return A->getParent();
@@ -159,15 +159,15 @@ static const Function *getFunctionForValue(Value *V) {
return BB->getParent();
if (MDNode *MD = dyn_cast<MDNode>(V))
return MD->getFunction();
- return NULL;
+ return nullptr;
}
#ifndef NDEBUG
static const Function *assertLocalFunction(const MDNode *N) {
- if (!N->isFunctionLocal()) return 0;
+ if (!N->isFunctionLocal()) return nullptr;
// FIXME: This does not handle cyclic function local metadata.
- const Function *F = 0, *NewF = 0;
+ const Function *F = nullptr, *NewF = nullptr;
for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
if (Value *V = N->getOperand(i)) {
if (MDNode *MD = dyn_cast<MDNode>(V))
@@ -175,10 +175,11 @@ static const Function *assertLocalFunction(const MDNode *N) {
else
NewF = getFunctionForValue(V);
}
- if (F == 0)
+ if (!F)
F = NewF;
- else
- assert((NewF == 0 || F == NewF) &&"inconsistent function-local metadata");
+ else
+ assert((NewF == nullptr || F == NewF) &&
+ "inconsistent function-local metadata");
}
return F;
}
@@ -192,11 +193,11 @@ const Function *MDNode::getFunction() const {
#ifndef NDEBUG
return assertLocalFunction(this);
#else
- if (!isFunctionLocal()) return NULL;
+ if (!isFunctionLocal()) return nullptr;
for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
if (const Function *F = getFunctionForValue(getOperand(i)))
return F;
- return NULL;
+ return nullptr;
#endif
}
@@ -335,14 +336,14 @@ void MDNode::replaceOperand(MDNodeOperand *Op, Value *To) {
// Likewise if the MDNode is function-local but for a different function.
if (To && isFunctionLocalValue(To)) {
if (!isFunctionLocal())
- To = 0;
+ To = nullptr;
else {
const Function *F = getFunction();
const Function *FV = getFunctionForValue(To);
// Metadata can be function-local without having an associated function.
// So only consider functions to have changed if non-null.
if (F && FV && F != FV)
- To = 0;
+ To = nullptr;
}
}
@@ -366,7 +367,7 @@ void MDNode::replaceOperand(MDNodeOperand *Op, Value *To) {
// anymore. This commonly occurs during destruction, and uniquing these
// brings little reuse. Also, this means we don't need to include
// isFunctionLocal bits in FoldingSetNodeIDs for MDNodes.
- if (To == 0) {
+ if (!To) {
setIsNotUniqued();
return;
}
@@ -407,7 +408,7 @@ void MDNode::replaceOperand(MDNodeOperand *Op, Value *To) {
MDNode *MDNode::getMostGenericFPMath(MDNode *A, MDNode *B) {
if (!A || !B)
- return NULL;
+ return nullptr;
APFloat AVal = cast<ConstantFP>(A->getOperand(0))->getValueAPF();
APFloat BVal = cast<ConstantFP>(B->getOperand(0))->getValueAPF();
@@ -457,7 +458,7 @@ MDNode *MDNode::getMostGenericRange(MDNode *A, MDNode *B) {
// the ones that overlap.
if (!A || !B)
- return NULL;
+ return nullptr;
if (A == B)
return A;
@@ -512,7 +513,7 @@ MDNode *MDNode::getMostGenericRange(MDNode *A, MDNode *B) {
ConstantRange Range(cast<ConstantInt>(EndPoints[0])->getValue(),
cast<ConstantInt>(EndPoints[1])->getValue());
if (Range.isFullSet())
- return NULL;
+ return nullptr;
}
return MDNode::get(A->getContext(), EndPoints);
@@ -527,7 +528,7 @@ static SmallVector<TrackingVH<MDNode>, 4> &getNMDOps(void *Operands) {
}
NamedMDNode::NamedMDNode(const Twine &N)
- : Name(N.str()), Parent(0),
+ : Name(N.str()), Parent(nullptr),
Operands(new SmallVector<TrackingVH<MDNode>, 4>()) {
}
@@ -575,7 +576,7 @@ StringRef NamedMDNode::getName() const {
//
void Instruction::setMetadata(StringRef Kind, MDNode *Node) {
- if (Node == 0 && !hasMetadata()) return;
+ if (!Node && !hasMetadata()) return;
setMetadata(getContext().getMDKindID(Kind), Node);
}
@@ -631,7 +632,7 @@ void Instruction::dropUnknownMetadata(ArrayRef<unsigned> KnownIDs) {
/// node. This updates/replaces metadata if already present, or removes it if
/// Node is null.
void Instruction::setMetadata(unsigned KindID, MDNode *Node) {
- if (Node == 0 && !hasMetadata()) return;
+ if (!Node && !hasMetadata()) return;
// Handle 'dbg' as a special case since it is not stored in the hash table.
if (KindID == LLVMContext::MD_dbg) {
@@ -691,7 +692,7 @@ MDNode *Instruction::getMetadataImpl(unsigned KindID) const {
if (KindID == LLVMContext::MD_dbg)
return DbgLoc.getAsMDNode(getContext());
- if (!hasMetadataHashEntry()) return 0;
+ if (!hasMetadataHashEntry()) return nullptr;
LLVMContextImpl::MDMapTy &Info = getContext().pImpl->MetadataStore[this];
assert(!Info.empty() && "bit out of sync with hash table");
@@ -699,7 +700,7 @@ MDNode *Instruction::getMetadataImpl(unsigned KindID) const {
for (const auto &I : Info)
if (I.first == KindID)
return I.second;
- return 0;
+ return nullptr;
}
void Instruction::getAllMetadataImpl(SmallVectorImpl<std::pair<unsigned,
diff --git a/lib/IR/Module.cpp b/lib/IR/Module.cpp
index 1accd47..5dbed69 100644
--- a/lib/IR/Module.cpp
+++ b/lib/IR/Module.cpp
@@ -23,6 +23,7 @@
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/LeakDetector.h"
+#include "llvm/Support/Dwarf.h"
#include <algorithm>
#include <cstdarg>
#include <cstdlib>
@@ -95,7 +96,7 @@ Constant *Module::getOrInsertFunction(StringRef Name,
AttributeSet AttributeList) {
// See if we have a definition for the specified function already.
GlobalValue *F = getNamedValue(Name);
- if (F == 0) {
+ if (!F) {
// Nope, add it
Function *New = Function::Create(Ty, GlobalVariable::ExternalLinkage, Name);
if (!New->isIntrinsic()) // Intrinsics get attrs set on construction
@@ -183,7 +184,7 @@ GlobalVariable *Module::getGlobalVariable(StringRef Name, bool AllowLocal) {
dyn_cast_or_null<GlobalVariable>(getNamedValue(Name)))
if (AllowLocal || !Result->hasLocalLinkage())
return Result;
- return 0;
+ return nullptr;
}
/// getOrInsertGlobal - Look up the specified global in the module symbol table.
@@ -195,11 +196,11 @@ GlobalVariable *Module::getGlobalVariable(StringRef Name, bool AllowLocal) {
Constant *Module::getOrInsertGlobal(StringRef Name, Type *Ty) {
// See if we have a definition for the specified global already.
GlobalVariable *GV = dyn_cast_or_null<GlobalVariable>(getNamedValue(Name));
- if (GV == 0) {
+ if (!GV) {
// Nope, add it
GlobalVariable *New =
new GlobalVariable(*this, Ty, false, GlobalVariable::ExternalLinkage,
- 0, Name);
+ nullptr, Name);
return New; // Return the new declaration.
}
@@ -284,7 +285,7 @@ Value *Module::getModuleFlag(StringRef Key) const {
if (Key == MFE.Key->getString())
return MFE.Val;
}
- return 0;
+ return nullptr;
}
/// getModuleFlagsMetadata - Returns the NamedMDNode in the module that
@@ -350,7 +351,7 @@ void Module::setDataLayout(const DataLayout *Other) {
const DataLayout *Module::getDataLayout() const {
if (DataLayoutStr.empty())
- return 0;
+ return nullptr;
return &DL;
}
@@ -429,3 +430,10 @@ void Module::dropAllReferences() {
for(Module::alias_iterator I = alias_begin(), E = alias_end(); I != E; ++I)
I->dropAllReferences();
}
+
+unsigned Module::getDwarfVersion() const {
+ Value *Val = getModuleFlag("Dwarf Version");
+ if (!Val)
+ return dwarf::DWARF_VERSION;
+ return cast<ConstantInt>(Val)->getZExtValue();
+}
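A sketch of the new query, assuming the usual module-flag mechanism (dwarfVersionFor is an illustrative name):

#include "llvm/IR/Module.h"
using namespace llvm;

static unsigned dwarfVersionFor(Module &M) {
  // A module may pin the version explicitly, e.g.:
  //   M.addModuleFlag(Module::Warning, "Dwarf Version", 4);
  // otherwise getDwarfVersion() falls back to dwarf::DWARF_VERSION.
  return M.getDwarfVersion();
}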
diff --git a/lib/IR/Pass.cpp b/lib/IR/Pass.cpp
index e16c5b7..bb55d2a 100644
--- a/lib/IR/Pass.cpp
+++ b/lib/IR/Pass.cpp
@@ -22,6 +22,8 @@
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
+#define DEBUG_TYPE "ir"
+
//===----------------------------------------------------------------------===//
// Pass Implementation
//
@@ -44,7 +46,7 @@ PassManagerType ModulePass::getPotentialPassManagerType() const {
}
bool Pass::mustPreserveAnalysisID(char &AID) const {
- return Resolver->getAnalysisIfAvailable(&AID, true) != 0;
+ return Resolver->getAnalysisIfAvailable(&AID, true) != nullptr;
}
// dumpPassStructure - Implement the -debug-pass=Structure option
@@ -90,11 +92,11 @@ void *Pass::getAdjustedAnalysisPointer(AnalysisID AID) {
}
ImmutablePass *Pass::getAsImmutablePass() {
- return 0;
+ return nullptr;
}
PMDataManager *Pass::getAsPMDataManager() {
- return 0;
+ return nullptr;
}
void Pass::setResolver(AnalysisResolver *AR) {
@@ -112,7 +114,7 @@ void Pass::print(raw_ostream &O,const Module*) const {
// dump - call print(cerr);
void Pass::dump() const {
- print(dbgs(), 0);
+ print(dbgs(), nullptr);
}
//===----------------------------------------------------------------------===//
@@ -193,7 +195,7 @@ const PassInfo *Pass::lookupPassInfo(StringRef Arg) {
Pass *Pass::createPass(AnalysisID ID) {
const PassInfo *PI = PassRegistry::getPassRegistry()->getPassInfo(ID);
if (!PI)
- return NULL;
+ return nullptr;
return PI->createPass();
}
diff --git a/lib/IR/PassManager.cpp b/lib/IR/PassManager.cpp
index ea15455..0defb6a 100644
--- a/lib/IR/PassManager.cpp
+++ b/lib/IR/PassManager.cpp
@@ -8,6 +8,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/ADT/STLExtras.h"
+#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -32,6 +33,8 @@ PreservedAnalyses ModulePassManager::run(Module *M, ModuleAnalysisManager *AM) {
if (AM)
AM->invalidate(M, PassPA);
PA.intersect(std::move(PassPA));
+
+ M->getContext().yield();
}
if (DebugPM)
@@ -59,7 +62,7 @@ ModuleAnalysisManager::ResultConceptT *
ModuleAnalysisManager::getCachedResultImpl(void *PassID, Module *M) const {
ModuleAnalysisResultMapT::const_iterator RI =
ModuleAnalysisResults.find(PassID);
- return RI == ModuleAnalysisResults.end() ? 0 : &*RI->second;
+ return RI == ModuleAnalysisResults.end() ? nullptr : &*RI->second;
}
void ModuleAnalysisManager::invalidateImpl(void *PassID, Module *M) {
@@ -92,6 +95,8 @@ PreservedAnalyses FunctionPassManager::run(Function *F,
if (AM)
AM->invalidate(F, PassPA);
PA.intersect(std::move(PassPA));
+
+ F->getContext().yield();
}
if (DebugPM)
@@ -135,7 +140,7 @@ FunctionAnalysisManager::ResultConceptT *
FunctionAnalysisManager::getCachedResultImpl(void *PassID, Function *F) const {
FunctionAnalysisResultMapT::const_iterator RI =
FunctionAnalysisResults.find(std::make_pair(PassID, F));
- return RI == FunctionAnalysisResults.end() ? 0 : &*RI->second->second;
+ return RI == FunctionAnalysisResults.end() ? nullptr : &*RI->second->second;
}
void FunctionAnalysisManager::invalidateImpl(void *PassID, Function *F) {
@@ -165,6 +170,8 @@ void FunctionAnalysisManager::invalidateImpl(Function *F,
while (!InvalidatedPassIDs.empty())
FunctionAnalysisResults.erase(
std::make_pair(InvalidatedPassIDs.pop_back_val(), F));
+ if (ResultsList.empty())
+ FunctionAnalysisResultLists.erase(F);
}
char FunctionAnalysisManagerModuleProxy::PassID;
diff --git a/lib/IR/PassRegistry.cpp b/lib/IR/PassRegistry.cpp
index 74dc0f1..6a5bee2 100644
--- a/lib/IR/PassRegistry.cpp
+++ b/lib/IR/PassRegistry.cpp
@@ -57,7 +57,7 @@ struct PassRegistryImpl {
};
DenseMap<const PassInfo*, AnalysisGroupInfo> AnalysisGroupInfoMap;
- std::vector<const PassInfo*> ToFree;
+ std::vector<std::unique_ptr<const PassInfo>> ToFree;
std::vector<PassRegistrationListener*> Listeners;
};
} // end anonymous namespace
@@ -75,20 +75,15 @@ void *PassRegistry::getImpl() const {
PassRegistry::~PassRegistry() {
sys::SmartScopedWriter<true> Guard(*Lock);
PassRegistryImpl *Impl = static_cast<PassRegistryImpl*>(pImpl);
-
- for (std::vector<const PassInfo*>::iterator I = Impl->ToFree.begin(),
- E = Impl->ToFree.end(); I != E; ++I)
- delete *I;
-
delete Impl;
- pImpl = 0;
+ pImpl = nullptr;
}
const PassInfo *PassRegistry::getPassInfo(const void *TI) const {
sys::SmartScopedReader<true> Guard(*Lock);
PassRegistryImpl *Impl = static_cast<PassRegistryImpl*>(getImpl());
PassRegistryImpl::MapType::const_iterator I = Impl->PassInfoMap.find(TI);
- return I != Impl->PassInfoMap.end() ? I->second : 0;
+ return I != Impl->PassInfoMap.end() ? I->second : nullptr;
}
const PassInfo *PassRegistry::getPassInfo(StringRef Arg) const {
@@ -96,7 +91,7 @@ const PassInfo *PassRegistry::getPassInfo(StringRef Arg) const {
PassRegistryImpl *Impl = static_cast<PassRegistryImpl*>(getImpl());
PassRegistryImpl::StringMapType::const_iterator
I = Impl->PassInfoStringMap.find(Arg);
- return I != Impl->PassInfoStringMap.end() ? I->second : 0;
+ return I != Impl->PassInfoStringMap.end() ? I->second : nullptr;
}
//===----------------------------------------------------------------------===//
@@ -117,7 +112,7 @@ void PassRegistry::registerPass(const PassInfo &PI, bool ShouldFree) {
I = Impl->Listeners.begin(), E = Impl->Listeners.end(); I != E; ++I)
(*I)->passRegistered(&PI);
- if (ShouldFree) Impl->ToFree.push_back(&PI);
+ if (ShouldFree) Impl->ToFree.push_back(std::unique_ptr<const PassInfo>(&PI));
}
void PassRegistry::unregisterPass(const PassInfo &PI) {
@@ -148,7 +143,7 @@ void PassRegistry::registerAnalysisGroup(const void *InterfaceID,
bool isDefault,
bool ShouldFree) {
PassInfo *InterfaceInfo = const_cast<PassInfo*>(getPassInfo(InterfaceID));
- if (InterfaceInfo == 0) {
+ if (!InterfaceInfo) {
// First reference to Interface, register it now.
registerPass(Registeree);
InterfaceInfo = &Registeree;
@@ -174,7 +169,7 @@ void PassRegistry::registerAnalysisGroup(const void *InterfaceID,
"Cannot add a pass to the same analysis group more than once!");
AGI.Implementations.insert(ImplementationInfo);
if (isDefault) {
- assert(InterfaceInfo->getNormalCtor() == 0 &&
+ assert(InterfaceInfo->getNormalCtor() == nullptr &&
"Default implementation for analysis group already specified!");
assert(ImplementationInfo->getNormalCtor() &&
"Cannot specify pass as default if it does not have a default ctor");
@@ -185,7 +180,8 @@ void PassRegistry::registerAnalysisGroup(const void *InterfaceID,
}
PassRegistryImpl *Impl = static_cast<PassRegistryImpl*>(getImpl());
- if (ShouldFree) Impl->ToFree.push_back(&Registeree);
+ if (ShouldFree)
+ Impl->ToFree.push_back(std::unique_ptr<const PassInfo>(&Registeree));
}
void PassRegistry::addRegistrationListener(PassRegistrationListener *L) {
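The unique_ptr conversion above replaces the manual delete-loop in ~PassRegistry with container-owned lifetimes; the idiom in miniature (OwningList and T are stand-ins):

#include <memory>
#include <vector>

template <typename T> struct OwningList {
  std::vector<std::unique_ptr<const T>> ToFree;
  void adopt(const T *P) { ToFree.push_back(std::unique_ptr<const T>(P)); }
  // The implicit destructor now deletes every adopted object; no loop needed.
};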
diff --git a/lib/IR/SymbolTableListTraitsImpl.h b/lib/IR/SymbolTableListTraitsImpl.h
index 5a383ee..8302597 100644
--- a/lib/IR/SymbolTableListTraitsImpl.h
+++ b/lib/IR/SymbolTableListTraitsImpl.h
@@ -65,7 +65,7 @@ void SymbolTableListTraits<ValueSubClass,ItemParentClass>
template<typename ValueSubClass, typename ItemParentClass>
void SymbolTableListTraits<ValueSubClass,ItemParentClass>
::addNodeToList(ValueSubClass *V) {
- assert(V->getParent() == 0 && "Value already in a container!!");
+ assert(!V->getParent() && "Value already in a container!!");
ItemParentClass *Owner = getListOwner();
V->setParent(Owner);
if (V->hasName())
@@ -76,7 +76,7 @@ void SymbolTableListTraits<ValueSubClass,ItemParentClass>
template<typename ValueSubClass, typename ItemParentClass>
void SymbolTableListTraits<ValueSubClass,ItemParentClass>
::removeNodeFromList(ValueSubClass *V) {
- V->setParent(0);
+ V->setParent(nullptr);
if (V->hasName())
if (ValueSymbolTable *ST = TraitsClass::getSymTab(getListOwner()))
ST->removeValueName(V->getValueName());
diff --git a/lib/IR/Type.cpp b/lib/IR/Type.cpp
index b02509f..1efde47 100644
--- a/lib/IR/Type.cpp
+++ b/lib/IR/Type.cpp
@@ -36,7 +36,7 @@ Type *Type::getPrimitiveType(LLVMContext &C, TypeID IDNumber) {
case MetadataTyID : return getMetadataTy(C);
case X86_MMXTyID : return getX86_MMXTy(C);
default:
- return 0;
+ return nullptr;
}
}
@@ -312,8 +312,8 @@ IntegerType *IntegerType::get(LLVMContext &C, unsigned NumBits) {
}
IntegerType *&Entry = C.pImpl->IntegerTypes[NumBits];
-
- if (Entry == 0)
+
+ if (!Entry)
Entry = new (C.pImpl->TypeAllocator) IntegerType(C, NumBits);
return Entry;
@@ -448,7 +448,7 @@ void StructType::setName(StringRef Name) {
if (SymbolTableEntry) {
// Delete the old string data.
((EntryTy *)SymbolTableEntry)->Destroy(SymbolTable.getAllocator());
- SymbolTableEntry = 0;
+ SymbolTableEntry = nullptr;
}
return;
}
@@ -497,7 +497,7 @@ StructType *StructType::get(LLVMContext &Context, bool isPacked) {
}
StructType *StructType::get(Type *type, ...) {
- assert(type != 0 && "Cannot create a struct type with no elements with this");
+ assert(type && "Cannot create a struct type with no elements with this");
LLVMContext &Ctx = type->getContext();
va_list ap;
SmallVector<llvm::Type*, 8> StructFields;
@@ -538,7 +538,7 @@ StructType *StructType::create(ArrayRef<Type*> Elements) {
}
StructType *StructType::create(StringRef Name, Type *type, ...) {
- assert(type != 0 && "Cannot create a struct type with no elements with this");
+ assert(type && "Cannot create a struct type with no elements with this");
LLVMContext &Ctx = type->getContext();
va_list ap;
SmallVector<llvm::Type*, 8> StructFields;
@@ -576,13 +576,13 @@ bool StructType::isSized(SmallPtrSet<const Type*, 4> *Visited) const {
StringRef StructType::getName() const {
assert(!isLiteral() && "Literal structs never have names");
- if (SymbolTableEntry == 0) return StringRef();
-
+ if (!SymbolTableEntry) return StringRef();
+
return ((StringMapEntry<StructType*> *)SymbolTableEntry)->getKey();
}
void StructType::setBody(Type *type, ...) {
- assert(type != 0 && "Cannot create a struct type with no elements with this");
+ assert(type && "Cannot create a struct type with no elements with this");
va_list ap;
SmallVector<llvm::Type*, 8> StructFields;
va_start(ap, type);
@@ -680,8 +680,8 @@ ArrayType *ArrayType::get(Type *elementType, uint64_t NumElements) {
LLVMContextImpl *pImpl = ElementType->getContext().pImpl;
ArrayType *&Entry =
pImpl->ArrayTypes[std::make_pair(ElementType, NumElements)];
-
- if (Entry == 0)
+
+ if (!Entry)
Entry = new (pImpl->TypeAllocator) ArrayType(ElementType, NumElements);
return Entry;
}
@@ -709,8 +709,8 @@ VectorType *VectorType::get(Type *elementType, unsigned NumElements) {
LLVMContextImpl *pImpl = ElementType->getContext().pImpl;
VectorType *&Entry = ElementType->getContext().pImpl
->VectorTypes[std::make_pair(ElementType, NumElements)];
-
- if (Entry == 0)
+
+ if (!Entry)
Entry = new (pImpl->TypeAllocator) VectorType(ElementType, NumElements);
return Entry;
}
@@ -734,7 +734,7 @@ PointerType *PointerType::get(Type *EltTy, unsigned AddressSpace) {
PointerType *&Entry = AddressSpace == 0 ? CImpl->PointerTypes[EltTy]
: CImpl->ASPointerTypes[std::make_pair(EltTy, AddressSpace)];
- if (Entry == 0)
+ if (!Entry)
Entry = new (CImpl->TypeAllocator) PointerType(EltTy, AddressSpace);
return Entry;
}
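The repeated "if (!Entry)" pattern in the Type::get functions above is the standard get-or-create idiom over a uniquing map; a generic sketch (getOrCreate is illustrative):

#include <map>

// operator[] returns a reference to a (possibly null) slot, so the
// object is constructed exactly once and reused on later lookups.
template <typename K, typename V, typename MakeFn>
static V *getOrCreate(std::map<K, V *> &Cache, const K &Key, MakeFn Make) {
  V *&Entry = Cache[Key];
  if (!Entry)
    Entry = Make();
  return Entry;
}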
diff --git a/lib/IR/Use.cpp b/lib/IR/Use.cpp
index 60a0c56..047861c 100644
--- a/lib/IR/Use.cpp
+++ b/lib/IR/Use.cpp
@@ -27,14 +27,14 @@ void Use::swap(Use &RHS) {
Val = RHS.Val;
Val->addUse(*this);
} else {
- Val = 0;
+ Val = nullptr;
}
if (OldVal) {
RHS.Val = OldVal;
RHS.Val->addUse(RHS);
} else {
- RHS.Val = 0;
+ RHS.Val = nullptr;
}
}
@@ -49,7 +49,7 @@ unsigned Use::getOperandNo() const {
return this - getUser()->op_begin();
}
-// Sets up the waymarking algoritm's tags for a series of Uses. See the
+// Sets up the waymarking algorithm's tags for a series of Uses. See the
// algorithm details here:
//
// http://www.llvm.org/docs/ProgrammersManual.html#UserLayout
diff --git a/lib/IR/Value.cpp b/lib/IR/Value.cpp
index 97a562e..d734e4e 100644
--- a/lib/IR/Value.cpp
+++ b/lib/IR/Value.cpp
@@ -44,7 +44,7 @@ static inline Type *checkType(Type *Ty) {
Value::Value(Type *ty, unsigned scid)
: SubclassID(scid), HasValueHandle(0),
SubclassOptionalData(0), SubclassData(0), VTy((Type*)checkType(ty)),
- UseList(0), Name(0) {
+ UseList(nullptr), Name(nullptr) {
// FIXME: Why isn't this in the subclass gunk??
// Note, we cannot call isa<CallInst> before the CallInst has been
// constructed.
@@ -141,7 +141,7 @@ unsigned Value::getNumUses() const {
}
static bool getSymTab(Value *V, ValueSymbolTable *&ST) {
- ST = 0;
+ ST = nullptr;
if (Instruction *I = dyn_cast<Instruction>(V)) {
if (BasicBlock *P = I->getParent())
if (Function *PP = P->getParent())
@@ -203,7 +203,7 @@ void Value::setName(const Twine &NewName) {
if (NameRef.empty()) {
// Free the name for this value.
Name->Destroy();
- Name = 0;
+ Name = nullptr;
return;
}
@@ -225,7 +225,7 @@ void Value::setName(const Twine &NewName) {
// Remove old name.
ST->removeValueName(Name);
Name->Destroy();
- Name = 0;
+ Name = nullptr;
if (NameRef.empty())
return;
@@ -241,7 +241,7 @@ void Value::setName(const Twine &NewName) {
void Value::takeName(Value *V) {
assert(SubclassID != MDStringVal && "Cannot take the name of an MDString!");
- ValueSymbolTable *ST = 0;
+ ValueSymbolTable *ST = nullptr;
// If this value has a name, drop it.
if (hasName()) {
// Get the symtab this is in.
@@ -256,7 +256,7 @@ void Value::takeName(Value *V) {
if (ST)
ST->removeValueName(Name);
Name->Destroy();
- Name = 0;
+ Name = nullptr;
}
// Now we know that this has no name.
@@ -283,7 +283,7 @@ void Value::takeName(Value *V) {
if (ST == VST) {
// Take the name!
Name = V->Name;
- V->Name = 0;
+ V->Name = nullptr;
Name->setValue(this);
return;
}
@@ -294,17 +294,73 @@ void Value::takeName(Value *V) {
if (VST)
VST->removeValueName(V->Name);
Name = V->Name;
- V->Name = 0;
+ V->Name = nullptr;
Name->setValue(this);
if (ST)
ST->reinsertValue(this);
}
+static GlobalObject &findReplacementForAliasUse(Value &C) {
+ if (auto *GO = dyn_cast<GlobalObject>(&C))
+ return *GO;
+ if (auto *GA = dyn_cast<GlobalAlias>(&C))
+ return *GA->getAliasee();
+ auto *CE = cast<ConstantExpr>(&C);
+ assert(CE->getOpcode() == Instruction::BitCast ||
+ CE->getOpcode() == Instruction::GetElementPtr ||
+ CE->getOpcode() == Instruction::AddrSpaceCast);
+ if (CE->getOpcode() == Instruction::GetElementPtr)
+ assert(cast<GEPOperator>(CE)->hasAllZeroIndices());
+ return findReplacementForAliasUse(*CE->getOperand(0));
+}
+
+static void replaceAliasUseWith(Use &U, Value *New) {
+ GlobalObject &Replacement = findReplacementForAliasUse(*New);
+ assert(&cast<GlobalObject>(*U) != &Replacement &&
+ "replaceAliasUseWith cannot form an alias cycle");
+ U.set(&Replacement);
+}
+
+#ifndef NDEBUG
+static bool contains(SmallPtrSet<ConstantExpr *, 4> &Cache, ConstantExpr *Expr,
+ Constant *C) {
+ if (!Cache.insert(Expr))
+ return false;
+
+ for (auto &O : Expr->operands()) {
+ if (O == C)
+ return true;
+ auto *CE = dyn_cast<ConstantExpr>(O);
+ if (!CE)
+ continue;
+ if (contains(Cache, CE, C))
+ return true;
+ }
+ return false;
+}
+
+static bool contains(Value *Expr, Value *V) {
+ if (Expr == V)
+ return true;
+
+ auto *C = dyn_cast<Constant>(V);
+ if (!C)
+ return false;
+
+ auto *CE = dyn_cast<ConstantExpr>(Expr);
+ if (!CE)
+ return false;
+
+ SmallPtrSet<ConstantExpr *, 4> Cache;
+ return contains(Cache, CE, C);
+}
+#endif
void Value::replaceAllUsesWith(Value *New) {
assert(New && "Value::replaceAllUsesWith(<null>) is invalid!");
- assert(New != this && "this->replaceAllUsesWith(this) is NOT valid!");
+ assert(!contains(New, this) &&
+ "this->replaceAllUsesWith(expr(this)) is NOT valid!");
assert(New->getType() == getType() &&
"replaceAllUses of value with new value of different type!");
@@ -316,7 +372,11 @@ void Value::replaceAllUsesWith(Value *New) {
Use &U = *UseList;
// Must handle Constants specially, we cannot call replaceUsesOfWith on a
// constant because they are uniqued.
- if (Constant *C = dyn_cast<Constant>(U.getUser())) {
+ if (auto *C = dyn_cast<Constant>(U.getUser())) {
+ if (isa<GlobalAlias>(C)) {
+ replaceAliasUseWith(U, New);
+ continue;
+ }
if (!isa<GlobalValue>(C)) {
C->replaceUsesOfWithOnConstant(this, New, &U);
continue;
@@ -557,7 +617,7 @@ void ValueHandleBase::AddToUseList() {
// If this value already has a ValueHandle, then it must be in the
// ValueHandles map already.
ValueHandleBase *&Entry = pImpl->ValueHandles[VP.getPointer()];
- assert(Entry != 0 && "Value doesn't have any handles?");
+ assert(Entry && "Value doesn't have any handles?");
AddToExistingUseList(&Entry);
return;
}
@@ -571,7 +631,7 @@ void ValueHandleBase::AddToUseList() {
const void *OldBucketPtr = Handles.getPointerIntoBucketsArray();
ValueHandleBase *&Entry = Handles[VP.getPointer()];
- assert(Entry == 0 && "Value really did already have handles?");
+ assert(!Entry && "Value really did already have handles?");
AddToExistingUseList(&Entry);
VP.getPointer()->HasValueHandle = true;
@@ -652,7 +712,7 @@ void ValueHandleBase::ValueIsDeleted(Value *V) {
break;
case Weak:
// Weak just goes to null, which will unlink it from the list.
- Entry->operator=(0);
+ Entry->operator=(nullptr);
break;
case Callback:
// Forward to the subclass's implementation.
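The strengthened RAUW assertion above rejects replacing a value with an expression that transitively contains it. A hypothetical misuse it now catches (badRAUW is an illustrative name):

#include "llvm/IR/Constants.h"
#include "llvm/IR/GlobalVariable.h"
using namespace llvm;

static void badRAUW(GlobalVariable *GV, Type *Ty) {
  Constant *CE = ConstantExpr::getBitCast(GV, Ty);
  // GV->replaceAllUsesWith(CE); // would now assert: CE contains GV
  (void)CE;
}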
diff --git a/lib/IR/ValueSymbolTable.cpp b/lib/IR/ValueSymbolTable.cpp
index fffacb3..e9e979a 100644
--- a/lib/IR/ValueSymbolTable.cpp
+++ b/lib/IR/ValueSymbolTable.cpp
@@ -11,7 +11,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "valuesymtab"
#include "llvm/IR/ValueSymbolTable.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/IR/GlobalValue.h"
@@ -20,6 +19,8 @@
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
+#define DEBUG_TYPE "valuesymtab"
+
// Class destructor
ValueSymbolTable::~ValueSymbolTable() {
#ifndef NDEBUG // Only do this in -g mode...
@@ -56,7 +57,7 @@ void ValueSymbolTable::reinsertValue(Value* V) {
// Try insert the vmap entry with this suffix.
ValueName &NewName = vmap.GetOrCreateValue(UniqueName);
- if (NewName.getValue() == 0) {
+ if (!NewName.getValue()) {
// Newly inserted name. Success!
NewName.setValue(V);
V->Name = &NewName;
@@ -78,7 +79,7 @@ void ValueSymbolTable::removeValueName(ValueName *V) {
ValueName *ValueSymbolTable::createValueName(StringRef Name, Value *V) {
// In the common case, the name is not already in the symbol table.
ValueName &Entry = vmap.GetOrCreateValue(Name);
- if (Entry.getValue() == 0) {
+ if (!Entry.getValue()) {
Entry.setValue(V);
//DEBUG(dbgs() << " Inserted value: " << Entry.getKeyData() << ": "
// << *V << "\n");
@@ -95,7 +96,7 @@ ValueName *ValueSymbolTable::createValueName(StringRef Name, Value *V) {
// Try insert the vmap entry with this suffix.
ValueName &NewName = vmap.GetOrCreateValue(UniqueName);
- if (NewName.getValue() == 0) {
+ if (!NewName.getValue()) {
// Newly inserted name. Success!
NewName.setValue(V);
//DEBUG(dbgs() << " Inserted value: " << UniqueName << ": " << *V << "\n");
diff --git a/lib/IR/Verifier.cpp b/lib/IR/Verifier.cpp
index 089ad1c..bcc38c1 100644
--- a/lib/IR/Verifier.cpp
+++ b/lib/IR/Verifier.cpp
@@ -61,6 +61,7 @@
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/InstIterator.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
@@ -76,15 +77,71 @@
#include <cstdarg>
using namespace llvm;
-static cl::opt<bool> DisableDebugInfoVerifier("disable-debug-info-verifier",
- cl::init(true));
+static cl::opt<bool> VerifyDebugInfo("verify-debug-info", cl::init(false));
namespace {
-class Verifier : public InstVisitor<Verifier> {
- friend class InstVisitor<Verifier>;
-
+struct VerifierSupport {
raw_ostream &OS;
const Module *M;
+
+ /// \brief Track the brokenness of the module while recursively visiting.
+ bool Broken;
+
+ explicit VerifierSupport(raw_ostream &OS)
+ : OS(OS), M(nullptr), Broken(false) {}
+
+ void WriteValue(const Value *V) {
+ if (!V)
+ return;
+ if (isa<Instruction>(V)) {
+ OS << *V << '\n';
+ } else {
+ V->printAsOperand(OS, true, M);
+ OS << '\n';
+ }
+ }
+
+ void WriteType(Type *T) {
+ if (!T)
+ return;
+ OS << ' ' << *T;
+ }
+
+ // CheckFailed - A check failed, so print out the condition and the message
+ // that failed. This provides a nice place to put a breakpoint if you want
+ // to see why something is not correct.
+ void CheckFailed(const Twine &Message, const Value *V1 = nullptr,
+ const Value *V2 = nullptr, const Value *V3 = nullptr,
+ const Value *V4 = nullptr) {
+ OS << Message.str() << "\n";
+ WriteValue(V1);
+ WriteValue(V2);
+ WriteValue(V3);
+ WriteValue(V4);
+ Broken = true;
+ }
+
+ void CheckFailed(const Twine &Message, const Value *V1, Type *T2,
+ const Value *V3 = nullptr) {
+ OS << Message.str() << "\n";
+ WriteValue(V1);
+ WriteType(T2);
+ WriteValue(V3);
+ Broken = true;
+ }
+
+ void CheckFailed(const Twine &Message, Type *T1, Type *T2 = nullptr,
+ Type *T3 = nullptr) {
+ OS << Message.str() << "\n";
+ WriteType(T1);
+ WriteType(T2);
+ WriteType(T3);
+ Broken = true;
+ }
+};
+class Verifier : public InstVisitor<Verifier>, VerifierSupport {
+ friend class InstVisitor<Verifier>;
+
LLVMContext *Context;
const DataLayout *DL;
DominatorTree DT;
@@ -104,15 +161,10 @@ class Verifier : public InstVisitor<Verifier> {
/// personality function.
const Value *PersonalityFn;
- /// \brief Finder keeps track of all debug info MDNodes in a Module.
- DebugInfoFinder Finder;
-
- /// \brief Track the brokenness of the module while recursively visiting.
- bool Broken;
-
public:
explicit Verifier(raw_ostream &OS = dbgs())
- : OS(OS), M(0), Context(0), DL(0), PersonalityFn(0), Broken(false) {}
+ : VerifierSupport(OS), Context(nullptr), DL(nullptr),
+ PersonalityFn(nullptr) {}
bool verify(const Function &F) {
M = F.getParent();
@@ -142,16 +194,11 @@ public:
// FIXME: It's really gross that we have to cast away constness here.
DT.recalculate(const_cast<Function &>(F));
- Finder.reset();
Broken = false;
// FIXME: We strip const here because the inst visitor strips const.
visit(const_cast<Function &>(F));
InstsInThisBlock.clear();
- PersonalityFn = 0;
-
- if (!DisableDebugInfoVerifier)
- // Verify Debug Info.
- verifyDebugInfo();
+ PersonalityFn = nullptr;
return !Broken;
}
@@ -159,7 +206,6 @@ public:
bool verify(const Module &M) {
this->M = &M;
Context = &M.getContext();
- Finder.reset();
Broken = false;
// Scan through, checking all of the external function's linkage now...
@@ -187,13 +233,6 @@ public:
visitModuleFlags(M);
visitModuleIdents(M);
- if (!DisableDebugInfoVerifier) {
- Finder.reset();
- Finder.processModule(M);
- // Verify Debug Info.
- verifyDebugInfo();
- }
-
return !Broken;
}
@@ -262,6 +301,7 @@ private:
void visitLandingPadInst(LandingPadInst &LPI);
void VerifyCallSite(CallSite CS);
+ void verifyMustTailCall(CallInst &CI);
bool PerformTypeCheck(Intrinsic::ID ID, Function *F, Type *Ty, int VT,
unsigned ArgNo, std::string &Suffix);
bool VerifyIntrinsicType(Type *Ty, ArrayRef<Intrinsic::IITDescriptor> &Infos,
@@ -278,56 +318,21 @@ private:
void VerifyBitcastType(const Value *V, Type *DestTy, Type *SrcTy);
void VerifyConstantExprBitcastType(const ConstantExpr *CE);
+};
+class DebugInfoVerifier : public VerifierSupport {
+public:
+ explicit DebugInfoVerifier(raw_ostream &OS = dbgs()) : VerifierSupport(OS) {}
- void verifyDebugInfo();
-
- void WriteValue(const Value *V) {
- if (!V)
- return;
- if (isa<Instruction>(V)) {
- OS << *V << '\n';
- } else {
- V->printAsOperand(OS, true, M);
- OS << '\n';
- }
- }
-
- void WriteType(Type *T) {
- if (!T)
- return;
- OS << ' ' << *T;
- }
-
- // CheckFailed - A check failed, so print out the condition and the message
- // that failed. This provides a nice place to put a breakpoint if you want
- // to see why something is not correct.
- void CheckFailed(const Twine &Message, const Value *V1 = 0,
- const Value *V2 = 0, const Value *V3 = 0,
- const Value *V4 = 0) {
- OS << Message.str() << "\n";
- WriteValue(V1);
- WriteValue(V2);
- WriteValue(V3);
- WriteValue(V4);
- Broken = true;
- }
-
- void CheckFailed(const Twine &Message, const Value *V1, Type *T2,
- const Value *V3 = 0) {
- OS << Message.str() << "\n";
- WriteValue(V1);
- WriteType(T2);
- WriteValue(V3);
- Broken = true;
+ bool verify(const Module &M) {
+ this->M = &M;
+ verifyDebugInfo();
+ return !Broken;
}
- void CheckFailed(const Twine &Message, Type *T1, Type *T2 = 0, Type *T3 = 0) {
- OS << Message.str() << "\n";
- WriteType(T1);
- WriteType(T2);
- WriteType(T3);
- Broken = true;
- }
+private:
+ void verifyDebugInfo();
+ void processInstructions(DebugInfoFinder &Finder);
+ void processCallInst(DebugInfoFinder &Finder, const CallInst &CI);
};
} // End anonymous namespace
@@ -345,18 +350,14 @@ private:
void Verifier::visit(Instruction &I) {
for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i)
- Assert1(I.getOperand(i) != 0, "Operand is null", &I);
+ Assert1(I.getOperand(i) != nullptr, "Operand is null", &I);
InstVisitor<Verifier>::visit(I);
}
void Verifier::visitGlobalValue(const GlobalValue &GV) {
- Assert1(!GV.isDeclaration() ||
- GV.isMaterializable() ||
- GV.hasExternalLinkage() ||
- GV.hasExternalWeakLinkage() ||
- (isa<GlobalAlias>(GV) &&
- (GV.hasLocalLinkage() || GV.hasWeakLinkage())),
+ Assert1(!GV.isDeclaration() || GV.isMaterializable() ||
+ GV.hasExternalLinkage() || GV.hasExternalWeakLinkage(),
"Global is external, but doesn't have external or weak linkage!",
&GV);
@@ -395,14 +396,22 @@ void Verifier::visitGlobalVariable(const GlobalVariable &GV) {
"invalid linkage for intrinsic global variable", &GV);
// Don't worry about emitting an error for it not being an array,
// visitGlobalValue will complain on appending non-array.
- if (ArrayType *ATy = dyn_cast<ArrayType>(GV.getType())) {
+ if (ArrayType *ATy = dyn_cast<ArrayType>(GV.getType()->getElementType())) {
StructType *STy = dyn_cast<StructType>(ATy->getElementType());
PointerType *FuncPtrTy =
FunctionType::get(Type::getVoidTy(*Context), false)->getPointerTo();
- Assert1(STy && STy->getNumElements() == 2 &&
+ // FIXME: Reject the 2-field form in LLVM 4.0.
+ Assert1(STy && (STy->getNumElements() == 2 ||
+ STy->getNumElements() == 3) &&
STy->getTypeAtIndex(0u)->isIntegerTy(32) &&
STy->getTypeAtIndex(1) == FuncPtrTy,
"wrong type for intrinsic global variable", &GV);
+ if (STy->getNumElements() == 3) {
+ Type *ETy = STy->getTypeAtIndex(2);
+ Assert1(ETy->isPointerTy() &&
+ cast<PointerType>(ETy)->getElementType()->isIntegerTy(8),
+ "wrong type for intrinsic global variable", &GV);
+ }
}
}
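[Editor's note] The hunk above relaxes the @llvm.global_ctors check to accept a third i8* field per entry alongside the legacy two-field form. A minimal sketch of IR that the updated check accepts, written as a C++ string literal; the null third field is a hypothetical choice, any i8*-typed constant passes:

    // Hypothetical 3-field ctor entry: { priority, function, associated data }.
    static const char *CtorsIR =
        "define void @ctor() { ret void }\n"
        "@llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }]\n"
        "  [{ i32, void ()*, i8* } { i32 65535, void ()* @ctor, i8* null }]\n";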
@@ -472,11 +481,7 @@ void Verifier::visitGlobalAlias(const GlobalAlias &GA) {
"Alias should have external or external weak linkage!", &GA);
Assert1(GA.getAliasee(),
"Aliasee cannot be NULL!", &GA);
- Assert1(GA.getType() == GA.getAliasee()->getType(),
- "Alias and aliasee types should match!", &GA);
Assert1(!GA.hasUnnamedAddr(), "Alias cannot have unnamed_addr!", &GA);
- Assert1(!GA.hasSection(), "Alias cannot have a section!", &GA);
- Assert1(!GA.getAlignment(), "Alias connot have an alignment", &GA);
const Constant *Aliasee = GA.getAliasee();
const GlobalValue *GV = dyn_cast<GlobalValue>(Aliasee);
@@ -492,14 +497,7 @@ void Verifier::visitGlobalAlias(const GlobalAlias &GA) {
"addrspacecast of GlobalValue",
&GA);
- if (CE->getOpcode() == Instruction::BitCast) {
- unsigned SrcAS = GV->getType()->getPointerAddressSpace();
- unsigned DstAS = CE->getType()->getPointerAddressSpace();
-
- Assert1(SrcAS == DstAS,
- "Alias bitcasts cannot be between different address spaces",
- &GA);
- }
+ VerifyConstantExprBitcastType(CE);
}
Assert1(!GV->isDeclaration(), "Alias must point to a definition", &GA);
if (const GlobalAlias *GAAliasee = dyn_cast<GlobalAlias>(GV)) {
@@ -507,10 +505,6 @@ void Verifier::visitGlobalAlias(const GlobalAlias &GA) {
&GA);
}
- const GlobalValue *AG = GA.getAliasedGlobal();
- Assert1(AG, "Aliasing chain should end with function or global variable",
- &GA);
-
visitGlobalValue(GA);
}
@@ -522,7 +516,7 @@ void Verifier::visitNamedMDNode(const NamedMDNode &NMD) {
Assert1(!MD->isFunctionLocal(),
"Named metadata operand cannot be function local!", MD);
- visitMDNode(*MD, 0);
+ visitMDNode(*MD, nullptr);
}
}
@@ -548,7 +542,7 @@ void Verifier::visitMDNode(MDNode &MD, Function *F) {
// If this was an instruction, bb, or argument, verify that it is in the
// function that we expect.
- Function *ActualF = 0;
+ Function *ActualF = nullptr;
if (Instruction *I = dyn_cast<Instruction>(Op))
ActualF = I->getParent()->getParent();
else if (BasicBlock *BB = dyn_cast<BasicBlock>(Op))
@@ -821,6 +815,7 @@ void Verifier::VerifyFunctionAttrs(FunctionType *FT, AttributeSet Attrs,
bool SawNest = false;
bool SawReturned = false;
+ bool SawSRet = false;
for (unsigned i = 0, e = Attrs.getNumSlots(); i != e; ++i) {
unsigned Idx = Attrs.getSlotIndex(i);
@@ -851,8 +846,12 @@ void Verifier::VerifyFunctionAttrs(FunctionType *FT, AttributeSet Attrs,
SawReturned = true;
}
- if (Attrs.hasAttribute(Idx, Attribute::StructRet))
- Assert1(Idx == 1, "Attribute sret is not on first parameter!", V);
+ if (Attrs.hasAttribute(Idx, Attribute::StructRet)) {
+ Assert1(!SawSRet, "Cannot have multiple 'sret' parameters!", V);
+ Assert1(Idx == 1 || Idx == 2,
+ "Attribute 'sret' is not on first or second parameter!", V);
+ SawSRet = true;
+ }
if (Attrs.hasAttribute(Idx, Attribute::InAlloca)) {
Assert1(Idx == FT->getNumParams(),
@@ -1489,6 +1488,16 @@ void Verifier::VerifyCallSite(CallSite CS) {
// Verify call attributes.
VerifyFunctionAttrs(FTy, Attrs, I);
+ // Conservatively check the inalloca argument.
+ // We have a bug if we can find that there is an underlying alloca without
+ // inalloca.
+ if (CS.hasInAllocaArgument()) {
+ Value *InAllocaArg = CS.getArgument(FTy->getNumParams() - 1);
+ if (auto AI = dyn_cast<AllocaInst>(InAllocaArg->stripInBoundsOffsets()))
+ Assert2(AI->isUsedWithInAlloca(),
+ "inalloca argument for call has mismatched alloca", AI, I);
+ }
+
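[Editor's note] A minimal sketch of the shape this conservative check accepts, as a C++ string of hypothetical IR: the trailing call argument strips back to an alloca that itself carries the inalloca marker.

    static const char *InAllocaIR =
        "declare void @f(i32* inalloca)\n"
        "define void @g() {\n"
        "  %buf = alloca inalloca i32\n"        // alloca marked inalloca...
        "  call void @f(i32* inalloca %buf)\n"  // ...and passed as inalloca
        "  ret void\n"
        "}\n";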
if (FTy->isVarArg()) {
// FIXME? is 'nest' even legal here?
bool SawNest = false;
@@ -1530,7 +1539,7 @@ void Verifier::VerifyCallSite(CallSite CS) {
}
// Verify that there's no metadata unless it's a direct call to an intrinsic.
- if (CS.getCalledFunction() == 0 ||
+ if (CS.getCalledFunction() == nullptr ||
!CS.getCalledFunction()->getName().startswith("llvm.")) {
for (FunctionType::param_iterator PI = FTy->param_begin(),
PE = FTy->param_end(); PI != PE; ++PI)
@@ -1541,9 +1550,102 @@ void Verifier::VerifyCallSite(CallSite CS) {
visitInstruction(*I);
}
+/// Two types are "congruent" if they are identical, or if they are both pointer
+/// types with different pointee types and the same address space.
+static bool isTypeCongruent(Type *L, Type *R) {
+ if (L == R)
+ return true;
+ PointerType *PL = dyn_cast<PointerType>(L);
+ PointerType *PR = dyn_cast<PointerType>(R);
+ if (!PL || !PR)
+ return false;
+ return PL->getAddressSpace() == PR->getAddressSpace();
+}
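[Editor's note] A short illustration of what this predicate accepts under the typed-pointer rules of this era; the names are illustrative only:

    #include "llvm/IR/LLVMContext.h"
    #include "llvm/IR/Type.h"
    using namespace llvm;

    static void congruenceExamples() {
      LLVMContext Ctx;
      Type *I8Ptr    = Type::getInt8PtrTy(Ctx);    // i8*
      Type *I32Ptr   = Type::getInt32PtrTy(Ctx);   // i32*
      Type *I8PtrAS1 = Type::getInt8PtrTy(Ctx, 1); // i8 addrspace(1)*
      // isTypeCongruent(I8Ptr, I8Ptr)    -> true  (identical)
      // isTypeCongruent(I8Ptr, I32Ptr)   -> true  (pointers, same address space)
      // isTypeCongruent(I8Ptr, I8PtrAS1) -> false (address spaces differ)
      (void)I8Ptr; (void)I32Ptr; (void)I8PtrAS1;
    }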
+
+static AttrBuilder getParameterABIAttributes(int I, AttributeSet Attrs) {
+ static const Attribute::AttrKind ABIAttrs[] = {
+ Attribute::StructRet, Attribute::ByVal, Attribute::InAlloca,
+ Attribute::InReg, Attribute::Returned};
+ AttrBuilder Copy;
+ for (auto AK : ABIAttrs) {
+ if (Attrs.hasAttribute(I + 1, AK))
+ Copy.addAttribute(AK);
+ }
+ if (Attrs.hasAttribute(I + 1, Attribute::Alignment))
+ Copy.addAlignmentAttr(Attrs.getParamAlignment(I + 1));
+ return Copy;
+}
+
+void Verifier::verifyMustTailCall(CallInst &CI) {
+ Assert1(!CI.isInlineAsm(), "cannot use musttail call with inline asm", &CI);
+
+ // - The caller and callee prototypes must match. Pointer types of
+ // parameters or return types may differ in pointee type, but not
+ // address space.
+ Function *F = CI.getParent()->getParent();
+ auto GetFnTy = [](Value *V) {
+ return cast<FunctionType>(
+ cast<PointerType>(V->getType())->getElementType());
+ };
+ FunctionType *CallerTy = GetFnTy(F);
+ FunctionType *CalleeTy = GetFnTy(CI.getCalledValue());
+ Assert1(CallerTy->getNumParams() == CalleeTy->getNumParams(),
+ "cannot guarantee tail call due to mismatched parameter counts", &CI);
+ Assert1(CallerTy->isVarArg() == CalleeTy->isVarArg(),
+ "cannot guarantee tail call due to mismatched varargs", &CI);
+ Assert1(isTypeCongruent(CallerTy->getReturnType(), CalleeTy->getReturnType()),
+ "cannot guarantee tail call due to mismatched return types", &CI);
+ for (int I = 0, E = CallerTy->getNumParams(); I != E; ++I) {
+ Assert1(
+ isTypeCongruent(CallerTy->getParamType(I), CalleeTy->getParamType(I)),
+ "cannot guarantee tail call due to mismatched parameter types", &CI);
+ }
+
+ // - The calling conventions of the caller and callee must match.
+ Assert1(F->getCallingConv() == CI.getCallingConv(),
+ "cannot guarantee tail call due to mismatched calling conv", &CI);
+
+ // - All ABI-impacting function attributes, such as sret, byval, inreg,
+ // returned, and inalloca, must match.
+ AttributeSet CallerAttrs = F->getAttributes();
+ AttributeSet CalleeAttrs = CI.getAttributes();
+ for (int I = 0, E = CallerTy->getNumParams(); I != E; ++I) {
+ AttrBuilder CallerABIAttrs = getParameterABIAttributes(I, CallerAttrs);
+ AttrBuilder CalleeABIAttrs = getParameterABIAttributes(I, CalleeAttrs);
+ Assert2(CallerABIAttrs == CalleeABIAttrs,
+ "cannot guarantee tail call due to mismatched ABI impacting "
+ "function attributes", &CI, CI.getOperand(I));
+ }
+
+ // - The call must immediately precede a :ref:`ret <i_ret>` instruction,
+ // or a pointer bitcast followed by a ret instruction.
+ // - The ret instruction must return the (possibly bitcasted) value
+ // produced by the call or void.
+ Value *RetVal = &CI;
+ Instruction *Next = CI.getNextNode();
+
+ // Handle the optional bitcast.
+ if (BitCastInst *BI = dyn_cast_or_null<BitCastInst>(Next)) {
+ Assert1(BI->getOperand(0) == RetVal,
+ "bitcast following musttail call must use the call", BI);
+ RetVal = BI;
+ Next = BI->getNextNode();
+ }
+
+ // Check the return.
+ ReturnInst *Ret = dyn_cast_or_null<ReturnInst>(Next);
+  Assert1(Ret, "musttail call must precede a ret with an optional bitcast",
+ &CI);
+ Assert1(!Ret->getReturnValue() || Ret->getReturnValue() == RetVal,
+ "musttail call result must be returned", Ret);
+}
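[Editor's note] Taken together, the rules above admit shapes like the following hypothetical module: default calling convention on both sides, congruent prototypes, and the call immediately feeding the ret that returns its result.

    static const char *MustTailIR =
        "define i8* @callee(i8* %p) {\n"
        "  ret i8* %p\n"
        "}\n"
        "define i8* @caller(i8* %p) {\n"
        "  %r = musttail call i8* @callee(i8* %p)\n"
        "  ret i8* %r\n"
        "}\n";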
+
void Verifier::visitCallInst(CallInst &CI) {
VerifyCallSite(&CI);
+ if (CI.isMustTailCall())
+ verifyMustTailCall(CI);
+
if (Function *F = CI.getCalledFunction())
if (Intrinsic::ID ID = (Intrinsic::ID)F->getIntrinsicID())
visitIntrinsicFunctionCall(ID, CI);
@@ -1731,11 +1833,11 @@ void Verifier::visitLoadInst(LoadInst &LI) {
"Atomic load must specify explicit alignment", &LI);
if (!ElTy->isPointerTy()) {
Assert2(ElTy->isIntegerTy(),
- "atomic store operand must have integer type!",
+ "atomic load operand must have integer type!",
&LI, ElTy);
unsigned Size = ElTy->getPrimitiveSizeInBits();
Assert2(Size >= 8 && !(Size & (Size - 1)),
- "atomic store operand must be power-of-two byte-sized integer",
+ "atomic load operand must be power-of-two byte-sized integer",
&LI, ElTy);
}
} else {
@@ -2020,8 +2122,8 @@ void Verifier::visitInstruction(Instruction &I) {
// instruction, it is an error!
for (Use &U : I.uses()) {
if (Instruction *Used = dyn_cast<Instruction>(U.getUser()))
- Assert2(Used->getParent() != 0, "Instruction referencing instruction not"
- " embedded in a basic block!", &I, Used);
+ Assert2(Used->getParent() != nullptr, "Instruction referencing"
+ " instruction not embedded in a basic block!", &I, Used);
else {
CheckFailed("Use of instruction is not an instruction!", U);
return;
@@ -2029,7 +2131,7 @@ void Verifier::visitInstruction(Instruction &I) {
}
for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) {
- Assert1(I.getOperand(i) != 0, "Instruction has null operand!", &I);
+ Assert1(I.getOperand(i) != nullptr, "Instruction has null operand!", &I);
// Check to make sure that only first-class-values are operands to
// instructions.
@@ -2103,11 +2205,6 @@ void Verifier::visitInstruction(Instruction &I) {
MDNode *MD = I.getMetadata(LLVMContext::MD_range);
Assert1(!MD || isa<LoadInst>(I), "Ranges are only for loads!", &I);
- if (!DisableDebugInfoVerifier) {
- MD = I.getMetadata(LLVMContext::MD_dbg);
- Finder.processLocation(*M, DILocation(MD));
- }
-
InstsInThisBlock.insert(&I);
}
@@ -2137,18 +2234,18 @@ bool Verifier::VerifyIntrinsicType(Type *Ty,
case IITDescriptor::Integer: return !Ty->isIntegerTy(D.Integer_Width);
case IITDescriptor::Vector: {
VectorType *VT = dyn_cast<VectorType>(Ty);
- return VT == 0 || VT->getNumElements() != D.Vector_Width ||
+ return !VT || VT->getNumElements() != D.Vector_Width ||
VerifyIntrinsicType(VT->getElementType(), Infos, ArgTys);
}
case IITDescriptor::Pointer: {
PointerType *PT = dyn_cast<PointerType>(Ty);
- return PT == 0 || PT->getAddressSpace() != D.Pointer_AddressSpace ||
+ return !PT || PT->getAddressSpace() != D.Pointer_AddressSpace ||
VerifyIntrinsicType(PT->getElementType(), Infos, ArgTys);
}
case IITDescriptor::Struct: {
StructType *ST = dyn_cast<StructType>(Ty);
- if (ST == 0 || ST->getNumElements() != D.Struct_NumElements)
+ if (!ST || ST->getNumElements() != D.Struct_NumElements)
return true;
for (unsigned i = 0, e = D.Struct_NumElements; i != e; ++i)
@@ -2307,17 +2404,7 @@ void Verifier::visitIntrinsicFunctionCall(Intrinsic::ID ID, CallInst &CI) {
MDNode *MD = cast<MDNode>(CI.getArgOperand(0));
Assert1(MD->getNumOperands() == 1,
"invalid llvm.dbg.declare intrinsic call 2", &CI);
- if (!DisableDebugInfoVerifier)
- Finder.processDeclare(*M, cast<DbgDeclareInst>(&CI));
} break;
- case Intrinsic::dbg_value: { //llvm.dbg.value
- if (!DisableDebugInfoVerifier) {
- Assert1(CI.getArgOperand(0) && isa<MDNode>(CI.getArgOperand(0)),
- "invalid llvm.dbg.value intrinsic call 1", &CI);
- Finder.processValue(*M, cast<DbgValueInst>(&CI));
- }
- break;
- }
case Intrinsic::memcpy:
case Intrinsic::memmove:
case Intrinsic::memset:
@@ -2379,25 +2466,58 @@ void Verifier::visitIntrinsicFunctionCall(Intrinsic::ID ID, CallInst &CI) {
}
}
-void Verifier::verifyDebugInfo() {
+void DebugInfoVerifier::verifyDebugInfo() {
+ if (!VerifyDebugInfo)
+ return;
+
+ DebugInfoFinder Finder;
+ Finder.processModule(*M);
+ processInstructions(Finder);
+
// Verify Debug Info.
- if (!DisableDebugInfoVerifier) {
- for (DICompileUnit CU : Finder.compile_units()) {
- Assert1(CU.Verify(), "DICompileUnit does not Verify!", CU);
- }
- for (DISubprogram S : Finder.subprograms()) {
- Assert1(S.Verify(), "DISubprogram does not Verify!", S);
- }
- for (DIGlobalVariable GV : Finder.global_variables()) {
- Assert1(GV.Verify(), "DIGlobalVariable does not Verify!", GV);
- }
- for (DIType T : Finder.types()) {
- Assert1(T.Verify(), "DIType does not Verify!", T);
- }
- for (DIScope S : Finder.scopes()) {
- Assert1(S.Verify(), "DIScope does not Verify!", S);
- }
+ //
+ // NOTE: The loud braces are necessary for MSVC compatibility.
+ for (DICompileUnit CU : Finder.compile_units()) {
+ Assert1(CU.Verify(), "DICompileUnit does not Verify!", CU);
}
+ for (DISubprogram S : Finder.subprograms()) {
+ Assert1(S.Verify(), "DISubprogram does not Verify!", S);
+ }
+ for (DIGlobalVariable GV : Finder.global_variables()) {
+ Assert1(GV.Verify(), "DIGlobalVariable does not Verify!", GV);
+ }
+ for (DIType T : Finder.types()) {
+ Assert1(T.Verify(), "DIType does not Verify!", T);
+ }
+ for (DIScope S : Finder.scopes()) {
+ Assert1(S.Verify(), "DIScope does not Verify!", S);
+ }
+}
+
+void DebugInfoVerifier::processInstructions(DebugInfoFinder &Finder) {
+ for (const Function &F : *M)
+ for (auto I = inst_begin(&F), E = inst_end(&F); I != E; ++I) {
+ if (MDNode *MD = I->getMetadata(LLVMContext::MD_dbg))
+ Finder.processLocation(*M, DILocation(MD));
+ if (const CallInst *CI = dyn_cast<CallInst>(&*I))
+ processCallInst(Finder, *CI);
+ }
+}
+
+void DebugInfoVerifier::processCallInst(DebugInfoFinder &Finder,
+ const CallInst &CI) {
+ if (Function *F = CI.getCalledFunction())
+ if (Intrinsic::ID ID = (Intrinsic::ID)F->getIntrinsicID())
+ switch (ID) {
+ case Intrinsic::dbg_declare:
+ Finder.processDeclare(*M, cast<DbgDeclareInst>(&CI));
+ break;
+ case Intrinsic::dbg_value:
+ Finder.processValue(*M, cast<DbgValueInst>(&CI));
+ break;
+ default:
+ break;
+ }
}
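[Editor's note] The traversal above mirrors what a client could do directly with DebugInfoFinder; a minimal sketch against the 3.5-era API, assuming an already-loaded Module:

    #include "llvm/IR/DebugInfo.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    static void dumpDebugInfo(const Module &M) {
      DebugInfoFinder Finder;
      Finder.processModule(M);
      for (DICompileUnit CU : Finder.compile_units())
        errs() << "cu: " << CU.getFilename() << "\n";
      for (DISubprogram SP : Finder.subprograms())
        errs() << "fn: " << SP.getName() << "\n";
    }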
//===----------------------------------------------------------------------===//
@@ -2427,7 +2547,8 @@ bool llvm::verifyModule(const Module &M, raw_ostream *OS) {
// Note that this function's return value is inverted from what you would
// expect of a function called "verify".
- return !V.verify(M) || Broken;
+ DebugInfoVerifier DIV(OS ? *OS : NullStr);
+ return !V.verify(M) || !DIV.verify(M) || Broken;
}
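[Editor's note] A self-contained sketch of driving this entry point, using the ParseIR helper updated later in this patch; the IR payload is hypothetical, and the raw-pointer ownership conventions are the 3.5-era ones shown in the surrounding hunks:

    #include "llvm/IR/LLVMContext.h"
    #include "llvm/IR/Module.h"
    #include "llvm/IR/Verifier.h"
    #include "llvm/IRReader/IRReader.h"
    #include "llvm/Support/MemoryBuffer.h"
    #include "llvm/Support/SourceMgr.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    int main() {
      LLVMContext Context;
      SMDiagnostic Err;
      // ParseIR takes ownership of the buffer in this era.
      Module *M = ParseIR(
          MemoryBuffer::getMemBuffer("define void @f() { ret void }"), Err,
          Context);
      if (!M)
        return 1;
      // true means "broken"; diagnostics, including the new debug info
      // checks, go to the stream passed in.
      bool Broken = verifyModule(*M, &errs());
      delete M;
      return Broken ? 1 : 0;
    }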
namespace {
@@ -2463,15 +2584,48 @@ struct VerifierLegacyPass : public FunctionPass {
AU.setPreservesAll();
}
};
+struct DebugInfoVerifierLegacyPass : public ModulePass {
+ static char ID;
+
+ DebugInfoVerifier V;
+ bool FatalErrors;
+
+ DebugInfoVerifierLegacyPass() : ModulePass(ID), FatalErrors(true) {
+ initializeDebugInfoVerifierLegacyPassPass(*PassRegistry::getPassRegistry());
+ }
+ explicit DebugInfoVerifierLegacyPass(bool FatalErrors)
+ : ModulePass(ID), V(dbgs()), FatalErrors(FatalErrors) {
+ initializeDebugInfoVerifierLegacyPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnModule(Module &M) override {
+ if (!V.verify(M) && FatalErrors)
+ report_fatal_error("Broken debug info found, compilation aborted!");
+
+ return false;
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesAll();
+ }
+};
}
char VerifierLegacyPass::ID = 0;
INITIALIZE_PASS(VerifierLegacyPass, "verify", "Module Verifier", false, false)
+char DebugInfoVerifierLegacyPass::ID = 0;
+INITIALIZE_PASS(DebugInfoVerifierLegacyPass, "verify-di", "Debug Info Verifier",
+ false, false)
+
FunctionPass *llvm::createVerifierPass(bool FatalErrors) {
return new VerifierLegacyPass(FatalErrors);
}
+ModulePass *llvm::createDebugInfoVerifierPass(bool FatalErrors) {
+ return new DebugInfoVerifierLegacyPass(FatalErrors);
+}
+
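[Editor's note] The new legacy pass can be scheduled right next to the IR verifier, which is exactly what the LTOCodeGenerator hunks later in this patch do; a minimal sketch:

    #include "llvm/IR/Verifier.h"
    #include "llvm/PassManager.h"
    using namespace llvm;

    static void runVerifiers(Module &M) {
      PassManager PM;
      PM.add(createVerifierPass());          // IR structural checks
      PM.add(createDebugInfoVerifierPass()); // new debug info checks
      PM.run(M);
    }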
PreservedAnalyses VerifierPass::run(Module *M) {
if (verifyModule(*M, &dbgs()) && FatalErrors)
report_fatal_error("Broken module found, compilation aborted!");
diff --git a/lib/IR/module.modulemap b/lib/IR/module.modulemap
new file mode 100644
index 0000000..9698e91
--- /dev/null
+++ b/lib/IR/module.modulemap
@@ -0,0 +1 @@
+module IR { requires cplusplus umbrella "." module * { export * } }
diff --git a/lib/IRReader/IRReader.cpp b/lib/IRReader/IRReader.cpp
index 8be8ab8..f4ed437 100644
--- a/lib/IRReader/IRReader.cpp
+++ b/lib/IRReader/IRReader.cpp
@@ -42,12 +42,12 @@ Module *llvm::getLazyIRModule(MemoryBuffer *Buffer, SMDiagnostic &Err,
// ParseBitcodeFile does not take ownership of the Buffer in the
// case of an error.
delete Buffer;
- return NULL;
+ return nullptr;
}
return ModuleOrErr.get();
}
- return ParseAssembly(Buffer, 0, Err, Context);
+ return ParseAssembly(Buffer, nullptr, Err, Context);
}
Module *llvm::getLazyIRFileModule(const std::string &Filename, SMDiagnostic &Err,
@@ -56,7 +56,7 @@ Module *llvm::getLazyIRFileModule(const std::string &Filename, SMDiagnostic &Err
if (error_code ec = MemoryBuffer::getFileOrSTDIN(Filename, File)) {
Err = SMDiagnostic(Filename, SourceMgr::DK_Error,
"Could not open input file: " + ec.message());
- return 0;
+ return nullptr;
}
return getLazyIRModule(File.release(), Err, Context);
@@ -69,7 +69,7 @@ Module *llvm::ParseIR(MemoryBuffer *Buffer, SMDiagnostic &Err,
if (isBitcode((const unsigned char *)Buffer->getBufferStart(),
(const unsigned char *)Buffer->getBufferEnd())) {
ErrorOr<Module *> ModuleOrErr = parseBitcodeFile(Buffer, Context);
- Module *M = 0;
+ Module *M = nullptr;
if (error_code EC = ModuleOrErr.getError())
Err = SMDiagnostic(Buffer->getBufferIdentifier(), SourceMgr::DK_Error,
EC.message());
@@ -80,7 +80,7 @@ Module *llvm::ParseIR(MemoryBuffer *Buffer, SMDiagnostic &Err,
return M;
}
- return ParseAssembly(Buffer, 0, Err, Context);
+ return ParseAssembly(Buffer, nullptr, Err, Context);
}
Module *llvm::ParseIRFile(const std::string &Filename, SMDiagnostic &Err,
@@ -89,7 +89,7 @@ Module *llvm::ParseIRFile(const std::string &Filename, SMDiagnostic &Err,
if (error_code ec = MemoryBuffer::getFileOrSTDIN(Filename, File)) {
Err = SMDiagnostic(Filename, SourceMgr::DK_Error,
"Could not open input file: " + ec.message());
- return 0;
+ return nullptr;
}
return ParseIR(File.release(), Err, Context);
@@ -111,7 +111,7 @@ LLVMBool LLVMParseIRInContext(LLVMContextRef ContextRef,
std::string buf;
raw_string_ostream os(buf);
- Diag.print(NULL, os, false);
+ Diag.print(nullptr, os, false);
os.flush();
*OutMessage = strdup(buf.c_str());
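[Editor's note] For reference, a hedged sketch of calling the C wrapper shown above; the file name is hypothetical, and LLVMParseIRInContext consumes the buffer on both success and failure:

    #include <cstdio>
    #include "llvm-c/Core.h"
    #include "llvm-c/IRReader.h"

    int main() {
      LLVMContextRef Ctx = LLVMContextCreate();
      LLVMMemoryBufferRef Buf;
      LLVMModuleRef Mod;
      char *Msg = NULL;
      // Both calls return 0 on success.
      if (!LLVMCreateMemoryBufferWithContentsOfFile("input.ll", &Buf, &Msg) &&
          !LLVMParseIRInContext(Ctx, Buf, &Mod, &Msg))
        LLVMDumpModule(Mod);
      else
        fprintf(stderr, "error: %s\n", Msg);
      return 0;
    }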
diff --git a/lib/LTO/LTOCodeGenerator.cpp b/lib/LTO/LTOCodeGenerator.cpp
index 51d0899..99236bd 100644
--- a/lib/LTO/LTOCodeGenerator.cpp
+++ b/lib/LTO/LTOCodeGenerator.cpp
@@ -63,20 +63,20 @@ const char* LTOCodeGenerator::getVersionString() {
}
LTOCodeGenerator::LTOCodeGenerator()
- : Context(getGlobalContext()), Linker(new Module("ld-temp.o", Context)),
- TargetMach(NULL), EmitDwarfDebugInfo(false), ScopeRestrictionsDone(false),
- CodeModel(LTO_CODEGEN_PIC_MODEL_DYNAMIC), NativeObjectFile(NULL),
- DiagHandler(NULL), DiagContext(NULL) {
+ : Context(getGlobalContext()), IRLinker(new Module("ld-temp.o", Context)),
+ TargetMach(nullptr), EmitDwarfDebugInfo(false),
+ ScopeRestrictionsDone(false), CodeModel(LTO_CODEGEN_PIC_MODEL_DEFAULT),
+ NativeObjectFile(nullptr), DiagHandler(nullptr), DiagContext(nullptr) {
initializeLTOPasses();
}
LTOCodeGenerator::~LTOCodeGenerator() {
delete TargetMach;
delete NativeObjectFile;
- TargetMach = NULL;
- NativeObjectFile = NULL;
+ TargetMach = nullptr;
+ NativeObjectFile = nullptr;
- Linker.deleteModule();
+ IRLinker.deleteModule();
for (std::vector<char *>::iterator I = CodegenOptions.begin(),
E = CodegenOptions.end();
@@ -114,7 +114,7 @@ void LTOCodeGenerator::initializeLTOPasses() {
}
bool LTOCodeGenerator::addModule(LTOModule* mod, std::string& errMsg) {
- bool ret = Linker.linkInModule(mod->getLLVVMModule(), &errMsg);
+ bool ret = IRLinker.linkInModule(mod->getLLVVMModule(), &errMsg);
const std::vector<const char*> &undefs = mod->getAsmUndefinedRefs();
for (int i = 0, e = undefs.size(); i != e; ++i)
@@ -140,7 +140,6 @@ void LTOCodeGenerator::setTargetOptions(TargetOptions options) {
Options.StackAlignmentOverride = options.StackAlignmentOverride;
Options.TrapFuncName = options.TrapFuncName;
Options.PositionIndependentExecutable = options.PositionIndependentExecutable;
- Options.EnableSegmentedStacks = options.EnableSegmentedStacks;
Options.UseInitArray = options.UseInitArray;
}
@@ -162,6 +161,7 @@ void LTOCodeGenerator::setCodePICModel(lto_codegen_model model) {
case LTO_CODEGEN_PIC_MODEL_STATIC:
case LTO_CODEGEN_PIC_MODEL_DYNAMIC:
case LTO_CODEGEN_PIC_MODEL_DYNAMIC_NO_PIC:
+ case LTO_CODEGEN_PIC_MODEL_DEFAULT:
CodeModel = model;
return;
}
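[Editor's note] From the C API side, the new enumerator lets a client opt out of forcing a relocation model; a hypothetical sketch against llvm-c/lto.h:

    #include "llvm-c/lto.h"

    static void configure(lto_code_gen_t CG) {
      // New in this patch: keep the target's default relocation model
      // instead of forcing static or dynamic PIC.
      lto_codegen_set_pic_model(CG, LTO_CODEGEN_PIC_MODEL_DEFAULT);
    }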
@@ -186,7 +186,7 @@ bool LTOCodeGenerator::writeMergedModules(const char *path,
}
// write bitcode to it
- WriteBitcodeToFile(Linker.getModule(), Out.os());
+ WriteBitcodeToFile(IRLinker.getModule(), Out.os());
Out.os().close();
if (Out.os().has_error()) {
@@ -245,7 +245,7 @@ const void* LTOCodeGenerator::compile(size_t* length,
const char *name;
if (!compile_to_file(&name, disableOpt, disableInline, disableGVNLoadPRE,
errMsg))
- return NULL;
+ return nullptr;
// remove old buffer if compile() called twice
delete NativeObjectFile;
@@ -255,7 +255,7 @@ const void* LTOCodeGenerator::compile(size_t* length,
if (error_code ec = MemoryBuffer::getFile(name, BuffPtr, -1, false)) {
errMsg = ec.message();
sys::fs::remove(NativeObjectPath);
- return NULL;
+ return nullptr;
}
NativeObjectFile = BuffPtr.release();
@@ -263,24 +263,24 @@ const void* LTOCodeGenerator::compile(size_t* length,
sys::fs::remove(NativeObjectPath);
// return buffer, unless error
- if (NativeObjectFile == NULL)
- return NULL;
+ if (!NativeObjectFile)
+ return nullptr;
*length = NativeObjectFile->getBufferSize();
return NativeObjectFile->getBufferStart();
}
bool LTOCodeGenerator::determineTarget(std::string &errMsg) {
- if (TargetMach != NULL)
+ if (TargetMach)
return true;
- std::string TripleStr = Linker.getModule()->getTargetTriple();
+ std::string TripleStr = IRLinker.getModule()->getTargetTriple();
if (TripleStr.empty())
TripleStr = sys::getDefaultTargetTriple();
llvm::Triple Triple(TripleStr);
// create target machine from info for merged modules
const Target *march = TargetRegistry::lookupTarget(TripleStr, errMsg);
- if (march == NULL)
+ if (!march)
return false;
// The relocation model is actually a static member of TargetMachine and
@@ -296,10 +296,14 @@ bool LTOCodeGenerator::determineTarget(std::string &errMsg) {
case LTO_CODEGEN_PIC_MODEL_DYNAMIC_NO_PIC:
RelocModel = Reloc::DynamicNoPIC;
break;
+ case LTO_CODEGEN_PIC_MODEL_DEFAULT:
+ // RelocModel is already the default, so leave it that way.
+ break;
}
- // construct LTOModule, hand over ownership of module and target
- SubtargetFeatures Features;
+ // Construct LTOModule, hand over ownership of module and target. Use MAttr as
+ // the default set of features.
+ SubtargetFeatures Features(MAttr);
Features.getDefaultSubtargetFeatures(Triple);
std::string FeatureStr = Features.getString();
// Set a default CPU for Darwin triples.
@@ -308,7 +312,8 @@ bool LTOCodeGenerator::determineTarget(std::string &errMsg) {
MCpu = "core2";
else if (Triple.getArch() == llvm::Triple::x86)
MCpu = "yonah";
- else if (Triple.getArch() == llvm::Triple::arm64)
+ else if (Triple.getArch() == llvm::Triple::arm64 ||
+ Triple.getArch() == llvm::Triple::aarch64)
MCpu = "cyclone";
}
@@ -352,7 +357,7 @@ applyRestriction(GlobalValue &GV,
static void findUsedValues(GlobalVariable *LLVMUsed,
SmallPtrSet<GlobalValue*, 8> &UsedValues) {
- if (LLVMUsed == 0) return;
+ if (!LLVMUsed) return;
ConstantArray *Inits = cast<ConstantArray>(LLVMUsed->getInitializer());
for (unsigned i = 0, e = Inits->getNumOperands(); i != e; ++i)
@@ -391,11 +396,12 @@ static void accumulateAndSortLibcalls(std::vector<StringRef> &Libcalls,
void LTOCodeGenerator::applyScopeRestrictions() {
if (ScopeRestrictionsDone)
return;
- Module *mergedModule = Linker.getModule();
+ Module *mergedModule = IRLinker.getModule();
// Start off with a verification pass.
PassManager passes;
passes.add(createVerifierPass());
+ passes.add(createDebugInfoVerifierPass());
// mark which symbols can not be internalized
Mangler Mangler(TargetMach->getDataLayout());
@@ -424,9 +430,7 @@ void LTOCodeGenerator::applyScopeRestrictions() {
if (!AsmUsed.empty()) {
llvm::Type *i8PTy = llvm::Type::getInt8PtrTy(Context);
std::vector<Constant*> asmUsed2;
- for (SmallPtrSet<GlobalValue*, 16>::const_iterator i = AsmUsed.begin(),
- e = AsmUsed.end(); i !=e; ++i) {
- GlobalValue *GV = *i;
+ for (auto *GV : AsmUsed) {
Constant *c = ConstantExpr::getBitCast(GV, i8PTy);
asmUsed2.push_back(c);
}
@@ -458,7 +462,7 @@ bool LTOCodeGenerator::generateObjectFile(raw_ostream &out,
if (!this->determineTarget(errMsg))
return false;
- Module *mergedModule = Linker.getModule();
+ Module *mergedModule = IRLinker.getModule();
// Mark which symbols can not be internalized
this->applyScopeRestrictions();
@@ -468,6 +472,7 @@ bool LTOCodeGenerator::generateObjectFile(raw_ostream &out,
// Start off with a verification pass.
passes.add(createVerifierPass());
+ passes.add(createDebugInfoVerifierPass());
// Add an appropriate DataLayout instance for this module...
mergedModule->setDataLayout(TargetMach->getDataLayout());
@@ -489,6 +494,7 @@ bool LTOCodeGenerator::generateObjectFile(raw_ostream &out,
// Make sure everything is still good.
passes.add(createVerifierPass());
+ passes.add(createDebugInfoVerifierPass());
PassManager codeGenPasses;
@@ -576,7 +582,7 @@ LTOCodeGenerator::setDiagnosticHandler(lto_diagnostic_handler_t DiagHandler,
this->DiagHandler = DiagHandler;
this->DiagContext = Ctxt;
if (!DiagHandler)
- return Context.setDiagnosticHandler(NULL, NULL);
+ return Context.setDiagnosticHandler(nullptr, nullptr);
// Register the LTOCodeGenerator stub in the LLVMContext to forward the
// diagnostic to the external DiagHandler.
Context.setDiagnosticHandler(LTOCodeGenerator::DiagnosticHandler, this);
diff --git a/lib/LTO/LTOModule.cpp b/lib/LTO/LTOModule.cpp
index 567da04..d117514 100644
--- a/lib/LTO/LTOModule.cpp
+++ b/lib/LTO/LTOModule.cpp
@@ -100,7 +100,7 @@ LTOModule *LTOModule::makeLTOModule(const char *path, TargetOptions options,
std::unique_ptr<MemoryBuffer> buffer;
if (error_code ec = MemoryBuffer::getFile(path, buffer)) {
errMsg = ec.message();
- return NULL;
+ return nullptr;
}
return makeLTOModule(buffer.release(), options, errMsg);
}
@@ -120,7 +120,7 @@ LTOModule *LTOModule::makeLTOModule(int fd, const char *path,
if (error_code ec =
MemoryBuffer::getOpenFileSlice(fd, path, buffer, map_size, offset)) {
errMsg = ec.message();
- return NULL;
+ return nullptr;
}
return makeLTOModule(buffer.release(), options, errMsg);
}
@@ -130,7 +130,7 @@ LTOModule *LTOModule::makeLTOModule(const void *mem, size_t length,
std::string &errMsg, StringRef path) {
std::unique_ptr<MemoryBuffer> buffer(makeBuffer(mem, length, path));
if (!buffer)
- return NULL;
+ return nullptr;
return makeLTOModule(buffer.release(), options, errMsg);
}
@@ -143,7 +143,7 @@ LTOModule *LTOModule::makeLTOModule(MemoryBuffer *buffer,
if (error_code EC = ModuleOrErr.getError()) {
errMsg = EC.message();
delete buffer;
- return NULL;
+ return nullptr;
}
std::unique_ptr<Module> m(ModuleOrErr.get());
@@ -155,7 +155,7 @@ LTOModule *LTOModule::makeLTOModule(MemoryBuffer *buffer,
// find machine architecture for this module
const Target *march = TargetRegistry::lookupTarget(TripleStr, errMsg);
if (!march)
- return NULL;
+ return nullptr;
// construct LTOModule, hand over ownership of module and target
SubtargetFeatures Features;
@@ -168,7 +168,8 @@ LTOModule *LTOModule::makeLTOModule(MemoryBuffer *buffer,
CPU = "core2";
else if (Triple.getArch() == llvm::Triple::x86)
CPU = "yonah";
- else if (Triple.getArch() == llvm::Triple::arm64)
+ else if (Triple.getArch() == llvm::Triple::arm64 ||
+ Triple.getArch() == llvm::Triple::aarch64)
CPU = "cyclone";
}
@@ -189,7 +190,7 @@ LTOModule *LTOModule::makeLTOModule(MemoryBuffer *buffer,
if (Ret->parseSymbols(errMsg)) {
delete Ret;
- return NULL;
+ return nullptr;
}
Ret->parseMetadata();
@@ -396,7 +397,7 @@ void LTOModule::addDefinedSymbol(const GlobalValue *def, bool isFunction) {
// set alignment part; log2() can have rounding errors

uint32_t align = def->getAlignment();
- uint32_t attr = align ? countTrailingZeros(def->getAlignment()) : 0;
+ uint32_t attr = align ? countTrailingZeros(align) : 0;
// set permissions part
if (isFunction) {
@@ -418,17 +419,17 @@ void LTOModule::addDefinedSymbol(const GlobalValue *def, bool isFunction) {
attr |= LTO_SYMBOL_DEFINITION_REGULAR;
// set scope part
- if (def->hasHiddenVisibility())
+ if (def->hasLocalLinkage())
+ // Ignore visibility if linkage is local.
+ attr |= LTO_SYMBOL_SCOPE_INTERNAL;
+ else if (def->hasHiddenVisibility())
attr |= LTO_SYMBOL_SCOPE_HIDDEN;
else if (def->hasProtectedVisibility())
attr |= LTO_SYMBOL_SCOPE_PROTECTED;
else if (canBeHidden(def))
attr |= LTO_SYMBOL_SCOPE_DEFAULT_CAN_BE_HIDDEN;
- else if (def->hasExternalLinkage() || def->hasWeakLinkage() ||
- def->hasLinkOnceLinkage() || def->hasCommonLinkage())
- attr |= LTO_SYMBOL_SCOPE_DEFAULT;
else
- attr |= LTO_SYMBOL_SCOPE_INTERNAL;
+ attr |= LTO_SYMBOL_SCOPE_DEFAULT;
StringSet::value_type &entry = _defines.GetOrCreateValue(Buffer);
entry.setValue(1);
@@ -460,7 +461,7 @@ void LTOModule::addAsmGlobalSymbol(const char *name,
NameAndAttributes &info = _undefines[entry.getKey().data()];
- if (info.symbol == 0) {
+ if (info.symbol == nullptr) {
// FIXME: This is trying to take care of module ASM like this:
//
// module asm ".zerofill __FOO, __foo, _bar_baz_qux, 0"
@@ -474,7 +475,7 @@ void LTOModule::addAsmGlobalSymbol(const char *name,
info.attributes =
LTO_SYMBOL_PERMISSIONS_DATA | LTO_SYMBOL_DEFINITION_REGULAR | scope;
info.isFunction = false;
- info.symbol = 0;
+ info.symbol = nullptr;
// add to table of symbols
_symbols.push_back(info);
@@ -502,13 +503,13 @@ void LTOModule::addAsmGlobalSymbolUndef(const char *name) {
if (entry.getValue().name)
return;
- uint32_t attr = LTO_SYMBOL_DEFINITION_UNDEFINED;;
+ uint32_t attr = LTO_SYMBOL_DEFINITION_UNDEFINED;
attr |= LTO_SYMBOL_SCOPE_DEFAULT;
NameAndAttributes info;
info.name = entry.getKey().data();
info.attributes = attr;
info.isFunction = false;
- info.symbol = 0;
+ info.symbol = nullptr;
entry.setValue(info);
}
@@ -698,7 +699,8 @@ namespace {
void EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol,
uint64_t Size, unsigned ByteAlignment) override {}
void EmitBytes(StringRef Data) override {}
- void EmitValueImpl(const MCExpr *Value, unsigned Size) override {}
+ void EmitValueImpl(const MCExpr *Value, unsigned Size,
+ const SMLoc &Loc) override {}
void EmitULEB128Value(const MCExpr *Value) override {}
void EmitSLEB128Value(const MCExpr *Value) override {}
void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value,
@@ -709,9 +711,6 @@ namespace {
bool EmitValueToOffset(const MCExpr *Offset,
unsigned char Value) override { return false; }
void EmitFileDirective(StringRef Filename) override {}
- void EmitDwarfAdvanceLineAddr(int64_t LineDelta, const MCSymbol *LastLabel,
- const MCSymbol *Label,
- unsigned PointerSize) override {}
void FinishImpl() override {}
void EmitCFIEndProcImpl(MCDwarfFrameInfo &Frame) override {
RecordProcEnd(Frame);
@@ -738,7 +737,8 @@ bool LTOModule::addAsmGlobalSymbols(std::string &errMsg) {
_target->getTargetTriple(), _target->getTargetCPU(),
_target->getTargetFeatureString()));
std::unique_ptr<MCTargetAsmParser> TAP(
- T.createMCAsmParser(*STI, *Parser.get(), *MCII));
+ T.createMCAsmParser(*STI, *Parser.get(), *MCII,
+ _target->Options.MCOptions));
if (!TAP) {
errMsg = "target " + std::string(T.getName()) +
" does not define AsmParser.";
@@ -801,14 +801,8 @@ bool LTOModule::parseSymbols(std::string &errMsg) {
return true;
// add aliases
- for (Module::alias_iterator a = _module->alias_begin(),
- e = _module->alias_end(); a != e; ++a) {
- if (isDeclaration(*a->getAliasedGlobal()))
- // Is an alias to a declaration.
- addPotentialUndefinedSymbol(a, false);
- else
- addDefinedDataSymbol(a);
- }
+ for (const auto &Alias : _module->aliases())
+ addDefinedDataSymbol(&Alias);
// make symbols for all undefines
for (StringMap<NameAndAttributes>::iterator u =_undefines.begin(),
diff --git a/lib/Linker/LinkModules.cpp b/lib/Linker/LinkModules.cpp
index c6476ce..45f2d4e 100644
--- a/lib/Linker/LinkModules.cpp
+++ b/lib/Linker/LinkModules.cpp
@@ -43,12 +43,12 @@ class TypeMapTy : public ValueMapTypeRemapper {
/// we speculatively add types to MappedTypes, but keep track of them here in
/// case we need to roll back.
SmallVector<Type*, 16> SpeculativeTypes;
-
+
/// SrcDefinitionsToResolve - This is a list of non-opaque structs in the
/// source module that are mapped to an opaque struct in the destination
/// module.
SmallVector<StructType*, 16> SrcDefinitionsToResolve;
-
+
/// DstResolvedOpaqueTypes - This is the set of opaque types in the
/// destination modules who are getting a body from the source module.
SmallPtrSet<StructType*, 16> DstResolvedOpaqueTypes;
@@ -65,7 +65,7 @@ public:
/// linkDefinedTypeBodies - Produce a body for an opaque type in the dest
/// module from a type definition in the source module.
void linkDefinedTypeBodies();
-
+
/// get - Return the mapped type to use for the specified input type from the
/// source module.
Type *get(Type *SrcTy);
@@ -90,7 +90,7 @@ private:
Type *remapType(Type *SrcTy) override {
return get(SrcTy);
}
-
+
bool areTypesIsomorphic(Type *DstTy, Type *SrcTy);
};
}
@@ -98,12 +98,12 @@ private:
void TypeMapTy::addTypeMapping(Type *DstTy, Type *SrcTy) {
Type *&Entry = MappedTypes[SrcTy];
if (Entry) return;
-
+
if (DstTy == SrcTy) {
Entry = DstTy;
return;
}
-
+
// Check to see if these types are recursively isomorphic and establish a
// mapping between them if so.
if (!areTypesIsomorphic(DstTy, SrcTy)) {
@@ -132,7 +132,7 @@ bool TypeMapTy::areTypesIsomorphic(Type *DstTy, Type *SrcTy) {
Entry = DstTy;
return true;
}
-
+
// Okay, we have two types with identical kinds that we haven't seen before.
// If this is an opaque struct type, special case it.
@@ -158,18 +158,18 @@ bool TypeMapTy::areTypesIsomorphic(Type *DstTy, Type *SrcTy) {
return true;
}
}
-
+
// If the number of subtypes disagree between the two types, then we fail.
if (SrcTy->getNumContainedTypes() != DstTy->getNumContainedTypes())
return false;
-
+
// Fail if any of the extra properties (e.g. array size) of the type disagree.
if (isa<IntegerType>(DstTy))
return false; // bitwidth disagrees.
if (PointerType *PT = dyn_cast<PointerType>(DstTy)) {
if (PT->getAddressSpace() != cast<PointerType>(SrcTy)->getAddressSpace())
return false;
-
+
} else if (FunctionType *FT = dyn_cast<FunctionType>(DstTy)) {
if (FT->isVarArg() != cast<FunctionType>(SrcTy)->isVarArg())
return false;
@@ -195,7 +195,7 @@ bool TypeMapTy::areTypesIsomorphic(Type *DstTy, Type *SrcTy) {
if (!areTypesIsomorphic(DstTy->getContainedType(i),
SrcTy->getContainedType(i)))
return false;
-
+
// If everything seems to have lined up, then everything is great.
return true;
}
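[Editor's note] As an illustration of what "isomorphic" means here, two recursive named structs from different modules unify even though their names differ; a hypothetical pair:

    // Same element count, and the recursive self-references line up
    // position for position, so areTypesIsomorphic returns true and the
    // linker reuses %dst.node instead of importing a renamed %src.node.
    static const char *DstNode = "%dst.node = type { i32, %dst.node* }";
    static const char *SrcNode = "%src.node = type { i32, %src.node* }";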
@@ -205,31 +205,31 @@ bool TypeMapTy::areTypesIsomorphic(Type *DstTy, Type *SrcTy) {
void TypeMapTy::linkDefinedTypeBodies() {
SmallVector<Type*, 16> Elements;
SmallString<16> TmpName;
-
+
// Note that processing entries in this loop (calling 'get') can add new
// entries to the SrcDefinitionsToResolve vector.
while (!SrcDefinitionsToResolve.empty()) {
StructType *SrcSTy = SrcDefinitionsToResolve.pop_back_val();
StructType *DstSTy = cast<StructType>(MappedTypes[SrcSTy]);
-
+
// TypeMap is a many-to-one mapping, if there were multiple types that
// provide a body for DstSTy then previous iterations of this loop may have
// already handled it. Just ignore this case.
if (!DstSTy->isOpaque()) continue;
assert(!SrcSTy->isOpaque() && "Not resolving a definition?");
-
+
// Map the body of the source type over to a new body for the dest type.
Elements.resize(SrcSTy->getNumElements());
for (unsigned i = 0, e = Elements.size(); i != e; ++i)
Elements[i] = getImpl(SrcSTy->getElementType(i));
-
+
DstSTy->setBody(Elements, SrcSTy->isPacked());
-
+
// If DstSTy has no name or has a longer name than STy, then viciously steal
// STy's name.
if (!SrcSTy->hasName()) continue;
StringRef SrcName = SrcSTy->getName();
-
+
if (!DstSTy->hasName() || DstSTy->getName().size() > SrcName.size()) {
TmpName.insert(TmpName.end(), SrcName.begin(), SrcName.end());
SrcSTy->setName("");
@@ -237,7 +237,7 @@ void TypeMapTy::linkDefinedTypeBodies() {
TmpName.clear();
}
}
-
+
DstResolvedOpaqueTypes.clear();
}
@@ -245,7 +245,7 @@ void TypeMapTy::linkDefinedTypeBodies() {
/// source module.
Type *TypeMapTy::get(Type *Ty) {
Type *Result = getImpl(Ty);
-
+
// If this caused a reference to any struct type, resolve it before returning.
if (!SrcDefinitionsToResolve.empty())
linkDefinedTypeBodies();
@@ -257,7 +257,7 @@ Type *TypeMapTy::getImpl(Type *Ty) {
// If we already have an entry for this type, return it.
Type **Entry = &MappedTypes[Ty];
if (*Entry) return *Entry;
-
+
// If this is not a named struct type, then just map all of the elements and
// then rebuild the type from inside out.
if (!isa<StructType>(Ty) || cast<StructType>(Ty)->isLiteral()) {
@@ -265,7 +265,7 @@ Type *TypeMapTy::getImpl(Type *Ty) {
// true for the anonymous {} struct, things like 'float', integers, etc.
if (Ty->getNumContainedTypes() == 0)
return *Entry = Ty;
-
+
// Remap all of the elements, keeping track of whether any of them change.
bool AnyChange = false;
SmallVector<Type*, 4> ElementTypes;
@@ -274,23 +274,23 @@ Type *TypeMapTy::getImpl(Type *Ty) {
ElementTypes[i] = getImpl(Ty->getContainedType(i));
AnyChange |= ElementTypes[i] != Ty->getContainedType(i);
}
-
+
// If we found our type while recursively processing stuff, just use it.
Entry = &MappedTypes[Ty];
if (*Entry) return *Entry;
-
+
// If all of the element types mapped directly over, then the type is usable
// as-is.
if (!AnyChange)
return *Entry = Ty;
-
+
// Otherwise, rebuild a modified type.
switch (Ty->getTypeID()) {
default: llvm_unreachable("unknown derived type to remap");
case Type::ArrayTyID:
return *Entry = ArrayType::get(ElementTypes[0],
cast<ArrayType>(Ty)->getNumElements());
- case Type::VectorTyID:
+ case Type::VectorTyID:
return *Entry = VectorType::get(ElementTypes[0],
cast<VectorType>(Ty)->getNumElements());
case Type::PointerTyID:
@@ -331,7 +331,7 @@ Type *TypeMapTy::getImpl(Type *Ty) {
// and is not required for the prettiness of the linked module, we just skip
// it and always rebuild a type here.
StructType *STy = cast<StructType>(Ty);
-
+
// If the type is opaque, we can just use it directly.
if (STy->isOpaque()) {
// A named structure type from src module is used. Add it to the Set of
@@ -339,7 +339,7 @@ Type *TypeMapTy::getImpl(Type *Ty) {
DstStructTypesSet.insert(STy);
return *Entry = STy;
}
-
+
// Otherwise we create a new type and resolve its body later. This will be
// resolved by the top level of get().
SrcDefinitionsToResolve.push_back(STy);
@@ -379,8 +379,8 @@ namespace {
/// function, which is the entrypoint for this file.
class ModuleLinker {
Module *DstM, *SrcM;
-
- TypeMapTy TypeMap;
+
+ TypeMapTy TypeMap;
ValueMaterializerTy ValMaterializer;
/// ValueMap - Mapping of values from what they used to be in Src, to what
@@ -388,25 +388,27 @@ namespace {
/// some overhead due to the use of Value handles which the Linker doesn't
/// actually need, but this allows us to reuse the ValueMapper code.
ValueToValueMapTy ValueMap;
-
+
+ std::vector<std::pair<GlobalValue *, GlobalAlias *>> ReplaceWithAlias;
+
struct AppendingVarInfo {
GlobalVariable *NewGV; // New aggregate global in dest module.
Constant *DstInit; // Old initializer from dest module.
Constant *SrcInit; // Old initializer from src module.
};
-
+
std::vector<AppendingVarInfo> AppendingVars;
-
+
unsigned Mode; // Mode to treat source module.
-
+
// Set of items not to link in from source.
SmallPtrSet<const Value*, 16> DoNotLinkFromSource;
-
+
// Vector of functions to lazily link in.
std::vector<Function*> LazilyLinkFunctions;
bool SuppressWarnings;
-
+
public:
std::string ErrorMsg;
@@ -417,7 +419,7 @@ namespace {
SuppressWarnings(SuppressWarnings) {}
bool run();
-
+
private:
/// emitError - Helper method for setting a message and returning an error
/// code.
@@ -425,7 +427,7 @@ namespace {
ErrorMsg = Message.str();
return true;
}
-
+
/// getLinkageResult - This analyzes the two global values and determines
/// what the result will look like in the destination module.
bool getLinkageResult(GlobalValue *Dest, const GlobalValue *Src,
@@ -439,29 +441,29 @@ namespace {
// If the source has no name it can't link. If it has local linkage,
// there is no name match-up going on.
if (!SrcGV->hasName() || SrcGV->hasLocalLinkage())
- return 0;
-
+ return nullptr;
+
// Otherwise see if we have a match in the destination module's symtab.
GlobalValue *DGV = DstM->getNamedValue(SrcGV->getName());
- if (DGV == 0) return 0;
-
+ if (!DGV) return nullptr;
+
// If we found a global with the same name in the dest module, but it has
// internal linkage, we are really not doing any linkage here.
if (DGV->hasLocalLinkage())
- return 0;
+ return nullptr;
// Otherwise, we do in fact link to the destination global.
return DGV;
}
-
+
void computeTypeMapping();
-
+
bool linkAppendingVarProto(GlobalVariable *DstGV, GlobalVariable *SrcGV);
bool linkGlobalProto(GlobalVariable *SrcGV);
bool linkFunctionProto(Function *SrcF);
bool linkAliasProto(GlobalAlias *SrcA);
bool linkModuleFlagsMetadata();
-
+
void linkAppendingVarInit(const AppendingVarInfo &AVI);
void linkGlobalInits();
void linkFunctionBody(Function *Dst, Function *Src);
@@ -495,10 +497,16 @@ static void forceRenaming(GlobalValue *GV, StringRef Name) {
/// a GlobalValue) from the SrcGV to the DestGV.
static void copyGVAttributes(GlobalValue *DestGV, const GlobalValue *SrcGV) {
// Use the maximum alignment, rather than just copying the alignment of SrcGV.
- unsigned Alignment = std::max(DestGV->getAlignment(), SrcGV->getAlignment());
+ auto *DestGO = dyn_cast<GlobalObject>(DestGV);
+ unsigned Alignment;
+ if (DestGO)
+ Alignment = std::max(DestGO->getAlignment(), SrcGV->getAlignment());
+
DestGV->copyAttributesFrom(SrcGV);
- DestGV->setAlignment(Alignment);
-
+
+ if (DestGO)
+ DestGO->setAlignment(Alignment);
+
forceRenaming(DestGV, SrcGV->getName());
}
@@ -518,7 +526,7 @@ static bool isLessConstraining(GlobalValue::VisibilityTypes a,
Value *ValueMaterializerTy::materializeValueFor(Value *V) {
Function *SF = dyn_cast<Function>(V);
if (!SF)
- return NULL;
+ return nullptr;
Function *DF = Function::Create(TypeMap.get(SF->getFunctionType()),
SF->getLinkage(), SF->getName(), DstM);
@@ -541,10 +549,10 @@ bool ModuleLinker::getLinkageResult(GlobalValue *Dest, const GlobalValue *Src,
assert(Dest && "Must have two globals being queried");
assert(!Src->hasLocalLinkage() &&
"If Src has internal linkage, Dest shouldn't be set!");
-
+
bool SrcIsDeclaration = Src->isDeclaration() && !Src->isMaterializable();
bool DestIsDeclaration = Dest->isDeclaration();
-
+
if (SrcIsDeclaration) {
// If Src is external or if both Src & Dest are external.. Just link the
// external globals, we aren't adding anything.
@@ -598,6 +606,8 @@ bool ModuleLinker::getLinkageResult(GlobalValue *Dest, const GlobalValue *Src,
// Compute the visibility. We follow the rules in the System V Application
// Binary Interface.
+ assert(!GlobalValue::isLocalLinkage(LT) &&
+ "Symbols with local linkage should not be merged");
Vis = isLessConstraining(Src->getVisibility(), Dest->getVisibility()) ?
Dest->getVisibility() : Src->getVisibility();
return false;
@@ -612,19 +622,19 @@ void ModuleLinker::computeTypeMapping() {
for (Module::global_iterator I = SrcM->global_begin(),
E = SrcM->global_end(); I != E; ++I) {
GlobalValue *DGV = getLinkedToGlobal(I);
- if (DGV == 0) continue;
-
+ if (!DGV) continue;
+
if (!DGV->hasAppendingLinkage() || !I->hasAppendingLinkage()) {
TypeMap.addTypeMapping(DGV->getType(), I->getType());
- continue;
+ continue;
}
-
+
// Unify the element type of appending arrays.
ArrayType *DAT = cast<ArrayType>(DGV->getType()->getElementType());
ArrayType *SAT = cast<ArrayType>(I->getType()->getElementType());
TypeMap.addTypeMapping(DAT->getElementType(), SAT->getElementType());
}
-
+
// Incorporate functions.
for (Module::iterator I = SrcM->begin(), E = SrcM->end(); I != E; ++I) {
if (GlobalValue *DGV = getLinkedToGlobal(I))
@@ -643,14 +653,14 @@ void ModuleLinker::computeTypeMapping() {
for (unsigned i = 0, e = SrcStructTypes.size(); i != e; ++i) {
StructType *ST = SrcStructTypes[i];
if (!ST->hasName()) continue;
-
+
// Check to see if there is a dot in the name followed by a digit.
size_t DotPos = ST->getName().rfind('.');
if (DotPos == 0 || DotPos == StringRef::npos ||
ST->getName().back() == '.' ||
!isdigit(static_cast<unsigned char>(ST->getName()[DotPos+1])))
continue;
-
+
// Check to see if the destination module has a struct with the prefix name.
if (StructType *DST = DstM->getTypeByName(ST->getName().substr(0, DotPos)))
// Don't use it if this actually came from the source module. They're in
@@ -675,9 +685,9 @@ void ModuleLinker::computeTypeMapping() {
}
// Don't bother incorporating aliases, they aren't generally typed well.
-
+
// Now that we have discovered all of the type equivalences, get a body for
- // any 'opaque' types in the dest module that are now resolved.
+ // any 'opaque' types in the dest module that are now resolved.
TypeMap.linkDefinedTypeBodies();
}
@@ -685,26 +695,26 @@ void ModuleLinker::computeTypeMapping() {
/// them together now. Return true on error.
bool ModuleLinker::linkAppendingVarProto(GlobalVariable *DstGV,
GlobalVariable *SrcGV) {
-
+
if (!SrcGV->hasAppendingLinkage() || !DstGV->hasAppendingLinkage())
return emitError("Linking globals named '" + SrcGV->getName() +
"': can only link appending global with another appending global!");
-
+
ArrayType *DstTy = cast<ArrayType>(DstGV->getType()->getElementType());
ArrayType *SrcTy =
cast<ArrayType>(TypeMap.get(SrcGV->getType()->getElementType()));
Type *EltTy = DstTy->getElementType();
-
+
// Check to see that the two arrays agree on type.
if (EltTy != SrcTy->getElementType())
return emitError("Appending variables with different element types!");
if (DstGV->isConstant() != SrcGV->isConstant())
return emitError("Appending variables linked with different const'ness!");
-
+
if (DstGV->getAlignment() != SrcGV->getAlignment())
return emitError(
"Appending variables with different alignment need to be linked!");
-
+
if (DstGV->getVisibility() != SrcGV->getVisibility())
return emitError(
"Appending variables with different visibility need to be linked!");
@@ -716,20 +726,20 @@ bool ModuleLinker::linkAppendingVarProto(GlobalVariable *DstGV,
if (DstGV->getSection() != SrcGV->getSection())
return emitError(
"Appending variables with different section name need to be linked!");
-
+
uint64_t NewSize = DstTy->getNumElements() + SrcTy->getNumElements();
ArrayType *NewType = ArrayType::get(EltTy, NewSize);
-
+
// Create the new global variable.
GlobalVariable *NG =
new GlobalVariable(*DstGV->getParent(), NewType, SrcGV->isConstant(),
- DstGV->getLinkage(), /*init*/0, /*name*/"", DstGV,
+ DstGV->getLinkage(), /*init*/nullptr, /*name*/"", DstGV,
DstGV->getThreadLocalMode(),
DstGV->getType()->getAddressSpace());
-
+
// Propagate alignment, visibility and section info.
copyGVAttributes(NG, DstGV);
-
+
AppendingVarInfo AVI;
AVI.NewGV = NG;
AVI.DstInit = DstGV->getInitializer();
@@ -742,10 +752,10 @@ bool ModuleLinker::linkAppendingVarProto(GlobalVariable *DstGV,
DstGV->replaceAllUsesWith(ConstantExpr::getBitCast(NG, DstGV->getType()));
DstGV->eraseFromParent();
-
+
// Track the source variable so we don't try to link it.
DoNotLinkFromSource.insert(SrcGV);
-
+
return false;
}
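[Editor's note] The merge this sets up behaves like simple array concatenation; a hypothetical before/after:

    static const char *DstArr = "@arr = appending global [1 x i32] [i32 1]";
    static const char *SrcArr = "@arr = appending global [2 x i32] [i32 2, i32 3]";
    // After ModuleLinker::run(), the destination holds the concatenation:
    //   @arr = appending global [3 x i32] [i32 1, i32 2, i32 3]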
@@ -760,7 +770,7 @@ bool ModuleLinker::linkGlobalProto(GlobalVariable *SGV) {
// Concatenation of appending linkage variables is magic and handled later.
if (DGV->hasAppendingLinkage() || SGV->hasAppendingLinkage())
return linkAppendingVarProto(cast<GlobalVariable>(DGV), SGV);
-
+
// Determine whether linkage of these two globals follows the source
// module's definition or the destination module's definition.
GlobalValue::LinkageTypes NewLinkage = GlobalValue::InternalLinkage;
@@ -786,22 +796,22 @@ bool ModuleLinker::linkGlobalProto(GlobalVariable *SGV) {
// Make sure to remember this mapping.
ValueMap[SGV] = ConstantExpr::getBitCast(DGV,TypeMap.get(SGV->getType()));
-
- // Track the source global so that we don't attempt to copy it over when
+
+ // Track the source global so that we don't attempt to copy it over when
// processing global initializers.
DoNotLinkFromSource.insert(SGV);
-
+
return false;
}
}
-
+
// No linking to be performed or linking from the source: simply create an
// identical version of the symbol over in the dest module... the
// initializer will be filled in later by LinkGlobalInits.
GlobalVariable *NewDGV =
new GlobalVariable(*DstM, TypeMap.get(SGV->getType()->getElementType()),
- SGV->isConstant(), SGV->getLinkage(), /*init*/0,
- SGV->getName(), /*insertbefore*/0,
+ SGV->isConstant(), SGV->getLinkage(), /*init*/nullptr,
+ SGV->getName(), /*insertbefore*/nullptr,
SGV->getThreadLocalMode(),
SGV->getType()->getAddressSpace());
// Propagate alignment, visibility and section info.
@@ -814,7 +824,7 @@ bool ModuleLinker::linkGlobalProto(GlobalVariable *SGV) {
DGV->replaceAllUsesWith(ConstantExpr::getBitCast(NewDGV, DGV->getType()));
DGV->eraseFromParent();
}
-
+
// Make sure to remember this mapping.
ValueMap[SGV] = NewDGV;
return false;
@@ -844,15 +854,15 @@ bool ModuleLinker::linkFunctionProto(Function *SF) {
// Make sure to remember this mapping.
ValueMap[SF] = ConstantExpr::getBitCast(DGV, TypeMap.get(SF->getType()));
-
- // Track the function from the source module so we don't attempt to remap
+
+ // Track the function from the source module so we don't attempt to remap
// it.
DoNotLinkFromSource.insert(SF);
-
+
return false;
}
}
-
+
// If the function is to be lazily linked, don't create it just yet.
// The ValueMaterializerTy will deal with creating it if it's used.
if (!DGV && (SF->hasLocalLinkage() || SF->hasLinkOnceLinkage() ||
@@ -875,7 +885,7 @@ bool ModuleLinker::linkFunctionProto(Function *SF) {
DGV->replaceAllUsesWith(ConstantExpr::getBitCast(NewDF, DGV->getType()));
DGV->eraseFromParent();
}
-
+
ValueMap[SF] = NewDF;
return false;
}
@@ -901,29 +911,27 @@ bool ModuleLinker::linkAliasProto(GlobalAlias *SGA) {
// Make sure to remember this mapping.
ValueMap[SGA] = ConstantExpr::getBitCast(DGV,TypeMap.get(SGA->getType()));
-
+
// Track the alias from the source module so we don't attempt to remap it.
DoNotLinkFromSource.insert(SGA);
-
+
return false;
}
}
-
+
// If there is no linkage to be performed or we're linking from the source,
// bring over SGA.
- GlobalAlias *NewDA = new GlobalAlias(TypeMap.get(SGA->getType()),
- SGA->getLinkage(), SGA->getName(),
- /*aliasee*/0, DstM);
+ auto *PTy = cast<PointerType>(TypeMap.get(SGA->getType()));
+ auto *NewDA =
+ GlobalAlias::create(PTy->getElementType(), PTy->getAddressSpace(),
+ SGA->getLinkage(), SGA->getName(), DstM);
copyGVAttributes(NewDA, SGA);
if (NewVisibility)
NewDA->setVisibility(*NewVisibility);
- if (DGV) {
- // Any uses of DGV need to change to NewDA, with cast.
- DGV->replaceAllUsesWith(ConstantExpr::getBitCast(NewDA, DGV->getType()));
- DGV->eraseFromParent();
- }
-
+ if (DGV)
+ ReplaceWithAlias.push_back(std::make_pair(DGV, NewDA));
+
ValueMap[SGA] = NewDA;
return false;
}
@@ -934,15 +942,15 @@ static void getArrayElements(Constant *C, SmallVectorImpl<Constant*> &Dest) {
for (unsigned i = 0; i != NumElements; ++i)
Dest.push_back(C->getAggregateElement(i));
}
-
+
void ModuleLinker::linkAppendingVarInit(const AppendingVarInfo &AVI) {
// Merge the initializer.
SmallVector<Constant*, 16> Elements;
getArrayElements(AVI.DstInit, Elements);
-
+
Constant *SrcInit = MapValue(AVI.SrcInit, ValueMap, RF_None, &TypeMap, &ValMaterializer);
getArrayElements(SrcInit, Elements);
-
+
ArrayType *NewType = cast<ArrayType>(AVI.NewGV->getType()->getElementType());
AVI.NewGV->setInitializer(ConstantArray::get(NewType, Elements));
}
@@ -953,10 +961,10 @@ void ModuleLinker::linkGlobalInits() {
// Loop over all of the globals in the src module, mapping them over as we go
for (Module::const_global_iterator I = SrcM->global_begin(),
E = SrcM->global_end(); I != E; ++I) {
-
+
// Only process initialized GV's or ones not already in dest.
- if (!I->hasInitializer() || DoNotLinkFromSource.count(I)) continue;
-
+ if (!I->hasInitializer() || DoNotLinkFromSource.count(I)) continue;
+
// Grab destination global variable.
GlobalVariable *DGV = cast<GlobalVariable>(ValueMap[I]);
// Figure out what the initializer looks like in the dest module.
@@ -984,7 +992,7 @@ void ModuleLinker::linkFunctionBody(Function *Dst, Function *Src) {
if (Mode == Linker::DestroySource) {
// Splice the body of the source function into the dest function.
Dst->getBasicBlockList().splice(Dst->end(), Src->getBasicBlockList());
-
+
// At this point, all of the instructions and values of the function are now
// copied over. The only problem is that they are still referencing values in
// the Source function as operands. Loop through all of the operands of the
@@ -993,19 +1001,32 @@ void ModuleLinker::linkFunctionBody(Function *Dst, Function *Src) {
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
RemapInstruction(I, ValueMap, RF_IgnoreMissingEntries,
&TypeMap, &ValMaterializer);
-
+
} else {
// Clone the body of the function into the dest function.
SmallVector<ReturnInst*, 8> Returns; // Ignore returns.
- CloneFunctionInto(Dst, Src, ValueMap, false, Returns, "", NULL,
+ CloneFunctionInto(Dst, Src, ValueMap, false, Returns, "", nullptr,
&TypeMap, &ValMaterializer);
}
-
+
// There is no need to map the arguments anymore.
for (Function::arg_iterator I = Src->arg_begin(), E = Src->arg_end();
I != E; ++I)
ValueMap.erase(I);
-
+
+}
+
+static GlobalObject &getGlobalObjectInExpr(Constant &C) {
+ auto *GO = dyn_cast<GlobalObject>(&C);
+ if (GO)
+ return *GO;
+ auto *GA = dyn_cast<GlobalAlias>(&C);
+ if (GA)
+ return *GA->getAliasee();
+ auto &CE = cast<ConstantExpr>(C);
+ assert(CE.getOpcode() == Instruction::BitCast ||
+ CE.getOpcode() == Instruction::AddrSpaceCast);
+ return getGlobalObjectInExpr(*CE.getOperand(0));
}
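[Editor's note] An in-file sketch of the helper's behavior on the constant shapes it handles; Ctx, M, and @g are hypothetical:

    static void globalObjectInExprExample(LLVMContext &Ctx, Module &M) {
      GlobalVariable *G = M.getGlobalVariable("g");
      Constant *CE = ConstantExpr::getBitCast(G, Type::getInt8PtrTy(Ctx));
      // The bitcast is stripped to reach the underlying global object;
      // for a GlobalAlias the helper returns the alias's aliasee instead.
      GlobalObject &GO = getGlobalObjectInExpr(*CE);
      assert(&GO == G && "expected the cast to be looked through");
      (void)GO;
    }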
/// linkAliasBodies - Insert all of the aliases in Src into the Dest module.
@@ -1016,10 +1037,27 @@ void ModuleLinker::linkAliasBodies() {
continue;
if (Constant *Aliasee = I->getAliasee()) {
GlobalAlias *DA = cast<GlobalAlias>(ValueMap[I]);
- DA->setAliasee(MapValue(Aliasee, ValueMap, RF_None,
- &TypeMap, &ValMaterializer));
+ Constant *Val =
+ MapValue(Aliasee, ValueMap, RF_None, &TypeMap, &ValMaterializer);
+ DA->setAliasee(&getGlobalObjectInExpr(*Val));
}
}
+
+ // Any uses of DGV need to change to NewDA, with cast.
+ for (auto &Pair : ReplaceWithAlias) {
+ GlobalValue *DGV = Pair.first;
+ GlobalAlias *NewDA = Pair.second;
+
+ for (auto *User : DGV->users()) {
+ if (auto *GA = dyn_cast<GlobalAlias>(User)) {
+ if (GA == NewDA)
+ report_fatal_error("Linking these modules creates an alias cycle.");
+ }
+ }
+
+ DGV->replaceAllUsesWith(ConstantExpr::getBitCast(NewDA, DGV->getType()));
+ DGV->eraseFromParent();
+ }
}
/// linkNamedMDNodes - Insert all of the named MDNodes in Src into the Dest
@@ -1193,7 +1231,7 @@ bool ModuleLinker::linkModuleFlagsMetadata() {
return HasErr;
}
-
+
bool ModuleLinker::run() {
assert(DstM && "Null destination module");
assert(SrcM && "Null source module");
@@ -1264,13 +1302,13 @@ bool ModuleLinker::run() {
for (unsigned i = 0, e = AppendingVars.size(); i != e; ++i)
linkAppendingVarInit(AppendingVars[i]);
-
+
// Link in the function bodies that are defined in the source module into
// DstM.
for (Module::iterator SF = SrcM->begin(), E = SrcM->end(); SF != E; ++SF) {
// Skip if not linking from source.
if (DoNotLinkFromSource.count(SF)) continue;
-
+
Function *DF = cast<Function>(ValueMap[SF]);
if (SF->hasPrefixData()) {
// Link in the prefix data.
@@ -1285,7 +1323,7 @@ bool ModuleLinker::run() {
if (SF->Materialize(&ErrorMsg))
return true;
}
-
+
linkFunctionBody(DF, SF);
SF->Dematerialize();
}
@@ -1310,9 +1348,9 @@ bool ModuleLinker::run() {
bool LinkedInAnyFunctions;
do {
LinkedInAnyFunctions = false;
-
+
for(std::vector<Function*>::iterator I = LazilyLinkFunctions.begin(),
- E = LazilyLinkFunctions.end(); I != E; ++I) {
+ E = LazilyLinkFunctions.end(); I != E; ++I) {
Function *SF = *I;
if (!SF)
continue;
@@ -1334,7 +1372,7 @@ bool ModuleLinker::run() {
if (SF->Materialize(&ErrorMsg))
return true;
}
-
+
// Erase from vector *before* the function body is linked - linkFunctionBody could
// invalidate I.
LazilyLinkFunctions.erase(I);
@@ -1349,11 +1387,11 @@ bool ModuleLinker::run() {
break;
}
} while (LinkedInAnyFunctions);
-
+
// Now that all of the types from the source are used, resolve any structs
// copied over to the dest that didn't exist there.
TypeMap.linkDefinedTypeBodies();
-
+
return false;
}
@@ -1369,7 +1407,7 @@ Linker::~Linker() {
void Linker::deleteModule() {
delete Composite;
- Composite = NULL;
+ Composite = nullptr;
}
bool Linker::linkInModule(Module *Src, unsigned Mode, std::string *ErrorMsg) {
@@ -1392,7 +1430,7 @@ bool Linker::linkInModule(Module *Src, unsigned Mode, std::string *ErrorMsg) {
/// error occurs, true is returned and ErrorMsg (if not null) is set to indicate
/// the problem. Upon failure, the Dest module could be in a modified state,
/// and shouldn't be relied on to be consistent.
-bool Linker::LinkModules(Module *Dest, Module *Src, unsigned Mode,
+bool Linker::LinkModules(Module *Dest, Module *Src, unsigned Mode,
std::string *ErrorMsg) {
Linker L(Dest);
return L.linkInModule(Src, Mode, ErrorMsg);
@@ -1406,7 +1444,7 @@ LLVMBool LLVMLinkModules(LLVMModuleRef Dest, LLVMModuleRef Src,
LLVMLinkerMode Mode, char **OutMessages) {
std::string Messages;
LLVMBool Result = Linker::LinkModules(unwrap(Dest), unwrap(Src),
- Mode, OutMessages? &Messages : 0);
+ Mode, OutMessages? &Messages : nullptr);
if (OutMessages)
*OutMessages = strdup(Messages.c_str());
return Result;
diff --git a/lib/MC/Android.mk b/lib/MC/Android.mk
index abf346b..975f4e3 100644
--- a/lib/MC/Android.mk
+++ b/lib/MC/Android.mk
@@ -20,7 +20,6 @@ mc_SRC_FILES := \
MCELFStreamer.cpp \
MCExpr.cpp \
MCExternalSymbolizer.cpp \
- MCFixup.cpp \
MCInst.cpp \
MCInstPrinter.cpp \
MCInstrAnalysis.cpp \
@@ -35,13 +34,14 @@ mc_SRC_FILES := \
MCRegisterInfo.cpp \
MCRelocationInfo.cpp \
MCSection.cpp \
- MCSectionCOFF.cpp \
+ MCSectionCOFF.cpp \
MCSectionELF.cpp \
MCSectionMachO.cpp \
MCStreamer.cpp \
MCSubtargetInfo.cpp \
MCSymbol.cpp \
MCSymbolizer.cpp \
+ MCTargetOptions.cpp \
MCValue.cpp \
MCWin64EH.cpp \
WinCOFFObjectWriter.cpp \
diff --git a/lib/MC/CMakeLists.txt b/lib/MC/CMakeLists.txt
index ab7dabc..6a384c1 100644
--- a/lib/MC/CMakeLists.txt
+++ b/lib/MC/CMakeLists.txt
@@ -16,7 +16,6 @@ add_llvm_library(LLVMMC
MCELF.cpp
MCELFObjectTargetWriter.cpp
MCELFStreamer.cpp
- MCFixup.cpp
MCFunction.cpp
MCExpr.cpp
MCExternalSymbolizer.cpp
@@ -45,6 +44,7 @@ add_llvm_library(LLVMMC
MCSubtargetInfo.cpp
MCSymbol.cpp
MCSymbolizer.cpp
+ MCTargetOptions.cpp
MCValue.cpp
MCWin64EH.cpp
MachObjectWriter.cpp
diff --git a/lib/MC/ELFObjectWriter.cpp b/lib/MC/ELFObjectWriter.cpp
index e9b8fe2..0a54627 100644
--- a/lib/MC/ELFObjectWriter.cpp
+++ b/lib/MC/ELFObjectWriter.cpp
@@ -17,6 +17,7 @@
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCAsmLayout.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
@@ -27,6 +28,8 @@
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCValue.h"
+#include "llvm/Object/StringTableBuilder.h"
+#include "llvm/Support/Compression.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/ELF.h"
@@ -105,10 +108,9 @@ class ELFObjectWriter : public MCObjectWriter {
static bool isFixupKindPCRel(const MCAssembler &Asm, unsigned Kind);
static bool RelocNeedsGOT(MCSymbolRefExpr::VariantKind Variant);
static uint64_t SymbolValue(MCSymbolData &Data, const MCAsmLayout &Layout);
- static bool isInSymtab(const MCAssembler &Asm, const MCSymbolData &Data,
+ static bool isInSymtab(const MCAsmLayout &Layout, const MCSymbolData &Data,
bool Used, bool Renamed);
- static bool isLocal(const MCSymbolData &Data, bool isSignature,
- bool isUsedInReloc);
+ static bool isLocal(const MCSymbolData &Data, bool isUsedInReloc);
static bool IsELFMetaDataSection(const MCSectionData &SD);
static uint64_t DataSectionSize(const MCSectionData &SD);
static uint64_t GetSectionFileSize(const MCAsmLayout &Layout,
@@ -131,11 +133,11 @@ class ELFObjectWriter : public MCObjectWriter {
MCSymbolData *SymbolData;
uint64_t StringIndex;
uint32_t SectionIndex;
+ StringRef Name;
// Support lexicographic sorting.
bool operator<(const ELFSymbolData &RHS) const {
- return SymbolData->getSymbol().getName() <
- RHS.SymbolData->getSymbol().getName();
+ return Name < RHS.Name;
}
};
@@ -148,13 +150,13 @@ class ELFObjectWriter : public MCObjectWriter {
llvm::DenseMap<const MCSectionData *, std::vector<ELFRelocationEntry>>
Relocations;
- DenseMap<const MCSection*, uint64_t> SectionStringTableIndex;
+ StringTableBuilder ShStrTabBuilder;
/// @}
/// @name Symbol Table Data
/// @{
- SmallString<256> StringTable;
+ StringTableBuilder StrTabBuilder;
std::vector<uint64_t> FileSymbolData;
std::vector<ELFSymbolData> LocalSymbolData;
std::vector<ELFSymbolData> ExternalSymbolData;
@@ -214,7 +216,8 @@ class ELFObjectWriter : public MCObjectWriter {
const MCAsmLayout &Layout,
SectionIndexMapTy &SectionIndexMap);
- bool shouldRelocateWithSymbol(const MCSymbolRefExpr *RefA,
+ bool shouldRelocateWithSymbol(const MCAssembler &Asm,
+ const MCSymbolRefExpr *RefA,
const MCSymbolData *SD, uint64_t C,
unsigned Type) const;
@@ -253,6 +256,8 @@ class ELFObjectWriter : public MCObjectWriter {
void CreateRelocationSections(MCAssembler &Asm, MCAsmLayout &Layout,
RelMapTy &RelMap);
+ void CompressDebugSections(MCAssembler &Asm, MCAsmLayout &Layout);
+
void WriteRelocations(MCAssembler &Asm, MCAsmLayout &Layout,
const RelMapTy &RelMap);
@@ -481,43 +486,18 @@ void ELFObjectWriter::WriteHeader(const MCAssembler &Asm,
Write16(ShstrtabIndex);
}
-uint64_t ELFObjectWriter::SymbolValue(MCSymbolData &OrigData,
+uint64_t ELFObjectWriter::SymbolValue(MCSymbolData &Data,
const MCAsmLayout &Layout) {
- MCSymbolData *Data = &OrigData;
- if (Data->isCommon() && Data->isExternal())
- return Data->getCommonAlignment();
-
- const MCSymbol *Symbol = &Data->getSymbol();
- bool IsThumbFunc = OrigData.getFlags() & ELF_Other_ThumbFunc;
-
- uint64_t Res = 0;
- if (Symbol->isVariable()) {
- const MCExpr *Expr = Symbol->getVariableValue();
- MCValue Value;
- if (!Expr->EvaluateAsRelocatable(Value, &Layout))
- llvm_unreachable("Invalid expression");
+ if (Data.isCommon() && Data.isExternal())
+ return Data.getCommonAlignment();
- assert(!Value.getSymB());
-
- Res = Value.getConstant();
-
- if (const MCSymbolRefExpr *A = Value.getSymA()) {
- Symbol = &A->getSymbol();
- Data = &Layout.getAssembler().getSymbolData(*Symbol);
- } else {
- Symbol = 0;
- Data = 0;
- }
- }
+ uint64_t Res;
+ if (!Layout.getSymbolOffset(&Data, Res))
+ return 0;
- if (IsThumbFunc)
+ if (Layout.getAssembler().isThumbFunc(&Data.getSymbol()))
Res |= 1;
- if (!Symbol || !Symbol->isInSection())
- return Res;
-
- Res += Layout.getSymbolOffset(Data);
-
return Res;
}
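
// Sketch of the Thumb adjustment above: ARM/Thumb interworking encodes the
// target instruction set in bit 0 of a function address, so a Thumb symbol's
// value is its layout offset with the lowest bit set.
#include <cstdint>

static uint64_t thumbSymbolValue(uint64_t Offset, bool IsThumbFunc) {
  return IsThumbFunc ? (Offset | 1) : Offset;
}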
@@ -526,15 +506,17 @@ void ELFObjectWriter::ExecutePostLayoutBinding(MCAssembler &Asm,
// The presence of symbol versions causes undefined symbols and
// versions declared with @@@ to be renamed.
- for (MCAssembler::symbol_iterator it = Asm.symbol_begin(),
- ie = Asm.symbol_end(); it != ie; ++it) {
- const MCSymbol &Alias = it->getSymbol();
- const MCSymbol &Symbol = Alias.AliasedSymbol();
- MCSymbolData &SD = Asm.getSymbolData(Symbol);
+ for (MCSymbolData &OriginalData : Asm.symbols()) {
+ const MCSymbol &Alias = OriginalData.getSymbol();
// Not an alias.
- if (&Symbol == &Alias)
+ if (!Alias.isVariable())
+ continue;
+ auto *Ref = dyn_cast<MCSymbolRefExpr>(Alias.getVariableValue());
+ if (!Ref)
continue;
+ const MCSymbol &Symbol = Ref->getSymbol();
+ MCSymbolData &SD = Asm.getSymbolData(Symbol);
StringRef AliasName = Alias.getName();
size_t Pos = AliasName.find('@');
@@ -543,8 +525,8 @@ void ELFObjectWriter::ExecutePostLayoutBinding(MCAssembler &Asm,
// Aliases defined with .symvar copy the binding from the symbol they alias.
// This is the first place we are able to copy this information.
- it->setExternal(SD.isExternal());
- MCELF::SetBinding(*it, MCELF::GetBinding(SD));
+ OriginalData.setExternal(SD.isExternal());
+ MCELF::SetBinding(OriginalData, MCELF::GetBinding(SD));
StringRef Rest = AliasName.substr(Pos);
if (!Symbol.isUndefined() && !Rest.startswith("@@@"))
@@ -594,26 +576,14 @@ static uint8_t mergeTypeForSet(uint8_t origType, uint8_t newType) {
return Type;
}
-static const MCSymbol *getBaseSymbol(const MCAsmLayout &Layout,
- const MCSymbol &Symbol) {
- if (!Symbol.isVariable())
- return &Symbol;
-
- const MCExpr *Expr = Symbol.getVariableValue();
- MCValue Value;
- if (!Expr->EvaluateAsRelocatable(Value, &Layout))
- llvm_unreachable("Invalid Expression");
- assert(!Value.getSymB());
- const MCSymbolRefExpr *A = Value.getSymA();
- if (!A)
- return nullptr;
- return getBaseSymbol(Layout, A->getSymbol());
-}
-
void ELFObjectWriter::WriteSymbol(SymbolTableWriter &Writer, ELFSymbolData &MSD,
const MCAsmLayout &Layout) {
MCSymbolData &OrigData = *MSD.SymbolData;
- const MCSymbol *Base = getBaseSymbol(Layout, OrigData.getSymbol());
+ assert((!OrigData.getFragment() ||
+ (&OrigData.getFragment()->getParent()->getSection() ==
+ &OrigData.getSymbol().getSection())) &&
+ "The symbol's section doesn't match the fragment's symbol");
+ const MCSymbol *Base = Layout.getBaseSymbol(OrigData.getSymbol());
// This has to be in sync with when computeSymbolTable uses SHN_ABS or
// SHN_COMMON.
@@ -627,8 +597,6 @@ void ELFObjectWriter::WriteSymbol(SymbolTableWriter &Writer, ELFSymbolData &MSD,
BaseSD = &Layout.getAssembler().getSymbolData(*Base);
Type = mergeTypeForSet(Type, MCELF::GetType(*BaseSD));
}
- if (OrigData.getFlags() & ELF_Other_ThumbFunc)
- Type = ELF::STT_FUNC;
uint8_t Info = (Binding << ELF_STB_Shift) | (Type << ELF_STT_Shift);
// Other and Visibility share the same byte with Visibility using the lower
@@ -638,8 +606,6 @@ void ELFObjectWriter::WriteSymbol(SymbolTableWriter &Writer, ELFSymbolData &MSD,
Other |= Visibility;
uint64_t Value = SymbolValue(OrigData, Layout);
- if (OrigData.getFlags() & ELF_Other_ThumbFunc)
- Value |= 1;
uint64_t Size = 0;
const MCExpr *ESize = OrigData.getSize();
@@ -664,7 +630,6 @@ void ELFObjectWriter::WriteSymbolTable(MCDataFragment *SymtabF,
SectionIndexMapTy &SectionIndexMap) {
// The string table must be emitted first because we need the index
// into the string table for all the symbol names.
- assert(StringTable.size() && "Missing string table");
// FIXME: Make sure the start of the symbol table is aligned.
@@ -725,7 +690,8 @@ void ELFObjectWriter::WriteSymbolTable(MCDataFragment *SymtabF,
// It is always valid to create a relocation with a symbol. It is preferable
// to use a relocation with a section if that is possible. Using the section
// allows us to omit some local symbols from the symbol table.
-bool ELFObjectWriter::shouldRelocateWithSymbol(const MCSymbolRefExpr *RefA,
+bool ELFObjectWriter::shouldRelocateWithSymbol(const MCAssembler &Asm,
+ const MCSymbolRefExpr *RefA,
const MCSymbolData *SD,
uint64_t C,
unsigned Type) const {
@@ -809,11 +775,37 @@ bool ELFObjectWriter::shouldRelocateWithSymbol(const MCSymbolRefExpr *RefA,
if (Flags & ELF::SHF_TLS)
return true;
+ // If the symbol is a Thumb function, the final relocation must set the
+ // lowest bit. When relocating with a symbol, that happens simply because
+ // the symbol's value has that bit set, so the bit would be lost if we
+ // relocated with the section instead.
+ // FIXME: We could use the section but add the bit to the relocation value.
+ if (Asm.isThumbFunc(&Sym))
+ return true;
+
if (TargetObjectWriter->needsRelocateWithSymbol(Type))
return true;
return false;
}
+static const MCSymbol *getWeakRef(const MCSymbolRefExpr &Ref) {
+ const MCSymbol &Sym = Ref.getSymbol();
+
+ if (Ref.getKind() == MCSymbolRefExpr::VK_WEAKREF)
+ return &Sym;
+
+ if (!Sym.isVariable())
+ return nullptr;
+
+ const MCExpr *Expr = Sym.getVariableValue();
+ const auto *Inner = dyn_cast<MCSymbolRefExpr>(Expr);
+ if (!Inner)
+ return nullptr;
+
+ if (Inner->getKind() == MCSymbolRefExpr::VK_WEAKREF)
+ return &Inner->getSymbol();
+ return nullptr;
+}
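
// Simplified sketch (hypothetical types) of the two cases getWeakRef covers:
// the reference itself carries VK_WEAKREF, or it names a variable symbol
// whose value is a VK_WEAKREF reference.
#include <string>

struct SymRefSketch {
  bool IsWeakref;                  // stands in for VK_WEAKREF
  const SymRefSketch *VariableVal; // non-null if the symbol is a variable
  std::string Name;
};

static const std::string *getWeakRefName(const SymRefSketch &Ref) {
  if (Ref.IsWeakref)
    return &Ref.Name;
  if (Ref.VariableVal && Ref.VariableVal->IsWeakref)
    return &Ref.VariableVal->Name;
  return nullptr;
}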
+
void ELFObjectWriter::RecordRelocation(const MCAssembler &Asm,
const MCAsmLayout &Layout,
const MCFragment *Fragment,
@@ -868,7 +860,7 @@ void ELFObjectWriter::RecordRelocation(const MCAssembler &Asm,
const MCSymbolData *SymAD = SymA ? &Asm.getSymbolData(*SymA) : nullptr;
unsigned Type = GetRelocType(Target, Fixup, IsPCRel);
- bool RelocateWithSymbol = shouldRelocateWithSymbol(RefA, SymAD, C, Type);
+ bool RelocateWithSymbol = shouldRelocateWithSymbol(Asm, RefA, SymAD, C, Type);
if (!RelocateWithSymbol && SymA && !SymA->isUndefined())
C += Layout.getSymbolOffset(SymAD);
@@ -899,8 +891,8 @@ void ELFObjectWriter::RecordRelocation(const MCAssembler &Asm,
if (const MCSymbol *R = Renames.lookup(SymA))
SymA = R;
- if (RefA->getKind() == MCSymbolRefExpr::VK_WEAKREF)
- WeakrefUsedInReloc.insert(SymA);
+ if (const MCSymbol *WeakRef = getWeakRef(*RefA))
+ WeakrefUsedInReloc.insert(WeakRef);
else
UsedInReloc.insert(SymA);
}
@@ -913,13 +905,13 @@ void ELFObjectWriter::RecordRelocation(const MCAssembler &Asm,
uint64_t
ELFObjectWriter::getSymbolIndexInSymbolTable(const MCAssembler &Asm,
const MCSymbol *S) {
- MCSymbolData &SD = Asm.getSymbolData(*S);
+ const MCSymbolData &SD = Asm.getSymbolData(*S);
return SD.getIndex();
}
-bool ELFObjectWriter::isInSymtab(const MCAssembler &Asm,
- const MCSymbolData &Data,
- bool Used, bool Renamed) {
+bool ELFObjectWriter::isInSymtab(const MCAsmLayout &Layout,
+ const MCSymbolData &Data, bool Used,
+ bool Renamed) {
const MCSymbol &Symbol = Data.getSymbol();
if (Symbol.isVariable()) {
const MCExpr *Expr = Symbol.getVariableValue();
@@ -938,9 +930,11 @@ bool ELFObjectWriter::isInSymtab(const MCAssembler &Asm,
if (Symbol.getName() == "_GLOBAL_OFFSET_TABLE_")
return true;
- const MCSymbol &A = Symbol.AliasedSymbol();
- if (Symbol.isVariable() && !A.isVariable() && A.isUndefined())
- return false;
+ if (Symbol.isVariable()) {
+ const MCSymbol *Base = Layout.getBaseSymbol(Symbol);
+ if (Base && Base->isUndefined())
+ return false;
+ }
bool IsGlobal = MCELF::GetBinding(Data) == ELF::STB_GLOBAL;
if (!Symbol.isVariable() && Symbol.isUndefined() && !IsGlobal)
@@ -952,20 +946,16 @@ bool ELFObjectWriter::isInSymtab(const MCAssembler &Asm,
return true;
}
-bool ELFObjectWriter::isLocal(const MCSymbolData &Data, bool isSignature,
- bool isUsedInReloc) {
+bool ELFObjectWriter::isLocal(const MCSymbolData &Data, bool isUsedInReloc) {
if (Data.isExternal())
return false;
const MCSymbol &Symbol = Data.getSymbol();
- const MCSymbol &RefSymbol = Symbol.AliasedSymbol();
-
- if (RefSymbol.isUndefined() && !RefSymbol.isVariable()) {
- if (isSignature && !isUsedInReloc)
- return true;
+ if (Symbol.isDefined())
+ return true;
+ if (isUsedInReloc)
return false;
- }
return true;
}
@@ -1013,58 +1003,36 @@ ELFObjectWriter::computeSymbolTable(MCAssembler &Asm, const MCAsmLayout &Layout,
MCELF::SetBinding(Data, ELF::STB_GLOBAL);
}
- // Index 0 is always the empty string.
- StringMap<uint64_t> StringIndexMap;
- StringTable += '\x00';
-
- // FIXME: We could optimize suffixes in strtab in the same way we
- // optimize them in shstrtab.
-
- for (MCAssembler::const_file_name_iterator it = Asm.file_names_begin(),
- ie = Asm.file_names_end();
- it != ie;
- ++it) {
- StringRef Name = *it;
- uint64_t &Entry = StringIndexMap[Name];
- if (!Entry) {
- Entry = StringTable.size();
- StringTable += Name;
- StringTable += '\x00';
- }
- FileSymbolData.push_back(Entry);
- }
-
// Add the data for the symbols.
- for (MCAssembler::symbol_iterator it = Asm.symbol_begin(),
- ie = Asm.symbol_end(); it != ie; ++it) {
- const MCSymbol &Symbol = it->getSymbol();
+ for (MCSymbolData &SD : Asm.symbols()) {
+ const MCSymbol &Symbol = SD.getSymbol();
bool Used = UsedInReloc.count(&Symbol);
bool WeakrefUsed = WeakrefUsedInReloc.count(&Symbol);
bool isSignature = RevGroupMap.count(&Symbol);
- if (!isInSymtab(Asm, *it,
+ if (!isInSymtab(Layout, SD,
Used || WeakrefUsed || isSignature,
Renames.count(&Symbol)))
continue;
ELFSymbolData MSD;
- MSD.SymbolData = it;
- const MCSymbol *BaseSymbol = getBaseSymbol(Layout, Symbol);
+ MSD.SymbolData = &SD;
+ const MCSymbol *BaseSymbol = Layout.getBaseSymbol(Symbol);
// Undefined symbols are global, but this is the first place we
// are able to set it.
- bool Local = isLocal(*it, isSignature, Used);
- if (!Local && MCELF::GetBinding(*it) == ELF::STB_LOCAL) {
+ bool Local = isLocal(SD, Used);
+ if (!Local && MCELF::GetBinding(SD) == ELF::STB_LOCAL) {
assert(BaseSymbol);
- MCSymbolData &SD = Asm.getSymbolData(*BaseSymbol);
- MCELF::SetBinding(*it, ELF::STB_GLOBAL);
+ MCSymbolData &BaseData = Asm.getSymbolData(*BaseSymbol);
MCELF::SetBinding(SD, ELF::STB_GLOBAL);
+ MCELF::SetBinding(BaseData, ELF::STB_GLOBAL);
}
if (!BaseSymbol) {
MSD.SectionIndex = ELF::SHN_ABS;
- } else if (it->isCommon()) {
+ } else if (SD.isCommon()) {
assert(!Local);
MSD.SectionIndex = ELF::SHN_COMMON;
} else if (BaseSymbol->isUndefined()) {
@@ -1073,7 +1041,7 @@ ELFObjectWriter::computeSymbolTable(MCAssembler &Asm, const MCAsmLayout &Layout,
else
MSD.SectionIndex = ELF::SHN_UNDEF;
if (!Used && WeakrefUsed)
- MCELF::SetBinding(*it, ELF::STB_WEAK);
+ MCELF::SetBinding(SD, ELF::STB_WEAK);
} else {
const MCSectionELF &Section =
static_cast<const MCSectionELF&>(BaseSymbol->getSection());
@@ -1085,7 +1053,6 @@ ELFObjectWriter::computeSymbolTable(MCAssembler &Asm, const MCAsmLayout &Layout,
// @@ in defined ones.
StringRef Name = Symbol.getName();
SmallString<32> Buf;
-
size_t Pos = Name.find("@@@");
if (Pos != StringRef::npos) {
Buf += Name.substr(0, Pos);
@@ -1093,14 +1060,8 @@ ELFObjectWriter::computeSymbolTable(MCAssembler &Asm, const MCAsmLayout &Layout,
Buf += Name.substr(Pos + Skip);
Name = Buf;
}
+ MSD.Name = StrTabBuilder.add(Name);
- uint64_t &Entry = StringIndexMap[Name];
- if (!Entry) {
- Entry = StringTable.size();
- StringTable += Name;
- StringTable += '\x00';
- }
- MSD.StringIndex = Entry;
if (MSD.SectionIndex == ELF::SHN_UNDEF)
UndefinedSymbolData.push_back(MSD);
else if (Local)
@@ -1109,6 +1070,21 @@ ELFObjectWriter::computeSymbolTable(MCAssembler &Asm, const MCAsmLayout &Layout,
ExternalSymbolData.push_back(MSD);
}
+ for (auto i = Asm.file_names_begin(), e = Asm.file_names_end(); i != e; ++i)
+ StrTabBuilder.add(*i);
+
+ StrTabBuilder.finalize();
+
+ for (auto i = Asm.file_names_begin(), e = Asm.file_names_end(); i != e; ++i)
+ FileSymbolData.push_back(StrTabBuilder.getOffset(*i));
+
+ for (ELFSymbolData& MSD : LocalSymbolData)
+ MSD.StringIndex = StrTabBuilder.getOffset(MSD.Name);
+ for (ELFSymbolData& MSD : ExternalSymbolData)
+ MSD.StringIndex = StrTabBuilder.getOffset(MSD.Name);
+ for (ELFSymbolData& MSD : UndefinedSymbolData)
+ MSD.StringIndex = StrTabBuilder.getOffset(MSD.Name);
+
// Symbols are required to be in lexicographic order.
array_pod_sort(LocalSymbolData.begin(), LocalSymbolData.end());
array_pod_sort(ExternalSymbolData.begin(), ExternalSymbolData.end());
@@ -1168,6 +1144,151 @@ void ELFObjectWriter::CreateRelocationSections(MCAssembler &Asm,
}
}
+static SmallVector<char, 128>
+getUncompressedData(MCAsmLayout &Layout,
+ MCSectionData::FragmentListType &Fragments) {
+ SmallVector<char, 128> UncompressedData;
+ for (const MCFragment &F : Fragments) {
+ const SmallVectorImpl<char> *Contents;
+ switch (F.getKind()) {
+ case MCFragment::FT_Data:
+ Contents = &cast<MCDataFragment>(F).getContents();
+ break;
+ case MCFragment::FT_Dwarf:
+ Contents = &cast<MCDwarfLineAddrFragment>(F).getContents();
+ break;
+ case MCFragment::FT_DwarfFrame:
+ Contents = &cast<MCDwarfCallFrameFragment>(F).getContents();
+ break;
+ default:
+ llvm_unreachable(
+ "Not expecting any other fragment types in a debug_* section");
+ }
+ UncompressedData.append(Contents->begin(), Contents->end());
+ }
+ return UncompressedData;
+}
+
+// Include the debug info compression header:
+// "ZLIB" followed by 8 bytes representing the uncompressed size of the section,
+// useful for consumers to preallocate a buffer to decompress into.
+static bool
+prependCompressionHeader(uint64_t Size,
+ SmallVectorImpl<char> &CompressedContents) {
+ static const StringRef Magic = "ZLIB";
+ if (Size <= Magic.size() + sizeof(Size) + CompressedContents.size())
+ return false;
+ if (sys::IsLittleEndianHost)
+ Size = sys::SwapByteOrder(Size);
+ CompressedContents.insert(CompressedContents.begin(),
+ Magic.size() + sizeof(Size), 0);
+ std::copy(Magic.begin(), Magic.end(), CompressedContents.begin());
+ std::copy(reinterpret_cast<char *>(&Size),
+ reinterpret_cast<char *>(&Size + 1),
+ CompressedContents.begin() + Magic.size());
+ return true;
+}
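
// Standalone sketch of the header laid out above: the 4-byte "ZLIB" magic
// followed by the uncompressed size as a big-endian uint64_t (the byte swap
// on little-endian hosts produces the same encoding). Note the size check
// above also gives up when header plus compressed data would not actually
// shrink the section.
#include <cstdint>
#include <cstring>
#include <vector>

static std::vector<char> makeCompressionHeader(uint64_t UncompressedSize) {
  std::vector<char> Header(4 + sizeof(uint64_t));
  std::memcpy(Header.data(), "ZLIB", 4);
  for (unsigned I = 0; I != 8; ++I) // big-endian, independent of host order
    Header[4 + I] = char(UncompressedSize >> (8 * (7 - I)));
  return Header;
}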
+
+// Return a single fragment containing the compressed contents of the whole
+// section. Null if the section was not compressed for any reason.
+static std::unique_ptr<MCDataFragment>
+getCompressedFragment(MCAsmLayout &Layout,
+ MCSectionData::FragmentListType &Fragments) {
+ std::unique_ptr<MCDataFragment> CompressedFragment(new MCDataFragment());
+
+ // Gather the uncompressed data from all the fragments; only the
+ // data-bearing fragment kinds are expected in a debug_* section.
+ SmallVector<char, 128> UncompressedData =
+ getUncompressedData(Layout, Fragments);
+
+ SmallVectorImpl<char> &CompressedContents = CompressedFragment->getContents();
+
+ zlib::Status Success = zlib::compress(
+ StringRef(UncompressedData.data(), UncompressedData.size()),
+ CompressedContents);
+ if (Success != zlib::StatusOK)
+ return nullptr;
+
+ if (!prependCompressionHeader(UncompressedData.size(), CompressedContents))
+ return nullptr;
+
+ return CompressedFragment;
+}
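
// Illustrative standalone equivalent of the compression step above using raw
// zlib (assumes <zlib.h> is available); llvm::zlib::compress wraps a similar
// operation and failure here mirrors returning nullptr above.
#include <vector>
#include <zlib.h>

static bool deflateAll(const std::vector<char> &In, std::vector<char> &Out) {
  uLongf OutLen = compressBound(In.size());
  Out.resize(OutLen);
  if (compress2(reinterpret_cast<Bytef *>(Out.data()), &OutLen,
                reinterpret_cast<const Bytef *>(In.data()), In.size(),
                Z_DEFAULT_COMPRESSION) != Z_OK)
    return false;
  Out.resize(OutLen); // shrink to the actual compressed size
  return true;
}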
+
+typedef DenseMap<const MCSectionData *, std::vector<MCSymbolData *>>
+DefiningSymbolMap;
+
+static void UpdateSymbols(const MCAsmLayout &Layout,
+ const std::vector<MCSymbolData *> &Symbols,
+ MCFragment &NewFragment) {
+ for (MCSymbolData *Sym : Symbols) {
+ Sym->setOffset(Sym->getOffset() +
+ Layout.getFragmentOffset(Sym->getFragment()));
+ Sym->setFragment(&NewFragment);
+ }
+}
+
+static void CompressDebugSection(MCAssembler &Asm, MCAsmLayout &Layout,
+ const DefiningSymbolMap &DefiningSymbols,
+ const MCSectionELF &Section,
+ MCSectionData &SD) {
+ StringRef SectionName = Section.getSectionName();
+ MCSectionData::FragmentListType &Fragments = SD.getFragmentList();
+
+ std::unique_ptr<MCDataFragment> CompressedFragment =
+ getCompressedFragment(Layout, Fragments);
+
+ // Leave the section as-is if the fragments could not be compressed.
+ if (!CompressedFragment)
+ return;
+
+ // Update the fragment+offsets of any symbols referring to fragments in this
+ // section to refer to the new fragment.
+ auto I = DefiningSymbols.find(&SD);
+ if (I != DefiningSymbols.end())
+ UpdateSymbols(Layout, I->second, *CompressedFragment);
+
+ // Invalidate the layout for the whole section since it will have new and
+ // different fragments now.
+ Layout.invalidateFragmentsFrom(&Fragments.front());
+ Fragments.clear();
+
+ // Complete the initialization of the new fragment
+ CompressedFragment->setParent(&SD);
+ CompressedFragment->setLayoutOrder(0);
+ Fragments.push_back(CompressedFragment.release());
+
+ // Rename from .debug_* to .zdebug_*
+ Asm.getContext().renameELFSection(&Section,
+ (".z" + SectionName.drop_front(1)).str());
+}
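
// Sketch of the rename above: drop the leading '.' and prepend ".z", so
// ".debug_info" becomes ".zdebug_info".
#include <cassert>
#include <string>

static std::string zdebugName(const std::string &Name) {
  assert(Name.compare(0, 7, ".debug_") == 0 && "expected a .debug_ section");
  return ".z" + Name.substr(1);
}
// zdebugName(".debug_str") == ".zdebug_str"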
+
+void ELFObjectWriter::CompressDebugSections(MCAssembler &Asm,
+ MCAsmLayout &Layout) {
+ if (!Asm.getContext().getAsmInfo()->compressDebugSections())
+ return;
+
+ DefiningSymbolMap DefiningSymbols;
+
+ for (MCSymbolData &SD : Asm.symbols())
+ if (MCFragment *F = SD.getFragment())
+ DefiningSymbols[F->getParent()].push_back(&SD);
+
+ for (MCSectionData &SD : Asm) {
+ const MCSectionELF &Section =
+ static_cast<const MCSectionELF &>(SD.getSection());
+ StringRef SectionName = Section.getSectionName();
+
+ // Compressing debug_frame would require handling alignment fragments,
+ // which is more work (possibly generalizing MCAssembler.cpp:writeFragment
+ // to allow writing to arbitrary buffers) for little benefit.
+ if (!SectionName.startswith(".debug_") || SectionName == ".debug_frame")
+ continue;
+
+ CompressDebugSection(Asm, Layout, DefiningSymbols, Section, SD);
+ }
+}
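
// The gating predicate above as a standalone sketch: every .debug_* section
// is compressed except .debug_frame.
#include <string>

static bool shouldCompressSection(const std::string &Name) {
  return Name.compare(0, 7, ".debug_") == 0 && Name != ".debug_frame";
}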
+
void ELFObjectWriter::WriteRelocations(MCAssembler &Asm, MCAsmLayout &Layout,
const RelMapTy &RelMap) {
for (MCAssembler::const_iterator it = Asm.begin(),
@@ -1274,23 +1395,6 @@ void ELFObjectWriter::WriteRelocationsFragment(const MCAssembler &Asm,
}
}
-static int compareBySuffix(const MCSectionELF *const *a,
- const MCSectionELF *const *b) {
- const StringRef &NameA = (*a)->getSectionName();
- const StringRef &NameB = (*b)->getSectionName();
- const unsigned sizeA = NameA.size();
- const unsigned sizeB = NameB.size();
- const unsigned len = std::min(sizeA, sizeB);
- for (unsigned int i = 0; i < len; ++i) {
- char ca = NameA[sizeA - i - 1];
- char cb = NameB[sizeB - i - 1];
- if (ca != cb)
- return cb - ca;
- }
-
- return sizeB - sizeA;
-}
-
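
// Sketch of the suffix sharing the removed sort enabled, which the
// StringTableBuilder used below is meant to provide: a name whose bytes form
// the tail of a longer name can reuse the longer entry's storage, e.g.
// ".text" inside ".rela.text".
#include <string>

static bool canShareTail(const std::string &Short, const std::string &Long) {
  return Long.size() >= Short.size() &&
         Long.compare(Long.size() - Short.size(), Short.size(), Short) == 0;
}
// canShareTail(".text", ".rela.text") == true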
void ELFObjectWriter::CreateMetadataSections(MCAssembler &Asm,
MCAsmLayout &Layout,
SectionIndexMapTy &SectionIndexMap,
@@ -1331,45 +1435,20 @@ void ELFObjectWriter::CreateMetadataSections(MCAssembler &Asm,
WriteSymbolTable(F, Asm, Layout, SectionIndexMap);
F = new MCDataFragment(&StrtabSD);
- F->getContents().append(StringTable.begin(), StringTable.end());
+ F->getContents().append(StrTabBuilder.data().begin(),
+ StrTabBuilder.data().end());
F = new MCDataFragment(&ShstrtabSD);
- std::vector<const MCSectionELF*> Sections;
- for (MCAssembler::const_iterator it = Asm.begin(),
- ie = Asm.end(); it != ie; ++it) {
+ // Section header string table.
+ for (auto it = Asm.begin(), ie = Asm.end(); it != ie; ++it) {
const MCSectionELF &Section =
static_cast<const MCSectionELF&>(it->getSection());
- Sections.push_back(&Section);
- }
- array_pod_sort(Sections.begin(), Sections.end(), compareBySuffix);
-
- // Section header string table.
- //
- // The first entry of a string table holds a null character so skip
- // section 0.
- uint64_t Index = 1;
- F->getContents().push_back('\x00');
-
- for (unsigned int I = 0, E = Sections.size(); I != E; ++I) {
- const MCSectionELF &Section = *Sections[I];
-
- StringRef Name = Section.getSectionName();
- if (I != 0) {
- StringRef PreviousName = Sections[I - 1]->getSectionName();
- if (PreviousName.endswith(Name)) {
- SectionStringTableIndex[&Section] = Index - Name.size() - 1;
- continue;
- }
- }
- // Remember the index into the string table so we can write it
- // into the sh_name field of the section header table.
- SectionStringTableIndex[&Section] = Index;
-
- Index += Name.size() + 1;
- F->getContents().append(Name.begin(), Name.end());
- F->getContents().push_back('\x00');
+ ShStrTabBuilder.add(Section.getSectionName());
}
+ ShStrTabBuilder.finalize();
+ F->getContents().append(ShStrTabBuilder.data().begin(),
+ ShStrTabBuilder.data().end());
}
void ELFObjectWriter::CreateIndexedSections(MCAssembler &Asm,
@@ -1437,7 +1516,7 @@ void ELFObjectWriter::WriteSection(MCAssembler &Asm,
switch(Section.getType()) {
case ELF::SHT_DYNAMIC:
- sh_link = SectionStringTableIndex[&Section];
+ sh_link = ShStrTabBuilder.getOffset(Section.getSectionName());
sh_info = 0;
break;
@@ -1518,7 +1597,8 @@ void ELFObjectWriter::WriteSection(MCAssembler &Asm,
}
}
- WriteSecHdrEntry(SectionStringTableIndex[&Section], Section.getType(),
+ WriteSecHdrEntry(ShStrTabBuilder.getOffset(Section.getSectionName()),
+ Section.getType(),
Section.getFlags(), 0, Offset, Size, sh_link, sh_info,
Alignment, Section.getEntrySize());
}
@@ -1652,6 +1732,8 @@ void ELFObjectWriter::WriteObject(MCAssembler &Asm,
unsigned NumUserSections = Asm.size();
+ CompressDebugSections(Asm, const_cast<MCAsmLayout &>(Layout));
+
DenseMap<const MCSectionELF*, const MCSectionELF*> RelMap;
CreateRelocationSections(Asm, const_cast<MCAsmLayout&>(Layout), RelMap);
diff --git a/lib/MC/MCAsmInfo.cpp b/lib/MC/MCAsmInfo.cpp
index 9667145..c0777a6 100644
--- a/lib/MC/MCAsmInfo.cpp
+++ b/lib/MC/MCAsmInfo.cpp
@@ -61,8 +61,8 @@ MCAsmInfo::MCAsmInfo() {
UsesELFSectionDirectiveForBSS = false;
AlignmentIsInBytes = true;
TextAlignFillValue = 0;
- GPRel64Directive = 0;
- GPRel32Directive = 0;
+ GPRel64Directive = nullptr;
+ GPRel32Directive = nullptr;
GlobalDirective = "\t.globl\t";
HasSetDirective = true;
HasAggressiveSymbolFolding = true;
@@ -72,7 +72,7 @@ MCAsmInfo::MCAsmInfo() {
HasSingleParameterDotFile = true;
HasIdentDirective = false;
HasNoDeadStrip = false;
- WeakRefDirective = 0;
+ WeakRefDirective = nullptr;
HasWeakDefDirective = false;
HasWeakDefCanBeHiddenDirective = false;
HasLinkOnceDirective = false;
diff --git a/lib/MC/MCAsmStreamer.cpp b/lib/MC/MCAsmStreamer.cpp
index 884ccf9..7f8ae54 100644
--- a/lib/MC/MCAsmStreamer.cpp
+++ b/lib/MC/MCAsmStreamer.cpp
@@ -9,6 +9,7 @@
#include "llvm/MC/MCStreamer.h"
#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCAsmBackend.h"
@@ -31,6 +32,7 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Path.h"
#include <cctype>
+#include <unordered_map>
using namespace llvm;
namespace {
@@ -49,34 +51,24 @@ private:
unsigned IsVerboseAsm : 1;
unsigned ShowInst : 1;
- unsigned UseCFI : 1;
unsigned UseDwarfDirectory : 1;
- enum EHSymbolFlags { EHGlobal = 1,
- EHWeakDefinition = 1 << 1,
- EHPrivateExtern = 1 << 2 };
- DenseMap<const MCSymbol*, unsigned> FlagMap;
-
- DenseMap<const MCSymbol*, MCSymbolData*> SymbolMap;
-
void EmitRegisterName(int64_t Register);
void EmitCFIStartProcImpl(MCDwarfFrameInfo &Frame) override;
void EmitCFIEndProcImpl(MCDwarfFrameInfo &Frame) override;
public:
MCAsmStreamer(MCContext &Context, formatted_raw_ostream &os,
- bool isVerboseAsm, bool useCFI, bool useDwarfDirectory,
+ bool isVerboseAsm, bool useDwarfDirectory,
MCInstPrinter *printer, MCCodeEmitter *emitter,
MCAsmBackend *asmbackend, bool showInst)
: MCStreamer(Context), OS(os), MAI(Context.getAsmInfo()),
InstPrinter(printer), Emitter(emitter), AsmBackend(asmbackend),
CommentStream(CommentToEmit), IsVerboseAsm(isVerboseAsm),
- ShowInst(showInst), UseCFI(useCFI),
- UseDwarfDirectory(useDwarfDirectory) {
+ ShowInst(showInst), UseDwarfDirectory(useDwarfDirectory) {
if (InstPrinter && IsVerboseAsm)
InstPrinter->setCommentStream(CommentStream);
}
- ~MCAsmStreamer() {}
inline void EmitEOL() {
// If we don't have any comments, just emit a \n.
@@ -130,7 +122,6 @@ public:
void EmitLabel(MCSymbol *Symbol) override;
void EmitDebugLabel(MCSymbol *Symbol) override;
- void EmitEHSymAttributes(const MCSymbol *Symbol, MCSymbol *EHSymbol) override;
void EmitAssemblerFlag(MCAssemblerFlag Flag) override;
void EmitLinkerOptions(ArrayRef<std::string> Options) override;
void EmitDataRegion(MCDataRegionType Kind) override;
@@ -140,12 +131,6 @@ public:
void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) override;
void EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol) override;
- void EmitDwarfAdvanceLineAddr(int64_t LineDelta, const MCSymbol *LastLabel,
- const MCSymbol *Label,
- unsigned PointerSize) override;
- void EmitDwarfAdvanceFrameAddr(const MCSymbol *LastLabel,
- const MCSymbol *Label) override;
-
bool EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute) override;
void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) override;
@@ -167,7 +152,7 @@ public:
void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
unsigned ByteAlignment) override;
- void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0,
+ void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = nullptr,
uint64_t Size = 0, unsigned ByteAlignment = 0) override;
void EmitTBSSSymbol (const MCSection *Section, MCSymbol *Symbol,
@@ -175,7 +160,8 @@ public:
void EmitBytes(StringRef Data) override;
- void EmitValueImpl(const MCExpr *Value, unsigned Size) override;
+ void EmitValueImpl(const MCExpr *Value, unsigned Size,
+ const SMLoc &Loc = SMLoc()) override;
void EmitIntValue(uint64_t Value, unsigned Size) override;
void EmitULEB128Value(const MCExpr *Value) override;
@@ -254,8 +240,6 @@ public:
void EmitRawTextImpl(StringRef String) override;
void FinishImpl() override;
-
- virtual MCSymbolData &getOrCreateSymbolData(const MCSymbol *Symbol) override;
};
} // end anonymous namespace.
@@ -321,21 +305,6 @@ void MCAsmStreamer::ChangeSection(const MCSection *Section,
Section->PrintSwitchToSection(*MAI, OS, Subsection);
}
-void MCAsmStreamer::EmitEHSymAttributes(const MCSymbol *Symbol,
- MCSymbol *EHSymbol) {
- if (UseCFI)
- return;
-
- unsigned Flags = FlagMap.lookup(Symbol);
-
- if (Flags & EHGlobal)
- EmitSymbolAttribute(EHSymbol, MCSA_Global);
- if (Flags & EHWeakDefinition)
- EmitSymbolAttribute(EHSymbol, MCSA_WeakDefinition);
- if (Flags & EHPrivateExtern)
- EmitSymbolAttribute(EHSymbol, MCSA_PrivateExtern);
-}
-
void MCAsmStreamer::EmitLabel(MCSymbol *Symbol) {
assert(Symbol->isUndefined() && "Cannot define a symbol twice!");
MCStreamer::EmitLabel(Symbol);
@@ -441,22 +410,6 @@ void MCAsmStreamer::EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol) {
EmitEOL();
}
-void MCAsmStreamer::EmitDwarfAdvanceLineAddr(int64_t LineDelta,
- const MCSymbol *LastLabel,
- const MCSymbol *Label,
- unsigned PointerSize) {
- EmitDwarfSetLineAddr(LineDelta, Label, PointerSize);
-}
-
-void MCAsmStreamer::EmitDwarfAdvanceFrameAddr(const MCSymbol *LastLabel,
- const MCSymbol *Label) {
- EmitIntValue(dwarf::DW_CFA_advance_loc4, 1);
- const MCExpr *AddrDelta = BuildSymbolDiff(getContext(), Label, LastLabel);
- AddrDelta = ForceExpAbs(AddrDelta);
- EmitValue(AddrDelta, 4);
-}
-
-
bool MCAsmStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
MCSymbolAttr Attribute) {
switch (Attribute) {
@@ -486,7 +439,6 @@ bool MCAsmStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
return true;
case MCSA_Global: // .globl/.global
OS << MAI->getGlobalDirective();
- FlagMap[Symbol] |= EHGlobal;
break;
case MCSA_Hidden: OS << "\t.hidden\t"; break;
case MCSA_IndirectSymbol: OS << "\t.indirect_symbol\t"; break;
@@ -497,14 +449,12 @@ bool MCAsmStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
case MCSA_SymbolResolver: OS << "\t.symbol_resolver\t"; break;
case MCSA_PrivateExtern:
OS << "\t.private_extern\t";
- FlagMap[Symbol] |= EHPrivateExtern;
break;
case MCSA_Protected: OS << "\t.protected\t"; break;
case MCSA_Reference: OS << "\t.reference\t"; break;
case MCSA_Weak: OS << "\t.weak\t"; break;
case MCSA_WeakDefinition:
OS << "\t.weak_definition\t";
- FlagMap[Symbol] |= EHWeakDefinition;
break;
// .weak_reference
case MCSA_WeakReference: OS << MAI->getWeakRefDirective(); break;
@@ -560,7 +510,7 @@ void MCAsmStreamer::EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) {
void MCAsmStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
unsigned ByteAlignment) {
// Common symbols do not belong to any actual section.
- AssignSection(Symbol, NULL);
+ AssignSection(Symbol, nullptr);
OS << "\t.comm\t" << *Symbol << ',' << Size;
if (ByteAlignment != 0) {
@@ -579,7 +529,7 @@ void MCAsmStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
void MCAsmStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
unsigned ByteAlign) {
// Common symbols do not belong to any actual section.
- AssignSection(Symbol, NULL);
+ AssignSection(Symbol, nullptr);
OS << "\t.lcomm\t" << *Symbol << ',' << Size;
if (ByteAlign > 1) {
@@ -610,7 +560,7 @@ void MCAsmStreamer::EmitZerofill(const MCSection *Section, MCSymbol *Symbol,
const MCSectionMachO *MOSection = ((const MCSectionMachO*)Section);
OS << MOSection->getSegmentName() << "," << MOSection->getSectionName();
- if (Symbol != NULL) {
+ if (Symbol) {
OS << ',' << *Symbol << ',' << Size;
if (ByteAlignment != 0)
OS << ',' << Log2_32(ByteAlignment);
@@ -625,7 +575,7 @@ void MCAsmStreamer::EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol,
uint64_t Size, unsigned ByteAlignment) {
AssignSection(Symbol, Section);
- assert(Symbol != NULL && "Symbol shouldn't be NULL!");
+ assert(Symbol && "Symbol shouldn't be NULL!");
// Instead of using the Section we'll just use the shortcut.
// This is a mach-o specific directive and section.
OS << ".tbss " << *Symbol << ", " << Size;
@@ -702,11 +652,12 @@ void MCAsmStreamer::EmitIntValue(uint64_t Value, unsigned Size) {
EmitValue(MCConstantExpr::Create(Value, getContext()), Size);
}
-void MCAsmStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size) {
+void MCAsmStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size,
+ const SMLoc &Loc) {
assert(Size <= 8 && "Invalid size");
assert(getCurrentSection().first &&
"Cannot emit contents before setting section!");
- const char *Directive = 0;
+ const char *Directive = nullptr;
switch (Size) {
default: break;
case 1: Directive = MAI->getData8bitsDirective(); break;
@@ -775,13 +726,13 @@ void MCAsmStreamer::EmitSLEB128Value(const MCExpr *Value) {
}
void MCAsmStreamer::EmitGPRel64Value(const MCExpr *Value) {
- assert(MAI->getGPRel64Directive() != 0);
+ assert(MAI->getGPRel64Directive() != nullptr);
OS << MAI->getGPRel64Directive() << *Value;
EmitEOL();
}
void MCAsmStreamer::EmitGPRel32Value(const MCExpr *Value) {
- assert(MAI->getGPRel32Directive() != 0);
+ assert(MAI->getGPRel32Directive() != nullptr);
OS << MAI->getGPRel32Directive() << *Value;
EmitEOL();
}
@@ -973,10 +924,6 @@ void MCAsmStreamer::EmitIdent(StringRef IdentString) {
void MCAsmStreamer::EmitCFISections(bool EH, bool Debug) {
MCStreamer::EmitCFISections(EH, Debug);
-
- if (!UseCFI)
- return;
-
OS << "\t.cfi_sections ";
if (EH) {
OS << ".eh_frame";
@@ -990,11 +937,6 @@ void MCAsmStreamer::EmitCFISections(bool EH, bool Debug) {
}
void MCAsmStreamer::EmitCFIStartProcImpl(MCDwarfFrameInfo &Frame) {
- if (!UseCFI) {
- RecordProcStart(Frame);
- return;
- }
-
OS << "\t.cfi_startproc";
if (Frame.IsSimple)
OS << " simple";
@@ -1002,11 +944,6 @@ void MCAsmStreamer::EmitCFIStartProcImpl(MCDwarfFrameInfo &Frame) {
}
void MCAsmStreamer::EmitCFIEndProcImpl(MCDwarfFrameInfo &Frame) {
- if (!UseCFI) {
- RecordProcEnd(Frame);
- return;
- }
-
// Put a dummy non-null value in Frame.End to mark that this frame has been
// closed.
Frame.End = (MCSymbol *) 1;
@@ -1027,10 +964,6 @@ void MCAsmStreamer::EmitRegisterName(int64_t Register) {
void MCAsmStreamer::EmitCFIDefCfa(int64_t Register, int64_t Offset) {
MCStreamer::EmitCFIDefCfa(Register, Offset);
-
- if (!UseCFI)
- return;
-
OS << "\t.cfi_def_cfa ";
EmitRegisterName(Register);
OS << ", " << Offset;
@@ -1039,20 +972,12 @@ void MCAsmStreamer::EmitCFIDefCfa(int64_t Register, int64_t Offset) {
void MCAsmStreamer::EmitCFIDefCfaOffset(int64_t Offset) {
MCStreamer::EmitCFIDefCfaOffset(Offset);
-
- if (!UseCFI)
- return;
-
OS << "\t.cfi_def_cfa_offset " << Offset;
EmitEOL();
}
void MCAsmStreamer::EmitCFIDefCfaRegister(int64_t Register) {
MCStreamer::EmitCFIDefCfaRegister(Register);
-
- if (!UseCFI)
- return;
-
OS << "\t.cfi_def_cfa_register ";
EmitRegisterName(Register);
EmitEOL();
@@ -1060,10 +985,6 @@ void MCAsmStreamer::EmitCFIDefCfaRegister(int64_t Register) {
void MCAsmStreamer::EmitCFIOffset(int64_t Register, int64_t Offset) {
this->MCStreamer::EmitCFIOffset(Register, Offset);
-
- if (!UseCFI)
- return;
-
OS << "\t.cfi_offset ";
EmitRegisterName(Register);
OS << ", " << Offset;
@@ -1073,50 +994,30 @@ void MCAsmStreamer::EmitCFIOffset(int64_t Register, int64_t Offset) {
void MCAsmStreamer::EmitCFIPersonality(const MCSymbol *Sym,
unsigned Encoding) {
MCStreamer::EmitCFIPersonality(Sym, Encoding);
-
- if (!UseCFI)
- return;
-
OS << "\t.cfi_personality " << Encoding << ", " << *Sym;
EmitEOL();
}
void MCAsmStreamer::EmitCFILsda(const MCSymbol *Sym, unsigned Encoding) {
MCStreamer::EmitCFILsda(Sym, Encoding);
-
- if (!UseCFI)
- return;
-
OS << "\t.cfi_lsda " << Encoding << ", " << *Sym;
EmitEOL();
}
void MCAsmStreamer::EmitCFIRememberState() {
MCStreamer::EmitCFIRememberState();
-
- if (!UseCFI)
- return;
-
OS << "\t.cfi_remember_state";
EmitEOL();
}
void MCAsmStreamer::EmitCFIRestoreState() {
MCStreamer::EmitCFIRestoreState();
-
- if (!UseCFI)
- return;
-
OS << "\t.cfi_restore_state";
EmitEOL();
}
void MCAsmStreamer::EmitCFISameValue(int64_t Register) {
MCStreamer::EmitCFISameValue(Register);
-
- if (!UseCFI)
- return;
-
OS << "\t.cfi_same_value ";
EmitRegisterName(Register);
EmitEOL();
@@ -1124,10 +1025,6 @@ void MCAsmStreamer::EmitCFISameValue(int64_t Register) {
void MCAsmStreamer::EmitCFIRelOffset(int64_t Register, int64_t Offset) {
MCStreamer::EmitCFIRelOffset(Register, Offset);
-
- if (!UseCFI)
- return;
-
OS << "\t.cfi_rel_offset ";
EmitRegisterName(Register);
OS << ", " << Offset;
@@ -1136,50 +1033,30 @@ void MCAsmStreamer::EmitCFIRelOffset(int64_t Register, int64_t Offset) {
void MCAsmStreamer::EmitCFIAdjustCfaOffset(int64_t Adjustment) {
MCStreamer::EmitCFIAdjustCfaOffset(Adjustment);
-
- if (!UseCFI)
- return;
-
OS << "\t.cfi_adjust_cfa_offset " << Adjustment;
EmitEOL();
}
void MCAsmStreamer::EmitCFISignalFrame() {
MCStreamer::EmitCFISignalFrame();
-
- if (!UseCFI)
- return;
-
OS << "\t.cfi_signal_frame";
EmitEOL();
}
void MCAsmStreamer::EmitCFIUndefined(int64_t Register) {
MCStreamer::EmitCFIUndefined(Register);
-
- if (!UseCFI)
- return;
-
OS << "\t.cfi_undefined " << Register;
EmitEOL();
}
void MCAsmStreamer::EmitCFIRegister(int64_t Register1, int64_t Register2) {
MCStreamer::EmitCFIRegister(Register1, Register2);
-
- if (!UseCFI)
- return;
-
OS << "\t.cfi_register " << Register1 << ", " << Register2;
EmitEOL();
}
void MCAsmStreamer::EmitCFIWindowSave() {
MCStreamer::EmitCFIWindowSave();
-
- if (!UseCFI)
- return;
-
OS << "\t.cfi_window_save";
EmitEOL();
}
@@ -1257,14 +1134,17 @@ void MCAsmStreamer::EmitWin64EHHandlerData() {
void MCAsmStreamer::EmitWin64EHPushReg(unsigned Register) {
MCStreamer::EmitWin64EHPushReg(Register);
- OS << "\t.seh_pushreg " << Register;
+ OS << "\t.seh_pushreg ";
+ EmitRegisterName(Register);
EmitEOL();
}
void MCAsmStreamer::EmitWin64EHSetFrame(unsigned Register, unsigned Offset) {
MCStreamer::EmitWin64EHSetFrame(Register, Offset);
- OS << "\t.seh_setframe " << Register << ", " << Offset;
+ OS << "\t.seh_setframe ";
+ EmitRegisterName(Register);
+ OS << ", " << Offset;
EmitEOL();
}
@@ -1278,14 +1158,18 @@ void MCAsmStreamer::EmitWin64EHAllocStack(unsigned Size) {
void MCAsmStreamer::EmitWin64EHSaveReg(unsigned Register, unsigned Offset) {
MCStreamer::EmitWin64EHSaveReg(Register, Offset);
- OS << "\t.seh_savereg " << Register << ", " << Offset;
+ OS << "\t.seh_savereg ";
+ EmitRegisterName(Register);
+ OS << ", " << Offset;
EmitEOL();
}
void MCAsmStreamer::EmitWin64EHSaveXMM(unsigned Register, unsigned Offset) {
MCStreamer::EmitWin64EHSaveXMM(Register, Offset);
- OS << "\t.seh_savexmm " << Register << ", " << Offset;
+ OS << "\t.seh_savexmm ";
+ EmitRegisterName(Register);
+ OS << ", " << Offset;
EmitEOL();
}
@@ -1455,26 +1339,13 @@ void MCAsmStreamer::FinishImpl() {
EmitLabel(Label);
}
}
-
- if (!UseCFI)
- EmitFrames(AsmBackend.get(), false);
-}
-
-MCSymbolData &MCAsmStreamer::getOrCreateSymbolData(const MCSymbol *Symbol) {
- MCSymbolData *&Entry = SymbolMap[Symbol];
-
- if (!Entry)
- Entry = new MCSymbolData(*Symbol, 0, 0, 0);
-
- return *Entry;
}
MCStreamer *llvm::createAsmStreamer(MCContext &Context,
formatted_raw_ostream &OS,
- bool isVerboseAsm, bool useCFI,
- bool useDwarfDirectory, MCInstPrinter *IP,
- MCCodeEmitter *CE, MCAsmBackend *MAB,
- bool ShowInst) {
- return new MCAsmStreamer(Context, OS, isVerboseAsm, useCFI, useDwarfDirectory,
- IP, CE, MAB, ShowInst);
+ bool isVerboseAsm, bool useDwarfDirectory,
+ MCInstPrinter *IP, MCCodeEmitter *CE,
+ MCAsmBackend *MAB, bool ShowInst) {
+ return new MCAsmStreamer(Context, OS, isVerboseAsm, useDwarfDirectory, IP, CE,
+ MAB, ShowInst);
}
diff --git a/lib/MC/MCAssembler.cpp b/lib/MC/MCAssembler.cpp
index 724ca29..886a5f5 100644
--- a/lib/MC/MCAssembler.cpp
+++ b/lib/MC/MCAssembler.cpp
@@ -7,7 +7,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "assembler"
#include "llvm/MC/MCAssembler.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
@@ -28,12 +27,11 @@
#include "llvm/Support/LEB128.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Support/Compression.h"
-#include "llvm/Support/Host.h"
-
+#include <tuple>
using namespace llvm;
+#define DEBUG_TYPE "assembler"
+
namespace {
namespace stats {
STATISTIC(EmittedFragments, "Number of emitted assembler fragments - total");
@@ -119,36 +117,89 @@ uint64_t MCAsmLayout::getFragmentOffset(const MCFragment *F) const {
return F->Offset;
}
-uint64_t MCAsmLayout::getSymbolOffset(const MCSymbolData *SD) const {
+// Simple getSymbolOffset helper for the non-variable case.
+static bool getLabelOffset(const MCAsmLayout &Layout, const MCSymbolData &SD,
+ bool ReportError, uint64_t &Val) {
+ if (!SD.getFragment()) {
+ if (ReportError)
+ report_fatal_error("unable to evaluate offset to undefined symbol '" +
+ SD.getSymbol().getName() + "'");
+ return false;
+ }
+ Val = Layout.getFragmentOffset(SD.getFragment()) + SD.getOffset();
+ return true;
+}
+
+static bool getSymbolOffsetImpl(const MCAsmLayout &Layout,
+ const MCSymbolData *SD, bool ReportError,
+ uint64_t &Val) {
const MCSymbol &S = SD->getSymbol();
- // If this is a variable, then recursively evaluate now.
- if (S.isVariable()) {
- MCValue Target;
- if (!S.getVariableValue()->EvaluateAsRelocatable(Target, this))
- report_fatal_error("unable to evaluate offset for variable '" +
- S.getName() + "'");
+ if (!S.isVariable())
+ return getLabelOffset(Layout, *SD, ReportError, Val);
- // Verify that any used symbols are defined.
- if (Target.getSymA() && Target.getSymA()->getSymbol().isUndefined())
- report_fatal_error("unable to evaluate offset to undefined symbol '" +
- Target.getSymA()->getSymbol().getName() + "'");
- if (Target.getSymB() && Target.getSymB()->getSymbol().isUndefined())
- report_fatal_error("unable to evaluate offset to undefined symbol '" +
- Target.getSymB()->getSymbol().getName() + "'");
-
- uint64_t Offset = Target.getConstant();
- if (Target.getSymA())
- Offset += getSymbolOffset(&Assembler.getSymbolData(
- Target.getSymA()->getSymbol()));
- if (Target.getSymB())
- Offset -= getSymbolOffset(&Assembler.getSymbolData(
- Target.getSymB()->getSymbol()));
- return Offset;
+ // If SD is a variable, evaluate it.
+ MCValue Target;
+ if (!S.getVariableValue()->EvaluateAsValue(Target, &Layout))
+ report_fatal_error("unable to evaluate offset for variable '" +
+ S.getName() + "'");
+
+ uint64_t Offset = Target.getConstant();
+
+ const MCAssembler &Asm = Layout.getAssembler();
+
+ const MCSymbolRefExpr *A = Target.getSymA();
+ if (A) {
+ uint64_t ValA;
+ if (!getLabelOffset(Layout, Asm.getSymbolData(A->getSymbol()), ReportError,
+ ValA))
+ return false;
+ Offset += ValA;
}
- assert(SD->getFragment() && "Invalid getOffset() on undefined symbol!");
- return getFragmentOffset(SD->getFragment()) + SD->getOffset();
+ const MCSymbolRefExpr *B = Target.getSymB();
+ if (B) {
+ uint64_t ValB;
+ if (!getLabelOffset(Layout, Asm.getSymbolData(B->getSymbol()), ReportError,
+ ValB))
+ return false;
+ Offset -= ValB;
+ }
+
+ Val = Offset;
+ return true;
+}
+
+bool MCAsmLayout::getSymbolOffset(const MCSymbolData *SD, uint64_t &Val) const {
+ return getSymbolOffsetImpl(*this, SD, false, Val);
+}
+
+uint64_t MCAsmLayout::getSymbolOffset(const MCSymbolData *SD) const {
+ uint64_t Val;
+ getSymbolOffsetImpl(*this, SD, true, Val);
+ return Val;
+}
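
// Illustrative reduction of the variable case above: a relocatable value is
// Constant + SymA - SymB, so the offset is the constant plus A's label
// offset, minus B's (error handling for undefined labels omitted here).
#include <cstdint>

static uint64_t evalVariableOffset(int64_t Constant, const uint64_t *OffA,
                                   const uint64_t *OffB) {
  uint64_t Offset = static_cast<uint64_t>(Constant);
  if (OffA)
    Offset += *OffA;
  if (OffB)
    Offset -= *OffB;
  return Offset;
}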
+
+const MCSymbol *MCAsmLayout::getBaseSymbol(const MCSymbol &Symbol) const {
+ if (!Symbol.isVariable())
+ return &Symbol;
+
+ const MCExpr *Expr = Symbol.getVariableValue();
+ MCValue Value;
+ if (!Expr->EvaluateAsValue(Value, this))
+ llvm_unreachable("Invalid Expression");
+
+ const MCSymbolRefExpr *RefB = Value.getSymB();
+ if (RefB)
+ Assembler.getContext().FatalError(
+ SMLoc(), Twine("symbol '") + RefB->getSymbol().getName() +
+ "' could not be evaluated in a subtraction expression");
+
+ const MCSymbolRefExpr *A = Value.getSymA();
+ if (!A)
+ return nullptr;
+
+ return &A->getSymbol();
}
uint64_t MCAsmLayout::getSectionAddressSize(const MCSectionData *SD) const {
@@ -215,7 +266,7 @@ MCFragment::~MCFragment() {
}
MCFragment::MCFragment(FragmentType _Kind, MCSectionData *_Parent)
- : Kind(_Kind), Parent(_Parent), Atom(0), Offset(~UINT64_C(0))
+ : Kind(_Kind), Parent(_Parent), Atom(nullptr), Offset(~UINT64_C(0))
{
if (Parent)
Parent->getFragmentList().push_back(this);
@@ -233,40 +284,7 @@ MCEncodedFragmentWithFixups::~MCEncodedFragmentWithFixups() {
/* *** */
-const SmallVectorImpl<char> &MCCompressedFragment::getCompressedContents() const {
- assert(getParent()->size() == 1 &&
- "Only compress sections containing a single fragment");
- if (CompressedContents.empty()) {
- std::unique_ptr<MemoryBuffer> CompressedSection;
- zlib::Status Success =
- zlib::compress(StringRef(getContents().data(), getContents().size()),
- CompressedSection);
- (void)Success;
- assert(Success == zlib::StatusOK);
- CompressedContents.push_back('Z');
- CompressedContents.push_back('L');
- CompressedContents.push_back('I');
- CompressedContents.push_back('B');
- uint64_t Size = getContents().size();
- if (sys::IsLittleEndianHost)
- Size = sys::SwapByteOrder(Size);
- CompressedContents.append(reinterpret_cast<char *>(&Size),
- reinterpret_cast<char *>(&Size + 1));
- CompressedContents.append(CompressedSection->getBuffer().begin(),
- CompressedSection->getBuffer().end());
- }
- return CompressedContents;
-}
-
-SmallVectorImpl<char> &MCCompressedFragment::getContents() {
- assert(CompressedContents.empty() &&
- "Fragment contents should not be altered after compression");
- return MCDataFragment::getContents();
-}
-
-/* *** */
-
-MCSectionData::MCSectionData() : Section(0) {}
+MCSectionData::MCSectionData() : Section(nullptr) {}
MCSectionData::MCSectionData(const MCSection &_Section, MCAssembler *A)
: Section(&_Section),
@@ -286,7 +304,7 @@ MCSectionData::getSubsectionInsertionPoint(unsigned Subsection) {
SmallVectorImpl<std::pair<unsigned, MCFragment *> >::iterator MI =
std::lower_bound(SubsectionFragmentMap.begin(), SubsectionFragmentMap.end(),
- std::make_pair(Subsection, (MCFragment *)0));
+ std::make_pair(Subsection, (MCFragment *)nullptr));
bool ExactMatch = false;
if (MI != SubsectionFragmentMap.end()) {
ExactMatch = MI->first == Subsection;
@@ -311,13 +329,13 @@ MCSectionData::getSubsectionInsertionPoint(unsigned Subsection) {
/* *** */
-MCSymbolData::MCSymbolData() : Symbol(0) {}
+MCSymbolData::MCSymbolData() : Symbol(nullptr) {}
MCSymbolData::MCSymbolData(const MCSymbol &_Symbol, MCFragment *_Fragment,
uint64_t _Offset, MCAssembler *A)
: Symbol(&_Symbol), Fragment(_Fragment), Offset(_Offset),
IsExternal(false), IsPrivateExtern(false),
- CommonSize(0), SymbolSize(0), CommonAlign(0),
+ CommonSize(0), SymbolSize(nullptr), CommonAlign(0),
Flags(0), Index(0)
{
if (A)
@@ -358,6 +376,31 @@ void MCAssembler::reset() {
getLOHContainer().reset();
}
+bool MCAssembler::isThumbFunc(const MCSymbol *Symbol) const {
+ if (ThumbFuncs.count(Symbol))
+ return true;
+
+ if (!Symbol->isVariable())
+ return false;
+
+ // FIXME: It looks like gas supports some cases of the form "foo + 2". It
+ // is not clear if that is a bug or a feature.
+ const MCExpr *Expr = Symbol->getVariableValue();
+ const MCSymbolRefExpr *Ref = dyn_cast<MCSymbolRefExpr>(Expr);
+ if (!Ref)
+ return false;
+
+ if (Ref->getKind() != MCSymbolRefExpr::VK_None)
+ return false;
+
+ const MCSymbol &Sym = Ref->getSymbol();
+ if (!isThumbFunc(&Sym))
+ return false;
+
+ ThumbFuncs.insert(Symbol); // Cache it.
+ return true;
+}
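
// Sketch of the memoized walk above: a symbol is a Thumb function if it is
// marked directly or is a plain (VK_None) alias of one; positive answers are
// cached so each alias chain is only walked once.
#include <set>

struct SymNode {
  const SymNode *AliasOf; // non-null for a plain symbol-to-symbol alias
};

static bool isThumbFuncSketch(const SymNode *S,
                              std::set<const SymNode *> &ThumbFuncs) {
  if (ThumbFuncs.count(S))
    return true;
  if (!S->AliasOf || !isThumbFuncSketch(S->AliasOf, ThumbFuncs))
    return false;
  ThumbFuncs.insert(S); // cache the positive result
  return true;
}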
+
bool MCAssembler::isSymbolLinkerVisible(const MCSymbol &Symbol) const {
// Non-temporary labels should always be visible to the linker.
if (!Symbol.isTemporary())
@@ -378,13 +421,13 @@ const MCSymbolData *MCAssembler::getAtom(const MCSymbolData *SD) const {
// Absolute and undefined symbols have no defining atom.
if (!SD->getFragment())
- return 0;
+ return nullptr;
// Non-linker visible symbols in sections which can't be atomized have no
// defining atom.
if (!getBackend().isSectionAtomizable(
SD->getFragment()->getParent()->getSection()))
- return 0;
+ return nullptr;
// Otherwise, return the atom for the containing fragment.
return SD->getFragment()->getAtom();
@@ -467,8 +510,6 @@ uint64_t MCAssembler::computeFragmentSize(const MCAsmLayout &Layout,
case MCFragment::FT_Relaxable:
case MCFragment::FT_CompactEncodedInst:
return cast<MCEncodedFragment>(F).getContents().size();
- case MCFragment::FT_Compressed:
- return cast<MCCompressedFragment>(F).getCompressedContents().size();
case MCFragment::FT_Fill:
return cast<MCFillFragment>(F).getSize();
@@ -657,11 +698,6 @@ static void writeFragment(const MCAssembler &Asm, const MCAsmLayout &Layout,
break;
}
- case MCFragment::FT_Compressed:
- ++stats::EmittedDataFragments;
- OW->WriteBytes(cast<MCCompressedFragment>(F).getCompressedContents());
- break;
-
case MCFragment::FT_Data:
++stats::EmittedDataFragments;
writeFragmentContents(F, OW);
@@ -738,7 +774,6 @@ void MCAssembler::writeSectionData(const MCSectionData *SD,
ie = SD->end(); it != ie; ++it) {
switch (it->getKind()) {
default: llvm_unreachable("Invalid fragment in virtual section!");
- case MCFragment::FT_Compressed:
case MCFragment::FT_Data: {
// Check that we aren't trying to write a non-zero contents (or fixups)
// into a virtual section. This is to support clients which use standard
@@ -992,7 +1027,7 @@ bool MCAssembler::layoutSectionOnce(MCAsmLayout &Layout, MCSectionData &SD) {
// remain NULL if none were relaxed.
// When a fragment is relaxed, all the fragments following it should get
// invalidated because their offset is going to change.
- MCFragment *FirstRelaxedFragment = NULL;
+ MCFragment *FirstRelaxedFragment = nullptr;
// Attempt to relax all the fragments in the section.
for (MCSectionData::iterator I = SD.begin(), IE = SD.end(); I != IE; ++I) {
@@ -1070,8 +1105,6 @@ void MCFragment::dump() {
switch (getKind()) {
case MCFragment::FT_Align: OS << "MCAlignFragment"; break;
case MCFragment::FT_Data: OS << "MCDataFragment"; break;
- case MCFragment::FT_Compressed:
- OS << "MCCompressedFragment"; break;
case MCFragment::FT_CompactEncodedInst:
OS << "MCCompactEncodedInstFragment"; break;
case MCFragment::FT_Fill: OS << "MCFillFragment"; break;
@@ -1098,7 +1131,6 @@ void MCFragment::dump() {
<< " MaxBytesToEmit:" << AF->getMaxBytesToEmit() << ">";
break;
}
- case MCFragment::FT_Compressed:
case MCFragment::FT_Data: {
const MCDataFragment *DF = cast<MCDataFragment>(this);
OS << "\n ";
diff --git a/lib/MC/MCContext.cpp b/lib/MC/MCContext.cpp
index 73ffdc0..c163268 100644
--- a/lib/MC/MCContext.cpp
+++ b/lib/MC/MCContext.cpp
@@ -29,19 +29,13 @@
using namespace llvm;
-typedef std::pair<std::string, std::string> SectionGroupPair;
-
-typedef StringMap<const MCSectionMachO*> MachOUniqueMapTy;
-typedef std::map<SectionGroupPair, const MCSectionELF *> ELFUniqueMapTy;
-typedef std::map<SectionGroupPair, const MCSectionCOFF *> COFFUniqueMapTy;
-
MCContext::MCContext(const MCAsmInfo *mai, const MCRegisterInfo *mri,
const MCObjectFileInfo *mofi, const SourceMgr *mgr,
bool DoAutoReset)
: SrcMgr(mgr), MAI(mai), MRI(mri), MOFI(mofi), Allocator(),
Symbols(Allocator), UsedNames(Allocator), NextUniqueID(0),
CurrentDwarfLoc(0, 0, 0, DWARF2_FLAG_IS_STMT, 0, 0), DwarfLocSeen(false),
- GenDwarfForAssembly(false), GenDwarfFileNumber(0),
+ GenDwarfForAssembly(false), GenDwarfFileNumber(0), DwarfVersion(4),
AllowTemporaryLabels(true), DwarfCompileUnitID(0),
AutoReset(DoAutoReset) {
@@ -49,12 +43,8 @@ MCContext::MCContext(const MCAsmInfo *mai, const MCRegisterInfo *mri,
if (EC)
CompilationDir.clear();
- MachOUniquingMap = 0;
- ELFUniquingMap = 0;
- COFFUniquingMap = 0;
-
SecureLogFile = getenv("AS_SECURE_LOG_FILE");
- SecureLog = 0;
+ SecureLog = nullptr;
SecureLogUsed = false;
if (SrcMgr && SrcMgr->getNumBuffers() > 0)
@@ -88,13 +78,9 @@ void MCContext::reset() {
DwarfCompileUnitID = 0;
CurrentDwarfLoc = MCDwarfLoc(0,0,0,DWARF2_FLAG_IS_STMT,0,0);
- // If we have the MachO uniquing map, free it.
- delete (MachOUniqueMapTy*)MachOUniquingMap;
- delete (ELFUniqueMapTy*)ELFUniquingMap;
- delete (COFFUniqueMapTy*)COFFUniquingMap;
- MachOUniquingMap = 0;
- ELFUniquingMap = 0;
- COFFUniquingMap = 0;
+ MachOUniquingMap.clear();
+ ELFUniquingMap.clear();
+ COFFUniquingMap.clear();
NextUniqueID = 0;
AllowTemporaryLabels = true;
@@ -225,11 +211,6 @@ getMachOSection(StringRef Segment, StringRef Section,
// may not have the same flags as the requested section, if so this should be
// diagnosed by the client as an error.
- // Create the map if it doesn't already exist.
- if (MachOUniquingMap == 0)
- MachOUniquingMap = new MachOUniqueMapTy();
- MachOUniqueMapTy &Map = *(MachOUniqueMapTy*)MachOUniquingMap;
-
// Form the name to look up.
SmallString<64> Name;
Name += Segment;
@@ -237,7 +218,7 @@ getMachOSection(StringRef Segment, StringRef Section,
Name += Section;
// Do the lookup, if we have a hit, return it.
- const MCSectionMachO *&Entry = Map[Name.str()];
+ const MCSectionMachO *&Entry = MachOUniquingMap[Name.str()];
if (Entry) return Entry;
// Otherwise, return a new section.
@@ -251,42 +232,48 @@ getELFSection(StringRef Section, unsigned Type, unsigned Flags,
return getELFSection(Section, Type, Flags, Kind, 0, "");
}
+void MCContext::renameELFSection(const MCSectionELF *Section, StringRef Name) {
+ StringRef GroupName;
+ if (const MCSymbol *Group = Section->getGroup())
+ GroupName = Group->getName();
+
+ ELFUniquingMap.erase(SectionGroupPair(Section->getSectionName(), GroupName));
+ auto I =
+ ELFUniquingMap.insert(std::make_pair(SectionGroupPair(Name, GroupName),
+ Section)).first;
+ StringRef CachedName = I->first.first;
+ const_cast<MCSectionELF*>(Section)->setSectionName(CachedName);
+}
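
// Sketch of the re-keying above: move the entry to its new name while the
// mapped section object stays alive; the real code then points the section
// at the map-owned copy of the name so its StringRef stays valid.
#include <map>
#include <string>

template <typename V>
static void renameKey(std::map<std::string, V> &M, const std::string &Old,
                      const std::string &New) {
  V Section = M.at(Old);
  M.erase(Old);
  M.emplace(New, Section);
}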
+
const MCSectionELF *MCContext::
getELFSection(StringRef Section, unsigned Type, unsigned Flags,
SectionKind Kind, unsigned EntrySize, StringRef Group) {
- if (ELFUniquingMap == 0)
- ELFUniquingMap = new ELFUniqueMapTy();
- ELFUniqueMapTy &Map = *(ELFUniqueMapTy*)ELFUniquingMap;
-
- SmallString<32> ZDebugName;
- if (MAI->compressDebugSections() && Section.startswith(".debug_") &&
- Section != ".debug_frame" && Section != ".debug_line")
- Section = (".z" + Section.drop_front(1)).toStringRef(ZDebugName);
-
// Do the lookup, if we have a hit, return it.
- std::pair<ELFUniqueMapTy::iterator, bool> Entry = Map.insert(
- std::make_pair(SectionGroupPair(Section, Group), (MCSectionELF *)0));
- if (!Entry.second) return Entry.first->second;
+ auto IterBool = ELFUniquingMap.insert(
+ std::make_pair(SectionGroupPair(Section, Group), nullptr));
+ auto &Entry = *IterBool.first;
+ if (!IterBool.second) return Entry.second;
// Possibly refine the entry size first.
if (!EntrySize) {
EntrySize = MCSectionELF::DetermineEntrySize(Kind);
}
- MCSymbol *GroupSym = NULL;
+ MCSymbol *GroupSym = nullptr;
if (!Group.empty())
GroupSym = GetOrCreateSymbol(Group);
- MCSectionELF *Result = new (*this) MCSectionELF(
- Entry.first->first.first, Type, Flags, Kind, EntrySize, GroupSym);
- Entry.first->second = Result;
+ StringRef CachedName = Entry.first.first;
+ MCSectionELF *Result = new (*this)
+ MCSectionELF(CachedName, Type, Flags, Kind, EntrySize, GroupSym);
+ Entry.second = Result;
return Result;
}
const MCSectionELF *MCContext::CreateELFGroupSection() {
MCSectionELF *Result =
new (*this) MCSectionELF(".group", ELF::SHT_GROUP, 0,
- SectionKind::getReadOnly(), 4, NULL);
+ SectionKind::getReadOnly(), 4, nullptr);
return Result;
}
@@ -294,26 +281,21 @@ const MCSectionCOFF *
MCContext::getCOFFSection(StringRef Section, unsigned Characteristics,
SectionKind Kind, StringRef COMDATSymName,
int Selection, const MCSectionCOFF *Assoc) {
- if (COFFUniquingMap == 0)
- COFFUniquingMap = new COFFUniqueMapTy();
- COFFUniqueMapTy &Map = *(COFFUniqueMapTy*)COFFUniquingMap;
-
// Do the lookup, if we have a hit, return it.
SectionGroupPair P(Section, COMDATSymName);
- std::pair<COFFUniqueMapTy::iterator, bool> Entry =
- Map.insert(std::make_pair(P, (MCSectionCOFF *)0));
- COFFUniqueMapTy::iterator Iter = Entry.first;
- if (!Entry.second)
+ auto IterBool = COFFUniquingMap.insert(std::make_pair(P, nullptr));
+ auto Iter = IterBool.first;
+ if (!IterBool.second)
return Iter->second;
- const MCSymbol *COMDATSymbol = NULL;
+ const MCSymbol *COMDATSymbol = nullptr;
if (!COMDATSymName.empty())
COMDATSymbol = GetOrCreateSymbol(COMDATSymName);
- MCSectionCOFF *Result =
- new (*this) MCSectionCOFF(Iter->first.first, Characteristics,
- COMDATSymbol, Selection, Assoc, Kind);
+ StringRef CachedName = Iter->first.first;
+ MCSectionCOFF *Result = new (*this) MCSectionCOFF(
+ CachedName, Characteristics, COMDATSymbol, Selection, Assoc, Kind);
Iter->second = Result;
return Result;
@@ -326,14 +308,10 @@ MCContext::getCOFFSection(StringRef Section, unsigned Characteristics,
}
const MCSectionCOFF *MCContext::getCOFFSection(StringRef Section) {
- if (COFFUniquingMap == 0)
- COFFUniquingMap = new COFFUniqueMapTy();
- COFFUniqueMapTy &Map = *(COFFUniqueMapTy*)COFFUniquingMap;
-
SectionGroupPair P(Section, "");
- COFFUniqueMapTy::iterator Iter = Map.find(P);
- if (Iter == Map.end())
- return 0;
+ auto Iter = COFFUniquingMap.find(P);
+ if (Iter == COFFUniquingMap.end())
+ return nullptr;
return Iter->second;
}
@@ -361,7 +339,7 @@ bool MCContext::isValidDwarfFileNumber(unsigned FileNumber, unsigned CUID) {
return !MCDwarfFiles[FileNumber].Name.empty();
}
-void MCContext::FatalError(SMLoc Loc, const Twine &Msg) {
+void MCContext::FatalError(SMLoc Loc, const Twine &Msg) const {
// If we have a source manager and a location, use it. Otherwise just
// use the generic report_fatal_error().
if (!SrcMgr || Loc == SMLoc())
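
The MCContext change above replaces three lazily allocated, void*-typed uniquing maps with ordinary map members keyed by a (section name, group name) pair, so reset() can simply clear() them and no casts or manual deletes are needed. A minimal standalone sketch of the insert-then-construct pattern the new getELFSection/getCOFFSection code follows (types and names below are illustrative stand-ins, not the LLVM API):

    #include <map>
    #include <string>
    #include <utility>

    struct Section { const std::string &Name; };

    using SectionGroupPair = std::pair<std::string, std::string>;
    static std::map<SectionGroupPair, const Section *> UniquingMap;

    const Section *getOrCreateSection(const std::string &Name,
                                      const std::string &Group) {
      // Insert a placeholder; if the key already existed, return the
      // cached section instead of building a new one.
      auto IterBool = UniquingMap.insert({{Name, Group}, nullptr});
      auto &Entry = *IterBool.first;
      if (!IterBool.second)
        return Entry.second;
      // The map node owns the key string, so the section can safely hold
      // a reference to it (mirroring CachedName in the patch).
      auto *Result = new Section{Entry.first.first};
      Entry.second = Result;
      return Result;
    }

Keying the map by the pair also makes renameELFSection a straightforward erase-and-reinsert, as the hunk above shows.
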
diff --git a/lib/MC/MCDisassembler.cpp b/lib/MC/MCDisassembler.cpp
index 7a2b1a1..77d9ce1 100644
--- a/lib/MC/MCDisassembler.cpp
+++ b/lib/MC/MCDisassembler.cpp
@@ -16,20 +16,6 @@ using namespace llvm;
MCDisassembler::~MCDisassembler() {
}
-void MCDisassembler::setupForSymbolicDisassembly(
- LLVMOpInfoCallback GetOpInfo, LLVMSymbolLookupCallback SymbolLookUp,
- void *DisInfo, MCContext *Ctx, std::unique_ptr<MCRelocationInfo> &RelInfo) {
- this->GetOpInfo = GetOpInfo;
- this->SymbolLookUp = SymbolLookUp;
- this->DisInfo = DisInfo;
- this->Ctx = Ctx;
- assert(Ctx != 0 && "No MCContext given for symbolic disassembly");
- if (!Symbolizer)
- Symbolizer.reset(new MCExternalSymbolizer(*Ctx, std::move(RelInfo),
- GetOpInfo, SymbolLookUp,
- DisInfo));
-}
-
bool MCDisassembler::tryAddingSymbolicOperand(MCInst &Inst, int64_t Value,
uint64_t Address, bool IsBranch,
uint64_t Offset,
diff --git a/lib/MC/MCDisassembler/Disassembler.cpp b/lib/MC/MCDisassembler/Disassembler.cpp
index b935b83..0530c26 100644
--- a/lib/MC/MCDisassembler/Disassembler.cpp
+++ b/lib/MC/MCDisassembler/Disassembler.cpp
@@ -41,20 +41,20 @@ LLVMDisasmContextRef LLVMCreateDisasmCPU(const char *Triple, const char *CPU,
std::string Error;
const Target *TheTarget = TargetRegistry::lookupTarget(Triple, Error);
if (!TheTarget)
- return 0;
+ return nullptr;
const MCRegisterInfo *MRI = TheTarget->createMCRegInfo(Triple);
if (!MRI)
- return 0;
+ return nullptr;
// Get the assembler info needed to setup the MCContext.
const MCAsmInfo *MAI = TheTarget->createMCAsmInfo(*MRI, Triple);
if (!MAI)
- return 0;
+ return nullptr;
const MCInstrInfo *MII = TheTarget->createMCInstrInfo();
if (!MII)
- return 0;
+ return nullptr;
// Package up features to be passed to target/subtarget
std::string FeaturesStr;
@@ -62,41 +62,40 @@ LLVMDisasmContextRef LLVMCreateDisasmCPU(const char *Triple, const char *CPU,
const MCSubtargetInfo *STI = TheTarget->createMCSubtargetInfo(Triple, CPU,
FeaturesStr);
if (!STI)
- return 0;
+ return nullptr;
// Set up the MCContext for creating symbols and MCExpr's.
- MCContext *Ctx = new MCContext(MAI, MRI, 0);
+ MCContext *Ctx = new MCContext(MAI, MRI, nullptr);
if (!Ctx)
- return 0;
+ return nullptr;
// Set up disassembler.
- MCDisassembler *DisAsm = TheTarget->createMCDisassembler(*STI);
+ MCDisassembler *DisAsm = TheTarget->createMCDisassembler(*STI, *Ctx);
if (!DisAsm)
- return 0;
+ return nullptr;
std::unique_ptr<MCRelocationInfo> RelInfo(
TheTarget->createMCRelocationInfo(Triple, *Ctx));
if (!RelInfo)
- return 0;
+ return nullptr;
std::unique_ptr<MCSymbolizer> Symbolizer(TheTarget->createMCSymbolizer(
Triple, GetOpInfo, SymbolLookUp, DisInfo, Ctx, RelInfo.release()));
DisAsm->setSymbolizer(std::move(Symbolizer));
- DisAsm->setupForSymbolicDisassembly(GetOpInfo, SymbolLookUp, DisInfo,
- Ctx, RelInfo);
+
// Set up the instruction printer.
int AsmPrinterVariant = MAI->getAssemblerDialect();
MCInstPrinter *IP = TheTarget->createMCInstPrinter(AsmPrinterVariant,
*MAI, *MII, *MRI, *STI);
if (!IP)
- return 0;
+ return nullptr;
LLVMDisasmContext *DC = new LLVMDisasmContext(Triple, DisInfo, TagType,
GetOpInfo, SymbolLookUp,
TheTarget, MAI, MRI,
STI, MII, Ctx, DisAsm, IP);
if (!DC)
- return 0;
+ return nullptr;
DC->setCPU(CPU);
return DC;
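
Two API shifts drive the Disassembler.cpp hunks: createMCDisassembler now takes the MCContext up front, and symbolic disassembly is configured only through setSymbolizer, so the removed setupForSymbolicDisassembly call needs no replacement. The construction code is otherwise a chain of fallible factories; a reduced sketch of that shape (stand-in types, not the C API):

    #include <memory>

    struct Symbolizer {};
    struct Disassembler {
      std::unique_ptr<Symbolizer> Sym;
      void setSymbolizer(std::unique_ptr<Symbolizer> S) { Sym = std::move(S); }
    };

    // Each factory step can fail; every failure now returns nullptr
    // rather than 0, and the symbolizer's ownership is transferred by move.
    Disassembler *createDisassembler(bool TargetKnown) {
      if (!TargetKnown)
        return nullptr;
      auto D = std::make_unique<Disassembler>();
      D->setSymbolizer(std::unique_ptr<Symbolizer>(new Symbolizer()));
      return D.release();
    }
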
diff --git a/lib/MC/MCDwarf.cpp b/lib/MC/MCDwarf.cpp
index 72836ff..be6731a 100644
--- a/lib/MC/MCDwarf.cpp
+++ b/lib/MC/MCDwarf.cpp
@@ -9,6 +9,7 @@
#include "llvm/MC/MCDwarf.h"
#include "llvm/ADT/Hashing.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Config/config.h"
@@ -16,8 +17,8 @@
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCObjectFileInfo.h"
+#include "llvm/MC/MCObjectStreamer.h"
#include "llvm/MC/MCRegisterInfo.h"
-#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -62,7 +63,7 @@ static inline uint64_t ScaleAddrDelta(MCContext &Context, uint64_t AddrDelta) {
// and if there is information from the last .loc directive that has yet to have
// a line entry made for it is made.
//
-void MCLineEntry::Make(MCStreamer *MCOS, const MCSection *Section) {
+void MCLineEntry::Make(MCObjectStreamer *MCOS, const MCSection *Section) {
if (!MCOS->getContext().getDwarfLocSeen())
return;
@@ -113,7 +114,7 @@ static inline const MCExpr *MakeStartMinusEndExpr(const MCStreamer &MCOS,
// in the LineSection.
//
static inline void
-EmitDwarfLineTable(MCStreamer *MCOS, const MCSection *Section,
+EmitDwarfLineTable(MCObjectStreamer *MCOS, const MCSection *Section,
const MCLineSection::MCLineEntryCollection &LineEntries) {
unsigned FileNum = 1;
unsigned LastLine = 1;
@@ -121,7 +122,7 @@ EmitDwarfLineTable(MCStreamer *MCOS, const MCSection *Section,
unsigned Flags = DWARF2_LINE_DEFAULT_IS_STMT ? DWARF2_FLAG_IS_STMT : 0;
unsigned Isa = 0;
unsigned Discriminator = 0;
- MCSymbol *LastLabel = NULL;
+ MCSymbol *LastLabel = nullptr;
// Loop through each MCLineEntry and encode the dwarf line number table.
for (auto it = LineEntries.begin(),
@@ -204,7 +205,7 @@ EmitDwarfLineTable(MCStreamer *MCOS, const MCSection *Section,
//
// This emits the Dwarf file and the line tables.
//
-void MCDwarfLineTable::Emit(MCStreamer *MCOS) {
+void MCDwarfLineTable::Emit(MCObjectStreamer *MCOS) {
MCContext &context = MCOS->getContext();
auto &LineTables = context.getMCDwarfLineTables();
@@ -318,7 +319,7 @@ MCDwarfLineTableHeader::Emit(MCStreamer *MCOS,
return std::make_pair(LineStartSym, LineEndSym);
}
-void MCDwarfLineTable::EmitCU(MCStreamer *MCOS) const {
+void MCDwarfLineTable::EmitCU(MCObjectStreamer *MCOS) const {
MCSymbol *LineEndSym = Header.Emit(MCOS).second;
// Put out the line tables.
@@ -644,8 +645,8 @@ static void EmitGenDwarfInfo(MCStreamer *MCOS,
const MCExpr *Length = MakeStartMinusEndExpr(*MCOS, *InfoStart, *InfoEnd, 4);
MCOS->EmitAbsValue(Length, 4);
- // The 2 byte DWARF version, which is 2.
- MCOS->EmitIntValue(2, 2);
+ // The 2 byte DWARF version.
+ MCOS->EmitIntValue(context.getDwarfVersion(), 2);
// The 4 byte offset to the debug abbrevs from the start of the .debug_abbrev,
// it is at the start of that section so this is zero.
@@ -688,7 +689,7 @@ static void EmitGenDwarfInfo(MCStreamer *MCOS,
const SmallVectorImpl<std::string> &MCDwarfDirs = context.getMCDwarfDirs();
if (MCDwarfDirs.size() > 0) {
MCOS->EmitBytes(MCDwarfDirs[0]);
- MCOS->EmitBytes("/");
+ MCOS->EmitBytes(sys::path::get_separator());
}
const SmallVectorImpl<MCDwarfFile> &MCDwarfFiles =
MCOS->getContext().getMCDwarfFiles();
@@ -727,28 +728,24 @@ static void EmitGenDwarfInfo(MCStreamer *MCOS,
// Third part: the list of label DIEs.
// Loop on saved info for dwarf labels and create the DIEs for them.
- const std::vector<const MCGenDwarfLabelEntry *> &Entries =
- MCOS->getContext().getMCGenDwarfLabelEntries();
- for (std::vector<const MCGenDwarfLabelEntry *>::const_iterator it =
- Entries.begin(), ie = Entries.end(); it != ie;
- ++it) {
- const MCGenDwarfLabelEntry *Entry = *it;
-
+ const std::vector<MCGenDwarfLabelEntry> &Entries =
+ MCOS->getContext().getMCGenDwarfLabelEntries();
+ for (const auto &Entry : Entries) {
// The DW_TAG_label DIE abbrev (2).
MCOS->EmitULEB128IntValue(2);
// AT_name, of the label without any leading underbar.
- MCOS->EmitBytes(Entry->getName());
+ MCOS->EmitBytes(Entry.getName());
MCOS->EmitIntValue(0, 1); // NULL byte to terminate the string.
// AT_decl_file, index into the file table.
- MCOS->EmitIntValue(Entry->getFileNumber(), 4);
+ MCOS->EmitIntValue(Entry.getFileNumber(), 4);
// AT_decl_line, source line number.
- MCOS->EmitIntValue(Entry->getLineNumber(), 4);
+ MCOS->EmitIntValue(Entry.getLineNumber(), 4);
// AT_low_pc, start address of the label.
- const MCExpr *AT_low_pc = MCSymbolRefExpr::Create(Entry->getLabel(),
+ const MCExpr *AT_low_pc = MCSymbolRefExpr::Create(Entry.getLabel(),
MCSymbolRefExpr::VK_None, context);
MCOS->EmitValue(AT_low_pc, AddrSize);
@@ -761,14 +758,6 @@ static void EmitGenDwarfInfo(MCStreamer *MCOS,
// Add the NULL DIE terminating the DW_TAG_unspecified_parameters DIE's.
MCOS->EmitIntValue(0, 1);
}
- // Deallocate the MCGenDwarfLabelEntry classes that saved away the info
- // for the dwarf labels.
- for (std::vector<const MCGenDwarfLabelEntry *>::const_iterator it =
- Entries.begin(), ie = Entries.end(); it != ie;
- ++it) {
- const MCGenDwarfLabelEntry *Entry = *it;
- delete Entry;
- }
// Add the NULL DIE terminating the Compile Unit DIE's.
MCOS->EmitIntValue(0, 1);
@@ -790,8 +779,8 @@ void MCGenDwarfInfo::Emit(MCStreamer *MCOS) {
MCSymbol *LineSectionSymbol = nullptr;
if (CreateDwarfSectionSymbols)
LineSectionSymbol = MCOS->getDwarfLineTableSymbol(0);
- MCSymbol *AbbrevSectionSymbol = NULL;
- MCSymbol *InfoSectionSymbol = NULL;
+ MCSymbol *AbbrevSectionSymbol = nullptr;
+ MCSymbol *InfoSectionSymbol = nullptr;
MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfInfoSection());
if (CreateDwarfSectionSymbols) {
InfoSectionSymbol = context.CreateTempSymbol();
@@ -856,9 +845,8 @@ void MCGenDwarfLabelEntry::Make(MCSymbol *Symbol, MCStreamer *MCOS,
MCOS->EmitLabel(Label);
// Create an entry for the info and add it to the other entries.
- MCGenDwarfLabelEntry *Entry =
- new MCGenDwarfLabelEntry(Name, FileNumber, LineNumber, Label);
- MCOS->getContext().addMCGenDwarfLabelEntry(Entry);
+ MCOS->getContext().addMCGenDwarfLabelEntry(
+ MCGenDwarfLabelEntry(Name, FileNumber, LineNumber, Label));
}
static int getDataAlignmentFactor(MCStreamer &streamer) {
@@ -894,7 +882,7 @@ static unsigned getSizeForEncoding(MCStreamer &streamer,
static void EmitFDESymbol(MCStreamer &streamer, const MCSymbol &symbol,
unsigned symbolEncoding, bool isEH,
- const char *comment = 0) {
+ const char *comment = nullptr) {
MCContext &context = streamer.getContext();
const MCAsmInfo *asmInfo = context.getAsmInfo();
const MCExpr *v = asmInfo->getExprForFDESymbol(&symbol,
@@ -923,13 +911,11 @@ namespace {
class FrameEmitterImpl {
int CFAOffset;
int CIENum;
- bool UsingCFI;
bool IsEH;
const MCSymbol *SectionStart;
public:
- FrameEmitterImpl(bool usingCFI, bool isEH)
- : CFAOffset(0), CIENum(0), UsingCFI(usingCFI), IsEH(isEH),
- SectionStart(0) {}
+ FrameEmitterImpl(bool isEH)
+ : CFAOffset(0), CIENum(0), IsEH(isEH), SectionStart(nullptr) {}
void setSectionStart(const MCSymbol *Label) { SectionStart = Label; }
@@ -937,20 +923,20 @@ namespace {
void EmitCompactUnwind(MCStreamer &streamer,
const MCDwarfFrameInfo &frame);
- const MCSymbol &EmitCIE(MCStreamer &streamer,
+ const MCSymbol &EmitCIE(MCObjectStreamer &streamer,
const MCSymbol *personality,
unsigned personalityEncoding,
const MCSymbol *lsda,
bool IsSignalFrame,
unsigned lsdaEncoding,
bool IsSimple);
- MCSymbol *EmitFDE(MCStreamer &streamer,
+ MCSymbol *EmitFDE(MCObjectStreamer &streamer,
const MCSymbol &cieStart,
const MCDwarfFrameInfo &frame);
- void EmitCFIInstructions(MCStreamer &streamer,
+ void EmitCFIInstructions(MCObjectStreamer &streamer,
ArrayRef<MCCFIInstruction> Instrs,
MCSymbol *BaseLabel);
- void EmitCFIInstruction(MCStreamer &Streamer,
+ void EmitCFIInstruction(MCObjectStreamer &Streamer,
const MCCFIInstruction &Instr);
};
@@ -1001,7 +987,7 @@ static void EmitEncodingByte(MCStreamer &Streamer, unsigned Encoding,
Streamer.EmitIntValue(Encoding, 1);
}
-void FrameEmitterImpl::EmitCFIInstruction(MCStreamer &Streamer,
+void FrameEmitterImpl::EmitCFIInstruction(MCObjectStreamer &Streamer,
const MCCFIInstruction &Instr) {
int dataAlignmentFactor = getDataAlignmentFactor(Streamer);
bool VerboseAsm = Streamer.isVerboseAsm();
@@ -1153,7 +1139,7 @@ void FrameEmitterImpl::EmitCFIInstruction(MCStreamer &Streamer,
/// EmitFrameMoves - Emit frame instructions to describe the layout of the
/// frame.
-void FrameEmitterImpl::EmitCFIInstructions(MCStreamer &streamer,
+void FrameEmitterImpl::EmitCFIInstructions(MCObjectStreamer &streamer,
ArrayRef<MCCFIInstruction> Instrs,
MCSymbol *BaseLabel) {
for (unsigned i = 0, N = Instrs.size(); i < N; ++i) {
@@ -1214,7 +1200,7 @@ void FrameEmitterImpl::EmitCompactUnwind(MCStreamer &Streamer,
Encoding |= 0x40000000;
// Range Start
- unsigned FDEEncoding = MOFI->getFDEEncoding(UsingCFI);
+ unsigned FDEEncoding = MOFI->getFDEEncoding();
unsigned Size = getSizeForEncoding(Streamer, FDEEncoding);
if (VerboseAsm) Streamer.AddComment("Range Start");
Streamer.EmitSymbolValue(Frame.Function, Size);
@@ -1248,7 +1234,7 @@ void FrameEmitterImpl::EmitCompactUnwind(MCStreamer &Streamer,
Streamer.EmitIntValue(0, Size); // No LSDA
}
-const MCSymbol &FrameEmitterImpl::EmitCIE(MCStreamer &streamer,
+const MCSymbol &FrameEmitterImpl::EmitCIE(MCObjectStreamer &streamer,
const MCSymbol *personality,
unsigned personalityEncoding,
const MCSymbol *lsda,
@@ -1346,8 +1332,7 @@ const MCSymbol &FrameEmitterImpl::EmitCIE(MCStreamer &streamer,
EmitEncodingByte(streamer, lsdaEncoding, "LSDA Encoding");
// Encoding of the FDE pointers
- EmitEncodingByte(streamer, MOFI->getFDEEncoding(UsingCFI),
- "FDE Encoding");
+ EmitEncodingByte(streamer, MOFI->getFDEEncoding(), "FDE Encoding");
}
// Initial Instructions
@@ -1356,7 +1341,7 @@ const MCSymbol &FrameEmitterImpl::EmitCIE(MCStreamer &streamer,
if (!IsSimple) {
const std::vector<MCCFIInstruction> &Instructions =
MAI->getInitialFrameState();
- EmitCFIInstructions(streamer, Instructions, NULL);
+ EmitCFIInstructions(streamer, Instructions, nullptr);
}
// Padding
@@ -1366,7 +1351,7 @@ const MCSymbol &FrameEmitterImpl::EmitCIE(MCStreamer &streamer,
return *sectionStart;
}
-MCSymbol *FrameEmitterImpl::EmitFDE(MCStreamer &streamer,
+MCSymbol *FrameEmitterImpl::EmitFDE(MCObjectStreamer &streamer,
const MCSymbol &cieStart,
const MCDwarfFrameInfo &frame) {
MCContext &context = streamer.getContext();
@@ -1405,8 +1390,8 @@ MCSymbol *FrameEmitterImpl::EmitFDE(MCStreamer &streamer,
}
// PC Begin
- unsigned PCEncoding = IsEH ? MOFI->getFDEEncoding(UsingCFI)
- : (unsigned)dwarf::DW_EH_PE_absptr;
+ unsigned PCEncoding =
+ IsEH ? MOFI->getFDEEncoding() : (unsigned)dwarf::DW_EH_PE_absptr;
unsigned PCSize = getSizeForEncoding(streamer, PCEncoding);
EmitFDESymbol(streamer, *frame.Begin, PCEncoding, IsEH, "FDE initial location");
@@ -1443,8 +1428,12 @@ MCSymbol *FrameEmitterImpl::EmitFDE(MCStreamer &streamer,
namespace {
struct CIEKey {
- static const CIEKey getEmptyKey() { return CIEKey(0, 0, -1, false, false); }
- static const CIEKey getTombstoneKey() { return CIEKey(0, -1, 0, false, false); }
+ static const CIEKey getEmptyKey() {
+ return CIEKey(nullptr, 0, -1, false, false);
+ }
+ static const CIEKey getTombstoneKey() {
+ return CIEKey(nullptr, -1, 0, false, false);
+ }
CIEKey(const MCSymbol* Personality_, unsigned PersonalityEncoding_,
unsigned LsdaEncoding_, bool IsSignalFrame_, bool IsSimple_) :
@@ -1487,13 +1476,13 @@ namespace llvm {
};
}
-void MCDwarfFrameEmitter::Emit(MCStreamer &Streamer, MCAsmBackend *MAB,
- bool UsingCFI, bool IsEH) {
+void MCDwarfFrameEmitter::Emit(MCObjectStreamer &Streamer, MCAsmBackend *MAB,
+ bool IsEH) {
Streamer.generateCompactUnwindEncodings(MAB);
MCContext &Context = Streamer.getContext();
const MCObjectFileInfo *MOFI = Context.getObjectFileInfo();
- FrameEmitterImpl Emitter(UsingCFI, IsEH);
+ FrameEmitterImpl Emitter(IsEH);
ArrayRef<MCDwarfFrameInfo> FrameArray = Streamer.getFrameInfos();
// Emit the compact unwind info if available.
@@ -1526,10 +1515,10 @@ void MCDwarfFrameEmitter::Emit(MCStreamer &Streamer, MCAsmBackend *MAB,
Streamer.EmitLabel(SectionStart);
Emitter.setSectionStart(SectionStart);
- MCSymbol *FDEEnd = NULL;
+ MCSymbol *FDEEnd = nullptr;
DenseMap<CIEKey, const MCSymbol*> CIEStarts;
- const MCSymbol *DummyDebugKey = NULL;
+ const MCSymbol *DummyDebugKey = nullptr;
NeedsEHFrameSection = !MOFI->getSupportsCompactUnwindWithoutEHFrame();
for (unsigned i = 0, n = FrameArray.size(); i < n; ++i) {
const MCDwarfFrameInfo &Frame = FrameArray[i];
@@ -1537,7 +1526,7 @@ void MCDwarfFrameEmitter::Emit(MCStreamer &Streamer, MCAsmBackend *MAB,
// Emit the label from the previous iteration
if (FDEEnd) {
Streamer.EmitLabel(FDEEnd);
- FDEEnd = NULL;
+ FDEEnd = nullptr;
}
if (!NeedsEHFrameSection && Frame.CompactUnwindEncoding !=
@@ -1564,7 +1553,7 @@ void MCDwarfFrameEmitter::Emit(MCStreamer &Streamer, MCAsmBackend *MAB,
Streamer.EmitLabel(FDEEnd);
}
-void MCDwarfFrameEmitter::EmitAdvanceLoc(MCStreamer &Streamer,
+void MCDwarfFrameEmitter::EmitAdvanceLoc(MCObjectStreamer &Streamer,
uint64_t AddrDelta) {
MCContext &Context = Streamer.getContext();
SmallString<256> Tmp;
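
Beyond narrowing the streamer parameters to MCObjectStreamer, the MCDwarf hunks make two behavioral changes: the .debug_info header now emits context.getDwarfVersion() instead of a hard-coded 2, and MCGenDwarfLabelEntry objects are stored by value, which removes the trailing delete loop. A by-value container sketch (names are stand-ins):

    #include <string>
    #include <vector>

    struct LabelEntry {
      std::string Name;
      unsigned FileNumber;
      unsigned LineNumber;
    };

    // The context owns the entries directly; destruction is automatic,
    // so no cleanup pass over raw pointers is required.
    static std::vector<LabelEntry> Entries;

    void addEntry(LabelEntry E) { Entries.push_back(std::move(E)); }

    void emitLabelDIEs() {
      for (const auto &Entry : Entries) {
        // ... emit Entry.Name, Entry.FileNumber, Entry.LineNumber ...
        (void)Entry;
      }
    }
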
diff --git a/lib/MC/MCELFStreamer.cpp b/lib/MC/MCELFStreamer.cpp
index f710c3e..767348c 100644
--- a/lib/MC/MCELFStreamer.cpp
+++ b/lib/MC/MCELFStreamer.cpp
@@ -275,11 +275,12 @@ void MCELFStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
EmitCommonSymbol(Symbol, Size, ByteAlignment);
}
-void MCELFStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size) {
+void MCELFStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size,
+ const SMLoc &Loc) {
if (getCurrentSectionData()->isBundleLocked())
report_fatal_error("Emitting values inside a locked bundle is forbidden");
fixSymbolsInTLSFixups(Value);
- MCObjectStreamer::EmitValueImpl(Value, Size);
+ MCObjectStreamer::EmitValueImpl(Value, Size, Loc);
}
void MCELFStreamer::EmitValueToAlignment(unsigned ByteAlignment,
@@ -537,7 +538,7 @@ void MCELFStreamer::Flush() {
}
void MCELFStreamer::FinishImpl() {
- EmitFrames(NULL, true);
+ EmitFrames(nullptr);
Flush();
@@ -559,10 +560,6 @@ void MCELFStreamer::EmitThumbFunc(MCSymbol *Func) {
llvm_unreachable("Generic ELF doesn't support this directive");
}
-MCSymbolData &MCELFStreamer::getOrCreateSymbolData(const MCSymbol *Symbol) {
- return getAssembler().getOrCreateSymbolData(*Symbol);
-}
-
void MCELFStreamer::EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) {
llvm_unreachable("ELF doesn't support this directive");
}
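
The MCELFStreamer hunks thread a source location through EmitValueImpl so it can be attached to the fixup created for the value; if the fixup later fails to resolve, a diagnostic can point at the originating directive. A minimal sketch of recording a location on a fixup (stand-in types, not llvm::SMLoc):

    #include <vector>

    struct SMLoc { const char *Ptr = nullptr; };   // stand-in

    struct Fixup {
      unsigned Offset;
      SMLoc Loc;   // kept so later errors can reference the source
    };

    static std::vector<Fixup> Fixups;

    void emitValue(unsigned Offset, const SMLoc &Loc = SMLoc()) {
      Fixups.push_back({Offset, Loc});
    }
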
diff --git a/lib/MC/MCExpr.cpp b/lib/MC/MCExpr.cpp
index 7f2c478..f724716 100644
--- a/lib/MC/MCExpr.cpp
+++ b/lib/MC/MCExpr.cpp
@@ -7,7 +7,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "mcexpr"
#include "llvm/MC/MCExpr.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringSwitch.h"
@@ -23,6 +22,8 @@
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
+#define DEBUG_TYPE "mcexpr"
+
namespace {
namespace stats {
STATISTIC(MCExprEvaluate, "Number of MCExpr evaluations");
@@ -270,6 +271,8 @@ StringRef MCSymbolRefExpr::getVariantKindName(VariantKind Kind) {
case VK_Mips_GOT_LO16: return "GOT_LO16";
case VK_Mips_CALL_HI16: return "CALL_HI16";
case VK_Mips_CALL_LO16: return "CALL_LO16";
+ case VK_Mips_PCREL_HI16: return "PCREL_HI16";
+ case VK_Mips_PCREL_LO16: return "PCREL_LO16";
case VK_COFF_IMGREL32: return "IMGREL32";
}
llvm_unreachable("Invalid variant kind");
@@ -284,6 +287,8 @@ MCSymbolRefExpr::getVariantKindForName(StringRef Name) {
.Case("gotoff", VK_GOTOFF)
.Case("GOTPCREL", VK_GOTPCREL)
.Case("gotpcrel", VK_GOTPCREL)
+ .Case("GOT_PREL", VK_GOTPCREL)
+ .Case("got_prel", VK_GOTPCREL)
.Case("GOTTPOFF", VK_GOTTPOFF)
.Case("gottpoff", VK_GOTTPOFF)
.Case("INDNTPOFF", VK_INDNTPOFF)
@@ -444,12 +449,12 @@ void MCTargetExpr::anchor() {}
/* *** */
bool MCExpr::EvaluateAsAbsolute(int64_t &Res) const {
- return EvaluateAsAbsolute(Res, 0, 0, 0);
+ return EvaluateAsAbsolute(Res, nullptr, nullptr, nullptr);
}
bool MCExpr::EvaluateAsAbsolute(int64_t &Res,
const MCAsmLayout &Layout) const {
- return EvaluateAsAbsolute(Res, &Layout.getAssembler(), &Layout, 0);
+ return EvaluateAsAbsolute(Res, &Layout.getAssembler(), &Layout, nullptr);
}
bool MCExpr::EvaluateAsAbsolute(int64_t &Res,
@@ -459,7 +464,7 @@ bool MCExpr::EvaluateAsAbsolute(int64_t &Res,
}
bool MCExpr::EvaluateAsAbsolute(int64_t &Res, const MCAssembler &Asm) const {
- return EvaluateAsAbsolute(Res, &Asm, 0, 0);
+ return EvaluateAsAbsolute(Res, &Asm, nullptr, nullptr);
}
bool MCExpr::EvaluateAsAbsolute(int64_t &Res, const MCAssembler *Asm,
@@ -477,7 +482,8 @@ bool MCExpr::EvaluateAsAbsolute(int64_t &Res, const MCAssembler *Asm,
// absolutize differences across sections and that is what the MachO writer
// uses Addrs for.
bool IsRelocatable =
- EvaluateAsRelocatableImpl(Value, Asm, Layout, Addrs, /*InSet*/ Addrs);
+ EvaluateAsRelocatableImpl(Value, Asm, Layout, Addrs, /*InSet*/ Addrs,
+ /*ForceVarExpansion*/ false);
// Record the current value.
Res = Value.getConstant();
@@ -505,8 +511,8 @@ static void AttemptToFoldSymbolOffsetDifference(const MCAssembler *Asm,
if (!Asm->getWriter().IsSymbolRefDifferenceFullyResolved(*Asm, A, B, InSet))
return;
- MCSymbolData &AD = Asm->getSymbolData(SA);
- MCSymbolData &BD = Asm->getSymbolData(SB);
+ const MCSymbolData &AD = Asm->getSymbolData(SA);
+ const MCSymbolData &BD = Asm->getSymbolData(SB);
if (AD.getFragment() == BD.getFragment()) {
Addend += (AD.getOffset() - BD.getOffset());
@@ -518,7 +524,7 @@ static void AttemptToFoldSymbolOffsetDifference(const MCAssembler *Asm,
// Clear the symbol expr pointers to indicate we have folded these
// operands.
- A = B = 0;
+ A = B = nullptr;
return;
}
@@ -544,7 +550,7 @@ static void AttemptToFoldSymbolOffsetDifference(const MCAssembler *Asm,
// Clear the symbol expr pointers to indicate we have folded these
// operands.
- A = B = 0;
+ A = B = nullptr;
}
/// \brief Evaluate the result of an add between (conceptually) two MCValues.
@@ -627,15 +633,21 @@ static bool EvaluateSymbolicAdd(const MCAssembler *Asm,
bool MCExpr::EvaluateAsRelocatable(MCValue &Res,
const MCAsmLayout *Layout) const {
- MCAssembler *Assembler = Layout ? &Layout->getAssembler() : 0;
- return EvaluateAsRelocatableImpl(Res, Assembler, Layout, 0, false);
+ MCAssembler *Assembler = Layout ? &Layout->getAssembler() : nullptr;
+ return EvaluateAsRelocatableImpl(Res, Assembler, Layout, nullptr, false,
+ /*ForceVarExpansion*/ false);
}
-bool MCExpr::EvaluateAsRelocatableImpl(MCValue &Res,
- const MCAssembler *Asm,
+bool MCExpr::EvaluateAsValue(MCValue &Res, const MCAsmLayout *Layout) const {
+ MCAssembler *Assembler = Layout ? &Layout->getAssembler() : nullptr;
+ return EvaluateAsRelocatableImpl(Res, Assembler, Layout, nullptr, false,
+ /*ForceVarExpansion*/ true);
+}
+
+bool MCExpr::EvaluateAsRelocatableImpl(MCValue &Res, const MCAssembler *Asm,
const MCAsmLayout *Layout,
- const SectionAddrMap *Addrs,
- bool InSet) const {
+ const SectionAddrMap *Addrs, bool InSet,
+ bool ForceVarExpansion) const {
++stats::MCExprEvaluate;
switch (getKind()) {
@@ -652,9 +664,9 @@ bool MCExpr::EvaluateAsRelocatableImpl(MCValue &Res,
const MCAsmInfo &MCAsmInfo = SRE->getMCAsmInfo();
// Evaluate recursively if this is a variable.
- if (Sym.isVariable()) {
- if (Sym.getVariableValue()->EvaluateAsRelocatableImpl(Res, Asm, Layout,
- Addrs, true)) {
+ if (Sym.isVariable() && SRE->getKind() == MCSymbolRefExpr::VK_None) {
+ if (Sym.getVariableValue()->EvaluateAsRelocatableImpl(
+ Res, Asm, Layout, Addrs, true, ForceVarExpansion)) {
const MCSymbolRefExpr *A = Res.getSymA();
const MCSymbolRefExpr *B = Res.getSymB();
@@ -668,15 +680,16 @@ bool MCExpr::EvaluateAsRelocatableImpl(MCValue &Res,
if (!A && !B)
return true;
} else {
+ if (ForceVarExpansion)
+ return true;
bool IsSymbol = A && A->getSymbol().isDefined();
- bool IsWeakRef = SRE->getKind() == MCSymbolRefExpr::VK_WEAKREF;
- if (!IsSymbol && !IsWeakRef)
+ if (!IsSymbol)
return true;
}
}
}
- Res = MCValue::get(SRE, 0, 0);
+ Res = MCValue::get(SRE, nullptr, 0);
return true;
}
@@ -684,8 +697,8 @@ bool MCExpr::EvaluateAsRelocatableImpl(MCValue &Res,
const MCUnaryExpr *AUE = cast<MCUnaryExpr>(this);
MCValue Value;
- if (!AUE->getSubExpr()->EvaluateAsRelocatableImpl(Value, Asm, Layout,
- Addrs, InSet))
+ if (!AUE->getSubExpr()->EvaluateAsRelocatableImpl(Value, Asm, Layout, Addrs,
+ InSet, ForceVarExpansion))
return false;
switch (AUE->getOpcode()) {
@@ -718,10 +731,10 @@ bool MCExpr::EvaluateAsRelocatableImpl(MCValue &Res,
const MCBinaryExpr *ABE = cast<MCBinaryExpr>(this);
MCValue LHSValue, RHSValue;
- if (!ABE->getLHS()->EvaluateAsRelocatableImpl(LHSValue, Asm, Layout,
- Addrs, InSet) ||
- !ABE->getRHS()->EvaluateAsRelocatableImpl(RHSValue, Asm, Layout,
- Addrs, InSet))
+ if (!ABE->getLHS()->EvaluateAsRelocatableImpl(LHSValue, Asm, Layout, Addrs,
+ InSet, ForceVarExpansion) ||
+ !ABE->getRHS()->EvaluateAsRelocatableImpl(RHSValue, Asm, Layout, Addrs,
+ InSet, ForceVarExpansion))
return false;
// We only support a few operations on non-constant expressions, handle
@@ -795,7 +808,7 @@ const MCSection *MCExpr::FindAssociatedSection() const {
if (Sym.isDefined())
return &Sym.getSection();
- return 0;
+ return nullptr;
}
case Unary:
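
The MCExpr hunks introduce EvaluateAsValue as a sibling of EvaluateAsRelocatable; both funnel into EvaluateAsRelocatableImpl, where the new ForceVarExpansion flag keeps substituting variable symbol definitions instead of stopping at a symbolic result. Note also that variables are now only expanded when the reference kind is VK_None. A toy resolver illustrating the flag's effect (not the MCExpr API):

    #include <map>
    #include <string>

    static std::map<std::string, std::string> VarDefs; // variable -> definition

    // Without ForceExpansion, stop at the first symbol (a relocatable
    // result); with it, chase variable definitions until a non-variable
    // name is reached.
    std::string resolve(std::string Name, bool ForceExpansion) {
      while (ForceExpansion) {
        auto It = VarDefs.find(Name);
        if (It == VarDefs.end())
          break;
        Name = It->second;
      }
      return Name;
    }
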
diff --git a/lib/MC/MCExternalSymbolizer.cpp b/lib/MC/MCExternalSymbolizer.cpp
index 839516e..7c3073a 100644
--- a/lib/MC/MCExternalSymbolizer.cpp
+++ b/lib/MC/MCExternalSymbolizer.cpp
@@ -83,7 +83,7 @@ bool MCExternalSymbolizer::tryAddingSymbolicOperand(MCInst &MI,
return false;
}
- const MCExpr *Add = NULL;
+ const MCExpr *Add = nullptr;
if (SymbolicOp.AddSymbol.Present) {
if (SymbolicOp.AddSymbol.Name) {
StringRef Name(SymbolicOp.AddSymbol.Name);
@@ -94,7 +94,7 @@ bool MCExternalSymbolizer::tryAddingSymbolicOperand(MCInst &MI,
}
}
- const MCExpr *Sub = NULL;
+ const MCExpr *Sub = nullptr;
if (SymbolicOp.SubtractSymbol.Present) {
if (SymbolicOp.SubtractSymbol.Name) {
StringRef Name(SymbolicOp.SubtractSymbol.Name);
@@ -105,7 +105,7 @@ bool MCExternalSymbolizer::tryAddingSymbolicOperand(MCInst &MI,
}
}
- const MCExpr *Off = NULL;
+ const MCExpr *Off = nullptr;
if (SymbolicOp.Value != 0)
Off = MCConstantExpr::Create(SymbolicOp.Value, Ctx);
@@ -116,17 +116,17 @@ bool MCExternalSymbolizer::tryAddingSymbolicOperand(MCInst &MI,
LHS = MCBinaryExpr::CreateSub(Add, Sub, Ctx);
else
LHS = MCUnaryExpr::CreateMinus(Sub, Ctx);
- if (Off != 0)
+ if (Off)
Expr = MCBinaryExpr::CreateAdd(LHS, Off, Ctx);
else
Expr = LHS;
} else if (Add) {
- if (Off != 0)
+ if (Off)
Expr = MCBinaryExpr::CreateAdd(Add, Off, Ctx);
else
Expr = Add;
} else {
- if (Off != 0)
+ if (Off)
Expr = Off;
else
Expr = MCConstantExpr::Create(0, Ctx);
@@ -189,7 +189,7 @@ MCSymbolizer *createMCSymbolizer(StringRef TT, LLVMOpInfoCallback GetOpInfo,
void *DisInfo,
MCContext *Ctx,
MCRelocationInfo *RelInfo) {
- assert(Ctx != 0 && "No MCContext given for symbolic disassembly");
+ assert(Ctx && "No MCContext given for symbolic disassembly");
return new MCExternalSymbolizer(*Ctx,
std::unique_ptr<MCRelocationInfo>(RelInfo),
diff --git a/lib/MC/MCFixup.cpp b/lib/MC/MCFixup.cpp
deleted file mode 100644
index 8f15db5..0000000
--- a/lib/MC/MCFixup.cpp
+++ /dev/null
@@ -1,37 +0,0 @@
-//===- MCFixup.cpp - Assembly Fixup Implementation ------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/MC/MCFixup.h"
-using namespace llvm;
-
-static MCSymbolRefExpr::VariantKind getAccessVariant(const MCExpr *Expr) {
- switch (Expr->getKind()) {
- case MCExpr::Unary:
- case MCExpr::Target:
- llvm_unreachable("unsupported");
-
- case MCExpr::Constant:
- return MCSymbolRefExpr::VK_None;
-
- case MCExpr::SymbolRef: {
- const MCSymbolRefExpr *SRE = cast<MCSymbolRefExpr>(Expr);
- return SRE->getKind();
- }
- case MCExpr::Binary: {
- const MCBinaryExpr *ABE = cast<MCBinaryExpr>(Expr);
- assert(getAccessVariant(ABE->getRHS()) == MCSymbolRefExpr::VK_None);
- return getAccessVariant(ABE->getLHS());
- }
- }
- llvm_unreachable("unknown MCExpr kind");
-}
-
-MCSymbolRefExpr::VariantKind MCFixup::getAccessVariant() const {
- return ::getAccessVariant(getValue());
-}
diff --git a/lib/MC/MCFunction.cpp b/lib/MC/MCFunction.cpp
index 767e1e0..1ddc250 100644
--- a/lib/MC/MCFunction.cpp
+++ b/lib/MC/MCFunction.cpp
@@ -20,22 +20,17 @@ MCFunction::MCFunction(StringRef Name, MCModule *Parent)
: Name(Name), ParentModule(Parent)
{}
-MCFunction::~MCFunction() {
- for (iterator I = begin(), E = end(); I != E; ++I)
- delete *I;
-}
-
MCBasicBlock &MCFunction::createBlock(const MCTextAtom &TA) {
- MCBasicBlock *MCBB = new MCBasicBlock(TA, this);
- Blocks.push_back(MCBB);
- return *MCBB;
+ std::unique_ptr<MCBasicBlock> MCBB(new MCBasicBlock(TA, this));
+ Blocks.push_back(std::move(MCBB));
+ return *Blocks.back();
}
MCBasicBlock *MCFunction::find(uint64_t StartAddr) {
for (const_iterator I = begin(), E = end(); I != E; ++I)
if ((*I)->getInsts()->getBeginAddr() == StartAddr)
- return *I;
- return 0;
+ return I->get();
+ return nullptr;
}
const MCBasicBlock *MCFunction::find(uint64_t StartAddr) const {
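
MCFunction's basic blocks move from raw owning pointers to std::vector<std::unique_ptr<MCBasicBlock>>, which deletes the destructor's cleanup loop; createBlock returns a reference to the element just pushed, and find hands back a non-owning I->get(). The same pattern is applied to MCModule's function list further down. A compact sketch (hypothetical Block type):

    #include <cstdint>
    #include <memory>
    #include <vector>

    struct Block { uint64_t BeginAddr; };

    struct Function {
      std::vector<std::unique_ptr<Block>> Blocks;   // owning container

      Block &createBlock(uint64_t Begin) {
        Blocks.push_back(std::unique_ptr<Block>(new Block{Begin}));
        return *Blocks.back();   // caller gets a reference, not ownership
      }

      Block *find(uint64_t Begin) {
        for (auto &B : Blocks)
          if (B->BeginAddr == Begin)
            return B.get();      // non-owning pointer
        return nullptr;
      }
    };
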
diff --git a/lib/MC/MCInst.cpp b/lib/MC/MCInst.cpp
index 124cc14..d7b80f5 100644
--- a/lib/MC/MCInst.cpp
+++ b/lib/MC/MCInst.cpp
@@ -34,7 +34,7 @@ void MCOperand::print(raw_ostream &OS, const MCAsmInfo *MAI) const {
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void MCOperand::dump() const {
- print(dbgs(), 0);
+ print(dbgs(), nullptr);
dbgs() << "\n";
}
#endif
@@ -66,7 +66,7 @@ void MCInst::dump_pretty(raw_ostream &OS, const MCAsmInfo *MAI,
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void MCInst::dump() const {
- print(dbgs(), 0);
+ print(dbgs(), nullptr);
dbgs() << "\n";
}
#endif
diff --git a/lib/MC/MCMachOStreamer.cpp b/lib/MC/MCMachOStreamer.cpp
index 7e437f4..37d05e9 100644
--- a/lib/MC/MCMachOStreamer.cpp
+++ b/lib/MC/MCMachOStreamer.cpp
@@ -89,7 +89,7 @@ public:
}
void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
unsigned ByteAlignment) override;
- void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0,
+ void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = nullptr,
uint64_t Size = 0, unsigned ByteAlignment = 0) override;
virtual void EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol,
uint64_t Size, unsigned ByteAlignment = 0) override;
@@ -172,7 +172,7 @@ void MCMachOStreamer::EmitDataRegion(DataRegionData::KindTy Kind) {
MCSymbol *Start = getContext().CreateTempSymbol();
EmitLabel(Start);
// Record the region for the object writer to use.
- DataRegionData Data = { Kind, Start, NULL };
+ DataRegionData Data = { Kind, Start, nullptr };
std::vector<DataRegionData> &Regions = getAssembler().getDataRegions();
Regions.push_back(Data);
}
@@ -183,7 +183,7 @@ void MCMachOStreamer::EmitDataRegionEnd() {
std::vector<DataRegionData> &Regions = getAssembler().getDataRegions();
assert(Regions.size() && "Mismatched .end_data_region!");
DataRegionData &Data = Regions.back();
- assert(Data.End == NULL && "Mismatched .end_data_region!");
+ assert(!Data.End && "Mismatched .end_data_region!");
// Create a temporary label to mark the end of the data region.
Data.End = getContext().CreateTempSymbol();
EmitLabel(Data.End);
@@ -237,10 +237,6 @@ void MCMachOStreamer::EmitThumbFunc(MCSymbol *Symbol) {
// Remember that the function is a thumb function. Fixup and relocation
// values will need adjusted.
getAssembler().setIsThumbFunc(Symbol);
-
- // Mark the thumb bit on the symbol.
- MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
- SD.setFlags(SD.getFlags() | SF_ThumbFunc);
}
bool MCMachOStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
@@ -352,7 +348,7 @@ void MCMachOStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
// FIXME: Darwin 'as' does appear to allow redef of a .comm by itself.
assert(Symbol->isUndefined() && "Cannot define a symbol twice!");
- AssignSection(Symbol, NULL);
+ AssignSection(Symbol, nullptr);
MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
SD.setExternal(true);
@@ -422,7 +418,7 @@ void MCMachOStreamer::EmitInstToData(const MCInst &Inst,
}
void MCMachOStreamer::FinishImpl() {
- EmitFrames(&getAssembler().getBackend(), true);
+ EmitFrames(&getAssembler().getBackend());
// We have to set the fragment atom associations so we can relax properly for
// Mach-O.
@@ -430,13 +426,12 @@ void MCMachOStreamer::FinishImpl() {
// First, scan the symbol table to build a lookup table from fragments to
// defining symbols.
DenseMap<const MCFragment*, MCSymbolData*> DefiningSymbolMap;
- for (MCAssembler::symbol_iterator it = getAssembler().symbol_begin(),
- ie = getAssembler().symbol_end(); it != ie; ++it) {
- if (getAssembler().isSymbolLinkerVisible(it->getSymbol()) &&
- it->getFragment()) {
+ for (MCSymbolData &SD : getAssembler().symbols()) {
+ if (getAssembler().isSymbolLinkerVisible(SD.getSymbol()) &&
+ SD.getFragment()) {
// An atom defining symbol should never be internal to a fragment.
- assert(it->getOffset() == 0 && "Invalid offset in atom defining symbol!");
- DefiningSymbolMap[it->getFragment()] = it;
+ assert(SD.getOffset() == 0 && "Invalid offset in atom defining symbol!");
+ DefiningSymbolMap[SD.getFragment()] = &SD;
}
}
@@ -444,7 +439,7 @@ void MCMachOStreamer::FinishImpl() {
// symbol.
for (MCAssembler::iterator it = getAssembler().begin(),
ie = getAssembler().end(); it != ie; ++it) {
- MCSymbolData *CurrentAtom = 0;
+ MCSymbolData *CurrentAtom = nullptr;
for (MCSectionData::iterator it2 = it->begin(),
ie2 = it->end(); it2 != ie2; ++it2) {
if (MCSymbolData *SD = DefiningSymbolMap.lookup(it2))
diff --git a/lib/MC/MCModule.cpp b/lib/MC/MCModule.cpp
index 7e9e18a..3ed7356 100644
--- a/lib/MC/MCModule.cpp
+++ b/lib/MC/MCModule.cpp
@@ -7,6 +7,7 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/ADT/STLExtras.h"
#include "llvm/MC/MCModule.h"
#include "llvm/MC/MCAtom.h"
#include "llvm/MC/MCFunction.h"
@@ -77,7 +78,7 @@ const MCAtom *MCModule::findAtomContaining(uint64_t Addr) const {
Addr, AtomComp);
if (I != atom_end() && (*I)->getBeginAddr() <= Addr)
return *I;
- return 0;
+ return nullptr;
}
MCAtom *MCModule::findAtomContaining(uint64_t Addr) {
@@ -90,7 +91,7 @@ const MCAtom *MCModule::findFirstAtomAfter(uint64_t Addr) const {
Addr, AtomCompInv);
if (I != atom_end())
return *I;
- return 0;
+ return nullptr;
}
MCAtom *MCModule::findFirstAtomAfter(uint64_t Addr) {
@@ -99,8 +100,9 @@ MCAtom *MCModule::findFirstAtomAfter(uint64_t Addr) {
}
MCFunction *MCModule::createFunction(StringRef Name) {
- Functions.push_back(new MCFunction(Name, this));
- return Functions.back();
+ std::unique_ptr<MCFunction> MCF(new MCFunction(Name, this));
+ Functions.push_back(std::move(MCF));
+ return Functions.back().get();
}
static bool CompBBToAtom(MCBasicBlock *BB, const MCTextAtom *Atom) {
@@ -130,13 +132,11 @@ void MCModule::trackBBForAtom(const MCTextAtom *Atom, MCBasicBlock *BB) {
BBsByAtom.insert(I, BB);
}
+MCModule::MCModule() : Entrypoint(0) { }
+
MCModule::~MCModule() {
for (AtomListTy::iterator AI = atom_begin(),
AE = atom_end();
AI != AE; ++AI)
delete *AI;
- for (FunctionListTy::iterator FI = func_begin(),
- FE = func_end();
- FI != FE; ++FI)
- delete *FI;
}
diff --git a/lib/MC/MCModuleYAML.cpp b/lib/MC/MCModuleYAML.cpp
index 102971b..f81cb14 100644
--- a/lib/MC/MCModuleYAML.cpp
+++ b/lib/MC/MCModuleYAML.cpp
@@ -19,6 +19,7 @@
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Object/YAML.h"
#include "llvm/Support/Allocator.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/YAMLTraits.h"
#include <vector>
@@ -162,12 +163,14 @@ template <> struct ScalarTraits<MCModuleYAML::Operand> {
static void output(const MCModuleYAML::Operand &, void *,
llvm::raw_ostream &);
static StringRef input(StringRef, void *, MCModuleYAML::Operand &);
+ static bool mustQuote(StringRef) { return false; }
};
template <> struct ScalarTraits<MCModuleYAML::OpcodeEnum> {
static void output(const MCModuleYAML::OpcodeEnum &, void *,
llvm::raw_ostream &);
static StringRef input(StringRef, void *, MCModuleYAML::OpcodeEnum &);
+ static bool mustQuote(StringRef) { return false; }
};
void ScalarEnumerationTraits<MCAtom::AtomKind>::enumeration(
@@ -276,7 +279,7 @@ class MCModule2YAML {
const MCModule &MCM;
MCModuleYAML::Module YAMLModule;
void dumpAtom(const MCAtom *MCA);
- void dumpFunction(const MCFunction *MCF);
+ void dumpFunction(const MCFunction &MCF);
void dumpBasicBlock(const MCBasicBlock *MCBB);
public:
@@ -300,7 +303,7 @@ MCModule2YAML::MCModule2YAML(const MCModule &MCM) : MCM(MCM), YAMLModule() {
dumpAtom(*AI);
for (MCModule::const_func_iterator FI = MCM.func_begin(), FE = MCM.func_end();
FI != FE; ++FI)
- dumpFunction(*FI);
+ dumpFunction(**FI);
}
void MCModule2YAML::dumpAtom(const MCAtom *MCA) {
@@ -328,22 +331,22 @@ void MCModule2YAML::dumpAtom(const MCAtom *MCA) {
}
}
-void MCModule2YAML::dumpFunction(const MCFunction *MCF) {
+void MCModule2YAML::dumpFunction(const MCFunction &MCF) {
YAMLModule.Functions.resize(YAMLModule.Functions.size() + 1);
MCModuleYAML::Function &F = YAMLModule.Functions.back();
- F.Name = MCF->getName();
- for (MCFunction::const_iterator BBI = MCF->begin(), BBE = MCF->end();
+ F.Name = MCF.getName();
+ for (MCFunction::const_iterator BBI = MCF.begin(), BBE = MCF.end();
BBI != BBE; ++BBI) {
- const MCBasicBlock *MCBB = *BBI;
+ const MCBasicBlock &MCBB = **BBI;
F.BasicBlocks.resize(F.BasicBlocks.size() + 1);
MCModuleYAML::BasicBlock &BB = F.BasicBlocks.back();
- BB.Address = MCBB->getInsts()->getBeginAddr();
- for (MCBasicBlock::pred_const_iterator PI = MCBB->pred_begin(),
- PE = MCBB->pred_end();
+ BB.Address = MCBB.getInsts()->getBeginAddr();
+ for (MCBasicBlock::pred_const_iterator PI = MCBB.pred_begin(),
+ PE = MCBB.pred_end();
PI != PE; ++PI)
BB.Preds.push_back((*PI)->getInsts()->getBeginAddr());
- for (MCBasicBlock::succ_const_iterator SI = MCBB->succ_begin(),
- SE = MCBB->succ_end();
+ for (MCBasicBlock::succ_const_iterator SI = MCBB.succ_begin(),
+ SE = MCBB.succ_end();
SI != SE; ++SI)
BB.Succs.push_back((*SI)->getInsts()->getBeginAddr());
}
diff --git a/lib/MC/MCNullStreamer.cpp b/lib/MC/MCNullStreamer.cpp
index 894eada..4f2740e 100644
--- a/lib/MC/MCNullStreamer.cpp
+++ b/lib/MC/MCNullStreamer.cpp
@@ -41,11 +41,6 @@ namespace {
void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) override {}
void EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol) override {}
- void EmitDwarfAdvanceLineAddr(int64_t LineDelta,
- const MCSymbol *LastLabel,
- const MCSymbol *Label,
- unsigned PointerSize) override {}
-
bool EmitSymbolAttribute(MCSymbol *Symbol,
MCSymbolAttr Attribute) override {
return true;
@@ -64,13 +59,14 @@ namespace {
unsigned ByteAlignment) override {}
void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
unsigned ByteAlignment) override {}
- void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0,
+ void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = nullptr,
uint64_t Size = 0, unsigned ByteAlignment = 0) override {}
void EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol,
uint64_t Size, unsigned ByteAlignment) override {}
void EmitBytes(StringRef Data) override {}
- void EmitValueImpl(const MCExpr *Value, unsigned Size) override {}
+ void EmitValueImpl(const MCExpr *Value, unsigned Size,
+ const SMLoc &Loc = SMLoc()) override {}
void EmitULEB128Value(const MCExpr *Value) override {}
void EmitSLEB128Value(const MCExpr *Value) override {}
void EmitGPRel32Value(const MCExpr *Value) override {}
diff --git a/lib/MC/MCObjectDisassembler.cpp b/lib/MC/MCObjectDisassembler.cpp
index 146da6d..8a258cb 100644
--- a/lib/MC/MCObjectDisassembler.cpp
+++ b/lib/MC/MCObjectDisassembler.cpp
@@ -31,10 +31,12 @@
using namespace llvm;
using namespace object;
+#define DEBUG_TYPE "mc"
+
MCObjectDisassembler::MCObjectDisassembler(const ObjectFile &Obj,
const MCDisassembler &Dis,
const MCInstrAnalysis &MIA)
- : Obj(Obj), Dis(Dis), MIA(MIA), MOS(0) {}
+ : Obj(Obj), Dis(Dis), MIA(MIA), MOS(nullptr) {}
uint64_t MCObjectDisassembler::getEntrypoint() {
for (const SymbolRef &Symbol : Obj.symbols()) {
@@ -115,8 +117,8 @@ void MCObjectDisassembler::buildSectionAtoms(MCModule *Module) {
Section.getName(SecName);
if (isText) {
- MCTextAtom *Text = 0;
- MCDataAtom *InvalidData = 0;
+ MCTextAtom *Text = nullptr;
+ MCDataAtom *InvalidData = nullptr;
uint64_t InstSize;
for (uint64_t Index = 0; Index < SecSize; Index += InstSize) {
@@ -129,11 +131,11 @@ void MCObjectDisassembler::buildSectionAtoms(MCModule *Module) {
Text->setName(SecName);
}
Text->addInst(Inst, InstSize);
- InvalidData = 0;
+ InvalidData = nullptr;
} else {
assert(InstSize && "getInstruction() consumed no bytes");
if (!InvalidData) {
- Text = 0;
+ Text = nullptr;
InvalidData = Module->createDataAtom(CurAddr, CurAddr+InstSize - 1);
}
for (uint64_t I = 0; I < InstSize; ++I)
@@ -160,7 +162,7 @@ namespace {
BBInfoSetTy Preds;
MCObjectDisassembler::AddressSetTy SuccAddrs;
- BBInfo() : Atom(0), BB(0) {}
+ BBInfo() : Atom(nullptr), BB(nullptr) {}
void addSucc(BBInfo &Succ) {
Succs.insert(&Succ);
@@ -480,7 +482,7 @@ MCObjectDisassembler::createFunction(MCModule *Module, uint64_t BeginAddr,
continue;
// FIXME: MCModule should provide a findFunctionByAddr()
if ((*FI)->getEntryBlock()->getInsts()->getBeginAddr() == BeginAddr)
- return *FI;
+ return FI->get();
}
// Finally, just create a new one.
diff --git a/lib/MC/MCObjectFileInfo.cpp b/lib/MC/MCObjectFileInfo.cpp
index 3b011c8..9d413af 100644
--- a/lib/MC/MCObjectFileInfo.cpp
+++ b/lib/MC/MCObjectFileInfo.cpp
@@ -10,6 +10,7 @@
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCSectionCOFF.h"
@@ -22,12 +23,13 @@ void MCObjectFileInfo::InitMachOMCObjectFileInfo(Triple T) {
IsFunctionEHFrameSymbolPrivate = false;
SupportsWeakOmittedEHFrame = false;
- if (T.isOSDarwin() && T.getArch() == Triple::arm64)
+ if (T.isOSDarwin() &&
+ (T.getArch() == Triple::arm64 || T.getArch() == Triple::aarch64))
SupportsCompactUnwindWithoutEHFrame = true;
PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel
| dwarf::DW_EH_PE_sdata4;
- LSDAEncoding = FDEEncoding = FDECFIEncoding = dwarf::DW_EH_PE_pcrel;
+ LSDAEncoding = FDECFIEncoding = dwarf::DW_EH_PE_pcrel;
TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
dwarf::DW_EH_PE_sdata4;
@@ -44,7 +46,7 @@ void MCObjectFileInfo::InitMachOMCObjectFileInfo(Triple T) {
SectionKind::getDataRel());
// BSSSection might not be expected initialized on msvc.
- BSSSection = 0;
+ BSSSection = nullptr;
TLSDataSection // .tdata
= Ctx->getMachOSection("__DATA", "__thread_data",
@@ -147,10 +149,11 @@ void MCObjectFileInfo::InitMachOMCObjectFileInfo(Triple T) {
LSDASection = Ctx->getMachOSection("__TEXT", "__gcc_except_tab", 0,
SectionKind::getReadOnlyWithRel());
- COFFDebugSymbolsSection = 0;
+ COFFDebugSymbolsSection = nullptr;
if ((T.isMacOSX() && !T.isMacOSXVersionLT(10, 6)) ||
- (T.isOSDarwin() && T.getArch() == Triple::arm64)) {
+ (T.isOSDarwin() &&
+ (T.getArch() == Triple::arm64 || T.getArch() == Triple::aarch64))) {
CompactUnwindSection =
Ctx->getMachOSection("__LD", "__compact_unwind",
MachO::S_ATTR_DEBUG,
@@ -158,7 +161,7 @@ void MCObjectFileInfo::InitMachOMCObjectFileInfo(Triple T) {
if (T.getArch() == Triple::x86_64 || T.getArch() == Triple::x86)
CompactUnwindDwarfEHFrameOnly = 0x04000000;
- else if (T.getArch() == Triple::arm64)
+ else if (T.getArch() == Triple::arm64 || T.getArch() == Triple::aarch64)
CompactUnwindDwarfEHFrameOnly = 0x03000000;
}
@@ -245,29 +248,40 @@ void MCObjectFileInfo::InitMachOMCObjectFileInfo(Triple T) {
}
void MCObjectFileInfo::InitELFMCObjectFileInfo(Triple T) {
- if (T.getArch() == Triple::mips ||
- T.getArch() == Triple::mipsel)
+ switch (T.getArch()) {
+ case Triple::mips:
+ case Triple::mipsel:
FDECFIEncoding = dwarf::DW_EH_PE_sdata4;
- else if (T.getArch() == Triple::mips64 ||
- T.getArch() == Triple::mips64el)
+ break;
+ case Triple::mips64:
+ case Triple::mips64el:
FDECFIEncoding = dwarf::DW_EH_PE_sdata8;
- else
+ break;
+ default:
FDECFIEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
+ break;
+ }
- if (T.getArch() == Triple::x86) {
+ switch (T.getArch()) {
+ case Triple::arm:
+ case Triple::armeb:
+ case Triple::thumb:
+ case Triple::thumbeb:
+ if (Ctx->getAsmInfo()->getExceptionHandlingType() == ExceptionHandling::ARM)
+ break;
+ // Fallthrough if not using EHABI
+ case Triple::x86:
PersonalityEncoding = (RelocM == Reloc::PIC_)
? dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4
: dwarf::DW_EH_PE_absptr;
LSDAEncoding = (RelocM == Reloc::PIC_)
? dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4
: dwarf::DW_EH_PE_absptr;
- FDEEncoding = (RelocM == Reloc::PIC_)
- ? dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4
- : dwarf::DW_EH_PE_absptr;
TTypeEncoding = (RelocM == Reloc::PIC_)
? dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4
: dwarf::DW_EH_PE_absptr;
- } else if (T.getArch() == Triple::x86_64) {
+ break;
+ case Triple::x86_64:
if (RelocM == Reloc::PIC_) {
PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
((CMModel == CodeModel::Small || CMModel == CodeModel::Medium)
@@ -275,7 +289,6 @@ void MCObjectFileInfo::InitELFMCObjectFileInfo(Triple T) {
LSDAEncoding = dwarf::DW_EH_PE_pcrel |
(CMModel == CodeModel::Small
? dwarf::DW_EH_PE_sdata4 : dwarf::DW_EH_PE_sdata8);
- FDEEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
((CMModel == CodeModel::Small || CMModel == CodeModel::Medium)
? dwarf::DW_EH_PE_sdata4 : dwarf::DW_EH_PE_sdata8);
@@ -285,12 +298,14 @@ void MCObjectFileInfo::InitELFMCObjectFileInfo(Triple T) {
? dwarf::DW_EH_PE_udata4 : dwarf::DW_EH_PE_absptr;
LSDAEncoding = (CMModel == CodeModel::Small)
? dwarf::DW_EH_PE_udata4 : dwarf::DW_EH_PE_absptr;
- FDEEncoding = dwarf::DW_EH_PE_udata4;
TTypeEncoding = (CMModel == CodeModel::Small)
? dwarf::DW_EH_PE_udata4 : dwarf::DW_EH_PE_absptr;
}
- } else if (T.getArch() == Triple::aarch64 ||
- T.getArch() == Triple::aarch64_be ) {
+ break;
+ case Triple::aarch64:
+ case Triple::aarch64_be:
+ case Triple::arm64:
+ case Triple::arm64_be:
// The small model guarantees static code/data size < 4GB, but not where it
// will be in memory. Most of these could end up >2GB away so even a signed
// pc-relative 32-bit address is insufficient, theoretically.
@@ -298,65 +313,64 @@ void MCObjectFileInfo::InitELFMCObjectFileInfo(Triple T) {
PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
dwarf::DW_EH_PE_sdata8;
LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata8;
- FDEEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
dwarf::DW_EH_PE_sdata8;
} else {
PersonalityEncoding = dwarf::DW_EH_PE_absptr;
LSDAEncoding = dwarf::DW_EH_PE_absptr;
- FDEEncoding = dwarf::DW_EH_PE_udata4;
TTypeEncoding = dwarf::DW_EH_PE_absptr;
}
- } else if (T.getArch() == Triple::ppc64 || T.getArch() == Triple::ppc64le) {
+ break;
+ case Triple::ppc64:
+ case Triple::ppc64le:
PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
dwarf::DW_EH_PE_udata8;
LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_udata8;
- FDEEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_udata8;
TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
dwarf::DW_EH_PE_udata8;
- } else if (T.getArch() == Triple::sparc) {
+ break;
+ case Triple::sparc:
if (RelocM == Reloc::PIC_) {
LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
dwarf::DW_EH_PE_sdata4;
- FDEEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
dwarf::DW_EH_PE_sdata4;
} else {
LSDAEncoding = dwarf::DW_EH_PE_absptr;
PersonalityEncoding = dwarf::DW_EH_PE_absptr;
- FDEEncoding = dwarf::DW_EH_PE_udata4;
TTypeEncoding = dwarf::DW_EH_PE_absptr;
}
- } else if (T.getArch() == Triple::sparcv9) {
+ break;
+ case Triple::sparcv9:
LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
if (RelocM == Reloc::PIC_) {
PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
dwarf::DW_EH_PE_sdata4;
- FDEEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
dwarf::DW_EH_PE_sdata4;
} else {
PersonalityEncoding = dwarf::DW_EH_PE_absptr;
- FDEEncoding = dwarf::DW_EH_PE_udata4;
TTypeEncoding = dwarf::DW_EH_PE_absptr;
}
- } else if (T.getArch() == Triple::systemz) {
+ break;
+ case Triple::systemz:
// All currently-defined code models guarantee that 4-byte PC-relative
// values will be in range.
if (RelocM == Reloc::PIC_) {
PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
dwarf::DW_EH_PE_sdata4;
LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
- FDEEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
dwarf::DW_EH_PE_sdata4;
} else {
PersonalityEncoding = dwarf::DW_EH_PE_absptr;
LSDAEncoding = dwarf::DW_EH_PE_absptr;
- FDEEncoding = dwarf::DW_EH_PE_absptr;
TTypeEncoding = dwarf::DW_EH_PE_absptr;
}
+ break;
+ default:
+ break;
}
// Solaris requires different flags for .eh_frame to seemingly every other
@@ -461,7 +475,7 @@ void MCObjectFileInfo::InitELFMCObjectFileInfo(Triple T) {
ELF::SHF_ALLOC,
SectionKind::getReadOnly());
- COFFDebugSymbolsSection = 0;
+ COFFDebugSymbolsSection = nullptr;
// Debug Info Sections.
DwarfAbbrevSection =
@@ -548,6 +562,10 @@ void MCObjectFileInfo::InitELFMCObjectFileInfo(Triple T) {
void MCObjectFileInfo::InitCOFFMCObjectFileInfo(Triple T) {
+ // The object file format cannot represent common symbols with explicit
+ // alignments.
+ CommDirectiveSupportsAlignment = false;
+
// COFF
BSSSection =
Ctx->getCOFFSection(".bss",
@@ -716,7 +734,7 @@ void MCObjectFileInfo::InitCOFFMCObjectFileInfo(Triple T) {
DrectveSection =
Ctx->getCOFFSection(".drectve",
- COFF::IMAGE_SCN_LNK_INFO,
+ COFF::IMAGE_SCN_LNK_INFO | COFF::IMAGE_SCN_LNK_REMOVE,
SectionKind::getMetadata());
PDataSection =
@@ -751,17 +769,17 @@ void MCObjectFileInfo::InitMCObjectFileInfo(StringRef TT, Reloc::Model relocm,
IsFunctionEHFrameSymbolPrivate = true;
SupportsCompactUnwindWithoutEHFrame = false;
- PersonalityEncoding = LSDAEncoding = FDEEncoding = FDECFIEncoding =
- TTypeEncoding = dwarf::DW_EH_PE_absptr;
+ PersonalityEncoding = LSDAEncoding = FDECFIEncoding = TTypeEncoding =
+ dwarf::DW_EH_PE_absptr;
CompactUnwindDwarfEHFrameOnly = 0;
- EHFrameSection = 0; // Created on demand.
- CompactUnwindSection = 0; // Used only by selected targets.
- DwarfAccelNamesSection = 0; // Used only by selected targets.
- DwarfAccelObjCSection = 0; // Used only by selected targets.
- DwarfAccelNamespaceSection = 0; // Used only by selected targets.
- DwarfAccelTypesSection = 0; // Used only by selected targets.
+ EHFrameSection = nullptr; // Created on demand.
+ CompactUnwindSection = nullptr; // Used only by selected targets.
+ DwarfAccelNamesSection = nullptr; // Used only by selected targets.
+ DwarfAccelObjCSection = nullptr; // Used only by selected targets.
+ DwarfAccelNamespaceSection = nullptr; // Used only by selected targets.
+ DwarfAccelTypesSection = nullptr; // Used only by selected targets.
Triple T(TT);
Triple::ArchType Arch = T.getArch();
@@ -769,14 +787,15 @@ void MCObjectFileInfo::InitMCObjectFileInfo(StringRef TT, Reloc::Model relocm,
// cellspu-apple-darwin. Perhaps we should fix in Triple?
if ((Arch == Triple::x86 || Arch == Triple::x86_64 ||
Arch == Triple::arm || Arch == Triple::thumb ||
- Arch == Triple::arm64 ||
+ Arch == Triple::arm64 || Arch == Triple::aarch64 ||
Arch == Triple::ppc || Arch == Triple::ppc64 ||
Arch == Triple::UnknownArch) &&
(T.isOSDarwin() || T.isOSBinFormatMachO())) {
Env = IsMachO;
InitMachOMCObjectFileInfo(T);
- } else if ((Arch == Triple::x86 || Arch == Triple::x86_64) &&
- T.getObjectFormat() != Triple::ELF && T.isOSWindows()) {
+ } else if ((Arch == Triple::x86 || Arch == Triple::x86_64 ||
+ Arch == Triple::arm || Arch == Triple::thumb) &&
+ (T.isOSWindows() && T.getObjectFormat() == Triple::COFF)) {
Env = IsCOFF;
InitCOFFMCObjectFileInfo(T);
} else {
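
The MCObjectFileInfo hunks do two things at once: the per-architecture if/else chain becomes a switch over Triple::ArchType, with the ARM/Thumb cases deliberately falling through to the x86 handling when EHABI exceptions are not in use, and the stored FDEEncoding field is dropped because MCObjectFileInfo::getFDEEncoding() now derives it. A reduced sketch of the fallthrough structure (enum and return values are stand-ins):

    enum class Arch { ARM, Thumb, X86, Other };

    static bool usesEHABI(Arch A) { return A == Arch::ARM; } // assumed predicate

    unsigned pickPersonalityEncoding(Arch A, bool PIC) {
      switch (A) {
      case Arch::ARM:
      case Arch::Thumb:
        if (usesEHABI(A))
          return 0;            // EHABI path: handled elsewhere
        // Fallthrough if not using EHABI (as in the patch)
      case Arch::X86:
        return PIC ? 1 : 2;
      default:
        return 3;
      }
    }
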
diff --git a/lib/MC/MCObjectStreamer.cpp b/lib/MC/MCObjectStreamer.cpp
index 4451264..a1aa602 100644
--- a/lib/MC/MCObjectStreamer.cpp
+++ b/lib/MC/MCObjectStreamer.cpp
@@ -20,7 +20,6 @@
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/MC/MCSectionELF.h"
using namespace llvm;
MCObjectStreamer::MCObjectStreamer(MCContext &Context, MCAsmBackend &TAB,
@@ -28,12 +27,13 @@ MCObjectStreamer::MCObjectStreamer(MCContext &Context, MCAsmBackend &TAB,
: MCStreamer(Context),
Assembler(new MCAssembler(Context, TAB, *Emitter_,
*TAB.createObjectWriter(OS), OS)),
- CurSectionData(0) {}
+ CurSectionData(nullptr), EmitEHFrame(true), EmitDebugFrame(false) {}
MCObjectStreamer::MCObjectStreamer(MCContext &Context, MCAsmBackend &TAB,
raw_ostream &OS, MCCodeEmitter *Emitter_,
MCAssembler *_Assembler)
- : MCStreamer(Context), Assembler(_Assembler), CurSectionData(0) {}
+ : MCStreamer(Context), Assembler(_Assembler), CurSectionData(nullptr),
+ EmitEHFrame(true), EmitDebugFrame(false) {}
MCObjectStreamer::~MCObjectStreamer() {
delete &Assembler->getBackend();
@@ -45,18 +45,31 @@ MCObjectStreamer::~MCObjectStreamer() {
void MCObjectStreamer::reset() {
if (Assembler)
Assembler->reset();
- CurSectionData = 0;
+ CurSectionData = nullptr;
CurInsertionPoint = MCSectionData::iterator();
+ EmitEHFrame = true;
+ EmitDebugFrame = false;
MCStreamer::reset();
}
+void MCObjectStreamer::EmitFrames(MCAsmBackend *MAB) {
+ if (!getNumFrameInfos())
+ return;
+
+ if (EmitEHFrame)
+ MCDwarfFrameEmitter::Emit(*this, MAB, true);
+
+ if (EmitDebugFrame)
+ MCDwarfFrameEmitter::Emit(*this, MAB, false);
+}
+
MCFragment *MCObjectStreamer::getCurrentFragment() const {
assert(getCurrentSectionData() && "No current section!");
if (CurInsertionPoint != getCurrentSectionData()->getFragmentList().begin())
return std::prev(CurInsertionPoint);
- return 0;
+ return nullptr;
}
MCDataFragment *MCObjectStreamer::getOrCreateDataFragment() const {
@@ -64,11 +77,7 @@ MCDataFragment *MCObjectStreamer::getOrCreateDataFragment() const {
// When bundling is enabled, we don't want to add data to a fragment that
// already has instructions (see MCELFStreamer::EmitInstToData for details)
if (!F || (Assembler->isBundlingEnabled() && F->hasInstructions())) {
- const auto *Sec = dyn_cast<MCSectionELF>(&getCurrentSectionData()->getSection());
- if (Sec && Sec->getSectionName().startswith(".zdebug_"))
- F = new MCCompressedFragment();
- else
- F = new MCDataFragment();
+ F = new MCDataFragment();
insert(F);
}
return F;
@@ -102,7 +111,14 @@ const MCExpr *MCObjectStreamer::AddValueSymbols(const MCExpr *Value) {
return Value;
}
-void MCObjectStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size) {
+void MCObjectStreamer::EmitCFISections(bool EH, bool Debug) {
+ MCStreamer::EmitCFISections(EH, Debug);
+ EmitEHFrame = EH;
+ EmitDebugFrame = Debug;
+}
+
+void MCObjectStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size,
+ const SMLoc &Loc) {
MCDataFragment *DF = getOrCreateDataFragment();
MCLineEntry::Make(this, getCurrentSection().first);
@@ -115,7 +131,7 @@ void MCObjectStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size) {
}
DF->getFixups().push_back(
MCFixup::Create(DF->getContents().size(), Value,
- MCFixup::getKindForSize(Size, false)));
+ MCFixup::getKindForSize(Size, false), Loc));
DF->getContents().resize(DF->getContents().size() + Size, 0);
}
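The EmitEHFrame/EmitDebugFrame state this hunk moves into MCObjectStreamer defaults to EH-only and is overridden by .cfi_sections. A small standalone model of that state machine (plain C++, not LLVM code; names are stand-ins):

#include <cassert>
#include <cstdio>

struct FrameState {
  bool EmitEHFrame = true;    // matches the new constructor defaults
  bool EmitDebugFrame = false;

  void emitCFISections(bool EH, bool Debug) { // models EmitCFISections
    assert(EH || Debug);
    EmitEHFrame = EH;
    EmitDebugFrame = Debug;
  }

  void emitFrames(unsigned NumFrameInfos) const { // models EmitFrames
    if (!NumFrameInfos)
      return;
    if (EmitEHFrame)
      std::puts("emit .eh_frame");
    if (EmitDebugFrame)
      std::puts("emit .debug_frame");
  }
};

int main() {
  FrameState S;
  S.emitFrames(1);                 // default: .eh_frame only
  S.emitCFISections(false, true);  // .cfi_sections .debug_frame
  S.emitFrames(1);                 // now: .debug_frame only
}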
diff --git a/lib/MC/MCObjectSymbolizer.cpp b/lib/MC/MCObjectSymbolizer.cpp
index ba80d15..b149596 100644
--- a/lib/MC/MCObjectSymbolizer.cpp
+++ b/lib/MC/MCObjectSymbolizer.cpp
@@ -215,11 +215,11 @@ const SectionRef *MCObjectSymbolizer::findSectionContaining(uint64_t Addr) {
It = std::lower_bound(SortedSections.begin(), EndIt,
Addr, SectionStartsBefore);
if (It == EndIt)
- return 0;
+ return nullptr;
uint64_t SAddr; It->getAddress(SAddr);
uint64_t SSize; It->getSize(SSize);
if (Addr >= SAddr + SSize)
- return 0;
+ return nullptr;
return &*It;
}
@@ -229,7 +229,7 @@ const RelocationRef *MCObjectSymbolizer::findRelocationAt(uint64_t Addr) {
AddrToRelocMap::const_iterator RI = AddrToReloc.find(Addr);
if (RI == AddrToReloc.end())
- return 0;
+ return nullptr;
return &RI->second;
}
@@ -257,40 +257,12 @@ void MCObjectSymbolizer::buildSectionList() {
void MCObjectSymbolizer::buildRelocationByAddrMap() {
for (const SectionRef &Section : Obj->sections()) {
- section_iterator RelSecI = Section.getRelocatedSection();
- if (RelSecI == Obj->section_end())
- continue;
-
- uint64_t StartAddr; RelSecI->getAddress(StartAddr);
- uint64_t Size; RelSecI->getSize(Size);
- bool RequiredForExec;
- RelSecI->isRequiredForExecution(RequiredForExec);
- if (RequiredForExec == false || Size == 0)
- continue;
for (const RelocationRef &Reloc : Section.relocations()) {
- // FIXME: libObject is inconsistent regarding error handling. The
- // overwhelming majority of methods always return object_error::success,
- // and assert for simple errors.. Here, ELFObjectFile::getRelocationOffset
- // asserts when the file type isn't ET_REL.
- // This workaround handles x86-64 elf, the only one that has a relocinfo.
- uint64_t Offset;
- if (Obj->isELF()) {
- const ELF64LEObjectFile *ELFObj = dyn_cast<ELF64LEObjectFile>(Obj);
- if (ELFObj == 0)
- break;
- if (ELFObj->getELFFile()->getHeader()->e_type == ELF::ET_REL) {
- Reloc.getOffset(Offset);
- Offset += StartAddr;
- } else {
- Reloc.getAddress(Offset);
- }
- } else {
- Reloc.getOffset(Offset);
- Offset += StartAddr;
- }
+ uint64_t Address;
+ Reloc.getAddress(Address);
// At a specific address, only keep the first relocation.
- if (AddrToReloc.find(Offset) == AddrToReloc.end())
- AddrToReloc[Offset] = Reloc;
+ if (AddrToReloc.find(Address) == AddrToReloc.end())
+ AddrToReloc[Address] = Reloc;
}
}
}
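The simplified loop above keeps only the first relocation seen at each address. A toy illustration with std::map standing in for AddrToRelocMap; std::map::insert already has first-wins semantics, so the find-before-assign pattern and a bare insert behave the same:

#include <cstdint>
#include <cstdio>
#include <map>

int main() {
  std::map<uint64_t, char> AddrToReloc;
  AddrToReloc.insert({0x10, 'A'}); // first relocation at 0x10: kept
  AddrToReloc.insert({0x10, 'B'}); // second at same address: ignored
  std::printf("0x10 -> %c\n", AddrToReloc[0x10]); // prints: 0x10 -> A
}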
diff --git a/lib/MC/MCParser/AsmLexer.cpp b/lib/MC/MCParser/AsmLexer.cpp
index a3b68d8..bca516e 100644
--- a/lib/MC/MCParser/AsmLexer.cpp
+++ b/lib/MC/MCParser/AsmLexer.cpp
@@ -22,8 +22,8 @@
using namespace llvm;
AsmLexer::AsmLexer(const MCAsmInfo &_MAI) : MAI(_MAI) {
- CurBuf = NULL;
- CurPtr = NULL;
+ CurBuf = nullptr;
+ CurPtr = nullptr;
isAtStartOfLine = true;
AllowAtInIdentifier = !StringRef(MAI.getCommentString()).startswith("@");
}
@@ -39,7 +39,7 @@ void AsmLexer::setBuffer(const MemoryBuffer *buf, const char *ptr) {
else
CurPtr = CurBuf->getBufferStart();
- TokStart = 0;
+ TokStart = nullptr;
}
/// ReturnError - Set the error to the specified string at the specified
@@ -218,7 +218,7 @@ static void SkipIgnoredIntegerSuffix(const char *&CurPtr) {
// Look ahead to search for the first non-hex digit; if it's [hH], then we
// treat the integer as a hexadecimal, possibly with leading zeroes.
static unsigned doLookAhead(const char *&CurPtr, unsigned DefaultRadix) {
- const char *FirstHex = 0;
+ const char *FirstHex = nullptr;
const char *LookAhead = CurPtr;
while (1) {
if (isdigit(*LookAhead)) {
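For readers following the lexer change, a simplified re-creation of the doLookAhead idea (an approximation of the control flow, not the exact LLVM routine):

#include <cctype>
#include <cstdio>

static unsigned lookAheadRadix(const char *&CurPtr, unsigned DefaultRadix) {
  const char *FirstHex = nullptr;
  const char *LookAhead = CurPtr;
  while (true) {
    if (isdigit((unsigned char)*LookAhead)) {
      ++LookAhead;
    } else if (isxdigit((unsigned char)*LookAhead)) {
      if (!FirstHex)
        FirstHex = LookAhead; // first digit that is only valid in hex
      ++LookAhead;
    } else {
      break;
    }
  }
  bool IsHex = *LookAhead == 'h' || *LookAhead == 'H';
  // On a hex suffix, accept the whole run; otherwise stop at the first
  // hex-only digit so it can be re-lexed as part of the next token. The
  // caller consumes the [hH] suffix itself after reading the digits.
  CurPtr = IsHex || !FirstHex ? LookAhead : FirstHex;
  return IsHex ? 16 : DefaultRadix;
}

int main() {
  const char *P = "0fh+1";
  std::printf("radix %u, rest \"%s\"\n", lookAheadRadix(P, 10), P);
  // prints: radix 16, rest "h+1"
}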
diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp
index 910a424..168597f 100644
--- a/lib/MC/MCParser/AsmParser.cpp
+++ b/lib/MC/MCParser/AsmParser.cpp
@@ -39,6 +39,7 @@
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/raw_ostream.h"
#include <cctype>
+#include <deque>
#include <set>
#include <string>
#include <vector>
@@ -59,8 +60,9 @@ struct MCAsmMacroParameter {
StringRef Name;
MCAsmMacroArgument Value;
bool Required;
+ bool Vararg;
- MCAsmMacroParameter() : Required(false) { }
+ MCAsmMacroParameter() : Required(false), Vararg(false) {}
};
typedef std::vector<MCAsmMacroParameter> MCAsmMacroParameters;
@@ -110,7 +112,7 @@ struct ParseStatementInfo {
SmallVectorImpl<AsmRewrite> *AsmRewrites;
- ParseStatementInfo() : Opcode(~0U), ParseError(false), AsmRewrites(0) {}
+ ParseStatementInfo() : Opcode(~0U), ParseError(false), AsmRewrites(nullptr) {}
ParseStatementInfo(SmallVectorImpl<AsmRewrite> *rewrites)
: Opcode(~0), ParseError(false), AsmRewrites(rewrites) {}
@@ -292,7 +294,7 @@ private:
void handleMacroExit();
/// \brief Extract AsmTokens for a macro argument.
- bool parseMacroArgument(MCAsmMacroArgument &MA);
+ bool parseMacroArgument(MCAsmMacroArgument &MA, bool Vararg);
/// \brief Parse all macro arguments for a given macro.
bool parseMacroArguments(const MCAsmMacro *M, MCAsmMacroArguments &A);
@@ -495,9 +497,9 @@ enum { DEFAULT_ADDRSPACE = 0 };
AsmParser::AsmParser(SourceMgr &_SM, MCContext &_Ctx, MCStreamer &_Out,
const MCAsmInfo &_MAI)
: Lexer(_MAI), Ctx(_Ctx), Out(_Out), MAI(_MAI), SrcMgr(_SM),
- PlatformParser(0), CurBuffer(0), MacrosEnabledFlag(true),
- CppHashLineNumber(0), AssemblerDialect(~0U), IsDarwin(false),
- ParsingInlineAsm(false) {
+ PlatformParser(nullptr), CurBuffer(0), MacrosEnabledFlag(true),
+ HadError(false), CppHashLineNumber(0), AssemblerDialect(~0U),
+ IsDarwin(false), ParsingInlineAsm(false) {
// Save the old handler.
SavedDiagHandler = SrcMgr.getDiagHandler();
SavedDiagContext = SrcMgr.getDiagContext();
@@ -526,7 +528,8 @@ AsmParser::AsmParser(SourceMgr &_SM, MCContext &_Ctx, MCStreamer &_Out,
}
AsmParser::~AsmParser() {
- assert(ActiveMacros.empty() && "Unexpected active macro instantiation!");
+ assert((HadError || ActiveMacros.empty()) &&
+ "Unexpected active macro instantiation!");
// Destroy any macros.
for (StringMap<MCAsmMacro *>::iterator it = MacroMap.begin(),
@@ -959,7 +962,7 @@ AsmParser::applyModifierToExpr(const MCExpr *E,
switch (E->getKind()) {
case MCExpr::Target:
case MCExpr::Constant:
- return 0;
+ return nullptr;
case MCExpr::SymbolRef: {
const MCSymbolRefExpr *SRE = cast<MCSymbolRefExpr>(E);
@@ -977,7 +980,7 @@ AsmParser::applyModifierToExpr(const MCExpr *E,
const MCUnaryExpr *UE = cast<MCUnaryExpr>(E);
const MCExpr *Sub = applyModifierToExpr(UE->getSubExpr(), Variant);
if (!Sub)
- return 0;
+ return nullptr;
return MCUnaryExpr::Create(UE->getOpcode(), Sub, getContext());
}
@@ -987,7 +990,7 @@ AsmParser::applyModifierToExpr(const MCExpr *E,
const MCExpr *RHS = applyModifierToExpr(BE->getRHS(), Variant);
if (!LHS && !RHS)
- return 0;
+ return nullptr;
if (!LHS)
LHS = BE->getLHS();
@@ -1013,7 +1016,7 @@ AsmParser::applyModifierToExpr(const MCExpr *E,
///
bool AsmParser::parseExpression(const MCExpr *&Res, SMLoc &EndLoc) {
// Parse the expression.
- Res = 0;
+ Res = nullptr;
if (parsePrimaryExpr(Res, EndLoc) || parseBinOpRHS(1, Res, EndLoc))
return true;
@@ -1050,7 +1053,7 @@ bool AsmParser::parseExpression(const MCExpr *&Res, SMLoc &EndLoc) {
}
bool AsmParser::parseParenExpression(const MCExpr *&Res, SMLoc &EndLoc) {
- Res = 0;
+ Res = nullptr;
return parseParenExpr(Res, EndLoc) || parseBinOpRHS(1, Res, EndLoc);
}
@@ -1701,7 +1704,7 @@ void AsmParser::DiagHandler(const SMDiagnostic &Diag, void *Context) {
if (Parser->SavedDiagHandler)
Parser->SavedDiagHandler(Diag, Parser->SavedDiagContext);
else
- Diag.print(0, OS);
+ Diag.print(nullptr, OS);
return;
}
@@ -1723,7 +1726,7 @@ void AsmParser::DiagHandler(const SMDiagnostic &Diag, void *Context) {
if (Parser->SavedDiagHandler)
Parser->SavedDiagHandler(NewDiag, Parser->SavedDiagContext);
else
- NewDiag.print(0, OS);
+ NewDiag.print(nullptr, OS);
}
// FIXME: This is mostly duplicated from the function in AsmLexer.cpp. The
@@ -1739,6 +1742,7 @@ bool AsmParser::expandMacro(raw_svector_ostream &OS, StringRef Body,
ArrayRef<MCAsmMacroParameter> Parameters,
ArrayRef<MCAsmMacroArgument> A, const SMLoc &L) {
unsigned NParameters = Parameters.size();
+ bool HasVararg = NParameters ? Parameters.back().Vararg : false;
if ((!IsDarwin || NParameters != 0) && NParameters != A.size())
return Error(L, "Wrong number of arguments");
@@ -1820,13 +1824,16 @@ bool AsmParser::expandMacro(raw_svector_ostream &OS, StringRef Body,
Pos = I;
}
} else {
+ bool VarargParameter = HasVararg && Index == (NParameters - 1);
for (MCAsmMacroArgument::const_iterator it = A[Index].begin(),
ie = A[Index].end();
it != ie; ++it)
- if (it->getKind() == AsmToken::String)
- OS << it->getStringContents();
- else
+ // We expect no quotes around the string's contents when
+ // parsing for varargs.
+ if (it->getKind() != AsmToken::String || VarargParameter)
OS << it->getString();
+ else
+ OS << it->getStringContents();
Pos += 1 + Argument.size();
}
@@ -1890,7 +1897,16 @@ private:
};
}
-bool AsmParser::parseMacroArgument(MCAsmMacroArgument &MA) {
+bool AsmParser::parseMacroArgument(MCAsmMacroArgument &MA, bool Vararg) {
+
+ if (Vararg) {
+ if (Lexer.isNot(AsmToken::EndOfStatement)) {
+ StringRef Str = parseStringToEndOfStatement();
+ MA.push_back(AsmToken(AsmToken::String, Str));
+ }
+ return false;
+ }
+
unsigned ParenLevel = 0;
unsigned AddTokens = 0;
@@ -1961,6 +1977,7 @@ bool AsmParser::parseMacroArguments(const MCAsmMacro *M,
// Parse two kinds of macro invocations:
// - macros defined without any parameters accept an arbitrary number of them
// - macros defined with parameters accept at most that many of them
+ bool HasVararg = NParameters ? M->Parameters.back().Vararg : false;
for (unsigned Parameter = 0; !NParameters || Parameter < NParameters;
++Parameter) {
SMLoc IDLoc = Lexer.getLoc();
@@ -1989,7 +2006,8 @@ bool AsmParser::parseMacroArguments(const MCAsmMacro *M,
return true;
}
- if (parseMacroArgument(FA.Value))
+ bool Vararg = HasVararg && Parameter == (NParameters - 1);
+ if (parseMacroArgument(FA.Value, Vararg))
return true;
unsigned PI = Parameter;
@@ -2050,7 +2068,7 @@ bool AsmParser::parseMacroArguments(const MCAsmMacro *M,
const MCAsmMacro *AsmParser::lookupMacro(StringRef Name) {
StringMap<MCAsmMacro *>::iterator I = MacroMap.find(Name);
- return (I == MacroMap.end()) ? NULL : I->getValue();
+ return (I == MacroMap.end()) ? nullptr : I->getValue();
}
void AsmParser::defineMacro(StringRef Name, const MCAsmMacro &Macro) {
@@ -2364,7 +2382,7 @@ bool AsmParser::parseDirectiveValue(unsigned Size) {
return Error(ExprLoc, "literal value out of range for directive");
getStreamer().EmitIntValue(IntValue, Size);
} else
- getStreamer().EmitValue(Value, Size);
+ getStreamer().EmitValue(Value, Size, ExprLoc);
if (getLexer().is(AsmToken::EndOfStatement))
break;
@@ -3240,6 +3258,12 @@ bool AsmParser::parseDirectiveMacro(SMLoc DirectiveLoc) {
MCAsmMacroParameters Parameters;
while (getLexer().isNot(AsmToken::EndOfStatement)) {
+
+ if (Parameters.size() && Parameters.back().Vararg)
+ return Error(Lexer.getLoc(),
+ "Vararg parameter '" + Parameters.back().Name +
+ "' should be last one in the list of parameters.");
+
MCAsmMacroParameter Parameter;
if (parseIdentifier(Parameter.Name))
return TokError("expected identifier in '.macro' directive");
@@ -3257,6 +3281,8 @@ bool AsmParser::parseDirectiveMacro(SMLoc DirectiveLoc) {
if (Qualifier == "req")
Parameter.Required = true;
+ else if (Qualifier == "vararg" && !IsDarwin)
+ Parameter.Vararg = true;
else
return Error(QualLoc, Qualifier + " is not a valid parameter qualifier "
"for '" + Parameter.Name + "' in macro '" + Name + "'");
@@ -3268,7 +3294,7 @@ bool AsmParser::parseDirectiveMacro(SMLoc DirectiveLoc) {
SMLoc ParamLoc;
ParamLoc = Lexer.getLoc();
- if (parseMacroArgument(Parameter.Value))
+ if (parseMacroArgument(Parameter.Value, /*Vararg=*/false))
return true;
if (Parameter.Required)
@@ -3906,9 +3932,9 @@ bool AsmParser::parseDirectiveIfdef(SMLoc DirectiveLoc, bool expect_defined) {
MCSymbol *Sym = getContext().LookupSymbol(Name);
if (expect_defined)
- TheCondState.CondMet = (Sym != NULL && !Sym->isUndefined());
+ TheCondState.CondMet = (Sym && !Sym->isUndefined());
else
- TheCondState.CondMet = (Sym == NULL || Sym->isUndefined());
+ TheCondState.CondMet = (!Sym || Sym->isUndefined());
TheCondState.Ignore = !TheCondState.CondMet;
}
@@ -4151,7 +4177,7 @@ MCAsmMacro *AsmParser::parseMacroLikeBody(SMLoc DirectiveLoc) {
// Check whether we have reached the end of the file.
if (getLexer().is(AsmToken::Eof)) {
Error(DirectiveLoc, "no matching '.endr' in definition");
- return 0;
+ return nullptr;
}
if (Lexer.is(AsmToken::Identifier) &&
@@ -4166,7 +4192,7 @@ MCAsmMacro *AsmParser::parseMacroLikeBody(SMLoc DirectiveLoc) {
Lex();
if (Lexer.isNot(AsmToken::EndOfStatement)) {
TokError("unexpected token in '.endr' directive");
- return 0;
+ return nullptr;
}
break;
}
@@ -4260,7 +4286,7 @@ bool AsmParser::parseDirectiveIrp(SMLoc DirectiveLoc) {
Lex();
MCAsmMacroArguments A;
- if (parseMacroArguments(0, A))
+ if (parseMacroArguments(nullptr, A))
return true;
// Eat the end of statement.
@@ -4300,7 +4326,7 @@ bool AsmParser::parseDirectiveIrpc(SMLoc DirectiveLoc) {
Lex();
MCAsmMacroArguments A;
- if (parseMacroArguments(0, A))
+ if (parseMacroArguments(nullptr, A))
return true;
if (A.size() != 1 || A.front().size() != 1)
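The other half of the vararg change: parseMacroArgument now swallows the rest of the statement as a single argument when the last formal is vararg. A plain-string sketch of that control flow (a hypothetical helper, not the LLVM parser):

#include <iostream>
#include <string>
#include <vector>

static std::vector<std::string> parseArguments(const std::string &Line,
                                               bool LastIsVararg,
                                               size_t NParams) {
  std::vector<std::string> Args;
  size_t Pos = 0;
  while (Pos < Line.size()) {
    bool Vararg = LastIsVararg && Args.size() == NParams - 1;
    if (Vararg) { // rest of the statement becomes one argument
      Args.push_back(Line.substr(Pos));
      break;
    }
    size_t Comma = Line.find(',', Pos); // normal comma-separated argument
    Args.push_back(Line.substr(Pos, Comma - Pos));
    if (Comma == std::string::npos)
      break;
    Pos = Comma + 1;
  }
  return Args;
}

int main() {
  for (const std::string &A : parseArguments("1, 2, 3, 4", true, 2))
    std::cout << "[" << A << "]\n"; // [1] then [ 2, 3, 4]
}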
diff --git a/lib/MC/MCParser/COFFAsmParser.cpp b/lib/MC/MCParser/COFFAsmParser.cpp
index 76d3f81..decf01c 100644
--- a/lib/MC/MCParser/COFFAsmParser.cpp
+++ b/lib/MC/MCParser/COFFAsmParser.cpp
@@ -293,7 +293,7 @@ bool COFFAsmParser::ParseSectionSwitch(StringRef Section,
unsigned Characteristics,
SectionKind Kind) {
return ParseSectionSwitch(Section, Characteristics, Kind, "",
- COFF::IMAGE_COMDAT_SELECT_ANY, 0);
+ COFF::IMAGE_COMDAT_SELECT_ANY, nullptr);
}
bool COFFAsmParser::ParseSectionSwitch(StringRef Section,
@@ -359,7 +359,7 @@ bool COFFAsmParser::ParseDirectiveSection(StringRef, SMLoc) {
}
COFF::COMDATType Type = COFF::IMAGE_COMDAT_SELECT_ANY;
- const MCSectionCOFF *Assoc = 0;
+ const MCSectionCOFF *Assoc = nullptr;
StringRef COMDATSymName;
if (getLexer().is(AsmToken::Comma)) {
Lex();
@@ -504,7 +504,7 @@ bool COFFAsmParser::parseCOMDATTypeAndAssoc(COFF::COMDATType &Type,
/// ::= .linkonce [ identifier [ identifier ] ]
bool COFFAsmParser::ParseDirectiveLinkOnce(StringRef, SMLoc Loc) {
COFF::COMDATType Type = COFF::IMAGE_COMDAT_SELECT_ANY;
- const MCSectionCOFF *Assoc = 0;
+ const MCSectionCOFF *Assoc = nullptr;
if (getLexer().is(AsmToken::Identifier))
if (parseCOMDATTypeAndAssoc(Type, Assoc))
return true;
diff --git a/lib/MC/MCParser/DarwinAsmParser.cpp b/lib/MC/MCParser/DarwinAsmParser.cpp
index 0856b6e..f74b30a 100644
--- a/lib/MC/MCParser/DarwinAsmParser.cpp
+++ b/lib/MC/MCParser/DarwinAsmParser.cpp
@@ -17,6 +17,7 @@
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/FileSystem.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/SourceMgr.h"
using namespace llvm;
@@ -612,8 +613,8 @@ bool DarwinAsmParser::parseDirectivePopSection(StringRef, SMLoc) {
/// ::= .previous
bool DarwinAsmParser::parseDirectivePrevious(StringRef DirName, SMLoc) {
MCSectionSubPair PreviousSection = getStreamer().getPreviousSection();
- if (PreviousSection.first == NULL)
- return TokError(".previous without corresponding .section");
+ if (!PreviousSection.first)
+ return TokError(".previous without corresponding .section");
getStreamer().SwitchSection(PreviousSection.first, PreviousSection.second);
return false;
}
@@ -630,13 +631,13 @@ bool DarwinAsmParser::parseDirectiveSecureLogUnique(StringRef, SMLoc IDLoc) {
// Get the secure log path.
const char *SecureLogFile = getContext().getSecureLogFile();
- if (SecureLogFile == NULL)
+ if (!SecureLogFile)
return Error(IDLoc, ".secure_log_unique used but AS_SECURE_LOG_FILE "
"environment variable unset.");
// Open the secure log file if we haven't already.
raw_ostream *OS = getContext().getSecureLog();
- if (OS == NULL) {
+ if (!OS) {
std::string Err;
OS = new raw_fd_ostream(SecureLogFile, Err,
sys::fs::F_Append | sys::fs::F_Text);
diff --git a/lib/MC/MCParser/ELFAsmParser.cpp b/lib/MC/MCParser/ELFAsmParser.cpp
index d79dd67..95c4971 100644
--- a/lib/MC/MCParser/ELFAsmParser.cpp
+++ b/lib/MC/MCParser/ELFAsmParser.cpp
@@ -193,7 +193,7 @@ bool ELFAsmParser::ParseDirectiveSymbolAttribute(StringRef Directive, SMLoc) {
bool ELFAsmParser::ParseSectionSwitch(StringRef Section, unsigned Type,
unsigned Flags, SectionKind Kind) {
- const MCExpr *Subsection = 0;
+ const MCExpr *Subsection = nullptr;
if (getLexer().isNot(AsmToken::EndOfStatement)) {
if (getParser().parseExpression(Subsection))
return true;
@@ -411,7 +411,7 @@ bool ELFAsmParser::ParseSectionArguments(bool IsPush) {
int64_t Size = 0;
StringRef GroupName;
unsigned Flags = 0;
- const MCExpr *Subsection = 0;
+ const MCExpr *Subsection = nullptr;
bool UseLastGroup = false;
// Set the defaults first.
@@ -554,7 +554,7 @@ EndStmt:
bool ELFAsmParser::ParseDirectivePrevious(StringRef DirName, SMLoc) {
MCSectionSubPair PreviousSection = getStreamer().getPreviousSection();
- if (PreviousSection.first == NULL)
+ if (PreviousSection.first == nullptr)
return TokError(".previous without corresponding .section");
getStreamer().SwitchSection(PreviousSection.first, PreviousSection.second);
@@ -730,7 +730,7 @@ bool ELFAsmParser::ParseDirectiveWeakref(StringRef, SMLoc) {
}
bool ELFAsmParser::ParseDirectiveSubsection(StringRef, SMLoc) {
- const MCExpr *Subsection = 0;
+ const MCExpr *Subsection = nullptr;
if (getLexer().isNot(AsmToken::EndOfStatement)) {
if (getParser().parseExpression(Subsection))
return true;
diff --git a/lib/MC/MCParser/MCAsmLexer.cpp b/lib/MC/MCParser/MCAsmLexer.cpp
index 3867691..530814b 100644
--- a/lib/MC/MCParser/MCAsmLexer.cpp
+++ b/lib/MC/MCParser/MCAsmLexer.cpp
@@ -13,7 +13,7 @@
using namespace llvm;
MCAsmLexer::MCAsmLexer() : CurTok(AsmToken::Error, StringRef()),
- TokStart(0), SkipSpace(true) {
+ TokStart(nullptr), SkipSpace(true) {
}
MCAsmLexer::~MCAsmLexer() {
diff --git a/lib/MC/MCParser/MCAsmParser.cpp b/lib/MC/MCParser/MCAsmParser.cpp
index 6e1ebad..e417aa9 100644
--- a/lib/MC/MCParser/MCAsmParser.cpp
+++ b/lib/MC/MCParser/MCAsmParser.cpp
@@ -17,7 +17,7 @@
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
-MCAsmParser::MCAsmParser() : TargetParser(0), ShowParsedOperands(0) {
+MCAsmParser::MCAsmParser() : TargetParser(nullptr), ShowParsedOperands(0) {
}
MCAsmParser::~MCAsmParser() {
diff --git a/lib/MC/MCRelocationInfo.cpp b/lib/MC/MCRelocationInfo.cpp
index 7d2ec1f..a00c009 100644
--- a/lib/MC/MCRelocationInfo.cpp
+++ b/lib/MC/MCRelocationInfo.cpp
@@ -23,14 +23,14 @@ MCRelocationInfo::~MCRelocationInfo() {
const MCExpr *
MCRelocationInfo::createExprForRelocation(object::RelocationRef Rel) {
- return 0;
+ return nullptr;
}
const MCExpr *
MCRelocationInfo::createExprForCAPIVariantKind(const MCExpr *SubExpr,
unsigned VariantKind) {
if (VariantKind != LLVMDisassembler_VariantKind_None)
- return 0;
+ return nullptr;
return SubExpr;
}
diff --git a/lib/MC/MCSectionCOFF.cpp b/lib/MC/MCSectionCOFF.cpp
index ad9ca88..335b8cd 100644
--- a/lib/MC/MCSectionCOFF.cpp
+++ b/lib/MC/MCSectionCOFF.cpp
@@ -34,7 +34,7 @@ void MCSectionCOFF::setSelection(int Selection,
const MCSectionCOFF *Assoc) const {
assert(Selection != 0 && "invalid COMDAT selection type");
assert((Selection == COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE) ==
- (Assoc != 0) &&
+ (Assoc != nullptr) &&
"associative COMDAT section must have an associated section");
this->Selection = Selection;
this->Assoc = Assoc;
@@ -62,7 +62,8 @@ void MCSectionCOFF::PrintSwitchToSection(const MCAsmInfo &MAI,
OS << 'r';
if (getCharacteristics() & COFF::IMAGE_SCN_MEM_DISCARDABLE)
OS << 'n';
-
+ if (getCharacteristics() & COFF::IMAGE_SCN_CNT_INITIALIZED_DATA)
+ OS << 'd';
OS << '"';
if (getCharacteristics() & COFF::IMAGE_SCN_LNK_COMDAT) {
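For reference, a standalone rendering of the flag string printed by PrintSwitchToSection, including the new 'd' flag; the characteristic values match the PE/COFF spec, and the subset shown covers only the flags this hunk touches:

#include <cstdint>
#include <iostream>
#include <string>

enum : uint32_t {
  IMAGE_SCN_CNT_INITIALIZED_DATA = 0x00000040,
  IMAGE_SCN_MEM_DISCARDABLE = 0x02000000,
  IMAGE_SCN_MEM_READ = 0x40000000,
};

static std::string renderFlags(uint32_t Characteristics) {
  std::string S;
  if (Characteristics & IMAGE_SCN_MEM_READ)
    S += 'r';
  if (Characteristics & IMAGE_SCN_MEM_DISCARDABLE)
    S += 'n';
  if (Characteristics & IMAGE_SCN_CNT_INITIALIZED_DATA)
    S += 'd'; // the flag added by this patch
  return S;
}

int main() {
  std::cout << renderFlags(IMAGE_SCN_MEM_READ |
                           IMAGE_SCN_CNT_INITIALIZED_DATA)
            << "\n"; // prints: rd
}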
diff --git a/lib/MC/MCSectionMachO.cpp b/lib/MC/MCSectionMachO.cpp
index 9cc534d..46beda4 100644
--- a/lib/MC/MCSectionMachO.cpp
+++ b/lib/MC/MCSectionMachO.cpp
@@ -20,7 +20,7 @@ static const struct {
const char *AssemblerName, *EnumName;
} SectionTypeDescriptors[MachO::LAST_KNOWN_SECTION_TYPE+1] = {
{ "regular", "S_REGULAR" }, // 0x00
- { 0, "S_ZEROFILL" }, // 0x01
+ { nullptr, "S_ZEROFILL" }, // 0x01
{ "cstring_literals", "S_CSTRING_LITERALS" }, // 0x02
{ "4byte_literals", "S_4BYTE_LITERALS" }, // 0x03
{ "8byte_literals", "S_8BYTE_LITERALS" }, // 0x04
@@ -31,11 +31,11 @@ static const struct {
{ "mod_init_funcs", "S_MOD_INIT_FUNC_POINTERS" }, // 0x09
{ "mod_term_funcs", "S_MOD_TERM_FUNC_POINTERS" }, // 0x0A
{ "coalesced", "S_COALESCED" }, // 0x0B
- { 0, /*FIXME??*/ "S_GB_ZEROFILL" }, // 0x0C
+ { nullptr, /*FIXME??*/ "S_GB_ZEROFILL" }, // 0x0C
{ "interposing", "S_INTERPOSING" }, // 0x0D
{ "16byte_literals", "S_16BYTE_LITERALS" }, // 0x0E
- { 0, /*FIXME??*/ "S_DTRACE_DOF" }, // 0x0F
- { 0, /*FIXME??*/ "S_LAZY_DYLIB_SYMBOL_POINTERS" }, // 0x10
+ { nullptr, /*FIXME??*/ "S_DTRACE_DOF" }, // 0x0F
+ { nullptr, /*FIXME??*/ "S_LAZY_DYLIB_SYMBOL_POINTERS" }, // 0x10
{ "thread_local_regular", "S_THREAD_LOCAL_REGULAR" }, // 0x11
{ "thread_local_zerofill", "S_THREAD_LOCAL_ZEROFILL" }, // 0x12
{ "thread_local_variables", "S_THREAD_LOCAL_VARIABLES" }, // 0x13
@@ -62,11 +62,11 @@ ENTRY("no_dead_strip", S_ATTR_NO_DEAD_STRIP)
ENTRY("live_support", S_ATTR_LIVE_SUPPORT)
ENTRY("self_modifying_code", S_ATTR_SELF_MODIFYING_CODE)
ENTRY("debug", S_ATTR_DEBUG)
-ENTRY(0 /*FIXME*/, S_ATTR_SOME_INSTRUCTIONS)
-ENTRY(0 /*FIXME*/, S_ATTR_EXT_RELOC)
-ENTRY(0 /*FIXME*/, S_ATTR_LOC_RELOC)
+ENTRY(nullptr /*FIXME*/, S_ATTR_SOME_INSTRUCTIONS)
+ENTRY(nullptr /*FIXME*/, S_ATTR_EXT_RELOC)
+ENTRY(nullptr /*FIXME*/, S_ATTR_LOC_RELOC)
#undef ENTRY
- { 0, "none", 0 }, // used if section has no attributes but has a stub size
+ { 0, "none", nullptr }, // used if section has no attributes but has a stub size
};
MCSectionMachO::MCSectionMachO(StringRef Segment, StringRef Section,
diff --git a/lib/MC/MCStreamer.cpp b/lib/MC/MCStreamer.cpp
index 8fa55aa..7dccf0d 100644
--- a/lib/MC/MCStreamer.cpp
+++ b/lib/MC/MCStreamer.cpp
@@ -37,8 +37,7 @@ void MCTargetStreamer::finish() {}
void MCTargetStreamer::emitAssignment(MCSymbol *Symbol, const MCExpr *Value) {}
MCStreamer::MCStreamer(MCContext &Ctx)
- : Context(Ctx), EmitEHFrame(true), EmitDebugFrame(false),
- CurrentW64UnwindInfo(0), LastSymbol(0) {
+ : Context(Ctx), CurrentW64UnwindInfo(nullptr), LastSymbol(nullptr) {
SectionStack.push_back(std::pair<MCSectionSubPair, MCSectionSubPair>());
}
@@ -51,10 +50,8 @@ void MCStreamer::reset() {
for (unsigned i = 0; i < getNumW64UnwindInfos(); ++i)
delete W64UnwindInfos[i];
W64UnwindInfos.clear();
- EmitEHFrame = true;
- EmitDebugFrame = false;
- CurrentW64UnwindInfo = 0;
- LastSymbol = 0;
+ CurrentW64UnwindInfo = nullptr;
+ LastSymbol = nullptr;
SectionStack.clear();
SectionStack.push_back(std::pair<MCSectionSubPair, MCSectionSubPair>());
}
@@ -147,8 +144,9 @@ void MCStreamer::EmitAbsValue(const MCExpr *Value, unsigned Size) {
}
-void MCStreamer::EmitValue(const MCExpr *Value, unsigned Size) {
- EmitValueImpl(Value, Size);
+void MCStreamer::EmitValue(const MCExpr *Value, unsigned Size,
+ const SMLoc &Loc) {
+ EmitValueImpl(Value, Size, Loc);
}
void MCStreamer::EmitSymbolValue(const MCSymbol *Sym, unsigned Size) {
@@ -203,7 +201,7 @@ MCSymbol *MCStreamer::getDwarfLineTableSymbol(unsigned CUID) {
MCDwarfFrameInfo *MCStreamer::getCurrentFrameInfo() {
if (FrameInfos.empty())
- return 0;
+ return nullptr;
return &FrameInfos.back();
}
@@ -258,8 +256,6 @@ void MCStreamer::EmitCompactUnwindEncoding(uint32_t CompactUnwindEncoding) {
void MCStreamer::EmitCFISections(bool EH, bool Debug) {
assert(EH || Debug);
- EmitEHFrame = EH;
- EmitDebugFrame = Debug;
}
void MCStreamer::EmitCFIStartProc(bool IsSimple) {
@@ -278,6 +274,10 @@ void MCStreamer::EmitCFIStartProcImpl(MCDwarfFrameInfo &Frame) {
}
void MCStreamer::RecordProcStart(MCDwarfFrameInfo &Frame) {
+ // Report an error if we haven't seen a symbol yet where we'd bind
+ // .cfi_startproc.
+ if (!LastSymbol)
+ report_fatal_error("No symbol to start a frame");
Frame.Function = LastSymbol;
// We need to create a local symbol to avoid relocations.
Frame.Begin = getContext().CreateTempSymbol();
@@ -610,17 +610,6 @@ void MCStreamer::EmitRawText(const Twine &T) {
EmitRawTextImpl(T.toStringRef(Str));
}
-void MCStreamer::EmitFrames(MCAsmBackend *MAB, bool usingCFI) {
- if (!getNumFrameInfos())
- return;
-
- if (EmitEHFrame)
- MCDwarfFrameEmitter::Emit(*this, MAB, usingCFI, true);
-
- if (EmitDebugFrame)
- MCDwarfFrameEmitter::Emit(*this, MAB, usingCFI, false);
-}
-
void MCStreamer::EmitW64Tables() {
if (!getNumW64UnwindInfos())
return;
@@ -639,11 +628,6 @@ void MCStreamer::Finish() {
FinishImpl();
}
-MCSymbolData &MCStreamer::getOrCreateSymbolData(const MCSymbol *Symbol) {
- report_fatal_error("Not supported!");
- return *(static_cast<MCSymbolData*>(0));
-}
-
void MCStreamer::EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) {
Symbol->setVariableValue(Value);
diff --git a/lib/MC/MCSubtargetInfo.cpp b/lib/MC/MCSubtargetInfo.cpp
index 8d8e290..4424c91 100644
--- a/lib/MC/MCSubtargetInfo.cpp
+++ b/lib/MC/MCSubtargetInfo.cpp
@@ -24,9 +24,7 @@ MCSchedModel MCSchedModel::DefaultSchedModel; // For unknown processors.
void
MCSubtargetInfo::InitMCProcessorInfo(StringRef CPU, StringRef FS) {
SubtargetFeatures Features(FS);
- FeatureBits = Features.getFeatureBits(CPU, ProcDesc, NumProcs,
- ProcFeatures, NumFeatures);
-
+ FeatureBits = Features.getFeatureBits(CPU, ProcDesc, ProcFeatures);
InitCPUSchedModel(CPU);
}
@@ -40,16 +38,15 @@ MCSubtargetInfo::InitCPUSchedModel(StringRef CPU) {
void
MCSubtargetInfo::InitMCSubtargetInfo(StringRef TT, StringRef CPU, StringRef FS,
- const SubtargetFeatureKV *PF,
- const SubtargetFeatureKV *PD,
+ ArrayRef<SubtargetFeatureKV> PF,
+ ArrayRef<SubtargetFeatureKV> PD,
const SubtargetInfoKV *ProcSched,
const MCWriteProcResEntry *WPR,
const MCWriteLatencyEntry *WL,
const MCReadAdvanceEntry *RA,
const InstrStage *IS,
const unsigned *OC,
- const unsigned *FP,
- unsigned NF, unsigned NP) {
+ const unsigned *FP) {
TargetTriple = TT;
ProcFeatures = PF;
ProcDesc = PD;
@@ -61,8 +58,6 @@ MCSubtargetInfo::InitMCSubtargetInfo(StringRef TT, StringRef CPU, StringRef FS,
Stages = IS;
OperandCycles = OC;
ForwardingPaths = FP;
- NumFeatures = NF;
- NumProcs = NP;
InitMCProcessorInfo(CPU, FS);
}
@@ -78,8 +73,7 @@ uint64_t MCSubtargetInfo::ToggleFeature(uint64_t FB) {
/// bits. This version will also change all implied bits.
uint64_t MCSubtargetInfo::ToggleFeature(StringRef FS) {
SubtargetFeatures Features;
- FeatureBits = Features.ToggleFeature(FeatureBits, FS,
- ProcFeatures, NumFeatures);
+ FeatureBits = Features.ToggleFeature(FeatureBits, FS, ProcFeatures);
return FeatureBits;
}
@@ -88,6 +82,7 @@ const MCSchedModel *
MCSubtargetInfo::getSchedModelForCPU(StringRef CPU) const {
assert(ProcSchedModels && "Processor machine model not available!");
+ unsigned NumProcs = ProcDesc.size();
#ifndef NDEBUG
for (size_t i = 1; i < NumProcs; i++) {
assert(strcmp(ProcSchedModels[i - 1].Key, ProcSchedModels[i].Key) < 0 &&
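Why NumFeatures and NumProcs can be deleted: an ArrayRef is a (pointer, length) pair, so each table now carries its own size. A minimal before/after comparison (illustrative code, not from the patch):

#include "llvm/ADT/ArrayRef.h"
#include <cstddef>

struct KV { const char *Key; };

// Before: the size travels in a separate parameter that every caller and
// every member (NumProcs, NumFeatures) must keep in sync.
static size_t countOld(const KV *Table, size_t TableSize) {
  return TableSize;
}

// After: the size is part of the reference itself.
static size_t countNew(llvm::ArrayRef<KV> Table) {
  return Table.size();
}

int main() {
  static const KV Procs[] = {{"cortex-a9"}, {"swift"}};
  return countOld(Procs, 2) == countNew(Procs) ? 0 : 1; // returns 0
}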
diff --git a/lib/MC/MCTargetOptions.cpp b/lib/MC/MCTargetOptions.cpp
new file mode 100644
index 0000000..8e946d5
--- /dev/null
+++ b/lib/MC/MCTargetOptions.cpp
@@ -0,0 +1,19 @@
+//===- lib/MC/MCTargetOptions.cpp - MC Target Options --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCTargetOptions.h"
+
+namespace llvm {
+
+MCTargetOptions::MCTargetOptions()
+ : SanitizeAddress(false), MCRelaxAll(false), MCNoExecStack(false),
+ MCSaveTempLabels(false), MCUseDwarfDirectory(false),
+ ShowMCEncoding(false), ShowMCInst(false), AsmVerbose(false) {}
+
+} // end namespace llvm
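A possible use of the new options bag, assuming only the fields initialized by the constructor above (a sketch, not code from this commit):

#include "llvm/MC/MCTargetOptions.h"
using namespace llvm;

MCTargetOptions makeVerboseOptions() {
  MCTargetOptions Opts;        // all-false defaults from the ctor above
  Opts.AsmVerbose = true;      // assumption: consulted by asm printers
  Opts.ShowMCEncoding = true;  // assumption: dump instruction encodings
  return Opts;
}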
diff --git a/lib/MC/MCValue.cpp b/lib/MC/MCValue.cpp
index 68ecffb..9dfc56e 100644
--- a/lib/MC/MCValue.cpp
+++ b/lib/MC/MCValue.cpp
@@ -10,6 +10,7 @@
#include "llvm/MC/MCValue.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -38,6 +39,23 @@ void MCValue::print(raw_ostream &OS, const MCAsmInfo *MAI) const {
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void MCValue::dump() const {
- print(dbgs(), 0);
+ print(dbgs(), nullptr);
}
#endif
+
+MCSymbolRefExpr::VariantKind MCValue::getAccessVariant() const {
+ const MCSymbolRefExpr *B = getSymB();
+ if (B) {
+ if (B->getKind() != MCSymbolRefExpr::VK_None)
+ llvm_unreachable("unsupported");
+ }
+
+ const MCSymbolRefExpr *A = getSymA();
+ if (!A)
+ return MCSymbolRefExpr::VK_None;
+
+ MCSymbolRefExpr::VariantKind Kind = A->getKind();
+ if (Kind == MCSymbolRefExpr::VK_WEAKREF)
+ return MCSymbolRefExpr::VK_None;
+ return Kind;
+}
diff --git a/lib/MC/MachObjectWriter.cpp b/lib/MC/MachObjectWriter.cpp
index 5fcea5f..cbaf0b8 100644
--- a/lib/MC/MachObjectWriter.cpp
+++ b/lib/MC/MachObjectWriter.cpp
@@ -26,6 +26,8 @@
#include <vector>
using namespace llvm;
+#define DEBUG_TYPE "mc"
+
void MachObjectWriter::reset() {
Relocations.clear();
IndirectSymBase.clear();
@@ -349,6 +351,9 @@ void MachObjectWriter::WriteNlist(MachSymbolData &MSD,
}
}
+ if (Layout.getAssembler().isThumbFunc(&Symbol))
+ Flags |= SF_ThumbFunc;
+
// struct nlist (12 bytes)
Write32(MSD.StringIndex);
@@ -516,15 +521,14 @@ ComputeSymbolTable(MCAssembler &Asm, SmallString<256> &StringTable,
// table, then sort the symbols is chosen to match 'as'. Even though it
// doesn't matter for correctness, this is important for letting us diff .o
// files.
- for (MCAssembler::symbol_iterator it = Asm.symbol_begin(),
- ie = Asm.symbol_end(); it != ie; ++it) {
- const MCSymbol &Symbol = it->getSymbol();
+ for (MCSymbolData &SD : Asm.symbols()) {
+ const MCSymbol &Symbol = SD.getSymbol();
// Ignore non-linker visible symbols.
- if (!Asm.isSymbolLinkerVisible(it->getSymbol()))
+ if (!Asm.isSymbolLinkerVisible(SD.getSymbol()))
continue;
- if (!it->isExternal() && !Symbol.isUndefined())
+ if (!SD.isExternal() && !Symbol.isUndefined())
continue;
uint64_t &Entry = StringIndexMap[Symbol.getName()];
@@ -535,7 +539,7 @@ ComputeSymbolTable(MCAssembler &Asm, SmallString<256> &StringTable,
}
MachSymbolData MSD;
- MSD.SymbolData = it;
+ MSD.SymbolData = &SD;
MSD.StringIndex = Entry;
if (Symbol.isUndefined()) {
@@ -552,15 +556,14 @@ ComputeSymbolTable(MCAssembler &Asm, SmallString<256> &StringTable,
}
// Now add the data for local symbols.
- for (MCAssembler::symbol_iterator it = Asm.symbol_begin(),
- ie = Asm.symbol_end(); it != ie; ++it) {
- const MCSymbol &Symbol = it->getSymbol();
+ for (MCSymbolData &SD : Asm.symbols()) {
+ const MCSymbol &Symbol = SD.getSymbol();
// Ignore non-linker visible symbols.
- if (!Asm.isSymbolLinkerVisible(it->getSymbol()))
+ if (!Asm.isSymbolLinkerVisible(SD.getSymbol()))
continue;
- if (it->isExternal() || Symbol.isUndefined())
+ if (SD.isExternal() || Symbol.isUndefined())
continue;
uint64_t &Entry = StringIndexMap[Symbol.getName()];
@@ -571,7 +574,7 @@ ComputeSymbolTable(MCAssembler &Asm, SmallString<256> &StringTable,
}
MachSymbolData MSD;
- MSD.SymbolData = it;
+ MSD.SymbolData = &SD;
MSD.StringIndex = Entry;
if (Symbol.isAbsolute()) {
@@ -621,10 +624,7 @@ void MachObjectWriter::computeSectionAddresses(const MCAssembler &Asm,
void MachObjectWriter::markAbsoluteVariableSymbols(MCAssembler &Asm,
const MCAsmLayout &Layout) {
- for (MCAssembler::symbol_iterator i = Asm.symbol_begin(),
- e = Asm.symbol_end();
- i != e; ++i) {
- MCSymbolData &SD = *i;
+ for (MCSymbolData &SD : Asm.symbols()) {
if (!SD.getSymbol().isVariable())
continue;
@@ -669,7 +669,7 @@ IsSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm,
// - addr(atom(B)) - offset(B)
// and the offsets are not relocatable, so the fixup is fully resolved when
// addr(atom(A)) - addr(atom(B)) == 0.
- const MCSymbolData *A_Base = 0, *B_Base = 0;
+ const MCSymbolData *A_Base = nullptr, *B_Base = nullptr;
const MCSymbol &SA = DataA.getSymbol().AliasedSymbol();
const MCSection &SecA = SA.getSection();
diff --git a/lib/MC/SubtargetFeature.cpp b/lib/MC/SubtargetFeature.cpp
index 2fb91f2..27525c7 100644
--- a/lib/MC/SubtargetFeature.cpp
+++ b/lib/MC/SubtargetFeature.cpp
@@ -51,40 +51,12 @@ static inline bool isEnabled(const StringRef Feature) {
return Ch == '+';
}
-/// PrependFlag - Return a string with a prepended flag; '+' or '-'.
-///
-static inline std::string PrependFlag(const StringRef Feature,
- bool IsEnabled) {
- assert(!Feature.empty() && "Empty string");
- if (hasFlag(Feature))
- return Feature;
- std::string Prefix = IsEnabled ? "+" : "-";
- Prefix += Feature;
- return Prefix;
-}
-
/// Split - Splits a string of comma-separated items into a vector of strings.
///
static void Split(std::vector<std::string> &V, const StringRef S) {
- if (S.empty())
- return;
-
- // Start at beginning of string.
- size_t Pos = 0;
- while (true) {
- // Find the next comma
- size_t Comma = S.find(',', Pos);
- // If no comma found then the rest of the string is used
- if (Comma == std::string::npos) {
- // Add string to vector
- V.push_back(S.substr(Pos));
- break;
- }
- // Otherwise add substring to vector
- V.push_back(S.substr(Pos, Comma - Pos));
- // Advance to next item
- Pos = Comma + 1;
- }
+ SmallVector<StringRef, 2> Tmp;
+ S.split(Tmp, ",", -1, false /* KeepEmpty */);
+ V.assign(Tmp.begin(), Tmp.end());
}
/// Join a vector of strings to a string with a comma separating each element.
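What the new StringRef::split call does: a MaxSplit of -1 splits on every comma, and KeepEmpty=false drops empty pieces, which the old hand-rolled loop would have kept for inputs like "+a,,+b". A tiny demonstration:

#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

int main() {
  SmallVector<StringRef, 2> Parts;
  StringRef("+neon,,+vfp3").split(Parts, ",", -1, /*KeepEmpty=*/false);
  for (StringRef P : Parts)
    outs() << P << "\n"; // +neon then +vfp3; the empty field is dropped
}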
@@ -109,63 +81,55 @@ static std::string Join(const std::vector<std::string> &V) {
}
/// Adding features.
-void SubtargetFeatures::AddFeature(const StringRef String,
- bool IsEnabled) {
- // Don't add empty features
- if (!String.empty()) {
- // Convert to lowercase, prepend flag and add to vector
- Features.push_back(PrependFlag(String.lower(), IsEnabled));
- }
+void SubtargetFeatures::AddFeature(const StringRef String) {
+ // Don't add empty features or features we already have.
+ if (!String.empty())
+ // Convert to lowercase, prepend flag if we don't already have a flag.
+ Features.push_back(hasFlag(String) ? String.str() : "+" + String.lower());
}
/// Find KV in array using binary search.
-static const SubtargetFeatureKV *Find(StringRef S, const SubtargetFeatureKV *A,
- size_t L) {
- // Determine the end of the array
- const SubtargetFeatureKV *Hi = A + L;
+static const SubtargetFeatureKV *Find(StringRef S,
+ ArrayRef<SubtargetFeatureKV> A) {
// Binary search the array
- const SubtargetFeatureKV *F = std::lower_bound(A, Hi, S);
+ auto F = std::lower_bound(A.begin(), A.end(), S);
// If not found then return nullptr
- if (F == Hi || StringRef(F->Key) != S) return NULL;
+ if (F == A.end() || StringRef(F->Key) != S) return nullptr;
// Return the found array item
return F;
}
/// getLongestEntryLength - Return the length of the longest entry in the table.
///
-static size_t getLongestEntryLength(const SubtargetFeatureKV *Table,
- size_t Size) {
+static size_t getLongestEntryLength(ArrayRef<SubtargetFeatureKV> Table) {
size_t MaxLen = 0;
- for (size_t i = 0; i < Size; i++)
- MaxLen = std::max(MaxLen, std::strlen(Table[i].Key));
+ for (auto &I : Table)
+ MaxLen = std::max(MaxLen, std::strlen(I.Key));
return MaxLen;
}
/// Display help for feature choices.
///
-static void Help(const SubtargetFeatureKV *CPUTable, size_t CPUTableSize,
- const SubtargetFeatureKV *FeatTable, size_t FeatTableSize) {
+static void Help(ArrayRef<SubtargetFeatureKV> CPUTable,
+ ArrayRef<SubtargetFeatureKV> FeatTable) {
// Determine the length of the longest CPU and Feature entries.
- unsigned MaxCPULen = getLongestEntryLength(CPUTable, CPUTableSize);
- unsigned MaxFeatLen = getLongestEntryLength(FeatTable, FeatTableSize);
+ unsigned MaxCPULen = getLongestEntryLength(CPUTable);
+ unsigned MaxFeatLen = getLongestEntryLength(FeatTable);
// Print the CPU table.
errs() << "Available CPUs for this target:\n\n";
- for (size_t i = 0; i != CPUTableSize; i++)
- errs() << format(" %-*s - %s.\n",
- MaxCPULen, CPUTable[i].Key, CPUTable[i].Desc);
+ for (auto &CPU : CPUTable)
+ errs() << format(" %-*s - %s.\n", MaxCPULen, CPU.Key, CPU.Desc);
errs() << '\n';
// Print the Feature table.
errs() << "Available features for this target:\n\n";
- for (size_t i = 0; i != FeatTableSize; i++)
- errs() << format(" %-*s - %s.\n",
- MaxFeatLen, FeatTable[i].Key, FeatTable[i].Desc);
+ for (auto &Feature : FeatTable)
+ errs() << format(" %-*s - %s.\n", MaxFeatLen, Feature.Key, Feature.Desc);
errs() << '\n';
errs() << "Use +feature to enable a feature, or -feature to disable it.\n"
"For example, llc -mcpu=mycpu -mattr=+feature1,-feature2\n";
- std::exit(1);
}
//===----------------------------------------------------------------------===//
@@ -187,16 +151,13 @@ std::string SubtargetFeatures::getString() const {
///
static
void SetImpliedBits(uint64_t &Bits, const SubtargetFeatureKV *FeatureEntry,
- const SubtargetFeatureKV *FeatureTable,
- size_t FeatureTableSize) {
- for (size_t i = 0; i < FeatureTableSize; ++i) {
- const SubtargetFeatureKV &FE = FeatureTable[i];
-
+ ArrayRef<SubtargetFeatureKV> FeatureTable) {
+ for (auto &FE : FeatureTable) {
if (FeatureEntry->Value == FE.Value) continue;
if (FeatureEntry->Implies & FE.Value) {
Bits |= FE.Value;
- SetImpliedBits(Bits, &FE, FeatureTable, FeatureTableSize);
+ SetImpliedBits(Bits, &FE, FeatureTable);
}
}
}
@@ -206,16 +167,13 @@ void SetImpliedBits(uint64_t &Bits, const SubtargetFeatureKV *FeatureEntry,
///
static
void ClearImpliedBits(uint64_t &Bits, const SubtargetFeatureKV *FeatureEntry,
- const SubtargetFeatureKV *FeatureTable,
- size_t FeatureTableSize) {
- for (size_t i = 0; i < FeatureTableSize; ++i) {
- const SubtargetFeatureKV &FE = FeatureTable[i];
-
+ ArrayRef<SubtargetFeatureKV> FeatureTable) {
+ for (auto &FE : FeatureTable) {
if (FeatureEntry->Value == FE.Value) continue;
if (FE.Implies & FeatureEntry->Value) {
Bits &= ~FE.Value;
- ClearImpliedBits(Bits, &FE, FeatureTable, FeatureTableSize);
+ ClearImpliedBits(Bits, &FE, FeatureTable);
}
}
}
@@ -224,23 +182,23 @@ void ClearImpliedBits(uint64_t &Bits, const SubtargetFeatureKV *FeatureEntry,
/// bits.
uint64_t
SubtargetFeatures::ToggleFeature(uint64_t Bits, const StringRef Feature,
- const SubtargetFeatureKV *FeatureTable,
- size_t FeatureTableSize) {
+ ArrayRef<SubtargetFeatureKV> FeatureTable) {
+
// Find feature in table.
const SubtargetFeatureKV *FeatureEntry =
- Find(StripFlag(Feature), FeatureTable, FeatureTableSize);
+ Find(StripFlag(Feature), FeatureTable);
// If there is a match
if (FeatureEntry) {
if ((Bits & FeatureEntry->Value) == FeatureEntry->Value) {
Bits &= ~FeatureEntry->Value;
// For each feature that implies this, clear it.
- ClearImpliedBits(Bits, FeatureEntry, FeatureTable, FeatureTableSize);
+ ClearImpliedBits(Bits, FeatureEntry, FeatureTable);
} else {
Bits |= FeatureEntry->Value;
// For each feature that this implies, set it.
- SetImpliedBits(Bits, FeatureEntry, FeatureTable, FeatureTableSize);
+ SetImpliedBits(Bits, FeatureEntry, FeatureTable);
}
} else {
errs() << "'" << Feature
@@ -254,20 +212,20 @@ SubtargetFeatures::ToggleFeature(uint64_t Bits, const StringRef Feature,
/// getFeatureBits - Get feature bits for a CPU.
///
-uint64_t SubtargetFeatures::getFeatureBits(const StringRef CPU,
- const SubtargetFeatureKV *CPUTable,
- size_t CPUTableSize,
- const SubtargetFeatureKV *FeatureTable,
- size_t FeatureTableSize) {
- if (!FeatureTableSize || !CPUTableSize)
+uint64_t
+SubtargetFeatures::getFeatureBits(const StringRef CPU,
+ ArrayRef<SubtargetFeatureKV> CPUTable,
+ ArrayRef<SubtargetFeatureKV> FeatureTable) {
+
+ if (CPUTable.empty() || FeatureTable.empty())
return 0;
#ifndef NDEBUG
- for (size_t i = 1; i < CPUTableSize; i++) {
+ for (size_t i = 1, e = CPUTable.size(); i != e; ++i) {
assert(strcmp(CPUTable[i - 1].Key, CPUTable[i].Key) < 0 &&
"CPU table is not sorted");
}
- for (size_t i = 1; i < FeatureTableSize; i++) {
+ for (size_t i = 1, e = FeatureTable.size(); i != e; ++i) {
assert(strcmp(FeatureTable[i - 1].Key, FeatureTable[i].Key) < 0 &&
"CPU features table is not sorted");
}
@@ -276,21 +234,21 @@ uint64_t SubtargetFeatures::getFeatureBits(const StringRef CPU,
// Check if help is needed
if (CPU == "help")
- Help(CPUTable, CPUTableSize, FeatureTable, FeatureTableSize);
+ Help(CPUTable, FeatureTable);
// Find CPU entry if CPU name is specified.
- if (!CPU.empty()) {
- const SubtargetFeatureKV *CPUEntry = Find(CPU, CPUTable, CPUTableSize);
+ else if (!CPU.empty()) {
+ const SubtargetFeatureKV *CPUEntry = Find(CPU, CPUTable);
+
// If there is a match
if (CPUEntry) {
// Set base feature bits
Bits = CPUEntry->Value;
// Set the feature implied by this CPU feature, if any.
- for (size_t i = 0; i < FeatureTableSize; ++i) {
- const SubtargetFeatureKV &FE = FeatureTable[i];
+ for (auto &FE : FeatureTable) {
if (CPUEntry->Value & FE.Value)
- SetImpliedBits(Bits, &FE, FeatureTable, FeatureTableSize);
+ SetImpliedBits(Bits, &FE, FeatureTable);
}
} else {
errs() << "'" << CPU
@@ -300,16 +258,14 @@ uint64_t SubtargetFeatures::getFeatureBits(const StringRef CPU,
}
// Iterate through each feature
- for (size_t i = 0, E = Features.size(); i < E; i++) {
- const StringRef Feature = Features[i];
-
+ for (auto &Feature : Features) {
// Check for help
if (Feature == "+help")
- Help(CPUTable, CPUTableSize, FeatureTable, FeatureTableSize);
+ Help(CPUTable, FeatureTable);
// Find feature in table.
const SubtargetFeatureKV *FeatureEntry =
- Find(StripFlag(Feature), FeatureTable, FeatureTableSize);
+ Find(StripFlag(Feature), FeatureTable);
// If there is a match
if (FeatureEntry) {
// Enable/disable feature in bits
@@ -317,12 +273,12 @@ uint64_t SubtargetFeatures::getFeatureBits(const StringRef CPU,
Bits |= FeatureEntry->Value;
// For each feature that this implies, set it.
- SetImpliedBits(Bits, FeatureEntry, FeatureTable, FeatureTableSize);
+ SetImpliedBits(Bits, FeatureEntry, FeatureTable);
} else {
Bits &= ~FeatureEntry->Value;
// For each feature that implies this, clear it.
- ClearImpliedBits(Bits, FeatureEntry, FeatureTable, FeatureTableSize);
+ ClearImpliedBits(Bits, FeatureEntry, FeatureTable);
}
} else {
errs() << "'" << Feature
@@ -337,8 +293,8 @@ uint64_t SubtargetFeatures::getFeatureBits(const StringRef CPU,
/// print - Print feature string.
///
void SubtargetFeatures::print(raw_ostream &OS) const {
- for (size_t i = 0, e = Features.size(); i != e; ++i)
- OS << Features[i] << " ";
+ for (auto &F : Features)
+ OS << F << " ";
OS << "\n";
}
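The implied-bits helpers above recurse over the whole table. A toy version with an invented three-entry feature table, showing how enabling one feature transitively sets everything it implies:

#include "llvm/ADT/ArrayRef.h"
#include <cstdint>
#include <cstdio>

struct FeatureKV { const char *Key; uint64_t Value; uint64_t Implies; };

static void setImpliedBits(uint64_t &Bits, const FeatureKV *Entry,
                           llvm::ArrayRef<FeatureKV> Table) {
  for (const FeatureKV &FE : Table) {
    if (Entry->Value == FE.Value)
      continue;
    if (Entry->Implies & FE.Value) {
      Bits |= FE.Value;
      setImpliedBits(Bits, &FE, Table); // follow transitive implications
    }
  }
}

int main() {
  static const FeatureKV Table[] = {
      {"vfp2", 1 << 0, 0},
      {"vfp3", 1 << 1, 1 << 0}, // vfp3 implies vfp2
      {"neon", 1 << 2, 1 << 1}, // neon implies vfp3 (hence vfp2)
  };
  uint64_t Bits = Table[2].Value;
  setImpliedBits(Bits, &Table[2], Table);
  std::printf("bits = 0x%llx\n", (unsigned long long)Bits); // bits = 0x7
}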
diff --git a/lib/MC/WinCOFFObjectWriter.cpp b/lib/MC/WinCOFFObjectWriter.cpp
index 500acd8..961cbc6 100644
--- a/lib/MC/WinCOFFObjectWriter.cpp
+++ b/lib/MC/WinCOFFObjectWriter.cpp
@@ -11,12 +11,11 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "WinCOFFObjectWriter"
-
#include "llvm/MC/MCWinCOFFObjectWriter.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCAsmLayout.h"
#include "llvm/MC/MCAssembler.h"
@@ -35,6 +34,8 @@
using namespace llvm;
+#define DEBUG_TYPE "WinCOFFObjectWriter"
+
namespace {
typedef SmallString<COFF::NameSize> name;
@@ -81,7 +82,7 @@ struct COFFRelocation {
COFF::relocation Data;
COFFSymbol *Symb;
- COFFRelocation() : Symb(NULL) {}
+ COFFRelocation() : Symb(nullptr) {}
static size_t size() { return COFF::RelocationSize; }
};
@@ -118,8 +119,8 @@ public:
class WinCOFFObjectWriter : public MCObjectWriter {
public:
- typedef std::vector<COFFSymbol*> symbols;
- typedef std::vector<COFFSection*> sections;
+ typedef std::vector<std::unique_ptr<COFFSymbol>> symbols;
+ typedef std::vector<std::unique_ptr<COFFSection>> sections;
typedef DenseMap<MCSymbol const *, COFFSymbol *> symbol_map;
typedef DenseMap<MCSection const *, COFFSection *> section_map;
@@ -137,7 +138,6 @@ public:
symbol_map SymbolMap;
WinCOFFObjectWriter(MCWinCOFFObjectTargetWriter *MOTW, raw_ostream &OS);
- virtual ~WinCOFFObjectWriter();
COFFSymbol *createSymbol(StringRef Name);
COFFSymbol *GetOrCreateCOFFSymbol(const MCSymbol * Symbol);
@@ -160,7 +160,7 @@ public:
// Entity writing methods.
void WriteFileHeader(const COFF::header &Header);
- void WriteSymbol(const COFFSymbol *S);
+ void WriteSymbol(const COFFSymbol &S);
void WriteAuxiliarySymbols(const COFFSymbol::AuxiliarySymbols &S);
void WriteSectionHeader(const COFF::section &S);
void WriteRelocation(const COFF::relocation &R);
@@ -192,10 +192,10 @@ static inline void write_uint32_le(void *Data, uint32_t const &Value) {
COFFSymbol::COFFSymbol(StringRef name)
: Name(name.begin(), name.end())
- , Other(NULL)
- , Section(NULL)
+ , Other(nullptr)
+ , Section(nullptr)
, Relocations(0)
- , MCData(NULL) {
+ , MCData(nullptr) {
memset(&Data, 0, sizeof(Data));
}
@@ -214,7 +214,7 @@ void COFFSymbol::set_name_offset(uint32_t Offset) {
/// logic to decide if the symbol should be reported in the symbol table
bool COFFSymbol::should_keep() const {
// no section means it's external, keep it
- if (Section == NULL)
+ if (!Section)
return true;
// if it has relocations pointing at it, keep it
@@ -244,8 +244,8 @@ bool COFFSymbol::should_keep() const {
COFFSection::COFFSection(StringRef name)
: Name(name)
- , MCData(NULL)
- , Symbol(NULL) {
+ , MCData(nullptr)
+ , Symbol(nullptr) {
memset(&Header, 0, sizeof(Header));
}
@@ -308,13 +308,6 @@ WinCOFFObjectWriter::WinCOFFObjectWriter(MCWinCOFFObjectTargetWriter *MOTW,
Header.Machine = TargetObjectWriter->getMachine();
}
-WinCOFFObjectWriter::~WinCOFFObjectWriter() {
- for (symbols::iterator I = Symbols.begin(), E = Symbols.end(); I != E; ++I)
- delete *I;
- for (sections::iterator I = Sections.begin(), E = Sections.end(); I != E; ++I)
- delete *I;
-}
-
COFFSymbol *WinCOFFObjectWriter::createSymbol(StringRef Name) {
return createCOFFEntity<COFFSymbol>(Name, Symbols);
}
@@ -338,11 +331,9 @@ COFFSection *WinCOFFObjectWriter::createSection(StringRef Name) {
template <typename object_t, typename list_t>
object_t *WinCOFFObjectWriter::createCOFFEntity(StringRef Name,
list_t &List) {
- object_t *Object = new object_t(Name);
-
- List.push_back(Object);
+ List.push_back(make_unique<object_t>(Name));
- return Object;
+ return List.back().get();
}
/// This function takes a section data object from the assembler
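The deleted destructor is the point of this hunk: once Symbols and Sections hold std::unique_ptr, element lifetime is automatic. The change in miniature, with stand-in types and plain unique_ptr instead of llvm::make_unique to stay self-contained:

#include <memory>
#include <string>
#include <vector>

struct Entity {
  std::string Name;
  explicit Entity(std::string N) : Name(std::move(N)) {}
};

struct Writer {
  std::vector<std::unique_ptr<Entity>> Entities;

  Entity *create(std::string Name) {
    Entities.push_back(std::unique_ptr<Entity>(new Entity(std::move(Name))));
    return Entities.back().get(); // raw, non-owning handle for callers
  }
  // No ~Writer(): the unique_ptrs delete the Entities automatically.
};

int main() {
  Writer W;
  Entity *Sym = W.create(".text");
  return Sym->Name == ".text" ? 0 : 1; // returns 0
}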
@@ -394,7 +385,19 @@ void WinCOFFObjectWriter::DefineSection(MCSectionData const &SectionData) {
SectionMap[&SectionData.getSection()] = coff_section;
}
-/// This function takes a section data object from the assembler
+static uint64_t getSymbolValue(const MCSymbolData &Data,
+ const MCAsmLayout &Layout) {
+ if (Data.isCommon() && Data.isExternal())
+ return Data.getCommonSize();
+
+ uint64_t Res;
+ if (!Layout.getSymbolOffset(&Data, Res))
+ return 0;
+
+ return Res;
+}
+
+/// This function takes a symbol data object from the assembler
/// and creates the associated COFF symbol staging object.
void WinCOFFObjectWriter::DefineSymbol(MCSymbolData const &SymbolData,
MCAssembler &Assembler,
@@ -436,31 +439,29 @@ void WinCOFFObjectWriter::DefineSymbol(MCSymbolData const &SymbolData,
coff_symbol->MCData = &SymbolData;
} else {
- const MCSymbolData &ResSymData =
- Assembler.getSymbolData(Symbol.AliasedSymbol());
-
- if (Symbol.isVariable()) {
- int64_t Addr;
- if (Symbol.getVariableValue()->EvaluateAsAbsolute(Addr, Layout))
- coff_symbol->Data.Value = Addr;
- }
+ const MCSymbolData &ResSymData = Assembler.getSymbolData(Symbol);
+ const MCSymbol *Base = Layout.getBaseSymbol(Symbol);
+ coff_symbol->Data.Value = getSymbolValue(ResSymData, Layout);
coff_symbol->Data.Type = (ResSymData.getFlags() & 0x0000FFFF) >> 0;
coff_symbol->Data.StorageClass = (ResSymData.getFlags() & 0x00FF0000) >> 16;
// If no storage class was specified in the streamer, define it here.
if (coff_symbol->Data.StorageClass == 0) {
- bool external = ResSymData.isExternal() || (ResSymData.Fragment == NULL);
+ bool external = ResSymData.isExternal() || !ResSymData.Fragment;
coff_symbol->Data.StorageClass =
external ? COFF::IMAGE_SYM_CLASS_EXTERNAL : COFF::IMAGE_SYM_CLASS_STATIC;
}
- if (Symbol.isAbsolute() || Symbol.AliasedSymbol().isVariable())
+ if (!Base) {
coff_symbol->Data.SectionNumber = COFF::IMAGE_SYM_ABSOLUTE;
- else if (ResSymData.Fragment != NULL)
- coff_symbol->Section =
- SectionMap[&ResSymData.Fragment->getParent()->getSection()];
+ } else {
+ const MCSymbolData &BaseData = Assembler.getSymbolData(*Base);
+ if (BaseData.Fragment)
+ coff_symbol->Section =
+ SectionMap[&BaseData.Fragment->getParent()->getSection()];
+ }
coff_symbol->MCData = &ResSymData;
}
@@ -561,14 +562,14 @@ void WinCOFFObjectWriter::WriteFileHeader(const COFF::header &Header) {
WriteLE16(Header.Characteristics);
}
-void WinCOFFObjectWriter::WriteSymbol(const COFFSymbol *S) {
- WriteBytes(StringRef(S->Data.Name, COFF::NameSize));
- WriteLE32(S->Data.Value);
- WriteLE16(S->Data.SectionNumber);
- WriteLE16(S->Data.Type);
- Write8(S->Data.StorageClass);
- Write8(S->Data.NumberOfAuxSymbols);
- WriteAuxiliarySymbols(S->Aux);
+void WinCOFFObjectWriter::WriteSymbol(const COFFSymbol &S) {
+ WriteBytes(StringRef(S.Data.Name, COFF::NameSize));
+ WriteLE32(S.Data.Value);
+ WriteLE16(S.Data.SectionNumber);
+ WriteLE16(S.Data.Type);
+ Write8(S.Data.StorageClass);
+ Write8(S.Data.NumberOfAuxSymbols);
+ WriteAuxiliarySymbols(S.Aux);
}
void WinCOFFObjectWriter::WriteAuxiliarySymbols(
@@ -640,16 +641,42 @@ void WinCOFFObjectWriter::ExecutePostLayoutBinding(MCAssembler &Asm,
// "Define" each section & symbol. This creates section & symbol
// entries in the staging area.
- for (MCAssembler::const_iterator i = Asm.begin(), e = Asm.end(); i != e; i++)
- DefineSection(*i);
+ static_assert(sizeof(((COFF::AuxiliaryFile *)nullptr)->FileName) == COFF::SymbolSize,
+ "size mismatch for COFF::AuxiliaryFile::FileName");
+ for (auto FI = Asm.file_names_begin(), FE = Asm.file_names_end();
+ FI != FE; ++FI) {
+ // round up to calculate the number of auxiliary symbols required
+ unsigned Count = (FI->size() + COFF::SymbolSize - 1) / COFF::SymbolSize;
+
+ COFFSymbol *file = createSymbol(".file");
+ file->Data.SectionNumber = COFF::IMAGE_SYM_DEBUG;
+ file->Data.StorageClass = COFF::IMAGE_SYM_CLASS_FILE;
+ file->Aux.resize(Count);
+
+ unsigned Offset = 0;
+ unsigned Length = FI->size();
+ for (auto & Aux : file->Aux) {
+ Aux.AuxType = ATFile;
+
+ if (Length > COFF::SymbolSize) {
+ memcpy(Aux.Aux.File.FileName, FI->c_str() + Offset, COFF::SymbolSize);
+ Length = Length - COFF::SymbolSize;
+ } else {
+ memcpy(Aux.Aux.File.FileName, FI->c_str() + Offset, Length);
+ memset(&Aux.Aux.File.FileName[Length], 0, COFF::SymbolSize - Length);
+ Length = 0;
+ }
- for (MCAssembler::const_symbol_iterator i = Asm.symbol_begin(),
- e = Asm.symbol_end();
- i != e; i++) {
- if (ExportSymbol(*i, Asm)) {
- DefineSymbol(*i, Asm, Layout);
+ Offset = Offset + COFF::SymbolSize;
}
}
+
+ for (const auto & Section : Asm)
+ DefineSection(Section);
+
+ for (MCSymbolData &SD : Asm.symbols())
+ if (ExportSymbol(SD, Asm))
+ DefineSymbol(SD, Asm, Layout);
}
void WinCOFFObjectWriter::RecordRelocation(const MCAssembler &Asm,
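The .file handling added above splits a long file name across 18-byte auxiliary records (COFF::SymbolSize) and zero-pads the last one. The same round-up and copy logic as a standalone sketch:

#include <cstring>
#include <iostream>
#include <string>
#include <vector>

static const unsigned SymbolSize = 18; // COFF::SymbolSize

static std::vector<std::string> chunkFileName(const std::string &FileName) {
  // Round up to the number of auxiliary records required.
  unsigned Count = (FileName.size() + SymbolSize - 1) / SymbolSize;
  std::vector<std::string> Aux(Count, std::string(SymbolSize, '\0'));
  unsigned Offset = 0, Length = FileName.size();
  for (std::string &A : Aux) {
    unsigned N = Length > SymbolSize ? SymbolSize : Length;
    std::memcpy(&A[0], FileName.c_str() + Offset, N); // tail stays zeroed
    Length -= N;
    Offset += SymbolSize;
  }
  return Aux;
}

int main() {
  std::cout << chunkFileName("a/very/long/path/to/source.c").size()
            << " aux records\n"; // 29 chars -> 2 aux records
}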
@@ -659,7 +686,7 @@ void WinCOFFObjectWriter::RecordRelocation(const MCAssembler &Asm,
MCValue Target,
bool &IsPCRel,
uint64_t &FixedValue) {
- assert(Target.getSymA() != NULL && "Relocation must reference a symbol!");
+ assert(Target.getSymA() && "Relocation must reference a symbol!");
const MCSymbol &Symbol = Target.getSymA()->getSymbol();
const MCSymbol &A = Symbol.AliasedSymbol();
@@ -668,7 +695,7 @@ void WinCOFFObjectWriter::RecordRelocation(const MCAssembler &Asm,
Fixup.getLoc(),
Twine("symbol '") + A.getName() + "' can not be undefined");
- MCSymbolData &A_SD = Asm.getSymbolData(A);
+ const MCSymbolData &A_SD = Asm.getSymbolData(A);
MCSectionData const *SectionData = Fragment->getParent();
@@ -685,7 +712,7 @@ void WinCOFFObjectWriter::RecordRelocation(const MCAssembler &Asm,
if (SymB) {
const MCSymbol *B = &SymB->getSymbol();
- MCSymbolData &B_SD = Asm.getSymbolData(*B);
+ const MCSymbolData &B_SD = Asm.getSymbolData(*B);
if (!B_SD.getFragment())
Asm.getContext().FatalError(
Fixup.getLoc(),
@@ -737,11 +764,52 @@ void WinCOFFObjectWriter::RecordRelocation(const MCAssembler &Asm,
// FIXME: Can anyone explain what this does other than adjust for the size
// of the offset?
- if (Reloc.Data.Type == COFF::IMAGE_REL_AMD64_REL32 ||
- Reloc.Data.Type == COFF::IMAGE_REL_I386_REL32)
+ if ((Header.Machine == COFF::IMAGE_FILE_MACHINE_AMD64 &&
+ Reloc.Data.Type == COFF::IMAGE_REL_AMD64_REL32) ||
+ (Header.Machine == COFF::IMAGE_FILE_MACHINE_I386 &&
+ Reloc.Data.Type == COFF::IMAGE_REL_I386_REL32))
FixedValue += 4;
- coff_section->Relocations.push_back(Reloc);
+ if (Header.Machine == COFF::IMAGE_FILE_MACHINE_ARMNT) {
+ switch (Reloc.Data.Type) {
+ case COFF::IMAGE_REL_ARM_ABSOLUTE:
+ case COFF::IMAGE_REL_ARM_ADDR32:
+ case COFF::IMAGE_REL_ARM_ADDR32NB:
+ case COFF::IMAGE_REL_ARM_TOKEN:
+ case COFF::IMAGE_REL_ARM_SECTION:
+ case COFF::IMAGE_REL_ARM_SECREL:
+ break;
+ case COFF::IMAGE_REL_ARM_BRANCH11:
+ case COFF::IMAGE_REL_ARM_BLX11:
+ // IMAGE_REL_ARM_BRANCH11 and IMAGE_REL_ARM_BLX11 are only used for
+ // pre-ARMv7, which implicitly rules them out of ARMNT (they would be
+ // valid for Windows CE).
+ case COFF::IMAGE_REL_ARM_BRANCH24:
+ case COFF::IMAGE_REL_ARM_BLX24:
+ case COFF::IMAGE_REL_ARM_MOV32A:
+ // IMAGE_REL_ARM_BRANCH24, IMAGE_REL_ARM_BLX24, IMAGE_REL_ARM_MOV32A are
+ // only used for ARM mode code, which is documented as being unsupported
+ // by Windows on ARM. Empirical testing indicates that masm is able to
+ // generate the relocations; however, the rest of the MSVC toolchain is
+ // unable to handle them.
+ llvm_unreachable("unsupported relocation");
+ break;
+ case COFF::IMAGE_REL_ARM_MOV32T:
+ break;
+ case COFF::IMAGE_REL_ARM_BRANCH20T:
+ case COFF::IMAGE_REL_ARM_BRANCH24T:
+ case COFF::IMAGE_REL_ARM_BLX23T:
+ // IMAGE_REL_ARM_BRANCH20T, IMAGE_REL_ARM_BRANCH24T and
+ // IMAGE_REL_ARM_BLX23T all perform a 4-byte adjustment to the
+ // relocation. Relative branches are offset by 4 on ARM; however,
+ // because there are no RELA relocations, all branches are offset by 4.
+ FixedValue = FixedValue + 4;
+ break;
+ }
+ }
+
+ if (TargetObjectWriter->recordRelocation(Fixup))
+ coff_section->Relocations.push_back(Reloc);
}
void WinCOFFObjectWriter::WriteObject(MCAssembler &Asm,
@@ -750,77 +818,64 @@ void WinCOFFObjectWriter::WriteObject(MCAssembler &Asm,
Header.NumberOfSections = 0;
DenseMap<COFFSection *, uint16_t> SectionIndices;
- for (sections::iterator i = Sections.begin(),
- e = Sections.end(); i != e; i++) {
- if (Layout.getSectionAddressSize((*i)->MCData) > 0) {
+ for (auto & Section : Sections) {
+ if (Layout.getSectionAddressSize(Section->MCData) > 0) {
size_t Number = ++Header.NumberOfSections;
- SectionIndices[*i] = Number;
- MakeSectionReal(**i, Number);
+ SectionIndices[Section.get()] = Number;
+ MakeSectionReal(*Section, Number);
} else {
- (*i)->Number = -1;
+ Section->Number = -1;
}
}
Header.NumberOfSymbols = 0;
- for (symbols::iterator i = Symbols.begin(), e = Symbols.end(); i != e; i++) {
- COFFSymbol *coff_symbol = *i;
- MCSymbolData const *SymbolData = coff_symbol->MCData;
-
+ for (auto &Symbol : Symbols) {
// Update section number & offset for symbols that have them.
- if ((SymbolData != NULL) && (SymbolData->Fragment != NULL)) {
- assert(coff_symbol->Section != NULL);
+ if (Symbol->Section)
+ Symbol->Data.SectionNumber = Symbol->Section->Number;
- coff_symbol->Data.SectionNumber = coff_symbol->Section->Number;
- coff_symbol->Data.Value = Layout.getFragmentOffset(SymbolData->Fragment)
- + SymbolData->Offset;
- }
-
- if (coff_symbol->should_keep()) {
- MakeSymbolReal(*coff_symbol, Header.NumberOfSymbols++);
+ if (Symbol->should_keep()) {
+ MakeSymbolReal(*Symbol, Header.NumberOfSymbols++);
// Update auxiliary symbol info.
- coff_symbol->Data.NumberOfAuxSymbols = coff_symbol->Aux.size();
- Header.NumberOfSymbols += coff_symbol->Data.NumberOfAuxSymbols;
+ Symbol->Data.NumberOfAuxSymbols = Symbol->Aux.size();
+ Header.NumberOfSymbols += Symbol->Data.NumberOfAuxSymbols;
} else
- coff_symbol->Index = -1;
+ Symbol->Index = -1;
}
// Fixup weak external references.
- for (symbols::iterator i = Symbols.begin(), e = Symbols.end(); i != e; i++) {
- COFFSymbol *coff_symbol = *i;
- if (coff_symbol->Other != NULL) {
- assert(coff_symbol->Index != -1);
- assert(coff_symbol->Aux.size() == 1 &&
- "Symbol must contain one aux symbol!");
- assert(coff_symbol->Aux[0].AuxType == ATWeakExternal &&
+ for (auto &Symbol : Symbols) {
+ if (Symbol->Other) {
+ assert(Symbol->Index != -1);
+ assert(Symbol->Aux.size() == 1 && "Symbol must contain one aux symbol!");
+ assert(Symbol->Aux[0].AuxType == ATWeakExternal &&
"Symbol's aux symbol must be a Weak External!");
- coff_symbol->Aux[0].Aux.WeakExternal.TagIndex = coff_symbol->Other->Index;
+ Symbol->Aux[0].Aux.WeakExternal.TagIndex = Symbol->Other->Index;
}
}
// Fixup associative COMDAT sections.
- for (sections::iterator i = Sections.begin(),
- e = Sections.end(); i != e; i++) {
- if ((*i)->Symbol->Aux[0].Aux.SectionDefinition.Selection !=
+ for (auto &Section : Sections) {
+ if (Section->Symbol->Aux[0].Aux.SectionDefinition.Selection !=
COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE)
continue;
- const MCSectionCOFF &MCSec = static_cast<const MCSectionCOFF &>(
- (*i)->MCData->getSection());
+ const MCSectionCOFF &MCSec =
+ static_cast<const MCSectionCOFF &>(Section->MCData->getSection());
COFFSection *Assoc = SectionMap.lookup(MCSec.getAssocSection());
- if (!Assoc) {
+ if (!Assoc)
report_fatal_error(Twine("Missing associated COMDAT section ") +
MCSec.getAssocSection()->getSectionName() +
" for section " + MCSec.getSectionName());
- }
// Skip this section if the associated section is unused.
if (Assoc->Number == -1)
continue;
- (*i)->Symbol->Aux[0].Aux.SectionDefinition.Number = SectionIndices[Assoc];
+ Section->Symbol->Aux[0].Aux.SectionDefinition.Number = SectionIndices[Assoc];
}
@@ -831,15 +886,13 @@ void WinCOFFObjectWriter::WriteObject(MCAssembler &Asm,
offset += COFF::HeaderSize;
offset += COFF::SectionSize * Header.NumberOfSections;
- for (MCAssembler::const_iterator i = Asm.begin(),
- e = Asm.end();
- i != e; i++) {
- COFFSection *Sec = SectionMap[&i->getSection()];
+ for (const auto &Section : Asm) {
+ COFFSection *Sec = SectionMap[&Section.getSection()];
if (Sec->Number == -1)
continue;
- Sec->Header.SizeOfRawData = Layout.getSectionAddressSize(i);
+ Sec->Header.SizeOfRawData = Layout.getSectionAddressSize(&Section);
if (IsPhysicalSection(Sec)) {
Sec->Header.PointerToRawData = offset;
@@ -866,16 +919,14 @@ void WinCOFFObjectWriter::WriteObject(MCAssembler &Asm,
offset += COFF::RelocationSize * Sec->Relocations.size();
- for (relocations::iterator cr = Sec->Relocations.begin(),
- er = Sec->Relocations.end();
- cr != er; ++cr) {
- assert((*cr).Symb->Index != -1);
- (*cr).Data.SymbolTableIndex = (*cr).Symb->Index;
+ for (auto &Relocation : Sec->Relocations) {
+ assert(Relocation.Symb->Index != -1);
+ Relocation.Data.SymbolTableIndex = Relocation.Symb->Index;
}
}
- assert(Sec->Symbol->Aux.size() == 1
- && "Section's symbol must have one aux!");
+ assert(Sec->Symbol->Aux.size() == 1 &&
+ "Section's symbol must have one aux!");
AuxSymbol &Aux = Sec->Symbol->Aux[0];
assert(Aux.AuxType == ATSectionDefinition &&
"Section's symbol's aux symbol must be a Section Definition!");
@@ -898,13 +949,13 @@ void WinCOFFObjectWriter::WriteObject(MCAssembler &Asm,
sections::iterator i, ie;
MCAssembler::const_iterator j, je;
- for (i = Sections.begin(), ie = Sections.end(); i != ie; i++)
- if ((*i)->Number != -1) {
- if ((*i)->Relocations.size() >= 0xffff) {
- (*i)->Header.Characteristics |= COFF::IMAGE_SCN_LNK_NRELOC_OVFL;
- }
- WriteSectionHeader((*i)->Header);
+ for (auto &Section : Sections) {
+ if (Section->Number != -1) {
+ if (Section->Relocations.size() >= 0xffff)
+ Section->Header.Characteristics |= COFF::IMAGE_SCN_LNK_NRELOC_OVFL;
+ WriteSectionHeader(Section->Header);
}
+ }
for (i = Sections.begin(), ie = Sections.end(),
j = Asm.begin(), je = Asm.end();
@@ -934,11 +985,8 @@ void WinCOFFObjectWriter::WriteObject(MCAssembler &Asm,
WriteRelocation(r);
}
- for (relocations::const_iterator k = (*i)->Relocations.begin(),
- ke = (*i)->Relocations.end();
- k != ke; k++) {
- WriteRelocation(k->Data);
- }
+ for (const auto &Relocation : (*i)->Relocations)
+ WriteRelocation(Relocation.Data);
} else
assert((*i)->Header.PointerToRelocations == 0 &&
"Section::PointerToRelocations is insane!");
@@ -948,9 +996,9 @@ void WinCOFFObjectWriter::WriteObject(MCAssembler &Asm,
assert(OS.tell() == Header.PointerToSymbolTable &&
"Header::PointerToSymbolTable is insane!");
- for (symbols::iterator i = Symbols.begin(), e = Symbols.end(); i != e; i++)
- if ((*i)->Index != -1)
- WriteSymbol(*i);
+ for (auto &Symbol : Symbols)
+ if (Symbol->Index != -1)
+ WriteSymbol(*Symbol);
OS.write((char const *)&Strings.Data.front(), Strings.Data.size());
}
diff --git a/lib/MC/WinCOFFStreamer.cpp b/lib/MC/WinCOFFStreamer.cpp
index 5bd7b8f..e6df465 100644
--- a/lib/MC/WinCOFFStreamer.cpp
+++ b/lib/MC/WinCOFFStreamer.cpp
@@ -7,12 +7,11 @@
//
//===----------------------------------------------------------------------===//
//
-// This file contains an implementation of a Win32 COFF object file streamer.
+// This file contains an implementation of a Windows COFF object file streamer.
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "WinCOFFStreamer"
-
+#include "llvm/ADT/StringExtras.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCAsmLayout.h"
@@ -27,6 +26,7 @@
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCValue.h"
#include "llvm/MC/MCWin64EH.h"
+#include "llvm/MC/MCWinCOFFStreamer.h"
#include "llvm/Support/COFF.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -35,95 +35,33 @@
using namespace llvm;
-namespace {
-class WinCOFFStreamer : public MCObjectStreamer {
-public:
- MCSymbol const *CurSymbol;
-
- WinCOFFStreamer(MCContext &Context,
- MCAsmBackend &MAB,
- MCCodeEmitter &CE,
- raw_ostream &OS);
-
- void AddCommonSymbol(MCSymbol *Symbol, uint64_t Size,
- unsigned ByteAlignment, bool External);
-
- // MCStreamer interface
-
- void InitSections() override;
- void EmitLabel(MCSymbol *Symbol) override;
- void EmitDebugLabel(MCSymbol *Symbol) override;
- void EmitAssemblerFlag(MCAssemblerFlag Flag) override;
- void EmitThumbFunc(MCSymbol *Func) override;
- bool EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute) override;
- void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) override;
- void BeginCOFFSymbolDef(MCSymbol const *Symbol) override;
- void EmitCOFFSymbolStorageClass(int StorageClass) override;
- void EmitCOFFSymbolType(int Type) override;
- void EndCOFFSymbolDef() override;
- void EmitCOFFSectionIndex(MCSymbol const *Symbol) override;
- void EmitCOFFSecRel32(MCSymbol const *Symbol) override;
- void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) override;
- void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
- unsigned ByteAlignment) override;
- void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
- unsigned ByteAlignment) override;
- void EmitZerofill(const MCSection *Section, MCSymbol *Symbol,
- uint64_t Size,unsigned ByteAlignment) override;
- void EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol,
- uint64_t Size, unsigned ByteAlignment) override;
- void EmitFileDirective(StringRef Filename) override;
- void EmitIdent(StringRef IdentString) override;
- void EmitWin64EHHandlerData() override;
- void FinishImpl() override;
-
-private:
- void EmitInstToData(const MCInst &Inst, const MCSubtargetInfo &STI) override {
- MCDataFragment *DF = getOrCreateDataFragment();
-
- SmallVector<MCFixup, 4> Fixups;
- SmallString<256> Code;
- raw_svector_ostream VecOS(Code);
- getAssembler().getEmitter().EncodeInstruction(Inst, VecOS, Fixups, STI);
- VecOS.flush();
-
- // Add the fixups and data.
- for (unsigned i = 0, e = Fixups.size(); i != e; ++i) {
- Fixups[i].setOffset(Fixups[i].getOffset() + DF->getContents().size());
- DF->getFixups().push_back(Fixups[i]);
- }
- DF->getContents().append(Code.begin(), Code.end());
- }
-};
-} // end anonymous namespace.
-
-WinCOFFStreamer::WinCOFFStreamer(MCContext &Context, MCAsmBackend &MAB,
- MCCodeEmitter &CE, raw_ostream &OS)
- : MCObjectStreamer(Context, MAB, OS, &CE), CurSymbol(NULL) {}
-
-void WinCOFFStreamer::AddCommonSymbol(MCSymbol *Symbol, uint64_t Size,
- unsigned ByteAlignment, bool External) {
- assert(!Symbol->isInSection() && "Symbol must not already have a section!");
+#define DEBUG_TYPE "WinCOFFStreamer"
- const MCSection *Section = getContext().getObjectFileInfo()->getBSSSection();
- MCSectionData &SectionData = getAssembler().getOrCreateSectionData(*Section);
- if (SectionData.getAlignment() < ByteAlignment)
- SectionData.setAlignment(ByteAlignment);
+namespace llvm {
+MCWinCOFFStreamer::MCWinCOFFStreamer(MCContext &Context, MCAsmBackend &MAB,
+ MCCodeEmitter &CE, raw_ostream &OS)
+ : MCObjectStreamer(Context, MAB, OS, &CE), CurSymbol(nullptr) {}
- MCSymbolData &SymbolData = getAssembler().getOrCreateSymbolData(*Symbol);
- SymbolData.setExternal(External);
+void MCWinCOFFStreamer::EmitInstToData(const MCInst &Inst,
+ const MCSubtargetInfo &STI) {
+ MCDataFragment *DF = getOrCreateDataFragment();
- AssignSection(Symbol, Section);
+ SmallVector<MCFixup, 4> Fixups;
+ SmallString<256> Code;
+ raw_svector_ostream VecOS(Code);
+ getAssembler().getEmitter().EncodeInstruction(Inst, VecOS, Fixups, STI);
+ VecOS.flush();
- if (ByteAlignment != 1)
- new MCAlignFragment(ByteAlignment, 0, 0, ByteAlignment, &SectionData);
+ // Add the fixups and data.
+ for (unsigned i = 0, e = Fixups.size(); i != e; ++i) {
+ Fixups[i].setOffset(Fixups[i].getOffset() + DF->getContents().size());
+ DF->getFixups().push_back(Fixups[i]);
+ }
- SymbolData.setFragment(new MCFillFragment(0, 0, Size, &SectionData));
+ DF->getContents().append(Code.begin(), Code.end());
}
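
EmitInstToData above records each instruction's fixups relative to the start
of that instruction's encoding, then rebases them by the bytes already in
the shared data fragment before appending. A standalone model of that
append-and-rebase step, with illustrative types rather than the real MC
classes:

    #include <cassert>
    #include <cstdint>
    #include <string>
    #include <vector>

    struct Fixup { uint32_t Offset; /* plus kind, symbol, ... */ };

    // A growing byte buffer with attached fixups, standing in for the
    // real MCDataFragment.
    struct DataFragment {
      std::string Contents;
      std::vector<Fixup> Fixups;

      void append(const std::string &Code, std::vector<Fixup> InstFixups) {
        for (Fixup &F : InstFixups) {
          // Rebase from instruction-relative to fragment-relative.
          F.Offset += Contents.size();
          Fixups.push_back(F);
        }
        Contents.append(Code);
      }
    };

    int main() {
      DataFragment DF;
      DF.append("\x90\x90", {});          // two 1-byte nops, no fixups
      DF.append(std::string("\xe8\0\0\0\0", 5), {{1}}); // call rel32
      assert(DF.Fixups[0].Offset == 3);   // 2 bytes emitted before it, +1
      return 0;
    }
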
-// MCStreamer interface
-
-void WinCOFFStreamer::InitSections() {
+void MCWinCOFFStreamer::InitSections() {
// FIXME: this is identical to the ELF one.
// This emulates the same behavior of GNU as. This makes it easier
// to compare the output as the major sections are in the same order.
@@ -139,165 +77,182 @@ void WinCOFFStreamer::InitSections() {
SwitchSection(getContext().getObjectFileInfo()->getTextSection());
}
-void WinCOFFStreamer::EmitLabel(MCSymbol *Symbol) {
+void MCWinCOFFStreamer::EmitLabel(MCSymbol *Symbol) {
assert(Symbol->isUndefined() && "Cannot define a symbol twice!");
MCObjectStreamer::EmitLabel(Symbol);
}
-void WinCOFFStreamer::EmitDebugLabel(MCSymbol *Symbol) {
+void MCWinCOFFStreamer::EmitDebugLabel(MCSymbol *Symbol) {
EmitLabel(Symbol);
}
-void WinCOFFStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) {
+
+void MCWinCOFFStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) {
llvm_unreachable("not implemented");
}
-void WinCOFFStreamer::EmitThumbFunc(MCSymbol *Func) {
+void MCWinCOFFStreamer::EmitThumbFunc(MCSymbol *Func) {
llvm_unreachable("not implemented");
}
-bool WinCOFFStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
- MCSymbolAttr Attribute) {
+bool MCWinCOFFStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
+ MCSymbolAttr Attribute) {
assert(Symbol && "Symbol must be non-null!");
- assert((Symbol->isInSection()
- ? Symbol->getSection().getVariant() == MCSection::SV_COFF
- : true) && "Got non-COFF section in the COFF backend!");
+ assert((!Symbol->isInSection() ||
+ Symbol->getSection().getVariant() == MCSection::SV_COFF) &&
+ "Got non-COFF section in the COFF backend!");
+
+ MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
+
switch (Attribute) {
+ default: return false;
case MCSA_WeakReference:
- case MCSA_Weak: {
- MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
- SD.modifyFlags(COFF::SF_WeakExternal, COFF::SF_WeakExternal);
- SD.setExternal(true);
- }
+ case MCSA_Weak:
+ SD.modifyFlags(COFF::SF_WeakExternal, COFF::SF_WeakExternal);
+ SD.setExternal(true);
break;
-
case MCSA_Global:
- getAssembler().getOrCreateSymbolData(*Symbol).setExternal(true);
+ SD.setExternal(true);
break;
-
- default:
- return false;
}
return true;
}
-void WinCOFFStreamer::EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) {
+void MCWinCOFFStreamer::EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) {
llvm_unreachable("not implemented");
}
-void WinCOFFStreamer::BeginCOFFSymbolDef(MCSymbol const *Symbol) {
- assert((Symbol->isInSection()
- ? Symbol->getSection().getVariant() == MCSection::SV_COFF
- : true) && "Got non-COFF section in the COFF backend!");
- assert(CurSymbol == NULL && "EndCOFFSymbolDef must be called between calls "
- "to BeginCOFFSymbolDef!");
+void MCWinCOFFStreamer::BeginCOFFSymbolDef(MCSymbol const *Symbol) {
+ assert((!Symbol->isInSection() ||
+ Symbol->getSection().getVariant() == MCSection::SV_COFF) &&
+ "Got non-COFF section in the COFF backend!");
+
+ if (CurSymbol)
+ FatalError("starting a new symbol definition without completing the "
+ "previous one");
CurSymbol = Symbol;
}
-void WinCOFFStreamer::EmitCOFFSymbolStorageClass(int StorageClass) {
- assert(CurSymbol != NULL && "BeginCOFFSymbolDef must be called first!");
- assert((StorageClass & ~0xFF) == 0 && "StorageClass must only have data in "
- "the first byte!");
+void MCWinCOFFStreamer::EmitCOFFSymbolStorageClass(int StorageClass) {
+ if (!CurSymbol)
+ FatalError("storage class specified outside of symbol definition");
+
+ if (StorageClass & ~0xff)
+ FatalError(Twine("storage class value '") + itostr(StorageClass) +
+ "' out of range");
- getAssembler().getOrCreateSymbolData(*CurSymbol).modifyFlags(
- StorageClass << COFF::SF_ClassShift,
- COFF::SF_ClassMask);
+ MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*CurSymbol);
+ SD.modifyFlags(StorageClass << COFF::SF_ClassShift, COFF::SF_ClassMask);
}
-void WinCOFFStreamer::EmitCOFFSymbolType(int Type) {
- assert(CurSymbol != NULL && "BeginCOFFSymbolDef must be called first!");
- assert((Type & ~0xFFFF) == 0 && "Type must only have data in the first 2 "
- "bytes");
+void MCWinCOFFStreamer::EmitCOFFSymbolType(int Type) {
+ if (!CurSymbol)
+ FatalError("symbol type specified outside of a symbol definition");
+
+ if (Type & ~0xffff)
+ FatalError(Twine("type value '") + itostr(Type) + "' out of range");
- getAssembler().getOrCreateSymbolData(*CurSymbol).modifyFlags(
- Type << COFF::SF_TypeShift,
- COFF::SF_TypeMask);
+ MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*CurSymbol);
+ SD.modifyFlags(Type << COFF::SF_TypeShift, COFF::SF_TypeMask);
}
-void WinCOFFStreamer::EndCOFFSymbolDef() {
- assert(CurSymbol != NULL && "BeginCOFFSymbolDef must be called first!");
- CurSymbol = NULL;
+void MCWinCOFFStreamer::EndCOFFSymbolDef() {
+ if (!CurSymbol)
+ FatalError("ending symbol definition without starting one");
+ CurSymbol = nullptr;
}
-void WinCOFFStreamer::EmitCOFFSectionIndex(MCSymbol const *Symbol) {
+void MCWinCOFFStreamer::EmitCOFFSectionIndex(MCSymbol const *Symbol) {
MCDataFragment *DF = getOrCreateDataFragment();
- DF->getFixups().push_back(MCFixup::Create(
- DF->getContents().size(), MCSymbolRefExpr::Create(Symbol, getContext()),
- FK_SecRel_2));
+ const MCSymbolRefExpr *SRE = MCSymbolRefExpr::Create(Symbol, getContext());
+ MCFixup Fixup = MCFixup::Create(DF->getContents().size(), SRE, FK_SecRel_2);
+ DF->getFixups().push_back(Fixup);
DF->getContents().resize(DF->getContents().size() + 4, 0);
}
-void WinCOFFStreamer::EmitCOFFSecRel32(MCSymbol const *Symbol) {
+void MCWinCOFFStreamer::EmitCOFFSecRel32(MCSymbol const *Symbol) {
MCDataFragment *DF = getOrCreateDataFragment();
- DF->getFixups().push_back(MCFixup::Create(
- DF->getContents().size(), MCSymbolRefExpr::Create(Symbol, getContext()),
- FK_SecRel_4));
+ const MCSymbolRefExpr *SRE = MCSymbolRefExpr::Create(Symbol, getContext());
+ MCFixup Fixup = MCFixup::Create(DF->getContents().size(), SRE, FK_SecRel_4);
+ DF->getFixups().push_back(Fixup);
DF->getContents().resize(DF->getContents().size() + 4, 0);
}
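
Both emitters above reserve space in the current data fragment and attach a
fixup that the object writer later turns into a section-index or
section-relative relocation; debug-info consumers address data as a
(section ordinal, offset-in-section) pair instead of a virtual address. A
sketch of the resolved semantics, under assumed conventions and with
illustrative names:

    #include <cassert>
    #include <cstdint>

    // Where a symbol ended up after layout: its section's 1-based ordinal
    // and its offset within that section.
    struct SymbolLoc { uint16_t SectionIndex; uint32_t OffsetInSection; };

    uint32_t resolveSecRel32(const SymbolLoc &S) { return S.OffsetInSection; }
    uint16_t resolveSecIdx(const SymbolLoc &S)   { return S.SectionIndex; }

    int main() {
      SymbolLoc Fn = {2, 0x40}; // e.g. .text is section #2
      assert(resolveSecRel32(Fn) == 0x40);
      assert(resolveSecIdx(Fn) == 2);
      return 0;
    }
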
-void WinCOFFStreamer::EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) {
- llvm_unreachable("not implemented");
+void MCWinCOFFStreamer::EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) {
+ llvm_unreachable("not supported");
}
-void WinCOFFStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
- unsigned ByteAlignment) {
- assert((Symbol->isInSection()
- ? Symbol->getSection().getVariant() == MCSection::SV_COFF
- : true) && "Got non-COFF section in the COFF backend!");
- AddCommonSymbol(Symbol, Size, ByteAlignment, true);
+void MCWinCOFFStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlignment) {
+ assert((!Symbol->isInSection() ||
+ Symbol->getSection().getVariant() == MCSection::SV_COFF) &&
+ "Got non-COFF section in the COFF backend!");
+
+ if (ByteAlignment > 32)
+ report_fatal_error("alignment is limited to 32 bytes");
+
+ AssignSection(Symbol, nullptr);
+
+ MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
+ SD.setExternal(true);
+ SD.setCommon(Size, ByteAlignment);
}
-void WinCOFFStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
- unsigned ByteAlignment) {
- assert((Symbol->isInSection()
- ? Symbol->getSection().getVariant() == MCSection::SV_COFF
- : true) && "Got non-COFF section in the COFF backend!");
- AddCommonSymbol(Symbol, Size, ByteAlignment, false);
+void MCWinCOFFStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlignment) {
+ assert(!Symbol->isInSection() && "Symbol must not already have a section!");
+
+ const MCSection *Section = getContext().getObjectFileInfo()->getBSSSection();
+ MCSectionData &SectionData = getAssembler().getOrCreateSectionData(*Section);
+ if (SectionData.getAlignment() < ByteAlignment)
+ SectionData.setAlignment(ByteAlignment);
+
+ MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
+ SD.setExternal(false);
+
+ AssignSection(Symbol, Section);
+
+ if (ByteAlignment != 1)
+ new MCAlignFragment(ByteAlignment, /*_Value=*/0, /*_ValueSize=*/0,
+ ByteAlignment, &SectionData);
+
+ MCFillFragment *Fragment =
+ new MCFillFragment(/*_Value=*/0, /*_ValueSize=*/0, Size, &SectionData);
+ SD.setFragment(Fragment);
}
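
The align-then-fill fragment pair above is what gives each local common
symbol its padded slot in the BSS section. A standalone model of the
resulting layout arithmetic (illustrative names):

    #include <cassert>
    #include <cstdint>

    uint64_t alignTo(uint64_t Value, uint64_t Align) {
      return (Value + Align - 1) / Align * Align;
    }

    struct BSS {
      uint64_t Size = 0;
      // Returns the new symbol's offset within the section.
      uint64_t addLocalCommon(uint64_t SymSize, uint64_t ByteAlignment) {
        Size = alignTo(Size, ByteAlignment); // the MCAlignFragment
        uint64_t Offset = Size;
        Size += SymSize;                     // the zero-fill MCFillFragment
        return Offset;
      }
    };

    int main() {
      BSS Bss;
      assert(Bss.addLocalCommon(1, 1) == 0);
      assert(Bss.addLocalCommon(8, 8) == 8); // padded from 1 up to 8
      return 0;
    }
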
-void WinCOFFStreamer::EmitZerofill(const MCSection *Section, MCSymbol *Symbol,
- uint64_t Size,unsigned ByteAlignment) {
+void MCWinCOFFStreamer::EmitZerofill(const MCSection *Section,
+ MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlignment) {
llvm_unreachable("not implemented");
}
-void WinCOFFStreamer::EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol,
- uint64_t Size, unsigned ByteAlignment) {
+void MCWinCOFFStreamer::EmitTBSSSymbol(const MCSection *Section,
+ MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlignment) {
llvm_unreachable("not implemented");
}
-void WinCOFFStreamer::EmitFileDirective(StringRef Filename) {
- // Ignore for now, linkers don't care, and proper debug
- // info will be a much large effort.
+void MCWinCOFFStreamer::EmitFileDirective(StringRef Filename) {
+ getAssembler().addFileName(Filename);
}
// TODO: Implement this if you want to emit .comment section in COFF obj files.
-void WinCOFFStreamer::EmitIdent(StringRef IdentString) {
- llvm_unreachable("unsupported directive");
+void MCWinCOFFStreamer::EmitIdent(StringRef IdentString) {
+ llvm_unreachable("not implemented");
}
-void WinCOFFStreamer::EmitWin64EHHandlerData() {
- MCStreamer::EmitWin64EHHandlerData();
-
- // We have to emit the unwind info now, because this directive
- // actually switches to the .xdata section!
- MCWin64EHUnwindEmitter::EmitUnwindInfo(*this, getCurrentW64UnwindInfo());
+void MCWinCOFFStreamer::EmitWin64EHHandlerData() {
+ llvm_unreachable("not implemented");
}
-void WinCOFFStreamer::FinishImpl() {
- EmitFrames(NULL, true);
- EmitW64Tables();
+void MCWinCOFFStreamer::FinishImpl() {
MCObjectStreamer::FinishImpl();
}
-namespace llvm
-{
- MCStreamer *createWinCOFFStreamer(MCContext &Context,
- MCAsmBackend &MAB,
- MCCodeEmitter &CE,
- raw_ostream &OS,
- bool RelaxAll) {
- WinCOFFStreamer *S = new WinCOFFStreamer(Context, MAB, CE, OS);
- S->getAssembler().setRelaxAll(RelaxAll);
- return S;
- }
+LLVM_ATTRIBUTE_NORETURN
+void MCWinCOFFStreamer::FatalError(const Twine &Msg) const {
+ getContext().FatalError(SMLoc(), Msg);
+}
}
+
diff --git a/lib/Object/Android.mk b/lib/Object/Android.mk
index 7dfa44f..bd9659c 100644
--- a/lib/Object/Android.mk
+++ b/lib/Object/Android.mk
@@ -12,6 +12,7 @@ object_SRC_FILES := \
MachOUniversal.cpp \
Object.cpp \
ObjectFile.cpp \
+ StringTableBuilder.cpp \
SymbolicFile.cpp
diff --git a/lib/Object/Archive.cpp b/lib/Object/Archive.cpp
index 999bf28..304ca47 100644
--- a/lib/Object/Archive.cpp
+++ b/lib/Object/Archive.cpp
@@ -13,7 +13,6 @@
#include "llvm/Object/Archive.h"
#include "llvm/ADT/APInt.h"
-#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/Endian.h"
@@ -111,7 +110,7 @@ Archive::Child Archive::Child::getNext() const {
// Check to see if this is past the end of the archive.
if (NextLoc >= Parent->Data->getBufferEnd())
- return Child(Parent, NULL);
+ return Child(Parent, nullptr);
return Child(Parent, NextLoc);
}
@@ -183,14 +182,6 @@ error_code Archive::Child::getMemoryBuffer(std::unique_ptr<MemoryBuffer> &Result
return error_code::success();
}
-error_code Archive::Child::getMemoryBuffer(OwningPtr<MemoryBuffer> &Result,
- bool FullPath) const {
- std::unique_ptr<MemoryBuffer> MB;
- error_code ec = getMemoryBuffer(MB, FullPath);
- Result = std::move(MB);
- return ec;
-}
-
error_code Archive::Child::getAsBinary(std::unique_ptr<Binary> &Result,
LLVMContext *Context) const {
std::unique_ptr<Binary> ret;
@@ -204,14 +195,6 @@ error_code Archive::Child::getAsBinary(std::unique_ptr<Binary> &Result,
return object_error::success;
}
-error_code Archive::Child::getAsBinary(OwningPtr<Binary> &Result,
- LLVMContext *Context) const {
- std::unique_ptr<Binary> B;
- error_code ec = getAsBinary(B, Context);
- Result = std::move(B);
- return ec;
-}
-
ErrorOr<Archive*> Archive::create(MemoryBuffer *Source) {
error_code EC;
std::unique_ptr<Archive> Ret(new Archive(Source, EC));
@@ -349,7 +332,7 @@ Archive::child_iterator Archive::child_begin(bool SkipInternal) const {
}
Archive::child_iterator Archive::child_end() const {
- return Child(this, NULL);
+ return Child(this, nullptr);
}
error_code Archive::Symbol::getName(StringRef &Result) const {
diff --git a/lib/Object/CMakeLists.txt b/lib/Object/CMakeLists.txt
index dc18296..cd8c9ef 100644
--- a/lib/Object/CMakeLists.txt
+++ b/lib/Object/CMakeLists.txt
@@ -12,6 +12,7 @@ add_llvm_library(LLVMObject
MachOUniversal.cpp
Object.cpp
ObjectFile.cpp
+ StringTableBuilder.cpp
SymbolicFile.cpp
YAML.cpp
)
diff --git a/lib/Object/COFFObjectFile.cpp b/lib/Object/COFFObjectFile.cpp
index a75ebbf..262c040 100644
--- a/lib/Object/COFFObjectFile.cpp
+++ b/lib/Object/COFFObjectFile.cpp
@@ -138,7 +138,7 @@ error_code COFFObjectFile::getSymbolName(DataRefImpl Ref,
error_code COFFObjectFile::getSymbolAddress(DataRefImpl Ref,
uint64_t &Result) const {
const coff_symbol *Symb = toSymb(Ref);
- const coff_section *Section = NULL;
+ const coff_section *Section = nullptr;
if (error_code EC = getSection(Symb->SectionNumber, Section))
return EC;
@@ -163,7 +163,7 @@ error_code COFFObjectFile::getSymbolType(DataRefImpl Ref,
} else {
uint32_t Characteristics = 0;
if (!COFF::isReservedSectionNumber(Symb->SectionNumber)) {
- const coff_section *Section = NULL;
+ const coff_section *Section = nullptr;
if (error_code EC = getSection(Symb->SectionNumber, Section))
return EC;
Characteristics = Section->Characteristics;
@@ -208,7 +208,7 @@ error_code COFFObjectFile::getSymbolSize(DataRefImpl Ref,
// in the same section as this symbol, and looking for either the next
// symbol, or the end of the section.
const coff_symbol *Symb = toSymb(Ref);
- const coff_section *Section = NULL;
+ const coff_section *Section = nullptr;
if (error_code EC = getSection(Symb->SectionNumber, Section))
return EC;
@@ -227,7 +227,7 @@ error_code COFFObjectFile::getSymbolSection(DataRefImpl Ref,
if (COFF::isReservedSectionNumber(Symb->SectionNumber)) {
Result = section_end();
} else {
- const coff_section *Sec = 0;
+ const coff_section *Sec = nullptr;
if (error_code EC = getSection(Symb->SectionNumber, Sec)) return EC;
DataRefImpl Ref;
Ref.p = reinterpret_cast<uintptr_t>(Sec);
@@ -334,7 +334,7 @@ error_code COFFObjectFile::sectionContainsSymbol(DataRefImpl SecRef,
bool &Result) const {
const coff_section *Sec = toSec(SecRef);
const coff_symbol *Symb = toSymb(SymbRef);
- const coff_section *SymbSec = 0;
+ const coff_section *SymbSec = nullptr;
if (error_code EC = getSection(Symb->SectionNumber, SymbSec)) return EC;
if (SymbSec == Sec)
Result = true;
@@ -389,11 +389,6 @@ relocation_iterator COFFObjectFile::section_rel_end(DataRefImpl Ref) const {
return relocation_iterator(RelocationRef(Ret, this));
}
-bool COFFObjectFile::section_rel_empty(DataRefImpl Ref) const {
- const coff_section *Sec = toSec(Ref);
- return Sec->NumberOfRelocations == 0;
-}
-
// Initialize the pointer to the symbol table.
error_code COFFObjectFile::initSymbolTablePtr() {
if (error_code EC = getObject(
@@ -512,10 +507,11 @@ error_code COFFObjectFile::initExportTablePtr() {
COFFObjectFile::COFFObjectFile(MemoryBuffer *Object, error_code &EC,
bool BufferOwned)
- : ObjectFile(Binary::ID_COFF, Object, BufferOwned), COFFHeader(0),
- PE32Header(0), PE32PlusHeader(0), DataDirectory(0), SectionTable(0),
- SymbolTable(0), StringTable(0), StringTableSize(0), ImportDirectory(0),
- NumberOfImportDirectory(0), ExportDirectory(0) {
+ : ObjectFile(Binary::ID_COFF, Object, BufferOwned), COFFHeader(nullptr),
+ PE32Header(nullptr), PE32PlusHeader(nullptr), DataDirectory(nullptr),
+ SectionTable(nullptr), SymbolTable(nullptr), StringTable(nullptr),
+ StringTableSize(0), ImportDirectory(nullptr), NumberOfImportDirectory(0),
+ ExportDirectory(nullptr) {
// Check that we at least have enough room for a header.
if (!checkSize(Data, EC, sizeof(coff_file_header))) return;
@@ -637,8 +633,8 @@ export_directory_iterator COFFObjectFile::export_directory_begin() const {
}
export_directory_iterator COFFObjectFile::export_directory_end() const {
- if (ExportDirectory == 0)
- return export_directory_iterator(ExportDirectoryEntryRef(0, 0, this));
+ if (!ExportDirectory)
+ return export_directory_iterator(ExportDirectoryEntryRef(nullptr, 0, this));
ExportDirectoryEntryRef Ref(ExportDirectory,
ExportDirectory->AddressTableEntries, this);
return export_directory_iterator(Ref);
@@ -728,7 +724,7 @@ error_code COFFObjectFile::getSection(int32_t Index,
const coff_section *&Result) const {
// Check for special index values.
if (COFF::isReservedSectionNumber(Index))
- Result = NULL;
+ Result = nullptr;
else if (Index > 0 && Index <= COFFHeader->NumberOfSections)
// We already verified the section table data, so no need to check again.
Result = SectionTable + (Index - 1);
@@ -778,7 +774,7 @@ error_code COFFObjectFile::getSymbolName(const coff_symbol *Symbol,
ArrayRef<uint8_t> COFFObjectFile::getSymbolAuxData(
const coff_symbol *Symbol) const {
- const uint8_t *Aux = NULL;
+ const uint8_t *Aux = nullptr;
if (Symbol->NumberOfAuxSymbols > 0) {
// AUX data comes immediately after the symbol in COFF
@@ -923,6 +919,27 @@ error_code COFFObjectFile::getRelocationTypeName(DataRefImpl Rel,
Res = "Unknown";
}
break;
+ case COFF::IMAGE_FILE_MACHINE_ARMNT:
+ switch (Reloc->Type) {
+ LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM_ABSOLUTE);
+ LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM_ADDR32);
+ LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM_ADDR32NB);
+ LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM_BRANCH24);
+ LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM_BRANCH11);
+ LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM_TOKEN);
+ LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM_BLX24);
+ LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM_BLX11);
+ LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM_SECTION);
+ LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM_SECREL);
+ LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM_MOV32A);
+ LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM_MOV32T);
+ LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM_BRANCH20T);
+ LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM_BRANCH24T);
+ LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM_BLX23T);
+ default:
+ Res = "Unknown";
+ }
+ break;
case COFF::IMAGE_FILE_MACHINE_I386:
switch (Reloc->Type) {
LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_I386_ABSOLUTE);
@@ -952,7 +969,7 @@ error_code COFFObjectFile::getRelocationTypeName(DataRefImpl Rel,
error_code COFFObjectFile::getRelocationValueString(DataRefImpl Rel,
SmallVectorImpl<char> &Result) const {
const coff_relocation *Reloc = toRel(Rel);
- const coff_symbol *Symb = 0;
+ const coff_symbol *Symb = nullptr;
if (error_code EC = getSymbol(Reloc->SymbolTableIndex, Symb)) return EC;
DataRefImpl Sym;
Sym.p = reinterpret_cast<uintptr_t>(Symb);
diff --git a/lib/Object/COFFYAML.cpp b/lib/Object/COFFYAML.cpp
index 94b72ff..49c5dda 100644
--- a/lib/Object/COFFYAML.cpp
+++ b/lib/Object/COFFYAML.cpp
@@ -38,6 +38,7 @@ void ScalarEnumerationTraits<COFFYAML::COMDATType>::enumeration(
void
ScalarEnumerationTraits<COFFYAML::WeakExternalCharacteristics>::enumeration(
IO &IO, COFFYAML::WeakExternalCharacteristics &Value) {
+ IO.enumCase(Value, "0", 0);
ECase(IMAGE_WEAK_EXTERN_SEARCH_NOLIBRARY);
ECase(IMAGE_WEAK_EXTERN_SEARCH_LIBRARY);
ECase(IMAGE_WEAK_EXTERN_SEARCH_ALIAS);
@@ -132,8 +133,8 @@ void ScalarEnumerationTraits<COFF::SymbolComplexType>::enumeration(
ECase(IMAGE_SYM_DTYPE_ARRAY);
}
-void ScalarEnumerationTraits<COFF::RelocationTypeX86>::enumeration(
- IO &IO, COFF::RelocationTypeX86 &Value) {
+void ScalarEnumerationTraits<COFF::RelocationTypeI386>::enumeration(
+ IO &IO, COFF::RelocationTypeI386 &Value) {
ECase(IMAGE_REL_I386_ABSOLUTE);
ECase(IMAGE_REL_I386_DIR16);
ECase(IMAGE_REL_I386_REL16);
@@ -145,6 +146,10 @@ void ScalarEnumerationTraits<COFF::RelocationTypeX86>::enumeration(
ECase(IMAGE_REL_I386_TOKEN);
ECase(IMAGE_REL_I386_SECREL7);
ECase(IMAGE_REL_I386_REL32);
+}
+
+void ScalarEnumerationTraits<COFF::RelocationTypeAMD64>::enumeration(
+ IO &IO, COFF::RelocationTypeAMD64 &Value) {
ECase(IMAGE_REL_AMD64_ABSOLUTE);
ECase(IMAGE_REL_AMD64_ADDR64);
ECase(IMAGE_REL_AMD64_ADDR32);
@@ -272,22 +277,33 @@ struct NHeaderCharacteristics {
COFF::Characteristics Characteristics;
};
+template <typename RelocType>
struct NType {
- NType(IO &) : Type(COFF::RelocationTypeX86(0)) {}
- NType(IO &, uint16_t T) : Type(COFF::RelocationTypeX86(T)) {}
+ NType(IO &) : Type(RelocType(0)) {}
+ NType(IO &, uint16_t T) : Type(RelocType(T)) {}
uint16_t denormalize(IO &) { return Type; }
- COFF::RelocationTypeX86 Type;
+ RelocType Type;
};
}
void MappingTraits<COFFYAML::Relocation>::mapping(IO &IO,
COFFYAML::Relocation &Rel) {
- MappingNormalization<NType, uint16_t> NT(IO, Rel.Type);
-
IO.mapRequired("VirtualAddress", Rel.VirtualAddress);
IO.mapRequired("SymbolName", Rel.SymbolName);
- IO.mapRequired("Type", NT->Type);
+
+ COFF::header &H = *static_cast<COFF::header *>(IO.getContext());
+ if (H.Machine == COFF::IMAGE_FILE_MACHINE_I386) {
+ MappingNormalization<NType<COFF::RelocationTypeI386>, uint16_t> NT(
+ IO, Rel.Type);
+ IO.mapRequired("Type", NT->Type);
+ } else if (H.Machine == COFF::IMAGE_FILE_MACHINE_AMD64) {
+ MappingNormalization<NType<COFF::RelocationTypeAMD64>, uint16_t> NT(
+ IO, Rel.Type);
+ IO.mapRequired("Type", NT->Type);
+ } else {
+ IO.mapRequired("Type", Rel.Type);
+ }
}
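
The mapping above pulls the COFF header out of the IO context so the same
raw 16-bit Type field can be rendered against a machine-specific
enumeration. A standalone sketch of that dispatch, with a deliberately tiny
name table (real code would cover every relocation type):

    #include <cassert>
    #include <cstdint>
    #include <string>

    enum Machine : uint16_t { I386 = 0x14c, AMD64 = 0x8664 };

    // Interpret one raw on-disk Type value against a machine-specific
    // name table.
    std::string relocName(Machine M, uint16_t Type) {
      switch (M) {
      case I386:  return Type == 0x14 ? "IMAGE_REL_I386_REL32" : "other";
      case AMD64: return Type == 0x04 ? "IMAGE_REL_AMD64_REL32" : "other";
      }
      return std::to_string(Type); // unknown machine: keep the raw value
    }

    int main() {
      assert(relocName(I386, 0x14) == "IMAGE_REL_I386_REL32");
      assert(relocName(AMD64, 0x04) == "IMAGE_REL_AMD64_REL32");
      return 0;
    }
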
void MappingTraits<COFF::header>::mapping(IO &IO, COFF::header &H) {
@@ -297,6 +313,7 @@ void MappingTraits<COFF::header>::mapping(IO &IO, COFF::header &H) {
IO.mapRequired("Machine", NM->Machine);
IO.mapOptional("Characteristics", NC->Characteristics);
+ IO.setContext(static_cast<void *>(&H));
}
void MappingTraits<COFF::AuxiliaryFunctionDefinition>::mapping(
diff --git a/lib/Object/ELF.cpp b/lib/Object/ELF.cpp
index e9a88bf..df4dd5e 100644
--- a/lib/Object/ELF.cpp
+++ b/lib/Object/ELF.cpp
@@ -159,6 +159,15 @@ StringRef getELFRelocationTypeName(uint32_t Machine, uint32_t Type) {
LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_TLS_TPREL_HI16);
LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_TLS_TPREL_LO16);
LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_GLOB_DAT);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_PC21_S2);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_PC26_S2);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_PC18_S3);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_PC19_S2);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_PCHI16);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_PCLO16);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS16_GOT16);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS16_HI16);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS16_LO16);
LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_COPY);
LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_JUMP_SLOT);
LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MICROMIPS_26_S1);
@@ -177,6 +186,7 @@ StringRef getELFRelocationTypeName(uint32_t Machine, uint32_t Type) {
LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MICROMIPS_TLS_TPREL_HI16);
LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MICROMIPS_TLS_TPREL_LO16);
LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_NUM);
+ LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_PC32);
default:
break;
}
diff --git a/lib/Object/ELFYAML.cpp b/lib/Object/ELFYAML.cpp
index d513670..7d50f23 100644
--- a/lib/Object/ELFYAML.cpp
+++ b/lib/Object/ELFYAML.cpp
@@ -12,8 +12,12 @@
//===----------------------------------------------------------------------===//
#include "llvm/Object/ELFYAML.h"
+#include "llvm/Support/Casting.h"
namespace llvm {
+
+ELFYAML::Section::~Section() {}
+
namespace yaml {
void
@@ -239,44 +243,57 @@ void ScalarEnumerationTraits<ELFYAML::ELF_ELFOSABI>::enumeration(
void ScalarBitSetTraits<ELFYAML::ELF_EF>::bitset(IO &IO,
ELFYAML::ELF_EF &Value) {
+ const auto *Object = static_cast<ELFYAML::Object *>(IO.getContext());
+ assert(Object && "The IO context is not initialized");
#define BCase(X) IO.bitSetCase(Value, #X, ELF::X);
- BCase(EF_ARM_SOFT_FLOAT)
- BCase(EF_ARM_VFP_FLOAT)
- BCase(EF_ARM_EABI_UNKNOWN)
- BCase(EF_ARM_EABI_VER1)
- BCase(EF_ARM_EABI_VER2)
- BCase(EF_ARM_EABI_VER3)
- BCase(EF_ARM_EABI_VER4)
- BCase(EF_ARM_EABI_VER5)
- BCase(EF_ARM_EABIMASK)
- BCase(EF_MIPS_NOREORDER)
- BCase(EF_MIPS_PIC)
- BCase(EF_MIPS_CPIC)
- BCase(EF_MIPS_ABI2)
- BCase(EF_MIPS_32BITMODE)
- BCase(EF_MIPS_ABI_O32)
- BCase(EF_MIPS_MICROMIPS)
- BCase(EF_MIPS_ARCH_ASE_M16)
- BCase(EF_MIPS_ARCH_1)
- BCase(EF_MIPS_ARCH_2)
- BCase(EF_MIPS_ARCH_3)
- BCase(EF_MIPS_ARCH_4)
- BCase(EF_MIPS_ARCH_5)
- BCase(EF_MIPS_ARCH_32)
- BCase(EF_MIPS_ARCH_64)
- BCase(EF_MIPS_ARCH_32R2)
- BCase(EF_MIPS_ARCH_64R2)
- BCase(EF_MIPS_ARCH)
- BCase(EF_HEXAGON_MACH_V2)
- BCase(EF_HEXAGON_MACH_V3)
- BCase(EF_HEXAGON_MACH_V4)
- BCase(EF_HEXAGON_MACH_V5)
- BCase(EF_HEXAGON_ISA_MACH)
- BCase(EF_HEXAGON_ISA_V2)
- BCase(EF_HEXAGON_ISA_V3)
- BCase(EF_HEXAGON_ISA_V4)
- BCase(EF_HEXAGON_ISA_V5)
+#define BCaseMask(X, M) IO.maskedBitSetCase(Value, #X, ELF::X, ELF::M);
+ switch (Object->Header.Machine) {
+ case ELF::EM_ARM:
+ BCase(EF_ARM_SOFT_FLOAT)
+ BCase(EF_ARM_VFP_FLOAT)
+ BCaseMask(EF_ARM_EABI_UNKNOWN, EF_ARM_EABIMASK)
+ BCaseMask(EF_ARM_EABI_VER1, EF_ARM_EABIMASK)
+ BCaseMask(EF_ARM_EABI_VER2, EF_ARM_EABIMASK)
+ BCaseMask(EF_ARM_EABI_VER3, EF_ARM_EABIMASK)
+ BCaseMask(EF_ARM_EABI_VER4, EF_ARM_EABIMASK)
+ BCaseMask(EF_ARM_EABI_VER5, EF_ARM_EABIMASK)
+ break;
+ case ELF::EM_MIPS:
+ BCase(EF_MIPS_NOREORDER)
+ BCase(EF_MIPS_PIC)
+ BCase(EF_MIPS_CPIC)
+ BCase(EF_MIPS_ABI2)
+ BCase(EF_MIPS_32BITMODE)
+ BCase(EF_MIPS_ABI_O32)
+ BCase(EF_MIPS_MICROMIPS)
+ BCase(EF_MIPS_ARCH_ASE_M16)
+ BCaseMask(EF_MIPS_ARCH_1, EF_MIPS_ARCH)
+ BCaseMask(EF_MIPS_ARCH_2, EF_MIPS_ARCH)
+ BCaseMask(EF_MIPS_ARCH_3, EF_MIPS_ARCH)
+ BCaseMask(EF_MIPS_ARCH_4, EF_MIPS_ARCH)
+ BCaseMask(EF_MIPS_ARCH_5, EF_MIPS_ARCH)
+ BCaseMask(EF_MIPS_ARCH_32, EF_MIPS_ARCH)
+ BCaseMask(EF_MIPS_ARCH_64, EF_MIPS_ARCH)
+ BCaseMask(EF_MIPS_ARCH_32R2, EF_MIPS_ARCH)
+ BCaseMask(EF_MIPS_ARCH_64R2, EF_MIPS_ARCH)
+ BCaseMask(EF_MIPS_ARCH_32R6, EF_MIPS_ARCH)
+ BCaseMask(EF_MIPS_ARCH_64R6, EF_MIPS_ARCH)
+ break;
+ case ELF::EM_HEXAGON:
+ BCase(EF_HEXAGON_MACH_V2)
+ BCase(EF_HEXAGON_MACH_V3)
+ BCase(EF_HEXAGON_MACH_V4)
+ BCase(EF_HEXAGON_MACH_V5)
+ BCase(EF_HEXAGON_ISA_V2)
+ BCase(EF_HEXAGON_ISA_V3)
+ BCase(EF_HEXAGON_ISA_V4)
+ BCase(EF_HEXAGON_ISA_V5)
+ break;
+ default:
+ llvm_unreachable("Unsupported architecture");
+ }
#undef BCase
+#undef BCaseMask
}
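
The switch to maskedBitSetCase matters because values such as
EF_ARM_EABI_VER5 are enumerated codes inside a multi-bit field of e_flags,
not independent flag bits; a plain bit test would report VER4 as set
whenever VER5 is. A standalone demonstration using the real ARM EABI
constants:

    #include <cassert>
    #include <cstdint>

    const uint32_t EF_ARM_EABIMASK  = 0xFF000000;
    const uint32_t EF_ARM_EABI_VER4 = 0x04000000;
    const uint32_t EF_ARM_EABI_VER5 = 0x05000000;

    bool hasBits(uint32_t Flags, uint32_t Bits) {
      return (Flags & Bits) == Bits;          // plain bitSetCase test
    }
    bool hasMasked(uint32_t Flags, uint32_t Val, uint32_t Mask) {
      return (Flags & Mask) == Val;           // maskedBitSetCase test
    }

    int main() {
      uint32_t Flags = EF_ARM_EABI_VER5;
      assert(hasBits(Flags, EF_ARM_EABI_VER4));  // misfires: VER4 is a
                                                 // bit-subset of VER5
      assert(!hasMasked(Flags, EF_ARM_EABI_VER4, EF_ARM_EABIMASK));
      assert(hasMasked(Flags, EF_ARM_EABI_VER5, EF_ARM_EABIMASK));
      return 0;
    }
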
void ScalarEnumerationTraits<ELFYAML::ELF_SHT>::enumeration(
@@ -300,6 +317,23 @@ void ScalarEnumerationTraits<ELFYAML::ELF_SHT>::enumeration(
ECase(SHT_PREINIT_ARRAY)
ECase(SHT_GROUP)
ECase(SHT_SYMTAB_SHNDX)
+ ECase(SHT_LOOS)
+ ECase(SHT_GNU_ATTRIBUTES)
+ ECase(SHT_GNU_HASH)
+ ECase(SHT_GNU_verdef)
+ ECase(SHT_GNU_verneed)
+ ECase(SHT_GNU_versym)
+ ECase(SHT_HIOS)
+ ECase(SHT_LOPROC)
+ ECase(SHT_ARM_EXIDX)
+ ECase(SHT_ARM_PREEMPTMAP)
+ ECase(SHT_ARM_ATTRIBUTES)
+ ECase(SHT_ARM_DEBUGOVERLAY)
+ ECase(SHT_ARM_OVERLAYSECTION)
+ ECase(SHT_HEX_ORDERED)
+ ECase(SHT_X86_64_UNWIND)
+ ECase(SHT_MIPS_REGINFO)
+ ECase(SHT_MIPS_OPTIONS)
#undef ECase
}
@@ -334,6 +368,270 @@ void ScalarEnumerationTraits<ELFYAML::ELF_STT>::enumeration(
#undef ECase
}
+void ScalarEnumerationTraits<ELFYAML::ELF_REL>::enumeration(
+ IO &IO, ELFYAML::ELF_REL &Value) {
+ const auto *Object = static_cast<ELFYAML::Object *>(IO.getContext());
+ assert(Object && "The IO context is not initialized");
+#define ECase(X) IO.enumCase(Value, #X, ELF::X);
+ switch (Object->Header.Machine) {
+ case ELF::EM_X86_64:
+ ECase(R_X86_64_NONE)
+ ECase(R_X86_64_64)
+ ECase(R_X86_64_PC32)
+ ECase(R_X86_64_GOT32)
+ ECase(R_X86_64_PLT32)
+ ECase(R_X86_64_COPY)
+ ECase(R_X86_64_GLOB_DAT)
+ ECase(R_X86_64_JUMP_SLOT)
+ ECase(R_X86_64_RELATIVE)
+ ECase(R_X86_64_GOTPCREL)
+ ECase(R_X86_64_32)
+ ECase(R_X86_64_32S)
+ ECase(R_X86_64_16)
+ ECase(R_X86_64_PC16)
+ ECase(R_X86_64_8)
+ ECase(R_X86_64_PC8)
+ ECase(R_X86_64_DTPMOD64)
+ ECase(R_X86_64_DTPOFF64)
+ ECase(R_X86_64_TPOFF64)
+ ECase(R_X86_64_TLSGD)
+ ECase(R_X86_64_TLSLD)
+ ECase(R_X86_64_DTPOFF32)
+ ECase(R_X86_64_GOTTPOFF)
+ ECase(R_X86_64_TPOFF32)
+ ECase(R_X86_64_PC64)
+ ECase(R_X86_64_GOTOFF64)
+ ECase(R_X86_64_GOTPC32)
+ ECase(R_X86_64_GOT64)
+ ECase(R_X86_64_GOTPCREL64)
+ ECase(R_X86_64_GOTPC64)
+ ECase(R_X86_64_GOTPLT64)
+ ECase(R_X86_64_PLTOFF64)
+ ECase(R_X86_64_SIZE32)
+ ECase(R_X86_64_SIZE64)
+ ECase(R_X86_64_GOTPC32_TLSDESC)
+ ECase(R_X86_64_TLSDESC_CALL)
+ ECase(R_X86_64_TLSDESC)
+ ECase(R_X86_64_IRELATIVE)
+ break;
+ case ELF::EM_MIPS:
+ ECase(R_MIPS_NONE)
+ ECase(R_MIPS_16)
+ ECase(R_MIPS_32)
+ ECase(R_MIPS_REL32)
+ ECase(R_MIPS_26)
+ ECase(R_MIPS_HI16)
+ ECase(R_MIPS_LO16)
+ ECase(R_MIPS_GPREL16)
+ ECase(R_MIPS_LITERAL)
+ ECase(R_MIPS_GOT16)
+ ECase(R_MIPS_PC16)
+ ECase(R_MIPS_CALL16)
+ ECase(R_MIPS_GPREL32)
+ ECase(R_MIPS_UNUSED1)
+ ECase(R_MIPS_UNUSED2)
+ ECase(R_MIPS_SHIFT5)
+ ECase(R_MIPS_SHIFT6)
+ ECase(R_MIPS_64)
+ ECase(R_MIPS_GOT_DISP)
+ ECase(R_MIPS_GOT_PAGE)
+ ECase(R_MIPS_GOT_OFST)
+ ECase(R_MIPS_GOT_HI16)
+ ECase(R_MIPS_GOT_LO16)
+ ECase(R_MIPS_SUB)
+ ECase(R_MIPS_INSERT_A)
+ ECase(R_MIPS_INSERT_B)
+ ECase(R_MIPS_DELETE)
+ ECase(R_MIPS_HIGHER)
+ ECase(R_MIPS_HIGHEST)
+ ECase(R_MIPS_CALL_HI16)
+ ECase(R_MIPS_CALL_LO16)
+ ECase(R_MIPS_SCN_DISP)
+ ECase(R_MIPS_REL16)
+ ECase(R_MIPS_ADD_IMMEDIATE)
+ ECase(R_MIPS_PJUMP)
+ ECase(R_MIPS_RELGOT)
+ ECase(R_MIPS_JALR)
+ ECase(R_MIPS_TLS_DTPMOD32)
+ ECase(R_MIPS_TLS_DTPREL32)
+ ECase(R_MIPS_TLS_DTPMOD64)
+ ECase(R_MIPS_TLS_DTPREL64)
+ ECase(R_MIPS_TLS_GD)
+ ECase(R_MIPS_TLS_LDM)
+ ECase(R_MIPS_TLS_DTPREL_HI16)
+ ECase(R_MIPS_TLS_DTPREL_LO16)
+ ECase(R_MIPS_TLS_GOTTPREL)
+ ECase(R_MIPS_TLS_TPREL32)
+ ECase(R_MIPS_TLS_TPREL64)
+ ECase(R_MIPS_TLS_TPREL_HI16)
+ ECase(R_MIPS_TLS_TPREL_LO16)
+ ECase(R_MIPS_GLOB_DAT)
+ ECase(R_MIPS_PC21_S2)
+ ECase(R_MIPS_PC26_S2)
+ ECase(R_MIPS_PC18_S3)
+ ECase(R_MIPS_PC19_S2)
+ ECase(R_MIPS_PCHI16)
+ ECase(R_MIPS_PCLO16)
+ ECase(R_MIPS16_GOT16)
+ ECase(R_MIPS16_HI16)
+ ECase(R_MIPS16_LO16)
+ ECase(R_MIPS_COPY)
+ ECase(R_MIPS_JUMP_SLOT)
+ ECase(R_MICROMIPS_26_S1)
+ ECase(R_MICROMIPS_HI16)
+ ECase(R_MICROMIPS_LO16)
+ ECase(R_MICROMIPS_GOT16)
+ ECase(R_MICROMIPS_PC16_S1)
+ ECase(R_MICROMIPS_CALL16)
+ ECase(R_MICROMIPS_GOT_DISP)
+ ECase(R_MICROMIPS_GOT_PAGE)
+ ECase(R_MICROMIPS_GOT_OFST)
+ ECase(R_MICROMIPS_TLS_GD)
+ ECase(R_MICROMIPS_TLS_LDM)
+ ECase(R_MICROMIPS_TLS_DTPREL_HI16)
+ ECase(R_MICROMIPS_TLS_DTPREL_LO16)
+ ECase(R_MICROMIPS_TLS_TPREL_HI16)
+ ECase(R_MICROMIPS_TLS_TPREL_LO16)
+ ECase(R_MIPS_NUM)
+ ECase(R_MIPS_PC32)
+ break;
+ case ELF::EM_HEXAGON:
+ ECase(R_HEX_NONE)
+ ECase(R_HEX_B22_PCREL)
+ ECase(R_HEX_B15_PCREL)
+ ECase(R_HEX_B7_PCREL)
+ ECase(R_HEX_LO16)
+ ECase(R_HEX_HI16)
+ ECase(R_HEX_32)
+ ECase(R_HEX_16)
+ ECase(R_HEX_8)
+ ECase(R_HEX_GPREL16_0)
+ ECase(R_HEX_GPREL16_1)
+ ECase(R_HEX_GPREL16_2)
+ ECase(R_HEX_GPREL16_3)
+ ECase(R_HEX_HL16)
+ ECase(R_HEX_B13_PCREL)
+ ECase(R_HEX_B9_PCREL)
+ ECase(R_HEX_B32_PCREL_X)
+ ECase(R_HEX_32_6_X)
+ ECase(R_HEX_B22_PCREL_X)
+ ECase(R_HEX_B15_PCREL_X)
+ ECase(R_HEX_B13_PCREL_X)
+ ECase(R_HEX_B9_PCREL_X)
+ ECase(R_HEX_B7_PCREL_X)
+ ECase(R_HEX_16_X)
+ ECase(R_HEX_12_X)
+ ECase(R_HEX_11_X)
+ ECase(R_HEX_10_X)
+ ECase(R_HEX_9_X)
+ ECase(R_HEX_8_X)
+ ECase(R_HEX_7_X)
+ ECase(R_HEX_6_X)
+ ECase(R_HEX_32_PCREL)
+ ECase(R_HEX_COPY)
+ ECase(R_HEX_GLOB_DAT)
+ ECase(R_HEX_JMP_SLOT)
+ ECase(R_HEX_RELATIVE)
+ ECase(R_HEX_PLT_B22_PCREL)
+ ECase(R_HEX_GOTREL_LO16)
+ ECase(R_HEX_GOTREL_HI16)
+ ECase(R_HEX_GOTREL_32)
+ ECase(R_HEX_GOT_LO16)
+ ECase(R_HEX_GOT_HI16)
+ ECase(R_HEX_GOT_32)
+ ECase(R_HEX_GOT_16)
+ ECase(R_HEX_DTPMOD_32)
+ ECase(R_HEX_DTPREL_LO16)
+ ECase(R_HEX_DTPREL_HI16)
+ ECase(R_HEX_DTPREL_32)
+ ECase(R_HEX_DTPREL_16)
+ ECase(R_HEX_GD_PLT_B22_PCREL)
+ ECase(R_HEX_GD_GOT_LO16)
+ ECase(R_HEX_GD_GOT_HI16)
+ ECase(R_HEX_GD_GOT_32)
+ ECase(R_HEX_GD_GOT_16)
+ ECase(R_HEX_IE_LO16)
+ ECase(R_HEX_IE_HI16)
+ ECase(R_HEX_IE_32)
+ ECase(R_HEX_IE_GOT_LO16)
+ ECase(R_HEX_IE_GOT_HI16)
+ ECase(R_HEX_IE_GOT_32)
+ ECase(R_HEX_IE_GOT_16)
+ ECase(R_HEX_TPREL_LO16)
+ ECase(R_HEX_TPREL_HI16)
+ ECase(R_HEX_TPREL_32)
+ ECase(R_HEX_TPREL_16)
+ ECase(R_HEX_6_PCREL_X)
+ ECase(R_HEX_GOTREL_32_6_X)
+ ECase(R_HEX_GOTREL_16_X)
+ ECase(R_HEX_GOTREL_11_X)
+ ECase(R_HEX_GOT_32_6_X)
+ ECase(R_HEX_GOT_16_X)
+ ECase(R_HEX_GOT_11_X)
+ ECase(R_HEX_DTPREL_32_6_X)
+ ECase(R_HEX_DTPREL_16_X)
+ ECase(R_HEX_DTPREL_11_X)
+ ECase(R_HEX_GD_GOT_32_6_X)
+ ECase(R_HEX_GD_GOT_16_X)
+ ECase(R_HEX_GD_GOT_11_X)
+ ECase(R_HEX_IE_32_6_X)
+ ECase(R_HEX_IE_16_X)
+ ECase(R_HEX_IE_GOT_32_6_X)
+ ECase(R_HEX_IE_GOT_16_X)
+ ECase(R_HEX_IE_GOT_11_X)
+ ECase(R_HEX_TPREL_32_6_X)
+ ECase(R_HEX_TPREL_16_X)
+ ECase(R_HEX_TPREL_11_X)
+ break;
+ case ELF::EM_386:
+ ECase(R_386_NONE)
+ ECase(R_386_32)
+ ECase(R_386_PC32)
+ ECase(R_386_GOT32)
+ ECase(R_386_PLT32)
+ ECase(R_386_COPY)
+ ECase(R_386_GLOB_DAT)
+ ECase(R_386_JUMP_SLOT)
+ ECase(R_386_RELATIVE)
+ ECase(R_386_GOTOFF)
+ ECase(R_386_GOTPC)
+ ECase(R_386_32PLT)
+ ECase(R_386_TLS_TPOFF)
+ ECase(R_386_TLS_IE)
+ ECase(R_386_TLS_GOTIE)
+ ECase(R_386_TLS_LE)
+ ECase(R_386_TLS_GD)
+ ECase(R_386_TLS_LDM)
+ ECase(R_386_16)
+ ECase(R_386_PC16)
+ ECase(R_386_8)
+ ECase(R_386_PC8)
+ ECase(R_386_TLS_GD_32)
+ ECase(R_386_TLS_GD_PUSH)
+ ECase(R_386_TLS_GD_CALL)
+ ECase(R_386_TLS_GD_POP)
+ ECase(R_386_TLS_LDM_32)
+ ECase(R_386_TLS_LDM_PUSH)
+ ECase(R_386_TLS_LDM_CALL)
+ ECase(R_386_TLS_LDM_POP)
+ ECase(R_386_TLS_LDO_32)
+ ECase(R_386_TLS_IE_32)
+ ECase(R_386_TLS_LE_32)
+ ECase(R_386_TLS_DTPMOD32)
+ ECase(R_386_TLS_DTPOFF32)
+ ECase(R_386_TLS_TPOFF32)
+ ECase(R_386_TLS_GOTDESC)
+ ECase(R_386_TLS_DESC_CALL)
+ ECase(R_386_TLS_DESC)
+ ECase(R_386_IRELATIVE)
+ ECase(R_386_NUM)
+ break;
+ default:
+ llvm_unreachable("Unsupported architecture");
+ }
+#undef ECase
+}
+
void MappingTraits<ELFYAML::FileHeader>::mapping(IO &IO,
ELFYAML::FileHeader &FileHdr) {
IO.mapRequired("Class", FileHdr.Class);
@@ -360,21 +658,72 @@ void MappingTraits<ELFYAML::LocalGlobalWeakSymbols>::mapping(
IO.mapOptional("Weak", Symbols.Weak);
}
-void MappingTraits<ELFYAML::Section>::mapping(IO &IO,
- ELFYAML::Section &Section) {
+static void commonSectionMapping(IO &IO, ELFYAML::Section &Section) {
IO.mapOptional("Name", Section.Name, StringRef());
IO.mapRequired("Type", Section.Type);
IO.mapOptional("Flags", Section.Flags, ELFYAML::ELF_SHF(0));
IO.mapOptional("Address", Section.Address, Hex64(0));
- IO.mapOptional("Content", Section.Content);
- IO.mapOptional("Link", Section.Link);
+ IO.mapOptional("Link", Section.Link, StringRef());
+ IO.mapOptional("Info", Section.Info, StringRef());
IO.mapOptional("AddressAlign", Section.AddressAlign, Hex64(0));
}
+static void sectionMapping(IO &IO, ELFYAML::RawContentSection &Section) {
+ commonSectionMapping(IO, Section);
+ IO.mapOptional("Content", Section.Content);
+ IO.mapOptional("Size", Section.Size, Hex64(Section.Content.binary_size()));
+}
+
+static void sectionMapping(IO &IO, ELFYAML::RelocationSection &Section) {
+ commonSectionMapping(IO, Section);
+ IO.mapOptional("Relocations", Section.Relocations);
+}
+
+void MappingTraits<std::unique_ptr<ELFYAML::Section>>::mapping(
+ IO &IO, std::unique_ptr<ELFYAML::Section> &Section) {
+ ELFYAML::ELF_SHT sectionType;
+ if (IO.outputting())
+ sectionType = Section->Type;
+ else
+ IO.mapRequired("Type", sectionType);
+
+ switch (sectionType) {
+ case ELF::SHT_REL:
+ case ELF::SHT_RELA:
+ if (!IO.outputting())
+ Section.reset(new ELFYAML::RelocationSection());
+ sectionMapping(IO, *cast<ELFYAML::RelocationSection>(Section.get()));
+ break;
+ default:
+ if (!IO.outputting())
+ Section.reset(new ELFYAML::RawContentSection());
+ sectionMapping(IO, *cast<ELFYAML::RawContentSection>(Section.get()));
+ }
+}
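
On input the mapping above has to read the Type key before it can know
which concrete Section subclass to construct; on output the existing object
already carries its Type. A standalone sketch of this read-the-tag-first
idiom (illustrative types, not the llvm::yaml API):

    #include <memory>
    #include <string>

    struct Section { virtual ~Section() {} std::string Name; };
    struct RawContentSection : Section { std::string Content; };
    struct RelocationSection : Section { /* relocation records */ };

    // Peek at the discriminator first, then construct the matching
    // subclass and let it map its remaining keys.
    std::unique_ptr<Section> makeSection(const std::string &Type) {
      if (Type == "SHT_REL" || Type == "SHT_RELA")
        return std::unique_ptr<Section>(new RelocationSection());
      return std::unique_ptr<Section>(new RawContentSection());
    }
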
+
+StringRef MappingTraits<std::unique_ptr<ELFYAML::Section>>::validate(
+ IO &io, std::unique_ptr<ELFYAML::Section> &Section) {
+ const auto *RawSection = dyn_cast<ELFYAML::RawContentSection>(Section.get());
+ if (!RawSection || RawSection->Size >= RawSection->Content.binary_size())
+ return StringRef();
+ return "Section size must be greater than or equal to the content size";
+}
+
+void MappingTraits<ELFYAML::Relocation>::mapping(IO &IO,
+ ELFYAML::Relocation &Rel) {
+ IO.mapRequired("Offset", Rel.Offset);
+ IO.mapRequired("Symbol", Rel.Symbol);
+ IO.mapRequired("Type", Rel.Type);
+ IO.mapOptional("Addend", Rel.Addend);
+}
+
void MappingTraits<ELFYAML::Object>::mapping(IO &IO, ELFYAML::Object &Object) {
+ assert(!IO.getContext() && "The IO context is initialized already");
+ IO.setContext(&Object);
IO.mapRequired("FileHeader", Object.Header);
IO.mapOptional("Sections", Object.Sections);
IO.mapOptional("Symbols", Object.Symbols);
+ IO.setContext(nullptr);
}
} // end namespace yaml
diff --git a/lib/Object/LLVMBuild.txt b/lib/Object/LLVMBuild.txt
index a87da6e..7813832 100644
--- a/lib/Object/LLVMBuild.txt
+++ b/lib/Object/LLVMBuild.txt
@@ -19,4 +19,4 @@
type = Library
name = Object
parent = Libraries
-required_libraries = Support BitReader
+required_libraries = BitReader Core Support
diff --git a/lib/Object/MachOObjectFile.cpp b/lib/Object/MachOObjectFile.cpp
index 12132a4..c6bab03 100644
--- a/lib/Object/MachOObjectFile.cpp
+++ b/lib/Object/MachOObjectFile.cpp
@@ -420,7 +420,8 @@ MachOObjectFile::MachOObjectFile(MemoryBuffer *Object, bool IsLittleEndian,
bool Is64bits, error_code &EC,
bool BufferOwned)
: ObjectFile(getMachOType(IsLittleEndian, Is64bits), Object, BufferOwned),
- SymtabLoadCmd(NULL), DysymtabLoadCmd(NULL), DataInCodeLoadCmd(NULL) {
+ SymtabLoadCmd(nullptr), DysymtabLoadCmd(nullptr),
+ DataInCodeLoadCmd(nullptr) {
uint32_t LoadCommandCount = this->getHeader().ncmds;
MachO::LoadCommandType SegmentLoadType = is64Bit() ?
MachO::LC_SEGMENT_64 : MachO::LC_SEGMENT;
@@ -471,10 +472,18 @@ error_code MachOObjectFile::getSymbolAddress(DataRefImpl Symb,
uint64_t &Res) const {
if (is64Bit()) {
MachO::nlist_64 Entry = getSymbol64TableEntry(Symb);
- Res = Entry.n_value;
+ if ((Entry.n_type & MachO::N_TYPE) == MachO::N_UNDF &&
+ Entry.n_value == 0)
+ Res = UnknownAddressOrSize;
+ else
+ Res = Entry.n_value;
} else {
MachO::nlist Entry = getSymbolTableEntry(Symb);
- Res = Entry.n_value;
+ if ((Entry.n_type & MachO::N_TYPE) == MachO::N_UNDF &&
+ Entry.n_value == 0)
+ Res = UnknownAddressOrSize;
+ else
+ Res = Entry.n_value;
}
return object_error::success;
}
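
The change above distinguishes "no address" from "address zero" for
undefined symbols. A standalone sketch of the sentinel convention, assuming
an all-ones sentinel value:

    #include <cassert>
    #include <cstdint>

    const uint64_t UnknownAddressOrSize = ~0ULL; // all-ones sentinel

    uint64_t symbolAddress(bool IsUndefined, uint64_t NValue) {
      if (IsUndefined && NValue == 0)
        return UnknownAddressOrSize; // no address, not "address zero"
      return NValue;
    }

    int main() {
      assert(symbolAddress(true, 0) == UnknownAddressOrSize);
      assert(symbolAddress(false, 0) == 0); // a genuine address of zero
      return 0;
    }
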
@@ -500,6 +509,10 @@ error_code MachOObjectFile::getSymbolSize(DataRefImpl DRI,
nlist_base Entry = getSymbolTableEntryBase(this, DRI);
uint64_t Value;
getSymbolAddress(DRI, Value);
+ if (Value == UnknownAddressOrSize) {
+ Result = UnknownAddressOrSize;
+ return object_error::success;
+ }
BeginOffset = Value;
@@ -518,6 +531,8 @@ error_code MachOObjectFile::getSymbolSize(DataRefImpl DRI,
DataRefImpl DRI = Symbol.getRawDataRefImpl();
Entry = getSymbolTableEntryBase(this, DRI);
getSymbolAddress(DRI, Value);
+ if (Value == UnknownAddressOrSize)
+ continue;
if (Entry.n_sect == SectionIndex && Value > BeginOffset)
if (!EndOffset || Value < EndOffset)
EndOffset = Value;
@@ -577,7 +592,7 @@ uint32_t MachOObjectFile::getSymbolFlags(DataRefImpl DRI) const {
if ((MachOType & MachO::N_TYPE) == MachO::N_UNDF) {
uint64_t Value;
getSymbolAddress(DRI, Value);
- if (Value)
+ if (Value && Value != UnknownAddressOrSize)
Result |= SymbolRef::SF_Common;
}
}
@@ -685,15 +700,21 @@ MachOObjectFile::isSectionText(DataRefImpl Sec, bool &Res) const {
return object_error::success;
}
-error_code MachOObjectFile::isSectionData(DataRefImpl DRI, bool &Result) const {
- // FIXME: Unimplemented.
- Result = false;
+error_code MachOObjectFile::isSectionData(DataRefImpl Sec, bool &Result) const {
+ uint32_t Flags = getSectionFlags(this, Sec);
+ unsigned SectionType = Flags & MachO::SECTION_TYPE;
+ Result = !(Flags & MachO::S_ATTR_PURE_INSTRUCTIONS) &&
+ !(SectionType == MachO::S_ZEROFILL ||
+ SectionType == MachO::S_GB_ZEROFILL);
return object_error::success;
}
-error_code MachOObjectFile::isSectionBSS(DataRefImpl DRI, bool &Result) const {
- // FIXME: Unimplemented.
- Result = false;
+error_code MachOObjectFile::isSectionBSS(DataRefImpl Sec, bool &Result) const {
+ uint32_t Flags = getSectionFlags(this, Sec);
+ unsigned SectionType = Flags & MachO::SECTION_TYPE;
+ Result = !(Flags & MachO::S_ATTR_PURE_INSTRUCTIONS) &&
+ (SectionType == MachO::S_ZEROFILL ||
+ SectionType == MachO::S_GB_ZEROFILL);
return object_error::success;
}
@@ -755,65 +776,50 @@ MachOObjectFile::sectionContainsSymbol(DataRefImpl Sec, DataRefImpl Symb,
}
relocation_iterator MachOObjectFile::section_rel_begin(DataRefImpl Sec) const {
- uint32_t Offset;
- if (is64Bit()) {
- MachO::section_64 Sect = getSection64(Sec);
- Offset = Sect.reloff;
- } else {
- MachO::section Sect = getSection(Sec);
- Offset = Sect.reloff;
- }
-
DataRefImpl Ret;
- Ret.p = reinterpret_cast<uintptr_t>(getPtr(this, Offset));
+ Ret.d.a = Sec.d.a;
+ Ret.d.b = 0;
return relocation_iterator(RelocationRef(Ret, this));
}
relocation_iterator
MachOObjectFile::section_rel_end(DataRefImpl Sec) const {
- uint32_t Offset;
uint32_t Num;
if (is64Bit()) {
MachO::section_64 Sect = getSection64(Sec);
- Offset = Sect.reloff;
Num = Sect.nreloc;
} else {
MachO::section Sect = getSection(Sec);
- Offset = Sect.reloff;
Num = Sect.nreloc;
}
- const MachO::any_relocation_info *P =
- reinterpret_cast<const MachO::any_relocation_info *>(getPtr(this, Offset));
-
DataRefImpl Ret;
- Ret.p = reinterpret_cast<uintptr_t>(P + Num);
+ Ret.d.a = Sec.d.a;
+ Ret.d.b = Num;
return relocation_iterator(RelocationRef(Ret, this));
}
-bool MachOObjectFile::section_rel_empty(DataRefImpl Sec) const {
- if (is64Bit()) {
- MachO::section_64 Sect = getSection64(Sec);
- return Sect.nreloc == 0;
- } else {
- MachO::section Sect = getSection(Sec);
- return Sect.nreloc == 0;
- }
-}
-
void MachOObjectFile::moveRelocationNext(DataRefImpl &Rel) const {
- const MachO::any_relocation_info *P =
- reinterpret_cast<const MachO::any_relocation_info *>(Rel.p);
- Rel.p = reinterpret_cast<uintptr_t>(P + 1);
+ ++Rel.d.b;
}
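
The relocation handles above change from raw pointers into the file to a
(section, index) pair packed into DataRefImpl's d.a/d.b fields, so
advancing an iterator is a plain increment and the pointer is recomputed on
demand (as getRelocation does further down). A standalone model, since
DataRefImpl itself is an opaque union:

    #include <cassert>
    #include <cstdint>

    // Stand-in for DataRefImpl's d.a / d.b pair.
    struct RelHandle {
      uint32_t Section; // d.a: which section's relocation list
      uint32_t Index;   // d.b: position within that list
    };

    RelHandle relBegin(uint32_t Sec)           { return {Sec, 0}; }
    RelHandle relEnd(uint32_t Sec, uint32_t N) { return {Sec, N}; }
    void moveNext(RelHandle &R)                { ++R.Index; }

    int main() {
      unsigned Visited = 0;
      for (RelHandle R = relBegin(1), E = relEnd(1, 3); R.Index != E.Index;
           moveNext(R))
        ++Visited;
      assert(Visited == 3);
      return 0;
    }
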
error_code
MachOObjectFile::getRelocationAddress(DataRefImpl Rel, uint64_t &Res) const {
- report_fatal_error("getRelocationAddress not implemented in MachOObjectFile");
+ uint64_t Offset;
+ getRelocationOffset(Rel, Offset);
+
+ DataRefImpl Sec;
+ Sec.d.a = Rel.d.a;
+ uint64_t SecAddress;
+ getSectionAddress(Sec, SecAddress);
+ Res = SecAddress + Offset;
+ return object_error::success;
}
error_code MachOObjectFile::getRelocationOffset(DataRefImpl Rel,
uint64_t &Res) const {
+ assert(getHeader().filetype == MachO::MH_OBJECT &&
+ "Only implemented for MH_OBJECT");
MachO::any_relocation_info RE = getRelocation(Rel);
Res = getAnyRelocationAddress(RE);
return object_error::success;
@@ -986,7 +992,7 @@ MachOObjectFile::getRelocationValueString(DataRefImpl Rel,
}
case MachO::X86_64_RELOC_SUBTRACTOR: {
DataRefImpl RelNext = Rel;
- RelNext.d.a++;
+ moveRelocationNext(RelNext);
MachO::any_relocation_info RENext = getRelocation(RelNext);
// X86_64_RELOC_SUBTRACTOR must be followed by a relocation of type
@@ -1034,7 +1040,7 @@ MachOObjectFile::getRelocationValueString(DataRefImpl Rel,
return object_error::success;
case MachO::GENERIC_RELOC_SECTDIFF: {
DataRefImpl RelNext = Rel;
- RelNext.d.a++;
+ moveRelocationNext(RelNext);
MachO::any_relocation_info RENext = getRelocation(RelNext);
// X86 sect diff's must be followed by a relocation of type
@@ -1056,7 +1062,7 @@ MachOObjectFile::getRelocationValueString(DataRefImpl Rel,
switch (Type) {
case MachO::GENERIC_RELOC_LOCAL_SECTDIFF: {
DataRefImpl RelNext = Rel;
- RelNext.d.a++;
+ moveRelocationNext(RelNext);
MachO::any_relocation_info RENext = getRelocation(RelNext);
// X86 sect diff's must be followed by a relocation of type
@@ -1095,7 +1101,7 @@ MachOObjectFile::getRelocationValueString(DataRefImpl Rel,
printRelocationTargetName(this, RE, fmt);
DataRefImpl RelNext = Rel;
- RelNext.d.a++;
+ moveRelocationNext(RelNext);
MachO::any_relocation_info RENext = getRelocation(RelNext);
// ARM half relocs must be followed by a relocation of type
@@ -1172,13 +1178,7 @@ error_code MachOObjectFile::getLibraryPath(DataRefImpl LibData,
}
basic_symbol_iterator MachOObjectFile::symbol_begin_impl() const {
- DataRefImpl DRI;
- if (!SymtabLoadCmd)
- return basic_symbol_iterator(SymbolRef(DRI, this));
-
- MachO::symtab_command Symtab = getSymtabLoadCommand();
- DRI.p = reinterpret_cast<uintptr_t>(getPtr(this, Symtab.symoff));
- return basic_symbol_iterator(SymbolRef(DRI, this));
+ return getSymbolByIndex(0);
}
basic_symbol_iterator MachOObjectFile::symbol_end_impl() const {
@@ -1196,6 +1196,20 @@ basic_symbol_iterator MachOObjectFile::symbol_end_impl() const {
return basic_symbol_iterator(SymbolRef(DRI, this));
}
+basic_symbol_iterator MachOObjectFile::getSymbolByIndex(unsigned Index) const {
+ DataRefImpl DRI;
+ if (!SymtabLoadCmd)
+ return basic_symbol_iterator(SymbolRef(DRI, this));
+
+ MachO::symtab_command Symtab = getSymtabLoadCommand();
+ assert(Index < Symtab.nsyms && "Requested symbol index is out of range.");
+ unsigned SymbolTableEntrySize =
+ is64Bit() ? sizeof(MachO::nlist_64) : sizeof(MachO::nlist);
+ DRI.p = reinterpret_cast<uintptr_t>(getPtr(this, Symtab.symoff));
+ DRI.p += Index * SymbolTableEntrySize;
+ return basic_symbol_iterator(SymbolRef(DRI, this));
+}
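
getSymbolByIndex above is plain stride arithmetic over the symbol table. A
standalone check of the offsets, using the actual nlist record sizes (12
bytes for 32-bit, 16 for 64-bit):

    #include <cassert>
    #include <cstdint>

    uint64_t symbolEntryAddr(uint64_t SymTabOffset, bool Is64Bit,
                             unsigned Index) {
      // sizeof(MachO::nlist_64) == 16, sizeof(MachO::nlist) == 12.
      unsigned EntrySize = Is64Bit ? 16 : 12;
      return SymTabOffset + uint64_t(Index) * EntrySize;
    }

    int main() {
      assert(symbolEntryAddr(0x1000, true, 2) == 0x1020);
      assert(symbolEntryAddr(0x1000, false, 2) == 0x1018);
      return 0;
    }
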
+
section_iterator MachOObjectFile::section_begin() const {
DataRefImpl DRI;
return section_iterator(SectionRef(DRI, this));
@@ -1486,8 +1500,21 @@ MachOObjectFile::getVersionMinLoadCommand(const LoadCommandInfo &L) const {
MachO::any_relocation_info
MachOObjectFile::getRelocation(DataRefImpl Rel) const {
- const char *P = reinterpret_cast<const char *>(Rel.p);
- return getStruct<MachO::any_relocation_info>(this, P);
+ DataRefImpl Sec;
+ Sec.d.a = Rel.d.a;
+ uint32_t Offset;
+ if (is64Bit()) {
+ MachO::section_64 Sect = getSection64(Sec);
+ Offset = Sect.reloff;
+ } else {
+ MachO::section Sect = getSection(Sec);
+ Offset = Sect.reloff;
+ }
+
+ auto P = reinterpret_cast<const MachO::any_relocation_info *>(
+ getPtr(this, Offset)) + Rel.d.b;
+ return getStruct<MachO::any_relocation_info>(
+ this, reinterpret_cast<const char *>(P));
}
MachO::data_in_code_entry
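The rewritten getRelocation() above stops dereferencing a raw pointer stashed in
DataRefImpl and instead keys each relocation by section index (d.a) and entry
index (d.b), resolving the file offset through the owning section's reloff on
every access. A minimal sketch of the addressing arithmetic, with hypothetical
FileBase/RelOff parameters standing in for getPtr() and the section header:

#include <cstddef>
#include <cstdint>

// Hedged sketch: FileBase is the start of the mapped file, RelOff is the
// owning section's reloff field, Index is Rel.d.b.
const char *relocationEntry(const char *FileBase, uint32_t RelOff,
                            uint32_t Index) {
  const size_t EntrySize = 8; // sizeof(MachO::any_relocation_info)
  return FileBase + RelOff + static_cast<size_t>(Index) * EntrySize;
}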
diff --git a/lib/Object/MachOUniversal.cpp b/lib/Object/MachOUniversal.cpp
index 70baa9f..5085efd 100644
--- a/lib/Object/MachOUniversal.cpp
+++ b/lib/Object/MachOUniversal.cpp
@@ -14,6 +14,7 @@
#include "llvm/Object/MachOUniversal.h"
#include "llvm/Object/MachO.h"
#include "llvm/Object/ObjectFile.h"
+#include "llvm/Object/Archive.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Host.h"
#include "llvm/Support/MemoryBuffer.h"
@@ -57,7 +58,7 @@ static T getUniversalBinaryStruct(const char *Ptr) {
MachOUniversalBinary::ObjectForArch::ObjectForArch(
const MachOUniversalBinary *Parent, uint32_t Index)
: Parent(Parent), Index(Index) {
- if (Parent == 0 || Index > Parent->getNumberOfObjects()) {
+ if (!Parent || Index > Parent->getNumberOfObjects()) {
clear();
} else {
// Parse object header.
@@ -90,6 +91,25 @@ error_code MachOUniversalBinary::ObjectForArch::getAsObjectFile(
return object_error::parse_failed;
}
+error_code MachOUniversalBinary::ObjectForArch::getAsArchive(
+ std::unique_ptr<Archive> &Result) const {
+ if (Parent) {
+ StringRef ParentData = Parent->getData();
+ StringRef ObjectData = ParentData.substr(Header.offset, Header.size);
+ std::string ObjectName =
+ Parent->getFileName().str() + ":" +
+ Triple::getArchTypeName(MachOObjectFile::getArch(Header.cputype));
+ MemoryBuffer *ObjBuffer = MemoryBuffer::getMemBuffer(
+ ObjectData, ObjectName, false);
+ ErrorOr<Archive *> Obj = Archive::create(ObjBuffer);
+ if (error_code EC = Obj.getError())
+ return EC;
+ Result.reset(Obj.get());
+ return object_error::success;
+ }
+ return object_error::parse_failed;
+}
+
void MachOUniversalBinary::anchor() { }
ErrorOr<MachOUniversalBinary *>
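A hedged usage sketch for the new getAsArchive() entry point: each slice of a
fat file that holds a static library can now be opened as an Archive directly.
The iteration API is assumed from the surrounding class; error_code evaluates
to true on failure.

#include "llvm/Object/Archive.h"
#include "llvm/Object/MachOUniversal.h"
#include <memory>
using namespace llvm;
using namespace llvm::object;

// 'UB' is assumed to be a valid, already-parsed universal binary.
static void visitArchiveSlices(MachOUniversalBinary &UB) {
  for (auto I = UB.begin_objects(), E = UB.end_objects(); I != E; ++I) {
    std::unique_ptr<Archive> Slice;
    if (!I->getAsArchive(Slice)) {
      // Slice now owns the embedded archive; its buffer name is
      // "<fat file name>:<arch name>", as built above.
    }
  }
}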
diff --git a/lib/Object/Object.cpp b/lib/Object/Object.cpp
index 243bd44..b0068a8 100644
--- a/lib/Object/Object.cpp
+++ b/lib/Object/Object.cpp
@@ -60,7 +60,7 @@ wrap(const relocation_iterator *SI) {
// ObjectFile creation
LLVMObjectFileRef LLVMCreateObjectFile(LLVMMemoryBufferRef MemBuf) {
ErrorOr<ObjectFile*> ObjOrErr(ObjectFile::createObjectFile(unwrap(MemBuf)));
- ObjectFile *Obj = ObjOrErr ? ObjOrErr.get() : 0;
+ ObjectFile *Obj = ObjOrErr ? ObjOrErr.get() : nullptr;
return wrap(Obj);
}
@@ -184,13 +184,6 @@ uint64_t LLVMGetSymbolAddress(LLVMSymbolIteratorRef SI) {
return ret;
}
-uint64_t LLVMGetSymbolFileOffset(LLVMSymbolIteratorRef SI) {
- uint64_t ret;
- if (error_code ec = (*unwrap(SI))->getFileOffset(ret))
- report_fatal_error(ec.message());
- return ret;
-}
-
uint64_t LLVMGetSymbolSize(LLVMSymbolIteratorRef SI) {
uint64_t ret;
if (error_code ec = (*unwrap(SI))->getSize(ret))
diff --git a/lib/Object/StringTableBuilder.cpp b/lib/Object/StringTableBuilder.cpp
new file mode 100644
index 0000000..9152834
--- /dev/null
+++ b/lib/Object/StringTableBuilder.cpp
@@ -0,0 +1,51 @@
+//===-- StringTableBuilder.cpp - String table building utility ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Object/StringTableBuilder.h"
+
+using namespace llvm;
+
+static bool compareBySuffix(StringRef a, StringRef b) {
+ size_t sizeA = a.size();
+ size_t sizeB = b.size();
+ size_t len = std::min(sizeA, sizeB);
+ for (size_t i = 0; i < len; ++i) {
+ char ca = a[sizeA - i - 1];
+ char cb = b[sizeB - i - 1];
+ if (ca != cb)
+ return ca > cb;
+ }
+ return sizeA > sizeB;
+}
+
+void StringTableBuilder::finalize() {
+ SmallVector<StringRef, 8> Strings;
+ for (auto i = StringIndexMap.begin(), e = StringIndexMap.end(); i != e; ++i)
+ Strings.push_back(i->getKey());
+
+ std::sort(Strings.begin(), Strings.end(), compareBySuffix);
+
+ // FIXME: Starting with a null byte is ELF specific. Generalize this so we
+ // can use the class with other object formats.
+ StringTable += '\x00';
+
+ StringRef Previous;
+ for (StringRef s : Strings) {
+ if (Previous.endswith(s)) {
+ StringIndexMap[s] = StringTable.size() - 1 - s.size();
+ continue;
+ }
+
+ StringIndexMap[s] = StringTable.size();
+ StringTable += s;
+ StringTable += '\x00';
+ Previous = s;
+ }
+}
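The builder sorts strings by reversed bytes (longest first), so any string that
is a suffix of its predecessor reuses the predecessor's tail, terminator
included. A worked example, assuming the companion header exposes the add()
and getOffset() operations implied by StringIndexMap:

#include "llvm/Object/StringTableBuilder.h"
using namespace llvm;

void suffixSharingExample() {
  StringTableBuilder B;
  B.add("foobar");
  B.add("bar");
  B.finalize();
  // Table bytes are "\0foobar\0": "bar" is a suffix of "foobar", so it is
  // not stored again. getOffset("foobar") == 1, getOffset("bar") == 4.
}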
diff --git a/lib/Option/ArgList.cpp b/lib/Option/ArgList.cpp
index fecd237..a5ab8d7 100644
--- a/lib/Option/ArgList.cpp
+++ b/lib/Option/ArgList.cpp
@@ -9,6 +9,7 @@
#include "llvm/Option/ArgList.h"
#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Option/Arg.h"
#include "llvm/Option/Option.h"
@@ -32,11 +33,6 @@ void arg_iterator::SkipToNextArg() {
}
}
-//
-
-ArgList::ArgList() {
-}
-
ArgList::~ArgList() {
}
@@ -45,14 +41,9 @@ void ArgList::append(Arg *A) {
}
void ArgList::eraseArg(OptSpecifier Id) {
- for (iterator it = begin(), ie = end(); it != ie; ) {
- if ((*it)->getOption().matches(Id)) {
- it = Args.erase(it);
- ie = end();
- } else {
- ++it;
- }
- }
+ Args.erase(std::remove_if(begin(), end(),
+ [=](Arg *A) { return A->getOption().matches(Id); }),
+ end());
}
Arg *ArgList::getLastArgNoClaim(OptSpecifier Id) const {
@@ -60,11 +51,11 @@ Arg *ArgList::getLastArgNoClaim(OptSpecifier Id) const {
for (const_reverse_iterator it = rbegin(), ie = rend(); it != ie; ++it)
if ((*it)->getOption().matches(Id))
return *it;
- return 0;
+ return nullptr;
}
Arg *ArgList::getLastArg(OptSpecifier Id) const {
- Arg *Res = 0;
+ Arg *Res = nullptr;
for (const_iterator it = begin(), ie = end(); it != ie; ++it) {
if ((*it)->getOption().matches(Id)) {
Res = *it;
@@ -76,7 +67,7 @@ Arg *ArgList::getLastArg(OptSpecifier Id) const {
}
Arg *ArgList::getLastArg(OptSpecifier Id0, OptSpecifier Id1) const {
- Arg *Res = 0;
+ Arg *Res = nullptr;
for (const_iterator it = begin(), ie = end(); it != ie; ++it) {
if ((*it)->getOption().matches(Id0) ||
(*it)->getOption().matches(Id1)) {
@@ -91,7 +82,7 @@ Arg *ArgList::getLastArg(OptSpecifier Id0, OptSpecifier Id1) const {
Arg *ArgList::getLastArg(OptSpecifier Id0, OptSpecifier Id1,
OptSpecifier Id2) const {
- Arg *Res = 0;
+ Arg *Res = nullptr;
for (const_iterator it = begin(), ie = end(); it != ie; ++it) {
if ((*it)->getOption().matches(Id0) ||
(*it)->getOption().matches(Id1) ||
@@ -106,7 +97,7 @@ Arg *ArgList::getLastArg(OptSpecifier Id0, OptSpecifier Id1,
Arg *ArgList::getLastArg(OptSpecifier Id0, OptSpecifier Id1,
OptSpecifier Id2, OptSpecifier Id3) const {
- Arg *Res = 0;
+ Arg *Res = nullptr;
for (const_iterator it = begin(), ie = end(); it != ie; ++it) {
if ((*it)->getOption().matches(Id0) ||
(*it)->getOption().matches(Id1) ||
@@ -123,7 +114,7 @@ Arg *ArgList::getLastArg(OptSpecifier Id0, OptSpecifier Id1,
Arg *ArgList::getLastArg(OptSpecifier Id0, OptSpecifier Id1,
OptSpecifier Id2, OptSpecifier Id3,
OptSpecifier Id4) const {
- Arg *Res = 0;
+ Arg *Res = nullptr;
for (const_iterator it = begin(), ie = end(); it != ie; ++it) {
if ((*it)->getOption().matches(Id0) ||
(*it)->getOption().matches(Id1) ||
@@ -141,7 +132,7 @@ Arg *ArgList::getLastArg(OptSpecifier Id0, OptSpecifier Id1,
Arg *ArgList::getLastArg(OptSpecifier Id0, OptSpecifier Id1,
OptSpecifier Id2, OptSpecifier Id3,
OptSpecifier Id4, OptSpecifier Id5) const {
- Arg *Res = 0;
+ Arg *Res = nullptr;
for (const_iterator it = begin(), ie = end(); it != ie; ++it) {
if ((*it)->getOption().matches(Id0) ||
(*it)->getOption().matches(Id1) ||
@@ -161,7 +152,7 @@ Arg *ArgList::getLastArg(OptSpecifier Id0, OptSpecifier Id1,
OptSpecifier Id2, OptSpecifier Id3,
OptSpecifier Id4, OptSpecifier Id5,
OptSpecifier Id6) const {
- Arg *Res = 0;
+ Arg *Res = nullptr;
for (const_iterator it = begin(), ie = end(); it != ie; ++it) {
if ((*it)->getOption().matches(Id0) ||
(*it)->getOption().matches(Id1) ||
@@ -182,7 +173,7 @@ Arg *ArgList::getLastArg(OptSpecifier Id0, OptSpecifier Id1,
OptSpecifier Id2, OptSpecifier Id3,
OptSpecifier Id4, OptSpecifier Id5,
OptSpecifier Id6, OptSpecifier Id7) const {
- Arg *Res = 0;
+ Arg *Res = nullptr;
for (const_iterator it = begin(), ie = end(); it != ie; ++it) {
if ((*it)->getOption().matches(Id0) ||
(*it)->getOption().matches(Id1) ||
@@ -348,52 +339,50 @@ DerivedArgList::DerivedArgList(const InputArgList &_BaseArgs)
: BaseArgs(_BaseArgs) {
}
-DerivedArgList::~DerivedArgList() {
- // We only own the arguments we explicitly synthesized.
- for (iterator it = SynthesizedArgs.begin(), ie = SynthesizedArgs.end();
- it != ie; ++it)
- delete *it;
-}
+DerivedArgList::~DerivedArgList() {}
const char *DerivedArgList::MakeArgString(StringRef Str) const {
return BaseArgs.MakeArgString(Str);
}
+void DerivedArgList::AddSynthesizedArg(Arg *A) {
+ SynthesizedArgs.push_back(std::unique_ptr<Arg>(A));
+}
+
Arg *DerivedArgList::MakeFlagArg(const Arg *BaseArg, const Option Opt) const {
- Arg *A = new Arg(Opt, ArgList::MakeArgString(Twine(Opt.getPrefix()) +
- Twine(Opt.getName())),
- BaseArgs.MakeIndex(Opt.getName()), BaseArg);
- SynthesizedArgs.push_back(A);
- return A;
+ SynthesizedArgs.push_back(make_unique<Arg>(
+ Opt,
+ ArgList::MakeArgString(Twine(Opt.getPrefix()) + Twine(Opt.getName())),
+ BaseArgs.MakeIndex(Opt.getName()), BaseArg));
+ return SynthesizedArgs.back().get();
}
Arg *DerivedArgList::MakePositionalArg(const Arg *BaseArg, const Option Opt,
StringRef Value) const {
unsigned Index = BaseArgs.MakeIndex(Value);
- Arg *A = new Arg(Opt, ArgList::MakeArgString(Twine(Opt.getPrefix()) +
- Twine(Opt.getName())),
- Index, BaseArgs.getArgString(Index), BaseArg);
- SynthesizedArgs.push_back(A);
- return A;
+ SynthesizedArgs.push_back(make_unique<Arg>(
+ Opt,
+ ArgList::MakeArgString(Twine(Opt.getPrefix()) + Twine(Opt.getName())),
+ Index, BaseArgs.getArgString(Index), BaseArg));
+ return SynthesizedArgs.back().get();
}
Arg *DerivedArgList::MakeSeparateArg(const Arg *BaseArg, const Option Opt,
StringRef Value) const {
unsigned Index = BaseArgs.MakeIndex(Opt.getName(), Value);
- Arg *A = new Arg(Opt, ArgList::MakeArgString(Twine(Opt.getPrefix()) +
- Twine(Opt.getName())),
- Index, BaseArgs.getArgString(Index + 1), BaseArg);
- SynthesizedArgs.push_back(A);
- return A;
+ SynthesizedArgs.push_back(make_unique<Arg>(
+ Opt,
+ ArgList::MakeArgString(Twine(Opt.getPrefix()) + Twine(Opt.getName())),
+ Index, BaseArgs.getArgString(Index + 1), BaseArg));
+ return SynthesizedArgs.back().get();
}
Arg *DerivedArgList::MakeJoinedArg(const Arg *BaseArg, const Option Opt,
StringRef Value) const {
unsigned Index = BaseArgs.MakeIndex(Opt.getName().str() + Value.str());
- Arg *A = new Arg(Opt, ArgList::MakeArgString(Twine(Opt.getPrefix()) +
- Twine(Opt.getName())), Index,
- BaseArgs.getArgString(Index) + Opt.getName().size(),
- BaseArg);
- SynthesizedArgs.push_back(A);
- return A;
+ SynthesizedArgs.push_back(make_unique<Arg>(
+ Opt,
+ ArgList::MakeArgString(Twine(Opt.getPrefix()) + Twine(Opt.getName())),
+ Index, BaseArgs.getArgString(Index) + Opt.getName().size(), BaseArg));
+ return SynthesizedArgs.back().get();
}
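The new eraseArg() is the standard erase-remove idiom: remove_if compacts the
surviving elements to the front and the trailing erase trims the leftovers,
giving one linear pass instead of repeated mid-vector erasures. The same shape
in isolation:

#include <algorithm>
#include <vector>

// Drop every even element of V in O(n), a hedged stand-in for eraseArg().
void dropEven(std::vector<int> &V) {
  V.erase(std::remove_if(V.begin(), V.end(),
                         [](int X) { return X % 2 == 0; }),
          V.end());
}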
diff --git a/lib/Option/OptTable.cpp b/lib/Option/OptTable.cpp
index 6fa459a..6842f4d 100644
--- a/lib/Option/OptTable.cpp
+++ b/lib/Option/OptTable.cpp
@@ -62,7 +62,7 @@ static inline bool operator<(const OptTable::Info &A, const OptTable::Info &B) {
for (const char * const *APre = A.Prefixes,
* const *BPre = B.Prefixes;
- *APre != 0 && *BPre != 0; ++APre, ++BPre) {
+ *APre != nullptr && *BPre != nullptr; ++APre, ++BPre){
if (int N = StrCmpOptionName(*APre, *BPre))
return N < 0;
}
@@ -136,7 +136,7 @@ OptTable::OptTable(const Info *_OptionInfos, unsigned _NumOptionInfos,
for (unsigned i = FirstSearchableIndex + 1, e = getNumOptions() + 1;
i != e; ++i) {
if (const char *const *P = getInfo(i).Prefixes) {
- for (; *P != 0; ++P) {
+ for (; *P != nullptr; ++P) {
PrefixesUnion.insert(*P);
}
}
@@ -160,7 +160,7 @@ OptTable::~OptTable() {
const Option OptTable::getOption(OptSpecifier Opt) const {
unsigned id = Opt.getID();
if (id == 0)
- return Option(0, 0);
+ return Option(nullptr, nullptr);
assert((unsigned) (id - 1) < getNumOptions() && "Invalid ID.");
return Option(&getInfo(id), this);
}
@@ -178,7 +178,7 @@ static bool isInput(const llvm::StringSet<> &Prefixes, StringRef Arg) {
/// \returns Matched size. 0 means no match.
static unsigned matchOption(const OptTable::Info *I, StringRef Str,
bool IgnoreCase) {
- for (const char * const *Pre = I->Prefixes; *Pre != 0; ++Pre) {
+ for (const char * const *Pre = I->Prefixes; *Pre != nullptr; ++Pre) {
StringRef Prefix(*Pre);
if (Str.startswith(Prefix)) {
StringRef Rest = Str.substr(Prefix.size());
@@ -240,7 +240,7 @@ Arg *OptTable::ParseOneArg(const ArgList &Args, unsigned &Index,
// Otherwise, see if this argument was missing values.
if (Prev != Index)
- return 0;
+ return nullptr;
}
// If we failed to find an option and this arg started with /, then it's
diff --git a/lib/Option/Option.cpp b/lib/Option/Option.cpp
index 7b5ff2b..10662a3 100644
--- a/lib/Option/Option.cpp
+++ b/lib/Option/Option.cpp
@@ -58,8 +58,8 @@ void Option::dump() const {
if (Info->Prefixes) {
llvm::errs() << " Prefixes:[";
- for (const char * const *Pre = Info->Prefixes; *Pre != 0; ++Pre) {
- llvm::errs() << '"' << *Pre << (*(Pre + 1) == 0 ? "\"" : "\", ");
+ for (const char * const *Pre = Info->Prefixes; *Pre != nullptr; ++Pre) {
+ llvm::errs() << '"' << *Pre << (*(Pre + 1) == nullptr ? "\"" : "\", ");
}
llvm::errs() << ']';
}
@@ -116,7 +116,7 @@ Arg *Option::accept(const ArgList &Args,
switch (getKind()) {
case FlagClass: {
if (ArgSize != strlen(Args.getArgString(Index)))
- return 0;
+ return nullptr;
Arg *A = new Arg(UnaliasedOption, Spelling, Index++);
if (getAliasArgs()) {
@@ -166,11 +166,11 @@ Arg *Option::accept(const ArgList &Args,
// Matches iff this is an exact match.
// FIXME: Avoid strlen.
if (ArgSize != strlen(Args.getArgString(Index)))
- return 0;
+ return nullptr;
Index += 2;
if (Index > Args.getNumInputArgStrings())
- return 0;
+ return nullptr;
return new Arg(UnaliasedOption, Spelling,
Index - 2, Args.getArgString(Index - 1));
@@ -178,11 +178,11 @@ Arg *Option::accept(const ArgList &Args,
// Matches iff this is an exact match.
// FIXME: Avoid strlen.
if (ArgSize != strlen(Args.getArgString(Index)))
- return 0;
+ return nullptr;
Index += 1 + getNumArgs();
if (Index > Args.getNumInputArgStrings())
- return 0;
+ return nullptr;
Arg *A = new Arg(UnaliasedOption, Spelling, Index - 1 - getNumArgs(),
Args.getArgString(Index - getNumArgs()));
@@ -201,7 +201,7 @@ Arg *Option::accept(const ArgList &Args,
// Otherwise it must be separate.
Index += 2;
if (Index > Args.getNumInputArgStrings())
- return 0;
+ return nullptr;
return new Arg(UnaliasedOption, Spelling,
Index - 2, Args.getArgString(Index - 1));
@@ -210,7 +210,7 @@ Arg *Option::accept(const ArgList &Args,
// Always matches.
Index += 2;
if (Index > Args.getNumInputArgStrings())
- return 0;
+ return nullptr;
return new Arg(UnaliasedOption, Spelling, Index - 2,
Args.getArgString(Index - 2) + ArgSize,
@@ -219,7 +219,7 @@ Arg *Option::accept(const ArgList &Args,
// Matches iff this is an exact match.
// FIXME: Avoid strlen.
if (ArgSize != strlen(Args.getArgString(Index)))
- return 0;
+ return nullptr;
Arg *A = new Arg(UnaliasedOption, Spelling, Index++);
while (Index < Args.getNumInputArgStrings())
A->getValues().push_back(Args.getArgString(Index++));
diff --git a/lib/ProfileData/Android.mk b/lib/ProfileData/Android.mk
new file mode 100644
index 0000000..5ae5ba8
--- /dev/null
+++ b/lib/ProfileData/Android.mk
@@ -0,0 +1,33 @@
+LOCAL_PATH:= $(call my-dir)
+
+profiledata_SRC_FILES := \
+ InstrProf.cpp \
+ InstrProfReader.cpp \
+ InstrProfWriter.cpp
+
+
+# For the host
+# =====================================================
+include $(CLEAR_VARS)
+
+LOCAL_MODULE:= libLLVMProfileData
+LOCAL_MODULE_TAGS := optional
+LOCAL_SRC_FILES := $(profiledata_SRC_FILES)
+
+include $(LLVM_HOST_BUILD_MK)
+include $(LLVM_GEN_INTRINSICS_MK)
+include $(BUILD_HOST_STATIC_LIBRARY)
+
+# For the device
+# =====================================================
+ifneq (true,$(DISABLE_LLVM_DEVICE_BUILDS))
+include $(CLEAR_VARS)
+
+LOCAL_MODULE:= libLLVMProfileData
+LOCAL_MODULE_TAGS := optional
+LOCAL_SRC_FILES := $(profiledata_SRC_FILES)
+
+include $(LLVM_DEVICE_BUILD_MK)
+include $(LLVM_GEN_INTRINSICS_MK)
+include $(BUILD_STATIC_LIBRARY)
+endif
diff --git a/lib/ProfileData/InstrProf.cpp b/lib/ProfileData/InstrProf.cpp
index 850f613..de2b13d 100644
--- a/lib/ProfileData/InstrProf.cpp
+++ b/lib/ProfileData/InstrProf.cpp
@@ -33,6 +33,8 @@ class InstrProfErrorCategoryType : public error_category {
return "Invalid header";
case instrprof_error::unsupported_version:
return "Unsupported format version";
+ case instrprof_error::unsupported_hash_type:
+ return "Unsupported hash function";
case instrprof_error::too_large:
return "Too much profile data";
case instrprof_error::truncated:
@@ -50,7 +52,7 @@ class InstrProfErrorCategoryType : public error_category {
}
llvm_unreachable("A value of instrprof_error has no message.");
}
- error_condition default_error_condition(int EV) const {
+ error_condition default_error_condition(int EV) const override {
if (EV == instrprof_error::success)
return errc::success;
return errc::invalid_argument;
diff --git a/lib/ProfileData/InstrProfIndexed.h b/lib/ProfileData/InstrProfIndexed.h
new file mode 100644
index 0000000..7761704
--- /dev/null
+++ b/lib/ProfileData/InstrProfIndexed.h
@@ -0,0 +1,55 @@
+//=-- InstrProfIndexed.h - Indexed profiling format support -------*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Shared header for the instrumented profile data reader and writer.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_PROFILEDATA_INSTRPROF_INDEXED_H_
+#define LLVM_PROFILEDATA_INSTRPROF_INDEXED_H_
+
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MD5.h"
+
+namespace llvm {
+
+namespace IndexedInstrProf {
+enum class HashT : uint32_t {
+ MD5,
+
+ Last = MD5
+};
+
+static inline uint64_t MD5Hash(StringRef Str) {
+ MD5 Hash;
+ Hash.update(Str);
+ llvm::MD5::MD5Result Result;
+ Hash.final(Result);
+ // Return the least significant 8 bytes. Our MD5 implementation returns the
+ // result in little endian, so we may need to swap bytes.
+ using namespace llvm::support;
+ return endian::read<uint64_t, little, unaligned>(Result);
+}
+
+static inline uint64_t ComputeHash(HashT Type, StringRef K) {
+ switch (Type) {
+ case HashT::MD5:
+ return IndexedInstrProf::MD5Hash(K);
+ }
+ llvm_unreachable("Unhandled hash type");
+}
+
+const uint64_t Magic = 0x8169666f72706cff; // "\xfflprofi\x81"
+const uint64_t Version = 1;
+const HashT HashType = HashT::MD5;
+}
+
+} // end namespace llvm
+
+#endif // LLVM_PROFILEDATA_INSTRPROF_INDEXED_H_
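The 64-bit key used by the indexed format is just the truncated digest: MD5 the
function name and read the first eight digest bytes little-endian. For example,
md5("foo") begins ac bd 18 db 4c c2 f8 5c, so the key is 0x5cf8c24cdb18bdac. A
hedged wrapper (note the header is internal to lib/ProfileData):

#include "InstrProfIndexed.h"
#include "llvm/ADT/StringRef.h"
#include <cstdint>

uint64_t keyFor(llvm::StringRef FuncName) {
  return llvm::IndexedInstrProf::ComputeHash(
      llvm::IndexedInstrProf::HashType, FuncName);
}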
diff --git a/lib/ProfileData/InstrProfReader.cpp b/lib/ProfileData/InstrProfReader.cpp
index b07f402..7014f5e 100644
--- a/lib/ProfileData/InstrProfReader.cpp
+++ b/lib/ProfileData/InstrProfReader.cpp
@@ -15,30 +15,62 @@
#include "llvm/ProfileData/InstrProfReader.h"
#include "llvm/ProfileData/InstrProf.h"
+#include "InstrProfIndexed.h"
+
#include <cassert>
using namespace llvm;
-error_code InstrProfReader::create(std::string Path,
- std::unique_ptr<InstrProfReader> &Result) {
- std::unique_ptr<MemoryBuffer> Buffer;
+static error_code setupMemoryBuffer(std::string Path,
+ std::unique_ptr<MemoryBuffer> &Buffer) {
if (error_code EC = MemoryBuffer::getFileOrSTDIN(Path, Buffer))
return EC;
// Sanity check the file.
if (Buffer->getBufferSize() > std::numeric_limits<unsigned>::max())
return instrprof_error::too_large;
+ return instrprof_error::success;
+}
+
+static error_code initializeReader(InstrProfReader &Reader) {
+ return Reader.readHeader();
+}
+
+error_code InstrProfReader::create(std::string Path,
+ std::unique_ptr<InstrProfReader> &Result) {
+ // Set up the buffer to read.
+ std::unique_ptr<MemoryBuffer> Buffer;
+ if (error_code EC = setupMemoryBuffer(Path, Buffer))
+ return EC;
// Create the reader.
- if (RawInstrProfReader64::hasFormat(*Buffer))
+ if (IndexedInstrProfReader::hasFormat(*Buffer))
+ Result.reset(new IndexedInstrProfReader(std::move(Buffer)));
+ else if (RawInstrProfReader64::hasFormat(*Buffer))
Result.reset(new RawInstrProfReader64(std::move(Buffer)));
else if (RawInstrProfReader32::hasFormat(*Buffer))
Result.reset(new RawInstrProfReader32(std::move(Buffer)));
else
Result.reset(new TextInstrProfReader(std::move(Buffer)));
- // Read the header and return the result.
- return Result->readHeader();
+ // Initialize the reader and return the result.
+ return initializeReader(*Result);
+}
+
+error_code IndexedInstrProfReader::create(
+ std::string Path, std::unique_ptr<IndexedInstrProfReader> &Result) {
+ // Set up the buffer to read.
+ std::unique_ptr<MemoryBuffer> Buffer;
+ if (error_code EC = setupMemoryBuffer(Path, Buffer))
+ return EC;
+
+ // Create the reader.
+ if (!IndexedInstrProfReader::hasFormat(*Buffer))
+ return instrprof_error::bad_magic;
+ Result.reset(new IndexedInstrProfReader(std::move(Buffer)));
+
+ // Initialize the reader and return the result.
+ return initializeReader(*Result);
}
void InstrProfIterator::Increment() {
@@ -69,6 +101,8 @@ error_code TextInstrProfReader::readNextRecord(InstrProfRecord &Record) {
return error(instrprof_error::truncated);
if ((Line++)->getAsInteger(10, NumCounters))
return error(instrprof_error::malformed);
+ if (NumCounters == 0)
+ return error(instrprof_error::malformed);
// Read each counter and fill our internal storage with the values.
Counts.clear();
@@ -138,6 +172,29 @@ error_code RawInstrProfReader<IntPtrT>::readHeader() {
return readHeader(*Header);
}
+template <class IntPtrT>
+error_code RawInstrProfReader<IntPtrT>::readNextHeader(const char *CurrentPos) {
+ const char *End = DataBuffer->getBufferEnd();
+ // Skip zero padding between profiles.
+ while (CurrentPos != End && *CurrentPos == 0)
+ ++CurrentPos;
+ // If there's nothing left, we're done.
+ if (CurrentPos == End)
+ return instrprof_error::eof;
+ // If there isn't enough space for another header, this is probably just
+ // garbage at the end of the file.
+ if (CurrentPos + sizeof(RawHeader) > End)
+ return instrprof_error::malformed;
+ // The magic should have the same byte order as in the previous header.
+ uint64_t Magic = *reinterpret_cast<const uint64_t *>(CurrentPos);
+ if (Magic != swap(getRawMagic<IntPtrT>()))
+ return instrprof_error::bad_magic;
+
+ // There's another profile to read, so we need to process the header.
+ auto *Header = reinterpret_cast<const RawHeader *>(CurrentPos);
+ return readHeader(*Header);
+}
+
static uint64_t getRawVersion() {
return 1;
}
@@ -156,16 +213,17 @@ error_code RawInstrProfReader<IntPtrT>::readHeader(const RawHeader &Header) {
ptrdiff_t DataOffset = sizeof(RawHeader);
ptrdiff_t CountersOffset = DataOffset + sizeof(ProfileData) * DataSize;
ptrdiff_t NamesOffset = CountersOffset + sizeof(uint64_t) * CountersSize;
- size_t FileSize = NamesOffset + sizeof(char) * NamesSize;
+ size_t ProfileSize = NamesOffset + sizeof(char) * NamesSize;
- if (FileSize != DataBuffer->getBufferSize())
+ auto *Start = reinterpret_cast<const char *>(&Header);
+ if (Start + ProfileSize > DataBuffer->getBufferEnd())
return error(instrprof_error::bad_header);
- const char *Start = DataBuffer->getBufferStart();
Data = reinterpret_cast<const ProfileData *>(Start + DataOffset);
DataEnd = Data + DataSize;
CountersStart = reinterpret_cast<const uint64_t *>(Start + CountersOffset);
NamesStart = Start + NamesOffset;
+ ProfileEnd = Start + ProfileSize;
return success();
}
@@ -174,12 +232,15 @@ template <class IntPtrT>
error_code
RawInstrProfReader<IntPtrT>::readNextRecord(InstrProfRecord &Record) {
if (Data == DataEnd)
- return error(instrprof_error::eof);
+ if (error_code EC = readNextHeader(ProfileEnd))
+ return EC;
// Get the raw data.
StringRef RawName(getName(Data->NamePtr), swap(Data->NameSize));
- auto RawCounts = makeArrayRef(getCounter(Data->CounterPtr),
- swap(Data->NumCounters));
+ uint32_t NumCounters = swap(Data->NumCounters);
+ if (NumCounters == 0)
+ return error(instrprof_error::malformed);
+ auto RawCounts = makeArrayRef(getCounter(Data->CounterPtr), NumCounters);
// Check bounds.
auto *NamesStartAsCounter = reinterpret_cast<const uint64_t *>(NamesStart);
@@ -210,3 +271,83 @@ namespace llvm {
template class RawInstrProfReader<uint32_t>;
template class RawInstrProfReader<uint64_t>;
}
+
+InstrProfLookupTrait::hash_value_type
+InstrProfLookupTrait::ComputeHash(StringRef K) {
+ return IndexedInstrProf::ComputeHash(HashType, K);
+}
+
+bool IndexedInstrProfReader::hasFormat(const MemoryBuffer &DataBuffer) {
+ if (DataBuffer.getBufferSize() < 8)
+ return false;
+ using namespace support;
+ uint64_t Magic =
+ endian::read<uint64_t, little, aligned>(DataBuffer.getBufferStart());
+ return Magic == IndexedInstrProf::Magic;
+}
+
+error_code IndexedInstrProfReader::readHeader() {
+ const unsigned char *Start =
+ (const unsigned char *)DataBuffer->getBufferStart();
+ const unsigned char *Cur = Start;
+ if ((const unsigned char *)DataBuffer->getBufferEnd() - Cur < 24)
+ return error(instrprof_error::truncated);
+
+ using namespace support;
+
+ // Check the magic number.
+ uint64_t Magic = endian::readNext<uint64_t, little, unaligned>(Cur);
+ if (Magic != IndexedInstrProf::Magic)
+ return error(instrprof_error::bad_magic);
+
+ // Read the version.
+ uint64_t Version = endian::readNext<uint64_t, little, unaligned>(Cur);
+ if (Version != IndexedInstrProf::Version)
+ return error(instrprof_error::unsupported_version);
+
+ // Read the maximal function count.
+ MaxFunctionCount = endian::readNext<uint64_t, little, unaligned>(Cur);
+
+ // Read the hash type and start offset.
+ IndexedInstrProf::HashT HashType = static_cast<IndexedInstrProf::HashT>(
+ endian::readNext<uint64_t, little, unaligned>(Cur));
+ if (HashType > IndexedInstrProf::HashT::Last)
+ return error(instrprof_error::unsupported_hash_type);
+ uint64_t HashOffset = endian::readNext<uint64_t, little, unaligned>(Cur);
+
+ // The rest of the file is an on disk hash table.
+ Index.reset(InstrProfReaderIndex::Create(Start + HashOffset, Cur, Start,
+ InstrProfLookupTrait(HashType)));
+ // Set up our iterator for readNextRecord.
+ RecordIterator = Index->data_begin();
+
+ return success();
+}
+
+error_code IndexedInstrProfReader::getFunctionCounts(
+ StringRef FuncName, uint64_t &FuncHash, std::vector<uint64_t> &Counts) {
+ const auto &Iter = Index->find(FuncName);
+ if (Iter == Index->end())
+ return error(instrprof_error::unknown_function);
+
+ // Found it. Make sure it's valid before giving back a result.
+ const InstrProfRecord &Record = *Iter;
+ if (Record.Name.empty())
+ return error(instrprof_error::malformed);
+ FuncHash = Record.Hash;
+ Counts = Record.Counts;
+ return success();
+}
+
+error_code IndexedInstrProfReader::readNextRecord(InstrProfRecord &Record) {
+ // Are we out of records?
+ if (RecordIterator == Index->data_end())
+ return error(instrprof_error::eof);
+
+ // Read the next one.
+ Record = *RecordIterator;
+ ++RecordIterator;
+ if (Record.Name.empty())
+ return error(instrprof_error::malformed);
+ return success();
+}
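readHeader() above consumes five little-endian 64-bit fields before handing the
rest of the file to the on-disk hash table (note that the truncation guard
reserves only 24 bytes although 40 are read). The prologue as a descriptive
sketch, not a type the reader actually defines:

#include <cstdint>

// On-disk prologue of the indexed format; all fields little-endian.
struct IndexedProfHeaderSketch {
  uint64_t Magic;            // 0x8169666f72706cff, i.e. "\xfflprofi\x81"
  uint64_t Version;          // currently 1
  uint64_t MaxFunctionCount; // largest entry counter seen by the writer
  uint64_t HashType;         // must be <= HashT::Last
  uint64_t HashOffset;       // file offset of the OnDiskChainedHashTable
};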
diff --git a/lib/ProfileData/InstrProfWriter.cpp b/lib/ProfileData/InstrProfWriter.cpp
index 3024f96..83c41d9 100644
--- a/lib/ProfileData/InstrProfWriter.cpp
+++ b/lib/ProfileData/InstrProfWriter.cpp
@@ -13,10 +13,59 @@
//===----------------------------------------------------------------------===//
#include "llvm/ProfileData/InstrProfWriter.h"
-#include "llvm/Support/Endian.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/EndianStream.h"
+#include "llvm/Support/OnDiskHashTable.h"
+
+#include "InstrProfIndexed.h"
using namespace llvm;
+namespace {
+class InstrProfRecordTrait {
+public:
+ typedef StringRef key_type;
+ typedef StringRef key_type_ref;
+
+ typedef const InstrProfWriter::CounterData *const data_type;
+ typedef const InstrProfWriter::CounterData *const data_type_ref;
+
+ typedef uint64_t hash_value_type;
+ typedef uint64_t offset_type;
+
+ static hash_value_type ComputeHash(key_type_ref K) {
+ return IndexedInstrProf::ComputeHash(IndexedInstrProf::HashType, K);
+ }
+
+ static std::pair<offset_type, offset_type>
+ EmitKeyDataLength(raw_ostream &Out, key_type_ref K, data_type_ref V) {
+ using namespace llvm::support;
+ endian::Writer<little> LE(Out);
+
+ offset_type N = K.size();
+ LE.write<offset_type>(N);
+
+ offset_type M = (1 + V->Counts.size()) * sizeof(uint64_t);
+ LE.write<offset_type>(M);
+
+ return std::make_pair(N, M);
+ }
+
+ static void EmitKey(raw_ostream &Out, key_type_ref K, offset_type N){
+ Out.write(K.data(), N);
+ }
+
+ static void EmitData(raw_ostream &Out, key_type_ref, data_type_ref V,
+ offset_type) {
+ using namespace llvm::support;
+ endian::Writer<little> LE(Out);
+ LE.write<uint64_t>(V->Hash);
+ for (uint64_t I : V->Counts)
+ LE.write<uint64_t>(I);
+ }
+};
+}
+
error_code InstrProfWriter::addFunctionCounts(StringRef FunctionName,
uint64_t FunctionHash,
ArrayRef<uint64_t> Counters) {
@@ -26,7 +75,7 @@ error_code InstrProfWriter::addFunctionCounts(StringRef FunctionName,
auto &Data = FunctionData[FunctionName];
Data.Hash = FunctionHash;
Data.Counts = Counters;
- return instrprof_error::success;;
+ return instrprof_error::success;
}
auto &Data = Where->getValue();
@@ -45,16 +94,33 @@ error_code InstrProfWriter::addFunctionCounts(StringRef FunctionName,
return instrprof_error::success;
}
-void InstrProfWriter::write(raw_ostream &OS) {
- // Write out the counts for each function.
+void InstrProfWriter::write(raw_fd_ostream &OS) {
+ OnDiskChainedHashTableGenerator<InstrProfRecordTrait> Generator;
+ uint64_t MaxFunctionCount = 0;
+
+ // Populate the hash table generator.
for (const auto &I : FunctionData) {
- StringRef Name = I.getKey();
- uint64_t Hash = I.getValue().Hash;
- const std::vector<uint64_t> &Counts = I.getValue().Counts;
-
- OS << Name << "\n" << Hash << "\n" << Counts.size() << "\n";
- for (uint64_t Count : Counts)
- OS << Count << "\n";
- OS << "\n";
+ Generator.insert(I.getKey(), &I.getValue());
+ if (I.getValue().Counts[0] > MaxFunctionCount)
+ MaxFunctionCount = I.getValue().Counts[0];
}
+
+ using namespace llvm::support;
+ endian::Writer<little> LE(OS);
+
+ // Write the header.
+ LE.write<uint64_t>(IndexedInstrProf::Magic);
+ LE.write<uint64_t>(IndexedInstrProf::Version);
+ LE.write<uint64_t>(MaxFunctionCount);
+ LE.write<uint64_t>(static_cast<uint64_t>(IndexedInstrProf::HashType));
+
+ // Save a space to write the hash table start location.
+ uint64_t HashTableStartLoc = OS.tell();
+ LE.write<uint64_t>(0);
+ // Write the hash table.
+ uint64_t HashTableStart = Generator.Emit(OS);
+
+ // Go back and fill in the hash table start.
+ OS.seek(HashTableStartLoc);
+ LE.write<uint64_t>(HashTableStart);
}
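write() uses a reserve-then-patch pattern: it leaves an 8-byte placeholder,
emits the hash table, and seeks back to record where the table began. That is
why the parameter was narrowed from raw_ostream to the seekable raw_fd_ostream.
The pattern in isolation, hedged:

#include "llvm/Support/EndianStream.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

// EmitPayload stands in for Generator.Emit(OS) and returns the payload's
// start offset.
void writeWithBackPatch(raw_fd_ostream &OS,
                        uint64_t (*EmitPayload)(raw_fd_ostream &)) {
  support::endian::Writer<support::little> LE(OS);
  uint64_t PatchLoc = OS.tell();
  LE.write<uint64_t>(0);            // placeholder for the start offset
  uint64_t Start = EmitPayload(OS); // emit the real data
  OS.seek(PatchLoc);
  LE.write<uint64_t>(Start);        // back-patch the recorded offset
}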
diff --git a/lib/Support/APFloat.cpp b/lib/Support/APFloat.cpp
index 85ce31b..f9fe095 100644
--- a/lib/Support/APFloat.cpp
+++ b/lib/Support/APFloat.cpp
@@ -1358,7 +1358,7 @@ APFloat::addOrSubtractSpecials(const APFloat &rhs, bool subtract)
{
switch (PackCategoriesIntoKey(category, rhs.category)) {
default:
- llvm_unreachable(0);
+ llvm_unreachable(nullptr);
case PackCategoriesIntoKey(fcNaN, fcZero):
case PackCategoriesIntoKey(fcNaN, fcNormal):
@@ -1485,7 +1485,7 @@ APFloat::multiplySpecials(const APFloat &rhs)
{
switch (PackCategoriesIntoKey(category, rhs.category)) {
default:
- llvm_unreachable(0);
+ llvm_unreachable(nullptr);
case PackCategoriesIntoKey(fcNaN, fcZero):
case PackCategoriesIntoKey(fcNaN, fcNormal):
@@ -1529,7 +1529,7 @@ APFloat::divideSpecials(const APFloat &rhs)
{
switch (PackCategoriesIntoKey(category, rhs.category)) {
default:
- llvm_unreachable(0);
+ llvm_unreachable(nullptr);
case PackCategoriesIntoKey(fcZero, fcNaN):
case PackCategoriesIntoKey(fcNormal, fcNaN):
@@ -1570,7 +1570,7 @@ APFloat::modSpecials(const APFloat &rhs)
{
switch (PackCategoriesIntoKey(category, rhs.category)) {
default:
- llvm_unreachable(0);
+ llvm_unreachable(nullptr);
case PackCategoriesIntoKey(fcNaN, fcZero):
case PackCategoriesIntoKey(fcNaN, fcNormal):
@@ -1679,7 +1679,7 @@ APFloat::multiply(const APFloat &rhs, roundingMode rounding_mode)
fs = multiplySpecials(rhs);
if (isFiniteNonZero()) {
- lostFraction lost_fraction = multiplySignificand(rhs, 0);
+ lostFraction lost_fraction = multiplySignificand(rhs, nullptr);
fs = normalize(rounding_mode, lost_fraction);
if (lost_fraction != lfExactlyZero)
fs = (opStatus) (fs | opInexact);
@@ -1882,7 +1882,7 @@ APFloat::compare(const APFloat &rhs) const
switch (PackCategoriesIntoKey(category, rhs.category)) {
default:
- llvm_unreachable(0);
+ llvm_unreachable(nullptr);
case PackCategoriesIntoKey(fcNaN, fcZero):
case PackCategoriesIntoKey(fcNaN, fcNormal):
@@ -2439,7 +2439,7 @@ APFloat::roundSignificandWithExponent(const integerPart *decSigParts,
if (exp >= 0) {
/* multiplySignificand leaves the precision-th bit set to 1. */
- calcLostFraction = decSig.multiplySignificand(pow5, NULL);
+ calcLostFraction = decSig.multiplySignificand(pow5, nullptr);
powHUerr = powStatus != opOK;
} else {
calcLostFraction = decSig.divideSignificand(pow5);
@@ -3331,7 +3331,7 @@ APFloat::initFromAPInt(const fltSemantics* Sem, const APInt& api)
if (Sem == &PPCDoubleDouble)
return initFromPPCDoubleDoubleAPInt(api);
- llvm_unreachable(0);
+ llvm_unreachable(nullptr);
}
APFloat
@@ -3795,7 +3795,7 @@ APFloat::opStatus APFloat::next(bool nextDown) {
if (isSignaling()) {
result = opInvalidOp;
// For consistency, propagate the sign of the sNaN to the qNaN.
- makeNaN(false, isNegative(), 0);
+ makeNaN(false, isNegative(), nullptr);
}
break;
case fcZero:
diff --git a/lib/Support/APInt.cpp b/lib/Support/APInt.cpp
index 0c46725..fa929eb 100644
--- a/lib/Support/APInt.cpp
+++ b/lib/Support/APInt.cpp
@@ -12,7 +12,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "apint"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/FoldingSet.h"
#include "llvm/ADT/Hashing.h"
@@ -28,6 +27,8 @@
#include <limits>
using namespace llvm;
+#define DEBUG_TYPE "apint"
+
/// A utility function for allocating memory, checking for allocation failures,
/// and ensuring the contents are zeroed.
inline static uint64_t* getClearedMemory(unsigned numWords) {
@@ -1683,10 +1684,10 @@ void APInt::divide(const APInt LHS, unsigned lhsWords,
// Allocate space for the temporary values we need either on the stack, if
// it will fit, or on the heap if it won't.
unsigned SPACE[128];
- unsigned *U = 0;
- unsigned *V = 0;
- unsigned *Q = 0;
- unsigned *R = 0;
+ unsigned *U = nullptr;
+ unsigned *V = nullptr;
+ unsigned *Q = nullptr;
+ unsigned *R = nullptr;
if ((Remainder?4:3)*n+2*m+1 <= 128) {
U = &SPACE[0];
V = &SPACE[m+n+1];
@@ -1872,7 +1873,7 @@ APInt APInt::udiv(const APInt& RHS) const {
// We have to compute it the hard way. Invoke the Knuth divide algorithm.
APInt Quotient(1,0); // to hold result.
- divide(*this, lhsWords, RHS, rhsWords, &Quotient, 0);
+ divide(*this, lhsWords, RHS, rhsWords, &Quotient, nullptr);
return Quotient;
}
@@ -1920,7 +1921,7 @@ APInt APInt::urem(const APInt& RHS) const {
// We have to compute it the hard way. Invoke the Knuth divide algorithm.
APInt Remainder(1,0);
- divide(*this, lhsWords, RHS, rhsWords, 0, &Remainder);
+ divide(*this, lhsWords, RHS, rhsWords, nullptr, &Remainder);
return Remainder;
}
diff --git a/lib/Support/Allocator.cpp b/lib/Support/Allocator.cpp
index 7e17748..7c306b2 100644
--- a/lib/Support/Allocator.cpp
+++ b/lib/Support/Allocator.cpp
@@ -21,29 +21,10 @@
namespace llvm {
-SlabAllocator::~SlabAllocator() { }
-
-MallocSlabAllocator::~MallocSlabAllocator() { }
-
-MemSlab *MallocSlabAllocator::Allocate(size_t Size) {
- MemSlab *Slab = (MemSlab*)Allocator.Allocate(Size, 0);
- Slab->Size = Size;
- Slab->NextPtr = 0;
- return Slab;
-}
-
-void MallocSlabAllocator::Deallocate(MemSlab *Slab) {
- Allocator.Deallocate(Slab);
-}
-
-void BumpPtrAllocatorBase::PrintStats() const {
- unsigned NumSlabs = 0;
- size_t TotalMemory = 0;
- for (MemSlab *Slab = CurSlab; Slab != 0; Slab = Slab->NextPtr) {
- TotalMemory += Slab->Size;
- ++NumSlabs;
- }
+namespace detail {
+void printBumpPtrAllocatorStats(unsigned NumSlabs, size_t BytesAllocated,
+ size_t TotalMemory) {
errs() << "\nNumber of memory regions: " << NumSlabs << '\n'
<< "Bytes used: " << BytesAllocated << '\n'
<< "Bytes allocated: " << TotalMemory << '\n'
@@ -51,13 +32,7 @@ void BumpPtrAllocatorBase::PrintStats() const {
<< " (includes alignment, etc)\n";
}
-size_t BumpPtrAllocatorBase::getTotalMemory() const {
- size_t TotalMemory = 0;
- for (MemSlab *Slab = CurSlab; Slab != 0; Slab = Slab->NextPtr) {
- TotalMemory += Slab->Size;
- }
- return TotalMemory;
-}
+} // End namespace detail.
void PrintRecyclerStats(size_t Size,
size_t Align,
diff --git a/lib/Support/Atomic.cpp b/lib/Support/Atomic.cpp
index 9559ad7..2ef32b0 100644
--- a/lib/Support/Atomic.cpp
+++ b/lib/Support/Atomic.cpp
@@ -17,6 +17,7 @@
using namespace llvm;
#if defined(_MSC_VER)
+#include <Intrin.h>
#include <windows.h>
#undef MemoryFence
#endif
diff --git a/lib/Support/BlockFrequency.cpp b/lib/Support/BlockFrequency.cpp
index 00cf75b..6f7e341 100644
--- a/lib/Support/BlockFrequency.cpp
+++ b/lib/Support/BlockFrequency.cpp
@@ -18,94 +18,8 @@
using namespace llvm;
-/// Multiply FREQ by N and store result in W array.
-static void mult96bit(uint64_t freq, uint32_t N, uint32_t W[3]) {
- uint64_t u0 = freq & UINT32_MAX;
- uint64_t u1 = freq >> 32;
-
- // Represent 96-bit value as W[2]:W[1]:W[0];
- uint64_t t = u0 * N;
- uint64_t k = t >> 32;
- W[0] = t;
- t = u1 * N + k;
- W[1] = t;
- W[2] = t >> 32;
-}
-
-/// Divide 96-bit value stored in W[2]:W[1]:W[0] by D. Since our word size is a
-/// 32 bit unsigned integer, we can use a short division algorithm.
-static uint64_t divrem96bit(uint32_t W[3], uint32_t D, uint32_t *Rout) {
- // We assume that W[2] is non-zero since if W[2] is not then the user should
- // just use hardware division.
- assert(W[2] && "This routine assumes that W[2] is non-zero since if W[2] is "
- "zero, the caller should just use 64/32 hardware.");
- uint32_t Q[3] = { 0, 0, 0 };
-
- // The generalized short division algorithm sets i to m + n - 1, where n is
- // the number of words in the divisor and m is the number of words by which
- // the dividend exceeds the divisor (i.e. m + n == the length of the dividend
- // in words). Due to our assumption that W[2] is non-zero, we know that the
- // dividend is of length 3, implying (since n is 1) that m = 2. Thus we set i to
- // m + n - 1 = 2 + 1 - 1 = 2.
- uint32_t R = 0;
- for (int i = 2; i >= 0; --i) {
- uint64_t PartialD = uint64_t(R) << 32 | W[i];
- if (PartialD == 0) {
- Q[i] = 0;
- R = 0;
- } else if (PartialD < D) {
- Q[i] = 0;
- R = uint32_t(PartialD);
- } else if (PartialD == D) {
- Q[i] = 1;
- R = 0;
- } else {
- Q[i] = uint32_t(PartialD / D);
- R = uint32_t(PartialD - (Q[i] * D));
- }
- }
-
- // If Q[2] is non-zero, then we overflowed.
- uint64_t Result;
- if (Q[2]) {
- Result = UINT64_MAX;
- R = D;
- } else {
- // Form the final uint64_t result, avoiding endianness issues.
- Result = uint64_t(Q[0]) | (uint64_t(Q[1]) << 32);
- }
-
- if (Rout)
- *Rout = R;
-
- return Result;
-}
-
-uint32_t BlockFrequency::scale(uint32_t N, uint32_t D) {
- assert(D != 0 && "Division by zero");
-
- // Calculate Frequency * N.
- uint64_t MulLo = (Frequency & UINT32_MAX) * N;
- uint64_t MulHi = (Frequency >> 32) * N;
- uint64_t MulRes = (MulHi << 32) + MulLo;
-
- // If the product fits in 64 bits, just use built-in division.
- if (MulHi <= UINT32_MAX && MulRes >= MulLo) {
- Frequency = MulRes / D;
- return MulRes % D;
- }
-
- // Product overflowed, use 96-bit operations.
- // 96-bit value represented as W[2]:W[1]:W[0].
- uint32_t W[3];
- uint32_t R;
- mult96bit(Frequency, N, W);
- Frequency = divrem96bit(W, D, &R);
- return R;
-}
-
BlockFrequency &BlockFrequency::operator*=(const BranchProbability &Prob) {
- scale(Prob.getNumerator(), Prob.getDenominator());
+ Frequency = Prob.scale(Frequency);
return *this;
}
@@ -117,7 +31,7 @@ BlockFrequency::operator*(const BranchProbability &Prob) const {
}
BlockFrequency &BlockFrequency::operator/=(const BranchProbability &Prob) {
- scale(Prob.getDenominator(), Prob.getNumerator());
+ Frequency = Prob.scaleByInverse(Frequency);
return *this;
}
@@ -156,8 +70,3 @@ BlockFrequency &BlockFrequency::operator>>=(const unsigned count) {
Frequency |= Frequency == 0;
return *this;
}
-
-uint32_t BlockFrequency::scale(const BranchProbability &Prob) {
- return scale(Prob.getNumerator(), Prob.getDenominator());
-}
-
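BlockFrequency scaling now simply delegates to BranchProbability, which is why
the 96-bit helpers above could be deleted wholesale. A hedged example of the
resulting semantics (integer division rounds down):

#include "llvm/Support/BlockFrequency.h"
#include "llvm/Support/BranchProbability.h"
using namespace llvm;

uint64_t oneThirdOf1000() {
  BlockFrequency F(1000);
  F *= BranchProbability(1, 3); // scale by 1/3
  return F.getFrequency();      // 333
}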
diff --git a/lib/Support/BranchProbability.cpp b/lib/Support/BranchProbability.cpp
index e8b83e5..65878d6 100644
--- a/lib/Support/BranchProbability.cpp
+++ b/lib/Support/BranchProbability.cpp
@@ -18,19 +18,56 @@
using namespace llvm;
-void BranchProbability::print(raw_ostream &OS) const {
- OS << N << " / " << D << " = " << format("%g%%", ((double)N / D) * 100.0);
+raw_ostream &BranchProbability::print(raw_ostream &OS) const {
+ return OS << N << " / " << D << " = "
+ << format("%g%%", ((double)N / D) * 100.0);
}
-void BranchProbability::dump() const {
- dbgs() << *this << '\n';
-}
+void BranchProbability::dump() const { print(dbgs()) << '\n'; }
+
+static uint64_t scale(uint64_t Num, uint32_t N, uint32_t D) {
+ assert(D && "divide by 0");
+
+ // Fast paths: scaling zero, or multiplying by 1.0 (N == D).
+ if (!Num || D == N)
+ return Num;
+
+ // Split Num into upper and lower parts to multiply, then recombine.
+ uint64_t ProductHigh = (Num >> 32) * N;
+ uint64_t ProductLow = (Num & UINT32_MAX) * N;
+
+ // Split into 32-bit digits.
+ uint32_t Upper32 = ProductHigh >> 32;
+ uint32_t Lower32 = ProductLow & UINT32_MAX;
+ uint32_t Mid32Partial = ProductHigh & UINT32_MAX;
+ uint32_t Mid32 = Mid32Partial + (ProductLow >> 32);
+
+ // Carry.
+ Upper32 += Mid32 < Mid32Partial;
-namespace llvm {
+ // Check for overflow.
+ if (Upper32 >= D)
+ return UINT64_MAX;
+
+ uint64_t Rem = (uint64_t(Upper32) << 32) | Mid32;
+ uint64_t UpperQ = Rem / D;
+
+ // Check for overflow.
+ if (UpperQ > UINT32_MAX)
+ return UINT64_MAX;
+
+ Rem = ((Rem % D) << 32) | Lower32;
+ uint64_t LowerQ = Rem / D;
+ uint64_t Q = (UpperQ << 32) + LowerQ;
+
+ // Check for overflow.
+ return Q < LowerQ ? UINT64_MAX : Q;
+}
-raw_ostream &operator<<(raw_ostream &OS, const BranchProbability &Prob) {
- Prob.print(OS);
- return OS;
+uint64_t BranchProbability::scale(uint64_t Num) const {
+ return ::scale(Num, N, D);
}
+uint64_t BranchProbability::scaleByInverse(uint64_t Num) const {
+ return ::scale(Num, D, N);
}
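The new static scale() computes Num * N / D exactly: the 96-bit product is kept
as three 32-bit digits and divided by schoolbook long division, saturating to
UINT64_MAX when the quotient cannot fit in 64 bits. Reference semantics,
assuming a compiler with the __uint128_t extension the shipped code
deliberately avoids:

#include <cstdint>

uint64_t scaleRef(uint64_t Num, uint32_t N, uint32_t D) {
  // e.g. scaleRef(1ULL << 40, 3, 4) == 3ULL << 38
  __uint128_t Q = (__uint128_t)Num * N / D;
  return Q > UINT64_MAX ? (uint64_t)UINT64_MAX : (uint64_t)Q;
}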
diff --git a/lib/Support/CommandLine.cpp b/lib/Support/CommandLine.cpp
index b3c2614..37bbf48 100644
--- a/lib/Support/CommandLine.cpp
+++ b/lib/Support/CommandLine.cpp
@@ -38,6 +38,8 @@
using namespace llvm;
using namespace cl;
+#define DEBUG_TYPE "commandline"
+
//===----------------------------------------------------------------------===//
// Template instantiations and anchors.
//
@@ -81,7 +83,7 @@ void StringSaver::anchor() {}
// Globals for name and overview of program. Program name is not a string to
// avoid static ctor/dtor issues.
static char ProgramName[80] = "<premain>";
-static const char *ProgramOverview = 0;
+static const char *ProgramOverview = nullptr;
// This collects additional help to be printed.
static ManagedStatic<std::vector<const char*> > MoreHelp;
@@ -100,10 +102,10 @@ void cl::MarkOptionsChanged() {
/// RegisteredOptionList - This is the list of the command line options that
/// have statically constructed themselves.
-static Option *RegisteredOptionList = 0;
+static Option *RegisteredOptionList = nullptr;
void Option::addArgument() {
- assert(NextRegistered == 0 && "argument multiply registered!");
+ assert(!NextRegistered && "argument multiply registered!");
NextRegistered = RegisteredOptionList;
RegisteredOptionList = this;
@@ -111,7 +113,7 @@ void Option::addArgument() {
}
void Option::removeArgument() {
- assert(NextRegistered != 0 && "argument never registered");
+ assert(NextRegistered && "argument never registered");
assert(RegisteredOptionList == this && "argument is not the last registered");
RegisteredOptionList = NextRegistered;
MarkOptionsChanged();
@@ -144,7 +146,7 @@ static void GetOptionInfo(SmallVectorImpl<Option*> &PositionalOpts,
SmallVectorImpl<Option*> &SinkOpts,
StringMap<Option*> &OptionsMap) {
SmallVector<const char*, 16> OptionNames;
- Option *CAOpt = 0; // The ConsumeAfter option if it exists.
+ Option *CAOpt = nullptr; // The ConsumeAfter option if it exists.
for (Option *O = RegisteredOptionList; O; O = O->getNextRegisteredOption()) {
// If this option wants to handle multiple option names, get the full set.
// This handles enum options like "-O1 -O2" etc.
@@ -189,7 +191,7 @@ static void GetOptionInfo(SmallVectorImpl<Option*> &PositionalOpts,
static Option *LookupOption(StringRef &Arg, StringRef &Value,
const StringMap<Option*> &OptionsMap) {
// Reject all dashes.
- if (Arg.empty()) return 0;
+ if (Arg.empty()) return nullptr;
size_t EqualPos = Arg.find('=');
@@ -197,14 +199,14 @@ static Option *LookupOption(StringRef &Arg, StringRef &Value,
if (EqualPos == StringRef::npos) {
// Look up the option.
StringMap<Option*>::const_iterator I = OptionsMap.find(Arg);
- return I != OptionsMap.end() ? I->second : 0;
+ return I != OptionsMap.end() ? I->second : nullptr;
}
// If the argument before the = is a valid option name, we match. If not,
// return Arg unmolested.
StringMap<Option*>::const_iterator I =
OptionsMap.find(Arg.substr(0, EqualPos));
- if (I == OptionsMap.end()) return 0;
+ if (I == OptionsMap.end()) return nullptr;
Value = Arg.substr(EqualPos+1);
Arg = Arg.substr(0, EqualPos);
@@ -219,7 +221,7 @@ static Option *LookupNearestOption(StringRef Arg,
const StringMap<Option*> &OptionsMap,
std::string &NearestString) {
// Reject all dashes.
- if (Arg.empty()) return 0;
+ if (Arg.empty()) return nullptr;
// Split on any equal sign.
std::pair<StringRef, StringRef> SplitArg = Arg.split('=');
@@ -227,7 +229,7 @@ static Option *LookupNearestOption(StringRef Arg,
StringRef &RHS = SplitArg.second;
// Find the closest match.
- Option *Best = 0;
+ Option *Best = nullptr;
unsigned BestDistance = 0;
for (StringMap<Option*>::const_iterator it = OptionsMap.begin(),
ie = OptionsMap.end(); it != ie; ++it) {
@@ -300,7 +302,7 @@ static inline bool ProvideOption(Option *Handler, StringRef ArgName,
// Enforce value requirements
switch (Handler->getValueExpectedFlag()) {
case ValueRequired:
- if (Value.data() == 0) { // No value specified?
+ if (!Value.data()) { // No value specified?
if (i+1 >= argc)
return Handler->error("requires a value!");
// Steal the next argument, like for '-o filename'
@@ -349,7 +351,7 @@ static inline bool ProvideOption(Option *Handler, StringRef ArgName,
static bool ProvidePositionalOption(Option *Handler, StringRef Arg, int i) {
int Dummy = i;
- return ProvideOption(Handler, Handler->ArgStr, Arg, 0, 0, Dummy);
+ return ProvideOption(Handler, Handler->ArgStr, Arg, 0, nullptr, Dummy);
}
@@ -385,7 +387,7 @@ static Option *getOptionPred(StringRef Name, size_t &Length,
Length = Name.size();
return OMI->second; // Found one!
}
- return 0; // No option found!
+ return nullptr; // No option found!
}
/// HandlePrefixedOrGroupedOption - The specified argument string (which started
@@ -395,12 +397,12 @@ static Option *getOptionPred(StringRef Name, size_t &Length,
static Option *HandlePrefixedOrGroupedOption(StringRef &Arg, StringRef &Value,
bool &ErrorParsing,
const StringMap<Option*> &OptionsMap) {
- if (Arg.size() == 1) return 0;
+ if (Arg.size() == 1) return nullptr;
// Do the lookup!
size_t Length = 0;
Option *PGOpt = getOptionPred(Arg, Length, isPrefixedOrGrouping, OptionsMap);
- if (PGOpt == 0) return 0;
+ if (!PGOpt) return nullptr;
// If the option is a prefixed option, then the value is simply the
// rest of the name... so fall through to later processing, by
@@ -427,7 +429,7 @@ static Option *HandlePrefixedOrGroupedOption(StringRef &Arg, StringRef &Value,
"Option can not be cl::Grouping AND cl::ValueRequired!");
int Dummy = 0;
ErrorParsing |= ProvideOption(PGOpt, OneArgName,
- StringRef(), 0, 0, Dummy);
+ StringRef(), 0, nullptr, Dummy);
// Get the next grouping option.
PGOpt = getOptionPred(Arg, Length, isGrouping, OptionsMap);
@@ -746,7 +748,7 @@ void cl::ParseCommandLineOptions(int argc, const char * const *argv,
argc = static_cast<int>(newArgv.size());
// Copy the program name into ProgName, making sure not to overflow it.
- std::string ProgName = sys::path::filename(argv[0]);
+ StringRef ProgName = sys::path::filename(argv[0]);
size_t Len = std::min(ProgName.size(), size_t(79));
memcpy(ProgramName, ProgName.data(), Len);
ProgramName[Len] = '\0';
@@ -760,7 +762,7 @@ void cl::ParseCommandLineOptions(int argc, const char * const *argv,
// Determine whether or not there are an unlimited number of positionals
bool HasUnlimitedPositionals = false;
- Option *ConsumeAfterOpt = 0;
+ Option *ConsumeAfterOpt = nullptr;
if (!PositionalOpts.empty()) {
if (PositionalOpts[0]->getNumOccurrencesFlag() == cl::ConsumeAfter) {
assert(PositionalOpts.size() > 1 &&
@@ -770,7 +772,7 @@ void cl::ParseCommandLineOptions(int argc, const char * const *argv,
// Calculate how many positional values are _required_.
bool UnboundedFound = false;
- for (size_t i = ConsumeAfterOpt != 0, e = PositionalOpts.size();
+ for (size_t i = ConsumeAfterOpt ? 1 : 0, e = PositionalOpts.size();
i != e; ++i) {
Option *Opt = PositionalOpts[i];
if (RequiresValue(Opt))
@@ -806,13 +808,13 @@ void cl::ParseCommandLineOptions(int argc, const char * const *argv,
// If the program has named positional arguments, and the name has been run
// across, keep track of which positional argument was named. Otherwise put
// the positional args into the PositionalVals list...
- Option *ActivePositionalArg = 0;
+ Option *ActivePositionalArg = nullptr;
// Loop over all of the arguments... processing them.
bool DashDashFound = false; // Have we read '--'?
for (int i = 1; i < argc; ++i) {
- Option *Handler = 0;
- Option *NearestHandler = 0;
+ Option *Handler = nullptr;
+ Option *NearestHandler = nullptr;
std::string NearestHandlerString;
StringRef Value;
StringRef ArgName = "";
@@ -845,8 +847,7 @@ void cl::ParseCommandLineOptions(int argc, const char * const *argv,
// All of the positional arguments have been fulfilled, give the rest to
// the consume after option... if it's specified...
//
- if (PositionalVals.size() >= NumPositionalRequired &&
- ConsumeAfterOpt != 0) {
+ if (PositionalVals.size() >= NumPositionalRequired && ConsumeAfterOpt) {
for (++i; i < argc; ++i)
PositionalVals.push_back(std::make_pair(argv[i],i));
break; // Handle outside of the argument processing loop...
@@ -884,18 +885,18 @@ void cl::ParseCommandLineOptions(int argc, const char * const *argv,
Handler = LookupOption(ArgName, Value, Opts);
// Check to see if this "option" is really a prefixed or grouped argument.
- if (Handler == 0)
+ if (!Handler)
Handler = HandlePrefixedOrGroupedOption(ArgName, Value,
ErrorParsing, Opts);
// Otherwise, look for the closest available option to report to the user
// in the upcoming error.
- if (Handler == 0 && SinkOpts.empty())
+ if (!Handler && SinkOpts.empty())
NearestHandler = LookupNearestOption(ArgName, Opts,
NearestHandlerString);
}
- if (Handler == 0) {
+ if (!Handler) {
if (SinkOpts.empty()) {
errs() << ProgramName << ": Unknown command line argument '"
<< argv[i] << "'. Try: '" << argv[0] << " -help'\n";
@@ -939,7 +940,7 @@ void cl::ParseCommandLineOptions(int argc, const char * const *argv,
<< " positional arguments: See: " << argv[0] << " -help\n";
ErrorParsing = true;
- } else if (ConsumeAfterOpt == 0) {
+ } else if (!ConsumeAfterOpt) {
// Positional args have already been handled if ConsumeAfter is specified.
unsigned ValNo = 0, NumVals = static_cast<unsigned>(PositionalVals.size());
for (size_t i = 0, e = PositionalOpts.size(); i != e; ++i) {
@@ -1044,7 +1045,7 @@ void cl::ParseCommandLineOptions(int argc, const char * const *argv,
//
bool Option::error(const Twine &Message, StringRef ArgName) {
- if (ArgName.data() == 0) ArgName = ArgStr;
+ if (!ArgName.data()) ArgName = ArgStr;
if (ArgName.empty())
errs() << HelpStr; // Be nice for positional arguments
else
@@ -1455,12 +1456,12 @@ public:
outs() << "USAGE: " << ProgramName << " [options]";
// Print out the positional options.
- Option *CAOpt = 0; // The cl::ConsumeAfter option, if it exists...
+ Option *CAOpt = nullptr; // The cl::ConsumeAfter option, if it exists...
if (!PositionalOpts.empty() &&
PositionalOpts[0]->getNumOccurrencesFlag() == ConsumeAfter)
CAOpt = PositionalOpts[0];
- for (size_t i = CAOpt != 0, e = PositionalOpts.size(); i != e; ++i) {
+ for (size_t i = CAOpt != nullptr, e = PositionalOpts.size(); i != e; ++i) {
if (PositionalOpts[i]->ArgStr[0])
outs() << " --" << PositionalOpts[i]->ArgStr;
outs() << " " << PositionalOpts[i]->HelpStr;
@@ -1555,7 +1556,7 @@ protected:
outs() << (*Category)->getName() << ":\n";
// Check if description is set.
- if ((*Category)->getDescription() != 0)
+ if ((*Category)->getDescription() != nullptr)
outs() << (*Category)->getDescription() << "\n\n";
else
outs() << "\n";
@@ -1686,9 +1687,9 @@ void cl::PrintOptionValues() {
Opts[i].second->printOptionValue(MaxArgLen, PrintAllOptions);
}
-static void (*OverrideVersionPrinter)() = 0;
+static void (*OverrideVersionPrinter)() = nullptr;
-static std::vector<void (*)()>* ExtraVersionPrinters = 0;
+static std::vector<void (*)()>* ExtraVersionPrinters = nullptr;
namespace {
class VersionPrinter {
@@ -1721,7 +1722,7 @@ public:
void operator=(bool OptionWasSpecified) {
if (!OptionWasSpecified) return;
- if (OverrideVersionPrinter != 0) {
+ if (OverrideVersionPrinter != nullptr) {
(*OverrideVersionPrinter)();
exit(0);
}
@@ -1729,7 +1730,7 @@ public:
// Iterate over any registered extra printers and call them to add further
// information.
- if (ExtraVersionPrinters != 0) {
+ if (ExtraVersionPrinters != nullptr) {
outs() << '\n';
for (std::vector<void (*)()>::iterator I = ExtraVersionPrinters->begin(),
E = ExtraVersionPrinters->end();
@@ -1779,7 +1780,7 @@ void cl::SetVersionPrinter(void (*func)()) {
}
void cl::AddExtraVersionPrinter(void (*func)()) {
- if (ExtraVersionPrinters == 0)
+ if (!ExtraVersionPrinters)
ExtraVersionPrinters = new std::vector<void (*)()>;
ExtraVersionPrinters->push_back(func);
diff --git a/lib/Support/Compression.cpp b/lib/Support/Compression.cpp
index 5e53361..c32eb213 100644
--- a/lib/Support/Compression.cpp
+++ b/lib/Support/Compression.cpp
@@ -16,7 +16,6 @@
#include "llvm/Config/config.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MemoryBuffer.h"
#if LLVM_ENABLE_ZLIB == 1 && HAVE_ZLIB_H
#include <zlib.h>
#endif
@@ -47,36 +46,26 @@ static zlib::Status encodeZlibReturnValue(int ReturnValue) {
bool zlib::isAvailable() { return true; }
zlib::Status zlib::compress(StringRef InputBuffer,
- std::unique_ptr<MemoryBuffer> &CompressedBuffer,
+ SmallVectorImpl<char> &CompressedBuffer,
CompressionLevel Level) {
unsigned long CompressedSize = ::compressBound(InputBuffer.size());
- std::unique_ptr<char[]> TmpBuffer(new char[CompressedSize]);
+ CompressedBuffer.resize(CompressedSize);
int CLevel = encodeZlibCompressionLevel(Level);
Status Res = encodeZlibReturnValue(::compress2(
- (Bytef *)TmpBuffer.get(), &CompressedSize,
+ (Bytef *)CompressedBuffer.data(), &CompressedSize,
(const Bytef *)InputBuffer.data(), InputBuffer.size(), CLevel));
- if (Res == StatusOK) {
- CompressedBuffer.reset(MemoryBuffer::getMemBufferCopy(
- StringRef(TmpBuffer.get(), CompressedSize)));
- // Tell MSan that memory initialized by zlib is valid.
- __msan_unpoison(CompressedBuffer->getBufferStart(), CompressedSize);
- }
+ CompressedBuffer.resize(CompressedSize);
return Res;
}
zlib::Status zlib::uncompress(StringRef InputBuffer,
- std::unique_ptr<MemoryBuffer> &UncompressedBuffer,
+ SmallVectorImpl<char> &UncompressedBuffer,
size_t UncompressedSize) {
- std::unique_ptr<char[]> TmpBuffer(new char[UncompressedSize]);
- Status Res = encodeZlibReturnValue(
- ::uncompress((Bytef *)TmpBuffer.get(), (uLongf *)&UncompressedSize,
- (const Bytef *)InputBuffer.data(), InputBuffer.size()));
- if (Res == StatusOK) {
- UncompressedBuffer.reset(MemoryBuffer::getMemBufferCopy(
- StringRef(TmpBuffer.get(), UncompressedSize)));
- // Tell MSan that memory initialized by zlib is valid.
- __msan_unpoison(UncompressedBuffer->getBufferStart(), UncompressedSize);
- }
+ UncompressedBuffer.resize(UncompressedSize);
+ Status Res = encodeZlibReturnValue(::uncompress(
+ (Bytef *)UncompressedBuffer.data(), (uLongf *)&UncompressedSize,
+ (const Bytef *)InputBuffer.data(), InputBuffer.size()));
+ UncompressedBuffer.resize(UncompressedSize);
return Res;
}
@@ -87,12 +76,12 @@ uint32_t zlib::crc32(StringRef Buffer) {
#else
bool zlib::isAvailable() { return false; }
zlib::Status zlib::compress(StringRef InputBuffer,
- std::unique_ptr<MemoryBuffer> &CompressedBuffer,
+ SmallVectorImpl<char> &CompressedBuffer,
CompressionLevel Level) {
return zlib::StatusUnsupported;
}
zlib::Status zlib::uncompress(StringRef InputBuffer,
- std::unique_ptr<MemoryBuffer> &UncompressedBuffer,
+ SmallVectorImpl<char> &UncompressedBuffer,
size_t UncompressedSize) {
return zlib::StatusUnsupported;
}
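Note: with the SmallVectorImpl-based signatures the caller owns the output storage and no intermediate MemoryBuffer copy is made. A minimal round-trip sketch, assuming zlib is available and the header supplies a default CompressionLevel; the helper name is hypothetical:

    #include "llvm/ADT/SmallString.h"
    #include "llvm/Support/Compression.h"
    using namespace llvm;

    static bool roundTrips(StringRef Input) {   // hypothetical helper
      SmallString<128> Packed, Unpacked;
      if (zlib::compress(Input, Packed) != zlib::StatusOK)
        return false;
      // The caller must know (or bound) the decompressed size up front.
      if (zlib::uncompress(Packed, Unpacked, Input.size()) != zlib::StatusOK)
        return false;
      return Unpacked.str() == Input;
    }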
diff --git a/lib/Support/CrashRecoveryContext.cpp b/lib/Support/CrashRecoveryContext.cpp
index ccc0089..a426377 100644
--- a/lib/Support/CrashRecoveryContext.cpp
+++ b/lib/Support/CrashRecoveryContext.cpp
@@ -89,16 +89,16 @@ CrashRecoveryContext::~CrashRecoveryContext() {
}
bool CrashRecoveryContext::isRecoveringFromCrash() {
- return tlIsRecoveringFromCrash->get() != 0;
+ return tlIsRecoveringFromCrash->get() != nullptr;
}
CrashRecoveryContext *CrashRecoveryContext::GetCurrent() {
if (!gCrashRecoveryEnabled)
- return 0;
+ return nullptr;
const CrashRecoveryContextImpl *CRCI = CurrentContext->get();
if (!CRCI)
- return 0;
+ return nullptr;
return CRCI->CRC;
}
@@ -120,7 +120,7 @@ CrashRecoveryContext::unregisterCleanup(CrashRecoveryContextCleanup *cleanup) {
if (cleanup == head) {
head = cleanup->next;
if (head)
- head->prev = 0;
+ head->prev = nullptr;
}
else {
cleanup->prev->next = cleanup->next;
@@ -261,7 +261,7 @@ static void CrashRecoverySignalHandler(int Signal) {
sigset_t SigMask;
sigemptyset(&SigMask);
sigaddset(&SigMask, Signal);
- sigprocmask(SIG_UNBLOCK, &SigMask, 0);
+ sigprocmask(SIG_UNBLOCK, &SigMask, nullptr);
if (CRCI)
const_cast<CrashRecoveryContextImpl*>(CRCI)->HandleCrash();
@@ -296,12 +296,12 @@ void CrashRecoveryContext::Disable() {
// Restore the previous signal handlers.
for (unsigned i = 0; i != NumSignals; ++i)
- sigaction(Signals[i], &PrevActions[i], 0);
+ sigaction(Signals[i], &PrevActions[i], nullptr);
}
#endif
-bool CrashRecoveryContext::RunSafely(void (*Fn)(void*), void *UserData) {
+bool CrashRecoveryContext::RunSafely(function_ref<void()> Fn) {
// If crash recovery is disabled, do nothing.
if (gCrashRecoveryEnabled) {
assert(!Impl && "Crash recovery context already initialized!");
@@ -313,7 +313,7 @@ bool CrashRecoveryContext::RunSafely(void (*Fn)(void*), void *UserData) {
}
}
- Fn(UserData);
+ Fn();
return true;
}
@@ -334,8 +334,7 @@ const std::string &CrashRecoveryContext::getBacktrace() const {
namespace {
struct RunSafelyOnThreadInfo {
- void (*Fn)(void*);
- void *Data;
+ function_ref<void()> Fn;
CrashRecoveryContext *CRC;
bool Result;
};
@@ -344,11 +343,11 @@ struct RunSafelyOnThreadInfo {
static void RunSafelyOnThread_Dispatch(void *UserData) {
RunSafelyOnThreadInfo *Info =
reinterpret_cast<RunSafelyOnThreadInfo*>(UserData);
- Info->Result = Info->CRC->RunSafely(Info->Fn, Info->Data);
+ Info->Result = Info->CRC->RunSafely(Info->Fn);
}
-bool CrashRecoveryContext::RunSafelyOnThread(void (*Fn)(void*), void *UserData,
+bool CrashRecoveryContext::RunSafelyOnThread(function_ref<void()> Fn,
unsigned RequestedStackSize) {
- RunSafelyOnThreadInfo Info = { Fn, UserData, this, false };
+ RunSafelyOnThreadInfo Info = { Fn, this, false };
llvm_execute_on_thread(RunSafelyOnThread_Dispatch, &Info, RequestedStackSize);
if (CrashRecoveryContextImpl *CRC = (CrashRecoveryContextImpl *)Impl)
CRC->setSwitchedThread();
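Note: under the function_ref signature the old Fn/UserData pair collapses into a capturing lambda. A minimal sketch; riskyCompute is a hypothetical stand-in for code that may crash:

    #include "llvm/Support/CrashRecoveryContext.h"

    static int riskyCompute(int X) { return X * 2; }  // hypothetical stand-in

    static bool computeSafely(int &Out, int In) {
      llvm::CrashRecoveryContext CRC;
      // The lambda captures Out and In directly; no void* plumbing needed.
      return CRC.RunSafely([&] { Out = riskyCompute(In); });
    }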
diff --git a/lib/Support/DAGDeltaAlgorithm.cpp b/lib/Support/DAGDeltaAlgorithm.cpp
index 29acb7d..0d504ee 100644
--- a/lib/Support/DAGDeltaAlgorithm.cpp
+++ b/lib/Support/DAGDeltaAlgorithm.cpp
@@ -42,6 +42,8 @@
#include <map>
using namespace llvm;
+#define DEBUG_TYPE "dag-delta"
+
namespace {
class DAGDeltaAlgorithmImpl {
diff --git a/lib/Support/DataExtractor.cpp b/lib/Support/DataExtractor.cpp
index a564d21..7b82921 100644
--- a/lib/Support/DataExtractor.cpp
+++ b/lib/Support/DataExtractor.cpp
@@ -44,7 +44,7 @@ static T *getUs(uint32_t *offset_ptr, T *dst, uint32_t count,
// success
return dst;
}
- return NULL;
+ return nullptr;
}
uint8_t DataExtractor::getU8(uint32_t *offset_ptr) const {
@@ -125,7 +125,7 @@ const char *DataExtractor::getCStr(uint32_t *offset_ptr) const {
*offset_ptr = pos + 1;
return Data.data() + offset;
}
- return NULL;
+ return nullptr;
}
uint64_t DataExtractor::getULEB128(uint32_t *offset_ptr) const {
diff --git a/lib/Support/DataStream.cpp b/lib/Support/DataStream.cpp
index 1caeddf..eec8584 100644
--- a/lib/Support/DataStream.cpp
+++ b/lib/Support/DataStream.cpp
@@ -14,7 +14,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "Data-stream"
#include "llvm/Support/DataStream.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/FileSystem.h"
@@ -30,6 +29,8 @@
#endif
using namespace llvm;
+#define DEBUG_TYPE "Data-stream"
+
// Interface goals:
// * StreamableMemoryObject doesn't care about complexities like using
// threads/async callbacks to actually overlap download+compile
@@ -83,7 +84,7 @@ DataStreamer *getDataFileStreamer(const std::string &Filename,
if (error_code e = s->OpenFile(Filename)) {
*StrError = std::string("Could not open ") + Filename + ": " +
e.message() + "\n";
- return NULL;
+ return nullptr;
}
return s;
}
diff --git a/lib/Support/Debug.cpp b/lib/Support/Debug.cpp
index d9cb8a9..ad4d4ef 100644
--- a/lib/Support/Debug.cpp
+++ b/lib/Support/Debug.cpp
@@ -109,7 +109,7 @@ raw_ostream &llvm::dbgs() {
if (EnableDebugBuffering && DebugFlag && DebugBufferSize != 0)
// TODO: Add a handler for SIGUSER1-type signals so the user can
// force a debug dump.
- sys::AddSignalHandler(&debug_user_sig_handler, 0);
+ sys::AddSignalHandler(&debug_user_sig_handler, nullptr);
// Otherwise we've already set the debug stream buffer size to
// zero, disabling buffering so it will output directly to errs().
}
diff --git a/lib/Support/Dwarf.cpp b/lib/Support/Dwarf.cpp
index 6604cc7..c9efa61 100644
--- a/lib/Support/Dwarf.cpp
+++ b/lib/Support/Dwarf.cpp
@@ -100,7 +100,7 @@ const char *llvm::dwarf::TagString(unsigned Tag) {
return "DW_TAG_GNU_formal_parameter_pack";
case DW_TAG_APPLE_property: return "DW_TAG_APPLE_property";
}
- return 0;
+ return nullptr;
}
/// ChildrenString - Return the string for the specified children flag.
@@ -110,7 +110,7 @@ const char *llvm::dwarf::ChildrenString(unsigned Children) {
case DW_CHILDREN_no: return "DW_CHILDREN_no";
case DW_CHILDREN_yes: return "DW_CHILDREN_yes";
}
- return 0;
+ return nullptr;
}
/// AttributeString - Return the string for the specified attribute.
@@ -271,7 +271,7 @@ const char *llvm::dwarf::AttributeString(unsigned Attribute) {
case DW_AT_GNU_pubnames: return "DW_AT_GNU_pubnames";
case DW_AT_GNU_pubtypes: return "DW_AT_GNU_pubtypes";
}
- return 0;
+ return nullptr;
}
/// FormEncodingString - Return the string for the specified form encoding.
@@ -308,7 +308,7 @@ const char *llvm::dwarf::FormEncodingString(unsigned Encoding) {
case DW_FORM_GNU_addr_index: return "DW_FORM_GNU_addr_index";
case DW_FORM_GNU_str_index: return "DW_FORM_GNU_str_index";
}
- return 0;
+ return nullptr;
}
/// OperationEncodingString - Return the string for the specified operation
@@ -477,7 +477,7 @@ const char *llvm::dwarf::OperationEncodingString(unsigned Encoding) {
case DW_OP_GNU_addr_index: return "DW_OP_GNU_addr_index";
case DW_OP_GNU_const_index: return "DW_OP_GNU_const_index";
}
- return 0;
+ return nullptr;
}
/// AttributeEncodingString - Return the string for the specified attribute
@@ -503,7 +503,7 @@ const char *llvm::dwarf::AttributeEncodingString(unsigned Encoding) {
case DW_ATE_lo_user: return "DW_ATE_lo_user";
case DW_ATE_hi_user: return "DW_ATE_hi_user";
}
- return 0;
+ return nullptr;
}
/// DecimalSignString - Return the string for the specified decimal sign
@@ -516,7 +516,7 @@ const char *llvm::dwarf::DecimalSignString(unsigned Sign) {
case DW_DS_leading_separate: return "DW_DS_leading_separate";
case DW_DS_trailing_separate: return "DW_DS_trailing_separate";
}
- return 0;
+ return nullptr;
}
/// EndianityString - Return the string for the specified endianity.
@@ -529,7 +529,7 @@ const char *llvm::dwarf::EndianityString(unsigned Endian) {
case DW_END_lo_user: return "DW_END_lo_user";
case DW_END_hi_user: return "DW_END_hi_user";
}
- return 0;
+ return nullptr;
}
/// AccessibilityString - Return the string for the specified accessibility.
@@ -541,7 +541,7 @@ const char *llvm::dwarf::AccessibilityString(unsigned Access) {
case DW_ACCESS_protected: return "DW_ACCESS_protected";
case DW_ACCESS_private: return "DW_ACCESS_private";
}
- return 0;
+ return nullptr;
}
/// VisibilityString - Return the string for the specified visibility.
@@ -552,7 +552,7 @@ const char *llvm::dwarf::VisibilityString(unsigned Visibility) {
case DW_VIS_exported: return "DW_VIS_exported";
case DW_VIS_qualified: return "DW_VIS_qualified";
}
- return 0;
+ return nullptr;
}
/// VirtualityString - Return the string for the specified virtuality.
@@ -563,7 +563,7 @@ const char *llvm::dwarf::VirtualityString(unsigned Virtuality) {
case DW_VIRTUALITY_virtual: return "DW_VIRTUALITY_virtual";
case DW_VIRTUALITY_pure_virtual: return "DW_VIRTUALITY_pure_virtual";
}
- return 0;
+ return nullptr;
}
/// LanguageString - Return the string for the specified language.
@@ -600,7 +600,7 @@ const char *llvm::dwarf::LanguageString(unsigned Language) {
case DW_LANG_lo_user: return "DW_LANG_lo_user";
case DW_LANG_hi_user: return "DW_LANG_hi_user";
}
- return 0;
+ return nullptr;
}
/// CaseString - Return the string for the specified identifier case.
@@ -612,7 +612,7 @@ const char *llvm::dwarf::CaseString(unsigned Case) {
case DW_ID_down_case: return "DW_ID_down_case";
case DW_ID_case_insensitive: return "DW_ID_case_insensitive";
}
- return 0;
+ return nullptr;
}
/// ConventionString - Return the string for the specified calling convention.
@@ -625,7 +625,7 @@ const char *llvm::dwarf::ConventionString(unsigned Convention) {
case DW_CC_lo_user: return "DW_CC_lo_user";
case DW_CC_hi_user: return "DW_CC_hi_user";
}
- return 0;
+ return nullptr;
}
/// InlineCodeString - Return the string for the specified inline code.
@@ -637,7 +637,7 @@ const char *llvm::dwarf::InlineCodeString(unsigned Code) {
case DW_INL_declared_not_inlined: return "DW_INL_declared_not_inlined";
case DW_INL_declared_inlined: return "DW_INL_declared_inlined";
}
- return 0;
+ return nullptr;
}
/// ArrayOrderString - Return the string for the specified array order.
@@ -647,7 +647,7 @@ const char *llvm::dwarf::ArrayOrderString(unsigned Order) {
case DW_ORD_row_major: return "DW_ORD_row_major";
case DW_ORD_col_major: return "DW_ORD_col_major";
}
- return 0;
+ return nullptr;
}
/// DiscriminantString - Return the string for the specified discriminant
@@ -657,7 +657,7 @@ const char *llvm::dwarf::DiscriminantString(unsigned Discriminant) {
case DW_DSC_label: return "DW_DSC_label";
case DW_DSC_range: return "DW_DSC_range";
}
- return 0;
+ return nullptr;
}
/// LNStandardString - Return the string for the specified line number standard.
@@ -677,7 +677,7 @@ const char *llvm::dwarf::LNStandardString(unsigned Standard) {
case DW_LNS_set_epilogue_begin: return "DW_LNS_set_epilogue_begin";
case DW_LNS_set_isa: return "DW_LNS_set_isa";
}
- return 0;
+ return nullptr;
}
/// LNExtendedString - Return the string for the specified line number extended
@@ -692,7 +692,7 @@ const char *llvm::dwarf::LNExtendedString(unsigned Encoding) {
case DW_LNE_lo_user: return "DW_LNE_lo_user";
case DW_LNE_hi_user: return "DW_LNE_hi_user";
}
- return 0;
+ return nullptr;
}
/// MacinfoString - Return the string for the specified macinfo type encodings.
@@ -706,7 +706,7 @@ const char *llvm::dwarf::MacinfoString(unsigned Encoding) {
case DW_MACINFO_end_file: return "DW_MACINFO_end_file";
case DW_MACINFO_vendor_ext: return "DW_MACINFO_vendor_ext";
}
- return 0;
+ return nullptr;
}
/// CallFrameString - Return the string for the specified call frame instruction
@@ -745,7 +745,7 @@ const char *llvm::dwarf::CallFrameString(unsigned Encoding) {
case DW_CFA_lo_user: return "DW_CFA_lo_user";
case DW_CFA_hi_user: return "DW_CFA_hi_user";
}
- return 0;
+ return nullptr;
}
const char *llvm::dwarf::AtomTypeString(unsigned AT) {
@@ -761,7 +761,7 @@ const char *llvm::dwarf::AtomTypeString(unsigned AT) {
case DW_ATOM_type_flags:
return "DW_ATOM_type_flags";
}
- return 0;
+ return nullptr;
}
const char *llvm::dwarf::GDBIndexEntryKindString(GDBIndexEntryKind Kind) {
diff --git a/lib/Support/DynamicLibrary.cpp b/lib/Support/DynamicLibrary.cpp
index 5d77153..82d7c0c 100644
--- a/lib/Support/DynamicLibrary.cpp
+++ b/lib/Support/DynamicLibrary.cpp
@@ -51,14 +51,14 @@ using namespace llvm::sys;
//=== independent code.
//===----------------------------------------------------------------------===//
-static DenseSet<void *> *OpenedHandles = 0;
+static DenseSet<void *> *OpenedHandles = nullptr;
DynamicLibrary DynamicLibrary::getPermanentLibrary(const char *filename,
std::string *errMsg) {
SmartScopedLock<true> lock(*SymbolsMutex);
void *handle = dlopen(filename, RTLD_LAZY|RTLD_GLOBAL);
- if (handle == 0) {
+ if (!handle) {
if (errMsg) *errMsg = dlerror();
return DynamicLibrary();
}
@@ -66,11 +66,11 @@ DynamicLibrary DynamicLibrary::getPermanentLibrary(const char *filename,
#ifdef __CYGWIN__
// Cygwin searches symbols only in the main executable when given the
// handle from dlopen(NULL, RTLD_GLOBAL).
- if (filename == NULL)
+ if (!filename)
handle = RTLD_DEFAULT;
#endif
- if (OpenedHandles == 0)
+ if (!OpenedHandles)
OpenedHandles = new DenseSet<void *>();
// If we've already loaded this library, dlclose() the handle in order to
@@ -83,7 +83,7 @@ DynamicLibrary DynamicLibrary::getPermanentLibrary(const char *filename,
void *DynamicLibrary::getAddressOfSymbol(const char *symbolName) {
if (!isValid())
- return NULL;
+ return nullptr;
return dlsym(Data, symbolName);
}
@@ -166,7 +166,7 @@ void* DynamicLibrary::SearchForAddressOfSymbol(const char *symbolName) {
#endif
#undef EXPLICIT_SYMBOL
- return 0;
+ return nullptr;
}
#endif // LLVM_ON_WIN32
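Note: a null filename opens the current process image (dlopen(NULL, ...)), which the pointer-style checks above make explicit. A minimal lookup sketch; the helper name is hypothetical:

    #include "llvm/Support/DynamicLibrary.h"
    using namespace llvm;

    static void *findInProcess(const char *SymbolName) {
      std::string Err;
      sys::DynamicLibrary Lib =
          sys::DynamicLibrary::getPermanentLibrary(nullptr, &Err);
      if (!Lib.isValid())
        return nullptr;              // Err holds the dlerror() message
      return Lib.getAddressOfSymbol(SymbolName);
    }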
diff --git a/lib/Support/ErrorHandling.cpp b/lib/Support/ErrorHandling.cpp
index 1aa8303..342c4f0 100644
--- a/lib/Support/ErrorHandling.cpp
+++ b/lib/Support/ErrorHandling.cpp
@@ -34,8 +34,8 @@
using namespace llvm;
-static fatal_error_handler_t ErrorHandler = 0;
-static void *ErrorHandlerUserData = 0;
+static fatal_error_handler_t ErrorHandler = nullptr;
+static void *ErrorHandlerUserData = nullptr;
void llvm::install_fatal_error_handler(fatal_error_handler_t handler,
void *user_data) {
@@ -47,7 +47,7 @@ void llvm::install_fatal_error_handler(fatal_error_handler_t handler,
}
void llvm::remove_fatal_error_handler() {
- ErrorHandler = 0;
+ ErrorHandler = nullptr;
}
void llvm::report_fatal_error(const char *Reason, bool GenCrashDiag) {
diff --git a/lib/Support/FileOutputBuffer.cpp b/lib/Support/FileOutputBuffer.cpp
index 8f2c9fc..49311c2 100644
--- a/lib/Support/FileOutputBuffer.cpp
+++ b/lib/Support/FileOutputBuffer.cpp
@@ -12,7 +12,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/Support/FileOutputBuffer.h"
-#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/system_error.h"
@@ -85,19 +84,9 @@ error_code FileOutputBuffer::create(StringRef FilePath,
return error_code::success();
}
-error_code FileOutputBuffer::create(StringRef FilePath,
- size_t Size,
- OwningPtr<FileOutputBuffer> &Result,
- unsigned Flags) {
- std::unique_ptr<FileOutputBuffer> FOB;
- error_code ec = create(FilePath, Size, FOB, Flags);
- Result = std::move(FOB);
- return ec;
-}
-
error_code FileOutputBuffer::commit(int64_t NewSmallerSize) {
// Unmap buffer, letting OS flush dirty pages to file on disk.
- Region.reset(0);
+ Region.reset(nullptr);
// If requested, resize file as part of commit.
if ( NewSmallerSize != -1 ) {
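Note: with the OwningPtr forwarder gone, create() populates a std::unique_ptr directly. A minimal write-and-commit sketch, assuming this revision's error_code-returning API and commit()'s default NewSmallerSize of -1; the helper name is hypothetical:

    #include "llvm/Support/FileOutputBuffer.h"
    #include <cstring>
    using namespace llvm;

    static error_code writeBlob(StringRef Path, StringRef Bytes) {
      std::unique_ptr<FileOutputBuffer> FOB;
      if (error_code EC = FileOutputBuffer::create(Path, Bytes.size(), FOB))
        return EC;
      std::memcpy(FOB->getBufferStart(), Bytes.data(), Bytes.size());
      return FOB->commit();          // flushes and renames into place
    }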
diff --git a/lib/Support/FoldingSet.cpp b/lib/Support/FoldingSet.cpp
index 145f12d..4635114 100644
--- a/lib/Support/FoldingSet.cpp
+++ b/lib/Support/FoldingSet.cpp
@@ -190,7 +190,7 @@ FoldingSetNodeID::Intern(BumpPtrAllocator &Allocator) const {
static FoldingSetImpl::Node *GetNextPtr(void *NextInBucketPtr) {
// The low bit is set if this is the pointer back to the bucket.
if (reinterpret_cast<intptr_t>(NextInBucketPtr) & 1)
- return 0;
+ return nullptr;
return static_cast<FoldingSetImpl::Node*>(NextInBucketPtr);
}
@@ -262,7 +262,7 @@ void FoldingSetImpl::GrowHashTable() {
while (Node *NodeInBucket = GetNextPtr(Probe)) {
// Figure out the next link, remove NodeInBucket from the old link.
Probe = NodeInBucket->getNextInBucket();
- NodeInBucket->SetNextInBucket(0);
+ NodeInBucket->SetNextInBucket(nullptr);
// Insert the node into the new bucket, after recomputing the hash.
InsertNode(NodeInBucket,
@@ -285,7 +285,7 @@ FoldingSetImpl::Node
void **Bucket = GetBucketFor(IDHash, Buckets, NumBuckets);
void *Probe = *Bucket;
- InsertPos = 0;
+ InsertPos = nullptr;
FoldingSetNodeID TempID;
while (Node *NodeInBucket = GetNextPtr(Probe)) {
@@ -298,14 +298,14 @@ FoldingSetImpl::Node
// Didn't find the node, return null with the bucket as the InsertPos.
InsertPos = Bucket;
- return 0;
+ return nullptr;
}
/// InsertNode - Insert the specified node into the folding set, knowing that it
/// is not already in the map. InsertPos must be obtained from
/// FindNodeOrInsertPos.
void FoldingSetImpl::InsertNode(Node *N, void *InsertPos) {
- assert(N->getNextInBucket() == 0);
+ assert(!N->getNextInBucket());
// Do we need to grow the hashtable?
if (NumNodes+1 > NumBuckets*2) {
GrowHashTable();
@@ -323,7 +323,7 @@ void FoldingSetImpl::InsertNode(Node *N, void *InsertPos) {
// If this is the first insertion into this bucket, its next pointer will be
// null. Pretend as if it pointed to itself, setting the low bit to indicate
// that it is a pointer to the bucket.
- if (Next == 0)
+ if (!Next)
Next = reinterpret_cast<void*>(reinterpret_cast<intptr_t>(Bucket)|1);
// Set the node's next pointer, and make the bucket point to the node.
@@ -337,10 +337,10 @@ bool FoldingSetImpl::RemoveNode(Node *N) {
// Because each bucket is a circular list, we don't need to compute N's hash
// to remove it.
void *Ptr = N->getNextInBucket();
- if (Ptr == 0) return false; // Not in folding set.
+ if (!Ptr) return false; // Not in folding set.
--NumNodes;
- N->SetNextInBucket(0);
+ N->SetNextInBucket(nullptr);
// Remember what N originally pointed to, either a bucket or another node.
void *NodeNextPtr = Ptr;
@@ -390,7 +390,7 @@ FoldingSetImpl::Node *FoldingSetImpl::GetOrInsertNode(FoldingSetImpl::Node *N) {
FoldingSetIteratorImpl::FoldingSetIteratorImpl(void **Bucket) {
// Skip to the first non-null non-self-cycle bucket.
while (*Bucket != reinterpret_cast<void*>(-1) &&
- (*Bucket == 0 || GetNextPtr(*Bucket) == 0))
+ (!*Bucket || !GetNextPtr(*Bucket)))
++Bucket;
NodePtr = static_cast<FoldingSetNode*>(*Bucket);
@@ -410,7 +410,7 @@ void FoldingSetIteratorImpl::advance() {
do {
++Bucket;
} while (*Bucket != reinterpret_cast<void*>(-1) &&
- (*Bucket == 0 || GetNextPtr(*Bucket) == 0));
+ (!*Bucket || !GetNextPtr(*Bucket)));
NodePtr = static_cast<FoldingSetNode*>(*Bucket);
}
@@ -420,5 +420,5 @@ void FoldingSetIteratorImpl::advance() {
// FoldingSetBucketIteratorImpl Implementation
FoldingSetBucketIteratorImpl::FoldingSetBucketIteratorImpl(void **Bucket) {
- Ptr = (*Bucket == 0 || GetNextPtr(*Bucket) == 0) ? (void*) Bucket : *Bucket;
+ Ptr = (!*Bucket || !GetNextPtr(*Bucket)) ? (void*) Bucket : *Bucket;
}
diff --git a/lib/Support/FormattedStream.cpp b/lib/Support/FormattedStream.cpp
index 9febf66..618ec26 100644
--- a/lib/Support/FormattedStream.cpp
+++ b/lib/Support/FormattedStream.cpp
@@ -81,7 +81,7 @@ void formatted_raw_ostream::write_impl(const char *Ptr, size_t Size) {
TheStream->write(Ptr, Size);
// Reset the scanning pointer.
- Scanned = 0;
+ Scanned = nullptr;
}
/// fouts() - This returns a reference to a formatted_raw_ostream for
diff --git a/lib/Support/GraphWriter.cpp b/lib/Support/GraphWriter.cpp
index 83aa255..f5b2943 100644
--- a/lib/Support/GraphWriter.cpp
+++ b/lib/Support/GraphWriter.cpp
@@ -83,7 +83,7 @@ static bool LLVM_ATTRIBUTE_UNUSED
ExecGraphViewer(StringRef ExecPath, std::vector<const char*> &args,
StringRef Filename, bool wait, std::string &ErrMsg) {
if (wait) {
- if (sys::ExecuteAndWait(ExecPath, &args[0],0,0,0,0,&ErrMsg)) {
+ if (sys::ExecuteAndWait(ExecPath, &args[0],nullptr,nullptr,0,0,&ErrMsg)) {
errs() << "Error: " << ErrMsg << "\n";
return false;
}
@@ -91,7 +91,7 @@ ExecGraphViewer(StringRef ExecPath, std::vector<const char*> &args,
errs() << " done. \n";
}
else {
- sys::ExecuteNoWait(ExecPath, &args[0],0,0,0,&ErrMsg);
+ sys::ExecuteNoWait(ExecPath, &args[0],nullptr,nullptr,0,&ErrMsg);
errs() << "Remember to erase graph file: " << Filename.str() << "\n";
}
return true;
@@ -108,7 +108,7 @@ void llvm::DisplayGraph(StringRef FilenameRef, bool wait,
std::vector<const char*> args;
args.push_back(Graphviz.c_str());
args.push_back(Filename.c_str());
- args.push_back(0);
+ args.push_back(nullptr);
errs() << "Running 'Graphviz' program... ";
if (!ExecGraphViewer(Graphviz, args, Filename, wait, ErrMsg))
diff --git a/lib/Support/Host.cpp b/lib/Support/Host.cpp
index b6e2cb1..fd0472e 100644
--- a/lib/Support/Host.cpp
+++ b/lib/Support/Host.cpp
@@ -39,6 +39,8 @@
#include <mach/machine.h>
#endif
+#define DEBUG_TYPE "host-detection"
+
//===----------------------------------------------------------------------===//
//
// Implementations of the CPU detection routines
@@ -221,6 +223,7 @@ StringRef sys::getHostCPUName() {
(EBX & 0x20);
GetX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
bool Em64T = (EDX >> 29) & 0x1;
+ bool HasTBM = (ECX >> 21) & 0x1;
if (memcmp(text.c, "GenuineIntel", 12) == 0) {
switch (Family) {
@@ -433,9 +436,11 @@ StringRef sys::getHostCPUName() {
case 21:
if (!HasAVX) // If the OS doesn't support AVX provide a sane fallback.
return "btver1";
+ if (Model >= 0x50)
+ return "bdver4"; // 50h-6Fh: Excavator
if (Model >= 0x30)
return "bdver3"; // 30h-3Fh: Steamroller
- if (Model >= 0x10)
+ if (Model >= 0x10 || HasTBM)
return "bdver2"; // 10h-1Fh: Piledriver
return "bdver1"; // 00h-0Fh: Bulldozer
case 22:
@@ -681,7 +686,7 @@ StringRef sys::getHostCPUName() {
}
#endif
-#if defined(__linux__) && defined(__arm__)
+#if defined(__linux__) && (defined(__arm__) || defined(__aarch64__))
bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
std::string Err;
DataStreamer *DS = getDataFileStreamer("/proc/cpuinfo", &Err);
@@ -710,8 +715,24 @@ bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
break;
}
+#if defined(__aarch64__)
+ // Keep track of which crypto features we have seen
+ enum {
+ CAP_AES = 0x1,
+ CAP_PMULL = 0x2,
+ CAP_SHA1 = 0x4,
+ CAP_SHA2 = 0x8
+ };
+ uint32_t crypto = 0;
+#endif
+
for (unsigned I = 0, E = CPUFeatures.size(); I != E; ++I) {
StringRef LLVMFeatureStr = StringSwitch<StringRef>(CPUFeatures[I])
+#if defined(__aarch64__)
+ .Case("asimd", "neon")
+ .Case("fp", "fp-armv8")
+ .Case("crc32", "crc")
+#else
.Case("half", "fp16")
.Case("neon", "neon")
.Case("vfpv3", "vfp3")
@@ -719,12 +740,32 @@ bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
.Case("vfpv4", "vfp4")
.Case("idiva", "hwdiv-arm")
.Case("idivt", "hwdiv")
+#endif
.Default("");
+#if defined(__aarch64__)
+ // We need to check crypto separately since we need all of the crypto
+ // extensions to enable the subtarget feature
+ if (CPUFeatures[I] == "aes")
+ crypto |= CAP_AES;
+ else if (CPUFeatures[I] == "pmull")
+ crypto |= CAP_PMULL;
+ else if (CPUFeatures[I] == "sha1")
+ crypto |= CAP_SHA1;
+ else if (CPUFeatures[I] == "sha2")
+ crypto |= CAP_SHA2;
+#endif
+
if (LLVMFeatureStr != "")
Features.GetOrCreateValue(LLVMFeatureStr).setValue(true);
}
+#if defined(__aarch64__)
+ // If we have all crypto bits we can add the feature
+ if (crypto == (CAP_AES | CAP_PMULL | CAP_SHA1 | CAP_SHA2))
+ Features.GetOrCreateValue("crypto").setValue(true);
+#endif
+
return true;
}
#else
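Note: on AArch64 hosts the "crypto" entry appears in the feature map only when all four HWCAP strings were seen. A minimal consumer sketch; the helper name is hypothetical:

    #include "llvm/ADT/StringMap.h"
    #include "llvm/Support/Host.h"
    using namespace llvm;

    static bool hostHasFullCrypto() {
      StringMap<bool> Features;
      if (!sys::getHostCPUFeatures(Features))
        return false;                // detection unsupported on this host
      return Features.lookup("crypto");
    }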
diff --git a/lib/Support/IntervalMap.cpp b/lib/Support/IntervalMap.cpp
index 4dfcc40..e11a7f2 100644
--- a/lib/Support/IntervalMap.cpp
+++ b/lib/Support/IntervalMap.cpp
@@ -58,7 +58,7 @@ void Path::moveLeft(unsigned Level) {
}
} else if (height() < Level)
// end() may have created a height=0 path.
- path.resize(Level + 1, Entry(0, 0, 0));
+ path.resize(Level + 1, Entry(nullptr, 0, 0));
// NR is the subtree containing our left sibling.
--path[l].offset;
diff --git a/lib/Support/LineIterator.cpp b/lib/Support/LineIterator.cpp
index 056d817..947a8fb 100644
--- a/lib/Support/LineIterator.cpp
+++ b/lib/Support/LineIterator.cpp
@@ -13,9 +13,10 @@
using namespace llvm;
line_iterator::line_iterator(const MemoryBuffer &Buffer, char CommentMarker)
- : Buffer(Buffer.getBufferSize() ? &Buffer : 0),
+ : Buffer(Buffer.getBufferSize() ? &Buffer : nullptr),
CommentMarker(CommentMarker), LineNumber(1),
- CurrentLine(Buffer.getBufferSize() ? Buffer.getBufferStart() : 0, 0) {
+ CurrentLine(Buffer.getBufferSize() ? Buffer.getBufferStart() : nullptr,
+ 0) {
// Ensure that if we are constructed on a non-empty memory buffer it is
// a null-terminated buffer.
if (Buffer.getBufferSize()) {
@@ -53,7 +54,7 @@ void line_iterator::advance() {
if (*Pos == '\0') {
// We've hit the end of the buffer, reset ourselves to the end state.
- Buffer = 0;
+ Buffer = nullptr;
CurrentLine = StringRef();
return;
}
diff --git a/lib/Support/LockFileManager.cpp b/lib/Support/LockFileManager.cpp
index cd1cbcb..9b4bfbe 100644
--- a/lib/Support/LockFileManager.cpp
+++ b/lib/Support/LockFileManager.cpp
@@ -43,8 +43,11 @@ LockFileManager::readLockFile(StringRef LockFileName) {
std::tie(Hostname, PIDStr) = getToken(MB->getBuffer(), " ");
PIDStr = PIDStr.substr(PIDStr.find_first_not_of(" "));
int PID;
- if (!PIDStr.getAsInteger(10, PID))
- return std::make_pair(std::string(Hostname), PID);
+ if (!PIDStr.getAsInteger(10, PID)) {
+ auto Owner = std::make_pair(std::string(Hostname), PID);
+ if (processStillExecuting(Owner.first, Owner.second))
+ return Owner;
+ }
// Delete the lock file. It's invalid anyway.
sys::fs::remove(LockFileName);
@@ -171,9 +174,9 @@ LockFileManager::~LockFileManager() {
sys::fs::remove(UniqueLockFileName.str());
}
-void LockFileManager::waitForUnlock() {
+LockFileManager::WaitForUnlockResult LockFileManager::waitForUnlock() {
if (getState() != LFS_Shared)
- return;
+ return Res_Success;
#if LLVM_ON_WIN32
unsigned long Interval = 1;
@@ -193,7 +196,7 @@ void LockFileManager::waitForUnlock() {
#if LLVM_ON_WIN32
Sleep(Interval);
#else
- nanosleep(&Interval, NULL);
+ nanosleep(&Interval, nullptr);
#endif
bool LockFileJustDisappeared = false;
@@ -211,7 +214,7 @@ void LockFileManager::waitForUnlock() {
// available now.
if (LockFileGone) {
if (sys::fs::exists(FileName.str())) {
- return;
+ return Res_Success;
}
// The lock file is gone, so now we're waiting for the original file to
@@ -234,7 +237,7 @@ void LockFileManager::waitForUnlock() {
// owning the lock died without cleaning up, just bail out.
if (!LockFileGone &&
!processStillExecuting((*Owner).first, (*Owner).second)) {
- return;
+ return Res_OwnerDied;
}
// Exponentially increase the time we wait for the lock to be removed.
@@ -257,4 +260,5 @@ void LockFileManager::waitForUnlock() {
);
// Give up.
+ return Res_Timeout;
}
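Note: returning WaitForUnlockResult lets callers distinguish a clean handoff from a dead owner or a timeout. A minimal sketch; the rebuild policy here is hypothetical:

    #include "llvm/Support/LockFileManager.h"
    using namespace llvm;

    static bool ensureBuilt(StringRef Output) {
      LockFileManager Lock(Output);
      if (Lock.getState() == LockFileManager::LFS_Shared) {
        switch (Lock.waitForUnlock()) {
        case LockFileManager::Res_Success:
          return true;               // another process produced Output
        case LockFileManager::Res_OwnerDied:
        case LockFileManager::Res_Timeout:
          return false;              // caller may retry or build itself
        }
      }
      // ... we hold the lock: produce Output here ...
      return true;
    }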
diff --git a/lib/Support/ManagedStatic.cpp b/lib/Support/ManagedStatic.cpp
index 098cccb..6a1c2a5 100644
--- a/lib/Support/ManagedStatic.cpp
+++ b/lib/Support/ManagedStatic.cpp
@@ -17,15 +17,16 @@
#include <cassert>
using namespace llvm;
-static const ManagedStaticBase *StaticList = 0;
+static const ManagedStaticBase *StaticList = nullptr;
void ManagedStaticBase::RegisterManagedStatic(void *(*Creator)(),
void (*Deleter)(void*)) const {
+ assert(Creator);
if (llvm_is_multithreaded()) {
llvm_acquire_global_lock();
- if (Ptr == 0) {
- void* tmp = Creator ? Creator() : 0;
+ if (!Ptr) {
+ void* tmp = Creator();
TsanHappensBefore(this);
sys::MemoryFence();
@@ -45,9 +46,9 @@ void ManagedStaticBase::RegisterManagedStatic(void *(*Creator)(),
llvm_release_global_lock();
} else {
- assert(Ptr == 0 && DeleterFn == 0 && Next == 0 &&
+ assert(!Ptr && !DeleterFn && !Next &&
"Partially initialized ManagedStatic!?");
- Ptr = Creator ? Creator() : 0;
+ Ptr = Creator();
DeleterFn = Deleter;
// Add to list of managed statics.
@@ -62,14 +63,14 @@ void ManagedStaticBase::destroy() const {
"Not destroyed in reverse order of construction?");
// Unlink from list.
StaticList = Next;
- Next = 0;
+ Next = nullptr;
// Destroy memory.
DeleterFn(Ptr);
// Cleanup.
- Ptr = 0;
- DeleterFn = 0;
+ Ptr = nullptr;
+ DeleterFn = nullptr;
}
/// llvm_shutdown - Deallocate and destroy all ManagedStatic variables.
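Note: RegisterManagedStatic now insists on a non-null creator, which the default ManagedStatic<T> template always supplies (the object is default-constructed on first dereference). A minimal sketch with a hypothetical registry:

    #include "llvm/Support/ManagedStatic.h"
    #include <vector>
    using namespace llvm;

    static ManagedStatic<std::vector<int>> Registry;  // hypothetical example

    void record(int V) {
      Registry->push_back(V);  // first use constructs the vector lazily
    }
    // llvm_shutdown() later destroys it in reverse construction order.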
diff --git a/lib/Support/MemoryBuffer.cpp b/lib/Support/MemoryBuffer.cpp
index 2d593a8..629d885 100644
--- a/lib/Support/MemoryBuffer.cpp
+++ b/lib/Support/MemoryBuffer.cpp
@@ -12,7 +12,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/Support/MemoryBuffer.h"
-#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/Config/config.h"
#include "llvm/Support/Errno.h"
@@ -27,19 +26,11 @@
#include <cstdio>
#include <cstring>
#include <new>
-#include <sys/stat.h>
#include <sys/types.h>
#if !defined(_MSC_VER) && !defined(__MINGW32__)
#include <unistd.h>
#else
#include <io.h>
-// Simplistic definitinos of these macros for use in getOpenFile.
-#ifndef S_ISREG
-#define S_ISREG(x) (1)
-#endif
-#ifndef S_ISBLK
-#define S_ISBLK(x) (0)
-#endif
#endif
using namespace llvm;
@@ -117,7 +108,7 @@ MemoryBuffer *MemoryBuffer::getMemBuffer(StringRef InputData,
MemoryBuffer *MemoryBuffer::getMemBufferCopy(StringRef InputData,
StringRef BufferName) {
MemoryBuffer *Buf = getNewUninitMemBuffer(InputData.size(), BufferName);
- if (!Buf) return 0;
+ if (!Buf) return nullptr;
memcpy(const_cast<char*>(Buf->getBufferStart()), InputData.data(),
InputData.size());
return Buf;
@@ -137,7 +128,7 @@ MemoryBuffer *MemoryBuffer::getNewUninitMemBuffer(size_t Size,
RoundUpToAlignment(sizeof(MemoryBufferMem) + BufferName.size() + 1, 16);
size_t RealLen = AlignedStringLen + Size + 1;
char *Mem = static_cast<char*>(operator new(RealLen, std::nothrow));
- if (!Mem) return 0;
+ if (!Mem) return nullptr;
// The name is stored after the class itself.
CopyStringRef(Mem + sizeof(MemoryBufferMem), BufferName);
@@ -155,7 +146,7 @@ MemoryBuffer *MemoryBuffer::getNewUninitMemBuffer(size_t Size,
/// the MemoryBuffer object.
MemoryBuffer *MemoryBuffer::getNewMemBuffer(size_t Size, StringRef BufferName) {
MemoryBuffer *SB = getNewUninitMemBuffer(Size, BufferName);
- if (!SB) return 0;
+ if (!SB) return nullptr;
memset(const_cast<char*>(SB->getBufferStart()), 0, Size);
return SB;
}
@@ -173,15 +164,6 @@ error_code MemoryBuffer::getFileOrSTDIN(StringRef Filename,
return getFile(Filename, Result, FileSize);
}
-error_code MemoryBuffer::getFileOrSTDIN(StringRef Filename,
- OwningPtr<MemoryBuffer> &Result,
- int64_t FileSize) {
- std::unique_ptr<MemoryBuffer> MB;
- error_code ec = getFileOrSTDIN(Filename, MB, FileSize);
- Result = std::move(MB);
- return ec;
-}
-
//===----------------------------------------------------------------------===//
// MemoryBuffer::getFile implementation.
@@ -252,44 +234,38 @@ static error_code getMemoryBufferForStream(int FD,
static error_code getFileAux(const char *Filename,
std::unique_ptr<MemoryBuffer> &Result,
int64_t FileSize,
- bool RequiresNullTerminator);
+ bool RequiresNullTerminator,
+ bool IsVolatileSize);
error_code MemoryBuffer::getFile(Twine Filename,
std::unique_ptr<MemoryBuffer> &Result,
int64_t FileSize,
- bool RequiresNullTerminator) {
+ bool RequiresNullTerminator,
+ bool IsVolatileSize) {
// Ensure the path is null terminated.
SmallString<256> PathBuf;
StringRef NullTerminatedName = Filename.toNullTerminatedStringRef(PathBuf);
return getFileAux(NullTerminatedName.data(), Result, FileSize,
- RequiresNullTerminator);
-}
-
-error_code MemoryBuffer::getFile(Twine Filename,
- OwningPtr<MemoryBuffer> &Result,
- int64_t FileSize,
- bool RequiresNullTerminator) {
- std::unique_ptr<MemoryBuffer> MB;
- error_code ec = getFile(Filename, MB, FileSize, RequiresNullTerminator);
- Result = std::move(MB);
- return ec;
+ RequiresNullTerminator, IsVolatileSize);
}
static error_code getOpenFileImpl(int FD, const char *Filename,
std::unique_ptr<MemoryBuffer> &Result,
uint64_t FileSize, uint64_t MapSize,
- int64_t Offset, bool RequiresNullTerminator);
+ int64_t Offset, bool RequiresNullTerminator,
+ bool IsVolatileSize);
static error_code getFileAux(const char *Filename,
std::unique_ptr<MemoryBuffer> &Result, int64_t FileSize,
- bool RequiresNullTerminator) {
+ bool RequiresNullTerminator,
+ bool IsVolatileSize) {
int FD;
error_code EC = sys::fs::openFileForRead(Filename, FD);
if (EC)
return EC;
error_code ret = getOpenFileImpl(FD, Filename, Result, FileSize, FileSize, 0,
- RequiresNullTerminator);
+ RequiresNullTerminator, IsVolatileSize);
close(FD);
return ret;
}
@@ -299,7 +275,14 @@ static bool shouldUseMmap(int FD,
size_t MapSize,
off_t Offset,
bool RequiresNullTerminator,
- int PageSize) {
+ int PageSize,
+ bool IsVolatileSize) {
+ // mmap may leave the buffer without null terminator if the file size changed
+ // by the time the last page is mapped in, so avoid it if the file size is
+ // likely to change.
+ if (IsVolatileSize)
+ return false;
+
// We don't use mmap for small files because this can severely fragment our
// address space.
if (MapSize < 4 * 4096 || MapSize < (unsigned)PageSize)
@@ -315,9 +298,8 @@ static bool shouldUseMmap(int FD,
// RequiresNullTerminator = false and MapSize != -1.
if (FileSize == size_t(-1)) {
sys::fs::file_status Status;
- error_code EC = sys::fs::status(FD, Status);
- if (EC)
- return EC;
+ if (sys::fs::status(FD, Status))
+ return false;
FileSize = Status.getSize();
}
@@ -328,15 +310,6 @@ static bool shouldUseMmap(int FD,
if (End != FileSize)
return false;
-#if defined(_WIN32) || defined(__CYGWIN__)
- // Don't peek the next page if file is multiple of *physical* pagesize(4k)
- // but is not multiple of AllocationGranularity(64k),
- // when a null terminator is required.
- // FIXME: It's not good to hardcode 4096 here. dwPageSize shows 4096.
- if ((FileSize & (4096 - 1)) == 0)
- return false;
-#endif
-
// Don't try to map files that are exactly a multiple of the system page size
// if we need a null terminator.
if ((FileSize & (PageSize -1)) == 0)
@@ -348,7 +321,8 @@ static bool shouldUseMmap(int FD,
static error_code getOpenFileImpl(int FD, const char *Filename,
std::unique_ptr<MemoryBuffer> &Result,
uint64_t FileSize, uint64_t MapSize,
- int64_t Offset, bool RequiresNullTerminator) {
+ int64_t Offset, bool RequiresNullTerminator,
+ bool IsVolatileSize) {
static int PageSize = sys::process::get_self()->page_size();
// Default is to map the full file.
@@ -375,7 +349,7 @@ static error_code getOpenFileImpl(int FD, const char *Filename,
}
if (shouldUseMmap(FD, FileSize, MapSize, Offset, RequiresNullTerminator,
- PageSize)) {
+ PageSize, IsVolatileSize)) {
error_code EC;
Result.reset(new (NamedBufferAlloc(Filename)) MemoryBufferMMapFile(
RequiresNullTerminator, FD, MapSize, Offset, EC));
@@ -412,9 +386,7 @@ static error_code getOpenFileImpl(int FD, const char *Filename,
return error_code(errno, posix_category());
}
if (NumRead == 0) {
- assert(0 && "We got inaccurate FileSize value or fstat reported an "
- "invalid file size.");
- *BufPtr = '\0'; // null-terminate at the actual size.
+ memset(BufPtr, 0, BytesLeft); // zero-initialize rest of the buffer.
break;
}
BytesLeft -= NumRead;
@@ -428,35 +400,18 @@ static error_code getOpenFileImpl(int FD, const char *Filename,
error_code MemoryBuffer::getOpenFile(int FD, const char *Filename,
std::unique_ptr<MemoryBuffer> &Result,
uint64_t FileSize,
- bool RequiresNullTerminator) {
+ bool RequiresNullTerminator,
+ bool IsVolatileSize) {
return getOpenFileImpl(FD, Filename, Result, FileSize, FileSize, 0,
- RequiresNullTerminator);
-}
-
-error_code MemoryBuffer::getOpenFile(int FD, const char *Filename,
- OwningPtr<MemoryBuffer> &Result,
- uint64_t FileSize,
- bool RequiresNullTerminator) {
- std::unique_ptr<MemoryBuffer> MB;
- error_code ec = getOpenFileImpl(FD, Filename, MB, FileSize, FileSize, 0,
- RequiresNullTerminator);
- Result = std::move(MB);
- return ec;
+ RequiresNullTerminator, IsVolatileSize);
}
error_code MemoryBuffer::getOpenFileSlice(int FD, const char *Filename,
std::unique_ptr<MemoryBuffer> &Result,
- uint64_t MapSize, int64_t Offset) {
- return getOpenFileImpl(FD, Filename, Result, -1, MapSize, Offset, false);
-}
-
-error_code MemoryBuffer::getOpenFileSlice(int FD, const char *Filename,
- OwningPtr<MemoryBuffer> &Result,
- uint64_t MapSize, int64_t Offset) {
- std::unique_ptr<MemoryBuffer> MB;
- error_code ec = getOpenFileImpl(FD, Filename, MB, -1, MapSize, Offset, false);
- Result = std::move(MB);
- return ec;
+ uint64_t MapSize, int64_t Offset,
+ bool IsVolatileSize) {
+ return getOpenFileImpl(FD, Filename, Result, -1, MapSize, Offset, false,
+ IsVolatileSize);
}
//===----------------------------------------------------------------------===//
@@ -472,10 +427,3 @@ error_code MemoryBuffer::getSTDIN(std::unique_ptr<MemoryBuffer> &Result) {
return getMemoryBufferForStream(0, "<stdin>", Result);
}
-
-error_code MemoryBuffer::getSTDIN(OwningPtr<MemoryBuffer> &Result) {
- std::unique_ptr<MemoryBuffer> MB;
- error_code ec = getSTDIN(MB);
- Result = std::move(MB);
- return ec;
-}
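Note: the IsVolatileSize flag threads through every entry point so callers can opt out of mmap for files whose size may change mid-read. A minimal sketch, assuming this revision's default arguments; the helper name is hypothetical:

    #include "llvm/Support/MemoryBuffer.h"
    using namespace llvm;

    static error_code readPossiblyGrowingFile(
        StringRef Path, std::unique_ptr<MemoryBuffer> &MB) {
      // IsVolatileSize=true forces a read() path, so a file that grows or
      // shrinks while open cannot leave the buffer unterminated.
      return MemoryBuffer::getFile(Path, MB, /*FileSize=*/-1,
                                   /*RequiresNullTerminator=*/true,
                                   /*IsVolatileSize=*/true);
    }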
diff --git a/lib/Support/Mutex.cpp b/lib/Support/Mutex.cpp
index 37c9d73..c8d3844 100644
--- a/lib/Support/Mutex.cpp
+++ b/lib/Support/Mutex.cpp
@@ -42,7 +42,7 @@ using namespace sys;
// Construct a Mutex using pthread calls
MutexImpl::MutexImpl( bool recursive)
- : data_(0)
+ : data_(nullptr)
{
// Declare the pthread_mutex data structures
pthread_mutex_t* mutex =
@@ -75,7 +75,7 @@ MutexImpl::MutexImpl( bool recursive)
MutexImpl::~MutexImpl()
{
pthread_mutex_t* mutex = static_cast<pthread_mutex_t*>(data_);
- assert(mutex != 0);
+ assert(mutex != nullptr);
pthread_mutex_destroy(mutex);
free(mutex);
}
@@ -84,7 +84,7 @@ bool
MutexImpl::acquire()
{
pthread_mutex_t* mutex = static_cast<pthread_mutex_t*>(data_);
- assert(mutex != 0);
+ assert(mutex != nullptr);
int errorcode = pthread_mutex_lock(mutex);
return errorcode == 0;
@@ -94,7 +94,7 @@ bool
MutexImpl::release()
{
pthread_mutex_t* mutex = static_cast<pthread_mutex_t*>(data_);
- assert(mutex != 0);
+ assert(mutex != nullptr);
int errorcode = pthread_mutex_unlock(mutex);
return errorcode == 0;
@@ -104,7 +104,7 @@ bool
MutexImpl::tryacquire()
{
pthread_mutex_t* mutex = static_cast<pthread_mutex_t*>(data_);
- assert(mutex != 0);
+ assert(mutex != nullptr);
int errorcode = pthread_mutex_trylock(mutex);
return errorcode == 0;
diff --git a/lib/Support/Path.cpp b/lib/Support/Path.cpp
index 5b73631..b8d676f 100644
--- a/lib/Support/Path.cpp
+++ b/lib/Support/Path.cpp
@@ -569,6 +569,12 @@ bool is_separator(char value) {
}
}
+static const char preferred_separator_string[] = { preferred_separator, '\0' };
+
+const StringRef get_separator() {
+ return preferred_separator_string;
+}
+
void system_temp_directory(bool erasedOnReboot, SmallVectorImpl<char> &result) {
result.clear();
@@ -577,7 +583,7 @@ void system_temp_directory(bool erasedOnReboot, SmallVectorImpl<char> &result) {
// macros defined in <unistd.h> on darwin >= 9
int ConfName = erasedOnReboot? _CS_DARWIN_USER_TEMP_DIR
: _CS_DARWIN_USER_CACHE_DIR;
- size_t ConfLen = confstr(ConfName, 0, 0);
+ size_t ConfLen = confstr(ConfName, nullptr, 0);
if (ConfLen > 0) {
do {
result.resize(ConfLen);
diff --git a/lib/Support/PrettyStackTrace.cpp b/lib/Support/PrettyStackTrace.cpp
index d4e205c..987778a 100644
--- a/lib/Support/PrettyStackTrace.cpp
+++ b/lib/Support/PrettyStackTrace.cpp
@@ -46,7 +46,7 @@ static unsigned PrintStack(const PrettyStackTraceEntry *Entry, raw_ostream &OS){
/// PrintCurStackTrace - Print the current stack trace to the specified stream.
static void PrintCurStackTrace(raw_ostream &OS) {
// Don't print an empty trace.
- if (PrettyStackTraceHead->get() == 0) return;
+ if (!PrettyStackTraceHead->get()) return;
// If there are pretty stack frames registered, walk and emit them.
OS << "Stack dump:\n";
@@ -136,7 +136,7 @@ void PrettyStackTraceProgram::print(raw_ostream &OS) const {
}
static bool RegisterCrashPrinter() {
- sys::AddSignalHandler(CrashHandler, 0);
+ sys::AddSignalHandler(CrashHandler, nullptr);
return false;
}
diff --git a/lib/Support/RWMutex.cpp b/lib/Support/RWMutex.cpp
index 6a34f2d..3b6309c 100644
--- a/lib/Support/RWMutex.cpp
+++ b/lib/Support/RWMutex.cpp
@@ -44,7 +44,7 @@ using namespace sys;
// Construct a RWMutex using pthread calls
RWMutexImpl::RWMutexImpl()
- : data_(0)
+ : data_(nullptr)
{
// Declare the pthread_rwlock data structures
pthread_rwlock_t* rwlock =
@@ -56,7 +56,7 @@ RWMutexImpl::RWMutexImpl()
#endif
// Initialize the rwlock
- int errorcode = pthread_rwlock_init(rwlock, NULL);
+ int errorcode = pthread_rwlock_init(rwlock, nullptr);
(void)errorcode;
assert(errorcode == 0);
@@ -68,7 +68,7 @@ RWMutexImpl::RWMutexImpl()
RWMutexImpl::~RWMutexImpl()
{
pthread_rwlock_t* rwlock = static_cast<pthread_rwlock_t*>(data_);
- assert(rwlock != 0);
+ assert(rwlock != nullptr);
pthread_rwlock_destroy(rwlock);
free(rwlock);
}
@@ -77,7 +77,7 @@ bool
RWMutexImpl::reader_acquire()
{
pthread_rwlock_t* rwlock = static_cast<pthread_rwlock_t*>(data_);
- assert(rwlock != 0);
+ assert(rwlock != nullptr);
int errorcode = pthread_rwlock_rdlock(rwlock);
return errorcode == 0;
@@ -87,7 +87,7 @@ bool
RWMutexImpl::reader_release()
{
pthread_rwlock_t* rwlock = static_cast<pthread_rwlock_t*>(data_);
- assert(rwlock != 0);
+ assert(rwlock != nullptr);
int errorcode = pthread_rwlock_unlock(rwlock);
return errorcode == 0;
@@ -97,7 +97,7 @@ bool
RWMutexImpl::writer_acquire()
{
pthread_rwlock_t* rwlock = static_cast<pthread_rwlock_t*>(data_);
- assert(rwlock != 0);
+ assert(rwlock != nullptr);
int errorcode = pthread_rwlock_wrlock(rwlock);
return errorcode == 0;
@@ -107,7 +107,7 @@ bool
RWMutexImpl::writer_release()
{
pthread_rwlock_t* rwlock = static_cast<pthread_rwlock_t*>(data_);
- assert(rwlock != 0);
+ assert(rwlock != nullptr);
int errorcode = pthread_rwlock_unlock(rwlock);
return errorcode == 0;
diff --git a/lib/Support/Regex.cpp b/lib/Support/Regex.cpp
index 1115534..f7fe1e4 100644
--- a/lib/Support/Regex.cpp
+++ b/lib/Support/Regex.cpp
@@ -43,7 +43,7 @@ bool Regex::isValid(std::string &Error) {
if (!error)
return true;
- size_t len = llvm_regerror(error, preg, NULL, 0);
+ size_t len = llvm_regerror(error, preg, nullptr, 0);
Error.resize(len - 1);
llvm_regerror(error, preg, &Error[0], len);
diff --git a/lib/Support/SearchForAddressOfSpecialSymbol.cpp b/lib/Support/SearchForAddressOfSpecialSymbol.cpp
index 2d23902..55f3320 100644
--- a/lib/Support/SearchForAddressOfSpecialSymbol.cpp
+++ b/lib/Support/SearchForAddressOfSpecialSymbol.cpp
@@ -48,7 +48,7 @@ static void *DoSearch(const char* symbolName) {
#endif
#undef EXPLICIT_SYMBOL
- return 0;
+ return nullptr;
}
namespace llvm {
diff --git a/lib/Support/SmallPtrSet.cpp b/lib/Support/SmallPtrSet.cpp
index 844e416..a80e095 100644
--- a/lib/Support/SmallPtrSet.cpp
+++ b/lib/Support/SmallPtrSet.cpp
@@ -103,7 +103,7 @@ const void * const *SmallPtrSetImplBase::FindBucketFor(const void *Ptr) const {
unsigned ArraySize = CurArraySize;
unsigned ProbeAmt = 1;
const void *const *Array = CurArray;
- const void *const *Tombstone = 0;
+ const void *const *Tombstone = nullptr;
while (1) {
// Found Ptr's bucket?
if (Array[Bucket] == Ptr)
diff --git a/lib/Support/SourceMgr.cpp b/lib/Support/SourceMgr.cpp
index 4bfd96a..acd75fb 100644
--- a/lib/Support/SourceMgr.cpp
+++ b/lib/Support/SourceMgr.cpp
@@ -18,6 +18,7 @@
#include "llvm/ADT/Twine.h"
#include "llvm/Support/Locale.h"
#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Path.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/system_error.h"
using namespace llvm;
@@ -60,7 +61,7 @@ size_t SourceMgr::AddIncludeFile(const std::string &Filename,
// If the file didn't exist directly, see if it's in an include path.
for (unsigned i = 0, e = IncludeDirectories.size(); i != e && !NewBuf; ++i) {
- IncludedFile = IncludeDirectories[i] + "/" + Filename;
+ IncludedFile = IncludeDirectories[i] + sys::path::get_separator().data() + Filename;
MemoryBuffer::getFile(IncludedFile.c_str(), NewBuf);
}
@@ -114,7 +115,7 @@ SourceMgr::getLineAndColumn(SMLoc Loc, int BufferID) const {
if (*Ptr == '\n') ++LineNo;
// Allocate the line number cache if it doesn't exist.
- if (LineNoCache == 0)
+ if (!LineNoCache)
LineNoCache = new LineNoCacheTy();
// Update the line # cache.
@@ -228,7 +229,7 @@ void SourceMgr::PrintMessage(raw_ostream &OS, SMLoc Loc,
PrintIncludeStack(getBufferInfo(CurBuf).IncludeLoc, OS);
}
- Diagnostic.print(0, OS, ShowColors);
+ Diagnostic.print(nullptr, OS, ShowColors);
}
void SourceMgr::PrintMessage(SMLoc Loc, SourceMgr::DiagKind Kind,
diff --git a/lib/Support/StringMap.cpp b/lib/Support/StringMap.cpp
index 9ac1f86..72a6d82 100644
--- a/lib/Support/StringMap.cpp
+++ b/lib/Support/StringMap.cpp
@@ -27,7 +27,7 @@ StringMapImpl::StringMapImpl(unsigned InitSize, unsigned itemSize) {
}
// Otherwise, initialize it with zero buckets to avoid the allocation.
- TheTable = 0;
+ TheTable = nullptr;
NumBuckets = 0;
NumItems = 0;
NumTombstones = 0;
@@ -70,7 +70,7 @@ unsigned StringMapImpl::LookupBucketFor(StringRef Name) {
while (1) {
StringMapEntryBase *BucketItem = TheTable[BucketNo];
// If we found an empty bucket, this key isn't in the table yet, return it.
- if (LLVM_LIKELY(BucketItem == 0)) {
+ if (LLVM_LIKELY(!BucketItem)) {
// If we found a tombstone, we want to reuse the tombstone instead of an
// empty bucket. This reduces probing.
if (FirstTombstone != -1) {
@@ -124,7 +124,7 @@ int StringMapImpl::FindKey(StringRef Key) const {
while (1) {
StringMapEntryBase *BucketItem = TheTable[BucketNo];
// If we found an empty bucket, this key isn't in the table yet, return.
- if (LLVM_LIKELY(BucketItem == 0))
+ if (LLVM_LIKELY(!BucketItem))
return -1;
if (BucketItem == getTombstoneVal()) {
@@ -166,7 +166,7 @@ void StringMapImpl::RemoveKey(StringMapEntryBase *V) {
/// table, returning it. If the key is not in the table, this returns null.
StringMapEntryBase *StringMapImpl::RemoveKey(StringRef Key) {
int Bucket = FindKey(Key);
- if (Bucket == -1) return 0;
+ if (Bucket == -1) return nullptr;
StringMapEntryBase *Result = TheTable[Bucket];
TheTable[Bucket] = getTombstoneVal();
@@ -212,7 +212,7 @@ void StringMapImpl::RehashTable() {
// Fast case, bucket available.
unsigned FullHash = HashTable[I];
unsigned NewBucket = FullHash & (NewSize-1);
- if (NewTableArray[NewBucket] == 0) {
+ if (!NewTableArray[NewBucket]) {
NewTableArray[FullHash & (NewSize-1)] = Bucket;
NewHashArray[FullHash & (NewSize-1)] = FullHash;
continue;
diff --git a/lib/Support/StringRef.cpp b/lib/Support/StringRef.cpp
index bd2a37b..cde8258 100644
--- a/lib/Support/StringRef.cpp
+++ b/lib/Support/StringRef.cpp
@@ -281,7 +281,7 @@ void StringRef::split(SmallVectorImpl<StringRef> &A,
// rest.data() is used to distinguish cases like "a," that splits into
// "a" + "" and "a" that splits into "a" + 0.
for (int splits = 0;
- rest.data() != NULL && (MaxSplit < 0 || splits < MaxSplit);
+ rest.data() != nullptr && (MaxSplit < 0 || splits < MaxSplit);
++splits) {
std::pair<StringRef, StringRef> p = rest.split(Separators);
@@ -290,7 +290,7 @@ void StringRef::split(SmallVectorImpl<StringRef> &A,
rest = p.second;
}
// If we have a tail left, add it.
- if (rest.data() != NULL && (rest.size() != 0 || KeepEmpty))
+ if (rest.data() != nullptr && (rest.size() != 0 || KeepEmpty))
A.push_back(rest);
}
diff --git a/lib/Support/TargetRegistry.cpp b/lib/Support/TargetRegistry.cpp
index 8d91a53..a008831 100644
--- a/lib/Support/TargetRegistry.cpp
+++ b/lib/Support/TargetRegistry.cpp
@@ -17,7 +17,7 @@
using namespace llvm;
// Clients are responsible for avoiding race conditions in registration.
-static Target *FirstTarget = 0;
+static Target *FirstTarget = nullptr;
TargetRegistry::iterator TargetRegistry::begin() {
return iterator(FirstTarget);
@@ -29,7 +29,7 @@ const Target *TargetRegistry::lookupTarget(const std::string &ArchName,
// Allocate target machine. First, check whether the user has explicitly
// specified an architecture to compile for. If so we have to look it up by
// name, because it might be a backend that has no mapping to a target triple.
- const Target *TheTarget = 0;
+ const Target *TheTarget = nullptr;
if (!ArchName.empty()) {
for (TargetRegistry::iterator it = TargetRegistry::begin(),
ie = TargetRegistry::end(); it != ie; ++it) {
@@ -41,7 +41,7 @@ const Target *TargetRegistry::lookupTarget(const std::string &ArchName,
if (!TheTarget) {
Error = "error: invalid target '" + ArchName + "'.\n";
- return 0;
+ return nullptr;
}
// Adjust the triple to match (if known), otherwise stick with the
@@ -53,11 +53,11 @@ const Target *TargetRegistry::lookupTarget(const std::string &ArchName,
// Get the target specific parser.
std::string TempError;
TheTarget = TargetRegistry::lookupTarget(TheTriple.getTriple(), TempError);
- if (TheTarget == 0) {
+ if (!TheTarget) {
Error = ": error: unable to get target for '"
+ TheTriple.getTriple()
+ "', see --version and --triple.\n";
- return 0;
+ return nullptr;
}
}
@@ -69,16 +69,16 @@ const Target *TargetRegistry::lookupTarget(const std::string &TT,
// Provide special warning when no targets are initialized.
if (begin() == end()) {
Error = "Unable to find target for this triple (no targets are registered)";
- return 0;
+ return nullptr;
}
- const Target *Matching = 0;
+ const Target *Matching = nullptr;
Triple::ArchType Arch = Triple(TT).getArch();
for (iterator it = begin(), ie = end(); it != ie; ++it) {
if (it->ArchMatchFn(Arch)) {
if (Matching) {
Error = std::string("Cannot choose between targets \"") +
Matching->Name + "\" and \"" + it->Name + "\"";
- return 0;
+ return nullptr;
}
Matching = &*it;
}
@@ -87,7 +87,7 @@ const Target *TargetRegistry::lookupTarget(const std::string &TT,
if (!Matching) {
Error = "No available targets are compatible with this triple, "
"see -version for the available targets.";
- return 0;
+ return nullptr;
}
return Matching;
@@ -121,7 +121,7 @@ const Target *TargetRegistry::getClosestTargetForJIT(std::string &Error) {
if (TheTarget && !TheTarget->hasJIT()) {
Error = "No JIT compatible target available for this host";
- return 0;
+ return nullptr;
}
return TheTarget;
diff --git a/lib/Support/ThreadLocal.cpp b/lib/Support/ThreadLocal.cpp
index aebbcad..2dec9eb 100644
--- a/lib/Support/ThreadLocal.cpp
+++ b/lib/Support/ThreadLocal.cpp
@@ -53,7 +53,7 @@ using namespace sys;
ThreadLocalImpl::ThreadLocalImpl() : data() {
static_assert(sizeof(pthread_key_t) <= sizeof(data), "size too big");
pthread_key_t* key = reinterpret_cast<pthread_key_t*>(&data);
- int errorcode = pthread_key_create(key, NULL);
+ int errorcode = pthread_key_create(key, nullptr);
assert(errorcode == 0);
(void) errorcode;
}
@@ -78,7 +78,7 @@ const void* ThreadLocalImpl::getInstance() {
}
void ThreadLocalImpl::removeInstance() {
- setInstance(0);
+ setInstance(nullptr);
}
}
diff --git a/lib/Support/Threading.cpp b/lib/Support/Threading.cpp
index 9d7ac6c..1acfa79 100644
--- a/lib/Support/Threading.cpp
+++ b/lib/Support/Threading.cpp
@@ -21,7 +21,7 @@ using namespace llvm;
static bool multithreaded_mode = false;
-static sys::Mutex* global_lock = 0;
+static sys::Mutex* global_lock = nullptr;
bool llvm::llvm_start_multithreaded() {
#if LLVM_ENABLE_THREADS != 0
@@ -73,7 +73,7 @@ struct ThreadInfo {
static void *ExecuteOnThread_Dispatch(void *Arg) {
ThreadInfo *TI = reinterpret_cast<ThreadInfo*>(Arg);
TI->UserFn(TI->UserData);
- return 0;
+ return nullptr;
}
void llvm::llvm_execute_on_thread(void (*Fn)(void*), void *UserData,
@@ -97,7 +97,7 @@ void llvm::llvm_execute_on_thread(void (*Fn)(void*), void *UserData,
goto error;
// Wait for the thread and clean up.
- ::pthread_join(Thread, 0);
+ ::pthread_join(Thread, nullptr);
error:
::pthread_attr_destroy(&Attr);
diff --git a/lib/Support/Timer.cpp b/lib/Support/Timer.cpp
index 7cf4d37..61465ae 100644
--- a/lib/Support/Timer.cpp
+++ b/lib/Support/Timer.cpp
@@ -15,6 +15,7 @@
#include "llvm/ADT/StringMap.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/Mutex.h"
@@ -77,7 +78,7 @@ raw_ostream *llvm::CreateInfoOutputFile() {
}
-static TimerGroup *DefaultTimerGroup = 0;
+static TimerGroup *DefaultTimerGroup = nullptr;
static TimerGroup *getDefaultTimerGroup() {
TimerGroup *tmp = DefaultTimerGroup;
sys::MemoryFence();
@@ -100,7 +101,7 @@ static TimerGroup *getDefaultTimerGroup() {
//===----------------------------------------------------------------------===//
void Timer::init(StringRef N) {
- assert(TG == 0 && "Timer already initialized");
+ assert(!TG && "Timer already initialized");
Name.assign(N.begin(), N.end());
Started = false;
TG = getDefaultTimerGroup();
@@ -108,7 +109,7 @@ void Timer::init(StringRef N) {
}
void Timer::init(StringRef N, TimerGroup &tg) {
- assert(TG == 0 && "Timer already initialized");
+ assert(!TG && "Timer already initialized");
Name.assign(N.begin(), N.end());
Started = false;
TG = &tg;
@@ -235,11 +236,11 @@ static Timer &getNamedRegionTimer(StringRef Name) {
NamedRegionTimer::NamedRegionTimer(StringRef Name,
bool Enabled)
- : TimeRegion(!Enabled ? 0 : &getNamedRegionTimer(Name)) {}
+ : TimeRegion(!Enabled ? nullptr : &getNamedRegionTimer(Name)) {}
NamedRegionTimer::NamedRegionTimer(StringRef Name, StringRef GroupName,
bool Enabled)
- : TimeRegion(!Enabled ? 0 : &NamedGroupedTimers->get(Name, GroupName)) {}
+ : TimeRegion(!Enabled ? nullptr : &NamedGroupedTimers->get(Name, GroupName)){}
//===----------------------------------------------------------------------===//
// TimerGroup Implementation
@@ -247,10 +248,10 @@ NamedRegionTimer::NamedRegionTimer(StringRef Name, StringRef GroupName,
/// TimerGroupList - This is the global list of TimerGroups, maintained by the
/// TimerGroup ctor/dtor and is protected by the TimerLock lock.
-static TimerGroup *TimerGroupList = 0;
+static TimerGroup *TimerGroupList = nullptr;
TimerGroup::TimerGroup(StringRef name)
- : Name(name.begin(), name.end()), FirstTimer(0) {
+ : Name(name.begin(), name.end()), FirstTimer(nullptr) {
// Add the group to TimerGroupList.
sys::SmartScopedLock<true> L(*TimerLock);
@@ -264,7 +265,7 @@ TimerGroup::TimerGroup(StringRef name)
TimerGroup::~TimerGroup() {
// If the timer group is destroyed before the timers it owns, accumulate and
// print the timing data.
- while (FirstTimer != 0)
+ while (FirstTimer)
removeTimer(*FirstTimer);
// Remove the group from the TimerGroupList.
@@ -282,7 +283,7 @@ void TimerGroup::removeTimer(Timer &T) {
if (T.Started)
TimersToPrint.push_back(std::make_pair(T.Time, T.Name));
- T.TG = 0;
+ T.TG = nullptr;
// Unlink the timer from our list.
*T.Prev = T.Next;
@@ -291,7 +292,7 @@ void TimerGroup::removeTimer(Timer &T) {
// Print the report when all timers in this group are destroyed if some of
// them were started.
- if (FirstTimer != 0 || TimersToPrint.empty())
+ if (FirstTimer || TimersToPrint.empty())
return;
raw_ostream *OutStream = CreateInfoOutputFile();
diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp
index 71abb9d..b3d48fb 100644
--- a/lib/Support/Triple.cpp
+++ b/lib/Support/Triple.cpp
@@ -24,6 +24,7 @@ const char *Triple::getArchTypeName(ArchType Kind) {
case arm: return "arm";
case armeb: return "armeb";
case arm64: return "arm64";
+ case arm64_be: return "arm64_be";
case hexagon: return "hexagon";
case mips: return "mips";
case mipsel: return "mipsel";
@@ -57,7 +58,7 @@ const char *Triple::getArchTypeName(ArchType Kind) {
const char *Triple::getArchTypePrefix(ArchType Kind) {
switch (Kind) {
default:
- return 0;
+ return nullptr;
case aarch64:
case aarch64_be: return "aarch64";
@@ -67,7 +68,8 @@ const char *Triple::getArchTypePrefix(ArchType Kind) {
case thumb:
case thumbeb: return "arm";
- case arm64: return "arm64";
+ case arm64:
+ case arm64_be: return "arm64";
case ppc64:
case ppc64le:
@@ -178,6 +180,7 @@ Triple::ArchType Triple::getArchTypeForLLVMName(StringRef Name) {
.Case("arm", arm)
.Case("armeb", armeb)
.Case("arm64", arm64)
+ .Case("arm64_be", arm64_be)
.Case("mips", mips)
.Case("mipsel", mipsel)
.Case("mips64", mips64)
@@ -210,7 +213,7 @@ Triple::ArchType Triple::getArchTypeForLLVMName(StringRef Name) {
// Returns architecture name that is understood by the target assembler.
const char *Triple::getArchNameForAssembler() {
if (!isOSDarwin() && getVendor() != Triple::Apple)
- return NULL;
+ return nullptr;
return StringSwitch<const char*>(getArchName())
.Case("i386", "i386")
@@ -225,6 +228,7 @@ const char *Triple::getArchNameForAssembler() {
.Cases("armv7", "thumbv7", "armv7")
.Case("armeb", "armeb")
.Case("arm64", "arm64")
+ .Case("arm64_be", "arm64")
.Case("r600", "r600")
.Case("nvptx", "nvptx")
.Case("nvptx64", "nvptx64")
@@ -232,7 +236,7 @@ const char *Triple::getArchNameForAssembler() {
.Case("amdil", "amdil")
.Case("spir", "spir")
.Case("spir64", "spir64")
- .Default(NULL);
+ .Default(nullptr);
}
static Triple::ArchType parseArch(StringRef ArchName) {
@@ -257,6 +261,7 @@ static Triple::ArchType parseArch(StringRef ArchName) {
.Case("thumbeb", Triple::thumbeb)
.StartsWith("thumbebv", Triple::thumbeb)
.Case("arm64", Triple::arm64)
+ .Case("arm64_be", Triple::arm64_be)
.Case("msp430", Triple::msp430)
.Cases("mips", "mipseb", "mipsallegrex", Triple::mips)
.Cases("mipsel", "mipsallegrexel", Triple::mipsel)
@@ -797,6 +802,7 @@ static unsigned getArchPointerBitWidth(llvm::Triple::ArchType Arch) {
return 32;
case llvm::Triple::arm64:
+ case llvm::Triple::arm64_be:
case llvm::Triple::aarch64:
case llvm::Triple::aarch64_be:
case llvm::Triple::mips64:
@@ -832,6 +838,7 @@ Triple Triple::get32BitArchVariant() const {
case Triple::aarch64:
case Triple::aarch64_be:
case Triple::arm64:
+ case Triple::arm64_be:
case Triple::msp430:
case Triple::systemz:
case Triple::ppc64le:
@@ -899,6 +906,7 @@ Triple Triple::get64BitArchVariant() const {
case Triple::systemz:
case Triple::x86_64:
case Triple::arm64:
+ case Triple::arm64_be:
// Already 64-bit.
break;
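Threading arm64_be through Triple.cpp means touching every switch over ArchType: the name table, the assembler prefix, both string parsers, the pointer-width query, and the 32/64-bit variant maps. A compiler-enforced way to keep such switches exhaustive, sketched on a toy enum rather than the real Triple:

  // With no default case, -Wswitch warns on any query that a newly added
  // enumerator was not threaded through. Illustrative enum only.
  enum ArchType { arm64, arm64_be, x86_64 };

  const char *getArchTypeName(ArchType Kind) {
    switch (Kind) {
    case arm64:    return "arm64";
    case arm64_be: return "arm64_be"; // omitting this warns under -Wswitch
    case x86_64:   return "x86_64";
    }
    return nullptr; // unreachable when the switch is exhaustive
  }

  unsigned getArchPointerBitWidth(ArchType Kind) {
    switch (Kind) {
    case arm64:
    case arm64_be:
    case x86_64:
      return 64;
    }
    return 0;
  }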
diff --git a/lib/Support/Unix/Memory.inc b/lib/Support/Unix/Memory.inc
index 08cd34d..23b49b7 100644
--- a/lib/Support/Unix/Memory.inc
+++ b/lib/Support/Unix/Memory.inc
@@ -121,7 +121,7 @@ Memory::allocateMappedMemory(size_t NumBytes,
Protect, MMFlags, fd, 0);
if (Addr == MAP_FAILED) {
if (NearBlock) //Try again without a near hint
- return allocateMappedMemory(NumBytes, 0, PFlags, EC);
+ return allocateMappedMemory(NumBytes, nullptr, PFlags, EC);
EC = error_code(errno, system_category());
return MemoryBlock();
@@ -139,13 +139,13 @@ Memory::allocateMappedMemory(size_t NumBytes,
error_code
Memory::releaseMappedMemory(MemoryBlock &M) {
- if (M.Address == 0 || M.Size == 0)
+ if (M.Address == nullptr || M.Size == 0)
return error_code::success();
if (0 != ::munmap(M.Address, M.Size))
return error_code(errno, system_category());
- M.Address = 0;
+ M.Address = nullptr;
M.Size = 0;
return error_code::success();
@@ -153,7 +153,7 @@ Memory::releaseMappedMemory(MemoryBlock &M) {
error_code
Memory::protectMappedMemory(const MemoryBlock &M, unsigned Flags) {
- if (M.Address == 0 || M.Size == 0)
+ if (M.Address == nullptr || M.Size == 0)
return error_code::success();
if (!Flags)
@@ -203,7 +203,7 @@ Memory::AllocateRWX(size_t NumBytes, const MemoryBlock* NearBlock,
;
void* start = NearBlock ? (unsigned char*)NearBlock->base() +
- NearBlock->size() : 0;
+ NearBlock->size() : nullptr;
#if defined(__APPLE__) && (defined(__arm__) || defined(__arm64__))
void *pa = ::mmap(start, PageSize*NumPages, PROT_READ|PROT_EXEC,
@@ -214,7 +214,7 @@ Memory::AllocateRWX(size_t NumBytes, const MemoryBlock* NearBlock,
#endif
if (pa == MAP_FAILED) {
if (NearBlock) //Try again without a near hint
- return AllocateRWX(NumBytes, 0);
+ return AllocateRWX(NumBytes, nullptr);
MakeErrMsg(ErrMsg, "Can't allocate RWX Memory");
return MemoryBlock();
@@ -246,7 +246,7 @@ Memory::AllocateRWX(size_t NumBytes, const MemoryBlock* NearBlock,
}
bool Memory::ReleaseRWX(MemoryBlock &M, std::string *ErrMsg) {
- if (M.Address == 0 || M.Size == 0) return false;
+ if (M.Address == nullptr || M.Size == 0) return false;
if (0 != ::munmap(M.Address, M.Size))
return MakeErrMsg(ErrMsg, "Can't release RWX Memory");
return false;
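Both allocation paths in Memory.inc preserve the same fallback: a placement hint near an existing block is advisory, so on failure the call retries with a null hint instead of reporting an error. A hedged, POSIX-only sketch of that shape, with error handling trimmed:

  // "Try near, then retry anywhere": the hint address is best-effort.
  // MAP_ANON availability is assumed (BSD/macOS/glibc provide it).
  #include <sys/mman.h>
  #include <cstddef>

  void *allocateNear(void *Hint, size_t Bytes) {
    void *P = ::mmap(Hint, Bytes, PROT_READ | PROT_WRITE,
                     MAP_PRIVATE | MAP_ANON, -1, 0);
    if (P == MAP_FAILED && Hint != nullptr)
      return allocateNear(nullptr, Bytes); // retry without the near hint
    return P == MAP_FAILED ? nullptr : P;
  }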
diff --git a/lib/Support/Unix/Path.inc b/lib/Support/Unix/Path.inc
index 1c91053..519a016 100644
--- a/lib/Support/Unix/Path.inc
+++ b/lib/Support/Unix/Path.inc
@@ -89,7 +89,7 @@ namespace {
static error_code TempDir(SmallVectorImpl<char> &result) {
// FIXME: Don't use TMPDIR if program is SUID or SGID enabled.
- const char *dir = 0;
+ const char *dir = nullptr;
(dir = std::getenv("TMPDIR")) || (dir = std::getenv("TMP")) ||
(dir = std::getenv("TEMP")) || (dir = std::getenv("TEMPDIR")) ||
#ifdef P_tmpdir
@@ -246,7 +246,7 @@ error_code current_path(SmallVectorImpl<char> &result) {
#endif
while (true) {
- if (::getcwd(result.data(), result.capacity()) == 0) {
+ if (::getcwd(result.data(), result.capacity()) == nullptr) {
// See if there was a real error.
if (errno != errc::not_enough_memory)
return error_code(errno, system_category());
@@ -494,7 +494,7 @@ error_code mapped_file_region::init(int FD, bool CloseFD, uint64_t Offset) {
#ifdef MAP_FILE
flags |= MAP_FILE;
#endif
- Mapping = ::mmap(0, Size, prot, flags, FD, Offset);
+ Mapping = ::mmap(nullptr, Size, prot, flags, FD, Offset);
if (Mapping == MAP_FAILED)
return error_code(errno, system_category());
return error_code::success();
@@ -525,7 +525,7 @@ mapped_file_region::mapped_file_region(const Twine &path,
ec = init(ofd, true, offset);
if (ec)
- Mapping = 0;
+ Mapping = nullptr;
}
mapped_file_region::mapped_file_region(int fd,
@@ -545,7 +545,7 @@ mapped_file_region::mapped_file_region(int fd,
ec = init(fd, closefd, offset);
if (ec)
- Mapping = 0;
+ Mapping = nullptr;
}
mapped_file_region::~mapped_file_region() {
@@ -555,7 +555,7 @@ mapped_file_region::~mapped_file_region() {
mapped_file_region::mapped_file_region(mapped_file_region &&other)
: Mode(other.Mode), Size(other.Size), Mapping(other.Mapping) {
- other.Mapping = 0;
+ other.Mapping = nullptr;
}
mapped_file_region::mapmode mapped_file_region::flags() const {
@@ -587,7 +587,7 @@ error_code detail::directory_iterator_construct(detail::DirIterState &it,
StringRef path){
SmallString<128> path_null(path);
DIR *directory = ::opendir(path_null.c_str());
- if (directory == 0)
+ if (!directory)
return error_code(errno, system_category());
it.IterationHandle = reinterpret_cast<intptr_t>(directory);
@@ -608,9 +608,9 @@ error_code detail::directory_iterator_destruct(detail::DirIterState &it) {
error_code detail::directory_iterator_increment(detail::DirIterState &it) {
errno = 0;
dirent *cur_dir = ::readdir(reinterpret_cast<DIR *>(it.IterationHandle));
- if (cur_dir == 0 && errno != 0) {
+ if (cur_dir == nullptr && errno != 0) {
return error_code(errno, system_category());
- } else if (cur_dir != 0) {
+ } else if (cur_dir != nullptr) {
StringRef name(cur_dir->d_name, NAMLEN(cur_dir));
if ((name.size() == 1 && name[0] == '.') ||
(name.size() == 2 && name[0] == '.' && name[1] == '.'))
@@ -630,7 +630,7 @@ error_code get_magic(const Twine &path, uint32_t len,
// Open path.
std::FILE *file = std::fopen(Path.data(), "rb");
- if (file == 0)
+ if (!file)
return error_code(errno, system_category());
// Reserve storage.
@@ -667,7 +667,7 @@ error_code map_file_pages(const Twine &path, off_t file_offset, size_t size,
#ifdef MAP_FILE
flags |= MAP_FILE;
#endif
- result = ::mmap(0, size, prot, flags, fd, file_offset);
+ result = ::mmap(nullptr, size, prot, flags, fd, file_offset);
if (result == MAP_FAILED) {
return error_code(errno, system_category());
}
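The directory-iterator hunk keeps the classic readdir() error discipline: a null return is ambiguous (end of stream or failure), so errno is cleared before each call and consulted only when the return is null. A small standalone sketch of the idiom:

  // Distinguish "end of directory" from a real readdir() failure.
  #include <dirent.h>
  #include <cerrno>

  int countEntries(DIR *D) {
    int N = 0;
    for (;;) {
      errno = 0;
      dirent *E = ::readdir(D);
      if (!E)
        return errno ? -errno : N; // error vs. normal end of stream
      ++N;
    }
  }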
diff --git a/lib/Support/Unix/Process.inc b/lib/Support/Unix/Process.inc
index 9fb4356..8faa638 100644
--- a/lib/Support/Unix/Process.inc
+++ b/lib/Support/Unix/Process.inc
@@ -270,7 +270,7 @@ static bool terminalHasColors(int fd) {
MutexGuard G(M);
int errret = 0;
- if (setupterm((char *)0, fd, &errret) != 0)
+ if (setupterm((char *)nullptr, fd, &errret) != 0)
// Regardless of why, if we can't get terminfo, we shouldn't try to print
// colors.
return false;
@@ -292,7 +292,7 @@ static bool terminalHasColors(int fd) {
// Now extract the structure allocated by setupterm and free its memory
// through a really silly dance.
- struct term *termp = set_curterm((struct term *)0);
+ struct term *termp = set_curterm((struct term *)nullptr);
(void)del_curterm(termp); // Drop any errors here.
// Return true if we found a color capabilities for the current terminal.
diff --git a/lib/Support/Unix/Program.inc b/lib/Support/Unix/Program.inc
index b4df928..1225a9c 100644
--- a/lib/Support/Unix/Program.inc
+++ b/lib/Support/Unix/Program.inc
@@ -70,7 +70,7 @@ sys::FindProgramByName(const std::string& progName) {
// Get the path. If its empty, we can't do anything to find it.
const char *PathStr = getenv("PATH");
- if (PathStr == 0)
+ if (!PathStr)
return "";
// Now we have a colon separated list of directories to search; try them.
@@ -99,7 +99,7 @@ sys::FindProgramByName(const std::string& progName) {
}
static bool RedirectIO(const StringRef *Path, int FD, std::string* ErrMsg) {
- if (Path == 0) // Noop
+ if (!Path) // Noop
return false;
std::string File;
if (Path->empty())
@@ -129,7 +129,7 @@ static bool RedirectIO(const StringRef *Path, int FD, std::string* ErrMsg) {
#ifdef HAVE_POSIX_SPAWN
static bool RedirectIO_PS(const std::string *Path, int FD, std::string *ErrMsg,
posix_spawn_file_actions_t *FileActions) {
- if (Path == 0) // Noop
+ if (!Path) // Noop
return false;
const char *File;
if (Path->empty())
@@ -195,7 +195,7 @@ static bool Execute(ProcessInfo &PI, StringRef Program, const char **args,
#ifdef HAVE_POSIX_SPAWN
if (memoryLimit == 0) {
posix_spawn_file_actions_t FileActionsStore;
- posix_spawn_file_actions_t *FileActions = 0;
+ posix_spawn_file_actions_t *FileActions = nullptr;
// If we call posix_spawn_file_actions_addopen we have to make sure the
// c strings we pass to it stay alive until the call to posix_spawn,
@@ -203,7 +203,7 @@ static bool Execute(ProcessInfo &PI, StringRef Program, const char **args,
std::string RedirectsStorage[3];
if (redirects) {
- std::string *RedirectsStr[3] = {0, 0, 0};
+ std::string *RedirectsStr[3] = {nullptr, nullptr, nullptr};
for (int I = 0; I < 3; ++I) {
if (redirects[I]) {
RedirectsStorage[I] = *redirects[I];
@@ -218,7 +218,7 @@ static bool Execute(ProcessInfo &PI, StringRef Program, const char **args,
if (RedirectIO_PS(RedirectsStr[0], 0, ErrMsg, FileActions) ||
RedirectIO_PS(RedirectsStr[1], 1, ErrMsg, FileActions))
return false;
- if (redirects[1] == 0 || redirects[2] == 0 ||
+ if (redirects[1] == nullptr || redirects[2] == nullptr ||
*redirects[1] != *redirects[2]) {
// Just redirect stderr
if (RedirectIO_PS(RedirectsStr[2], 2, ErrMsg, FileActions))
@@ -242,8 +242,9 @@ static bool Execute(ProcessInfo &PI, StringRef Program, const char **args,
// Explicitly initialized to prevent what appears to be a valgrind false
// positive.
pid_t PID = 0;
- int Err = posix_spawn(&PID, Program.str().c_str(), FileActions, /*attrp*/0,
- const_cast<char **>(args), const_cast<char **>(envp));
+ int Err = posix_spawn(&PID, Program.str().c_str(), FileActions,
+ /*attrp*/nullptr, const_cast<char **>(args),
+ const_cast<char **>(envp));
if (FileActions)
posix_spawn_file_actions_destroy(FileActions);
@@ -294,7 +295,7 @@ static bool Execute(ProcessInfo &PI, StringRef Program, const char **args,
// Execute!
std::string PathStr = Program;
- if (envp != 0)
+ if (envp != nullptr)
execve(PathStr.c_str(),
const_cast<char **>(args),
const_cast<char **>(envp));
@@ -360,7 +361,7 @@ ProcessInfo sys::Wait(const ProcessInfo &PI, unsigned SecondsToWait,
// Turn off the alarm and restore the signal handler
alarm(0);
- sigaction(SIGALRM, &Old, 0);
+ sigaction(SIGALRM, &Old, nullptr);
// Wait for child to die
if (wait(&status) != ChildPid)
@@ -381,7 +382,7 @@ ProcessInfo sys::Wait(const ProcessInfo &PI, unsigned SecondsToWait,
// We exited normally without timeout, so turn off the timer.
if (SecondsToWait && !WaitUntilTerminates) {
alarm(0);
- sigaction(SIGALRM, &Old, 0);
+ sigaction(SIGALRM, &Old, nullptr);
}
// Return the proper exit status. Detect error conditions
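In the Program.inc hunks, FileActions starts out as nullptr and is only pointed at storage when redirection is requested, because posix_spawn() treats null file_actions and attrp as "inherit everything". A minimal sketch of that call shape; the program path and arguments are illustrative, and real code must also check the return value:

  #include <spawn.h>
  extern char **environ;

  int spawnEcho() {
    pid_t PID = 0;
    char *const Argv[] = {const_cast<char *>("/bin/echo"),
                          const_cast<char *>("hi"), nullptr};
    // Null file_actions/attrp: no redirection, default attributes.
    return ::posix_spawn(&PID, "/bin/echo", /*file_actions=*/nullptr,
                         /*attrp=*/nullptr, Argv, environ);
  }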
diff --git a/lib/Support/Unix/Signals.inc b/lib/Support/Unix/Signals.inc
index b4c78d6..1841fea 100644
--- a/lib/Support/Unix/Signals.inc
+++ b/lib/Support/Unix/Signals.inc
@@ -44,7 +44,7 @@ static RETSIGTYPE SignalHandler(int Sig); // defined below.
static SmartMutex<true> SignalsMutex;
/// InterruptFunction - The function to call if ctrl-c is pressed.
-static void (*InterruptFunction)() = 0;
+static void (*InterruptFunction)() = nullptr;
static std::vector<std::string> FilesToRemove;
static std::vector<std::pair<void(*)(void*), void*> > CallBacksToRun;
@@ -55,7 +55,7 @@ static std::vector<std::pair<void(*)(void*), void*> > CallBacksToRun;
static const int IntSigs[] = {
SIGHUP, SIGINT, SIGPIPE, SIGTERM, SIGUSR1, SIGUSR2
};
-static const int *const IntSigsEnd = array_endof(IntSigs);
+static const int *const IntSigsEnd = std::end(IntSigs);
// KillSigs - Signals that represent that we have a bug, and our prompt
// termination has been ordered.
@@ -74,7 +74,7 @@ static const int KillSigs[] = {
, SIGEMT
#endif
};
-static const int *const KillSigsEnd = array_endof(KillSigs);
+static const int *const KillSigsEnd = std::end(KillSigs);
static unsigned NumRegisteredSignals = 0;
static struct {
@@ -113,7 +113,7 @@ static void UnregisterHandlers() {
// Restore all of the signal handlers to how they were before we showed up.
for (unsigned i = 0, e = NumRegisteredSignals; i != e; ++i)
sigaction(RegisteredSignalInfo[i].SigNo,
- &RegisteredSignalInfo[i].SA, 0);
+ &RegisteredSignalInfo[i].SA, nullptr);
NumRegisteredSignals = 0;
}
@@ -160,7 +160,7 @@ static RETSIGTYPE SignalHandler(int Sig) {
// Unmask all potentially blocked kill signals.
sigset_t SigMask;
sigfillset(&SigMask);
- sigprocmask(SIG_UNBLOCK, &SigMask, 0);
+ sigprocmask(SIG_UNBLOCK, &SigMask, nullptr);
SignalsMutex.acquire();
RemoveFilesToRemove();
@@ -169,7 +169,7 @@ static RETSIGTYPE SignalHandler(int Sig) {
if (InterruptFunction) {
void (*IF)() = InterruptFunction;
SignalsMutex.release();
- InterruptFunction = 0;
+ InterruptFunction = nullptr;
IF(); // run the interrupt function.
return;
}
@@ -212,7 +212,7 @@ void llvm::sys::SetInterruptFunction(void (*IF)()) {
bool llvm::sys::RemoveFileOnSignal(StringRef Filename,
std::string* ErrMsg) {
SignalsMutex.acquire();
- std::string *OldPtr = FilesToRemove.empty() ? 0 : &FilesToRemove[0];
+ std::string *OldPtr = FilesToRemove.empty() ? nullptr : &FilesToRemove[0];
FilesToRemove.push_back(Filename);
// We want to call 'c_str()' on every std::string in this vector so that if
@@ -279,8 +279,8 @@ void llvm::sys::PrintStackTrace(FILE *FD) {
const char* name = strrchr(dlinfo.dli_fname, '/');
int nwidth;
- if (name == NULL) nwidth = strlen(dlinfo.dli_fname);
- else nwidth = strlen(name) - 1;
+ if (!name) nwidth = strlen(dlinfo.dli_fname);
+ else nwidth = strlen(name) - 1;
if (nwidth > width) width = nwidth;
}
@@ -292,22 +292,22 @@ void llvm::sys::PrintStackTrace(FILE *FD) {
fprintf(FD, "%-2d", i);
const char* name = strrchr(dlinfo.dli_fname, '/');
- if (name == NULL) fprintf(FD, " %-*s", width, dlinfo.dli_fname);
- else fprintf(FD, " %-*s", width, name+1);
+ if (!name) fprintf(FD, " %-*s", width, dlinfo.dli_fname);
+ else fprintf(FD, " %-*s", width, name+1);
fprintf(FD, " %#0*lx",
(int)(sizeof(void*) * 2) + 2, (unsigned long)StackTrace[i]);
- if (dlinfo.dli_sname != NULL) {
+ if (dlinfo.dli_sname != nullptr) {
fputc(' ', FD);
# if HAVE_CXXABI_H
int res;
- char* d = abi::__cxa_demangle(dlinfo.dli_sname, NULL, NULL, &res);
+ char* d = abi::__cxa_demangle(dlinfo.dli_sname, nullptr, nullptr, &res);
# else
char* d = NULL;
# endif
- if (d == NULL) fputs(dlinfo.dli_sname, FD);
- else fputs(d, FD);
+ if (!d) fputs(dlinfo.dli_sname, FD);
+ else fputs(d, FD);
free(d);
// FIXME: When we move to C++11, use %t length modifier. It's not in
@@ -331,7 +331,7 @@ static void PrintStackTraceSignalHandler(void *) {
/// PrintStackTraceOnErrorSignal - When an error signal (such as SIGABRT or
/// SIGSEGV) is delivered to the process, print a stack trace and then exit.
void llvm::sys::PrintStackTraceOnErrorSignal() {
- AddSignalHandler(PrintStackTraceSignalHandler, 0);
+ AddSignalHandler(PrintStackTraceSignalHandler, nullptr);
#if defined(__APPLE__) && defined(ENABLE_CRASH_OVERRIDES)
// Environment variable to disable any kind of crash dialog.
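The array_endof-to-std::end change in Signals.inc is a straight C++11 substitution: std::end() deduces the element count of a built-in array from its type, so the hand-rolled helper can be retired. In isolation:

  #include <iterator>

  static const int IntSigs[] = {1, 2, 13, 15};
  // Equivalent to IntSigs + 4; the length is deduced at compile time.
  static const int *const IntSigsEnd = std::end(IntSigs);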
diff --git a/lib/Support/Unix/TimeValue.inc b/lib/Support/Unix/TimeValue.inc
index 80532b0..7d4acf7 100644
--- a/lib/Support/Unix/TimeValue.inc
+++ b/lib/Support/Unix/TimeValue.inc
@@ -26,15 +26,17 @@ std::string TimeValue::str() const {
struct tm Storage;
struct tm *LT = ::localtime_r(&OurTime, &Storage);
assert(LT);
- char Buffer[25];
- strftime(Buffer, 25, "%b %e %H:%M %Y", LT);
- return std::string(Buffer);
+ char Buffer1[sizeof("YYYY-MM-DD HH:MM:SS")];
+ strftime(Buffer1, sizeof(Buffer1), "%Y-%m-%d %H:%M:%S", LT);
+ char Buffer2[sizeof("YYYY-MM-DD HH:MM:SS.MMMUUUNNN")];
+ snprintf(Buffer2, sizeof(Buffer2), "%s.%.9u", Buffer1, this->nanoseconds());
+ return std::string(Buffer2);
}
TimeValue TimeValue::now() {
struct timeval the_time;
timerclear(&the_time);
- if (0 != ::gettimeofday(&the_time,0)) {
+ if (0 != ::gettimeofday(&the_time,nullptr)) {
// This is *really* unlikely to occur because the only gettimeofday
// errors concern the timezone parameter, which we're passing in as null.
// In the unlikely case it does happen, just return MinTime, no error
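The new TimeValue::str() buffers are sized with sizeof on a template literal, so the array length, the format string, and the terminating NUL stay in sync by construction instead of by a magic number like the old 25. A standalone sketch of the idiom:

  #include <ctime>
  #include <cstdio>

  void printTimestamp(const struct tm *LT, unsigned Nanos) {
    char Buf[sizeof("YYYY-MM-DD HH:MM:SS")]; // 20 bytes, NUL included
    strftime(Buf, sizeof(Buf), "%Y-%m-%d %H:%M:%S", LT);
    std::printf("%s.%.9u\n", Buf, Nanos); // 9-digit nanosecond field
  }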
diff --git a/lib/Support/Windows/DynamicLibrary.inc b/lib/Support/Windows/DynamicLibrary.inc
index 504471e..5d0278f 100644
--- a/lib/Support/Windows/DynamicLibrary.inc
+++ b/lib/Support/Windows/DynamicLibrary.inc
@@ -58,7 +58,7 @@ extern "C" {
stricmp(ModuleName, "msvcr70") != 0 &&
#ifndef __MINGW32__
// Mingw32 uses msvcrt.dll by default. Don't ignore it.
- // Otherwise, user should be aware, what he's doing :)
+ // Otherwise the user should be aware what they are doing.
stricmp(ModuleName, "msvcrt") != 0 &&
#endif
stricmp(ModuleName, "msvcrt20") != 0 &&
diff --git a/lib/Support/Windows/Process.inc b/lib/Support/Windows/Process.inc
index a87c9e8..c3df801 100644
--- a/lib/Support/Windows/Process.inc
+++ b/lib/Support/Windows/Process.inc
@@ -82,16 +82,14 @@ TimeValue self_process::get_system_time() const {
return getTimeValueFromFILETIME(KernelTime);
}
-// This function retrieves the page size using GetSystemInfo and is present
-// solely so it can be called once to initialize the self_process member below.
+// This function retrieves the page size using GetNativeSystemInfo() and is
+// present solely so it can be called once to initialize the self_process member
+// below.
static unsigned getPageSize() {
- // NOTE: A 32-bit application running under WOW64 is supposed to use
- // GetNativeSystemInfo. However, this interface is not present prior
- // to Windows XP so to use it requires dynamic linking. It is not clear
- // how this affects the reported page size, if at all. One could argue
- // that LLVM ought to run as 64-bits on a 64-bit system, anyway.
+ // GetNativeSystemInfo() provides the physical page size which may differ
+ // from GetSystemInfo() in 32-bit applications running under WOW64.
SYSTEM_INFO info;
- GetSystemInfo(&info);
+ GetNativeSystemInfo(&info);
// FIXME: FileOffset in MapViewOfFile() should be aligned to not dwPageSize,
// but dwAllocationGranularity.
return static_cast<unsigned>(info.dwPageSize);
diff --git a/lib/Support/Windows/TimeValue.inc b/lib/Support/Windows/TimeValue.inc
index 6c59024..0223ab4 100644
--- a/lib/Support/Windows/TimeValue.inc
+++ b/lib/Support/Windows/TimeValue.inc
@@ -12,6 +12,8 @@
//===----------------------------------------------------------------------===//
#include "WindowsSupport.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
#include <cctype>
#include <time.h>
@@ -32,6 +34,7 @@ TimeValue TimeValue::now() {
}
std::string TimeValue::str() const {
+ std::string S;
struct tm *LT;
#ifdef __MINGW32__
// Old versions of mingw don't have _localtime64_s. Remove this once we drop support
@@ -47,13 +50,11 @@ std::string TimeValue::str() const {
LT = &Storage;
#endif
- char Buffer[25];
- // FIXME: the windows version of strftime doesn't support %e
- strftime(Buffer, 25, "%b %d %H:%M %Y", LT);
- assert((Buffer[3] == ' ' && isdigit(Buffer[5]) && Buffer[6] == ' ') &&
- "Unexpected format in strftime()!");
- // Emulate %e on %d to mute '0'.
- if (Buffer[4] == '0')
- Buffer[4] = ' ';
- return std::string(Buffer);
+ char Buffer[sizeof("YYYY-MM-DD HH:MM:SS")];
+ strftime(Buffer, sizeof(Buffer), "%Y-%m-%d %H:%M:%S", LT);
+ raw_string_ostream OS(S);
+ OS << format("%s.%.9u", static_cast<const char *>(Buffer),
+ this->nanoseconds());
+ OS.flush();
+ return S;
}
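The Windows version builds the same timestamp through LLVM's own stream layer rather than snprintf; per the includes and calls in the hunk above, the shape is roughly:

  #include "llvm/Support/Format.h"
  #include "llvm/Support/raw_ostream.h"
  #include <string>

  std::string stamp(const char *Date, unsigned Nanos) {
    std::string S;
    llvm::raw_string_ostream OS(S);
    OS << llvm::format("%s.%.9u", Date, Nanos);
    OS.flush(); // push buffered bytes into S before returning
    return S;
  }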
diff --git a/lib/Support/YAMLParser.cpp b/lib/Support/YAMLParser.cpp
index 73ce5e0..3be02ee 100644
--- a/lib/Support/YAMLParser.cpp
+++ b/lib/Support/YAMLParser.cpp
@@ -1876,14 +1876,14 @@ Node *KeyValueNode::getValue() {
void MappingNode::increment() {
if (failed()) {
IsAtEnd = true;
- CurrentEntry = 0;
+ CurrentEntry = nullptr;
return;
}
if (CurrentEntry) {
CurrentEntry->skip();
if (Type == MT_Inline) {
IsAtEnd = true;
- CurrentEntry = 0;
+ CurrentEntry = nullptr;
return;
}
}
@@ -1896,13 +1896,13 @@ void MappingNode::increment() {
case Token::TK_BlockEnd:
getNext();
IsAtEnd = true;
- CurrentEntry = 0;
+ CurrentEntry = nullptr;
break;
default:
setError("Unexpected token. Expected Key or Block End", T);
case Token::TK_Error:
IsAtEnd = true;
- CurrentEntry = 0;
+ CurrentEntry = nullptr;
}
} else {
switch (T.Kind) {
@@ -1915,14 +1915,14 @@ void MappingNode::increment() {
case Token::TK_Error:
// Set this to end iterator.
IsAtEnd = true;
- CurrentEntry = 0;
+ CurrentEntry = nullptr;
break;
default:
setError( "Unexpected token. Expected Key, Flow Entry, or Flow "
"Mapping End."
, T);
IsAtEnd = true;
- CurrentEntry = 0;
+ CurrentEntry = nullptr;
}
}
}
@@ -1930,7 +1930,7 @@ void MappingNode::increment() {
void SequenceNode::increment() {
if (failed()) {
IsAtEnd = true;
- CurrentEntry = 0;
+ CurrentEntry = nullptr;
return;
}
if (CurrentEntry)
@@ -1941,37 +1941,37 @@ void SequenceNode::increment() {
case Token::TK_BlockEntry:
getNext();
CurrentEntry = parseBlockNode();
- if (CurrentEntry == 0) { // An error occurred.
+ if (!CurrentEntry) { // An error occurred.
IsAtEnd = true;
- CurrentEntry = 0;
+ CurrentEntry = nullptr;
}
break;
case Token::TK_BlockEnd:
getNext();
IsAtEnd = true;
- CurrentEntry = 0;
+ CurrentEntry = nullptr;
break;
default:
setError( "Unexpected token. Expected Block Entry or Block End."
, T);
case Token::TK_Error:
IsAtEnd = true;
- CurrentEntry = 0;
+ CurrentEntry = nullptr;
}
} else if (SeqType == ST_Indentless) {
switch (T.Kind) {
case Token::TK_BlockEntry:
getNext();
CurrentEntry = parseBlockNode();
- if (CurrentEntry == 0) { // An error occurred.
+ if (!CurrentEntry) { // An error occurred.
IsAtEnd = true;
- CurrentEntry = 0;
+ CurrentEntry = nullptr;
}
break;
default:
case Token::TK_Error:
IsAtEnd = true;
- CurrentEntry = 0;
+ CurrentEntry = nullptr;
}
} else if (SeqType == ST_Flow) {
switch (T.Kind) {
@@ -1985,7 +1985,7 @@ void SequenceNode::increment() {
case Token::TK_Error:
// Set this to end iterator.
IsAtEnd = true;
- CurrentEntry = 0;
+ CurrentEntry = nullptr;
break;
case Token::TK_StreamEnd:
case Token::TK_DocumentEnd:
@@ -1993,13 +1993,13 @@ void SequenceNode::increment() {
setError("Could not find closing ]!", T);
// Set this to end iterator.
IsAtEnd = true;
- CurrentEntry = 0;
+ CurrentEntry = nullptr;
break;
default:
if (!WasPreviousTokenFlowEntry) {
setError("Expected , between entries!", T);
IsAtEnd = true;
- CurrentEntry = 0;
+ CurrentEntry = nullptr;
break;
}
// Otherwise it must be a flow entry.
@@ -2013,7 +2013,7 @@ void SequenceNode::increment() {
}
}
-Document::Document(Stream &S) : stream(S), Root(0) {
+Document::Document(Stream &S) : stream(S), Root(nullptr) {
// The tag map starts with two default mappings.
TagMap["!"] = "!";
TagMap["!!"] = "tag:yaml.org,2002:";
@@ -2070,7 +2070,7 @@ parse_property:
case Token::TK_Anchor:
if (AnchorInfo.Kind == Token::TK_Anchor) {
setError("Already encountered an anchor for this node!", T);
- return 0;
+ return nullptr;
}
AnchorInfo = getNext(); // Consume TK_Anchor.
T = peekNext();
@@ -2078,7 +2078,7 @@ parse_property:
case Token::TK_Tag:
if (TagInfo.Kind == Token::TK_Tag) {
setError("Already encountered a tag for this node!", T);
- return 0;
+ return nullptr;
}
TagInfo = getNext(); // Consume TK_Tag.
T = peekNext();
@@ -2146,10 +2146,10 @@ parse_property:
// !!null null.
return new (NodeAllocator) NullNode(stream.CurrentDoc);
case Token::TK_Error:
- return 0;
+ return nullptr;
}
llvm_unreachable("Control flow shouldn't reach here.");
- return 0;
+ return nullptr;
}
bool Document::parseDirectives() {
diff --git a/lib/Support/YAMLTraits.cpp b/lib/Support/YAMLTraits.cpp
index 5472e0e..e5f9494 100644
--- a/lib/Support/YAMLTraits.cpp
+++ b/lib/Support/YAMLTraits.cpp
@@ -47,7 +47,7 @@ Input::Input(StringRef InputContent,
void *DiagHandlerCtxt)
: IO(Ctxt),
Strm(new Stream(InputContent, SrcMgr)),
- CurrentNode(NULL) {
+ CurrentNode(nullptr) {
if (DiagHandler)
SrcMgr.setDiagHandler(DiagHandler, DiagHandlerCtxt);
DocIterator = Strm->begin();
@@ -158,10 +158,9 @@ void Input::endMapping() {
MapHNode *MN = dyn_cast_or_null<MapHNode>(CurrentNode);
if (!MN)
return;
- for (MapHNode::NameToNode::iterator i = MN->Mapping.begin(),
- End = MN->Mapping.end(); i != End; ++i) {
- if (!MN->isValidKey(i->first())) {
- setError(i->second, Twine("unknown key '") + i->first() + "'");
+ for (const auto &NN : MN->Mapping) {
+ if (!MN->isValidKey(NN.first())) {
+ setError(NN.second, Twine("unknown key '") + NN.first() + "'");
break;
}
}
@@ -255,9 +254,8 @@ bool Input::bitSetMatch(const char *Str, bool) {
return false;
if (SequenceHNode *SQ = dyn_cast<SequenceHNode>(CurrentNode)) {
unsigned Index = 0;
- for (std::vector<HNode *>::iterator i = SQ->Entries.begin(),
- End = SQ->Entries.end(); i != End; ++i) {
- if (ScalarHNode *SN = dyn_cast<ScalarHNode>(*i)) {
+ for (HNode *N : SQ->Entries) {
+ if (ScalarHNode *SN = dyn_cast<ScalarHNode>(N)) {
if (SN->value().equals(Str)) {
BitValuesUsed[Index] = true;
return true;
@@ -287,7 +285,7 @@ void Input::endBitSetScalar() {
}
}
-void Input::scalarString(StringRef &S) {
+void Input::scalarString(StringRef &S, bool) {
if (ScalarHNode *SN = dyn_cast<ScalarHNode>(CurrentNode)) {
S = SN->value();
} else {
@@ -319,9 +317,8 @@ Input::HNode *Input::createHNodes(Node *N) {
return new ScalarHNode(N, KeyStr);
} else if (SequenceNode *SQ = dyn_cast<SequenceNode>(N)) {
SequenceHNode *SQHNode = new SequenceHNode(N);
- for (SequenceNode::iterator i = SQ->begin(), End = SQ->end(); i != End;
- ++i) {
- HNode *Entry = this->createHNodes(i);
+ for (Node &SN : *SQ) {
+ HNode *Entry = this->createHNodes(&SN);
if (EC)
break;
SQHNode->Entries.push_back(Entry);
@@ -329,9 +326,8 @@ Input::HNode *Input::createHNodes(Node *N) {
return SQHNode;
} else if (MappingNode *Map = dyn_cast<MappingNode>(N)) {
MapHNode *mapHNode = new MapHNode(N);
- for (MappingNode::iterator i = Map->begin(), End = Map->end(); i != End;
- ++i) {
- ScalarNode *KeyScalar = dyn_cast<ScalarNode>(i->getKey());
+ for (KeyValueNode &KVN : *Map) {
+ ScalarNode *KeyScalar = dyn_cast<ScalarNode>(KVN.getKey());
StringStorage.clear();
StringRef KeyStr = KeyScalar->getValue(StringStorage);
if (!StringStorage.empty()) {
@@ -341,7 +337,7 @@ Input::HNode *Input::createHNodes(Node *N) {
memcpy(Buf, &StringStorage[0], Len);
KeyStr = StringRef(Buf, Len);
}
- HNode *ValueHNode = this->createHNodes(i->getValue());
+ HNode *ValueHNode = this->createHNodes(KVN.getValue());
if (EC)
break;
mapHNode->Mapping[KeyStr] = ValueHNode;
@@ -351,14 +347,13 @@ Input::HNode *Input::createHNodes(Node *N) {
return new EmptyHNode(N);
} else {
setError(N, "unknown node kind");
- return NULL;
+ return nullptr;
}
}
bool Input::MapHNode::isValidKey(StringRef Key) {
- for (SmallVectorImpl<const char *>::iterator i = ValidKeys.begin(),
- End = ValidKeys.end(); i != End; ++i) {
- if (Key.equals(*i))
+ for (const char *K : ValidKeys) {
+ if (Key.equals(K))
return true;
}
return false;
@@ -373,17 +368,13 @@ bool Input::canElideEmptySequence() {
}
Input::MapHNode::~MapHNode() {
- for (MapHNode::NameToNode::iterator i = Mapping.begin(), End = Mapping.end();
- i != End; ++i) {
- delete i->second;
- }
+ for (auto &N : Mapping)
+ delete N.second;
}
Input::SequenceHNode::~SequenceHNode() {
- for (std::vector<HNode*>::iterator i = Entries.begin(), End = Entries.end();
- i != End; ++i) {
- delete *i;
- }
+ for (HNode *N : Entries)
+ delete N;
}
@@ -550,10 +541,7 @@ void Output::endBitSetScalar() {
this->outputUpToEndOfLine(" ]");
}
-void Output::scalarString(StringRef &S) {
- const char ScalarSafeChars[] = "abcdefghijklmnopqrstuvwxyz"
- "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_-/^., \t";
-
+void Output::scalarString(StringRef &S, bool MustQuote) {
this->newLineCheck();
if (S.empty()) {
// Print '' for the empty string because leaving the field empty is not
@@ -561,10 +549,8 @@ void Output::scalarString(StringRef &S) {
this->outputUpToEndOfLine("''");
return;
}
- if (S.find_first_not_of(ScalarSafeChars) == StringRef::npos &&
- !isspace(S.front()) && !isspace(S.back())) {
- // If the string consists only of safe characters, print it out without
- // quotes.
+ if (!MustQuote) {
+ // Only quote if we must.
this->outputUpToEndOfLine(S);
return;
}
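The scalarString() signature change moves the quoting decision out of Output and up to the caller, which now passes MustQuote. The whitelist scan deleted above is one policy such a caller could apply; reconstructed as a free function:

  // One possible MustQuote policy, matching the removed whitelist:
  // quote unless every character is "safe" and there is no leading or
  // trailing whitespace.
  #include <cctype>
  #include <string>

  bool mustQuoteScalar(const std::string &S) {
    static const char Safe[] = "abcdefghijklmnopqrstuvwxyz"
                               "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_-/^., \t";
    if (S.empty())
      return true;
    return S.find_first_not_of(Safe) != std::string::npos ||
           isspace((unsigned char)S.front()) ||
           isspace((unsigned char)S.back());
  }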
diff --git a/lib/Support/raw_ostream.cpp b/lib/Support/raw_ostream.cpp
index 3c45743..f55838e 100644
--- a/lib/Support/raw_ostream.cpp
+++ b/lib/Support/raw_ostream.cpp
@@ -87,8 +87,8 @@ void raw_ostream::SetBuffered() {
void raw_ostream::SetBufferAndMode(char *BufferStart, size_t Size,
BufferKind Mode) {
- assert(((Mode == Unbuffered && BufferStart == 0 && Size == 0) ||
- (Mode != Unbuffered && BufferStart && Size)) &&
+ assert(((Mode == Unbuffered && !BufferStart && Size == 0) ||
+ (Mode != Unbuffered && BufferStart && Size != 0)) &&
"stream must be unbuffered or have at least one byte");
// Make sure the current buffer is free of content (we can't flush here; the
// child buffer management logic will be in write_impl).
@@ -433,7 +433,7 @@ void format_object_base::home() {
raw_fd_ostream::raw_fd_ostream(const char *Filename, std::string &ErrorInfo,
sys::fs::OpenFlags Flags)
: Error(false), UseAtomicWrites(false), pos(0) {
- assert(Filename != 0 && "Filename is null");
+ assert(Filename && "Filename is null");
ErrorInfo.clear();
// Handle "-" as stdout. Note that when we do this, we consider ourself
diff --git a/lib/Support/regengine.inc b/lib/Support/regengine.inc
index 7e41f96..62d8c26 100644
--- a/lib/Support/regengine.inc
+++ b/lib/Support/regengine.inc
@@ -205,7 +205,7 @@ matcher(struct re_guts *g, const char *string, size_t nmatch,
if (nmatch == 1 && !g->backrefs)
break; /* no further info needed */
- /* oh my, he wants the subexpressions... */
+ /* oh my, they want the subexpressions... */
if (m->pmatch == NULL)
m->pmatch = (llvm_regmatch_t *)malloc((m->g->nsub + 1) *
sizeof(llvm_regmatch_t));
diff --git a/lib/TableGen/Main.cpp b/lib/TableGen/Main.cpp
index fd81ab4..476026d 100644
--- a/lib/TableGen/Main.cpp
+++ b/lib/TableGen/Main.cpp
@@ -17,6 +17,7 @@
#include "TGParser.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/FileSystem.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/ToolOutputFile.h"
#include "llvm/Support/system_error.h"
diff --git a/lib/TableGen/Record.cpp b/lib/TableGen/Record.cpp
index a43665b..c553a21 100644
--- a/lib/TableGen/Record.cpp
+++ b/lib/TableGen/Record.cpp
@@ -101,13 +101,13 @@ bool RecTy::baseClassOf(const RecTy *RHS) const{
}
Init *BitRecTy::convertValue(BitsInit *BI) {
- if (BI->getNumBits() != 1) return 0; // Only accept if just one bit!
+ if (BI->getNumBits() != 1) return nullptr; // Only accept if just one bit!
return BI->getBit(0);
}
Init *BitRecTy::convertValue(IntInit *II) {
int64_t Val = II->getValue();
- if (Val != 0 && Val != 1) return 0; // Only accept 0 or 1 for a bit!
+ if (Val != 0 && Val != 1) return nullptr; // Only accept 0 or 1 for a bit!
return BitInit::get(Val != 0);
}
@@ -116,7 +116,7 @@ Init *BitRecTy::convertValue(TypedInit *VI) {
RecTy *Ty = VI->getType();
if (isa<BitRecTy>(Ty) || isa<BitsRecTy>(Ty) || isa<IntRecTy>(Ty))
return VI; // Accept variable if it is already of bit type!
- return 0;
+ return nullptr;
}
bool BitRecTy::baseClassOf(const RecTy *RHS) const{
@@ -151,7 +151,7 @@ Init *BitsRecTy::convertValue(UnsetInit *UI) {
}
Init *BitsRecTy::convertValue(BitInit *UI) {
- if (Size != 1) return 0; // Can only convert single bit.
+ if (Size != 1) return nullptr; // Can only convert single bit.
return BitsInit::get(UI);
}
@@ -170,7 +170,7 @@ Init *BitsRecTy::convertValue(IntInit *II) {
int64_t Value = II->getValue();
// Make sure this bitfield is large enough to hold the integer value.
if (!canFitInBitfield(Value, Size))
- return 0;
+ return nullptr;
SmallVector<Init *, 16> NewBits(Size);
@@ -184,7 +184,7 @@ Init *BitsRecTy::convertValue(BitsInit *BI) {
// If the number of bits is right, return it. Otherwise we need to expand or
// truncate.
if (BI->getNumBits() == Size) return BI;
- return 0;
+ return nullptr;
}
Init *BitsRecTy::convertValue(TypedInit *VI) {
@@ -199,7 +199,7 @@ Init *BitsRecTy::convertValue(TypedInit *VI) {
return BitsInit::get(NewBits);
}
- return 0;
+ return nullptr;
}
bool BitsRecTy::baseClassOf(const RecTy *RHS) const{
@@ -219,7 +219,7 @@ Init *IntRecTy::convertValue(BitsInit *BI) {
if (BitInit *Bit = dyn_cast<BitInit>(BI->getBit(i))) {
Result |= Bit->getValue() << i;
} else {
- return 0;
+ return nullptr;
}
return IntInit::get(Result);
}
@@ -227,7 +227,7 @@ Init *IntRecTy::convertValue(BitsInit *BI) {
Init *IntRecTy::convertValue(TypedInit *TI) {
if (TI->getType()->typeIsConvertibleTo(this))
return TI; // Accept variable if already of the right type!
- return 0;
+ return nullptr;
}
bool IntRecTy::baseClassOf(const RecTy *RHS) const{
@@ -238,7 +238,7 @@ bool IntRecTy::baseClassOf(const RecTy *RHS) const{
Init *StringRecTy::convertValue(UnOpInit *BO) {
if (BO->getOpcode() == UnOpInit::CAST) {
Init *L = BO->getOperand()->convertInitializerTo(this);
- if (L == 0) return 0;
+ if (!L) return nullptr;
if (L != BO->getOperand())
return UnOpInit::get(UnOpInit::CAST, L, new StringRecTy);
return BO;
@@ -251,7 +251,7 @@ Init *StringRecTy::convertValue(BinOpInit *BO) {
if (BO->getOpcode() == BinOpInit::STRCONCAT) {
Init *L = BO->getLHS()->convertInitializerTo(this);
Init *R = BO->getRHS()->convertInitializerTo(this);
- if (L == 0 || R == 0) return 0;
+ if (!L || !R) return nullptr;
if (L != BO->getLHS() || R != BO->getRHS())
return BinOpInit::get(BinOpInit::STRCONCAT, L, R, new StringRecTy);
return BO;
@@ -264,7 +264,7 @@ Init *StringRecTy::convertValue(BinOpInit *BO) {
Init *StringRecTy::convertValue(TypedInit *TI) {
if (isa<StringRecTy>(TI->getType()))
return TI; // Accept variable if already of the right type!
- return 0;
+ return nullptr;
}
std::string ListRecTy::getAsString() const {
@@ -280,10 +280,10 @@ Init *ListRecTy::convertValue(ListInit *LI) {
if (Init *CI = LI->getElement(i)->convertInitializerTo(Ty))
Elements.push_back(CI);
else
- return 0;
+ return nullptr;
if (!isa<ListRecTy>(LI->getType()))
- return 0;
+ return nullptr;
return ListInit::get(Elements, this);
}
@@ -293,7 +293,7 @@ Init *ListRecTy::convertValue(TypedInit *TI) {
if (ListRecTy *LRT = dyn_cast<ListRecTy>(TI->getType()))
if (LRT->getElementType()->typeIsConvertibleTo(getElementType()))
return TI;
- return 0;
+ return nullptr;
}
bool ListRecTy::baseClassOf(const RecTy *RHS) const{
@@ -305,30 +305,30 @@ bool ListRecTy::baseClassOf(const RecTy *RHS) const{
Init *DagRecTy::convertValue(TypedInit *TI) {
if (TI->getType()->typeIsConvertibleTo(this))
return TI;
- return 0;
+ return nullptr;
}
Init *DagRecTy::convertValue(UnOpInit *BO) {
if (BO->getOpcode() == UnOpInit::CAST) {
Init *L = BO->getOperand()->convertInitializerTo(this);
- if (L == 0) return 0;
+ if (!L) return nullptr;
if (L != BO->getOperand())
return UnOpInit::get(UnOpInit::CAST, L, new DagRecTy);
return BO;
}
- return 0;
+ return nullptr;
}
Init *DagRecTy::convertValue(BinOpInit *BO) {
if (BO->getOpcode() == BinOpInit::CONCAT) {
Init *L = BO->getLHS()->convertInitializerTo(this);
Init *R = BO->getRHS()->convertInitializerTo(this);
- if (L == 0 || R == 0) return 0;
+ if (!L || !R) return nullptr;
if (L != BO->getLHS() || R != BO->getRHS())
return BinOpInit::get(BinOpInit::CONCAT, L, R, new DagRecTy);
return BO;
}
- return 0;
+ return nullptr;
}
RecordRecTy *RecordRecTy::get(Record *R) {
@@ -342,7 +342,7 @@ std::string RecordRecTy::getAsString() const {
Init *RecordRecTy::convertValue(DefInit *DI) {
// Ensure that DI is a subclass of Rec.
if (!DI->getDef()->isSubClassOf(Rec))
- return 0;
+ return nullptr;
return DI;
}
@@ -352,7 +352,7 @@ Init *RecordRecTy::convertValue(TypedInit *TI) {
if (RRT->getRecord()->isSubClassOf(getRecord()) ||
RRT->getRecord() == getRecord())
return TI;
- return 0;
+ return nullptr;
}
bool RecordRecTy::baseClassOf(const RecTy *RHS) const{
@@ -391,7 +391,7 @@ RecTy *llvm::resolveTypes(RecTy *T1, RecTy *T2) {
++i) {
RecordRecTy *SuperRecTy1 = RecordRecTy::get(*i);
RecTy *NewType1 = resolveTypes(SuperRecTy1, T2);
- if (NewType1 != 0) {
+ if (NewType1) {
if (NewType1 != SuperRecTy1) {
delete SuperRecTy1;
}
@@ -409,7 +409,7 @@ RecTy *llvm::resolveTypes(RecTy *T1, RecTy *T2) {
++i) {
RecordRecTy *SuperRecTy2 = RecordRecTy::get(*i);
RecTy *NewType2 = resolveTypes(T1, SuperRecTy2);
- if (NewType2 != 0) {
+ if (NewType2) {
if (NewType2 != SuperRecTy2) {
delete SuperRecTy2;
}
@@ -417,7 +417,7 @@ RecTy *llvm::resolveTypes(RecTy *T1, RecTy *T2) {
}
}
}
- return 0;
+ return nullptr;
}
@@ -462,7 +462,7 @@ BitsInit *BitsInit::get(ArrayRef<Init *> Range) {
FoldingSetNodeID ID;
ProfileBitsInit(ID, Range);
- void *IP = 0;
+ void *IP = nullptr;
if (BitsInit *I = ThePool.FindNodeOrInsertPos(ID, IP))
return I;
@@ -482,7 +482,7 @@ BitsInit::convertInitializerBitRange(const std::vector<unsigned> &Bits) const {
for (unsigned i = 0, e = Bits.size(); i != e; ++i) {
if (Bits[i] >= getNumBits())
- return 0;
+ return nullptr;
NewBits[i] = getBit(Bits[i]);
}
return BitsInit::get(NewBits);
@@ -516,8 +516,8 @@ Init *BitsInit::resolveReferences(Record &R, const RecordVal *RV) const {
bool Changed = false;
SmallVector<Init *, 16> NewBits(getNumBits());
- Init *CachedInit = 0;
- Init *CachedBitVar = 0;
+ Init *CachedInit = nullptr;
+ Init *CachedBitVar = nullptr;
bool CachedBitVarChanged = false;
for (unsigned i = 0, e = getNumBits(); i != e; ++i) {
@@ -590,7 +590,7 @@ IntInit::convertInitializerBitRange(const std::vector<unsigned> &Bits) const {
for (unsigned i = 0, e = Bits.size(); i != e; ++i) {
if (Bits[i] >= 64)
- return 0;
+ return nullptr;
NewBits[i] = BitInit::get(Value & (INT64_C(1) << Bits[i]));
}
@@ -623,18 +623,18 @@ static void ProfileListInit(FoldingSetNodeID &ID,
ListInit *ListInit::get(ArrayRef<Init *> Range, RecTy *EltTy) {
typedef FoldingSet<ListInit> Pool;
static Pool ThePool;
+ static std::vector<std::unique_ptr<ListInit>> TheActualPool;
- // Just use the FoldingSetNodeID to compute a hash. Use a DenseMap
- // for actual storage.
FoldingSetNodeID ID;
ProfileListInit(ID, Range, EltTy);
- void *IP = 0;
+ void *IP = nullptr;
if (ListInit *I = ThePool.FindNodeOrInsertPos(ID, IP))
return I;
ListInit *I = new ListInit(Range, EltTy);
ThePool.InsertNode(I, IP);
+ TheActualPool.push_back(std::unique_ptr<ListInit>(I));
return I;
}
@@ -651,7 +651,7 @@ ListInit::convertInitListSlice(const std::vector<unsigned> &Elements) const {
std::vector<Init*> Vals;
for (unsigned i = 0, e = Elements.size(); i != e; ++i) {
if (Elements[i] >= getSize())
- return 0;
+ return nullptr;
Vals.push_back(getElement(Elements[i]));
}
return ListInit::get(Vals, getType());
@@ -660,7 +660,7 @@ ListInit::convertInitListSlice(const std::vector<unsigned> &Elements) const {
Record *ListInit::getElementAsRecord(unsigned i) const {
assert(i < Values.size() && "List element index out of range!");
DefInit *DI = dyn_cast<DefInit>(Values[i]);
- if (DI == 0)
+ if (!DI)
PrintFatalError("Expected record in list!");
return DI->getDef();
}
@@ -690,14 +690,14 @@ Init *ListInit::resolveReferences(Record &R, const RecordVal *RV) const {
Init *ListInit::resolveListElementReference(Record &R, const RecordVal *IRV,
unsigned Elt) const {
if (Elt >= getSize())
- return 0; // Out of range reference.
+ return nullptr; // Out of range reference.
Init *E = getElement(Elt);
// If the element is set to some value, or if we are resolving a reference
// to a specific variable and that variable is explicitly unset, then
// replace the VarListElementInit with it.
if (IRV || !isa<UnsetInit>(E))
return E;
- return 0;
+ return nullptr;
}
std::string ListInit::getAsString() const {
@@ -714,7 +714,7 @@ Init *OpInit::resolveListElementReference(Record &R, const RecordVal *IRV,
Init *Resolved = resolveReferences(R, IRV);
OpInit *OResolved = dyn_cast<OpInit>(Resolved);
if (OResolved) {
- Resolved = OResolved->Fold(&R, 0);
+ Resolved = OResolved->Fold(&R, nullptr);
}
if (Resolved != this) {
@@ -728,7 +728,7 @@ Init *OpInit::resolveListElementReference(Record &R, const RecordVal *IRV,
}
}
- return 0;
+ return nullptr;
}
Init *OpInit::getBit(unsigned Bit) const {
@@ -813,7 +813,7 @@ Init *UnOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) const {
if (ListInit *LHSl = dyn_cast<ListInit>(LHS)) {
if (LHSl->getSize() == 0) {
assert(0 && "Empty list in car");
- return 0;
+ return nullptr;
}
return LHSl->getElement(0);
}
@@ -823,7 +823,7 @@ Init *UnOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) const {
if (ListInit *LHSl = dyn_cast<ListInit>(LHS)) {
if (LHSl->getSize() == 0) {
assert(0 && "Empty list in cdr");
- return 0;
+ return nullptr;
}
// Note the +1. We can't just pass the result of getValues()
// directly.
@@ -862,8 +862,8 @@ Init *UnOpInit::resolveReferences(Record &R, const RecordVal *RV) const {
Init *lhs = LHS->resolveReferences(R, RV);
if (LHS != lhs)
- return (UnOpInit::get(getOpcode(), lhs, getType()))->Fold(&R, 0);
- return Fold(&R, 0);
+ return (UnOpInit::get(getOpcode(), lhs, getType()))->Fold(&R, nullptr);
+ return Fold(&R, nullptr);
}
std::string UnOpInit::getAsString() const {
@@ -902,7 +902,7 @@ Init *BinOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) const {
if (LHSs && RHSs) {
DefInit *LOp = dyn_cast<DefInit>(LHSs->getOperator());
DefInit *ROp = dyn_cast<DefInit>(RHSs->getOperator());
- if (LOp == 0 || ROp == 0 || LOp->getDef() != ROp->getDef())
+ if (!LOp || !ROp || LOp->getDef() != ROp->getDef())
PrintFatalError("Concated Dag operators do not match!");
std::vector<Init*> Args;
std::vector<std::string> ArgNames;
@@ -918,6 +918,18 @@ Init *BinOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) const {
}
break;
}
+ case LISTCONCAT: {
+ ListInit *LHSs = dyn_cast<ListInit>(LHS);
+ ListInit *RHSs = dyn_cast<ListInit>(RHS);
+ if (LHSs && RHSs) {
+ std::vector<Init *> Args;
+ Args.insert(Args.end(), LHSs->begin(), LHSs->end());
+ Args.insert(Args.end(), RHSs->begin(), RHSs->end());
+ return ListInit::get(
+ Args, static_cast<ListRecTy *>(LHSs->getType())->getElementType());
+ }
+ break;
+ }
case STRCONCAT: {
StringInit *LHSs = dyn_cast<StringInit>(LHS);
StringInit *RHSs = dyn_cast<StringInit>(RHS);
@@ -974,8 +986,8 @@ Init *BinOpInit::resolveReferences(Record &R, const RecordVal *RV) const {
Init *rhs = RHS->resolveReferences(R, RV);
if (LHS != lhs || RHS != rhs)
- return (BinOpInit::get(getOpcode(), lhs, rhs, getType()))->Fold(&R, 0);
- return Fold(&R, 0);
+ return (BinOpInit::get(getOpcode(), lhs, rhs, getType()))->Fold(&R,nullptr);
+ return Fold(&R, nullptr);
}
std::string BinOpInit::getAsString() const {
@@ -987,6 +999,7 @@ std::string BinOpInit::getAsString() const {
case SRA: Result = "!sra"; break;
case SRL: Result = "!srl"; break;
case EQ: Result = "!eq"; break;
+ case LISTCONCAT: Result = "!listconcat"; break;
case STRCONCAT: Result = "!strconcat"; break;
}
return Result + "(" + LHS->getAsString() + ", " + RHS->getAsString() + ")";
@@ -1031,11 +1044,7 @@ static Init *EvaluateOperation(OpInit *RHSo, Init *LHS, Init *Arg,
if (TArg && TArg->getType()->getAsString() == "dag") {
Init *Result = ForeachHelper(LHS, Arg, RHSo, Type,
CurRec, CurMultiClass);
- if (Result != 0) {
- return Result;
- } else {
- return 0;
- }
+ return Result;
}
for (int i = 0; i < RHSo->getNumOperands(); ++i) {
@@ -1044,7 +1053,7 @@ static Init *EvaluateOperation(OpInit *RHSo, Init *LHS, Init *Arg,
if (RHSoo) {
Init *Result = EvaluateOperation(RHSoo, LHS, Arg,
Type, CurRec, CurMultiClass);
- if (Result != 0) {
+ if (Result) {
NewOperands.push_back(Result);
} else {
NewOperands.push_back(Arg);
@@ -1059,10 +1068,7 @@ static Init *EvaluateOperation(OpInit *RHSo, Init *LHS, Init *Arg,
// Now run the operator and use its result as the new leaf
const OpInit *NewOp = RHSo->clone(NewOperands);
Init *NewVal = NewOp->Fold(CurRec, CurMultiClass);
- if (NewVal != NewOp)
- return NewVal;
-
- return 0;
+ return (NewVal != NewOp) ? NewVal : nullptr;
}
static Init *ForeachHelper(Init *LHS, Init *MHS, Init *RHS, RecTy *Type,
@@ -1086,7 +1092,7 @@ static Init *ForeachHelper(Init *LHS, Init *MHS, Init *RHS, RecTy *Type,
Init *Val = MHSd->getOperator();
Init *Result = EvaluateOperation(RHSo, LHS, Val,
Type, CurRec, CurMultiClass);
- if (Result != 0) {
+ if (Result) {
Val = Result;
}
@@ -1100,7 +1106,7 @@ static Init *ForeachHelper(Init *LHS, Init *MHS, Init *RHS, RecTy *Type,
// Process args
Init *Result = EvaluateOperation(RHSo, LHS, Arg, Type,
CurRec, CurMultiClass);
- if (Result != 0) {
+ if (Result) {
Arg = Result;
}
@@ -1138,7 +1144,7 @@ static Init *ForeachHelper(Init *LHS, Init *MHS, Init *RHS, RecTy *Type,
return ListInit::get(NewList, MHSl->getType());
}
}
- return 0;
+ return nullptr;
}
Init *TernOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) const {
@@ -1195,7 +1201,7 @@ Init *TernOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) const {
case FOREACH: {
Init *Result = ForeachHelper(LHS, MHS, RHS, getType(),
CurRec, CurMultiClass);
- if (Result != 0) {
+ if (Result) {
return Result;
}
break;
@@ -1227,16 +1233,16 @@ Init *TernOpInit::resolveReferences(Record &R,
IntInit *Value = dyn_cast<IntInit>(lhs);
if (Init *I = lhs->convertInitializerTo(IntRecTy::get()))
Value = dyn_cast<IntInit>(I);
- if (Value != 0) {
+ if (Value) {
// Short-circuit
if (Value->getValue()) {
Init *mhs = MHS->resolveReferences(R, RV);
return (TernOpInit::get(getOpcode(), lhs, mhs,
- RHS, getType()))->Fold(&R, 0);
+ RHS, getType()))->Fold(&R, nullptr);
} else {
Init *rhs = RHS->resolveReferences(R, RV);
return (TernOpInit::get(getOpcode(), lhs, MHS,
- rhs, getType()))->Fold(&R, 0);
+ rhs, getType()))->Fold(&R, nullptr);
}
}
}
@@ -1246,8 +1252,8 @@ Init *TernOpInit::resolveReferences(Record &R,
if (LHS != lhs || MHS != mhs || RHS != rhs)
return (TernOpInit::get(getOpcode(), lhs, mhs, rhs,
- getType()))->Fold(&R, 0);
- return Fold(&R, 0);
+ getType()))->Fold(&R, nullptr);
+ return Fold(&R, nullptr);
}
std::string TernOpInit::getAsString() const {
@@ -1265,19 +1271,19 @@ RecTy *TypedInit::getFieldType(const std::string &FieldName) const {
if (RecordRecTy *RecordType = dyn_cast<RecordRecTy>(getType()))
if (RecordVal *Field = RecordType->getRecord()->getValue(FieldName))
return Field->getType();
- return 0;
+ return nullptr;
}
Init *
TypedInit::convertInitializerBitRange(const std::vector<unsigned> &Bits) const {
BitsRecTy *T = dyn_cast<BitsRecTy>(getType());
- if (T == 0) return 0; // Cannot subscript a non-bits variable.
+ if (!T) return nullptr; // Cannot subscript a non-bits variable.
unsigned NumBits = T->getNumBits();
SmallVector<Init *, 16> NewBits(Bits.size());
for (unsigned i = 0, e = Bits.size(); i != e; ++i) {
if (Bits[i] >= NumBits)
- return 0;
+ return nullptr;
NewBits[i] = VarBitInit::get(const_cast<TypedInit *>(this), Bits[i]);
}
@@ -1287,7 +1293,7 @@ TypedInit::convertInitializerBitRange(const std::vector<unsigned> &Bits) const {
Init *
TypedInit::convertInitListSlice(const std::vector<unsigned> &Elements) const {
ListRecTy *T = dyn_cast<ListRecTy>(getType());
- if (T == 0) return 0; // Cannot subscript a non-list variable.
+ if (!T) return nullptr; // Cannot subscript a non-list variable.
if (Elements.size() == 1)
return VarListElementInit::get(const_cast<TypedInit *>(this), Elements[0]);
@@ -1332,8 +1338,8 @@ Init *VarInit::getBit(unsigned Bit) const {
Init *VarInit::resolveListElementReference(Record &R,
const RecordVal *IRV,
unsigned Elt) const {
- if (R.isTemplateArg(getNameInit())) return 0;
- if (IRV && IRV->getNameInit() != getNameInit()) return 0;
+ if (R.isTemplateArg(getNameInit())) return nullptr;
+ if (IRV && IRV->getNameInit() != getNameInit()) return nullptr;
RecordVal *RV = R.getValue(getNameInit());
assert(RV && "Reference to a non-existent variable?");
@@ -1345,14 +1351,14 @@ Init *VarInit::resolveListElementReference(Record &R,
}
if (Elt >= LI->getSize())
- return 0; // Out of range reference.
+ return nullptr; // Out of range reference.
Init *E = LI->getElement(Elt);
// If the element is set to some value, or if we are resolving a reference
// to a specific variable and that variable is explicitly unset, then
// replace the VarListElementInit with it.
if (IRV || !isa<UnsetInit>(E))
return E;
- return 0;
+ return nullptr;
}
@@ -1360,7 +1366,7 @@ RecTy *VarInit::getFieldType(const std::string &FieldName) const {
if (RecordRecTy *RTy = dyn_cast<RecordRecTy>(getType()))
if (const RecordVal *RV = RTy->getRecord()->getValue(FieldName))
return RV->getType();
- return 0;
+ return nullptr;
}
Init *VarInit::getFieldInit(Record &R, const RecordVal *RV,
@@ -1368,15 +1374,15 @@ Init *VarInit::getFieldInit(Record &R, const RecordVal *RV,
if (isa<RecordRecTy>(getType()))
if (const RecordVal *Val = R.getValue(VarName)) {
if (RV != Val && (RV || isa<UnsetInit>(Val->getValue())))
- return 0;
+ return nullptr;
Init *TheInit = Val->getValue();
assert(TheInit != this && "Infinite loop detected!");
if (Init *I = TheInit->getFieldInit(R, RV, FieldName))
return I;
else
- return 0;
+ return nullptr;
}
- return 0;
+ return nullptr;
}
/// resolveReferences - This method is used by classes that refer to other
@@ -1386,7 +1392,7 @@ Init *VarInit::getFieldInit(Record &R, const RecordVal *RV,
///
Init *VarInit::resolveReferences(Record &R, const RecordVal *RV) const {
if (RecordVal *Val = R.getValue(VarName))
- if (RV == Val || (RV == 0 && !isa<UnsetInit>(Val->getValue())))
+ if (RV == Val || (!RV && !isa<UnsetInit>(Val->getValue())))
return Val->getValue();
return const_cast<VarInit *>(this);
}
@@ -1462,7 +1468,7 @@ Init *VarListElementInit:: resolveListElementReference(Record &R,
return Result;
}
- return 0;
+ return nullptr;
}
DefInit *DefInit::get(Record *R) {
@@ -1472,7 +1478,7 @@ DefInit *DefInit::get(Record *R) {
RecTy *DefInit::getFieldType(const std::string &FieldName) const {
if (const RecordVal *RV = Def->getValue(FieldName))
return RV->getType();
- return 0;
+ return nullptr;
}
Init *DefInit::getFieldInit(Record &R, const RecordVal *RV,
@@ -1507,7 +1513,7 @@ Init *FieldInit::resolveListElementReference(Record &R, const RecordVal *RV,
unsigned Elt) const {
if (Init *ListVal = Rec->getFieldInit(R, RV, FieldName))
if (ListInit *LI = dyn_cast<ListInit>(ListVal)) {
- if (Elt >= LI->getSize()) return 0;
+ if (Elt >= LI->getSize()) return nullptr;
Init *E = LI->getElement(Elt);
// If the element is set to some value, or if we are resolving a
@@ -1516,7 +1522,7 @@ Init *FieldInit::resolveListElementReference(Record &R, const RecordVal *RV,
if (RV || !isa<UnsetInit>(E))
return E;
}
- return 0;
+ return nullptr;
}
Init *FieldInit::resolveReferences(Record &R, const RecordVal *RV) const {
@@ -1560,7 +1566,7 @@ DagInit::get(Init *V, const std::string &VN,
FoldingSetNodeID ID;
ProfileDagInit(ID, V, VN, ArgRange, NameRange);
- void *IP = 0;
+ void *IP = nullptr;
if (DagInit *I = ThePool.FindNodeOrInsertPos(ID, IP))
return I;
@@ -1784,7 +1790,7 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const Record &R) {
///
Init *Record::getValueInit(StringRef FieldName) const {
const RecordVal *R = getValue(FieldName);
- if (R == 0 || R->getValue() == 0)
+ if (!R || !R->getValue())
PrintFatalError(getLoc(), "Record `" + getName() +
"' does not have a field named `" + FieldName + "'!\n");
return R->getValue();
@@ -1797,7 +1803,7 @@ Init *Record::getValueInit(StringRef FieldName) const {
///
std::string Record::getValueAsString(StringRef FieldName) const {
const RecordVal *R = getValue(FieldName);
- if (R == 0 || R->getValue() == 0)
+ if (!R || !R->getValue())
PrintFatalError(getLoc(), "Record `" + getName() +
"' does not have a field named `" + FieldName + "'!\n");
@@ -1813,7 +1819,7 @@ std::string Record::getValueAsString(StringRef FieldName) const {
///
BitsInit *Record::getValueAsBitsInit(StringRef FieldName) const {
const RecordVal *R = getValue(FieldName);
- if (R == 0 || R->getValue() == 0)
+ if (!R || !R->getValue())
PrintFatalError(getLoc(), "Record `" + getName() +
"' does not have a field named `" + FieldName + "'!\n");
@@ -1829,7 +1835,7 @@ BitsInit *Record::getValueAsBitsInit(StringRef FieldName) const {
///
ListInit *Record::getValueAsListInit(StringRef FieldName) const {
const RecordVal *R = getValue(FieldName);
- if (R == 0 || R->getValue() == 0)
+ if (!R || !R->getValue())
PrintFatalError(getLoc(), "Record `" + getName() +
"' does not have a field named `" + FieldName + "'!\n");
@@ -1864,7 +1870,7 @@ Record::getValueAsListOfDefs(StringRef FieldName) const {
///
int64_t Record::getValueAsInt(StringRef FieldName) const {
const RecordVal *R = getValue(FieldName);
- if (R == 0 || R->getValue() == 0)
+ if (!R || !R->getValue())
PrintFatalError(getLoc(), "Record `" + getName() +
"' does not have a field named `" + FieldName + "'!\n");
@@ -1918,7 +1924,7 @@ Record::getValueAsListOfStrings(StringRef FieldName) const {
///
Record *Record::getValueAsDef(StringRef FieldName) const {
const RecordVal *R = getValue(FieldName);
- if (R == 0 || R->getValue() == 0)
+ if (!R || !R->getValue())
PrintFatalError(getLoc(), "Record `" + getName() +
"' does not have a field named `" + FieldName + "'!\n");
@@ -1934,7 +1940,7 @@ Record *Record::getValueAsDef(StringRef FieldName) const {
///
bool Record::getValueAsBit(StringRef FieldName) const {
const RecordVal *R = getValue(FieldName);
- if (R == 0 || R->getValue() == 0)
+ if (!R || !R->getValue())
PrintFatalError(getLoc(), "Record `" + getName() +
"' does not have a field named `" + FieldName + "'!\n");
@@ -1946,7 +1952,7 @@ bool Record::getValueAsBit(StringRef FieldName) const {
bool Record::getValueAsBitOrUnset(StringRef FieldName, bool &Unset) const {
const RecordVal *R = getValue(FieldName);
- if (R == 0 || R->getValue() == 0)
+ if (!R || !R->getValue())
PrintFatalError(getLoc(), "Record `" + getName() +
"' does not have a field named `" + FieldName.str() + "'!\n");
@@ -1967,7 +1973,7 @@ bool Record::getValueAsBitOrUnset(StringRef FieldName, bool &Unset) const {
///
DagInit *Record::getValueAsDag(StringRef FieldName) const {
const RecordVal *R = getValue(FieldName);
- if (R == 0 || R->getValue() == 0)
+ if (!R || !R->getValue())
PrintFatalError(getLoc(), "Record `" + getName() +
"' does not have a field named `" + FieldName + "'!\n");
diff --git a/lib/TableGen/TGLexer.cpp b/lib/TableGen/TGLexer.cpp
index c6be4f8..1ec2eea 100644
--- a/lib/TableGen/TGLexer.cpp
+++ b/lib/TableGen/TGLexer.cpp
@@ -30,7 +30,7 @@ TGLexer::TGLexer(SourceMgr &SM) : SrcMgr(SM) {
CurBuffer = 0;
CurBuf = SrcMgr.getMemoryBuffer(CurBuffer);
CurPtr = CurBuf->getBufferStart();
- TokStart = 0;
+ TokStart = nullptr;
}
SMLoc TGLexer::getLoc() const {
@@ -389,12 +389,12 @@ tgtok::TokKind TGLexer::LexNumber() {
return ReturnError(TokStart, "Invalid hexadecimal number");
errno = 0;
- CurIntVal = strtoll(NumStart, 0, 16);
+ CurIntVal = strtoll(NumStart, nullptr, 16);
if (errno == EINVAL)
return ReturnError(TokStart, "Invalid hexadecimal number");
if (errno == ERANGE) {
errno = 0;
- CurIntVal = (int64_t)strtoull(NumStart, 0, 16);
+ CurIntVal = (int64_t)strtoull(NumStart, nullptr, 16);
if (errno == EINVAL)
return ReturnError(TokStart, "Invalid hexadecimal number");
if (errno == ERANGE)
@@ -410,7 +410,7 @@ tgtok::TokKind TGLexer::LexNumber() {
// Requires at least one binary digit.
if (CurPtr == NumStart)
return ReturnError(CurPtr-2, "Invalid binary number");
- CurIntVal = strtoll(NumStart, 0, 2);
+ CurIntVal = strtoll(NumStart, nullptr, 2);
return tgtok::IntVal;
}
}
@@ -425,7 +425,7 @@ tgtok::TokKind TGLexer::LexNumber() {
while (isdigit(CurPtr[0]))
++CurPtr;
- CurIntVal = strtoll(TokStart, 0, 10);
+ CurIntVal = strtoll(TokStart, nullptr, 10);
return tgtok::IntVal;
}
@@ -478,6 +478,7 @@ tgtok::TokKind TGLexer::LexExclaim() {
.Case("empty", tgtok::XEmpty)
.Case("subst", tgtok::XSubst)
.Case("foreach", tgtok::XForEach)
+ .Case("listconcat", tgtok::XListConcat)
.Case("strconcat", tgtok::XStrConcat)
.Default(tgtok::Error);
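Adding the !listconcat bang operator is a multi-file change: a StringSwitch case in the lexer (above), a matching token in TGLexer.h (below), and the LISTCONCAT fold in Record.cpp (earlier). The lexer's dispatch idiom, using LLVM's StringSwitch with a reduced token enum for illustration:

  #include "llvm/ADT/StringRef.h"
  #include "llvm/ADT/StringSwitch.h"

  enum Tok { XListConcat, XStrConcat, Error };

  Tok classify(llvm::StringRef Word) {
    // First matching .Case wins; .Default covers unknown keywords.
    return llvm::StringSwitch<Tok>(Word)
        .Case("listconcat", XListConcat)
        .Case("strconcat", XStrConcat)
        .Default(Error);
  }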
diff --git a/lib/TableGen/TGLexer.h b/lib/TableGen/TGLexer.h
index d1bd70d..1e599f8 100644
--- a/lib/TableGen/TGLexer.h
+++ b/lib/TableGen/TGLexer.h
@@ -47,7 +47,7 @@ namespace tgtok {
MultiClass, String,
// !keywords.
- XConcat, XADD, XSRA, XSRL, XSHL, XStrConcat, XCast, XSubst,
+ XConcat, XADD, XSRA, XSRL, XSHL, XListConcat, XStrConcat, XCast, XSubst,
XForEach, XHead, XTail, XEmpty, XIf, XEq,
// Integer value.
diff --git a/lib/TableGen/TGParser.cpp b/lib/TableGen/TGParser.cpp
index 4ba769c..038e018 100644
--- a/lib/TableGen/TGParser.cpp
+++ b/lib/TableGen/TGParser.cpp
@@ -29,18 +29,18 @@ struct SubClassReference {
SMRange RefRange;
Record *Rec;
std::vector<Init*> TemplateArgs;
- SubClassReference() : Rec(0) {}
+ SubClassReference() : Rec(nullptr) {}
- bool isInvalid() const { return Rec == 0; }
+ bool isInvalid() const { return Rec == nullptr; }
};
struct SubMultiClassReference {
SMRange RefRange;
MultiClass *MC;
std::vector<Init*> TemplateArgs;
- SubMultiClassReference() : MC(0) {}
+ SubMultiClassReference() : MC(nullptr) {}
- bool isInvalid() const { return MC == 0; }
+ bool isInvalid() const { return MC == nullptr; }
void dump() const;
};
@@ -61,7 +61,7 @@ void SubMultiClassReference::dump() const {
} // end namespace llvm
bool TGParser::AddValue(Record *CurRec, SMLoc Loc, const RecordVal &RV) {
- if (CurRec == 0)
+ if (!CurRec)
CurRec = &CurMultiClass->Rec;
if (RecordVal *ERV = CurRec->getValue(RV.getNameInit())) {
@@ -83,10 +83,10 @@ bool TGParser::SetValue(Record *CurRec, SMLoc Loc, Init *ValName,
const std::vector<unsigned> &BitList, Init *V) {
if (!V) return false;
- if (CurRec == 0) CurRec = &CurMultiClass->Rec;
+ if (!CurRec) CurRec = &CurMultiClass->Rec;
RecordVal *RV = CurRec->getValue(ValName);
- if (RV == 0)
+ if (!RV)
return Error(Loc, "Value '" + ValName->getAsUnquotedString()
+ "' unknown!");
@@ -103,19 +103,19 @@ bool TGParser::SetValue(Record *CurRec, SMLoc Loc, Init *ValName,
//
if (!BitList.empty()) {
BitsInit *CurVal = dyn_cast<BitsInit>(RV->getValue());
- if (CurVal == 0)
+ if (!CurVal)
return Error(Loc, "Value '" + ValName->getAsUnquotedString()
+ "' is not a bits type");
// Convert the incoming value to a bits type of the appropriate size...
Init *BI = V->convertInitializerTo(BitsRecTy::get(BitList.size()));
- if (BI == 0) {
+ if (!BI) {
return Error(Loc, "Initializer is not compatible with bit range");
}
// We should have a BitsInit type now.
BitsInit *BInit = dyn_cast<BitsInit>(BI);
- assert(BInit != 0);
+ assert(BInit != nullptr);
SmallVector<Init *, 16> NewBits(CurVal->getNumBits());
@@ -129,7 +129,7 @@ bool TGParser::SetValue(Record *CurRec, SMLoc Loc, Init *ValName,
}
for (unsigned i = 0, e = CurVal->getNumBits(); i != e; ++i)
- if (NewBits[i] == 0)
+ if (!NewBits[i])
NewBits[i] = CurVal->getBit(i);
V = BitsInit::get(NewBits);
@@ -314,14 +314,14 @@ bool TGParser::ProcessForeachDefs(Record *CurRec, SMLoc Loc, IterSet &IterVals){
assert(IterVals.size() < Loops.size());
ForeachLoop &CurLoop = Loops[IterVals.size()];
ListInit *List = dyn_cast<ListInit>(CurLoop.ListValue);
- if (List == 0) {
+ if (!List) {
Error(Loc, "Loop list is not a list");
return true;
}
// Process each value.
for (int64_t i = 0; i < List->getSize(); ++i) {
- Init *ItemVal = List->resolveListElementReference(*CurRec, 0, i);
+ Init *ItemVal = List->resolveListElementReference(*CurRec, nullptr, i);
IterVals.push_back(IterRecord(CurLoop.IterVar, ItemVal));
if (ProcessForeachDefs(CurRec, Loc, IterVals))
return true;
@@ -339,7 +339,7 @@ bool TGParser::ProcessForeachDefs(Record *CurRec, SMLoc Loc, IterSet &IterVals){
for (unsigned i = 0, e = IterVals.size(); i != e; ++i) {
VarInit *IterVar = IterVals[i].IterVar;
TypedInit *IVal = dyn_cast<TypedInit>(IterVals[i].IterValue);
- if (IVal == 0) {
+ if (!IVal) {
Error(Loc, "foreach iterator value is untyped");
return true;
}
@@ -400,21 +400,21 @@ Init *TGParser::ParseObjectName(MultiClass *CurMultiClass) {
// These are all of the tokens that can begin an object body.
// Some of these can also begin values but we disallow those cases
// because they are unlikely to be useful.
- return 0;
+ return nullptr;
default:
break;
}
- Record *CurRec = 0;
+ Record *CurRec = nullptr;
if (CurMultiClass)
CurRec = &CurMultiClass->Rec;
- RecTy *Type = 0;
+ RecTy *Type = nullptr;
if (CurRec) {
const TypedInit *CurRecName = dyn_cast<TypedInit>(CurRec->getNameInit());
if (!CurRecName) {
TokError("Record name is not typed!");
- return 0;
+ return nullptr;
}
Type = CurRecName->getType();
}
@@ -430,11 +430,11 @@ Init *TGParser::ParseObjectName(MultiClass *CurMultiClass) {
Record *TGParser::ParseClassID() {
if (Lex.getCode() != tgtok::Id) {
TokError("expected name for ClassID");
- return 0;
+ return nullptr;
}
Record *Result = Records.getClass(Lex.getCurStrVal());
- if (Result == 0)
+ if (!Result)
TokError("Couldn't find class '" + Lex.getCurStrVal() + "'");
Lex.Lex();
@@ -449,11 +449,11 @@ Record *TGParser::ParseClassID() {
MultiClass *TGParser::ParseMultiClassID() {
if (Lex.getCode() != tgtok::Id) {
TokError("expected name for MultiClassID");
- return 0;
+ return nullptr;
}
MultiClass *Result = MultiClasses[Lex.getCurStrVal()];
- if (Result == 0)
+ if (!Result)
TokError("Couldn't find multiclass '" + Lex.getCurStrVal() + "'");
Lex.Lex();
@@ -477,7 +477,7 @@ ParseSubClassReference(Record *CurRec, bool isDefm) {
} else {
Result.Rec = ParseClassID();
}
- if (Result.Rec == 0) return Result;
+ if (!Result.Rec) return Result;
// If there is no template arg list, we're done.
if (Lex.getCode() != tgtok::less) {
@@ -488,19 +488,19 @@ ParseSubClassReference(Record *CurRec, bool isDefm) {
if (Lex.getCode() == tgtok::greater) {
TokError("subclass reference requires a non-empty list of template values");
- Result.Rec = 0;
+ Result.Rec = nullptr;
return Result;
}
Result.TemplateArgs = ParseValueList(CurRec, Result.Rec);
if (Result.TemplateArgs.empty()) {
- Result.Rec = 0; // Error parsing value list.
+ Result.Rec = nullptr; // Error parsing value list.
return Result;
}
if (Lex.getCode() != tgtok::greater) {
TokError("expected '>' in template value list");
- Result.Rec = 0;
+ Result.Rec = nullptr;
return Result;
}
Lex.Lex();
@@ -522,7 +522,7 @@ ParseSubMultiClassReference(MultiClass *CurMC) {
Result.RefRange.Start = Lex.getLoc();
Result.MC = ParseMultiClassID();
- if (Result.MC == 0) return Result;
+ if (!Result.MC) return Result;
// If there is no template arg list, we're done.
if (Lex.getCode() != tgtok::less) {
@@ -533,19 +533,19 @@ ParseSubMultiClassReference(MultiClass *CurMC) {
if (Lex.getCode() == tgtok::greater) {
TokError("subclass reference requires a non-empty list of template values");
- Result.MC = 0;
+ Result.MC = nullptr;
return Result;
}
Result.TemplateArgs = ParseValueList(&CurMC->Rec, &Result.MC->Rec);
if (Result.TemplateArgs.empty()) {
- Result.MC = 0; // Error parsing value list.
+ Result.MC = nullptr; // Error parsing value list.
return Result;
}
if (Lex.getCode() != tgtok::greater) {
TokError("expected '>' in template value list");
- Result.MC = 0;
+ Result.MC = nullptr;
return Result;
}
Lex.Lex();
@@ -677,7 +677,7 @@ bool TGParser::ParseOptionalBitList(std::vector<unsigned> &Ranges) {
///
RecTy *TGParser::ParseType() {
switch (Lex.getCode()) {
- default: TokError("Unknown token when expecting a type"); return 0;
+ default: TokError("Unknown token when expecting a type"); return nullptr;
case tgtok::String: Lex.Lex(); return StringRecTy::get();
case tgtok::Code: Lex.Lex(); return StringRecTy::get();
case tgtok::Bit: Lex.Lex(); return BitRecTy::get();
@@ -685,20 +685,20 @@ RecTy *TGParser::ParseType() {
case tgtok::Dag: Lex.Lex(); return DagRecTy::get();
case tgtok::Id:
if (Record *R = ParseClassID()) return RecordRecTy::get(R);
- return 0;
+ return nullptr;
case tgtok::Bits: {
if (Lex.Lex() != tgtok::less) { // Eat 'bits'
TokError("expected '<' after bits type");
- return 0;
+ return nullptr;
}
if (Lex.Lex() != tgtok::IntVal) { // Eat '<'
TokError("expected integer in bits<n> type");
- return 0;
+ return nullptr;
}
uint64_t Val = Lex.getCurIntVal();
if (Lex.Lex() != tgtok::greater) { // Eat count.
TokError("expected '>' at end of bits<n> type");
- return 0;
+ return nullptr;
}
Lex.Lex(); // Eat '>'
return BitsRecTy::get(Val);
@@ -706,15 +706,15 @@ RecTy *TGParser::ParseType() {
case tgtok::List: {
if (Lex.Lex() != tgtok::less) { // Eat 'list'
TokError("expected '<' after list type");
- return 0;
+ return nullptr;
}
Lex.Lex(); // Eat '<'
RecTy *SubType = ParseType();
- if (SubType == 0) return 0;
+ if (!SubType) return nullptr;
if (Lex.getCode() != tgtok::greater) {
TokError("expected '>' at end of list<ty> type");
- return 0;
+ return nullptr;
}
Lex.Lex(); // Eat '>'
return ListRecTy::get(SubType);
@@ -772,7 +772,7 @@ Init *TGParser::ParseIDValue(Record *CurRec,
if (Mode == ParseValueMode) {
Error(NameLoc, "Variable not defined: '" + Name + "'");
- return 0;
+ return nullptr;
}
return StringInit::get(Name);
@@ -786,13 +786,13 @@ Init *TGParser::ParseOperation(Record *CurRec) {
switch (Lex.getCode()) {
default:
TokError("unknown operation");
- return 0;
+ return nullptr;
case tgtok::XHead:
case tgtok::XTail:
case tgtok::XEmpty:
case tgtok::XCast: { // Value ::= !unop '(' Value ')'
UnOpInit::UnaryOp Code;
- RecTy *Type = 0;
+ RecTy *Type = nullptr;
switch (Lex.getCode()) {
default: llvm_unreachable("Unhandled code!");
@@ -802,9 +802,9 @@ Init *TGParser::ParseOperation(Record *CurRec) {
Type = ParseOperatorType();
- if (Type == 0) {
+ if (!Type) {
TokError("did not get type for unary operator");
- return 0;
+ return nullptr;
}
break;
@@ -824,12 +824,12 @@ Init *TGParser::ParseOperation(Record *CurRec) {
}
if (Lex.getCode() != tgtok::l_paren) {
TokError("expected '(' after unary operator");
- return 0;
+ return nullptr;
}
Lex.Lex(); // eat the '('
Init *LHS = ParseValue(CurRec);
- if (LHS == 0) return 0;
+ if (!LHS) return nullptr;
if (Code == UnOpInit::HEAD
|| Code == UnOpInit::TAIL
@@ -837,36 +837,36 @@ Init *TGParser::ParseOperation(Record *CurRec) {
ListInit *LHSl = dyn_cast<ListInit>(LHS);
StringInit *LHSs = dyn_cast<StringInit>(LHS);
TypedInit *LHSt = dyn_cast<TypedInit>(LHS);
- if (LHSl == 0 && LHSs == 0 && LHSt == 0) {
+ if (!LHSl && !LHSs && !LHSt) {
TokError("expected list or string type argument in unary operator");
- return 0;
+ return nullptr;
}
if (LHSt) {
ListRecTy *LType = dyn_cast<ListRecTy>(LHSt->getType());
StringRecTy *SType = dyn_cast<StringRecTy>(LHSt->getType());
- if (LType == 0 && SType == 0) {
+ if (!LType && !SType) {
TokError("expected list or string type argumnet in unary operator");
- return 0;
+ return nullptr;
}
}
if (Code == UnOpInit::HEAD
|| Code == UnOpInit::TAIL) {
- if (LHSl == 0 && LHSt == 0) {
+ if (!LHSl && !LHSt) {
TokError("expected list type argumnet in unary operator");
- return 0;
+ return nullptr;
}
if (LHSl && LHSl->getSize() == 0) {
TokError("empty list argument in unary operator");
- return 0;
+ return nullptr;
}
if (LHSl) {
Init *Item = LHSl->getElement(0);
TypedInit *Itemt = dyn_cast<TypedInit>(Item);
- if (Itemt == 0) {
+ if (!Itemt) {
TokError("untyped list element in unary operator");
- return 0;
+ return nullptr;
}
if (Code == UnOpInit::HEAD) {
Type = Itemt->getType();
@@ -876,9 +876,9 @@ Init *TGParser::ParseOperation(Record *CurRec) {
} else {
assert(LHSt && "expected list type argument in unary operator");
ListRecTy *LType = dyn_cast<ListRecTy>(LHSt->getType());
- if (LType == 0) {
+ if (!LType) {
TokError("expected list type argumnet in unary operator");
- return 0;
+ return nullptr;
}
if (Code == UnOpInit::HEAD) {
Type = LType->getElementType();
@@ -891,7 +891,7 @@ Init *TGParser::ParseOperation(Record *CurRec) {
if (Lex.getCode() != tgtok::r_paren) {
TokError("expected ')' in unary operator");
- return 0;
+ return nullptr;
}
Lex.Lex(); // eat the ')'
return (UnOpInit::get(Code, LHS, Type))->Fold(CurRec, CurMultiClass);
@@ -903,13 +903,14 @@ Init *TGParser::ParseOperation(Record *CurRec) {
case tgtok::XSRL:
case tgtok::XSHL:
case tgtok::XEq:
+ case tgtok::XListConcat:
case tgtok::XStrConcat: { // Value ::= !binop '(' Value ',' Value ')'
tgtok::TokKind OpTok = Lex.getCode();
SMLoc OpLoc = Lex.getLoc();
Lex.Lex(); // eat the operation
BinOpInit::BinaryOp Code;
- RecTy *Type = 0;
+ RecTy *Type = nullptr;
switch (OpTok) {
default: llvm_unreachable("Unhandled code!");
@@ -919,6 +920,10 @@ Init *TGParser::ParseOperation(Record *CurRec) {
case tgtok::XSRL: Code = BinOpInit::SRL; Type = IntRecTy::get(); break;
case tgtok::XSHL: Code = BinOpInit::SHL; Type = IntRecTy::get(); break;
case tgtok::XEq: Code = BinOpInit::EQ; Type = BitRecTy::get(); break;
+ case tgtok::XListConcat:
+ Code = BinOpInit::LISTCONCAT;
+ // We don't know the list type until we parse the first argument.
+ break;
case tgtok::XStrConcat:
Code = BinOpInit::STRCONCAT;
Type = StringRecTy::get();
@@ -927,31 +932,44 @@ Init *TGParser::ParseOperation(Record *CurRec) {
if (Lex.getCode() != tgtok::l_paren) {
TokError("expected '(' after binary operator");
- return 0;
+ return nullptr;
}
Lex.Lex(); // eat the '('
SmallVector<Init*, 2> InitList;
InitList.push_back(ParseValue(CurRec));
- if (InitList.back() == 0) return 0;
+ if (!InitList.back()) return nullptr;
while (Lex.getCode() == tgtok::comma) {
Lex.Lex(); // eat the ','
InitList.push_back(ParseValue(CurRec));
- if (InitList.back() == 0) return 0;
+ if (!InitList.back()) return nullptr;
}
if (Lex.getCode() != tgtok::r_paren) {
TokError("expected ')' in operator");
- return 0;
+ return nullptr;
}
Lex.Lex(); // eat the ')'
+ // If we are doing !listconcat, we should know the type by now.
+ if (OpTok == tgtok::XListConcat) {
+ if (VarInit *Arg0 = dyn_cast<VarInit>(InitList[0]))
+ Type = Arg0->getType();
+ else if (ListInit *Arg0 = dyn_cast<ListInit>(InitList[0]))
+ Type = Arg0->getType();
+ else {
+ InitList[0]->dump();
+ Error(OpLoc, "expected a list");
+ return nullptr;
+ }
+ }
+
// We allow multiple operands to associative operators like !strconcat as
// shorthand for nesting them.
- if (Code == BinOpInit::STRCONCAT) {
+ if (Code == BinOpInit::STRCONCAT || Code == BinOpInit::LISTCONCAT) {
while (InitList.size() > 2) {
Init *RHS = InitList.pop_back_val();
RHS = (BinOpInit::get(Code, InitList.back(), RHS, Type))
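So the n-ary spelling is sugar for right-nested binary operators. Under the same hypothetical names as before:

  def s {
    string    x = !strconcat("a", "b", "c");   // same as !strconcat("a", !strconcat("b", "c"))
    list<int> y = !listconcat([0], [1], [2]);  // folded the same way
  }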
@@ -965,14 +983,14 @@ Init *TGParser::ParseOperation(Record *CurRec) {
->Fold(CurRec, CurMultiClass);
Error(OpLoc, "expected two operands to operator");
- return 0;
+ return nullptr;
}
case tgtok::XIf:
case tgtok::XForEach:
case tgtok::XSubst: { // Value ::= !ternop '(' Value ',' Value ',' Value ')'
TernOpInit::TernaryOp Code;
- RecTy *Type = 0;
+ RecTy *Type = nullptr;
tgtok::TokKind LexCode = Lex.getCode();
Lex.Lex(); // eat the operation
@@ -990,42 +1008,42 @@ Init *TGParser::ParseOperation(Record *CurRec) {
}
if (Lex.getCode() != tgtok::l_paren) {
TokError("expected '(' after ternary operator");
- return 0;
+ return nullptr;
}
Lex.Lex(); // eat the '('
Init *LHS = ParseValue(CurRec);
- if (LHS == 0) return 0;
+ if (!LHS) return nullptr;
if (Lex.getCode() != tgtok::comma) {
TokError("expected ',' in ternary operator");
- return 0;
+ return nullptr;
}
Lex.Lex(); // eat the ','
Init *MHS = ParseValue(CurRec);
- if (MHS == 0) return 0;
+ if (!MHS) return nullptr;
if (Lex.getCode() != tgtok::comma) {
TokError("expected ',' in ternary operator");
- return 0;
+ return nullptr;
}
Lex.Lex(); // eat the ','
Init *RHS = ParseValue(CurRec);
- if (RHS == 0) return 0;
+ if (!RHS) return nullptr;
if (Lex.getCode() != tgtok::r_paren) {
TokError("expected ')' in binary operator");
- return 0;
+ return nullptr;
}
Lex.Lex(); // eat the ')'
switch (LexCode) {
default: llvm_unreachable("Unhandled code!");
case tgtok::XIf: {
- RecTy *MHSTy = 0;
- RecTy *RHSTy = 0;
+ RecTy *MHSTy = nullptr;
+ RecTy *RHSTy = nullptr;
if (TypedInit *MHSt = dyn_cast<TypedInit>(MHS))
MHSTy = MHSt->getType();
@@ -1049,7 +1067,7 @@ Init *TGParser::ParseOperation(Record *CurRec) {
if (!MHSTy || !RHSTy) {
TokError("could not get type for !if");
- return 0;
+ return nullptr;
}
if (MHSTy->typeIsConvertibleTo(RHSTy)) {
@@ -1058,24 +1076,24 @@ Init *TGParser::ParseOperation(Record *CurRec) {
Type = MHSTy;
} else {
TokError("inconsistent types for !if");
- return 0;
+ return nullptr;
}
break;
}
case tgtok::XForEach: {
TypedInit *MHSt = dyn_cast<TypedInit>(MHS);
- if (MHSt == 0) {
+ if (!MHSt) {
TokError("could not get type for !foreach");
- return 0;
+ return nullptr;
}
Type = MHSt->getType();
break;
}
case tgtok::XSubst: {
TypedInit *RHSt = dyn_cast<TypedInit>(RHS);
- if (RHSt == 0) {
+ if (!RHSt) {
TokError("could not get type for !subst");
- return 0;
+ return nullptr;
}
Type = RHSt->getType();
break;
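A small sketch of these typing rules in source form (values illustrative):

  def t {
    // !if takes its type from whichever branch converts to the other;
    // !eq yields a bit, as set up in the binary-operator case above.
    int x = !if(!eq("neon", "neon"), 1, 0);
  }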
@@ -1093,24 +1111,24 @@ Init *TGParser::ParseOperation(Record *CurRec) {
/// OperatorType ::= '<' Type '>'
///
RecTy *TGParser::ParseOperatorType() {
- RecTy *Type = 0;
+ RecTy *Type = nullptr;
if (Lex.getCode() != tgtok::less) {
TokError("expected type name for operator");
- return 0;
+ return nullptr;
}
Lex.Lex(); // eat the <
Type = ParseType();
- if (Type == 0) {
+ if (!Type) {
TokError("expected type name for operator");
- return 0;
+ return nullptr;
}
if (Lex.getCode() != tgtok::greater) {
TokError("expected type name for operator");
- return 0;
+ return nullptr;
}
Lex.Lex(); // eat the >
@@ -1134,11 +1152,12 @@ RecTy *TGParser::ParseOperatorType() {
/// SimpleValue ::= SHLTOK '(' Value ',' Value ')'
/// SimpleValue ::= SRATOK '(' Value ',' Value ')'
/// SimpleValue ::= SRLTOK '(' Value ',' Value ')'
+/// SimpleValue ::= LISTCONCATTOK '(' Value ',' Value ')'
/// SimpleValue ::= STRCONCATTOK '(' Value ',' Value ')'
///
Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType,
IDParseMode Mode) {
- Init *R = 0;
+ Init *R = nullptr;
switch (Lex.getCode()) {
default: TokError("Unknown token when parsing a value"); break;
case tgtok::paste:
@@ -1177,7 +1196,7 @@ Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType,
// Value ::= ID '<' ValueListNE '>'
if (Lex.Lex() == tgtok::greater) {
TokError("expected non-empty value list");
- return 0;
+ return nullptr;
}
// This is a CLASS<initvalslist> expression. This is supposed to synthesize
@@ -1186,15 +1205,15 @@ Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType,
Record *Class = Records.getClass(Name);
if (!Class) {
Error(NameLoc, "Expected a class name, got '" + Name + "'");
- return 0;
+ return nullptr;
}
std::vector<Init*> ValueList = ParseValueList(CurRec, Class);
- if (ValueList.empty()) return 0;
+ if (ValueList.empty()) return nullptr;
if (Lex.getCode() != tgtok::greater) {
TokError("expected '>' at end of value list");
- return 0;
+ return nullptr;
}
Lex.Lex(); // eat the '>'
SMLoc EndLoc = Lex.getLoc();
@@ -1208,7 +1227,7 @@ Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType,
SCRef.TemplateArgs = ValueList;
// Add info about the subclass to NewRec.
if (AddSubClass(NewRec, SCRef))
- return 0;
+ return nullptr;
if (!CurMultiClass) {
NewRec->resolveReferences();
Records.addDef(NewRec);
@@ -1250,11 +1269,11 @@ Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType,
if (Lex.getCode() != tgtok::r_brace) {
Vals = ParseValueList(CurRec);
- if (Vals.empty()) return 0;
+ if (Vals.empty()) return nullptr;
}
if (Lex.getCode() != tgtok::r_brace) {
TokError("expected '}' at end of bit list value");
- return 0;
+ return nullptr;
}
Lex.Lex(); // eat the '}'
@@ -1262,10 +1281,10 @@ Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType,
for (unsigned i = 0, e = Vals.size(); i != e; ++i) {
Init *Bit = Vals[i]->convertInitializerTo(BitRecTy::get());
- if (Bit == 0) {
+ if (!Bit) {
Error(BraceLoc, "Element #" + utostr(i) + " (" + Vals[i]->getAsString()+
") is not convertable to a bit");
- return 0;
+ return nullptr;
}
NewBits[Vals.size()-i-1] = Bit;
}
@@ -1275,87 +1294,87 @@ Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType,
Lex.Lex(); // eat the '['
std::vector<Init*> Vals;
- RecTy *DeducedEltTy = 0;
- ListRecTy *GivenListTy = 0;
+ RecTy *DeducedEltTy = nullptr;
+ ListRecTy *GivenListTy = nullptr;
- if (ItemType != 0) {
+ if (ItemType) {
ListRecTy *ListType = dyn_cast<ListRecTy>(ItemType);
- if (ListType == 0) {
+ if (!ListType) {
std::string s;
raw_string_ostream ss(s);
ss << "Type mismatch for list, expected list type, got "
<< ItemType->getAsString();
TokError(ss.str());
- return 0;
+ return nullptr;
}
GivenListTy = ListType;
}
if (Lex.getCode() != tgtok::r_square) {
- Vals = ParseValueList(CurRec, 0,
- GivenListTy ? GivenListTy->getElementType() : 0);
- if (Vals.empty()) return 0;
+ Vals = ParseValueList(CurRec, nullptr,
+ GivenListTy ? GivenListTy->getElementType() : nullptr);
+ if (Vals.empty()) return nullptr;
}
if (Lex.getCode() != tgtok::r_square) {
TokError("expected ']' at end of list value");
- return 0;
+ return nullptr;
}
Lex.Lex(); // eat the ']'
- RecTy *GivenEltTy = 0;
+ RecTy *GivenEltTy = nullptr;
if (Lex.getCode() == tgtok::less) {
// Optional list element type
Lex.Lex(); // eat the '<'
GivenEltTy = ParseType();
- if (GivenEltTy == 0) {
+ if (!GivenEltTy) {
// Couldn't parse element type
- return 0;
+ return nullptr;
}
if (Lex.getCode() != tgtok::greater) {
TokError("expected '>' at end of list element type");
- return 0;
+ return nullptr;
}
Lex.Lex(); // eat the '>'
}
// Check elements
- RecTy *EltTy = 0;
+ RecTy *EltTy = nullptr;
for (std::vector<Init *>::iterator i = Vals.begin(), ie = Vals.end();
i != ie;
++i) {
TypedInit *TArg = dyn_cast<TypedInit>(*i);
- if (TArg == 0) {
+ if (!TArg) {
TokError("Untyped list element");
- return 0;
+ return nullptr;
}
- if (EltTy != 0) {
+ if (EltTy) {
EltTy = resolveTypes(EltTy, TArg->getType());
- if (EltTy == 0) {
+ if (!EltTy) {
TokError("Incompatible types in list elements");
- return 0;
+ return nullptr;
}
} else {
EltTy = TArg->getType();
}
}
- if (GivenEltTy != 0) {
- if (EltTy != 0) {
+ if (GivenEltTy) {
+ if (EltTy) {
// Verify consistency
if (!EltTy->typeIsConvertibleTo(GivenEltTy)) {
TokError("Incompatible types in list elements");
- return 0;
+ return nullptr;
}
}
EltTy = GivenEltTy;
}
- if (EltTy == 0) {
- if (ItemType == 0) {
+ if (!EltTy) {
+ if (!ItemType) {
TokError("No type for list");
- return 0;
+ return nullptr;
}
DeducedEltTy = GivenListTy->getElementType();
} else {
@@ -1363,7 +1382,7 @@ Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType,
if (GivenListTy) {
if (!EltTy->typeIsConvertibleTo(GivenListTy->getElementType())) {
TokError("Element type mismatch for list");
- return 0;
+ return nullptr;
}
}
DeducedEltTy = EltTy;
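The deduction logic above, seen from the .td side (hypothetical defs):

  def l {
    list<int> a = [1, 2, 3];  // element type deduced from the elements
    list<int> b = []<int>;    // or supplied with the optional <ty> suffix
  }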
@@ -1375,18 +1394,18 @@ Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType,
Lex.Lex(); // eat the '('
if (Lex.getCode() != tgtok::Id && Lex.getCode() != tgtok::XCast) {
TokError("expected identifier in dag init");
- return 0;
+ return nullptr;
}
Init *Operator = ParseValue(CurRec);
- if (Operator == 0) return 0;
+ if (!Operator) return nullptr;
// If the operator name is present, parse it.
std::string OperatorName;
if (Lex.getCode() == tgtok::colon) {
if (Lex.Lex() != tgtok::VarName) { // eat the ':'
TokError("expected variable name in dag operator");
- return 0;
+ return nullptr;
}
OperatorName = Lex.getCurStrVal();
Lex.Lex(); // eat the VarName.
@@ -1395,12 +1414,12 @@ Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType,
std::vector<std::pair<llvm::Init*, std::string> > DagArgs;
if (Lex.getCode() != tgtok::r_paren) {
DagArgs = ParseDagArgList(CurRec);
- if (DagArgs.empty()) return 0;
+ if (DagArgs.empty()) return nullptr;
}
if (Lex.getCode() != tgtok::r_paren) {
TokError("expected ')' in dag init");
- return 0;
+ return nullptr;
}
Lex.Lex(); // eat the ')'
@@ -1417,6 +1436,7 @@ Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType,
case tgtok::XSRL:
case tgtok::XSHL:
case tgtok::XEq:
+ case tgtok::XListConcat:
case tgtok::XStrConcat: // Value ::= !binop '(' Value ',' Value ')'
case tgtok::XIf:
case tgtok::XForEach:
@@ -1437,7 +1457,7 @@ Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType,
///
Init *TGParser::ParseValue(Record *CurRec, RecTy *ItemType, IDParseMode Mode) {
Init *Result = ParseSimpleValue(CurRec, ItemType, Mode);
- if (Result == 0) return 0;
+ if (!Result) return nullptr;
// Parse the suffixes now if present.
while (1) {
@@ -1451,20 +1471,20 @@ Init *TGParser::ParseValue(Record *CurRec, RecTy *ItemType, IDParseMode Mode) {
SMLoc CurlyLoc = Lex.getLoc();
Lex.Lex(); // eat the '{'
std::vector<unsigned> Ranges = ParseRangeList();
- if (Ranges.empty()) return 0;
+ if (Ranges.empty()) return nullptr;
// Reverse the bitlist.
std::reverse(Ranges.begin(), Ranges.end());
Result = Result->convertInitializerBitRange(Ranges);
- if (Result == 0) {
+ if (!Result) {
Error(CurlyLoc, "Invalid bit range for value");
- return 0;
+ return nullptr;
}
// Eat the '}'.
if (Lex.getCode() != tgtok::r_brace) {
TokError("expected '}' at end of bit range list");
- return 0;
+ return nullptr;
}
Lex.Lex();
break;
@@ -1473,18 +1493,18 @@ Init *TGParser::ParseValue(Record *CurRec, RecTy *ItemType, IDParseMode Mode) {
SMLoc SquareLoc = Lex.getLoc();
Lex.Lex(); // eat the '['
std::vector<unsigned> Ranges = ParseRangeList();
- if (Ranges.empty()) return 0;
+ if (Ranges.empty()) return nullptr;
Result = Result->convertInitListSlice(Ranges);
- if (Result == 0) {
+ if (!Result) {
Error(SquareLoc, "Invalid range for list slice");
- return 0;
+ return nullptr;
}
// Eat the ']'.
if (Lex.getCode() != tgtok::r_square) {
TokError("expected ']' at end of list slice");
- return 0;
+ return nullptr;
}
Lex.Lex();
break;
@@ -1492,12 +1512,12 @@ Init *TGParser::ParseValue(Record *CurRec, RecTy *ItemType, IDParseMode Mode) {
case tgtok::period:
if (Lex.Lex() != tgtok::Id) { // eat the .
TokError("expected field identifier after '.'");
- return 0;
+ return nullptr;
}
if (!Result->getFieldType(Lex.getCurStrVal())) {
TokError("Cannot access field '" + Lex.getCurStrVal() + "' of value '" +
Result->getAsString() + "'");
- return 0;
+ return nullptr;
}
Result = FieldInit::get(Result, Lex.getCurStrVal());
Lex.Lex(); // eat field name
@@ -1512,14 +1532,14 @@ Init *TGParser::ParseValue(Record *CurRec, RecTy *ItemType, IDParseMode Mode) {
TypedInit *LHS = dyn_cast<TypedInit>(Result);
if (!LHS) {
Error(PasteLoc, "LHS of paste is not typed!");
- return 0;
+ return nullptr;
}
if (LHS->getType() != StringRecTy::get()) {
LHS = UnOpInit::get(UnOpInit::CAST, LHS, StringRecTy::get());
}
- TypedInit *RHS = 0;
+ TypedInit *RHS = nullptr;
Lex.Lex(); // Eat the '#'.
switch (Lex.getCode()) {
@@ -1539,7 +1559,7 @@ Init *TGParser::ParseValue(Record *CurRec, RecTy *ItemType, IDParseMode Mode) {
RHS = dyn_cast<TypedInit>(RHSResult);
if (!RHS) {
Error(PasteLoc, "RHS of paste is not typed!");
- return 0;
+ return nullptr;
}
if (RHS->getType() != StringRecTy::get()) {
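Both sides of '#' are therefore coerced to string before concatenation, so a hypothetical record like

  def p {
    string n = "reg" # 7;  // yields "reg7"
  }

parses with an implicit string cast (the UnOpInit::CAST above) on the integer operand.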
@@ -1575,7 +1595,7 @@ TGParser::ParseDagArgList(Record *CurRec) {
} else {
// DagArg ::= Value (':' VARNAME)?
Init *Val = ParseValue(CurRec);
- if (Val == 0)
+ if (!Val)
return std::vector<std::pair<llvm::Init*, std::string> >();
// If the variable name is present, add it.
@@ -1610,7 +1630,7 @@ std::vector<Init*> TGParser::ParseValueList(Record *CurRec, Record *ArgsRec,
std::vector<Init*> Result;
RecTy *ItemType = EltTy;
unsigned int ArgN = 0;
- if (ArgsRec != 0 && EltTy == 0) {
+ if (ArgsRec && !EltTy) {
const std::vector<Init *> &TArgs = ArgsRec->getTemplateArgs();
if (!TArgs.size()) {
TokError("template argument provided to non-template class");
@@ -1626,12 +1646,12 @@ std::vector<Init*> TGParser::ParseValueList(Record *CurRec, Record *ArgsRec,
++ArgN;
}
Result.push_back(ParseValue(CurRec, ItemType));
- if (Result.back() == 0) return std::vector<Init*>();
+ if (!Result.back()) return std::vector<Init*>();
while (Lex.getCode() == tgtok::comma) {
Lex.Lex(); // Eat the comma
- if (ArgsRec != 0 && EltTy == 0) {
+ if (ArgsRec && !EltTy) {
const std::vector<Init *> &TArgs = ArgsRec->getTemplateArgs();
if (ArgN >= TArgs.size()) {
TokError("too many template arguments");
@@ -1643,7 +1663,7 @@ std::vector<Init*> TGParser::ParseValueList(Record *CurRec, Record *ArgsRec,
++ArgN;
}
Result.push_back(ParseValue(CurRec, ItemType));
- if (Result.back() == 0) return std::vector<Init*>();
+ if (!Result.back()) return std::vector<Init*>();
}
return Result;
@@ -1667,11 +1687,11 @@ Init *TGParser::ParseDeclaration(Record *CurRec,
if (HasField) Lex.Lex();
RecTy *Type = ParseType();
- if (Type == 0) return 0;
+ if (!Type) return nullptr;
if (Lex.getCode() != tgtok::Id) {
TokError("Expected identifier in declaration");
- return 0;
+ return nullptr;
}
SMLoc IdLoc = Lex.getLoc();
@@ -1691,16 +1711,16 @@ Init *TGParser::ParseDeclaration(Record *CurRec,
// Add the value.
if (AddValue(CurRec, IdLoc, RecordVal(DeclName, Type, HasField)))
- return 0;
+ return nullptr;
// If a value is present, parse it.
if (Lex.getCode() == tgtok::equal) {
Lex.Lex();
SMLoc ValLoc = Lex.getLoc();
Init *Val = ParseValue(CurRec, Type);
- if (Val == 0 ||
+ if (!Val ||
SetValue(CurRec, ValLoc, DeclName, std::vector<unsigned>(), Val))
- return 0;
+ return nullptr;
}
return DeclName;
@@ -1717,7 +1737,7 @@ Init *TGParser::ParseDeclaration(Record *CurRec,
VarInit *TGParser::ParseForeachDeclaration(ListInit *&ForeachListValue) {
if (Lex.getCode() != tgtok::Id) {
TokError("Expected identifier in foreach declaration");
- return 0;
+ return nullptr;
}
Init *DeclName = StringInit::get(Lex.getCurStrVal());
@@ -1726,27 +1746,27 @@ VarInit *TGParser::ParseForeachDeclaration(ListInit *&ForeachListValue) {
// If a value is present, parse it.
if (Lex.getCode() != tgtok::equal) {
TokError("Expected '=' in foreach declaration");
- return 0;
+ return nullptr;
}
Lex.Lex(); // Eat the '='
- RecTy *IterType = 0;
+ RecTy *IterType = nullptr;
std::vector<unsigned> Ranges;
switch (Lex.getCode()) {
- default: TokError("Unknown token when expecting a range list"); return 0;
+ default: TokError("Unknown token when expecting a range list"); return nullptr;
case tgtok::l_square: { // '[' ValueList ']'
- Init *List = ParseSimpleValue(0, 0, ParseForeachMode);
+ Init *List = ParseSimpleValue(nullptr, nullptr, ParseForeachMode);
ForeachListValue = dyn_cast<ListInit>(List);
- if (ForeachListValue == 0) {
+ if (!ForeachListValue) {
TokError("Expected a Value list");
- return 0;
+ return nullptr;
}
RecTy *ValueType = ForeachListValue->getType();
ListRecTy *ListType = dyn_cast<ListRecTy>(ValueType);
- if (ListType == 0) {
+ if (!ListType) {
TokError("Value list is not of list type");
- return 0;
+ return nullptr;
}
IterType = ListType->getElementType();
break;
@@ -1754,7 +1774,7 @@ VarInit *TGParser::ParseForeachDeclaration(ListInit *&ForeachListValue) {
case tgtok::IntVal: { // RangePiece.
if (ParseRangePiece(Ranges))
- return 0;
+ return nullptr;
break;
}
@@ -1763,7 +1783,7 @@ VarInit *TGParser::ParseForeachDeclaration(ListInit *&ForeachListValue) {
Ranges = ParseRangeList();
if (Lex.getCode() != tgtok::r_brace) {
TokError("expected '}' at end of bit range list");
- return 0;
+ return nullptr;
}
Lex.Lex();
break;
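The three iterator forms this switch accepts, written out (names hypothetical):

  foreach i = [10, 20] in  // '[' ValueList ']'
    def N#i;               // N10, N20
  foreach j = 0-3 in       // RangePiece
    def M#j;               // M0 through M3
  // a brace-delimited RangeList such as {0, 1, 3} is the third form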
@@ -1780,7 +1800,7 @@ VarInit *TGParser::ParseForeachDeclaration(ListInit *&ForeachListValue) {
}
if (!IterType)
- return 0;
+ return nullptr;
return VarInit::get(DeclName, IterType);
}
@@ -1800,7 +1820,7 @@ bool TGParser::ParseTemplateArgList(Record *CurRec) {
// Read the first declaration.
Init *TemplArg = ParseDeclaration(CurRec, true/*templateargs*/);
- if (TemplArg == 0)
+ if (!TemplArg)
return true;
TheRecToAddTo->addTemplateArg(TemplArg);
@@ -1810,7 +1830,7 @@ bool TGParser::ParseTemplateArgList(Record *CurRec) {
// Read the following declarations.
TemplArg = ParseDeclaration(CurRec, true/*templateargs*/);
- if (TemplArg == 0)
+ if (!TemplArg)
return true;
TheRecToAddTo->addTemplateArg(TemplArg);
}
@@ -1828,7 +1848,7 @@ bool TGParser::ParseTemplateArgList(Record *CurRec) {
/// BodyItem ::= LET ID OptionalBitList '=' Value ';'
bool TGParser::ParseBodyItem(Record *CurRec) {
if (Lex.getCode() != tgtok::Let) {
- if (ParseDeclaration(CurRec, false) == 0)
+ if (!ParseDeclaration(CurRec, false))
return true;
if (Lex.getCode() != tgtok::semi)
@@ -1855,13 +1875,13 @@ bool TGParser::ParseBodyItem(Record *CurRec) {
Lex.Lex(); // eat the '='.
RecordVal *Field = CurRec->getValue(FieldName);
- if (Field == 0)
+ if (!Field)
return TokError("Value '" + FieldName + "' unknown!");
RecTy *Type = Field->getType();
Init *Val = ParseValue(CurRec, Type);
- if (Val == 0) return true;
+ if (!Val) return true;
if (Lex.getCode() != tgtok::semi)
return TokError("expected ';' after let expression");
@@ -1927,7 +1947,7 @@ bool TGParser::ParseObjectBody(Record *CurRec) {
SubClassReference SubClass = ParseSubClassReference(CurRec, false);
while (1) {
// Check for error.
- if (SubClass.Rec == 0) return true;
+ if (!SubClass.Rec) return true;
// Add it.
if (AddSubClass(CurRec, SubClass))
@@ -1998,7 +2018,7 @@ bool TGParser::ParseDef(MultiClass *CurMultiClass) {
} else if (ParseObjectBody(CurRec))
return true;
- if (CurMultiClass == 0) // Def's in multiclasses aren't really defs.
+ if (!CurMultiClass) // Def's in multiclasses aren't really defs.
// See Record::setName(). This resolve step will see any new name
// for the def that might have been created when resolving
// inheritance, values and arguments above.
@@ -2040,9 +2060,9 @@ bool TGParser::ParseForeach(MultiClass *CurMultiClass) {
// Make a temporary object to record items associated with the for
// loop.
- ListInit *ListValue = 0;
+ ListInit *ListValue = nullptr;
VarInit *IterName = ParseForeachDeclaration(ListValue);
- if (IterName == 0)
+ if (!IterName)
return TokError("expected declaration in for");
if (Lex.getCode() != tgtok::In)
@@ -2144,8 +2164,8 @@ std::vector<LetRecord> TGParser::ParseLetList() {
}
Lex.Lex(); // eat the '='.
- Init *Val = ParseValue(0);
- if (Val == 0) return std::vector<LetRecord>();
+ Init *Val = ParseValue(nullptr);
+ if (!Val) return std::vector<LetRecord>();
// Now that we have everything, add the record.
Result.push_back(LetRecord(Name, Bits, Val, NameLoc));
@@ -2228,7 +2248,7 @@ bool TGParser::ParseMultiClass() {
// If there are template args, parse them.
if (Lex.getCode() == tgtok::less)
- if (ParseTemplateArgList(0))
+ if (ParseTemplateArgList(nullptr))
return true;
bool inherits = false;
@@ -2244,7 +2264,7 @@ bool TGParser::ParseMultiClass() {
ParseSubMultiClassReference(CurMultiClass);
while (1) {
// Check for error.
- if (SubMultiClass.MC == 0) return true;
+ if (!SubMultiClass.MC) return true;
// Add it.
if (AddSubMultiClass(CurMultiClass, SubMultiClass))
@@ -2283,7 +2303,7 @@ bool TGParser::ParseMultiClass() {
Lex.Lex(); // eat the '}'.
}
- CurMultiClass = 0;
+ CurMultiClass = nullptr;
return false;
}
@@ -2301,7 +2321,7 @@ InstantiateMulticlassDef(MultiClass &MC,
// as a prefix.
bool IsAnonymous = false;
- if (DefmPrefix == 0) {
+ if (!DefmPrefix) {
DefmPrefix = StringInit::get(GetNewAnonymousName());
IsAnonymous = true;
}
@@ -2310,7 +2330,7 @@ InstantiateMulticlassDef(MultiClass &MC,
StringInit *DefNameString = dyn_cast<StringInit>(DefName);
- if (DefNameString != 0) {
+ if (DefNameString) {
// We have a fully expanded string so there are no operators to
// resolve. We should concatenate the given prefix and name.
DefName =
@@ -2338,13 +2358,13 @@ InstantiateMulticlassDef(MultiClass &MC,
Error(DefmPrefixRange.Start, "Could not resolve "
+ CurRec->getNameInitAsString() + ":NAME to '"
+ DefmPrefix->getAsUnquotedString() + "'");
- return 0;
+ return nullptr;
}
// If the DefNameString didn't resolve, we probably have a reference to
// NAME and need to replace it. We need to do at least this much greedily,
// otherwise nested multiclasses will end up with incorrect NAME expansions.
- if (DefNameString == 0) {
+ if (!DefNameString) {
RecordVal *DefNameRV = CurRec->getValue("NAME");
CurRec->resolveReferencesTo(DefNameRV);
}
@@ -2369,7 +2389,7 @@ InstantiateMulticlassDef(MultiClass &MC,
Error(DefmPrefixRange.Start, "def '" + CurRec->getNameInitAsString() +
"' already defined, instantiating defm with subdef '" +
DefProto->getNameInitAsString() + "'");
- return 0;
+ return nullptr;
}
Records.addDef(CurRec);
@@ -2453,7 +2473,7 @@ bool TGParser::ResolveMulticlassDef(MultiClass &MC,
bool TGParser::ParseDefm(MultiClass *CurMultiClass) {
assert(Lex.getCode() == tgtok::Defm && "Unexpected token!");
SMLoc DefmLoc = Lex.getLoc();
- Init *DefmPrefix = 0;
+ Init *DefmPrefix = nullptr;
if (Lex.Lex() == tgtok::Id) { // eat the defm.
DefmPrefix = ParseObjectName(CurMultiClass);
@@ -2473,10 +2493,10 @@ bool TGParser::ParseDefm(MultiClass *CurMultiClass) {
Lex.Lex();
SMLoc SubClassLoc = Lex.getLoc();
- SubClassReference Ref = ParseSubClassReference(0, true);
+ SubClassReference Ref = ParseSubClassReference(nullptr, true);
while (1) {
- if (Ref.Rec == 0) return true;
+ if (!Ref.Rec) return true;
// To instantiate a multiclass, we need to first get the multiclass, then
// instantiate each def contained in the multiclass with the SubClassRef
@@ -2522,21 +2542,21 @@ bool TGParser::ParseDefm(MultiClass *CurMultiClass) {
// A defm can inherit from regular classes (non-multiclass) as
// long as they come in the end of the inheritance list.
- InheritFromClass = (Records.getClass(Lex.getCurStrVal()) != 0);
+ InheritFromClass = (Records.getClass(Lex.getCurStrVal()) != nullptr);
if (InheritFromClass)
break;
- Ref = ParseSubClassReference(0, true);
+ Ref = ParseSubClassReference(nullptr, true);
}
if (InheritFromClass) {
// Process all the classes to inherit as if they were part of a
// regular 'def' and inherit all record values.
- SubClassReference SubClass = ParseSubClassReference(0, false);
+ SubClassReference SubClass = ParseSubClassReference(nullptr, false);
while (1) {
// Check for error.
- if (SubClass.Rec == 0) return true;
+ if (!SubClass.Rec) return true;
// Get the expanded definition prototypes and teach them about
// the record values the current class to inherit has
@@ -2553,7 +2573,7 @@ bool TGParser::ParseDefm(MultiClass *CurMultiClass) {
if (Lex.getCode() != tgtok::comma) break;
Lex.Lex(); // eat ','.
- SubClass = ParseSubClassReference(0, false);
+ SubClass = ParseSubClassReference(nullptr, false);
}
}
diff --git a/lib/TableGen/TGParser.h b/lib/TableGen/TGParser.h
index ce31f8e..6fd442a 100644
--- a/lib/TableGen/TGParser.h
+++ b/lib/TableGen/TGParser.h
@@ -85,7 +85,7 @@ class TGParser {
public:
TGParser(SourceMgr &SrcMgr, RecordKeeper &records)
- : Lex(SrcMgr), CurMultiClass(0), Records(records), AnonCounter(0) {}
+ : Lex(SrcMgr), CurMultiClass(nullptr), Records(records), AnonCounter(0) {}
/// ParseFile - Main entrypoint for parsing a tblgen file. These parser
/// routines return true on error, or false on success.
@@ -131,7 +131,7 @@ private: // Semantic analysis methods.
bool ProcessForeachDefs(Record *CurRec, SMLoc Loc, IterSet &IterVals);
private: // Parser methods.
- bool ParseObjectList(MultiClass *MC = 0);
+ bool ParseObjectList(MultiClass *MC = nullptr);
bool ParseObject(MultiClass *MC);
bool ParseClass();
bool ParseMultiClass();
@@ -169,12 +169,12 @@ private: // Parser methods.
Init *ParseIDValue(Record *CurRec, const std::string &Name, SMLoc NameLoc,
IDParseMode Mode = ParseValueMode);
- Init *ParseSimpleValue(Record *CurRec, RecTy *ItemType = 0,
+ Init *ParseSimpleValue(Record *CurRec, RecTy *ItemType = nullptr,
IDParseMode Mode = ParseValueMode);
- Init *ParseValue(Record *CurRec, RecTy *ItemType = 0,
+ Init *ParseValue(Record *CurRec, RecTy *ItemType = nullptr,
IDParseMode Mode = ParseValueMode);
- std::vector<Init*> ParseValueList(Record *CurRec, Record *ArgsRec = 0,
- RecTy *EltTy = 0);
+ std::vector<Init*> ParseValueList(Record *CurRec, Record *ArgsRec = nullptr,
+ RecTy *EltTy = nullptr);
std::vector<std::pair<llvm::Init*, std::string> > ParseDagArgList(Record *);
bool ParseOptionalRangeList(std::vector<unsigned> &Ranges);
bool ParseOptionalBitList(std::vector<unsigned> &Ranges);
diff --git a/lib/TableGen/module.modulemap b/lib/TableGen/module.modulemap
new file mode 100644
index 0000000..8dac0a2
--- /dev/null
+++ b/lib/TableGen/module.modulemap
@@ -0,0 +1 @@
+module TableGen { requires cplusplus umbrella "." module * { export * } }
diff --git a/lib/Target/AArch64/AArch64.h b/lib/Target/AArch64/AArch64.h
index 0297de1..1c022aa 100644
--- a/lib/Target/AArch64/AArch64.h
+++ b/lib/Target/AArch64/AArch64.h
@@ -1,4 +1,4 @@
-//==-- AArch64.h - Top-level interface for AArch64 representation -*- C++ -*-=//
+//==-- AArch64.h - Top-level interface for AArch64 --------------*- C++ -*-==//
//
// The LLVM Compiler Infrastructure
//
@@ -12,35 +12,38 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TARGET_AARCH64_H
-#define LLVM_TARGET_AARCH64_H
+#ifndef TARGET_AArch64_H
+#define TARGET_AArch64_H
+#include "Utils/AArch64BaseInfo.h"
#include "MCTargetDesc/AArch64MCTargetDesc.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/DataTypes.h"
namespace llvm {
-class AArch64AsmPrinter;
-class FunctionPass;
class AArch64TargetMachine;
-class MachineInstr;
-class MCInst;
-
-FunctionPass *createAArch64ISelDAG(AArch64TargetMachine &TM,
- CodeGenOpt::Level OptLevel);
+class FunctionPass;
+class MachineFunctionPass;
+
+FunctionPass *createAArch64DeadRegisterDefinitions();
+FunctionPass *createAArch64ConditionalCompares();
+FunctionPass *createAArch64AdvSIMDScalar();
+FunctionPass *createAArch64BranchRelaxation();
+FunctionPass *createAArch64ISelDag(AArch64TargetMachine &TM,
+ CodeGenOpt::Level OptLevel);
+FunctionPass *createAArch64StorePairSuppressPass();
+FunctionPass *createAArch64ExpandPseudoPass();
+FunctionPass *createAArch64LoadStoreOptimizationPass();
+ModulePass *createAArch64PromoteConstantPass();
+FunctionPass *createAArch64AddressTypePromotionPass();
+/// \brief Creates an AArch64-specific Target Transformation Info pass.
+ImmutablePass *
+createAArch64TargetTransformInfoPass(const AArch64TargetMachine *TM);
FunctionPass *createAArch64CleanupLocalDynamicTLSPass();
-FunctionPass *createAArch64BranchFixupPass();
-
-/// \brief Creates an AArch64-specific Target Transformation Info pass.
-ImmutablePass *createAArch64TargetTransformInfoPass(
- const AArch64TargetMachine *TM);
-
-void LowerAArch64MachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
- AArch64AsmPrinter &AP);
-
-
-}
+FunctionPass *createAArch64CollectLOHPass();
+} // end namespace llvm
#endif
diff --git a/lib/Target/AArch64/AArch64.td b/lib/Target/AArch64/AArch64.td
index e49afd6..1ad5ac8 100644
--- a/lib/Target/AArch64/AArch64.td
+++ b/lib/Target/AArch64/AArch64.td
@@ -1,4 +1,4 @@
-//===- AArch64.td - Describe the AArch64 Target Machine -------*- tblgen -*-==//
+//=- AArch64.td - Describe the AArch64 Target Machine --------*- tablegen -*-=//
//
// The LLVM Compiler Infrastructure
//
@@ -7,12 +7,11 @@
//
//===----------------------------------------------------------------------===//
//
-// This is the top level entry point for the AArch64 target.
//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
-// Target-independent interfaces
+// Target-independent interfaces which we are implementing
//===----------------------------------------------------------------------===//
include "llvm/Target/Target.td"
@@ -22,7 +21,7 @@ include "llvm/Target/Target.td"
//
def FeatureFPARMv8 : SubtargetFeature<"fp-armv8", "HasFPARMv8", "true",
- "Enable ARMv8 FP">;
+ "Enable ARMv8 FP">;
def FeatureNEON : SubtargetFeature<"neon", "HasNEON", "true",
"Enable Advanced SIMD instructions", [FeatureFPARMv8]>;
@@ -30,54 +29,106 @@ def FeatureNEON : SubtargetFeature<"neon", "HasNEON", "true",
def FeatureCrypto : SubtargetFeature<"crypto", "HasCrypto", "true",
"Enable cryptographic instructions">;
+def FeatureCRC : SubtargetFeature<"crc", "HasCRC", "true",
+ "Enable ARMv8 CRC-32 checksum instructions">;
+
+/// Cyclone has register move instructions which are "free".
+def FeatureZCRegMove : SubtargetFeature<"zcm", "HasZeroCycleRegMove", "true",
+ "Has zero-cycle register moves">;
+
+/// Cyclone has instructions which zero registers for "free".
+def FeatureZCZeroing : SubtargetFeature<"zcz", "HasZeroCycleZeroing", "true",
+ "Has zero-cycle zeroing instructions">;
+
+//===----------------------------------------------------------------------===//
+// Register File Description
+//===----------------------------------------------------------------------===//
+
+include "AArch64RegisterInfo.td"
+include "AArch64CallingConvention.td"
+
+//===----------------------------------------------------------------------===//
+// Instruction Descriptions
//===----------------------------------------------------------------------===//
-// AArch64 Processors
-//
include "AArch64Schedule.td"
+include "AArch64InstrInfo.td"
-class ProcNoItin<string Name, list<SubtargetFeature> Features>
- : Processor<Name, NoItineraries, Features>;
+def AArch64InstrInfo : InstrInfo;
-def : Processor<"generic", GenericItineraries, [FeatureFPARMv8, FeatureNEON]>;
+//===----------------------------------------------------------------------===//
+// AArch64 Processors supported.
+//
+include "AArch64SchedA53.td"
+include "AArch64SchedCyclone.td"
def ProcA53 : SubtargetFeature<"a53", "ARMProcFamily", "CortexA53",
"Cortex-A53 ARM processors",
[FeatureFPARMv8,
FeatureNEON,
- FeatureCrypto]>;
+ FeatureCrypto,
+ FeatureCRC]>;
def ProcA57 : SubtargetFeature<"a57", "ARMProcFamily", "CortexA57",
"Cortex-A57 ARM processors",
[FeatureFPARMv8,
FeatureNEON,
- FeatureCrypto]>;
+ FeatureCrypto,
+ FeatureCRC]>;
+
+def ProcCyclone : SubtargetFeature<"cyclone", "ARMProcFamily", "Cyclone",
+ "Cyclone",
+ [FeatureFPARMv8,
+ FeatureNEON,
+ FeatureCrypto,
+ FeatureCRC,
+ FeatureZCRegMove, FeatureZCZeroing]>;
+
+def : ProcessorModel<"generic", NoSchedModel, [FeatureFPARMv8,
+ FeatureNEON,
+ FeatureCRC]>;
def : ProcessorModel<"cortex-a53", CortexA53Model, [ProcA53]>;
-def : Processor<"cortex-a57", NoItineraries, [ProcA57]>;
+def : ProcessorModel<"cortex-a57", NoSchedModel, [ProcA57]>;
+def : ProcessorModel<"cyclone", CycloneModel, [ProcCyclone]>;
//===----------------------------------------------------------------------===//
-// Register File Description
+// Assembly parser
//===----------------------------------------------------------------------===//
-include "AArch64RegisterInfo.td"
+def GenericAsmParserVariant : AsmParserVariant {
+ int Variant = 0;
+ string Name = "generic";
+}
-include "AArch64CallingConv.td"
+def AppleAsmParserVariant : AsmParserVariant {
+ int Variant = 1;
+ string Name = "apple-neon";
+}
//===----------------------------------------------------------------------===//
-// Instruction Descriptions
+// Assembly printer
//===----------------------------------------------------------------------===//
+// AArch64 uses the MC printer for asm output, so make sure the TableGen
+// AsmWriter bits get associated with the correct class.
+def GenericAsmWriter : AsmWriter {
+ string AsmWriterClassName = "InstPrinter";
+ int Variant = 0;
+ bit isMCAsmWriter = 1;
+}
-include "AArch64InstrInfo.td"
-
-def AArch64InstrInfo : InstrInfo {
- let noNamedPositionallyEncodedOperands = 1;
+def AppleAsmWriter : AsmWriter {
+ let AsmWriterClassName = "AppleInstPrinter";
+ int Variant = 1;
+ int isMCAsmWriter = 1;
}
//===----------------------------------------------------------------------===//
-// Declare the target which we are implementing
+// Target Declaration
//===----------------------------------------------------------------------===//
def AArch64 : Target {
let InstructionSet = AArch64InstrInfo;
+ let AssemblyParserVariants = [GenericAsmParserVariant, AppleAsmParserVariant];
+ let AssemblyWriters = [GenericAsmWriter, AppleAsmWriter];
}
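The .td changes above all follow one pattern: declare a SubtargetFeature, then compose processors from feature lists. A hypothetical out-of-tree core built from the same features would read:

  def : ProcessorModel<"my-core", NoSchedModel,
                       [FeatureFPARMv8, FeatureNEON, FeatureCRC]>;

Listing FeatureFPARMv8 explicitly is belt-and-braces here, since FeatureNEON already implies it through its implied-features list.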
diff --git a/lib/Target/ARM64/ARM64AddressTypePromotion.cpp b/lib/Target/AArch64/AArch64AddressTypePromotion.cpp
index 72fa6af..04906f6 100644
--- a/lib/Target/ARM64/ARM64AddressTypePromotion.cpp
+++ b/lib/Target/AArch64/AArch64AddressTypePromotion.cpp
@@ -1,5 +1,4 @@
-
-//===-- ARM64AddressTypePromotion.cpp --- Promote type for addr accesses -===//
+//===-- AArch64AddressTypePromotion.cpp --- Promote type for addr accesses -==//
//
// The LLVM Compiler Infrastructure
//
@@ -29,8 +28,7 @@
// FIXME: This pass may be useful for other targets too.
// ===---------------------------------------------------------------------===//
-#define DEBUG_TYPE "arm64-type-promotion"
-#include "ARM64.h"
+#include "AArch64.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
@@ -46,41 +44,43 @@
using namespace llvm;
+#define DEBUG_TYPE "aarch64-type-promotion"
+
static cl::opt<bool>
-EnableAddressTypePromotion("arm64-type-promotion", cl::Hidden,
+EnableAddressTypePromotion("aarch64-type-promotion", cl::Hidden,
cl::desc("Enable the type promotion pass"),
cl::init(true));
static cl::opt<bool>
-EnableMerge("arm64-type-promotion-merge", cl::Hidden,
+EnableMerge("aarch64-type-promotion-merge", cl::Hidden,
cl::desc("Enable merging of redundant sexts when one is dominating"
" the other."),
cl::init(true));
//===----------------------------------------------------------------------===//
-// ARM64AddressTypePromotion
+// AArch64AddressTypePromotion
//===----------------------------------------------------------------------===//
namespace llvm {
-void initializeARM64AddressTypePromotionPass(PassRegistry &);
+void initializeAArch64AddressTypePromotionPass(PassRegistry &);
}
namespace {
-class ARM64AddressTypePromotion : public FunctionPass {
+class AArch64AddressTypePromotion : public FunctionPass {
public:
static char ID;
- ARM64AddressTypePromotion()
- : FunctionPass(ID), Func(NULL), ConsideredSExtType(NULL) {
- initializeARM64AddressTypePromotionPass(*PassRegistry::getPassRegistry());
+ AArch64AddressTypePromotion()
+ : FunctionPass(ID), Func(nullptr), ConsideredSExtType(nullptr) {
+ initializeAArch64AddressTypePromotionPass(*PassRegistry::getPassRegistry());
}
- virtual const char *getPassName() const {
- return "ARM64 Address Type Promotion";
+ const char *getPassName() const override {
+ return "AArch64 Address Type Promotion";
}
/// Iterate over the functions and promote the computation of interesting
// sext instructions.
- bool runOnFunction(Function &F);
+ bool runOnFunction(Function &F) override;
private:
/// The current function.
@@ -90,7 +90,7 @@ private:
Type *ConsideredSExtType;
// This transformation requires dominator info.
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
AU.addRequired<DominatorTreeWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
@@ -139,19 +139,19 @@ private:
};
} // end anonymous namespace.
-char ARM64AddressTypePromotion::ID = 0;
+char AArch64AddressTypePromotion::ID = 0;
-INITIALIZE_PASS_BEGIN(ARM64AddressTypePromotion, "arm64-type-promotion",
- "ARM64 Type Promotion Pass", false, false)
+INITIALIZE_PASS_BEGIN(AArch64AddressTypePromotion, "aarch64-type-promotion",
+ "AArch64 Type Promotion Pass", false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_END(ARM64AddressTypePromotion, "arm64-type-promotion",
- "ARM64 Type Promotion Pass", false, false)
+INITIALIZE_PASS_END(AArch64AddressTypePromotion, "aarch64-type-promotion",
+ "AArch64 Type Promotion Pass", false, false)
-FunctionPass *llvm::createARM64AddressTypePromotionPass() {
- return new ARM64AddressTypePromotion();
+FunctionPass *llvm::createAArch64AddressTypePromotionPass() {
+ return new AArch64AddressTypePromotion();
}
-bool ARM64AddressTypePromotion::canGetThrough(const Instruction *Inst) {
+bool AArch64AddressTypePromotion::canGetThrough(const Instruction *Inst) {
if (isa<SExtInst>(Inst))
return true;
@@ -174,7 +174,7 @@ bool ARM64AddressTypePromotion::canGetThrough(const Instruction *Inst) {
return false;
}
-bool ARM64AddressTypePromotion::shouldGetThrough(const Instruction *Inst) {
+bool AArch64AddressTypePromotion::shouldGetThrough(const Instruction *Inst) {
// If the type of the sext is the same as the considered one, this sext
// will become useless.
// Otherwise, we will have to do something to preserve the original value,
@@ -210,14 +210,12 @@ static bool shouldSExtOperand(const Instruction *Inst, int OpIdx) {
}
bool
-ARM64AddressTypePromotion::shouldConsiderSExt(const Instruction *SExt) const {
+AArch64AddressTypePromotion::shouldConsiderSExt(const Instruction *SExt) const {
if (SExt->getType() != ConsideredSExtType)
return false;
- for (Value::const_use_iterator UseIt = SExt->use_begin(),
- EndUseIt = SExt->use_end();
- UseIt != EndUseIt; ++UseIt) {
- if (isa<GetElementPtrInst>(*UseIt))
+ for (const Use &U : SExt->uses()) {
+ if (isa<GetElementPtrInst>(*U))
return true;
}
@@ -250,7 +248,7 @@ ARM64AddressTypePromotion::shouldConsiderSExt(const Instruction *SExt) const {
// = a
// Iterate on 'c'.
bool
-ARM64AddressTypePromotion::propagateSignExtension(Instructions &SExtInsts) {
+AArch64AddressTypePromotion::propagateSignExtension(Instructions &SExtInsts) {
DEBUG(dbgs() << "*** Propagate Sign Extension ***\n");
bool LocalChange = false;
@@ -345,7 +343,7 @@ ARM64AddressTypePromotion::propagateSignExtension(Instructions &SExtInsts) {
SExtForOpnd->moveBefore(Inst);
Inst->setOperand(OpIdx, SExtForOpnd);
// If more sext are required, new instructions will have to be created.
- SExtForOpnd = NULL;
+ SExtForOpnd = nullptr;
}
if (SExtForOpnd == SExt) {
DEBUG(dbgs() << "Sign extension is useless now\n");
@@ -376,11 +374,11 @@ ARM64AddressTypePromotion::propagateSignExtension(Instructions &SExtInsts) {
return LocalChange;
}
-void ARM64AddressTypePromotion::mergeSExts(ValueToInsts &ValToSExtendedUses,
- SetOfInstructions &ToRemove) {
+void AArch64AddressTypePromotion::mergeSExts(ValueToInsts &ValToSExtendedUses,
+ SetOfInstructions &ToRemove) {
DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- for (auto &Entry: ValToSExtendedUses) {
+ for (auto &Entry : ValToSExtendedUses) {
Instructions &Insts = Entry.second;
Instructions CurPts;
for (Instruction *Inst : Insts) {
@@ -415,13 +413,13 @@ void ARM64AddressTypePromotion::mergeSExts(ValueToInsts &ValToSExtendedUses,
}
}
-void ARM64AddressTypePromotion::analyzeSExtension(Instructions &SExtInsts) {
+void AArch64AddressTypePromotion::analyzeSExtension(Instructions &SExtInsts) {
DEBUG(dbgs() << "*** Analyze Sign Extensions ***\n");
DenseMap<Value *, Instruction *> SeenChains;
for (auto &BB : *Func) {
- for (auto &II: BB) {
+ for (auto &II : BB) {
Instruction *SExt = &II;
// Collect all sext operation per type.
@@ -438,10 +436,8 @@ void ARM64AddressTypePromotion::analyzeSExtension(Instructions &SExtInsts) {
bool insert = false;
// #1.
- for (Value::use_iterator UseIt = SExt->use_begin(),
- EndUseIt = SExt->use_end();
- UseIt != EndUseIt; ++UseIt) {
- const Instruction *Inst = dyn_cast<GetElementPtrInst>(*UseIt);
+ for (const Use &U : SExt->uses()) {
+ const Instruction *Inst = dyn_cast<GetElementPtrInst>(U);
if (Inst && Inst->getNumOperands() > 2) {
DEBUG(dbgs() << "Interesting use in GetElementPtrInst\n" << *Inst
<< '\n');
@@ -469,10 +465,10 @@ void ARM64AddressTypePromotion::analyzeSExtension(Instructions &SExtInsts) {
if (insert || AlreadySeen != SeenChains.end()) {
DEBUG(dbgs() << "Insert\n");
SExtInsts.push_back(SExt);
- if (AlreadySeen != SeenChains.end() && AlreadySeen->second != NULL) {
+ if (AlreadySeen != SeenChains.end() && AlreadySeen->second != nullptr) {
DEBUG(dbgs() << "Insert chain member\n");
SExtInsts.push_back(AlreadySeen->second);
- SeenChains[Last] = NULL;
+ SeenChains[Last] = nullptr;
}
} else {
DEBUG(dbgs() << "Record its chain membership\n");
@@ -482,7 +478,7 @@ void ARM64AddressTypePromotion::analyzeSExtension(Instructions &SExtInsts) {
}
}
-bool ARM64AddressTypePromotion::runOnFunction(Function &F) {
+bool AArch64AddressTypePromotion::runOnFunction(Function &F) {
if (!EnableAddressTypePromotion || F.isDeclaration())
return false;
Func = &F;
diff --git a/lib/Target/ARM64/ARM64AdvSIMDScalarPass.cpp b/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp
index 83f8cda..734fb21 100644
--- a/lib/Target/ARM64/ARM64AdvSIMDScalarPass.cpp
+++ b/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp
@@ -1,4 +1,4 @@
-//===-- ARM64AdvSIMDScalar.cpp - Replace dead defs w/ zero reg --===//
+//===-- AArch64AdvSIMDScalar.cpp - Replace dead defs w/ zero reg --===//
//
// The LLVM Compiler Infrastructure
//
@@ -14,7 +14,7 @@
//===----------------------------------------------------------------------===//
// TODO: Graph based predicate heuristics.
// Walking the instruction list linearly will get many, perhaps most, of
-// the cases, but to do a truly throrough job of this, we need a more
+// the cases, but to do a truly thorough job of this, we need a more
// holistic approach.
//
// This optimization is very similar in spirit to the register allocator's
@@ -33,10 +33,9 @@
// solution.
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "arm64-simd-scalar"
-#include "ARM64.h"
-#include "ARM64InstrInfo.h"
-#include "ARM64RegisterInfo.h"
+#include "AArch64.h"
+#include "AArch64InstrInfo.h"
+#include "AArch64RegisterInfo.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -48,14 +47,12 @@
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
-static cl::opt<bool>
-AdvSIMDScalar("arm64-simd-scalar",
- cl::desc("enable use of AdvSIMD scalar integer instructions"),
- cl::init(false), cl::Hidden);
+#define DEBUG_TYPE "aarch64-simd-scalar"
+
// Allow forcing all i64 operations with equivalent SIMD instructions to use
// them. For stress-testing the transformation function.
static cl::opt<bool>
-TransformAll("arm64-simd-scalar-force-all",
+TransformAll("aarch64-simd-scalar-force-all",
cl::desc("Force use of AdvSIMD scalar instructions everywhere"),
cl::init(false), cl::Hidden);
@@ -64,9 +61,9 @@ STATISTIC(NumCopiesDeleted, "Number of cross-class copies deleted");
STATISTIC(NumCopiesInserted, "Number of cross-class copies inserted");
namespace {
-class ARM64AdvSIMDScalar : public MachineFunctionPass {
+class AArch64AdvSIMDScalar : public MachineFunctionPass {
MachineRegisterInfo *MRI;
- const ARM64InstrInfo *TII;
+ const AArch64InstrInfo *TII;
private:
// isProfitableToTransform - Predicate function to determine whether an
@@ -74,7 +71,7 @@ private:
// instruction. "add Xd, Xn, Xm" ==> "add Dd, Da, Db", for example.
bool isProfitableToTransform(const MachineInstr *MI) const;
- // tranformInstruction - Perform the transformation of an instruction
+ // transformInstruction - Perform the transformation of an instruction
// to its equivalent AdvSIMD scalar instruction. Update inputs and outputs
// to be the correct register class, minimizing cross-class copies.
void transformInstruction(MachineInstr *MI);
@@ -84,20 +81,20 @@ private:
public:
static char ID; // Pass identification, replacement for typeid.
- explicit ARM64AdvSIMDScalar() : MachineFunctionPass(ID) {}
+ explicit AArch64AdvSIMDScalar() : MachineFunctionPass(ID) {}
- virtual bool runOnMachineFunction(MachineFunction &F);
+ bool runOnMachineFunction(MachineFunction &F) override;
- const char *getPassName() const {
- return "AdvSIMD scalar operation optimization";
+ const char *getPassName() const override {
+ return "AdvSIMD Scalar Operation Optimization";
}
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
MachineFunctionPass::getAnalysisUsage(AU);
}
};
-char ARM64AdvSIMDScalar::ID = 0;
+char AArch64AdvSIMDScalar::ID = 0;
} // end anonymous namespace
static bool isGPR64(unsigned Reg, unsigned SubReg,
@@ -105,20 +102,20 @@ static bool isGPR64(unsigned Reg, unsigned SubReg,
if (SubReg)
return false;
if (TargetRegisterInfo::isVirtualRegister(Reg))
- return MRI->getRegClass(Reg)->hasSuperClassEq(&ARM64::GPR64RegClass);
- return ARM64::GPR64RegClass.contains(Reg);
+ return MRI->getRegClass(Reg)->hasSuperClassEq(&AArch64::GPR64RegClass);
+ return AArch64::GPR64RegClass.contains(Reg);
}
static bool isFPR64(unsigned Reg, unsigned SubReg,
const MachineRegisterInfo *MRI) {
if (TargetRegisterInfo::isVirtualRegister(Reg))
- return (MRI->getRegClass(Reg)->hasSuperClassEq(&ARM64::FPR64RegClass) &&
+ return (MRI->getRegClass(Reg)->hasSuperClassEq(&AArch64::FPR64RegClass) &&
SubReg == 0) ||
- (MRI->getRegClass(Reg)->hasSuperClassEq(&ARM64::FPR128RegClass) &&
- SubReg == ARM64::dsub);
- // Physical register references just check the regist class directly.
- return (ARM64::FPR64RegClass.contains(Reg) && SubReg == 0) ||
- (ARM64::FPR128RegClass.contains(Reg) && SubReg == ARM64::dsub);
+ (MRI->getRegClass(Reg)->hasSuperClassEq(&AArch64::FPR128RegClass) &&
+ SubReg == AArch64::dsub);
+ // Physical register references just check the register class directly.
+ return (AArch64::FPR64RegClass.contains(Reg) && SubReg == 0) ||
+ (AArch64::FPR128RegClass.contains(Reg) && SubReg == AArch64::dsub);
}
// getSrcFromCopy - Get the original source register for a GPR64 <--> FPR64
@@ -128,17 +125,18 @@ static unsigned getSrcFromCopy(const MachineInstr *MI,
unsigned &SubReg) {
SubReg = 0;
// The "FMOV Xd, Dn" instruction is the typical form.
- if (MI->getOpcode() == ARM64::FMOVDXr || MI->getOpcode() == ARM64::FMOVXDr)
+ if (MI->getOpcode() == AArch64::FMOVDXr ||
+ MI->getOpcode() == AArch64::FMOVXDr)
return MI->getOperand(1).getReg();
// A lane zero extract "UMOV.d Xd, Vn[0]" is equivalent. We shouldn't see
// these at this stage, but it's easy to check for.
- if (MI->getOpcode() == ARM64::UMOVvi64 && MI->getOperand(2).getImm() == 0) {
- SubReg = ARM64::dsub;
+ if (MI->getOpcode() == AArch64::UMOVvi64 && MI->getOperand(2).getImm() == 0) {
+ SubReg = AArch64::dsub;
return MI->getOperand(1).getReg();
}
// Or just a plain COPY instruction. This can be directly to/from FPR64,
// or it can be a dsub subreg reference to an FPR128.
- if (MI->getOpcode() == ARM64::COPY) {
+ if (MI->getOpcode() == AArch64::COPY) {
if (isFPR64(MI->getOperand(0).getReg(), MI->getOperand(0).getSubReg(),
MRI) &&
isGPR64(MI->getOperand(1).getReg(), MI->getOperand(1).getSubReg(), MRI))
@@ -147,7 +145,7 @@ static unsigned getSrcFromCopy(const MachineInstr *MI,
MRI) &&
isFPR64(MI->getOperand(1).getReg(), MI->getOperand(1).getSubReg(),
MRI)) {
- SubReg = ARM64::dsub;
+ SubReg = MI->getOperand(1).getSubReg();
return MI->getOperand(1).getReg();
}
}
@@ -164,10 +162,10 @@ static int getTransformOpcode(unsigned Opc) {
default:
break;
// FIXME: Lots more possibilities.
- case ARM64::ADDXrr:
- return ARM64::ADDv1i64;
- case ARM64::SUBXrr:
- return ARM64::SUBv1i64;
+ case AArch64::ADDXrr:
+ return AArch64::ADDv1i64;
+ case AArch64::SUBXrr:
+ return AArch64::SUBv1i64;
}
// No AdvSIMD equivalent, so just return the original opcode.
return Opc;
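
(Caller-side, the table reads as a pure opcode map; illustrative values straight from the cases above:)

    //   getTransformOpcode(AArch64::ADDXrr) == AArch64::ADDv1i64
    //   getTransformOpcode(AArch64::SUBXrr) == AArch64::SUBv1i64
    //   anything else comes back unchanged, which is presumably what
    //   isTransformable() compares against.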
@@ -181,7 +179,8 @@ static bool isTransformable(const MachineInstr *MI) {
// isProfitableToTransform - Predicate function to determine whether an
// instruction should be transformed to its equivalent AdvSIMD scalar
// instruction. "add Xd, Xn, Xm" ==> "add Dd, Da, Db", for example.
-bool ARM64AdvSIMDScalar::isProfitableToTransform(const MachineInstr *MI) const {
+bool
+AArch64AdvSIMDScalar::isProfitableToTransform(const MachineInstr *MI) const {
// If this instruction isn't eligible to be transformed (no SIMD equivalent),
// early exit since that's the common case.
if (!isTransformable(MI))
@@ -241,8 +240,8 @@ bool ARM64AdvSIMDScalar::isProfitableToTransform(const MachineInstr *MI) const {
// preferable to have it use the FPR64 in most cases, as if the source
// vector is an IMPLICIT_DEF, the INSERT_SUBREG just goes away entirely.
// Ditto for a lane insert.
- else if (Use->getOpcode() == ARM64::INSERT_SUBREG ||
- Use->getOpcode() == ARM64::INSvi64gpr)
+ else if (Use->getOpcode() == AArch64::INSERT_SUBREG ||
+ Use->getOpcode() == AArch64::INSvi64gpr)
;
else
AllUsesAreCopies = false;
@@ -252,7 +251,7 @@ bool ARM64AdvSIMDScalar::isProfitableToTransform(const MachineInstr *MI) const {
if (AllUsesAreCopies)
--NumNewCopies;
- // If a tranform will not increase the number of cross-class copies required,
+ // If a transform will not increase the number of cross-class copies required,
// return true.
if (NumNewCopies <= NumRemovableCopies)
return true;
@@ -262,10 +261,10 @@ bool ARM64AdvSIMDScalar::isProfitableToTransform(const MachineInstr *MI) const {
return TransformAll;
}
-static MachineInstr *insertCopy(const ARM64InstrInfo *TII, MachineInstr *MI,
+static MachineInstr *insertCopy(const AArch64InstrInfo *TII, MachineInstr *MI,
unsigned Dst, unsigned Src, bool IsKill) {
MachineInstrBuilder MIB =
- BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII->get(ARM64::COPY),
+ BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII->get(AArch64::COPY),
Dst)
.addReg(Src, getKillRegState(IsKill));
DEBUG(dbgs() << " adding copy: " << *MIB);
@@ -273,10 +272,10 @@ static MachineInstr *insertCopy(const ARM64InstrInfo *TII, MachineInstr *MI,
return MIB;
}
-// tranformInstruction - Perform the transformation of an instruction
+// transformInstruction - Perform the transformation of an instruction
// to its equivalent AdvSIMD scalar instruction. Update inputs and outputs
// to be the correct register class, minimizing cross-class copies.
-void ARM64AdvSIMDScalar::transformInstruction(MachineInstr *MI) {
+void AArch64AdvSIMDScalar::transformInstruction(MachineInstr *MI) {
DEBUG(dbgs() << "Scalar transform: " << *MI);
MachineBasicBlock *MBB = MI->getParent();
@@ -319,19 +318,19 @@ void ARM64AdvSIMDScalar::transformInstruction(MachineInstr *MI) {
// copy.
if (!Src0) {
SubReg0 = 0;
- Src0 = MRI->createVirtualRegister(&ARM64::FPR64RegClass);
+ Src0 = MRI->createVirtualRegister(&AArch64::FPR64RegClass);
insertCopy(TII, MI, Src0, OrigSrc0, true);
}
if (!Src1) {
SubReg1 = 0;
- Src1 = MRI->createVirtualRegister(&ARM64::FPR64RegClass);
+ Src1 = MRI->createVirtualRegister(&AArch64::FPR64RegClass);
insertCopy(TII, MI, Src1, OrigSrc1, true);
}
// Create a vreg for the destination.
// FIXME: No need to do this if the ultimate user expects an FPR64.
// Check for that and avoid the copy if possible.
- unsigned Dst = MRI->createVirtualRegister(&ARM64::FPR64RegClass);
+ unsigned Dst = MRI->createVirtualRegister(&AArch64::FPR64RegClass);
// For now, all of the new instructions have the same simple three-register
// form, so no need to special case based on what instruction we're
@@ -352,7 +351,7 @@ void ARM64AdvSIMDScalar::transformInstruction(MachineInstr *MI) {
}
// processMachineBasicBlock - Main optimization loop.
-bool ARM64AdvSIMDScalar::processMachineBasicBlock(MachineBasicBlock *MBB) {
+bool AArch64AdvSIMDScalar::processMachineBasicBlock(MachineBasicBlock *MBB) {
bool Changed = false;
for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;) {
MachineInstr *MI = I;
@@ -366,17 +365,13 @@ bool ARM64AdvSIMDScalar::processMachineBasicBlock(MachineBasicBlock *MBB) {
}
// runOnMachineFunction - Pass entry point from PassManager.
-bool ARM64AdvSIMDScalar::runOnMachineFunction(MachineFunction &mf) {
- // Early exit if pass disabled.
- if (!AdvSIMDScalar)
- return false;
-
+bool AArch64AdvSIMDScalar::runOnMachineFunction(MachineFunction &mf) {
bool Changed = false;
- DEBUG(dbgs() << "***** ARM64AdvSIMDScalar *****\n");
+ DEBUG(dbgs() << "***** AArch64AdvSIMDScalar *****\n");
const TargetMachine &TM = mf.getTarget();
MRI = &mf.getRegInfo();
- TII = static_cast<const ARM64InstrInfo *>(TM.getInstrInfo());
+ TII = static_cast<const AArch64InstrInfo *>(TM.getInstrInfo());
// Just check things on a one-block-at-a-time basis.
for (MachineFunction::iterator I = mf.begin(), E = mf.end(); I != E; ++I)
@@ -385,8 +380,8 @@ bool ARM64AdvSIMDScalar::runOnMachineFunction(MachineFunction &mf) {
return Changed;
}
-// createARM64AdvSIMDScalar - Factory function used by ARM64TargetMachine
+// createAArch64AdvSIMDScalar - Factory function used by AArch64TargetMachine
// to add the pass to the PassManager.
-FunctionPass *llvm::createARM64AdvSIMDScalar() {
- return new ARM64AdvSIMDScalar();
+FunctionPass *llvm::createAArch64AdvSIMDScalar() {
+ return new AArch64AdvSIMDScalar();
}
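
(The profitability test in this pass reduces to copy accounting. A minimal standalone sketch of that decision, assuming the same counters the pass maintains; not the pass's exact code:)

    // Transform "add Xd, Xn, Xm" to "add Dd, Dn, Dm" only when it does not
    // add net cross-class (GPR64 <-> FPR64) copies.
    static bool worthTransforming(unsigned NumNewCopies,       // copies inserted
                                  unsigned NumRemovableCopies, // copies made dead
                                  bool ForceAll) {             // stress-test flag
      if (NumNewCopies <= NumRemovableCopies)
        return true;
      return ForceAll; // -aarch64-simd-scalar-force-all
    }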
diff --git a/lib/Target/AArch64/AArch64AsmPrinter.cpp b/lib/Target/AArch64/AArch64AsmPrinter.cpp
index f0b52d3..c3ee9bb 100644
--- a/lib/Target/AArch64/AArch64AsmPrinter.cpp
+++ b/lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -1,4 +1,4 @@
-//===-- AArch64AsmPrinter.cpp - Print machine code to an AArch64 .s file --===//
+//===-- AArch64AsmPrinter.cpp - AArch64 LLVM assembly writer --------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -8,236 +8,337 @@
//===----------------------------------------------------------------------===//
//
// This file contains a printer that converts from our internal representation
-// of machine-dependent LLVM code to GAS-format AArch64 assembly language.
+// of machine-dependent LLVM code to the AArch64 assembly language.
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "asm-printer"
-#include "AArch64AsmPrinter.h"
+#include "AArch64.h"
+#include "AArch64MachineFunctionInfo.h"
+#include "AArch64MCInstLower.h"
+#include "AArch64RegisterInfo.h"
+#include "AArch64Subtarget.h"
#include "InstPrinter/AArch64InstPrinter.h"
#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/StackMaps.h"
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
-#include "llvm/IR/Mangler.h"
#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCInst.h"
-#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCInstBuilder.h"
+#include "llvm/MC/MCLinkerOptimizationHint.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/TargetRegistry.h"
-
using namespace llvm;
-/// Try to print a floating-point register as if it belonged to a specified
-/// register-class. For example the inline asm operand modifier "b" requires its
-/// argument to be printed as "bN".
-static bool printModifiedFPRAsmOperand(const MachineOperand &MO,
- const TargetRegisterInfo *TRI,
- char RegType, raw_ostream &O) {
- if (!MO.isReg())
- return true;
-
- for (MCRegAliasIterator AR(MO.getReg(), TRI, true); AR.isValid(); ++AR) {
- if (AArch64::FPR8RegClass.contains(*AR)) {
- O << RegType << TRI->getEncodingValue(MO.getReg());
- return false;
+#define DEBUG_TYPE "asm-printer"
+
+namespace {
+
+class AArch64AsmPrinter : public AsmPrinter {
+ /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
+ /// make the right decision when printing asm code for different targets.
+ const AArch64Subtarget *Subtarget;
+
+ AArch64MCInstLower MCInstLowering;
+ StackMaps SM;
+
+public:
+ AArch64AsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
+ : AsmPrinter(TM, Streamer),
+ Subtarget(&TM.getSubtarget<AArch64Subtarget>()),
+ MCInstLowering(OutContext, *Mang, *this), SM(*this), AArch64FI(nullptr),
+ LOHLabelCounter(0) {}
+
+ const char *getPassName() const override {
+ return "AArch64 Assembly Printer";
+ }
+
+ /// \brief Wrapper for MCInstLowering.lowerOperand() for the
+ /// tblgen'erated pseudo lowering.
+ bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp) const {
+ return MCInstLowering.lowerOperand(MO, MCOp);
+ }
+
+ void LowerSTACKMAP(MCStreamer &OutStreamer, StackMaps &SM,
+ const MachineInstr &MI);
+ void LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM,
+ const MachineInstr &MI);
+ /// \brief tblgen'erated driver function for lowering simple MI->MC
+ /// pseudo instructions.
+ bool emitPseudoExpansionLowering(MCStreamer &OutStreamer,
+ const MachineInstr *MI);
+
+ void EmitInstruction(const MachineInstr *MI) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AsmPrinter::getAnalysisUsage(AU);
+ AU.setPreservesAll();
+ }
+
+ bool runOnMachineFunction(MachineFunction &F) override {
+ AArch64FI = F.getInfo<AArch64FunctionInfo>();
+ return AsmPrinter::runOnMachineFunction(F);
+ }
+
+private:
+ MachineLocation getDebugValueLocation(const MachineInstr *MI) const;
+ void printOperand(const MachineInstr *MI, unsigned OpNum, raw_ostream &O);
+ bool printAsmMRegister(const MachineOperand &MO, char Mode, raw_ostream &O);
+ bool printAsmRegInClass(const MachineOperand &MO,
+ const TargetRegisterClass *RC, bool isVector,
+ raw_ostream &O);
+
+ bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &O) override;
+ bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNum,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &O) override;
+
+ void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS);
+
+ void EmitFunctionBodyEnd() override;
+
+ MCSymbol *GetCPISymbol(unsigned CPID) const override;
+ void EmitEndOfAsmFile(Module &M) override;
+ AArch64FunctionInfo *AArch64FI;
+
+ /// \brief Emit the LOHs contained in AArch64FI.
+ void EmitLOHs();
+
+ typedef std::map<const MachineInstr *, MCSymbol *> MInstToMCSymbol;
+ MInstToMCSymbol LOHInstToLabel;
+ unsigned LOHLabelCounter;
+};
+
+} // end of anonymous namespace
+
+//===----------------------------------------------------------------------===//
+
+void AArch64AsmPrinter::EmitEndOfAsmFile(Module &M) {
+ if (Subtarget->isTargetMachO()) {
+ // Funny Darwin hack: This flag tells the linker that no global symbols
+ // contain code that falls through to other global symbols (e.g. the obvious
+ // implementation of multiple entry points). If this doesn't occur, the
+ // linker can safely perform dead code stripping. Since LLVM never
+ // generates code that does this, it is always safe to set.
+ OutStreamer.EmitAssemblerFlag(MCAF_SubsectionsViaSymbols);
+ SM.serializeToStackMapSection();
+ }
+
+ // Emit a .data.rel section containing any stubs that were created.
+ if (Subtarget->isTargetELF()) {
+ const TargetLoweringObjectFileELF &TLOFELF =
+ static_cast<const TargetLoweringObjectFileELF &>(getObjFileLowering());
+
+ MachineModuleInfoELF &MMIELF = MMI->getObjFileInfo<MachineModuleInfoELF>();
+
+ // Output stubs for external and common global variables.
+ MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList();
+ if (!Stubs.empty()) {
+ OutStreamer.SwitchSection(TLOFELF.getDataRelSection());
+ const DataLayout *TD = TM.getDataLayout();
+
+ for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
+ OutStreamer.EmitLabel(Stubs[i].first);
+ OutStreamer.EmitSymbolValue(Stubs[i].second.getPointer(),
+ TD->getPointerSize(0));
+ }
+ Stubs.clear();
}
}
- // The register doesn't correspond to anything floating-point like.
- return true;
}
-/// Implements the 'w' and 'x' inline asm operand modifiers, which print a GPR
-/// with the obvious type and an immediate 0 as either wzr or xzr.
-static bool printModifiedGPRAsmOperand(const MachineOperand &MO,
- const TargetRegisterInfo *TRI,
- const TargetRegisterClass &RegClass,
- raw_ostream &O) {
- char Prefix = &RegClass == &AArch64::GPR32RegClass ? 'w' : 'x';
+MachineLocation
+AArch64AsmPrinter::getDebugValueLocation(const MachineInstr *MI) const {
+ MachineLocation Location;
+ assert(MI->getNumOperands() == 4 && "Invalid no. of machine operands!");
+ // Frame address. Currently handles register +- offset only.
+ if (MI->getOperand(0).isReg() && MI->getOperand(1).isImm())
+ Location.set(MI->getOperand(0).getReg(), MI->getOperand(1).getImm());
+ else {
+ DEBUG(dbgs() << "DBG_VALUE instruction ignored! " << *MI << "\n");
+ }
+ return Location;
+}
- if (MO.isImm() && MO.getImm() == 0) {
- O << Prefix << "zr";
- return false;
- } else if (MO.isReg()) {
- if (MO.getReg() == AArch64::XSP || MO.getReg() == AArch64::WSP) {
- O << (Prefix == 'x' ? "sp" : "wsp");
- return false;
- }
+void AArch64AsmPrinter::EmitLOHs() {
+ SmallVector<MCSymbol *, 3> MCArgs;
- for (MCRegAliasIterator AR(MO.getReg(), TRI, true); AR.isValid(); ++AR) {
- if (RegClass.contains(*AR)) {
- O << AArch64InstPrinter::getRegisterName(*AR);
- return false;
- }
+ for (const auto &D : AArch64FI->getLOHContainer()) {
+ for (const MachineInstr *MI : D.getArgs()) {
+ MInstToMCSymbol::iterator LabelIt = LOHInstToLabel.find(MI);
+ assert(LabelIt != LOHInstToLabel.end() &&
+ "Label hasn't been inserted for LOH related instruction");
+ MCArgs.push_back(LabelIt->second);
}
+ OutStreamer.EmitLOHDirective(D.getKind(), MCArgs);
+ MCArgs.clear();
}
+}
- return true;
+void AArch64AsmPrinter::EmitFunctionBodyEnd() {
+ if (!AArch64FI->getLOHRelated().empty())
+ EmitLOHs();
}
-bool AArch64AsmPrinter::printSymbolicAddress(const MachineOperand &MO,
- bool PrintImmediatePrefix,
- StringRef Suffix, raw_ostream &O) {
- StringRef Name;
- StringRef Modifier;
+/// GetCPISymbol - Return the symbol for the specified constant pool entry.
+MCSymbol *AArch64AsmPrinter::GetCPISymbol(unsigned CPID) const {
+ // Darwin uses a linker-private symbol name for constant-pools (to
+  // avoid addends on the relocation?); ELF has no such concept and
+ // uses a normal private symbol.
+ if (getDataLayout().getLinkerPrivateGlobalPrefix()[0])
+ return OutContext.GetOrCreateSymbol(
+ Twine(getDataLayout().getLinkerPrivateGlobalPrefix()) + "CPI" +
+ Twine(getFunctionNumber()) + "_" + Twine(CPID));
+
+ return OutContext.GetOrCreateSymbol(
+ Twine(getDataLayout().getPrivateGlobalPrefix()) + "CPI" +
+ Twine(getFunctionNumber()) + "_" + Twine(CPID));
+}
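
(Worked example, with the usual prefix spellings rather than anything taken from this patch: for constant-pool entry 3 of function 7,)

    //   MachO, linker-private prefix "l" : lCPI7_3
    //   ELF, private prefix ".L"         : .LCPI7_3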
+
+void AArch64AsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MachineOperand &MO = MI->getOperand(OpNum);
switch (MO.getType()) {
default:
- return true;
- case MachineOperand::MO_GlobalAddress:
- Name = getSymbol(MO.getGlobal())->getName();
-
- // Global variables may be accessed either via a GOT or in various fun and
- // interesting TLS-model specific ways. Set the prefix modifier as
- // appropriate here.
- if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(MO.getGlobal())) {
- Reloc::Model RelocM = TM.getRelocationModel();
- if (GV->isThreadLocal()) {
- switch (TM.getTLSModel(GV)) {
- case TLSModel::GeneralDynamic:
- Modifier = "tlsdesc";
- break;
- case TLSModel::LocalDynamic:
- Modifier = "dtprel";
- break;
- case TLSModel::InitialExec:
- Modifier = "gottprel";
- break;
- case TLSModel::LocalExec:
- Modifier = "tprel";
- break;
- }
- } else if (Subtarget->GVIsIndirectSymbol(GV, RelocM)) {
- Modifier = "got";
- }
- }
+ assert(0 && "<unknown operand type>");
+ case MachineOperand::MO_Register: {
+ unsigned Reg = MO.getReg();
+ assert(TargetRegisterInfo::isPhysicalRegister(Reg));
+ assert(!MO.getSubReg() && "Subregs should be eliminated!");
+ O << AArch64InstPrinter::getRegisterName(Reg);
+ break;
+ }
+ case MachineOperand::MO_Immediate: {
+ int64_t Imm = MO.getImm();
+ O << '#' << Imm;
break;
- case MachineOperand::MO_BlockAddress:
- Name = GetBlockAddressSymbol(MO.getBlockAddress())->getName();
+ }
+ }
+}
+
+bool AArch64AsmPrinter::printAsmMRegister(const MachineOperand &MO, char Mode,
+ raw_ostream &O) {
+ unsigned Reg = MO.getReg();
+ switch (Mode) {
+ default:
+ return true; // Unknown mode.
+ case 'w':
+ Reg = getWRegFromXReg(Reg);
break;
- case MachineOperand::MO_ConstantPoolIndex:
- Name = GetCPISymbol(MO.getIndex())->getName();
+ case 'x':
+ Reg = getXRegFromWReg(Reg);
break;
}
- // Some instructions (notably ADRP) don't take the # prefix for
- // immediates. Only print it if asked to.
- if (PrintImmediatePrefix)
- O << '#';
-
- // Only need the joining "_" if both the prefix and the suffix are
- // non-null. This little block simply takes care of the four possibly
- // combinations involved there.
- if (Modifier == "" && Suffix == "")
- O << Name;
- else if (Modifier == "" && Suffix != "")
- O << ":" << Suffix << ':' << Name;
- else if (Modifier != "" && Suffix == "")
- O << ":" << Modifier << ':' << Name;
- else
- O << ":" << Modifier << '_' << Suffix << ':' << Name;
+ O << AArch64InstPrinter::getRegisterName(Reg);
+ return false;
+}
+// Prints the register in MO using class RC, i.e. the register at the same
+// encoding offset in the new register class. This should not be used for
+// cross-class printing.
+bool AArch64AsmPrinter::printAsmRegInClass(const MachineOperand &MO,
+ const TargetRegisterClass *RC,
+ bool isVector, raw_ostream &O) {
+ assert(MO.isReg() && "Should only get here with a register!");
+ const AArch64RegisterInfo *RI =
+ static_cast<const AArch64RegisterInfo *>(TM.getRegisterInfo());
+ unsigned Reg = MO.getReg();
+ unsigned RegToPrint = RC->getRegister(RI->getEncodingValue(Reg));
+ assert(RI->regsOverlap(RegToPrint, Reg));
+ O << AArch64InstPrinter::getRegisterName(
+ RegToPrint, isVector ? AArch64::vreg : AArch64::NoRegAltName);
return false;
}
bool AArch64AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
unsigned AsmVariant,
const char *ExtraCode, raw_ostream &O) {
- const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo();
+ const MachineOperand &MO = MI->getOperand(OpNum);
- if (!ExtraCode)
- ExtraCode = "";
+ // First try the generic code, which knows about modifiers like 'c' and 'n'.
+ if (!AsmPrinter::PrintAsmOperand(MI, OpNum, AsmVariant, ExtraCode, O))
+ return false;
- switch(ExtraCode[0]) {
- default:
- if (!AsmPrinter::PrintAsmOperand(MI, OpNum, AsmVariant, ExtraCode, O))
+ // Does this asm operand have a single letter operand modifier?
+ if (ExtraCode && ExtraCode[0]) {
+ if (ExtraCode[1] != 0)
+ return true; // Unknown modifier.
+
+ switch (ExtraCode[0]) {
+ default:
+ return true; // Unknown modifier.
+ case 'w': // Print W register
+ case 'x': // Print X register
+ if (MO.isReg())
+ return printAsmMRegister(MO, ExtraCode[0], O);
+ if (MO.isImm() && MO.getImm() == 0) {
+ unsigned Reg = ExtraCode[0] == 'w' ? AArch64::WZR : AArch64::XZR;
+ O << AArch64InstPrinter::getRegisterName(Reg);
return false;
- break;
- case 'w':
- // Output 32-bit general register operand, constant zero as wzr, or stack
- // pointer as wsp. Ignored when used with other operand types.
- if (!printModifiedGPRAsmOperand(MI->getOperand(OpNum), TRI,
- AArch64::GPR32RegClass, O))
- return false;
- break;
- case 'x':
- // Output 64-bit general register operand, constant zero as xzr, or stack
- // pointer as sp. Ignored when used with other operand types.
- if (!printModifiedGPRAsmOperand(MI->getOperand(OpNum), TRI,
- AArch64::GPR64RegClass, O))
- return false;
- break;
- case 'H':
- // Output higher numbered of a 64-bit general register pair
- case 'Q':
- // Output least significant register of a 64-bit general register pair
- case 'R':
- // Output most significant register of a 64-bit general register pair
-
- // FIXME note: these three operand modifiers will require, to some extent,
- // adding a paired GPR64 register class. Initial investigation suggests that
- // assertions are hit unless it has a type and is made legal for that type
- // in ISelLowering. After that step is made, the number of modifications
- // needed explodes (operation legality, calling conventions, stores, reg
- // copies ...).
- llvm_unreachable("FIXME: Unimplemented register pairs");
- case 'b':
- case 'h':
- case 's':
- case 'd':
- case 'q':
- if (!printModifiedFPRAsmOperand(MI->getOperand(OpNum), TRI,
- ExtraCode[0], O))
- return false;
- break;
- case 'A':
- // Output symbolic address with appropriate relocation modifier (also
- // suitable for ADRP).
- if (!printSymbolicAddress(MI->getOperand(OpNum), false, "", O))
- return false;
- break;
- case 'L':
- // Output bits 11:0 of symbolic address with appropriate :lo12: relocation
- // modifier.
- if (!printSymbolicAddress(MI->getOperand(OpNum), true, "lo12", O))
+ }
+ printOperand(MI, OpNum, O);
return false;
- break;
- case 'G':
- // Output bits 23:12 of symbolic address with appropriate :hi12: relocation
- // modifier (currently only for TLS local exec).
- if (!printSymbolicAddress(MI->getOperand(OpNum), true, "hi12", O))
+ case 'b': // Print B register.
+ case 'h': // Print H register.
+ case 's': // Print S register.
+ case 'd': // Print D register.
+ case 'q': // Print Q register.
+ if (MO.isReg()) {
+ const TargetRegisterClass *RC;
+ switch (ExtraCode[0]) {
+ case 'b':
+ RC = &AArch64::FPR8RegClass;
+ break;
+ case 'h':
+ RC = &AArch64::FPR16RegClass;
+ break;
+ case 's':
+ RC = &AArch64::FPR32RegClass;
+ break;
+ case 'd':
+ RC = &AArch64::FPR64RegClass;
+ break;
+ case 'q':
+ RC = &AArch64::FPR128RegClass;
+ break;
+ default:
+ return true;
+ }
+ return printAsmRegInClass(MO, RC, false /* vector */, O);
+ }
+ printOperand(MI, OpNum, O);
return false;
- break;
- case 'a':
- return PrintAsmMemoryOperand(MI, OpNum, AsmVariant, ExtraCode, O);
+ }
}
- // There's actually no operand modifier, which leads to a slightly eclectic
- // set of behaviour which we have to handle here.
- const MachineOperand &MO = MI->getOperand(OpNum);
- switch (MO.getType()) {
- default:
- llvm_unreachable("Unexpected operand for inline assembly");
- case MachineOperand::MO_Register:
- // GCC prints the unmodified operand of a 'w' constraint as the vector
- // register. Technically, we could allocate the argument as a VPR128, but
- // that leads to extremely dodgy copies being generated to get the data
- // there.
- if (printModifiedFPRAsmOperand(MO, TRI, 'v', O))
- O << AArch64InstPrinter::getRegisterName(MO.getReg());
- break;
- case MachineOperand::MO_Immediate:
- O << '#' << MO.getImm();
- break;
- case MachineOperand::MO_FPImmediate:
- assert(MO.getFPImm()->isExactlyValue(0.0) && "Only FP 0.0 expected");
- O << "#0.0";
- break;
- case MachineOperand::MO_BlockAddress:
- case MachineOperand::MO_ConstantPoolIndex:
- case MachineOperand::MO_GlobalAddress:
- return printSymbolicAddress(MO, false, "", O);
+ // According to ARM, we should emit x and v registers unless we have a
+ // modifier.
+ if (MO.isReg()) {
+ unsigned Reg = MO.getReg();
+
+ // If this is a w or x register, print an x register.
+ if (AArch64::GPR32allRegClass.contains(Reg) ||
+ AArch64::GPR64allRegClass.contains(Reg))
+ return printAsmMRegister(MO, 'x', O);
+
+ // If this is a b, h, s, d, or q register, print it as a v register.
+ return printAsmRegInClass(MO, &AArch64::FPR128RegClass, true /* vector */,
+ O);
}
+ printOperand(MI, OpNum, O);
return false;
}
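
(A user-level illustration of the modifiers handled above, assuming GCC-style extended inline asm on AArch64; the instruction choice is illustrative: 'w'/'x' select a GPR width via printAsmMRegister, 'b'..'q' select an FPR width via printAsmRegInClass.)

    static int toInt(float F) {
      int I;
      // %w0 prints the 32-bit GPR name; %s1 prints the FPR32 name, giving
      // e.g. "fcvtzs w8, s0" after register allocation.
      asm("fcvtzs %w0, %s1" : "=r"(I) : "w"(F));
      return I;
    }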
@@ -246,15 +347,90 @@ bool AArch64AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
unsigned AsmVariant,
const char *ExtraCode,
raw_ostream &O) {
- // Currently both the memory constraints (m and Q) behave the same and amount
- // to the address as a single register. In future, we may allow "m" to provide
- // both a base and an offset.
+ if (ExtraCode && ExtraCode[0])
+ return true; // Unknown modifier.
+
const MachineOperand &MO = MI->getOperand(OpNum);
- assert(MO.isReg() && "unexpected inline assembly memory operand");
- O << '[' << AArch64InstPrinter::getRegisterName(MO.getReg()) << ']';
+ assert(MO.isReg() && "unexpected inline asm memory operand");
+ O << "[" << AArch64InstPrinter::getRegisterName(MO.getReg()) << "]";
return false;
}
+void AArch64AsmPrinter::PrintDebugValueComment(const MachineInstr *MI,
+ raw_ostream &OS) {
+ unsigned NOps = MI->getNumOperands();
+ assert(NOps == 4);
+ OS << '\t' << MAI->getCommentString() << "DEBUG_VALUE: ";
+  // Cast away const; DIVariable etc. do not take const operands for some reason.
+ DIVariable V(const_cast<MDNode *>(MI->getOperand(NOps - 1).getMetadata()));
+ OS << V.getName();
+ OS << " <- ";
+ // Frame address. Currently handles register +- offset only.
+ assert(MI->getOperand(0).isReg() && MI->getOperand(1).isImm());
+ OS << '[';
+ printOperand(MI, 0, OS);
+ OS << '+';
+ printOperand(MI, 1, OS);
+ OS << ']';
+ OS << "+";
+ printOperand(MI, NOps - 2, OS);
+}
+
+void AArch64AsmPrinter::LowerSTACKMAP(MCStreamer &OutStreamer, StackMaps &SM,
+ const MachineInstr &MI) {
+ unsigned NumNOPBytes = MI.getOperand(1).getImm();
+
+ SM.recordStackMap(MI);
+ // Emit padding.
+ assert(NumNOPBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
+ for (unsigned i = 0; i < NumNOPBytes; i += 4)
+ EmitToStreamer(OutStreamer, MCInstBuilder(AArch64::HINT).addImm(0));
+}
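
(Worked example: a STACKMAP whose <numBytes> operand is 16 expands to NumNOPBytes / 4 = 4 NOPs; HINT #0 is the canonical AArch64 nop encoding:)

    //   hint #0
    //   hint #0
    //   hint #0
    //   hint #0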
+
+// Lower a patchpoint of the form:
+// [<def>], <id>, <numBytes>, <target>, <numArgs>
+void AArch64AsmPrinter::LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM,
+ const MachineInstr &MI) {
+ SM.recordPatchPoint(MI);
+
+ PatchPointOpers Opers(&MI);
+
+ int64_t CallTarget = Opers.getMetaOper(PatchPointOpers::TargetPos).getImm();
+ unsigned EncodedBytes = 0;
+ if (CallTarget) {
+ assert((CallTarget & 0xFFFFFFFFFFFF) == CallTarget &&
+ "High 16 bits of call target should be zero.");
+ unsigned ScratchReg = MI.getOperand(Opers.getNextScratchIdx()).getReg();
+ EncodedBytes = 16;
+ // Materialize the jump address:
+ EmitToStreamer(OutStreamer, MCInstBuilder(AArch64::MOVZWi)
+ .addReg(ScratchReg)
+ .addImm((CallTarget >> 32) & 0xFFFF)
+ .addImm(32));
+ EmitToStreamer(OutStreamer, MCInstBuilder(AArch64::MOVKWi)
+ .addReg(ScratchReg)
+ .addReg(ScratchReg)
+ .addImm((CallTarget >> 16) & 0xFFFF)
+ .addImm(16));
+ EmitToStreamer(OutStreamer, MCInstBuilder(AArch64::MOVKWi)
+ .addReg(ScratchReg)
+ .addReg(ScratchReg)
+ .addImm(CallTarget & 0xFFFF)
+ .addImm(0));
+ EmitToStreamer(OutStreamer, MCInstBuilder(AArch64::BLR).addReg(ScratchReg));
+ }
+ // Emit padding.
+ unsigned NumBytes = Opers.getMetaOper(PatchPointOpers::NBytesPos).getImm();
+ assert(NumBytes >= EncodedBytes &&
+ "Patchpoint can't request size less than the length of a call.");
+ assert((NumBytes - EncodedBytes) % 4 == 0 &&
+ "Invalid number of NOP bytes requested!");
+ for (unsigned i = EncodedBytes; i < NumBytes; i += 4)
+ EmitToStreamer(OutStreamer, MCInstBuilder(AArch64::HINT).addImm(0));
+}
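
(To make the chunking above concrete, a worked example with a made-up 48-bit target; register names elided:)

    //   CallTarget = 0x0000123456789ABC   (high 16 bits zero, per the assert)
    //     movz scratch, #0x1234, lsl #32  ; (CallTarget >> 32) & 0xFFFF
    //     movk scratch, #0x5678, lsl #16  ; (CallTarget >> 16) & 0xFFFF
    //     movk scratch, #0x9ABC           ;  CallTarget        & 0xFFFF
    //     blr  scratch
    //   4 instructions = the 16 EncodedBytes charged against <numBytes>.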
+
+// Simple pseudo-instructions have their lowering (with expansion to real
+// instructions) auto-generated.
#include "AArch64GenMCPseudoLowering.inc"
void AArch64AsmPrinter::EmitInstruction(const MachineInstr *MI) {
@@ -262,41 +438,87 @@ void AArch64AsmPrinter::EmitInstruction(const MachineInstr *MI) {
if (emitPseudoExpansionLowering(OutStreamer, MI))
return;
- MCInst TmpInst;
- LowerAArch64MachineInstrToMCInst(MI, TmpInst, *this);
- EmitToStreamer(OutStreamer, TmpInst);
-}
+ if (AArch64FI->getLOHRelated().count(MI)) {
+    // Generate a label for the LOH-related instruction.
+ MCSymbol *LOHLabel = GetTempSymbol("loh", LOHLabelCounter++);
+ // Associate the instruction with the label
+ LOHInstToLabel[MI] = LOHLabel;
+ OutStreamer.EmitLabel(LOHLabel);
+ }
-void AArch64AsmPrinter::EmitEndOfAsmFile(Module &M) {
- if (Subtarget->isTargetELF()) {
- const TargetLoweringObjectFileELF &TLOFELF =
- static_cast<const TargetLoweringObjectFileELF &>(getObjFileLowering());
+ // Do any manual lowerings.
+ switch (MI->getOpcode()) {
+ default:
+ break;
+ case AArch64::DBG_VALUE: {
+ if (isVerbose() && OutStreamer.hasRawTextSupport()) {
+ SmallString<128> TmpStr;
+ raw_svector_ostream OS(TmpStr);
+ PrintDebugValueComment(MI, OS);
+ OutStreamer.EmitRawText(StringRef(OS.str()));
+ }
+ return;
+ }
- MachineModuleInfoELF &MMIELF = MMI->getObjFileInfo<MachineModuleInfoELF>();
+ // Tail calls use pseudo instructions so they have the proper code-gen
+ // attributes (isCall, isReturn, etc.). We lower them to the real
+ // instruction here.
+ case AArch64::TCRETURNri: {
+ MCInst TmpInst;
+ TmpInst.setOpcode(AArch64::BR);
+ TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
+ EmitToStreamer(OutStreamer, TmpInst);
+ return;
+ }
+ case AArch64::TCRETURNdi: {
+ MCOperand Dest;
+ MCInstLowering.lowerOperand(MI->getOperand(0), Dest);
+ MCInst TmpInst;
+ TmpInst.setOpcode(AArch64::B);
+ TmpInst.addOperand(Dest);
+ EmitToStreamer(OutStreamer, TmpInst);
+ return;
+ }
+ case AArch64::TLSDESC_BLR: {
+ MCOperand Callee, Sym;
+ MCInstLowering.lowerOperand(MI->getOperand(0), Callee);
+ MCInstLowering.lowerOperand(MI->getOperand(1), Sym);
- // Output stubs for external and common global variables.
- MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList();
- if (!Stubs.empty()) {
- OutStreamer.SwitchSection(TLOFELF.getDataRelSection());
- const DataLayout *TD = TM.getDataLayout();
+  // First emit a relocation-annotation. This expands to no code, but requests
+  // that the following instruction get an R_AARCH64_TLSDESC_CALL relocation.
+ MCInst TLSDescCall;
+ TLSDescCall.setOpcode(AArch64::TLSDESCCALL);
+ TLSDescCall.addOperand(Sym);
+ EmitToStreamer(OutStreamer, TLSDescCall);
- for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
- OutStreamer.EmitLabel(Stubs[i].first);
- OutStreamer.EmitSymbolValue(Stubs[i].second.getPointer(),
- TD->getPointerSize(0));
- }
- Stubs.clear();
- }
+ // Other than that it's just a normal indirect call to the function loaded
+ // from the descriptor.
+ MCInst BLR;
+ BLR.setOpcode(AArch64::BLR);
+ BLR.addOperand(Callee);
+ EmitToStreamer(OutStreamer, BLR);
+
+ return;
+ }
+
+ case TargetOpcode::STACKMAP:
+ return LowerSTACKMAP(OutStreamer, SM, *MI);
+
+ case TargetOpcode::PATCHPOINT:
+ return LowerPATCHPOINT(OutStreamer, SM, *MI);
}
-}
-bool AArch64AsmPrinter::runOnMachineFunction(MachineFunction &MF) {
- return AsmPrinter::runOnMachineFunction(MF);
+ // Finally, do the automated lowerings for everything else.
+ MCInst TmpInst;
+ MCInstLowering.Lower(MI, TmpInst);
+ EmitToStreamer(OutStreamer, TmpInst);
}
// Force static initialization.
extern "C" void LLVMInitializeAArch64AsmPrinter() {
- RegisterAsmPrinter<AArch64AsmPrinter> X(TheAArch64leTarget);
- RegisterAsmPrinter<AArch64AsmPrinter> Y(TheAArch64beTarget);
-}
+ RegisterAsmPrinter<AArch64AsmPrinter> X(TheAArch64leTarget);
+ RegisterAsmPrinter<AArch64AsmPrinter> Y(TheAArch64beTarget);
+ RegisterAsmPrinter<AArch64AsmPrinter> Z(TheARM64leTarget);
+ RegisterAsmPrinter<AArch64AsmPrinter> W(TheARM64beTarget);
+}
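
(For context on the TLSDESC_BLR lowering above, the full general-dynamic sequence looks roughly like this; the ADRP/LDR/ADD are emitted elsewhere, and the sketch follows the standard AArch64 TLSDESC convention rather than this patch:)

    //   adrp x0, :tlsdesc:var
    //   ldr  x1, [x0, :tlsdesc_lo12:var]
    //   add  x0, x0, :tlsdesc_lo12:var
    //   .tlsdesccall var   ; zero bytes; pins R_AARCH64_TLSDESC_CALL to the blr
    //   blr  x1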
diff --git a/lib/Target/AArch64/AArch64AsmPrinter.h b/lib/Target/AArch64/AArch64AsmPrinter.h
deleted file mode 100644
index 824f003..0000000
--- a/lib/Target/AArch64/AArch64AsmPrinter.h
+++ /dev/null
@@ -1,76 +0,0 @@
-// AArch64AsmPrinter.h - Print machine code to an AArch64 .s file -*- C++ -*-=//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the AArch64 assembly printer class.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_AARCH64ASMPRINTER_H
-#define LLVM_AARCH64ASMPRINTER_H
-
-#include "AArch64.h"
-#include "AArch64TargetMachine.h"
-#include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/MC/MCStreamer.h"
-#include "llvm/Support/Compiler.h"
-
-namespace llvm {
-
-class MCOperand;
-
-class LLVM_LIBRARY_VISIBILITY AArch64AsmPrinter : public AsmPrinter {
-
- /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
- /// make the right decision when printing asm code for different targets.
- const AArch64Subtarget *Subtarget;
-
- // emitPseudoExpansionLowering - tblgen'erated.
- bool emitPseudoExpansionLowering(MCStreamer &OutStreamer,
- const MachineInstr *MI);
-
- public:
- explicit AArch64AsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
- : AsmPrinter(TM, Streamer) {
- Subtarget = &TM.getSubtarget<AArch64Subtarget>();
- }
-
- bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp) const;
-
- MCOperand lowerSymbolOperand(const MachineOperand &MO,
- const MCSymbol *Sym) const;
-
- void EmitInstruction(const MachineInstr *MI);
- void EmitEndOfAsmFile(Module &M);
-
- bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
- unsigned AsmVariant, const char *ExtraCode,
- raw_ostream &O);
- bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNum,
- unsigned AsmVariant, const char *ExtraCode,
- raw_ostream &O);
-
- /// printSymbolicAddress - Given some kind of reasonably bare symbolic
- /// reference, print out the appropriate asm string to represent it. If
- /// appropriate, a relocation-specifier will be produced, composed of a
- /// general class derived from the MO parameter and an instruction-specific
- /// suffix, provided in Suffix. E.g. ":got_lo12:" if a Suffix of "lo12" is
- /// given.
- bool printSymbolicAddress(const MachineOperand &MO,
- bool PrintImmediatePrefix,
- StringRef Suffix, raw_ostream &O);
-
- virtual const char *getPassName() const {
- return "AArch64 Assembly Printer";
- }
-
- virtual bool runOnMachineFunction(MachineFunction &MF);
-};
-} // end namespace llvm
-
-#endif
diff --git a/lib/Target/AArch64/AArch64BranchFixupPass.cpp b/lib/Target/AArch64/AArch64BranchFixupPass.cpp
deleted file mode 100644
index c03cdde..0000000
--- a/lib/Target/AArch64/AArch64BranchFixupPass.cpp
+++ /dev/null
@@ -1,600 +0,0 @@
-//===-- AArch64BranchFixupPass.cpp - AArch64 branch fixup -----------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains a pass that fixes AArch64 branches which have ended up out
-// of range for their immediate operands.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "aarch64-branch-fixup"
-#include "AArch64.h"
-#include "AArch64InstrInfo.h"
-#include "Utils/AArch64BaseInfo.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/Format.h"
-#include "llvm/Support/raw_ostream.h"
-using namespace llvm;
-
-STATISTIC(NumSplit, "Number of uncond branches inserted");
-STATISTIC(NumCBrFixed, "Number of cond branches fixed");
-
-/// Return the worst case padding that could result from unknown offset bits.
-/// This does not include alignment padding caused by known offset bits.
-///
-/// @param LogAlign log2(alignment)
-/// @param KnownBits Number of known low offset bits.
-static inline unsigned UnknownPadding(unsigned LogAlign, unsigned KnownBits) {
- if (KnownBits < LogAlign)
- return (1u << LogAlign) - (1u << KnownBits);
- return 0;
-}
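
(Worked cases of the formula, values illustrative:)

    //   UnknownPadding(/*LogAlign=*/2, /*KnownBits=*/0) == (1u << 2) - (1u << 0) == 3
    //   UnknownPadding(/*LogAlign=*/2, /*KnownBits=*/1) == 4 - 2 == 2
    //   UnknownPadding(/*LogAlign=*/2, /*KnownBits=*/2) == 0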
-
-namespace {
- /// Due to limited PC-relative displacements, conditional branches to distant
- /// blocks may need converting into an unconditional equivalent. For example:
- /// tbz w1, #0, far_away
- /// becomes
- /// tbnz w1, #0, skip
- /// b far_away
- /// skip:
- class AArch64BranchFixup : public MachineFunctionPass {
- /// Information about the offset and size of a single basic block.
- struct BasicBlockInfo {
- /// Distance from the beginning of the function to the beginning of this
- /// basic block.
- ///
- /// Offsets are computed assuming worst case padding before an aligned
- /// block. This means that subtracting basic block offsets always gives a
- /// conservative estimate of the real distance which may be smaller.
- ///
- /// Because worst case padding is used, the computed offset of an aligned
- /// block may not actually be aligned.
- unsigned Offset;
-
- /// Size of the basic block in bytes. If the block contains inline
- /// assembly, this is a worst case estimate.
- ///
- /// The size does not include any alignment padding whether from the
- /// beginning of the block, or from an aligned jump table at the end.
- unsigned Size;
-
- /// The number of low bits in Offset that are known to be exact. The
- /// remaining bits of Offset are an upper bound.
- uint8_t KnownBits;
-
- /// When non-zero, the block contains instructions (inline asm) of unknown
- /// size. The real size may be smaller than Size bytes by a multiple of 1
- /// << Unalign.
- uint8_t Unalign;
-
- BasicBlockInfo() : Offset(0), Size(0), KnownBits(0), Unalign(0) {}
-
- /// Compute the number of known offset bits internally to this block.
- /// This number should be used to predict worst case padding when
- /// splitting the block.
- unsigned internalKnownBits() const {
- unsigned Bits = Unalign ? Unalign : KnownBits;
- // If the block size isn't a multiple of the known bits, assume the
- // worst case padding.
- if (Size & ((1u << Bits) - 1))
- Bits = countTrailingZeros(Size);
- return Bits;
- }
-
- /// Compute the offset immediately following this block. If LogAlign is
- /// specified, return the offset the successor block will get if it has
- /// this alignment.
- unsigned postOffset(unsigned LogAlign = 0) const {
- unsigned PO = Offset + Size;
- if (!LogAlign)
- return PO;
- // Add alignment padding from the terminator.
- return PO + UnknownPadding(LogAlign, internalKnownBits());
- }
-
- /// Compute the number of known low bits of postOffset. If this block
- /// contains inline asm, the number of known bits drops to the
- /// instruction alignment. An aligned terminator may increase the number
- /// of known bits.
- /// If LogAlign is given, also consider the alignment of the next block.
- unsigned postKnownBits(unsigned LogAlign = 0) const {
- return std::max(LogAlign, internalKnownBits());
- }
- };
-
- std::vector<BasicBlockInfo> BBInfo;
-
- /// One per immediate branch, keeping the machine instruction pointer,
- /// conditional or unconditional, the max displacement, and (if IsCond is
- /// true) the corresponding inverted branch opcode.
- struct ImmBranch {
- MachineInstr *MI;
- unsigned OffsetBits : 31;
- bool IsCond : 1;
- ImmBranch(MachineInstr *mi, unsigned offsetbits, bool cond)
- : MI(mi), OffsetBits(offsetbits), IsCond(cond) {}
- };
-
- /// Keep track of all the immediate branch instructions.
- ///
- std::vector<ImmBranch> ImmBranches;
-
- MachineFunction *MF;
- const AArch64InstrInfo *TII;
- public:
- static char ID;
- AArch64BranchFixup() : MachineFunctionPass(ID) {}
-
- virtual bool runOnMachineFunction(MachineFunction &MF);
-
- virtual const char *getPassName() const {
- return "AArch64 branch fixup pass";
- }
-
- private:
- void initializeFunctionInfo();
- MachineBasicBlock *splitBlockBeforeInstr(MachineInstr *MI);
- void adjustBBOffsetsAfter(MachineBasicBlock *BB);
- bool isBBInRange(MachineInstr *MI, MachineBasicBlock *BB,
- unsigned OffsetBits);
- bool fixupImmediateBr(ImmBranch &Br);
- bool fixupConditionalBr(ImmBranch &Br);
-
- void computeBlockSize(MachineBasicBlock *MBB);
- unsigned getOffsetOf(MachineInstr *MI) const;
- void dumpBBs();
- void verify();
- };
- char AArch64BranchFixup::ID = 0;
-}
-
-/// check BBOffsets
-void AArch64BranchFixup::verify() {
-#ifndef NDEBUG
- for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end();
- MBBI != E; ++MBBI) {
- MachineBasicBlock *MBB = MBBI;
- unsigned MBBId = MBB->getNumber();
- assert(!MBBId || BBInfo[MBBId - 1].postOffset() <= BBInfo[MBBId].Offset);
- }
-#endif
-}
-
-/// print block size and offset information - debugging
-void AArch64BranchFixup::dumpBBs() {
- DEBUG({
- for (unsigned J = 0, E = BBInfo.size(); J !=E; ++J) {
- const BasicBlockInfo &BBI = BBInfo[J];
- dbgs() << format("%08x BB#%u\t", BBI.Offset, J)
- << " kb=" << unsigned(BBI.KnownBits)
- << " ua=" << unsigned(BBI.Unalign)
- << format(" size=%#x\n", BBInfo[J].Size);
- }
- });
-}
-
-/// Returns an instance of the branch fixup pass.
-FunctionPass *llvm::createAArch64BranchFixupPass() {
- return new AArch64BranchFixup();
-}
-
-bool AArch64BranchFixup::runOnMachineFunction(MachineFunction &mf) {
- MF = &mf;
- DEBUG(dbgs() << "***** AArch64BranchFixup ******");
- TII = (const AArch64InstrInfo*)MF->getTarget().getInstrInfo();
-
- // This pass invalidates liveness information when it splits basic blocks.
- MF->getRegInfo().invalidateLiveness();
-
- // Renumber all of the machine basic blocks in the function, guaranteeing that
- // the numbers agree with the position of the block in the function.
- MF->RenumberBlocks();
-
- // Do the initial scan of the function, building up information about the
- // sizes of each block and location of each immediate branch.
- initializeFunctionInfo();
-
- // Iteratively fix up branches until there is no change.
- unsigned NoBRIters = 0;
- bool MadeChange = false;
- while (true) {
- DEBUG(dbgs() << "Beginning iteration #" << NoBRIters << '\n');
- bool BRChange = false;
- for (unsigned i = 0, e = ImmBranches.size(); i != e; ++i)
- BRChange |= fixupImmediateBr(ImmBranches[i]);
- if (BRChange && ++NoBRIters > 30)
- report_fatal_error("Branch Fix Up pass failed to converge!");
- DEBUG(dumpBBs());
-
- if (!BRChange)
- break;
- MadeChange = true;
- }
-
- // After a while, this might be made debug-only, but it is not expensive.
- verify();
-
- DEBUG(dbgs() << '\n'; dumpBBs());
-
- BBInfo.clear();
- ImmBranches.clear();
-
- return MadeChange;
-}
-
-/// Return true if the specified basic block can fallthrough into the block
-/// immediately after it.
-static bool BBHasFallthrough(MachineBasicBlock *MBB) {
- // Get the next machine basic block in the function.
- MachineFunction::iterator MBBI = MBB;
- // Can't fall off end of function.
- if (std::next(MBBI) == MBB->getParent()->end())
- return false;
-
- MachineBasicBlock *NextBB = std::next(MBBI);
- for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(),
- E = MBB->succ_end(); I != E; ++I)
- if (*I == NextBB)
- return true;
-
- return false;
-}
-
-/// Do the initial scan of the function, building up information about the sizes
-/// of each block, and each immediate branch.
-void AArch64BranchFixup::initializeFunctionInfo() {
- BBInfo.clear();
- BBInfo.resize(MF->getNumBlockIDs());
-
- // First thing, compute the size of all basic blocks, and see if the function
- // has any inline assembly in it. If so, we have to be conservative about
- // alignment assumptions, as we don't know for sure the size of any
- // instructions in the inline assembly.
- for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I)
- computeBlockSize(I);
-
- // The known bits of the entry block offset are determined by the function
- // alignment.
- BBInfo.front().KnownBits = MF->getAlignment();
-
- // Compute block offsets and known bits.
- adjustBBOffsetsAfter(MF->begin());
-
- // Now go back through the instructions and build up our data structures.
- for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end();
- MBBI != E; ++MBBI) {
- MachineBasicBlock &MBB = *MBBI;
-
- for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
- I != E; ++I) {
- if (I->isDebugValue())
- continue;
-
- int Opc = I->getOpcode();
- if (I->isBranch()) {
- bool IsCond = false;
-
- // The offsets encoded in instructions here scale by the instruction
- // size (4 bytes), effectively increasing their range by 2 bits.
- unsigned Bits = 0;
- switch (Opc) {
- default:
- continue; // Ignore other JT branches
- case AArch64::TBZxii:
- case AArch64::TBZwii:
- case AArch64::TBNZxii:
- case AArch64::TBNZwii:
- IsCond = true;
- Bits = 14 + 2;
- break;
- case AArch64::Bcc:
- case AArch64::CBZx:
- case AArch64::CBZw:
- case AArch64::CBNZx:
- case AArch64::CBNZw:
- IsCond = true;
- Bits = 19 + 2;
- break;
- case AArch64::Bimm:
- Bits = 26 + 2;
- break;
- }
-
- // Record this immediate branch.
- ImmBranches.push_back(ImmBranch(I, Bits, IsCond));
- }
- }
- }
-}
-
-/// Compute the size and some alignment information for MBB. This function
-/// updates BBInfo directly.
-void AArch64BranchFixup::computeBlockSize(MachineBasicBlock *MBB) {
- BasicBlockInfo &BBI = BBInfo[MBB->getNumber()];
- BBI.Size = 0;
- BBI.Unalign = 0;
-
- for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;
- ++I) {
- BBI.Size += TII->getInstSizeInBytes(*I);
- // For inline asm, getInstSizeInBytes returns a conservative estimate.
- // The actual size may be smaller, but still a multiple of the instr size.
- if (I->isInlineAsm())
- BBI.Unalign = 2;
- }
-}
-
-/// Return the current offset of the specified machine instruction from the
-/// start of the function. This offset changes as stuff is moved around inside
-/// the function.
-unsigned AArch64BranchFixup::getOffsetOf(MachineInstr *MI) const {
- MachineBasicBlock *MBB = MI->getParent();
-
- // The offset is composed of two things: the sum of the sizes of all MBB's
- // before this instruction's block, and the offset from the start of the block
- // it is in.
- unsigned Offset = BBInfo[MBB->getNumber()].Offset;
-
- // Sum instructions before MI in MBB.
- for (MachineBasicBlock::iterator I = MBB->begin(); &*I != MI; ++I) {
- assert(I != MBB->end() && "Didn't find MI in its own basic block?");
- Offset += TII->getInstSizeInBytes(*I);
- }
- return Offset;
-}
-
-/// Split the basic block containing MI into two blocks, which are joined by
-/// an unconditional branch. Update data structures and renumber blocks to
-/// account for this change and returns the newly created block.
-MachineBasicBlock *
-AArch64BranchFixup::splitBlockBeforeInstr(MachineInstr *MI) {
- MachineBasicBlock *OrigBB = MI->getParent();
-
- // Create a new MBB for the code after the OrigBB.
- MachineBasicBlock *NewBB =
- MF->CreateMachineBasicBlock(OrigBB->getBasicBlock());
- MachineFunction::iterator MBBI = OrigBB; ++MBBI;
- MF->insert(MBBI, NewBB);
-
- // Splice the instructions starting with MI over to NewBB.
- NewBB->splice(NewBB->end(), OrigBB, MI, OrigBB->end());
-
- // Add an unconditional branch from OrigBB to NewBB.
- // Note the new unconditional branch is not being recorded.
- // There doesn't seem to be meaningful DebugInfo available; this doesn't
- // correspond to anything in the source.
- BuildMI(OrigBB, DebugLoc(), TII->get(AArch64::Bimm)).addMBB(NewBB);
- ++NumSplit;
-
- // Update the CFG. All succs of OrigBB are now succs of NewBB.
- NewBB->transferSuccessors(OrigBB);
-
- // OrigBB branches to NewBB.
- OrigBB->addSuccessor(NewBB);
-
- // Update internal data structures to account for the newly inserted MBB.
- MF->RenumberBlocks(NewBB);
-
- // Insert an entry into BBInfo to align it properly with the (newly
- // renumbered) block numbers.
- BBInfo.insert(BBInfo.begin() + NewBB->getNumber(), BasicBlockInfo());
-
- // Figure out how large the OrigBB is. As the first half of the original
- // block, it cannot contain a tablejump. The size includes
- // the new jump we added. (It should be possible to do this without
- // recounting everything, but it's very confusing, and this is rarely
- // executed.)
- computeBlockSize(OrigBB);
-
- // Figure out how large the NewMBB is. As the second half of the original
- // block, it may contain a tablejump.
- computeBlockSize(NewBB);
-
- // All BBOffsets following these blocks must be modified.
- adjustBBOffsetsAfter(OrigBB);
-
- return NewBB;
-}
-
-void AArch64BranchFixup::adjustBBOffsetsAfter(MachineBasicBlock *BB) {
- unsigned BBNum = BB->getNumber();
- for(unsigned i = BBNum + 1, e = MF->getNumBlockIDs(); i < e; ++i) {
- // Get the offset and known bits at the end of the layout predecessor.
- // Include the alignment of the current block.
- unsigned LogAlign = MF->getBlockNumbered(i)->getAlignment();
- unsigned Offset = BBInfo[i - 1].postOffset(LogAlign);
- unsigned KnownBits = BBInfo[i - 1].postKnownBits(LogAlign);
-
- // This is where block i begins. Stop if the offset is already correct,
- // and we have updated 2 blocks. This is the maximum number of blocks
- // changed before calling this function.
- if (i > BBNum + 2 &&
- BBInfo[i].Offset == Offset &&
- BBInfo[i].KnownBits == KnownBits)
- break;
-
- BBInfo[i].Offset = Offset;
- BBInfo[i].KnownBits = KnownBits;
- }
-}
-
-/// Returns true if the distance between a specific MI and a specific BB can fit in
-/// MI's displacement field.
-bool AArch64BranchFixup::isBBInRange(MachineInstr *MI,
- MachineBasicBlock *DestBB,
- unsigned OffsetBits) {
- int64_t BrOffset = getOffsetOf(MI);
- int64_t DestOffset = BBInfo[DestBB->getNumber()].Offset;
-
- DEBUG(dbgs() << "Branch of destination BB#" << DestBB->getNumber()
- << " from BB#" << MI->getParent()->getNumber()
- << " bits available=" << OffsetBits
- << " from " << getOffsetOf(MI) << " to " << DestOffset
- << " offset " << int(DestOffset-BrOffset) << "\t" << *MI);
-
- return isIntN(OffsetBits, DestOffset - BrOffset);
-}
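
(Worked ranges from the OffsetBits recorded in initializeFunctionInfo; each branch target is a multiple of 4, hence the "+ 2":)

    //   tbz/tbnz       : 14 + 2 = 16 bits -> isIntN(16, delta), +/- 32 KiB
    //   b.cc, cbz/cbnz : 19 + 2 = 21 bits -> isIntN(21, delta), +/- 1 MiB
    //   b              : 26 + 2 = 28 bits -> isIntN(28, delta), +/- 128 MiB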
-
-/// Fix up an immediate branch whose destination is too far away to fit in its
-/// displacement field.
-bool AArch64BranchFixup::fixupImmediateBr(ImmBranch &Br) {
- MachineInstr *MI = Br.MI;
- MachineBasicBlock *DestBB = 0;
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- if (MI->getOperand(i).isMBB()) {
- DestBB = MI->getOperand(i).getMBB();
- break;
- }
- }
- assert(DestBB && "Branch with no destination BB?");
-
- // Check to see if the DestBB is already in-range.
- if (isBBInRange(MI, DestBB, Br.OffsetBits))
- return false;
-
- assert(Br.IsCond && "Only conditional branches should need fixup");
- return fixupConditionalBr(Br);
-}
-
-/// Fix up a conditional branch whose destination is too far away to fit in its
-/// displacement field. It is converted to an inverse conditional branch + an
-/// unconditional branch to the destination.
-bool
-AArch64BranchFixup::fixupConditionalBr(ImmBranch &Br) {
- MachineInstr *MI = Br.MI;
- MachineBasicBlock *MBB = MI->getParent();
- unsigned CondBrMBBOperand = 0;
-
- // The general idea is to add an unconditional branch to the destination and
- // invert the conditional branch to jump over it. Complications occur around
- // fallthrough and unreachable ends to the block.
- // b.lt L1
- // =>
- // b.ge L2
- // b L1
- // L2:
-
- // First we invert the conditional branch, by creating a replacement if
- // necessary. This if statement contains all the special handling of different
- // branch types.
- if (MI->getOpcode() == AArch64::Bcc) {
- // The basic block is operand number 1 for Bcc
- CondBrMBBOperand = 1;
-
- A64CC::CondCodes CC = (A64CC::CondCodes)MI->getOperand(0).getImm();
- CC = A64InvertCondCode(CC);
- MI->getOperand(0).setImm(CC);
- } else {
- MachineInstrBuilder InvertedMI;
- int InvertedOpcode;
- switch (MI->getOpcode()) {
- default: llvm_unreachable("Unknown branch type");
- case AArch64::TBZxii: InvertedOpcode = AArch64::TBNZxii; break;
- case AArch64::TBZwii: InvertedOpcode = AArch64::TBNZwii; break;
- case AArch64::TBNZxii: InvertedOpcode = AArch64::TBZxii; break;
- case AArch64::TBNZwii: InvertedOpcode = AArch64::TBZwii; break;
- case AArch64::CBZx: InvertedOpcode = AArch64::CBNZx; break;
- case AArch64::CBZw: InvertedOpcode = AArch64::CBNZw; break;
- case AArch64::CBNZx: InvertedOpcode = AArch64::CBZx; break;
- case AArch64::CBNZw: InvertedOpcode = AArch64::CBZw; break;
- }
-
- InvertedMI = BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(InvertedOpcode));
- for (unsigned i = 0, e= MI->getNumOperands(); i != e; ++i) {
- InvertedMI.addOperand(MI->getOperand(i));
- if (MI->getOperand(i).isMBB())
- CondBrMBBOperand = i;
- }
-
- MI->eraseFromParent();
- MI = Br.MI = InvertedMI;
- }
-
- // If the branch is at the end of its MBB and that has a fall-through block,
- // direct the updated conditional branch to the fall-through
- // block. Otherwise, split the MBB before the next instruction.
- MachineInstr *BMI = &MBB->back();
- bool NeedSplit = (BMI != MI) || !BBHasFallthrough(MBB);
-
- ++NumCBrFixed;
- if (BMI != MI) {
- if (std::next(MachineBasicBlock::iterator(MI)) == std::prev(MBB->end()) &&
- BMI->getOpcode() == AArch64::Bimm) {
- // Last MI in the BB is an unconditional branch. We can swap destinations:
- // b.eq L1 (temporarily b.ne L1 after first change)
- // b L2
- // =>
- // b.ne L2
- // b L1
- MachineBasicBlock *NewDest = BMI->getOperand(0).getMBB();
- if (isBBInRange(MI, NewDest, Br.OffsetBits)) {
- DEBUG(dbgs() << " Invert Bcc condition and swap its destination with "
- << *BMI);
- MachineBasicBlock *DestBB = MI->getOperand(CondBrMBBOperand).getMBB();
- BMI->getOperand(0).setMBB(DestBB);
- MI->getOperand(CondBrMBBOperand).setMBB(NewDest);
- return true;
- }
- }
- }
-
- if (NeedSplit) {
- MachineBasicBlock::iterator MBBI = MI; ++MBBI;
- splitBlockBeforeInstr(MBBI);
- // No need for the branch to the next block. We're adding an unconditional
- // branch to the destination.
- int delta = TII->getInstSizeInBytes(MBB->back());
- BBInfo[MBB->getNumber()].Size -= delta;
- MBB->back().eraseFromParent();
- // BBInfo[SplitBB].Offset is wrong temporarily, fixed below
- }
-
- // After splitting and removing the unconditional branch from the original BB,
- // the structure is now:
- // oldbb:
- // [things]
- // b.invertedCC L1
- // splitbb/fallthroughbb:
- // [old b L2/real continuation]
- //
- // We now have to change the conditional branch to point to splitbb and add an
- // unconditional branch after it to L1, giving the final structure:
- // oldbb:
- // [things]
- // b.invertedCC splitbb
- // b L1
- // splitbb/fallthroughbb:
- // [old b L2/real continuation]
- MachineBasicBlock *NextBB = std::next(MachineFunction::iterator(MBB));
-
- DEBUG(dbgs() << " Insert B to BB#"
- << MI->getOperand(CondBrMBBOperand).getMBB()->getNumber()
- << " also invert condition and change dest. to BB#"
- << NextBB->getNumber() << "\n");
-
- // Insert a new unconditional branch and fixup the destination of the
- // conditional one. Also update the ImmBranch as well as adding a new entry
- // for the new branch.
- BuildMI(MBB, DebugLoc(), TII->get(AArch64::Bimm))
- .addMBB(MI->getOperand(CondBrMBBOperand).getMBB());
- MI->getOperand(CondBrMBBOperand).setMBB(NextBB);
-
- BBInfo[MBB->getNumber()].Size += TII->getInstSizeInBytes(MBB->back());
-
- // 26 bits written down in Bimm, specifying a multiple of 4.
- unsigned OffsetBits = 26 + 2;
- ImmBranches.push_back(ImmBranch(&MBB->back(), OffsetBits, false));
-
- adjustBBOffsetsAfter(MBB);
- return true;
-}
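
A standalone sketch (ours, not part of the patch) of the range test isBBInRange performs, assuming isIntN(N, x) keeps its usual meaning of "x fits in an N-bit signed integer"; for the Bimm entry just added, OffsetBits = 26 + 2 = 28, i.e. a reach of +/- 128 MiB:

#include <cstdint>

// Returns true if the displacement fits in a signed OffsetBits-bit field.
static bool fitsInBranch(int64_t BrOffset, int64_t DestOffset,
                         unsigned OffsetBits) {
  int64_t Delta = DestOffset - BrOffset;
  int64_t Limit = int64_t(1) << (OffsetBits - 1);
  return Delta >= -Limit && Delta < Limit; // same range isIntN() accepts
}
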
diff --git a/lib/Target/ARM64/ARM64BranchRelaxation.cpp b/lib/Target/AArch64/AArch64BranchRelaxation.cpp
index a9bbef5..5209452 100644
--- a/lib/Target/ARM64/ARM64BranchRelaxation.cpp
+++ b/lib/Target/AArch64/AArch64BranchRelaxation.cpp
@@ -1,4 +1,4 @@
-//===-- ARM64BranchRelaxation.cpp - ARM64 branch relaxation ---------------===//
+//===-- AArch64BranchRelaxation.cpp - AArch64 branch relaxation -----------===//
//
// The LLVM Compiler Infrastructure
//
@@ -9,10 +9,9 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "arm64-branch-relax"
-#include "ARM64.h"
-#include "ARM64InstrInfo.h"
-#include "ARM64MachineFunctionInfo.h"
+#include "AArch64.h"
+#include "AArch64InstrInfo.h"
+#include "AArch64MachineFunctionInfo.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -24,27 +23,29 @@
#include "llvm/Support/CommandLine.h"
using namespace llvm;
+#define DEBUG_TYPE "aarch64-branch-relax"
+
static cl::opt<bool>
-BranchRelaxation("arm64-branch-relax", cl::Hidden, cl::init(true),
+BranchRelaxation("aarch64-branch-relax", cl::Hidden, cl::init(true),
cl::desc("Relax out of range conditional branches"));
static cl::opt<unsigned>
-TBZDisplacementBits("arm64-tbz-offset-bits", cl::Hidden, cl::init(14),
+TBZDisplacementBits("aarch64-tbz-offset-bits", cl::Hidden, cl::init(14),
cl::desc("Restrict range of TB[N]Z instructions (DEBUG)"));
static cl::opt<unsigned>
-CBZDisplacementBits("arm64-cbz-offset-bits", cl::Hidden, cl::init(19),
+CBZDisplacementBits("aarch64-cbz-offset-bits", cl::Hidden, cl::init(19),
cl::desc("Restrict range of CB[N]Z instructions (DEBUG)"));
static cl::opt<unsigned>
-BCCDisplacementBits("arm64-bcc-offset-bits", cl::Hidden, cl::init(19),
+BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19),
cl::desc("Restrict range of Bcc instructions (DEBUG)"));
STATISTIC(NumSplit, "Number of basic blocks split");
STATISTIC(NumRelaxed, "Number of conditional branches relaxed");
namespace {
-class ARM64BranchRelaxation : public MachineFunctionPass {
+class AArch64BranchRelaxation : public MachineFunctionPass {
/// BasicBlockInfo - Information about the offset and size of a single
/// basic block.
struct BasicBlockInfo {
@@ -76,41 +77,39 @@ class ARM64BranchRelaxation : public MachineFunctionPass {
SmallVector<BasicBlockInfo, 16> BlockInfo;
MachineFunction *MF;
- const ARM64InstrInfo *TII;
+ const AArch64InstrInfo *TII;
bool relaxBranchInstructions();
void scanFunction();
MachineBasicBlock *splitBlockBeforeInstr(MachineInstr *MI);
- void adjustBlockOffsets(MachineBasicBlock *BB);
+ void adjustBlockOffsets(MachineBasicBlock &MBB);
bool isBlockInRange(MachineInstr *MI, MachineBasicBlock *BB, unsigned Disp);
bool fixupConditionalBranch(MachineInstr *MI);
- void computeBlockSize(MachineBasicBlock *MBB);
+ void computeBlockSize(const MachineBasicBlock &MBB);
unsigned getInstrOffset(MachineInstr *MI) const;
void dumpBBs();
void verify();
public:
static char ID;
- ARM64BranchRelaxation() : MachineFunctionPass(ID) {}
+ AArch64BranchRelaxation() : MachineFunctionPass(ID) {}
- virtual bool runOnMachineFunction(MachineFunction &MF);
+ bool runOnMachineFunction(MachineFunction &MF) override;
- virtual const char *getPassName() const {
- return "ARM64 branch relaxation pass";
+ const char *getPassName() const override {
+ return "AArch64 branch relaxation pass";
}
};
-char ARM64BranchRelaxation::ID = 0;
+char AArch64BranchRelaxation::ID = 0;
}
/// verify - check BBOffsets, BBSizes, alignment of blocks
-void ARM64BranchRelaxation::verify() {
+void AArch64BranchRelaxation::verify() {
#ifndef NDEBUG
unsigned PrevNum = MF->begin()->getNumber();
- for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end(); MBBI != E;
- ++MBBI) {
- MachineBasicBlock *MBB = MBBI;
- unsigned Align = MBB->getAlignment();
- unsigned Num = MBB->getNumber();
+ for (MachineBasicBlock &MBB : *MF) {
+ unsigned Align = MBB.getAlignment();
+ unsigned Num = MBB.getNumber();
assert(BlockInfo[Num].Offset % (1u << Align) == 0);
assert(!Num || BlockInfo[PrevNum].postOffset() <= BlockInfo[Num].Offset);
PrevNum = Num;
@@ -119,8 +118,8 @@ void ARM64BranchRelaxation::verify() {
}
/// print block size and offset information - debugging
-void ARM64BranchRelaxation::dumpBBs() {
- for (auto &MBB: *MF) {
+void AArch64BranchRelaxation::dumpBBs() {
+ for (auto &MBB : *MF) {
const BasicBlockInfo &BBI = BlockInfo[MBB.getNumber()];
dbgs() << format("BB#%u\toffset=%08x\t", MBB.getNumber(), BBI.Offset)
<< format("size=%#x\n", BBI.Size);
@@ -133,14 +132,12 @@ static bool BBHasFallthrough(MachineBasicBlock *MBB) {
// Get the next machine basic block in the function.
MachineFunction::iterator MBBI = MBB;
// Can't fall off end of function.
- if (std::next(MBBI) == MBB->getParent()->end())
+ MachineBasicBlock *NextBB = std::next(MBBI);
+ if (NextBB == MBB->getParent()->end())
return false;
- MachineBasicBlock *NextBB = std::next(MBBI);
- for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(),
- E = MBB->succ_end();
- I != E; ++I)
- if (*I == NextBB)
+ for (MachineBasicBlock *S : MBB->successors())
+ if (S == NextBB)
return true;
return false;
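
The predicate above in miniature, over plain containers rather than the MachineBasicBlock API (a sketch; names are ours): a block falls through exactly when its layout successor is also one of its CFG successors.

#include <algorithm>
#include <vector>

static bool hasFallthrough(const std::vector<int> &CFGSuccs,
                           int LayoutSuccNum) {
  return std::find(CFGSuccs.begin(), CFGSuccs.end(), LayoutSuccNum) !=
         CFGSuccs.end();
}
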
@@ -148,7 +145,7 @@ static bool BBHasFallthrough(MachineBasicBlock *MBB) {
/// scanFunction - Do the initial scan of the function, building up
/// information about each block.
-void ARM64BranchRelaxation::scanFunction() {
+void AArch64BranchRelaxation::scanFunction() {
BlockInfo.clear();
BlockInfo.resize(MF->getNumBlockIDs());
@@ -156,27 +153,26 @@ void ARM64BranchRelaxation::scanFunction() {
// has any inline assembly in it. If so, we have to be conservative about
// alignment assumptions, as we don't know for sure the size of any
// instructions in the inline assembly.
- for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I)
- computeBlockSize(I);
+ for (MachineBasicBlock &MBB : *MF)
+ computeBlockSize(MBB);
// Compute block offsets and known bits.
- adjustBlockOffsets(MF->begin());
+ adjustBlockOffsets(*MF->begin());
}
/// computeBlockSize - Compute the size for MBB.
/// This function updates BlockInfo directly.
-void ARM64BranchRelaxation::computeBlockSize(MachineBasicBlock *MBB) {
+void AArch64BranchRelaxation::computeBlockSize(const MachineBasicBlock &MBB) {
unsigned Size = 0;
- for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;
- ++I)
- Size += TII->GetInstSizeInBytes(I);
- BlockInfo[MBB->getNumber()].Size = Size;
+ for (const MachineInstr &MI : MBB)
+ Size += TII->GetInstSizeInBytes(&MI);
+ BlockInfo[MBB.getNumber()].Size = Size;
}
/// getInstrOffset - Return the current offset of the specified machine
/// instruction from the start of the function. This offset changes as stuff is
/// moved around inside the function.
-unsigned ARM64BranchRelaxation::getInstrOffset(MachineInstr *MI) const {
+unsigned AArch64BranchRelaxation::getInstrOffset(MachineInstr *MI) const {
MachineBasicBlock *MBB = MI->getParent();
// The offset is composed of two things: the sum of the sizes of all MBB's
@@ -192,17 +188,15 @@ unsigned ARM64BranchRelaxation::getInstrOffset(MachineInstr *MI) const {
return Offset;
}
-void ARM64BranchRelaxation::adjustBlockOffsets(MachineBasicBlock *Start) {
- unsigned PrevNum = Start->getNumber();
- MachineFunction::iterator MBBI = Start, E = MF->end();
- for (++MBBI; MBBI != E; ++MBBI) {
- MachineBasicBlock *MBB = MBBI;
- unsigned Num = MBB->getNumber();
+void AArch64BranchRelaxation::adjustBlockOffsets(MachineBasicBlock &Start) {
+ unsigned PrevNum = Start.getNumber();
+ for (auto &MBB : make_range(MachineFunction::iterator(Start), MF->end())) {
+ unsigned Num = MBB.getNumber();
if (!Num) // block zero is never changed from offset zero.
continue;
// Get the offset and known bits at the end of the layout predecessor.
// Include the alignment of the current block.
- unsigned LogAlign = MBBI->getAlignment();
+ unsigned LogAlign = MBB.getAlignment();
BlockInfo[Num].Offset = BlockInfo[PrevNum].postOffset(LogAlign);
PrevNum = Num;
}
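
The bookkeeping above relies on postOffset() rounding a block's end address up to the alignment of the block that follows. A plausible standalone rendering (assumed semantics; the real BasicBlockInfo also reasons about known low bits):

#include <cstdint>

static uint64_t postOffset(uint64_t Offset, uint64_t Size, unsigned LogAlign) {
  uint64_t End = Offset + Size;
  uint64_t Align = uint64_t(1) << LogAlign; // LogAlign is a log2 alignment
  return (End + Align - 1) & ~(Align - 1);  // round up to that alignment
}
// e.g. postOffset(0x30, 0x0E, 4) == 0x40, the next 16-byte boundary.
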
@@ -215,7 +209,7 @@ void ARM64BranchRelaxation::adjustBlockOffsets(MachineBasicBlock *Start) {
/// and must be updated by the caller! Other transforms follow using this
/// utility function, so no point updating now rather than waiting.
MachineBasicBlock *
-ARM64BranchRelaxation::splitBlockBeforeInstr(MachineInstr *MI) {
+AArch64BranchRelaxation::splitBlockBeforeInstr(MachineInstr *MI) {
MachineBasicBlock *OrigBB = MI->getParent();
// Create a new MBB for the code after the OrigBB.
@@ -232,7 +226,7 @@ ARM64BranchRelaxation::splitBlockBeforeInstr(MachineInstr *MI) {
// Note the new unconditional branch is not being recorded.
// There doesn't seem to be meaningful DebugInfo available; this doesn't
// correspond to anything in the source.
- BuildMI(OrigBB, DebugLoc(), TII->get(ARM64::B)).addMBB(NewBB);
+ BuildMI(OrigBB, DebugLoc(), TII->get(AArch64::B)).addMBB(NewBB);
// Insert an entry into BlockInfo to align it properly with the block numbers.
BlockInfo.insert(BlockInfo.begin() + NewBB->getNumber(), BasicBlockInfo());
@@ -242,14 +236,14 @@ ARM64BranchRelaxation::splitBlockBeforeInstr(MachineInstr *MI) {
// the new jump we added. (It should be possible to do this without
// recounting everything, but it's very confusing, and this is rarely
// executed.)
- computeBlockSize(OrigBB);
+ computeBlockSize(*OrigBB);
// Figure out how large the NewMBB is. As the second half of the original
// block, it may contain a tablejump.
- computeBlockSize(NewBB);
+ computeBlockSize(*NewBB);
// All BBOffsets following these blocks must be modified.
- adjustBlockOffsets(OrigBB);
+ adjustBlockOffsets(*OrigBB);
++NumSplit;
@@ -258,9 +252,9 @@ ARM64BranchRelaxation::splitBlockBeforeInstr(MachineInstr *MI) {
/// isBlockInRange - Returns true if the distance between the given MI and
/// the given BB fits in MI's displacement field.
-bool ARM64BranchRelaxation::isBlockInRange(MachineInstr *MI,
- MachineBasicBlock *DestBB,
- unsigned Bits) {
+bool AArch64BranchRelaxation::isBlockInRange(MachineInstr *MI,
+ MachineBasicBlock *DestBB,
+ unsigned Bits) {
unsigned MaxOffs = ((1 << (Bits - 1)) - 1) << 2;
unsigned BrOffset = getInstrOffset(MI);
unsigned DestOffset = BlockInfo[DestBB->getNumber()].Offset;
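
Worked numbers for the MaxOffs formula above (the static_asserts are ours): the displacement is a signed count of 4-byte words, so Bits bits of displacement reach just under +/- 2^(Bits+1) bytes.

constexpr unsigned maxForwardOffset(unsigned Bits) {
  return ((1u << (Bits - 1)) - 1) << 2;
}
static_assert(maxForwardOffset(14) == 32764, "TB[N]Z: ~32 KiB forward");
static_assert(maxForwardOffset(19) == 1048572, "CB[N]Z and B.cc: ~1 MiB");
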
@@ -281,13 +275,15 @@ static bool isConditionalBranch(unsigned Opc) {
switch (Opc) {
default:
return false;
- case ARM64::TBZ:
- case ARM64::TBNZ:
- case ARM64::CBZW:
- case ARM64::CBNZW:
- case ARM64::CBZX:
- case ARM64::CBNZX:
- case ARM64::Bcc:
+ case AArch64::TBZW:
+ case AArch64::TBNZW:
+ case AArch64::TBZX:
+ case AArch64::TBNZX:
+ case AArch64::CBZW:
+ case AArch64::CBNZW:
+ case AArch64::CBZX:
+ case AArch64::CBNZX:
+ case AArch64::Bcc:
return true;
}
}
@@ -296,14 +292,16 @@ static MachineBasicBlock *getDestBlock(MachineInstr *MI) {
switch (MI->getOpcode()) {
default:
assert(0 && "unexpected opcode!");
- case ARM64::TBZ:
- case ARM64::TBNZ:
+ case AArch64::TBZW:
+ case AArch64::TBNZW:
+ case AArch64::TBZX:
+ case AArch64::TBNZX:
return MI->getOperand(2).getMBB();
- case ARM64::CBZW:
- case ARM64::CBNZW:
- case ARM64::CBZX:
- case ARM64::CBNZX:
- case ARM64::Bcc:
+ case AArch64::CBZW:
+ case AArch64::CBNZW:
+ case AArch64::CBZX:
+ case AArch64::CBNZX:
+ case AArch64::Bcc:
return MI->getOperand(1).getMBB();
}
}
@@ -312,13 +310,15 @@ static unsigned getOppositeConditionOpcode(unsigned Opc) {
switch (Opc) {
default:
assert(0 && "unexpected opcode!");
- case ARM64::TBNZ: return ARM64::TBZ;
- case ARM64::TBZ: return ARM64::TBNZ;
- case ARM64::CBNZW: return ARM64::CBZW;
- case ARM64::CBNZX: return ARM64::CBZX;
- case ARM64::CBZW: return ARM64::CBNZW;
- case ARM64::CBZX: return ARM64::CBNZX;
- case ARM64::Bcc: return ARM64::Bcc; // Condition is an operand for Bcc.
+ case AArch64::TBNZW: return AArch64::TBZW;
+ case AArch64::TBNZX: return AArch64::TBZX;
+ case AArch64::TBZW: return AArch64::TBNZW;
+ case AArch64::TBZX: return AArch64::TBNZX;
+ case AArch64::CBNZW: return AArch64::CBZW;
+ case AArch64::CBNZX: return AArch64::CBZX;
+ case AArch64::CBZW: return AArch64::CBNZW;
+ case AArch64::CBZX: return AArch64::CBNZX;
+ case AArch64::Bcc: return AArch64::Bcc; // Condition is an operand for Bcc.
}
}
@@ -326,30 +326,32 @@ static unsigned getBranchDisplacementBits(unsigned Opc) {
switch (Opc) {
default:
assert(0 && "unexpected opcode!");
- case ARM64::TBNZ:
- case ARM64::TBZ:
+ case AArch64::TBNZW:
+ case AArch64::TBZW:
+ case AArch64::TBNZX:
+ case AArch64::TBZX:
return TBZDisplacementBits;
- case ARM64::CBNZW:
- case ARM64::CBZW:
- case ARM64::CBNZX:
- case ARM64::CBZX:
+ case AArch64::CBNZW:
+ case AArch64::CBZW:
+ case AArch64::CBNZX:
+ case AArch64::CBZX:
return CBZDisplacementBits;
- case ARM64::Bcc:
+ case AArch64::Bcc:
return BCCDisplacementBits;
}
}
static inline void invertBccCondition(MachineInstr *MI) {
- assert(MI->getOpcode() == ARM64::Bcc && "Unexpected opcode!");
- ARM64CC::CondCode CC = (ARM64CC::CondCode)MI->getOperand(0).getImm();
- CC = ARM64CC::getInvertedCondCode(CC);
+ assert(MI->getOpcode() == AArch64::Bcc && "Unexpected opcode!");
+ AArch64CC::CondCode CC = (AArch64CC::CondCode)MI->getOperand(0).getImm();
+ CC = AArch64CC::getInvertedCondCode(CC);
MI->getOperand(0).setImm((int64_t)CC);
}
/// fixupConditionalBranch - Fix up a conditional branch whose destination is
/// too far away to fit in its displacement field. It is converted to an inverse
/// conditional branch + an unconditional branch to the destination.
-bool ARM64BranchRelaxation::fixupConditionalBranch(MachineInstr *MI) {
+bool AArch64BranchRelaxation::fixupConditionalBranch(MachineInstr *MI) {
MachineBasicBlock *DestBB = getDestBlock(MI);
// Add an unconditional branch to the destination and invert the branch
@@ -370,7 +372,7 @@ bool ARM64BranchRelaxation::fixupConditionalBranch(MachineInstr *MI) {
if (BMI != MI) {
if (std::next(MachineBasicBlock::iterator(MI)) ==
std::prev(MBB->getLastNonDebugInstr()) &&
- BMI->getOpcode() == ARM64::B) {
+ BMI->getOpcode() == AArch64::B) {
// Last MI in the BB is an unconditional branch. Can we simply invert the
// condition and swap destinations:
// beq L1
@@ -384,13 +386,15 @@ bool ARM64BranchRelaxation::fixupConditionalBranch(MachineInstr *MI) {
DEBUG(dbgs() << " Invert condition and swap its destination with "
<< *BMI);
BMI->getOperand(0).setMBB(DestBB);
- unsigned OpNum =
- (MI->getOpcode() == ARM64::TBZ || MI->getOpcode() == ARM64::TBNZ)
- ? 2
- : 1;
+ unsigned OpNum = (MI->getOpcode() == AArch64::TBZW ||
+ MI->getOpcode() == AArch64::TBNZW ||
+ MI->getOpcode() == AArch64::TBZX ||
+ MI->getOpcode() == AArch64::TBNZX)
+ ? 2
+ : 1;
MI->getOperand(OpNum).setMBB(NewDest);
MI->setDesc(TII->get(getOppositeConditionOpcode(MI->getOpcode())));
- if (MI->getOpcode() == ARM64::Bcc)
+ if (MI->getOpcode() == AArch64::Bcc)
invertBccCondition(MI);
return true;
}
@@ -426,13 +430,14 @@ bool ARM64BranchRelaxation::fixupConditionalBranch(MachineInstr *MI) {
MachineInstrBuilder MIB = BuildMI(
MBB, DebugLoc(), TII->get(getOppositeConditionOpcode(MI->getOpcode())))
.addOperand(MI->getOperand(0));
- if (MI->getOpcode() == ARM64::TBZ || MI->getOpcode() == ARM64::TBNZ)
+ if (MI->getOpcode() == AArch64::TBZW || MI->getOpcode() == AArch64::TBNZW ||
+ MI->getOpcode() == AArch64::TBZX || MI->getOpcode() == AArch64::TBNZX)
MIB.addOperand(MI->getOperand(1));
- if (MI->getOpcode() == ARM64::Bcc)
+ if (MI->getOpcode() == AArch64::Bcc)
invertBccCondition(MIB);
MIB.addMBB(NextBB);
BlockInfo[MBB->getNumber()].Size += TII->GetInstSizeInBytes(&MBB->back());
- BuildMI(MBB, DebugLoc(), TII->get(ARM64::B)).addMBB(DestBB);
+ BuildMI(MBB, DebugLoc(), TII->get(AArch64::B)).addMBB(DestBB);
BlockInfo[MBB->getNumber()].Size += TII->GetInstSizeInBytes(&MBB->back());
// Remove the old conditional branch. It may or may not still be in MBB.
@@ -440,11 +445,11 @@ bool ARM64BranchRelaxation::fixupConditionalBranch(MachineInstr *MI) {
MI->eraseFromParent();
// Finally, keep the block offsets up to date.
- adjustBlockOffsets(MBB);
+ adjustBlockOffsets(*MBB);
return true;
}
-bool ARM64BranchRelaxation::relaxBranchInstructions() {
+bool AArch64BranchRelaxation::relaxBranchInstructions() {
bool Changed = false;
// Relaxing branches involves creating new basic blocks, so re-eval
// end() for termination.
@@ -461,16 +466,16 @@ bool ARM64BranchRelaxation::relaxBranchInstructions() {
return Changed;
}
-bool ARM64BranchRelaxation::runOnMachineFunction(MachineFunction &mf) {
+bool AArch64BranchRelaxation::runOnMachineFunction(MachineFunction &mf) {
MF = &mf;
// If the pass is disabled, just bail early.
if (!BranchRelaxation)
return false;
- DEBUG(dbgs() << "***** ARM64BranchRelaxation *****\n");
+ DEBUG(dbgs() << "***** AArch64BranchRelaxation *****\n");
- TII = (const ARM64InstrInfo *)MF->getTarget().getInstrInfo();
+ TII = (const AArch64InstrInfo *)MF->getTarget().getInstrInfo();
// Renumber all of the machine basic blocks in the function, guaranteeing that
// the numbers agree with the position of the block in the function.
@@ -498,8 +503,8 @@ bool ARM64BranchRelaxation::runOnMachineFunction(MachineFunction &mf) {
return MadeChange;
}
-/// createARM64BranchRelaxation - returns an instance of the constpool
-/// island pass.
+/// createAArch64BranchRelaxation - returns an instance of the branch
+/// relaxation pass.
-FunctionPass *llvm::createARM64BranchRelaxation() {
- return new ARM64BranchRelaxation();
+FunctionPass *llvm::createAArch64BranchRelaxation() {
+ return new AArch64BranchRelaxation();
}
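
Pulling the hunks above together, the pass's control flow has roughly this shape (a paraphrase, not the verbatim implementation):

bool AArch64BranchRelaxation::runOnMachineFunction(MachineFunction &mf) {
  // ...option check and TII setup elided...
  scanFunction();                   // size every block, then lay out offsets
  bool MadeChange = false;
  while (relaxBranchInstructions()) // fixups split blocks and shift offsets,
    MadeChange = true;              // so iterate until nothing changes
  verify();
  return MadeChange;
}
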
diff --git a/lib/Target/AArch64/AArch64CallingConv.td b/lib/Target/AArch64/AArch64CallingConv.td
deleted file mode 100644
index 9fe6aae..0000000
--- a/lib/Target/AArch64/AArch64CallingConv.td
+++ /dev/null
@@ -1,197 +0,0 @@
-//==-- AArch64CallingConv.td - Calling Conventions for AArch64 -*- tblgen -*-==//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-// This describes the calling conventions for AArch64 architecture.
-//===----------------------------------------------------------------------===//
-
-
-// The AArch64 Procedure Call Standard is unfortunately specified at a slightly
-// higher level of abstraction than LLVM's target interface presents. In
-// particular, it refers (like other ABIs, in fact) directly to
-// structs. However, generic LLVM code takes the liberty of lowering structure
-// arguments to the component fields before we see them.
-//
-// As a result, the obvious direct map from LLVM IR to PCS concepts can't be
-// implemented, so the goals of this calling convention are, in decreasing
-// priority order:
-// 1. Expose *some* way to express the concepts required to implement the
-// generic PCS from a front-end.
-// 2. Provide a sane ABI for pure LLVM.
-// 3. Follow the generic PCS as closely as is naturally possible.
-//
-// The suggested front-end implementation of PCS features is:
-// * Integer, float and vector arguments of all sizes which end up in
-// registers are passed and returned via the natural LLVM type.
-// * Structure arguments with size <= 16 bytes are passed and returned in
-// registers as similar integer or composite types. For example:
-// [1 x i64], [2 x i64] or [1 x i128] (if alignment 16 needed).
-// * HFAs in registers follow rules similar to small structs: appropriate
-// composite types.
-// * Structure arguments with size > 16 bytes are passed via a pointer,
-// handled completely by the front-end.
-// * Structure return values > 16 bytes via an sret pointer argument.
-// * Other stack-based arguments (not large structs) are passed using byval
-// pointers. Padding arguments are added beforehand to guarantee a large
-// struct doesn't later use integer registers.
-//
-// N.b. this means that it is the front-end's responsibility (if it cares about
-// PCS compliance) to check whether enough registers are available for an
-// argument when deciding how to pass it.
-
-class CCIfAlign<int Align, CCAction A>:
- CCIf<"ArgFlags.getOrigAlign() == " # Align, A>;
-
-def CC_A64_APCS : CallingConv<[
- // SRet is an LLVM-specific concept, so it takes precedence over general ABI
- // concerns. However, this rule will be used by C/C++ frontends to implement
- // structure return.
- CCIfSRet<CCAssignToReg<[X8]>>,
-
- // Put ByVal arguments directly on the stack. Minimum size and alignment of a
- // slot is 64-bit.
- CCIfByVal<CCPassByVal<8, 8>>,
-
- // Canonicalise the various types that live in different floating-point
- // registers. This makes sense because the PCS does not distinguish Short
- // Vectors and Floating-point types.
- CCIfType<[v1i16, v2i8], CCBitConvertToType<f16>>,
- CCIfType<[v1i32, v4i8, v2i16], CCBitConvertToType<f32>>,
- CCIfType<[v8i8, v4i16, v2i32, v2f32, v1i64, v1f64], CCBitConvertToType<f64>>,
- CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
- CCBitConvertToType<f128>>,
-
- // PCS: "C.1: If the argument is a Half-, Single-, Double- or Quad- precision
- // Floating-point or Short Vector Type and the NSRN is less than 8, then the
- // argument is allocated to the least significant bits of register
- // v[NSRN]. The NSRN is incremented by one. The argument has now been
- // allocated."
- CCIfType<[v1i8], CCAssignToReg<[B0, B1, B2, B3, B4, B5, B6, B7]>>,
- CCIfType<[f16], CCAssignToReg<[H0, H1, H2, H3, H4, H5, H6, H7]>>,
- CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7]>>,
- CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
- CCIfType<[f128], CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
-
- // PCS: "C.2: If the argument is an HFA and there are sufficient unallocated
- // SIMD and Floating-point registers (NSRN - number of elements < 8), then the
- // argument is allocated to SIMD and Floating-point registers (with one
- // register per element of the HFA). The NSRN is incremented by the number of
- // registers used. The argument has now been allocated."
- //
- // N.b. As above, this rule is the responsibility of the front-end.
-
- // "C.3: If the argument is an HFA then the NSRN is set to 8 and the size of
- // the argument is rounded up to the nearest multiple of 8 bytes."
- //
- // "C.4: If the argument is an HFA, a Quad-precision Floating-point or Short
- // Vector Type then the NSAA is rounded up to the larger of 8 or the Natural
- // Alignment of the Argument's type."
- //
- // It is expected that these will be satisfied by adding dummy arguments to
- // the prototype.
-
- // PCS: "C.5: If the argument is a Half- or Single- precision Floating-point
- // type then the size of the argument is set to 8 bytes. The effect is as if
- // the argument had been copied to the least significant bits of a 64-bit
- // register and the remaining bits filled with unspecified values."
- CCIfType<[f16, f32], CCPromoteToType<f64>>,
-
- // PCS: "C.6: If the argument is an HFA, a Half-, Single-, Double- or Quad-
- // precision Floating-point or Short Vector Type, then the argument is copied
- // to memory at the adjusted NSAA. The NSAA is incremented by the size of the
- // argument. The argument has now been allocated."
- CCIfType<[f64], CCAssignToStack<8, 8>>,
- CCIfType<[f128], CCAssignToStack<16, 16>>,
-
- // PCS: "C.7: If the argument is an Integral Type, the size of the argument is
- // less than or equal to 8 bytes and the NGRN is less than 8, the argument is
- // copied to the least significant bits of x[NGRN]. The NGRN is incremented by
- // one. The argument has now been allocated."
-
- // First we implement C.8 and C.9 (128-bit types get even registers). i128 is
- // represented as two i64s, the first one being split. If we delayed this
- // operation C.8 would never be reached.
- CCIfType<[i64],
- CCIfSplit<CCAssignToRegWithShadow<[X0, X2, X4, X6], [X0, X1, X3, X5]>>>,
-
- // Note: the promotion also implements C.14.
- CCIfType<[i8, i16, i32], CCPromoteToType<i64>>,
-
- // And now the real implementation of C.7
- CCIfType<[i64], CCAssignToReg<[X0, X1, X2, X3, X4, X5, X6, X7]>>,
-
- // PCS: "C.8: If the argument has an alignment of 16 then the NGRN is rounded
- // up to the next even number."
- //
- // "C.9: If the argument is an Integral Type, the size of the argument is
- // equal to 16 and the NGRN is less than 7, the argument is copied to x[NGRN]
- // and x[NGRN+1], x[NGRN] shall contain the lower addressed double-word of the
- // memory representation of the argument. The NGRN is incremented by two. The
- // argument has now been allocated."
- //
- // Subtlety here: what if alignment is 16 but it is not an integral type? All
- // floating-point types have been allocated already, which leaves composite
- // types: this is why a front-end may need to produce i128 for a struct <= 16
- // bytes.
-
- // PCS: "C.10 If the argument is a Composite Type and the size in double-words
- // of the argument is not more than 8 minus NGRN, then the argument is copied
- // into consecutive general-purpose registers, starting at x[NGRN]. The
- // argument is passed as though it had been loaded into the registers from a
- // double-word aligned address with an appropriate sequence of LDR
- // instructions loading consecutive registers from memory (the contents of any
- // unused parts of the registers are unspecified by this standard). The NGRN
- // is incremented by the number of registers used. The argument has now been
- // allocated."
- //
- // Another one that's the responsibility of the front-end (sigh).
-
- // PCS: "C.11: The NGRN is set to 8."
- CCCustom<"CC_AArch64NoMoreRegs">,
-
- // PCS: "C.12: The NSAA is rounded up to the larger of 8 or the Natural
- // Alignment of the argument's type."
- //
- // PCS: "C.13: If the argument is a composite type then the argument is copied
- // to memory at the adjusted NSAA. The NSAA is incremented by the size of the
- // argument. The argument has now been allocated."
- //
- // Note that the effect of this corresponds to a memcpy rather than register
- // stores so that the struct ends up correctly addressable at the adjusted
- // NSAA.
-
- // PCS: "C.14: If the size of the argument is less than 8 bytes then the size
- // of the argument is set to 8 bytes. The effect is as if the argument was
- // copied to the least significant bits of a 64-bit register and the remaining
- // bits filled with unspecified values."
- //
- // Integer types were widened above. Floating-point and composite types have
- // already been allocated completely. Nothing to do.
-
- // PCS: "C.15: The argument is copied to memory at the adjusted NSAA. The NSAA
- // is incremented by the size of the argument. The argument has now been
- // allocated."
- CCIfType<[i64], CCIfSplit<CCAssignToStack<8, 16>>>,
- CCIfType<[i64], CCAssignToStack<8, 8>>
-
-]>;
-
-// According to the PCS, X19-X30 are callee-saved, however only the low 64-bits
-// of vector registers (8-15) are callee-saved. The order here is picked up
-// by PrologEpilogInserter.cpp to allocate stack slots, starting from top of
-// stack upon entry. This gives the customary layout of x30 at [sp-8], x29 at
-// [sp-16], ...
-def CSR_PCS : CalleeSavedRegs<(add (sequence "X%u", 30, 19),
- (sequence "D%u", 15, 8))>;
-
-
-// TLS descriptor calls are extremely restricted in their changes, to allow
-// optimisations in the (hopefully) more common fast path where no real action
-// is needed. They actually have to preserve all registers, except for the
-// unavoidable X30 and the return register X0.
-def TLSDesc : CalleeSavedRegs<(add (sequence "X%u", 29, 1),
- (sequence "Q%u", 31, 0))>;
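
To make the front-end guidance in the file above concrete, an assumed example (ours) of how aggregates map onto the suggested types:

#include <cstdint>

struct Small { int32_t a, b, c; }; // 12 bytes: passed in registers as the
                                   // [2 x i64]-style composite described above
struct Big { int64_t v[3]; };      // 24 bytes: passed indirectly via a pointer
                                   // that the front-end introduces
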
diff --git a/lib/Target/ARM64/ARM64CallingConvention.td b/lib/Target/AArch64/AArch64CallingConvention.td
index 9ac888f..ded2e17 100644
--- a/lib/Target/ARM64/ARM64CallingConvention.td
+++ b/lib/Target/AArch64/AArch64CallingConvention.td
@@ -1,4 +1,4 @@
-//===- ARM64CallingConv.td - Calling Conventions for ARM64 -*- tablegen -*-===//
+//=- AArch64CallingConv.td - Calling Conventions for AArch64 -*- tablegen -*-=//
//
// The LLVM Compiler Infrastructure
//
@@ -7,28 +7,45 @@
//
//===----------------------------------------------------------------------===//
//
-// This describes the calling conventions for ARM64 architecture.
+// This describes the calling conventions for the AArch64 architecture.
//
//===----------------------------------------------------------------------===//
/// CCIfAlign - Match if the original alignment of the arg equals Align.
class CCIfAlign<string Align, CCAction A> :
CCIf<!strconcat("ArgFlags.getOrigAlign() == ", Align), A>;
+/// CCIfBigEndian - Match only if we're in big endian mode.
+class CCIfBigEndian<CCAction A> :
+ CCIf<"State.getTarget().getDataLayout()->isBigEndian()", A>;
+
+class CCIfUnallocated<string Reg, CCAction A> :
+ CCIf<"!State.isAllocated(AArch64::" # Reg # ")", A>;
//===----------------------------------------------------------------------===//
// ARM AAPCS64 Calling Convention
//===----------------------------------------------------------------------===//
-def CC_ARM64_AAPCS : CallingConv<[
+def CC_AArch64_AAPCS : CallingConv<[
CCIfType<[v2f32], CCBitConvertToType<v2i32>>,
- CCIfType<[v2f64, v4f32, f128], CCBitConvertToType<v2i64>>,
+ CCIfType<[v2f64, v4f32], CCBitConvertToType<v2i64>>,
+
+ // Big endian vectors must be passed as if they were 1-element vectors so that
+ // their lanes are in a consistent order.
+ CCIfBigEndian<CCIfType<[v2i32, v2f32, v4i16, v4f16, v8i8],
+ CCBitConvertToType<f64>>>,
+ CCIfBigEndian<CCIfType<[v2i64, v2f64, v4i32, v4f32, v8i16, v8f16, v16i8],
+ CCBitConvertToType<f128>>>,
// An SRet is passed in X8, not X0 like a normal pointer parameter.
CCIfSRet<CCIfType<[i64], CCAssignToRegWithShadow<[X8], [W8]>>>,
+ // Put ByVal arguments directly on the stack. Minimum size and alignment of a
+  // slot are 64 bits.
+ CCIfByVal<CCPassByVal<8, 8>>,
+
// Handle i1, i8, i16, i32, i64, f32, f64 and v2f64 by passing in registers,
// up to eight each of GPR and FPR.
- CCIfType<[i1, i8, i16], CCCustom<"CC_ARM64_Custom_i1i8i16_Reg">>,
+ CCIfType<[i1, i8, i16], CCIfUnallocated<"X7", CCPromoteToType<i32>>>,
CCIfType<[i32], CCAssignToRegWithShadow<[W0, W1, W2, W3, W4, W5, W6, W7],
[X0, X1, X2, X3, X4, X5, X6, X7]>>,
  // i128 is split into two i64s; we can't fit half in register X7.
@@ -36,7 +53,7 @@ def CC_ARM64_AAPCS : CallingConv<[
[X0, X1, X3, X5]>>>,
  // i128 is split into two i64s, and its stack alignment is 16 bytes.
- CCIfType<[i64], CCIfSplit<CCAssignToStack<8, 16>>>,
+ CCIfType<[i64], CCIfSplit<CCAssignToStackWithShadow<8, 16, [X7]>>>,
CCIfType<[i64], CCAssignToRegWithShadow<[X0, X1, X2, X3, X4, X5, X6, X7],
[W0, W1, W2, W3, W4, W5, W6, W7]>>,
@@ -47,7 +64,7 @@ def CC_ARM64_AAPCS : CallingConv<[
CCIfType<[v1i64, v2i32, v4i16, v8i8, v1f64, v2f32],
CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7],
[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
- CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64],
+ CCIfType<[f128, v2i64, v4i32, v8i16, v16i8, v4f32, v2f64],
CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
// If more than will fit in registers, pass them on the stack instead.
@@ -55,12 +72,20 @@ def CC_ARM64_AAPCS : CallingConv<[
CCIfType<[i32, f32], CCAssignToStack<8, 8>>,
CCIfType<[i64, f64, v1f64, v2f32, v1i64, v2i32, v4i16, v8i8],
CCAssignToStack<8, 8>>,
- CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64], CCAssignToStack<16, 16>>
+ CCIfType<[f128, v2i64, v4i32, v8i16, v16i8, v4f32, v2f64],
+ CCAssignToStack<16, 16>>
]>;
-def RetCC_ARM64_AAPCS : CallingConv<[
+def RetCC_AArch64_AAPCS : CallingConv<[
CCIfType<[v2f32], CCBitConvertToType<v2i32>>,
- CCIfType<[v2f64, v4f32, f128], CCBitConvertToType<v2i64>>,
+ CCIfType<[v2f64, v4f32], CCBitConvertToType<v2i64>>,
+
+ // Big endian vectors must be passed as if they were 1-element vectors so that
+ // their lanes are in a consistent order.
+ CCIfBigEndian<CCIfType<[v2i32, v2f32, v4i16, v4f16, v8i8],
+ CCBitConvertToType<f64>>>,
+ CCIfBigEndian<CCIfType<[v2i64, v2f64, v4i32, v4f32, v8i16, v8f16, v16i8],
+ CCBitConvertToType<f128>>>,
CCIfType<[i32], CCAssignToRegWithShadow<[W0, W1, W2, W3, W4, W5, W6, W7],
[X0, X1, X2, X3, X4, X5, X6, X7]>>,
@@ -73,7 +98,7 @@ def RetCC_ARM64_AAPCS : CallingConv<[
CCIfType<[v1i64, v2i32, v4i16, v8i8, v1f64, v2f32],
CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7],
[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
- CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64],
+ CCIfType<[f128, v2i64, v4i32, v8i16, v16i8, v4f32, v2f64],
CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>
]>;
@@ -82,16 +107,20 @@ def RetCC_ARM64_AAPCS : CallingConv<[
// from the standard one at this level:
// + i128s (i.e. split i64s) don't need even registers.
// + Stack slots are sized as needed rather than being at least 64-bit.
-def CC_ARM64_DarwinPCS : CallingConv<[
+def CC_AArch64_DarwinPCS : CallingConv<[
CCIfType<[v2f32], CCBitConvertToType<v2i32>>,
CCIfType<[v2f64, v4f32, f128], CCBitConvertToType<v2i64>>,
// An SRet is passed in X8, not X0 like a normal pointer parameter.
CCIfSRet<CCIfType<[i64], CCAssignToRegWithShadow<[X8], [W8]>>>,
+ // Put ByVal arguments directly on the stack. Minimum size and alignment of a
+  // slot are 64 bits.
+ CCIfByVal<CCPassByVal<8, 8>>,
+
// Handle i1, i8, i16, i32, i64, f32, f64 and v2f64 by passing in registers,
// up to eight each of GPR and FPR.
- CCIfType<[i1, i8, i16], CCCustom<"CC_ARM64_Custom_i1i8i16_Reg">>,
+ CCIfType<[i1, i8, i16], CCIfUnallocated<"X7", CCPromoteToType<i32>>>,
CCIfType<[i32], CCAssignToRegWithShadow<[W0, W1, W2, W3, W4, W5, W6, W7],
[X0, X1, X2, X3, X4, X5, X6, X7]>>,
  // i128 is split into two i64s; we can't fit half in register X7.
@@ -114,14 +143,15 @@ def CC_ARM64_DarwinPCS : CallingConv<[
CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
// If more than will fit in registers, pass them on the stack instead.
- CCIfType<[i1, i8, i16], CCCustom<"CC_ARM64_Custom_i1i8i16_Stack">>,
+ CCIfType<[i1, i8], CCAssignToStack<1, 1>>,
+ CCIfType<[i16], CCAssignToStack<2, 2>>,
CCIfType<[i32, f32], CCAssignToStack<4, 4>>,
CCIfType<[i64, f64, v1f64, v2f32, v1i64, v2i32, v4i16, v8i8],
CCAssignToStack<8, 8>>,
CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64], CCAssignToStack<16, 16>>
]>;
-def CC_ARM64_DarwinPCS_VarArg : CallingConv<[
+def CC_AArch64_DarwinPCS_VarArg : CallingConv<[
CCIfType<[v2f32], CCBitConvertToType<v2i32>>,
CCIfType<[v2f64, v4f32, f128], CCBitConvertToType<v2i64>>,
@@ -140,9 +170,9 @@ def CC_ARM64_DarwinPCS_VarArg : CallingConv<[
// in register and the remaining arguments on stack. We allow 32-bit stack slots,
// so that WebKit can write partial values on the stack and define the other
// 32-bit quantity as undef.
-def CC_ARM64_WebKit_JS : CallingConv<[
+def CC_AArch64_WebKit_JS : CallingConv<[
// Handle i1, i8, i16, i32, and i64 passing in register X0 (W0).
- CCIfType<[i1, i8, i16], CCCustom<"CC_ARM64_WebKit_JS_i1i8i16_Reg">>,
+ CCIfType<[i1, i8, i16], CCIfUnallocated<"X0", CCPromoteToType<i32>>>,
CCIfType<[i32], CCAssignToRegWithShadow<[W0], [X0]>>,
CCIfType<[i64], CCAssignToRegWithShadow<[X0], [W0]>>,
@@ -152,7 +182,7 @@ def CC_ARM64_WebKit_JS : CallingConv<[
CCIfType<[i64, f64], CCAssignToStack<8, 8>>
]>;
-def RetCC_ARM64_WebKit_JS : CallingConv<[
+def RetCC_AArch64_WebKit_JS : CallingConv<[
CCIfType<[i32], CCAssignToRegWithShadow<[W0, W1, W2, W3, W4, W5, W6, W7],
[X0, X1, X2, X3, X4, X5, X6, X7]>>,
CCIfType<[i64], CCAssignToRegWithShadow<[X0, X1, X2, X3, X4, X5, X6, X7],
@@ -171,7 +201,7 @@ def RetCC_ARM64_WebKit_JS : CallingConv<[
// It would be better to model its preservation semantics properly (create a
// vreg on entry, use it in RET & tail call generation; make that vreg def if we
// end up saving LR as part of a call frame). Watch this space...
-def CSR_ARM64_AAPCS : CalleeSavedRegs<(add LR, FP, X19, X20, X21, X22,
+def CSR_AArch64_AAPCS : CalleeSavedRegs<(add LR, FP, X19, X20, X21, X22,
X23, X24, X25, X26, X27, X28,
D8, D9, D10, D11,
D12, D13, D14, D15)>;
@@ -184,24 +214,24 @@ def CSR_ARM64_AAPCS : CalleeSavedRegs<(add LR, FP, X19, X20, X21, X22,
// (For generic ARM 64-bit ABI code, clang will not generate constructors or
// destructors with 'this' returns, so this RegMask will not be used in that
// case)
-def CSR_ARM64_AAPCS_ThisReturn : CalleeSavedRegs<(add CSR_ARM64_AAPCS, X0)>;
+def CSR_AArch64_AAPCS_ThisReturn : CalleeSavedRegs<(add CSR_AArch64_AAPCS, X0)>;
// The function used by Darwin to obtain the address of a thread-local variable
// guarantees more than a normal AAPCS function. x16 and x17 are used on the
// fast path for calculation, but other registers except X0 (argument/return)
// and LR (it is a call, after all) are preserved.
-def CSR_ARM64_TLS_Darwin
+def CSR_AArch64_TLS_Darwin
: CalleeSavedRegs<(add (sub (sequence "X%u", 1, 28), X16, X17),
FP,
(sequence "Q%u", 0, 31))>;
// The ELF stub used for TLS-descriptor access saves every feasible
// register. Only X0 and LR are clobbered.
-def CSR_ARM64_TLS_ELF
+def CSR_AArch64_TLS_ELF
: CalleeSavedRegs<(add (sequence "X%u", 1, 28), FP,
(sequence "Q%u", 0, 31))>;
-def CSR_ARM64_AllRegs
+def CSR_AArch64_AllRegs
: CalleeSavedRegs<(add (sequence "W%u", 0, 30), WSP,
(sequence "X%u", 0, 28), FP, LR, SP,
(sequence "B%u", 0, 31), (sequence "H%u", 0, 31),
diff --git a/lib/Target/ARM64/ARM64CleanupLocalDynamicTLSPass.cpp b/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp
index e3f8248..4d23dc5 100644
--- a/lib/Target/ARM64/ARM64CleanupLocalDynamicTLSPass.cpp
+++ b/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp
@@ -1,4 +1,4 @@
-//===-- ARM64CleanupLocalDynamicTLSPass.cpp -----------------------*- C++ -*-=//
+//===-- AArch64CleanupLocalDynamicTLSPass.cpp ---------------------*- C++ -*-=//
//
// The LLVM Compiler Infrastructure
//
@@ -22,10 +22,10 @@
// pass looks through a function and performs such combinations.
//
//===----------------------------------------------------------------------===//
-#include "ARM64.h"
-#include "ARM64InstrInfo.h"
-#include "ARM64MachineFunctionInfo.h"
-#include "ARM64TargetMachine.h"
+#include "AArch64.h"
+#include "AArch64InstrInfo.h"
+#include "AArch64MachineFunctionInfo.h"
+#include "AArch64TargetMachine.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -38,8 +38,8 @@ struct LDTLSCleanup : public MachineFunctionPass {
static char ID;
LDTLSCleanup() : MachineFunctionPass(ID) {}
- virtual bool runOnMachineFunction(MachineFunction &MF) {
- ARM64FunctionInfo *AFI = MF.getInfo<ARM64FunctionInfo>();
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
if (AFI->getNumLocalDynamicTLSAccesses() < 2) {
      // No point folding accesses if there aren't at least two.
return false;
@@ -62,7 +62,7 @@ struct LDTLSCleanup : public MachineFunctionPass {
for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;
++I) {
switch (I->getOpcode()) {
- case ARM64::TLSDESC_BLR:
+ case AArch64::TLSDESC_BLR:
// Make sure it's a local dynamic access.
if (!I->getOperand(1).isSymbol() ||
strcmp(I->getOperand(1).getSymbolName(), "_TLS_MODULE_BASE_"))
@@ -92,15 +92,15 @@ struct LDTLSCleanup : public MachineFunctionPass {
MachineInstr *replaceTLSBaseAddrCall(MachineInstr *I,
unsigned TLSBaseAddrReg) {
MachineFunction *MF = I->getParent()->getParent();
- const ARM64TargetMachine *TM =
- static_cast<const ARM64TargetMachine *>(&MF->getTarget());
- const ARM64InstrInfo *TII = TM->getInstrInfo();
+ const AArch64TargetMachine *TM =
+ static_cast<const AArch64TargetMachine *>(&MF->getTarget());
+ const AArch64InstrInfo *TII = TM->getInstrInfo();
// Insert a Copy from TLSBaseAddrReg to x0, which is where the rest of the
// code sequence assumes the address will be.
- MachineInstr *Copy =
- BuildMI(*I->getParent(), I, I->getDebugLoc(),
- TII->get(TargetOpcode::COPY), ARM64::X0).addReg(TLSBaseAddrReg);
+ MachineInstr *Copy = BuildMI(*I->getParent(), I, I->getDebugLoc(),
+ TII->get(TargetOpcode::COPY),
+ AArch64::X0).addReg(TLSBaseAddrReg);
// Erase the TLS_base_addr instruction.
I->eraseFromParent();
@@ -112,28 +112,28 @@ struct LDTLSCleanup : public MachineFunctionPass {
// inserting a copy instruction after I. Returns the new instruction.
MachineInstr *setRegister(MachineInstr *I, unsigned *TLSBaseAddrReg) {
MachineFunction *MF = I->getParent()->getParent();
- const ARM64TargetMachine *TM =
- static_cast<const ARM64TargetMachine *>(&MF->getTarget());
- const ARM64InstrInfo *TII = TM->getInstrInfo();
+ const AArch64TargetMachine *TM =
+ static_cast<const AArch64TargetMachine *>(&MF->getTarget());
+ const AArch64InstrInfo *TII = TM->getInstrInfo();
// Create a virtual register for the TLS base address.
MachineRegisterInfo &RegInfo = MF->getRegInfo();
- *TLSBaseAddrReg = RegInfo.createVirtualRegister(&ARM64::GPR64RegClass);
+ *TLSBaseAddrReg = RegInfo.createVirtualRegister(&AArch64::GPR64RegClass);
// Insert a copy from X0 to TLSBaseAddrReg for later.
MachineInstr *Next = I->getNextNode();
MachineInstr *Copy = BuildMI(*I->getParent(), Next, I->getDebugLoc(),
TII->get(TargetOpcode::COPY),
- *TLSBaseAddrReg).addReg(ARM64::X0);
+ *TLSBaseAddrReg).addReg(AArch64::X0);
return Copy;
}
- virtual const char *getPassName() const {
+ const char *getPassName() const override {
return "Local Dynamic TLS Access Clean-up";
}
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
AU.addRequired<MachineDominatorTree>();
MachineFunctionPass::getAnalysisUsage(AU);
@@ -142,6 +142,6 @@ struct LDTLSCleanup : public MachineFunctionPass {
}
char LDTLSCleanup::ID = 0;
-FunctionPass *llvm::createARM64CleanupLocalDynamicTLSPass() {
+FunctionPass *llvm::createAArch64CleanupLocalDynamicTLSPass() {
return new LDTLSCleanup();
}
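
A source-level view (assumed example, not from the patch) of what this cleanup buys: under the local-dynamic TLS model each access would otherwise make its own TLSDESC_BLR call to compute _TLS_MODULE_BASE_.

static thread_local int a; // local-dynamic TLS when built with -fPIC
static thread_local int b;

int sum() { return a + b; } // two base-address calls before the pass;
                            // one call plus a register copy after it
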
diff --git a/lib/Target/ARM64/ARM64CollectLOH.cpp b/lib/Target/AArch64/AArch64CollectLOH.cpp
index f52778f..6b1f096 100644
--- a/lib/Target/ARM64/ARM64CollectLOH.cpp
+++ b/lib/Target/AArch64/AArch64CollectLOH.cpp
@@ -1,4 +1,4 @@
-//===-------------- ARM64CollectLOH.cpp - ARM64 collect LOH pass --*- C++ -*-=//
+//===---------- AArch64CollectLOH.cpp - AArch64 collect LOH pass --*- C++ -*-=//
//
// The LLVM Compiler Infrastructure
//
@@ -85,8 +85,8 @@
// This LOH aims at getting rid of redundant ADRP instructions.
//
// The overall design for emitting the LOHs is:
-// 1. ARM64CollectLOH (this pass) records the LOHs in the ARM64FunctionInfo.
-// 2. ARM64AsmPrinter reads the LOHs from ARM64FunctionInfo and it:
+// 1. AArch64CollectLOH (this pass) records the LOHs in the AArch64FunctionInfo.
+// 2. AArch64AsmPrinter reads the LOHs from AArch64FunctionInfo and it:
//          1. Associates them with a label.
// 2. Emits them in a MCStreamer (EmitLOHDirective).
// - The MCMachOStreamer records them into the MCAssembler.
@@ -98,11 +98,10 @@
// - Other ObjectWriters ignore them.
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "arm64-collect-loh"
-#include "ARM64.h"
-#include "ARM64InstrInfo.h"
-#include "ARM64MachineFunctionInfo.h"
-#include "MCTargetDesc/ARM64AddressingModes.h"
+#include "AArch64.h"
+#include "AArch64InstrInfo.h"
+#include "AArch64MachineFunctionInfo.h"
+#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h"
@@ -123,14 +122,16 @@
#include "llvm/ADT/Statistic.h"
using namespace llvm;
+#define DEBUG_TYPE "aarch64-collect-loh"
+
static cl::opt<bool>
-PreCollectRegister("arm64-collect-loh-pre-collect-register", cl::Hidden,
+PreCollectRegister("aarch64-collect-loh-pre-collect-register", cl::Hidden,
cl::desc("Restrict analysis to registers invovled"
" in LOHs"),
cl::init(true));
static cl::opt<bool>
-BasicBlockScopeOnly("arm64-collect-loh-bb-only", cl::Hidden,
+BasicBlockScopeOnly("aarch64-collect-loh-bb-only", cl::Hidden,
cl::desc("Restrict analysis at basic block scope"),
cl::init(true));
@@ -163,23 +164,23 @@ STATISTIC(NumADRSimpleCandidate, "Number of simplifiable ADRP + ADD");
STATISTIC(NumADRComplexCandidate, "Number of too complex ADRP + ADD");
namespace llvm {
-void initializeARM64CollectLOHPass(PassRegistry &);
+void initializeAArch64CollectLOHPass(PassRegistry &);
}
namespace {
-struct ARM64CollectLOH : public MachineFunctionPass {
+struct AArch64CollectLOH : public MachineFunctionPass {
static char ID;
- ARM64CollectLOH() : MachineFunctionPass(ID) {
- initializeARM64CollectLOHPass(*PassRegistry::getPassRegistry());
+ AArch64CollectLOH() : MachineFunctionPass(ID) {
+ initializeAArch64CollectLOHPass(*PassRegistry::getPassRegistry());
}
- virtual bool runOnMachineFunction(MachineFunction &Fn);
+ bool runOnMachineFunction(MachineFunction &MF) override;
- virtual const char *getPassName() const {
- return "ARM64 Collect Linker Optimization Hint (LOH)";
+ const char *getPassName() const override {
+ return "AArch64 Collect Linker Optimization Hint (LOH)";
}
- void getAnalysisUsage(AnalysisUsage &AU) const {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesAll();
MachineFunctionPass::getAnalysisUsage(AU);
AU.addRequired<MachineDominatorTree>();
@@ -213,14 +214,14 @@ typedef DenseMap<unsigned, unsigned> MapRegToId;
typedef SmallVector<unsigned, 32> MapIdToReg;
} // end anonymous namespace.
-char ARM64CollectLOH::ID = 0;
+char AArch64CollectLOH::ID = 0;
-INITIALIZE_PASS_BEGIN(ARM64CollectLOH, "arm64-collect-loh",
- "ARM64 Collect Linker Optimization Hint (LOH)", false,
+INITIALIZE_PASS_BEGIN(AArch64CollectLOH, "aarch64-collect-loh",
+ "AArch64 Collect Linker Optimization Hint (LOH)", false,
false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
-INITIALIZE_PASS_END(ARM64CollectLOH, "arm64-collect-loh",
- "ARM64 Collect Linker Optimization Hint (LOH)", false,
+INITIALIZE_PASS_END(AArch64CollectLOH, "aarch64-collect-loh",
+ "AArch64 Collect Linker Optimization Hint (LOH)", false,
false)
/// Given a couple (MBB, reg) get the corresponding set of instructions from
@@ -230,15 +231,14 @@ INITIALIZE_PASS_END(ARM64CollectLOH, "arm64-collect-loh",
/// \param nbRegs is used internally to allocate some memory. It must be consistent
/// with the way sets is used.
static SetOfMachineInstr &getSet(BlockToSetOfInstrsPerColor &sets,
- const MachineBasicBlock *MBB, unsigned reg,
+ const MachineBasicBlock &MBB, unsigned reg,
unsigned nbRegs) {
SetOfMachineInstr *result;
- BlockToSetOfInstrsPerColor::iterator it = sets.find(MBB);
- if (it != sets.end()) {
+ BlockToSetOfInstrsPerColor::iterator it = sets.find(&MBB);
+ if (it != sets.end())
result = it->second;
- } else {
- result = sets[MBB] = new SetOfMachineInstr[nbRegs];
- }
+ else
+ result = sets[&MBB] = new SetOfMachineInstr[nbRegs];
return result[reg];
}
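
getSet's lazy allocation in miniature, with standard containers (a sketch; the real code keys on MachineBasicBlock and stores sets of MachineInstr pointers):

#include <map>
#include <set>

using SetOfInstr = std::set<const void *>;

static SetOfInstr &getSetSketch(std::map<int, SetOfInstr *> &Sets, int Block,
                                unsigned Reg, unsigned NbRegs) {
  SetOfInstr *&Slot = Sets[Block]; // operator[] default-inserts a nullptr
  if (!Slot)
    Slot = new SetOfInstr[NbRegs]; // freed later, as finitReachingDef does
  return Slot[Reg];
}
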
@@ -251,18 +251,18 @@ static SetOfMachineInstr &getSet(BlockToSetOfInstrsPerColor &sets,
/// "sets[reg]".
/// \pre set[reg] is valid.
static SetOfMachineInstr &getUses(InstrToInstrs *sets, unsigned reg,
- const MachineInstr *MI) {
- return sets[reg][MI];
+ const MachineInstr &MI) {
+ return sets[reg][&MI];
}
/// Same as getUses but does not modify the input map: sets.
/// \return NULL if the couple (reg, MI) is not in sets.
static const SetOfMachineInstr *getUses(const InstrToInstrs *sets, unsigned reg,
- const MachineInstr *MI) {
- InstrToInstrs::const_iterator Res = sets[reg].find(MI);
+ const MachineInstr &MI) {
+ InstrToInstrs::const_iterator Res = sets[reg].find(&MI);
if (Res != sets[reg].end())
return &(Res->second);
- return NULL;
+ return nullptr;
}
/// Initialize the reaching definition algorithm:
@@ -276,41 +276,36 @@ static const SetOfMachineInstr *getUses(const InstrToInstrs *sets, unsigned reg,
/// definition. It also considers definitions of ADRP instructions as uses and
/// ignores other uses. The ADRPMode is used to collect the information for LOHs
/// that involve ADRP operations only.
-static void initReachingDef(MachineFunction *MF,
+static void initReachingDef(MachineFunction &MF,
InstrToInstrs *ColorOpToReachedUses,
BlockToInstrPerColor &Gen, BlockToRegSet &Kill,
BlockToSetOfInstrsPerColor &ReachableUses,
const MapRegToId &RegToId,
const MachineInstr *DummyOp, bool ADRPMode) {
- const TargetMachine &TM = MF->getTarget();
+ const TargetMachine &TM = MF.getTarget();
const TargetRegisterInfo *TRI = TM.getRegisterInfo();
unsigned NbReg = RegToId.size();
- for (MachineFunction::const_iterator IMBB = MF->begin(), IMBBEnd = MF->end();
- IMBB != IMBBEnd; ++IMBB) {
- const MachineBasicBlock *MBB = &(*IMBB);
- const MachineInstr **&BBGen = Gen[MBB];
+ for (MachineBasicBlock &MBB : MF) {
+ const MachineInstr **&BBGen = Gen[&MBB];
BBGen = new const MachineInstr *[NbReg];
memset(BBGen, 0, sizeof(const MachineInstr *) * NbReg);
- BitVector &BBKillSet = Kill[MBB];
+ BitVector &BBKillSet = Kill[&MBB];
BBKillSet.resize(NbReg);
- for (MachineBasicBlock::const_iterator II = MBB->begin(), IEnd = MBB->end();
- II != IEnd; ++II) {
- bool IsADRP = II->getOpcode() == ARM64::ADRP;
+ for (const MachineInstr &MI : MBB) {
+ bool IsADRP = MI.getOpcode() == AArch64::ADRP;
// Process uses first.
if (IsADRP || !ADRPMode)
- for (MachineInstr::const_mop_iterator IO = II->operands_begin(),
- IOEnd = II->operands_end();
- IO != IOEnd; ++IO) {
+ for (const MachineOperand &MO : MI.operands()) {
// Treat ADRP def as use, as the goal of the analysis is to find
// ADRP defs reached by other ADRP defs.
- if (!IO->isReg() || (!ADRPMode && !IO->isUse()) ||
- (ADRPMode && (!IsADRP || !IO->isDef())))
+ if (!MO.isReg() || (!ADRPMode && !MO.isUse()) ||
+ (ADRPMode && (!IsADRP || !MO.isDef())))
continue;
- unsigned CurReg = IO->getReg();
+ unsigned CurReg = MO.getReg();
MapRegToId::const_iterator ItCurRegId = RegToId.find(CurReg);
if (ItCurRegId == RegToId.end())
continue;
@@ -318,20 +313,18 @@ static void initReachingDef(MachineFunction *MF,
// if CurReg has not been defined, this use is reachable.
if (!BBGen[CurReg] && !BBKillSet.test(CurReg))
- getSet(ReachableUses, MBB, CurReg, NbReg).insert(&(*II));
+ getSet(ReachableUses, MBB, CurReg, NbReg).insert(&MI);
// current basic block definition for this color, if any, is in Gen.
if (BBGen[CurReg])
- getUses(ColorOpToReachedUses, CurReg, BBGen[CurReg]).insert(&(*II));
+ getUses(ColorOpToReachedUses, CurReg, *BBGen[CurReg]).insert(&MI);
}
// Process clobbers.
- for (MachineInstr::const_mop_iterator IO = II->operands_begin(),
- IOEnd = II->operands_end();
- IO != IOEnd; ++IO) {
- if (!IO->isRegMask())
+ for (const MachineOperand &MO : MI.operands()) {
+ if (!MO.isRegMask())
continue;
// Clobbers kill the related colors.
- const uint32_t *PreservedRegs = IO->getRegMask();
+ const uint32_t *PreservedRegs = MO.getRegMask();
// Set generated regs.
for (const auto Entry : RegToId) {
@@ -342,19 +335,17 @@ static void initReachingDef(MachineFunction *MF,
           // Do not record a clobbered definition when not in ADRP mode.
// This definition is not used anyway (otherwise register
// allocation is wrong).
- BBGen[Reg] = ADRPMode ? II : NULL;
+ BBGen[Reg] = ADRPMode ? &MI : nullptr;
BBKillSet.set(Reg);
}
}
}
- // Process defs
- for (MachineInstr::const_mop_iterator IO = II->operands_begin(),
- IOEnd = II->operands_end();
- IO != IOEnd; ++IO) {
- if (!IO->isReg() || !IO->isDef())
+ // Process register defs.
+ for (const MachineOperand &MO : MI.operands()) {
+ if (!MO.isReg() || !MO.isDef())
continue;
- unsigned CurReg = IO->getReg();
+ unsigned CurReg = MO.getReg();
MapRegToId::const_iterator ItCurRegId = RegToId.find(CurReg);
if (ItCurRegId == RegToId.end())
continue;
@@ -365,19 +356,19 @@ static void initReachingDef(MachineFunction *MF,
"Sub-register of an "
"involved register, not recorded as involved!");
BBKillSet.set(ItRegId->second);
- BBGen[ItRegId->second] = &(*II);
+ BBGen[ItRegId->second] = &MI;
}
- BBGen[ItCurRegId->second] = &(*II);
+ BBGen[ItCurRegId->second] = &MI;
}
}
  // If we restrict our analysis to basic block scope, conservatively add a
  // dummy use for each generated value.
- if (!ADRPMode && DummyOp && !MBB->succ_empty())
+ if (!ADRPMode && DummyOp && !MBB.succ_empty())
for (unsigned CurReg = 0; CurReg < NbReg; ++CurReg)
if (BBGen[CurReg])
- getUses(ColorOpToReachedUses, CurReg, BBGen[CurReg]).insert(DummyOp);
+ getUses(ColorOpToReachedUses, CurReg, *BBGen[CurReg]).insert(DummyOp);
}
}
@@ -390,7 +381,7 @@ static void initReachingDef(MachineFunction *MF,
/// op.reachedUses
///
/// Out[bb] = Gen[bb] U (In[bb] - Kill[bb])
-static void reachingDefAlgorithm(MachineFunction *MF,
+static void reachingDefAlgorithm(MachineFunction &MF,
InstrToInstrs *ColorOpToReachedUses,
BlockToSetOfInstrsPerColor &In,
BlockToSetOfInstrsPerColor &Out,
@@ -400,10 +391,7 @@ static void reachingDefAlgorithm(MachineFunction *MF,
bool HasChanged;
do {
HasChanged = false;
- for (MachineFunction::const_iterator IMBB = MF->begin(),
- IMBBEnd = MF->end();
- IMBB != IMBBEnd; ++IMBB) {
- const MachineBasicBlock *MBB = &(*IMBB);
+ for (MachineBasicBlock &MBB : MF) {
unsigned CurReg;
for (CurReg = 0; CurReg < NbReg; ++CurReg) {
SetOfMachineInstr &BBInSet = getSet(In, MBB, CurReg, NbReg);
@@ -412,24 +400,21 @@ static void reachingDefAlgorithm(MachineFunction *MF,
SetOfMachineInstr &BBOutSet = getSet(Out, MBB, CurReg, NbReg);
unsigned Size = BBOutSet.size();
// In[bb][color] = U Out[bb.predecessors][color]
- for (MachineBasicBlock::const_pred_iterator
- PredMBB = MBB->pred_begin(),
- EndPredMBB = MBB->pred_end();
- PredMBB != EndPredMBB; ++PredMBB) {
+ for (MachineBasicBlock *PredMBB : MBB.predecessors()) {
SetOfMachineInstr &PredOutSet = getSet(Out, *PredMBB, CurReg, NbReg);
BBInSet.insert(PredOutSet.begin(), PredOutSet.end());
}
      // insert reachableUses[bb][color] in each in[bb][color] op.reachedUses
- for (const MachineInstr *MI: BBInSet) {
+ for (const MachineInstr *MI : BBInSet) {
SetOfMachineInstr &OpReachedUses =
- getUses(ColorOpToReachedUses, CurReg, MI);
+ getUses(ColorOpToReachedUses, CurReg, *MI);
OpReachedUses.insert(BBReachableUses.begin(), BBReachableUses.end());
}
// Out[bb] = Gen[bb] U (In[bb] - Kill[bb])
- if (!Kill[MBB].test(CurReg))
+ if (!Kill[&MBB].test(CurReg))
BBOutSet.insert(BBInSet.begin(), BBInSet.end());
- if (Gen[MBB][CurReg])
- BBOutSet.insert(Gen[MBB][CurReg]);
+ if (Gen[&MBB][CurReg])
+ BBOutSet.insert(Gen[&MBB][CurReg]);
HasChanged |= BBOutSet.size() != Size;
}
}
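
The loop above is the textbook reaching-definitions fixpoint. A minimal standalone sketch, using plain integer definition IDs and one set per block instead of the pass's per-color arrays, with a whole-block Kill flag where the pass tracks kills per color (names here are illustrative, not LLVM API):

#include <set>
#include <vector>

struct Block {
  std::vector<Block *> Preds;
  std::set<int> Gen;   // definitions generated in this block
  bool Kill = false;   // whether this block kills incoming definitions
  std::set<int> In, Out;
};

// Iterate until no Out set grows: Out[bb] = Gen[bb] U (In[bb] - Kill[bb]).
static void reachingDefs(std::vector<Block> &Blocks) {
  bool Changed;
  do {
    Changed = false;
    for (Block &B : Blocks) {
      // In[bb] = U Out[bb.predecessors]
      for (Block *P : B.Preds)
        B.In.insert(P->Out.begin(), P->Out.end());
      size_t OldSize = B.Out.size();
      // Out[bb] = Gen[bb] U (In[bb] - Kill[bb])
      if (!B.Kill)
        B.Out.insert(B.In.begin(), B.In.end());
      B.Out.insert(B.Gen.begin(), B.Gen.end());
      Changed |= B.Out.size() != OldSize;
    }
  } while (Changed);
}
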
@@ -442,38 +427,31 @@ static void finitReachingDef(BlockToSetOfInstrsPerColor &In,
BlockToSetOfInstrsPerColor &Out,
BlockToInstrPerColor &Gen,
BlockToSetOfInstrsPerColor &ReachableUses) {
- for (BlockToSetOfInstrsPerColor::const_iterator IT = Out.begin(),
- End = Out.end();
- IT != End; ++IT)
- delete[] IT->second;
- for (BlockToSetOfInstrsPerColor::const_iterator IT = In.begin(),
- End = In.end();
- IT != End; ++IT)
- delete[] IT->second;
- for (BlockToSetOfInstrsPerColor::const_iterator IT = ReachableUses.begin(),
- End = ReachableUses.end();
- IT != End; ++IT)
- delete[] IT->second;
- for (BlockToInstrPerColor::const_iterator IT = Gen.begin(), End = Gen.end();
- IT != End; ++IT)
- delete[] IT->second;
+ for (auto &IT : Out)
+ delete[] IT.second;
+ for (auto &IT : In)
+ delete[] IT.second;
+ for (auto &IT : ReachableUses)
+ delete[] IT.second;
+ for (auto &IT : Gen)
+ delete[] IT.second;
}
-/// Reaching definiton algorithm.
+/// Reaching definition algorithm.
/// \param MF function on which the algorithm will operate.
/// \param[out] ColorOpToReachedUses will contain the result of the reaching
/// def algorithm.
/// \param ADRPMode specifies whether the reaching def algorithm should be tuned
/// for ADRP optimization. \see initReachingDef for more details.
/// \param DummyOp if not NULL, the algorithm will work at
-/// basic block scope and will set for every exposed defintion a use to
+/// basic block scope and will set for every exposed definition a use to
/// @p DummyOp.
/// \pre ColorOpToReachedUses is an array of at least number of registers of
/// InstrToInstrs.
-static void reachingDef(MachineFunction *MF,
+static void reachingDef(MachineFunction &MF,
InstrToInstrs *ColorOpToReachedUses,
const MapRegToId &RegToId, bool ADRPMode = false,
- const MachineInstr *DummyOp = NULL) {
+ const MachineInstr *DummyOp = nullptr) {
// structures:
// For each basic block.
// Out: a set per color of definitions that reach the
@@ -511,17 +489,12 @@ static void printReachingDef(const InstrToInstrs *ColorOpToReachedUses,
continue;
DEBUG(dbgs() << "*** Reg " << PrintReg(IdToReg[CurReg], TRI) << " ***\n");
- InstrToInstrs::const_iterator DefsIt = ColorOpToReachedUses[CurReg].begin();
- InstrToInstrs::const_iterator DefsItEnd =
- ColorOpToReachedUses[CurReg].end();
- for (; DefsIt != DefsItEnd; ++DefsIt) {
+ for (const auto &DefsIt : ColorOpToReachedUses[CurReg]) {
DEBUG(dbgs() << "Def:\n");
- DEBUG(DefsIt->first->print(dbgs()));
+ DEBUG(DefsIt.first->print(dbgs()));
DEBUG(dbgs() << "Reachable uses:\n");
- for (SetOfMachineInstr::const_iterator UsesIt = DefsIt->second.begin(),
- UsesItEnd = DefsIt->second.end();
- UsesIt != UsesItEnd; ++UsesIt) {
- DEBUG((*UsesIt)->print(dbgs()));
+ for (const MachineInstr *MI : DefsIt.second) {
+ DEBUG(MI->print(dbgs()));
}
}
}
@@ -536,9 +509,9 @@ static bool canDefBePartOfLOH(const MachineInstr *Def) {
switch (Opc) {
default:
return false;
- case ARM64::ADRP:
+ case AArch64::ADRP:
return true;
- case ARM64::ADDXri:
+ case AArch64::ADDXri:
// Check immediate to see if the immediate is an address.
switch (Def->getOperand(2).getType()) {
default:
@@ -549,7 +522,7 @@ static bool canDefBePartOfLOH(const MachineInstr *Def) {
case MachineOperand::MO_BlockAddress:
return true;
}
- case ARM64::LDRXui:
+ case AArch64::LDRXui:
// Check immediate to see if the immediate is an address.
switch (Def->getOperand(2).getType()) {
default:
@@ -568,13 +541,13 @@ static bool isCandidateStore(const MachineInstr *Instr) {
switch (Instr->getOpcode()) {
default:
return false;
- case ARM64::STRBui:
- case ARM64::STRHui:
- case ARM64::STRWui:
- case ARM64::STRXui:
- case ARM64::STRSui:
- case ARM64::STRDui:
- case ARM64::STRQui:
+ case AArch64::STRBui:
+ case AArch64::STRHui:
+ case AArch64::STRWui:
+ case AArch64::STRXui:
+ case AArch64::STRSui:
+ case AArch64::STRDui:
+ case AArch64::STRQui:
// In case we have str xA, [xA, #imm], these are two different uses
// of xA and we cannot fold; otherwise the stored xA may be wrong,
// even if #imm == 0.
@@ -584,7 +557,7 @@ static bool isCandidateStore(const MachineInstr *Instr) {
return false;
}
-/// Given the result of a reaching defintion algorithm in ColorOpToReachedUses,
+/// Given the result of a reaching definition algorithm in ColorOpToReachedUses,
/// Build the Use to Defs information and filter out obvious non-LOH candidates.
/// In ADRPMode, non-LOH candidates are "uses" with non-ADRP definitions.
/// In non-ADRPMode, non-LOH candidates are "uses" with several definitions,
@@ -603,34 +576,29 @@ static void reachedUsesToDefs(InstrToInstrs &UseToReachingDefs,
if (ColorOpToReachedUses[CurReg].empty())
continue;
- InstrToInstrs::const_iterator DefsIt = ColorOpToReachedUses[CurReg].begin();
- InstrToInstrs::const_iterator DefsItEnd =
- ColorOpToReachedUses[CurReg].end();
- for (; DefsIt != DefsItEnd; ++DefsIt) {
- for (SetOfMachineInstr::const_iterator UsesIt = DefsIt->second.begin(),
- UsesItEnd = DefsIt->second.end();
- UsesIt != UsesItEnd; ++UsesIt) {
- const MachineInstr *Def = DefsIt->first;
+ for (const auto &DefsIt : ColorOpToReachedUses[CurReg]) {
+ for (const MachineInstr *MI : DefsIt.second) {
+ const MachineInstr *Def = DefsIt.first;
MapRegToId::const_iterator It;
// If all the reaching defs are not ADRP, this use will not be
// simplifiable.
- if ((ADRPMode && Def->getOpcode() != ARM64::ADRP) ||
+ if ((ADRPMode && Def->getOpcode() != AArch64::ADRP) ||
(!ADRPMode && !canDefBePartOfLOH(Def)) ||
- (!ADRPMode && isCandidateStore(*UsesIt) &&
+ (!ADRPMode && isCandidateStore(MI) &&
// stores are LOH candidates iff the end of the chain is used as
// base.
- ((It = RegToId.find((*UsesIt)->getOperand(1).getReg())) == EndIt ||
+ ((It = RegToId.find((MI)->getOperand(1).getReg())) == EndIt ||
It->second != CurReg))) {
- NotCandidate.insert(*UsesIt);
+ NotCandidate.insert(MI);
continue;
}
// Do not consider self reaching as a simplifiable case for ADRP.
- if (!ADRPMode || *UsesIt != DefsIt->first) {
- UseToReachingDefs[*UsesIt].insert(DefsIt->first);
+ if (!ADRPMode || MI != DefsIt.first) {
+ UseToReachingDefs[MI].insert(DefsIt.first);
// If UsesIt has several reaching definitions, it is not
// candidate for simplification in non-ADRPMode.
- if (!ADRPMode && UseToReachingDefs[*UsesIt].size() > 1)
- NotCandidate.insert(*UsesIt);
+ if (!ADRPMode && UseToReachingDefs[MI].size() > 1)
+ NotCandidate.insert(MI);
}
}
}
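
Stripped of the LOH-specific filters, the inversion performed here just turns a def-to-uses map into a use-to-defs map and flags any use reached by more than one definition. A hedged sketch with stand-in types rather than the pass's typedefs:

#include <map>
#include <set>

using Instr = const void *; // stand-in for const MachineInstr *
using InstrToInstrsMap = std::map<Instr, std::set<Instr>>;

static void invertReachedUses(const InstrToInstrsMap &DefToUses,
                              InstrToInstrsMap &UseToDefs,
                              std::set<Instr> &NotCandidate) {
  for (const auto &DU : DefToUses)
    for (Instr Use : DU.second) {
      UseToDefs[Use].insert(DU.first);
      // A use reached by several definitions cannot be simplified.
      if (UseToDefs[Use].size() > 1)
        NotCandidate.insert(Use);
    }
}
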
@@ -647,10 +615,10 @@ static void reachedUsesToDefs(InstrToInstrs &UseToReachingDefs,
/// Based on the use to defs information (in ADRPMode), compute the
/// opportunities of LOH ADRP-related.
static void computeADRP(const InstrToInstrs &UseToDefs,
- ARM64FunctionInfo &ARM64FI,
+ AArch64FunctionInfo &AArch64FI,
const MachineDominatorTree *MDT) {
DEBUG(dbgs() << "*** Compute LOH for ADRP\n");
- for (const auto &Entry: UseToDefs) {
+ for (const auto &Entry : UseToDefs) {
unsigned Size = Entry.second.size();
if (Size == 0)
continue;
@@ -666,7 +634,7 @@ static void computeADRP(const InstrToInstrs &UseToDefs,
SmallVector<const MachineInstr *, 2> Args;
Args.push_back(L2);
Args.push_back(L1);
- ARM64FI.addLOHDirective(MCLOH_AdrpAdrp, Args);
+ AArch64FI.addLOHDirective(MCLOH_AdrpAdrp, Args);
++NumADRPSimpleCandidate;
}
#ifdef DEBUG
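
For context, an MCLOH_AdrpAdrp directive records two ADRP instructions that compute the same page address, which lets the linker drop or rewrite the second one. A source-level shape that can produce the pattern, assuming the two globals end up in the same page (names are illustrative):

extern int GlobalA;
extern int GlobalB; // assumed to be laid out in the same page as GlobalA

int sum() {
  // Naive codegen emits one adrp per global, both targeting the same page;
  // the AdrpAdrp hint tells the linker the second is redundant.
  return GlobalA + GlobalB;
}
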
@@ -688,19 +656,19 @@ static bool isCandidateLoad(const MachineInstr *Instr) {
switch (Instr->getOpcode()) {
default:
return false;
- case ARM64::LDRSBWui:
- case ARM64::LDRSBXui:
- case ARM64::LDRSHWui:
- case ARM64::LDRSHXui:
- case ARM64::LDRSWui:
- case ARM64::LDRBui:
- case ARM64::LDRHui:
- case ARM64::LDRWui:
- case ARM64::LDRXui:
- case ARM64::LDRSui:
- case ARM64::LDRDui:
- case ARM64::LDRQui:
- if (Instr->getOperand(2).getTargetFlags() & ARM64II::MO_GOT)
+ case AArch64::LDRSBWui:
+ case AArch64::LDRSBXui:
+ case AArch64::LDRSHWui:
+ case AArch64::LDRSHXui:
+ case AArch64::LDRSWui:
+ case AArch64::LDRBui:
+ case AArch64::LDRHui:
+ case AArch64::LDRWui:
+ case AArch64::LDRXui:
+ case AArch64::LDRSui:
+ case AArch64::LDRDui:
+ case AArch64::LDRQui:
+ if (Instr->getOperand(2).getTargetFlags() & AArch64II::MO_GOT)
return false;
return true;
}
@@ -713,12 +681,12 @@ static bool supportLoadFromLiteral(const MachineInstr *Instr) {
switch (Instr->getOpcode()) {
default:
return false;
- case ARM64::LDRSWui:
- case ARM64::LDRWui:
- case ARM64::LDRXui:
- case ARM64::LDRSui:
- case ARM64::LDRDui:
- case ARM64::LDRQui:
+ case AArch64::LDRSWui:
+ case AArch64::LDRWui:
+ case AArch64::LDRXui:
+ case AArch64::LDRSui:
+ case AArch64::LDRDui:
+ case AArch64::LDRQui:
return true;
}
// Unreachable.
@@ -737,7 +705,7 @@ static bool isCandidate(const MachineInstr *Instr,
return false;
const MachineInstr *Def = *UseToDefs.find(Instr)->second.begin();
- if (Def->getOpcode() != ARM64::ADRP) {
+ if (Def->getOpcode() != AArch64::ADRP) {
// At this point, Def is ADDXri or LDRXui of the right type of
// symbol, because we filtered out the uses that were not defined
// by these kinds of instructions (+ ADRP).
@@ -747,8 +715,9 @@ static bool isCandidate(const MachineInstr *Instr,
if (!MDT->dominates(Def, Instr))
return false;
// Move one node up in the simple chain.
- if (UseToDefs.find(Def) == UseToDefs.end()
- // The map may contain garbage we have to ignore.
+ if (UseToDefs.find(Def) ==
+ UseToDefs.end()
+ // The map may contain garbage we have to ignore.
||
UseToDefs.find(Def)->second.empty())
return false;
@@ -759,52 +728,52 @@ static bool isCandidate(const MachineInstr *Instr,
// - top is ADRP.
// - check the simple chain property: each intermediate node must
// dominate the next one.
- if (Def->getOpcode() == ARM64::ADRP)
+ if (Def->getOpcode() == AArch64::ADRP)
return MDT->dominates(Def, Instr);
return false;
}
-static bool registerADRCandidate(const MachineInstr *Use,
+static bool registerADRCandidate(const MachineInstr &Use,
const InstrToInstrs &UseToDefs,
const InstrToInstrs *DefsPerColorToUses,
- ARM64FunctionInfo &ARM64FI,
+ AArch64FunctionInfo &AArch64FI,
SetOfMachineInstr *InvolvedInLOHs,
const MapRegToId &RegToId) {
// Look for opportunities to turn ADRP -> ADD or
// ADRP -> LDR GOTPAGEOFF into ADR.
// If ADRP has more than one use, give up.
- if (Use->getOpcode() != ARM64::ADDXri &&
- (Use->getOpcode() != ARM64::LDRXui ||
- !(Use->getOperand(2).getTargetFlags() & ARM64II::MO_GOT)))
+ if (Use.getOpcode() != AArch64::ADDXri &&
+ (Use.getOpcode() != AArch64::LDRXui ||
+ !(Use.getOperand(2).getTargetFlags() & AArch64II::MO_GOT)))
return false;
- InstrToInstrs::const_iterator It = UseToDefs.find(Use);
+ InstrToInstrs::const_iterator It = UseToDefs.find(&Use);
// The map may contain garbage that we need to ignore.
if (It == UseToDefs.end() || It->second.empty())
return false;
- const MachineInstr *Def = *It->second.begin();
- if (Def->getOpcode() != ARM64::ADRP)
+ const MachineInstr &Def = **It->second.begin();
+ if (Def.getOpcode() != AArch64::ADRP)
return false;
// Check the number of users of ADRP.
const SetOfMachineInstr *Users =
getUses(DefsPerColorToUses,
- RegToId.find(Def->getOperand(0).getReg())->second, Def);
+ RegToId.find(Def.getOperand(0).getReg())->second, Def);
if (Users->size() > 1) {
++NumADRComplexCandidate;
return false;
}
++NumADRSimpleCandidate;
- assert((!InvolvedInLOHs || InvolvedInLOHs->insert(Def)) &&
+ assert((!InvolvedInLOHs || InvolvedInLOHs->insert(&Def)) &&
"ADRP already involved in LOH.");
- assert((!InvolvedInLOHs || InvolvedInLOHs->insert(Use)) &&
+ assert((!InvolvedInLOHs || InvolvedInLOHs->insert(&Use)) &&
"ADD already involved in LOH.");
- DEBUG(dbgs() << "Record AdrpAdd\n" << *Def << '\n' << *Use << '\n');
+ DEBUG(dbgs() << "Record AdrpAdd\n" << Def << '\n' << Use << '\n');
SmallVector<const MachineInstr *, 2> Args;
- Args.push_back(Def);
- Args.push_back(Use);
+ Args.push_back(&Def);
+ Args.push_back(&Use);
- ARM64FI.addLOHDirective(Use->getOpcode() == ARM64::ADDXri ? MCLOH_AdrpAdd
- : MCLOH_AdrpLdrGot,
+ AArch64FI.addLOHDirective(Use.getOpcode() == AArch64::ADDXri ? MCLOH_AdrpAdd
+ : MCLOH_AdrpLdrGot,
Args);
return true;
}
@@ -813,9 +782,9 @@ static bool registerADRCandidate(const MachineInstr *Use,
/// opportunities of LOH non-ADRP-related
static void computeOthers(const InstrToInstrs &UseToDefs,
const InstrToInstrs *DefsPerColorToUses,
- ARM64FunctionInfo &ARM64FI, const MapRegToId &RegToId,
+ AArch64FunctionInfo &AArch64FI, const MapRegToId &RegToId,
const MachineDominatorTree *MDT) {
- SetOfMachineInstr *InvolvedInLOHs = NULL;
+ SetOfMachineInstr *InvolvedInLOHs = nullptr;
#ifdef DEBUG
SetOfMachineInstr InvolvedInLOHsStorage;
InvolvedInLOHs = &InvolvedInLOHsStorage;
@@ -831,20 +800,18 @@ static void computeOthers(const InstrToInstrs &UseToDefs,
// to be changed.
SetOfMachineInstr PotentialCandidates;
SetOfMachineInstr PotentialADROpportunities;
- for (InstrToInstrs::const_iterator UseIt = UseToDefs.begin(),
- EndUseIt = UseToDefs.end();
- UseIt != EndUseIt; ++UseIt) {
+ for (auto &Use : UseToDefs) {
// If no definition is available, this is a non candidate.
- if (UseIt->second.empty())
+ if (Use.second.empty())
continue;
// Keep only instructions that are load or store and at the end of
// a ADRP -> ADD/LDR/Nothing chain.
// We already filtered out the no-chain cases.
- if (!isCandidate(UseIt->first, UseToDefs, MDT)) {
- PotentialADROpportunities.insert(UseIt->first);
+ if (!isCandidate(Use.first, UseToDefs, MDT)) {
+ PotentialADROpportunities.insert(Use.first);
continue;
}
- PotentialCandidates.insert(UseIt->first);
+ PotentialCandidates.insert(Use.first);
}
// Make the following distinctions for statistics as the linker does
@@ -862,38 +829,34 @@ static void computeOthers(const InstrToInstrs &UseToDefs,
// PotentialCandidates are the result of a chain ADRP -> ADD/LDR ->
// A potential candidate becomes a candidate if its current immediate
// operand is zero and each node of the chain has only one user.
- SetOfMachineInstr::const_iterator CandidateIt, EndCandidateIt;
#ifdef DEBUG
SetOfMachineInstr DefsOfPotentialCandidates;
#endif
- for (CandidateIt = PotentialCandidates.begin(),
- EndCandidateIt = PotentialCandidates.end();
- CandidateIt != EndCandidateIt; ++CandidateIt) {
- const MachineInstr *Candidate = *CandidateIt;
+ for (const MachineInstr *Candidate : PotentialCandidates) {
// Get the definition of the candidate i.e., ADD or LDR.
const MachineInstr *Def = *UseToDefs.find(Candidate)->second.begin();
// Record the elements of the chain.
const MachineInstr *L1 = Def;
- const MachineInstr *L2 = NULL;
+ const MachineInstr *L2 = nullptr;
unsigned ImmediateDefOpc = Def->getOpcode();
- if (Def->getOpcode() != ARM64::ADRP) {
+ if (Def->getOpcode() != AArch64::ADRP) {
// Check the number of users of this node.
const SetOfMachineInstr *Users =
getUses(DefsPerColorToUses,
- RegToId.find(Def->getOperand(0).getReg())->second, Def);
+ RegToId.find(Def->getOperand(0).getReg())->second, *Def);
if (Users->size() > 1) {
#ifdef DEBUG
// If all the uses of this def are in the potential candidates, this is
// a complex candidate of level 2.
- SetOfMachineInstr::const_iterator UseIt = Users->begin();
- SetOfMachineInstr::const_iterator EndUseIt = Users->end();
- for (; UseIt != EndUseIt; ++UseIt) {
- if (!PotentialCandidates.count(*UseIt)) {
+ bool IsLevel2 = true;
+ for (const MachineInstr *MI : *Users) {
+ if (!PotentialCandidates.count(MI)) {
++NumTooCplxLvl2;
+ IsLevel2 = false;
break;
}
}
- if (UseIt == EndUseIt)
+ if (IsLevel2)
++NumCplxLvl2;
#endif // DEBUG
PotentialADROpportunities.insert(Def);
@@ -908,7 +871,7 @@ static void computeOthers(const InstrToInstrs &UseToDefs,
// Check the number of users of the first node in the chain, i.e., ADRP
const SetOfMachineInstr *Users =
getUses(DefsPerColorToUses,
- RegToId.find(Def->getOperand(0).getReg())->second, Def);
+ RegToId.find(Def->getOperand(0).getReg())->second, *Def);
if (Users->size() > 1) {
#ifdef DEBUG
// if all the uses of this def are in the defs of the potential candidate,
@@ -923,7 +886,7 @@ static void computeOthers(const InstrToInstrs &UseToDefs,
}
}
bool Found = false;
- for (auto &Use: *Users) {
+ for (auto &Use : *Users) {
if (!DefsOfPotentialCandidates.count(Use)) {
++NumTooCplxLvl1;
Found = true;
@@ -936,15 +899,15 @@ static void computeOthers(const InstrToInstrs &UseToDefs,
continue;
}
- bool IsL2Add = (ImmediateDefOpc == ARM64::ADDXri);
+ bool IsL2Add = (ImmediateDefOpc == AArch64::ADDXri);
// If the chain is three instructions long and ldr is the second element,
// then this ldr must load from the GOT; otherwise this is not a correct chain.
- if (L2 && !IsL2Add && L2->getOperand(2).getTargetFlags() != ARM64II::MO_GOT)
+ if (L2 && !IsL2Add && L2->getOperand(2).getTargetFlags() != AArch64II::MO_GOT)
continue;
SmallVector<const MachineInstr *, 3> Args;
MCLOHType Kind;
if (isCandidateLoad(Candidate)) {
- if (L2 == NULL) {
+ if (!L2) {
// At this point, the candidate LOH indicates that the ldr instruction
// may use a direct access to the symbol. There is no such encoding
// for loads of byte and half.
@@ -981,18 +944,18 @@ static void computeOthers(const InstrToInstrs &UseToDefs,
#ifdef DEBUG
// get the immediate of the load
if (Candidate->getOperand(2).getImm() == 0)
- if (ImmediateDefOpc == ARM64::ADDXri)
+ if (ImmediateDefOpc == AArch64::ADDXri)
++NumADDToLDR;
else
++NumLDRToLDR;
- else if (ImmediateDefOpc == ARM64::ADDXri)
+ else if (ImmediateDefOpc == AArch64::ADDXri)
++NumADDToLDRWithImm;
else
++NumLDRToLDRWithImm;
#endif // DEBUG
}
} else {
- if (ImmediateDefOpc == ARM64::ADRP)
+ if (ImmediateDefOpc == AArch64::ADRP)
continue;
else {
@@ -1015,23 +978,23 @@ static void computeOthers(const InstrToInstrs &UseToDefs,
#ifdef DEBUG
// get the immediate of the store
if (Candidate->getOperand(2).getImm() == 0)
- if (ImmediateDefOpc == ARM64::ADDXri)
+ if (ImmediateDefOpc == AArch64::ADDXri)
++NumADDToSTR;
else
++NumLDRToSTR;
- else if (ImmediateDefOpc == ARM64::ADDXri)
+ else if (ImmediateDefOpc == AArch64::ADDXri)
++NumADDToSTRWithImm;
else
++NumLDRToSTRWithImm;
#endif // DEBUG
}
}
- ARM64FI.addLOHDirective(Kind, Args);
+ AArch64FI.addLOHDirective(Kind, Args);
}
// Now, we grabbed all the big patterns, check ADR opportunities.
- for (const MachineInstr *Candidate: PotentialADROpportunities)
- registerADRCandidate(Candidate, UseToDefs, DefsPerColorToUses, ARM64FI,
+ for (const MachineInstr *Candidate : PotentialADROpportunities)
+ registerADRCandidate(*Candidate, UseToDefs, DefsPerColorToUses, AArch64FI,
InvolvedInLOHs, RegToId);
}
@@ -1053,18 +1016,14 @@ static void collectInvolvedReg(MachineFunction &MF, MapRegToId &RegToId,
}
DEBUG(dbgs() << "** Collect Involved Register\n");
- for (MachineFunction::const_iterator IMBB = MF.begin(), IMBBEnd = MF.end();
- IMBB != IMBBEnd; ++IMBB)
- for (MachineBasicBlock::const_iterator II = IMBB->begin(),
- IEnd = IMBB->end();
- II != IEnd; ++II) {
-
- if (!canDefBePartOfLOH(II))
+ for (const auto &MBB : MF) {
+ for (const MachineInstr &MI : MBB) {
+ if (!canDefBePartOfLOH(&MI))
continue;
// Process defs
- for (MachineInstr::const_mop_iterator IO = II->operands_begin(),
- IOEnd = II->operands_end();
+ for (MachineInstr::const_mop_iterator IO = MI.operands_begin(),
+ IOEnd = MI.operands_end();
IO != IOEnd; ++IO) {
if (!IO->isReg() || !IO->isDef())
continue;
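
Throughout the pass, a register's "color" is nothing more than the dense index assigned here, so per-register state can live in flat arrays of size NbReg. A minimal sketch of the numbering, using stand-in containers rather than the file's DenseMap typedefs:

#include <map>
#include <vector>

static unsigned involveReg(unsigned Reg,
                           std::map<unsigned, unsigned> &RegToId,
                           std::vector<unsigned> &IdToReg) {
  auto It = RegToId.find(Reg);
  if (It != RegToId.end())
    return It->second;          // already recorded
  unsigned Id = IdToReg.size(); // next dense index ("color")
  RegToId[Reg] = Id;
  IdToReg.push_back(Reg);
  return Id;
}
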
@@ -1079,31 +1038,32 @@ static void collectInvolvedReg(MachineFunction &MF, MapRegToId &RegToId,
}
}
}
+ }
}
-bool ARM64CollectLOH::runOnMachineFunction(MachineFunction &Fn) {
- const TargetMachine &TM = Fn.getTarget();
+bool AArch64CollectLOH::runOnMachineFunction(MachineFunction &MF) {
+ const TargetMachine &TM = MF.getTarget();
const TargetRegisterInfo *TRI = TM.getRegisterInfo();
const MachineDominatorTree *MDT = &getAnalysis<MachineDominatorTree>();
MapRegToId RegToId;
MapIdToReg IdToReg;
- ARM64FunctionInfo *ARM64FI = Fn.getInfo<ARM64FunctionInfo>();
- assert(ARM64FI && "No MachineFunctionInfo for this function!");
+ AArch64FunctionInfo *AArch64FI = MF.getInfo<AArch64FunctionInfo>();
+ assert(AArch64FI && "No MachineFunctionInfo for this function!");
- DEBUG(dbgs() << "Looking for LOH in " << Fn.getName() << '\n');
+ DEBUG(dbgs() << "Looking for LOH in " << MF.getName() << '\n');
- collectInvolvedReg(Fn, RegToId, IdToReg, TRI);
+ collectInvolvedReg(MF, RegToId, IdToReg, TRI);
if (RegToId.empty())
return false;
- MachineInstr *DummyOp = NULL;
+ MachineInstr *DummyOp = nullptr;
if (BasicBlockScopeOnly) {
- const ARM64InstrInfo *TII =
- static_cast<const ARM64InstrInfo *>(TM.getInstrInfo());
+ const AArch64InstrInfo *TII =
+ static_cast<const AArch64InstrInfo *>(TM.getInstrInfo());
// For local analysis, create a dummy operation to record uses that are not
// local.
- DummyOp = Fn.CreateMachineInstr(TII->get(ARM64::COPY), DebugLoc());
+ DummyOp = MF.CreateMachineInstr(TII->get(AArch64::COPY), DebugLoc());
}
unsigned NbReg = RegToId.size();
@@ -1114,7 +1074,7 @@ bool ARM64CollectLOH::runOnMachineFunction(MachineFunction &Fn) {
// Compute the reaching def in ADRP mode, meaning ADRP definitions
// are first considered as uses.
- reachingDef(&Fn, ColorOpToReachedUses, RegToId, true, DummyOp);
+ reachingDef(MF, ColorOpToReachedUses, RegToId, true, DummyOp);
DEBUG(dbgs() << "ADRP reaching defs\n");
DEBUG(printReachingDef(ColorOpToReachedUses, NbReg, TRI, IdToReg));
@@ -1124,14 +1084,14 @@ bool ARM64CollectLOH::runOnMachineFunction(MachineFunction &Fn) {
reachedUsesToDefs(ADRPToReachingDefs, ColorOpToReachedUses, RegToId, true);
// Compute LOH for ADRP.
- computeADRP(ADRPToReachingDefs, *ARM64FI, MDT);
+ computeADRP(ADRPToReachingDefs, *AArch64FI, MDT);
delete[] ColorOpToReachedUses;
// Continue with general ADRP -> ADD/LDR -> LDR/STR pattern.
ColorOpToReachedUses = new InstrToInstrs[NbReg];
// first perform a regular reaching def analysis.
- reachingDef(&Fn, ColorOpToReachedUses, RegToId, false, DummyOp);
+ reachingDef(MF, ColorOpToReachedUses, RegToId, false, DummyOp);
DEBUG(dbgs() << "All reaching defs\n");
DEBUG(printReachingDef(ColorOpToReachedUses, NbReg, TRI, IdToReg));
@@ -1140,18 +1100,18 @@ bool ARM64CollectLOH::runOnMachineFunction(MachineFunction &Fn) {
reachedUsesToDefs(UsesToReachingDefs, ColorOpToReachedUses, RegToId, false);
// Compute other than AdrpAdrp LOH.
- computeOthers(UsesToReachingDefs, ColorOpToReachedUses, *ARM64FI, RegToId,
+ computeOthers(UsesToReachingDefs, ColorOpToReachedUses, *AArch64FI, RegToId,
MDT);
delete[] ColorOpToReachedUses;
if (BasicBlockScopeOnly)
- Fn.DeleteMachineInstr(DummyOp);
+ MF.DeleteMachineInstr(DummyOp);
return Modified;
}
-/// createARM64CollectLOHPass - returns an instance of the Statistic for
+/// createAArch64CollectLOHPass - returns an instance of the Statistic for
/// linker optimization pass.
-FunctionPass *llvm::createARM64CollectLOHPass() {
- return new ARM64CollectLOH();
+FunctionPass *llvm::createAArch64CollectLOHPass() {
+ return new AArch64CollectLOH();
}
diff --git a/lib/Target/ARM64/ARM64ConditionalCompares.cpp b/lib/Target/AArch64/AArch64ConditionalCompares.cpp
index b495afa..452cdec 100644
--- a/lib/Target/ARM64/ARM64ConditionalCompares.cpp
+++ b/lib/Target/AArch64/AArch64ConditionalCompares.cpp
@@ -1,4 +1,4 @@
-//===-- ARM64ConditionalCompares.cpp --- CCMP formation for ARM64 ---------===//
+//===-- AArch64ConditionalCompares.cpp --- CCMP formation for AArch64 -----===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,7 +7,7 @@
//
//===----------------------------------------------------------------------===//
//
-// This file implements the ARM64ConditionalCompares pass which reduces
+// This file implements the AArch64ConditionalCompares pass which reduces
// branching and code size by using the conditional compare instructions CCMP,
// CCMN, and FCMP.
//
@@ -17,8 +17,7 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "arm64-ccmp"
-#include "ARM64.h"
+#include "AArch64.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SetVector.h"
@@ -43,14 +42,16 @@
using namespace llvm;
+#define DEBUG_TYPE "aarch64-ccmp"
+
// Absolute maximum number of instructions allowed per speculated block.
// This bypasses all other heuristics, so it should be set fairly high.
static cl::opt<unsigned> BlockInstrLimit(
- "arm64-ccmp-limit", cl::init(30), cl::Hidden,
+ "aarch64-ccmp-limit", cl::init(30), cl::Hidden,
cl::desc("Maximum number of instructions per speculated block."));
// Stress testing mode - disable heuristics.
-static cl::opt<bool> Stress("arm64-stress-ccmp", cl::Hidden,
+static cl::opt<bool> Stress("aarch64-stress-ccmp", cl::Hidden,
cl::desc("Turn all knobs to 11"));
STATISTIC(NumConsidered, "Number of ccmps considered");
@@ -62,8 +63,8 @@ STATISTIC(NumCmpBranchRejs, "Number of ccmps rejected (CmpBB branch)");
STATISTIC(NumCmpTermRejs, "Number of ccmps rejected (CmpBB is cbz...)");
STATISTIC(NumImmRangeRejs, "Number of ccmps rejected (Imm out of range)");
STATISTIC(NumLiveDstRejs, "Number of ccmps rejected (Cmp dest live)");
-STATISTIC(NumMultCPSRUses, "Number of ccmps rejected (CPSR used)");
-STATISTIC(NumUnknCPSRDefs, "Number of ccmps rejected (CPSR def unknown)");
+STATISTIC(NumMultNZCVUses, "Number of ccmps rejected (NZCV used)");
+STATISTIC(NumUnknNZCVDefs, "Number of ccmps rejected (NZCV def unknown)");
STATISTIC(NumSpeculateRejs, "Number of ccmps rejected (Can't speculate)");
@@ -97,7 +98,7 @@ STATISTIC(NumCompBranches, "Number of cbz/cbnz branches converted");
//
// The cmp-conversion turns the compare instruction in CmpBB into a conditional
// compare, and merges CmpBB into Head, speculatively executing its
-// instructions. The ARM64 conditional compare instructions have an immediate
+// instructions. The AArch64 conditional compare instructions have an immediate
// operand that specifies the NZCV flag values when the condition is false and
// the compare isn't executed. This makes it possible to chain compares with
// different condition codes.
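
A source-level shape that benefits from this transformation (illustrative): a short-circuited condition whose two compares need different condition codes, which cmp-conversion can lower to cmp + ccmp + one conditional branch instead of two compare-and-branch sequences.

int eitherMatches(int a, int b) {
  // Head compares a == 5 and CmpBB compares b == 17; after conversion the
  // second compare becomes a ccmp whose NZCV immediate supplies the right
  // flags when the first compare already settled the outcome.
  if (a == 5 || b == 17)
    return 1;
  return 0;
}
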
@@ -147,7 +148,7 @@ public:
/// else.
MachineBasicBlock *Head;
- /// The block containing cmp+br.cond with a sucessor shared with Head.
+ /// The block containing cmp+br.cond with a successor shared with Head.
MachineBasicBlock *CmpBB;
/// The common successor for Head and CmpBB.
@@ -161,13 +162,13 @@ private:
SmallVector<MachineOperand, 4> HeadCond;
/// The condition code that makes Head branch to CmpBB.
- ARM64CC::CondCode HeadCmpBBCC;
+ AArch64CC::CondCode HeadCmpBBCC;
/// The branch condition in CmpBB.
SmallVector<MachineOperand, 4> CmpBBCond;
/// The condition code that makes CmpBB branch to Tail.
- ARM64CC::CondCode CmpBBTailCC;
+ AArch64CC::CondCode CmpBBTailCC;
/// Check if the Tail PHIs are trivially convertible.
bool trivialTailPHIs();
@@ -212,13 +213,14 @@ public:
// Check that all PHIs in Tail are selecting the same value from Head and CmpBB.
// This means that no if-conversion is required when merging CmpBB into Head.
bool SSACCmpConv::trivialTailPHIs() {
- for (MachineBasicBlock::iterator I = Tail->begin(), E = Tail->end();
- I != E && I->isPHI(); ++I) {
+ for (auto &I : *Tail) {
+ if (!I.isPHI())
+ break;
unsigned HeadReg = 0, CmpBBReg = 0;
// PHI operands come in (VReg, MBB) pairs.
- for (unsigned oi = 1, oe = I->getNumOperands(); oi != oe; oi += 2) {
- MachineBasicBlock *MBB = I->getOperand(oi + 1).getMBB();
- unsigned Reg = I->getOperand(oi).getReg();
+ for (unsigned oi = 1, oe = I.getNumOperands(); oi != oe; oi += 2) {
+ MachineBasicBlock *MBB = I.getOperand(oi + 1).getMBB();
+ unsigned Reg = I.getOperand(oi).getReg();
if (MBB == Head) {
assert((!HeadReg || HeadReg == Reg) && "Inconsistent PHI operands");
HeadReg = Reg;
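
As the loop above relies on, a PHI's operands after the result come in (value register, predecessor block) pairs. A standalone sketch of the same walk, assuming the usual MachineInstr headers:

#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstr.h"
#include <cassert>

// Visit the (VReg, MBB) operand pairs of a PHI; operand 0 is the result.
static void visitPHIIncoming(const llvm::MachineInstr &MI) {
  assert(MI.isPHI() && "expected a PHI");
  for (unsigned OpIdx = 1, OpEnd = MI.getNumOperands(); OpIdx != OpEnd;
       OpIdx += 2) {
    unsigned Reg = MI.getOperand(OpIdx).getReg();
    const llvm::MachineBasicBlock *Pred = MI.getOperand(OpIdx + 1).getMBB();
    (void)Reg;  // a real visitor would record or compare the incoming value
    (void)Pred; // and the predecessor it flows in from
  }
}
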
@@ -237,24 +239,25 @@ bool SSACCmpConv::trivialTailPHIs() {
// Assuming that trivialTailPHIs() is true, update the Tail PHIs by simply
// removing the CmpBB operands. The Head operands will be identical.
void SSACCmpConv::updateTailPHIs() {
- for (MachineBasicBlock::iterator I = Tail->begin(), E = Tail->end();
- I != E && I->isPHI(); ++I) {
+ for (auto &I : *Tail) {
+ if (!I.isPHI())
+ break;
// I is a PHI. It can have multiple entries for CmpBB.
- for (unsigned oi = I->getNumOperands(); oi > 2; oi -= 2) {
+ for (unsigned oi = I.getNumOperands(); oi > 2; oi -= 2) {
// PHI operands are (Reg, MBB) at (oi-2, oi-1).
- if (I->getOperand(oi - 1).getMBB() == CmpBB) {
- I->RemoveOperand(oi - 1);
- I->RemoveOperand(oi - 2);
+ if (I.getOperand(oi - 1).getMBB() == CmpBB) {
+ I.RemoveOperand(oi - 1);
+ I.RemoveOperand(oi - 2);
}
}
}
}
-// This pass runs before the ARM64DeadRegisterDefinitions pass, so compares are
-// still writing virtual registers without any uses.
+// This pass runs before the AArch64DeadRegisterDefinitions pass, so compares
+// are still writing virtual registers without any uses.
bool SSACCmpConv::isDeadDef(unsigned DstReg) {
// Writes to the zero register are dead.
- if (DstReg == ARM64::WZR || DstReg == ARM64::XZR)
+ if (DstReg == AArch64::WZR || DstReg == AArch64::XZR)
return true;
if (!TargetRegisterInfo::isVirtualRegister(DstReg))
return false;
@@ -266,11 +269,11 @@ bool SSACCmpConv::isDeadDef(unsigned DstReg) {
// Parse a condition code returned by AnalyzeBranch, and compute the CondCode
// corresponding to TBB.
// Return true if a condition code could be extracted.
-static bool parseCond(ArrayRef<MachineOperand> Cond, ARM64CC::CondCode &CC) {
+static bool parseCond(ArrayRef<MachineOperand> Cond, AArch64CC::CondCode &CC) {
// A normal br.cond simply has the condition code.
if (Cond[0].getImm() != -1) {
assert(Cond.size() == 1 && "Unknown Cond array format");
- CC = (ARM64CC::CondCode)(int)Cond[0].getImm();
+ CC = (AArch64CC::CondCode)(int)Cond[0].getImm();
return true;
}
// For tbz and cbz instruction, the opcode is next.
@@ -279,15 +282,15 @@ static bool parseCond(ArrayRef<MachineOperand> Cond, ARM64CC::CondCode &CC) {
// This includes tbz / tbnz branches which can't be converted to
// ccmp + br.cond.
return false;
- case ARM64::CBZW:
- case ARM64::CBZX:
+ case AArch64::CBZW:
+ case AArch64::CBZX:
assert(Cond.size() == 3 && "Unknown Cond array format");
- CC = ARM64CC::EQ;
+ CC = AArch64CC::EQ;
return true;
- case ARM64::CBNZW:
- case ARM64::CBNZX:
+ case AArch64::CBNZW:
+ case AArch64::CBNZX:
assert(Cond.size() == 3 && "Unknown Cond array format");
- CC = ARM64CC::NE;
+ CC = AArch64CC::NE;
return true;
}
}
@@ -295,20 +298,20 @@ static bool parseCond(ArrayRef<MachineOperand> Cond, ARM64CC::CondCode &CC) {
MachineInstr *SSACCmpConv::findConvertibleCompare(MachineBasicBlock *MBB) {
MachineBasicBlock::iterator I = MBB->getFirstTerminator();
if (I == MBB->end())
- return 0;
+ return nullptr;
// The terminator must be controlled by the flags.
- if (!I->readsRegister(ARM64::CPSR)) {
+ if (!I->readsRegister(AArch64::NZCV)) {
switch (I->getOpcode()) {
- case ARM64::CBZW:
- case ARM64::CBZX:
- case ARM64::CBNZW:
- case ARM64::CBNZX:
+ case AArch64::CBZW:
+ case AArch64::CBZX:
+ case AArch64::CBNZW:
+ case AArch64::CBNZX:
// These can be converted into a ccmp against #0.
return I;
}
++NumCmpTermRejs;
DEBUG(dbgs() << "Flags not used by terminator: " << *I);
- return 0;
+ return nullptr;
}
// Now find the instruction controlling the terminator.
@@ -317,56 +320,56 @@ MachineInstr *SSACCmpConv::findConvertibleCompare(MachineBasicBlock *MBB) {
assert(!I->isTerminator() && "Spurious terminator");
switch (I->getOpcode()) {
// cmp is an alias for subs with a dead destination register.
- case ARM64::SUBSWri:
- case ARM64::SUBSXri:
+ case AArch64::SUBSWri:
+ case AArch64::SUBSXri:
// cmn is an alias for adds with a dead destination register.
- case ARM64::ADDSWri:
- case ARM64::ADDSXri:
+ case AArch64::ADDSWri:
+ case AArch64::ADDSXri:
// Check that the immediate operand is within range, ccmp wants a uimm5.
// Rd = SUBSri Rn, imm, shift
if (I->getOperand(3).getImm() || !isUInt<5>(I->getOperand(2).getImm())) {
DEBUG(dbgs() << "Immediate out of range for ccmp: " << *I);
++NumImmRangeRejs;
- return 0;
+ return nullptr;
}
// Fall through.
- case ARM64::SUBSWrr:
- case ARM64::SUBSXrr:
- case ARM64::ADDSWrr:
- case ARM64::ADDSXrr:
+ case AArch64::SUBSWrr:
+ case AArch64::SUBSXrr:
+ case AArch64::ADDSWrr:
+ case AArch64::ADDSXrr:
if (isDeadDef(I->getOperand(0).getReg()))
return I;
DEBUG(dbgs() << "Can't convert compare with live destination: " << *I);
++NumLiveDstRejs;
- return 0;
- case ARM64::FCMPSrr:
- case ARM64::FCMPDrr:
- case ARM64::FCMPESrr:
- case ARM64::FCMPEDrr:
+ return nullptr;
+ case AArch64::FCMPSrr:
+ case AArch64::FCMPDrr:
+ case AArch64::FCMPESrr:
+ case AArch64::FCMPEDrr:
return I;
}
// Check for flag reads and clobbers.
MIOperands::PhysRegInfo PRI =
- MIOperands(I).analyzePhysReg(ARM64::CPSR, TRI);
+ MIOperands(I).analyzePhysReg(AArch64::NZCV, TRI);
if (PRI.Reads) {
// The ccmp doesn't produce exactly the same flags as the original
// compare, so reject the transform if there are uses of the flags
// besides the terminators.
DEBUG(dbgs() << "Can't create ccmp with multiple uses: " << *I);
- ++NumMultCPSRUses;
- return 0;
+ ++NumMultNZCVUses;
+ return nullptr;
}
if (PRI.Clobbers) {
DEBUG(dbgs() << "Not convertible compare: " << *I);
- ++NumUnknCPSRDefs;
- return 0;
+ ++NumUnknNZCVDefs;
+ return nullptr;
}
}
DEBUG(dbgs() << "Flags not defined in BB#" << MBB->getNumber() << '\n');
- return 0;
+ return nullptr;
}
/// Determine if all the instructions in MBB can safely
@@ -376,7 +379,7 @@ MachineInstr *SSACCmpConv::findConvertibleCompare(MachineBasicBlock *MBB) {
///
bool SSACCmpConv::canSpeculateInstrs(MachineBasicBlock *MBB,
const MachineInstr *CmpMI) {
- // Reject any live-in physregs. It's probably CPSR/EFLAGS, and very hard to
+ // Reject any live-in physregs. It's probably NZCV/EFLAGS, and very hard to
// get right.
if (!MBB->livein_empty()) {
DEBUG(dbgs() << "BB#" << MBB->getNumber() << " has live-ins.\n");
@@ -387,10 +390,8 @@ bool SSACCmpConv::canSpeculateInstrs(MachineBasicBlock *MBB,
// Check all instructions, except the terminators. It is assumed that
// terminators never have side effects or define any used register values.
- for (MachineBasicBlock::iterator I = MBB->begin(),
- E = MBB->getFirstTerminator();
- I != E; ++I) {
- if (I->isDebugValue())
+ for (auto &I : make_range(MBB->begin(), MBB->getFirstTerminator())) {
+ if (I.isDebugValue())
continue;
if (++InstrCount > BlockInstrLimit && !Stress) {
@@ -400,29 +401,29 @@ bool SSACCmpConv::canSpeculateInstrs(MachineBasicBlock *MBB,
}
// There shouldn't normally be any phis in a single-predecessor block.
- if (I->isPHI()) {
- DEBUG(dbgs() << "Can't hoist: " << *I);
+ if (I.isPHI()) {
+ DEBUG(dbgs() << "Can't hoist: " << I);
return false;
}
// Don't speculate loads. Note that it may be possible and desirable to
// speculate GOT or constant pool loads that are guaranteed not to trap,
// but we don't support that for now.
- if (I->mayLoad()) {
- DEBUG(dbgs() << "Won't speculate load: " << *I);
+ if (I.mayLoad()) {
+ DEBUG(dbgs() << "Won't speculate load: " << I);
return false;
}
// We never speculate stores, so an AA pointer isn't necessary.
bool DontMoveAcrossStore = true;
- if (!I->isSafeToMove(TII, 0, DontMoveAcrossStore)) {
- DEBUG(dbgs() << "Can't speculate: " << *I);
+ if (!I.isSafeToMove(TII, nullptr, DontMoveAcrossStore)) {
+ DEBUG(dbgs() << "Can't speculate: " << I);
return false;
}
- // Only CmpMI is alowed to clobber the flags.
- if (&*I != CmpMI && I->modifiesRegister(ARM64::CPSR, TRI)) {
- DEBUG(dbgs() << "Clobbers flags: " << *I);
+ // Only CmpMI is allowed to clobber the flags.
+ if (&I != CmpMI && I.modifiesRegister(AArch64::NZCV, TRI)) {
+ DEBUG(dbgs() << "Clobbers flags: " << I);
return false;
}
}
@@ -434,7 +435,7 @@ bool SSACCmpConv::canSpeculateInstrs(MachineBasicBlock *MBB,
///
bool SSACCmpConv::canConvert(MachineBasicBlock *MBB) {
Head = MBB;
- Tail = CmpBB = 0;
+ Tail = CmpBB = nullptr;
if (Head->succ_size() != 2)
return false;
@@ -494,7 +495,7 @@ bool SSACCmpConv::canConvert(MachineBasicBlock *MBB) {
// The branch we're looking to eliminate must be analyzable.
HeadCond.clear();
- MachineBasicBlock *TBB = 0, *FBB = 0;
+ MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
if (TII->AnalyzeBranch(*Head, TBB, FBB, HeadCond)) {
DEBUG(dbgs() << "Head branch not analyzable.\n");
++NumHeadBranchRejs;
@@ -518,11 +519,11 @@ bool SSACCmpConv::canConvert(MachineBasicBlock *MBB) {
// Make sure the branch direction is right.
if (TBB != CmpBB) {
assert(TBB == Tail && "Unexpected TBB");
- HeadCmpBBCC = ARM64CC::getInvertedCondCode(HeadCmpBBCC);
+ HeadCmpBBCC = AArch64CC::getInvertedCondCode(HeadCmpBBCC);
}
CmpBBCond.clear();
- TBB = FBB = 0;
+ TBB = FBB = nullptr;
if (TII->AnalyzeBranch(*CmpBB, TBB, FBB, CmpBBCond)) {
DEBUG(dbgs() << "CmpBB branch not analyzable.\n");
++NumCmpBranchRejs;
@@ -542,10 +543,10 @@ bool SSACCmpConv::canConvert(MachineBasicBlock *MBB) {
}
if (TBB != Tail)
- CmpBBTailCC = ARM64CC::getInvertedCondCode(CmpBBTailCC);
+ CmpBBTailCC = AArch64CC::getInvertedCondCode(CmpBBTailCC);
- DEBUG(dbgs() << "Head->CmpBB on " << ARM64CC::getCondCodeName(HeadCmpBBCC)
- << ", CmpBB->Tail on " << ARM64CC::getCondCodeName(CmpBBTailCC)
+ DEBUG(dbgs() << "Head->CmpBB on " << AArch64CC::getCondCodeName(HeadCmpBBCC)
+ << ", CmpBB->Tail on " << AArch64CC::getCondCodeName(CmpBBTailCC)
<< '\n');
CmpMI = findConvertibleCompare(CmpBB);
@@ -578,13 +579,13 @@ void SSACCmpConv::convert(SmallVectorImpl<MachineBasicBlock *> &RemovedBlocks) {
++NumCompBranches;
unsigned Opc = 0;
switch (HeadCond[1].getImm()) {
- case ARM64::CBZW:
- case ARM64::CBNZW:
- Opc = ARM64::SUBSWri;
+ case AArch64::CBZW:
+ case AArch64::CBNZW:
+ Opc = AArch64::SUBSWri;
break;
- case ARM64::CBZX:
- case ARM64::CBNZX:
- Opc = ARM64::SUBSXri;
+ case AArch64::CBZX:
+ case AArch64::CBNZX:
+ Opc = AArch64::SUBSXri;
break;
default:
llvm_unreachable("Cannot convert Head branch");
@@ -614,27 +615,27 @@ void SSACCmpConv::convert(SmallVectorImpl<MachineBasicBlock *> &RemovedBlocks) {
switch (CmpMI->getOpcode()) {
default:
llvm_unreachable("Unknown compare opcode");
- case ARM64::SUBSWri: Opc = ARM64::CCMPWi; break;
- case ARM64::SUBSWrr: Opc = ARM64::CCMPWr; break;
- case ARM64::SUBSXri: Opc = ARM64::CCMPXi; break;
- case ARM64::SUBSXrr: Opc = ARM64::CCMPXr; break;
- case ARM64::ADDSWri: Opc = ARM64::CCMNWi; break;
- case ARM64::ADDSWrr: Opc = ARM64::CCMNWr; break;
- case ARM64::ADDSXri: Opc = ARM64::CCMNXi; break;
- case ARM64::ADDSXrr: Opc = ARM64::CCMNXr; break;
- case ARM64::FCMPSrr: Opc = ARM64::FCCMPSrr; FirstOp = 0; break;
- case ARM64::FCMPDrr: Opc = ARM64::FCCMPDrr; FirstOp = 0; break;
- case ARM64::FCMPESrr: Opc = ARM64::FCCMPESrr; FirstOp = 0; break;
- case ARM64::FCMPEDrr: Opc = ARM64::FCCMPEDrr; FirstOp = 0; break;
- case ARM64::CBZW:
- case ARM64::CBNZW:
- Opc = ARM64::CCMPWi;
+ case AArch64::SUBSWri: Opc = AArch64::CCMPWi; break;
+ case AArch64::SUBSWrr: Opc = AArch64::CCMPWr; break;
+ case AArch64::SUBSXri: Opc = AArch64::CCMPXi; break;
+ case AArch64::SUBSXrr: Opc = AArch64::CCMPXr; break;
+ case AArch64::ADDSWri: Opc = AArch64::CCMNWi; break;
+ case AArch64::ADDSWrr: Opc = AArch64::CCMNWr; break;
+ case AArch64::ADDSXri: Opc = AArch64::CCMNXi; break;
+ case AArch64::ADDSXrr: Opc = AArch64::CCMNXr; break;
+ case AArch64::FCMPSrr: Opc = AArch64::FCCMPSrr; FirstOp = 0; break;
+ case AArch64::FCMPDrr: Opc = AArch64::FCCMPDrr; FirstOp = 0; break;
+ case AArch64::FCMPESrr: Opc = AArch64::FCCMPESrr; FirstOp = 0; break;
+ case AArch64::FCMPEDrr: Opc = AArch64::FCCMPEDrr; FirstOp = 0; break;
+ case AArch64::CBZW:
+ case AArch64::CBNZW:
+ Opc = AArch64::CCMPWi;
FirstOp = 0;
isZBranch = true;
break;
- case ARM64::CBZX:
- case ARM64::CBNZX:
- Opc = ARM64::CCMPXi;
+ case AArch64::CBZX:
+ case AArch64::CBNZX:
+ Opc = AArch64::CCMPXi;
FirstOp = 0;
isZBranch = true;
break;
@@ -645,7 +646,7 @@ void SSACCmpConv::convert(SmallVectorImpl<MachineBasicBlock *> &RemovedBlocks) {
// The NZCV immediate operand should provide flags for the case where Head
// would have branched to Tail. These flags should cause the new Head
// terminator to branch to tail.
- unsigned NZCV = ARM64CC::getNZCVToSatisfyCondCode(CmpBBTailCC);
+ unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(CmpBBTailCC);
const MCInstrDesc &MCID = TII->get(Opc);
MRI->constrainRegClass(CmpMI->getOperand(FirstOp).getReg(),
TII->getRegClass(MCID, 0, TRI, *MF));
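
Concretely, the immediate computed here is the NZCV value the ccmp writes when its own condition is false, chosen so that Head's surviving terminator still branches to Tail. A worked example under the architectural #nzcv bit layout:

// NZCV immediate bits as encoded in ccmp: N=8, Z=4, C=2, V=1.
enum NZCVBits { V = 1, C = 2, Z = 4, N = 8 };
// For CmpBBTailCC == EQ the satisfying value is the Z bit: a bypassed ccmp
// then reads as "equal" and the b.eq into Tail is taken.
unsigned NZCVForEQ = Z;
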
@@ -664,10 +665,10 @@ void SSACCmpConv::convert(SmallVectorImpl<MachineBasicBlock *> &RemovedBlocks) {
// If CmpMI was a terminator, we need a new conditional branch to replace it.
// This now becomes a Head terminator.
if (isZBranch) {
- bool isNZ = CmpMI->getOpcode() == ARM64::CBNZW ||
- CmpMI->getOpcode() == ARM64::CBNZX;
- BuildMI(*Head, CmpMI, CmpMI->getDebugLoc(), TII->get(ARM64::Bcc))
- .addImm(isNZ ? ARM64CC::NE : ARM64CC::EQ)
+ bool isNZ = CmpMI->getOpcode() == AArch64::CBNZW ||
+ CmpMI->getOpcode() == AArch64::CBNZX;
+ BuildMI(*Head, CmpMI, CmpMI->getDebugLoc(), TII->get(AArch64::Bcc))
+ .addImm(isNZ ? AArch64CC::NE : AArch64CC::EQ)
.addOperand(CmpMI->getOperand(1)); // Branch target.
}
CmpMI->eraseFromParent();
@@ -686,10 +687,10 @@ int SSACCmpConv::expectedCodeSizeDelta() const {
// plus a branch instruction.
if (HeadCond[0].getImm() == -1) {
switch (HeadCond[1].getImm()) {
- case ARM64::CBZW:
- case ARM64::CBNZW:
- case ARM64::CBZX:
- case ARM64::CBNZX:
+ case AArch64::CBZW:
+ case AArch64::CBNZW:
+ case AArch64::CBZX:
+ case AArch64::CBNZX:
// Therefore delta += 1
delta = 1;
break;
@@ -705,21 +706,21 @@ int SSACCmpConv::expectedCodeSizeDelta() const {
default:
--delta;
break;
- case ARM64::CBZW:
- case ARM64::CBNZW:
- case ARM64::CBZX:
- case ARM64::CBNZX:
+ case AArch64::CBZW:
+ case AArch64::CBNZW:
+ case AArch64::CBZX:
+ case AArch64::CBNZX:
break;
}
return delta;
}
//===----------------------------------------------------------------------===//
-// ARM64ConditionalCompares Pass
+// AArch64ConditionalCompares Pass
//===----------------------------------------------------------------------===//
namespace {
-class ARM64ConditionalCompares : public MachineFunctionPass {
+class AArch64ConditionalCompares : public MachineFunctionPass {
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
const MCSchedModel *SchedModel;
@@ -734,10 +735,12 @@ class ARM64ConditionalCompares : public MachineFunctionPass {
public:
static char ID;
- ARM64ConditionalCompares() : MachineFunctionPass(ID) {}
- void getAnalysisUsage(AnalysisUsage &AU) const;
- bool runOnMachineFunction(MachineFunction &MF);
- const char *getPassName() const { return "ARM64 Conditional Compares"; }
+ AArch64ConditionalCompares() : MachineFunctionPass(ID) {}
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+ bool runOnMachineFunction(MachineFunction &MF) override;
+ const char *getPassName() const override {
+ return "AArch64 Conditional Compares";
+ }
private:
bool tryConvert(MachineBasicBlock *);
@@ -748,25 +751,25 @@ private:
};
} // end anonymous namespace
-char ARM64ConditionalCompares::ID = 0;
+char AArch64ConditionalCompares::ID = 0;
namespace llvm {
-void initializeARM64ConditionalComparesPass(PassRegistry &);
+void initializeAArch64ConditionalComparesPass(PassRegistry &);
}
-INITIALIZE_PASS_BEGIN(ARM64ConditionalCompares, "arm64-ccmp", "ARM64 CCMP Pass",
- false, false)
+INITIALIZE_PASS_BEGIN(AArch64ConditionalCompares, "aarch64-ccmp",
+ "AArch64 CCMP Pass", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_DEPENDENCY(MachineTraceMetrics)
-INITIALIZE_PASS_END(ARM64ConditionalCompares, "arm64-ccmp", "ARM64 CCMP Pass",
- false, false)
+INITIALIZE_PASS_END(AArch64ConditionalCompares, "aarch64-ccmp",
+ "AArch64 CCMP Pass", false, false)
-FunctionPass *llvm::createARM64ConditionalCompares() {
- return new ARM64ConditionalCompares();
+FunctionPass *llvm::createAArch64ConditionalCompares() {
+ return new AArch64ConditionalCompares();
}
-void ARM64ConditionalCompares::getAnalysisUsage(AnalysisUsage &AU) const {
+void AArch64ConditionalCompares::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<MachineBranchProbabilityInfo>();
AU.addRequired<MachineDominatorTree>();
AU.addPreserved<MachineDominatorTree>();
@@ -778,8 +781,8 @@ void ARM64ConditionalCompares::getAnalysisUsage(AnalysisUsage &AU) const {
}
/// Update the dominator tree after if-conversion erased some blocks.
-void
-ARM64ConditionalCompares::updateDomTree(ArrayRef<MachineBasicBlock *> Removed) {
+void AArch64ConditionalCompares::updateDomTree(
+ ArrayRef<MachineBasicBlock *> Removed) {
// convert() removes CmpBB which was previously dominated by Head.
// CmpBB children should be transferred to Head.
MachineDomTreeNode *HeadNode = DomTree->getNode(CmpConv.Head);
@@ -795,7 +798,7 @@ ARM64ConditionalCompares::updateDomTree(ArrayRef<MachineBasicBlock *> Removed) {
/// Update LoopInfo after if-conversion.
void
-ARM64ConditionalCompares::updateLoops(ArrayRef<MachineBasicBlock *> Removed) {
+AArch64ConditionalCompares::updateLoops(ArrayRef<MachineBasicBlock *> Removed) {
if (!Loops)
return;
for (unsigned i = 0, e = Removed.size(); i != e; ++i)
@@ -803,7 +806,7 @@ ARM64ConditionalCompares::updateLoops(ArrayRef<MachineBasicBlock *> Removed) {
}
/// Invalidate MachineTraceMetrics before if-conversion.
-void ARM64ConditionalCompares::invalidateTraces() {
+void AArch64ConditionalCompares::invalidateTraces() {
Traces->invalidate(CmpConv.Head);
Traces->invalidate(CmpConv.CmpBB);
}
@@ -811,7 +814,7 @@ void ARM64ConditionalCompares::invalidateTraces() {
/// Apply cost model and heuristics to the if-conversion in IfConv.
/// Return true if the conversion is a good idea.
///
-bool ARM64ConditionalCompares::shouldConvert() {
+bool AArch64ConditionalCompares::shouldConvert() {
// Stress testing mode disables all cost considerations.
if (Stress)
return true;
@@ -872,7 +875,7 @@ bool ARM64ConditionalCompares::shouldConvert() {
return true;
}
-bool ARM64ConditionalCompares::tryConvert(MachineBasicBlock *MBB) {
+bool AArch64ConditionalCompares::tryConvert(MachineBasicBlock *MBB) {
bool Changed = false;
while (CmpConv.canConvert(MBB) && shouldConvert()) {
invalidateTraces();
@@ -885,8 +888,8 @@ bool ARM64ConditionalCompares::tryConvert(MachineBasicBlock *MBB) {
return Changed;
}
-bool ARM64ConditionalCompares::runOnMachineFunction(MachineFunction &MF) {
- DEBUG(dbgs() << "********** ARM64 Conditional Compares **********\n"
+bool AArch64ConditionalCompares::runOnMachineFunction(MachineFunction &MF) {
+ DEBUG(dbgs() << "********** AArch64 Conditional Compares **********\n"
<< "********** Function: " << MF.getName() << '\n');
TII = MF.getTarget().getInstrInfo();
TRI = MF.getTarget().getRegisterInfo();
@@ -896,7 +899,7 @@ bool ARM64ConditionalCompares::runOnMachineFunction(MachineFunction &MF) {
DomTree = &getAnalysis<MachineDominatorTree>();
Loops = getAnalysisIfAvailable<MachineLoopInfo>();
Traces = &getAnalysis<MachineTraceMetrics>();
- MinInstr = 0;
+ MinInstr = nullptr;
MinSize = MF.getFunction()->getAttributes().hasAttribute(
AttributeSet::FunctionIndex, Attribute::MinSize);
@@ -906,11 +909,9 @@ bool ARM64ConditionalCompares::runOnMachineFunction(MachineFunction &MF) {
// Visit blocks in dominator tree pre-order. The pre-order enables multiple
// cmp-conversions from the same head block.
// Note that updateDomTree() modifies the children of the DomTree node
- // currently being visited. The df_iterator supports that, it doesn't look at
+ // currently being visited. The df_iterator supports that; it doesn't look at
// child_begin() / child_end() until after a node has been visited.
- for (df_iterator<MachineDominatorTree *> I = df_begin(DomTree),
- E = df_end(DomTree);
- I != E; ++I)
+ for (auto *I : depth_first(DomTree))
if (tryConvert(I->getBlock()))
Changed = true;
diff --git a/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp b/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp
new file mode 100644
index 0000000..a2d853c
--- /dev/null
+++ b/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp
@@ -0,0 +1,134 @@
+//==-- AArch64DeadRegisterDefinitions.cpp - Replace dead defs w/ zero reg --==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// When allowed by the instruction, replace a dead definition of a GPR with
+// the zero register. This makes the code a bit friendlier towards the
+// hardware's register renamer.
+//===----------------------------------------------------------------------===//
+
+#include "AArch64.h"
+#include "AArch64RegisterInfo.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "aarch64-dead-defs"
+
+STATISTIC(NumDeadDefsReplaced, "Number of dead definitions replaced");
+
+namespace {
+class AArch64DeadRegisterDefinitions : public MachineFunctionPass {
+private:
+ const TargetRegisterInfo *TRI;
+ bool implicitlyDefinesOverlappingReg(unsigned Reg, const MachineInstr &MI);
+ bool processMachineBasicBlock(MachineBasicBlock &MBB);
+ bool usesFrameIndex(const MachineInstr &MI);
+public:
+ static char ID; // Pass identification, replacement for typeid.
+ explicit AArch64DeadRegisterDefinitions() : MachineFunctionPass(ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &F) override;
+
+ const char *getPassName() const override { return "Dead register definitions"; }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+};
+char AArch64DeadRegisterDefinitions::ID = 0;
+} // end anonymous namespace
+
+bool AArch64DeadRegisterDefinitions::implicitlyDefinesOverlappingReg(
+ unsigned Reg, const MachineInstr &MI) {
+ for (const MachineOperand &MO : MI.implicit_operands())
+ if (MO.isReg() && MO.isDef())
+ if (TRI->regsOverlap(Reg, MO.getReg()))
+ return true;
+ return false;
+}
+
+bool AArch64DeadRegisterDefinitions::usesFrameIndex(const MachineInstr &MI) {
+ for (const MachineOperand &Op : MI.uses())
+ if (Op.isFI())
+ return true;
+ return false;
+}
+
+bool AArch64DeadRegisterDefinitions::processMachineBasicBlock(
+ MachineBasicBlock &MBB) {
+ bool Changed = false;
+ for (MachineInstr &MI : MBB) {
+ if (usesFrameIndex(MI)) {
+ // We need to skip this instruction because, while it appears to have a
+ // dead def, it uses a frame index which might expand into a multi-
+ // instruction sequence during prologue/epilogue insertion.
+ DEBUG(dbgs() << " Ignoring, operand is frame index\n");
+ continue;
+ }
+ for (int i = 0, e = MI.getDesc().getNumDefs(); i != e; ++i) {
+ MachineOperand &MO = MI.getOperand(i);
+ if (MO.isReg() && MO.isDead() && MO.isDef()) {
+ assert(!MO.isImplicit() && "Unexpected implicit def!");
+ DEBUG(dbgs() << " Dead def operand #" << i << " in:\n ";
+ MI.print(dbgs()));
+ // Be careful not to change the register if it's a tied operand.
+ if (MI.isRegTiedToUseOperand(i)) {
+ DEBUG(dbgs() << " Ignoring, def is tied operand.\n");
+ continue;
+ }
+ // Don't change the register if there's an implicit def of a subreg or
+ // superreg.
+ if (implicitlyDefinesOverlappingReg(MO.getReg(), MI)) {
+ DEBUG(dbgs() << " Ignoring, implicitly defines overlap reg.\n");
+ continue;
+ }
+ // Make sure the instruction takes a register class that contains
+ // the zero register, and replace it if so.
+ unsigned NewReg;
+ switch (MI.getDesc().OpInfo[i].RegClass) {
+ default:
+ DEBUG(dbgs() << " Ignoring, register is not a GPR.\n");
+ continue;
+ case AArch64::GPR32RegClassID:
+ NewReg = AArch64::WZR;
+ break;
+ case AArch64::GPR64RegClassID:
+ NewReg = AArch64::XZR;
+ break;
+ }
+ DEBUG(dbgs() << " Replacing with zero register. New:\n ");
+ MO.setReg(NewReg);
+ DEBUG(MI.print(dbgs()));
+ ++NumDeadDefsReplaced;
+ }
+ }
+ }
+ return Changed;
+}
+
+// Scan the function for instructions that have a dead definition of a
+// register. Replace that register with the zero register when possible.
+bool AArch64DeadRegisterDefinitions::runOnMachineFunction(MachineFunction &MF) {
+ TRI = MF.getTarget().getRegisterInfo();
+ bool Changed = false;
+ DEBUG(dbgs() << "***** AArch64DeadRegisterDefinitions *****\n");
+
+ for (auto &MBB : MF)
+ if (processMachineBasicBlock(MBB))
+ Changed = true;
+ return Changed;
+}
+
+FunctionPass *llvm::createAArch64DeadRegisterDefinitions() {
+ return new AArch64DeadRegisterDefinitions();
+}
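
The replacement rule at the heart of processMachineBasicBlock reduces to a mapping from register class to zero register; isolated below for clarity, assuming the AArch64 headers included by the file above (class IDs and register names as generated for the target):

// Map a dead def's register class to the matching zero register; anything
// that is not a plain GPR class gets no replacement (returns 0).
static unsigned zeroRegisterFor(int RegClassID) {
  switch (RegClassID) {
  case AArch64::GPR32RegClassID:
    return AArch64::WZR;
  case AArch64::GPR64RegClassID:
    return AArch64::XZR;
  default:
    return 0;
  }
}
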
diff --git a/lib/Target/ARM64/ARM64ExpandPseudoInsts.cpp b/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
index e082baf..a76fd76 100644
--- a/lib/Target/ARM64/ARM64ExpandPseudoInsts.cpp
+++ b/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
@@ -1,4 +1,4 @@
-//===-- ARM64ExpandPseudoInsts.cpp - Expand pseudo instructions ---*- C++ -*-=//
+//==-- AArch64ExpandPseudoInsts.cpp - Expand pseudo instructions --*- C++ -*-=//
//
// The LLVM Compiler Infrastructure
//
@@ -14,25 +14,25 @@
//
//===----------------------------------------------------------------------===//
-#include "MCTargetDesc/ARM64AddressingModes.h"
-#include "ARM64InstrInfo.h"
+#include "MCTargetDesc/AArch64AddressingModes.h"
+#include "AArch64InstrInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/Support/MathExtras.h"
using namespace llvm;
namespace {
-class ARM64ExpandPseudo : public MachineFunctionPass {
+class AArch64ExpandPseudo : public MachineFunctionPass {
public:
static char ID;
- ARM64ExpandPseudo() : MachineFunctionPass(ID) {}
+ AArch64ExpandPseudo() : MachineFunctionPass(ID) {}
- const ARM64InstrInfo *TII;
+ const AArch64InstrInfo *TII;
- virtual bool runOnMachineFunction(MachineFunction &Fn);
+ bool runOnMachineFunction(MachineFunction &Fn) override;
- virtual const char *getPassName() const {
- return "ARM64 pseudo instruction expansion pass";
+ const char *getPassName() const override {
+ return "AArch64 pseudo instruction expansion pass";
}
private:
@@ -41,7 +41,7 @@ private:
bool expandMOVImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
unsigned BitSize);
};
-char ARM64ExpandPseudo::ID = 0;
+char AArch64ExpandPseudo::ID = 0;
}
/// \brief Transfer implicit operands on the pseudo instruction to the
@@ -87,17 +87,17 @@ static uint64_t replicateChunk(uint64_t Imm, unsigned FromIdx, unsigned ToIdx) {
static bool tryOrrMovk(uint64_t UImm, uint64_t OrrImm, MachineInstr &MI,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI,
- const ARM64InstrInfo *TII, unsigned ChunkIdx) {
+ const AArch64InstrInfo *TII, unsigned ChunkIdx) {
assert(ChunkIdx < 4 && "Out of range chunk index specified!");
const unsigned ShiftAmt = ChunkIdx * 16;
uint64_t Encoding;
- if (ARM64_AM::processLogicalImmediate(OrrImm, 64, Encoding)) {
+ if (AArch64_AM::processLogicalImmediate(OrrImm, 64, Encoding)) {
// Create the ORR-immediate instruction.
MachineInstrBuilder MIB =
- BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM64::ORRXri))
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORRXri))
.addOperand(MI.getOperand(0))
- .addReg(ARM64::XZR)
+ .addReg(AArch64::XZR)
.addImm(Encoding);
// Create the MOVK instruction.
@@ -105,11 +105,11 @@ static bool tryOrrMovk(uint64_t UImm, uint64_t OrrImm, MachineInstr &MI,
const unsigned DstReg = MI.getOperand(0).getReg();
const bool DstIsDead = MI.getOperand(0).isDead();
MachineInstrBuilder MIB1 =
- BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM64::MOVKXi))
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi))
.addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
.addReg(DstReg)
.addImm(Imm16)
- .addImm(ARM64_AM::getShifterImm(ARM64_AM::LSL, ShiftAmt));
+ .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt));
transferImpOps(MI, MIB, MIB1);
MI.eraseFromParent();
@@ -124,7 +124,7 @@ static bool tryOrrMovk(uint64_t UImm, uint64_t OrrImm, MachineInstr &MI,
static bool canUseOrr(uint64_t Chunk, uint64_t &Encoding) {
Chunk = (Chunk << 48) | (Chunk << 32) | (Chunk << 16) | Chunk;
- return ARM64_AM::processLogicalImmediate(Chunk, 64, Encoding);
+ return AArch64_AM::processLogicalImmediate(Chunk, 64, Encoding);
}
/// \brief Check for identical 16-bit chunks within the constant and if so
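
The trick in canUseOrr is that a 16-bit chunk replicated into all four positions of a 64-bit value sometimes forms a pattern the logical-immediate encoder accepts, so one ORR can materialize most of the constant and a single MOVK patches the odd chunk. A standalone illustration:

#include <cstdint>

// Replicate a 16-bit chunk across a 64-bit value, as canUseOrr does before
// asking processLogicalImmediate whether the result is encodable.
static uint64_t replicateChunk16(uint64_t Chunk) {
  Chunk &= 0xFFFFu;
  return (Chunk << 48) | (Chunk << 32) | (Chunk << 16) | Chunk;
}

// replicateChunk16(0xFF00) == 0xFF00FF00FF00FF00, an encodable logical
// immediate, so 0xFF00FF001234FF00 can be one ORR of that pattern plus one
// MOVK writing 0x1234 into bits [31:16].
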
@@ -138,7 +138,7 @@ static bool canUseOrr(uint64_t Chunk, uint64_t &Encoding) {
static bool tryToreplicateChunks(uint64_t UImm, MachineInstr &MI,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI,
- const ARM64InstrInfo *TII) {
+ const AArch64InstrInfo *TII) {
typedef DenseMap<uint64_t, unsigned> CountMap;
CountMap Counts;
@@ -162,9 +162,9 @@ static bool tryToreplicateChunks(uint64_t UImm, MachineInstr &MI,
const bool CountThree = Count == 3;
// Create the ORR-immediate instruction.
MachineInstrBuilder MIB =
- BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM64::ORRXri))
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORRXri))
.addOperand(MI.getOperand(0))
- .addReg(ARM64::XZR)
+ .addReg(AArch64::XZR)
.addImm(Encoding);
const unsigned DstReg = MI.getOperand(0).getReg();
@@ -182,12 +182,12 @@ static bool tryToreplicateChunks(uint64_t UImm, MachineInstr &MI,
// Create the first MOVK instruction.
MachineInstrBuilder MIB1 =
- BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM64::MOVKXi))
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi))
.addReg(DstReg,
RegState::Define | getDeadRegState(DstIsDead && CountThree))
.addReg(DstReg)
.addImm(Imm16)
- .addImm(ARM64_AM::getShifterImm(ARM64_AM::LSL, ShiftAmt));
+ .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt));
// In case we have three instances the whole constant is now materialized
// and we can exit.
@@ -207,11 +207,11 @@ static bool tryToreplicateChunks(uint64_t UImm, MachineInstr &MI,
// Create the second MOVK instruction.
MachineInstrBuilder MIB2 =
- BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM64::MOVKXi))
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi))
.addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
.addReg(DstReg)
.addImm(Imm16)
- .addImm(ARM64_AM::getShifterImm(ARM64_AM::LSL, ShiftAmt));
+ .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt));
transferImpOps(MI, MIB, MIB2);
MI.eraseFromParent();
@@ -272,7 +272,7 @@ static uint64_t updateImm(uint64_t Imm, unsigned Idx, bool Clear) {
static bool trySequenceOfOnes(uint64_t UImm, MachineInstr &MI,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI,
- const ARM64InstrInfo *TII) {
+ const AArch64InstrInfo *TII) {
const int NotSet = -1;
const uint64_t Mask = 0xFFFF;
@@ -343,11 +343,11 @@ static bool trySequenceOfOnes(uint64_t UImm, MachineInstr &MI,
// Create the ORR-immediate instruction.
uint64_t Encoding = 0;
- ARM64_AM::processLogicalImmediate(OrrImm, 64, Encoding);
+ AArch64_AM::processLogicalImmediate(OrrImm, 64, Encoding);
MachineInstrBuilder MIB =
- BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM64::ORRXri))
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORRXri))
.addOperand(MI.getOperand(0))
- .addReg(ARM64::XZR)
+ .addReg(AArch64::XZR)
.addImm(Encoding);
const unsigned DstReg = MI.getOperand(0).getReg();
@@ -356,12 +356,13 @@ static bool trySequenceOfOnes(uint64_t UImm, MachineInstr &MI,
const bool SingleMovk = SecondMovkIdx == NotSet;
// Create the first MOVK instruction.
MachineInstrBuilder MIB1 =
- BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM64::MOVKXi))
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi))
.addReg(DstReg,
RegState::Define | getDeadRegState(DstIsDead && SingleMovk))
.addReg(DstReg)
.addImm(getChunk(UImm, FirstMovkIdx))
- .addImm(ARM64_AM::getShifterImm(ARM64_AM::LSL, FirstMovkIdx * 16));
+ .addImm(
+ AArch64_AM::getShifterImm(AArch64_AM::LSL, FirstMovkIdx * 16));
// Early exit in case we only need to emit a single MOVK instruction.
if (SingleMovk) {
@@ -372,11 +373,12 @@ static bool trySequenceOfOnes(uint64_t UImm, MachineInstr &MI,
// Create the second MOVK instruction.
MachineInstrBuilder MIB2 =
- BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM64::MOVKXi))
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi))
.addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
.addReg(DstReg)
.addImm(getChunk(UImm, SecondMovkIdx))
- .addImm(ARM64_AM::getShifterImm(ARM64_AM::LSL, SecondMovkIdx * 16));
+ .addImm(
+ AArch64_AM::getShifterImm(AArch64_AM::LSL, SecondMovkIdx * 16));
transferImpOps(MI, MIB, MIB2);
MI.eraseFromParent();
@@ -385,9 +387,9 @@ static bool trySequenceOfOnes(uint64_t UImm, MachineInstr &MI,
/// \brief Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more
/// real move-immediate instructions to synthesize the immediate.
-bool ARM64ExpandPseudo::expandMOVImm(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- unsigned BitSize) {
+bool AArch64ExpandPseudo::expandMOVImm(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned BitSize) {
MachineInstr &MI = *MBBI;
uint64_t Imm = MI.getOperand(1).getImm();
const unsigned Mask = 0xFFFF;
@@ -395,12 +397,12 @@ bool ARM64ExpandPseudo::expandMOVImm(MachineBasicBlock &MBB,
// Try a MOVI instruction (aka ORR-immediate with the zero register).
uint64_t UImm = Imm << (64 - BitSize) >> (64 - BitSize);
uint64_t Encoding;
- if (ARM64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
- unsigned Opc = (BitSize == 32 ? ARM64::ORRWri : ARM64::ORRXri);
+ if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
+ unsigned Opc = (BitSize == 32 ? AArch64::ORRWri : AArch64::ORRXri);
MachineInstrBuilder MIB =
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc))
.addOperand(MI.getOperand(0))
- .addReg(BitSize == 32 ? ARM64::WZR : ARM64::XZR)
+ .addReg(BitSize == 32 ? AArch64::WZR : AArch64::XZR)
.addImm(Encoding);
transferImpOps(MI, MIB, MIB);
MI.eraseFromParent();
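
The strategy above masks the immediate down to BitSize bits and, when the result is a valid logical immediate, emits a single ORR from the zero register (WZR or XZR). A rough standalone model of the masking step:

    #include <cstdint>

    // Equivalent of "Imm << (64 - BitSize) >> (64 - BitSize)": keep the
    // low BitSize bits. Valid for BitSize in {32, 64} as used here.
    uint64_t truncToBits(uint64_t Imm, unsigned BitSize) {
      return Imm << (64 - BitSize) >> (64 - BitSize);
    }
    // truncToBits(0xFFFFFFFF00001234, 32) == 0x1234.
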
@@ -504,9 +506,9 @@ bool ARM64ExpandPseudo::expandMOVImm(MachineBasicBlock &MBB,
unsigned FirstOpc;
if (BitSize == 32) {
Imm &= (1LL << 32) - 1;
- FirstOpc = (isNeg ? ARM64::MOVNWi : ARM64::MOVZWi);
+ FirstOpc = (isNeg ? AArch64::MOVNWi : AArch64::MOVZWi);
} else {
- FirstOpc = (isNeg ? ARM64::MOVNXi : ARM64::MOVZXi);
+ FirstOpc = (isNeg ? AArch64::MOVNXi : AArch64::MOVZXi);
}
unsigned Shift = 0; // LSL amount for high bits with MOVZ/MOVN
unsigned LastShift = 0; // LSL amount for last MOVK
@@ -524,7 +526,7 @@ bool ARM64ExpandPseudo::expandMOVImm(MachineBasicBlock &MBB,
.addReg(DstReg, RegState::Define |
getDeadRegState(DstIsDead && Shift == LastShift))
.addImm(Imm16)
- .addImm(ARM64_AM::getShifterImm(ARM64_AM::LSL, Shift));
+ .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, Shift));
// If a MOVN was used for the high bits of a negative value, flip the rest
// of the bits back for use with MOVK.
@@ -538,7 +540,7 @@ bool ARM64ExpandPseudo::expandMOVImm(MachineBasicBlock &MBB,
}
MachineInstrBuilder MIB2;
- unsigned Opc = (BitSize == 32 ? ARM64::MOVKWi : ARM64::MOVKXi);
+ unsigned Opc = (BitSize == 32 ? AArch64::MOVKWi : AArch64::MOVKXi);
while (Shift != LastShift) {
Shift -= 16;
Imm16 = (Imm >> Shift) & Mask;
@@ -550,7 +552,7 @@ bool ARM64ExpandPseudo::expandMOVImm(MachineBasicBlock &MBB,
getDeadRegState(DstIsDead && Shift == LastShift))
.addReg(DstReg)
.addImm(Imm16)
- .addImm(ARM64_AM::getShifterImm(ARM64_AM::LSL, Shift));
+ .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, Shift));
}
transferImpOps(MI, MIB1, MIB2);
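
As a worked example of this general fallback path: one MOVZ (or MOVN, when that leaves fewer chunks to patch) materializes the leading 16-bit chunk, and one MOVK per remaining non-trivial chunk fills in the rest. A minimal sketch that prints the full four-instruction sequence, ignoring the chunk-skipping the real pass performs:

    #include <cstdint>
    #include <cstdio>

    // Print a naive MOVZ + 3x MOVK decomposition of a 64-bit immediate.
    // The pass above additionally skips chunks that are already 0x0000
    // (or 0xFFFF on the MOVN path), so it often emits fewer instructions.
    void printMovSequence(uint64_t Imm) {
      std::printf("movz x0, #0x%llx, lsl #48\n",
                  (unsigned long long)((Imm >> 48) & 0xFFFF));
      for (int Shift = 32; Shift >= 0; Shift -= 16)
        std::printf("movk x0, #0x%llx, lsl #%d\n",
                    (unsigned long long)((Imm >> Shift) & 0xFFFF), Shift);
    }
    // printMovSequence(0x123456789ABCDEF0) yields movz/movk with chunks
    // 0x1234, 0x5678, 0x9ABC, 0xDEF0.
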
@@ -560,7 +562,7 @@ bool ARM64ExpandPseudo::expandMOVImm(MachineBasicBlock &MBB,
/// \brief If MBBI references a pseudo instruction that should be expanded here,
/// do the expansion and return true. Otherwise return false.
-bool ARM64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
+bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI) {
MachineInstr &MI = *MBBI;
unsigned Opcode = MI.getOpcode();
@@ -568,67 +570,76 @@ bool ARM64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
default:
break;
- case ARM64::ADDWrr:
- case ARM64::SUBWrr:
- case ARM64::ADDXrr:
- case ARM64::SUBXrr:
- case ARM64::ADDSWrr:
- case ARM64::SUBSWrr:
- case ARM64::ADDSXrr:
- case ARM64::SUBSXrr:
- case ARM64::ANDWrr:
- case ARM64::ANDXrr:
- case ARM64::BICWrr:
- case ARM64::BICXrr:
- case ARM64::EONWrr:
- case ARM64::EONXrr:
- case ARM64::EORWrr:
- case ARM64::EORXrr:
- case ARM64::ORNWrr:
- case ARM64::ORNXrr:
- case ARM64::ORRWrr:
- case ARM64::ORRXrr: {
+ case AArch64::ADDWrr:
+ case AArch64::SUBWrr:
+ case AArch64::ADDXrr:
+ case AArch64::SUBXrr:
+ case AArch64::ADDSWrr:
+ case AArch64::SUBSWrr:
+ case AArch64::ADDSXrr:
+ case AArch64::SUBSXrr:
+ case AArch64::ANDWrr:
+ case AArch64::ANDXrr:
+ case AArch64::BICWrr:
+ case AArch64::BICXrr:
+ case AArch64::ANDSWrr:
+ case AArch64::ANDSXrr:
+ case AArch64::BICSWrr:
+ case AArch64::BICSXrr:
+ case AArch64::EONWrr:
+ case AArch64::EONXrr:
+ case AArch64::EORWrr:
+ case AArch64::EORXrr:
+ case AArch64::ORNWrr:
+ case AArch64::ORNXrr:
+ case AArch64::ORRWrr:
+ case AArch64::ORRXrr: {
unsigned Opcode;
switch (MI.getOpcode()) {
default:
return false;
- case ARM64::ADDWrr: Opcode = ARM64::ADDWrs; break;
- case ARM64::SUBWrr: Opcode = ARM64::SUBWrs; break;
- case ARM64::ADDXrr: Opcode = ARM64::ADDXrs; break;
- case ARM64::SUBXrr: Opcode = ARM64::SUBXrs; break;
- case ARM64::ADDSWrr: Opcode = ARM64::ADDSWrs; break;
- case ARM64::SUBSWrr: Opcode = ARM64::SUBSWrs; break;
- case ARM64::ADDSXrr: Opcode = ARM64::ADDSXrs; break;
- case ARM64::SUBSXrr: Opcode = ARM64::SUBSXrs; break;
- case ARM64::ANDWrr: Opcode = ARM64::ANDWrs; break;
- case ARM64::ANDXrr: Opcode = ARM64::ANDXrs; break;
- case ARM64::BICWrr: Opcode = ARM64::BICWrs; break;
- case ARM64::BICXrr: Opcode = ARM64::BICXrs; break;
- case ARM64::EONWrr: Opcode = ARM64::EONWrs; break;
- case ARM64::EONXrr: Opcode = ARM64::EONXrs; break;
- case ARM64::EORWrr: Opcode = ARM64::EORWrs; break;
- case ARM64::EORXrr: Opcode = ARM64::EORXrs; break;
- case ARM64::ORNWrr: Opcode = ARM64::ORNWrs; break;
- case ARM64::ORNXrr: Opcode = ARM64::ORNXrs; break;
- case ARM64::ORRWrr: Opcode = ARM64::ORRWrs; break;
- case ARM64::ORRXrr: Opcode = ARM64::ORRXrs; break;
+ case AArch64::ADDWrr: Opcode = AArch64::ADDWrs; break;
+ case AArch64::SUBWrr: Opcode = AArch64::SUBWrs; break;
+ case AArch64::ADDXrr: Opcode = AArch64::ADDXrs; break;
+ case AArch64::SUBXrr: Opcode = AArch64::SUBXrs; break;
+ case AArch64::ADDSWrr: Opcode = AArch64::ADDSWrs; break;
+ case AArch64::SUBSWrr: Opcode = AArch64::SUBSWrs; break;
+ case AArch64::ADDSXrr: Opcode = AArch64::ADDSXrs; break;
+ case AArch64::SUBSXrr: Opcode = AArch64::SUBSXrs; break;
+ case AArch64::ANDWrr: Opcode = AArch64::ANDWrs; break;
+ case AArch64::ANDXrr: Opcode = AArch64::ANDXrs; break;
+ case AArch64::BICWrr: Opcode = AArch64::BICWrs; break;
+ case AArch64::BICXrr: Opcode = AArch64::BICXrs; break;
+ case AArch64::ANDSWrr: Opcode = AArch64::ANDSWrs; break;
+ case AArch64::ANDSXrr: Opcode = AArch64::ANDSXrs; break;
+ case AArch64::BICSWrr: Opcode = AArch64::BICSWrs; break;
+ case AArch64::BICSXrr: Opcode = AArch64::BICSXrs; break;
+ case AArch64::EONWrr: Opcode = AArch64::EONWrs; break;
+ case AArch64::EONXrr: Opcode = AArch64::EONXrs; break;
+ case AArch64::EORWrr: Opcode = AArch64::EORWrs; break;
+ case AArch64::EORXrr: Opcode = AArch64::EORXrs; break;
+ case AArch64::ORNWrr: Opcode = AArch64::ORNWrs; break;
+ case AArch64::ORNXrr: Opcode = AArch64::ORNXrs; break;
+ case AArch64::ORRWrr: Opcode = AArch64::ORRWrs; break;
+ case AArch64::ORRXrr: Opcode = AArch64::ORRXrs; break;
}
MachineInstrBuilder MIB1 =
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opcode),
MI.getOperand(0).getReg())
.addOperand(MI.getOperand(1))
.addOperand(MI.getOperand(2))
- .addImm(ARM64_AM::getShifterImm(ARM64_AM::LSL, 0));
+ .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
transferImpOps(MI, MIB1, MIB1);
MI.eraseFromParent();
return true;
}
- case ARM64::FCVTSHpseudo: {
+ case AArch64::FCVTSHpseudo: {
MachineOperand Src = MI.getOperand(1);
Src.setImplicit();
- unsigned SrcH = TII->getRegisterInfo().getSubReg(Src.getReg(), ARM64::hsub);
- auto MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM64::FCVTSHr))
+ unsigned SrcH =
+ TII->getRegisterInfo().getSubReg(Src.getReg(), AArch64::hsub);
+ auto MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::FCVTSHr))
.addOperand(MI.getOperand(0))
.addReg(SrcH, RegState::Undef)
.addOperand(Src);
@@ -636,33 +647,34 @@ bool ARM64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
MI.eraseFromParent();
return true;
}
- case ARM64::LOADgot: {
+ case AArch64::LOADgot: {
// Expand into ADRP + LDR.
unsigned DstReg = MI.getOperand(0).getReg();
const MachineOperand &MO1 = MI.getOperand(1);
unsigned Flags = MO1.getTargetFlags();
MachineInstrBuilder MIB1 =
- BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM64::ADRP), DstReg);
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg);
MachineInstrBuilder MIB2 =
- BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM64::LDRXui))
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::LDRXui))
.addOperand(MI.getOperand(0))
.addReg(DstReg);
if (MO1.isGlobal()) {
- MIB1.addGlobalAddress(MO1.getGlobal(), 0, Flags | ARM64II::MO_PAGE);
+ MIB1.addGlobalAddress(MO1.getGlobal(), 0, Flags | AArch64II::MO_PAGE);
MIB2.addGlobalAddress(MO1.getGlobal(), 0,
- Flags | ARM64II::MO_PAGEOFF | ARM64II::MO_NC);
+ Flags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
} else if (MO1.isSymbol()) {
- MIB1.addExternalSymbol(MO1.getSymbolName(), Flags | ARM64II::MO_PAGE);
+ MIB1.addExternalSymbol(MO1.getSymbolName(), Flags | AArch64II::MO_PAGE);
MIB2.addExternalSymbol(MO1.getSymbolName(),
- Flags | ARM64II::MO_PAGEOFF | ARM64II::MO_NC);
+ Flags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
} else {
assert(MO1.isCPI() &&
"Only expect globals, externalsymbols, or constant pools");
MIB1.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(),
- Flags | ARM64II::MO_PAGE);
+ Flags | AArch64II::MO_PAGE);
MIB2.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(),
- Flags | ARM64II::MO_PAGEOFF | ARM64II::MO_NC);
+ Flags | AArch64II::MO_PAGEOFF |
+ AArch64II::MO_NC);
}
transferImpOps(MI, MIB1, MIB2);
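
Both branches of this expansion split an address the same way: ADRP materializes the PC-relative 4 KiB page, and the follow-up LDR (or ADD, for the MOVaddr cases below) supplies the low 12 bits via MO_PAGEOFF. A rough numeric model of the split:

    #include <cstdint>

    // What the MO_PAGE / MO_PAGEOFF relocation pair denote, numerically.
    uint64_t pageOf(uint64_t Addr)    { return Addr & ~0xFFFull; } // ADRP
    uint64_t pageOffOf(uint64_t Addr) { return Addr & 0xFFFull;  } // :lo12:
    // pageOf(0x12345ABC) == 0x12345000, pageOffOf(0x12345ABC) == 0xABC.
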
@@ -670,20 +682,20 @@ bool ARM64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
return true;
}
- case ARM64::MOVaddr:
- case ARM64::MOVaddrJT:
- case ARM64::MOVaddrCP:
- case ARM64::MOVaddrBA:
- case ARM64::MOVaddrTLS:
- case ARM64::MOVaddrEXT: {
+ case AArch64::MOVaddr:
+ case AArch64::MOVaddrJT:
+ case AArch64::MOVaddrCP:
+ case AArch64::MOVaddrBA:
+ case AArch64::MOVaddrTLS:
+ case AArch64::MOVaddrEXT: {
// Expand into ADRP + ADD.
unsigned DstReg = MI.getOperand(0).getReg();
MachineInstrBuilder MIB1 =
- BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM64::ADRP), DstReg)
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg)
.addOperand(MI.getOperand(1));
MachineInstrBuilder MIB2 =
- BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM64::ADDXri))
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADDXri))
.addOperand(MI.getOperand(0))
.addReg(DstReg)
.addOperand(MI.getOperand(2))
@@ -694,13 +706,13 @@ bool ARM64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
return true;
}
- case ARM64::MOVi32imm:
+ case AArch64::MOVi32imm:
return expandMOVImm(MBB, MBBI, 32);
- case ARM64::MOVi64imm:
+ case AArch64::MOVi64imm:
return expandMOVImm(MBB, MBBI, 64);
- case ARM64::RET_ReallyLR:
- BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM64::RET))
- .addReg(ARM64::LR);
+ case AArch64::RET_ReallyLR:
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::RET))
+ .addReg(AArch64::LR);
MI.eraseFromParent();
return true;
}
@@ -709,7 +721,7 @@ bool ARM64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
/// \brief Iterate over the instructions in basic block MBB and expand any
/// pseudo instructions. Return true if anything was modified.
-bool ARM64ExpandPseudo::expandMBB(MachineBasicBlock &MBB) {
+bool AArch64ExpandPseudo::expandMBB(MachineBasicBlock &MBB) {
bool Modified = false;
MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
@@ -722,8 +734,8 @@ bool ARM64ExpandPseudo::expandMBB(MachineBasicBlock &MBB) {
return Modified;
}
-bool ARM64ExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
- TII = static_cast<const ARM64InstrInfo *>(MF.getTarget().getInstrInfo());
+bool AArch64ExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
+ TII = static_cast<const AArch64InstrInfo *>(MF.getTarget().getInstrInfo());
bool Modified = false;
for (auto &MBB : MF)
@@ -732,6 +744,6 @@ bool ARM64ExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
}
/// \brief Returns an instance of the pseudo instruction expansion pass.
-FunctionPass *llvm::createARM64ExpandPseudoPass() {
- return new ARM64ExpandPseudo();
+FunctionPass *llvm::createAArch64ExpandPseudoPass() {
+ return new AArch64ExpandPseudo();
}
diff --git a/lib/Target/ARM64/ARM64FastISel.cpp b/lib/Target/AArch64/AArch64FastISel.cpp
index 51b0f76..c3b5369 100644
--- a/lib/Target/ARM64/ARM64FastISel.cpp
+++ b/lib/Target/AArch64/AArch64FastISel.cpp
@@ -1,4 +1,4 @@
-//===-- ARM6464FastISel.cpp - ARM64 FastISel implementation ---------------===//
+//===-- AArch64FastISel.cpp - AArch64 FastISel implementation -------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,17 +7,16 @@
//
//===----------------------------------------------------------------------===//
//
-// This file defines the ARM64-specific support for the FastISel class. Some
+// This file defines the AArch64-specific support for the FastISel class. Some
// of the target-specific code is generated by tablegen in the file
-// ARM64GenFastISel.inc, which is #included here.
+// AArch64GenFastISel.inc, which is #included here.
//
//===----------------------------------------------------------------------===//
-#include "ARM64.h"
-#include "ARM64TargetMachine.h"
-#include "ARM64Subtarget.h"
-#include "ARM64CallingConv.h"
-#include "MCTargetDesc/ARM64AddressingModes.h"
+#include "AArch64.h"
+#include "AArch64TargetMachine.h"
+#include "AArch64Subtarget.h"
+#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
@@ -40,7 +39,7 @@ using namespace llvm;
namespace {
-class ARM64FastISel : public FastISel {
+class AArch64FastISel : public FastISel {
class Address {
public:
@@ -85,9 +84,9 @@ class ARM64FastISel : public FastISel {
bool isValid() { return isFIBase() || (isRegBase() && getReg() != 0); }
};
- /// Subtarget - Keep a pointer to the ARM64Subtarget around so that we can
+ /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
/// make the right decision when generating code for different targets.
- const ARM64Subtarget *Subtarget;
+ const AArch64Subtarget *Subtarget;
LLVMContext *Context;
private:
@@ -130,8 +129,8 @@ private:
unsigned EmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
unsigned Emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt);
- unsigned ARM64MaterializeFP(const ConstantFP *CFP, MVT VT);
- unsigned ARM64MaterializeGV(const GlobalValue *GV);
+ unsigned AArch64MaterializeFP(const ConstantFP *CFP, MVT VT);
+ unsigned AArch64MaterializeGV(const GlobalValue *GV);
// Call handling routines.
private:
@@ -147,32 +146,32 @@ private:
public:
// Backend specific FastISel code.
- virtual unsigned TargetMaterializeAlloca(const AllocaInst *AI);
- virtual unsigned TargetMaterializeConstant(const Constant *C);
+ unsigned TargetMaterializeAlloca(const AllocaInst *AI) override;
+ unsigned TargetMaterializeConstant(const Constant *C) override;
- explicit ARM64FastISel(FunctionLoweringInfo &funcInfo,
+ explicit AArch64FastISel(FunctionLoweringInfo &funcInfo,
const TargetLibraryInfo *libInfo)
: FastISel(funcInfo, libInfo) {
- Subtarget = &TM.getSubtarget<ARM64Subtarget>();
+ Subtarget = &TM.getSubtarget<AArch64Subtarget>();
Context = &funcInfo.Fn->getContext();
}
- virtual bool TargetSelectInstruction(const Instruction *I);
+ bool TargetSelectInstruction(const Instruction *I) override;
-#include "ARM64GenFastISel.inc"
+#include "AArch64GenFastISel.inc"
};
} // end anonymous namespace
-#include "ARM64GenCallingConv.inc"
+#include "AArch64GenCallingConv.inc"
-CCAssignFn *ARM64FastISel::CCAssignFnForCall(CallingConv::ID CC) const {
+CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const {
if (CC == CallingConv::WebKit_JS)
- return CC_ARM64_WebKit_JS;
- return Subtarget->isTargetDarwin() ? CC_ARM64_DarwinPCS : CC_ARM64_AAPCS;
+ return CC_AArch64_WebKit_JS;
+ return Subtarget->isTargetDarwin() ? CC_AArch64_DarwinPCS : CC_AArch64_AAPCS;
}
-unsigned ARM64FastISel::TargetMaterializeAlloca(const AllocaInst *AI) {
+unsigned AArch64FastISel::TargetMaterializeAlloca(const AllocaInst *AI) {
assert(TLI.getValueType(AI->getType(), true) == MVT::i64 &&
"Alloca should always return a pointer.");
@@ -184,8 +183,8 @@ unsigned ARM64FastISel::TargetMaterializeAlloca(const AllocaInst *AI) {
FuncInfo.StaticAllocaMap.find(AI);
if (SI != FuncInfo.StaticAllocaMap.end()) {
- unsigned ResultReg = createResultReg(&ARM64::GPR64RegClass);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::ADDXri),
+ unsigned ResultReg = createResultReg(&AArch64::GPR64RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
ResultReg)
.addFrameIndex(SI->second)
.addImm(0)
@@ -196,7 +195,10 @@ unsigned ARM64FastISel::TargetMaterializeAlloca(const AllocaInst *AI) {
return 0;
}
-unsigned ARM64FastISel::ARM64MaterializeFP(const ConstantFP *CFP, MVT VT) {
+unsigned AArch64FastISel::AArch64MaterializeFP(const ConstantFP *CFP, MVT VT) {
+ if (VT != MVT::f32 && VT != MVT::f64)
+ return 0;
+
const APFloat Val = CFP->getValueAPF();
bool is64bit = (VT == MVT::f64);
@@ -206,11 +208,11 @@ unsigned ARM64FastISel::ARM64MaterializeFP(const ConstantFP *CFP, MVT VT) {
int Imm;
unsigned Opc;
if (is64bit) {
- Imm = ARM64_AM::getFP64Imm(Val);
- Opc = ARM64::FMOVDi;
+ Imm = AArch64_AM::getFP64Imm(Val);
+ Opc = AArch64::FMOVDi;
} else {
- Imm = ARM64_AM::getFP32Imm(Val);
- Opc = ARM64::FMOVSi;
+ Imm = AArch64_AM::getFP32Imm(Val);
+ Opc = AArch64::FMOVSi;
}
unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
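
This first path only works for the few constants FMOV (immediate) can encode; getFP64Imm/getFP32Imm report failure for everything else, which is what sends the code into the constant-pool load in the next hunk. A hedged standalone check (fitsFMovImm is hypothetical; the encodable set is values of the form (-1)^s * (1 + m/16) * 2^e with m in [0,15] and e in [-3,4]):

    #include <cmath>

    // Brute-force test for membership in the FMOV-immediate value set.
    bool fitsFMovImm(double V) {
      for (int S = 0; S < 2; ++S)
        for (int M = 0; M < 16; ++M)
          for (int E = -3; E <= 4; ++E)
            if (V == (S ? -1.0 : 1.0) * (1.0 + M / 16.0) * std::ldexp(1.0, E))
              return true;
      return false;
    }
    // fitsFMovImm(1.0) and fitsFMovImm(-0.5) hold; fitsFMovImm(0.1) and
    // fitsFMovImm(0.0) do not, so those take the constant-pool path.
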
@@ -225,24 +227,29 @@ unsigned ARM64FastISel::ARM64MaterializeFP(const ConstantFP *CFP, MVT VT) {
Align = DL.getTypeAllocSize(CFP->getType());
unsigned Idx = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);
- unsigned ADRPReg = createResultReg(&ARM64::GPR64RegClass);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::ADRP),
- ADRPReg).addConstantPoolIndex(Idx, 0, ARM64II::MO_PAGE);
+ unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
+ ADRPReg).addConstantPoolIndex(Idx, 0, AArch64II::MO_PAGE);
- unsigned Opc = is64bit ? ARM64::LDRDui : ARM64::LDRSui;
+ unsigned Opc = is64bit ? AArch64::LDRDui : AArch64::LDRSui;
unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
.addReg(ADRPReg)
- .addConstantPoolIndex(Idx, 0, ARM64II::MO_PAGEOFF | ARM64II::MO_NC);
+ .addConstantPoolIndex(Idx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
return ResultReg;
}
-unsigned ARM64FastISel::ARM64MaterializeGV(const GlobalValue *GV) {
+unsigned AArch64FastISel::AArch64MaterializeGV(const GlobalValue *GV) {
// We can't handle thread-local variables quickly yet. Unfortunately we have
// to peer through any aliases to find out if that rule applies.
const GlobalValue *TLSGV = GV;
if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
- TLSGV = GA->getAliasedGlobal();
+ TLSGV = GA->getAliasee();
+
+ // MachO still uses GOT for large code-model accesses, but ELF requires
+ // movz/movk sequences, which FastISel doesn't handle yet.
+ if (TM.getCodeModel() != CodeModel::Small && !Subtarget->isTargetMachO())
+ return 0;
if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(TLSGV))
if (GVar->isThreadLocal())
@@ -253,35 +260,38 @@ unsigned ARM64FastISel::ARM64MaterializeGV(const GlobalValue *GV) {
EVT DestEVT = TLI.getValueType(GV->getType(), true);
if (!DestEVT.isSimple())
return 0;
- MVT DestVT = DestEVT.getSimpleVT();
- unsigned ADRPReg = createResultReg(&ARM64::GPR64RegClass);
- unsigned ResultReg = createResultReg(TLI.getRegClassFor(DestVT));
+ unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
+ unsigned ResultReg;
- if (OpFlags & ARM64II::MO_GOT) {
+ if (OpFlags & AArch64II::MO_GOT) {
// ADRP + LDRX
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::ADRP),
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
ADRPReg)
- .addGlobalAddress(GV, 0, ARM64II::MO_GOT | ARM64II::MO_PAGE);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::LDRXui),
+ .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGE);
+
+ ResultReg = createResultReg(&AArch64::GPR64RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::LDRXui),
ResultReg)
.addReg(ADRPReg)
- .addGlobalAddress(GV, 0, ARM64II::MO_GOT | ARM64II::MO_PAGEOFF |
- ARM64II::MO_NC);
+ .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF |
+ AArch64II::MO_NC);
} else {
// ADRP + ADDX
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::ADRP),
- ADRPReg).addGlobalAddress(GV, 0, ARM64II::MO_PAGE);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::ADDXri),
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
+ ADRPReg).addGlobalAddress(GV, 0, AArch64II::MO_PAGE);
+
+ ResultReg = createResultReg(&AArch64::GPR64spRegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
ResultReg)
.addReg(ADRPReg)
- .addGlobalAddress(GV, 0, ARM64II::MO_PAGEOFF | ARM64II::MO_NC)
+ .addGlobalAddress(GV, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC)
.addImm(0);
}
return ResultReg;
}
-unsigned ARM64FastISel::TargetMaterializeConstant(const Constant *C) {
+unsigned AArch64FastISel::TargetMaterializeConstant(const Constant *C) {
EVT CEVT = TLI.getValueType(C->getType(), true);
// Only handle simple types.
@@ -291,16 +301,16 @@ unsigned ARM64FastISel::TargetMaterializeConstant(const Constant *C) {
// FIXME: Handle ConstantInt.
if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
- return ARM64MaterializeFP(CFP, VT);
+ return AArch64MaterializeFP(CFP, VT);
else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
- return ARM64MaterializeGV(GV);
+ return AArch64MaterializeGV(GV);
return 0;
}
// Computes the address to get to an object.
-bool ARM64FastISel::ComputeAddress(const Value *Obj, Address &Addr) {
- const User *U = NULL;
+bool AArch64FastISel::ComputeAddress(const Value *Obj, Address &Addr) {
+ const User *U = nullptr;
unsigned Opcode = Instruction::UserOp1;
if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
// Don't walk into other basic blocks unless the object is an alloca from
@@ -407,7 +417,7 @@ bool ARM64FastISel::ComputeAddress(const Value *Obj, Address &Addr) {
return Addr.isValid();
}
-bool ARM64FastISel::isTypeLegal(Type *Ty, MVT &VT) {
+bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) {
EVT evt = TLI.getValueType(Ty, true);
// Only handle simple types.
@@ -415,12 +425,16 @@ bool ARM64FastISel::isTypeLegal(Type *Ty, MVT &VT) {
return false;
VT = evt.getSimpleVT();
- // Handle all legal types, i.e. a register that will directly hold this
+ // This is a legal type, but it's not something we handle in fast-isel.
+ if (VT == MVT::f128)
+ return false;
+
+ // Handle all other legal types, i.e. a register that will directly hold this
// value.
return TLI.isTypeLegal(VT);
}
-bool ARM64FastISel::isLoadStoreTypeLegal(Type *Ty, MVT &VT) {
+bool AArch64FastISel::isLoadStoreTypeLegal(Type *Ty, MVT &VT) {
if (isTypeLegal(Ty, VT))
return true;
@@ -432,8 +446,8 @@ bool ARM64FastISel::isLoadStoreTypeLegal(Type *Ty, MVT &VT) {
return false;
}
-bool ARM64FastISel::SimplifyAddress(Address &Addr, MVT VT, int64_t ScaleFactor,
- bool UseUnscaled) {
+bool AArch64FastISel::SimplifyAddress(Address &Addr, MVT VT,
+ int64_t ScaleFactor, bool UseUnscaled) {
bool needsLowering = false;
int64_t Offset = Addr.getOffset();
switch (VT.SimpleTy) {
@@ -476,9 +490,9 @@ bool ARM64FastISel::SimplifyAddress(Address &Addr, MVT VT, int64_t ScaleFactor,
return true;
}
-void ARM64FastISel::AddLoadStoreOperands(Address &Addr,
- const MachineInstrBuilder &MIB,
- unsigned Flags, bool UseUnscaled) {
+void AArch64FastISel::AddLoadStoreOperands(Address &Addr,
+ const MachineInstrBuilder &MIB,
+ unsigned Flags, bool UseUnscaled) {
int64_t Offset = Addr.getOffset();
// Frame base works a bit differently. Handle it separately.
if (Addr.getKind() == Address::FrameIndexBase) {
@@ -497,8 +511,8 @@ void ARM64FastISel::AddLoadStoreOperands(Address &Addr,
}
}
-bool ARM64FastISel::EmitLoad(MVT VT, unsigned &ResultReg, Address Addr,
- bool UseUnscaled) {
+bool AArch64FastISel::EmitLoad(MVT VT, unsigned &ResultReg, Address Addr,
+ bool UseUnscaled) {
// Negative offsets require unscaled, 9-bit, signed immediate offsets.
// Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
if (!UseUnscaled && Addr.getOffset() < 0)
@@ -515,32 +529,32 @@ bool ARM64FastISel::EmitLoad(MVT VT, unsigned &ResultReg, Address Addr,
VTIsi1 = true;
// Intentional fall-through.
case MVT::i8:
- Opc = UseUnscaled ? ARM64::LDURBBi : ARM64::LDRBBui;
- RC = &ARM64::GPR32RegClass;
+ Opc = UseUnscaled ? AArch64::LDURBBi : AArch64::LDRBBui;
+ RC = &AArch64::GPR32RegClass;
ScaleFactor = 1;
break;
case MVT::i16:
- Opc = UseUnscaled ? ARM64::LDURHHi : ARM64::LDRHHui;
- RC = &ARM64::GPR32RegClass;
+ Opc = UseUnscaled ? AArch64::LDURHHi : AArch64::LDRHHui;
+ RC = &AArch64::GPR32RegClass;
ScaleFactor = 2;
break;
case MVT::i32:
- Opc = UseUnscaled ? ARM64::LDURWi : ARM64::LDRWui;
- RC = &ARM64::GPR32RegClass;
+ Opc = UseUnscaled ? AArch64::LDURWi : AArch64::LDRWui;
+ RC = &AArch64::GPR32RegClass;
ScaleFactor = 4;
break;
case MVT::i64:
- Opc = UseUnscaled ? ARM64::LDURXi : ARM64::LDRXui;
- RC = &ARM64::GPR64RegClass;
+ Opc = UseUnscaled ? AArch64::LDURXi : AArch64::LDRXui;
+ RC = &AArch64::GPR64RegClass;
ScaleFactor = 8;
break;
case MVT::f32:
- Opc = UseUnscaled ? ARM64::LDURSi : ARM64::LDRSui;
+ Opc = UseUnscaled ? AArch64::LDURSi : AArch64::LDRSui;
RC = TLI.getRegClassFor(VT);
ScaleFactor = 4;
break;
case MVT::f64:
- Opc = UseUnscaled ? ARM64::LDURDi : ARM64::LDRDui;
+ Opc = UseUnscaled ? AArch64::LDURDi : AArch64::LDRDui;
RC = TLI.getRegClassFor(VT);
ScaleFactor = 8;
break;
@@ -567,17 +581,18 @@ bool ARM64FastISel::EmitLoad(MVT VT, unsigned &ResultReg, Address Addr,
// Loading an i1 requires special handling.
if (VTIsi1) {
- unsigned ANDReg = createResultReg(&ARM64::GPR32RegClass);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::ANDWri),
+ MRI.constrainRegClass(ResultReg, &AArch64::GPR32RegClass);
+ unsigned ANDReg = createResultReg(&AArch64::GPR32spRegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ANDWri),
ANDReg)
.addReg(ResultReg)
- .addImm(ARM64_AM::encodeLogicalImmediate(1, 32));
+ .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
ResultReg = ANDReg;
}
return true;
}
-bool ARM64FastISel::SelectLoad(const Instruction *I) {
+bool AArch64FastISel::SelectLoad(const Instruction *I) {
MVT VT;
// Verify we have a legal type before going any further. Currently, we handle
// simple types that will directly fit in a register (i32/f32/i64/f64) or
@@ -598,8 +613,8 @@ bool ARM64FastISel::SelectLoad(const Instruction *I) {
return true;
}
-bool ARM64FastISel::EmitStore(MVT VT, unsigned SrcReg, Address Addr,
- bool UseUnscaled) {
+bool AArch64FastISel::EmitStore(MVT VT, unsigned SrcReg, Address Addr,
+ bool UseUnscaled) {
// Negative offsets require unscaled, 9-bit, signed immediate offsets.
// Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
if (!UseUnscaled && Addr.getOffset() < 0)
@@ -615,27 +630,27 @@ bool ARM64FastISel::EmitStore(MVT VT, unsigned SrcReg, Address Addr,
case MVT::i1:
VTIsi1 = true;
case MVT::i8:
- StrOpc = UseUnscaled ? ARM64::STURBBi : ARM64::STRBBui;
+ StrOpc = UseUnscaled ? AArch64::STURBBi : AArch64::STRBBui;
ScaleFactor = 1;
break;
case MVT::i16:
- StrOpc = UseUnscaled ? ARM64::STURHHi : ARM64::STRHHui;
+ StrOpc = UseUnscaled ? AArch64::STURHHi : AArch64::STRHHui;
ScaleFactor = 2;
break;
case MVT::i32:
- StrOpc = UseUnscaled ? ARM64::STURWi : ARM64::STRWui;
+ StrOpc = UseUnscaled ? AArch64::STURWi : AArch64::STRWui;
ScaleFactor = 4;
break;
case MVT::i64:
- StrOpc = UseUnscaled ? ARM64::STURXi : ARM64::STRXui;
+ StrOpc = UseUnscaled ? AArch64::STURXi : AArch64::STRXui;
ScaleFactor = 8;
break;
case MVT::f32:
- StrOpc = UseUnscaled ? ARM64::STURSi : ARM64::STRSui;
+ StrOpc = UseUnscaled ? AArch64::STURSi : AArch64::STRSui;
ScaleFactor = 4;
break;
case MVT::f64:
- StrOpc = UseUnscaled ? ARM64::STURDi : ARM64::STRDui;
+ StrOpc = UseUnscaled ? AArch64::STURDi : AArch64::STRDui;
ScaleFactor = 8;
break;
}
@@ -655,11 +670,12 @@ bool ARM64FastISel::EmitStore(MVT VT, unsigned SrcReg, Address Addr,
// Storing an i1 requires special handling.
if (VTIsi1) {
- unsigned ANDReg = createResultReg(&ARM64::GPR32RegClass);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::ANDWri),
+ MRI.constrainRegClass(SrcReg, &AArch64::GPR32RegClass);
+ unsigned ANDReg = createResultReg(&AArch64::GPR32spRegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ANDWri),
ANDReg)
.addReg(SrcReg)
- .addImm(ARM64_AM::encodeLogicalImmediate(1, 32));
+ .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
SrcReg = ANDReg;
}
// Create the base instruction, then add the operands.
@@ -669,7 +685,7 @@ bool ARM64FastISel::EmitStore(MVT VT, unsigned SrcReg, Address Addr,
return true;
}
-bool ARM64FastISel::SelectStore(const Instruction *I) {
+bool AArch64FastISel::SelectStore(const Instruction *I) {
MVT VT;
Value *Op0 = I->getOperand(0);
// Verify we have a legal type before going any further. Currently, we handle
@@ -694,53 +710,53 @@ bool ARM64FastISel::SelectStore(const Instruction *I) {
return true;
}
-static ARM64CC::CondCode getCompareCC(CmpInst::Predicate Pred) {
+static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) {
switch (Pred) {
case CmpInst::FCMP_ONE:
case CmpInst::FCMP_UEQ:
default:
// AL is our "false" for now. The other two need more compares.
- return ARM64CC::AL;
+ return AArch64CC::AL;
case CmpInst::ICMP_EQ:
case CmpInst::FCMP_OEQ:
- return ARM64CC::EQ;
+ return AArch64CC::EQ;
case CmpInst::ICMP_SGT:
case CmpInst::FCMP_OGT:
- return ARM64CC::GT;
+ return AArch64CC::GT;
case CmpInst::ICMP_SGE:
case CmpInst::FCMP_OGE:
- return ARM64CC::GE;
+ return AArch64CC::GE;
case CmpInst::ICMP_UGT:
case CmpInst::FCMP_UGT:
- return ARM64CC::HI;
+ return AArch64CC::HI;
case CmpInst::FCMP_OLT:
- return ARM64CC::MI;
+ return AArch64CC::MI;
case CmpInst::ICMP_ULE:
case CmpInst::FCMP_OLE:
- return ARM64CC::LS;
+ return AArch64CC::LS;
case CmpInst::FCMP_ORD:
- return ARM64CC::VC;
+ return AArch64CC::VC;
case CmpInst::FCMP_UNO:
- return ARM64CC::VS;
+ return AArch64CC::VS;
case CmpInst::FCMP_UGE:
- return ARM64CC::PL;
+ return AArch64CC::PL;
case CmpInst::ICMP_SLT:
case CmpInst::FCMP_ULT:
- return ARM64CC::LT;
+ return AArch64CC::LT;
case CmpInst::ICMP_SLE:
case CmpInst::FCMP_ULE:
- return ARM64CC::LE;
+ return AArch64CC::LE;
case CmpInst::FCMP_UNE:
case CmpInst::ICMP_NE:
- return ARM64CC::NE;
+ return AArch64CC::NE;
case CmpInst::ICMP_UGE:
- return ARM64CC::CS;
+ return AArch64CC::HS;
case CmpInst::ICMP_ULT:
- return ARM64CC::CC;
+ return AArch64CC::LO;
}
}
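
The only behavioral subtlety in this table is naming: AArch64CC uses the architectural aliases HS ("higher or same") and LO ("lower") for what the ARM64 enum called CS and CC; the encodings are identical. The unsigned mappings follow from AArch64 compares being subtracts whose carry flag means "no borrow":

    #include <cstdint>

    // Model of the C flag after "cmp a, b" (i.e. subs xzr, a, b):
    // carry is set exactly when the unsigned subtraction does not borrow.
    bool carryAfterCmp(uint64_t A, uint64_t B) { return A >= B; }
    // Hence ICMP_UGE -> HS (C == 1) and ICMP_ULT -> LO (C == 0) above.
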
-bool ARM64FastISel::SelectBranch(const Instruction *I) {
+bool AArch64FastISel::SelectBranch(const Instruction *I) {
const BranchInst *BI = cast<BranchInst>(I);
MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
@@ -748,8 +764,8 @@ bool ARM64FastISel::SelectBranch(const Instruction *I) {
if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
if (CI->hasOneUse() && (CI->getParent() == I->getParent())) {
// We may not handle every CC for now.
- ARM64CC::CondCode CC = getCompareCC(CI->getPredicate());
- if (CC == ARM64CC::AL)
+ AArch64CC::CondCode CC = getCompareCC(CI->getPredicate());
+ if (CC == AArch64CC::AL)
return false;
// Emit the cmp.
@@ -757,7 +773,7 @@ bool ARM64FastISel::SelectBranch(const Instruction *I) {
return false;
// Emit the branch.
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::Bcc))
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
.addImm(CC)
.addMBB(TBB);
FuncInfo.MBB->addSuccessor(TBB);
@@ -776,25 +792,27 @@ bool ARM64FastISel::SelectBranch(const Instruction *I) {
// Issue an extract_subreg to get the lower 32-bits.
if (SrcVT == MVT::i64)
CondReg = FastEmitInst_extractsubreg(MVT::i32, CondReg, /*Kill=*/true,
- ARM64::sub_32);
+ AArch64::sub_32);
- unsigned ANDReg = createResultReg(&ARM64::GPR32RegClass);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::ANDWri),
- ANDReg)
+ MRI.constrainRegClass(CondReg, &AArch64::GPR32RegClass);
+ unsigned ANDReg = createResultReg(&AArch64::GPR32spRegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(AArch64::ANDWri), ANDReg)
.addReg(CondReg)
- .addImm(ARM64_AM::encodeLogicalImmediate(1, 32));
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::SUBSWri))
+ .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(AArch64::SUBSWri))
.addReg(ANDReg)
.addReg(ANDReg)
.addImm(0)
.addImm(0);
- unsigned CC = ARM64CC::NE;
+ unsigned CC = AArch64CC::NE;
if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
std::swap(TBB, FBB);
- CC = ARM64CC::EQ;
+ CC = AArch64CC::EQ;
}
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::Bcc))
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
.addImm(CC)
.addMBB(TBB);
FuncInfo.MBB->addSuccessor(TBB);
@@ -805,7 +823,7 @@ bool ARM64FastISel::SelectBranch(const Instruction *I) {
dyn_cast<ConstantInt>(BI->getCondition())) {
uint64_t Imm = CI->getZExtValue();
MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::B))
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::B))
.addMBB(Target);
FuncInfo.MBB->addSuccessor(Target);
return true;
@@ -822,19 +840,19 @@ bool ARM64FastISel::SelectBranch(const Instruction *I) {
// Regardless, the compare has been done in the predecessor block,
// and it left a value for us in a virtual register. Ergo, we test
// the one-bit value left in the virtual register.
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::SUBSWri),
- ARM64::WZR)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::SUBSWri),
+ AArch64::WZR)
.addReg(CondReg)
.addImm(0)
.addImm(0);
- unsigned CC = ARM64CC::NE;
+ unsigned CC = AArch64CC::NE;
if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
std::swap(TBB, FBB);
- CC = ARM64CC::EQ;
+ CC = AArch64CC::EQ;
}
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::Bcc))
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
.addImm(CC)
.addMBB(TBB);
FuncInfo.MBB->addSuccessor(TBB);
@@ -842,14 +860,14 @@ bool ARM64FastISel::SelectBranch(const Instruction *I) {
return true;
}
-bool ARM64FastISel::SelectIndirectBr(const Instruction *I) {
+bool AArch64FastISel::SelectIndirectBr(const Instruction *I) {
const IndirectBrInst *BI = cast<IndirectBrInst>(I);
unsigned AddrReg = getRegForValue(BI->getOperand(0));
if (AddrReg == 0)
return false;
// Emit the indirect branch.
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::BR))
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BR))
.addReg(AddrReg);
// Make sure the CFG is up-to-date.
@@ -859,7 +877,7 @@ bool ARM64FastISel::SelectIndirectBr(const Instruction *I) {
return true;
}
-bool ARM64FastISel::EmitCmp(Value *Src1Value, Value *Src2Value, bool isZExt) {
+bool AArch64FastISel::EmitCmp(Value *Src1Value, Value *Src2Value, bool isZExt) {
Type *Ty = Src1Value->getType();
EVT SrcEVT = TLI.getValueType(Ty, true);
if (!SrcEVT.isSimple())
@@ -903,26 +921,26 @@ bool ARM64FastISel::EmitCmp(Value *Src1Value, Value *Src2Value, bool isZExt) {
needsExt = true;
// Intentional fall-through.
case MVT::i32:
- ZReg = ARM64::WZR;
+ ZReg = AArch64::WZR;
if (UseImm)
- CmpOpc = isNegativeImm ? ARM64::ADDSWri : ARM64::SUBSWri;
+ CmpOpc = isNegativeImm ? AArch64::ADDSWri : AArch64::SUBSWri;
else
- CmpOpc = ARM64::SUBSWrr;
+ CmpOpc = AArch64::SUBSWrr;
break;
case MVT::i64:
- ZReg = ARM64::XZR;
+ ZReg = AArch64::XZR;
if (UseImm)
- CmpOpc = isNegativeImm ? ARM64::ADDSXri : ARM64::SUBSXri;
+ CmpOpc = isNegativeImm ? AArch64::ADDSXri : AArch64::SUBSXri;
else
- CmpOpc = ARM64::SUBSXrr;
+ CmpOpc = AArch64::SUBSXrr;
break;
case MVT::f32:
isICmp = false;
- CmpOpc = UseImm ? ARM64::FCMPSri : ARM64::FCMPSrr;
+ CmpOpc = UseImm ? AArch64::FCMPSri : AArch64::FCMPSrr;
break;
case MVT::f64:
isICmp = false;
- CmpOpc = UseImm ? ARM64::FCMPDri : ARM64::FCMPDrr;
+ CmpOpc = UseImm ? AArch64::FCMPDri : AArch64::FCMPDrr;
break;
}
@@ -973,12 +991,12 @@ bool ARM64FastISel::EmitCmp(Value *Src1Value, Value *Src2Value, bool isZExt) {
return true;
}
-bool ARM64FastISel::SelectCmp(const Instruction *I) {
+bool AArch64FastISel::SelectCmp(const Instruction *I) {
const CmpInst *CI = cast<CmpInst>(I);
// We may not handle every CC for now.
- ARM64CC::CondCode CC = getCompareCC(CI->getPredicate());
- if (CC == ARM64CC::AL)
+ AArch64CC::CondCode CC = getCompareCC(CI->getPredicate());
+ if (CC == AArch64CC::AL)
return false;
// Emit the cmp.
@@ -986,19 +1004,19 @@ bool ARM64FastISel::SelectCmp(const Instruction *I) {
return false;
// Now set a register based on the comparison.
- ARM64CC::CondCode invertedCC = getInvertedCondCode(CC);
- unsigned ResultReg = createResultReg(&ARM64::GPR32RegClass);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::CSINCWr),
+ AArch64CC::CondCode invertedCC = getInvertedCondCode(CC);
+ unsigned ResultReg = createResultReg(&AArch64::GPR32RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
ResultReg)
- .addReg(ARM64::WZR)
- .addReg(ARM64::WZR)
+ .addReg(AArch64::WZR)
+ .addReg(AArch64::WZR)
.addImm(invertedCC);
UpdateValueMap(I, ResultReg);
return true;
}
-bool ARM64FastISel::SelectSelect(const Instruction *I) {
+bool AArch64FastISel::SelectSelect(const Instruction *I) {
const SelectInst *SI = cast<SelectInst>(I);
EVT DestEVT = TLI.getValueType(SI->getType(), true);
@@ -1020,13 +1038,15 @@ bool ARM64FastISel::SelectSelect(const Instruction *I) {
if (FalseReg == 0)
return false;
- unsigned ANDReg = createResultReg(&ARM64::GPR32RegClass);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::ANDWri),
+
+ MRI.constrainRegClass(CondReg, &AArch64::GPR32RegClass);
+ unsigned ANDReg = createResultReg(&AArch64::GPR32spRegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ANDWri),
ANDReg)
.addReg(CondReg)
- .addImm(ARM64_AM::encodeLogicalImmediate(1, 32));
+ .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::SUBSWri))
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::SUBSWri))
.addReg(ANDReg)
.addReg(ANDReg)
.addImm(0)
@@ -1037,16 +1057,16 @@ bool ARM64FastISel::SelectSelect(const Instruction *I) {
default:
return false;
case MVT::i32:
- SelectOpc = ARM64::CSELWr;
+ SelectOpc = AArch64::CSELWr;
break;
case MVT::i64:
- SelectOpc = ARM64::CSELXr;
+ SelectOpc = AArch64::CSELXr;
break;
case MVT::f32:
- SelectOpc = ARM64::FCSELSrrr;
+ SelectOpc = AArch64::FCSELSrrr;
break;
case MVT::f64:
- SelectOpc = ARM64::FCSELDrrr;
+ SelectOpc = AArch64::FCSELDrrr;
break;
}
@@ -1055,13 +1075,13 @@ bool ARM64FastISel::SelectSelect(const Instruction *I) {
ResultReg)
.addReg(TrueReg)
.addReg(FalseReg)
- .addImm(ARM64CC::NE);
+ .addImm(AArch64CC::NE);
UpdateValueMap(I, ResultReg);
return true;
}
-bool ARM64FastISel::SelectFPExt(const Instruction *I) {
+bool AArch64FastISel::SelectFPExt(const Instruction *I) {
Value *V = I->getOperand(0);
if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy())
return false;
@@ -1070,14 +1090,14 @@ bool ARM64FastISel::SelectFPExt(const Instruction *I) {
if (Op == 0)
return false;
- unsigned ResultReg = createResultReg(&ARM64::FPR64RegClass);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::FCVTDSr),
+ unsigned ResultReg = createResultReg(&AArch64::FPR64RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTDSr),
ResultReg).addReg(Op);
UpdateValueMap(I, ResultReg);
return true;
}
-bool ARM64FastISel::SelectFPTrunc(const Instruction *I) {
+bool AArch64FastISel::SelectFPTrunc(const Instruction *I) {
Value *V = I->getOperand(0);
if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy())
return false;
@@ -1086,15 +1106,15 @@ bool ARM64FastISel::SelectFPTrunc(const Instruction *I) {
if (Op == 0)
return false;
- unsigned ResultReg = createResultReg(&ARM64::FPR32RegClass);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::FCVTSDr),
+ unsigned ResultReg = createResultReg(&AArch64::FPR32RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTSDr),
ResultReg).addReg(Op);
UpdateValueMap(I, ResultReg);
return true;
}
// FPToUI and FPToSI
-bool ARM64FastISel::SelectFPToInt(const Instruction *I, bool Signed) {
+bool AArch64FastISel::SelectFPToInt(const Instruction *I, bool Signed) {
MVT DestVT;
if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
return false;
@@ -1104,30 +1124,35 @@ bool ARM64FastISel::SelectFPToInt(const Instruction *I, bool Signed) {
return false;
EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType(), true);
+ if (SrcVT == MVT::f128)
+ return false;
unsigned Opc;
if (SrcVT == MVT::f64) {
if (Signed)
- Opc = (DestVT == MVT::i32) ? ARM64::FCVTZSUWDr : ARM64::FCVTZSUXDr;
+ Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWDr : AArch64::FCVTZSUXDr;
else
- Opc = (DestVT == MVT::i32) ? ARM64::FCVTZUUWDr : ARM64::FCVTZUUXDr;
+ Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWDr : AArch64::FCVTZUUXDr;
} else {
if (Signed)
- Opc = (DestVT == MVT::i32) ? ARM64::FCVTZSUWSr : ARM64::FCVTZSUXSr;
+ Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWSr : AArch64::FCVTZSUXSr;
else
- Opc = (DestVT == MVT::i32) ? ARM64::FCVTZUUWSr : ARM64::FCVTZUUXSr;
+ Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWSr : AArch64::FCVTZUUXSr;
}
- unsigned ResultReg = createResultReg(TLI.getRegClassFor(DestVT));
+ unsigned ResultReg = createResultReg(
+ DestVT == MVT::i32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
.addReg(SrcReg);
UpdateValueMap(I, ResultReg);
return true;
}
-bool ARM64FastISel::SelectIntToFP(const Instruction *I, bool Signed) {
+bool AArch64FastISel::SelectIntToFP(const Instruction *I, bool Signed) {
MVT DestVT;
if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
return false;
+ assert((DestVT == MVT::f32 || DestVT == MVT::f64) &&
+        "Unexpected value type.");
unsigned SrcReg = getRegForValue(I->getOperand(0));
if (SrcReg == 0)
@@ -1143,17 +1168,20 @@ bool ARM64FastISel::SelectIntToFP(const Instruction *I, bool Signed) {
return false;
}
+ MRI.constrainRegClass(SrcReg, SrcVT == MVT::i64 ? &AArch64::GPR64RegClass
+ : &AArch64::GPR32RegClass);
+
unsigned Opc;
if (SrcVT == MVT::i64) {
if (Signed)
- Opc = (DestVT == MVT::f32) ? ARM64::SCVTFUXSri : ARM64::SCVTFUXDri;
+ Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUXSri : AArch64::SCVTFUXDri;
else
- Opc = (DestVT == MVT::f32) ? ARM64::UCVTFUXSri : ARM64::UCVTFUXDri;
+ Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUXSri : AArch64::UCVTFUXDri;
} else {
if (Signed)
- Opc = (DestVT == MVT::f32) ? ARM64::SCVTFUWSri : ARM64::SCVTFUWDri;
+ Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUWSri : AArch64::SCVTFUWDri;
else
- Opc = (DestVT == MVT::f32) ? ARM64::UCVTFUWSri : ARM64::UCVTFUWDri;
+ Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUWSri : AArch64::UCVTFUWDri;
}
unsigned ResultReg = createResultReg(TLI.getRegClassFor(DestVT));
@@ -1163,12 +1191,11 @@ bool ARM64FastISel::SelectIntToFP(const Instruction *I, bool Signed) {
return true;
}
-bool ARM64FastISel::ProcessCallArgs(SmallVectorImpl<Value *> &Args,
- SmallVectorImpl<unsigned> &ArgRegs,
- SmallVectorImpl<MVT> &ArgVTs,
- SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
- SmallVectorImpl<unsigned> &RegArgs,
- CallingConv::ID CC, unsigned &NumBytes) {
+bool AArch64FastISel::ProcessCallArgs(
+ SmallVectorImpl<Value *> &Args, SmallVectorImpl<unsigned> &ArgRegs,
+ SmallVectorImpl<MVT> &ArgVTs, SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
+ SmallVectorImpl<unsigned> &RegArgs, CallingConv::ID CC,
+ unsigned &NumBytes) {
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CC, false, *FuncInfo.MF, TM, ArgLocs, *Context);
CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CCAssignFnForCall(CC));
@@ -1227,10 +1254,16 @@ bool ARM64FastISel::ProcessCallArgs(SmallVectorImpl<Value *> &Args,
assert(VA.isMemLoc() && "Assuming store on stack.");
// Need to store on the stack.
+ unsigned ArgSize = VA.getLocVT().getSizeInBits() / 8;
+
+ unsigned BEAlign = 0;
+ if (ArgSize < 8 && !Subtarget->isLittleEndian())
+ BEAlign = 8 - ArgSize;
+
Address Addr;
Addr.setKind(Address::RegBase);
- Addr.setReg(ARM64::SP);
- Addr.setOffset(VA.getLocMemOffset());
+ Addr.setReg(AArch64::SP);
+ Addr.setOffset(VA.getLocMemOffset() + BEAlign);
if (!EmitStore(ArgVT, Arg, Addr))
return false;
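
The BEAlign adjustment implements the AAPCS64 rule that stack argument slots are 8 bytes and that, on big-endian targets, a smaller-than-slot argument occupies the high-address end of its slot. A minimal sketch of the offset computation:

    #include <cstdint>

    // Offset at which an argument's bytes actually live within its slot.
    uint64_t beStoreOffset(uint64_t SlotOffset, unsigned ArgSize,
                           bool LittleEndian) {
      unsigned BEAlign = (!LittleEndian && ArgSize < 8) ? 8 - ArgSize : 0;
      return SlotOffset + BEAlign;
    }
    // A 4-byte argument at slot offset 16 is stored at 16 on little-endian
    // targets and at 20 on big-endian ones.
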
@@ -1239,9 +1272,9 @@ bool ARM64FastISel::ProcessCallArgs(SmallVectorImpl<Value *> &Args,
return true;
}
-bool ARM64FastISel::FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
- const Instruction *I, CallingConv::ID CC,
- unsigned &NumBytes) {
+bool AArch64FastISel::FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
+ const Instruction *I, CallingConv::ID CC,
+ unsigned &NumBytes) {
// Issue CALLSEQ_END
unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp))
@@ -1273,8 +1306,8 @@ bool ARM64FastISel::FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
return true;
}
-bool ARM64FastISel::SelectCall(const Instruction *I,
- const char *IntrMemName = 0) {
+bool AArch64FastISel::SelectCall(const Instruction *I,
+ const char *IntrMemName = nullptr) {
const CallInst *CI = cast<CallInst>(I);
const Value *Callee = CI->getCalledValue();
@@ -1367,7 +1400,7 @@ bool ARM64FastISel::SelectCall(const Instruction *I,
// Issue the call.
MachineInstrBuilder MIB;
- MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::BL));
+ MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BL));
if (!IntrMemName)
MIB.addGlobalAddress(GV, 0, 0);
else
@@ -1392,15 +1425,15 @@ bool ARM64FastISel::SelectCall(const Instruction *I,
return true;
}
-bool ARM64FastISel::IsMemCpySmall(uint64_t Len, unsigned Alignment) {
+bool AArch64FastISel::IsMemCpySmall(uint64_t Len, unsigned Alignment) {
if (Alignment)
return Len / Alignment <= 4;
else
return Len < 32;
}
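
Standalone, the heuristic reads: with a known alignment, inline the memcpy when it takes at most four naturally aligned transfers; with unknown alignment, inline anything under 32 bytes:

    #include <cstdint>

    // Mirror of IsMemCpySmall's decision, for experimentation.
    bool isSmallCopy(uint64_t Len, unsigned Alignment) {
      return Alignment ? Len / Alignment <= 4 : Len < 32;
    }
    // isSmallCopy(32, 8) is true (four 8-byte transfers);
    // isSmallCopy(40, 8) and isSmallCopy(32, 0) are false.
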
-bool ARM64FastISel::TryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
- unsigned Alignment) {
+bool AArch64FastISel::TryEmitSmallMemCpy(Address Dest, Address Src,
+ uint64_t Len, unsigned Alignment) {
// Make sure we don't bloat code by inlining very large memcpy's.
if (!IsMemCpySmall(Len, Alignment))
return false;
@@ -1452,7 +1485,7 @@ bool ARM64FastISel::TryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
return true;
}
-bool ARM64FastISel::SelectIntrinsicCall(const IntrinsicInst &I) {
+bool AArch64FastISel::SelectIntrinsicCall(const IntrinsicInst &I) {
// FIXME: Handle more intrinsics.
switch (I.getIntrinsicID()) {
default:
@@ -1510,7 +1543,7 @@ bool ARM64FastISel::SelectIntrinsicCall(const IntrinsicInst &I) {
return SelectCall(&I, "memset");
}
case Intrinsic::trap: {
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::BRK))
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK))
.addImm(1);
return true;
}
@@ -1518,7 +1551,7 @@ bool ARM64FastISel::SelectIntrinsicCall(const IntrinsicInst &I) {
return false;
}
-bool ARM64FastISel::SelectRet(const Instruction *I) {
+bool AArch64FastISel::SelectRet(const Instruction *I) {
const ReturnInst *Ret = cast<ReturnInst>(I);
const Function &F = *I->getParent()->getParent();
@@ -1540,8 +1573,8 @@ bool ARM64FastISel::SelectRet(const Instruction *I) {
SmallVector<CCValAssign, 16> ValLocs;
CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, TM, ValLocs,
I->getContext());
- CCAssignFn *RetCC = CC == CallingConv::WebKit_JS ? RetCC_ARM64_WebKit_JS
- : RetCC_ARM64_AAPCS;
+ CCAssignFn *RetCC = CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS
+ : RetCC_AArch64_AAPCS;
CCInfo.AnalyzeReturn(Outs, RetCC);
// Only handle a single return value for now.
@@ -1570,7 +1603,14 @@ bool ARM64FastISel::SelectRet(const Instruction *I) {
EVT RVEVT = TLI.getValueType(RV->getType());
if (!RVEVT.isSimple())
return false;
+
+ // Vectors (of > 1 lane) in big endian need tricky handling.
+ if (RVEVT.isVector() && RVEVT.getVectorNumElements() > 1)
+ return false;
+
MVT RVVT = RVEVT.getSimpleVT();
+ if (RVVT == MVT::f128)
+ return false;
MVT DestVT = VA.getValVT();
// Special handling for extended integers.
if (RVVT != DestVT) {
@@ -1595,13 +1635,13 @@ bool ARM64FastISel::SelectRet(const Instruction *I) {
}
MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(ARM64::RET_ReallyLR));
+ TII.get(AArch64::RET_ReallyLR));
for (unsigned i = 0, e = RetRegs.size(); i != e; ++i)
MIB.addReg(RetRegs[i], RegState::Implicit);
return true;
}
-bool ARM64FastISel::SelectTrunc(const Instruction *I) {
+bool AArch64FastISel::SelectTrunc(const Instruction *I) {
Type *DestTy = I->getType();
Value *Op = I->getOperand(0);
Type *SrcTy = Op->getType();
@@ -1648,13 +1688,14 @@ bool ARM64FastISel::SelectTrunc(const Instruction *I) {
}
// Issue an extract_subreg to get the lower 32-bits.
unsigned Reg32 = FastEmitInst_extractsubreg(MVT::i32, SrcReg, /*Kill=*/true,
- ARM64::sub_32);
+ AArch64::sub_32);
+ MRI.constrainRegClass(Reg32, &AArch64::GPR32RegClass);
// Create the AND instruction which performs the actual truncation.
- unsigned ANDReg = createResultReg(&ARM64::GPR32RegClass);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::ANDWri),
+ unsigned ANDReg = createResultReg(&AArch64::GPR32spRegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ANDWri),
ANDReg)
.addReg(Reg32)
- .addImm(ARM64_AM::encodeLogicalImmediate(Mask, 32));
+ .addImm(AArch64_AM::encodeLogicalImmediate(Mask, 32));
SrcReg = ANDReg;
}
@@ -1662,7 +1703,7 @@ bool ARM64FastISel::SelectTrunc(const Instruction *I) {
return true;
}
-unsigned ARM64FastISel::Emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt) {
+unsigned AArch64FastISel::Emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt) {
assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 ||
DestVT == MVT::i64) &&
"Unexpected value type.");
@@ -1671,21 +1712,22 @@ unsigned ARM64FastISel::Emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt) {
DestVT = MVT::i32;
if (isZExt) {
- unsigned ResultReg = createResultReg(&ARM64::GPR32RegClass);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::ANDWri),
+ MRI.constrainRegClass(SrcReg, &AArch64::GPR32RegClass);
+ unsigned ResultReg = createResultReg(&AArch64::GPR32spRegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ANDWri),
ResultReg)
.addReg(SrcReg)
- .addImm(ARM64_AM::encodeLogicalImmediate(1, 32));
+ .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
if (DestVT == MVT::i64) {
// We're ZExt i1 to i64. The ANDWri Wd, Ws, #1 implicitly clears the
// upper 32 bits. Emit a SUBREG_TO_REG to extend from Wd to Xd.
- unsigned Reg64 = MRI.createVirtualRegister(&ARM64::GPR64RegClass);
+ unsigned Reg64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(ARM64::SUBREG_TO_REG), Reg64)
+ TII.get(AArch64::SUBREG_TO_REG), Reg64)
.addImm(0)
.addReg(ResultReg)
- .addImm(ARM64::sub_32);
+ .addImm(AArch64::sub_32);
ResultReg = Reg64;
}
return ResultReg;
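
The zero-extension path relies on an AArch64 invariant: any 32-bit (W-register) operation implicitly zeroes the upper 32 bits of the X register, so after the ANDWri the i64 result already exists and SUBREG_TO_REG is pure register-class bookkeeping rather than a real instruction. As C++:

    #include <cstdint>

    // Semantics of the ANDWri + SUBREG_TO_REG pair for zext i1 -> i64.
    uint64_t zextI1ToI64(uint32_t Src) {
      uint32_t W = Src & 1;   // ANDWri Wd, Ws, #1 (also clears bits 63:32)
      return (uint64_t)W;     // SUBREG_TO_REG: upper half already zero
    }
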
@@ -1694,8 +1736,8 @@ unsigned ARM64FastISel::Emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt) {
// FIXME: We're SExt i1 to i64.
return 0;
}
- unsigned ResultReg = createResultReg(&ARM64::GPR32RegClass);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(ARM64::SBFMWri),
+ unsigned ResultReg = createResultReg(&AArch64::GPR32RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::SBFMWri),
ResultReg)
.addReg(SrcReg)
.addImm(0)
@@ -1704,8 +1746,8 @@ unsigned ARM64FastISel::Emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt) {
}
}
-unsigned ARM64FastISel::EmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
- bool isZExt) {
+unsigned AArch64FastISel::EmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
+ bool isZExt) {
assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?");
unsigned Opc;
unsigned Imm = 0;
@@ -1717,21 +1759,21 @@ unsigned ARM64FastISel::EmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
return Emiti1Ext(SrcReg, DestVT, isZExt);
case MVT::i8:
if (DestVT == MVT::i64)
- Opc = isZExt ? ARM64::UBFMXri : ARM64::SBFMXri;
+ Opc = isZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
else
- Opc = isZExt ? ARM64::UBFMWri : ARM64::SBFMWri;
+ Opc = isZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
Imm = 7;
break;
case MVT::i16:
if (DestVT == MVT::i64)
- Opc = isZExt ? ARM64::UBFMXri : ARM64::SBFMXri;
+ Opc = isZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
else
- Opc = isZExt ? ARM64::UBFMWri : ARM64::SBFMWri;
+ Opc = isZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
Imm = 15;
break;
case MVT::i32:
assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?");
- Opc = isZExt ? ARM64::UBFMXri : ARM64::SBFMXri;
+ Opc = isZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
Imm = 31;
break;
}
@@ -1739,6 +1781,15 @@ unsigned ARM64FastISel::EmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
// Handle i8 and i16 as i32.
if (DestVT == MVT::i8 || DestVT == MVT::i16)
DestVT = MVT::i32;
+ else if (DestVT == MVT::i64) {
+ unsigned Src64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(AArch64::SUBREG_TO_REG), Src64)
+ .addImm(0)
+ .addReg(SrcReg)
+ .addImm(AArch64::sub_32);
+ SrcReg = Src64;
+ }
unsigned ResultReg = createResultReg(TLI.getRegClassFor(DestVT));
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
@@ -1749,7 +1800,7 @@ unsigned ARM64FastISel::EmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
return ResultReg;
}
-bool ARM64FastISel::SelectIntExt(const Instruction *I) {
+bool AArch64FastISel::SelectIntExt(const Instruction *I) {
// On ARM, in general, integer casts don't involve legal types; this code
// handles promotable integers. The high bits for a type smaller than
// the register size are assumed to be undefined.
@@ -1778,7 +1829,7 @@ bool ARM64FastISel::SelectIntExt(const Instruction *I) {
return true;
}
-bool ARM64FastISel::SelectRem(const Instruction *I, unsigned ISDOpcode) {
+bool AArch64FastISel::SelectRem(const Instruction *I, unsigned ISDOpcode) {
EVT DestEVT = TLI.getValueType(I->getType(), true);
if (!DestEVT.isSimple())
return false;
@@ -1793,13 +1844,13 @@ bool ARM64FastISel::SelectRem(const Instruction *I, unsigned ISDOpcode) {
default:
return false;
case ISD::SREM:
- DivOpc = is64bit ? ARM64::SDIVXr : ARM64::SDIVWr;
+ DivOpc = is64bit ? AArch64::SDIVXr : AArch64::SDIVWr;
break;
case ISD::UREM:
- DivOpc = is64bit ? ARM64::UDIVXr : ARM64::UDIVWr;
+ DivOpc = is64bit ? AArch64::UDIVXr : AArch64::UDIVWr;
break;
}
- unsigned MSubOpc = is64bit ? ARM64::MSUBXrrr : ARM64::MSUBWrrr;
+ unsigned MSubOpc = is64bit ? AArch64::MSUBXrrr : AArch64::MSUBWrrr;
unsigned Src0Reg = getRegForValue(I->getOperand(0));
if (!Src0Reg)
return false;
@@ -1808,21 +1859,22 @@ bool ARM64FastISel::SelectRem(const Instruction *I, unsigned ISDOpcode) {
if (!Src1Reg)
return false;
- unsigned ResultReg = createResultReg(TLI.getRegClassFor(DestVT));
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(DivOpc), ResultReg)
+ unsigned QuotReg = createResultReg(TLI.getRegClassFor(DestVT));
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(DivOpc), QuotReg)
.addReg(Src0Reg)
.addReg(Src1Reg);
 // The remainder is computed as numerator - (quotient * denominator) using the
// MSUB instruction.
+ unsigned ResultReg = createResultReg(TLI.getRegClassFor(DestVT));
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(MSubOpc), ResultReg)
- .addReg(ResultReg)
+ .addReg(QuotReg)
.addReg(Src1Reg)
.addReg(Src0Reg);
UpdateValueMap(I, ResultReg);
return true;
}
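// For reference: AArch64 has no remainder instruction, so srem/urem expands
// to a divide followed by a multiply-subtract, roughly:
//   sdiv w8, w0, w1       ; w8 = w0 / w1 (quotient)
//   msub w0, w8, w1, w0   ; w0 = w0 - w8 * w1 (remainder)
// Note that the change above also gives the quotient its own virtual
// register (QuotReg), so the MSUB no longer re-defines one of its sources.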
-bool ARM64FastISel::SelectMul(const Instruction *I) {
+bool AArch64FastISel::SelectMul(const Instruction *I) {
EVT SrcEVT = TLI.getValueType(I->getOperand(0)->getType(), true);
if (!SrcEVT.isSimple())
return false;
@@ -1841,12 +1893,12 @@ bool ARM64FastISel::SelectMul(const Instruction *I) {
case MVT::i8:
case MVT::i16:
case MVT::i32:
- ZReg = ARM64::WZR;
- Opc = ARM64::MADDWrrr;
+ ZReg = AArch64::WZR;
+ Opc = AArch64::MADDWrrr;
break;
case MVT::i64:
- ZReg = ARM64::XZR;
- Opc = ARM64::MADDXrrr;
+ ZReg = AArch64::XZR;
+ Opc = AArch64::MADDXrrr;
break;
}
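// For reference: base AArch64 has no standalone MUL encoding; MUL is an
// alias of MADD with the zero register as the addend, e.g. (illustrative)
//   madd w0, w1, w2, wzr  ; w0 = wzr + w1 * w2 == w1 * w2
// which is why SelectMul pairs MADDWrrr/MADDXrrr with WZR/XZR here.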
@@ -1868,7 +1920,7 @@ bool ARM64FastISel::SelectMul(const Instruction *I) {
return true;
}
-bool ARM64FastISel::TargetSelectInstruction(const Instruction *I) {
+bool AArch64FastISel::TargetSelectInstruction(const Instruction *I) {
switch (I->getOpcode()) {
default:
break;
@@ -1918,12 +1970,12 @@ bool ARM64FastISel::TargetSelectInstruction(const Instruction *I) {
}
return false;
// Silence warnings.
- (void)&CC_ARM64_DarwinPCS_VarArg;
+ (void)&CC_AArch64_DarwinPCS_VarArg;
}
namespace llvm {
-llvm::FastISel *ARM64::createFastISel(FunctionLoweringInfo &funcInfo,
- const TargetLibraryInfo *libInfo) {
- return new ARM64FastISel(funcInfo, libInfo);
+llvm::FastISel *AArch64::createFastISel(FunctionLoweringInfo &funcInfo,
+ const TargetLibraryInfo *libInfo) {
+ return new AArch64FastISel(funcInfo, libInfo);
}
}
diff --git a/lib/Target/AArch64/AArch64FrameLowering.cpp b/lib/Target/AArch64/AArch64FrameLowering.cpp
index b29587a..deb306a 100644
--- a/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -1,4 +1,4 @@
-//===- AArch64FrameLowering.cpp - AArch64 Frame Information ---------------===//
+//===- AArch64FrameLowering.cpp - AArch64 Frame Lowering -------*- C++ -*-====//
//
// The LLVM Compiler Infrastructure
//
@@ -11,227 +11,444 @@
//
//===----------------------------------------------------------------------===//
-#include "AArch64.h"
#include "AArch64FrameLowering.h"
#include "AArch64InstrInfo.h"
#include "AArch64MachineFunctionInfo.h"
+#include "AArch64Subtarget.h"
+#include "AArch64TargetMachine.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
-#include "llvm/IR/Function.h"
-#include "llvm/MC/MachineLocation.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
-void AArch64FrameLowering::splitSPAdjustments(uint64_t Total,
- uint64_t &Initial,
- uint64_t &Residual) const {
- // 0x1f0 here is a pessimistic (i.e. realistic) boundary: x-register LDP
- // instructions have a 7-bit signed immediate scaled by 8, giving a reach of
- // 0x1f8, but stack adjustment should always be a multiple of 16.
- if (Total <= 0x1f0) {
- Initial = Total;
- Residual = 0;
- } else {
- Initial = 0x1f0;
- Residual = Total - Initial;
+#define DEBUG_TYPE "frame-info"
+
+static cl::opt<bool> EnableRedZone("aarch64-redzone",
+ cl::desc("enable use of redzone on AArch64"),
+ cl::init(false), cl::Hidden);
+
+STATISTIC(NumRedZoneFunctions, "Number of functions using red zone");
+
+static unsigned estimateStackSize(MachineFunction &MF) {
+ const MachineFrameInfo *FFI = MF.getFrameInfo();
+ int Offset = 0;
+ for (int i = FFI->getObjectIndexBegin(); i != 0; ++i) {
+ int FixedOff = -FFI->getObjectOffset(i);
+ if (FixedOff > Offset)
+ Offset = FixedOff;
+ }
+ for (unsigned i = 0, e = FFI->getObjectIndexEnd(); i != e; ++i) {
+ if (FFI->isDeadObjectIndex(i))
+ continue;
+ Offset += FFI->getObjectSize(i);
+ unsigned Align = FFI->getObjectAlignment(i);
+ // Adjust to alignment boundary
+ Offset = (Offset + Align - 1) / Align * Align;
}
+ // This does not include the 16 bytes used for fp and lr.
+ return (unsigned)Offset;
}
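// For reference: (Offset + Align - 1) / Align * Align rounds Offset up to
// the next multiple of Align via integer division; e.g. Offset = 20 with
// Align = 8 gives (27 / 8) * 8 = 24.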
-void AArch64FrameLowering::emitPrologue(MachineFunction &MF) const {
- AArch64MachineFunctionInfo *FuncInfo =
- MF.getInfo<AArch64MachineFunctionInfo>();
- MachineBasicBlock &MBB = MF.front();
- MachineBasicBlock::iterator MBBI = MBB.begin();
- MachineFrameInfo *MFI = MF.getFrameInfo();
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
- DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
+bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const {
+ if (!EnableRedZone)
+ return false;
+ // Don't use the red zone if the function explicitly asks us not to.
+ // This is typically used for kernel code.
+ if (MF.getFunction()->getAttributes().hasAttribute(
+ AttributeSet::FunctionIndex, Attribute::NoRedZone))
+ return false;
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
+ unsigned NumBytes = AFI->getLocalStackSize();
+
+ // Note: currently hasFP() is always true for hasCalls(), but that's an
+ // implementation detail of the current code, not a strict requirement,
+ // so stay safe here and check both.
+ if (MFI->hasCalls() || hasFP(MF) || NumBytes > 128)
+ return false;
+ return true;
+}
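// Background (illustrative): the "red zone" is the 128 bytes directly below
// SP that the platform ABI, where it provides one (e.g. Darwin), promises
// signal and interrupt handlers will leave untouched. A leaf function whose
// locals fit there can address them at negative offsets from an unadjusted
// SP and skip the sub/add sp pair entirely; the checks above rule out
// anything that would break that (calls, a frame pointer, > 128 bytes).
// Note the feature is gated behind the off-by-default -aarch64-redzone flag.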
+
+/// hasFP - Return true if the specified function should have a dedicated frame
+/// pointer register.
+bool AArch64FrameLowering::hasFP(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+
+#ifndef NDEBUG
+ const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo();
+ assert(!RegInfo->needsStackRealignment(MF) &&
+ "No stack realignment on AArch64!");
+#endif
+
+ return (MFI->hasCalls() || MFI->hasVarSizedObjects() ||
+ MFI->isFrameAddressTaken());
+}
+
+/// hasReservedCallFrame - Under normal circumstances, when a frame pointer is
+/// not required, we reserve argument space for call sites in the function
+/// immediately on entry to the current function. This eliminates the need for
+/// add/sub sp brackets around call sites. Returns true if the call frame is
+/// included as part of the stack frame.
+bool
+AArch64FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
+ return !MF.getFrameInfo()->hasVarSizedObjects();
+}
+
+void AArch64FrameLowering::eliminateCallFramePseudoInstr(
+ MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ const AArch64InstrInfo *TII =
+ static_cast<const AArch64InstrInfo *>(MF.getTarget().getInstrInfo());
+ DebugLoc DL = I->getDebugLoc();
+ int Opc = I->getOpcode();
+ bool IsDestroy = Opc == TII->getCallFrameDestroyOpcode();
+ uint64_t CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0;
+
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ if (!TFI->hasReservedCallFrame(MF)) {
+ unsigned Align = getStackAlignment();
+
+ int64_t Amount = I->getOperand(0).getImm();
+ Amount = RoundUpToAlignment(Amount, Align);
+ if (!IsDestroy)
+ Amount = -Amount;
+
+ // N.b. if CalleePopAmount is valid but zero (i.e. callee would pop, but it
+ // doesn't have to pop anything), then the first operand will be zero too so
+ // this adjustment is a no-op.
+ if (CalleePopAmount == 0) {
+ // FIXME: in-function stack adjustment for calls is limited to 24-bits
+ // because there's no guaranteed temporary register available.
+ //
+ // ADD/SUB (immediate) has only LSL #0 and LSL #12 available.
+ // 1) For offset <= 12-bit, we use LSL #0
+ // 2) For 12-bit <= offset <= 24-bit, we use two instructions. One uses
+ // LSL #0, and the other uses LSL #12.
+ //
+ // Mostly call frames will be allocated at the start of a function so
+ // this is OK, but it is a limitation that needs dealing with.
+ assert(Amount > -0xffffff && Amount < 0xffffff && "call frame too large");
+ emitFrameOffset(MBB, I, DL, AArch64::SP, AArch64::SP, Amount, TII);
+ }
+ } else if (CalleePopAmount != 0) {
+ // If the calling convention demands that the callee pops arguments from the
+ // stack, we want to add it back if we have a reserved call frame.
+ assert(CalleePopAmount < 0xffffff && "call frame too large");
+ emitFrameOffset(MBB, I, DL, AArch64::SP, AArch64::SP, -CalleePopAmount,
+ TII);
+ }
+ MBB.erase(I);
+}
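// For example (illustrative): ADD/SUB (immediate) encodes a 12-bit value
// optionally shifted left by 12, so a 24-bit adjustment is split into two
// instructions, roughly:
//   sub sp, sp, #0x123, lsl #12   ; subtract 0x123000
//   sub sp, sp, #0x456            ; subtract the remaining 0x456
// emitFrameOffset() is expected to perform that split when Amount needs it.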
+
+void AArch64FrameLowering::emitCalleeSavedFrameMoves(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+ unsigned FramePtr) const {
+ MachineFunction &MF = *MBB.getParent();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
MachineModuleInfo &MMI = MF.getMMI();
const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
- bool NeedsFrameMoves = MMI.hasDebugInfo()
- || MF.getFunction()->needsUnwindTableEntry();
-
- uint64_t NumInitialBytes, NumResidualBytes;
-
- // Currently we expect the stack to be laid out by
- // sub sp, sp, #initial
- // stp x29, x30, [sp, #offset]
- // ...
- // str xxx, [sp, #offset]
- // sub sp, sp, #rest (possibly via extra instructions).
- if (MFI->getCalleeSavedInfo().size()) {
- // If there are callee-saved registers, we want to store them efficiently as
- // a block, and virtual base assignment happens too early to do it for us so
- // we adjust the stack in two phases: first just for callee-saved fiddling,
- // then to allocate the rest of the frame.
- splitSPAdjustments(MFI->getStackSize(), NumInitialBytes, NumResidualBytes);
- } else {
- // If there aren't any callee-saved registers, two-phase adjustment is
- // inefficient. It's more efficient to adjust with NumInitialBytes too
- // because when we're in a "callee pops argument space" situation, that pop
- // must be tacked onto Initial for correctness.
- NumInitialBytes = MFI->getStackSize();
- NumResidualBytes = 0;
- }
+ const AArch64InstrInfo *TII = TM.getInstrInfo();
+ DebugLoc DL = MBB.findDebugLoc(MBBI);
+
+ // Add callee saved registers to move list.
+ const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+ if (CSI.empty())
+ return;
+
+ const DataLayout *TD = MF.getTarget().getDataLayout();
+ bool HasFP = hasFP(MF);
+
+ // Calculate amount of bytes used for return address storing.
+ int stackGrowth = -TD->getPointerSize(0);
+
+ // Calculate offsets.
+ int64_t saveAreaOffset = (HasFP ? 2 : 1) * stackGrowth;
+ unsigned TotalSkipped = 0;
+ for (const auto &Info : CSI) {
+ unsigned Reg = Info.getReg();
+ int64_t Offset = MFI->getObjectOffset(Info.getFrameIdx()) -
+ getOffsetOfLocalArea() + saveAreaOffset;
+
+ // Don't output a new CFI directive if we're re-saving the frame pointer or
+ // link register. This happens when the PrologEpilogInserter has inserted an
+ // extra "STP" of the frame pointer and link register -- the "emitPrologue"
+ // method automatically generates the directives when frame pointers are
+ // used. If we generate CFI directives for the extra "STP"s, the linker will
+ // lose track of the correct values for the frame pointer and link register.
+ if (HasFP && (FramePtr == Reg || Reg == AArch64::LR)) {
+ TotalSkipped += stackGrowth;
+ continue;
+ }
- // Tell everyone else how much adjustment we're expecting them to use. In
- // particular if an adjustment is required for a tail call the epilogue could
- // have a different view of things.
- FuncInfo->setInitialStackAdjust(NumInitialBytes);
-
- emitSPUpdate(MBB, MBBI, DL, TII, AArch64::X16, -NumInitialBytes,
- MachineInstr::FrameSetup);
-
- if (NeedsFrameMoves && NumInitialBytes) {
- // We emit this update even if the CFA is set from a frame pointer later so
- // that the CFA is valid in the interim.
- MachineLocation Dst(MachineLocation::VirtualFP);
- unsigned Reg = MRI->getDwarfRegNum(AArch64::XSP, true);
- unsigned CFIIndex = MMI.addFrameInst(
- MCCFIInstruction::createDefCfa(nullptr, Reg, -NumInitialBytes));
- BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
+ unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset(
+ nullptr, DwarfReg, Offset - TotalSkipped));
+ BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex);
}
+}
- // Otherwise we need to set the frame pointer and/or add a second stack
- // adjustment.
-
- bool FPNeedsSetting = hasFP(MF);
- for (; MBBI != MBB.end(); ++MBBI) {
- // Note that this search makes strong assumptions about the operation used
- // to store the frame-pointer: it must be "STP x29, x30, ...". This could
- // change in future, but until then there's no point in implementing
- // untestable more generic cases.
- if (FPNeedsSetting && MBBI->getOpcode() == AArch64::LSPair64_STR
- && MBBI->getOperand(0).getReg() == AArch64::X29) {
- int64_t X29FrameIdx = MBBI->getOperand(2).getIndex();
- FuncInfo->setFramePointerOffset(MFI->getObjectOffset(X29FrameIdx));
-
- ++MBBI;
- emitRegUpdate(MBB, MBBI, DL, TII, AArch64::X29, AArch64::XSP,
- AArch64::X29,
- NumInitialBytes + MFI->getObjectOffset(X29FrameIdx),
- MachineInstr::FrameSetup);
+void AArch64FrameLowering::emitPrologue(MachineFunction &MF) const {
+ MachineBasicBlock &MBB = MF.front(); // Prologue goes in entry BB.
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const Function *Fn = MF.getFunction();
+ const AArch64RegisterInfo *RegInfo = TM.getRegisterInfo();
+ const AArch64InstrInfo *TII = TM.getInstrInfo();
+ MachineModuleInfo &MMI = MF.getMMI();
+ AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
+ bool needsFrameMoves = MMI.hasDebugInfo() || Fn->needsUnwindTableEntry();
+ bool HasFP = hasFP(MF);
+ DebugLoc DL = MBB.findDebugLoc(MBBI);
- // The offset adjustment used when emitting debugging locations relative
- // to whatever frame base is set. AArch64 uses the default frame base (FP
- // or SP) and this adjusts the calculations to be correct.
- MFI->setOffsetAdjustment(- MFI->getObjectOffset(X29FrameIdx)
- - MFI->getStackSize());
-
- if (NeedsFrameMoves) {
- unsigned Reg = MRI->getDwarfRegNum(AArch64::X29, true);
- unsigned Offset = MFI->getObjectOffset(X29FrameIdx);
- unsigned CFIIndex = MMI.addFrameInst(
- MCCFIInstruction::createDefCfa(nullptr, Reg, Offset));
- BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
- }
+ int NumBytes = (int)MFI->getStackSize();
+ if (!AFI->hasStackFrame()) {
+ assert(!HasFP && "unexpected function without stack frame but with FP");
+
+ // All of the stack allocation is for locals.
+ AFI->setLocalStackSize(NumBytes);
- FPNeedsSetting = false;
+ // Label used to tie together the PROLOG_LABEL and the MachineMoves.
+ MCSymbol *FrameLabel = MMI.getContext().CreateTempSymbol();
+
+ // REDZONE: If the stack size is less than 128 bytes, we don't need
+ // to actually allocate.
+ if (NumBytes && !canUseRedZone(MF)) {
+ emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII,
+ MachineInstr::FrameSetup);
+
+ // Encode the stack size of the leaf function.
+ unsigned CFIIndex = MMI.addFrameInst(
+ MCCFIInstruction::createDefCfaOffset(FrameLabel, -NumBytes));
+ BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
+ } else if (NumBytes) {
+ ++NumRedZoneFunctions;
}
- if (!MBBI->getFlag(MachineInstr::FrameSetup))
- break;
+ return;
}
- assert(!FPNeedsSetting && "Frame pointer couldn't be set");
+ // Only set up FP if we actually need to.
+ int FPOffset = 0;
+ if (HasFP) {
+ // First instruction must a) allocate the stack and b) have an immediate
+ // that is a multiple of -2.
+ assert((MBBI->getOpcode() == AArch64::STPXpre ||
+ MBBI->getOpcode() == AArch64::STPDpre) &&
+ MBBI->getOperand(3).getReg() == AArch64::SP &&
+ MBBI->getOperand(4).getImm() < 0 &&
+ (MBBI->getOperand(4).getImm() & 1) == 0);
+
+ // Frame pointer is fp = sp - 16. Since the STPXpre subtracts the space
+ // required for the callee saved register area we get the frame pointer
+ // by adding that offset - 16 = -getImm()*8 - 2*8 = -(getImm() + 2) * 8.
+ FPOffset = -(MBBI->getOperand(4).getImm() + 2) * 8;
+ assert(FPOffset >= 0 && "Bad Framepointer Offset");
+ }
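// Worked example (illustrative): with a 48-byte callee-save area the first
// instruction is "stp x, y, [sp, #-48]!", i.e. getImm() == -6 in 8-byte
// units. The frame record (fp, lr) sits at the top of that area, 16 bytes
// below the old SP, so FPOffset = -(-6 + 2) * 8 == 32 and the frame pointer
// is later materialized as fp = sp + 32.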
- emitSPUpdate(MBB, MBBI, DL, TII, AArch64::X16, -NumResidualBytes,
- MachineInstr::FrameSetup);
+ // Move past the saves of the callee-saved registers.
+ while (MBBI->getOpcode() == AArch64::STPXi ||
+ MBBI->getOpcode() == AArch64::STPDi ||
+ MBBI->getOpcode() == AArch64::STPXpre ||
+ MBBI->getOpcode() == AArch64::STPDpre) {
+ ++MBBI;
+ NumBytes -= 16;
+ }
+ assert(NumBytes >= 0 && "Negative stack allocation size!?");
+ if (HasFP) {
+ // Issue sub fp, sp, FPOffset or
+ // mov fp,sp when FPOffset is zero.
+ // Note: All stores of callee-saved registers are marked as "FrameSetup".
+ // This code marks the instruction(s) that set the FP also.
+ emitFrameOffset(MBB, MBBI, DL, AArch64::FP, AArch64::SP, FPOffset, TII,
+ MachineInstr::FrameSetup);
+ }
- // Now we emit the rest of the frame setup information, if necessary: we've
- // already noted the FP and initial SP moves so we're left with the prologue's
- // final SP update and callee-saved register locations.
- if (!NeedsFrameMoves)
- return;
+ // All of the remaining stack allocations are for locals.
+ AFI->setLocalStackSize(NumBytes);
- // The rest of the stack adjustment
- if (!hasFP(MF) && NumResidualBytes) {
- MachineLocation Dst(MachineLocation::VirtualFP);
- unsigned Reg = MRI->getDwarfRegNum(AArch64::XSP, true);
- unsigned Offset = NumResidualBytes + NumInitialBytes;
- unsigned CFIIndex =
- MMI.addFrameInst(MCCFIInstruction::createDefCfa(nullptr, Reg, -Offset));
- BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
+ // Allocate space for the rest of the frame.
+ if (NumBytes) {
+ // If we're a leaf function, try using the red zone.
+ if (!canUseRedZone(MF))
+ emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII,
+ MachineInstr::FrameSetup);
}
- // And any callee-saved registers (it's fine to leave them to the end here,
- // because the old values are still valid at this point.)
- const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
- if (CSI.size()) {
- for (std::vector<CalleeSavedInfo>::const_iterator I = CSI.begin(),
- E = CSI.end(); I != E; ++I) {
- unsigned Offset = MFI->getObjectOffset(I->getFrameIdx());
- unsigned Reg = MRI->getDwarfRegNum(I->getReg(), true);
+ // If we need a base pointer, set it up here. It's whatever the value of the
+ // stack pointer is at this point. Any variable size objects will be allocated
+ // after this, so we can still use the base pointer to reference locals.
+ //
+ // FIXME: Clarify FrameSetup flags here.
+ // Note: Use emitFrameOffset() like above for FP if the FrameSetup flag is
+ // needed.
+ //
+ if (RegInfo->hasBasePointer(MF))
+ TII->copyPhysReg(MBB, MBBI, DL, AArch64::X19, AArch64::SP, false);
+
+ if (needsFrameMoves) {
+ const DataLayout *TD = MF.getTarget().getDataLayout();
+ const int StackGrowth = -TD->getPointerSize(0);
+ unsigned FramePtr = RegInfo->getFrameRegister(MF);
+
+ // An example of the prologue:
+ //
+ // .globl __foo
+ // .align 2
+ // __foo:
+ // Ltmp0:
+ // .cfi_startproc
+ // .cfi_personality 155, ___gxx_personality_v0
+ // Leh_func_begin:
+ // .cfi_lsda 16, Lexception33
+ //
+ // stp xa,bx, [sp, -#offset]!
+ // ...
+ // stp x28, x27, [sp, #offset-32]
+ // stp fp, lr, [sp, #offset-16]
+ // add fp, sp, #offset - 16
+ // sub sp, sp, #1360
+ //
+ // The Stack:
+ // +-------------------------------------------+
+ // 10000 | ........ | ........ | ........ | ........ |
+ // 10004 | ........ | ........ | ........ | ........ |
+ // +-------------------------------------------+
+ // 10008 | ........ | ........ | ........ | ........ |
+ // 1000c | ........ | ........ | ........ | ........ |
+ // +===========================================+
+ // 10010 | X28 Register |
+ // 10014 | X28 Register |
+ // +-------------------------------------------+
+ // 10018 | X27 Register |
+ // 1001c | X27 Register |
+ // +===========================================+
+ // 10020 | Frame Pointer |
+ // 10024 | Frame Pointer |
+ // +-------------------------------------------+
+ // 10028 | Link Register |
+ // 1002c | Link Register |
+ // +===========================================+
+ // 10030 | ........ | ........ | ........ | ........ |
+ // 10034 | ........ | ........ | ........ | ........ |
+ // +-------------------------------------------+
+ // 10038 | ........ | ........ | ........ | ........ |
+ // 1003c | ........ | ........ | ........ | ........ |
+ // +-------------------------------------------+
+ //
+ // [sp] = 10030 :: >>initial value<<
+ // sp = 10020 :: stp fp, lr, [sp, #-16]!
+ // fp = sp == 10020 :: mov fp, sp
+ // [sp] == 10020 :: stp x28, x27, [sp, #-16]!
+ // sp == 10010 :: >>final value<<
+ //
+ // The frame pointer (w29) points to address 10020. If we use an offset of
+ // '16' from 'w29', we get the CFI offsets of -8 for w30, -16 for w29, -24
+ // for w27, and -32 for w28:
+ //
+ // Ltmp1:
+ // .cfi_def_cfa w29, 16
+ // Ltmp2:
+ // .cfi_offset w30, -8
+ // Ltmp3:
+ // .cfi_offset w29, -16
+ // Ltmp4:
+ // .cfi_offset w27, -24
+ // Ltmp5:
+ // .cfi_offset w28, -32
+
+ if (HasFP) {
+ // Define the current CFA rule to use the provided FP.
+ unsigned Reg = RegInfo->getDwarfRegNum(FramePtr, true);
unsigned CFIIndex = MMI.addFrameInst(
- MCCFIInstruction::createOffset(nullptr, Reg, Offset));
- BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ MCCFIInstruction::createDefCfa(nullptr, Reg, 2 * StackGrowth));
+ BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
+
+ // Record the location of the stored LR
+ unsigned LR = RegInfo->getDwarfRegNum(AArch64::LR, true);
+ CFIIndex = MMI.addFrameInst(
+ MCCFIInstruction::createOffset(nullptr, LR, StackGrowth));
+ BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
+
+ // Record the location of the stored FP
+ CFIIndex = MMI.addFrameInst(
+ MCCFIInstruction::createOffset(nullptr, Reg, 2 * StackGrowth));
+ BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
+ } else {
+ // Encode the stack size of the leaf function.
+ unsigned CFIIndex = MMI.addFrameInst(
+ MCCFIInstruction::createDefCfaOffset(nullptr, -MFI->getStackSize()));
+ BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex);
}
+
+ // Now emit the moves for whatever callee saved regs we have.
+ emitCalleeSavedFrameMoves(MBB, MBBI, FramePtr);
}
}
-void
-AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
- MachineBasicBlock &MBB) const {
- AArch64MachineFunctionInfo *FuncInfo =
- MF.getInfo<AArch64MachineFunctionInfo>();
+static bool isCalleeSavedRegister(unsigned Reg, const MCPhysReg *CSRegs) {
+ for (unsigned i = 0; CSRegs[i]; ++i)
+ if (Reg == CSRegs[i])
+ return true;
+ return false;
+}
+
+static bool isCSRestore(MachineInstr *MI, const MCPhysReg *CSRegs) {
+ unsigned RtIdx = 0;
+ if (MI->getOpcode() == AArch64::LDPXpost ||
+ MI->getOpcode() == AArch64::LDPDpost)
+ RtIdx = 1;
+
+ if (MI->getOpcode() == AArch64::LDPXpost ||
+ MI->getOpcode() == AArch64::LDPDpost ||
+ MI->getOpcode() == AArch64::LDPXi || MI->getOpcode() == AArch64::LDPDi) {
+ if (!isCalleeSavedRegister(MI->getOperand(RtIdx).getReg(), CSRegs) ||
+ !isCalleeSavedRegister(MI->getOperand(RtIdx + 1).getReg(), CSRegs) ||
+ MI->getOperand(RtIdx + 2).getReg() != AArch64::SP)
+ return false;
+ return true;
+ }
+ return false;
+}
+
+void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+ assert(MBBI->isReturn() && "Can only insert epilog into returning blocks");
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const AArch64InstrInfo *TII =
+ static_cast<const AArch64InstrInfo *>(MF.getTarget().getInstrInfo());
+ const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>(
+ MF.getTarget().getRegisterInfo());
DebugLoc DL = MBBI->getDebugLoc();
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
- MachineFrameInfo &MFI = *MF.getFrameInfo();
unsigned RetOpcode = MBBI->getOpcode();
+ int NumBytes = MFI->getStackSize();
+ const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
+
 // Initial and residual are named for consistency with the prologue. Note that
// in the epilogue, the residual adjustment is executed first.
- uint64_t NumInitialBytes = FuncInfo->getInitialStackAdjust();
- uint64_t NumResidualBytes = MFI.getStackSize() - NumInitialBytes;
uint64_t ArgumentPopSize = 0;
- if (RetOpcode == AArch64::TC_RETURNdi ||
- RetOpcode == AArch64::TC_RETURNxi) {
- MachineOperand &JumpTarget = MBBI->getOperand(0);
+ if (RetOpcode == AArch64::TCRETURNdi || RetOpcode == AArch64::TCRETURNri) {
MachineOperand &StackAdjust = MBBI->getOperand(1);
- MachineInstrBuilder MIB;
- if (RetOpcode == AArch64::TC_RETURNdi) {
- MIB = BuildMI(MBB, MBBI, DL, TII.get(AArch64::TAIL_Bimm));
- if (JumpTarget.isGlobal()) {
- MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
- JumpTarget.getTargetFlags());
- } else {
- assert(JumpTarget.isSymbol() && "unexpected tail call destination");
- MIB.addExternalSymbol(JumpTarget.getSymbolName(),
- JumpTarget.getTargetFlags());
- }
- } else {
- assert(RetOpcode == AArch64::TC_RETURNxi && JumpTarget.isReg()
- && "Unexpected tail call");
-
- MIB = BuildMI(MBB, MBBI, DL, TII.get(AArch64::TAIL_BRx));
- MIB.addReg(JumpTarget.getReg(), RegState::Kill);
- }
-
- // Add the extra operands onto the new tail call instruction even though
- // they're not used directly (so that liveness is tracked properly etc).
- for (unsigned i = 2, e = MBBI->getNumOperands(); i != e; ++i)
- MIB->addOperand(MBBI->getOperand(i));
-
-
- // Delete the pseudo instruction TC_RETURN.
- MachineInstr *NewMI = std::prev(MBBI);
- MBB.erase(MBBI);
- MBBI = NewMI;
-
// For a tail-call in a callee-pops-arguments environment, some or all of
 // the stack may actually be in use for the call's arguments; this is
// calculated during LowerCall and consumed here...
@@ -241,386 +458,434 @@ AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
// conveniently stored in the MachineFunctionInfo by
// LowerFormalArguments. This will, of course, be zero for the C calling
// convention.
- ArgumentPopSize = FuncInfo->getArgumentStackToRestore();
+ ArgumentPopSize = AFI->getArgumentStackToRestore();
}
- assert(NumInitialBytes % 16 == 0 && NumResidualBytes % 16 == 0
- && "refusing to adjust stack by misaligned amt");
-
- // We may need to address callee-saved registers differently, so find out the
- // bound on the frame indices.
- const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
- int MinCSFI = 0;
- int MaxCSFI = -1;
-
- if (CSI.size()) {
- MinCSFI = CSI[0].getFrameIdx();
- MaxCSFI = CSI[CSI.size() - 1].getFrameIdx();
+ // The stack frame should be like below,
+ //
+ // ---------------------- ---
+ // | | |
+ // | BytesInStackArgArea| CalleeArgStackSize
+ // | (NumReusableBytes) | (of tail call)
+ // | | ---
+ // | | |
+ // ---------------------| --- |
+ // | | | |
+ // | CalleeSavedReg | | |
+ // | (NumRestores * 16) | | |
+ // | | | |
+ // ---------------------| | NumBytes
+ // | | StackSize (StackAdjustUp)
+ // | LocalStackSize | | |
+ // | (covering callee | | |
+ // | args) | | |
+ // | | | |
+ // ---------------------- --- ---
+ //
+ // So NumBytes = StackSize + BytesInStackArgArea - CalleeArgStackSize
+ // = StackSize + ArgumentPopSize
+ //
+ // AArch64TargetLowering::LowerCall figures out ArgumentPopSize and keeps
+ // it as the 2nd argument of AArch64ISD::TC_RETURN.
+ NumBytes += ArgumentPopSize;
+
+ unsigned NumRestores = 0;
+ // Move past the restores of the callee-saved registers.
+ MachineBasicBlock::iterator LastPopI = MBBI;
+ const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
+ if (LastPopI != MBB.begin()) {
+ do {
+ ++NumRestores;
+ --LastPopI;
+ } while (LastPopI != MBB.begin() && isCSRestore(LastPopI, CSRegs));
+ if (!isCSRestore(LastPopI, CSRegs)) {
+ ++LastPopI;
+ --NumRestores;
+ }
}
-
- // The "residual" stack update comes first from this direction and guarantees
- // that SP is NumInitialBytes below its value on function entry, either by a
- // direct update or restoring it from the frame pointer.
- if (NumInitialBytes + ArgumentPopSize != 0) {
- emitSPUpdate(MBB, MBBI, DL, TII, AArch64::X16,
- NumInitialBytes + ArgumentPopSize);
- --MBBI;
+ NumBytes -= NumRestores * 16;
+ assert(NumBytes >= 0 && "Negative stack allocation size!?");
+
+ if (!hasFP(MF)) {
+ // If this was a redzone leaf function, we don't need to restore the
+ // stack pointer.
+ if (!canUseRedZone(MF))
+ emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP, NumBytes,
+ TII);
+ return;
}
+ // Restore the original stack pointer.
+ // FIXME: Rather than doing the math here, we should instead just use
+ // non-post-indexed loads for the restores if we aren't actually going to
+ // be able to save any instructions.
+ if (NumBytes || MFI->hasVarSizedObjects())
+ emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::FP,
+ -(NumRestores - 1) * 16, TII, MachineInstr::NoFlags);
+}
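// For reference: with a frame pointer, SP is restored relative to FP rather
// than by re-adding NumBytes (which is unknown with variable-sized objects).
// FP points at the topmost callee-save pair, old SP - 16, so the bottom of
// the save area sits at fp - (NumRestores - 1) * 16, matching the
// -(NumRestores - 1) * 16 offset passed to emitFrameOffset() above.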
- // MBBI now points to the instruction just past the last callee-saved
- // restoration (either RET/B if NumInitialBytes == 0, or the "ADD sp, sp"
- // otherwise).
+/// getFrameIndexOffset - Returns the displacement from the frame register to
+/// the stack frame of the specified index.
+int AArch64FrameLowering::getFrameIndexOffset(const MachineFunction &MF,
+ int FI) const {
+ unsigned FrameReg;
+ return getFrameIndexReference(MF, FI, FrameReg);
+}
- // Now we need to find out where to put the bulk of the stack adjustment
- MachineBasicBlock::iterator FirstEpilogue = MBBI;
- while (MBBI != MBB.begin()) {
- --MBBI;
+/// getFrameIndexReference - Provide a base+offset reference to an FI slot for
+/// debug info. It's the same as what we use for resolving the code-gen
+/// references for now. FIXME: This can go wrong when references are
+/// SP-relative and simple call frames aren't used.
+int AArch64FrameLowering::getFrameIndexReference(const MachineFunction &MF,
+ int FI,
+ unsigned &FrameReg) const {
+ return resolveFrameIndexReference(MF, FI, FrameReg);
+}
- unsigned FrameOp;
- for (FrameOp = 0; FrameOp < MBBI->getNumOperands(); ++FrameOp) {
- if (MBBI->getOperand(FrameOp).isFI())
- break;
+int AArch64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF,
+ int FI, unsigned &FrameReg,
+ bool PreferFP) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>(
+ MF.getTarget().getRegisterInfo());
+ const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
+ int FPOffset = MFI->getObjectOffset(FI) + 16;
+ int Offset = MFI->getObjectOffset(FI) + MFI->getStackSize();
+ bool isFixed = MFI->isFixedObjectIndex(FI);
+
+ // Use frame pointer to reference fixed objects. Use it for locals if
+ // there are VLAs (and thus the SP isn't reliable as a base).
+ // Make sure useFPForScavengingIndex() does the right thing for the emergency
+ // spill slot.
+ bool UseFP = false;
+ if (AFI->hasStackFrame()) {
+ // Note: Keeping the following as multiple 'if' statements rather than
+ // merging to a single expression for readability.
+ //
+ // Argument access should always use the FP.
+ if (isFixed) {
+ UseFP = hasFP(MF);
+ } else if (hasFP(MF) && !RegInfo->hasBasePointer(MF)) {
+ // Use SP or FP, whichever gives us the best chance of the offset
+ // being in range for direct access. If the FPOffset is positive,
+ // that'll always be best, as the SP will be even further away.
+ // If the FPOffset is negative, we have to keep in mind that the
+ // available offset range for negative offsets is smaller than for
+ // positive ones. If we have variable sized objects, we're stuck with
+ // using the FP regardless, though, as the SP offset is unknown
+ // and we don't have a base pointer available. If an offset is
+ // available via the FP and the SP, use whichever is closest.
+ if (PreferFP || MFI->hasVarSizedObjects() || FPOffset >= 0 ||
+ (FPOffset >= -256 && Offset > -FPOffset))
+ UseFP = true;
}
-
- // If this instruction doesn't have a frame index we've reached the end of
- // the callee-save restoration.
- if (FrameOp == MBBI->getNumOperands())
- break;
-
- // Likewise if it *is* a local reference, but not to a callee-saved object.
- int FrameIdx = MBBI->getOperand(FrameOp).getIndex();
- if (FrameIdx < MinCSFI || FrameIdx > MaxCSFI)
- break;
-
- FirstEpilogue = MBBI;
}
- if (MF.getFrameInfo()->hasVarSizedObjects()) {
- int64_t StaticFrameBase;
- StaticFrameBase = -(NumInitialBytes + FuncInfo->getFramePointerOffset());
- emitRegUpdate(MBB, FirstEpilogue, DL, TII,
- AArch64::XSP, AArch64::X29, AArch64::NoRegister,
- StaticFrameBase);
- } else {
- emitSPUpdate(MBB, FirstEpilogue, DL,TII, AArch64::X16, NumResidualBytes);
+ if (UseFP) {
+ FrameReg = RegInfo->getFrameRegister(MF);
+ return FPOffset;
}
-}
-int64_t
-AArch64FrameLowering::resolveFrameIndexReference(MachineFunction &MF,
- int FrameIndex,
- unsigned &FrameReg,
- int SPAdj,
- bool IsCalleeSaveOp) const {
- AArch64MachineFunctionInfo *FuncInfo =
- MF.getInfo<AArch64MachineFunctionInfo>();
- MachineFrameInfo *MFI = MF.getFrameInfo();
-
- int64_t TopOfFrameOffset = MFI->getObjectOffset(FrameIndex);
-
- assert(!(IsCalleeSaveOp && FuncInfo->getInitialStackAdjust() == 0)
- && "callee-saved register in unexpected place");
-
- // If the frame for this function is particularly large, we adjust the stack
- // in two phases which means the callee-save related operations see a
- // different (intermediate) stack size.
- int64_t FrameRegPos;
- if (IsCalleeSaveOp) {
- FrameReg = AArch64::XSP;
- FrameRegPos = -static_cast<int64_t>(FuncInfo->getInitialStackAdjust());
- } else if (useFPForAddressing(MF)) {
- // Have to use the frame pointer since we have no idea where SP is.
- FrameReg = AArch64::X29;
- FrameRegPos = FuncInfo->getFramePointerOffset();
- } else {
- FrameReg = AArch64::XSP;
- FrameRegPos = -static_cast<int64_t>(MFI->getStackSize()) + SPAdj;
+ // Use the base pointer if we have one.
+ if (RegInfo->hasBasePointer(MF))
+ FrameReg = RegInfo->getBaseRegister();
+ else {
+ FrameReg = AArch64::SP;
+ // If we're using the red zone for this function, the SP won't actually
+ // be adjusted, so the offsets will be negative. They're also all
+ // within range of the signed 9-bit immediate instructions.
+ if (canUseRedZone(MF))
+ Offset -= AFI->getLocalStackSize();
}
- return TopOfFrameOffset - FrameRegPos;
+ return Offset;
}
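// For reference: frame-index offsets are recorded relative to the SP at
// function entry. The prologue sets fp = entry SP - 16, so an FP-relative
// access needs ObjectOffset + 16; after the prologue sp = entry SP -
// StackSize, so an SP-relative access needs ObjectOffset + StackSize. These
// are the FPOffset and Offset values computed at the top of this function.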
-void
-AArch64FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
- RegScavenger *RS) const {
- const AArch64RegisterInfo *RegInfo =
- static_cast<const AArch64RegisterInfo *>(MF.getTarget().getRegisterInfo());
- MachineFrameInfo *MFI = MF.getFrameInfo();
- const AArch64InstrInfo &TII =
- *static_cast<const AArch64InstrInfo *>(MF.getTarget().getInstrInfo());
-
- if (hasFP(MF)) {
- MF.getRegInfo().setPhysRegUsed(AArch64::X29);
- MF.getRegInfo().setPhysRegUsed(AArch64::X30);
- }
+static unsigned getPrologueDeath(MachineFunction &MF, unsigned Reg) {
+ if (Reg != AArch64::LR)
+ return getKillRegState(true);
- // If addressing of local variables is going to be more complicated than
- // shoving a base register and an offset into the instruction then we may well
- // need to scavenge registers. We should either specifically add a
- // callee-save register for this purpose or allocate an extra spill slot.
- bool BigStack =
- MFI->estimateStackSize(MF) >= TII.estimateRSStackLimit(MF)
- || MFI->hasVarSizedObjects() // Access will be from X29: messes things up
- || (MFI->adjustsStack() && !hasReservedCallFrame(MF));
-
- if (!BigStack)
- return;
-
- // We certainly need some slack space for the scavenger, preferably an extra
- // register.
- const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs();
- uint16_t ExtraReg = AArch64::NoRegister;
-
- for (unsigned i = 0; CSRegs[i]; ++i) {
- if (AArch64::GPR64RegClass.contains(CSRegs[i]) &&
- !MF.getRegInfo().isPhysRegUsed(CSRegs[i])) {
- ExtraReg = CSRegs[i];
- break;
- }
- }
-
- if (ExtraReg != 0) {
- MF.getRegInfo().setPhysRegUsed(ExtraReg);
- } else {
- assert(RS && "Expect register scavenger to be available");
-
- // Create a stack slot for scavenging purposes. PrologEpilogInserter
- // helpfully places it near either SP or FP for us to avoid
- // infinite regression during scavenging.
- const TargetRegisterClass *RC = &AArch64::GPR64RegClass;
- RS->addScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
- RC->getAlignment(),
- false));
- }
+ // LR may be referred to later by an @llvm.returnaddress intrinsic.
+ bool LRLiveIn = MF.getRegInfo().isLiveIn(AArch64::LR);
+ bool LRKill = !(LRLiveIn && MF.getFrameInfo()->isReturnAddressTaken());
+ return getKillRegState(LRKill);
}
-bool AArch64FrameLowering::determinePrologueDeath(MachineBasicBlock &MBB,
- unsigned Reg) const {
- // If @llvm.returnaddress is called then it will refer to X30 by some means;
- // the prologue store does not kill the register.
- if (Reg == AArch64::X30) {
- if (MBB.getParent()->getFrameInfo()->isReturnAddressTaken()
- && MBB.getParent()->getRegInfo().isLiveIn(Reg))
- return false;
+bool AArch64FrameLowering::spillCalleeSavedRegisters(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
+ MachineFunction &MF = *MBB.getParent();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ unsigned Count = CSI.size();
+ DebugLoc DL;
+ assert((Count & 1) == 0 && "Odd number of callee-saved regs to spill!");
+
+ if (MI != MBB.end())
+ DL = MI->getDebugLoc();
+
+ for (unsigned i = 0; i < Count; i += 2) {
+ unsigned idx = Count - i - 2;
+ unsigned Reg1 = CSI[idx].getReg();
+ unsigned Reg2 = CSI[idx + 1].getReg();
+ // GPRs and FPRs are saved in pairs of 64-bit regs. We expect the CSI
+ // list to come in sorted by frame index so that we can issue the store
+ // pair instructions directly. Assert if we see anything otherwise.
+ //
+ // The order of the registers in the list is controlled by
+ // getCalleeSavedRegs(), so they will always be in-order, as well.
+ assert(CSI[idx].getFrameIdx() + 1 == CSI[idx + 1].getFrameIdx() &&
+ "Out of order callee saved regs!");
+ unsigned StrOpc;
+ assert((Count & 1) == 0 && "Odd number of callee-saved regs to spill!");
+ assert((i & 1) == 0 && "Odd index for callee-saved reg spill!");
+ // Issue sequence of non-sp increment and pi sp spills for cs regs. The
+ // first spill is a pre-increment that allocates the stack.
+ // For example:
+ // stp x22, x21, [sp, #-48]! // addImm(-6)
+ // stp x20, x19, [sp, #16] // addImm(+2)
+ // stp fp, lr, [sp, #32] // addImm(+4)
+ // Rationale: This sequence saves uop updates compared to a sequence of
+ // pre-increment spills like stp xi,xj,[sp,#-16]!
+ // Note: Similar rationale and sequence for restores in epilog.
+ if (AArch64::GPR64RegClass.contains(Reg1)) {
+ assert(AArch64::GPR64RegClass.contains(Reg2) &&
+ "Expected GPR64 callee-saved register pair!");
+ // For first spill use pre-increment store.
+ if (i == 0)
+ StrOpc = AArch64::STPXpre;
+ else
+ StrOpc = AArch64::STPXi;
+ } else if (AArch64::FPR64RegClass.contains(Reg1)) {
+ assert(AArch64::FPR64RegClass.contains(Reg2) &&
+ "Expected FPR64 callee-saved register pair!");
+ // For first spill use pre-increment store.
+ if (i == 0)
+ StrOpc = AArch64::STPDpre;
+ else
+ StrOpc = AArch64::STPDi;
+ } else
+ llvm_unreachable("Unexpected callee saved register!");
+ DEBUG(dbgs() << "CSR spill: (" << TRI->getName(Reg1) << ", "
+ << TRI->getName(Reg2) << ") -> fi#(" << CSI[idx].getFrameIdx()
+ << ", " << CSI[idx + 1].getFrameIdx() << ")\n");
+ // Compute offset: i = 0 => offset = -Count;
+ // i = 2 => offset = -(Count - 2) + Count = 2 = i; etc.
+ const int Offset = (i == 0) ? -Count : i;
+ assert((Offset >= -64 && Offset <= 63) &&
+ "Offset out of bounds for STP immediate");
+ MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StrOpc));
+ if (StrOpc == AArch64::STPDpre || StrOpc == AArch64::STPXpre)
+ MIB.addReg(AArch64::SP, RegState::Define);
+
+ MIB.addReg(Reg2, getPrologueDeath(MF, Reg2))
+ .addReg(Reg1, getPrologueDeath(MF, Reg1))
+ .addReg(AArch64::SP)
+ .addImm(Offset) // [sp, #offset * 8], where factor * 8 is implicit
+ .setMIFlag(MachineInstr::FrameSetup);
}
-
- // In all other cases, physical registers are dead after they've been saved
- // but live at the beginning of the prologue block.
- MBB.addLiveIn(Reg);
return true;
}
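// For reference: the STP immediates above are in 8-byte units. With six
// callee-saved registers (Count == 6) the loop emits, in effect:
//   stp x22, x21, [sp, #-48]!   ; i == 0, Offset == -6
//   stp x20, x19, [sp, #16]     ; i == 2, Offset == 2
//   stp fp,  lr,  [sp, #32]     ; i == 4, Offset == 4
// matching the example sequence in the comment inside the loop.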
-void
-AArch64FrameLowering::emitFrameMemOps(bool isPrologue, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- const std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI,
- const LoadStoreMethod PossClasses[],
- unsigned NumClasses) const {
- DebugLoc DL = MBB.findDebugLoc(MBBI);
+bool AArch64FrameLowering::restoreCalleeSavedRegisters(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
MachineFunction &MF = *MBB.getParent();
- MachineFrameInfo &MFI = *MF.getFrameInfo();
const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+ unsigned Count = CSI.size();
+ DebugLoc DL;
+ assert((Count & 1) == 0 && "Odd number of callee-saved regs to spill!");
+
+ if (MI != MBB.end())
+ DL = MI->getDebugLoc();
+
+ for (unsigned i = 0; i < Count; i += 2) {
+ unsigned Reg1 = CSI[i].getReg();
+ unsigned Reg2 = CSI[i + 1].getReg();
+ // GPRs and FPRs are saved in pairs of 64-bit regs. We expect the CSI
+ // list to come in sorted by frame index so that we can issue the store
+ // pair instructions directly. Assert if we see anything otherwise.
+ assert(CSI[i].getFrameIdx() + 1 == CSI[i + 1].getFrameIdx() &&
+ "Out of order callee saved regs!");
+ // Issue sequence of non-sp increment and sp-pi restores for cs regs. Only
+ // the last load is sp-pi post-increment and de-allocates the stack:
+ // For example:
+ // ldp fp, lr, [sp, #32] // addImm(+4)
+ // ldp x20, x19, [sp, #16] // addImm(+2)
+ // ldp x22, x21, [sp], #48 // addImm(+6)
+ // Note: see comment in spillCalleeSavedRegisters()
+ unsigned LdrOpc;
+
+ assert((Count & 1) == 0 && "Odd number of callee-saved regs to spill!");
+ assert((i & 1) == 0 && "Odd index for callee-saved reg spill!");
+ if (AArch64::GPR64RegClass.contains(Reg1)) {
+ assert(AArch64::GPR64RegClass.contains(Reg2) &&
+ "Expected GPR64 callee-saved register pair!");
+ if (i == Count - 2)
+ LdrOpc = AArch64::LDPXpost;
+ else
+ LdrOpc = AArch64::LDPXi;
+ } else if (AArch64::FPR64RegClass.contains(Reg1)) {
+ assert(AArch64::FPR64RegClass.contains(Reg2) &&
+ "Expected FPR64 callee-saved register pair!");
+ if (i == Count - 2)
+ LdrOpc = AArch64::LDPDpost;
+ else
+ LdrOpc = AArch64::LDPDi;
+ } else
+ llvm_unreachable("Unexpected callee saved register!");
+ DEBUG(dbgs() << "CSR restore: (" << TRI->getName(Reg1) << ", "
+ << TRI->getName(Reg2) << ") -> fi#(" << CSI[i].getFrameIdx()
+ << ", " << CSI[i + 1].getFrameIdx() << ")\n");
+
+ // Compute offset: i = 0 => offset = Count - 2; i = 2 => offset = Count - 4;
+ // etc.
+ const int Offset = (i == Count - 2) ? Count : Count - i - 2;
+ assert((Offset >= -64 && Offset <= 63) &&
+ "Offset out of bounds for LDP immediate");
+ MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(LdrOpc));
+ if (LdrOpc == AArch64::LDPXpost || LdrOpc == AArch64::LDPDpost)
+ MIB.addReg(AArch64::SP, RegState::Define);
+
+ MIB.addReg(Reg2, getDefRegState(true))
+ .addReg(Reg1, getDefRegState(true))
+ .addReg(AArch64::SP)
+ .addImm(Offset); // [sp], #offset * 8 or [sp, #offset * 8]
+ // where the factor * 8 is implicit
+ }
+ return true;
+}
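// For reference: the restores mirror the spills in reverse order, and only
// the final LDP uses post-index writeback to pop the whole area; e.g. for
// Count == 6 (illustrative):
//   ldp fp,  lr,  [sp, #32]     ; i == 0, Offset == Count - 2 == 4
//   ldp x20, x19, [sp, #16]     ; i == 2, Offset == 2
//   ldp x22, x21, [sp], #48     ; i == 4 == Count - 2, Offset == Count == 6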
- // A certain amount of implicit contract is present here. The actual stack
- // offsets haven't been allocated officially yet, so for strictly correct code
- // we rely on the fact that the elements of CSI are allocated in order
- // starting at SP, purely as dictated by size and alignment. In practice since
- // this function handles the only accesses to those slots it's not quite so
- // important.
- //
- // We have also ordered the Callee-saved register list in AArch64CallingConv
- // so that the above scheme puts registers in order: in particular we want
- // &X30 to be &X29+8 for an ABI-correct frame record (PCS 5.2.2)
- for (unsigned i = 0, e = CSI.size(); i < e; ++i) {
- unsigned Reg = CSI[i].getReg();
-
- // First we need to find out which register class the register belongs to so
- // that we can use the correct load/store instructions.
- unsigned ClassIdx;
- for (ClassIdx = 0; ClassIdx < NumClasses; ++ClassIdx) {
- if (PossClasses[ClassIdx].RegClass->contains(Reg))
- break;
- }
- assert(ClassIdx != NumClasses
- && "Asked to store register in unexpected class");
- const TargetRegisterClass &TheClass = *PossClasses[ClassIdx].RegClass;
-
- // Now we need to decide whether it's possible to emit a paired instruction:
- // for this we want the next register to be in the same class.
- MachineInstrBuilder NewMI;
- bool Pair = false;
- if (i + 1 < CSI.size() && TheClass.contains(CSI[i+1].getReg())) {
- Pair = true;
- unsigned StLow = 0, StHigh = 0;
- if (isPrologue) {
- // Most of these registers will be live-in to the MBB and killed by our
- // store, though there are exceptions (see determinePrologueDeath).
- StLow = getKillRegState(determinePrologueDeath(MBB, CSI[i+1].getReg()));
- StHigh = getKillRegState(determinePrologueDeath(MBB, CSI[i].getReg()));
- } else {
- StLow = RegState::Define;
- StHigh = RegState::Define;
- }
+void AArch64FrameLowering::processFunctionBeforeCalleeSavedScan(
+ MachineFunction &MF, RegScavenger *RS) const {
+ const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>(
+ MF.getTarget().getRegisterInfo());
+ AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
+ MachineRegisterInfo *MRI = &MF.getRegInfo();
+ SmallVector<unsigned, 4> UnspilledCSGPRs;
+ SmallVector<unsigned, 4> UnspilledCSFPRs;
- NewMI = BuildMI(MBB, MBBI, DL, TII.get(PossClasses[ClassIdx].PairOpcode))
- .addReg(CSI[i+1].getReg(), StLow)
- .addReg(CSI[i].getReg(), StHigh);
+ // The frame record needs to be created by saving the appropriate registers
+ if (hasFP(MF)) {
+ MRI->setPhysRegUsed(AArch64::FP);
+ MRI->setPhysRegUsed(AArch64::LR);
+ }
- // If it's a paired op, we've consumed two registers
- ++i;
- } else {
- unsigned State;
- if (isPrologue) {
- State = getKillRegState(determinePrologueDeath(MBB, CSI[i].getReg()));
+ // Spill the BasePtr if it's used. Do this first thing so that the
+ // getCalleeSavedRegs() below will get the right answer.
+ if (RegInfo->hasBasePointer(MF))
+ MRI->setPhysRegUsed(RegInfo->getBaseRegister());
+
+ // If any callee-saved registers are used, the frame cannot be eliminated.
+ unsigned NumGPRSpilled = 0;
+ unsigned NumFPRSpilled = 0;
+ bool ExtraCSSpill = false;
+ bool CanEliminateFrame = true;
+ DEBUG(dbgs() << "*** processFunctionBeforeCalleeSavedScan\nUsed CSRs:");
+ const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
+
+ // Check pairs of consecutive callee-saved registers.
+ for (unsigned i = 0; CSRegs[i]; i += 2) {
+ assert(CSRegs[i + 1] && "Odd number of callee-saved registers!");
+
+ const unsigned OddReg = CSRegs[i];
+ const unsigned EvenReg = CSRegs[i + 1];
+ assert((AArch64::GPR64RegClass.contains(OddReg) &&
+ AArch64::GPR64RegClass.contains(EvenReg)) ^
+ (AArch64::FPR64RegClass.contains(OddReg) &&
+ AArch64::FPR64RegClass.contains(EvenReg)) &&
+ "Register class mismatch!");
+
+ const bool OddRegUsed = MRI->isPhysRegUsed(OddReg);
+ const bool EvenRegUsed = MRI->isPhysRegUsed(EvenReg);
+
+ // Early exit if none of the registers in the register pair is actually
+ // used.
+ if (!OddRegUsed && !EvenRegUsed) {
+ if (AArch64::GPR64RegClass.contains(OddReg)) {
+ UnspilledCSGPRs.push_back(OddReg);
+ UnspilledCSGPRs.push_back(EvenReg);
} else {
- State = RegState::Define;
+ UnspilledCSFPRs.push_back(OddReg);
+ UnspilledCSFPRs.push_back(EvenReg);
}
+ continue;
+ }
- NewMI = BuildMI(MBB, MBBI, DL,
- TII.get(PossClasses[ClassIdx].SingleOpcode))
- .addReg(CSI[i].getReg(), State);
+ unsigned Reg = AArch64::NoRegister;
+ // If only one of the registers of the register pair is used, make sure to
+ // mark the other one as used as well.
+ if (OddRegUsed ^ EvenRegUsed) {
+ // Find out which register is the additional spill.
+ Reg = OddRegUsed ? EvenReg : OddReg;
+ MRI->setPhysRegUsed(Reg);
}
- // Note that the FrameIdx refers to the second register in a pair: it will
- // be allocated the smaller numeric address and so is the one an LDP/STP
- // address must use.
- int FrameIdx = CSI[i].getFrameIdx();
- MachineMemOperand::MemOperandFlags Flags;
- Flags = isPrologue ? MachineMemOperand::MOStore : MachineMemOperand::MOLoad;
- MachineMemOperand *MMO =
- MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx),
- Flags,
- Pair ? TheClass.getSize() * 2 : TheClass.getSize(),
- MFI.getObjectAlignment(FrameIdx));
-
- NewMI.addFrameIndex(FrameIdx)
- .addImm(0) // address-register offset
- .addMemOperand(MMO);
-
- if (isPrologue)
- NewMI.setMIFlags(MachineInstr::FrameSetup);
-
- // For aesthetic reasons, during an epilogue we want to emit complementary
- // operations to the prologue, but in the opposite order. So we still
- // iterate through the CalleeSavedInfo list in order, but we put the
- // instructions successively earlier in the MBB.
- if (!isPrologue)
- --MBBI;
+ DEBUG(dbgs() << ' ' << PrintReg(OddReg, RegInfo));
+ DEBUG(dbgs() << ' ' << PrintReg(EvenReg, RegInfo));
+
+ assert(((OddReg == AArch64::LR && EvenReg == AArch64::FP) ||
+ (RegInfo->getEncodingValue(OddReg) + 1 ==
+ RegInfo->getEncodingValue(EvenReg))) &&
+ "Register pair of non-adjacent registers!");
+ if (AArch64::GPR64RegClass.contains(OddReg)) {
+ NumGPRSpilled += 2;
+ // If it's not a reserved register, we can use it in lieu of an
+ // emergency spill slot for the register scavenger.
+ // FIXME: It would be better to instead keep looking and choose another
+ // unspilled register that isn't reserved, if there is one.
+ if (Reg != AArch64::NoRegister && !RegInfo->isReservedReg(MF, Reg))
+ ExtraCSSpill = true;
+ } else
+ NumFPRSpilled += 2;
+
+ CanEliminateFrame = false;
}
-}
-
-bool
-AArch64FrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- const std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const {
- if (CSI.empty())
- return false;
-
- static const LoadStoreMethod PossibleClasses[] = {
- {&AArch64::GPR64RegClass, AArch64::LSPair64_STR, AArch64::LS64_STR},
- {&AArch64::FPR64RegClass, AArch64::LSFPPair64_STR, AArch64::LSFP64_STR},
- };
- const unsigned NumClasses = llvm::array_lengthof(PossibleClasses);
-
- emitFrameMemOps(/* isPrologue = */ true, MBB, MBBI, CSI, TRI,
- PossibleClasses, NumClasses);
-
- return true;
-}
-
-bool
-AArch64FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- const std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const {
-
- if (CSI.empty())
- return false;
-
- static const LoadStoreMethod PossibleClasses[] = {
- {&AArch64::GPR64RegClass, AArch64::LSPair64_LDR, AArch64::LS64_LDR},
- {&AArch64::FPR64RegClass, AArch64::LSFPPair64_LDR, AArch64::LSFP64_LDR},
- };
- const unsigned NumClasses = llvm::array_lengthof(PossibleClasses);
-
- emitFrameMemOps(/* isPrologue = */ false, MBB, MBBI, CSI, TRI,
- PossibleClasses, NumClasses);
-
- return true;
-}
-
-bool
-AArch64FrameLowering::hasFP(const MachineFunction &MF) const {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
- const TargetRegisterInfo *RI = MF.getTarget().getRegisterInfo();
-
- // This is a decision of ABI compliance. The AArch64 PCS gives various options
- // for conformance, and even at the most stringent level more or less permits
- // elimination for leaf functions because there's no loss of functionality
- // (for debugging etc)..
- if (MF.getTarget().Options.DisableFramePointerElim(MF) && MFI->hasCalls())
- return true;
- // The following are hard-limits: incorrect code will be generated if we try
- // to omit the frame.
- return (RI->needsStackRealignment(MF) ||
- MFI->hasVarSizedObjects() ||
- MFI->isFrameAddressTaken());
-}
-
-bool
-AArch64FrameLowering::useFPForAddressing(const MachineFunction &MF) const {
- return MF.getFrameInfo()->hasVarSizedObjects();
-}
-
-bool
-AArch64FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
-
- // Of the various reasons for having a frame pointer, it's actually only
- // variable-sized objects that prevent reservation of a call frame.
- return !(hasFP(MF) && MFI->hasVarSizedObjects());
-}
-
-void
-AArch64FrameLowering::eliminateCallFramePseudoInstr(
- MachineFunction &MF,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI) const {
- const AArch64InstrInfo &TII =
- *static_cast<const AArch64InstrInfo *>(MF.getTarget().getInstrInfo());
- DebugLoc dl = MI->getDebugLoc();
- int Opcode = MI->getOpcode();
- bool IsDestroy = Opcode == TII.getCallFrameDestroyOpcode();
- uint64_t CalleePopAmount = IsDestroy ? MI->getOperand(1).getImm() : 0;
-
- if (!hasReservedCallFrame(MF)) {
- unsigned Align = getStackAlignment();
-
- int64_t Amount = MI->getOperand(0).getImm();
- Amount = RoundUpToAlignment(Amount, Align);
- if (!IsDestroy) Amount = -Amount;
+ // FIXME: Set BigStack if any stack slot references may be out of range.
+ // For now, just conservatively guestimate based on unscaled indexing
+ // range. We'll end up allocating an unnecessary spill slot a lot, but
+ // realistically that's not a big deal at this stage of the game.
+ // The CSR spill slots have not been allocated yet, so estimateStackSize
+ // won't include them.
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ unsigned CFSize = estimateStackSize(MF) + 8 * (NumGPRSpilled + NumFPRSpilled);
+ DEBUG(dbgs() << "Estimated stack frame size: " << CFSize << " bytes.\n");
+ bool BigStack = (CFSize >= 256);
+ if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF))
+ AFI->setHasStackFrame(true);
+
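The 256-byte threshold above tracks the reach of AArch64's unscaled (signed 9-bit) addressing. A rough standalone sketch of the estimate, assuming the size and spill counts are supplied by the caller rather than queried from MachineFrameInfo:

#include <cstdint>

// Conservative big-stack test: 8 bytes per saved 64-bit register, and
// anything at or beyond 256 bytes may exceed the unscaled offset range.
static bool isBigStack(uint64_t EstimatedSize, unsigned NumGPRSpilled,
                       unsigned NumFPRSpilled) {
  uint64_t CFSize = EstimatedSize + 8 * (NumGPRSpilled + NumFPRSpilled);
  return CFSize >= 256;
}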
+ // Estimate if we might need to scavenge a register at some point in order
+ // to materialize a stack offset. If so, either spill one additional
+ // callee-saved register or reserve a special spill slot to facilitate
+ // register scavenging. If we already spilled an extra callee-saved register
+ // above to keep the number of spills even, we don't need to do anything else
+ // here.
+ if (BigStack && !ExtraCSSpill) {
+
+ // If we're adding a register to spill here, we have to add two of them
+ // to keep the number of regs to spill even.
+ assert(((UnspilledCSGPRs.size() & 1) == 0) && "Odd number of registers!");
+ unsigned Count = 0;
+ while (!UnspilledCSGPRs.empty() && Count < 2) {
+ unsigned Reg = UnspilledCSGPRs.back();
+ UnspilledCSGPRs.pop_back();
+ DEBUG(dbgs() << "Spilling " << PrintReg(Reg, RegInfo)
+ << " to get a scratch register.\n");
+ MRI->setPhysRegUsed(Reg);
+ ExtraCSSpill = true;
+ ++Count;
+ }
- // N.b. if CalleePopAmount is valid but zero (i.e. callee would pop, but it
- // doesn't have to pop anything), then the first operand will be zero too so
- // this adjustment is a no-op.
- if (CalleePopAmount == 0) {
- // FIXME: in-function stack adjustment for calls is limited to 12-bits
- // because there's no guaranteed temporary register available. Mostly call
- // frames will be allocated at the start of a function so this is OK, but
- // it is a limitation that needs dealing with.
- assert(Amount > -0xfff && Amount < 0xfff && "call frame too large");
- emitSPUpdate(MBB, MI, dl, TII, AArch64::NoRegister, Amount);
+ // If we didn't find an extra callee-saved register to spill, create
+ // an emergency spill slot.
+ if (!ExtraCSSpill) {
+ const TargetRegisterClass *RC = &AArch64::GPR64RegClass;
+ int FI = MFI->CreateStackObject(RC->getSize(), RC->getAlignment(), false);
+ RS->addScavengingFrameIndex(FI);
+ DEBUG(dbgs() << "No available CS registers, allocated fi#" << FI
+ << " as the emergency spill slot.\n");
}
- } else if (CalleePopAmount != 0) {
- // If the calling convention demands that the callee pops arguments from the
- // stack, we want to add it back if we have a reserved call frame.
- assert(CalleePopAmount < 0xfff && "call frame too large");
- emitSPUpdate(MBB, MI, dl, TII, AArch64::NoRegister, -CalleePopAmount);
}
-
- MBB.erase(MI);
}
diff --git a/lib/Target/AArch64/AArch64FrameLowering.h b/lib/Target/AArch64/AArch64FrameLowering.h
index 032dd90..0e00d16 100644
--- a/lib/Target/AArch64/AArch64FrameLowering.h
+++ b/lib/Target/AArch64/AArch64FrameLowering.h
@@ -1,4 +1,4 @@
-//==- AArch64FrameLowering.h - Define frame lowering for AArch64 -*- C++ -*--=//
+//==-- AArch64FrameLowering.h - TargetFrameLowering for AArch64 --*- C++ -*-==//
//
// The LLVM Compiler Infrastructure
//
@@ -7,100 +7,67 @@
//
//===----------------------------------------------------------------------===//
//
-// This class implements the AArch64-specific parts of the TargetFrameLowering
-// class.
+//
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_AARCH64_FRAMEINFO_H
-#define LLVM_AARCH64_FRAMEINFO_H
+#ifndef AArch64_FRAMELOWERING_H
+#define AArch64_FRAMELOWERING_H
-#include "AArch64Subtarget.h"
#include "llvm/Target/TargetFrameLowering.h"
namespace llvm {
+
class AArch64Subtarget;
+class AArch64TargetMachine;
class AArch64FrameLowering : public TargetFrameLowering {
-private:
- // In order to unify the spilling and restoring of callee-saved registers into
- // emitFrameMemOps, we need to be able to specify which instructions to use
- // for the relevant memory operations on each register class. An array of the
- // following struct is populated and passed in to achieve this.
- struct LoadStoreMethod {
- const TargetRegisterClass *RegClass; // E.g. GPR64RegClass
-
- // The preferred instruction.
- unsigned PairOpcode; // E.g. LSPair64_STR
-
- // Sometimes only a single register can be handled at once.
- unsigned SingleOpcode; // E.g. LS64_STR
- };
-protected:
- const AArch64Subtarget &STI;
+ const AArch64TargetMachine &TM;
public:
- explicit AArch64FrameLowering(const AArch64Subtarget &sti)
- : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 16, 0, 16),
- STI(sti) {
- }
+ explicit AArch64FrameLowering(const AArch64TargetMachine &TM,
+ const AArch64Subtarget &STI)
+ : TargetFrameLowering(StackGrowsDown, 16, 0, 16,
+ false /*StackRealignable*/),
+ TM(TM) {}
- /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
- /// the function.
- virtual void emitPrologue(MachineFunction &MF) const;
- virtual void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
-
- /// Decides how much stack adjustment to perform in each phase of the prologue
- /// and epilogue.
- void splitSPAdjustments(uint64_t Total, uint64_t &Initial,
- uint64_t &Residual) const;
-
- int64_t resolveFrameIndexReference(MachineFunction &MF, int FrameIndex,
- unsigned &FrameReg, int SPAdj,
- bool IsCalleeSaveOp) const;
-
- virtual void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
- RegScavenger *RS) const;
-
- virtual bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const;
- virtual bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const;
+ void emitCalleeSavedFrameMoves(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned FramePtr) const;
void eliminateCallFramePseudoInstr(MachineFunction &MF,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI) const;
-
- /// If the register is X30 (i.e. LR) and the return address is used in the
- /// function then the callee-save store doesn't actually kill the register,
- /// otherwise it does.
- bool determinePrologueDeath(MachineBasicBlock &MBB, unsigned Reg) const;
-
- /// This function emits the loads or stores required during prologue and
- /// epilogue as efficiently as possible.
- ///
- /// The operations involved in setting up and tearing down the frame are
- /// similar enough to warrant a shared function, particularly as discrepancies
- /// between the two would be disastrous.
- void emitFrameMemOps(bool isStore, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI,
- const LoadStoreMethod PossibleClasses[],
- unsigned NumClasses) const;
-
-
- virtual bool hasFP(const MachineFunction &MF) const;
-
- virtual bool useFPForAddressing(const MachineFunction &MF) const;
-
- /// On AA
- virtual bool hasReservedCallFrame(const MachineFunction &MF) const;
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const override;
+ /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
+ /// the function.
+ void emitPrologue(MachineFunction &MF) const override;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
+
+ int getFrameIndexOffset(const MachineFunction &MF, int FI) const override;
+ int getFrameIndexReference(const MachineFunction &MF, int FI,
+ unsigned &FrameReg) const override;
+ int resolveFrameIndexReference(const MachineFunction &MF, int FI,
+ unsigned &FrameReg,
+ bool PreferFP = false) const;
+ bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const override;
+
+ bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const override;
+
+ /// \brief Can this function use the red zone for local allocations.
+ bool canUseRedZone(const MachineFunction &MF) const;
+
+ bool hasFP(const MachineFunction &MF) const override;
+ bool hasReservedCallFrame(const MachineFunction &MF) const override;
+
+ void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+ RegScavenger *RS) const override;
};
} // End llvm namespace
diff --git a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index dac4b32..7007ffc 100644
--- a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -11,118 +11,119 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "aarch64-isel"
-#include "AArch64.h"
-#include "AArch64InstrInfo.h"
-#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
-#include "Utils/AArch64BaseInfo.h"
+#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/IR/Function.h" // To access function attributes.
#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/Intrinsics.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
+#define DEBUG_TYPE "aarch64-isel"
+
//===--------------------------------------------------------------------===//
-/// AArch64 specific code to select AArch64 machine instructions for
-/// SelectionDAG operations.
+/// AArch64DAGToDAGISel - AArch64 specific code to select AArch64 machine
+/// instructions for SelectionDAG operations.
///
namespace {
class AArch64DAGToDAGISel : public SelectionDAGISel {
AArch64TargetMachine &TM;
- /// Keep a pointer to the AArch64Subtarget around so that we can
+ /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
/// make the right decision when generating code for different targets.
const AArch64Subtarget *Subtarget;
+ bool ForCodeSize;
+
public:
explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm,
CodeGenOpt::Level OptLevel)
- : SelectionDAGISel(tm, OptLevel), TM(tm),
- Subtarget(&TM.getSubtarget<AArch64Subtarget>()) {
- }
+ : SelectionDAGISel(tm, OptLevel), TM(tm), Subtarget(nullptr),
+ ForCodeSize(false) {}
- virtual const char *getPassName() const {
+ const char *getPassName() const override {
return "AArch64 Instruction Selection";
}
- // Include the pieces autogenerated from the target description.
-#include "AArch64GenDAGISel.inc"
-
- template<unsigned MemSize>
- bool SelectOffsetUImm12(SDValue N, SDValue &UImm12) {
- const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
- if (!CN || CN->getZExtValue() % MemSize != 0
- || CN->getZExtValue() / MemSize > 0xfff)
- return false;
-
- UImm12 = CurDAG->getTargetConstant(CN->getZExtValue() / MemSize, MVT::i64);
- return true;
- }
-
- template<unsigned RegWidth>
- bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) {
- return SelectCVTFixedPosOperand(N, FixedPos, RegWidth);
- }
-
- /// Used for pre-lowered address-reference nodes, so we already know
- /// the fields match. This operand's job is simply to add an
- /// appropriate shift operand to the MOVZ/MOVK instruction.
- template<unsigned LogShift>
- bool SelectMOVWAddressRef(SDValue N, SDValue &Imm, SDValue &Shift) {
- Imm = N;
- Shift = CurDAG->getTargetConstant(LogShift, MVT::i32);
- return true;
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ AttributeSet FnAttrs = MF.getFunction()->getAttributes();
+ ForCodeSize =
+ FnAttrs.hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::OptimizeForSize) ||
+ FnAttrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::MinSize);
+ Subtarget = &TM.getSubtarget<AArch64Subtarget>();
+ return SelectionDAGISel::runOnMachineFunction(MF);
}
- bool SelectFPZeroOperand(SDValue N, SDValue &Dummy);
-
- bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
- unsigned RegWidth);
+ SDNode *Select(SDNode *Node) override;
+ /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
+ /// inline asm expressions.
bool SelectInlineAsmMemoryOperand(const SDValue &Op,
char ConstraintCode,
- std::vector<SDValue> &OutOps);
-
- bool SelectLogicalImm(SDValue N, SDValue &Imm);
-
- template<unsigned RegWidth>
- bool SelectTSTBOperand(SDValue N, SDValue &FixedPos) {
- return SelectTSTBOperand(N, FixedPos, RegWidth);
+ std::vector<SDValue> &OutOps) override;
+
+ SDNode *SelectMLAV64LaneV128(SDNode *N);
+ SDNode *SelectMULLV64LaneV128(unsigned IntNo, SDNode *N);
+ bool SelectArithExtendedRegister(SDValue N, SDValue &Reg, SDValue &Shift);
+ bool SelectArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
+ bool SelectNegArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
+ bool SelectArithShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
+ return SelectShiftedRegister(N, false, Reg, Shift);
+ }
+ bool SelectLogicalShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
+ return SelectShiftedRegister(N, true, Reg, Shift);
+ }
+ bool SelectAddrModeIndexed8(SDValue N, SDValue &Base, SDValue &OffImm) {
+ return SelectAddrModeIndexed(N, 1, Base, OffImm);
+ }
+ bool SelectAddrModeIndexed16(SDValue N, SDValue &Base, SDValue &OffImm) {
+ return SelectAddrModeIndexed(N, 2, Base, OffImm);
+ }
+ bool SelectAddrModeIndexed32(SDValue N, SDValue &Base, SDValue &OffImm) {
+ return SelectAddrModeIndexed(N, 4, Base, OffImm);
+ }
+ bool SelectAddrModeIndexed64(SDValue N, SDValue &Base, SDValue &OffImm) {
+ return SelectAddrModeIndexed(N, 8, Base, OffImm);
+ }
+ bool SelectAddrModeIndexed128(SDValue N, SDValue &Base, SDValue &OffImm) {
+ return SelectAddrModeIndexed(N, 16, Base, OffImm);
+ }
+ bool SelectAddrModeUnscaled8(SDValue N, SDValue &Base, SDValue &OffImm) {
+ return SelectAddrModeUnscaled(N, 1, Base, OffImm);
+ }
+ bool SelectAddrModeUnscaled16(SDValue N, SDValue &Base, SDValue &OffImm) {
+ return SelectAddrModeUnscaled(N, 2, Base, OffImm);
+ }
+ bool SelectAddrModeUnscaled32(SDValue N, SDValue &Base, SDValue &OffImm) {
+ return SelectAddrModeUnscaled(N, 4, Base, OffImm);
+ }
+ bool SelectAddrModeUnscaled64(SDValue N, SDValue &Base, SDValue &OffImm) {
+ return SelectAddrModeUnscaled(N, 8, Base, OffImm);
+ }
+ bool SelectAddrModeUnscaled128(SDValue N, SDValue &Base, SDValue &OffImm) {
+ return SelectAddrModeUnscaled(N, 16, Base, OffImm);
}
- bool SelectTSTBOperand(SDValue N, SDValue &FixedPos, unsigned RegWidth);
-
- SDNode *SelectAtomic(SDNode *N, unsigned Op8, unsigned Op16, unsigned Op32,
- unsigned Op64);
-
- /// Put the given constant into a pool and return a DAG which will give its
- /// address.
- SDValue getConstantPoolItemAddress(SDLoc DL, const Constant *CV);
-
- SDNode *TrySelectToMoveImm(SDNode *N);
- SDNode *LowerToFPLitPool(SDNode *Node);
- SDNode *SelectToLitPool(SDNode *N);
-
- SDNode* Select(SDNode*);
-private:
- /// Get the opcode for table lookup instruction
- unsigned getTBLOpc(bool IsExt, bool Is64Bit, unsigned NumOfVec);
-
- /// Select NEON table lookup intrinsics. NumVecs should be 1, 2, 3 or 4.
- /// IsExt is to indicate if the result will be extended with an argument.
- SDNode *SelectVTBL(SDNode *N, unsigned NumVecs, bool IsExt);
+ template<int Width>
+ bool SelectAddrModeWRO(SDValue N, SDValue &Base, SDValue &Offset,
+ SDValue &SignExtend, SDValue &DoShift) {
+ return SelectAddrModeWRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
+ }
- /// Select NEON load intrinsics. NumVecs should be 1, 2, 3 or 4.
- SDNode *SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
- const uint16_t *Opcode);
+ template<int Width>
+ bool SelectAddrModeXRO(SDValue N, SDValue &Base, SDValue &Offset,
+ SDValue &SignExtend, SDValue &DoShift) {
+ return SelectAddrModeXRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
+ }
- /// Select NEON store intrinsics. NumVecs should be 1, 2, 3 or 4.
- SDNode *SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
- const uint16_t *Opcodes);
/// Form sequences of consecutive 64/128-bit registers for use in NEON
/// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have
@@ -136,315 +137,713 @@ private:
SDValue createTuple(ArrayRef<SDValue> Vecs, unsigned RegClassIDs[],
unsigned SubRegs[]);
- /// Select NEON load-duplicate intrinsics. NumVecs should be 2, 3 or 4.
- /// The opcode array specifies the instructions used for load.
- SDNode *SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs,
- const uint16_t *Opcodes);
+ SDNode *SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, bool isExt);
+
+ SDNode *SelectIndexedLoad(SDNode *N, bool &Done);
+
+ SDNode *SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
+ unsigned SubRegIdx);
+ SDNode *SelectPostLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
+ unsigned SubRegIdx);
+ SDNode *SelectLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
+ SDNode *SelectPostLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
+
+ SDNode *SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc);
+ SDNode *SelectPostStore(SDNode *N, unsigned NumVecs, unsigned Opc);
+ SDNode *SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
+ SDNode *SelectPostStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
+
+ SDNode *SelectSIMDAddSubNarrowing(unsigned IntNo, SDNode *Node);
+ SDNode *SelectSIMDXtnNarrowing(unsigned IntNo, SDNode *Node);
+
+ SDNode *SelectBitfieldExtractOp(SDNode *N);
+ SDNode *SelectBitfieldInsertOp(SDNode *N);
+
+ SDNode *SelectLIBM(SDNode *N);
+
+// Include the pieces autogenerated from the target description.
+#include "AArch64GenDAGISel.inc"
+
+private:
+ bool SelectShiftedRegister(SDValue N, bool AllowROR, SDValue &Reg,
+ SDValue &Shift);
+ bool SelectAddrModeIndexed(SDValue N, unsigned Size, SDValue &Base,
+ SDValue &OffImm);
+ bool SelectAddrModeUnscaled(SDValue N, unsigned Size, SDValue &Base,
+ SDValue &OffImm);
+ bool SelectAddrModeWRO(SDValue N, unsigned Size, SDValue &Base,
+ SDValue &Offset, SDValue &SignExtend,
+ SDValue &DoShift);
+ bool SelectAddrModeXRO(SDValue N, unsigned Size, SDValue &Base,
+ SDValue &Offset, SDValue &SignExtend,
+ SDValue &DoShift);
+ bool isWorthFolding(SDValue V) const;
+ bool SelectExtendedSHL(SDValue N, unsigned Size, bool WantExtend,
+ SDValue &Offset, SDValue &SignExtend);
- /// Select NEON load/store lane intrinsics. NumVecs should be 2, 3 or 4.
- /// The opcode arrays specify the instructions used for load/store.
- SDNode *SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
- unsigned NumVecs, const uint16_t *Opcodes);
+ template<unsigned RegWidth>
+ bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) {
+ return SelectCVTFixedPosOperand(N, FixedPos, RegWidth);
+ }
- SDValue getTargetSubregToReg(int SRIdx, SDLoc DL, EVT VT, EVT VTD,
- SDValue Operand);
+ bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, unsigned Width);
};
+} // end anonymous namespace
+
+/// isIntImmediate - This method tests to see if the node is a constant
+/// operand. If so, Imm will receive the 32-bit value.
+static bool isIntImmediate(const SDNode *N, uint64_t &Imm) {
+ if (const ConstantSDNode *C = dyn_cast<const ConstantSDNode>(N)) {
+ Imm = C->getZExtValue();
+ return true;
+ }
+ return false;
}
-bool
-AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
- unsigned RegWidth) {
- const ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N);
- if (!CN) return false;
+// isIntImmediate - This method tests to see if the operand is a constant.
+// If so, Imm will receive the value.
+static bool isIntImmediate(SDValue N, uint64_t &Imm) {
+ return isIntImmediate(N.getNode(), Imm);
+}
- // An FCVT[SU] instruction performs: convertToInt(Val * 2^fbits) where fbits
- // is between 1 and 32 for a destination w-register, or 1 and 64 for an
- // x-register.
- //
- // By this stage, we've detected (fp_to_[su]int (fmul Val, THIS_NODE)) so we
- // want THIS_NODE to be 2^fbits. This is much easier to deal with using
- // integers.
- bool IsExact;
+// isOpcWithIntImmediate - This method tests to see if the node is a specific
+// opcode and that it has an immediate integer right operand.
+// If so, Imm will receive the 32-bit value.
+static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc,
+ uint64_t &Imm) {
+ return N->getOpcode() == Opc &&
+ isIntImmediate(N->getOperand(1).getNode(), Imm);
+}
- // fbits is between 1 and 64 in the worst-case, which means the fmul
- // could have 2^64 as an actual operand. Need 65 bits of precision.
- APSInt IntVal(65, true);
- CN->getValueAPF().convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact);
+bool AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand(
+ const SDValue &Op, char ConstraintCode, std::vector<SDValue> &OutOps) {
+ assert(ConstraintCode == 'm' && "unexpected asm memory constraint");
+ // Require the address to be in a register. That is safe for all AArch64
+ // variants and it is hard to do anything much smarter without knowing
+ // how the operand is used.
+ OutOps.push_back(Op);
+ return false;
+}
- // N.b. isPowerOf2 also checks for > 0.
- if (!IsExact || !IntVal.isPowerOf2()) return false;
- unsigned FBits = IntVal.logBase2();
+/// SelectArithImmed - Select an immediate value that can be represented as
+/// a 12-bit value shifted left by either 0 or 12. If so, return true with
+/// Val set to the 12-bit value and Shift set to the shifter operand.
+bool AArch64DAGToDAGISel::SelectArithImmed(SDValue N, SDValue &Val,
+ SDValue &Shift) {
+ // This function is called from the addsub_shifted_imm ComplexPattern,
+  // which lists [imm] as the list of opcodes it's interested in; however,
+ // we still need to check whether the operand is actually an immediate
+ // here because the ComplexPattern opcode list is only used in
+ // root-level opcode matching.
+ if (!isa<ConstantSDNode>(N.getNode()))
+ return false;
- // Checks above should have guaranteed that we haven't lost information in
- // finding FBits, but it must still be in range.
- if (FBits == 0 || FBits > RegWidth) return false;
+ uint64_t Immed = cast<ConstantSDNode>(N.getNode())->getZExtValue();
+ unsigned ShiftAmt;
- FixedPos = CurDAG->getTargetConstant(64 - FBits, MVT::i32);
+ if (Immed >> 12 == 0) {
+ ShiftAmt = 0;
+ } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
+ ShiftAmt = 12;
+ Immed = Immed >> 12;
+ } else
+ return false;
+
+ unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
+ Val = CurDAG->getTargetConstant(Immed, MVT::i32);
+ Shift = CurDAG->getTargetConstant(ShVal, MVT::i32);
return true;
}
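As a self-contained sketch of the encoding rule above (plain C++, no LLVM types): an add/sub immediate is a 12-bit payload shifted left by 0 or 12.

#include <cstdint>

// True if Imm fits AArch64's add/sub immediate form; on success Val holds
// the 12-bit payload and ShiftAmt the shift (0 or 12).
static bool encodeArithImmed(uint64_t Imm, unsigned &Val, unsigned &ShiftAmt) {
  if (Imm >> 12 == 0) {
    Val = unsigned(Imm);
    ShiftAmt = 0;
    return true;
  }
  if ((Imm & 0xfff) == 0 && Imm >> 24 == 0) {
    Val = unsigned(Imm >> 12);
    ShiftAmt = 12;
    return true;
  }
  return false; // needs a separate materialization instead
}

For example, 0x123000 encodes as Val = 0x123 with ShiftAmt = 12, while 0x123001 does not encode at all.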
-bool
-AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand(const SDValue &Op,
- char ConstraintCode,
- std::vector<SDValue> &OutOps) {
- switch (ConstraintCode) {
- default: llvm_unreachable("Unrecognised AArch64 memory constraint");
- case 'm':
- // FIXME: more freedom is actually permitted for 'm'. We can go
- // hunting for a base and an offset if we want. Of course, since
- // we don't really know how the operand is going to be used we're
- // probably restricted to the load/store pair's simm7 as an offset
- // range anyway.
- case 'Q':
- OutOps.push_back(Op);
+/// SelectNegArithImmed - As above, but negates the value before trying to
+/// select it.
+bool AArch64DAGToDAGISel::SelectNegArithImmed(SDValue N, SDValue &Val,
+ SDValue &Shift) {
+ // This function is called from the addsub_shifted_imm ComplexPattern,
+  // which lists [imm] as the list of opcodes it's interested in; however,
+ // we still need to check whether the operand is actually an immediate
+ // here because the ComplexPattern opcode list is only used in
+ // root-level opcode matching.
+ if (!isa<ConstantSDNode>(N.getNode()))
+ return false;
+
+ // The immediate operand must be a 24-bit zero-extended immediate.
+ uint64_t Immed = cast<ConstantSDNode>(N.getNode())->getZExtValue();
+
+ // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
+ // have the opposite effect on the C flag, so this pattern mustn't match under
+ // those circumstances.
+ if (Immed == 0)
+ return false;
+
+ if (N.getValueType() == MVT::i32)
+ Immed = ~((uint32_t)Immed) + 1;
+ else
+ Immed = ~Immed + 1ULL;
+ if (Immed & 0xFFFFFFFFFF000000ULL)
+ return false;
+
+ Immed &= 0xFFFFFFULL;
+ return SelectArithImmed(CurDAG->getConstant(Immed, MVT::i32), Val, Shift);
+}
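A minimal sketch of the 32-bit negation step, as a hypothetical standalone helper: wrapping in uint32_t computes -Imm modulo 2^32, and the result must stay within the 24 bits (12-bit payload, optionally shifted by 12) that SelectArithImmed can encode.

#include <cstdint>

// True if the negation of a nonzero 32-bit Imm can still be selected as
// an add/sub immediate; Imm == 0 is rejected because cmp/cmn #0 disagree
// on the C flag.
static bool negFitsArithImmed32(uint64_t Imm) {
  if (Imm == 0)
    return false;
  uint64_t Neg = ~uint32_t(Imm) + 1u; // two's-complement negate, mod 2^32
  return (Neg & 0xFFFFFFFFFF000000ULL) == 0;
}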
+
+/// getShiftTypeForNode - Translate a shift node to the corresponding
+/// ShiftType value.
+static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N) {
+ switch (N.getOpcode()) {
+ default:
+ return AArch64_AM::InvalidShiftExtend;
+ case ISD::SHL:
+ return AArch64_AM::LSL;
+ case ISD::SRL:
+ return AArch64_AM::LSR;
+ case ISD::SRA:
+ return AArch64_AM::ASR;
+ case ISD::ROTR:
+ return AArch64_AM::ROR;
}
+}
+/// \brief Determine whether it is worth folding V into an extended register.
+bool AArch64DAGToDAGISel::isWorthFolding(SDValue V) const {
+  // It hurts if a value is used at least twice, unless we are optimizing
+  // for code size.
+ if (ForCodeSize || V.hasOneUse())
+ return true;
return false;
}
-bool
-AArch64DAGToDAGISel::SelectFPZeroOperand(SDValue N, SDValue &Dummy) {
- ConstantFPSDNode *Imm = dyn_cast<ConstantFPSDNode>(N);
- if (!Imm || !Imm->getValueAPF().isPosZero())
+/// SelectShiftedRegister - Select a "shifted register" operand. If the value
+/// is not shifted, set the Shift operand to default of "LSL 0". The logical
+/// instructions allow the shifted register to be rotated, but the arithmetic
+/// instructions do not. The AllowROR parameter specifies whether ROR is
+/// supported.
+bool AArch64DAGToDAGISel::SelectShiftedRegister(SDValue N, bool AllowROR,
+ SDValue &Reg, SDValue &Shift) {
+ AArch64_AM::ShiftExtendType ShType = getShiftTypeForNode(N);
+ if (ShType == AArch64_AM::InvalidShiftExtend)
+ return false;
+ if (!AllowROR && ShType == AArch64_AM::ROR)
return false;
- // Doesn't actually carry any information, but keeps TableGen quiet.
- Dummy = CurDAG->getTargetConstant(0, MVT::i32);
- return true;
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+ unsigned BitSize = N.getValueType().getSizeInBits();
+ unsigned Val = RHS->getZExtValue() & (BitSize - 1);
+ unsigned ShVal = AArch64_AM::getShifterImm(ShType, Val);
+
+ Reg = N.getOperand(0);
+ Shift = CurDAG->getTargetConstant(ShVal, MVT::i32);
+ return isWorthFolding(N);
+ }
+
+ return false;
}
-bool AArch64DAGToDAGISel::SelectLogicalImm(SDValue N, SDValue &Imm) {
- uint32_t Bits;
- uint32_t RegWidth = N.getValueType().getSizeInBits();
+/// getExtendTypeForNode - Translate an extend node to the corresponding
+/// ExtendType value.
+static AArch64_AM::ShiftExtendType
+getExtendTypeForNode(SDValue N, bool IsLoadStore = false) {
+ if (N.getOpcode() == ISD::SIGN_EXTEND ||
+ N.getOpcode() == ISD::SIGN_EXTEND_INREG) {
+ EVT SrcVT;
+ if (N.getOpcode() == ISD::SIGN_EXTEND_INREG)
+ SrcVT = cast<VTSDNode>(N.getOperand(1))->getVT();
+ else
+ SrcVT = N.getOperand(0).getValueType();
+
+ if (!IsLoadStore && SrcVT == MVT::i8)
+ return AArch64_AM::SXTB;
+ else if (!IsLoadStore && SrcVT == MVT::i16)
+ return AArch64_AM::SXTH;
+ else if (SrcVT == MVT::i32)
+ return AArch64_AM::SXTW;
+ assert(SrcVT != MVT::i64 && "extend from 64-bits?");
+
+ return AArch64_AM::InvalidShiftExtend;
+ } else if (N.getOpcode() == ISD::ZERO_EXTEND ||
+ N.getOpcode() == ISD::ANY_EXTEND) {
+ EVT SrcVT = N.getOperand(0).getValueType();
+ if (!IsLoadStore && SrcVT == MVT::i8)
+ return AArch64_AM::UXTB;
+ else if (!IsLoadStore && SrcVT == MVT::i16)
+ return AArch64_AM::UXTH;
+ else if (SrcVT == MVT::i32)
+ return AArch64_AM::UXTW;
+ assert(SrcVT != MVT::i64 && "extend from 64-bits?");
+
+ return AArch64_AM::InvalidShiftExtend;
+ } else if (N.getOpcode() == ISD::AND) {
+ ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
+ if (!CSD)
+ return AArch64_AM::InvalidShiftExtend;
+ uint64_t AndMask = CSD->getZExtValue();
+
+ switch (AndMask) {
+ default:
+ return AArch64_AM::InvalidShiftExtend;
+ case 0xFF:
+ return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
+ case 0xFFFF:
+ return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
+ case 0xFFFFFFFF:
+ return AArch64_AM::UXTW;
+ }
+ }
- ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
- if (!CN) return false;
+ return AArch64_AM::InvalidShiftExtend;
+}
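The AND clause above means a zero-extend written as a mask maps directly onto an extend operand. A standalone sketch of that table, with hypothetical enum names standing in for the AArch64_AM values:

enum Extend { UXTB, UXTH, UXTW, InvalidExtend };

// Load/store addressing only accepts the word form; arithmetic operands
// also take the byte and halfword extends.
static Extend extendForMask(unsigned long long AndMask, bool IsLoadStore) {
  switch (AndMask) {
  case 0xFF:       return IsLoadStore ? InvalidExtend : UXTB;
  case 0xFFFF:     return IsLoadStore ? InvalidExtend : UXTH;
  case 0xFFFFFFFF: return UXTW;
  default:         return InvalidExtend;
  }
}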
+
+// Helper for SelectMLAV64LaneV128 - Recognize high lane extracts.
+static bool checkHighLaneIndex(SDNode *DL, SDValue &LaneOp, int &LaneIdx) {
+ if (DL->getOpcode() != AArch64ISD::DUPLANE16 &&
+ DL->getOpcode() != AArch64ISD::DUPLANE32)
+ return false;
- if (!A64Imms::isLogicalImm(RegWidth, CN->getZExtValue(), Bits))
+ SDValue SV = DL->getOperand(0);
+ if (SV.getOpcode() != ISD::INSERT_SUBVECTOR)
return false;
- Imm = CurDAG->getTargetConstant(Bits, MVT::i32);
+ SDValue EV = SV.getOperand(1);
+ if (EV.getOpcode() != ISD::EXTRACT_SUBVECTOR)
+ return false;
+
+ ConstantSDNode *DLidx = cast<ConstantSDNode>(DL->getOperand(1).getNode());
+ ConstantSDNode *EVidx = cast<ConstantSDNode>(EV.getOperand(1).getNode());
+ LaneIdx = DLidx->getSExtValue() + EVidx->getSExtValue();
+ LaneOp = EV.getOperand(0);
+
return true;
}
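The lane arithmetic above simply offsets the DUPLANE index by the subvector's starting element: duplicating lane 2 of the high half of a v8i16 (subvector base 4) refers to lane 6 of the full 128-bit vector. A trivial sketch:

// Combined lane = position of the extracted subvector plus the lane
// selected within it.
static int combinedLaneIndex(int SubvectorBase, int DupLaneIdx) {
  return SubvectorBase + DupLaneIdx;
}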
-SDNode *AArch64DAGToDAGISel::TrySelectToMoveImm(SDNode *Node) {
- SDNode *ResNode;
- SDLoc dl(Node);
- EVT DestType = Node->getValueType(0);
- unsigned DestWidth = DestType.getSizeInBits();
-
- unsigned MOVOpcode;
- EVT MOVType;
- int UImm16, Shift;
- uint32_t LogicalBits;
-
- uint64_t BitPat = cast<ConstantSDNode>(Node)->getZExtValue();
- if (A64Imms::isMOVZImm(DestWidth, BitPat, UImm16, Shift)) {
- MOVType = DestType;
- MOVOpcode = DestWidth == 64 ? AArch64::MOVZxii : AArch64::MOVZwii;
- } else if (A64Imms::isMOVNImm(DestWidth, BitPat, UImm16, Shift)) {
- MOVType = DestType;
- MOVOpcode = DestWidth == 64 ? AArch64::MOVNxii : AArch64::MOVNwii;
- } else if (DestWidth == 64 && A64Imms::isMOVNImm(32, BitPat, UImm16, Shift)) {
- // To get something like 0x0000_0000_ffff_1234 into a 64-bit register we can
- // use a 32-bit instruction: "movn w0, 0xedbc".
- MOVType = MVT::i32;
- MOVOpcode = AArch64::MOVNwii;
- } else if (A64Imms::isLogicalImm(DestWidth, BitPat, LogicalBits)) {
- MOVOpcode = DestWidth == 64 ? AArch64::ORRxxi : AArch64::ORRwwi;
- uint16_t ZR = DestWidth == 64 ? AArch64::XZR : AArch64::WZR;
-
- return CurDAG->getMachineNode(MOVOpcode, dl, DestType,
- CurDAG->getRegister(ZR, DestType),
- CurDAG->getTargetConstant(LogicalBits, MVT::i32));
- } else {
- // Can't handle it in one instruction. There's scope for permitting two (or
- // more) instructions, but that'll need more thought.
- return NULL;
+// Helper for SelectOpcV64LaneV128 - Recognize operations where one operand is a
+// high lane extract.
+static bool checkV64LaneV128(SDValue Op0, SDValue Op1, SDValue &StdOp,
+ SDValue &LaneOp, int &LaneIdx) {
+
+ if (!checkHighLaneIndex(Op0.getNode(), LaneOp, LaneIdx)) {
+ std::swap(Op0, Op1);
+ if (!checkHighLaneIndex(Op0.getNode(), LaneOp, LaneIdx))
+ return false;
+ }
+ StdOp = Op1;
+ return true;
+}
+
+/// SelectMLAV64LaneV128 - AArch64 supports vector MLAs where one multiplicand
+/// is a lane in the upper half of a 128-bit vector. Recognize and select this
+/// so that we don't emit unnecessary lane extracts.
+SDNode *AArch64DAGToDAGISel::SelectMLAV64LaneV128(SDNode *N) {
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+ SDValue MLAOp1; // Will hold ordinary multiplicand for MLA.
+ SDValue MLAOp2; // Will hold lane-accessed multiplicand for MLA.
+ int LaneIdx = -1; // Will hold the lane index.
+
+ if (Op1.getOpcode() != ISD::MUL ||
+ !checkV64LaneV128(Op1.getOperand(0), Op1.getOperand(1), MLAOp1, MLAOp2,
+ LaneIdx)) {
+ std::swap(Op0, Op1);
+ if (Op1.getOpcode() != ISD::MUL ||
+ !checkV64LaneV128(Op1.getOperand(0), Op1.getOperand(1), MLAOp1, MLAOp2,
+ LaneIdx))
+ return nullptr;
}
- ResNode = CurDAG->getMachineNode(MOVOpcode, dl, MOVType,
- CurDAG->getTargetConstant(UImm16, MVT::i32),
- CurDAG->getTargetConstant(Shift, MVT::i32));
+ SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, MVT::i64);
+
+ SDValue Ops[] = { Op0, MLAOp1, MLAOp2, LaneIdxVal };
+
+ unsigned MLAOpc = ~0U;
- if (MOVType != DestType) {
- ResNode = CurDAG->getMachineNode(TargetOpcode::SUBREG_TO_REG, dl,
- MVT::i64, MVT::i32, MVT::Other,
- CurDAG->getTargetConstant(0, MVT::i64),
- SDValue(ResNode, 0),
- CurDAG->getTargetConstant(AArch64::sub_32, MVT::i32));
+ switch (N->getSimpleValueType(0).SimpleTy) {
+ default:
+ llvm_unreachable("Unrecognized MLA.");
+ case MVT::v4i16:
+ MLAOpc = AArch64::MLAv4i16_indexed;
+ break;
+ case MVT::v8i16:
+ MLAOpc = AArch64::MLAv8i16_indexed;
+ break;
+ case MVT::v2i32:
+ MLAOpc = AArch64::MLAv2i32_indexed;
+ break;
+ case MVT::v4i32:
+ MLAOpc = AArch64::MLAv4i32_indexed;
+ break;
}
- return ResNode;
+ return CurDAG->getMachineNode(MLAOpc, SDLoc(N), N->getValueType(0), Ops);
+}
+
+SDNode *AArch64DAGToDAGISel::SelectMULLV64LaneV128(unsigned IntNo, SDNode *N) {
+ SDValue SMULLOp0;
+ SDValue SMULLOp1;
+ int LaneIdx;
+
+ if (!checkV64LaneV128(N->getOperand(1), N->getOperand(2), SMULLOp0, SMULLOp1,
+ LaneIdx))
+ return nullptr;
+
+ SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, MVT::i64);
+
+ SDValue Ops[] = { SMULLOp0, SMULLOp1, LaneIdxVal };
+
+ unsigned SMULLOpc = ~0U;
+
+ if (IntNo == Intrinsic::aarch64_neon_smull) {
+ switch (N->getSimpleValueType(0).SimpleTy) {
+ default:
+ llvm_unreachable("Unrecognized SMULL.");
+ case MVT::v4i32:
+ SMULLOpc = AArch64::SMULLv4i16_indexed;
+ break;
+ case MVT::v2i64:
+ SMULLOpc = AArch64::SMULLv2i32_indexed;
+ break;
+ }
+ } else if (IntNo == Intrinsic::aarch64_neon_umull) {
+ switch (N->getSimpleValueType(0).SimpleTy) {
+ default:
+ llvm_unreachable("Unrecognized SMULL.");
+ case MVT::v4i32:
+ SMULLOpc = AArch64::UMULLv4i16_indexed;
+ break;
+ case MVT::v2i64:
+ SMULLOpc = AArch64::UMULLv2i32_indexed;
+ break;
+ }
+ } else
+ llvm_unreachable("Unrecognized intrinsic.");
+
+ return CurDAG->getMachineNode(SMULLOpc, SDLoc(N), N->getValueType(0), Ops);
+}
+
+/// Instructions that accept extend modifiers like UXTW expect the register
+/// being extended to be a GPR32, but the incoming DAG might be acting on a
+/// GPR64 (either via SEXT_INREG or AND). Extract the appropriate low bits if
+/// this is the case.
+static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N) {
+ if (N.getValueType() == MVT::i32)
+ return N;
+
+ SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, MVT::i32);
+ MachineSDNode *Node = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
+ SDLoc(N), MVT::i32, N, SubReg);
+ return SDValue(Node, 0);
+}
+
+
+/// SelectArithExtendedRegister - Select a "extended register" operand. This
+/// operand folds in an extend followed by an optional left shift.
+bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg,
+ SDValue &Shift) {
+ unsigned ShiftVal = 0;
+ AArch64_AM::ShiftExtendType Ext;
+
+ if (N.getOpcode() == ISD::SHL) {
+ ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
+ if (!CSD)
+ return false;
+ ShiftVal = CSD->getZExtValue();
+ if (ShiftVal > 4)
+ return false;
+
+ Ext = getExtendTypeForNode(N.getOperand(0));
+ if (Ext == AArch64_AM::InvalidShiftExtend)
+ return false;
+
+ Reg = N.getOperand(0).getOperand(0);
+ } else {
+ Ext = getExtendTypeForNode(N);
+ if (Ext == AArch64_AM::InvalidShiftExtend)
+ return false;
+
+ Reg = N.getOperand(0);
+ }
+
+ // AArch64 mandates that the RHS of the operation must use the smallest
+  // register class that could contain the size being extended from. Thus,
+ // if we're folding a (sext i8), we need the RHS to be a GPR32, even though
+ // there might not be an actual 32-bit value in the program. We can
+  // (harmlessly) synthesize one by injecting an EXTRACT_SUBREG here.
+ assert(Ext != AArch64_AM::UXTX && Ext != AArch64_AM::SXTX);
+ Reg = narrowIfNeeded(CurDAG, Reg);
+ Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), MVT::i32);
+ return isWorthFolding(N);
}
-SDValue
-AArch64DAGToDAGISel::getConstantPoolItemAddress(SDLoc DL,
- const Constant *CV) {
- EVT PtrVT = getTargetLowering()->getPointerTy();
-
- switch (getTargetLowering()->getTargetMachine().getCodeModel()) {
- case CodeModel::Small: {
- unsigned Alignment =
- getTargetLowering()->getDataLayout()->getABITypeAlignment(CV->getType());
- return CurDAG->getNode(
- AArch64ISD::WrapperSmall, DL, PtrVT,
- CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, AArch64II::MO_NO_FLAG),
- CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, AArch64II::MO_LO12),
- CurDAG->getConstant(Alignment, MVT::i32));
- }
- case CodeModel::Large: {
- SDNode *LitAddr;
- LitAddr = CurDAG->getMachineNode(
- AArch64::MOVZxii, DL, PtrVT,
- CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, AArch64II::MO_ABS_G3),
- CurDAG->getTargetConstant(3, MVT::i32));
- LitAddr = CurDAG->getMachineNode(
- AArch64::MOVKxii, DL, PtrVT, SDValue(LitAddr, 0),
- CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, AArch64II::MO_ABS_G2_NC),
- CurDAG->getTargetConstant(2, MVT::i32));
- LitAddr = CurDAG->getMachineNode(
- AArch64::MOVKxii, DL, PtrVT, SDValue(LitAddr, 0),
- CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, AArch64II::MO_ABS_G1_NC),
- CurDAG->getTargetConstant(1, MVT::i32));
- LitAddr = CurDAG->getMachineNode(
- AArch64::MOVKxii, DL, PtrVT, SDValue(LitAddr, 0),
- CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, AArch64II::MO_ABS_G0_NC),
- CurDAG->getTargetConstant(0, MVT::i32));
- return SDValue(LitAddr, 0);
+/// SelectAddrModeIndexed - Select a "register plus scaled unsigned 12-bit
+/// immediate" address. The "Size" argument is the size in bytes of the memory
+/// reference, which determines the scale.
+bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size,
+ SDValue &Base, SDValue &OffImm) {
+ const TargetLowering *TLI = getTargetLowering();
+ if (N.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(N)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
+ OffImm = CurDAG->getTargetConstant(0, MVT::i64);
+ return true;
}
- default:
- llvm_unreachable("Only small and large code models supported now");
+
+ if (N.getOpcode() == AArch64ISD::ADDlow) {
+ GlobalAddressSDNode *GAN =
+ dyn_cast<GlobalAddressSDNode>(N.getOperand(1).getNode());
+ Base = N.getOperand(0);
+ OffImm = N.getOperand(1);
+ if (!GAN)
+ return true;
+
+ const GlobalValue *GV = GAN->getGlobal();
+ unsigned Alignment = GV->getAlignment();
+ const DataLayout *DL = TLI->getDataLayout();
+ if (Alignment == 0 && !Subtarget->isTargetDarwin())
+ Alignment = DL->getABITypeAlignment(GV->getType()->getElementType());
+
+ if (Alignment >= Size)
+ return true;
}
+
+ if (CurDAG->isBaseWithConstantOffset(N)) {
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+ int64_t RHSC = (int64_t)RHS->getZExtValue();
+ unsigned Scale = Log2_32(Size);
+ if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
+ Base = N.getOperand(0);
+ if (Base.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(Base)->getIndex();
+ Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
+ }
+ OffImm = CurDAG->getTargetConstant(RHSC >> Scale, MVT::i64);
+ return true;
+ }
+ }
+ }
+
+ // Before falling back to our general case, check if the unscaled
+ // instructions can handle this. If so, that's preferable.
+ if (SelectAddrModeUnscaled(N, Size, Base, OffImm))
+ return false;
+
+ // Base only. The address will be materialized into a register before
+ // the memory is accessed.
+ // add x0, Xbase, #offset
+ // ldr x0, [x0]
+ Base = N;
+ OffImm = CurDAG->getTargetConstant(0, MVT::i64);
+ return true;
}
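The constant-offset case above reduces to a simple legality predicate: aligned to the access size, non-negative, and under 4096 after scaling. A standalone sketch, assuming Size is the access size in bytes:

#include <cstdint>

// Size is a power of two in 1..16.
static bool isLegalScaledOffset(int64_t Off, unsigned Size) {
  return Off >= 0 && Off % Size == 0 && Off / Size < 0x1000;
}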
-SDNode *AArch64DAGToDAGISel::SelectToLitPool(SDNode *Node) {
- SDLoc DL(Node);
- uint64_t UnsignedVal = cast<ConstantSDNode>(Node)->getZExtValue();
- int64_t SignedVal = cast<ConstantSDNode>(Node)->getSExtValue();
- EVT DestType = Node->getValueType(0);
-
- // Since we may end up loading a 64-bit constant from a 32-bit entry the
- // constant in the pool may have a different type to the eventual node.
- ISD::LoadExtType Extension;
- EVT MemType;
-
- assert((DestType == MVT::i64 || DestType == MVT::i32)
- && "Only expect integer constants at the moment");
-
- if (DestType == MVT::i32) {
- Extension = ISD::NON_EXTLOAD;
- MemType = MVT::i32;
- } else if (UnsignedVal <= UINT32_MAX) {
- Extension = ISD::ZEXTLOAD;
- MemType = MVT::i32;
- } else if (SignedVal >= INT32_MIN && SignedVal <= INT32_MAX) {
- Extension = ISD::SEXTLOAD;
- MemType = MVT::i32;
- } else {
- Extension = ISD::NON_EXTLOAD;
- MemType = MVT::i64;
- }
-
- Constant *CV = ConstantInt::get(Type::getIntNTy(*CurDAG->getContext(),
- MemType.getSizeInBits()),
- UnsignedVal);
- SDValue PoolAddr = getConstantPoolItemAddress(DL, CV);
- unsigned Alignment =
- getTargetLowering()->getDataLayout()->getABITypeAlignment(CV->getType());
-
- return CurDAG->getExtLoad(Extension, DL, DestType, CurDAG->getEntryNode(),
- PoolAddr,
- MachinePointerInfo::getConstantPool(), MemType,
- /* isVolatile = */ false,
- /* isNonTemporal = */ false,
- Alignment).getNode();
+/// SelectAddrModeUnscaled - Select a "register plus unscaled signed 9-bit
+/// immediate" address. This should only match when there is an offset that
+/// is not valid for a scaled immediate addressing mode. The "Size" argument
+/// is the size in bytes of the memory reference, which is needed here to know
+/// what is valid for a scaled immediate.
+bool AArch64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size,
+ SDValue &Base,
+ SDValue &OffImm) {
+ if (!CurDAG->isBaseWithConstantOffset(N))
+ return false;
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+ int64_t RHSC = RHS->getSExtValue();
+ // If the offset is valid as a scaled immediate, don't match here.
+ if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 &&
+ RHSC < (0x1000 << Log2_32(Size)))
+ return false;
+ if (RHSC >= -256 && RHSC < 256) {
+ Base = N.getOperand(0);
+ if (Base.getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(Base)->getIndex();
+ const TargetLowering *TLI = getTargetLowering();
+ Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
+ }
+ OffImm = CurDAG->getTargetConstant(RHSC, MVT::i64);
+ return true;
+ }
+ }
+ return false;
}
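Conversely, the unscaled (LDUR/STUR-style) form is a deliberate fallback: it matches the signed 9-bit range only when the scaled form cannot encode the offset. A sketch under the same assumptions as above:

#include <cstdint>

static bool isLegalUnscaledOffset(int64_t Off, unsigned Size) {
  bool ScaledOK = Off >= 0 && Off % Size == 0 && Off / Size < 0x1000;
  return !ScaledOK && Off >= -256 && Off < 256;
}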
-SDNode *AArch64DAGToDAGISel::LowerToFPLitPool(SDNode *Node) {
- SDLoc DL(Node);
- const ConstantFP *FV = cast<ConstantFPSDNode>(Node)->getConstantFPValue();
- EVT DestType = Node->getValueType(0);
-
- unsigned Alignment =
- getTargetLowering()->getDataLayout()->getABITypeAlignment(FV->getType());
- SDValue PoolAddr = getConstantPoolItemAddress(DL, FV);
-
- return CurDAG->getLoad(DestType, DL, CurDAG->getEntryNode(), PoolAddr,
- MachinePointerInfo::getConstantPool(),
- /* isVolatile = */ false,
- /* isNonTemporal = */ false,
- /* isInvariant = */ true,
- Alignment).getNode();
+static SDValue Widen(SelectionDAG *CurDAG, SDValue N) {
+ SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, MVT::i32);
+ SDValue ImpDef = SDValue(
+ CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, SDLoc(N), MVT::i64),
+ 0);
+ MachineSDNode *Node = CurDAG->getMachineNode(
+ TargetOpcode::INSERT_SUBREG, SDLoc(N), MVT::i64, ImpDef, N, SubReg);
+ return SDValue(Node, 0);
}
-bool
-AArch64DAGToDAGISel::SelectTSTBOperand(SDValue N, SDValue &FixedPos,
- unsigned RegWidth) {
- const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
- if (!CN) return false;
+/// \brief Check if the given SHL node (\p N) can be used to form an
+/// extended register for an addressing mode.
+bool AArch64DAGToDAGISel::SelectExtendedSHL(SDValue N, unsigned Size,
+ bool WantExtend, SDValue &Offset,
+ SDValue &SignExtend) {
+ assert(N.getOpcode() == ISD::SHL && "Invalid opcode.");
+ ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
+ if (!CSD || (CSD->getZExtValue() & 0x7) != CSD->getZExtValue())
+ return false;
- uint64_t Val = CN->getZExtValue();
+ if (WantExtend) {
+ AArch64_AM::ShiftExtendType Ext =
+ getExtendTypeForNode(N.getOperand(0), true);
+ if (Ext == AArch64_AM::InvalidShiftExtend)
+ return false;
- if (!isPowerOf2_64(Val)) return false;
+ Offset = narrowIfNeeded(CurDAG, N.getOperand(0).getOperand(0));
+ SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, MVT::i32);
+ } else {
+ Offset = N.getOperand(0);
+ SignExtend = CurDAG->getTargetConstant(0, MVT::i32);
+ }
- unsigned TestedBit = Log2_64(Val);
- // Checks above should have guaranteed that we haven't lost information in
- // finding TestedBit, but it must still be in range.
- if (TestedBit >= RegWidth) return false;
+ unsigned LegalShiftVal = Log2_32(Size);
+ unsigned ShiftVal = CSD->getZExtValue();
- FixedPos = CurDAG->getTargetConstant(TestedBit, MVT::i64);
- return true;
+ if (ShiftVal != 0 && ShiftVal != LegalShiftVal)
+ return false;
+
+ if (isWorthFolding(N))
+ return true;
+
+ return false;
}
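The folding rule above: a shift survives into a register-offset access only when its amount is zero or exactly log2 of the access size. A minimal sketch:

// Size is the access size in bytes (a power of two).
static bool shiftFoldsIntoAccess(unsigned ShiftVal, unsigned Size) {
  unsigned LegalShift = 0;
  while ((1u << (LegalShift + 1)) <= Size)
    ++LegalShift; // LegalShift = log2(Size) for power-of-two Size
  return ShiftVal == 0 || ShiftVal == LegalShift;
}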
-SDNode *AArch64DAGToDAGISel::SelectAtomic(SDNode *Node, unsigned Op8,
- unsigned Op16,unsigned Op32,
- unsigned Op64) {
- // Mostly direct translation to the given operations, except that we preserve
- // the AtomicOrdering for use later on.
- AtomicSDNode *AN = cast<AtomicSDNode>(Node);
- EVT VT = AN->getMemoryVT();
-
- unsigned Op;
- if (VT == MVT::i8)
- Op = Op8;
- else if (VT == MVT::i16)
- Op = Op16;
- else if (VT == MVT::i32)
- Op = Op32;
- else if (VT == MVT::i64)
- Op = Op64;
- else
- llvm_unreachable("Unexpected atomic operation");
+bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size,
+ SDValue &Base, SDValue &Offset,
+ SDValue &SignExtend,
+ SDValue &DoShift) {
+ if (N.getOpcode() != ISD::ADD)
+ return false;
+ SDValue LHS = N.getOperand(0);
+ SDValue RHS = N.getOperand(1);
- SmallVector<SDValue, 4> Ops;
- for (unsigned i = 1; i < AN->getNumOperands(); ++i)
- Ops.push_back(AN->getOperand(i));
+ // We don't want to match immediate adds here, because they are better lowered
+ // to the register-immediate addressing modes.
+ if (isa<ConstantSDNode>(LHS) || isa<ConstantSDNode>(RHS))
+ return false;
- Ops.push_back(CurDAG->getTargetConstant(AN->getOrdering(), MVT::i32));
- Ops.push_back(AN->getOperand(0)); // Chain moves to the end
+ // Check if this particular node is reused in any non-memory related
+ // operation. If yes, do not try to fold this node into the address
+ // computation, since the computation will be kept.
+ const SDNode *Node = N.getNode();
+ for (SDNode *UI : Node->uses()) {
+ if (!isa<MemSDNode>(*UI))
+ return false;
+ }
+
+ // Remember if it is worth folding N when it produces extended register.
+ bool IsExtendedRegisterWorthFolding = isWorthFolding(N);
+
+ // Try to match a shifted extend on the RHS.
+ if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
+ SelectExtendedSHL(RHS, Size, true, Offset, SignExtend)) {
+ Base = LHS;
+ DoShift = CurDAG->getTargetConstant(true, MVT::i32);
+ return true;
+ }
+
+ // Try to match a shifted extend on the LHS.
+ if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
+ SelectExtendedSHL(LHS, Size, true, Offset, SignExtend)) {
+ Base = RHS;
+ DoShift = CurDAG->getTargetConstant(true, MVT::i32);
+ return true;
+ }
+
+ // There was no shift, whatever else we find.
+ DoShift = CurDAG->getTargetConstant(false, MVT::i32);
+
+ AArch64_AM::ShiftExtendType Ext = AArch64_AM::InvalidShiftExtend;
+ // Try to match an unshifted extend on the LHS.
+ if (IsExtendedRegisterWorthFolding &&
+ (Ext = getExtendTypeForNode(LHS, true)) !=
+ AArch64_AM::InvalidShiftExtend) {
+ Base = RHS;
+ Offset = narrowIfNeeded(CurDAG, LHS.getOperand(0));
+ SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, MVT::i32);
+ if (isWorthFolding(LHS))
+ return true;
+ }
+
+ // Try to match an unshifted extend on the RHS.
+ if (IsExtendedRegisterWorthFolding &&
+ (Ext = getExtendTypeForNode(RHS, true)) !=
+ AArch64_AM::InvalidShiftExtend) {
+ Base = LHS;
+ Offset = narrowIfNeeded(CurDAG, RHS.getOperand(0));
+ SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, MVT::i32);
+ if (isWorthFolding(RHS))
+ return true;
+ }
- return CurDAG->SelectNodeTo(Node, Op,
- AN->getValueType(0), MVT::Other,
- &Ops[0], Ops.size());
+ return false;
+}
+
+bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size,
+ SDValue &Base, SDValue &Offset,
+ SDValue &SignExtend,
+ SDValue &DoShift) {
+ if (N.getOpcode() != ISD::ADD)
+ return false;
+ SDValue LHS = N.getOperand(0);
+ SDValue RHS = N.getOperand(1);
+
+ // We don't want to match immediate adds here, because they are better lowered
+ // to the register-immediate addressing modes.
+ if (isa<ConstantSDNode>(LHS) || isa<ConstantSDNode>(RHS))
+ return false;
+
+ // Check if this particular node is reused in any non-memory related
+ // operation. If yes, do not try to fold this node into the address
+ // computation, since the computation will be kept.
+ const SDNode *Node = N.getNode();
+ for (SDNode *UI : Node->uses()) {
+ if (!isa<MemSDNode>(*UI))
+ return false;
+ }
+
+ // Remember if it is worth folding N when it produces extended register.
+ bool IsExtendedRegisterWorthFolding = isWorthFolding(N);
+
+ // Try to match a shifted extend on the RHS.
+ if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
+ SelectExtendedSHL(RHS, Size, false, Offset, SignExtend)) {
+ Base = LHS;
+ DoShift = CurDAG->getTargetConstant(true, MVT::i32);
+ return true;
+ }
+
+ // Try to match a shifted extend on the LHS.
+ if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
+ SelectExtendedSHL(LHS, Size, false, Offset, SignExtend)) {
+ Base = RHS;
+ DoShift = CurDAG->getTargetConstant(true, MVT::i32);
+ return true;
+ }
+
+ // Match any non-shifted, non-extend, non-immediate add expression.
+ Base = LHS;
+ Offset = RHS;
+ SignExtend = CurDAG->getTargetConstant(false, MVT::i32);
+ DoShift = CurDAG->getTargetConstant(false, MVT::i32);
+ // Reg1 + Reg2 is free: no check needed.
+ return true;
}
SDValue AArch64DAGToDAGISel::createDTuple(ArrayRef<SDValue> Regs) {
- static unsigned RegClassIDs[] = { AArch64::DPairRegClassID,
- AArch64::DTripleRegClassID,
- AArch64::DQuadRegClassID };
- static unsigned SubRegs[] = { AArch64::dsub_0, AArch64::dsub_1,
- AArch64::dsub_2, AArch64::dsub_3 };
+ static unsigned RegClassIDs[] = {
+ AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
+ static unsigned SubRegs[] = { AArch64::dsub0, AArch64::dsub1,
+ AArch64::dsub2, AArch64::dsub3 };
return createTuple(Regs, RegClassIDs, SubRegs);
}
SDValue AArch64DAGToDAGISel::createQTuple(ArrayRef<SDValue> Regs) {
- static unsigned RegClassIDs[] = { AArch64::QPairRegClassID,
- AArch64::QTripleRegClassID,
- AArch64::QQuadRegClassID };
- static unsigned SubRegs[] = { AArch64::qsub_0, AArch64::qsub_1,
- AArch64::qsub_2, AArch64::qsub_3 };
+ static unsigned RegClassIDs[] = {
+ AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
+ static unsigned SubRegs[] = { AArch64::qsub0, AArch64::qsub1,
+ AArch64::qsub2, AArch64::qsub3 };
return createTuple(Regs, RegClassIDs, SubRegs);
}
@@ -478,1100 +877,2159 @@ SDValue AArch64DAGToDAGISel::createTuple(ArrayRef<SDValue> Regs,
return SDValue(N, 0);
}
+SDNode *AArch64DAGToDAGISel::SelectTable(SDNode *N, unsigned NumVecs,
+ unsigned Opc, bool isExt) {
+ SDLoc dl(N);
+ EVT VT = N->getValueType(0);
+
+ unsigned ExtOff = isExt;
+
+ // Form a REG_SEQUENCE to force register allocation.
+ unsigned Vec0Off = ExtOff + 1;
+ SmallVector<SDValue, 4> Regs(N->op_begin() + Vec0Off,
+ N->op_begin() + Vec0Off + NumVecs);
+ SDValue RegSeq = createQTuple(Regs);
-// Get the register stride update opcode of a VLD/VST instruction that
-// is otherwise equivalent to the given fixed stride updating instruction.
-static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
- switch (Opc) {
- default: break;
- case AArch64::LD1WB_8B_fixed: return AArch64::LD1WB_8B_register;
- case AArch64::LD1WB_4H_fixed: return AArch64::LD1WB_4H_register;
- case AArch64::LD1WB_2S_fixed: return AArch64::LD1WB_2S_register;
- case AArch64::LD1WB_1D_fixed: return AArch64::LD1WB_1D_register;
- case AArch64::LD1WB_16B_fixed: return AArch64::LD1WB_16B_register;
- case AArch64::LD1WB_8H_fixed: return AArch64::LD1WB_8H_register;
- case AArch64::LD1WB_4S_fixed: return AArch64::LD1WB_4S_register;
- case AArch64::LD1WB_2D_fixed: return AArch64::LD1WB_2D_register;
-
- case AArch64::LD2WB_8B_fixed: return AArch64::LD2WB_8B_register;
- case AArch64::LD2WB_4H_fixed: return AArch64::LD2WB_4H_register;
- case AArch64::LD2WB_2S_fixed: return AArch64::LD2WB_2S_register;
- case AArch64::LD2WB_16B_fixed: return AArch64::LD2WB_16B_register;
- case AArch64::LD2WB_8H_fixed: return AArch64::LD2WB_8H_register;
- case AArch64::LD2WB_4S_fixed: return AArch64::LD2WB_4S_register;
- case AArch64::LD2WB_2D_fixed: return AArch64::LD2WB_2D_register;
-
- case AArch64::LD3WB_8B_fixed: return AArch64::LD3WB_8B_register;
- case AArch64::LD3WB_4H_fixed: return AArch64::LD3WB_4H_register;
- case AArch64::LD3WB_2S_fixed: return AArch64::LD3WB_2S_register;
- case AArch64::LD3WB_16B_fixed: return AArch64::LD3WB_16B_register;
- case AArch64::LD3WB_8H_fixed: return AArch64::LD3WB_8H_register;
- case AArch64::LD3WB_4S_fixed: return AArch64::LD3WB_4S_register;
- case AArch64::LD3WB_2D_fixed: return AArch64::LD3WB_2D_register;
-
- case AArch64::LD4WB_8B_fixed: return AArch64::LD4WB_8B_register;
- case AArch64::LD4WB_4H_fixed: return AArch64::LD4WB_4H_register;
- case AArch64::LD4WB_2S_fixed: return AArch64::LD4WB_2S_register;
- case AArch64::LD4WB_16B_fixed: return AArch64::LD4WB_16B_register;
- case AArch64::LD4WB_8H_fixed: return AArch64::LD4WB_8H_register;
- case AArch64::LD4WB_4S_fixed: return AArch64::LD4WB_4S_register;
- case AArch64::LD4WB_2D_fixed: return AArch64::LD4WB_2D_register;
-
- case AArch64::LD1x2WB_8B_fixed: return AArch64::LD1x2WB_8B_register;
- case AArch64::LD1x2WB_4H_fixed: return AArch64::LD1x2WB_4H_register;
- case AArch64::LD1x2WB_2S_fixed: return AArch64::LD1x2WB_2S_register;
- case AArch64::LD1x2WB_1D_fixed: return AArch64::LD1x2WB_1D_register;
- case AArch64::LD1x2WB_16B_fixed: return AArch64::LD1x2WB_16B_register;
- case AArch64::LD1x2WB_8H_fixed: return AArch64::LD1x2WB_8H_register;
- case AArch64::LD1x2WB_4S_fixed: return AArch64::LD1x2WB_4S_register;
- case AArch64::LD1x2WB_2D_fixed: return AArch64::LD1x2WB_2D_register;
-
- case AArch64::LD1x3WB_8B_fixed: return AArch64::LD1x3WB_8B_register;
- case AArch64::LD1x3WB_4H_fixed: return AArch64::LD1x3WB_4H_register;
- case AArch64::LD1x3WB_2S_fixed: return AArch64::LD1x3WB_2S_register;
- case AArch64::LD1x3WB_1D_fixed: return AArch64::LD1x3WB_1D_register;
- case AArch64::LD1x3WB_16B_fixed: return AArch64::LD1x3WB_16B_register;
- case AArch64::LD1x3WB_8H_fixed: return AArch64::LD1x3WB_8H_register;
- case AArch64::LD1x3WB_4S_fixed: return AArch64::LD1x3WB_4S_register;
- case AArch64::LD1x3WB_2D_fixed: return AArch64::LD1x3WB_2D_register;
-
- case AArch64::LD1x4WB_8B_fixed: return AArch64::LD1x4WB_8B_register;
- case AArch64::LD1x4WB_4H_fixed: return AArch64::LD1x4WB_4H_register;
- case AArch64::LD1x4WB_2S_fixed: return AArch64::LD1x4WB_2S_register;
- case AArch64::LD1x4WB_1D_fixed: return AArch64::LD1x4WB_1D_register;
- case AArch64::LD1x4WB_16B_fixed: return AArch64::LD1x4WB_16B_register;
- case AArch64::LD1x4WB_8H_fixed: return AArch64::LD1x4WB_8H_register;
- case AArch64::LD1x4WB_4S_fixed: return AArch64::LD1x4WB_4S_register;
- case AArch64::LD1x4WB_2D_fixed: return AArch64::LD1x4WB_2D_register;
-
- case AArch64::ST1WB_8B_fixed: return AArch64::ST1WB_8B_register;
- case AArch64::ST1WB_4H_fixed: return AArch64::ST1WB_4H_register;
- case AArch64::ST1WB_2S_fixed: return AArch64::ST1WB_2S_register;
- case AArch64::ST1WB_1D_fixed: return AArch64::ST1WB_1D_register;
- case AArch64::ST1WB_16B_fixed: return AArch64::ST1WB_16B_register;
- case AArch64::ST1WB_8H_fixed: return AArch64::ST1WB_8H_register;
- case AArch64::ST1WB_4S_fixed: return AArch64::ST1WB_4S_register;
- case AArch64::ST1WB_2D_fixed: return AArch64::ST1WB_2D_register;
-
- case AArch64::ST2WB_8B_fixed: return AArch64::ST2WB_8B_register;
- case AArch64::ST2WB_4H_fixed: return AArch64::ST2WB_4H_register;
- case AArch64::ST2WB_2S_fixed: return AArch64::ST2WB_2S_register;
- case AArch64::ST2WB_16B_fixed: return AArch64::ST2WB_16B_register;
- case AArch64::ST2WB_8H_fixed: return AArch64::ST2WB_8H_register;
- case AArch64::ST2WB_4S_fixed: return AArch64::ST2WB_4S_register;
- case AArch64::ST2WB_2D_fixed: return AArch64::ST2WB_2D_register;
-
- case AArch64::ST3WB_8B_fixed: return AArch64::ST3WB_8B_register;
- case AArch64::ST3WB_4H_fixed: return AArch64::ST3WB_4H_register;
- case AArch64::ST3WB_2S_fixed: return AArch64::ST3WB_2S_register;
- case AArch64::ST3WB_16B_fixed: return AArch64::ST3WB_16B_register;
- case AArch64::ST3WB_8H_fixed: return AArch64::ST3WB_8H_register;
- case AArch64::ST3WB_4S_fixed: return AArch64::ST3WB_4S_register;
- case AArch64::ST3WB_2D_fixed: return AArch64::ST3WB_2D_register;
-
- case AArch64::ST4WB_8B_fixed: return AArch64::ST4WB_8B_register;
- case AArch64::ST4WB_4H_fixed: return AArch64::ST4WB_4H_register;
- case AArch64::ST4WB_2S_fixed: return AArch64::ST4WB_2S_register;
- case AArch64::ST4WB_16B_fixed: return AArch64::ST4WB_16B_register;
- case AArch64::ST4WB_8H_fixed: return AArch64::ST4WB_8H_register;
- case AArch64::ST4WB_4S_fixed: return AArch64::ST4WB_4S_register;
- case AArch64::ST4WB_2D_fixed: return AArch64::ST4WB_2D_register;
-
- case AArch64::ST1x2WB_8B_fixed: return AArch64::ST1x2WB_8B_register;
- case AArch64::ST1x2WB_4H_fixed: return AArch64::ST1x2WB_4H_register;
- case AArch64::ST1x2WB_2S_fixed: return AArch64::ST1x2WB_2S_register;
- case AArch64::ST1x2WB_1D_fixed: return AArch64::ST1x2WB_1D_register;
- case AArch64::ST1x2WB_16B_fixed: return AArch64::ST1x2WB_16B_register;
- case AArch64::ST1x2WB_8H_fixed: return AArch64::ST1x2WB_8H_register;
- case AArch64::ST1x2WB_4S_fixed: return AArch64::ST1x2WB_4S_register;
- case AArch64::ST1x2WB_2D_fixed: return AArch64::ST1x2WB_2D_register;
-
- case AArch64::ST1x3WB_8B_fixed: return AArch64::ST1x3WB_8B_register;
- case AArch64::ST1x3WB_4H_fixed: return AArch64::ST1x3WB_4H_register;
- case AArch64::ST1x3WB_2S_fixed: return AArch64::ST1x3WB_2S_register;
- case AArch64::ST1x3WB_1D_fixed: return AArch64::ST1x3WB_1D_register;
- case AArch64::ST1x3WB_16B_fixed: return AArch64::ST1x3WB_16B_register;
- case AArch64::ST1x3WB_8H_fixed: return AArch64::ST1x3WB_8H_register;
- case AArch64::ST1x3WB_4S_fixed: return AArch64::ST1x3WB_4S_register;
- case AArch64::ST1x3WB_2D_fixed: return AArch64::ST1x3WB_2D_register;
-
- case AArch64::ST1x4WB_8B_fixed: return AArch64::ST1x4WB_8B_register;
- case AArch64::ST1x4WB_4H_fixed: return AArch64::ST1x4WB_4H_register;
- case AArch64::ST1x4WB_2S_fixed: return AArch64::ST1x4WB_2S_register;
- case AArch64::ST1x4WB_1D_fixed: return AArch64::ST1x4WB_1D_register;
- case AArch64::ST1x4WB_16B_fixed: return AArch64::ST1x4WB_16B_register;
- case AArch64::ST1x4WB_8H_fixed: return AArch64::ST1x4WB_8H_register;
- case AArch64::ST1x4WB_4S_fixed: return AArch64::ST1x4WB_4S_register;
- case AArch64::ST1x4WB_2D_fixed: return AArch64::ST1x4WB_2D_register;
-
- // Post-index of duplicate loads
- case AArch64::LD2R_WB_8B_fixed: return AArch64::LD2R_WB_8B_register;
- case AArch64::LD2R_WB_4H_fixed: return AArch64::LD2R_WB_4H_register;
- case AArch64::LD2R_WB_2S_fixed: return AArch64::LD2R_WB_2S_register;
- case AArch64::LD2R_WB_1D_fixed: return AArch64::LD2R_WB_1D_register;
- case AArch64::LD2R_WB_16B_fixed: return AArch64::LD2R_WB_16B_register;
- case AArch64::LD2R_WB_8H_fixed: return AArch64::LD2R_WB_8H_register;
- case AArch64::LD2R_WB_4S_fixed: return AArch64::LD2R_WB_4S_register;
- case AArch64::LD2R_WB_2D_fixed: return AArch64::LD2R_WB_2D_register;
-
- case AArch64::LD3R_WB_8B_fixed: return AArch64::LD3R_WB_8B_register;
- case AArch64::LD3R_WB_4H_fixed: return AArch64::LD3R_WB_4H_register;
- case AArch64::LD3R_WB_2S_fixed: return AArch64::LD3R_WB_2S_register;
- case AArch64::LD3R_WB_1D_fixed: return AArch64::LD3R_WB_1D_register;
- case AArch64::LD3R_WB_16B_fixed: return AArch64::LD3R_WB_16B_register;
- case AArch64::LD3R_WB_8H_fixed: return AArch64::LD3R_WB_8H_register;
- case AArch64::LD3R_WB_4S_fixed: return AArch64::LD3R_WB_4S_register;
- case AArch64::LD3R_WB_2D_fixed: return AArch64::LD3R_WB_2D_register;
-
- case AArch64::LD4R_WB_8B_fixed: return AArch64::LD4R_WB_8B_register;
- case AArch64::LD4R_WB_4H_fixed: return AArch64::LD4R_WB_4H_register;
- case AArch64::LD4R_WB_2S_fixed: return AArch64::LD4R_WB_2S_register;
- case AArch64::LD4R_WB_1D_fixed: return AArch64::LD4R_WB_1D_register;
- case AArch64::LD4R_WB_16B_fixed: return AArch64::LD4R_WB_16B_register;
- case AArch64::LD4R_WB_8H_fixed: return AArch64::LD4R_WB_8H_register;
- case AArch64::LD4R_WB_4S_fixed: return AArch64::LD4R_WB_4S_register;
- case AArch64::LD4R_WB_2D_fixed: return AArch64::LD4R_WB_2D_register;
-
- // Post-index of lane loads
- case AArch64::LD2LN_WB_B_fixed: return AArch64::LD2LN_WB_B_register;
- case AArch64::LD2LN_WB_H_fixed: return AArch64::LD2LN_WB_H_register;
- case AArch64::LD2LN_WB_S_fixed: return AArch64::LD2LN_WB_S_register;
- case AArch64::LD2LN_WB_D_fixed: return AArch64::LD2LN_WB_D_register;
-
- case AArch64::LD3LN_WB_B_fixed: return AArch64::LD3LN_WB_B_register;
- case AArch64::LD3LN_WB_H_fixed: return AArch64::LD3LN_WB_H_register;
- case AArch64::LD3LN_WB_S_fixed: return AArch64::LD3LN_WB_S_register;
- case AArch64::LD3LN_WB_D_fixed: return AArch64::LD3LN_WB_D_register;
-
- case AArch64::LD4LN_WB_B_fixed: return AArch64::LD4LN_WB_B_register;
- case AArch64::LD4LN_WB_H_fixed: return AArch64::LD4LN_WB_H_register;
- case AArch64::LD4LN_WB_S_fixed: return AArch64::LD4LN_WB_S_register;
- case AArch64::LD4LN_WB_D_fixed: return AArch64::LD4LN_WB_D_register;
-
- // Post-index of lane stores
- case AArch64::ST2LN_WB_B_fixed: return AArch64::ST2LN_WB_B_register;
- case AArch64::ST2LN_WB_H_fixed: return AArch64::ST2LN_WB_H_register;
- case AArch64::ST2LN_WB_S_fixed: return AArch64::ST2LN_WB_S_register;
- case AArch64::ST2LN_WB_D_fixed: return AArch64::ST2LN_WB_D_register;
-
- case AArch64::ST3LN_WB_B_fixed: return AArch64::ST3LN_WB_B_register;
- case AArch64::ST3LN_WB_H_fixed: return AArch64::ST3LN_WB_H_register;
- case AArch64::ST3LN_WB_S_fixed: return AArch64::ST3LN_WB_S_register;
- case AArch64::ST3LN_WB_D_fixed: return AArch64::ST3LN_WB_D_register;
-
- case AArch64::ST4LN_WB_B_fixed: return AArch64::ST4LN_WB_B_register;
- case AArch64::ST4LN_WB_H_fixed: return AArch64::ST4LN_WB_H_register;
- case AArch64::ST4LN_WB_S_fixed: return AArch64::ST4LN_WB_S_register;
- case AArch64::ST4LN_WB_D_fixed: return AArch64::ST4LN_WB_D_register;
- }
- return Opc; // If not one we handle, return it unchanged.
+ SmallVector<SDValue, 6> Ops;
+ if (isExt)
+ Ops.push_back(N->getOperand(1));
+ Ops.push_back(RegSeq);
+ Ops.push_back(N->getOperand(NumVecs + ExtOff + 1));
+ return CurDAG->getMachineNode(Opc, dl, VT, Ops);
}
-SDNode *AArch64DAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating,
- unsigned NumVecs,
- const uint16_t *Opcodes) {
- assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");
+SDNode *AArch64DAGToDAGISel::SelectIndexedLoad(SDNode *N, bool &Done) {
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ if (LD->isUnindexed())
+ return nullptr;
+ EVT VT = LD->getMemoryVT();
+ EVT DstVT = N->getValueType(0);
+ ISD::MemIndexedMode AM = LD->getAddressingMode();
+ bool IsPre = AM == ISD::PRE_INC || AM == ISD::PRE_DEC;
+
+ // We're not doing validity checking here. That was done when checking
+ // if we should mark the load as indexed or not. We're just selecting
+ // the right instruction.
+ unsigned Opcode = 0;
+
+ ISD::LoadExtType ExtType = LD->getExtensionType();
+ bool InsertTo64 = false;
+ if (VT == MVT::i64)
+ Opcode = IsPre ? AArch64::LDRXpre : AArch64::LDRXpost;
+ else if (VT == MVT::i32) {
+ if (ExtType == ISD::NON_EXTLOAD)
+ Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
+ else if (ExtType == ISD::SEXTLOAD)
+ Opcode = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
+ else {
+ Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
+ InsertTo64 = true;
+ // The result of the load is only i32. It's the subreg_to_reg that makes
+ // it into an i64.
+ DstVT = MVT::i32;
+ }
+ } else if (VT == MVT::i16) {
+ if (ExtType == ISD::SEXTLOAD) {
+ if (DstVT == MVT::i64)
+ Opcode = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost;
+ else
+ Opcode = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
+ } else {
+ Opcode = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
+ InsertTo64 = DstVT == MVT::i64;
+ // The result of the load is only i32. It's the subreg_to_reg that makes
+ // it into an i64.
+ DstVT = MVT::i32;
+ }
+ } else if (VT == MVT::i8) {
+ if (ExtType == ISD::SEXTLOAD) {
+ if (DstVT == MVT::i64)
+ Opcode = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost;
+ else
+ Opcode = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
+ } else {
+ Opcode = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
+ InsertTo64 = DstVT == MVT::i64;
+ // The result of the load is only i32. It's the subreg_to_reg that makes
+ // it into an i64.
+ DstVT = MVT::i32;
+ }
+ } else if (VT == MVT::f32) {
+ Opcode = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost;
+ } else if (VT == MVT::f64 || VT.is64BitVector()) {
+ Opcode = IsPre ? AArch64::LDRDpre : AArch64::LDRDpost;
+ } else if (VT.is128BitVector()) {
+ Opcode = IsPre ? AArch64::LDRQpre : AArch64::LDRQpost;
+ } else
+ return nullptr;
+ SDValue Chain = LD->getChain();
+ SDValue Base = LD->getBasePtr();
+ ConstantSDNode *OffsetOp = cast<ConstantSDNode>(LD->getOffset());
+ int OffsetVal = (int)OffsetOp->getZExtValue();
+ SDValue Offset = CurDAG->getTargetConstant(OffsetVal, MVT::i64);
+ SDValue Ops[] = { Base, Offset, Chain };
+ SDNode *Res = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i64, DstVT,
+ MVT::Other, Ops);
+ // Either way, we're replacing the node, so tell the caller that.
+ Done = true;
+ SDValue LoadedVal = SDValue(Res, 1);
+ if (InsertTo64) {
+ SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, MVT::i32);
+ LoadedVal =
+ SDValue(CurDAG->getMachineNode(
+ AArch64::SUBREG_TO_REG, SDLoc(N), MVT::i64,
+ CurDAG->getTargetConstant(0, MVT::i64), LoadedVal, SubReg),
+ 0);
+ }
+
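+  // The machine node's results are (writeback, loaded value, chain), while
+  // the indexed LoadSDNode's are (value, new base, chain), hence the
+  // cross-wiring below.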
+ ReplaceUses(SDValue(N, 0), LoadedVal);
+ ReplaceUses(SDValue(N, 1), SDValue(Res, 0));
+ ReplaceUses(SDValue(N, 2), SDValue(Res, 2));
+
+ return nullptr;
+}
+SDNode *AArch64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs,
+ unsigned Opc, unsigned SubRegIdx) {
+ SDLoc dl(N);
EVT VT = N->getValueType(0);
- unsigned OpcodeIndex;
- bool is64BitVector = VT.is64BitVector();
- switch (VT.getScalarType().getSizeInBits()) {
- case 8: OpcodeIndex = is64BitVector ? 0 : 4; break;
- case 16: OpcodeIndex = is64BitVector ? 1 : 5; break;
- case 32: OpcodeIndex = is64BitVector ? 2 : 6; break;
- case 64: OpcodeIndex = is64BitVector ? 3 : 7; break;
- default: llvm_unreachable("unhandled vector load type");
- }
- unsigned Opc = Opcodes[OpcodeIndex];
+ SDValue Chain = N->getOperand(0);
- SmallVector<SDValue, 2> Ops;
- unsigned AddrOpIdx = isUpdating ? 1 : 2;
- Ops.push_back(N->getOperand(AddrOpIdx)); // Push back the Memory Address
+ SmallVector<SDValue, 6> Ops;
+  Ops.push_back(N->getOperand(2)); // Mem operand
+ Ops.push_back(Chain);
- if (isUpdating) {
- SDValue Inc = N->getOperand(AddrOpIdx + 1);
- if (!isa<ConstantSDNode>(Inc.getNode())) // Increment in Register
- Opc = getVLDSTRegisterUpdateOpcode(Opc);
- Ops.push_back(Inc);
- }
+ std::vector<EVT> ResTys;
+ ResTys.push_back(MVT::Untyped);
+ ResTys.push_back(MVT::Other);
- Ops.push_back(N->getOperand(0)); // Push back the Chain
+ SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
+ SDValue SuperReg = SDValue(Ld, 0);
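+  // N's results are the NumVecs loaded vectors followed by the chain; peel
+  // each vector out of the Untyped super-register result.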
+ for (unsigned i = 0; i < NumVecs; ++i)
+ ReplaceUses(SDValue(N, i),
+ CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
- SmallVector<EVT, 3> ResTys;
- // Push back the type of return super register
- if (NumVecs == 1)
- ResTys.push_back(VT);
- else if (NumVecs == 3)
- ResTys.push_back(MVT::Untyped);
- else {
- EVT ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64,
- is64BitVector ? NumVecs : NumVecs * 2);
- ResTys.push_back(ResTy);
- }
-
- if (isUpdating)
- ResTys.push_back(MVT::i64); // Type of the updated register
- ResTys.push_back(MVT::Other); // Type of the Chain
+ ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
+ return nullptr;
+}
+
+SDNode *AArch64DAGToDAGISel::SelectPostLoad(SDNode *N, unsigned NumVecs,
+ unsigned Opc, unsigned SubRegIdx) {
SDLoc dl(N);
- SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
+ EVT VT = N->getValueType(0);
+ SDValue Chain = N->getOperand(0);
- // Transfer memoperands.
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
- cast<MachineSDNode>(VLd)->setMemRefs(MemOp, MemOp + 1);
+ SmallVector<SDValue, 6> Ops;
+ Ops.push_back(N->getOperand(1)); // Mem operand
+ Ops.push_back(N->getOperand(2)); // Incremental
+ Ops.push_back(Chain);
+
+ std::vector<EVT> ResTys;
+ ResTys.push_back(MVT::i64); // Type of the write back register
+ ResTys.push_back(MVT::Untyped);
+ ResTys.push_back(MVT::Other);
+
+ SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
+ // Update uses of write back register
+ ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));
+
+ // Update uses of vector list
+ SDValue SuperReg = SDValue(Ld, 1);
if (NumVecs == 1)
- return VLd;
-
- // If NumVecs > 1, the return result is a super register containing 2-4
- // consecutive vector registers.
- SDValue SuperReg = SDValue(VLd, 0);
-
- unsigned Sub0 = is64BitVector ? AArch64::dsub_0 : AArch64::qsub_0;
- for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
- ReplaceUses(SDValue(N, Vec),
- CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
- // Update users of the Chain
- ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1));
- if (isUpdating)
- ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2));
-
- return NULL;
+ ReplaceUses(SDValue(N, 0), SuperReg);
+ else
+ for (unsigned i = 0; i < NumVecs; ++i)
+ ReplaceUses(SDValue(N, i),
+ CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
+
+ // Update the chain
+ ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
+ return nullptr;
}
-SDNode *AArch64DAGToDAGISel::SelectVST(SDNode *N, bool isUpdating,
- unsigned NumVecs,
- const uint16_t *Opcodes) {
- assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
+SDNode *AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs,
+ unsigned Opc) {
SDLoc dl(N);
+ EVT VT = N->getOperand(2)->getValueType(0);
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
+ // Form a REG_SEQUENCE to force register allocation.
+ bool Is128Bit = VT.getSizeInBits() == 128;
+ SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
+ SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
- unsigned AddrOpIdx = isUpdating ? 1 : 2;
- unsigned Vec0Idx = 3;
- EVT VT = N->getOperand(Vec0Idx).getValueType();
- unsigned OpcodeIndex;
- bool is64BitVector = VT.is64BitVector();
- switch (VT.getScalarType().getSizeInBits()) {
- case 8: OpcodeIndex = is64BitVector ? 0 : 4; break;
- case 16: OpcodeIndex = is64BitVector ? 1 : 5; break;
- case 32: OpcodeIndex = is64BitVector ? 2 : 6; break;
- case 64: OpcodeIndex = is64BitVector ? 3 : 7; break;
- default: llvm_unreachable("unhandled vector store type");
- }
- unsigned Opc = Opcodes[OpcodeIndex];
+ SmallVector<SDValue, 6> Ops;
+ Ops.push_back(RegSeq);
+ Ops.push_back(N->getOperand(NumVecs + 2));
+ Ops.push_back(N->getOperand(0));
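+  // The store's only result is the chain, so N->getValueType(0) is MVT::Other.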
+ SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);
+
+ return St;
+}
+SDNode *AArch64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs,
+ unsigned Opc) {
+ SDLoc dl(N);
+ EVT VT = N->getOperand(2)->getValueType(0);
SmallVector<EVT, 2> ResTys;
- if (isUpdating)
- ResTys.push_back(MVT::i64);
+ ResTys.push_back(MVT::i64); // Type of the write back register
ResTys.push_back(MVT::Other); // Type for the Chain
+ // Form a REG_SEQUENCE to force register allocation.
+ bool Is128Bit = VT.getSizeInBits() == 128;
+ SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
+ SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
+
SmallVector<SDValue, 6> Ops;
- Ops.push_back(N->getOperand(AddrOpIdx)); // Push back the Memory Address
+ Ops.push_back(RegSeq);
+ Ops.push_back(N->getOperand(NumVecs + 1)); // base register
+ Ops.push_back(N->getOperand(NumVecs + 2)); // Incremental
+ Ops.push_back(N->getOperand(0)); // Chain
+ SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
- if (isUpdating) {
- SDValue Inc = N->getOperand(AddrOpIdx + 1);
- if (!isa<ConstantSDNode>(Inc.getNode())) // Increment in Register
- Opc = getVLDSTRegisterUpdateOpcode(Opc);
- Ops.push_back(Inc);
+ return St;
+}
+
+/// WidenVector - Given a value in the V64 register class, produce the
+/// equivalent value in the V128 register class.
+class WidenVector {
+ SelectionDAG &DAG;
+
+public:
+ WidenVector(SelectionDAG &DAG) : DAG(DAG) {}
+
+ SDValue operator()(SDValue V64Reg) {
+ EVT VT = V64Reg.getValueType();
+ unsigned NarrowSize = VT.getVectorNumElements();
+ MVT EltTy = VT.getVectorElementType().getSimpleVT();
+ MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize);
+ SDLoc DL(V64Reg);
+
+ SDValue Undef =
+ SDValue(DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, WideTy), 0);
+ return DAG.getTargetInsertSubreg(AArch64::dsub, DL, WideTy, Undef, V64Reg);
}
+};
+
+/// NarrowVector - Given a value in the V128 register class, produce the
+/// equivalent value in the V64 register class.
+static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) {
+ EVT VT = V128Reg.getValueType();
+ unsigned WideSize = VT.getVectorNumElements();
+ MVT EltTy = VT.getVectorElementType().getSimpleVT();
+ MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2);
+
+ return DAG.getTargetExtractSubreg(AArch64::dsub, SDLoc(V128Reg), NarrowTy,
+ V128Reg);
+}
+
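+// Lane load/store instructions exist only for 128-bit vector types, so 64-bit
+// operands are widened to Q registers on the way in (WidenVector) and results
+// are narrowed back to D registers on the way out (NarrowVector).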
+SDNode *AArch64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs,
+ unsigned Opc) {
+ SDLoc dl(N);
+ EVT VT = N->getValueType(0);
+ bool Narrow = VT.getSizeInBits() == 64;
+
+ // Form a REG_SEQUENCE to force register allocation.
+ SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
+
+ if (Narrow)
+ std::transform(Regs.begin(), Regs.end(), Regs.begin(),
+ WidenVector(*CurDAG));
+
+ SDValue RegSeq = createQTuple(Regs);
- SmallVector<SDValue, 4> Regs(N->op_begin() + Vec0Idx,
- N->op_begin() + Vec0Idx + NumVecs);
- SDValue SrcReg = is64BitVector ? createDTuple(Regs) : createQTuple(Regs);
- Ops.push_back(SrcReg);
+ std::vector<EVT> ResTys;
+ ResTys.push_back(MVT::Untyped);
+ ResTys.push_back(MVT::Other);
- // Push back the Chain
+ unsigned LaneNo =
+ cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue();
+
+ SmallVector<SDValue, 6> Ops;
+ Ops.push_back(RegSeq);
+ Ops.push_back(CurDAG->getTargetConstant(LaneNo, MVT::i64));
+ Ops.push_back(N->getOperand(NumVecs + 3));
Ops.push_back(N->getOperand(0));
+ SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
+ SDValue SuperReg = SDValue(Ld, 0);
+
+ EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
+ static unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1, AArch64::qsub2,
+ AArch64::qsub3 };
+ for (unsigned i = 0; i < NumVecs; ++i) {
+ SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, SuperReg);
+ if (Narrow)
+ NV = NarrowVector(NV, *CurDAG);
+ ReplaceUses(SDValue(N, i), NV);
+ }
- // Transfer memoperands.
- SDNode *VSt = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
- cast<MachineSDNode>(VSt)->setMemRefs(MemOp, MemOp + 1);
+ ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
- return VSt;
+ return Ld;
}
-SDValue
-AArch64DAGToDAGISel::getTargetSubregToReg(int SRIdx, SDLoc DL, EVT VT, EVT VTD,
- SDValue Operand) {
- SDNode *Reg = CurDAG->getMachineNode(TargetOpcode::SUBREG_TO_REG, DL,
- VT, VTD, MVT::Other,
- CurDAG->getTargetConstant(0, MVT::i64),
- Operand,
- CurDAG->getTargetConstant(AArch64::sub_64, MVT::i32));
- return SDValue(Reg, 0);
+SDNode *AArch64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs,
+ unsigned Opc) {
+ SDLoc dl(N);
+ EVT VT = N->getValueType(0);
+ bool Narrow = VT.getSizeInBits() == 64;
+
+ // Form a REG_SEQUENCE to force register allocation.
+ SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
+
+ if (Narrow)
+ std::transform(Regs.begin(), Regs.end(), Regs.begin(),
+ WidenVector(*CurDAG));
+
+ SDValue RegSeq = createQTuple(Regs);
+
+ std::vector<EVT> ResTys;
+ ResTys.push_back(MVT::i64); // Type of the write back register
+ ResTys.push_back(MVT::Untyped);
+ ResTys.push_back(MVT::Other);
+
+ unsigned LaneNo =
+ cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue();
+
+ SmallVector<SDValue, 6> Ops;
+ Ops.push_back(RegSeq);
+ Ops.push_back(CurDAG->getTargetConstant(LaneNo, MVT::i64)); // Lane Number
+ Ops.push_back(N->getOperand(NumVecs + 2)); // Base register
+ Ops.push_back(N->getOperand(NumVecs + 3)); // Incremental
+ Ops.push_back(N->getOperand(0));
+ SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
+
+ // Update uses of the write back register
+ ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));
+
+ // Update uses of the vector list
+ SDValue SuperReg = SDValue(Ld, 1);
+ if (NumVecs == 1) {
+ ReplaceUses(SDValue(N, 0),
+ Narrow ? NarrowVector(SuperReg, *CurDAG) : SuperReg);
+ } else {
+ EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
+ static unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1, AArch64::qsub2,
+ AArch64::qsub3 };
+ for (unsigned i = 0; i < NumVecs; ++i) {
+ SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT,
+ SuperReg);
+ if (Narrow)
+ NV = NarrowVector(NV, *CurDAG);
+ ReplaceUses(SDValue(N, i), NV);
+ }
+ }
+
+ // Update the Chain
+ ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
+
+ return Ld;
}
-SDNode *AArch64DAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating,
- unsigned NumVecs,
- const uint16_t *Opcodes) {
- assert(NumVecs >=2 && NumVecs <= 4 && "Load Dup NumVecs out-of-range");
+SDNode *AArch64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs,
+ unsigned Opc) {
SDLoc dl(N);
+ EVT VT = N->getOperand(2)->getValueType(0);
+ bool Narrow = VT.getSizeInBits() == 64;
+
+ // Form a REG_SEQUENCE to force register allocation.
+ SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
+
+ if (Narrow)
+ std::transform(Regs.begin(), Regs.end(), Regs.begin(),
+ WidenVector(*CurDAG));
+
+ SDValue RegSeq = createQTuple(Regs);
+
+ unsigned LaneNo =
+ cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue();
- EVT VT = N->getValueType(0);
- unsigned OpcodeIndex;
- bool is64BitVector = VT.is64BitVector();
- switch (VT.getScalarType().getSizeInBits()) {
- case 8: OpcodeIndex = is64BitVector ? 0 : 4; break;
- case 16: OpcodeIndex = is64BitVector ? 1 : 5; break;
- case 32: OpcodeIndex = is64BitVector ? 2 : 6; break;
- case 64: OpcodeIndex = is64BitVector ? 3 : 7; break;
- default: llvm_unreachable("unhandled vector duplicate lane load type");
- }
- unsigned Opc = Opcodes[OpcodeIndex];
-
- SDValue SuperReg;
SmallVector<SDValue, 6> Ops;
- Ops.push_back(N->getOperand(1)); // Push back the Memory Address
- if (isUpdating) {
- SDValue Inc = N->getOperand(2);
- if (!isa<ConstantSDNode>(Inc.getNode())) // Increment in Register
- Opc = getVLDSTRegisterUpdateOpcode(Opc);
- Ops.push_back(Inc);
- }
- Ops.push_back(N->getOperand(0)); // Push back the Chain
-
- SmallVector<EVT, 3> ResTys;
- // Push back the type of return super register
- if (NumVecs == 3)
- ResTys.push_back(MVT::Untyped);
- else {
- EVT ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64,
- is64BitVector ? NumVecs : NumVecs * 2);
- ResTys.push_back(ResTy);
- }
- if (isUpdating)
- ResTys.push_back(MVT::i64); // Type of the updated register
- ResTys.push_back(MVT::Other); // Type of the Chain
- SDNode *VLdDup = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
+ Ops.push_back(RegSeq);
+ Ops.push_back(CurDAG->getTargetConstant(LaneNo, MVT::i64));
+ Ops.push_back(N->getOperand(NumVecs + 3));
+ Ops.push_back(N->getOperand(0));
+ SDNode *St = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
// Transfer memoperands.
MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
- cast<MachineSDNode>(VLdDup)->setMemRefs(MemOp, MemOp + 1);
-
- SuperReg = SDValue(VLdDup, 0);
- unsigned Sub0 = is64BitVector ? AArch64::dsub_0 : AArch64::qsub_0;
- // Update uses of each registers in super register
- for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
- ReplaceUses(SDValue(N, Vec),
- CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
- // Update uses of the Chain
- ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1));
- if (isUpdating)
- ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2));
- return NULL;
+ cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1);
+
+ return St;
}
-// We only have 128-bit vector type of load/store lane instructions.
-// If it is 64-bit vector, we also select it to the 128-bit instructions.
-// Just use SUBREG_TO_REG to adapt the input to 128-bit vector and
-// EXTRACT_SUBREG to get the 64-bit vector from the 128-bit vector output.
-SDNode *AArch64DAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad,
- bool isUpdating, unsigned NumVecs,
- const uint16_t *Opcodes) {
- assert(NumVecs >= 2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range");
+SDNode *AArch64DAGToDAGISel::SelectPostStoreLane(SDNode *N, unsigned NumVecs,
+ unsigned Opc) {
SDLoc dl(N);
- unsigned AddrOpIdx = isUpdating ? 1 : 2;
- unsigned Vec0Idx = 3;
+ EVT VT = N->getOperand(2)->getValueType(0);
+ bool Narrow = VT.getSizeInBits() == 64;
- SDValue Chain = N->getOperand(0);
- unsigned Lane =
- cast<ConstantSDNode>(N->getOperand(Vec0Idx + NumVecs))->getZExtValue();
- EVT VT = N->getOperand(Vec0Idx).getValueType();
- bool is64BitVector = VT.is64BitVector();
- EVT VT64; // 64-bit Vector Type
-
- if (is64BitVector) {
- VT64 = VT;
- VT = EVT::getVectorVT(*CurDAG->getContext(), VT.getVectorElementType(),
- VT.getVectorNumElements() * 2);
- }
-
- unsigned OpcodeIndex;
- switch (VT.getScalarType().getSizeInBits()) {
- case 8: OpcodeIndex = 0; break;
- case 16: OpcodeIndex = 1; break;
- case 32: OpcodeIndex = 2; break;
- case 64: OpcodeIndex = 3; break;
- default: llvm_unreachable("unhandled vector lane load/store type");
- }
- unsigned Opc = Opcodes[OpcodeIndex];
-
- SmallVector<EVT, 3> ResTys;
- if (IsLoad) {
- // Push back the type of return super register
- if (NumVecs == 3)
- ResTys.push_back(MVT::Untyped);
- else {
- EVT ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64,
- is64BitVector ? NumVecs : NumVecs * 2);
- ResTys.push_back(ResTy);
- }
- }
- if (isUpdating)
- ResTys.push_back(MVT::i64); // Type of the updated register
- ResTys.push_back(MVT::Other); // Type of Chain
- SmallVector<SDValue, 5> Ops;
- Ops.push_back(N->getOperand(AddrOpIdx)); // Push back the Memory Address
- if (isUpdating) {
- SDValue Inc = N->getOperand(AddrOpIdx + 1);
- if (!isa<ConstantSDNode>(Inc.getNode())) // Increment in Register
- Opc = getVLDSTRegisterUpdateOpcode(Opc);
- Ops.push_back(Inc);
- }
-
- SmallVector<SDValue, 4> Regs(N->op_begin() + Vec0Idx,
- N->op_begin() + Vec0Idx + NumVecs);
- if (is64BitVector)
- for (unsigned i = 0; i < Regs.size(); i++)
- Regs[i] = getTargetSubregToReg(AArch64::sub_64, dl, VT, VT64, Regs[i]);
- SDValue SuperReg = createQTuple(Regs);
-
- Ops.push_back(SuperReg); // Source Reg
- SDValue LaneValue = CurDAG->getTargetConstant(Lane, MVT::i32);
- Ops.push_back(LaneValue);
- Ops.push_back(Chain); // Push back the Chain
-
- SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
+ // Form a REG_SEQUENCE to force register allocation.
+ SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
+
+ if (Narrow)
+ std::transform(Regs.begin(), Regs.end(), Regs.begin(),
+ WidenVector(*CurDAG));
+
+ SDValue RegSeq = createQTuple(Regs);
+
+ SmallVector<EVT, 2> ResTys;
+ ResTys.push_back(MVT::i64); // Type of the write back register
+ ResTys.push_back(MVT::Other);
+
+ unsigned LaneNo =
+ cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue();
+
+ SmallVector<SDValue, 6> Ops;
+ Ops.push_back(RegSeq);
+ Ops.push_back(CurDAG->getTargetConstant(LaneNo, MVT::i64));
+ Ops.push_back(N->getOperand(NumVecs + 2)); // Base Register
+ Ops.push_back(N->getOperand(NumVecs + 3)); // Incremental
+ Ops.push_back(N->getOperand(0));
+ SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
+
+ // Transfer memoperands.
MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
- cast<MachineSDNode>(VLdLn)->setMemRefs(MemOp, MemOp + 1);
- if (!IsLoad)
- return VLdLn;
-
- // Extract the subregisters.
- SuperReg = SDValue(VLdLn, 0);
- unsigned Sub0 = AArch64::qsub_0;
- // Update uses of each registers in super register
- for (unsigned Vec = 0; Vec < NumVecs; ++Vec) {
- SDValue SUB0 = CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg);
- if (is64BitVector) {
- SUB0 = CurDAG->getTargetExtractSubreg(AArch64::sub_64, dl, VT64, SUB0);
- }
- ReplaceUses(SDValue(N, Vec), SUB0);
+ cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1);
+
+ return St;
+}
+
+static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N,
+ unsigned &Opc, SDValue &Opd0,
+ unsigned &LSB, unsigned &MSB,
+ unsigned NumberOfIgnoredLowBits,
+ bool BiggerPattern) {
+ assert(N->getOpcode() == ISD::AND &&
+ "N must be a AND operation to call this function");
+
+ EVT VT = N->getValueType(0);
+
+  // We could test the type of VT here and return false when it does not
+  // match, but that check has already been done before this function is
+  // called, so we turn it into an assert to avoid redundant code.
+ assert((VT == MVT::i32 || VT == MVT::i64) &&
+ "Type checking must have been done before calling this function");
+
+ // FIXME: simplify-demanded-bits in DAGCombine will probably have
+ // changed the AND node to a 32-bit mask operation. We'll have to
+ // undo that as part of the transform here if we want to catch all
+ // the opportunities.
+ // Currently the NumberOfIgnoredLowBits argument helps to recover
+  // from these situations when matching a bigger pattern (bitfield insert).
+
+ // For unsigned extracts, check for a shift right and mask
+ uint64_t And_imm = 0;
+ if (!isOpcWithIntImmediate(N, ISD::AND, And_imm))
+ return false;
+
+ const SDNode *Op0 = N->getOperand(0).getNode();
+
+ // Because of simplify-demanded-bits in DAGCombine, the mask may have been
+  // simplified. Try to undo that.
+ And_imm |= (1 << NumberOfIgnoredLowBits) - 1;
+
+ // The immediate is a mask of the low bits iff imm & (imm+1) == 0
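+  // e.g. 0x0000000f passes (0xf & 0x10 == 0); 0x0000000c fails (0xc & 0xd != 0).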
+ if (And_imm & (And_imm + 1))
+ return false;
+
+ bool ClampMSB = false;
+ uint64_t Srl_imm = 0;
+ // Handle the SRL + ANY_EXTEND case.
+ if (VT == MVT::i64 && Op0->getOpcode() == ISD::ANY_EXTEND &&
+ isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, Srl_imm)) {
+ // Extend the incoming operand of the SRL to 64-bit.
+ Opd0 = Widen(CurDAG, Op0->getOperand(0).getOperand(0));
+ // Make sure to clamp the MSB so that we preserve the semantics of the
+ // original operations.
+ ClampMSB = true;
+ } else if (VT == MVT::i32 && Op0->getOpcode() == ISD::TRUNCATE &&
+ isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL,
+ Srl_imm)) {
+ // If the shift result was truncated, we can still combine them.
+ Opd0 = Op0->getOperand(0).getOperand(0);
+
+ // Use the type of SRL node.
+ VT = Opd0->getValueType(0);
+ } else if (isOpcWithIntImmediate(Op0, ISD::SRL, Srl_imm)) {
+ Opd0 = Op0->getOperand(0);
+ } else if (BiggerPattern) {
+ // Let's pretend a 0 shift right has been performed.
+    // The resulting code will be at least as good as the original one,
+    // and it may expose more opportunities for the bitfield insert pattern.
+    // FIXME: Currently we limit this to the bigger pattern, because
+    // some optimizations expect AND and not UBFM.
+ Opd0 = N->getOperand(0);
+ } else
+ return false;
+
+ assert((BiggerPattern || (Srl_imm > 0 && Srl_imm < VT.getSizeInBits())) &&
+ "bad amount in shift node!");
+
+ LSB = Srl_imm;
+ MSB = Srl_imm + (VT == MVT::i32 ? CountTrailingOnes_32(And_imm)
+ : CountTrailingOnes_64(And_imm)) -
+ 1;
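+  // e.g. (x >> 4) & 0xff yields LSB = 4, MSB = 11, i.e. UBFM x, #4, #11.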
+ if (ClampMSB)
+ // Since we're moving the extend before the right shift operation, we need
+ // to clamp the MSB to make sure we don't shift in undefined bits instead of
+ // the zeros which would get shifted in with the original right shift
+ // operation.
+ MSB = MSB > 31 ? 31 : MSB;
+
+ Opc = VT == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
+ return true;
+}
+
+static bool isOneBitExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0,
+ unsigned &LSB, unsigned &MSB) {
+ // We are looking for the following pattern which basically extracts a single
+ // bit from the source value and places it in the LSB of the destination
+  // value; all other bits of the destination value are set to zero:
+ //
+ // Value2 = AND Value, MaskImm
+ // SRL Value2, ShiftImm
+ //
+ // with MaskImm >> ShiftImm == 1.
+ //
+ // This gets selected into a single UBFM:
+ //
+ // UBFM Value, ShiftImm, ShiftImm
+ //
+
+ if (N->getOpcode() != ISD::SRL)
+ return false;
+
+ uint64_t And_mask = 0;
+ if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, And_mask))
+ return false;
+
+ Opd0 = N->getOperand(0).getOperand(0);
+
+ uint64_t Srl_imm = 0;
+ if (!isIntImmediate(N->getOperand(1), Srl_imm))
+ return false;
+
+ // Check whether we really have a one bit extract here.
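+  // e.g. (x & 0x8) >> 3 becomes UBFM x, #3, #3, leaving bit 3 in the LSB.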
+ if (And_mask >> Srl_imm == 0x1) {
+ if (N->getValueType(0) == MVT::i32)
+ Opc = AArch64::UBFMWri;
+ else
+ Opc = AArch64::UBFMXri;
+
+ LSB = MSB = Srl_imm;
+
+ return true;
}
- ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, 1));
- if (isUpdating)
- ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdLn, 2));
- return NULL;
+
+ return false;
}
-unsigned AArch64DAGToDAGISel::getTBLOpc(bool IsExt, bool Is64Bit,
- unsigned NumOfVec) {
- assert(NumOfVec >= 1 && NumOfVec <= 4 && "VST NumVecs out-of-range");
+static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0,
+ unsigned &LSB, unsigned &MSB,
+ bool BiggerPattern) {
+ assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) &&
+ "N must be a SHR/SRA operation to call this function");
- unsigned Opc = 0;
- switch (NumOfVec) {
+ EVT VT = N->getValueType(0);
+
+  // We could test the type of VT here and return false when it does not
+  // match, but that check has already been done before this function is
+  // called, so we turn it into an assert to avoid redundant code.
+ assert((VT == MVT::i32 || VT == MVT::i64) &&
+ "Type checking must have been done before calling this function");
+
+ // Check for AND + SRL doing a one bit extract.
+ if (isOneBitExtractOpFromShr(N, Opc, Opd0, LSB, MSB))
+ return true;
+
+ // we're looking for a shift of a shift
+ uint64_t Shl_imm = 0;
+ uint64_t Trunc_bits = 0;
+ if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) {
+ Opd0 = N->getOperand(0).getOperand(0);
+ } else if (VT == MVT::i32 && N->getOpcode() == ISD::SRL &&
+ N->getOperand(0).getNode()->getOpcode() == ISD::TRUNCATE) {
+    // We are looking for a shift of a truncate. Truncating from i64 to i32
+    // can be considered as setting the high 32 bits to zero. Our strategy
+    // here is to always generate a 64-bit UBFM; this consistency will help
+    // the CSE pass later find more redundancy.
+ Opd0 = N->getOperand(0).getOperand(0);
+ Trunc_bits = Opd0->getValueType(0).getSizeInBits() - VT.getSizeInBits();
+ VT = Opd0->getValueType(0);
+ assert(VT == MVT::i64 && "the promoted type should be i64");
+ } else if (BiggerPattern) {
+ // Let's pretend a 0 shift left has been performed.
+ // FIXME: Currently we limit this to the bigger pattern case,
+ // because some optimizations expect AND and not UBFM
+ Opd0 = N->getOperand(0);
+ } else
+ return false;
+
+ assert(Shl_imm < VT.getSizeInBits() && "bad amount in shift node!");
+ uint64_t Srl_imm = 0;
+ if (!isIntImmediate(N->getOperand(1), Srl_imm))
+ return false;
+
+ assert(Srl_imm > 0 && Srl_imm < VT.getSizeInBits() &&
+ "bad amount in shift node!");
+ // Note: The width operand is encoded as width-1.
+ unsigned Width = VT.getSizeInBits() - Trunc_bits - Srl_imm - 1;
+ int sLSB = Srl_imm - Shl_imm;
+ if (sLSB < 0)
+ return false;
+ LSB = sLSB;
+ MSB = LSB + Width;
+ // SRA requires a signed extraction
+ if (VT == MVT::i32)
+ Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMWri : AArch64::UBFMWri;
+ else
+ Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMXri : AArch64::UBFMXri;
+ return true;
+}
+
+static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc,
+ SDValue &Opd0, unsigned &LSB, unsigned &MSB,
+ unsigned NumberOfIgnoredLowBits = 0,
+ bool BiggerPattern = false) {
+ if (N->getValueType(0) != MVT::i32 && N->getValueType(0) != MVT::i64)
+ return false;
+
+ switch (N->getOpcode()) {
default:
+ if (!N->isMachineOpcode())
+ return false;
break;
- case 1:
- if (IsExt)
- Opc = Is64Bit ? AArch64::TBX1_8b : AArch64::TBX1_16b;
+ case ISD::AND:
+ return isBitfieldExtractOpFromAnd(CurDAG, N, Opc, Opd0, LSB, MSB,
+ NumberOfIgnoredLowBits, BiggerPattern);
+ case ISD::SRL:
+ case ISD::SRA:
+ return isBitfieldExtractOpFromShr(N, Opc, Opd0, LSB, MSB, BiggerPattern);
+ }
+
+ unsigned NOpc = N->getMachineOpcode();
+ switch (NOpc) {
+ default:
+ return false;
+ case AArch64::SBFMWri:
+ case AArch64::UBFMWri:
+ case AArch64::SBFMXri:
+ case AArch64::UBFMXri:
+ Opc = NOpc;
+ Opd0 = N->getOperand(0);
+ LSB = cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue();
+ MSB = cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue();
+ return true;
+ }
+ // Unreachable
+ return false;
+}
+
+SDNode *AArch64DAGToDAGISel::SelectBitfieldExtractOp(SDNode *N) {
+ unsigned Opc, LSB, MSB;
+ SDValue Opd0;
+ if (!isBitfieldExtractOp(CurDAG, N, Opc, Opd0, LSB, MSB))
+ return nullptr;
+
+ EVT VT = N->getValueType(0);
+
+ // If the bit extract operation is 64bit but the original type is 32bit, we
+ // need to add one EXTRACT_SUBREG.
+ if ((Opc == AArch64::SBFMXri || Opc == AArch64::UBFMXri) && VT == MVT::i32) {
+ SDValue Ops64[] = {Opd0, CurDAG->getTargetConstant(LSB, MVT::i64),
+ CurDAG->getTargetConstant(MSB, MVT::i64)};
+
+ SDNode *BFM = CurDAG->getMachineNode(Opc, SDLoc(N), MVT::i64, Ops64);
+ SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, MVT::i32);
+ MachineSDNode *Node =
+ CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, SDLoc(N), MVT::i32,
+ SDValue(BFM, 0), SubReg);
+ return Node;
+ }
+
+ SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(LSB, VT),
+ CurDAG->getTargetConstant(MSB, VT)};
+ return CurDAG->SelectNodeTo(N, Opc, VT, Ops);
+}
+
+/// Does DstMask form a complementary pair with the mask provided by
+/// BitsToBeInserted, suitable for use in a BFI instruction? Roughly speaking,
+/// this asks whether DstMask zeroes precisely those bits that will be set by
+/// the other half.
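+/// e.g. for i32, DstMask == 0xffff0000 pairs with inserted bits 0x0000ffff.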
+static bool isBitfieldDstMask(uint64_t DstMask, APInt BitsToBeInserted,
+ unsigned NumberOfIgnoredHighBits, EVT VT) {
+ assert((VT == MVT::i32 || VT == MVT::i64) &&
+ "i32 or i64 mask type expected!");
+ unsigned BitWidth = VT.getSizeInBits() - NumberOfIgnoredHighBits;
+
+ APInt SignificantDstMask = APInt(BitWidth, DstMask);
+ APInt SignificantBitsToBeInserted = BitsToBeInserted.zextOrTrunc(BitWidth);
+
+ return (SignificantDstMask & SignificantBitsToBeInserted) == 0 &&
+ (SignificantDstMask | SignificantBitsToBeInserted).isAllOnesValue();
+}
+
+// Look for bits that will be useful for later uses.
+// A bit is considered useless as soon as it is dropped and never used
+// before it has been dropped.
+// E.g., looking for the useful bits of x:
+// 1. y = x & 0x7
+// 2. z = y >> 2
+// After #1, the useful bits of x are 0x7; they then live through y.
+// After #2, the useful bits of x are 0x4.
+// However, if x is used by an unpredictable instruction, then all of its
+// bits are useful.
+// E.g.
+// 1. y = x & 0x7
+// 2. z = y >> 2
+// 3. str x, [@x]
+static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth = 0);
+
+static void getUsefulBitsFromAndWithImmediate(SDValue Op, APInt &UsefulBits,
+ unsigned Depth) {
+ uint64_t Imm =
+ cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
+ Imm = AArch64_AM::decodeLogicalImmediate(Imm, UsefulBits.getBitWidth());
+ UsefulBits &= APInt(UsefulBits.getBitWidth(), Imm);
+ getUsefulBits(Op, UsefulBits, Depth + 1);
+}
+
+static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits,
+ uint64_t Imm, uint64_t MSB,
+ unsigned Depth) {
+  // Inherit the bit width from UsefulBits.
+ APInt OpUsefulBits(UsefulBits);
+ OpUsefulBits = 1;
+
+ if (MSB >= Imm) {
+ OpUsefulBits = OpUsefulBits.shl(MSB - Imm + 1);
+ --OpUsefulBits;
+ // The interesting part will be in the lower part of the result
+ getUsefulBits(Op, OpUsefulBits, Depth + 1);
+ // The interesting part was starting at Imm in the argument
+ OpUsefulBits = OpUsefulBits.shl(Imm);
+ } else {
+ OpUsefulBits = OpUsefulBits.shl(MSB + 1);
+ --OpUsefulBits;
+ // The interesting part will be shifted in the result
+ OpUsefulBits = OpUsefulBits.shl(OpUsefulBits.getBitWidth() - Imm);
+ getUsefulBits(Op, OpUsefulBits, Depth + 1);
+ // The interesting part was at zero in the argument
+ OpUsefulBits = OpUsefulBits.lshr(OpUsefulBits.getBitWidth() - Imm);
+ }
+
+ UsefulBits &= OpUsefulBits;
+}
+
+static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits,
+ unsigned Depth) {
+ uint64_t Imm =
+ cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
+ uint64_t MSB =
+ cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
+
+ getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth);
+}
+
+static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits,
+ unsigned Depth) {
+ uint64_t ShiftTypeAndValue =
+ cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
+ APInt Mask(UsefulBits);
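+  // Build an all-ones mask with the same bit width as UsefulBits.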
+ Mask.clearAllBits();
+ Mask.flipAllBits();
+
+ if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSL) {
+ // Shift Left
+ uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
+ Mask = Mask.shl(ShiftAmt);
+ getUsefulBits(Op, Mask, Depth + 1);
+ Mask = Mask.lshr(ShiftAmt);
+ } else if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSR) {
+ // Shift Right
+ // We do not handle AArch64_AM::ASR, because the sign will change the
+ // number of useful bits
+ uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
+ Mask = Mask.lshr(ShiftAmt);
+ getUsefulBits(Op, Mask, Depth + 1);
+ Mask = Mask.shl(ShiftAmt);
+ } else
+ return;
+
+ UsefulBits &= Mask;
+}
+
+static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits,
+ unsigned Depth) {
+ uint64_t Imm =
+ cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
+ uint64_t MSB =
+ cast<const ConstantSDNode>(Op.getOperand(3).getNode())->getZExtValue();
+
+ if (Op.getOperand(1) == Orig)
+ return getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth);
+
+ APInt OpUsefulBits(UsefulBits);
+ OpUsefulBits = 1;
+
+ if (MSB >= Imm) {
+ OpUsefulBits = OpUsefulBits.shl(MSB - Imm + 1);
+ --OpUsefulBits;
+ UsefulBits &= ~OpUsefulBits;
+ getUsefulBits(Op, UsefulBits, Depth + 1);
+ } else {
+ OpUsefulBits = OpUsefulBits.shl(MSB + 1);
+ --OpUsefulBits;
+ UsefulBits = ~(OpUsefulBits.shl(OpUsefulBits.getBitWidth() - Imm));
+ getUsefulBits(Op, UsefulBits, Depth + 1);
+ }
+}
+
+static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits,
+ SDValue Orig, unsigned Depth) {
+
+ // Users of this node should have already been instruction selected
+ // FIXME: Can we turn that into an assert?
+ if (!UserNode->isMachineOpcode())
+ return;
+
+ switch (UserNode->getMachineOpcode()) {
+ default:
+ return;
+ case AArch64::ANDSWri:
+ case AArch64::ANDSXri:
+ case AArch64::ANDWri:
+ case AArch64::ANDXri:
+    // We increment Depth only when we call getUsefulBits.
+ return getUsefulBitsFromAndWithImmediate(SDValue(UserNode, 0), UsefulBits,
+ Depth);
+ case AArch64::UBFMWri:
+ case AArch64::UBFMXri:
+ return getUsefulBitsFromUBFM(SDValue(UserNode, 0), UsefulBits, Depth);
+
+ case AArch64::ORRWrs:
+ case AArch64::ORRXrs:
+ if (UserNode->getOperand(1) != Orig)
+ return;
+ return getUsefulBitsFromOrWithShiftedReg(SDValue(UserNode, 0), UsefulBits,
+ Depth);
+ case AArch64::BFMWri:
+ case AArch64::BFMXri:
+ return getUsefulBitsFromBFM(SDValue(UserNode, 0), Orig, UsefulBits, Depth);
+ }
+}
+
+static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth) {
+ if (Depth >= 6)
+ return;
+ // Initialize UsefulBits
+ if (!Depth) {
+ unsigned Bitwidth = Op.getValueType().getScalarType().getSizeInBits();
+    // At the beginning, assume every produced bit is useful.
+ UsefulBits = APInt(Bitwidth, 0);
+ UsefulBits.flipAllBits();
+ }
+ APInt UsersUsefulBits(UsefulBits.getBitWidth(), 0);
+
+ for (SDNode *Node : Op.getNode()->uses()) {
+ // A use cannot produce useful bits
+ APInt UsefulBitsForUse = APInt(UsefulBits);
+ getUsefulBitsForUse(Node, UsefulBitsForUse, Op, Depth);
+ UsersUsefulBits |= UsefulBitsForUse;
+ }
+ // UsefulBits contains the produced bits that are meaningful for the
+ // current definition, thus a user cannot make a bit meaningful at
+ // this point
+ UsefulBits &= UsersUsefulBits;
+}
+
+/// Create a machine node performing a notional SHL of Op by ShlAmount. If
+/// ShlAmount is negative, do a (logical) right-shift instead. If ShlAmount is
+/// 0, return Op unchanged.
+static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount) {
+ if (ShlAmount == 0)
+ return Op;
+
+ EVT VT = Op.getValueType();
+ unsigned BitWidth = VT.getSizeInBits();
+ unsigned UBFMOpc = BitWidth == 32 ? AArch64::UBFMWri : AArch64::UBFMXri;
+
+ SDNode *ShiftNode;
+ if (ShlAmount > 0) {
+    // LSL wD, wN, #Amt == UBFM wD, wN, #BitWidth-Amt, #BitWidth-1-Amt
+ ShiftNode = CurDAG->getMachineNode(
+ UBFMOpc, SDLoc(Op), VT, Op,
+ CurDAG->getTargetConstant(BitWidth - ShlAmount, VT),
+ CurDAG->getTargetConstant(BitWidth - 1 - ShlAmount, VT));
+ } else {
+    // LSR wD, wN, #Amt == UBFM wD, wN, #Amt, #BitWidth-1
+ assert(ShlAmount < 0 && "expected right shift");
+ int ShrAmount = -ShlAmount;
+ ShiftNode = CurDAG->getMachineNode(
+ UBFMOpc, SDLoc(Op), VT, Op, CurDAG->getTargetConstant(ShrAmount, VT),
+ CurDAG->getTargetConstant(BitWidth - 1, VT));
+ }
+
+ return SDValue(ShiftNode, 0);
+}
+
+/// Does this tree qualify as an attempt to move a bitfield into position,
+/// essentially "(and (shl VAL, N), Mask)".
+static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op,
+ SDValue &Src, int &ShiftAmount,
+ int &MaskWidth) {
+ EVT VT = Op.getValueType();
+ unsigned BitWidth = VT.getSizeInBits();
+ (void)BitWidth;
+ assert(BitWidth == 32 || BitWidth == 64);
+
+ APInt KnownZero, KnownOne;
+ CurDAG->computeKnownBits(Op, KnownZero, KnownOne);
+
+ // Non-zero in the sense that they're not provably zero, which is the key
+  // point if we want to use this value.
+ uint64_t NonZeroBits = (~KnownZero).getZExtValue();
+
+ // Discard a constant AND mask if present. It's safe because the node will
+ // already have been factored into the computeKnownBits calculation above.
+ uint64_t AndImm;
+ if (isOpcWithIntImmediate(Op.getNode(), ISD::AND, AndImm)) {
+ assert((~APInt(BitWidth, AndImm) & ~KnownZero) == 0);
+ Op = Op.getOperand(0);
+ }
+
+ uint64_t ShlImm;
+ if (!isOpcWithIntImmediate(Op.getNode(), ISD::SHL, ShlImm))
+ return false;
+ Op = Op.getOperand(0);
+
+ if (!isShiftedMask_64(NonZeroBits))
+ return false;
+
+ ShiftAmount = countTrailingZeros(NonZeroBits);
+ MaskWidth = CountTrailingOnes_64(NonZeroBits >> ShiftAmount);
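+  // e.g. NonZeroBits == 0x0ff0 gives ShiftAmount = 4 and MaskWidth = 8.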
+
+ // BFI encompasses sufficiently many nodes that it's worth inserting an extra
+ // LSL/LSR if the mask in NonZeroBits doesn't quite match up with the ISD::SHL
+ // amount.
+ Src = getLeftShift(CurDAG, Op, ShlImm - ShiftAmount);
+
+ return true;
+}
+
+// Given an OR operation, check if we have the following pattern
+// ubfm c, b, imm, imm2 (or something that does the same job, see
+// isBitfieldExtractOp)
+// d = e & mask2 ; where mask2 is a binary sequence of 1..10..0 and
+// countTrailingZeros(mask2) == imm2 - imm + 1
+// f = d | c
+// if yes, the given reference arguments will be updated so that one can
+// replace the OR instruction with:
+// f = Opc Opd0, Opd1, LSB, MSB ; where Opc is a BFM, LSB = imm, and MSB = imm2
+static bool isBitfieldInsertOpFromOr(SDNode *N, unsigned &Opc, SDValue &Dst,
+ SDValue &Src, unsigned &ImmR,
+ unsigned &ImmS, SelectionDAG *CurDAG) {
+ assert(N->getOpcode() == ISD::OR && "Expect a OR operation");
+
+ // Set Opc
+ EVT VT = N->getValueType(0);
+ if (VT == MVT::i32)
+ Opc = AArch64::BFMWri;
+ else if (VT == MVT::i64)
+ Opc = AArch64::BFMXri;
+ else
+ return false;
+
+ // Because of simplify-demanded-bits in DAGCombine, involved masks may not
+ // have the expected shape. Try to undo that.
+ APInt UsefulBits;
+ getUsefulBits(SDValue(N, 0), UsefulBits);
+
+ unsigned NumberOfIgnoredLowBits = UsefulBits.countTrailingZeros();
+ unsigned NumberOfIgnoredHighBits = UsefulBits.countLeadingZeros();
+
+ // OR is commutative, check both possibilities (does llvm provide a
+  // way to do that directly, e.g., via a code matcher?)
+ SDValue OrOpd1Val = N->getOperand(1);
+ SDNode *OrOpd0 = N->getOperand(0).getNode();
+ SDNode *OrOpd1 = N->getOperand(1).getNode();
+ for (int i = 0; i < 2;
+ ++i, std::swap(OrOpd0, OrOpd1), OrOpd1Val = N->getOperand(0)) {
+ unsigned BFXOpc;
+ int DstLSB, Width;
+ if (isBitfieldExtractOp(CurDAG, OrOpd0, BFXOpc, Src, ImmR, ImmS,
+ NumberOfIgnoredLowBits, true)) {
+ // Check that the returned opcode is compatible with the pattern,
+ // i.e., same type and zero extended (U and not S)
+ if ((BFXOpc != AArch64::UBFMXri && VT == MVT::i64) ||
+ (BFXOpc != AArch64::UBFMWri && VT == MVT::i32))
+ continue;
+
+ // Compute the width of the bitfield insertion
+ DstLSB = 0;
+ Width = ImmS - ImmR + 1;
+      // FIXME: This constraint is to catch bitfield insertion; we may
+      // want to widen the pattern if we want to grab the general bitfield
+      // move case.
+ if (Width <= 0)
+ continue;
+
+ // If the mask on the insertee is correct, we have a BFXIL operation. We
+ // can share the ImmR and ImmS values from the already-computed UBFM.
+ } else if (isBitfieldPositioningOp(CurDAG, SDValue(OrOpd0, 0), Src,
+ DstLSB, Width)) {
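+      // BFM takes ImmR as a right-rotate amount, so a left shift by DstLSB
+      // is encoded as -DstLSB modulo the register width; ImmS = Width - 1
+      // selects the low Width bits of Src.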
+ ImmR = (VT.getSizeInBits() - DstLSB) % VT.getSizeInBits();
+ ImmS = Width - 1;
+ } else
+ continue;
+
+ // Check the second part of the pattern
+ EVT VT = OrOpd1->getValueType(0);
+ assert((VT == MVT::i32 || VT == MVT::i64) && "unexpected OR operand");
+
+ // Compute the Known Zero for the candidate of the first operand.
+    // This allows us to catch more general cases than just looking for
+    // an AND with an immediate. Indeed, simplify-demanded-bits may have
+    // removed the AND instruction because it proved it was useless.
+ APInt KnownZero, KnownOne;
+ CurDAG->computeKnownBits(OrOpd1Val, KnownZero, KnownOne);
+
+ // Check if there is enough room for the second operand to appear
+ // in the first one
+ APInt BitsToBeInserted =
+ APInt::getBitsSet(KnownZero.getBitWidth(), DstLSB, DstLSB + Width);
+
+ if ((BitsToBeInserted & ~KnownZero) != 0)
+ continue;
+
+ // Set the first operand
+ uint64_t Imm;
+ if (isOpcWithIntImmediate(OrOpd1, ISD::AND, Imm) &&
+ isBitfieldDstMask(Imm, BitsToBeInserted, NumberOfIgnoredHighBits, VT))
+ // In that case, we can eliminate the AND
+ Dst = OrOpd1->getOperand(0);
else
- Opc = Is64Bit ? AArch64::TBL1_8b : AArch64::TBL1_16b;
+ // Maybe the AND has been removed by simplify-demanded-bits
+ // or is useful because it discards more bits
+ Dst = OrOpd1Val;
+
+ // both parts match
+ return true;
+ }
+
+ return false;
+}
+
+SDNode *AArch64DAGToDAGISel::SelectBitfieldInsertOp(SDNode *N) {
+ if (N->getOpcode() != ISD::OR)
+ return nullptr;
+
+ unsigned Opc;
+ unsigned LSB, MSB;
+ SDValue Opd0, Opd1;
+
+ if (!isBitfieldInsertOpFromOr(N, Opc, Opd0, Opd1, LSB, MSB, CurDAG))
+ return nullptr;
+
+ EVT VT = N->getValueType(0);
+ SDValue Ops[] = { Opd0,
+ Opd1,
+ CurDAG->getTargetConstant(LSB, VT),
+ CurDAG->getTargetConstant(MSB, VT) };
+ return CurDAG->SelectNodeTo(N, Opc, VT, Ops);
+}
+
+SDNode *AArch64DAGToDAGISel::SelectLIBM(SDNode *N) {
+ EVT VT = N->getValueType(0);
+ unsigned Variant;
+ unsigned Opc;
+ unsigned FRINTXOpcs[] = { AArch64::FRINTXSr, AArch64::FRINTXDr };
+
+ if (VT == MVT::f32) {
+ Variant = 0;
+ } else if (VT == MVT::f64) {
+ Variant = 1;
+ } else
+ return nullptr; // Unrecognized argument type. Fall back on default codegen.
+
+ // Pick the FRINTX variant needed to set the flags.
+ unsigned FRINTXOpc = FRINTXOpcs[Variant];
+
+ switch (N->getOpcode()) {
+ default:
+ return nullptr; // Unrecognized libm ISD node. Fall back on default codegen.
+ case ISD::FCEIL: {
+ unsigned FRINTPOpcs[] = { AArch64::FRINTPSr, AArch64::FRINTPDr };
+ Opc = FRINTPOpcs[Variant];
break;
- case 2:
- if (IsExt)
- Opc = Is64Bit ? AArch64::TBX2_8b : AArch64::TBX2_16b;
- else
- Opc = Is64Bit ? AArch64::TBL2_8b : AArch64::TBL2_16b;
+ }
+ case ISD::FFLOOR: {
+ unsigned FRINTMOpcs[] = { AArch64::FRINTMSr, AArch64::FRINTMDr };
+ Opc = FRINTMOpcs[Variant];
break;
- case 3:
- if (IsExt)
- Opc = Is64Bit ? AArch64::TBX3_8b : AArch64::TBX3_16b;
- else
- Opc = Is64Bit ? AArch64::TBL3_8b : AArch64::TBL3_16b;
+ }
+ case ISD::FTRUNC: {
+ unsigned FRINTZOpcs[] = { AArch64::FRINTZSr, AArch64::FRINTZDr };
+ Opc = FRINTZOpcs[Variant];
break;
- case 4:
- if (IsExt)
- Opc = Is64Bit ? AArch64::TBX4_8b : AArch64::TBX4_16b;
- else
- Opc = Is64Bit ? AArch64::TBL4_8b : AArch64::TBL4_16b;
+ }
+ case ISD::FROUND: {
+ unsigned FRINTAOpcs[] = { AArch64::FRINTASr, AArch64::FRINTADr };
+ Opc = FRINTAOpcs[Variant];
break;
}
+ }
+
+ SDLoc dl(N);
+ SDValue In = N->getOperand(0);
+ SmallVector<SDValue, 2> Ops;
+ Ops.push_back(In);
+
+ if (!TM.Options.UnsafeFPMath) {
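+    // FRINTX rounds the same way but also raises the Inexact exception, so
+    // emitting it first preserves the FP exception side effects that
+    // unsafe-fp-math would otherwise allow us to drop.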
+ SDNode *FRINTX = CurDAG->getMachineNode(FRINTXOpc, dl, VT, MVT::Glue, In);
+ Ops.push_back(SDValue(FRINTX, 1));
+ }
- return Opc;
+ return CurDAG->getMachineNode(Opc, dl, VT, Ops);
}
-SDNode *AArch64DAGToDAGISel::SelectVTBL(SDNode *N, unsigned NumVecs,
- bool IsExt) {
- assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
- SDLoc dl(N);
+bool
+AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
+ unsigned RegWidth) {
+ APFloat FVal(0.0);
+ if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N))
+ FVal = CN->getValueAPF();
+ else if (LoadSDNode *LN = dyn_cast<LoadSDNode>(N)) {
+ // Some otherwise illegal constants are allowed in this case.
+ if (LN->getOperand(1).getOpcode() != AArch64ISD::ADDlow ||
+ !isa<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1)))
+ return false;
- // Check the element of look up table is 64-bit or not
- unsigned Vec0Idx = IsExt ? 2 : 1;
- assert(!N->getOperand(Vec0Idx + 0).getValueType().is64BitVector() &&
- "The element of lookup table for vtbl and vtbx must be 128-bit");
+ ConstantPoolSDNode *CN =
+ dyn_cast<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1));
+ FVal = cast<ConstantFP>(CN->getConstVal())->getValueAPF();
+ } else
+ return false;
- // Check the return value type is 64-bit or not
- EVT ResVT = N->getValueType(0);
- bool is64BitRes = ResVT.is64BitVector();
+ // An FCVT[SU] instruction performs: convertToInt(Val * 2^fbits) where fbits
+ // is between 1 and 32 for a destination w-register, or 1 and 64 for an
+ // x-register.
+ //
+ // By this stage, we've detected (fp_to_[su]int (fmul Val, THIS_NODE)) so we
+ // want THIS_NODE to be 2^fbits. This is much easier to deal with using
+ // integers.
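+  // (e.g. a multiplier of 16.0 corresponds to fbits = 4.)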
+ bool IsExact;
- // Create new SDValue for vector list
- SmallVector<SDValue, 4> Regs(N->op_begin() + Vec0Idx,
- N->op_begin() + Vec0Idx + NumVecs);
- SDValue TblReg = createQTuple(Regs);
- unsigned Opc = getTBLOpc(IsExt, is64BitRes, NumVecs);
+ // fbits is between 1 and 64 in the worst-case, which means the fmul
+ // could have 2^64 as an actual operand. Need 65 bits of precision.
+ APSInt IntVal(65, true);
+ FVal.convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact);
- SmallVector<SDValue, 3> Ops;
- if (IsExt)
- Ops.push_back(N->getOperand(1));
- Ops.push_back(TblReg);
- Ops.push_back(N->getOperand(Vec0Idx + NumVecs));
- return CurDAG->getMachineNode(Opc, dl, ResVT, Ops);
+ // N.b. isPowerOf2 also checks for > 0.
+ if (!IsExact || !IntVal.isPowerOf2()) return false;
+ unsigned FBits = IntVal.logBase2();
+
+ // Checks above should have guaranteed that we haven't lost information in
+ // finding FBits, but it must still be in range.
+ if (FBits == 0 || FBits > RegWidth) return false;
+
+ FixedPos = CurDAG->getTargetConstant(FBits, MVT::i32);
+ return true;
}
SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
// Dump information about the Node being selected
- DEBUG(dbgs() << "Selecting: "; Node->dump(CurDAG); dbgs() << "\n");
+ DEBUG(errs() << "Selecting: ");
+ DEBUG(Node->dump(CurDAG));
+ DEBUG(errs() << "\n");
+ // If we have a custom node, we already have selected!
if (Node->isMachineOpcode()) {
- DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << "\n");
+ DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n");
Node->setNodeId(-1);
- return NULL;
- }
-
- switch (Node->getOpcode()) {
- case ISD::ATOMIC_LOAD_ADD:
- return SelectAtomic(Node,
- AArch64::ATOMIC_LOAD_ADD_I8,
- AArch64::ATOMIC_LOAD_ADD_I16,
- AArch64::ATOMIC_LOAD_ADD_I32,
- AArch64::ATOMIC_LOAD_ADD_I64);
- case ISD::ATOMIC_LOAD_SUB:
- return SelectAtomic(Node,
- AArch64::ATOMIC_LOAD_SUB_I8,
- AArch64::ATOMIC_LOAD_SUB_I16,
- AArch64::ATOMIC_LOAD_SUB_I32,
- AArch64::ATOMIC_LOAD_SUB_I64);
- case ISD::ATOMIC_LOAD_AND:
- return SelectAtomic(Node,
- AArch64::ATOMIC_LOAD_AND_I8,
- AArch64::ATOMIC_LOAD_AND_I16,
- AArch64::ATOMIC_LOAD_AND_I32,
- AArch64::ATOMIC_LOAD_AND_I64);
- case ISD::ATOMIC_LOAD_OR:
- return SelectAtomic(Node,
- AArch64::ATOMIC_LOAD_OR_I8,
- AArch64::ATOMIC_LOAD_OR_I16,
- AArch64::ATOMIC_LOAD_OR_I32,
- AArch64::ATOMIC_LOAD_OR_I64);
- case ISD::ATOMIC_LOAD_XOR:
- return SelectAtomic(Node,
- AArch64::ATOMIC_LOAD_XOR_I8,
- AArch64::ATOMIC_LOAD_XOR_I16,
- AArch64::ATOMIC_LOAD_XOR_I32,
- AArch64::ATOMIC_LOAD_XOR_I64);
- case ISD::ATOMIC_LOAD_NAND:
- return SelectAtomic(Node,
- AArch64::ATOMIC_LOAD_NAND_I8,
- AArch64::ATOMIC_LOAD_NAND_I16,
- AArch64::ATOMIC_LOAD_NAND_I32,
- AArch64::ATOMIC_LOAD_NAND_I64);
- case ISD::ATOMIC_LOAD_MIN:
- return SelectAtomic(Node,
- AArch64::ATOMIC_LOAD_MIN_I8,
- AArch64::ATOMIC_LOAD_MIN_I16,
- AArch64::ATOMIC_LOAD_MIN_I32,
- AArch64::ATOMIC_LOAD_MIN_I64);
- case ISD::ATOMIC_LOAD_MAX:
- return SelectAtomic(Node,
- AArch64::ATOMIC_LOAD_MAX_I8,
- AArch64::ATOMIC_LOAD_MAX_I16,
- AArch64::ATOMIC_LOAD_MAX_I32,
- AArch64::ATOMIC_LOAD_MAX_I64);
- case ISD::ATOMIC_LOAD_UMIN:
- return SelectAtomic(Node,
- AArch64::ATOMIC_LOAD_UMIN_I8,
- AArch64::ATOMIC_LOAD_UMIN_I16,
- AArch64::ATOMIC_LOAD_UMIN_I32,
- AArch64::ATOMIC_LOAD_UMIN_I64);
- case ISD::ATOMIC_LOAD_UMAX:
- return SelectAtomic(Node,
- AArch64::ATOMIC_LOAD_UMAX_I8,
- AArch64::ATOMIC_LOAD_UMAX_I16,
- AArch64::ATOMIC_LOAD_UMAX_I32,
- AArch64::ATOMIC_LOAD_UMAX_I64);
- case ISD::ATOMIC_SWAP:
- return SelectAtomic(Node,
- AArch64::ATOMIC_SWAP_I8,
- AArch64::ATOMIC_SWAP_I16,
- AArch64::ATOMIC_SWAP_I32,
- AArch64::ATOMIC_SWAP_I64);
- case ISD::ATOMIC_CMP_SWAP:
- return SelectAtomic(Node,
- AArch64::ATOMIC_CMP_SWAP_I8,
- AArch64::ATOMIC_CMP_SWAP_I16,
- AArch64::ATOMIC_CMP_SWAP_I32,
- AArch64::ATOMIC_CMP_SWAP_I64);
- case ISD::FrameIndex: {
- int FI = cast<FrameIndexSDNode>(Node)->getIndex();
- EVT PtrTy = getTargetLowering()->getPointerTy();
- SDValue TFI = CurDAG->getTargetFrameIndex(FI, PtrTy);
- return CurDAG->SelectNodeTo(Node, AArch64::ADDxxi_lsl0_s, PtrTy,
- TFI, CurDAG->getTargetConstant(0, PtrTy));
+ return nullptr;
}
- case ISD::Constant: {
- SDNode *ResNode = 0;
- if (cast<ConstantSDNode>(Node)->getZExtValue() == 0) {
- // XZR and WZR are probably even better than an actual move: most of the
- // time they can be folded into another instruction with *no* cost.
-
- EVT Ty = Node->getValueType(0);
- assert((Ty == MVT::i32 || Ty == MVT::i64) && "unexpected type");
- uint16_t Register = Ty == MVT::i32 ? AArch64::WZR : AArch64::XZR;
- ResNode = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
- SDLoc(Node),
- Register, Ty).getNode();
- }
- // Next best option is a move-immediate, see if we can do that.
- if (!ResNode) {
- ResNode = TrySelectToMoveImm(Node);
- }
+ // A few node types need custom selection.
+ SDNode *ResNode = nullptr;
+ EVT VT = Node->getValueType(0);
- if (ResNode)
- return ResNode;
+ switch (Node->getOpcode()) {
+ default:
+ break;
- // If even that fails we fall back to a lit-pool entry at the moment. Future
- // tuning may change this to a sequence of MOVZ/MOVN/MOVK instructions.
- ResNode = SelectToLitPool(Node);
- assert(ResNode && "We need *some* way to materialise a constant");
+ case ISD::ADD:
+ if (SDNode *I = SelectMLAV64LaneV128(Node))
+ return I;
+ break;
- // We want to continue selection at this point since the litpool access
- // generated used generic nodes for simplicity.
- ReplaceUses(SDValue(Node, 0), SDValue(ResNode, 0));
- Node = ResNode;
+ case ISD::LOAD: {
+ // Try to select as an indexed load. Fall through to normal processing
+ // if we can't.
+ bool Done = false;
+ SDNode *I = SelectIndexedLoad(Node, Done);
+ if (Done)
+ return I;
break;
}
- case ISD::ConstantFP: {
- if (A64Imms::isFPImm(cast<ConstantFPSDNode>(Node)->getValueAPF())) {
- // FMOV will take care of it from TableGen
- break;
- }
- SDNode *ResNode = LowerToFPLitPool(Node);
- ReplaceUses(SDValue(Node, 0), SDValue(ResNode, 0));
+ case ISD::SRL:
+ case ISD::AND:
+ case ISD::SRA:
+ if (SDNode *I = SelectBitfieldExtractOp(Node))
+ return I;
+ break;
- // We want to continue selection at this point since the litpool access
- // generated used generic nodes for simplicity.
- Node = ResNode;
+ case ISD::OR:
+ if (SDNode *I = SelectBitfieldInsertOp(Node))
+ return I;
break;
+
+ case ISD::EXTRACT_VECTOR_ELT: {
+ // Extracting lane zero is a special case where we can just use a plain
+ // EXTRACT_SUBREG instruction, which will become FMOV. This is easier for
+ // the rest of the compiler, especially the register allocator and copy
+ // propagation, to reason about, so is preferred when it's possible to
+ // use it.
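+ // For example (illustrative), extracting lane 0 of a v2f64 vector becomes
+ // (EXTRACT_SUBREG Vn, dsub), a plain read of the register's low 64 bits.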
+ ConstantSDNode *LaneNode = cast<ConstantSDNode>(Node->getOperand(1));
+ // Bail and use the default Select() for non-zero lanes.
+ if (LaneNode->getZExtValue() != 0)
+ break;
+ // If the element type is not the same as the result type, likewise
+ // bail and use the default Select(), as there's more to do than just
+ // a cross-class COPY. This catches extracts of i8 and i16 elements
+ // since they will need an explicit zext.
+ if (VT != Node->getOperand(0).getValueType().getVectorElementType())
+ break;
+ unsigned SubReg;
+ switch (Node->getOperand(0)
+ .getValueType()
+ .getVectorElementType()
+ .getSizeInBits()) {
+ default:
+ llvm_unreachable("Unexpected vector element type!");
+ case 64:
+ SubReg = AArch64::dsub;
+ break;
+ case 32:
+ SubReg = AArch64::ssub;
+ break;
+ case 16: // FALLTHROUGH
+ case 8:
+ llvm_unreachable("unexpected zext-requiring extract element!");
+ }
+ SDValue Extract = CurDAG->getTargetExtractSubreg(SubReg, SDLoc(Node), VT,
+ Node->getOperand(0));
+ DEBUG(dbgs() << "ISEL: Custom selection!\n=> ");
+ DEBUG(Extract->dumpr(CurDAG));
+ DEBUG(dbgs() << "\n");
+ return Extract.getNode();
}
- case AArch64ISD::NEON_LD1_UPD: {
- static const uint16_t Opcodes[] = {
- AArch64::LD1WB_8B_fixed, AArch64::LD1WB_4H_fixed,
- AArch64::LD1WB_2S_fixed, AArch64::LD1WB_1D_fixed,
- AArch64::LD1WB_16B_fixed, AArch64::LD1WB_8H_fixed,
- AArch64::LD1WB_4S_fixed, AArch64::LD1WB_2D_fixed
- };
- return SelectVLD(Node, true, 1, Opcodes);
- }
- case AArch64ISD::NEON_LD2_UPD: {
- static const uint16_t Opcodes[] = {
- AArch64::LD2WB_8B_fixed, AArch64::LD2WB_4H_fixed,
- AArch64::LD2WB_2S_fixed, AArch64::LD1x2WB_1D_fixed,
- AArch64::LD2WB_16B_fixed, AArch64::LD2WB_8H_fixed,
- AArch64::LD2WB_4S_fixed, AArch64::LD2WB_2D_fixed
- };
- return SelectVLD(Node, true, 2, Opcodes);
- }
- case AArch64ISD::NEON_LD3_UPD: {
- static const uint16_t Opcodes[] = {
- AArch64::LD3WB_8B_fixed, AArch64::LD3WB_4H_fixed,
- AArch64::LD3WB_2S_fixed, AArch64::LD1x3WB_1D_fixed,
- AArch64::LD3WB_16B_fixed, AArch64::LD3WB_8H_fixed,
- AArch64::LD3WB_4S_fixed, AArch64::LD3WB_2D_fixed
- };
- return SelectVLD(Node, true, 3, Opcodes);
- }
- case AArch64ISD::NEON_LD4_UPD: {
- static const uint16_t Opcodes[] = {
- AArch64::LD4WB_8B_fixed, AArch64::LD4WB_4H_fixed,
- AArch64::LD4WB_2S_fixed, AArch64::LD1x4WB_1D_fixed,
- AArch64::LD4WB_16B_fixed, AArch64::LD4WB_8H_fixed,
- AArch64::LD4WB_4S_fixed, AArch64::LD4WB_2D_fixed
- };
- return SelectVLD(Node, true, 4, Opcodes);
- }
- case AArch64ISD::NEON_LD1x2_UPD: {
- static const uint16_t Opcodes[] = {
- AArch64::LD1x2WB_8B_fixed, AArch64::LD1x2WB_4H_fixed,
- AArch64::LD1x2WB_2S_fixed, AArch64::LD1x2WB_1D_fixed,
- AArch64::LD1x2WB_16B_fixed, AArch64::LD1x2WB_8H_fixed,
- AArch64::LD1x2WB_4S_fixed, AArch64::LD1x2WB_2D_fixed
- };
- return SelectVLD(Node, true, 2, Opcodes);
- }
- case AArch64ISD::NEON_LD1x3_UPD: {
- static const uint16_t Opcodes[] = {
- AArch64::LD1x3WB_8B_fixed, AArch64::LD1x3WB_4H_fixed,
- AArch64::LD1x3WB_2S_fixed, AArch64::LD1x3WB_1D_fixed,
- AArch64::LD1x3WB_16B_fixed, AArch64::LD1x3WB_8H_fixed,
- AArch64::LD1x3WB_4S_fixed, AArch64::LD1x3WB_2D_fixed
- };
- return SelectVLD(Node, true, 3, Opcodes);
- }
- case AArch64ISD::NEON_LD1x4_UPD: {
- static const uint16_t Opcodes[] = {
- AArch64::LD1x4WB_8B_fixed, AArch64::LD1x4WB_4H_fixed,
- AArch64::LD1x4WB_2S_fixed, AArch64::LD1x4WB_1D_fixed,
- AArch64::LD1x4WB_16B_fixed, AArch64::LD1x4WB_8H_fixed,
- AArch64::LD1x4WB_4S_fixed, AArch64::LD1x4WB_2D_fixed
- };
- return SelectVLD(Node, true, 4, Opcodes);
- }
- case AArch64ISD::NEON_ST1_UPD: {
- static const uint16_t Opcodes[] = {
- AArch64::ST1WB_8B_fixed, AArch64::ST1WB_4H_fixed,
- AArch64::ST1WB_2S_fixed, AArch64::ST1WB_1D_fixed,
- AArch64::ST1WB_16B_fixed, AArch64::ST1WB_8H_fixed,
- AArch64::ST1WB_4S_fixed, AArch64::ST1WB_2D_fixed
- };
- return SelectVST(Node, true, 1, Opcodes);
- }
- case AArch64ISD::NEON_ST2_UPD: {
- static const uint16_t Opcodes[] = {
- AArch64::ST2WB_8B_fixed, AArch64::ST2WB_4H_fixed,
- AArch64::ST2WB_2S_fixed, AArch64::ST1x2WB_1D_fixed,
- AArch64::ST2WB_16B_fixed, AArch64::ST2WB_8H_fixed,
- AArch64::ST2WB_4S_fixed, AArch64::ST2WB_2D_fixed
- };
- return SelectVST(Node, true, 2, Opcodes);
- }
- case AArch64ISD::NEON_ST3_UPD: {
- static const uint16_t Opcodes[] = {
- AArch64::ST3WB_8B_fixed, AArch64::ST3WB_4H_fixed,
- AArch64::ST3WB_2S_fixed, AArch64::ST1x3WB_1D_fixed,
- AArch64::ST3WB_16B_fixed, AArch64::ST3WB_8H_fixed,
- AArch64::ST3WB_4S_fixed, AArch64::ST3WB_2D_fixed
- };
- return SelectVST(Node, true, 3, Opcodes);
- }
- case AArch64ISD::NEON_ST4_UPD: {
- static const uint16_t Opcodes[] = {
- AArch64::ST4WB_8B_fixed, AArch64::ST4WB_4H_fixed,
- AArch64::ST4WB_2S_fixed, AArch64::ST1x4WB_1D_fixed,
- AArch64::ST4WB_16B_fixed, AArch64::ST4WB_8H_fixed,
- AArch64::ST4WB_4S_fixed, AArch64::ST4WB_2D_fixed
- };
- return SelectVST(Node, true, 4, Opcodes);
- }
- case AArch64ISD::NEON_LD2DUP: {
- static const uint16_t Opcodes[] = {
- AArch64::LD2R_8B, AArch64::LD2R_4H, AArch64::LD2R_2S,
- AArch64::LD2R_1D, AArch64::LD2R_16B, AArch64::LD2R_8H,
- AArch64::LD2R_4S, AArch64::LD2R_2D
- };
- return SelectVLDDup(Node, false, 2, Opcodes);
- }
- case AArch64ISD::NEON_LD3DUP: {
- static const uint16_t Opcodes[] = {
- AArch64::LD3R_8B, AArch64::LD3R_4H, AArch64::LD3R_2S,
- AArch64::LD3R_1D, AArch64::LD3R_16B, AArch64::LD3R_8H,
- AArch64::LD3R_4S, AArch64::LD3R_2D
- };
- return SelectVLDDup(Node, false, 3, Opcodes);
- }
- case AArch64ISD::NEON_LD4DUP: {
- static const uint16_t Opcodes[] = {
- AArch64::LD4R_8B, AArch64::LD4R_4H, AArch64::LD4R_2S,
- AArch64::LD4R_1D, AArch64::LD4R_16B, AArch64::LD4R_8H,
- AArch64::LD4R_4S, AArch64::LD4R_2D
- };
- return SelectVLDDup(Node, false, 4, Opcodes);
- }
- case AArch64ISD::NEON_LD2DUP_UPD: {
- static const uint16_t Opcodes[] = {
- AArch64::LD2R_WB_8B_fixed, AArch64::LD2R_WB_4H_fixed,
- AArch64::LD2R_WB_2S_fixed, AArch64::LD2R_WB_1D_fixed,
- AArch64::LD2R_WB_16B_fixed, AArch64::LD2R_WB_8H_fixed,
- AArch64::LD2R_WB_4S_fixed, AArch64::LD2R_WB_2D_fixed
- };
- return SelectVLDDup(Node, true, 2, Opcodes);
- }
- case AArch64ISD::NEON_LD3DUP_UPD: {
- static const uint16_t Opcodes[] = {
- AArch64::LD3R_WB_8B_fixed, AArch64::LD3R_WB_4H_fixed,
- AArch64::LD3R_WB_2S_fixed, AArch64::LD3R_WB_1D_fixed,
- AArch64::LD3R_WB_16B_fixed, AArch64::LD3R_WB_8H_fixed,
- AArch64::LD3R_WB_4S_fixed, AArch64::LD3R_WB_2D_fixed
- };
- return SelectVLDDup(Node, true, 3, Opcodes);
- }
- case AArch64ISD::NEON_LD4DUP_UPD: {
- static const uint16_t Opcodes[] = {
- AArch64::LD4R_WB_8B_fixed, AArch64::LD4R_WB_4H_fixed,
- AArch64::LD4R_WB_2S_fixed, AArch64::LD4R_WB_1D_fixed,
- AArch64::LD4R_WB_16B_fixed, AArch64::LD4R_WB_8H_fixed,
- AArch64::LD4R_WB_4S_fixed, AArch64::LD4R_WB_2D_fixed
- };
- return SelectVLDDup(Node, true, 4, Opcodes);
- }
- case AArch64ISD::NEON_LD2LN_UPD: {
- static const uint16_t Opcodes[] = {
- AArch64::LD2LN_WB_B_fixed, AArch64::LD2LN_WB_H_fixed,
- AArch64::LD2LN_WB_S_fixed, AArch64::LD2LN_WB_D_fixed
- };
- return SelectVLDSTLane(Node, true, true, 2, Opcodes);
- }
- case AArch64ISD::NEON_LD3LN_UPD: {
- static const uint16_t Opcodes[] = {
- AArch64::LD3LN_WB_B_fixed, AArch64::LD3LN_WB_H_fixed,
- AArch64::LD3LN_WB_S_fixed, AArch64::LD3LN_WB_D_fixed
- };
- return SelectVLDSTLane(Node, true, true, 3, Opcodes);
- }
- case AArch64ISD::NEON_LD4LN_UPD: {
- static const uint16_t Opcodes[] = {
- AArch64::LD4LN_WB_B_fixed, AArch64::LD4LN_WB_H_fixed,
- AArch64::LD4LN_WB_S_fixed, AArch64::LD4LN_WB_D_fixed
- };
- return SelectVLDSTLane(Node, true, true, 4, Opcodes);
- }
- case AArch64ISD::NEON_ST2LN_UPD: {
- static const uint16_t Opcodes[] = {
- AArch64::ST2LN_WB_B_fixed, AArch64::ST2LN_WB_H_fixed,
- AArch64::ST2LN_WB_S_fixed, AArch64::ST2LN_WB_D_fixed
- };
- return SelectVLDSTLane(Node, false, true, 2, Opcodes);
- }
- case AArch64ISD::NEON_ST3LN_UPD: {
- static const uint16_t Opcodes[] = {
- AArch64::ST3LN_WB_B_fixed, AArch64::ST3LN_WB_H_fixed,
- AArch64::ST3LN_WB_S_fixed, AArch64::ST3LN_WB_D_fixed
- };
- return SelectVLDSTLane(Node, false, true, 3, Opcodes);
- }
- case AArch64ISD::NEON_ST4LN_UPD: {
- static const uint16_t Opcodes[] = {
- AArch64::ST4LN_WB_B_fixed, AArch64::ST4LN_WB_H_fixed,
- AArch64::ST4LN_WB_S_fixed, AArch64::ST4LN_WB_D_fixed
- };
- return SelectVLDSTLane(Node, false, true, 4, Opcodes);
- }
- case AArch64ISD::NEON_ST1x2_UPD: {
- static const uint16_t Opcodes[] = {
- AArch64::ST1x2WB_8B_fixed, AArch64::ST1x2WB_4H_fixed,
- AArch64::ST1x2WB_2S_fixed, AArch64::ST1x2WB_1D_fixed,
- AArch64::ST1x2WB_16B_fixed, AArch64::ST1x2WB_8H_fixed,
- AArch64::ST1x2WB_4S_fixed, AArch64::ST1x2WB_2D_fixed
- };
- return SelectVST(Node, true, 2, Opcodes);
- }
- case AArch64ISD::NEON_ST1x3_UPD: {
- static const uint16_t Opcodes[] = {
- AArch64::ST1x3WB_8B_fixed, AArch64::ST1x3WB_4H_fixed,
- AArch64::ST1x3WB_2S_fixed, AArch64::ST1x3WB_1D_fixed,
- AArch64::ST1x3WB_16B_fixed, AArch64::ST1x3WB_8H_fixed,
- AArch64::ST1x3WB_4S_fixed, AArch64::ST1x3WB_2D_fixed
- };
- return SelectVST(Node, true, 3, Opcodes);
- }
- case AArch64ISD::NEON_ST1x4_UPD: {
- static const uint16_t Opcodes[] = {
- AArch64::ST1x4WB_8B_fixed, AArch64::ST1x4WB_4H_fixed,
- AArch64::ST1x4WB_2S_fixed, AArch64::ST1x4WB_1D_fixed,
- AArch64::ST1x4WB_16B_fixed, AArch64::ST1x4WB_8H_fixed,
- AArch64::ST1x4WB_4S_fixed, AArch64::ST1x4WB_2D_fixed
- };
- return SelectVST(Node, true, 4, Opcodes);
- }
- case ISD::INTRINSIC_WO_CHAIN: {
- unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
- bool IsExt = false;
- switch (IntNo) {
- default:
- break;
- case Intrinsic::aarch64_neon_vtbx1:
- IsExt = true;
- case Intrinsic::aarch64_neon_vtbl1:
- return SelectVTBL(Node, 1, IsExt);
- case Intrinsic::aarch64_neon_vtbx2:
- IsExt = true;
- case Intrinsic::aarch64_neon_vtbl2:
- return SelectVTBL(Node, 2, IsExt);
- case Intrinsic::aarch64_neon_vtbx3:
- IsExt = true;
- case Intrinsic::aarch64_neon_vtbl3:
- return SelectVTBL(Node, 3, IsExt);
- case Intrinsic::aarch64_neon_vtbx4:
- IsExt = true;
- case Intrinsic::aarch64_neon_vtbl4:
- return SelectVTBL(Node, 4, IsExt);
+ case ISD::Constant: {
+ // Materialize zero constants as copies from WZR/XZR. This allows
+ // the coalescer to propagate these into other instructions.
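+ // For example (illustrative), a store of zero can then use the zero
+ // register directly, as in "str wzr, [x0]", with no extra MOV.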
+ ConstantSDNode *ConstNode = cast<ConstantSDNode>(Node);
+ if (ConstNode->isNullValue()) {
+ if (VT == MVT::i32)
+ return CurDAG->getCopyFromReg(CurDAG->getEntryNode(), SDLoc(Node),
+ AArch64::WZR, MVT::i32).getNode();
+ else if (VT == MVT::i64)
+ return CurDAG->getCopyFromReg(CurDAG->getEntryNode(), SDLoc(Node),
+ AArch64::XZR, MVT::i64).getNode();
}
break;
}
- case ISD::INTRINSIC_VOID:
+
+ case ISD::FrameIndex: {
+ // Selects to ADDXri FI, 0 which in turn will become ADDXri SP, imm.
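+ // (Illustrative: the eventual assembly for taking a stack slot's address
+ // is "add xN, sp, #offset".)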
+ int FI = cast<FrameIndexSDNode>(Node)->getIndex();
+ unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0);
+ const TargetLowering *TLI = getTargetLowering();
+ SDValue TFI = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
+ SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, MVT::i32),
+ CurDAG->getTargetConstant(Shifter, MVT::i32) };
+ return CurDAG->SelectNodeTo(Node, AArch64::ADDXri, MVT::i64, Ops);
+ }
case ISD::INTRINSIC_W_CHAIN: {
unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
switch (IntNo) {
default:
break;
- case Intrinsic::arm_neon_vld1: {
- static const uint16_t Opcodes[] = {
- AArch64::LD1_8B, AArch64::LD1_4H, AArch64::LD1_2S, AArch64::LD1_1D,
- AArch64::LD1_16B, AArch64::LD1_8H, AArch64::LD1_4S, AArch64::LD1_2D
- };
- return SelectVLD(Node, false, 1, Opcodes);
- }
- case Intrinsic::arm_neon_vld2: {
- static const uint16_t Opcodes[] = {
- AArch64::LD2_8B, AArch64::LD2_4H, AArch64::LD2_2S, AArch64::LD1x2_1D,
- AArch64::LD2_16B, AArch64::LD2_8H, AArch64::LD2_4S, AArch64::LD2_2D
- };
- return SelectVLD(Node, false, 2, Opcodes);
- }
- case Intrinsic::arm_neon_vld3: {
- static const uint16_t Opcodes[] = {
- AArch64::LD3_8B, AArch64::LD3_4H, AArch64::LD3_2S, AArch64::LD1x3_1D,
- AArch64::LD3_16B, AArch64::LD3_8H, AArch64::LD3_4S, AArch64::LD3_2D
- };
- return SelectVLD(Node, false, 3, Opcodes);
+ case Intrinsic::aarch64_ldaxp:
+ case Intrinsic::aarch64_ldxp: {
+ unsigned Op =
+ IntNo == Intrinsic::aarch64_ldaxp ? AArch64::LDAXPX : AArch64::LDXPX;
+ SDValue MemAddr = Node->getOperand(2);
+ SDLoc DL(Node);
+ SDValue Chain = Node->getOperand(0);
+
+ SDNode *Ld = CurDAG->getMachineNode(Op, DL, MVT::i64, MVT::i64,
+ MVT::Other, MemAddr, Chain);
+
+ // Transfer memoperands.
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = cast<MemIntrinsicSDNode>(Node)->getMemOperand();
+ cast<MachineSDNode>(Ld)->setMemRefs(MemOp, MemOp + 1);
+ return Ld;
}
- case Intrinsic::arm_neon_vld4: {
- static const uint16_t Opcodes[] = {
- AArch64::LD4_8B, AArch64::LD4_4H, AArch64::LD4_2S, AArch64::LD1x4_1D,
- AArch64::LD4_16B, AArch64::LD4_8H, AArch64::LD4_4S, AArch64::LD4_2D
- };
- return SelectVLD(Node, false, 4, Opcodes);
+ case Intrinsic::aarch64_stlxp:
+ case Intrinsic::aarch64_stxp: {
+ unsigned Op =
+ IntNo == Intrinsic::aarch64_stlxp ? AArch64::STLXPX : AArch64::STXPX;
+ SDLoc DL(Node);
+ SDValue Chain = Node->getOperand(0);
+ SDValue ValLo = Node->getOperand(2);
+ SDValue ValHi = Node->getOperand(3);
+ SDValue MemAddr = Node->getOperand(4);
+
+ // Place arguments in the right order.
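+ // (Illustrative: this mirrors the assembly operand order
+ // "stxp wS, xLo, xHi, [xN]".)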
+ SmallVector<SDValue, 7> Ops;
+ Ops.push_back(ValLo);
+ Ops.push_back(ValHi);
+ Ops.push_back(MemAddr);
+ Ops.push_back(Chain);
+
+ SDNode *St = CurDAG->getMachineNode(Op, DL, MVT::i32, MVT::Other, Ops);
+ // Transfer memoperands.
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = cast<MemIntrinsicSDNode>(Node)->getMemOperand();
+ cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1);
+
+ return St;
}
- case Intrinsic::aarch64_neon_vld1x2: {
- static const uint16_t Opcodes[] = {
- AArch64::LD1x2_8B, AArch64::LD1x2_4H, AArch64::LD1x2_2S,
- AArch64::LD1x2_1D, AArch64::LD1x2_16B, AArch64::LD1x2_8H,
- AArch64::LD1x2_4S, AArch64::LD1x2_2D
- };
- return SelectVLD(Node, false, 2, Opcodes);
- }
- case Intrinsic::aarch64_neon_vld1x3: {
- static const uint16_t Opcodes[] = {
- AArch64::LD1x3_8B, AArch64::LD1x3_4H, AArch64::LD1x3_2S,
- AArch64::LD1x3_1D, AArch64::LD1x3_16B, AArch64::LD1x3_8H,
- AArch64::LD1x3_4S, AArch64::LD1x3_2D
- };
- return SelectVLD(Node, false, 3, Opcodes);
- }
- case Intrinsic::aarch64_neon_vld1x4: {
- static const uint16_t Opcodes[] = {
- AArch64::LD1x4_8B, AArch64::LD1x4_4H, AArch64::LD1x4_2S,
- AArch64::LD1x4_1D, AArch64::LD1x4_16B, AArch64::LD1x4_8H,
- AArch64::LD1x4_4S, AArch64::LD1x4_2D
- };
- return SelectVLD(Node, false, 4, Opcodes);
- }
- case Intrinsic::arm_neon_vst1: {
- static const uint16_t Opcodes[] = {
- AArch64::ST1_8B, AArch64::ST1_4H, AArch64::ST1_2S, AArch64::ST1_1D,
- AArch64::ST1_16B, AArch64::ST1_8H, AArch64::ST1_4S, AArch64::ST1_2D
- };
- return SelectVST(Node, false, 1, Opcodes);
- }
- case Intrinsic::arm_neon_vst2: {
- static const uint16_t Opcodes[] = {
- AArch64::ST2_8B, AArch64::ST2_4H, AArch64::ST2_2S, AArch64::ST1x2_1D,
- AArch64::ST2_16B, AArch64::ST2_8H, AArch64::ST2_4S, AArch64::ST2_2D
- };
- return SelectVST(Node, false, 2, Opcodes);
+ case Intrinsic::aarch64_neon_ld1x2:
+ if (VT == MVT::v8i8)
+ return SelectLoad(Node, 2, AArch64::LD1Twov8b, AArch64::dsub0);
+ else if (VT == MVT::v16i8)
+ return SelectLoad(Node, 2, AArch64::LD1Twov16b, AArch64::qsub0);
+ else if (VT == MVT::v4i16)
+ return SelectLoad(Node, 2, AArch64::LD1Twov4h, AArch64::dsub0);
+ else if (VT == MVT::v8i16)
+ return SelectLoad(Node, 2, AArch64::LD1Twov8h, AArch64::qsub0);
+ else if (VT == MVT::v2i32 || VT == MVT::v2f32)
+ return SelectLoad(Node, 2, AArch64::LD1Twov2s, AArch64::dsub0);
+ else if (VT == MVT::v4i32 || VT == MVT::v4f32)
+ return SelectLoad(Node, 2, AArch64::LD1Twov4s, AArch64::qsub0);
+ else if (VT == MVT::v1i64 || VT == MVT::v1f64)
+ return SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
+ else if (VT == MVT::v2i64 || VT == MVT::v2f64)
+ return SelectLoad(Node, 2, AArch64::LD1Twov2d, AArch64::qsub0);
+ break;
+ case Intrinsic::aarch64_neon_ld1x3:
+ if (VT == MVT::v8i8)
+ return SelectLoad(Node, 3, AArch64::LD1Threev8b, AArch64::dsub0);
+ else if (VT == MVT::v16i8)
+ return SelectLoad(Node, 3, AArch64::LD1Threev16b, AArch64::qsub0);
+ else if (VT == MVT::v4i16)
+ return SelectLoad(Node, 3, AArch64::LD1Threev4h, AArch64::dsub0);
+ else if (VT == MVT::v8i16)
+ return SelectLoad(Node, 3, AArch64::LD1Threev8h, AArch64::qsub0);
+ else if (VT == MVT::v2i32 || VT == MVT::v2f32)
+ return SelectLoad(Node, 3, AArch64::LD1Threev2s, AArch64::dsub0);
+ else if (VT == MVT::v4i32 || VT == MVT::v4f32)
+ return SelectLoad(Node, 3, AArch64::LD1Threev4s, AArch64::qsub0);
+ else if (VT == MVT::v1i64 || VT == MVT::v1f64)
+ return SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
+ else if (VT == MVT::v2i64 || VT == MVT::v2f64)
+ return SelectLoad(Node, 3, AArch64::LD1Threev2d, AArch64::qsub0);
+ break;
+ case Intrinsic::aarch64_neon_ld1x4:
+ if (VT == MVT::v8i8)
+ return SelectLoad(Node, 4, AArch64::LD1Fourv8b, AArch64::dsub0);
+ else if (VT == MVT::v16i8)
+ return SelectLoad(Node, 4, AArch64::LD1Fourv16b, AArch64::qsub0);
+ else if (VT == MVT::v4i16)
+ return SelectLoad(Node, 4, AArch64::LD1Fourv4h, AArch64::dsub0);
+ else if (VT == MVT::v8i16)
+ return SelectLoad(Node, 4, AArch64::LD1Fourv8h, AArch64::qsub0);
+ else if (VT == MVT::v2i32 || VT == MVT::v2f32)
+ return SelectLoad(Node, 4, AArch64::LD1Fourv2s, AArch64::dsub0);
+ else if (VT == MVT::v4i32 || VT == MVT::v4f32)
+ return SelectLoad(Node, 4, AArch64::LD1Fourv4s, AArch64::qsub0);
+ else if (VT == MVT::v1i64 || VT == MVT::v1f64)
+ return SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
+ else if (VT == MVT::v2i64 || VT == MVT::v2f64)
+ return SelectLoad(Node, 4, AArch64::LD1Fourv2d, AArch64::qsub0);
+ break;
+ case Intrinsic::aarch64_neon_ld2:
+ if (VT == MVT::v8i8)
+ return SelectLoad(Node, 2, AArch64::LD2Twov8b, AArch64::dsub0);
+ else if (VT == MVT::v16i8)
+ return SelectLoad(Node, 2, AArch64::LD2Twov16b, AArch64::qsub0);
+ else if (VT == MVT::v4i16)
+ return SelectLoad(Node, 2, AArch64::LD2Twov4h, AArch64::dsub0);
+ else if (VT == MVT::v8i16)
+ return SelectLoad(Node, 2, AArch64::LD2Twov8h, AArch64::qsub0);
+ else if (VT == MVT::v2i32 || VT == MVT::v2f32)
+ return SelectLoad(Node, 2, AArch64::LD2Twov2s, AArch64::dsub0);
+ else if (VT == MVT::v4i32 || VT == MVT::v4f32)
+ return SelectLoad(Node, 2, AArch64::LD2Twov4s, AArch64::qsub0);
+ else if (VT == MVT::v1i64 || VT == MVT::v1f64)
+ return SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
+ else if (VT == MVT::v2i64 || VT == MVT::v2f64)
+ return SelectLoad(Node, 2, AArch64::LD2Twov2d, AArch64::qsub0);
+ break;
+ case Intrinsic::aarch64_neon_ld3:
+ if (VT == MVT::v8i8)
+ return SelectLoad(Node, 3, AArch64::LD3Threev8b, AArch64::dsub0);
+ else if (VT == MVT::v16i8)
+ return SelectLoad(Node, 3, AArch64::LD3Threev16b, AArch64::qsub0);
+ else if (VT == MVT::v4i16)
+ return SelectLoad(Node, 3, AArch64::LD3Threev4h, AArch64::dsub0);
+ else if (VT == MVT::v8i16)
+ return SelectLoad(Node, 3, AArch64::LD3Threev8h, AArch64::qsub0);
+ else if (VT == MVT::v2i32 || VT == MVT::v2f32)
+ return SelectLoad(Node, 3, AArch64::LD3Threev2s, AArch64::dsub0);
+ else if (VT == MVT::v4i32 || VT == MVT::v4f32)
+ return SelectLoad(Node, 3, AArch64::LD3Threev4s, AArch64::qsub0);
+ else if (VT == MVT::v1i64 || VT == MVT::v1f64)
+ return SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
+ else if (VT == MVT::v2i64 || VT == MVT::v2f64)
+ return SelectLoad(Node, 3, AArch64::LD3Threev2d, AArch64::qsub0);
+ break;
+ case Intrinsic::aarch64_neon_ld4:
+ if (VT == MVT::v8i8)
+ return SelectLoad(Node, 4, AArch64::LD4Fourv8b, AArch64::dsub0);
+ else if (VT == MVT::v16i8)
+ return SelectLoad(Node, 4, AArch64::LD4Fourv16b, AArch64::qsub0);
+ else if (VT == MVT::v4i16)
+ return SelectLoad(Node, 4, AArch64::LD4Fourv4h, AArch64::dsub0);
+ else if (VT == MVT::v8i16)
+ return SelectLoad(Node, 4, AArch64::LD4Fourv8h, AArch64::qsub0);
+ else if (VT == MVT::v2i32 || VT == MVT::v2f32)
+ return SelectLoad(Node, 4, AArch64::LD4Fourv2s, AArch64::dsub0);
+ else if (VT == MVT::v4i32 || VT == MVT::v4f32)
+ return SelectLoad(Node, 4, AArch64::LD4Fourv4s, AArch64::qsub0);
+ else if (VT == MVT::v1i64 || VT == MVT::v1f64)
+ return SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
+ else if (VT == MVT::v2i64 || VT == MVT::v2f64)
+ return SelectLoad(Node, 4, AArch64::LD4Fourv2d, AArch64::qsub0);
+ break;
+ case Intrinsic::aarch64_neon_ld2r:
+ if (VT == MVT::v8i8)
+ return SelectLoad(Node, 2, AArch64::LD2Rv8b, AArch64::dsub0);
+ else if (VT == MVT::v16i8)
+ return SelectLoad(Node, 2, AArch64::LD2Rv16b, AArch64::qsub0);
+ else if (VT == MVT::v4i16)
+ return SelectLoad(Node, 2, AArch64::LD2Rv4h, AArch64::dsub0);
+ else if (VT == MVT::v8i16)
+ return SelectLoad(Node, 2, AArch64::LD2Rv8h, AArch64::qsub0);
+ else if (VT == MVT::v2i32 || VT == MVT::v2f32)
+ return SelectLoad(Node, 2, AArch64::LD2Rv2s, AArch64::dsub0);
+ else if (VT == MVT::v4i32 || VT == MVT::v4f32)
+ return SelectLoad(Node, 2, AArch64::LD2Rv4s, AArch64::qsub0);
+ else if (VT == MVT::v1i64 || VT == MVT::v1f64)
+ return SelectLoad(Node, 2, AArch64::LD2Rv1d, AArch64::dsub0);
+ else if (VT == MVT::v2i64 || VT == MVT::v2f64)
+ return SelectLoad(Node, 2, AArch64::LD2Rv2d, AArch64::qsub0);
+ break;
+ case Intrinsic::aarch64_neon_ld3r:
+ if (VT == MVT::v8i8)
+ return SelectLoad(Node, 3, AArch64::LD3Rv8b, AArch64::dsub0);
+ else if (VT == MVT::v16i8)
+ return SelectLoad(Node, 3, AArch64::LD3Rv16b, AArch64::qsub0);
+ else if (VT == MVT::v4i16)
+ return SelectLoad(Node, 3, AArch64::LD3Rv4h, AArch64::dsub0);
+ else if (VT == MVT::v8i16)
+ return SelectLoad(Node, 3, AArch64::LD3Rv8h, AArch64::qsub0);
+ else if (VT == MVT::v2i32 || VT == MVT::v2f32)
+ return SelectLoad(Node, 3, AArch64::LD3Rv2s, AArch64::dsub0);
+ else if (VT == MVT::v4i32 || VT == MVT::v4f32)
+ return SelectLoad(Node, 3, AArch64::LD3Rv4s, AArch64::qsub0);
+ else if (VT == MVT::v1i64 || VT == MVT::v1f64)
+ return SelectLoad(Node, 3, AArch64::LD3Rv1d, AArch64::dsub0);
+ else if (VT == MVT::v2i64 || VT == MVT::v2f64)
+ return SelectLoad(Node, 3, AArch64::LD3Rv2d, AArch64::qsub0);
+ break;
+ case Intrinsic::aarch64_neon_ld4r:
+ if (VT == MVT::v8i8)
+ return SelectLoad(Node, 4, AArch64::LD4Rv8b, AArch64::dsub0);
+ else if (VT == MVT::v16i8)
+ return SelectLoad(Node, 4, AArch64::LD4Rv16b, AArch64::qsub0);
+ else if (VT == MVT::v4i16)
+ return SelectLoad(Node, 4, AArch64::LD4Rv4h, AArch64::dsub0);
+ else if (VT == MVT::v8i16)
+ return SelectLoad(Node, 4, AArch64::LD4Rv8h, AArch64::qsub0);
+ else if (VT == MVT::v2i32 || VT == MVT::v2f32)
+ return SelectLoad(Node, 4, AArch64::LD4Rv2s, AArch64::dsub0);
+ else if (VT == MVT::v4i32 || VT == MVT::v4f32)
+ return SelectLoad(Node, 4, AArch64::LD4Rv4s, AArch64::qsub0);
+ else if (VT == MVT::v1i64 || VT == MVT::v1f64)
+ return SelectLoad(Node, 4, AArch64::LD4Rv1d, AArch64::dsub0);
+ else if (VT == MVT::v2i64 || VT == MVT::v2f64)
+ return SelectLoad(Node, 4, AArch64::LD4Rv2d, AArch64::qsub0);
+ break;
+ case Intrinsic::aarch64_neon_ld2lane:
+ if (VT == MVT::v16i8 || VT == MVT::v8i8)
+ return SelectLoadLane(Node, 2, AArch64::LD2i8);
+ else if (VT == MVT::v8i16 || VT == MVT::v4i16)
+ return SelectLoadLane(Node, 2, AArch64::LD2i16);
+ else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
+ VT == MVT::v2f32)
+ return SelectLoadLane(Node, 2, AArch64::LD2i32);
+ else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
+ VT == MVT::v1f64)
+ return SelectLoadLane(Node, 2, AArch64::LD2i64);
+ break;
+ case Intrinsic::aarch64_neon_ld3lane:
+ if (VT == MVT::v16i8 || VT == MVT::v8i8)
+ return SelectLoadLane(Node, 3, AArch64::LD3i8);
+ else if (VT == MVT::v8i16 || VT == MVT::v4i16)
+ return SelectLoadLane(Node, 3, AArch64::LD3i16);
+ else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
+ VT == MVT::v2f32)
+ return SelectLoadLane(Node, 3, AArch64::LD3i32);
+ else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
+ VT == MVT::v1f64)
+ return SelectLoadLane(Node, 3, AArch64::LD3i64);
+ break;
+ case Intrinsic::aarch64_neon_ld4lane:
+ if (VT == MVT::v16i8 || VT == MVT::v8i8)
+ return SelectLoadLane(Node, 4, AArch64::LD4i8);
+ else if (VT == MVT::v8i16 || VT == MVT::v4i16)
+ return SelectLoadLane(Node, 4, AArch64::LD4i16);
+ else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
+ VT == MVT::v2f32)
+ return SelectLoadLane(Node, 4, AArch64::LD4i32);
+ else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
+ VT == MVT::v1f64)
+ return SelectLoadLane(Node, 4, AArch64::LD4i64);
+ break;
}
- case Intrinsic::arm_neon_vst3: {
- static const uint16_t Opcodes[] = {
- AArch64::ST3_8B, AArch64::ST3_4H, AArch64::ST3_2S, AArch64::ST1x3_1D,
- AArch64::ST3_16B, AArch64::ST3_8H, AArch64::ST3_4S, AArch64::ST3_2D
- };
- return SelectVST(Node, false, 3, Opcodes);
+ } break;
+ case ISD::INTRINSIC_WO_CHAIN: {
+ unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
+ switch (IntNo) {
+ default:
+ break;
+ case Intrinsic::aarch64_neon_tbl2:
+ return SelectTable(Node, 2, VT == MVT::v8i8 ? AArch64::TBLv8i8Two
+ : AArch64::TBLv16i8Two,
+ false);
+ case Intrinsic::aarch64_neon_tbl3:
+ return SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBLv8i8Three
+ : AArch64::TBLv16i8Three,
+ false);
+ case Intrinsic::aarch64_neon_tbl4:
+ return SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBLv8i8Four
+ : AArch64::TBLv16i8Four,
+ false);
+ case Intrinsic::aarch64_neon_tbx2:
+ return SelectTable(Node, 2, VT == MVT::v8i8 ? AArch64::TBXv8i8Two
+ : AArch64::TBXv16i8Two,
+ true);
+ case Intrinsic::aarch64_neon_tbx3:
+ return SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBXv8i8Three
+ : AArch64::TBXv16i8Three,
+ true);
+ case Intrinsic::aarch64_neon_tbx4:
+ return SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBXv8i8Four
+ : AArch64::TBXv16i8Four,
+ true);
+ case Intrinsic::aarch64_neon_smull:
+ case Intrinsic::aarch64_neon_umull:
+ if (SDNode *N = SelectMULLV64LaneV128(IntNo, Node))
+ return N;
+ break;
}
- case Intrinsic::arm_neon_vst4: {
- static const uint16_t Opcodes[] = {
- AArch64::ST4_8B, AArch64::ST4_4H, AArch64::ST4_2S, AArch64::ST1x4_1D,
- AArch64::ST4_16B, AArch64::ST4_8H, AArch64::ST4_4S, AArch64::ST4_2D
- };
- return SelectVST(Node, false, 4, Opcodes);
+ break;
+ }
+ case ISD::INTRINSIC_VOID: {
+ unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
+ if (Node->getNumOperands() >= 3)
+ VT = Node->getOperand(2)->getValueType(0);
+ switch (IntNo) {
+ default:
+ break;
+ case Intrinsic::aarch64_neon_st1x2: {
+ if (VT == MVT::v8i8)
+ return SelectStore(Node, 2, AArch64::ST1Twov8b);
+ else if (VT == MVT::v16i8)
+ return SelectStore(Node, 2, AArch64::ST1Twov16b);
+ else if (VT == MVT::v4i16)
+ return SelectStore(Node, 2, AArch64::ST1Twov4h);
+ else if (VT == MVT::v8i16)
+ return SelectStore(Node, 2, AArch64::ST1Twov8h);
+ else if (VT == MVT::v2i32 || VT == MVT::v2f32)
+ return SelectStore(Node, 2, AArch64::ST1Twov2s);
+ else if (VT == MVT::v4i32 || VT == MVT::v4f32)
+ return SelectStore(Node, 2, AArch64::ST1Twov4s);
+ else if (VT == MVT::v2i64 || VT == MVT::v2f64)
+ return SelectStore(Node, 2, AArch64::ST1Twov2d);
+ else if (VT == MVT::v1i64 || VT == MVT::v1f64)
+ return SelectStore(Node, 2, AArch64::ST1Twov1d);
+ break;
}
- case Intrinsic::aarch64_neon_vst1x2: {
- static const uint16_t Opcodes[] = {
- AArch64::ST1x2_8B, AArch64::ST1x2_4H, AArch64::ST1x2_2S,
- AArch64::ST1x2_1D, AArch64::ST1x2_16B, AArch64::ST1x2_8H,
- AArch64::ST1x2_4S, AArch64::ST1x2_2D
- };
- return SelectVST(Node, false, 2, Opcodes);
+ case Intrinsic::aarch64_neon_st1x3: {
+ if (VT == MVT::v8i8)
+ return SelectStore(Node, 3, AArch64::ST1Threev8b);
+ else if (VT == MVT::v16i8)
+ return SelectStore(Node, 3, AArch64::ST1Threev16b);
+ else if (VT == MVT::v4i16)
+ return SelectStore(Node, 3, AArch64::ST1Threev4h);
+ else if (VT == MVT::v8i16)
+ return SelectStore(Node, 3, AArch64::ST1Threev8h);
+ else if (VT == MVT::v2i32 || VT == MVT::v2f32)
+ return SelectStore(Node, 3, AArch64::ST1Threev2s);
+ else if (VT == MVT::v4i32 || VT == MVT::v4f32)
+ return SelectStore(Node, 3, AArch64::ST1Threev4s);
+ else if (VT == MVT::v2i64 || VT == MVT::v2f64)
+ return SelectStore(Node, 3, AArch64::ST1Threev2d);
+ else if (VT == MVT::v1i64 || VT == MVT::v1f64)
+ return SelectStore(Node, 3, AArch64::ST1Threev1d);
+ break;
}
- case Intrinsic::aarch64_neon_vst1x3: {
- static const uint16_t Opcodes[] = {
- AArch64::ST1x3_8B, AArch64::ST1x3_4H, AArch64::ST1x3_2S,
- AArch64::ST1x3_1D, AArch64::ST1x3_16B, AArch64::ST1x3_8H,
- AArch64::ST1x3_4S, AArch64::ST1x3_2D
- };
- return SelectVST(Node, false, 3, Opcodes);
+ case Intrinsic::aarch64_neon_st1x4: {
+ if (VT == MVT::v8i8)
+ return SelectStore(Node, 4, AArch64::ST1Fourv8b);
+ else if (VT == MVT::v16i8)
+ return SelectStore(Node, 4, AArch64::ST1Fourv16b);
+ else if (VT == MVT::v4i16)
+ return SelectStore(Node, 4, AArch64::ST1Fourv4h);
+ else if (VT == MVT::v8i16)
+ return SelectStore(Node, 4, AArch64::ST1Fourv8h);
+ else if (VT == MVT::v2i32 || VT == MVT::v2f32)
+ return SelectStore(Node, 4, AArch64::ST1Fourv2s);
+ else if (VT == MVT::v4i32 || VT == MVT::v4f32)
+ return SelectStore(Node, 4, AArch64::ST1Fourv4s);
+ else if (VT == MVT::v2i64 || VT == MVT::v2f64)
+ return SelectStore(Node, 4, AArch64::ST1Fourv2d);
+ else if (VT == MVT::v1i64 || VT == MVT::v1f64)
+ return SelectStore(Node, 4, AArch64::ST1Fourv1d);
+ break;
}
- case Intrinsic::aarch64_neon_vst1x4: {
- static const uint16_t Opcodes[] = {
- AArch64::ST1x4_8B, AArch64::ST1x4_4H, AArch64::ST1x4_2S,
- AArch64::ST1x4_1D, AArch64::ST1x4_16B, AArch64::ST1x4_8H,
- AArch64::ST1x4_4S, AArch64::ST1x4_2D
- };
- return SelectVST(Node, false, 4, Opcodes);
+ case Intrinsic::aarch64_neon_st2: {
+ if (VT == MVT::v8i8)
+ return SelectStore(Node, 2, AArch64::ST2Twov8b);
+ else if (VT == MVT::v16i8)
+ return SelectStore(Node, 2, AArch64::ST2Twov16b);
+ else if (VT == MVT::v4i16)
+ return SelectStore(Node, 2, AArch64::ST2Twov4h);
+ else if (VT == MVT::v8i16)
+ return SelectStore(Node, 2, AArch64::ST2Twov8h);
+ else if (VT == MVT::v2i32 || VT == MVT::v2f32)
+ return SelectStore(Node, 2, AArch64::ST2Twov2s);
+ else if (VT == MVT::v4i32 || VT == MVT::v4f32)
+ return SelectStore(Node, 2, AArch64::ST2Twov4s);
+ else if (VT == MVT::v2i64 || VT == MVT::v2f64)
+ return SelectStore(Node, 2, AArch64::ST2Twov2d);
+ else if (VT == MVT::v1i64 || VT == MVT::v1f64)
+ return SelectStore(Node, 2, AArch64::ST1Twov1d);
+ break;
}
- case Intrinsic::arm_neon_vld2lane: {
- static const uint16_t Opcodes[] = {
- AArch64::LD2LN_B, AArch64::LD2LN_H, AArch64::LD2LN_S, AArch64::LD2LN_D
- };
- return SelectVLDSTLane(Node, true, false, 2, Opcodes);
+ case Intrinsic::aarch64_neon_st3: {
+ if (VT == MVT::v8i8)
+ return SelectStore(Node, 3, AArch64::ST3Threev8b);
+ else if (VT == MVT::v16i8)
+ return SelectStore(Node, 3, AArch64::ST3Threev16b);
+ else if (VT == MVT::v4i16)
+ return SelectStore(Node, 3, AArch64::ST3Threev4h);
+ else if (VT == MVT::v8i16)
+ return SelectStore(Node, 3, AArch64::ST3Threev8h);
+ else if (VT == MVT::v2i32 || VT == MVT::v2f32)
+ return SelectStore(Node, 3, AArch64::ST3Threev2s);
+ else if (VT == MVT::v4i32 || VT == MVT::v4f32)
+ return SelectStore(Node, 3, AArch64::ST3Threev4s);
+ else if (VT == MVT::v2i64 || VT == MVT::v2f64)
+ return SelectStore(Node, 3, AArch64::ST3Threev2d);
+ else if (VT == MVT::v1i64 || VT == MVT::v1f64)
+ return SelectStore(Node, 3, AArch64::ST1Threev1d);
+ break;
}
- case Intrinsic::arm_neon_vld3lane: {
- static const uint16_t Opcodes[] = {
- AArch64::LD3LN_B, AArch64::LD3LN_H, AArch64::LD3LN_S, AArch64::LD3LN_D
- };
- return SelectVLDSTLane(Node, true, false, 3, Opcodes);
+ case Intrinsic::aarch64_neon_st4: {
+ if (VT == MVT::v8i8)
+ return SelectStore(Node, 4, AArch64::ST4Fourv8b);
+ else if (VT == MVT::v16i8)
+ return SelectStore(Node, 4, AArch64::ST4Fourv16b);
+ else if (VT == MVT::v4i16)
+ return SelectStore(Node, 4, AArch64::ST4Fourv4h);
+ else if (VT == MVT::v8i16)
+ return SelectStore(Node, 4, AArch64::ST4Fourv8h);
+ else if (VT == MVT::v2i32 || VT == MVT::v2f32)
+ return SelectStore(Node, 4, AArch64::ST4Fourv2s);
+ else if (VT == MVT::v4i32 || VT == MVT::v4f32)
+ return SelectStore(Node, 4, AArch64::ST4Fourv4s);
+ else if (VT == MVT::v2i64 || VT == MVT::v2f64)
+ return SelectStore(Node, 4, AArch64::ST4Fourv2d);
+ else if (VT == MVT::v1i64 || VT == MVT::v1f64)
+ return SelectStore(Node, 4, AArch64::ST1Fourv1d);
+ break;
}
- case Intrinsic::arm_neon_vld4lane: {
- static const uint16_t Opcodes[] = {
- AArch64::LD4LN_B, AArch64::LD4LN_H, AArch64::LD4LN_S, AArch64::LD4LN_D
- };
- return SelectVLDSTLane(Node, true, false, 4, Opcodes);
+ case Intrinsic::aarch64_neon_st2lane: {
+ if (VT == MVT::v16i8 || VT == MVT::v8i8)
+ return SelectStoreLane(Node, 2, AArch64::ST2i8);
+ else if (VT == MVT::v8i16 || VT == MVT::v4i16)
+ return SelectStoreLane(Node, 2, AArch64::ST2i16);
+ else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
+ VT == MVT::v2f32)
+ return SelectStoreLane(Node, 2, AArch64::ST2i32);
+ else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
+ VT == MVT::v1f64)
+ return SelectStoreLane(Node, 2, AArch64::ST2i64);
+ break;
}
- case Intrinsic::arm_neon_vst2lane: {
- static const uint16_t Opcodes[] = {
- AArch64::ST2LN_B, AArch64::ST2LN_H, AArch64::ST2LN_S, AArch64::ST2LN_D
- };
- return SelectVLDSTLane(Node, false, false, 2, Opcodes);
+ case Intrinsic::aarch64_neon_st3lane: {
+ if (VT == MVT::v16i8 || VT == MVT::v8i8)
+ return SelectStoreLane(Node, 3, AArch64::ST3i8);
+ else if (VT == MVT::v8i16 || VT == MVT::v4i16)
+ return SelectStoreLane(Node, 3, AArch64::ST3i16);
+ else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
+ VT == MVT::v2f32)
+ return SelectStoreLane(Node, 3, AArch64::ST3i32);
+ else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
+ VT == MVT::v1f64)
+ return SelectStoreLane(Node, 3, AArch64::ST3i64);
+ break;
}
- case Intrinsic::arm_neon_vst3lane: {
- static const uint16_t Opcodes[] = {
- AArch64::ST3LN_B, AArch64::ST3LN_H, AArch64::ST3LN_S, AArch64::ST3LN_D
- };
- return SelectVLDSTLane(Node, false, false, 3, Opcodes);
+ case Intrinsic::aarch64_neon_st4lane: {
+ if (VT == MVT::v16i8 || VT == MVT::v8i8)
+ return SelectStoreLane(Node, 4, AArch64::ST4i8);
+ else if (VT == MVT::v8i16 || VT == MVT::v4i16)
+ return SelectStoreLane(Node, 4, AArch64::ST4i16);
+ else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
+ VT == MVT::v2f32)
+ return SelectStoreLane(Node, 4, AArch64::ST4i32);
+ else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
+ VT == MVT::v1f64)
+ return SelectStoreLane(Node, 4, AArch64::ST4i64);
+ break;
}
- case Intrinsic::arm_neon_vst4lane: {
- static const uint16_t Opcodes[] = {
- AArch64::ST4LN_B, AArch64::ST4LN_H, AArch64::ST4LN_S, AArch64::ST4LN_D
- };
- return SelectVLDSTLane(Node, false, false, 4, Opcodes);
}
- } // End of switch IntNo
+ }
+ case AArch64ISD::LD2post: {
+ if (VT == MVT::v8i8)
+ return SelectPostLoad(Node, 2, AArch64::LD2Twov8b_POST, AArch64::dsub0);
+ else if (VT == MVT::v16i8)
+ return SelectPostLoad(Node, 2, AArch64::LD2Twov16b_POST, AArch64::qsub0);
+ else if (VT == MVT::v4i16)
+ return SelectPostLoad(Node, 2, AArch64::LD2Twov4h_POST, AArch64::dsub0);
+ else if (VT == MVT::v8i16)
+ return SelectPostLoad(Node, 2, AArch64::LD2Twov8h_POST, AArch64::qsub0);
+ else if (VT == MVT::v2i32 || VT == MVT::v2f32)
+ return SelectPostLoad(Node, 2, AArch64::LD2Twov2s_POST, AArch64::dsub0);
+ else if (VT == MVT::v4i32 || VT == MVT::v4f32)
+ return SelectPostLoad(Node, 2, AArch64::LD2Twov4s_POST, AArch64::qsub0);
+ else if (VT == MVT::v1i64 || VT == MVT::v1f64)
+ return SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
+ else if (VT == MVT::v2i64 || VT == MVT::v2f64)
+ return SelectPostLoad(Node, 2, AArch64::LD2Twov2d_POST, AArch64::qsub0);
+ break;
+ }
+ case AArch64ISD::LD3post: {
+ if (VT == MVT::v8i8)
+ return SelectPostLoad(Node, 3, AArch64::LD3Threev8b_POST, AArch64::dsub0);
+ else if (VT == MVT::v16i8)
+ return SelectPostLoad(Node, 3, AArch64::LD3Threev16b_POST, AArch64::qsub0);
+ else if (VT == MVT::v4i16)
+ return SelectPostLoad(Node, 3, AArch64::LD3Threev4h_POST, AArch64::dsub0);
+ else if (VT == MVT::v8i16)
+ return SelectPostLoad(Node, 3, AArch64::LD3Threev8h_POST, AArch64::qsub0);
+ else if (VT == MVT::v2i32 || VT == MVT::v2f32)
+ return SelectPostLoad(Node, 3, AArch64::LD3Threev2s_POST, AArch64::dsub0);
+ else if (VT == MVT::v4i32 || VT == MVT::v4f32)
+ return SelectPostLoad(Node, 3, AArch64::LD3Threev4s_POST, AArch64::qsub0);
+ else if (VT == MVT::v1i64 || VT == MVT::v1f64)
+ return SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
+ else if (VT == MVT::v2i64 || VT == MVT::v2f64)
+ return SelectPostLoad(Node, 3, AArch64::LD3Threev2d_POST, AArch64::qsub0);
+ break;
+ }
+ case AArch64ISD::LD4post: {
+ if (VT == MVT::v8i8)
+ return SelectPostLoad(Node, 4, AArch64::LD4Fourv8b_POST, AArch64::dsub0);
+ else if (VT == MVT::v16i8)
+ return SelectPostLoad(Node, 4, AArch64::LD4Fourv16b_POST, AArch64::qsub0);
+ else if (VT == MVT::v4i16)
+ return SelectPostLoad(Node, 4, AArch64::LD4Fourv4h_POST, AArch64::dsub0);
+ else if (VT == MVT::v8i16)
+ return SelectPostLoad(Node, 4, AArch64::LD4Fourv8h_POST, AArch64::qsub0);
+ else if (VT == MVT::v2i32 || VT == MVT::v2f32)
+ return SelectPostLoad(Node, 4, AArch64::LD4Fourv2s_POST, AArch64::dsub0);
+ else if (VT == MVT::v4i32 || VT == MVT::v4f32)
+ return SelectPostLoad(Node, 4, AArch64::LD4Fourv4s_POST, AArch64::qsub0);
+ else if (VT == MVT::v1i64 || VT == MVT::v1f64)
+ return SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
+ else if (VT == MVT::v2i64 || VT == MVT::v2f64)
+ return SelectPostLoad(Node, 4, AArch64::LD4Fourv2d_POST, AArch64::qsub0);
+ break;
+ }
+ case AArch64ISD::LD1x2post: {
+ if (VT == MVT::v8i8)
+ return SelectPostLoad(Node, 2, AArch64::LD1Twov8b_POST, AArch64::dsub0);
+ else if (VT == MVT::v16i8)
+ return SelectPostLoad(Node, 2, AArch64::LD1Twov16b_POST, AArch64::qsub0);
+ else if (VT == MVT::v4i16)
+ return SelectPostLoad(Node, 2, AArch64::LD1Twov4h_POST, AArch64::dsub0);
+ else if (VT == MVT::v8i16)
+ return SelectPostLoad(Node, 2, AArch64::LD1Twov8h_POST, AArch64::qsub0);
+ else if (VT == MVT::v2i32 || VT == MVT::v2f32)
+ return SelectPostLoad(Node, 2, AArch64::LD1Twov2s_POST, AArch64::dsub0);
+ else if (VT == MVT::v4i32 || VT == MVT::v4f32)
+ return SelectPostLoad(Node, 2, AArch64::LD1Twov4s_POST, AArch64::qsub0);
+ else if (VT == MVT::v1i64 || VT == MVT::v1f64)
+ return SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
+ else if (VT == MVT::v2i64 || VT == MVT::v2f64)
+ return SelectPostLoad(Node, 2, AArch64::LD1Twov2d_POST, AArch64::qsub0);
+ break;
+ }
+ case AArch64ISD::LD1x3post: {
+ if (VT == MVT::v8i8)
+ return SelectPostLoad(Node, 3, AArch64::LD1Threev8b_POST, AArch64::dsub0);
+ else if (VT == MVT::v16i8)
+ return SelectPostLoad(Node, 3, AArch64::LD1Threev16b_POST, AArch64::qsub0);
+ else if (VT == MVT::v4i16)
+ return SelectPostLoad(Node, 3, AArch64::LD1Threev4h_POST, AArch64::dsub0);
+ else if (VT == MVT::v8i16)
+ return SelectPostLoad(Node, 3, AArch64::LD1Threev8h_POST, AArch64::qsub0);
+ else if (VT == MVT::v2i32 || VT == MVT::v2f32)
+ return SelectPostLoad(Node, 3, AArch64::LD1Threev2s_POST, AArch64::dsub0);
+ else if (VT == MVT::v4i32 || VT == MVT::v4f32)
+ return SelectPostLoad(Node, 3, AArch64::LD1Threev4s_POST, AArch64::qsub0);
+ else if (VT == MVT::v1i64 || VT == MVT::v1f64)
+ return SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
+ else if (VT == MVT::v2i64 || VT == MVT::v2f64)
+ return SelectPostLoad(Node, 3, AArch64::LD1Threev2d_POST, AArch64::qsub0);
+ break;
+ }
+ case AArch64ISD::LD1x4post: {
+ if (VT == MVT::v8i8)
+ return SelectPostLoad(Node, 4, AArch64::LD1Fourv8b_POST, AArch64::dsub0);
+ else if (VT == MVT::v16i8)
+ return SelectPostLoad(Node, 4, AArch64::LD1Fourv16b_POST, AArch64::qsub0);
+ else if (VT == MVT::v4i16)
+ return SelectPostLoad(Node, 4, AArch64::LD1Fourv4h_POST, AArch64::dsub0);
+ else if (VT == MVT::v8i16)
+ return SelectPostLoad(Node, 4, AArch64::LD1Fourv8h_POST, AArch64::qsub0);
+ else if (VT == MVT::v2i32 || VT == MVT::v2f32)
+ return SelectPostLoad(Node, 4, AArch64::LD1Fourv2s_POST, AArch64::dsub0);
+ else if (VT == MVT::v4i32 || VT == MVT::v4f32)
+ return SelectPostLoad(Node, 4, AArch64::LD1Fourv4s_POST, AArch64::qsub0);
+ else if (VT == MVT::v1i64 || VT == MVT::v1f64)
+ return SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
+ else if (VT == MVT::v2i64 || VT == MVT::v2f64)
+ return SelectPostLoad(Node, 4, AArch64::LD1Fourv2d_POST, AArch64::qsub0);
+ break;
+ }
+ case AArch64ISD::LD1DUPpost: {
+ if (VT == MVT::v8i8)
+ return SelectPostLoad(Node, 1, AArch64::LD1Rv8b_POST, AArch64::dsub0);
+ else if (VT == MVT::v16i8)
+ return SelectPostLoad(Node, 1, AArch64::LD1Rv16b_POST, AArch64::qsub0);
+ else if (VT == MVT::v4i16)
+ return SelectPostLoad(Node, 1, AArch64::LD1Rv4h_POST, AArch64::dsub0);
+ else if (VT == MVT::v8i16)
+ return SelectPostLoad(Node, 1, AArch64::LD1Rv8h_POST, AArch64::qsub0);
+ else if (VT == MVT::v2i32 || VT == MVT::v2f32)
+ return SelectPostLoad(Node, 1, AArch64::LD1Rv2s_POST, AArch64::dsub0);
+ else if (VT == MVT::v4i32 || VT == MVT::v4f32)
+ return SelectPostLoad(Node, 1, AArch64::LD1Rv4s_POST, AArch64::qsub0);
+ else if (VT == MVT::v1i64 || VT == MVT::v1f64)
+ return SelectPostLoad(Node, 1, AArch64::LD1Rv1d_POST, AArch64::dsub0);
+ else if (VT == MVT::v2i64 || VT == MVT::v2f64)
+ return SelectPostLoad(Node, 1, AArch64::LD1Rv2d_POST, AArch64::qsub0);
+ break;
+ }
+ case AArch64ISD::LD2DUPpost: {
+ if (VT == MVT::v8i8)
+ return SelectPostLoad(Node, 2, AArch64::LD2Rv8b_POST, AArch64::dsub0);
+ else if (VT == MVT::v16i8)
+ return SelectPostLoad(Node, 2, AArch64::LD2Rv16b_POST, AArch64::qsub0);
+ else if (VT == MVT::v4i16)
+ return SelectPostLoad(Node, 2, AArch64::LD2Rv4h_POST, AArch64::dsub0);
+ else if (VT == MVT::v8i16)
+ return SelectPostLoad(Node, 2, AArch64::LD2Rv8h_POST, AArch64::qsub0);
+ else if (VT == MVT::v2i32 || VT == MVT::v2f32)
+ return SelectPostLoad(Node, 2, AArch64::LD2Rv2s_POST, AArch64::dsub0);
+ else if (VT == MVT::v4i32 || VT == MVT::v4f32)
+ return SelectPostLoad(Node, 2, AArch64::LD2Rv4s_POST, AArch64::qsub0);
+ else if (VT == MVT::v1i64 || VT == MVT::v1f64)
+ return SelectPostLoad(Node, 2, AArch64::LD2Rv1d_POST, AArch64::dsub0);
+ else if (VT == MVT::v2i64 || VT == MVT::v2f64)
+ return SelectPostLoad(Node, 2, AArch64::LD2Rv2d_POST, AArch64::qsub0);
+ break;
+ }
+ case AArch64ISD::LD3DUPpost: {
+ if (VT == MVT::v8i8)
+ return SelectPostLoad(Node, 3, AArch64::LD3Rv8b_POST, AArch64::dsub0);
+ else if (VT == MVT::v16i8)
+ return SelectPostLoad(Node, 3, AArch64::LD3Rv16b_POST, AArch64::qsub0);
+ else if (VT == MVT::v4i16)
+ return SelectPostLoad(Node, 3, AArch64::LD3Rv4h_POST, AArch64::dsub0);
+ else if (VT == MVT::v8i16)
+ return SelectPostLoad(Node, 3, AArch64::LD3Rv8h_POST, AArch64::qsub0);
+ else if (VT == MVT::v2i32 || VT == MVT::v2f32)
+ return SelectPostLoad(Node, 3, AArch64::LD3Rv2s_POST, AArch64::dsub0);
+ else if (VT == MVT::v4i32 || VT == MVT::v4f32)
+ return SelectPostLoad(Node, 3, AArch64::LD3Rv4s_POST, AArch64::qsub0);
+ else if (VT == MVT::v1i64 || VT == MVT::v1f64)
+ return SelectPostLoad(Node, 3, AArch64::LD3Rv1d_POST, AArch64::dsub0);
+ else if (VT == MVT::v2i64 || VT == MVT::v2f64)
+ return SelectPostLoad(Node, 3, AArch64::LD3Rv2d_POST, AArch64::qsub0);
+ break;
+ }
+ case AArch64ISD::LD4DUPpost: {
+ if (VT == MVT::v8i8)
+ return SelectPostLoad(Node, 4, AArch64::LD4Rv8b_POST, AArch64::dsub0);
+ else if (VT == MVT::v16i8)
+ return SelectPostLoad(Node, 4, AArch64::LD4Rv16b_POST, AArch64::qsub0);
+ else if (VT == MVT::v4i16)
+ return SelectPostLoad(Node, 4, AArch64::LD4Rv4h_POST, AArch64::dsub0);
+ else if (VT == MVT::v8i16)
+ return SelectPostLoad(Node, 4, AArch64::LD4Rv8h_POST, AArch64::qsub0);
+ else if (VT == MVT::v2i32 || VT == MVT::v2f32)
+ return SelectPostLoad(Node, 4, AArch64::LD4Rv2s_POST, AArch64::dsub0);
+ else if (VT == MVT::v4i32 || VT == MVT::v4f32)
+ return SelectPostLoad(Node, 4, AArch64::LD4Rv4s_POST, AArch64::qsub0);
+ else if (VT == MVT::v1i64 || VT == MVT::v1f64)
+ return SelectPostLoad(Node, 4, AArch64::LD4Rv1d_POST, AArch64::dsub0);
+ else if (VT == MVT::v2i64 || VT == MVT::v2f64)
+ return SelectPostLoad(Node, 4, AArch64::LD4Rv2d_POST, AArch64::qsub0);
+ break;
+ }
+ case AArch64ISD::LD1LANEpost: {
+ if (VT == MVT::v16i8 || VT == MVT::v8i8)
+ return SelectPostLoadLane(Node, 1, AArch64::LD1i8_POST);
+ else if (VT == MVT::v8i16 || VT == MVT::v4i16)
+ return SelectPostLoadLane(Node, 1, AArch64::LD1i16_POST);
+ else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
+ VT == MVT::v2f32)
+ return SelectPostLoadLane(Node, 1, AArch64::LD1i32_POST);
+ else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
+ VT == MVT::v1f64)
+ return SelectPostLoadLane(Node, 1, AArch64::LD1i64_POST);
+ break;
+ }
+ case AArch64ISD::LD2LANEpost: {
+ if (VT == MVT::v16i8 || VT == MVT::v8i8)
+ return SelectPostLoadLane(Node, 2, AArch64::LD2i8_POST);
+ else if (VT == MVT::v8i16 || VT == MVT::v4i16)
+ return SelectPostLoadLane(Node, 2, AArch64::LD2i16_POST);
+ else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
+ VT == MVT::v2f32)
+ return SelectPostLoadLane(Node, 2, AArch64::LD2i32_POST);
+ else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
+ VT == MVT::v1f64)
+ return SelectPostLoadLane(Node, 2, AArch64::LD2i64_POST);
+ break;
+ }
+ case AArch64ISD::LD3LANEpost: {
+ if (VT == MVT::v16i8 || VT == MVT::v8i8)
+ return SelectPostLoadLane(Node, 3, AArch64::LD3i8_POST);
+ else if (VT == MVT::v8i16 || VT == MVT::v4i16)
+ return SelectPostLoadLane(Node, 3, AArch64::LD3i16_POST);
+ else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
+ VT == MVT::v2f32)
+ return SelectPostLoadLane(Node, 3, AArch64::LD3i32_POST);
+ else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
+ VT == MVT::v1f64)
+ return SelectPostLoadLane(Node, 3, AArch64::LD3i64_POST);
+ break;
+ }
+ case AArch64ISD::LD4LANEpost: {
+ if (VT == MVT::v16i8 || VT == MVT::v8i8)
+ return SelectPostLoadLane(Node, 4, AArch64::LD4i8_POST);
+ else if (VT == MVT::v8i16 || VT == MVT::v4i16)
+ return SelectPostLoadLane(Node, 4, AArch64::LD4i16_POST);
+ else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
+ VT == MVT::v2f32)
+ return SelectPostLoadLane(Node, 4, AArch64::LD4i32_POST);
+ else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
+ VT == MVT::v1f64)
+ return SelectPostLoadLane(Node, 4, AArch64::LD4i64_POST);
+ break;
+ }
+ case AArch64ISD::ST2post: {
+ VT = Node->getOperand(1).getValueType();
+ if (VT == MVT::v8i8)
+ return SelectPostStore(Node, 2, AArch64::ST2Twov8b_POST);
+ else if (VT == MVT::v16i8)
+ return SelectPostStore(Node, 2, AArch64::ST2Twov16b_POST);
+ else if (VT == MVT::v4i16)
+ return SelectPostStore(Node, 2, AArch64::ST2Twov4h_POST);
+ else if (VT == MVT::v8i16)
+ return SelectPostStore(Node, 2, AArch64::ST2Twov8h_POST);
+ else if (VT == MVT::v2i32 || VT == MVT::v2f32)
+ return SelectPostStore(Node, 2, AArch64::ST2Twov2s_POST);
+ else if (VT == MVT::v4i32 || VT == MVT::v4f32)
+ return SelectPostStore(Node, 2, AArch64::ST2Twov4s_POST);
+ else if (VT == MVT::v2i64 || VT == MVT::v2f64)
+ return SelectPostStore(Node, 2, AArch64::ST2Twov2d_POST);
+ else if (VT == MVT::v1i64 || VT == MVT::v1f64)
+ return SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
+ break;
+ }
+ case AArch64ISD::ST3post: {
+ VT = Node->getOperand(1).getValueType();
+ if (VT == MVT::v8i8)
+ return SelectPostStore(Node, 3, AArch64::ST3Threev8b_POST);
+ else if (VT == MVT::v16i8)
+ return SelectPostStore(Node, 3, AArch64::ST3Threev16b_POST);
+ else if (VT == MVT::v4i16)
+ return SelectPostStore(Node, 3, AArch64::ST3Threev4h_POST);
+ else if (VT == MVT::v8i16)
+ return SelectPostStore(Node, 3, AArch64::ST3Threev8h_POST);
+ else if (VT == MVT::v2i32 || VT == MVT::v2f32)
+ return SelectPostStore(Node, 3, AArch64::ST3Threev2s_POST);
+ else if (VT == MVT::v4i32 || VT == MVT::v4f32)
+ return SelectPostStore(Node, 3, AArch64::ST3Threev4s_POST);
+ else if (VT == MVT::v2i64 || VT == MVT::v2f64)
+ return SelectPostStore(Node, 3, AArch64::ST3Threev2d_POST);
+ else if (VT == MVT::v1i64 || VT == MVT::v1f64)
+ return SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
+ break;
+ }
+ case AArch64ISD::ST4post: {
+ VT = Node->getOperand(1).getValueType();
+ if (VT == MVT::v8i8)
+ return SelectPostStore(Node, 4, AArch64::ST4Fourv8b_POST);
+ else if (VT == MVT::v16i8)
+ return SelectPostStore(Node, 4, AArch64::ST4Fourv16b_POST);
+ else if (VT == MVT::v4i16)
+ return SelectPostStore(Node, 4, AArch64::ST4Fourv4h_POST);
+ else if (VT == MVT::v8i16)
+ return SelectPostStore(Node, 4, AArch64::ST4Fourv8h_POST);
+ else if (VT == MVT::v2i32 || VT == MVT::v2f32)
+ return SelectPostStore(Node, 4, AArch64::ST4Fourv2s_POST);
+ else if (VT == MVT::v4i32 || VT == MVT::v4f32)
+ return SelectPostStore(Node, 4, AArch64::ST4Fourv4s_POST);
+ else if (VT == MVT::v2i64 || VT == MVT::v2f64)
+ return SelectPostStore(Node, 4, AArch64::ST4Fourv2d_POST);
+ else if (VT == MVT::v1i64 || VT == MVT::v1f64)
+ return SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
+ break;
+ }
+ case AArch64ISD::ST1x2post: {
+ VT = Node->getOperand(1).getValueType();
+ if (VT == MVT::v8i8)
+ return SelectPostStore(Node, 2, AArch64::ST1Twov8b_POST);
+ else if (VT == MVT::v16i8)
+ return SelectPostStore(Node, 2, AArch64::ST1Twov16b_POST);
+ else if (VT == MVT::v4i16)
+ return SelectPostStore(Node, 2, AArch64::ST1Twov4h_POST);
+ else if (VT == MVT::v8i16)
+ return SelectPostStore(Node, 2, AArch64::ST1Twov8h_POST);
+ else if (VT == MVT::v2i32 || VT == MVT::v2f32)
+ return SelectPostStore(Node, 2, AArch64::ST1Twov2s_POST);
+ else if (VT == MVT::v4i32 || VT == MVT::v4f32)
+ return SelectPostStore(Node, 2, AArch64::ST1Twov4s_POST);
+ else if (VT == MVT::v1i64 || VT == MVT::v1f64)
+ return SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
+ else if (VT == MVT::v2i64 || VT == MVT::v2f64)
+ return SelectPostStore(Node, 2, AArch64::ST1Twov2d_POST);
+ break;
+ }
+ case AArch64ISD::ST1x3post: {
+ VT = Node->getOperand(1).getValueType();
+ if (VT == MVT::v8i8)
+ return SelectPostStore(Node, 3, AArch64::ST1Threev8b_POST);
+ else if (VT == MVT::v16i8)
+ return SelectPostStore(Node, 3, AArch64::ST1Threev16b_POST);
+ else if (VT == MVT::v4i16)
+ return SelectPostStore(Node, 3, AArch64::ST1Threev4h_POST);
+ else if (VT == MVT::v8i16)
+ return SelectPostStore(Node, 3, AArch64::ST1Threev8h_POST);
+ else if (VT == MVT::v2i32 || VT == MVT::v2f32)
+ return SelectPostStore(Node, 3, AArch64::ST1Threev2s_POST);
+ else if (VT == MVT::v4i32 || VT == MVT::v4f32)
+ return SelectPostStore(Node, 3, AArch64::ST1Threev4s_POST);
+ else if (VT == MVT::v1i64 || VT == MVT::v1f64)
+ return SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
+ else if (VT == MVT::v2i64 || VT == MVT::v2f64)
+ return SelectPostStore(Node, 3, AArch64::ST1Threev2d_POST);
+ break;
+ }
+ case AArch64ISD::ST1x4post: {
+ VT = Node->getOperand(1).getValueType();
+ if (VT == MVT::v8i8)
+ return SelectPostStore(Node, 4, AArch64::ST1Fourv8b_POST);
+ else if (VT == MVT::v16i8)
+ return SelectPostStore(Node, 4, AArch64::ST1Fourv16b_POST);
+ else if (VT == MVT::v4i16)
+ return SelectPostStore(Node, 4, AArch64::ST1Fourv4h_POST);
+ else if (VT == MVT::v8i16)
+ return SelectPostStore(Node, 4, AArch64::ST1Fourv8h_POST);
+ else if (VT == MVT::v2i32 || VT == MVT::v2f32)
+ return SelectPostStore(Node, 4, AArch64::ST1Fourv2s_POST);
+ else if (VT == MVT::v4i32 || VT == MVT::v4f32)
+ return SelectPostStore(Node, 4, AArch64::ST1Fourv4s_POST);
+ else if (VT == MVT::v1i64 || VT == MVT::v1f64)
+ return SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
+ else if (VT == MVT::v2i64 || VT == MVT::v2f64)
+ return SelectPostStore(Node, 4, AArch64::ST1Fourv2d_POST);
+ break;
+ }
+ case AArch64ISD::ST2LANEpost: {
+ VT = Node->getOperand(1).getValueType();
+ if (VT == MVT::v16i8 || VT == MVT::v8i8)
+ return SelectPostStoreLane(Node, 2, AArch64::ST2i8_POST);
+ else if (VT == MVT::v8i16 || VT == MVT::v4i16)
+ return SelectPostStoreLane(Node, 2, AArch64::ST2i16_POST);
+ else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
+ VT == MVT::v2f32)
+ return SelectPostStoreLane(Node, 2, AArch64::ST2i32_POST);
+ else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
+ VT == MVT::v1f64)
+ return SelectPostStoreLane(Node, 2, AArch64::ST2i64_POST);
+ break;
+ }
+ case AArch64ISD::ST3LANEpost: {
+ VT = Node->getOperand(1).getValueType();
+ if (VT == MVT::v16i8 || VT == MVT::v8i8)
+ return SelectPostStoreLane(Node, 3, AArch64::ST3i8_POST);
+ else if (VT == MVT::v8i16 || VT == MVT::v4i16)
+ return SelectPostStoreLane(Node, 3, AArch64::ST3i16_POST);
+ else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
+ VT == MVT::v2f32)
+ return SelectPostStoreLane(Node, 3, AArch64::ST3i32_POST);
+ else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
+ VT == MVT::v1f64)
+ return SelectPostStoreLane(Node, 3, AArch64::ST3i64_POST);
+ break;
+ }
+ case AArch64ISD::ST4LANEpost: {
+ VT = Node->getOperand(1).getValueType();
+ if (VT == MVT::v16i8 || VT == MVT::v8i8)
+ return SelectPostStoreLane(Node, 4, AArch64::ST4i8_POST);
+ else if (VT == MVT::v8i16 || VT == MVT::v4i16)
+ return SelectPostStoreLane(Node, 4, AArch64::ST4i16_POST);
+ else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
+ VT == MVT::v2f32)
+ return SelectPostStoreLane(Node, 4, AArch64::ST4i32_POST);
+ else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
+ VT == MVT::v1f64)
+ return SelectPostStoreLane(Node, 4, AArch64::ST4i64_POST);
break;
- } // End of case ISD::INTRINSIC_VOID and :ISD::INTRINSIC_W_CHAIN
- default:
- break; // Let generic code handle it
}
- SDNode *ResNode = SelectCode(Node);
+ case ISD::FCEIL:
+ case ISD::FFLOOR:
+ case ISD::FTRUNC:
+ case ISD::FROUND:
+ if (SDNode *I = SelectLIBM(Node))
+ return I;
+ break;
+ }
- DEBUG(dbgs() << "=> ";
- if (ResNode == NULL || ResNode == Node)
- Node->dump(CurDAG);
- else
- ResNode->dump(CurDAG);
- dbgs() << "\n");
+ // Select the default instruction
+ ResNode = SelectCode(Node);
+
+ DEBUG(errs() << "=> ");
+ if (ResNode == nullptr || ResNode == Node)
+ DEBUG(Node->dump(CurDAG));
+ else
+ DEBUG(ResNode->dump(CurDAG));
+ DEBUG(errs() << "\n");
return ResNode;
}
-/// This pass converts a legalized DAG into a AArch64-specific DAG, ready for
-/// instruction scheduling.
-FunctionPass *llvm::createAArch64ISelDAG(AArch64TargetMachine &TM,
+/// createAArch64ISelDag - This pass converts a legalized DAG into an
+/// AArch64-specific DAG, ready for instruction scheduling.
+FunctionPass *llvm::createAArch64ISelDag(AArch64TargetMachine &TM,
CodeGenOpt::Level OptLevel) {
return new AArch64DAGToDAGISel(TM, OptLevel);
}
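
A minimal standalone sketch (illustrative only; the enum and opcode values
below are hypothetical stand-ins, not this patch's real API) of the dispatch
pattern the ST1x*/ST*LANE cases above follow: switch on the node opcode, then
map the operand's vector type to one concrete post-indexed instruction.

    #include <cstdio>

    enum class VecTy { v8i8, v16i8, v4i16, v8i16, v2i32, v4i32, v1i64, v2i64 };

    // Hypothetical stand-ins for the AArch64::ST1Twov*_POST opcode constants
    // selected by the AArch64ISD::ST1x2post case.
    unsigned selectST1TwoPostOpc(VecTy VT) {
      switch (VT) {
      case VecTy::v8i8:  return 100; // AArch64::ST1Twov8b_POST
      case VecTy::v16i8: return 101; // AArch64::ST1Twov16b_POST
      case VecTy::v4i16: return 102; // AArch64::ST1Twov4h_POST
      case VecTy::v8i16: return 103; // AArch64::ST1Twov8h_POST
      case VecTy::v2i32: return 104; // AArch64::ST1Twov2s_POST
      case VecTy::v4i32: return 105; // AArch64::ST1Twov4s_POST
      case VecTy::v1i64: return 106; // AArch64::ST1Twov1d_POST
      case VecTy::v2i64: return 107; // AArch64::ST1Twov2d_POST
      }
      return 0; // unreachable for the enumerators above
    }

    int main() { std::printf("opcode %u\n", selectST1TwoPostOpc(VecTy::v4i32)); }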
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp
index 388973a..80d6669 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1,4 +1,4 @@
-//===-- AArch64ISelLowering.cpp - AArch64 DAG Lowering Implementation -----===//
+//===-- AArch64ISelLowering.cpp - AArch64 DAG Lowering Implementation ----===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,46 +7,87 @@
//
//===----------------------------------------------------------------------===//
//
-// This file defines the interfaces that AArch64 uses to lower LLVM code into a
-// selection DAG.
+// This file implements the AArch64TargetLowering class.
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "aarch64-isel"
-#include "AArch64.h"
#include "AArch64ISelLowering.h"
+#include "AArch64PerfectShuffle.h"
+#include "AArch64Subtarget.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64TargetMachine.h"
#include "AArch64TargetObjectFile.h"
-#include "Utils/AArch64BaseInfo.h"
-#include "llvm/CodeGen/Analysis.h"
+#include "MCTargetDesc/AArch64AddressingModes.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
-#include "llvm/IR/CallingConv.h"
-#include "llvm/Support/MathExtras.h"
-
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetOptions.h"
using namespace llvm;
-static TargetLoweringObjectFile *createTLOF(AArch64TargetMachine &TM) {
- assert (TM.getSubtarget<AArch64Subtarget>().isTargetELF() &&
- "unknown subtarget type");
- return new AArch64ElfTargetObjectFile();
-}
+#define DEBUG_TYPE "aarch64-lower"
-AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM)
- : TargetLowering(TM, createTLOF(TM)), Itins(TM.getInstrItineraryData()) {
+STATISTIC(NumTailCalls, "Number of tail calls");
+STATISTIC(NumShiftInserts, "Number of vector shift inserts");
+
+enum AlignMode {
+ StrictAlign,
+ NoStrictAlign
+};
+
+static cl::opt<AlignMode>
+Align(cl::desc("Load/store alignment support"),
+ cl::Hidden, cl::init(NoStrictAlign),
+ cl::values(
+ clEnumValN(StrictAlign, "aarch64-strict-align",
+ "Disallow all unaligned memory accesses"),
+ clEnumValN(NoStrictAlign, "aarch64-no-strict-align",
+ "Allow unaligned memory accesses"),
+ clEnumValEnd));
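+
+// Illustrative note (an assumption, not part of this patch): because these
+// are cl::opt enum values, they surface as hidden llc flags, e.g.
+//   llc -mtriple=arm64-linux-gnu -aarch64-strict-align file.ll
+// selects StrictAlign and disallows unaligned memory accesses in codegen.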
+
+// Placeholder until EXTR generation is fully tested.
+static cl::opt<bool>
+EnableAArch64ExtrGeneration("aarch64-extr-generation", cl::Hidden,
+ cl::desc("Allow AArch64 (or (shift)(shift))->extract"),
+ cl::init(true));
+
+static cl::opt<bool>
+EnableAArch64SlrGeneration("aarch64-shift-insert-generation", cl::Hidden,
+ cl::desc("Allow AArch64 SLI/SRI formation"),
+ cl::init(false));
- const AArch64Subtarget *Subtarget = &TM.getSubtarget<AArch64Subtarget>();
+//===----------------------------------------------------------------------===//
+// AArch64 Lowering public interface.
+//===----------------------------------------------------------------------===//
+static TargetLoweringObjectFile *createTLOF(TargetMachine &TM) {
+ if (TM.getSubtarget<AArch64Subtarget>().isTargetDarwin())
+ return new AArch64_MachoTargetObjectFile();
+
+ return new AArch64_ELFTargetObjectFile();
+}
- // SIMD compares set the entire lane's bits to 1
+AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM)
+ : TargetLowering(TM, createTLOF(TM)) {
+ Subtarget = &TM.getSubtarget<AArch64Subtarget>();
+
+  // AArch64 doesn't have comparisons that set GPRs, nor setcc instructions,
+  // so we have to make something up. Arbitrarily, choose ZeroOrOne.
+ setBooleanContents(ZeroOrOneBooleanContent);
+  // When comparing vectors, each element of the result is set to all-ones or
+  // all-zeros.
setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
- // Scalar register <-> type mapping
- addRegisterClass(MVT::i32, &AArch64::GPR32RegClass);
- addRegisterClass(MVT::i64, &AArch64::GPR64RegClass);
+ // Set up the register classes.
+ addRegisterClass(MVT::i32, &AArch64::GPR32allRegClass);
+ addRegisterClass(MVT::i64, &AArch64::GPR64allRegClass);
if (Subtarget->hasFPARMv8()) {
addRegisterClass(MVT::f16, &AArch64::FPR16RegClass);
@@ -56,201 +97,86 @@ AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM)
}
if (Subtarget->hasNEON()) {
- // And the vectors
- addRegisterClass(MVT::v1i8, &AArch64::FPR8RegClass);
- addRegisterClass(MVT::v1i16, &AArch64::FPR16RegClass);
- addRegisterClass(MVT::v1i32, &AArch64::FPR32RegClass);
- addRegisterClass(MVT::v1i64, &AArch64::FPR64RegClass);
- addRegisterClass(MVT::v1f64, &AArch64::FPR64RegClass);
- addRegisterClass(MVT::v8i8, &AArch64::FPR64RegClass);
- addRegisterClass(MVT::v4i16, &AArch64::FPR64RegClass);
- addRegisterClass(MVT::v2i32, &AArch64::FPR64RegClass);
- addRegisterClass(MVT::v1i64, &AArch64::FPR64RegClass);
- addRegisterClass(MVT::v2f32, &AArch64::FPR64RegClass);
- addRegisterClass(MVT::v16i8, &AArch64::FPR128RegClass);
- addRegisterClass(MVT::v8i16, &AArch64::FPR128RegClass);
- addRegisterClass(MVT::v4i32, &AArch64::FPR128RegClass);
- addRegisterClass(MVT::v2i64, &AArch64::FPR128RegClass);
- addRegisterClass(MVT::v4f32, &AArch64::FPR128RegClass);
- addRegisterClass(MVT::v2f64, &AArch64::FPR128RegClass);
+ addRegisterClass(MVT::v16i8, &AArch64::FPR8RegClass);
+ addRegisterClass(MVT::v8i16, &AArch64::FPR16RegClass);
+ // Someone set us up the NEON.
+ addDRTypeForNEON(MVT::v2f32);
+ addDRTypeForNEON(MVT::v8i8);
+ addDRTypeForNEON(MVT::v4i16);
+ addDRTypeForNEON(MVT::v2i32);
+ addDRTypeForNEON(MVT::v1i64);
+ addDRTypeForNEON(MVT::v1f64);
+
+ addQRTypeForNEON(MVT::v4f32);
+ addQRTypeForNEON(MVT::v2f64);
+ addQRTypeForNEON(MVT::v16i8);
+ addQRTypeForNEON(MVT::v8i16);
+ addQRTypeForNEON(MVT::v4i32);
+ addQRTypeForNEON(MVT::v2i64);
}
+ // Compute derived properties from the register classes
computeRegisterProperties();
- // We combine OR nodes for bitfield and NEON BSL operations.
- setTargetDAGCombine(ISD::OR);
-
- setTargetDAGCombine(ISD::AND);
- setTargetDAGCombine(ISD::SRA);
- setTargetDAGCombine(ISD::SRL);
- setTargetDAGCombine(ISD::SHL);
-
- setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
- setTargetDAGCombine(ISD::INTRINSIC_VOID);
- setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
-
- // AArch64 does not have i1 loads, or much of anything for i1 really.
- setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
- setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
- setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
-
- setStackPointerRegisterToSaveRestore(AArch64::XSP);
- setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
- setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
- setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
-
- // We'll lower globals to wrappers for selection.
+ // Provide all sorts of operation actions
setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
-
- // A64 instructions have the comparison predicate attached to the user of the
- // result, but having a separate comparison is valuable for matching.
+ setOperationAction(ISD::SETCC, MVT::i32, Custom);
+ setOperationAction(ISD::SETCC, MVT::i64, Custom);
+ setOperationAction(ISD::SETCC, MVT::f32, Custom);
+ setOperationAction(ISD::SETCC, MVT::f64, Custom);
+ setOperationAction(ISD::BRCOND, MVT::Other, Expand);
setOperationAction(ISD::BR_CC, MVT::i32, Custom);
setOperationAction(ISD::BR_CC, MVT::i64, Custom);
setOperationAction(ISD::BR_CC, MVT::f32, Custom);
setOperationAction(ISD::BR_CC, MVT::f64, Custom);
-
setOperationAction(ISD::SELECT, MVT::i32, Custom);
setOperationAction(ISD::SELECT, MVT::i64, Custom);
setOperationAction(ISD::SELECT, MVT::f32, Custom);
setOperationAction(ISD::SELECT, MVT::f64, Custom);
-
setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
setOperationAction(ISD::SELECT_CC, MVT::i64, Custom);
setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
-
- setOperationAction(ISD::BRCOND, MVT::Other, Custom);
-
- setOperationAction(ISD::SETCC, MVT::i32, Custom);
- setOperationAction(ISD::SETCC, MVT::i64, Custom);
- setOperationAction(ISD::SETCC, MVT::f32, Custom);
- setOperationAction(ISD::SETCC, MVT::f64, Custom);
-
setOperationAction(ISD::BR_JT, MVT::Other, Expand);
- setOperationAction(ISD::JumpTable, MVT::i32, Custom);
setOperationAction(ISD::JumpTable, MVT::i64, Custom);
- setOperationAction(ISD::VASTART, MVT::Other, Custom);
- setOperationAction(ISD::VACOPY, MVT::Other, Custom);
- setOperationAction(ISD::VAEND, MVT::Other, Expand);
- setOperationAction(ISD::VAARG, MVT::Other, Expand);
-
- setOperationAction(ISD::BlockAddress, MVT::i64, Custom);
- setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
-
- setOperationAction(ISD::ROTL, MVT::i32, Expand);
- setOperationAction(ISD::ROTL, MVT::i64, Expand);
-
- setOperationAction(ISD::UREM, MVT::i32, Expand);
- setOperationAction(ISD::UREM, MVT::i64, Expand);
- setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
- setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
-
- setOperationAction(ISD::SREM, MVT::i32, Expand);
- setOperationAction(ISD::SREM, MVT::i64, Expand);
- setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
- setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
-
- setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
- setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
- setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
- setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
-
- setOperationAction(ISD::CTPOP, MVT::i32, Expand);
- setOperationAction(ISD::CTPOP, MVT::i64, Expand);
-
- // Legal floating-point operations.
- setOperationAction(ISD::FABS, MVT::f32, Legal);
- setOperationAction(ISD::FABS, MVT::f64, Legal);
-
- setOperationAction(ISD::FCEIL, MVT::f32, Legal);
- setOperationAction(ISD::FCEIL, MVT::f64, Legal);
-
- setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
- setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
-
- setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
- setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
-
- setOperationAction(ISD::FNEG, MVT::f32, Legal);
- setOperationAction(ISD::FNEG, MVT::f64, Legal);
-
- setOperationAction(ISD::FRINT, MVT::f32, Legal);
- setOperationAction(ISD::FRINT, MVT::f64, Legal);
-
- setOperationAction(ISD::FSQRT, MVT::f32, Legal);
- setOperationAction(ISD::FSQRT, MVT::f64, Legal);
-
- setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
- setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
-
- setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
- setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
- setOperationAction(ISD::ConstantFP, MVT::f128, Legal);
-
- // Illegal floating-point operations.
- setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
- setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
-
- setOperationAction(ISD::FCOS, MVT::f32, Expand);
- setOperationAction(ISD::FCOS, MVT::f64, Expand);
-
- setOperationAction(ISD::FEXP, MVT::f32, Expand);
- setOperationAction(ISD::FEXP, MVT::f64, Expand);
-
- setOperationAction(ISD::FEXP2, MVT::f32, Expand);
- setOperationAction(ISD::FEXP2, MVT::f64, Expand);
-
- setOperationAction(ISD::FLOG, MVT::f32, Expand);
- setOperationAction(ISD::FLOG, MVT::f64, Expand);
-
- setOperationAction(ISD::FLOG2, MVT::f32, Expand);
- setOperationAction(ISD::FLOG2, MVT::f64, Expand);
-
- setOperationAction(ISD::FLOG10, MVT::f32, Expand);
- setOperationAction(ISD::FLOG10, MVT::f64, Expand);
-
- setOperationAction(ISD::FPOW, MVT::f32, Expand);
- setOperationAction(ISD::FPOW, MVT::f64, Expand);
-
- setOperationAction(ISD::FPOWI, MVT::f32, Expand);
- setOperationAction(ISD::FPOWI, MVT::f64, Expand);
+ setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
+ setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
+ setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
setOperationAction(ISD::FREM, MVT::f32, Expand);
setOperationAction(ISD::FREM, MVT::f64, Expand);
+ setOperationAction(ISD::FREM, MVT::f80, Expand);
- setOperationAction(ISD::FSIN, MVT::f32, Expand);
- setOperationAction(ISD::FSIN, MVT::f64, Expand);
-
- setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
- setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
+  // Custom lowering hooks are needed for XOR to fold it into CSINC/CSINV.
+ setOperationAction(ISD::XOR, MVT::i32, Custom);
+ setOperationAction(ISD::XOR, MVT::i64, Custom);
// Virtually no operation on f128 is legal, but LLVM can't expand them when
// there's a valid register class, so we need custom operations in most cases.
- setOperationAction(ISD::FABS, MVT::f128, Expand);
- setOperationAction(ISD::FADD, MVT::f128, Custom);
- setOperationAction(ISD::FCOPYSIGN, MVT::f128, Expand);
- setOperationAction(ISD::FCOS, MVT::f128, Expand);
- setOperationAction(ISD::FDIV, MVT::f128, Custom);
- setOperationAction(ISD::FMA, MVT::f128, Expand);
- setOperationAction(ISD::FMUL, MVT::f128, Custom);
- setOperationAction(ISD::FNEG, MVT::f128, Expand);
- setOperationAction(ISD::FP_EXTEND, MVT::f128, Expand);
- setOperationAction(ISD::FP_ROUND, MVT::f128, Expand);
- setOperationAction(ISD::FPOW, MVT::f128, Expand);
- setOperationAction(ISD::FREM, MVT::f128, Expand);
- setOperationAction(ISD::FRINT, MVT::f128, Expand);
- setOperationAction(ISD::FSIN, MVT::f128, Expand);
- setOperationAction(ISD::FSINCOS, MVT::f128, Expand);
- setOperationAction(ISD::FSQRT, MVT::f128, Expand);
- setOperationAction(ISD::FSUB, MVT::f128, Custom);
- setOperationAction(ISD::FTRUNC, MVT::f128, Expand);
- setOperationAction(ISD::SETCC, MVT::f128, Custom);
- setOperationAction(ISD::BR_CC, MVT::f128, Custom);
- setOperationAction(ISD::SELECT, MVT::f128, Expand);
- setOperationAction(ISD::SELECT_CC, MVT::f128, Custom);
- setOperationAction(ISD::FP_EXTEND, MVT::f128, Custom);
+ setOperationAction(ISD::FABS, MVT::f128, Expand);
+ setOperationAction(ISD::FADD, MVT::f128, Custom);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f128, Expand);
+ setOperationAction(ISD::FCOS, MVT::f128, Expand);
+ setOperationAction(ISD::FDIV, MVT::f128, Custom);
+ setOperationAction(ISD::FMA, MVT::f128, Expand);
+ setOperationAction(ISD::FMUL, MVT::f128, Custom);
+ setOperationAction(ISD::FNEG, MVT::f128, Expand);
+ setOperationAction(ISD::FPOW, MVT::f128, Expand);
+ setOperationAction(ISD::FREM, MVT::f128, Expand);
+ setOperationAction(ISD::FRINT, MVT::f128, Expand);
+ setOperationAction(ISD::FSIN, MVT::f128, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f128, Expand);
+ setOperationAction(ISD::FSQRT, MVT::f128, Expand);
+ setOperationAction(ISD::FSUB, MVT::f128, Custom);
+ setOperationAction(ISD::FTRUNC, MVT::f128, Expand);
+ setOperationAction(ISD::SETCC, MVT::f128, Custom);
+ setOperationAction(ISD::BR_CC, MVT::f128, Custom);
+ setOperationAction(ISD::SELECT, MVT::f128, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::f128, Custom);
+ setOperationAction(ISD::FP_EXTEND, MVT::f128, Custom);
// Lowering for many of the conversions is actually specified by the non-f128
// type. The LowerXXX function will be trivial when f128 isn't involved.
@@ -266,623 +192,583 @@ AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM)
setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::i128, Custom);
- setOperationAction(ISD::FP_ROUND, MVT::f32, Custom);
- setOperationAction(ISD::FP_ROUND, MVT::f64, Custom);
-
- // i128 shift operation support
- setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
- setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
- setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
+ setOperationAction(ISD::FP_ROUND, MVT::f32, Custom);
+ setOperationAction(ISD::FP_ROUND, MVT::f64, Custom);
- // This prevents LLVM trying to compress double constants into a floating
- // constant-pool entry and trying to load from there. It's of doubtful benefit
- // for A64: we'd need LDR followed by FCVT, I believe.
- setLoadExtAction(ISD::EXTLOAD, MVT::f64, Expand);
- setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
- setLoadExtAction(ISD::EXTLOAD, MVT::f16, Expand);
+ // Variable arguments.
+ setOperationAction(ISD::VASTART, MVT::Other, Custom);
+ setOperationAction(ISD::VAARG, MVT::Other, Custom);
+ setOperationAction(ISD::VACOPY, MVT::Other, Custom);
+ setOperationAction(ISD::VAEND, MVT::Other, Expand);
- setTruncStoreAction(MVT::f128, MVT::f64, Expand);
- setTruncStoreAction(MVT::f128, MVT::f32, Expand);
- setTruncStoreAction(MVT::f128, MVT::f16, Expand);
- setTruncStoreAction(MVT::f64, MVT::f32, Expand);
- setTruncStoreAction(MVT::f64, MVT::f16, Expand);
- setTruncStoreAction(MVT::f32, MVT::f16, Expand);
+ // Variable-sized objects.
+ setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
+ setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
+ // Exception handling.
+ // FIXME: These are guesses. Has this been defined yet?
setExceptionPointerRegister(AArch64::X0);
setExceptionSelectorRegister(AArch64::X1);
- if (Subtarget->hasNEON()) {
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v8i8, Expand);
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Expand);
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Expand);
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v1i64, Expand);
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v16i8, Expand);
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v8i16, Expand);
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Expand);
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i64, Expand);
-
- setOperationAction(ISD::BUILD_VECTOR, MVT::v1i8, Custom);
- setOperationAction(ISD::BUILD_VECTOR, MVT::v8i8, Custom);
- setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom);
- setOperationAction(ISD::BUILD_VECTOR, MVT::v1i16, Custom);
- setOperationAction(ISD::BUILD_VECTOR, MVT::v4i16, Custom);
- setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);
- setOperationAction(ISD::BUILD_VECTOR, MVT::v1i32, Custom);
- setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32, Custom);
- setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
- setOperationAction(ISD::BUILD_VECTOR, MVT::v1i64, Custom);
- setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
- setOperationAction(ISD::BUILD_VECTOR, MVT::v2f32, Custom);
- setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
- setOperationAction(ISD::BUILD_VECTOR, MVT::v1f64, Custom);
- setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom);
-
- setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i8, Custom);
- setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);
- setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i16, Custom);
- setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i16, Custom);
- setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i32, Custom);
- setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i32, Custom);
- setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v1i64, Custom);
- setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Custom);
- setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f32, Custom);
- setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom);
- setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v1f64, Custom);
- setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Custom);
-
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i32, Legal);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i8, Legal);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i16, Legal);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i32, Legal);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i64, Legal);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v4f32, Legal);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v2f64, Legal);
-
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i8, Custom);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i16, Custom);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i8, Custom);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i16, Custom);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i32, Custom);
-
- setOperationAction(ISD::SETCC, MVT::v8i8, Custom);
- setOperationAction(ISD::SETCC, MVT::v16i8, Custom);
- setOperationAction(ISD::SETCC, MVT::v4i16, Custom);
- setOperationAction(ISD::SETCC, MVT::v8i16, Custom);
- setOperationAction(ISD::SETCC, MVT::v2i32, Custom);
- setOperationAction(ISD::SETCC, MVT::v4i32, Custom);
- setOperationAction(ISD::SETCC, MVT::v1i64, Custom);
- setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
- setOperationAction(ISD::SETCC, MVT::v2f32, Custom);
- setOperationAction(ISD::SETCC, MVT::v4f32, Custom);
- setOperationAction(ISD::SETCC, MVT::v1f64, Custom);
- setOperationAction(ISD::SETCC, MVT::v2f64, Custom);
-
- setOperationAction(ISD::FFLOOR, MVT::v2f32, Legal);
- setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
- setOperationAction(ISD::FFLOOR, MVT::v1f64, Legal);
- setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
-
- setOperationAction(ISD::FCEIL, MVT::v2f32, Legal);
- setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
- setOperationAction(ISD::FCEIL, MVT::v1f64, Legal);
- setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
-
- setOperationAction(ISD::FTRUNC, MVT::v2f32, Legal);
- setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
- setOperationAction(ISD::FTRUNC, MVT::v1f64, Legal);
- setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
-
- setOperationAction(ISD::FRINT, MVT::v2f32, Legal);
- setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
- setOperationAction(ISD::FRINT, MVT::v1f64, Legal);
- setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
-
- setOperationAction(ISD::FNEARBYINT, MVT::v2f32, Legal);
- setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
- setOperationAction(ISD::FNEARBYINT, MVT::v1f64, Legal);
- setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
-
- setOperationAction(ISD::FROUND, MVT::v2f32, Legal);
- setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
- setOperationAction(ISD::FROUND, MVT::v1f64, Legal);
- setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
-
- setOperationAction(ISD::SINT_TO_FP, MVT::v1i8, Custom);
- setOperationAction(ISD::SINT_TO_FP, MVT::v1i16, Custom);
- setOperationAction(ISD::SINT_TO_FP, MVT::v1i32, Custom);
- setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);
- setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom);
- setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Custom);
+ // Constant pool entries
+ setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
- setOperationAction(ISD::UINT_TO_FP, MVT::v1i8, Custom);
- setOperationAction(ISD::UINT_TO_FP, MVT::v1i16, Custom);
- setOperationAction(ISD::UINT_TO_FP, MVT::v1i32, Custom);
- setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
- setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Custom);
- setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Custom);
+ // BlockAddress
+ setOperationAction(ISD::BlockAddress, MVT::i64, Custom);
- setOperationAction(ISD::FP_TO_SINT, MVT::v1i8, Custom);
- setOperationAction(ISD::FP_TO_SINT, MVT::v1i16, Custom);
- setOperationAction(ISD::FP_TO_SINT, MVT::v1i32, Custom);
- setOperationAction(ISD::FP_TO_SINT, MVT::v4i16, Custom);
- setOperationAction(ISD::FP_TO_SINT, MVT::v2i32, Custom);
- setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Custom);
-
- setOperationAction(ISD::FP_TO_UINT, MVT::v1i8, Custom);
- setOperationAction(ISD::FP_TO_UINT, MVT::v1i16, Custom);
- setOperationAction(ISD::FP_TO_UINT, MVT::v1i32, Custom);
- setOperationAction(ISD::FP_TO_UINT, MVT::v4i16, Custom);
- setOperationAction(ISD::FP_TO_UINT, MVT::v2i32, Custom);
- setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Custom);
-
- // Neon does not support vector divide/remainder operations except
- // floating-point divide.
- setOperationAction(ISD::SDIV, MVT::v1i8, Expand);
- setOperationAction(ISD::SDIV, MVT::v8i8, Expand);
- setOperationAction(ISD::SDIV, MVT::v16i8, Expand);
- setOperationAction(ISD::SDIV, MVT::v1i16, Expand);
- setOperationAction(ISD::SDIV, MVT::v4i16, Expand);
- setOperationAction(ISD::SDIV, MVT::v8i16, Expand);
- setOperationAction(ISD::SDIV, MVT::v1i32, Expand);
- setOperationAction(ISD::SDIV, MVT::v2i32, Expand);
- setOperationAction(ISD::SDIV, MVT::v4i32, Expand);
- setOperationAction(ISD::SDIV, MVT::v1i64, Expand);
- setOperationAction(ISD::SDIV, MVT::v2i64, Expand);
-
- setOperationAction(ISD::UDIV, MVT::v1i8, Expand);
- setOperationAction(ISD::UDIV, MVT::v8i8, Expand);
- setOperationAction(ISD::UDIV, MVT::v16i8, Expand);
- setOperationAction(ISD::UDIV, MVT::v1i16, Expand);
- setOperationAction(ISD::UDIV, MVT::v4i16, Expand);
- setOperationAction(ISD::UDIV, MVT::v8i16, Expand);
- setOperationAction(ISD::UDIV, MVT::v1i32, Expand);
- setOperationAction(ISD::UDIV, MVT::v2i32, Expand);
- setOperationAction(ISD::UDIV, MVT::v4i32, Expand);
- setOperationAction(ISD::UDIV, MVT::v1i64, Expand);
- setOperationAction(ISD::UDIV, MVT::v2i64, Expand);
-
- setOperationAction(ISD::SREM, MVT::v1i8, Expand);
- setOperationAction(ISD::SREM, MVT::v8i8, Expand);
- setOperationAction(ISD::SREM, MVT::v16i8, Expand);
- setOperationAction(ISD::SREM, MVT::v1i16, Expand);
- setOperationAction(ISD::SREM, MVT::v4i16, Expand);
- setOperationAction(ISD::SREM, MVT::v8i16, Expand);
- setOperationAction(ISD::SREM, MVT::v1i32, Expand);
- setOperationAction(ISD::SREM, MVT::v2i32, Expand);
- setOperationAction(ISD::SREM, MVT::v4i32, Expand);
- setOperationAction(ISD::SREM, MVT::v1i64, Expand);
- setOperationAction(ISD::SREM, MVT::v2i64, Expand);
-
- setOperationAction(ISD::UREM, MVT::v1i8, Expand);
- setOperationAction(ISD::UREM, MVT::v8i8, Expand);
- setOperationAction(ISD::UREM, MVT::v16i8, Expand);
- setOperationAction(ISD::UREM, MVT::v1i16, Expand);
- setOperationAction(ISD::UREM, MVT::v4i16, Expand);
- setOperationAction(ISD::UREM, MVT::v8i16, Expand);
- setOperationAction(ISD::UREM, MVT::v1i32, Expand);
- setOperationAction(ISD::UREM, MVT::v2i32, Expand);
- setOperationAction(ISD::UREM, MVT::v4i32, Expand);
- setOperationAction(ISD::UREM, MVT::v1i64, Expand);
- setOperationAction(ISD::UREM, MVT::v2i64, Expand);
-
- setOperationAction(ISD::FREM, MVT::v2f32, Expand);
- setOperationAction(ISD::FREM, MVT::v4f32, Expand);
- setOperationAction(ISD::FREM, MVT::v1f64, Expand);
- setOperationAction(ISD::FREM, MVT::v2f64, Expand);
-
- setOperationAction(ISD::SELECT, MVT::v8i8, Expand);
- setOperationAction(ISD::SELECT, MVT::v16i8, Expand);
- setOperationAction(ISD::SELECT, MVT::v4i16, Expand);
- setOperationAction(ISD::SELECT, MVT::v8i16, Expand);
- setOperationAction(ISD::SELECT, MVT::v2i32, Expand);
- setOperationAction(ISD::SELECT, MVT::v4i32, Expand);
- setOperationAction(ISD::SELECT, MVT::v1i64, Expand);
- setOperationAction(ISD::SELECT, MVT::v2i64, Expand);
- setOperationAction(ISD::SELECT, MVT::v2f32, Expand);
- setOperationAction(ISD::SELECT, MVT::v4f32, Expand);
- setOperationAction(ISD::SELECT, MVT::v1f64, Expand);
- setOperationAction(ISD::SELECT, MVT::v2f64, Expand);
-
- setOperationAction(ISD::SELECT_CC, MVT::v8i8, Custom);
- setOperationAction(ISD::SELECT_CC, MVT::v16i8, Custom);
- setOperationAction(ISD::SELECT_CC, MVT::v4i16, Custom);
- setOperationAction(ISD::SELECT_CC, MVT::v8i16, Custom);
- setOperationAction(ISD::SELECT_CC, MVT::v2i32, Custom);
- setOperationAction(ISD::SELECT_CC, MVT::v4i32, Custom);
- setOperationAction(ISD::SELECT_CC, MVT::v1i64, Custom);
- setOperationAction(ISD::SELECT_CC, MVT::v2i64, Custom);
- setOperationAction(ISD::SELECT_CC, MVT::v2f32, Custom);
- setOperationAction(ISD::SELECT_CC, MVT::v4f32, Custom);
- setOperationAction(ISD::SELECT_CC, MVT::v1f64, Custom);
- setOperationAction(ISD::SELECT_CC, MVT::v2f64, Custom);
-
- // Vector ExtLoad and TruncStore are expanded.
- for (unsigned I = MVT::FIRST_VECTOR_VALUETYPE;
- I <= MVT::LAST_VECTOR_VALUETYPE; ++I) {
- MVT VT = (MVT::SimpleValueType) I;
- setLoadExtAction(ISD::SEXTLOAD, VT, Expand);
- setLoadExtAction(ISD::ZEXTLOAD, VT, Expand);
- setLoadExtAction(ISD::EXTLOAD, VT, Expand);
- for (unsigned II = MVT::FIRST_VECTOR_VALUETYPE;
- II <= MVT::LAST_VECTOR_VALUETYPE; ++II) {
- MVT VT1 = (MVT::SimpleValueType) II;
- // A TruncStore has two vector types of the same number of elements
- // and different element sizes.
- if (VT.getVectorNumElements() == VT1.getVectorNumElements() &&
- VT.getVectorElementType().getSizeInBits()
- > VT1.getVectorElementType().getSizeInBits())
- setTruncStoreAction(VT, VT1, Expand);
- }
- }
+  // Add/Sub overflow ops with MVT::Glue are lowered to NZCV dependences.
+ setOperationAction(ISD::ADDC, MVT::i32, Custom);
+ setOperationAction(ISD::ADDE, MVT::i32, Custom);
+ setOperationAction(ISD::SUBC, MVT::i32, Custom);
+ setOperationAction(ISD::SUBE, MVT::i32, Custom);
+ setOperationAction(ISD::ADDC, MVT::i64, Custom);
+ setOperationAction(ISD::ADDE, MVT::i64, Custom);
+ setOperationAction(ISD::SUBC, MVT::i64, Custom);
+ setOperationAction(ISD::SUBE, MVT::i64, Custom);
+
+ // AArch64 lacks both left-rotate and popcount instructions.
+ setOperationAction(ISD::ROTL, MVT::i32, Expand);
+ setOperationAction(ISD::ROTL, MVT::i64, Expand);
- // There is no v1i64/v2i64 multiply, expand v1i64/v2i64 to GPR i64 multiply.
- // FIXME: For a v2i64 multiply, we copy VPR to GPR and do 2 i64 multiplies,
- // and then copy back to VPR. This solution may be optimized by Following 3
- // NEON instructions:
- // pmull v2.1q, v0.1d, v1.1d
- // pmull2 v3.1q, v0.2d, v1.2d
- // ins v2.d[1], v3.d[0]
- // As currently we can't verify the correctness of such assumption, we can
- // do such optimization in the future.
- setOperationAction(ISD::MUL, MVT::v1i64, Expand);
- setOperationAction(ISD::MUL, MVT::v2i64, Expand);
+ // AArch64 doesn't have {U|S}MUL_LOHI.
+ setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
+ setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
- setOperationAction(ISD::FCOS, MVT::v2f64, Expand);
- setOperationAction(ISD::FCOS, MVT::v4f32, Expand);
- setOperationAction(ISD::FCOS, MVT::v2f32, Expand);
- setOperationAction(ISD::FSIN, MVT::v2f64, Expand);
- setOperationAction(ISD::FSIN, MVT::v4f32, Expand);
- setOperationAction(ISD::FSIN, MVT::v2f32, Expand);
- setOperationAction(ISD::FPOW, MVT::v2f64, Expand);
- setOperationAction(ISD::FPOW, MVT::v4f32, Expand);
- setOperationAction(ISD::FPOW, MVT::v2f32, Expand);
- }
- setTargetDAGCombine(ISD::SETCC);
- setTargetDAGCombine(ISD::SIGN_EXTEND);
- setTargetDAGCombine(ISD::VSELECT);
-}
+  // Expand the undefined-at-zero variants of cttz/ctlz to their defined-at-zero
+ // counterparts, which AArch64 supports directly.
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand);
-EVT AArch64TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
- // It's reasonably important that this value matches the "natural" legal
- // promotion from i1 for scalar types. Otherwise LegalizeTypes can get itself
- // in a twist (e.g. inserting an any_extend which then becomes i64 -> i64).
- if (!VT.isVector()) return MVT::i32;
- return VT.changeVectorElementTypeToInteger();
-}
+ setOperationAction(ISD::CTPOP, MVT::i32, Custom);
+ setOperationAction(ISD::CTPOP, MVT::i64, Custom);
-static void getExclusiveOperation(unsigned Size, AtomicOrdering Ord,
- unsigned &LdrOpc,
- unsigned &StrOpc) {
- static const unsigned LoadBares[] = {AArch64::LDXR_byte, AArch64::LDXR_hword,
- AArch64::LDXR_word, AArch64::LDXR_dword};
- static const unsigned LoadAcqs[] = {AArch64::LDAXR_byte, AArch64::LDAXR_hword,
- AArch64::LDAXR_word, AArch64::LDAXR_dword};
- static const unsigned StoreBares[] = {AArch64::STXR_byte, AArch64::STXR_hword,
- AArch64::STXR_word, AArch64::STXR_dword};
- static const unsigned StoreRels[] = {AArch64::STLXR_byte,AArch64::STLXR_hword,
- AArch64::STLXR_word, AArch64::STLXR_dword};
-
- const unsigned *LoadOps, *StoreOps;
- if (Ord == Acquire || Ord == AcquireRelease || Ord == SequentiallyConsistent)
- LoadOps = LoadAcqs;
- else
- LoadOps = LoadBares;
+ setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
+ setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
+ setOperationAction(ISD::SREM, MVT::i32, Expand);
+ setOperationAction(ISD::SREM, MVT::i64, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
+ setOperationAction(ISD::UREM, MVT::i32, Expand);
+ setOperationAction(ISD::UREM, MVT::i64, Expand);
- if (Ord == Release || Ord == AcquireRelease || Ord == SequentiallyConsistent)
- StoreOps = StoreRels;
- else
- StoreOps = StoreBares;
+ // Custom lower Add/Sub/Mul with overflow.
+ setOperationAction(ISD::SADDO, MVT::i32, Custom);
+ setOperationAction(ISD::SADDO, MVT::i64, Custom);
+ setOperationAction(ISD::UADDO, MVT::i32, Custom);
+ setOperationAction(ISD::UADDO, MVT::i64, Custom);
+ setOperationAction(ISD::SSUBO, MVT::i32, Custom);
+ setOperationAction(ISD::SSUBO, MVT::i64, Custom);
+ setOperationAction(ISD::USUBO, MVT::i32, Custom);
+ setOperationAction(ISD::USUBO, MVT::i64, Custom);
+ setOperationAction(ISD::SMULO, MVT::i32, Custom);
+ setOperationAction(ISD::SMULO, MVT::i64, Custom);
+ setOperationAction(ISD::UMULO, MVT::i32, Custom);
+ setOperationAction(ISD::UMULO, MVT::i64, Custom);
- assert(isPowerOf2_32(Size) && Size <= 8 &&
- "unsupported size for atomic binary op!");
+ setOperationAction(ISD::FSIN, MVT::f32, Expand);
+ setOperationAction(ISD::FSIN, MVT::f64, Expand);
+ setOperationAction(ISD::FCOS, MVT::f32, Expand);
+ setOperationAction(ISD::FCOS, MVT::f64, Expand);
+ setOperationAction(ISD::FPOW, MVT::f32, Expand);
+ setOperationAction(ISD::FPOW, MVT::f64, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
+
+ // AArch64 has implementations of a lot of rounding-like FP operations.
+  static MVT RoundingTypes[] = {MVT::f32, MVT::f64};
+ for (unsigned I = 0; I < array_lengthof(RoundingTypes); ++I) {
+ MVT Ty = RoundingTypes[I];
+ setOperationAction(ISD::FFLOOR, Ty, Legal);
+ setOperationAction(ISD::FNEARBYINT, Ty, Legal);
+ setOperationAction(ISD::FCEIL, Ty, Legal);
+ setOperationAction(ISD::FRINT, Ty, Legal);
+ setOperationAction(ISD::FTRUNC, Ty, Legal);
+ setOperationAction(ISD::FROUND, Ty, Legal);
+ }
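+
+  // Illustrative note (assumed mapping, not stated in this patch): these
+  // lower to the FRINT* family, roughly FFLOOR -> FRINTM, FCEIL -> FRINTP,
+  // FTRUNC -> FRINTZ, FROUND -> FRINTA, FNEARBYINT -> FRINTI, and
+  // FRINT -> FRINTX.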
- LdrOpc = LoadOps[Log2_32(Size)];
- StrOpc = StoreOps[Log2_32(Size)];
-}
+ setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
-// FIXME: AArch64::DTripleRegClass and AArch64::QTripleRegClass don't really
-// have value type mapped, and they are both being defined as MVT::untyped.
-// Without knowing the MVT type, MachineLICM::getRegisterClassIDAndCost
-// would fail to figure out the register pressure correctly.
-std::pair<const TargetRegisterClass*, uint8_t>
-AArch64TargetLowering::findRepresentativeClass(MVT VT) const{
- const TargetRegisterClass *RRC = 0;
- uint8_t Cost = 1;
- switch (VT.SimpleTy) {
- default:
- return TargetLowering::findRepresentativeClass(VT);
- case MVT::v4i64:
- RRC = &AArch64::QPairRegClass;
- Cost = 2;
- break;
- case MVT::v8i64:
- RRC = &AArch64::QQuadRegClass;
- Cost = 4;
- break;
+ if (Subtarget->isTargetMachO()) {
+    // For iOS, we don't want the normal expansion of a libcall to
+ // sincos. We want to issue a libcall to __sincos_stret to avoid memory
+ // traffic.
+ setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
+ setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
+ } else {
+ setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
}
- return std::make_pair(RRC, Cost);
-}
-MachineBasicBlock *
-AArch64TargetLowering::emitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
- unsigned Size,
- unsigned BinOpcode) const {
- // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+  // AArch64 does not have floating-point extending loads, i1 sign-extending
+  // loads, floating-point truncating stores, or v2i32->v2i16 truncating
+  // stores.
+ setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::f64, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::f80, Expand);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Expand);
+ setTruncStoreAction(MVT::f32, MVT::f16, Expand);
+ setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+ setTruncStoreAction(MVT::f64, MVT::f16, Expand);
+ setTruncStoreAction(MVT::f128, MVT::f80, Expand);
+ setTruncStoreAction(MVT::f128, MVT::f64, Expand);
+ setTruncStoreAction(MVT::f128, MVT::f32, Expand);
+ setTruncStoreAction(MVT::f128, MVT::f16, Expand);
+ // Indexed loads and stores are supported.
+ for (unsigned im = (unsigned)ISD::PRE_INC;
+ im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
+ setIndexedLoadAction(im, MVT::i8, Legal);
+ setIndexedLoadAction(im, MVT::i16, Legal);
+ setIndexedLoadAction(im, MVT::i32, Legal);
+ setIndexedLoadAction(im, MVT::i64, Legal);
+ setIndexedLoadAction(im, MVT::f64, Legal);
+ setIndexedLoadAction(im, MVT::f32, Legal);
+ setIndexedStoreAction(im, MVT::i8, Legal);
+ setIndexedStoreAction(im, MVT::i16, Legal);
+ setIndexedStoreAction(im, MVT::i32, Legal);
+ setIndexedStoreAction(im, MVT::i64, Legal);
+ setIndexedStoreAction(im, MVT::f64, Legal);
+ setIndexedStoreAction(im, MVT::f32, Legal);
+ }
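+
+  // Illustrative note (assumed, not part of this patch): "indexed" means the
+  // pre-/post-indexed addressing modes, which write the updated address back
+  // to the base register, e.g.
+  //   ldr x0, [x1, #8]!   // pre-indexed:  x1 += 8, then load from [x1]
+  //   ldr x0, [x1], #8    // post-indexed: load from [x1], then x1 += 8
+  // Marking them Legal lets the DAG combiner form such accesses directly.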
- const BasicBlock *LLVM_BB = BB->getBasicBlock();
- MachineFunction *MF = BB->getParent();
- MachineFunction::iterator It = BB;
- ++It;
+ // Trap.
+ setOperationAction(ISD::TRAP, MVT::Other, Legal);
- unsigned dest = MI->getOperand(0).getReg();
- unsigned ptr = MI->getOperand(1).getReg();
- unsigned incr = MI->getOperand(2).getReg();
- AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(3).getImm());
- DebugLoc dl = MI->getDebugLoc();
-
- MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
-
- unsigned ldrOpc, strOpc;
- getExclusiveOperation(Size, Ord, ldrOpc, strOpc);
-
- MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
- MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
- MF->insert(It, loopMBB);
- MF->insert(It, exitMBB);
-
- // Transfer the remainder of BB and its successor edges to exitMBB.
- exitMBB->splice(exitMBB->begin(), BB,
- std::next(MachineBasicBlock::iterator(MI)), BB->end());
- exitMBB->transferSuccessorsAndUpdatePHIs(BB);
-
- const TargetRegisterClass *TRC
- = Size == 8 ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
- unsigned scratch = (!BinOpcode) ? incr : MRI.createVirtualRegister(TRC);
-
- // thisMBB:
- // ...
- // fallthrough --> loopMBB
- BB->addSuccessor(loopMBB);
-
- // loopMBB:
- // ldxr dest, ptr
- // <binop> scratch, dest, incr
- // stxr stxr_status, scratch, ptr
- // cbnz stxr_status, loopMBB
- // fallthrough --> exitMBB
- BB = loopMBB;
- BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr);
- if (BinOpcode) {
- // All arithmetic operations we'll be creating are designed to take an extra
- // shift or extend operand, which we can conveniently set to zero.
-
- // Operand order needs to go the other way for NAND.
- if (BinOpcode == AArch64::BICwww_lsl || BinOpcode == AArch64::BICxxx_lsl)
- BuildMI(BB, dl, TII->get(BinOpcode), scratch)
- .addReg(incr).addReg(dest).addImm(0);
- else
- BuildMI(BB, dl, TII->get(BinOpcode), scratch)
- .addReg(dest).addReg(incr).addImm(0);
- }
+ // We combine OR nodes for bitfield operations.
+ setTargetDAGCombine(ISD::OR);
- // From the stxr, the register is GPR32; from the cmp it's GPR32wsp
- unsigned stxr_status = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
- MRI.constrainRegClass(stxr_status, &AArch64::GPR32wspRegClass);
+ // Vector add and sub nodes may conceal a high-half opportunity.
+  // Also, try to fold ADD into CSINC/CSINV.
+ setTargetDAGCombine(ISD::ADD);
+ setTargetDAGCombine(ISD::SUB);
- BuildMI(BB, dl, TII->get(strOpc), stxr_status).addReg(scratch).addReg(ptr);
- BuildMI(BB, dl, TII->get(AArch64::CBNZw))
- .addReg(stxr_status).addMBB(loopMBB);
+ setTargetDAGCombine(ISD::XOR);
+ setTargetDAGCombine(ISD::SINT_TO_FP);
+ setTargetDAGCombine(ISD::UINT_TO_FP);
- BB->addSuccessor(loopMBB);
- BB->addSuccessor(exitMBB);
+ setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
- // exitMBB:
- // ...
- BB = exitMBB;
+ setTargetDAGCombine(ISD::ANY_EXTEND);
+ setTargetDAGCombine(ISD::ZERO_EXTEND);
+ setTargetDAGCombine(ISD::SIGN_EXTEND);
+ setTargetDAGCombine(ISD::BITCAST);
+ setTargetDAGCombine(ISD::CONCAT_VECTORS);
+ setTargetDAGCombine(ISD::STORE);
- MI->eraseFromParent(); // The instruction is gone now.
+ setTargetDAGCombine(ISD::MUL);
- return BB;
-}
+ setTargetDAGCombine(ISD::SELECT);
+ setTargetDAGCombine(ISD::VSELECT);
-MachineBasicBlock *
-AArch64TargetLowering::emitAtomicBinaryMinMax(MachineInstr *MI,
- MachineBasicBlock *BB,
- unsigned Size,
- unsigned CmpOp,
- A64CC::CondCodes Cond) const {
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ setTargetDAGCombine(ISD::INTRINSIC_VOID);
+ setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
+ setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
- const BasicBlock *LLVM_BB = BB->getBasicBlock();
- MachineFunction *MF = BB->getParent();
- MachineFunction::iterator It = BB;
- ++It;
+ MaxStoresPerMemset = MaxStoresPerMemsetOptSize = 8;
+ MaxStoresPerMemcpy = MaxStoresPerMemcpyOptSize = 4;
+ MaxStoresPerMemmove = MaxStoresPerMemmoveOptSize = 4;
- unsigned dest = MI->getOperand(0).getReg();
- unsigned ptr = MI->getOperand(1).getReg();
- unsigned incr = MI->getOperand(2).getReg();
- AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(3).getImm());
+ setStackPointerRegisterToSaveRestore(AArch64::SP);
- unsigned oldval = dest;
- DebugLoc dl = MI->getDebugLoc();
+ setSchedulingPreference(Sched::Hybrid);
- MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
- const TargetRegisterClass *TRC, *TRCsp;
- if (Size == 8) {
- TRC = &AArch64::GPR64RegClass;
- TRCsp = &AArch64::GPR64xspRegClass;
- } else {
- TRC = &AArch64::GPR32RegClass;
- TRCsp = &AArch64::GPR32wspRegClass;
- }
+ // Enable TBZ/TBNZ
+ MaskAndBranchFoldingIsLegal = true;
- unsigned ldrOpc, strOpc;
- getExclusiveOperation(Size, Ord, ldrOpc, strOpc);
+ setMinFunctionAlignment(2);
- MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
- MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
- MF->insert(It, loopMBB);
- MF->insert(It, exitMBB);
+ RequireStrictAlign = (Align == StrictAlign);
- // Transfer the remainder of BB and its successor edges to exitMBB.
- exitMBB->splice(exitMBB->begin(), BB,
- std::next(MachineBasicBlock::iterator(MI)), BB->end());
- exitMBB->transferSuccessorsAndUpdatePHIs(BB);
+ setHasExtractBitsInsn(true);
- unsigned scratch = MRI.createVirtualRegister(TRC);
- MRI.constrainRegClass(scratch, TRCsp);
+ if (Subtarget->hasNEON()) {
+ // FIXME: v1f64 shouldn't be legal if we can avoid it, because it leads to
+ // silliness like this:
+ setOperationAction(ISD::FABS, MVT::v1f64, Expand);
+ setOperationAction(ISD::FADD, MVT::v1f64, Expand);
+ setOperationAction(ISD::FCEIL, MVT::v1f64, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::v1f64, Expand);
+ setOperationAction(ISD::FCOS, MVT::v1f64, Expand);
+ setOperationAction(ISD::FDIV, MVT::v1f64, Expand);
+ setOperationAction(ISD::FFLOOR, MVT::v1f64, Expand);
+ setOperationAction(ISD::FMA, MVT::v1f64, Expand);
+ setOperationAction(ISD::FMUL, MVT::v1f64, Expand);
+ setOperationAction(ISD::FNEARBYINT, MVT::v1f64, Expand);
+ setOperationAction(ISD::FNEG, MVT::v1f64, Expand);
+ setOperationAction(ISD::FPOW, MVT::v1f64, Expand);
+ setOperationAction(ISD::FREM, MVT::v1f64, Expand);
+ setOperationAction(ISD::FROUND, MVT::v1f64, Expand);
+ setOperationAction(ISD::FRINT, MVT::v1f64, Expand);
+ setOperationAction(ISD::FSIN, MVT::v1f64, Expand);
+ setOperationAction(ISD::FSINCOS, MVT::v1f64, Expand);
+ setOperationAction(ISD::FSQRT, MVT::v1f64, Expand);
+ setOperationAction(ISD::FSUB, MVT::v1f64, Expand);
+ setOperationAction(ISD::FTRUNC, MVT::v1f64, Expand);
+ setOperationAction(ISD::SETCC, MVT::v1f64, Expand);
+ setOperationAction(ISD::BR_CC, MVT::v1f64, Expand);
+ setOperationAction(ISD::SELECT, MVT::v1f64, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::v1f64, Expand);
+ setOperationAction(ISD::FP_EXTEND, MVT::v1f64, Expand);
- // thisMBB:
- // ...
- // fallthrough --> loopMBB
- BB->addSuccessor(loopMBB);
+ setOperationAction(ISD::FP_TO_SINT, MVT::v1i64, Expand);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v1i64, Expand);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v1i64, Expand);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v1i64, Expand);
+ setOperationAction(ISD::FP_ROUND, MVT::v1f64, Expand);
- // loopMBB:
- // ldxr dest, ptr
- // cmp incr, dest (, sign extend if necessary)
- // csel scratch, dest, incr, cond
- // stxr stxr_status, scratch, ptr
- // cbnz stxr_status, loopMBB
- // fallthrough --> exitMBB
- BB = loopMBB;
- BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr);
+ setOperationAction(ISD::MUL, MVT::v1i64, Expand);
- // Build compare and cmov instructions.
- MRI.constrainRegClass(incr, TRCsp);
- BuildMI(BB, dl, TII->get(CmpOp))
- .addReg(incr).addReg(oldval).addImm(0);
+    // AArch64 doesn't have direct vector->f32 conversion instructions for
+ // elements smaller than i32, so promote the input to i32 first.
+ setOperationAction(ISD::UINT_TO_FP, MVT::v4i8, Promote);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v4i8, Promote);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Promote);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Promote);
+ // Similarly, there is no direct i32 -> f64 vector conversion instruction.
+ setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Custom);
- BuildMI(BB, dl, TII->get(Size == 8 ? AArch64::CSELxxxc : AArch64::CSELwwwc),
- scratch)
- .addReg(oldval).addReg(incr).addImm(Cond);
+ // AArch64 doesn't have MUL.2d:
+ setOperationAction(ISD::MUL, MVT::v2i64, Expand);
+ setOperationAction(ISD::ANY_EXTEND, MVT::v4i32, Legal);
+ setTruncStoreAction(MVT::v2i32, MVT::v2i16, Expand);
+ // Likewise, narrowing and extending vector loads/stores aren't handled
+ // directly.
+ for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
+ VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
+
+ setOperationAction(ISD::SIGN_EXTEND_INREG, (MVT::SimpleValueType)VT,
+ Expand);
+
+ setOperationAction(ISD::MULHS, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::SMUL_LOHI, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::MULHU, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::UMUL_LOHI, (MVT::SimpleValueType)VT, Expand);
+
+ setOperationAction(ISD::BSWAP, (MVT::SimpleValueType)VT, Expand);
+
+ for (unsigned InnerVT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
+ InnerVT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++InnerVT)
+ setTruncStoreAction((MVT::SimpleValueType)VT,
+ (MVT::SimpleValueType)InnerVT, Expand);
+ setLoadExtAction(ISD::SEXTLOAD, (MVT::SimpleValueType)VT, Expand);
+ setLoadExtAction(ISD::ZEXTLOAD, (MVT::SimpleValueType)VT, Expand);
+ setLoadExtAction(ISD::EXTLOAD, (MVT::SimpleValueType)VT, Expand);
+ }
- unsigned stxr_status = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
- MRI.constrainRegClass(stxr_status, &AArch64::GPR32wspRegClass);
+ // AArch64 has implementations of a lot of rounding-like FP operations.
+    static MVT RoundingVecTypes[] = {MVT::v2f32, MVT::v4f32, MVT::v2f64};
+ for (unsigned I = 0; I < array_lengthof(RoundingVecTypes); ++I) {
+ MVT Ty = RoundingVecTypes[I];
+ setOperationAction(ISD::FFLOOR, Ty, Legal);
+ setOperationAction(ISD::FNEARBYINT, Ty, Legal);
+ setOperationAction(ISD::FCEIL, Ty, Legal);
+ setOperationAction(ISD::FRINT, Ty, Legal);
+ setOperationAction(ISD::FTRUNC, Ty, Legal);
+ setOperationAction(ISD::FROUND, Ty, Legal);
+ }
+ }
+}
- BuildMI(BB, dl, TII->get(strOpc), stxr_status)
- .addReg(scratch).addReg(ptr);
- BuildMI(BB, dl, TII->get(AArch64::CBNZw))
- .addReg(stxr_status).addMBB(loopMBB);
+void AArch64TargetLowering::addTypeForNEON(EVT VT, EVT PromotedBitwiseVT) {
+ if (VT == MVT::v2f32) {
+ setOperationAction(ISD::LOAD, VT.getSimpleVT(), Promote);
+ AddPromotedToType(ISD::LOAD, VT.getSimpleVT(), MVT::v2i32);
- BB->addSuccessor(loopMBB);
- BB->addSuccessor(exitMBB);
+ setOperationAction(ISD::STORE, VT.getSimpleVT(), Promote);
+ AddPromotedToType(ISD::STORE, VT.getSimpleVT(), MVT::v2i32);
+ } else if (VT == MVT::v2f64 || VT == MVT::v4f32) {
+ setOperationAction(ISD::LOAD, VT.getSimpleVT(), Promote);
+ AddPromotedToType(ISD::LOAD, VT.getSimpleVT(), MVT::v2i64);
- // exitMBB:
- // ...
- BB = exitMBB;
+ setOperationAction(ISD::STORE, VT.getSimpleVT(), Promote);
+ AddPromotedToType(ISD::STORE, VT.getSimpleVT(), MVT::v2i64);
+ }
- MI->eraseFromParent(); // The instruction is gone now.
+ // Mark vector float intrinsics as expand.
+ if (VT == MVT::v2f32 || VT == MVT::v4f32 || VT == MVT::v2f64) {
+ setOperationAction(ISD::FSIN, VT.getSimpleVT(), Expand);
+ setOperationAction(ISD::FCOS, VT.getSimpleVT(), Expand);
+ setOperationAction(ISD::FPOWI, VT.getSimpleVT(), Expand);
+ setOperationAction(ISD::FPOW, VT.getSimpleVT(), Expand);
+ setOperationAction(ISD::FLOG, VT.getSimpleVT(), Expand);
+ setOperationAction(ISD::FLOG2, VT.getSimpleVT(), Expand);
+ setOperationAction(ISD::FLOG10, VT.getSimpleVT(), Expand);
+ setOperationAction(ISD::FEXP, VT.getSimpleVT(), Expand);
+ setOperationAction(ISD::FEXP2, VT.getSimpleVT(), Expand);
+ }
- return BB;
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT.getSimpleVT(), Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, VT.getSimpleVT(), Custom);
+ setOperationAction(ISD::BUILD_VECTOR, VT.getSimpleVT(), Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, VT.getSimpleVT(), Custom);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, VT.getSimpleVT(), Custom);
+ setOperationAction(ISD::SRA, VT.getSimpleVT(), Custom);
+ setOperationAction(ISD::SRL, VT.getSimpleVT(), Custom);
+ setOperationAction(ISD::SHL, VT.getSimpleVT(), Custom);
+ setOperationAction(ISD::AND, VT.getSimpleVT(), Custom);
+ setOperationAction(ISD::OR, VT.getSimpleVT(), Custom);
+ setOperationAction(ISD::SETCC, VT.getSimpleVT(), Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, VT.getSimpleVT(), Legal);
+
+ setOperationAction(ISD::SELECT, VT.getSimpleVT(), Expand);
+ setOperationAction(ISD::SELECT_CC, VT.getSimpleVT(), Expand);
+ setOperationAction(ISD::VSELECT, VT.getSimpleVT(), Expand);
+ setLoadExtAction(ISD::EXTLOAD, VT.getSimpleVT(), Expand);
+
+ // CNT supports only B element sizes.
+ if (VT != MVT::v8i8 && VT != MVT::v16i8)
+ setOperationAction(ISD::CTPOP, VT.getSimpleVT(), Expand);
+
+ setOperationAction(ISD::UDIV, VT.getSimpleVT(), Expand);
+ setOperationAction(ISD::SDIV, VT.getSimpleVT(), Expand);
+ setOperationAction(ISD::UREM, VT.getSimpleVT(), Expand);
+ setOperationAction(ISD::SREM, VT.getSimpleVT(), Expand);
+ setOperationAction(ISD::FREM, VT.getSimpleVT(), Expand);
+
+ setOperationAction(ISD::FP_TO_SINT, VT.getSimpleVT(), Custom);
+ setOperationAction(ISD::FP_TO_UINT, VT.getSimpleVT(), Custom);
+
+ if (Subtarget->isLittleEndian()) {
+ for (unsigned im = (unsigned)ISD::PRE_INC;
+ im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
+ setIndexedLoadAction(im, VT.getSimpleVT(), Legal);
+ setIndexedStoreAction(im, VT.getSimpleVT(), Legal);
+ }
+ }
}
-MachineBasicBlock *
-AArch64TargetLowering::emitAtomicCmpSwap(MachineInstr *MI,
- MachineBasicBlock *BB,
- unsigned Size) const {
- unsigned dest = MI->getOperand(0).getReg();
- unsigned ptr = MI->getOperand(1).getReg();
- unsigned oldval = MI->getOperand(2).getReg();
- unsigned newval = MI->getOperand(3).getReg();
- AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(4).getImm());
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
- DebugLoc dl = MI->getDebugLoc();
-
- MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
- const TargetRegisterClass *TRCsp;
- TRCsp = Size == 8 ? &AArch64::GPR64xspRegClass : &AArch64::GPR32wspRegClass;
-
- unsigned ldrOpc, strOpc;
- getExclusiveOperation(Size, Ord, ldrOpc, strOpc);
-
- MachineFunction *MF = BB->getParent();
- const BasicBlock *LLVM_BB = BB->getBasicBlock();
- MachineFunction::iterator It = BB;
- ++It; // insert the new blocks after the current block
-
- MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB);
- MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB);
- MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
- MF->insert(It, loop1MBB);
- MF->insert(It, loop2MBB);
- MF->insert(It, exitMBB);
-
- // Transfer the remainder of BB and its successor edges to exitMBB.
- exitMBB->splice(exitMBB->begin(), BB,
- std::next(MachineBasicBlock::iterator(MI)), BB->end());
- exitMBB->transferSuccessorsAndUpdatePHIs(BB);
-
- // thisMBB:
- // ...
- // fallthrough --> loop1MBB
- BB->addSuccessor(loop1MBB);
-
- // loop1MBB:
- // ldxr dest, [ptr]
- // cmp dest, oldval
- // b.ne exitMBB
- BB = loop1MBB;
- BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr);
-
- unsigned CmpOp = Size == 8 ? AArch64::CMPxx_lsl : AArch64::CMPww_lsl;
- MRI.constrainRegClass(dest, TRCsp);
- BuildMI(BB, dl, TII->get(CmpOp))
- .addReg(dest).addReg(oldval).addImm(0);
- BuildMI(BB, dl, TII->get(AArch64::Bcc))
- .addImm(A64CC::NE).addMBB(exitMBB);
- BB->addSuccessor(loop2MBB);
- BB->addSuccessor(exitMBB);
-
- // loop2MBB:
- // strex stxr_status, newval, [ptr]
- // cbnz stxr_status, loop1MBB
- BB = loop2MBB;
- unsigned stxr_status = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
- MRI.constrainRegClass(stxr_status, &AArch64::GPR32wspRegClass);
-
- BuildMI(BB, dl, TII->get(strOpc), stxr_status).addReg(newval).addReg(ptr);
- BuildMI(BB, dl, TII->get(AArch64::CBNZw))
- .addReg(stxr_status).addMBB(loop1MBB);
- BB->addSuccessor(loop1MBB);
- BB->addSuccessor(exitMBB);
-
- // exitMBB:
- // ...
- BB = exitMBB;
-
- MI->eraseFromParent(); // The instruction is gone now.
-
- return BB;
+void AArch64TargetLowering::addDRTypeForNEON(MVT VT) {
+ addRegisterClass(VT, &AArch64::FPR64RegClass);
+ addTypeForNEON(VT, MVT::v2i32);
+}
+
+void AArch64TargetLowering::addQRTypeForNEON(MVT VT) {
+ addRegisterClass(VT, &AArch64::FPR128RegClass);
+ addTypeForNEON(VT, MVT::v4i32);
+}
+
+EVT AArch64TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
+ if (!VT.isVector())
+ return MVT::i32;
+ return VT.changeVectorElementTypeToInteger();
+}
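+
+// Illustrative note (assumed, not part of this patch): a scalar setcc such as
+// "f32 == f32" therefore produces an i32, while a vector setcc such as
+// "v4f32 == v4f32" produces a v4i32 lane mask of all-ones/all-zeros values.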
+
+/// computeKnownBitsForTargetNode - Determine which bits of Op are known to be
+/// either zero or one and return them in the KnownZero/KnownOne bitsets.
+void AArch64TargetLowering::computeKnownBitsForTargetNode(
+ const SDValue Op, APInt &KnownZero, APInt &KnownOne,
+ const SelectionDAG &DAG, unsigned Depth) const {
+ switch (Op.getOpcode()) {
+ default:
+ break;
+ case AArch64ISD::CSEL: {
+ APInt KnownZero2, KnownOne2;
+ DAG.computeKnownBits(Op->getOperand(0), KnownZero, KnownOne, Depth + 1);
+ DAG.computeKnownBits(Op->getOperand(1), KnownZero2, KnownOne2, Depth + 1);
+ KnownZero &= KnownZero2;
+ KnownOne &= KnownOne2;
+ break;
+ }
+ case ISD::INTRINSIC_W_CHAIN: {
+ ConstantSDNode *CN = cast<ConstantSDNode>(Op->getOperand(1));
+ Intrinsic::ID IntID = static_cast<Intrinsic::ID>(CN->getZExtValue());
+ switch (IntID) {
+ default: return;
+ case Intrinsic::aarch64_ldaxr:
+ case Intrinsic::aarch64_ldxr: {
+ unsigned BitWidth = KnownOne.getBitWidth();
+ EVT VT = cast<MemIntrinsicSDNode>(Op)->getMemoryVT();
+ unsigned MemBits = VT.getScalarType().getSizeInBits();
+ KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits);
+ return;
+ }
+ }
+ break;
+ }
+ case ISD::INTRINSIC_WO_CHAIN:
+ case ISD::INTRINSIC_VOID: {
+ unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ switch (IntNo) {
+ default:
+ break;
+ case Intrinsic::aarch64_neon_umaxv:
+ case Intrinsic::aarch64_neon_uminv: {
+ // Figure out the element type of the vector operand. UMAXV and UMINV
+ // zero extend the result, so we can mark as known zero all the bits
+ // above the element width. 32-bit elements or larger don't need this,
+ // as those are legal types and will be handled by isel directly.
+ MVT VT = Op.getOperand(1).getValueType().getSimpleVT();
+ unsigned BitWidth = KnownZero.getBitWidth();
+ if (VT == MVT::v8i8 || VT == MVT::v16i8) {
+ assert(BitWidth >= 8 && "Unexpected width!");
+ APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth - 8);
+ KnownZero |= Mask;
+ } else if (VT == MVT::v4i16 || VT == MVT::v8i16) {
+ assert(BitWidth >= 16 && "Unexpected width!");
+ APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth - 16);
+ KnownZero |= Mask;
+ }
+ break;
+ }
+ }
+ }
+ }
+}
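The umaxv/uminv case above relies on a simple invariant: the result of an unsigned byte or halfword reduction always fits in the element width, so every higher bit of the scalar result is zero. A standalone sketch of that invariant in plain C++ (umaxv_v8i8 is a hypothetical model of the instruction, not an LLVM API):

#include <algorithm>
#include <cassert>
#include <cstdint>

// Model of UMAXV on a v8i8 vector: the zero-extended i32 result can never
// exceed 0xff, which is exactly what the known-bits code records.
static uint32_t umaxv_v8i8(const uint8_t (&v)[8]) {
  uint32_t r = 0;
  for (uint8_t e : v)
    r = std::max<uint32_t>(r, e);
  return r;
}

int main() {
  uint8_t v[8] = {3, 200, 7, 255, 0, 9, 42, 1};
  assert((umaxv_v8i8(v) & ~0xffu) == 0); // bits [31:8] are known zero
}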
+
+MVT AArch64TargetLowering::getScalarShiftAmountTy(EVT LHSTy) const {
+ return MVT::i64;
+}
+
+unsigned AArch64TargetLowering::getMaximalGlobalOffset() const {
+ // FIXME: On AArch64, this depends on the type.
+ // Basically, the addressable offsets are 0 to 4095 * Ty.getSizeInBytes(),
+ // and the offset has to be a multiple of the related size in bytes.
+ return 4095;
+}
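A sketch of the addressing rule the FIXME describes, assuming the scaled unsigned-immediate form of LDR/STR: the offset must be a multiple of the access size and at most 4095 such units. isValidScaledOffset is a hypothetical helper used only for illustration, not the real encoding check:

#include <cstdint>

static bool isValidScaledOffset(uint64_t Offset, uint64_t SizeInBytes) {
  // Offset must be size-aligned and the scaled value must fit in 12 bits.
  return SizeInBytes != 0 && Offset % SizeInBytes == 0 &&
         Offset / SizeInBytes <= 4095;
}
// e.g. isValidScaledOffset(32760, 8) holds (4095 * 8); 32768 does not.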
+
+FastISel *
+AArch64TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
+ const TargetLibraryInfo *libInfo) const {
+ return AArch64::createFastISel(funcInfo, libInfo);
+}
+
+const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
+ switch (Opcode) {
+ default:
+ return nullptr;
+ case AArch64ISD::CALL: return "AArch64ISD::CALL";
+ case AArch64ISD::ADRP: return "AArch64ISD::ADRP";
+ case AArch64ISD::ADDlow: return "AArch64ISD::ADDlow";
+ case AArch64ISD::LOADgot: return "AArch64ISD::LOADgot";
+ case AArch64ISD::RET_FLAG: return "AArch64ISD::RET_FLAG";
+ case AArch64ISD::BRCOND: return "AArch64ISD::BRCOND";
+ case AArch64ISD::CSEL: return "AArch64ISD::CSEL";
+ case AArch64ISD::FCSEL: return "AArch64ISD::FCSEL";
+ case AArch64ISD::CSINV: return "AArch64ISD::CSINV";
+ case AArch64ISD::CSNEG: return "AArch64ISD::CSNEG";
+ case AArch64ISD::CSINC: return "AArch64ISD::CSINC";
+ case AArch64ISD::THREAD_POINTER: return "AArch64ISD::THREAD_POINTER";
+ case AArch64ISD::TLSDESC_CALL: return "AArch64ISD::TLSDESC_CALL";
+ case AArch64ISD::ADC: return "AArch64ISD::ADC";
+ case AArch64ISD::SBC: return "AArch64ISD::SBC";
+ case AArch64ISD::ADDS: return "AArch64ISD::ADDS";
+ case AArch64ISD::SUBS: return "AArch64ISD::SUBS";
+ case AArch64ISD::ADCS: return "AArch64ISD::ADCS";
+ case AArch64ISD::SBCS: return "AArch64ISD::SBCS";
+ case AArch64ISD::ANDS: return "AArch64ISD::ANDS";
+ case AArch64ISD::FCMP: return "AArch64ISD::FCMP";
+ case AArch64ISD::FMIN: return "AArch64ISD::FMIN";
+ case AArch64ISD::FMAX: return "AArch64ISD::FMAX";
+ case AArch64ISD::DUP: return "AArch64ISD::DUP";
+ case AArch64ISD::DUPLANE8: return "AArch64ISD::DUPLANE8";
+ case AArch64ISD::DUPLANE16: return "AArch64ISD::DUPLANE16";
+ case AArch64ISD::DUPLANE32: return "AArch64ISD::DUPLANE32";
+ case AArch64ISD::DUPLANE64: return "AArch64ISD::DUPLANE64";
+ case AArch64ISD::MOVI: return "AArch64ISD::MOVI";
+ case AArch64ISD::MOVIshift: return "AArch64ISD::MOVIshift";
+ case AArch64ISD::MOVIedit: return "AArch64ISD::MOVIedit";
+ case AArch64ISD::MOVImsl: return "AArch64ISD::MOVImsl";
+ case AArch64ISD::FMOV: return "AArch64ISD::FMOV";
+ case AArch64ISD::MVNIshift: return "AArch64ISD::MVNIshift";
+ case AArch64ISD::MVNImsl: return "AArch64ISD::MVNImsl";
+ case AArch64ISD::BICi: return "AArch64ISD::BICi";
+ case AArch64ISD::ORRi: return "AArch64ISD::ORRi";
+ case AArch64ISD::BSL: return "AArch64ISD::BSL";
+ case AArch64ISD::NEG: return "AArch64ISD::NEG";
+ case AArch64ISD::EXTR: return "AArch64ISD::EXTR";
+ case AArch64ISD::ZIP1: return "AArch64ISD::ZIP1";
+ case AArch64ISD::ZIP2: return "AArch64ISD::ZIP2";
+ case AArch64ISD::UZP1: return "AArch64ISD::UZP1";
+ case AArch64ISD::UZP2: return "AArch64ISD::UZP2";
+ case AArch64ISD::TRN1: return "AArch64ISD::TRN1";
+ case AArch64ISD::TRN2: return "AArch64ISD::TRN2";
+ case AArch64ISD::REV16: return "AArch64ISD::REV16";
+ case AArch64ISD::REV32: return "AArch64ISD::REV32";
+ case AArch64ISD::REV64: return "AArch64ISD::REV64";
+ case AArch64ISD::EXT: return "AArch64ISD::EXT";
+ case AArch64ISD::VSHL: return "AArch64ISD::VSHL";
+ case AArch64ISD::VLSHR: return "AArch64ISD::VLSHR";
+ case AArch64ISD::VASHR: return "AArch64ISD::VASHR";
+ case AArch64ISD::CMEQ: return "AArch64ISD::CMEQ";
+ case AArch64ISD::CMGE: return "AArch64ISD::CMGE";
+ case AArch64ISD::CMGT: return "AArch64ISD::CMGT";
+ case AArch64ISD::CMHI: return "AArch64ISD::CMHI";
+ case AArch64ISD::CMHS: return "AArch64ISD::CMHS";
+ case AArch64ISD::FCMEQ: return "AArch64ISD::FCMEQ";
+ case AArch64ISD::FCMGE: return "AArch64ISD::FCMGE";
+ case AArch64ISD::FCMGT: return "AArch64ISD::FCMGT";
+ case AArch64ISD::CMEQz: return "AArch64ISD::CMEQz";
+ case AArch64ISD::CMGEz: return "AArch64ISD::CMGEz";
+ case AArch64ISD::CMGTz: return "AArch64ISD::CMGTz";
+ case AArch64ISD::CMLEz: return "AArch64ISD::CMLEz";
+ case AArch64ISD::CMLTz: return "AArch64ISD::CMLTz";
+ case AArch64ISD::FCMEQz: return "AArch64ISD::FCMEQz";
+ case AArch64ISD::FCMGEz: return "AArch64ISD::FCMGEz";
+ case AArch64ISD::FCMGTz: return "AArch64ISD::FCMGTz";
+ case AArch64ISD::FCMLEz: return "AArch64ISD::FCMLEz";
+ case AArch64ISD::FCMLTz: return "AArch64ISD::FCMLTz";
+ case AArch64ISD::NOT: return "AArch64ISD::NOT";
+ case AArch64ISD::BIT: return "AArch64ISD::BIT";
+ case AArch64ISD::CBZ: return "AArch64ISD::CBZ";
+ case AArch64ISD::CBNZ: return "AArch64ISD::CBNZ";
+ case AArch64ISD::TBZ: return "AArch64ISD::TBZ";
+ case AArch64ISD::TBNZ: return "AArch64ISD::TBNZ";
+ case AArch64ISD::TC_RETURN: return "AArch64ISD::TC_RETURN";
+ case AArch64ISD::SITOF: return "AArch64ISD::SITOF";
+ case AArch64ISD::UITOF: return "AArch64ISD::UITOF";
+ case AArch64ISD::SQSHL_I: return "AArch64ISD::SQSHL_I";
+ case AArch64ISD::UQSHL_I: return "AArch64ISD::UQSHL_I";
+ case AArch64ISD::SRSHR_I: return "AArch64ISD::SRSHR_I";
+ case AArch64ISD::URSHR_I: return "AArch64ISD::URSHR_I";
+ case AArch64ISD::SQSHLU_I: return "AArch64ISD::SQSHLU_I";
+ case AArch64ISD::WrapperLarge: return "AArch64ISD::WrapperLarge";
+ case AArch64ISD::LD2post: return "AArch64ISD::LD2post";
+ case AArch64ISD::LD3post: return "AArch64ISD::LD3post";
+ case AArch64ISD::LD4post: return "AArch64ISD::LD4post";
+ case AArch64ISD::ST2post: return "AArch64ISD::ST2post";
+ case AArch64ISD::ST3post: return "AArch64ISD::ST3post";
+ case AArch64ISD::ST4post: return "AArch64ISD::ST4post";
+ case AArch64ISD::LD1x2post: return "AArch64ISD::LD1x2post";
+ case AArch64ISD::LD1x3post: return "AArch64ISD::LD1x3post";
+ case AArch64ISD::LD1x4post: return "AArch64ISD::LD1x4post";
+ case AArch64ISD::ST1x2post: return "AArch64ISD::ST1x2post";
+ case AArch64ISD::ST1x3post: return "AArch64ISD::ST1x3post";
+ case AArch64ISD::ST1x4post: return "AArch64ISD::ST1x4post";
+ case AArch64ISD::LD1DUPpost: return "AArch64ISD::LD1DUPpost";
+ case AArch64ISD::LD2DUPpost: return "AArch64ISD::LD2DUPpost";
+ case AArch64ISD::LD3DUPpost: return "AArch64ISD::LD3DUPpost";
+ case AArch64ISD::LD4DUPpost: return "AArch64ISD::LD4DUPpost";
+ case AArch64ISD::LD1LANEpost: return "AArch64ISD::LD1LANEpost";
+ case AArch64ISD::LD2LANEpost: return "AArch64ISD::LD2LANEpost";
+ case AArch64ISD::LD3LANEpost: return "AArch64ISD::LD3LANEpost";
+ case AArch64ISD::LD4LANEpost: return "AArch64ISD::LD4LANEpost";
+ case AArch64ISD::ST2LANEpost: return "AArch64ISD::ST2LANEpost";
+ case AArch64ISD::ST3LANEpost: return "AArch64ISD::ST3LANEpost";
+ case AArch64ISD::ST4LANEpost: return "AArch64ISD::ST4LANEpost";
+ }
}
MachineBasicBlock *
AArch64TargetLowering::EmitF128CSEL(MachineInstr *MI,
MachineBasicBlock *MBB) const {
- // We materialise the F128CSEL pseudo-instruction using conditional branches
- // and loads, giving an instruciton sequence like:
- // str q0, [sp]
- // b.ne IfTrue
- // b Finish
- // IfTrue:
- // str q1, [sp]
- // Finish:
- // ldr q0, [sp]
- //
- // Using virtual registers would probably not be beneficial since COPY
- // instructions are expensive for f128 (there's no actual instruction to
- // implement them).
- //
- // An alternative would be to do an integer-CSEL on some address. E.g.:
- // mov x0, sp
- // add x1, sp, #16
- // str q0, [x0]
- // str q1, [x1]
- // csel x0, x0, x1, ne
- // ldr q0, [x0]
- //
- // It's unclear which approach is actually optimal.
+ // We materialise the F128CSEL pseudo-instruction as some control flow and a
+ // phi node:
+
+ // OrigBB:
+ // [... previous instrs leading to comparison ...]
+ // b.ne TrueBB
+ // b EndBB
+ // TrueBB:
+ // ; Fallthrough
+ // EndBB:
+ // Dest = PHI [IfTrue, TrueBB], [IfFalse, OrigBB]
+
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
MachineFunction *MF = MBB->getParent();
const BasicBlock *LLVM_BB = MBB->getBasicBlock();
@@ -906,49 +792,24 @@ AArch64TargetLowering::EmitF128CSEL(MachineInstr *MI,
MBB->end());
EndBB->transferSuccessorsAndUpdatePHIs(MBB);
- // We need somewhere to store the f128 value needed.
- int ScratchFI = MF->getFrameInfo()->CreateSpillStackObject(16, 16);
-
- // [... start of incoming MBB ...]
- // str qIFFALSE, [sp]
- // b.cc IfTrue
- // b Done
- BuildMI(MBB, DL, TII->get(AArch64::LSFP128_STR))
- .addReg(IfFalseReg)
- .addFrameIndex(ScratchFI)
- .addImm(0);
- BuildMI(MBB, DL, TII->get(AArch64::Bcc))
- .addImm(CondCode)
- .addMBB(TrueBB);
- BuildMI(MBB, DL, TII->get(AArch64::Bimm))
- .addMBB(EndBB);
+ BuildMI(MBB, DL, TII->get(AArch64::Bcc)).addImm(CondCode).addMBB(TrueBB);
+ BuildMI(MBB, DL, TII->get(AArch64::B)).addMBB(EndBB);
MBB->addSuccessor(TrueBB);
MBB->addSuccessor(EndBB);
+ // TrueBB falls through to the end.
+ TrueBB->addSuccessor(EndBB);
+
if (!NZCVKilled) {
- // NZCV is live-through TrueBB.
TrueBB->addLiveIn(AArch64::NZCV);
EndBB->addLiveIn(AArch64::NZCV);
}
- // IfTrue:
- // str qIFTRUE, [sp]
- BuildMI(TrueBB, DL, TII->get(AArch64::LSFP128_STR))
- .addReg(IfTrueReg)
- .addFrameIndex(ScratchFI)
- .addImm(0);
-
- // Note: fallthrough. We can rely on LLVM adding a branch if it reorders the
- // blocks.
- TrueBB->addSuccessor(EndBB);
-
- // Done:
- // ldr qDEST, [sp]
- // [... rest of incoming MBB ...]
- MachineInstr *StartOfEnd = EndBB->begin();
- BuildMI(*EndBB, StartOfEnd, DL, TII->get(AArch64::LSFP128_LDR), DestReg)
- .addFrameIndex(ScratchFI)
- .addImm(0);
+ BuildMI(*EndBB, EndBB->begin(), DL, TII->get(AArch64::PHI), DestReg)
+ .addReg(IfTrueReg)
+ .addMBB(TrueBB)
+ .addReg(IfFalseReg)
+ .addMBB(MBB);
MI->eraseFromParent();
return EndBB;
@@ -956,853 +817,1140 @@ AArch64TargetLowering::EmitF128CSEL(MachineInstr *MI,
MachineBasicBlock *
AArch64TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
- MachineBasicBlock *MBB) const {
+ MachineBasicBlock *BB) const {
switch (MI->getOpcode()) {
- default: llvm_unreachable("Unhandled instruction with custom inserter");
+ default:
+#ifndef NDEBUG
+ MI->dump();
+#endif
+ assert(0 && "Unexpected instruction for custom inserter!");
+ break;
+
case AArch64::F128CSEL:
- return EmitF128CSEL(MI, MBB);
- case AArch64::ATOMIC_LOAD_ADD_I8:
- return emitAtomicBinary(MI, MBB, 1, AArch64::ADDwww_lsl);
- case AArch64::ATOMIC_LOAD_ADD_I16:
- return emitAtomicBinary(MI, MBB, 2, AArch64::ADDwww_lsl);
- case AArch64::ATOMIC_LOAD_ADD_I32:
- return emitAtomicBinary(MI, MBB, 4, AArch64::ADDwww_lsl);
- case AArch64::ATOMIC_LOAD_ADD_I64:
- return emitAtomicBinary(MI, MBB, 8, AArch64::ADDxxx_lsl);
-
- case AArch64::ATOMIC_LOAD_SUB_I8:
- return emitAtomicBinary(MI, MBB, 1, AArch64::SUBwww_lsl);
- case AArch64::ATOMIC_LOAD_SUB_I16:
- return emitAtomicBinary(MI, MBB, 2, AArch64::SUBwww_lsl);
- case AArch64::ATOMIC_LOAD_SUB_I32:
- return emitAtomicBinary(MI, MBB, 4, AArch64::SUBwww_lsl);
- case AArch64::ATOMIC_LOAD_SUB_I64:
- return emitAtomicBinary(MI, MBB, 8, AArch64::SUBxxx_lsl);
-
- case AArch64::ATOMIC_LOAD_AND_I8:
- return emitAtomicBinary(MI, MBB, 1, AArch64::ANDwww_lsl);
- case AArch64::ATOMIC_LOAD_AND_I16:
- return emitAtomicBinary(MI, MBB, 2, AArch64::ANDwww_lsl);
- case AArch64::ATOMIC_LOAD_AND_I32:
- return emitAtomicBinary(MI, MBB, 4, AArch64::ANDwww_lsl);
- case AArch64::ATOMIC_LOAD_AND_I64:
- return emitAtomicBinary(MI, MBB, 8, AArch64::ANDxxx_lsl);
-
- case AArch64::ATOMIC_LOAD_OR_I8:
- return emitAtomicBinary(MI, MBB, 1, AArch64::ORRwww_lsl);
- case AArch64::ATOMIC_LOAD_OR_I16:
- return emitAtomicBinary(MI, MBB, 2, AArch64::ORRwww_lsl);
- case AArch64::ATOMIC_LOAD_OR_I32:
- return emitAtomicBinary(MI, MBB, 4, AArch64::ORRwww_lsl);
- case AArch64::ATOMIC_LOAD_OR_I64:
- return emitAtomicBinary(MI, MBB, 8, AArch64::ORRxxx_lsl);
-
- case AArch64::ATOMIC_LOAD_XOR_I8:
- return emitAtomicBinary(MI, MBB, 1, AArch64::EORwww_lsl);
- case AArch64::ATOMIC_LOAD_XOR_I16:
- return emitAtomicBinary(MI, MBB, 2, AArch64::EORwww_lsl);
- case AArch64::ATOMIC_LOAD_XOR_I32:
- return emitAtomicBinary(MI, MBB, 4, AArch64::EORwww_lsl);
- case AArch64::ATOMIC_LOAD_XOR_I64:
- return emitAtomicBinary(MI, MBB, 8, AArch64::EORxxx_lsl);
-
- case AArch64::ATOMIC_LOAD_NAND_I8:
- return emitAtomicBinary(MI, MBB, 1, AArch64::BICwww_lsl);
- case AArch64::ATOMIC_LOAD_NAND_I16:
- return emitAtomicBinary(MI, MBB, 2, AArch64::BICwww_lsl);
- case AArch64::ATOMIC_LOAD_NAND_I32:
- return emitAtomicBinary(MI, MBB, 4, AArch64::BICwww_lsl);
- case AArch64::ATOMIC_LOAD_NAND_I64:
- return emitAtomicBinary(MI, MBB, 8, AArch64::BICxxx_lsl);
-
- case AArch64::ATOMIC_LOAD_MIN_I8:
- return emitAtomicBinaryMinMax(MI, MBB, 1, AArch64::CMPww_sxtb, A64CC::GT);
- case AArch64::ATOMIC_LOAD_MIN_I16:
- return emitAtomicBinaryMinMax(MI, MBB, 2, AArch64::CMPww_sxth, A64CC::GT);
- case AArch64::ATOMIC_LOAD_MIN_I32:
- return emitAtomicBinaryMinMax(MI, MBB, 4, AArch64::CMPww_lsl, A64CC::GT);
- case AArch64::ATOMIC_LOAD_MIN_I64:
- return emitAtomicBinaryMinMax(MI, MBB, 8, AArch64::CMPxx_lsl, A64CC::GT);
-
- case AArch64::ATOMIC_LOAD_MAX_I8:
- return emitAtomicBinaryMinMax(MI, MBB, 1, AArch64::CMPww_sxtb, A64CC::LT);
- case AArch64::ATOMIC_LOAD_MAX_I16:
- return emitAtomicBinaryMinMax(MI, MBB, 2, AArch64::CMPww_sxth, A64CC::LT);
- case AArch64::ATOMIC_LOAD_MAX_I32:
- return emitAtomicBinaryMinMax(MI, MBB, 4, AArch64::CMPww_lsl, A64CC::LT);
- case AArch64::ATOMIC_LOAD_MAX_I64:
- return emitAtomicBinaryMinMax(MI, MBB, 8, AArch64::CMPxx_lsl, A64CC::LT);
-
- case AArch64::ATOMIC_LOAD_UMIN_I8:
- return emitAtomicBinaryMinMax(MI, MBB, 1, AArch64::CMPww_uxtb, A64CC::HI);
- case AArch64::ATOMIC_LOAD_UMIN_I16:
- return emitAtomicBinaryMinMax(MI, MBB, 2, AArch64::CMPww_uxth, A64CC::HI);
- case AArch64::ATOMIC_LOAD_UMIN_I32:
- return emitAtomicBinaryMinMax(MI, MBB, 4, AArch64::CMPww_lsl, A64CC::HI);
- case AArch64::ATOMIC_LOAD_UMIN_I64:
- return emitAtomicBinaryMinMax(MI, MBB, 8, AArch64::CMPxx_lsl, A64CC::HI);
-
- case AArch64::ATOMIC_LOAD_UMAX_I8:
- return emitAtomicBinaryMinMax(MI, MBB, 1, AArch64::CMPww_uxtb, A64CC::LO);
- case AArch64::ATOMIC_LOAD_UMAX_I16:
- return emitAtomicBinaryMinMax(MI, MBB, 2, AArch64::CMPww_uxth, A64CC::LO);
- case AArch64::ATOMIC_LOAD_UMAX_I32:
- return emitAtomicBinaryMinMax(MI, MBB, 4, AArch64::CMPww_lsl, A64CC::LO);
- case AArch64::ATOMIC_LOAD_UMAX_I64:
- return emitAtomicBinaryMinMax(MI, MBB, 8, AArch64::CMPxx_lsl, A64CC::LO);
-
- case AArch64::ATOMIC_SWAP_I8:
- return emitAtomicBinary(MI, MBB, 1, 0);
- case AArch64::ATOMIC_SWAP_I16:
- return emitAtomicBinary(MI, MBB, 2, 0);
- case AArch64::ATOMIC_SWAP_I32:
- return emitAtomicBinary(MI, MBB, 4, 0);
- case AArch64::ATOMIC_SWAP_I64:
- return emitAtomicBinary(MI, MBB, 8, 0);
-
- case AArch64::ATOMIC_CMP_SWAP_I8:
- return emitAtomicCmpSwap(MI, MBB, 1);
- case AArch64::ATOMIC_CMP_SWAP_I16:
- return emitAtomicCmpSwap(MI, MBB, 2);
- case AArch64::ATOMIC_CMP_SWAP_I32:
- return emitAtomicCmpSwap(MI, MBB, 4);
- case AArch64::ATOMIC_CMP_SWAP_I64:
- return emitAtomicCmpSwap(MI, MBB, 8);
+ return EmitF128CSEL(MI, BB);
+
+ case TargetOpcode::STACKMAP:
+ case TargetOpcode::PATCHPOINT:
+ return emitPatchPoint(MI, BB);
}
+ llvm_unreachable("Unexpected instruction for custom inserter!");
}
+//===----------------------------------------------------------------------===//
+// AArch64 Lowering private implementation.
+//===----------------------------------------------------------------------===//
-const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
- switch (Opcode) {
- case AArch64ISD::BR_CC: return "AArch64ISD::BR_CC";
- case AArch64ISD::Call: return "AArch64ISD::Call";
- case AArch64ISD::FPMOV: return "AArch64ISD::FPMOV";
- case AArch64ISD::GOTLoad: return "AArch64ISD::GOTLoad";
- case AArch64ISD::BFI: return "AArch64ISD::BFI";
- case AArch64ISD::EXTR: return "AArch64ISD::EXTR";
- case AArch64ISD::Ret: return "AArch64ISD::Ret";
- case AArch64ISD::SBFX: return "AArch64ISD::SBFX";
- case AArch64ISD::SELECT_CC: return "AArch64ISD::SELECT_CC";
- case AArch64ISD::SETCC: return "AArch64ISD::SETCC";
- case AArch64ISD::TC_RETURN: return "AArch64ISD::TC_RETURN";
- case AArch64ISD::THREAD_POINTER: return "AArch64ISD::THREAD_POINTER";
- case AArch64ISD::TLSDESCCALL: return "AArch64ISD::TLSDESCCALL";
- case AArch64ISD::WrapperLarge: return "AArch64ISD::WrapperLarge";
- case AArch64ISD::WrapperSmall: return "AArch64ISD::WrapperSmall";
-
- case AArch64ISD::NEON_MOVIMM:
- return "AArch64ISD::NEON_MOVIMM";
- case AArch64ISD::NEON_MVNIMM:
- return "AArch64ISD::NEON_MVNIMM";
- case AArch64ISD::NEON_FMOVIMM:
- return "AArch64ISD::NEON_FMOVIMM";
- case AArch64ISD::NEON_CMP:
- return "AArch64ISD::NEON_CMP";
- case AArch64ISD::NEON_CMPZ:
- return "AArch64ISD::NEON_CMPZ";
- case AArch64ISD::NEON_TST:
- return "AArch64ISD::NEON_TST";
- case AArch64ISD::NEON_QSHLs:
- return "AArch64ISD::NEON_QSHLs";
- case AArch64ISD::NEON_QSHLu:
- return "AArch64ISD::NEON_QSHLu";
- case AArch64ISD::NEON_VDUP:
- return "AArch64ISD::NEON_VDUP";
- case AArch64ISD::NEON_VDUPLANE:
- return "AArch64ISD::NEON_VDUPLANE";
- case AArch64ISD::NEON_REV16:
- return "AArch64ISD::NEON_REV16";
- case AArch64ISD::NEON_REV32:
- return "AArch64ISD::NEON_REV32";
- case AArch64ISD::NEON_REV64:
- return "AArch64ISD::NEON_REV64";
- case AArch64ISD::NEON_UZP1:
- return "AArch64ISD::NEON_UZP1";
- case AArch64ISD::NEON_UZP2:
- return "AArch64ISD::NEON_UZP2";
- case AArch64ISD::NEON_ZIP1:
- return "AArch64ISD::NEON_ZIP1";
- case AArch64ISD::NEON_ZIP2:
- return "AArch64ISD::NEON_ZIP2";
- case AArch64ISD::NEON_TRN1:
- return "AArch64ISD::NEON_TRN1";
- case AArch64ISD::NEON_TRN2:
- return "AArch64ISD::NEON_TRN2";
- case AArch64ISD::NEON_LD1_UPD:
- return "AArch64ISD::NEON_LD1_UPD";
- case AArch64ISD::NEON_LD2_UPD:
- return "AArch64ISD::NEON_LD2_UPD";
- case AArch64ISD::NEON_LD3_UPD:
- return "AArch64ISD::NEON_LD3_UPD";
- case AArch64ISD::NEON_LD4_UPD:
- return "AArch64ISD::NEON_LD4_UPD";
- case AArch64ISD::NEON_ST1_UPD:
- return "AArch64ISD::NEON_ST1_UPD";
- case AArch64ISD::NEON_ST2_UPD:
- return "AArch64ISD::NEON_ST2_UPD";
- case AArch64ISD::NEON_ST3_UPD:
- return "AArch64ISD::NEON_ST3_UPD";
- case AArch64ISD::NEON_ST4_UPD:
- return "AArch64ISD::NEON_ST4_UPD";
- case AArch64ISD::NEON_LD1x2_UPD:
- return "AArch64ISD::NEON_LD1x2_UPD";
- case AArch64ISD::NEON_LD1x3_UPD:
- return "AArch64ISD::NEON_LD1x3_UPD";
- case AArch64ISD::NEON_LD1x4_UPD:
- return "AArch64ISD::NEON_LD1x4_UPD";
- case AArch64ISD::NEON_ST1x2_UPD:
- return "AArch64ISD::NEON_ST1x2_UPD";
- case AArch64ISD::NEON_ST1x3_UPD:
- return "AArch64ISD::NEON_ST1x3_UPD";
- case AArch64ISD::NEON_ST1x4_UPD:
- return "AArch64ISD::NEON_ST1x4_UPD";
- case AArch64ISD::NEON_LD2DUP:
- return "AArch64ISD::NEON_LD2DUP";
- case AArch64ISD::NEON_LD3DUP:
- return "AArch64ISD::NEON_LD3DUP";
- case AArch64ISD::NEON_LD4DUP:
- return "AArch64ISD::NEON_LD4DUP";
- case AArch64ISD::NEON_LD2DUP_UPD:
- return "AArch64ISD::NEON_LD2DUP_UPD";
- case AArch64ISD::NEON_LD3DUP_UPD:
- return "AArch64ISD::NEON_LD3DUP_UPD";
- case AArch64ISD::NEON_LD4DUP_UPD:
- return "AArch64ISD::NEON_LD4DUP_UPD";
- case AArch64ISD::NEON_LD2LN_UPD:
- return "AArch64ISD::NEON_LD2LN_UPD";
- case AArch64ISD::NEON_LD3LN_UPD:
- return "AArch64ISD::NEON_LD3LN_UPD";
- case AArch64ISD::NEON_LD4LN_UPD:
- return "AArch64ISD::NEON_LD4LN_UPD";
- case AArch64ISD::NEON_ST2LN_UPD:
- return "AArch64ISD::NEON_ST2LN_UPD";
- case AArch64ISD::NEON_ST3LN_UPD:
- return "AArch64ISD::NEON_ST3LN_UPD";
- case AArch64ISD::NEON_ST4LN_UPD:
- return "AArch64ISD::NEON_ST4LN_UPD";
- case AArch64ISD::NEON_VEXTRACT:
- return "AArch64ISD::NEON_VEXTRACT";
+//===----------------------------------------------------------------------===//
+// Lowering Code
+//===----------------------------------------------------------------------===//
+
+/// changeIntCCToAArch64CC - Convert a DAG integer condition code to an
+/// AArch64 CC.
+static AArch64CC::CondCode changeIntCCToAArch64CC(ISD::CondCode CC) {
+ switch (CC) {
default:
- return NULL;
+ llvm_unreachable("Unknown condition code!");
+ case ISD::SETNE:
+ return AArch64CC::NE;
+ case ISD::SETEQ:
+ return AArch64CC::EQ;
+ case ISD::SETGT:
+ return AArch64CC::GT;
+ case ISD::SETGE:
+ return AArch64CC::GE;
+ case ISD::SETLT:
+ return AArch64CC::LT;
+ case ISD::SETLE:
+ return AArch64CC::LE;
+ case ISD::SETUGT:
+ return AArch64CC::HI;
+ case ISD::SETUGE:
+ return AArch64CC::HS;
+ case ISD::SETULT:
+ return AArch64CC::LO;
+ case ISD::SETULE:
+ return AArch64CC::LS;
}
}
-static const uint16_t AArch64FPRArgRegs[] = {
- AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3,
- AArch64::Q4, AArch64::Q5, AArch64::Q6, AArch64::Q7
-};
-static const unsigned NumFPRArgRegs = llvm::array_lengthof(AArch64FPRArgRegs);
+/// changeFPCCToAArch64CC - Convert a DAG fp condition code to an AArch64 CC.
+static void changeFPCCToAArch64CC(ISD::CondCode CC,
+ AArch64CC::CondCode &CondCode,
+ AArch64CC::CondCode &CondCode2) {
+ CondCode2 = AArch64CC::AL;
+ switch (CC) {
+ default:
+ llvm_unreachable("Unknown FP condition!");
+ case ISD::SETEQ:
+ case ISD::SETOEQ:
+ CondCode = AArch64CC::EQ;
+ break;
+ case ISD::SETGT:
+ case ISD::SETOGT:
+ CondCode = AArch64CC::GT;
+ break;
+ case ISD::SETGE:
+ case ISD::SETOGE:
+ CondCode = AArch64CC::GE;
+ break;
+ case ISD::SETOLT:
+ CondCode = AArch64CC::MI;
+ break;
+ case ISD::SETOLE:
+ CondCode = AArch64CC::LS;
+ break;
+ case ISD::SETONE:
+ CondCode = AArch64CC::MI;
+ CondCode2 = AArch64CC::GT;
+ break;
+ case ISD::SETO:
+ CondCode = AArch64CC::VC;
+ break;
+ case ISD::SETUO:
+ CondCode = AArch64CC::VS;
+ break;
+ case ISD::SETUEQ:
+ CondCode = AArch64CC::EQ;
+ CondCode2 = AArch64CC::VS;
+ break;
+ case ISD::SETUGT:
+ CondCode = AArch64CC::HI;
+ break;
+ case ISD::SETUGE:
+ CondCode = AArch64CC::PL;
+ break;
+ case ISD::SETLT:
+ case ISD::SETULT:
+ CondCode = AArch64CC::LT;
+ break;
+ case ISD::SETLE:
+ case ISD::SETULE:
+ CondCode = AArch64CC::LE;
+ break;
+ case ISD::SETNE:
+ case ISD::SETUNE:
+ CondCode = AArch64CC::NE;
+ break;
+ }
+}
-static const uint16_t AArch64ArgRegs[] = {
- AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3,
- AArch64::X4, AArch64::X5, AArch64::X6, AArch64::X7
-};
-static const unsigned NumArgRegs = llvm::array_lengthof(AArch64ArgRegs);
+/// changeVectorFPCCToAArch64CC - Convert a DAG fp condition code to an AArch64
+/// CC usable with the vector instructions. Fewer operations are available
+/// without a real NZCV register, so we have to use less efficient combinations
+/// to get the same effect.
+static void changeVectorFPCCToAArch64CC(ISD::CondCode CC,
+ AArch64CC::CondCode &CondCode,
+ AArch64CC::CondCode &CondCode2,
+ bool &Invert) {
+ Invert = false;
+ switch (CC) {
+ default:
+ // Mostly the scalar mappings work fine.
+ changeFPCCToAArch64CC(CC, CondCode, CondCode2);
+ break;
+ case ISD::SETUO:
+ Invert = true; // Fallthrough
+ case ISD::SETO:
+ CondCode = AArch64CC::MI;
+ CondCode2 = AArch64CC::GE;
+ break;
+ case ISD::SETUEQ:
+ case ISD::SETULT:
+ case ISD::SETULE:
+ case ISD::SETUGT:
+ case ISD::SETUGE:
+ // All of the compare-mask comparisons are ordered, but we can switch
+ // between the two by a double inversion. E.g. ULE == !OGT.
+ Invert = true;
+ changeFPCCToAArch64CC(getSetCCInverse(CC, false), CondCode, CondCode2);
+ break;
+ }
+}
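The double inversion relies on unordered predicates being exact negations of the opposite ordered predicate, NaNs included. A quick standalone check of the ULE == !OGT identity, assuming IEEE-754 comparison semantics:

#include <cassert>
#include <cmath>

static bool ule(double a, double b) { return !(a > b); } // SETULE as !SETOGT

int main() {
  assert(ule(1.0, 2.0));          // ordered: 1 <= 2
  assert(!ule(2.0, 1.0));         // ordered: 2 <= 1 is false
  assert(ule(std::nan(""), 1.0)); // unordered operand: ULE is true
}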
+
+static bool isLegalArithImmed(uint64_t C) {
+ // Matches AArch64DAGToDAGISel::SelectArithImmed().
+ return (C >> 12 == 0) || ((C & 0xFFFULL) == 0 && C >> 24 == 0);
+}
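The rule mirrors the arithmetic-immediate encoding: a 12-bit value, optionally shifted left by 12. A few illustrative values, using isLegalArithImmedRef as a standalone copy of the predicate above:

#include <cassert>
#include <cstdint>

static bool isLegalArithImmedRef(uint64_t C) {
  return (C >> 12 == 0) || ((C & 0xFFFULL) == 0 && C >> 24 == 0);
}

int main() {
  assert(isLegalArithImmedRef(4095));     // plain 12-bit immediate
  assert(isLegalArithImmedRef(0x123000)); // 12-bit immediate, LSL #12
  assert(!isLegalArithImmedRef(0x1001));  // needs 13 significant bits
}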
-static bool CC_AArch64NoMoreRegs(unsigned ValNo, MVT ValVT, MVT LocVT,
- CCValAssign::LocInfo LocInfo,
- ISD::ArgFlagsTy ArgFlags, CCState &State) {
- // Mark all remaining general purpose registers as allocated. We don't
- // backtrack: if (for example) an i128 gets put on the stack, no subsequent
- // i64 will go in registers (C.11).
- for (unsigned i = 0; i < NumArgRegs; ++i)
- State.AllocateReg(AArch64ArgRegs[i]);
+static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC,
+ SDLoc dl, SelectionDAG &DAG) {
+ EVT VT = LHS.getValueType();
+
+ if (VT.isFloatingPoint())
+ return DAG.getNode(AArch64ISD::FCMP, dl, VT, LHS, RHS);
+
+ // The CMP instruction is just an alias for SUBS, and representing it as
+ // SUBS means that it's possible to get CSE with subtract operations.
+ // A later phase can perform the optimization of setting the destination
+ // register to WZR/XZR if it ends up being unused.
+ unsigned Opcode = AArch64ISD::SUBS;
+
+ if (RHS.getOpcode() == ISD::SUB && isa<ConstantSDNode>(RHS.getOperand(0)) &&
+ cast<ConstantSDNode>(RHS.getOperand(0))->getZExtValue() == 0 &&
+ (CC == ISD::SETEQ || CC == ISD::SETNE)) {
+ // We'd like to combine a (CMP op1, (sub 0, op2)) into a CMN instruction on
+ // the grounds that "op1 - (-op2) == op1 + op2". However, the C and V flags
+ // can be set differently by this operation. It comes down to whether
+ // "SInt(~op2)+1 == SInt(~op2+1)" (and the same for UInt). If they are equal
+ // then everything is fine. If not then the optimization is wrong. Thus
+ // general comparisons are only valid if op2 != 0.
+
+ // So, finally, the only LLVM-native comparisons that don't mention C and V
+ // are SETEQ and SETNE. They're the only ones we can safely use CMN for in
+ // the absence of information about op2.
+ Opcode = AArch64ISD::ADDS;
+ RHS = RHS.getOperand(1);
+ } else if (LHS.getOpcode() == ISD::AND && isa<ConstantSDNode>(RHS) &&
+ cast<ConstantSDNode>(RHS)->getZExtValue() == 0 &&
+ !isUnsignedIntSetCC(CC)) {
+ // Similarly, (CMP (and X, Y), 0) can be implemented with a TST
+ // (a.k.a. ANDS) except that the flags are only guaranteed to work for one
+ // of the signed comparisons.
+ Opcode = AArch64ISD::ANDS;
+ RHS = LHS.getOperand(1);
+ LHS = LHS.getOperand(0);
+ }
- return false;
+ return DAG.getNode(Opcode, dl, DAG.getVTList(VT, MVT::i32), LHS, RHS)
+ .getValue(1);
}
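The flag subtlety behind the EQ/NE restriction shows up at op2 == INT64_MIN, where negation wraps to itself: the Z flag of the SUBS and ADDS forms always agrees, because both test op1 + op2 == 0 modulo 2^64, but the V flag can differ. A standalone two's-complement check of that claim:

#include <cassert>
#include <cstdint>

int main() {
  // op1 == op2 == INT64_MIN, the wrap-around case: -(op2) == op2.
  uint64_t op1 = 0x8000000000000000ULL, op2 = 0x8000000000000000ULL;
  // Z agreement: CMP op1, (0 - op2) and CMN op1, op2 test the same value.
  assert(((op1 - (0 - op2)) == 0) == ((op1 + op2) == 0));
  // V disagreement: INT64_MIN - INT64_MIN == 0 does not overflow, while
  // INT64_MIN + INT64_MIN does, so only SETEQ/SETNE survive the rewrite.
}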
-#include "AArch64GenCallingConv.inc"
+static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
+ SDValue &AArch64cc, SelectionDAG &DAG, SDLoc dl) {
+ if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
+ EVT VT = RHS.getValueType();
+ uint64_t C = RHSC->getZExtValue();
+ if (!isLegalArithImmed(C)) {
+ // The constant does not fit; try adjusting it by one.
+ switch (CC) {
+ default:
+ break;
+ case ISD::SETLT:
+ case ISD::SETGE:
+ if ((VT == MVT::i32 && C != 0x80000000 &&
+ isLegalArithImmed((uint32_t)(C - 1))) ||
+ (VT == MVT::i64 && C != 0x8000000000000000ULL &&
+ isLegalArithImmed(C - 1ULL))) {
+ CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
+ C = (VT == MVT::i32) ? (uint32_t)(C - 1) : C - 1;
+ RHS = DAG.getConstant(C, VT);
+ }
+ break;
+ case ISD::SETULT:
+ case ISD::SETUGE:
+ if ((VT == MVT::i32 && C != 0 &&
+ isLegalArithImmed((uint32_t)(C - 1))) ||
+ (VT == MVT::i64 && C != 0ULL && isLegalArithImmed(C - 1ULL))) {
+ CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
+ C = (VT == MVT::i32) ? (uint32_t)(C - 1) : C - 1;
+ RHS = DAG.getConstant(C, VT);
+ }
+ break;
+ case ISD::SETLE:
+ case ISD::SETGT:
+ if ((VT == MVT::i32 && C != 0x7fffffff &&
+ isLegalArithImmed((uint32_t)(C + 1))) ||
+ (VT == MVT::i64 && C != 0x7fffffffffffffffULL &&
+ isLegalArithImmed(C + 1ULL))) {
+ CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
+ C = (VT == MVT::i32) ? (uint32_t)(C + 1) : C + 1;
+ RHS = DAG.getConstant(C, VT);
+ }
+ break;
+ case ISD::SETULE:
+ case ISD::SETUGT:
+ if ((VT == MVT::i32 && C != 0xffffffff &&
+ isLegalArithImmed((uint32_t)(C + 1))) ||
+ (VT == MVT::i64 && C != 0xffffffffffffffffULL &&
+ isLegalArithImmed(C + 1ULL))) {
+ CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
+ C = (VT == MVT::i32) ? (uint32_t)(C + 1) : C + 1;
+ RHS = DAG.getConstant(C, VT);
+ }
+ break;
+ }
+ }
+ }
-CCAssignFn *AArch64TargetLowering::CCAssignFnForNode(CallingConv::ID CC) const {
+ SDValue Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
+ AArch64CC::CondCode AArch64CC = changeIntCCToAArch64CC(CC);
+ AArch64cc = DAG.getConstant(AArch64CC, MVT::i32);
+ return Cmp;
+}
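The adjust-by-one rewrites rest on plain integer identities; the payoff is that an unencodable constant can become an encodable one. For example 4097 is not a legal arithmetic immediate but 4096 (1 << 12) is, so x <u 4097 is rewritten as x <=u 4096. A standalone check of that identity over some boundary values:

#include <cassert>
#include <cstdint>

int main() {
  for (uint64_t x : {0ULL, 4095ULL, 4096ULL, 4097ULL, ~0ULL})
    assert((x < 4097) == (x <= 4096)); // SETULT with C <=> SETULE with C - 1
}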
- switch(CC) {
- default: llvm_unreachable("Unsupported calling convention");
- case CallingConv::Fast:
- case CallingConv::C:
- return CC_A64_APCS;
+static std::pair<SDValue, SDValue>
+getAArch64XALUOOp(AArch64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG) {
+ assert((Op.getValueType() == MVT::i32 || Op.getValueType() == MVT::i64) &&
+ "Unsupported value type");
+ SDValue Value, Overflow;
+ SDLoc DL(Op);
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ unsigned Opc = 0;
+ switch (Op.getOpcode()) {
+ default:
+ llvm_unreachable("Unknown overflow instruction!");
+ case ISD::SADDO:
+ Opc = AArch64ISD::ADDS;
+ CC = AArch64CC::VS;
+ break;
+ case ISD::UADDO:
+ Opc = AArch64ISD::ADDS;
+ CC = AArch64CC::HS;
+ break;
+ case ISD::SSUBO:
+ Opc = AArch64ISD::SUBS;
+ CC = AArch64CC::VS;
+ break;
+ case ISD::USUBO:
+ Opc = AArch64ISD::SUBS;
+ CC = AArch64CC::LO;
+ break;
+ // Multiply needs a little extra work.
+ case ISD::SMULO:
+ case ISD::UMULO: {
+ CC = AArch64CC::NE;
+ bool IsSigned = Op.getOpcode() == ISD::SMULO;
+ if (Op.getValueType() == MVT::i32) {
+ unsigned ExtendOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+ // For a 32 bit multiply with overflow check we want the instruction
+ // selector to generate a widening multiply (SMADDL/UMADDL). For that we
+ // need to generate the following pattern:
+ // (i64 add 0, (i64 mul (i64 sext|zext i32 %a), (i64 sext|zext i32 %b)))
+ LHS = DAG.getNode(ExtendOpc, DL, MVT::i64, LHS);
+ RHS = DAG.getNode(ExtendOpc, DL, MVT::i64, RHS);
+ SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, LHS, RHS);
+ SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Mul,
+ DAG.getConstant(0, MVT::i64));
+ // On AArch64 the upper 32 bits are always zero extended for a 32 bit
+ // operation. We need to clear out the upper 32 bits, because we used a
+ // widening multiply that wrote all 64 bits. In the end this should be a
+ // noop.
+ Value = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Add);
+ if (IsSigned) {
+ // The signed overflow check requires more than just a simple check for
+ // any bit set in the upper 32 bits of the result. These bits could be
+ // just the sign bits of a negative number. To perform the overflow
+ // check we arithmetically shift the low 32 bits of the result right by
+ // 31 (replicating the sign bit) and compare that with the upper 32 bits.
+ SDValue UpperBits = DAG.getNode(ISD::SRL, DL, MVT::i64, Add,
+ DAG.getConstant(32, MVT::i64));
+ UpperBits = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, UpperBits);
+ SDValue LowerBits = DAG.getNode(ISD::SRA, DL, MVT::i32, Value,
+ DAG.getConstant(31, MVT::i64));
+ // It is important that LowerBits is last, otherwise the arithmetic
+ // shift will not be folded into the compare (SUBS).
+ SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32);
+ Overflow = DAG.getNode(AArch64ISD::SUBS, DL, VTs, UpperBits, LowerBits)
+ .getValue(1);
+ } else {
+ // The overflow check for unsigned multiply is easy. We only need to
+ // check if any of the upper 32 bits are set. This can be done with a
+ // CMP (shifted register). For that we need to generate the following
+ // pattern:
+ // (i64 AArch64ISD::SUBS i64 0, (i64 srl i64 %Mul, i64 32))
+ SDValue UpperBits = DAG.getNode(ISD::SRL, DL, MVT::i64, Mul,
+ DAG.getConstant(32, MVT::i64));
+ SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32);
+ Overflow =
+ DAG.getNode(AArch64ISD::SUBS, DL, VTs, DAG.getConstant(0, MVT::i64),
+ UpperBits).getValue(1);
+ }
+ break;
+ }
+ assert(Op.getValueType() == MVT::i64 && "Expected an i64 value type");
+ // For the 64 bit multiply, compute the high half separately and check
+ // it against the expected extension of the low half.
+ Value = DAG.getNode(ISD::MUL, DL, MVT::i64, LHS, RHS);
+ if (IsSigned) {
+ SDValue UpperBits = DAG.getNode(ISD::MULHS, DL, MVT::i64, LHS, RHS);
+ SDValue LowerBits = DAG.getNode(ISD::SRA, DL, MVT::i64, Value,
+ DAG.getConstant(63, MVT::i64));
+ // It is important that LowerBits is last, otherwise the arithmetic
+ // shift will not be folded into the compare (SUBS).
+ SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32);
+ Overflow = DAG.getNode(AArch64ISD::SUBS, DL, VTs, UpperBits, LowerBits)
+ .getValue(1);
+ } else {
+ SDValue UpperBits = DAG.getNode(ISD::MULHU, DL, MVT::i64, LHS, RHS);
+ SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32);
+ Overflow =
+ DAG.getNode(AArch64ISD::SUBS, DL, VTs, DAG.getConstant(0, MVT::i64),
+ UpperBits).getValue(1);
+ }
+ break;
+ }
+ } // switch (...)
+
+ if (Opc) {
+ SDVTList VTs = DAG.getVTList(Op->getValueType(0), MVT::i32);
+
+ // Emit the AArch64 operation with overflow check.
+ Value = DAG.getNode(Opc, DL, VTs, LHS, RHS);
+ Overflow = Value.getValue(1);
}
+ return std::make_pair(Value, Overflow);
}
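A reference model of the signed 32-bit path above, assuming two's-complement semantics: widen to 64 bits, then the multiply overflowed iff the top 32 bits differ from the sign replication of the low 32 bits. smulo32 is a hypothetical name used only for this sketch:

#include <cassert>
#include <cstdint>

static bool smulo32(int32_t a, int32_t b, int32_t &res) {
  int64_t wide = (int64_t)a * (int64_t)b; // widening multiply (SMADDL shape)
  res = (int32_t)wide;                    // low 32 bits of the product
  int32_t upper = (int32_t)(wide >> 32);  // top 32 bits of the product
  int32_t sign = res >> 31;               // low half's sign bit, replicated
  return upper != sign;                   // mismatch => signed overflow
}

int main() {
  int32_t r;
  assert(!smulo32(46340, 46340, r)); // 2147395600 fits in i32
  assert(smulo32(65536, 65536, r));  // 2^32 does not
  assert(smulo32(-46341, 46341, r)); // below INT32_MIN
}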
-void
-AArch64TargetLowering::SaveVarArgRegisters(CCState &CCInfo, SelectionDAG &DAG,
- SDLoc DL, SDValue &Chain) const {
- MachineFunction &MF = DAG.getMachineFunction();
- MachineFrameInfo *MFI = MF.getFrameInfo();
- AArch64MachineFunctionInfo *FuncInfo
- = MF.getInfo<AArch64MachineFunctionInfo>();
+SDValue AArch64TargetLowering::LowerF128Call(SDValue Op, SelectionDAG &DAG,
+ RTLIB::Libcall Call) const {
+ SmallVector<SDValue, 2> Ops;
+ for (unsigned i = 0, e = Op->getNumOperands(); i != e; ++i)
+ Ops.push_back(Op.getOperand(i));
- SmallVector<SDValue, 8> MemOps;
+ return makeLibCall(DAG, Call, MVT::f128, &Ops[0], Ops.size(), false,
+ SDLoc(Op)).first;
+}
- unsigned FirstVariadicGPR = CCInfo.getFirstUnallocated(AArch64ArgRegs,
- NumArgRegs);
- unsigned FirstVariadicFPR = CCInfo.getFirstUnallocated(AArch64FPRArgRegs,
- NumFPRArgRegs);
+static SDValue LowerXOR(SDValue Op, SelectionDAG &DAG) {
+ SDValue Sel = Op.getOperand(0);
+ SDValue Other = Op.getOperand(1);
- unsigned GPRSaveSize = 8 * (NumArgRegs - FirstVariadicGPR);
- int GPRIdx = 0;
- if (GPRSaveSize != 0) {
- GPRIdx = MFI->CreateStackObject(GPRSaveSize, 8, false);
+ // If neither operand is a SELECT_CC, give up.
+ if (Sel.getOpcode() != ISD::SELECT_CC)
+ std::swap(Sel, Other);
+ if (Sel.getOpcode() != ISD::SELECT_CC)
+ return Op;
- SDValue FIN = DAG.getFrameIndex(GPRIdx, getPointerTy());
+ // The folding we want to perform is:
+ // (xor x, (select_cc a, b, cc, 0, -1) )
+ // -->
+ // (csel x, (xor x, -1), cc ...)
+ //
+ // The latter will get matched to a CSINV instruction.
- for (unsigned i = FirstVariadicGPR; i < NumArgRegs; ++i) {
- unsigned VReg = MF.addLiveIn(AArch64ArgRegs[i], &AArch64::GPR64RegClass);
- SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::i64);
- SDValue Store = DAG.getStore(Val.getValue(1), DL, Val, FIN,
- MachinePointerInfo::getStack(i * 8),
- false, false, 0);
- MemOps.push_back(Store);
- FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(), FIN,
- DAG.getConstant(8, getPointerTy()));
- }
- }
+ ISD::CondCode CC = cast<CondCodeSDNode>(Sel.getOperand(4))->get();
+ SDValue LHS = Sel.getOperand(0);
+ SDValue RHS = Sel.getOperand(1);
+ SDValue TVal = Sel.getOperand(2);
+ SDValue FVal = Sel.getOperand(3);
+ SDLoc dl(Sel);
- if (getSubtarget()->hasFPARMv8()) {
- unsigned FPRSaveSize = 16 * (NumFPRArgRegs - FirstVariadicFPR);
- int FPRIdx = 0;
- // According to the AArch64 Procedure Call Standard, section B.1/B.3, we
- // can omit a register save area if we know we'll never use registers of
- // that class.
- if (FPRSaveSize != 0) {
- FPRIdx = MFI->CreateStackObject(FPRSaveSize, 16, false);
+ // FIXME: This could be generalized to non-integer comparisons.
+ if (LHS.getValueType() != MVT::i32 && LHS.getValueType() != MVT::i64)
+ return Op;
- SDValue FIN = DAG.getFrameIndex(FPRIdx, getPointerTy());
+ ConstantSDNode *CFVal = dyn_cast<ConstantSDNode>(FVal);
+ ConstantSDNode *CTVal = dyn_cast<ConstantSDNode>(TVal);
- for (unsigned i = FirstVariadicFPR; i < NumFPRArgRegs; ++i) {
- unsigned VReg = MF.addLiveIn(AArch64FPRArgRegs[i],
- &AArch64::FPR128RegClass);
- SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f128);
- SDValue Store = DAG.getStore(Val.getValue(1), DL, Val, FIN,
- MachinePointerInfo::getStack(i * 16),
- false, false, 0);
- MemOps.push_back(Store);
- FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(), FIN,
- DAG.getConstant(16, getPointerTy()));
- }
- }
- FuncInfo->setVariadicFPRIdx(FPRIdx);
- FuncInfo->setVariadicFPRSize(FPRSaveSize);
+ // If the values aren't constants, this isn't the pattern we're looking for.
+ if (!CFVal || !CTVal)
+ return Op;
+
+ // We can commute the SELECT_CC by inverting the condition. This
+ // might be needed to make this fit into a CSINV pattern.
+ if (CTVal->isAllOnesValue() && CFVal->isNullValue()) {
+ std::swap(TVal, FVal);
+ std::swap(CTVal, CFVal);
+ CC = ISD::getSetCCInverse(CC, true);
}
- unsigned StackOffset = RoundUpToAlignment(CCInfo.getNextStackOffset(), 8);
- int StackIdx = MFI->CreateFixedObject(8, StackOffset, true);
+ // If the constants line up, perform the transform!
+ if (CTVal->isNullValue() && CFVal->isAllOnesValue()) {
+ SDValue CCVal;
+ SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl);
- FuncInfo->setVariadicStackIdx(StackIdx);
- FuncInfo->setVariadicGPRIdx(GPRIdx);
- FuncInfo->setVariadicGPRSize(GPRSaveSize);
+ FVal = Other;
+ TVal = DAG.getNode(ISD::XOR, dl, Other.getValueType(), Other,
+ DAG.getConstant(-1ULL, Other.getValueType()));
- if (!MemOps.empty()) {
- Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, &MemOps[0],
- MemOps.size());
+ return DAG.getNode(AArch64ISD::CSEL, dl, Sel.getValueType(), FVal, TVal,
+ CCVal, Cmp);
}
+
+ return Op;
}
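The scalar identity behind the fold: xoring with a mask that is either 0 or all-ones is a conditional bitwise NOT, which is what CSINV computes. A standalone check:

#include <cassert>
#include <cstdint>

int main() {
  for (uint64_t x : {0ULL, 1ULL, 0xDEADBEEFULL})
    for (bool cond : {false, true}) {
      uint64_t mask = cond ? 0ULL : ~0ULL;   // (select_cc ..., 0, -1)
      assert((x ^ mask) == (cond ? x : ~x)); // (csel x, (xor x, -1), cc)
    }
}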
+static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) {
+ EVT VT = Op.getValueType();
-SDValue
-AArch64TargetLowering::LowerFormalArguments(SDValue Chain,
- CallingConv::ID CallConv, bool isVarArg,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- SDLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const {
- MachineFunction &MF = DAG.getMachineFunction();
- AArch64MachineFunctionInfo *FuncInfo
- = MF.getInfo<AArch64MachineFunctionInfo>();
- MachineFrameInfo *MFI = MF.getFrameInfo();
- bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt;
+ // Let legalize expand this if it isn't a legal type yet.
+ if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
+ return SDValue();
- SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), ArgLocs, *DAG.getContext());
- CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForNode(CallConv));
+ SDVTList VTs = DAG.getVTList(VT, MVT::i32);
- SmallVector<SDValue, 16> ArgValues;
+ unsigned Opc;
+ bool ExtraOp = false;
+ switch (Op.getOpcode()) {
+ default:
+ assert(0 && "Invalid code");
+ case ISD::ADDC:
+ Opc = AArch64ISD::ADDS;
+ break;
+ case ISD::SUBC:
+ Opc = AArch64ISD::SUBS;
+ break;
+ case ISD::ADDE:
+ Opc = AArch64ISD::ADCS;
+ ExtraOp = true;
+ break;
+ case ISD::SUBE:
+ Opc = AArch64ISD::SBCS;
+ ExtraOp = true;
+ break;
+ }
- SDValue ArgValue;
- for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
- CCValAssign &VA = ArgLocs[i];
- ISD::ArgFlagsTy Flags = Ins[i].Flags;
+ if (!ExtraOp)
+ return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0), Op.getOperand(1));
+ return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0), Op.getOperand(1),
+ Op.getOperand(2));
+}
- if (Flags.isByVal()) {
- // Byval is used for small structs and HFAs in the PCS, but the system
- // should work in a non-compliant manner for larger structs.
- EVT PtrTy = getPointerTy();
- int Size = Flags.getByValSize();
- unsigned NumRegs = (Size + 7) / 8;
+static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) {
+ // Let legalize expand this if it isn't a legal type yet.
+ if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
+ return SDValue();
- uint32_t BEAlign = 0;
- if (Size < 8 && !getSubtarget()->isLittle())
- BEAlign = 8-Size;
- unsigned FrameIdx = MFI->CreateFixedObject(8 * NumRegs,
- VA.getLocMemOffset() + BEAlign,
- false);
- SDValue FrameIdxN = DAG.getFrameIndex(FrameIdx, PtrTy);
- InVals.push_back(FrameIdxN);
+ AArch64CC::CondCode CC;
+ // The actual operation that sets the overflow or carry flag.
+ SDValue Value, Overflow;
+ std::tie(Value, Overflow) = getAArch64XALUOOp(CC, Op, DAG);
- continue;
- } else if (VA.isRegLoc()) {
- MVT RegVT = VA.getLocVT();
- const TargetRegisterClass *RC = getRegClassFor(RegVT);
- unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
+ // We use 0 and 1 as false and true values.
+ SDValue TVal = DAG.getConstant(1, MVT::i32);
+ SDValue FVal = DAG.getConstant(0, MVT::i32);
- ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
- } else { // VA.isRegLoc()
- assert(VA.isMemLoc());
+ // We use an inverted condition, because the conditional select is inverted
+ // too. This will allow it to be selected to a single instruction:
+ // CSINC Wd, WZR, WZR, invert(cond).
+ SDValue CCVal = DAG.getConstant(getInvertedCondCode(CC), MVT::i32);
+ Overflow = DAG.getNode(AArch64ISD::CSEL, SDLoc(Op), MVT::i32, FVal, TVal,
+ CCVal, Overflow);
- int FI = MFI->CreateFixedObject(VA.getLocVT().getSizeInBits()/8,
- VA.getLocMemOffset(), true);
+ SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
+ return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op), VTs, Value, Overflow);
+}
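Why the inverted condition pays off: CSINC Wd, WZR, WZR, cc computes cc ? 0 : 1, so selecting on invert(cond) materialises the flag itself. A sketch of that arithmetic:

#include <cassert>

int main() {
  for (bool overflow : {false, true}) {
    bool inverted = !overflow;        // invert(cond) from the code above
    int wd = inverted ? 0 : 0 + 1;    // CSINC Wd, WZR, WZR, inverted
    assert(wd == (overflow ? 1 : 0)); // yields the overflow bit directly
  }
}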
- SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
- ArgValue = DAG.getLoad(VA.getLocVT(), dl, Chain, FIN,
- MachinePointerInfo::getFixedStack(FI),
- false, false, false, 0);
+// Prefetch operands are:
+// 1: Address to prefetch
+// 2: bool isWrite
+// 3: int locality (0 = no locality ... 3 = extreme locality)
+// 4: bool isDataCache
+static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG) {
+ SDLoc DL(Op);
+ unsigned IsWrite = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
+ unsigned Locality = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
+ // The isData operand is not currently used.
+ // unsigned isData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
+
+ bool IsStream = !Locality;
+ // A nonzero locality is remapped to the PRFM cache-level encoding.
+ if (Locality) {
+ // The front-end should have filtered out the out-of-range values.
+ assert(Locality <= 3 && "Prefetch locality out-of-range");
+ // The locality degree is the inverse of the cache level: the encoding
+ // starts at 0 for level 1, so locality 3 maps to L1.
+ Locality = 3 - Locality;
+ }
+ // Build the mask value encoding the expected behavior.
+ unsigned PrfOp = (IsWrite << 4) | // Load/Store bit
+ (Locality << 1) | // Cache level bits
+ (unsigned)IsStream; // Stream bit
+ return DAG.getNode(AArch64ISD::PREFETCH, DL, MVT::Other, Op.getOperand(0),
+ DAG.getConstant(PrfOp, MVT::i32), Op.getOperand(1));
+}
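Worked encodings under that scheme, with encodePrfOp as a standalone mirror of the logic above; the mnemonics follow the AArch64 PRFM operand encoding (type, target level, policy):

#include <cassert>

static unsigned encodePrfOp(bool IsWrite, unsigned Locality) {
  bool IsStream = Locality == 0;                // locality 0: streaming hint
  unsigned Level = IsStream ? 0 : 3 - Locality; // invert: 3 -> L1, 1 -> L3
  return (IsWrite << 4) | (Level << 1) | (unsigned)IsStream;
}

int main() {
  assert(encodePrfOp(false, 3) == 0); // PLDL1KEEP
  assert(encodePrfOp(true, 3) == 16); // PSTL1KEEP
  assert(encodePrfOp(false, 0) == 1); // PLDL1STRM
}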
- }
+SDValue AArch64TargetLowering::LowerFP_EXTEND(SDValue Op,
+ SelectionDAG &DAG) const {
+ assert(Op.getValueType() == MVT::f128 && "Unexpected lowering");
- switch (VA.getLocInfo()) {
- default: llvm_unreachable("Unknown loc info!");
- case CCValAssign::Full: break;
- case CCValAssign::BCvt:
- ArgValue = DAG.getNode(ISD::BITCAST,dl, VA.getValVT(), ArgValue);
- break;
- case CCValAssign::SExt:
- case CCValAssign::ZExt:
- case CCValAssign::AExt:
- case CCValAssign::FPExt: {
- unsigned DestSize = VA.getValVT().getSizeInBits();
- unsigned DestSubReg;
-
- switch (DestSize) {
- case 8: DestSubReg = AArch64::sub_8; break;
- case 16: DestSubReg = AArch64::sub_16; break;
- case 32: DestSubReg = AArch64::sub_32; break;
- case 64: DestSubReg = AArch64::sub_64; break;
- default: llvm_unreachable("Unexpected argument promotion");
- }
+ RTLIB::Libcall LC;
+ LC = RTLIB::getFPEXT(Op.getOperand(0).getValueType(), Op.getValueType());
- ArgValue = SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl,
- VA.getValVT(), ArgValue,
- DAG.getTargetConstant(DestSubReg, MVT::i32)),
- 0);
- break;
- }
- }
+ return LowerF128Call(Op, DAG, LC);
+}
- InVals.push_back(ArgValue);
+SDValue AArch64TargetLowering::LowerFP_ROUND(SDValue Op,
+ SelectionDAG &DAG) const {
+ if (Op.getOperand(0).getValueType() != MVT::f128) {
+ // It's legal except when f128 is involved
+ return Op;
}
- if (isVarArg)
- SaveVarArgRegisters(CCInfo, DAG, dl, Chain);
+ RTLIB::Libcall LC;
+ LC = RTLIB::getFPROUND(Op.getOperand(0).getValueType(), Op.getValueType());
- unsigned StackArgSize = CCInfo.getNextStackOffset();
- if (DoesCalleeRestoreStack(CallConv, TailCallOpt)) {
- // This is a non-standard ABI so by fiat I say we're allowed to make full
- // use of the stack area to be popped, which must be aligned to 16 bytes in
- // any case:
- StackArgSize = RoundUpToAlignment(StackArgSize, 16);
+ // FP_ROUND node has a second operand indicating whether it is known to be
+ // precise. That doesn't take part in the LibCall so we can't directly use
+ // LowerF128Call.
+ SDValue SrcVal = Op.getOperand(0);
+ return makeLibCall(DAG, LC, Op.getValueType(), &SrcVal, 1,
+ /*isSigned*/ false, SDLoc(Op)).first;
+}
- // If we're expected to restore the stack (e.g. fastcc) then we'll be adding
- // a multiple of 16.
- FuncInfo->setArgumentStackToRestore(StackArgSize);
+static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
+ // Warning: We maintain cost tables in AArch64TargetTransformInfo.cpp.
+ // Any additional optimization in this function should be recorded
+ // in the cost tables.
+ EVT InVT = Op.getOperand(0).getValueType();
+ EVT VT = Op.getValueType();
- // This realignment carries over to the available bytes below. Our own
- // callers will guarantee the space is free by giving an aligned value to
- // CALLSEQ_START.
+ // FP_TO_XINT conversions between types of the same width are legal.
+ if (VT.getSizeInBits() == InVT.getSizeInBits())
+ return Op;
+
+ if (InVT == MVT::v2f64 || InVT == MVT::v4f32) {
+ SDLoc dl(Op);
+ SDValue Cv =
+ DAG.getNode(Op.getOpcode(), dl, InVT.changeVectorElementTypeToInteger(),
+ Op.getOperand(0));
+ return DAG.getNode(ISD::TRUNCATE, dl, VT, Cv);
+ } else if (InVT == MVT::v2f32) {
+ SDLoc dl(Op);
+ SDValue Ext = DAG.getNode(ISD::FP_EXTEND, dl, MVT::v2f64, Op.getOperand(0));
+ return DAG.getNode(Op.getOpcode(), dl, VT, Ext);
}
- // Even if we're not expected to free up the space, it's useful to know how
- // much is there while considering tail calls (because we can reuse it).
- FuncInfo->setBytesInStackArgArea(StackArgSize);
- return Chain;
+ // Type changing conversions are illegal.
+ return SDValue();
}
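The two vector strategies in scalar terms, as a rough analogy only: a narrowing FP-to-int conversion can convert at the source width and truncate the integer, while a widening one extends the FP operand first and converts at the destination width:

#include <cassert>
#include <cstdint>

int main() {
  // v2f64 -> v2i32 shape: convert wide, then truncate the integer result.
  double d = 123456.75;
  assert((int32_t)(int64_t)d == 123456);
  // v2f32 -> v2i64 shape: extend the float first, then convert.
  float f = 123456.0f;
  assert((int64_t)(double)f == 123456);
}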
-SDValue
-AArch64TargetLowering::LowerReturn(SDValue Chain,
- CallingConv::ID CallConv, bool isVarArg,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- SDLoc dl, SelectionDAG &DAG) const {
- // CCValAssign - represent the assignment of the return value to a location.
- SmallVector<CCValAssign, 16> RVLocs;
+SDValue AArch64TargetLowering::LowerFP_TO_INT(SDValue Op,
+ SelectionDAG &DAG) const {
+ if (Op.getOperand(0).getValueType().isVector())
+ return LowerVectorFP_TO_INT(Op, DAG);
- // CCState - Info about the registers and stack slots.
- CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), RVLocs, *DAG.getContext());
+ if (Op.getOperand(0).getValueType() != MVT::f128) {
+ // It's legal except when f128 is involved
+ return Op;
+ }
- // Analyze outgoing return values.
- CCInfo.AnalyzeReturn(Outs, CCAssignFnForNode(CallConv));
+ RTLIB::Libcall LC;
+ if (Op.getOpcode() == ISD::FP_TO_SINT)
+ LC = RTLIB::getFPTOSINT(Op.getOperand(0).getValueType(), Op.getValueType());
+ else
+ LC = RTLIB::getFPTOUINT(Op.getOperand(0).getValueType(), Op.getValueType());
- SDValue Flag;
- SmallVector<SDValue, 4> RetOps(1, Chain);
+ SmallVector<SDValue, 2> Ops;
+ for (unsigned i = 0, e = Op->getNumOperands(); i != e; ++i)
+ Ops.push_back(Op.getOperand(i));
- for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
- // PCS: "If the type, T, of the result of a function is such that
- // void func(T arg) would require that arg be passed as a value in a
- // register (or set of registers) according to the rules in 5.4, then the
- // result is returned in the same registers as would be used for such an
- // argument.
- //
- // Otherwise, the caller shall reserve a block of memory of sufficient
- // size and alignment to hold the result. The address of the memory block
- // shall be passed as an additional argument to the function in x8."
- //
- // This is implemented in two places. The register-return values are dealt
- // with here, more complex returns are passed as an sret parameter, which
- // means we don't have to worry about it during actual return.
- CCValAssign &VA = RVLocs[i];
- assert(VA.isRegLoc() && "Only register-returns should be created by PCS");
+ return makeLibCall(DAG, LC, Op.getValueType(), &Ops[0], Ops.size(), false,
+ SDLoc(Op)).first;
+}
+static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
+ // Warning: We maintain cost tables in AArch64TargetTransformInfo.cpp.
+ // Any additional optimization in this function should be recorded
+ // in the cost tables.
+ EVT VT = Op.getValueType();
+ SDLoc dl(Op);
+ SDValue In = Op.getOperand(0);
+ EVT InVT = In.getValueType();
- SDValue Arg = OutVals[i];
+ // v2i32 to v2f32 is legal.
+ if (VT == MVT::v2f32 && InVT == MVT::v2i32)
+ return Op;
- // There's no convenient note in the ABI about this as there is for normal
- // arguments, but it says return values are passed in the same registers as
- // an argument would be. I believe that includes the comments about
- // unspecified higher bits, putting the burden of widening on the *caller*
- // for return values.
- switch (VA.getLocInfo()) {
- default: llvm_unreachable("Unknown loc info");
- case CCValAssign::Full: break;
- case CCValAssign::SExt:
- case CCValAssign::ZExt:
- case CCValAssign::AExt:
- // Floating-point values should only be extended when they're going into
- // memory, which can't happen here so an integer extend is acceptable.
- Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
- break;
- case CCValAssign::BCvt:
- Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
- break;
- }
+ // v2f64 outputs are handled by widening the input and converting directly.
+ if (VT == MVT::v2f64) {
+ // Extend the input argument to a v2i64 that we can feed into the
+ // floating point conversion. Zero or sign extend based on whether
+ // we're doing a signed or unsigned float conversion.
+ unsigned Opc =
+ Op.getOpcode() == ISD::UINT_TO_FP ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
+ assert(Op.getNumOperands() == 1 && "FP conversions take one argument");
+ SDValue Promoted = DAG.getNode(Opc, dl, MVT::v2i64, Op.getOperand(0));
+ return DAG.getNode(Op.getOpcode(), dl, Op.getValueType(), Promoted);
+ }
- Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
- Flag = Chain.getValue(1);
- RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
+ // Scalarize v2i64 to v2f32 conversions.
+ std::vector<SDValue> BuildVectorOps;
+ for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) {
+ SDValue Sclr = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, In,
+ DAG.getConstant(i, MVT::i64));
+ Sclr = DAG.getNode(Op->getOpcode(), dl, MVT::f32, Sclr);
+ BuildVectorOps.push_back(Sclr);
}
- RetOps[0] = Chain; // Update chain.
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, BuildVectorOps);
+}
- // Add the flag if we have it.
- if (Flag.getNode())
- RetOps.push_back(Flag);
+SDValue AArch64TargetLowering::LowerINT_TO_FP(SDValue Op,
+ SelectionDAG &DAG) const {
+ if (Op.getValueType().isVector())
+ return LowerVectorINT_TO_FP(Op, DAG);
- return DAG.getNode(AArch64ISD::Ret, dl, MVT::Other,
- &RetOps[0], RetOps.size());
-}
+ // i128 conversions are libcalls.
+ if (Op.getOperand(0).getValueType() == MVT::i128)
+ return SDValue();
+
+ // Other conversions are legal, unless the destination type is the
+ // completely software-based fp128.
+ if (Op.getValueType() != MVT::f128)
+ return Op;
-unsigned AArch64TargetLowering::getByValTypeAlignment(Type *Ty) const {
- // This is a new backend. For anything more precise than this a FE should
- // set an explicit alignment.
- return 4;
+ RTLIB::Libcall LC;
+ if (Op.getOpcode() == ISD::SINT_TO_FP)
+ LC = RTLIB::getSINTTOFP(Op.getOperand(0).getValueType(), Op.getValueType());
+ else
+ LC = RTLIB::getUINTTOFP(Op.getOperand(0).getValueType(), Op.getValueType());
+
+ return LowerF128Call(Op, DAG, LC);
}
-SDValue
-AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
- SmallVectorImpl<SDValue> &InVals) const {
- SelectionDAG &DAG = CLI.DAG;
- SDLoc &dl = CLI.DL;
- SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
- SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
- SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
- SDValue Chain = CLI.Chain;
- SDValue Callee = CLI.Callee;
- bool &IsTailCall = CLI.IsTailCall;
- CallingConv::ID CallConv = CLI.CallConv;
- bool IsVarArg = CLI.IsVarArg;
+SDValue AArch64TargetLowering::LowerFSINCOS(SDValue Op,
+ SelectionDAG &DAG) const {
+ // For iOS, we want to call an alternative entry point: __sincos_stret,
+ // which returns the values in two S / D registers.
+ SDLoc dl(Op);
+ SDValue Arg = Op.getOperand(0);
+ EVT ArgVT = Arg.getValueType();
+ Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
- MachineFunction &MF = DAG.getMachineFunction();
- AArch64MachineFunctionInfo *FuncInfo
- = MF.getInfo<AArch64MachineFunctionInfo>();
- bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt;
- bool IsStructRet = !Outs.empty() && Outs[0].Flags.isSRet();
- bool IsSibCall = false;
+ ArgListTy Args;
+ ArgListEntry Entry;
- if (IsTailCall) {
- IsTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
- IsVarArg, IsStructRet, MF.getFunction()->hasStructRetAttr(),
- Outs, OutVals, Ins, DAG);
+ Entry.Node = Arg;
+ Entry.Ty = ArgTy;
+ Entry.isSExt = false;
+ Entry.isZExt = false;
+ Args.push_back(Entry);
- // A sibling call is one where we're under the usual C ABI and not planning
- // to change that but can still do a tail call:
- if (!TailCallOpt && IsTailCall)
- IsSibCall = true;
- }
+ const char *LibcallName =
+ (ArgVT == MVT::f64) ? "__sincos_stret" : "__sincosf_stret";
+ SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy());
- SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(),
- getTargetMachine(), ArgLocs, *DAG.getContext());
- CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CallConv));
+ StructType *RetTy = StructType::get(ArgTy, ArgTy, NULL);
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(dl).setChain(DAG.getEntryNode())
+ .setCallee(CallingConv::Fast, RetTy, Callee, &Args, 0);
- // On AArch64 (and all other architectures I'm aware of) the most this has to
- // do is adjust the stack pointer.
- unsigned NumBytes = RoundUpToAlignment(CCInfo.getNextStackOffset(), 16);
- if (IsSibCall) {
- // Since we're not changing the ABI to make this a tail call, the memory
- // operands are already available in the caller's incoming argument space.
- NumBytes = 0;
+ std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
+ return CallResult.first;
+}
+
+SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
+ SelectionDAG &DAG) const {
+ switch (Op.getOpcode()) {
+ default:
+ llvm_unreachable("unimplemented operand");
+ return SDValue();
+ case ISD::GlobalAddress:
+ return LowerGlobalAddress(Op, DAG);
+ case ISD::GlobalTLSAddress:
+ return LowerGlobalTLSAddress(Op, DAG);
+ case ISD::SETCC:
+ return LowerSETCC(Op, DAG);
+ case ISD::BR_CC:
+ return LowerBR_CC(Op, DAG);
+ case ISD::SELECT:
+ return LowerSELECT(Op, DAG);
+ case ISD::SELECT_CC:
+ return LowerSELECT_CC(Op, DAG);
+ case ISD::JumpTable:
+ return LowerJumpTable(Op, DAG);
+ case ISD::ConstantPool:
+ return LowerConstantPool(Op, DAG);
+ case ISD::BlockAddress:
+ return LowerBlockAddress(Op, DAG);
+ case ISD::VASTART:
+ return LowerVASTART(Op, DAG);
+ case ISD::VACOPY:
+ return LowerVACOPY(Op, DAG);
+ case ISD::VAARG:
+ return LowerVAARG(Op, DAG);
+ case ISD::ADDC:
+ case ISD::ADDE:
+ case ISD::SUBC:
+ case ISD::SUBE:
+ return LowerADDC_ADDE_SUBC_SUBE(Op, DAG);
+ case ISD::SADDO:
+ case ISD::UADDO:
+ case ISD::SSUBO:
+ case ISD::USUBO:
+ case ISD::SMULO:
+ case ISD::UMULO:
+ return LowerXALUO(Op, DAG);
+ case ISD::FADD:
+ return LowerF128Call(Op, DAG, RTLIB::ADD_F128);
+ case ISD::FSUB:
+ return LowerF128Call(Op, DAG, RTLIB::SUB_F128);
+ case ISD::FMUL:
+ return LowerF128Call(Op, DAG, RTLIB::MUL_F128);
+ case ISD::FDIV:
+ return LowerF128Call(Op, DAG, RTLIB::DIV_F128);
+ case ISD::FP_ROUND:
+ return LowerFP_ROUND(Op, DAG);
+ case ISD::FP_EXTEND:
+ return LowerFP_EXTEND(Op, DAG);
+ case ISD::FRAMEADDR:
+ return LowerFRAMEADDR(Op, DAG);
+ case ISD::RETURNADDR:
+ return LowerRETURNADDR(Op, DAG);
+ case ISD::INSERT_VECTOR_ELT:
+ return LowerINSERT_VECTOR_ELT(Op, DAG);
+ case ISD::EXTRACT_VECTOR_ELT:
+ return LowerEXTRACT_VECTOR_ELT(Op, DAG);
+ case ISD::BUILD_VECTOR:
+ return LowerBUILD_VECTOR(Op, DAG);
+ case ISD::VECTOR_SHUFFLE:
+ return LowerVECTOR_SHUFFLE(Op, DAG);
+ case ISD::EXTRACT_SUBVECTOR:
+ return LowerEXTRACT_SUBVECTOR(Op, DAG);
+ case ISD::SRA:
+ case ISD::SRL:
+ case ISD::SHL:
+ return LowerVectorSRA_SRL_SHL(Op, DAG);
+ case ISD::SHL_PARTS:
+ return LowerShiftLeftParts(Op, DAG);
+ case ISD::SRL_PARTS:
+ case ISD::SRA_PARTS:
+ return LowerShiftRightParts(Op, DAG);
+ case ISD::CTPOP:
+ return LowerCTPOP(Op, DAG);
+ case ISD::FCOPYSIGN:
+ return LowerFCOPYSIGN(Op, DAG);
+ case ISD::AND:
+ return LowerVectorAND(Op, DAG);
+ case ISD::OR:
+ return LowerVectorOR(Op, DAG);
+ case ISD::XOR:
+ return LowerXOR(Op, DAG);
+ case ISD::PREFETCH:
+ return LowerPREFETCH(Op, DAG);
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP:
+ return LowerINT_TO_FP(Op, DAG);
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+ return LowerFP_TO_INT(Op, DAG);
+ case ISD::FSINCOS:
+ return LowerFSINCOS(Op, DAG);
}
+}
- // FPDiff is the byte offset of the call's argument area from the callee's.
- // Stores to callee stack arguments will be placed in FixedStackSlots offset
- // by this amount for a tail call. In a sibling call it must be 0 because the
- // caller will deallocate the entire stack and the callee still expects its
- // arguments to begin at SP+0. Completely unused for non-tail calls.
- int FPDiff = 0;
+/// getFunctionAlignment - Return the Log2 alignment of this function.
+unsigned AArch64TargetLowering::getFunctionAlignment(const Function *F) const {
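+ // A log2 value of 2 means 4-byte alignment, matching the fixed 4-byte
+ // encoding of every A64 instruction.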
+ return 2;
+}
- if (IsTailCall && !IsSibCall) {
- unsigned NumReusableBytes = FuncInfo->getBytesInStackArgArea();
+//===----------------------------------------------------------------------===//
+// Calling Convention Implementation
+//===----------------------------------------------------------------------===//
- // FPDiff will be negative if this tail call requires more space than we
- // would automatically have in our incoming argument space. Positive if we
- // can actually shrink the stack.
- FPDiff = NumReusableBytes - NumBytes;
+#include "AArch64GenCallingConv.inc"
- // The stack pointer must be 16-byte aligned at all times it's used for a
- // memory operation, which in practice means at *all* times and in
- // particular across call boundaries. Therefore our own arguments started at
- // a 16-byte aligned SP and the delta applied for the tail call should
- // satisfy the same constraint.
- assert(FPDiff % 16 == 0 && "unaligned stack on tail call");
+/// Selects the correct CCAssignFn for the given CallingConvention
+/// value.
+CCAssignFn *AArch64TargetLowering::CCAssignFnForCall(CallingConv::ID CC,
+ bool IsVarArg) const {
+ switch (CC) {
+ default:
+ llvm_unreachable("Unsupported calling convention.");
+ case CallingConv::WebKit_JS:
+ return CC_AArch64_WebKit_JS;
+ case CallingConv::C:
+ case CallingConv::Fast:
+ if (!Subtarget->isTargetDarwin())
+ return CC_AArch64_AAPCS;
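+ // Darwin deviates from AAPCS for variadic functions: anonymous arguments
+ // are always passed on the stack, hence the dedicated assignment function.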
+ return IsVarArg ? CC_AArch64_DarwinPCS_VarArg : CC_AArch64_DarwinPCS;
}
+}
- if (!IsSibCall)
- Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true),
- dl);
-
- SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, AArch64::XSP,
- getPointerTy());
+SDValue AArch64TargetLowering::LowerFormalArguments(
+ SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins, SDLoc DL, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
- SmallVector<SDValue, 8> MemOpChains;
- SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
+ // Assign locations to all of the incoming arguments.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+ // At this point, Ins[].VT may already be promoted to i32. To correctly
+ // handle passing i8 as i8 instead of i32 on the stack, we demote the
+ // promoted type back to i8/i16 before running the assignment function.
+ // Since AnalyzeFormalArguments would use Ins[].VT for both ValVT and
+ // LocVT, we instead invoke the CCAssignFn directly with the demoted ValVT.
+ unsigned NumArgs = Ins.size();
+ Function::const_arg_iterator CurOrigArg = MF.getFunction()->arg_begin();
+ unsigned CurArgIdx = 0;
+ for (unsigned i = 0; i != NumArgs; ++i) {
+ MVT ValVT = Ins[i].VT;
+ std::advance(CurOrigArg, Ins[i].OrigArgIndex - CurArgIdx);
+ CurArgIdx = Ins[i].OrigArgIndex;
+
+ // Get type of the original argument.
+ EVT ActualVT = getValueType(CurOrigArg->getType(), /*AllowUnknown*/ true);
+ MVT ActualMVT = ActualVT.isSimple() ? ActualVT.getSimpleVT() : MVT::Other;
+ // If ActualMVT is i1/i8/i16, we should set LocVT to i8/i8/i16.
+ if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8)
+ ValVT = MVT::i8;
+ else if (ActualMVT == MVT::i16)
+ ValVT = MVT::i16;
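+ // For example, an i8 parameter reaches us as Ins[i].VT == i32 after
+ // promotion; restoring ValVT to i8 lets the CC code use a byte-sized slot.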
+
+ CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, /*IsVarArg=*/false);
+ bool Res =
+ AssignFn(i, ValVT, ValVT, CCValAssign::Full, Ins[i].Flags, CCInfo);
+ assert(!Res && "Call operand has unhandled type");
+ (void)Res;
+ }
+ assert(ArgLocs.size() == Ins.size());
+ SmallVector<SDValue, 16> ArgValues;
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
- ISD::ArgFlagsTy Flags = Outs[i].Flags;
- SDValue Arg = OutVals[i];
-
- // Callee does the actual widening, so all extensions just use an implicit
- // definition of the rest of the Loc. Aesthetically, this would be nicer as
- // an ANY_EXTEND, but that isn't valid for floating-point types and this
- // alternative works on integer types too.
- switch (VA.getLocInfo()) {
- default: llvm_unreachable("Unknown loc info!");
- case CCValAssign::Full: break;
- case CCValAssign::SExt:
- case CCValAssign::ZExt:
- case CCValAssign::AExt:
- case CCValAssign::FPExt: {
- unsigned SrcSize = VA.getValVT().getSizeInBits();
- unsigned SrcSubReg;
-
- switch (SrcSize) {
- case 8: SrcSubReg = AArch64::sub_8; break;
- case 16: SrcSubReg = AArch64::sub_16; break;
- case 32: SrcSubReg = AArch64::sub_32; break;
- case 64: SrcSubReg = AArch64::sub_64; break;
- default: llvm_unreachable("Unexpected argument promotion");
- }
- Arg = SDValue(DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, dl,
- VA.getLocVT(),
- DAG.getUNDEF(VA.getLocVT()),
- Arg,
- DAG.getTargetConstant(SrcSubReg, MVT::i32)),
- 0);
+ if (Ins[i].Flags.isByVal()) {
+ // Byval is used for HFAs in the PCS, but the system should work in a
+ // non-compliant manner for larger structs.
+ EVT PtrTy = getPointerTy();
+ int Size = Ins[i].Flags.getByValSize();
+ unsigned NumRegs = (Size + 7) / 8;
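+ // Round up to whole 8-byte registers: a 12-byte byval gets NumRegs == 2,
+ // i.e. 16 bytes of argument space.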
- break;
- }
- case CCValAssign::BCvt:
- Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
- break;
- }
+ // FIXME: This works on big-endian for composite byvals, which are the
+ // common case. It should also work for fundamental types.
+ unsigned FrameIdx =
+ MFI->CreateFixedObject(8 * NumRegs, VA.getLocMemOffset(), false);
+ SDValue FrameIdxN = DAG.getFrameIndex(FrameIdx, PtrTy);
+ InVals.push_back(FrameIdxN);
- if (VA.isRegLoc()) {
- // A normal register (sub-) argument. For now we just note it down because
- // we want to copy things into registers as late as possible to avoid
- // register-pressure (and possibly worse).
- RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
continue;
- }
+ } else if (VA.isRegLoc()) {
+ // Arguments stored in registers.
+ EVT RegVT = VA.getLocVT();
+
+ SDValue ArgValue;
+ const TargetRegisterClass *RC;
+
+ if (RegVT == MVT::i32)
+ RC = &AArch64::GPR32RegClass;
+ else if (RegVT == MVT::i64)
+ RC = &AArch64::GPR64RegClass;
+ else if (RegVT == MVT::f32)
+ RC = &AArch64::FPR32RegClass;
+ else if (RegVT == MVT::f64 || RegVT.is64BitVector())
+ RC = &AArch64::FPR64RegClass;
+ else if (RegVT == MVT::f128 || RegVT.is128BitVector())
+ RC = &AArch64::FPR128RegClass;
+ else
+ llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
- assert(VA.isMemLoc() && "unexpected argument location");
+ // Transform the arguments in physical registers into virtual ones.
+ unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
+ ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, RegVT);
- SDValue DstAddr;
- MachinePointerInfo DstInfo;
- if (IsTailCall) {
- uint32_t OpSize = Flags.isByVal() ? Flags.getByValSize() :
- VA.getLocVT().getSizeInBits();
- OpSize = (OpSize + 7) / 8;
- int32_t Offset = VA.getLocMemOffset() + FPDiff;
- int FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true);
+ // If this is an 8, 16 or 32-bit value, it is really passed promoted
+ // to 64 bits. Insert an assert[sz]ext to capture this, then
+ // truncate to the right size.
+ switch (VA.getLocInfo()) {
+ default:
+ llvm_unreachable("Unknown loc info!");
+ case CCValAssign::Full:
+ break;
+ case CCValAssign::BCvt:
+ ArgValue = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), ArgValue);
+ break;
+ case CCValAssign::AExt:
+ case CCValAssign::SExt:
+ case CCValAssign::ZExt:
+ // SelectionDAGBuilder will insert appropriate AssertZExt & AssertSExt
+ // nodes after our lowering.
+ assert(RegVT == Ins[i].VT && "incorrect register location selected");
+ break;
+ }
- DstAddr = DAG.getFrameIndex(FI, getPointerTy());
- DstInfo = MachinePointerInfo::getFixedStack(FI);
+ InVals.push_back(ArgValue);
+
+ } else { // VA.isMemLoc()
+ assert(VA.isMemLoc() && "CCValAssign is neither reg nor mem");
+ unsigned ArgOffset = VA.getLocMemOffset();
+ unsigned ArgSize = VA.getLocVT().getSizeInBits() / 8;
- // Make sure any stack arguments overlapping with where we're storing are
- // loaded before this eventual operation. Otherwise they'll be clobbered.
- Chain = addTokenForArgument(Chain, DAG, MF.getFrameInfo(), FI);
- } else {
- uint32_t OpSize = Flags.isByVal() ? Flags.getByValSize()*8 :
- VA.getLocVT().getSizeInBits();
- OpSize = (OpSize + 7) / 8;
uint32_t BEAlign = 0;
- if (OpSize < 8 && !getSubtarget()->isLittle())
- BEAlign = 8-OpSize;
- SDValue PtrOff = DAG.getIntPtrConstant(VA.getLocMemOffset() + BEAlign);
+ if (ArgSize < 8 && !Subtarget->isLittleEndian())
+ BEAlign = 8 - ArgSize;
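+ // On big-endian the datum sits at the high-address end of its 8-byte slot,
+ // so e.g. an i32 (ArgSize == 4) needs BEAlign == 4 to skip the padding.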
- DstAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
- DstInfo = MachinePointerInfo::getStack(VA.getLocMemOffset());
- }
+ int FI = MFI->CreateFixedObject(ArgSize, ArgOffset + BEAlign, true);
- if (Flags.isByVal()) {
- SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i64);
- SDValue Cpy = DAG.getMemcpy(Chain, dl, DstAddr, Arg, SizeNode,
- Flags.getByValAlign(),
- /*isVolatile = */ false,
- /*alwaysInline = */ false,
- DstInfo, MachinePointerInfo(0));
- MemOpChains.push_back(Cpy);
- } else {
- // Normal stack argument, put it where it's needed.
- SDValue Store = DAG.getStore(Chain, dl, Arg, DstAddr, DstInfo,
- false, false, 0);
- MemOpChains.push_back(Store);
- }
- }
+ // Create load nodes to retrieve arguments from the stack.
+ SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
+ SDValue ArgValue;
- // The loads and stores generated above shouldn't clash with each
- // other. Combining them with this TokenFactor notes that fact for the rest of
- // the backend.
- if (!MemOpChains.empty())
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- &MemOpChains[0], MemOpChains.size());
+ ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
+ switch (VA.getLocInfo()) {
+ default:
+ break;
+ case CCValAssign::SExt:
+ ExtType = ISD::SEXTLOAD;
+ break;
+ case CCValAssign::ZExt:
+ ExtType = ISD::ZEXTLOAD;
+ break;
+ case CCValAssign::AExt:
+ ExtType = ISD::EXTLOAD;
+ break;
+ }
- // Most of the rest of the instructions need to be glued together; we don't
- // want assignments to actual registers used by a call to be rearranged by a
- // well-meaning scheduler.
- SDValue InFlag;
+ ArgValue = DAG.getExtLoad(ExtType, DL, VA.getValVT(), Chain, FIN,
+ MachinePointerInfo::getFixedStack(FI),
+ VA.getLocVT(),
+ false, false, false, 0);
- for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
- Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
- RegsToPass[i].second, InFlag);
- InFlag = Chain.getValue(1);
+ InVals.push_back(ArgValue);
+ }
}
- // The linker is responsible for inserting veneers when necessary to put a
- // function call destination in range, so we don't need to bother with a
- // wrapper here.
- if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
- const GlobalValue *GV = G->getGlobal();
- Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy());
- } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
- const char *Sym = S->getSymbol();
- Callee = DAG.getTargetExternalSymbol(Sym, getPointerTy());
- }
+ // varargs
+ if (isVarArg) {
+ if (!Subtarget->isTargetDarwin()) {
+ // The AAPCS variadic function ABI is identical to the non-variadic
+ // one. As a result there may be more arguments in registers and we should
+ // save them for future reference.
+ saveVarArgRegisters(CCInfo, DAG, DL, Chain);
+ }
- // We don't usually want to end the call-sequence here because we would tidy
- // the frame up *after* the call, however in the ABI-changing tail-call case
- // we've carefully laid out the parameters so that when sp is reset they'll be
- // in the correct location.
- if (IsTailCall && !IsSibCall) {
- Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
- DAG.getIntPtrConstant(0, true), InFlag, dl);
- InFlag = Chain.getValue(1);
+ AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
+ // This will point to the next argument passed via stack.
+ unsigned StackOffset = CCInfo.getNextStackOffset();
+ // We currently pass all varargs at 8-byte alignment.
+ StackOffset = ((StackOffset + 7) & ~7);
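+ // (X + 7) & ~7 rounds X up to the next multiple of 8, e.g. 20 -> 24.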
+ AFI->setVarArgsStackIndex(MFI->CreateFixedObject(4, StackOffset, true));
}
- // We produce the following DAG scheme for the actual call instruction:
- // (AArch64Call Chain, Callee, reg1, ..., regn, preserveMask, inflag?
- //
- // Most arguments aren't going to be used and just keep the values live as
- // far as LLVM is concerned. It's expected to be selected as simply "bl
- // callee" (for a direct, non-tail call).
- std::vector<SDValue> Ops;
- Ops.push_back(Chain);
- Ops.push_back(Callee);
+ AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
+ unsigned StackArgSize = CCInfo.getNextStackOffset();
+ bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt;
+ if (DoesCalleeRestoreStack(CallConv, TailCallOpt)) {
+ // This is a non-standard ABI so by fiat I say we're allowed to make full
+ // use of the stack area to be popped, which must be aligned to 16 bytes in
+ // any case:
+ StackArgSize = RoundUpToAlignment(StackArgSize, 16);
- if (IsTailCall) {
- // Each tail call may have to adjust the stack by a different amount, so
- // this information must travel along with the operation for eventual
- // consumption by emitEpilogue.
- Ops.push_back(DAG.getTargetConstant(FPDiff, MVT::i32));
+ // If we're expected to restore the stack (e.g. fastcc) then we'll be adding
+ // a multiple of 16.
+ FuncInfo->setArgumentStackToRestore(StackArgSize);
+
+ // This realignment carries over to the available bytes below. Our own
+ // callers will guarantee the space is free by giving an aligned value to
+ // CALLSEQ_START.
}
+ // Even if we're not expected to free up the space, it's useful to know how
+ // much is there while considering tail calls (because we can reuse it).
+ FuncInfo->setBytesInStackArgArea(StackArgSize);
- for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
- Ops.push_back(DAG.getRegister(RegsToPass[i].first,
- RegsToPass[i].second.getValueType()));
+ return Chain;
+}
+void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo,
+ SelectionDAG &DAG, SDLoc DL,
+ SDValue &Chain) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
- // Add a register mask operand representing the call-preserved registers. This
- // is used later in codegen to constrain register-allocation.
- const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
- const uint32_t *Mask = TRI->getCallPreservedMask(CallConv);
- assert(Mask && "Missing call preserved mask for calling convention");
- Ops.push_back(DAG.getRegisterMask(Mask));
+ SmallVector<SDValue, 8> MemOps;
- // If we needed glue, put it in as the last argument.
- if (InFlag.getNode())
- Ops.push_back(InFlag);
+ static const MCPhysReg GPRArgRegs[] = { AArch64::X0, AArch64::X1, AArch64::X2,
+ AArch64::X3, AArch64::X4, AArch64::X5,
+ AArch64::X6, AArch64::X7 };
+ static const unsigned NumGPRArgRegs = array_lengthof(GPRArgRegs);
+ unsigned FirstVariadicGPR =
+ CCInfo.getFirstUnallocated(GPRArgRegs, NumGPRArgRegs);
- SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+ unsigned GPRSaveSize = 8 * (NumGPRArgRegs - FirstVariadicGPR);
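+ // For example, with three fixed integer parameters FirstVariadicGPR == 3,
+ // so X3-X7 must be saved and GPRSaveSize == 40 bytes.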
+ int GPRIdx = 0;
+ if (GPRSaveSize != 0) {
+ GPRIdx = MFI->CreateStackObject(GPRSaveSize, 8, false);
- if (IsTailCall) {
- return DAG.getNode(AArch64ISD::TC_RETURN, dl, NodeTys, &Ops[0], Ops.size());
+ SDValue FIN = DAG.getFrameIndex(GPRIdx, getPointerTy());
+
+ for (unsigned i = FirstVariadicGPR; i < NumGPRArgRegs; ++i) {
+ unsigned VReg = MF.addLiveIn(GPRArgRegs[i], &AArch64::GPR64RegClass);
+ SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::i64);
+ SDValue Store =
+ DAG.getStore(Val.getValue(1), DL, Val, FIN,
+ MachinePointerInfo::getStack(i * 8), false, false, 0);
+ MemOps.push_back(Store);
+ FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(), FIN,
+ DAG.getConstant(8, getPointerTy()));
+ }
}
+ FuncInfo->setVarArgsGPRIndex(GPRIdx);
+ FuncInfo->setVarArgsGPRSize(GPRSaveSize);
- Chain = DAG.getNode(AArch64ISD::Call, dl, NodeTys, &Ops[0], Ops.size());
- InFlag = Chain.getValue(1);
+ if (Subtarget->hasFPARMv8()) {
+ static const MCPhysReg FPRArgRegs[] = {
+ AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3,
+ AArch64::Q4, AArch64::Q5, AArch64::Q6, AArch64::Q7};
+ static const unsigned NumFPRArgRegs = array_lengthof(FPRArgRegs);
+ unsigned FirstVariadicFPR =
+ CCInfo.getFirstUnallocated(FPRArgRegs, NumFPRArgRegs);
+
+ unsigned FPRSaveSize = 16 * (NumFPRArgRegs - FirstVariadicFPR);
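+ // Each Q register is 16 bytes, so e.g. one fixed FP parameter leaves Q1-Q7
+ // to save: FPRSaveSize == 112 bytes.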
+ int FPRIdx = 0;
+ if (FPRSaveSize != 0) {
+ FPRIdx = MFI->CreateStackObject(FPRSaveSize, 16, false);
- // Now we can reclaim the stack, just as well do it before working out where
- // our return value is.
- if (!IsSibCall) {
- uint64_t CalleePopBytes
- = DoesCalleeRestoreStack(CallConv, TailCallOpt) ? NumBytes : 0;
+ SDValue FIN = DAG.getFrameIndex(FPRIdx, getPointerTy());
- Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
- DAG.getIntPtrConstant(CalleePopBytes, true),
- InFlag, dl);
- InFlag = Chain.getValue(1);
+ for (unsigned i = FirstVariadicFPR; i < NumFPRArgRegs; ++i) {
+ unsigned VReg = MF.addLiveIn(FPRArgRegs[i], &AArch64::FPR128RegClass);
+ SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f128);
+
+ SDValue Store =
+ DAG.getStore(Val.getValue(1), DL, Val, FIN,
+ MachinePointerInfo::getStack(i * 16), false, false, 0);
+ MemOps.push_back(Store);
+ FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(), FIN,
+ DAG.getConstant(16, getPointerTy()));
+ }
+ }
+ FuncInfo->setVarArgsFPRIndex(FPRIdx);
+ FuncInfo->setVarArgsFPRSize(FPRSaveSize);
}
- return LowerCallResult(Chain, InFlag, CallConv,
- IsVarArg, Ins, dl, DAG, InVals);
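+ // Merge the register-save stores into one TokenFactor so later chained
+ // operations only proceed once every save has been emitted.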
+ if (!MemOps.empty()) {
+ Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
+ }
}
-SDValue
-AArch64TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
- CallingConv::ID CallConv, bool IsVarArg,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- SDLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const {
+/// LowerCallResult - Lower the result values of a call into the
+/// appropriate copies out of appropriate physical registers.
+SDValue AArch64TargetLowering::LowerCallResult(
+ SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins, SDLoc DL, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
+ SDValue ThisVal) const {
+ CCAssignFn *RetCC = CallConv == CallingConv::WebKit_JS
+ ? RetCC_AArch64_WebKit_JS
+ : RetCC_AArch64_AAPCS;
// Assign locations to each value returned by this call.
SmallVector<CCValAssign, 16> RVLocs;
- CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(),
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
getTargetMachine(), RVLocs, *DAG.getContext());
- CCInfo.AnalyzeCallResult(Ins, CCAssignFnForNode(CallConv));
+ CCInfo.AnalyzeCallResult(Ins, RetCC);
+ // Copy all of the result registers out of their specified physreg.
for (unsigned i = 0; i != RVLocs.size(); ++i) {
CCValAssign VA = RVLocs[i];
- // Return values that are too big to fit into registers should use an sret
- // pointer, so this can be a lot simpler than the main argument code.
- assert(VA.isRegLoc() && "Memory locations not expected for call return");
+ // Pass the 'this' value directly from the argument to the return value,
+ // to avoid register-unit interference.
+ if (i == 0 && isThisReturn) {
+ assert(!VA.needsCustom() && VA.getLocVT() == MVT::i64 &&
+ "unexpected return calling convention register assignment");
+ InVals.push_back(ThisVal);
+ continue;
+ }
- SDValue Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
- InFlag);
+ SDValue Val =
+ DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), InFlag);
Chain = Val.getValue(1);
InFlag = Val.getValue(2);
switch (VA.getLocInfo()) {
- default: llvm_unreachable("Unknown loc info!");
- case CCValAssign::Full: break;
- case CCValAssign::BCvt:
- Val = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), Val);
+ default:
+ llvm_unreachable("Unknown loc info!");
+ case CCValAssign::Full:
break;
- case CCValAssign::ZExt:
- case CCValAssign::SExt:
- case CCValAssign::AExt:
- // Floating-point arguments only get extended/truncated if they're going
- // in memory, so using the integer operation is acceptable here.
- Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
+ case CCValAssign::BCvt:
+ Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
break;
}
@@ -1812,17 +1960,12 @@ AArch64TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
return Chain;
}
-bool
-AArch64TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
- CallingConv::ID CalleeCC,
- bool IsVarArg,
- bool IsCalleeStructRet,
- bool IsCallerStructRet,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- SelectionDAG& DAG) const {
-
+bool AArch64TargetLowering::isEligibleForTailCallOptimization(
+ SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
+ bool isCalleeStructRet, bool isCallerStructRet,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const {
// For CallingConv::C this function knows whether the ABI needs
// changing. That's not true for other conventions so they will have to opt in
// manually.
@@ -1838,7 +1981,8 @@ AArch64TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
// we want to reuse during a tail call. Working around this *is* possible (see
// X86) but less efficient and uglier in LowerCall.
for (Function::const_arg_iterator i = CallerF->arg_begin(),
- e = CallerF->arg_end(); i != e; ++i)
+ e = CallerF->arg_end();
+ i != e; ++i)
if (i->hasByValAttr())
return false;
@@ -1854,10 +1998,10 @@ AArch64TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
// I want anyone implementing a new calling convention to think long and hard
// about this assert.
- assert((!IsVarArg || CalleeCC == CallingConv::C)
- && "Unexpected variadic calling convention");
+ assert((!isVarArg || CalleeCC == CallingConv::C) &&
+ "Unexpected variadic calling convention");
- if (IsVarArg && !Outs.empty()) {
+ if (isVarArg && !Outs.empty()) {
// At least two cases here: if caller is fastcc then we can't have any
// memory arguments (we'd be expected to clean up the stack afterwards). If
// caller is C then we could potentially use its argument area.
@@ -1865,10 +2009,10 @@ AArch64TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
// FIXME: for now we take the most conservative of these in both cases:
// disallow all variadic memory operands.
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CalleeCC, IsVarArg, DAG.getMachineFunction(),
+ CCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(),
getTargetMachine(), ArgLocs, *DAG.getContext());
- CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CalleeCC));
+ CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, true));
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i)
if (!ArgLocs[i].isRegLoc())
return false;
@@ -1880,12 +2024,12 @@ AArch64TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
SmallVector<CCValAssign, 16> RVLocs1;
CCState CCInfo1(CalleeCC, false, DAG.getMachineFunction(),
getTargetMachine(), RVLocs1, *DAG.getContext());
- CCInfo1.AnalyzeCallResult(Ins, CCAssignFnForNode(CalleeCC));
+ CCInfo1.AnalyzeCallResult(Ins, CCAssignFnForCall(CalleeCC, isVarArg));
SmallVector<CCValAssign, 16> RVLocs2;
CCState CCInfo2(CallerCC, false, DAG.getMachineFunction(),
getTargetMachine(), RVLocs2, *DAG.getContext());
- CCInfo2.AnalyzeCallResult(Ins, CCAssignFnForNode(CallerCC));
+ CCInfo2.AnalyzeCallResult(Ins, CCAssignFnForCall(CallerCC, isVarArg));
if (RVLocs1.size() != RVLocs2.size())
return false;
@@ -1909,28 +2053,18 @@ AArch64TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
return true;
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CalleeCC, IsVarArg, DAG.getMachineFunction(),
+ CCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(),
getTargetMachine(), ArgLocs, *DAG.getContext());
- CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CalleeCC));
+ CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, isVarArg));
- const AArch64MachineFunctionInfo *FuncInfo
- = MF.getInfo<AArch64MachineFunctionInfo>();
+ const AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
// If the stack arguments for this call would fit into our own save area then
// the call can be made tail.
return CCInfo.getNextStackOffset() <= FuncInfo->getBytesInStackArgArea();
}
-bool AArch64TargetLowering::DoesCalleeRestoreStack(CallingConv::ID CallCC,
- bool TailCallOpt) const {
- return CallCC == CallingConv::Fast && TailCallOpt;
-}
-
-bool AArch64TargetLowering::IsTailCallConvention(CallingConv::ID CallCC) const {
- return CallCC == CallingConv::Fast;
-}
-
SDValue AArch64TargetLowering::addTokenForArgument(SDValue Chain,
SelectionDAG &DAG,
MachineFrameInfo *MFI,
@@ -1946,7 +2080,8 @@ SDValue AArch64TargetLowering::addTokenForArgument(SDValue Chain,
// Add a chain value for each stack argument corresponding
for (SDNode::use_iterator U = DAG.getEntryNode().getNode()->use_begin(),
- UE = DAG.getEntryNode().getNode()->use_end(); U != UE; ++U)
+ UE = DAG.getEntryNode().getNode()->use_end();
+ U != UE; ++U)
if (LoadSDNode *L = dyn_cast<LoadSDNode>(*U))
if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(L->getBasePtr()))
if (FI->getIndex() < 0) {
@@ -1959,625 +2094,609 @@ SDValue AArch64TargetLowering::addTokenForArgument(SDValue Chain,
ArgChains.push_back(SDValue(L, 1));
}
- // Build a tokenfactor for all the chains.
- return DAG.getNode(ISD::TokenFactor, SDLoc(Chain), MVT::Other,
- &ArgChains[0], ArgChains.size());
+ // Build a tokenfactor for all the chains.
+ return DAG.getNode(ISD::TokenFactor, SDLoc(Chain), MVT::Other, ArgChains);
}
-static A64CC::CondCodes IntCCToA64CC(ISD::CondCode CC) {
- switch (CC) {
- case ISD::SETEQ: return A64CC::EQ;
- case ISD::SETGT: return A64CC::GT;
- case ISD::SETGE: return A64CC::GE;
- case ISD::SETLT: return A64CC::LT;
- case ISD::SETLE: return A64CC::LE;
- case ISD::SETNE: return A64CC::NE;
- case ISD::SETUGT: return A64CC::HI;
- case ISD::SETUGE: return A64CC::HS;
- case ISD::SETULT: return A64CC::LO;
- case ISD::SETULE: return A64CC::LS;
- default: llvm_unreachable("Unexpected condition code");
- }
-}
-
-bool AArch64TargetLowering::isLegalICmpImmediate(int64_t Val) const {
- // icmp is implemented using adds/subs immediate, which take an unsigned
- // 12-bit immediate, optionally shifted left by 12 bits.
-
- // Symmetric by using adds/subs
- if (Val < 0)
- Val = -Val;
-
- return (Val & ~0xfff) == 0 || (Val & ~0xfff000) == 0;
-}
-
-SDValue AArch64TargetLowering::getSelectableIntSetCC(SDValue LHS, SDValue RHS,
- ISD::CondCode CC, SDValue &A64cc,
- SelectionDAG &DAG, SDLoc &dl) const {
- if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
- int64_t C = 0;
- EVT VT = RHSC->getValueType(0);
- bool knownInvalid = false;
-
- // I'm not convinced the rest of LLVM handles these edge cases properly, but
- // we can at least get it right.
- if (isSignedIntSetCC(CC)) {
- C = RHSC->getSExtValue();
- } else if (RHSC->getZExtValue() > INT64_MAX) {
- // A 64-bit constant not representable by a signed 64-bit integer is far
- // too big to fit into a SUBS immediate anyway.
- knownInvalid = true;
- } else {
- C = RHSC->getZExtValue();
- }
-
- if (!knownInvalid && !isLegalICmpImmediate(C)) {
- // Constant does not fit, try adjusting it by one?
- switch (CC) {
- default: break;
- case ISD::SETLT:
- case ISD::SETGE:
- if (isLegalICmpImmediate(C-1)) {
- CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
- RHS = DAG.getConstant(C-1, VT);
- }
- break;
- case ISD::SETULT:
- case ISD::SETUGE:
- if (isLegalICmpImmediate(C-1)) {
- CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
- RHS = DAG.getConstant(C-1, VT);
- }
- break;
- case ISD::SETLE:
- case ISD::SETGT:
- if (isLegalICmpImmediate(C+1)) {
- CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
- RHS = DAG.getConstant(C+1, VT);
- }
- break;
- case ISD::SETULE:
- case ISD::SETUGT:
- if (isLegalICmpImmediate(C+1)) {
- CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
- RHS = DAG.getConstant(C+1, VT);
- }
- break;
- }
- }
- }
-
- A64CC::CondCodes CondCode = IntCCToA64CC(CC);
- A64cc = DAG.getConstant(CondCode, MVT::i32);
- return DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, LHS, RHS,
- DAG.getCondCode(CC));
+bool AArch64TargetLowering::DoesCalleeRestoreStack(CallingConv::ID CallCC,
+ bool TailCallOpt) const {
+ return CallCC == CallingConv::Fast && TailCallOpt;
}
-static A64CC::CondCodes FPCCToA64CC(ISD::CondCode CC,
- A64CC::CondCodes &Alternative) {
- A64CC::CondCodes CondCode = A64CC::Invalid;
- Alternative = A64CC::Invalid;
-
- switch (CC) {
- default: llvm_unreachable("Unknown FP condition!");
- case ISD::SETEQ:
- case ISD::SETOEQ: CondCode = A64CC::EQ; break;
- case ISD::SETGT:
- case ISD::SETOGT: CondCode = A64CC::GT; break;
- case ISD::SETGE:
- case ISD::SETOGE: CondCode = A64CC::GE; break;
- case ISD::SETOLT: CondCode = A64CC::MI; break;
- case ISD::SETOLE: CondCode = A64CC::LS; break;
- case ISD::SETONE: CondCode = A64CC::MI; Alternative = A64CC::GT; break;
- case ISD::SETO: CondCode = A64CC::VC; break;
- case ISD::SETUO: CondCode = A64CC::VS; break;
- case ISD::SETUEQ: CondCode = A64CC::EQ; Alternative = A64CC::VS; break;
- case ISD::SETUGT: CondCode = A64CC::HI; break;
- case ISD::SETUGE: CondCode = A64CC::PL; break;
- case ISD::SETLT:
- case ISD::SETULT: CondCode = A64CC::LT; break;
- case ISD::SETLE:
- case ISD::SETULE: CondCode = A64CC::LE; break;
- case ISD::SETNE:
- case ISD::SETUNE: CondCode = A64CC::NE; break;
- }
- return CondCode;
+bool AArch64TargetLowering::IsTailCallConvention(CallingConv::ID CallCC) const {
+ return CallCC == CallingConv::Fast;
}
+/// LowerCall - Lower a call to a callseq_start + CALL + callseq_end chain,
+/// and add input and output parameter nodes.
SDValue
-AArch64TargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const {
- SDLoc DL(Op);
- EVT PtrVT = getPointerTy();
- const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
-
- switch(getTargetMachine().getCodeModel()) {
- case CodeModel::Small:
- // The most efficient code is PC-relative anyway for the small memory model,
- // so we don't need to worry about relocation model.
- return DAG.getNode(AArch64ISD::WrapperSmall, DL, PtrVT,
- DAG.getTargetBlockAddress(BA, PtrVT, 0,
- AArch64II::MO_NO_FLAG),
- DAG.getTargetBlockAddress(BA, PtrVT, 0,
- AArch64II::MO_LO12),
- DAG.getConstant(/*Alignment=*/ 4, MVT::i32));
- case CodeModel::Large:
- return DAG.getNode(
- AArch64ISD::WrapperLarge, DL, PtrVT,
- DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_ABS_G3),
- DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_ABS_G2_NC),
- DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_ABS_G1_NC),
- DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_ABS_G0_NC));
- default:
- llvm_unreachable("Only small and large code models supported now");
- }
-}
-
+AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
+ SmallVectorImpl<SDValue> &InVals) const {
+ SelectionDAG &DAG = CLI.DAG;
+ SDLoc &DL = CLI.DL;
+ SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
+ SmallVector<SDValue, 32> &OutVals = CLI.OutVals;
+ SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins;
+ SDValue Chain = CLI.Chain;
+ SDValue Callee = CLI.Callee;
+ bool &IsTailCall = CLI.IsTailCall;
+ CallingConv::ID CallConv = CLI.CallConv;
+ bool IsVarArg = CLI.IsVarArg;
-// (BRCOND chain, val, dest)
-SDValue
-AArch64TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
- SDLoc dl(Op);
- SDValue Chain = Op.getOperand(0);
- SDValue TheBit = Op.getOperand(1);
- SDValue DestBB = Op.getOperand(2);
+ MachineFunction &MF = DAG.getMachineFunction();
+ bool IsStructRet = !Outs.empty() && Outs[0].Flags.isSRet();
+ bool IsThisReturn = false;
- // AArch64 BooleanContents is the default UndefinedBooleanContent, which means
- // that as the consumer we are responsible for ignoring rubbish in higher
- // bits.
- TheBit = DAG.getNode(ISD::AND, dl, MVT::i32, TheBit,
- DAG.getConstant(1, MVT::i32));
+ AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
+ bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt;
+ bool IsSibCall = false;
- SDValue A64CMP = DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, TheBit,
- DAG.getConstant(0, TheBit.getValueType()),
- DAG.getCondCode(ISD::SETNE));
+ if (IsTailCall) {
+ // Check if it's really possible to do a tail call.
+ IsTailCall = isEligibleForTailCallOptimization(
+ Callee, CallConv, IsVarArg, IsStructRet,
+ MF.getFunction()->hasStructRetAttr(), Outs, OutVals, Ins, DAG);
+ if (!IsTailCall && CLI.CS && CLI.CS->isMustTailCall())
+ report_fatal_error("failed to perform tail call elimination on a call "
+ "site marked musttail");
- return DAG.getNode(AArch64ISD::BR_CC, dl, MVT::Other, Chain,
- A64CMP, DAG.getConstant(A64CC::NE, MVT::i32),
- DestBB);
-}
+ // A sibling call is one where we're under the usual C ABI and not planning
+ // to change that but can still do a tail call:
+ if (!TailCallOpt && IsTailCall)
+ IsSibCall = true;
-// (BR_CC chain, condcode, lhs, rhs, dest)
-SDValue
-AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
- SDLoc dl(Op);
- SDValue Chain = Op.getOperand(0);
- ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
- SDValue LHS = Op.getOperand(2);
- SDValue RHS = Op.getOperand(3);
- SDValue DestBB = Op.getOperand(4);
+ if (IsTailCall)
+ ++NumTailCalls;
+ }
- if (LHS.getValueType() == MVT::f128) {
- // f128 comparisons are lowered to runtime calls by a routine which sets
- // LHS, RHS and CC appropriately for the rest of this function to continue.
- softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl);
+ // Analyze operands of the call, assigning locations to each operand.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
- // If softenSetCCOperands returned a scalar, we need to compare the result
- // against zero to select between true and false values.
- if (RHS.getNode() == 0) {
- RHS = DAG.getConstant(0, LHS.getValueType());
- CC = ISD::SETNE;
+ if (IsVarArg) {
+ // Handle fixed and variable vector arguments differently.
+ // Variable vector arguments always go into memory.
+ unsigned NumArgs = Outs.size();
+
+ for (unsigned i = 0; i != NumArgs; ++i) {
+ MVT ArgVT = Outs[i].VT;
+ ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
+ CCAssignFn *AssignFn = CCAssignFnForCall(CallConv,
+ /*IsVarArg=*/ !Outs[i].IsFixed);
+ bool Res = AssignFn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo);
+ assert(!Res && "Call operand has unhandled type");
+ (void)Res;
+ }
+ } else {
+ // At this point, Outs[].VT may already be promoted to i32. To correctly
+ // handle passing i8 as i8 instead of i32 on the stack, we demote the
+ // promoted type back to i8/i16 before running the assignment function.
+ // Since AnalyzeCallOperands would use Outs[].VT for both ValVT and LocVT,
+ // we instead invoke the CCAssignFn directly with the demoted ValVT.
+ unsigned NumArgs = Outs.size();
+ for (unsigned i = 0; i != NumArgs; ++i) {
+ MVT ValVT = Outs[i].VT;
+ // Get type of the original argument.
+ EVT ActualVT = getValueType(CLI.getArgs()[Outs[i].OrigArgIndex].Ty,
+ /*AllowUnknown*/ true);
+ MVT ActualMVT = ActualVT.isSimple() ? ActualVT.getSimpleVT() : ValVT;
+ ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
+ // If ActualMVT is i1/i8/i16, we should set LocVT to i8/i8/i16.
+ if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8)
+ ValVT = MVT::i8;
+ else if (ActualMVT == MVT::i16)
+ ValVT = MVT::i16;
+
+ CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, /*IsVarArg=*/false);
+ bool Res = AssignFn(i, ValVT, ValVT, CCValAssign::Full, ArgFlags, CCInfo);
+ assert(!Res && "Call operand has unhandled type");
+ (void)Res;
}
}
- if (LHS.getValueType().isInteger()) {
- SDValue A64cc;
-
- // Integers are handled in a separate function because the combinations of
- // immediates and tests can get hairy and we may want to fiddle things.
- SDValue CmpOp = getSelectableIntSetCC(LHS, RHS, CC, A64cc, DAG, dl);
+ // Get a count of how many bytes are to be pushed on the stack.
+ unsigned NumBytes = CCInfo.getNextStackOffset();
- return DAG.getNode(AArch64ISD::BR_CC, dl, MVT::Other,
- Chain, CmpOp, A64cc, DestBB);
+ if (IsSibCall) {
+ // Since we're not changing the ABI to make this a tail call, the memory
+ // operands are already available in the caller's incoming argument space.
+ NumBytes = 0;
}
- // Note that some LLVM floating-point CondCodes can't be lowered to a single
- // conditional branch, hence FPCCToA64CC can set a second test, where either
- // passing is sufficient.
- A64CC::CondCodes CondCode, Alternative = A64CC::Invalid;
- CondCode = FPCCToA64CC(CC, Alternative);
- SDValue A64cc = DAG.getConstant(CondCode, MVT::i32);
- SDValue SetCC = DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, LHS, RHS,
- DAG.getCondCode(CC));
- SDValue A64BR_CC = DAG.getNode(AArch64ISD::BR_CC, dl, MVT::Other,
- Chain, SetCC, A64cc, DestBB);
+ // FPDiff is the byte offset of the call's argument area from the callee's.
+ // Stores to callee stack arguments will be placed in FixedStackSlots offset
+ // by this amount for a tail call. In a sibling call it must be 0 because the
+ // caller will deallocate the entire stack and the callee still expects its
+ // arguments to begin at SP+0. Completely unused for non-tail calls.
+ int FPDiff = 0;
- if (Alternative != A64CC::Invalid) {
- A64cc = DAG.getConstant(Alternative, MVT::i32);
- A64BR_CC = DAG.getNode(AArch64ISD::BR_CC, dl, MVT::Other,
- A64BR_CC, SetCC, A64cc, DestBB);
+ if (IsTailCall && !IsSibCall) {
+ unsigned NumReusableBytes = FuncInfo->getBytesInStackArgArea();
- }
+ // Since the callee will pop the argument stack as a tail call, we must
+ // keep the popped size 16-byte aligned.
+ NumBytes = RoundUpToAlignment(NumBytes, 16);
- return A64BR_CC;
-}
+ // FPDiff will be negative if this tail call requires more space than we
+ // would automatically have in our incoming argument space. Positive if we
+ // can actually shrink the stack.
+ FPDiff = NumReusableBytes - NumBytes;
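+ // For example, 32 reusable bytes against 48 bytes of outgoing arguments
+ // gives FPDiff == -16: this tail call needs 16 bytes more than the caller
+ // received.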
-SDValue
-AArch64TargetLowering::LowerF128ToCall(SDValue Op, SelectionDAG &DAG,
- RTLIB::Libcall Call) const {
- ArgListTy Args;
- ArgListEntry Entry;
- for (unsigned i = 0, e = Op->getNumOperands(); i != e; ++i) {
- EVT ArgVT = Op.getOperand(i).getValueType();
- Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
- Entry.Node = Op.getOperand(i); Entry.Ty = ArgTy;
- Entry.isSExt = false;
- Entry.isZExt = false;
- Args.push_back(Entry);
+ // The stack pointer must be 16-byte aligned at all times it's used for a
+ // memory operation, which in practice means at *all* times and in
+ // particular across call boundaries. Therefore our own arguments started at
+ // a 16-byte aligned SP and the delta applied for the tail call should
+ // satisfy the same constraint.
+ assert(FPDiff % 16 == 0 && "unaligned stack on tail call");
}
- SDValue Callee = DAG.getExternalSymbol(getLibcallName(Call), getPointerTy());
- Type *RetTy = Op.getValueType().getTypeForEVT(*DAG.getContext());
+ // Adjust the stack pointer for the new arguments...
+ // These operations are automatically eliminated by the prolog/epilog pass
+ if (!IsSibCall)
+ Chain =
+ DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true), DL);
- // By default, the input chain to this libcall is the entry node of the
- // function. If the libcall is going to be emitted as a tail call then
- // isUsedByReturnOnly will change it to the right chain if the return
- // node which is being folded has a non-entry input chain.
- SDValue InChain = DAG.getEntryNode();
+ SDValue StackPtr = DAG.getCopyFromReg(Chain, DL, AArch64::SP, getPointerTy());
- // isTailCall may be true since the callee does not reference caller stack
- // frame. Check if it's in the right position.
- SDValue TCChain = InChain;
- bool isTailCall = isInTailCallPosition(DAG, Op.getNode(), TCChain);
- if (isTailCall)
- InChain = TCChain;
+ SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
+ SmallVector<SDValue, 8> MemOpChains;
- TargetLowering::
- CallLoweringInfo CLI(InChain, RetTy, false, false, false, false,
- 0, getLibcallCallingConv(Call), isTailCall,
- /*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
- Callee, Args, DAG, SDLoc(Op));
- std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI);
+ // Walk the register/memloc assignments, inserting copies/loads.
+ for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size(); i != e;
+ ++i, ++realArgIdx) {
+ CCValAssign &VA = ArgLocs[i];
+ SDValue Arg = OutVals[realArgIdx];
+ ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
- if (!CallInfo.second.getNode())
- // It's a tailcall, return the chain (which is the DAG root).
- return DAG.getRoot();
+ // Promote the value if needed.
+ switch (VA.getLocInfo()) {
+ default:
+ llvm_unreachable("Unknown loc info!");
+ case CCValAssign::Full:
+ break;
+ case CCValAssign::SExt:
+ Arg = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::ZExt:
+ Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::AExt:
+ if (Outs[realArgIdx].ArgVT == MVT::i1) {
+ // AAPCS requires i1 to be zero-extended to 8-bits by the caller.
+ Arg = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Arg);
+ Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i8, Arg);
+ }
+ Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::BCvt:
+ Arg = DAG.getNode(ISD::BITCAST, DL, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::FPExt:
+ Arg = DAG.getNode(ISD::FP_EXTEND, DL, VA.getLocVT(), Arg);
+ break;
+ }
- return CallInfo.first;
-}
+ if (VA.isRegLoc()) {
+ if (realArgIdx == 0 && Flags.isReturned() && Outs[0].VT == MVT::i64) {
+ assert(VA.getLocVT() == MVT::i64 &&
+ "unexpected calling convention register assignment");
+ assert(!Ins.empty() && Ins[0].VT == MVT::i64 &&
+ "unexpected use of 'returned'");
+ IsThisReturn = true;
+ }
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+ } else {
+ assert(VA.isMemLoc());
-SDValue
-AArch64TargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
- if (Op.getOperand(0).getValueType() != MVT::f128) {
- // It's legal except when f128 is involved
- return Op;
- }
+ SDValue DstAddr;
+ MachinePointerInfo DstInfo;
- RTLIB::Libcall LC;
- LC = RTLIB::getFPROUND(Op.getOperand(0).getValueType(), Op.getValueType());
+ // FIXME: This works on big-endian for composite byvals, which are the
+ // common case. It should also work for fundamental types too.
+ uint32_t BEAlign = 0;
+ unsigned OpSize = Flags.isByVal() ? Flags.getByValSize() * 8
+ : VA.getLocVT().getSizeInBits();
+ OpSize = (OpSize + 7) / 8;
+ if (!Subtarget->isLittleEndian() && !Flags.isByVal()) {
+ if (OpSize < 8)
+ BEAlign = 8 - OpSize;
+ }
+ unsigned LocMemOffset = VA.getLocMemOffset();
+ int32_t Offset = LocMemOffset + BEAlign;
+ SDValue PtrOff = DAG.getIntPtrConstant(Offset);
+ PtrOff = DAG.getNode(ISD::ADD, DL, getPointerTy(), StackPtr, PtrOff);
+
+ if (IsTailCall) {
+ Offset = Offset + FPDiff;
+ int FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true);
+
+ DstAddr = DAG.getFrameIndex(FI, getPointerTy());
+ DstInfo = MachinePointerInfo::getFixedStack(FI);
+
+ // Make sure any stack arguments overlapping with where we're storing
+ // are loaded before this eventual operation. Otherwise they'll be
+ // clobbered.
+ Chain = addTokenForArgument(Chain, DAG, MF.getFrameInfo(), FI);
+ } else {
- SDValue SrcVal = Op.getOperand(0);
- return makeLibCall(DAG, LC, Op.getValueType(), &SrcVal, 1,
- /*isSigned*/ false, SDLoc(Op)).first;
-}
+ // PtrOff already holds StackPtr + Offset, so reuse it rather than
+ // shadowing it with a second constant node.
+ DstAddr = PtrOff;
+ DstInfo = MachinePointerInfo::getStack(LocMemOffset);
+ }
-SDValue
-AArch64TargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
- assert(Op.getValueType() == MVT::f128 && "Unexpected lowering");
+ if (Outs[i].Flags.isByVal()) {
+ SDValue SizeNode =
+ DAG.getConstant(Outs[i].Flags.getByValSize(), MVT::i64);
+ SDValue Cpy = DAG.getMemcpy(
+ Chain, DL, DstAddr, Arg, SizeNode, Outs[i].Flags.getByValAlign(),
+ /*isVolatile = */ false,
+ /*alwaysInline = */ false, DstInfo, MachinePointerInfo());
- RTLIB::Libcall LC;
- LC = RTLIB::getFPEXT(Op.getOperand(0).getValueType(), Op.getValueType());
+ MemOpChains.push_back(Cpy);
+ } else {
+ // Since we pass i1/i8/i16 as i1/i8/i16 on stack and Arg is already
+ // promoted to a legal register type i32, we should truncate Arg back to
+ // i1/i8/i16.
+ if (Arg.getValueType().isSimple() &&
+ Arg.getValueType().getSimpleVT() == MVT::i32 &&
+ (VA.getLocVT() == MVT::i1 || VA.getLocVT() == MVT::i8 ||
+ VA.getLocVT() == MVT::i16))
+ Arg = DAG.getNode(ISD::TRUNCATE, DL, VA.getLocVT(), Arg);
+
+ SDValue Store =
+ DAG.getStore(Chain, DL, Arg, DstAddr, DstInfo, false, false, 0);
+ MemOpChains.push_back(Store);
+ }
+ }
+ }
- return LowerF128ToCall(Op, DAG, LC);
-}
+ if (!MemOpChains.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
-static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG,
- bool IsSigned) {
- SDLoc dl(Op);
- EVT VT = Op.getValueType();
- SDValue Vec = Op.getOperand(0);
- EVT OpVT = Vec.getValueType();
- unsigned Opc = IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT;
-
- if (VT.getVectorNumElements() == 1) {
- assert(OpVT == MVT::v1f64 && "Unexpected vector type!");
- if (VT.getSizeInBits() == OpVT.getSizeInBits())
- return Op;
- return DAG.UnrollVectorOp(Op.getNode());
- }
-
- if (VT.getSizeInBits() > OpVT.getSizeInBits()) {
- assert(Vec.getValueType() == MVT::v2f32 && VT == MVT::v2i64 &&
- "Unexpected vector type!");
- Vec = DAG.getNode(ISD::FP_EXTEND, dl, MVT::v2f64, Vec);
- return DAG.getNode(Opc, dl, VT, Vec);
- } else if (VT.getSizeInBits() < OpVT.getSizeInBits()) {
- EVT CastVT = EVT::getIntegerVT(*DAG.getContext(),
- OpVT.getVectorElementType().getSizeInBits());
- CastVT =
- EVT::getVectorVT(*DAG.getContext(), CastVT, VT.getVectorNumElements());
- Vec = DAG.getNode(Opc, dl, CastVT, Vec);
- return DAG.getNode(ISD::TRUNCATE, dl, VT, Vec);
- }
- return DAG.getNode(Opc, dl, VT, Vec);
-}
-
-static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
- // We custom lower concat_vectors with 4, 8, or 16 operands that are all the
- // same operand and of type v1* using the DUP instruction.
- unsigned NumOps = Op->getNumOperands();
- if (NumOps == 2) {
- assert(Op.getValueType().getSizeInBits() == 128 && "unexpected concat");
- return Op;
+ // Build a sequence of copy-to-reg nodes chained together with token chain
+ // and flag operands which copy the outgoing args into the appropriate regs.
+ SDValue InFlag;
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, DL, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
}
- if (NumOps != 4 && NumOps != 8 && NumOps != 16)
- return SDValue();
-
- // Must be a single value for VDUP.
- SDValue Op0 = Op.getOperand(0);
- for (unsigned i = 1; i < NumOps; ++i) {
- SDValue OpN = Op.getOperand(i);
- if (Op0 != OpN)
- return SDValue();
+ // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
+ // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
+ // node so that legalize doesn't hack it.
+ if (getTargetMachine().getCodeModel() == CodeModel::Large &&
+ Subtarget->isTargetMachO()) {
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+ const GlobalValue *GV = G->getGlobal();
+ bool InternalLinkage = GV->hasInternalLinkage();
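+ // An internally-linked global must be defined in this image, so it can be
+ // referenced directly; anything else is loaded via the GOT.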
+ if (InternalLinkage)
+ Callee = DAG.getTargetGlobalAddress(GV, DL, getPointerTy(), 0, 0);
+ else {
+ Callee = DAG.getTargetGlobalAddress(GV, DL, getPointerTy(), 0,
+ AArch64II::MO_GOT);
+ Callee = DAG.getNode(AArch64ISD::LOADgot, DL, getPointerTy(), Callee);
+ }
+ } else if (ExternalSymbolSDNode *S =
+ dyn_cast<ExternalSymbolSDNode>(Callee)) {
+ const char *Sym = S->getSymbol();
+ Callee =
+ DAG.getTargetExternalSymbol(Sym, getPointerTy(), AArch64II::MO_GOT);
+ Callee = DAG.getNode(AArch64ISD::LOADgot, DL, getPointerTy(), Callee);
+ }
+ } else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+ const GlobalValue *GV = G->getGlobal();
+ Callee = DAG.getTargetGlobalAddress(GV, DL, getPointerTy(), 0, 0);
+ } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
+ const char *Sym = S->getSymbol();
+ Callee = DAG.getTargetExternalSymbol(Sym, getPointerTy(), 0);
}
- // Verify the value type.
- EVT EltVT = Op0.getValueType();
- switch (NumOps) {
- default: llvm_unreachable("Unexpected number of operands");
- case 4:
- if (EltVT != MVT::v1i16 && EltVT != MVT::v1i32)
- return SDValue();
- break;
- case 8:
- if (EltVT != MVT::v1i8 && EltVT != MVT::v1i16)
- return SDValue();
- break;
- case 16:
- if (EltVT != MVT::v1i8)
- return SDValue();
- break;
+ // We don't usually want to end the call-sequence here because we would tidy
+ // the frame up *after* the call; however, in the ABI-changing tail-call case
+ // we've carefully laid out the parameters so that when SP is reset they'll
+ // be in the correct location.
+ if (IsTailCall && !IsSibCall) {
+ Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
+ DAG.getIntPtrConstant(0, true), InFlag, DL);
+ InFlag = Chain.getValue(1);
}
- SDLoc DL(Op);
- EVT VT = Op.getValueType();
- // VDUP produces better code for constants.
- if (Op0->getOpcode() == ISD::BUILD_VECTOR)
- return DAG.getNode(AArch64ISD::NEON_VDUP, DL, VT, Op0->getOperand(0));
- return DAG.getNode(AArch64ISD::NEON_VDUPLANE, DL, VT, Op0,
- DAG.getConstant(0, MVT::i64));
-}
+ std::vector<SDValue> Ops;
+ Ops.push_back(Chain);
+ Ops.push_back(Callee);
-SDValue
-AArch64TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
- bool IsSigned) const {
- if (Op.getValueType().isVector())
- return LowerVectorFP_TO_INT(Op, DAG, IsSigned);
- if (Op.getOperand(0).getValueType() != MVT::f128) {
- // It's legal except when f128 is involved
- return Op;
+ if (IsTailCall) {
+ // Each tail call may have to adjust the stack by a different amount, so
+ // this information must travel along with the operation for eventual
+ // consumption by emitEpilogue.
+ Ops.push_back(DAG.getTargetConstant(FPDiff, MVT::i32));
}
- RTLIB::Libcall LC;
- if (IsSigned)
- LC = RTLIB::getFPTOSINT(Op.getOperand(0).getValueType(), Op.getValueType());
- else
- LC = RTLIB::getFPTOUINT(Op.getOperand(0).getValueType(), Op.getValueType());
+ // Add argument registers to the end of the list so that they are known live
+ // into the call.
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
+ Ops.push_back(DAG.getRegister(RegsToPass[i].first,
+ RegsToPass[i].second.getValueType()));
- return LowerF128ToCall(Op, DAG, LC);
-}
+ // Add a register mask operand representing the call-preserved registers.
+ const uint32_t *Mask;
+ const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
+ const AArch64RegisterInfo *ARI =
+ static_cast<const AArch64RegisterInfo *>(TRI);
+ if (IsThisReturn) {
+ // For 'this' returns, use the X0-preserving mask if applicable
+ Mask = ARI->getThisReturnPreservedMask(CallConv);
+ if (!Mask) {
+ IsThisReturn = false;
+ Mask = ARI->getCallPreservedMask(CallConv);
+ }
+ } else
+ Mask = ARI->getCallPreservedMask(CallConv);
-SDValue AArch64TargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{
- MachineFunction &MF = DAG.getMachineFunction();
- MachineFrameInfo *MFI = MF.getFrameInfo();
- MFI->setReturnAddressIsTaken(true);
+ assert(Mask && "Missing call preserved mask for calling convention");
+ Ops.push_back(DAG.getRegisterMask(Mask));
- if (verifyReturnAddressArgumentIsConstant(Op, DAG))
- return SDValue();
+ if (InFlag.getNode())
+ Ops.push_back(InFlag);
- EVT VT = Op.getValueType();
- SDLoc dl(Op);
- unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
- if (Depth) {
- SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
- SDValue Offset = DAG.getConstant(8, MVT::i64);
- return DAG.getLoad(VT, dl, DAG.getEntryNode(),
- DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset),
- MachinePointerInfo(), false, false, false, 0);
- }
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
- // Return X30, which contains the return address. Mark it an implicit live-in.
- unsigned Reg = MF.addLiveIn(AArch64::X30, getRegClassFor(MVT::i64));
- return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, MVT::i64);
-}
+ // If we're doing a tail call, use a TC_RETURN here rather than an
+ // actual call instruction.
+ if (IsTailCall)
+ return DAG.getNode(AArch64ISD::TC_RETURN, DL, NodeTys, Ops);
+ // Returns a chain and a flag for retval copy to use.
+ Chain = DAG.getNode(AArch64ISD::CALL, DL, NodeTys, Ops);
+ InFlag = Chain.getValue(1);
-SDValue AArch64TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG)
- const {
- MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
- MFI->setFrameAddressIsTaken(true);
+ uint64_t CalleePopBytes = DoesCalleeRestoreStack(CallConv, TailCallOpt)
+ ? RoundUpToAlignment(NumBytes, 16)
+ : 0;
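+ // For example, a fastcc callee (under GuaranteedTailCallOpt) that took 24
+ // bytes of stack arguments pops 32; all other callees pop nothing.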
- EVT VT = Op.getValueType();
- SDLoc dl(Op);
- unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
- unsigned FrameReg = AArch64::X29;
- SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
- while (Depth--)
- FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
- MachinePointerInfo(),
- false, false, false, 0);
- return FrameAddr;
+ Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
+ DAG.getIntPtrConstant(CalleePopBytes, true),
+ InFlag, DL);
+ if (!Ins.empty())
+ InFlag = Chain.getValue(1);
+
+ // Handle result values, copying them out of physregs into vregs that we
+ // return.
+ return LowerCallResult(Chain, InFlag, CallConv, IsVarArg, Ins, DL, DAG,
+ InVals, IsThisReturn,
+ IsThisReturn ? OutVals[0] : SDValue());
+}
+
+bool AArch64TargetLowering::CanLowerReturn(
+ CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
+ CCAssignFn *RetCC = CallConv == CallingConv::WebKit_JS
+ ? RetCC_AArch64_WebKit_JS
+ : RetCC_AArch64_AAPCS;
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(), RVLocs, Context);
+ return CCInfo.CheckReturn(Outs, RetCC);
}
SDValue
-AArch64TargetLowering::LowerGlobalAddressELFLarge(SDValue Op,
- SelectionDAG &DAG) const {
- assert(getTargetMachine().getCodeModel() == CodeModel::Large);
- assert(getTargetMachine().getRelocationModel() == Reloc::Static);
+AArch64TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ SDLoc DL, SelectionDAG &DAG) const {
+ CCAssignFn *RetCC = CallConv == CallingConv::WebKit_JS
+ ? RetCC_AArch64_WebKit_JS
+ : RetCC_AArch64_AAPCS;
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs, *DAG.getContext());
+ CCInfo.AnalyzeReturn(Outs, RetCC);
- EVT PtrVT = getPointerTy();
- SDLoc dl(Op);
- const GlobalAddressSDNode *GN = cast<GlobalAddressSDNode>(Op);
- const GlobalValue *GV = GN->getGlobal();
+ // Copy the result values into the output registers.
+ SDValue Flag;
+ SmallVector<SDValue, 4> RetOps(1, Chain);
+ for (unsigned i = 0, realRVLocIdx = 0; i != RVLocs.size();
+ ++i, ++realRVLocIdx) {
+ CCValAssign &VA = RVLocs[i];
+ assert(VA.isRegLoc() && "Can only return in registers!");
+ SDValue Arg = OutVals[realRVLocIdx];
+
+ switch (VA.getLocInfo()) {
+ default:
+ llvm_unreachable("Unknown loc info!");
+ case CCValAssign::Full:
+ if (Outs[i].ArgVT == MVT::i1) {
+ // AAPCS requires i1 to be zero-extended to i8 by the producer of the
+ // value. This is strictly redundant on Darwin (which uses "zeroext
+ // i1"), but will be optimised out before ISel.
+ Arg = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Arg);
+ Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg);
+ }
+ break;
+ case CCValAssign::BCvt:
+ Arg = DAG.getNode(ISD::BITCAST, DL, VA.getLocVT(), Arg);
+ break;
+ }
+
+ Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Arg, Flag);
+ Flag = Chain.getValue(1);
+ RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
+ }
- SDValue GlobalAddr = DAG.getNode(
- AArch64ISD::WrapperLarge, dl, PtrVT,
- DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, AArch64II::MO_ABS_G3),
- DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, AArch64II::MO_ABS_G2_NC),
- DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, AArch64II::MO_ABS_G1_NC),
- DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, AArch64II::MO_ABS_G0_NC));
+ RetOps[0] = Chain; // Update chain.
- if (GN->getOffset() != 0)
- return DAG.getNode(ISD::ADD, dl, PtrVT, GlobalAddr,
- DAG.getConstant(GN->getOffset(), PtrVT));
+ // Add the flag if we have it.
+ if (Flag.getNode())
+ RetOps.push_back(Flag);
- return GlobalAddr;
+ return DAG.getNode(AArch64ISD::RET_FLAG, DL, MVT::Other, RetOps);
}
-SDValue
-AArch64TargetLowering::LowerGlobalAddressELFSmall(SDValue Op,
- SelectionDAG &DAG) const {
- assert(getTargetMachine().getCodeModel() == CodeModel::Small);
+//===----------------------------------------------------------------------===//
+// Other Lowering Code
+//===----------------------------------------------------------------------===//
+SDValue AArch64TargetLowering::LowerGlobalAddress(SDValue Op,
+ SelectionDAG &DAG) const {
EVT PtrVT = getPointerTy();
- SDLoc dl(Op);
- const GlobalAddressSDNode *GN = cast<GlobalAddressSDNode>(Op);
- const GlobalValue *GV = GN->getGlobal();
- unsigned Alignment = GV->getAlignment();
- Reloc::Model RelocM = getTargetMachine().getRelocationModel();
- if (GV->isWeakForLinker() && GV->isDeclaration() && RelocM == Reloc::Static) {
- // Weak undefined symbols can't use ADRP/ADD pair since they should evaluate
- // to zero when they remain undefined. In PIC mode the GOT can take care of
- // this, but in absolute mode we use a constant pool load.
- SDValue PoolAddr;
- PoolAddr = DAG.getNode(AArch64ISD::WrapperSmall, dl, PtrVT,
- DAG.getTargetConstantPool(GV, PtrVT, 0, 0,
- AArch64II::MO_NO_FLAG),
- DAG.getTargetConstantPool(GV, PtrVT, 0, 0,
- AArch64II::MO_LO12),
- DAG.getConstant(8, MVT::i32));
- SDValue GlobalAddr = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), PoolAddr,
- MachinePointerInfo::getConstantPool(),
- /*isVolatile=*/ false,
- /*isNonTemporal=*/ true,
- /*isInvariant=*/ true, 8);
- if (GN->getOffset() != 0)
- return DAG.getNode(ISD::ADD, dl, PtrVT, GlobalAddr,
- DAG.getConstant(GN->getOffset(), PtrVT));
-
- return GlobalAddr;
- }
-
- if (Alignment == 0) {
- const PointerType *GVPtrTy = cast<PointerType>(GV->getType());
- if (GVPtrTy->getElementType()->isSized()) {
- Alignment
- = getDataLayout()->getABITypeAlignment(GVPtrTy->getElementType());
- } else {
- // Be conservative if we can't guess, not that it really matters:
- // functions and labels aren't valid for loads, and the methods used to
- // actually calculate an address work with any alignment.
- Alignment = 1;
- }
+ SDLoc DL(Op);
+ const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+ unsigned char OpFlags =
+ Subtarget->ClassifyGlobalReference(GV, getTargetMachine());
+
+ assert(cast<GlobalAddressSDNode>(Op)->getOffset() == 0 &&
+ "unexpected offset in global node");
+
+ // This also catches the large code model case for Darwin.
+ if ((OpFlags & AArch64II::MO_GOT) != 0) {
+ SDValue GotAddr = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, OpFlags);
+ // FIXME: Once remat is capable of dealing with instructions with register
+ // operands, expand this into two nodes instead of using a wrapper node.
+ return DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, GotAddr);
}
- unsigned char HiFixup, LoFixup;
- bool UseGOT = getSubtarget()->GVIsIndirectSymbol(GV, RelocM);
-
- if (UseGOT) {
- HiFixup = AArch64II::MO_GOT;
- LoFixup = AArch64II::MO_GOT_LO12;
- Alignment = 8;
+ if (getTargetMachine().getCodeModel() == CodeModel::Large) {
+ const unsigned char MO_NC = AArch64II::MO_NC;
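+ // In assembly this becomes a MOVZ/MOVK chain, roughly (illustrative only):
+ // movz x0, #:abs_g3:var
+ // movk x0, #:abs_g2_nc:var
+ // movk x0, #:abs_g1_nc:var
+ // movk x0, #:abs_g0_nc:var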
+ return DAG.getNode(
+ AArch64ISD::WrapperLarge, DL, PtrVT,
+ DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_G3),
+ DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_G2 | MO_NC),
+ DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_G1 | MO_NC),
+ DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_G0 | MO_NC));
} else {
- HiFixup = AArch64II::MO_NO_FLAG;
- LoFixup = AArch64II::MO_LO12;
+ // Use ADRP/ADD or ADRP/LDR for everything else: the small model on ELF and
+ // the only correct model on Darwin.
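+ // A typical result, sketched in assembly:
+ // adrp x0, var
+ // add x0, x0, :lo12:var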
+ SDValue Hi = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
+ OpFlags | AArch64II::MO_PAGE);
+ unsigned char LoFlags = OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC;
+ SDValue Lo = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, LoFlags);
+
+ SDValue ADRP = DAG.getNode(AArch64ISD::ADRP, DL, PtrVT, Hi);
+ return DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, ADRP, Lo);
}
+}
- // AArch64's small model demands the following sequence:
- // ADRP x0, somewhere
- // ADD x0, x0, #:lo12:somewhere ; (or LDR directly).
- SDValue GlobalRef = DAG.getNode(AArch64ISD::WrapperSmall, dl, PtrVT,
- DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
- HiFixup),
- DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
- LoFixup),
- DAG.getConstant(Alignment, MVT::i32));
+/// \brief Convert a TLS address reference into the correct sequence of loads
+/// and calls to compute the variable's address (for Darwin, currently) and
+/// return an SDValue containing the final node.
- if (UseGOT) {
- GlobalRef = DAG.getNode(AArch64ISD::GOTLoad, dl, PtrVT, DAG.getEntryNode(),
- GlobalRef);
- }
+/// Darwin only has one TLS scheme which must be capable of dealing with the
+/// fully general situation, in the worst case. This means:
+/// + "extern __thread" declaration.
+/// + Defined in a possibly unknown dynamic library.
+///
+/// The general system is that each __thread variable has a [3 x i64] descriptor
+/// which contains information used by the runtime to calculate the address. The
+/// only part of this the compiler needs to know about is the first xword, which
+/// contains a function pointer that must be called with the address of the
+/// entire descriptor in "x0".
+///
+/// Since this descriptor may be in a different unit, in general even the
+/// descriptor must be accessed via an indirect load. The "ideal" code sequence
+/// is:
+/// adrp x0, _var@TLVPPAGE
+/// ldr x0, [x0, _var@TLVPPAGEOFF] ; x0 now contains address of descriptor
+/// ldr x1, [x0] ; x1 contains 1st entry of descriptor,
+/// ; the function pointer
+/// blr x1 ; Uses descriptor address in x0
+/// ; Address of _var is now in x0.
+///
+/// If the address of _var's descriptor *is* known to the linker, then it can
+/// change the first "ldr" instruction to an appropriate "add x0, x0, #imm" for
+/// a slight efficiency gain.
+SDValue
+AArch64TargetLowering::LowerDarwinGlobalTLSAddress(SDValue Op,
+ SelectionDAG &DAG) const {
+ assert(Subtarget->isTargetDarwin() && "TLS only supported on Darwin");
- if (GN->getOffset() != 0)
- return DAG.getNode(ISD::ADD, dl, PtrVT, GlobalRef,
- DAG.getConstant(GN->getOffset(), PtrVT));
+ SDLoc DL(Op);
+ MVT PtrVT = getPointerTy();
+ const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
- return GlobalRef;
-}
+ SDValue TLVPAddr =
+ DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS);
+ SDValue DescAddr = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, TLVPAddr);
-SDValue
-AArch64TargetLowering::LowerGlobalAddressELF(SDValue Op,
- SelectionDAG &DAG) const {
- // TableGen doesn't have easy access to the CodeModel or RelocationModel, so
- // we make those distinctions here.
-
- switch (getTargetMachine().getCodeModel()) {
- case CodeModel::Small:
- return LowerGlobalAddressELFSmall(Op, DAG);
- case CodeModel::Large:
- return LowerGlobalAddressELFLarge(Op, DAG);
- default:
- llvm_unreachable("Only small and large code models supported now");
- }
-}
+ // The first entry in the descriptor is a function pointer that we must call
+ // to obtain the address of the variable.
+ SDValue Chain = DAG.getEntryNode();
+ SDValue FuncTLVGet =
+ DAG.getLoad(MVT::i64, DL, Chain, DescAddr, MachinePointerInfo::getGOT(),
+ false, true, true, 8);
+ Chain = FuncTLVGet.getValue(1);
-SDValue
-AArch64TargetLowering::LowerConstantPool(SDValue Op,
- SelectionDAG &DAG) const {
- SDLoc DL(Op);
- EVT PtrVT = getPointerTy();
- ConstantPoolSDNode *CN = cast<ConstantPoolSDNode>(Op);
- const Constant *C = CN->getConstVal();
-
- switch(getTargetMachine().getCodeModel()) {
- case CodeModel::Small:
- // The most efficient code is PC-relative anyway for the small memory model,
- // so we don't need to worry about relocation model.
- return DAG.getNode(AArch64ISD::WrapperSmall, DL, PtrVT,
- DAG.getTargetConstantPool(C, PtrVT, 0, 0,
- AArch64II::MO_NO_FLAG),
- DAG.getTargetConstantPool(C, PtrVT, 0, 0,
- AArch64II::MO_LO12),
- DAG.getConstant(CN->getAlignment(), MVT::i32));
- case CodeModel::Large:
- return DAG.getNode(
- AArch64ISD::WrapperLarge, DL, PtrVT,
- DAG.getTargetConstantPool(C, PtrVT, 0, 0, AArch64II::MO_ABS_G3),
- DAG.getTargetConstantPool(C, PtrVT, 0, 0, AArch64II::MO_ABS_G2_NC),
- DAG.getTargetConstantPool(C, PtrVT, 0, 0, AArch64II::MO_ABS_G1_NC),
- DAG.getTargetConstantPool(C, PtrVT, 0, 0, AArch64II::MO_ABS_G0_NC));
- default:
- llvm_unreachable("Only small and large code models supported now");
- }
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ MFI->setAdjustsStack(true);
+
+ // TLS calls preserve all registers except those that absolutely must be
+ // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
+ // silly).
+ const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
+ const AArch64RegisterInfo *ARI =
+ static_cast<const AArch64RegisterInfo *>(TRI);
+ const uint32_t *Mask = ARI->getTLSCallPreservedMask();
+
+ // Finally, we can make the call. This is just a degenerate version of a
+ // normal AArch64 call node: x0 takes the address of the descriptor, and
+ // returns the address of the variable in this thread.
+ Chain = DAG.getCopyToReg(Chain, DL, AArch64::X0, DescAddr, SDValue());
+ Chain =
+ DAG.getNode(AArch64ISD::CALL, DL, DAG.getVTList(MVT::Other, MVT::Glue),
+ Chain, FuncTLVGet, DAG.getRegister(AArch64::X0, MVT::i64),
+ DAG.getRegisterMask(Mask), Chain.getValue(1));
+ return DAG.getCopyFromReg(Chain, DL, AArch64::X0, PtrVT, Chain.getValue(1));
}
-SDValue AArch64TargetLowering::LowerTLSDescCall(SDValue SymAddr,
- SDValue DescAddr,
- SDLoc DL,
- SelectionDAG &DAG) const {
+/// When accessing thread-local variables under either the general-dynamic or
+/// local-dynamic system, we make a "TLS-descriptor" call. The variable will
+/// have a descriptor, accessible via a PC-relative ADRP, and whose first entry
+/// is a function pointer to carry out the resolution. This function takes the
+/// address of the descriptor in X0 and returns the TPIDR_EL0 offset in X0. All
+/// other registers (except LR, NZCV) are preserved.
+///
+/// Thus, the ideal call sequence on AArch64 is:
+///
+/// adrp x0, :tlsdesc:thread_var
+/// ldr x8, [x0, :tlsdesc_lo12:thread_var]
+/// add x0, x0, :tlsdesc_lo12:thread_var
+/// .tlsdesccall thread_var
+/// blr x8
+/// (TPIDR_EL0 offset now in x0).
+///
+/// The ".tlsdesccall" directive instructs the assembler to insert a particular
+/// relocation to help the linker relax this sequence if it turns out to be too
+/// conservative.
+///
+/// FIXME: we currently produce an extra, duplicated, ADRP instruction, but this
+/// is harmless.
+SDValue AArch64TargetLowering::LowerELFTLSDescCall(SDValue SymAddr,
+ SDValue DescAddr, SDLoc DL,
+ SelectionDAG &DAG) const {
EVT PtrVT = getPointerTy();
// The function we need to call is simply the first entry in the GOT for this
// descriptor, load it in preparation.
- SDValue Func, Chain;
- Func = DAG.getNode(AArch64ISD::GOTLoad, DL, PtrVT, DAG.getEntryNode(),
- DescAddr);
+ SDValue Func = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, SymAddr);
+
+ // TLS calls preserve all registers except those that absolutely must be
+ // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
+ // silly).
+ const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
+ const AArch64RegisterInfo *ARI =
+ static_cast<const AArch64RegisterInfo *>(TRI);
+ const uint32_t *Mask = ARI->getTLSCallPreservedMask();
// The function takes only one argument: the address of the descriptor itself
// in X0.
- SDValue Glue;
+ SDValue Glue, Chain;
Chain = DAG.getCopyToReg(DAG.getEntryNode(), DL, AArch64::X0, DescAddr, Glue);
Glue = Chain.getValue(1);
- // Finally, there's a special calling-convention which means that the lookup
- // must preserve all registers (except X0, obviously).
- const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
- const AArch64RegisterInfo *A64RI
- = static_cast<const AArch64RegisterInfo *>(TRI);
- const uint32_t *Mask = A64RI->getTLSDescCallPreservedMask();
-
// We're now ready to populate the argument list, as with a normal call:
- std::vector<SDValue> Ops;
+ SmallVector<SDValue, 6> Ops;
Ops.push_back(Chain);
Ops.push_back(Func);
Ops.push_back(SymAddr);
@@ -2586,22 +2705,18 @@ SDValue AArch64TargetLowering::LowerTLSDescCall(SDValue SymAddr,
Ops.push_back(Glue);
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
- Chain = DAG.getNode(AArch64ISD::TLSDESCCALL, DL, NodeTys, &Ops[0],
- Ops.size());
+ Chain = DAG.getNode(AArch64ISD::TLSDESC_CALL, DL, NodeTys, Ops);
Glue = Chain.getValue(1);
- // After the call, the offset from TPIDR_EL0 is in X0, copy it out and pass it
- // back to the generic handling code.
return DAG.getCopyFromReg(Chain, DL, AArch64::X0, PtrVT, Glue);
}
SDValue
-AArch64TargetLowering::LowerGlobalTLSAddress(SDValue Op,
- SelectionDAG &DAG) const {
- assert(getSubtarget()->isTargetELF() &&
- "TLS not implemented for non-ELF targets");
- assert(getTargetMachine().getCodeModel() == CodeModel::Small
- && "TLS only supported in small memory model");
+AArch64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op,
+ SelectionDAG &DAG) const {
+ assert(Subtarget->isTargetELF() && "This function expects an ELF target");
+ assert(getTargetMachine().getCodeModel() == CodeModel::Small &&
+ "ELF TLS only supported in small memory model");
const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
TLSModel::Model Model = getTargetMachine().getTLSModel(GA->getGlobal());
@@ -2613,39 +2728,22 @@ AArch64TargetLowering::LowerGlobalTLSAddress(SDValue Op,
SDValue ThreadBase = DAG.getNode(AArch64ISD::THREAD_POINTER, DL, PtrVT);
- if (Model == TLSModel::InitialExec) {
- TPOff = DAG.getNode(AArch64ISD::WrapperSmall, DL, PtrVT,
- DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
- AArch64II::MO_GOTTPREL),
- DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
- AArch64II::MO_GOTTPREL_LO12),
- DAG.getConstant(8, MVT::i32));
- TPOff = DAG.getNode(AArch64ISD::GOTLoad, DL, PtrVT, DAG.getEntryNode(),
- TPOff);
- } else if (Model == TLSModel::LocalExec) {
- SDValue HiVar = DAG.getTargetGlobalAddress(GV, DL, MVT::i64, 0,
- AArch64II::MO_TPREL_G1);
- SDValue LoVar = DAG.getTargetGlobalAddress(GV, DL, MVT::i64, 0,
- AArch64II::MO_TPREL_G0_NC);
-
- TPOff = SDValue(DAG.getMachineNode(AArch64::MOVZxii, DL, PtrVT, HiVar,
- DAG.getTargetConstant(1, MVT::i32)), 0);
- TPOff = SDValue(DAG.getMachineNode(AArch64::MOVKxii, DL, PtrVT,
- TPOff, LoVar,
- DAG.getTargetConstant(0, MVT::i32)), 0);
- } else if (Model == TLSModel::GeneralDynamic) {
- // Accesses used in this sequence go via the TLS descriptor which lives in
- // the GOT. Prepare an address we can use to handle this.
- SDValue HiDesc = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
- AArch64II::MO_TLSDESC);
- SDValue LoDesc = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
- AArch64II::MO_TLSDESC_LO12);
- SDValue DescAddr = DAG.getNode(AArch64ISD::WrapperSmall, DL, PtrVT,
- HiDesc, LoDesc,
- DAG.getConstant(8, MVT::i32));
- SDValue SymAddr = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0);
-
- TPOff = LowerTLSDescCall(SymAddr, DescAddr, DL, DAG);
+ if (Model == TLSModel::LocalExec) {
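+ // Local-exec offsets are assembled directly into a register, roughly:
+ // movz x0, #:tprel_g1:var ; bits [31:16] of the TP offset
+ // movk x0, #:tprel_g0_nc:var ; bits [15:0]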
+ SDValue HiVar = DAG.getTargetGlobalAddress(
+ GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_G1);
+ SDValue LoVar = DAG.getTargetGlobalAddress(
+ GV, DL, PtrVT, 0,
+ AArch64II::MO_TLS | AArch64II::MO_G0 | AArch64II::MO_NC);
+
+ TPOff = SDValue(DAG.getMachineNode(AArch64::MOVZXi, DL, PtrVT, HiVar,
+ DAG.getTargetConstant(16, MVT::i32)),
+ 0);
+ TPOff = SDValue(DAG.getMachineNode(AArch64::MOVKXi, DL, PtrVT, TPOff, LoVar,
+ DAG.getTargetConstant(0, MVT::i32)),
+ 0);
+ } else if (Model == TLSModel::InitialExec) {
+ TPOff = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS);
+ TPOff = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, TPOff);
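+ // Initial-exec loads the TP offset from the GOT, roughly:
+ // adrp x0, :gottprel:var
+ // ldr x0, [x0, :gottprel_lo12:var]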
} else if (Model == TLSModel::LocalDynamic) {
// Local-dynamic accesses proceed in two phases. A general-dynamic TLS
// descriptor call against the special symbol _TLS_MODULE_BASE_ to calculate
@@ -2653,367 +2751,354 @@ AArch64TargetLowering::LowerGlobalTLSAddress(SDValue Op,
// calculation.
// These accesses will need deduplicating if there's more than one.
- AArch64MachineFunctionInfo* MFI = DAG.getMachineFunction()
- .getInfo<AArch64MachineFunctionInfo>();
+ AArch64FunctionInfo *MFI =
+ DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
MFI->incNumLocalDynamicTLSAccesses();
-
- // Get the location of _TLS_MODULE_BASE_:
- SDValue HiDesc = DAG.getTargetExternalSymbol("_TLS_MODULE_BASE_", PtrVT,
- AArch64II::MO_TLSDESC);
- SDValue LoDesc = DAG.getTargetExternalSymbol("_TLS_MODULE_BASE_", PtrVT,
- AArch64II::MO_TLSDESC_LO12);
- SDValue DescAddr = DAG.getNode(AArch64ISD::WrapperSmall, DL, PtrVT,
- HiDesc, LoDesc,
- DAG.getConstant(8, MVT::i32));
- SDValue SymAddr = DAG.getTargetExternalSymbol("_TLS_MODULE_BASE_", PtrVT);
-
- ThreadBase = LowerTLSDescCall(SymAddr, DescAddr, DL, DAG);
-
- // Get the variable's offset from _TLS_MODULE_BASE_
- SDValue HiVar = DAG.getTargetGlobalAddress(GV, DL, MVT::i64, 0,
- AArch64II::MO_DTPREL_G1);
- SDValue LoVar = DAG.getTargetGlobalAddress(GV, DL, MVT::i64, 0,
- AArch64II::MO_DTPREL_G0_NC);
-
- TPOff = SDValue(DAG.getMachineNode(AArch64::MOVZxii, DL, PtrVT, HiVar,
- DAG.getTargetConstant(0, MVT::i32)), 0);
- TPOff = SDValue(DAG.getMachineNode(AArch64::MOVKxii, DL, PtrVT,
- TPOff, LoVar,
- DAG.getTargetConstant(0, MVT::i32)), 0);
+ // Accesses used in this sequence go via the TLS descriptor which lives in
+ // the GOT. Prepare an address we can use to handle this.
+ SDValue HiDesc = DAG.getTargetExternalSymbol(
+ "_TLS_MODULE_BASE_", PtrVT, AArch64II::MO_TLS | AArch64II::MO_PAGE);
+ SDValue LoDesc = DAG.getTargetExternalSymbol(
+ "_TLS_MODULE_BASE_", PtrVT,
+ AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
+
+ // First argument to the descriptor call is the address of the descriptor
+ // itself.
+ SDValue DescAddr = DAG.getNode(AArch64ISD::ADRP, DL, PtrVT, HiDesc);
+ DescAddr = DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, DescAddr, LoDesc);
+
+ // The call needs a relocation too, for linker relaxation. It doesn't make
+ // sense to call it MO_PAGE or MO_PAGEOFF, though, so we need another copy
+ // of the address.
+ SDValue SymAddr = DAG.getTargetExternalSymbol("_TLS_MODULE_BASE_", PtrVT,
+ AArch64II::MO_TLS);
+
+ // Now we can calculate the offset from TPIDR_EL0 to this module's
+ // thread-local area.
+ TPOff = LowerELFTLSDescCall(SymAddr, DescAddr, DL, DAG);
+
+ // Now use :dtprel_whatever: operations to calculate this variable's offset
+ // in its thread-storage area.
+ SDValue HiVar = DAG.getTargetGlobalAddress(
+ GV, DL, MVT::i64, 0, AArch64II::MO_TLS | AArch64II::MO_G1);
+ SDValue LoVar = DAG.getTargetGlobalAddress(
+ GV, DL, MVT::i64, 0,
+ AArch64II::MO_TLS | AArch64II::MO_G0 | AArch64II::MO_NC);
+
+ SDValue DTPOff =
+ SDValue(DAG.getMachineNode(AArch64::MOVZXi, DL, PtrVT, HiVar,
+ DAG.getTargetConstant(16, MVT::i32)),
+ 0);
+ DTPOff =
+ SDValue(DAG.getMachineNode(AArch64::MOVKXi, DL, PtrVT, DTPOff, LoVar,
+ DAG.getTargetConstant(0, MVT::i32)),
+ 0);
+
+ TPOff = DAG.getNode(ISD::ADD, DL, PtrVT, TPOff, DTPOff);
+ } else if (Model == TLSModel::GeneralDynamic) {
+ // Accesses used in this sequence go via the TLS descriptor which lives in
+ // the GOT. Prepare an address we can use to handle this.
+ SDValue HiDesc = DAG.getTargetGlobalAddress(
+ GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_PAGE);
+ SDValue LoDesc = DAG.getTargetGlobalAddress(
+ GV, DL, PtrVT, 0,
+ AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
+
+ // First argument to the descriptor call is the address of the descriptor
+ // itself.
+ SDValue DescAddr = DAG.getNode(AArch64ISD::ADRP, DL, PtrVT, HiDesc);
+ DescAddr = DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, DescAddr, LoDesc);
+
+ // The call needs a relocation too, for linker relaxation. It doesn't make
+ // sense to call it MO_PAGE or MO_PAGEOFF, though, so we need another copy
+ // of the address.
+ SDValue SymAddr =
+ DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS);
+
+ // Finally we can make a call to calculate the offset from tpidr_el0.
+ TPOff = LowerELFTLSDescCall(SymAddr, DescAddr, DL, DAG);
} else
- llvm_unreachable("Unsupported TLS access model");
-
+ llvm_unreachable("Unsupported ELF TLS access model");
return DAG.getNode(ISD::ADD, DL, PtrVT, ThreadBase, TPOff);
}
-static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG,
- bool IsSigned) {
+SDValue AArch64TargetLowering::LowerGlobalTLSAddress(SDValue Op,
+ SelectionDAG &DAG) const {
+ if (Subtarget->isTargetDarwin())
+ return LowerDarwinGlobalTLSAddress(Op, DAG);
+ else if (Subtarget->isTargetELF())
+ return LowerELFGlobalTLSAddress(Op, DAG);
+
+ llvm_unreachable("Unexpected platform trying to use TLS");
+}
+SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
+ SDValue Chain = Op.getOperand(0);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
+ SDValue LHS = Op.getOperand(2);
+ SDValue RHS = Op.getOperand(3);
+ SDValue Dest = Op.getOperand(4);
SDLoc dl(Op);
- EVT VT = Op.getValueType();
- SDValue Vec = Op.getOperand(0);
- unsigned Opc = IsSigned ? ISD::SINT_TO_FP : ISD::UINT_TO_FP;
- if (VT.getVectorNumElements() == 1) {
- assert(VT == MVT::v1f64 && "Unexpected vector type!");
- if (VT.getSizeInBits() == Vec.getValueSizeInBits())
- return Op;
- return DAG.UnrollVectorOp(Op.getNode());
- }
+ // Handle f128 first, since lowering it will result in comparing the return
+ // value of a libcall against zero, which is just what the rest of LowerBR_CC
+ // is expecting to deal with.
+ if (LHS.getValueType() == MVT::f128) {
+ softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl);
- if (VT.getSizeInBits() < Vec.getValueSizeInBits()) {
- assert(Vec.getValueType() == MVT::v2i64 && VT == MVT::v2f32 &&
- "Unexpected vector type!");
- Vec = DAG.getNode(Opc, dl, MVT::v2f64, Vec);
- return DAG.getNode(ISD::FP_ROUND, dl, VT, Vec, DAG.getIntPtrConstant(0));
- } else if (VT.getSizeInBits() > Vec.getValueSizeInBits()) {
- unsigned CastOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
- EVT CastVT = EVT::getIntegerVT(*DAG.getContext(),
- VT.getVectorElementType().getSizeInBits());
- CastVT =
- EVT::getVectorVT(*DAG.getContext(), CastVT, VT.getVectorNumElements());
- Vec = DAG.getNode(CastOpc, dl, CastVT, Vec);
+ // If softenSetCCOperands returned a scalar, we need to compare the result
+ // against zero to select between true and false values.
+ if (!RHS.getNode()) {
+ RHS = DAG.getConstant(0, LHS.getValueType());
+ CC = ISD::SETNE;
+ }
}
- return DAG.getNode(Opc, dl, VT, Vec);
-}
+ // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a branch
+ // instruction.
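+ // e.g. a branch on the overflow bit of llvm.sadd.with.overflow.i32 can
+ // ideally become: adds w0, w0, w1 ; b.vs <dest> (a sketch of the
+ // signed-add case).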
+ unsigned Opc = LHS.getOpcode();
+ if (LHS.getResNo() == 1 && isa<ConstantSDNode>(RHS) &&
+ cast<ConstantSDNode>(RHS)->isOne() &&
+ (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
+ Opc == ISD::USUBO || Opc == ISD::SMULO || Opc == ISD::UMULO)) {
+ assert((CC == ISD::SETEQ || CC == ISD::SETNE) &&
+ "Unexpected condition code.");
+ // Only lower legal XALUO ops.
+ if (!DAG.getTargetLoweringInfo().isTypeLegal(LHS->getValueType(0)))
+ return SDValue();
-SDValue
-AArch64TargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG,
- bool IsSigned) const {
- if (Op.getValueType().isVector())
- return LowerVectorINT_TO_FP(Op, DAG, IsSigned);
- if (Op.getValueType() != MVT::f128) {
- // Legal for everything except f128.
- return Op;
- }
+ // The actual operation with overflow check.
+ AArch64CC::CondCode OFCC;
+ SDValue Value, Overflow;
+ std::tie(Value, Overflow) = getAArch64XALUOOp(OFCC, LHS.getValue(0), DAG);
- RTLIB::Libcall LC;
- if (IsSigned)
- LC = RTLIB::getSINTTOFP(Op.getOperand(0).getValueType(), Op.getValueType());
- else
- LC = RTLIB::getUINTTOFP(Op.getOperand(0).getValueType(), Op.getValueType());
+ if (CC == ISD::SETNE)
+ OFCC = getInvertedCondCode(OFCC);
+ SDValue CCVal = DAG.getConstant(OFCC, MVT::i32);
- return LowerF128ToCall(Op, DAG, LC);
-}
+ return DAG.getNode(AArch64ISD::BRCOND, SDLoc(LHS), MVT::Other, Chain, Dest,
+ CCVal, Overflow);
+ }
+ if (LHS.getValueType().isInteger()) {
+ assert((LHS.getValueType() == RHS.getValueType()) &&
+ (LHS.getValueType() == MVT::i32 || LHS.getValueType() == MVT::i64));
+
+ // If the RHS of the comparison is zero, we can potentially fold this
+ // to a specialized branch.
+ const ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS);
+ if (RHSC && RHSC->getZExtValue() == 0) {
+ if (CC == ISD::SETEQ) {
+ // See if we can use a TBZ to fold in an AND as well.
+ // TBZ has a smaller branch displacement than CBZ. If the offset is
+ // out of bounds, a late MI-layer pass rewrites branches.
+ // 403.gcc is an example that hits this case.
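+ // e.g. a branch on (x & 4) == 0 can fold to a single: tbz x0, #2, dest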
+ if (LHS.getOpcode() == ISD::AND &&
+ isa<ConstantSDNode>(LHS.getOperand(1)) &&
+ isPowerOf2_64(LHS.getConstantOperandVal(1))) {
+ SDValue Test = LHS.getOperand(0);
+ uint64_t Mask = LHS.getConstantOperandVal(1);
+
+ // TBZ only operates on i64's, but the ext should be free.
+ if (Test.getValueType() == MVT::i32)
+ Test = DAG.getAnyExtOrTrunc(Test, dl, MVT::i64);
+
+ return DAG.getNode(AArch64ISD::TBZ, dl, MVT::Other, Chain, Test,
+ DAG.getConstant(Log2_64(Mask), MVT::i64), Dest);
+ }
-SDValue
-AArch64TargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
- JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
- SDLoc dl(JT);
- EVT PtrVT = getPointerTy();
+ return DAG.getNode(AArch64ISD::CBZ, dl, MVT::Other, Chain, LHS, Dest);
+ } else if (CC == ISD::SETNE) {
+ // See if we can use a TBZ to fold in an AND as well.
+ // TBZ has a smaller branch displacement than CBZ. If the offset is
+ // out of bounds, a late MI-layer pass rewrites branches.
+ // 403.gcc is an example that hits this case.
+ if (LHS.getOpcode() == ISD::AND &&
+ isa<ConstantSDNode>(LHS.getOperand(1)) &&
+ isPowerOf2_64(LHS.getConstantOperandVal(1))) {
+ SDValue Test = LHS.getOperand(0);
+ uint64_t Mask = LHS.getConstantOperandVal(1);
+
+ // TBNZ only operates on i64's, but the ext should be free.
+ if (Test.getValueType() == MVT::i32)
+ Test = DAG.getAnyExtOrTrunc(Test, dl, MVT::i64);
+
+ return DAG.getNode(AArch64ISD::TBNZ, dl, MVT::Other, Chain, Test,
+ DAG.getConstant(Log2_64(Mask), MVT::i64), Dest);
+ }
- // When compiling PIC, jump tables get put in the code section so a static
- // relocation-style is acceptable for both cases.
- switch (getTargetMachine().getCodeModel()) {
- case CodeModel::Small:
- return DAG.getNode(AArch64ISD::WrapperSmall, dl, PtrVT,
- DAG.getTargetJumpTable(JT->getIndex(), PtrVT),
- DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
- AArch64II::MO_LO12),
- DAG.getConstant(1, MVT::i32));
- case CodeModel::Large:
- return DAG.getNode(
- AArch64ISD::WrapperLarge, dl, PtrVT,
- DAG.getTargetJumpTable(JT->getIndex(), PtrVT, AArch64II::MO_ABS_G3),
- DAG.getTargetJumpTable(JT->getIndex(), PtrVT, AArch64II::MO_ABS_G2_NC),
- DAG.getTargetJumpTable(JT->getIndex(), PtrVT, AArch64II::MO_ABS_G1_NC),
- DAG.getTargetJumpTable(JT->getIndex(), PtrVT, AArch64II::MO_ABS_G0_NC));
- default:
- llvm_unreachable("Only small and large code models supported now");
- }
-}
+ return DAG.getNode(AArch64ISD::CBNZ, dl, MVT::Other, Chain, LHS, Dest);
+ }
+ }
-// (SELECT testbit, iftrue, iffalse)
-SDValue
-AArch64TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
- SDLoc dl(Op);
- SDValue TheBit = Op.getOperand(0);
- SDValue IfTrue = Op.getOperand(1);
- SDValue IfFalse = Op.getOperand(2);
+ SDValue CCVal;
+ SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl);
+ return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal,
+ Cmp);
+ }
- // AArch64 BooleanContents is the default UndefinedBooleanContent, which means
- // that as the consumer we are responsible for ignoring rubbish in higher
- // bits.
- TheBit = DAG.getNode(ISD::AND, dl, MVT::i32, TheBit,
- DAG.getConstant(1, MVT::i32));
- SDValue A64CMP = DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, TheBit,
- DAG.getConstant(0, TheBit.getValueType()),
- DAG.getCondCode(ISD::SETNE));
+ assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64);
+
+ // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't totally
+ // clean. Some of them require two branches to implement.
+ SDValue Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
+ AArch64CC::CondCode CC1, CC2;
+ changeFPCCToAArch64CC(CC, CC1, CC2);
+ SDValue CC1Val = DAG.getConstant(CC1, MVT::i32);
+ SDValue BR1 =
+ DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CC1Val, Cmp);
+ if (CC2 != AArch64CC::AL) {
+ SDValue CC2Val = DAG.getConstant(CC2, MVT::i32);
+ return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, BR1, Dest, CC2Val,
+ Cmp);
+ }
- return DAG.getNode(AArch64ISD::SELECT_CC, dl, Op.getValueType(),
- A64CMP, IfTrue, IfFalse,
- DAG.getConstant(A64CC::NE, MVT::i32));
+ return BR1;
}
-static SDValue LowerVectorSETCC(SDValue Op, SelectionDAG &DAG) {
- SDLoc DL(Op);
- SDValue LHS = Op.getOperand(0);
- SDValue RHS = Op.getOperand(1);
- ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
+SDValue AArch64TargetLowering::LowerFCOPYSIGN(SDValue Op,
+ SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
- bool Invert = false;
- SDValue Op0, Op1;
- unsigned Opcode;
+ SDLoc DL(Op);
- if (LHS.getValueType().isInteger()) {
+ SDValue In1 = Op.getOperand(0);
+ SDValue In2 = Op.getOperand(1);
+ EVT SrcVT = In2.getValueType();
+ if (SrcVT != VT) {
+ if (SrcVT == MVT::f32 && VT == MVT::f64)
+ In2 = DAG.getNode(ISD::FP_EXTEND, DL, VT, In2);
+ else if (SrcVT == MVT::f64 && VT == MVT::f32)
+ In2 = DAG.getNode(ISD::FP_ROUND, DL, VT, In2, DAG.getIntPtrConstant(0));
+ else
+ // FIXME: Src type is different, bail out for now. Can VT really be a
+ // vector type?
+ return SDValue();
+ }
- // Attempt to use Vector Integer Compare Mask Test instruction.
- // TST = icmp ne (and (op0, op1), zero).
- if (CC == ISD::SETNE) {
- if (((LHS.getOpcode() == ISD::AND) &&
- ISD::isBuildVectorAllZeros(RHS.getNode())) ||
- ((RHS.getOpcode() == ISD::AND) &&
- ISD::isBuildVectorAllZeros(LHS.getNode()))) {
-
- SDValue AndOp = (LHS.getOpcode() == ISD::AND) ? LHS : RHS;
- SDValue NewLHS = DAG.getNode(ISD::BITCAST, DL, VT, AndOp.getOperand(0));
- SDValue NewRHS = DAG.getNode(ISD::BITCAST, DL, VT, AndOp.getOperand(1));
- return DAG.getNode(AArch64ISD::NEON_TST, DL, VT, NewLHS, NewRHS);
- }
+ EVT VecVT;
+ EVT EltVT;
+ SDValue EltMask, VecVal1, VecVal2;
+ if (VT == MVT::f32 || VT == MVT::v2f32 || VT == MVT::v4f32) {
+ EltVT = MVT::i32;
+ VecVT = MVT::v4i32;
+ EltMask = DAG.getConstant(0x80000000ULL, EltVT);
+
+ if (!VT.isVector()) {
+ VecVal1 = DAG.getTargetInsertSubreg(AArch64::ssub, DL, VecVT,
+ DAG.getUNDEF(VecVT), In1);
+ VecVal2 = DAG.getTargetInsertSubreg(AArch64::ssub, DL, VecVT,
+ DAG.getUNDEF(VecVT), In2);
+ } else {
+ VecVal1 = DAG.getNode(ISD::BITCAST, DL, VecVT, In1);
+ VecVal2 = DAG.getNode(ISD::BITCAST, DL, VecVT, In2);
}
-
- // Attempt to use Vector Integer Compare Mask against Zero instr (Signed).
- // Note: Compare against Zero does not support unsigned predicates.
- if ((ISD::isBuildVectorAllZeros(RHS.getNode()) ||
- ISD::isBuildVectorAllZeros(LHS.getNode())) &&
- !isUnsignedIntSetCC(CC)) {
-
- // If LHS is the zero value, swap operands and CondCode.
- if (ISD::isBuildVectorAllZeros(LHS.getNode())) {
- CC = getSetCCSwappedOperands(CC);
- Op0 = RHS;
- } else
- Op0 = LHS;
-
- // Ensure valid CondCode for Compare Mask against Zero instruction:
- // EQ, GE, GT, LE, LT.
- if (ISD::SETNE == CC) {
- Invert = true;
- CC = ISD::SETEQ;
- }
-
- // Using constant type to differentiate integer and FP compares with zero.
- Op1 = DAG.getConstant(0, MVT::i32);
- Opcode = AArch64ISD::NEON_CMPZ;
-
+ } else if (VT == MVT::f64 || VT == MVT::v2f64) {
+ EltVT = MVT::i64;
+ VecVT = MVT::v2i64;
+
+ // We want to materialize a mask with the high bit set, but the AdvSIMD
+ // immediate moves cannot materialize that in a single instruction for
+ // 64-bit elements. Instead, materialize zero and then negate it.
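+ // (FNEG of +0.0 yields -0.0, whose bit pattern 0x8000000000000000 is
+ // exactly the per-lane sign-bit mask we need.)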
+ EltMask = DAG.getConstant(0, EltVT);
+
+ if (!VT.isVector()) {
+ VecVal1 = DAG.getTargetInsertSubreg(AArch64::dsub, DL, VecVT,
+ DAG.getUNDEF(VecVT), In1);
+ VecVal2 = DAG.getTargetInsertSubreg(AArch64::dsub, DL, VecVT,
+ DAG.getUNDEF(VecVT), In2);
} else {
- // Attempt to use Vector Integer Compare Mask instr (Signed/Unsigned).
- // Ensure valid CondCode for Compare Mask instr: EQ, GE, GT, UGE, UGT.
- bool Swap = false;
- switch (CC) {
- default:
- llvm_unreachable("Illegal integer comparison.");
- case ISD::SETEQ:
- case ISD::SETGT:
- case ISD::SETGE:
- case ISD::SETUGT:
- case ISD::SETUGE:
- break;
- case ISD::SETNE:
- Invert = true;
- CC = ISD::SETEQ;
- break;
- case ISD::SETULT:
- case ISD::SETULE:
- case ISD::SETLT:
- case ISD::SETLE:
- Swap = true;
- CC = getSetCCSwappedOperands(CC);
- }
-
- if (Swap)
- std::swap(LHS, RHS);
-
- Opcode = AArch64ISD::NEON_CMP;
- Op0 = LHS;
- Op1 = RHS;
+ VecVal1 = DAG.getNode(ISD::BITCAST, DL, VecVT, In1);
+ VecVal2 = DAG.getNode(ISD::BITCAST, DL, VecVT, In2);
}
+ } else {
+ llvm_unreachable("Invalid type for copysign!");
+ }
- // Generate Compare Mask instr or Compare Mask against Zero instr.
- SDValue NeonCmp =
- DAG.getNode(Opcode, DL, VT, Op0, Op1, DAG.getCondCode(CC));
+ std::vector<SDValue> BuildVectorOps;
+ for (unsigned i = 0; i < VecVT.getVectorNumElements(); ++i)
+ BuildVectorOps.push_back(EltMask);
- if (Invert)
- NeonCmp = DAG.getNOT(DL, NeonCmp, VT);
+ SDValue BuildVec = DAG.getNode(ISD::BUILD_VECTOR, DL, VecVT, BuildVectorOps);
- return NeonCmp;
+ // If we couldn't materialize the mask above, then the mask vector will be
+ // the zero vector, and we need to negate it here.
+ if (VT == MVT::f64 || VT == MVT::v2f64) {
+ BuildVec = DAG.getNode(ISD::BITCAST, DL, MVT::v2f64, BuildVec);
+ BuildVec = DAG.getNode(ISD::FNEG, DL, MVT::v2f64, BuildVec);
+ BuildVec = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, BuildVec);
}
- // Now handle Floating Point cases.
- // Attempt to use Vector Floating Point Compare Mask against Zero instruction.
- if (ISD::isBuildVectorAllZeros(RHS.getNode()) ||
- ISD::isBuildVectorAllZeros(LHS.getNode())) {
-
- // If LHS is the zero value, swap operands and CondCode.
- if (ISD::isBuildVectorAllZeros(LHS.getNode())) {
- CC = getSetCCSwappedOperands(CC);
- Op0 = RHS;
- } else
- Op0 = LHS;
+ SDValue Sel =
+ DAG.getNode(AArch64ISD::BIT, DL, VecVT, VecVal1, VecVal2, BuildVec);
- // Using constant type to differentiate integer and FP compares with zero.
- Op1 = DAG.getConstantFP(0, MVT::f32);
- Opcode = AArch64ISD::NEON_CMPZ;
- } else {
- // Attempt to use Vector Floating Point Compare Mask instruction.
- Op0 = LHS;
- Op1 = RHS;
- Opcode = AArch64ISD::NEON_CMP;
- }
+ if (VT == MVT::f32)
+ return DAG.getTargetExtractSubreg(AArch64::ssub, DL, VT, Sel);
+ else if (VT == MVT::f64)
+ return DAG.getTargetExtractSubreg(AArch64::dsub, DL, VT, Sel);
+ else
+ return DAG.getNode(ISD::BITCAST, DL, VT, Sel);
+}
- SDValue NeonCmpAlt;
- // Some register compares have to be implemented with swapped CC and operands,
- // e.g.: OLT implemented as OGT with swapped operands.
- bool SwapIfRegArgs = false;
+SDValue AArch64TargetLowering::LowerCTPOP(SDValue Op, SelectionDAG &DAG) const {
+ if (DAG.getMachineFunction().getFunction()->getAttributes().hasAttribute(
+ AttributeSet::FunctionIndex, Attribute::NoImplicitFloat))
+ return SDValue();
- // Ensure valid CondCode for FP Compare Mask against Zero instruction:
- // EQ, GE, GT, LE, LT.
- // And ensure valid CondCode for FP Compare Mask instruction: EQ, GE, GT.
- switch (CC) {
- default:
- llvm_unreachable("Illegal FP comparison");
- case ISD::SETUNE:
- case ISD::SETNE:
- Invert = true; // Fallthrough
- case ISD::SETOEQ:
- case ISD::SETEQ:
- CC = ISD::SETEQ;
- break;
- case ISD::SETOLT:
- case ISD::SETLT:
- CC = ISD::SETLT;
- SwapIfRegArgs = true;
- break;
- case ISD::SETOGT:
- case ISD::SETGT:
- CC = ISD::SETGT;
- break;
- case ISD::SETOLE:
- case ISD::SETLE:
- CC = ISD::SETLE;
- SwapIfRegArgs = true;
- break;
- case ISD::SETOGE:
- case ISD::SETGE:
- CC = ISD::SETGE;
- break;
- case ISD::SETUGE:
- Invert = true;
- CC = ISD::SETLT;
- SwapIfRegArgs = true;
- break;
- case ISD::SETULE:
- Invert = true;
- CC = ISD::SETGT;
- break;
- case ISD::SETUGT:
- Invert = true;
- CC = ISD::SETLE;
- SwapIfRegArgs = true;
- break;
- case ISD::SETULT:
- Invert = true;
- CC = ISD::SETGE;
- break;
- case ISD::SETUEQ:
- Invert = true; // Fallthrough
- case ISD::SETONE:
- // Expand this to (OGT |OLT).
- NeonCmpAlt =
- DAG.getNode(Opcode, DL, VT, Op0, Op1, DAG.getCondCode(ISD::SETGT));
- CC = ISD::SETLT;
- SwapIfRegArgs = true;
- break;
- case ISD::SETUO:
- Invert = true; // Fallthrough
- case ISD::SETO:
- // Expand this to (OGE | OLT).
- NeonCmpAlt =
- DAG.getNode(Opcode, DL, VT, Op0, Op1, DAG.getCondCode(ISD::SETGE));
- CC = ISD::SETLT;
- SwapIfRegArgs = true;
- break;
- }
+ // While there is no integer popcount instruction, CTPOP can be lowered
+ // efficiently to the following sequence that uses AdvSIMD
+ // registers/instructions, as long as the copies to/from the AdvSIMD
+ // registers are cheap.
+ // FMOV D0, X0 // copy 64-bit int to vector, high bits zero'd
+ // CNT V0.8B, V0.8B // 8xbyte pop-counts
+ // ADDV B0, V0.8B // sum 8xbyte pop-counts
+ // UMOV X0, V0.B[0] // copy byte result back to integer reg
+ SDValue Val = Op.getOperand(0);
+ SDLoc DL(Op);
+ EVT VT = Op.getValueType();
+ SDValue ZeroVec = DAG.getUNDEF(MVT::v8i8);
- if (Opcode == AArch64ISD::NEON_CMP && SwapIfRegArgs) {
- CC = getSetCCSwappedOperands(CC);
- std::swap(Op0, Op1);
+ SDValue VecVal;
+ if (VT == MVT::i32) {
+ VecVal = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val);
+ VecVal = DAG.getTargetInsertSubreg(AArch64::ssub, DL, MVT::v8i8, ZeroVec,
+ VecVal);
+ } else {
+ VecVal = DAG.getNode(ISD::BITCAST, DL, MVT::v8i8, Val);
}
- // Generate FP Compare Mask instr or FP Compare Mask against Zero instr
- SDValue NeonCmp = DAG.getNode(Opcode, DL, VT, Op0, Op1, DAG.getCondCode(CC));
+ SDValue CtPop = DAG.getNode(ISD::CTPOP, DL, MVT::v8i8, VecVal);
+ SDValue UaddLV = DAG.getNode(
+ ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
+ DAG.getConstant(Intrinsic::aarch64_neon_uaddlv, MVT::i32), CtPop);
- if (NeonCmpAlt.getNode())
- NeonCmp = DAG.getNode(ISD::OR, DL, VT, NeonCmp, NeonCmpAlt);
+ if (VT == MVT::i64)
+ UaddLV = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, UaddLV);
+ return UaddLV;
+}
- if (Invert)
- NeonCmp = DAG.getNOT(DL, NeonCmp, VT);
+SDValue AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
- return NeonCmp;
-}
+ if (Op.getValueType().isVector())
+ return LowerVSETCC(Op, DAG);
-// (SETCC lhs, rhs, condcode)
-SDValue
-AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
- SDLoc dl(Op);
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
- EVT VT = Op.getValueType();
+ SDLoc dl(Op);
- if (VT.isVector())
- return LowerVectorSETCC(Op, DAG);
+ // We chose ZeroOrOneBooleanContents, so use zero and one.
+ EVT VT = Op.getValueType();
+ SDValue TVal = DAG.getConstant(1, VT);
+ SDValue FVal = DAG.getConstant(0, VT);
+ // Handle f128 first, since one possible outcome is a normal integer
+ // comparison which gets picked up by the next if statement.
if (LHS.getValueType() == MVT::f128) {
- // f128 comparisons will be lowered to libcalls giving a valid LHS and RHS
- // for the rest of the function (some i32 or i64 values).
softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl);
// If softenSetCCOperands returned a scalar, use it.
- if (RHS.getNode() == 0) {
+ if (!RHS.getNode()) {
assert(LHS.getValueType() == Op.getValueType() &&
"Unexpected setcc expansion!");
return LHS;
@@ -3021,205 +3106,403 @@ AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
}
if (LHS.getValueType().isInteger()) {
- SDValue A64cc;
+ SDValue CCVal;
+ SDValue Cmp =
+ getAArch64Cmp(LHS, RHS, ISD::getSetCCInverse(CC, true), CCVal, DAG, dl);
+
+ // Note that we inverted the condition above, so we reverse the order of
+ // the true and false operands here. This will allow the setcc to be
+ // matched to a single CSINC instruction.
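+ // e.g. i32 (a == b) should become: cmp w0, w1 ; cset w0, eq
+ // (cset being an alias of CSINC from wzr, hence the inversion above).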
+ return DAG.getNode(AArch64ISD::CSEL, dl, VT, FVal, TVal, CCVal, Cmp);
+ }
+
+ // Now we know we're dealing with FP values.
+ assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64);
+
+ // If that fails, we'll need to perform an FCMP + CSEL sequence. Go ahead
+ // and do the comparison.
+ SDValue Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
- // Integers are handled in a separate function because the combinations of
- // immediates and tests can get hairy and we may want to fiddle things.
- SDValue CmpOp = getSelectableIntSetCC(LHS, RHS, CC, A64cc, DAG, dl);
+ AArch64CC::CondCode CC1, CC2;
+ changeFPCCToAArch64CC(CC, CC1, CC2);
+ if (CC2 == AArch64CC::AL) {
+ changeFPCCToAArch64CC(ISD::getSetCCInverse(CC, false), CC1, CC2);
+ SDValue CC1Val = DAG.getConstant(CC1, MVT::i32);
- return DAG.getNode(AArch64ISD::SELECT_CC, dl, VT,
- CmpOp, DAG.getConstant(1, VT), DAG.getConstant(0, VT),
- A64cc);
+ // Note that we inverted the condition above, so we reverse the order of
+ // the true and false operands here. This will allow the setcc to be
+ // matched to a single CSINC instruction.
+ return DAG.getNode(AArch64ISD::CSEL, dl, VT, FVal, TVal, CC1Val, Cmp);
+ } else {
+ // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't
+ // totally clean. Some of them require two CSELs to implement; when that
+ // happens, we emit the first CSEL and then emit a second using the output
+ // of the first as the RHS. We're effectively OR'ing the two CC's together.
+
+ // FIXME: It would be nice if we could match the two CSELs to two CSINCs.
+ SDValue CC1Val = DAG.getConstant(CC1, MVT::i32);
+ SDValue CS1 =
+ DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, FVal, CC1Val, Cmp);
+
+ SDValue CC2Val = DAG.getConstant(CC2, MVT::i32);
+ return DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, CS1, CC2Val, Cmp);
}
+}
- // Note that some LLVM floating-point CondCodes can't be lowered to a single
- // conditional branch, hence FPCCToA64CC can set a second test, where either
- // passing is sufficient.
- A64CC::CondCodes CondCode, Alternative = A64CC::Invalid;
- CondCode = FPCCToA64CC(CC, Alternative);
- SDValue A64cc = DAG.getConstant(CondCode, MVT::i32);
- SDValue CmpOp = DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, LHS, RHS,
- DAG.getCondCode(CC));
- SDValue A64SELECT_CC = DAG.getNode(AArch64ISD::SELECT_CC, dl, VT,
- CmpOp, DAG.getConstant(1, VT),
- DAG.getConstant(0, VT), A64cc);
+/// A SELECT_CC operation is really some kind of max or min if both values being
+/// compared are, in some sense, equal to the results in either case. However,
+/// it is permissible to compare f32 values and produce directly extended f64
+/// values.
+///
+/// Extending the comparison operands would also be allowed, but is less likely
+/// to happen in practice since their use is right here. Note that truncate
+/// operations would *not* be semantically equivalent.
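+/// For example, (select_cc f32 %a, %b, (fpext %a to f64), (fpext %b to f64),
+/// ogt) still describes a max of the extended operands.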
+static bool selectCCOpsAreFMaxCompatible(SDValue Cmp, SDValue Result) {
+ if (Cmp == Result)
+ return true;
- if (Alternative != A64CC::Invalid) {
- A64cc = DAG.getConstant(Alternative, MVT::i32);
- A64SELECT_CC = DAG.getNode(AArch64ISD::SELECT_CC, dl, VT, CmpOp,
- DAG.getConstant(1, VT), A64SELECT_CC, A64cc);
+ ConstantFPSDNode *CCmp = dyn_cast<ConstantFPSDNode>(Cmp);
+ ConstantFPSDNode *CResult = dyn_cast<ConstantFPSDNode>(Result);
+ if (CCmp && CResult && Cmp.getValueType() == MVT::f32 &&
+ Result.getValueType() == MVT::f64) {
+ bool Lossy;
+ APFloat CmpVal = CCmp->getValueAPF();
+ CmpVal.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &Lossy);
+ return CResult->getValueAPF().bitwiseIsEqual(CmpVal);
}
- return A64SELECT_CC;
+ return Result->getOpcode() == ISD::FP_EXTEND && Result->getOperand(0) == Cmp;
}
-static SDValue LowerVectorSELECT_CC(SDValue Op, SelectionDAG &DAG) {
- SDLoc dl(Op);
- SDValue LHS = Op.getOperand(0);
- SDValue RHS = Op.getOperand(1);
- SDValue IfTrue = Op.getOperand(2);
- SDValue IfFalse = Op.getOperand(3);
- EVT IfTrueVT = IfTrue.getValueType();
- EVT CondVT = IfTrueVT.changeVectorElementTypeToInteger();
- ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
+SDValue AArch64TargetLowering::LowerSELECT(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDValue CC = Op->getOperand(0);
+ SDValue TVal = Op->getOperand(1);
+ SDValue FVal = Op->getOperand(2);
+ SDLoc DL(Op);
- // If LHS & RHS are floating point and IfTrue & IfFalse are vectors, we will
- // use NEON compare.
- if ((LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64)) {
- EVT EltVT = LHS.getValueType();
- unsigned EltNum = 128 / EltVT.getSizeInBits();
- EVT VT = EVT::getVectorVT(*DAG.getContext(), EltVT, EltNum);
- unsigned SubConstant =
- (LHS.getValueType() == MVT::f32) ? AArch64::sub_32 :AArch64::sub_64;
- EVT CEltT = (LHS.getValueType() == MVT::f32) ? MVT::i32 : MVT::i64;
- EVT CVT = EVT::getVectorVT(*DAG.getContext(), CEltT, EltNum);
-
- LHS
- = SDValue(DAG.getMachineNode(TargetOpcode::SUBREG_TO_REG, dl,
- VT, DAG.getTargetConstant(0, MVT::i32), LHS,
- DAG.getTargetConstant(SubConstant, MVT::i32)), 0);
- RHS
- = SDValue(DAG.getMachineNode(TargetOpcode::SUBREG_TO_REG, dl,
- VT, DAG.getTargetConstant(0, MVT::i32), RHS,
- DAG.getTargetConstant(SubConstant, MVT::i32)), 0);
-
- SDValue VSetCC = DAG.getSetCC(dl, CVT, LHS, RHS, CC);
- SDValue ResCC = LowerVectorSETCC(VSetCC, DAG);
- if (CEltT.getSizeInBits() < IfTrueVT.getSizeInBits()) {
- EVT DUPVT =
- EVT::getVectorVT(*DAG.getContext(), CEltT,
- IfTrueVT.getSizeInBits() / CEltT.getSizeInBits());
- ResCC = DAG.getNode(AArch64ISD::NEON_VDUPLANE, dl, DUPVT, ResCC,
- DAG.getConstant(0, MVT::i64, false));
-
- ResCC = DAG.getNode(ISD::BITCAST, dl, CondVT, ResCC);
- } else {
- // FIXME: If IfTrue & IfFalse hold v1i8, v1i16 or v1i32, this function
- // can't handle them and will hit this assert.
- assert(CEltT.getSizeInBits() == IfTrueVT.getSizeInBits() &&
- "Vector of IfTrue & IfFalse is too small.");
-
- unsigned ExEltNum =
- EltNum * IfTrueVT.getSizeInBits() / ResCC.getValueSizeInBits();
- EVT ExVT = EVT::getVectorVT(*DAG.getContext(), CEltT, ExEltNum);
- ResCC = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ExVT, ResCC,
- DAG.getConstant(0, MVT::i64, false));
- ResCC = DAG.getNode(ISD::BITCAST, dl, CondVT, ResCC);
- }
- SDValue VSelect = DAG.getNode(ISD::VSELECT, dl, IfTrue.getValueType(),
- ResCC, IfTrue, IfFalse);
- return VSelect;
- }
-
- // Here we handle the case that LHS & RHS are integer and IfTrue & IfFalse are
- // vectors.
- A64CC::CondCodes CondCode, Alternative = A64CC::Invalid;
- CondCode = FPCCToA64CC(CC, Alternative);
- SDValue A64cc = DAG.getConstant(CondCode, MVT::i32);
- SDValue SetCC = DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, LHS, RHS,
- DAG.getCondCode(CC));
- EVT SEVT = MVT::i32;
- if (IfTrue.getValueType().getVectorElementType().getSizeInBits() > 32)
- SEVT = MVT::i64;
- SDValue AllOne = DAG.getConstant(-1, SEVT);
- SDValue AllZero = DAG.getConstant(0, SEVT);
- SDValue A64SELECT_CC = DAG.getNode(AArch64ISD::SELECT_CC, dl, SEVT, SetCC,
- AllOne, AllZero, A64cc);
-
- if (Alternative != A64CC::Invalid) {
- A64cc = DAG.getConstant(Alternative, MVT::i32);
- A64SELECT_CC = DAG.getNode(AArch64ISD::SELECT_CC, dl, Op.getValueType(),
- SetCC, AllOne, A64SELECT_CC, A64cc);
- }
- SDValue VDup;
- if (IfTrue.getValueType().getVectorNumElements() == 1)
- VDup = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, CondVT, A64SELECT_CC);
+ unsigned Opc = CC.getOpcode();
+ // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a select
+ // instruction.
+ if (CC.getResNo() == 1 &&
+ (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
+ Opc == ISD::USUBO || Opc == ISD::SMULO || Opc == ISD::UMULO)) {
+ // Only lower legal XALUO ops.
+ if (!DAG.getTargetLoweringInfo().isTypeLegal(CC->getValueType(0)))
+ return SDValue();
+
+ AArch64CC::CondCode OFCC;
+ SDValue Value, Overflow;
+ std::tie(Value, Overflow) = getAArch64XALUOOp(OFCC, CC.getValue(0), DAG);
+ SDValue CCVal = DAG.getConstant(OFCC, MVT::i32);
+
+ return DAG.getNode(AArch64ISD::CSEL, DL, Op.getValueType(), TVal, FVal,
+ CCVal, Overflow);
+ }
+
+ if (CC.getOpcode() == ISD::SETCC)
+ return DAG.getSelectCC(DL, CC.getOperand(0), CC.getOperand(1), TVal, FVal,
+ cast<CondCodeSDNode>(CC.getOperand(2))->get());
else
- VDup = DAG.getNode(AArch64ISD::NEON_VDUP, dl, CondVT, A64SELECT_CC);
- SDValue VSelect = DAG.getNode(ISD::VSELECT, dl, IfTrue.getValueType(),
- VDup, IfTrue, IfFalse);
- return VSelect;
+ return DAG.getSelectCC(DL, CC, DAG.getConstant(0, CC.getValueType()), TVal,
+ FVal, ISD::SETNE);
}
-// (SELECT_CC lhs, rhs, iftrue, iffalse, condcode)
-SDValue
-AArch64TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
- SDLoc dl(Op);
+SDValue AArch64TargetLowering::LowerSELECT_CC(SDValue Op,
+ SelectionDAG &DAG) const {
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
- SDValue IfTrue = Op.getOperand(2);
- SDValue IfFalse = Op.getOperand(3);
- ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
-
- if (IfTrue.getValueType().isVector())
- return LowerVectorSELECT_CC(Op, DAG);
+ SDValue TVal = Op.getOperand(2);
+ SDValue FVal = Op.getOperand(3);
+ SDLoc dl(Op);
+ // Handle f128 first, because it will result in a comparison of some RTLIB
+ // call result against zero.
if (LHS.getValueType() == MVT::f128) {
- // f128 comparisons are lowered to libcalls, but slot in nicely here
- // afterwards.
softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl);
// If softenSetCCOperands returned a scalar, we need to compare the result
// against zero to select between true and false values.
- if (RHS.getNode() == 0) {
+ if (!RHS.getNode()) {
RHS = DAG.getConstant(0, LHS.getValueType());
CC = ISD::SETNE;
}
}
+ // Handle integers first.
if (LHS.getValueType().isInteger()) {
- SDValue A64cc;
+ assert((LHS.getValueType() == RHS.getValueType()) &&
+ (LHS.getValueType() == MVT::i32 || LHS.getValueType() == MVT::i64));
+
+ unsigned Opcode = AArch64ISD::CSEL;
+
+ // If both the TVal and the FVal are constants, see if we can swap them in
+ // order to form a CSINV or CSINC out of them.
+ ConstantSDNode *CFVal = dyn_cast<ConstantSDNode>(FVal);
+ ConstantSDNode *CTVal = dyn_cast<ConstantSDNode>(TVal);
+
+ if (CTVal && CFVal && CTVal->isAllOnesValue() && CFVal->isNullValue()) {
+ std::swap(TVal, FVal);
+ std::swap(CTVal, CFVal);
+ CC = ISD::getSetCCInverse(CC, true);
+ } else if (CTVal && CFVal && CTVal->isOne() && CFVal->isNullValue()) {
+ std::swap(TVal, FVal);
+ std::swap(CTVal, CFVal);
+ CC = ISD::getSetCCInverse(CC, true);
+ } else if (TVal.getOpcode() == ISD::XOR) {
+ // If TVal is a NOT we want to swap TVal and FVal so that we can match
+ // with a CSINV rather than a CSEL.
+ ConstantSDNode *CVal = dyn_cast<ConstantSDNode>(TVal.getOperand(1));
+
+ if (CVal && CVal->isAllOnesValue()) {
+ std::swap(TVal, FVal);
+ std::swap(CTVal, CFVal);
+ CC = ISD::getSetCCInverse(CC, true);
+ }
+ } else if (TVal.getOpcode() == ISD::SUB) {
+ // If TVal is a negation (SUB from 0) we want to swap TVal and FVal so
+ // that we can match with a CSNEG rather than a CSEL.
+ ConstantSDNode *CVal = dyn_cast<ConstantSDNode>(TVal.getOperand(0));
+
+ if (CVal && CVal->isNullValue()) {
+ std::swap(TVal, FVal);
+ std::swap(CTVal, CFVal);
+ CC = ISD::getSetCCInverse(CC, true);
+ }
+ } else if (CTVal && CFVal) {
+ const int64_t TrueVal = CTVal->getSExtValue();
+ const int64_t FalseVal = CFVal->getSExtValue();
+ bool Swap = false;
- // Integers are handled in a separate function because the combinations of
- // immediates and tests can get hairy and we may want to fiddle things.
- SDValue CmpOp = getSelectableIntSetCC(LHS, RHS, CC, A64cc, DAG, dl);
+ // If both TVal and FVal are constants, see if FVal is the
+ // inverse/negation/increment of TVal and generate a CSINV/CSNEG/CSINC
+ // instead of a CSEL in that case.
+ if (TrueVal == ~FalseVal) {
+ Opcode = AArch64ISD::CSINV;
+ } else if (TrueVal == -FalseVal) {
+ Opcode = AArch64ISD::CSNEG;
+ } else if (TVal.getValueType() == MVT::i32) {
+ // If our operands are only 32-bit wide, make sure we use 32-bit
+ // arithmetic for the check whether we can use CSINC. This ensures that
+ // the addition in the check will wrap around properly in case there is
+ // an overflow (which would not be the case if we do the check with
+ // 64-bit arithmetic).
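+ // e.g. TVal == INT32_MIN and FVal == INT32_MAX: FVal32 + 1 wraps to
+ // TVal32, so CSINC applies, whereas the sign-extended 64-bit comparison
+ // would miss it.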
+ const uint32_t TrueVal32 = CTVal->getZExtValue();
+ const uint32_t FalseVal32 = CFVal->getZExtValue();
+
+ if ((TrueVal32 == FalseVal32 + 1) || (TrueVal32 + 1 == FalseVal32)) {
+ Opcode = AArch64ISD::CSINC;
+
+ if (TrueVal32 > FalseVal32) {
+ Swap = true;
+ }
+ }
+ // 64-bit check whether we can use CSINC.
+ } else if ((TrueVal == FalseVal + 1) || (TrueVal + 1 == FalseVal)) {
+ Opcode = AArch64ISD::CSINC;
+
+ if (TrueVal > FalseVal) {
+ Swap = true;
+ }
+ }
+
+ // Swap TVal and FVal if necessary.
+ if (Swap) {
+ std::swap(TVal, FVal);
+ std::swap(CTVal, CFVal);
+ CC = ISD::getSetCCInverse(CC, true);
+ }
+
+ if (Opcode != AArch64ISD::CSEL) {
+ // Drop FVal since we can get its value by simply inverting/negating
+ // TVal.
+ FVal = TVal;
+ }
+ }
+
+ SDValue CCVal;
+ SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl);
- return DAG.getNode(AArch64ISD::SELECT_CC, dl, Op.getValueType(), CmpOp,
- IfTrue, IfFalse, A64cc);
+ EVT VT = Op.getValueType();
+ return DAG.getNode(Opcode, dl, VT, TVal, FVal, CCVal, Cmp);
}
- // Note that some LLVM floating-point CondCodes can't be lowered to a single
- // conditional branch, hence FPCCToA64CC can set a second test, where either
- // passing is sufficient.
- A64CC::CondCodes CondCode, Alternative = A64CC::Invalid;
- CondCode = FPCCToA64CC(CC, Alternative);
- SDValue A64cc = DAG.getConstant(CondCode, MVT::i32);
- SDValue SetCC = DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, LHS, RHS,
- DAG.getCondCode(CC));
- SDValue A64SELECT_CC = DAG.getNode(AArch64ISD::SELECT_CC, dl,
- Op.getValueType(),
- SetCC, IfTrue, IfFalse, A64cc);
+ // Now we know we're dealing with FP values.
+ assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64);
+ assert(LHS.getValueType() == RHS.getValueType());
+ EVT VT = Op.getValueType();
+
+ // Try to match this select into a max/min operation, which have dedicated
+ // opcodes in the instruction set.
+ // FIXME: This is not correct in the presence of NaNs, so we only enable this
+ // in no-NaNs mode.
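+ // E.g. select(setogt LHS, NaN) yields the false arm, because an ordered
+ // compare against a NaN operand is false, whereas FMAX would propagate the
+ // NaN.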
+ if (getTargetMachine().Options.NoNaNsFPMath) {
+ SDValue MinMaxLHS = TVal, MinMaxRHS = FVal;
+ if (selectCCOpsAreFMaxCompatible(LHS, MinMaxRHS) &&
+ selectCCOpsAreFMaxCompatible(RHS, MinMaxLHS)) {
+ CC = ISD::getSetCCSwappedOperands(CC);
+ std::swap(MinMaxLHS, MinMaxRHS);
+ }
- if (Alternative != A64CC::Invalid) {
- A64cc = DAG.getConstant(Alternative, MVT::i32);
- A64SELECT_CC = DAG.getNode(AArch64ISD::SELECT_CC, dl, Op.getValueType(),
- SetCC, IfTrue, A64SELECT_CC, A64cc);
+ if (selectCCOpsAreFMaxCompatible(LHS, MinMaxLHS) &&
+ selectCCOpsAreFMaxCompatible(RHS, MinMaxRHS)) {
+ switch (CC) {
+ default:
+ break;
+ case ISD::SETGT:
+ case ISD::SETGE:
+ case ISD::SETUGT:
+ case ISD::SETUGE:
+ case ISD::SETOGT:
+ case ISD::SETOGE:
+ return DAG.getNode(AArch64ISD::FMAX, dl, VT, MinMaxLHS, MinMaxRHS);
+ case ISD::SETLT:
+ case ISD::SETLE:
+ case ISD::SETULT:
+ case ISD::SETULE:
+ case ISD::SETOLT:
+ case ISD::SETOLE:
+ return DAG.getNode(AArch64ISD::FMIN, dl, VT, MinMaxLHS, MinMaxRHS);
+ }
+ }
+ }
+ // If that fails, we'll need to perform an FCMP + CSEL sequence. Go ahead
+ // and do the comparison.
+ SDValue Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
+
+ // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't totally
+ // clean. Some of them require two CSELs to implement.
+ AArch64CC::CondCode CC1, CC2;
+ changeFPCCToAArch64CC(CC, CC1, CC2);
+ SDValue CC1Val = DAG.getConstant(CC1, MVT::i32);
+ SDValue CS1 = DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, FVal, CC1Val, Cmp);
+
+ // If we need a second CSEL, emit it, using the output of the first as the
+ // RHS. We're effectively OR'ing the two CC's together.
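+ // E.g. SETUEQ maps to EQ plus VS: TVal is chosen if the operands compared
+ // equal or the comparison was unordered.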
+ if (CC2 != AArch64CC::AL) {
+ SDValue CC2Val = DAG.getConstant(CC2, MVT::i32);
+ return DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, CS1, CC2Val, Cmp);
}
- return A64SELECT_CC;
+ // Otherwise, return the output of the first CSEL.
+ return CS1;
}
-SDValue
-AArch64TargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
- const Value *DestSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
- const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
+SDValue AArch64TargetLowering::LowerJumpTable(SDValue Op,
+ SelectionDAG &DAG) const {
+ // Jump table entries are PC-relative offsets. No additional tweaking
+ // is necessary here. Just get the address of the jump table.
+ JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
+ EVT PtrVT = getPointerTy();
+ SDLoc DL(Op);
- // We have to make sure we copy the entire structure: 8+8+8+4+4 = 32 bytes
- // rather than just 8.
- return DAG.getMemcpy(Op.getOperand(0), SDLoc(Op),
- Op.getOperand(1), Op.getOperand(2),
- DAG.getConstant(32, MVT::i32), 8, false, false,
- MachinePointerInfo(DestSV), MachinePointerInfo(SrcSV));
+ if (getTargetMachine().getCodeModel() == CodeModel::Large &&
+ !Subtarget->isTargetMachO()) {
+ const unsigned char MO_NC = AArch64II::MO_NC;
+ return DAG.getNode(
+ AArch64ISD::WrapperLarge, DL, PtrVT,
+ DAG.getTargetJumpTable(JT->getIndex(), PtrVT, AArch64II::MO_G3),
+ DAG.getTargetJumpTable(JT->getIndex(), PtrVT, AArch64II::MO_G2 | MO_NC),
+ DAG.getTargetJumpTable(JT->getIndex(), PtrVT, AArch64II::MO_G1 | MO_NC),
+ DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
+ AArch64II::MO_G0 | MO_NC));
+ }
+
+ SDValue Hi =
+ DAG.getTargetJumpTable(JT->getIndex(), PtrVT, AArch64II::MO_PAGE);
+ SDValue Lo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
+ AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
+ SDValue ADRP = DAG.getNode(AArch64ISD::ADRP, DL, PtrVT, Hi);
+ return DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, ADRP, Lo);
}
-SDValue
-AArch64TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
+SDValue AArch64TargetLowering::LowerConstantPool(SDValue Op,
+ SelectionDAG &DAG) const {
+ ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
+ EVT PtrVT = getPointerTy();
+ SDLoc DL(Op);
+
+ if (getTargetMachine().getCodeModel() == CodeModel::Large) {
+ // Use the GOT for the large code model on iOS.
+ if (Subtarget->isTargetMachO()) {
+ SDValue GotAddr = DAG.getTargetConstantPool(
+ CP->getConstVal(), PtrVT, CP->getAlignment(), CP->getOffset(),
+ AArch64II::MO_GOT);
+ return DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, GotAddr);
+ }
+
+ const unsigned char MO_NC = AArch64II::MO_NC;
+ return DAG.getNode(
+ AArch64ISD::WrapperLarge, DL, PtrVT,
+ DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlignment(),
+ CP->getOffset(), AArch64II::MO_G3),
+ DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlignment(),
+ CP->getOffset(), AArch64II::MO_G2 | MO_NC),
+ DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlignment(),
+ CP->getOffset(), AArch64II::MO_G1 | MO_NC),
+ DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlignment(),
+ CP->getOffset(), AArch64II::MO_G0 | MO_NC));
+ } else {
+ // Use ADRP/ADD or ADRP/LDR for everything else: the small code model on
+ // ELF, and the only valid model on Darwin.
+ SDValue Hi =
+ DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlignment(),
+ CP->getOffset(), AArch64II::MO_PAGE);
+ SDValue Lo = DAG.getTargetConstantPool(
+ CP->getConstVal(), PtrVT, CP->getAlignment(), CP->getOffset(),
+ AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
+
+ SDValue ADRP = DAG.getNode(AArch64ISD::ADRP, DL, PtrVT, Hi);
+ return DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, ADRP, Lo);
+ }
+}
+
+SDValue AArch64TargetLowering::LowerBlockAddress(SDValue Op,
+ SelectionDAG &DAG) const {
+ const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
+ EVT PtrVT = getPointerTy();
+ SDLoc DL(Op);
+ if (getTargetMachine().getCodeModel() == CodeModel::Large &&
+ !Subtarget->isTargetMachO()) {
+ const unsigned char MO_NC = AArch64II::MO_NC;
+ return DAG.getNode(
+ AArch64ISD::WrapperLarge, DL, PtrVT,
+ DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_G3),
+ DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_G2 | MO_NC),
+ DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_G1 | MO_NC),
+ DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_G0 | MO_NC));
+ } else {
+ SDValue Hi = DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_PAGE);
+ SDValue Lo = DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_PAGEOFF |
+ AArch64II::MO_NC);
+ SDValue ADRP = DAG.getNode(AArch64ISD::ADRP, DL, PtrVT, Hi);
+ return DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, ADRP, Lo);
+ }
+}
+
+SDValue AArch64TargetLowering::LowerDarwin_VASTART(SDValue Op,
+ SelectionDAG &DAG) const {
+ AArch64FunctionInfo *FuncInfo =
+ DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
+
+ SDLoc DL(Op);
+ SDValue FR =
+ DAG.getFrameIndex(FuncInfo->getVarArgsStackIndex(), getPointerTy());
+ const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
+ return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1),
+ MachinePointerInfo(SV), false, false, 0);
+}
+
+SDValue AArch64TargetLowering::LowerAAPCS_VASTART(SDValue Op,
+ SelectionDAG &DAG) const {
// The layout of the va_list struct is specified in the AArch64 Procedure Call
// Standard, section B.3.
MachineFunction &MF = DAG.getMachineFunction();
- AArch64MachineFunctionInfo *FuncInfo
- = MF.getInfo<AArch64MachineFunctionInfo>();
+ AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
SDLoc DL(Op);
SDValue Chain = Op.getOperand(0);
@@ -3228,498 +3511,2894 @@ AArch64TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
SmallVector<SDValue, 4> MemOps;
// void *__stack at offset 0
- SDValue Stack = DAG.getFrameIndex(FuncInfo->getVariadicStackIdx(),
- getPointerTy());
+ SDValue Stack =
+ DAG.getFrameIndex(FuncInfo->getVarArgsStackIndex(), getPointerTy());
MemOps.push_back(DAG.getStore(Chain, DL, Stack, VAList,
- MachinePointerInfo(SV), false, false, 0));
+ MachinePointerInfo(SV), false, false, 8));
// void *__gr_top at offset 8
- int GPRSize = FuncInfo->getVariadicGPRSize();
+ int GPRSize = FuncInfo->getVarArgsGPRSize();
if (GPRSize > 0) {
SDValue GRTop, GRTopAddr;
GRTopAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList,
DAG.getConstant(8, getPointerTy()));
- GRTop = DAG.getFrameIndex(FuncInfo->getVariadicGPRIdx(), getPointerTy());
+ GRTop = DAG.getFrameIndex(FuncInfo->getVarArgsGPRIndex(), getPointerTy());
GRTop = DAG.getNode(ISD::ADD, DL, getPointerTy(), GRTop,
DAG.getConstant(GPRSize, getPointerTy()));
MemOps.push_back(DAG.getStore(Chain, DL, GRTop, GRTopAddr,
- MachinePointerInfo(SV, 8),
- false, false, 0));
+ MachinePointerInfo(SV, 8), false, false, 8));
}
// void *__vr_top at offset 16
- int FPRSize = FuncInfo->getVariadicFPRSize();
+ int FPRSize = FuncInfo->getVarArgsFPRSize();
if (FPRSize > 0) {
SDValue VRTop, VRTopAddr;
VRTopAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList,
DAG.getConstant(16, getPointerTy()));
- VRTop = DAG.getFrameIndex(FuncInfo->getVariadicFPRIdx(), getPointerTy());
+ VRTop = DAG.getFrameIndex(FuncInfo->getVarArgsFPRIndex(), getPointerTy());
VRTop = DAG.getNode(ISD::ADD, DL, getPointerTy(), VRTop,
DAG.getConstant(FPRSize, getPointerTy()));
MemOps.push_back(DAG.getStore(Chain, DL, VRTop, VRTopAddr,
- MachinePointerInfo(SV, 16),
- false, false, 0));
+ MachinePointerInfo(SV, 16), false, false, 8));
}
// int __gr_offs at offset 24
SDValue GROffsAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList,
DAG.getConstant(24, getPointerTy()));
MemOps.push_back(DAG.getStore(Chain, DL, DAG.getConstant(-GPRSize, MVT::i32),
- GROffsAddr, MachinePointerInfo(SV, 24),
- false, false, 0));
+ GROffsAddr, MachinePointerInfo(SV, 24), false,
+ false, 4));
// int __vr_offs at offset 28
SDValue VROffsAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList,
DAG.getConstant(28, getPointerTy()));
MemOps.push_back(DAG.getStore(Chain, DL, DAG.getConstant(-FPRSize, MVT::i32),
- VROffsAddr, MachinePointerInfo(SV, 28),
- false, false, 0));
+ VROffsAddr, MachinePointerInfo(SV, 28), false,
+ false, 4));
- return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, &MemOps[0],
- MemOps.size());
+ return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
}
-SDValue
-AArch64TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
- switch (Op.getOpcode()) {
- default: llvm_unreachable("Don't know how to custom lower this!");
- case ISD::FADD: return LowerF128ToCall(Op, DAG, RTLIB::ADD_F128);
- case ISD::FSUB: return LowerF128ToCall(Op, DAG, RTLIB::SUB_F128);
- case ISD::FMUL: return LowerF128ToCall(Op, DAG, RTLIB::MUL_F128);
- case ISD::FDIV: return LowerF128ToCall(Op, DAG, RTLIB::DIV_F128);
- case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG, true);
- case ISD::FP_TO_UINT: return LowerFP_TO_INT(Op, DAG, false);
- case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG, true);
- case ISD::UINT_TO_FP: return LowerINT_TO_FP(Op, DAG, false);
- case ISD::FP_ROUND: return LowerFP_ROUND(Op, DAG);
- case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
- case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
- case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
-
- case ISD::SHL_PARTS: return LowerShiftLeftParts(Op, DAG);
- case ISD::SRL_PARTS:
- case ISD::SRA_PARTS: return LowerShiftRightParts(Op, DAG);
-
- case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
- case ISD::BRCOND: return LowerBRCOND(Op, DAG);
- case ISD::BR_CC: return LowerBR_CC(Op, DAG);
- case ISD::GlobalAddress: return LowerGlobalAddressELF(Op, DAG);
- case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
- case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
- case ISD::JumpTable: return LowerJumpTable(Op, DAG);
- case ISD::SELECT: return LowerSELECT(Op, DAG);
- case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
- case ISD::SETCC: return LowerSETCC(Op, DAG);
- case ISD::VACOPY: return LowerVACOPY(Op, DAG);
- case ISD::VASTART: return LowerVASTART(Op, DAG);
- case ISD::BUILD_VECTOR:
- return LowerBUILD_VECTOR(Op, DAG, getSubtarget());
- case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
- case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
+SDValue AArch64TargetLowering::LowerVASTART(SDValue Op,
+ SelectionDAG &DAG) const {
+ return Subtarget->isTargetDarwin() ? LowerDarwin_VASTART(Op, DAG)
+ : LowerAAPCS_VASTART(Op, DAG);
+}
+
+SDValue AArch64TargetLowering::LowerVACOPY(SDValue Op,
+ SelectionDAG &DAG) const {
+ // AAPCS uses three pointers and two ints (= 32 bytes); Darwin uses a
+ // single pointer (8 bytes).
+ unsigned VaListSize = Subtarget->isTargetDarwin() ? 8 : 32;
+ const Value *DestSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
+ const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
+
+ return DAG.getMemcpy(Op.getOperand(0), SDLoc(Op), Op.getOperand(1),
+ Op.getOperand(2), DAG.getConstant(VaListSize, MVT::i32),
+ 8, false, false, MachinePointerInfo(DestSV),
+ MachinePointerInfo(SrcSV));
+}
+
+SDValue AArch64TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
+ assert(Subtarget->isTargetDarwin() &&
+ "automatic va_arg instruction only works on Darwin");
+
+ const Value *V = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
+ EVT VT = Op.getValueType();
+ SDLoc DL(Op);
+ SDValue Chain = Op.getOperand(0);
+ SDValue Addr = Op.getOperand(1);
+ unsigned Align = Op.getConstantOperandVal(3);
+
+ SDValue VAList = DAG.getLoad(getPointerTy(), DL, Chain, Addr,
+ MachinePointerInfo(V), false, false, false, 0);
+ Chain = VAList.getValue(1);
+
+ if (Align > 8) {
+ assert(((Align & (Align - 1)) == 0) && "Expected Align to be a power of 2");
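+ // Round VAList up to the next Align boundary: (p + Align - 1) & ~(Align - 1);
+ // e.g. with Align == 16, 0x1009 rounds up to 0x1010.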
+ VAList = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList,
+ DAG.getConstant(Align - 1, getPointerTy()));
+ VAList = DAG.getNode(ISD::AND, DL, getPointerTy(), VAList,
+ DAG.getConstant(-(int64_t)Align, getPointerTy()));
}
- return SDValue();
+ Type *ArgTy = VT.getTypeForEVT(*DAG.getContext());
+ uint64_t ArgSize = getDataLayout()->getTypeAllocSize(ArgTy);
+
+ // Scalar integer and FP values smaller than 64 bits are implicitly extended
+ // up to 64 bits. At the very least, we have to increase the striding of the
+ // vaargs list to match this, and for FP values we need to introduce
+ // FP_ROUND nodes as well.
+ if (VT.isInteger() && !VT.isVector())
+ ArgSize = 8;
+ bool NeedFPTrunc = false;
+ if (VT.isFloatingPoint() && !VT.isVector() && VT != MVT::f64) {
+ ArgSize = 8;
+ NeedFPTrunc = true;
+ }
+
+ // Increment the pointer, VAList, to the next vaarg
+ SDValue VANext = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList,
+ DAG.getConstant(ArgSize, getPointerTy()));
+ // Store the incremented VAList to the legalized pointer
+ SDValue APStore = DAG.getStore(Chain, DL, VANext, Addr, MachinePointerInfo(V),
+ false, false, 0);
+
+ // Load the actual argument out of the pointer VAList
+ if (NeedFPTrunc) {
+ // Load the value as an f64.
+ SDValue WideFP = DAG.getLoad(MVT::f64, DL, APStore, VAList,
+ MachinePointerInfo(), false, false, false, 0);
+ // Round the value down to an f32.
+ SDValue NarrowFP = DAG.getNode(ISD::FP_ROUND, DL, VT, WideFP.getValue(0),
+ DAG.getIntPtrConstant(1));
+ SDValue Ops[] = { NarrowFP, WideFP.getValue(1) };
+ // Merge the rounded value with the chain output of the load.
+ return DAG.getMergeValues(Ops, DL);
+ }
+
+ return DAG.getLoad(VT, DL, APStore, VAList, MachinePointerInfo(), false,
+ false, false, 0);
}
-/// Check if the specified splat value corresponds to a valid vector constant
-/// for a Neon instruction with a "modified immediate" operand (e.g., MOVI). If
-/// so, return the encoded 8-bit immediate and the OpCmode instruction fields
-/// values.
-static bool isNeonModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
- unsigned SplatBitSize, SelectionDAG &DAG,
- bool is128Bits, NeonModImmType type, EVT &VT,
- unsigned &Imm, unsigned &OpCmode) {
- switch (SplatBitSize) {
+SDValue AArch64TargetLowering::LowerFRAMEADDR(SDValue Op,
+ SelectionDAG &DAG) const {
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ MFI->setFrameAddressIsTaken(true);
+
+ EVT VT = Op.getValueType();
+ SDLoc DL(Op);
+ unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ SDValue FrameAddr =
+ DAG.getCopyFromReg(DAG.getEntryNode(), DL, AArch64::FP, VT);
+ while (Depth--)
+ FrameAddr = DAG.getLoad(VT, DL, DAG.getEntryNode(), FrameAddr,
+ MachinePointerInfo(), false, false, false, 0);
+ return FrameAddr;
+}
+
+// FIXME? Maybe this could be a TableGen attribute on some registers and
+// this table could be generated automatically from RegInfo.
+unsigned AArch64TargetLowering::getRegisterByName(const char* RegName,
+ EVT VT) const {
+ unsigned Reg = StringSwitch<unsigned>(RegName)
+ .Case("sp", AArch64::SP)
+ .Default(0);
+ if (Reg)
+ return Reg;
+ report_fatal_error("Invalid register name for global variable");
+}
+
+SDValue AArch64TargetLowering::LowerRETURNADDR(SDValue Op,
+ SelectionDAG &DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MFI->setReturnAddressIsTaken(true);
+
+ EVT VT = Op.getValueType();
+ SDLoc DL(Op);
+ unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ if (Depth) {
+ SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
+ SDValue Offset = DAG.getConstant(8, getPointerTy());
+ return DAG.getLoad(VT, DL, DAG.getEntryNode(),
+ DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
+ MachinePointerInfo(), false, false, false, 0);
+ }
+
+ // Return LR, which contains the return address. Mark it an implicit live-in.
+ unsigned Reg = MF.addLiveIn(AArch64::LR, &AArch64::GPR64RegClass);
+ return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, VT);
+}
+
+/// LowerShiftRightParts - Lower SRA_PARTS and SRL_PARTS, which return two
+/// i64 values and take a 2 x i64 value to shift plus a shift amount.
+SDValue AArch64TargetLowering::LowerShiftRightParts(SDValue Op,
+ SelectionDAG &DAG) const {
+ assert(Op.getNumOperands() == 3 && "Not a double-shift!");
+ EVT VT = Op.getValueType();
+ unsigned VTBits = VT.getSizeInBits();
+ SDLoc dl(Op);
+ SDValue ShOpLo = Op.getOperand(0);
+ SDValue ShOpHi = Op.getOperand(1);
+ SDValue ShAmt = Op.getOperand(2);
+ unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL;
+
+ assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS);
+
+ SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64,
+ DAG.getConstant(VTBits, MVT::i64), ShAmt);
+ SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt);
+ SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64, ShAmt,
+ DAG.getConstant(VTBits, MVT::i64));
+ SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt);
+
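+ // For ShAmt < 64 the low result is (lo >> ShAmt) | (hi << (64 - ShAmt));
+ // for ShAmt >= 64 it is hi shifted (per Opc) by ShAmt - 64. The CSELs
+ // below select between the two based on ExtraShAmt = ShAmt - 64 being >= 0.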
+ SDValue Cmp = emitComparison(ExtraShAmt, DAG.getConstant(0, MVT::i64),
+ ISD::SETGE, dl, DAG);
+ SDValue CCVal = DAG.getConstant(AArch64CC::GE, MVT::i32);
+
+ SDValue FalseValLo = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
+ SDValue TrueValLo = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);
+ SDValue Lo =
+ DAG.getNode(AArch64ISD::CSEL, dl, VT, TrueValLo, FalseValLo, CCVal, Cmp);
+
+ // AArch64 shifts larger than the register width are wrapped rather than
+ // clamped, so we can't just emit "hi >> x".
+ SDValue FalseValHi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
+ SDValue TrueValHi = Opc == ISD::SRA
+ ? DAG.getNode(Opc, dl, VT, ShOpHi,
+ DAG.getConstant(VTBits - 1, MVT::i64))
+ : DAG.getConstant(0, VT);
+ SDValue Hi =
+ DAG.getNode(AArch64ISD::CSEL, dl, VT, TrueValHi, FalseValHi, CCVal, Cmp);
+
+ SDValue Ops[2] = { Lo, Hi };
+ return DAG.getMergeValues(Ops, dl);
+}
+
+/// LowerShiftLeftParts - Lower SHL_PARTS, which returns two i64 values and
+/// takes a 2 x i64 value to shift plus a shift amount.
+SDValue AArch64TargetLowering::LowerShiftLeftParts(SDValue Op,
+ SelectionDAG &DAG) const {
+ assert(Op.getNumOperands() == 3 && "Not a double-shift!");
+ EVT VT = Op.getValueType();
+ unsigned VTBits = VT.getSizeInBits();
+ SDLoc dl(Op);
+ SDValue ShOpLo = Op.getOperand(0);
+ SDValue ShOpHi = Op.getOperand(1);
+ SDValue ShAmt = Op.getOperand(2);
+
+ assert(Op.getOpcode() == ISD::SHL_PARTS);
+ SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64,
+ DAG.getConstant(VTBits, MVT::i64), ShAmt);
+ SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt);
+ SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64, ShAmt,
+ DAG.getConstant(VTBits, MVT::i64));
+ SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);
+ SDValue Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);
+
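+ // For ShAmt < 64 the high result is (hi << ShAmt) | (lo >> (64 - ShAmt));
+ // for ShAmt >= 64 it is lo << (ShAmt - 64), and the low result becomes 0.
+ // The CSELs below select based on ExtraShAmt = ShAmt - 64 being >= 0.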
+ SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
+
+ SDValue Cmp = emitComparison(ExtraShAmt, DAG.getConstant(0, MVT::i64),
+ ISD::SETGE, dl, DAG);
+ SDValue CCVal = DAG.getConstant(AArch64CC::GE, MVT::i32);
+ SDValue Hi =
+ DAG.getNode(AArch64ISD::CSEL, dl, VT, Tmp3, FalseVal, CCVal, Cmp);
+
+ // AArch64 shifts larger than the register width are wrapped rather than
+ // clamped, so we can't just emit "lo << a" if a is too big.
+ SDValue TrueValLo = DAG.getConstant(0, VT);
+ SDValue FalseValLo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
+ SDValue Lo =
+ DAG.getNode(AArch64ISD::CSEL, dl, VT, TrueValLo, FalseValLo, CCVal, Cmp);
+
+ SDValue Ops[2] = { Lo, Hi };
+ return DAG.getMergeValues(Ops, dl);
+}
+
+bool AArch64TargetLowering::isOffsetFoldingLegal(
+ const GlobalAddressSDNode *GA) const {
+ // The AArch64 target doesn't support folding offsets into global addresses.
+ return false;
+}
+
+bool AArch64TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
+ // We can materialize #0.0 as fmov $Rd, XZR for 64-bit and 32-bit cases.
+ // FIXME: We should be able to handle f128 as well with a clever lowering.
+ if (Imm.isPosZero() && (VT == MVT::f64 || VT == MVT::f32))
+ return true;
+
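+ // Otherwise check the 8-bit FMOV immediate encoding; e.g. 2.0 and 0.5 are
+ // encodable, while 0.1 is not.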
+ if (VT == MVT::f64)
+ return AArch64_AM::getFP64Imm(Imm) != -1;
+ else if (VT == MVT::f32)
+ return AArch64_AM::getFP32Imm(Imm) != -1;
+ return false;
+}
+
+//===----------------------------------------------------------------------===//
+// AArch64 Optimization Hooks
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// AArch64 Inline Assembly Support
+//===----------------------------------------------------------------------===//
+
+// Table of Constraints
+// TODO: This is the current set of constraints supported by ARM for the
+// compiler; not all of them may make sense, e.g. S may be difficult to support.
+//
+// r - A general register
+// w - An FP/SIMD register of some size in the range v0-v31
+// x - An FP/SIMD register of some size in the range v0-v15
+// I - Constant that can be used with an ADD instruction
+// J - Constant that can be used with a SUB instruction
+// K - Constant that can be used with a 32-bit logical instruction
+// L - Constant that can be used with a 64-bit logical instruction
+// M - Constant that can be used as a 32-bit MOV immediate
+// N - Constant that can be used as a 64-bit MOV immediate
+// Q - A memory reference with base register and no offset
+// S - A symbolic address
+// Y - Floating point constant zero
+// Z - Integer constant zero
+//
+// Note that general register operands will be output using their 64-bit x
+// register name, whatever the size of the variable, unless the asm operand
+// is prefixed by the %w modifier. Floating-point and SIMD register operands
+// will be output with the v prefix unless prefixed by the %b, %h, %s, %d or
+// %q modifier.
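+//
+// For example (hypothetical user code, not part of this patch):
+//   asm("add %w0, %w1, %w2" : "=r"(c) : "r"(a), "r"(b));
+// prints all three operands with their 32-bit w register names.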
+
+/// getConstraintType - Given a constraint letter, return the type of
+/// constraint it is for this target.
+AArch64TargetLowering::ConstraintType
+AArch64TargetLowering::getConstraintType(const std::string &Constraint) const {
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ default:
+ break;
+ case 'z':
+ return C_Other;
+ case 'x':
+ case 'w':
+ return C_RegisterClass;
+ // An address with a single base register. Due to the way we currently
+ // handle addresses, it is the same as 'r'.
+ case 'Q':
+ return C_Memory;
+ }
+ }
+ return TargetLowering::getConstraintType(Constraint);
+}
+
+/// Examine constraint type and operand type and determine a weight value.
+/// This object must already have been set up with the operand type
+/// and the current alternative constraint selected.
+TargetLowering::ConstraintWeight
+AArch64TargetLowering::getSingleConstraintMatchWeight(
+ AsmOperandInfo &info, const char *constraint) const {
+ ConstraintWeight weight = CW_Invalid;
+ Value *CallOperandVal = info.CallOperandVal;
+ // If we don't have a value, we can't do a match,
+ // but allow it at the lowest weight.
+ if (!CallOperandVal)
+ return CW_Default;
+ Type *type = CallOperandVal->getType();
+ // Look at the constraint type.
+ switch (*constraint) {
default:
- llvm_unreachable("unexpected size for isNeonModifiedImm");
- case 8: {
- if (type != Neon_Mov_Imm)
- return false;
- assert((SplatBits & ~0xff) == 0 && "one byte splat value is too big");
- // Neon movi per byte: Op=0, Cmode=1110.
- OpCmode = 0xe;
- Imm = SplatBits;
- VT = is128Bits ? MVT::v16i8 : MVT::v8i8;
+ weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
+ break;
+ case 'x':
+ case 'w':
+ if (type->isFloatingPointTy() || type->isVectorTy())
+ weight = CW_Register;
+ break;
+ case 'z':
+ weight = CW_Constant;
break;
}
- case 16: {
- // Neon move inst per halfword
- VT = is128Bits ? MVT::v8i16 : MVT::v4i16;
- if ((SplatBits & ~0xff) == 0) {
- // Value = 0x00nn is 0x00nn LSL 0
- // movi: Op=0, Cmode=1000; mvni: Op=1, Cmode=1000
- // bic: Op=1, Cmode=1001; orr: Op=0, Cmode=1001
- // Op=x, Cmode=100y
- Imm = SplatBits;
- OpCmode = 0x8;
+ return weight;
+}
+
+std::pair<unsigned, const TargetRegisterClass *>
+AArch64TargetLowering::getRegForInlineAsmConstraint(
+ const std::string &Constraint, MVT VT) const {
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ case 'r':
+ if (VT.getSizeInBits() == 64)
+ return std::make_pair(0U, &AArch64::GPR64commonRegClass);
+ return std::make_pair(0U, &AArch64::GPR32commonRegClass);
+ case 'w':
+ if (VT == MVT::f32)
+ return std::make_pair(0U, &AArch64::FPR32RegClass);
+ if (VT.getSizeInBits() == 64)
+ return std::make_pair(0U, &AArch64::FPR64RegClass);
+ if (VT.getSizeInBits() == 128)
+ return std::make_pair(0U, &AArch64::FPR128RegClass);
break;
- }
- if ((SplatBits & ~0xff00) == 0) {
- // Value = 0xnn00 is 0x00nn LSL 8
- // movi: Op=0, Cmode=1010; mvni: Op=1, Cmode=1010
- // bic: Op=1, Cmode=1011; orr: Op=0, Cmode=1011
- // Op=x, Cmode=101x
- Imm = SplatBits >> 8;
- OpCmode = 0xa;
+ // The instructions that this constraint is designed for can
+ // only take 128-bit registers so just use that regclass.
+ case 'x':
+ if (VT.getSizeInBits() == 128)
+ return std::make_pair(0U, &AArch64::FPR128_loRegClass);
break;
}
- // can't handle any other
- return false;
}
+ if (StringRef("{cc}").equals_lower(Constraint))
+ return std::make_pair(unsigned(AArch64::NZCV), &AArch64::CCRRegClass);
- case 32: {
- // First the LSL variants (MSL is unusable by some interested instructions).
+ // Use the default implementation in TargetLowering to convert the register
+ // constraint into a member of a register class.
+ std::pair<unsigned, const TargetRegisterClass *> Res;
+ Res = TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+
+ // Not found as a standard register?
+ if (!Res.second) {
+ unsigned Size = Constraint.size();
+ if ((Size == 4 || Size == 5) && Constraint[0] == '{' &&
+ tolower(Constraint[1]) == 'v' && Constraint[Size - 1] == '}') {
+ const std::string Reg =
+ std::string(&Constraint[2], &Constraint[Size - 1]);
+ int RegNo = atoi(Reg.c_str());
+ if (RegNo >= 0 && RegNo <= 31) {
+ // v0 - v31 are aliases of q0 - q31.
+ // By default we'll emit v0-v31 for this; a size modifier on the asm
+ // operand makes us emit the appropriately-sized register name instead.
+ Res.first = AArch64::FPR128RegClass.getRegister(RegNo);
+ Res.second = &AArch64::FPR128RegClass;
+ }
+ }
+ }
- // Neon move instr per word, shift zeros
- VT = is128Bits ? MVT::v4i32 : MVT::v2i32;
- if ((SplatBits & ~0xff) == 0) {
- // Value = 0x000000nn is 0x000000nn LSL 0
- // movi: Op=0, Cmode= 0000; mvni: Op=1, Cmode= 0000
- // bic: Op=1, Cmode= 0001; orr: Op=0, Cmode= 0001
- // Op=x, Cmode=000x
- Imm = SplatBits;
- OpCmode = 0;
- break;
+ return Res;
+}
+
+/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
+/// vector. If it is invalid, don't add anything to Ops.
+void AArch64TargetLowering::LowerAsmOperandForConstraint(
+ SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops,
+ SelectionDAG &DAG) const {
+ SDValue Result;
+
+ // Currently only support length 1 constraints.
+ if (Constraint.length() != 1)
+ return;
+
+ char ConstraintLetter = Constraint[0];
+ switch (ConstraintLetter) {
+ default:
+ break;
+
+ // This set of constraints deals with valid constants for various instructions.
+ // Validate and return a target constant for them if we can.
+ case 'z': {
+ // 'z' maps to xzr or wzr so it needs an input of 0.
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
+ if (!C || C->getZExtValue() != 0)
+ return;
+
+ if (Op.getValueType() == MVT::i64)
+ Result = DAG.getRegister(AArch64::XZR, MVT::i64);
+ else
+ Result = DAG.getRegister(AArch64::WZR, MVT::i32);
+ break;
+ }
+
+ case 'I':
+ case 'J':
+ case 'K':
+ case 'L':
+ case 'M':
+ case 'N':
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
+ if (!C)
+ return;
+
+ // Grab the value and do some validation.
+ uint64_t CVal = C->getZExtValue();
+ switch (ConstraintLetter) {
+ // The I constraint applies only to simple ADD or SUB immediate operands:
+ // i.e. 0 to 4095 with optional shift by 12
+ // The J constraint applies only to ADD or SUB immediates that would be
+ // valid when negated, i.e. if [an add pattern] were to be output as a SUB
+ // instruction [or vice versa], in other words -1 to -4095 with optional
+ // left shift by 12.
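+ // E.g. 'I' accepts 4095 and 4095 << 12 but rejects 4097; 'J' accepts -42
+ // (since 42 is a valid SUB immediate).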
+ case 'I':
+ if (isUInt<12>(CVal) || isShiftedUInt<12, 12>(CVal))
+ break;
+ return;
+ case 'J': {
+ uint64_t NVal = -C->getSExtValue();
+ if (isUInt<12>(NVal) || isShiftedUInt<12, 12>(NVal))
+ break;
+ return;
}
- if ((SplatBits & ~0xff00) == 0) {
- // Value = 0x0000nn00 is 0x000000nn LSL 8
- // movi: Op=0, Cmode= 0010; mvni: Op=1, Cmode= 0010
- // bic: Op=1, Cmode= 0011; orr : Op=0, Cmode= 0011
- // Op=x, Cmode=001x
- Imm = SplatBits >> 8;
- OpCmode = 0x2;
- break;
+ // The K and L constraints apply *only* to logical immediates, including
+ // what used to be the MOVI alias for ORR (though the MOVI alias has now
+ // been removed and MOV should be used). So these constraints have to
+ // distinguish between bit patterns that are valid 32-bit or 64-bit
+ // "bitmask immediates": for example 0xaaaaaaaa is a valid bimm32 (K), but
+ // not a valid bimm64 (L) where 0xaaaaaaaaaaaaaaaa would be valid, and vice
+ // versa.
+ case 'K':
+ if (AArch64_AM::isLogicalImmediate(CVal, 32))
+ break;
+ return;
+ case 'L':
+ if (AArch64_AM::isLogicalImmediate(CVal, 64))
+ break;
+ return;
+ // The M and N constraints are a superset of K and L respectively, for use
+ // with the MOV (immediate) alias. As well as the logical immediates they
+ // also match 32 or 64-bit immediates that can be loaded either using a
+ // *single* MOVZ or MOVN, such as 32-bit 0x12340000, 0x00001234, 0xffffedca
+ // (M) or 64-bit 0x1234000000000000 (N) etc.
+ // As a note, some of this code is liberally stolen from the asm parser.
+ case 'M': {
+ if (!isUInt<32>(CVal))
+ return;
+ if (AArch64_AM::isLogicalImmediate(CVal, 32))
+ break;
+ if ((CVal & 0xFFFF) == CVal)
+ break;
+ if ((CVal & 0xFFFF0000ULL) == CVal)
+ break;
+ uint64_t NCVal = ~(uint32_t)CVal;
+ if ((NCVal & 0xFFFFULL) == NCVal)
+ break;
+ if ((NCVal & 0xFFFF0000ULL) == NCVal)
+ break;
+ return;
}
- if ((SplatBits & ~0xff0000) == 0) {
- // Value = 0x00nn0000 is 0x000000nn LSL 16
- // movi: Op=0, Cmode= 0100; mvni: Op=1, Cmode= 0100
- // bic: Op=1, Cmode= 0101; orr: Op=0, Cmode= 0101
- // Op=x, Cmode=010x
- Imm = SplatBits >> 16;
- OpCmode = 0x4;
- break;
+ case 'N': {
+ if (AArch64_AM::isLogicalImmediate(CVal, 64))
+ break;
+ if ((CVal & 0xFFFFULL) == CVal)
+ break;
+ if ((CVal & 0xFFFF0000ULL) == CVal)
+ break;
+ if ((CVal & 0xFFFF00000000ULL) == CVal)
+ break;
+ if ((CVal & 0xFFFF000000000000ULL) == CVal)
+ break;
+ uint64_t NCVal = ~CVal;
+ if ((NCVal & 0xFFFFULL) == NCVal)
+ break;
+ if ((NCVal & 0xFFFF0000ULL) == NCVal)
+ break;
+ if ((NCVal & 0xFFFF00000000ULL) == NCVal)
+ break;
+ if ((NCVal & 0xFFFF000000000000ULL) == NCVal)
+ break;
+ return;
}
- if ((SplatBits & ~0xff000000) == 0) {
- // Value = 0xnn000000 is 0x000000nn LSL 24
- // movi: Op=0, Cmode= 0110; mvni: Op=1, Cmode= 0110
- // bic: Op=1, Cmode= 0111; orr: Op=0, Cmode= 0111
- // Op=x, Cmode=011x
- Imm = SplatBits >> 24;
- OpCmode = 0x6;
- break;
+ default:
+ return;
}
- // Now the MSL immediates.
+ // All assembler immediates are 64-bit integers.
+ Result = DAG.getTargetConstant(CVal, MVT::i64);
+ break;
+ }
- // Neon move instr per word, shift ones
- if ((SplatBits & ~0xffff) == 0 &&
- ((SplatBits | SplatUndef) & 0xff) == 0xff) {
- // Value = 0x0000nnff is 0x000000nn MSL 8
- // movi: Op=0, Cmode= 1100; mvni: Op=1, Cmode= 1100
- // Op=x, Cmode=1100
- Imm = SplatBits >> 8;
- OpCmode = 0xc;
- break;
+ if (Result.getNode()) {
+ Ops.push_back(Result);
+ return;
+ }
+
+ return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
+}
+
+//===----------------------------------------------------------------------===//
+// AArch64 Advanced SIMD Support
+//===----------------------------------------------------------------------===//
+
+/// WidenVector - Given a value in the V64 register class, produce the
+/// equivalent value in the V128 register class.
+static SDValue WidenVector(SDValue V64Reg, SelectionDAG &DAG) {
+ EVT VT = V64Reg.getValueType();
+ unsigned NarrowSize = VT.getVectorNumElements();
+ MVT EltTy = VT.getVectorElementType().getSimpleVT();
+ MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize);
+ SDLoc DL(V64Reg);
+
+ return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, WideTy, DAG.getUNDEF(WideTy),
+ V64Reg, DAG.getConstant(0, MVT::i32));
+}
+
+/// getExtFactor - Determine the adjustment factor for the position when
+/// generating an "extract from vector registers" instruction.
+static unsigned getExtFactor(SDValue &V) {
+ EVT EltType = V.getValueType().getVectorElementType();
+ return EltType.getSizeInBits() / 8;
+}
+
+/// NarrowVector - Given a value in the V128 register class, produce the
+/// equivalent value in the V64 register class.
+static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) {
+ EVT VT = V128Reg.getValueType();
+ unsigned WideSize = VT.getVectorNumElements();
+ MVT EltTy = VT.getVectorElementType().getSimpleVT();
+ MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2);
+ SDLoc DL(V128Reg);
+
+ return DAG.getTargetExtractSubreg(AArch64::dsub, DL, NarrowTy, V128Reg);
+}
+
+// Gather data to see if the operation can be modelled as a
+// shuffle in combination with VEXTs.
+SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc dl(Op);
+ EVT VT = Op.getValueType();
+ unsigned NumElts = VT.getVectorNumElements();
+
+ SmallVector<SDValue, 2> SourceVecs;
+ SmallVector<unsigned, 2> MinElts;
+ SmallVector<unsigned, 2> MaxElts;
+
+ for (unsigned i = 0; i < NumElts; ++i) {
+ SDValue V = Op.getOperand(i);
+ if (V.getOpcode() == ISD::UNDEF)
+ continue;
+ else if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT) {
+ // A shuffle can only come from building a vector from various
+ // elements of other vectors.
+ return SDValue();
}
- if ((SplatBits & ~0xffffff) == 0 &&
- ((SplatBits | SplatUndef) & 0xffff) == 0xffff) {
- // Value = 0x00nnffff is 0x000000nn MSL 16
- // movi: Op=1, Cmode= 1101; mvni: Op=1, Cmode= 1101
- // Op=x, Cmode=1101
- Imm = SplatBits >> 16;
- OpCmode = 0xd;
- break;
+
+ // Record this extraction against the appropriate vector if possible...
+ SDValue SourceVec = V.getOperand(0);
+ unsigned EltNo = cast<ConstantSDNode>(V.getOperand(1))->getZExtValue();
+ bool FoundSource = false;
+ for (unsigned j = 0; j < SourceVecs.size(); ++j) {
+ if (SourceVecs[j] == SourceVec) {
+ if (MinElts[j] > EltNo)
+ MinElts[j] = EltNo;
+ if (MaxElts[j] < EltNo)
+ MaxElts[j] = EltNo;
+ FoundSource = true;
+ break;
+ }
+ }
+
+ // Or record a new source if not...
+ if (!FoundSource) {
+ SourceVecs.push_back(SourceVec);
+ MinElts.push_back(EltNo);
+ MaxElts.push_back(EltNo);
+ }
+ }
+
+ // Currently we only do something sane when at most two source vectors are
+ // involved.
+ if (SourceVecs.size() > 2)
+ return SDValue();
+
+ SDValue ShuffleSrcs[2] = { DAG.getUNDEF(VT), DAG.getUNDEF(VT) };
+ int VEXTOffsets[2] = { 0, 0 };
+
+ // This loop extracts the usage patterns of the source vectors
+ // and prepares appropriate SDValues for a shuffle if possible.
+ for (unsigned i = 0; i < SourceVecs.size(); ++i) {
+ if (SourceVecs[i].getValueType() == VT) {
+ // No VEXT necessary
+ ShuffleSrcs[i] = SourceVecs[i];
+ VEXTOffsets[i] = 0;
+ continue;
+ } else if (SourceVecs[i].getValueType().getVectorNumElements() < NumElts) {
+ // We can pad out the smaller vector for free, so if it's part of a
+ // shuffle...
+ ShuffleSrcs[i] = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, SourceVecs[i],
+ DAG.getUNDEF(SourceVecs[i].getValueType()));
+ continue;
+ }
+
+ // Don't attempt to extract subvectors from BUILD_VECTOR sources
+ // that expand or trunc the original value.
+ // TODO: We can try to bitcast and ANY_EXTEND the result but
+ // we need to consider the cost of vector ANY_EXTEND, and the
+ // legality of all the types.
+ if (SourceVecs[i].getValueType().getVectorElementType() !=
+ VT.getVectorElementType())
+ return SDValue();
+
+ // Since only 64-bit and 128-bit vectors are legal on AArch64 and
+ // we've eliminated the other cases...
+ assert(SourceVecs[i].getValueType().getVectorNumElements() == 2 * NumElts &&
+ "unexpected vector sizes in ReconstructShuffle");
+
+ if (MaxElts[i] - MinElts[i] >= NumElts) {
+ // Span too large for a VEXT to cope
+ return SDValue();
+ }
+
+ if (MinElts[i] >= NumElts) {
+ // The extraction can just take the second half
+ VEXTOffsets[i] = NumElts;
+ ShuffleSrcs[i] =
+ DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, SourceVecs[i],
+ DAG.getIntPtrConstant(NumElts));
+ } else if (MaxElts[i] < NumElts) {
+ // The extraction can just take the first half
+ VEXTOffsets[i] = 0;
+ ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
+ SourceVecs[i], DAG.getIntPtrConstant(0));
+ } else {
+ // An actual VEXT is needed
+ VEXTOffsets[i] = MinElts[i];
+ SDValue VEXTSrc1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
+ SourceVecs[i], DAG.getIntPtrConstant(0));
+ SDValue VEXTSrc2 =
+ DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, SourceVecs[i],
+ DAG.getIntPtrConstant(NumElts));
+ unsigned Imm = VEXTOffsets[i] * getExtFactor(VEXTSrc1);
+ ShuffleSrcs[i] = DAG.getNode(AArch64ISD::EXT, dl, VT, VEXTSrc1, VEXTSrc2,
+ DAG.getConstant(Imm, MVT::i32));
+ }
+ }
+
+ SmallVector<int, 8> Mask;
+
+ for (unsigned i = 0; i < NumElts; ++i) {
+ SDValue Entry = Op.getOperand(i);
+ if (Entry.getOpcode() == ISD::UNDEF) {
+ Mask.push_back(-1);
+ continue;
+ }
+
+ SDValue ExtractVec = Entry.getOperand(0);
+ int ExtractElt =
+ cast<ConstantSDNode>(Op.getOperand(i).getOperand(1))->getSExtValue();
+ if (ExtractVec == SourceVecs[0]) {
+ Mask.push_back(ExtractElt - VEXTOffsets[0]);
+ } else {
+ Mask.push_back(ExtractElt + NumElts - VEXTOffsets[1]);
}
- // can't handle any other
+ }
+
+ // Final check before we try to produce nonsense...
+ if (isShuffleMaskLegal(Mask, VT))
+ return DAG.getVectorShuffle(VT, dl, ShuffleSrcs[0], ShuffleSrcs[1],
+ &Mask[0]);
+
+ return SDValue();
+}
+
+// Check if an EXT instruction can handle the shuffle mask when the
+// vector sources of the shuffle are the same.
+static bool isSingletonEXTMask(ArrayRef<int> M, EVT VT, unsigned &Imm) {
+ unsigned NumElts = VT.getVectorNumElements();
+
+ // Assume that the first shuffle index is not UNDEF. Fail if it is.
+ if (M[0] < 0)
+ return false;
+
+ Imm = M[0];
+
+ // If this is a VEXT shuffle, the immediate value is the index of the first
+ // element. The other shuffle indices must be the successive elements after
+ // the first one.
+ unsigned ExpectedElt = Imm;
+ for (unsigned i = 1; i < NumElts; ++i) {
+ // Increment the expected index. If it wraps around, just follow it
+ // back to index zero and keep going.
+ ++ExpectedElt;
+ if (ExpectedElt == NumElts)
+ ExpectedElt = 0;
+
+ if (M[i] < 0)
+ continue; // ignore UNDEF indices
+ if (ExpectedElt != static_cast<unsigned>(M[i]))
+ return false;
+ }
+
+ return true;
+}
+
+// Check if an EXT instruction can handle the shuffle mask when the
+// vector sources of the shuffle are different.
+static bool isEXTMask(ArrayRef<int> M, EVT VT, bool &ReverseEXT,
+ unsigned &Imm) {
+ // Look for the first non-undef element.
+ const int *FirstRealElt = std::find_if(M.begin(), M.end(),
+ [](int Elt) {return Elt >= 0;});
+
+ // Benefit from APInt to handle overflow when calculating the expected element.
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned MaskBits = APInt(32, NumElts * 2).logBase2();
+ APInt ExpectedElt = APInt(MaskBits, *FirstRealElt + 1);
+ // The following shuffle indices must be the successive elements after the
+ // first real element.
+ const int *FirstWrongElt = std::find_if(FirstRealElt + 1, M.end(),
+ [&](int Elt) {return Elt != ExpectedElt++ && Elt != -1;});
+ if (FirstWrongElt != M.end())
+ return false;
+
+ // The index of an EXT is the first element if it is not UNDEF.
+ // Watch out for the beginning UNDEFs. The EXT index should be the expected
+ // value of the first element. E.g.
+ // <-1, -1, 3, ...> is treated as <1, 2, 3, ...>.
+ // <-1, -1, 0, 1, ...> is treated as <2*NumElts-2, 2*NumElts-1, 0, 1, ...>.
+ // ExpectedElt is the last mask index plus 1.
+ Imm = ExpectedElt.getZExtValue();
+
+ // There are two different cases that require reversing the input vectors.
+ // For example, for vector <4 x i32> we have the following cases,
+ // Case 1: shufflevector(<4 x i32>,<4 x i32>,<-1, -1, -1, 0>)
+ // Case 2: shufflevector(<4 x i32>,<4 x i32>,<-1, -1, 7, 0>)
+ // In both cases we finally use mask <5, 6, 7, 0>, which requires reversing
+ // the two input vectors.
+ if (Imm < NumElts)
+ ReverseEXT = true;
+ else
+ Imm -= NumElts;
+
+ return true;
+}
+
+/// isREVMask - Check if a vector shuffle corresponds to a REV
+/// instruction with the specified blocksize. (The order of the elements
+/// within each block of the vector is reversed.)
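+/// E.g. a REV64 of v8i16 corresponds to mask <3, 2, 1, 0, 7, 6, 5, 4>.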
+static bool isREVMask(ArrayRef<int> M, EVT VT, unsigned BlockSize) {
+ assert((BlockSize == 16 || BlockSize == 32 || BlockSize == 64) &&
+ "Only possible block sizes for REV are: 16, 32, 64");
+
+ unsigned EltSz = VT.getVectorElementType().getSizeInBits();
+ if (EltSz == 64)
+ return false;
+
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned BlockElts = M[0] + 1;
+ // If the first shuffle index is UNDEF, be optimistic.
+ if (M[0] < 0)
+ BlockElts = BlockSize / EltSz;
+
+ if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz)
return false;
+
+ for (unsigned i = 0; i < NumElts; ++i) {
+ if (M[i] < 0)
+ continue; // ignore UNDEF indices
+ if ((unsigned)M[i] != (i - i % BlockElts) + (BlockElts - 1 - i % BlockElts))
+ return false;
+ }
+
+ return true;
+}
+
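+/// isZIPMask - Check for a mask that interleaves corresponding elements from
+/// the low halves (ZIP1) or high halves (ZIP2) of two vectors, e.g.
+/// <0, 4, 1, 5> for ZIP1 on v4i32.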
+static bool isZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
+ unsigned NumElts = VT.getVectorNumElements();
+ WhichResult = (M[0] == 0 ? 0 : 1);
+ unsigned Idx = WhichResult * NumElts / 2;
+ for (unsigned i = 0; i != NumElts; i += 2) {
+ if ((M[i] >= 0 && (unsigned)M[i] != Idx) ||
+ (M[i + 1] >= 0 && (unsigned)M[i + 1] != Idx + NumElts))
+ return false;
+ Idx += 1;
+ }
+
+ return true;
+}
+
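+/// isUZPMask - Check for a mask that concatenates the even-numbered (UZP1) or
+/// odd-numbered (UZP2) elements of two vectors, e.g. <0, 2, 4, 6> for UZP1
+/// on v4i32.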
+static bool isUZPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
+ unsigned NumElts = VT.getVectorNumElements();
+ WhichResult = (M[0] == 0 ? 0 : 1);
+ for (unsigned i = 0; i != NumElts; ++i) {
+ if (M[i] < 0)
+ continue; // ignore UNDEF indices
+ if ((unsigned)M[i] != 2 * i + WhichResult)
+ return false;
+ }
+
+ return true;
+}
+
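+/// isTRNMask - Check for a mask that transposes the even (TRN1) or odd (TRN2)
+/// lanes of two vectors, e.g. <0, 4, 2, 6> for TRN1 on v4i32.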
+static bool isTRNMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
+ unsigned NumElts = VT.getVectorNumElements();
+ WhichResult = (M[0] == 0 ? 0 : 1);
+ for (unsigned i = 0; i < NumElts; i += 2) {
+ if ((M[i] >= 0 && (unsigned)M[i] != i + WhichResult) ||
+ (M[i + 1] >= 0 && (unsigned)M[i + 1] != i + NumElts + WhichResult))
+ return false;
}
+ return true;
+}
- case 64: {
- if (type != Neon_Mov_Imm)
+/// isZIP_v_undef_Mask - Special case of isZIPMask for canonical form of
+/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
+/// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>.
+static bool isZIP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
+ unsigned NumElts = VT.getVectorNumElements();
+ WhichResult = (M[0] == 0 ? 0 : 1);
+ unsigned Idx = WhichResult * NumElts / 2;
+ for (unsigned i = 0; i != NumElts; i += 2) {
+ if ((M[i] >= 0 && (unsigned)M[i] != Idx) ||
+ (M[i + 1] >= 0 && (unsigned)M[i + 1] != Idx))
return false;
- // Neon move instr bytemask, where each byte is either 0x00 or 0xff.
- // movi Op=1, Cmode=1110.
- OpCmode = 0x1e;
- uint64_t BitMask = 0xff;
- uint64_t Val = 0;
- unsigned ImmMask = 1;
- Imm = 0;
- for (int ByteNum = 0; ByteNum < 8; ++ByteNum) {
- if (((SplatBits | SplatUndef) & BitMask) == BitMask) {
- Val |= BitMask;
- Imm |= ImmMask;
- } else if ((SplatBits & BitMask) != 0) {
+ Idx += 1;
+ }
+
+ return true;
+}
+
+/// isUZP_v_undef_Mask - Special case of isUZPMask for canonical form of
+/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
+/// Mask is e.g., <0, 2, 0, 2> instead of <0, 2, 4, 6>.
+static bool isUZP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
+ unsigned Half = VT.getVectorNumElements() / 2;
+ WhichResult = (M[0] == 0 ? 0 : 1);
+ for (unsigned j = 0; j != 2; ++j) {
+ unsigned Idx = WhichResult;
+ for (unsigned i = 0; i != Half; ++i) {
+ int MIdx = M[i + j * Half];
+ if (MIdx >= 0 && (unsigned)MIdx != Idx)
return false;
- }
- BitMask <<= 8;
- ImmMask <<= 1;
+ Idx += 2;
}
- SplatBits = Val;
- VT = is128Bits ? MVT::v2i64 : MVT::v1i64;
- break;
- }
}
return true;
}
-static SDValue PerformANDCombine(SDNode *N,
- TargetLowering::DAGCombinerInfo &DCI) {
+/// isTRN_v_undef_Mask - Special case of isTRNMask for canonical form of
+/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
+/// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>.
+static bool isTRN_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
+ unsigned NumElts = VT.getVectorNumElements();
+ WhichResult = (M[0] == 0 ? 0 : 1);
+ for (unsigned i = 0; i < NumElts; i += 2) {
+ if ((M[i] >= 0 && (unsigned)M[i] != i + WhichResult) ||
+ (M[i + 1] >= 0 && (unsigned)M[i + 1] != i + WhichResult))
+ return false;
+ }
+ return true;
+}
- SelectionDAG &DAG = DCI.DAG;
- SDLoc DL(N);
- EVT VT = N->getValueType(0);
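+/// isINSMask - Check for a mask that is an identity shuffle of one input with
+/// a single element (the Anomaly) taken from elsewhere, which can be matched
+/// with the INS (insert lane) instruction.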
+static bool isINSMask(ArrayRef<int> M, int NumInputElements,
+ bool &DstIsLeft, int &Anomaly) {
+ if (M.size() != static_cast<size_t>(NumInputElements))
+ return false;
- // We're looking for an SRA/SHL pair which form an SBFX.
+ int NumLHSMatch = 0, NumRHSMatch = 0;
+ int LastLHSMismatch = -1, LastRHSMismatch = -1;
- if (VT != MVT::i32 && VT != MVT::i64)
- return SDValue();
+ for (int i = 0; i < NumInputElements; ++i) {
+ if (M[i] == -1) {
+ ++NumLHSMatch;
+ ++NumRHSMatch;
+ continue;
+ }
- if (!isa<ConstantSDNode>(N->getOperand(1)))
- return SDValue();
+ if (M[i] == i)
+ ++NumLHSMatch;
+ else
+ LastLHSMismatch = i;
- uint64_t TruncMask = N->getConstantOperandVal(1);
- if (!isMask_64(TruncMask))
- return SDValue();
+ if (M[i] == i + NumInputElements)
+ ++NumRHSMatch;
+ else
+ LastRHSMismatch = i;
+ }
- uint64_t Width = CountPopulation_64(TruncMask);
- SDValue Shift = N->getOperand(0);
+ if (NumLHSMatch == NumInputElements - 1) {
+ DstIsLeft = true;
+ Anomaly = LastLHSMismatch;
+ return true;
+ } else if (NumRHSMatch == NumInputElements - 1) {
+ DstIsLeft = false;
+ Anomaly = LastRHSMismatch;
+ return true;
+ }
- if (Shift.getOpcode() != ISD::SRL)
- return SDValue();
+ return false;
+}
- if (!isa<ConstantSDNode>(Shift->getOperand(1)))
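+/// isConcatMask - Check for a mask that selects the low half of each input in
+/// order, e.g. <0, 1, 4, 5> for two v4i32 sources, so the shuffle is just a
+/// CONCAT_VECTORS of the (possibly extracted) low halves.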
+static bool isConcatMask(ArrayRef<int> Mask, EVT VT, bool SplitLHS) {
+ if (VT.getSizeInBits() != 128)
+ return false;
+
+ unsigned NumElts = VT.getVectorNumElements();
+
+ for (int I = 0, E = NumElts / 2; I != E; I++) {
+ if (Mask[I] != I)
+ return false;
+ }
+
+ int Offset = NumElts / 2;
+ for (int I = NumElts / 2, E = NumElts; I != E; I++) {
+ if (Mask[I] != I + SplitLHS * Offset)
+ return false;
+ }
+
+ return true;
+}
+
+static SDValue tryFormConcatFromShuffle(SDValue Op, SelectionDAG &DAG) {
+ SDLoc DL(Op);
+ EVT VT = Op.getValueType();
+ SDValue V0 = Op.getOperand(0);
+ SDValue V1 = Op.getOperand(1);
+ ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op)->getMask();
+
+ if (VT.getVectorElementType() != V0.getValueType().getVectorElementType() ||
+ VT.getVectorElementType() != V1.getValueType().getVectorElementType())
return SDValue();
- uint64_t LSB = Shift->getConstantOperandVal(1);
- if (LSB > VT.getSizeInBits() || Width > VT.getSizeInBits())
+ bool SplitV0 = V0.getValueType().getSizeInBits() == 128;
+
+ if (!isConcatMask(Mask, VT, SplitV0))
return SDValue();
- return DAG.getNode(AArch64ISD::UBFX, DL, VT, Shift.getOperand(0),
- DAG.getConstant(LSB, MVT::i64),
- DAG.getConstant(LSB + Width - 1, MVT::i64));
-}
-
-/// For a true bitfield insert, the bits getting into that contiguous mask
-/// should come from the low part of an existing value: they must be formed from
-/// a compatible SHL operation (unless they're already low). This function
-/// checks that condition and returns the least-significant bit that's
-/// intended. If the operation not a field preparation, -1 is returned.
-static int32_t getLSBForBFI(SelectionDAG &DAG, SDLoc DL, EVT VT,
- SDValue &MaskedVal, uint64_t Mask) {
- if (!isShiftedMask_64(Mask))
- return -1;
-
- // Now we need to alter MaskedVal so that it is an appropriate input for a BFI
- // instruction. BFI will do a left-shift by LSB before applying the mask we've
- // spotted, so in general we should pre-emptively "undo" that by making sure
- // the incoming bits have had a right-shift applied to them.
- //
- // This right shift, however, will combine with existing left/right shifts. In
- // the simplest case of a completely straight bitfield operation, it will be
- // expected to completely cancel out with an existing SHL. More complicated
- // cases (e.g. bitfield to bitfield copy) may still need a real shift before
- // the BFI.
-
- uint64_t LSB = countTrailingZeros(Mask);
- int64_t ShiftRightRequired = LSB;
- if (MaskedVal.getOpcode() == ISD::SHL &&
- isa<ConstantSDNode>(MaskedVal.getOperand(1))) {
- ShiftRightRequired -= MaskedVal.getConstantOperandVal(1);
- MaskedVal = MaskedVal.getOperand(0);
- } else if (MaskedVal.getOpcode() == ISD::SRL &&
- isa<ConstantSDNode>(MaskedVal.getOperand(1))) {
- ShiftRightRequired += MaskedVal.getConstantOperandVal(1);
- MaskedVal = MaskedVal.getOperand(0);
- }
-
- if (ShiftRightRequired > 0)
- MaskedVal = DAG.getNode(ISD::SRL, DL, VT, MaskedVal,
- DAG.getConstant(ShiftRightRequired, MVT::i64));
- else if (ShiftRightRequired < 0) {
- // We could actually end up with a residual left shift, for example with
- // "struc.bitfield = val << 1".
- MaskedVal = DAG.getNode(ISD::SHL, DL, VT, MaskedVal,
- DAG.getConstant(-ShiftRightRequired, MVT::i64));
- }
-
- return LSB;
-}
-
-/// Searches from N for an existing AArch64ISD::BFI node, possibly surrounded by
-/// a mask and an extension. Returns true if a BFI was found and provides
-/// information on its surroundings.
-static bool findMaskedBFI(SDValue N, SDValue &BFI, uint64_t &Mask,
- bool &Extended) {
- Extended = false;
- if (N.getOpcode() == ISD::ZERO_EXTEND) {
- Extended = true;
- N = N.getOperand(0);
- }
-
- if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1))) {
- Mask = N->getConstantOperandVal(1);
- N = N.getOperand(0);
+ EVT CastVT = EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(),
+ VT.getVectorNumElements() / 2);
+ if (SplitV0) {
+ V0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, CastVT, V0,
+ DAG.getConstant(0, MVT::i64));
+ }
+ if (V1.getValueType().getSizeInBits() == 128) {
+ V1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, CastVT, V1,
+ DAG.getConstant(0, MVT::i64));
+ }
+ return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, V0, V1);
+}
+
+/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
+/// the specified operations to build the shuffle.
+static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
+ SDValue RHS, SelectionDAG &DAG,
+ SDLoc dl) {
+ unsigned OpNum = (PFEntry >> 26) & 0x0F;
+ unsigned LHSID = (PFEntry >> 13) & ((1 << 13) - 1);
+ unsigned RHSID = (PFEntry >> 0) & ((1 << 13) - 1);
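+ // Each entry packs an opcode plus two 13-bit operand IDs; judging by the
+ // OP_COPY checks below, an ID encodes a 4-element mask as a base-9 number
+ // (digits 0-7 are lane indices, 8 means undef).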
+
+ enum {
+ OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
+ OP_VREV,
+ OP_VDUP0,
+ OP_VDUP1,
+ OP_VDUP2,
+ OP_VDUP3,
+ OP_VEXT1,
+ OP_VEXT2,
+ OP_VEXT3,
+ OP_VUZPL, // VUZP, left result
+ OP_VUZPR, // VUZP, right result
+ OP_VZIPL, // VZIP, left result
+ OP_VZIPR, // VZIP, right result
+ OP_VTRNL, // VTRN, left result
+ OP_VTRNR // VTRN, right result
+ };
+
+ if (OpNum == OP_COPY) {
+ if (LHSID == (1 * 9 + 2) * 9 + 3)
+ return LHS;
+ assert(LHSID == ((4 * 9 + 5) * 9 + 6) * 9 + 7 && "Illegal OP_COPY!");
+ return RHS;
+ }
+
+ SDValue OpLHS, OpRHS;
+ OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
+ OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
+ EVT VT = OpLHS.getValueType();
+
+ switch (OpNum) {
+ default:
+ llvm_unreachable("Unknown shuffle opcode!");
+ case OP_VREV:
+ // VREV divides the vector in half and swaps within the half.
+ if (VT.getVectorElementType() == MVT::i32 ||
+ VT.getVectorElementType() == MVT::f32)
+ return DAG.getNode(AArch64ISD::REV64, dl, VT, OpLHS);
+ // vrev <4 x i16> -> REV32
+ if (VT.getVectorElementType() == MVT::i16)
+ return DAG.getNode(AArch64ISD::REV32, dl, VT, OpLHS);
+ // vrev <4 x i8> -> REV16
+ assert(VT.getVectorElementType() == MVT::i8);
+ return DAG.getNode(AArch64ISD::REV16, dl, VT, OpLHS);
+ case OP_VDUP0:
+ case OP_VDUP1:
+ case OP_VDUP2:
+ case OP_VDUP3: {
+ EVT EltTy = VT.getVectorElementType();
+ unsigned Opcode;
+ if (EltTy == MVT::i8)
+ Opcode = AArch64ISD::DUPLANE8;
+ else if (EltTy == MVT::i16)
+ Opcode = AArch64ISD::DUPLANE16;
+ else if (EltTy == MVT::i32 || EltTy == MVT::f32)
+ Opcode = AArch64ISD::DUPLANE32;
+ else if (EltTy == MVT::i64 || EltTy == MVT::f64)
+ Opcode = AArch64ISD::DUPLANE64;
+ else
+ llvm_unreachable("Invalid vector element type?");
+
+ if (VT.getSizeInBits() == 64)
+ OpLHS = WidenVector(OpLHS, DAG);
+ SDValue Lane = DAG.getConstant(OpNum - OP_VDUP0, MVT::i64);
+ return DAG.getNode(Opcode, dl, VT, OpLHS, Lane);
+ }
+ case OP_VEXT1:
+ case OP_VEXT2:
+ case OP_VEXT3: {
+ unsigned Imm = (OpNum - OP_VEXT1 + 1) * getExtFactor(OpLHS);
+ return DAG.getNode(AArch64ISD::EXT, dl, VT, OpLHS, OpRHS,
+ DAG.getConstant(Imm, MVT::i32));
+ }
+ case OP_VUZPL:
+ return DAG.getNode(AArch64ISD::UZP1, dl, DAG.getVTList(VT, VT), OpLHS,
+ OpRHS);
+ case OP_VUZPR:
+ return DAG.getNode(AArch64ISD::UZP2, dl, DAG.getVTList(VT, VT), OpLHS,
+ OpRHS);
+ case OP_VZIPL:
+ return DAG.getNode(AArch64ISD::ZIP1, dl, DAG.getVTList(VT, VT), OpLHS,
+ OpRHS);
+ case OP_VZIPR:
+ return DAG.getNode(AArch64ISD::ZIP2, dl, DAG.getVTList(VT, VT), OpLHS,
+ OpRHS);
+ case OP_VTRNL:
+ return DAG.getNode(AArch64ISD::TRN1, dl, DAG.getVTList(VT, VT), OpLHS,
+ OpRHS);
+ case OP_VTRNR:
+ return DAG.getNode(AArch64ISD::TRN2, dl, DAG.getVTList(VT, VT), OpLHS,
+ OpRHS);
+ }
+}
+
+static SDValue GenerateTBL(SDValue Op, ArrayRef<int> ShuffleMask,
+ SelectionDAG &DAG) {
+ // Check to see if we can use the TBL instruction.
+ SDValue V1 = Op.getOperand(0);
+ SDValue V2 = Op.getOperand(1);
+ SDLoc DL(Op);
+
+ EVT EltVT = Op.getValueType().getVectorElementType();
+ unsigned BytesPerElt = EltVT.getSizeInBits() / 8;
+
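+ // TBL indexes individual bytes, so expand each shuffle-mask element into
+ // BytesPerElt byte indices; e.g. for v8i16, element 3 becomes bytes 6 and 7.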
+ SmallVector<SDValue, 8> TBLMask;
+ for (int Val : ShuffleMask) {
+ for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
+ unsigned Offset = Byte + Val * BytesPerElt;
+ TBLMask.push_back(DAG.getConstant(Offset, MVT::i32));
+ }
+ }
+
+ MVT IndexVT = MVT::v8i8;
+ unsigned IndexLen = 8;
+ if (Op.getValueType().getSizeInBits() == 128) {
+ IndexVT = MVT::v16i8;
+ IndexLen = 16;
+ }
+
+ SDValue V1Cst = DAG.getNode(ISD::BITCAST, DL, IndexVT, V1);
+ SDValue V2Cst = DAG.getNode(ISD::BITCAST, DL, IndexVT, V2);
+
+ SDValue Shuffle;
+ if (V2.getNode()->getOpcode() == ISD::UNDEF) {
+ if (IndexLen == 8)
+ V1Cst = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, V1Cst, V1Cst);
+ Shuffle = DAG.getNode(
+ ISD::INTRINSIC_WO_CHAIN, DL, IndexVT,
+ DAG.getConstant(Intrinsic::aarch64_neon_tbl1, MVT::i32), V1Cst,
+ DAG.getNode(ISD::BUILD_VECTOR, DL, IndexVT,
+ makeArrayRef(TBLMask.data(), IndexLen)));
} else {
- // Mask is the whole width.
- Mask = -1ULL >> (64 - N.getValueType().getSizeInBits());
+ if (IndexLen == 8) {
+ V1Cst = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, V1Cst, V2Cst);
+ Shuffle = DAG.getNode(
+ ISD::INTRINSIC_WO_CHAIN, DL, IndexVT,
+ DAG.getConstant(Intrinsic::aarch64_neon_tbl1, MVT::i32), V1Cst,
+ DAG.getNode(ISD::BUILD_VECTOR, DL, IndexVT,
+ makeArrayRef(TBLMask.data(), IndexLen)));
+ } else {
+ // FIXME: We cannot, for the moment, emit a TBL2 instruction because we
+ // cannot currently represent the register constraints on the input
+ // table registers.
+ // Shuffle = DAG.getNode(AArch64ISD::TBL2, DL, IndexVT, V1Cst, V2Cst,
+ // DAG.getNode(ISD::BUILD_VECTOR, DL, IndexVT,
+ // &TBLMask[0], IndexLen));
+ Shuffle = DAG.getNode(
+ ISD::INTRINSIC_WO_CHAIN, DL, IndexVT,
+ DAG.getConstant(Intrinsic::aarch64_neon_tbl2, MVT::i32), V1Cst, V2Cst,
+ DAG.getNode(ISD::BUILD_VECTOR, DL, IndexVT,
+ makeArrayRef(TBLMask.data(), IndexLen)));
+ }
+ }
+ return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Shuffle);
+}
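
The TBL mask built above simply expands each element-level shuffle index into
its constituent byte indices. The same arithmetic in standalone form
(expandTBLMask is a hypothetical helper, not from this patch):

    #include <cstdio>
    #include <vector>

    // Expand an element-level shuffle mask into the byte-level indices a TBL
    // instruction consumes, mirroring the loop in GenerateTBL.
    static std::vector<unsigned> expandTBLMask(const std::vector<int> &Mask,
                                               unsigned BytesPerElt) {
      std::vector<unsigned> TBLMask;
      for (int Val : Mask)
        for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte)
          TBLMask.push_back(Byte + Val * BytesPerElt);
      return TBLMask;
    }

    int main() {
      // A <4 x i16> shuffle <2,3,0,1> becomes byte indices 4 5 6 7 0 1 2 3.
      for (unsigned B : expandTBLMask({2, 3, 0, 1}, 2))
        std::printf("%u ", B);
      std::printf("\n");
      return 0;
    }
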
+
+static unsigned getDUPLANEOp(EVT EltType) {
+ if (EltType == MVT::i8)
+ return AArch64ISD::DUPLANE8;
+ if (EltType == MVT::i16)
+ return AArch64ISD::DUPLANE16;
+ if (EltType == MVT::i32 || EltType == MVT::f32)
+ return AArch64ISD::DUPLANE32;
+ if (EltType == MVT::i64 || EltType == MVT::f64)
+ return AArch64ISD::DUPLANE64;
+
+ llvm_unreachable("Invalid vector element type?");
+}
+
+SDValue AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc dl(Op);
+ EVT VT = Op.getValueType();
+
+ ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
+
+ // Convert shuffles that are directly supported on NEON to target-specific
+ // DAG nodes, instead of keeping them as shuffles and matching them again
+ // during code selection. This is more efficient and avoids the possibility
+ // of inconsistencies between legalization and selection.
+ ArrayRef<int> ShuffleMask = SVN->getMask();
+
+ SDValue V1 = Op.getOperand(0);
+ SDValue V2 = Op.getOperand(1);
+
+ if (ShuffleVectorSDNode::isSplatMask(&ShuffleMask[0],
+ V1.getValueType().getSimpleVT())) {
+ int Lane = SVN->getSplatIndex();
+ // If this is undef splat, generate it via "just" vdup, if possible.
+ if (Lane == -1)
+ Lane = 0;
+
+ if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR)
+ return DAG.getNode(AArch64ISD::DUP, dl, V1.getValueType(),
+ V1.getOperand(0));
+ // Test if V1 is a BUILD_VECTOR and the lane being referenced is a non-
+ // constant. If so, we can just reference the lane's definition directly.
+ if (V1.getOpcode() == ISD::BUILD_VECTOR &&
+ !isa<ConstantSDNode>(V1.getOperand(Lane)))
+ return DAG.getNode(AArch64ISD::DUP, dl, VT, V1.getOperand(Lane));
+
+ // Otherwise, duplicate from the lane of the input vector.
+ unsigned Opcode = getDUPLANEOp(V1.getValueType().getVectorElementType());
+
+    // SelectionDAGBuilder may have "helpfully" already extracted or concatenated
+ // to make a vector of the same size as this SHUFFLE. We can ignore the
+ // extract entirely, and canonicalise the concat using WidenVector.
+ if (V1.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
+ Lane += cast<ConstantSDNode>(V1.getOperand(1))->getZExtValue();
+ V1 = V1.getOperand(0);
+ } else if (V1.getOpcode() == ISD::CONCAT_VECTORS) {
+ unsigned Idx = Lane >= (int)VT.getVectorNumElements() / 2;
+ Lane -= Idx * VT.getVectorNumElements() / 2;
+ V1 = WidenVector(V1.getOperand(Idx), DAG);
+ } else if (VT.getSizeInBits() == 64)
+ V1 = WidenVector(V1, DAG);
+
+ return DAG.getNode(Opcode, dl, VT, V1, DAG.getConstant(Lane, MVT::i64));
+ }
+
+ if (isREVMask(ShuffleMask, VT, 64))
+ return DAG.getNode(AArch64ISD::REV64, dl, V1.getValueType(), V1, V2);
+ if (isREVMask(ShuffleMask, VT, 32))
+ return DAG.getNode(AArch64ISD::REV32, dl, V1.getValueType(), V1, V2);
+ if (isREVMask(ShuffleMask, VT, 16))
+ return DAG.getNode(AArch64ISD::REV16, dl, V1.getValueType(), V1, V2);
+
+ bool ReverseEXT = false;
+ unsigned Imm;
+ if (isEXTMask(ShuffleMask, VT, ReverseEXT, Imm)) {
+ if (ReverseEXT)
+ std::swap(V1, V2);
+ Imm *= getExtFactor(V1);
+ return DAG.getNode(AArch64ISD::EXT, dl, V1.getValueType(), V1, V2,
+ DAG.getConstant(Imm, MVT::i32));
+ } else if (V2->getOpcode() == ISD::UNDEF &&
+ isSingletonEXTMask(ShuffleMask, VT, Imm)) {
+ Imm *= getExtFactor(V1);
+ return DAG.getNode(AArch64ISD::EXT, dl, V1.getValueType(), V1, V1,
+ DAG.getConstant(Imm, MVT::i32));
+ }
+
+ unsigned WhichResult;
+ if (isZIPMask(ShuffleMask, VT, WhichResult)) {
+ unsigned Opc = (WhichResult == 0) ? AArch64ISD::ZIP1 : AArch64ISD::ZIP2;
+ return DAG.getNode(Opc, dl, V1.getValueType(), V1, V2);
+ }
+ if (isUZPMask(ShuffleMask, VT, WhichResult)) {
+ unsigned Opc = (WhichResult == 0) ? AArch64ISD::UZP1 : AArch64ISD::UZP2;
+ return DAG.getNode(Opc, dl, V1.getValueType(), V1, V2);
+ }
+ if (isTRNMask(ShuffleMask, VT, WhichResult)) {
+ unsigned Opc = (WhichResult == 0) ? AArch64ISD::TRN1 : AArch64ISD::TRN2;
+ return DAG.getNode(Opc, dl, V1.getValueType(), V1, V2);
+ }
+
+ if (isZIP_v_undef_Mask(ShuffleMask, VT, WhichResult)) {
+ unsigned Opc = (WhichResult == 0) ? AArch64ISD::ZIP1 : AArch64ISD::ZIP2;
+ return DAG.getNode(Opc, dl, V1.getValueType(), V1, V1);
+ }
+ if (isUZP_v_undef_Mask(ShuffleMask, VT, WhichResult)) {
+ unsigned Opc = (WhichResult == 0) ? AArch64ISD::UZP1 : AArch64ISD::UZP2;
+ return DAG.getNode(Opc, dl, V1.getValueType(), V1, V1);
+ }
+ if (isTRN_v_undef_Mask(ShuffleMask, VT, WhichResult)) {
+ unsigned Opc = (WhichResult == 0) ? AArch64ISD::TRN1 : AArch64ISD::TRN2;
+ return DAG.getNode(Opc, dl, V1.getValueType(), V1, V1);
+ }
+
+ SDValue Concat = tryFormConcatFromShuffle(Op, DAG);
+ if (Concat.getNode())
+ return Concat;
+
+ bool DstIsLeft;
+ int Anomaly;
+ int NumInputElements = V1.getValueType().getVectorNumElements();
+ if (isINSMask(ShuffleMask, NumInputElements, DstIsLeft, Anomaly)) {
+ SDValue DstVec = DstIsLeft ? V1 : V2;
+ SDValue DstLaneV = DAG.getConstant(Anomaly, MVT::i64);
+
+ SDValue SrcVec = V1;
+ int SrcLane = ShuffleMask[Anomaly];
+ if (SrcLane >= NumInputElements) {
+ SrcVec = V2;
+ SrcLane -= VT.getVectorNumElements();
+ }
+ SDValue SrcLaneV = DAG.getConstant(SrcLane, MVT::i64);
+
+ EVT ScalarVT = VT.getVectorElementType();
+ if (ScalarVT.getSizeInBits() < 32)
+ ScalarVT = MVT::i32;
+
+ return DAG.getNode(
+ ISD::INSERT_VECTOR_ELT, dl, VT, DstVec,
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ScalarVT, SrcVec, SrcLaneV),
+ DstLaneV);
+ }
+
+ // If the shuffle is not directly supported and it has 4 elements, use
+ // the PerfectShuffle-generated table to synthesize it from other shuffles.
+ unsigned NumElts = VT.getVectorNumElements();
+ if (NumElts == 4) {
+ unsigned PFIndexes[4];
+ for (unsigned i = 0; i != 4; ++i) {
+ if (ShuffleMask[i] < 0)
+ PFIndexes[i] = 8;
+ else
+ PFIndexes[i] = ShuffleMask[i];
+ }
+
+ // Compute the index in the perfect shuffle table.
+ unsigned PFTableIndex = PFIndexes[0] * 9 * 9 * 9 + PFIndexes[1] * 9 * 9 +
+ PFIndexes[2] * 9 + PFIndexes[3];
+ unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
+ unsigned Cost = (PFEntry >> 30);
+
+ if (Cost <= 4)
+ return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
}
- if (N.getOpcode() == AArch64ISD::BFI) {
- BFI = N;
+ return GenerateTBL(Op, ShuffleMask, DAG);
+}
+
+static bool resolveBuildVector(BuildVectorSDNode *BVN, APInt &CnstBits,
+ APInt &UndefBits) {
+ EVT VT = BVN->getValueType(0);
+ APInt SplatBits, SplatUndef;
+ unsigned SplatBitSize;
+ bool HasAnyUndefs;
+ if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
+ unsigned NumSplats = VT.getSizeInBits() / SplatBitSize;
+
+ for (unsigned i = 0; i < NumSplats; ++i) {
+ CnstBits <<= SplatBitSize;
+ UndefBits <<= SplatBitSize;
+ CnstBits |= SplatBits.zextOrTrunc(VT.getSizeInBits());
+ UndefBits |= (SplatBits ^ SplatUndef).zextOrTrunc(VT.getSizeInBits());
+ }
+
return true;
}
return false;
}
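
resolveBuildVector widens a constant splat into a full-width bit pattern by
repeatedly shifting the accumulator and OR-ing the splat back in. The same
replication on a plain 64-bit integer, as a sketch (replicateSplat is a
hypothetical stand-in for the APInt loop above):

    #include <cstdint>
    #include <cstdio>

    // Replicate a SplatBitSize-bit pattern across TotalBits, mirroring the
    // shift-and-or loop in resolveBuildVector (uint64_t stands in for APInt).
    static uint64_t replicateSplat(uint64_t SplatBits, unsigned SplatBitSize,
                                   unsigned TotalBits) {
      uint64_t Bits = 0;
      for (unsigned i = 0; i < TotalBits / SplatBitSize; ++i)
        Bits = (Bits << SplatBitSize) | SplatBits;
      return Bits;
    }

    int main() {
      // A v4i16 splat of 0x00FF becomes 0x00ff00ff00ff00ff.
      std::printf("0x%016llx\n",
                  (unsigned long long)replicateSplat(0x00FF, 16, 64));
      return 0;
    }
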
-/// Try to combine a subtree (rooted at an OR) into a "masked BFI" node, which
-/// is roughly equivalent to (and (BFI ...), mask). This form is used because it
-/// can often be further combined with a larger mask. Ultimately, we want mask
-/// to be 2^32-1 or 2^64-1 so the AND can be skipped.
-static SDValue tryCombineToBFI(SDNode *N,
- TargetLowering::DAGCombinerInfo &DCI,
- const AArch64Subtarget *Subtarget) {
- SelectionDAG &DAG = DCI.DAG;
- SDLoc DL(N);
+SDValue AArch64TargetLowering::LowerVectorAND(SDValue Op,
+ SelectionDAG &DAG) const {
+ BuildVectorSDNode *BVN =
+ dyn_cast<BuildVectorSDNode>(Op.getOperand(1).getNode());
+ SDValue LHS = Op.getOperand(0);
+ SDLoc dl(Op);
+ EVT VT = Op.getValueType();
+
+ if (!BVN)
+ return Op;
+
+ APInt CnstBits(VT.getSizeInBits(), 0);
+ APInt UndefBits(VT.getSizeInBits(), 0);
+ if (resolveBuildVector(BVN, CnstBits, UndefBits)) {
+ // We only have BIC vector immediate instruction, which is and-not.
+ CnstBits = ~CnstBits;
+
+ // We make use of a little bit of goto ickiness in order to avoid having to
+ // duplicate the immediate matching logic for the undef toggled case.
+ bool SecondTry = false;
+ AttemptModImm:
+
+ if (CnstBits.getHiBits(64) == CnstBits.getLoBits(64)) {
+ CnstBits = CnstBits.zextOrTrunc(64);
+ uint64_t CnstVal = CnstBits.getZExtValue();
+
+ if (AArch64_AM::isAdvSIMDModImmType1(CnstVal)) {
+ CnstVal = AArch64_AM::encodeAdvSIMDModImmType1(CnstVal);
+ MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
+ SDValue Mov = DAG.getNode(AArch64ISD::BICi, dl, MovTy, LHS,
+ DAG.getConstant(CnstVal, MVT::i32),
+ DAG.getConstant(0, MVT::i32));
+ return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
+ }
+
+ if (AArch64_AM::isAdvSIMDModImmType2(CnstVal)) {
+ CnstVal = AArch64_AM::encodeAdvSIMDModImmType2(CnstVal);
+ MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
+ SDValue Mov = DAG.getNode(AArch64ISD::BICi, dl, MovTy, LHS,
+ DAG.getConstant(CnstVal, MVT::i32),
+ DAG.getConstant(8, MVT::i32));
+ return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
+ }
+
+ if (AArch64_AM::isAdvSIMDModImmType3(CnstVal)) {
+ CnstVal = AArch64_AM::encodeAdvSIMDModImmType3(CnstVal);
+ MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
+ SDValue Mov = DAG.getNode(AArch64ISD::BICi, dl, MovTy, LHS,
+ DAG.getConstant(CnstVal, MVT::i32),
+ DAG.getConstant(16, MVT::i32));
+ return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
+ }
+
+ if (AArch64_AM::isAdvSIMDModImmType4(CnstVal)) {
+ CnstVal = AArch64_AM::encodeAdvSIMDModImmType4(CnstVal);
+ MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
+ SDValue Mov = DAG.getNode(AArch64ISD::BICi, dl, MovTy, LHS,
+ DAG.getConstant(CnstVal, MVT::i32),
+ DAG.getConstant(24, MVT::i32));
+ return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
+ }
+
+ if (AArch64_AM::isAdvSIMDModImmType5(CnstVal)) {
+ CnstVal = AArch64_AM::encodeAdvSIMDModImmType5(CnstVal);
+ MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16;
+ SDValue Mov = DAG.getNode(AArch64ISD::BICi, dl, MovTy, LHS,
+ DAG.getConstant(CnstVal, MVT::i32),
+ DAG.getConstant(0, MVT::i32));
+ return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
+ }
+
+ if (AArch64_AM::isAdvSIMDModImmType6(CnstVal)) {
+ CnstVal = AArch64_AM::encodeAdvSIMDModImmType6(CnstVal);
+ MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16;
+ SDValue Mov = DAG.getNode(AArch64ISD::BICi, dl, MovTy, LHS,
+ DAG.getConstant(CnstVal, MVT::i32),
+ DAG.getConstant(8, MVT::i32));
+ return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
+ }
+ }
+
+ if (SecondTry)
+ goto FailedModImm;
+ SecondTry = true;
+ CnstBits = ~UndefBits;
+ goto AttemptModImm;
+ }
+
+// We can always fall back to a non-immediate AND.
+FailedModImm:
+ return Op;
+}
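
To make the BIC path concrete: an AND whose inverted constant matches one of
the AdvSIMD modified-immediate classes collapses to a single BICi. The real
predicates live in AArch64_AM; the sketch below only approximates the type 1-4
case (a single byte at a byte-aligned shift within a 32-bit lane), and
isSingleByteLane is a hypothetical helper:

    #include <cstdint>
    #include <cstdio>

    // Rough stand-in for the AdvSIMD ModImm type 1-4 checks: a 32-bit lane
    // pattern is encodable when it is one byte at a byte-aligned shift.
    static bool isSingleByteLane(uint32_t Lane, unsigned &Shift) {
      for (Shift = 0; Shift <= 24; Shift += 8)
        if ((Lane & ~(0xFFu << Shift)) == 0)
          return true;
      return false;
    }

    int main() {
      // For (and v.4s, splat(0xFFFF00FF)): the inverted mask 0x0000FF00 is
      // one byte at shift 8, so the AND becomes  bic v.4s, #0xff, lsl #8.
      uint32_t Inverted = ~0xFFFF00FFu; // 0x0000FF00
      unsigned Shift;
      if (isSingleByteLane(Inverted, Shift))
        std::printf("bic v.4s, #0x%x, lsl #%u\n", Inverted >> Shift, Shift);
      return 0;
    }
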
+
+// Specialized code to quickly find if PotentialBVec is a BuildVector whose
+// elements are all the same constant integer value, returned in the
+// reference arg ConstVal.
+static bool isAllConstantBuildVector(const SDValue &PotentialBVec,
+ uint64_t &ConstVal) {
+ BuildVectorSDNode *Bvec = dyn_cast<BuildVectorSDNode>(PotentialBVec);
+ if (!Bvec)
+ return false;
+ ConstantSDNode *FirstElt = dyn_cast<ConstantSDNode>(Bvec->getOperand(0));
+ if (!FirstElt)
+ return false;
+ EVT VT = Bvec->getValueType(0);
+ unsigned NumElts = VT.getVectorNumElements();
+ for (unsigned i = 1; i < NumElts; ++i)
+ if (dyn_cast<ConstantSDNode>(Bvec->getOperand(i)) != FirstElt)
+ return false;
+ ConstVal = FirstElt->getZExtValue();
+ return true;
+}
+
+static unsigned getIntrinsicID(const SDNode *N) {
+ unsigned Opcode = N->getOpcode();
+ switch (Opcode) {
+ default:
+ return Intrinsic::not_intrinsic;
+ case ISD::INTRINSIC_WO_CHAIN: {
+ unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
+ if (IID < Intrinsic::num_intrinsics)
+ return IID;
+ return Intrinsic::not_intrinsic;
+ }
+ }
+}
+
+// Attempt to form a vector S[LR]I by rewriting (or (and X, BvecC1),
+// (lsl Y, C2)) to (SLI X, Y, C2), where X and Y have matching vector types,
+// BvecC1 is a BUILD_VECTOR with constant element C1, C2 is a constant, and
+// C1 == ~C2. Similarly, a logical shift right yields SRI with the same
+// structure.
+static SDValue tryLowerToSLI(SDNode *N, SelectionDAG &DAG) {
EVT VT = N->getValueType(0);
- assert(N->getOpcode() == ISD::OR && "Unexpected root");
+ if (!VT.isVector())
+ return SDValue();
- // We need the LHS to be (and SOMETHING, MASK). Find out what that mask is or
- // abandon the effort.
- SDValue LHS = N->getOperand(0);
- if (LHS.getOpcode() != ISD::AND)
+ SDLoc DL(N);
+
+ // Is the first op an AND?
+ const SDValue And = N->getOperand(0);
+ if (And.getOpcode() != ISD::AND)
return SDValue();
- uint64_t LHSMask;
- if (isa<ConstantSDNode>(LHS.getOperand(1)))
- LHSMask = LHS->getConstantOperandVal(1);
- else
+  // Is the second op a shl or lshr?
+ SDValue Shift = N->getOperand(1);
+ // This will have been turned into: AArch64ISD::VSHL vector, #shift
+ // or AArch64ISD::VLSHR vector, #shift
+ unsigned ShiftOpc = Shift.getOpcode();
+  if (ShiftOpc != AArch64ISD::VSHL && ShiftOpc != AArch64ISD::VLSHR)
return SDValue();
+ bool IsShiftRight = ShiftOpc == AArch64ISD::VLSHR;
- // We also need the RHS to be (and SOMETHING, MASK). Find out what that mask
- // is or abandon the effort.
- SDValue RHS = N->getOperand(1);
- if (RHS.getOpcode() != ISD::AND)
+ // Is the shift amount constant?
+ ConstantSDNode *C2node = dyn_cast<ConstantSDNode>(Shift.getOperand(1));
+ if (!C2node)
return SDValue();
- uint64_t RHSMask;
- if (isa<ConstantSDNode>(RHS.getOperand(1)))
- RHSMask = RHS->getConstantOperandVal(1);
- else
+ // Is the and mask vector all constant?
+ uint64_t C1;
+ if (!isAllConstantBuildVector(And.getOperand(1), C1))
return SDValue();
- // Can't do anything if the masks are incompatible.
- if (LHSMask & RHSMask)
+ // Is C1 == ~C2, taking into account how much one can shift elements of a
+ // particular size?
+ uint64_t C2 = C2node->getZExtValue();
+ unsigned ElemSizeInBits = VT.getVectorElementType().getSizeInBits();
+ if (C2 > ElemSizeInBits)
+ return SDValue();
+ unsigned ElemMask = (1 << ElemSizeInBits) - 1;
+ if ((C1 & ElemMask) != (~C2 & ElemMask))
return SDValue();
- // Now we need one of the masks to be a contiguous field. Without loss of
- // generality that should be the RHS one.
- SDValue Bitfield = LHS.getOperand(0);
- if (getLSBForBFI(DAG, DL, VT, Bitfield, LHSMask) != -1) {
- // We know that LHS is a candidate new value, and RHS isn't already a better
- // one.
- std::swap(LHS, RHS);
- std::swap(LHSMask, RHSMask);
+ SDValue X = And.getOperand(0);
+ SDValue Y = Shift.getOperand(0);
+
+ unsigned Intrin =
+ IsShiftRight ? Intrinsic::aarch64_neon_vsri : Intrinsic::aarch64_neon_vsli;
+ SDValue ResultSLI =
+ DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
+ DAG.getConstant(Intrin, MVT::i32), X, Y, Shift.getOperand(1));
+
+ DEBUG(dbgs() << "aarch64-lower: transformed: \n");
+ DEBUG(N->dump(&DAG));
+ DEBUG(dbgs() << "into: \n");
+ DEBUG(ResultSLI->dump(&DAG));
+
+ ++NumShiftInserts;
+ return ResultSLI;
+}
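
As background on the instructions being formed: SLI inserts the left-shifted
source into the destination while preserving the destination's low bits, and
SRI preserves its high bits, which is why the AND mask must clear exactly the
bits the shift fills. A scalar model of the per-lane semantics (sli8 and sri8
are hypothetical helpers, simplified from the vector forms):

    #include <cstdint>
    #include <cstdio>

    // Per-lane semantics of SLI/SRI on an 8-bit element.
    static uint8_t sli8(uint8_t X, uint8_t Y, unsigned C) {
      // Shift-left-insert: the low C bits of X survive.
      return (uint8_t)((unsigned)(Y << C) | (X & ((1u << C) - 1)));
    }
    static uint8_t sri8(uint8_t X, uint8_t Y, unsigned C) {
      // Shift-right-insert: the high C bits of X survive.
      return (uint8_t)((Y >> C) | (X & ~(0xFFu >> C)));
    }

    int main() {
      std::printf("sli 0x0f,0xab,4 -> 0x%02x\n", sli8(0x0F, 0xAB, 4)); // 0xbf
      std::printf("sri 0xf0,0xab,4 -> 0x%02x\n", sri8(0xF0, 0xAB, 4)); // 0xfa
      return 0;
    }
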
+
+SDValue AArch64TargetLowering::LowerVectorOR(SDValue Op,
+ SelectionDAG &DAG) const {
+ // Attempt to form a vector S[LR]I from (or (and X, C1), (lsl Y, C2))
+ if (EnableAArch64SlrGeneration) {
+ SDValue Res = tryLowerToSLI(Op.getNode(), DAG);
+ if (Res.getNode())
+ return Res;
+ }
+
+ BuildVectorSDNode *BVN =
+ dyn_cast<BuildVectorSDNode>(Op.getOperand(0).getNode());
+ SDValue LHS = Op.getOperand(1);
+ SDLoc dl(Op);
+ EVT VT = Op.getValueType();
+
+ // OR commutes, so try swapping the operands.
+ if (!BVN) {
+ LHS = Op.getOperand(0);
+ BVN = dyn_cast<BuildVectorSDNode>(Op.getOperand(1).getNode());
}
+ if (!BVN)
+ return Op;
- // We've done our best to put the right operands in the right places, all we
- // can do now is check whether a BFI exists.
- Bitfield = RHS.getOperand(0);
- int32_t LSB = getLSBForBFI(DAG, DL, VT, Bitfield, RHSMask);
- if (LSB == -1)
+ APInt CnstBits(VT.getSizeInBits(), 0);
+ APInt UndefBits(VT.getSizeInBits(), 0);
+ if (resolveBuildVector(BVN, CnstBits, UndefBits)) {
+ // We make use of a little bit of goto ickiness in order to avoid having to
+ // duplicate the immediate matching logic for the undef toggled case.
+ bool SecondTry = false;
+ AttemptModImm:
+
+ if (CnstBits.getHiBits(64) == CnstBits.getLoBits(64)) {
+ CnstBits = CnstBits.zextOrTrunc(64);
+ uint64_t CnstVal = CnstBits.getZExtValue();
+
+ if (AArch64_AM::isAdvSIMDModImmType1(CnstVal)) {
+ CnstVal = AArch64_AM::encodeAdvSIMDModImmType1(CnstVal);
+ MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
+ SDValue Mov = DAG.getNode(AArch64ISD::ORRi, dl, MovTy, LHS,
+ DAG.getConstant(CnstVal, MVT::i32),
+ DAG.getConstant(0, MVT::i32));
+ return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
+ }
+
+ if (AArch64_AM::isAdvSIMDModImmType2(CnstVal)) {
+ CnstVal = AArch64_AM::encodeAdvSIMDModImmType2(CnstVal);
+ MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
+ SDValue Mov = DAG.getNode(AArch64ISD::ORRi, dl, MovTy, LHS,
+ DAG.getConstant(CnstVal, MVT::i32),
+ DAG.getConstant(8, MVT::i32));
+ return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
+ }
+
+ if (AArch64_AM::isAdvSIMDModImmType3(CnstVal)) {
+ CnstVal = AArch64_AM::encodeAdvSIMDModImmType3(CnstVal);
+ MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
+ SDValue Mov = DAG.getNode(AArch64ISD::ORRi, dl, MovTy, LHS,
+ DAG.getConstant(CnstVal, MVT::i32),
+ DAG.getConstant(16, MVT::i32));
+ return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
+ }
+
+ if (AArch64_AM::isAdvSIMDModImmType4(CnstVal)) {
+ CnstVal = AArch64_AM::encodeAdvSIMDModImmType4(CnstVal);
+ MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
+ SDValue Mov = DAG.getNode(AArch64ISD::ORRi, dl, MovTy, LHS,
+ DAG.getConstant(CnstVal, MVT::i32),
+ DAG.getConstant(24, MVT::i32));
+ return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
+ }
+
+ if (AArch64_AM::isAdvSIMDModImmType5(CnstVal)) {
+ CnstVal = AArch64_AM::encodeAdvSIMDModImmType5(CnstVal);
+ MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16;
+ SDValue Mov = DAG.getNode(AArch64ISD::ORRi, dl, MovTy, LHS,
+ DAG.getConstant(CnstVal, MVT::i32),
+ DAG.getConstant(0, MVT::i32));
+ return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
+ }
+
+ if (AArch64_AM::isAdvSIMDModImmType6(CnstVal)) {
+ CnstVal = AArch64_AM::encodeAdvSIMDModImmType6(CnstVal);
+ MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16;
+ SDValue Mov = DAG.getNode(AArch64ISD::ORRi, dl, MovTy, LHS,
+ DAG.getConstant(CnstVal, MVT::i32),
+ DAG.getConstant(8, MVT::i32));
+ return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
+ }
+ }
+
+ if (SecondTry)
+ goto FailedModImm;
+ SecondTry = true;
+ CnstBits = UndefBits;
+ goto AttemptModImm;
+ }
+
+// We can always fall back to a non-immediate OR.
+FailedModImm:
+ return Op;
+}
+
+SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
+ SelectionDAG &DAG) const {
+ BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
+ SDLoc dl(Op);
+ EVT VT = Op.getValueType();
+
+ APInt CnstBits(VT.getSizeInBits(), 0);
+ APInt UndefBits(VT.getSizeInBits(), 0);
+ if (resolveBuildVector(BVN, CnstBits, UndefBits)) {
+ // We make use of a little bit of goto ickiness in order to avoid having to
+ // duplicate the immediate matching logic for the undef toggled case.
+ bool SecondTry = false;
+ AttemptModImm:
+
+ if (CnstBits.getHiBits(64) == CnstBits.getLoBits(64)) {
+ CnstBits = CnstBits.zextOrTrunc(64);
+ uint64_t CnstVal = CnstBits.getZExtValue();
+
+ // Certain magic vector constants (used to express things like NOT
+ // and NEG) are passed through unmodified. This allows codegen patterns
+ // for these operations to match. Special-purpose patterns will lower
+ // these immediates to MOVIs if it proves necessary.
+ if (VT.isInteger() && (CnstVal == 0 || CnstVal == ~0ULL))
+ return Op;
+
+ // The many faces of MOVI...
+ if (AArch64_AM::isAdvSIMDModImmType10(CnstVal)) {
+ CnstVal = AArch64_AM::encodeAdvSIMDModImmType10(CnstVal);
+ if (VT.getSizeInBits() == 128) {
+ SDValue Mov = DAG.getNode(AArch64ISD::MOVIedit, dl, MVT::v2i64,
+ DAG.getConstant(CnstVal, MVT::i32));
+ return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
+ }
+
+ // Support the V64 version via subregister insertion.
+ SDValue Mov = DAG.getNode(AArch64ISD::MOVIedit, dl, MVT::f64,
+ DAG.getConstant(CnstVal, MVT::i32));
+ return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
+ }
+
+ if (AArch64_AM::isAdvSIMDModImmType1(CnstVal)) {
+ CnstVal = AArch64_AM::encodeAdvSIMDModImmType1(CnstVal);
+ MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
+ SDValue Mov = DAG.getNode(AArch64ISD::MOVIshift, dl, MovTy,
+ DAG.getConstant(CnstVal, MVT::i32),
+ DAG.getConstant(0, MVT::i32));
+ return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
+ }
+
+ if (AArch64_AM::isAdvSIMDModImmType2(CnstVal)) {
+ CnstVal = AArch64_AM::encodeAdvSIMDModImmType2(CnstVal);
+ MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
+ SDValue Mov = DAG.getNode(AArch64ISD::MOVIshift, dl, MovTy,
+ DAG.getConstant(CnstVal, MVT::i32),
+ DAG.getConstant(8, MVT::i32));
+ return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
+ }
+
+ if (AArch64_AM::isAdvSIMDModImmType3(CnstVal)) {
+ CnstVal = AArch64_AM::encodeAdvSIMDModImmType3(CnstVal);
+ MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
+ SDValue Mov = DAG.getNode(AArch64ISD::MOVIshift, dl, MovTy,
+ DAG.getConstant(CnstVal, MVT::i32),
+ DAG.getConstant(16, MVT::i32));
+ return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
+ }
+
+ if (AArch64_AM::isAdvSIMDModImmType4(CnstVal)) {
+ CnstVal = AArch64_AM::encodeAdvSIMDModImmType4(CnstVal);
+ MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
+ SDValue Mov = DAG.getNode(AArch64ISD::MOVIshift, dl, MovTy,
+ DAG.getConstant(CnstVal, MVT::i32),
+ DAG.getConstant(24, MVT::i32));
+ return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
+ }
+
+ if (AArch64_AM::isAdvSIMDModImmType5(CnstVal)) {
+ CnstVal = AArch64_AM::encodeAdvSIMDModImmType5(CnstVal);
+ MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16;
+ SDValue Mov = DAG.getNode(AArch64ISD::MOVIshift, dl, MovTy,
+ DAG.getConstant(CnstVal, MVT::i32),
+ DAG.getConstant(0, MVT::i32));
+ return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
+ }
+
+ if (AArch64_AM::isAdvSIMDModImmType6(CnstVal)) {
+ CnstVal = AArch64_AM::encodeAdvSIMDModImmType6(CnstVal);
+ MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16;
+ SDValue Mov = DAG.getNode(AArch64ISD::MOVIshift, dl, MovTy,
+ DAG.getConstant(CnstVal, MVT::i32),
+ DAG.getConstant(8, MVT::i32));
+ return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
+ }
+
+ if (AArch64_AM::isAdvSIMDModImmType7(CnstVal)) {
+ CnstVal = AArch64_AM::encodeAdvSIMDModImmType7(CnstVal);
+ MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
+ SDValue Mov = DAG.getNode(AArch64ISD::MOVImsl, dl, MovTy,
+ DAG.getConstant(CnstVal, MVT::i32),
+ DAG.getConstant(264, MVT::i32));
+ return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
+ }
+
+ if (AArch64_AM::isAdvSIMDModImmType8(CnstVal)) {
+ CnstVal = AArch64_AM::encodeAdvSIMDModImmType8(CnstVal);
+ MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
+ SDValue Mov = DAG.getNode(AArch64ISD::MOVImsl, dl, MovTy,
+ DAG.getConstant(CnstVal, MVT::i32),
+ DAG.getConstant(272, MVT::i32));
+ return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
+ }
+
+ if (AArch64_AM::isAdvSIMDModImmType9(CnstVal)) {
+ CnstVal = AArch64_AM::encodeAdvSIMDModImmType9(CnstVal);
+ MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v16i8 : MVT::v8i8;
+ SDValue Mov = DAG.getNode(AArch64ISD::MOVI, dl, MovTy,
+ DAG.getConstant(CnstVal, MVT::i32));
+ return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
+ }
+
+ // The few faces of FMOV...
+ if (AArch64_AM::isAdvSIMDModImmType11(CnstVal)) {
+ CnstVal = AArch64_AM::encodeAdvSIMDModImmType11(CnstVal);
+ MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4f32 : MVT::v2f32;
+ SDValue Mov = DAG.getNode(AArch64ISD::FMOV, dl, MovTy,
+ DAG.getConstant(CnstVal, MVT::i32));
+ return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
+ }
+
+ if (AArch64_AM::isAdvSIMDModImmType12(CnstVal) &&
+ VT.getSizeInBits() == 128) {
+ CnstVal = AArch64_AM::encodeAdvSIMDModImmType12(CnstVal);
+ SDValue Mov = DAG.getNode(AArch64ISD::FMOV, dl, MVT::v2f64,
+ DAG.getConstant(CnstVal, MVT::i32));
+ return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
+ }
+
+ // The many faces of MVNI...
+ CnstVal = ~CnstVal;
+ if (AArch64_AM::isAdvSIMDModImmType1(CnstVal)) {
+ CnstVal = AArch64_AM::encodeAdvSIMDModImmType1(CnstVal);
+ MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
+ SDValue Mov = DAG.getNode(AArch64ISD::MVNIshift, dl, MovTy,
+ DAG.getConstant(CnstVal, MVT::i32),
+ DAG.getConstant(0, MVT::i32));
+ return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
+ }
+
+ if (AArch64_AM::isAdvSIMDModImmType2(CnstVal)) {
+ CnstVal = AArch64_AM::encodeAdvSIMDModImmType2(CnstVal);
+ MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
+ SDValue Mov = DAG.getNode(AArch64ISD::MVNIshift, dl, MovTy,
+ DAG.getConstant(CnstVal, MVT::i32),
+ DAG.getConstant(8, MVT::i32));
+ return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
+ }
+
+ if (AArch64_AM::isAdvSIMDModImmType3(CnstVal)) {
+ CnstVal = AArch64_AM::encodeAdvSIMDModImmType3(CnstVal);
+ MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
+ SDValue Mov = DAG.getNode(AArch64ISD::MVNIshift, dl, MovTy,
+ DAG.getConstant(CnstVal, MVT::i32),
+ DAG.getConstant(16, MVT::i32));
+ return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
+ }
+
+ if (AArch64_AM::isAdvSIMDModImmType4(CnstVal)) {
+ CnstVal = AArch64_AM::encodeAdvSIMDModImmType4(CnstVal);
+ MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
+ SDValue Mov = DAG.getNode(AArch64ISD::MVNIshift, dl, MovTy,
+ DAG.getConstant(CnstVal, MVT::i32),
+ DAG.getConstant(24, MVT::i32));
+ return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
+ }
+
+ if (AArch64_AM::isAdvSIMDModImmType5(CnstVal)) {
+ CnstVal = AArch64_AM::encodeAdvSIMDModImmType5(CnstVal);
+ MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16;
+ SDValue Mov = DAG.getNode(AArch64ISD::MVNIshift, dl, MovTy,
+ DAG.getConstant(CnstVal, MVT::i32),
+ DAG.getConstant(0, MVT::i32));
+ return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
+ }
+
+ if (AArch64_AM::isAdvSIMDModImmType6(CnstVal)) {
+ CnstVal = AArch64_AM::encodeAdvSIMDModImmType6(CnstVal);
+ MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16;
+ SDValue Mov = DAG.getNode(AArch64ISD::MVNIshift, dl, MovTy,
+ DAG.getConstant(CnstVal, MVT::i32),
+ DAG.getConstant(8, MVT::i32));
+ return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
+ }
+
+ if (AArch64_AM::isAdvSIMDModImmType7(CnstVal)) {
+ CnstVal = AArch64_AM::encodeAdvSIMDModImmType7(CnstVal);
+ MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
+ SDValue Mov = DAG.getNode(AArch64ISD::MVNImsl, dl, MovTy,
+ DAG.getConstant(CnstVal, MVT::i32),
+ DAG.getConstant(264, MVT::i32));
+ return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
+ }
+
+ if (AArch64_AM::isAdvSIMDModImmType8(CnstVal)) {
+ CnstVal = AArch64_AM::encodeAdvSIMDModImmType8(CnstVal);
+ MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
+ SDValue Mov = DAG.getNode(AArch64ISD::MVNImsl, dl, MovTy,
+ DAG.getConstant(CnstVal, MVT::i32),
+ DAG.getConstant(272, MVT::i32));
+ return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
+ }
+ }
+
+ if (SecondTry)
+ goto FailedModImm;
+ SecondTry = true;
+ CnstBits = UndefBits;
+ goto AttemptModImm;
+ }
+FailedModImm:
+
+ // Scan through the operands to find some interesting properties we can
+ // exploit:
+ // 1) If only one value is used, we can use a DUP, or
+ // 2) if only the low element is not undef, we can just insert that, or
+ // 3) if only one constant value is used (w/ some non-constant lanes),
+ // we can splat the constant value into the whole vector then fill
+ // in the non-constant lanes.
+ // 4) FIXME: If different constant values are used, but we can intelligently
+ // select the values we'll be overwriting for the non-constant
+ // lanes such that we can directly materialize the vector
+ // some other way (MOVI, e.g.), we can be sneaky.
+ unsigned NumElts = VT.getVectorNumElements();
+ bool isOnlyLowElement = true;
+ bool usesOnlyOneValue = true;
+ bool usesOnlyOneConstantValue = true;
+ bool isConstant = true;
+ unsigned NumConstantLanes = 0;
+ SDValue Value;
+ SDValue ConstantValue;
+ for (unsigned i = 0; i < NumElts; ++i) {
+ SDValue V = Op.getOperand(i);
+ if (V.getOpcode() == ISD::UNDEF)
+ continue;
+ if (i > 0)
+ isOnlyLowElement = false;
+ if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V))
+ isConstant = false;
+
+ if (isa<ConstantSDNode>(V) || isa<ConstantFPSDNode>(V)) {
+ ++NumConstantLanes;
+ if (!ConstantValue.getNode())
+ ConstantValue = V;
+ else if (ConstantValue != V)
+ usesOnlyOneConstantValue = false;
+ }
+
+ if (!Value.getNode())
+ Value = V;
+ else if (V != Value)
+ usesOnlyOneValue = false;
+ }
+
+ if (!Value.getNode())
+ return DAG.getUNDEF(VT);
+
+ if (isOnlyLowElement)
+ return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value);
+
+ // Use DUP for non-constant splats. For f32 constant splats, reduce to
+ // i32 and try again.
+ if (usesOnlyOneValue) {
+ if (!isConstant) {
+ if (Value.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
+ Value.getValueType() != VT)
+ return DAG.getNode(AArch64ISD::DUP, dl, VT, Value);
+
+ // This is actually a DUPLANExx operation, which keeps everything vectory.
+
+ // DUPLANE works on 128-bit vectors, widen it if necessary.
+ SDValue Lane = Value.getOperand(1);
+ Value = Value.getOperand(0);
+ if (Value.getValueType().getSizeInBits() == 64)
+ Value = WidenVector(Value, DAG);
+
+ unsigned Opcode = getDUPLANEOp(VT.getVectorElementType());
+ return DAG.getNode(Opcode, dl, VT, Value, Lane);
+ }
+
+ if (VT.getVectorElementType().isFloatingPoint()) {
+ SmallVector<SDValue, 8> Ops;
+ MVT NewType =
+ (VT.getVectorElementType() == MVT::f32) ? MVT::i32 : MVT::i64;
+ for (unsigned i = 0; i < NumElts; ++i)
+ Ops.push_back(DAG.getNode(ISD::BITCAST, dl, NewType, Op.getOperand(i)));
+ EVT VecVT = EVT::getVectorVT(*DAG.getContext(), NewType, NumElts);
+ SDValue Val = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, Ops);
+ Val = LowerBUILD_VECTOR(Val, DAG);
+ if (Val.getNode())
+ return DAG.getNode(ISD::BITCAST, dl, VT, Val);
+ }
+ }
+
+  // If only one constant value was used across the constant lanes, start by
+  // splatting that value, then replace the non-constant lanes. This is better
+  // than the default, which will perform a separate initialization for each
+  // lane.
+ if (NumConstantLanes > 0 && usesOnlyOneConstantValue) {
+ SDValue Val = DAG.getNode(AArch64ISD::DUP, dl, VT, ConstantValue);
+ // Now insert the non-constant lanes.
+ for (unsigned i = 0; i < NumElts; ++i) {
+ SDValue V = Op.getOperand(i);
+ SDValue LaneIdx = DAG.getConstant(i, MVT::i64);
+ if (!isa<ConstantSDNode>(V) && !isa<ConstantFPSDNode>(V)) {
+ // Note that type legalization likely mucked about with the VT of the
+ // source operand, so we may have to convert it here before inserting.
+ Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Val, V, LaneIdx);
+ }
+ }
+ return Val;
+ }
+
+ // If all elements are constants and the case above didn't get hit, fall back
+ // to the default expansion, which will generate a load from the constant
+ // pool.
+ if (isConstant)
+ return SDValue();
+
+ // Empirical tests suggest this is rarely worth it for vectors of length <= 2.
+ if (NumElts >= 4) {
+ SDValue shuffle = ReconstructShuffle(Op, DAG);
+ if (shuffle != SDValue())
+ return shuffle;
+ }
+
+ // If all else fails, just use a sequence of INSERT_VECTOR_ELT when we
+ // know the default expansion would otherwise fall back on something even
+ // worse. For a vector with one or two non-undef values, that's
+ // scalar_to_vector for the elements followed by a shuffle (provided the
+ // shuffle is valid for the target) and materialization element by element
+ // on the stack followed by a load for everything else.
+ if (!isConstant && !usesOnlyOneValue) {
+ SDValue Vec = DAG.getUNDEF(VT);
+ SDValue Op0 = Op.getOperand(0);
+ unsigned ElemSize = VT.getVectorElementType().getSizeInBits();
+ unsigned i = 0;
+ // For 32 and 64 bit types, use INSERT_SUBREG for lane zero to
+ // a) Avoid a RMW dependency on the full vector register, and
+ // b) Allow the register coalescer to fold away the copy if the
+ // value is already in an S or D register.
+ if (Op0.getOpcode() != ISD::UNDEF && (ElemSize == 32 || ElemSize == 64)) {
+ unsigned SubIdx = ElemSize == 32 ? AArch64::ssub : AArch64::dsub;
+ MachineSDNode *N =
+ DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, dl, VT, Vec, Op0,
+ DAG.getTargetConstant(SubIdx, MVT::i32));
+ Vec = SDValue(N, 0);
+ ++i;
+ }
+ for (; i < NumElts; ++i) {
+ SDValue V = Op.getOperand(i);
+ if (V.getOpcode() == ISD::UNDEF)
+ continue;
+ SDValue LaneIdx = DAG.getConstant(i, MVT::i64);
+ Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Vec, V, LaneIdx);
+ }
+ return Vec;
+ }
+
+ // Just use the default expansion. We failed to find a better alternative.
+ return SDValue();
+}
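
Stepping back: once no modified immediate matches, LowerBUILD_VECTOR works
through a fixed ranking of strategies. A compact restatement of that order
(simplified: the f32-splat-to-i32 retry is omitted; BVInfo and pickStrategy
are hypothetical names mirroring the flags computed above):

    #include <cstdio>

    struct BVInfo {
      bool isOnlyLowElement, usesOnlyOneValue, usesOnlyOneConstantValue,
           isConstant;
      unsigned NumConstantLanes, NumElts;
    };

    static const char *pickStrategy(const BVInfo &I) {
      if (I.isOnlyLowElement)
        return "SCALAR_TO_VECTOR";
      if (I.usesOnlyOneValue && !I.isConstant)
        return "DUP / DUPLANE";
      if (I.NumConstantLanes > 0 && I.usesOnlyOneConstantValue)
        return "DUP the constant, then INSERT_VECTOR_ELT the rest";
      if (I.isConstant)
        return "default expansion (constant pool)";
      if (I.NumElts >= 4)
        return "ReconstructShuffle, else per-element inserts";
      return "per-element inserts";
    }

    int main() {
      // Three lanes share one constant, one lane is variable: splat + patch.
      BVInfo I = {false, false, true, false, 3, 4};
      std::printf("%s\n", pickStrategy(I));
      return 0;
    }
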
+
+SDValue AArch64TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
+ SelectionDAG &DAG) const {
+ assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT && "Unknown opcode!");
+
+ // Check for non-constant lane.
+ if (!isa<ConstantSDNode>(Op.getOperand(2)))
return SDValue();
- uint32_t Width = CountPopulation_64(RHSMask);
- assert(Width && "Expected non-zero bitfield width");
+ EVT VT = Op.getOperand(0).getValueType();
- SDValue BFI = DAG.getNode(AArch64ISD::BFI, DL, VT,
- LHS.getOperand(0), Bitfield,
- DAG.getConstant(LSB, MVT::i64),
- DAG.getConstant(Width, MVT::i64));
+ // Insertion/extraction are legal for V128 types.
+ if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
+ VT == MVT::v2i64 || VT == MVT::v4f32 || VT == MVT::v2f64)
+ return Op;
+
+ if (VT != MVT::v8i8 && VT != MVT::v4i16 && VT != MVT::v2i32 &&
+ VT != MVT::v1i64 && VT != MVT::v2f32)
+ return SDValue();
- // Mask is trivial
- if ((LHSMask | RHSMask) == (-1ULL >> (64 - VT.getSizeInBits())))
- return BFI;
+ // For V64 types, we perform insertion by expanding the value
+ // to a V128 type and perform the insertion on that.
+ SDLoc DL(Op);
+ SDValue WideVec = WidenVector(Op.getOperand(0), DAG);
+ EVT WideTy = WideVec.getValueType();
- return DAG.getNode(ISD::AND, DL, VT, BFI,
- DAG.getConstant(LHSMask | RHSMask, VT));
+ SDValue Node = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, WideTy, WideVec,
+ Op.getOperand(1), Op.getOperand(2));
+ // Re-narrow the resultant vector.
+ return NarrowVector(Node, DAG);
}
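
The round trip above places the 64-bit value in the low half of a 128-bit
register, writes the lane there, and takes the low half back. A scalar-array
model of that widen/insert/narrow sequence, purely illustrative:

    #include <cstdio>

    int main() {
      unsigned short V64[4] = {1, 2, 3, 4};
      unsigned short Wide[8] = {0}; // WidenVector: low half = V64, rest undef
      for (int i = 0; i < 4; ++i)
        Wide[i] = V64[i];
      Wide[2] = 42;                 // INSERT_VECTOR_ELT on the wide type
      for (int i = 0; i < 4; ++i)
        V64[i] = Wide[i];           // NarrowVector: keep the low half
      std::printf("%d %d %d %d\n", V64[0], V64[1], V64[2], V64[3]); // 1 2 42 4
      return 0;
    }
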
-/// Search for the bitwise combining (with careful masks) of a MaskedBFI and its
-/// original input. This is surprisingly common because SROA splits things up
-/// into i8 chunks, so the originally detected MaskedBFI may actually only act
-/// on the low (say) byte of a word. This is then orred into the rest of the
-/// word afterwards.
-///
-/// Basic input: (or (and OLDFIELD, MASK1), (MaskedBFI MASK2, OLDFIELD, ...)).
-///
-/// If MASK1 and MASK2 are compatible, we can fold the whole thing into the
-/// MaskedBFI. We can also deal with a certain amount of extend/truncate being
-/// involved.
-static SDValue tryCombineToLargerBFI(SDNode *N,
- TargetLowering::DAGCombinerInfo &DCI,
- const AArch64Subtarget *Subtarget) {
- SelectionDAG &DAG = DCI.DAG;
- SDLoc DL(N);
- EVT VT = N->getValueType(0);
+SDValue
+AArch64TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
+ SelectionDAG &DAG) const {
+ assert(Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT && "Unknown opcode!");
- // First job is to hunt for a MaskedBFI on either the left or right. Swap
- // operands if it's actually on the right.
- SDValue BFI;
- SDValue PossExtraMask;
- uint64_t ExistingMask = 0;
- bool Extended = false;
- if (findMaskedBFI(N->getOperand(0), BFI, ExistingMask, Extended))
- PossExtraMask = N->getOperand(1);
- else if (findMaskedBFI(N->getOperand(1), BFI, ExistingMask, Extended))
- PossExtraMask = N->getOperand(0);
- else
+ // Check for non-constant lane.
+ if (!isa<ConstantSDNode>(Op.getOperand(1)))
+ return SDValue();
+
+ EVT VT = Op.getOperand(0).getValueType();
+
+ // Insertion/extraction are legal for V128 types.
+ if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
+ VT == MVT::v2i64 || VT == MVT::v4f32 || VT == MVT::v2f64)
+ return Op;
+
+ if (VT != MVT::v8i8 && VT != MVT::v4i16 && VT != MVT::v2i32 &&
+ VT != MVT::v1i64 && VT != MVT::v2f32)
+ return SDValue();
+
+ // For V64 types, we perform extraction by expanding the value
+ // to a V128 type and perform the extraction on that.
+ SDLoc DL(Op);
+ SDValue WideVec = WidenVector(Op.getOperand(0), DAG);
+ EVT WideTy = WideVec.getValueType();
+
+ EVT ExtrTy = WideTy.getVectorElementType();
+ if (ExtrTy == MVT::i16 || ExtrTy == MVT::i8)
+ ExtrTy = MVT::i32;
+
+ // For extractions, we just return the result directly.
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ExtrTy, WideVec,
+ Op.getOperand(1));
+}
+
+SDValue AArch64TargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op,
+ SelectionDAG &DAG) const {
+ EVT VT = Op.getOperand(0).getValueType();
+ SDLoc dl(Op);
+ // Just in case...
+ if (!VT.isVector())
return SDValue();
- // We can only combine a BFI with another compatible mask.
- if (PossExtraMask.getOpcode() != ISD::AND ||
- !isa<ConstantSDNode>(PossExtraMask.getOperand(1)))
+ ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+ if (!Cst)
return SDValue();
+ unsigned Val = Cst->getZExtValue();
+
+ unsigned Size = Op.getValueType().getSizeInBits();
+ if (Val == 0) {
+ switch (Size) {
+ case 8:
+ return DAG.getTargetExtractSubreg(AArch64::bsub, dl, Op.getValueType(),
+ Op.getOperand(0));
+ case 16:
+ return DAG.getTargetExtractSubreg(AArch64::hsub, dl, Op.getValueType(),
+ Op.getOperand(0));
+ case 32:
+ return DAG.getTargetExtractSubreg(AArch64::ssub, dl, Op.getValueType(),
+ Op.getOperand(0));
+ case 64:
+ return DAG.getTargetExtractSubreg(AArch64::dsub, dl, Op.getValueType(),
+ Op.getOperand(0));
+ default:
+ llvm_unreachable("Unexpected vector type in extract_subvector!");
+ }
+ }
+ // If this is extracting the upper 64-bits of a 128-bit vector, we match
+ // that directly.
+ if (Size == 64 && Val * VT.getVectorElementType().getSizeInBits() == 64)
+ return Op;
+
+ return SDValue();
+}
+
+bool AArch64TargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
+ EVT VT) const {
+ if (VT.getVectorNumElements() == 4 &&
+ (VT.is128BitVector() || VT.is64BitVector())) {
+ unsigned PFIndexes[4];
+ for (unsigned i = 0; i != 4; ++i) {
+ if (M[i] < 0)
+ PFIndexes[i] = 8;
+ else
+ PFIndexes[i] = M[i];
+ }
+
+ // Compute the index in the perfect shuffle table.
+ unsigned PFTableIndex = PFIndexes[0] * 9 * 9 * 9 + PFIndexes[1] * 9 * 9 +
+ PFIndexes[2] * 9 + PFIndexes[3];
+ unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
+ unsigned Cost = (PFEntry >> 30);
+
+ if (Cost <= 4)
+ return true;
+ }
+
+ bool DummyBool;
+ int DummyInt;
+ unsigned DummyUnsigned;
+
+ return (ShuffleVectorSDNode::isSplatMask(&M[0], VT) || isREVMask(M, VT, 64) ||
+ isREVMask(M, VT, 32) || isREVMask(M, VT, 16) ||
+ isEXTMask(M, VT, DummyBool, DummyUnsigned) ||
+ // isTBLMask(M, VT) || // FIXME: Port TBL support from ARM.
+ isTRNMask(M, VT, DummyUnsigned) || isUZPMask(M, VT, DummyUnsigned) ||
+ isZIPMask(M, VT, DummyUnsigned) ||
+ isTRN_v_undef_Mask(M, VT, DummyUnsigned) ||
+ isUZP_v_undef_Mask(M, VT, DummyUnsigned) ||
+ isZIP_v_undef_Mask(M, VT, DummyUnsigned) ||
+ isINSMask(M, VT.getVectorNumElements(), DummyBool, DummyInt) ||
+ isConcatMask(M, VT, VT.getSizeInBits() == 128));
+}
+
+/// getVShiftImm - Check if this is a valid build_vector for the immediate
+/// operand of a vector shift operation, where all the elements of the
+/// build_vector must have the same constant integer value.
+static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) {
+ // Ignore bit_converts.
+ while (Op.getOpcode() == ISD::BITCAST)
+ Op = Op.getOperand(0);
+ BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
+ APInt SplatBits, SplatUndef;
+ unsigned SplatBitSize;
+ bool HasAnyUndefs;
+ if (!BVN || !BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize,
+ HasAnyUndefs, ElementBits) ||
+ SplatBitSize > ElementBits)
+ return false;
+ Cnt = SplatBits.getSExtValue();
+ return true;
+}
+
+/// isVShiftLImm - Check if this is a valid build_vector for the immediate
+/// operand of a vector shift left operation. That value must be in the range:
+/// 0 <= Value < ElementBits for a left shift; or
+/// 0 <= Value <= ElementBits for a long left shift.
+static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) {
+ assert(VT.isVector() && "vector shift count is not a vector type");
+ unsigned ElementBits = VT.getVectorElementType().getSizeInBits();
+ if (!getVShiftImm(Op, ElementBits, Cnt))
+ return false;
+ return (Cnt >= 0 && (isLong ? Cnt - 1 : Cnt) < ElementBits);
+}
+
+/// isVShiftRImm - Check if this is a valid build_vector for the immediate
+/// operand of a vector shift right operation. For a shift opcode, the value
+/// is positive, but for an intrinsic the value must be negative. The
+/// absolute value must be in the range:
+/// 1 <= |Value| <= ElementBits for a right shift; or
+/// 1 <= |Value| <= ElementBits/2 for a narrow right shift.
+static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, bool isIntrinsic,
+ int64_t &Cnt) {
+ assert(VT.isVector() && "vector shift count is not a vector type");
+ unsigned ElementBits = VT.getVectorElementType().getSizeInBits();
+ if (!getVShiftImm(Op, ElementBits, Cnt))
+ return false;
+ if (isIntrinsic)
+ Cnt = -Cnt;
+ return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits / 2 : ElementBits));
+}
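
Summarizing the accepted ranges for 32-bit elements: plain left shifts take
0..31, long left shifts 0..32, right shifts 1..32, narrowing right shifts
1..16, and intrinsic right shifts pass the count negated. A quick standalone
check (shiftLOk and shiftROk are hypothetical mirrors of the predicates above):

    #include <cstdio>

    static bool shiftLOk(long long Cnt, bool isLong) {
      const long long ElementBits = 32;
      return Cnt >= 0 && (isLong ? Cnt - 1 : Cnt) < ElementBits;
    }
    static bool shiftROk(long long Cnt, bool isNarrow, bool isIntrinsic) {
      const long long ElementBits = 32;
      if (isIntrinsic)
        Cnt = -Cnt; // intrinsics pass the count negated
      return Cnt >= 1 && Cnt <= (isNarrow ? ElementBits / 2 : ElementBits);
    }

    int main() {
      std::printf("%d %d %d %d\n",
                  shiftLOk(32, /*isLong=*/true),   // 1: long shift allows 32
                  shiftLOk(32, /*isLong=*/false),  // 0: plain tops out at 31
                  shiftROk(-16, true, true),       // 1: narrowing intrinsic
                  shiftROk(33, false, false));     // 0: out of range
      return 0;
    }
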
+
+SDValue AArch64TargetLowering::LowerVectorSRA_SRL_SHL(SDValue Op,
+ SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+ SDLoc DL(Op);
+ int64_t Cnt;
+
+ if (!Op.getOperand(1).getValueType().isVector())
+ return Op;
+ unsigned EltSize = VT.getVectorElementType().getSizeInBits();
+
+ switch (Op.getOpcode()) {
+ default:
+ llvm_unreachable("unexpected shift opcode");
+
+ case ISD::SHL:
+ if (isVShiftLImm(Op.getOperand(1), VT, false, Cnt) && Cnt < EltSize)
+ return DAG.getNode(AArch64ISD::VSHL, SDLoc(Op), VT, Op.getOperand(0),
+ DAG.getConstant(Cnt, MVT::i32));
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
+ DAG.getConstant(Intrinsic::aarch64_neon_ushl, MVT::i32),
+ Op.getOperand(0), Op.getOperand(1));
+ case ISD::SRA:
+ case ISD::SRL:
+ // Right shift immediate
+ if (isVShiftRImm(Op.getOperand(1), VT, false, false, Cnt) &&
+ Cnt < EltSize) {
+ unsigned Opc =
+ (Op.getOpcode() == ISD::SRA) ? AArch64ISD::VASHR : AArch64ISD::VLSHR;
+ return DAG.getNode(Opc, SDLoc(Op), VT, Op.getOperand(0),
+ DAG.getConstant(Cnt, MVT::i32));
+ }
+
+    // Right shift by register. Note that there is no shift-right-by-register
+    // instruction; the shift-left-by-register instruction takes a signed
+    // value, where negative amounts specify a right shift.
+ unsigned Opc = (Op.getOpcode() == ISD::SRA) ? Intrinsic::aarch64_neon_sshl
+ : Intrinsic::aarch64_neon_ushl;
+    // Negate the shift amount.
+ SDValue NegShift = DAG.getNode(AArch64ISD::NEG, DL, VT, Op.getOperand(1));
+ SDValue NegShiftLeft =
+ DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
+ DAG.getConstant(Opc, MVT::i32), Op.getOperand(0), NegShift);
+ return NegShiftLeft;
+ }
+
+ return SDValue();
+}
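
The register-shift fallback works because ushl/sshl interpret a negative
per-lane count as a right shift. A simplified scalar model (ushl32 is
hypothetical; the real instruction also handles out-of-range counts):

    #include <cstdint>
    #include <cstdio>

    // Scalar model of ushl with a register operand: negative counts shift
    // right, which is how variable SRL is emitted above.
    static uint32_t ushl32(uint32_t V, int Shift) {
      return Shift >= 0 ? V << Shift : V >> -Shift;
    }

    int main() {
      // srl v, 5 becomes ushl v, neg(5).
      std::printf("0x%x\n", ushl32(0xF0u, -5)); // 0x7
      return 0;
    }
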
+
+static SDValue EmitVectorComparison(SDValue LHS, SDValue RHS,
+ AArch64CC::CondCode CC, bool NoNans, EVT VT,
+ SDLoc dl, SelectionDAG &DAG) {
+ EVT SrcVT = LHS.getValueType();
+
+ BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(RHS.getNode());
+ APInt CnstBits(VT.getSizeInBits(), 0);
+ APInt UndefBits(VT.getSizeInBits(), 0);
+ bool IsCnst = BVN && resolveBuildVector(BVN, CnstBits, UndefBits);
+ bool IsZero = IsCnst && (CnstBits == 0);
+
+ if (SrcVT.getVectorElementType().isFloatingPoint()) {
+ switch (CC) {
+ default:
+ return SDValue();
+ case AArch64CC::NE: {
+ SDValue Fcmeq;
+ if (IsZero)
+ Fcmeq = DAG.getNode(AArch64ISD::FCMEQz, dl, VT, LHS);
+ else
+ Fcmeq = DAG.getNode(AArch64ISD::FCMEQ, dl, VT, LHS, RHS);
+ return DAG.getNode(AArch64ISD::NOT, dl, VT, Fcmeq);
+ }
+ case AArch64CC::EQ:
+ if (IsZero)
+ return DAG.getNode(AArch64ISD::FCMEQz, dl, VT, LHS);
+ return DAG.getNode(AArch64ISD::FCMEQ, dl, VT, LHS, RHS);
+ case AArch64CC::GE:
+ if (IsZero)
+ return DAG.getNode(AArch64ISD::FCMGEz, dl, VT, LHS);
+ return DAG.getNode(AArch64ISD::FCMGE, dl, VT, LHS, RHS);
+ case AArch64CC::GT:
+ if (IsZero)
+ return DAG.getNode(AArch64ISD::FCMGTz, dl, VT, LHS);
+ return DAG.getNode(AArch64ISD::FCMGT, dl, VT, LHS, RHS);
+ case AArch64CC::LS:
+ if (IsZero)
+ return DAG.getNode(AArch64ISD::FCMLEz, dl, VT, LHS);
+ return DAG.getNode(AArch64ISD::FCMGE, dl, VT, RHS, LHS);
+ case AArch64CC::LT:
+ if (!NoNans)
+ return SDValue();
+      // If we ignore NaNs then we can use the MI implementation.
+ // Fallthrough.
+ case AArch64CC::MI:
+ if (IsZero)
+ return DAG.getNode(AArch64ISD::FCMLTz, dl, VT, LHS);
+ return DAG.getNode(AArch64ISD::FCMGT, dl, VT, RHS, LHS);
+ }
+ }
+
+ switch (CC) {
+ default:
+ return SDValue();
+ case AArch64CC::NE: {
+ SDValue Cmeq;
+ if (IsZero)
+ Cmeq = DAG.getNode(AArch64ISD::CMEQz, dl, VT, LHS);
+ else
+ Cmeq = DAG.getNode(AArch64ISD::CMEQ, dl, VT, LHS, RHS);
+ return DAG.getNode(AArch64ISD::NOT, dl, VT, Cmeq);
+ }
+ case AArch64CC::EQ:
+ if (IsZero)
+ return DAG.getNode(AArch64ISD::CMEQz, dl, VT, LHS);
+ return DAG.getNode(AArch64ISD::CMEQ, dl, VT, LHS, RHS);
+ case AArch64CC::GE:
+ if (IsZero)
+ return DAG.getNode(AArch64ISD::CMGEz, dl, VT, LHS);
+ return DAG.getNode(AArch64ISD::CMGE, dl, VT, LHS, RHS);
+ case AArch64CC::GT:
+ if (IsZero)
+ return DAG.getNode(AArch64ISD::CMGTz, dl, VT, LHS);
+ return DAG.getNode(AArch64ISD::CMGT, dl, VT, LHS, RHS);
+ case AArch64CC::LE:
+ if (IsZero)
+ return DAG.getNode(AArch64ISD::CMLEz, dl, VT, LHS);
+ return DAG.getNode(AArch64ISD::CMGE, dl, VT, RHS, LHS);
+ case AArch64CC::LS:
+ return DAG.getNode(AArch64ISD::CMHS, dl, VT, RHS, LHS);
+ case AArch64CC::LO:
+ return DAG.getNode(AArch64ISD::CMHI, dl, VT, RHS, LHS);
+ case AArch64CC::LT:
+ if (IsZero)
+ return DAG.getNode(AArch64ISD::CMLTz, dl, VT, LHS);
+ return DAG.getNode(AArch64ISD::CMGT, dl, VT, RHS, LHS);
+ case AArch64CC::HI:
+ return DAG.getNode(AArch64ISD::CMHI, dl, VT, LHS, RHS);
+ case AArch64CC::HS:
+ return DAG.getNode(AArch64ISD::CMHS, dl, VT, LHS, RHS);
+ }
+}
- uint64_t ExtraMask = PossExtraMask->getConstantOperandVal(1);
+SDValue AArch64TargetLowering::LowerVSETCC(SDValue Op,
+ SelectionDAG &DAG) const {
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ SDLoc dl(Op);
+
+ if (LHS.getValueType().getVectorElementType().isInteger()) {
+ assert(LHS.getValueType() == RHS.getValueType());
+ AArch64CC::CondCode AArch64CC = changeIntCCToAArch64CC(CC);
+ return EmitVectorComparison(LHS, RHS, AArch64CC, false, Op.getValueType(),
+ dl, DAG);
+ }
+
+ assert(LHS.getValueType().getVectorElementType() == MVT::f32 ||
+ LHS.getValueType().getVectorElementType() == MVT::f64);
+
+  // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't totally
+  // clean. Some of them require two comparisons to implement.
+ AArch64CC::CondCode CC1, CC2;
+ bool ShouldInvert;
+ changeVectorFPCCToAArch64CC(CC, CC1, CC2, ShouldInvert);
- // Masks must be compatible.
- if (ExtraMask & ExistingMask)
+ bool NoNaNs = getTargetMachine().Options.NoNaNsFPMath;
+ SDValue Cmp =
+ EmitVectorComparison(LHS, RHS, CC1, NoNaNs, Op.getValueType(), dl, DAG);
+ if (!Cmp.getNode())
return SDValue();
- SDValue OldBFIVal = BFI.getOperand(0);
- SDValue NewBFIVal = BFI.getOperand(1);
- if (Extended) {
- // We skipped a ZERO_EXTEND above, so the input to the MaskedBFIs should be
- // 32-bit and we'll be forming a 64-bit MaskedBFI. The MaskedBFI arguments
- // need to be made compatible.
- assert(VT == MVT::i64 && BFI.getValueType() == MVT::i32
- && "Invalid types for BFI");
- OldBFIVal = DAG.getNode(ISD::ANY_EXTEND, DL, VT, OldBFIVal);
- NewBFIVal = DAG.getNode(ISD::ANY_EXTEND, DL, VT, NewBFIVal);
+ if (CC2 != AArch64CC::AL) {
+ SDValue Cmp2 =
+ EmitVectorComparison(LHS, RHS, CC2, NoNaNs, Op.getValueType(), dl, DAG);
+ if (!Cmp2.getNode())
+ return SDValue();
+
+ Cmp = DAG.getNode(ISD::OR, dl, Cmp.getValueType(), Cmp, Cmp2);
+ }
+
+ if (ShouldInvert)
+ return Cmp = DAG.getNOT(dl, Cmp, Cmp.getValueType());
+
+ return Cmp;
+}
+
+/// getTgtMemIntrinsic - Represent NEON load and store intrinsics as
+/// MemIntrinsicNodes. The associated MachineMemOperands record the alignment
+/// specified in the intrinsic calls.
+bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
+ const CallInst &I,
+ unsigned Intrinsic) const {
+ switch (Intrinsic) {
+ case Intrinsic::aarch64_neon_ld2:
+ case Intrinsic::aarch64_neon_ld3:
+ case Intrinsic::aarch64_neon_ld4:
+ case Intrinsic::aarch64_neon_ld1x2:
+ case Intrinsic::aarch64_neon_ld1x3:
+ case Intrinsic::aarch64_neon_ld1x4:
+ case Intrinsic::aarch64_neon_ld2lane:
+ case Intrinsic::aarch64_neon_ld3lane:
+ case Intrinsic::aarch64_neon_ld4lane:
+ case Intrinsic::aarch64_neon_ld2r:
+ case Intrinsic::aarch64_neon_ld3r:
+ case Intrinsic::aarch64_neon_ld4r: {
+ Info.opc = ISD::INTRINSIC_W_CHAIN;
+ // Conservatively set memVT to the entire set of vectors loaded.
+ uint64_t NumElts = getDataLayout()->getTypeAllocSize(I.getType()) / 8;
+ Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
+ Info.ptrVal = I.getArgOperand(I.getNumArgOperands() - 1);
+ Info.offset = 0;
+ Info.align = 0;
+ Info.vol = false; // volatile loads with NEON intrinsics not supported
+ Info.readMem = true;
+ Info.writeMem = false;
+ return true;
+ }
+ case Intrinsic::aarch64_neon_st2:
+ case Intrinsic::aarch64_neon_st3:
+ case Intrinsic::aarch64_neon_st4:
+ case Intrinsic::aarch64_neon_st1x2:
+ case Intrinsic::aarch64_neon_st1x3:
+ case Intrinsic::aarch64_neon_st1x4:
+ case Intrinsic::aarch64_neon_st2lane:
+ case Intrinsic::aarch64_neon_st3lane:
+ case Intrinsic::aarch64_neon_st4lane: {
+ Info.opc = ISD::INTRINSIC_VOID;
+ // Conservatively set memVT to the entire set of vectors stored.
+ unsigned NumElts = 0;
+ for (unsigned ArgI = 1, ArgE = I.getNumArgOperands(); ArgI < ArgE; ++ArgI) {
+ Type *ArgTy = I.getArgOperand(ArgI)->getType();
+ if (!ArgTy->isVectorTy())
+ break;
+ NumElts += getDataLayout()->getTypeAllocSize(ArgTy) / 8;
+ }
+ Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
+ Info.ptrVal = I.getArgOperand(I.getNumArgOperands() - 1);
+ Info.offset = 0;
+ Info.align = 0;
+ Info.vol = false; // volatile stores with NEON intrinsics not supported
+ Info.readMem = false;
+ Info.writeMem = true;
+ return true;
+ }
+ case Intrinsic::aarch64_ldaxr:
+ case Intrinsic::aarch64_ldxr: {
+ PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType());
+ Info.opc = ISD::INTRINSIC_W_CHAIN;
+ Info.memVT = MVT::getVT(PtrTy->getElementType());
+ Info.ptrVal = I.getArgOperand(0);
+ Info.offset = 0;
+ Info.align = getDataLayout()->getABITypeAlignment(PtrTy->getElementType());
+ Info.vol = true;
+ Info.readMem = true;
+ Info.writeMem = false;
+ return true;
+ }
+ case Intrinsic::aarch64_stlxr:
+ case Intrinsic::aarch64_stxr: {
+ PointerType *PtrTy = cast<PointerType>(I.getArgOperand(1)->getType());
+ Info.opc = ISD::INTRINSIC_W_CHAIN;
+ Info.memVT = MVT::getVT(PtrTy->getElementType());
+ Info.ptrVal = I.getArgOperand(1);
+ Info.offset = 0;
+ Info.align = getDataLayout()->getABITypeAlignment(PtrTy->getElementType());
+ Info.vol = true;
+ Info.readMem = false;
+ Info.writeMem = true;
+ return true;
+ }
+ case Intrinsic::aarch64_ldaxp:
+ case Intrinsic::aarch64_ldxp: {
+ Info.opc = ISD::INTRINSIC_W_CHAIN;
+ Info.memVT = MVT::i128;
+ Info.ptrVal = I.getArgOperand(0);
+ Info.offset = 0;
+ Info.align = 16;
+ Info.vol = true;
+ Info.readMem = true;
+ Info.writeMem = false;
+ return true;
+ }
+ case Intrinsic::aarch64_stlxp:
+ case Intrinsic::aarch64_stxp: {
+ Info.opc = ISD::INTRINSIC_W_CHAIN;
+ Info.memVT = MVT::i128;
+ Info.ptrVal = I.getArgOperand(2);
+ Info.offset = 0;
+ Info.align = 16;
+ Info.vol = true;
+ Info.readMem = false;
+ Info.writeMem = true;
+ return true;
+ }
+ default:
+ break;
}
- // We need the MaskedBFI to be combined with a mask of the *same* value.
- if (PossExtraMask.getOperand(0) != OldBFIVal)
+ return false;
+}
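
A worked example of the conservative memVT above: aarch64.neon.ld3 returning
{ <4 x i32>, <4 x i32>, <4 x i32> } has an alloc size of 48 bytes, so NumElts
is 6 and memVT becomes v6i64. The arithmetic, spelled out:

    #include <cstdio>

    int main() {
      unsigned AllocSizeBytes = 3 * 4 * 4;   // three <4 x i32> vectors
      unsigned NumElts = AllocSizeBytes / 8; // 64-bit units, as above
      std::printf("memVT = v%ui64\n", NumElts); // memVT = v6i64
      return 0;
    }
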
+
+// Truncations from 64-bit GPR to 32-bit GPR is free.
+bool AArch64TargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
+ if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
+ return false;
+ unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
+ unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
+ if (NumBits1 <= NumBits2)
+ return false;
+ return true;
+}
+bool AArch64TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
+ if (!VT1.isInteger() || !VT2.isInteger())
+ return false;
+ unsigned NumBits1 = VT1.getSizeInBits();
+ unsigned NumBits2 = VT2.getSizeInBits();
+ if (NumBits1 <= NumBits2)
+ return false;
+ return true;
+}
+
+// All 32-bit GPR operations implicitly zero the high-half of the corresponding
+// 64-bit GPR.
+bool AArch64TargetLowering::isZExtFree(Type *Ty1, Type *Ty2) const {
+ if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
+ return false;
+ unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
+ unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
+ if (NumBits1 == 32 && NumBits2 == 64)
+ return true;
+ return false;
+}
+bool AArch64TargetLowering::isZExtFree(EVT VT1, EVT VT2) const {
+ if (!VT1.isInteger() || !VT2.isInteger())
+ return false;
+ unsigned NumBits1 = VT1.getSizeInBits();
+ unsigned NumBits2 = VT2.getSizeInBits();
+ if (NumBits1 == 32 && NumBits2 == 64)
+ return true;
+ return false;
+}
+
+bool AArch64TargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
+ EVT VT1 = Val.getValueType();
+ if (isZExtFree(VT1, VT2)) {
+ return true;
+ }
+
+ if (Val.getOpcode() != ISD::LOAD)
+ return false;
+
+ // 8-, 16-, and 32-bit integer loads all implicitly zero-extend.
+ return (VT1.isSimple() && VT1.isInteger() && VT2.isSimple() &&
+ VT2.isInteger() && VT1.getSizeInBits() <= 32);
+}
+
+bool AArch64TargetLowering::hasPairedLoad(Type *LoadedType,
+                                          unsigned &RequiredAlignment) const {
+ if (!LoadedType->isIntegerTy() && !LoadedType->isFloatTy())
+ return false;
+ // Cyclone supports unaligned accesses.
+  RequiredAlignment = 0;
+ unsigned NumBits = LoadedType->getPrimitiveSizeInBits();
+ return NumBits == 32 || NumBits == 64;
+}
+
+bool AArch64TargetLowering::hasPairedLoad(EVT LoadedType,
+                                          unsigned &RequiredAlignment) const {
+ if (!LoadedType.isSimple() ||
+ (!LoadedType.isInteger() && !LoadedType.isFloatingPoint()))
+ return false;
+ // Cyclone supports unaligned accesses.
+  RequiredAlignment = 0;
+ unsigned NumBits = LoadedType.getSizeInBits();
+ return NumBits == 32 || NumBits == 64;
+}
+
+static bool memOpAlign(unsigned DstAlign, unsigned SrcAlign,
+ unsigned AlignCheck) {
+ return ((SrcAlign == 0 || SrcAlign % AlignCheck == 0) &&
+ (DstAlign == 0 || DstAlign % AlignCheck == 0));
+}
+
+EVT AArch64TargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign,
+ unsigned SrcAlign, bool IsMemset,
+ bool ZeroMemset,
+ bool MemcpyStrSrc,
+ MachineFunction &MF) const {
+  // Don't use AdvSIMD to implement 16-byte memset. It would take one
+  // instruction to materialize the v2i64 zero and one store (with restrictive
+  // addressing mode). Just do two i64 stores of zero-registers.
+ bool Fast;
+ const Function *F = MF.getFunction();
+ if (Subtarget->hasFPARMv8() && !IsMemset && Size >= 16 &&
+ !F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::NoImplicitFloat) &&
+ (memOpAlign(SrcAlign, DstAlign, 16) ||
+ (allowsUnalignedMemoryAccesses(MVT::f128, 0, &Fast) && Fast)))
+ return MVT::f128;
+
+ return Size >= 8 ? MVT::i64 : MVT::i32;
+}
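+
+// As a rough sketch of the effect: a 16-byte-aligned 32-byte memcpy is done
+// with two q-register (f128) load/store pairs, while a 16-byte memset of zero
+// becomes two "str xzr" stores rather than materializing a v2i64 zero vector.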
+
+// 12-bit optionally shifted immediates are legal for adds.
+bool AArch64TargetLowering::isLegalAddImmediate(int64_t Immed) const {
+ if ((Immed >> 12) == 0 || ((Immed & 0xfff) == 0 && Immed >> 24 == 0))
+ return true;
+ return false;
+}
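+
+// For instance, 4095 (0xfff) is legal as-is and 0x555000 is legal via the
+// "LSL #12" form, but 0x1001 fails both checks and has to be materialized
+// into a register before the add.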
+
+// Integer comparisons are implemented with ADDS/SUBS, so the range of valid
+// immediates is the same as for an add or a sub.
+bool AArch64TargetLowering::isLegalICmpImmediate(int64_t Immed) const {
+ if (Immed < 0)
+ Immed *= -1;
+ return isLegalAddImmediate(Immed);
+}
+
+/// isLegalAddressingMode - Return true if the addressing mode represented
+/// by AM is legal for this target, for a load/store of the specified type.
+bool AArch64TargetLowering::isLegalAddressingMode(const AddrMode &AM,
+ Type *Ty) const {
+ // AArch64 has five basic addressing modes:
+ // reg
+ // reg + 9-bit signed offset
+ // reg + SIZE_IN_BYTES * 12-bit unsigned offset
+ // reg1 + reg2
+ // reg + SIZE_IN_BYTES * reg
+
+ // No global is ever allowed as a base.
+ if (AM.BaseGV)
+ return false;
+
+ // No reg+reg+imm addressing.
+ if (AM.HasBaseReg && AM.BaseOffs && AM.Scale)
+ return false;
+
+  // Check the reg + imm case:
+  // i.e., reg + 0, reg + imm9, reg + SIZE_IN_BYTES * uimm12
+ uint64_t NumBytes = 0;
+ if (Ty->isSized()) {
+ uint64_t NumBits = getDataLayout()->getTypeSizeInBits(Ty);
+ NumBytes = NumBits / 8;
+ if (!isPowerOf2_64(NumBits))
+ NumBytes = 0;
+ }
+
+ if (!AM.Scale) {
+ int64_t Offset = AM.BaseOffs;
+
+ // 9-bit signed offset
+ if (Offset >= -(1LL << 9) && Offset <= (1LL << 9) - 1)
+ return true;
+
+ // 12-bit unsigned offset
+ unsigned shift = Log2_64(NumBytes);
+ if (NumBytes && Offset > 0 && (Offset / NumBytes) <= (1LL << 12) - 1 &&
+ // Must be a multiple of NumBytes (NumBytes is a power of 2)
+ (Offset >> shift) << shift == Offset)
+ return true;
+ return false;
+ }
+
+ // Check reg1 + SIZE_IN_BYTES * reg2 and reg1 + reg2
+
+ if (!AM.Scale || AM.Scale == 1 ||
+ (AM.Scale > 0 && (uint64_t)AM.Scale == NumBytes))
+ return true;
+ return false;
+}
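+
+// Illustrative outcomes for an i64 access (NumBytes == 8) under the checks
+// above:
+//   [x0]             legal (reg)
+//   [x0, #-256]      legal (9-bit signed offset)
+//   [x0, #32760]     legal (8 * uimm12; 4095 * 8 == 32760)
+//   [x0, x1]         legal (reg1 + reg2)
+//   [x0, x1, lsl #3] legal (reg + SIZE_IN_BYTES * reg)
+// whereas a base reg + scaled reg + immediate form is rejected up front.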
+
+int AArch64TargetLowering::getScalingFactorCost(const AddrMode &AM,
+ Type *Ty) const {
+ // Scaling factors are not free at all.
+ // Operands | Rt Latency
+ // -------------------------------------------
+ // Rt, [Xn, Xm] | 4
+ // -------------------------------------------
+ // Rt, [Xn, Xm, lsl #imm] | Rn: 4 Rm: 5
+ // Rt, [Xn, Wm, <extend> #imm] |
+ if (isLegalAddressingMode(AM, Ty))
+    // Scale represents reg2 * scale, so charge 1 when it is
+    // neither 0 nor 1.
+ return AM.Scale != 0 && AM.Scale != 1;
+ return -1;
+}
+
+bool AArch64TargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
+ VT = VT.getScalarType();
+
+ if (!VT.isSimple())
+ return false;
+
+ switch (VT.getSimpleVT().SimpleTy) {
+ case MVT::f32:
+ case MVT::f64:
+ return true;
+ default:
+ break;
+ }
+
+ return false;
+}
+
+const MCPhysReg *
+AArch64TargetLowering::getScratchRegisters(CallingConv::ID) const {
+ // LR is a callee-save register, but we must treat it as clobbered by any call
+ // site. Hence we include LR in the scratch registers, which are in turn added
+ // as implicit-defs for stackmaps and patchpoints.
+ static const MCPhysReg ScratchRegs[] = {
+ AArch64::X16, AArch64::X17, AArch64::LR, 0
+ };
+ return ScratchRegs;
+}
+
+bool
+AArch64TargetLowering::isDesirableToCommuteWithShift(const SDNode *N) const {
+ EVT VT = N->getValueType(0);
+  // If N is an unsigned bit extraction, ((x >> C) & mask), do not combine it
+  // with a shift so that it can instead be lowered to UBFX.
+ if (N->getOpcode() == ISD::AND && (VT == MVT::i32 || VT == MVT::i64) &&
+ isa<ConstantSDNode>(N->getOperand(1))) {
+ uint64_t TruncMask = N->getConstantOperandVal(1);
+ if (isMask_64(TruncMask) &&
+ N->getOperand(0).getOpcode() == ISD::SRL &&
+ isa<ConstantSDNode>(N->getOperand(0)->getOperand(1)))
+ return false;
+ }
+ return true;
+}
+
+bool AArch64TargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
+ Type *Ty) const {
+ assert(Ty->isIntegerTy());
+
+ unsigned BitSize = Ty->getPrimitiveSizeInBits();
+ if (BitSize == 0)
+ return false;
+
+ int64_t Val = Imm.getSExtValue();
+ if (Val == 0 || AArch64_AM::isLogicalImmediate(Val, BitSize))
+ return true;
+
+ if ((int64_t)Val < 0)
+ Val = ~Val;
+ if (BitSize == 32)
+ Val &= (1LL << 32) - 1;
+
+ unsigned LZ = countLeadingZeros((uint64_t)Val);
+ unsigned Shift = (63 - LZ) / 16;
+  // MOVZ is free, so return true for one or fewer MOVKs.
+  return Shift < 3;
+}
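+
+// Worked examples: 0x0000ffff00000000 is a logical immediate, so it returns
+// true immediately; 0x12345678 needs a MOVZ plus one MOVK (Shift == 1), also
+// true; something like 0x123456789abcdef0 would need a MOVZ plus three MOVKs
+// (Shift == 3), so keeping the constant-pool load is cheaper there.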
+
+// Generate SUBS and CSEL for integer abs.
+static SDValue performIntegerAbsCombine(SDNode *N, SelectionDAG &DAG) {
+ EVT VT = N->getValueType(0);
+
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDLoc DL(N);
+
+ // Check pattern of XOR(ADD(X,Y), Y) where Y is SRA(X, size(X)-1)
+ // and change it to SUB and CSEL.
+ if (VT.isInteger() && N->getOpcode() == ISD::XOR &&
+ N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1 &&
+ N1.getOpcode() == ISD::SRA && N1.getOperand(0) == N0.getOperand(0))
+ if (ConstantSDNode *Y1C = dyn_cast<ConstantSDNode>(N1.getOperand(1)))
+ if (Y1C->getAPIntValue() == VT.getSizeInBits() - 1) {
+ SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT),
+ N0.getOperand(0));
+ // Generate SUBS & CSEL.
+ SDValue Cmp =
+ DAG.getNode(AArch64ISD::SUBS, DL, DAG.getVTList(VT, MVT::i32),
+ N0.getOperand(0), DAG.getConstant(0, VT));
+ return DAG.getNode(AArch64ISD::CSEL, DL, VT, N0.getOperand(0), Neg,
+ DAG.getConstant(AArch64CC::PL, MVT::i32),
+ SDValue(Cmp.getNode(), 1));
+ }
+ return SDValue();
+}
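+
+// For the canonical i64 abs pattern, "xor (add x, (sra x, 63)), (sra x, 63)",
+// the SUBS/CSEL pair produced here typically matches down to something like
+//   cmp   x0, #0
+//   csneg x0, x0, x0, pl
+// i.e. a conditional negate instead of the shift/add/xor sequence.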
+
+// performXorCombine - Attempts to handle integer ABS.
+static SDValue performXorCombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const AArch64Subtarget *Subtarget) {
+ if (DCI.isBeforeLegalizeOps())
return SDValue();
- BFI = DAG.getNode(AArch64ISD::BFI, DL, VT,
- OldBFIVal, NewBFIVal,
- BFI.getOperand(2), BFI.getOperand(3));
+ return performIntegerAbsCombine(N, DAG);
+}
- // If the masking is trivial, we don't need to create it.
- if ((ExtraMask | ExistingMask) == (-1ULL >> (64 - VT.getSizeInBits())))
- return BFI;
+static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const AArch64Subtarget *Subtarget) {
+ if (DCI.isBeforeLegalizeOps())
+ return SDValue();
- return DAG.getNode(ISD::AND, DL, VT, BFI,
- DAG.getConstant(ExtraMask | ExistingMask, VT));
+  // Multiplication by a power of two plus or minus one can be done more
+  // cheaply as a shift+add/sub. For now, this is true unconditionally. If
+  // future CPUs have a cheaper MADD instruction, this may need to be
+  // gated on a subtarget feature. For Cyclone, 32-bit MADD is 4 cycles and
+  // 64-bit is 5 cycles, so this is always a win.
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
+ APInt Value = C->getAPIntValue();
+ EVT VT = N->getValueType(0);
+ APInt VP1 = Value + 1;
+ if (VP1.isPowerOf2()) {
+ // Multiplying by one less than a power of two, replace with a shift
+ // and a subtract.
+ SDValue ShiftedVal =
+ DAG.getNode(ISD::SHL, SDLoc(N), VT, N->getOperand(0),
+ DAG.getConstant(VP1.logBase2(), MVT::i64));
+ return DAG.getNode(ISD::SUB, SDLoc(N), VT, ShiftedVal, N->getOperand(0));
+ }
+ APInt VM1 = Value - 1;
+ if (VM1.isPowerOf2()) {
+ // Multiplying by one more than a power of two, replace with a shift
+ // and an add.
+ SDValue ShiftedVal =
+ DAG.getNode(ISD::SHL, SDLoc(N), VT, N->getOperand(0),
+ DAG.getConstant(VM1.logBase2(), MVT::i64));
+ return DAG.getNode(ISD::ADD, SDLoc(N), VT, ShiftedVal, N->getOperand(0));
+ }
+ }
+ return SDValue();
+}
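+
+// For example, "x * 9" becomes "(x << 3) + x", which matches the single
+// shifted-operand instruction "add x0, x0, x0, lsl #3", and "x * 7" becomes
+// "(x << 3) - x", i.e. roughly "lsl x1, x0, #3; sub x0, x1, x0" -- both
+// cheaper than a MOV-immediate plus MADD/MUL.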
+
+static SDValue performIntToFpCombine(SDNode *N, SelectionDAG &DAG) {
+ EVT VT = N->getValueType(0);
+ if (VT != MVT::f32 && VT != MVT::f64)
+ return SDValue();
+ // Only optimize when the source and destination types have the same width.
+ if (VT.getSizeInBits() != N->getOperand(0).getValueType().getSizeInBits())
+ return SDValue();
+
+  // If the result of an integer load is only used by an integer-to-float
+  // conversion, use an fp load and an AdvSIMD scalar {S|U}CVTF instead.
+  // This eliminates an integer-to-vector-move UOP and improves throughput.
+ SDValue N0 = N->getOperand(0);
+ if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
+ // Do not change the width of a volatile load.
+ !cast<LoadSDNode>(N0)->isVolatile()) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ SDValue Load = DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
+ LN0->getPointerInfo(), LN0->isVolatile(),
+ LN0->isNonTemporal(), LN0->isInvariant(),
+ LN0->getAlignment());
+
+ // Make sure successors of the original load stay after it by updating them
+ // to use the new Chain.
+ DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), Load.getValue(1));
+
+ unsigned Opcode =
+ (N->getOpcode() == ISD::SINT_TO_FP) ? AArch64ISD::SITOF : AArch64ISD::UITOF;
+ return DAG.getNode(Opcode, SDLoc(N), VT, Load);
+ }
+
+ return SDValue();
}
/// An EXTR instruction is made up of two shifts, ORed together. This helper
@@ -3782,298 +6461,952 @@ static SDValue tryCombineToEXTR(SDNode *N,
std::swap(ShiftLHS, ShiftRHS);
}
- return DAG.getNode(AArch64ISD::EXTR, DL, VT,
- LHS, RHS,
+ return DAG.getNode(AArch64ISD::EXTR, DL, VT, LHS, RHS,
DAG.getConstant(ShiftRHS, MVT::i64));
}
-/// Target-specific dag combine xforms for ISD::OR
-static SDValue PerformORCombine(SDNode *N,
- TargetLowering::DAGCombinerInfo &DCI,
- const AArch64Subtarget *Subtarget) {
-
+static SDValue tryCombineToBSL(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ EVT VT = N->getValueType(0);
SelectionDAG &DAG = DCI.DAG;
SDLoc DL(N);
+
+ if (!VT.isVector())
+ return SDValue();
+
+ SDValue N0 = N->getOperand(0);
+ if (N0.getOpcode() != ISD::AND)
+ return SDValue();
+
+ SDValue N1 = N->getOperand(1);
+ if (N1.getOpcode() != ISD::AND)
+ return SDValue();
+
+ // We only have to look for constant vectors here since the general, variable
+ // case can be handled in TableGen.
+ unsigned Bits = VT.getVectorElementType().getSizeInBits();
+ uint64_t BitMask = Bits == 64 ? -1ULL : ((1ULL << Bits) - 1);
+ for (int i = 1; i >= 0; --i)
+ for (int j = 1; j >= 0; --j) {
+ BuildVectorSDNode *BVN0 = dyn_cast<BuildVectorSDNode>(N0->getOperand(i));
+ BuildVectorSDNode *BVN1 = dyn_cast<BuildVectorSDNode>(N1->getOperand(j));
+ if (!BVN0 || !BVN1)
+ continue;
+
+ bool FoundMatch = true;
+ for (unsigned k = 0; k < VT.getVectorNumElements(); ++k) {
+ ConstantSDNode *CN0 = dyn_cast<ConstantSDNode>(BVN0->getOperand(k));
+ ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(BVN1->getOperand(k));
+ if (!CN0 || !CN1 ||
+ CN0->getZExtValue() != (BitMask & ~CN1->getZExtValue())) {
+ FoundMatch = false;
+ break;
+ }
+ }
+
+ if (FoundMatch)
+ return DAG.getNode(AArch64ISD::BSL, DL, VT, SDValue(BVN0, 0),
+ N0->getOperand(1 - i), N1->getOperand(1 - j));
+ }
+
+ return SDValue();
+}
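+
+// Concrete shape this matches, for complementary constant masks M and ~M:
+//   (or (and B, M), (and C, ~M)) ==> (BSL M, B, C)
+// which selects B's bits where M is set and C's bits elsewhere, in a single
+// NEON bit-select instruction.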
+
+static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
+ const AArch64Subtarget *Subtarget) {
+ // Attempt to form an EXTR from (or (shl VAL1, #N), (srl VAL2, #RegWidth-N))
+ if (!EnableAArch64ExtrGeneration)
+ return SDValue();
+ SelectionDAG &DAG = DCI.DAG;
EVT VT = N->getValueType(0);
- if(!DAG.getTargetLoweringInfo().isTypeLegal(VT))
+ if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
return SDValue();
- // Attempt to recognise bitfield-insert operations.
- SDValue Res = tryCombineToBFI(N, DCI, Subtarget);
+ SDValue Res = tryCombineToEXTR(N, DCI);
if (Res.getNode())
return Res;
- // Attempt to combine an existing MaskedBFI operation into one with a larger
- // mask.
- Res = tryCombineToLargerBFI(N, DCI, Subtarget);
+ Res = tryCombineToBSL(N, DCI);
if (Res.getNode())
return Res;
- Res = tryCombineToEXTR(N, DCI);
- if (Res.getNode())
- return Res;
+ return SDValue();
+}
- if (!Subtarget->hasNEON())
+static SDValue performBitcastCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ SelectionDAG &DAG) {
+ // Wait 'til after everything is legalized to try this. That way we have
+ // legal vector types and such.
+ if (DCI.isBeforeLegalizeOps())
return SDValue();
- // Attempt to use vector immediate-form BSL
- // (or (and B, A), (and C, ~A)) => (VBSL A, B, C) when A is a constant.
+ // Remove extraneous bitcasts around an extract_subvector.
+ // For example,
+ // (v4i16 (bitconvert
+ // (extract_subvector (v2i64 (bitconvert (v8i16 ...)), (i64 1)))))
+ // becomes
+ // (extract_subvector ((v8i16 ...), (i64 4)))
- SDValue N0 = N->getOperand(0);
- if (N0.getOpcode() != ISD::AND)
+ // Only interested in 64-bit vectors as the ultimate result.
+ EVT VT = N->getValueType(0);
+ if (!VT.isVector())
return SDValue();
-
- SDValue N1 = N->getOperand(1);
- if (N1.getOpcode() != ISD::AND)
+ if (VT.getSimpleVT().getSizeInBits() != 64)
return SDValue();
-
- if (VT.isVector() && DAG.getTargetLoweringInfo().isTypeLegal(VT)) {
- APInt SplatUndef;
- unsigned SplatBitSize;
- bool HasAnyUndefs;
- BuildVectorSDNode *BVN0 = dyn_cast<BuildVectorSDNode>(N0->getOperand(1));
- APInt SplatBits0;
- if (BVN0 && BVN0->isConstantSplat(SplatBits0, SplatUndef, SplatBitSize,
- HasAnyUndefs) &&
- !HasAnyUndefs) {
- BuildVectorSDNode *BVN1 = dyn_cast<BuildVectorSDNode>(N1->getOperand(1));
- APInt SplatBits1;
- if (BVN1 && BVN1->isConstantSplat(SplatBits1, SplatUndef, SplatBitSize,
- HasAnyUndefs) && !HasAnyUndefs &&
- SplatBits0.getBitWidth() == SplatBits1.getBitWidth() &&
- SplatBits0 == ~SplatBits1) {
-
- return DAG.getNode(ISD::VSELECT, DL, VT, N0->getOperand(1),
- N0->getOperand(0), N1->getOperand(0));
- }
- }
+ // Is the operand an extract_subvector starting at the beginning or halfway
+ // point of the vector? A low half may also come through as an
+ // EXTRACT_SUBREG, so look for that, too.
+ SDValue Op0 = N->getOperand(0);
+ if (Op0->getOpcode() != ISD::EXTRACT_SUBVECTOR &&
+ !(Op0->isMachineOpcode() &&
+ Op0->getMachineOpcode() == AArch64::EXTRACT_SUBREG))
+ return SDValue();
+ uint64_t idx = cast<ConstantSDNode>(Op0->getOperand(1))->getZExtValue();
+ if (Op0->getOpcode() == ISD::EXTRACT_SUBVECTOR) {
+ if (Op0->getValueType(0).getVectorNumElements() != idx && idx != 0)
+ return SDValue();
+ } else if (Op0->getMachineOpcode() == AArch64::EXTRACT_SUBREG) {
+ if (idx != AArch64::dsub)
+ return SDValue();
+ // The dsub reference is equivalent to a lane zero subvector reference.
+ idx = 0;
}
+ // Look through the bitcast of the input to the extract.
+ if (Op0->getOperand(0)->getOpcode() != ISD::BITCAST)
+ return SDValue();
+ SDValue Source = Op0->getOperand(0)->getOperand(0);
+ // If the source type has twice the number of elements as our destination
+ // type, we know this is an extract of the high or low half of the vector.
+ EVT SVT = Source->getValueType(0);
+ if (SVT.getVectorNumElements() != VT.getVectorNumElements() * 2)
+ return SDValue();
- return SDValue();
+ DEBUG(dbgs() << "aarch64-lower: bitcast extract_subvector simplification\n");
+
+ // Create the simplified form to just extract the low or high half of the
+ // vector directly rather than bothering with the bitcasts.
+ SDLoc dl(N);
+ unsigned NumElements = VT.getVectorNumElements();
+ if (idx) {
+ SDValue HalfIdx = DAG.getConstant(NumElements, MVT::i64);
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, Source, HalfIdx);
+ } else {
+ SDValue SubReg = DAG.getTargetConstant(AArch64::dsub, MVT::i32);
+ return SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, VT,
+ Source, SubReg),
+ 0);
+ }
}
-/// Target-specific dag combine xforms for ISD::SRA
-static SDValue PerformSRACombine(SDNode *N,
- TargetLowering::DAGCombinerInfo &DCI) {
+static SDValue performConcatVectorsCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ SelectionDAG &DAG) {
+ // Wait 'til after everything is legalized to try this. That way we have
+ // legal vector types and such.
+ if (DCI.isBeforeLegalizeOps())
+ return SDValue();
- SelectionDAG &DAG = DCI.DAG;
- SDLoc DL(N);
+ SDLoc dl(N);
EVT VT = N->getValueType(0);
- // We're looking for an SRA/SHL pair which form an SBFX.
+ // If we see a (concat_vectors (v1x64 A), (v1x64 A)) it's really a vector
+ // splat. The indexed instructions are going to be expecting a DUPLANE64, so
+ // canonicalise to that.
+ if (N->getOperand(0) == N->getOperand(1) && VT.getVectorNumElements() == 2) {
+ assert(VT.getVectorElementType().getSizeInBits() == 64);
+ return DAG.getNode(AArch64ISD::DUPLANE64, dl, VT,
+ WidenVector(N->getOperand(0), DAG),
+ DAG.getConstant(0, MVT::i64));
+ }
- if (VT != MVT::i32 && VT != MVT::i64)
+ // Canonicalise concat_vectors so that the right-hand vector has as few
+ // bit-casts as possible before its real operation. The primary matching
+ // destination for these operations will be the narrowing "2" instructions,
+ // which depend on the operation being performed on this right-hand vector.
+ // For example,
+ // (concat_vectors LHS, (v1i64 (bitconvert (v4i16 RHS))))
+ // becomes
+ // (bitconvert (concat_vectors (v4i16 (bitconvert LHS)), RHS))
+
+ SDValue Op1 = N->getOperand(1);
+ if (Op1->getOpcode() != ISD::BITCAST)
return SDValue();
+ SDValue RHS = Op1->getOperand(0);
+ MVT RHSTy = RHS.getValueType().getSimpleVT();
+ // If the RHS is not a vector, this is not the pattern we're looking for.
+ if (!RHSTy.isVector())
+ return SDValue();
+
+ DEBUG(dbgs() << "aarch64-lower: concat_vectors bitcast simplification\n");
+
+ MVT ConcatTy = MVT::getVectorVT(RHSTy.getVectorElementType(),
+ RHSTy.getVectorNumElements() * 2);
+ return DAG.getNode(
+ ISD::BITCAST, dl, VT,
+ DAG.getNode(ISD::CONCAT_VECTORS, dl, ConcatTy,
+ DAG.getNode(ISD::BITCAST, dl, RHSTy, N->getOperand(0)), RHS));
+}
- if (!isa<ConstantSDNode>(N->getOperand(1)))
+static SDValue tryCombineFixedPointConvert(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ SelectionDAG &DAG) {
+ // Wait 'til after everything is legalized to try this. That way we have
+ // legal vector types and such.
+ if (DCI.isBeforeLegalizeOps())
return SDValue();
+ // Transform a scalar conversion of a value from a lane extract into a
+ // lane extract of a vector conversion. E.g., from foo1 to foo2:
+ // double foo1(int64x2_t a) { return vcvtd_n_f64_s64(a[1], 9); }
+ // double foo2(int64x2_t a) { return vcvtq_n_f64_s64(a, 9)[1]; }
+ //
+ // The second form interacts better with instruction selection and the
+ // register allocator to avoid cross-class register copies that aren't
+ // coalescable due to a lane reference.
+
+ // Check the operand and see if it originates from a lane extract.
+ SDValue Op1 = N->getOperand(1);
+ if (Op1.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
+ // Yep, no additional predication needed. Perform the transform.
+ SDValue IID = N->getOperand(0);
+ SDValue Shift = N->getOperand(2);
+ SDValue Vec = Op1.getOperand(0);
+ SDValue Lane = Op1.getOperand(1);
+ EVT ResTy = N->getValueType(0);
+ EVT VecResTy;
+ SDLoc DL(N);
+
+ // The vector width should be 128 bits by the time we get here, even
+ // if it started as 64 bits (the extract_vector handling will have
+ // done so).
+ assert(Vec.getValueType().getSizeInBits() == 128 &&
+ "unexpected vector size on extract_vector_elt!");
+ if (Vec.getValueType() == MVT::v4i32)
+ VecResTy = MVT::v4f32;
+ else if (Vec.getValueType() == MVT::v2i64)
+ VecResTy = MVT::v2f64;
+ else
+      llvm_unreachable("unexpected vector type!");
- uint64_t ExtraSignBits = N->getConstantOperandVal(1);
- SDValue Shift = N->getOperand(0);
+ SDValue Convert =
+ DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VecResTy, IID, Vec, Shift);
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResTy, Convert, Lane);
+ }
+ return SDValue();
+}
- if (Shift.getOpcode() != ISD::SHL)
+// AArch64 high-vector "long" operations are formed by performing the non-high
+// version on an extract_subvector of each operand which gets the high half:
+//
+// (longop2 LHS, RHS) == (longop (extract_high LHS), (extract_high RHS))
+//
+// However, there are cases which don't have an extract_high explicitly, but
+// have another operation that can be made compatible with one for free. For
+// example:
+//
+// (dupv64 scalar) --> (extract_high (dup128 scalar))
+//
+// This routine does the actual conversion of such DUPs, once outer routines
+// have determined that everything else is in order.
+static SDValue tryExtendDUPToExtractHigh(SDValue N, SelectionDAG &DAG) {
+ // We can handle most types of duplicate, but the lane ones have an extra
+ // operand saying *which* lane, so we need to know.
+ bool IsDUPLANE;
+ switch (N.getOpcode()) {
+ case AArch64ISD::DUP:
+ IsDUPLANE = false;
+ break;
+ case AArch64ISD::DUPLANE8:
+ case AArch64ISD::DUPLANE16:
+ case AArch64ISD::DUPLANE32:
+ case AArch64ISD::DUPLANE64:
+ IsDUPLANE = true;
+ break;
+ default:
return SDValue();
+ }
- if (!isa<ConstantSDNode>(Shift->getOperand(1)))
+ MVT NarrowTy = N.getSimpleValueType();
+ if (!NarrowTy.is64BitVector())
return SDValue();
- uint64_t BitsOnLeft = Shift->getConstantOperandVal(1);
- uint64_t Width = VT.getSizeInBits() - ExtraSignBits;
- uint64_t LSB = VT.getSizeInBits() - Width - BitsOnLeft;
+ MVT ElementTy = NarrowTy.getVectorElementType();
+ unsigned NumElems = NarrowTy.getVectorNumElements();
+ MVT NewDUPVT = MVT::getVectorVT(ElementTy, NumElems * 2);
- if (LSB > VT.getSizeInBits() || Width > VT.getSizeInBits())
- return SDValue();
+ SDValue NewDUP;
+ if (IsDUPLANE)
+ NewDUP = DAG.getNode(N.getOpcode(), SDLoc(N), NewDUPVT, N.getOperand(0),
+ N.getOperand(1));
+ else
+ NewDUP = DAG.getNode(AArch64ISD::DUP, SDLoc(N), NewDUPVT, N.getOperand(0));
- return DAG.getNode(AArch64ISD::SBFX, DL, VT, Shift.getOperand(0),
- DAG.getConstant(LSB, MVT::i64),
- DAG.getConstant(LSB + Width - 1, MVT::i64));
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N.getNode()), NarrowTy,
+ NewDUP, DAG.getConstant(NumElems, MVT::i64));
}
-/// Check if this is a valid build_vector for the immediate operand of
-/// a vector shift operation, where all the elements of the build_vector
-/// must have the same constant integer value.
-static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) {
- // Ignore bit_converts.
- while (Op.getOpcode() == ISD::BITCAST)
- Op = Op.getOperand(0);
- BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
- APInt SplatBits, SplatUndef;
- unsigned SplatBitSize;
- bool HasAnyUndefs;
- if (!BVN || !BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize,
- HasAnyUndefs, ElementBits) ||
- SplatBitSize > ElementBits)
- return false;
- Cnt = SplatBits.getSExtValue();
- return true;
+static bool isEssentiallyExtractSubvector(SDValue N) {
+ if (N.getOpcode() == ISD::EXTRACT_SUBVECTOR)
+ return true;
+
+ return N.getOpcode() == ISD::BITCAST &&
+ N.getOperand(0).getOpcode() == ISD::EXTRACT_SUBVECTOR;
}
-/// Check if this is a valid build_vector for the immediate operand of
-/// a vector shift left operation. That value must be in the range:
-/// 0 <= Value < ElementBits
-static bool isVShiftLImm(SDValue Op, EVT VT, int64_t &Cnt) {
- assert(VT.isVector() && "vector shift count is not a vector type");
- unsigned ElementBits = VT.getVectorElementType().getSizeInBits();
- if (!getVShiftImm(Op, ElementBits, Cnt))
+/// \brief Helper structure to keep track of ISD::SET_CC operands.
+struct GenericSetCCInfo {
+ const SDValue *Opnd0;
+ const SDValue *Opnd1;
+ ISD::CondCode CC;
+};
+
+/// \brief Helper structure to keep track of a SET_CC lowered into AArch64 code.
+struct AArch64SetCCInfo {
+ const SDValue *Cmp;
+ AArch64CC::CondCode CC;
+};
+
+/// \brief Helper structure to keep track of SetCC information.
+union SetCCInfo {
+ GenericSetCCInfo Generic;
+ AArch64SetCCInfo AArch64;
+};
+
+/// \brief Helper structure to be able to read SetCC information. If the
+/// IsAArch64 field is set to true, Info is an AArch64SetCCInfo; otherwise Info
+/// is a GenericSetCCInfo.
+struct SetCCInfoAndKind {
+ SetCCInfo Info;
+ bool IsAArch64;
+};
+
+/// \brief Check whether or not \p Op is a SET_CC operation, either a generic
+/// or an AArch64 lowered one.
+/// \p SetCCInfo is filled accordingly.
+/// \post SetCCInfo is meaningful only when this function returns true.
+/// \return True when Op is a kind of SET_CC operation.
+static bool isSetCC(SDValue Op, SetCCInfoAndKind &SetCCInfo) {
+  // If this is a setcc, this is straightforward.
+ if (Op.getOpcode() == ISD::SETCC) {
+ SetCCInfo.Info.Generic.Opnd0 = &Op.getOperand(0);
+ SetCCInfo.Info.Generic.Opnd1 = &Op.getOperand(1);
+ SetCCInfo.Info.Generic.CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
+ SetCCInfo.IsAArch64 = false;
+ return true;
+ }
+ // Otherwise, check if this is a matching csel instruction.
+ // In other words:
+ // - csel 1, 0, cc
+ // - csel 0, 1, !cc
+ if (Op.getOpcode() != AArch64ISD::CSEL)
+ return false;
+ // Set the information about the operands.
+  // TODO: we want the operands of the Cmp, not the csel.
+ SetCCInfo.Info.AArch64.Cmp = &Op.getOperand(3);
+ SetCCInfo.IsAArch64 = true;
+ SetCCInfo.Info.AArch64.CC = static_cast<AArch64CC::CondCode>(
+ cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue());
+
+  // Check that the operands match the constraints:
+ // (1) Both operands must be constants.
+ // (2) One must be 1 and the other must be 0.
+ ConstantSDNode *TValue = dyn_cast<ConstantSDNode>(Op.getOperand(0));
+ ConstantSDNode *FValue = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+
+ // Check (1).
+ if (!TValue || !FValue)
return false;
- return (Cnt >= 0 && Cnt < ElementBits);
+
+ // Check (2).
+ if (!TValue->isOne()) {
+ // Update the comparison when we are interested in !cc.
+ std::swap(TValue, FValue);
+ SetCCInfo.Info.AArch64.CC =
+ AArch64CC::getInvertedCondCode(SetCCInfo.Info.AArch64.CC);
+ }
+ return TValue->isOne() && FValue->isNullValue();
}
-/// Check if this is a valid build_vector for the immediate operand of a
-/// vector shift right operation. The value must be in the range:
-/// 1 <= Value <= ElementBits
-static bool isVShiftRImm(SDValue Op, EVT VT, int64_t &Cnt) {
- assert(VT.isVector() && "vector shift count is not a vector type");
- unsigned ElementBits = VT.getVectorElementType().getSizeInBits();
- if (!getVShiftImm(Op, ElementBits, Cnt))
- return false;
- return (Cnt >= 1 && Cnt <= ElementBits);
+// Returns true if Op is a setcc or a zext of a setcc.
+static bool isSetCCOrZExtSetCC(const SDValue& Op, SetCCInfoAndKind &Info) {
+ if (isSetCC(Op, Info))
+ return true;
+ return ((Op.getOpcode() == ISD::ZERO_EXTEND) &&
+ isSetCC(Op->getOperand(0), Info));
}
-static SDValue GenForSextInreg(SDNode *N,
- TargetLowering::DAGCombinerInfo &DCI,
- EVT SrcVT, EVT DestVT, EVT SubRegVT,
- const int *Mask, SDValue Src) {
- SelectionDAG &DAG = DCI.DAG;
- SDValue Bitcast
- = DAG.getNode(ISD::BITCAST, SDLoc(N), SrcVT, Src);
- SDValue Sext
- = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), DestVT, Bitcast);
- SDValue ShuffleVec
- = DAG.getVectorShuffle(DestVT, SDLoc(N), Sext, DAG.getUNDEF(DestVT), Mask);
- SDValue ExtractSubreg
- = SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, SDLoc(N),
- SubRegVT, ShuffleVec,
- DAG.getTargetConstant(AArch64::sub_64, MVT::i32)), 0);
- return ExtractSubreg;
-}
-
-/// Checks for vector shifts and lowers them.
-static SDValue PerformShiftCombine(SDNode *N,
- TargetLowering::DAGCombinerInfo &DCI,
- const AArch64Subtarget *ST) {
- SelectionDAG &DAG = DCI.DAG;
- EVT VT = N->getValueType(0);
- if (N->getOpcode() == ISD::SRA && (VT == MVT::i32 || VT == MVT::i64))
- return PerformSRACombine(N, DCI);
+// The folding we want to perform is:
+// (add x, [zext] (setcc cc ...) )
+// -->
+// (csel x, (add x, 1), !cc ...)
+//
+// The latter will get matched to a CSINC instruction.
+static SDValue performSetccAddFolding(SDNode *Op, SelectionDAG &DAG) {
+ assert(Op && Op->getOpcode() == ISD::ADD && "Unexpected operation!");
+ SDValue LHS = Op->getOperand(0);
+ SDValue RHS = Op->getOperand(1);
+ SetCCInfoAndKind InfoAndKind;
+
+ // If neither operand is a SET_CC, give up.
+ if (!isSetCCOrZExtSetCC(LHS, InfoAndKind)) {
+ std::swap(LHS, RHS);
+ if (!isSetCCOrZExtSetCC(LHS, InfoAndKind))
+ return SDValue();
+ }
- // We're looking for an SRA/SHL pair to help generating instruction
- // sshll v0.8h, v0.8b, #0
- // The instruction STXL is also the alias of this instruction.
- //
- // For example, for DAG like below,
- // v2i32 = sra (v2i32 (shl v2i32, 16)), 16
- // we can transform it into
- // v2i32 = EXTRACT_SUBREG
- // (v4i32 (suffle_vector
- // (v4i32 (sext (v4i16 (bitcast v2i32))),
- // undef, (0, 2, u, u)),
- // sub_64
- //
- // With this transformation we expect to generate "SSHLL + UZIP1"
- // Sometimes UZIP1 can be optimized away by combining with other context.
- int64_t ShrCnt, ShlCnt;
- if (N->getOpcode() == ISD::SRA
- && (VT == MVT::v2i32 || VT == MVT::v4i16)
- && isVShiftRImm(N->getOperand(1), VT, ShrCnt)
- && N->getOperand(0).getOpcode() == ISD::SHL
- && isVShiftRImm(N->getOperand(0).getOperand(1), VT, ShlCnt)) {
- SDValue Src = N->getOperand(0).getOperand(0);
- if (VT == MVT::v2i32 && ShrCnt == 16 && ShlCnt == 16) {
- // sext_inreg(v2i32, v2i16)
- // We essentially only care the Mask {0, 2, u, u}
- int Mask[4] = {0, 2, 4, 6};
- return GenForSextInreg(N, DCI, MVT::v4i16, MVT::v4i32, MVT::v2i32,
- Mask, Src);
- }
- else if (VT == MVT::v2i32 && ShrCnt == 24 && ShlCnt == 24) {
- // sext_inreg(v2i16, v2i8)
- // We essentially only care the Mask {0, u, 4, u, u, u, u, u, u, u, u, u}
- int Mask[8] = {0, 2, 4, 6, 8, 10, 12, 14};
- return GenForSextInreg(N, DCI, MVT::v8i8, MVT::v8i16, MVT::v2i32,
- Mask, Src);
- }
- else if (VT == MVT::v4i16 && ShrCnt == 8 && ShlCnt == 8) {
- // sext_inreg(v4i16, v4i8)
- // We essentially only care the Mask {0, 2, 4, 6, u, u, u, u, u, u, u, u}
- int Mask[8] = {0, 2, 4, 6, 8, 10, 12, 14};
- return GenForSextInreg(N, DCI, MVT::v8i8, MVT::v8i16, MVT::v4i16,
- Mask, Src);
- }
+  // FIXME: This could be generalized to work for FP comparisons.
+ EVT CmpVT = InfoAndKind.IsAArch64
+ ? InfoAndKind.Info.AArch64.Cmp->getOperand(0).getValueType()
+ : InfoAndKind.Info.Generic.Opnd0->getValueType();
+ if (CmpVT != MVT::i32 && CmpVT != MVT::i64)
+ return SDValue();
+
+ SDValue CCVal;
+ SDValue Cmp;
+ SDLoc dl(Op);
+ if (InfoAndKind.IsAArch64) {
+ CCVal = DAG.getConstant(
+ AArch64CC::getInvertedCondCode(InfoAndKind.Info.AArch64.CC), MVT::i32);
+ Cmp = *InfoAndKind.Info.AArch64.Cmp;
+ } else
+ Cmp = getAArch64Cmp(*InfoAndKind.Info.Generic.Opnd0,
+ *InfoAndKind.Info.Generic.Opnd1,
+ ISD::getSetCCInverse(InfoAndKind.Info.Generic.CC, true),
+ CCVal, DAG, dl);
+
+ EVT VT = Op->getValueType(0);
+ LHS = DAG.getNode(ISD::ADD, dl, VT, RHS, DAG.getConstant(1, VT));
+ return DAG.getNode(AArch64ISD::CSEL, dl, VT, RHS, LHS, CCVal, Cmp);
+}
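+
+// E.g., for C code along the lines of "w0 = w1 + (w2 == w3)", the CSEL built
+// here is expected to match as
+//   cmp   w2, w3
+//   csinc w0, w1, w1, ne
+// selecting w1 when the (inverted) condition holds and w1 + 1 otherwise.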
+
+// The basic add/sub long vector instructions have variants with "2" on the end
+// which act on the high-half of their inputs. They are normally matched by
+// patterns like:
+//
+// (add (zeroext (extract_high LHS)),
+// (zeroext (extract_high RHS)))
+// -> uaddl2 vD, vN, vM
+//
+// However, if one of the extracts is something like a duplicate, this
+// instruction can still be used profitably. This function puts the DAG into a
+// more appropriate form for those patterns to trigger.
+static SDValue performAddSubLongCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ SelectionDAG &DAG) {
+ if (DCI.isBeforeLegalizeOps())
+ return SDValue();
+
+ MVT VT = N->getSimpleValueType(0);
+ if (!VT.is128BitVector()) {
+ if (N->getOpcode() == ISD::ADD)
+ return performSetccAddFolding(N, DAG);
+ return SDValue();
}
- // Nothing to be done for scalar shifts.
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- if (!VT.isVector() || !TLI.isTypeLegal(VT))
+ // Make sure both branches are extended in the same way.
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ if ((LHS.getOpcode() != ISD::ZERO_EXTEND &&
+ LHS.getOpcode() != ISD::SIGN_EXTEND) ||
+ LHS.getOpcode() != RHS.getOpcode())
return SDValue();
- assert(ST->hasNEON() && "unexpected vector shift");
- int64_t Cnt;
+ unsigned ExtType = LHS.getOpcode();
- switch (N->getOpcode()) {
- default:
- llvm_unreachable("unexpected shift opcode");
+  // It's not worth doing this unless at least one of the inputs is already an
+  // extract, but we don't know which it'll be, so we have to try both.
+ if (isEssentiallyExtractSubvector(LHS.getOperand(0))) {
+ RHS = tryExtendDUPToExtractHigh(RHS.getOperand(0), DAG);
+ if (!RHS.getNode())
+ return SDValue();
- case ISD::SHL:
- if (isVShiftLImm(N->getOperand(1), VT, Cnt)) {
- SDValue RHS =
- DAG.getNode(AArch64ISD::NEON_VDUP, SDLoc(N->getOperand(1)), VT,
- DAG.getConstant(Cnt, MVT::i32));
- return DAG.getNode(ISD::SHL, SDLoc(N), VT, N->getOperand(0), RHS);
- }
- break;
+ RHS = DAG.getNode(ExtType, SDLoc(N), VT, RHS);
+ } else if (isEssentiallyExtractSubvector(RHS.getOperand(0))) {
+ LHS = tryExtendDUPToExtractHigh(LHS.getOperand(0), DAG);
+ if (!LHS.getNode())
+ return SDValue();
- case ISD::SRA:
- case ISD::SRL:
- if (isVShiftRImm(N->getOperand(1), VT, Cnt)) {
- SDValue RHS =
- DAG.getNode(AArch64ISD::NEON_VDUP, SDLoc(N->getOperand(1)), VT,
- DAG.getConstant(Cnt, MVT::i32));
- return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N->getOperand(0), RHS);
- }
+ LHS = DAG.getNode(ExtType, SDLoc(N), VT, LHS);
+ }
+
+ return DAG.getNode(N->getOpcode(), SDLoc(N), VT, LHS, RHS);
+}
+
+// Massage DAGs which we can use the high-half "long" operations on into
+// something isel will recognize better. E.g.
+//
+// (aarch64_neon_umull (extract_high vec) (dupv64 scalar)) -->
+// (aarch64_neon_umull (extract_high (v2i64 vec)))
+// (extract_high (v2i64 (dup128 scalar)))))
+//
+static SDValue tryCombineLongOpWithDup(unsigned IID, SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ SelectionDAG &DAG) {
+ if (DCI.isBeforeLegalizeOps())
+ return SDValue();
+
+ SDValue LHS = N->getOperand(1);
+ SDValue RHS = N->getOperand(2);
+ assert(LHS.getValueType().is64BitVector() &&
+ RHS.getValueType().is64BitVector() &&
+ "unexpected shape for long operation");
+
+ // Either node could be a DUP, but it's not worth doing both of them (you'd
+ // just as well use the non-high version) so look for a corresponding extract
+ // operation on the other "wing".
+ if (isEssentiallyExtractSubvector(LHS)) {
+ RHS = tryExtendDUPToExtractHigh(RHS, DAG);
+ if (!RHS.getNode())
+ return SDValue();
+ } else if (isEssentiallyExtractSubvector(RHS)) {
+ LHS = tryExtendDUPToExtractHigh(LHS, DAG);
+ if (!LHS.getNode())
+ return SDValue();
+ }
+
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N), N->getValueType(0),
+ N->getOperand(0), LHS, RHS);
+}
+
+static SDValue tryCombineShiftImm(unsigned IID, SDNode *N, SelectionDAG &DAG) {
+ MVT ElemTy = N->getSimpleValueType(0).getScalarType();
+ unsigned ElemBits = ElemTy.getSizeInBits();
+
+ int64_t ShiftAmount;
+ if (BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(2))) {
+ APInt SplatValue, SplatUndef;
+ unsigned SplatBitSize;
+ bool HasAnyUndefs;
+ if (!BVN->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
+ HasAnyUndefs, ElemBits) ||
+ SplatBitSize != ElemBits)
+ return SDValue();
+
+ ShiftAmount = SplatValue.getSExtValue();
+ } else if (ConstantSDNode *CVN = dyn_cast<ConstantSDNode>(N->getOperand(2))) {
+ ShiftAmount = CVN->getSExtValue();
+ } else
+ return SDValue();
+
+ unsigned Opcode;
+ bool IsRightShift;
+ switch (IID) {
+ default:
+ llvm_unreachable("Unknown shift intrinsic");
+ case Intrinsic::aarch64_neon_sqshl:
+ Opcode = AArch64ISD::SQSHL_I;
+ IsRightShift = false;
+ break;
+ case Intrinsic::aarch64_neon_uqshl:
+ Opcode = AArch64ISD::UQSHL_I;
+ IsRightShift = false;
+ break;
+ case Intrinsic::aarch64_neon_srshl:
+ Opcode = AArch64ISD::SRSHR_I;
+ IsRightShift = true;
+ break;
+ case Intrinsic::aarch64_neon_urshl:
+ Opcode = AArch64ISD::URSHR_I;
+ IsRightShift = true;
+ break;
+ case Intrinsic::aarch64_neon_sqshlu:
+ Opcode = AArch64ISD::SQSHLU_I;
+ IsRightShift = false;
break;
}
+ if (IsRightShift && ShiftAmount <= -1 && ShiftAmount >= -(int)ElemBits)
+ return DAG.getNode(Opcode, SDLoc(N), N->getValueType(0), N->getOperand(1),
+ DAG.getConstant(-ShiftAmount, MVT::i32));
+ else if (!IsRightShift && ShiftAmount >= 0 && ShiftAmount <= ElemBits)
+ return DAG.getNode(Opcode, SDLoc(N), N->getValueType(0), N->getOperand(1),
+ DAG.getConstant(ShiftAmount, MVT::i32));
+
return SDValue();
}
-/// ARM-specific DAG combining for intrinsics.
-static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) {
- unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
+// The CRC32[BH] instructions ignore the high bits of their data operand. Since
+// the intrinsics must be legal and take an i32, this means there's almost
+// certainly going to be a zext (in the form of an AND mask) in the DAG which
+// we can eliminate.
+static SDValue tryCombineCRC32(unsigned Mask, SDNode *N, SelectionDAG &DAG) {
+ SDValue AndN = N->getOperand(2);
+ if (AndN.getOpcode() != ISD::AND)
+ return SDValue();
+
+ ConstantSDNode *CMask = dyn_cast<ConstantSDNode>(AndN.getOperand(1));
+ if (!CMask || CMask->getZExtValue() != Mask)
+ return SDValue();
+
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N), MVT::i32,
+ N->getOperand(0), N->getOperand(1), AndN.getOperand(0));
+}
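+
+// E.g., a call like "crc32b(crc, (uint8_t)data)" is legalized with an
+// "and w1, w1, #0xff" to satisfy the i32 operand; since CRC32B only reads
+// bits [7:0] of its data register anyway, the AND is dropped and a bare
+// "crc32b w0, w0, w1" is emitted.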
- switch (IntNo) {
+static SDValue performIntrinsicCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const AArch64Subtarget *Subtarget) {
+ SelectionDAG &DAG = DCI.DAG;
+ unsigned IID = getIntrinsicID(N);
+ switch (IID) {
default:
- // Don't do anything for most intrinsics.
break;
+ case Intrinsic::aarch64_neon_vcvtfxs2fp:
+ case Intrinsic::aarch64_neon_vcvtfxu2fp:
+ return tryCombineFixedPointConvert(N, DCI, DAG);
+ case Intrinsic::aarch64_neon_fmax:
+ return DAG.getNode(AArch64ISD::FMAX, SDLoc(N), N->getValueType(0),
+ N->getOperand(1), N->getOperand(2));
+ case Intrinsic::aarch64_neon_fmin:
+ return DAG.getNode(AArch64ISD::FMIN, SDLoc(N), N->getValueType(0),
+ N->getOperand(1), N->getOperand(2));
+ case Intrinsic::aarch64_neon_smull:
+ case Intrinsic::aarch64_neon_umull:
+ case Intrinsic::aarch64_neon_pmull:
+ case Intrinsic::aarch64_neon_sqdmull:
+ return tryCombineLongOpWithDup(IID, N, DCI, DAG);
+ case Intrinsic::aarch64_neon_sqshl:
+ case Intrinsic::aarch64_neon_uqshl:
+ case Intrinsic::aarch64_neon_sqshlu:
+ case Intrinsic::aarch64_neon_srshl:
+ case Intrinsic::aarch64_neon_urshl:
+ return tryCombineShiftImm(IID, N, DAG);
+ case Intrinsic::aarch64_crc32b:
+ case Intrinsic::aarch64_crc32cb:
+ return tryCombineCRC32(0xff, N, DAG);
+ case Intrinsic::aarch64_crc32h:
+ case Intrinsic::aarch64_crc32ch:
+ return tryCombineCRC32(0xffff, N, DAG);
+ }
+ return SDValue();
+}
- case Intrinsic::arm_neon_vqshifts:
- case Intrinsic::arm_neon_vqshiftu:
- EVT VT = N->getOperand(1).getValueType();
- int64_t Cnt;
- if (!isVShiftLImm(N->getOperand(2), VT, Cnt))
- break;
- unsigned VShiftOpc = (IntNo == Intrinsic::arm_neon_vqshifts)
- ? AArch64ISD::NEON_QSHLs
- : AArch64ISD::NEON_QSHLu;
- return DAG.getNode(VShiftOpc, SDLoc(N), N->getValueType(0),
- N->getOperand(1), DAG.getConstant(Cnt, MVT::i32));
+static SDValue performExtendCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ SelectionDAG &DAG) {
+ // If we see something like (zext (sabd (extract_high ...), (DUP ...))) then
+ // we can convert that DUP into another extract_high (of a bigger DUP), which
+ // helps the backend to decide that an sabdl2 would be useful, saving a real
+ // extract_high operation.
+ if (!DCI.isBeforeLegalizeOps() && N->getOpcode() == ISD::ZERO_EXTEND &&
+ N->getOperand(0).getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
+ SDNode *ABDNode = N->getOperand(0).getNode();
+ unsigned IID = getIntrinsicID(ABDNode);
+ if (IID == Intrinsic::aarch64_neon_sabd ||
+ IID == Intrinsic::aarch64_neon_uabd) {
+ SDValue NewABD = tryCombineLongOpWithDup(IID, ABDNode, DCI, DAG);
+ if (!NewABD.getNode())
+ return SDValue();
+
+ return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0),
+ NewABD);
+ }
+ }
+
+ // This is effectively a custom type legalization for AArch64.
+ //
+ // Type legalization will split an extend of a small, legal, type to a larger
+ // illegal type by first splitting the destination type, often creating
+ // illegal source types, which then get legalized in isel-confusing ways,
+ // leading to really terrible codegen. E.g.,
+ // %result = v8i32 sext v8i8 %value
+ // becomes
+ // %losrc = extract_subreg %value, ...
+ // %hisrc = extract_subreg %value, ...
+ // %lo = v4i32 sext v4i8 %losrc
+ // %hi = v4i32 sext v4i8 %hisrc
+ // Things go rapidly downhill from there.
+ //
+ // For AArch64, the [sz]ext vector instructions can only go up one element
+ // size, so we can, e.g., extend from i8 to i16, but to go from i8 to i32
+ // take two instructions.
+ //
+ // This implies that the most efficient way to do the extend from v8i8
+ // to two v4i32 values is to first extend the v8i8 to v8i16, then do
+ // the normal splitting to happen for the v8i16->v8i32.
+
+ // This is pre-legalization to catch some cases where the default
+ // type legalization will create ill-tempered code.
+ if (!DCI.isBeforeLegalizeOps())
+ return SDValue();
+
+ // We're only interested in cleaning things up for non-legal vector types
+ // here. If both the source and destination are legal, things will just
+ // work naturally without any fiddling.
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ EVT ResVT = N->getValueType(0);
+ if (!ResVT.isVector() || TLI.isTypeLegal(ResVT))
+ return SDValue();
+ // If the vector type isn't a simple VT, it's beyond the scope of what
+ // we're worried about here. Let legalization do its thing and hope for
+ // the best.
+ if (!ResVT.isSimple())
+ return SDValue();
+
+ SDValue Src = N->getOperand(0);
+ MVT SrcVT = Src->getValueType(0).getSimpleVT();
+ // If the source VT is a 64-bit vector, we can play games and get the
+ // better results we want.
+ if (SrcVT.getSizeInBits() != 64)
+ return SDValue();
+
+ unsigned SrcEltSize = SrcVT.getVectorElementType().getSizeInBits();
+ unsigned ElementCount = SrcVT.getVectorNumElements();
+ SrcVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize * 2), ElementCount);
+ SDLoc DL(N);
+ Src = DAG.getNode(N->getOpcode(), DL, SrcVT, Src);
+
+ // Now split the rest of the operation into two halves, each with a 64
+ // bit source.
+ EVT LoVT, HiVT;
+ SDValue Lo, Hi;
+ unsigned NumElements = ResVT.getVectorNumElements();
+ assert(!(NumElements & 1) && "Splitting vector, but not in half!");
+ LoVT = HiVT = EVT::getVectorVT(*DAG.getContext(),
+ ResVT.getVectorElementType(), NumElements / 2);
+
+ EVT InNVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getVectorElementType(),
+ LoVT.getVectorNumElements());
+ Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, Src,
+ DAG.getIntPtrConstant(0));
+ Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, Src,
+ DAG.getIntPtrConstant(InNVT.getVectorNumElements()));
+ Lo = DAG.getNode(N->getOpcode(), DL, LoVT, Lo);
+ Hi = DAG.getNode(N->getOpcode(), DL, HiVT, Hi);
+
+ // Now combine the parts back together so we still have a single result
+ // like the combiner expects.
+ return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Lo, Hi);
+}
+
+/// Replace the store of a splatted vector with scalar stores of the splatted
+/// value. The load/store optimizer pass will merge them into store pair
+/// stores. This has better performance than a splat of the scalar followed by
+/// a split vector store. Even if the stores are not merged, it is four stores
+/// vs. a dup followed by an ext.b and two stores.
+static SDValue replaceSplatVectorStore(SelectionDAG &DAG, StoreSDNode *St) {
+ SDValue StVal = St->getValue();
+ EVT VT = StVal.getValueType();
+
+ // Don't replace floating point stores, they possibly won't be transformed to
+ // stp because of the store pair suppress pass.
+ if (VT.isFloatingPoint())
+ return SDValue();
+
+ // Check for insert vector elements.
+ if (StVal.getOpcode() != ISD::INSERT_VECTOR_ELT)
+ return SDValue();
+
+ // We can express a splat as store pair(s) for 2 or 4 elements.
+ unsigned NumVecElts = VT.getVectorNumElements();
+ if (NumVecElts != 4 && NumVecElts != 2)
+ return SDValue();
+ SDValue SplatVal = StVal.getOperand(1);
+ unsigned RemainInsertElts = NumVecElts - 1;
+
+ // Check that this is a splat.
+ while (--RemainInsertElts) {
+ SDValue NextInsertElt = StVal.getOperand(0);
+ if (NextInsertElt.getOpcode() != ISD::INSERT_VECTOR_ELT)
+ return SDValue();
+ if (NextInsertElt.getOperand(1) != SplatVal)
+ return SDValue();
+ StVal = NextInsertElt;
+ }
+ unsigned OrigAlignment = St->getAlignment();
+ unsigned EltOffset = NumVecElts == 4 ? 4 : 8;
+ unsigned Alignment = std::min(OrigAlignment, EltOffset);
+
+ // Create scalar stores. This is at least as good as the code sequence for a
+  // split unaligned store, which is a dup.s, ext.b, and two stores.
+ // Most of the time the three stores should be replaced by store pair
+ // instructions (stp).
+ SDLoc DL(St);
+ SDValue BasePtr = St->getBasePtr();
+ SDValue NewST1 =
+ DAG.getStore(St->getChain(), DL, SplatVal, BasePtr, St->getPointerInfo(),
+ St->isVolatile(), St->isNonTemporal(), St->getAlignment());
+
+ unsigned Offset = EltOffset;
+ while (--NumVecElts) {
+ SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i64, BasePtr,
+ DAG.getConstant(Offset, MVT::i64));
+ NewST1 = DAG.getStore(NewST1.getValue(0), DL, SplatVal, OffsetPtr,
+ St->getPointerInfo(), St->isVolatile(),
+ St->isNonTemporal(), Alignment);
+ Offset += EltOffset;
+ }
+ return NewST1;
+}
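+
+// Sketch of the payoff: storing a splat of w1 into a <4 x i32> at [x0] is
+// emitted as four scalar str instructions at offsets 0/4/8/12, which the
+// load/store optimizer can then merge into
+//   stp w1, w1, [x0]
+//   stp w1, w1, [x0, #8]
+// instead of a dup.4s followed by a q-register store.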
+
+static SDValue performSTORECombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ SelectionDAG &DAG,
+ const AArch64Subtarget *Subtarget) {
+ if (!DCI.isBeforeLegalize())
+ return SDValue();
+
+ StoreSDNode *S = cast<StoreSDNode>(N);
+ if (S->isVolatile())
+ return SDValue();
+
+  // Cyclone has bad performance on unaligned 16B stores when crossing
+  // cache-line and page boundaries. We want to split such stores.
+ if (!Subtarget->isCyclone())
+ return SDValue();
+
+  // Don't split at -Oz.
+ MachineFunction &MF = DAG.getMachineFunction();
+ bool IsMinSize = MF.getFunction()->getAttributes().hasAttribute(
+ AttributeSet::FunctionIndex, Attribute::MinSize);
+ if (IsMinSize)
+ return SDValue();
+
+ SDValue StVal = S->getValue();
+ EVT VT = StVal.getValueType();
+
+ // Don't split v2i64 vectors. Memcpy lowering produces those and splitting
+ // those up regresses performance on micro-benchmarks and olden/bh.
+ if (!VT.isVector() || VT.getVectorNumElements() < 2 || VT == MVT::v2i64)
+ return SDValue();
+
+ // Split unaligned 16B stores. They are terrible for performance.
+ // Don't split stores with alignment of 1 or 2. Code that uses clang vector
+ // extensions can use this to mark that it does not want splitting to happen
+ // (by underspecifying alignment to be 1 or 2). Furthermore, the chance of
+ // eliminating alignment hazards is only 1 in 8 for alignment of 2.
+ if (VT.getSizeInBits() != 128 || S->getAlignment() >= 16 ||
+ S->getAlignment() <= 2)
+ return SDValue();
+
+ // If we get a splat of a scalar convert this vector store to a store of
+ // scalars. They will be merged into store pairs thereby removing two
+ // instructions.
+ SDValue ReplacedSplat = replaceSplatVectorStore(DAG, S);
+ if (ReplacedSplat != SDValue())
+ return ReplacedSplat;
+
+ SDLoc DL(S);
+ unsigned NumElts = VT.getVectorNumElements() / 2;
+ // Split VT into two.
+ EVT HalfVT =
+ EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(), NumElts);
+ SDValue SubVector0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, StVal,
+ DAG.getIntPtrConstant(0));
+ SDValue SubVector1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, StVal,
+ DAG.getIntPtrConstant(NumElts));
+ SDValue BasePtr = S->getBasePtr();
+ SDValue NewST1 =
+ DAG.getStore(S->getChain(), DL, SubVector0, BasePtr, S->getPointerInfo(),
+ S->isVolatile(), S->isNonTemporal(), S->getAlignment());
+ SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i64, BasePtr,
+ DAG.getConstant(8, MVT::i64));
+ return DAG.getStore(NewST1.getValue(0), DL, SubVector1, OffsetPtr,
+ S->getPointerInfo(), S->isVolatile(), S->isNonTemporal(),
+ S->getAlignment());
+}
+
+/// Target-specific DAG combine function for post-increment LD1 (lane) and
+/// post-increment LD1R.
+static SDValue performPostLD1Combine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ bool IsLaneOp) {
+ if (DCI.isBeforeLegalizeOps())
+ return SDValue();
+
+ SelectionDAG &DAG = DCI.DAG;
+ EVT VT = N->getValueType(0);
+
+ unsigned LoadIdx = IsLaneOp ? 1 : 0;
+ SDNode *LD = N->getOperand(LoadIdx).getNode();
+  // If it is not a LOAD, we cannot do this combine.
+ if (LD->getOpcode() != ISD::LOAD)
+ return SDValue();
+
+ LoadSDNode *LoadSDN = cast<LoadSDNode>(LD);
+ EVT MemVT = LoadSDN->getMemoryVT();
+ // Check if memory operand is the same type as the vector element.
+ if (MemVT != VT.getVectorElementType())
+ return SDValue();
+
+ // Check if there are other uses. If so, do not combine as it will introduce
+ // an extra load.
+ for (SDNode::use_iterator UI = LD->use_begin(), UE = LD->use_end(); UI != UE;
+ ++UI) {
+ if (UI.getUse().getResNo() == 1) // Ignore uses of the chain result.
+ continue;
+ if (*UI != N)
+ return SDValue();
}
+ SDValue Addr = LD->getOperand(1);
+ SDValue Vector = N->getOperand(0);
+ // Search for a use of the address operand that is an increment.
+ for (SDNode::use_iterator UI = Addr.getNode()->use_begin(), UE =
+ Addr.getNode()->use_end(); UI != UE; ++UI) {
+ SDNode *User = *UI;
+ if (User->getOpcode() != ISD::ADD
+ || UI.getUse().getResNo() != Addr.getResNo())
+ continue;
+
+ // Check that the add is independent of the load. Otherwise, folding it
+ // would create a cycle.
+ if (User->isPredecessorOf(LD) || LD->isPredecessorOf(User))
+ continue;
+ // Also check that add is not used in the vector operand. This would also
+ // create a cycle.
+ if (User->isPredecessorOf(Vector.getNode()))
+ continue;
+
+ // If the increment is a constant, it must match the memory ref size.
+ SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0);
+ if (ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode())) {
+ uint32_t IncVal = CInc->getZExtValue();
+ unsigned NumBytes = VT.getScalarSizeInBits() / 8;
+ if (IncVal != NumBytes)
+ continue;
+ Inc = DAG.getRegister(AArch64::XZR, MVT::i64);
+ }
+
+ SmallVector<SDValue, 8> Ops;
+ Ops.push_back(LD->getOperand(0)); // Chain
+ if (IsLaneOp) {
+ Ops.push_back(Vector); // The vector to be inserted
+ Ops.push_back(N->getOperand(2)); // The lane to be inserted in the vector
+ }
+ Ops.push_back(Addr);
+ Ops.push_back(Inc);
+
+ EVT Tys[3] = { VT, MVT::i64, MVT::Other };
+ SDVTList SDTys = DAG.getVTList(ArrayRef<EVT>(Tys, 3));
+ unsigned NewOp = IsLaneOp ? AArch64ISD::LD1LANEpost : AArch64ISD::LD1DUPpost;
+ SDValue UpdN = DAG.getMemIntrinsicNode(NewOp, SDLoc(N), SDTys, Ops,
+ MemVT,
+ LoadSDN->getMemOperand());
+
+ // Update the uses.
+ std::vector<SDValue> NewResults;
+ NewResults.push_back(SDValue(LD, 0)); // The result of load
+ NewResults.push_back(SDValue(UpdN.getNode(), 2)); // Chain
+ DCI.CombineTo(LD, NewResults);
+ DCI.CombineTo(N, SDValue(UpdN.getNode(), 0)); // Dup/Inserted Result
+ DCI.CombineTo(User, SDValue(UpdN.getNode(), 1)); // Write back register
+
+ break;
+ }
return SDValue();
}
/// Target-specific DAG combine function for NEON load/store intrinsics
/// to merge base address updates.
-static SDValue CombineBaseUpdate(SDNode *N,
- TargetLowering::DAGCombinerInfo &DCI) {
+static SDValue performNEONPostLDSTCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ SelectionDAG &DAG) {
if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
return SDValue();
- SelectionDAG &DAG = DCI.DAG;
- bool isIntrinsic = (N->getOpcode() == ISD::INTRINSIC_VOID ||
- N->getOpcode() == ISD::INTRINSIC_W_CHAIN);
- unsigned AddrOpIdx = (isIntrinsic ? 2 : 1);
+ unsigned AddrOpIdx = N->getNumOperands() - 1;
SDValue Addr = N->getOperand(AddrOpIdx);
// Search for a use of the address operand that is an increment.
@@ -4090,106 +7423,96 @@ static SDValue CombineBaseUpdate(SDNode *N,
continue;
// Find the new opcode for the updating load/store.
- bool isLoad = true;
- bool isLaneOp = false;
+ bool IsStore = false;
+ bool IsLaneOp = false;
+ bool IsDupOp = false;
unsigned NewOpc = 0;
unsigned NumVecs = 0;
- if (isIntrinsic) {
- unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
- switch (IntNo) {
- default: llvm_unreachable("unexpected intrinsic for Neon base update");
- case Intrinsic::arm_neon_vld1: NewOpc = AArch64ISD::NEON_LD1_UPD;
- NumVecs = 1; break;
- case Intrinsic::arm_neon_vld2: NewOpc = AArch64ISD::NEON_LD2_UPD;
- NumVecs = 2; break;
- case Intrinsic::arm_neon_vld3: NewOpc = AArch64ISD::NEON_LD3_UPD;
- NumVecs = 3; break;
- case Intrinsic::arm_neon_vld4: NewOpc = AArch64ISD::NEON_LD4_UPD;
- NumVecs = 4; break;
- case Intrinsic::arm_neon_vst1: NewOpc = AArch64ISD::NEON_ST1_UPD;
- NumVecs = 1; isLoad = false; break;
- case Intrinsic::arm_neon_vst2: NewOpc = AArch64ISD::NEON_ST2_UPD;
- NumVecs = 2; isLoad = false; break;
- case Intrinsic::arm_neon_vst3: NewOpc = AArch64ISD::NEON_ST3_UPD;
- NumVecs = 3; isLoad = false; break;
- case Intrinsic::arm_neon_vst4: NewOpc = AArch64ISD::NEON_ST4_UPD;
- NumVecs = 4; isLoad = false; break;
- case Intrinsic::aarch64_neon_vld1x2: NewOpc = AArch64ISD::NEON_LD1x2_UPD;
- NumVecs = 2; break;
- case Intrinsic::aarch64_neon_vld1x3: NewOpc = AArch64ISD::NEON_LD1x3_UPD;
- NumVecs = 3; break;
- case Intrinsic::aarch64_neon_vld1x4: NewOpc = AArch64ISD::NEON_LD1x4_UPD;
- NumVecs = 4; break;
- case Intrinsic::aarch64_neon_vst1x2: NewOpc = AArch64ISD::NEON_ST1x2_UPD;
- NumVecs = 2; isLoad = false; break;
- case Intrinsic::aarch64_neon_vst1x3: NewOpc = AArch64ISD::NEON_ST1x3_UPD;
- NumVecs = 3; isLoad = false; break;
- case Intrinsic::aarch64_neon_vst1x4: NewOpc = AArch64ISD::NEON_ST1x4_UPD;
- NumVecs = 4; isLoad = false; break;
- case Intrinsic::arm_neon_vld2lane: NewOpc = AArch64ISD::NEON_LD2LN_UPD;
- NumVecs = 2; isLaneOp = true; break;
- case Intrinsic::arm_neon_vld3lane: NewOpc = AArch64ISD::NEON_LD3LN_UPD;
- NumVecs = 3; isLaneOp = true; break;
- case Intrinsic::arm_neon_vld4lane: NewOpc = AArch64ISD::NEON_LD4LN_UPD;
- NumVecs = 4; isLaneOp = true; break;
- case Intrinsic::arm_neon_vst2lane: NewOpc = AArch64ISD::NEON_ST2LN_UPD;
- NumVecs = 2; isLoad = false; isLaneOp = true; break;
- case Intrinsic::arm_neon_vst3lane: NewOpc = AArch64ISD::NEON_ST3LN_UPD;
- NumVecs = 3; isLoad = false; isLaneOp = true; break;
- case Intrinsic::arm_neon_vst4lane: NewOpc = AArch64ISD::NEON_ST4LN_UPD;
- NumVecs = 4; isLoad = false; isLaneOp = true; break;
- }
- } else {
- isLaneOp = true;
- switch (N->getOpcode()) {
- default: llvm_unreachable("unexpected opcode for Neon base update");
- case AArch64ISD::NEON_LD2DUP: NewOpc = AArch64ISD::NEON_LD2DUP_UPD;
- NumVecs = 2; break;
- case AArch64ISD::NEON_LD3DUP: NewOpc = AArch64ISD::NEON_LD3DUP_UPD;
- NumVecs = 3; break;
- case AArch64ISD::NEON_LD4DUP: NewOpc = AArch64ISD::NEON_LD4DUP_UPD;
- NumVecs = 4; break;
- }
+ unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+ switch (IntNo) {
+ default: llvm_unreachable("unexpected intrinsic for Neon base update");
+ case Intrinsic::aarch64_neon_ld2: NewOpc = AArch64ISD::LD2post;
+ NumVecs = 2; break;
+ case Intrinsic::aarch64_neon_ld3: NewOpc = AArch64ISD::LD3post;
+ NumVecs = 3; break;
+ case Intrinsic::aarch64_neon_ld4: NewOpc = AArch64ISD::LD4post;
+ NumVecs = 4; break;
+ case Intrinsic::aarch64_neon_st2: NewOpc = AArch64ISD::ST2post;
+ NumVecs = 2; IsStore = true; break;
+ case Intrinsic::aarch64_neon_st3: NewOpc = AArch64ISD::ST3post;
+ NumVecs = 3; IsStore = true; break;
+ case Intrinsic::aarch64_neon_st4: NewOpc = AArch64ISD::ST4post;
+ NumVecs = 4; IsStore = true; break;
+ case Intrinsic::aarch64_neon_ld1x2: NewOpc = AArch64ISD::LD1x2post;
+ NumVecs = 2; break;
+ case Intrinsic::aarch64_neon_ld1x3: NewOpc = AArch64ISD::LD1x3post;
+ NumVecs = 3; break;
+ case Intrinsic::aarch64_neon_ld1x4: NewOpc = AArch64ISD::LD1x4post;
+ NumVecs = 4; break;
+ case Intrinsic::aarch64_neon_st1x2: NewOpc = AArch64ISD::ST1x2post;
+ NumVecs = 2; IsStore = true; break;
+ case Intrinsic::aarch64_neon_st1x3: NewOpc = AArch64ISD::ST1x3post;
+ NumVecs = 3; IsStore = true; break;
+ case Intrinsic::aarch64_neon_st1x4: NewOpc = AArch64ISD::ST1x4post;
+ NumVecs = 4; IsStore = true; break;
+ case Intrinsic::aarch64_neon_ld2r: NewOpc = AArch64ISD::LD2DUPpost;
+ NumVecs = 2; IsDupOp = true; break;
+ case Intrinsic::aarch64_neon_ld3r: NewOpc = AArch64ISD::LD3DUPpost;
+ NumVecs = 3; IsDupOp = true; break;
+ case Intrinsic::aarch64_neon_ld4r: NewOpc = AArch64ISD::LD4DUPpost;
+ NumVecs = 4; IsDupOp = true; break;
+ case Intrinsic::aarch64_neon_ld2lane: NewOpc = AArch64ISD::LD2LANEpost;
+ NumVecs = 2; IsLaneOp = true; break;
+ case Intrinsic::aarch64_neon_ld3lane: NewOpc = AArch64ISD::LD3LANEpost;
+ NumVecs = 3; IsLaneOp = true; break;
+ case Intrinsic::aarch64_neon_ld4lane: NewOpc = AArch64ISD::LD4LANEpost;
+ NumVecs = 4; IsLaneOp = true; break;
+ case Intrinsic::aarch64_neon_st2lane: NewOpc = AArch64ISD::ST2LANEpost;
+ NumVecs = 2; IsStore = true; IsLaneOp = true; break;
+ case Intrinsic::aarch64_neon_st3lane: NewOpc = AArch64ISD::ST3LANEpost;
+ NumVecs = 3; IsStore = true; IsLaneOp = true; break;
+ case Intrinsic::aarch64_neon_st4lane: NewOpc = AArch64ISD::ST4LANEpost;
+ NumVecs = 4; IsStore = true; IsLaneOp = true; break;
}
- // Find the size of memory referenced by the load/store.
EVT VecTy;
- if (isLoad)
- VecTy = N->getValueType(0);
+ if (IsStore)
+ VecTy = N->getOperand(2).getValueType();
else
- VecTy = N->getOperand(AddrOpIdx + 1).getValueType();
- unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8;
- if (isLaneOp)
- NumBytes /= VecTy.getVectorNumElements();
+ VecTy = N->getValueType(0);
// If the increment is a constant, it must match the memory ref size.
SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0);
if (ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode())) {
uint32_t IncVal = CInc->getZExtValue();
+ unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8;
+ if (IsLaneOp || IsDupOp)
+ NumBytes /= VecTy.getVectorNumElements();
if (IncVal != NumBytes)
continue;
- Inc = DAG.getTargetConstant(IncVal, MVT::i32);
+ Inc = DAG.getRegister(AArch64::XZR, MVT::i64);
}
+ SmallVector<SDValue, 8> Ops;
+ Ops.push_back(N->getOperand(0)); // Incoming chain
+ // Lane operations and stores take a vector list as input.
+ if (IsLaneOp || IsStore)
+ for (unsigned i = 2; i < AddrOpIdx; ++i)
+ Ops.push_back(N->getOperand(i));
+ Ops.push_back(Addr); // Base register
+ Ops.push_back(Inc);
- // Create the new updating load/store node.
+ // Return Types.
EVT Tys[6];
- unsigned NumResultVecs = (isLoad ? NumVecs : 0);
+ unsigned NumResultVecs = (IsStore ? 0 : NumVecs);
unsigned n;
for (n = 0; n < NumResultVecs; ++n)
Tys[n] = VecTy;
- Tys[n++] = MVT::i64;
- Tys[n] = MVT::Other;
- SDVTList SDTys = DAG.getVTList(Tys, NumResultVecs + 2);
- SmallVector<SDValue, 8> Ops;
- Ops.push_back(N->getOperand(0)); // incoming chain
- Ops.push_back(N->getOperand(AddrOpIdx));
- Ops.push_back(Inc);
- for (unsigned i = AddrOpIdx + 1; i < N->getNumOperands(); ++i) {
- Ops.push_back(N->getOperand(i));
- }
+ Tys[n++] = MVT::i64; // Type of the write-back register
+ Tys[n] = MVT::Other; // Type of the chain
+ SDVTList SDTys = DAG.getVTList(ArrayRef<EVT>(Tys, NumResultVecs + 2));
+
MemIntrinsicSDNode *MemInt = cast<MemIntrinsicSDNode>(N);
- SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, SDLoc(N), SDTys,
- Ops.data(), Ops.size(),
+ SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, SDLoc(N), SDTys, Ops,
MemInt->getMemoryVT(),
MemInt->getMemOperand());
@@ -4198,7 +7521,7 @@ static SDValue CombineBaseUpdate(SDNode *N,
for (unsigned i = 0; i < NumResultVecs; ++i) {
NewResults.push_back(SDValue(UpdN.getNode(), i));
}
- NewResults.push_back(SDValue(UpdN.getNode(), NumResultVecs + 1)); // chain
+ NewResults.push_back(SDValue(UpdN.getNode(), NumResultVecs + 1));
DCI.CombineTo(N, NewResults);
DCI.CombineTo(User, SDValue(UpdN.getNode(), NumResultVecs));
@@ -4207,107 +7530,58 @@ static SDValue CombineBaseUpdate(SDNode *N,
return SDValue();
}
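// Editor's note: a standalone sketch (plain C++, illustrative names — not
// LLVM API) of the increment check the combine above performs. The
// written-back base must advance by exactly the number of bytes the memory
// operation touches: whole-vector ops move NumVecs full vectors, while lane
// and dup ops move NumVecs single elements.
#include <cassert>

static unsigned expectedIncBytes(unsigned NumVecs, unsigned VecBits,
                                 unsigned NumElts, bool IsLaneOrDup) {
  unsigned NumBytes = NumVecs * VecBits / 8;
  if (IsLaneOrDup)
    NumBytes /= NumElts; // only one element per vector is transferred
  return NumBytes;
}

int main() {
  assert(expectedIncBytes(2, 128, 4, false) == 32); // ld2 {v0.4s, v1.4s}: +32
  assert(expectedIncBytes(2, 128, 4, true) == 8);   // ld2 {v0.s, v1.s}[1]: +8
  return 0;
}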
-/// For a VDUPLANE node N, check if its source operand is a vldN-lane (N > 1)
-/// intrinsic, and if all the other uses of that intrinsic are also VDUPLANEs.
-/// If so, combine them to a vldN-dup operation and return true.
-static SDValue CombineVLDDUP(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
- SelectionDAG &DAG = DCI.DAG;
- EVT VT = N->getValueType(0);
-
- // Check if the VDUPLANE operand is a vldN-dup intrinsic.
- SDNode *VLD = N->getOperand(0).getNode();
- if (VLD->getOpcode() != ISD::INTRINSIC_W_CHAIN)
+// Optimize a compare against zero followed by a conditional branch into
+// CBZ/CBNZ.
+static SDValue performBRCONDCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ SelectionDAG &DAG) {
+ SDValue Chain = N->getOperand(0);
+ SDValue Dest = N->getOperand(1);
+ SDValue CCVal = N->getOperand(2);
+ SDValue Cmp = N->getOperand(3);
+
+ assert(isa<ConstantSDNode>(CCVal) && "Expected a ConstantSDNode here!");
+ unsigned CC = cast<ConstantSDNode>(CCVal)->getZExtValue();
+ if (CC != AArch64CC::EQ && CC != AArch64CC::NE)
return SDValue();
- unsigned NumVecs = 0;
- unsigned NewOpc = 0;
- unsigned IntNo = cast<ConstantSDNode>(VLD->getOperand(1))->getZExtValue();
- if (IntNo == Intrinsic::arm_neon_vld2lane) {
- NumVecs = 2;
- NewOpc = AArch64ISD::NEON_LD2DUP;
- } else if (IntNo == Intrinsic::arm_neon_vld3lane) {
- NumVecs = 3;
- NewOpc = AArch64ISD::NEON_LD3DUP;
- } else if (IntNo == Intrinsic::arm_neon_vld4lane) {
- NumVecs = 4;
- NewOpc = AArch64ISD::NEON_LD4DUP;
- } else {
+
+ unsigned CmpOpc = Cmp.getOpcode();
+ if (CmpOpc != AArch64ISD::ADDS && CmpOpc != AArch64ISD::SUBS)
return SDValue();
- }
- // First check that all the vldN-lane uses are VDUPLANEs and that the lane
- // numbers match the load.
- unsigned VLDLaneNo =
- cast<ConstantSDNode>(VLD->getOperand(NumVecs + 3))->getZExtValue();
- for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end();
- UI != UE; ++UI) {
- // Ignore uses of the chain result.
- if (UI.getUse().getResNo() == NumVecs)
- continue;
- SDNode *User = *UI;
- if (User->getOpcode() != AArch64ISD::NEON_VDUPLANE ||
- VLDLaneNo != cast<ConstantSDNode>(User->getOperand(1))->getZExtValue())
- return SDValue();
- }
+ // Only attempt folding if there is only one use of the flag and no use of the
+ // value.
+ if (!Cmp->hasNUsesOfValue(0, 0) || !Cmp->hasNUsesOfValue(1, 1))
+ return SDValue();
- // Create the vldN-dup node.
- EVT Tys[5];
- unsigned n;
- for (n = 0; n < NumVecs; ++n)
- Tys[n] = VT;
- Tys[n] = MVT::Other;
- SDVTList SDTys = DAG.getVTList(Tys, NumVecs + 1);
- SDValue Ops[] = { VLD->getOperand(0), VLD->getOperand(2) };
- MemIntrinsicSDNode *VLDMemInt = cast<MemIntrinsicSDNode>(VLD);
- SDValue VLDDup = DAG.getMemIntrinsicNode(NewOpc, SDLoc(VLD), SDTys, Ops, 2,
- VLDMemInt->getMemoryVT(),
- VLDMemInt->getMemOperand());
-
- // Update the uses.
- for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end();
- UI != UE; ++UI) {
- unsigned ResNo = UI.getUse().getResNo();
- // Ignore uses of the chain result.
- if (ResNo == NumVecs)
- continue;
- SDNode *User = *UI;
- DCI.CombineTo(User, SDValue(VLDDup.getNode(), ResNo));
- }
+ SDValue LHS = Cmp.getOperand(0);
+ SDValue RHS = Cmp.getOperand(1);
- // Now the vldN-lane intrinsic is dead except for its chain result.
- // Update uses of the chain.
- std::vector<SDValue> VLDDupResults;
- for (unsigned n = 0; n < NumVecs; ++n)
- VLDDupResults.push_back(SDValue(VLDDup.getNode(), n));
- VLDDupResults.push_back(SDValue(VLDDup.getNode(), NumVecs));
- DCI.CombineTo(VLD, VLDDupResults);
+ assert(LHS.getValueType() == RHS.getValueType() &&
+ "Expected the value type to be the same for both operands!");
+ if (LHS.getValueType() != MVT::i32 && LHS.getValueType() != MVT::i64)
+ return SDValue();
- return SDValue(N, 0);
-}
+ if (isa<ConstantSDNode>(LHS) && cast<ConstantSDNode>(LHS)->isNullValue())
+ std::swap(LHS, RHS);
-// v1i1 setcc ->
-// v1i1 (bitcast (i1 setcc (extract_vector_elt, extract_vector_elt))
-// FIXME: Currently the type legalizer can't handle SETCC having v1i1 as result.
-// If it can legalize "v1i1 SETCC" correctly, no need to combine such SETCC.
-static SDValue PerformSETCCCombine(SDNode *N, SelectionDAG &DAG) {
- EVT ResVT = N->getValueType(0);
+ if (!isa<ConstantSDNode>(RHS) || !cast<ConstantSDNode>(RHS)->isNullValue())
+ return SDValue();
- if (!ResVT.isVector() || ResVT.getVectorNumElements() != 1 ||
- ResVT.getVectorElementType() != MVT::i1)
+ if (LHS.getOpcode() == ISD::SHL || LHS.getOpcode() == ISD::SRA ||
+ LHS.getOpcode() == ISD::SRL)
return SDValue();
- SDValue LHS = N->getOperand(0);
- SDValue RHS = N->getOperand(1);
- EVT CmpVT = LHS.getValueType();
- LHS = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N),
- CmpVT.getVectorElementType(), LHS,
- DAG.getConstant(0, MVT::i64));
- RHS = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N),
- CmpVT.getVectorElementType(), RHS,
- DAG.getConstant(0, MVT::i64));
- SDValue SetCC =
- DAG.getSetCC(SDLoc(N), MVT::i1, LHS, RHS,
- cast<CondCodeSDNode>(N->getOperand(2))->get());
- return DAG.getNode(ISD::BITCAST, SDLoc(N), ResVT, SetCC);
+ // Fold the compare into the branch instruction.
+ SDValue BR;
+ if (CC == AArch64CC::EQ)
+ BR = DAG.getNode(AArch64ISD::CBZ, SDLoc(N), MVT::Other, Chain, LHS, Dest);
+ else
+ BR = DAG.getNode(AArch64ISD::CBNZ, SDLoc(N), MVT::Other, Chain, LHS, Dest);
+
+ // Do not add new nodes to the DAG combiner worklist.
+ DCI.CombineTo(N, BR, false);
+
+ return SDValue();
}
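// Editor's note: a compact model (plain C++, not LLVM API) of the guards
// performBRCONDCombine applies before rewriting to CBZ/CBNZ: the condition
// must be EQ or NE, the flags must come from an ADDS/SUBS whose value result
// is unused and whose flag result has a single use, the comparison must be an
// i32/i64 against zero, and shifted operands are skipped.
enum Cond { EQ, NE, OtherCond };
enum Opc { ADDS, SUBS, SHL, SRA, SRL, OtherOpc };

static bool canFoldToCBZ(Cond CC, Opc CmpOpc, bool FlagsSingleUse,
                         bool RHSIsZero, unsigned Bits, Opc LHSOpc) {
  if (CC != EQ && CC != NE) return false;
  if (CmpOpc != ADDS && CmpOpc != SUBS) return false;
  if (!FlagsSingleUse) return false;
  if (Bits != 32 && Bits != 64) return false;
  if (!RHSIsZero) return false;
  if (LHSOpc == SHL || LHSOpc == SRA || LHSOpc == SRL) return false;
  return true; // brcond(eq/ne, subs(x, 0)) -> cbz/cbnz x
}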
// vselect (v1i1 setcc) ->
@@ -4315,7 +7589,7 @@ static SDValue PerformSETCCCombine(SDNode *N, SelectionDAG &DAG) {
// FIXME: Currently the type legalizer can't handle VSELECT having v1i1 as
// condition. If it can legalize "VSELECT v1i1" correctly, no need to combine
// such VSELECT.
-static SDValue PerformVSelectCombine(SDNode *N, SelectionDAG &DAG) {
+static SDValue performVSelectCombine(SDNode *N, SelectionDAG &DAG) {
SDValue N0 = N->getOperand(0);
EVT CCVT = N0.getValueType();
@@ -4340,79 +7614,109 @@ static SDValue PerformVSelectCombine(SDNode *N, SelectionDAG &DAG) {
IfTrue, IfFalse);
}
-// sign_extend (extract_vector_elt (v1i1 setcc)) ->
-// extract_vector_elt (v1iXX setcc)
-// (XX is the size of the compared operand type)
-static SDValue PerformSignExtendCombine(SDNode *N, SelectionDAG &DAG) {
+/// A vector select: "(select (setcc LHS, RHS), vL, vR)" is best performed with
+/// the compare-mask instructions rather than going via NZCV, even if LHS and
+/// RHS are really scalar. This replaces any scalar setcc in the above pattern
+/// with a vector one followed by a DUP shuffle on the result.
+static SDValue performSelectCombine(SDNode *N, SelectionDAG &DAG) {
SDValue N0 = N->getOperand(0);
- SDValue Vec = N0.getOperand(0);
+ EVT ResVT = N->getValueType(0);
- if (N0.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
- Vec.getOpcode() != ISD::SETCC)
+ if (!N->getOperand(1).getValueType().isVector())
return SDValue();
- EVT ResVT = N->getValueType(0);
- EVT CmpVT = Vec.getOperand(0).getValueType();
- // Only optimize when the result type is of the same size as the element
- // type of the compared operand.
- if (ResVT.getSizeInBits() != CmpVT.getVectorElementType().getSizeInBits())
+ if (N0.getOpcode() != ISD::SETCC || N0.getValueType() != MVT::i1)
return SDValue();
- SDValue Lane = N0.getOperand(1);
- SDValue SetCC =
- DAG.getSetCC(SDLoc(N), CmpVT.changeVectorElementTypeToInteger(),
- Vec.getOperand(0), Vec.getOperand(1),
- cast<CondCodeSDNode>(Vec.getOperand(2))->get());
- return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), ResVT,
- SetCC, Lane);
+ SDLoc DL(N0);
+
+ EVT SrcVT = N0.getOperand(0).getValueType();
+ SrcVT = EVT::getVectorVT(*DAG.getContext(), SrcVT,
+ ResVT.getSizeInBits() / SrcVT.getSizeInBits());
+ EVT CCVT = SrcVT.changeVectorElementTypeToInteger();
+
+ // First perform a vector comparison, where lane 0 is the one we're interested
+ // in.
+ SDValue LHS =
+ DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, SrcVT, N0.getOperand(0));
+ SDValue RHS =
+ DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, SrcVT, N0.getOperand(1));
+ SDValue SetCC = DAG.getNode(ISD::SETCC, DL, CCVT, LHS, RHS, N0.getOperand(2));
+
+ // Now duplicate the comparison mask we want across all other lanes.
+ SmallVector<int, 8> DUPMask(CCVT.getVectorNumElements(), 0);
+ SDValue Mask = DAG.getVectorShuffle(CCVT, DL, SetCC, SetCC, DUPMask.data());
+ Mask = DAG.getNode(ISD::BITCAST, DL, ResVT.changeVectorElementTypeToInteger(),
+ Mask);
+
+ return DAG.getSelect(DL, ResVT, Mask, N->getOperand(1), N->getOperand(2));
}
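// Editor's note: a scalar model (plain C++, illustrative names and types) of
// what the combine above arranges at the value level: the comparison is done
// once, its truth value becomes an all-ones or all-zeros lane mask duplicated
// across the vector, and the select is then a lane-wise mask select — the
// shape NEON's compare-mask + BSL instructions want.
#include <array>
#include <cstddef>
#include <cstdint>

template <std::size_t N>
static std::array<double, N> selectViaCompareMask(
    double LHS, double RHS, const std::array<double, N> &IfTrue,
    const std::array<double, N> &IfFalse) {
  uint64_t Lane0 = (LHS < RHS) ? ~0ULL : 0ULL; // vector compare, lane 0
  std::array<double, N> Res;
  for (std::size_t i = 0; i < N; ++i)          // DUP the lane-0 mask
    Res[i] = Lane0 ? IfTrue[i] : IfFalse[i];
  return Res;
}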
-SDValue
-AArch64TargetLowering::PerformDAGCombine(SDNode *N,
- DAGCombinerInfo &DCI) const {
+SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ SelectionDAG &DAG = DCI.DAG;
switch (N->getOpcode()) {
- default: break;
- case ISD::AND: return PerformANDCombine(N, DCI);
- case ISD::OR: return PerformORCombine(N, DCI, getSubtarget());
- case ISD::SHL:
- case ISD::SRA:
- case ISD::SRL:
- return PerformShiftCombine(N, DCI, getSubtarget());
- case ISD::SETCC: return PerformSETCCCombine(N, DCI.DAG);
- case ISD::VSELECT: return PerformVSelectCombine(N, DCI.DAG);
- case ISD::SIGN_EXTEND: return PerformSignExtendCombine(N, DCI.DAG);
+ default:
+ break;
+ case ISD::ADD:
+ case ISD::SUB:
+ return performAddSubLongCombine(N, DCI, DAG);
+ case ISD::XOR:
+ return performXorCombine(N, DAG, DCI, Subtarget);
+ case ISD::MUL:
+ return performMulCombine(N, DAG, DCI, Subtarget);
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP:
+ return performIntToFpCombine(N, DAG);
+ case ISD::OR:
+ return performORCombine(N, DCI, Subtarget);
case ISD::INTRINSIC_WO_CHAIN:
- return PerformIntrinsicCombine(N, DCI.DAG);
- case AArch64ISD::NEON_VDUPLANE:
- return CombineVLDDUP(N, DCI);
- case AArch64ISD::NEON_LD2DUP:
- case AArch64ISD::NEON_LD3DUP:
- case AArch64ISD::NEON_LD4DUP:
- return CombineBaseUpdate(N, DCI);
+ return performIntrinsicCombine(N, DCI, Subtarget);
+ case ISD::ANY_EXTEND:
+ case ISD::ZERO_EXTEND:
+ case ISD::SIGN_EXTEND:
+ return performExtendCombine(N, DCI, DAG);
+ case ISD::BITCAST:
+ return performBitcastCombine(N, DCI, DAG);
+ case ISD::CONCAT_VECTORS:
+ return performConcatVectorsCombine(N, DCI, DAG);
+ case ISD::SELECT:
+ return performSelectCombine(N, DAG);
+ case ISD::VSELECT:
+ return performVSelectCombine(N, DCI.DAG);
+ case ISD::STORE:
+ return performSTORECombine(N, DCI, DAG, Subtarget);
+ case AArch64ISD::BRCOND:
+ return performBRCONDCombine(N, DCI, DAG);
+ case AArch64ISD::DUP:
+ return performPostLD1Combine(N, DCI, false);
+ case ISD::INSERT_VECTOR_ELT:
+ return performPostLD1Combine(N, DCI, true);
case ISD::INTRINSIC_VOID:
case ISD::INTRINSIC_W_CHAIN:
switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
- case Intrinsic::arm_neon_vld1:
- case Intrinsic::arm_neon_vld2:
- case Intrinsic::arm_neon_vld3:
- case Intrinsic::arm_neon_vld4:
- case Intrinsic::arm_neon_vst1:
- case Intrinsic::arm_neon_vst2:
- case Intrinsic::arm_neon_vst3:
- case Intrinsic::arm_neon_vst4:
- case Intrinsic::arm_neon_vld2lane:
- case Intrinsic::arm_neon_vld3lane:
- case Intrinsic::arm_neon_vld4lane:
- case Intrinsic::aarch64_neon_vld1x2:
- case Intrinsic::aarch64_neon_vld1x3:
- case Intrinsic::aarch64_neon_vld1x4:
- case Intrinsic::aarch64_neon_vst1x2:
- case Intrinsic::aarch64_neon_vst1x3:
- case Intrinsic::aarch64_neon_vst1x4:
- case Intrinsic::arm_neon_vst2lane:
- case Intrinsic::arm_neon_vst3lane:
- case Intrinsic::arm_neon_vst4lane:
- return CombineBaseUpdate(N, DCI);
+ case Intrinsic::aarch64_neon_ld2:
+ case Intrinsic::aarch64_neon_ld3:
+ case Intrinsic::aarch64_neon_ld4:
+ case Intrinsic::aarch64_neon_ld1x2:
+ case Intrinsic::aarch64_neon_ld1x3:
+ case Intrinsic::aarch64_neon_ld1x4:
+ case Intrinsic::aarch64_neon_ld2lane:
+ case Intrinsic::aarch64_neon_ld3lane:
+ case Intrinsic::aarch64_neon_ld4lane:
+ case Intrinsic::aarch64_neon_ld2r:
+ case Intrinsic::aarch64_neon_ld3r:
+ case Intrinsic::aarch64_neon_ld4r:
+ case Intrinsic::aarch64_neon_st2:
+ case Intrinsic::aarch64_neon_st3:
+ case Intrinsic::aarch64_neon_st4:
+ case Intrinsic::aarch64_neon_st1x2:
+ case Intrinsic::aarch64_neon_st1x3:
+ case Intrinsic::aarch64_neon_st1x4:
+ case Intrinsic::aarch64_neon_st2lane:
+ case Intrinsic::aarch64_neon_st3lane:
+ case Intrinsic::aarch64_neon_st4lane:
+ return performNEONPostLDSTCombine(N, DCI, DAG);
default:
break;
}
@@ -4420,979 +7724,214 @@ AArch64TargetLowering::PerformDAGCombine(SDNode *N,
return SDValue();
}
-bool
-AArch64TargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
- VT = VT.getScalarType();
-
- if (!VT.isSimple())
- return false;
-
- switch (VT.getSimpleVT().SimpleTy) {
- case MVT::f16:
- case MVT::f32:
- case MVT::f64:
- return true;
- case MVT::f128:
+// Check if the return value is used only as a return value, as otherwise
+// we can't perform a tail call. In particular, we need to check for
+// target ISD nodes that are returns and any other "odd" constructs
+// that the generic analysis code won't necessarily catch.
+bool AArch64TargetLowering::isUsedByReturnOnly(SDNode *N,
+ SDValue &Chain) const {
+ if (N->getNumValues() != 1)
return false;
- default:
- break;
- }
-
- return false;
-}
-// Check whether a shuffle_vector could be represented as a concat_vector.
-bool AArch64TargetLowering::isConcatVector(SDValue Op, SelectionDAG &DAG,
- SDValue V0, SDValue V1,
- const int *Mask,
- SDValue &Res) const {
- SDLoc DL(Op);
- EVT VT = Op.getValueType();
- if (VT.getSizeInBits() != 128)
+ if (!N->hasNUsesOfValue(1, 0))
return false;
- if (VT.getVectorElementType() != V0.getValueType().getVectorElementType() ||
- VT.getVectorElementType() != V1.getValueType().getVectorElementType())
- return false;
-
- unsigned NumElts = VT.getVectorNumElements();
- bool isContactVector = true;
- bool splitV0 = false;
- if (V0.getValueType().getSizeInBits() == 128)
- splitV0 = true;
-
- for (int I = 0, E = NumElts / 2; I != E; I++) {
- if (Mask[I] != I) {
- isContactVector = false;
- break;
- }
- }
-
- if (isContactVector) {
- int offset = NumElts / 2;
- for (int I = NumElts / 2, E = NumElts; I != E; I++) {
- if (Mask[I] != I + splitV0 * offset) {
- isContactVector = false;
- break;
- }
- }
- }
-
- if (isContactVector) {
- EVT CastVT = EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(),
- NumElts / 2);
- if (splitV0) {
- V0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, CastVT, V0,
- DAG.getConstant(0, MVT::i64));
- }
- if (V1.getValueType().getSizeInBits() == 128) {
- V1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, CastVT, V1,
- DAG.getConstant(0, MVT::i64));
- }
- Res = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, V0, V1);
- return true;
- }
- return false;
-}
-
-// Check whether a Build Vector could be represented as a Shuffle Vector.
-// This Shuffle Vector may not be legalized, so the length of its operands and
-// the length of its result may not be equal.
-bool AArch64TargetLowering::isKnownShuffleVector(SDValue Op, SelectionDAG &DAG,
- SDValue &V0, SDValue &V1,
- int *Mask) const {
- SDLoc DL(Op);
- EVT VT = Op.getValueType();
- unsigned NumElts = VT.getVectorNumElements();
- unsigned V0NumElts = 0;
- // Check if all elements are extracted from fewer than 3 vectors.
- for (unsigned i = 0; i < NumElts; ++i) {
- SDValue Elt = Op.getOperand(i);
- if (Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
- Elt.getOperand(0).getValueType().getVectorElementType() !=
- VT.getVectorElementType())
+ SDValue TCChain = Chain;
+ SDNode *Copy = *N->use_begin();
+ if (Copy->getOpcode() == ISD::CopyToReg) {
+ // If the copy has a glue operand, we conservatively assume it isn't safe to
+ // perform a tail call.
+ if (Copy->getOperand(Copy->getNumOperands() - 1).getValueType() ==
+ MVT::Glue)
return false;
+ TCChain = Copy->getOperand(0);
+ } else if (Copy->getOpcode() != ISD::FP_EXTEND)
+ return false;
- if (V0.getNode() == 0) {
- V0 = Elt.getOperand(0);
- V0NumElts = V0.getValueType().getVectorNumElements();
- }
- if (Elt.getOperand(0) == V0) {
- Mask[i] = (cast<ConstantSDNode>(Elt->getOperand(1))->getZExtValue());
- continue;
- } else if (V1.getNode() == 0) {
- V1 = Elt.getOperand(0);
- }
- if (Elt.getOperand(0) == V1) {
- unsigned Lane = cast<ConstantSDNode>(Elt->getOperand(1))->getZExtValue();
- Mask[i] = (Lane + V0NumElts);
- continue;
- } else {
+ bool HasRet = false;
+ for (SDNode *Node : Copy->uses()) {
+ if (Node->getOpcode() != AArch64ISD::RET_FLAG)
return false;
- }
+ HasRet = true;
}
- return true;
-}
-
-/// LowerShiftRightParts - Lower SRL_PARTS and SRA_PARTS, which return two
-/// i64 values and take a 2 x i64 value to shift plus a shift amount.
-SDValue AArch64TargetLowering::LowerShiftRightParts(SDValue Op,
- SelectionDAG &DAG) const {
- assert(Op.getNumOperands() == 3 && "Not a quad-shift!");
- EVT VT = Op.getValueType();
- unsigned VTBits = VT.getSizeInBits();
- SDLoc dl(Op);
- SDValue ShOpLo = Op.getOperand(0);
- SDValue ShOpHi = Op.getOperand(1);
- SDValue ShAmt = Op.getOperand(2);
- unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL;
-
- assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS);
- SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64,
- DAG.getConstant(VTBits, MVT::i64), ShAmt);
- SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt);
- SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64, ShAmt,
- DAG.getConstant(VTBits, MVT::i64));
- SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt);
- SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
- SDValue TrueVal = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);
- SDValue Tmp3 = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
-
- SDValue A64cc;
- SDValue CmpOp = getSelectableIntSetCC(ExtraShAmt,
- DAG.getConstant(0, MVT::i64),
- ISD::SETGE, A64cc,
- DAG, dl);
- SDValue Hi = DAG.getNode(AArch64ISD::SELECT_CC, dl, VT, CmpOp,
- DAG.getConstant(0, Tmp3.getValueType()), Tmp3,
- A64cc);
- SDValue Lo = DAG.getNode(AArch64ISD::SELECT_CC, dl, VT, CmpOp,
- TrueVal, FalseVal, A64cc);
+ if (!HasRet)
+ return false;
- SDValue Ops[2] = { Lo, Hi };
- return DAG.getMergeValues(Ops, 2, dl);
+ Chain = TCChain;
+ return true;
}
-/// LowerShiftLeftParts - Lower SHL_PARTS, which returns two
-/// i64 values and takes a 2 x i64 value to shift plus a shift amount.
-SDValue AArch64TargetLowering::LowerShiftLeftParts(SDValue Op,
- SelectionDAG &DAG) const {
- assert(Op.getNumOperands() == 3 && "Not a quad-shift!");
- EVT VT = Op.getValueType();
- unsigned VTBits = VT.getSizeInBits();
- SDLoc dl(Op);
- SDValue ShOpLo = Op.getOperand(0);
- SDValue ShOpHi = Op.getOperand(1);
- SDValue ShAmt = Op.getOperand(2);
-
- assert(Op.getOpcode() == ISD::SHL_PARTS);
- SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64,
- DAG.getConstant(VTBits, MVT::i64), ShAmt);
- SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt);
- SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64, ShAmt,
- DAG.getConstant(VTBits, MVT::i64));
- SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);
- SDValue Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);
- SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
- SDValue Tmp4 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
-
- SDValue A64cc;
- SDValue CmpOp = getSelectableIntSetCC(ExtraShAmt,
- DAG.getConstant(0, MVT::i64),
- ISD::SETGE, A64cc,
- DAG, dl);
-
- SDValue Lo = DAG.getNode(AArch64ISD::SELECT_CC, dl, VT, CmpOp,
- DAG.getConstant(0, Tmp4.getValueType()), Tmp4,
- A64cc);
- SDValue Hi = DAG.getNode(AArch64ISD::SELECT_CC, dl, VT, CmpOp,
- Tmp3, FalseVal, A64cc);
+// Return whether an instruction can potentially be optimized to a tail
+// call. This will cause the optimizers to attempt to move, or duplicate,
+// return instructions to help enable tail call optimizations for this
+// instruction.
+bool AArch64TargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
+ if (!CI->isTailCall())
+ return false;
- SDValue Ops[2] = { Lo, Hi };
- return DAG.getMergeValues(Ops, 2, dl);
+ return true;
}
-// If this is a case we can't handle, return null and let the default
-// expansion code take care of it.
-SDValue
-AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
- const AArch64Subtarget *ST) const {
-
- BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
- SDLoc DL(Op);
- EVT VT = Op.getValueType();
-
- APInt SplatBits, SplatUndef;
- unsigned SplatBitSize;
- bool HasAnyUndefs;
-
- unsigned UseNeonMov = VT.getSizeInBits() >= 64;
-
- // Note we favor lowering MOVI over MVNI.
- // This has implications on the definition of patterns in TableGen to select
- // BIC immediate instructions but not ORR immediate instructions.
- // If this lowering order is changed, TableGen patterns for BIC immediate and
- // ORR immediate instructions have to be updated.
- if (UseNeonMov &&
- BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
- if (SplatBitSize <= 64) {
- // First attempt to use vector immediate-form MOVI
- EVT NeonMovVT;
- unsigned Imm = 0;
- unsigned OpCmode = 0;
-
- if (isNeonModifiedImm(SplatBits.getZExtValue(), SplatUndef.getZExtValue(),
- SplatBitSize, DAG, VT.is128BitVector(),
- Neon_Mov_Imm, NeonMovVT, Imm, OpCmode)) {
- SDValue ImmVal = DAG.getTargetConstant(Imm, MVT::i32);
- SDValue OpCmodeVal = DAG.getConstant(OpCmode, MVT::i32);
-
- if (ImmVal.getNode() && OpCmodeVal.getNode()) {
- SDValue NeonMov = DAG.getNode(AArch64ISD::NEON_MOVIMM, DL, NeonMovVT,
- ImmVal, OpCmodeVal);
- return DAG.getNode(ISD::BITCAST, DL, VT, NeonMov);
- }
- }
-
- // Then attempt to use vector immediate-form MVNI
- uint64_t NegatedImm = (~SplatBits).getZExtValue();
- if (isNeonModifiedImm(NegatedImm, SplatUndef.getZExtValue(), SplatBitSize,
- DAG, VT.is128BitVector(), Neon_Mvn_Imm, NeonMovVT,
- Imm, OpCmode)) {
- SDValue ImmVal = DAG.getTargetConstant(Imm, MVT::i32);
- SDValue OpCmodeVal = DAG.getConstant(OpCmode, MVT::i32);
- if (ImmVal.getNode() && OpCmodeVal.getNode()) {
- SDValue NeonMov = DAG.getNode(AArch64ISD::NEON_MVNIMM, DL, NeonMovVT,
- ImmVal, OpCmodeVal);
- return DAG.getNode(ISD::BITCAST, DL, VT, NeonMov);
- }
- }
-
- // Attempt to use vector immediate-form FMOV
- if (((VT == MVT::v2f32 || VT == MVT::v4f32) && SplatBitSize == 32) ||
- (VT == MVT::v2f64 && SplatBitSize == 64)) {
- APFloat RealVal(
- SplatBitSize == 32 ? APFloat::IEEEsingle : APFloat::IEEEdouble,
- SplatBits);
- uint32_t ImmVal;
- if (A64Imms::isFPImm(RealVal, ImmVal)) {
- SDValue Val = DAG.getTargetConstant(ImmVal, MVT::i32);
- return DAG.getNode(AArch64ISD::NEON_FMOVIMM, DL, VT, Val);
- }
- }
- }
- }
-
- unsigned NumElts = VT.getVectorNumElements();
- bool isOnlyLowElement = true;
- bool usesOnlyOneValue = true;
- bool hasDominantValue = false;
- bool isConstant = true;
-
- // Map of the number of times a particular SDValue appears in the
- // element list.
- DenseMap<SDValue, unsigned> ValueCounts;
- SDValue Value;
- for (unsigned i = 0; i < NumElts; ++i) {
- SDValue V = Op.getOperand(i);
- if (V.getOpcode() == ISD::UNDEF)
- continue;
- if (i > 0)
- isOnlyLowElement = false;
- if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V))
- isConstant = false;
-
- ValueCounts.insert(std::make_pair(V, 0));
- unsigned &Count = ValueCounts[V];
-
- // Is this value dominant? (takes up more than half of the lanes)
- if (++Count > (NumElts / 2)) {
- hasDominantValue = true;
- Value = V;
- }
- }
- if (ValueCounts.size() != 1)
- usesOnlyOneValue = false;
- if (!Value.getNode() && ValueCounts.size() > 0)
- Value = ValueCounts.begin()->first;
-
- if (ValueCounts.size() == 0)
- return DAG.getUNDEF(VT);
-
- if (isOnlyLowElement)
- return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Value);
-
- unsigned EltSize = VT.getVectorElementType().getSizeInBits();
- if (hasDominantValue && EltSize <= 64) {
- // Use VDUP for non-constant splats.
- if (!isConstant) {
- SDValue N;
-
- // If we are DUPing a value that comes directly from a vector, we could
- // just use DUPLANE. We can only do this if the lane being extracted
- // is at a constant index, as the DUP from lane instructions only have
- // constant-index forms.
- //
- // If there is a TRUNCATE between EXTRACT_VECTOR_ELT and DUP, we can
- // remove the TRUNCATE for DUPLANE by updating the source vector to the
- // appropriate vector type and lane index.
- //
- // FIXME: for now v1i8, v1i16 and v1i32 are legal vector types; once they
- // are no longer legal, there is no need to check that the type size in
- // bits is larger than 64.
- SDValue V = Value;
- if (Value->getOpcode() == ISD::TRUNCATE)
- V = Value->getOperand(0);
- if (V->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
- isa<ConstantSDNode>(V->getOperand(1)) &&
- V->getOperand(0).getValueType().getSizeInBits() >= 64) {
-
- // If the element size of the source vector is larger than the DUPLANE
- // element size, we can do the transformation by:
- // 1) bitcasting the source register to a smaller-element vector, and
- // 2) multiplying the lane index by SrcEltSize/ResEltSize.
- // For example, we can lower
- // "v8i16 vdup_lane(v4i32, 1)"
- // to be
- // "v8i16 vdup_lane(v8i16 bitcast(v4i32), 2)".
- SDValue SrcVec = V->getOperand(0);
- unsigned SrcEltSize =
- SrcVec.getValueType().getVectorElementType().getSizeInBits();
- unsigned ResEltSize = VT.getVectorElementType().getSizeInBits();
- if (SrcEltSize > ResEltSize) {
- assert((SrcEltSize % ResEltSize == 0) && "Invalid element size");
- SDValue BitCast;
- unsigned SrcSize = SrcVec.getValueType().getSizeInBits();
- unsigned ResSize = VT.getSizeInBits();
-
- if (SrcSize > ResSize) {
- assert((SrcSize % ResSize == 0) && "Invalid vector size");
- EVT CastVT =
- EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(),
- SrcSize / ResEltSize);
- BitCast = DAG.getNode(ISD::BITCAST, DL, CastVT, SrcVec);
- } else {
- assert((SrcSize == ResSize) && "Invalid vector size of source vec");
- BitCast = DAG.getNode(ISD::BITCAST, DL, VT, SrcVec);
- }
-
- unsigned LaneIdx = V->getConstantOperandVal(1);
- SDValue Lane =
- DAG.getConstant((SrcEltSize / ResEltSize) * LaneIdx, MVT::i64);
- N = DAG.getNode(AArch64ISD::NEON_VDUPLANE, DL, VT, BitCast, Lane);
- } else {
- assert((SrcEltSize == ResEltSize) &&
- "Invalid element size of source vec");
- N = DAG.getNode(AArch64ISD::NEON_VDUPLANE, DL, VT, V->getOperand(0),
- V->getOperand(1));
- }
- } else
- N = DAG.getNode(AArch64ISD::NEON_VDUP, DL, VT, Value);
-
- if (!usesOnlyOneValue) {
- // The dominant value was splatted as 'N', but we now have to insert
- // all differing elements.
- for (unsigned I = 0; I < NumElts; ++I) {
- if (Op.getOperand(I) == Value)
- continue;
- SmallVector<SDValue, 3> Ops;
- Ops.push_back(N);
- Ops.push_back(Op.getOperand(I));
- Ops.push_back(DAG.getConstant(I, MVT::i64));
- N = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, &Ops[0], 3);
- }
- }
- return N;
- }
- if (usesOnlyOneValue && isConstant) {
- return DAG.getNode(AArch64ISD::NEON_VDUP, DL, VT, Value);
- }
- }
- // If all elements are constants and the case above didn't get hit, fall back
- // to the default expansion, which will generate a load from the constant
- // pool.
- if (isConstant)
- return SDValue();
-
- // Try to lower this in lowering ShuffleVector way.
- SDValue V0, V1;
- int Mask[16];
- if (isKnownShuffleVector(Op, DAG, V0, V1, Mask)) {
- unsigned V0NumElts = V0.getValueType().getVectorNumElements();
- if (!V1.getNode() && V0NumElts == NumElts * 2) {
- V1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V0,
- DAG.getConstant(NumElts, MVT::i64));
- V0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V0,
- DAG.getConstant(0, MVT::i64));
- V0NumElts = V0.getValueType().getVectorNumElements();
- }
-
- if (V1.getNode() && NumElts == V0NumElts &&
- V0NumElts == V1.getValueType().getVectorNumElements()) {
- SDValue Shuffle = DAG.getVectorShuffle(VT, DL, V0, V1, Mask);
- if (Shuffle.getOpcode() != ISD::VECTOR_SHUFFLE)
- return Shuffle;
- else
- return LowerVECTOR_SHUFFLE(Shuffle, DAG);
- } else {
- SDValue Res;
- if (isConcatVector(Op, DAG, V0, V1, Mask, Res))
- return Res;
- }
- }
+bool AArch64TargetLowering::getIndexedAddressParts(SDNode *Op, SDValue &Base,
+ SDValue &Offset,
+ ISD::MemIndexedMode &AM,
+ bool &IsInc,
+ SelectionDAG &DAG) const {
+ if (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB)
+ return false;
- // If all else fails, just use a sequence of INSERT_VECTOR_ELT when we
- // know the default expansion would otherwise fall back on something even
- // worse. For a vector with one or two non-undef values, that's
- // scalar_to_vector for the elements followed by a shuffle (provided the
- // shuffle is valid for the target) and materialization element by element
- // on the stack followed by a load for everything else.
- if (!isConstant && !usesOnlyOneValue) {
- SDValue Vec = DAG.getUNDEF(VT);
- for (unsigned i = 0 ; i < NumElts; ++i) {
- SDValue V = Op.getOperand(i);
- if (V.getOpcode() == ISD::UNDEF)
- continue;
- SDValue LaneIdx = DAG.getConstant(i, MVT::i64);
- Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Vec, V, LaneIdx);
- }
- return Vec;
+ Base = Op->getOperand(0);
+ // All of the indexed addressing mode instructions take a signed
+ // 9-bit immediate offset.
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1))) {
+ int64_t RHSC = (int64_t)RHS->getZExtValue();
+ if (RHSC >= 256 || RHSC <= -256)
+ return false;
+ IsInc = (Op->getOpcode() == ISD::ADD);
+ Offset = Op->getOperand(1);
+ return true;
}
- return SDValue();
+ return false;
}
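// Editor's note: a standalone restatement of the range test above. Pre/post-
// indexed AArch64 loads and stores encode a signed 9-bit immediate, nominally
// [-256, 255]; the code keeps only -255..255 (it rejects RHSC >= 256 and
// RHSC <= -256), so the encodable low bound -256 is conservatively excluded.
static bool fitsIndexedImm(long long Offset) {
  return Offset > -256 && Offset < 256;
}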
-/// isREVMask - Check if a vector shuffle corresponds to a REV
-/// instruction with the specified blocksize. (The order of the elements
-/// within each block of the vector is reversed.)
-static bool isREVMask(ArrayRef<int> M, EVT VT, unsigned BlockSize) {
- assert((BlockSize == 16 || BlockSize == 32 || BlockSize == 64) &&
- "Only possible block sizes for REV are: 16, 32, 64");
-
- unsigned EltSz = VT.getVectorElementType().getSizeInBits();
- if (EltSz == 64)
+bool AArch64TargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
+ SDValue &Offset,
+ ISD::MemIndexedMode &AM,
+ SelectionDAG &DAG) const {
+ EVT VT;
+ SDValue Ptr;
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
+ VT = LD->getMemoryVT();
+ Ptr = LD->getBasePtr();
+ } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
+ VT = ST->getMemoryVT();
+ Ptr = ST->getBasePtr();
+ } else
return false;
- unsigned NumElts = VT.getVectorNumElements();
- unsigned BlockElts = M[0] + 1;
- // If the first shuffle index is UNDEF, be optimistic.
- if (M[0] < 0)
- BlockElts = BlockSize / EltSz;
-
- if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz)
+ bool IsInc;
+ if (!getIndexedAddressParts(Ptr.getNode(), Base, Offset, AM, IsInc, DAG))
return false;
-
- for (unsigned i = 0; i < NumElts; ++i) {
- if (M[i] < 0)
- continue; // ignore UNDEF indices
- if ((unsigned)M[i] != (i - i % BlockElts) + (BlockElts - 1 - i % BlockElts))
- return false;
- }
-
+ AM = IsInc ? ISD::PRE_INC : ISD::PRE_DEC;
return true;
}
-// isPermuteMask - Check whether the vector shuffle matches a UZP, ZIP or
-// TRN instruction.
-static unsigned isPermuteMask(ArrayRef<int> M, EVT VT, bool isV2undef) {
- unsigned NumElts = VT.getVectorNumElements();
- if (NumElts < 4)
- return 0;
-
- bool ismatch = true;
-
- // Check UZP1
- for (unsigned i = 0; i < NumElts; ++i) {
- unsigned answer = i * 2;
- if (isV2undef && answer >= NumElts)
- answer -= NumElts;
- if (M[i] != -1 && (unsigned)M[i] != answer) {
- ismatch = false;
- break;
- }
- }
- if (ismatch)
- return AArch64ISD::NEON_UZP1;
-
- // Check UZP2
- ismatch = true;
- for (unsigned i = 0; i < NumElts; ++i) {
- unsigned answer = i * 2 + 1;
- if (isV2undef && answer >= NumElts)
- answer -= NumElts;
- if (M[i] != -1 && (unsigned)M[i] != answer) {
- ismatch = false;
- break;
- }
- }
- if (ismatch)
- return AArch64ISD::NEON_UZP2;
-
- // Check ZIP1
- ismatch = true;
- for (unsigned i = 0; i < NumElts; ++i) {
- unsigned answer = i / 2 + NumElts * (i % 2);
- if (isV2undef && answer >= NumElts)
- answer -= NumElts;
- if (M[i] != -1 && (unsigned)M[i] != answer) {
- ismatch = false;
- break;
- }
- }
- if (ismatch)
- return AArch64ISD::NEON_ZIP1;
-
- // Check ZIP2
- ismatch = true;
- for (unsigned i = 0; i < NumElts; ++i) {
- unsigned answer = (NumElts + i) / 2 + NumElts * (i % 2);
- if (isV2undef && answer >= NumElts)
- answer -= NumElts;
- if (M[i] != -1 && (unsigned)M[i] != answer) {
- ismatch = false;
- break;
- }
- }
- if (ismatch)
- return AArch64ISD::NEON_ZIP2;
-
- // Check TRN1
- ismatch = true;
- for (unsigned i = 0; i < NumElts; ++i) {
- unsigned answer = i + (NumElts - 1) * (i % 2);
- if (isV2undef && answer >= NumElts)
- answer -= NumElts;
- if (M[i] != -1 && (unsigned)M[i] != answer) {
- ismatch = false;
- break;
- }
- }
- if (ismatch)
- return AArch64ISD::NEON_TRN1;
-
- // Check TRN2
- ismatch = true;
- for (unsigned i = 0; i < NumElts; ++i) {
- unsigned answer = 1 + i + (NumElts - 1) * (i % 2);
- if (isV2undef && answer >= NumElts)
- answer -= NumElts;
- if (M[i] != -1 && (unsigned)M[i] != answer) {
- ismatch = false;
- break;
- }
- }
- if (ismatch)
- return AArch64ISD::NEON_TRN2;
-
- return 0;
-}
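// Editor's note: the removed matcher above recognizes the six NEON permutes
// purely by index formula. A standalone generator for those reference masks
// (plain C++, illustrative only):
#include <vector>

enum Permute { UZP1, UZP2, ZIP1, ZIP2, TRN1, TRN2 };

static std::vector<unsigned> permuteMask(Permute P, unsigned NumElts) {
  std::vector<unsigned> M(NumElts);
  for (unsigned i = 0; i < NumElts; ++i) {
    switch (P) {
    case UZP1: M[i] = i * 2; break;                              // even elts
    case UZP2: M[i] = i * 2 + 1; break;                          // odd elts
    case ZIP1: M[i] = i / 2 + NumElts * (i % 2); break;          // low halves
    case ZIP2: M[i] = (NumElts + i) / 2 + NumElts * (i % 2); break; // high
    case TRN1: M[i] = i + (NumElts - 1) * (i % 2); break;        // even cols
    case TRN2: M[i] = 1 + i + (NumElts - 1) * (i % 2); break;    // odd cols
    }
  }
  return M; // e.g. NumElts == 4: ZIP1 -> {0,4,1,5}, TRN1 -> {0,4,2,6}
}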
-
-SDValue
-AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
- SelectionDAG &DAG) const {
- SDValue V1 = Op.getOperand(0);
- SDValue V2 = Op.getOperand(1);
- SDLoc dl(Op);
- EVT VT = Op.getValueType();
- ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
-
- // Convert shuffles that are directly supported on NEON to target-specific
- // DAG nodes, instead of keeping them as shuffles and matching them again
- // during code selection. This is more efficient and avoids the possibility
- // of inconsistencies between legalization and selection.
- ArrayRef<int> ShuffleMask = SVN->getMask();
-
- unsigned EltSize = VT.getVectorElementType().getSizeInBits();
- if (EltSize > 64)
- return SDValue();
-
- if (isREVMask(ShuffleMask, VT, 64))
- return DAG.getNode(AArch64ISD::NEON_REV64, dl, VT, V1);
- if (isREVMask(ShuffleMask, VT, 32))
- return DAG.getNode(AArch64ISD::NEON_REV32, dl, VT, V1);
- if (isREVMask(ShuffleMask, VT, 16))
- return DAG.getNode(AArch64ISD::NEON_REV16, dl, VT, V1);
-
- unsigned ISDNo;
- if (V2.getOpcode() == ISD::UNDEF)
- ISDNo = isPermuteMask(ShuffleMask, VT, true);
- else
- ISDNo = isPermuteMask(ShuffleMask, VT, false);
-
- if (ISDNo) {
- if (V2.getOpcode() == ISD::UNDEF)
- return DAG.getNode(ISDNo, dl, VT, V1, V1);
- else
- return DAG.getNode(ISDNo, dl, VT, V1, V2);
- }
-
- SDValue Res;
- if (isConcatVector(Op, DAG, V1, V2, &ShuffleMask[0], Res))
- return Res;
-
- // If the elements of the shuffle mask are all the same constant, we can
- // transform it into either NEON_VDUP or NEON_VDUPLANE
- if (ShuffleVectorSDNode::isSplatMask(&ShuffleMask[0], VT)) {
- int Lane = SVN->getSplatIndex();
- // If this is undef splat, generate it via "just" vdup, if possible.
- if (Lane == -1) Lane = 0;
-
- // Test if V1 is a SCALAR_TO_VECTOR.
- if (V1.getOpcode() == ISD::SCALAR_TO_VECTOR) {
- return DAG.getNode(AArch64ISD::NEON_VDUP, dl, VT, V1.getOperand(0));
- }
- // Test if V1 is a BUILD_VECTOR which is equivalent to a SCALAR_TO_VECTOR.
- if (V1.getOpcode() == ISD::BUILD_VECTOR) {
- bool IsScalarToVector = true;
- for (unsigned i = 0, e = V1.getNumOperands(); i != e; ++i)
- if (V1.getOperand(i).getOpcode() != ISD::UNDEF &&
- i != (unsigned)Lane) {
- IsScalarToVector = false;
- break;
- }
- if (IsScalarToVector)
- return DAG.getNode(AArch64ISD::NEON_VDUP, dl, VT,
- V1.getOperand(Lane));
- }
-
- // Test if V1 is a EXTRACT_SUBVECTOR.
- if (V1.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
- int ExtLane = cast<ConstantSDNode>(V1.getOperand(1))->getZExtValue();
- return DAG.getNode(AArch64ISD::NEON_VDUPLANE, dl, VT, V1.getOperand(0),
- DAG.getConstant(Lane + ExtLane, MVT::i64));
- }
- // Test if V1 is a CONCAT_VECTORS.
- if (V1.getOpcode() == ISD::CONCAT_VECTORS &&
- V1.getOperand(1).getOpcode() == ISD::UNDEF) {
- SDValue Op0 = V1.getOperand(0);
- assert((unsigned)Lane < Op0.getValueType().getVectorNumElements() &&
- "Invalid vector lane access");
- return DAG.getNode(AArch64ISD::NEON_VDUPLANE, dl, VT, Op0,
- DAG.getConstant(Lane, MVT::i64));
- }
-
- return DAG.getNode(AArch64ISD::NEON_VDUPLANE, dl, VT, V1,
- DAG.getConstant(Lane, MVT::i64));
- }
-
- int Length = ShuffleMask.size();
- int V1EltNum = V1.getValueType().getVectorNumElements();
-
- // If the number of V1 elements is the same as the number of shuffle mask
- // elements and the shuffle mask values are sequential, we can transform
- // it into NEON_VEXTRACT.
- if (V1EltNum == Length) {
- // Check if the shuffle mask is sequential.
- int SkipUndef = 0;
- while (ShuffleMask[SkipUndef] == -1) {
- SkipUndef++;
- }
- int CurMask = ShuffleMask[SkipUndef];
- if (CurMask >= SkipUndef) {
- bool IsSequential = true;
- for (int I = SkipUndef; I < Length; ++I) {
- if (ShuffleMask[I] != -1 && ShuffleMask[I] != CurMask) {
- IsSequential = false;
- break;
- }
- CurMask++;
- }
- if (IsSequential) {
- assert((EltSize % 8 == 0) && "Bitsize of vector element is incorrect");
- unsigned VecSize = EltSize * V1EltNum;
- unsigned Index = (EltSize / 8) * (ShuffleMask[SkipUndef] - SkipUndef);
- if (VecSize == 64 || VecSize == 128)
- return DAG.getNode(AArch64ISD::NEON_VEXTRACT, dl, VT, V1, V2,
- DAG.getConstant(Index, MVT::i64));
- }
- }
- }
-
- // For a shuffle mask like "0, 1, 2, 3, 4, 5, 13, 7", try to generate an
- // insert by element from V2 into V1.
- // If the shuffle mask is like "0, 1, 10, 11, 12, 13, 14, 15", V2 would be a
- // better insertion target than V1 since fewer inserts are needed, so we
- // count the elements to be inserted for both V1 and V2 and select the one
- // needing fewer inserts as the target.
-
- // Collect elements need to be inserted and their index.
- SmallVector<int, 8> NV1Elt;
- SmallVector<int, 8> N1Index;
- SmallVector<int, 8> NV2Elt;
- SmallVector<int, 8> N2Index;
- for (int I = 0; I != Length; ++I) {
- if (ShuffleMask[I] != I) {
- NV1Elt.push_back(ShuffleMask[I]);
- N1Index.push_back(I);
- }
- }
- for (int I = 0; I != Length; ++I) {
- if (ShuffleMask[I] != (I + V1EltNum)) {
- NV2Elt.push_back(ShuffleMask[I]);
- N2Index.push_back(I);
- }
- }
-
- // Decide which vector to insert elements into. If all lanes mismatch,
- // neither V1 nor V2 is used as the insertion base.
- SDValue InsV = V1;
- SmallVector<int, 8> InsMasks = NV1Elt;
- SmallVector<int, 8> InsIndex = N1Index;
- if ((int)NV1Elt.size() != Length || (int)NV2Elt.size() != Length) {
- if (NV1Elt.size() > NV2Elt.size()) {
- InsV = V2;
- InsMasks = NV2Elt;
- InsIndex = N2Index;
- }
- } else {
- InsV = DAG.getNode(ISD::UNDEF, dl, VT);
- }
-
- for (int I = 0, E = InsMasks.size(); I != E; ++I) {
- SDValue ExtV = V1;
- int Mask = InsMasks[I];
- if (Mask >= V1EltNum) {
- ExtV = V2;
- Mask -= V1EltNum;
- }
- // Any value type smaller than i32 is illegal in AArch64, and this lowering
- // function is called after the legalize pass, so we need to legalize
- // the result here.
- EVT EltVT;
- if (VT.getVectorElementType().isFloatingPoint())
- EltVT = (EltSize == 64) ? MVT::f64 : MVT::f32;
- else
- EltVT = (EltSize == 64) ? MVT::i64 : MVT::i32;
+bool AArch64TargetLowering::getPostIndexedAddressParts(
+ SDNode *N, SDNode *Op, SDValue &Base, SDValue &Offset,
+ ISD::MemIndexedMode &AM, SelectionDAG &DAG) const {
+ EVT VT;
+ SDValue Ptr;
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
+ VT = LD->getMemoryVT();
+ Ptr = LD->getBasePtr();
+ } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
+ VT = ST->getMemoryVT();
+ Ptr = ST->getBasePtr();
+ } else
+ return false;
- if (Mask >= 0) {
- ExtV = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, ExtV,
- DAG.getConstant(Mask, MVT::i64));
- InsV = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, InsV, ExtV,
- DAG.getConstant(InsIndex[I], MVT::i64));
- }
- }
- return InsV;
+ bool IsInc;
+ if (!getIndexedAddressParts(Op, Base, Offset, AM, IsInc, DAG))
+ return false;
+ // Post-indexing updates the base, so it's not a valid transform
+ // if the add/sub's base is not the same as the load or store's pointer.
+ if (Ptr != Base)
+ return false;
+ AM = IsInc ? ISD::POST_INC : ISD::POST_DEC;
+ return true;
}
-AArch64TargetLowering::ConstraintType
-AArch64TargetLowering::getConstraintType(const std::string &Constraint) const {
- if (Constraint.size() == 1) {
- switch (Constraint[0]) {
- default: break;
- case 'w': // An FP/SIMD vector register
- return C_RegisterClass;
- case 'I': // Constant that can be used with an ADD instruction
- case 'J': // Constant that can be used with a SUB instruction
- case 'K': // Constant that can be used with a 32-bit logical instruction
- case 'L': // Constant that can be used with a 64-bit logical instruction
- case 'M': // Constant that can be used as a 32-bit MOV immediate
- case 'N': // Constant that can be used as a 64-bit MOV immediate
- case 'Y': // Floating point constant zero
- case 'Z': // Integer constant zero
- return C_Other;
- case 'Q': // A memory reference with base register and no offset
- return C_Memory;
- case 'S': // A symbolic address
- return C_Other;
- }
+void AArch64TargetLowering::ReplaceNodeResults(
+ SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
+ switch (N->getOpcode()) {
+ default:
+ llvm_unreachable("Don't know how to custom expand this");
+ case ISD::FP_TO_UINT:
+ case ISD::FP_TO_SINT:
+ assert(N->getValueType(0) == MVT::i128 && "unexpected illegal conversion");
+ // Let normal code take care of it by not adding anything to Results.
+ return;
}
-
- // FIXME: Ump, Utf, Usa, Ush
- // Ump: A memory address suitable for ldp/stp in SI, DI, SF and DF modes,
- // whatever they may be
- // Utf: A memory address suitable for ldp/stp in TF mode, whatever it may be
- // Usa: An absolute symbolic address
- // Ush: The high part (bits 32:12) of a pc-relative symbolic address
- assert(Constraint != "Ump" && Constraint != "Utf" && Constraint != "Usa"
- && Constraint != "Ush" && "Unimplemented constraints");
-
- return TargetLowering::getConstraintType(Constraint);
}
-TargetLowering::ConstraintWeight
-AArch64TargetLowering::getSingleConstraintMatchWeight(AsmOperandInfo &Info,
- const char *Constraint) const {
-
- llvm_unreachable("Constraint weight unimplemented");
+bool AArch64TargetLowering::shouldExpandAtomicInIR(Instruction *Inst) const {
+ // Loads and stores less than 128 bits are already atomic; ones above that
+ // are doomed anyway, so defer to the default libcall and blame the OS when
+ // things go wrong:
+ if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
+ return SI->getValueOperand()->getType()->getPrimitiveSizeInBits() == 128;
+ else if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
+ return LI->getType()->getPrimitiveSizeInBits() == 128;
+
+ // For the real atomic operations, we have ldxr/stxr up to 128 bits.
+ return Inst->getType()->getPrimitiveSizeInBits() <= 128;
}
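// Editor's note: the policy above, restated as a standalone predicate
// (illustrative names): plain loads and stores up to 64 bits are naturally
// atomic on AArch64, so only the 128-bit ones need the ld(a)xp/st(l)xp
// expansion; read-modify-write operations can use ldxr/stxr loops up to 128
// bits.
enum AtomicKind { AtomicLoad, AtomicStore, AtomicRMW };

static bool expandAtomicInIR(AtomicKind K, unsigned Bits) {
  if (K == AtomicLoad || K == AtomicStore)
    return Bits == 128;
  return Bits <= 128;
}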
-void
-AArch64TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
- std::string &Constraint,
- std::vector<SDValue> &Ops,
- SelectionDAG &DAG) const {
- SDValue Result(0, 0);
-
- // Only length 1 constraints are C_Other.
- if (Constraint.size() != 1) return;
-
- // Only C_Other constraints get lowered like this. That means constants for
- // us, so return early if there's no hope the constraint can be lowered.
-
- switch(Constraint[0]) {
- default: break;
- case 'I': case 'J': case 'K': case 'L':
- case 'M': case 'N': case 'Z': {
- ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
- if (!C)
- return;
-
- uint64_t CVal = C->getZExtValue();
- uint32_t Bits;
-
- switch (Constraint[0]) {
- default:
- // FIXME: 'M' and 'N' are MOV pseudo-insts -- unsupported in assembly. 'J'
- // is a peculiarly useless SUB constraint.
- llvm_unreachable("Unimplemented C_Other constraint");
- case 'I':
- if (CVal <= 0xfff)
- break;
- return;
- case 'K':
- if (A64Imms::isLogicalImm(32, CVal, Bits))
- break;
- return;
- case 'L':
- if (A64Imms::isLogicalImm(64, CVal, Bits))
- break;
- return;
- case 'Z':
- if (CVal == 0)
- break;
- return;
- }
-
- Result = DAG.getTargetConstant(CVal, Op.getValueType());
- break;
- }
- case 'S': {
- // An absolute symbolic address or label reference.
- if (const GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op)) {
- Result = DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(Op),
- GA->getValueType(0));
- } else if (const BlockAddressSDNode *BA
- = dyn_cast<BlockAddressSDNode>(Op)) {
- Result = DAG.getTargetBlockAddress(BA->getBlockAddress(),
- BA->getValueType(0));
- } else if (const ExternalSymbolSDNode *ES
- = dyn_cast<ExternalSymbolSDNode>(Op)) {
- Result = DAG.getTargetExternalSymbol(ES->getSymbol(),
- ES->getValueType(0));
- } else
- return;
- break;
- }
- case 'Y':
- if (const ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op)) {
- if (CFP->isExactlyValue(0.0)) {
- Result = DAG.getTargetConstantFP(0.0, CFP->getValueType(0));
- break;
- }
- }
- return;
+Value *AArch64TargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
+ AtomicOrdering Ord) const {
+ Module *M = Builder.GetInsertBlock()->getParent()->getParent();
+ Type *ValTy = cast<PointerType>(Addr->getType())->getElementType();
+ bool IsAcquire =
+ Ord == Acquire || Ord == AcquireRelease || Ord == SequentiallyConsistent;
+
+ // Since i128 isn't legal and intrinsics don't get type-lowered, the ldxp
+ // intrinsic must return {i64, i64} and we have to recombine them into a
+ // single i128 here.
+ if (ValTy->getPrimitiveSizeInBits() == 128) {
+ Intrinsic::ID Int =
+ IsAcquire ? Intrinsic::aarch64_ldaxp : Intrinsic::aarch64_ldxp;
+ Function *Ldxr = llvm::Intrinsic::getDeclaration(M, Int);
+
+ Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext()));
+ Value *LoHi = Builder.CreateCall(Ldxr, Addr, "lohi");
+
+ Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
+ Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
+ Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
+ Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
+ return Builder.CreateOr(
+ Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 64)), "val64");
}
- if (Result.getNode()) {
- Ops.push_back(Result);
- return;
- }
+ Type *Tys[] = { Addr->getType() };
+ Intrinsic::ID Int =
+ IsAcquire ? Intrinsic::aarch64_ldaxr : Intrinsic::aarch64_ldxr;
+ Function *Ldxr = llvm::Intrinsic::getDeclaration(M, Int, Tys);
- // It's an unknown constraint for us. Let generic code have a go.
- TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
+ return Builder.CreateTruncOrBitCast(
+ Builder.CreateCall(Ldxr, Addr),
+ cast<PointerType>(Addr->getType())->getElementType());
}
-std::pair<unsigned, const TargetRegisterClass*>
-AArch64TargetLowering::getRegForInlineAsmConstraint(
- const std::string &Constraint,
- MVT VT) const {
- if (Constraint.size() == 1) {
- switch (Constraint[0]) {
- case 'r':
- if (VT.getSizeInBits() <= 32)
- return std::make_pair(0U, &AArch64::GPR32RegClass);
- else if (VT == MVT::i64)
- return std::make_pair(0U, &AArch64::GPR64RegClass);
- break;
- case 'w':
- if (VT == MVT::f16)
- return std::make_pair(0U, &AArch64::FPR16RegClass);
- else if (VT == MVT::f32)
- return std::make_pair(0U, &AArch64::FPR32RegClass);
- else if (VT.getSizeInBits() == 64)
- return std::make_pair(0U, &AArch64::FPR64RegClass);
- else if (VT.getSizeInBits() == 128)
- return std::make_pair(0U, &AArch64::FPR128RegClass);
- break;
- }
+Value *AArch64TargetLowering::emitStoreConditional(IRBuilder<> &Builder,
+ Value *Val, Value *Addr,
+ AtomicOrdering Ord) const {
+ Module *M = Builder.GetInsertBlock()->getParent()->getParent();
+ bool IsRelease =
+ Ord == Release || Ord == AcquireRelease || Ord == SequentiallyConsistent;
+
+ // Since the intrinsics must have legal type, the i128 intrinsics take two
+ // parameters: "i64, i64". We must marshal Val into the appropriate form
+ // before the call.
+ if (Val->getType()->getPrimitiveSizeInBits() == 128) {
+ Intrinsic::ID Int =
+ IsRelease ? Intrinsic::aarch64_stlxp : Intrinsic::aarch64_stxp;
+ Function *Stxr = Intrinsic::getDeclaration(M, Int);
+ Type *Int64Ty = Type::getInt64Ty(M->getContext());
+
+ Value *Lo = Builder.CreateTrunc(Val, Int64Ty, "lo");
+ Value *Hi = Builder.CreateTrunc(Builder.CreateLShr(Val, 64), Int64Ty, "hi");
+ Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext()));
+ return Builder.CreateCall3(Stxr, Lo, Hi, Addr);
}
- // Use the default implementation in TargetLowering to convert the register
- // constraint into a member of a register class.
- return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
-}
+ Intrinsic::ID Int =
+ IsRelease ? Intrinsic::aarch64_stlxr : Intrinsic::aarch64_stxr;
+ Type *Tys[] = { Addr->getType() };
+ Function *Stxr = Intrinsic::getDeclaration(M, Int, Tys);
-/// Represent NEON load and store intrinsics as MemIntrinsicNodes.
-/// The associated MachineMemOperands record the alignment specified
-/// in the intrinsic calls.
-bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
- const CallInst &I,
- unsigned Intrinsic) const {
- switch (Intrinsic) {
- case Intrinsic::arm_neon_vld1:
- case Intrinsic::arm_neon_vld2:
- case Intrinsic::arm_neon_vld3:
- case Intrinsic::arm_neon_vld4:
- case Intrinsic::aarch64_neon_vld1x2:
- case Intrinsic::aarch64_neon_vld1x3:
- case Intrinsic::aarch64_neon_vld1x4:
- case Intrinsic::arm_neon_vld2lane:
- case Intrinsic::arm_neon_vld3lane:
- case Intrinsic::arm_neon_vld4lane: {
- Info.opc = ISD::INTRINSIC_W_CHAIN;
- // Conservatively set memVT to the entire set of vectors loaded.
- uint64_t NumElts = getDataLayout()->getTypeAllocSize(I.getType()) / 8;
- Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
- Info.ptrVal = I.getArgOperand(0);
- Info.offset = 0;
- Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1);
- Info.align = cast<ConstantInt>(AlignArg)->getZExtValue();
- Info.vol = false; // volatile loads with NEON intrinsics not supported
- Info.readMem = true;
- Info.writeMem = false;
- return true;
- }
- case Intrinsic::arm_neon_vst1:
- case Intrinsic::arm_neon_vst2:
- case Intrinsic::arm_neon_vst3:
- case Intrinsic::arm_neon_vst4:
- case Intrinsic::aarch64_neon_vst1x2:
- case Intrinsic::aarch64_neon_vst1x3:
- case Intrinsic::aarch64_neon_vst1x4:
- case Intrinsic::arm_neon_vst2lane:
- case Intrinsic::arm_neon_vst3lane:
- case Intrinsic::arm_neon_vst4lane: {
- Info.opc = ISD::INTRINSIC_VOID;
- // Conservatively set memVT to the entire set of vectors stored.
- unsigned NumElts = 0;
- for (unsigned ArgI = 1, ArgE = I.getNumArgOperands(); ArgI < ArgE; ++ArgI) {
- Type *ArgTy = I.getArgOperand(ArgI)->getType();
- if (!ArgTy->isVectorTy())
- break;
- NumElts += getDataLayout()->getTypeAllocSize(ArgTy) / 8;
- }
- Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
- Info.ptrVal = I.getArgOperand(0);
- Info.offset = 0;
- Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1);
- Info.align = cast<ConstantInt>(AlignArg)->getZExtValue();
- Info.vol = false; // volatile stores with NEON intrinsics not supported
- Info.readMem = false;
- Info.writeMem = true;
- return true;
- }
- default:
- break;
- }
-
- return false;
+ return Builder.CreateCall2(
+ Stxr, Builder.CreateZExtOrBitCast(
+ Val, Stxr->getFunctionType()->getParamType(0)),
+ Addr);
}
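// Editor's note: the i128 marshalling above, sketched with the GCC/Clang
// __int128 extension (an assumption; the real code builds IR with
// trunc/lshr/zext/shl/or): stxp takes the value as two i64 halves, and
// ldaxp's {i64, i64} result is recombined as lo | (hi << 64).
#include <cstdint>

struct Halves { uint64_t Lo, Hi; };

static Halves splitForStxp(unsigned __int128 Val) {
  return { (uint64_t)Val, (uint64_t)(Val >> 64) };
}

static unsigned __int128 joinFromLdaxp(Halves H) {
  return (unsigned __int128)H.Lo | ((unsigned __int128)H.Hi << 64);
}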
diff --git a/lib/Target/AArch64/AArch64ISelLowering.h b/lib/Target/AArch64/AArch64ISelLowering.h
index e946b25..de16c4d 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/lib/Target/AArch64/AArch64ISelLowering.h
@@ -12,364 +12,453 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TARGET_AARCH64_ISELLOWERING_H
-#define LLVM_TARGET_AARCH64_ISELLOWERING_H
+#ifndef LLVM_TARGET_AArch64_ISELLOWERING_H
+#define LLVM_TARGET_AArch64_ISELLOWERING_H
-#include "Utils/AArch64BaseInfo.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/CallingConv.h"
#include "llvm/Target/TargetLowering.h"
namespace llvm {
+
namespace AArch64ISD {
- enum NodeType {
- // Start the numbering from where ISD NodeType finishes.
- FIRST_NUMBER = ISD::BUILTIN_OP_END,
-
- // This is a conditional branch which also notes the flag needed
- // (eq/sgt/...). A64 puts this information on the branches rather than
- // compares as LLVM does.
- BR_CC,
-
- // A node to be selected to an actual call operation: either BL or BLR in
- // the absence of tail calls.
- Call,
-
- // Indicates a floating-point immediate which fits into the format required
- // by the FMOV instructions. First (and only) operand is the 8-bit encoded
- // value of that immediate.
- FPMOV,
-
- // Corresponds directly to an EXTR instruction. Operands are an LHS, an RHS
- // and an LSB.
- EXTR,
-
- // Wraps a load from the GOT, which should always be performed with a 64-bit
- // load instruction. This prevents the DAG combiner folding a truncate to
- // form a smaller memory access.
- GOTLoad,
-
- // Performs a bitfield insert. Arguments are: the value being inserted into;
- // the value being inserted; least significant bit changed; width of the
- // field.
- BFI,
-
- // Simply a convenient node inserted during ISelLowering to represent
- // procedure return. Will almost certainly be selected to "RET".
- Ret,
-
- /// Extracts a field of contiguous bits from the source and sign extends
- /// them into a single register. Arguments are: source; immr; imms. Note
- /// these are pre-encoded since DAG matching can't cope with combining LSB
- /// and Width into these values itself.
- SBFX,
-
- /// This is an A64-ification of the standard LLVM SELECT_CC operation. The
- /// main difference is that it only has the values and an A64 condition,
- /// which will be produced by a setcc instruction.
- SELECT_CC,
-
- /// This serves most of the functions of the LLVM SETCC instruction, for two
- /// purposes. First, it prevents optimisations from fiddling with the
- /// compare after we've moved the CondCode information onto the SELECT_CC or
- /// BR_CC instructions. Second, it gives a legal instruction for the actual
- /// comparison.
- ///
- /// It keeps a record of the condition flags asked for because certain
- /// instructions are only valid for a subset of condition codes.
- SETCC,
-
- // Designates a node which is a tail call: both a call and a return
-  // instruction as far as selection is concerned. It should be selected to an
- // unconditional branch. Has the usual plethora of call operands, but: 1st
- // is callee, 2nd is stack adjustment required immediately before branch.
- TC_RETURN,
-
- // Designates a call used to support the TLS descriptor ABI. The call itself
- // will be indirect ("BLR xN") but a relocation-specifier (".tlsdesccall
- // var") must be attached somehow during code generation. It takes two
- // operands: the callee and the symbol to be relocated against.
- TLSDESCCALL,
-
- // Leaf node which will be lowered to an appropriate MRS to obtain the
- // thread pointer: TPIDR_EL0.
- THREAD_POINTER,
-
- /// Extracts a field of contiguous bits from the source and zero extends
- /// them into a single register. Arguments are: source; immr; imms. Note
- /// these are pre-encoded since DAG matching can't cope with combining LSB
- /// and Width into these values itself.
- UBFX,
-
- // Wraps an address which the ISelLowering phase has decided should be
- // created using the large memory model style: i.e. a sequence of four
- // movz/movk instructions.
- WrapperLarge,
-
- // Wraps an address which the ISelLowering phase has decided should be
- // created using the small memory model style: i.e. adrp/add or
- // adrp/mem-op. This exists to prevent bare TargetAddresses which may never
- // get selected.
- WrapperSmall,
-
- // Vector move immediate
- NEON_MOVIMM,
-
- // Vector Move Inverted Immediate
- NEON_MVNIMM,
-
- // Vector FP move immediate
- NEON_FMOVIMM,
-
- // Vector permute
- NEON_UZP1,
- NEON_UZP2,
- NEON_ZIP1,
- NEON_ZIP2,
- NEON_TRN1,
- NEON_TRN2,
-
- // Vector Element reverse
- NEON_REV64,
- NEON_REV32,
- NEON_REV16,
-
- // Vector compare
- NEON_CMP,
-
- // Vector compare zero
- NEON_CMPZ,
-
- // Vector compare bitwise test
- NEON_TST,
-
- // Vector saturating shift
- NEON_QSHLs,
- NEON_QSHLu,
-
- // Vector dup
- NEON_VDUP,
-
- // Vector dup by lane
- NEON_VDUPLANE,
-
- // Vector extract
- NEON_VEXTRACT,
-
- // NEON duplicate lane loads
- NEON_LD2DUP = ISD::FIRST_TARGET_MEMORY_OPCODE,
- NEON_LD3DUP,
- NEON_LD4DUP,
-
- // NEON loads with post-increment base updates:
- NEON_LD1_UPD,
- NEON_LD2_UPD,
- NEON_LD3_UPD,
- NEON_LD4_UPD,
- NEON_LD1x2_UPD,
- NEON_LD1x3_UPD,
- NEON_LD1x4_UPD,
-
- // NEON stores with post-increment base updates:
- NEON_ST1_UPD,
- NEON_ST2_UPD,
- NEON_ST3_UPD,
- NEON_ST4_UPD,
- NEON_ST1x2_UPD,
- NEON_ST1x3_UPD,
- NEON_ST1x4_UPD,
-
- // NEON duplicate lane loads with post-increment base updates:
- NEON_LD2DUP_UPD,
- NEON_LD3DUP_UPD,
- NEON_LD4DUP_UPD,
-
- // NEON lane loads with post-increment base updates:
- NEON_LD2LN_UPD,
- NEON_LD3LN_UPD,
- NEON_LD4LN_UPD,
-
- // NEON lane store with post-increment base updates:
- NEON_ST2LN_UPD,
- NEON_ST3LN_UPD,
- NEON_ST4LN_UPD
- };
-}
+enum {
+ FIRST_NUMBER = ISD::BUILTIN_OP_END,
+ WrapperLarge, // 4-instruction MOVZ/MOVK sequence for 64-bit addresses.
+ CALL, // Function call.
+
+ // Almost the same as a normal call node, except that a TLSDesc relocation is
+ // needed so the linker can relax it correctly if possible.
+ TLSDESC_CALL,
+ ADRP, // Page address of a TargetGlobalAddress operand.
+ ADDlow, // Add the low 12 bits of a TargetGlobalAddress operand.
+ LOADgot, // Load from automatically generated descriptor (e.g. Global
+ // Offset Table, TLS record).
+ RET_FLAG, // Return with a flag operand. Operand 0 is the chain operand.
+ BRCOND, // Conditional branch instruction; "b.cond".
+ CSEL,
+ FCSEL, // Conditional move instruction.
+ CSINV, // Conditional select invert.
+ CSNEG, // Conditional select negate.
+ CSINC, // Conditional select increment.
+
+ // Pointer to the thread's local storage area. Materialised from TPIDR_EL0 on
+ // ELF.
+ THREAD_POINTER,
+ ADC,
+ SBC, // adc, sbc instructions
+
+ // Arithmetic instructions which write flags.
+ ADDS,
+ SUBS,
+ ADCS,
+ SBCS,
+ ANDS,
+
+ // Floating point comparison
+ FCMP,
+
+ // Floating point max and min instructions.
+ FMAX,
+ FMIN,
+
+ // Scalar extract
+ EXTR,
+
+ // Scalar-to-vector duplication
+ DUP,
+ DUPLANE8,
+ DUPLANE16,
+ DUPLANE32,
+ DUPLANE64,
+
+  // Vector immediate moves
+ MOVI,
+ MOVIshift,
+ MOVIedit,
+ MOVImsl,
+ FMOV,
+ MVNIshift,
+ MVNImsl,
+
+ // Vector immediate ops
+ BICi,
+ ORRi,
+
+ // Vector bit select: similar to ISD::VSELECT but not all bits within an
+ // element must be identical.
+ BSL,
+
+ // Vector arithmetic negation
+ NEG,
+
+ // Vector shuffles
+ ZIP1,
+ ZIP2,
+ UZP1,
+ UZP2,
+ TRN1,
+ TRN2,
+ REV16,
+ REV32,
+ REV64,
+ EXT,
+
+ // Vector shift by scalar
+ VSHL,
+ VLSHR,
+ VASHR,
+
+  // Vector saturating and rounding shifts by immediate
+ SQSHL_I,
+ UQSHL_I,
+ SQSHLU_I,
+ SRSHR_I,
+ URSHR_I,
+
+ // Vector comparisons
+ CMEQ,
+ CMGE,
+ CMGT,
+ CMHI,
+ CMHS,
+ FCMEQ,
+ FCMGE,
+ FCMGT,
+
+ // Vector zero comparisons
+ CMEQz,
+ CMGEz,
+ CMGTz,
+ CMLEz,
+ CMLTz,
+ FCMEQz,
+ FCMGEz,
+ FCMGTz,
+ FCMLEz,
+ FCMLTz,
+
+ // Vector bitwise negation
+ NOT,
+
+ // Vector bitwise selection
+ BIT,
+
+ // Compare-and-branch
+ CBZ,
+ CBNZ,
+ TBZ,
+ TBNZ,
+
+ // Tail calls
+ TC_RETURN,
+
+ // Custom prefetch handling
+ PREFETCH,
+
+ // {s|u}int to FP within a FP register.
+ SITOF,
+ UITOF,
+
+ // NEON Load/Store with post-increment base updates
+ LD2post = ISD::FIRST_TARGET_MEMORY_OPCODE,
+ LD3post,
+ LD4post,
+ ST2post,
+ ST3post,
+ ST4post,
+ LD1x2post,
+ LD1x3post,
+ LD1x4post,
+ ST1x2post,
+ ST1x3post,
+ ST1x4post,
+ LD1DUPpost,
+ LD2DUPpost,
+ LD3DUPpost,
+ LD4DUPpost,
+ LD1LANEpost,
+ LD2LANEpost,
+ LD3LANEpost,
+ LD4LANEpost,
+ ST2LANEpost,
+ ST3LANEpost,
+ ST4LANEpost
+};
+
+} // end namespace AArch64ISD
class AArch64Subtarget;
class AArch64TargetMachine;
class AArch64TargetLowering : public TargetLowering {
+ bool RequireStrictAlign;
+
public:
explicit AArch64TargetLowering(AArch64TargetMachine &TM);
- const char *getTargetNodeName(unsigned Opcode) const;
+  /// Selects the correct CCAssignFn for the given CallingConvention
+  /// value.
+ CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const;
+
+ /// computeKnownBitsForTargetNode - Determine which of the bits specified in
+ /// Mask are known to be either zero or one and return them in the
+ /// KnownZero/KnownOne bitsets.
+ void computeKnownBitsForTargetNode(const SDValue Op, APInt &KnownZero,
+ APInt &KnownOne, const SelectionDAG &DAG,
+ unsigned Depth = 0) const override;
+
+ MVT getScalarShiftAmountTy(EVT LHSTy) const override;
+
+  /// allowsUnalignedMemoryAccesses - Returns true if the target allows
+  /// unaligned memory accesses of the specified type.
+ bool allowsUnalignedMemoryAccesses(EVT VT, unsigned AddrSpace = 0,
+ bool *Fast = nullptr) const override {
+ if (RequireStrictAlign)
+ return false;
+    // FIXME: True for Cyclone, but not necessarily for others.
+ if (Fast)
+ *Fast = true;
+ return true;
+ }
- CCAssignFn *CCAssignFnForNode(CallingConv::ID CC) const;
+ /// LowerOperation - Provide custom lowering hooks for some operations.
+ SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
- SDValue LowerFormalArguments(SDValue Chain,
- CallingConv::ID CallConv, bool isVarArg,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- SDLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const;
+ const char *getTargetNodeName(unsigned Opcode) const override;
- SDValue LowerReturn(SDValue Chain,
- CallingConv::ID CallConv, bool isVarArg,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- SDLoc dl, SelectionDAG &DAG) const;
+ SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
- virtual unsigned getByValTypeAlignment(Type *Ty) const override;
+ /// getFunctionAlignment - Return the Log2 alignment of this function.
+ unsigned getFunctionAlignment(const Function *F) const;
- SDValue LowerCall(CallLoweringInfo &CLI,
- SmallVectorImpl<SDValue> &InVals) const;
+ /// getMaximalGlobalOffset - Returns the maximal possible offset which can
+ /// be used for loads / stores from the global.
+ unsigned getMaximalGlobalOffset() const override;
- SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
- CallingConv::ID CallConv, bool IsVarArg,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- SDLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const;
+ /// Returns true if a cast between SrcAS and DestAS is a noop.
+ bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override {
+ // Addrspacecasts are always noops.
+ return true;
+ }
- SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const;
+ /// createFastISel - This method returns a target specific FastISel object,
+ /// or null if the target does not support "fast" ISel.
+ FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
+ const TargetLibraryInfo *libInfo) const override;
- bool isConcatVector(SDValue Op, SelectionDAG &DAG, SDValue V0, SDValue V1,
- const int *Mask, SDValue &Res) const;
+ bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
- bool isKnownShuffleVector(SDValue Op, SelectionDAG &DAG, SDValue &V0,
- SDValue &V1, int *Mask) const;
+ bool isFPImmLegal(const APFloat &Imm, EVT VT) const override;
- SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
- const AArch64Subtarget *ST) const;
+ /// isShuffleMaskLegal - Return true if the given shuffle mask can be
+ /// codegen'd directly, or if it should be stack expanded.
+ bool isShuffleMaskLegal(const SmallVectorImpl<int> &M, EVT VT) const override;
- SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
+ /// getSetCCResultType - Return the ISD::SETCC ValueType
+ EVT getSetCCResultType(LLVMContext &Context, EVT VT) const override;
- void SaveVarArgRegisters(CCState &CCInfo, SelectionDAG &DAG, SDLoc DL,
- SDValue &Chain) const;
+ SDValue ReconstructShuffle(SDValue Op, SelectionDAG &DAG) const;
- /// IsEligibleForTailCallOptimization - Check whether the call is eligible
- /// for tail call optimization. Targets which want to do tail call
- /// optimization should implement this function.
- bool IsEligibleForTailCallOptimization(SDValue Callee,
- CallingConv::ID CalleeCC,
- bool IsVarArg,
- bool IsCalleeStructRet,
- bool IsCallerStructRet,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- SelectionDAG& DAG) const;
+ MachineBasicBlock *EmitF128CSEL(MachineInstr *MI,
+ MachineBasicBlock *BB) const;
- /// Finds the incoming stack arguments which overlap the given fixed stack
- /// object and incorporates their load into the current chain. This prevents
- /// an upcoming store from clobbering the stack argument before it's used.
- SDValue addTokenForArgument(SDValue Chain, SelectionDAG &DAG,
- MachineFrameInfo *MFI, int ClobberedFI) const;
+ MachineBasicBlock *
+ EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *MBB) const override;
- EVT getSetCCResultType(LLVMContext &Context, EVT VT) const;
+ bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
+ unsigned Intrinsic) const override;
- bool DoesCalleeRestoreStack(CallingConv::ID CallCC, bool TailCallOpt) const;
+ bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
+ bool isTruncateFree(EVT VT1, EVT VT2) const override;
- bool IsTailCallConvention(CallingConv::ID CallCC) const;
+ bool isZExtFree(Type *Ty1, Type *Ty2) const override;
+ bool isZExtFree(EVT VT1, EVT VT2) const override;
+ bool isZExtFree(SDValue Val, EVT VT2) const override;
- SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
+ bool hasPairedLoad(Type *LoadedType,
+ unsigned &RequiredAligment) const override;
+ bool hasPairedLoad(EVT LoadedType, unsigned &RequiredAligment) const override;
- bool isLegalICmpImmediate(int64_t Val) const;
- SDValue getSelectableIntSetCC(SDValue LHS, SDValue RHS, ISD::CondCode CC,
- SDValue &A64cc, SelectionDAG &DAG, SDLoc &dl) const;
+ bool isLegalAddImmediate(int64_t) const override;
+ bool isLegalICmpImmediate(int64_t) const override;
- virtual MachineBasicBlock *
- EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const;
+ EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign,
+ bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
+ MachineFunction &MF) const override;
- MachineBasicBlock *
- emitAtomicBinary(MachineInstr *MI, MachineBasicBlock *MBB,
- unsigned Size, unsigned Opcode) const;
+ /// isLegalAddressingMode - Return true if the addressing mode represented
+ /// by AM is legal for this target, for a load/store of the specified type.
+ bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const override;
- MachineBasicBlock *
- emitAtomicBinaryMinMax(MachineInstr *MI, MachineBasicBlock *BB,
- unsigned Size, unsigned CmpOp,
- A64CC::CondCodes Cond) const;
- MachineBasicBlock *
- emitAtomicCmpSwap(MachineInstr *MI, MachineBasicBlock *BB,
- unsigned Size) const;
+ /// \brief Return the cost of the scaling factor used in the addressing
+ /// mode represented by AM for this target, for a load/store
+ /// of the specified type.
+ /// If the AM is supported, the return value must be >= 0.
+ /// If the AM is not supported, it returns a negative value.
+ int getScalingFactorCost(const AddrMode &AM, Type *Ty) const override;
- MachineBasicBlock *
- EmitF128CSEL(MachineInstr *MI, MachineBasicBlock *MBB) const;
+ /// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster
+ /// than a pair of fmul and fadd instructions. fmuladd intrinsics will be
+ /// expanded to FMAs when this method returns true, otherwise fmuladd is
+ /// expanded to fmul + fadd.
+ bool isFMAFasterThanFMulAndFAdd(EVT VT) const override;
- SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerF128ToCall(SDValue Op, SelectionDAG &DAG,
- RTLIB::Libcall Call) const;
- SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, bool IsSigned) const;
- SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
+ const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
- SDValue LowerGlobalAddressELFSmall(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerGlobalAddressELFLarge(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerGlobalAddressELF(SDValue Op, SelectionDAG &DAG) const;
+ /// \brief Returns false if N is a bit extraction pattern of (X >> C) & Mask.
+ bool isDesirableToCommuteWithShift(const SDNode *N) const override;
- SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
+ /// \brief Returns true if it is beneficial to convert a load of a constant
+ /// to just the constant itself.
+ bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
+ Type *Ty) const override;
- SDValue LowerTLSDescCall(SDValue SymAddr, SDValue DescAddr, SDLoc DL,
- SelectionDAG &DAG) const;
- SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG, bool IsSigned) const;
- SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
+ Value *emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
+ AtomicOrdering Ord) const override;
+ Value *emitStoreConditional(IRBuilder<> &Builder, Value *Val,
+ Value *Addr, AtomicOrdering Ord) const override;
- virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+ bool shouldExpandAtomicInIR(Instruction *Inst) const override;
- /// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster
- /// than a pair of fmul and fadd instructions. fmuladd intrinsics will be
- /// expanded to FMAs when this method returns true, otherwise fmuladd is
- /// expanded to fmul + fadd.
- virtual bool isFMAFasterThanFMulAndFAdd(EVT VT) const;
+private:
+ /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
+ /// make the right decision when generating code for different targets.
+ const AArch64Subtarget *Subtarget;
- ConstraintType getConstraintType(const std::string &Constraint) const;
+ void addTypeForNEON(EVT VT, EVT PromotedBitwiseVT);
+ void addDRTypeForNEON(MVT VT);
+ void addQRTypeForNEON(MVT VT);
- ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &Info,
- const char *Constraint) const;
- void LowerAsmOperandForConstraint(SDValue Op,
- std::string &Constraint,
- std::vector<SDValue> &Ops,
- SelectionDAG &DAG) const;
+ SDValue
+ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins, SDLoc DL,
+ SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const override;
- std::pair<unsigned, const TargetRegisterClass*>
- getRegForInlineAsmConstraint(const std::string &Constraint, MVT VT) const;
+ SDValue LowerCall(CallLoweringInfo & /*CLI*/,
+ SmallVectorImpl<SDValue> &InVals) const override;
- virtual bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
- unsigned Intrinsic) const override;
+ SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins, SDLoc DL,
+ SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
+ bool isThisReturn, SDValue ThisVal) const;
+
+ bool isEligibleForTailCallOptimization(
+ SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
+ bool isCalleeStructRet, bool isCallerStructRet,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const;
-protected:
- std::pair<const TargetRegisterClass*, uint8_t>
- findRepresentativeClass(MVT VT) const;
+ /// Finds the incoming stack arguments which overlap the given fixed stack
+ /// object and incorporates their load into the current chain. This prevents
+ /// an upcoming store from clobbering the stack argument before it's used.
+ SDValue addTokenForArgument(SDValue Chain, SelectionDAG &DAG,
+ MachineFrameInfo *MFI, int ClobberedFI) const;
-private:
- const InstrItineraryData *Itins;
+ bool DoesCalleeRestoreStack(CallingConv::ID CallCC, bool TailCallOpt) const;
- const AArch64Subtarget *getSubtarget() const {
- return &getTargetMachine().getSubtarget<AArch64Subtarget>();
- }
-};
-enum NeonModImmType {
- Neon_Mov_Imm,
- Neon_Mvn_Imm
+ bool IsTailCallConvention(CallingConv::ID CallCC) const;
+
+ void saveVarArgRegisters(CCState &CCInfo, SelectionDAG &DAG, SDLoc DL,
+ SDValue &Chain) const;
+
+ bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ LLVMContext &Context) const override;
+
+ SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals, SDLoc DL,
+ SelectionDAG &DAG) const override;
+
+ SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerDarwinGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerELFGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerELFTLSDescCall(SDValue SymAddr, SDValue DescAddr, SDLoc DL,
+ SelectionDAG &DAG) const;
+ SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerAAPCS_VASTART(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerDarwin_VASTART(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVectorSRA_SRL_SHL(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerF128Call(SDValue Op, SelectionDAG &DAG,
+ RTLIB::Libcall Call) const;
+ SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVectorAND(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVectorOR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const;
+
+ ConstraintType
+ getConstraintType(const std::string &Constraint) const override;
+ unsigned getRegisterByName(const char* RegName, EVT VT) const override;
+
+ /// Examine constraint string and operand type and determine a weight value.
+ /// The operand object must already have been set up with the operand type.
+ ConstraintWeight
+ getSingleConstraintMatchWeight(AsmOperandInfo &info,
+ const char *constraint) const override;
+
+ std::pair<unsigned, const TargetRegisterClass *>
+ getRegForInlineAsmConstraint(const std::string &Constraint,
+ MVT VT) const override;
+ void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
+ std::vector<SDValue> &Ops,
+ SelectionDAG &DAG) const override;
+
+ bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
+ bool mayBeEmittedAsTailCall(CallInst *CI) const override;
+ bool getIndexedAddressParts(SDNode *Op, SDValue &Base, SDValue &Offset,
+ ISD::MemIndexedMode &AM, bool &IsInc,
+ SelectionDAG &DAG) const;
+ bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset,
+ ISD::MemIndexedMode &AM,
+ SelectionDAG &DAG) const override;
+ bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base,
+ SDValue &Offset, ISD::MemIndexedMode &AM,
+ SelectionDAG &DAG) const override;
+
+ void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG) const override;
};
-extern SDValue ScanBUILD_VECTOR(SDValue Op, bool &isOnlyLowElement,
- bool &usesOnlyOneValue, bool &hasDominantValue,
- bool &isConstant, bool &isUNDEF);
-} // namespace llvm
+namespace AArch64 {
+FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
+ const TargetLibraryInfo *libInfo);
+} // end namespace AArch64
+
+} // end namespace llvm
-#endif // LLVM_TARGET_AARCH64_ISELLOWERING_H
+#endif // LLVM_TARGET_AArch64_ISELLOWERING_H
diff --git a/lib/Target/AArch64/AArch64InstrAtomics.td b/lib/Target/AArch64/AArch64InstrAtomics.td
new file mode 100644
index 0000000..3b9e3c6
--- /dev/null
+++ b/lib/Target/AArch64/AArch64InstrAtomics.td
@@ -0,0 +1,364 @@
+//=- AArch64InstrAtomics.td - AArch64 Atomic codegen support -*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// AArch64 Atomic operand code-gen constructs.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------
+// Atomic fences
+//===----------------------------------
+def : Pat<(atomic_fence (i64 4), (imm)), (DMB (i32 0x9))>;
+def : Pat<(atomic_fence (imm), (imm)), (DMB (i32 0xb))>;
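The first pattern fires only for ordering value 4 (Acquire in the AtomicOrdering numbering used at this revision) and selects the load-only barrier DMB ISHLD (CRm = 0x9); every other fence falls through to the full DMB ISH (CRm = 0xb). A hedged C++ illustration, assuming an AArch64 target:

    #include <atomic>

    void acquire_fence() { std::atomic_thread_fence(std::memory_order_acquire); } // expect: dmb ishld
    void full_fence()    { std::atomic_thread_fence(std::memory_order_seq_cst); } // expect: dmb ish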
+
+//===----------------------------------
+// Atomic loads
+//===----------------------------------
+
+// When they're actually atomic, only one addressing mode (GPR64sp) is
+// supported, but when they're relaxed and anything can be used, all the
+// standard modes would be valid and may give efficiency gains.
+
+// An atomic load operation that actually needs acquire semantics.
+class acquiring_load<PatFrag base>
+ : PatFrag<(ops node:$ptr), (base node:$ptr), [{
+ AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getOrdering();
+ assert(Ordering != AcquireRelease && "unexpected load ordering");
+ return Ordering == Acquire || Ordering == SequentiallyConsistent;
+}]>;
+
+// An atomic load operation that does not need either acquire or release
+// semantics.
+class relaxed_load<PatFrag base>
+ : PatFrag<(ops node:$ptr), (base node:$ptr), [{
+ AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getOrdering();
+ return Ordering == Monotonic || Ordering == Unordered;
+}]>;
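A hedged C++ sketch of the split these two fragments make, assuming compilation for AArch64: acquire and seq_cst loads must use the base-register-only LDAR form, while relaxed loads are free to use any of the ordinary addressing modes matched below.

    #include <atomic>

    int load_acquire(const std::atomic<int> &v) { return v.load(std::memory_order_acquire); } // expect: ldar
    int load_relaxed(const std::atomic<int> &v) { return v.load(std::memory_order_relaxed); } // expect: plain ldr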
+
+// 8-bit loads
+def : Pat<(acquiring_load<atomic_load_8> GPR64sp:$ptr), (LDARB GPR64sp:$ptr)>;
+def : Pat<(relaxed_load<atomic_load_8> (ro_Windexed8 GPR64sp:$Rn, GPR32:$Rm,
+ ro_Wextend8:$offset)),
+ (LDRBBroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend8:$offset)>;
+def : Pat<(relaxed_load<atomic_load_8> (ro_Xindexed8 GPR64sp:$Rn, GPR64:$Rm,
+ ro_Xextend8:$offset)),
+ (LDRBBroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend8:$offset)>;
+def : Pat<(relaxed_load<atomic_load_8> (am_indexed8 GPR64sp:$Rn,
+ uimm12s1:$offset)),
+ (LDRBBui GPR64sp:$Rn, uimm12s1:$offset)>;
+def : Pat<(relaxed_load<atomic_load_8>
+ (am_unscaled8 GPR64sp:$Rn, simm9:$offset)),
+ (LDURBBi GPR64sp:$Rn, simm9:$offset)>;
+
+// 16-bit loads
+def : Pat<(acquiring_load<atomic_load_16> GPR64sp:$ptr), (LDARH GPR64sp:$ptr)>;
+def : Pat<(relaxed_load<atomic_load_16> (ro_Windexed16 GPR64sp:$Rn, GPR32:$Rm,
+ ro_Wextend16:$extend)),
+ (LDRHHroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend16:$extend)>;
+def : Pat<(relaxed_load<atomic_load_16> (ro_Xindexed16 GPR64sp:$Rn, GPR64:$Rm,
+ ro_Xextend16:$extend)),
+ (LDRHHroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend16:$extend)>;
+def : Pat<(relaxed_load<atomic_load_16> (am_indexed16 GPR64sp:$Rn,
+ uimm12s2:$offset)),
+ (LDRHHui GPR64sp:$Rn, uimm12s2:$offset)>;
+def : Pat<(relaxed_load<atomic_load_16>
+ (am_unscaled16 GPR64sp:$Rn, simm9:$offset)),
+ (LDURHHi GPR64sp:$Rn, simm9:$offset)>;
+
+// 32-bit loads
+def : Pat<(acquiring_load<atomic_load_32> GPR64sp:$ptr), (LDARW GPR64sp:$ptr)>;
+def : Pat<(relaxed_load<atomic_load_32> (ro_Windexed32 GPR64sp:$Rn, GPR32:$Rm,
+ ro_Wextend32:$extend)),
+ (LDRWroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend32:$extend)>;
+def : Pat<(relaxed_load<atomic_load_32> (ro_Xindexed32 GPR64sp:$Rn, GPR64:$Rm,
+ ro_Xextend32:$extend)),
+ (LDRWroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend32:$extend)>;
+def : Pat<(relaxed_load<atomic_load_32> (am_indexed32 GPR64sp:$Rn,
+ uimm12s4:$offset)),
+ (LDRWui GPR64sp:$Rn, uimm12s4:$offset)>;
+def : Pat<(relaxed_load<atomic_load_32>
+ (am_unscaled32 GPR64sp:$Rn, simm9:$offset)),
+ (LDURWi GPR64sp:$Rn, simm9:$offset)>;
+
+// 64-bit loads
+def : Pat<(acquiring_load<atomic_load_64> GPR64sp:$ptr), (LDARX GPR64sp:$ptr)>;
+def : Pat<(relaxed_load<atomic_load_64> (ro_Windexed64 GPR64sp:$Rn, GPR32:$Rm,
+ ro_Wextend64:$extend)),
+ (LDRXroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend64:$extend)>;
+def : Pat<(relaxed_load<atomic_load_64> (ro_Xindexed64 GPR64sp:$Rn, GPR64:$Rm,
+ ro_Xextend64:$extend)),
+ (LDRXroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend64:$extend)>;
+def : Pat<(relaxed_load<atomic_load_64> (am_indexed64 GPR64sp:$Rn,
+ uimm12s8:$offset)),
+ (LDRXui GPR64sp:$Rn, uimm12s8:$offset)>;
+def : Pat<(relaxed_load<atomic_load_64>
+ (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
+ (LDURXi GPR64sp:$Rn, simm9:$offset)>;
+
+//===----------------------------------
+// Atomic stores
+//===----------------------------------
+
+// When they're actually atomic, only one addressing mode (GPR64sp) is
+// supported, but when they're relaxed and anything can be used, all the
+// standard modes would be valid and may give efficiency gains.
+
+// A store operation that actually needs release semantics.
+class releasing_store<PatFrag base>
+ : PatFrag<(ops node:$ptr, node:$val), (base node:$ptr, node:$val), [{
+ AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getOrdering();
+ assert(Ordering != AcquireRelease && "unexpected store ordering");
+ return Ordering == Release || Ordering == SequentiallyConsistent;
+}]>;
+
+// An atomic store operation that doesn't actually need to be atomic on AArch64.
+class relaxed_store<PatFrag base>
+ : PatFrag<(ops node:$ptr, node:$val), (base node:$ptr, node:$val), [{
+ AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getOrdering();
+ return Ordering == Monotonic || Ordering == Unordered;
+}]>;
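The companion sketch for the store side, under the same assumptions: release and seq_cst stores select STLR, while relaxed stores can use any normal store addressing mode.

    #include <atomic>

    void store_release(std::atomic<int> &v, int x) { v.store(x, std::memory_order_release); } // expect: stlr
    void store_relaxed(std::atomic<int> &v, int x) { v.store(x, std::memory_order_relaxed); } // expect: plain str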
+
+// 8-bit stores
+def : Pat<(releasing_store<atomic_store_8> GPR64sp:$ptr, GPR32:$val),
+ (STLRB GPR32:$val, GPR64sp:$ptr)>;
+def : Pat<(relaxed_store<atomic_store_8>
+ (ro_Windexed8 GPR64sp:$Rn, GPR32:$Rm, ro_Wextend8:$extend),
+ GPR32:$val),
+ (STRBBroW GPR32:$val, GPR64sp:$Rn, GPR32:$Rm, ro_Wextend8:$extend)>;
+def : Pat<(relaxed_store<atomic_store_8>
+ (ro_Xindexed8 GPR64sp:$Rn, GPR64:$Rm, ro_Xextend8:$extend),
+ GPR32:$val),
+ (STRBBroX GPR32:$val, GPR64sp:$Rn, GPR64:$Rm, ro_Xextend8:$extend)>;
+def : Pat<(relaxed_store<atomic_store_8>
+ (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset), GPR32:$val),
+ (STRBBui GPR32:$val, GPR64sp:$Rn, uimm12s1:$offset)>;
+def : Pat<(relaxed_store<atomic_store_8>
+ (am_unscaled8 GPR64sp:$Rn, simm9:$offset), GPR32:$val),
+ (STURBBi GPR32:$val, GPR64sp:$Rn, simm9:$offset)>;
+
+// 16-bit stores
+def : Pat<(releasing_store<atomic_store_16> GPR64sp:$ptr, GPR32:$val),
+ (STLRH GPR32:$val, GPR64sp:$ptr)>;
+def : Pat<(relaxed_store<atomic_store_16> (ro_Windexed16 GPR64sp:$Rn, GPR32:$Rm,
+ ro_Wextend16:$extend),
+ GPR32:$val),
+ (STRHHroW GPR32:$val, GPR64sp:$Rn, GPR32:$Rm, ro_Wextend16:$extend)>;
+def : Pat<(relaxed_store<atomic_store_16> (ro_Xindexed16 GPR64sp:$Rn, GPR64:$Rm,
+ ro_Xextend16:$extend),
+ GPR32:$val),
+ (STRHHroX GPR32:$val, GPR64sp:$Rn, GPR64:$Rm, ro_Xextend16:$extend)>;
+def : Pat<(relaxed_store<atomic_store_16>
+ (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset), GPR32:$val),
+ (STRHHui GPR32:$val, GPR64sp:$Rn, uimm12s2:$offset)>;
+def : Pat<(relaxed_store<atomic_store_16>
+ (am_unscaled16 GPR64sp:$Rn, simm9:$offset), GPR32:$val),
+ (STURHHi GPR32:$val, GPR64sp:$Rn, simm9:$offset)>;
+
+// 32-bit stores
+def : Pat<(releasing_store<atomic_store_32> GPR64sp:$ptr, GPR32:$val),
+ (STLRW GPR32:$val, GPR64sp:$ptr)>;
+def : Pat<(relaxed_store<atomic_store_32> (ro_Windexed32 GPR64sp:$Rn, GPR32:$Rm,
+ ro_Wextend32:$extend),
+ GPR32:$val),
+ (STRWroW GPR32:$val, GPR64sp:$Rn, GPR32:$Rm, ro_Wextend32:$extend)>;
+def : Pat<(relaxed_store<atomic_store_32> (ro_Xindexed32 GPR64sp:$Rn, GPR64:$Rm,
+ ro_Xextend32:$extend),
+ GPR32:$val),
+ (STRWroX GPR32:$val, GPR64sp:$Rn, GPR64:$Rm, ro_Xextend32:$extend)>;
+def : Pat<(relaxed_store<atomic_store_32>
+ (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset), GPR32:$val),
+ (STRWui GPR32:$val, GPR64sp:$Rn, uimm12s4:$offset)>;
+def : Pat<(relaxed_store<atomic_store_32>
+ (am_unscaled32 GPR64sp:$Rn, simm9:$offset), GPR32:$val),
+ (STURWi GPR32:$val, GPR64sp:$Rn, simm9:$offset)>;
+
+// 64-bit stores
+def : Pat<(releasing_store<atomic_store_64> GPR64sp:$ptr, GPR64:$val),
+ (STLRX GPR64:$val, GPR64sp:$ptr)>;
+def : Pat<(relaxed_store<atomic_store_64> (ro_Windexed64 GPR64sp:$Rn, GPR32:$Rm,
+                                                ro_Wextend64:$extend),
+ GPR64:$val),
+ (STRXroW GPR64:$val, GPR64sp:$Rn, GPR32:$Rm, ro_Wextend64:$extend)>;
+def : Pat<(relaxed_store<atomic_store_64> (ro_Xindexed64 GPR64sp:$Rn, GPR64:$Rm,
+                                                ro_Xextend64:$extend),
+ GPR64:$val),
+ (STRXroX GPR64:$val, GPR64sp:$Rn, GPR64:$Rm, ro_Xextend64:$extend)>;
+def : Pat<(relaxed_store<atomic_store_64>
+ (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset), GPR64:$val),
+ (STRXui GPR64:$val, GPR64sp:$Rn, uimm12s8:$offset)>;
+def : Pat<(relaxed_store<atomic_store_64>
+ (am_unscaled64 GPR64sp:$Rn, simm9:$offset), GPR64:$val),
+ (STURXi GPR64:$val, GPR64sp:$Rn, simm9:$offset)>;
+
+//===----------------------------------
+// Low-level exclusive operations
+//===----------------------------------
+
+// Load-exclusives.
+
+def ldxr_1 : PatFrag<(ops node:$ptr), (int_aarch64_ldxr node:$ptr), [{
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i8;
+}]>;
+
+def ldxr_2 : PatFrag<(ops node:$ptr), (int_aarch64_ldxr node:$ptr), [{
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i16;
+}]>;
+
+def ldxr_4 : PatFrag<(ops node:$ptr), (int_aarch64_ldxr node:$ptr), [{
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i32;
+}]>;
+
+def ldxr_8 : PatFrag<(ops node:$ptr), (int_aarch64_ldxr node:$ptr), [{
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i64;
+}]>;
+
+def : Pat<(ldxr_1 GPR64sp:$addr),
+ (SUBREG_TO_REG (i64 0), (LDXRB GPR64sp:$addr), sub_32)>;
+def : Pat<(ldxr_2 GPR64sp:$addr),
+ (SUBREG_TO_REG (i64 0), (LDXRH GPR64sp:$addr), sub_32)>;
+def : Pat<(ldxr_4 GPR64sp:$addr),
+ (SUBREG_TO_REG (i64 0), (LDXRW GPR64sp:$addr), sub_32)>;
+def : Pat<(ldxr_8 GPR64sp:$addr), (LDXRX GPR64sp:$addr)>;
+
+def : Pat<(and (ldxr_1 GPR64sp:$addr), 0xff),
+ (SUBREG_TO_REG (i64 0), (LDXRB GPR64sp:$addr), sub_32)>;
+def : Pat<(and (ldxr_2 GPR64sp:$addr), 0xffff),
+ (SUBREG_TO_REG (i64 0), (LDXRH GPR64sp:$addr), sub_32)>;
+def : Pat<(and (ldxr_4 GPR64sp:$addr), 0xffffffff),
+ (SUBREG_TO_REG (i64 0), (LDXRW GPR64sp:$addr), sub_32)>;
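The "(and (ldxr_N ...), mask)" patterns above exist because the intrinsic returns an i64 whatever the memory width, so a front end that masks the result back down is asking for work LDXRB/LDXRH/LDXRW already do. A hedged example, assuming clang, whose __builtin_arm_ldrex lowers to int_aarch64_ldxr when targeting AArch64:

    #include <cstdint>

    uint32_t load_ex_byte(uint8_t *p) {
      // The mask is implied by ldxrb, so the extra patterns let it fold away.
      return __builtin_arm_ldrex(p) & 0xffu; // expect: a single ldxrb
    }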
+
+// Load-acquire-exclusives.
+
+def ldaxr_1 : PatFrag<(ops node:$ptr), (int_aarch64_ldaxr node:$ptr), [{
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i8;
+}]>;
+
+def ldaxr_2 : PatFrag<(ops node:$ptr), (int_aarch64_ldaxr node:$ptr), [{
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i16;
+}]>;
+
+def ldaxr_4 : PatFrag<(ops node:$ptr), (int_aarch64_ldaxr node:$ptr), [{
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i32;
+}]>;
+
+def ldaxr_8 : PatFrag<(ops node:$ptr), (int_aarch64_ldaxr node:$ptr), [{
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i64;
+}]>;
+
+def : Pat<(ldaxr_1 GPR64sp:$addr),
+ (SUBREG_TO_REG (i64 0), (LDAXRB GPR64sp:$addr), sub_32)>;
+def : Pat<(ldaxr_2 GPR64sp:$addr),
+ (SUBREG_TO_REG (i64 0), (LDAXRH GPR64sp:$addr), sub_32)>;
+def : Pat<(ldaxr_4 GPR64sp:$addr),
+ (SUBREG_TO_REG (i64 0), (LDAXRW GPR64sp:$addr), sub_32)>;
+def : Pat<(ldaxr_8 GPR64sp:$addr), (LDAXRX GPR64sp:$addr)>;
+
+def : Pat<(and (ldaxr_1 GPR64sp:$addr), 0xff),
+ (SUBREG_TO_REG (i64 0), (LDAXRB GPR64sp:$addr), sub_32)>;
+def : Pat<(and (ldaxr_2 GPR64sp:$addr), 0xffff),
+ (SUBREG_TO_REG (i64 0), (LDAXRH GPR64sp:$addr), sub_32)>;
+def : Pat<(and (ldaxr_4 GPR64sp:$addr), 0xffffffff),
+ (SUBREG_TO_REG (i64 0), (LDAXRW GPR64sp:$addr), sub_32)>;
+
+// Store-exclusives.
+
+def stxr_1 : PatFrag<(ops node:$val, node:$ptr),
+ (int_aarch64_stxr node:$val, node:$ptr), [{
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i8;
+}]>;
+
+def stxr_2 : PatFrag<(ops node:$val, node:$ptr),
+ (int_aarch64_stxr node:$val, node:$ptr), [{
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i16;
+}]>;
+
+def stxr_4 : PatFrag<(ops node:$val, node:$ptr),
+ (int_aarch64_stxr node:$val, node:$ptr), [{
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i32;
+}]>;
+
+def stxr_8 : PatFrag<(ops node:$val, node:$ptr),
+ (int_aarch64_stxr node:$val, node:$ptr), [{
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i64;
+}]>;
+
+
+def : Pat<(stxr_1 GPR64:$val, GPR64sp:$addr),
+ (STXRB (EXTRACT_SUBREG GPR64:$val, sub_32), GPR64sp:$addr)>;
+def : Pat<(stxr_2 GPR64:$val, GPR64sp:$addr),
+ (STXRH (EXTRACT_SUBREG GPR64:$val, sub_32), GPR64sp:$addr)>;
+def : Pat<(stxr_4 GPR64:$val, GPR64sp:$addr),
+ (STXRW (EXTRACT_SUBREG GPR64:$val, sub_32), GPR64sp:$addr)>;
+def : Pat<(stxr_8 GPR64:$val, GPR64sp:$addr),
+ (STXRX GPR64:$val, GPR64sp:$addr)>;
+
+def : Pat<(stxr_1 (zext (and GPR32:$val, 0xff)), GPR64sp:$addr),
+ (STXRB GPR32:$val, GPR64sp:$addr)>;
+def : Pat<(stxr_2 (zext (and GPR32:$val, 0xffff)), GPR64sp:$addr),
+ (STXRH GPR32:$val, GPR64sp:$addr)>;
+def : Pat<(stxr_4 (zext GPR32:$val), GPR64sp:$addr),
+ (STXRW GPR32:$val, GPR64sp:$addr)>;
+
+def : Pat<(stxr_1 (and GPR64:$val, 0xff), GPR64sp:$addr),
+ (STXRB (EXTRACT_SUBREG GPR64:$val, sub_32), GPR64sp:$addr)>;
+def : Pat<(stxr_2 (and GPR64:$val, 0xffff), GPR64sp:$addr),
+ (STXRH (EXTRACT_SUBREG GPR64:$val, sub_32), GPR64sp:$addr)>;
+def : Pat<(stxr_4 (and GPR64:$val, 0xffffffff), GPR64sp:$addr),
+ (STXRW (EXTRACT_SUBREG GPR64:$val, sub_32), GPR64sp:$addr)>;
+
+// Store-release-exclusives.
+
+def stlxr_1 : PatFrag<(ops node:$val, node:$ptr),
+ (int_aarch64_stlxr node:$val, node:$ptr), [{
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i8;
+}]>;
+
+def stlxr_2 : PatFrag<(ops node:$val, node:$ptr),
+ (int_aarch64_stlxr node:$val, node:$ptr), [{
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i16;
+}]>;
+
+def stlxr_4 : PatFrag<(ops node:$val, node:$ptr),
+ (int_aarch64_stlxr node:$val, node:$ptr), [{
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i32;
+}]>;
+
+def stlxr_8 : PatFrag<(ops node:$val, node:$ptr),
+ (int_aarch64_stlxr node:$val, node:$ptr), [{
+ return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i64;
+}]>;
+
+
+def : Pat<(stlxr_1 GPR64:$val, GPR64sp:$addr),
+ (STLXRB (EXTRACT_SUBREG GPR64:$val, sub_32), GPR64sp:$addr)>;
+def : Pat<(stlxr_2 GPR64:$val, GPR64sp:$addr),
+ (STLXRH (EXTRACT_SUBREG GPR64:$val, sub_32), GPR64sp:$addr)>;
+def : Pat<(stlxr_4 GPR64:$val, GPR64sp:$addr),
+ (STLXRW (EXTRACT_SUBREG GPR64:$val, sub_32), GPR64sp:$addr)>;
+def : Pat<(stlxr_8 GPR64:$val, GPR64sp:$addr),
+ (STLXRX GPR64:$val, GPR64sp:$addr)>;
+
+def : Pat<(stlxr_1 (zext (and GPR32:$val, 0xff)), GPR64sp:$addr),
+ (STLXRB GPR32:$val, GPR64sp:$addr)>;
+def : Pat<(stlxr_2 (zext (and GPR32:$val, 0xffff)), GPR64sp:$addr),
+ (STLXRH GPR32:$val, GPR64sp:$addr)>;
+def : Pat<(stlxr_4 (zext GPR32:$val), GPR64sp:$addr),
+ (STLXRW GPR32:$val, GPR64sp:$addr)>;
+
+def : Pat<(stlxr_1 (and GPR64:$val, 0xff), GPR64sp:$addr),
+ (STLXRB (EXTRACT_SUBREG GPR64:$val, sub_32), GPR64sp:$addr)>;
+def : Pat<(stlxr_2 (and GPR64:$val, 0xffff), GPR64sp:$addr),
+ (STLXRH (EXTRACT_SUBREG GPR64:$val, sub_32), GPR64sp:$addr)>;
+def : Pat<(stlxr_4 (and GPR64:$val, 0xffffffff), GPR64sp:$addr),
+ (STLXRW (EXTRACT_SUBREG GPR64:$val, sub_32), GPR64sp:$addr)>;
+
+
+// And clear exclusive.
+
+def : Pat<(int_aarch64_clrex), (CLREX 0xf)>;
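Taken together, these patterns are what a compare-and-swap expands into on ARMv8.0 (no LSE atomics): roughly ldaxr / cmp, b.ne / stlxr / cbnz, with clrex on the early exit when the comparison fails. A hedged end-to-end example:

    #include <atomic>

    bool cas64(std::atomic<long> &v, long expected, long desired) {
      // Expected pre-LSE lowering: an exclusive loop built from the
      // ldaxr/stlxr patterns above, plus clrex on the failure path.
      return v.compare_exchange_strong(expected, desired);
    }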
diff --git a/lib/Target/AArch64/AArch64InstrFormats.td b/lib/Target/AArch64/AArch64InstrFormats.td
index 4cc3813..d455d7e 100644
--- a/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/lib/Target/AArch64/AArch64InstrFormats.td
@@ -1,4 +1,4 @@
-//===- AArch64InstrFormats.td - AArch64 Instruction Formats --*- tablegen -*-=//
+//===- AArch64InstrFormats.td - AArch64 Instruction Formats --*- tblgen -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -6,1482 +6,8569 @@
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
-// This file describes AArch64 instruction formats, down to the level of the
-// instruction's overall class.
-//===----------------------------------------------------------------------===//
-
//===----------------------------------------------------------------------===//
-// A64 Instruction Format Definitions.
-//===----------------------------------------------------------------------===//
+// Describe AArch64 instruction formats here
+//
+
+// Format specifies the encoding used by the instruction. This is part of the
+// ad-hoc solution used to emit machine instruction encodings by our machine
+// code emitter.
+class Format<bits<2> val> {
+ bits<2> Value = val;
+}
-// A64 is currently the only instruction set supported by the AArch64
-// architecture.
-class A64Inst<dag outs, dag ins, string asmstr, list<dag> patterns,
- InstrItinClass itin>
- : Instruction {
- // All A64 instructions are 32-bit. This field will be filled in
- // gradually going down the hierarchy.
- field bits<32> Inst;
+def PseudoFrm : Format<0>;
+def NormalFrm : Format<1>; // Do we need any others?
+// AArch64 Instruction Format
+class AArch64Inst<Format f, string cstr> : Instruction {
+ field bits<32> Inst; // Instruction encoding.
+ // Mask of bits that cause an encoding to be UNPREDICTABLE.
+ // If a bit is set, then if the corresponding bit in the
+ // target encoding differs from its value in the "Inst" field,
+ // the instruction is UNPREDICTABLE (SoftFail in abstract parlance).
field bits<32> Unpredictable = 0;
// SoftFail is the generic name for this field, but we alias it so
// as to make it more obvious what it means in ARM-land.
field bits<32> SoftFail = Unpredictable;
+ let Namespace = "AArch64";
+ Format F = f;
+ bits<2> Form = F.Value;
+ let Pattern = [];
+ let Constraints = cstr;
+}
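A hedged restatement of the Unpredictable/SoftFail mechanism in plain C++ (an illustration of the rule described in the comment above, not the actual disassembler code): a decoded word may differ from "Inst" inside the mask, but doing so only makes the instruction UNPREDICTABLE rather than a hard decode failure.

    #include <cstdint>

    bool isSoftFail(uint32_t encoding, uint32_t inst, uint32_t softFailMask) {
      // Mismatches outside the mask are handled elsewhere as hard failures;
      // a mismatch inside the mask is merely a soft failure.
      return ((encoding ^ inst) & softFailMask) != 0;
    }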
+
+// Pseudo instructions (don't have encoding information)
+class Pseudo<dag oops, dag iops, list<dag> pattern, string cstr = "">
+ : AArch64Inst<PseudoFrm, cstr> {
+ dag OutOperandList = oops;
+ dag InOperandList = iops;
+ let Pattern = pattern;
+ let isCodeGenOnly = 1;
+}
- // LLVM-level model of the AArch64/A64 distinction.
- let Namespace = "AArch64";
- let DecoderNamespace = "A64";
+// Real instructions (have encoding information)
+class EncodedI<string cstr, list<dag> pattern> : AArch64Inst<NormalFrm, cstr> {
+ let Pattern = pattern;
let Size = 4;
+}
- // Set the templated fields
- let OutOperandList = outs;
- let InOperandList = ins;
- let AsmString = asmstr;
- let Pattern = patterns;
- let Itinerary = itin;
+// Normal instructions
+class I<dag oops, dag iops, string asm, string operands, string cstr,
+ list<dag> pattern>
+ : EncodedI<cstr, pattern> {
+ dag OutOperandList = oops;
+ dag InOperandList = iops;
+ let AsmString = !strconcat(asm, operands);
}
-class PseudoInst<dag outs, dag ins, list<dag> patterns> : Instruction {
- let Namespace = "AArch64";
+class TriOpFrag<dag res> : PatFrag<(ops node:$LHS, node:$MHS, node:$RHS), res>;
+class BinOpFrag<dag res> : PatFrag<(ops node:$LHS, node:$RHS), res>;
+class UnOpFrag<dag res> : PatFrag<(ops node:$LHS), res>;
+
+// Helper fragment for an extract of the high portion of a 128-bit vector.
+def extract_high_v16i8 :
+ UnOpFrag<(extract_subvector (v16i8 node:$LHS), (i64 8))>;
+def extract_high_v8i16 :
+ UnOpFrag<(extract_subvector (v8i16 node:$LHS), (i64 4))>;
+def extract_high_v4i32 :
+ UnOpFrag<(extract_subvector (v4i32 node:$LHS), (i64 2))>;
+def extract_high_v2i64 :
+ UnOpFrag<(extract_subvector (v2i64 node:$LHS), (i64 1))>;
+
+//===----------------------------------------------------------------------===//
+// Asm Operand Classes.
+//
- let OutOperandList = outs;
- let InOperandList= ins;
- let Pattern = patterns;
- let isCodeGenOnly = 1;
- let isPseudo = 1;
+// Shifter operand for arithmetic shifted encodings.
+def ShifterOperand : AsmOperandClass {
+ let Name = "Shifter";
}
-// Represents a pseudo-instruction that represents a single A64 instruction for
-// whatever reason, the eventual result will be a 32-bit real instruction.
-class A64PseudoInst<dag outs, dag ins, list<dag> patterns>
- : PseudoInst<outs, ins, patterns> {
- let Size = 4;
+// Shifter operand for mov immediate encodings.
+def MovImm32ShifterOperand : AsmOperandClass {
+ let SuperClasses = [ShifterOperand];
+ let Name = "MovImm32Shifter";
+ let RenderMethod = "addShifterOperands";
+ let DiagnosticType = "InvalidMovImm32Shift";
+}
+def MovImm64ShifterOperand : AsmOperandClass {
+ let SuperClasses = [ShifterOperand];
+ let Name = "MovImm64Shifter";
+ let RenderMethod = "addShifterOperands";
+ let DiagnosticType = "InvalidMovImm64Shift";
+}
+
+// Shifter operand for arithmetic register shifted encodings.
+class ArithmeticShifterOperand<int width> : AsmOperandClass {
+ let SuperClasses = [ShifterOperand];
+ let Name = "ArithmeticShifter" # width;
+ let PredicateMethod = "isArithmeticShifter<" # width # ">";
+ let RenderMethod = "addShifterOperands";
+ let DiagnosticType = "AddSubRegShift" # width;
}
-// As above, this will be a single A64 instruction, but we can actually give the
-// expansion in TableGen.
-class A64PseudoExpand<dag outs, dag ins, list<dag> patterns, dag Result>
- : A64PseudoInst<outs, ins, patterns>,
- PseudoInstExpansion<Result>;
+def ArithmeticShifterOperand32 : ArithmeticShifterOperand<32>;
+def ArithmeticShifterOperand64 : ArithmeticShifterOperand<64>;
+// Shifter operand for logical register shifted encodings.
+class LogicalShifterOperand<int width> : AsmOperandClass {
+ let SuperClasses = [ShifterOperand];
+ let Name = "LogicalShifter" # width;
+ let PredicateMethod = "isLogicalShifter<" # width # ">";
+ let RenderMethod = "addShifterOperands";
+ let DiagnosticType = "AddSubRegShift" # width;
+}
-// First, some common cross-hierarchy register formats.
+def LogicalShifterOperand32 : LogicalShifterOperand<32>;
+def LogicalShifterOperand64 : LogicalShifterOperand<64>;
-class A64InstRd<dag outs, dag ins, string asmstr,
- list<dag> patterns, InstrItinClass itin>
- : A64Inst<outs, ins, asmstr, patterns, itin> {
- bits<5> Rd;
+// Shifter operand for logical vector 128/64-bit shifted encodings.
+def LogicalVecShifterOperand : AsmOperandClass {
+ let SuperClasses = [ShifterOperand];
+ let Name = "LogicalVecShifter";
+ let RenderMethod = "addShifterOperands";
+}
+def LogicalVecHalfWordShifterOperand : AsmOperandClass {
+ let SuperClasses = [LogicalVecShifterOperand];
+ let Name = "LogicalVecHalfWordShifter";
+ let RenderMethod = "addShifterOperands";
+}
- let Inst{4-0} = Rd;
+// The "MSL" shifter on the vector MOVI instruction.
+def MoveVecShifterOperand : AsmOperandClass {
+ let SuperClasses = [ShifterOperand];
+ let Name = "MoveVecShifter";
+ let RenderMethod = "addShifterOperands";
}
-class A64InstRt<dag outs, dag ins, string asmstr,
- list<dag> patterns, InstrItinClass itin>
- : A64Inst<outs, ins, asmstr, patterns, itin> {
- bits<5> Rt;
+// Extend operand for arithmetic encodings.
+def ExtendOperand : AsmOperandClass {
+ let Name = "Extend";
+ let DiagnosticType = "AddSubRegExtendLarge";
+}
+def ExtendOperand64 : AsmOperandClass {
+ let SuperClasses = [ExtendOperand];
+ let Name = "Extend64";
+ let DiagnosticType = "AddSubRegExtendSmall";
+}
+// 'extend' that's a lsl of a 64-bit register.
+def ExtendOperandLSL64 : AsmOperandClass {
+ let SuperClasses = [ExtendOperand];
+ let Name = "ExtendLSL64";
+ let RenderMethod = "addExtend64Operands";
+ let DiagnosticType = "AddSubRegExtendLarge";
+}
+
+// 8-bit floating-point immediate encodings.
+def FPImmOperand : AsmOperandClass {
+ let Name = "FPImm";
+ let ParserMethod = "tryParseFPImm";
+ let DiagnosticType = "InvalidFPImm";
+}
+
+def CondCode : AsmOperandClass {
+ let Name = "CondCode";
+ let DiagnosticType = "InvalidCondCode";
+}
+
+// A 32-bit register parsed as 64-bit
+def GPR32as64Operand : AsmOperandClass {
+ let Name = "GPR32as64";
+}
+def GPR32as64 : RegisterOperand<GPR32> {
+ let ParserMatchClass = GPR32as64Operand;
+}
+
+// 8-bit immediate for AdvSIMD where 64-bit values of the form:
+// aaaaaaaa bbbbbbbb cccccccc dddddddd eeeeeeee ffffffff gggggggg hhhhhhhh
+// are encoded as the eight bit value 'abcdefgh'.
+def SIMDImmType10Operand : AsmOperandClass { let Name = "SIMDImmType10"; }
+
+
+//===----------------------------------------------------------------------===//
+// Operand Definitions.
+//
+
+// ADR[P] instruction labels.
+def AdrpOperand : AsmOperandClass {
+ let Name = "AdrpLabel";
+ let ParserMethod = "tryParseAdrpLabel";
+ let DiagnosticType = "InvalidLabel";
+}
+def adrplabel : Operand<i64> {
+ let EncoderMethod = "getAdrLabelOpValue";
+ let PrintMethod = "printAdrpLabel";
+ let ParserMatchClass = AdrpOperand;
+}
+
+def AdrOperand : AsmOperandClass {
+ let Name = "AdrLabel";
+ let ParserMethod = "tryParseAdrLabel";
+ let DiagnosticType = "InvalidLabel";
+}
+def adrlabel : Operand<i64> {
+ let EncoderMethod = "getAdrLabelOpValue";
+ let ParserMatchClass = AdrOperand;
+}
+
+// simm9 predicate - True if the immediate is in the range [-256, 255].
+def SImm9Operand : AsmOperandClass {
+ let Name = "SImm9";
+ let DiagnosticType = "InvalidMemoryIndexedSImm9";
+}
+def simm9 : Operand<i64>, ImmLeaf<i64, [{ return Imm >= -256 && Imm < 256; }]> {
+ let ParserMatchClass = SImm9Operand;
+}
+
+// simm7sN predicate - True if the immediate is a multiple of N in the range
+// [-64 * N, 63 * N].
+class SImm7Scaled<int Scale> : AsmOperandClass {
+ let Name = "SImm7s" # Scale;
+ let DiagnosticType = "InvalidMemoryIndexed" # Scale # "SImm7";
+}
+
+def SImm7s4Operand : SImm7Scaled<4>;
+def SImm7s8Operand : SImm7Scaled<8>;
+def SImm7s16Operand : SImm7Scaled<16>;
+
+def simm7s4 : Operand<i32> {
+ let ParserMatchClass = SImm7s4Operand;
+ let PrintMethod = "printImmScale<4>";
+}
+
+def simm7s8 : Operand<i32> {
+ let ParserMatchClass = SImm7s8Operand;
+ let PrintMethod = "printImmScale<8>";
+}
+
+def simm7s16 : Operand<i32> {
+ let ParserMatchClass = SImm7s16Operand;
+ let PrintMethod = "printImmScale<16>";
+}
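A hedged restatement of the simm7sN predicate in plain C++: the immediate must be Scale-aligned and, once divided by Scale, fit in a signed 7-bit field.

    constexpr bool isSImm7Scaled(long long imm, int scale) {
      return imm % scale == 0 && imm / scale >= -64 && imm / scale <= 63;
    }
    static_assert(isSImm7Scaled(-256, 4), "-64 * 4 is the lower bound");
    static_assert(isSImm7Scaled(252, 4), "63 * 4 is the upper bound");
    static_assert(!isSImm7Scaled(254, 4), "not a multiple of the scale");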
+
+class AsmImmRange<int Low, int High> : AsmOperandClass {
+ let Name = "Imm" # Low # "_" # High;
+ let DiagnosticType = "InvalidImm" # Low # "_" # High;
+}
+
+def Imm1_8Operand : AsmImmRange<1, 8>;
+def Imm1_16Operand : AsmImmRange<1, 16>;
+def Imm1_32Operand : AsmImmRange<1, 32>;
+def Imm1_64Operand : AsmImmRange<1, 64>;
+
+def MovZSymbolG3AsmOperand : AsmOperandClass {
+ let Name = "MovZSymbolG3";
+ let RenderMethod = "addImmOperands";
+}
+
+def movz_symbol_g3 : Operand<i32> {
+ let ParserMatchClass = MovZSymbolG3AsmOperand;
+}
+
+def MovZSymbolG2AsmOperand : AsmOperandClass {
+ let Name = "MovZSymbolG2";
+ let RenderMethod = "addImmOperands";
+}
+
+def movz_symbol_g2 : Operand<i32> {
+ let ParserMatchClass = MovZSymbolG2AsmOperand;
+}
+
+def MovZSymbolG1AsmOperand : AsmOperandClass {
+ let Name = "MovZSymbolG1";
+ let RenderMethod = "addImmOperands";
+}
+
+def movz_symbol_g1 : Operand<i32> {
+ let ParserMatchClass = MovZSymbolG1AsmOperand;
+}
+
+def MovZSymbolG0AsmOperand : AsmOperandClass {
+ let Name = "MovZSymbolG0";
+ let RenderMethod = "addImmOperands";
+}
+
+def movz_symbol_g0 : Operand<i32> {
+ let ParserMatchClass = MovZSymbolG0AsmOperand;
+}
+
+def MovKSymbolG3AsmOperand : AsmOperandClass {
+ let Name = "MovKSymbolG3";
+ let RenderMethod = "addImmOperands";
+}
+
+def movk_symbol_g3 : Operand<i32> {
+ let ParserMatchClass = MovKSymbolG3AsmOperand;
+}
+
+def MovKSymbolG2AsmOperand : AsmOperandClass {
+ let Name = "MovKSymbolG2";
+ let RenderMethod = "addImmOperands";
+}
+
+def movk_symbol_g2 : Operand<i32> {
+ let ParserMatchClass = MovKSymbolG2AsmOperand;
+}
+
+def MovKSymbolG1AsmOperand : AsmOperandClass {
+ let Name = "MovKSymbolG1";
+ let RenderMethod = "addImmOperands";
+}
+
+def movk_symbol_g1 : Operand<i32> {
+ let ParserMatchClass = MovKSymbolG1AsmOperand;
+}
+
+def MovKSymbolG0AsmOperand : AsmOperandClass {
+ let Name = "MovKSymbolG0";
+ let RenderMethod = "addImmOperands";
+}
+
+def movk_symbol_g0 : Operand<i32> {
+ let ParserMatchClass = MovKSymbolG0AsmOperand;
+}
+
+class fixedpoint_i32<ValueType FloatVT>
+ : Operand<FloatVT>,
+ ComplexPattern<FloatVT, 1, "SelectCVTFixedPosOperand<32>", [fpimm, ld]> {
+ let EncoderMethod = "getFixedPointScaleOpValue";
+ let DecoderMethod = "DecodeFixedPointScaleImm32";
+ let ParserMatchClass = Imm1_32Operand;
+}
+
+class fixedpoint_i64<ValueType FloatVT>
+ : Operand<FloatVT>,
+ ComplexPattern<FloatVT, 1, "SelectCVTFixedPosOperand<64>", [fpimm, ld]> {
+ let EncoderMethod = "getFixedPointScaleOpValue";
+ let DecoderMethod = "DecodeFixedPointScaleImm64";
+ let ParserMatchClass = Imm1_64Operand;
+}
+
+def fixedpoint_f32_i32 : fixedpoint_i32<f32>;
+def fixedpoint_f64_i32 : fixedpoint_i32<f64>;
+
+def fixedpoint_f32_i64 : fixedpoint_i64<f32>;
+def fixedpoint_f64_i64 : fixedpoint_i64<f64>;
+
+def vecshiftR8 : Operand<i32>, ImmLeaf<i32, [{
+ return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 9);
+}]> {
+ let EncoderMethod = "getVecShiftR8OpValue";
+ let DecoderMethod = "DecodeVecShiftR8Imm";
+ let ParserMatchClass = Imm1_8Operand;
+}
+def vecshiftR16 : Operand<i32>, ImmLeaf<i32, [{
+ return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 17);
+}]> {
+ let EncoderMethod = "getVecShiftR16OpValue";
+ let DecoderMethod = "DecodeVecShiftR16Imm";
+ let ParserMatchClass = Imm1_16Operand;
+}
+def vecshiftR16Narrow : Operand<i32>, ImmLeaf<i32, [{
+ return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 9);
+}]> {
+ let EncoderMethod = "getVecShiftR16OpValue";
+ let DecoderMethod = "DecodeVecShiftR16ImmNarrow";
+ let ParserMatchClass = Imm1_8Operand;
+}
+def vecshiftR32 : Operand<i32>, ImmLeaf<i32, [{
+ return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 33);
+}]> {
+ let EncoderMethod = "getVecShiftR32OpValue";
+ let DecoderMethod = "DecodeVecShiftR32Imm";
+ let ParserMatchClass = Imm1_32Operand;
+}
+def vecshiftR32Narrow : Operand<i32>, ImmLeaf<i32, [{
+ return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 17);
+}]> {
+ let EncoderMethod = "getVecShiftR32OpValue";
+ let DecoderMethod = "DecodeVecShiftR32ImmNarrow";
+ let ParserMatchClass = Imm1_16Operand;
+}
+def vecshiftR64 : Operand<i32>, ImmLeaf<i32, [{
+ return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 65);
+}]> {
+ let EncoderMethod = "getVecShiftR64OpValue";
+ let DecoderMethod = "DecodeVecShiftR64Imm";
+ let ParserMatchClass = Imm1_64Operand;
+}
+def vecshiftR64Narrow : Operand<i32>, ImmLeaf<i32, [{
+ return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 33);
+}]> {
+ let EncoderMethod = "getVecShiftR64OpValue";
+ let DecoderMethod = "DecodeVecShiftR64ImmNarrow";
+ let ParserMatchClass = Imm1_32Operand;
+}
+
+def Imm0_7Operand : AsmImmRange<0, 7>;
+def Imm0_15Operand : AsmImmRange<0, 15>;
+def Imm0_31Operand : AsmImmRange<0, 31>;
+def Imm0_63Operand : AsmImmRange<0, 63>;
+
+def vecshiftL8 : Operand<i32>, ImmLeaf<i32, [{
+ return (((uint32_t)Imm) < 8);
+}]> {
+ let EncoderMethod = "getVecShiftL8OpValue";
+ let DecoderMethod = "DecodeVecShiftL8Imm";
+ let ParserMatchClass = Imm0_7Operand;
+}
+def vecshiftL16 : Operand<i32>, ImmLeaf<i32, [{
+ return (((uint32_t)Imm) < 16);
+}]> {
+ let EncoderMethod = "getVecShiftL16OpValue";
+ let DecoderMethod = "DecodeVecShiftL16Imm";
+ let ParserMatchClass = Imm0_15Operand;
+}
+def vecshiftL32 : Operand<i32>, ImmLeaf<i32, [{
+ return (((uint32_t)Imm) < 32);
+}]> {
+ let EncoderMethod = "getVecShiftL32OpValue";
+ let DecoderMethod = "DecodeVecShiftL32Imm";
+ let ParserMatchClass = Imm0_31Operand;
+}
+def vecshiftL64 : Operand<i32>, ImmLeaf<i32, [{
+ return (((uint32_t)Imm) < 64);
+}]> {
+ let EncoderMethod = "getVecShiftL64OpValue";
+ let DecoderMethod = "DecodeVecShiftL64Imm";
+ let ParserMatchClass = Imm0_63Operand;
+}
+
+
+// Crazy immediate formats used by 32-bit and 64-bit logical immediate
+// instructions for splatting repeating bit patterns across the immediate.
+def logical_imm32_XFORM : SDNodeXForm<imm, [{
+ uint64_t enc = AArch64_AM::encodeLogicalImmediate(N->getZExtValue(), 32);
+ return CurDAG->getTargetConstant(enc, MVT::i32);
+}]>;
+def logical_imm64_XFORM : SDNodeXForm<imm, [{
+ uint64_t enc = AArch64_AM::encodeLogicalImmediate(N->getZExtValue(), 64);
+ return CurDAG->getTargetConstant(enc, MVT::i32);
+}]>;
+
+def LogicalImm32Operand : AsmOperandClass {
+ let Name = "LogicalImm32";
+ let DiagnosticType = "LogicalSecondSource";
+}
+def LogicalImm64Operand : AsmOperandClass {
+ let Name = "LogicalImm64";
+ let DiagnosticType = "LogicalSecondSource";
+}
+def logical_imm32 : Operand<i32>, PatLeaf<(imm), [{
+ return AArch64_AM::isLogicalImmediate(N->getZExtValue(), 32);
+}], logical_imm32_XFORM> {
+ let PrintMethod = "printLogicalImm32";
+ let ParserMatchClass = LogicalImm32Operand;
+}
+def logical_imm64 : Operand<i64>, PatLeaf<(imm), [{
+ return AArch64_AM::isLogicalImmediate(N->getZExtValue(), 64);
+}], logical_imm64_XFORM> {
+ let PrintMethod = "printLogicalImm64";
+ let ParserMatchClass = LogicalImm64Operand;
+}
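+// For illustration: a valid logical immediate is a 2/4/8/16/32/64-bit element
+// holding a rotated run of contiguous ones, replicated across the register.
+// 0x5555555555555555 (alternating bits) is encodable; 0 and all-ones are not,
+// which is why the PatLeaf predicates above reject them.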
+
+// imm0_65535 predicate - True if the immediate is in the range [0,65535].
+def Imm0_65535Operand : AsmImmRange<0, 65535>;
+def imm0_65535 : Operand<i32>, ImmLeaf<i32, [{
+ return ((uint32_t)Imm) < 65536;
+}]> {
+ let ParserMatchClass = Imm0_65535Operand;
+ let PrintMethod = "printHexImm";
+}
+
+// imm0_255 predicate - True if the immediate is in the range [0,255].
+def Imm0_255Operand : AsmOperandClass { let Name = "Imm0_255"; }
+def imm0_255 : Operand<i32>, ImmLeaf<i32, [{
+ return ((uint32_t)Imm) < 256;
+}]> {
+ let ParserMatchClass = Imm0_255Operand;
+ let PrintMethod = "printHexImm";
+}
+
+// imm0_127 predicate - True if the immediate is in the range [0,127].
+def Imm0_127Operand : AsmImmRange<0, 127>;
+def imm0_127 : Operand<i32>, ImmLeaf<i32, [{
+ return ((uint32_t)Imm) < 128;
+}]> {
+ let ParserMatchClass = Imm0_127Operand;
+ let PrintMethod = "printHexImm";
+}
+
+// NOTE: These imm0_N operands have to be of type i64 because i64 is the type
+// used for all shift amounts.
+
+// imm0_63 predicate - True if the immediate is in the range [0,63].
+def imm0_63 : Operand<i64>, ImmLeaf<i64, [{
+ return ((uint64_t)Imm) < 64;
+}]> {
+ let ParserMatchClass = Imm0_63Operand;
+}
+
+// imm0_31 predicate - True if the immediate is in the range [0,31].
+def imm0_31 : Operand<i64>, ImmLeaf<i64, [{
+ return ((uint64_t)Imm) < 32;
+}]> {
+ let ParserMatchClass = Imm0_31Operand;
+}
+
+// imm0_15 predicate - True if the immediate is in the range [0,15].
+def imm0_15 : Operand<i64>, ImmLeaf<i64, [{
+ return ((uint64_t)Imm) < 16;
+}]> {
+ let ParserMatchClass = Imm0_15Operand;
+}
+
+// imm0_7 predicate - True if the immediate is in the range [0,7].
+def imm0_7 : Operand<i64>, ImmLeaf<i64, [{
+ return ((uint64_t)Imm) < 8;
+}]> {
+ let ParserMatchClass = Imm0_7Operand;
+}
+
+// An arithmetic shifter operand:
+// {7-6} - shift type: 00 = lsl, 01 = lsr, 10 = asr
+// {5-0} - imm6
+class arith_shift<ValueType Ty, int width> : Operand<Ty> {
+ let PrintMethod = "printShifter";
+ let ParserMatchClass = !cast<AsmOperandClass>(
+ "ArithmeticShifterOperand" # width);
+}
+
+def arith_shift32 : arith_shift<i32, 32>;
+def arith_shift64 : arith_shift<i64, 64>;
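+// For example, "lsr #3" encodes in this operand as type = 01 and imm6 =
+// 000011, i.e. the 8-bit operand value 0b01000011.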
+
+class arith_shifted_reg<ValueType Ty, RegisterClass regclass, int width>
+ : Operand<Ty>,
+ ComplexPattern<Ty, 2, "SelectArithShiftedRegister", []> {
+ let PrintMethod = "printShiftedRegister";
+ let MIOperandInfo = (ops regclass, !cast<Operand>("arith_shift" # width));
+}
+
+def arith_shifted_reg32 : arith_shifted_reg<i32, GPR32, 32>;
+def arith_shifted_reg64 : arith_shifted_reg<i64, GPR64, 64>;
+
+// A logical shifter operand:
+// {7-6} - shift type: 00 = lsl, 01 = lsr, 10 = asr, 11 = ror
+// {5-0} - imm6
+class logical_shift<int width> : Operand<i32> {
+ let PrintMethod = "printShifter";
+ let ParserMatchClass = !cast<AsmOperandClass>(
+ "LogicalShifterOperand" # width);
+}
+
+def logical_shift32 : logical_shift<32>;
+def logical_shift64 : logical_shift<64>;
+
+class logical_shifted_reg<ValueType Ty, RegisterClass regclass, Operand shiftop>
+ : Operand<Ty>,
+ ComplexPattern<Ty, 2, "SelectLogicalShiftedRegister", []> {
+ let PrintMethod = "printShiftedRegister";
+ let MIOperandInfo = (ops regclass, shiftop);
+}
+def logical_shifted_reg32 : logical_shifted_reg<i32, GPR32, logical_shift32>;
+def logical_shifted_reg64 : logical_shifted_reg<i64, GPR64, logical_shift64>;
+
+// A logical vector shifter operand:
+// {7-6} - shift type: 00 = lsl
+// {5-0} - imm6: #0, #8, #16, or #24
+def logical_vec_shift : Operand<i32> {
+ let PrintMethod = "printShifter";
+ let EncoderMethod = "getVecShifterOpValue";
+ let ParserMatchClass = LogicalVecShifterOperand;
+}
+
+// A logical vector half-word shifter operand:
+// {7-6} - shift type: 00 = lsl
+// {5-0} - imm6: #0 or #8
+def logical_vec_hw_shift : Operand<i32> {
+ let PrintMethod = "printShifter";
+ let EncoderMethod = "getVecShifterOpValue";
+ let ParserMatchClass = LogicalVecHalfWordShifterOperand;
+}
+
+// A vector move shifter operand:
+// {0} - imm1: #8 or #16
+def move_vec_shift : Operand<i32> {
+ let PrintMethod = "printShifter";
+ let EncoderMethod = "getMoveVecShifterOpValue";
+ let ParserMatchClass = MoveVecShifterOperand;
+}
+
+def AddSubImmOperand : AsmOperandClass {
+ let Name = "AddSubImm";
+ let ParserMethod = "tryParseAddSubImm";
+ let DiagnosticType = "AddSubSecondSource";
+}
+// An ADD/SUB immediate shifter operand:
+// second operand:
+// {7-6} - shift type: 00 = lsl
+// {5-0} - imm6: #0 or #12
+class addsub_shifted_imm<ValueType Ty>
+ : Operand<Ty>, ComplexPattern<Ty, 2, "SelectArithImmed", [imm]> {
+ let PrintMethod = "printAddSubImm";
+ let EncoderMethod = "getAddSubImmOpValue";
+ let ParserMatchClass = AddSubImmOperand;
+ let MIOperandInfo = (ops i32imm, i32imm);
+}
+
+def addsub_shifted_imm32 : addsub_shifted_imm<i32>;
+def addsub_shifted_imm64 : addsub_shifted_imm<i64>;
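+// For example, "add x0, x1, #4096" is encoded with imm12 = 1 and 'lsl #12'
+// (4096 == 1 << 12), while "add x0, x1, #1" uses imm12 = 1 and 'lsl #0'.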
+
+class neg_addsub_shifted_imm<ValueType Ty>
+ : Operand<Ty>, ComplexPattern<Ty, 2, "SelectNegArithImmed", [imm]> {
+ let PrintMethod = "printAddSubImm";
+ let EncoderMethod = "getAddSubImmOpValue";
+ let ParserMatchClass = AddSubImmOperand;
+ let MIOperandInfo = (ops i32imm, i32imm);
+}
+
+def neg_addsub_shifted_imm32 : neg_addsub_shifted_imm<i32>;
+def neg_addsub_shifted_imm64 : neg_addsub_shifted_imm<i64>;
+
+// An extend operand:
+// {5-3} - extend type
+// {2-0} - imm3
+def arith_extend : Operand<i32> {
+ let PrintMethod = "printArithExtend";
+ let ParserMatchClass = ExtendOperand;
+}
+def arith_extend64 : Operand<i32> {
+ let PrintMethod = "printArithExtend";
+ let ParserMatchClass = ExtendOperand64;
+}
+
+// 'extend' that's a lsl of a 64-bit register.
+def arith_extendlsl64 : Operand<i32> {
+ let PrintMethod = "printArithExtend";
+ let ParserMatchClass = ExtendOperandLSL64;
+}
+
+class arith_extended_reg32<ValueType Ty> : Operand<Ty>,
+ ComplexPattern<Ty, 2, "SelectArithExtendedRegister", []> {
+ let PrintMethod = "printExtendedRegister";
+ let MIOperandInfo = (ops GPR32, arith_extend);
+}
+
+class arith_extended_reg32to64<ValueType Ty> : Operand<Ty>,
+ ComplexPattern<Ty, 2, "SelectArithExtendedRegister", []> {
+ let PrintMethod = "printExtendedRegister";
+ let MIOperandInfo = (ops GPR32, arith_extend64);
+}
+
+// Floating-point immediate.
+def fpimm32 : Operand<f32>,
+ PatLeaf<(f32 fpimm), [{
+ return AArch64_AM::getFP32Imm(N->getValueAPF()) != -1;
+ }], SDNodeXForm<fpimm, [{
+ APFloat InVal = N->getValueAPF();
+ uint32_t enc = AArch64_AM::getFP32Imm(InVal);
+ return CurDAG->getTargetConstant(enc, MVT::i32);
+ }]>> {
+ let ParserMatchClass = FPImmOperand;
+ let PrintMethod = "printFPImmOperand";
+}
+def fpimm64 : Operand<f64>,
+ PatLeaf<(f64 fpimm), [{
+ return AArch64_AM::getFP64Imm(N->getValueAPF()) != -1;
+ }], SDNodeXForm<fpimm, [{
+ APFloat InVal = N->getValueAPF();
+ uint32_t enc = AArch64_AM::getFP64Imm(InVal);
+ return CurDAG->getTargetConstant(enc, MVT::i32);
+ }]>> {
+ let ParserMatchClass = FPImmOperand;
+ let PrintMethod = "printFPImmOperand";
+}
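+// Only the 8-bit FMOV-style immediates are accepted here: values of the form
+// +/-(1 + f/16) * 2^e with f in [0,15] and e in [-3,4]. So 1.0, 0.5 and 31.0
+// encode, while 0.0 and 0.1 do not (getFP32Imm/getFP64Imm return -1).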
+
+def fpimm8 : Operand<i32> {
+ let ParserMatchClass = FPImmOperand;
+ let PrintMethod = "printFPImmOperand";
+}
+
+def fpimm0 : PatLeaf<(fpimm), [{
+ return N->isExactlyValue(+0.0);
+}]>;
+
+// Vector lane operands
+class AsmVectorIndex<string Suffix> : AsmOperandClass {
+ let Name = "VectorIndex" # Suffix;
+ let DiagnosticType = "InvalidIndex" # Suffix;
+}
+def VectorIndex1Operand : AsmVectorIndex<"1">;
+def VectorIndexBOperand : AsmVectorIndex<"B">;
+def VectorIndexHOperand : AsmVectorIndex<"H">;
+def VectorIndexSOperand : AsmVectorIndex<"S">;
+def VectorIndexDOperand : AsmVectorIndex<"D">;
+
+def VectorIndex1 : Operand<i64>, ImmLeaf<i64, [{
+ return ((uint64_t)Imm) == 1;
+}]> {
+ let ParserMatchClass = VectorIndex1Operand;
+ let PrintMethod = "printVectorIndex";
+ let MIOperandInfo = (ops i64imm);
+}
+def VectorIndexB : Operand<i64>, ImmLeaf<i64, [{
+ return ((uint64_t)Imm) < 16;
+}]> {
+ let ParserMatchClass = VectorIndexBOperand;
+ let PrintMethod = "printVectorIndex";
+ let MIOperandInfo = (ops i64imm);
+}
+def VectorIndexH : Operand<i64>, ImmLeaf<i64, [{
+ return ((uint64_t)Imm) < 8;
+}]> {
+ let ParserMatchClass = VectorIndexHOperand;
+ let PrintMethod = "printVectorIndex";
+ let MIOperandInfo = (ops i64imm);
+}
+def VectorIndexS : Operand<i64>, ImmLeaf<i64, [{
+ return ((uint64_t)Imm) < 4;
+}]> {
+ let ParserMatchClass = VectorIndexSOperand;
+ let PrintMethod = "printVectorIndex";
+ let MIOperandInfo = (ops i64imm);
+}
+def VectorIndexD : Operand<i64>, ImmLeaf<i64, [{
+ return ((uint64_t)Imm) < 2;
+}]> {
+ let ParserMatchClass = VectorIndexDOperand;
+ let PrintMethod = "printVectorIndex";
+ let MIOperandInfo = (ops i64imm);
+}
+
+// 8-bit immediate for AdvSIMD where 64-bit values of the form:
+// aaaaaaaa bbbbbbbb cccccccc dddddddd eeeeeeee ffffffff gggggggg hhhhhhhh
+// are encoded as the eight-bit value 'abcdefgh'.
+def simdimmtype10 : Operand<i32>,
+ PatLeaf<(f64 fpimm), [{
+ return AArch64_AM::isAdvSIMDModImmType10(N->getValueAPF()
+ .bitcastToAPInt()
+ .getZExtValue());
+ }], SDNodeXForm<fpimm, [{
+ APFloat InVal = N->getValueAPF();
+ uint32_t enc = AArch64_AM::encodeAdvSIMDModImmType10(N->getValueAPF()
+ .bitcastToAPInt()
+ .getZExtValue());
+ return CurDAG->getTargetConstant(enc, MVT::i32);
+ }]>> {
+ let ParserMatchClass = SIMDImmType10Operand;
+ let PrintMethod = "printSIMDType10Operand";
+}
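+// For example, the f64 bit pattern 0xFF00FF00FF00FF00 (bytes FF 00 FF 00
+// FF 00 FF 00) gives abcdefgh = 0b10101010, so enc = 0xAA.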
+
+//---
+// System management
+//---
+
+// Base encoding for system instruction operands.
+let mayLoad = 0, mayStore = 0, hasSideEffects = 1 in
+class BaseSystemI<bit L, dag oops, dag iops, string asm, string operands>
+ : I<oops, iops, asm, operands, "", []> {
+ let Inst{31-22} = 0b1101010100;
+ let Inst{21} = L;
+}
+
+// System instructions which do not have an Rt register.
+class SimpleSystemI<bit L, dag iops, string asm, string operands>
+ : BaseSystemI<L, (outs), iops, asm, operands> {
+ let Inst{4-0} = 0b11111;
+}
+
+// System instructions which have an Rt register.
+class RtSystemI<bit L, dag oops, dag iops, string asm, string operands>
+ : BaseSystemI<L, oops, iops, asm, operands>,
+ Sched<[WriteSys]> {
+ bits<5> Rt;
let Inst{4-0} = Rt;
}
+
+// Hint instructions, whose single 7-bit immediate spans the 4-bit CRm field
+// and the 3-bit op2 field.
+class HintI<string mnemonic>
+ : SimpleSystemI<0, (ins imm0_127:$imm), mnemonic#" $imm", "">,
+ Sched<[WriteHint]> {
+ bits <7> imm;
+ let Inst{20-12} = 0b000110010;
+ let Inst{11-5} = imm;
+}
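+// For example, "nop" is "hint #0" and "wfi" is "hint #3"; imm{6-3} lands in
+// the CRm field (Inst{11-8}) and imm{2-0} in op2 (Inst{7-5}).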
+
+// System instructions taking a single literal operand which encodes into
+// CRm. op2 differentiates the opcodes.
+def BarrierAsmOperand : AsmOperandClass {
+ let Name = "Barrier";
+ let ParserMethod = "tryParseBarrierOperand";
+}
+def barrier_op : Operand<i32> {
+ let PrintMethod = "printBarrierOption";
+ let ParserMatchClass = BarrierAsmOperand;
+}
+class CRmSystemI<Operand crmtype, bits<3> opc, string asm>
+ : SimpleSystemI<0, (ins crmtype:$CRm), asm, "\t$CRm">,
+ Sched<[WriteBarrier]> {
+ bits<4> CRm;
+ let Inst{20-12} = 0b000110011;
+ let Inst{11-8} = CRm;
+ let Inst{7-5} = opc;
+}
+
+// MRS/MSR system instructions. These have different operand classes because
+// a different subset of registers can be accessed through each instruction.
+def MRSSystemRegisterOperand : AsmOperandClass {
+ let Name = "MRSSystemRegister";
+ let ParserMethod = "tryParseSysReg";
+ let DiagnosticType = "MRS";
+}
+// Concatenation of 1, op0{0}, op1, CRn, CRm, op2: a 16-bit immediate whose
+// top bit is always 1; the low 15 bits form the systemreg field below.
+def mrs_sysreg_op : Operand<i32> {
+ let ParserMatchClass = MRSSystemRegisterOperand;
+ let DecoderMethod = "DecodeMRSSystemRegister";
+ let PrintMethod = "printMRSSystemRegister";
+}
-class A64InstRdn<dag outs, dag ins, string asmstr,
- list<dag> patterns, InstrItinClass itin>
- : A64InstRd<outs, ins, asmstr, patterns, itin> {
- // Inherit rdt
- bits<5> Rn;
+def MSRSystemRegisterOperand : AsmOperandClass {
+ let Name = "MSRSystemRegister";
+ let ParserMethod = "tryParseSysReg";
+ let DiagnosticType = "MSR";
+}
+def msr_sysreg_op : Operand<i32> {
+ let ParserMatchClass = MSRSystemRegisterOperand;
+ let DecoderMethod = "DecodeMSRSystemRegister";
+ let PrintMethod = "printMSRSystemRegister";
+}
+
+class MRSI : RtSystemI<1, (outs GPR64:$Rt), (ins mrs_sysreg_op:$systemreg),
+ "mrs", "\t$Rt, $systemreg"> {
+ bits<15> systemreg;
+ let Inst{20} = 1;
+ let Inst{19-5} = systemreg;
+}
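+// For example, NZCV (S3_3_C4_C2_0, i.e. op1=3, CRn=4, CRm=2, op2=0) gives
+// systemreg = 0b101101000010000 for "mrs x0, nzcv".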
+
+// FIXME: Some of these def NZCV, others don't. Best way to model that?
+// Explicitly modeling each of the system registers as a register class
+// would do it, but feels like overkill at this point.
+class MSRI : RtSystemI<0, (outs), (ins msr_sysreg_op:$systemreg, GPR64:$Rt),
+ "msr", "\t$systemreg, $Rt"> {
+ bits<15> systemreg;
+ let Inst{20} = 1;
+ let Inst{19-5} = systemreg;
+}
+
+def SystemPStateFieldOperand : AsmOperandClass {
+ let Name = "SystemPStateField";
+ let ParserMethod = "tryParseSysReg";
+}
+def pstatefield_op : Operand<i32> {
+ let ParserMatchClass = SystemPStateFieldOperand;
+ let PrintMethod = "printSystemPStateField";
+}
+
+let Defs = [NZCV] in
+class MSRpstateI
+ : SimpleSystemI<0, (ins pstatefield_op:$pstate_field, imm0_15:$imm),
+ "msr", "\t$pstate_field, $imm">,
+ Sched<[WriteSys]> {
+ bits<6> pstatefield;
+ bits<4> imm;
+ let Inst{20-19} = 0b00;
+ let Inst{18-16} = pstatefield{5-3};
+ let Inst{15-12} = 0b0100;
+ let Inst{11-8} = imm;
+ let Inst{7-5} = pstatefield{2-0};
+
+ let DecoderMethod = "DecodeSystemPStateInstruction";
+}
+
+// SYS and SYSL generic system instructions.
+def SysCRAsmOperand : AsmOperandClass {
+ let Name = "SysCR";
+ let ParserMethod = "tryParseSysCROperand";
+}
+
+def sys_cr_op : Operand<i32> {
+ let PrintMethod = "printSysCROperand";
+ let ParserMatchClass = SysCRAsmOperand;
+}
+
+class SystemXtI<bit L, string asm>
+ : RtSystemI<L, (outs),
+ (ins imm0_7:$op1, sys_cr_op:$Cn, sys_cr_op:$Cm, imm0_7:$op2, GPR64:$Rt),
+ asm, "\t$op1, $Cn, $Cm, $op2, $Rt"> {
+ bits<3> op1;
+ bits<4> Cn;
+ bits<4> Cm;
+ bits<3> op2;
+ let Inst{20-19} = 0b01;
+ let Inst{18-16} = op1;
+ let Inst{15-12} = Cn;
+ let Inst{11-8} = Cm;
+ let Inst{7-5} = op2;
+}
+
+class SystemLXtI<bit L, string asm>
+ : RtSystemI<L, (outs),
+ (ins GPR64:$Rt, imm0_7:$op1, sys_cr_op:$Cn, sys_cr_op:$Cm, imm0_7:$op2),
+ asm, "\t$Rt, $op1, $Cn, $Cm, $op2"> {
+ bits<3> op1;
+ bits<4> Cn;
+ bits<4> Cm;
+ bits<3> op2;
+ let Inst{20-19} = 0b01;
+ let Inst{18-16} = op1;
+ let Inst{15-12} = Cn;
+ let Inst{11-8} = Cm;
+ let Inst{7-5} = op2;
+}
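+// For example, "ic iallu" is the alias for "sys #0, c7, c5, #0" (op1=0, Cn=7,
+// Cm=5, op2=0); when Rt is omitted it assembles as XZR.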
+
+// Branch (register) instructions:
+//
+// case opc of
+// 0001 blr
+// 0000 br
+// 0101 dret
+// 0100 eret
+// 0010 ret
+// otherwise UNDEFINED
+class BaseBranchReg<bits<4> opc, dag oops, dag iops, string asm,
+ string operands, list<dag> pattern>
+ : I<oops, iops, asm, operands, "", pattern>, Sched<[WriteBrReg]> {
+ let Inst{31-25} = 0b1101011;
+ let Inst{24-21} = opc;
+ let Inst{20-16} = 0b11111;
+ let Inst{15-10} = 0b000000;
+ let Inst{4-0} = 0b00000;
+}
+class BranchReg<bits<4> opc, string asm, list<dag> pattern>
+ : BaseBranchReg<opc, (outs), (ins GPR64:$Rn), asm, "\t$Rn", pattern> {
+ bits<5> Rn;
let Inst{9-5} = Rn;
}
-class A64InstRtn<dag outs, dag ins, string asmstr,
- list<dag> patterns, InstrItinClass itin>
- : A64InstRt<outs, ins, asmstr, patterns, itin> {
- // Inherit rdt
+let mayLoad = 0, mayStore = 0, hasSideEffects = 1, isReturn = 1 in
+class SpecialReturn<bits<4> opc, string asm>
+ : BaseBranchReg<opc, (outs), (ins), asm, "", []> {
+ let Inst{9-5} = 0b11111;
+}
+
+//---
+// Conditional branch instruction.
+//---
+
+// Condition code.
+// 4-bit immediate. Pretty-printed as <cc>
+def ccode : Operand<i32> {
+ let PrintMethod = "printCondCode";
+ let ParserMatchClass = CondCode;
+}
+def inv_ccode : Operand<i32> {
+ let PrintMethod = "printInverseCondCode";
+ let ParserMatchClass = CondCode;
+}
+
+// Conditional branch target. 19-bit immediate. The low two bits of the target
+// offset are implied zero and so are not part of the immediate.
+def PCRelLabel19Operand : AsmOperandClass {
+ let Name = "PCRelLabel19";
+ let DiagnosticType = "InvalidLabel";
+}
+def am_brcond : Operand<OtherVT> {
+ let EncoderMethod = "getCondBranchTargetOpValue";
+ let DecoderMethod = "DecodePCRelLabel19";
+ let PrintMethod = "printAlignedLabel";
+ let ParserMatchClass = PCRelLabel19Operand;
+}
+
+class BranchCond : I<(outs), (ins ccode:$cond, am_brcond:$target),
+ "b", ".$cond\t$target", "",
+ [(AArch64brcond bb:$target, imm:$cond, NZCV)]>,
+ Sched<[WriteBr]> {
+ let isBranch = 1;
+ let isTerminator = 1;
+ let Uses = [NZCV];
+
+ bits<4> cond;
+ bits<19> target;
+ let Inst{31-24} = 0b01010100;
+ let Inst{23-5} = target;
+ let Inst{4} = 0;
+ let Inst{3-0} = cond;
+}
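+// For example, "b.eq lbl" has cond = 0b0000 (EQ) and target = the signed word
+// offset to lbl, so conditional branches reach +/-1MiB (19 bits << 2).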
+
+//---
+// Compare-and-branch instructions.
+//---
+class BaseCmpBranch<RegisterClass regtype, bit op, string asm, SDNode node>
+ : I<(outs), (ins regtype:$Rt, am_brcond:$target),
+ asm, "\t$Rt, $target", "",
+ [(node regtype:$Rt, bb:$target)]>,
+ Sched<[WriteBr]> {
+ let isBranch = 1;
+ let isTerminator = 1;
+
+ bits<5> Rt;
+ bits<19> target;
+ let Inst{30-25} = 0b011010;
+ let Inst{24} = op;
+ let Inst{23-5} = target;
+ let Inst{4-0} = Rt;
+}
+
+multiclass CmpBranch<bit op, string asm, SDNode node> {
+ def W : BaseCmpBranch<GPR32, op, asm, node> {
+ let Inst{31} = 0;
+ }
+ def X : BaseCmpBranch<GPR64, op, asm, node> {
+ let Inst{31} = 1;
+ }
+}
+
+//---
+// Test-bit-and-branch instructions.
+//---
+// Test-and-branch target. 14-bit sign-extended immediate. The low two bits of
+// the target offset are implied zero and so are not part of the immediate.
+def BranchTarget14Operand : AsmOperandClass {
+ let Name = "BranchTarget14";
+}
+def am_tbrcond : Operand<OtherVT> {
+ let EncoderMethod = "getTestBranchTargetOpValue";
+ let PrintMethod = "printAlignedLabel";
+ let ParserMatchClass = BranchTarget14Operand;
+}
+
+// AsmOperand classes to emit (or not) special diagnostics
+def TBZImm0_31Operand : AsmOperandClass {
+ let Name = "TBZImm0_31";
+ let PredicateMethod = "isImm0_31";
+ let RenderMethod = "addImm0_31Operands";
+}
+def TBZImm32_63Operand : AsmOperandClass {
+ let Name = "Imm32_63";
+ let DiagnosticType = "InvalidImm0_63";
+}
+
+class tbz_imm0_31<AsmOperandClass matcher> : Operand<i64>, ImmLeaf<i64, [{
+ return (((uint32_t)Imm) < 32);
+}]> {
+ let ParserMatchClass = matcher;
+}
+
+def tbz_imm0_31_diag : tbz_imm0_31<Imm0_31Operand>;
+def tbz_imm0_31_nodiag : tbz_imm0_31<TBZImm0_31Operand>;
+
+def tbz_imm32_63 : Operand<i64>, ImmLeaf<i64, [{
+ return (((uint32_t)Imm) > 31) && (((uint32_t)Imm) < 64);
+}]> {
+ let ParserMatchClass = TBZImm32_63Operand;
+}
+
+class BaseTestBranch<RegisterClass regtype, Operand immtype,
+ bit op, string asm, SDNode node>
+ : I<(outs), (ins regtype:$Rt, immtype:$bit_off, am_tbrcond:$target),
+ asm, "\t$Rt, $bit_off, $target", "",
+ [(node regtype:$Rt, immtype:$bit_off, bb:$target)]>,
+ Sched<[WriteBr]> {
+ let isBranch = 1;
+ let isTerminator = 1;
+
+ bits<5> Rt;
+ bits<6> bit_off;
+ bits<14> target;
+
+ let Inst{30-25} = 0b011011;
+ let Inst{24} = op;
+ let Inst{23-19} = bit_off{4-0};
+ let Inst{18-5} = target;
+ let Inst{4-0} = Rt;
+
+ let DecoderMethod = "DecodeTestAndBranch";
+}
+
+multiclass TestBranch<bit op, string asm, SDNode node> {
+ def W : BaseTestBranch<GPR32, tbz_imm0_31_diag, op, asm, node> {
+ let Inst{31} = 0;
+ }
+
+ def X : BaseTestBranch<GPR64, tbz_imm32_63, op, asm, node> {
+ let Inst{31} = 1;
+ }
+
+  // Alias X-reg with 0-31 imm to W-reg.
+ def : InstAlias<asm # "\t$Rd, $imm, $target",
+ (!cast<Instruction>(NAME#"W") GPR32as64:$Rd,
+ tbz_imm0_31_nodiag:$imm, am_tbrcond:$target), 0>;
+ def : Pat<(node GPR64:$Rn, tbz_imm0_31_diag:$imm, bb:$target),
+ (!cast<Instruction>(NAME#"W") (EXTRACT_SUBREG GPR64:$Rn, sub_32),
+ tbz_imm0_31_diag:$imm, bb:$target)>;
+}
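+// So the assembler accepts "tbz x2, #3, lbl" and emits the W-register form,
+// while bit numbers 32-63 force the X form (where bit_off{5} is always 1).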
+
+//---
+// Unconditional branch (immediate) instructions.
+//---
+def BranchTarget26Operand : AsmOperandClass {
+ let Name = "BranchTarget26";
+ let DiagnosticType = "InvalidLabel";
+}
+def am_b_target : Operand<OtherVT> {
+ let EncoderMethod = "getBranchTargetOpValue";
+ let PrintMethod = "printAlignedLabel";
+ let ParserMatchClass = BranchTarget26Operand;
+}
+def am_bl_target : Operand<i64> {
+ let EncoderMethod = "getBranchTargetOpValue";
+ let PrintMethod = "printAlignedLabel";
+ let ParserMatchClass = BranchTarget26Operand;
+}
+
+class BImm<bit op, dag iops, string asm, list<dag> pattern>
+ : I<(outs), iops, asm, "\t$addr", "", pattern>, Sched<[WriteBr]> {
+ bits<26> addr;
+ let Inst{31} = op;
+ let Inst{30-26} = 0b00101;
+ let Inst{25-0} = addr;
+
+ let DecoderMethod = "DecodeUnconditionalBranch";
+}
+
+class BranchImm<bit op, string asm, list<dag> pattern>
+ : BImm<op, (ins am_b_target:$addr), asm, pattern>;
+class CallImm<bit op, string asm, list<dag> pattern>
+ : BImm<op, (ins am_bl_target:$addr), asm, pattern>;
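+// Both targets are 26-bit word offsets, so "b" and "bl" reach +/-128MiB from
+// the branch instruction.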
+
+//---
+// Basic one-operand data processing instructions.
+//---
+
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
+class BaseOneOperandData<bits<3> opc, RegisterClass regtype, string asm,
+ SDPatternOperator node>
+ : I<(outs regtype:$Rd), (ins regtype:$Rn), asm, "\t$Rd, $Rn", "",
+ [(set regtype:$Rd, (node regtype:$Rn))]>,
+ Sched<[WriteI, ReadI]> {
+ bits<5> Rd;
bits<5> Rn;
- let Inst{9-5} = Rn;
+ let Inst{30-13} = 0b101101011000000000;
+ let Inst{12-10} = opc;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
}
-// Instructions taking Rt,Rt2,Rn
-class A64InstRtt2n<dag outs, dag ins, string asmstr,
- list<dag> patterns, InstrItinClass itin>
- : A64InstRtn<outs, ins, asmstr, patterns, itin> {
- bits<5> Rt2;
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
+multiclass OneOperandData<bits<3> opc, string asm,
+ SDPatternOperator node = null_frag> {
+ def Wr : BaseOneOperandData<opc, GPR32, asm, node> {
+ let Inst{31} = 0;
+ }
- let Inst{14-10} = Rt2;
+ def Xr : BaseOneOperandData<opc, GPR64, asm, node> {
+ let Inst{31} = 1;
+ }
}
-class A64InstRdnm<dag outs, dag ins, string asmstr,
- list<dag> patterns, InstrItinClass itin>
- : A64InstRdn<outs, ins, asmstr, patterns, itin> {
- bits<5> Rm;
+class OneWRegData<bits<3> opc, string asm, SDPatternOperator node>
+ : BaseOneOperandData<opc, GPR32, asm, node> {
+ let Inst{31} = 0;
+}
- let Inst{20-16} = Rm;
+class OneXRegData<bits<3> opc, string asm, SDPatternOperator node>
+ : BaseOneOperandData<opc, GPR64, asm, node> {
+ let Inst{31} = 1;
}
-class A64InstRtnm<dag outs, dag ins, string asmstr,
- list<dag> patterns, InstrItinClass itin>
- : A64InstRtn<outs, ins, asmstr, patterns, itin> {
+//---
+// Basic two-operand data processing instructions.
+//---
+class BaseBaseAddSubCarry<bit isSub, RegisterClass regtype, string asm,
+ list<dag> pattern>
+ : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm),
+ asm, "\t$Rd, $Rn, $Rm", "", pattern>,
+ Sched<[WriteI, ReadI, ReadI]> {
+ let Uses = [NZCV];
+ bits<5> Rd;
+ bits<5> Rn;
bits<5> Rm;
-
+ let Inst{30} = isSub;
+ let Inst{28-21} = 0b11010000;
let Inst{20-16} = Rm;
+ let Inst{15-10} = 0;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
}
-//===----------------------------------------------------------------------===//
-//
-// Actual A64 Instruction Formats
-//
+class BaseAddSubCarry<bit isSub, RegisterClass regtype, string asm,
+ SDNode OpNode>
+ : BaseBaseAddSubCarry<isSub, regtype, asm,
+ [(set regtype:$Rd, (OpNode regtype:$Rn, regtype:$Rm, NZCV))]>;
+
+class BaseAddSubCarrySetFlags<bit isSub, RegisterClass regtype, string asm,
+ SDNode OpNode>
+ : BaseBaseAddSubCarry<isSub, regtype, asm,
+ [(set regtype:$Rd, (OpNode regtype:$Rn, regtype:$Rm, NZCV)),
+ (implicit NZCV)]> {
+ let Defs = [NZCV];
+}
-// Format for Add-subtract (extended register) instructions.
-class A64I_addsubext<bit sf, bit op, bit S, bits<2> opt, bits<3> option,
- dag outs, dag ins, string asmstr, list<dag> patterns,
- InstrItinClass itin>
- : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
- bits<3> Imm3;
-
- let Inst{31} = sf;
- let Inst{30} = op;
- let Inst{29} = S;
- let Inst{28-24} = 0b01011;
- let Inst{23-22} = opt;
- let Inst{21} = 0b1;
- // Rm inherited in 20-16
- let Inst{15-13} = option;
- let Inst{12-10} = Imm3;
- // Rn inherited in 9-5
- // Rd inherited in 4-0
-}
-
-// Format for Add-subtract (immediate) instructions.
-class A64I_addsubimm<bit sf, bit op, bit S, bits<2> shift,
- dag outs, dag ins, string asmstr,
- list<dag> patterns, InstrItinClass itin>
- : A64InstRdn<outs, ins, asmstr, patterns, itin> {
- bits<12> Imm12;
+multiclass AddSubCarry<bit isSub, string asm, string asm_setflags,
+ SDNode OpNode, SDNode OpNode_setflags> {
+ def Wr : BaseAddSubCarry<isSub, GPR32, asm, OpNode> {
+ let Inst{31} = 0;
+ let Inst{29} = 0;
+ }
+ def Xr : BaseAddSubCarry<isSub, GPR64, asm, OpNode> {
+ let Inst{31} = 1;
+ let Inst{29} = 0;
+ }
+
+ // Sets flags.
+ def SWr : BaseAddSubCarrySetFlags<isSub, GPR32, asm_setflags,
+ OpNode_setflags> {
+ let Inst{31} = 0;
+ let Inst{29} = 1;
+ }
+ def SXr : BaseAddSubCarrySetFlags<isSub, GPR64, asm_setflags,
+ OpNode_setflags> {
+ let Inst{31} = 1;
+ let Inst{29} = 1;
+ }
+}
- let Inst{31} = sf;
- let Inst{30} = op;
- let Inst{29} = S;
- let Inst{28-24} = 0b10001;
- let Inst{23-22} = shift;
- let Inst{21-10} = Imm12;
-}
-
-// Format for Add-subtract (shifted register) instructions.
-class A64I_addsubshift<bit sf, bit op, bit S, bits<2> shift,
- dag outs, dag ins, string asmstr, list<dag> patterns,
- InstrItinClass itin>
- : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
- bits<6> Imm6;
-
- let Inst{31} = sf;
- let Inst{30} = op;
- let Inst{29} = S;
- let Inst{28-24} = 0b01011;
- let Inst{23-22} = shift;
- let Inst{21} = 0b0;
- // Rm inherited in 20-16
- let Inst{15-10} = Imm6;
- // Rn inherited in 9-5
- // Rd inherited in 4-0
-}
-
-// Format for Add-subtract (with carry) instructions.
-class A64I_addsubcarry<bit sf, bit op, bit S, bits<6> opcode2,
- dag outs, dag ins, string asmstr, list<dag> patterns,
- InstrItinClass itin>
- : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
- let Inst{31} = sf;
- let Inst{30} = op;
- let Inst{29} = S;
- let Inst{28-21} = 0b11010000;
- // Rm inherited in 20-16
- let Inst{15-10} = opcode2;
- // Rn inherited in 9-5
- // Rd inherited in 4-0
-}
-
-
-// Format for Bitfield instructions
-class A64I_bitfield<bit sf, bits<2> opc, bit n,
- dag outs, dag ins, string asmstr,
- list<dag> patterns, InstrItinClass itin>
- : A64InstRdn<outs, ins, asmstr, patterns, itin> {
- bits<6> ImmR;
- bits<6> ImmS;
+class BaseTwoOperand<bits<4> opc, RegisterClass regtype, string asm,
+ SDPatternOperator OpNode>
+ : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm),
+ asm, "\t$Rd, $Rn, $Rm", "",
+ [(set regtype:$Rd, (OpNode regtype:$Rn, regtype:$Rm))]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ bits<5> Rm;
+ let Inst{30-21} = 0b0011010110;
+ let Inst{20-16} = Rm;
+ let Inst{15-14} = 0b00;
+ let Inst{13-10} = opc;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+}
- let Inst{31} = sf;
- let Inst{30-29} = opc;
- let Inst{28-23} = 0b100110;
- let Inst{22} = n;
- let Inst{21-16} = ImmR;
- let Inst{15-10} = ImmS;
- // Inherit Rn in 9-5
- // Inherit Rd in 4-0
+class BaseDiv<bit isSigned, RegisterClass regtype, string asm,
+ SDPatternOperator OpNode>
+ : BaseTwoOperand<{0,0,1,?}, regtype, asm, OpNode> {
+ let Inst{10} = isSigned;
}
-// Format for compare and branch (immediate) instructions.
-class A64I_cmpbr<bit sf, bit op,
- dag outs, dag ins, string asmstr,
- list<dag> patterns, InstrItinClass itin>
- : A64InstRt<outs, ins, asmstr, patterns, itin> {
- bits<19> Label;
+multiclass Div<bit isSigned, string asm, SDPatternOperator OpNode> {
+ def Wr : BaseDiv<isSigned, GPR32, asm, OpNode>,
+ Sched<[WriteID32, ReadID, ReadID]> {
+ let Inst{31} = 0;
+ }
+ def Xr : BaseDiv<isSigned, GPR64, asm, OpNode>,
+ Sched<[WriteID64, ReadID, ReadID]> {
+ let Inst{31} = 1;
+ }
+}
- let Inst{31} = sf;
- let Inst{30-25} = 0b011010;
- let Inst{24} = op;
- let Inst{23-5} = Label;
- // Inherit Rt in 4-0
+class BaseShift<bits<2> shift_type, RegisterClass regtype, string asm,
+ SDPatternOperator OpNode = null_frag>
+ : BaseTwoOperand<{1,0,?,?}, regtype, asm, OpNode>,
+ Sched<[WriteIS, ReadI]> {
+ let Inst{11-10} = shift_type;
}
-// Format for conditional branch (immediate) instructions.
-class A64I_condbr<bit o1, bit o0,
- dag outs, dag ins, string asmstr,
- list<dag> patterns, InstrItinClass itin>
- : A64Inst<outs, ins, asmstr, patterns, itin> {
- bits<19> Label;
- bits<4> Cond;
+multiclass Shift<bits<2> shift_type, string asm, SDNode OpNode> {
+ def Wr : BaseShift<shift_type, GPR32, asm> {
+ let Inst{31} = 0;
+ }
+
+ def Xr : BaseShift<shift_type, GPR64, asm, OpNode> {
+ let Inst{31} = 1;
+ }
+
+ def : Pat<(i32 (OpNode GPR32:$Rn, i64:$Rm)),
+ (!cast<Instruction>(NAME # "Wr") GPR32:$Rn,
+ (EXTRACT_SUBREG i64:$Rm, sub_32))>;
- let Inst{31-25} = 0b0101010;
- let Inst{24} = o1;
- let Inst{23-5} = Label;
- let Inst{4} = o0;
- let Inst{3-0} = Cond;
+ def : Pat<(i32 (OpNode GPR32:$Rn, (i64 (zext GPR32:$Rm)))),
+ (!cast<Instruction>(NAME # "Wr") GPR32:$Rn, GPR32:$Rm)>;
+
+ def : Pat<(i32 (OpNode GPR32:$Rn, (i64 (anyext GPR32:$Rm)))),
+ (!cast<Instruction>(NAME # "Wr") GPR32:$Rn, GPR32:$Rm)>;
+
+ def : Pat<(i32 (OpNode GPR32:$Rn, (i64 (sext GPR32:$Rm)))),
+ (!cast<Instruction>(NAME # "Wr") GPR32:$Rn, GPR32:$Rm)>;
}
-// Format for conditional compare (immediate) instructions.
-class A64I_condcmpimm<bit sf, bit op, bit o2, bit o3, bit s,
- dag outs, dag ins, string asmstr,
- list<dag> patterns, InstrItinClass itin>
- : A64Inst<outs, ins, asmstr, patterns, itin> {
+class ShiftAlias<string asm, Instruction inst, RegisterClass regtype>
+ : InstAlias<asm#" $dst, $src1, $src2",
+ (inst regtype:$dst, regtype:$src1, regtype:$src2), 0>;
+
+class BaseMulAccum<bit isSub, bits<3> opc, RegisterClass multype,
+ RegisterClass addtype, string asm,
+ list<dag> pattern>
+ : I<(outs addtype:$Rd), (ins multype:$Rn, multype:$Rm, addtype:$Ra),
+ asm, "\t$Rd, $Rn, $Rm, $Ra", "", pattern> {
+ bits<5> Rd;
bits<5> Rn;
- bits<5> UImm5;
- bits<4> NZCVImm;
- bits<4> Cond;
+ bits<5> Rm;
+ bits<5> Ra;
+ let Inst{30-24} = 0b0011011;
+ let Inst{23-21} = opc;
+ let Inst{20-16} = Rm;
+ let Inst{15} = isSub;
+ let Inst{14-10} = Ra;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+}
- let Inst{31} = sf;
- let Inst{30} = op;
- let Inst{29} = s;
- let Inst{28-21} = 0b11010010;
- let Inst{20-16} = UImm5;
- let Inst{15-12} = Cond;
- let Inst{11} = 0b1;
- let Inst{10} = o2;
- let Inst{9-5} = Rn;
- let Inst{4} = o3;
- let Inst{3-0} = NZCVImm;
+multiclass MulAccum<bit isSub, string asm, SDNode AccNode> {
+ def Wrrr : BaseMulAccum<isSub, 0b000, GPR32, GPR32, asm,
+ [(set GPR32:$Rd, (AccNode GPR32:$Ra, (mul GPR32:$Rn, GPR32:$Rm)))]>,
+ Sched<[WriteIM32, ReadIMA, ReadIM, ReadIM]> {
+ let Inst{31} = 0;
+ }
+
+ def Xrrr : BaseMulAccum<isSub, 0b000, GPR64, GPR64, asm,
+ [(set GPR64:$Rd, (AccNode GPR64:$Ra, (mul GPR64:$Rn, GPR64:$Rm)))]>,
+ Sched<[WriteIM64, ReadIMA, ReadIM, ReadIM]> {
+ let Inst{31} = 1;
+ }
+}
+
+class WideMulAccum<bit isSub, bits<3> opc, string asm,
+ SDNode AccNode, SDNode ExtNode>
+ : BaseMulAccum<isSub, opc, GPR32, GPR64, asm,
+ [(set GPR64:$Rd, (AccNode GPR64:$Ra,
+ (mul (ExtNode GPR32:$Rn), (ExtNode GPR32:$Rm))))]>,
+ Sched<[WriteIM32, ReadIMA, ReadIM, ReadIM]> {
+ let Inst{31} = 1;
}
-// Format for conditional compare (register) instructions.
-class A64I_condcmpreg<bit sf, bit op, bit o2, bit o3, bit s,
- dag outs, dag ins, string asmstr,
- list<dag> patterns, InstrItinClass itin>
- : A64Inst<outs, ins, asmstr, patterns, itin> {
+class MulHi<bits<3> opc, string asm, SDNode OpNode>
+ : I<(outs GPR64:$Rd), (ins GPR64:$Rn, GPR64:$Rm),
+ asm, "\t$Rd, $Rn, $Rm", "",
+ [(set GPR64:$Rd, (OpNode GPR64:$Rn, GPR64:$Rm))]>,
+ Sched<[WriteIM64, ReadIM, ReadIM]> {
+ bits<5> Rd;
bits<5> Rn;
bits<5> Rm;
- bits<4> NZCVImm;
- bits<4> Cond;
+ let Inst{31-24} = 0b10011011;
+ let Inst{23-21} = opc;
+ let Inst{20-16} = Rm;
+ let Inst{15} = 0;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+
+ // The Ra field of SMULH and UMULH is unused: it should be assembled as 31
+ // (i.e. all bits 1) but is ignored by the processor.
+ let PostEncoderMethod = "fixMulHigh";
+}
+class MulAccumWAlias<string asm, Instruction inst>
+ : InstAlias<asm#" $dst, $src1, $src2",
+ (inst GPR32:$dst, GPR32:$src1, GPR32:$src2, WZR)>;
+class MulAccumXAlias<string asm, Instruction inst>
+ : InstAlias<asm#" $dst, $src1, $src2",
+ (inst GPR64:$dst, GPR64:$src1, GPR64:$src2, XZR)>;
+class WideMulAccumAlias<string asm, Instruction inst>
+ : InstAlias<asm#" $dst, $src1, $src2",
+ (inst GPR64:$dst, GPR32:$src1, GPR32:$src2, XZR)>;
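+// These let e.g. "mul w0, w1, w2" assemble as "madd w0, w1, w2, wzr" and
+// "smull x0, w1, w2" as "smaddl x0, w1, w2, xzr".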
+
+class BaseCRC32<bit sf, bits<2> sz, bit C, RegisterClass StreamReg,
+ SDPatternOperator OpNode, string asm>
+ : I<(outs GPR32:$Rd), (ins GPR32:$Rn, StreamReg:$Rm),
+ asm, "\t$Rd, $Rn, $Rm", "",
+ [(set GPR32:$Rd, (OpNode GPR32:$Rn, StreamReg:$Rm))]>,
+ Sched<[WriteISReg, ReadI, ReadISReg]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ bits<5> Rm;
let Inst{31} = sf;
- let Inst{30} = op;
- let Inst{29} = s;
- let Inst{28-21} = 0b11010010;
+ let Inst{30-21} = 0b0011010110;
let Inst{20-16} = Rm;
- let Inst{15-12} = Cond;
- let Inst{11} = 0b0;
- let Inst{10} = o2;
+ let Inst{15-13} = 0b010;
+ let Inst{12} = C;
+ let Inst{11-10} = sz;
let Inst{9-5} = Rn;
- let Inst{4} = o3;
- let Inst{3-0} = NZCVImm;
+ let Inst{4-0} = Rd;
+ let Predicates = [HasCRC];
}
-// Format for conditional select instructions.
-class A64I_condsel<bit sf, bit op, bit s, bits<2> op2,
- dag outs, dag ins, string asmstr,
- list<dag> patterns, InstrItinClass itin>
- : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
- bits<4> Cond;
+//---
+// Address generation.
+//---
+
+class ADRI<bit page, string asm, Operand adr, list<dag> pattern>
+ : I<(outs GPR64:$Xd), (ins adr:$label), asm, "\t$Xd, $label", "",
+ pattern>,
+ Sched<[WriteI]> {
+ bits<5> Xd;
+ bits<21> label;
+ let Inst{31} = page;
+ let Inst{30-29} = label{1-0};
+ let Inst{28-24} = 0b10000;
+ let Inst{23-5} = label{20-2};
+ let Inst{4-0} = Xd;
- let Inst{31} = sf;
- let Inst{30} = op;
- let Inst{29} = s;
- let Inst{28-21} = 0b11010100;
- // Inherit Rm in 20-16
- let Inst{15-12} = Cond;
- let Inst{11-10} = op2;
- // Inherit Rn in 9-5
- // Inherit Rd in 4-0
+ let DecoderMethod = "DecodeAdrInstruction";
}
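+// With page = 0 (ADR) the 21-bit label is a byte offset, reaching +/-1MiB;
+// with page = 1 (ADRP) it is a 4KiB-page offset, reaching +/-4GiB.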
-// Format for data processing (1 source) instructions
-class A64I_dp_1src<bit sf, bit S, bits<5> opcode2, bits<6> opcode,
- string asmstr, dag outs, dag ins,
- list<dag> patterns, InstrItinClass itin>
- : A64InstRdn<outs, ins, asmstr, patterns, itin> {
- let Inst{31} = sf;
- let Inst{30} = 0b1;
- let Inst{29} = S;
- let Inst{28-21} = 0b11010110;
- let Inst{20-16} = opcode2;
- let Inst{15-10} = opcode;
-}
-
-// Format for data processing (2 source) instructions
-class A64I_dp_2src<bit sf, bits<6> opcode, bit S,
- string asmstr, dag outs, dag ins,
- list<dag> patterns, InstrItinClass itin>
- : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
- let Inst{31} = sf;
- let Inst{30} = 0b0;
- let Inst{29} = S;
- let Inst{28-21} = 0b11010110;
- let Inst{15-10} = opcode;
+//---
+// Move immediate.
+//---
+
+def movimm32_imm : Operand<i32> {
+ let ParserMatchClass = Imm0_65535Operand;
+ let EncoderMethod = "getMoveWideImmOpValue";
+ let PrintMethod = "printHexImm";
+}
+def movimm32_shift : Operand<i32> {
+ let PrintMethod = "printShifter";
+ let ParserMatchClass = MovImm32ShifterOperand;
+}
+def movimm64_shift : Operand<i32> {
+ let PrintMethod = "printShifter";
+ let ParserMatchClass = MovImm64ShifterOperand;
}
-// Format for data-processing (3 source) instructions
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
+class BaseMoveImmediate<bits<2> opc, RegisterClass regtype, Operand shifter,
+ string asm>
+ : I<(outs regtype:$Rd), (ins movimm32_imm:$imm, shifter:$shift),
+ asm, "\t$Rd, $imm$shift", "", []>,
+ Sched<[WriteImm]> {
+ bits<5> Rd;
+ bits<16> imm;
+ bits<6> shift;
+ let Inst{30-29} = opc;
+ let Inst{28-23} = 0b100101;
+ let Inst{22-21} = shift{5-4};
+ let Inst{20-5} = imm;
+ let Inst{4-0} = Rd;
-class A64I_dp3<bit sf, bits<6> opcode,
- dag outs, dag ins, string asmstr,
- list<dag> patterns, InstrItinClass itin>
- : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
- let Inst{31} = sf;
- let Inst{30-29} = opcode{5-4};
- let Inst{28-24} = 0b11011;
- let Inst{23-21} = opcode{3-1};
- // Inherits Rm in 20-16
- let Inst{15} = opcode{0};
- // {14-10} mostly Ra, but unspecified for SMULH/UMULH
- // Inherits Rn in 9-5
- // Inherits Rd in 4-0
-}
-
-// Format for exception generation instructions
-class A64I_exception<bits<3> opc, bits<3> op2, bits<2> ll,
- dag outs, dag ins, string asmstr,
- list<dag> patterns, InstrItinClass itin>
- : A64Inst<outs, ins, asmstr, patterns, itin> {
- bits<16> UImm16;
+ let DecoderMethod = "DecodeMoveImmInstruction";
+}
- let Inst{31-24} = 0b11010100;
- let Inst{23-21} = opc;
- let Inst{20-5} = UImm16;
- let Inst{4-2} = op2;
- let Inst{1-0} = ll;
+multiclass MoveImmediate<bits<2> opc, string asm> {
+ def Wi : BaseMoveImmediate<opc, GPR32, movimm32_shift, asm> {
+ let Inst{31} = 0;
+ }
+
+ def Xi : BaseMoveImmediate<opc, GPR64, movimm64_shift, asm> {
+ let Inst{31} = 1;
+ }
}
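+// For example, "mov x0, #0x12340000" becomes "movz x0, #0x1234, lsl #16":
+// imm = 0x1234 and shift{5-4} = 0b01 select the second 16-bit chunk.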
-// Format for extract (immediate) instructions
-class A64I_extract<bit sf, bits<3> op, bit n,
- dag outs, dag ins, string asmstr,
- list<dag> patterns, InstrItinClass itin>
- : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
- bits<6> LSB;
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
+class BaseInsertImmediate<bits<2> opc, RegisterClass regtype, Operand shifter,
+ string asm>
+ : I<(outs regtype:$Rd),
+ (ins regtype:$src, movimm32_imm:$imm, shifter:$shift),
+ asm, "\t$Rd, $imm$shift", "$src = $Rd", []>,
+ Sched<[WriteI, ReadI]> {
+ bits<5> Rd;
+ bits<16> imm;
+ bits<6> shift;
+ let Inst{30-29} = opc;
+ let Inst{28-23} = 0b100101;
+ let Inst{22-21} = shift{5-4};
+ let Inst{20-5} = imm;
+ let Inst{4-0} = Rd;
- let Inst{31} = sf;
- let Inst{30-29} = op{2-1};
- let Inst{28-23} = 0b100111;
- let Inst{22} = n;
- let Inst{21} = op{0};
- // Inherits Rm in bits 20-16
- let Inst{15-10} = LSB;
- // Inherits Rn in 9-5
- // Inherits Rd in 4-0
+ let DecoderMethod = "DecodeMoveImmInstruction";
}
-let Predicates = [HasFPARMv8] in {
+multiclass InsertImmediate<bits<2> opc, string asm> {
+ def Wi : BaseInsertImmediate<opc, GPR32, movimm32_shift, asm> {
+ let Inst{31} = 0;
+ }
+
+ def Xi : BaseInsertImmediate<opc, GPR64, movimm64_shift, asm> {
+ let Inst{31} = 1;
+ }
+}
-// Format for floating-point compare instructions.
-class A64I_fpcmp<bit m, bit s, bits<2> type, bits<2> op, bits<5> opcode2,
- dag outs, dag ins, string asmstr,
- list<dag> patterns, InstrItinClass itin>
- : A64Inst<outs, ins, asmstr, patterns, itin> {
+//---
+// Add/Subtract
+//---
+
+class BaseAddSubImm<bit isSub, bit setFlags, RegisterClass dstRegtype,
+ RegisterClass srcRegtype, addsub_shifted_imm immtype,
+ string asm, SDPatternOperator OpNode>
+ : I<(outs dstRegtype:$Rd), (ins srcRegtype:$Rn, immtype:$imm),
+ asm, "\t$Rd, $Rn, $imm", "",
+ [(set dstRegtype:$Rd, (OpNode srcRegtype:$Rn, immtype:$imm))]>,
+ Sched<[WriteI, ReadI]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ bits<14> imm;
+ let Inst{30} = isSub;
+ let Inst{29} = setFlags;
+ let Inst{28-24} = 0b10001;
+ let Inst{23-22} = imm{13-12}; // '00' => lsl #0, '01' => lsl #12
+ let Inst{21-10} = imm{11-0};
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+ let DecoderMethod = "DecodeBaseAddSubImm";
+}
+
+class BaseAddSubRegPseudo<RegisterClass regtype,
+ SDPatternOperator OpNode>
+ : Pseudo<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm),
+ [(set regtype:$Rd, (OpNode regtype:$Rn, regtype:$Rm))]>,
+ Sched<[WriteI, ReadI, ReadI]>;
+
+class BaseAddSubSReg<bit isSub, bit setFlags, RegisterClass regtype,
+ arith_shifted_reg shifted_regtype, string asm,
+ SDPatternOperator OpNode>
+ : I<(outs regtype:$Rd), (ins regtype:$Rn, shifted_regtype:$Rm),
+ asm, "\t$Rd, $Rn, $Rm", "",
+ [(set regtype:$Rd, (OpNode regtype:$Rn, shifted_regtype:$Rm))]>,
+ Sched<[WriteISReg, ReadI, ReadISReg]> {
+  // The encoding fields are matched to the MI operands positionally rather
+  // than by name, so no encoder method is needed; the default in-order
+  // handling applies. Since matching is by order, make sure these field
+  // names do NOT match the MI operand names.
+ bits<5> dst;
+ bits<5> src1;
+ bits<5> src2;
+ bits<8> shift;
+ let Inst{30} = isSub;
+ let Inst{29} = setFlags;
+ let Inst{28-24} = 0b01011;
+ let Inst{23-22} = shift{7-6};
+ let Inst{21} = 0;
+ let Inst{20-16} = src2;
+ let Inst{15-10} = shift{5-0};
+ let Inst{9-5} = src1;
+ let Inst{4-0} = dst;
+
+ let DecoderMethod = "DecodeThreeAddrSRegInstruction";
+}
+
+class BaseAddSubEReg<bit isSub, bit setFlags, RegisterClass dstRegtype,
+ RegisterClass src1Regtype, Operand src2Regtype,
+ string asm, SDPatternOperator OpNode>
+ : I<(outs dstRegtype:$R1),
+ (ins src1Regtype:$R2, src2Regtype:$R3),
+ asm, "\t$R1, $R2, $R3", "",
+ [(set dstRegtype:$R1, (OpNode src1Regtype:$R2, src2Regtype:$R3))]>,
+ Sched<[WriteIEReg, ReadI, ReadIEReg]> {
+ bits<5> Rd;
bits<5> Rn;
bits<5> Rm;
+ bits<6> ext;
+ let Inst{30} = isSub;
+ let Inst{29} = setFlags;
+ let Inst{28-24} = 0b01011;
+ let Inst{23-21} = 0b001;
+ let Inst{20-16} = Rm;
+ let Inst{15-13} = ext{5-3};
+ let Inst{12-10} = ext{2-0};
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
- let Inst{31} = m;
- let Inst{30} = 0b0;
- let Inst{29} = s;
- let Inst{28-24} = 0b11110;
- let Inst{23-22} = type;
- let Inst{21} = 0b1;
+ let DecoderMethod = "DecodeAddSubERegInstruction";
+}
+
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
+class BaseAddSubEReg64<bit isSub, bit setFlags, RegisterClass dstRegtype,
+ RegisterClass src1Regtype, RegisterClass src2Regtype,
+ Operand ext_op, string asm>
+ : I<(outs dstRegtype:$Rd),
+ (ins src1Regtype:$Rn, src2Regtype:$Rm, ext_op:$ext),
+ asm, "\t$Rd, $Rn, $Rm$ext", "", []>,
+ Sched<[WriteIEReg, ReadI, ReadIEReg]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ bits<5> Rm;
+ bits<6> ext;
+ let Inst{30} = isSub;
+ let Inst{29} = setFlags;
+ let Inst{28-24} = 0b01011;
+ let Inst{23-21} = 0b001;
let Inst{20-16} = Rm;
- let Inst{15-14} = op;
- let Inst{13-10} = 0b1000;
- let Inst{9-5} = Rn;
- let Inst{4-0} = opcode2;
+ let Inst{15} = ext{5};
+ let Inst{12-10} = ext{2-0};
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+
+ let DecoderMethod = "DecodeAddSubERegInstruction";
+}
+
+// Aliases for register+register add/subtract.
+class AddSubRegAlias<string asm, Instruction inst, RegisterClass dstRegtype,
+ RegisterClass src1Regtype, RegisterClass src2Regtype,
+ int shiftExt>
+ : InstAlias<asm#" $dst, $src1, $src2",
+ (inst dstRegtype:$dst, src1Regtype:$src1, src2Regtype:$src2,
+ shiftExt)>;
+
+multiclass AddSub<bit isSub, string mnemonic,
+ SDPatternOperator OpNode = null_frag> {
+ let hasSideEffects = 0 in {
+ // Add/Subtract immediate
+ def Wri : BaseAddSubImm<isSub, 0, GPR32sp, GPR32sp, addsub_shifted_imm32,
+ mnemonic, OpNode> {
+ let Inst{31} = 0;
+ }
+ def Xri : BaseAddSubImm<isSub, 0, GPR64sp, GPR64sp, addsub_shifted_imm64,
+ mnemonic, OpNode> {
+ let Inst{31} = 1;
+ }
+
+ // Add/Subtract register - Only used for CodeGen
+ def Wrr : BaseAddSubRegPseudo<GPR32, OpNode>;
+ def Xrr : BaseAddSubRegPseudo<GPR64, OpNode>;
+
+ // Add/Subtract shifted register
+ def Wrs : BaseAddSubSReg<isSub, 0, GPR32, arith_shifted_reg32, mnemonic,
+ OpNode> {
+ let Inst{31} = 0;
+ }
+ def Xrs : BaseAddSubSReg<isSub, 0, GPR64, arith_shifted_reg64, mnemonic,
+ OpNode> {
+ let Inst{31} = 1;
+ }
+ }
+
+ // Add/Subtract extended register
+ let AddedComplexity = 1, hasSideEffects = 0 in {
+ def Wrx : BaseAddSubEReg<isSub, 0, GPR32sp, GPR32sp,
+ arith_extended_reg32<i32>, mnemonic, OpNode> {
+ let Inst{31} = 0;
+ }
+ def Xrx : BaseAddSubEReg<isSub, 0, GPR64sp, GPR64sp,
+ arith_extended_reg32to64<i64>, mnemonic, OpNode> {
+ let Inst{31} = 1;
+ }
+ }
+
+ def Xrx64 : BaseAddSubEReg64<isSub, 0, GPR64sp, GPR64sp, GPR64,
+ arith_extendlsl64, mnemonic> {
+ // UXTX and SXTX only.
+ let Inst{14-13} = 0b11;
+ let Inst{31} = 1;
+ }
+
+ // Register/register aliases with no shift when SP is not used.
+ def : AddSubRegAlias<mnemonic, !cast<Instruction>(NAME#"Wrs"),
+ GPR32, GPR32, GPR32, 0>;
+ def : AddSubRegAlias<mnemonic, !cast<Instruction>(NAME#"Xrs"),
+ GPR64, GPR64, GPR64, 0>;
+
+ // Register/register aliases with no shift when either the destination or
+ // first source register is SP.
+ def : AddSubRegAlias<mnemonic, !cast<Instruction>(NAME#"Wrx"),
+ GPR32sponly, GPR32sp, GPR32, 16>; // UXTW #0
+ def : AddSubRegAlias<mnemonic, !cast<Instruction>(NAME#"Wrx"),
+ GPR32sp, GPR32sponly, GPR32, 16>; // UXTW #0
+ def : AddSubRegAlias<mnemonic,
+ !cast<Instruction>(NAME#"Xrx64"),
+ GPR64sponly, GPR64sp, GPR64, 24>; // UXTX #0
+ def : AddSubRegAlias<mnemonic,
+ !cast<Instruction>(NAME#"Xrx64"),
+ GPR64sp, GPR64sponly, GPR64, 24>; // UXTX #0
}
-// Format for floating-point conditional compare instructions.
-class A64I_fpccmp<bit m, bit s, bits<2> type, bit op,
- dag outs, dag ins, string asmstr,
- list<dag> patterns, InstrItinClass itin>
- : A64InstRdn<outs, ins, asmstr, patterns, itin> {
+multiclass AddSubS<bit isSub, string mnemonic, SDNode OpNode, string cmp> {
+ let isCompare = 1, Defs = [NZCV] in {
+ // Add/Subtract immediate
+ def Wri : BaseAddSubImm<isSub, 1, GPR32, GPR32sp, addsub_shifted_imm32,
+ mnemonic, OpNode> {
+ let Inst{31} = 0;
+ }
+ def Xri : BaseAddSubImm<isSub, 1, GPR64, GPR64sp, addsub_shifted_imm64,
+ mnemonic, OpNode> {
+ let Inst{31} = 1;
+ }
+
+ // Add/Subtract register
+ def Wrr : BaseAddSubRegPseudo<GPR32, OpNode>;
+ def Xrr : BaseAddSubRegPseudo<GPR64, OpNode>;
+
+ // Add/Subtract shifted register
+ def Wrs : BaseAddSubSReg<isSub, 1, GPR32, arith_shifted_reg32, mnemonic,
+ OpNode> {
+ let Inst{31} = 0;
+ }
+ def Xrs : BaseAddSubSReg<isSub, 1, GPR64, arith_shifted_reg64, mnemonic,
+ OpNode> {
+ let Inst{31} = 1;
+ }
+
+ // Add/Subtract extended register
+ let AddedComplexity = 1 in {
+ def Wrx : BaseAddSubEReg<isSub, 1, GPR32, GPR32sp,
+ arith_extended_reg32<i32>, mnemonic, OpNode> {
+ let Inst{31} = 0;
+ }
+ def Xrx : BaseAddSubEReg<isSub, 1, GPR64, GPR64sp,
+ arith_extended_reg32<i64>, mnemonic, OpNode> {
+ let Inst{31} = 1;
+ }
+ }
+
+ def Xrx64 : BaseAddSubEReg64<isSub, 1, GPR64, GPR64sp, GPR64,
+ arith_extendlsl64, mnemonic> {
+ // UXTX and SXTX only.
+ let Inst{14-13} = 0b11;
+ let Inst{31} = 1;
+ }
+ } // Defs = [NZCV]
+
+ // Compare aliases
+ def : InstAlias<cmp#" $src, $imm", (!cast<Instruction>(NAME#"Wri")
+ WZR, GPR32sp:$src, addsub_shifted_imm32:$imm), 5>;
+ def : InstAlias<cmp#" $src, $imm", (!cast<Instruction>(NAME#"Xri")
+ XZR, GPR64sp:$src, addsub_shifted_imm64:$imm), 5>;
+ def : InstAlias<cmp#" $src1, $src2$sh", (!cast<Instruction>(NAME#"Wrx")
+ WZR, GPR32sp:$src1, GPR32:$src2, arith_extend:$sh), 4>;
+ def : InstAlias<cmp#" $src1, $src2$sh", (!cast<Instruction>(NAME#"Xrx")
+ XZR, GPR64sp:$src1, GPR32:$src2, arith_extend:$sh), 4>;
+ def : InstAlias<cmp#" $src1, $src2$sh", (!cast<Instruction>(NAME#"Xrx64")
+ XZR, GPR64sp:$src1, GPR64:$src2, arith_extendlsl64:$sh), 4>;
+ def : InstAlias<cmp#" $src1, $src2$sh", (!cast<Instruction>(NAME#"Wrs")
+ WZR, GPR32:$src1, GPR32:$src2, arith_shift32:$sh), 4>;
+ def : InstAlias<cmp#" $src1, $src2$sh", (!cast<Instruction>(NAME#"Xrs")
+ XZR, GPR64:$src1, GPR64:$src2, arith_shift64:$sh), 4>;
+
+ // Compare shorthands
+ def : InstAlias<cmp#" $src1, $src2", (!cast<Instruction>(NAME#"Wrs")
+ WZR, GPR32:$src1, GPR32:$src2, 0), 5>;
+ def : InstAlias<cmp#" $src1, $src2", (!cast<Instruction>(NAME#"Xrs")
+ XZR, GPR64:$src1, GPR64:$src2, 0), 5>;
+
+ // Register/register aliases with no shift when SP is not used.
+ def : AddSubRegAlias<mnemonic, !cast<Instruction>(NAME#"Wrs"),
+ GPR32, GPR32, GPR32, 0>;
+ def : AddSubRegAlias<mnemonic, !cast<Instruction>(NAME#"Xrs"),
+ GPR64, GPR64, GPR64, 0>;
+
+ // Register/register aliases with no shift when the first source register
+ // is SP.
+ def : AddSubRegAlias<mnemonic, !cast<Instruction>(NAME#"Wrx"),
+ GPR32, GPR32sponly, GPR32, 16>; // UXTW #0
+ def : AddSubRegAlias<mnemonic,
+ !cast<Instruction>(NAME#"Xrx64"),
+ GPR64, GPR64sponly, GPR64, 24>; // UXTX #0
+}
+
+//---
+// Extract
+//---
+def SDTA64EXTR : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>,
+ SDTCisPtrTy<3>]>;
+def AArch64Extr : SDNode<"AArch64ISD::EXTR", SDTA64EXTR>;
+
+class BaseExtractImm<RegisterClass regtype, Operand imm_type, string asm,
+ list<dag> patterns>
+ : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm, imm_type:$imm),
+ asm, "\t$Rd, $Rn, $Rm, $imm", "", patterns>,
+ Sched<[WriteExtr, ReadExtrHi]> {
+ bits<5> Rd;
bits<5> Rn;
bits<5> Rm;
- bits<4> NZCVImm;
- bits<4> Cond;
+ bits<6> imm;
- let Inst{31} = m;
- let Inst{30} = 0b0;
- let Inst{29} = s;
- let Inst{28-24} = 0b11110;
- let Inst{23-22} = type;
- let Inst{21} = 0b1;
+ let Inst{30-23} = 0b00100111;
+ let Inst{21} = 0;
let Inst{20-16} = Rm;
- let Inst{15-12} = Cond;
- let Inst{11-10} = 0b01;
- let Inst{9-5} = Rn;
- let Inst{4} = op;
- let Inst{3-0} = NZCVImm;
+ let Inst{15-10} = imm;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
}
-// Format for floating-point conditional select instructions.
-class A64I_fpcondsel<bit m, bit s, bits<2> type,
- dag outs, dag ins, string asmstr,
- list<dag> patterns, InstrItinClass itin>
- : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
- bits<4> Cond;
+multiclass ExtractImm<string asm> {
+ def Wrri : BaseExtractImm<GPR32, imm0_31, asm,
+ [(set GPR32:$Rd,
+ (AArch64Extr GPR32:$Rn, GPR32:$Rm, imm0_31:$imm))]> {
+ let Inst{31} = 0;
+ let Inst{22} = 0;
+ // imm<5> must be zero.
+ let imm{5} = 0;
+ }
+ def Xrri : BaseExtractImm<GPR64, imm0_63, asm,
+ [(set GPR64:$Rd,
+ (AArch64Extr GPR64:$Rn, GPR64:$Rm, imm0_63:$imm))]> {
+ let Inst{31} = 1;
+ let Inst{22} = 1;
+ }
+}
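+// EXTR also underlies the ROR-immediate alias: "ror x0, x1, #7" is
+// "extr x0, x1, x1, #7", i.e. both source registers are the same.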
- let Inst{31} = m;
- let Inst{30} = 0b0;
- let Inst{29} = s;
- let Inst{28-24} = 0b11110;
- let Inst{23-22} = type;
- let Inst{21} = 0b1;
- // Inherit Rm in 20-16
- let Inst{15-12} = Cond;
- let Inst{11-10} = 0b11;
- // Inherit Rn in 9-5
- // Inherit Rd in 4-0
+//---
+// Bitfield
+//---
+
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
+class BaseBitfieldImm<bits<2> opc,
+ RegisterClass regtype, Operand imm_type, string asm>
+ : I<(outs regtype:$Rd), (ins regtype:$Rn, imm_type:$immr, imm_type:$imms),
+ asm, "\t$Rd, $Rn, $immr, $imms", "", []>,
+ Sched<[WriteIS, ReadI]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ bits<6> immr;
+ bits<6> imms;
+
+ let Inst{30-29} = opc;
+ let Inst{28-23} = 0b100110;
+ let Inst{21-16} = immr;
+ let Inst{15-10} = imms;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
}
+multiclass BitfieldImm<bits<2> opc, string asm> {
+ def Wri : BaseBitfieldImm<opc, GPR32, imm0_31, asm> {
+ let Inst{31} = 0;
+ let Inst{22} = 0;
+ // imms<5> and immr<5> must be zero, else ReservedValue().
+ let Inst{21} = 0;
+ let Inst{15} = 0;
+ }
+ def Xri : BaseBitfieldImm<opc, GPR64, imm0_63, asm> {
+ let Inst{31} = 1;
+ let Inst{22} = 1;
+ }
+}
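+// The immediate shift and extend aliases map onto these: e.g. "lsr w0, w1, #4"
+// is "ubfm w0, w1, #4, #31" and "sxtb x0, w1" is "sbfm x0, x1, #0, #7".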
-// Format for floating-point data-processing (1 source) instructions.
-class A64I_fpdp1<bit m, bit s, bits<2> type, bits<6> opcode,
- dag outs, dag ins, string asmstr,
- list<dag> patterns, InstrItinClass itin>
- : A64InstRdn<outs, ins, asmstr, patterns, itin> {
- let Inst{31} = m;
- let Inst{30} = 0b0;
- let Inst{29} = s;
- let Inst{28-24} = 0b11110;
- let Inst{23-22} = type;
- let Inst{21} = 0b1;
- let Inst{20-15} = opcode;
- let Inst{14-10} = 0b10000;
- // Inherit Rn in 9-5
- // Inherit Rd in 4-0
-}
-
-// Format for floating-point data-processing (2 sources) instructions.
-class A64I_fpdp2<bit m, bit s, bits<2> type, bits<4> opcode,
- dag outs, dag ins, string asmstr,
- list<dag> patterns, InstrItinClass itin>
- : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
- let Inst{31} = m;
- let Inst{30} = 0b0;
- let Inst{29} = s;
- let Inst{28-24} = 0b11110;
- let Inst{23-22} = type;
- let Inst{21} = 0b1;
- // Inherit Rm in 20-16
- let Inst{15-12} = opcode;
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
+class BaseBitfieldImmWith2RegArgs<bits<2> opc,
+ RegisterClass regtype, Operand imm_type, string asm>
+ : I<(outs regtype:$Rd), (ins regtype:$src, regtype:$Rn, imm_type:$immr,
+ imm_type:$imms),
+ asm, "\t$Rd, $Rn, $immr, $imms", "$src = $Rd", []>,
+ Sched<[WriteIS, ReadI]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ bits<6> immr;
+ bits<6> imms;
+
+ let Inst{30-29} = opc;
+ let Inst{28-23} = 0b100110;
+ let Inst{21-16} = immr;
+ let Inst{15-10} = imms;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+}
+
+multiclass BitfieldImmWith2RegArgs<bits<2> opc, string asm> {
+ def Wri : BaseBitfieldImmWith2RegArgs<opc, GPR32, imm0_31, asm> {
+ let Inst{31} = 0;
+ let Inst{22} = 0;
+ // imms<5> and immr<5> must be zero, else ReservedValue().
+ let Inst{21} = 0;
+ let Inst{15} = 0;
+ }
+ def Xri : BaseBitfieldImmWith2RegArgs<opc, GPR64, imm0_63, asm> {
+ let Inst{31} = 1;
+ let Inst{22} = 1;
+ }
+}
+
+//---
+// Logical
+//---
+
+// Logical (immediate)
+class BaseLogicalImm<bits<2> opc, RegisterClass dregtype,
+ RegisterClass sregtype, Operand imm_type, string asm,
+ list<dag> pattern>
+ : I<(outs dregtype:$Rd), (ins sregtype:$Rn, imm_type:$imm),
+ asm, "\t$Rd, $Rn, $imm", "", pattern>,
+ Sched<[WriteI, ReadI]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ bits<13> imm;
+ let Inst{30-29} = opc;
+ let Inst{28-23} = 0b100100;
+ let Inst{22} = imm{12};
+ let Inst{21-16} = imm{11-6};
+ let Inst{15-10} = imm{5-0};
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+
+ let DecoderMethod = "DecodeLogicalImmInstruction";
+}
+
+// Logical (shifted register)
+class BaseLogicalSReg<bits<2> opc, bit N, RegisterClass regtype,
+ logical_shifted_reg shifted_regtype, string asm,
+ list<dag> pattern>
+ : I<(outs regtype:$Rd), (ins regtype:$Rn, shifted_regtype:$Rm),
+ asm, "\t$Rd, $Rn, $Rm", "", pattern>,
+ Sched<[WriteISReg, ReadI, ReadISReg]> {
+  // The encoding fields are matched to the MI operands positionally rather
+  // than by name, so no encoder method is needed; the default in-order
+  // handling applies. Since matching is by order, make sure these field
+  // names do NOT match the MI operand names.
+ bits<5> dst;
+ bits<5> src1;
+ bits<5> src2;
+ bits<8> shift;
+ let Inst{30-29} = opc;
+ let Inst{28-24} = 0b01010;
+ let Inst{23-22} = shift{7-6};
+ let Inst{21} = N;
+ let Inst{20-16} = src2;
+ let Inst{15-10} = shift{5-0};
+ let Inst{9-5} = src1;
+ let Inst{4-0} = dst;
+
+ let DecoderMethod = "DecodeThreeAddrSRegInstruction";
+}
+
+// Aliases for register+register logical instructions.
+class LogicalRegAlias<string asm, Instruction inst, RegisterClass regtype>
+ : InstAlias<asm#" $dst, $src1, $src2",
+ (inst regtype:$dst, regtype:$src1, regtype:$src2, 0)>;
+
+let AddedComplexity = 6 in
+multiclass LogicalImm<bits<2> opc, string mnemonic, SDNode OpNode> {
+ def Wri : BaseLogicalImm<opc, GPR32sp, GPR32, logical_imm32, mnemonic,
+ [(set GPR32sp:$Rd, (OpNode GPR32:$Rn,
+ logical_imm32:$imm))]> {
+ let Inst{31} = 0;
+ let Inst{22} = 0; // 64-bit version has an additional bit of immediate.
+ }
+ def Xri : BaseLogicalImm<opc, GPR64sp, GPR64, logical_imm64, mnemonic,
+ [(set GPR64sp:$Rd, (OpNode GPR64:$Rn,
+ logical_imm64:$imm))]> {
+ let Inst{31} = 1;
+ }
+}
+
+multiclass LogicalImmS<bits<2> opc, string mnemonic, SDNode OpNode> {
+ let isCompare = 1, Defs = [NZCV] in {
+ def Wri : BaseLogicalImm<opc, GPR32, GPR32, logical_imm32, mnemonic,
+ [(set GPR32:$Rd, (OpNode GPR32:$Rn, logical_imm32:$imm))]> {
+ let Inst{31} = 0;
+ let Inst{22} = 0; // 64-bit version has an additional bit of immediate.
+ }
+ def Xri : BaseLogicalImm<opc, GPR64, GPR64, logical_imm64, mnemonic,
+ [(set GPR64:$Rd, (OpNode GPR64:$Rn, logical_imm64:$imm))]> {
+ let Inst{31} = 1;
+ }
+ } // end Defs = [NZCV]
+}
+
+class BaseLogicalRegPseudo<RegisterClass regtype, SDPatternOperator OpNode>
+ : Pseudo<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm),
+ [(set regtype:$Rd, (OpNode regtype:$Rn, regtype:$Rm))]>,
+ Sched<[WriteI, ReadI, ReadI]>;
+
+// Split from LogicalImm as not all instructions have both.
+multiclass LogicalReg<bits<2> opc, bit N, string mnemonic,
+ SDPatternOperator OpNode> {
+ def Wrr : BaseLogicalRegPseudo<GPR32, OpNode>;
+ def Xrr : BaseLogicalRegPseudo<GPR64, OpNode>;
+
+ def Wrs : BaseLogicalSReg<opc, N, GPR32, logical_shifted_reg32, mnemonic,
+ [(set GPR32:$Rd, (OpNode GPR32:$Rn,
+ logical_shifted_reg32:$Rm))]> {
+ let Inst{31} = 0;
+ }
+ def Xrs : BaseLogicalSReg<opc, N, GPR64, logical_shifted_reg64, mnemonic,
+ [(set GPR64:$Rd, (OpNode GPR64:$Rn,
+ logical_shifted_reg64:$Rm))]> {
+ let Inst{31} = 1;
+ }
+
+ def : LogicalRegAlias<mnemonic,
+ !cast<Instruction>(NAME#"Wrs"), GPR32>;
+ def : LogicalRegAlias<mnemonic,
+ !cast<Instruction>(NAME#"Xrs"), GPR64>;
+}
+
+// Split from LogicalReg to allow setting NZCV Defs
+multiclass LogicalRegS<bits<2> opc, bit N, string mnemonic,
+ SDPatternOperator OpNode = null_frag> {
+ let Defs = [NZCV], mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
+ def Wrr : BaseLogicalRegPseudo<GPR32, OpNode>;
+ def Xrr : BaseLogicalRegPseudo<GPR64, OpNode>;
+
+ def Wrs : BaseLogicalSReg<opc, N, GPR32, logical_shifted_reg32, mnemonic,
+ [(set GPR32:$Rd, (OpNode GPR32:$Rn, logical_shifted_reg32:$Rm))]> {
+ let Inst{31} = 0;
+ }
+ def Xrs : BaseLogicalSReg<opc, N, GPR64, logical_shifted_reg64, mnemonic,
+ [(set GPR64:$Rd, (OpNode GPR64:$Rn, logical_shifted_reg64:$Rm))]> {
+ let Inst{31} = 1;
+ }
+ } // Defs = [NZCV]
+
+ def : LogicalRegAlias<mnemonic,
+ !cast<Instruction>(NAME#"Wrs"), GPR32>;
+ def : LogicalRegAlias<mnemonic,
+ !cast<Instruction>(NAME#"Xrs"), GPR64>;
+}
+
+//---
+// Conditionally set flags
+//---
+
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
+class BaseCondSetFlagsImm<bit op, RegisterClass regtype, string asm>
+ : I<(outs), (ins regtype:$Rn, imm0_31:$imm, imm0_15:$nzcv, ccode:$cond),
+ asm, "\t$Rn, $imm, $nzcv, $cond", "", []>,
+ Sched<[WriteI, ReadI]> {
+ let Uses = [NZCV];
+ let Defs = [NZCV];
+
+ bits<5> Rn;
+ bits<5> imm;
+ bits<4> nzcv;
+ bits<4> cond;
+
+ let Inst{30} = op;
+ let Inst{29-21} = 0b111010010;
+ let Inst{20-16} = imm;
+ let Inst{15-12} = cond;
let Inst{11-10} = 0b10;
- // Inherit Rn in 9-5
- // Inherit Rd in 4-0
+ let Inst{9-5} = Rn;
+ let Inst{4} = 0b0;
+ let Inst{3-0} = nzcv;
}
-// Format for floating-point data-processing (3 sources) instructions.
-class A64I_fpdp3<bit m, bit s, bits<2> type, bit o1, bit o0,
- dag outs, dag ins, string asmstr,
- list<dag> patterns, InstrItinClass itin>
- : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
- bits<5> Ra;
+multiclass CondSetFlagsImm<bit op, string asm> {
+ def Wi : BaseCondSetFlagsImm<op, GPR32, asm> {
+ let Inst{31} = 0;
+ }
+ def Xi : BaseCondSetFlagsImm<op, GPR64, asm> {
+ let Inst{31} = 1;
+ }
+}
- let Inst{31} = m;
- let Inst{30} = 0b0;
- let Inst{29} = s;
- let Inst{28-24} = 0b11111;
- let Inst{23-22} = type;
- let Inst{21} = o1;
- // Inherit Rm in 20-16
- let Inst{15} = o0;
- let Inst{14-10} = Ra;
- // Inherit Rn in 9-5
- // Inherit Rd in 4-0
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
+class BaseCondSetFlagsReg<bit op, RegisterClass regtype, string asm>
+ : I<(outs), (ins regtype:$Rn, regtype:$Rm, imm0_15:$nzcv, ccode:$cond),
+ asm, "\t$Rn, $Rm, $nzcv, $cond", "", []>,
+ Sched<[WriteI, ReadI, ReadI]> {
+ let Uses = [NZCV];
+ let Defs = [NZCV];
+
+ bits<5> Rn;
+ bits<5> Rm;
+ bits<4> nzcv;
+ bits<4> cond;
+
+ let Inst{30} = op;
+ let Inst{29-21} = 0b111010010;
+ let Inst{20-16} = Rm;
+ let Inst{15-12} = cond;
+ let Inst{11-10} = 0b00;
+ let Inst{9-5} = Rn;
+ let Inst{4} = 0b0;
+ let Inst{3-0} = nzcv;
}
-// Format for floating-point <-> fixed-point conversion instructions.
-class A64I_fpfixed<bit sf, bit s, bits<2> type, bits<2> mode, bits<3> opcode,
- dag outs, dag ins, string asmstr,
- list<dag> patterns, InstrItinClass itin>
- : A64InstRdn<outs, ins, asmstr, patterns, itin> {
- bits<6> Scale;
+multiclass CondSetFlagsReg<bit op, string asm> {
+ def Wr : BaseCondSetFlagsReg<op, GPR32, asm> {
+ let Inst{31} = 0;
+ }
+ def Xr : BaseCondSetFlagsReg<op, GPR64, asm> {
+ let Inst{31} = 1;
+ }
+}
- let Inst{31} = sf;
- let Inst{30} = 0b0;
- let Inst{29} = s;
- let Inst{28-24} = 0b11110;
- let Inst{23-22} = type;
- let Inst{21} = 0b0;
- let Inst{20-19} = mode;
- let Inst{18-16} = opcode;
- let Inst{15-10} = Scale;
- // Inherit Rn in 9-5
- // Inherit Rd in 4-0
+//---
+// Conditional select
+//---
+
+class BaseCondSelect<bit op, bits<2> op2, RegisterClass regtype, string asm>
+ : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm, ccode:$cond),
+ asm, "\t$Rd, $Rn, $Rm, $cond", "",
+ [(set regtype:$Rd,
+ (AArch64csel regtype:$Rn, regtype:$Rm, (i32 imm:$cond), NZCV))]>,
+ Sched<[WriteI, ReadI, ReadI]> {
+ let Uses = [NZCV];
+
+ bits<5> Rd;
+ bits<5> Rn;
+ bits<5> Rm;
+ bits<4> cond;
+
+ let Inst{30} = op;
+ let Inst{29-21} = 0b011010100;
+ let Inst{20-16} = Rm;
+ let Inst{15-12} = cond;
+ let Inst{11-10} = op2;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
}
-// Format for floating-point <-> integer conversion instructions.
-class A64I_fpint<bit sf, bit s, bits<2> type, bits<2> rmode, bits<3> opcode,
- dag outs, dag ins, string asmstr,
- list<dag> patterns, InstrItinClass itin>
- : A64InstRdn<outs, ins, asmstr, patterns, itin> {
- let Inst{31} = sf;
- let Inst{30} = 0b0;
- let Inst{29} = s;
- let Inst{28-24} = 0b11110;
- let Inst{23-22} = type;
- let Inst{21} = 0b1;
- let Inst{20-19} = rmode;
- let Inst{18-16} = opcode;
- let Inst{15-10} = 0b000000;
- // Inherit Rn in 9-5
- // Inherit Rd in 4-0
+multiclass CondSelect<bit op, bits<2> op2, string asm> {
+ def Wr : BaseCondSelect<op, op2, GPR32, asm> {
+ let Inst{31} = 0;
+ }
+ def Xr : BaseCondSelect<op, op2, GPR64, asm> {
+ let Inst{31} = 1;
+ }
}
+class BaseCondSelectOp<bit op, bits<2> op2, RegisterClass regtype, string asm,
+ PatFrag frag>
+ : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm, ccode:$cond),
+ asm, "\t$Rd, $Rn, $Rm, $cond", "",
+ [(set regtype:$Rd,
+ (AArch64csel regtype:$Rn, (frag regtype:$Rm),
+ (i32 imm:$cond), NZCV))]>,
+ Sched<[WriteI, ReadI, ReadI]> {
+ let Uses = [NZCV];
-// Format for floating-point immediate instructions.
-class A64I_fpimm<bit m, bit s, bits<2> type, bits<5> imm5,
- dag outs, dag ins, string asmstr,
- list<dag> patterns, InstrItinClass itin>
- : A64InstRd<outs, ins, asmstr, patterns, itin> {
- bits<8> Imm8;
+ bits<5> Rd;
+ bits<5> Rn;
+ bits<5> Rm;
+ bits<4> cond;
- let Inst{31} = m;
- let Inst{30} = 0b0;
- let Inst{29} = s;
- let Inst{28-24} = 0b11110;
- let Inst{23-22} = type;
- let Inst{21} = 0b1;
- let Inst{20-13} = Imm8;
- let Inst{12-10} = 0b100;
- let Inst{9-5} = imm5;
- // Inherit Rd in 4-0
+ let Inst{30} = op;
+ let Inst{29-21} = 0b011010100;
+ let Inst{20-16} = Rm;
+ let Inst{15-12} = cond;
+ let Inst{11-10} = op2;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+}
+
+def inv_cond_XFORM : SDNodeXForm<imm, [{
+ AArch64CC::CondCode CC = static_cast<AArch64CC::CondCode>(N->getZExtValue());
+ return CurDAG->getTargetConstant(AArch64CC::getInvertedCondCode(CC), MVT::i32);
+}]>;
+
+multiclass CondSelectOp<bit op, bits<2> op2, string asm, PatFrag frag> {
+ def Wr : BaseCondSelectOp<op, op2, GPR32, asm, frag> {
+ let Inst{31} = 0;
+ }
+ def Xr : BaseCondSelectOp<op, op2, GPR64, asm, frag> {
+ let Inst{31} = 1;
+ }
+
+ def : Pat<(AArch64csel (frag GPR32:$Rm), GPR32:$Rn, (i32 imm:$cond), NZCV),
+ (!cast<Instruction>(NAME # Wr) GPR32:$Rn, GPR32:$Rm,
+ (inv_cond_XFORM imm:$cond))>;
+
+ def : Pat<(AArch64csel (frag GPR64:$Rm), GPR64:$Rn, (i32 imm:$cond), NZCV),
+ (!cast<Instruction>(NAME # Xr) GPR64:$Rn, GPR64:$Rm,
+ (inv_cond_XFORM imm:$cond))>;
+}
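+
+// A sketch of what the extra patterns above achieve, assuming csneg is
+// instantiated from this multiclass with frag = ineg: a node such as
+//   (AArch64csel (ineg GPR32:$Rm), GPR32:$Rn, (i32 imm:$cond), NZCV)
+// is commuted to match CSNEGWr $Rn, $Rm with the condition inverted by
+// inv_cond_XFORM (e.g. eq becomes ne).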
+
+//---
+// Special Mask Value
+//---
+def maski8_or_more : Operand<i32>,
+ ImmLeaf<i32, [{ return (Imm & 0xff) == 0xff; }]> {
+}
+def maski16_or_more : Operand<i32>,
+ ImmLeaf<i32, [{ return (Imm & 0xffff) == 0xffff; }]> {
+}
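+
+// For example, 0xff, 0x1ff and 0xffffffff all satisfy maski8_or_more, since
+// only the low eight bits have to be set. (Illustrative values; the patterns
+// consuming these operands are defined elsewhere.)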
+
+
+//---
+// Load/store
+//---
+
+// (unsigned immediate)
+// Indexed addressing for 8-bit accesses; the offset is in the range [0,4095].
+// The wider variants below scale the offset by the access size.
+def am_indexed8 : ComplexPattern<i64, 2, "SelectAddrModeIndexed8", []>;
+def am_indexed16 : ComplexPattern<i64, 2, "SelectAddrModeIndexed16", []>;
+def am_indexed32 : ComplexPattern<i64, 2, "SelectAddrModeIndexed32", []>;
+def am_indexed64 : ComplexPattern<i64, 2, "SelectAddrModeIndexed64", []>;
+def am_indexed128 : ComplexPattern<i64, 2, "SelectAddrModeIndexed128", []>;
+
+class UImm12OffsetOperand<int Scale> : AsmOperandClass {
+ let Name = "UImm12Offset" # Scale;
+ let RenderMethod = "addUImm12OffsetOperands<" # Scale # ">";
+ let PredicateMethod = "isUImm12Offset<" # Scale # ">";
+ let DiagnosticType = "InvalidMemoryIndexed" # Scale;
+}
+
+def UImm12OffsetScale1Operand : UImm12OffsetOperand<1>;
+def UImm12OffsetScale2Operand : UImm12OffsetOperand<2>;
+def UImm12OffsetScale4Operand : UImm12OffsetOperand<4>;
+def UImm12OffsetScale8Operand : UImm12OffsetOperand<8>;
+def UImm12OffsetScale16Operand : UImm12OffsetOperand<16>;
+
+class uimm12_scaled<int Scale> : Operand<i64> {
+ let ParserMatchClass
+ = !cast<AsmOperandClass>("UImm12OffsetScale" # Scale # "Operand");
+ let EncoderMethod
+ = "getLdStUImm12OpValue<AArch64::fixup_aarch64_ldst_imm12_scale" # Scale # ">";
+ let PrintMethod = "printUImm12Offset<" # Scale # ">";
+}
+
+def uimm12s1 : uimm12_scaled<1>;
+def uimm12s2 : uimm12_scaled<2>;
+def uimm12s4 : uimm12_scaled<4>;
+def uimm12s8 : uimm12_scaled<8>;
+def uimm12s16 : uimm12_scaled<16>;
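+
+// A worked example of the scaling above (offsets chosen for illustration):
+// with uimm12s8, "ldr x0, [x1, #32760]" encodes 32760 / 8 = 4095 in the
+// 12-bit field, whereas a byte offset of 4 is not a multiple of 8 and is
+// left to the unscaled (LDUR-style) forms defined further down.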
+
+class BaseLoadStoreUI<bits<2> sz, bit V, bits<2> opc, dag oops, dag iops,
+ string asm, list<dag> pattern>
+ : I<oops, iops, asm, "\t$Rt, [$Rn, $offset]", "", pattern> {
+ bits<5> Rt;
+
+ bits<5> Rn;
+ bits<12> offset;
+
+ let Inst{31-30} = sz;
+ let Inst{29-27} = 0b111;
+ let Inst{26} = V;
+ let Inst{25-24} = 0b01;
+ let Inst{23-22} = opc;
+ let Inst{21-10} = offset;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rt;
+
+ let DecoderMethod = "DecodeUnsignedLdStInstruction";
+}
+
+multiclass LoadUI<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype,
+ Operand indextype, string asm, list<dag> pattern> {
+ let AddedComplexity = 10, mayLoad = 1, mayStore = 0, hasSideEffects = 0 in
+ def ui : BaseLoadStoreUI<sz, V, opc, (outs regtype:$Rt),
+ (ins GPR64sp:$Rn, indextype:$offset),
+ asm, pattern>,
+ Sched<[WriteLD]>;
+
+ def : InstAlias<asm # " $Rt, [$Rn]",
+ (!cast<Instruction>(NAME # "ui") regtype:$Rt, GPR64sp:$Rn, 0)>;
}
+multiclass StoreUI<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype,
+ Operand indextype, string asm, list<dag> pattern> {
+ let AddedComplexity = 10, mayLoad = 0, mayStore = 1, hasSideEffects = 0 in
+ def ui : BaseLoadStoreUI<sz, V, opc, (outs),
+ (ins regtype:$Rt, GPR64sp:$Rn, indextype:$offset),
+ asm, pattern>,
+ Sched<[WriteST]>;
+
+ def : InstAlias<asm # " $Rt, [$Rn]",
+ (!cast<Instruction>(NAME # "ui") regtype:$Rt, GPR64sp:$Rn, 0)>;
+}
+
+def PrefetchOperand : AsmOperandClass {
+ let Name = "Prefetch";
+ let ParserMethod = "tryParsePrefetch";
+}
+def prfop : Operand<i32> {
+ let PrintMethod = "printPrefetchOp";
+ let ParserMatchClass = PrefetchOperand;
}
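+
+// For example, this lets the assembler accept both a named operand, as in
+// "prfm pldl1keep, [x0]", and a raw immediate such as "prfm #0, [x0]";
+// tryParsePrefetch (defined in the assembly parser) handles both spellings.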
-// Format for load-register (literal) instructions.
-class A64I_LDRlit<bits<2> opc, bit v,
- dag outs, dag ins, string asmstr,
- list<dag> patterns, InstrItinClass itin>
- : A64InstRt<outs, ins, asmstr, patterns, itin> {
- bits<19> Imm19;
+let mayLoad = 0, mayStore = 0, hasSideEffects = 1 in
+class PrefetchUI<bits<2> sz, bit V, bits<2> opc, string asm, list<dag> pat>
+ : BaseLoadStoreUI<sz, V, opc,
+ (outs), (ins prfop:$Rt, GPR64sp:$Rn, uimm12s8:$offset),
+ asm, pat>,
+ Sched<[WriteLD]>;
+
+//---
+// Load literal
+//---
+
+// Load literal address: 19-bit immediate. The low two bits of the target
+// offset are implied zero and so are not part of the immediate.
+def am_ldrlit : Operand<OtherVT> {
+ let EncoderMethod = "getLoadLiteralOpValue";
+ let DecoderMethod = "DecodePCRelLabel19";
+ let PrintMethod = "printAlignedLabel";
+ let ParserMatchClass = PCRelLabel19Operand;
+}
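+
+// For example (values chosen for exposition): "ldr x0, lbl" with lbl
+// 1048572 bytes ahead of the instruction encodes label = 262143 (the byte
+// offset divided by 4), giving a reachable range of +/-1MiB.
+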
+let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in
+class LoadLiteral<bits<2> opc, bit V, RegisterClass regtype, string asm>
+ : I<(outs regtype:$Rt), (ins am_ldrlit:$label),
+ asm, "\t$Rt, $label", "", []>,
+ Sched<[WriteLD]> {
+ bits<5> Rt;
+ bits<19> label;
let Inst{31-30} = opc;
let Inst{29-27} = 0b011;
- let Inst{26} = v;
+ let Inst{26} = V;
let Inst{25-24} = 0b00;
- let Inst{23-5} = Imm19;
- // Inherit Rt in 4-0
+ let Inst{23-5} = label;
+ let Inst{4-0} = Rt;
}
-// Format for load-store exclusive instructions.
-class A64I_LDSTex_tn<bits<2> size, bit o2, bit L, bit o1, bit o0,
- dag outs, dag ins, string asmstr,
- list <dag> patterns, InstrItinClass itin>
- : A64InstRtn<outs, ins, asmstr, patterns, itin> {
- let Inst{31-30} = size;
- let Inst{29-24} = 0b001000;
- let Inst{23} = o2;
- let Inst{22} = L;
- let Inst{21} = o1;
- let Inst{15} = o0;
+let mayLoad = 0, mayStore = 0, hasSideEffects = 1 in
+class PrefetchLiteral<bits<2> opc, bit V, string asm, list<dag> pat>
+ : I<(outs), (ins prfop:$Rt, am_ldrlit:$label),
+ asm, "\t$Rt, $label", "", pat>,
+ Sched<[WriteLD]> {
+ bits<5> Rt;
+ bits<19> label;
+ let Inst{31-30} = opc;
+ let Inst{29-27} = 0b011;
+ let Inst{26} = V;
+ let Inst{25-24} = 0b00;
+ let Inst{23-5} = label;
+ let Inst{4-0} = Rt;
}
-class A64I_LDSTex_tt2n<bits<2> size, bit o2, bit L, bit o1, bit o0,
- dag outs, dag ins, string asmstr,
- list <dag> patterns, InstrItinClass itin>:
- A64I_LDSTex_tn<size, o2, L, o1, o0, outs, ins, asmstr, patterns, itin>{
- bits<5> Rt2;
- let Inst{14-10} = Rt2;
+//---
+// Load/store register offset
+//---
+
+def ro_Xindexed8 : ComplexPattern<i64, 4, "SelectAddrModeXRO<8>", []>;
+def ro_Xindexed16 : ComplexPattern<i64, 4, "SelectAddrModeXRO<16>", []>;
+def ro_Xindexed32 : ComplexPattern<i64, 4, "SelectAddrModeXRO<32>", []>;
+def ro_Xindexed64 : ComplexPattern<i64, 4, "SelectAddrModeXRO<64>", []>;
+def ro_Xindexed128 : ComplexPattern<i64, 4, "SelectAddrModeXRO<128>", []>;
+
+def ro_Windexed8 : ComplexPattern<i64, 4, "SelectAddrModeWRO<8>", []>;
+def ro_Windexed16 : ComplexPattern<i64, 4, "SelectAddrModeWRO<16>", []>;
+def ro_Windexed32 : ComplexPattern<i64, 4, "SelectAddrModeWRO<32>", []>;
+def ro_Windexed64 : ComplexPattern<i64, 4, "SelectAddrModeWRO<64>", []>;
+def ro_Windexed128 : ComplexPattern<i64, 4, "SelectAddrModeWRO<128>", []>;
+
+class MemExtendOperand<string Reg, int Width> : AsmOperandClass {
+ let Name = "Mem" # Reg # "Extend" # Width;
+ let PredicateMethod = "isMem" # Reg # "Extend<" # Width # ">";
+ let RenderMethod = "addMemExtendOperands";
+ let DiagnosticType = "InvalidMemory" # Reg # "Extend" # Width;
}
-class A64I_LDSTex_stn<bits<2> size, bit o2, bit L, bit o1, bit o0,
- dag outs, dag ins, string asmstr,
- list <dag> patterns, InstrItinClass itin>:
- A64I_LDSTex_tn<size, o2, L, o1, o0, outs, ins, asmstr, patterns, itin>{
- bits<5> Rs;
- let Inst{20-16} = Rs;
+def MemWExtend8Operand : MemExtendOperand<"W", 8> {
+ // The address "[x0, x1, lsl #0]" actually maps to the variant which performs
+ // the trivial shift.
+ let RenderMethod = "addMemExtend8Operands";
+}
+def MemWExtend16Operand : MemExtendOperand<"W", 16>;
+def MemWExtend32Operand : MemExtendOperand<"W", 32>;
+def MemWExtend64Operand : MemExtendOperand<"W", 64>;
+def MemWExtend128Operand : MemExtendOperand<"W", 128>;
+
+def MemXExtend8Operand : MemExtendOperand<"X", 8> {
+ // The address "[x0, x1, lsl #0]" actually maps to the variant which performs
+ // the trivial shift.
+ let RenderMethod = "addMemExtend8Operands";
+}
+def MemXExtend16Operand : MemExtendOperand<"X", 16>;
+def MemXExtend32Operand : MemExtendOperand<"X", 32>;
+def MemXExtend64Operand : MemExtendOperand<"X", 64>;
+def MemXExtend128Operand : MemExtendOperand<"X", 128>;
+
+class ro_extend<AsmOperandClass ParserClass, string Reg, int Width>
+ : Operand<i32> {
+ let ParserMatchClass = ParserClass;
+ let PrintMethod = "printMemExtend<'" # Reg # "', " # Width # ">";
+ let DecoderMethod = "DecodeMemExtend";
+ let EncoderMethod = "getMemExtendOpValue";
+ let MIOperandInfo = (ops i32imm:$signed, i32imm:$doshift);
}
-class A64I_LDSTex_stt2n<bits<2> size, bit o2, bit L, bit o1, bit o0,
- dag outs, dag ins, string asmstr,
- list <dag> patterns, InstrItinClass itin>:
- A64I_LDSTex_stn<size, o2, L, o1, o0, outs, ins, asmstr, patterns, itin>{
- bits<5> Rt2;
- let Inst{14-10} = Rt2;
+def ro_Wextend8 : ro_extend<MemWExtend8Operand, "w", 8>;
+def ro_Wextend16 : ro_extend<MemWExtend16Operand, "w", 16>;
+def ro_Wextend32 : ro_extend<MemWExtend32Operand, "w", 32>;
+def ro_Wextend64 : ro_extend<MemWExtend64Operand, "w", 64>;
+def ro_Wextend128 : ro_extend<MemWExtend128Operand, "w", 128>;
+
+def ro_Xextend8 : ro_extend<MemXExtend8Operand, "x", 8>;
+def ro_Xextend16 : ro_extend<MemXExtend16Operand, "x", 16>;
+def ro_Xextend32 : ro_extend<MemXExtend32Operand, "x", 32>;
+def ro_Xextend64 : ro_extend<MemXExtend64Operand, "x", 64>;
+def ro_Xextend128 : ro_extend<MemXExtend128Operand, "x", 128>;
+
+class ROAddrMode<ComplexPattern windex, ComplexPattern xindex,
+ Operand wextend, Operand xextend> {
+ // CodeGen-level pattern covering the entire addressing mode.
+ ComplexPattern Wpat = windex;
+ ComplexPattern Xpat = xindex;
+
+ // Asm-level Operand covering the valid "uxtw #3" style syntax.
+ Operand Wext = wextend;
+ Operand Xext = xextend;
}
-// Format for load-store register (immediate post-indexed) instructions
-class A64I_LSpostind<bits<2> size, bit v, bits<2> opc,
- dag outs, dag ins, string asmstr,
- list<dag> patterns, InstrItinClass itin>
- : A64InstRtn<outs, ins, asmstr, patterns, itin> {
- bits<9> SImm9;
+def ro8 : ROAddrMode<ro_Windexed8, ro_Xindexed8, ro_Wextend8, ro_Xextend8>;
+def ro16 : ROAddrMode<ro_Windexed16, ro_Xindexed16, ro_Wextend16, ro_Xextend16>;
+def ro32 : ROAddrMode<ro_Windexed32, ro_Xindexed32, ro_Wextend32, ro_Xextend32>;
+def ro64 : ROAddrMode<ro_Windexed64, ro_Xindexed64, ro_Wextend64, ro_Xextend64>;
+def ro128 : ROAddrMode<ro_Windexed128, ro_Xindexed128, ro_Wextend128,
+ ro_Xextend128>;
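+
+// A minimal sketch of how such a bundle could be consumed (hypothetical
+// multiclass for illustration; the concrete Load*RO/Store*RO multiclasses
+// below spell the fields out per access width):
+//   multiclass LoadRO<ROAddrMode ro, RegisterClass regtype> {
+//     // ... (ins GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend) ...
+//     // ... (loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)) ...
+//   }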
- let Inst{31-30} = size;
+class LoadStore8RO<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype,
+                   string asm, dag outs, dag ins, list<dag> pat>
+    : I<outs, ins, asm, "\t$Rt, [$Rn, $Rm, $extend]", "", pat> {
+ bits<5> Rt;
+ bits<5> Rn;
+ bits<5> Rm;
+ bits<2> extend;
+ let Inst{31-30} = sz;
let Inst{29-27} = 0b111;
- let Inst{26} = v;
+ let Inst{26} = V;
let Inst{25-24} = 0b00;
let Inst{23-22} = opc;
- let Inst{21} = 0b0;
- let Inst{20-12} = SImm9;
- let Inst{11-10} = 0b01;
- // Inherit Rn in 9-5
- // Inherit Rt in 4-0
+ let Inst{21} = 1;
+ let Inst{20-16} = Rm;
+ let Inst{15} = extend{1}; // sign extend Rm?
+ let Inst{14} = 1;
+ let Inst{12} = extend{0}; // do shift?
+ let Inst{11-10} = 0b10;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rt;
}
-// Format for load-store register (immediate pre-indexed) instructions
-class A64I_LSpreind<bits<2> size, bit v, bits<2> opc,
- dag outs, dag ins, string asmstr,
- list<dag> patterns, InstrItinClass itin>
- : A64InstRtn<outs, ins, asmstr, patterns, itin> {
- bits<9> SImm9;
+class ROInstAlias<string asm, RegisterClass regtype, Instruction INST>
+ : InstAlias<asm # " $Rt, [$Rn, $Rm]",
+ (INST regtype:$Rt, GPR64sp:$Rn, GPR64:$Rm, 0, 0)>;
+
+multiclass Load8RO<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype,
+ string asm, ValueType Ty, SDPatternOperator loadop> {
+ let AddedComplexity = 10 in
+ def roW : LoadStore8RO<sz, V, opc, regtype, asm,
+ (outs regtype:$Rt),
+ (ins GPR64sp:$Rn, GPR32:$Rm, ro_Wextend8:$extend),
+ [(set (Ty regtype:$Rt),
+ (loadop (ro_Windexed8 GPR64sp:$Rn, GPR32:$Rm,
+ ro_Wextend8:$extend)))]>,
+ Sched<[WriteLDIdx, ReadAdrBase]> {
+ let Inst{13} = 0b0;
+ }
+
+ let AddedComplexity = 10 in
+ def roX : LoadStore8RO<sz, V, opc, regtype, asm,
+ (outs regtype:$Rt),
+ (ins GPR64sp:$Rn, GPR64:$Rm, ro_Xextend8:$extend),
+ [(set (Ty regtype:$Rt),
+ (loadop (ro_Xindexed8 GPR64sp:$Rn, GPR64:$Rm,
+ ro_Xextend8:$extend)))]>,
+ Sched<[WriteLDIdx, ReadAdrBase]> {
+ let Inst{13} = 0b1;
+ }
+
+ def : ROInstAlias<asm, regtype, !cast<Instruction>(NAME # "roX")>;
+}
+multiclass Store8RO<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype,
+ string asm, ValueType Ty, SDPatternOperator storeop> {
+ let AddedComplexity = 10 in
+ def roW : LoadStore8RO<sz, V, opc, regtype, asm, (outs),
+ (ins regtype:$Rt, GPR64sp:$Rn, GPR32:$Rm, ro_Wextend8:$extend),
+ [(storeop (Ty regtype:$Rt),
+ (ro_Windexed8 GPR64sp:$Rn, GPR32:$Rm,
+ ro_Wextend8:$extend))]>,
+ Sched<[WriteSTIdx, ReadAdrBase]> {
+ let Inst{13} = 0b0;
+ }
+
+ let AddedComplexity = 10 in
+ def roX : LoadStore8RO<sz, V, opc, regtype, asm, (outs),
+ (ins regtype:$Rt, GPR64sp:$Rn, GPR64:$Rm, ro_Xextend8:$extend),
+ [(storeop (Ty regtype:$Rt),
+ (ro_Xindexed8 GPR64sp:$Rn, GPR64:$Rm,
+ ro_Xextend8:$extend))]>,
+ Sched<[WriteSTIdx, ReadAdrBase]> {
+ let Inst{13} = 0b1;
+ }
+
+ def : ROInstAlias<asm, regtype, !cast<Instruction>(NAME # "roX")>;
+}
- let Inst{31-30} = size;
+class LoadStore16RO<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype,
+                    string asm, dag outs, dag ins, list<dag> pat>
+    : I<outs, ins, asm, "\t$Rt, [$Rn, $Rm, $extend]", "", pat> {
+ bits<5> Rt;
+ bits<5> Rn;
+ bits<5> Rm;
+ bits<2> extend;
+ let Inst{31-30} = sz;
let Inst{29-27} = 0b111;
- let Inst{26} = v;
+ let Inst{26} = V;
let Inst{25-24} = 0b00;
let Inst{23-22} = opc;
- let Inst{21} = 0b0;
- let Inst{20-12} = SImm9;
- let Inst{11-10} = 0b11;
- // Inherit Rn in 9-5
- // Inherit Rt in 4-0
+ let Inst{21} = 1;
+ let Inst{20-16} = Rm;
+ let Inst{15} = extend{1}; // sign extend Rm?
+ let Inst{14} = 1;
+ let Inst{12} = extend{0}; // do shift?
+ let Inst{11-10} = 0b10;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rt;
}
-// Format for load-store register (unprivileged) instructions
-class A64I_LSunpriv<bits<2> size, bit v, bits<2> opc,
- dag outs, dag ins, string asmstr,
- list<dag> patterns, InstrItinClass itin>
- : A64InstRtn<outs, ins, asmstr, patterns, itin> {
- bits<9> SImm9;
+multiclass Load16RO<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype,
+ string asm, ValueType Ty, SDPatternOperator loadop> {
+ let AddedComplexity = 10 in
+ def roW : LoadStore16RO<sz, V, opc, regtype, asm, (outs regtype:$Rt),
+ (ins GPR64sp:$Rn, GPR32:$Rm, ro_Wextend16:$extend),
+ [(set (Ty regtype:$Rt),
+ (loadop (ro_Windexed16 GPR64sp:$Rn, GPR32:$Rm,
+ ro_Wextend16:$extend)))]>,
+ Sched<[WriteLDIdx, ReadAdrBase]> {
+ let Inst{13} = 0b0;
+ }
+
+ let AddedComplexity = 10 in
+ def roX : LoadStore16RO<sz, V, opc, regtype, asm, (outs regtype:$Rt),
+ (ins GPR64sp:$Rn, GPR64:$Rm, ro_Xextend16:$extend),
+ [(set (Ty regtype:$Rt),
+ (loadop (ro_Xindexed16 GPR64sp:$Rn, GPR64:$Rm,
+ ro_Xextend16:$extend)))]>,
+ Sched<[WriteLDIdx, ReadAdrBase]> {
+ let Inst{13} = 0b1;
+ }
+
+ def : ROInstAlias<asm, regtype, !cast<Instruction>(NAME # "roX")>;
+}
+multiclass Store16RO<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype,
+ string asm, ValueType Ty, SDPatternOperator storeop> {
+ let AddedComplexity = 10 in
+ def roW : LoadStore16RO<sz, V, opc, regtype, asm, (outs),
+ (ins regtype:$Rt, GPR64sp:$Rn, GPR32:$Rm, ro_Wextend16:$extend),
+ [(storeop (Ty regtype:$Rt),
+ (ro_Windexed16 GPR64sp:$Rn, GPR32:$Rm,
+ ro_Wextend16:$extend))]>,
+ Sched<[WriteSTIdx, ReadAdrBase]> {
+ let Inst{13} = 0b0;
+ }
+
+ let AddedComplexity = 10 in
+ def roX : LoadStore16RO<sz, V, opc, regtype, asm, (outs),
+ (ins regtype:$Rt, GPR64sp:$Rn, GPR64:$Rm, ro_Xextend16:$extend),
+ [(storeop (Ty regtype:$Rt),
+ (ro_Xindexed16 GPR64sp:$Rn, GPR64:$Rm,
+ ro_Xextend16:$extend))]>,
+ Sched<[WriteSTIdx, ReadAdrBase]> {
+ let Inst{13} = 0b1;
+ }
+
+ def : ROInstAlias<asm, regtype, !cast<Instruction>(NAME # "roX")>;
+}
- let Inst{31-30} = size;
+class LoadStore32RO<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype,
+                    string asm, dag outs, dag ins, list<dag> pat>
+    : I<outs, ins, asm, "\t$Rt, [$Rn, $Rm, $extend]", "", pat> {
+ bits<5> Rt;
+ bits<5> Rn;
+ bits<5> Rm;
+ bits<2> extend;
+ let Inst{31-30} = sz;
let Inst{29-27} = 0b111;
- let Inst{26} = v;
+ let Inst{26} = V;
let Inst{25-24} = 0b00;
let Inst{23-22} = opc;
- let Inst{21} = 0b0;
- let Inst{20-12} = SImm9;
+ let Inst{21} = 1;
+ let Inst{20-16} = Rm;
+ let Inst{15} = extend{1}; // sign extend Rm?
+ let Inst{14} = 1;
+ let Inst{12} = extend{0}; // do shift?
let Inst{11-10} = 0b10;
- // Inherit Rn in 9-5
- // Inherit Rt in 4-0
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rt;
+}
+
+multiclass Load32RO<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype,
+ string asm, ValueType Ty, SDPatternOperator loadop> {
+ let AddedComplexity = 10 in
+ def roW : LoadStore32RO<sz, V, opc, regtype, asm, (outs regtype:$Rt),
+ (ins GPR64sp:$Rn, GPR32:$Rm, ro_Wextend32:$extend),
+ [(set (Ty regtype:$Rt),
+ (loadop (ro_Windexed32 GPR64sp:$Rn, GPR32:$Rm,
+ ro_Wextend32:$extend)))]>,
+ Sched<[WriteLDIdx, ReadAdrBase]> {
+ let Inst{13} = 0b0;
+ }
+
+ let AddedComplexity = 10 in
+ def roX : LoadStore32RO<sz, V, opc, regtype, asm, (outs regtype:$Rt),
+ (ins GPR64sp:$Rn, GPR64:$Rm, ro_Xextend32:$extend),
+ [(set (Ty regtype:$Rt),
+ (loadop (ro_Xindexed32 GPR64sp:$Rn, GPR64:$Rm,
+ ro_Xextend32:$extend)))]>,
+ Sched<[WriteLDIdx, ReadAdrBase]> {
+ let Inst{13} = 0b1;
+ }
+
+ def : ROInstAlias<asm, regtype, !cast<Instruction>(NAME # "roX")>;
}
-// Format for load-store (unscaled immediate) instructions.
-class A64I_LSunalimm<bits<2> size, bit v, bits<2> opc,
- dag outs, dag ins, string asmstr,
- list<dag> patterns, InstrItinClass itin>
- : A64InstRtn<outs, ins, asmstr, patterns, itin> {
- bits<9> SImm9;
+multiclass Store32RO<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype,
+ string asm, ValueType Ty, SDPatternOperator storeop> {
+ let AddedComplexity = 10 in
+ def roW : LoadStore32RO<sz, V, opc, regtype, asm, (outs),
+ (ins regtype:$Rt, GPR64sp:$Rn, GPR32:$Rm, ro_Wextend32:$extend),
+ [(storeop (Ty regtype:$Rt),
+ (ro_Windexed32 GPR64sp:$Rn, GPR32:$Rm,
+ ro_Wextend32:$extend))]>,
+ Sched<[WriteSTIdx, ReadAdrBase]> {
+ let Inst{13} = 0b0;
+ }
+
+ let AddedComplexity = 10 in
+ def roX : LoadStore32RO<sz, V, opc, regtype, asm, (outs),
+ (ins regtype:$Rt, GPR64sp:$Rn, GPR64:$Rm, ro_Xextend32:$extend),
+ [(storeop (Ty regtype:$Rt),
+ (ro_Xindexed32 GPR64sp:$Rn, GPR64:$Rm,
+ ro_Xextend32:$extend))]>,
+ Sched<[WriteSTIdx, ReadAdrBase]> {
+ let Inst{13} = 0b1;
+ }
+
+ def : ROInstAlias<asm, regtype, !cast<Instruction>(NAME # "roX")>;
+}
- let Inst{31-30} = size;
+class LoadStore64RO<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype,
+                    string asm, dag outs, dag ins, list<dag> pat>
+    : I<outs, ins, asm, "\t$Rt, [$Rn, $Rm, $extend]", "", pat> {
+ bits<5> Rt;
+ bits<5> Rn;
+ bits<5> Rm;
+ bits<2> extend;
+ let Inst{31-30} = sz;
let Inst{29-27} = 0b111;
- let Inst{26} = v;
+ let Inst{26} = V;
let Inst{25-24} = 0b00;
let Inst{23-22} = opc;
- let Inst{21} = 0b0;
- let Inst{20-12} = SImm9;
- let Inst{11-10} = 0b00;
- // Inherit Rn in 9-5
- // Inherit Rt in 4-0
+ let Inst{21} = 1;
+ let Inst{20-16} = Rm;
+ let Inst{15} = extend{1}; // sign extend Rm?
+ let Inst{14} = 1;
+ let Inst{12} = extend{0}; // do shift?
+ let Inst{11-10} = 0b10;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rt;
}
+multiclass Load64RO<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype,
+ string asm, ValueType Ty, SDPatternOperator loadop> {
+ let AddedComplexity = 10, mayLoad = 1, mayStore = 0, hasSideEffects = 0 in
+ def roW : LoadStore64RO<sz, V, opc, regtype, asm, (outs regtype:$Rt),
+ (ins GPR64sp:$Rn, GPR32:$Rm, ro_Wextend64:$extend),
+ [(set (Ty regtype:$Rt),
+ (loadop (ro_Windexed64 GPR64sp:$Rn, GPR32:$Rm,
+ ro_Wextend64:$extend)))]>,
+ Sched<[WriteLDIdx, ReadAdrBase]> {
+ let Inst{13} = 0b0;
+ }
+
+ let AddedComplexity = 10, mayLoad = 1, mayStore = 0, hasSideEffects = 0 in
+ def roX : LoadStore64RO<sz, V, opc, regtype, asm, (outs regtype:$Rt),
+ (ins GPR64sp:$Rn, GPR64:$Rm, ro_Xextend64:$extend),
+ [(set (Ty regtype:$Rt),
+ (loadop (ro_Xindexed64 GPR64sp:$Rn, GPR64:$Rm,
+ ro_Xextend64:$extend)))]>,
+ Sched<[WriteLDIdx, ReadAdrBase]> {
+ let Inst{13} = 0b1;
+ }
+
+ def : ROInstAlias<asm, regtype, !cast<Instruction>(NAME # "roX")>;
+}
-// Format for load-store (unsigned immediate) instructions.
-class A64I_LSunsigimm<bits<2> size, bit v, bits<2> opc,
- dag outs, dag ins, string asmstr,
- list<dag> patterns, InstrItinClass itin>
- : A64InstRtn<outs, ins, asmstr, patterns, itin> {
- bits<12> UImm12;
+multiclass Store64RO<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype,
+ string asm, ValueType Ty, SDPatternOperator storeop> {
+ let AddedComplexity = 10, mayLoad = 0, mayStore = 1, hasSideEffects = 0 in
+ def roW : LoadStore64RO<sz, V, opc, regtype, asm, (outs),
+ (ins regtype:$Rt, GPR64sp:$Rn, GPR32:$Rm, ro_Wextend64:$extend),
+ [(storeop (Ty regtype:$Rt),
+ (ro_Windexed64 GPR64sp:$Rn, GPR32:$Rm,
+ ro_Wextend64:$extend))]>,
+ Sched<[WriteSTIdx, ReadAdrBase]> {
+ let Inst{13} = 0b0;
+ }
+
+ let AddedComplexity = 10, mayLoad = 0, mayStore = 1, hasSideEffects = 0 in
+ def roX : LoadStore64RO<sz, V, opc, regtype, asm, (outs),
+ (ins regtype:$Rt, GPR64sp:$Rn, GPR64:$Rm, ro_Xextend64:$extend),
+ [(storeop (Ty regtype:$Rt),
+ (ro_Xindexed64 GPR64sp:$Rn, GPR64:$Rm,
+ ro_Xextend64:$extend))]>,
+ Sched<[WriteSTIdx, ReadAdrBase]> {
+ let Inst{13} = 0b1;
+ }
+
+ def : ROInstAlias<asm, regtype, !cast<Instruction>(NAME # "roX")>;
+}
- let Inst{31-30} = size;
+class LoadStore128RO<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype,
+                     string asm, dag outs, dag ins, list<dag> pat>
+    : I<outs, ins, asm, "\t$Rt, [$Rn, $Rm, $extend]", "", pat> {
+ bits<5> Rt;
+ bits<5> Rn;
+ bits<5> Rm;
+ bits<2> extend;
+ let Inst{31-30} = sz;
let Inst{29-27} = 0b111;
- let Inst{26} = v;
- let Inst{25-24} = 0b01;
+ let Inst{26} = V;
+ let Inst{25-24} = 0b00;
let Inst{23-22} = opc;
- let Inst{21-10} = UImm12;
+ let Inst{21} = 1;
+ let Inst{20-16} = Rm;
+ let Inst{15} = extend{1}; // sign extend Rm?
+ let Inst{14} = 1;
+ let Inst{12} = extend{0}; // do shift?
+ let Inst{11-10} = 0b10;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rt;
}
-// Format for load-store register (register offset) instructions.
-class A64I_LSregoff<bits<2> size, bit v, bits<2> opc, bit optionlo,
- dag outs, dag ins, string asmstr,
- list<dag> patterns, InstrItinClass itin>
- : A64InstRtn<outs, ins, asmstr, patterns, itin> {
- bits<5> Rm;
+multiclass Load128RO<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype,
+ string asm, ValueType Ty, SDPatternOperator loadop> {
+ let AddedComplexity = 10, mayLoad = 1, mayStore = 0, hasSideEffects = 0 in
+ def roW : LoadStore128RO<sz, V, opc, regtype, asm, (outs regtype:$Rt),
+ (ins GPR64sp:$Rn, GPR32:$Rm, ro_Wextend128:$extend),
+ [(set (Ty regtype:$Rt),
+ (loadop (ro_Windexed128 GPR64sp:$Rn, GPR32:$Rm,
+ ro_Wextend128:$extend)))]>,
+ Sched<[WriteLDIdx, ReadAdrBase]> {
+ let Inst{13} = 0b0;
+ }
+
+ let AddedComplexity = 10, mayLoad = 1, mayStore = 0, hasSideEffects = 0 in
+ def roX : LoadStore128RO<sz, V, opc, regtype, asm, (outs regtype:$Rt),
+ (ins GPR64sp:$Rn, GPR64:$Rm, ro_Xextend128:$extend),
+ [(set (Ty regtype:$Rt),
+ (loadop (ro_Xindexed128 GPR64sp:$Rn, GPR64:$Rm,
+ ro_Xextend128:$extend)))]>,
+ Sched<[WriteLDIdx, ReadAdrBase]> {
+ let Inst{13} = 0b1;
+ }
+
+ def : ROInstAlias<asm, regtype, !cast<Instruction>(NAME # "roX")>;
+}
- // Complex operand selection needed for these instructions, so they
- // need an "addr" field for encoding/decoding to be generated.
- bits<3> Ext;
- // OptionHi = Ext{2-1}
- // S = Ext{0}
+multiclass Store128RO<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype,
+ string asm, ValueType Ty, SDPatternOperator storeop> {
+ let AddedComplexity = 10, mayLoad = 0, mayStore = 1, hasSideEffects = 0 in
+ def roW : LoadStore128RO<sz, V, opc, regtype, asm, (outs),
+ (ins regtype:$Rt, GPR64sp:$Rn, GPR32:$Rm, ro_Wextend128:$extend),
+ [(storeop (Ty regtype:$Rt),
+ (ro_Windexed128 GPR64sp:$Rn, GPR32:$Rm,
+ ro_Wextend128:$extend))]>,
+ Sched<[WriteSTIdx, ReadAdrBase]> {
+ let Inst{13} = 0b0;
+ }
+
+ let AddedComplexity = 10, mayLoad = 0, mayStore = 1, hasSideEffects = 0 in
+ def roX : LoadStore128RO<sz, V, opc, regtype, asm, (outs),
+ (ins regtype:$Rt, GPR64sp:$Rn, GPR64:$Rm, ro_Xextend128:$extend),
+ [(storeop (Ty regtype:$Rt),
+ (ro_Xindexed128 GPR64sp:$Rn, GPR64:$Rm,
+ ro_Xextend128:$extend))]>,
+ Sched<[WriteSTIdx, ReadAdrBase]> {
+ let Inst{13} = 0b1;
+ }
+
+ def : ROInstAlias<asm, regtype, !cast<Instruction>(NAME # "roX")>;
+}
- let Inst{31-30} = size;
+let mayLoad = 0, mayStore = 0, hasSideEffects = 1 in
+class BasePrefetchRO<bits<2> sz, bit V, bits<2> opc, dag outs, dag ins,
+ string asm, list<dag> pat>
+ : I<outs, ins, asm, "\t$Rt, [$Rn, $Rm, $extend]", "", pat>,
+ Sched<[WriteLD]> {
+ bits<5> Rt;
+ bits<5> Rn;
+ bits<5> Rm;
+ bits<2> extend;
+ let Inst{31-30} = sz;
let Inst{29-27} = 0b111;
- let Inst{26} = v;
+ let Inst{26} = V;
let Inst{25-24} = 0b00;
let Inst{23-22} = opc;
- let Inst{21} = 0b1;
+ let Inst{21} = 1;
let Inst{20-16} = Rm;
- let Inst{15-14} = Ext{2-1};
- let Inst{13} = optionlo;
- let Inst{12} = Ext{0};
+ let Inst{15} = extend{1}; // sign extend Rm?
+ let Inst{14} = 1;
+ let Inst{12} = extend{0}; // do shift?
let Inst{11-10} = 0b10;
- // Inherits Rn in 9-5
- // Inherits Rt in 4-0
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rt;
+}
- let AddedComplexity = 50;
+multiclass PrefetchRO<bits<2> sz, bit V, bits<2> opc, string asm> {
+ def roW : BasePrefetchRO<sz, V, opc, (outs),
+ (ins prfop:$Rt, GPR64sp:$Rn, GPR32:$Rm, ro_Wextend64:$extend),
+ asm, [(AArch64Prefetch imm:$Rt,
+ (ro_Windexed64 GPR64sp:$Rn, GPR32:$Rm,
+ ro_Wextend64:$extend))]> {
+ let Inst{13} = 0b0;
+ }
+
+ def roX : BasePrefetchRO<sz, V, opc, (outs),
+ (ins prfop:$Rt, GPR64sp:$Rn, GPR64:$Rm, ro_Xextend64:$extend),
+ asm, [(AArch64Prefetch imm:$Rt,
+ (ro_Xindexed64 GPR64sp:$Rn, GPR64:$Rm,
+ ro_Xextend64:$extend))]> {
+ let Inst{13} = 0b1;
+ }
+
+ def : InstAlias<"prfm $Rt, [$Rn, $Rm]",
+ (!cast<Instruction>(NAME # "roX") prfop:$Rt,
+ GPR64sp:$Rn, GPR64:$Rm, 0, 0)>;
}
-// Format for Load-store register pair (offset) instructions
-class A64I_LSPoffset<bits<2> opc, bit v, bit l,
- dag outs, dag ins, string asmstr,
- list<dag> patterns, InstrItinClass itin>
- : A64InstRtt2n<outs, ins, asmstr, patterns, itin> {
- bits<7> SImm7;
+//---
+// Load/store unscaled immediate
+//---
- let Inst{31-30} = opc;
- let Inst{29-27} = 0b101;
- let Inst{26} = v;
- let Inst{25-23} = 0b010;
- let Inst{22} = l;
- let Inst{21-15} = SImm7;
- // Inherit Rt2 in 14-10
- // Inherit Rn in 9-5
- // Inherit Rt in 4-0
+def am_unscaled8 : ComplexPattern<i64, 2, "SelectAddrModeUnscaled8", []>;
+def am_unscaled16 : ComplexPattern<i64, 2, "SelectAddrModeUnscaled16", []>;
+def am_unscaled32 : ComplexPattern<i64, 2, "SelectAddrModeUnscaled32", []>;
+def am_unscaled64 : ComplexPattern<i64, 2, "SelectAddrModeUnscaled64", []>;
+def am_unscaled128 : ComplexPattern<i64, 2, "SelectAddrModeUnscaled128", []>;
+
+class BaseLoadStoreUnscale<bits<2> sz, bit V, bits<2> opc, dag oops, dag iops,
+ string asm, list<dag> pattern>
+ : I<oops, iops, asm, "\t$Rt, [$Rn, $offset]", "", pattern> {
+ bits<5> Rt;
+ bits<5> Rn;
+ bits<9> offset;
+ let Inst{31-30} = sz;
+ let Inst{29-27} = 0b111;
+ let Inst{26} = V;
+ let Inst{25-24} = 0b00;
+ let Inst{23-22} = opc;
+ let Inst{21} = 0;
+ let Inst{20-12} = offset;
+ let Inst{11-10} = 0b00;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rt;
+
+ let DecoderMethod = "DecodeSignedLdStInstruction";
+}
+
+multiclass LoadUnscaled<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype,
+ string asm, list<dag> pattern> {
+ let AddedComplexity = 1 in // try this before LoadUI
+ def i : BaseLoadStoreUnscale<sz, V, opc, (outs regtype:$Rt),
+ (ins GPR64sp:$Rn, simm9:$offset), asm, pattern>,
+ Sched<[WriteLD]>;
+
+ def : InstAlias<asm # " $Rt, [$Rn]",
+ (!cast<Instruction>(NAME # "i") regtype:$Rt, GPR64sp:$Rn, 0)>;
+}
+
+multiclass StoreUnscaled<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype,
+ string asm, list<dag> pattern> {
+ let AddedComplexity = 1 in // try this before StoreUI
+ def i : BaseLoadStoreUnscale<sz, V, opc, (outs),
+ (ins regtype:$Rt, GPR64sp:$Rn, simm9:$offset),
+ asm, pattern>,
+ Sched<[WriteST]>;
+
+ def : InstAlias<asm # " $Rt, [$Rn]",
+ (!cast<Instruction>(NAME # "i") regtype:$Rt, GPR64sp:$Rn, 0)>;
+}
+
+multiclass PrefetchUnscaled<bits<2> sz, bit V, bits<2> opc, string asm,
+ list<dag> pat> {
+ let mayLoad = 0, mayStore = 0, hasSideEffects = 1 in
+ def i : BaseLoadStoreUnscale<sz, V, opc, (outs),
+ (ins prfop:$Rt, GPR64sp:$Rn, simm9:$offset),
+ asm, pat>,
+ Sched<[WriteLD]>;
+
+ def : InstAlias<asm # " $Rt, [$Rn]",
+ (!cast<Instruction>(NAME # "i") prfop:$Rt, GPR64sp:$Rn, 0)>;
+}
+
+//---
+// Load/store unscaled immediate, unprivileged
+//---
+
+class BaseLoadStoreUnprivileged<bits<2> sz, bit V, bits<2> opc,
+ dag oops, dag iops, string asm>
+ : I<oops, iops, asm, "\t$Rt, [$Rn, $offset]", "", []> {
+ bits<5> Rt;
+ bits<5> Rn;
+ bits<9> offset;
+ let Inst{31-30} = sz;
+ let Inst{29-27} = 0b111;
+ let Inst{26} = V;
+ let Inst{25-24} = 0b00;
+ let Inst{23-22} = opc;
+ let Inst{21} = 0;
+ let Inst{20-12} = offset;
+ let Inst{11-10} = 0b10;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rt;
+
+ let DecoderMethod = "DecodeSignedLdStInstruction";
+}
+
+multiclass LoadUnprivileged<bits<2> sz, bit V, bits<2> opc,
+ RegisterClass regtype, string asm> {
+ let mayStore = 0, mayLoad = 1, hasSideEffects = 0 in
+ def i : BaseLoadStoreUnprivileged<sz, V, opc, (outs regtype:$Rt),
+ (ins GPR64sp:$Rn, simm9:$offset), asm>,
+ Sched<[WriteLD]>;
+
+ def : InstAlias<asm # " $Rt, [$Rn]",
+ (!cast<Instruction>(NAME # "i") regtype:$Rt, GPR64sp:$Rn, 0)>;
+}
+
+multiclass StoreUnprivileged<bits<2> sz, bit V, bits<2> opc,
+ RegisterClass regtype, string asm> {
+ let mayStore = 1, mayLoad = 0, hasSideEffects = 0 in
+ def i : BaseLoadStoreUnprivileged<sz, V, opc, (outs),
+ (ins regtype:$Rt, GPR64sp:$Rn, simm9:$offset),
+ asm>,
+ Sched<[WriteST]>;
+
+ def : InstAlias<asm # " $Rt, [$Rn]",
+ (!cast<Instruction>(NAME # "i") regtype:$Rt, GPR64sp:$Rn, 0)>;
}
-// Format for Load-store register pair (post-indexed) instructions
-class A64I_LSPpostind<bits<2> opc, bit v, bit l,
- dag outs, dag ins, string asmstr,
- list<dag> patterns, InstrItinClass itin>
- : A64InstRtt2n<outs, ins, asmstr, patterns, itin> {
- bits<7> SImm7;
+//---
+// Load/store pre-indexed
+//---
+
+class BaseLoadStorePreIdx<bits<2> sz, bit V, bits<2> opc, dag oops, dag iops,
+ string asm, string cstr, list<dag> pat>
+ : I<oops, iops, asm, "\t$Rt, [$Rn, $offset]!", cstr, pat> {
+ bits<5> Rt;
+ bits<5> Rn;
+ bits<9> offset;
+ let Inst{31-30} = sz;
+ let Inst{29-27} = 0b111;
+ let Inst{26} = V;
+ let Inst{25-24} = 0;
+ let Inst{23-22} = opc;
+ let Inst{21} = 0;
+ let Inst{20-12} = offset;
+ let Inst{11-10} = 0b11;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rt;
+
+ let DecoderMethod = "DecodeSignedLdStInstruction";
+}
+let hasSideEffects = 0 in {
+let mayStore = 0, mayLoad = 1 in
+class LoadPreIdx<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype,
+ string asm>
+ : BaseLoadStorePreIdx<sz, V, opc,
+ (outs GPR64sp:$wback, regtype:$Rt),
+ (ins GPR64sp:$Rn, simm9:$offset), asm,
+ "$Rn = $wback", []>,
+ Sched<[WriteLD, WriteAdr]>;
+
+let mayStore = 1, mayLoad = 0 in
+class StorePreIdx<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype,
+ string asm, SDPatternOperator storeop, ValueType Ty>
+ : BaseLoadStorePreIdx<sz, V, opc,
+ (outs GPR64sp:$wback),
+ (ins regtype:$Rt, GPR64sp:$Rn, simm9:$offset),
+ asm, "$Rn = $wback",
+ [(set GPR64sp:$wback,
+ (storeop (Ty regtype:$Rt), GPR64sp:$Rn, simm9:$offset))]>,
+ Sched<[WriteAdr, WriteST]>;
+} // hasSideEffects = 0
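+
+// For example, "ldr x0, [x1, #16]!" loads from x1+16 and writes x1+16 back
+// to x1; the "$Rn = $wback" constraint ties the base-register input to the
+// $wback output so both live in the same physical register.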
+
+//---
+// Load/store post-indexed
+//---
+
+// (post-index) load/stores.
+class BaseLoadStorePostIdx<bits<2> sz, bit V, bits<2> opc, dag oops, dag iops,
+ string asm, string cstr, list<dag> pat>
+ : I<oops, iops, asm, "\t$Rt, [$Rn], $offset", cstr, pat> {
+ bits<5> Rt;
+ bits<5> Rn;
+ bits<9> offset;
+ let Inst{31-30} = sz;
+ let Inst{29-27} = 0b111;
+ let Inst{26} = V;
+ let Inst{25-24} = 0b00;
+ let Inst{23-22} = opc;
+ let Inst{21} = 0b0;
+ let Inst{20-12} = offset;
+ let Inst{11-10} = 0b01;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rt;
+
+ let DecoderMethod = "DecodeSignedLdStInstruction";
+}
+
+let hasSideEffects = 0 in {
+let mayStore = 0, mayLoad = 1 in
+class LoadPostIdx<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype,
+ string asm>
+ : BaseLoadStorePostIdx<sz, V, opc,
+ (outs GPR64sp:$wback, regtype:$Rt),
+ (ins GPR64sp:$Rn, simm9:$offset),
+ asm, "$Rn = $wback", []>,
+ Sched<[WriteLD, WriteI]>;
+
+let mayStore = 1, mayLoad = 0 in
+class StorePostIdx<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype,
+ string asm, SDPatternOperator storeop, ValueType Ty>
+ : BaseLoadStorePostIdx<sz, V, opc,
+ (outs GPR64sp:$wback),
+ (ins regtype:$Rt, GPR64sp:$Rn, simm9:$offset),
+ asm, "$Rn = $wback",
+ [(set GPR64sp:$wback,
+ (storeop (Ty regtype:$Rt), GPR64sp:$Rn, simm9:$offset))]>,
+ Sched<[WriteAdr, WriteST, ReadAdrBase]>;
+} // hasSideEffects = 0
+
+
+//---
+// Load/store pair
+//---
+
+// (indexed, offset)
+
+class BaseLoadStorePairOffset<bits<2> opc, bit V, bit L, dag oops, dag iops,
+ string asm>
+ : I<oops, iops, asm, "\t$Rt, $Rt2, [$Rn, $offset]", "", []> {
+ bits<5> Rt;
+ bits<5> Rt2;
+ bits<5> Rn;
+ bits<7> offset;
let Inst{31-30} = opc;
let Inst{29-27} = 0b101;
- let Inst{26} = v;
- let Inst{25-23} = 0b001;
- let Inst{22} = l;
- let Inst{21-15} = SImm7;
- // Inherit Rt2 in 14-10
- // Inherit Rn in 9-5
- // Inherit Rt in 4-0
+ let Inst{26} = V;
+ let Inst{25-23} = 0b010;
+ let Inst{22} = L;
+ let Inst{21-15} = offset;
+ let Inst{14-10} = Rt2;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rt;
+
+ let DecoderMethod = "DecodePairLdStInstruction";
+}
+
+multiclass LoadPairOffset<bits<2> opc, bit V, RegisterClass regtype,
+ Operand indextype, string asm> {
+ let hasSideEffects = 0, mayStore = 0, mayLoad = 1 in
+ def i : BaseLoadStorePairOffset<opc, V, 1,
+ (outs regtype:$Rt, regtype:$Rt2),
+ (ins GPR64sp:$Rn, indextype:$offset), asm>,
+ Sched<[WriteLD, WriteLDHi]>;
+
+ def : InstAlias<asm # " $Rt, $Rt2, [$Rn]",
+ (!cast<Instruction>(NAME # "i") regtype:$Rt, regtype:$Rt2,
+ GPR64sp:$Rn, 0)>;
}
-// Format for Load-store register pair (pre-indexed) instructions
-class A64I_LSPpreind<bits<2> opc, bit v, bit l,
- dag outs, dag ins, string asmstr,
- list<dag> patterns, InstrItinClass itin>
- : A64InstRtt2n<outs, ins, asmstr, patterns, itin> {
- bits<7> SImm7;
+multiclass StorePairOffset<bits<2> opc, bit V, RegisterClass regtype,
+ Operand indextype, string asm> {
+ let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in
+ def i : BaseLoadStorePairOffset<opc, V, 0, (outs),
+ (ins regtype:$Rt, regtype:$Rt2,
+ GPR64sp:$Rn, indextype:$offset),
+ asm>,
+ Sched<[WriteSTP]>;
+
+ def : InstAlias<asm # " $Rt, $Rt2, [$Rn]",
+ (!cast<Instruction>(NAME # "i") regtype:$Rt, regtype:$Rt2,
+ GPR64sp:$Rn, 0)>;
+}
+
+// (pre-indexed)
+class BaseLoadStorePairPreIdx<bits<2> opc, bit V, bit L, dag oops, dag iops,
+ string asm>
+ : I<oops, iops, asm, "\t$Rt, $Rt2, [$Rn, $offset]!", "$Rn = $wback", []> {
+ bits<5> Rt;
+ bits<5> Rt2;
+ bits<5> Rn;
+ bits<7> offset;
let Inst{31-30} = opc;
let Inst{29-27} = 0b101;
- let Inst{26} = v;
+ let Inst{26} = V;
let Inst{25-23} = 0b011;
- let Inst{22} = l;
- let Inst{21-15} = SImm7;
- // Inherit Rt2 in 14-10
- // Inherit Rn in 9-5
- // Inherit Rt in 4-0
+ let Inst{22} = L;
+ let Inst{21-15} = offset;
+ let Inst{14-10} = Rt2;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rt;
+
+ let DecoderMethod = "DecodePairLdStInstruction";
}
-// Format for Load-store non-temporal register pair (offset) instructions
-class A64I_LSPnontemp<bits<2> opc, bit v, bit l,
- dag outs, dag ins, string asmstr,
- list<dag> patterns, InstrItinClass itin>
- : A64InstRtt2n<outs, ins, asmstr, patterns, itin> {
- bits<7> SImm7;
+let hasSideEffects = 0 in {
+let mayStore = 0, mayLoad = 1 in
+class LoadPairPreIdx<bits<2> opc, bit V, RegisterClass regtype,
+ Operand indextype, string asm>
+ : BaseLoadStorePairPreIdx<opc, V, 1,
+ (outs GPR64sp:$wback, regtype:$Rt, regtype:$Rt2),
+ (ins GPR64sp:$Rn, indextype:$offset), asm>,
+ Sched<[WriteLD, WriteLDHi, WriteAdr]>;
+
+let mayStore = 1, mayLoad = 0 in
+class StorePairPreIdx<bits<2> opc, bit V, RegisterClass regtype,
+ Operand indextype, string asm>
+ : BaseLoadStorePairPreIdx<opc, V, 0, (outs GPR64sp:$wback),
+ (ins regtype:$Rt, regtype:$Rt2,
+ GPR64sp:$Rn, indextype:$offset),
+ asm>,
+ Sched<[WriteAdr, WriteSTP]>;
+} // hasSideEffects = 0
+
+// (post-indexed)
+
+class BaseLoadStorePairPostIdx<bits<2> opc, bit V, bit L, dag oops, dag iops,
+ string asm>
+ : I<oops, iops, asm, "\t$Rt, $Rt2, [$Rn], $offset", "$Rn = $wback", []> {
+ bits<5> Rt;
+ bits<5> Rt2;
+ bits<5> Rn;
+ bits<7> offset;
+ let Inst{31-30} = opc;
+ let Inst{29-27} = 0b101;
+ let Inst{26} = V;
+ let Inst{25-23} = 0b001;
+ let Inst{22} = L;
+ let Inst{21-15} = offset;
+ let Inst{14-10} = Rt2;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rt;
+ let DecoderMethod = "DecodePairLdStInstruction";
+}
+
+let hasSideEffects = 0 in {
+let mayStore = 0, mayLoad = 1 in
+class LoadPairPostIdx<bits<2> opc, bit V, RegisterClass regtype,
+ Operand idxtype, string asm>
+ : BaseLoadStorePairPostIdx<opc, V, 1,
+ (outs GPR64sp:$wback, regtype:$Rt, regtype:$Rt2),
+ (ins GPR64sp:$Rn, idxtype:$offset), asm>,
+ Sched<[WriteLD, WriteLDHi, WriteAdr]>;
+
+let mayStore = 1, mayLoad = 0 in
+class StorePairPostIdx<bits<2> opc, bit V, RegisterClass regtype,
+ Operand idxtype, string asm>
+  : BaseLoadStorePairPostIdx<opc, V, 0, (outs GPR64sp:$wback),
+                             (ins regtype:$Rt, regtype:$Rt2,
+                                  GPR64sp:$Rn, idxtype:$offset),
+ asm>,
+ Sched<[WriteAdr, WriteSTP]>;
+} // hasSideEffects = 0
+
+// (no-allocate)
+
+class BaseLoadStorePairNoAlloc<bits<2> opc, bit V, bit L, dag oops, dag iops,
+ string asm>
+ : I<oops, iops, asm, "\t$Rt, $Rt2, [$Rn, $offset]", "", []> {
+ bits<5> Rt;
+ bits<5> Rt2;
+ bits<5> Rn;
+ bits<7> offset;
let Inst{31-30} = opc;
let Inst{29-27} = 0b101;
- let Inst{26} = v;
+ let Inst{26} = V;
let Inst{25-23} = 0b000;
- let Inst{22} = l;
- let Inst{21-15} = SImm7;
- // Inherit Rt2 in 14-10
- // Inherit Rn in 9-5
- // Inherit Rt in 4-0
-}
-
-// Format for Logical (immediate) instructions
-class A64I_logicalimm<bit sf, bits<2> opc,
- dag outs, dag ins, string asmstr,
- list<dag> patterns, InstrItinClass itin>
- : A64InstRdn<outs, ins, asmstr, patterns, itin> {
- bit N;
- bits<6> ImmR;
- bits<6> ImmS;
-
- // N, ImmR and ImmS have no separate existence in any assembly syntax (or for
- // selection), so we'll combine them into a single field here.
- bits<13> Imm;
- // N = Imm{12};
- // ImmR = Imm{11-6};
- // ImmS = Imm{5-0};
+ let Inst{22} = L;
+ let Inst{21-15} = offset;
+ let Inst{14-10} = Rt2;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rt;
- let Inst{31} = sf;
- let Inst{30-29} = opc;
- let Inst{28-23} = 0b100100;
- let Inst{22} = Imm{12};
- let Inst{21-16} = Imm{11-6};
- let Inst{15-10} = Imm{5-0};
- // Rn inherited in 9-5
- // Rd inherited in 4-0
+ let DecoderMethod = "DecodePairLdStInstruction";
}
-// Format for Logical (shifted register) instructions
-class A64I_logicalshift<bit sf, bits<2> opc, bits<2> shift, bit N,
- dag outs, dag ins, string asmstr,
- list<dag> patterns, InstrItinClass itin>
- : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
- bits<6> Imm6;
+multiclass LoadPairNoAlloc<bits<2> opc, bit V, RegisterClass regtype,
+ Operand indextype, string asm> {
+ let hasSideEffects = 0, mayStore = 0, mayLoad = 1 in
+ def i : BaseLoadStorePairNoAlloc<opc, V, 1,
+ (outs regtype:$Rt, regtype:$Rt2),
+ (ins GPR64sp:$Rn, indextype:$offset), asm>,
+ Sched<[WriteLD, WriteLDHi]>;
- let Inst{31} = sf;
- let Inst{30-29} = opc;
- let Inst{28-24} = 0b01010;
- let Inst{23-22} = shift;
- let Inst{21} = N;
- // Rm inherited
- let Inst{15-10} = Imm6;
- // Rn inherited
- // Rd inherited
-}
-
-// Format for Move wide (immediate)
-class A64I_movw<bit sf, bits<2> opc,
- dag outs, dag ins, string asmstr,
- list<dag> patterns, InstrItinClass itin>
- : A64InstRd<outs, ins, asmstr, patterns, itin> {
- bits<16> UImm16;
- bits<2> Shift; // Called "hw" officially
- let Inst{31} = sf;
- let Inst{30-29} = opc;
- let Inst{28-23} = 0b100101;
- let Inst{22-21} = Shift;
- let Inst{20-5} = UImm16;
- // Inherits Rd in 4-0
+ def : InstAlias<asm # "\t$Rt, $Rt2, [$Rn]",
+ (!cast<Instruction>(NAME # "i") regtype:$Rt, regtype:$Rt2,
+ GPR64sp:$Rn, 0)>;
}
-// Format for PC-relative addressing instructions, ADR and ADRP.
-class A64I_PCADR<bit op,
- dag outs, dag ins, string asmstr,
- list<dag> patterns, InstrItinClass itin>
- : A64InstRd<outs, ins, asmstr, patterns, itin> {
- bits<21> Label;
+multiclass StorePairNoAlloc<bits<2> opc, bit V, RegisterClass regtype,
+ Operand indextype, string asm> {
+ let hasSideEffects = 0, mayStore = 1, mayLoad = 0 in
+ def i : BaseLoadStorePairNoAlloc<opc, V, 0, (outs),
+ (ins regtype:$Rt, regtype:$Rt2,
+ GPR64sp:$Rn, indextype:$offset),
+ asm>,
+ Sched<[WriteSTP]>;
+
+ def : InstAlias<asm # "\t$Rt, $Rt2, [$Rn]",
+ (!cast<Instruction>(NAME # "i") regtype:$Rt, regtype:$Rt2,
+ GPR64sp:$Rn, 0)>;
+}
- let Inst{31} = op;
- let Inst{30-29} = Label{1-0};
- let Inst{28-24} = 0b10000;
- let Inst{23-5} = Label{20-2};
+//---
+// Load/store exclusive
+//---
+
+// True exclusive operations write to and/or read from the system's exclusive
+// monitors, which, as far as a compiler is concerned, can be modelled as a
+// random shared memory address. Hence LoadExclusive is marked mayStore.
+//
+// Since these instructions have the undefined register bits set to 1 in
+// their canonical form, we need a post encoder method to set those bits
+// to 1 when encoding these instructions. We do this using the
+// fixLoadStoreExclusive function. This function has template parameters:
+//
+// fixLoadStoreExclusive<int hasRs, int hasRt2>
+//
+// hasRs indicates that the instruction uses the Rs field, so we won't set
+// it to 1 (and the same for Rt2). We don't need template parameters for
+// the other register fields since Rt and Rn are always used.
+//
+let hasSideEffects = 1, mayLoad = 1, mayStore = 1 in
+class BaseLoadStoreExclusive<bits<2> sz, bit o2, bit L, bit o1, bit o0,
+ dag oops, dag iops, string asm, string operands>
+ : I<oops, iops, asm, operands, "", []> {
+ let Inst{31-30} = sz;
+ let Inst{29-24} = 0b001000;
+ let Inst{23} = o2;
+ let Inst{22} = L;
+ let Inst{21} = o1;
+ let Inst{15} = o0;
+
+ let DecoderMethod = "DecodeExclusiveLdStInstruction";
}
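+
+// Concretely (a sketch of the MCCodeEmitter side, which lives outside this
+// file): fixLoadStoreExclusive<0,0> ORs ones into both unused fields, i.e.
+// EncodedValue |= 0x001F0000 for Rs (bits 20-16) and |= 0x00007C00 for Rt2
+// (bits 14-10), so e.g. a plain LDXR encodes with those fields all-ones.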
-// Format for system instructions
-class A64I_system<bit l,
- dag outs, dag ins, string asmstr,
- list<dag> patterns, InstrItinClass itin>
- : A64Inst<outs, ins, asmstr, patterns, itin> {
- bits<2> Op0;
- bits<3> Op1;
- bits<4> CRn;
- bits<4> CRm;
- bits<3> Op2;
+// Neither Rs nor Rt2 operands.
+class LoadStoreExclusiveSimple<bits<2> sz, bit o2, bit L, bit o1, bit o0,
+ dag oops, dag iops, string asm, string operands>
+ : BaseLoadStoreExclusive<sz, o2, L, o1, o0, oops, iops, asm, operands> {
bits<5> Rt;
+ bits<5> Rn;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rt;
- let Inst{31-22} = 0b1101010100;
- let Inst{21} = l;
- let Inst{20-19} = Op0;
- let Inst{18-16} = Op1;
- let Inst{15-12} = CRn;
- let Inst{11-8} = CRm;
- let Inst{7-5} = Op2;
+ let PostEncoderMethod = "fixLoadStoreExclusive<0,0>";
+}
+
+// Simple load acquires don't set the exclusive monitor.
+let mayLoad = 1, mayStore = 0 in
+class LoadAcquire<bits<2> sz, bit o2, bit L, bit o1, bit o0,
+ RegisterClass regtype, string asm>
+ : LoadStoreExclusiveSimple<sz, o2, L, o1, o0, (outs regtype:$Rt),
+ (ins GPR64sp0:$Rn), asm, "\t$Rt, [$Rn]">,
+ Sched<[WriteLD]>;
+
+class LoadExclusive<bits<2> sz, bit o2, bit L, bit o1, bit o0,
+ RegisterClass regtype, string asm>
+ : LoadStoreExclusiveSimple<sz, o2, L, o1, o0, (outs regtype:$Rt),
+ (ins GPR64sp0:$Rn), asm, "\t$Rt, [$Rn]">,
+ Sched<[WriteLD]>;
+
+class LoadExclusivePair<bits<2> sz, bit o2, bit L, bit o1, bit o0,
+ RegisterClass regtype, string asm>
+ : BaseLoadStoreExclusive<sz, o2, L, o1, o0,
+ (outs regtype:$Rt, regtype:$Rt2),
+ (ins GPR64sp0:$Rn), asm,
+ "\t$Rt, $Rt2, [$Rn]">,
+ Sched<[WriteLD, WriteLDHi]> {
+ bits<5> Rt;
+ bits<5> Rt2;
+ bits<5> Rn;
+ let Inst{14-10} = Rt2;
+ let Inst{9-5} = Rn;
let Inst{4-0} = Rt;
- // These instructions can do horrible things.
- let hasSideEffects = 1;
+ let PostEncoderMethod = "fixLoadStoreExclusive<0,1>";
}
-// Format for unconditional branch (immediate) instructions
-class A64I_Bimm<bit op,
- dag outs, dag ins, string asmstr,
- list<dag> patterns, InstrItinClass itin>
- : A64Inst<outs, ins, asmstr, patterns, itin> {
- // Doubly special in not even sharing register fields with other
- // instructions, so we create our own Rn here.
- bits<26> Label;
+// Simple store release operations do not check the exclusive monitor.
+let mayLoad = 0, mayStore = 1 in
+class StoreRelease<bits<2> sz, bit o2, bit L, bit o1, bit o0,
+ RegisterClass regtype, string asm>
+ : LoadStoreExclusiveSimple<sz, o2, L, o1, o0, (outs),
+ (ins regtype:$Rt, GPR64sp0:$Rn),
+ asm, "\t$Rt, [$Rn]">,
+ Sched<[WriteST]>;
+
+let mayLoad = 1, mayStore = 1 in
+class StoreExclusive<bits<2> sz, bit o2, bit L, bit o1, bit o0,
+ RegisterClass regtype, string asm>
+ : BaseLoadStoreExclusive<sz, o2, L, o1, o0, (outs GPR32:$Ws),
+ (ins regtype:$Rt, GPR64sp0:$Rn),
+ asm, "\t$Ws, $Rt, [$Rn]">,
+ Sched<[WriteSTX]> {
+ bits<5> Ws;
+ bits<5> Rt;
+ bits<5> Rn;
+ let Inst{20-16} = Ws;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rt;
- let Inst{31} = op;
- let Inst{30-26} = 0b00101;
- let Inst{25-0} = Label;
+ let Constraints = "@earlyclobber $Ws";
+ let PostEncoderMethod = "fixLoadStoreExclusive<1,0>";
+}
+
+class StoreExclusivePair<bits<2> sz, bit o2, bit L, bit o1, bit o0,
+ RegisterClass regtype, string asm>
+ : BaseLoadStoreExclusive<sz, o2, L, o1, o0,
+ (outs GPR32:$Ws),
+ (ins regtype:$Rt, regtype:$Rt2, GPR64sp0:$Rn),
+ asm, "\t$Ws, $Rt, $Rt2, [$Rn]">,
+ Sched<[WriteSTX]> {
+ bits<5> Ws;
+ bits<5> Rt;
+ bits<5> Rt2;
+ bits<5> Rn;
+ let Inst{20-16} = Ws;
+ let Inst{14-10} = Rt2;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rt;
+
+ let Constraints = "@earlyclobber $Ws";
}
-// Format for Test & branch (immediate) instructions
-class A64I_TBimm<bit op,
- dag outs, dag ins, string asmstr,
- list<dag> patterns, InstrItinClass itin>
- : A64InstRt<outs, ins, asmstr, patterns, itin> {
- // Doubly special in not even sharing register fields with other
- // instructions, so we create our own Rn here.
- bits<6> Imm;
- bits<14> Label;
+//---
+// Exception generation
+//---
- let Inst{31} = Imm{5};
- let Inst{30-25} = 0b011011;
- let Inst{24} = op;
- let Inst{23-19} = Imm{4-0};
- let Inst{18-5} = Label;
- // Inherit Rt in 4-0
+let mayLoad = 0, mayStore = 0, hasSideEffects = 1 in
+class ExceptionGeneration<bits<3> op1, bits<2> ll, string asm>
+ : I<(outs), (ins imm0_65535:$imm), asm, "\t$imm", "", []>,
+ Sched<[WriteSys]> {
+ bits<16> imm;
+ let Inst{31-24} = 0b11010100;
+ let Inst{23-21} = op1;
+ let Inst{20-5} = imm;
+ let Inst{4-2} = 0b000;
+ let Inst{1-0} = ll;
+}
+
+let Predicates = [HasFPARMv8] in {
+
+//---
+// Floating point to integer conversion
+//---
+
+class BaseFPToIntegerUnscaled<bits<2> type, bits<2> rmode, bits<3> opcode,
+ RegisterClass srcType, RegisterClass dstType,
+ string asm, list<dag> pattern>
+ : I<(outs dstType:$Rd), (ins srcType:$Rn),
+ asm, "\t$Rd, $Rn", "", pattern>,
+ Sched<[WriteFCvt]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ let Inst{30-29} = 0b00;
+ let Inst{28-24} = 0b11110;
+ let Inst{23-22} = type;
+ let Inst{21} = 1;
+ let Inst{20-19} = rmode;
+ let Inst{18-16} = opcode;
+ let Inst{15-10} = 0;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
}
-// Format for Unconditional branch (register) instructions, including
-// RET. Shares no fields with instructions further up the hierarchy
-// so top-level.
-class A64I_Breg<bits<4> opc, bits<5> op2, bits<6> op3, bits<5> op4,
- dag outs, dag ins, string asmstr,
- list<dag> patterns, InstrItinClass itin>
- : A64Inst<outs, ins, asmstr, patterns, itin> {
- // Doubly special in not even sharing register fields with other
- // instructions, so we create our own Rn here.
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
+class BaseFPToInteger<bits<2> type, bits<2> rmode, bits<3> opcode,
+ RegisterClass srcType, RegisterClass dstType,
+ Operand immType, string asm, list<dag> pattern>
+ : I<(outs dstType:$Rd), (ins srcType:$Rn, immType:$scale),
+ asm, "\t$Rd, $Rn, $scale", "", pattern>,
+ Sched<[WriteFCvt]> {
+ bits<5> Rd;
bits<5> Rn;
+ bits<6> scale;
+ let Inst{30-29} = 0b00;
+ let Inst{28-24} = 0b11110;
+ let Inst{23-22} = type;
+ let Inst{21} = 0;
+ let Inst{20-19} = rmode;
+ let Inst{18-16} = opcode;
+ let Inst{15-10} = scale;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+}
- let Inst{31-25} = 0b1101011;
- let Inst{24-21} = opc;
- let Inst{20-16} = op2;
- let Inst{15-10} = op3;
+multiclass FPToIntegerUnscaled<bits<2> rmode, bits<3> opcode, string asm,
+ SDPatternOperator OpN> {
+ // Unscaled single-precision to 32-bit
+ def UWSr : BaseFPToIntegerUnscaled<0b00, rmode, opcode, FPR32, GPR32, asm,
+ [(set GPR32:$Rd, (OpN FPR32:$Rn))]> {
+ let Inst{31} = 0; // 32-bit GPR flag
+ }
+
+ // Unscaled single-precision to 64-bit
+ def UXSr : BaseFPToIntegerUnscaled<0b00, rmode, opcode, FPR32, GPR64, asm,
+ [(set GPR64:$Rd, (OpN FPR32:$Rn))]> {
+ let Inst{31} = 1; // 64-bit GPR flag
+ }
+
+ // Unscaled double-precision to 32-bit
+ def UWDr : BaseFPToIntegerUnscaled<0b01, rmode, opcode, FPR64, GPR32, asm,
+ [(set GPR32:$Rd, (OpN (f64 FPR64:$Rn)))]> {
+ let Inst{31} = 0; // 32-bit GPR flag
+ }
+
+ // Unscaled double-precision to 64-bit
+ def UXDr : BaseFPToIntegerUnscaled<0b01, rmode, opcode, FPR64, GPR64, asm,
+ [(set GPR64:$Rd, (OpN (f64 FPR64:$Rn)))]> {
+ let Inst{31} = 1; // 64-bit GPR flag
+ }
+}
+
+multiclass FPToIntegerScaled<bits<2> rmode, bits<3> opcode, string asm,
+ SDPatternOperator OpN> {
+ // Scaled single-precision to 32-bit
+ def SWSri : BaseFPToInteger<0b00, rmode, opcode, FPR32, GPR32,
+ fixedpoint_f32_i32, asm,
+ [(set GPR32:$Rd, (OpN (fmul FPR32:$Rn,
+ fixedpoint_f32_i32:$scale)))]> {
+ let Inst{31} = 0; // 32-bit GPR flag
+ let scale{5} = 1;
+ }
+
+ // Scaled single-precision to 64-bit
+ def SXSri : BaseFPToInteger<0b00, rmode, opcode, FPR32, GPR64,
+ fixedpoint_f32_i64, asm,
+ [(set GPR64:$Rd, (OpN (fmul FPR32:$Rn,
+ fixedpoint_f32_i64:$scale)))]> {
+ let Inst{31} = 1; // 64-bit GPR flag
+ }
+
+ // Scaled double-precision to 32-bit
+ def SWDri : BaseFPToInteger<0b01, rmode, opcode, FPR64, GPR32,
+ fixedpoint_f64_i32, asm,
+ [(set GPR32:$Rd, (OpN (fmul FPR64:$Rn,
+ fixedpoint_f64_i32:$scale)))]> {
+ let Inst{31} = 0; // 32-bit GPR flag
+ let scale{5} = 1;
+ }
+
+ // Scaled double-precision to 64-bit
+ def SXDri : BaseFPToInteger<0b01, rmode, opcode, FPR64, GPR64,
+ fixedpoint_f64_i64, asm,
+ [(set GPR64:$Rd, (OpN (fmul FPR64:$Rn,
+ fixedpoint_f64_i64:$scale)))]> {
+ let Inst{31} = 1; // 64-bit GPR flag
+ }
+}
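+
+// Illustrative use (assumed): the scaled fixed-point forms are instantiated
+// alongside the unscaled ones, e.g.
+//   defm FCVTZS : FPToIntegerScaled<0b11, 0b000, "fcvtzs", fp_to_sint>;
+// giving "fcvtzs Wd, Sn, #scale" style instructions for each size combination.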
+
+//---
+// Integer to floating point conversion
+//---
+
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
+class BaseIntegerToFP<bit isUnsigned,
+ RegisterClass srcType, RegisterClass dstType,
+ Operand immType, string asm, list<dag> pattern>
+ : I<(outs dstType:$Rd), (ins srcType:$Rn, immType:$scale),
+ asm, "\t$Rd, $Rn, $scale", "", pattern>,
+ Sched<[WriteFCvt]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ bits<6> scale;
+ let Inst{30-23} = 0b00111100;
+ let Inst{21-17} = 0b00001;
+ let Inst{16} = isUnsigned;
+ let Inst{15-10} = scale;
let Inst{9-5} = Rn;
- let Inst{4-0} = op4;
+ let Inst{4-0} = Rd;
}
+class BaseIntegerToFPUnscaled<bit isUnsigned,
+ RegisterClass srcType, RegisterClass dstType,
+ ValueType dvt, string asm, SDNode node>
+ : I<(outs dstType:$Rd), (ins srcType:$Rn),
+ asm, "\t$Rd, $Rn", "", [(set (dvt dstType:$Rd), (node srcType:$Rn))]>,
+ Sched<[WriteFCvt]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ bits<6> scale;
+ let Inst{30-23} = 0b00111100;
+ let Inst{21-17} = 0b10001;
+ let Inst{16} = isUnsigned;
+ let Inst{15-10} = 0b000000;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+}
-//===----------------------------------------------------------------------===//
-//
-// Neon Instruction Format Definitions.
-//
+multiclass IntegerToFP<bit isUnsigned, string asm, SDNode node> {
+ // Unscaled
+ def UWSri: BaseIntegerToFPUnscaled<isUnsigned, GPR32, FPR32, f32, asm, node> {
+ let Inst{31} = 0; // 32-bit GPR flag
+ let Inst{22} = 0; // 32-bit FPR flag
+ }
+
+ def UWDri: BaseIntegerToFPUnscaled<isUnsigned, GPR32, FPR64, f64, asm, node> {
+ let Inst{31} = 0; // 32-bit GPR flag
+ let Inst{22} = 1; // 64-bit FPR flag
+ }
+
+ def UXSri: BaseIntegerToFPUnscaled<isUnsigned, GPR64, FPR32, f32, asm, node> {
+ let Inst{31} = 1; // 64-bit GPR flag
+ let Inst{22} = 0; // 32-bit FPR flag
+ }
+
+ def UXDri: BaseIntegerToFPUnscaled<isUnsigned, GPR64, FPR64, f64, asm, node> {
+ let Inst{31} = 1; // 64-bit GPR flag
+ let Inst{22} = 1; // 64-bit FPR flag
+ }
+
+ // Scaled
+ def SWSri: BaseIntegerToFP<isUnsigned, GPR32, FPR32, fixedpoint_f32_i32, asm,
+ [(set FPR32:$Rd,
+ (fdiv (node GPR32:$Rn),
+ fixedpoint_f32_i32:$scale))]> {
+ let Inst{31} = 0; // 32-bit GPR flag
+ let Inst{22} = 0; // 32-bit FPR flag
+ let scale{5} = 1;
+ }
+
+ def SWDri: BaseIntegerToFP<isUnsigned, GPR32, FPR64, fixedpoint_f64_i32, asm,
+ [(set FPR64:$Rd,
+ (fdiv (node GPR32:$Rn),
+ fixedpoint_f64_i32:$scale))]> {
+ let Inst{31} = 0; // 32-bit GPR flag
+ let Inst{22} = 1; // 64-bit FPR flag
+ let scale{5} = 1;
+ }
+
+ def SXSri: BaseIntegerToFP<isUnsigned, GPR64, FPR32, fixedpoint_f32_i64, asm,
+ [(set FPR32:$Rd,
+ (fdiv (node GPR64:$Rn),
+ fixedpoint_f32_i64:$scale))]> {
+ let Inst{31} = 1; // 64-bit GPR flag
+ let Inst{22} = 0; // 32-bit FPR flag
+ }
+
+ def SXDri: BaseIntegerToFP<isUnsigned, GPR64, FPR64, fixedpoint_f64_i64, asm,
+ [(set FPR64:$Rd,
+ (fdiv (node GPR64:$Rn),
+ fixedpoint_f64_i64:$scale))]> {
+ let Inst{31} = 1; // 64-bit GPR flag
+ let Inst{22} = 1; // 64-bit FPR flag
+ }
+}
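+
+// Illustrative use (assumed instantiations):
+//   defm SCVTF : IntegerToFP<0, "scvtf", sint_to_fp>;
+//   defm UCVTF : IntegerToFP<1, "ucvtf", uint_to_fp>;
+// The single isUnsigned bit selects the conversion; each defm yields four
+// unscaled and four scaled variants.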
+
+//---
+// Unscaled integer <-> floating point conversion (i.e. FMOV)
+//---
+
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
+class BaseUnscaledConversion<bits<2> rmode, bits<3> opcode,
+ RegisterClass srcType, RegisterClass dstType,
+ string asm>
+ : I<(outs dstType:$Rd), (ins srcType:$Rn), asm, "\t$Rd, $Rn", "",
+ // We use COPY_TO_REGCLASS for these bitconvert operations.
+ // copyPhysReg() expands the resultant COPY instructions after
+ // regalloc is done. This gives greater freedom for the allocator
+ // and related passes (coalescing, copy propagation, et al.) to
+ // be more effective.
+ [/*(set (dvt dstType:$Rd), (bitconvert (svt srcType:$Rn)))*/]>,
+ Sched<[WriteFCopy]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ let Inst{30-23} = 0b00111100;
+ let Inst{21} = 1;
+ let Inst{20-19} = rmode;
+ let Inst{18-16} = opcode;
+ let Inst{15-10} = 0b000000;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+}
+
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
+class BaseUnscaledConversionToHigh<bits<2> rmode, bits<3> opcode,
+ RegisterClass srcType, RegisterOperand dstType, string asm,
+ string kind>
+ : I<(outs dstType:$Rd), (ins srcType:$Rn, VectorIndex1:$idx), asm,
+ "{\t$Rd"#kind#"$idx, $Rn|"#kind#"\t$Rd$idx, $Rn}", "", []>,
+ Sched<[WriteFCopy]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ let Inst{30-23} = 0b00111101;
+ let Inst{21} = 1;
+ let Inst{20-19} = rmode;
+ let Inst{18-16} = opcode;
+ let Inst{15-10} = 0b000000;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+
+ let DecoderMethod = "DecodeFMOVLaneInstruction";
+}
+
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
+class BaseUnscaledConversionFromHigh<bits<2> rmode, bits<3> opcode,
+ RegisterOperand srcType, RegisterClass dstType, string asm,
+ string kind>
+ : I<(outs dstType:$Rd), (ins srcType:$Rn, VectorIndex1:$idx), asm,
+ "{\t$Rd, $Rn"#kind#"$idx|"#kind#"\t$Rd, $Rn$idx}", "", []>,
+ Sched<[WriteFCopy]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ let Inst{30-23} = 0b00111101;
+ let Inst{21} = 1;
+ let Inst{20-19} = rmode;
+ let Inst{18-16} = opcode;
+ let Inst{15-10} = 0b000000;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+
+ let DecoderMethod = "DecodeFMOVLaneInstruction";
+}
+
+multiclass UnscaledConversion<string asm> {
+ def WSr : BaseUnscaledConversion<0b00, 0b111, GPR32, FPR32, asm> {
+ let Inst{31} = 0; // 32-bit GPR flag
+ let Inst{22} = 0; // 32-bit FPR flag
+ }
+
+ def XDr : BaseUnscaledConversion<0b00, 0b111, GPR64, FPR64, asm> {
+ let Inst{31} = 1; // 64-bit GPR flag
+ let Inst{22} = 1; // 64-bit FPR flag
+ }
+
+ def SWr : BaseUnscaledConversion<0b00, 0b110, FPR32, GPR32, asm> {
+ let Inst{31} = 0; // 32-bit GPR flag
+ let Inst{22} = 0; // 32-bit FPR flag
+ }
+
+ def DXr : BaseUnscaledConversion<0b00, 0b110, FPR64, GPR64, asm> {
+ let Inst{31} = 1; // 64-bit GPR flag
+ let Inst{22} = 1; // 64-bit FPR flag
+ }
+
+ def XDHighr : BaseUnscaledConversionToHigh<0b01, 0b111, GPR64, V128,
+ asm, ".d"> {
+ let Inst{31} = 1;
+ let Inst{22} = 0;
+ }
+
+ def DXHighr : BaseUnscaledConversionFromHigh<0b01, 0b110, V128, GPR64,
+ asm, ".d"> {
+ let Inst{31} = 1;
+ let Inst{22} = 0;
+ }
+}
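+
+// Illustrative use (assumed): 'defm FMOV : UnscaledConversion<"fmov">;'
+// produces the four GPR<->FPR moves plus the two V128 high-lane forms above.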
+
+//---
+// Floating point conversion
+//---
+
+class BaseFPConversion<bits<2> type, bits<2> opcode, RegisterClass dstType,
+ RegisterClass srcType, string asm, list<dag> pattern>
+ : I<(outs dstType:$Rd), (ins srcType:$Rn), asm, "\t$Rd, $Rn", "", pattern>,
+ Sched<[WriteFCvt]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ let Inst{31-24} = 0b00011110;
+ let Inst{23-22} = type;
+ let Inst{21-17} = 0b10001;
+ let Inst{16-15} = opcode;
+ let Inst{14-10} = 0b10000;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+}
+
+multiclass FPConversion<string asm> {
+ // Double-precision to Half-precision
+ def HDr : BaseFPConversion<0b01, 0b11, FPR16, FPR64, asm,
+ [(set FPR16:$Rd, (fround FPR64:$Rn))]>;
+
+ // Double-precision to Single-precision
+ def SDr : BaseFPConversion<0b01, 0b00, FPR32, FPR64, asm,
+ [(set FPR32:$Rd, (fround FPR64:$Rn))]>;
+
+ // Half-precision to Double-precision
+ def DHr : BaseFPConversion<0b11, 0b01, FPR64, FPR16, asm,
+ [(set FPR64:$Rd, (fextend FPR16:$Rn))]>;
+
+ // Half-precision to Single-precision
+ def SHr : BaseFPConversion<0b11, 0b00, FPR32, FPR16, asm,
+ [(set FPR32:$Rd, (fextend FPR16:$Rn))]>;
+
+ // Single-precision to Double-precision
+ def DSr : BaseFPConversion<0b00, 0b01, FPR64, FPR32, asm,
+ [(set FPR64:$Rd, (fextend FPR32:$Rn))]>;
+
+ // Single-precision to Half-precision
+ def HSr : BaseFPConversion<0b00, 0b11, FPR16, FPR32, asm,
+ [(set FPR16:$Rd, (fround FPR32:$Rn))]>;
+}
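+
+// Illustrative use (assumed): 'defm FCVT : FPConversion<"fcvt">;' covers all
+// six half/single/double conversion directions in one defm.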
+
+//---
+// Single operand floating point data processing
+//---
+
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
+class BaseSingleOperandFPData<bits<4> opcode, RegisterClass regtype,
+ ValueType vt, string asm, SDPatternOperator node>
+ : I<(outs regtype:$Rd), (ins regtype:$Rn), asm, "\t$Rd, $Rn", "",
+ [(set (vt regtype:$Rd), (node (vt regtype:$Rn)))]>,
+ Sched<[WriteF]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ let Inst{31-23} = 0b000111100;
+ let Inst{21-19} = 0b100;
+ let Inst{18-15} = opcode;
+ let Inst{14-10} = 0b10000;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+}
+
+multiclass SingleOperandFPData<bits<4> opcode, string asm,
+ SDPatternOperator node = null_frag> {
+ def Sr : BaseSingleOperandFPData<opcode, FPR32, f32, asm, node> {
+ let Inst{22} = 0; // 32-bit size flag
+ }
+
+ def Dr : BaseSingleOperandFPData<opcode, FPR64, f64, asm, node> {
+ let Inst{22} = 1; // 64-bit size flag
+ }
+}
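+
+// Illustrative use (assumed opcodes):
+//   defm FABS : SingleOperandFPData<0b0001, "fabs", fabs>;
+//   defm FNEG : SingleOperandFPData<0b0010, "fneg", fneg>;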
+
+//---
+// Two operand floating point data processing
+//---
+
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
+class BaseTwoOperandFPData<bits<4> opcode, RegisterClass regtype,
+ string asm, list<dag> pat>
+ : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm),
+ asm, "\t$Rd, $Rn, $Rm", "", pat>,
+ Sched<[WriteF]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ bits<5> Rm;
+ let Inst{31-23} = 0b000111100;
+ let Inst{21} = 1;
+ let Inst{20-16} = Rm;
+ let Inst{15-12} = opcode;
+ let Inst{11-10} = 0b10;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+}
+
+multiclass TwoOperandFPData<bits<4> opcode, string asm,
+ SDPatternOperator node = null_frag> {
+ def Srr : BaseTwoOperandFPData<opcode, FPR32, asm,
+ [(set (f32 FPR32:$Rd),
+ (node (f32 FPR32:$Rn), (f32 FPR32:$Rm)))]> {
+ let Inst{22} = 0; // 32-bit size flag
+ }
+
+ def Drr : BaseTwoOperandFPData<opcode, FPR64, asm,
+ [(set (f64 FPR64:$Rd),
+ (node (f64 FPR64:$Rn), (f64 FPR64:$Rm)))]> {
+ let Inst{22} = 1; // 64-bit size flag
+ }
+}
+
+multiclass TwoOperandFPDataNeg<bits<4> opcode, string asm, SDNode node> {
+ def Srr : BaseTwoOperandFPData<opcode, FPR32, asm,
+ [(set FPR32:$Rd, (fneg (node FPR32:$Rn, (f32 FPR32:$Rm))))]> {
+ let Inst{22} = 0; // 32-bit size flag
+ }
+
+ def Drr : BaseTwoOperandFPData<opcode, FPR64, asm,
+ [(set FPR64:$Rd, (fneg (node FPR64:$Rn, (f64 FPR64:$Rm))))]> {
+ let Inst{22} = 1; // 64-bit size flag
+ }
+}
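+
+// Illustrative use (assumed): 'defm FADD : TwoOperandFPData<0b0010, "fadd",
+// fadd>;' for the plain form, and 'defm FNMUL : TwoOperandFPDataNeg<0b1000,
+// "fnmul", fmul>;' for the negated-result form.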
+
+
+//---
+// Three operand floating point data processing
+//---
+
+class BaseThreeOperandFPData<bit isNegated, bit isSub,
+ RegisterClass regtype, string asm, list<dag> pat>
+ : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm, regtype:$Ra),
+ asm, "\t$Rd, $Rn, $Rm, $Ra", "", pat>,
+ Sched<[WriteFMul]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ bits<5> Rm;
+ bits<5> Ra;
+ let Inst{31-23} = 0b000111110;
+ let Inst{21} = isNegated;
+ let Inst{20-16} = Rm;
+ let Inst{15} = isSub;
+ let Inst{14-10} = Ra;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+}
+
+multiclass ThreeOperandFPData<bit isNegated, bit isSub, string asm,
+ SDPatternOperator node> {
+ def Srrr : BaseThreeOperandFPData<isNegated, isSub, FPR32, asm,
+ [(set FPR32:$Rd,
+ (node (f32 FPR32:$Rn), (f32 FPR32:$Rm), (f32 FPR32:$Ra)))]> {
+ let Inst{22} = 0; // 32-bit size flag
+ }
+
+ def Drrr : BaseThreeOperandFPData<isNegated, isSub, FPR64, asm,
+ [(set FPR64:$Rd,
+ (node (f64 FPR64:$Rn), (f64 FPR64:$Rm), (f64 FPR64:$Ra)))]> {
+ let Inst{22} = 1; // 64-bit size flag
+ }
+}
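+
+// Illustrative use (assumed): 'defm FMADD : ThreeOperandFPData<0, 0, "fmadd",
+// fma>;' FMSUB/FNMADD/FNMSUB reuse the class with isSub/isNegated set.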
+
+//---
+// Floating point data comparisons
+//---
+
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
+class BaseOneOperandFPComparison<bit signalAllNans,
+ RegisterClass regtype, string asm,
+ list<dag> pat>
+ : I<(outs), (ins regtype:$Rn), asm, "\t$Rn, #0.0", "", pat>,
+ Sched<[WriteFCmp]> {
+ bits<5> Rn;
+ let Inst{31-23} = 0b000111100;
+ let Inst{21} = 1;
+
+ let Inst{15-10} = 0b001000;
+ let Inst{9-5} = Rn;
+ let Inst{4} = signalAllNans;
+ let Inst{3-0} = 0b1000;
+
+ // Rm should be 0b00000 canonically, but we need to accept any value.
+ let PostEncoderMethod = "fixOneOperandFPComparison";
+}
+
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
+class BaseTwoOperandFPComparison<bit signalAllNans, RegisterClass regtype,
+ string asm, list<dag> pat>
+ : I<(outs), (ins regtype:$Rn, regtype:$Rm), asm, "\t$Rn, $Rm", "", pat>,
+ Sched<[WriteFCmp]> {
+ bits<5> Rm;
+ bits<5> Rn;
+ let Inst{31-23} = 0b000111100;
+ let Inst{21} = 1;
+ let Inst{20-16} = Rm;
+ let Inst{15-10} = 0b001000;
+ let Inst{9-5} = Rn;
+ let Inst{4} = signalAllNans;
+ let Inst{3-0} = 0b0000;
+}
+
+multiclass FPComparison<bit signalAllNans, string asm,
+ SDPatternOperator OpNode = null_frag> {
+ let Defs = [NZCV] in {
+ def Srr : BaseTwoOperandFPComparison<signalAllNans, FPR32, asm,
+ [(OpNode FPR32:$Rn, (f32 FPR32:$Rm)), (implicit NZCV)]> {
+ let Inst{22} = 0;
+ }
+
+ def Sri : BaseOneOperandFPComparison<signalAllNans, FPR32, asm,
+ [(OpNode (f32 FPR32:$Rn), fpimm0), (implicit NZCV)]> {
+ let Inst{22} = 0;
+ }
+
+ def Drr : BaseTwoOperandFPComparison<signalAllNans, FPR64, asm,
+ [(OpNode FPR64:$Rn, (f64 FPR64:$Rm)), (implicit NZCV)]> {
+ let Inst{22} = 1;
+ }
+
+ def Dri : BaseOneOperandFPComparison<signalAllNans, FPR64, asm,
+ [(OpNode (f64 FPR64:$Rn), fpimm0), (implicit NZCV)]> {
+ let Inst{22} = 1;
+ }
+ } // Defs = [NZCV]
+}
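+
+// Illustrative use (assumed): 'defm FCMPE : FPComparison<1, "fcmpe">;' defines
+// the signaling variant; fcmp passes signalAllNans = 0 plus a compare SDNode.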
+
+//---
+// Floating point conditional comparisons
+//---
+
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
+class BaseFPCondComparison<bit signalAllNans,
+ RegisterClass regtype, string asm>
+ : I<(outs), (ins regtype:$Rn, regtype:$Rm, imm0_15:$nzcv, ccode:$cond),
+ asm, "\t$Rn, $Rm, $nzcv, $cond", "", []>,
+ Sched<[WriteFCmp]> {
+ bits<5> Rn;
+ bits<5> Rm;
+ bits<4> nzcv;
+ bits<4> cond;
+
+ let Inst{31-23} = 0b000111100;
+ let Inst{21} = 1;
+ let Inst{20-16} = Rm;
+ let Inst{15-12} = cond;
+ let Inst{11-10} = 0b01;
+ let Inst{9-5} = Rn;
+ let Inst{4} = signalAllNans;
+ let Inst{3-0} = nzcv;
+}
+
+multiclass FPCondComparison<bit signalAllNans, string asm> {
+ let Defs = [NZCV], Uses = [NZCV] in {
+ def Srr : BaseFPCondComparison<signalAllNans, FPR32, asm> {
+ let Inst{22} = 0;
+ }
+
+ def Drr : BaseFPCondComparison<signalAllNans, FPR64, asm> {
+ let Inst{22} = 1;
+ }
+ } // Defs = [NZCV], Uses = [NZCV]
+}
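+
+// Illustrative use (assumed): 'defm FCCMP : FPCondComparison<0, "fccmp">;' and
+// 'defm FCCMPE : FPCondComparison<1, "fccmpe">;'.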
+
+//---
+// Floating point conditional select
+//---
+
+class BaseFPCondSelect<RegisterClass regtype, ValueType vt, string asm>
+ : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm, ccode:$cond),
+ asm, "\t$Rd, $Rn, $Rm, $cond", "",
+ [(set regtype:$Rd,
+ (AArch64csel (vt regtype:$Rn), regtype:$Rm,
+ (i32 imm:$cond), NZCV))]>,
+ Sched<[WriteF]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ bits<5> Rm;
+ bits<4> cond;
+
+ let Inst{31-23} = 0b000111100;
+ let Inst{21} = 1;
+ let Inst{20-16} = Rm;
+ let Inst{15-12} = cond;
+ let Inst{11-10} = 0b11;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+}
+
+multiclass FPCondSelect<string asm> {
+ let Uses = [NZCV] in {
+ def Srrr : BaseFPCondSelect<FPR32, f32, asm> {
+ let Inst{22} = 0;
+ }
+
+ def Drrr : BaseFPCondSelect<FPR64, f64, asm> {
+ let Inst{22} = 1;
+ }
+ } // Uses = [NZCV]
+}
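+
+// Illustrative use (assumed): 'defm FCSEL : FPCondSelect<"fcsel">;' yields the
+// FCSELSrrr and FCSELDrrr records.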
+
+//---
+// Floating move immediate
+//---
+
+class BaseFPMoveImmediate<RegisterClass regtype, Operand fpimmtype, string asm>
+ : I<(outs regtype:$Rd), (ins fpimmtype:$imm), asm, "\t$Rd, $imm", "",
+ [(set regtype:$Rd, fpimmtype:$imm)]>,
+ Sched<[WriteFImm]> {
+ bits<5> Rd;
+ bits<8> imm;
+ let Inst{31-23} = 0b000111100;
+ let Inst{21} = 1;
+ let Inst{20-13} = imm;
+ let Inst{12-5} = 0b10000000;
+ let Inst{4-0} = Rd;
+}
+
+multiclass FPMoveImmediate<string asm> {
+ def Si : BaseFPMoveImmediate<FPR32, fpimm32, asm> {
+ let Inst{22} = 0;
+ }
+
+ def Di : BaseFPMoveImmediate<FPR64, fpimm64, asm> {
+ let Inst{22} = 1;
+ }
+}
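+
+// Illustrative use (assumed): 'defm FMOV : FPMoveImmediate<"fmov">;' gives the
+// "fmov Sd, #imm" and "fmov Dd, #imm" forms.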
+} // end of 'let Predicates = [HasFPARMv8]'
+
+//----------------------------------------------------------------------------
+// AdvSIMD
+//----------------------------------------------------------------------------
let Predicates = [HasNEON] in {
-class NeonInstAlias<string Asm, dag Result, bit Emit = 0b1>
- : InstAlias<Asm, Result, Emit> {
+//----------------------------------------------------------------------------
+// AdvSIMD three register vector instructions
+//----------------------------------------------------------------------------
+
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
+class BaseSIMDThreeSameVector<bit Q, bit U, bits<2> size, bits<5> opcode,
+ RegisterOperand regtype, string asm, string kind,
+ list<dag> pattern>
+ : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm), asm,
+ "{\t$Rd" # kind # ", $Rn" # kind # ", $Rm" # kind #
+ "|" # kind # "\t$Rd, $Rn, $Rm|}", "", pattern>,
+ Sched<[WriteV]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ bits<5> Rm;
+ let Inst{31} = 0;
+ let Inst{30} = Q;
+ let Inst{29} = U;
+ let Inst{28-24} = 0b01110;
+ let Inst{23-22} = size;
+ let Inst{21} = 1;
+ let Inst{20-16} = Rm;
+ let Inst{15-11} = opcode;
+ let Inst{10} = 1;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+}
+
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
+class BaseSIMDThreeSameVectorTied<bit Q, bit U, bits<2> size, bits<5> opcode,
+ RegisterOperand regtype, string asm, string kind,
+ list<dag> pattern>
+ : I<(outs regtype:$dst), (ins regtype:$Rd, regtype:$Rn, regtype:$Rm), asm,
+ "{\t$Rd" # kind # ", $Rn" # kind # ", $Rm" # kind #
+ "|" # kind # "\t$Rd, $Rn, $Rm}", "$Rd = $dst", pattern>,
+ Sched<[WriteV]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ bits<5> Rm;
+ let Inst{31} = 0;
+ let Inst{30} = Q;
+ let Inst{29} = U;
+ let Inst{28-24} = 0b01110;
+ let Inst{23-22} = size;
+ let Inst{21} = 1;
+ let Inst{20-16} = Rm;
+ let Inst{15-11} = opcode;
+ let Inst{10} = 1;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+}
+
+// All operand sizes distinguished in the encoding.
+multiclass SIMDThreeSameVector<bit U, bits<5> opc, string asm,
+ SDPatternOperator OpNode> {
+ def v8i8 : BaseSIMDThreeSameVector<0, U, 0b00, opc, V64,
+ asm, ".8b",
+ [(set (v8i8 V64:$Rd), (OpNode (v8i8 V64:$Rn), (v8i8 V64:$Rm)))]>;
+ def v16i8 : BaseSIMDThreeSameVector<1, U, 0b00, opc, V128,
+ asm, ".16b",
+ [(set (v16i8 V128:$Rd), (OpNode (v16i8 V128:$Rn), (v16i8 V128:$Rm)))]>;
+ def v4i16 : BaseSIMDThreeSameVector<0, U, 0b01, opc, V64,
+ asm, ".4h",
+ [(set (v4i16 V64:$Rd), (OpNode (v4i16 V64:$Rn), (v4i16 V64:$Rm)))]>;
+ def v8i16 : BaseSIMDThreeSameVector<1, U, 0b01, opc, V128,
+ asm, ".8h",
+ [(set (v8i16 V128:$Rd), (OpNode (v8i16 V128:$Rn), (v8i16 V128:$Rm)))]>;
+ def v2i32 : BaseSIMDThreeSameVector<0, U, 0b10, opc, V64,
+ asm, ".2s",
+ [(set (v2i32 V64:$Rd), (OpNode (v2i32 V64:$Rn), (v2i32 V64:$Rm)))]>;
+ def v4i32 : BaseSIMDThreeSameVector<1, U, 0b10, opc, V128,
+ asm, ".4s",
+ [(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn), (v4i32 V128:$Rm)))]>;
+ def v2i64 : BaseSIMDThreeSameVector<1, U, 0b11, opc, V128,
+ asm, ".2d",
+ [(set (v2i64 V128:$Rd), (OpNode (v2i64 V128:$Rn), (v2i64 V128:$Rm)))]>;
+}
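+
+// Illustrative use (assumed): 'defm ADD : SIMDThreeSameVector<0, 0b10000,
+// "add", add>;' expands to seven instructions, one per arrangement above.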
+
+// As above, but D sized elements unsupported.
+multiclass SIMDThreeSameVectorBHS<bit U, bits<5> opc, string asm,
+ SDPatternOperator OpNode> {
+ def v8i8 : BaseSIMDThreeSameVector<0, U, 0b00, opc, V64,
+ asm, ".8b",
+ [(set V64:$Rd, (v8i8 (OpNode (v8i8 V64:$Rn), (v8i8 V64:$Rm))))]>;
+ def v16i8 : BaseSIMDThreeSameVector<1, U, 0b00, opc, V128,
+ asm, ".16b",
+ [(set V128:$Rd, (v16i8 (OpNode (v16i8 V128:$Rn), (v16i8 V128:$Rm))))]>;
+ def v4i16 : BaseSIMDThreeSameVector<0, U, 0b01, opc, V64,
+ asm, ".4h",
+ [(set V64:$Rd, (v4i16 (OpNode (v4i16 V64:$Rn), (v4i16 V64:$Rm))))]>;
+ def v8i16 : BaseSIMDThreeSameVector<1, U, 0b01, opc, V128,
+ asm, ".8h",
+ [(set V128:$Rd, (v8i16 (OpNode (v8i16 V128:$Rn), (v8i16 V128:$Rm))))]>;
+ def v2i32 : BaseSIMDThreeSameVector<0, U, 0b10, opc, V64,
+ asm, ".2s",
+ [(set V64:$Rd, (v2i32 (OpNode (v2i32 V64:$Rn), (v2i32 V64:$Rm))))]>;
+ def v4i32 : BaseSIMDThreeSameVector<1, U, 0b10, opc, V128,
+ asm, ".4s",
+ [(set V128:$Rd, (v4i32 (OpNode (v4i32 V128:$Rn), (v4i32 V128:$Rm))))]>;
+}
+
+multiclass SIMDThreeSameVectorBHSTied<bit U, bits<5> opc, string asm,
+ SDPatternOperator OpNode> {
+ def v8i8 : BaseSIMDThreeSameVectorTied<0, U, 0b00, opc, V64,
+ asm, ".8b",
+ [(set (v8i8 V64:$dst),
+ (OpNode (v8i8 V64:$Rd), (v8i8 V64:$Rn), (v8i8 V64:$Rm)))]>;
+ def v16i8 : BaseSIMDThreeSameVectorTied<1, U, 0b00, opc, V128,
+ asm, ".16b",
+ [(set (v16i8 V128:$dst),
+ (OpNode (v16i8 V128:$Rd), (v16i8 V128:$Rn), (v16i8 V128:$Rm)))]>;
+ def v4i16 : BaseSIMDThreeSameVectorTied<0, U, 0b01, opc, V64,
+ asm, ".4h",
+ [(set (v4i16 V64:$dst),
+ (OpNode (v4i16 V64:$Rd), (v4i16 V64:$Rn), (v4i16 V64:$Rm)))]>;
+ def v8i16 : BaseSIMDThreeSameVectorTied<1, U, 0b01, opc, V128,
+ asm, ".8h",
+ [(set (v8i16 V128:$dst),
+ (OpNode (v8i16 V128:$Rd), (v8i16 V128:$Rn), (v8i16 V128:$Rm)))]>;
+ def v2i32 : BaseSIMDThreeSameVectorTied<0, U, 0b10, opc, V64,
+ asm, ".2s",
+ [(set (v2i32 V64:$dst),
+ (OpNode (v2i32 V64:$Rd), (v2i32 V64:$Rn), (v2i32 V64:$Rm)))]>;
+ def v4i32 : BaseSIMDThreeSameVectorTied<1, U, 0b10, opc, V128,
+ asm, ".4s",
+ [(set (v4i32 V128:$dst),
+ (OpNode (v4i32 V128:$Rd), (v4i32 V128:$Rn), (v4i32 V128:$Rm)))]>;
+}
+
+// As above, but only B sized elements supported.
+multiclass SIMDThreeSameVectorB<bit U, bits<5> opc, string asm,
+ SDPatternOperator OpNode> {
+ def v8i8 : BaseSIMDThreeSameVector<0, U, 0b00, opc, V64,
+ asm, ".8b",
+ [(set (v8i8 V64:$Rd), (OpNode (v8i8 V64:$Rn), (v8i8 V64:$Rm)))]>;
+ def v16i8 : BaseSIMDThreeSameVector<1, U, 0b00, opc, V128,
+ asm, ".16b",
+ [(set (v16i8 V128:$Rd),
+ (OpNode (v16i8 V128:$Rn), (v16i8 V128:$Rm)))]>;
}
-// Format AdvSIMD bitwise extract
-class NeonI_BitExtract<bit q, bits<2> op2,
- dag outs, dag ins, string asmstr,
- list<dag> patterns, InstrItinClass itin>
- : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
- let Inst{31} = 0b0;
- let Inst{30} = q;
+// As above, but only S and D sized floating point elements supported.
+multiclass SIMDThreeSameVectorFP<bit U, bit S, bits<5> opc,
+ string asm, SDPatternOperator OpNode> {
+ def v2f32 : BaseSIMDThreeSameVector<0, U, {S,0}, opc, V64,
+ asm, ".2s",
+ [(set (v2f32 V64:$Rd), (OpNode (v2f32 V64:$Rn), (v2f32 V64:$Rm)))]>;
+ def v4f32 : BaseSIMDThreeSameVector<1, U, {S,0}, opc, V128,
+ asm, ".4s",
+ [(set (v4f32 V128:$Rd), (OpNode (v4f32 V128:$Rn), (v4f32 V128:$Rm)))]>;
+ def v2f64 : BaseSIMDThreeSameVector<1, U, {S,1}, opc, V128,
+ asm, ".2d",
+ [(set (v2f64 V128:$Rd), (OpNode (v2f64 V128:$Rn), (v2f64 V128:$Rm)))]>;
+}
+
+multiclass SIMDThreeSameVectorFPCmp<bit U, bit S, bits<5> opc,
+ string asm,
+ SDPatternOperator OpNode> {
+ def v2f32 : BaseSIMDThreeSameVector<0, U, {S,0}, opc, V64,
+ asm, ".2s",
+ [(set (v2i32 V64:$Rd), (OpNode (v2f32 V64:$Rn), (v2f32 V64:$Rm)))]>;
+ def v4f32 : BaseSIMDThreeSameVector<1, U, {S,0}, opc, V128,
+ asm, ".4s",
+ [(set (v4i32 V128:$Rd), (OpNode (v4f32 V128:$Rn), (v4f32 V128:$Rm)))]>;
+ def v2f64 : BaseSIMDThreeSameVector<1, U, {S,1}, opc, V128,
+ asm, ".2d",
+ [(set (v2i64 V128:$Rd), (OpNode (v2f64 V128:$Rn), (v2f64 V128:$Rm)))]>;
+}
+
+multiclass SIMDThreeSameVectorFPTied<bit U, bit S, bits<5> opc,
+ string asm, SDPatternOperator OpNode> {
+ def v2f32 : BaseSIMDThreeSameVectorTied<0, U, {S,0}, opc, V64,
+ asm, ".2s",
+ [(set (v2f32 V64:$dst),
+ (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn), (v2f32 V64:$Rm)))]>;
+ def v4f32 : BaseSIMDThreeSameVectorTied<1, U, {S,0}, opc, V128,
+ asm, ".4s",
+ [(set (v4f32 V128:$dst),
+ (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn), (v4f32 V128:$Rm)))]>;
+ def v2f64 : BaseSIMDThreeSameVectorTied<1, U, {S,1}, opc, V128,
+ asm, ".2d",
+ [(set (v2f64 V128:$dst),
+ (OpNode (v2f64 V128:$Rd), (v2f64 V128:$Rn), (v2f64 V128:$Rm)))]>;
+}
+
+// As above, but D and B sized elements unsupported.
+multiclass SIMDThreeSameVectorHS<bit U, bits<5> opc, string asm,
+ SDPatternOperator OpNode> {
+ def v4i16 : BaseSIMDThreeSameVector<0, U, 0b01, opc, V64,
+ asm, ".4h",
+ [(set (v4i16 V64:$Rd), (OpNode (v4i16 V64:$Rn), (v4i16 V64:$Rm)))]>;
+ def v8i16 : BaseSIMDThreeSameVector<1, U, 0b01, opc, V128,
+ asm, ".8h",
+ [(set (v8i16 V128:$Rd), (OpNode (v8i16 V128:$Rn), (v8i16 V128:$Rm)))]>;
+ def v2i32 : BaseSIMDThreeSameVector<0, U, 0b10, opc, V64,
+ asm, ".2s",
+ [(set (v2i32 V64:$Rd), (OpNode (v2i32 V64:$Rn), (v2i32 V64:$Rm)))]>;
+ def v4i32 : BaseSIMDThreeSameVector<1, U, 0b10, opc, V128,
+ asm, ".4s",
+ [(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn), (v4i32 V128:$Rm)))]>;
+}
+
+// Logical three vector ops share opcode bits, and only use B sized elements.
+multiclass SIMDLogicalThreeVector<bit U, bits<2> size, string asm,
+ SDPatternOperator OpNode = null_frag> {
+ def v8i8 : BaseSIMDThreeSameVector<0, U, size, 0b00011, V64,
+ asm, ".8b",
+ [(set (v8i8 V64:$Rd), (OpNode V64:$Rn, V64:$Rm))]>;
+ def v16i8 : BaseSIMDThreeSameVector<1, U, size, 0b00011, V128,
+ asm, ".16b",
+ [(set (v16i8 V128:$Rd), (OpNode V128:$Rn, V128:$Rm))]>;
+
+ def : Pat<(v4i16 (OpNode V64:$LHS, V64:$RHS)),
+ (!cast<Instruction>(NAME#"v8i8") V64:$LHS, V64:$RHS)>;
+ def : Pat<(v2i32 (OpNode V64:$LHS, V64:$RHS)),
+ (!cast<Instruction>(NAME#"v8i8") V64:$LHS, V64:$RHS)>;
+ def : Pat<(v1i64 (OpNode V64:$LHS, V64:$RHS)),
+ (!cast<Instruction>(NAME#"v8i8") V64:$LHS, V64:$RHS)>;
+
+ def : Pat<(v8i16 (OpNode V128:$LHS, V128:$RHS)),
+ (!cast<Instruction>(NAME#"v16i8") V128:$LHS, V128:$RHS)>;
+ def : Pat<(v4i32 (OpNode V128:$LHS, V128:$RHS)),
+ (!cast<Instruction>(NAME#"v16i8") V128:$LHS, V128:$RHS)>;
+ def : Pat<(v2i64 (OpNode V128:$LHS, V128:$RHS)),
+ (!cast<Instruction>(NAME#"v16i8") V128:$LHS, V128:$RHS)>;
+}
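+
+// Illustrative use (assumed): 'defm AND : SIMDLogicalThreeVector<0, 0b00,
+// "and", and>;'. The extra Pats let the two byte-wise instructions match
+// every other element type as well.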
+
+multiclass SIMDLogicalThreeVectorTied<bit U, bits<2> size,
+ string asm, SDPatternOperator OpNode> {
+ def v8i8 : BaseSIMDThreeSameVectorTied<0, U, size, 0b00011, V64,
+ asm, ".8b",
+ [(set (v8i8 V64:$dst),
+ (OpNode (v8i8 V64:$Rd), (v8i8 V64:$Rn), (v8i8 V64:$Rm)))]>;
+ def v16i8 : BaseSIMDThreeSameVectorTied<1, U, size, 0b00011, V128,
+ asm, ".16b",
+ [(set (v16i8 V128:$dst),
+ (OpNode (v16i8 V128:$Rd), (v16i8 V128:$Rn),
+ (v16i8 V128:$Rm)))]>;
+
+ def : Pat<(v4i16 (OpNode (v4i16 V64:$LHS), (v4i16 V64:$MHS),
+ (v4i16 V64:$RHS))),
+ (!cast<Instruction>(NAME#"v8i8")
+ V64:$LHS, V64:$MHS, V64:$RHS)>;
+ def : Pat<(v2i32 (OpNode (v2i32 V64:$LHS), (v2i32 V64:$MHS),
+ (v2i32 V64:$RHS))),
+ (!cast<Instruction>(NAME#"v8i8")
+ V64:$LHS, V64:$MHS, V64:$RHS)>;
+ def : Pat<(v1i64 (OpNode (v1i64 V64:$LHS), (v1i64 V64:$MHS),
+ (v1i64 V64:$RHS))),
+ (!cast<Instruction>(NAME#"v8i8")
+ V64:$LHS, V64:$MHS, V64:$RHS)>;
+
+ def : Pat<(v8i16 (OpNode (v8i16 V128:$LHS), (v8i16 V128:$MHS),
+ (v8i16 V128:$RHS))),
+ (!cast<Instruction>(NAME#"v16i8")
+ V128:$LHS, V128:$MHS, V128:$RHS)>;
+ def : Pat<(v4i32 (OpNode (v4i32 V128:$LHS), (v4i32 V128:$MHS),
+ (v4i32 V128:$RHS))),
+ (!cast<Instruction>(NAME#"v16i8")
+ V128:$LHS, V128:$MHS, V128:$RHS)>;
+ def : Pat<(v2i64 (OpNode (v2i64 V128:$LHS), (v2i64 V128:$MHS),
+ (v2i64 V128:$RHS))),
+ (!cast<Instruction>(NAME#"v16i8")
+ V128:$LHS, V128:$MHS, V128:$RHS)>;
+}
+
+
+//----------------------------------------------------------------------------
+// AdvSIMD two register vector instructions.
+//----------------------------------------------------------------------------
+
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
+class BaseSIMDTwoSameVector<bit Q, bit U, bits<2> size, bits<5> opcode,
+ RegisterOperand regtype, string asm, string dstkind,
+ string srckind, list<dag> pattern>
+ : I<(outs regtype:$Rd), (ins regtype:$Rn), asm,
+ "{\t$Rd" # dstkind # ", $Rn" # srckind #
+ "|" # dstkind # "\t$Rd, $Rn}", "", pattern>,
+ Sched<[WriteV]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ let Inst{31} = 0;
+ let Inst{30} = Q;
+ let Inst{29} = U;
+ let Inst{28-24} = 0b01110;
+ let Inst{23-22} = size;
+ let Inst{21-17} = 0b10000;
+ let Inst{16-12} = opcode;
+ let Inst{11-10} = 0b10;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+}
+
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
+class BaseSIMDTwoSameVectorTied<bit Q, bit U, bits<2> size, bits<5> opcode,
+ RegisterOperand regtype, string asm, string dstkind,
+ string srckind, list<dag> pattern>
+ : I<(outs regtype:$dst), (ins regtype:$Rd, regtype:$Rn), asm,
+ "{\t$Rd" # dstkind # ", $Rn" # srckind #
+ "|" # dstkind # "\t$Rd, $Rn}", "$Rd = $dst", pattern>,
+ Sched<[WriteV]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ let Inst{31} = 0;
+ let Inst{30} = Q;
+ let Inst{29} = U;
+ let Inst{28-24} = 0b01110;
+ let Inst{23-22} = size;
+ let Inst{21-17} = 0b10000;
+ let Inst{16-12} = opcode;
+ let Inst{11-10} = 0b10;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+}
+
+// Supports B, H, and S element sizes.
+multiclass SIMDTwoVectorBHS<bit U, bits<5> opc, string asm,
+ SDPatternOperator OpNode> {
+ def v8i8 : BaseSIMDTwoSameVector<0, U, 0b00, opc, V64,
+ asm, ".8b", ".8b",
+ [(set (v8i8 V64:$Rd), (OpNode (v8i8 V64:$Rn)))]>;
+ def v16i8 : BaseSIMDTwoSameVector<1, U, 0b00, opc, V128,
+ asm, ".16b", ".16b",
+ [(set (v16i8 V128:$Rd), (OpNode (v16i8 V128:$Rn)))]>;
+ def v4i16 : BaseSIMDTwoSameVector<0, U, 0b01, opc, V64,
+ asm, ".4h", ".4h",
+ [(set (v4i16 V64:$Rd), (OpNode (v4i16 V64:$Rn)))]>;
+ def v8i16 : BaseSIMDTwoSameVector<1, U, 0b01, opc, V128,
+ asm, ".8h", ".8h",
+ [(set (v8i16 V128:$Rd), (OpNode (v8i16 V128:$Rn)))]>;
+ def v2i32 : BaseSIMDTwoSameVector<0, U, 0b10, opc, V64,
+ asm, ".2s", ".2s",
+ [(set (v2i32 V64:$Rd), (OpNode (v2i32 V64:$Rn)))]>;
+ def v4i32 : BaseSIMDTwoSameVector<1, U, 0b10, opc, V128,
+ asm, ".4s", ".4s",
+ [(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn)))]>;
+}
+
+class BaseSIMDVectorLShiftLongBySize<bit Q, bits<2> size,
+ RegisterOperand regtype, string asm, string dstkind,
+ string srckind, string amount>
+ : I<(outs V128:$Rd), (ins regtype:$Rn), asm,
+ "{\t$Rd" # dstkind # ", $Rn" # srckind # ", #" # amount #
+ "|" # dstkind # "\t$Rd, $Rn, #" # amount # "}", "", []>,
+ Sched<[WriteV]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ let Inst{31} = 0;
+ let Inst{30} = Q;
let Inst{29-24} = 0b101110;
- let Inst{23-22} = op2;
- let Inst{21} = 0b0;
- // Inherit Rm in 20-16
- let Inst{15} = 0b0;
- // imm4 in 14-11
- let Inst{10} = 0b0;
- // Inherit Rn in 9-5
- // Inherit Rd in 4-0
-}
-
-// Format AdvSIMD perm
-class NeonI_Perm<bit q, bits<2> size, bits<3> opcode,
- dag outs, dag ins, string asmstr,
- list<dag> patterns, InstrItinClass itin>
- : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
- let Inst{31} = 0b0;
- let Inst{30} = q;
- let Inst{29-24} = 0b001110;
let Inst{23-22} = size;
- let Inst{21} = 0b0;
- // Inherit Rm in 20-16
- let Inst{15} = 0b0;
- let Inst{14-12} = opcode;
+ let Inst{21-10} = 0b100001001110;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+}
+
+multiclass SIMDVectorLShiftLongBySizeBHS {
+ let neverHasSideEffects = 1 in {
+ def v8i8 : BaseSIMDVectorLShiftLongBySize<0, 0b00, V64,
+ "shll", ".8h", ".8b", "8">;
+ def v16i8 : BaseSIMDVectorLShiftLongBySize<1, 0b00, V128,
+ "shll2", ".8h", ".16b", "8">;
+ def v4i16 : BaseSIMDVectorLShiftLongBySize<0, 0b01, V64,
+ "shll", ".4s", ".4h", "16">;
+ def v8i16 : BaseSIMDVectorLShiftLongBySize<1, 0b01, V128,
+ "shll2", ".4s", ".8h", "16">;
+ def v2i32 : BaseSIMDVectorLShiftLongBySize<0, 0b10, V64,
+ "shll", ".2d", ".2s", "32">;
+ def v4i32 : BaseSIMDVectorLShiftLongBySize<1, 0b10, V128,
+ "shll2", ".2d", ".4s", "32">;
+ }
+}
+
+// Supports all element sizes.
+multiclass SIMDLongTwoVector<bit U, bits<5> opc, string asm,
+ SDPatternOperator OpNode> {
+ def v8i8_v4i16 : BaseSIMDTwoSameVector<0, U, 0b00, opc, V64,
+ asm, ".4h", ".8b",
+ [(set (v4i16 V64:$Rd), (OpNode (v8i8 V64:$Rn)))]>;
+ def v16i8_v8i16 : BaseSIMDTwoSameVector<1, U, 0b00, opc, V128,
+ asm, ".8h", ".16b",
+ [(set (v8i16 V128:$Rd), (OpNode (v16i8 V128:$Rn)))]>;
+ def v4i16_v2i32 : BaseSIMDTwoSameVector<0, U, 0b01, opc, V64,
+ asm, ".2s", ".4h",
+ [(set (v2i32 V64:$Rd), (OpNode (v4i16 V64:$Rn)))]>;
+ def v8i16_v4i32 : BaseSIMDTwoSameVector<1, U, 0b01, opc, V128,
+ asm, ".4s", ".8h",
+ [(set (v4i32 V128:$Rd), (OpNode (v8i16 V128:$Rn)))]>;
+ def v2i32_v1i64 : BaseSIMDTwoSameVector<0, U, 0b10, opc, V64,
+ asm, ".1d", ".2s",
+ [(set (v1i64 V64:$Rd), (OpNode (v2i32 V64:$Rn)))]>;
+ def v4i32_v2i64 : BaseSIMDTwoSameVector<1, U, 0b10, opc, V128,
+ asm, ".2d", ".4s",
+ [(set (v2i64 V128:$Rd), (OpNode (v4i32 V128:$Rn)))]>;
+}
+
+multiclass SIMDLongTwoVectorTied<bit U, bits<5> opc, string asm,
+ SDPatternOperator OpNode> {
+ def v8i8_v4i16 : BaseSIMDTwoSameVectorTied<0, U, 0b00, opc, V64,
+ asm, ".4h", ".8b",
+ [(set (v4i16 V64:$dst), (OpNode (v4i16 V64:$Rd),
+ (v8i8 V64:$Rn)))]>;
+ def v16i8_v8i16 : BaseSIMDTwoSameVectorTied<1, U, 0b00, opc, V128,
+ asm, ".8h", ".16b",
+ [(set (v8i16 V128:$dst), (OpNode (v8i16 V128:$Rd),
+ (v16i8 V128:$Rn)))]>;
+ def v4i16_v2i32 : BaseSIMDTwoSameVectorTied<0, U, 0b01, opc, V64,
+ asm, ".2s", ".4h",
+ [(set (v2i32 V64:$dst), (OpNode (v2i32 V64:$Rd),
+ (v4i16 V64:$Rn)))]>;
+ def v8i16_v4i32 : BaseSIMDTwoSameVectorTied<1, U, 0b01, opc, V128,
+ asm, ".4s", ".8h",
+ [(set (v4i32 V128:$dst), (OpNode (v4i32 V128:$Rd),
+ (v8i16 V128:$Rn)))]>;
+ def v2i32_v1i64 : BaseSIMDTwoSameVectorTied<0, U, 0b10, opc, V64,
+ asm, ".1d", ".2s",
+ [(set (v1i64 V64:$dst), (OpNode (v1i64 V64:$Rd),
+ (v2i32 V64:$Rn)))]>;
+ def v4i32_v2i64 : BaseSIMDTwoSameVectorTied<1, U, 0b10, opc, V128,
+ asm, ".2d", ".4s",
+ [(set (v2i64 V128:$dst), (OpNode (v2i64 V128:$Rd),
+ (v4i32 V128:$Rn)))]>;
+}
+
+// Supports all element sizes, except 1xD.
+multiclass SIMDTwoVectorBHSDTied<bit U, bits<5> opc, string asm,
+ SDPatternOperator OpNode> {
+ def v8i8 : BaseSIMDTwoSameVectorTied<0, U, 0b00, opc, V64,
+ asm, ".8b", ".8b",
+ [(set (v8i8 V64:$dst), (OpNode (v8i8 V64:$Rd), (v8i8 V64:$Rn)))]>;
+ def v16i8 : BaseSIMDTwoSameVectorTied<1, U, 0b00, opc, V128,
+ asm, ".16b", ".16b",
+ [(set (v16i8 V128:$dst), (OpNode (v16i8 V128:$Rd), (v16i8 V128:$Rn)))]>;
+ def v4i16 : BaseSIMDTwoSameVectorTied<0, U, 0b01, opc, V64,
+ asm, ".4h", ".4h",
+ [(set (v4i16 V64:$dst), (OpNode (v4i16 V64:$Rd), (v4i16 V64:$Rn)))]>;
+ def v8i16 : BaseSIMDTwoSameVectorTied<1, U, 0b01, opc, V128,
+ asm, ".8h", ".8h",
+ [(set (v8i16 V128:$dst), (OpNode (v8i16 V128:$Rd), (v8i16 V128:$Rn)))]>;
+ def v2i32 : BaseSIMDTwoSameVectorTied<0, U, 0b10, opc, V64,
+ asm, ".2s", ".2s",
+ [(set (v2i32 V64:$dst), (OpNode (v2i32 V64:$Rd), (v2i32 V64:$Rn)))]>;
+ def v4i32 : BaseSIMDTwoSameVectorTied<1, U, 0b10, opc, V128,
+ asm, ".4s", ".4s",
+ [(set (v4i32 V128:$dst), (OpNode (v4i32 V128:$Rd), (v4i32 V128:$Rn)))]>;
+ def v2i64 : BaseSIMDTwoSameVectorTied<1, U, 0b11, opc, V128,
+ asm, ".2d", ".2d",
+ [(set (v2i64 V128:$dst), (OpNode (v2i64 V128:$Rd), (v2i64 V128:$Rn)))]>;
+}
+
+multiclass SIMDTwoVectorBHSD<bit U, bits<5> opc, string asm,
+ SDPatternOperator OpNode = null_frag> {
+ def v8i8 : BaseSIMDTwoSameVector<0, U, 0b00, opc, V64,
+ asm, ".8b", ".8b",
+ [(set (v8i8 V64:$Rd), (OpNode (v8i8 V64:$Rn)))]>;
+ def v16i8 : BaseSIMDTwoSameVector<1, U, 0b00, opc, V128,
+ asm, ".16b", ".16b",
+ [(set (v16i8 V128:$Rd), (OpNode (v16i8 V128:$Rn)))]>;
+ def v4i16 : BaseSIMDTwoSameVector<0, U, 0b01, opc, V64,
+ asm, ".4h", ".4h",
+ [(set (v4i16 V64:$Rd), (OpNode (v4i16 V64:$Rn)))]>;
+ def v8i16 : BaseSIMDTwoSameVector<1, U, 0b01, opc, V128,
+ asm, ".8h", ".8h",
+ [(set (v8i16 V128:$Rd), (OpNode (v8i16 V128:$Rn)))]>;
+ def v2i32 : BaseSIMDTwoSameVector<0, U, 0b10, opc, V64,
+ asm, ".2s", ".2s",
+ [(set (v2i32 V64:$Rd), (OpNode (v2i32 V64:$Rn)))]>;
+ def v4i32 : BaseSIMDTwoSameVector<1, U, 0b10, opc, V128,
+ asm, ".4s", ".4s",
+ [(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn)))]>;
+ def v2i64 : BaseSIMDTwoSameVector<1, U, 0b11, opc, V128,
+ asm, ".2d", ".2d",
+ [(set (v2i64 V128:$Rd), (OpNode (v2i64 V128:$Rn)))]>;
+}
+
+
+// Supports only B element sizes.
+multiclass SIMDTwoVectorB<bit U, bits<2> size, bits<5> opc, string asm,
+ SDPatternOperator OpNode> {
+ def v8i8 : BaseSIMDTwoSameVector<0, U, size, opc, V64,
+ asm, ".8b", ".8b",
+ [(set (v8i8 V64:$Rd), (OpNode (v8i8 V64:$Rn)))]>;
+ def v16i8 : BaseSIMDTwoSameVector<1, U, size, opc, V128,
+ asm, ".16b", ".16b",
+ [(set (v16i8 V128:$Rd), (OpNode (v16i8 V128:$Rn)))]>;
+}
+
+// Supports only B and H element sizes.
+multiclass SIMDTwoVectorBH<bit U, bits<5> opc, string asm,
+ SDPatternOperator OpNode> {
+ def v8i8 : BaseSIMDTwoSameVector<0, U, 0b00, opc, V64,
+ asm, ".8b", ".8b",
+ [(set (v8i8 V64:$Rd), (OpNode V64:$Rn))]>;
+ def v16i8 : BaseSIMDTwoSameVector<1, U, 0b00, opc, V128,
+ asm, ".16b", ".16b",
+ [(set (v16i8 V128:$Rd), (OpNode V128:$Rn))]>;
+ def v4i16 : BaseSIMDTwoSameVector<0, U, 0b01, opc, V64,
+ asm, ".4h", ".4h",
+ [(set (v4i16 V64:$Rd), (OpNode V64:$Rn))]>;
+ def v8i16 : BaseSIMDTwoSameVector<1, U, 0b01, opc, V128,
+ asm, ".8h", ".8h",
+ [(set (v8i16 V128:$Rd), (OpNode V128:$Rn))]>;
+}
+
+// Supports only S and D element sizes, using the high bit of the size field
+// as an extra opcode bit.
+multiclass SIMDTwoVectorFP<bit U, bit S, bits<5> opc, string asm,
+ SDPatternOperator OpNode> {
+ def v2f32 : BaseSIMDTwoSameVector<0, U, {S,0}, opc, V64,
+ asm, ".2s", ".2s",
+ [(set (v2f32 V64:$Rd), (OpNode (v2f32 V64:$Rn)))]>;
+ def v4f32 : BaseSIMDTwoSameVector<1, U, {S,0}, opc, V128,
+ asm, ".4s", ".4s",
+ [(set (v4f32 V128:$Rd), (OpNode (v4f32 V128:$Rn)))]>;
+ def v2f64 : BaseSIMDTwoSameVector<1, U, {S,1}, opc, V128,
+ asm, ".2d", ".2d",
+ [(set (v2f64 V128:$Rd), (OpNode (v2f64 V128:$Rn)))]>;
+}
+
+// Supports only S element size.
+multiclass SIMDTwoVectorS<bit U, bit S, bits<5> opc, string asm,
+ SDPatternOperator OpNode> {
+ def v2i32 : BaseSIMDTwoSameVector<0, U, {S,0}, opc, V64,
+ asm, ".2s", ".2s",
+ [(set (v2i32 V64:$Rd), (OpNode (v2i32 V64:$Rn)))]>;
+ def v4i32 : BaseSIMDTwoSameVector<1, U, {S,0}, opc, V128,
+ asm, ".4s", ".4s",
+ [(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn)))]>;
+}
+
+
+multiclass SIMDTwoVectorFPToInt<bit U, bit S, bits<5> opc, string asm,
+ SDPatternOperator OpNode> {
+ def v2f32 : BaseSIMDTwoSameVector<0, U, {S,0}, opc, V64,
+ asm, ".2s", ".2s",
+ [(set (v2i32 V64:$Rd), (OpNode (v2f32 V64:$Rn)))]>;
+ def v4f32 : BaseSIMDTwoSameVector<1, U, {S,0}, opc, V128,
+ asm, ".4s", ".4s",
+ [(set (v4i32 V128:$Rd), (OpNode (v4f32 V128:$Rn)))]>;
+ def v2f64 : BaseSIMDTwoSameVector<1, U, {S,1}, opc, V128,
+ asm, ".2d", ".2d",
+ [(set (v2i64 V128:$Rd), (OpNode (v2f64 V128:$Rn)))]>;
+}
+
+multiclass SIMDTwoVectorIntToFP<bit U, bit S, bits<5> opc, string asm,
+ SDPatternOperator OpNode> {
+ def v2f32 : BaseSIMDTwoSameVector<0, U, {S,0}, opc, V64,
+ asm, ".2s", ".2s",
+ [(set (v2f32 V64:$Rd), (OpNode (v2i32 V64:$Rn)))]>;
+ def v4f32 : BaseSIMDTwoSameVector<1, U, {S,0}, opc, V128,
+ asm, ".4s", ".4s",
+ [(set (v4f32 V128:$Rd), (OpNode (v4i32 V128:$Rn)))]>;
+ def v2f64 : BaseSIMDTwoSameVector<1, U, {S,1}, opc, V128,
+ asm, ".2d", ".2d",
+ [(set (v2f64 V128:$Rd), (OpNode (v2i64 V128:$Rn)))]>;
+}
+
+
+class BaseSIMDMixedTwoVector<bit Q, bit U, bits<2> size, bits<5> opcode,
+ RegisterOperand inreg, RegisterOperand outreg,
+ string asm, string outkind, string inkind,
+ list<dag> pattern>
+ : I<(outs outreg:$Rd), (ins inreg:$Rn), asm,
+ "{\t$Rd" # outkind # ", $Rn" # inkind #
+ "|" # outkind # "\t$Rd, $Rn}", "", pattern>,
+ Sched<[WriteV]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ let Inst{31} = 0;
+ let Inst{30} = Q;
+ let Inst{29} = U;
+ let Inst{28-24} = 0b01110;
+ let Inst{23-22} = size;
+ let Inst{21-17} = 0b10000;
+ let Inst{16-12} = opcode;
let Inst{11-10} = 0b10;
- // Inherit Rn in 9-5
- // Inherit Rd in 4-0
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
}
-// Format AdvSIMD table lookup
-class NeonI_TBL<bit q, bits<2> op2, bits<2> len, bit op,
- dag outs, dag ins, string asmstr,
- list<dag> patterns, InstrItinClass itin>
- : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
- let Inst{31} = 0b0;
- let Inst{30} = q;
- let Inst{29-24} = 0b001110;
- let Inst{23-22} = op2;
- let Inst{21} = 0b0;
- // Inherit Rm in 20-16
- let Inst{15} = 0b0;
- let Inst{14-13} = len;
- let Inst{12} = op;
- let Inst{11-10} = 0b00;
- // Inherit Rn in 9-5
- // Inherit Rd in 4-0
-}
-
-// Format AdvSIMD 3 vector registers with same vector type
-class NeonI_3VSame<bit q, bit u, bits<2> size, bits<5> opcode,
- dag outs, dag ins, string asmstr,
- list<dag> patterns, InstrItinClass itin>
- : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
- let Inst{31} = 0b0;
- let Inst{30} = q;
- let Inst{29} = u;
+class BaseSIMDMixedTwoVectorTied<bit Q, bit U, bits<2> size, bits<5> opcode,
+ RegisterOperand inreg, RegisterOperand outreg,
+ string asm, string outkind, string inkind,
+ list<dag> pattern>
+ : I<(outs outreg:$dst), (ins outreg:$Rd, inreg:$Rn), asm,
+ "{\t$Rd" # outkind # ", $Rn" # inkind #
+ "|" # outkind # "\t$Rd, $Rn}", "$Rd = $dst", pattern>,
+ Sched<[WriteV]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ let Inst{31} = 0;
+ let Inst{30} = Q;
+ let Inst{29} = U;
let Inst{28-24} = 0b01110;
let Inst{23-22} = size;
- let Inst{21} = 0b1;
- // Inherit Rm in 20-16
- let Inst{15-11} = opcode;
- let Inst{10} = 0b1;
- // Inherit Rn in 9-5
- // Inherit Rd in 4-0
-}
-
-// Format AdvSIMD 3 vector registers with different vector type
-class NeonI_3VDiff<bit q, bit u, bits<2> size, bits<4> opcode,
- dag outs, dag ins, string asmstr,
- list<dag> patterns, InstrItinClass itin>
- : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
- let Inst{31} = 0b0;
- let Inst{30} = q;
- let Inst{29} = u;
+ let Inst{21-17} = 0b10000;
+ let Inst{16-12} = opcode;
+ let Inst{11-10} = 0b10;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+}
+
+multiclass SIMDMixedTwoVector<bit U, bits<5> opc, string asm,
+ SDPatternOperator OpNode> {
+ def v8i8 : BaseSIMDMixedTwoVector<0, U, 0b00, opc, V128, V64,
+ asm, ".8b", ".8h",
+ [(set (v8i8 V64:$Rd), (OpNode (v8i16 V128:$Rn)))]>;
+ def v16i8 : BaseSIMDMixedTwoVectorTied<1, U, 0b00, opc, V128, V128,
+ asm#"2", ".16b", ".8h", []>;
+ def v4i16 : BaseSIMDMixedTwoVector<0, U, 0b01, opc, V128, V64,
+ asm, ".4h", ".4s",
+ [(set (v4i16 V64:$Rd), (OpNode (v4i32 V128:$Rn)))]>;
+ def v8i16 : BaseSIMDMixedTwoVectorTied<1, U, 0b01, opc, V128, V128,
+ asm#"2", ".8h", ".4s", []>;
+ def v2i32 : BaseSIMDMixedTwoVector<0, U, 0b10, opc, V128, V64,
+ asm, ".2s", ".2d",
+ [(set (v2i32 V64:$Rd), (OpNode (v2i64 V128:$Rn)))]>;
+ def v4i32 : BaseSIMDMixedTwoVectorTied<1, U, 0b10, opc, V128, V128,
+ asm#"2", ".4s", ".2d", []>;
+
+ def : Pat<(concat_vectors (v8i8 V64:$Rd), (OpNode (v8i16 V128:$Rn))),
+ (!cast<Instruction>(NAME # "v16i8")
+ (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>;
+ def : Pat<(concat_vectors (v4i16 V64:$Rd), (OpNode (v4i32 V128:$Rn))),
+ (!cast<Instruction>(NAME # "v8i16")
+ (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>;
+ def : Pat<(concat_vectors (v2i32 V64:$Rd), (OpNode (v2i64 V128:$Rn))),
+ (!cast<Instruction>(NAME # "v4i32")
+ (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>;
+}
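+
+// Illustrative use (assumed): 'defm XTN : SIMDMixedTwoVector<0, 0b10010,
+// "xtn", trunc>;' with the concat_vectors Pats routing the "2" high-half
+// forms through INSERT_SUBREG.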
+
+class BaseSIMDCmpTwoVector<bit Q, bit U, bits<2> size, bits<5> opcode,
+ RegisterOperand regtype,
+ string asm, string kind, string zero,
+ ValueType dty, ValueType sty, SDNode OpNode>
+ : I<(outs regtype:$Rd), (ins regtype:$Rn), asm,
+ "{\t$Rd" # kind # ", $Rn" # kind # ", #" # zero #
+ "|" # kind # "\t$Rd, $Rn, #" # zero # "}", "",
+ [(set (dty regtype:$Rd), (OpNode (sty regtype:$Rn)))]>,
+ Sched<[WriteV]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ let Inst{31} = 0;
+ let Inst{30} = Q;
+ let Inst{29} = U;
let Inst{28-24} = 0b01110;
let Inst{23-22} = size;
- let Inst{21} = 0b1;
- // Inherit Rm in 20-16
- let Inst{15-12} = opcode;
- let Inst{11} = 0b0;
- let Inst{10} = 0b0;
- // Inherit Rn in 9-5
- // Inherit Rd in 4-0
-}
-
-// Format AdvSIMD two registers and an element
-class NeonI_2VElem<bit q, bit u, bits<2> size, bits<4> opcode,
- dag outs, dag ins, string asmstr,
- list<dag> patterns, InstrItinClass itin>
- : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
- let Inst{31} = 0b0;
- let Inst{30} = q;
- let Inst{29} = u;
- let Inst{28-24} = 0b01111;
+ let Inst{21-17} = 0b10000;
+ let Inst{16-12} = opcode;
+ let Inst{11-10} = 0b10;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+}
+
+// Comparisons support all element sizes, except 1xD.
+multiclass SIMDCmpTwoVector<bit U, bits<5> opc, string asm,
+ SDNode OpNode> {
+ def v8i8rz : BaseSIMDCmpTwoVector<0, U, 0b00, opc, V64,
+ asm, ".8b", "0",
+ v8i8, v8i8, OpNode>;
+ def v16i8rz : BaseSIMDCmpTwoVector<1, U, 0b00, opc, V128,
+ asm, ".16b", "0",
+ v16i8, v16i8, OpNode>;
+ def v4i16rz : BaseSIMDCmpTwoVector<0, U, 0b01, opc, V64,
+ asm, ".4h", "0",
+ v4i16, v4i16, OpNode>;
+ def v8i16rz : BaseSIMDCmpTwoVector<1, U, 0b01, opc, V128,
+ asm, ".8h", "0",
+ v8i16, v8i16, OpNode>;
+ def v2i32rz : BaseSIMDCmpTwoVector<0, U, 0b10, opc, V64,
+ asm, ".2s", "0",
+ v2i32, v2i32, OpNode>;
+ def v4i32rz : BaseSIMDCmpTwoVector<1, U, 0b10, opc, V128,
+ asm, ".4s", "0",
+ v4i32, v4i32, OpNode>;
+ def v2i64rz : BaseSIMDCmpTwoVector<1, U, 0b11, opc, V128,
+ asm, ".2d", "0",
+ v2i64, v2i64, OpNode>;
+}
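+
+// Illustrative use (assumed): the cmeq/cmgt/cmle/cmlt compare-against-zero
+// instructions instantiate this with target-specific compare-to-zero nodes.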
+
+// FP Comparisons support only S and D element sizes.
+multiclass SIMDFPCmpTwoVector<bit U, bit S, bits<5> opc,
+ string asm, SDNode OpNode> {
+
+ def v2i32rz : BaseSIMDCmpTwoVector<0, U, {S,0}, opc, V64,
+ asm, ".2s", "0.0",
+ v2i32, v2f32, OpNode>;
+ def v4i32rz : BaseSIMDCmpTwoVector<1, U, {S,0}, opc, V128,
+ asm, ".4s", "0.0",
+ v4i32, v4f32, OpNode>;
+ def v2i64rz : BaseSIMDCmpTwoVector<1, U, {S,1}, opc, V128,
+ asm, ".2d", "0.0",
+ v2i64, v2f64, OpNode>;
+
+ def : InstAlias<asm # " $Vd.2s, $Vn.2s, #0",
+ (!cast<Instruction>(NAME # v2i32rz) V64:$Vd, V64:$Vn), 0>;
+ def : InstAlias<asm # " $Vd.4s, $Vn.4s, #0",
+ (!cast<Instruction>(NAME # v4i32rz) V128:$Vd, V128:$Vn), 0>;
+ def : InstAlias<asm # " $Vd.2d, $Vn.2d, #0",
+ (!cast<Instruction>(NAME # v2i64rz) V128:$Vd, V128:$Vn), 0>;
+ def : InstAlias<asm # ".2s $Vd, $Vn, #0",
+ (!cast<Instruction>(NAME # v2i32rz) V64:$Vd, V64:$Vn), 0>;
+ def : InstAlias<asm # ".4s $Vd, $Vn, #0",
+ (!cast<Instruction>(NAME # v4i32rz) V128:$Vd, V128:$Vn), 0>;
+ def : InstAlias<asm # ".2d $Vd, $Vn, #0",
+ (!cast<Instruction>(NAME # v2i64rz) V128:$Vd, V128:$Vn), 0>;
+}
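+
+// Illustrative use (assumed): the fcmeq/fcmgt/fcmlt zero-compare forms
+// instantiate this; the InstAliases above additionally accept a plain "#0"
+// spelling of the canonical "#0.0" immediate.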
+
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
+class BaseSIMDFPCvtTwoVector<bit Q, bit U, bits<2> size, bits<5> opcode,
+ RegisterOperand outtype, RegisterOperand intype,
+ string asm, string VdTy, string VnTy,
+ list<dag> pattern>
+ : I<(outs outtype:$Rd), (ins intype:$Rn), asm,
+ !strconcat("\t$Rd", VdTy, ", $Rn", VnTy), "", pattern>,
+ Sched<[WriteV]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ let Inst{31} = 0;
+ let Inst{30} = Q;
+ let Inst{29} = U;
+ let Inst{28-24} = 0b01110;
+ let Inst{23-22} = size;
+ let Inst{21-17} = 0b10000;
+ let Inst{16-12} = opcode;
+ let Inst{11-10} = 0b10;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+}
+
+class BaseSIMDFPCvtTwoVectorTied<bit Q, bit U, bits<2> size, bits<5> opcode,
+ RegisterOperand outtype, RegisterOperand intype,
+ string asm, string VdTy, string VnTy,
+ list<dag> pattern>
+ : I<(outs outtype:$dst), (ins outtype:$Rd, intype:$Rn), asm,
+ !strconcat("\t$Rd", VdTy, ", $Rn", VnTy), "$Rd = $dst", pattern>,
+ Sched<[WriteV]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ let Inst{31} = 0;
+ let Inst{30} = Q;
+ let Inst{29} = U;
+ let Inst{28-24} = 0b01110;
let Inst{23-22} = size;
- // l in Inst{21}
- // m in Inst{20}
- // Inherit Rm in 19-16
+ let Inst{21-17} = 0b10000;
+ let Inst{16-12} = opcode;
+ let Inst{11-10} = 0b10;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+}
+
+multiclass SIMDFPWidenTwoVector<bit U, bit S, bits<5> opc, string asm> {
+ def v4i16 : BaseSIMDFPCvtTwoVector<0, U, {S,0}, opc, V128, V64,
+ asm, ".4s", ".4h", []>;
+ def v8i16 : BaseSIMDFPCvtTwoVector<1, U, {S,0}, opc, V128, V128,
+ asm#"2", ".4s", ".8h", []>;
+ def v2i32 : BaseSIMDFPCvtTwoVector<0, U, {S,1}, opc, V128, V64,
+ asm, ".2d", ".2s", []>;
+ def v4i32 : BaseSIMDFPCvtTwoVector<1, U, {S,1}, opc, V128, V128,
+ asm#"2", ".2d", ".4s", []>;
+}
+
+multiclass SIMDFPNarrowTwoVector<bit U, bit S, bits<5> opc, string asm> {
+ def v4i16 : BaseSIMDFPCvtTwoVector<0, U, {S,0}, opc, V64, V128,
+ asm, ".4h", ".4s", []>;
+ def v8i16 : BaseSIMDFPCvtTwoVectorTied<1, U, {S,0}, opc, V128, V128,
+ asm#"2", ".8h", ".4s", []>;
+ def v2i32 : BaseSIMDFPCvtTwoVector<0, U, {S,1}, opc, V64, V128,
+ asm, ".2s", ".2d", []>;
+ def v4i32 : BaseSIMDFPCvtTwoVectorTied<1, U, {S,1}, opc, V128, V128,
+ asm#"2", ".4s", ".2d", []>;
+}
+
+multiclass SIMDFPInexactCvtTwoVector<bit U, bit S, bits<5> opc, string asm,
+ Intrinsic OpNode> {
+ def v2f32 : BaseSIMDFPCvtTwoVector<0, U, {S,1}, opc, V64, V128,
+ asm, ".2s", ".2d",
+ [(set (v2f32 V64:$Rd), (OpNode (v2f64 V128:$Rn)))]>;
+ def v4f32 : BaseSIMDFPCvtTwoVectorTied<1, U, {S,1}, opc, V128, V128,
+ asm#"2", ".4s", ".2d", []>;
+
+ def : Pat<(concat_vectors (v2f32 V64:$Rd), (OpNode (v2f64 V128:$Rn))),
+ (!cast<Instruction>(NAME # "v4f32")
+ (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>;
+}
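+
+// Illustrative use (assumed): fcvtxn is the expected user, e.g.
+//   defm FCVTXN : SIMDFPInexactCvtTwoVector<1, 0, 0b10110, "fcvtxn",
+//                                           int_aarch64_neon_fcvtxn>;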
+
+//----------------------------------------------------------------------------
+// AdvSIMD three register different-size vector instructions.
+//----------------------------------------------------------------------------
+
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
+class BaseSIMDDifferentThreeVector<bit U, bits<3> size, bits<4> opcode,
+ RegisterOperand outtype, RegisterOperand intype1,
+ RegisterOperand intype2, string asm,
+ string outkind, string inkind1, string inkind2,
+ list<dag> pattern>
+ : I<(outs outtype:$Rd), (ins intype1:$Rn, intype2:$Rm), asm,
+ "{\t$Rd" # outkind # ", $Rn" # inkind1 # ", $Rm" # inkind2 #
+ "|" # outkind # "\t$Rd, $Rn, $Rm}", "", pattern>,
+ Sched<[WriteV]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ bits<5> Rm;
+ let Inst{31} = 0;
+ let Inst{30} = size{0};
+ let Inst{29} = U;
+ let Inst{28-24} = 0b01110;
+ let Inst{23-22} = size{2-1};
+ let Inst{21} = 1;
+ let Inst{20-16} = Rm;
let Inst{15-12} = opcode;
- // h in Inst{11}
- let Inst{10} = 0b0;
- // Inherit Rn in 9-5
- // Inherit Rd in 4-0
-}
-
-// Format AdvSIMD 1 vector register with modified immediate
-class NeonI_1VModImm<bit q, bit op,
- dag outs, dag ins, string asmstr,
- list<dag> patterns, InstrItinClass itin>
- : A64InstRd<outs,ins, asmstr, patterns, itin> {
- bits<8> Imm;
- bits<4> cmode;
- let Inst{31} = 0b0;
- let Inst{30} = q;
- let Inst{29} = op;
- let Inst{28-19} = 0b0111100000;
- let Inst{15-12} = cmode;
- let Inst{11} = 0b0; // o2
- let Inst{10} = 1;
- // Inherit Rd in 4-0
- let Inst{18-16} = Imm{7-5}; // imm a:b:c
- let Inst{9-5} = Imm{4-0}; // imm d:e:f:g:h
+ let Inst{11-10} = 0b00;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+}
+
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
+class BaseSIMDDifferentThreeVectorTied<bit U, bits<3> size, bits<4> opcode,
+ RegisterOperand outtype, RegisterOperand intype1,
+ RegisterOperand intype2, string asm,
+ string outkind, string inkind1, string inkind2,
+ list<dag> pattern>
+ : I<(outs outtype:$dst), (ins outtype:$Rd, intype1:$Rn, intype2:$Rm), asm,
+ "{\t$Rd" # outkind # ", $Rn" # inkind1 # ", $Rm" # inkind2 #
+ "|" # outkind # "\t$Rd, $Rn, $Rm}", "$Rd = $dst", pattern>,
+ Sched<[WriteV]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ bits<5> Rm;
+ let Inst{31} = 0;
+ let Inst{30} = size{0};
+ let Inst{29} = U;
+ let Inst{28-24} = 0b01110;
+ let Inst{23-22} = size{2-1};
+ let Inst{21} = 1;
+ let Inst{20-16} = Rm;
+ let Inst{15-12} = opcode;
+ let Inst{11-10} = 0b00;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+}
+
+// FIXME: TableGen doesn't know how to deal with expanded types that also
+// change the element count (in this case, placing the results in
+// the high elements of the result register rather than the low
+// elements). Until that's fixed, we can't code-gen those.
+multiclass SIMDNarrowThreeVectorBHS<bit U, bits<4> opc, string asm,
+ Intrinsic IntOp> {
+ def v8i16_v8i8 : BaseSIMDDifferentThreeVector<U, 0b000, opc,
+ V64, V128, V128,
+ asm, ".8b", ".8h", ".8h",
+ [(set (v8i8 V64:$Rd), (IntOp (v8i16 V128:$Rn), (v8i16 V128:$Rm)))]>;
+ def v8i16_v16i8 : BaseSIMDDifferentThreeVectorTied<U, 0b001, opc,
+ V128, V128, V128,
+ asm#"2", ".16b", ".8h", ".8h",
+ []>;
+ def v4i32_v4i16 : BaseSIMDDifferentThreeVector<U, 0b010, opc,
+ V64, V128, V128,
+ asm, ".4h", ".4s", ".4s",
+ [(set (v4i16 V64:$Rd), (IntOp (v4i32 V128:$Rn), (v4i32 V128:$Rm)))]>;
+ def v4i32_v8i16 : BaseSIMDDifferentThreeVectorTied<U, 0b011, opc,
+ V128, V128, V128,
+ asm#"2", ".8h", ".4s", ".4s",
+ []>;
+ def v2i64_v2i32 : BaseSIMDDifferentThreeVector<U, 0b100, opc,
+ V64, V128, V128,
+ asm, ".2s", ".2d", ".2d",
+ [(set (v2i32 V64:$Rd), (IntOp (v2i64 V128:$Rn), (v2i64 V128:$Rm)))]>;
+ def v2i64_v4i32 : BaseSIMDDifferentThreeVectorTied<U, 0b101, opc,
+ V128, V128, V128,
+ asm#"2", ".4s", ".2d", ".2d",
+ []>;
+
+
+ // Patterns for the '2' variants involve INSERT_SUBREG, which can't appear in
+ // a pattern attached to an instruction, so they are written as separate Pats.
+ def : Pat<(concat_vectors (v8i8 V64:$Rd), (IntOp (v8i16 V128:$Rn),
+ (v8i16 V128:$Rm))),
+ (!cast<Instruction>(NAME # "v8i16_v16i8")
+ (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub),
+ V128:$Rn, V128:$Rm)>;
+ def : Pat<(concat_vectors (v4i16 V64:$Rd), (IntOp (v4i32 V128:$Rn),
+ (v4i32 V128:$Rm))),
+ (!cast<Instruction>(NAME # "v4i32_v8i16")
+ (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub),
+ V128:$Rn, V128:$Rm)>;
+ def : Pat<(concat_vectors (v2i32 V64:$Rd), (IntOp (v2i64 V128:$Rn),
+ (v2i64 V128:$Rm))),
+ (!cast<Instruction>(NAME # "v2i64_v4i32")
+ (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub),
+ V128:$Rn, V128:$Rm)>;
+}
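+
+// Illustrative use (assumed): 'defm ADDHN : SIMDNarrowThreeVectorBHS<0, 0b0100,
+// "addhn", int_aarch64_neon_addhn>;' with subhn/raddhn/rsubhn as siblings.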
+
+multiclass SIMDDifferentThreeVectorBD<bit U, bits<4> opc, string asm,
+ Intrinsic IntOp> {
+ def v8i8 : BaseSIMDDifferentThreeVector<U, 0b000, opc,
+ V128, V64, V64,
+ asm, ".8h", ".8b", ".8b",
+ [(set (v8i16 V128:$Rd), (IntOp (v8i8 V64:$Rn), (v8i8 V64:$Rm)))]>;
+ def v16i8 : BaseSIMDDifferentThreeVector<U, 0b001, opc,
+ V128, V128, V128,
+ asm#"2", ".8h", ".16b", ".16b", []>;
+ let Predicates = [HasCrypto] in {
+ def v1i64 : BaseSIMDDifferentThreeVector<U, 0b110, opc,
+ V128, V64, V64,
+ asm, ".1q", ".1d", ".1d", []>;
+ def v2i64 : BaseSIMDDifferentThreeVector<U, 0b111, opc,
+ V128, V128, V128,
+ asm#"2", ".1q", ".2d", ".2d", []>;
+ }
+
+ def : Pat<(v8i16 (IntOp (v8i8 (extract_high_v16i8 V128:$Rn)),
+ (v8i8 (extract_high_v16i8 V128:$Rm)))),
+ (!cast<Instruction>(NAME#"v16i8") V128:$Rn, V128:$Rm)>;
+}
+
+multiclass SIMDLongThreeVectorHS<bit U, bits<4> opc, string asm,
+ SDPatternOperator OpNode> {
+ def v4i16_v4i32 : BaseSIMDDifferentThreeVector<U, 0b010, opc,
+ V128, V64, V64,
+ asm, ".4s", ".4h", ".4h",
+ [(set (v4i32 V128:$Rd), (OpNode (v4i16 V64:$Rn), (v4i16 V64:$Rm)))]>;
+ def v8i16_v4i32 : BaseSIMDDifferentThreeVector<U, 0b011, opc,
+ V128, V128, V128,
+ asm#"2", ".4s", ".8h", ".8h",
+ [(set (v4i32 V128:$Rd), (OpNode (extract_high_v8i16 V128:$Rn),
+ (extract_high_v8i16 V128:$Rm)))]>;
+ def v2i32_v2i64 : BaseSIMDDifferentThreeVector<U, 0b100, opc,
+ V128, V64, V64,
+ asm, ".2d", ".2s", ".2s",
+ [(set (v2i64 V128:$Rd), (OpNode (v2i32 V64:$Rn), (v2i32 V64:$Rm)))]>;
+ def v4i32_v2i64 : BaseSIMDDifferentThreeVector<U, 0b101, opc,
+ V128, V128, V128,
+ asm#"2", ".2d", ".4s", ".4s",
+ [(set (v2i64 V128:$Rd), (OpNode (extract_high_v4i32 V128:$Rn),
+ (extract_high_v4i32 V128:$Rm)))]>;
+}
+
+multiclass SIMDLongThreeVectorBHSabdl<bit U, bits<4> opc, string asm,
+ SDPatternOperator OpNode = null_frag> {
+ def v8i8_v8i16 : BaseSIMDDifferentThreeVector<U, 0b000, opc,
+ V128, V64, V64,
+ asm, ".8h", ".8b", ".8b",
+ [(set (v8i16 V128:$Rd),
+ (zext (v8i8 (OpNode (v8i8 V64:$Rn), (v8i8 V64:$Rm)))))]>;
+ def v16i8_v8i16 : BaseSIMDDifferentThreeVector<U, 0b001, opc,
+ V128, V128, V128,
+ asm#"2", ".8h", ".16b", ".16b",
+ [(set (v8i16 V128:$Rd),
+ (zext (v8i8 (OpNode (extract_high_v16i8 V128:$Rn),
+ (extract_high_v16i8 V128:$Rm)))))]>;
+ def v4i16_v4i32 : BaseSIMDDifferentThreeVector<U, 0b010, opc,
+ V128, V64, V64,
+ asm, ".4s", ".4h", ".4h",
+ [(set (v4i32 V128:$Rd),
+ (zext (v4i16 (OpNode (v4i16 V64:$Rn), (v4i16 V64:$Rm)))))]>;
+ def v8i16_v4i32 : BaseSIMDDifferentThreeVector<U, 0b011, opc,
+ V128, V128, V128,
+ asm#"2", ".4s", ".8h", ".8h",
+ [(set (v4i32 V128:$Rd),
+ (zext (v4i16 (OpNode (extract_high_v8i16 V128:$Rn),
+ (extract_high_v8i16 V128:$Rm)))))]>;
+ def v2i32_v2i64 : BaseSIMDDifferentThreeVector<U, 0b100, opc,
+ V128, V64, V64,
+ asm, ".2d", ".2s", ".2s",
+ [(set (v2i64 V128:$Rd),
+ (zext (v2i32 (OpNode (v2i32 V64:$Rn), (v2i32 V64:$Rm)))))]>;
+ def v4i32_v2i64 : BaseSIMDDifferentThreeVector<U, 0b101, opc,
+ V128, V128, V128,
+ asm#"2", ".2d", ".4s", ".4s",
+ [(set (v2i64 V128:$Rd),
+ (zext (v2i32 (OpNode (extract_high_v4i32 V128:$Rn),
+ (extract_high_v4i32 V128:$Rm)))))]>;
+}
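+
+// Illustrative use (assumed): sabdl/uabdl instantiate this; the zext of the
+// absolute-difference result gives the widening behaviour.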
+
+multiclass SIMDLongThreeVectorTiedBHSabal<bit U, bits<4> opc,
+ string asm,
+ SDPatternOperator OpNode> {
+ def v8i8_v8i16 : BaseSIMDDifferentThreeVectorTied<U, 0b000, opc,
+ V128, V64, V64,
+ asm, ".8h", ".8b", ".8b",
+ [(set (v8i16 V128:$dst),
+ (add (v8i16 V128:$Rd),
+ (zext (v8i8 (OpNode (v8i8 V64:$Rn), (v8i8 V64:$Rm))))))]>;
+ def v16i8_v8i16 : BaseSIMDDifferentThreeVectorTied<U, 0b001, opc,
+ V128, V128, V128,
+ asm#"2", ".8h", ".16b", ".16b",
+ [(set (v8i16 V128:$dst),
+ (add (v8i16 V128:$Rd),
+ (zext (v8i8 (OpNode (extract_high_v16i8 V128:$Rn),
+ (extract_high_v16i8 V128:$Rm))))))]>;
+ def v4i16_v4i32 : BaseSIMDDifferentThreeVectorTied<U, 0b010, opc,
+ V128, V64, V64,
+ asm, ".4s", ".4h", ".4h",
+ [(set (v4i32 V128:$dst),
+ (add (v4i32 V128:$Rd),
+ (zext (v4i16 (OpNode (v4i16 V64:$Rn), (v4i16 V64:$Rm))))))]>;
+ def v8i16_v4i32 : BaseSIMDDifferentThreeVectorTied<U, 0b011, opc,
+ V128, V128, V128,
+ asm#"2", ".4s", ".8h", ".8h",
+ [(set (v4i32 V128:$dst),
+ (add (v4i32 V128:$Rd),
+ (zext (v4i16 (OpNode (extract_high_v8i16 V128:$Rn),
+ (extract_high_v8i16 V128:$Rm))))))]>;
+ def v2i32_v2i64 : BaseSIMDDifferentThreeVectorTied<U, 0b100, opc,
+ V128, V64, V64,
+ asm, ".2d", ".2s", ".2s",
+ [(set (v2i64 V128:$dst),
+ (add (v2i64 V128:$Rd),
+ (zext (v2i32 (OpNode (v2i32 V64:$Rn), (v2i32 V64:$Rm))))))]>;
+ def v4i32_v2i64 : BaseSIMDDifferentThreeVectorTied<U, 0b101, opc,
+ V128, V128, V128,
+ asm#"2", ".2d", ".4s", ".4s",
+ [(set (v2i64 V128:$dst),
+ (add (v2i64 V128:$Rd),
+ (zext (v2i32 (OpNode (extract_high_v4i32 V128:$Rn),
+ (extract_high_v4i32 V128:$Rm))))))]>;
+}
+
+multiclass SIMDLongThreeVectorBHS<bit U, bits<4> opc, string asm,
+ SDPatternOperator OpNode = null_frag> {
+ def v8i8_v8i16 : BaseSIMDDifferentThreeVector<U, 0b000, opc,
+ V128, V64, V64,
+ asm, ".8h", ".8b", ".8b",
+ [(set (v8i16 V128:$Rd), (OpNode (v8i8 V64:$Rn), (v8i8 V64:$Rm)))]>;
+ def v16i8_v8i16 : BaseSIMDDifferentThreeVector<U, 0b001, opc,
+ V128, V128, V128,
+ asm#"2", ".8h", ".16b", ".16b",
+ [(set (v8i16 V128:$Rd), (OpNode (extract_high_v16i8 V128:$Rn),
+ (extract_high_v16i8 V128:$Rm)))]>;
+ def v4i16_v4i32 : BaseSIMDDifferentThreeVector<U, 0b010, opc,
+ V128, V64, V64,
+ asm, ".4s", ".4h", ".4h",
+ [(set (v4i32 V128:$Rd), (OpNode (v4i16 V64:$Rn), (v4i16 V64:$Rm)))]>;
+ def v8i16_v4i32 : BaseSIMDDifferentThreeVector<U, 0b011, opc,
+ V128, V128, V128,
+ asm#"2", ".4s", ".8h", ".8h",
+ [(set (v4i32 V128:$Rd), (OpNode (extract_high_v8i16 V128:$Rn),
+ (extract_high_v8i16 V128:$Rm)))]>;
+ def v2i32_v2i64 : BaseSIMDDifferentThreeVector<U, 0b100, opc,
+ V128, V64, V64,
+ asm, ".2d", ".2s", ".2s",
+ [(set (v2i64 V128:$Rd), (OpNode (v2i32 V64:$Rn), (v2i32 V64:$Rm)))]>;
+ def v4i32_v2i64 : BaseSIMDDifferentThreeVector<U, 0b101, opc,
+ V128, V128, V128,
+ asm#"2", ".2d", ".4s", ".4s",
+ [(set (v2i64 V128:$Rd), (OpNode (extract_high_v4i32 V128:$Rn),
+ (extract_high_v4i32 V128:$Rm)))]>;
+}
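+
+// Usage sketch (illustrative; the opcode bits and intrinsic are assumed):
+//   defm UMULL : SIMDLongThreeVectorBHS<1, 0b1100, "umull",
+//                                       int_aarch64_neon_umull>;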
+
+multiclass SIMDLongThreeVectorTiedBHS<bit U, bits<4> opc,
+ string asm,
+ SDPatternOperator OpNode> {
+ def v8i8_v8i16 : BaseSIMDDifferentThreeVectorTied<U, 0b000, opc,
+ V128, V64, V64,
+ asm, ".8h", ".8b", ".8b",
+ [(set (v8i16 V128:$dst),
+ (OpNode (v8i16 V128:$Rd), (v8i8 V64:$Rn), (v8i8 V64:$Rm)))]>;
+ def v16i8_v8i16 : BaseSIMDDifferentThreeVectorTied<U, 0b001, opc,
+ V128, V128, V128,
+ asm#"2", ".8h", ".16b", ".16b",
+ [(set (v8i16 V128:$dst),
+ (OpNode (v8i16 V128:$Rd),
+ (extract_high_v16i8 V128:$Rn),
+ (extract_high_v16i8 V128:$Rm)))]>;
+ def v4i16_v4i32 : BaseSIMDDifferentThreeVectorTied<U, 0b010, opc,
+ V128, V64, V64,
+ asm, ".4s", ".4h", ".4h",
+ [(set (v4i32 V128:$dst),
+ (OpNode (v4i32 V128:$Rd), (v4i16 V64:$Rn), (v4i16 V64:$Rm)))]>;
+ def v8i16_v4i32 : BaseSIMDDifferentThreeVectorTied<U, 0b011, opc,
+ V128, V128, V128,
+ asm#"2", ".4s", ".8h", ".8h",
+ [(set (v4i32 V128:$dst),
+ (OpNode (v4i32 V128:$Rd),
+ (extract_high_v8i16 V128:$Rn),
+ (extract_high_v8i16 V128:$Rm)))]>;
+ def v2i32_v2i64 : BaseSIMDDifferentThreeVectorTied<U, 0b100, opc,
+ V128, V64, V64,
+ asm, ".2d", ".2s", ".2s",
+ [(set (v2i64 V128:$dst),
+ (OpNode (v2i64 V128:$Rd), (v2i32 V64:$Rn), (v2i32 V64:$Rm)))]>;
+ def v4i32_v2i64 : BaseSIMDDifferentThreeVectorTied<U, 0b101, opc,
+ V128, V128, V128,
+ asm#"2", ".2d", ".4s", ".4s",
+ [(set (v2i64 V128:$dst),
+ (OpNode (v2i64 V128:$Rd),
+ (extract_high_v4i32 V128:$Rn),
+ (extract_high_v4i32 V128:$Rm)))]>;
+}
+
+multiclass SIMDLongThreeVectorSQDMLXTiedHS<bit U, bits<4> opc, string asm,
+ SDPatternOperator Accum> {
+ def v4i16_v4i32 : BaseSIMDDifferentThreeVectorTied<U, 0b010, opc,
+ V128, V64, V64,
+ asm, ".4s", ".4h", ".4h",
+ [(set (v4i32 V128:$dst),
+ (Accum (v4i32 V128:$Rd),
+ (v4i32 (int_aarch64_neon_sqdmull (v4i16 V64:$Rn),
+ (v4i16 V64:$Rm)))))]>;
+ def v8i16_v4i32 : BaseSIMDDifferentThreeVectorTied<U, 0b011, opc,
+ V128, V128, V128,
+ asm#"2", ".4s", ".8h", ".8h",
+ [(set (v4i32 V128:$dst),
+ (Accum (v4i32 V128:$Rd),
+ (v4i32 (int_aarch64_neon_sqdmull (extract_high_v8i16 V128:$Rn),
+ (extract_high_v8i16 V128:$Rm)))))]>;
+ def v2i32_v2i64 : BaseSIMDDifferentThreeVectorTied<U, 0b100, opc,
+ V128, V64, V64,
+ asm, ".2d", ".2s", ".2s",
+ [(set (v2i64 V128:$dst),
+ (Accum (v2i64 V128:$Rd),
+ (v2i64 (int_aarch64_neon_sqdmull (v2i32 V64:$Rn),
+ (v2i32 V64:$Rm)))))]>;
+ def v4i32_v2i64 : BaseSIMDDifferentThreeVectorTied<U, 0b101, opc,
+ V128, V128, V128,
+ asm#"2", ".2d", ".4s", ".4s",
+ [(set (v2i64 V128:$dst),
+ (Accum (v2i64 V128:$Rd),
+ (v2i64 (int_aarch64_neon_sqdmull (extract_high_v4i32 V128:$Rn),
+ (extract_high_v4i32 V128:$Rm)))))]>;
+}
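+
+// Usage sketch (illustrative; opcode bits assumed): the accumulating
+// saturating doubling multiplies pass the saturating add/sub intrinsic
+// in as Accum, e.g.
+//   defm SQDMLAL : SIMDLongThreeVectorSQDMLXTiedHS<0, 0b1001, "sqdmlal",
+//                                                  int_aarch64_neon_sqadd>;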
+
+multiclass SIMDWideThreeVectorBHS<bit U, bits<4> opc, string asm,
+ SDPatternOperator OpNode> {
+ def v8i8_v8i16 : BaseSIMDDifferentThreeVector<U, 0b000, opc,
+ V128, V128, V64,
+ asm, ".8h", ".8h", ".8b",
+ [(set (v8i16 V128:$Rd), (OpNode (v8i16 V128:$Rn), (v8i8 V64:$Rm)))]>;
+ def v16i8_v8i16 : BaseSIMDDifferentThreeVector<U, 0b001, opc,
+ V128, V128, V128,
+ asm#"2", ".8h", ".8h", ".16b",
+ [(set (v8i16 V128:$Rd), (OpNode (v8i16 V128:$Rn),
+ (extract_high_v16i8 V128:$Rm)))]>;
+ def v4i16_v4i32 : BaseSIMDDifferentThreeVector<U, 0b010, opc,
+ V128, V128, V64,
+ asm, ".4s", ".4s", ".4h",
+ [(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn), (v4i16 V64:$Rm)))]>;
+ def v8i16_v4i32 : BaseSIMDDifferentThreeVector<U, 0b011, opc,
+ V128, V128, V128,
+ asm#"2", ".4s", ".4s", ".8h",
+ [(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn),
+ (extract_high_v8i16 V128:$Rm)))]>;
+ def v2i32_v2i64 : BaseSIMDDifferentThreeVector<U, 0b100, opc,
+ V128, V128, V64,
+ asm, ".2d", ".2d", ".2s",
+ [(set (v2i64 V128:$Rd), (OpNode (v2i64 V128:$Rn), (v2i32 V64:$Rm)))]>;
+ def v4i32_v2i64 : BaseSIMDDifferentThreeVector<U, 0b101, opc,
+ V128, V128, V128,
+ asm#"2", ".2d", ".2d", ".4s",
+ [(set (v2i64 V128:$Rd), (OpNode (v2i64 V128:$Rn),
+ (extract_high_v4i32 V128:$Rm)))]>;
+}
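+
+// Usage sketch (illustrative; the opcode and pattern fragment are
+// assumptions):
+//   defm UADDW : SIMDWideThreeVectorBHS<1, 0b0001, "uaddw",
+//                  BinOpFrag<(add node:$LHS, (zext node:$RHS))>>;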
+
+//----------------------------------------------------------------------------
+// AdvSIMD bitwise extract from vector
+//----------------------------------------------------------------------------
+
+class BaseSIMDBitwiseExtract<bit size, RegisterOperand regtype, ValueType vty,
+ string asm, string kind>
+ : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm, i32imm:$imm), asm,
+ "{\t$Rd" # kind # ", $Rn" # kind # ", $Rm" # kind # ", $imm" #
+ "|" # kind # "\t$Rd, $Rn, $Rm, $imm}", "",
+ [(set (vty regtype:$Rd),
+ (AArch64ext regtype:$Rn, regtype:$Rm, (i32 imm:$imm)))]>,
+ Sched<[WriteV]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ bits<5> Rm;
+ bits<4> imm;
+ let Inst{31} = 0;
+ let Inst{30} = size;
+ let Inst{29-21} = 0b101110000;
+ let Inst{20-16} = Rm;
+ let Inst{15} = 0;
+ let Inst{14-11} = imm;
+ let Inst{10} = 0;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+}
+
+multiclass SIMDBitwiseExtract<string asm> {
+ def v8i8 : BaseSIMDBitwiseExtract<0, V64, v8i8, asm, ".8b"> {
+ let imm{3} = 0;
+ }
+ def v16i8 : BaseSIMDBitwiseExtract<1, V128, v16i8, asm, ".16b">;
+}
+
+//----------------------------------------------------------------------------
+// AdvSIMD zip vector
+//----------------------------------------------------------------------------
+
+class BaseSIMDZipVector<bits<3> size, bits<3> opc, RegisterOperand regtype,
+ string asm, string kind, SDNode OpNode, ValueType valty>
+ : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm), asm,
+ "{\t$Rd" # kind # ", $Rn" # kind # ", $Rm" # kind #
+ "|" # kind # "\t$Rd, $Rn, $Rm}", "",
+ [(set (valty regtype:$Rd), (OpNode regtype:$Rn, regtype:$Rm))]>,
+ Sched<[WriteV]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ bits<5> Rm;
+ let Inst{31} = 0;
+ let Inst{30} = size{0};
+ let Inst{29-24} = 0b001110;
+ let Inst{23-22} = size{2-1};
+ let Inst{21} = 0;
+ let Inst{20-16} = Rm;
+ let Inst{15} = 0;
+ let Inst{14-12} = opc;
+ let Inst{11-10} = 0b10;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
}
-// Format AdvSIMD 3 scalar registers with same type
+multiclass SIMDZipVector<bits<3> opc, string asm,
+ SDNode OpNode> {
+ def v8i8 : BaseSIMDZipVector<0b000, opc, V64,
+ asm, ".8b", OpNode, v8i8>;
+ def v16i8 : BaseSIMDZipVector<0b001, opc, V128,
+ asm, ".16b", OpNode, v16i8>;
+ def v4i16 : BaseSIMDZipVector<0b010, opc, V64,
+ asm, ".4h", OpNode, v4i16>;
+ def v8i16 : BaseSIMDZipVector<0b011, opc, V128,
+ asm, ".8h", OpNode, v8i16>;
+ def v2i32 : BaseSIMDZipVector<0b100, opc, V64,
+ asm, ".2s", OpNode, v2i32>;
+ def v4i32 : BaseSIMDZipVector<0b101, opc, V128,
+ asm, ".4s", OpNode, v4i32>;
+ def v2i64 : BaseSIMDZipVector<0b111, opc, V128,
+ asm, ".2d", OpNode, v2i64>;
+
+ def : Pat<(v2f32 (OpNode V64:$Rn, V64:$Rm)),
+ (!cast<Instruction>(NAME#"v2i32") V64:$Rn, V64:$Rm)>;
+ def : Pat<(v4f32 (OpNode V128:$Rn, V128:$Rm)),
+ (!cast<Instruction>(NAME#"v4i32") V128:$Rn, V128:$Rm)>;
+ def : Pat<(v2f64 (OpNode V128:$Rn, V128:$Rm)),
+ (!cast<Instruction>(NAME#"v2i64") V128:$Rn, V128:$Rm)>;
+}
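+
+// Usage sketch (illustrative; opcode values assumed): each permute
+// instruction is one instance of this multiclass, e.g.
+//   defm ZIP1 : SIMDZipVector<0b011, "zip1", AArch64zip1>;
+//   defm TRN2 : SIMDZipVector<0b110, "trn2", AArch64trn2>;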
-class NeonI_Scalar3Same<bit u, bits<2> size, bits<5> opcode,
- dag outs, dag ins, string asmstr,
- list<dag> patterns, InstrItinClass itin>
- : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
- let Inst{31} = 0b0;
- let Inst{30} = 0b1;
- let Inst{29} = u;
+//----------------------------------------------------------------------------
+// AdvSIMD three register scalar instructions
+//----------------------------------------------------------------------------
+
+let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in
+class BaseSIMDThreeScalar<bit U, bits<2> size, bits<5> opcode,
+ RegisterClass regtype, string asm,
+ list<dag> pattern>
+ : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm), asm,
+ "\t$Rd, $Rn, $Rm", "", pattern>,
+ Sched<[WriteV]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ bits<5> Rm;
+ let Inst{31-30} = 0b01;
+ let Inst{29} = U;
let Inst{28-24} = 0b11110;
let Inst{23-22} = size;
- let Inst{21} = 0b1;
- // Inherit Rm in 20-16
+ let Inst{21} = 1;
+ let Inst{20-16} = Rm;
let Inst{15-11} = opcode;
- let Inst{10} = 0b1;
- // Inherit Rn in 9-5
- // Inherit Rd in 4-0
+ let Inst{10} = 1;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
}
+multiclass SIMDThreeScalarD<bit U, bits<5> opc, string asm,
+ SDPatternOperator OpNode> {
+ def v1i64 : BaseSIMDThreeScalar<U, 0b11, opc, FPR64, asm,
+ [(set (v1i64 FPR64:$Rd), (OpNode (v1i64 FPR64:$Rn), (v1i64 FPR64:$Rm)))]>;
+}
-// Format AdvSIMD 2 vector registers miscellaneous
-class NeonI_2VMisc<bit q, bit u, bits<2> size, bits<5> opcode,
- dag outs, dag ins, string asmstr,
- list<dag> patterns, InstrItinClass itin>
- : A64InstRdn<outs, ins, asmstr, patterns, itin> {
- let Inst{31} = 0b0;
- let Inst{30} = q;
- let Inst{29} = u;
- let Inst{28-24} = 0b01110;
+multiclass SIMDThreeScalarBHSD<bit U, bits<5> opc, string asm,
+ SDPatternOperator OpNode> {
+ def v1i64 : BaseSIMDThreeScalar<U, 0b11, opc, FPR64, asm,
+ [(set (v1i64 FPR64:$Rd), (OpNode (v1i64 FPR64:$Rn), (v1i64 FPR64:$Rm)))]>;
+ def v1i32 : BaseSIMDThreeScalar<U, 0b10, opc, FPR32, asm, []>;
+ def v1i16 : BaseSIMDThreeScalar<U, 0b01, opc, FPR16, asm, []>;
+ def v1i8 : BaseSIMDThreeScalar<U, 0b00, opc, FPR8 , asm, []>;
+
+ def : Pat<(i64 (OpNode (i64 FPR64:$Rn), (i64 FPR64:$Rm))),
+ (!cast<Instruction>(NAME#"v1i64") FPR64:$Rn, FPR64:$Rm)>;
+ def : Pat<(i32 (OpNode (i32 FPR32:$Rn), (i32 FPR32:$Rm))),
+ (!cast<Instruction>(NAME#"v1i32") FPR32:$Rn, FPR32:$Rm)>;
+}
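+
+// Usage sketch (illustrative; the opcode bits and intrinsic are assumed):
+//   defm SQADD : SIMDThreeScalarBHSD<0, 0b00001, "sqadd",
+//                                    int_aarch64_neon_sqadd>;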
+
+multiclass SIMDThreeScalarHS<bit U, bits<5> opc, string asm,
+ SDPatternOperator OpNode> {
+ def v1i32 : BaseSIMDThreeScalar<U, 0b10, opc, FPR32, asm,
+ [(set FPR32:$Rd, (OpNode FPR32:$Rn, FPR32:$Rm))]>;
+ def v1i16 : BaseSIMDThreeScalar<U, 0b01, opc, FPR16, asm, []>;
+}
+
+multiclass SIMDThreeScalarSD<bit U, bit S, bits<5> opc, string asm,
+ SDPatternOperator OpNode = null_frag> {
+ let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
+ def #NAME#64 : BaseSIMDThreeScalar<U, {S,1}, opc, FPR64, asm,
+ [(set (f64 FPR64:$Rd), (OpNode (f64 FPR64:$Rn), (f64 FPR64:$Rm)))]>;
+ def #NAME#32 : BaseSIMDThreeScalar<U, {S,0}, opc, FPR32, asm,
+ [(set FPR32:$Rd, (OpNode FPR32:$Rn, FPR32:$Rm))]>;
+ }
+
+ def : Pat<(v1f64 (OpNode (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
+ (!cast<Instruction>(NAME # "64") FPR64:$Rn, FPR64:$Rm)>;
+}
+
+multiclass SIMDThreeScalarFPCmp<bit U, bit S, bits<5> opc, string asm,
+ SDPatternOperator OpNode = null_frag> {
+ let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
+ def #NAME#64 : BaseSIMDThreeScalar<U, {S,1}, opc, FPR64, asm,
+ [(set (i64 FPR64:$Rd), (OpNode (f64 FPR64:$Rn), (f64 FPR64:$Rm)))]>;
+ def #NAME#32 : BaseSIMDThreeScalar<U, {S,0}, opc, FPR32, asm,
+ [(set (i32 FPR32:$Rd), (OpNode (f32 FPR32:$Rn), (f32 FPR32:$Rm)))]>;
+ }
+
+ def : Pat<(v1i64 (OpNode (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
+ (!cast<Instruction>(NAME # "64") FPR64:$Rn, FPR64:$Rm)>;
+}
+
+class BaseSIMDThreeScalarMixed<bit U, bits<2> size, bits<5> opcode,
+ dag oops, dag iops, string asm, string cstr, list<dag> pat>
+ : I<oops, iops, asm,
+ "\t$Rd, $Rn, $Rm", cstr, pat>,
+ Sched<[WriteV]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ bits<5> Rm;
+ let Inst{31-30} = 0b01;
+ let Inst{29} = U;
+ let Inst{28-24} = 0b11110;
+ let Inst{23-22} = size;
+ let Inst{21} = 1;
+ let Inst{20-16} = Rm;
+ let Inst{15-11} = opcode;
+ let Inst{10} = 0;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+}
+
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
+multiclass SIMDThreeScalarMixedHS<bit U, bits<5> opc, string asm,
+ SDPatternOperator OpNode = null_frag> {
+ def i16 : BaseSIMDThreeScalarMixed<U, 0b01, opc,
+ (outs FPR32:$Rd),
+ (ins FPR16:$Rn, FPR16:$Rm), asm, "", []>;
+ def i32 : BaseSIMDThreeScalarMixed<U, 0b10, opc,
+ (outs FPR64:$Rd),
+ (ins FPR32:$Rn, FPR32:$Rm), asm, "",
+ [(set (i64 FPR64:$Rd), (OpNode (i32 FPR32:$Rn), (i32 FPR32:$Rm)))]>;
+}
+
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
+multiclass SIMDThreeScalarMixedTiedHS<bit U, bits<5> opc, string asm,
+ SDPatternOperator OpNode = null_frag> {
+ def i16 : BaseSIMDThreeScalarMixed<U, 0b01, opc,
+ (outs FPR32:$dst),
+ (ins FPR32:$Rd, FPR16:$Rn, FPR16:$Rm),
+ asm, "$Rd = $dst", []>;
+ def i32 : BaseSIMDThreeScalarMixed<U, 0b10, opc,
+ (outs FPR64:$dst),
+ (ins FPR64:$Rd, FPR32:$Rn, FPR32:$Rm),
+ asm, "$Rd = $dst",
+ [(set (i64 FPR64:$dst),
+ (OpNode (i64 FPR64:$Rd), (i32 FPR32:$Rn), (i32 FPR32:$Rm)))]>;
+}
+
+//----------------------------------------------------------------------------
+// AdvSIMD two register scalar instructions
+//----------------------------------------------------------------------------
+
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
+class BaseSIMDTwoScalar<bit U, bits<2> size, bits<5> opcode,
+ RegisterClass regtype, RegisterClass regtype2,
+ string asm, list<dag> pat>
+ : I<(outs regtype:$Rd), (ins regtype2:$Rn), asm,
+ "\t$Rd, $Rn", "", pat>,
+ Sched<[WriteV]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ let Inst{31-30} = 0b01;
+ let Inst{29} = U;
+ let Inst{28-24} = 0b11110;
let Inst{23-22} = size;
let Inst{21-17} = 0b10000;
let Inst{16-12} = opcode;
let Inst{11-10} = 0b10;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+}
- // Inherit Rn in 9-5
- // Inherit Rd in 4-0
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
+class BaseSIMDTwoScalarTied<bit U, bits<2> size, bits<5> opcode,
+ RegisterClass regtype, RegisterClass regtype2,
+ string asm, list<dag> pat>
+ : I<(outs regtype:$dst), (ins regtype:$Rd, regtype2:$Rn), asm,
+ "\t$Rd, $Rn", "$Rd = $dst", pat>,
+ Sched<[WriteV]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ let Inst{31-30} = 0b01;
+ let Inst{29} = U;
+ let Inst{28-24} = 0b11110;
+ let Inst{23-22} = size;
+ let Inst{21-17} = 0b10000;
+ let Inst{16-12} = opcode;
+ let Inst{11-10} = 0b10;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
}
-// Format AdvSIMD 2 vector 1 immediate shift
-class NeonI_2VShiftImm<bit q, bit u, bits<5> opcode,
- dag outs, dag ins, string asmstr,
- list<dag> patterns, InstrItinClass itin>
- : A64InstRdn<outs, ins, asmstr, patterns, itin> {
- bits<7> Imm;
- let Inst{31} = 0b0;
- let Inst{30} = q;
- let Inst{29} = u;
- let Inst{28-23} = 0b011110;
- let Inst{22-16} = Imm;
- let Inst{15-11} = opcode;
- let Inst{10} = 0b1;
-
- // Inherit Rn in 9-5
- // Inherit Rd in 4-0
-}
-
-// Format AdvSIMD duplicate and insert
-class NeonI_copy<bit q, bit op, bits<4> imm4,
- dag outs, dag ins, string asmstr,
- list<dag> patterns, InstrItinClass itin>
- : A64InstRdn<outs, ins, asmstr, patterns, itin> {
- bits<5> Imm5;
- let Inst{31} = 0b0;
- let Inst{30} = q;
- let Inst{29} = op;
- let Inst{28-21} = 0b01110000;
- let Inst{20-16} = Imm5;
- let Inst{15} = 0b0;
- let Inst{14-11} = imm4;
- let Inst{10} = 0b1;
-
- // Inherit Rn in 9-5
- // Inherit Rd in 4-0
-}
-// Format AdvSIMD insert from element to vector
-class NeonI_insert<bit q, bit op,
- dag outs, dag ins, string asmstr,
- list<dag> patterns, InstrItinClass itin>
- : A64InstRdn<outs, ins, asmstr, patterns, itin> {
- bits<5> Imm5;
- bits<4> Imm4;
- let Inst{31} = 0b0;
- let Inst{30} = q;
- let Inst{29} = op;
- let Inst{28-21} = 0b01110000;
- let Inst{20-16} = Imm5;
- let Inst{15} = 0b0;
- let Inst{14-11} = Imm4;
- let Inst{10} = 0b1;
-
- // Inherit Rn in 9-5
- // Inherit Rd in 4-0
-}
-
-// Format AdvSIMD scalar pairwise
-class NeonI_ScalarPair<bit u, bits<2> size, bits<5> opcode,
- dag outs, dag ins, string asmstr,
- list<dag> patterns, InstrItinClass itin>
- : A64InstRdn<outs, ins, asmstr, patterns, itin> {
- let Inst{31} = 0b0;
- let Inst{30} = 0b1;
- let Inst{29} = u;
+
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
+class BaseSIMDCmpTwoScalar<bit U, bits<2> size, bits<5> opcode,
+ RegisterClass regtype, string asm, string zero>
+ : I<(outs regtype:$Rd), (ins regtype:$Rn), asm,
+ "\t$Rd, $Rn, #" # zero, "", []>,
+ Sched<[WriteV]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ let Inst{31-30} = 0b01;
+ let Inst{29} = U;
let Inst{28-24} = 0b11110;
let Inst{23-22} = size;
- let Inst{21-17} = 0b11000;
+ let Inst{21-17} = 0b10000;
+ let Inst{16-12} = opcode;
+ let Inst{11-10} = 0b10;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+}
+
+class SIMDInexactCvtTwoScalar<bits<5> opcode, string asm>
+ : I<(outs FPR32:$Rd), (ins FPR64:$Rn), asm, "\t$Rd, $Rn", "",
+ [(set (f32 FPR32:$Rd), (int_aarch64_sisd_fcvtxn (f64 FPR64:$Rn)))]>,
+ Sched<[WriteV]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ let Inst{31-17} = 0b011111100110000;
let Inst{16-12} = opcode;
let Inst{11-10} = 0b10;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+}
+
+multiclass SIMDCmpTwoScalarD<bit U, bits<5> opc, string asm,
+ SDPatternOperator OpNode> {
+ def v1i64rz : BaseSIMDCmpTwoScalar<U, 0b11, opc, FPR64, asm, "0">;
- // Inherit Rn in 9-5
- // Inherit Rd in 4-0
+ def : Pat<(v1i64 (OpNode FPR64:$Rn)),
+ (!cast<Instruction>(NAME # v1i64rz) FPR64:$Rn)>;
}
-// Format AdvSIMD 2 vector across lanes
-class NeonI_2VAcross<bit q, bit u, bits<2> size, bits<5> opcode,
- dag outs, dag ins, string asmstr,
- list<dag> patterns, InstrItinClass itin>
- : A64InstRdn<outs, ins, asmstr, patterns, itin>
-{
- let Inst{31} = 0b0;
- let Inst{30} = q;
- let Inst{29} = u;
- let Inst{28-24} = 0b01110;
+multiclass SIMDCmpTwoScalarSD<bit U, bit S, bits<5> opc, string asm,
+ SDPatternOperator OpNode> {
+ def v1i64rz : BaseSIMDCmpTwoScalar<U, {S,1}, opc, FPR64, asm, "0.0">;
+ def v1i32rz : BaseSIMDCmpTwoScalar<U, {S,0}, opc, FPR32, asm, "0.0">;
+
+ def : InstAlias<asm # " $Rd, $Rn, #0",
+ (!cast<Instruction>(NAME # v1i64rz) FPR64:$Rd, FPR64:$Rn), 0>;
+ def : InstAlias<asm # " $Rd, $Rn, #0",
+ (!cast<Instruction>(NAME # v1i32rz) FPR32:$Rd, FPR32:$Rn), 0>;
+
+ def : Pat<(v1i64 (OpNode (v1f64 FPR64:$Rn))),
+ (!cast<Instruction>(NAME # v1i64rz) FPR64:$Rn)>;
+}
+
+multiclass SIMDTwoScalarD<bit U, bits<5> opc, string asm,
+ SDPatternOperator OpNode = null_frag> {
+ def v1i64 : BaseSIMDTwoScalar<U, 0b11, opc, FPR64, FPR64, asm,
+ [(set (v1i64 FPR64:$Rd), (OpNode (v1i64 FPR64:$Rn)))]>;
+
+ def : Pat<(i64 (OpNode (i64 FPR64:$Rn))),
+ (!cast<Instruction>(NAME # "v1i64") FPR64:$Rn)>;
+}
+
+multiclass SIMDTwoScalarSD<bit U, bit S, bits<5> opc, string asm> {
+ def v1i64 : BaseSIMDTwoScalar<U, {S,1}, opc, FPR64, FPR64, asm,[]>;
+ def v1i32 : BaseSIMDTwoScalar<U, {S,0}, opc, FPR32, FPR32, asm,[]>;
+}
+
+multiclass SIMDTwoScalarCVTSD<bit U, bit S, bits<5> opc, string asm,
+ SDPatternOperator OpNode> {
+ def v1i64 : BaseSIMDTwoScalar<U, {S,1}, opc, FPR64, FPR64, asm,
+ [(set FPR64:$Rd, (OpNode (f64 FPR64:$Rn)))]>;
+ def v1i32 : BaseSIMDTwoScalar<U, {S,0}, opc, FPR32, FPR32, asm,
+ [(set FPR32:$Rd, (OpNode (f32 FPR32:$Rn)))]>;
+}
+
+multiclass SIMDTwoScalarBHSD<bit U, bits<5> opc, string asm,
+ SDPatternOperator OpNode = null_frag> {
+ let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
+ def v1i64 : BaseSIMDTwoScalar<U, 0b11, opc, FPR64, FPR64, asm,
+ [(set (i64 FPR64:$Rd), (OpNode (i64 FPR64:$Rn)))]>;
+ def v1i32 : BaseSIMDTwoScalar<U, 0b10, opc, FPR32, FPR32, asm,
+ [(set (i32 FPR32:$Rd), (OpNode (i32 FPR32:$Rn)))]>;
+ def v1i16 : BaseSIMDTwoScalar<U, 0b01, opc, FPR16, FPR16, asm, []>;
+ def v1i8 : BaseSIMDTwoScalar<U, 0b00, opc, FPR8 , FPR8 , asm, []>;
+ }
+
+ def : Pat<(v1i64 (OpNode (v1i64 FPR64:$Rn))),
+ (!cast<Instruction>(NAME # v1i64) FPR64:$Rn)>;
+}
+
+multiclass SIMDTwoScalarBHSDTied<bit U, bits<5> opc, string asm,
+ Intrinsic OpNode> {
+ let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
+ def v1i64 : BaseSIMDTwoScalarTied<U, 0b11, opc, FPR64, FPR64, asm,
+ [(set (i64 FPR64:$dst), (OpNode (i64 FPR64:$Rd), (i64 FPR64:$Rn)))]>;
+ def v1i32 : BaseSIMDTwoScalarTied<U, 0b10, opc, FPR32, FPR32, asm,
+ [(set (i32 FPR32:$dst), (OpNode (i32 FPR32:$Rd), (i32 FPR32:$Rn)))]>;
+ def v1i16 : BaseSIMDTwoScalarTied<U, 0b01, opc, FPR16, FPR16, asm, []>;
+ def v1i8 : BaseSIMDTwoScalarTied<U, 0b00, opc, FPR8 , FPR8 , asm, []>;
+ }
+
+ def : Pat<(v1i64 (OpNode (v1i64 FPR64:$Rd), (v1i64 FPR64:$Rn))),
+ (!cast<Instruction>(NAME # v1i64) FPR64:$Rd, FPR64:$Rn)>;
+}
+
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
+multiclass SIMDTwoScalarMixedBHS<bit U, bits<5> opc, string asm,
+ SDPatternOperator OpNode = null_frag> {
+ def v1i32 : BaseSIMDTwoScalar<U, 0b10, opc, FPR32, FPR64, asm,
+ [(set (i32 FPR32:$Rd), (OpNode (i64 FPR64:$Rn)))]>;
+ def v1i16 : BaseSIMDTwoScalar<U, 0b01, opc, FPR16, FPR32, asm, []>;
+ def v1i8 : BaseSIMDTwoScalar<U, 0b00, opc, FPR8 , FPR16, asm, []>;
+}
+
+//----------------------------------------------------------------------------
+// AdvSIMD scalar pairwise instructions
+//----------------------------------------------------------------------------
+
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
+class BaseSIMDPairwiseScalar<bit U, bits<2> size, bits<5> opcode,
+ RegisterOperand regtype, RegisterOperand vectype,
+ string asm, string kind>
+ : I<(outs regtype:$Rd), (ins vectype:$Rn), asm,
+ "{\t$Rd, $Rn" # kind # "|" # kind # "\t$Rd, $Rn}", "", []>,
+ Sched<[WriteV]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ let Inst{31-30} = 0b01;
+ let Inst{29} = U;
+ let Inst{28-24} = 0b11110;
let Inst{23-22} = size;
let Inst{21-17} = 0b11000;
let Inst{16-12} = opcode;
let Inst{11-10} = 0b10;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+}
- // Inherit Rn in 9-5
- // Inherit Rd in 4-0
+multiclass SIMDPairwiseScalarD<bit U, bits<5> opc, string asm> {
+ def v2i64p : BaseSIMDPairwiseScalar<U, 0b11, opc, FPR64Op, V128,
+ asm, ".2d">;
}
-// Format AdvSIMD scalar two registers miscellaneous
-class NeonI_Scalar2SameMisc<bit u, bits<2> size, bits<5> opcode, dag outs, dag ins,
- string asmstr, list<dag> patterns, InstrItinClass itin>
- : A64InstRdn<outs, ins, asmstr, patterns, itin> {
- let Inst{31} = 0b0;
- let Inst{30} = 0b1;
- let Inst{29} = u;
- let Inst{28-24} = 0b11110;
+multiclass SIMDPairwiseScalarSD<bit U, bit S, bits<5> opc, string asm> {
+ def v2i32p : BaseSIMDPairwiseScalar<U, {S,0}, opc, FPR32Op, V64,
+ asm, ".2s">;
+ def v2i64p : BaseSIMDPairwiseScalar<U, {S,1}, opc, FPR64Op, V128,
+ asm, ".2d">;
+}
+
+//----------------------------------------------------------------------------
+// AdvSIMD across lanes instructions
+//----------------------------------------------------------------------------
+
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
+class BaseSIMDAcrossLanes<bit Q, bit U, bits<2> size, bits<5> opcode,
+ RegisterClass regtype, RegisterOperand vectype,
+ string asm, string kind, list<dag> pattern>
+ : I<(outs regtype:$Rd), (ins vectype:$Rn), asm,
+ "{\t$Rd, $Rn" # kind # "|" # kind # "\t$Rd, $Rn}", "", pattern>,
+ Sched<[WriteV]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ let Inst{31} = 0;
+ let Inst{30} = Q;
+ let Inst{29} = U;
+ let Inst{28-24} = 0b01110;
let Inst{23-22} = size;
- let Inst{21-17} = 0b10000;
+ let Inst{21-17} = 0b11000;
let Inst{16-12} = opcode;
let Inst{11-10} = 0b10;
- // Inherit Rn in 9-5
- // Inherit Rd in 4-0
-}
-
-// Format AdvSIMD vector load/store multiple N-element structure
-class NeonI_LdStMult<bit q, bit l, bits<4> opcode, bits<2> size,
- dag outs, dag ins, string asmstr,
- list<dag> patterns, InstrItinClass itin>
- : A64InstRtn<outs, ins, asmstr, patterns, itin>
-{
- let Inst{31} = 0b0;
- let Inst{30} = q;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+}
+
+multiclass SIMDAcrossLanesBHS<bit U, bits<5> opcode,
+ string asm> {
+ def v8i8v : BaseSIMDAcrossLanes<0, U, 0b00, opcode, FPR8, V64,
+ asm, ".8b", []>;
+ def v16i8v : BaseSIMDAcrossLanes<1, U, 0b00, opcode, FPR8, V128,
+ asm, ".16b", []>;
+ def v4i16v : BaseSIMDAcrossLanes<0, U, 0b01, opcode, FPR16, V64,
+ asm, ".4h", []>;
+ def v8i16v : BaseSIMDAcrossLanes<1, U, 0b01, opcode, FPR16, V128,
+ asm, ".8h", []>;
+ def v4i32v : BaseSIMDAcrossLanes<1, U, 0b10, opcode, FPR32, V128,
+ asm, ".4s", []>;
+}
+
+multiclass SIMDAcrossLanesHSD<bit U, bits<5> opcode, string asm> {
+ def v8i8v : BaseSIMDAcrossLanes<0, U, 0b00, opcode, FPR16, V64,
+ asm, ".8b", []>;
+ def v16i8v : BaseSIMDAcrossLanes<1, U, 0b00, opcode, FPR16, V128,
+ asm, ".16b", []>;
+ def v4i16v : BaseSIMDAcrossLanes<0, U, 0b01, opcode, FPR32, V64,
+ asm, ".4h", []>;
+ def v8i16v : BaseSIMDAcrossLanes<1, U, 0b01, opcode, FPR32, V128,
+ asm, ".8h", []>;
+ def v4i32v : BaseSIMDAcrossLanes<1, U, 0b10, opcode, FPR64, V128,
+ asm, ".4s", []>;
+}
+
+multiclass SIMDAcrossLanesS<bits<5> opcode, bit sz1, string asm,
+ Intrinsic intOp> {
+ def v4i32v : BaseSIMDAcrossLanes<1, 1, {sz1, 0}, opcode, FPR32, V128,
+ asm, ".4s",
+ [(set FPR32:$Rd, (intOp (v4f32 V128:$Rn)))]>;
+}
+
+//----------------------------------------------------------------------------
+// AdvSIMD INS/DUP instructions
+//----------------------------------------------------------------------------
+
+// FIXME: There has got to be a better way to factor these. ugh.
+
+class BaseSIMDInsDup<bit Q, bit op, dag outs, dag ins, string asm,
+ string operands, string constraints, list<dag> pattern>
+ : I<outs, ins, asm, operands, constraints, pattern>,
+ Sched<[WriteV]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ let Inst{31} = 0;
+ let Inst{30} = Q;
+ let Inst{29} = op;
+ let Inst{28-21} = 0b01110000;
+ let Inst{15} = 0;
+ let Inst{10} = 1;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+}
+
+class SIMDDupFromMain<bit Q, bits<5> imm5, string size, ValueType vectype,
+ RegisterOperand vecreg, RegisterClass regtype>
+ : BaseSIMDInsDup<Q, 0, (outs vecreg:$Rd), (ins regtype:$Rn), "dup",
+ "{\t$Rd" # size # ", $Rn" #
+ "|" # size # "\t$Rd, $Rn}", "",
+ [(set (vectype vecreg:$Rd), (AArch64dup regtype:$Rn))]> {
+ let Inst{20-16} = imm5;
+ let Inst{14-11} = 0b0001;
+}
+
+class SIMDDupFromElement<bit Q, string dstkind, string srckind,
+ ValueType vectype, ValueType insreg,
+ RegisterOperand vecreg, Operand idxtype,
+ ValueType elttype, SDNode OpNode>
+ : BaseSIMDInsDup<Q, 0, (outs vecreg:$Rd), (ins V128:$Rn, idxtype:$idx), "dup",
+ "{\t$Rd" # dstkind # ", $Rn" # srckind # "$idx" #
+ "|" # dstkind # "\t$Rd, $Rn$idx}", "",
+ [(set (vectype vecreg:$Rd),
+ (OpNode (insreg V128:$Rn), idxtype:$idx))]> {
+ let Inst{14-11} = 0b0000;
+}
+
+class SIMDDup64FromElement
+ : SIMDDupFromElement<1, ".2d", ".d", v2i64, v2i64, V128,
+ VectorIndexD, i64, AArch64duplane64> {
+ bits<1> idx;
+ let Inst{20} = idx;
+ let Inst{19-16} = 0b1000;
+}
+
+class SIMDDup32FromElement<bit Q, string size, ValueType vectype,
+ RegisterOperand vecreg>
+ : SIMDDupFromElement<Q, size, ".s", vectype, v4i32, vecreg,
+ VectorIndexS, i64, AArch64duplane32> {
+ bits<2> idx;
+ let Inst{20-19} = idx;
+ let Inst{18-16} = 0b100;
+}
+
+class SIMDDup16FromElement<bit Q, string size, ValueType vectype,
+ RegisterOperand vecreg>
+ : SIMDDupFromElement<Q, size, ".h", vectype, v8i16, vecreg,
+ VectorIndexH, i64, AArch64duplane16> {
+ bits<3> idx;
+ let Inst{20-18} = idx;
+ let Inst{17-16} = 0b10;
+}
+
+class SIMDDup8FromElement<bit Q, string size, ValueType vectype,
+ RegisterOperand vecreg>
+ : SIMDDupFromElement<Q, size, ".b", vectype, v16i8, vecreg,
+ VectorIndexB, i64, AArch64duplane8> {
+ bits<4> idx;
+ let Inst{20-17} = idx;
+ let Inst{16} = 1;
+}
+
+class BaseSIMDMov<bit Q, string size, bits<4> imm4, RegisterClass regtype,
+ Operand idxtype, string asm, list<dag> pattern>
+ : BaseSIMDInsDup<Q, 0, (outs regtype:$Rd), (ins V128:$Rn, idxtype:$idx), asm,
+ "{\t$Rd, $Rn" # size # "$idx" #
+ "|" # size # "\t$Rd, $Rn$idx}", "", pattern> {
+ let Inst{14-11} = imm4;
+}
+
+class SIMDSMov<bit Q, string size, RegisterClass regtype,
+ Operand idxtype>
+ : BaseSIMDMov<Q, size, 0b0101, regtype, idxtype, "smov", []>;
+class SIMDUMov<bit Q, string size, ValueType vectype, RegisterClass regtype,
+ Operand idxtype>
+ : BaseSIMDMov<Q, size, 0b0111, regtype, idxtype, "umov",
+ [(set regtype:$Rd, (vector_extract (vectype V128:$Rn), idxtype:$idx))]>;
+
+class SIMDMovAlias<string asm, string size, Instruction inst,
+ RegisterClass regtype, Operand idxtype>
+ : InstAlias<asm#"{\t$dst, $src"#size#"$idx" #
+ "|" # size # "\t$dst, $src$idx}",
+ (inst regtype:$dst, V128:$src, idxtype:$idx)>;
+
+multiclass SMov {
+ def vi8to32 : SIMDSMov<0, ".b", GPR32, VectorIndexB> {
+ bits<4> idx;
+ let Inst{20-17} = idx;
+ let Inst{16} = 1;
+ }
+ def vi8to64 : SIMDSMov<1, ".b", GPR64, VectorIndexB> {
+ bits<4> idx;
+ let Inst{20-17} = idx;
+ let Inst{16} = 1;
+ }
+ def vi16to32 : SIMDSMov<0, ".h", GPR32, VectorIndexH> {
+ bits<3> idx;
+ let Inst{20-18} = idx;
+ let Inst{17-16} = 0b10;
+ }
+ def vi16to64 : SIMDSMov<1, ".h", GPR64, VectorIndexH> {
+ bits<3> idx;
+ let Inst{20-18} = idx;
+ let Inst{17-16} = 0b10;
+ }
+ def vi32to64 : SIMDSMov<1, ".s", GPR64, VectorIndexS> {
+ bits<2> idx;
+ let Inst{20-19} = idx;
+ let Inst{18-16} = 0b100;
+ }
+}
+
+multiclass UMov {
+ def vi8 : SIMDUMov<0, ".b", v16i8, GPR32, VectorIndexB> {
+ bits<4> idx;
+ let Inst{20-17} = idx;
+ let Inst{16} = 1;
+ }
+ def vi16 : SIMDUMov<0, ".h", v8i16, GPR32, VectorIndexH> {
+ bits<3> idx;
+ let Inst{20-18} = idx;
+ let Inst{17-16} = 0b10;
+ }
+ def vi32 : SIMDUMov<0, ".s", v4i32, GPR32, VectorIndexS> {
+ bits<2> idx;
+ let Inst{20-19} = idx;
+ let Inst{18-16} = 0b100;
+ }
+ def vi64 : SIMDUMov<1, ".d", v2i64, GPR64, VectorIndexD> {
+ bits<1> idx;
+ let Inst{20} = idx;
+ let Inst{19-16} = 0b1000;
+ }
+ def : SIMDMovAlias<"mov", ".s",
+ !cast<Instruction>(NAME#"vi32"),
+ GPR32, VectorIndexS>;
+ def : SIMDMovAlias<"mov", ".d",
+ !cast<Instruction>(NAME#"vi64"),
+ GPR64, VectorIndexD>;
+}
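+
+// Both multiclasses take no parameters, so a sketch of their
+// instantiation is simply (illustrative, not part of this patch):
+//   defm SMOV : SMov;
+//   defm UMOV : UMov;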
+
+class SIMDInsFromMain<string size, ValueType vectype,
+ RegisterClass regtype, Operand idxtype>
+ : BaseSIMDInsDup<1, 0, (outs V128:$dst),
+ (ins V128:$Rd, idxtype:$idx, regtype:$Rn), "ins",
+ "{\t$Rd" # size # "$idx, $Rn" #
+ "|" # size # "\t$Rd$idx, $Rn}",
+ "$Rd = $dst",
+ [(set V128:$dst,
+ (vector_insert (vectype V128:$Rd), regtype:$Rn, idxtype:$idx))]> {
+ let Inst{14-11} = 0b0011;
+}
+
+class SIMDInsFromElement<string size, ValueType vectype,
+ ValueType elttype, Operand idxtype>
+ : BaseSIMDInsDup<1, 1, (outs V128:$dst),
+ (ins V128:$Rd, idxtype:$idx, V128:$Rn, idxtype:$idx2), "ins",
+ "{\t$Rd" # size # "$idx, $Rn" # size # "$idx2" #
+ "|" # size # "\t$Rd$idx, $Rn$idx2}",
+ "$Rd = $dst",
+ [(set V128:$dst,
+ (vector_insert
+ (vectype V128:$Rd),
+ (elttype (vector_extract (vectype V128:$Rn), idxtype:$idx2)),
+ idxtype:$idx))]>;
+
+class SIMDInsMainMovAlias<string size, Instruction inst,
+ RegisterClass regtype, Operand idxtype>
+ : InstAlias<"mov" # "{\t$dst" # size # "$idx, $src" #
+ "|" # size #"\t$dst$idx, $src}",
+ (inst V128:$dst, idxtype:$idx, regtype:$src)>;
+class SIMDInsElementMovAlias<string size, Instruction inst,
+ Operand idxtype>
+ : InstAlias<"mov" # "{\t$dst" # size # "$idx, $src" # size # "$idx2" #
+ # "|" # size #" $dst$idx, $src$idx2}",
+ (inst V128:$dst, idxtype:$idx, V128:$src, idxtype:$idx2)>;
+
+multiclass SIMDIns {
+ def vi8gpr : SIMDInsFromMain<".b", v16i8, GPR32, VectorIndexB> {
+ bits<4> idx;
+ let Inst{20-17} = idx;
+ let Inst{16} = 1;
+ }
+ def vi16gpr : SIMDInsFromMain<".h", v8i16, GPR32, VectorIndexH> {
+ bits<3> idx;
+ let Inst{20-18} = idx;
+ let Inst{17-16} = 0b10;
+ }
+ def vi32gpr : SIMDInsFromMain<".s", v4i32, GPR32, VectorIndexS> {
+ bits<2> idx;
+ let Inst{20-19} = idx;
+ let Inst{18-16} = 0b100;
+ }
+ def vi64gpr : SIMDInsFromMain<".d", v2i64, GPR64, VectorIndexD> {
+ bits<1> idx;
+ let Inst{20} = idx;
+ let Inst{19-16} = 0b1000;
+ }
+
+ def vi8lane : SIMDInsFromElement<".b", v16i8, i32, VectorIndexB> {
+ bits<4> idx;
+ bits<4> idx2;
+ let Inst{20-17} = idx;
+ let Inst{16} = 1;
+ let Inst{14-11} = idx2;
+ }
+ def vi16lane : SIMDInsFromElement<".h", v8i16, i32, VectorIndexH> {
+ bits<3> idx;
+ bits<3> idx2;
+ let Inst{20-18} = idx;
+ let Inst{17-16} = 0b10;
+ let Inst{14-12} = idx2;
+ let Inst{11} = 0;
+ }
+ def vi32lane : SIMDInsFromElement<".s", v4i32, i32, VectorIndexS> {
+ bits<2> idx;
+ bits<2> idx2;
+ let Inst{20-19} = idx;
+ let Inst{18-16} = 0b100;
+ let Inst{14-13} = idx2;
+ let Inst{12-11} = 0;
+ }
+ def vi64lane : SIMDInsFromElement<".d", v2i64, i64, VectorIndexD> {
+ bits<1> idx;
+ bits<1> idx2;
+ let Inst{20} = idx;
+ let Inst{19-16} = 0b1000;
+ let Inst{14} = idx2;
+ let Inst{13-11} = 0;
+ }
+
+ // For all forms of the INS instruction, the "mov" mnemonic is the
+ // preferred alias. Why they didn't just call the instruction "mov" in
+ // the first place is a very good question indeed...
+ def : SIMDInsMainMovAlias<".b", !cast<Instruction>(NAME#"vi8gpr"),
+ GPR32, VectorIndexB>;
+ def : SIMDInsMainMovAlias<".h", !cast<Instruction>(NAME#"vi16gpr"),
+ GPR32, VectorIndexH>;
+ def : SIMDInsMainMovAlias<".s", !cast<Instruction>(NAME#"vi32gpr"),
+ GPR32, VectorIndexS>;
+ def : SIMDInsMainMovAlias<".d", !cast<Instruction>(NAME#"vi64gpr"),
+ GPR64, VectorIndexD>;
+
+ def : SIMDInsElementMovAlias<".b", !cast<Instruction>(NAME#"vi8lane"),
+ VectorIndexB>;
+ def : SIMDInsElementMovAlias<".h", !cast<Instruction>(NAME#"vi16lane"),
+ VectorIndexH>;
+ def : SIMDInsElementMovAlias<".s", !cast<Instruction>(NAME#"vi32lane"),
+ VectorIndexS>;
+ def : SIMDInsElementMovAlias<".d", !cast<Instruction>(NAME#"vi64lane"),
+ VectorIndexD>;
+}
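+
+// For example (illustrative): "ins v0.s[1], w0" and "mov v0.s[1], w0"
+// assemble to the same encoding, and the "mov" spelling is what the
+// disassembler prints.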
+
+//----------------------------------------------------------------------------
+// AdvSIMD TBL/TBX
+//----------------------------------------------------------------------------
+
+let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in
+class BaseSIMDTableLookup<bit Q, bits<2> len, bit op, RegisterOperand vectype,
+ RegisterOperand listtype, string asm, string kind>
+ : I<(outs vectype:$Vd), (ins listtype:$Vn, vectype:$Vm), asm,
+ "\t$Vd" # kind # ", $Vn, $Vm" # kind, "", []>,
+ Sched<[WriteV]> {
+ bits<5> Vd;
+ bits<5> Vn;
+ bits<5> Vm;
+ let Inst{31} = 0;
+ let Inst{30} = Q;
+ let Inst{29-21} = 0b001110000;
+ let Inst{20-16} = Vm;
+ let Inst{15} = 0;
+ let Inst{14-13} = len;
+ let Inst{12} = op;
+ let Inst{11-10} = 0b00;
+ let Inst{9-5} = Vn;
+ let Inst{4-0} = Vd;
+}
+
+let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in
+class BaseSIMDTableLookupTied<bit Q, bits<2> len, bit op, RegisterOperand vectype,
+ RegisterOperand listtype, string asm, string kind>
+ : I<(outs vectype:$dst), (ins vectype:$Vd, listtype:$Vn, vectype:$Vm), asm,
+ "\t$Vd" # kind # ", $Vn, $Vm" # kind, "$Vd = $dst", []>,
+ Sched<[WriteV]> {
+ bits<5> Vd;
+ bits<5> Vn;
+ bits<5> Vm;
+ let Inst{31} = 0;
+ let Inst{30} = Q;
+ let Inst{29-21} = 0b001110000;
+ let Inst{20-16} = Vm;
+ let Inst{15} = 0;
+ let Inst{14-13} = len;
+ let Inst{12} = op;
+ let Inst{11-10} = 0b00;
+ let Inst{9-5} = Vn;
+ let Inst{4-0} = Vd;
+}
+
+class SIMDTableLookupAlias<string asm, Instruction inst,
+ RegisterOperand vectype, RegisterOperand listtype>
+ : InstAlias<!strconcat(asm, "\t$dst, $lst, $index"),
+ (inst vectype:$dst, listtype:$lst, vectype:$index), 0>;
+
+multiclass SIMDTableLookup<bit op, string asm> {
+ def v8i8One : BaseSIMDTableLookup<0, 0b00, op, V64, VecListOne16b,
+ asm, ".8b">;
+ def v8i8Two : BaseSIMDTableLookup<0, 0b01, op, V64, VecListTwo16b,
+ asm, ".8b">;
+ def v8i8Three : BaseSIMDTableLookup<0, 0b10, op, V64, VecListThree16b,
+ asm, ".8b">;
+ def v8i8Four : BaseSIMDTableLookup<0, 0b11, op, V64, VecListFour16b,
+ asm, ".8b">;
+ def v16i8One : BaseSIMDTableLookup<1, 0b00, op, V128, VecListOne16b,
+ asm, ".16b">;
+ def v16i8Two : BaseSIMDTableLookup<1, 0b01, op, V128, VecListTwo16b,
+ asm, ".16b">;
+ def v16i8Three: BaseSIMDTableLookup<1, 0b10, op, V128, VecListThree16b,
+ asm, ".16b">;
+ def v16i8Four : BaseSIMDTableLookup<1, 0b11, op, V128, VecListFour16b,
+ asm, ".16b">;
+
+ def : SIMDTableLookupAlias<asm # ".8b",
+ !cast<Instruction>(NAME#"v8i8One"),
+ V64, VecListOne128>;
+ def : SIMDTableLookupAlias<asm # ".8b",
+ !cast<Instruction>(NAME#"v8i8Two"),
+ V64, VecListTwo128>;
+ def : SIMDTableLookupAlias<asm # ".8b",
+ !cast<Instruction>(NAME#"v8i8Three"),
+ V64, VecListThree128>;
+ def : SIMDTableLookupAlias<asm # ".8b",
+ !cast<Instruction>(NAME#"v8i8Four"),
+ V64, VecListFour128>;
+ def : SIMDTableLookupAlias<asm # ".16b",
+ !cast<Instruction>(NAME#"v16i8One"),
+ V128, VecListOne128>;
+ def : SIMDTableLookupAlias<asm # ".16b",
+ !cast<Instruction>(NAME#"v16i8Two"),
+ V128, VecListTwo128>;
+ def : SIMDTableLookupAlias<asm # ".16b",
+ !cast<Instruction>(NAME#"v16i8Three"),
+ V128, VecListThree128>;
+ def : SIMDTableLookupAlias<asm # ".16b",
+ !cast<Instruction>(NAME#"v16i8Four"),
+ V128, VecListFour128>;
+}
+
+multiclass SIMDTableLookupTied<bit op, string asm> {
+ def v8i8One : BaseSIMDTableLookupTied<0, 0b00, op, V64, VecListOne16b,
+ asm, ".8b">;
+ def v8i8Two : BaseSIMDTableLookupTied<0, 0b01, op, V64, VecListTwo16b,
+ asm, ".8b">;
+ def v8i8Three : BaseSIMDTableLookupTied<0, 0b10, op, V64, VecListThree16b,
+ asm, ".8b">;
+ def v8i8Four : BaseSIMDTableLookupTied<0, 0b11, op, V64, VecListFour16b,
+ asm, ".8b">;
+ def v16i8One : BaseSIMDTableLookupTied<1, 0b00, op, V128, VecListOne16b,
+ asm, ".16b">;
+ def v16i8Two : BaseSIMDTableLookupTied<1, 0b01, op, V128, VecListTwo16b,
+ asm, ".16b">;
+ def v16i8Three: BaseSIMDTableLookupTied<1, 0b10, op, V128, VecListThree16b,
+ asm, ".16b">;
+ def v16i8Four : BaseSIMDTableLookupTied<1, 0b11, op, V128, VecListFour16b,
+ asm, ".16b">;
+
+ def : SIMDTableLookupAlias<asm # ".8b",
+ !cast<Instruction>(NAME#"v8i8One"),
+ V64, VecListOne128>;
+ def : SIMDTableLookupAlias<asm # ".8b",
+ !cast<Instruction>(NAME#"v8i8Two"),
+ V64, VecListTwo128>;
+ def : SIMDTableLookupAlias<asm # ".8b",
+ !cast<Instruction>(NAME#"v8i8Three"),
+ V64, VecListThree128>;
+ def : SIMDTableLookupAlias<asm # ".8b",
+ !cast<Instruction>(NAME#"v8i8Four"),
+ V64, VecListFour128>;
+ def : SIMDTableLookupAlias<asm # ".16b",
+ !cast<Instruction>(NAME#"v16i8One"),
+ V128, VecListOne128>;
+ def : SIMDTableLookupAlias<asm # ".16b",
+ !cast<Instruction>(NAME#"v16i8Two"),
+ V128, VecListTwo128>;
+ def : SIMDTableLookupAlias<asm # ".16b",
+ !cast<Instruction>(NAME#"v16i8Three"),
+ V128, VecListThree128>;
+ def : SIMDTableLookupAlias<asm # ".16b",
+ !cast<Instruction>(NAME#"v16i8Four"),
+ V128, VecListFour128>;
+}
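+
+// Usage sketch (illustrative; the op bit values are assumptions):
+//   defm TBL : SIMDTableLookup    <0, "tbl">;
+//   defm TBX : SIMDTableLookupTied<1, "tbx">;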
+
+//----------------------------------------------------------------------------
+// AdvSIMD scalar CPY
+//----------------------------------------------------------------------------
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
+class BaseSIMDScalarCPY<RegisterClass regtype, RegisterOperand vectype,
+ string kind, Operand idxtype>
+ : I<(outs regtype:$dst), (ins vectype:$src, idxtype:$idx), "mov",
+ "{\t$dst, $src" # kind # "$idx" #
+ "|\t$dst, $src$idx}", "", []>,
+ Sched<[WriteV]> {
+ bits<5> dst;
+ bits<5> src;
+ let Inst{31-21} = 0b01011110000;
+ let Inst{15-10} = 0b000001;
+ let Inst{9-5} = src;
+ let Inst{4-0} = dst;
+}
+
+class SIMDScalarCPYAlias<string asm, string size, Instruction inst,
+ RegisterClass regtype, RegisterOperand vectype, Operand idxtype>
+ : InstAlias<asm # "{\t$dst, $src" # size # "$index" #
+ # "|\t$dst, $src$index}",
+ (inst regtype:$dst, vectype:$src, idxtype:$index), 0>;
+
+multiclass SIMDScalarCPY<string asm> {
+ def i8 : BaseSIMDScalarCPY<FPR8, V128, ".b", VectorIndexB> {
+ bits<4> idx;
+ let Inst{20-17} = idx;
+ let Inst{16} = 1;
+ }
+ def i16 : BaseSIMDScalarCPY<FPR16, V128, ".h", VectorIndexH> {
+ bits<3> idx;
+ let Inst{20-18} = idx;
+ let Inst{17-16} = 0b10;
+ }
+ def i32 : BaseSIMDScalarCPY<FPR32, V128, ".s", VectorIndexS> {
+ bits<2> idx;
+ let Inst{20-19} = idx;
+ let Inst{18-16} = 0b100;
+ }
+ def i64 : BaseSIMDScalarCPY<FPR64, V128, ".d", VectorIndexD> {
+ bits<1> idx;
+ let Inst{20} = idx;
+ let Inst{19-16} = 0b1000;
+ }
+
+ def : Pat<(v1i64 (scalar_to_vector (i64 (vector_extract (v2i64 V128:$src),
+ VectorIndexD:$idx)))),
+ (!cast<Instruction>(NAME # i64) V128:$src, VectorIndexD:$idx)>;
+
+ // 'DUP' mnemonic aliases.
+ def : SIMDScalarCPYAlias<"dup", ".b",
+ !cast<Instruction>(NAME#"i8"),
+ FPR8, V128, VectorIndexB>;
+ def : SIMDScalarCPYAlias<"dup", ".h",
+ !cast<Instruction>(NAME#"i16"),
+ FPR16, V128, VectorIndexH>;
+ def : SIMDScalarCPYAlias<"dup", ".s",
+ !cast<Instruction>(NAME#"i32"),
+ FPR32, V128, VectorIndexS>;
+ def : SIMDScalarCPYAlias<"dup", ".d",
+ !cast<Instruction>(NAME#"i64"),
+ FPR64, V128, VectorIndexD>;
+}
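+
+// For example (illustrative): "dup s0, v1.s[2]" is accepted and encodes
+// identically to "mov s0, v1.s[2]"; since the aliases above are emitted
+// with priority 0, "mov" remains the printed spelling.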
+
+//----------------------------------------------------------------------------
+// AdvSIMD modified immediate instructions
+//----------------------------------------------------------------------------
+
+class BaseSIMDModifiedImm<bit Q, bit op, dag oops, dag iops,
+ string asm, string op_string,
+ string cstr, list<dag> pattern>
+ : I<oops, iops, asm, op_string, cstr, pattern>,
+ Sched<[WriteV]> {
+ bits<5> Rd;
+ bits<8> imm8;
+ let Inst{31} = 0;
+ let Inst{30} = Q;
+ let Inst{29} = op;
+ let Inst{28-19} = 0b0111100000;
+ let Inst{18-16} = imm8{7-5};
+ let Inst{11-10} = 0b01;
+ let Inst{9-5} = imm8{4-0};
+ let Inst{4-0} = Rd;
+}
+
+class BaseSIMDModifiedImmVector<bit Q, bit op, RegisterOperand vectype,
+ Operand immtype, dag opt_shift_iop,
+ string opt_shift, string asm, string kind,
+ list<dag> pattern>
+ : BaseSIMDModifiedImm<Q, op, (outs vectype:$Rd),
+ !con((ins immtype:$imm8), opt_shift_iop), asm,
+ "{\t$Rd" # kind # ", $imm8" # opt_shift #
+ "|" # kind # "\t$Rd, $imm8" # opt_shift # "}",
+ "", pattern> {
+ let DecoderMethod = "DecodeModImmInstruction";
+}
+
+class BaseSIMDModifiedImmVectorTied<bit Q, bit op, RegisterOperand vectype,
+ Operand immtype, dag opt_shift_iop,
+ string opt_shift, string asm, string kind,
+ list<dag> pattern>
+ : BaseSIMDModifiedImm<Q, op, (outs vectype:$dst),
+ !con((ins vectype:$Rd, immtype:$imm8), opt_shift_iop),
+ asm, "{\t$Rd" # kind # ", $imm8" # opt_shift #
+ "|" # kind # "\t$Rd, $imm8" # opt_shift # "}",
+ "$Rd = $dst", pattern> {
+ let DecoderMethod = "DecodeModImmTiedInstruction";
+}
+
+class BaseSIMDModifiedImmVectorShift<bit Q, bit op, bits<2> b15_b12,
+ RegisterOperand vectype, string asm,
+ string kind, list<dag> pattern>
+ : BaseSIMDModifiedImmVector<Q, op, vectype, imm0_255,
+ (ins logical_vec_shift:$shift),
+ "$shift", asm, kind, pattern> {
+ bits<2> shift;
+ let Inst{15} = b15_b12{1};
+ let Inst{14-13} = shift;
+ let Inst{12} = b15_b12{0};
+}
+
+class BaseSIMDModifiedImmVectorShiftTied<bit Q, bit op, bits<2> b15_b12,
+ RegisterOperand vectype, string asm,
+ string kind, list<dag> pattern>
+ : BaseSIMDModifiedImmVectorTied<Q, op, vectype, imm0_255,
+ (ins logical_vec_shift:$shift),
+ "$shift", asm, kind, pattern> {
+ bits<2> shift;
+ let Inst{15} = b15_b12{1};
+ let Inst{14-13} = shift;
+ let Inst{12} = b15_b12{0};
+}
+
+class BaseSIMDModifiedImmVectorShiftHalf<bit Q, bit op, bits<2> b15_b12,
+ RegisterOperand vectype, string asm,
+ string kind, list<dag> pattern>
+ : BaseSIMDModifiedImmVector<Q, op, vectype, imm0_255,
+ (ins logical_vec_hw_shift:$shift),
+ "$shift", asm, kind, pattern> {
+ bits<2> shift;
+ let Inst{15} = b15_b12{1};
+ let Inst{14} = 0;
+ let Inst{13} = shift{0};
+ let Inst{12} = b15_b12{0};
+}
+
+class BaseSIMDModifiedImmVectorShiftHalfTied<bit Q, bit op, bits<2> b15_b12,
+ RegisterOperand vectype, string asm,
+ string kind, list<dag> pattern>
+ : BaseSIMDModifiedImmVectorTied<Q, op, vectype, imm0_255,
+ (ins logical_vec_hw_shift:$shift),
+ "$shift", asm, kind, pattern> {
+ bits<2> shift;
+ let Inst{15} = b15_b12{1};
+ let Inst{14} = 0;
+ let Inst{13} = shift{0};
+ let Inst{12} = b15_b12{0};
+}
+
+multiclass SIMDModifiedImmVectorShift<bit op, bits<2> hw_cmode, bits<2> w_cmode,
+ string asm> {
+ def v4i16 : BaseSIMDModifiedImmVectorShiftHalf<0, op, hw_cmode, V64,
+ asm, ".4h", []>;
+ def v8i16 : BaseSIMDModifiedImmVectorShiftHalf<1, op, hw_cmode, V128,
+ asm, ".8h", []>;
+
+ def v2i32 : BaseSIMDModifiedImmVectorShift<0, op, w_cmode, V64,
+ asm, ".2s", []>;
+ def v4i32 : BaseSIMDModifiedImmVectorShift<1, op, w_cmode, V128,
+ asm, ".4s", []>;
+}
+
+multiclass SIMDModifiedImmVectorShiftTied<bit op, bits<2> hw_cmode,
+ bits<2> w_cmode, string asm,
+ SDNode OpNode> {
+ def v4i16 : BaseSIMDModifiedImmVectorShiftHalfTied<0, op, hw_cmode, V64,
+ asm, ".4h",
+ [(set (v4i16 V64:$dst), (OpNode V64:$Rd,
+ imm0_255:$imm8,
+ (i32 imm:$shift)))]>;
+ def v8i16 : BaseSIMDModifiedImmVectorShiftHalfTied<1, op, hw_cmode, V128,
+ asm, ".8h",
+ [(set (v8i16 V128:$dst), (OpNode V128:$Rd,
+ imm0_255:$imm8,
+ (i32 imm:$shift)))]>;
+
+ def v2i32 : BaseSIMDModifiedImmVectorShiftTied<0, op, w_cmode, V64,
+ asm, ".2s",
+ [(set (v2i32 V64:$dst), (OpNode V64:$Rd,
+ imm0_255:$imm8,
+ (i32 imm:$shift)))]>;
+ def v4i32 : BaseSIMDModifiedImmVectorShiftTied<1, op, w_cmode, V128,
+ asm, ".4s",
+ [(set (v4i32 V128:$dst), (OpNode V128:$Rd,
+ imm0_255:$imm8,
+ (i32 imm:$shift)))]>;
+}
+
+class SIMDModifiedImmMoveMSL<bit Q, bit op, bits<4> cmode,
+ RegisterOperand vectype, string asm,
+ string kind, list<dag> pattern>
+ : BaseSIMDModifiedImmVector<Q, op, vectype, imm0_255,
+ (ins move_vec_shift:$shift),
+ "$shift", asm, kind, pattern> {
+ bits<1> shift;
+ let Inst{15-13} = cmode{3-1};
+ let Inst{12} = shift;
+}
+
+class SIMDModifiedImmVectorNoShift<bit Q, bit op, bits<4> cmode,
+ RegisterOperand vectype,
+ Operand imm_type, string asm,
+ string kind, list<dag> pattern>
+ : BaseSIMDModifiedImmVector<Q, op, vectype, imm_type, (ins), "",
+ asm, kind, pattern> {
+ let Inst{15-12} = cmode;
+}
+
+class SIMDModifiedImmScalarNoShift<bit Q, bit op, bits<4> cmode, string asm,
+ list<dag> pattern>
+ : BaseSIMDModifiedImm<Q, op, (outs FPR64:$Rd), (ins simdimmtype10:$imm8), asm,
+ "\t$Rd, $imm8", "", pattern> {
+ let Inst{15-12} = cmode;
+ let DecoderMethod = "DecodeModImmInstruction";
+}
+
+//----------------------------------------------------------------------------
+// AdvSIMD indexed element
+//----------------------------------------------------------------------------
+
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
+class BaseSIMDIndexed<bit Q, bit U, bit Scalar, bits<2> size, bits<4> opc,
+ RegisterOperand dst_reg, RegisterOperand lhs_reg,
+ RegisterOperand rhs_reg, Operand vec_idx, string asm,
+ string apple_kind, string dst_kind, string lhs_kind,
+ string rhs_kind, list<dag> pattern>
+ : I<(outs dst_reg:$Rd), (ins lhs_reg:$Rn, rhs_reg:$Rm, vec_idx:$idx),
+ asm,
+ "{\t$Rd" # dst_kind # ", $Rn" # lhs_kind # ", $Rm" # rhs_kind # "$idx" #
+ "|" # apple_kind # "\t$Rd, $Rn, $Rm$idx}", "", pattern>,
+ Sched<[WriteV]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ bits<5> Rm;
+
+ let Inst{31} = 0;
+ let Inst{30} = Q;
+ let Inst{29} = U;
+ let Inst{28} = Scalar;
+ let Inst{27-24} = 0b1111;
+ let Inst{23-22} = size;
+ // Bit 21 must be set by the derived class.
+ let Inst{20-16} = Rm;
+ let Inst{15-12} = opc;
+ // Bit 11 must be set by the derived class.
+ let Inst{10} = 0;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+}
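+
+// Sketch of how a derived definition supplies bits 21 and 11 (this
+// mirrors the .2s variants below, which split their 2-bit lane index
+// across the two bits):
+//   bits<2> idx;
+//   let Inst{11} = idx{1};
+//   let Inst{21} = idx{0};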
+
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
+class BaseSIMDIndexedTied<bit Q, bit U, bit Scalar, bits<2> size, bits<4> opc,
+ RegisterOperand dst_reg, RegisterOperand lhs_reg,
+ RegisterOperand rhs_reg, Operand vec_idx, string asm,
+ string apple_kind, string dst_kind, string lhs_kind,
+ string rhs_kind, list<dag> pattern>
+ : I<(outs dst_reg:$dst),
+ (ins dst_reg:$Rd, lhs_reg:$Rn, rhs_reg:$Rm, vec_idx:$idx), asm,
+ "{\t$Rd" # dst_kind # ", $Rn" # lhs_kind # ", $Rm" # rhs_kind # "$idx" #
+ "|" # apple_kind # "\t$Rd, $Rn, $Rm$idx}", "$Rd = $dst", pattern>,
+ Sched<[WriteV]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ bits<5> Rm;
+
+ let Inst{31} = 0;
+ let Inst{30} = Q;
+ let Inst{29} = U;
+ let Inst{28} = Scalar;
+ let Inst{27-24} = 0b1111;
+ let Inst{23-22} = size;
+ // Bit 21 must be set by the derived class.
+ let Inst{20-16} = Rm;
+ let Inst{15-12} = opc;
+ // Bit 11 must be set by the derived class.
+ let Inst{10} = 0;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+}
+
+multiclass SIMDFPIndexedSD<bit U, bits<4> opc, string asm,
+ SDPatternOperator OpNode> {
+ def v2i32_indexed : BaseSIMDIndexed<0, U, 0, 0b10, opc,
+ V64, V64,
+ V128, VectorIndexS,
+ asm, ".2s", ".2s", ".2s", ".s",
+ [(set (v2f32 V64:$Rd),
+ (OpNode (v2f32 V64:$Rn),
+ (v2f32 (AArch64duplane32 (v4f32 V128:$Rm), VectorIndexS:$idx))))]> {
+ bits<2> idx;
+ let Inst{11} = idx{1};
+ let Inst{21} = idx{0};
+ }
+
+ def v4i32_indexed : BaseSIMDIndexed<1, U, 0, 0b10, opc,
+ V128, V128,
+ V128, VectorIndexS,
+ asm, ".4s", ".4s", ".4s", ".s",
+ [(set (v4f32 V128:$Rd),
+ (OpNode (v4f32 V128:$Rn),
+ (v4f32 (AArch64duplane32 (v4f32 V128:$Rm), VectorIndexS:$idx))))]> {
+ bits<2> idx;
+ let Inst{11} = idx{1};
+ let Inst{21} = idx{0};
+ }
+
+ def v2i64_indexed : BaseSIMDIndexed<1, U, 0, 0b11, opc,
+ V128, V128,
+ V128, VectorIndexD,
+ asm, ".2d", ".2d", ".2d", ".d",
+ [(set (v2f64 V128:$Rd),
+ (OpNode (v2f64 V128:$Rn),
+ (v2f64 (AArch64duplane64 (v2f64 V128:$Rm), VectorIndexD:$idx))))]> {
+ bits<1> idx;
+ let Inst{11} = idx{0};
+ let Inst{21} = 0;
+ }
+
+ def v1i32_indexed : BaseSIMDIndexed<1, U, 1, 0b10, opc,
+ FPR32Op, FPR32Op, V128, VectorIndexS,
+ asm, ".s", "", "", ".s",
+ [(set (f32 FPR32Op:$Rd),
+ (OpNode (f32 FPR32Op:$Rn),
+ (f32 (vector_extract (v4f32 V128:$Rm),
+ VectorIndexS:$idx))))]> {
+ bits<2> idx;
+ let Inst{11} = idx{1};
+ let Inst{21} = idx{0};
+ }
+
+ def v1i64_indexed : BaseSIMDIndexed<1, U, 1, 0b11, opc,
+ FPR64Op, FPR64Op, V128, VectorIndexD,
+ asm, ".d", "", "", ".d",
+ [(set (f64 FPR64Op:$Rd),
+ (OpNode (f64 FPR64Op:$Rn),
+ (f64 (vector_extract (v2f64 V128:$Rm),
+ VectorIndexD:$idx))))]> {
+ bits<1> idx;
+ let Inst{11} = idx{0};
+ let Inst{21} = 0;
+ }
+}
+
+multiclass SIMDFPIndexedSDTiedPatterns<string INST, SDPatternOperator OpNode> {
+ // 2 variants for the .2s version: DUPLANE from 128-bit and DUP scalar.
+ def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn),
+ (AArch64duplane32 (v4f32 V128:$Rm),
+ VectorIndexS:$idx))),
+            (!cast<Instruction>(INST # "v2i32_indexed")
+ V64:$Rd, V64:$Rn, V128:$Rm, VectorIndexS:$idx)>;
+ def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn),
+ (AArch64dup (f32 FPR32Op:$Rm)))),
+ (!cast<Instruction>(INST # "v2i32_indexed") V64:$Rd, V64:$Rn,
+ (SUBREG_TO_REG (i32 0), FPR32Op:$Rm, ssub), (i64 0))>;
+
+ // 2 variants for the .4s version: DUPLANE from 128-bit and DUP scalar.
+ def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn),
+ (AArch64duplane32 (v4f32 V128:$Rm),
+ VectorIndexS:$idx))),
+ (!cast<Instruction>(INST # "v4i32_indexed")
+ V128:$Rd, V128:$Rn, V128:$Rm, VectorIndexS:$idx)>;
+ def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn),
+ (AArch64dup (f32 FPR32Op:$Rm)))),
+ (!cast<Instruction>(INST # "v4i32_indexed") V128:$Rd, V128:$Rn,
+ (SUBREG_TO_REG (i32 0), FPR32Op:$Rm, ssub), (i64 0))>;
+
+ // 2 variants for the .2d version: DUPLANE from 128-bit and DUP scalar.
+ def : Pat<(v2f64 (OpNode (v2f64 V128:$Rd), (v2f64 V128:$Rn),
+ (AArch64duplane64 (v2f64 V128:$Rm),
+ VectorIndexD:$idx))),
+ (!cast<Instruction>(INST # "v2i64_indexed")
+                V128:$Rd, V128:$Rn, V128:$Rm, VectorIndexD:$idx)>;
+ def : Pat<(v2f64 (OpNode (v2f64 V128:$Rd), (v2f64 V128:$Rn),
+ (AArch64dup (f64 FPR64Op:$Rm)))),
+ (!cast<Instruction>(INST # "v2i64_indexed") V128:$Rd, V128:$Rn,
+ (SUBREG_TO_REG (i32 0), FPR64Op:$Rm, dsub), (i64 0))>;
+
+ // 2 variants for 32-bit scalar version: extract from .2s or from .4s
+ def : Pat<(f32 (OpNode (f32 FPR32:$Rd), (f32 FPR32:$Rn),
+ (vector_extract (v4f32 V128:$Rm), VectorIndexS:$idx))),
+ (!cast<Instruction>(INST # "v1i32_indexed") FPR32:$Rd, FPR32:$Rn,
+ V128:$Rm, VectorIndexS:$idx)>;
+ def : Pat<(f32 (OpNode (f32 FPR32:$Rd), (f32 FPR32:$Rn),
+ (vector_extract (v2f32 V64:$Rm), VectorIndexS:$idx))),
+ (!cast<Instruction>(INST # "v1i32_indexed") FPR32:$Rd, FPR32:$Rn,
+ (SUBREG_TO_REG (i32 0), V64:$Rm, dsub), VectorIndexS:$idx)>;
+
+ // 1 variant for 64-bit scalar version: extract from .1d or from .2d
+ def : Pat<(f64 (OpNode (f64 FPR64:$Rd), (f64 FPR64:$Rn),
+ (vector_extract (v2f64 V128:$Rm), VectorIndexD:$idx))),
+ (!cast<Instruction>(INST # "v1i64_indexed") FPR64:$Rd, FPR64:$Rn,
+ V128:$Rm, VectorIndexD:$idx)>;
+}
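+
+// Usage sketch (illustrative; assumes fused multiply-add instructions
+// named FMLA* defined via SIMDFPIndexedSDTied below):
+//   defm : SIMDFPIndexedSDTiedPatterns<"FMLA", fma>;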
+
+multiclass SIMDFPIndexedSDTied<bit U, bits<4> opc, string asm> {
+ def v2i32_indexed : BaseSIMDIndexedTied<0, U, 0, 0b10, opc, V64, V64,
+ V128, VectorIndexS,
+ asm, ".2s", ".2s", ".2s", ".s", []> {
+ bits<2> idx;
+ let Inst{11} = idx{1};
+ let Inst{21} = idx{0};
+ }
+
+ def v4i32_indexed : BaseSIMDIndexedTied<1, U, 0, 0b10, opc,
+ V128, V128,
+ V128, VectorIndexS,
+ asm, ".4s", ".4s", ".4s", ".s", []> {
+ bits<2> idx;
+ let Inst{11} = idx{1};
+ let Inst{21} = idx{0};
+ }
+
+ def v2i64_indexed : BaseSIMDIndexedTied<1, U, 0, 0b11, opc,
+ V128, V128,
+ V128, VectorIndexD,
+ asm, ".2d", ".2d", ".2d", ".d", []> {
+ bits<1> idx;
+ let Inst{11} = idx{0};
+ let Inst{21} = 0;
+ }
+
+ def v1i32_indexed : BaseSIMDIndexedTied<1, U, 1, 0b10, opc,
+ FPR32Op, FPR32Op, V128, VectorIndexS,
+ asm, ".s", "", "", ".s", []> {
+ bits<2> idx;
+ let Inst{11} = idx{1};
+ let Inst{21} = idx{0};
+ }
+
+ def v1i64_indexed : BaseSIMDIndexedTied<1, U, 1, 0b11, opc,
+ FPR64Op, FPR64Op, V128, VectorIndexD,
+ asm, ".d", "", "", ".d", []> {
+ bits<1> idx;
+ let Inst{11} = idx{0};
+ let Inst{21} = 0;
+ }
+}
+
+multiclass SIMDIndexedHS<bit U, bits<4> opc, string asm,
+ SDPatternOperator OpNode> {
+ def v4i16_indexed : BaseSIMDIndexed<0, U, 0, 0b01, opc, V64, V64,
+ V128_lo, VectorIndexH,
+ asm, ".4h", ".4h", ".4h", ".h",
+ [(set (v4i16 V64:$Rd),
+ (OpNode (v4i16 V64:$Rn),
+ (v4i16 (AArch64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> {
+ bits<3> idx;
+ let Inst{11} = idx{2};
+ let Inst{21} = idx{1};
+ let Inst{20} = idx{0};
+ }
+
+ def v8i16_indexed : BaseSIMDIndexed<1, U, 0, 0b01, opc,
+ V128, V128,
+ V128_lo, VectorIndexH,
+ asm, ".8h", ".8h", ".8h", ".h",
+ [(set (v8i16 V128:$Rd),
+ (OpNode (v8i16 V128:$Rn),
+ (v8i16 (AArch64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> {
+ bits<3> idx;
+ let Inst{11} = idx{2};
+ let Inst{21} = idx{1};
+ let Inst{20} = idx{0};
+ }
+
+ def v2i32_indexed : BaseSIMDIndexed<0, U, 0, 0b10, opc,
+ V64, V64,
+ V128, VectorIndexS,
+ asm, ".2s", ".2s", ".2s", ".s",
+ [(set (v2i32 V64:$Rd),
+ (OpNode (v2i32 V64:$Rn),
+ (v2i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> {
+ bits<2> idx;
+ let Inst{11} = idx{1};
+ let Inst{21} = idx{0};
+ }
+
+ def v4i32_indexed : BaseSIMDIndexed<1, U, 0, 0b10, opc,
+ V128, V128,
+ V128, VectorIndexS,
+ asm, ".4s", ".4s", ".4s", ".s",
+ [(set (v4i32 V128:$Rd),
+ (OpNode (v4i32 V128:$Rn),
+ (v4i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> {
+ bits<2> idx;
+ let Inst{11} = idx{1};
+ let Inst{21} = idx{0};
+ }
+
+ def v1i16_indexed : BaseSIMDIndexed<1, U, 1, 0b01, opc,
+ FPR16Op, FPR16Op, V128_lo, VectorIndexH,
+ asm, ".h", "", "", ".h", []> {
+ bits<3> idx;
+ let Inst{11} = idx{2};
+ let Inst{21} = idx{1};
+ let Inst{20} = idx{0};
+ }
+
+ def v1i32_indexed : BaseSIMDIndexed<1, U, 1, 0b10, opc,
+ FPR32Op, FPR32Op, V128, VectorIndexS,
+ asm, ".s", "", "", ".s",
+ [(set (i32 FPR32Op:$Rd),
+ (OpNode FPR32Op:$Rn,
+ (i32 (vector_extract (v4i32 V128:$Rm),
+ VectorIndexS:$idx))))]> {
+ bits<2> idx;
+ let Inst{11} = idx{1};
+ let Inst{21} = idx{0};
+ }
+}
+
+multiclass SIMDVectorIndexedHS<bit U, bits<4> opc, string asm,
+ SDPatternOperator OpNode> {
+ def v4i16_indexed : BaseSIMDIndexed<0, U, 0, 0b01, opc,
+ V64, V64,
+ V128_lo, VectorIndexH,
+ asm, ".4h", ".4h", ".4h", ".h",
+ [(set (v4i16 V64:$Rd),
+ (OpNode (v4i16 V64:$Rn),
+ (v4i16 (AArch64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> {
+ bits<3> idx;
+ let Inst{11} = idx{2};
+ let Inst{21} = idx{1};
+ let Inst{20} = idx{0};
+ }
+
+ def v8i16_indexed : BaseSIMDIndexed<1, U, 0, 0b01, opc,
+ V128, V128,
+ V128_lo, VectorIndexH,
+ asm, ".8h", ".8h", ".8h", ".h",
+ [(set (v8i16 V128:$Rd),
+ (OpNode (v8i16 V128:$Rn),
+ (v8i16 (AArch64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> {
+ bits<3> idx;
+ let Inst{11} = idx{2};
+ let Inst{21} = idx{1};
+ let Inst{20} = idx{0};
+ }
+
+ def v2i32_indexed : BaseSIMDIndexed<0, U, 0, 0b10, opc,
+ V64, V64,
+ V128, VectorIndexS,
+ asm, ".2s", ".2s", ".2s", ".s",
+ [(set (v2i32 V64:$Rd),
+ (OpNode (v2i32 V64:$Rn),
+ (v2i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> {
+ bits<2> idx;
+ let Inst{11} = idx{1};
+ let Inst{21} = idx{0};
+ }
+
+ def v4i32_indexed : BaseSIMDIndexed<1, U, 0, 0b10, opc,
+ V128, V128,
+ V128, VectorIndexS,
+ asm, ".4s", ".4s", ".4s", ".s",
+ [(set (v4i32 V128:$Rd),
+ (OpNode (v4i32 V128:$Rn),
+ (v4i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> {
+ bits<2> idx;
+ let Inst{11} = idx{1};
+ let Inst{21} = idx{0};
+ }
+}
+
+multiclass SIMDVectorIndexedHSTied<bit U, bits<4> opc, string asm,
+ SDPatternOperator OpNode> {
+ def v4i16_indexed : BaseSIMDIndexedTied<0, U, 0, 0b01, opc, V64, V64,
+ V128_lo, VectorIndexH,
+ asm, ".4h", ".4h", ".4h", ".h",
+ [(set (v4i16 V64:$dst),
+ (OpNode (v4i16 V64:$Rd),(v4i16 V64:$Rn),
+ (v4i16 (AArch64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> {
+ bits<3> idx;
+ let Inst{11} = idx{2};
+ let Inst{21} = idx{1};
+ let Inst{20} = idx{0};
+ }
+
+ def v8i16_indexed : BaseSIMDIndexedTied<1, U, 0, 0b01, opc,
+ V128, V128,
+ V128_lo, VectorIndexH,
+ asm, ".8h", ".8h", ".8h", ".h",
+ [(set (v8i16 V128:$dst),
+ (OpNode (v8i16 V128:$Rd), (v8i16 V128:$Rn),
+ (v8i16 (AArch64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> {
+ bits<3> idx;
+ let Inst{11} = idx{2};
+ let Inst{21} = idx{1};
+ let Inst{20} = idx{0};
+ }
+
+ def v2i32_indexed : BaseSIMDIndexedTied<0, U, 0, 0b10, opc,
+ V64, V64,
+ V128, VectorIndexS,
+ asm, ".2s", ".2s", ".2s", ".s",
+ [(set (v2i32 V64:$dst),
+ (OpNode (v2i32 V64:$Rd), (v2i32 V64:$Rn),
+ (v2i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> {
+ bits<2> idx;
+ let Inst{11} = idx{1};
+ let Inst{21} = idx{0};
+ }
+
+ def v4i32_indexed : BaseSIMDIndexedTied<1, U, 0, 0b10, opc,
+ V128, V128,
+ V128, VectorIndexS,
+ asm, ".4s", ".4s", ".4s", ".s",
+ [(set (v4i32 V128:$dst),
+ (OpNode (v4i32 V128:$Rd), (v4i32 V128:$Rn),
+ (v4i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> {
+ bits<2> idx;
+ let Inst{11} = idx{1};
+ let Inst{21} = idx{0};
+ }
+}
+
+multiclass SIMDIndexedLongSD<bit U, bits<4> opc, string asm,
+ SDPatternOperator OpNode> {
+ def v4i16_indexed : BaseSIMDIndexed<0, U, 0, 0b01, opc,
+ V128, V64,
+ V128_lo, VectorIndexH,
+ asm, ".4s", ".4s", ".4h", ".h",
+ [(set (v4i32 V128:$Rd),
+ (OpNode (v4i16 V64:$Rn),
+ (v4i16 (AArch64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> {
+ bits<3> idx;
+ let Inst{11} = idx{2};
+ let Inst{21} = idx{1};
+ let Inst{20} = idx{0};
+ }
+
+ def v8i16_indexed : BaseSIMDIndexed<1, U, 0, 0b01, opc,
+ V128, V128,
+ V128_lo, VectorIndexH,
+ asm#"2", ".4s", ".4s", ".8h", ".h",
+ [(set (v4i32 V128:$Rd),
+ (OpNode (extract_high_v8i16 V128:$Rn),
+ (extract_high_v8i16 (AArch64duplane16 (v8i16 V128_lo:$Rm),
+ VectorIndexH:$idx))))]> {
+ bits<3> idx;
+ let Inst{11} = idx{2};
+ let Inst{21} = idx{1};
+ let Inst{20} = idx{0};
+ }
+
+ def v2i32_indexed : BaseSIMDIndexed<0, U, 0, 0b10, opc,
+ V128, V64,
+ V128, VectorIndexS,
+ asm, ".2d", ".2d", ".2s", ".s",
+ [(set (v2i64 V128:$Rd),
+ (OpNode (v2i32 V64:$Rn),
+ (v2i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> {
+ bits<2> idx;
+ let Inst{11} = idx{1};
+ let Inst{21} = idx{0};
+ }
+
+ def v4i32_indexed : BaseSIMDIndexed<1, U, 0, 0b10, opc,
+ V128, V128,
+ V128, VectorIndexS,
+ asm#"2", ".2d", ".2d", ".4s", ".s",
+ [(set (v2i64 V128:$Rd),
+ (OpNode (extract_high_v4i32 V128:$Rn),
+ (extract_high_v4i32 (AArch64duplane32 (v4i32 V128:$Rm),
+ VectorIndexS:$idx))))]> {
+ bits<2> idx;
+ let Inst{11} = idx{1};
+ let Inst{21} = idx{0};
+ }
+
+ def v1i32_indexed : BaseSIMDIndexed<1, U, 1, 0b01, opc,
+ FPR32Op, FPR16Op, V128_lo, VectorIndexH,
+ asm, ".h", "", "", ".h", []> {
+ bits<3> idx;
+ let Inst{11} = idx{2};
+ let Inst{21} = idx{1};
+ let Inst{20} = idx{0};
+ }
+
+ def v1i64_indexed : BaseSIMDIndexed<1, U, 1, 0b10, opc,
+ FPR64Op, FPR32Op, V128, VectorIndexS,
+ asm, ".s", "", "", ".s", []> {
+ bits<2> idx;
+ let Inst{11} = idx{1};
+ let Inst{21} = idx{0};
+ }
+}
+
+multiclass SIMDIndexedLongSQDMLXSDTied<bit U, bits<4> opc, string asm,
+ SDPatternOperator Accum> {
+ def v4i16_indexed : BaseSIMDIndexedTied<0, U, 0, 0b01, opc,
+ V128, V64,
+ V128_lo, VectorIndexH,
+ asm, ".4s", ".4s", ".4h", ".h",
+ [(set (v4i32 V128:$dst),
+ (Accum (v4i32 V128:$Rd),
+ (v4i32 (int_aarch64_neon_sqdmull
+ (v4i16 V64:$Rn),
+ (v4i16 (AArch64duplane16 (v8i16 V128_lo:$Rm),
+ VectorIndexH:$idx))))))]> {
+ bits<3> idx;
+ let Inst{11} = idx{2};
+ let Inst{21} = idx{1};
+ let Inst{20} = idx{0};
+ }
+
+ // FIXME: it would be nice to use the scalar (v1i32) instruction here, but an
+ // intermediate EXTRACT_SUBREG would be untyped.
+ def : Pat<(i32 (Accum (i32 FPR32Op:$Rd),
+ (i32 (vector_extract (v4i32
+ (int_aarch64_neon_sqdmull (v4i16 V64:$Rn),
+ (v4i16 (AArch64duplane16 (v8i16 V128_lo:$Rm),
+ VectorIndexH:$idx)))),
+ (i64 0))))),
+ (EXTRACT_SUBREG
+ (!cast<Instruction>(NAME # v4i16_indexed)
+ (SUBREG_TO_REG (i32 0), FPR32Op:$Rd, ssub), V64:$Rn,
+ V128_lo:$Rm, VectorIndexH:$idx),
+ ssub)>;
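+
+ // The pattern above widens the scalar accumulator into a V128 register with
+ // SUBREG_TO_REG, issues the v4i16 indexed instruction (lane 0 of which
+ // computes the scalar result), and pulls the scalar back out of the ssub
+ // subregister with EXTRACT_SUBREG.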
+
+ def v8i16_indexed : BaseSIMDIndexedTied<1, U, 0, 0b01, opc,
+ V128, V128,
+ V128_lo, VectorIndexH,
+ asm#"2", ".4s", ".4s", ".8h", ".h",
+ [(set (v4i32 V128:$dst),
+ (Accum (v4i32 V128:$Rd),
+ (v4i32 (int_aarch64_neon_sqdmull
+ (extract_high_v8i16 V128:$Rn),
+ (extract_high_v8i16
+ (AArch64duplane16 (v8i16 V128_lo:$Rm),
+ VectorIndexH:$idx))))))]> {
+ bits<3> idx;
+ let Inst{11} = idx{2};
+ let Inst{21} = idx{1};
+ let Inst{20} = idx{0};
+ }
+
+ def v2i32_indexed : BaseSIMDIndexedTied<0, U, 0, 0b10, opc,
+ V128, V64,
+ V128, VectorIndexS,
+ asm, ".2d", ".2d", ".2s", ".s",
+ [(set (v2i64 V128:$dst),
+ (Accum (v2i64 V128:$Rd),
+ (v2i64 (int_aarch64_neon_sqdmull
+ (v2i32 V64:$Rn),
+ (v2i32 (AArch64duplane32 (v4i32 V128:$Rm),
+ VectorIndexS:$idx))))))]> {
+ bits<2> idx;
+ let Inst{11} = idx{1};
+ let Inst{21} = idx{0};
+ }
+
+ def v4i32_indexed : BaseSIMDIndexedTied<1, U, 0, 0b10, opc,
+ V128, V128,
+ V128, VectorIndexS,
+ asm#"2", ".2d", ".2d", ".4s", ".s",
+ [(set (v2i64 V128:$dst),
+ (Accum (v2i64 V128:$Rd),
+ (v2i64 (int_aarch64_neon_sqdmull
+ (extract_high_v4i32 V128:$Rn),
+ (extract_high_v4i32
+ (AArch64duplane32 (v4i32 V128:$Rm),
+ VectorIndexS:$idx))))))]> {
+ bits<2> idx;
+ let Inst{11} = idx{1};
+ let Inst{21} = idx{0};
+ }
+
+ def v1i32_indexed : BaseSIMDIndexedTied<1, U, 1, 0b01, opc,
+ FPR32Op, FPR16Op, V128_lo, VectorIndexH,
+ asm, ".h", "", "", ".h", []> {
+ bits<3> idx;
+ let Inst{11} = idx{2};
+ let Inst{21} = idx{1};
+ let Inst{20} = idx{0};
+ }
+
+ def v1i64_indexed : BaseSIMDIndexedTied<1, U, 1, 0b10, opc,
+ FPR64Op, FPR32Op, V128, VectorIndexS,
+ asm, ".s", "", "", ".s",
+ [(set (i64 FPR64Op:$dst),
+ (Accum (i64 FPR64Op:$Rd),
+ (i64 (int_aarch64_neon_sqdmulls_scalar
+ (i32 FPR32Op:$Rn),
+ (i32 (vector_extract (v4i32 V128:$Rm),
+ VectorIndexS:$idx))))))]> {
+ bits<2> idx;
+ let Inst{11} = idx{1};
+ let Inst{21} = idx{0};
+ }
+}
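+
+// Typical instantiations live in AArch64InstrInfo.td; sketched here for
+// reference, with the saturating accumulate node elided:
+//   defm SQDMLAL : SIMDIndexedLongSQDMLXSDTied<0, 0b0011, "sqdmlal", ...>;
+//   defm SQDMLSL : SIMDIndexedLongSQDMLXSDTied<0, 0b0111, "sqdmlsl", ...>;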
+
+multiclass SIMDVectorIndexedLongSD<bit U, bits<4> opc, string asm,
+ SDPatternOperator OpNode> {
+ let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
+ def v4i16_indexed : BaseSIMDIndexed<0, U, 0, 0b01, opc,
+ V128, V64,
+ V128_lo, VectorIndexH,
+ asm, ".4s", ".4s", ".4h", ".h",
+ [(set (v4i32 V128:$Rd),
+ (OpNode (v4i16 V64:$Rn),
+ (v4i16 (AArch64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> {
+ bits<3> idx;
+ let Inst{11} = idx{2};
+ let Inst{21} = idx{1};
+ let Inst{20} = idx{0};
+ }
+
+ def v8i16_indexed : BaseSIMDIndexed<1, U, 0, 0b01, opc,
+ V128, V128,
+ V128_lo, VectorIndexH,
+ asm#"2", ".4s", ".4s", ".8h", ".h",
+ [(set (v4i32 V128:$Rd),
+ (OpNode (extract_high_v8i16 V128:$Rn),
+ (extract_high_v8i16 (AArch64duplane16 (v8i16 V128_lo:$Rm),
+ VectorIndexH:$idx))))]> {
+ bits<3> idx;
+ let Inst{11} = idx{2};
+ let Inst{21} = idx{1};
+ let Inst{20} = idx{0};
+ }
+
+ def v2i32_indexed : BaseSIMDIndexed<0, U, 0, 0b10, opc,
+ V128, V64,
+ V128, VectorIndexS,
+ asm, ".2d", ".2d", ".2s", ".s",
+ [(set (v2i64 V128:$Rd),
+ (OpNode (v2i32 V64:$Rn),
+ (v2i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> {
+ bits<2> idx;
+ let Inst{11} = idx{1};
+ let Inst{21} = idx{0};
+ }
+
+ def v4i32_indexed : BaseSIMDIndexed<1, U, 0, 0b10, opc,
+ V128, V128,
+ V128, VectorIndexS,
+ asm#"2", ".2d", ".2d", ".4s", ".s",
+ [(set (v2i64 V128:$Rd),
+ (OpNode (extract_high_v4i32 V128:$Rn),
+ (extract_high_v4i32 (AArch64duplane32 (v4i32 V128:$Rm),
+ VectorIndexS:$idx))))]> {
+ bits<2> idx;
+ let Inst{11} = idx{1};
+ let Inst{21} = idx{0};
+ }
+ }
+}
+
+multiclass SIMDVectorIndexedLongSDTied<bit U, bits<4> opc, string asm,
+ SDPatternOperator OpNode> {
+ let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
+ def v4i16_indexed : BaseSIMDIndexedTied<0, U, 0, 0b01, opc,
+ V128, V64,
+ V128_lo, VectorIndexH,
+ asm, ".4s", ".4s", ".4h", ".h",
+ [(set (v4i32 V128:$dst),
+ (OpNode (v4i32 V128:$Rd), (v4i16 V64:$Rn),
+ (v4i16 (AArch64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> {
+ bits<3> idx;
+ let Inst{11} = idx{2};
+ let Inst{21} = idx{1};
+ let Inst{20} = idx{0};
+ }
+
+ def v8i16_indexed : BaseSIMDIndexedTied<1, U, 0, 0b01, opc,
+ V128, V128,
+ V128_lo, VectorIndexH,
+ asm#"2", ".4s", ".4s", ".8h", ".h",
+ [(set (v4i32 V128:$dst),
+ (OpNode (v4i32 V128:$Rd),
+ (extract_high_v8i16 V128:$Rn),
+ (extract_high_v8i16 (AArch64duplane16 (v8i16 V128_lo:$Rm),
+ VectorIndexH:$idx))))]> {
+ bits<3> idx;
+ let Inst{11} = idx{2};
+ let Inst{21} = idx{1};
+ let Inst{20} = idx{0};
+ }
+
+ def v2i32_indexed : BaseSIMDIndexedTied<0, U, 0, 0b10, opc,
+ V128, V64,
+ V128, VectorIndexS,
+ asm, ".2d", ".2d", ".2s", ".s",
+ [(set (v2i64 V128:$dst),
+ (OpNode (v2i64 V128:$Rd), (v2i32 V64:$Rn),
+ (v2i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> {
+ bits<2> idx;
+ let Inst{11} = idx{1};
+ let Inst{21} = idx{0};
+ }
+
+ def v4i32_indexed : BaseSIMDIndexedTied<1, U, 0, 0b10, opc,
+ V128, V128,
+ V128, VectorIndexS,
+ asm#"2", ".2d", ".2d", ".4s", ".s",
+ [(set (v2i64 V128:$dst),
+ (OpNode (v2i64 V128:$Rd),
+ (extract_high_v4i32 V128:$Rn),
+ (extract_high_v4i32 (AArch64duplane32 (v4i32 V128:$Rm),
+ VectorIndexS:$idx))))]> {
+ bits<2> idx;
+ let Inst{11} = idx{1};
+ let Inst{21} = idx{0};
+ }
+ }
+}
+
+//----------------------------------------------------------------------------
+// AdvSIMD scalar shift by immediate
+//----------------------------------------------------------------------------
+
+let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in
+class BaseSIMDScalarShift<bit U, bits<5> opc, bits<7> fixed_imm,
+ RegisterClass regtype1, RegisterClass regtype2,
+ Operand immtype, string asm, list<dag> pattern>
+ : I<(outs regtype1:$Rd), (ins regtype2:$Rn, immtype:$imm),
+ asm, "\t$Rd, $Rn, $imm", "", pattern>,
+ Sched<[WriteV]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ bits<7> imm;
+ let Inst{31-30} = 0b01;
+ let Inst{29} = U;
+ let Inst{28-23} = 0b111110;
+ let Inst{22-16} = fixed_imm;
+ let Inst{15-11} = opc;
+ let Inst{10} = 1;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+}
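+
+// fixed_imm supplies the statically known upper bits of the immh:immb field
+// (Inst{22-16}); the '?' positions are filled in per element size by the
+// subclasses below. Per the architectural encoding, a right shift by #s is
+// encoded as (2 * esize) - s and a left shift as esize + s, so the position
+// of the leading set bit identifies the element size.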
+
+let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in
+class BaseSIMDScalarShiftTied<bit U, bits<5> opc, bits<7> fixed_imm,
+ RegisterClass regtype1, RegisterClass regtype2,
+ Operand immtype, string asm, list<dag> pattern>
+ : I<(outs regtype1:$dst), (ins regtype1:$Rd, regtype2:$Rn, immtype:$imm),
+ asm, "\t$Rd, $Rn, $imm", "$Rd = $dst", pattern>,
+ Sched<[WriteV]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ bits<7> imm;
+ let Inst{31-30} = 0b01;
+ let Inst{29} = U;
+ let Inst{28-23} = 0b111110;
+ let Inst{22-16} = fixed_imm;
+ let Inst{15-11} = opc;
+ let Inst{10} = 1;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+}
+
+multiclass SIMDScalarRShiftSD<bit U, bits<5> opc, string asm> {
+ def s : BaseSIMDScalarShift<U, opc, {0,1,?,?,?,?,?},
+ FPR32, FPR32, vecshiftR32, asm, []> {
+ let Inst{20-16} = imm{4-0};
+ }
+
+ def d : BaseSIMDScalarShift<U, opc, {1,?,?,?,?,?,?},
+ FPR64, FPR64, vecshiftR64, asm, []> {
+ let Inst{21-16} = imm{5-0};
+ }
+}
+
+multiclass SIMDScalarRShiftD<bit U, bits<5> opc, string asm,
+ SDPatternOperator OpNode> {
+ def d : BaseSIMDScalarShift<U, opc, {1,?,?,?,?,?,?},
+ FPR64, FPR64, vecshiftR64, asm,
+ [(set (i64 FPR64:$Rd),
+ (OpNode (i64 FPR64:$Rn), (i32 vecshiftR64:$imm)))]> {
+ let Inst{21-16} = imm{5-0};
+ }
+
+ def : Pat<(v1i64 (OpNode (v1i64 FPR64:$Rn), (i32 vecshiftR64:$imm))),
+ (!cast<Instruction>(NAME # "d") FPR64:$Rn, vecshiftR64:$imm)>;
+}
+
+multiclass SIMDScalarRShiftDTied<bit U, bits<5> opc, string asm,
+ SDPatternOperator OpNode = null_frag> {
+ def d : BaseSIMDScalarShiftTied<U, opc, {1,?,?,?,?,?,?},
+ FPR64, FPR64, vecshiftR64, asm,
+ [(set (i64 FPR64:$dst), (OpNode (i64 FPR64:$Rd), (i64 FPR64:$Rn),
+ (i32 vecshiftR64:$imm)))]> {
+ let Inst{21-16} = imm{5-0};
+ }
+
+ def : Pat<(v1i64 (OpNode (v1i64 FPR64:$Rd), (v1i64 FPR64:$Rn),
+ (i32 vecshiftR64:$imm))),
+ (!cast<Instruction>(NAME # "d") FPR64:$Rd, FPR64:$Rn,
+ vecshiftR64:$imm)>;
+}
+
+multiclass SIMDScalarLShiftD<bit U, bits<5> opc, string asm,
+ SDPatternOperator OpNode> {
+ def d : BaseSIMDScalarShift<U, opc, {1,?,?,?,?,?,?},
+ FPR64, FPR64, vecshiftL64, asm,
+ [(set (v1i64 FPR64:$Rd),
+ (OpNode (v1i64 FPR64:$Rn), (i32 vecshiftL64:$imm)))]> {
+ let Inst{21-16} = imm{5-0};
+ }
+}
+
+let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in
+multiclass SIMDScalarLShiftDTied<bit U, bits<5> opc, string asm> {
+ def d : BaseSIMDScalarShiftTied<U, opc, {1,?,?,?,?,?,?},
+ FPR64, FPR64, vecshiftL64, asm, []> {
+ let Inst{21-16} = imm{5-0};
+ }
+}
+
+let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in
+multiclass SIMDScalarRShiftBHS<bit U, bits<5> opc, string asm,
+ SDPatternOperator OpNode = null_frag> {
+ def b : BaseSIMDScalarShift<U, opc, {0,0,0,1,?,?,?},
+ FPR8, FPR16, vecshiftR8, asm, []> {
+ let Inst{18-16} = imm{2-0};
+ }
+
+ def h : BaseSIMDScalarShift<U, opc, {0,0,1,?,?,?,?},
+ FPR16, FPR32, vecshiftR16, asm, []> {
+ let Inst{19-16} = imm{3-0};
+ }
+
+ def s : BaseSIMDScalarShift<U, opc, {0,1,?,?,?,?,?},
+ FPR32, FPR64, vecshiftR32, asm,
+ [(set (i32 FPR32:$Rd), (OpNode (i64 FPR64:$Rn), vecshiftR32:$imm))]> {
+ let Inst{20-16} = imm{4-0};
+ }
+}
+
+multiclass SIMDScalarLShiftBHSD<bit U, bits<5> opc, string asm,
+ SDPatternOperator OpNode> {
+ def b : BaseSIMDScalarShift<U, opc, {0,0,0,1,?,?,?},
+ FPR8, FPR8, vecshiftL8, asm, []> {
+ let Inst{18-16} = imm{2-0};
+ }
+
+ def h : BaseSIMDScalarShift<U, opc, {0,0,1,?,?,?,?},
+ FPR16, FPR16, vecshiftL16, asm, []> {
+ let Inst{19-16} = imm{3-0};
+ }
+
+ def s : BaseSIMDScalarShift<U, opc, {0,1,?,?,?,?,?},
+ FPR32, FPR32, vecshiftL32, asm,
+ [(set (i32 FPR32:$Rd), (OpNode (i32 FPR32:$Rn), (i32 vecshiftL32:$imm)))]> {
+ let Inst{20-16} = imm{4-0};
+ }
+
+ def d : BaseSIMDScalarShift<U, opc, {1,?,?,?,?,?,?},
+ FPR64, FPR64, vecshiftL64, asm,
+ [(set (i64 FPR64:$Rd), (OpNode (i64 FPR64:$Rn), (i32 vecshiftL64:$imm)))]> {
+ let Inst{21-16} = imm{5-0};
+ }
+
+ def : Pat<(v1i64 (OpNode (v1i64 FPR64:$Rn), (i32 vecshiftL64:$imm))),
+ (!cast<Instruction>(NAME # "d") FPR64:$Rn, vecshiftL64:$imm)>;
+}
+
+multiclass SIMDScalarRShiftBHSD<bit U, bits<5> opc, string asm> {
+ def b : BaseSIMDScalarShift<U, opc, {0,0,0,1,?,?,?},
+ FPR8, FPR8, vecshiftR8, asm, []> {
+ let Inst{18-16} = imm{2-0};
+ }
+
+ def h : BaseSIMDScalarShift<U, opc, {0,0,1,?,?,?,?},
+ FPR16, FPR16, vecshiftR16, asm, []> {
+ let Inst{19-16} = imm{3-0};
+ }
+
+ def s : BaseSIMDScalarShift<U, opc, {0,1,?,?,?,?,?},
+ FPR32, FPR32, vecshiftR32, asm, []> {
+ let Inst{20-16} = imm{4-0};
+ }
+
+ def d : BaseSIMDScalarShift<U, opc, {1,?,?,?,?,?,?},
+ FPR64, FPR64, vecshiftR64, asm, []> {
+ let Inst{21-16} = imm{5-0};
+ }
+}
+
+//----------------------------------------------------------------------------
+// AdvSIMD vector shift by immediate
+//----------------------------------------------------------------------------
+
+let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in
+class BaseSIMDVectorShift<bit Q, bit U, bits<5> opc, bits<7> fixed_imm,
+ RegisterOperand dst_reg, RegisterOperand src_reg,
+ Operand immtype,
+ string asm, string dst_kind, string src_kind,
+ list<dag> pattern>
+ : I<(outs dst_reg:$Rd), (ins src_reg:$Rn, immtype:$imm),
+ asm, "{\t$Rd" # dst_kind # ", $Rn" # src_kind # ", $imm" #
+ "|" # dst_kind # "\t$Rd, $Rn, $imm}", "", pattern>,
+ Sched<[WriteV]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ let Inst{31} = 0;
+ let Inst{30} = Q;
+ let Inst{29} = U;
+ let Inst{28-23} = 0b011110;
+ let Inst{22-16} = fixed_imm;
+ let Inst{15-11} = opc;
+ let Inst{10} = 1;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+}
+
+let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in
+class BaseSIMDVectorShiftTied<bit Q, bit U, bits<5> opc, bits<7> fixed_imm,
+ RegisterOperand vectype1, RegisterOperand vectype2,
+ Operand immtype,
+ string asm, string dst_kind, string src_kind,
+ list<dag> pattern>
+ : I<(outs vectype1:$dst), (ins vectype1:$Rd, vectype2:$Rn, immtype:$imm),
+ asm, "{\t$Rd" # dst_kind # ", $Rn" # src_kind # ", $imm" #
+ "|" # dst_kind # "\t$Rd, $Rn, $imm}", "$Rd = $dst", pattern>,
+ Sched<[WriteV]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ let Inst{31} = 0;
+ let Inst{30} = Q;
+ let Inst{29} = U;
+ let Inst{28-23} = 0b011110;
+ let Inst{22-16} = fixed_imm;
+ let Inst{15-11} = opc;
+ let Inst{10} = 1;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
+}
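+
+// The "{...|...}" asm string carries both assembler syntax variants: the part
+// before '|' is the standard form (e.g. "sshr v0.2d, v1.2d, #3"), the part
+// after it the short form that folds the arrangement into the mnemonic
+// (e.g. "sshr.2d v0, v1, #3").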
+
+multiclass SIMDVectorRShiftSD<bit U, bits<5> opc, string asm,
+ Intrinsic OpNode> {
+ def v2i32_shift : BaseSIMDVectorShift<0, U, opc, {0,1,?,?,?,?,?},
+ V64, V64, vecshiftR32,
+ asm, ".2s", ".2s",
+ [(set (v2i32 V64:$Rd), (OpNode (v2f32 V64:$Rn), (i32 imm:$imm)))]> {
+ bits<5> imm;
+ let Inst{20-16} = imm;
+ }
+
+ def v4i32_shift : BaseSIMDVectorShift<1, U, opc, {0,1,?,?,?,?,?},
+ V128, V128, vecshiftR32,
+ asm, ".4s", ".4s",
+ [(set (v4i32 V128:$Rd), (OpNode (v4f32 V128:$Rn), (i32 imm:$imm)))]> {
+ bits<5> imm;
+ let Inst{20-16} = imm;
+ }
+
+ def v2i64_shift : BaseSIMDVectorShift<1, U, opc, {1,?,?,?,?,?,?},
+ V128, V128, vecshiftR64,
+ asm, ".2d", ".2d",
+ [(set (v2i64 V128:$Rd), (OpNode (v2f64 V128:$Rn), (i32 imm:$imm)))]> {
+ bits<6> imm;
+ let Inst{21-16} = imm;
+ }
+}
+
+multiclass SIMDVectorRShiftSDToFP<bit U, bits<5> opc, string asm,
+ Intrinsic OpNode> {
+ def v2i32_shift : BaseSIMDVectorShift<0, U, opc, {0,1,?,?,?,?,?},
+ V64, V64, vecshiftR32,
+ asm, ".2s", ".2s",
+ [(set (v2f32 V64:$Rd), (OpNode (v2i32 V64:$Rn), (i32 imm:$imm)))]> {
+ bits<5> imm;
+ let Inst{20-16} = imm;
+ }
+
+ def v4i32_shift : BaseSIMDVectorShift<1, U, opc, {0,1,?,?,?,?,?},
+ V128, V128, vecshiftR32,
+ asm, ".4s", ".4s",
+ [(set (v4f32 V128:$Rd), (OpNode (v4i32 V128:$Rn), (i32 imm:$imm)))]> {
+ bits<5> imm;
+ let Inst{20-16} = imm;
+ }
+
+ def v2i64_shift : BaseSIMDVectorShift<1, U, opc, {1,?,?,?,?,?,?},
+ V128, V128, vecshiftR64,
+ asm, ".2d", ".2d",
+ [(set (v2f64 V128:$Rd), (OpNode (v2i64 V128:$Rn), (i32 imm:$imm)))]> {
+ bits<6> imm;
+ let Inst{21-16} = imm;
+ }
+}
+
+multiclass SIMDVectorRShiftNarrowBHS<bit U, bits<5> opc, string asm,
+ SDPatternOperator OpNode> {
+ def v8i8_shift : BaseSIMDVectorShift<0, U, opc, {0,0,0,1,?,?,?},
+ V64, V128, vecshiftR16Narrow,
+ asm, ".8b", ".8h",
+ [(set (v8i8 V64:$Rd), (OpNode (v8i16 V128:$Rn), vecshiftR16Narrow:$imm))]> {
+ bits<3> imm;
+ let Inst{18-16} = imm;
+ }
+
+ def v16i8_shift : BaseSIMDVectorShiftTied<1, U, opc, {0,0,0,1,?,?,?},
+ V128, V128, vecshiftR16Narrow,
+ asm#"2", ".16b", ".8h", []> {
+ bits<3> imm;
+ let Inst{18-16} = imm;
+ let hasSideEffects = 0;
+ }
+
+ def v4i16_shift : BaseSIMDVectorShift<0, U, opc, {0,0,1,?,?,?,?},
+ V64, V128, vecshiftR32Narrow,
+ asm, ".4h", ".4s",
+ [(set (v4i16 V64:$Rd), (OpNode (v4i32 V128:$Rn), vecshiftR32Narrow:$imm))]> {
+ bits<4> imm;
+ let Inst{19-16} = imm;
+ }
+
+ def v8i16_shift : BaseSIMDVectorShiftTied<1, U, opc, {0,0,1,?,?,?,?},
+ V128, V128, vecshiftR32Narrow,
+ asm#"2", ".8h", ".4s", []> {
+ bits<4> imm;
+ let Inst{19-16} = imm;
+ let hasSideEffects = 0;
+ }
+
+ def v2i32_shift : BaseSIMDVectorShift<0, U, opc, {0,1,?,?,?,?,?},
+ V64, V128, vecshiftR64Narrow,
+ asm, ".2s", ".2d",
+ [(set (v2i32 V64:$Rd), (OpNode (v2i64 V128:$Rn), vecshiftR64Narrow:$imm))]> {
+ bits<5> imm;
+ let Inst{20-16} = imm;
+ }
+
+ def v4i32_shift : BaseSIMDVectorShiftTied<1, U, opc, {0,1,?,?,?,?,?},
+ V128, V128, vecshiftR64Narrow,
+ asm#"2", ".4s", ".2d", []> {
+ bits<5> imm;
+ let Inst{20-16} = imm;
+ let hasSideEffects = 0;
+ }
+
+ // TableGen doesn't like patterns w/ INSERT_SUBREG on the instructions
+ // themselves, so put them here instead.
+
+ // Patterns that combine what is effectively an insert-high with a normal
+ // intrinsic; the insert-high is represented by CONCAT_VECTORS.
+ def : Pat<(concat_vectors (v8i8 V64:$Rd),(OpNode (v8i16 V128:$Rn),
+ vecshiftR16Narrow:$imm)),
+ (!cast<Instruction>(NAME # "v16i8_shift")
+ (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub),
+ V128:$Rn, vecshiftR16Narrow:$imm)>;
+ def : Pat<(concat_vectors (v4i16 V64:$Rd), (OpNode (v4i32 V128:$Rn),
+ vecshiftR32Narrow:$imm)),
+ (!cast<Instruction>(NAME # "v8i16_shift")
+ (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub),
+ V128:$Rn, vecshiftR32Narrow:$imm)>;
+ def : Pat<(concat_vectors (v2i32 V64:$Rd), (OpNode (v2i64 V128:$Rn),
+ vecshiftR64Narrow:$imm)),
+ (!cast<Instruction>(NAME # "v4i32_shift")
+ (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub),
+ V128:$Rn, vecshiftR64Narrow:$imm)>;
+}
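+
+// For example, "shrn2 v0.16b, v1.8h, #3" narrows v1 into the high half of v0
+// while leaving the low half intact; the CONCAT_VECTORS patterns above express
+// exactly that, inserting the existing low half with INSERT_SUBREG and
+// selecting the tied "2" form of the instruction.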
+
+multiclass SIMDVectorLShiftBHSD<bit U, bits<5> opc, string asm,
+ SDPatternOperator OpNode> {
+ def v8i8_shift : BaseSIMDVectorShift<0, U, opc, {0,0,0,1,?,?,?},
+ V64, V64, vecshiftL8,
+ asm, ".8b", ".8b",
+ [(set (v8i8 V64:$Rd), (OpNode (v8i8 V64:$Rn),
+ (i32 vecshiftL8:$imm)))]> {
+ bits<3> imm;
+ let Inst{18-16} = imm;
+ }
+
+ def v16i8_shift : BaseSIMDVectorShift<1, U, opc, {0,0,0,1,?,?,?},
+ V128, V128, vecshiftL8,
+ asm, ".16b", ".16b",
+ [(set (v16i8 V128:$Rd), (OpNode (v16i8 V128:$Rn),
+ (i32 vecshiftL8:$imm)))]> {
+ bits<3> imm;
+ let Inst{18-16} = imm;
+ }
+
+ def v4i16_shift : BaseSIMDVectorShift<0, U, opc, {0,0,1,?,?,?,?},
+ V64, V64, vecshiftL16,
+ asm, ".4h", ".4h",
+ [(set (v4i16 V64:$Rd), (OpNode (v4i16 V64:$Rn),
+ (i32 vecshiftL16:$imm)))]> {
+ bits<4> imm;
+ let Inst{19-16} = imm;
+ }
+
+ def v8i16_shift : BaseSIMDVectorShift<1, U, opc, {0,0,1,?,?,?,?},
+ V128, V128, vecshiftL16,
+ asm, ".8h", ".8h",
+ [(set (v8i16 V128:$Rd), (OpNode (v8i16 V128:$Rn),
+ (i32 vecshiftL16:$imm)))]> {
+ bits<4> imm;
+ let Inst{19-16} = imm;
+ }
+
+ def v2i32_shift : BaseSIMDVectorShift<0, U, opc, {0,1,?,?,?,?,?},
+ V64, V64, vecshiftL32,
+ asm, ".2s", ".2s",
+ [(set (v2i32 V64:$Rd), (OpNode (v2i32 V64:$Rn),
+ (i32 vecshiftL32:$imm)))]> {
+ bits<5> imm;
+ let Inst{20-16} = imm;
+ }
+
+ def v4i32_shift : BaseSIMDVectorShift<1, U, opc, {0,1,?,?,?,?,?},
+ V128, V128, vecshiftL32,
+ asm, ".4s", ".4s",
+ [(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn),
+ (i32 vecshiftL32:$imm)))]> {
+ bits<5> imm;
+ let Inst{20-16} = imm;
+ }
+
+ def v2i64_shift : BaseSIMDVectorShift<1, U, opc, {1,?,?,?,?,?,?},
+ V128, V128, vecshiftL64,
+ asm, ".2d", ".2d",
+ [(set (v2i64 V128:$Rd), (OpNode (v2i64 V128:$Rn),
+ (i32 vecshiftL64:$imm)))]> {
+ bits<6> imm;
+ let Inst{21-16} = imm;
+ }
+}
+
+multiclass SIMDVectorRShiftBHSD<bit U, bits<5> opc, string asm,
+ SDPatternOperator OpNode> {
+ def v8i8_shift : BaseSIMDVectorShift<0, U, opc, {0,0,0,1,?,?,?},
+ V64, V64, vecshiftR8,
+ asm, ".8b", ".8b",
+ [(set (v8i8 V64:$Rd), (OpNode (v8i8 V64:$Rn),
+ (i32 vecshiftR8:$imm)))]> {
+ bits<3> imm;
+ let Inst{18-16} = imm;
+ }
+
+ def v16i8_shift : BaseSIMDVectorShift<1, U, opc, {0,0,0,1,?,?,?},
+ V128, V128, vecshiftR8,
+ asm, ".16b", ".16b",
+ [(set (v16i8 V128:$Rd), (OpNode (v16i8 V128:$Rn),
+ (i32 vecshiftR8:$imm)))]> {
+ bits<3> imm;
+ let Inst{18-16} = imm;
+ }
+
+ def v4i16_shift : BaseSIMDVectorShift<0, U, opc, {0,0,1,?,?,?,?},
+ V64, V64, vecshiftR16,
+ asm, ".4h", ".4h",
+ [(set (v4i16 V64:$Rd), (OpNode (v4i16 V64:$Rn),
+ (i32 vecshiftR16:$imm)))]> {
+ bits<4> imm;
+ let Inst{19-16} = imm;
+ }
+
+ def v8i16_shift : BaseSIMDVectorShift<1, U, opc, {0,0,1,?,?,?,?},
+ V128, V128, vecshiftR16,
+ asm, ".8h", ".8h",
+ [(set (v8i16 V128:$Rd), (OpNode (v8i16 V128:$Rn),
+ (i32 vecshiftR16:$imm)))]> {
+ bits<4> imm;
+ let Inst{19-16} = imm;
+ }
+
+ def v2i32_shift : BaseSIMDVectorShift<0, U, opc, {0,1,?,?,?,?,?},
+ V64, V64, vecshiftR32,
+ asm, ".2s", ".2s",
+ [(set (v2i32 V64:$Rd), (OpNode (v2i32 V64:$Rn),
+ (i32 vecshiftR32:$imm)))]> {
+ bits<5> imm;
+ let Inst{20-16} = imm;
+ }
+
+ def v4i32_shift : BaseSIMDVectorShift<1, U, opc, {0,1,?,?,?,?,?},
+ V128, V128, vecshiftR32,
+ asm, ".4s", ".4s",
+ [(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn),
+ (i32 vecshiftR32:$imm)))]> {
+ bits<5> imm;
+ let Inst{20-16} = imm;
+ }
+
+ def v2i64_shift : BaseSIMDVectorShift<1, U, opc, {1,?,?,?,?,?,?},
+ V128, V128, vecshiftR64,
+ asm, ".2d", ".2d",
+ [(set (v2i64 V128:$Rd), (OpNode (v2i64 V128:$Rn),
+ (i32 vecshiftR64:$imm)))]> {
+ bits<6> imm;
+ let Inst{21-16} = imm;
+ }
+}
+
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
+multiclass SIMDVectorRShiftBHSDTied<bit U, bits<5> opc, string asm,
+ SDPatternOperator OpNode = null_frag> {
+ def v8i8_shift : BaseSIMDVectorShiftTied<0, U, opc, {0,0,0,1,?,?,?},
+ V64, V64, vecshiftR8, asm, ".8b", ".8b",
+ [(set (v8i8 V64:$dst),
+ (OpNode (v8i8 V64:$Rd), (v8i8 V64:$Rn),
+ (i32 vecshiftR8:$imm)))]> {
+ bits<3> imm;
+ let Inst{18-16} = imm;
+ }
+
+ def v16i8_shift : BaseSIMDVectorShiftTied<1, U, opc, {0,0,0,1,?,?,?},
+ V128, V128, vecshiftR8, asm, ".16b", ".16b",
+ [(set (v16i8 V128:$dst),
+ (OpNode (v16i8 V128:$Rd), (v16i8 V128:$Rn),
+ (i32 vecshiftR8:$imm)))]> {
+ bits<3> imm;
+ let Inst{18-16} = imm;
+ }
+
+ def v4i16_shift : BaseSIMDVectorShiftTied<0, U, opc, {0,0,1,?,?,?,?},
+ V64, V64, vecshiftR16, asm, ".4h", ".4h",
+ [(set (v4i16 V64:$dst),
+ (OpNode (v4i16 V64:$Rd), (v4i16 V64:$Rn),
+ (i32 vecshiftR16:$imm)))]> {
+ bits<4> imm;
+ let Inst{19-16} = imm;
+ }
+
+ def v8i16_shift : BaseSIMDVectorShiftTied<1, U, opc, {0,0,1,?,?,?,?},
+ V128, V128, vecshiftR16, asm, ".8h", ".8h",
+ [(set (v8i16 V128:$dst),
+ (OpNode (v8i16 V128:$Rd), (v8i16 V128:$Rn),
+ (i32 vecshiftR16:$imm)))]> {
+ bits<4> imm;
+ let Inst{19-16} = imm;
+ }
+
+ def v2i32_shift : BaseSIMDVectorShiftTied<0, U, opc, {0,1,?,?,?,?,?},
+ V64, V64, vecshiftR32, asm, ".2s", ".2s",
+ [(set (v2i32 V64:$dst),
+ (OpNode (v2i32 V64:$Rd), (v2i32 V64:$Rn),
+ (i32 vecshiftR32:$imm)))]> {
+ bits<5> imm;
+ let Inst{20-16} = imm;
+ }
+
+ def v4i32_shift : BaseSIMDVectorShiftTied<1, U, opc, {0,1,?,?,?,?,?},
+ V128, V128, vecshiftR32, asm, ".4s", ".4s",
+ [(set (v4i32 V128:$dst),
+ (OpNode (v4i32 V128:$Rd), (v4i32 V128:$Rn),
+ (i32 vecshiftR32:$imm)))]> {
+ bits<5> imm;
+ let Inst{20-16} = imm;
+ }
+
+ def v2i64_shift : BaseSIMDVectorShiftTied<1, U, opc, {1,?,?,?,?,?,?},
+ V128, V128, vecshiftR64,
+ asm, ".2d", ".2d", [(set (v2i64 V128:$dst),
+ (OpNode (v2i64 V128:$Rd), (v2i64 V128:$Rn),
+ (i32 vecshiftR64:$imm)))]> {
+ bits<6> imm;
+ let Inst{21-16} = imm;
+ }
+}
+
+multiclass SIMDVectorLShiftBHSDTied<bit U, bits<5> opc, string asm,
+ SDPatternOperator OpNode = null_frag> {
+ def v8i8_shift : BaseSIMDVectorShiftTied<0, U, opc, {0,0,0,1,?,?,?},
+ V64, V64, vecshiftL8,
+ asm, ".8b", ".8b",
+ [(set (v8i8 V64:$dst),
+ (OpNode (v8i8 V64:$Rd), (v8i8 V64:$Rn),
+ (i32 vecshiftL8:$imm)))]> {
+ bits<3> imm;
+ let Inst{18-16} = imm;
+ }
+
+ def v16i8_shift : BaseSIMDVectorShiftTied<1, U, opc, {0,0,0,1,?,?,?},
+ V128, V128, vecshiftL8,
+ asm, ".16b", ".16b",
+ [(set (v16i8 V128:$dst),
+ (OpNode (v16i8 V128:$Rd), (v16i8 V128:$Rn),
+ (i32 vecshiftL8:$imm)))]> {
+ bits<3> imm;
+ let Inst{18-16} = imm;
+ }
+
+ def v4i16_shift : BaseSIMDVectorShiftTied<0, U, opc, {0,0,1,?,?,?,?},
+ V64, V64, vecshiftL16,
+ asm, ".4h", ".4h",
+ [(set (v4i16 V64:$dst),
+ (OpNode (v4i16 V64:$Rd), (v4i16 V64:$Rn),
+ (i32 vecshiftL16:$imm)))]> {
+ bits<4> imm;
+ let Inst{19-16} = imm;
+ }
+
+ def v8i16_shift : BaseSIMDVectorShiftTied<1, U, opc, {0,0,1,?,?,?,?},
+ V128, V128, vecshiftL16,
+ asm, ".8h", ".8h",
+ [(set (v8i16 V128:$dst),
+ (OpNode (v8i16 V128:$Rd), (v8i16 V128:$Rn),
+ (i32 vecshiftL16:$imm)))]> {
+ bits<4> imm;
+ let Inst{19-16} = imm;
+ }
+
+ def v2i32_shift : BaseSIMDVectorShiftTied<0, U, opc, {0,1,?,?,?,?,?},
+ V64, V64, vecshiftL32,
+ asm, ".2s", ".2s",
+ [(set (v2i32 V64:$dst),
+ (OpNode (v2i32 V64:$Rd), (v2i32 V64:$Rn),
+ (i32 vecshiftL32:$imm)))]> {
+ bits<5> imm;
+ let Inst{20-16} = imm;
+ }
+
+ def v4i32_shift : BaseSIMDVectorShiftTied<1, U, opc, {0,1,?,?,?,?,?},
+ V128, V128, vecshiftL32,
+ asm, ".4s", ".4s",
+ [(set (v4i32 V128:$dst),
+ (OpNode (v4i32 V128:$Rd), (v4i32 V128:$Rn),
+ (i32 vecshiftL32:$imm)))]> {
+ bits<5> imm;
+ let Inst{20-16} = imm;
+ }
+
+ def v2i64_shift : BaseSIMDVectorShiftTied<1, U, opc, {1,?,?,?,?,?,?},
+ V128, V128, vecshiftL64,
+ asm, ".2d", ".2d",
+ [(set (v2i64 V128:$dst),
+ (OpNode (v2i64 V128:$Rd), (v2i64 V128:$Rn),
+ (i32 vecshiftL64:$imm)))]> {
+ bits<6> imm;
+ let Inst{21-16} = imm;
+ }
+}
+
+multiclass SIMDVectorLShiftLongBHSD<bit U, bits<5> opc, string asm,
+ SDPatternOperator OpNode> {
+ def v8i8_shift : BaseSIMDVectorShift<0, U, opc, {0,0,0,1,?,?,?},
+ V128, V64, vecshiftL8, asm, ".8h", ".8b",
+ [(set (v8i16 V128:$Rd), (OpNode (v8i8 V64:$Rn), vecshiftL8:$imm))]> {
+ bits<3> imm;
+ let Inst{18-16} = imm;
+ }
+
+ def v16i8_shift : BaseSIMDVectorShift<1, U, opc, {0,0,0,1,?,?,?},
+ V128, V128, vecshiftL8,
+ asm#"2", ".8h", ".16b",
+ [(set (v8i16 V128:$Rd),
+ (OpNode (extract_high_v16i8 V128:$Rn), vecshiftL8:$imm))]> {
+ bits<3> imm;
+ let Inst{18-16} = imm;
+ }
+
+ def v4i16_shift : BaseSIMDVectorShift<0, U, opc, {0,0,1,?,?,?,?},
+ V128, V64, vecshiftL16, asm, ".4s", ".4h",
+ [(set (v4i32 V128:$Rd), (OpNode (v4i16 V64:$Rn), vecshiftL16:$imm))]> {
+ bits<4> imm;
+ let Inst{19-16} = imm;
+ }
+
+ def v8i16_shift : BaseSIMDVectorShift<1, U, opc, {0,0,1,?,?,?,?},
+ V128, V128, vecshiftL16,
+ asm#"2", ".4s", ".8h",
+ [(set (v4i32 V128:$Rd),
+ (OpNode (extract_high_v8i16 V128:$Rn), vecshiftL16:$imm))]> {
+ bits<4> imm;
+ let Inst{19-16} = imm;
+ }
+
+ def v2i32_shift : BaseSIMDVectorShift<0, U, opc, {0,1,?,?,?,?,?},
+ V128, V64, vecshiftL32, asm, ".2d", ".2s",
+ [(set (v2i64 V128:$Rd), (OpNode (v2i32 V64:$Rn), vecshiftL32:$imm))]> {
+ bits<5> imm;
+ let Inst{20-16} = imm;
+ }
+
+ def v4i32_shift : BaseSIMDVectorShift<1, U, opc, {0,1,?,?,?,?,?},
+ V128, V128, vecshiftL32,
+ asm#"2", ".2d", ".4s",
+ [(set (v2i64 V128:$Rd),
+ (OpNode (extract_high_v4i32 V128:$Rn), vecshiftL32:$imm))]> {
+ bits<5> imm;
+ let Inst{20-16} = imm;
+ }
+}
+
+//---
+// Vector load/store
+//---
+// SIMD ldX/stX no-index memory references don't allow the optional
+// ", #0" constant and handle post-indexing explicitly, so we use
+// a more specialized parse method for them. Otherwise, it's the same as
+// the general GPR64sp handling.
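+//
+// For example, the assembler accepts "ld1 { v0.16b }, [x0]" but not
+// "ld1 { v0.16b }, [x0, #0]"; the post-indexed forms are written
+// "ld1 { v0.16b }, [x0], #16" or "ld1 { v0.16b }, [x0], x2".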
+
+class BaseSIMDLdSt<bit Q, bit L, bits<4> opcode, bits<2> size,
+ string asm, dag oops, dag iops, list<dag> pattern>
+ : I<oops, iops, asm, "\t$Vt, [$Rn]", "", pattern> {
+ bits<5> Vt;
+ bits<5> Rn;
+ let Inst{31} = 0;
+ let Inst{30} = Q;
let Inst{29-23} = 0b0011000;
- let Inst{22} = l;
+ let Inst{22} = L;
let Inst{21-16} = 0b000000;
let Inst{15-12} = opcode;
let Inst{11-10} = size;
-
- // Inherit Rn in 9-5
- // Inherit Rt in 4-0
-}
-
-// Format AdvSIMD vector load/store multiple N-element structure (post-index)
-class NeonI_LdStMult_Post<bit q, bit l, bits<4> opcode, bits<2> size,
- dag outs, dag ins, string asmstr,
- list<dag> patterns, InstrItinClass itin>
- : A64InstRtnm<outs, ins, asmstr, patterns, itin>
-{
- let Inst{31} = 0b0;
- let Inst{30} = q;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Vt;
+}
+
+class BaseSIMDLdStPost<bit Q, bit L, bits<4> opcode, bits<2> size,
+ string asm, dag oops, dag iops>
+ : I<oops, iops, asm, "\t$Vt, [$Rn], $Xm", "$Rn = $wback", []> {
+ bits<5> Vt;
+ bits<5> Rn;
+ bits<5> Xm;
+ let Inst{31} = 0;
+ let Inst{30} = Q;
let Inst{29-23} = 0b0011001;
- let Inst{22} = l;
- let Inst{21} = 0b0;
- // Inherit Rm in 20-16
+ let Inst{22} = L;
+ let Inst{21} = 0;
+ let Inst{20-16} = Xm;
let Inst{15-12} = opcode;
let Inst{11-10} = size;
- // Inherit Rn in 9-5
- // Inherit Rt in 4-0
-}
-
-// Format AdvSIMD vector load Single N-element structure to all lanes
-class NeonI_LdOne_Dup<bit q, bit r, bits<3> opcode, bits<2> size, dag outs,
- dag ins, string asmstr, list<dag> patterns,
- InstrItinClass itin>
- : A64InstRtn<outs, ins, asmstr, patterns, itin>
-{
- let Inst{31} = 0b0;
- let Inst{30} = q;
- let Inst{29-23} = 0b0011010;
- let Inst{22} = 0b1;
- let Inst{21} = r;
- let Inst{20-16} = 0b00000;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Vt;
+}
+
+// The immediate form of AdvSIMD post-indexed addressing is encoded as
+// register post-index addressing with the zero register (XZR) as the index.
+multiclass SIMDLdStAliases<string asm, string layout, string Count,
+ int Offset, int Size> {
+ // E.g. "ld1 { v0.8b, v1.8b }, [x1], #16"
+ // "ld1\t$Vt, [$Rn], #16"
+ // may get mapped to
+ // (LD1Twov8b_POST VecListTwo8b:$Vt, GPR64sp:$Rn, XZR)
+ def : InstAlias<asm # "\t$Vt, [$Rn], #" # Offset,
+ (!cast<Instruction>(NAME # Count # "v" # layout # "_POST")
+ GPR64sp:$Rn,
+ !cast<RegisterOperand>("VecList" # Count # layout):$Vt,
+ XZR), 1>;
+
+ // E.g. "ld1.8b { v0, v1 }, [x1], #16"
+ // "ld1.8b\t$Vt, [$Rn], #16"
+ // may get mapped to
+ // (LD1Twov8b_POST VecListTwo64:$Vt, GPR64sp:$Rn, XZR)
+ def : InstAlias<asm # "." # layout # "\t$Vt, [$Rn], #" # Offset,
+ (!cast<Instruction>(NAME # Count # "v" # layout # "_POST")
+ GPR64sp:$Rn,
+ !cast<RegisterOperand>("VecList" # Count # Size):$Vt,
+ XZR), 0>;
+
+ // E.g. "ld1.8b { v0, v1 }, [x1]"
+ // "ld1\t$Vt, [$Rn]"
+ // may get mapped to
+ // (LD1Twov8b VecListTwo64:$Vt, GPR64sp:$Rn)
+ def : InstAlias<asm # "." # layout # "\t$Vt, [$Rn]",
+ (!cast<Instruction>(NAME # Count # "v" # layout)
+ !cast<RegisterOperand>("VecList" # Count # Size):$Vt,
+ GPR64sp:$Rn), 0>;
+
+ // E.g. "ld1.8b { v0, v1 }, [x1], x2"
+ // "ld1\t$Vt, [$Rn], $Xm"
+ // may get mapped to
+ // (LD1Twov8b_POST VecListTwo64:$Vt, GPR64sp:$Rn, GPR64pi8:$Xm)
+ def : InstAlias<asm # "." # layout # "\t$Vt, [$Rn], $Xm",
+ (!cast<Instruction>(NAME # Count # "v" # layout # "_POST")
+ GPR64sp:$Rn,
+ !cast<RegisterOperand>("VecList" # Count # Size):$Vt,
+ !cast<RegisterOperand>("GPR64pi" # Offset):$Xm), 0>;
+}
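+
+// Note: the trailing 1/0 on each InstAlias above selects whether the alias is
+// also used when printing the instruction (1) or is accepted by the parser
+// only (0).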
+
+multiclass BaseSIMDLdN<string Count, string asm, string veclist, int Offset128,
+ int Offset64, bits<4> opcode> {
+ let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in {
+ def v16b: BaseSIMDLdSt<1, 1, opcode, 0b00, asm,
+ (outs !cast<RegisterOperand>(veclist # "16b"):$Vt),
+ (ins GPR64sp:$Rn), []>;
+ def v8h : BaseSIMDLdSt<1, 1, opcode, 0b01, asm,
+ (outs !cast<RegisterOperand>(veclist # "8h"):$Vt),
+ (ins GPR64sp:$Rn), []>;
+ def v4s : BaseSIMDLdSt<1, 1, opcode, 0b10, asm,
+ (outs !cast<RegisterOperand>(veclist # "4s"):$Vt),
+ (ins GPR64sp:$Rn), []>;
+ def v2d : BaseSIMDLdSt<1, 1, opcode, 0b11, asm,
+ (outs !cast<RegisterOperand>(veclist # "2d"):$Vt),
+ (ins GPR64sp:$Rn), []>;
+ def v8b : BaseSIMDLdSt<0, 1, opcode, 0b00, asm,
+ (outs !cast<RegisterOperand>(veclist # "8b"):$Vt),
+ (ins GPR64sp:$Rn), []>;
+ def v4h : BaseSIMDLdSt<0, 1, opcode, 0b01, asm,
+ (outs !cast<RegisterOperand>(veclist # "4h"):$Vt),
+ (ins GPR64sp:$Rn), []>;
+ def v2s : BaseSIMDLdSt<0, 1, opcode, 0b10, asm,
+ (outs !cast<RegisterOperand>(veclist # "2s"):$Vt),
+ (ins GPR64sp:$Rn), []>;
+
+ def v16b_POST: BaseSIMDLdStPost<1, 1, opcode, 0b00, asm,
+ (outs GPR64sp:$wback,
+ !cast<RegisterOperand>(veclist # "16b"):$Vt),
+ (ins GPR64sp:$Rn,
+ !cast<RegisterOperand>("GPR64pi" # Offset128):$Xm)>;
+ def v8h_POST : BaseSIMDLdStPost<1, 1, opcode, 0b01, asm,
+ (outs GPR64sp:$wback,
+ !cast<RegisterOperand>(veclist # "8h"):$Vt),
+ (ins GPR64sp:$Rn,
+ !cast<RegisterOperand>("GPR64pi" # Offset128):$Xm)>;
+ def v4s_POST : BaseSIMDLdStPost<1, 1, opcode, 0b10, asm,
+ (outs GPR64sp:$wback,
+ !cast<RegisterOperand>(veclist # "4s"):$Vt),
+ (ins GPR64sp:$Rn,
+ !cast<RegisterOperand>("GPR64pi" # Offset128):$Xm)>;
+ def v2d_POST : BaseSIMDLdStPost<1, 1, opcode, 0b11, asm,
+ (outs GPR64sp:$wback,
+ !cast<RegisterOperand>(veclist # "2d"):$Vt),
+ (ins GPR64sp:$Rn,
+ !cast<RegisterOperand>("GPR64pi" # Offset128):$Xm)>;
+ def v8b_POST : BaseSIMDLdStPost<0, 1, opcode, 0b00, asm,
+ (outs GPR64sp:$wback,
+ !cast<RegisterOperand>(veclist # "8b"):$Vt),
+ (ins GPR64sp:$Rn,
+ !cast<RegisterOperand>("GPR64pi" # Offset64):$Xm)>;
+ def v4h_POST : BaseSIMDLdStPost<0, 1, opcode, 0b01, asm,
+ (outs GPR64sp:$wback,
+ !cast<RegisterOperand>(veclist # "4h"):$Vt),
+ (ins GPR64sp:$Rn,
+ !cast<RegisterOperand>("GPR64pi" # Offset64):$Xm)>;
+ def v2s_POST : BaseSIMDLdStPost<0, 1, opcode, 0b10, asm,
+ (outs GPR64sp:$wback,
+ !cast<RegisterOperand>(veclist # "2s"):$Vt),
+ (ins GPR64sp:$Rn,
+ !cast<RegisterOperand>("GPR64pi" # Offset64):$Xm)>;
+ }
+
+ defm : SIMDLdStAliases<asm, "16b", Count, Offset128, 128>;
+ defm : SIMDLdStAliases<asm, "8h", Count, Offset128, 128>;
+ defm : SIMDLdStAliases<asm, "4s", Count, Offset128, 128>;
+ defm : SIMDLdStAliases<asm, "2d", Count, Offset128, 128>;
+ defm : SIMDLdStAliases<asm, "8b", Count, Offset64, 64>;
+ defm : SIMDLdStAliases<asm, "4h", Count, Offset64, 64>;
+ defm : SIMDLdStAliases<asm, "2s", Count, Offset64, 64>;
+}
+
+// Only ld1/st1 have a v1d version; BaseSIMDLd1 and BaseSIMDSt1 below add it.
+multiclass BaseSIMDStN<string Count, string asm, string veclist, int Offset128,
+ int Offset64, bits<4> opcode> {
+ let hasSideEffects = 0, mayStore = 1, mayLoad = 0 in {
+ def v16b : BaseSIMDLdSt<1, 0, opcode, 0b00, asm, (outs),
+ (ins !cast<RegisterOperand>(veclist # "16b"):$Vt,
+ GPR64sp:$Rn), []>;
+ def v8h : BaseSIMDLdSt<1, 0, opcode, 0b01, asm, (outs),
+ (ins !cast<RegisterOperand>(veclist # "8h"):$Vt,
+ GPR64sp:$Rn), []>;
+ def v4s : BaseSIMDLdSt<1, 0, opcode, 0b10, asm, (outs),
+ (ins !cast<RegisterOperand>(veclist # "4s"):$Vt,
+ GPR64sp:$Rn), []>;
+ def v2d : BaseSIMDLdSt<1, 0, opcode, 0b11, asm, (outs),
+ (ins !cast<RegisterOperand>(veclist # "2d"):$Vt,
+ GPR64sp:$Rn), []>;
+ def v8b : BaseSIMDLdSt<0, 0, opcode, 0b00, asm, (outs),
+ (ins !cast<RegisterOperand>(veclist # "8b"):$Vt,
+ GPR64sp:$Rn), []>;
+ def v4h : BaseSIMDLdSt<0, 0, opcode, 0b01, asm, (outs),
+ (ins !cast<RegisterOperand>(veclist # "4h"):$Vt,
+ GPR64sp:$Rn), []>;
+ def v2s : BaseSIMDLdSt<0, 0, opcode, 0b10, asm, (outs),
+ (ins !cast<RegisterOperand>(veclist # "2s"):$Vt,
+ GPR64sp:$Rn), []>;
+
+ def v16b_POST : BaseSIMDLdStPost<1, 0, opcode, 0b00, asm,
+ (outs GPR64sp:$wback),
+ (ins !cast<RegisterOperand>(veclist # "16b"):$Vt,
+ GPR64sp:$Rn,
+ !cast<RegisterOperand>("GPR64pi" # Offset128):$Xm)>;
+ def v8h_POST : BaseSIMDLdStPost<1, 0, opcode, 0b01, asm,
+ (outs GPR64sp:$wback),
+ (ins !cast<RegisterOperand>(veclist # "8h"):$Vt,
+ GPR64sp:$Rn,
+ !cast<RegisterOperand>("GPR64pi" # Offset128):$Xm)>;
+ def v4s_POST : BaseSIMDLdStPost<1, 0, opcode, 0b10, asm,
+ (outs GPR64sp:$wback),
+ (ins !cast<RegisterOperand>(veclist # "4s"):$Vt,
+ GPR64sp:$Rn,
+ !cast<RegisterOperand>("GPR64pi" # Offset128):$Xm)>;
+ def v2d_POST : BaseSIMDLdStPost<1, 0, opcode, 0b11, asm,
+ (outs GPR64sp:$wback),
+ (ins !cast<RegisterOperand>(veclist # "2d"):$Vt,
+ GPR64sp:$Rn,
+ !cast<RegisterOperand>("GPR64pi" # Offset128):$Xm)>;
+ def v8b_POST : BaseSIMDLdStPost<0, 0, opcode, 0b00, asm,
+ (outs GPR64sp:$wback),
+ (ins !cast<RegisterOperand>(veclist # "8b"):$Vt,
+ GPR64sp:$Rn,
+ !cast<RegisterOperand>("GPR64pi" # Offset64):$Xm)>;
+ def v4h_POST : BaseSIMDLdStPost<0, 0, opcode, 0b01, asm,
+ (outs GPR64sp:$wback),
+ (ins !cast<RegisterOperand>(veclist # "4h"):$Vt,
+ GPR64sp:$Rn,
+ !cast<RegisterOperand>("GPR64pi" # Offset64):$Xm)>;
+ def v2s_POST : BaseSIMDLdStPost<0, 0, opcode, 0b10, asm,
+ (outs GPR64sp:$wback),
+ (ins !cast<RegisterOperand>(veclist # "2s"):$Vt,
+ GPR64sp:$Rn,
+ !cast<RegisterOperand>("GPR64pi" # Offset64):$Xm)>;
+ }
+
+ defm : SIMDLdStAliases<asm, "16b", Count, Offset128, 128>;
+ defm : SIMDLdStAliases<asm, "8h", Count, Offset128, 128>;
+ defm : SIMDLdStAliases<asm, "4s", Count, Offset128, 128>;
+ defm : SIMDLdStAliases<asm, "2d", Count, Offset128, 128>;
+ defm : SIMDLdStAliases<asm, "8b", Count, Offset64, 64>;
+ defm : SIMDLdStAliases<asm, "4h", Count, Offset64, 64>;
+ defm : SIMDLdStAliases<asm, "2s", Count, Offset64, 64>;
+}
+
+multiclass BaseSIMDLd1<string Count, string asm, string veclist,
+ int Offset128, int Offset64, bits<4> opcode>
+ : BaseSIMDLdN<Count, asm, veclist, Offset128, Offset64, opcode> {
+
+ // LD1 instructions have extra "1d" variants.
+ let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in {
+ def v1d : BaseSIMDLdSt<0, 1, opcode, 0b11, asm,
+ (outs !cast<RegisterOperand>(veclist # "1d"):$Vt),
+ (ins GPR64sp:$Rn), []>;
+
+ def v1d_POST : BaseSIMDLdStPost<0, 1, opcode, 0b11, asm,
+ (outs GPR64sp:$wback,
+ !cast<RegisterOperand>(veclist # "1d"):$Vt),
+ (ins GPR64sp:$Rn,
+ !cast<RegisterOperand>("GPR64pi" # Offset64):$Xm)>;
+ }
+
+ defm : SIMDLdStAliases<asm, "1d", Count, Offset64, 64>;
+}
+
+multiclass BaseSIMDSt1<string Count, string asm, string veclist,
+ int Offset128, int Offset64, bits<4> opcode>
+ : BaseSIMDStN<Count, asm, veclist, Offset128, Offset64, opcode> {
+
+ // ST1 instructions have extra "1d" variants.
+ let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in {
+ def v1d : BaseSIMDLdSt<0, 0, opcode, 0b11, asm, (outs),
+ (ins !cast<RegisterOperand>(veclist # "1d"):$Vt,
+ GPR64sp:$Rn), []>;
+
+ def v1d_POST : BaseSIMDLdStPost<0, 0, opcode, 0b11, asm,
+ (outs GPR64sp:$wback),
+ (ins !cast<RegisterOperand>(veclist # "1d"):$Vt,
+ GPR64sp:$Rn,
+ !cast<RegisterOperand>("GPR64pi" # Offset64):$Xm)>;
+ }
+
+ defm : SIMDLdStAliases<asm, "1d", Count, Offset64, 64>;
+}
+
+multiclass SIMDLd1Multiple<string asm> {
+ defm One : BaseSIMDLd1<"One", asm, "VecListOne", 16, 8, 0b0111>;
+ defm Two : BaseSIMDLd1<"Two", asm, "VecListTwo", 32, 16, 0b1010>;
+ defm Three : BaseSIMDLd1<"Three", asm, "VecListThree", 48, 24, 0b0110>;
+ defm Four : BaseSIMDLd1<"Four", asm, "VecListFour", 64, 32, 0b0010>;
+}
+
+multiclass SIMDSt1Multiple<string asm> {
+ defm One : BaseSIMDSt1<"One", asm, "VecListOne", 16, 8, 0b0111>;
+ defm Two : BaseSIMDSt1<"Two", asm, "VecListTwo", 32, 16, 0b1010>;
+ defm Three : BaseSIMDSt1<"Three", asm, "VecListThree", 48, 24, 0b0110>;
+ defm Four : BaseSIMDSt1<"Four", asm, "VecListFour", 64, 32, 0b0010>;
+}
+
+multiclass SIMDLd2Multiple<string asm> {
+ defm Two : BaseSIMDLdN<"Two", asm, "VecListTwo", 32, 16, 0b1000>;
+}
+
+multiclass SIMDSt2Multiple<string asm> {
+ defm Two : BaseSIMDStN<"Two", asm, "VecListTwo", 32, 16, 0b1000>;
+}
+
+multiclass SIMDLd3Multiple<string asm> {
+ defm Three : BaseSIMDLdN<"Three", asm, "VecListThree", 48, 24, 0b0100>;
+}
+
+multiclass SIMDSt3Multiple<string asm> {
+ defm Three : BaseSIMDStN<"Three", asm, "VecListThree", 48, 24, 0b0100>;
+}
+
+multiclass SIMDLd4Multiple<string asm> {
+ defm Four : BaseSIMDLdN<"Four", asm, "VecListFour", 64, 32, 0b0000>;
+}
+
+multiclass SIMDSt4Multiple<string asm> {
+ defm Four : BaseSIMDStN<"Four", asm, "VecListFour", 64, 32, 0b0000>;
+}
+
+//---
+// AdvSIMD Load/store single-element
+//---
+
+class BaseSIMDLdStSingle<bit L, bit R, bits<3> opcode,
+ string asm, string operands, string cst,
+ dag oops, dag iops, list<dag> pattern>
+ : I<oops, iops, asm, operands, cst, pattern> {
+ bits<5> Vt;
+ bits<5> Rn;
+ let Inst{31} = 0;
+ let Inst{29-24} = 0b001101;
+ let Inst{22} = L;
+ let Inst{21} = R;
+ let Inst{15-13} = opcode;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Vt;
+}
+
+class BaseSIMDLdStSingleTied<bit L, bit R, bits<3> opcode,
+ string asm, string operands, string cst,
+ dag oops, dag iops, list<dag> pattern>
+ : I<oops, iops, asm, operands, "$Vt = $dst," # cst, pattern> {
+ bits<5> Vt;
+ bits<5> Rn;
+ let Inst{31} = 0;
+ let Inst{29-24} = 0b001101;
+ let Inst{22} = L;
+ let Inst{21} = R;
let Inst{15-13} = opcode;
- let Inst{12} = 0b0;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Vt;
+}
+
+let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in
+class BaseSIMDLdR<bit Q, bit R, bits<3> opcode, bit S, bits<2> size, string asm,
+ Operand listtype>
+ : BaseSIMDLdStSingle<1, R, opcode, asm, "\t$Vt, [$Rn]", "",
+ (outs listtype:$Vt), (ins GPR64sp:$Rn),
+ []> {
+ let Inst{30} = Q;
+ let Inst{23} = 0;
+ let Inst{20-16} = 0b00000;
+ let Inst{12} = S;
+ let Inst{11-10} = size;
+}
+let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in
+class BaseSIMDLdRPost<bit Q, bit R, bits<3> opcode, bit S, bits<2> size,
+ string asm, Operand listtype, Operand GPR64pi>
+ : BaseSIMDLdStSingle<1, R, opcode, asm, "\t$Vt, [$Rn], $Xm",
+ "$Rn = $wback",
+ (outs GPR64sp:$wback, listtype:$Vt),
+ (ins GPR64sp:$Rn, GPR64pi:$Xm), []> {
+ bits<5> Xm;
+ let Inst{30} = Q;
+ let Inst{23} = 1;
+ let Inst{20-16} = Xm;
+ let Inst{12} = S;
let Inst{11-10} = size;
+}
+
+multiclass SIMDLdrAliases<string asm, string layout, string Count,
+ int Offset, int Size> {
+ // E.g. "ld1r { v0.8b }, [x1], #1"
+ // "ld1r.8b\t$Vt, [$Rn], #1"
+ // may get mapped to
+ // (LD1Rv8b_POST VecListOne8b:$Vt, GPR64sp:$Rn, XZR)
+ def : InstAlias<asm # "\t$Vt, [$Rn], #" # Offset,
+ (!cast<Instruction>(NAME # "v" # layout # "_POST")
+ GPR64sp:$Rn,
+ !cast<RegisterOperand>("VecList" # Count # layout):$Vt,
+ XZR), 1>;
+
+ // E.g. "ld1r.8b { v0 }, [x1], #1"
+ // "ld1r.8b\t$Vt, [$Rn], #1"
+ // may get mapped to
+ // (LD1Rv8b_POST VecListOne64:$Vt, GPR64sp:$Rn, XZR)
+ def : InstAlias<asm # "." # layout # "\t$Vt, [$Rn], #" # Offset,
+ (!cast<Instruction>(NAME # "v" # layout # "_POST")
+ GPR64sp:$Rn,
+ !cast<RegisterOperand>("VecList" # Count # Size):$Vt,
+ XZR), 0>;
+
+ // E.g. "ld1r.8b { v0 }, [x1]"
+ // "ld1r.8b\t$Vt, [$Rn]"
+ // may get mapped to
+ // (LD1Rv8b VecListOne64:$Vt, GPR64sp:$Rn)
+ def : InstAlias<asm # "." # layout # "\t$Vt, [$Rn]",
+ (!cast<Instruction>(NAME # "v" # layout)
+ !cast<RegisterOperand>("VecList" # Count # Size):$Vt,
+ GPR64sp:$Rn), 0>;
+
+ // E.g. "ld1r.8b { v0 }, [x1], x2"
+ // "ld1r.8b\t$Vt, [$Rn], $Xm"
+ // may get mapped to
+ // (LD1Rv8b_POST VecListOne64:$Vt, GPR64sp:$Rn, GPR64pi1:$Xm)
+ def : InstAlias<asm # "." # layout # "\t$Vt, [$Rn], $Xm",
+ (!cast<Instruction>(NAME # "v" # layout # "_POST")
+ GPR64sp:$Rn,
+ !cast<RegisterOperand>("VecList" # Count # Size):$Vt,
+ !cast<RegisterOperand>("GPR64pi" # Offset):$Xm), 0>;
+}
+
+multiclass SIMDLdR<bit R, bits<3> opcode, bit S, string asm, string Count,
+ int Offset1, int Offset2, int Offset4, int Offset8> {
+ def v8b : BaseSIMDLdR<0, R, opcode, S, 0b00, asm,
+ !cast<Operand>("VecList" # Count # "8b")>;
+ def v16b: BaseSIMDLdR<1, R, opcode, S, 0b00, asm,
+ !cast<Operand>("VecList" # Count #"16b")>;
+ def v4h : BaseSIMDLdR<0, R, opcode, S, 0b01, asm,
+ !cast<Operand>("VecList" # Count #"4h")>;
+ def v8h : BaseSIMDLdR<1, R, opcode, S, 0b01, asm,
+ !cast<Operand>("VecList" # Count #"8h")>;
+ def v2s : BaseSIMDLdR<0, R, opcode, S, 0b10, asm,
+ !cast<Operand>("VecList" # Count #"2s")>;
+ def v4s : BaseSIMDLdR<1, R, opcode, S, 0b10, asm,
+ !cast<Operand>("VecList" # Count #"4s")>;
+ def v1d : BaseSIMDLdR<0, R, opcode, S, 0b11, asm,
+ !cast<Operand>("VecList" # Count #"1d")>;
+ def v2d : BaseSIMDLdR<1, R, opcode, S, 0b11, asm,
+ !cast<Operand>("VecList" # Count #"2d")>;
+
+ def v8b_POST : BaseSIMDLdRPost<0, R, opcode, S, 0b00, asm,
+ !cast<Operand>("VecList" # Count # "8b"),
+ !cast<Operand>("GPR64pi" # Offset1)>;
+ def v16b_POST: BaseSIMDLdRPost<1, R, opcode, S, 0b00, asm,
+ !cast<Operand>("VecList" # Count # "16b"),
+ !cast<Operand>("GPR64pi" # Offset1)>;
+ def v4h_POST : BaseSIMDLdRPost<0, R, opcode, S, 0b01, asm,
+ !cast<Operand>("VecList" # Count # "4h"),
+ !cast<Operand>("GPR64pi" # Offset2)>;
+ def v8h_POST : BaseSIMDLdRPost<1, R, opcode, S, 0b01, asm,
+ !cast<Operand>("VecList" # Count # "8h"),
+ !cast<Operand>("GPR64pi" # Offset2)>;
+ def v2s_POST : BaseSIMDLdRPost<0, R, opcode, S, 0b10, asm,
+ !cast<Operand>("VecList" # Count # "2s"),
+ !cast<Operand>("GPR64pi" # Offset4)>;
+ def v4s_POST : BaseSIMDLdRPost<1, R, opcode, S, 0b10, asm,
+ !cast<Operand>("VecList" # Count # "4s"),
+ !cast<Operand>("GPR64pi" # Offset4)>;
+ def v1d_POST : BaseSIMDLdRPost<0, R, opcode, S, 0b11, asm,
+ !cast<Operand>("VecList" # Count # "1d"),
+ !cast<Operand>("GPR64pi" # Offset8)>;
+ def v2d_POST : BaseSIMDLdRPost<1, R, opcode, S, 0b11, asm,
+ !cast<Operand>("VecList" # Count # "2d"),
+ !cast<Operand>("GPR64pi" # Offset8)>;
+
+ defm : SIMDLdrAliases<asm, "8b", Count, Offset1, 64>;
+ defm : SIMDLdrAliases<asm, "16b", Count, Offset1, 128>;
+ defm : SIMDLdrAliases<asm, "4h", Count, Offset2, 64>;
+ defm : SIMDLdrAliases<asm, "8h", Count, Offset2, 128>;
+ defm : SIMDLdrAliases<asm, "2s", Count, Offset4, 64>;
+ defm : SIMDLdrAliases<asm, "4s", Count, Offset4, 128>;
+ defm : SIMDLdrAliases<asm, "1d", Count, Offset8, 64>;
+ defm : SIMDLdrAliases<asm, "2d", Count, Offset8, 128>;
+}
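+
+// A sketch of a typical instantiation (the real defm lines live in
+// AArch64InstrInfo.td); the four trailing integers are the post-increment
+// amounts for 8-, 16-, 32- and 64-bit elements:
+//   defm LD1R : SIMDLdR<0, 0b110, 0, "ld1r", "One", 1, 2, 4, 8>;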
- // Inherit Rn in 9-5
- // Inherit Rt in 4-0
-}
-
-// Format AdvSIMD vector load/store Single N-element structure to/from one lane
-class NeonI_LdStOne_Lane<bit l, bit r, bits<2> op2_1, bit op0, dag outs,
- dag ins, string asmstr,
- list<dag> patterns, InstrItinClass itin>
- : A64InstRtn<outs, ins, asmstr, patterns, itin>
-{
- bits<4> lane;
- let Inst{31} = 0b0;
- let Inst{29-23} = 0b0011010;
- let Inst{22} = l;
- let Inst{21} = r;
+class SIMDLdStSingleB<bit L, bit R, bits<3> opcode, string asm,
+ dag oops, dag iops, list<dag> pattern>
+ : BaseSIMDLdStSingle<L, R, opcode, asm, "\t$Vt$idx, [$Rn]", "", oops, iops,
+ pattern> {
+ // idx encoded in Q:S:size fields.
+ bits<4> idx;
+ let Inst{30} = idx{3};
+ let Inst{23} = 0;
let Inst{20-16} = 0b00000;
- let Inst{15-14} = op2_1;
- let Inst{13} = op0;
-
- // Inherit Rn in 9-5
- // Inherit Rt in 4-0
-}
-
-// Format AdvSIMD post-index vector load Single N-element structure to all lanes
-class NeonI_LdOne_Dup_Post<bit q, bit r, bits<3> opcode, bits<2> size, dag outs,
- dag ins, string asmstr, list<dag> patterns,
- InstrItinClass itin>
- : A64InstRtnm<outs, ins, asmstr, patterns, itin>
-{
- let Inst{31} = 0b0;
- let Inst{30} = q;
- let Inst{29-23} = 0b0011011;
- let Inst{22} = 0b1;
- let Inst{21} = r;
- // Inherit Rm in 20-16
- let Inst{15-13} = opcode;
- let Inst{12} = 0b0;
+ let Inst{12} = idx{2};
+ let Inst{11-10} = idx{1-0};
+}
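+// For example, a byte lane index of 10 (0b1010) encodes as Q=1, S=0,
+// size=0b10: idx{3} lands in the Q bit (Inst{30}), idx{2} in the S bit
+// (Inst{12}) and idx{1-0} in the size field (Inst{11-10}).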
+class SIMDLdStSingleBTied<bit L, bit R, bits<3> opcode, string asm,
+ dag oops, dag iops, list<dag> pattern>
+ : BaseSIMDLdStSingleTied<L, R, opcode, asm, "\t$Vt$idx, [$Rn]", "",
+ oops, iops, pattern> {
+ // idx encoded in Q:S:size fields.
+ bits<4> idx;
+ let Inst{30} = idx{3};
+ let Inst{23} = 0;
+ let Inst{20-16} = 0b00000;
+ let Inst{12} = idx{2};
+ let Inst{11-10} = idx{1-0};
+}
+class SIMDLdStSingleBPost<bit L, bit R, bits<3> opcode, string asm,
+ dag oops, dag iops>
+ : BaseSIMDLdStSingle<L, R, opcode, asm, "\t$Vt$idx, [$Rn], $Xm",
+ "$Rn = $wback", oops, iops, []> {
+ // idx encoded in Q:S:size fields.
+ bits<4> idx;
+ bits<5> Xm;
+ let Inst{30} = idx{3};
+ let Inst{23} = 1;
+ let Inst{20-16} = Xm;
+ let Inst{12} = idx{2};
+ let Inst{11-10} = idx{1-0};
+}
+class SIMDLdStSingleBTiedPost<bit L, bit R, bits<3> opcode, string asm,
+ dag oops, dag iops>
+ : BaseSIMDLdStSingleTied<L, R, opcode, asm, "\t$Vt$idx, [$Rn], $Xm",
+ "$Rn = $wback", oops, iops, []> {
+ // idx encoded in Q:S:size fields.
+ bits<4> idx;
+ bits<5> Xm;
+ let Inst{30} = idx{3};
+ let Inst{23} = 1;
+ let Inst{20-16} = Xm;
+ let Inst{12} = idx{2};
+ let Inst{11-10} = idx{1-0};
+}
+
+class SIMDLdStSingleH<bit L, bit R, bits<3> opcode, bit size, string asm,
+ dag oops, dag iops, list<dag> pattern>
+ : BaseSIMDLdStSingle<L, R, opcode, asm, "\t$Vt$idx, [$Rn]", "", oops, iops,
+ pattern> {
+ // idx encoded in Q:S:size<1> fields.
+ bits<3> idx;
+ let Inst{30} = idx{2};
+ let Inst{23} = 0;
+ let Inst{20-16} = 0b00000;
+ let Inst{12} = idx{1};
+ let Inst{11} = idx{0};
+ let Inst{10} = size;
+}
+class SIMDLdStSingleHTied<bit L, bit R, bits<3> opcode, bit size, string asm,
+ dag oops, dag iops, list<dag> pattern>
+ : BaseSIMDLdStSingleTied<L, R, opcode, asm, "\t$Vt$idx, [$Rn]", "",
+ oops, iops, pattern> {
+ // idx encoded in Q:S:size<1> fields.
+ bits<3> idx;
+ let Inst{30} = idx{2};
+ let Inst{23} = 0;
+ let Inst{20-16} = 0b00000;
+ let Inst{12} = idx{1};
+ let Inst{11} = idx{0};
+ let Inst{10} = size;
+}
+
+class SIMDLdStSingleHPost<bit L, bit R, bits<3> opcode, bit size, string asm,
+ dag oops, dag iops>
+ : BaseSIMDLdStSingle<L, R, opcode, asm, "\t$Vt$idx, [$Rn], $Xm",
+ "$Rn = $wback", oops, iops, []> {
+ // idx encoded in Q:S:size<1> fields.
+ bits<3> idx;
+ bits<5> Xm;
+ let Inst{30} = idx{2};
+ let Inst{23} = 1;
+ let Inst{20-16} = Xm;
+ let Inst{12} = idx{1};
+ let Inst{11} = idx{0};
+ let Inst{10} = size;
+}
+class SIMDLdStSingleHTiedPost<bit L, bit R, bits<3> opcode, bit size, string asm,
+ dag oops, dag iops>
+ : BaseSIMDLdStSingleTied<L, R, opcode, asm, "\t$Vt$idx, [$Rn], $Xm",
+ "$Rn = $wback", oops, iops, []> {
+ // idx encoded in Q:S:size<1> fields.
+ bits<3> idx;
+ bits<5> Xm;
+ let Inst{30} = idx{2};
+ let Inst{23} = 1;
+ let Inst{20-16} = Xm;
+ let Inst{12} = idx{1};
+ let Inst{11} = idx{0};
+ let Inst{10} = size;
+}
+class SIMDLdStSingleS<bit L, bit R, bits<3> opcode, bits<2> size, string asm,
+ dag oops, dag iops, list<dag> pattern>
+ : BaseSIMDLdStSingle<L, R, opcode, asm, "\t$Vt$idx, [$Rn]", "", oops, iops,
+ pattern> {
+ // idx encoded in Q:S fields.
+ bits<2> idx;
+ let Inst{30} = idx{1};
+ let Inst{23} = 0;
+ let Inst{20-16} = 0b00000;
+ let Inst{12} = idx{0};
+ let Inst{11-10} = size;
+}
+class SIMDLdStSingleSTied<bit L, bit R, bits<3> opcode, bits<2> size, string asm,
+ dag oops, dag iops, list<dag> pattern>
+ : BaseSIMDLdStSingleTied<L, R, opcode, asm, "\t$Vt$idx, [$Rn]", "",
+ oops, iops, pattern> {
+ // idx encoded in Q:S fields.
+ bits<2> idx;
+ let Inst{30} = idx{1};
+ let Inst{23} = 0;
+ let Inst{20-16} = 0b00000;
+ let Inst{12} = idx{0};
+ let Inst{11-10} = size;
+}
+class SIMDLdStSingleSPost<bit L, bit R, bits<3> opcode, bits<2> size,
+ string asm, dag oops, dag iops>
+ : BaseSIMDLdStSingle<L, R, opcode, asm, "\t$Vt$idx, [$Rn], $Xm",
+ "$Rn = $wback", oops, iops, []> {
+ // idx encoded in Q:S fields.
+ bits<2> idx;
+ bits<5> Xm;
+ let Inst{30} = idx{1};
+ let Inst{23} = 1;
+ let Inst{20-16} = Xm;
+ let Inst{12} = idx{0};
+ let Inst{11-10} = size;
+}
+class SIMDLdStSingleSTiedPost<bit L, bit R, bits<3> opcode, bits<2> size,
+ string asm, dag oops, dag iops>
+ : BaseSIMDLdStSingleTied<L, R, opcode, asm, "\t$Vt$idx, [$Rn], $Xm",
+ "$Rn = $wback", oops, iops, []> {
+ // idx encoded in Q:S fields.
+ bits<2> idx;
+ bits<5> Xm;
+ let Inst{30} = idx{1};
+ let Inst{23} = 1;
+ let Inst{20-16} = Xm;
+ let Inst{12} = idx{0};
+ let Inst{11-10} = size;
+}
+class SIMDLdStSingleD<bit L, bit R, bits<3> opcode, bits<2> size, string asm,
+ dag oops, dag iops, list<dag> pattern>
+ : BaseSIMDLdStSingle<L, R, opcode, asm, "\t$Vt$idx, [$Rn]", "", oops, iops,
+ pattern> {
+ // idx encoded in Q field.
+ bits<1> idx;
+ let Inst{30} = idx;
+ let Inst{23} = 0;
+ let Inst{20-16} = 0b00000;
+ let Inst{12} = 0;
+ let Inst{11-10} = size;
+}
+class SIMDLdStSingleDTied<bit L, bit R, bits<3> opcode, bits<2> size, string asm,
+ dag oops, dag iops, list<dag> pattern>
+ : BaseSIMDLdStSingleTied<L, R, opcode, asm, "\t$Vt$idx, [$Rn]", "",
+ oops, iops, pattern> {
+ // idx encoded in Q field.
+ bits<1> idx;
+ let Inst{30} = idx;
+ let Inst{23} = 0;
+ let Inst{20-16} = 0b00000;
+ let Inst{12} = 0;
+ let Inst{11-10} = size;
+}
+class SIMDLdStSingleDPost<bit L, bit R, bits<3> opcode, bits<2> size,
+ string asm, dag oops, dag iops>
+ : BaseSIMDLdStSingle<L, R, opcode, asm, "\t$Vt$idx, [$Rn], $Xm",
+ "$Rn = $wback", oops, iops, []> {
+ // idx encoded in Q field.
+ bits<1> idx;
+ bits<5> Xm;
+ let Inst{30} = idx;
+ let Inst{23} = 1;
+ let Inst{20-16} = Xm;
+ let Inst{12} = 0;
+ let Inst{11-10} = size;
+}
+class SIMDLdStSingleDTiedPost<bit L, bit R, bits<3> opcode, bits<2> size,
+ string asm, dag oops, dag iops>
+ : BaseSIMDLdStSingleTied<L, R, opcode, asm, "\t$Vt$idx, [$Rn], $Xm",
+ "$Rn = $wback", oops, iops, []> {
+ // idx encoded in Q field.
+ bits<1> idx;
+ bits<5> Xm;
+ let Inst{30} = idx;
+ let Inst{23} = 1;
+ let Inst{20-16} = Xm;
+ let Inst{12} = 0;
let Inst{11-10} = size;
+}
- // Inherit Rn in 9-5
- // Inherit Rt in 4-0
-}
-
-// Format AdvSIMD post-index vector load/store Single N-element structure
-// to/from one lane
-class NeonI_LdStOne_Lane_Post<bit l, bit r, bits<2> op2_1, bit op0, dag outs,
- dag ins, string asmstr,
- list<dag> patterns, InstrItinClass itin>
- : A64InstRtnm<outs, ins, asmstr, patterns, itin>
-{
- bits<4> lane;
- let Inst{31} = 0b0;
- let Inst{29-23} = 0b0011011;
- let Inst{22} = l;
- let Inst{21} = r;
- // Inherit Rm in 20-16
- let Inst{15-14} = op2_1;
- let Inst{13} = op0;
-
- // Inherit Rn in 9-5
- // Inherit Rt in 4-0
-}
-
-// Format AdvSIMD 3 scalar registers with different type
-
-class NeonI_Scalar3Diff<bit u, bits<2> size, bits<4> opcode,
- dag outs, dag ins, string asmstr,
- list<dag> patterns, InstrItinClass itin>
- : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
- let Inst{31-30} = 0b01;
- let Inst{29} = u;
- let Inst{28-24} = 0b11110;
- let Inst{23-22} = size;
- let Inst{21} = 0b1;
- // Inherit Rm in 20-16
- let Inst{15-12} = opcode;
- let Inst{11-10} = 0b00;
- // Inherit Rn in 9-5
- // Inherit Rd in 4-0
+let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in
+multiclass SIMDLdSingleBTied<bit R, bits<3> opcode, string asm,
+ RegisterOperand listtype,
+ RegisterOperand GPR64pi> {
+ def i8 : SIMDLdStSingleBTied<1, R, opcode, asm,
+ (outs listtype:$dst),
+ (ins listtype:$Vt, VectorIndexB:$idx,
+ GPR64sp:$Rn), []>;
+
+ def i8_POST : SIMDLdStSingleBTiedPost<1, R, opcode, asm,
+ (outs GPR64sp:$wback, listtype:$dst),
+ (ins listtype:$Vt, VectorIndexB:$idx,
+ GPR64sp:$Rn, GPR64pi:$Xm)>;
+}
+let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in
+multiclass SIMDLdSingleHTied<bit R, bits<3> opcode, bit size, string asm,
+ RegisterOperand listtype,
+ RegisterOperand GPR64pi> {
+ def i16 : SIMDLdStSingleHTied<1, R, opcode, size, asm,
+ (outs listtype:$dst),
+ (ins listtype:$Vt, VectorIndexH:$idx,
+ GPR64sp:$Rn), []>;
+
+ def i16_POST : SIMDLdStSingleHTiedPost<1, R, opcode, size, asm,
+ (outs GPR64sp:$wback, listtype:$dst),
+ (ins listtype:$Vt, VectorIndexH:$idx,
+ GPR64sp:$Rn, GPR64pi:$Xm)>;
+}
+let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in
+multiclass SIMDLdSingleSTied<bit R, bits<3> opcode, bits<2> size, string asm,
+ RegisterOperand listtype,
+ RegisterOperand GPR64pi> {
+ def i32 : SIMDLdStSingleSTied<1, R, opcode, size, asm,
+ (outs listtype:$dst),
+ (ins listtype:$Vt, VectorIndexS:$idx,
+ GPR64sp:$Rn), []>;
+
+ def i32_POST : SIMDLdStSingleSTiedPost<1, R, opcode, size, asm,
+ (outs GPR64sp:$wback, listtype:$dst),
+ (ins listtype:$Vt, VectorIndexS:$idx,
+ GPR64sp:$Rn, GPR64pi:$Xm)>;
+}
+let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in
+multiclass SIMDLdSingleDTied<bit R, bits<3> opcode, bits<2> size, string asm,
+ RegisterOperand listtype, RegisterOperand GPR64pi> {
+ def i64 : SIMDLdStSingleDTied<1, R, opcode, size, asm,
+ (outs listtype:$dst),
+ (ins listtype:$Vt, VectorIndexD:$idx,
+ GPR64sp:$Rn), []>;
+
+ def i64_POST : SIMDLdStSingleDTiedPost<1, R, opcode, size, asm,
+ (outs GPR64sp:$wback, listtype:$dst),
+ (ins listtype:$Vt, VectorIndexD:$idx,
+ GPR64sp:$Rn, GPR64pi:$Xm)>;
+}
+let mayLoad = 0, mayStore = 1, hasSideEffects = 0 in
+multiclass SIMDStSingleB<bit R, bits<3> opcode, string asm,
+ RegisterOperand listtype, RegisterOperand GPR64pi> {
+ def i8 : SIMDLdStSingleB<0, R, opcode, asm,
+ (outs), (ins listtype:$Vt, VectorIndexB:$idx,
+ GPR64sp:$Rn), []>;
+
+ def i8_POST : SIMDLdStSingleBPost<0, R, opcode, asm,
+ (outs GPR64sp:$wback),
+ (ins listtype:$Vt, VectorIndexB:$idx,
+ GPR64sp:$Rn, GPR64pi:$Xm)>;
+}
+let mayLoad = 0, mayStore = 1, hasSideEffects = 0 in
+multiclass SIMDStSingleH<bit R, bits<3> opcode, bit size, string asm,
+ RegisterOperand listtype, RegisterOperand GPR64pi> {
+ def i16 : SIMDLdStSingleH<0, R, opcode, size, asm,
+ (outs), (ins listtype:$Vt, VectorIndexH:$idx,
+ GPR64sp:$Rn), []>;
+
+ def i16_POST : SIMDLdStSingleHPost<0, R, opcode, size, asm,
+ (outs GPR64sp:$wback),
+ (ins listtype:$Vt, VectorIndexH:$idx,
+ GPR64sp:$Rn, GPR64pi:$Xm)>;
+}
+let mayLoad = 0, mayStore = 1, hasSideEffects = 0 in
+multiclass SIMDStSingleS<bit R, bits<3> opcode, bits<2> size, string asm,
+ RegisterOperand listtype, RegisterOperand GPR64pi> {
+ def i32 : SIMDLdStSingleS<0, R, opcode, size, asm,
+ (outs), (ins listtype:$Vt, VectorIndexS:$idx,
+ GPR64sp:$Rn), []>;
+
+ def i32_POST : SIMDLdStSingleSPost<0, R, opcode, size, asm,
+ (outs GPR64sp:$wback),
+ (ins listtype:$Vt, VectorIndexS:$idx,
+ GPR64sp:$Rn, GPR64pi:$Xm)>;
+}
+let mayLoad = 0, mayStore = 1, hasSideEffects = 0 in
+multiclass SIMDStSingleD<bit R, bits<3> opcode, bits<2> size, string asm,
+ RegisterOperand listtype, RegisterOperand GPR64pi> {
+ def i64 : SIMDLdStSingleD<0, R, opcode, size, asm,
+ (outs), (ins listtype:$Vt, VectorIndexD:$idx,
+ GPR64sp:$Rn), []>;
+
+ def i64_POST : SIMDLdStSingleDPost<0, R, opcode, size, asm,
+ (outs GPR64sp:$wback),
+ (ins listtype:$Vt, VectorIndexD:$idx,
+ GPR64sp:$Rn, GPR64pi:$Xm)>;
}
-// Format AdvSIMD scalar shift by immediate
+multiclass SIMDLdStSingleAliases<string asm, string layout, string Type,
+ string Count, int Offset, Operand idxtype> {
+ // E.g. "ld1 { v0.8b }[0], [x1], #1"
+ // "ld1\t$Vt, [$Rn], #1"
+ // may get mapped to
+ // (LD1Rv8b_POST VecListOne8b:$Vt, GPR64sp:$Rn, XZR)
+ def : InstAlias<asm # "\t$Vt$idx, [$Rn], #" # Offset,
+ (!cast<Instruction>(NAME # Type # "_POST")
+ GPR64sp:$Rn,
+ !cast<RegisterOperand>("VecList" # Count # layout):$Vt,
+ idxtype:$idx, XZR), 1>;
+
+ // E.g. "ld1.8b { v0 }[0], [x1], #1"
+ // "ld1.8b\t$Vt, [$Rn], #1"
+ // may get mapped to
+ // (LD1Rv8b_POST VecListOne64:$Vt, GPR64sp:$Rn, XZR)
+ def : InstAlias<asm # "." # layout # "\t$Vt$idx, [$Rn], #" # Offset,
+ (!cast<Instruction>(NAME # Type # "_POST")
+ GPR64sp:$Rn,
+ !cast<RegisterOperand>("VecList" # Count # "128"):$Vt,
+ idxtype:$idx, XZR), 0>;
+
+ // E.g. "ld1.8b { v0 }[0], [x1]"
+ // "ld1.8b\t$Vt, [$Rn]"
+ // may get mapped to
+ // (LD1Rv8b VecListOne64:$Vt, GPR64sp:$Rn)
+ def : InstAlias<asm # "." # layout # "\t$Vt$idx, [$Rn]",
+ (!cast<Instruction>(NAME # Type)
+ !cast<RegisterOperand>("VecList" # Count # "128"):$Vt,
+ idxtype:$idx, GPR64sp:$Rn), 0>;
+
+ // E.g. "ld1.8b { v0 }[0], [x1], x2"
+ // "ld1.8b\t$Vt, [$Rn], $Xm"
+ // may get mapped to
+ // (LD1Rv8b_POST VecListOne64:$Vt, GPR64sp:$Rn, GPR64pi1:$Xm)
+ def : InstAlias<asm # "." # layout # "\t$Vt$idx, [$Rn], $Xm",
+ (!cast<Instruction>(NAME # Type # "_POST")
+ GPR64sp:$Rn,
+ !cast<RegisterOperand>("VecList" # Count # "128"):$Vt,
+ idxtype:$idx,
+ !cast<RegisterOperand>("GPR64pi" # Offset):$Xm), 0>;
+}
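+// A note on how the aliases above resolve: NAME is the enclosing defm's
+// name, so (for example) a defm named LD1 instantiated with Type = "i8"
+// casts to the LD1i8 / LD1i8_POST instructions. The multiclasses below
+// stamp out the same four alias shapes once per element size.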
-class NeonI_ScalarShiftImm<bit u, bits<5> opcode,
- dag outs, dag ins, string asmstr,
- list<dag> patterns, InstrItinClass itin>
- : A64InstRdn<outs, ins, asmstr, patterns, itin> {
- bits<4> Imm4;
- bits<3> Imm3;
- let Inst{31-30} = 0b01;
- let Inst{29} = u;
- let Inst{28-23} = 0b111110;
- let Inst{22-19} = Imm4;
- let Inst{18-16} = Imm3;
- let Inst{15-11} = opcode;
- let Inst{10} = 0b1;
- // Inherit Rn in 9-5
- // Inherit Rd in 4-0
+multiclass SIMDLdSt1SingleAliases<string asm> {
+ defm : SIMDLdStSingleAliases<asm, "b", "i8", "One", 1, VectorIndexB>;
+ defm : SIMDLdStSingleAliases<asm, "h", "i16", "One", 2, VectorIndexH>;
+ defm : SIMDLdStSingleAliases<asm, "s", "i32", "One", 4, VectorIndexS>;
+ defm : SIMDLdStSingleAliases<asm, "d", "i64", "One", 8, VectorIndexD>;
}
-// Format AdvSIMD crypto AES
-class NeonI_Crypto_AES<bits<2> size, bits<5> opcode,
- dag outs, dag ins, string asmstr,
- list<dag> patterns, InstrItinClass itin>
- : A64InstRdn<outs, ins, asmstr, patterns, itin> {
- let Inst{31-24} = 0b01001110;
- let Inst{23-22} = size;
- let Inst{21-17} = 0b10100;
- let Inst{16-12} = opcode;
- let Inst{11-10} = 0b10;
- // Inherit Rn in 9-5
- // Inherit Rd in 4-0
+multiclass SIMDLdSt2SingleAliases<string asm> {
+ defm : SIMDLdStSingleAliases<asm, "b", "i8", "Two", 2, VectorIndexB>;
+ defm : SIMDLdStSingleAliases<asm, "h", "i16", "Two", 4, VectorIndexH>;
+ defm : SIMDLdStSingleAliases<asm, "s", "i32", "Two", 8, VectorIndexS>;
+ defm : SIMDLdStSingleAliases<asm, "d", "i64", "Two", 16, VectorIndexD>;
}
-// Format AdvSIMD crypto SHA
-class NeonI_Crypto_SHA<bits<2> size, bits<5> opcode,
- dag outs, dag ins, string asmstr,
- list<dag> patterns, InstrItinClass itin>
- : A64InstRdn<outs, ins, asmstr, patterns, itin> {
- let Inst{31-24} = 0b01011110;
- let Inst{23-22} = size;
- let Inst{21-17} = 0b10100;
- let Inst{16-12} = opcode;
+multiclass SIMDLdSt3SingleAliases<string asm> {
+ defm : SIMDLdStSingleAliases<asm, "b", "i8", "Three", 3, VectorIndexB>;
+ defm : SIMDLdStSingleAliases<asm, "h", "i16", "Three", 6, VectorIndexH>;
+ defm : SIMDLdStSingleAliases<asm, "s", "i32", "Three", 12, VectorIndexS>;
+ defm : SIMDLdStSingleAliases<asm, "d", "i64", "Three", 24, VectorIndexD>;
+}
+
+multiclass SIMDLdSt4SingleAliases<string asm> {
+ defm : SIMDLdStSingleAliases<asm, "b", "i8", "Four", 4, VectorIndexB>;
+ defm : SIMDLdStSingleAliases<asm, "h", "i16", "Four", 8, VectorIndexH>;
+ defm : SIMDLdStSingleAliases<asm, "s", "i32", "Four", 16, VectorIndexS>;
+ defm : SIMDLdStSingleAliases<asm, "d", "i64", "Four", 32, VectorIndexD>;
+}
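+// In each case the Offset parameter is the post-increment immediate in
+// bytes: the element size times the number of registers in the list
+// (e.g. four d-element lanes advance the base register by #32).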
+} // end of 'let Predicates = [HasNEON]'
+
+//----------------------------------------------------------------------------
+// Crypto extensions
+//----------------------------------------------------------------------------
+
+let Predicates = [HasCrypto] in {
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
+class AESBase<bits<4> opc, string asm, dag outs, dag ins, string cstr,
+ list<dag> pat>
+ : I<outs, ins, asm, "{\t$Rd.16b, $Rn.16b|.16b\t$Rd, $Rn}", cstr, pat>,
+    Sched<[WriteV]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ let Inst{31-16} = 0b0100111000101000;
+ let Inst{15-12} = opc;
let Inst{11-10} = 0b10;
- // Inherit Rn in 9-5
- // Inherit Rd in 4-0
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
}
-// Format AdvSIMD crypto 3V SHA
-class NeonI_Crypto_3VSHA<bits<2> size, bits<3> opcode,
- dag outs, dag ins, string asmstr,
- list<dag> patterns, InstrItinClass itin>
- : A64InstRdnm<outs, ins, asmstr, patterns, itin> {
- let Inst{31-24} = 0b01011110;
- let Inst{23-22} = size;
- let Inst{21} = 0b0;
- // Inherit Rm in 20-16
- let Inst{15} = 0b0;
- let Inst{14-12} = opcode;
+class AESInst<bits<4> opc, string asm, Intrinsic OpNode>
+ : AESBase<opc, asm, (outs V128:$Rd), (ins V128:$Rn), "",
+ [(set (v16i8 V128:$Rd), (OpNode (v16i8 V128:$Rn)))]>;
+
+class AESTiedInst<bits<4> opc, string asm, Intrinsic OpNode>
+ : AESBase<opc, asm, (outs V128:$dst), (ins V128:$Rd, V128:$Rn),
+ "$Rd = $dst",
+ [(set (v16i8 V128:$dst),
+ (OpNode (v16i8 V128:$Rd), (v16i8 V128:$Rn)))]>;
+
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
+class SHA3OpTiedInst<bits<3> opc, string asm, string dst_lhs_kind,
+ dag oops, dag iops, list<dag> pat>
+ : I<oops, iops, asm,
+ "{\t$Rd" # dst_lhs_kind # ", $Rn" # dst_lhs_kind # ", $Rm.4s" #
+ "|.4s\t$Rd, $Rn, $Rm}", "$Rd = $dst", pat>,
+    Sched<[WriteV]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ bits<5> Rm;
+ let Inst{31-21} = 0b01011110000;
+ let Inst{20-16} = Rm;
+ let Inst{15} = 0;
+ let Inst{14-12} = opc;
let Inst{11-10} = 0b00;
- // Inherit Rn in 9-5
- // Inherit Rd in 4-0
-}
-
-// Format AdvSIMD scalar x indexed element
-class NeonI_ScalarXIndexedElem<bit u, bit szhi, bit szlo,
- bits<4> opcode, dag outs, dag ins,
- string asmstr, list<dag> patterns,
- InstrItinClass itin>
- : A64InstRdnm<outs, ins, asmstr, patterns, itin>
-{
- let Inst{31} = 0b0;
- let Inst{30} = 0b1;
- let Inst{29} = u;
- let Inst{28-24} = 0b11111;
- let Inst{23} = szhi;
- let Inst{22} = szlo;
- // l in Inst{21}
- // m in Instr{20}
- // Inherit Rm in 19-16
- let Inst{15-12} = opcode;
- // h in Inst{11}
- let Inst{10} = 0b0;
- // Inherit Rn in 9-5
- // Inherit Rd in 4-0
-}
-// Format AdvSIMD scalar copy - insert from element to scalar
-class NeonI_ScalarCopy<dag outs, dag ins, string asmstr,
- list<dag> patterns, InstrItinClass itin>
- : NeonI_copy<0b1, 0b0, 0b0000, outs, ins, asmstr, patterns, itin> {
- let Inst{28} = 0b1;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
}
+
+class SHATiedInstQSV<bits<3> opc, string asm, Intrinsic OpNode>
+ : SHA3OpTiedInst<opc, asm, "", (outs FPR128:$dst),
+ (ins FPR128:$Rd, FPR32:$Rn, V128:$Rm),
+ [(set (v4i32 FPR128:$dst),
+ (OpNode (v4i32 FPR128:$Rd), (i32 FPR32:$Rn),
+ (v4i32 V128:$Rm)))]>;
+
+class SHATiedInstVVV<bits<3> opc, string asm, Intrinsic OpNode>
+ : SHA3OpTiedInst<opc, asm, ".4s", (outs V128:$dst),
+ (ins V128:$Rd, V128:$Rn, V128:$Rm),
+ [(set (v4i32 V128:$dst),
+ (OpNode (v4i32 V128:$Rd), (v4i32 V128:$Rn),
+ (v4i32 V128:$Rm)))]>;
+
+class SHATiedInstQQV<bits<3> opc, string asm, Intrinsic OpNode>
+ : SHA3OpTiedInst<opc, asm, "", (outs FPR128:$dst),
+ (ins FPR128:$Rd, FPR128:$Rn, V128:$Rm),
+ [(set (v4i32 FPR128:$dst),
+ (OpNode (v4i32 FPR128:$Rd), (v4i32 FPR128:$Rn),
+ (v4i32 V128:$Rm)))]>;
+
+let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
+class SHA2OpInst<bits<4> opc, string asm, string kind,
+ string cstr, dag oops, dag iops,
+ list<dag> pat>
+ : I<oops, iops, asm, "{\t$Rd" # kind # ", $Rn" # kind #
+ "|" # kind # "\t$Rd, $Rn}", cstr, pat>,
+    Sched<[WriteV]> {
+ bits<5> Rd;
+ bits<5> Rn;
+ let Inst{31-16} = 0b0101111000101000;
+ let Inst{15-12} = opc;
+ let Inst{11-10} = 0b10;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rd;
}
+class SHATiedInstVV<bits<4> opc, string asm, Intrinsic OpNode>
+ : SHA2OpInst<opc, asm, ".4s", "$Rd = $dst", (outs V128:$dst),
+ (ins V128:$Rd, V128:$Rn),
+ [(set (v4i32 V128:$dst),
+ (OpNode (v4i32 V128:$Rd), (v4i32 V128:$Rn)))]>;
+
+class SHAInstSS<bits<4> opc, string asm, Intrinsic OpNode>
+ : SHA2OpInst<opc, asm, "", "", (outs FPR32:$Rd), (ins FPR32:$Rn),
+ [(set (i32 FPR32:$Rd), (OpNode (i32 FPR32:$Rn)))]>;
+} // end of 'let Predicates = [HasCrypto]'
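+// The "{<long>|<short>}" asm strings in the crypto classes above provide one
+// spelling per assembler syntax variant: the first form writes the .16b/.4s
+// arrangement on each register operand, the second is the compact form used
+// by the alternate variant.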
+
+// Allow the size specifier tokens to be upper case, not just lower.
+def : TokenAlias<".8B", ".8b">;
+def : TokenAlias<".4H", ".4h">;
+def : TokenAlias<".2S", ".2s">;
+def : TokenAlias<".1D", ".1d">;
+def : TokenAlias<".16B", ".16b">;
+def : TokenAlias<".8H", ".8h">;
+def : TokenAlias<".4S", ".4s">;
+def : TokenAlias<".2D", ".2d">;
+def : TokenAlias<".1Q", ".1q">;
+def : TokenAlias<".B", ".b">;
+def : TokenAlias<".H", ".h">;
+def : TokenAlias<".S", ".s">;
+def : TokenAlias<".D", ".d">;
+def : TokenAlias<".Q", ".q">;
diff --git a/lib/Target/AArch64/AArch64InstrInfo.cpp b/lib/Target/AArch64/AArch64InstrInfo.cpp
index afb2034..ff115c0 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -11,257 +11,83 @@
//
//===----------------------------------------------------------------------===//
-#include "AArch64.h"
#include "AArch64InstrInfo.h"
-#include "AArch64MachineFunctionInfo.h"
-#include "AArch64TargetMachine.h"
-#include "MCTargetDesc/AArch64MCTargetDesc.h"
-#include "Utils/AArch64BaseInfo.h"
-#include "llvm/CodeGen/MachineConstantPool.h"
-#include "llvm/CodeGen/MachineDominators.h"
+#include "AArch64Subtarget.h"
+#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/IR/Function.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/MC/MCInst.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
-#include <algorithm>
+
+using namespace llvm;
#define GET_INSTRINFO_CTOR_DTOR
#include "AArch64GenInstrInfo.inc"
-using namespace llvm;
-
AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
- : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP),
- Subtarget(STI) {}
+ : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP),
+ RI(this, &STI), Subtarget(STI) {}
-void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I, DebugLoc DL,
- unsigned DestReg, unsigned SrcReg,
- bool KillSrc) const {
- unsigned Opc = 0;
- unsigned ZeroReg = 0;
- if (DestReg == AArch64::XSP || SrcReg == AArch64::XSP) {
- // E.g. ADD xDst, xsp, #0 (, lsl #0)
- BuildMI(MBB, I, DL, get(AArch64::ADDxxi_lsl0_s), DestReg)
- .addReg(SrcReg)
- .addImm(0);
- return;
- } else if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) {
- // E.g. ADD wDST, wsp, #0 (, lsl #0)
- BuildMI(MBB, I, DL, get(AArch64::ADDwwi_lsl0_s), DestReg)
- .addReg(SrcReg)
- .addImm(0);
- return;
- } else if (DestReg == AArch64::NZCV) {
- assert(AArch64::GPR64RegClass.contains(SrcReg));
- // E.g. MSR NZCV, xDST
- BuildMI(MBB, I, DL, get(AArch64::MSRix))
- .addImm(A64SysReg::NZCV)
- .addReg(SrcReg);
- } else if (SrcReg == AArch64::NZCV) {
- assert(AArch64::GPR64RegClass.contains(DestReg));
- // E.g. MRS xDST, NZCV
- BuildMI(MBB, I, DL, get(AArch64::MRSxi), DestReg)
- .addImm(A64SysReg::NZCV);
- } else if (AArch64::GPR64RegClass.contains(DestReg)) {
- if(AArch64::GPR64RegClass.contains(SrcReg)){
- Opc = AArch64::ORRxxx_lsl;
- ZeroReg = AArch64::XZR;
- } else{
- assert(AArch64::FPR64RegClass.contains(SrcReg));
- BuildMI(MBB, I, DL, get(AArch64::FMOVxd), DestReg)
- .addReg(SrcReg);
- return;
- }
- } else if (AArch64::GPR32RegClass.contains(DestReg)) {
- if(AArch64::GPR32RegClass.contains(SrcReg)){
- Opc = AArch64::ORRwww_lsl;
- ZeroReg = AArch64::WZR;
- } else{
- assert(AArch64::FPR32RegClass.contains(SrcReg));
- BuildMI(MBB, I, DL, get(AArch64::FMOVws), DestReg)
- .addReg(SrcReg);
- return;
- }
- } else if (AArch64::FPR32RegClass.contains(DestReg)) {
- if(AArch64::FPR32RegClass.contains(SrcReg)){
- BuildMI(MBB, I, DL, get(AArch64::FMOVss), DestReg)
- .addReg(SrcReg);
- return;
- }
- else {
- assert(AArch64::GPR32RegClass.contains(SrcReg));
- BuildMI(MBB, I, DL, get(AArch64::FMOVsw), DestReg)
- .addReg(SrcReg);
- return;
- }
- } else if (AArch64::FPR64RegClass.contains(DestReg)) {
- if(AArch64::FPR64RegClass.contains(SrcReg)){
- BuildMI(MBB, I, DL, get(AArch64::FMOVdd), DestReg)
- .addReg(SrcReg);
- return;
- }
- else {
- assert(AArch64::GPR64RegClass.contains(SrcReg));
- BuildMI(MBB, I, DL, get(AArch64::FMOVdx), DestReg)
- .addReg(SrcReg);
- return;
- }
- } else if (AArch64::FPR128RegClass.contains(DestReg)) {
- assert(AArch64::FPR128RegClass.contains(SrcReg));
+/// GetInstSize - Return the maximum number of bytes of code the specified
+/// instruction may occupy.
+unsigned AArch64InstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
+ const MCInstrDesc &Desc = MI->getDesc();
- // If NEON is enable, we use ORR to implement this copy.
- // If NEON isn't available, emit STR and LDR to handle this.
- if(getSubTarget().hasNEON()) {
- BuildMI(MBB, I, DL, get(AArch64::ORRvvv_16B), DestReg)
- .addReg(SrcReg)
- .addReg(SrcReg);
- return;
- } else {
- BuildMI(MBB, I, DL, get(AArch64::LSFP128_PreInd_STR), AArch64::XSP)
- .addReg(SrcReg)
- .addReg(AArch64::XSP)
- .addImm(0x1ff & -16);
-
- BuildMI(MBB, I, DL, get(AArch64::LSFP128_PostInd_LDR), DestReg)
- .addReg(AArch64::XSP, RegState::Define)
- .addReg(AArch64::XSP)
- .addImm(16);
- return;
- }
- } else if (AArch64::FPR8RegClass.contains(DestReg, SrcReg)) {
- // The copy of two FPR8 registers is implemented by the copy of two FPR32
- const TargetRegisterInfo *TRI = &getRegisterInfo();
- unsigned Dst = TRI->getMatchingSuperReg(DestReg, AArch64::sub_8,
- &AArch64::FPR32RegClass);
- unsigned Src = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_8,
- &AArch64::FPR32RegClass);
- BuildMI(MBB, I, DL, get(AArch64::FMOVss), Dst)
- .addReg(Src);
- return;
- } else if (AArch64::FPR16RegClass.contains(DestReg, SrcReg)) {
- // The copy of two FPR16 registers is implemented by the copy of two FPR32
- const TargetRegisterInfo *TRI = &getRegisterInfo();
- unsigned Dst = TRI->getMatchingSuperReg(DestReg, AArch64::sub_16,
- &AArch64::FPR32RegClass);
- unsigned Src = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_16,
- &AArch64::FPR32RegClass);
- BuildMI(MBB, I, DL, get(AArch64::FMOVss), Dst)
- .addReg(Src);
- return;
- } else {
- CopyPhysRegTuple(MBB, I, DL, DestReg, SrcReg);
- return;
+ switch (Desc.getOpcode()) {
+ default:
+    // Anything not explicitly designated otherwise is a normal 4-byte insn.
+ return 4;
+ case TargetOpcode::DBG_VALUE:
+ case TargetOpcode::EH_LABEL:
+ case TargetOpcode::IMPLICIT_DEF:
+ case TargetOpcode::KILL:
+ return 0;
}
- // E.g. ORR xDst, xzr, xSrc, lsl #0
- BuildMI(MBB, I, DL, get(Opc), DestReg)
- .addReg(ZeroReg)
- .addReg(SrcReg)
- .addImm(0);
-}
-
-void AArch64InstrInfo::CopyPhysRegTuple(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I,
- DebugLoc DL, unsigned DestReg,
- unsigned SrcReg) const {
- unsigned SubRegs;
- bool IsQRegs;
- if (AArch64::DPairRegClass.contains(DestReg, SrcReg)) {
- SubRegs = 2;
- IsQRegs = false;
- } else if (AArch64::DTripleRegClass.contains(DestReg, SrcReg)) {
- SubRegs = 3;
- IsQRegs = false;
- } else if (AArch64::DQuadRegClass.contains(DestReg, SrcReg)) {
- SubRegs = 4;
- IsQRegs = false;
- } else if (AArch64::QPairRegClass.contains(DestReg, SrcReg)) {
- SubRegs = 2;
- IsQRegs = true;
- } else if (AArch64::QTripleRegClass.contains(DestReg, SrcReg)) {
- SubRegs = 3;
- IsQRegs = true;
- } else if (AArch64::QQuadRegClass.contains(DestReg, SrcReg)) {
- SubRegs = 4;
- IsQRegs = true;
- } else
- llvm_unreachable("Unknown register class");
-
- unsigned BeginIdx = IsQRegs ? AArch64::qsub_0 : AArch64::dsub_0;
- int Spacing = 1;
- const TargetRegisterInfo *TRI = &getRegisterInfo();
- // Copy register tuples backward when the first Dest reg overlaps
- // with SrcReg.
- if (TRI->regsOverlap(SrcReg, TRI->getSubReg(DestReg, BeginIdx))) {
- BeginIdx = BeginIdx + (SubRegs - 1);
- Spacing = -1;
- }
-
- unsigned Opc = IsQRegs ? AArch64::ORRvvv_16B : AArch64::ORRvvv_8B;
- for (unsigned i = 0; i != SubRegs; ++i) {
- unsigned Dst = TRI->getSubReg(DestReg, BeginIdx + i * Spacing);
- unsigned Src = TRI->getSubReg(SrcReg, BeginIdx + i * Spacing);
- assert(Dst && Src && "Bad sub-register");
- BuildMI(MBB, I, I->getDebugLoc(), get(Opc), Dst)
- .addReg(Src)
- .addReg(Src);
- }
- return;
-}
-
-/// Does the Opcode represent a conditional branch that we can remove and re-add
-/// at the end of a basic block?
-static bool isCondBranch(unsigned Opc) {
- return Opc == AArch64::Bcc || Opc == AArch64::CBZw || Opc == AArch64::CBZx ||
- Opc == AArch64::CBNZw || Opc == AArch64::CBNZx ||
- Opc == AArch64::TBZwii || Opc == AArch64::TBZxii ||
- Opc == AArch64::TBNZwii || Opc == AArch64::TBNZxii;
-}
-
-/// Takes apart a given conditional branch MachineInstr (see isCondBranch),
-/// setting TBB to the destination basic block and populating the Cond vector
-/// with data necessary to recreate the conditional branch at a later
-/// date. First element will be the opcode, and subsequent ones define the
-/// conditions being branched on in an instruction-specific manner.
-static void classifyCondBranch(MachineInstr *I, MachineBasicBlock *&TBB,
- SmallVectorImpl<MachineOperand> &Cond) {
- switch(I->getOpcode()) {
- case AArch64::Bcc:
- case AArch64::CBZw:
- case AArch64::CBZx:
- case AArch64::CBNZw:
- case AArch64::CBNZx:
- // These instructions just have one predicate operand in position 0 (either
- // a condition code or a register being compared).
- Cond.push_back(MachineOperand::CreateImm(I->getOpcode()));
- Cond.push_back(I->getOperand(0));
- TBB = I->getOperand(1).getMBB();
- return;
- case AArch64::TBZwii:
- case AArch64::TBZxii:
- case AArch64::TBNZwii:
- case AArch64::TBNZxii:
- // These have two predicate operands: a register and a bit position.
- Cond.push_back(MachineOperand::CreateImm(I->getOpcode()));
- Cond.push_back(I->getOperand(0));
- Cond.push_back(I->getOperand(1));
- TBB = I->getOperand(2).getMBB();
- return;
+  llvm_unreachable("GetInstSizeInBytes() - Unable to determine insn size");
+}
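+// E.g. an ordinary "add x0, x1, x2" reports 4 bytes, while DBG_VALUE, KILL
+// and the other meta-instructions above contribute nothing to the encoded
+// stream and report 0.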
+
+static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target,
+ SmallVectorImpl<MachineOperand> &Cond) {
+ // Block ends with fall-through condbranch.
+ switch (LastInst->getOpcode()) {
default:
- llvm_unreachable("Unknown conditional branch to classify");
+ llvm_unreachable("Unknown branch instruction?");
+ case AArch64::Bcc:
+ Target = LastInst->getOperand(1).getMBB();
+ Cond.push_back(LastInst->getOperand(0));
+ break;
+ case AArch64::CBZW:
+ case AArch64::CBZX:
+ case AArch64::CBNZW:
+ case AArch64::CBNZX:
+ Target = LastInst->getOperand(1).getMBB();
+ Cond.push_back(MachineOperand::CreateImm(-1));
+ Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
+ Cond.push_back(LastInst->getOperand(0));
+ break;
+ case AArch64::TBZW:
+ case AArch64::TBZX:
+ case AArch64::TBNZW:
+ case AArch64::TBNZX:
+ Target = LastInst->getOperand(2).getMBB();
+ Cond.push_back(MachineOperand::CreateImm(-1));
+ Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
+ Cond.push_back(LastInst->getOperand(0));
+ Cond.push_back(LastInst->getOperand(1));
}
}
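+// The Cond vector built above takes one of three shapes: [cc] for Bcc,
+// [-1, opcode, reg] for cbz/cbnz, and [-1, opcode, reg, bit] for tbz/tbnz.
+// The leading -1 lets ReverseBranchCondition and instantiateCondBranch below
+// tell a plain Bcc apart from a folded compare-and-branch.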
-
-bool
-AArch64InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
- MachineBasicBlock *&FBB,
- SmallVectorImpl<MachineOperand> &Cond,
- bool AllowModify) const {
+// Branch analysis.
+bool AArch64InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const {
// If the block has no terminators, it just falls into the block after it.
MachineBasicBlock::iterator I = MBB.end();
if (I == MBB.begin())
@@ -281,15 +107,16 @@ AArch64InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
// If there is only one terminator instruction, process it.
unsigned LastOpc = LastInst->getOpcode();
if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
- if (LastOpc == AArch64::Bimm) {
+ if (isUncondBranchOpcode(LastOpc)) {
TBB = LastInst->getOperand(0).getMBB();
return false;
}
- if (isCondBranch(LastOpc)) {
- classifyCondBranch(LastInst, TBB, Cond);
+ if (isCondBranchOpcode(LastOpc)) {
+ // Block ends with fall-through condbranch.
+ parseCondBranch(LastInst, TBB, Cond);
return false;
}
- return true; // Can't handle indirect branch.
+ return true; // Can't handle indirect branch.
}
// Get the instruction before it if it is a terminator.
@@ -298,8 +125,8 @@ AArch64InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
// If AllowModify is true and the block ends with two or more unconditional
// branches, delete all but the first unconditional branch.
- if (AllowModify && LastOpc == AArch64::Bimm) {
- while (SecondLastOpc == AArch64::Bimm) {
+ if (AllowModify && isUncondBranchOpcode(LastOpc)) {
+ while (isUncondBranchOpcode(SecondLastOpc)) {
LastInst->eraseFromParent();
LastInst = SecondLastInst;
LastOpc = LastInst->getOpcode();
@@ -319,23 +146,15 @@ AArch64InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
return true;
// If the block ends with a B and a Bcc, handle it.
- if (LastOpc == AArch64::Bimm) {
- if (SecondLastOpc == AArch64::Bcc) {
- TBB = SecondLastInst->getOperand(1).getMBB();
- Cond.push_back(MachineOperand::CreateImm(AArch64::Bcc));
- Cond.push_back(SecondLastInst->getOperand(0));
- FBB = LastInst->getOperand(0).getMBB();
- return false;
- } else if (isCondBranch(SecondLastOpc)) {
- classifyCondBranch(SecondLastInst, TBB, Cond);
- FBB = LastInst->getOperand(0).getMBB();
- return false;
- }
+ if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
+ parseCondBranch(SecondLastInst, TBB, Cond);
+ FBB = LastInst->getOperand(0).getMBB();
+ return false;
}
// If the block ends with two unconditional branches, handle it. The second
// one is not executed, so remove it.
- if (SecondLastOpc == AArch64::Bimm && LastOpc == AArch64::Bimm) {
+ if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
TBB = SecondLastInst->getOperand(0).getMBB();
I = LastInst;
if (AllowModify)
@@ -343,84 +162,72 @@ AArch64InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
return false;
}
+ // ...likewise if it ends with an indirect branch followed by an unconditional
+ // branch.
+ if (isIndirectBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
+ I = LastInst;
+ if (AllowModify)
+ I->eraseFromParent();
+ return true;
+ }
+
// Otherwise, can't handle this.
return true;
}
bool AArch64InstrInfo::ReverseBranchCondition(
- SmallVectorImpl<MachineOperand> &Cond) const {
- switch (Cond[0].getImm()) {
- case AArch64::Bcc: {
- A64CC::CondCodes CC = static_cast<A64CC::CondCodes>(Cond[1].getImm());
- CC = A64InvertCondCode(CC);
- Cond[1].setImm(CC);
- return false;
- }
- case AArch64::CBZw:
- Cond[0].setImm(AArch64::CBNZw);
- return false;
- case AArch64::CBZx:
- Cond[0].setImm(AArch64::CBNZx);
- return false;
- case AArch64::CBNZw:
- Cond[0].setImm(AArch64::CBZw);
- return false;
- case AArch64::CBNZx:
- Cond[0].setImm(AArch64::CBZx);
- return false;
- case AArch64::TBZwii:
- Cond[0].setImm(AArch64::TBNZwii);
- return false;
- case AArch64::TBZxii:
- Cond[0].setImm(AArch64::TBNZxii);
- return false;
- case AArch64::TBNZwii:
- Cond[0].setImm(AArch64::TBZwii);
- return false;
- case AArch64::TBNZxii:
- Cond[0].setImm(AArch64::TBZxii);
- return false;
- default:
- llvm_unreachable("Unknown branch type");
- }
-}
-
-
-unsigned
-AArch64InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
- MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond,
- DebugLoc DL) const {
- if (FBB == 0 && Cond.empty()) {
- BuildMI(&MBB, DL, get(AArch64::Bimm)).addMBB(TBB);
- return 1;
- } else if (FBB == 0) {
- MachineInstrBuilder MIB = BuildMI(&MBB, DL, get(Cond[0].getImm()));
- for (int i = 1, e = Cond.size(); i != e; ++i)
- MIB.addOperand(Cond[i]);
- MIB.addMBB(TBB);
- return 1;
+ SmallVectorImpl<MachineOperand> &Cond) const {
+ if (Cond[0].getImm() != -1) {
+ // Regular Bcc
+ AArch64CC::CondCode CC = (AArch64CC::CondCode)(int)Cond[0].getImm();
+ Cond[0].setImm(AArch64CC::getInvertedCondCode(CC));
+ } else {
+ // Folded compare-and-branch
+ switch (Cond[1].getImm()) {
+ default:
+ llvm_unreachable("Unknown conditional branch!");
+ case AArch64::CBZW:
+ Cond[1].setImm(AArch64::CBNZW);
+ break;
+ case AArch64::CBNZW:
+ Cond[1].setImm(AArch64::CBZW);
+ break;
+ case AArch64::CBZX:
+ Cond[1].setImm(AArch64::CBNZX);
+ break;
+ case AArch64::CBNZX:
+ Cond[1].setImm(AArch64::CBZX);
+ break;
+ case AArch64::TBZW:
+ Cond[1].setImm(AArch64::TBNZW);
+ break;
+ case AArch64::TBNZW:
+ Cond[1].setImm(AArch64::TBZW);
+ break;
+ case AArch64::TBZX:
+ Cond[1].setImm(AArch64::TBNZX);
+ break;
+ case AArch64::TBNZX:
+ Cond[1].setImm(AArch64::TBZX);
+ break;
+ }
}
- MachineInstrBuilder MIB = BuildMI(&MBB, DL, get(Cond[0].getImm()));
- for (int i = 1, e = Cond.size(); i != e; ++i)
- MIB.addOperand(Cond[i]);
- MIB.addMBB(TBB);
-
- BuildMI(&MBB, DL, get(AArch64::Bimm)).addMBB(FBB);
- return 2;
+ return false;
}
unsigned AArch64InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator I = MBB.end();
- if (I == MBB.begin()) return 0;
+ if (I == MBB.begin())
+ return 0;
--I;
while (I->isDebugValue()) {
if (I == MBB.begin())
return 0;
--I;
}
- if (I->getOpcode() != AArch64::Bimm && !isCondBranch(I->getOpcode()))
+ if (!isUncondBranchOpcode(I->getOpcode()) &&
+ !isCondBranchOpcode(I->getOpcode()))
return 0;
// Remove the branch.
@@ -428,9 +235,10 @@ unsigned AArch64InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
I = MBB.end();
- if (I == MBB.begin()) return 1;
+ if (I == MBB.begin())
+ return 1;
--I;
- if (!isCondBranch(I->getOpcode()))
+ if (!isCondBranchOpcode(I->getOpcode()))
return 1;
// Remove the branch.
@@ -438,542 +246,1838 @@ unsigned AArch64InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
return 2;
}
-bool
-AArch64InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MBBI) const {
- MachineInstr &MI = *MBBI;
- MachineBasicBlock &MBB = *MI.getParent();
+void AArch64InstrInfo::instantiateCondBranch(
+ MachineBasicBlock &MBB, DebugLoc DL, MachineBasicBlock *TBB,
+ const SmallVectorImpl<MachineOperand> &Cond) const {
+ if (Cond[0].getImm() != -1) {
+ // Regular Bcc
+ BuildMI(&MBB, DL, get(AArch64::Bcc)).addImm(Cond[0].getImm()).addMBB(TBB);
+ } else {
+ // Folded compare-and-branch
+ const MachineInstrBuilder MIB =
+ BuildMI(&MBB, DL, get(Cond[1].getImm())).addReg(Cond[2].getReg());
+ if (Cond.size() > 3)
+ MIB.addImm(Cond[3].getImm());
+ MIB.addMBB(TBB);
+ }
+}
- unsigned Opcode = MI.getOpcode();
- switch (Opcode) {
- case AArch64::TLSDESC_BLRx: {
- MachineInstr *NewMI =
- BuildMI(MBB, MBBI, MI.getDebugLoc(), get(AArch64::TLSDESCCALL))
- .addOperand(MI.getOperand(1));
- MI.setDesc(get(AArch64::BLRx));
-
- llvm::finalizeBundle(MBB, NewMI, *++MBBI);
- return true;
- }
+unsigned AArch64InstrInfo::InsertBranch(
+ MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond, DebugLoc DL) const {
+ // Shouldn't be a fall through.
+ assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+
+ if (!FBB) {
+ if (Cond.empty()) // Unconditional branch?
+ BuildMI(&MBB, DL, get(AArch64::B)).addMBB(TBB);
+ else
+ instantiateCondBranch(MBB, DL, TBB, Cond);
+ return 1;
+ }
+
+ // Two-way conditional branch.
+ instantiateCondBranch(MBB, DL, TBB, Cond);
+ BuildMI(&MBB, DL, get(AArch64::B)).addMBB(FBB);
+ return 2;
+}
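+// The value returned by InsertBranch is the number of branch instructions
+// emitted: one for a lone conditional or unconditional branch, two for the
+// condbranch-plus-B sequence of a two-way branch.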
+
+// Find the original register that VReg is copied from.
+static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg) {
+ while (TargetRegisterInfo::isVirtualRegister(VReg)) {
+ const MachineInstr *DefMI = MRI.getVRegDef(VReg);
+ if (!DefMI->isFullCopy())
+ return VReg;
+ VReg = DefMI->getOperand(1).getReg();
+ }
+ return VReg;
+}
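+// E.g. with "%1 = COPY %0" and "%2 = COPY %1", removeCopies(MRI, %2) walks
+// back through the full copies and returns %0, stopping early at the first
+// physical register or non-copy definition it reaches.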
+
+// Determine if VReg is defined by an instruction that can be folded into a
+// csel instruction. If so, return the folded opcode, and the replacement
+// register.
+static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
+ unsigned *NewVReg = nullptr) {
+ VReg = removeCopies(MRI, VReg);
+ if (!TargetRegisterInfo::isVirtualRegister(VReg))
+ return 0;
+
+ bool Is64Bit = AArch64::GPR64allRegClass.hasSubClassEq(MRI.getRegClass(VReg));
+ const MachineInstr *DefMI = MRI.getVRegDef(VReg);
+ unsigned Opc = 0;
+ unsigned SrcOpNum = 0;
+ switch (DefMI->getOpcode()) {
+ case AArch64::ADDSXri:
+ case AArch64::ADDSWri:
+ // if NZCV is used, do not fold.
+ if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
+ return 0;
+ // fall-through to ADDXri and ADDWri.
+ case AArch64::ADDXri:
+ case AArch64::ADDWri:
+ // add x, 1 -> csinc.
+ if (!DefMI->getOperand(2).isImm() || DefMI->getOperand(2).getImm() != 1 ||
+ DefMI->getOperand(3).getImm() != 0)
+ return 0;
+ SrcOpNum = 1;
+ Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr;
+ break;
+
+ case AArch64::ORNXrr:
+ case AArch64::ORNWrr: {
+ // not x -> csinv, represented as orn dst, xzr, src.
+ unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
+ if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
+ return 0;
+ SrcOpNum = 2;
+ Opc = Is64Bit ? AArch64::CSINVXr : AArch64::CSINVWr;
+ break;
+ }
+
+ case AArch64::SUBSXrr:
+ case AArch64::SUBSWrr:
+ // if NZCV is used, do not fold.
+ if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
+ return 0;
+ // fall-through to SUBXrr and SUBWrr.
+ case AArch64::SUBXrr:
+ case AArch64::SUBWrr: {
+ // neg x -> csneg, represented as sub dst, xzr, src.
+ unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
+ if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
+ return 0;
+ SrcOpNum = 2;
+ Opc = Is64Bit ? AArch64::CSNEGXr : AArch64::CSNEGWr;
+ break;
+ }
default:
+ return 0;
+ }
+ assert(Opc && SrcOpNum && "Missing parameters");
+
+ if (NewVReg)
+ *NewVReg = DefMI->getOperand(SrcOpNum).getReg();
+ return Opc;
+}
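+// E.g. if TrueReg is defined by "%t = ADDWri %a, 1, 0", this returns CSINCWr
+// with *NewVReg = %a, so "csel %d, %t, %f, cc" can be emitted as
+// "csinc %d, %f, %a, <inverted cc>" and the add becomes dead.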
+
+bool AArch64InstrInfo::canInsertSelect(
+ const MachineBasicBlock &MBB, const SmallVectorImpl<MachineOperand> &Cond,
+ unsigned TrueReg, unsigned FalseReg, int &CondCycles, int &TrueCycles,
+ int &FalseCycles) const {
+ // Check register classes.
+ const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+ const TargetRegisterClass *RC =
+ RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
+ if (!RC)
return false;
+
+ // Expanding cbz/tbz requires an extra cycle of latency on the condition.
+ unsigned ExtraCondLat = Cond.size() != 1;
+
+ // GPRs are handled by csel.
+ // FIXME: Fold in x+1, -x, and ~x when applicable.
+ if (AArch64::GPR64allRegClass.hasSubClassEq(RC) ||
+ AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
+ // Single-cycle csel, csinc, csinv, and csneg.
+ CondCycles = 1 + ExtraCondLat;
+ TrueCycles = FalseCycles = 1;
+ if (canFoldIntoCSel(MRI, TrueReg))
+ TrueCycles = 0;
+ else if (canFoldIntoCSel(MRI, FalseReg))
+ FalseCycles = 0;
+ return true;
}
+ // Scalar floating point is handled by fcsel.
+ // FIXME: Form fabs, fmin, and fmax when applicable.
+ if (AArch64::FPR64RegClass.hasSubClassEq(RC) ||
+ AArch64::FPR32RegClass.hasSubClassEq(RC)) {
+ CondCycles = 5 + ExtraCondLat;
+ TrueCycles = FalseCycles = 2;
+ return true;
+ }
+
+ // Can't do vectors.
return false;
}
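+// The cycle counts reported above are rough estimates: a csel-class result
+// is ready in one cycle and an fcsel in two, with the condition itself
+// costing one (GPR) or five (FP) cycles, plus one more when a cbz/tbz
+// condition has to be re-materialized as a separate compare or test.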
-void
-AArch64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- unsigned SrcReg, bool isKill,
- int FrameIdx,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const {
- DebugLoc DL = MBB.findDebugLoc(MBBI);
- MachineFunction &MF = *MBB.getParent();
- MachineFrameInfo &MFI = *MF.getFrameInfo();
- unsigned Align = MFI.getObjectAlignment(FrameIdx);
-
- MachineMemOperand *MMO
- = MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx),
- MachineMemOperand::MOStore,
- MFI.getObjectSize(FrameIdx),
- Align);
-
- unsigned StoreOp = 0;
- if (RC->hasType(MVT::i64) || RC->hasType(MVT::i32)) {
- switch(RC->getSize()) {
- case 4: StoreOp = AArch64::LS32_STR; break;
- case 8: StoreOp = AArch64::LS64_STR; break;
+void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DstReg,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ unsigned TrueReg, unsigned FalseReg) const {
+ MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+
+ // Parse the condition code, see parseCondBranch() above.
+ AArch64CC::CondCode CC;
+ switch (Cond.size()) {
+ default:
+ llvm_unreachable("Unknown condition opcode in Cond");
+ case 1: // b.cc
+ CC = AArch64CC::CondCode(Cond[0].getImm());
+ break;
+ case 3: { // cbz/cbnz
+ // We must insert a compare against 0.
+ bool Is64Bit;
+ switch (Cond[1].getImm()) {
default:
- llvm_unreachable("Unknown size for regclass");
- }
- } else if (AArch64::FPR8RegClass.hasSubClassEq(RC)) {
- StoreOp = AArch64::LSFP8_STR;
- } else if (AArch64::FPR16RegClass.hasSubClassEq(RC)) {
- StoreOp = AArch64::LSFP16_STR;
- } else if (RC->hasType(MVT::f32) || RC->hasType(MVT::f64) ||
- RC->hasType(MVT::f128)) {
- switch (RC->getSize()) {
- case 4: StoreOp = AArch64::LSFP32_STR; break;
- case 8: StoreOp = AArch64::LSFP64_STR; break;
- case 16: StoreOp = AArch64::LSFP128_STR; break;
+ llvm_unreachable("Unknown branch opcode in Cond");
+ case AArch64::CBZW:
+      Is64Bit = false;
+ CC = AArch64CC::EQ;
+ break;
+ case AArch64::CBZX:
+      Is64Bit = true;
+ CC = AArch64CC::EQ;
+ break;
+ case AArch64::CBNZW:
+      Is64Bit = false;
+ CC = AArch64CC::NE;
+ break;
+ case AArch64::CBNZX:
+      Is64Bit = true;
+ CC = AArch64CC::NE;
+ break;
+ }
+ unsigned SrcReg = Cond[2].getReg();
+ if (Is64Bit) {
+ // cmp reg, #0 is actually subs xzr, reg, #0.
+ MRI.constrainRegClass(SrcReg, &AArch64::GPR64spRegClass);
+ BuildMI(MBB, I, DL, get(AArch64::SUBSXri), AArch64::XZR)
+ .addReg(SrcReg)
+ .addImm(0)
+ .addImm(0);
+ } else {
+ MRI.constrainRegClass(SrcReg, &AArch64::GPR32spRegClass);
+ BuildMI(MBB, I, DL, get(AArch64::SUBSWri), AArch64::WZR)
+ .addReg(SrcReg)
+ .addImm(0)
+ .addImm(0);
+ }
+ break;
+ }
+ case 4: { // tbz/tbnz
+ // We must insert a tst instruction.
+ switch (Cond[1].getImm()) {
default:
- llvm_unreachable("Unknown size for regclass");
- }
- } else { // For a super register class has more than one sub registers
- if (AArch64::DPairRegClass.hasSubClassEq(RC))
- StoreOp = AArch64::ST1x2_8B;
- else if (AArch64::DTripleRegClass.hasSubClassEq(RC))
- StoreOp = AArch64::ST1x3_8B;
- else if (AArch64::DQuadRegClass.hasSubClassEq(RC))
- StoreOp = AArch64::ST1x4_8B;
- else if (AArch64::QPairRegClass.hasSubClassEq(RC))
- StoreOp = AArch64::ST1x2_16B;
- else if (AArch64::QTripleRegClass.hasSubClassEq(RC))
- StoreOp = AArch64::ST1x3_16B;
- else if (AArch64::QQuadRegClass.hasSubClassEq(RC))
- StoreOp = AArch64::ST1x4_16B;
+ llvm_unreachable("Unknown branch opcode in Cond");
+ case AArch64::TBZW:
+ case AArch64::TBZX:
+ CC = AArch64CC::EQ;
+ break;
+ case AArch64::TBNZW:
+ case AArch64::TBNZX:
+ CC = AArch64CC::NE;
+ break;
+ }
+    // tst reg, #(1 << foo) is actually ands xzr, reg, #(1 << foo).
+ if (Cond[1].getImm() == AArch64::TBZW || Cond[1].getImm() == AArch64::TBNZW)
+ BuildMI(MBB, I, DL, get(AArch64::ANDSWri), AArch64::WZR)
+ .addReg(Cond[2].getReg())
+ .addImm(
+ AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 32));
else
- llvm_unreachable("Unknown reg class");
+ BuildMI(MBB, I, DL, get(AArch64::ANDSXri), AArch64::XZR)
+ .addReg(Cond[2].getReg())
+ .addImm(
+ AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 64));
+ break;
+ }
+ }
- MachineInstrBuilder NewMI = BuildMI(MBB, MBBI, DL, get(StoreOp));
- // Vector store has different operands from other store instructions.
- NewMI.addFrameIndex(FrameIdx)
- .addReg(SrcReg, getKillRegState(isKill))
- .addMemOperand(MMO);
- return;
+ unsigned Opc = 0;
+ const TargetRegisterClass *RC = nullptr;
+ bool TryFold = false;
+ if (MRI.constrainRegClass(DstReg, &AArch64::GPR64RegClass)) {
+ RC = &AArch64::GPR64RegClass;
+ Opc = AArch64::CSELXr;
+ TryFold = true;
+ } else if (MRI.constrainRegClass(DstReg, &AArch64::GPR32RegClass)) {
+ RC = &AArch64::GPR32RegClass;
+ Opc = AArch64::CSELWr;
+ TryFold = true;
+ } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR64RegClass)) {
+ RC = &AArch64::FPR64RegClass;
+ Opc = AArch64::FCSELDrrr;
+ } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR32RegClass)) {
+ RC = &AArch64::FPR32RegClass;
+ Opc = AArch64::FCSELSrrr;
+ }
+ assert(RC && "Unsupported regclass");
+
+ // Try folding simple instructions into the csel.
+ if (TryFold) {
+ unsigned NewVReg = 0;
+ unsigned FoldedOpc = canFoldIntoCSel(MRI, TrueReg, &NewVReg);
+ if (FoldedOpc) {
+      // The folded opcodes csinc, csinv and csneg apply the operation to
+ // FalseReg, so we need to invert the condition.
+ CC = AArch64CC::getInvertedCondCode(CC);
+ TrueReg = FalseReg;
+ } else
+ FoldedOpc = canFoldIntoCSel(MRI, FalseReg, &NewVReg);
+
+ // Fold the operation. Leave any dead instructions for DCE to clean up.
+ if (FoldedOpc) {
+ FalseReg = NewVReg;
+ Opc = FoldedOpc;
+      // This extends the live range of NewVReg.
+ MRI.clearKillFlags(NewVReg);
+ }
}
- MachineInstrBuilder NewMI = BuildMI(MBB, MBBI, DL, get(StoreOp));
- NewMI.addReg(SrcReg, getKillRegState(isKill))
- .addFrameIndex(FrameIdx)
- .addImm(0)
- .addMemOperand(MMO);
+  // Pull all virtual registers into the appropriate class.
+ MRI.constrainRegClass(TrueReg, RC);
+ MRI.constrainRegClass(FalseReg, RC);
+ // Insert the csel.
+ BuildMI(MBB, I, DL, get(Opc), DstReg).addReg(TrueReg).addReg(FalseReg).addImm(
+ CC);
}
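+// For the tbz/tbnz case above, e.g. "tbz w0, #3" becomes
+// "ands wzr, w0, #0x8" ahead of the csel: encodeLogicalImmediate(1ull << 3,
+// 32) converts the single-bit mask into the N:immr:imms form that the AND
+// immediate encoding expects.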
-void
-AArch64InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- unsigned DestReg, int FrameIdx,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const {
- DebugLoc DL = MBB.findDebugLoc(MBBI);
- MachineFunction &MF = *MBB.getParent();
- MachineFrameInfo &MFI = *MF.getFrameInfo();
- unsigned Align = MFI.getObjectAlignment(FrameIdx);
-
- MachineMemOperand *MMO
- = MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx),
- MachineMemOperand::MOLoad,
- MFI.getObjectSize(FrameIdx),
- Align);
-
- unsigned LoadOp = 0;
- if (RC->hasType(MVT::i64) || RC->hasType(MVT::i32)) {
- switch(RC->getSize()) {
- case 4: LoadOp = AArch64::LS32_LDR; break;
- case 8: LoadOp = AArch64::LS64_LDR; break;
- default:
- llvm_unreachable("Unknown size for regclass");
- }
- } else if (AArch64::FPR8RegClass.hasSubClassEq(RC)) {
- LoadOp = AArch64::LSFP8_LDR;
- } else if (AArch64::FPR16RegClass.hasSubClassEq(RC)) {
- LoadOp = AArch64::LSFP16_LDR;
- } else if (RC->hasType(MVT::f32) || RC->hasType(MVT::f64) ||
- RC->hasType(MVT::f128)) {
- switch (RC->getSize()) {
- case 4: LoadOp = AArch64::LSFP32_LDR; break;
- case 8: LoadOp = AArch64::LSFP64_LDR; break;
- case 16: LoadOp = AArch64::LSFP128_LDR; break;
- default:
- llvm_unreachable("Unknown size for regclass");
- }
- } else { // For a super register class has more than one sub registers
- if (AArch64::DPairRegClass.hasSubClassEq(RC))
- LoadOp = AArch64::LD1x2_8B;
- else if (AArch64::DTripleRegClass.hasSubClassEq(RC))
- LoadOp = AArch64::LD1x3_8B;
- else if (AArch64::DQuadRegClass.hasSubClassEq(RC))
- LoadOp = AArch64::LD1x4_8B;
- else if (AArch64::QPairRegClass.hasSubClassEq(RC))
- LoadOp = AArch64::LD1x2_16B;
- else if (AArch64::QTripleRegClass.hasSubClassEq(RC))
- LoadOp = AArch64::LD1x3_16B;
- else if (AArch64::QQuadRegClass.hasSubClassEq(RC))
- LoadOp = AArch64::LD1x4_16B;
- else
- llvm_unreachable("Unknown reg class");
+bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
+ unsigned &SrcReg, unsigned &DstReg,
+ unsigned &SubIdx) const {
+ switch (MI.getOpcode()) {
+ default:
+ return false;
+ case AArch64::SBFMXri: // aka sxtw
+ case AArch64::UBFMXri: // aka uxtw
+ // Check for the 32 -> 64 bit extension case, these instructions can do
+ // much more.
+ if (MI.getOperand(2).getImm() != 0 || MI.getOperand(3).getImm() != 31)
+ return false;
+ // This is a signed or unsigned 32 -> 64 bit extension.
+ SrcReg = MI.getOperand(1).getReg();
+ DstReg = MI.getOperand(0).getReg();
+ SubIdx = AArch64::sub_32;
+ return true;
+ }
+}
- MachineInstrBuilder NewMI = BuildMI(MBB, MBBI, DL, get(LoadOp), DestReg);
- // Vector load has different operands from other load instructions.
- NewMI.addFrameIndex(FrameIdx)
- .addMemOperand(MMO);
- return;
+/// analyzeCompare - For a comparison instruction, return the source registers
+/// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
+/// Return true if the comparison instruction can be analyzed.
+bool AArch64InstrInfo::analyzeCompare(const MachineInstr *MI, unsigned &SrcReg,
+ unsigned &SrcReg2, int &CmpMask,
+ int &CmpValue) const {
+ switch (MI->getOpcode()) {
+ default:
+ break;
+ case AArch64::SUBSWrr:
+ case AArch64::SUBSWrs:
+ case AArch64::SUBSWrx:
+ case AArch64::SUBSXrr:
+ case AArch64::SUBSXrs:
+ case AArch64::SUBSXrx:
+ case AArch64::ADDSWrr:
+ case AArch64::ADDSWrs:
+ case AArch64::ADDSWrx:
+ case AArch64::ADDSXrr:
+ case AArch64::ADDSXrs:
+ case AArch64::ADDSXrx:
+    // Register-register forms: report both source registers and no immediate.
+ SrcReg = MI->getOperand(1).getReg();
+ SrcReg2 = MI->getOperand(2).getReg();
+ CmpMask = ~0;
+ CmpValue = 0;
+ return true;
+ case AArch64::SUBSWri:
+ case AArch64::ADDSWri:
+ case AArch64::SUBSXri:
+ case AArch64::ADDSXri:
+ SrcReg = MI->getOperand(1).getReg();
+ SrcReg2 = 0;
+ CmpMask = ~0;
+ CmpValue = MI->getOperand(2).getImm();
+ return true;
+ case AArch64::ANDSWri:
+ case AArch64::ANDSXri:
+    // ANDS does not use the same encoding scheme as the other xxxS
+ // instructions.
+ SrcReg = MI->getOperand(1).getReg();
+ SrcReg2 = 0;
+ CmpMask = ~0;
+ CmpValue = AArch64_AM::decodeLogicalImmediate(
+ MI->getOperand(2).getImm(),
+ MI->getOpcode() == AArch64::ANDSWri ? 32 : 64);
+ return true;
+ }
+
+ return false;
+}
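+// E.g. "subs w0, w1, #7" is reported as SrcReg = w1, SrcReg2 = 0 and
+// CmpValue = 7, while ANDS immediates are first decoded from their
+// logical-immediate encoding before being reported.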
+
+static bool UpdateOperandRegClass(MachineInstr *Instr) {
+ MachineBasicBlock *MBB = Instr->getParent();
+ assert(MBB && "Can't get MachineBasicBlock here");
+ MachineFunction *MF = MBB->getParent();
+ assert(MF && "Can't get MachineFunction here");
+ const TargetMachine *TM = &MF->getTarget();
+ const TargetInstrInfo *TII = TM->getInstrInfo();
+ const TargetRegisterInfo *TRI = TM->getRegisterInfo();
+ MachineRegisterInfo *MRI = &MF->getRegInfo();
+
+ for (unsigned OpIdx = 0, EndIdx = Instr->getNumOperands(); OpIdx < EndIdx;
+ ++OpIdx) {
+ MachineOperand &MO = Instr->getOperand(OpIdx);
+ const TargetRegisterClass *OpRegCstraints =
+ Instr->getRegClassConstraint(OpIdx, TII, TRI);
+
+ // If there's no constraint, there's nothing to do.
+ if (!OpRegCstraints)
+ continue;
+ // If the operand is a frame index, there's nothing to do here.
+ // A frame index operand will resolve correctly during PEI.
+ if (MO.isFI())
+ continue;
+
+ assert(MO.isReg() &&
+ "Operand has register constraints without being a register!");
+
+ unsigned Reg = MO.getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ if (!OpRegCstraints->contains(Reg))
+ return false;
+ } else if (!OpRegCstraints->hasSubClassEq(MRI->getRegClass(Reg)) &&
+ !MRI->constrainRegClass(Reg, OpRegCstraints))
+ return false;
}
- MachineInstrBuilder NewMI = BuildMI(MBB, MBBI, DL, get(LoadOp), DestReg);
- NewMI.addFrameIndex(FrameIdx)
- .addImm(0)
- .addMemOperand(MMO);
+ return true;
}
-unsigned AArch64InstrInfo::estimateRSStackLimit(MachineFunction &MF) const {
- unsigned Limit = (1 << 16) - 1;
- for (MachineFunction::iterator BB = MF.begin(),E = MF.end(); BB != E; ++BB) {
- for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end();
- I != E; ++I) {
- for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
- if (!I->getOperand(i).isFI()) continue;
+/// optimizeCompareInstr - Convert the instruction supplying the argument to the
+/// comparison into one that sets the zero bit in the flags register.
+bool AArch64InstrInfo::optimizeCompareInstr(
+ MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2, int CmpMask,
+ int CmpValue, const MachineRegisterInfo *MRI) const {
+
+ // Replace SUBSWrr with SUBWrr if NZCV is not used.
+ int Cmp_NZCV = CmpInstr->findRegisterDefOperandIdx(AArch64::NZCV, true);
+ if (Cmp_NZCV != -1) {
+ unsigned NewOpc;
+ switch (CmpInstr->getOpcode()) {
+ default:
+ return false;
+ case AArch64::ADDSWrr: NewOpc = AArch64::ADDWrr; break;
+ case AArch64::ADDSWri: NewOpc = AArch64::ADDWri; break;
+ case AArch64::ADDSWrs: NewOpc = AArch64::ADDWrs; break;
+ case AArch64::ADDSWrx: NewOpc = AArch64::ADDWrx; break;
+ case AArch64::ADDSXrr: NewOpc = AArch64::ADDXrr; break;
+ case AArch64::ADDSXri: NewOpc = AArch64::ADDXri; break;
+ case AArch64::ADDSXrs: NewOpc = AArch64::ADDXrs; break;
+ case AArch64::ADDSXrx: NewOpc = AArch64::ADDXrx; break;
+ case AArch64::SUBSWrr: NewOpc = AArch64::SUBWrr; break;
+ case AArch64::SUBSWri: NewOpc = AArch64::SUBWri; break;
+ case AArch64::SUBSWrs: NewOpc = AArch64::SUBWrs; break;
+ case AArch64::SUBSWrx: NewOpc = AArch64::SUBWrx; break;
+ case AArch64::SUBSXrr: NewOpc = AArch64::SUBXrr; break;
+ case AArch64::SUBSXri: NewOpc = AArch64::SUBXri; break;
+ case AArch64::SUBSXrs: NewOpc = AArch64::SUBXrs; break;
+ case AArch64::SUBSXrx: NewOpc = AArch64::SUBXrx; break;
+ }
+
+ const MCInstrDesc &MCID = get(NewOpc);
+ CmpInstr->setDesc(MCID);
+ CmpInstr->RemoveOperand(Cmp_NZCV);
+ bool succeeded = UpdateOperandRegClass(CmpInstr);
+ (void)succeeded;
+    assert(succeeded && "Some operand register classes are incompatible!");
+ return true;
+ }
+
+ // Continue only if we have a "ri" where immediate is zero.
+ if (CmpValue != 0 || SrcReg2 != 0)
+ return false;
+
+  // CmpInstr is a Compare instruction if the destination register is not used.
+ if (!MRI->use_nodbg_empty(CmpInstr->getOperand(0).getReg()))
+ return false;
+
+ // Get the unique definition of SrcReg.
+ MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
+ if (!MI)
+ return false;
+
+ // We iterate backward, starting from the instruction before CmpInstr and
+ // stop when reaching the definition of the source register or done with the
+ // basic block, to check whether NZCV is used or modified in between.
+ MachineBasicBlock::iterator I = CmpInstr, E = MI,
+ B = CmpInstr->getParent()->begin();
+
+ // Early exit if CmpInstr is at the beginning of the BB.
+ if (I == B)
+ return false;
+
+ // Check whether the definition of SrcReg is in the same basic block as
+ // Compare. If not, we can't optimize away the Compare.
+ if (MI->getParent() != CmpInstr->getParent())
+ return false;
+
+ // Check that NZCV isn't set between the comparison instruction and the one we
+ // want to change.
+ const TargetRegisterInfo *TRI = &getRegisterInfo();
+ for (--I; I != E; --I) {
+ const MachineInstr &Instr = *I;
- // When using ADDxxi_lsl0_s to get the address of a stack object, 0xfff
- // is the largest offset guaranteed to fit in the immediate offset.
- if (I->getOpcode() == AArch64::ADDxxi_lsl0_s) {
- Limit = std::min(Limit, 0xfffu);
- break;
- }
+ if (Instr.modifiesRegister(AArch64::NZCV, TRI) ||
+ Instr.readsRegister(AArch64::NZCV, TRI))
+ // This instruction modifies or uses NZCV after the one we want to
+ // change. We can't do this transformation.
+ return false;
+ if (I == B)
+      // The defining instruction is below the comparison instruction.
+ return false;
+ }
+
+ unsigned NewOpc = MI->getOpcode();
+ switch (MI->getOpcode()) {
+ default:
+ return false;
+ case AArch64::ADDSWrr:
+ case AArch64::ADDSWri:
+ case AArch64::ADDSXrr:
+ case AArch64::ADDSXri:
+ case AArch64::SUBSWrr:
+ case AArch64::SUBSWri:
+ case AArch64::SUBSXrr:
+ case AArch64::SUBSXri:
+ break;
+ case AArch64::ADDWrr: NewOpc = AArch64::ADDSWrr; break;
+ case AArch64::ADDWri: NewOpc = AArch64::ADDSWri; break;
+ case AArch64::ADDXrr: NewOpc = AArch64::ADDSXrr; break;
+ case AArch64::ADDXri: NewOpc = AArch64::ADDSXri; break;
+ case AArch64::ADCWr: NewOpc = AArch64::ADCSWr; break;
+ case AArch64::ADCXr: NewOpc = AArch64::ADCSXr; break;
+ case AArch64::SUBWrr: NewOpc = AArch64::SUBSWrr; break;
+ case AArch64::SUBWri: NewOpc = AArch64::SUBSWri; break;
+ case AArch64::SUBXrr: NewOpc = AArch64::SUBSXrr; break;
+ case AArch64::SUBXri: NewOpc = AArch64::SUBSXri; break;
+ case AArch64::SBCWr: NewOpc = AArch64::SBCSWr; break;
+ case AArch64::SBCXr: NewOpc = AArch64::SBCSXr; break;
+ case AArch64::ANDWri: NewOpc = AArch64::ANDSWri; break;
+ case AArch64::ANDXri: NewOpc = AArch64::ANDSXri; break;
+ }
- int AccessScale, MinOffset, MaxOffset;
- getAddressConstraints(*I, AccessScale, MinOffset, MaxOffset);
- Limit = std::min(Limit, static_cast<unsigned>(MaxOffset));
+  // Scan forward for uses of NZCV.
+  // If any user's condition code requires the V bit, this transformation is
+  // not safe to do.
+  // It is safe to remove CmpInstr if NZCV is redefined or killed.
+  // If we reach the end of the basic block, we need to check whether NZCV is
+  // live-out.
+ bool IsSafe = false;
+ for (MachineBasicBlock::iterator I = CmpInstr,
+ E = CmpInstr->getParent()->end();
+ !IsSafe && ++I != E;) {
+ const MachineInstr &Instr = *I;
+ for (unsigned IO = 0, EO = Instr.getNumOperands(); !IsSafe && IO != EO;
+ ++IO) {
+ const MachineOperand &MO = Instr.getOperand(IO);
+ if (MO.isRegMask() && MO.clobbersPhysReg(AArch64::NZCV)) {
+ IsSafe = true;
+ break;
+ }
+ if (!MO.isReg() || MO.getReg() != AArch64::NZCV)
+ continue;
+ if (MO.isDef()) {
+ IsSafe = true;
+ break;
+ }
- break; // At most one FI per instruction
+ // Decode the condition code.
+ unsigned Opc = Instr.getOpcode();
+ AArch64CC::CondCode CC;
+ switch (Opc) {
+ default:
+ return false;
+ case AArch64::Bcc:
+ CC = (AArch64CC::CondCode)Instr.getOperand(IO - 2).getImm();
+ break;
+ case AArch64::CSINVWr:
+ case AArch64::CSINVXr:
+ case AArch64::CSINCWr:
+ case AArch64::CSINCXr:
+ case AArch64::CSELWr:
+ case AArch64::CSELXr:
+ case AArch64::CSNEGWr:
+ case AArch64::CSNEGXr:
+ case AArch64::FCSELSrrr:
+ case AArch64::FCSELDrrr:
+ CC = (AArch64CC::CondCode)Instr.getOperand(IO - 1).getImm();
+ break;
+ }
+
+ // It is not safe to remove Compare instruction if Overflow(V) is used.
+ switch (CC) {
+ default:
+ // NZCV can be used multiple times, we should continue.
+ break;
+ case AArch64CC::VS:
+ case AArch64CC::VC:
+ case AArch64CC::GE:
+ case AArch64CC::LT:
+ case AArch64CC::GT:
+ case AArch64CC::LE:
+ return false;
}
}
}
- return Limit;
+  // If NZCV is neither killed nor re-defined, we should check whether it is
+ // live-out. If it is live-out, do not optimize.
+ if (!IsSafe) {
+ MachineBasicBlock *ParentBlock = CmpInstr->getParent();
+ for (auto *MBB : ParentBlock->successors())
+ if (MBB->isLiveIn(AArch64::NZCV))
+ return false;
+ }
+
+ // Update the instruction to set NZCV.
+ MI->setDesc(get(NewOpc));
+ CmpInstr->eraseFromParent();
+ bool succeeded = UpdateOperandRegClass(MI);
+ (void)succeeded;
+  assert(succeeded && "Some operand register classes are incompatible!");
+ MI->addRegisterDefined(AArch64::NZCV, TRI);
+ return true;
}
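+// E.g. given "%2 = ADDWrr %0, %1" followed by "SUBSWri %2, 0, 0" whose
+// result is unused and whose flags feed only a b.eq, the add is rewritten
+// to ADDSWrr and the compare against zero is erased. The transformation is
+// refused if anything in between touches NZCV or if any consumer needs the
+// V bit (GE/LT/GT/LE/VS/VC).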
-void AArch64InstrInfo::getAddressConstraints(const MachineInstr &MI,
- int &AccessScale, int &MinOffset,
- int &MaxOffset) const {
- switch (MI.getOpcode()) {
+
+/// Return true if this instruction has a shifted-register operand with a
+/// non-zero shift amount.
+bool AArch64InstrInfo::hasShiftedReg(const MachineInstr *MI) const {
+ switch (MI->getOpcode()) {
default:
- llvm_unreachable("Unknown load/store kind");
- case TargetOpcode::DBG_VALUE:
- AccessScale = 1;
- MinOffset = INT_MIN;
- MaxOffset = INT_MAX;
- return;
- case AArch64::LS8_LDR: case AArch64::LS8_STR:
- case AArch64::LSFP8_LDR: case AArch64::LSFP8_STR:
- case AArch64::LDRSBw:
- case AArch64::LDRSBx:
- AccessScale = 1;
- MinOffset = 0;
- MaxOffset = 0xfff;
- return;
- case AArch64::LS16_LDR: case AArch64::LS16_STR:
- case AArch64::LSFP16_LDR: case AArch64::LSFP16_STR:
- case AArch64::LDRSHw:
- case AArch64::LDRSHx:
- AccessScale = 2;
- MinOffset = 0;
- MaxOffset = 0xfff * AccessScale;
- return;
- case AArch64::LS32_LDR: case AArch64::LS32_STR:
- case AArch64::LSFP32_LDR: case AArch64::LSFP32_STR:
- case AArch64::LDRSWx:
- case AArch64::LDPSWx:
- AccessScale = 4;
- MinOffset = 0;
- MaxOffset = 0xfff * AccessScale;
- return;
- case AArch64::LS64_LDR: case AArch64::LS64_STR:
- case AArch64::LSFP64_LDR: case AArch64::LSFP64_STR:
- case AArch64::PRFM:
- AccessScale = 8;
- MinOffset = 0;
- MaxOffset = 0xfff * AccessScale;
- return;
- case AArch64::LSFP128_LDR: case AArch64::LSFP128_STR:
- AccessScale = 16;
- MinOffset = 0;
- MaxOffset = 0xfff * AccessScale;
- return;
- case AArch64::LSPair32_LDR: case AArch64::LSPair32_STR:
- case AArch64::LSFPPair32_LDR: case AArch64::LSFPPair32_STR:
- AccessScale = 4;
- MinOffset = -0x40 * AccessScale;
- MaxOffset = 0x3f * AccessScale;
- return;
- case AArch64::LSPair64_LDR: case AArch64::LSPair64_STR:
- case AArch64::LSFPPair64_LDR: case AArch64::LSFPPair64_STR:
- AccessScale = 8;
- MinOffset = -0x40 * AccessScale;
- MaxOffset = 0x3f * AccessScale;
- return;
- case AArch64::LSFPPair128_LDR: case AArch64::LSFPPair128_STR:
- AccessScale = 16;
- MinOffset = -0x40 * AccessScale;
- MaxOffset = 0x3f * AccessScale;
- return;
- case AArch64::LD1x2_8B: case AArch64::ST1x2_8B:
- AccessScale = 16;
- MinOffset = 0;
- MaxOffset = 0xfff * AccessScale;
- return;
- case AArch64::LD1x3_8B: case AArch64::ST1x3_8B:
- AccessScale = 24;
- MinOffset = 0;
- MaxOffset = 0xfff * AccessScale;
- return;
- case AArch64::LD1x4_8B: case AArch64::ST1x4_8B:
- case AArch64::LD1x2_16B: case AArch64::ST1x2_16B:
- AccessScale = 32;
- MinOffset = 0;
- MaxOffset = 0xfff * AccessScale;
- return;
- case AArch64::LD1x3_16B: case AArch64::ST1x3_16B:
- AccessScale = 48;
- MinOffset = 0;
- MaxOffset = 0xfff * AccessScale;
- return;
- case AArch64::LD1x4_16B: case AArch64::ST1x4_16B:
- AccessScale = 64;
- MinOffset = 0;
- MaxOffset = 0xfff * AccessScale;
- return;
+ break;
+ case AArch64::ADDSWrs:
+ case AArch64::ADDSXrs:
+ case AArch64::ADDWrs:
+ case AArch64::ADDXrs:
+ case AArch64::ANDSWrs:
+ case AArch64::ANDSXrs:
+ case AArch64::ANDWrs:
+ case AArch64::ANDXrs:
+ case AArch64::BICSWrs:
+ case AArch64::BICSXrs:
+ case AArch64::BICWrs:
+ case AArch64::BICXrs:
+ case AArch64::CRC32Brr:
+ case AArch64::CRC32CBrr:
+ case AArch64::CRC32CHrr:
+ case AArch64::CRC32CWrr:
+ case AArch64::CRC32CXrr:
+ case AArch64::CRC32Hrr:
+ case AArch64::CRC32Wrr:
+ case AArch64::CRC32Xrr:
+ case AArch64::EONWrs:
+ case AArch64::EONXrs:
+ case AArch64::EORWrs:
+ case AArch64::EORXrs:
+ case AArch64::ORNWrs:
+ case AArch64::ORNXrs:
+ case AArch64::ORRWrs:
+ case AArch64::ORRXrs:
+ case AArch64::SUBSWrs:
+ case AArch64::SUBSXrs:
+ case AArch64::SUBWrs:
+ case AArch64::SUBXrs:
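+ // For the "rs" forms, operand 3 is the shifter immediate; a value of
+ // zero means the register is used unshifted.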
+ if (MI->getOperand(3).isImm()) {
+ unsigned val = MI->getOperand(3).getImm();
+ return (val != 0);
+ }
+ break;
}
+ return false;
}
-unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
- const MCInstrDesc &MCID = MI.getDesc();
- const MachineBasicBlock &MBB = *MI.getParent();
- const MachineFunction &MF = *MBB.getParent();
- const MCAsmInfo &MAI = *MF.getTarget().getMCAsmInfo();
+/// Return true if this instruction has an extended register operand with a
+/// non-zero extend value.
+bool AArch64InstrInfo::hasExtendedReg(const MachineInstr *MI) const {
+ switch (MI->getOpcode()) {
+ default:
+ break;
+ case AArch64::ADDSWrx:
+ case AArch64::ADDSXrx:
+ case AArch64::ADDSXrx64:
+ case AArch64::ADDWrx:
+ case AArch64::ADDXrx:
+ case AArch64::ADDXrx64:
+ case AArch64::SUBSWrx:
+ case AArch64::SUBSXrx:
+ case AArch64::SUBSXrx64:
+ case AArch64::SUBWrx:
+ case AArch64::SUBXrx:
+ case AArch64::SUBXrx64:
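+ // For the "rx" forms, operand 3 is the arithmetic-extend immediate
+ // (extend type and shift amount packed together).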
+ if (MI->getOperand(3).isImm()) {
+ unsigned val = MI->getOperand(3).getImm();
+ return (val != 0);
+ }
+ break;
+ }
- if (MCID.getSize())
- return MCID.getSize();
+ return false;
+}
- if (MI.getOpcode() == AArch64::INLINEASM)
- return getInlineAsmLength(MI.getOperand(0).getSymbolName(), MAI);
+// Return true if this instruction simply sets its single destination register
+// to zero. This is equivalent to a register rename of the zero-register.
+bool AArch64InstrInfo::isGPRZero(const MachineInstr *MI) const {
+ switch (MI->getOpcode()) {
+ default:
+ break;
+ case AArch64::MOVZWi:
+ case AArch64::MOVZXi: // movz Rd, #0 (LSL #0)
+ if (MI->getOperand(1).isImm() && MI->getOperand(1).getImm() == 0) {
+ assert(MI->getDesc().getNumOperands() == 3 &&
+ MI->getOperand(2).getImm() == 0 && "invalid MOVZi operands");
+ return true;
+ }
+ break;
+ case AArch64::ANDWri: // and Rd, WZR, #imm
+ return MI->getOperand(1).getReg() == AArch64::WZR;
+ case AArch64::ANDXri:
+ return MI->getOperand(1).getReg() == AArch64::XZR;
+ case TargetOpcode::COPY:
+ return MI->getOperand(1).getReg() == AArch64::WZR;
+ }
+ return false;
+}
- switch (MI.getOpcode()) {
- case TargetOpcode::BUNDLE:
- return getInstBundleLength(MI);
- case TargetOpcode::IMPLICIT_DEF:
- case TargetOpcode::KILL:
- case TargetOpcode::CFI_INSTRUCTION:
- case TargetOpcode::EH_LABEL:
- case TargetOpcode::GC_LABEL:
- case TargetOpcode::DBG_VALUE:
- case AArch64::TLSDESCCALL:
- return 0;
+// Return true if this instruction simply renames a general register without
+// modifying bits.
+bool AArch64InstrInfo::isGPRCopy(const MachineInstr *MI) const {
+ switch (MI->getOpcode()) {
default:
- llvm_unreachable("Unknown instruction class");
+ break;
+ case TargetOpcode::COPY: {
+ // GPR32 copies will be lowered to ORRXrs
+ unsigned DstReg = MI->getOperand(0).getReg();
+ return (AArch64::GPR32RegClass.contains(DstReg) ||
+ AArch64::GPR64RegClass.contains(DstReg));
}
+ case AArch64::ORRXrs: // orr Xd, Xzr, Xm (LSL #0)
+ if (MI->getOperand(1).getReg() == AArch64::XZR) {
+ assert(MI->getDesc().getNumOperands() == 4 &&
+ MI->getOperand(3).getImm() == 0 && "invalid ORRrs operands");
+ return true;
+ }
+ break;
+ case AArch64::ADDXri: // add Xd, Xn, #0 (LSL #0)
+ if (MI->getOperand(2).getImm() == 0) {
+ assert(MI->getDesc().getNumOperands() == 4 &&
+ MI->getOperand(3).getImm() == 0 && "invalid ADDXri operands");
+ return true;
+ }
+ break;
+ }
+ return false;
}
-unsigned AArch64InstrInfo::getInstBundleLength(const MachineInstr &MI) const {
- unsigned Size = 0;
- MachineBasicBlock::const_instr_iterator I = MI;
- MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
- while (++I != E && I->isInsideBundle()) {
- assert(!I->isBundle() && "No nested bundle!");
- Size += getInstSizeInBytes(*I);
+// Return true if this instruction simply renames a floating-point register
+// without modifying bits.
+bool AArch64InstrInfo::isFPRCopy(const MachineInstr *MI) const {
+ switch (MI->getOpcode()) {
+ default:
+ break;
+ case TargetOpcode::COPY: {
+ // FPR64 copies will be lowered to ORR.16b
+ unsigned DstReg = MI->getOperand(0).getReg();
+ return (AArch64::FPR64RegClass.contains(DstReg) ||
+ AArch64::FPR128RegClass.contains(DstReg));
}
- return Size;
+ case AArch64::ORRv16i8:
+ if (MI->getOperand(1).getReg() == MI->getOperand(2).getReg()) {
+ assert(MI->getDesc().getNumOperands() == 3 && MI->getOperand(0).isReg() &&
+ "invalid ORRv16i8 operands");
+ return true;
+ }
+ break;
+ }
+ return false;
}
-bool llvm::rewriteA64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
- unsigned FrameReg, int &Offset,
- const AArch64InstrInfo &TII) {
- MachineBasicBlock &MBB = *MI.getParent();
- MachineFunction &MF = *MBB.getParent();
- MachineFrameInfo &MFI = *MF.getFrameInfo();
+unsigned AArch64InstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ switch (MI->getOpcode()) {
+ default:
+ break;
+ case AArch64::LDRWui:
+ case AArch64::LDRXui:
+ case AArch64::LDRBui:
+ case AArch64::LDRHui:
+ case AArch64::LDRSui:
+ case AArch64::LDRDui:
+ case AArch64::LDRQui:
+ if (MI->getOperand(0).getSubReg() == 0 && MI->getOperand(1).isFI() &&
+ MI->getOperand(2).isImm() && MI->getOperand(2).getImm() == 0) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ }
- MFI.getObjectOffset(FrameRegIdx);
- llvm_unreachable("Unimplemented rewriteFrameIndex");
+ return 0;
}
-void llvm::emitRegUpdate(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- DebugLoc dl, const TargetInstrInfo &TII,
- unsigned DstReg, unsigned SrcReg, unsigned ScratchReg,
- int64_t NumBytes, MachineInstr::MIFlag MIFlags) {
- if (NumBytes == 0 && DstReg == SrcReg)
- return;
- else if (abs64(NumBytes) & ~0xffffff) {
- // Generically, we have to materialize the offset into a temporary register
- // and subtract it. There are a couple of ways this could be done, for now
- // we'll use a movz/movk or movn/movk sequence.
- uint64_t Bits = static_cast<uint64_t>(abs64(NumBytes));
- BuildMI(MBB, MBBI, dl, TII.get(AArch64::MOVZxii), ScratchReg)
- .addImm(0xffff & Bits).addImm(0)
- .setMIFlags(MIFlags);
-
- Bits >>= 16;
- if (Bits & 0xffff) {
- BuildMI(MBB, MBBI, dl, TII.get(AArch64::MOVKxii), ScratchReg)
- .addReg(ScratchReg)
- .addImm(0xffff & Bits).addImm(1)
- .setMIFlags(MIFlags);
- }
-
- Bits >>= 16;
- if (Bits & 0xffff) {
- BuildMI(MBB, MBBI, dl, TII.get(AArch64::MOVKxii), ScratchReg)
- .addReg(ScratchReg)
- .addImm(0xffff & Bits).addImm(2)
- .setMIFlags(MIFlags);
- }
-
- Bits >>= 16;
- if (Bits & 0xffff) {
- BuildMI(MBB, MBBI, dl, TII.get(AArch64::MOVKxii), ScratchReg)
- .addReg(ScratchReg)
- .addImm(0xffff & Bits).addImm(3)
- .setMIFlags(MIFlags);
- }
-
- // ADD DST, SRC, xTMP (, lsl #0)
- unsigned AddOp = NumBytes > 0 ? AArch64::ADDxxx_uxtx : AArch64::SUBxxx_uxtx;
- BuildMI(MBB, MBBI, dl, TII.get(AddOp), DstReg)
- .addReg(SrcReg, RegState::Kill)
- .addReg(ScratchReg, RegState::Kill)
- .addImm(0)
- .setMIFlag(MIFlags);
- return;
+unsigned AArch64InstrInfo::isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ switch (MI->getOpcode()) {
+ default:
+ break;
+ case AArch64::STRWui:
+ case AArch64::STRXui:
+ case AArch64::STRBui:
+ case AArch64::STRHui:
+ case AArch64::STRSui:
+ case AArch64::STRDui:
+ case AArch64::STRQui:
+ if (MI->getOperand(0).getSubReg() == 0 && MI->getOperand(1).isFI() &&
+ MI->getOperand(2).isImm() && MI->getOperand(2).getImm() == 0) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
}
+ return 0;
+}
- // Now we know that the adjustment can be done in at most two add/sub
- // (immediate) instructions, which is always more efficient than a
- // literal-pool load, or even a hypothetical movz/movk/add sequence
+/// Return true if this load/store scales or extends its register offset.
+/// This refers to scaling a dynamic index as opposed to scaled immediates.
+/// MI should be a memory op that allows scaled addressing.
+bool AArch64InstrInfo::isScaledAddr(const MachineInstr *MI) const {
+ switch (MI->getOpcode()) {
+ default:
+ break;
+ case AArch64::LDRBBroW:
+ case AArch64::LDRBroW:
+ case AArch64::LDRDroW:
+ case AArch64::LDRHHroW:
+ case AArch64::LDRHroW:
+ case AArch64::LDRQroW:
+ case AArch64::LDRSBWroW:
+ case AArch64::LDRSBXroW:
+ case AArch64::LDRSHWroW:
+ case AArch64::LDRSHXroW:
+ case AArch64::LDRSWroW:
+ case AArch64::LDRSroW:
+ case AArch64::LDRWroW:
+ case AArch64::LDRXroW:
+ case AArch64::STRBBroW:
+ case AArch64::STRBroW:
+ case AArch64::STRDroW:
+ case AArch64::STRHHroW:
+ case AArch64::STRHroW:
+ case AArch64::STRQroW:
+ case AArch64::STRSroW:
+ case AArch64::STRWroW:
+ case AArch64::STRXroW:
+ case AArch64::LDRBBroX:
+ case AArch64::LDRBroX:
+ case AArch64::LDRDroX:
+ case AArch64::LDRHHroX:
+ case AArch64::LDRHroX:
+ case AArch64::LDRQroX:
+ case AArch64::LDRSBWroX:
+ case AArch64::LDRSBXroX:
+ case AArch64::LDRSHWroX:
+ case AArch64::LDRSHXroX:
+ case AArch64::LDRSWroX:
+ case AArch64::LDRSroX:
+ case AArch64::LDRWroX:
+ case AArch64::LDRXroX:
+ case AArch64::STRBBroX:
+ case AArch64::STRBroX:
+ case AArch64::STRDroX:
+ case AArch64::STRHHroX:
+ case AArch64::STRHroX:
+ case AArch64::STRQroX:
+ case AArch64::STRSroX:
+ case AArch64::STRWroX:
+ case AArch64::STRXroX:
+
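+ // All "ro" forms carry the extend/shift descriptor in operand 3; the
+ // only unscaled, unextended case is plain UXTX with no shift.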
+ unsigned Val = MI->getOperand(3).getImm();
+ AArch64_AM::ShiftExtendType ExtType = AArch64_AM::getMemExtendType(Val);
+ return (ExtType != AArch64_AM::UXTX) || AArch64_AM::getMemDoShift(Val);
+ }
+ return false;
+}
- // Decide whether we're doing addition or subtraction
- unsigned LowOp, HighOp;
- if (NumBytes >= 0) {
- LowOp = AArch64::ADDxxi_lsl0_s;
- HighOp = AArch64::ADDxxi_lsl12_s;
- } else {
- LowOp = AArch64::SUBxxi_lsl0_s;
- HighOp = AArch64::SUBxxi_lsl12_s;
- NumBytes = abs64(NumBytes);
+/// Check all MachineMemOperands for a hint to suppress pairing.
+bool AArch64InstrInfo::isLdStPairSuppressed(const MachineInstr *MI) const {
+ assert(MOSuppressPair < (1 << MachineMemOperand::MOTargetNumBits) &&
+ "Too many target MO flags");
+ for (auto *MM : MI->memoperands()) {
+ if (MM->getFlags() &
+ (MOSuppressPair << MachineMemOperand::MOTargetStartBit)) {
+ return true;
+ }
}
+ return false;
+}
- // If we're here, at the very least a move needs to be produced, which just
- // happens to be materializable by an ADD.
- if ((NumBytes & 0xfff) || NumBytes == 0) {
- BuildMI(MBB, MBBI, dl, TII.get(LowOp), DstReg)
- .addReg(SrcReg, RegState::Kill)
- .addImm(NumBytes & 0xfff)
- .setMIFlag(MIFlags);
+/// Set a flag on the first MachineMemOperand to suppress pairing.
+void AArch64InstrInfo::suppressLdStPair(MachineInstr *MI) const {
+ if (MI->memoperands_empty())
+ return;
- // Next update should use the register we've just defined.
- SrcReg = DstReg;
- }
+ assert(MOSuppressPair < (1 << MachineMemOperand::MOTargetNumBits) &&
+ "Too many target MO flags");
+ (*MI->memoperands_begin())
+ ->setFlags(MOSuppressPair << MachineMemOperand::MOTargetStartBit);
+}
+
+bool
+AArch64InstrInfo::getLdStBaseRegImmOfs(MachineInstr *LdSt, unsigned &BaseReg,
+ unsigned &Offset,
+ const TargetRegisterInfo *TRI) const {
+ switch (LdSt->getOpcode()) {
+ default:
+ return false;
+ case AArch64::STRSui:
+ case AArch64::STRDui:
+ case AArch64::STRQui:
+ case AArch64::STRXui:
+ case AArch64::STRWui:
+ case AArch64::LDRSui:
+ case AArch64::LDRDui:
+ case AArch64::LDRQui:
+ case AArch64::LDRXui:
+ case AArch64::LDRWui:
+ if (!LdSt->getOperand(1).isReg() || !LdSt->getOperand(2).isImm())
+ return false;
+ BaseReg = LdSt->getOperand(1).getReg();
+ MachineFunction &MF = *LdSt->getParent()->getParent();
+ unsigned Width = getRegClass(LdSt->getDesc(), 0, TRI, MF)->getSize();
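+ // The scaled immediate counts units of the transfer size, so convert
+ // it to a byte offset here.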
+ Offset = LdSt->getOperand(2).getImm() * Width;
+ return true;
+ }
+}
- if (NumBytes & 0xfff000) {
- BuildMI(MBB, MBBI, dl, TII.get(HighOp), DstReg)
- .addReg(SrcReg, RegState::Kill)
- .addImm(NumBytes >> 12)
- .setMIFlag(MIFlags);
+/// Detect opportunities for ldp/stp formation.
+///
+/// Only called for LdSt for which getLdStBaseRegImmOfs returns true.
+bool AArch64InstrInfo::shouldClusterLoads(MachineInstr *FirstLdSt,
+ MachineInstr *SecondLdSt,
+ unsigned NumLoads) const {
+ // Only cluster up to a single pair.
+ if (NumLoads > 1)
+ return false;
+ if (FirstLdSt->getOpcode() != SecondLdSt->getOpcode())
+ return false;
+ // getLdStBaseRegImmOfs guarantees that operand 2 is an immediate.
+ unsigned Ofs1 = FirstLdSt->getOperand(2).getImm();
+ // Allow 6 bits of positive range.
+ if (Ofs1 > 64)
+ return false;
+ // The caller should already have ordered First/SecondLdSt by offset.
+ unsigned Ofs2 = SecondLdSt->getOperand(2).getImm();
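+ // Offsets are in units of the access size, so adjacent slots differ
+ // by exactly one.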
+ return Ofs1 + 1 == Ofs2;
+}
+
+bool AArch64InstrInfo::shouldScheduleAdjacent(MachineInstr *First,
+ MachineInstr *Second) const {
+ // Cyclone can fuse CMN, CMP followed by Bcc.
+
+ // FIXME: B0 can also fuse:
+ // AND, BIC, ORN, ORR, or EOR (optional S) followed by Bcc or CBZ or CBNZ.
+ if (Second->getOpcode() != AArch64::Bcc)
+ return false;
+ switch (First->getOpcode()) {
+ default:
+ return false;
+ case AArch64::SUBSWri:
+ case AArch64::ADDSWri:
+ case AArch64::ANDSWri:
+ case AArch64::SUBSXri:
+ case AArch64::ADDSXri:
+ case AArch64::ANDSXri:
+ return true;
}
}
-void llvm::emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
- DebugLoc dl, const TargetInstrInfo &TII,
- unsigned ScratchReg, int64_t NumBytes,
- MachineInstr::MIFlag MIFlags) {
- emitRegUpdate(MBB, MI, dl, TII, AArch64::XSP, AArch64::XSP, AArch64::X16,
- NumBytes, MIFlags);
+MachineInstr *AArch64InstrInfo::emitFrameIndexDebugValue(MachineFunction &MF,
+ int FrameIx,
+ uint64_t Offset,
+ const MDNode *MDPtr,
+ DebugLoc DL) const {
+ MachineInstrBuilder MIB = BuildMI(MF, DL, get(AArch64::DBG_VALUE))
+ .addFrameIndex(FrameIx)
+ .addImm(0)
+ .addImm(Offset)
+ .addMetadata(MDPtr);
+ return &*MIB;
}
+static const MachineInstrBuilder &AddSubReg(const MachineInstrBuilder &MIB,
+ unsigned Reg, unsigned SubIdx,
+ unsigned State,
+ const TargetRegisterInfo *TRI) {
+ if (!SubIdx)
+ return MIB.addReg(Reg, State);
-namespace {
- struct LDTLSCleanup : public MachineFunctionPass {
- static char ID;
- LDTLSCleanup() : MachineFunctionPass(ID) {}
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
+ return MIB.addReg(Reg, State, SubIdx);
+}
- virtual bool runOnMachineFunction(MachineFunction &MF) {
- AArch64MachineFunctionInfo* MFI
- = MF.getInfo<AArch64MachineFunctionInfo>();
- if (MFI->getNumLocalDynamicTLSAccesses() < 2) {
- // No point folding accesses if there isn't at least two.
- return false;
- }
+static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg,
+ unsigned NumRegs) {
+ // We really want the positive remainder mod 32 here, that happens to be
+ // easily obtainable with a mask.
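+ // e.g. copying D2_D3 into D3_D4: (3 - 2) & 0x1f == 1 < 2, so a forward
+ // copy would overwrite D3 before it is read as a source.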
+ return ((DestReg - SrcReg) & 0x1f) < NumRegs;
+}
- MachineDominatorTree *DT = &getAnalysis<MachineDominatorTree>();
- return VisitNode(DT->getRootNode(), 0);
- }
-
- // Visit the dominator subtree rooted at Node in pre-order.
- // If TLSBaseAddrReg is non-null, then use that to replace any
- // TLS_base_addr instructions. Otherwise, create the register
- // when the first such instruction is seen, and then use it
- // as we encounter more instructions.
- bool VisitNode(MachineDomTreeNode *Node, unsigned TLSBaseAddrReg) {
- MachineBasicBlock *BB = Node->getBlock();
- bool Changed = false;
-
- // Traverse the current block.
- for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;
- ++I) {
- switch (I->getOpcode()) {
- case AArch64::TLSDESC_BLRx:
- // Make sure it's a local dynamic access.
- if (!I->getOperand(1).isSymbol() ||
- strcmp(I->getOperand(1).getSymbolName(), "_TLS_MODULE_BASE_"))
- break;
-
- if (TLSBaseAddrReg)
- I = ReplaceTLSBaseAddrCall(I, TLSBaseAddrReg);
- else
- I = SetRegister(I, &TLSBaseAddrReg);
- Changed = true;
- break;
- default:
- break;
- }
- }
+void AArch64InstrInfo::copyPhysRegTuple(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg, bool KillSrc, unsigned Opcode,
+ llvm::ArrayRef<unsigned> Indices) const {
+ assert(getSubTarget().hasNEON() &&
+ "Unexpected register copy without NEON");
+ const TargetRegisterInfo *TRI = &getRegisterInfo();
+ uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
+ uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
+ unsigned NumRegs = Indices.size();
+
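+ // When the tuples overlap, copy the sub-registers back to front so no
+ // source sub-register is clobbered before it has been read.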
+ int SubReg = 0, End = NumRegs, Incr = 1;
+ if (forwardCopyWillClobberTuple(DestEncoding, SrcEncoding, NumRegs)) {
+ SubReg = NumRegs - 1;
+ End = -1;
+ Incr = -1;
+ }
- // Visit the children of this block in the dominator tree.
- for (MachineDomTreeNode::iterator I = Node->begin(), E = Node->end();
- I != E; ++I) {
- Changed |= VisitNode(*I, TLSBaseAddrReg);
+ for (; SubReg != End; SubReg += Incr) {
+ const MachineInstrBuilder &MIB = BuildMI(MBB, I, DL, get(Opcode));
+ AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
+ AddSubReg(MIB, SrcReg, Indices[SubReg], 0, TRI);
+ AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
+ }
+}
+
+void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
+ if (AArch64::GPR32spRegClass.contains(DestReg) &&
+ (AArch64::GPR32spRegClass.contains(SrcReg) || SrcReg == AArch64::WZR)) {
+ const TargetRegisterInfo *TRI = &getRegisterInfo();
+
+ if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) {
+ // If either operand is WSP, expand to ADD #0.
+ if (Subtarget.hasZeroCycleRegMove()) {
+ // Cyclone recognizes "ADD Xd, Xn, #0" as a zero-cycle register move.
+ unsigned DestRegX = TRI->getMatchingSuperReg(DestReg, AArch64::sub_32,
+ &AArch64::GPR64spRegClass);
+ unsigned SrcRegX = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_32,
+ &AArch64::GPR64spRegClass);
+ // This instruction is reading and writing X registers. This may upset
+ // the register scavenger and machine verifier, so we need to indicate
+ // that we are reading an undefined value from SrcRegX, but a proper
+ // value from SrcReg.
+ BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestRegX)
+ .addReg(SrcRegX, RegState::Undef)
+ .addImm(0)
+ .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
+ .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
+ } else {
+ BuildMI(MBB, I, DL, get(AArch64::ADDWri), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc))
+ .addImm(0)
+ .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
}
+ } else if (SrcReg == AArch64::WZR && Subtarget.hasZeroCycleZeroing()) {
+ BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg).addImm(0).addImm(
+ AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
+ } else {
+ if (Subtarget.hasZeroCycleRegMove()) {
+ // Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move.
+ unsigned DestRegX = TRI->getMatchingSuperReg(DestReg, AArch64::sub_32,
+ &AArch64::GPR64spRegClass);
+ unsigned SrcRegX = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_32,
+ &AArch64::GPR64spRegClass);
+ // This instruction is reading and writing X registers. This may upset
+ // the register scavenger and machine verifier, so we need to indicate
+ // that we are reading an undefined value from SrcRegX, but a proper
+ // value from SrcReg.
+ BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestRegX)
+ .addReg(AArch64::XZR)
+ .addReg(SrcRegX, RegState::Undef)
+ .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
+ } else {
+ // Otherwise, expand to ORR WZR.
+ BuildMI(MBB, I, DL, get(AArch64::ORRWrr), DestReg)
+ .addReg(AArch64::WZR)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ }
+ }
+ return;
+ }
- return Changed;
+ if (AArch64::GPR64spRegClass.contains(DestReg) &&
+ (AArch64::GPR64spRegClass.contains(SrcReg) || SrcReg == AArch64::XZR)) {
+ if (DestReg == AArch64::SP || SrcReg == AArch64::SP) {
+ // If either operand is SP, expand to ADD #0.
+ BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc))
+ .addImm(0)
+ .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
+ } else if (SrcReg == AArch64::XZR && Subtarget.hasZeroCycleZeroing()) {
+ BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestReg).addImm(0).addImm(
+ AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
+ } else {
+ // Otherwise, expand to ORR XZR.
+ BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestReg)
+ .addReg(AArch64::XZR)
+ .addReg(SrcReg, getKillRegState(KillSrc));
}
+ return;
+ }
- // Replace the TLS_base_addr instruction I with a copy from
- // TLSBaseAddrReg, returning the new instruction.
- MachineInstr *ReplaceTLSBaseAddrCall(MachineInstr *I,
- unsigned TLSBaseAddrReg) {
- MachineFunction *MF = I->getParent()->getParent();
- const AArch64TargetMachine *TM =
- static_cast<const AArch64TargetMachine *>(&MF->getTarget());
- const AArch64InstrInfo *TII = TM->getInstrInfo();
+ // Copy a DDDD register quad by copying the individual sub-registers.
+ if (AArch64::DDDDRegClass.contains(DestReg) &&
+ AArch64::DDDDRegClass.contains(SrcReg)) {
+ static const unsigned Indices[] = { AArch64::dsub0, AArch64::dsub1,
+ AArch64::dsub2, AArch64::dsub3 };
+ copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
+ Indices);
+ return;
+ }
- // Insert a Copy from TLSBaseAddrReg to x0, which is where the rest of the
- // code sequence assumes the address will be.
- MachineInstr *Copy = BuildMI(*I->getParent(), I, I->getDebugLoc(),
- TII->get(TargetOpcode::COPY),
- AArch64::X0)
- .addReg(TLSBaseAddrReg);
+ // Copy a DDD register triple by copying the individual sub-registers.
+ if (AArch64::DDDRegClass.contains(DestReg) &&
+ AArch64::DDDRegClass.contains(SrcReg)) {
+ static const unsigned Indices[] = { AArch64::dsub0, AArch64::dsub1,
+ AArch64::dsub2 };
+ copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
+ Indices);
+ return;
+ }
- // Erase the TLS_base_addr instruction.
- I->eraseFromParent();
+ // Copy a DD register pair by copying the individual sub-registers.
+ if (AArch64::DDRegClass.contains(DestReg) &&
+ AArch64::DDRegClass.contains(SrcReg)) {
+ static const unsigned Indices[] = { AArch64::dsub0, AArch64::dsub1 };
+ copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
+ Indices);
+ return;
+ }
+
+ // Copy a QQQQ register quad by copying the individual sub-registers.
+ if (AArch64::QQQQRegClass.contains(DestReg) &&
+ AArch64::QQQQRegClass.contains(SrcReg)) {
+ static const unsigned Indices[] = { AArch64::qsub0, AArch64::qsub1,
+ AArch64::qsub2, AArch64::qsub3 };
+ copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
+ Indices);
+ return;
+ }
+
+ // Copy a QQQ register triple by copying the individual sub-registers.
+ if (AArch64::QQQRegClass.contains(DestReg) &&
+ AArch64::QQQRegClass.contains(SrcReg)) {
+ static const unsigned Indices[] = { AArch64::qsub0, AArch64::qsub1,
+ AArch64::qsub2 };
+ copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
+ Indices);
+ return;
+ }
+
+ // Copy a QQ register pair by copying the individual sub-registers.
+ if (AArch64::QQRegClass.contains(DestReg) &&
+ AArch64::QQRegClass.contains(SrcReg)) {
+ static const unsigned Indices[] = { AArch64::qsub0, AArch64::qsub1 };
+ copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
+ Indices);
+ return;
+ }
- return Copy;
+ if (AArch64::FPR128RegClass.contains(DestReg) &&
+ AArch64::FPR128RegClass.contains(SrcReg)) {
+ if (getSubTarget().hasNEON()) {
+ BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
+ .addReg(SrcReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ } else {
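+ // Without NEON there is no full 128-bit register move, so bounce the
+ // value through a 16-byte stack slot using pre-indexed store/load.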
+ BuildMI(MBB, I, DL, get(AArch64::STRQpre))
+ .addReg(AArch64::SP, RegState::Define)
+ .addReg(SrcReg, getKillRegState(KillSrc))
+ .addReg(AArch64::SP)
+ .addImm(-16);
+ BuildMI(MBB, I, DL, get(AArch64::LDRQpre))
+ .addReg(AArch64::SP, RegState::Define)
+ .addReg(DestReg, RegState::Define)
+ .addReg(AArch64::SP)
+ .addImm(16);
}
+ return;
+ }
- // Create a virtal register in *TLSBaseAddrReg, and populate it by
- // inserting a copy instruction after I. Returns the new instruction.
- MachineInstr *SetRegister(MachineInstr *I, unsigned *TLSBaseAddrReg) {
- MachineFunction *MF = I->getParent()->getParent();
- const AArch64TargetMachine *TM =
- static_cast<const AArch64TargetMachine *>(&MF->getTarget());
- const AArch64InstrInfo *TII = TM->getInstrInfo();
+ if (AArch64::FPR64RegClass.contains(DestReg) &&
+ AArch64::FPR64RegClass.contains(SrcReg)) {
+ if (getSubTarget().hasNEON()) {
+ DestReg = RI.getMatchingSuperReg(DestReg, AArch64::dsub,
+ &AArch64::FPR128RegClass);
+ SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::dsub,
+ &AArch64::FPR128RegClass);
+ BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
+ .addReg(SrcReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ } else {
+ BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ }
+ return;
+ }
- // Create a virtual register for the TLS base address.
- MachineRegisterInfo &RegInfo = MF->getRegInfo();
- *TLSBaseAddrReg = RegInfo.createVirtualRegister(&AArch64::GPR64RegClass);
+ if (AArch64::FPR32RegClass.contains(DestReg) &&
+ AArch64::FPR32RegClass.contains(SrcReg)) {
+ if (getSubTarget().hasNEON()) {
+ DestReg = RI.getMatchingSuperReg(DestReg, AArch64::ssub,
+ &AArch64::FPR128RegClass);
+ SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::ssub,
+ &AArch64::FPR128RegClass);
+ BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
+ .addReg(SrcReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ } else {
+ BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ }
+ return;
+ }
- // Insert a copy from X0 to TLSBaseAddrReg for later.
- MachineInstr *Next = I->getNextNode();
- MachineInstr *Copy = BuildMI(*I->getParent(), Next, I->getDebugLoc(),
- TII->get(TargetOpcode::COPY),
- *TLSBaseAddrReg)
- .addReg(AArch64::X0);
+ if (AArch64::FPR16RegClass.contains(DestReg) &&
+ AArch64::FPR16RegClass.contains(SrcReg)) {
+ if (getSubTarget().hasNEON()) {
+ DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
+ &AArch64::FPR128RegClass);
+ SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
+ &AArch64::FPR128RegClass);
+ BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
+ .addReg(SrcReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ } else {
+ DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
+ &AArch64::FPR32RegClass);
+ SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
+ &AArch64::FPR32RegClass);
+ BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ }
+ return;
+ }
- return Copy;
+ if (AArch64::FPR8RegClass.contains(DestReg) &&
+ AArch64::FPR8RegClass.contains(SrcReg)) {
+ if (getSubTarget().hasNEON()) {
+ DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
+ &AArch64::FPR128RegClass);
+ SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
+ &AArch64::FPR128RegClass);
+ BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
+ .addReg(SrcReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ } else {
+ DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
+ &AArch64::FPR32RegClass);
+ SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
+ &AArch64::FPR32RegClass);
+ BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
}
+ return;
+ }
+
+ // Copies between GPR64 and FPR64.
+ if (AArch64::FPR64RegClass.contains(DestReg) &&
+ AArch64::GPR64RegClass.contains(SrcReg)) {
+ BuildMI(MBB, I, DL, get(AArch64::FMOVXDr), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ return;
+ }
+ if (AArch64::GPR64RegClass.contains(DestReg) &&
+ AArch64::FPR64RegClass.contains(SrcReg)) {
+ BuildMI(MBB, I, DL, get(AArch64::FMOVDXr), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ return;
+ }
+ // Copies between GPR32 and FPR32.
+ if (AArch64::FPR32RegClass.contains(DestReg) &&
+ AArch64::GPR32RegClass.contains(SrcReg)) {
+ BuildMI(MBB, I, DL, get(AArch64::FMOVWSr), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ return;
+ }
+ if (AArch64::GPR32RegClass.contains(DestReg) &&
+ AArch64::FPR32RegClass.contains(SrcReg)) {
+ BuildMI(MBB, I, DL, get(AArch64::FMOVSWr), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ return;
+ }
- virtual const char *getPassName() const {
- return "Local Dynamic TLS Access Clean-up";
+ if (DestReg == AArch64::NZCV) {
+ assert(AArch64::GPR64RegClass.contains(SrcReg) && "Invalid NZCV copy");
+ BuildMI(MBB, I, DL, get(AArch64::MSR))
+ .addImm(AArch64SysReg::NZCV)
+ .addReg(SrcReg, getKillRegState(KillSrc))
+ .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define);
+ return;
+ }
+
+ if (SrcReg == AArch64::NZCV) {
+ assert(AArch64::GPR64RegClass.contains(DestReg) && "Invalid NZCV copy");
+ BuildMI(MBB, I, DL, get(AArch64::MRS))
+ .addReg(DestReg)
+ .addImm(AArch64SysReg::NZCV)
+ .addReg(AArch64::NZCV, RegState::Implicit | getKillRegState(KillSrc));
+ return;
+ }
+
+ llvm_unreachable("unimplemented reg-to-reg copy");
+}
+
+void AArch64InstrInfo::storeRegToStackSlot(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned SrcReg,
+ bool isKill, int FI, const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ DebugLoc DL;
+ if (MBBI != MBB.end())
+ DL = MBBI->getDebugLoc();
+ MachineFunction &MF = *MBB.getParent();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+ unsigned Align = MFI.getObjectAlignment(FI);
+
+ MachinePointerInfo PtrInfo(PseudoSourceValue::getFixedStack(FI));
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ PtrInfo, MachineMemOperand::MOStore, MFI.getObjectSize(FI), Align);
+ unsigned Opc = 0;
+ bool Offset = true;
+ switch (RC->getSize()) {
+ case 1:
+ if (AArch64::FPR8RegClass.hasSubClassEq(RC))
+ Opc = AArch64::STRBui;
+ break;
+ case 2:
+ if (AArch64::FPR16RegClass.hasSubClassEq(RC))
+ Opc = AArch64::STRHui;
+ break;
+ case 4:
+ if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
+ Opc = AArch64::STRWui;
+ if (TargetRegisterInfo::isVirtualRegister(SrcReg))
+ MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR32RegClass);
+ else
+ assert(SrcReg != AArch64::WSP);
+ } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
+ Opc = AArch64::STRSui;
+ break;
+ case 8:
+ if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
+ Opc = AArch64::STRXui;
+ if (TargetRegisterInfo::isVirtualRegister(SrcReg))
+ MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
+ else
+ assert(SrcReg != AArch64::SP);
+ } else if (AArch64::FPR64RegClass.hasSubClassEq(RC))
+ Opc = AArch64::STRDui;
+ break;
+ case 16:
+ if (AArch64::FPR128RegClass.hasSubClassEq(RC))
+ Opc = AArch64::STRQui;
+ else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
+ assert(getSubTarget().hasNEON() &&
+ "Unexpected register store without NEON");
+ Opc = AArch64::ST1Twov1d, Offset = false;
+ }
+ break;
+ case 24:
+ if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
+ assert(getSubTarget().hasNEON() &&
+ "Unexpected register store without NEON");
+ Opc = AArch64::ST1Threev1d, Offset = false;
+ }
+ break;
+ case 32:
+ if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
+ assert(getSubTarget().hasNEON() &&
+ "Unexpected register store without NEON");
+ Opc = AArch64::ST1Fourv1d, Offset = false;
+ } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
+ assert(getSubTarget().hasNEON() &&
+ "Unexpected register store without NEON");
+ Opc = AArch64::ST1Twov2d, Offset = false;
}
+ break;
+ case 48:
+ if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
+ assert(getSubTarget().hasNEON() &&
+ "Unexpected register store without NEON");
+ Opc = AArch64::ST1Threev2d, Offset = false;
+ }
+ break;
+ case 64:
+ if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
+ assert(getSubTarget().hasNEON() &&
+ "Unexpected register store without NEON");
+ Opc = AArch64::ST1Fourv2d, Offset = false;
+ }
+ break;
+ }
+ assert(Opc && "Unknown register class");
+
+ const MachineInstrBuilder &MI = BuildMI(MBB, MBBI, DL, get(Opc))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FI);
+
+ if (Offset)
+ MI.addImm(0);
+ MI.addMemOperand(MMO);
+}
+
+void AArch64InstrInfo::loadRegFromStackSlot(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned DestReg,
+ int FI, const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ DebugLoc DL;
+ if (MBBI != MBB.end())
+ DL = MBBI->getDebugLoc();
+ MachineFunction &MF = *MBB.getParent();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+ unsigned Align = MFI.getObjectAlignment(FI);
+ MachinePointerInfo PtrInfo(PseudoSourceValue::getFixedStack(FI));
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ PtrInfo, MachineMemOperand::MOLoad, MFI.getObjectSize(FI), Align);
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesCFG();
- AU.addRequired<MachineDominatorTree>();
- MachineFunctionPass::getAnalysisUsage(AU);
+ unsigned Opc = 0;
+ bool Offset = true;
+ switch (RC->getSize()) {
+ case 1:
+ if (AArch64::FPR8RegClass.hasSubClassEq(RC))
+ Opc = AArch64::LDRBui;
+ break;
+ case 2:
+ if (AArch64::FPR16RegClass.hasSubClassEq(RC))
+ Opc = AArch64::LDRHui;
+ break;
+ case 4:
+ if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
+ Opc = AArch64::LDRWui;
+ if (TargetRegisterInfo::isVirtualRegister(DestReg))
+ MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR32RegClass);
+ else
+ assert(DestReg != AArch64::WSP);
+ } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
+ Opc = AArch64::LDRSui;
+ break;
+ case 8:
+ if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
+ Opc = AArch64::LDRXui;
+ if (TargetRegisterInfo::isVirtualRegister(DestReg))
+ MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR64RegClass);
+ else
+ assert(DestReg != AArch64::SP);
+ } else if (AArch64::FPR64RegClass.hasSubClassEq(RC))
+ Opc = AArch64::LDRDui;
+ break;
+ case 16:
+ if (AArch64::FPR128RegClass.hasSubClassEq(RC))
+ Opc = AArch64::LDRQui;
+ else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
+ assert(getSubTarget().hasNEON() &&
+ "Unexpected register load without NEON");
+ Opc = AArch64::LD1Twov1d, Offset = false;
}
- };
+ break;
+ case 24:
+ if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
+ assert(getSubTarget().hasNEON() &&
+ "Unexpected register load without NEON");
+ Opc = AArch64::LD1Threev1d, Offset = false;
+ }
+ break;
+ case 32:
+ if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
+ assert(getSubTarget().hasNEON() &&
+ "Unexpected register load without NEON");
+ Opc = AArch64::LD1Fourv1d, Offset = false;
+ } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
+ assert(getSubTarget().hasNEON() &&
+ "Unexpected register load without NEON");
+ Opc = AArch64::LD1Twov2d, Offset = false;
+ }
+ break;
+ case 48:
+ if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
+ assert(getSubTarget().hasNEON() &&
+ "Unexpected register load without NEON");
+ Opc = AArch64::LD1Threev2d, Offset = false;
+ }
+ break;
+ case 64:
+ if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
+ assert(getSubTarget().hasNEON() &&
+ "Unexpected register load without NEON");
+ Opc = AArch64::LD1Fourv2d, Offset = false;
+ }
+ break;
+ }
+ assert(Opc && "Unknown register class");
+
+ const MachineInstrBuilder &MI = BuildMI(MBB, MBBI, DL, get(Opc))
+ .addReg(DestReg, getDefRegState(true))
+ .addFrameIndex(FI);
+ if (Offset)
+ MI.addImm(0);
+ MI.addMemOperand(MMO);
+}
+
+void llvm::emitFrameOffset(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg, int Offset,
+ const AArch64InstrInfo *TII,
+ MachineInstr::MIFlag Flag, bool SetNZCV) {
+ if (DestReg == SrcReg && Offset == 0)
+ return;
+
+ bool isSub = Offset < 0;
+ if (isSub)
+ Offset = -Offset;
+
+ // FIXME: If the offset won't fit in 24-bits, compute the offset into a
+ // scratch register. If DestReg is a virtual register, use it as the
+ // scratch register; otherwise, create a new virtual register (to be
+ // replaced by the scavenger at the end of PEI). That case can be optimized
+ // slightly if DestReg is SP which is always 16-byte aligned, so the scratch
+ // register can be loaded with offset%8 and the add/sub can use an extending
+ // instruction with LSL#3.
+ // Currently the function handles any offsets but generates a poor sequence
+ // of code.
+ // assert(Offset < (1 << 24) && "unimplemented reg plus immediate");
+
+ unsigned Opc;
+ if (SetNZCV)
+ Opc = isSub ? AArch64::SUBSXri : AArch64::ADDSXri;
+ else
+ Opc = isSub ? AArch64::SUBXri : AArch64::ADDXri;
+ const unsigned MaxEncoding = 0xfff;
+ const unsigned ShiftSize = 12;
+ const unsigned MaxEncodableValue = MaxEncoding << ShiftSize;
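+ // Emit the offset in 12-bit chunks, high chunk first with LSL #12.
+ // e.g. an add of 0x123456 becomes:
+ //   add Dst, Src, #0x123, lsl #12
+ //   add Dst, Dst, #0x456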
+ while (((unsigned)Offset) >= (1 << ShiftSize)) {
+ unsigned ThisVal;
+ if (((unsigned)Offset) > MaxEncodableValue) {
+ ThisVal = MaxEncodableValue;
+ } else {
+ ThisVal = Offset & MaxEncodableValue;
+ }
+ assert((ThisVal >> ShiftSize) <= MaxEncoding &&
+ "Encoding cannot handle value that big");
+ BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg)
+ .addReg(SrcReg)
+ .addImm(ThisVal >> ShiftSize)
+ .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftSize))
+ .setMIFlag(Flag);
+
+ SrcReg = DestReg;
+ Offset -= ThisVal;
+ if (Offset == 0)
+ return;
+ }
+ BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg)
+ .addReg(SrcReg)
+ .addImm(Offset)
+ .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
+ .setMIFlag(Flag);
}
-char LDTLSCleanup::ID = 0;
-FunctionPass*
-llvm::createAArch64CleanupLocalDynamicTLSPass() { return new LDTLSCleanup(); }
+MachineInstr *
+AArch64InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ int FrameIndex) const {
+ // This is a bit of a hack. Consider this instruction:
+ //
+ // %vreg0<def> = COPY %SP; GPR64all:%vreg0
+ //
+ // We explicitly chose GPR64all for the virtual register so such a copy might
+ // be eliminated by RegisterCoalescer. However, that may not be possible, and
+ // %vreg0 may even spill. We can't spill %SP, and since it is in the GPR64all
+ // register class, TargetInstrInfo::foldMemoryOperand() is going to try.
+ //
+ // To prevent that, we are going to constrain the %vreg0 register class here.
+ //
+ // <rdar://problem/11522048>
+ //
+ if (MI->isCopy()) {
+ unsigned DstReg = MI->getOperand(0).getReg();
+ unsigned SrcReg = MI->getOperand(1).getReg();
+ if (SrcReg == AArch64::SP &&
+ TargetRegisterInfo::isVirtualRegister(DstReg)) {
+ MF.getRegInfo().constrainRegClass(DstReg, &AArch64::GPR64RegClass);
+ return nullptr;
+ }
+ if (DstReg == AArch64::SP &&
+ TargetRegisterInfo::isVirtualRegister(SrcReg)) {
+ MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
+ return nullptr;
+ }
+ }
+
+ // Cannot fold.
+ return nullptr;
+}
+
+int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI, int &Offset,
+ bool *OutUseUnscaledOp,
+ unsigned *OutUnscaledOp,
+ int *EmittableOffset) {
+ int Scale = 1;
+ bool IsSigned = false;
+ // The ImmIdx should be changed case by case if it is not 2.
+ unsigned ImmIdx = 2;
+ unsigned UnscaledOp = 0;
+ // Set output values in case of early exit.
+ if (EmittableOffset)
+ *EmittableOffset = 0;
+ if (OutUseUnscaledOp)
+ *OutUseUnscaledOp = false;
+ if (OutUnscaledOp)
+ *OutUnscaledOp = 0;
+ switch (MI.getOpcode()) {
+ default:
+ assert(0 && "unhandled opcode in isAArch64FrameOffsetLegal");
+ // Vector spills/fills can't take an immediate offset.
+ case AArch64::LD1Twov2d:
+ case AArch64::LD1Threev2d:
+ case AArch64::LD1Fourv2d:
+ case AArch64::LD1Twov1d:
+ case AArch64::LD1Threev1d:
+ case AArch64::LD1Fourv1d:
+ case AArch64::ST1Twov2d:
+ case AArch64::ST1Threev2d:
+ case AArch64::ST1Fourv2d:
+ case AArch64::ST1Twov1d:
+ case AArch64::ST1Threev1d:
+ case AArch64::ST1Fourv1d:
+ return AArch64FrameOffsetCannotUpdate;
+ case AArch64::PRFMui:
+ Scale = 8;
+ UnscaledOp = AArch64::PRFUMi;
+ break;
+ case AArch64::LDRXui:
+ Scale = 8;
+ UnscaledOp = AArch64::LDURXi;
+ break;
+ case AArch64::LDRWui:
+ Scale = 4;
+ UnscaledOp = AArch64::LDURWi;
+ break;
+ case AArch64::LDRBui:
+ Scale = 1;
+ UnscaledOp = AArch64::LDURBi;
+ break;
+ case AArch64::LDRHui:
+ Scale = 2;
+ UnscaledOp = AArch64::LDURHi;
+ break;
+ case AArch64::LDRSui:
+ Scale = 4;
+ UnscaledOp = AArch64::LDURSi;
+ break;
+ case AArch64::LDRDui:
+ Scale = 8;
+ UnscaledOp = AArch64::LDURDi;
+ break;
+ case AArch64::LDRQui:
+ Scale = 16;
+ UnscaledOp = AArch64::LDURQi;
+ break;
+ case AArch64::LDRBBui:
+ Scale = 1;
+ UnscaledOp = AArch64::LDURBBi;
+ break;
+ case AArch64::LDRHHui:
+ Scale = 2;
+ UnscaledOp = AArch64::LDURHHi;
+ break;
+ case AArch64::LDRSBXui:
+ Scale = 1;
+ UnscaledOp = AArch64::LDURSBXi;
+ break;
+ case AArch64::LDRSBWui:
+ Scale = 1;
+ UnscaledOp = AArch64::LDURSBWi;
+ break;
+ case AArch64::LDRSHXui:
+ Scale = 2;
+ UnscaledOp = AArch64::LDURSHXi;
+ break;
+ case AArch64::LDRSHWui:
+ Scale = 2;
+ UnscaledOp = AArch64::LDURSHWi;
+ break;
+ case AArch64::LDRSWui:
+ Scale = 4;
+ UnscaledOp = AArch64::LDURSWi;
+ break;
+
+ case AArch64::STRXui:
+ Scale = 8;
+ UnscaledOp = AArch64::STURXi;
+ break;
+ case AArch64::STRWui:
+ Scale = 4;
+ UnscaledOp = AArch64::STURWi;
+ break;
+ case AArch64::STRBui:
+ Scale = 1;
+ UnscaledOp = AArch64::STURBi;
+ break;
+ case AArch64::STRHui:
+ Scale = 2;
+ UnscaledOp = AArch64::STURHi;
+ break;
+ case AArch64::STRSui:
+ Scale = 4;
+ UnscaledOp = AArch64::STURSi;
+ break;
+ case AArch64::STRDui:
+ Scale = 8;
+ UnscaledOp = AArch64::STURDi;
+ break;
+ case AArch64::STRQui:
+ Scale = 16;
+ UnscaledOp = AArch64::STURQi;
+ break;
+ case AArch64::STRBBui:
+ Scale = 1;
+ UnscaledOp = AArch64::STURBBi;
+ break;
+ case AArch64::STRHHui:
+ Scale = 2;
+ UnscaledOp = AArch64::STURHHi;
+ break;
+
+ case AArch64::LDPXi:
+ case AArch64::LDPDi:
+ case AArch64::STPXi:
+ case AArch64::STPDi:
+ IsSigned = true;
+ Scale = 8;
+ break;
+ case AArch64::LDPQi:
+ case AArch64::STPQi:
+ IsSigned = true;
+ Scale = 16;
+ break;
+ case AArch64::LDPWi:
+ case AArch64::LDPSi:
+ case AArch64::STPWi:
+ case AArch64::STPSi:
+ IsSigned = true;
+ Scale = 4;
+ break;
+
+ case AArch64::LDURXi:
+ case AArch64::LDURWi:
+ case AArch64::LDURBi:
+ case AArch64::LDURHi:
+ case AArch64::LDURSi:
+ case AArch64::LDURDi:
+ case AArch64::LDURQi:
+ case AArch64::LDURHHi:
+ case AArch64::LDURBBi:
+ case AArch64::LDURSBXi:
+ case AArch64::LDURSBWi:
+ case AArch64::LDURSHXi:
+ case AArch64::LDURSHWi:
+ case AArch64::LDURSWi:
+ case AArch64::STURXi:
+ case AArch64::STURWi:
+ case AArch64::STURBi:
+ case AArch64::STURHi:
+ case AArch64::STURSi:
+ case AArch64::STURDi:
+ case AArch64::STURQi:
+ case AArch64::STURBBi:
+ case AArch64::STURHHi:
+ Scale = 1;
+ break;
+ }
+
+ Offset += MI.getOperand(ImmIdx).getImm() * Scale;
+
+ bool useUnscaledOp = false;
+ // If the offset doesn't match the scale, we rewrite the instruction to
+ // use the unscaled instruction instead. Likewise, if we have a negative
+ // offset (and have an unscaled op to use).
+ if ((Offset & (Scale - 1)) != 0 || (Offset < 0 && UnscaledOp != 0))
+ useUnscaledOp = true;
+
+ // Use an unscaled addressing mode if the instruction has a negative offset
+ // (or if the instruction is already using an unscaled addressing mode).
+ unsigned MaskBits;
+ if (IsSigned) {
+ // ldp/stp instructions.
+ MaskBits = 7;
+ Offset /= Scale;
+ } else if (UnscaledOp == 0 || useUnscaledOp) {
+ MaskBits = 9;
+ IsSigned = true;
+ Scale = 1;
+ } else {
+ MaskBits = 12;
+ IsSigned = false;
+ Offset /= Scale;
+ }
+
+ // Attempt to fold address computation.
+ int MaxOff = (1 << (MaskBits - IsSigned)) - 1;
+ int MinOff = (IsSigned ? (-MaxOff - 1) : 0);
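+ // e.g. a 9-bit signed unscaled form gives [-256, 255]; a 12-bit
+ // unsigned scaled form gives [0, 4095] in units of Scale.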
+ if (Offset >= MinOff && Offset <= MaxOff) {
+ if (EmittableOffset)
+ *EmittableOffset = Offset;
+ Offset = 0;
+ } else {
+ int NewOff = Offset < 0 ? MinOff : MaxOff;
+ if (EmittableOffset)
+ *EmittableOffset = NewOff;
+ Offset = (Offset - NewOff) * Scale;
+ }
+ if (OutUseUnscaledOp)
+ *OutUseUnscaledOp = useUnscaledOp;
+ if (OutUnscaledOp)
+ *OutUnscaledOp = UnscaledOp;
+ return AArch64FrameOffsetCanUpdate |
+ (Offset == 0 ? AArch64FrameOffsetIsLegal : 0);
+}
+
+bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
+ unsigned FrameReg, int &Offset,
+ const AArch64InstrInfo *TII) {
+ unsigned Opcode = MI.getOpcode();
+ unsigned ImmIdx = FrameRegIdx + 1;
+
+ if (Opcode == AArch64::ADDSXri || Opcode == AArch64::ADDXri) {
+ Offset += MI.getOperand(ImmIdx).getImm();
+ emitFrameOffset(*MI.getParent(), MI, MI.getDebugLoc(),
+ MI.getOperand(0).getReg(), FrameReg, Offset, TII,
+ MachineInstr::NoFlags, (Opcode == AArch64::ADDSXri));
+ MI.eraseFromParent();
+ Offset = 0;
+ return true;
+ }
+
+ int NewOffset;
+ unsigned UnscaledOp;
+ bool UseUnscaledOp;
+ int Status = isAArch64FrameOffsetLegal(MI, Offset, &UseUnscaledOp,
+ &UnscaledOp, &NewOffset);
+ if (Status & AArch64FrameOffsetCanUpdate) {
+ if (Status & AArch64FrameOffsetIsLegal)
+ // Replace the FrameIndex with FrameReg.
+ MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
+ if (UseUnscaledOp)
+ MI.setDesc(TII->get(UnscaledOp));
+
+ MI.getOperand(ImmIdx).ChangeToImmediate(NewOffset);
+ return Offset == 0;
+ }
+
+ return false;
+}
+
+void AArch64InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
+ NopInst.setOpcode(AArch64::HINT);
+ NopInst.addOperand(MCOperand::CreateImm(0));
+}
diff --git a/lib/Target/AArch64/AArch64InstrInfo.h b/lib/Target/AArch64/AArch64InstrInfo.h
index ad20f9c..90ce75f 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.h
+++ b/lib/Target/AArch64/AArch64InstrInfo.h
@@ -11,9 +11,10 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TARGET_AARCH64INSTRINFO_H
-#define LLVM_TARGET_AARCH64INSTRINFO_H
+#ifndef LLVM_TARGET_AArch64INSTRINFO_H
+#define LLVM_TARGET_AArch64INSTRINFO_H
+#include "AArch64.h"
#include "AArch64RegisterInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
@@ -23,89 +24,208 @@
namespace llvm {
class AArch64Subtarget;
+class AArch64TargetMachine;
class AArch64InstrInfo : public AArch64GenInstrInfo {
+ // Reserve bits in the MachineMemOperand target hint flags, starting at 1.
+ // They will be shifted into MOTargetHintStart when accessed.
+ enum TargetMemOperandFlags {
+ MOSuppressPair = 1
+ };
+
const AArch64RegisterInfo RI;
const AArch64Subtarget &Subtarget;
+
public:
- explicit AArch64InstrInfo(const AArch64Subtarget &TM);
+ explicit AArch64InstrInfo(const AArch64Subtarget &STI);
/// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
/// such, whenever a client has an instance of instruction info, it should
/// always be able to get register info as well (through this method).
- ///
- const TargetRegisterInfo &getRegisterInfo() const { return RI; }
+ const AArch64RegisterInfo &getRegisterInfo() const { return RI; }
const AArch64Subtarget &getSubTarget() const { return Subtarget; }
- void copyPhysReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I, DebugLoc DL,
- unsigned DestReg, unsigned SrcReg,
- bool KillSrc) const;
- void CopyPhysRegTuple(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I, DebugLoc DL,
- unsigned DestReg, unsigned SrcReg) const;
+ unsigned GetInstSizeInBytes(const MachineInstr *MI) const;
+
+ bool isCoalescableExtInstr(const MachineInstr &MI, unsigned &SrcReg,
+ unsigned &DstReg, unsigned &SubIdx) const override;
+
+ unsigned isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const override;
+ unsigned isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const override;
+
+ /// Returns true if the instruction has a shiftable register operand and
+ /// the shift value is non-zero.
+ bool hasShiftedReg(const MachineInstr *MI) const;
+
+ /// Returns true if the instruction has an extendable register operand and
+ /// the extend value is non-zero.
+ bool hasExtendedReg(const MachineInstr *MI) const;
+
+ /// \brief Does this instruction set its full destination register to zero?
+ bool isGPRZero(const MachineInstr *MI) const;
+
+ /// \brief Does this instruction rename a GPR without modifying bits?
+ bool isGPRCopy(const MachineInstr *MI) const;
+
+ /// \brief Does this instruction rename an FPR without modifying bits?
+ bool isFPRCopy(const MachineInstr *MI) const;
+
+ /// Return true if this load/store scales or extends its register offset.
+ /// This refers to scaling a dynamic index as opposed to scaled immediates.
+ /// MI should be a memory op that allows scaled addressing.
+ bool isScaledAddr(const MachineInstr *MI) const;
+
+ /// Return true if pairing the given load or store is hinted to be
+ /// unprofitable.
+ bool isLdStPairSuppressed(const MachineInstr *MI) const;
+
+ /// Hint that pairing the given load or store is unprofitable.
+ void suppressLdStPair(MachineInstr *MI) const;
+
+ bool getLdStBaseRegImmOfs(MachineInstr *LdSt, unsigned &BaseReg,
+ unsigned &Offset,
+ const TargetRegisterInfo *TRI) const override;
+
+ bool enableClusterLoads() const override { return true; }
+
+ bool shouldClusterLoads(MachineInstr *FirstLdSt, MachineInstr *SecondLdSt,
+ unsigned NumLoads) const override;
+
+ bool shouldScheduleAdjacent(MachineInstr *First,
+ MachineInstr *Second) const override;
+
+ MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF, int FrameIx,
+ uint64_t Offset, const MDNode *MDPtr,
+ DebugLoc DL) const;
+ void copyPhysRegTuple(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ DebugLoc DL, unsigned DestReg, unsigned SrcReg,
+ bool KillSrc, unsigned Opcode,
+ llvm::ArrayRef<unsigned> Indices) const;
+ void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ DebugLoc DL, unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const override;
void storeRegToStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- unsigned SrcReg, bool isKill, int FrameIndex,
+ MachineBasicBlock::iterator MBBI, unsigned SrcReg,
+ bool isKill, int FrameIndex,
const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const;
+ const TargetRegisterInfo *TRI) const override;
+
void loadRegFromStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- unsigned DestReg, int FrameIdx,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const;
+ MachineBasicBlock::iterator MBBI, unsigned DestReg,
+ int FrameIndex, const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const override;
+
+ MachineInstr *
+ foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ int FrameIndex) const override;
bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
MachineBasicBlock *&FBB,
SmallVectorImpl<MachineOperand> &Cond,
- bool AllowModify = false) const;
+ bool AllowModify = false) const override;
+ unsigned RemoveBranch(MachineBasicBlock &MBB) const override;
unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
const SmallVectorImpl<MachineOperand> &Cond,
- DebugLoc DL) const;
- unsigned RemoveBranch(MachineBasicBlock &MBB) const;
- bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
-
- bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const;
-
- /// Look through the instructions in this function and work out the largest
- /// the stack frame can be while maintaining the ability to address local
- /// slots with no complexities.
- unsigned estimateRSStackLimit(MachineFunction &MF) const;
-
- /// getAddressConstraints - For loads and stores (and PRFMs) taking an
- /// immediate offset, this function determines the constraints required for
- /// the immediate. It must satisfy:
- /// + MinOffset <= imm <= MaxOffset
- /// + imm % OffsetScale == 0
- void getAddressConstraints(const MachineInstr &MI, int &AccessScale,
- int &MinOffset, int &MaxOffset) const;
-
-
- unsigned getInstSizeInBytes(const MachineInstr &MI) const;
-
- unsigned getInstBundleLength(const MachineInstr &MI) const;
-
+ DebugLoc DL) const override;
+ bool
+ ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override;
+ bool canInsertSelect(const MachineBasicBlock &,
+ const SmallVectorImpl<MachineOperand> &Cond, unsigned,
+ unsigned, int &, int &, int &) const override;
+ void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+ DebugLoc DL, unsigned DstReg,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ unsigned TrueReg, unsigned FalseReg) const override;
+ void getNoopForMachoTarget(MCInst &NopInst) const override;
+
+ /// analyzeCompare - For a comparison instruction, return the source registers
+ /// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
+ /// Return true if the comparison instruction can be analyzed.
+ bool analyzeCompare(const MachineInstr *MI, unsigned &SrcReg,
+ unsigned &SrcReg2, int &CmpMask,
+ int &CmpValue) const override;
+ /// optimizeCompareInstr - Convert the instruction supplying the argument to
+ /// the comparison into one that sets the zero bit in the flags register.
+ bool optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg,
+ unsigned SrcReg2, int CmpMask, int CmpValue,
+ const MachineRegisterInfo *MRI) const override;
+
+private:
+ void instantiateCondBranch(MachineBasicBlock &MBB, DebugLoc DL,
+ MachineBasicBlock *TBB,
+ const SmallVectorImpl<MachineOperand> &Cond) const;
};
-bool rewriteA64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
- unsigned FrameReg, int &Offset,
- const AArch64InstrInfo &TII);
-
+/// emitFrameOffset - Emit instructions as needed to set DestReg to SrcReg
+/// plus Offset. This is intended to be used from within the prolog/epilog
+/// insertion (PEI) pass, where a virtual scratch register may be allocated
+/// if necessary, to be replaced by the scavenger at the end of PEI.
+void emitFrameOffset(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+ DebugLoc DL, unsigned DestReg, unsigned SrcReg, int Offset,
+ const AArch64InstrInfo *TII,
+ MachineInstr::MIFlag = MachineInstr::NoFlags,
+ bool SetNZCV = false);
+
+/// rewriteAArch64FrameIndex - Rewrite MI to access 'Offset' bytes from the
+/// FP. Return false if the offset could not be handled directly in MI, and
+/// return the left-over portion by reference.
+bool rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
+ unsigned FrameReg, int &Offset,
+ const AArch64InstrInfo *TII);
+
+/// \brief Use to report the frame offset status in isAArch64FrameOffsetLegal.
+enum AArch64FrameOffsetStatus {
+ AArch64FrameOffsetCannotUpdate = 0x0, ///< Offset cannot apply.
+ AArch64FrameOffsetIsLegal = 0x1, ///< Offset is legal.
+ AArch64FrameOffsetCanUpdate = 0x2 ///< Offset can apply, at least partly.
+};
-void emitRegUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
- DebugLoc dl, const TargetInstrInfo &TII,
- unsigned DstReg, unsigned SrcReg, unsigned ScratchReg,
- int64_t NumBytes,
- MachineInstr::MIFlag MIFlags = MachineInstr::NoFlags);
+/// \brief Check if the @p Offset is a valid frame offset for @p MI.
+/// The returned value reports the validity of the frame offset for @p MI.
+/// It uses the values defined by AArch64FrameOffsetStatus for that.
+/// If result == AArch64FrameOffsetCannotUpdate, @p MI cannot be updated to
+/// use an offset.
+/// If result & AArch64FrameOffsetIsLegal, @p Offset can be completely
+/// rewritten in @p MI.
+/// If result & AArch64FrameOffsetCanUpdate, @p Offset contains the
+/// amount that is off the limit of the legal offset.
+/// If set, @p OutUseUnscaledOp will contain whether @p MI should be
+/// turned into an unscaled operator, whose opcode is in @p OutUnscaledOp.
+/// If set, @p EmittableOffset contains the amount that can be set in @p MI
+/// (possibly with @p OutUnscaledOp if OutUseUnscaledOp is true) and that
+/// is a legal offset.
+int isAArch64FrameOffsetLegal(const MachineInstr &MI, int &Offset,
+ bool *OutUseUnscaledOp = nullptr,
+ unsigned *OutUnscaledOp = nullptr,
+ int *EmittableOffset = nullptr);
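+//
+// Caller-side sketch (assumes MI and Offset are already in scope) showing
+// how the status bits are meant to be decoded:
+//   bool UseUnscaled; unsigned UnscaledOp; int Emittable;
+//   int Status = isAArch64FrameOffsetLegal(MI, Offset, &UseUnscaled,
+//                                          &UnscaledOp, &Emittable);
+//   if (Status == AArch64FrameOffsetCannotUpdate) {
+//     // MI cannot take an immediate offset; go through a scratch register.
+//   } else if (Status & AArch64FrameOffsetCanUpdate) {
+//     // Fold Emittable into MI (switching to UnscaledOp if UseUnscaled);
+//     // Offset now holds the residue, which is zero when
+//     // AArch64FrameOffsetIsLegal is also set.
+//   }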
+
+static inline bool isUncondBranchOpcode(int Opc) { return Opc == AArch64::B; }
+
+static inline bool isCondBranchOpcode(int Opc) {
+ switch (Opc) {
+ case AArch64::Bcc:
+ case AArch64::CBZW:
+ case AArch64::CBZX:
+ case AArch64::CBNZW:
+ case AArch64::CBNZX:
+ case AArch64::TBZW:
+ case AArch64::TBZX:
+ case AArch64::TBNZW:
+ case AArch64::TBNZX:
+ return true;
+ default:
+ return false;
+ }
+}
-void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
- DebugLoc dl, const TargetInstrInfo &TII,
- unsigned ScratchReg, int64_t NumBytes,
- MachineInstr::MIFlag MIFlags = MachineInstr::NoFlags);
+static inline bool isIndirectBranchOpcode(int Opc) {
+  return Opc == AArch64::BR;
+}
-}
+} // end namespace llvm
#endif
diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td
index 7d7a641..9ad36e8 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/lib/Target/AArch64/AArch64InstrInfo.td
@@ -1,4 +1,4 @@
-//===----- AArch64InstrInfo.td - AArch64 Instruction Info ----*- tablegen -*-=//
+//=- AArch64InstrInfo.td - Describe the AArch64 Instructions -*- tablegen -*-=//
//
// The LLVM Compiler Infrastructure
//
@@ -7,7 +7,7 @@
//
//===----------------------------------------------------------------------===//
//
-// This file describes the AArch64 scalar instructions in TableGen format.
+// AArch64 Instruction definitions.
//
//===----------------------------------------------------------------------===//
@@ -19,5368 +19,5266 @@ def HasFPARMv8 : Predicate<"Subtarget->hasFPARMv8()">,
def HasNEON : Predicate<"Subtarget->hasNEON()">,
AssemblerPredicate<"FeatureNEON", "neon">;
def HasCrypto : Predicate<"Subtarget->hasCrypto()">,
- AssemblerPredicate<"FeatureCrypto","crypto">;
-
-// Use fused MAC if more precision in FP computation is allowed.
-def UseFusedMAC : Predicate<"(TM.Options.AllowFPOpFusion =="
- " FPOpFusion::Fast)">;
-include "AArch64InstrFormats.td"
-
-//===----------------------------------------------------------------------===//
-// AArch64 specific pattern fragments.
-//
-// An 'fmul' node with a single use.
-def fmul_su : PatFrag<(ops node:$lhs, node:$rhs), (fmul node:$lhs, node:$rhs),[{
- return N->hasOneUse();
-}]>;
-
+ AssemblerPredicate<"FeatureCrypto", "crypto">;
+def HasCRC : Predicate<"Subtarget->hasCRC()">,
+ AssemblerPredicate<"FeatureCRC", "crc">;
+def IsLE : Predicate<"Subtarget->isLittleEndian()">;
+def IsBE : Predicate<"!Subtarget->isLittleEndian()">;
//===----------------------------------------------------------------------===//
-// Target-specific ISD nodes and profiles
-//===----------------------------------------------------------------------===//
-
-def SDT_A64ret : SDTypeProfile<0, 0, []>;
-def A64ret : SDNode<"AArch64ISD::Ret", SDT_A64ret, [SDNPHasChain,
- SDNPOptInGlue,
- SDNPVariadic]>;
-
-// (ins NZCV, Condition, Dest)
-def SDT_A64br_cc : SDTypeProfile<0, 3, [SDTCisVT<0, i32>]>;
-def A64br_cc : SDNode<"AArch64ISD::BR_CC", SDT_A64br_cc, [SDNPHasChain]>;
-
-// (outs Result), (ins NZCV, IfTrue, IfFalse, Condition)
-def SDT_A64select_cc : SDTypeProfile<1, 4, [SDTCisVT<1, i32>,
- SDTCisSameAs<0, 2>,
- SDTCisSameAs<2, 3>]>;
-def A64select_cc : SDNode<"AArch64ISD::SELECT_CC", SDT_A64select_cc>;
-
-// (outs NZCV), (ins LHS, RHS, Condition)
-def SDT_A64setcc : SDTypeProfile<1, 3, [SDTCisVT<0, i32>,
- SDTCisSameAs<1, 2>]>;
-def A64setcc : SDNode<"AArch64ISD::SETCC", SDT_A64setcc>;
-
-
-// (outs GPR64), (ins)
-def A64threadpointer : SDNode<"AArch64ISD::THREAD_POINTER", SDTPtrLeaf>;
-
-// A64 compares don't care about the cond really (they set all flags) so a
-// simple binary operator is useful.
-def A64cmp : PatFrag<(ops node:$lhs, node:$rhs),
- (A64setcc node:$lhs, node:$rhs, cond)>;
-
-
-// When matching a notional (CMP op1, (sub 0, op2)), we'd like to use a CMN
-// instruction on the grounds that "op1 - (-op2) == op1 + op2". However, the C
-// and V flags can be set differently by this operation. It comes down to
-// whether "SInt(~op2)+1 == SInt(~op2+1)" (and the same for UInt). If they are
-// then everything is fine. If not then the optimization is wrong. Thus general
-// comparisons are only valid if op2 != 0.
-
-// So, finally, the only LLVM-native comparisons that don't mention C and V are
-// SETEQ and SETNE. They're the only ones we can safely use CMN for in the
-// absence of information about op2.
-def equality_cond : PatLeaf<(cond), [{
- return N->get() == ISD::SETEQ || N->get() == ISD::SETNE;
-}]>;
-
-def A64cmn : PatFrag<(ops node:$lhs, node:$rhs),
- (A64setcc node:$lhs, (sub 0, node:$rhs), equality_cond)>;
-
-// There are two layers of indirection here, driven by the following
-// considerations.
-// + TableGen does not know CodeModel or Reloc so that decision should be
-// made for a variable/address at ISelLowering.
-// + The output of ISelLowering should be selectable (hence the Wrapper,
-// rather than a bare target opcode)
-def SDTAArch64WrapperLarge : SDTypeProfile<1, 4, [SDTCisSameAs<0, 1>,
- SDTCisSameAs<0, 2>,
- SDTCisSameAs<0, 3>,
- SDTCisSameAs<0, 4>,
- SDTCisPtrTy<0>]>;
-
-def A64WrapperLarge :SDNode<"AArch64ISD::WrapperLarge", SDTAArch64WrapperLarge>;
-
-def SDTAArch64WrapperSmall : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>,
- SDTCisSameAs<1, 2>,
- SDTCisVT<3, i32>,
- SDTCisPtrTy<0>]>;
-
-def A64WrapperSmall :SDNode<"AArch64ISD::WrapperSmall", SDTAArch64WrapperSmall>;
-
-
-def SDTAArch64GOTLoad : SDTypeProfile<1, 1, [SDTCisPtrTy<0>, SDTCisPtrTy<1>]>;
-def A64GOTLoad : SDNode<"AArch64ISD::GOTLoad", SDTAArch64GOTLoad,
- [SDNPHasChain]>;
-
-
-// (A64BFI LHS, RHS, LSB, Width)
-def SDTA64BFI : SDTypeProfile<1, 4, [SDTCisSameAs<0, 1>,
- SDTCisSameAs<1, 2>,
- SDTCisVT<3, i64>,
- SDTCisVT<4, i64>]>;
-
-def A64Bfi : SDNode<"AArch64ISD::BFI", SDTA64BFI>;
-
-// (A64EXTR HiReg, LoReg, LSB)
-def SDTA64EXTR : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>,
- SDTCisVT<3, i64>]>;
-def A64Extr : SDNode<"AArch64ISD::EXTR", SDTA64EXTR>;
-
-// (A64[SU]BFX Field, ImmR, ImmS).
+// AArch64-specific DAG Nodes.
//
-// Note that ImmR and ImmS are already encoded for the actual instructions. The
-// more natural LSB and Width mix together to form ImmR and ImmS, something
-// which TableGen can't handle.
-def SDTA64BFX : SDTypeProfile<1, 3, [SDTCisVT<2, i64>, SDTCisVT<3, i64>]>;
-def A64Sbfx : SDNode<"AArch64ISD::SBFX", SDTA64BFX>;
-def A64Ubfx : SDNode<"AArch64ISD::UBFX", SDTA64BFX>;
+// SDTBinaryArithWithFlagsOut - RES1, FLAGS = op LHS, RHS
+def SDTBinaryArithWithFlagsOut : SDTypeProfile<2, 2,
+ [SDTCisSameAs<0, 2>,
+ SDTCisSameAs<0, 3>,
+ SDTCisInt<0>, SDTCisVT<1, i32>]>;
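+// Reading the profile: two results, two operands; operands 2 and 3 share the
+// integer type of result 0, and result 1, the NZCV flags, is pinned to i32.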
+
+// SDTBinaryArithWithFlagsIn - RES1, FLAGS = op LHS, RHS, FLAGS
+def SDTBinaryArithWithFlagsIn : SDTypeProfile<1, 3,
+ [SDTCisSameAs<0, 1>,
+ SDTCisSameAs<0, 2>,
+ SDTCisInt<0>,
+ SDTCisVT<3, i32>]>;
+
+// SDTBinaryArithWithFlagsInOut - RES1, FLAGS = op LHS, RHS, FLAGS
+def SDTBinaryArithWithFlagsInOut : SDTypeProfile<2, 3,
+ [SDTCisSameAs<0, 2>,
+ SDTCisSameAs<0, 3>,
+ SDTCisInt<0>,
+ SDTCisVT<1, i32>,
+ SDTCisVT<4, i32>]>;
+
+def SDT_AArch64Brcond : SDTypeProfile<0, 3,
+ [SDTCisVT<0, OtherVT>, SDTCisVT<1, i32>,
+ SDTCisVT<2, i32>]>;
+def SDT_AArch64cbz : SDTypeProfile<0, 2, [SDTCisInt<0>, SDTCisVT<1, OtherVT>]>;
+def SDT_AArch64tbz : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>,
+ SDTCisVT<2, OtherVT>]>;
+
+
+def SDT_AArch64CSel : SDTypeProfile<1, 4,
+ [SDTCisSameAs<0, 1>,
+ SDTCisSameAs<0, 2>,
+ SDTCisInt<3>,
+ SDTCisVT<4, i32>]>;
+def SDT_AArch64FCmp : SDTypeProfile<0, 2,
+ [SDTCisFP<0>,
+ SDTCisSameAs<0, 1>]>;
+def SDT_AArch64Dup : SDTypeProfile<1, 1, [SDTCisVec<0>]>;
+def SDT_AArch64DupLane : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisInt<2>]>;
+def SDT_AArch64Zip : SDTypeProfile<1, 2, [SDTCisVec<0>,
+ SDTCisSameAs<0, 1>,
+ SDTCisSameAs<0, 2>]>;
+def SDT_AArch64MOVIedit : SDTypeProfile<1, 1, [SDTCisInt<1>]>;
+def SDT_AArch64MOVIshift : SDTypeProfile<1, 2, [SDTCisInt<1>, SDTCisInt<2>]>;
+def SDT_AArch64vecimm : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
+ SDTCisInt<2>, SDTCisInt<3>]>;
+def SDT_AArch64UnaryVec: SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>;
+def SDT_AArch64ExtVec: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
+ SDTCisSameAs<0,2>, SDTCisInt<3>]>;
+def SDT_AArch64vshift : SDTypeProfile<1, 2, [SDTCisSameAs<0,1>, SDTCisInt<2>]>;
+
+def SDT_AArch64unvec : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>;
+def SDT_AArch64fcmpz : SDTypeProfile<1, 1, []>;
+def SDT_AArch64fcmp : SDTypeProfile<1, 2, [SDTCisSameAs<1,2>]>;
+def SDT_AArch64binvec : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
+ SDTCisSameAs<0,2>]>;
+def SDT_AArch64trivec : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
+ SDTCisSameAs<0,2>,
+ SDTCisSameAs<0,3>]>;
+def SDT_AArch64TCRET : SDTypeProfile<0, 2, [SDTCisPtrTy<0>]>;
+def SDT_AArch64PREFETCH : SDTypeProfile<0, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<1>]>;
+
+def SDT_AArch64ITOF : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisSameAs<0,1>]>;
+
+def SDT_AArch64TLSDescCall : SDTypeProfile<0, -2, [SDTCisPtrTy<0>,
+ SDTCisPtrTy<1>]>;
+def SDT_AArch64WrapperLarge : SDTypeProfile<1, 4,
+ [SDTCisVT<0, i64>, SDTCisVT<1, i32>,
+ SDTCisSameAs<1, 2>, SDTCisSameAs<1, 3>,
+ SDTCisSameAs<1, 4>]>;
+
+
+// Node definitions.
+def AArch64adrp : SDNode<"AArch64ISD::ADRP", SDTIntUnaryOp, []>;
+def AArch64addlow : SDNode<"AArch64ISD::ADDlow", SDTIntBinOp, []>;
+def AArch64LOADgot : SDNode<"AArch64ISD::LOADgot", SDTIntUnaryOp>;
+def AArch64callseq_start : SDNode<"ISD::CALLSEQ_START",
+ SDCallSeqStart<[ SDTCisVT<0, i32> ]>,
+ [SDNPHasChain, SDNPOutGlue]>;
+def AArch64callseq_end : SDNode<"ISD::CALLSEQ_END",
+ SDCallSeqEnd<[ SDTCisVT<0, i32>,
+ SDTCisVT<1, i32> ]>,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+def AArch64call : SDNode<"AArch64ISD::CALL",
+ SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
+def AArch64brcond : SDNode<"AArch64ISD::BRCOND", SDT_AArch64Brcond,
+ [SDNPHasChain]>;
+def AArch64cbz : SDNode<"AArch64ISD::CBZ", SDT_AArch64cbz,
+ [SDNPHasChain]>;
+def AArch64cbnz : SDNode<"AArch64ISD::CBNZ", SDT_AArch64cbz,
+ [SDNPHasChain]>;
+def AArch64tbz : SDNode<"AArch64ISD::TBZ", SDT_AArch64tbz,
+ [SDNPHasChain]>;
+def AArch64tbnz : SDNode<"AArch64ISD::TBNZ", SDT_AArch64tbz,
+ [SDNPHasChain]>;
+
+
+def AArch64csel : SDNode<"AArch64ISD::CSEL", SDT_AArch64CSel>;
+def AArch64csinv : SDNode<"AArch64ISD::CSINV", SDT_AArch64CSel>;
+def AArch64csneg : SDNode<"AArch64ISD::CSNEG", SDT_AArch64CSel>;
+def AArch64csinc : SDNode<"AArch64ISD::CSINC", SDT_AArch64CSel>;
+def AArch64retflag : SDNode<"AArch64ISD::RET_FLAG", SDTNone,
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+def AArch64adc       : SDNode<"AArch64ISD::ADC",  SDTBinaryArithWithFlagsIn>;
+def AArch64sbc : SDNode<"AArch64ISD::SBC", SDTBinaryArithWithFlagsIn>;
+def AArch64add_flag : SDNode<"AArch64ISD::ADDS", SDTBinaryArithWithFlagsOut,
+ [SDNPCommutative]>;
+def AArch64sub_flag : SDNode<"AArch64ISD::SUBS", SDTBinaryArithWithFlagsOut>;
+def AArch64and_flag : SDNode<"AArch64ISD::ANDS", SDTBinaryArithWithFlagsOut,
+ [SDNPCommutative]>;
+def AArch64adc_flag : SDNode<"AArch64ISD::ADCS", SDTBinaryArithWithFlagsInOut>;
+def AArch64sbc_flag : SDNode<"AArch64ISD::SBCS", SDTBinaryArithWithFlagsInOut>;
+
+def AArch64threadpointer : SDNode<"AArch64ISD::THREAD_POINTER", SDTPtrLeaf>;
+
+def AArch64fcmp : SDNode<"AArch64ISD::FCMP", SDT_AArch64FCmp>;
+
+def AArch64fmax : SDNode<"AArch64ISD::FMAX", SDTFPBinOp>;
+def AArch64fmin : SDNode<"AArch64ISD::FMIN", SDTFPBinOp>;
+
+def AArch64dup : SDNode<"AArch64ISD::DUP", SDT_AArch64Dup>;
+def AArch64duplane8 : SDNode<"AArch64ISD::DUPLANE8", SDT_AArch64DupLane>;
+def AArch64duplane16 : SDNode<"AArch64ISD::DUPLANE16", SDT_AArch64DupLane>;
+def AArch64duplane32 : SDNode<"AArch64ISD::DUPLANE32", SDT_AArch64DupLane>;
+def AArch64duplane64 : SDNode<"AArch64ISD::DUPLANE64", SDT_AArch64DupLane>;
+
+def AArch64zip1 : SDNode<"AArch64ISD::ZIP1", SDT_AArch64Zip>;
+def AArch64zip2 : SDNode<"AArch64ISD::ZIP2", SDT_AArch64Zip>;
+def AArch64uzp1 : SDNode<"AArch64ISD::UZP1", SDT_AArch64Zip>;
+def AArch64uzp2 : SDNode<"AArch64ISD::UZP2", SDT_AArch64Zip>;
+def AArch64trn1 : SDNode<"AArch64ISD::TRN1", SDT_AArch64Zip>;
+def AArch64trn2 : SDNode<"AArch64ISD::TRN2", SDT_AArch64Zip>;
+
+def AArch64movi_edit : SDNode<"AArch64ISD::MOVIedit", SDT_AArch64MOVIedit>;
+def AArch64movi_shift : SDNode<"AArch64ISD::MOVIshift", SDT_AArch64MOVIshift>;
+def AArch64movi_msl : SDNode<"AArch64ISD::MOVImsl", SDT_AArch64MOVIshift>;
+def AArch64mvni_shift : SDNode<"AArch64ISD::MVNIshift", SDT_AArch64MOVIshift>;
+def AArch64mvni_msl : SDNode<"AArch64ISD::MVNImsl", SDT_AArch64MOVIshift>;
+def AArch64movi : SDNode<"AArch64ISD::MOVI", SDT_AArch64MOVIedit>;
+def AArch64fmov : SDNode<"AArch64ISD::FMOV", SDT_AArch64MOVIedit>;
+
+def AArch64rev16 : SDNode<"AArch64ISD::REV16", SDT_AArch64UnaryVec>;
+def AArch64rev32 : SDNode<"AArch64ISD::REV32", SDT_AArch64UnaryVec>;
+def AArch64rev64 : SDNode<"AArch64ISD::REV64", SDT_AArch64UnaryVec>;
+def AArch64ext : SDNode<"AArch64ISD::EXT", SDT_AArch64ExtVec>;
+
+def AArch64vashr : SDNode<"AArch64ISD::VASHR", SDT_AArch64vshift>;
+def AArch64vlshr : SDNode<"AArch64ISD::VLSHR", SDT_AArch64vshift>;
+def AArch64vshl : SDNode<"AArch64ISD::VSHL", SDT_AArch64vshift>;
+def AArch64sqshli : SDNode<"AArch64ISD::SQSHL_I", SDT_AArch64vshift>;
+def AArch64uqshli : SDNode<"AArch64ISD::UQSHL_I", SDT_AArch64vshift>;
+def AArch64sqshlui : SDNode<"AArch64ISD::SQSHLU_I", SDT_AArch64vshift>;
+def AArch64srshri : SDNode<"AArch64ISD::SRSHR_I", SDT_AArch64vshift>;
+def AArch64urshri : SDNode<"AArch64ISD::URSHR_I", SDT_AArch64vshift>;
+
+def AArch64not: SDNode<"AArch64ISD::NOT", SDT_AArch64unvec>;
+def AArch64bit: SDNode<"AArch64ISD::BIT", SDT_AArch64trivec>;
+def AArch64bsl: SDNode<"AArch64ISD::BSL", SDT_AArch64trivec>;
+
+def AArch64cmeq: SDNode<"AArch64ISD::CMEQ", SDT_AArch64binvec>;
+def AArch64cmge: SDNode<"AArch64ISD::CMGE", SDT_AArch64binvec>;
+def AArch64cmgt: SDNode<"AArch64ISD::CMGT", SDT_AArch64binvec>;
+def AArch64cmhi: SDNode<"AArch64ISD::CMHI", SDT_AArch64binvec>;
+def AArch64cmhs: SDNode<"AArch64ISD::CMHS", SDT_AArch64binvec>;
+
+def AArch64fcmeq: SDNode<"AArch64ISD::FCMEQ", SDT_AArch64fcmp>;
+def AArch64fcmge: SDNode<"AArch64ISD::FCMGE", SDT_AArch64fcmp>;
+def AArch64fcmgt: SDNode<"AArch64ISD::FCMGT", SDT_AArch64fcmp>;
+
+def AArch64cmeqz: SDNode<"AArch64ISD::CMEQz", SDT_AArch64unvec>;
+def AArch64cmgez: SDNode<"AArch64ISD::CMGEz", SDT_AArch64unvec>;
+def AArch64cmgtz: SDNode<"AArch64ISD::CMGTz", SDT_AArch64unvec>;
+def AArch64cmlez: SDNode<"AArch64ISD::CMLEz", SDT_AArch64unvec>;
+def AArch64cmltz: SDNode<"AArch64ISD::CMLTz", SDT_AArch64unvec>;
+def AArch64cmtst : PatFrag<(ops node:$LHS, node:$RHS),
+ (AArch64not (AArch64cmeqz (and node:$LHS, node:$RHS)))>;
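+// CMTST sets each lane to all-ones exactly when (LHS & RHS) != 0, hence the
+// not-of-compare-against-zero expansion above.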
+
+def AArch64fcmeqz: SDNode<"AArch64ISD::FCMEQz", SDT_AArch64fcmpz>;
+def AArch64fcmgez: SDNode<"AArch64ISD::FCMGEz", SDT_AArch64fcmpz>;
+def AArch64fcmgtz: SDNode<"AArch64ISD::FCMGTz", SDT_AArch64fcmpz>;
+def AArch64fcmlez: SDNode<"AArch64ISD::FCMLEz", SDT_AArch64fcmpz>;
+def AArch64fcmltz: SDNode<"AArch64ISD::FCMLTz", SDT_AArch64fcmpz>;
+
+def AArch64bici: SDNode<"AArch64ISD::BICi", SDT_AArch64vecimm>;
+def AArch64orri: SDNode<"AArch64ISD::ORRi", SDT_AArch64vecimm>;
+
+def AArch64neg : SDNode<"AArch64ISD::NEG", SDT_AArch64unvec>;
+
+def AArch64tcret: SDNode<"AArch64ISD::TC_RETURN", SDT_AArch64TCRET,
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+
+def AArch64Prefetch : SDNode<"AArch64ISD::PREFETCH", SDT_AArch64PREFETCH,
+ [SDNPHasChain, SDNPSideEffect]>;
+
+def AArch64sitof: SDNode<"AArch64ISD::SITOF", SDT_AArch64ITOF>;
+def AArch64uitof: SDNode<"AArch64ISD::UITOF", SDT_AArch64ITOF>;
+
+def AArch64tlsdesc_call : SDNode<"AArch64ISD::TLSDESC_CALL",
+ SDT_AArch64TLSDescCall,
+ [SDNPInGlue, SDNPOutGlue, SDNPHasChain,
+ SDNPVariadic]>;
+
+def AArch64WrapperLarge : SDNode<"AArch64ISD::WrapperLarge",
+ SDT_AArch64WrapperLarge>;
-class BinOpFrag<dag res> : PatFrag<(ops node:$LHS, node:$RHS), res>;
//===----------------------------------------------------------------------===//
-// Call sequence pseudo-instructions
-//===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
-def SDT_AArch64Call : SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>;
-def AArch64Call : SDNode<"AArch64ISD::Call", SDT_AArch64Call,
- [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>;
-
-def AArch64tcret : SDNode<"AArch64ISD::TC_RETURN", SDT_AArch64Call,
- [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
-
-// The TLSDESCCALL node is a variant call which goes to an indirectly calculated
-// destination but needs a relocation against a fixed symbol. As such it has two
-// certain operands: the callee and the relocated variable.
+// AArch64 Instruction Predicate Definitions.
//
-// The TLS ABI only allows it to be selected to a BLR instructin (with
-// appropriate relocation).
-def SDTTLSDescCall : SDTypeProfile<0, -2, [SDTCisPtrTy<0>, SDTCisPtrTy<1>]>;
-
-def A64tlsdesc_blr : SDNode<"AArch64ISD::TLSDESCCALL", SDTTLSDescCall,
- [SDNPInGlue, SDNPOutGlue, SDNPHasChain,
- SDNPVariadic]>;
-
-
-def SDT_AArch64CallSeqStart : SDCallSeqStart<[ SDTCisPtrTy<0> ]>;
-def AArch64callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_AArch64CallSeqStart,
- [SDNPHasChain, SDNPOutGlue]>;
+def HasZCZ : Predicate<"Subtarget->hasZeroCycleZeroing()">;
+def NoZCZ : Predicate<"!Subtarget->hasZeroCycleZeroing()">;
+def IsDarwin : Predicate<"Subtarget->isTargetDarwin()">;
+def IsNotDarwin: Predicate<"!Subtarget->isTargetDarwin()">;
+def ForCodeSize : Predicate<"ForCodeSize">;
+def NotForCodeSize : Predicate<"!ForCodeSize">;
-def SDT_AArch64CallSeqEnd : SDCallSeqEnd<[ SDTCisPtrTy<0>, SDTCisPtrTy<1> ]>;
-def AArch64callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_AArch64CallSeqEnd,
- [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
-
-
-
-// These pseudo-instructions have special semantics by virtue of being passed to
-// the InstrInfo constructor. CALLSEQ_START/CALLSEQ_END are produced by
-// LowerCall to (in our case) tell the back-end about stack adjustments for
-// arguments passed on the stack. Here we select those markers to
-// pseudo-instructions which explicitly set the stack, and finally in the
-// RegisterInfo we convert them to a true stack adjustment.
-let Defs = [XSP], Uses = [XSP] in {
- def ADJCALLSTACKDOWN : PseudoInst<(outs), (ins i64imm:$amt),
- [(AArch64callseq_start timm:$amt)]>;
-
- def ADJCALLSTACKUP : PseudoInst<(outs), (ins i64imm:$amt1, i64imm:$amt2),
- [(AArch64callseq_end timm:$amt1, timm:$amt2)]>;
-}
+include "AArch64InstrFormats.td"
//===----------------------------------------------------------------------===//
-// Atomic operation pseudo-instructions
-//===----------------------------------------------------------------------===//
-
-// These get selected from C++ code as a pretty much direct translation from the
-// generic DAG nodes. The one exception is the AtomicOrdering is added as an
-// operand so that the eventual lowering can make use of it and choose
-// acquire/release operations when required.
-
-let usesCustomInserter = 1, hasCtrlDep = 1, mayLoad = 1, mayStore = 1 in {
-multiclass AtomicSizes {
- def _I8 : PseudoInst<(outs GPR32:$dst),
- (ins GPR64xsp:$ptr, GPR32:$incr, i32imm:$ordering), []>;
- def _I16 : PseudoInst<(outs GPR32:$dst),
- (ins GPR64xsp:$ptr, GPR32:$incr, i32imm:$ordering), []>;
- def _I32 : PseudoInst<(outs GPR32:$dst),
- (ins GPR64xsp:$ptr, GPR32:$incr, i32imm:$ordering), []>;
- def _I64 : PseudoInst<(outs GPR64:$dst),
- (ins GPR64xsp:$ptr, GPR64:$incr, i32imm:$ordering), []>;
-}
-}
-
-defm ATOMIC_LOAD_ADD : AtomicSizes;
-defm ATOMIC_LOAD_SUB : AtomicSizes;
-defm ATOMIC_LOAD_AND : AtomicSizes;
-defm ATOMIC_LOAD_OR : AtomicSizes;
-defm ATOMIC_LOAD_XOR : AtomicSizes;
-defm ATOMIC_LOAD_NAND : AtomicSizes;
-defm ATOMIC_SWAP : AtomicSizes;
-let Defs = [NZCV] in {
- // These operations need a CMP to calculate the correct value
- defm ATOMIC_LOAD_MIN : AtomicSizes;
- defm ATOMIC_LOAD_MAX : AtomicSizes;
- defm ATOMIC_LOAD_UMIN : AtomicSizes;
- defm ATOMIC_LOAD_UMAX : AtomicSizes;
-}
-
-class AtomicCmpSwap<RegisterClass GPRData>
- : PseudoInst<(outs GPRData:$dst),
- (ins GPR64xsp:$ptr, GPRData:$old, GPRData:$new,
- i32imm:$ordering), []> {
- let usesCustomInserter = 1;
- let hasCtrlDep = 1;
- let mayLoad = 1;
- let mayStore = 1;
- let Defs = [NZCV];
-}
-
-def ATOMIC_CMP_SWAP_I8 : AtomicCmpSwap<GPR32>;
-def ATOMIC_CMP_SWAP_I16 : AtomicCmpSwap<GPR32>;
-def ATOMIC_CMP_SWAP_I32 : AtomicCmpSwap<GPR32>;
-def ATOMIC_CMP_SWAP_I64 : AtomicCmpSwap<GPR64>;
//===----------------------------------------------------------------------===//
-// Add-subtract (extended register) instructions
+// Miscellaneous instructions.
//===----------------------------------------------------------------------===//
-// Contains: ADD, ADDS, SUB, SUBS + aliases CMN, CMP
-
-// The RHS of these operations is conceptually a sign/zero-extended
-// register, optionally shifted left by 1-4. The extension can be a
-// NOP (e.g. "sxtx" sign-extending a 64-bit register to 64-bits) but
-// must be specified with one exception:
-
-// If one of the registers is sp/wsp then LSL is an alias for UXTW in
-// 32-bit instructions and UXTX in 64-bit versions, the shift amount
-// is not optional in that case (but can explicitly be 0), and the
-// entire suffix can be skipped (e.g. "add sp, x3, x2").
-
-multiclass extend_operands<string PREFIX, string Diag> {
- def _asmoperand : AsmOperandClass {
- let Name = PREFIX;
- let RenderMethod = "addRegExtendOperands";
- let PredicateMethod = "isRegExtend<A64SE::" # PREFIX # ">";
- let DiagnosticType = "AddSubRegExtend" # Diag;
- }
-
- def _operand : Operand<i64>,
- ImmLeaf<i64, [{ return Imm >= 0 && Imm <= 4; }]> {
- let PrintMethod = "printRegExtendOperand<A64SE::" # PREFIX # ">";
- let DecoderMethod = "DecodeRegExtendOperand";
- let ParserMatchClass = !cast<AsmOperandClass>(PREFIX # "_asmoperand");
- }
-}
-
-defm UXTB : extend_operands<"UXTB", "Small">;
-defm UXTH : extend_operands<"UXTH", "Small">;
-defm UXTW : extend_operands<"UXTW", "Small">;
-defm UXTX : extend_operands<"UXTX", "Large">;
-defm SXTB : extend_operands<"SXTB", "Small">;
-defm SXTH : extend_operands<"SXTH", "Small">;
-defm SXTW : extend_operands<"SXTW", "Small">;
-defm SXTX : extend_operands<"SXTX", "Large">;
-
-def LSL_extasmoperand : AsmOperandClass {
- let Name = "RegExtendLSL";
- let RenderMethod = "addRegExtendOperands";
- let DiagnosticType = "AddSubRegExtendLarge";
-}
-
-def LSL_extoperand : Operand<i64> {
- let ParserMatchClass = LSL_extasmoperand;
-}
-
-
-// The patterns for various sign-extensions are a little ugly and
-// non-uniform because everything has already been promoted to the
-// legal i64 and i32 types. We'll wrap the various variants up in a
-// class for use later.
-class extend_types {
- dag uxtb; dag uxth; dag uxtw; dag uxtx;
- dag sxtb; dag sxth; dag sxtw; dag sxtx;
- ValueType ty;
- RegisterClass GPR;
-}
-
-def extends_to_i64 : extend_types {
- let uxtb = (and (anyext i32:$Rm), 255);
- let uxth = (and (anyext i32:$Rm), 65535);
- let uxtw = (zext i32:$Rm);
- let uxtx = (i64 $Rm);
- let sxtb = (sext_inreg (anyext i32:$Rm), i8);
- let sxth = (sext_inreg (anyext i32:$Rm), i16);
- let sxtw = (sext i32:$Rm);
- let sxtx = (i64 $Rm);
-
- let ty = i64;
- let GPR = GPR64xsp;
-}
-
-
-def extends_to_i32 : extend_types {
- let uxtb = (and i32:$Rm, 255);
- let uxth = (and i32:$Rm, 65535);
- let uxtw = (i32 i32:$Rm);
- let uxtx = (i32 i32:$Rm);
-
- let sxtb = (sext_inreg i32:$Rm, i8);
- let sxth = (sext_inreg i32:$Rm, i16);
- let sxtw = (i32 i32:$Rm);
- let sxtx = (i32 i32:$Rm);
-
- let ty = i32;
- let GPR = GPR32wsp;
-}
-
-// Now, six of the extensions supported are easy and uniform: if the source size
-// is 32-bits or less, then Rm is always a 32-bit register. We'll instantiate
-// those instructions in one block.
-
-// The uxtx/sxtx could potentially be merged in, but three facts dissuaded me:
-// + It would break the naming scheme: either ADDxx_uxtx or ADDww_uxtx would
-// be impossible.
-// + Patterns are very different as well.
-// + Passing different registers would be ugly (more fields in extend_types
-// would probably be the best option).
-multiclass addsub_exts<bit sf, bit op, bit S, string asmop,
- SDPatternOperator opfrag,
- dag outs, extend_types exts> {
- def w_uxtb : A64I_addsubext<sf, op, S, 0b00, 0b000,
- outs, (ins exts.GPR:$Rn, GPR32:$Rm, UXTB_operand:$Imm3),
- !strconcat(asmop, "$Rn, $Rm, $Imm3"),
- [(opfrag exts.ty:$Rn, (shl exts.uxtb, UXTB_operand:$Imm3))],
- NoItinerary>,
- Sched<[WriteALU, ReadALU, ReadALU]>;
- def w_uxth : A64I_addsubext<sf, op, S, 0b00, 0b001,
- outs, (ins exts.GPR:$Rn, GPR32:$Rm, UXTH_operand:$Imm3),
- !strconcat(asmop, "$Rn, $Rm, $Imm3"),
- [(opfrag exts.ty:$Rn, (shl exts.uxth, UXTH_operand:$Imm3))],
- NoItinerary>,
- Sched<[WriteALU, ReadALU, ReadALU]>;
- def w_uxtw : A64I_addsubext<sf, op, S, 0b00, 0b010,
- outs, (ins exts.GPR:$Rn, GPR32:$Rm, UXTW_operand:$Imm3),
- !strconcat(asmop, "$Rn, $Rm, $Imm3"),
- [(opfrag exts.ty:$Rn, (shl exts.uxtw, UXTW_operand:$Imm3))],
- NoItinerary>,
- Sched<[WriteALU, ReadALU, ReadALU]>;
-
- def w_sxtb : A64I_addsubext<sf, op, S, 0b00, 0b100,
- outs, (ins exts.GPR:$Rn, GPR32:$Rm, SXTB_operand:$Imm3),
- !strconcat(asmop, "$Rn, $Rm, $Imm3"),
- [(opfrag exts.ty:$Rn, (shl exts.sxtb, SXTB_operand:$Imm3))],
- NoItinerary>,
- Sched<[WriteALU, ReadALU, ReadALU]>;
- def w_sxth : A64I_addsubext<sf, op, S, 0b00, 0b101,
- outs, (ins exts.GPR:$Rn, GPR32:$Rm, SXTH_operand:$Imm3),
- !strconcat(asmop, "$Rn, $Rm, $Imm3"),
- [(opfrag exts.ty:$Rn, (shl exts.sxth, SXTH_operand:$Imm3))],
- NoItinerary>,
- Sched<[WriteALU, ReadALU, ReadALU]>;
- def w_sxtw : A64I_addsubext<sf, op, S, 0b00, 0b110,
- outs, (ins exts.GPR:$Rn, GPR32:$Rm, SXTW_operand:$Imm3),
- !strconcat(asmop, "$Rn, $Rm, $Imm3"),
- [(opfrag exts.ty:$Rn, (shl exts.sxtw, SXTW_operand:$Imm3))],
- NoItinerary>,
- Sched<[WriteALU, ReadALU, ReadALU]>;
-}
-
-// These two could be merge in with the above, but their patterns aren't really
-// necessary and the naming-scheme would necessarily break:
-multiclass addsub_xxtx<bit op, bit S, string asmop, SDPatternOperator opfrag,
- dag outs> {
- def x_uxtx : A64I_addsubext<0b1, op, S, 0b00, 0b011,
- outs,
- (ins GPR64xsp:$Rn, GPR64:$Rm, UXTX_operand:$Imm3),
- !strconcat(asmop, "$Rn, $Rm, $Imm3"),
- [(opfrag i64:$Rn, (shl i64:$Rm, UXTX_operand:$Imm3))],
- NoItinerary>,
- Sched<[WriteALU, ReadALU, ReadALU]>;
-
- def x_sxtx : A64I_addsubext<0b1, op, S, 0b00, 0b111,
- outs,
- (ins GPR64xsp:$Rn, GPR64:$Rm, SXTX_operand:$Imm3),
- !strconcat(asmop, "$Rn, $Rm, $Imm3"),
- [/* No Pattern: same as uxtx */],
- NoItinerary>,
- Sched<[WriteALU, ReadALU, ReadALU]>;
-}
-
-multiclass addsub_wxtx<bit op, bit S, string asmop, dag outs> {
- def w_uxtx : A64I_addsubext<0b0, op, S, 0b00, 0b011,
- outs, (ins GPR32wsp:$Rn, GPR32:$Rm, UXTX_operand:$Imm3),
- !strconcat(asmop, "$Rn, $Rm, $Imm3"),
- [/* No pattern: probably same as uxtw */],
- NoItinerary>,
- Sched<[WriteALU, ReadALU, ReadALU]>;
-
- def w_sxtx : A64I_addsubext<0b0, op, S, 0b00, 0b111,
- outs, (ins GPR32wsp:$Rn, GPR32:$Rm, SXTX_operand:$Imm3),
- !strconcat(asmop, "$Rn, $Rm, $Imm3"),
- [/* No Pattern: probably same as uxtw */],
- NoItinerary>,
- Sched<[WriteALU, ReadALU, ReadALU]>;
-}
-
-class SetRD<RegisterClass RC, SDPatternOperator op>
- : PatFrag<(ops node:$lhs, node:$rhs), (set RC:$Rd, (op node:$lhs, node:$rhs))>;
-class SetNZCV<SDPatternOperator op>
- : PatFrag<(ops node:$lhs, node:$rhs), (set NZCV, (op node:$lhs, node:$rhs))>;
-
-defm ADDxx :addsub_exts<0b1, 0b0, 0b0, "add\t$Rd, ", SetRD<GPR64xsp, add>,
- (outs GPR64xsp:$Rd), extends_to_i64>,
- addsub_xxtx< 0b0, 0b0, "add\t$Rd, ", SetRD<GPR64xsp, add>,
- (outs GPR64xsp:$Rd)>;
-defm ADDww :addsub_exts<0b0, 0b0, 0b0, "add\t$Rd, ", SetRD<GPR32wsp, add>,
- (outs GPR32wsp:$Rd), extends_to_i32>,
- addsub_wxtx< 0b0, 0b0, "add\t$Rd, ",
- (outs GPR32wsp:$Rd)>;
-defm SUBxx :addsub_exts<0b1, 0b1, 0b0, "sub\t$Rd, ", SetRD<GPR64xsp, sub>,
- (outs GPR64xsp:$Rd), extends_to_i64>,
- addsub_xxtx< 0b1, 0b0, "sub\t$Rd, ", SetRD<GPR64xsp, sub>,
- (outs GPR64xsp:$Rd)>;
-defm SUBww :addsub_exts<0b0, 0b1, 0b0, "sub\t$Rd, ", SetRD<GPR32wsp, sub>,
- (outs GPR32wsp:$Rd), extends_to_i32>,
- addsub_wxtx< 0b1, 0b0, "sub\t$Rd, ",
- (outs GPR32wsp:$Rd)>;
-
-let Defs = [NZCV] in {
-defm ADDSxx :addsub_exts<0b1, 0b0, 0b1, "adds\t$Rd, ", SetRD<GPR64, addc>,
- (outs GPR64:$Rd), extends_to_i64>,
- addsub_xxtx< 0b0, 0b1, "adds\t$Rd, ", SetRD<GPR64, addc>,
- (outs GPR64:$Rd)>;
-defm ADDSww :addsub_exts<0b0, 0b0, 0b1, "adds\t$Rd, ", SetRD<GPR32, addc>,
- (outs GPR32:$Rd), extends_to_i32>,
- addsub_wxtx< 0b0, 0b1, "adds\t$Rd, ",
- (outs GPR32:$Rd)>;
-defm SUBSxx :addsub_exts<0b1, 0b1, 0b1, "subs\t$Rd, ", SetRD<GPR64, subc>,
- (outs GPR64:$Rd), extends_to_i64>,
- addsub_xxtx< 0b1, 0b1, "subs\t$Rd, ", SetRD<GPR64, subc>,
- (outs GPR64:$Rd)>;
-defm SUBSww :addsub_exts<0b0, 0b1, 0b1, "subs\t$Rd, ", SetRD<GPR32, subc>,
- (outs GPR32:$Rd), extends_to_i32>,
- addsub_wxtx< 0b1, 0b1, "subs\t$Rd, ",
- (outs GPR32:$Rd)>;
-
-
-let SchedRW = [WriteCMP, ReadCMP, ReadCMP], Rd = 0b11111, isCompare = 1 in {
-defm CMNx : addsub_exts<0b1, 0b0, 0b1, "cmn\t", SetNZCV<A64cmn>,
- (outs), extends_to_i64>,
- addsub_xxtx< 0b0, 0b1, "cmn\t", SetNZCV<A64cmn>, (outs)>;
-defm CMNw : addsub_exts<0b0, 0b0, 0b1, "cmn\t", SetNZCV<A64cmn>,
- (outs), extends_to_i32>,
- addsub_wxtx< 0b0, 0b1, "cmn\t", (outs)>;
-defm CMPx : addsub_exts<0b1, 0b1, 0b1, "cmp\t", SetNZCV<A64cmp>,
- (outs), extends_to_i64>,
- addsub_xxtx< 0b1, 0b1, "cmp\t", SetNZCV<A64cmp>, (outs)>;
-defm CMPw : addsub_exts<0b0, 0b1, 0b1, "cmp\t", SetNZCV<A64cmp>,
- (outs), extends_to_i32>,
- addsub_wxtx< 0b1, 0b1, "cmp\t", (outs)>;
-}
-}
-
-// Now patterns for the operation without a shift being needed. No patterns are
-// created for uxtx/sxtx since they're non-uniform and it's expected that
-// add/sub (shifted register) will handle those cases anyway.
-multiclass addsubext_noshift_patterns<string prefix, SDPatternOperator nodeop,
- extend_types exts> {
- def : Pat<(nodeop exts.ty:$Rn, exts.uxtb),
- (!cast<Instruction>(prefix # "w_uxtb") $Rn, $Rm, 0)>;
- def : Pat<(nodeop exts.ty:$Rn, exts.uxth),
- (!cast<Instruction>(prefix # "w_uxth") $Rn, $Rm, 0)>;
- def : Pat<(nodeop exts.ty:$Rn, exts.uxtw),
- (!cast<Instruction>(prefix # "w_uxtw") $Rn, $Rm, 0)>;
-
- def : Pat<(nodeop exts.ty:$Rn, exts.sxtb),
- (!cast<Instruction>(prefix # "w_sxtb") $Rn, $Rm, 0)>;
- def : Pat<(nodeop exts.ty:$Rn, exts.sxth),
- (!cast<Instruction>(prefix # "w_sxth") $Rn, $Rm, 0)>;
- def : Pat<(nodeop exts.ty:$Rn, exts.sxtw),
- (!cast<Instruction>(prefix # "w_sxtw") $Rn, $Rm, 0)>;
-}
-
-defm : addsubext_noshift_patterns<"ADDxx", add, extends_to_i64>;
-defm : addsubext_noshift_patterns<"ADDww", add, extends_to_i32>;
-defm : addsubext_noshift_patterns<"SUBxx", sub, extends_to_i64>;
-defm : addsubext_noshift_patterns<"SUBww", sub, extends_to_i32>;
-
-defm : addsubext_noshift_patterns<"CMNx", A64cmn, extends_to_i64>;
-defm : addsubext_noshift_patterns<"CMNw", A64cmn, extends_to_i32>;
-defm : addsubext_noshift_patterns<"CMPx", A64cmp, extends_to_i64>;
-defm : addsubext_noshift_patterns<"CMPw", A64cmp, extends_to_i32>;
-
-// An extend of "lsl #imm" is valid if and only if one of Rn and Rd is
-// sp/wsp. It is synonymous with uxtx/uxtw depending on the size of the
-// operation. Also permitted in this case is complete omission of the argument,
-// which implies "lsl #0".
-multiclass lsl_aliases<string asmop, Instruction inst, RegisterClass GPR_Rd,
- RegisterClass GPR_Rn, RegisterClass GPR_Rm> {
- def : InstAlias<!strconcat(asmop, " $Rd, $Rn, $Rm"),
- (inst GPR_Rd:$Rd, GPR_Rn:$Rn, GPR_Rm:$Rm, 0)>;
-
- def : InstAlias<!strconcat(asmop, " $Rd, $Rn, $Rm, $LSL"),
- (inst GPR_Rd:$Rd, GPR_Rn:$Rn, GPR_Rm:$Rm, LSL_extoperand:$LSL)>;
-
-}
-
-defm : lsl_aliases<"add", ADDxxx_uxtx, Rxsp, GPR64xsp, GPR64>;
-defm : lsl_aliases<"add", ADDxxx_uxtx, GPR64xsp, Rxsp, GPR64>;
-defm : lsl_aliases<"add", ADDwww_uxtw, Rwsp, GPR32wsp, GPR32>;
-defm : lsl_aliases<"add", ADDwww_uxtw, GPR32wsp, Rwsp, GPR32>;
-defm : lsl_aliases<"sub", SUBxxx_uxtx, Rxsp, GPR64xsp, GPR64>;
-defm : lsl_aliases<"sub", SUBxxx_uxtx, GPR64xsp, Rxsp, GPR64>;
-defm : lsl_aliases<"sub", SUBwww_uxtw, Rwsp, GPR32wsp, GPR32>;
-defm : lsl_aliases<"sub", SUBwww_uxtw, GPR32wsp, Rwsp, GPR32>;
-
-// Rd cannot be sp for flag-setting variants so only half of the aliases are
-// needed.
-defm : lsl_aliases<"adds", ADDSxxx_uxtx, GPR64, Rxsp, GPR64>;
-defm : lsl_aliases<"adds", ADDSwww_uxtw, GPR32, Rwsp, GPR32>;
-defm : lsl_aliases<"subs", SUBSxxx_uxtx, GPR64, Rxsp, GPR64>;
-defm : lsl_aliases<"subs", SUBSwww_uxtw, GPR32, Rwsp, GPR32>;
-
-// CMP unfortunately has to be different because the instruction doesn't have a
-// dest register.
-multiclass cmp_lsl_aliases<string asmop, Instruction inst,
- RegisterClass GPR_Rn, RegisterClass GPR_Rm> {
- def : InstAlias<!strconcat(asmop, " $Rn, $Rm"),
- (inst GPR_Rn:$Rn, GPR_Rm:$Rm, 0)>;
-
- def : InstAlias<!strconcat(asmop, " $Rn, $Rm, $LSL"),
- (inst GPR_Rn:$Rn, GPR_Rm:$Rm, LSL_extoperand:$LSL)>;
-}
-
-defm : cmp_lsl_aliases<"cmp", CMPxx_uxtx, Rxsp, GPR64>;
-defm : cmp_lsl_aliases<"cmp", CMPww_uxtw, Rwsp, GPR32>;
-defm : cmp_lsl_aliases<"cmn", CMNxx_uxtx, Rxsp, GPR64>;
-defm : cmp_lsl_aliases<"cmn", CMNww_uxtw, Rwsp, GPR32>;
+let Defs = [SP], Uses = [SP], hasSideEffects = 1, isCodeGenOnly = 1 in {
+def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i32imm:$amt),
+ [(AArch64callseq_start timm:$amt)]>;
+def ADJCALLSTACKUP : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2),
+ [(AArch64callseq_end timm:$amt1, timm:$amt2)]>;
+} // Defs = [SP], Uses = [SP], hasSideEffects = 1, isCodeGenOnly = 1
+
+let isReMaterializable = 1, isCodeGenOnly = 1 in {
+// FIXME: The following pseudo instructions are only needed because remat
+// cannot handle multiple instructions. When that changes, they can be
+// removed, along with the AArch64Wrapper node.
+
+let AddedComplexity = 10 in
+def LOADgot : Pseudo<(outs GPR64:$dst), (ins i64imm:$addr),
+ [(set GPR64:$dst, (AArch64LOADgot tglobaladdr:$addr))]>,
+ Sched<[WriteLDAdr]>;
+
+// The MOVaddr instruction should match only when the add is not folded
+// into a load or store address.
+def MOVaddr
+ : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low),
+ [(set GPR64:$dst, (AArch64addlow (AArch64adrp tglobaladdr:$hi),
+ tglobaladdr:$low))]>,
+ Sched<[WriteAdrAdr]>;
+def MOVaddrJT
+ : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low),
+ [(set GPR64:$dst, (AArch64addlow (AArch64adrp tjumptable:$hi),
+ tjumptable:$low))]>,
+ Sched<[WriteAdrAdr]>;
+def MOVaddrCP
+ : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low),
+ [(set GPR64:$dst, (AArch64addlow (AArch64adrp tconstpool:$hi),
+ tconstpool:$low))]>,
+ Sched<[WriteAdrAdr]>;
+def MOVaddrBA
+ : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low),
+ [(set GPR64:$dst, (AArch64addlow (AArch64adrp tblockaddress:$hi),
+ tblockaddress:$low))]>,
+ Sched<[WriteAdrAdr]>;
+def MOVaddrTLS
+ : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low),
+ [(set GPR64:$dst, (AArch64addlow (AArch64adrp tglobaltlsaddr:$hi),
+ tglobaltlsaddr:$low))]>,
+ Sched<[WriteAdrAdr]>;
+def MOVaddrEXT
+ : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low),
+ [(set GPR64:$dst, (AArch64addlow (AArch64adrp texternalsym:$hi),
+ texternalsym:$low))]>,
+ Sched<[WriteAdrAdr]>;
+
+} // isReMaterializable, isCodeGenOnly
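+
+// For illustration, after pseudo expansion a MOVaddr of a global "g" under
+// the small code model becomes an adrp/add pair:
+//   adrp x0, g             // AArch64adrp: page address of g
+//   add  x0, x0, :lo12:g   // AArch64addlow: low 12 bits of g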
+
+def : Pat<(AArch64LOADgot tglobaltlsaddr:$addr),
+ (LOADgot tglobaltlsaddr:$addr)>;
+
+def : Pat<(AArch64LOADgot texternalsym:$addr),
+ (LOADgot texternalsym:$addr)>;
+
+def : Pat<(AArch64LOADgot tconstpool:$addr),
+ (LOADgot tconstpool:$addr)>;
//===----------------------------------------------------------------------===//
-// Add-subtract (immediate) instructions
+// System instructions.
//===----------------------------------------------------------------------===//
-// Contains: ADD, ADDS, SUB, SUBS + aliases CMN, CMP, MOV
-
-// These instructions accept a 12-bit unsigned immediate, optionally shifted
-// left by 12 bits. Official assembly format specifies a 12 bit immediate with
-// one of "", "LSL #0", "LSL #12" supplementary operands.
-
-// There are surprisingly few ways to make this work with TableGen, so this
-// implementation has separate instructions for the "LSL #0" and "LSL #12"
-// variants.
-
-// If the MCInst retained a single combined immediate (which could be 0x123000,
-// for example) then both components (imm & shift) would have to be delegated to
-// a single assembly operand. This would entail a separate operand parser
-// (because the LSL would have to live in the same AArch64Operand as the
-// immediate to be accessible); assembly parsing is rather complex and
-// error-prone C++ code.
-//
-// By splitting the immediate, we can delegate handling this optional operand to
-// an InstAlias. Supporting functions to generate the correct MCInst are still
-// required, but these are essentially trivial and parsing can remain generic.
-//
-// Rejected plans with rationale:
-// ------------------------------
-//
-// In an ideal world you'd have two first class immediate operands (in
-// InOperandList, specifying imm12 and shift). Unfortunately this is not
-// selectable by any means I could discover.
-//
-// An Instruction with two MCOperands hidden behind a single entry in
-// InOperandList (expanded by ComplexPatterns and MIOperandInfo) was functional,
-// but required more C++ code to handle encoding/decoding. Parsing (the intended
-// main beneficiary) ended up equally complex because of the optional nature of
-// "LSL #0".
-//
-// Attempting to circumvent the need for a custom OperandParser above by giving
-// InstAliases without the "lsl #0" failed. add/sub could be accommodated but
-// the cmp/cmn aliases didn't use the MIOperandInfo to determine how operands
-// should be parsed: there was no way to accommodate an "lsl #12".
-
-let ParserMethod = "ParseImmWithLSLOperand",
- RenderMethod = "addImmWithLSLOperands" in {
- // Derived PredicateMethod fields are different for each
- def addsubimm_lsl0_asmoperand : AsmOperandClass {
- let Name = "AddSubImmLSL0";
- // If an error is reported against this operand, instruction could also be a
- // register variant.
- let DiagnosticType = "AddSubSecondSource";
- }
-
- def addsubimm_lsl12_asmoperand : AsmOperandClass {
- let Name = "AddSubImmLSL12";
- let DiagnosticType = "AddSubSecondSource";
- }
-}
-
-def shr_12_XFORM : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(N->getSExtValue() >> 12, MVT::i32);
-}]>;
-def shr_12_neg_XFORM : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant((-N->getSExtValue()) >> 12, MVT::i32);
-}]>;
-
-def neg_XFORM : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(-N->getSExtValue(), MVT::i32);
-}]>;
+def HINT : HintI<"hint">;
+def : InstAlias<"nop", (HINT 0b000)>;
+def : InstAlias<"yield",(HINT 0b001)>;
+def : InstAlias<"wfe", (HINT 0b010)>;
+def : InstAlias<"wfi", (HINT 0b011)>;
+def : InstAlias<"sev", (HINT 0b100)>;
+def : InstAlias<"sevl", (HINT 0b101)>;
+// As far as LLVM is concerned this writes to the system's exclusive monitors.
+let mayLoad = 1, mayStore = 1 in
+def CLREX : CRmSystemI<imm0_15, 0b010, "clrex">;
-multiclass addsub_imm_operands<ValueType ty> {
- let PrintMethod = "printAddSubImmLSL0Operand",
- EncoderMethod = "getAddSubImmOpValue",
- ParserMatchClass = addsubimm_lsl0_asmoperand in {
- def _posimm_lsl0 : Operand<ty>,
- ImmLeaf<ty, [{ return Imm >= 0 && (Imm & ~0xfff) == 0; }]>;
- def _negimm_lsl0 : Operand<ty>,
- ImmLeaf<ty, [{ return Imm < 0 && (-Imm & ~0xfff) == 0; }],
- neg_XFORM>;
- }
-
- let PrintMethod = "printAddSubImmLSL12Operand",
- EncoderMethod = "getAddSubImmOpValue",
- ParserMatchClass = addsubimm_lsl12_asmoperand in {
- def _posimm_lsl12 : Operand<ty>,
- ImmLeaf<ty, [{ return Imm >= 0 && (Imm & ~0xfff000) == 0; }],
- shr_12_XFORM>;
-
- def _negimm_lsl12 : Operand<ty>,
- ImmLeaf<ty, [{ return Imm < 0 && (-Imm & ~0xfff000) == 0; }],
- shr_12_neg_XFORM>;
- }
-}
-
-// The add operands don't need any transformation
-defm addsubimm_operand_i32 : addsub_imm_operands<i32>;
-defm addsubimm_operand_i64 : addsub_imm_operands<i64>;
-
-multiclass addsubimm_varieties<string prefix, bit sf, bit op, bits<2> shift,
- string asmop, string cmpasmop,
- Operand imm_operand, Operand cmp_imm_operand,
- RegisterClass GPR, RegisterClass GPRsp,
- AArch64Reg ZR, ValueType Ty> {
- // All registers for non-S variants allow SP
- def _s : A64I_addsubimm<sf, op, 0b0, shift,
- (outs GPRsp:$Rd),
- (ins GPRsp:$Rn, imm_operand:$Imm12),
- !strconcat(asmop, "\t$Rd, $Rn, $Imm12"),
- [(set Ty:$Rd, (add Ty:$Rn, imm_operand:$Imm12))],
- NoItinerary>,
- Sched<[WriteALU, ReadALU]>;
-
-
- // S variants can read SP but would write to ZR
- def _S : A64I_addsubimm<sf, op, 0b1, shift,
- (outs GPR:$Rd),
- (ins GPRsp:$Rn, imm_operand:$Imm12),
- !strconcat(asmop, "s\t$Rd, $Rn, $Imm12"),
- [(set Ty:$Rd, (addc Ty:$Rn, imm_operand:$Imm12))],
- NoItinerary>,
- Sched<[WriteALU, ReadALU]> {
- let Defs = [NZCV];
- }
-
- // Note that the pattern here for ADDS is subtle. Canonically CMP
- // a, b becomes SUBS a, b. If b < 0 then this is equivalent to
- // ADDS a, (-b). This is not true in general.
- def _cmp : A64I_addsubimm<sf, op, 0b1, shift,
- (outs), (ins GPRsp:$Rn, imm_operand:$Imm12),
- !strconcat(cmpasmop, " $Rn, $Imm12"),
- [(set NZCV,
- (A64cmp Ty:$Rn, cmp_imm_operand:$Imm12))],
- NoItinerary>,
- Sched<[WriteCMP, ReadCMP]> {
- let Rd = 0b11111;
- let Defs = [NZCV];
- let isCompare = 1;
- }
-}
+def DMB : CRmSystemI<barrier_op, 0b101, "dmb">;
+def DSB : CRmSystemI<barrier_op, 0b100, "dsb">;
+def ISB : CRmSystemI<barrier_op, 0b110, "isb">;
+def : InstAlias<"clrex", (CLREX 0xf)>;
+def : InstAlias<"isb", (ISB 0xf)>;
+def MRS : MRSI;
+def MSR : MSRI;
+def MSRpstate: MSRpstateI;
-multiclass addsubimm_shifts<string prefix, bit sf, bit op,
- string asmop, string cmpasmop, string operand, string cmpoperand,
- RegisterClass GPR, RegisterClass GPRsp, AArch64Reg ZR,
- ValueType Ty> {
- defm _lsl0 : addsubimm_varieties<prefix # "_lsl0", sf, op, 0b00,
- asmop, cmpasmop,
- !cast<Operand>(operand # "_lsl0"),
- !cast<Operand>(cmpoperand # "_lsl0"),
- GPR, GPRsp, ZR, Ty>;
-
- defm _lsl12 : addsubimm_varieties<prefix # "_lsl12", sf, op, 0b01,
- asmop, cmpasmop,
- !cast<Operand>(operand # "_lsl12"),
- !cast<Operand>(cmpoperand # "_lsl12"),
- GPR, GPRsp, ZR, Ty>;
-}
+// The thread pointer (on Linux, at least, where this has been implemented) is
+// TPIDR_EL0.
+def : Pat<(AArch64threadpointer), (MRS 0xde82)>;
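+// (Illustration: a read of the thread pointer therefore selects to
+// "mrs xN, TPIDR_EL0"; 0xde82 is the op0:op1:CRn:CRm:op2 encoding of
+// TPIDR_EL0, i.e. S3_3_C13_C0_2.)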
-defm ADDwwi : addsubimm_shifts<"ADDwi", 0b0, 0b0, "add", "cmn",
- "addsubimm_operand_i32_posimm",
- "addsubimm_operand_i32_negimm",
- GPR32, GPR32wsp, WZR, i32>;
-defm ADDxxi : addsubimm_shifts<"ADDxi", 0b1, 0b0, "add", "cmn",
- "addsubimm_operand_i64_posimm",
- "addsubimm_operand_i64_negimm",
- GPR64, GPR64xsp, XZR, i64>;
-defm SUBwwi : addsubimm_shifts<"SUBwi", 0b0, 0b1, "sub", "cmp",
- "addsubimm_operand_i32_negimm",
- "addsubimm_operand_i32_posimm",
- GPR32, GPR32wsp, WZR, i32>;
-defm SUBxxi : addsubimm_shifts<"SUBxi", 0b1, 0b1, "sub", "cmp",
- "addsubimm_operand_i64_negimm",
- "addsubimm_operand_i64_posimm",
- GPR64, GPR64xsp, XZR, i64>;
-
-multiclass MOVsp<RegisterClass GPRsp, RegisterClass SP, Instruction addop> {
- def _fromsp : InstAlias<"mov $Rd, $Rn",
- (addop GPRsp:$Rd, SP:$Rn, 0),
- 0b1>;
-
- def _tosp : InstAlias<"mov $Rd, $Rn",
- (addop SP:$Rd, GPRsp:$Rn, 0),
- 0b1>;
-}
+// Generic system instructions
+def SYSxt : SystemXtI<0, "sys">;
+def SYSLxt : SystemLXtI<1, "sysl">;
-// Recall Rxsp is a RegisterClass containing *just* xsp.
-defm MOVxx : MOVsp<GPR64xsp, Rxsp, ADDxxi_lsl0_s>;
-defm MOVww : MOVsp<GPR32wsp, Rwsp, ADDwwi_lsl0_s>;
+def : InstAlias<"sys $op1, $Cn, $Cm, $op2",
+ (SYSxt imm0_7:$op1, sys_cr_op:$Cn,
+ sys_cr_op:$Cm, imm0_7:$op2, XZR)>;
//===----------------------------------------------------------------------===//
-// Add-subtract (shifted register) instructions
+// Move immediate instructions.
//===----------------------------------------------------------------------===//
-// Contains: ADD, ADDS, SUB, SUBS + aliases CMN, CMP, NEG, NEGS
-
-//===-------------------------------
-// 1. The "shifted register" operands. Shared with logical insts.
-//===-------------------------------
-
-multiclass shift_operands<string prefix, string form> {
- def _asmoperand_i32 : AsmOperandClass {
- let Name = "Shift" # form # "i32";
- let RenderMethod = "addShiftOperands";
- let PredicateMethod = "isShift<A64SE::" # form # ", false>";
- let DiagnosticType = "AddSubRegShift32";
- }
- // Note that the operand type is intentionally i64 because the DAGCombiner
- // puts these into a canonical form.
- def _i32 : Operand<i64>, ImmLeaf<i64, [{ return Imm >= 0 && Imm <= 31; }]> {
- let ParserMatchClass
- = !cast<AsmOperandClass>(prefix # "_asmoperand_i32");
- let PrintMethod = "printShiftOperand<A64SE::" # form # ">";
- let DecoderMethod = "Decode32BitShiftOperand";
- }
+defm MOVK : InsertImmediate<0b11, "movk">;
+defm MOVN : MoveImmediate<0b00, "movn">;
- def _asmoperand_i64 : AsmOperandClass {
- let Name = "Shift" # form # "i64";
- let RenderMethod = "addShiftOperands";
- let PredicateMethod = "isShift<A64SE::" # form # ", true>";
- let DiagnosticType = "AddSubRegShift64";
- }
+let PostEncoderMethod = "fixMOVZ" in
+defm MOVZ : MoveImmediate<0b10, "movz">;
- def _i64 : Operand<i64>, ImmLeaf<i64, [{ return Imm >= 0 && Imm <= 63; }]> {
- let ParserMatchClass
- = !cast<AsmOperandClass>(prefix # "_asmoperand_i64");
- let PrintMethod = "printShiftOperand<A64SE::" # form # ">";
- }
-}
+// First group of aliases covers an implicit "lsl #0".
+def : InstAlias<"movk $dst, $imm", (MOVKWi GPR32:$dst, imm0_65535:$imm, 0)>;
+def : InstAlias<"movk $dst, $imm", (MOVKXi GPR64:$dst, imm0_65535:$imm, 0)>;
+def : InstAlias<"movn $dst, $imm", (MOVNWi GPR32:$dst, imm0_65535:$imm, 0)>;
+def : InstAlias<"movn $dst, $imm", (MOVNXi GPR64:$dst, imm0_65535:$imm, 0)>;
+def : InstAlias<"movz $dst, $imm", (MOVZWi GPR32:$dst, imm0_65535:$imm, 0)>;
+def : InstAlias<"movz $dst, $imm", (MOVZXi GPR64:$dst, imm0_65535:$imm, 0)>;
-defm lsl_operand : shift_operands<"lsl_operand", "LSL">;
-defm lsr_operand : shift_operands<"lsr_operand", "LSR">;
-defm asr_operand : shift_operands<"asr_operand", "ASR">;
-
-// Not used for add/sub, but defined here for completeness. The "logical
-// (shifted register)" instructions *do* have an ROR variant.
-defm ror_operand : shift_operands<"ror_operand", "ROR">;
-
-//===-------------------------------
-// 2. The basic 3.5-operand ADD/SUB/ADDS/SUBS instructions.
-//===-------------------------------
-
-// N.b. the commutable parameter is just !N. It will be first against the wall
-// when the revolution comes.
-multiclass addsub_shifts<string prefix, bit sf, bit op, bit s, bit commutable,
- string asmop, SDPatternOperator opfrag, ValueType ty,
- RegisterClass GPR, list<Register> defs> {
- let isCommutable = commutable, Defs = defs in {
- def _lsl : A64I_addsubshift<sf, op, s, 0b00,
- (outs GPR:$Rd),
- (ins GPR:$Rn, GPR:$Rm,
- !cast<Operand>("lsl_operand_" # ty):$Imm6),
- !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Imm6"),
- [(set GPR:$Rd, (opfrag ty:$Rn, (shl ty:$Rm,
- !cast<Operand>("lsl_operand_" # ty):$Imm6))
- )],
- NoItinerary>,
- Sched<[WriteALU, ReadALU]>;
-
- def _lsr : A64I_addsubshift<sf, op, s, 0b01,
- (outs GPR:$Rd),
- (ins GPR:$Rn, GPR:$Rm,
- !cast<Operand>("lsr_operand_" # ty):$Imm6),
- !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Imm6"),
- [(set ty:$Rd, (opfrag ty:$Rn, (srl ty:$Rm,
- !cast<Operand>("lsr_operand_" # ty):$Imm6))
- )],
- NoItinerary>,
- Sched<[WriteALU, ReadALU]>;
-
- def _asr : A64I_addsubshift<sf, op, s, 0b10,
- (outs GPR:$Rd),
- (ins GPR:$Rn, GPR:$Rm,
- !cast<Operand>("asr_operand_" # ty):$Imm6),
- !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Imm6"),
- [(set ty:$Rd, (opfrag ty:$Rn, (sra ty:$Rm,
- !cast<Operand>("asr_operand_" # ty):$Imm6))
- )],
- NoItinerary>,
- Sched<[WriteALU, ReadALU]>;
- }
-
- def _noshift
- : InstAlias<!strconcat(asmop, " $Rd, $Rn, $Rm"),
- (!cast<Instruction>(prefix # "_lsl") GPR:$Rd, GPR:$Rn,
- GPR:$Rm, 0)>;
-
- def : Pat<(opfrag ty:$Rn, ty:$Rm),
- (!cast<Instruction>(prefix # "_lsl") $Rn, $Rm, 0)>;
-}
-
-multiclass addsub_sizes<string prefix, bit op, bit s, bit commutable,
- string asmop, SDPatternOperator opfrag,
- list<Register> defs> {
- defm xxx : addsub_shifts<prefix # "xxx", 0b1, op, s,
- commutable, asmop, opfrag, i64, GPR64, defs>;
- defm www : addsub_shifts<prefix # "www", 0b0, op, s,
- commutable, asmop, opfrag, i32, GPR32, defs>;
-}
+// Next, we have various ELF relocations with the ":XYZ_g0:sym" syntax.
+def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movz_symbol_g3:$sym, 48)>;
+def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movz_symbol_g2:$sym, 32)>;
+def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movz_symbol_g1:$sym, 16)>;
+def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movz_symbol_g0:$sym, 0)>;
+def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movz_symbol_g3:$sym, 48)>;
+def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movz_symbol_g2:$sym, 32)>;
+def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movz_symbol_g1:$sym, 16)>;
+def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movz_symbol_g0:$sym, 0)>;
-defm ADD : addsub_sizes<"ADD", 0b0, 0b0, 0b1, "add", add, []>;
-defm SUB : addsub_sizes<"SUB", 0b1, 0b0, 0b0, "sub", sub, []>;
+def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movk_symbol_g3:$sym, 48)>;
+def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movk_symbol_g2:$sym, 32)>;
+def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movk_symbol_g1:$sym, 16)>;
+def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movk_symbol_g0:$sym, 0)>;
-defm ADDS : addsub_sizes<"ADDS", 0b0, 0b1, 0b1, "adds", addc, [NZCV]>;
-defm SUBS : addsub_sizes<"SUBS", 0b1, 0b1, 0b0, "subs", subc, [NZCV]>;
+def : InstAlias<"movz $Rd, $sym", (MOVZWi GPR32:$Rd, movz_symbol_g1:$sym, 16)>;
+def : InstAlias<"movz $Rd, $sym", (MOVZWi GPR32:$Rd, movz_symbol_g0:$sym, 0)>;
-//===-------------------------------
-// 1. The NEG/NEGS aliases
-//===-------------------------------
+def : InstAlias<"movn $Rd, $sym", (MOVNWi GPR32:$Rd, movz_symbol_g1:$sym, 16)>;
+def : InstAlias<"movn $Rd, $sym", (MOVNWi GPR32:$Rd, movz_symbol_g0:$sym, 0)>;
-multiclass neg_alias<Instruction INST, RegisterClass GPR, Register ZR,
- ValueType ty, Operand shift_operand, SDNode shiftop> {
- def : InstAlias<"neg $Rd, $Rm, $Imm6",
- (INST GPR:$Rd, ZR, GPR:$Rm, shift_operand:$Imm6)>;
+def : InstAlias<"movk $Rd, $sym", (MOVKWi GPR32:$Rd, movk_symbol_g1:$sym, 16)>;
+def : InstAlias<"movk $Rd, $sym", (MOVKWi GPR32:$Rd, movk_symbol_g0:$sym, 0)>;
- def : Pat<(sub 0, (shiftop ty:$Rm, shift_operand:$Imm6)),
- (INST ZR, $Rm, shift_operand:$Imm6)>;
-}
+// Final group of aliases covers true "mov $Rd, $imm" cases.
+multiclass movw_mov_alias<string basename, Instruction INST,
+                          RegisterClass GPR, int width, int shift> {
+ def _asmoperand : AsmOperandClass {
+ let Name = basename # width # "_lsl" # shift # "MovAlias";
+ let PredicateMethod = "is" # basename # "MovAlias<" # width # ", "
+ # shift # ">";
+ let RenderMethod = "add" # basename # "MovAliasOperands<" # shift # ">";
+ }
-defm : neg_alias<SUBwww_lsl, GPR32, WZR, i32, lsl_operand_i32, shl>;
-defm : neg_alias<SUBwww_lsr, GPR32, WZR, i32, lsr_operand_i32, srl>;
-defm : neg_alias<SUBwww_asr, GPR32, WZR, i32, asr_operand_i32, sra>;
-def : InstAlias<"neg $Rd, $Rm", (SUBwww_lsl GPR32:$Rd, WZR, GPR32:$Rm, 0)>;
-def : Pat<(sub 0, i32:$Rm), (SUBwww_lsl WZR, $Rm, 0)>;
-
-defm : neg_alias<SUBxxx_lsl, GPR64, XZR, i64, lsl_operand_i64, shl>;
-defm : neg_alias<SUBxxx_lsr, GPR64, XZR, i64, lsr_operand_i64, srl>;
-defm : neg_alias<SUBxxx_asr, GPR64, XZR, i64, asr_operand_i64, sra>;
-def : InstAlias<"neg $Rd, $Rm", (SUBxxx_lsl GPR64:$Rd, XZR, GPR64:$Rm, 0)>;
-def : Pat<(sub 0, i64:$Rm), (SUBxxx_lsl XZR, $Rm, 0)>;
-
-// NEGS doesn't get any patterns yet: defining multiple outputs means C++ has to
-// be involved.
-class negs_alias<Instruction INST, RegisterClass GPR,
- Register ZR, Operand shift_operand, SDNode shiftop>
- : InstAlias<"negs $Rd, $Rm, $Imm6",
- (INST GPR:$Rd, ZR, GPR:$Rm, shift_operand:$Imm6)>;
-
-def : negs_alias<SUBSwww_lsl, GPR32, WZR, lsl_operand_i32, shl>;
-def : negs_alias<SUBSwww_lsr, GPR32, WZR, lsr_operand_i32, srl>;
-def : negs_alias<SUBSwww_asr, GPR32, WZR, asr_operand_i32, sra>;
-def : InstAlias<"negs $Rd, $Rm", (SUBSwww_lsl GPR32:$Rd, WZR, GPR32:$Rm, 0)>;
-
-def : negs_alias<SUBSxxx_lsl, GPR64, XZR, lsl_operand_i64, shl>;
-def : negs_alias<SUBSxxx_lsr, GPR64, XZR, lsr_operand_i64, srl>;
-def : negs_alias<SUBSxxx_asr, GPR64, XZR, asr_operand_i64, sra>;
-def : InstAlias<"negs $Rd, $Rm", (SUBSxxx_lsl GPR64:$Rd, XZR, GPR64:$Rm, 0)>;
-
-//===-------------------------------
-// 1. The CMP/CMN aliases
-//===-------------------------------
-
-multiclass cmp_shifts<string prefix, bit sf, bit op, bit commutable,
- string asmop, SDPatternOperator opfrag, ValueType ty,
- RegisterClass GPR> {
- let isCommutable = commutable, Rd = 0b11111, Defs = [NZCV] in {
- def _lsl : A64I_addsubshift<sf, op, 0b1, 0b00,
- (outs),
- (ins GPR:$Rn, GPR:$Rm,
- !cast<Operand>("lsl_operand_" # ty):$Imm6),
- !strconcat(asmop, "\t$Rn, $Rm, $Imm6"),
- [(set NZCV, (opfrag ty:$Rn, (shl ty:$Rm,
- !cast<Operand>("lsl_operand_" # ty):$Imm6))
- )],
- NoItinerary>,
- Sched<[WriteCMP, ReadCMP, ReadCMP]>;
-
- def _lsr : A64I_addsubshift<sf, op, 0b1, 0b01,
- (outs),
- (ins GPR:$Rn, GPR:$Rm,
- !cast<Operand>("lsr_operand_" # ty):$Imm6),
- !strconcat(asmop, "\t$Rn, $Rm, $Imm6"),
- [(set NZCV, (opfrag ty:$Rn, (srl ty:$Rm,
- !cast<Operand>("lsr_operand_" # ty):$Imm6))
- )],
- NoItinerary>,
- Sched<[WriteCMP, ReadCMP, ReadCMP]>;
-
- def _asr : A64I_addsubshift<sf, op, 0b1, 0b10,
- (outs),
- (ins GPR:$Rn, GPR:$Rm,
- !cast<Operand>("asr_operand_" # ty):$Imm6),
- !strconcat(asmop, "\t$Rn, $Rm, $Imm6"),
- [(set NZCV, (opfrag ty:$Rn, (sra ty:$Rm,
- !cast<Operand>("asr_operand_" # ty):$Imm6))
- )],
- NoItinerary>,
- Sched<[WriteCMP, ReadCMP, ReadCMP]>;
+ def _movimm : Operand<i32> {
+ let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_asmoperand");
}
- def _noshift
- : InstAlias<!strconcat(asmop, " $Rn, $Rm"),
- (!cast<Instruction>(prefix # "_lsl") GPR:$Rn, GPR:$Rm, 0)>;
+ def : InstAlias<"mov $Rd, $imm",
+ (INST GPR:$Rd, !cast<Operand>(NAME # "_movimm"):$imm, shift)>;
+}
+
+defm : movw_mov_alias<"MOVZ", MOVZWi, GPR32, 32, 0>;
+defm : movw_mov_alias<"MOVZ", MOVZWi, GPR32, 32, 16>;
+
+defm : movw_mov_alias<"MOVZ", MOVZXi, GPR64, 64, 0>;
+defm : movw_mov_alias<"MOVZ", MOVZXi, GPR64, 64, 16>;
+defm : movw_mov_alias<"MOVZ", MOVZXi, GPR64, 64, 32>;
+defm : movw_mov_alias<"MOVZ", MOVZXi, GPR64, 64, 48>;
+
+defm : movw_mov_alias<"MOVN", MOVNWi, GPR32, 32, 0>;
+defm : movw_mov_alias<"MOVN", MOVNWi, GPR32, 32, 16>;
+
+defm : movw_mov_alias<"MOVN", MOVNXi, GPR64, 64, 0>;
+defm : movw_mov_alias<"MOVN", MOVNXi, GPR64, 64, 16>;
+defm : movw_mov_alias<"MOVN", MOVNXi, GPR64, 64, 32>;
+defm : movw_mov_alias<"MOVN", MOVNXi, GPR64, 64, 48>;
+
+let isReMaterializable = 1, isCodeGenOnly = 1, isMoveImm = 1,
+ isAsCheapAsAMove = 1 in {
+// FIXME: The following pseudo instructions are only needed because remat
+// cannot handle multiple instructions. When that changes, we can select
+// directly to the real instructions and get rid of these pseudos.
+
+def MOVi32imm
+ : Pseudo<(outs GPR32:$dst), (ins i32imm:$src),
+ [(set GPR32:$dst, imm:$src)]>,
+ Sched<[WriteImm]>;
+def MOVi64imm
+ : Pseudo<(outs GPR64:$dst), (ins i64imm:$src),
+ [(set GPR64:$dst, imm:$src)]>,
+ Sched<[WriteImm]>;
+} // isReMaterializable, isCodeGenOnly
+
+// If possible, we want to use MOVi32imm even for 64-bit moves. This gives the
+// eventual expansion code fewer bits to worry about getting right. Marshalling
+// the types is a little tricky though:
+def i64imm_32bit : ImmLeaf<i64, [{
+ return (Imm & 0xffffffffULL) == static_cast<uint64_t>(Imm);
+}]>;
- def : Pat<(opfrag ty:$Rn, ty:$Rm),
- (!cast<Instruction>(prefix # "_lsl") $Rn, $Rm, 0)>;
-}
+def trunc_imm : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(N->getZExtValue(), MVT::i32);
+}]>;
-defm CMPww : cmp_shifts<"CMPww", 0b0, 0b1, 0b0, "cmp", A64cmp, i32, GPR32>;
-defm CMPxx : cmp_shifts<"CMPxx", 0b1, 0b1, 0b0, "cmp", A64cmp, i64, GPR64>;
+def : Pat<(i64 i64imm_32bit:$src),
+ (SUBREG_TO_REG (i64 0), (MOVi32imm (trunc_imm imm:$src)), sub_32)>;
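+
+// Worked example: 0x0000000089abcdef satisfies i64imm_32bit (its top 32 bits
+// are zero), so the move is selected as
+//   (SUBREG_TO_REG (i64 0), (MOVi32imm 0x89abcdef), sub_32)
+// relying on the 32-bit write implicitly zeroing bits [63:32].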
+
+// Deal with the various forms of (ELF) large addressing with MOVZ/MOVK
+// sequences.
+def : Pat<(AArch64WrapperLarge tglobaladdr:$g3, tglobaladdr:$g2,
+ tglobaladdr:$g1, tglobaladdr:$g0),
+ (MOVKXi (MOVKXi (MOVKXi (MOVZXi tglobaladdr:$g3, 48),
+ tglobaladdr:$g2, 32),
+ tglobaladdr:$g1, 16),
+ tglobaladdr:$g0, 0)>;
+
+def : Pat<(AArch64WrapperLarge tblockaddress:$g3, tblockaddress:$g2,
+ tblockaddress:$g1, tblockaddress:$g0),
+ (MOVKXi (MOVKXi (MOVKXi (MOVZXi tblockaddress:$g3, 48),
+ tblockaddress:$g2, 32),
+ tblockaddress:$g1, 16),
+ tblockaddress:$g0, 0)>;
+
+def : Pat<(AArch64WrapperLarge tconstpool:$g3, tconstpool:$g2,
+ tconstpool:$g1, tconstpool:$g0),
+ (MOVKXi (MOVKXi (MOVKXi (MOVZXi tconstpool:$g3, 48),
+ tconstpool:$g2, 32),
+ tconstpool:$g1, 16),
+ tconstpool:$g0, 0)>;
+
+def : Pat<(AArch64WrapperLarge tjumptable:$g3, tjumptable:$g2,
+ tjumptable:$g1, tjumptable:$g0),
+ (MOVKXi (MOVKXi (MOVKXi (MOVZXi tjumptable:$g3, 48),
+ tjumptable:$g2, 32),
+ tjumptable:$g1, 16),
+ tjumptable:$g0, 0)>;
-defm CMNww : cmp_shifts<"CMNww", 0b0, 0b0, 0b1, "cmn", A64cmn, i32, GPR32>;
-defm CMNxx : cmp_shifts<"CMNxx", 0b1, 0b0, 0b1, "cmn", A64cmn, i64, GPR64>;
//===----------------------------------------------------------------------===//
-// Add-subtract (with carry) instructions
+// Arithmetic instructions.
//===----------------------------------------------------------------------===//
-// Contains: ADC, ADCS, SBC, SBCS + aliases NGC, NGCS
-
-multiclass A64I_addsubcarrySizes<bit op, bit s, string asmop> {
- let Uses = [NZCV] in {
- def www : A64I_addsubcarry<0b0, op, s, 0b000000,
- (outs GPR32:$Rd), (ins GPR32:$Rn, GPR32:$Rm),
- !strconcat(asmop, "\t$Rd, $Rn, $Rm"),
- [], NoItinerary>,
- Sched<[WriteALU, ReadALU, ReadALU]>;
-
- def xxx : A64I_addsubcarry<0b1, op, s, 0b000000,
- (outs GPR64:$Rd), (ins GPR64:$Rn, GPR64:$Rm),
- !strconcat(asmop, "\t$Rd, $Rn, $Rm"),
- [], NoItinerary>,
- Sched<[WriteALU, ReadALU, ReadALU]>;
- }
-}
-
-let isCommutable = 1 in {
- defm ADC : A64I_addsubcarrySizes<0b0, 0b0, "adc">;
-}
-
-defm SBC : A64I_addsubcarrySizes<0b1, 0b0, "sbc">;
-
-let Defs = [NZCV] in {
- let isCommutable = 1 in {
- defm ADCS : A64I_addsubcarrySizes<0b0, 0b1, "adcs">;
- }
- defm SBCS : A64I_addsubcarrySizes<0b1, 0b1, "sbcs">;
-}
-
-def : InstAlias<"ngc $Rd, $Rm", (SBCwww GPR32:$Rd, WZR, GPR32:$Rm)>;
-def : InstAlias<"ngc $Rd, $Rm", (SBCxxx GPR64:$Rd, XZR, GPR64:$Rm)>;
-def : InstAlias<"ngcs $Rd, $Rm", (SBCSwww GPR32:$Rd, WZR, GPR32:$Rm)>;
-def : InstAlias<"ngcs $Rd, $Rm", (SBCSxxx GPR64:$Rd, XZR, GPR64:$Rm)>;
+// Add/subtract with carry.
+defm ADC : AddSubCarry<0, "adc", "adcs", AArch64adc, AArch64adc_flag>;
+defm SBC : AddSubCarry<1, "sbc", "sbcs", AArch64sbc, AArch64sbc_flag>;
+
+def : InstAlias<"ngc $dst, $src", (SBCWr GPR32:$dst, WZR, GPR32:$src)>;
+def : InstAlias<"ngc $dst, $src", (SBCXr GPR64:$dst, XZR, GPR64:$src)>;
+def : InstAlias<"ngcs $dst, $src", (SBCSWr GPR32:$dst, WZR, GPR32:$src)>;
+def : InstAlias<"ngcs $dst, $src", (SBCSXr GPR64:$dst, XZR, GPR64:$src)>;
+
+// Add/subtract
+defm ADD : AddSub<0, "add", add>;
+defm SUB : AddSub<1, "sub">;
+
+def : InstAlias<"mov $dst, $src",
+ (ADDWri GPR32sponly:$dst, GPR32sp:$src, 0, 0)>;
+def : InstAlias<"mov $dst, $src",
+ (ADDWri GPR32sp:$dst, GPR32sponly:$src, 0, 0)>;
+def : InstAlias<"mov $dst, $src",
+ (ADDXri GPR64sponly:$dst, GPR64sp:$src, 0, 0)>;
+def : InstAlias<"mov $dst, $src",
+ (ADDXri GPR64sp:$dst, GPR64sponly:$src, 0, 0)>;
+
+defm ADDS : AddSubS<0, "adds", AArch64add_flag, "cmn">;
+defm SUBS : AddSubS<1, "subs", AArch64sub_flag, "cmp">;
+
+// Use SUBS instead of SUB to enable CSE between SUBS and SUB.
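+// For example, in "c = a - b; if (a == b) ..." both the subtraction and the
+// compare select to SUBS, so CSE can merge them into a single instruction
+// whose NZCV output feeds the branch.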
+def : Pat<(sub GPR32sp:$Rn, addsub_shifted_imm32:$imm),
+ (SUBSWri GPR32sp:$Rn, addsub_shifted_imm32:$imm)>;
+def : Pat<(sub GPR64sp:$Rn, addsub_shifted_imm64:$imm),
+ (SUBSXri GPR64sp:$Rn, addsub_shifted_imm64:$imm)>;
+def : Pat<(sub GPR32:$Rn, GPR32:$Rm),
+ (SUBSWrr GPR32:$Rn, GPR32:$Rm)>;
+def : Pat<(sub GPR64:$Rn, GPR64:$Rm),
+ (SUBSXrr GPR64:$Rn, GPR64:$Rm)>;
+def : Pat<(sub GPR32:$Rn, arith_shifted_reg32:$Rm),
+ (SUBSWrs GPR32:$Rn, arith_shifted_reg32:$Rm)>;
+def : Pat<(sub GPR64:$Rn, arith_shifted_reg64:$Rm),
+ (SUBSXrs GPR64:$Rn, arith_shifted_reg64:$Rm)>;
+def : Pat<(sub GPR32sp:$R2, arith_extended_reg32<i32>:$R3),
+ (SUBSWrx GPR32sp:$R2, arith_extended_reg32<i32>:$R3)>;
+def : Pat<(sub GPR64sp:$R2, arith_extended_reg32to64<i64>:$R3),
+ (SUBSXrx GPR64sp:$R2, arith_extended_reg32to64<i64>:$R3)>;
+
+// Because of the immediate format for add/sub-imm instructions, the
+// expression (add x, -1) must be transformed to (SUB{W,X}ri x, 1).
+// These patterns capture that transformation.
+let AddedComplexity = 1 in {
+def : Pat<(add GPR32:$Rn, neg_addsub_shifted_imm32:$imm),
+ (SUBSWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>;
+def : Pat<(add GPR64:$Rn, neg_addsub_shifted_imm64:$imm),
+ (SUBSXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>;
+def : Pat<(sub GPR32:$Rn, neg_addsub_shifted_imm32:$imm),
+ (ADDWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>;
+def : Pat<(sub GPR64:$Rn, neg_addsub_shifted_imm64:$imm),
+ (ADDXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>;
+}
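+// For example, an add of the constant -5 has no direct encoding (the
+// immediate field is unsigned), so it is selected as "subs w0, w1, #5".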
+
+// The same transformation applies to the flag-setting nodes: a negated
+// immediate turns ADDS into SUBS and SUBS into ADDS.
+let AddedComplexity = 1 in {
+def : Pat<(AArch64add_flag GPR32:$Rn, neg_addsub_shifted_imm32:$imm),
+ (SUBSWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>;
+def : Pat<(AArch64add_flag GPR64:$Rn, neg_addsub_shifted_imm64:$imm),
+ (SUBSXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>;
+def : Pat<(AArch64sub_flag GPR32:$Rn, neg_addsub_shifted_imm32:$imm),
+ (ADDSWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>;
+def : Pat<(AArch64sub_flag GPR64:$Rn, neg_addsub_shifted_imm64:$imm),
+ (ADDSXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>;
+}
+
+def : InstAlias<"neg $dst, $src", (SUBWrs GPR32:$dst, WZR, GPR32:$src, 0), 3>;
+def : InstAlias<"neg $dst, $src", (SUBXrs GPR64:$dst, XZR, GPR64:$src, 0), 3>;
+def : InstAlias<"neg $dst, $src$shift",
+ (SUBWrs GPR32:$dst, WZR, GPR32:$src, arith_shift32:$shift), 2>;
+def : InstAlias<"neg $dst, $src$shift",
+ (SUBXrs GPR64:$dst, XZR, GPR64:$src, arith_shift64:$shift), 2>;
+
+def : InstAlias<"negs $dst, $src", (SUBSWrs GPR32:$dst, WZR, GPR32:$src, 0), 3>;
+def : InstAlias<"negs $dst, $src", (SUBSXrs GPR64:$dst, XZR, GPR64:$src, 0), 3>;
+def : InstAlias<"negs $dst, $src$shift",
+ (SUBSWrs GPR32:$dst, WZR, GPR32:$src, arith_shift32:$shift), 2>;
+def : InstAlias<"negs $dst, $src$shift",
+ (SUBSXrs GPR64:$dst, XZR, GPR64:$src, arith_shift64:$shift), 2>;
+
+
+// Unsigned/Signed divide
+defm UDIV : Div<0, "udiv", udiv>;
+defm SDIV : Div<1, "sdiv", sdiv>;
+let isCodeGenOnly = 1 in {
+defm UDIV_Int : Div<0, "udiv", int_aarch64_udiv>;
+defm SDIV_Int : Div<1, "sdiv", int_aarch64_sdiv>;
+}
+
+// Variable shift
+defm ASRV : Shift<0b10, "asr", sra>;
+defm LSLV : Shift<0b00, "lsl", shl>;
+defm LSRV : Shift<0b01, "lsr", srl>;
+defm RORV : Shift<0b11, "ror", rotr>;
+
+def : ShiftAlias<"asrv", ASRVWr, GPR32>;
+def : ShiftAlias<"asrv", ASRVXr, GPR64>;
+def : ShiftAlias<"lslv", LSLVWr, GPR32>;
+def : ShiftAlias<"lslv", LSLVXr, GPR64>;
+def : ShiftAlias<"lsrv", LSRVWr, GPR32>;
+def : ShiftAlias<"lsrv", LSRVXr, GPR64>;
+def : ShiftAlias<"rorv", RORVWr, GPR32>;
+def : ShiftAlias<"rorv", RORVXr, GPR64>;
+
+// Multiply-add
+let AddedComplexity = 7 in {
+defm MADD : MulAccum<0, "madd", add>;
+defm MSUB : MulAccum<1, "msub", sub>;
+
+def : Pat<(i32 (mul GPR32:$Rn, GPR32:$Rm)),
+ (MADDWrrr GPR32:$Rn, GPR32:$Rm, WZR)>;
+def : Pat<(i64 (mul GPR64:$Rn, GPR64:$Rm)),
+ (MADDXrrr GPR64:$Rn, GPR64:$Rm, XZR)>;
+
+def : Pat<(i32 (ineg (mul GPR32:$Rn, GPR32:$Rm))),
+ (MSUBWrrr GPR32:$Rn, GPR32:$Rm, WZR)>;
+def : Pat<(i64 (ineg (mul GPR64:$Rn, GPR64:$Rm))),
+ (MSUBXrrr GPR64:$Rn, GPR64:$Rm, XZR)>;
+} // AddedComplexity = 7
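+// There is no stand-alone "mul" instruction: a multiply is madd/msub with the
+// zero register as the accumulator, e.g. "mul w0, w1, w2" is
+// "madd w0, w1, w2, wzr" (see the MulAccumWAlias definitions below).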
+
+let AddedComplexity = 5 in {
+def SMADDLrrr : WideMulAccum<0, 0b001, "smaddl", add, sext>;
+def SMSUBLrrr : WideMulAccum<1, 0b001, "smsubl", sub, sext>;
+def UMADDLrrr : WideMulAccum<0, 0b101, "umaddl", add, zext>;
+def UMSUBLrrr : WideMulAccum<1, 0b101, "umsubl", sub, zext>;
+
+def : Pat<(i64 (mul (sext GPR32:$Rn), (sext GPR32:$Rm))),
+ (SMADDLrrr GPR32:$Rn, GPR32:$Rm, XZR)>;
+def : Pat<(i64 (mul (zext GPR32:$Rn), (zext GPR32:$Rm))),
+ (UMADDLrrr GPR32:$Rn, GPR32:$Rm, XZR)>;
+
+def : Pat<(i64 (ineg (mul (sext GPR32:$Rn), (sext GPR32:$Rm)))),
+ (SMSUBLrrr GPR32:$Rn, GPR32:$Rm, XZR)>;
+def : Pat<(i64 (ineg (mul (zext GPR32:$Rn), (zext GPR32:$Rm)))),
+ (UMSUBLrrr GPR32:$Rn, GPR32:$Rm, XZR)>;
+} // AddedComplexity = 5
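+// For example, multiplying two sign-extended 32-bit values into a 64-bit
+// result becomes a single "smaddl x0, w1, w2, xzr", i.e. the "smull" alias
+// defined below.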
+
+def : MulAccumWAlias<"mul", MADDWrrr>;
+def : MulAccumXAlias<"mul", MADDXrrr>;
+def : MulAccumWAlias<"mneg", MSUBWrrr>;
+def : MulAccumXAlias<"mneg", MSUBXrrr>;
+def : WideMulAccumAlias<"smull", SMADDLrrr>;
+def : WideMulAccumAlias<"smnegl", SMSUBLrrr>;
+def : WideMulAccumAlias<"umull", UMADDLrrr>;
+def : WideMulAccumAlias<"umnegl", UMSUBLrrr>;
+
+// Multiply-high
+def SMULHrr : MulHi<0b010, "smulh", mulhs>;
+def UMULHrr : MulHi<0b110, "umulh", mulhu>;
+
+// CRC32
+def CRC32Brr : BaseCRC32<0, 0b00, 0, GPR32, int_aarch64_crc32b, "crc32b">;
+def CRC32Hrr : BaseCRC32<0, 0b01, 0, GPR32, int_aarch64_crc32h, "crc32h">;
+def CRC32Wrr : BaseCRC32<0, 0b10, 0, GPR32, int_aarch64_crc32w, "crc32w">;
+def CRC32Xrr : BaseCRC32<1, 0b11, 0, GPR64, int_aarch64_crc32x, "crc32x">;
+
+def CRC32CBrr : BaseCRC32<0, 0b00, 1, GPR32, int_aarch64_crc32cb, "crc32cb">;
+def CRC32CHrr : BaseCRC32<0, 0b01, 1, GPR32, int_aarch64_crc32ch, "crc32ch">;
+def CRC32CWrr : BaseCRC32<0, 0b10, 1, GPR32, int_aarch64_crc32cw, "crc32cw">;
+def CRC32CXrr : BaseCRC32<1, 0b11, 1, GPR64, int_aarch64_crc32cx, "crc32cx">;
-// Note that adde and sube can form a chain longer than two (e.g. for 256-bit
-// addition). So the flag-setting instructions are appropriate.
-def : Pat<(adde i32:$Rn, i32:$Rm), (ADCSwww $Rn, $Rm)>;
-def : Pat<(adde i64:$Rn, i64:$Rm), (ADCSxxx $Rn, $Rm)>;
-def : Pat<(sube i32:$Rn, i32:$Rm), (SBCSwww $Rn, $Rm)>;
-def : Pat<(sube i64:$Rn, i64:$Rm), (SBCSxxx $Rn, $Rm)>;
//===----------------------------------------------------------------------===//
-// Bitfield
+// Logical instructions.
//===----------------------------------------------------------------------===//
-// Contains: SBFM, BFM, UBFM, [SU]XT[BHW], ASR, LSR, LSL, SBFI[ZX], BFI, BFXIL,
-// UBFIZ, UBFX
-
-// Because of the rather complicated nearly-overlapping aliases, the decoding of
-// this range of instructions is handled manually. The architectural
-// instructions are BFM, SBFM and UBFM but a disassembler should never produce
-// these.
-//
-// In the end, the best option was to use BFM instructions for decoding under
-// almost all circumstances, but to create aliasing *Instructions* for each of
-// the canonical forms and specify a completely custom decoder which would
-// substitute the correct MCInst as needed.
-//
-// This also simplifies instruction selection, parsing, etc., because the
-// MCInsts have a shape that's closer to their use in code.
-
-//===-------------------------------
-// 1. The architectural BFM instructions
-//===-------------------------------
-
-def uimm5_asmoperand : AsmOperandClass {
- let Name = "UImm5";
- let PredicateMethod = "isUImm<5>";
- let RenderMethod = "addImmOperands";
- let DiagnosticType = "UImm5";
-}
-
-def uimm6_asmoperand : AsmOperandClass {
- let Name = "UImm6";
- let PredicateMethod = "isUImm<6>";
- let RenderMethod = "addImmOperands";
- let DiagnosticType = "UImm6";
-}
-
-def bitfield32_imm : Operand<i64>,
- ImmLeaf<i64, [{ return Imm >= 0 && Imm < 32; }]> {
- let ParserMatchClass = uimm5_asmoperand;
-
- let DecoderMethod = "DecodeBitfield32ImmOperand";
-}
-
-
-def bitfield64_imm : Operand<i64>,
- ImmLeaf<i64, [{ return Imm >= 0 && Imm < 64; }]> {
- let ParserMatchClass = uimm6_asmoperand;
-
- // Default decoder works in 64-bit case: the 6-bit field can take any value.
-}
-
-multiclass A64I_bitfieldSizes<bits<2> opc, string asmop> {
- def wwii : A64I_bitfield<0b0, opc, 0b0, (outs GPR32:$Rd),
- (ins GPR32:$Rn, bitfield32_imm:$ImmR, bitfield32_imm:$ImmS),
- !strconcat(asmop, "\t$Rd, $Rn, $ImmR, $ImmS"),
- [], NoItinerary>,
- Sched<[WriteALU, ReadALU]> {
- let DecoderMethod = "DecodeBitfieldInstruction";
- }
-
- def xxii : A64I_bitfield<0b1, opc, 0b1, (outs GPR64:$Rd),
- (ins GPR64:$Rn, bitfield64_imm:$ImmR, bitfield64_imm:$ImmS),
- !strconcat(asmop, "\t$Rd, $Rn, $ImmR, $ImmS"),
- [], NoItinerary>,
- Sched<[WriteALU, ReadALU]> {
- let DecoderMethod = "DecodeBitfieldInstruction";
- }
-}
-
-defm SBFM : A64I_bitfieldSizes<0b00, "sbfm">;
-defm UBFM : A64I_bitfieldSizes<0b10, "ubfm">;
-
-// BFM instructions modify the destination register rather than defining it
-// completely.
-def BFMwwii :
- A64I_bitfield<0b0, 0b01, 0b0, (outs GPR32:$Rd),
- (ins GPR32:$src, GPR32:$Rn, bitfield32_imm:$ImmR, bitfield32_imm:$ImmS),
- "bfm\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary>,
- Sched<[WriteALU, ReadALU, ReadALU]> {
- let DecoderMethod = "DecodeBitfieldInstruction";
- let Constraints = "$src = $Rd";
-}
-
-def BFMxxii :
- A64I_bitfield<0b1, 0b01, 0b1, (outs GPR64:$Rd),
- (ins GPR64:$src, GPR64:$Rn, bitfield64_imm:$ImmR, bitfield64_imm:$ImmS),
- "bfm\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary>,
- Sched<[WriteALU, ReadALU, ReadALU]> {
- let DecoderMethod = "DecodeBitfieldInstruction";
- let Constraints = "$src = $Rd";
-}
-
-
-//===-------------------------------
-// 2. Extend aliases to 64-bit dest
-//===-------------------------------
-
-// Unfortunately the extensions that end up as 64 bits cannot be handled by an
-// instruction alias: their syntax is (for example) "SXTB x0, w0", which needs
-// to be mapped to "SBFM x0, x0, #0, 7" (changing the class of Rn). InstAlias is
-// not capable of such a mapping as far as I'm aware.
-
-// Note that these instructions are strictly more specific than the
-// BFM ones (in ImmR) so they can handle their own decoding.
-class A64I_bf_ext<bit sf, bits<2> opc, RegisterClass GPRDest, ValueType dty,
- string asmop, bits<6> imms, dag pattern>
- : A64I_bitfield<sf, opc, sf,
- (outs GPRDest:$Rd), (ins GPR32:$Rn),
- !strconcat(asmop, "\t$Rd, $Rn"),
- [(set dty:$Rd, pattern)], NoItinerary>,
- Sched<[WriteALU, ReadALU]> {
- let ImmR = 0b000000;
- let ImmS = imms;
-}
-
-// Signed extensions
-def SXTBxw : A64I_bf_ext<0b1, 0b00, GPR64, i64, "sxtb", 7,
- (sext_inreg (anyext i32:$Rn), i8)>;
-def SXTBww : A64I_bf_ext<0b0, 0b00, GPR32, i32, "sxtb", 7,
- (sext_inreg i32:$Rn, i8)>;
-def SXTHxw : A64I_bf_ext<0b1, 0b00, GPR64, i64, "sxth", 15,
- (sext_inreg (anyext i32:$Rn), i16)>;
-def SXTHww : A64I_bf_ext<0b0, 0b00, GPR32, i32, "sxth", 15,
- (sext_inreg i32:$Rn, i16)>;
-def SXTWxw : A64I_bf_ext<0b1, 0b00, GPR64, i64, "sxtw", 31, (sext i32:$Rn)>;
-
-// Unsigned extensions
-def UXTBww : A64I_bf_ext<0b0, 0b10, GPR32, i32, "uxtb", 7,
- (and i32:$Rn, 255)>;
-def UXTHww : A64I_bf_ext<0b0, 0b10, GPR32, i32, "uxth", 15,
- (and i32:$Rn, 65535)>;
-
-// The 64-bit unsigned variants are not strictly architectural but recommended
-// for consistency.
-let isAsmParserOnly = 1 in {
- def UXTBxw : A64I_bf_ext<0b0, 0b10, GPR64, i64, "uxtb", 7,
- (and (anyext i32:$Rn), 255)>;
- def UXTHxw : A64I_bf_ext<0b0, 0b10, GPR64, i64, "uxth", 15,
- (and (anyext i32:$Rn), 65535)>;
-}
-
-// Extra patterns for when the source register is actually 64-bits
-// too. There's no architectural difference here, it's just LLVM
-// shenanigans. There's no need for equivalent zero-extension patterns
-// because they'll already be caught by logical (immediate) matching.
-def : Pat<(sext_inreg i64:$Rn, i8),
- (SXTBxw (EXTRACT_SUBREG $Rn, sub_32))>;
-def : Pat<(sext_inreg i64:$Rn, i16),
- (SXTHxw (EXTRACT_SUBREG $Rn, sub_32))>;
-def : Pat<(sext_inreg i64:$Rn, i32),
- (SXTWxw (EXTRACT_SUBREG $Rn, sub_32))>;
-
-
-//===-------------------------------
-// 3. Aliases for ASR and LSR (the simple shifts)
-//===-------------------------------
-
-// These also handle their own decoding because ImmS being set makes
-// them take precedence over BFM.
-multiclass A64I_shift<bits<2> opc, string asmop, SDNode opnode> {
- def wwi : A64I_bitfield<0b0, opc, 0b0,
- (outs GPR32:$Rd), (ins GPR32:$Rn, bitfield32_imm:$ImmR),
- !strconcat(asmop, "\t$Rd, $Rn, $ImmR"),
- [(set i32:$Rd, (opnode i32:$Rn, bitfield32_imm:$ImmR))],
- NoItinerary>,
- Sched<[WriteALU, ReadALU]> {
- let ImmS = 31;
- }
-
- def xxi : A64I_bitfield<0b1, opc, 0b1,
- (outs GPR64:$Rd), (ins GPR64:$Rn, bitfield64_imm:$ImmR),
- !strconcat(asmop, "\t$Rd, $Rn, $ImmR"),
- [(set i64:$Rd, (opnode i64:$Rn, bitfield64_imm:$ImmR))],
- NoItinerary>,
- Sched<[WriteALU, ReadALU]> {
- let ImmS = 63;
- }
-
-}
-
-defm ASR : A64I_shift<0b00, "asr", sra>;
-defm LSR : A64I_shift<0b10, "lsr", srl>;
-
-//===-------------------------------
-// 4. Aliases for LSL
-//===-------------------------------
-
-// Unfortunately LSL and subsequent aliases are much more complicated. We need
-// to be able to say that certain output instruction fields depend in a complex
-// manner on combinations of input assembly fields.
-//
-// MIOperandInfo *might* have been able to do it, but at the cost of
-// significantly more C++ code.
-
-// N.b. contrary to usual practice, these operands store the shift rather than
-// the machine bits in an MCInst. The complexity overhead of consistency
-// outweighed the benefits in this case (custom asmparser, printer and selection
-// vs custom encoder).
-def bitfield32_lsl_imm : Operand<i64>,
- ImmLeaf<i64, [{ return Imm >= 0 && Imm <= 31; }]> {
- let ParserMatchClass = uimm5_asmoperand;
- let EncoderMethod = "getBitfield32LSLOpValue";
-}
-
-def bitfield64_lsl_imm : Operand<i64>,
- ImmLeaf<i64, [{ return Imm >= 0 && Imm <= 63; }]> {
- let ParserMatchClass = uimm6_asmoperand;
- let EncoderMethod = "getBitfield64LSLOpValue";
-}
-
-class A64I_bitfield_lsl<bit sf, RegisterClass GPR, ValueType ty,
- Operand operand>
- : A64I_bitfield<sf, 0b10, sf, (outs GPR:$Rd), (ins GPR:$Rn, operand:$FullImm),
- "lsl\t$Rd, $Rn, $FullImm",
- [(set ty:$Rd, (shl ty:$Rn, operand:$FullImm))],
- NoItinerary>,
- Sched<[WriteALU, ReadALU]> {
- bits<12> FullImm;
- let ImmR = FullImm{5-0};
- let ImmS = FullImm{11-6};
-
- // No disassembler allowed because it would overlap with BFM which does the
- // actual work.
- let isAsmParserOnly = 1;
-}
-
-def LSLwwi : A64I_bitfield_lsl<0b0, GPR32, i32, bitfield32_lsl_imm>;
-def LSLxxi : A64I_bitfield_lsl<0b1, GPR64, i64, bitfield64_lsl_imm>;
-
-//===-------------------------------
-// 5. Aliases for bitfield extract instructions
-//===-------------------------------
-
-def bfx32_width_asmoperand : AsmOperandClass {
- let Name = "BFX32Width";
- let PredicateMethod = "isBitfieldWidth<32>";
- let RenderMethod = "addBFXWidthOperands";
- let DiagnosticType = "Width32";
-}
-
-def bfx32_width : Operand<i64>, ImmLeaf<i64, [{ return true; }]> {
- let PrintMethod = "printBFXWidthOperand";
- let ParserMatchClass = bfx32_width_asmoperand;
-}
-
-def bfx64_width_asmoperand : AsmOperandClass {
- let Name = "BFX64Width";
- let PredicateMethod = "isBitfieldWidth<64>";
- let RenderMethod = "addBFXWidthOperands";
- let DiagnosticType = "Width64";
-}
-
-def bfx64_width : Operand<i64> {
- let PrintMethod = "printBFXWidthOperand";
- let ParserMatchClass = bfx64_width_asmoperand;
-}
-
-
-multiclass A64I_bitfield_extract<bits<2> opc, string asmop, SDNode op> {
- def wwii : A64I_bitfield<0b0, opc, 0b0, (outs GPR32:$Rd),
- (ins GPR32:$Rn, bitfield32_imm:$ImmR, bfx32_width:$ImmS),
- !strconcat(asmop, "\t$Rd, $Rn, $ImmR, $ImmS"),
- [(set i32:$Rd, (op i32:$Rn, imm:$ImmR, imm:$ImmS))],
- NoItinerary>,
- Sched<[WriteALU, ReadALU]> {
- // As above, no disassembler allowed.
- let isAsmParserOnly = 1;
- }
-
- def xxii : A64I_bitfield<0b1, opc, 0b1, (outs GPR64:$Rd),
- (ins GPR64:$Rn, bitfield64_imm:$ImmR, bfx64_width:$ImmS),
- !strconcat(asmop, "\t$Rd, $Rn, $ImmR, $ImmS"),
- [(set i64:$Rd, (op i64:$Rn, imm:$ImmR, imm:$ImmS))],
- NoItinerary>,
- Sched<[WriteALU, ReadALU]> {
- // As above, no disassembler allowed.
- let isAsmParserOnly = 1;
- }
-}
-
-defm SBFX : A64I_bitfield_extract<0b00, "sbfx", A64Sbfx>;
-defm UBFX : A64I_bitfield_extract<0b10, "ubfx", A64Ubfx>;
-
-// Again, variants based on BFM modify Rd so need it as an input too.
-def BFXILwwii : A64I_bitfield<0b0, 0b01, 0b0, (outs GPR32:$Rd),
- (ins GPR32:$src, GPR32:$Rn, bitfield32_imm:$ImmR, bfx32_width:$ImmS),
- "bfxil\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary>,
- Sched<[WriteALU, ReadALU, ReadALU]> {
- // As above, no disassembler allowed.
- let isAsmParserOnly = 1;
- let Constraints = "$src = $Rd";
-}
-
-def BFXILxxii : A64I_bitfield<0b1, 0b01, 0b1, (outs GPR64:$Rd),
- (ins GPR64:$src, GPR64:$Rn, bitfield64_imm:$ImmR, bfx64_width:$ImmS),
- "bfxil\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary>,
- Sched<[WriteALU, ReadALU, ReadALU]> {
- // As above, no disassembler allowed.
- let isAsmParserOnly = 1;
- let Constraints = "$src = $Rd";
-}
-
-// SBFX instructions can do a 1-instruction sign-extension of boolean values.
-def : Pat<(sext_inreg i64:$Rn, i1), (SBFXxxii $Rn, 0, 0)>;
-def : Pat<(sext_inreg i32:$Rn, i1), (SBFXwwii $Rn, 0, 0)>;
-def : Pat<(i64 (sext_inreg (anyext i32:$Rn), i1)),
- (SBFXxxii (SUBREG_TO_REG (i64 0), $Rn, sub_32), 0, 0)>;
-
-// UBFX makes sense as an implementation of a 64-bit zero-extension too. Could
-// use either 64-bit or 32-bit variant, but 32-bit might be more efficient.
-def : Pat<(i64 (zext i32:$Rn)), (SUBREG_TO_REG (i64 0), (UBFXwwii $Rn, 0, 31),
- sub_32)>;
-
-//===-------------------------------
-// 6. Aliases for bitfield insert instructions
-//===-------------------------------
-
-def bfi32_lsb_asmoperand : AsmOperandClass {
- let Name = "BFI32LSB";
- let PredicateMethod = "isUImm<5>";
- let RenderMethod = "addBFILSBOperands<32>";
- let DiagnosticType = "UImm5";
-}
-
-def bfi32_lsb : Operand<i64>,
- ImmLeaf<i64, [{ return Imm >= 0 && Imm <= 31; }]> {
- let PrintMethod = "printBFILSBOperand<32>";
- let ParserMatchClass = bfi32_lsb_asmoperand;
-}
-
-def bfi64_lsb_asmoperand : AsmOperandClass {
- let Name = "BFI64LSB";
- let PredicateMethod = "isUImm<6>";
- let RenderMethod = "addBFILSBOperands<64>";
- let DiagnosticType = "UImm6";
-}
-
-def bfi64_lsb : Operand<i64>,
- ImmLeaf<i64, [{ return Imm >= 0 && Imm <= 63; }]> {
- let PrintMethod = "printBFILSBOperand<64>";
- let ParserMatchClass = bfi64_lsb_asmoperand;
-}
-
-// Width verification is performed during conversion, so the width operand can
-// be shared between the 32- and 64-bit cases. It is still needed for the print
-// method, though, because ImmR encodes "width - 1".
-def bfi32_width_asmoperand : AsmOperandClass {
- let Name = "BFI32Width";
- let PredicateMethod = "isBitfieldWidth<32>";
- let RenderMethod = "addBFIWidthOperands";
- let DiagnosticType = "Width32";
-}
-
-def bfi32_width : Operand<i64>,
- ImmLeaf<i64, [{ return Imm >= 1 && Imm <= 32; }]> {
- let PrintMethod = "printBFIWidthOperand";
- let ParserMatchClass = bfi32_width_asmoperand;
-}
-
-def bfi64_width_asmoperand : AsmOperandClass {
- let Name = "BFI64Width";
- let PredicateMethod = "isBitfieldWidth<64>";
- let RenderMethod = "addBFIWidthOperands";
- let DiagnosticType = "Width64";
-}
-
-def bfi64_width : Operand<i64>,
- ImmLeaf<i64, [{ return Imm >= 1 && Imm <= 64; }]> {
- let PrintMethod = "printBFIWidthOperand";
- let ParserMatchClass = bfi64_width_asmoperand;
-}
-
-multiclass A64I_bitfield_insert<bits<2> opc, string asmop> {
- def wwii : A64I_bitfield<0b0, opc, 0b0, (outs GPR32:$Rd),
- (ins GPR32:$Rn, bfi32_lsb:$ImmR, bfi32_width:$ImmS),
- !strconcat(asmop, "\t$Rd, $Rn, $ImmR, $ImmS"),
- [], NoItinerary>,
- Sched<[WriteALU, ReadALU]> {
- // As above, no disassembler allowed.
- let isAsmParserOnly = 1;
- }
-
- def xxii : A64I_bitfield<0b1, opc, 0b1, (outs GPR64:$Rd),
- (ins GPR64:$Rn, bfi64_lsb:$ImmR, bfi64_width:$ImmS),
- !strconcat(asmop, "\t$Rd, $Rn, $ImmR, $ImmS"),
- [], NoItinerary>,
- Sched<[WriteALU, ReadALU]> {
- // As above, no disassembler allowed.
- let isAsmParserOnly = 1;
- }
-}
-
-defm SBFIZ : A64I_bitfield_insert<0b00, "sbfiz">;
-defm UBFIZ : A64I_bitfield_insert<0b10, "ubfiz">;
+// (immediate)
+defm ANDS : LogicalImmS<0b11, "ands", AArch64and_flag>;
+defm AND : LogicalImm<0b00, "and", and>;
+defm EOR : LogicalImm<0b10, "eor", xor>;
+defm ORR : LogicalImm<0b01, "orr", or>;
+
+// FIXME: these aliases *are* canonical sometimes (when movz can't be
+// used). Actually, it seems to be working right now, but putting logical_immXX
+// here is a bit dodgy on the AsmParser side too.
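+// For example, #0xaaaaaaaa is not a single MOVZ/MOVN immediate but is a valid
+// logical immediate, so "mov w0, #0xaaaaaaaa" assembles to
+// "orr w0, wzr, #0xaaaaaaaa".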
+def : InstAlias<"mov $dst, $imm", (ORRWri GPR32sp:$dst, WZR,
+ logical_imm32:$imm), 0>;
+def : InstAlias<"mov $dst, $imm", (ORRXri GPR64sp:$dst, XZR,
+ logical_imm64:$imm), 0>;
+
+
+// (register)
+defm ANDS : LogicalRegS<0b11, 0, "ands", AArch64and_flag>;
+defm BICS : LogicalRegS<0b11, 1, "bics",
+ BinOpFrag<(AArch64and_flag node:$LHS, (not node:$RHS))>>;
+defm AND : LogicalReg<0b00, 0, "and", and>;
+defm BIC : LogicalReg<0b00, 1, "bic",
+ BinOpFrag<(and node:$LHS, (not node:$RHS))>>;
+defm EON : LogicalReg<0b10, 1, "eon",
+ BinOpFrag<(xor node:$LHS, (not node:$RHS))>>;
+defm EOR : LogicalReg<0b10, 0, "eor", xor>;
+defm ORN : LogicalReg<0b01, 1, "orn",
+ BinOpFrag<(or node:$LHS, (not node:$RHS))>>;
+defm ORR : LogicalReg<0b01, 0, "orr", or>;
+
+def : InstAlias<"mov $dst, $src", (ORRWrs GPR32:$dst, WZR, GPR32:$src, 0), 2>;
+def : InstAlias<"mov $dst, $src", (ORRXrs GPR64:$dst, XZR, GPR64:$src, 0), 2>;
+
+def : InstAlias<"mvn $Wd, $Wm", (ORNWrs GPR32:$Wd, WZR, GPR32:$Wm, 0), 3>;
+def : InstAlias<"mvn $Xd, $Xm", (ORNXrs GPR64:$Xd, XZR, GPR64:$Xm, 0), 3>;
+
+def : InstAlias<"mvn $Wd, $Wm$sh",
+ (ORNWrs GPR32:$Wd, WZR, GPR32:$Wm, logical_shift32:$sh), 2>;
+def : InstAlias<"mvn $Xd, $Xm$sh",
+ (ORNXrs GPR64:$Xd, XZR, GPR64:$Xm, logical_shift64:$sh), 2>;
+
+def : InstAlias<"tst $src1, $src2",
+ (ANDSWri WZR, GPR32:$src1, logical_imm32:$src2), 2>;
+def : InstAlias<"tst $src1, $src2",
+ (ANDSXri XZR, GPR64:$src1, logical_imm64:$src2), 2>;
+
+def : InstAlias<"tst $src1, $src2",
+ (ANDSWrs WZR, GPR32:$src1, GPR32:$src2, 0), 3>;
+def : InstAlias<"tst $src1, $src2",
+ (ANDSXrs XZR, GPR64:$src1, GPR64:$src2, 0), 3>;
+
+def : InstAlias<"tst $src1, $src2$sh",
+ (ANDSWrs WZR, GPR32:$src1, GPR32:$src2, logical_shift32:$sh), 2>;
+def : InstAlias<"tst $src1, $src2$sh",
+ (ANDSXrs XZR, GPR64:$src1, GPR64:$src2, logical_shift64:$sh), 2>;
+
+
+def : Pat<(not GPR32:$Wm), (ORNWrr WZR, GPR32:$Wm)>;
+def : Pat<(not GPR64:$Xm), (ORNXrr XZR, GPR64:$Xm)>;
-def BFIwwii : A64I_bitfield<0b0, 0b01, 0b0, (outs GPR32:$Rd),
- (ins GPR32:$src, GPR32:$Rn, bfi32_lsb:$ImmR, bfi32_width:$ImmS),
- "bfi\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary>,
- Sched<[WriteALU, ReadALU, ReadALU]> {
- // As above, no disassembler allowed.
- let isAsmParserOnly = 1;
- let Constraints = "$src = $Rd";
-}
-
-def BFIxxii : A64I_bitfield<0b1, 0b01, 0b1, (outs GPR64:$Rd),
- (ins GPR64:$src, GPR64:$Rn, bfi64_lsb:$ImmR, bfi64_width:$ImmS),
- "bfi\t$Rd, $Rn, $ImmR, $ImmS", [], NoItinerary>,
- Sched<[WriteALU, ReadALU, ReadALU]> {
- // As above, no disassembler allowed.
- let isAsmParserOnly = 1;
- let Constraints = "$src = $Rd";
-}
//===----------------------------------------------------------------------===//
-// Compare and branch (immediate)
+// One operand data processing instructions.
//===----------------------------------------------------------------------===//
-// Contains: CBZ, CBNZ
-class label_asmoperand<int width, int scale> : AsmOperandClass {
- let Name = "Label" # width # "_" # scale;
- let PredicateMethod = "isLabel<" # width # "," # scale # ">";
- let RenderMethod = "addLabelOperands<" # width # ", " # scale # ">";
- let DiagnosticType = "Label";
-}
-
-def label_wid19_scal4_asmoperand : label_asmoperand<19, 4>;
-
-// All conditional immediate branches are the same really: 19 signed bits scaled
-// by the instruction-size (4).
-def bcc_target : Operand<OtherVT> {
- // This label is a 19-bit offset from PC, scaled by the instruction-width: 4.
- let ParserMatchClass = label_wid19_scal4_asmoperand;
- let PrintMethod = "printLabelOperand<19, 4>";
- let EncoderMethod = "getLabelOpValue<AArch64::fixup_a64_condbr>";
- let OperandType = "OPERAND_PCREL";
-}
-
-multiclass cmpbr_sizes<bit op, string asmop, ImmLeaf SETOP> {
- let isBranch = 1, isTerminator = 1 in {
- def x : A64I_cmpbr<0b1, op,
- (outs),
- (ins GPR64:$Rt, bcc_target:$Label),
- !strconcat(asmop,"\t$Rt, $Label"),
- [(A64br_cc (A64cmp i64:$Rt, 0), SETOP, bb:$Label)],
- NoItinerary>,
- Sched<[WriteBr, ReadBr]>;
-
- def w : A64I_cmpbr<0b0, op,
- (outs),
- (ins GPR32:$Rt, bcc_target:$Label),
- !strconcat(asmop,"\t$Rt, $Label"),
- [(A64br_cc (A64cmp i32:$Rt, 0), SETOP, bb:$Label)],
- NoItinerary>,
- Sched<[WriteBr, ReadBr]>;
- }
-}
-
-defm CBZ : cmpbr_sizes<0b0, "cbz", ImmLeaf<i32, [{
- return Imm == A64CC::EQ;
-}]> >;
-defm CBNZ : cmpbr_sizes<0b1, "cbnz", ImmLeaf<i32, [{
- return Imm == A64CC::NE;
-}]> >;
+defm CLS : OneOperandData<0b101, "cls">;
+defm CLZ : OneOperandData<0b100, "clz", ctlz>;
+defm RBIT : OneOperandData<0b000, "rbit">;
+def REV16Wr : OneWRegData<0b001, "rev16",
+ UnOpFrag<(rotr (bswap node:$LHS), (i64 16))>>;
+def REV16Xr : OneXRegData<0b001, "rev16", null_frag>;
+
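+// AArch64 has no count-trailing-zeros instruction: cttz(x) is computed below
+// as clz(rbit(x)). The cls patterns recognise an expansion of "count leading
+// sign bits", cls(x) == clz(x ^ (x asr 31)) - 1; shifting the xor left by one
+// and OR-ing in a 1 folds away the "- 1" and keeps the clz operand non-zero.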
+def : Pat<(cttz GPR32:$Rn),
+ (CLZWr (RBITWr GPR32:$Rn))>;
+def : Pat<(cttz GPR64:$Rn),
+ (CLZXr (RBITXr GPR64:$Rn))>;
+def : Pat<(ctlz (or (shl (xor (sra GPR32:$Rn, (i64 31)), GPR32:$Rn), (i64 1)),
+ (i32 1))),
+ (CLSWr GPR32:$Rn)>;
+def : Pat<(ctlz (or (shl (xor (sra GPR64:$Rn, (i64 63)), GPR64:$Rn), (i64 1)),
+ (i64 1))),
+ (CLSXr GPR64:$Rn)>;
+
+// Unlike the other one-operand instructions, the instructions with the "rev"
+// mnemonic do *not* just differ in the size bit: they use different opcode
+// bits for the different sizes.
+def REVWr : OneWRegData<0b010, "rev", bswap>;
+def REVXr : OneXRegData<0b011, "rev", bswap>;
+def REV32Xr : OneXRegData<0b010, "rev32",
+ UnOpFrag<(rotr (bswap node:$LHS), (i64 32))>>;
+
+// The bswap commutes with the rotr so we want a pattern for both possible
+// orders.
+def : Pat<(bswap (rotr GPR32:$Rn, (i64 16))), (REV16Wr GPR32:$Rn)>;
+def : Pat<(bswap (rotr GPR64:$Rn, (i64 32))), (REV32Xr GPR64:$Rn)>;
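+// For example, on a W register the bytes [b3 b2 b1 b0] map to [b2 b3 b0 b1]
+// in either order, which is exactly the rev16 byte swap within halfwords.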
//===----------------------------------------------------------------------===//
-// Conditional branch (immediate) instructions
+// Bitfield immediate extraction instruction.
//===----------------------------------------------------------------------===//
-// Contains: B.cc
-
-def cond_code_asmoperand : AsmOperandClass {
- let Name = "CondCode";
- let DiagnosticType = "CondCode";
-}
-
-def cond_code : Operand<i32>, ImmLeaf<i32, [{
- return Imm >= 0 && Imm <= 15;
-}]> {
- let PrintMethod = "printCondCodeOperand";
- let ParserMatchClass = cond_code_asmoperand;
-}
-
-def Bcc : A64I_condbr<0b0, 0b0, (outs),
- (ins cond_code:$Cond, bcc_target:$Label),
- "b.$Cond $Label", [(A64br_cc NZCV, (i32 imm:$Cond), bb:$Label)],
- NoItinerary>,
- Sched<[WriteBr]> {
- let Uses = [NZCV];
- let isBranch = 1;
- let isTerminator = 1;
-}
+let neverHasSideEffects = 1 in
+defm EXTR : ExtractImm<"extr">;
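+// EXTR also provides rotate-right: rotating right by n is extracting from bit
+// n of a register concatenated with itself, e.g. "ror w0, w1, #8" is
+// "extr w0, w1, w1, #8" (the alias and patterns below).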
+def : InstAlias<"ror $dst, $src, $shift",
+ (EXTRWrri GPR32:$dst, GPR32:$src, GPR32:$src, imm0_31:$shift)>;
+def : InstAlias<"ror $dst, $src, $shift",
+ (EXTRXrri GPR64:$dst, GPR64:$src, GPR64:$src, imm0_63:$shift)>;
+
+def : Pat<(rotr GPR32:$Rn, (i64 imm0_31:$imm)),
+ (EXTRWrri GPR32:$Rn, GPR32:$Rn, imm0_31:$imm)>;
+def : Pat<(rotr GPR64:$Rn, (i64 imm0_63:$imm)),
+ (EXTRXrri GPR64:$Rn, GPR64:$Rn, imm0_63:$imm)>;
//===----------------------------------------------------------------------===//
-// Conditional compare (immediate) instructions
+// Other bitfield immediate instructions.
//===----------------------------------------------------------------------===//
-// Contains: CCMN, CCMP
-
-def uimm4_asmoperand : AsmOperandClass {
- let Name = "UImm4";
- let PredicateMethod = "isUImm<4>";
- let RenderMethod = "addImmOperands";
- let DiagnosticType = "UImm4";
-}
-
-def uimm4 : Operand<i32> {
- let ParserMatchClass = uimm4_asmoperand;
+let neverHasSideEffects = 1 in {
+defm BFM : BitfieldImmWith2RegArgs<0b01, "bfm">;
+defm SBFM : BitfieldImm<0b00, "sbfm">;
+defm UBFM : BitfieldImm<0b10, "ubfm">;
}
-def uimm5 : Operand<i32> {
- let ParserMatchClass = uimm5_asmoperand;
-}
-
-// The only difference between this operand and the one for instructions like
-// B.cc is that it's parsed manually. The other gets parsed implicitly as part
-// of the mnemonic handling.
-def cond_code_op_asmoperand : AsmOperandClass {
- let Name = "CondCodeOp";
- let RenderMethod = "addCondCodeOperands";
- let PredicateMethod = "isCondCode";
- let ParserMethod = "ParseCondCodeOperand";
- let DiagnosticType = "CondCode";
-}
+def i32shift_a : Operand<i64>, SDNodeXForm<imm, [{
+ uint64_t enc = (32 - N->getZExtValue()) & 0x1f;
+ return CurDAG->getTargetConstant(enc, MVT::i64);
+}]>;
-def cond_code_op : Operand<i32> {
- let PrintMethod = "printCondCodeOperand";
- let ParserMatchClass = cond_code_op_asmoperand;
-}
+def i32shift_b : Operand<i64>, SDNodeXForm<imm, [{
+ uint64_t enc = 31 - N->getZExtValue();
+ return CurDAG->getTargetConstant(enc, MVT::i64);
+}]>;
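+// Together these turn an immediate left shift into a single UBFM (see the shl
+// patterns below): "lsl w0, w1, #5" becomes "ubfm w0, w1, #27, #26", since
+// ImmR = (32 - 5) & 31 = 27 and ImmS = 31 - 5 = 26.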
-class A64I_condcmpimmImpl<bit sf, bit op, RegisterClass GPR, string asmop>
- : A64I_condcmpimm<sf, op, 0b0, 0b0, 0b1, (outs),
- (ins GPR:$Rn, uimm5:$UImm5, uimm4:$NZCVImm, cond_code_op:$Cond),
- !strconcat(asmop, "\t$Rn, $UImm5, $NZCVImm, $Cond"),
- [], NoItinerary>,
- Sched<[WriteCMP, ReadCMP]> {
- let Defs = [NZCV];
-}
+// min(7, 31 - shift_amt)
+def i32shift_sext_i8 : Operand<i64>, SDNodeXForm<imm, [{
+ uint64_t enc = 31 - N->getZExtValue();
+ enc = enc > 7 ? 7 : enc;
+ return CurDAG->getTargetConstant(enc, MVT::i64);
+}]>;
-def CCMNwi : A64I_condcmpimmImpl<0b0, 0b0, GPR32, "ccmn">;
-def CCMNxi : A64I_condcmpimmImpl<0b1, 0b0, GPR64, "ccmn">;
-def CCMPwi : A64I_condcmpimmImpl<0b0, 0b1, GPR32, "ccmp">;
-def CCMPxi : A64I_condcmpimmImpl<0b1, 0b1, GPR64, "ccmp">;
+// min(15, 31 - shift_amt)
+def i32shift_sext_i16 : Operand<i64>, SDNodeXForm<imm, [{
+ uint64_t enc = 31 - N->getZExtValue();
+ enc = enc > 15 ? 15 : enc;
+ return CurDAG->getTargetConstant(enc, MVT::i64);
+}]>;
-//===----------------------------------------------------------------------===//
-// Conditional compare (register) instructions
-//===----------------------------------------------------------------------===//
-// Contains: CCMN, CCMP
-
-class A64I_condcmpregImpl<bit sf, bit op, RegisterClass GPR, string asmop>
- : A64I_condcmpreg<sf, op, 0b0, 0b0, 0b1,
- (outs),
- (ins GPR:$Rn, GPR:$Rm, uimm4:$NZCVImm, cond_code_op:$Cond),
- !strconcat(asmop, "\t$Rn, $Rm, $NZCVImm, $Cond"),
- [], NoItinerary>,
- Sched<[WriteCMP, ReadCMP, ReadCMP]> {
- let Defs = [NZCV];
-}
+def i64shift_a : Operand<i64>, SDNodeXForm<imm, [{
+ uint64_t enc = (64 - N->getZExtValue()) & 0x3f;
+ return CurDAG->getTargetConstant(enc, MVT::i64);
+}]>;
-def CCMNww : A64I_condcmpregImpl<0b0, 0b0, GPR32, "ccmn">;
-def CCMNxx : A64I_condcmpregImpl<0b1, 0b0, GPR64, "ccmn">;
-def CCMPww : A64I_condcmpregImpl<0b0, 0b1, GPR32, "ccmp">;
-def CCMPxx : A64I_condcmpregImpl<0b1, 0b1, GPR64, "ccmp">;
+def i64shift_b : Operand<i64>, SDNodeXForm<imm, [{
+ uint64_t enc = 63 - N->getZExtValue();
+ return CurDAG->getTargetConstant(enc, MVT::i64);
+}]>;
-//===----------------------------------------------------------------------===//
-// Conditional select instructions
-//===----------------------------------------------------------------------===//
-// Contains: CSEL, CSINC, CSINV, CSNEG + aliases CSET, CSETM, CINC, CINV, CNEG
-
-// Condition code which is encoded as the inversion (semantically rather than
-// bitwise) in the instruction.
-def inv_cond_code_op_asmoperand : AsmOperandClass {
- let Name = "InvCondCodeOp";
- let RenderMethod = "addInvCondCodeOperands";
- let PredicateMethod = "isCondCode";
- let ParserMethod = "ParseCondCodeOperand";
- let DiagnosticType = "CondCode";
-}
+// min(7, 63 - shift_amt)
+def i64shift_sext_i8 : Operand<i64>, SDNodeXForm<imm, [{
+ uint64_t enc = 63 - N->getZExtValue();
+ enc = enc > 7 ? 7 : enc;
+ return CurDAG->getTargetConstant(enc, MVT::i64);
+}]>;
-def inv_cond_code_op : Operand<i32> {
- let ParserMatchClass = inv_cond_code_op_asmoperand;
-}
+// min(15, 63 - shift_amt)
+def i64shift_sext_i16 : Operand<i64>, SDNodeXForm<imm, [{
+ uint64_t enc = 63 - N->getZExtValue();
+ enc = enc > 15 ? 15 : enc;
+ return CurDAG->getTargetConstant(enc, MVT::i64);
+}]>;
-// Having a separate operand for the selectable use-case is debatable, but gives
-// consistency with cond_code.
-def inv_cond_XFORM : SDNodeXForm<imm, [{
- A64CC::CondCodes CC = static_cast<A64CC::CondCodes>(N->getZExtValue());
- return CurDAG->getTargetConstant(A64InvertCondCode(CC), MVT::i32);
+// min(31, 63 - shift_amt)
+def i64shift_sext_i32 : Operand<i64>, SDNodeXForm<imm, [{
+ uint64_t enc = 63 - N->getZExtValue();
+ enc = enc > 31 ? 31 : enc;
+ return CurDAG->getTargetConstant(enc, MVT::i64);
}]>;
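+// The clamped forms above exist so that a shift can be folded together with a
+// sign-extension into a single SBFM (an sbfiz): ImmS must not name a bit
+// beyond the width of the sign-extended source.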
-def inv_cond_code
- : ImmLeaf<i32, [{ return Imm >= 0 && Imm <= 15; }], inv_cond_XFORM>;
-
-
-multiclass A64I_condselSizes<bit op, bits<2> op2, string asmop,
- SDPatternOperator select> {
- let Uses = [NZCV] in {
- def wwwc : A64I_condsel<0b0, op, 0b0, op2,
- (outs GPR32:$Rd),
- (ins GPR32:$Rn, GPR32:$Rm, cond_code_op:$Cond),
- !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Cond"),
- [(set i32:$Rd, (select i32:$Rn, i32:$Rm))],
- NoItinerary>,
- Sched<[WriteCMP, ReadCMP, ReadCMP]>;
-
-
- def xxxc : A64I_condsel<0b1, op, 0b0, op2,
- (outs GPR64:$Rd),
- (ins GPR64:$Rn, GPR64:$Rm, cond_code_op:$Cond),
- !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Cond"),
- [(set i64:$Rd, (select i64:$Rn, i64:$Rm))],
- NoItinerary>,
- Sched<[WriteCMP, ReadCMP, ReadCMP]>;
- }
-}
+def : Pat<(shl GPR32:$Rn, (i64 imm0_31:$imm)),
+ (UBFMWri GPR32:$Rn, (i64 (i32shift_a imm0_31:$imm)),
+ (i64 (i32shift_b imm0_31:$imm)))>;
+def : Pat<(shl GPR64:$Rn, (i64 imm0_63:$imm)),
+ (UBFMXri GPR64:$Rn, (i64 (i64shift_a imm0_63:$imm)),
+ (i64 (i64shift_b imm0_63:$imm)))>;
-def simple_select
- : PatFrag<(ops node:$lhs, node:$rhs),
- (A64select_cc NZCV, node:$lhs, node:$rhs, (i32 imm:$Cond))>;
-
-class complex_select<SDPatternOperator opnode>
- : PatFrag<(ops node:$lhs, node:$rhs),
- (A64select_cc NZCV, node:$lhs, (opnode node:$rhs), (i32 imm:$Cond))>;
-
-
-defm CSEL : A64I_condselSizes<0b0, 0b00, "csel", simple_select>;
-defm CSINC : A64I_condselSizes<0b0, 0b01, "csinc",
- complex_select<PatFrag<(ops node:$val),
- (add node:$val, 1)>>>;
-defm CSINV : A64I_condselSizes<0b1, 0b00, "csinv", complex_select<not>>;
-defm CSNEG : A64I_condselSizes<0b1, 0b01, "csneg", complex_select<ineg>>;
-
-// Now the instruction aliases, which fit nicely into LLVM's model:
-
-def : InstAlias<"cset $Rd, $Cond",
- (CSINCwwwc GPR32:$Rd, WZR, WZR, inv_cond_code_op:$Cond)>;
-def : InstAlias<"cset $Rd, $Cond",
- (CSINCxxxc GPR64:$Rd, XZR, XZR, inv_cond_code_op:$Cond)>;
-def : InstAlias<"csetm $Rd, $Cond",
- (CSINVwwwc GPR32:$Rd, WZR, WZR, inv_cond_code_op:$Cond)>;
-def : InstAlias<"csetm $Rd, $Cond",
- (CSINVxxxc GPR64:$Rd, XZR, XZR, inv_cond_code_op:$Cond)>;
-def : InstAlias<"cinc $Rd, $Rn, $Cond",
- (CSINCwwwc GPR32:$Rd, GPR32:$Rn, GPR32:$Rn, inv_cond_code_op:$Cond)>;
-def : InstAlias<"cinc $Rd, $Rn, $Cond",
- (CSINCxxxc GPR64:$Rd, GPR64:$Rn, GPR64:$Rn, inv_cond_code_op:$Cond)>;
-def : InstAlias<"cinv $Rd, $Rn, $Cond",
- (CSINVwwwc GPR32:$Rd, GPR32:$Rn, GPR32:$Rn, inv_cond_code_op:$Cond)>;
-def : InstAlias<"cinv $Rd, $Rn, $Cond",
- (CSINVxxxc GPR64:$Rd, GPR64:$Rn, GPR64:$Rn, inv_cond_code_op:$Cond)>;
-def : InstAlias<"cneg $Rd, $Rn, $Cond",
- (CSNEGwwwc GPR32:$Rd, GPR32:$Rn, GPR32:$Rn, inv_cond_code_op:$Cond)>;
-def : InstAlias<"cneg $Rd, $Rn, $Cond",
- (CSNEGxxxc GPR64:$Rd, GPR64:$Rn, GPR64:$Rn, inv_cond_code_op:$Cond)>;
-
-// Finally some helper patterns.
-
-// For CSET (a.k.a. zero-extension of icmp)
-def : Pat<(A64select_cc NZCV, 0, 1, cond_code:$Cond),
- (CSINCwwwc WZR, WZR, cond_code:$Cond)>;
-def : Pat<(A64select_cc NZCV, 1, 0, inv_cond_code:$Cond),
- (CSINCwwwc WZR, WZR, inv_cond_code:$Cond)>;
-
-def : Pat<(A64select_cc NZCV, 0, 1, cond_code:$Cond),
- (CSINCxxxc XZR, XZR, cond_code:$Cond)>;
-def : Pat<(A64select_cc NZCV, 1, 0, inv_cond_code:$Cond),
- (CSINCxxxc XZR, XZR, inv_cond_code:$Cond)>;
-
-// For CSETM (a.k.a. sign-extension of icmp)
-def : Pat<(A64select_cc NZCV, 0, -1, cond_code:$Cond),
- (CSINVwwwc WZR, WZR, cond_code:$Cond)>;
-def : Pat<(A64select_cc NZCV, -1, 0, inv_cond_code:$Cond),
- (CSINVwwwc WZR, WZR, inv_cond_code:$Cond)>;
-
-def : Pat<(A64select_cc NZCV, 0, -1, cond_code:$Cond),
- (CSINVxxxc XZR, XZR, cond_code:$Cond)>;
-def : Pat<(A64select_cc NZCV, -1, 0, inv_cond_code:$Cond),
- (CSINVxxxc XZR, XZR, inv_cond_code:$Cond)>;
-
-// CINC, CINV and CNEG get dealt with automatically, which leaves the issue of
-// commutativity. The instructions are too complex for isCommutable to be used,
-// so we have to create the patterns manually:
-
-// No commutable pattern for CSEL since the commuted version is isomorphic.
-
-// CSINC
-def :Pat<(A64select_cc NZCV, (add i32:$Rm, 1), i32:$Rn, inv_cond_code:$Cond),
- (CSINCwwwc $Rn, $Rm, inv_cond_code:$Cond)>;
-def :Pat<(A64select_cc NZCV, (add i64:$Rm, 1), i64:$Rn, inv_cond_code:$Cond),
- (CSINCxxxc $Rn, $Rm, inv_cond_code:$Cond)>;
-
-// CSINV
-def :Pat<(A64select_cc NZCV, (not i32:$Rm), i32:$Rn, inv_cond_code:$Cond),
- (CSINVwwwc $Rn, $Rm, inv_cond_code:$Cond)>;
-def :Pat<(A64select_cc NZCV, (not i64:$Rm), i64:$Rn, inv_cond_code:$Cond),
- (CSINVxxxc $Rn, $Rm, inv_cond_code:$Cond)>;
-
-// CSNEG
-def :Pat<(A64select_cc NZCV, (ineg i32:$Rm), i32:$Rn, inv_cond_code:$Cond),
- (CSNEGwwwc $Rn, $Rm, inv_cond_code:$Cond)>;
-def :Pat<(A64select_cc NZCV, (ineg i64:$Rm), i64:$Rn, inv_cond_code:$Cond),
- (CSNEGxxxc $Rn, $Rm, inv_cond_code:$Cond)>;
+let AddedComplexity = 10 in {
+def : Pat<(sra GPR32:$Rn, (i64 imm0_31:$imm)),
+ (SBFMWri GPR32:$Rn, imm0_31:$imm, 31)>;
+def : Pat<(sra GPR64:$Rn, (i64 imm0_63:$imm)),
+ (SBFMXri GPR64:$Rn, imm0_63:$imm, 63)>;
+}
+
+def : InstAlias<"asr $dst, $src, $shift",
+ (SBFMWri GPR32:$dst, GPR32:$src, imm0_31:$shift, 31)>;
+def : InstAlias<"asr $dst, $src, $shift",
+ (SBFMXri GPR64:$dst, GPR64:$src, imm0_63:$shift, 63)>;
+def : InstAlias<"sxtb $dst, $src", (SBFMWri GPR32:$dst, GPR32:$src, 0, 7)>;
+def : InstAlias<"sxtb $dst, $src", (SBFMXri GPR64:$dst, GPR64:$src, 0, 7)>;
+def : InstAlias<"sxth $dst, $src", (SBFMWri GPR32:$dst, GPR32:$src, 0, 15)>;
+def : InstAlias<"sxth $dst, $src", (SBFMXri GPR64:$dst, GPR64:$src, 0, 15)>;
+def : InstAlias<"sxtw $dst, $src", (SBFMXri GPR64:$dst, GPR64:$src, 0, 31)>;
+
+def : Pat<(srl GPR32:$Rn, (i64 imm0_31:$imm)),
+ (UBFMWri GPR32:$Rn, imm0_31:$imm, 31)>;
+def : Pat<(srl GPR64:$Rn, (i64 imm0_63:$imm)),
+ (UBFMXri GPR64:$Rn, imm0_63:$imm, 63)>;
+
+def : InstAlias<"lsr $dst, $src, $shift",
+ (UBFMWri GPR32:$dst, GPR32:$src, imm0_31:$shift, 31)>;
+def : InstAlias<"lsr $dst, $src, $shift",
+ (UBFMXri GPR64:$dst, GPR64:$src, imm0_63:$shift, 63)>;
+def : InstAlias<"uxtb $dst, $src", (UBFMWri GPR32:$dst, GPR32:$src, 0, 7)>;
+def : InstAlias<"uxtb $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 7)>;
+def : InstAlias<"uxth $dst, $src", (UBFMWri GPR32:$dst, GPR32:$src, 0, 15)>;
+def : InstAlias<"uxth $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 15)>;
+def : InstAlias<"uxtw $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 31)>;
//===----------------------------------------------------------------------===//
-// Data Processing (1 source) instructions
+// Conditionally set flags instructions.
//===----------------------------------------------------------------------===//
-// Contains: RBIT, REV16, REV, REV32, CLZ, CLS.
-
-// We define a unary operator which always fails. We will use this to
-// define unary operators that cannot be matched.
-
-class A64I_dp_1src_impl<bit sf, bits<6> opcode, string asmop,
- list<dag> patterns, RegisterClass GPRrc,
- InstrItinClass itin>:
- A64I_dp_1src<sf,
- 0,
- 0b00000,
- opcode,
- !strconcat(asmop, "\t$Rd, $Rn"),
- (outs GPRrc:$Rd),
- (ins GPRrc:$Rn),
- patterns,
- itin>,
- Sched<[WriteALU, ReadALU]>;
-
-multiclass A64I_dp_1src <bits<6> opcode, string asmop> {
- let hasSideEffects = 0 in {
- def ww : A64I_dp_1src_impl<0b0, opcode, asmop, [], GPR32, NoItinerary>;
- def xx : A64I_dp_1src_impl<0b1, opcode, asmop, [], GPR64, NoItinerary>;
- }
-}
+defm CCMN : CondSetFlagsImm<0, "ccmn">;
+defm CCMP : CondSetFlagsImm<1, "ccmp">;
-defm RBIT : A64I_dp_1src<0b000000, "rbit">;
-defm CLS : A64I_dp_1src<0b000101, "cls">;
-defm CLZ : A64I_dp_1src<0b000100, "clz">;
-
-def : Pat<(ctlz i32:$Rn), (CLZww $Rn)>;
-def : Pat<(ctlz i64:$Rn), (CLZxx $Rn)>;
-def : Pat<(ctlz_zero_undef i32:$Rn), (CLZww $Rn)>;
-def : Pat<(ctlz_zero_undef i64:$Rn), (CLZxx $Rn)>;
-
-def : Pat<(cttz i32:$Rn), (CLZww (RBITww $Rn))>;
-def : Pat<(cttz i64:$Rn), (CLZxx (RBITxx $Rn))>;
-def : Pat<(cttz_zero_undef i32:$Rn), (CLZww (RBITww $Rn))>;
-def : Pat<(cttz_zero_undef i64:$Rn), (CLZxx (RBITxx $Rn))>;
-
-
-def REVww : A64I_dp_1src_impl<0b0, 0b000010, "rev",
- [(set i32:$Rd, (bswap i32:$Rn))],
- GPR32, NoItinerary>;
-def REVxx : A64I_dp_1src_impl<0b1, 0b000011, "rev",
- [(set i64:$Rd, (bswap i64:$Rn))],
- GPR64, NoItinerary>;
-def REV32xx : A64I_dp_1src_impl<0b1, 0b000010, "rev32",
- [(set i64:$Rd, (bswap (rotr i64:$Rn, (i64 32))))],
- GPR64, NoItinerary>;
-def REV16ww : A64I_dp_1src_impl<0b0, 0b000001, "rev16",
- [(set i32:$Rd, (bswap (rotr i32:$Rn, (i64 16))))],
- GPR32,
- NoItinerary>;
-def REV16xx : A64I_dp_1src_impl<0b1, 0b000001, "rev16", [], GPR64, NoItinerary>;
+defm CCMN : CondSetFlagsReg<0, "ccmn">;
+defm CCMP : CondSetFlagsReg<1, "ccmp">;
//===----------------------------------------------------------------------===//
-// Data Processing (2 sources) instructions
+// Conditional select instructions.
//===----------------------------------------------------------------------===//
-// Contains: CRC32C?[BHWX], UDIV, SDIV, LSLV, LSRV, ASRV, RORV + aliases LSL,
-// LSR, ASR, ROR
-
-
-class dp_2src_impl<bit sf, bits<6> opcode, string asmop, list<dag> patterns,
- RegisterClass GPRsp,
- InstrItinClass itin>:
- A64I_dp_2src<sf,
- opcode,
- 0,
- !strconcat(asmop, "\t$Rd, $Rn, $Rm"),
- (outs GPRsp:$Rd),
- (ins GPRsp:$Rn, GPRsp:$Rm),
- patterns,
- itin>,
- Sched<[WriteALU, ReadALU, ReadALU]>;
-
-multiclass dp_2src_crc<bit c, string asmop> {
- def B_www : dp_2src_impl<0b0, {0, 1, 0, c, 0, 0},
- !strconcat(asmop, "b"), [], GPR32, NoItinerary>;
- def H_www : dp_2src_impl<0b0, {0, 1, 0, c, 0, 1},
- !strconcat(asmop, "h"), [], GPR32, NoItinerary>;
- def W_www : dp_2src_impl<0b0, {0, 1, 0, c, 1, 0},
- !strconcat(asmop, "w"), [], GPR32, NoItinerary>;
- def X_wwx : A64I_dp_2src<0b1, {0, 1, 0, c, 1, 1}, 0b0,
- !strconcat(asmop, "x\t$Rd, $Rn, $Rm"),
- (outs GPR32:$Rd), (ins GPR32:$Rn, GPR64:$Rm), [],
- NoItinerary>,
- Sched<[WriteALU, ReadALU, ReadALU]>;
-}
-
-multiclass dp_2src_zext <bits<6> opcode, string asmop, SDPatternOperator op> {
- def www : dp_2src_impl<0b0,
- opcode,
- asmop,
- [(set i32:$Rd,
- (op i32:$Rn, (i64 (zext i32:$Rm))))],
- GPR32,
- NoItinerary>;
- def xxx : dp_2src_impl<0b1,
- opcode,
- asmop,
- [(set i64:$Rd, (op i64:$Rn, i64:$Rm))],
- GPR64,
- NoItinerary>;
-}
-
-
-multiclass dp_2src <bits<6> opcode, string asmop, SDPatternOperator op> {
- def www : dp_2src_impl<0b0,
- opcode,
- asmop,
- [(set i32:$Rd, (op i32:$Rn, i32:$Rm))],
- GPR32,
- NoItinerary>;
- def xxx : dp_2src_impl<0b1,
- opcode,
- asmop,
- [(set i64:$Rd, (op i64:$Rn, i64:$Rm))],
- GPR64,
- NoItinerary>;
-}
-
-// Here we define the data processing 2 source instructions.
-defm CRC32 : dp_2src_crc<0b0, "crc32">;
-defm CRC32C : dp_2src_crc<0b1, "crc32c">;
-
-let SchedRW = [WriteDiv, ReadDiv, ReadDiv] in {
- defm UDIV : dp_2src<0b000010, "udiv", udiv>;
- defm SDIV : dp_2src<0b000011, "sdiv", sdiv>;
-}
-
-let SchedRW = [WriteALUs, ReadALU, ReadALU] in {
- defm LSLV : dp_2src_zext<0b001000, "lsl", shl>;
- defm LSRV : dp_2src_zext<0b001001, "lsr", srl>;
- defm ASRV : dp_2src_zext<0b001010, "asr", sra>;
- defm RORV : dp_2src_zext<0b001011, "ror", rotr>;
-}
-
-// Extra patterns for an incoming 64-bit value for a 32-bit
-// operation. Since the LLVM operations are undefined (as in C) if the
-// RHS is out of range, it's perfectly permissible to discard the high
-// bits of the GPR64.
-def : Pat<(shl i32:$Rn, i64:$Rm),
- (LSLVwww $Rn, (EXTRACT_SUBREG $Rm, sub_32))>;
-def : Pat<(srl i32:$Rn, i64:$Rm),
- (LSRVwww $Rn, (EXTRACT_SUBREG $Rm, sub_32))>;
-def : Pat<(sra i32:$Rn, i64:$Rm),
- (ASRVwww $Rn, (EXTRACT_SUBREG $Rm, sub_32))>;
-def : Pat<(rotr i32:$Rn, i64:$Rm),
- (RORVwww $Rn, (EXTRACT_SUBREG $Rm, sub_32))>;
-
-// Here we define the aliases for the data processing 2 source instructions.
-def LSL_mnemonic : MnemonicAlias<"lslv", "lsl">;
-def LSR_mnemonic : MnemonicAlias<"lsrv", "lsr">;
-def ASR_menmonic : MnemonicAlias<"asrv", "asr">;
-def ROR_menmonic : MnemonicAlias<"rorv", "ror">;
+defm CSEL : CondSelect<0, 0b00, "csel">;
+
+def inc : PatFrag<(ops node:$in), (add node:$in, 1)>;
+defm CSINC : CondSelectOp<0, 0b01, "csinc", inc>;
+defm CSINV : CondSelectOp<1, 0b00, "csinv", not>;
+defm CSNEG : CondSelectOp<1, 0b01, "csneg", ineg>;
+
+def : Pat<(AArch64csinv GPR32:$tval, GPR32:$fval, (i32 imm:$cc), NZCV),
+ (CSINVWr GPR32:$tval, GPR32:$fval, (i32 imm:$cc))>;
+def : Pat<(AArch64csinv GPR64:$tval, GPR64:$fval, (i32 imm:$cc), NZCV),
+ (CSINVXr GPR64:$tval, GPR64:$fval, (i32 imm:$cc))>;
+def : Pat<(AArch64csneg GPR32:$tval, GPR32:$fval, (i32 imm:$cc), NZCV),
+ (CSNEGWr GPR32:$tval, GPR32:$fval, (i32 imm:$cc))>;
+def : Pat<(AArch64csneg GPR64:$tval, GPR64:$fval, (i32 imm:$cc), NZCV),
+ (CSNEGXr GPR64:$tval, GPR64:$fval, (i32 imm:$cc))>;
+def : Pat<(AArch64csinc GPR32:$tval, GPR32:$fval, (i32 imm:$cc), NZCV),
+ (CSINCWr GPR32:$tval, GPR32:$fval, (i32 imm:$cc))>;
+def : Pat<(AArch64csinc GPR64:$tval, GPR64:$fval, (i32 imm:$cc), NZCV),
+ (CSINCXr GPR64:$tval, GPR64:$fval, (i32 imm:$cc))>;
+
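+// These materialize cset/csetm, e.g. "cset w0, eq" is "csinc w0, wzr, wzr, ne":
+// if the inverted condition holds, the result is wzr (0), otherwise wzr + 1.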
+def : Pat<(AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV),
+ (CSINCWr WZR, WZR, (i32 imm:$cc))>;
+def : Pat<(AArch64csel (i64 0), (i64 1), (i32 imm:$cc), NZCV),
+ (CSINCXr XZR, XZR, (i32 imm:$cc))>;
+def : Pat<(AArch64csel (i32 0), (i32 -1), (i32 imm:$cc), NZCV),
+ (CSINVWr WZR, WZR, (i32 imm:$cc))>;
+def : Pat<(AArch64csel (i64 0), (i64 -1), (i32 imm:$cc), NZCV),
+ (CSINVXr XZR, XZR, (i32 imm:$cc))>;
+
+// The aliased instruction uses the inverse of the condition code given to the
+// alias; the parser already inverts the condition code for these aliases.
+def : InstAlias<"cset $dst, $cc",
+ (CSINCWr GPR32:$dst, WZR, WZR, inv_ccode:$cc)>;
+def : InstAlias<"cset $dst, $cc",
+ (CSINCXr GPR64:$dst, XZR, XZR, inv_ccode:$cc)>;
+
+def : InstAlias<"csetm $dst, $cc",
+ (CSINVWr GPR32:$dst, WZR, WZR, inv_ccode:$cc)>;
+def : InstAlias<"csetm $dst, $cc",
+ (CSINVXr GPR64:$dst, XZR, XZR, inv_ccode:$cc)>;
+
+def : InstAlias<"cinc $dst, $src, $cc",
+ (CSINCWr GPR32:$dst, GPR32:$src, GPR32:$src, inv_ccode:$cc)>;
+def : InstAlias<"cinc $dst, $src, $cc",
+ (CSINCXr GPR64:$dst, GPR64:$src, GPR64:$src, inv_ccode:$cc)>;
+
+def : InstAlias<"cinv $dst, $src, $cc",
+ (CSINVWr GPR32:$dst, GPR32:$src, GPR32:$src, inv_ccode:$cc)>;
+def : InstAlias<"cinv $dst, $src, $cc",
+ (CSINVXr GPR64:$dst, GPR64:$src, GPR64:$src, inv_ccode:$cc)>;
+
+def : InstAlias<"cneg $dst, $src, $cc",
+ (CSNEGWr GPR32:$dst, GPR32:$src, GPR32:$src, inv_ccode:$cc)>;
+def : InstAlias<"cneg $dst, $src, $cc",
+ (CSNEGXr GPR64:$dst, GPR64:$src, GPR64:$src, inv_ccode:$cc)>;
//===----------------------------------------------------------------------===//
-// Data Processing (3 sources) instructions
+// PC-relative instructions.
//===----------------------------------------------------------------------===//
-// Contains: MADD, MSUB, SMADDL, SMSUBL, SMULH, UMADDL, UMSUBL, UMULH
-// + aliases MUL, MNEG, SMULL, SMNEGL, UMULL, UMNEGL
-
-class A64I_dp3_4operand<bit sf, bits<6> opcode, RegisterClass AccReg,
- ValueType AccTy, RegisterClass SrcReg,
- string asmop, dag pattern>
- : A64I_dp3<sf, opcode,
- (outs AccReg:$Rd), (ins SrcReg:$Rn, SrcReg:$Rm, AccReg:$Ra),
- !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Ra"),
- [(set AccTy:$Rd, pattern)], NoItinerary>,
- Sched<[WriteMAC, ReadMAC, ReadMAC, ReadMAC]> {
- bits<5> Ra;
- let Inst{14-10} = Ra;
-
- RegisterClass AccGPR = AccReg;
- RegisterClass SrcGPR = SrcReg;
-}
-
-def MADDwwww : A64I_dp3_4operand<0b0, 0b000000, GPR32, i32, GPR32, "madd",
- (add i32:$Ra, (mul i32:$Rn, i32:$Rm))>;
-def MADDxxxx : A64I_dp3_4operand<0b1, 0b000000, GPR64, i64, GPR64, "madd",
- (add i64:$Ra, (mul i64:$Rn, i64:$Rm))>;
-
-def MSUBwwww : A64I_dp3_4operand<0b0, 0b000001, GPR32, i32, GPR32, "msub",
- (sub i32:$Ra, (mul i32:$Rn, i32:$Rm))>;
-def MSUBxxxx : A64I_dp3_4operand<0b1, 0b000001, GPR64, i64, GPR64, "msub",
- (sub i64:$Ra, (mul i64:$Rn, i64:$Rm))>;
-
-def SMADDLxwwx : A64I_dp3_4operand<0b1, 0b000010, GPR64, i64, GPR32, "smaddl",
- (add i64:$Ra, (mul (i64 (sext i32:$Rn)), (sext i32:$Rm)))>;
-def SMSUBLxwwx : A64I_dp3_4operand<0b1, 0b000011, GPR64, i64, GPR32, "smsubl",
- (sub i64:$Ra, (mul (i64 (sext i32:$Rn)), (sext i32:$Rm)))>;
-
-def UMADDLxwwx : A64I_dp3_4operand<0b1, 0b001010, GPR64, i64, GPR32, "umaddl",
- (add i64:$Ra, (mul (i64 (zext i32:$Rn)), (zext i32:$Rm)))>;
-def UMSUBLxwwx : A64I_dp3_4operand<0b1, 0b001011, GPR64, i64, GPR32, "umsubl",
- (sub i64:$Ra, (mul (i64 (zext i32:$Rn)), (zext i32:$Rm)))>;
-
-let isCommutable = 1, PostEncoderMethod = "fixMulHigh" in {
- def UMULHxxx : A64I_dp3<0b1, 0b001100, (outs GPR64:$Rd),
- (ins GPR64:$Rn, GPR64:$Rm),
- "umulh\t$Rd, $Rn, $Rm",
- [(set i64:$Rd, (mulhu i64:$Rn, i64:$Rm))],
- NoItinerary>,
- Sched<[WriteMAC, ReadMAC, ReadMAC]>;
-
- def SMULHxxx : A64I_dp3<0b1, 0b000100, (outs GPR64:$Rd),
- (ins GPR64:$Rn, GPR64:$Rm),
- "smulh\t$Rd, $Rn, $Rm",
- [(set i64:$Rd, (mulhs i64:$Rn, i64:$Rm))],
- NoItinerary>,
- Sched<[WriteMAC, ReadMAC, ReadMAC]>;
-}
-
-multiclass A64I_dp3_3operand<string asmop, A64I_dp3_4operand INST,
- Register ZR, dag pattern> {
- def : InstAlias<asmop # " $Rd, $Rn, $Rm",
- (INST INST.AccGPR:$Rd, INST.SrcGPR:$Rn, INST.SrcGPR:$Rm, ZR)>;
-
- def : Pat<pattern, (INST $Rn, $Rm, ZR)>;
-}
-
-defm : A64I_dp3_3operand<"mul", MADDwwww, WZR, (mul i32:$Rn, i32:$Rm)>;
-defm : A64I_dp3_3operand<"mul", MADDxxxx, XZR, (mul i64:$Rn, i64:$Rm)>;
-
-defm : A64I_dp3_3operand<"mneg", MSUBwwww, WZR,
- (sub 0, (mul i32:$Rn, i32:$Rm))>;
-defm : A64I_dp3_3operand<"mneg", MSUBxxxx, XZR,
- (sub 0, (mul i64:$Rn, i64:$Rm))>;
-
-defm : A64I_dp3_3operand<"smull", SMADDLxwwx, XZR,
- (mul (i64 (sext i32:$Rn)), (sext i32:$Rm))>;
-defm : A64I_dp3_3operand<"smnegl", SMSUBLxwwx, XZR,
- (sub 0, (mul (i64 (sext i32:$Rn)), (sext i32:$Rm)))>;
-
-defm : A64I_dp3_3operand<"umull", UMADDLxwwx, XZR,
- (mul (i64 (zext i32:$Rn)), (zext i32:$Rm))>;
-defm : A64I_dp3_3operand<"umnegl", UMSUBLxwwx, XZR,
- (sub 0, (mul (i64 (zext i32:$Rn)), (zext i32:$Rm)))>;
-
-
-//===----------------------------------------------------------------------===//
-// Exception generation
-//===----------------------------------------------------------------------===//
-// Contains: SVC, HVC, SMC, BRK, HLT, DCPS1, DCPS2, DCPS3
-
-def uimm16_asmoperand : AsmOperandClass {
- let Name = "UImm16";
- let PredicateMethod = "isUImm<16>";
- let RenderMethod = "addImmOperands";
- let DiagnosticType = "UImm16";
-}
+let isReMaterializable = 1 in {
+let neverHasSideEffects = 1, mayStore = 0, mayLoad = 0 in {
+def ADR : ADRI<0, "adr", adrlabel, []>;
+} // neverHasSideEffects = 1
-def uimm16 : Operand<i32> {
- let ParserMatchClass = uimm16_asmoperand;
-}
-
-class A64I_exceptImpl<bits<3> opc, bits<2> ll, string asmop>
- : A64I_exception<opc, 0b000, ll, (outs), (ins uimm16:$UImm16),
- !strconcat(asmop, "\t$UImm16"), [], NoItinerary>,
- Sched<[WriteBr]> {
- let isBranch = 1;
- let isTerminator = 1;
-}
+def ADRP : ADRI<1, "adrp", adrplabel,
+ [(set GPR64:$Xd, (AArch64adrp tglobaladdr:$label))]>;
+} // isReMaterializable = 1
-def SVCi : A64I_exceptImpl<0b000, 0b01, "svc">;
-def HVCi : A64I_exceptImpl<0b000, 0b10, "hvc">;
-def SMCi : A64I_exceptImpl<0b000, 0b11, "smc">;
-def BRKi : A64I_exceptImpl<0b001, 0b00, "brk">;
-def HLTi : A64I_exceptImpl<0b010, 0b00, "hlt">;
-
-def DCPS1i : A64I_exceptImpl<0b101, 0b01, "dcps1">;
-def DCPS2i : A64I_exceptImpl<0b101, 0b10, "dcps2">;
-def DCPS3i : A64I_exceptImpl<0b101, 0b11, "dcps3">;
-
-// The immediate is optional for the DCPS instructions, defaulting to 0.
-def : InstAlias<"dcps1", (DCPS1i 0)>;
-def : InstAlias<"dcps2", (DCPS2i 0)>;
-def : InstAlias<"dcps3", (DCPS3i 0)>;
+// Page address of a constant pool entry or a block address.
+def : Pat<(AArch64adrp tconstpool:$cp), (ADRP tconstpool:$cp)>;
+def : Pat<(AArch64adrp tblockaddress:$cp), (ADRP tblockaddress:$cp)>;
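+// ADRP materializes the 4KiB page address of the symbol; the usual access
+// sequence pairs it with the low 12 bits, e.g. (illustrative):
+//   adrp x0, sym
+//   add  x0, x0, :lo12:sym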
//===----------------------------------------------------------------------===//
-// Extract (immediate)
+// Unconditional branch (register) instructions.
//===----------------------------------------------------------------------===//
-// Contains: EXTR + alias ROR
-
-def EXTRwwwi : A64I_extract<0b0, 0b000, 0b0,
- (outs GPR32:$Rd),
- (ins GPR32:$Rn, GPR32:$Rm, bitfield32_imm:$LSB),
- "extr\t$Rd, $Rn, $Rm, $LSB",
- [(set i32:$Rd,
- (A64Extr i32:$Rn, i32:$Rm, imm:$LSB))],
- NoItinerary>,
- Sched<[WriteALU, ReadALU, ReadALU]>;
-def EXTRxxxi : A64I_extract<0b1, 0b000, 0b1,
- (outs GPR64:$Rd),
- (ins GPR64:$Rn, GPR64:$Rm, bitfield64_imm:$LSB),
- "extr\t$Rd, $Rn, $Rm, $LSB",
- [(set i64:$Rd,
- (A64Extr i64:$Rn, i64:$Rm, imm:$LSB))],
- NoItinerary>,
- Sched<[WriteALU, ReadALU, ReadALU]>;
-
-def : InstAlias<"ror $Rd, $Rs, $LSB",
- (EXTRwwwi GPR32:$Rd, GPR32:$Rs, GPR32:$Rs, bitfield32_imm:$LSB)>;
-def : InstAlias<"ror $Rd, $Rs, $LSB",
- (EXTRxxxi GPR64:$Rd, GPR64:$Rs, GPR64:$Rs, bitfield64_imm:$LSB)>;
-
-def : Pat<(rotr i32:$Rn, bitfield32_imm:$LSB),
- (EXTRwwwi $Rn, $Rn, bitfield32_imm:$LSB)>;
-def : Pat<(rotr i64:$Rn, bitfield64_imm:$LSB),
- (EXTRxxxi $Rn, $Rn, bitfield64_imm:$LSB)>;
-//===----------------------------------------------------------------------===//
-// Floating-point compare instructions
-//===----------------------------------------------------------------------===//
-// Contains: FCMP, FCMPE
+let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
+def RET : BranchReg<0b0010, "ret", []>;
+def DRPS : SpecialReturn<0b0101, "drps">;
+def ERET : SpecialReturn<0b0100, "eret">;
+} // isReturn = 1, isTerminator = 1, isBarrier = 1
-def fpzero_asmoperand : AsmOperandClass {
- let Name = "FPZero";
- let ParserMethod = "ParseFPImmOperand";
- let DiagnosticType = "FPZero";
-}
+// Default to the LR register.
+def : InstAlias<"ret", (RET LR)>;
-def fpz32 : Operand<f32>,
- ComplexPattern<f32, 1, "SelectFPZeroOperand", [fpimm]> {
- let ParserMatchClass = fpzero_asmoperand;
- let PrintMethod = "printFPZeroOperand";
- let DecoderMethod = "DecodeFPZeroOperand";
-}
+let isCall = 1, Defs = [LR], Uses = [SP] in {
+def BLR : BranchReg<0b0001, "blr", [(AArch64call GPR64:$Rn)]>;
+} // isCall
-def fpz64 : Operand<f64>,
- ComplexPattern<f64, 1, "SelectFPZeroOperand", [fpimm]> {
- let ParserMatchClass = fpzero_asmoperand;
- let PrintMethod = "printFPZeroOperand";
- let DecoderMethod = "DecodeFPZeroOperand";
-}
+let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
+def BR : BranchReg<0b0000, "br", [(brind GPR64:$Rn)]>;
+} // isBranch, isTerminator, isBarrier, isIndirectBranch
-def fpz64movi : Operand<i64>,
- ComplexPattern<f64, 1, "SelectFPZeroOperand", [fpimm]> {
- let ParserMatchClass = fpzero_asmoperand;
- let PrintMethod = "printFPZeroOperand";
- let DecoderMethod = "DecodeFPZeroOperand";
+// Create a separate pseudo-instruction for codegen to use so that we don't
+// flag LR as used in every function. It'll be restored before the RET by the
+// epilogue if it's legitimately used.
+def RET_ReallyLR : Pseudo<(outs), (ins), [(AArch64retflag)]> {
+ let isTerminator = 1;
+ let isBarrier = 1;
+ let isReturn = 1;
}
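+
+// Illustration: unlike RET, which takes an explicit register operand, this
+// pseudo has no ins; it is expected to be emitted as "ret x30" (RET LR)
+// when lowered to MC.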
-multiclass A64I_fpcmpSignal<bits<2> type, bit imm, dag ins, dag pattern> {
- def _quiet : A64I_fpcmp<0b0, 0b0, type, 0b00, {0b0, imm, 0b0, 0b0, 0b0},
- (outs), ins, "fcmp\t$Rn, $Rm", [pattern],
- NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU, ReadFPALU]> {
- let Defs = [NZCV];
- }
-
- def _sig : A64I_fpcmp<0b0, 0b0, type, 0b00, {0b1, imm, 0b0, 0b0, 0b0},
- (outs), ins, "fcmpe\t$Rn, $Rm", [], NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU, ReadFPALU]> {
- let Defs = [NZCV];
- }
+// This is a directive-like pseudo-instruction. The purpose is to insert an
+// R_AARCH64_TLSDESC_CALL relocation at the offset of the following instruction
+// (which in the usual case is a BLR).
+let hasSideEffects = 1 in
+def TLSDESCCALL : Pseudo<(outs), (ins i64imm:$sym), []> {
+ let AsmString = ".tlsdesccall $sym";
}
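+
+// Illustration: a typical TLS-descriptor sequence, with .tlsdesccall placed
+// immediately before the indirect call it annotates:
+//   adrp x0, :tlsdesc:var
+//   ldr  x1, [x0, :tlsdesc_lo12:var]
+//   add  x0, x0, :tlsdesc_lo12:var
+//   .tlsdesccall var
+//   blr  x1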
-defm FCMPss : A64I_fpcmpSignal<0b00, 0b0, (ins FPR32:$Rn, FPR32:$Rm),
- (set NZCV, (A64cmp f32:$Rn, f32:$Rm))>;
-defm FCMPdd : A64I_fpcmpSignal<0b01, 0b0, (ins FPR64:$Rn, FPR64:$Rm),
- (set NZCV, (A64cmp f64:$Rn, f64:$Rm))>;
-
-// What would be Rm should be written as 0; note that even though it's called
-// "$Rm" here to fit in with the InstrFormats, it's actually an immediate.
-defm FCMPsi : A64I_fpcmpSignal<0b00, 0b1, (ins FPR32:$Rn, fpz32:$Rm),
- (set NZCV, (A64cmp f32:$Rn, fpz32:$Rm))>;
-
-defm FCMPdi : A64I_fpcmpSignal<0b01, 0b1, (ins FPR64:$Rn, fpz64:$Rm),
- (set NZCV, (A64cmp f64:$Rn, fpz64:$Rm))>;
-
+// Pseudo-instruction representing a BLR with attached TLSDESC relocation. It
+// gets expanded to two MCInsts during lowering.
+let isCall = 1, Defs = [LR] in
+def TLSDESC_BLR
+ : Pseudo<(outs), (ins GPR64:$dest, i64imm:$sym),
+ [(AArch64tlsdesc_call GPR64:$dest, tglobaltlsaddr:$sym)]>;
+def : Pat<(AArch64tlsdesc_call GPR64:$dest, texternalsym:$sym),
+ (TLSDESC_BLR GPR64:$dest, texternalsym:$sym)>;
//===----------------------------------------------------------------------===//
-// Floating-point conditional compare instructions
+// Conditional branch (immediate) instruction.
//===----------------------------------------------------------------------===//
-// Contains: FCCMP, FCCMPE
-
-class A64I_fpccmpImpl<bits<2> type, bit op, RegisterClass FPR, string asmop>
- : A64I_fpccmp<0b0, 0b0, type, op,
- (outs),
- (ins FPR:$Rn, FPR:$Rm, uimm4:$NZCVImm, cond_code_op:$Cond),
- !strconcat(asmop, "\t$Rn, $Rm, $NZCVImm, $Cond"),
- [], NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU, ReadFPALU]> {
- let Defs = [NZCV];
-}
-
-def FCCMPss : A64I_fpccmpImpl<0b00, 0b0, FPR32, "fccmp">;
-def FCCMPEss : A64I_fpccmpImpl<0b00, 0b1, FPR32, "fccmpe">;
-def FCCMPdd : A64I_fpccmpImpl<0b01, 0b0, FPR64, "fccmp">;
-def FCCMPEdd : A64I_fpccmpImpl<0b01, 0b1, FPR64, "fccmpe">;
+def Bcc : BranchCond;
//===----------------------------------------------------------------------===//
-// Floating-point conditional select instructions
+// Compare-and-branch instructions.
//===----------------------------------------------------------------------===//
-// Contains: FCSEL
-
-let Uses = [NZCV] in {
- def FCSELsssc : A64I_fpcondsel<0b0, 0b0, 0b00, (outs FPR32:$Rd),
- (ins FPR32:$Rn, FPR32:$Rm, cond_code_op:$Cond),
- "fcsel\t$Rd, $Rn, $Rm, $Cond",
- [(set f32:$Rd,
- (simple_select f32:$Rn, f32:$Rm))],
- NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
-
-
- def FCSELdddc : A64I_fpcondsel<0b0, 0b0, 0b01, (outs FPR64:$Rd),
- (ins FPR64:$Rn, FPR64:$Rm, cond_code_op:$Cond),
- "fcsel\t$Rd, $Rn, $Rm, $Cond",
- [(set f64:$Rd,
- (simple_select f64:$Rn, f64:$Rm))],
- NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
-}
+defm CBZ : CmpBranch<0, "cbz", AArch64cbz>;
+defm CBNZ : CmpBranch<1, "cbnz", AArch64cbnz>;
//===----------------------------------------------------------------------===//
-// Floating-point data-processing (1 source)
+// Test-bit-and-branch instructions.
//===----------------------------------------------------------------------===//
-// Contains: FMOV, FABS, FNEG, FSQRT, FCVT, FRINT[NPMZAXI].
-
-def FPNoUnop : PatFrag<(ops node:$val), (fneg node:$val),
- [{ (void)N; return false; }]>;
-
-// First we do the fairly trivial bunch with uniform "OP s, s" and "OP d, d"
-// syntax. Default to no pattern because most are odd enough not to have one.
-multiclass A64I_fpdp1sizes<bits<6> opcode, string asmstr,
- SDPatternOperator opnode = FPNoUnop> {
- def ss : A64I_fpdp1<0b0, 0b0, 0b00, opcode, (outs FPR32:$Rd), (ins FPR32:$Rn),
- !strconcat(asmstr, "\t$Rd, $Rn"),
- [(set f32:$Rd, (opnode f32:$Rn))],
- NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU]>;
-
- def dd : A64I_fpdp1<0b0, 0b0, 0b01, opcode, (outs FPR64:$Rd), (ins FPR64:$Rn),
- !strconcat(asmstr, "\t$Rd, $Rn"),
- [(set f64:$Rd, (opnode f64:$Rn))],
- NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU]>;
-}
-
-defm FMOV : A64I_fpdp1sizes<0b000000, "fmov">;
-defm FABS : A64I_fpdp1sizes<0b000001, "fabs", fabs>;
-defm FNEG : A64I_fpdp1sizes<0b000010, "fneg", fneg>;
-let SchedRW = [WriteFPSqrt, ReadFPSqrt] in {
- defm FSQRT : A64I_fpdp1sizes<0b000011, "fsqrt", fsqrt>;
-}
-
-defm FRINTN : A64I_fpdp1sizes<0b001000, "frintn">;
-defm FRINTP : A64I_fpdp1sizes<0b001001, "frintp", fceil>;
-defm FRINTM : A64I_fpdp1sizes<0b001010, "frintm", ffloor>;
-defm FRINTZ : A64I_fpdp1sizes<0b001011, "frintz", ftrunc>;
-defm FRINTA : A64I_fpdp1sizes<0b001100, "frinta">;
-defm FRINTX : A64I_fpdp1sizes<0b001110, "frintx", frint>;
-defm FRINTI : A64I_fpdp1sizes<0b001111, "frinti", fnearbyint>;
-
-// The FCVT instructions have different source and destination register-types,
-// but the fields are uniform everywhere a D-register (say) crops up. Package
-// this information in a Record.
-class FCVTRegType<RegisterClass rc, bits<2> fld, ValueType vt> {
- RegisterClass Class = rc;
- ValueType VT = vt;
- bit t1 = fld{1};
- bit t0 = fld{0};
-}
-
-def FCVT16 : FCVTRegType<FPR16, 0b11, f16>;
-def FCVT32 : FCVTRegType<FPR32, 0b00, f32>;
-def FCVT64 : FCVTRegType<FPR64, 0b01, f64>;
-
-class A64I_fpdp1_fcvt<FCVTRegType DestReg, FCVTRegType SrcReg, SDNode opnode>
- : A64I_fpdp1<0b0, 0b0, {SrcReg.t1, SrcReg.t0},
- {0,0,0,1, DestReg.t1, DestReg.t0},
- (outs DestReg.Class:$Rd), (ins SrcReg.Class:$Rn),
- "fcvt\t$Rd, $Rn",
- [(set DestReg.VT:$Rd, (opnode SrcReg.VT:$Rn))], NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU]>;
-
-def FCVTds : A64I_fpdp1_fcvt<FCVT64, FCVT32, fextend>;
-def FCVThs : A64I_fpdp1_fcvt<FCVT16, FCVT32, fround>;
-def FCVTsd : A64I_fpdp1_fcvt<FCVT32, FCVT64, fround>;
-def FCVThd : A64I_fpdp1_fcvt<FCVT16, FCVT64, fround>;
-def FCVTsh : A64I_fpdp1_fcvt<FCVT32, FCVT16, fextend>;
-def FCVTdh : A64I_fpdp1_fcvt<FCVT64, FCVT16, fextend>;
-
+defm TBZ : TestBranch<0, "tbz", AArch64tbz>;
+defm TBNZ : TestBranch<1, "tbnz", AArch64tbnz>;
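+
+// Illustration: "tbz w0, #3, target" branches to target when bit 3 of w0 is
+// clear; "tbnz w0, #3, target" branches when it is set.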
//===----------------------------------------------------------------------===//
-// Floating-point data-processing (2 sources) instructions
+// Unconditional branch (immediate) instructions.
//===----------------------------------------------------------------------===//
-// Contains: FMUL, FDIV, FADD, FSUB, FMAX, FMIN, FMAXNM, FMINNM, FNMUL
-
-def FPNoBinop : PatFrag<(ops node:$lhs, node:$rhs), (fadd node:$lhs, node:$rhs),
- [{ (void)N; return false; }]>;
-
-multiclass A64I_fpdp2sizes<bits<4> opcode, string asmstr,
- SDPatternOperator opnode> {
- def sss : A64I_fpdp2<0b0, 0b0, 0b00, opcode,
- (outs FPR32:$Rd),
- (ins FPR32:$Rn, FPR32:$Rm),
- !strconcat(asmstr, "\t$Rd, $Rn, $Rm"),
- [(set f32:$Rd, (opnode f32:$Rn, f32:$Rm))],
- NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
-
- def ddd : A64I_fpdp2<0b0, 0b0, 0b01, opcode,
- (outs FPR64:$Rd),
- (ins FPR64:$Rn, FPR64:$Rm),
- !strconcat(asmstr, "\t$Rd, $Rn, $Rm"),
- [(set f64:$Rd, (opnode f64:$Rn, f64:$Rm))],
- NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
-}
+let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
+def B : BranchImm<0, "b", [(br bb:$addr)]>;
+} // isBranch, isTerminator, isBarrier
-let isCommutable = 1 in {
- let SchedRW = [WriteFPMul, ReadFPMul, ReadFPMul] in {
- defm FMUL : A64I_fpdp2sizes<0b0000, "fmul", fmul>;
- }
- defm FADD : A64I_fpdp2sizes<0b0010, "fadd", fadd>;
-
- // No patterns for these.
- defm FMAX : A64I_fpdp2sizes<0b0100, "fmax", FPNoBinop>;
- defm FMIN : A64I_fpdp2sizes<0b0101, "fmin", FPNoBinop>;
- defm FMAXNM : A64I_fpdp2sizes<0b0110, "fmaxnm", FPNoBinop>;
- defm FMINNM : A64I_fpdp2sizes<0b0111, "fminnm", FPNoBinop>;
-
- let SchedRW = [WriteFPMul, ReadFPMul, ReadFPMul] in {
- defm FNMUL : A64I_fpdp2sizes<0b1000, "fnmul",
- PatFrag<(ops node:$lhs, node:$rhs),
- (fneg (fmul node:$lhs, node:$rhs))> >;
- }
-}
-
-let SchedRW = [WriteFPDiv, ReadFPDiv, ReadFPDiv] in {
- defm FDIV : A64I_fpdp2sizes<0b0001, "fdiv", fdiv>;
-}
-defm FSUB : A64I_fpdp2sizes<0b0011, "fsub", fsub>;
+let isCall = 1, Defs = [LR], Uses = [SP] in {
+def BL : CallImm<1, "bl", [(AArch64call tglobaladdr:$addr)]>;
+} // isCall
+def : Pat<(AArch64call texternalsym:$func), (BL texternalsym:$func)>;
//===----------------------------------------------------------------------===//
-// Floating-point data-processing (3 sources) instructions
+// Exception generation instructions.
//===----------------------------------------------------------------------===//
-// Contains: FMADD, FMSUB, FNMADD, FNMSUB
-
-def fmsub : PatFrag<(ops node:$Rn, node:$Rm, node:$Ra),
- (fma (fneg node:$Rn), node:$Rm, node:$Ra)>;
-def fnmsub : PatFrag<(ops node:$Rn, node:$Rm, node:$Ra),
- (fma node:$Rn, node:$Rm, (fneg node:$Ra))>;
-def fnmadd : PatFrag<(ops node:$Rn, node:$Rm, node:$Ra),
- (fma (fneg node:$Rn), node:$Rm, (fneg node:$Ra))>;
-
-class A64I_fpdp3Impl<string asmop, RegisterClass FPR, ValueType VT,
- bits<2> type, bit o1, bit o0, SDPatternOperator fmakind>
- : A64I_fpdp3<0b0, 0b0, type, o1, o0, (outs FPR:$Rd),
- (ins FPR:$Rn, FPR:$Rm, FPR:$Ra),
- !strconcat(asmop,"\t$Rd, $Rn, $Rm, $Ra"),
- [(set VT:$Rd, (fmakind VT:$Rn, VT:$Rm, VT:$Ra))],
- NoItinerary>,
- Sched<[WriteFPMAC, ReadFPMAC, ReadFPMAC, ReadFPMAC]>;
-
-def FMADDssss : A64I_fpdp3Impl<"fmadd", FPR32, f32, 0b00, 0b0, 0b0, fma>;
-def FMSUBssss : A64I_fpdp3Impl<"fmsub", FPR32, f32, 0b00, 0b0, 0b1, fmsub>;
-def FNMADDssss : A64I_fpdp3Impl<"fnmadd", FPR32, f32, 0b00, 0b1, 0b0, fnmadd>;
-def FNMSUBssss : A64I_fpdp3Impl<"fnmsub", FPR32, f32, 0b00, 0b1, 0b1, fnmsub>;
-
-def FMADDdddd : A64I_fpdp3Impl<"fmadd", FPR64, f64, 0b01, 0b0, 0b0, fma>;
-def FMSUBdddd : A64I_fpdp3Impl<"fmsub", FPR64, f64, 0b01, 0b0, 0b1, fmsub>;
-def FNMADDdddd : A64I_fpdp3Impl<"fnmadd", FPR64, f64, 0b01, 0b1, 0b0, fnmadd>;
-def FNMSUBdddd : A64I_fpdp3Impl<"fnmsub", FPR64, f64, 0b01, 0b1, 0b1, fnmsub>;
-
-// Extra patterns for when we're allowed to optimise separate multiplication and
-// addition.
-let Predicates = [HasFPARMv8, UseFusedMAC] in {
-def : Pat<(f32 (fadd FPR32:$Ra, (f32 (fmul_su FPR32:$Rn, FPR32:$Rm)))),
- (FMADDssss FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;
-def : Pat<(f32 (fsub FPR32:$Ra, (f32 (fmul_su FPR32:$Rn, FPR32:$Rm)))),
- (FMSUBssss FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;
-def : Pat<(f32 (fsub (f32 (fneg FPR32:$Ra)), (f32 (fmul_su FPR32:$Rn, FPR32:$Rm)))),
- (FNMADDssss FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;
-def : Pat<(f32 (fsub (f32 (fmul_su FPR32:$Rn, FPR32:$Rm)), FPR32:$Ra)),
- (FNMSUBssss FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;
-
-def : Pat<(f64 (fadd FPR64:$Ra, (f64 (fmul_su FPR64:$Rn, FPR64:$Rm)))),
- (FMADDdddd FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;
-def : Pat<(f64 (fsub FPR64:$Ra, (f64 (fmul_su FPR64:$Rn, FPR64:$Rm)))),
- (FMSUBdddd FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;
-def : Pat<(f64 (fsub (f64 (fneg FPR64:$Ra)), (f64 (fmul_su FPR64:$Rn, FPR64:$Rm)))),
- (FNMADDdddd FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;
-def : Pat<(f64 (fsub (f64 (fmul_su FPR64:$Rn, FPR64:$Rm)), FPR64:$Ra)),
- (FNMSUBdddd FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;
-}
-
+def BRK : ExceptionGeneration<0b001, 0b00, "brk">;
+def DCPS1 : ExceptionGeneration<0b101, 0b01, "dcps1">;
+def DCPS2 : ExceptionGeneration<0b101, 0b10, "dcps2">;
+def DCPS3 : ExceptionGeneration<0b101, 0b11, "dcps3">;
+def HLT : ExceptionGeneration<0b010, 0b00, "hlt">;
+def HVC : ExceptionGeneration<0b000, 0b10, "hvc">;
+def SMC : ExceptionGeneration<0b000, 0b11, "smc">;
+def SVC : ExceptionGeneration<0b000, 0b01, "svc">;
+
+// DCPSn defaults to an immediate operand of zero if unspecified.
+def : InstAlias<"dcps1", (DCPS1 0)>;
+def : InstAlias<"dcps2", (DCPS2 0)>;
+def : InstAlias<"dcps3", (DCPS3 0)>;
//===----------------------------------------------------------------------===//
-// Floating-point <-> fixed-point conversion instructions
+// Load instructions.
//===----------------------------------------------------------------------===//
-// Contains: FCVTZS, FCVTZU, SCVTF, UCVTF
-
-// #1-#32 allowed, encoded as "64 - <specified imm>"
-def fixedpos_asmoperand_i32 : AsmOperandClass {
- let Name = "CVTFixedPos32";
- let RenderMethod = "addCVTFixedPosOperands";
- let PredicateMethod = "isCVTFixedPos<32>";
- let DiagnosticType = "CVTFixedPos32";
-}
-// Also encoded as "64 - <specified imm>" but #1-#64 allowed.
-def fixedpos_asmoperand_i64 : AsmOperandClass {
- let Name = "CVTFixedPos64";
- let RenderMethod = "addCVTFixedPosOperands";
- let PredicateMethod = "isCVTFixedPos<64>";
- let DiagnosticType = "CVTFixedPos64";
+// Pair (indexed, offset)
+defm LDPW : LoadPairOffset<0b00, 0, GPR32, simm7s4, "ldp">;
+defm LDPX : LoadPairOffset<0b10, 0, GPR64, simm7s8, "ldp">;
+defm LDPS : LoadPairOffset<0b00, 1, FPR32, simm7s4, "ldp">;
+defm LDPD : LoadPairOffset<0b01, 1, FPR64, simm7s8, "ldp">;
+defm LDPQ : LoadPairOffset<0b10, 1, FPR128, simm7s16, "ldp">;
+
+defm LDPSW : LoadPairOffset<0b01, 0, GPR64, simm7s4, "ldpsw">;
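+
+// Illustration: simm7s* operands are 7-bit signed immediates scaled by the
+// access size; for simm7s8 the byte offset range is -512..504 in steps of 8,
+// so "ldp x0, x1, [sp, #16]" encodes imm7 = 2.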
+
+// Pair (pre-indexed)
+def LDPWpre : LoadPairPreIdx<0b00, 0, GPR32, simm7s4, "ldp">;
+def LDPXpre : LoadPairPreIdx<0b10, 0, GPR64, simm7s8, "ldp">;
+def LDPSpre : LoadPairPreIdx<0b00, 1, FPR32, simm7s4, "ldp">;
+def LDPDpre : LoadPairPreIdx<0b01, 1, FPR64, simm7s8, "ldp">;
+def LDPQpre : LoadPairPreIdx<0b10, 1, FPR128, simm7s16, "ldp">;
+
+def LDPSWpre : LoadPairPreIdx<0b01, 0, GPR64, simm7s4, "ldpsw">;
+
+// Pair (post-indexed)
+def LDPWpost : LoadPairPostIdx<0b00, 0, GPR32, simm7s4, "ldp">;
+def LDPXpost : LoadPairPostIdx<0b10, 0, GPR64, simm7s8, "ldp">;
+def LDPSpost : LoadPairPostIdx<0b00, 1, FPR32, simm7s4, "ldp">;
+def LDPDpost : LoadPairPostIdx<0b01, 1, FPR64, simm7s8, "ldp">;
+def LDPQpost : LoadPairPostIdx<0b10, 1, FPR128, simm7s16, "ldp">;
+
+def LDPSWpost : LoadPairPostIdx<0b01, 0, GPR64, simm7s4, "ldpsw">;
+
+
+// Pair (no allocate)
+defm LDNPW : LoadPairNoAlloc<0b00, 0, GPR32, simm7s4, "ldnp">;
+defm LDNPX : LoadPairNoAlloc<0b10, 0, GPR64, simm7s8, "ldnp">;
+defm LDNPS : LoadPairNoAlloc<0b00, 1, FPR32, simm7s4, "ldnp">;
+defm LDNPD : LoadPairNoAlloc<0b01, 1, FPR64, simm7s8, "ldnp">;
+defm LDNPQ : LoadPairNoAlloc<0b10, 1, FPR128, simm7s16, "ldnp">;
+
+//---
+// (register offset)
+//---
+
+// Integer
+defm LDRBB : Load8RO<0b00, 0, 0b01, GPR32, "ldrb", i32, zextloadi8>;
+defm LDRHH : Load16RO<0b01, 0, 0b01, GPR32, "ldrh", i32, zextloadi16>;
+defm LDRW : Load32RO<0b10, 0, 0b01, GPR32, "ldr", i32, load>;
+defm LDRX : Load64RO<0b11, 0, 0b01, GPR64, "ldr", i64, load>;
+
+// Floating-point
+defm LDRB : Load8RO<0b00, 1, 0b01, FPR8, "ldr", untyped, load>;
+defm LDRH : Load16RO<0b01, 1, 0b01, FPR16, "ldr", f16, load>;
+defm LDRS : Load32RO<0b10, 1, 0b01, FPR32, "ldr", f32, load>;
+defm LDRD : Load64RO<0b11, 1, 0b01, FPR64, "ldr", f64, load>;
+defm LDRQ : Load128RO<0b00, 1, 0b11, FPR128, "ldr", f128, load>;
+
+// Load sign-extended half-word
+defm LDRSHW : Load16RO<0b01, 0, 0b11, GPR32, "ldrsh", i32, sextloadi16>;
+defm LDRSHX : Load16RO<0b01, 0, 0b10, GPR64, "ldrsh", i64, sextloadi16>;
+
+// Load sign-extended byte
+defm LDRSBW : Load8RO<0b00, 0, 0b11, GPR32, "ldrsb", i32, sextloadi8>;
+defm LDRSBX : Load8RO<0b00, 0, 0b10, GPR64, "ldrsb", i64, sextloadi8>;
+
+// Load sign-extended word
+defm LDRSW : Load32RO<0b10, 0, 0b10, GPR64, "ldrsw", i64, sextloadi32>;
+
+// Pre-fetch.
+defm PRFM : PrefetchRO<0b11, 0, 0b10, "prfm">;
+
+// Regular loads carry no alignment requirement, so it is safe to map these
+// vector loads directly onto the interesting addressing modes.
+// FIXME: We could do the same for bitconvert to floating point vectors.
+multiclass ScalToVecROLoadPat<ROAddrMode ro, SDPatternOperator loadop,
+ ValueType ScalTy, ValueType VecTy,
+ Instruction LOADW, Instruction LOADX,
+ SubRegIndex sub> {
+ def : Pat<(VecTy (scalar_to_vector (ScalTy
+ (loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$offset))))),
+ (INSERT_SUBREG (VecTy (IMPLICIT_DEF)),
+ (LOADW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$offset),
+ sub)>;
+
+ def : Pat<(VecTy (scalar_to_vector (ScalTy
+ (loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$offset))))),
+ (INSERT_SUBREG (VecTy (IMPLICIT_DEF)),
+ (LOADX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$offset),
+ sub)>;
}
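+
+// Illustration: the ro8/v8i8 instantiation below expands to patterns of the
+// shape
+//   (v8i8 (scalar_to_vector (i32 (extloadi8 addr))))
+//     -> (INSERT_SUBREG (v8i8 (IMPLICIT_DEF)), (LDRBroW ...), bsub)
+// i.e. the loaded byte is placed in the b-subregister of an undef vector.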
-// We need the cartesian product of f32/f64 i32/i64 operands for
-// conversions:
-// + Selection needs to use operands of correct floating type
-// + Assembly parsing and decoding depend on integer width
-class cvtfix_i32_op<ValueType FloatVT>
- : Operand<FloatVT>,
- ComplexPattern<FloatVT, 1, "SelectCVTFixedPosOperand<32>", [fpimm]> {
- let ParserMatchClass = fixedpos_asmoperand_i32;
- let DecoderMethod = "DecodeCVT32FixedPosOperand";
- let PrintMethod = "printCVTFixedPosOperand";
-}
+let AddedComplexity = 10 in {
+defm : ScalToVecROLoadPat<ro8, extloadi8, i32, v8i8, LDRBroW, LDRBroX, bsub>;
+defm : ScalToVecROLoadPat<ro8, extloadi8, i32, v16i8, LDRBroW, LDRBroX, bsub>;
-class cvtfix_i64_op<ValueType FloatVT>
- : Operand<FloatVT>,
- ComplexPattern<FloatVT, 1, "SelectCVTFixedPosOperand<64>", [fpimm]> {
- let ParserMatchClass = fixedpos_asmoperand_i64;
- let PrintMethod = "printCVTFixedPosOperand";
-}
+defm : ScalToVecROLoadPat<ro16, extloadi16, i32, v4i16, LDRHroW, LDRHroX, hsub>;
+defm : ScalToVecROLoadPat<ro16, extloadi16, i32, v8i16, LDRHroW, LDRHroX, hsub>;
-// Because of the proliferation of weird operands, it's not really
-// worth going for a multiclass here. Oh well.
-
-class A64I_fptofix<bit sf, bits<2> type, bits<3> opcode,
- RegisterClass GPR, RegisterClass FPR,
- ValueType DstTy, ValueType SrcTy,
- Operand scale_op, string asmop, SDNode cvtop>
- : A64I_fpfixed<sf, 0b0, type, 0b11, opcode,
- (outs GPR:$Rd), (ins FPR:$Rn, scale_op:$Scale),
- !strconcat(asmop, "\t$Rd, $Rn, $Scale"),
- [(set DstTy:$Rd, (cvtop (fmul SrcTy:$Rn, scale_op:$Scale)))],
- NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU]>;
-
-def FCVTZSwsi : A64I_fptofix<0b0, 0b00, 0b000, GPR32, FPR32, i32, f32,
- cvtfix_i32_op<f32>, "fcvtzs", fp_to_sint>;
-def FCVTZSxsi : A64I_fptofix<0b1, 0b00, 0b000, GPR64, FPR32, i64, f32,
- cvtfix_i64_op<f32>, "fcvtzs", fp_to_sint>;
-def FCVTZUwsi : A64I_fptofix<0b0, 0b00, 0b001, GPR32, FPR32, i32, f32,
- cvtfix_i32_op<f32>, "fcvtzu", fp_to_uint>;
-def FCVTZUxsi : A64I_fptofix<0b1, 0b00, 0b001, GPR64, FPR32, i64, f32,
- cvtfix_i64_op<f32>, "fcvtzu", fp_to_uint>;
-
-def FCVTZSwdi : A64I_fptofix<0b0, 0b01, 0b000, GPR32, FPR64, i32, f64,
- cvtfix_i32_op<f64>, "fcvtzs", fp_to_sint>;
-def FCVTZSxdi : A64I_fptofix<0b1, 0b01, 0b000, GPR64, FPR64, i64, f64,
- cvtfix_i64_op<f64>, "fcvtzs", fp_to_sint>;
-def FCVTZUwdi : A64I_fptofix<0b0, 0b01, 0b001, GPR32, FPR64, i32, f64,
- cvtfix_i32_op<f64>, "fcvtzu", fp_to_uint>;
-def FCVTZUxdi : A64I_fptofix<0b1, 0b01, 0b001, GPR64, FPR64, i64, f64,
- cvtfix_i64_op<f64>, "fcvtzu", fp_to_uint>;
-
-
-class A64I_fixtofp<bit sf, bits<2> type, bits<3> opcode,
- RegisterClass FPR, RegisterClass GPR,
- ValueType DstTy, ValueType SrcTy,
- Operand scale_op, string asmop, SDNode cvtop>
- : A64I_fpfixed<sf, 0b0, type, 0b00, opcode,
- (outs FPR:$Rd), (ins GPR:$Rn, scale_op:$Scale),
- !strconcat(asmop, "\t$Rd, $Rn, $Scale"),
- [(set DstTy:$Rd, (fdiv (cvtop SrcTy:$Rn), scale_op:$Scale))],
- NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU]>;
-
-def SCVTFswi : A64I_fixtofp<0b0, 0b00, 0b010, FPR32, GPR32, f32, i32,
- cvtfix_i32_op<f32>, "scvtf", sint_to_fp>;
-def SCVTFsxi : A64I_fixtofp<0b1, 0b00, 0b010, FPR32, GPR64, f32, i64,
- cvtfix_i64_op<f32>, "scvtf", sint_to_fp>;
-def UCVTFswi : A64I_fixtofp<0b0, 0b00, 0b011, FPR32, GPR32, f32, i32,
- cvtfix_i32_op<f32>, "ucvtf", uint_to_fp>;
-def UCVTFsxi : A64I_fixtofp<0b1, 0b00, 0b011, FPR32, GPR64, f32, i64,
- cvtfix_i64_op<f32>, "ucvtf", uint_to_fp>;
-def SCVTFdwi : A64I_fixtofp<0b0, 0b01, 0b010, FPR64, GPR32, f64, i32,
- cvtfix_i32_op<f64>, "scvtf", sint_to_fp>;
-def SCVTFdxi : A64I_fixtofp<0b1, 0b01, 0b010, FPR64, GPR64, f64, i64,
- cvtfix_i64_op<f64>, "scvtf", sint_to_fp>;
-def UCVTFdwi : A64I_fixtofp<0b0, 0b01, 0b011, FPR64, GPR32, f64, i32,
- cvtfix_i32_op<f64>, "ucvtf", uint_to_fp>;
-def UCVTFdxi : A64I_fixtofp<0b1, 0b01, 0b011, FPR64, GPR64, f64, i64,
- cvtfix_i64_op<f64>, "ucvtf", uint_to_fp>;
+defm : ScalToVecROLoadPat<ro32, load, i32, v2i32, LDRSroW, LDRSroX, ssub>;
+defm : ScalToVecROLoadPat<ro32, load, i32, v4i32, LDRSroW, LDRSroX, ssub>;
-//===----------------------------------------------------------------------===//
-// Floating-point <-> integer conversion instructions
-//===----------------------------------------------------------------------===//
-// Contains: FCVTZS, FCVTZU, SCVTF, UCVTF
-
-class A64I_fpintI<bit sf, bits<2> type, bits<2> rmode, bits<3> opcode,
- RegisterClass DestPR, RegisterClass SrcPR, string asmop>
- : A64I_fpint<sf, 0b0, type, rmode, opcode, (outs DestPR:$Rd), (ins SrcPR:$Rn),
- !strconcat(asmop, "\t$Rd, $Rn"), [], NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU]>;
-
-multiclass A64I_fptointRM<bits<2> rmode, bit o2, string asmop> {
- def Sws : A64I_fpintI<0b0, 0b00, rmode, {o2, 0, 0},
- GPR32, FPR32, asmop # "s">;
- def Sxs : A64I_fpintI<0b1, 0b00, rmode, {o2, 0, 0},
- GPR64, FPR32, asmop # "s">;
- def Uws : A64I_fpintI<0b0, 0b00, rmode, {o2, 0, 1},
- GPR32, FPR32, asmop # "u">;
- def Uxs : A64I_fpintI<0b1, 0b00, rmode, {o2, 0, 1},
- GPR64, FPR32, asmop # "u">;
-
- def Swd : A64I_fpintI<0b0, 0b01, rmode, {o2, 0, 0},
- GPR32, FPR64, asmop # "s">;
- def Sxd : A64I_fpintI<0b1, 0b01, rmode, {o2, 0, 0},
- GPR64, FPR64, asmop # "s">;
- def Uwd : A64I_fpintI<0b0, 0b01, rmode, {o2, 0, 1},
- GPR32, FPR64, asmop # "u">;
- def Uxd : A64I_fpintI<0b1, 0b01, rmode, {o2, 0, 1},
- GPR64, FPR64, asmop # "u">;
-}
+defm : ScalToVecROLoadPat<ro32, load, f32, v2f32, LDRSroW, LDRSroX, ssub>;
+defm : ScalToVecROLoadPat<ro32, load, f32, v4f32, LDRSroW, LDRSroX, ssub>;
-defm FCVTN : A64I_fptointRM<0b00, 0b0, "fcvtn">;
-defm FCVTP : A64I_fptointRM<0b01, 0b0, "fcvtp">;
-defm FCVTM : A64I_fptointRM<0b10, 0b0, "fcvtm">;
-defm FCVTZ : A64I_fptointRM<0b11, 0b0, "fcvtz">;
-defm FCVTA : A64I_fptointRM<0b00, 0b1, "fcvta">;
-
-let Predicates = [HasFPARMv8] in {
-def : Pat<(i32 (fp_to_sint f32:$Rn)), (FCVTZSws $Rn)>;
-def : Pat<(i64 (fp_to_sint f32:$Rn)), (FCVTZSxs $Rn)>;
-def : Pat<(i32 (fp_to_uint f32:$Rn)), (FCVTZUws $Rn)>;
-def : Pat<(i64 (fp_to_uint f32:$Rn)), (FCVTZUxs $Rn)>;
-def : Pat<(i32 (fp_to_sint f64:$Rn)), (FCVTZSwd $Rn)>;
-def : Pat<(i64 (fp_to_sint f64:$Rn)), (FCVTZSxd $Rn)>;
-def : Pat<(i32 (fp_to_uint f64:$Rn)), (FCVTZUwd $Rn)>;
-def : Pat<(i64 (fp_to_uint f64:$Rn)), (FCVTZUxd $Rn)>;
-}
+defm : ScalToVecROLoadPat<ro64, load, i64, v2i64, LDRDroW, LDRDroX, dsub>;
-multiclass A64I_inttofp<bit o0, string asmop> {
- def CVTFsw : A64I_fpintI<0b0, 0b00, 0b00, {0, 1, o0}, FPR32, GPR32, asmop>;
- def CVTFsx : A64I_fpintI<0b1, 0b00, 0b00, {0, 1, o0}, FPR32, GPR64, asmop>;
- def CVTFdw : A64I_fpintI<0b0, 0b01, 0b00, {0, 1, o0}, FPR64, GPR32, asmop>;
- def CVTFdx : A64I_fpintI<0b1, 0b01, 0b00, {0, 1, o0}, FPR64, GPR64, asmop>;
-}
-
-defm S : A64I_inttofp<0b0, "scvtf">;
-defm U : A64I_inttofp<0b1, "ucvtf">;
-
-let Predicates = [HasFPARMv8] in {
-def : Pat<(f32 (sint_to_fp i32:$Rn)), (SCVTFsw $Rn)>;
-def : Pat<(f32 (sint_to_fp i64:$Rn)), (SCVTFsx $Rn)>;
-def : Pat<(f64 (sint_to_fp i32:$Rn)), (SCVTFdw $Rn)>;
-def : Pat<(f64 (sint_to_fp i64:$Rn)), (SCVTFdx $Rn)>;
-def : Pat<(f32 (uint_to_fp i32:$Rn)), (UCVTFsw $Rn)>;
-def : Pat<(f32 (uint_to_fp i64:$Rn)), (UCVTFsx $Rn)>;
-def : Pat<(f64 (uint_to_fp i32:$Rn)), (UCVTFdw $Rn)>;
-def : Pat<(f64 (uint_to_fp i64:$Rn)), (UCVTFdx $Rn)>;
-}
+defm : ScalToVecROLoadPat<ro64, load, f64, v2f64, LDRDroW, LDRDroX, dsub>;
-def FMOVws : A64I_fpintI<0b0, 0b00, 0b00, 0b110, GPR32, FPR32, "fmov">;
-def FMOVsw : A64I_fpintI<0b0, 0b00, 0b00, 0b111, FPR32, GPR32, "fmov">;
-def FMOVxd : A64I_fpintI<0b1, 0b01, 0b00, 0b110, GPR64, FPR64, "fmov">;
-def FMOVdx : A64I_fpintI<0b1, 0b01, 0b00, 0b111, FPR64, GPR64, "fmov">;
-let Predicates = [HasFPARMv8] in {
-def : Pat<(i32 (bitconvert f32:$Rn)), (FMOVws $Rn)>;
-def : Pat<(f32 (bitconvert i32:$Rn)), (FMOVsw $Rn)>;
-def : Pat<(i64 (bitconvert f64:$Rn)), (FMOVxd $Rn)>;
-def : Pat<(f64 (bitconvert i64:$Rn)), (FMOVdx $Rn)>;
-}
+def : Pat <(v1i64 (scalar_to_vector (i64
+ (load (ro_Windexed64 GPR64sp:$Rn, GPR32:$Rm,
+ ro_Wextend64:$extend))))),
+ (LDRDroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend64:$extend)>;
-def lane1_asmoperand : AsmOperandClass {
- let Name = "Lane1";
- let RenderMethod = "addImmOperands";
- let DiagnosticType = "Lane1";
+def : Pat <(v1i64 (scalar_to_vector (i64
+ (load (ro_Xindexed64 GPR64sp:$Rn, GPR64:$Rm,
+ ro_Xextend64:$extend))))),
+ (LDRDroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend64:$extend)>;
}
-def lane1 : Operand<i32> {
- let ParserMatchClass = lane1_asmoperand;
- let PrintMethod = "printBareImmOperand";
-}
+// Match all 64-bit-wide loads whose type is compatible with FPR64.
+multiclass VecROLoadPat<ROAddrMode ro, ValueType VecTy,
+ Instruction LOADW, Instruction LOADX> {
-let DecoderMethod = "DecodeFMOVLaneInstruction" in {
- def FMOVxv : A64I_fpint<0b1, 0b0, 0b10, 0b01, 0b110,
- (outs GPR64:$Rd), (ins VPR128:$Rn, lane1:$Lane),
- "fmov\t$Rd, $Rn.d[$Lane]", [], NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU]>;
+ def : Pat<(VecTy (load (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend))),
+ (LOADW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>;
- def FMOVvx : A64I_fpint<0b1, 0b0, 0b10, 0b01, 0b111,
- (outs VPR128:$Rd), (ins GPR64:$Rn, lane1:$Lane),
- "fmov\t$Rd.d[$Lane], $Rn", [], NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU]>;
+ def : Pat<(VecTy (load (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend))),
+ (LOADX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>;
}
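+
+// Illustration: each instantiation below yields one pattern per extended
+// register form, e.g. for v2i32:
+//   (v2i32 (load <W-reg offset addr>)) -> (LDRDroW ...)
+//   (v2i32 (load <X-reg offset addr>)) -> (LDRDroX ...)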
-let Predicates = [HasFPARMv8] in {
-def : InstAlias<"fmov $Rd, $Rn.2d[$Lane]",
- (FMOVxv GPR64:$Rd, VPR128:$Rn, lane1:$Lane), 0b0>;
-
-def : InstAlias<"fmov $Rd.2d[$Lane], $Rn",
- (FMOVvx VPR128:$Rd, GPR64:$Rn, lane1:$Lane), 0b0>;
-}
-
-//===----------------------------------------------------------------------===//
-// Floating-point immediate instructions
-//===----------------------------------------------------------------------===//
-// Contains: FMOV
-
-def fpimm_asmoperand : AsmOperandClass {
- let Name = "FMOVImm";
- let ParserMethod = "ParseFPImmOperand";
- let DiagnosticType = "FPImm";
-}
-
-// The MCOperand for these instructions are the encoded 8-bit values.
-def SDXF_fpimm : SDNodeXForm<fpimm, [{
- uint32_t Imm8;
- A64Imms::isFPImm(N->getValueAPF(), Imm8);
- return CurDAG->getTargetConstant(Imm8, MVT::i32);
-}]>;
-
-class fmov_operand<ValueType FT>
- : Operand<i32>,
- PatLeaf<(FT fpimm), [{ return A64Imms::isFPImm(N->getValueAPF()); }],
- SDXF_fpimm> {
- let PrintMethod = "printFPImmOperand";
- let ParserMatchClass = fpimm_asmoperand;
+let AddedComplexity = 10 in {
+let Predicates = [IsLE] in {
+  // Big-endian vector loads must go through LD1, so these patterns apply
+  // only in little-endian mode.
+ defm : VecROLoadPat<ro64, v2i32, LDRDroW, LDRDroX>;
+ defm : VecROLoadPat<ro64, v2f32, LDRDroW, LDRDroX>;
+ defm : VecROLoadPat<ro64, v8i8, LDRDroW, LDRDroX>;
+ defm : VecROLoadPat<ro64, v4i16, LDRDroW, LDRDroX>;
+}
+
+defm : VecROLoadPat<ro64, v1i64, LDRDroW, LDRDroX>;
+defm : VecROLoadPat<ro64, v1f64, LDRDroW, LDRDroX>;
+
+// Match all 128-bit-wide loads whose type is compatible with FPR128.
+let Predicates = [IsLE] in {
+  // Big-endian vector loads must go through LD1, so these patterns apply
+  // only in little-endian mode.
+ defm : VecROLoadPat<ro128, v2i64, LDRQroW, LDRQroX>;
+ defm : VecROLoadPat<ro128, v2f64, LDRQroW, LDRQroX>;
+ defm : VecROLoadPat<ro128, v4i32, LDRQroW, LDRQroX>;
+ defm : VecROLoadPat<ro128, v4f32, LDRQroW, LDRQroX>;
+ defm : VecROLoadPat<ro128, v8i16, LDRQroW, LDRQroX>;
+ defm : VecROLoadPat<ro128, v16i8, LDRQroW, LDRQroX>;
+}
+} // AddedComplexity = 10
+
+// zextload -> i64
+multiclass ExtLoadTo64ROPat<ROAddrMode ro, SDPatternOperator loadop,
+ Instruction INSTW, Instruction INSTX> {
+ def : Pat<(i64 (loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend))),
+ (SUBREG_TO_REG (i64 0),
+ (INSTW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend),
+ sub_32)>;
+
+ def : Pat<(i64 (loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend))),
+ (SUBREG_TO_REG (i64 0),
+ (INSTX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend),
+ sub_32)>;
}
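+
+// Illustration: the (i64 0) operand of SUBREG_TO_REG asserts that the bits
+// outside sub_32 are zero, which holds here because 32-bit loads such as
+// LDRBBroW implicitly zero the upper half of the X register.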
-def fmov32_operand : fmov_operand<f32>;
-def fmov64_operand : fmov_operand<f64>;
-
-class A64I_fpimm_impl<bits<2> type, RegisterClass Reg, ValueType VT,
- Operand fmov_operand>
- : A64I_fpimm<0b0, 0b0, type, 0b00000,
- (outs Reg:$Rd),
- (ins fmov_operand:$Imm8),
- "fmov\t$Rd, $Imm8",
- [(set VT:$Rd, fmov_operand:$Imm8)],
- NoItinerary>,
- Sched<[WriteFPALU]>;
-
-def FMOVsi : A64I_fpimm_impl<0b00, FPR32, f32, fmov32_operand>;
-def FMOVdi : A64I_fpimm_impl<0b01, FPR64, f64, fmov64_operand>;
+let AddedComplexity = 10 in {
+ defm : ExtLoadTo64ROPat<ro8, zextloadi8, LDRBBroW, LDRBBroX>;
+ defm : ExtLoadTo64ROPat<ro16, zextloadi16, LDRHHroW, LDRHHroX>;
+ defm : ExtLoadTo64ROPat<ro32, zextloadi32, LDRWroW, LDRWroX>;
-//===----------------------------------------------------------------------===//
-// Load-register (literal) instructions
-//===----------------------------------------------------------------------===//
-// Contains: LDR, LDRSW, PRFM
+ // zextloadi1 -> zextloadi8
+ defm : ExtLoadTo64ROPat<ro8, zextloadi1, LDRBBroW, LDRBBroX>;
-def ldrlit_label_asmoperand : AsmOperandClass {
- let Name = "LoadLitLabel";
- let RenderMethod = "addLabelOperands<19, 4>";
- let DiagnosticType = "Label";
-}
+ // extload -> zextload
+ defm : ExtLoadTo64ROPat<ro8, extloadi8, LDRBBroW, LDRBBroX>;
+ defm : ExtLoadTo64ROPat<ro16, extloadi16, LDRHHroW, LDRHHroX>;
+ defm : ExtLoadTo64ROPat<ro32, extloadi32, LDRWroW, LDRWroX>;
-def ldrlit_label : Operand<i64> {
- let EncoderMethod = "getLoadLitLabelOpValue";
-
- // This label is a 19-bit offset from PC, scaled by the instruction-width: 4.
- let PrintMethod = "printLabelOperand<19, 4>";
- let ParserMatchClass = ldrlit_label_asmoperand;
- let OperandType = "OPERAND_PCREL";
+ // extloadi1 -> zextloadi8
+ defm : ExtLoadTo64ROPat<ro8, extloadi1, LDRBBroW, LDRBBroX>;
}
-// Various instructions take an immediate value (which can always be used),
-// where some numbers have a symbolic name to make things easier. These operands
-// and the associated functions abstract away the differences.
-multiclass namedimm<string prefix, string mapper> {
- def _asmoperand : AsmOperandClass {
- let Name = "NamedImm" # prefix;
- let PredicateMethod = "isUImm";
- let RenderMethod = "addImmOperands";
- let ParserMethod = "ParseNamedImmOperand<" # mapper # ">";
- let DiagnosticType = "NamedImm_" # prefix;
- }
- def _op : Operand<i32> {
- let ParserMatchClass = !cast<AsmOperandClass>(prefix # "_asmoperand");
- let PrintMethod = "printNamedImmOperand<" # mapper # ">";
- let DecoderMethod = "DecodeNamedImmOperand<" # mapper # ">";
- }
-}
-
-defm prefetch : namedimm<"prefetch", "A64PRFM::PRFMMapper">;
+// extload/zextload -> i32
+multiclass ExtLoadTo32ROPat<ROAddrMode ro, SDPatternOperator loadop,
+ Instruction INSTW, Instruction INSTX> {
+ def : Pat<(i32 (loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend))),
+ (INSTW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>;
-class A64I_LDRlitSimple<bits<2> opc, bit v, RegisterClass OutReg,
- list<dag> patterns = []>
- : A64I_LDRlit<opc, v, (outs OutReg:$Rt), (ins ldrlit_label:$Imm19),
- "ldr\t$Rt, $Imm19", patterns, NoItinerary>,
- Sched<[WriteLd]>;
+ def : Pat<(i32 (loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend))),
+ (INSTX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>;
-let mayLoad = 1 in {
- def LDRw_lit : A64I_LDRlitSimple<0b00, 0b0, GPR32>;
- def LDRx_lit : A64I_LDRlitSimple<0b01, 0b0, GPR64>;
}
-let Predicates = [HasFPARMv8] in {
-def LDRs_lit : A64I_LDRlitSimple<0b00, 0b1, FPR32>;
-def LDRd_lit : A64I_LDRlitSimple<0b01, 0b1, FPR64>;
+let AddedComplexity = 10 in {
+ // extload -> zextload
+ defm : ExtLoadTo32ROPat<ro8, extloadi8, LDRBBroW, LDRBBroX>;
+ defm : ExtLoadTo32ROPat<ro16, extloadi16, LDRHHroW, LDRHHroX>;
+ defm : ExtLoadTo32ROPat<ro32, extloadi32, LDRWroW, LDRWroX>;
+
+ // zextloadi1 -> zextloadi8
+ defm : ExtLoadTo32ROPat<ro8, zextloadi1, LDRBBroW, LDRBBroX>;
+}
+
+//---
+// (unsigned immediate)
+//---
+defm LDRX : LoadUI<0b11, 0, 0b01, GPR64, uimm12s8, "ldr",
+ [(set GPR64:$Rt,
+ (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)))]>;
+defm LDRW : LoadUI<0b10, 0, 0b01, GPR32, uimm12s4, "ldr",
+ [(set GPR32:$Rt,
+ (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset)))]>;
+defm LDRB : LoadUI<0b00, 1, 0b01, FPR8, uimm12s1, "ldr",
+ [(set FPR8:$Rt,
+ (load (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset)))]>;
+defm LDRH : LoadUI<0b01, 1, 0b01, FPR16, uimm12s2, "ldr",
+ [(set (f16 FPR16:$Rt),
+ (load (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)))]>;
+defm LDRS : LoadUI<0b10, 1, 0b01, FPR32, uimm12s4, "ldr",
+ [(set (f32 FPR32:$Rt),
+ (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset)))]>;
+defm LDRD : LoadUI<0b11, 1, 0b01, FPR64, uimm12s8, "ldr",
+ [(set (f64 FPR64:$Rt),
+ (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)))]>;
+defm LDRQ : LoadUI<0b00, 1, 0b11, FPR128, uimm12s16, "ldr",
+ [(set (f128 FPR128:$Rt),
+ (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)))]>;
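+
+// Illustration: uimm12s* operands are 12-bit unsigned immediates scaled by
+// the access size, so an 8-byte LDR reaches offsets 0..32760 (4095 * 8) in
+// steps of 8.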
+
+// Regular loads carry no alignment requirement, so it is safe to map these
+// vector loads directly onto the interesting addressing modes.
+// FIXME: We could do the same for bitconvert to floating point vectors.
+def : Pat <(v8i8 (scalar_to_vector (i32
+ (extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))),
+ (INSERT_SUBREG (v8i8 (IMPLICIT_DEF)),
+ (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub)>;
+def : Pat <(v16i8 (scalar_to_vector (i32
+ (extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))),
+ (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub)>;
+def : Pat <(v4i16 (scalar_to_vector (i32
+ (extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))),
+ (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)),
+ (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub)>;
+def : Pat <(v8i16 (scalar_to_vector (i32
+ (extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))),
+ (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
+ (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub)>;
+def : Pat <(v2i32 (scalar_to_vector (i32
+ (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))))),
+ (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)),
+ (LDRSui GPR64sp:$Rn, uimm12s4:$offset), ssub)>;
+def : Pat <(v4i32 (scalar_to_vector (i32
+ (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))))),
+ (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
+ (LDRSui GPR64sp:$Rn, uimm12s4:$offset), ssub)>;
+def : Pat <(v1i64 (scalar_to_vector (i64
+ (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))))),
+ (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
+def : Pat <(v2i64 (scalar_to_vector (i64
+ (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))))),
+ (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)),
+ (LDRDui GPR64sp:$Rn, uimm12s8:$offset), dsub)>;
+
+// Match all 64-bit-wide loads whose type is compatible with FPR64.
+let Predicates = [IsLE] in {
+  // Big-endian vector loads must go through LD1, so these patterns apply
+  // only in little-endian mode.
+ def : Pat<(v2f32 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
+ (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
+ def : Pat<(v8i8 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
+ (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
+ def : Pat<(v4i16 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
+ (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
+ def : Pat<(v2i32 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
+ (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
+}
+def : Pat<(v1f64 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
+ (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
+def : Pat<(v1i64 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
+ (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
+
+// Match all 128-bit-wide loads whose type is compatible with FPR128.
+let Predicates = [IsLE] in {
+  // Big-endian vector loads must go through LD1, so these patterns apply
+  // only in little-endian mode.
+ def : Pat<(v4f32 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
+ (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
+ def : Pat<(v2f64 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
+ (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
+ def : Pat<(v16i8 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
+ (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
+ def : Pat<(v8i16 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
+ (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
+ def : Pat<(v4i32 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
+ (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
+ def : Pat<(v2i64 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
+ (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
+}
+def : Pat<(f128 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
+ (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
+
+defm LDRHH : LoadUI<0b01, 0, 0b01, GPR32, uimm12s2, "ldrh",
+ [(set GPR32:$Rt,
+ (zextloadi16 (am_indexed16 GPR64sp:$Rn,
+ uimm12s2:$offset)))]>;
+defm LDRBB : LoadUI<0b00, 0, 0b01, GPR32, uimm12s1, "ldrb",
+ [(set GPR32:$Rt,
+ (zextloadi8 (am_indexed8 GPR64sp:$Rn,
+ uimm12s1:$offset)))]>;
+// zextload -> i64
+def : Pat<(i64 (zextloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
+ (SUBREG_TO_REG (i64 0), (LDRBBui GPR64sp:$Rn, uimm12s1:$offset), sub_32)>;
+def : Pat<(i64 (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))),
+ (SUBREG_TO_REG (i64 0), (LDRHHui GPR64sp:$Rn, uimm12s2:$offset), sub_32)>;
+
+// zextloadi1 -> zextloadi8
+def : Pat<(i32 (zextloadi1 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
+ (LDRBBui GPR64sp:$Rn, uimm12s1:$offset)>;
+def : Pat<(i64 (zextloadi1 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
+ (SUBREG_TO_REG (i64 0), (LDRBBui GPR64sp:$Rn, uimm12s1:$offset), sub_32)>;
+
+// extload -> zextload
+def : Pat<(i32 (extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))),
+ (LDRHHui GPR64sp:$Rn, uimm12s2:$offset)>;
+def : Pat<(i32 (extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
+ (LDRBBui GPR64sp:$Rn, uimm12s1:$offset)>;
+def : Pat<(i32 (extloadi1 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
+ (LDRBBui GPR64sp:$Rn, uimm12s1:$offset)>;
+def : Pat<(i64 (extloadi32 (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))),
+ (SUBREG_TO_REG (i64 0), (LDRWui GPR64sp:$Rn, uimm12s4:$offset), sub_32)>;
+def : Pat<(i64 (extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))),
+ (SUBREG_TO_REG (i64 0), (LDRHHui GPR64sp:$Rn, uimm12s2:$offset), sub_32)>;
+def : Pat<(i64 (extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
+ (SUBREG_TO_REG (i64 0), (LDRBBui GPR64sp:$Rn, uimm12s1:$offset), sub_32)>;
+def : Pat<(i64 (extloadi1 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
+ (SUBREG_TO_REG (i64 0), (LDRBBui GPR64sp:$Rn, uimm12s1:$offset), sub_32)>;
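+
+// Illustration: anyext loads leave the high bits unspecified, so the
+// zero-extending form is always a legal implementation; likewise an
+// in-memory i1 occupies a byte holding 0 or 1, so a plain byte load already
+// produces the zero-extended value.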
+
+// load sign-extended half-word
+defm LDRSHW : LoadUI<0b01, 0, 0b11, GPR32, uimm12s2, "ldrsh",
+ [(set GPR32:$Rt,
+ (sextloadi16 (am_indexed16 GPR64sp:$Rn,
+ uimm12s2:$offset)))]>;
+defm LDRSHX : LoadUI<0b01, 0, 0b10, GPR64, uimm12s2, "ldrsh",
+ [(set GPR64:$Rt,
+ (sextloadi16 (am_indexed16 GPR64sp:$Rn,
+ uimm12s2:$offset)))]>;
+
+// load sign-extended byte
+defm LDRSBW : LoadUI<0b00, 0, 0b11, GPR32, uimm12s1, "ldrsb",
+ [(set GPR32:$Rt,
+ (sextloadi8 (am_indexed8 GPR64sp:$Rn,
+ uimm12s1:$offset)))]>;
+defm LDRSBX : LoadUI<0b00, 0, 0b10, GPR64, uimm12s1, "ldrsb",
+ [(set GPR64:$Rt,
+ (sextloadi8 (am_indexed8 GPR64sp:$Rn,
+ uimm12s1:$offset)))]>;
+
+// load sign-extended word
+defm LDRSW : LoadUI<0b10, 0, 0b10, GPR64, uimm12s4, "ldrsw",
+ [(set GPR64:$Rt,
+ (sextloadi32 (am_indexed32 GPR64sp:$Rn,
+ uimm12s4:$offset)))]>;
+
+// load zero-extended word
+def : Pat<(i64 (zextloadi32 (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))),
+ (SUBREG_TO_REG (i64 0), (LDRWui GPR64sp:$Rn, uimm12s4:$offset), sub_32)>;
+
+// Pre-fetch.
+def PRFMui : PrefetchUI<0b11, 0, 0b10, "prfm",
+ [(AArch64Prefetch imm:$Rt,
+ (am_indexed64 GPR64sp:$Rn,
+ uimm12s8:$offset))]>;
+
+def : InstAlias<"prfm $Rt, [$Rn]", (PRFMui prfop:$Rt, GPR64sp:$Rn, 0)>;
+
+//---
+// (literal)
+def LDRWl : LoadLiteral<0b00, 0, GPR32, "ldr">;
+def LDRXl : LoadLiteral<0b01, 0, GPR64, "ldr">;
+def LDRSl : LoadLiteral<0b00, 1, FPR32, "ldr">;
+def LDRDl : LoadLiteral<0b01, 1, FPR64, "ldr">;
+def LDRQl : LoadLiteral<0b10, 1, FPR128, "ldr">;
+
+// load sign-extended word
+def LDRSWl : LoadLiteral<0b10, 0, GPR64, "ldrsw">;
+
+// prefetch
+def PRFMl : PrefetchLiteral<0b11, 0, "prfm", []>;
+// [(AArch64Prefetch imm:$Rt, tglobaladdr:$label)]>;
+
+//---
+// (unscaled immediate)
+defm LDURX : LoadUnscaled<0b11, 0, 0b01, GPR64, "ldur",
+ [(set GPR64:$Rt,
+ (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset)))]>;
+defm LDURW : LoadUnscaled<0b10, 0, 0b01, GPR32, "ldur",
+ [(set GPR32:$Rt,
+ (load (am_unscaled32 GPR64sp:$Rn, simm9:$offset)))]>;
+defm LDURB : LoadUnscaled<0b00, 1, 0b01, FPR8, "ldur",
+ [(set FPR8:$Rt,
+ (load (am_unscaled8 GPR64sp:$Rn, simm9:$offset)))]>;
+defm LDURH : LoadUnscaled<0b01, 1, 0b01, FPR16, "ldur",
+ [(set FPR16:$Rt,
+ (load (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>;
+defm LDURS : LoadUnscaled<0b10, 1, 0b01, FPR32, "ldur",
+ [(set (f32 FPR32:$Rt),
+ (load (am_unscaled32 GPR64sp:$Rn, simm9:$offset)))]>;
+defm LDURD : LoadUnscaled<0b11, 1, 0b01, FPR64, "ldur",
+ [(set (f64 FPR64:$Rt),
+ (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset)))]>;
+defm LDURQ : LoadUnscaled<0b00, 1, 0b11, FPR128, "ldur",
+ [(set (f128 FPR128:$Rt),
+ (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset)))]>;
+
+defm LDURHH
+ : LoadUnscaled<0b01, 0, 0b01, GPR32, "ldurh",
+ [(set GPR32:$Rt,
+ (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>;
+defm LDURBB
+ : LoadUnscaled<0b00, 0, 0b01, GPR32, "ldurb",
+ [(set GPR32:$Rt,
+                         (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset)))]>;
+
+// Match all 64-bit-wide loads whose type is compatible with FPR64.
+let Predicates = [IsLE] in {
+ def : Pat<(v2f32 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
+ (LDURDi GPR64sp:$Rn, simm9:$offset)>;
+ def : Pat<(v2i32 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
+ (LDURDi GPR64sp:$Rn, simm9:$offset)>;
+ def : Pat<(v4i16 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
+ (LDURDi GPR64sp:$Rn, simm9:$offset)>;
+ def : Pat<(v8i8 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
+ (LDURDi GPR64sp:$Rn, simm9:$offset)>;
+}
+def : Pat<(v1f64 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
+ (LDURDi GPR64sp:$Rn, simm9:$offset)>;
+def : Pat<(v1i64 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
+ (LDURDi GPR64sp:$Rn, simm9:$offset)>;
+
+// Match all 128-bit-wide loads whose type is compatible with FPR128.
+let Predicates = [IsLE] in {
+ def : Pat<(v2f64 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
+ (LDURQi GPR64sp:$Rn, simm9:$offset)>;
+ def : Pat<(v2i64 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
+ (LDURQi GPR64sp:$Rn, simm9:$offset)>;
+ def : Pat<(v4f32 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
+ (LDURQi GPR64sp:$Rn, simm9:$offset)>;
+ def : Pat<(v4i32 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
+ (LDURQi GPR64sp:$Rn, simm9:$offset)>;
+ def : Pat<(v8i16 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
+ (LDURQi GPR64sp:$Rn, simm9:$offset)>;
+ def : Pat<(v16i8 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
+ (LDURQi GPR64sp:$Rn, simm9:$offset)>;
+}
+
+// anyext -> zext
+def : Pat<(i32 (extloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))),
+ (LDURHHi GPR64sp:$Rn, simm9:$offset)>;
+def : Pat<(i32 (extloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
+ (LDURBBi GPR64sp:$Rn, simm9:$offset)>;
+def : Pat<(i32 (extloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
+ (LDURBBi GPR64sp:$Rn, simm9:$offset)>;
+def : Pat<(i64 (extloadi32 (am_unscaled32 GPR64sp:$Rn, simm9:$offset))),
+ (SUBREG_TO_REG (i64 0), (LDURWi GPR64sp:$Rn, simm9:$offset), sub_32)>;
+def : Pat<(i64 (extloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))),
+ (SUBREG_TO_REG (i64 0), (LDURHHi GPR64sp:$Rn, simm9:$offset), sub_32)>;
+def : Pat<(i64 (extloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
+ (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>;
+def : Pat<(i64 (extloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
+ (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>;
+// unscaled zext
+def : Pat<(i32 (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))),
+ (LDURHHi GPR64sp:$Rn, simm9:$offset)>;
+def : Pat<(i32 (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
+ (LDURBBi GPR64sp:$Rn, simm9:$offset)>;
+def : Pat<(i32 (zextloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
+ (LDURBBi GPR64sp:$Rn, simm9:$offset)>;
+def : Pat<(i64 (zextloadi32 (am_unscaled32 GPR64sp:$Rn, simm9:$offset))),
+ (SUBREG_TO_REG (i64 0), (LDURWi GPR64sp:$Rn, simm9:$offset), sub_32)>;
+def : Pat<(i64 (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))),
+ (SUBREG_TO_REG (i64 0), (LDURHHi GPR64sp:$Rn, simm9:$offset), sub_32)>;
+def : Pat<(i64 (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
+ (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>;
+def : Pat<(i64 (zextloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
+ (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>;
+
+
+//---
+// LDR mnemonics fall back to LDUR for negative or unaligned offsets.
+
+// Define new assembler match classes, as we want to match these only when
+// they don't otherwise match the scaled addressing mode for LDR/STR. Don't
+// associate a DiagnosticType either, as we want the diagnostic for the
+// canonical form (the scaled operand) to take precedence.
+class SImm9OffsetOperand<int Width> : AsmOperandClass {
+ let Name = "SImm9OffsetFB" # Width;
+ let PredicateMethod = "isSImm9OffsetFB<" # Width # ">";
+ let RenderMethod = "addImmOperands";
}
-let mayLoad = 1 in {
- let Predicates = [HasFPARMv8] in {
- def LDRq_lit : A64I_LDRlitSimple<0b10, 0b1, FPR128>;
- }
-
- def LDRSWx_lit : A64I_LDRlit<0b10, 0b0,
- (outs GPR64:$Rt),
- (ins ldrlit_label:$Imm19),
- "ldrsw\t$Rt, $Imm19",
- [], NoItinerary>,
- Sched<[WriteLd]>;
-
- def PRFM_lit : A64I_LDRlit<0b11, 0b0,
- (outs), (ins prefetch_op:$Rt, ldrlit_label:$Imm19),
- "prfm\t$Rt, $Imm19",
- [], NoItinerary>,
- Sched<[WriteLd, ReadLd]>;
-}
+def SImm9OffsetFB8Operand : SImm9OffsetOperand<8>;
+def SImm9OffsetFB16Operand : SImm9OffsetOperand<16>;
+def SImm9OffsetFB32Operand : SImm9OffsetOperand<32>;
+def SImm9OffsetFB64Operand : SImm9OffsetOperand<64>;
+def SImm9OffsetFB128Operand : SImm9OffsetOperand<128>;
+
+def simm9_offset_fb8 : Operand<i64> {
+ let ParserMatchClass = SImm9OffsetFB8Operand;
+}
+def simm9_offset_fb16 : Operand<i64> {
+ let ParserMatchClass = SImm9OffsetFB16Operand;
+}
+def simm9_offset_fb32 : Operand<i64> {
+ let ParserMatchClass = SImm9OffsetFB32Operand;
+}
+def simm9_offset_fb64 : Operand<i64> {
+ let ParserMatchClass = SImm9OffsetFB64Operand;
+}
+def simm9_offset_fb128 : Operand<i64> {
+ let ParserMatchClass = SImm9OffsetFB128Operand;
+}
+
+def : InstAlias<"ldr $Rt, [$Rn, $offset]",
+ (LDURXi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>;
+def : InstAlias<"ldr $Rt, [$Rn, $offset]",
+ (LDURWi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>;
+def : InstAlias<"ldr $Rt, [$Rn, $offset]",
+ (LDURBi FPR8:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>;
+def : InstAlias<"ldr $Rt, [$Rn, $offset]",
+ (LDURHi FPR16:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>;
+def : InstAlias<"ldr $Rt, [$Rn, $offset]",
+ (LDURSi FPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>;
+def : InstAlias<"ldr $Rt, [$Rn, $offset]",
+ (LDURDi FPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>;
+def : InstAlias<"ldr $Rt, [$Rn, $offset]",
+ (LDURQi FPR128:$Rt, GPR64sp:$Rn, simm9_offset_fb128:$offset), 0>;
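+
+// Illustration: "ldr x0, [x1, #-8]" cannot use the scaled (unsigned
+// immediate) encoding, so these aliases match it to LDURXi, which prints
+// canonically as "ldur x0, [x1, #-8]".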
+
+// zextload -> i64
+def : Pat<(i64 (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
+ (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>;
+def : Pat<(i64 (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))),
+ (SUBREG_TO_REG (i64 0), (LDURHHi GPR64sp:$Rn, simm9:$offset), sub_32)>;
+
+// load sign-extended half-word
+defm LDURSHW
+ : LoadUnscaled<0b01, 0, 0b11, GPR32, "ldursh",
+ [(set GPR32:$Rt,
+ (sextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>;
+defm LDURSHX
+ : LoadUnscaled<0b01, 0, 0b10, GPR64, "ldursh",
+ [(set GPR64:$Rt,
+ (sextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>;
+
+// load sign-extended byte
+defm LDURSBW
+ : LoadUnscaled<0b00, 0, 0b11, GPR32, "ldursb",
+ [(set GPR32:$Rt,
+ (sextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset)))]>;
+defm LDURSBX
+ : LoadUnscaled<0b00, 0, 0b10, GPR64, "ldursb",
+ [(set GPR64:$Rt,
+ (sextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset)))]>;
+
+// load sign-extended word
+defm LDURSW
+ : LoadUnscaled<0b10, 0, 0b10, GPR64, "ldursw",
+ [(set GPR64:$Rt,
+ (sextloadi32 (am_unscaled32 GPR64sp:$Rn, simm9:$offset)))]>;
+
+// Zero- and sign-extending aliases from generic LDR* mnemonics to LDUR*.
+def : InstAlias<"ldrb $Rt, [$Rn, $offset]",
+ (LDURBBi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>;
+def : InstAlias<"ldrh $Rt, [$Rn, $offset]",
+ (LDURHHi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>;
+def : InstAlias<"ldrsb $Rt, [$Rn, $offset]",
+ (LDURSBWi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>;
+def : InstAlias<"ldrsb $Rt, [$Rn, $offset]",
+ (LDURSBXi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>;
+def : InstAlias<"ldrsh $Rt, [$Rn, $offset]",
+ (LDURSHWi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>;
+def : InstAlias<"ldrsh $Rt, [$Rn, $offset]",
+ (LDURSHXi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>;
+def : InstAlias<"ldrsw $Rt, [$Rn, $offset]",
+ (LDURSWi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>;
+
+// Pre-fetch.
+defm PRFUM : PrefetchUnscaled<0b11, 0, 0b10, "prfum",
+ [(AArch64Prefetch imm:$Rt,
+ (am_unscaled64 GPR64sp:$Rn, simm9:$offset))]>;
+
+//---
+// (unscaled immediate, unprivileged)
+defm LDTRX : LoadUnprivileged<0b11, 0, 0b01, GPR64, "ldtr">;
+defm LDTRW : LoadUnprivileged<0b10, 0, 0b01, GPR32, "ldtr">;
+
+defm LDTRH : LoadUnprivileged<0b01, 0, 0b01, GPR32, "ldtrh">;
+defm LDTRB : LoadUnprivileged<0b00, 0, 0b01, GPR32, "ldtrb">;
+
+// load sign-extended half-word
+defm LDTRSHW : LoadUnprivileged<0b01, 0, 0b11, GPR32, "ldtrsh">;
+defm LDTRSHX : LoadUnprivileged<0b01, 0, 0b10, GPR64, "ldtrsh">;
+
+// load sign-extended byte
+defm LDTRSBW : LoadUnprivileged<0b00, 0, 0b11, GPR32, "ldtrsb">;
+defm LDTRSBX : LoadUnprivileged<0b00, 0, 0b10, GPR64, "ldtrsb">;
+
+// load sign-extended word
+defm LDTRSW : LoadUnprivileged<0b10, 0, 0b10, GPR64, "ldtrsw">;
+
+//---
+// (immediate pre-indexed)
+def LDRWpre : LoadPreIdx<0b10, 0, 0b01, GPR32, "ldr">;
+def LDRXpre : LoadPreIdx<0b11, 0, 0b01, GPR64, "ldr">;
+def LDRBpre : LoadPreIdx<0b00, 1, 0b01, FPR8, "ldr">;
+def LDRHpre : LoadPreIdx<0b01, 1, 0b01, FPR16, "ldr">;
+def LDRSpre : LoadPreIdx<0b10, 1, 0b01, FPR32, "ldr">;
+def LDRDpre : LoadPreIdx<0b11, 1, 0b01, FPR64, "ldr">;
+def LDRQpre : LoadPreIdx<0b00, 1, 0b11, FPR128, "ldr">;
+
+// load sign-extended half-word
+def LDRSHWpre : LoadPreIdx<0b01, 0, 0b11, GPR32, "ldrsh">;
+def LDRSHXpre : LoadPreIdx<0b01, 0, 0b10, GPR64, "ldrsh">;
+
+// load sign-extended byte
+def LDRSBWpre : LoadPreIdx<0b00, 0, 0b11, GPR32, "ldrsb">;
+def LDRSBXpre : LoadPreIdx<0b00, 0, 0b10, GPR64, "ldrsb">;
+
+// load zero-extended byte
+def LDRBBpre : LoadPreIdx<0b00, 0, 0b01, GPR32, "ldrb">;
+def LDRHHpre : LoadPreIdx<0b01, 0, 0b01, GPR32, "ldrh">;
+
+// load sign-extended word
+def LDRSWpre : LoadPreIdx<0b10, 0, 0b10, GPR64, "ldrsw">;
+
+//---
+// (immediate post-indexed)
+def LDRWpost : LoadPostIdx<0b10, 0, 0b01, GPR32, "ldr">;
+def LDRXpost : LoadPostIdx<0b11, 0, 0b01, GPR64, "ldr">;
+def LDRBpost : LoadPostIdx<0b00, 1, 0b01, FPR8, "ldr">;
+def LDRHpost : LoadPostIdx<0b01, 1, 0b01, FPR16, "ldr">;
+def LDRSpost : LoadPostIdx<0b10, 1, 0b01, FPR32, "ldr">;
+def LDRDpost : LoadPostIdx<0b11, 1, 0b01, FPR64, "ldr">;
+def LDRQpost : LoadPostIdx<0b00, 1, 0b11, FPR128, "ldr">;
+
+// load sign-extended half-word
+def LDRSHWpost : LoadPostIdx<0b01, 0, 0b11, GPR32, "ldrsh">;
+def LDRSHXpost : LoadPostIdx<0b01, 0, 0b10, GPR64, "ldrsh">;
+
+// load sign-extended byte
+def LDRSBWpost : LoadPostIdx<0b00, 0, 0b11, GPR32, "ldrsb">;
+def LDRSBXpost : LoadPostIdx<0b00, 0, 0b10, GPR64, "ldrsb">;
+
+// load zero-extended byte
+def LDRBBpost : LoadPostIdx<0b00, 0, 0b01, GPR32, "ldrb">;
+def LDRHHpost : LoadPostIdx<0b01, 0, 0b01, GPR32, "ldrh">;
+
+// load sign-extended word
+def LDRSWpost : LoadPostIdx<0b10, 0, 0b10, GPR64, "ldrsw">;
//===----------------------------------------------------------------------===//
-// Load-store exclusive instructions
+// Store instructions.
//===----------------------------------------------------------------------===//
-// Contains: STXRB, STXRH, STXR, LDXRB, LDXRH, LDXR. STXP, LDXP, STLXRB,
-// STLXRH, STLXR, LDAXRB, LDAXRH, LDAXR, STLXP, LDAXP, STLRB,
-// STLRH, STLR, LDARB, LDARH, LDAR
-
-// Since these instructions have the undefined register bits set to 1 in
-// their canonical form, we need a post encoder method to set those bits
-// to 1 when encoding these instructions. We do this using the
-// fixLoadStoreExclusive function. This function has template parameters:
-//
-// fixLoadStoreExclusive<int hasRs, int hasRt2>
-//
-// hasRs indicates that the instruction uses the Rs field, so we won't set
-// it to 1 (and the same for Rt2). We don't need template parameters for
-// the other register fields since Rt and Rn are always used.
-
-// This operand parses a GPR64xsp register, followed by an optional immediate
-// #0.
-def GPR64xsp0_asmoperand : AsmOperandClass {
- let Name = "GPR64xsp0";
- let PredicateMethod = "isWrappedReg";
- let RenderMethod = "addRegOperands";
- let ParserMethod = "ParseLSXAddressOperand";
- // Diagnostics are provided by ParserMethod
-}
-
-def GPR64xsp0 : RegisterOperand<GPR64xsp> {
- let ParserMatchClass = GPR64xsp0_asmoperand;
-}
-
-//===----------------------------------
-// Store-exclusive (releasing & normal)
-//===----------------------------------
-
-class A64I_SRexs_impl<bits<2> size, bits<3> opcode, string asm, dag outs,
- dag ins, list<dag> pat,
- InstrItinClass itin> :
- A64I_LDSTex_stn <size,
- opcode{2}, 0, opcode{1}, opcode{0},
- outs, ins,
- !strconcat(asm, "\t$Rs, $Rt, [$Rn]"),
- pat, itin> {
- let mayStore = 1;
- let PostEncoderMethod = "fixLoadStoreExclusive<1,0>";
- let Constraints = "@earlyclobber $Rs";
-}
-
-multiclass A64I_SRex<string asmstr, bits<3> opcode, string prefix> {
- def _byte: A64I_SRexs_impl<0b00, opcode, !strconcat(asmstr, "b"),
- (outs GPR32:$Rs), (ins GPR32:$Rt, GPR64xsp0:$Rn),
- [], NoItinerary>,
- Sched<[WriteSt, ReadSt, ReadSt]>;
-
- def _hword: A64I_SRexs_impl<0b01, opcode, !strconcat(asmstr, "h"),
- (outs GPR32:$Rs), (ins GPR32:$Rt, GPR64xsp0:$Rn),
- [],NoItinerary>,
- Sched<[WriteSt, ReadSt, ReadSt]>;
-
- def _word: A64I_SRexs_impl<0b10, opcode, asmstr,
- (outs GPR32:$Rs), (ins GPR32:$Rt, GPR64xsp0:$Rn),
- [], NoItinerary>,
- Sched<[WriteSt, ReadSt, ReadSt]>;
-
- def _dword: A64I_SRexs_impl<0b11, opcode, asmstr,
- (outs GPR32:$Rs), (ins GPR64:$Rt, GPR64xsp0:$Rn),
- [], NoItinerary>,
- Sched<[WriteSt, ReadSt, ReadSt]>;
-}
-
-defm STXR : A64I_SRex<"stxr", 0b000, "STXR">;
-defm STLXR : A64I_SRex<"stlxr", 0b001, "STLXR">;
-
-//===----------------------------------
-// Loads
-//===----------------------------------
-
-class A64I_LRexs_impl<bits<2> size, bits<3> opcode, string asm, dag outs,
- dag ins, list<dag> pat,
- InstrItinClass itin> :
- A64I_LDSTex_tn <size,
- opcode{2}, 1, opcode{1}, opcode{0},
- outs, ins,
- !strconcat(asm, "\t$Rt, [$Rn]"),
- pat, itin> {
- let mayLoad = 1;
- let PostEncoderMethod = "fixLoadStoreExclusive<0,0>";
-}
-multiclass A64I_LRex<string asmstr, bits<3> opcode> {
- def _byte: A64I_LRexs_impl<0b00, opcode, !strconcat(asmstr, "b"),
- (outs GPR32:$Rt), (ins GPR64xsp0:$Rn),
- [], NoItinerary>,
- Sched<[WriteLd]>;
-
- def _hword: A64I_LRexs_impl<0b01, opcode, !strconcat(asmstr, "h"),
- (outs GPR32:$Rt), (ins GPR64xsp0:$Rn),
- [], NoItinerary>,
- Sched<[WriteLd]>;
-
- def _word: A64I_LRexs_impl<0b10, opcode, asmstr,
- (outs GPR32:$Rt), (ins GPR64xsp0:$Rn),
- [], NoItinerary>,
- Sched<[WriteLd]>;
-
- def _dword: A64I_LRexs_impl<0b11, opcode, asmstr,
- (outs GPR64:$Rt), (ins GPR64xsp0:$Rn),
- [], NoItinerary>,
- Sched<[WriteLd]>;
+// Pair (indexed, offset)
+// FIXME: Use dedicated range-checked addressing mode operand here.
+defm STPW : StorePairOffset<0b00, 0, GPR32, simm7s4, "stp">;
+defm STPX : StorePairOffset<0b10, 0, GPR64, simm7s8, "stp">;
+defm STPS : StorePairOffset<0b00, 1, FPR32, simm7s4, "stp">;
+defm STPD : StorePairOffset<0b01, 1, FPR64, simm7s8, "stp">;
+defm STPQ : StorePairOffset<0b10, 1, FPR128, simm7s16, "stp">;
+
+// Pair (pre-indexed)
+def STPWpre : StorePairPreIdx<0b00, 0, GPR32, simm7s4, "stp">;
+def STPXpre : StorePairPreIdx<0b10, 0, GPR64, simm7s8, "stp">;
+def STPSpre : StorePairPreIdx<0b00, 1, FPR32, simm7s4, "stp">;
+def STPDpre : StorePairPreIdx<0b01, 1, FPR64, simm7s8, "stp">;
+def STPQpre : StorePairPreIdx<0b10, 1, FPR128, simm7s16, "stp">;
+
+// Pair (post-indexed)
+def STPWpost : StorePairPostIdx<0b00, 0, GPR32, simm7s4, "stp">;
+def STPXpost : StorePairPostIdx<0b10, 0, GPR64, simm7s8, "stp">;
+def STPSpost : StorePairPostIdx<0b00, 1, FPR32, simm7s4, "stp">;
+def STPDpost : StorePairPostIdx<0b01, 1, FPR64, simm7s8, "stp">;
+def STPQpost : StorePairPostIdx<0b10, 1, FPR128, simm7s16, "stp">;
+
+// Pair (no allocate)
+defm STNPW : StorePairNoAlloc<0b00, 0, GPR32, simm7s4, "stnp">;
+defm STNPX : StorePairNoAlloc<0b10, 0, GPR64, simm7s8, "stnp">;
+defm STNPS : StorePairNoAlloc<0b00, 1, FPR32, simm7s4, "stnp">;
+defm STNPD : StorePairNoAlloc<0b01, 1, FPR64, simm7s8, "stnp">;
+defm STNPQ : StorePairNoAlloc<0b10, 1, FPR128, simm7s16, "stnp">;
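+
+// STNP is a normal store pair with a non-temporal hint: it tells the memory
+// system the data is unlikely to be reused soon, e.g. "stnp x0, x1, [x2, #16]".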
+
+//---
+// (register offset)
+
+// Integer
+defm STRBB : Store8RO< 0b00, 0, 0b00, GPR32, "strb", i32, truncstorei8>;
+defm STRHH : Store16RO<0b01, 0, 0b00, GPR32, "strh", i32, truncstorei16>;
+defm STRW : Store32RO<0b10, 0, 0b00, GPR32, "str", i32, store>;
+defm STRX : Store64RO<0b11, 0, 0b00, GPR64, "str", i64, store>;
+
+// Floating-point
+defm STRB : Store8RO< 0b00, 1, 0b00, FPR8, "str", untyped, store>;
+defm STRH : Store16RO<0b01, 1, 0b00, FPR16, "str", f16, store>;
+defm STRS : Store32RO<0b10, 1, 0b00, FPR32, "str", f32, store>;
+defm STRD : Store64RO<0b11, 1, 0b00, FPR64, "str", f64, store>;
+defm STRQ : Store128RO<0b00, 1, 0b10, FPR128, "str", f128, store>;
+
+multiclass TruncStoreFrom64ROPat<ROAddrMode ro, SDPatternOperator storeop,
+ Instruction STRW, Instruction STRX> {
+
+ def : Pat<(storeop GPR64:$Rt,
+ (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)),
+ (STRW (EXTRACT_SUBREG GPR64:$Rt, sub_32),
+ GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>;
+
+ def : Pat<(storeop GPR64:$Rt,
+ (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)),
+ (STRX (EXTRACT_SUBREG GPR64:$Rt, sub_32),
+ GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>;
}
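+
+// A sketch of what TruncStoreFrom64ROPat selects: a truncating store of an i64
+// only writes the low bits, so it is lowered to the W-register store on the
+// 32-bit sub-register, e.g. (truncstorei8 GPR64:$Rt, addr) becomes
+// "strb wN, [xM, wK, uxtw]".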
-defm LDXR : A64I_LRex<"ldxr", 0b000>;
-defm LDAXR : A64I_LRex<"ldaxr", 0b001>;
-defm LDAR : A64I_LRex<"ldar", 0b101>;
-
-class acquiring_load<PatFrag base>
- : PatFrag<(ops node:$ptr), (base node:$ptr), [{
- AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getOrdering();
- return Ordering == Acquire || Ordering == SequentiallyConsistent;
-}]>;
-
-def atomic_load_acquire_8 : acquiring_load<atomic_load_8>;
-def atomic_load_acquire_16 : acquiring_load<atomic_load_16>;
-def atomic_load_acquire_32 : acquiring_load<atomic_load_32>;
-def atomic_load_acquire_64 : acquiring_load<atomic_load_64>;
-
-def : Pat<(atomic_load_acquire_8 i64:$Rn), (LDAR_byte $Rn)>;
-def : Pat<(atomic_load_acquire_16 i64:$Rn), (LDAR_hword $Rn)>;
-def : Pat<(atomic_load_acquire_32 i64:$Rn), (LDAR_word $Rn)>;
-def : Pat<(atomic_load_acquire_64 i64:$Rn), (LDAR_dword $Rn)>;
-
-//===----------------------------------
-// Store-release (no exclusivity)
-//===----------------------------------
-
-class A64I_SLexs_impl<bits<2> size, bits<3> opcode, string asm, dag outs,
- dag ins, list<dag> pat,
- InstrItinClass itin> :
- A64I_LDSTex_tn <size,
- opcode{2}, 0, opcode{1}, opcode{0},
- outs, ins,
- !strconcat(asm, "\t$Rt, [$Rn]"),
- pat, itin> {
- let mayStore = 1;
- let PostEncoderMethod = "fixLoadStoreExclusive<0,0>";
-}
-
-class releasing_store<PatFrag base>
- : PatFrag<(ops node:$ptr, node:$val), (base node:$ptr, node:$val), [{
- AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getOrdering();
- return Ordering == Release || Ordering == SequentiallyConsistent;
-}]>;
-
-def atomic_store_release_8 : releasing_store<atomic_store_8>;
-def atomic_store_release_16 : releasing_store<atomic_store_16>;
-def atomic_store_release_32 : releasing_store<atomic_store_32>;
-def atomic_store_release_64 : releasing_store<atomic_store_64>;
-
-multiclass A64I_SLex<string asmstr, bits<3> opcode, string prefix> {
- def _byte: A64I_SLexs_impl<0b00, opcode, !strconcat(asmstr, "b"),
- (outs), (ins GPR32:$Rt, GPR64xsp0:$Rn),
- [(atomic_store_release_8 i64:$Rn, i32:$Rt)],
- NoItinerary>,
- Sched<[WriteSt, ReadSt, ReadSt]>;
-
- def _hword: A64I_SLexs_impl<0b01, opcode, !strconcat(asmstr, "h"),
- (outs), (ins GPR32:$Rt, GPR64xsp0:$Rn),
- [(atomic_store_release_16 i64:$Rn, i32:$Rt)],
- NoItinerary>,
- Sched<[WriteSt, ReadSt, ReadSt]>;
-
- def _word: A64I_SLexs_impl<0b10, opcode, asmstr,
- (outs), (ins GPR32:$Rt, GPR64xsp0:$Rn),
- [(atomic_store_release_32 i64:$Rn, i32:$Rt)],
- NoItinerary>,
- Sched<[WriteSt, ReadSt, ReadSt]>;
-
- def _dword: A64I_SLexs_impl<0b11, opcode, asmstr,
- (outs), (ins GPR64:$Rt, GPR64xsp0:$Rn),
- [(atomic_store_release_64 i64:$Rn, i64:$Rt)],
- NoItinerary>,
- Sched<[WriteSt, ReadSt, ReadSt]>;
-}
-
-defm STLR : A64I_SLex<"stlr", 0b101, "STLR">;
-
-//===----------------------------------
-// Store-exclusive pair (releasing & normal)
-//===----------------------------------
-
-class A64I_SPexs_impl<bits<2> size, bits<3> opcode, string asm, dag outs,
- dag ins, list<dag> pat,
- InstrItinClass itin> :
- A64I_LDSTex_stt2n <size,
- opcode{2}, 0, opcode{1}, opcode{0},
- outs, ins,
- !strconcat(asm, "\t$Rs, $Rt, $Rt2, [$Rn]"),
- pat, itin> {
- let mayStore = 1;
-}
-
-
-multiclass A64I_SPex<string asmstr, bits<3> opcode> {
- def _word: A64I_SPexs_impl<0b10, opcode, asmstr, (outs),
- (ins GPR32:$Rs, GPR32:$Rt, GPR32:$Rt2,
- GPR64xsp0:$Rn),
- [], NoItinerary>,
- Sched<[WriteSt, ReadSt, ReadSt, ReadSt]>;
-
- def _dword: A64I_SPexs_impl<0b11, opcode, asmstr, (outs),
- (ins GPR32:$Rs, GPR64:$Rt, GPR64:$Rt2,
- GPR64xsp0:$Rn),
- [], NoItinerary>,
- Sched<[WriteSt, ReadSt, ReadSt, ReadSt]>;
+let AddedComplexity = 10 in {
+ // truncstore i64
+ defm : TruncStoreFrom64ROPat<ro8, truncstorei8, STRBBroW, STRBBroX>;
+ defm : TruncStoreFrom64ROPat<ro16, truncstorei16, STRHHroW, STRHHroX>;
+ defm : TruncStoreFrom64ROPat<ro32, truncstorei32, STRWroW, STRWroX>;
}
-defm STXP : A64I_SPex<"stxp", 0b010>;
-defm STLXP : A64I_SPex<"stlxp", 0b011>;
-
-//===----------------------------------
-// Load-exclusive pair (acquiring & normal)
-//===----------------------------------
-
-class A64I_LPexs_impl<bits<2> size, bits<3> opcode, string asm, dag outs,
- dag ins, list<dag> pat,
- InstrItinClass itin> :
- A64I_LDSTex_tt2n <size,
- opcode{2}, 1, opcode{1}, opcode{0},
- outs, ins,
- !strconcat(asm, "\t$Rt, $Rt2, [$Rn]"),
- pat, itin>{
- let mayLoad = 1;
- let DecoderMethod = "DecodeLoadPairExclusiveInstruction";
- let PostEncoderMethod = "fixLoadStoreExclusive<0,1>";
-}
+multiclass VecROStorePat<ROAddrMode ro, ValueType VecTy, RegisterClass FPR,
+ Instruction STRW, Instruction STRX> {
+ def : Pat<(store (VecTy FPR:$Rt),
+ (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)),
+ (STRW FPR:$Rt, GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>;
-multiclass A64I_LPex<string asmstr, bits<3> opcode> {
- def _word: A64I_LPexs_impl<0b10, opcode, asmstr,
- (outs GPR32:$Rt, GPR32:$Rt2),
- (ins GPR64xsp0:$Rn),
- [], NoItinerary>,
- Sched<[WriteLd, WriteLd, ReadLd]>;
-
- def _dword: A64I_LPexs_impl<0b11, opcode, asmstr,
- (outs GPR64:$Rt, GPR64:$Rt2),
- (ins GPR64xsp0:$Rn),
- [], NoItinerary>,
- Sched<[WriteLd, WriteLd, ReadLd]>;
+ def : Pat<(store (VecTy FPR:$Rt),
+ (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)),
+ (STRX FPR:$Rt, GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>;
}
-defm LDXP : A64I_LPex<"ldxp", 0b010>;
-defm LDAXP : A64I_LPex<"ldaxp", 0b011>;
+let AddedComplexity = 10 in {
+// Match all 64-bit-wide stores whose type is compatible with FPR64
+let Predicates = [IsLE] in {
+  // In big-endian mode, vectors must be stored with ST1 instead.
+ defm : VecROStorePat<ro64, v2i32, FPR64, STRDroW, STRDroX>;
+ defm : VecROStorePat<ro64, v2f32, FPR64, STRDroW, STRDroX>;
+ defm : VecROStorePat<ro64, v4i16, FPR64, STRDroW, STRDroX>;
+ defm : VecROStorePat<ro64, v8i8, FPR64, STRDroW, STRDroX>;
+}
+
+defm : VecROStorePat<ro64, v1i64, FPR64, STRDroW, STRDroX>;
+defm : VecROStorePat<ro64, v1f64, FPR64, STRDroW, STRDroX>;
+
+// Match all 128-bit-wide stores whose type is compatible with FPR128
+let Predicates = [IsLE] in {
+  // In big-endian mode, vectors must be stored with ST1 instead.
+ defm : VecROStorePat<ro128, v2i64, FPR128, STRQroW, STRQroX>;
+ defm : VecROStorePat<ro128, v2f64, FPR128, STRQroW, STRQroX>;
+ defm : VecROStorePat<ro128, v4i32, FPR128, STRQroW, STRQroX>;
+ defm : VecROStorePat<ro128, v4f32, FPR128, STRQroW, STRQroX>;
+ defm : VecROStorePat<ro128, v8i16, FPR128, STRQroW, STRQroX>;
+ defm : VecROStorePat<ro128, v16i8, FPR128, STRQroW, STRQroX>;
+}
+} // AddedComplexity = 10
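+
+// The register-offset stores selected above assemble as, e.g.
+//   str d0, [x0, x1, lsl #3]    ; 64-bit store, scaled X-register index
+//   str q0, [x0, w1, uxtw #4]   ; 128-bit store, extended W-register index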
+
+//---
+// (unsigned immediate)
+defm STRX : StoreUI<0b11, 0, 0b00, GPR64, uimm12s8, "str",
+ [(store GPR64:$Rt,
+ (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))]>;
+defm STRW : StoreUI<0b10, 0, 0b00, GPR32, uimm12s4, "str",
+ [(store GPR32:$Rt,
+ (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))]>;
+defm STRB : StoreUI<0b00, 1, 0b00, FPR8, uimm12s1, "str",
+ [(store FPR8:$Rt,
+ (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))]>;
+defm STRH : StoreUI<0b01, 1, 0b00, FPR16, uimm12s2, "str",
+ [(store (f16 FPR16:$Rt),
+ (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))]>;
+defm STRS : StoreUI<0b10, 1, 0b00, FPR32, uimm12s4, "str",
+ [(store (f32 FPR32:$Rt),
+ (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))]>;
+defm STRD : StoreUI<0b11, 1, 0b00, FPR64, uimm12s8, "str",
+ [(store (f64 FPR64:$Rt),
+ (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))]>;
+defm STRQ : StoreUI<0b00, 1, 0b10, FPR128, uimm12s16, "str", []>;
+
+defm STRHH : StoreUI<0b01, 0, 0b00, GPR32, uimm12s2, "strh",
+ [(truncstorei16 GPR32:$Rt,
+ (am_indexed16 GPR64sp:$Rn,
+ uimm12s2:$offset))]>;
+defm STRBB : StoreUI<0b00, 0, 0b00, GPR32, uimm12s1, "strb",
+ [(truncstorei8 GPR32:$Rt,
+ (am_indexed8 GPR64sp:$Rn,
+ uimm12s1:$offset))]>;
+
+// Match all 64-bit-wide stores whose type is compatible with FPR64
+let AddedComplexity = 10 in {
+let Predicates = [IsLE] in {
+  // In big-endian mode, vectors must be stored with ST1 instead.
+ def : Pat<(store (v2f32 FPR64:$Rt),
+ (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
+ (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
+ def : Pat<(store (v8i8 FPR64:$Rt),
+ (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
+ (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
+ def : Pat<(store (v4i16 FPR64:$Rt),
+ (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
+ (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
+ def : Pat<(store (v2i32 FPR64:$Rt),
+ (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
+ (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
+}
+def : Pat<(store (v1f64 FPR64:$Rt),
+ (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
+ (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
+def : Pat<(store (v1i64 FPR64:$Rt),
+ (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
+ (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
+
+// Match all 128-bit-wide stores whose type is compatible with FPR128
+let Predicates = [IsLE] in {
+  // In big-endian mode, vectors must be stored with ST1 instead.
+ def : Pat<(store (v4f32 FPR128:$Rt),
+ (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
+ (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
+ def : Pat<(store (v2f64 FPR128:$Rt),
+ (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
+ (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
+ def : Pat<(store (v16i8 FPR128:$Rt),
+ (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
+ (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
+ def : Pat<(store (v8i16 FPR128:$Rt),
+ (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
+ (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
+ def : Pat<(store (v4i32 FPR128:$Rt),
+ (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
+ (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
+ def : Pat<(store (v2i64 FPR128:$Rt),
+ (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
+ (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
+}
+def : Pat<(store (f128 FPR128:$Rt),
+ (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
+ (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
+
+// truncstore i64
+def : Pat<(truncstorei32 GPR64:$Rt,
+ (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset)),
+ (STRWui (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, uimm12s4:$offset)>;
+def : Pat<(truncstorei16 GPR64:$Rt,
+ (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)),
+ (STRHHui (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, uimm12s2:$offset)>;
+def : Pat<(truncstorei8 GPR64:$Rt, (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset)),
+ (STRBBui (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, uimm12s1:$offset)>;
+
+} // AddedComplexity = 10
+
+//---
+// (unscaled immediate)
+defm STURX : StoreUnscaled<0b11, 0, 0b00, GPR64, "stur",
+ [(store GPR64:$Rt,
+ (am_unscaled64 GPR64sp:$Rn, simm9:$offset))]>;
+defm STURW : StoreUnscaled<0b10, 0, 0b00, GPR32, "stur",
+ [(store GPR32:$Rt,
+ (am_unscaled32 GPR64sp:$Rn, simm9:$offset))]>;
+defm STURB : StoreUnscaled<0b00, 1, 0b00, FPR8, "stur",
+ [(store FPR8:$Rt,
+ (am_unscaled8 GPR64sp:$Rn, simm9:$offset))]>;
+defm STURH : StoreUnscaled<0b01, 1, 0b00, FPR16, "stur",
+ [(store (f16 FPR16:$Rt),
+ (am_unscaled16 GPR64sp:$Rn, simm9:$offset))]>;
+defm STURS : StoreUnscaled<0b10, 1, 0b00, FPR32, "stur",
+ [(store (f32 FPR32:$Rt),
+ (am_unscaled32 GPR64sp:$Rn, simm9:$offset))]>;
+defm STURD : StoreUnscaled<0b11, 1, 0b00, FPR64, "stur",
+ [(store (f64 FPR64:$Rt),
+ (am_unscaled64 GPR64sp:$Rn, simm9:$offset))]>;
+defm STURQ : StoreUnscaled<0b00, 1, 0b10, FPR128, "stur",
+ [(store (f128 FPR128:$Rt),
+ (am_unscaled128 GPR64sp:$Rn, simm9:$offset))]>;
+defm STURHH : StoreUnscaled<0b01, 0, 0b00, GPR32, "sturh",
+ [(truncstorei16 GPR32:$Rt,
+ (am_unscaled16 GPR64sp:$Rn, simm9:$offset))]>;
+defm STURBB : StoreUnscaled<0b00, 0, 0b00, GPR32, "sturb",
+ [(truncstorei8 GPR32:$Rt,
+ (am_unscaled8 GPR64sp:$Rn, simm9:$offset))]>;
+
+// Match all 64-bit-wide stores whose type is compatible with FPR64
+let Predicates = [IsLE] in {
+  // In big-endian mode, vectors must be stored with ST1 instead.
+ def : Pat<(store (v2f32 FPR64:$Rt),
+ (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
+ (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
+ def : Pat<(store (v8i8 FPR64:$Rt),
+ (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
+ (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
+ def : Pat<(store (v4i16 FPR64:$Rt),
+ (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
+ (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
+ def : Pat<(store (v2i32 FPR64:$Rt),
+ (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
+ (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
+}
+def : Pat<(store (v1f64 FPR64:$Rt), (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
+ (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
+def : Pat<(store (v1i64 FPR64:$Rt), (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
+ (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
+
+// Match all 128-bit-wide stores whose type is compatible with FPR128
+let Predicates = [IsLE] in {
+  // In big-endian mode, vectors must be stored with ST1 instead.
+ def : Pat<(store (v4f32 FPR128:$Rt),
+ (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
+ (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
+ def : Pat<(store (v2f64 FPR128:$Rt),
+ (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
+ (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
+ def : Pat<(store (v16i8 FPR128:$Rt),
+ (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
+ (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
+ def : Pat<(store (v8i16 FPR128:$Rt),
+ (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
+ (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
+ def : Pat<(store (v4i32 FPR128:$Rt),
+ (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
+ (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
+ def : Pat<(store (v2i64 FPR128:$Rt),
+ (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
+ (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
+}
+
+// unscaled i64 truncating stores
+def : Pat<(truncstorei32 GPR64:$Rt, (am_unscaled32 GPR64sp:$Rn, simm9:$offset)),
+ (STURWi (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, simm9:$offset)>;
+def : Pat<(truncstorei16 GPR64:$Rt, (am_unscaled16 GPR64sp:$Rn, simm9:$offset)),
+ (STURHHi (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, simm9:$offset)>;
+def : Pat<(truncstorei8 GPR64:$Rt, (am_unscaled8 GPR64sp:$Rn, simm9:$offset)),
+ (STURBBi (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, simm9:$offset)>;
+
+//---
+// STR mnemonics fall back to STUR for negative or unaligned offsets.
+def : InstAlias<"str $Rt, [$Rn, $offset]",
+ (STURXi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>;
+def : InstAlias<"str $Rt, [$Rn, $offset]",
+ (STURWi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>;
+def : InstAlias<"str $Rt, [$Rn, $offset]",
+ (STURBi FPR8:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>;
+def : InstAlias<"str $Rt, [$Rn, $offset]",
+ (STURHi FPR16:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>;
+def : InstAlias<"str $Rt, [$Rn, $offset]",
+ (STURSi FPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>;
+def : InstAlias<"str $Rt, [$Rn, $offset]",
+ (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>;
+def : InstAlias<"str $Rt, [$Rn, $offset]",
+ (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9_offset_fb128:$offset), 0>;
+
+def : InstAlias<"strb $Rt, [$Rn, $offset]",
+ (STURBBi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>;
+def : InstAlias<"strh $Rt, [$Rn, $offset]",
+ (STURHHi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>;
+
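+// With these aliases, e.g. "str x0, [x1, #-8]" assembles to STURXi, since the
+// unsigned-offset STR encoding cannot represent a negative offset.
+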
+//---
+// (unscaled immediate, unprivileged)
+defm STTRW : StoreUnprivileged<0b10, 0, 0b00, GPR32, "sttr">;
+defm STTRX : StoreUnprivileged<0b11, 0, 0b00, GPR64, "sttr">;
+
+defm STTRH : StoreUnprivileged<0b01, 0, 0b00, GPR32, "sttrh">;
+defm STTRB : StoreUnprivileged<0b00, 0, 0b00, GPR32, "sttrb">;
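+
+// The unprivileged forms perform the access with EL0 permissions when executed
+// at EL1, e.g. "sttr w0, [x1, #4]".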
+
+//---
+// (immediate pre-indexed)
+def STRWpre : StorePreIdx<0b10, 0, 0b00, GPR32, "str", pre_store, i32>;
+def STRXpre : StorePreIdx<0b11, 0, 0b00, GPR64, "str", pre_store, i64>;
+def STRBpre : StorePreIdx<0b00, 1, 0b00, FPR8, "str", pre_store, untyped>;
+def STRHpre : StorePreIdx<0b01, 1, 0b00, FPR16, "str", pre_store, f16>;
+def STRSpre : StorePreIdx<0b10, 1, 0b00, FPR32, "str", pre_store, f32>;
+def STRDpre : StorePreIdx<0b11, 1, 0b00, FPR64, "str", pre_store, f64>;
+def STRQpre : StorePreIdx<0b00, 1, 0b10, FPR128, "str", pre_store, f128>;
+
+def STRBBpre : StorePreIdx<0b00, 0, 0b00, GPR32, "strb", pre_truncsti8, i32>;
+def STRHHpre : StorePreIdx<0b01, 0, 0b00, GPR32, "strh", pre_truncsti16, i32>;
+
+// truncstore i64
+def : Pat<(pre_truncsti32 GPR64:$Rt, GPR64sp:$addr, simm9:$off),
+ (STRWpre (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr,
+ simm9:$off)>;
+def : Pat<(pre_truncsti16 GPR64:$Rt, GPR64sp:$addr, simm9:$off),
+ (STRHHpre (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr,
+ simm9:$off)>;
+def : Pat<(pre_truncsti8 GPR64:$Rt, GPR64sp:$addr, simm9:$off),
+ (STRBBpre (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr,
+ simm9:$off)>;
+
+def : Pat<(pre_store (v8i8 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
+ (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
+def : Pat<(pre_store (v4i16 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
+ (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
+def : Pat<(pre_store (v2i32 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
+ (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
+def : Pat<(pre_store (v2f32 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
+ (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
+def : Pat<(pre_store (v1i64 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
+ (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
+def : Pat<(pre_store (v1f64 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
+ (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
+
+def : Pat<(pre_store (v16i8 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
+ (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
+def : Pat<(pre_store (v8i16 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
+ (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
+def : Pat<(pre_store (v4i32 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
+ (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
+def : Pat<(pre_store (v4f32 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
+ (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
+def : Pat<(pre_store (v2i64 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
+ (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
+def : Pat<(pre_store (v2f64 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
+ (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
+
+//---
+// (immediate post-indexed)
+def STRWpost : StorePostIdx<0b10, 0, 0b00, GPR32, "str", post_store, i32>;
+def STRXpost : StorePostIdx<0b11, 0, 0b00, GPR64, "str", post_store, i64>;
+def STRBpost : StorePostIdx<0b00, 1, 0b00, FPR8, "str", post_store, untyped>;
+def STRHpost : StorePostIdx<0b01, 1, 0b00, FPR16, "str", post_store, f16>;
+def STRSpost : StorePostIdx<0b10, 1, 0b00, FPR32, "str", post_store, f32>;
+def STRDpost : StorePostIdx<0b11, 1, 0b00, FPR64, "str", post_store, f64>;
+def STRQpost : StorePostIdx<0b00, 1, 0b10, FPR128, "str", post_store, f128>;
+
+def STRBBpost : StorePostIdx<0b00, 0, 0b00, GPR32, "strb", post_truncsti8, i32>;
+def STRHHpost : StorePostIdx<0b01, 0, 0b00, GPR32, "strh", post_truncsti16, i32>;
+
+// truncstore i64
+def : Pat<(post_truncsti32 GPR64:$Rt, GPR64sp:$addr, simm9:$off),
+ (STRWpost (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr,
+ simm9:$off)>;
+def : Pat<(post_truncsti16 GPR64:$Rt, GPR64sp:$addr, simm9:$off),
+ (STRHHpost (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr,
+ simm9:$off)>;
+def : Pat<(post_truncsti8 GPR64:$Rt, GPR64sp:$addr, simm9:$off),
+ (STRBBpost (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr,
+ simm9:$off)>;
+
+def : Pat<(post_store (v8i8 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
+ (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
+def : Pat<(post_store (v4i16 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
+ (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
+def : Pat<(post_store (v2i32 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
+ (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
+def : Pat<(post_store (v2f32 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
+ (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
+def : Pat<(post_store (v1i64 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
+ (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
+def : Pat<(post_store (v1f64 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
+ (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
+
+def : Pat<(post_store (v16i8 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
+ (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
+def : Pat<(post_store (v8i16 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
+ (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
+def : Pat<(post_store (v4i32 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
+ (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
+def : Pat<(post_store (v4f32 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
+ (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
+def : Pat<(post_store (v2i64 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
+ (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
+def : Pat<(post_store (v2f64 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
+ (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
//===----------------------------------------------------------------------===//
-// Load-store register (unscaled immediate) instructions
-//===----------------------------------------------------------------------===//
-// Contains: LDURB, LDURH, LDURSB, LDURSH, LDURSW, STUR, STURB, STURH and PRFUM
-//
-// and
-//
-//===----------------------------------------------------------------------===//
-// Load-store register (register offset) instructions
-//===----------------------------------------------------------------------===//
-// Contains: LDRB, LDRH, LDRSB, LDRSH, LDRSW, STR, STRB, STRH and PRFM
-//
-// and
-//
-//===----------------------------------------------------------------------===//
-// Load-store register (unsigned immediate) instructions
+// Load/store exclusive instructions.
//===----------------------------------------------------------------------===//
-// Contains: LDRB, LDRH, LDRSB, LDRSH, LDRSW, STR, STRB, STRH and PRFM
-//
-// and
-//
-//===----------------------------------------------------------------------===//
-// Load-store register (immediate post-indexed) instructions
-//===----------------------------------------------------------------------===//
-// Contains: STRB, STRH, STR, LDRB, LDRH, LDR, LDRSB, LDRSH, LDRSW
-//
-// and
-//
-//===----------------------------------------------------------------------===//
-// Load-store register (immediate pre-indexed) instructions
-//===----------------------------------------------------------------------===//
-// Contains: STRB, STRH, STR, LDRB, LDRH, LDR, LDRSB, LDRSH, LDRSW
-
-// Note that patterns are much later on in a completely separate section (they
-// need ADRPxi to be defined).
-
-//===-------------------------------
-// 1. Various operands needed
-//===-------------------------------
-
-//===-------------------------------
-// 1.1 Unsigned 12-bit immediate operands
-//===-------------------------------
-// The addressing mode for these instructions consists of an unsigned 12-bit
-// immediate which is scaled by the size of the memory access.
-//
-// We represent this in the MC layer by two operands:
-// 1. A base register.
-// 2. A 12-bit immediate: not multiplied by access size, so "LDR x0,[x0,#8]"
-// would have '1' in this field.
-// This means that separate functions are needed for converting representations
-// which *are* aware of the intended access size.
-
-// Anything that creates an MCInst (Decoding, selection and AsmParsing) has to
-// know the access size via some means. An isolated operand does not have this
-// information unless told from here, which means we need separate tablegen
-// Operands for each access size. This multiclass takes care of instantiating
-// the correct template functions in the rest of the backend.
-
-//===-------------------------------
-// 1.1 Unsigned 12-bit immediate operands
-//===-------------------------------
-
-multiclass offsets_uimm12<int MemSize, string prefix> {
- def uimm12_asmoperand : AsmOperandClass {
- let Name = "OffsetUImm12_" # MemSize;
- let PredicateMethod = "isOffsetUImm12<" # MemSize # ">";
- let RenderMethod = "addOffsetUImm12Operands<" # MemSize # ">";
- let DiagnosticType = "LoadStoreUImm12_" # MemSize;
- }
- // Pattern is really no more than an ImmLeaf, but predicated on MemSize which
- // complicates things beyond TableGen's ken.
- def uimm12 : Operand<i64>,
- ComplexPattern<i64, 1, "SelectOffsetUImm12<" # MemSize # ">"> {
- let ParserMatchClass
- = !cast<AsmOperandClass>(prefix # uimm12_asmoperand);
+def LDARW : LoadAcquire <0b10, 1, 1, 0, 1, GPR32, "ldar">;
+def LDARX : LoadAcquire <0b11, 1, 1, 0, 1, GPR64, "ldar">;
+def LDARB : LoadAcquire <0b00, 1, 1, 0, 1, GPR32, "ldarb">;
+def LDARH : LoadAcquire <0b01, 1, 1, 0, 1, GPR32, "ldarh">;
- let PrintMethod = "printOffsetUImm12Operand<" # MemSize # ">";
- let EncoderMethod = "getOffsetUImm12OpValue<" # MemSize # ">";
- }
-}
-
-defm byte_ : offsets_uimm12<1, "byte_">;
-defm hword_ : offsets_uimm12<2, "hword_">;
-defm word_ : offsets_uimm12<4, "word_">;
-defm dword_ : offsets_uimm12<8, "dword_">;
-defm qword_ : offsets_uimm12<16, "qword_">;
-
-//===-------------------------------
-// 1.1 Signed 9-bit immediate operands
-//===-------------------------------
-
-// The MCInst is expected to store the bit-wise encoding of the value,
-// which amounts to lopping off the extended sign bits.
-def SDXF_simm9 : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(N->getZExtValue() & 0x1ff, MVT::i32);
-}]>;
-
-def simm9_asmoperand : AsmOperandClass {
- let Name = "SImm9";
- let PredicateMethod = "isSImm<9>";
- let RenderMethod = "addSImmOperands<9>";
- let DiagnosticType = "LoadStoreSImm9";
-}
-
-def simm9 : Operand<i64>,
- ImmLeaf<i64, [{ return Imm >= -0x100 && Imm <= 0xff; }],
- SDXF_simm9> {
- let PrintMethod = "printOffsetSImm9Operand";
- let ParserMatchClass = simm9_asmoperand;
-}
-
-
-//===-------------------------------
-// 1.3 Register offset extensions
-//===-------------------------------
-
-// The assembly-syntax for these addressing-modes is:
-// [<Xn|SP>, <R><m> {, <extend> {<amount>}}]
-//
-// The essential semantics are:
-// + <amount> is a shift: #<log(transfer size)> or #0
-// + <R> can be W or X.
-// + If <R> is W, <extend> can be UXTW or SXTW
-// + If <R> is X, <extend> can be LSL or SXTX
-//
-// The trickiest of those constraints is that Rm can be either GPR32 or GPR64,
-// which will need separate instructions for LLVM type-consistency. We'll also
-// need separate operands, of course.
-multiclass regexts<int MemSize, int RmSize, RegisterClass GPR,
- string Rm, string prefix> {
- def regext_asmoperand : AsmOperandClass {
- let Name = "AddrRegExtend_" # MemSize # "_" # Rm;
- let PredicateMethod = "isAddrRegExtend<" # MemSize # "," # RmSize # ">";
- let RenderMethod = "addAddrRegExtendOperands<" # MemSize # ">";
- let DiagnosticType = "LoadStoreExtend" # RmSize # "_" # MemSize;
- }
+def LDAXRW : LoadExclusive <0b10, 0, 1, 0, 1, GPR32, "ldaxr">;
+def LDAXRX : LoadExclusive <0b11, 0, 1, 0, 1, GPR64, "ldaxr">;
+def LDAXRB : LoadExclusive <0b00, 0, 1, 0, 1, GPR32, "ldaxrb">;
+def LDAXRH : LoadExclusive <0b01, 0, 1, 0, 1, GPR32, "ldaxrh">;
- def regext : Operand<i64> {
- let PrintMethod
- = "printAddrRegExtendOperand<" # MemSize # ", " # RmSize # ">";
+def LDXRW : LoadExclusive <0b10, 0, 1, 0, 0, GPR32, "ldxr">;
+def LDXRX : LoadExclusive <0b11, 0, 1, 0, 0, GPR64, "ldxr">;
+def LDXRB : LoadExclusive <0b00, 0, 1, 0, 0, GPR32, "ldxrb">;
+def LDXRH : LoadExclusive <0b01, 0, 1, 0, 0, GPR32, "ldxrh">;
- let DecoderMethod = "DecodeAddrRegExtendOperand";
- let ParserMatchClass
- = !cast<AsmOperandClass>(prefix # regext_asmoperand);
- }
-}
-
-multiclass regexts_wx<int MemSize, string prefix> {
- // Rm is an X-register if LSL or SXTX are specified as the shift.
- defm Xm_ : regexts<MemSize, 64, GPR64, "Xm", prefix # "Xm_">;
+def STLRW : StoreRelease <0b10, 1, 0, 0, 1, GPR32, "stlr">;
+def STLRX : StoreRelease <0b11, 1, 0, 0, 1, GPR64, "stlr">;
+def STLRB : StoreRelease <0b00, 1, 0, 0, 1, GPR32, "stlrb">;
+def STLRH : StoreRelease <0b01, 1, 0, 0, 1, GPR32, "stlrh">;
- // Rm is a W-register if UXTW or SXTW are specified as the shift.
- defm Wm_ : regexts<MemSize, 32, GPR32, "Wm", prefix # "Wm_">;
-}
+def STLXRW : StoreExclusive<0b10, 0, 0, 0, 1, GPR32, "stlxr">;
+def STLXRX : StoreExclusive<0b11, 0, 0, 0, 1, GPR64, "stlxr">;
+def STLXRB : StoreExclusive<0b00, 0, 0, 0, 1, GPR32, "stlxrb">;
+def STLXRH : StoreExclusive<0b01, 0, 0, 0, 1, GPR32, "stlxrh">;
-defm byte_ : regexts_wx<1, "byte_">;
-defm hword_ : regexts_wx<2, "hword_">;
-defm word_ : regexts_wx<4, "word_">;
-defm dword_ : regexts_wx<8, "dword_">;
-defm qword_ : regexts_wx<16, "qword_">;
+def STXRW : StoreExclusive<0b10, 0, 0, 0, 0, GPR32, "stxr">;
+def STXRX : StoreExclusive<0b11, 0, 0, 0, 0, GPR64, "stxr">;
+def STXRB : StoreExclusive<0b00, 0, 0, 0, 0, GPR32, "stxrb">;
+def STXRH : StoreExclusive<0b01, 0, 0, 0, 0, GPR32, "stxrh">;
+def LDAXPW : LoadExclusivePair<0b10, 0, 1, 1, 1, GPR32, "ldaxp">;
+def LDAXPX : LoadExclusivePair<0b11, 0, 1, 1, 1, GPR64, "ldaxp">;
-//===------------------------------
-// 2. The instructions themselves.
-//===------------------------------
+def LDXPW : LoadExclusivePair<0b10, 0, 1, 1, 0, GPR32, "ldxp">;
+def LDXPX : LoadExclusivePair<0b11, 0, 1, 1, 0, GPR64, "ldxp">;
-// We have the following instructions to implement:
-// | | B | H | W | X |
-// |-----------------+-------+-------+-------+--------|
-// | unsigned str | STRB | STRH | STR | STR |
-// | unsigned ldr | LDRB | LDRH | LDR | LDR |
-// | signed ldr to W | LDRSB | LDRSH | - | - |
-// | signed ldr to X | LDRSB | LDRSH | LDRSW | (PRFM) |
+def STLXPW : StoreExclusivePair<0b10, 0, 0, 1, 1, GPR32, "stlxp">;
+def STLXPX : StoreExclusivePair<0b11, 0, 0, 1, 1, GPR64, "stlxp">;
-// This will instantiate the LDR/STR instructions you'd expect to use for an
-// unsigned datatype (first two rows above) or floating-point register, which is
-// reasonably uniform across all access sizes.
-
-
-//===------------------------------
-// 2.1 Regular instructions
-//===------------------------------
-
-// This class covers the basic unsigned or irrelevantly-signed loads and stores,
-// to general-purpose and floating-point registers.
-
-class AddrParams<string prefix> {
- Operand uimm12 = !cast<Operand>(prefix # "_uimm12");
-
- Operand regextWm = !cast<Operand>(prefix # "_Wm_regext");
- Operand regextXm = !cast<Operand>(prefix # "_Xm_regext");
-}
-
-def byte_addrparams : AddrParams<"byte">;
-def hword_addrparams : AddrParams<"hword">;
-def word_addrparams : AddrParams<"word">;
-def dword_addrparams : AddrParams<"dword">;
-def qword_addrparams : AddrParams<"qword">;
-
-multiclass A64I_LDRSTR_unsigned<string prefix, bits<2> size, bit v,
- bit high_opc, string asmsuffix,
- RegisterClass GPR, AddrParams params> {
- // Unsigned immediate
- def _STR : A64I_LSunsigimm<size, v, {high_opc, 0b0},
- (outs), (ins GPR:$Rt, GPR64xsp:$Rn, params.uimm12:$UImm12),
- "str" # asmsuffix # "\t$Rt, [$Rn, $UImm12]",
- [], NoItinerary>,
- Sched<[WriteSt, ReadSt, ReadSt]> {
- let mayStore = 1;
- }
- def : InstAlias<"str" # asmsuffix # " $Rt, [$Rn]",
- (!cast<Instruction>(prefix # "_STR") GPR:$Rt, GPR64xsp:$Rn, 0)>;
-
- def _LDR : A64I_LSunsigimm<size, v, {high_opc, 0b1},
- (outs GPR:$Rt), (ins GPR64xsp:$Rn, params.uimm12:$UImm12),
- "ldr" # asmsuffix # "\t$Rt, [$Rn, $UImm12]",
- [], NoItinerary>,
- Sched<[WriteLd, ReadLd]> {
- let mayLoad = 1;
- }
- def : InstAlias<"ldr" # asmsuffix # " $Rt, [$Rn]",
- (!cast<Instruction>(prefix # "_LDR") GPR:$Rt, GPR64xsp:$Rn, 0)>;
-
- // Register offset (four of these: load/store and Wm/Xm).
- let mayLoad = 1 in {
- def _Wm_RegOffset_LDR : A64I_LSregoff<size, v, {high_opc, 0b1}, 0b0,
- (outs GPR:$Rt),
- (ins GPR64xsp:$Rn, GPR32:$Rm, params.regextWm:$Ext),
- "ldr" # asmsuffix # "\t$Rt, [$Rn, $Rm, $Ext]",
- [], NoItinerary>,
- Sched<[WriteLd, ReadLd, ReadLd]>;
-
- def _Xm_RegOffset_LDR : A64I_LSregoff<size, v, {high_opc, 0b1}, 0b1,
- (outs GPR:$Rt),
- (ins GPR64xsp:$Rn, GPR64:$Rm, params.regextXm:$Ext),
- "ldr" # asmsuffix # "\t$Rt, [$Rn, $Rm, $Ext]",
- [], NoItinerary>,
- Sched<[WriteLd, ReadLd, ReadLd]>;
- }
- def : InstAlias<"ldr" # asmsuffix # " $Rt, [$Rn, $Rm]",
- (!cast<Instruction>(prefix # "_Xm_RegOffset_LDR") GPR:$Rt, GPR64xsp:$Rn,
- GPR64:$Rm, 2)>;
-
- let mayStore = 1 in {
- def _Wm_RegOffset_STR : A64I_LSregoff<size, v, {high_opc, 0b0}, 0b0,
- (outs), (ins GPR:$Rt, GPR64xsp:$Rn, GPR32:$Rm,
- params.regextWm:$Ext),
- "str" # asmsuffix # "\t$Rt, [$Rn, $Rm, $Ext]",
- [], NoItinerary>,
- Sched<[WriteSt, ReadSt, ReadSt, ReadSt]>;
-
- def _Xm_RegOffset_STR : A64I_LSregoff<size, v, {high_opc, 0b0}, 0b1,
- (outs), (ins GPR:$Rt, GPR64xsp:$Rn, GPR64:$Rm,
- params.regextXm:$Ext),
- "str" # asmsuffix # "\t$Rt, [$Rn, $Rm, $Ext]",
- [], NoItinerary>,
- Sched<[WriteSt, ReadSt, ReadSt, ReadSt]>;
- }
- def : InstAlias<"str" # asmsuffix # " $Rt, [$Rn, $Rm]",
- (!cast<Instruction>(prefix # "_Xm_RegOffset_STR") GPR:$Rt, GPR64xsp:$Rn,
- GPR64:$Rm, 2)>;
-
- // Unaligned immediate
- def _STUR : A64I_LSunalimm<size, v, {high_opc, 0b0},
- (outs), (ins GPR:$Rt, GPR64xsp:$Rn, simm9:$SImm9),
- "stur" # asmsuffix # "\t$Rt, [$Rn, $SImm9]",
- [], NoItinerary>,
- Sched<[WriteSt, ReadSt, ReadSt]> {
- let mayStore = 1;
- }
- def : InstAlias<"stur" # asmsuffix # " $Rt, [$Rn]",
- (!cast<Instruction>(prefix # "_STUR") GPR:$Rt, GPR64xsp:$Rn, 0)>;
-
- def _LDUR : A64I_LSunalimm<size, v, {high_opc, 0b1},
- (outs GPR:$Rt), (ins GPR64xsp:$Rn, simm9:$SImm9),
- "ldur" # asmsuffix # "\t$Rt, [$Rn, $SImm9]",
- [], NoItinerary>,
- Sched<[WriteLd, ReadLd]> {
- let mayLoad = 1;
- }
- def : InstAlias<"ldur" # asmsuffix # " $Rt, [$Rn]",
- (!cast<Instruction>(prefix # "_LDUR") GPR:$Rt, GPR64xsp:$Rn, 0)>;
-
- // Post-indexed
- def _PostInd_STR : A64I_LSpostind<size, v, {high_opc, 0b0},
- (outs GPR64xsp:$Rn_wb),
- (ins GPR:$Rt, GPR64xsp:$Rn, simm9:$SImm9),
- "str" # asmsuffix # "\t$Rt, [$Rn], $SImm9",
- [], NoItinerary>,
- Sched<[WriteSt, ReadSt, ReadSt]> {
- let Constraints = "$Rn = $Rn_wb";
- let mayStore = 1;
-
- // Decoder only needed for unpredictability checking (FIXME).
- let DecoderMethod = "DecodeSingleIndexedInstruction";
- }
-
- def _PostInd_LDR : A64I_LSpostind<size, v, {high_opc, 0b1},
- (outs GPR:$Rt, GPR64xsp:$Rn_wb),
- (ins GPR64xsp:$Rn, simm9:$SImm9),
- "ldr" # asmsuffix # "\t$Rt, [$Rn], $SImm9",
- [], NoItinerary>,
- Sched<[WriteLd, WriteLd, ReadLd]> {
- let mayLoad = 1;
- let Constraints = "$Rn = $Rn_wb";
- let DecoderMethod = "DecodeSingleIndexedInstruction";
- }
-
- // Pre-indexed
- def _PreInd_STR : A64I_LSpreind<size, v, {high_opc, 0b0},
- (outs GPR64xsp:$Rn_wb),
- (ins GPR:$Rt, GPR64xsp:$Rn, simm9:$SImm9),
- "str" # asmsuffix # "\t$Rt, [$Rn, $SImm9]!",
- [], NoItinerary>,
- Sched<[WriteSt, ReadSt, ReadSt]> {
- let Constraints = "$Rn = $Rn_wb";
- let mayStore = 1;
-
- // Decoder only needed for unpredictability checking (FIXME).
- let DecoderMethod = "DecodeSingleIndexedInstruction";
- }
-
- def _PreInd_LDR : A64I_LSpreind<size, v, {high_opc, 0b1},
- (outs GPR:$Rt, GPR64xsp:$Rn_wb),
- (ins GPR64xsp:$Rn, simm9:$SImm9),
- "ldr" # asmsuffix # "\t$Rt, [$Rn, $SImm9]!",
- [], NoItinerary>,
- Sched<[WriteLd, WriteLd, ReadLd]> {
- let mayLoad = 1;
- let Constraints = "$Rn = $Rn_wb";
- let DecoderMethod = "DecodeSingleIndexedInstruction";
- }
-
-}
-
-// STRB/LDRB: First define the instructions
-defm LS8
- : A64I_LDRSTR_unsigned<"LS8", 0b00, 0b0, 0b0, "b", GPR32, byte_addrparams>;
-
-// STRH/LDRH
-defm LS16
- : A64I_LDRSTR_unsigned<"LS16", 0b01, 0b0, 0b0, "h", GPR32, hword_addrparams>;
-
-
-// STR/LDR to/from a W register
-defm LS32
- : A64I_LDRSTR_unsigned<"LS32", 0b10, 0b0, 0b0, "", GPR32, word_addrparams>;
-
-// STR/LDR to/from an X register
-defm LS64
- : A64I_LDRSTR_unsigned<"LS64", 0b11, 0b0, 0b0, "", GPR64, dword_addrparams>;
-
-let Predicates = [HasFPARMv8] in {
-// STR/LDR to/from a B register
-defm LSFP8
- : A64I_LDRSTR_unsigned<"LSFP8", 0b00, 0b1, 0b0, "", FPR8, byte_addrparams>;
-
-// STR/LDR to/from an H register
-defm LSFP16
- : A64I_LDRSTR_unsigned<"LSFP16", 0b01, 0b1, 0b0, "", FPR16, hword_addrparams>;
-
-// STR/LDR to/from an S register
-defm LSFP32
- : A64I_LDRSTR_unsigned<"LSFP32", 0b10, 0b1, 0b0, "", FPR32, word_addrparams>;
-// STR/LDR to/from a D register
-defm LSFP64
- : A64I_LDRSTR_unsigned<"LSFP64", 0b11, 0b1, 0b0, "", FPR64, dword_addrparams>;
-// STR/LDR to/from a Q register
-defm LSFP128
- : A64I_LDRSTR_unsigned<"LSFP128", 0b00, 0b1, 0b1, "", FPR128,
- qword_addrparams>;
-}
-
-//===------------------------------
-// 2.3 Signed loads
-//===------------------------------
-
-// Byte and half-word signed loads can both go into either an X or a W register,
-// so it's worth factoring out. Signed word loads don't fit because there is no
-// W version.
-multiclass A64I_LDR_signed<bits<2> size, string asmopcode, AddrParams params,
- string prefix> {
- // Unsigned offset
- def w : A64I_LSunsigimm<size, 0b0, 0b11,
- (outs GPR32:$Rt),
- (ins GPR64xsp:$Rn, params.uimm12:$UImm12),
- "ldrs" # asmopcode # "\t$Rt, [$Rn, $UImm12]",
- [], NoItinerary>,
- Sched<[WriteLd, ReadLd]> {
- let mayLoad = 1;
- }
- def : InstAlias<"ldrs" # asmopcode # " $Rt, [$Rn]",
- (!cast<Instruction>(prefix # w) GPR32:$Rt, GPR64xsp:$Rn, 0)>;
-
- def x : A64I_LSunsigimm<size, 0b0, 0b10,
- (outs GPR64:$Rt),
- (ins GPR64xsp:$Rn, params.uimm12:$UImm12),
- "ldrs" # asmopcode # "\t$Rt, [$Rn, $UImm12]",
- [], NoItinerary>,
- Sched<[WriteLd, ReadLd]> {
- let mayLoad = 1;
- }
- def : InstAlias<"ldrs" # asmopcode # " $Rt, [$Rn]",
- (!cast<Instruction>(prefix # x) GPR64:$Rt, GPR64xsp:$Rn, 0)>;
-
- // Register offset
- let mayLoad = 1 in {
- def w_Wm_RegOffset : A64I_LSregoff<size, 0b0, 0b11, 0b0,
- (outs GPR32:$Rt),
- (ins GPR64xsp:$Rn, GPR32:$Rm, params.regextWm:$Ext),
- "ldrs" # asmopcode # "\t$Rt, [$Rn, $Rm, $Ext]",
- [], NoItinerary>,
- Sched<[WriteLd, ReadLd, ReadLd]>;
-
- def w_Xm_RegOffset : A64I_LSregoff<size, 0b0, 0b11, 0b1,
- (outs GPR32:$Rt),
- (ins GPR64xsp:$Rn, GPR64:$Rm, params.regextXm:$Ext),
- "ldrs" # asmopcode # "\t$Rt, [$Rn, $Rm, $Ext]",
- [], NoItinerary>,
- Sched<[WriteLd, ReadLd, ReadLd]>;
-
- def x_Wm_RegOffset : A64I_LSregoff<size, 0b0, 0b10, 0b0,
- (outs GPR64:$Rt),
- (ins GPR64xsp:$Rn, GPR32:$Rm, params.regextWm:$Ext),
- "ldrs" # asmopcode # "\t$Rt, [$Rn, $Rm, $Ext]",
- [], NoItinerary>,
- Sched<[WriteLd, ReadLd, ReadLd]>;
-
- def x_Xm_RegOffset : A64I_LSregoff<size, 0b0, 0b10, 0b1,
- (outs GPR64:$Rt),
- (ins GPR64xsp:$Rn, GPR64:$Rm, params.regextXm:$Ext),
- "ldrs" # asmopcode # "\t$Rt, [$Rn, $Rm, $Ext]",
- [], NoItinerary>,
- Sched<[WriteLd, ReadLd, ReadLd]>;
- }
- def : InstAlias<"ldrs" # asmopcode # " $Rt, [$Rn, $Rm]",
- (!cast<Instruction>(prefix # "w_Xm_RegOffset") GPR32:$Rt, GPR64xsp:$Rn,
- GPR64:$Rm, 2)>;
-
- def : InstAlias<"ldrs" # asmopcode # " $Rt, [$Rn, $Rm]",
- (!cast<Instruction>(prefix # "x_Xm_RegOffset") GPR64:$Rt, GPR64xsp:$Rn,
- GPR64:$Rm, 2)>;
-
-
- let mayLoad = 1 in {
- // Unaligned offset
- def w_U : A64I_LSunalimm<size, 0b0, 0b11,
- (outs GPR32:$Rt),
- (ins GPR64xsp:$Rn, simm9:$SImm9),
- "ldurs" # asmopcode # "\t$Rt, [$Rn, $SImm9]",
- [], NoItinerary>,
- Sched<[WriteLd, ReadLd]>;
-
- def x_U : A64I_LSunalimm<size, 0b0, 0b10,
- (outs GPR64:$Rt),
- (ins GPR64xsp:$Rn, simm9:$SImm9),
- "ldurs" # asmopcode # "\t$Rt, [$Rn, $SImm9]",
- [], NoItinerary>,
- Sched<[WriteLd, ReadLd]>;
-
-
- // Post-indexed
- def w_PostInd : A64I_LSpostind<size, 0b0, 0b11,
- (outs GPR32:$Rt, GPR64xsp:$Rn_wb),
- (ins GPR64xsp:$Rn, simm9:$SImm9),
- "ldrs" # asmopcode # "\t$Rt, [$Rn], $SImm9",
- [], NoItinerary>,
- Sched<[WriteLd, WriteLd, ReadLd]> {
- let Constraints = "$Rn = $Rn_wb";
- let DecoderMethod = "DecodeSingleIndexedInstruction";
- }
-
- def x_PostInd : A64I_LSpostind<size, 0b0, 0b10,
- (outs GPR64:$Rt, GPR64xsp:$Rn_wb),
- (ins GPR64xsp:$Rn, simm9:$SImm9),
- "ldrs" # asmopcode # "\t$Rt, [$Rn], $SImm9",
- [], NoItinerary>,
- Sched<[WriteLd, WriteLd, ReadLd]> {
- let Constraints = "$Rn = $Rn_wb";
- let DecoderMethod = "DecodeSingleIndexedInstruction";
- }
-
- // Pre-indexed
- def w_PreInd : A64I_LSpreind<size, 0b0, 0b11,
- (outs GPR32:$Rt, GPR64xsp:$Rn_wb),
- (ins GPR64xsp:$Rn, simm9:$SImm9),
- "ldrs" # asmopcode # "\t$Rt, [$Rn, $SImm9]!",
- [], NoItinerary>,
- Sched<[WriteLd, WriteLd, ReadLd]> {
- let Constraints = "$Rn = $Rn_wb";
- let DecoderMethod = "DecodeSingleIndexedInstruction";
- }
-
- def x_PreInd : A64I_LSpreind<size, 0b0, 0b10,
- (outs GPR64:$Rt, GPR64xsp:$Rn_wb),
- (ins GPR64xsp:$Rn, simm9:$SImm9),
- "ldrs" # asmopcode # "\t$Rt, [$Rn, $SImm9]!",
- [], NoItinerary>,
- Sched<[WriteLd, WriteLd, ReadLd]> {
- let Constraints = "$Rn = $Rn_wb";
- let DecoderMethod = "DecodeSingleIndexedInstruction";
- }
- } // let mayLoad = 1
-}
-
-// LDRSB
-defm LDRSB : A64I_LDR_signed<0b00, "b", byte_addrparams, "LDRSB">;
-// LDRSH
-defm LDRSH : A64I_LDR_signed<0b01, "h", hword_addrparams, "LDRSH">;
-
-// LDRSW: load a 32-bit register, sign-extending to 64-bits.
-def LDRSWx
- : A64I_LSunsigimm<0b10, 0b0, 0b10,
- (outs GPR64:$Rt),
- (ins GPR64xsp:$Rn, word_uimm12:$UImm12),
- "ldrsw\t$Rt, [$Rn, $UImm12]",
- [], NoItinerary>,
- Sched<[WriteLd, ReadLd]> {
- let mayLoad = 1;
-}
-def : InstAlias<"ldrsw $Rt, [$Rn]", (LDRSWx GPR64:$Rt, GPR64xsp:$Rn, 0)>;
-
-let mayLoad = 1 in {
- def LDRSWx_Wm_RegOffset : A64I_LSregoff<0b10, 0b0, 0b10, 0b0,
- (outs GPR64:$Rt),
- (ins GPR64xsp:$Rn, GPR32:$Rm, word_Wm_regext:$Ext),
- "ldrsw\t$Rt, [$Rn, $Rm, $Ext]",
- [], NoItinerary>,
- Sched<[WriteLd, ReadLd, ReadLd]>;
-
- def LDRSWx_Xm_RegOffset : A64I_LSregoff<0b10, 0b0, 0b10, 0b1,
- (outs GPR64:$Rt),
- (ins GPR64xsp:$Rn, GPR64:$Rm, word_Xm_regext:$Ext),
- "ldrsw\t$Rt, [$Rn, $Rm, $Ext]",
- [], NoItinerary>,
- Sched<[WriteLd, ReadLd, ReadLd]>;
-}
-def : InstAlias<"ldrsw $Rt, [$Rn, $Rm]",
- (LDRSWx_Xm_RegOffset GPR64:$Rt, GPR64xsp:$Rn, GPR64:$Rm, 2)>;
-
-
-def LDURSWx
- : A64I_LSunalimm<0b10, 0b0, 0b10,
- (outs GPR64:$Rt),
- (ins GPR64xsp:$Rn, simm9:$SImm9),
- "ldursw\t$Rt, [$Rn, $SImm9]",
- [], NoItinerary>,
- Sched<[WriteLd, ReadLd]> {
- let mayLoad = 1;
-}
-def : InstAlias<"ldursw $Rt, [$Rn]", (LDURSWx GPR64:$Rt, GPR64xsp:$Rn, 0)>;
-
-def LDRSWx_PostInd
- : A64I_LSpostind<0b10, 0b0, 0b10,
- (outs GPR64:$Rt, GPR64xsp:$Rn_wb),
- (ins GPR64xsp:$Rn, simm9:$SImm9),
- "ldrsw\t$Rt, [$Rn], $SImm9",
- [], NoItinerary>,
- Sched<[WriteLd, WriteLd, ReadLd]> {
- let mayLoad = 1;
- let Constraints = "$Rn = $Rn_wb";
- let DecoderMethod = "DecodeSingleIndexedInstruction";
-}
-
-def LDRSWx_PreInd : A64I_LSpreind<0b10, 0b0, 0b10,
- (outs GPR64:$Rt, GPR64xsp:$Rn_wb),
- (ins GPR64xsp:$Rn, simm9:$SImm9),
- "ldrsw\t$Rt, [$Rn, $SImm9]!",
- [], NoItinerary>,
- Sched<[WriteLd, WriteLd, ReadLd]> {
- let mayLoad = 1;
- let Constraints = "$Rn = $Rn_wb";
- let DecoderMethod = "DecodeSingleIndexedInstruction";
-}
-
-//===------------------------------
-// 2.4 Prefetch operations
-//===------------------------------
-
-def PRFM : A64I_LSunsigimm<0b11, 0b0, 0b10, (outs),
- (ins prefetch_op:$Rt, GPR64xsp:$Rn, dword_uimm12:$UImm12),
- "prfm\t$Rt, [$Rn, $UImm12]",
- [], NoItinerary>,
- Sched<[WritePreLd, ReadPreLd]> {
- let mayLoad = 1;
-}
-def : InstAlias<"prfm $Rt, [$Rn]",
- (PRFM prefetch_op:$Rt, GPR64xsp:$Rn, 0)>;
-
-let mayLoad = 1 in {
- def PRFM_Wm_RegOffset : A64I_LSregoff<0b11, 0b0, 0b10, 0b0, (outs),
- (ins prefetch_op:$Rt, GPR64xsp:$Rn,
- GPR32:$Rm, dword_Wm_regext:$Ext),
- "prfm\t$Rt, [$Rn, $Rm, $Ext]",
- [], NoItinerary>,
- Sched<[WritePreLd, ReadPreLd]>;
- def PRFM_Xm_RegOffset : A64I_LSregoff<0b11, 0b0, 0b10, 0b1, (outs),
- (ins prefetch_op:$Rt, GPR64xsp:$Rn,
- GPR64:$Rm, dword_Xm_regext:$Ext),
- "prfm\t$Rt, [$Rn, $Rm, $Ext]",
- [], NoItinerary>,
- Sched<[WritePreLd, ReadPreLd]>;
-}
-
-def : InstAlias<"prfm $Rt, [$Rn, $Rm]",
- (PRFM_Xm_RegOffset prefetch_op:$Rt, GPR64xsp:$Rn,
- GPR64:$Rm, 2)>;
-
-
-def PRFUM : A64I_LSunalimm<0b11, 0b0, 0b10, (outs),
- (ins prefetch_op:$Rt, GPR64xsp:$Rn, simm9:$SImm9),
- "prfum\t$Rt, [$Rn, $SImm9]",
- [], NoItinerary>,
- Sched<[WritePreLd, ReadPreLd]> {
- let mayLoad = 1;
-}
-def : InstAlias<"prfum $Rt, [$Rn]",
- (PRFUM prefetch_op:$Rt, GPR64xsp:$Rn, 0)>;
+def STXPW : StoreExclusivePair<0b10, 0, 0, 1, 0, GPR32, "stxp">;
+def STXPX : StoreExclusivePair<0b11, 0, 0, 1, 0, GPR64, "stxp">;
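+
+// Sketch of how the exclusives combine into an atomic compare-and-swap loop:
+//   retry:
+//     ldaxr x8, [x0]        ; load-acquire exclusive
+//     cmp   x8, x1
+//     b.ne  done
+//     stlxr w9, x2, [x0]    ; store-release exclusive; w9 = 0 on success
+//     cbnz  w9, retry
+//   done: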
//===----------------------------------------------------------------------===//
-// Load-store register (unprivileged) instructions
+// Scaled floating point to integer conversion instructions.
//===----------------------------------------------------------------------===//
-// Contains: LDTRB, LDTRH, LDTRSB, LDTRSH, LDTRSW, STTR, STTRB and STTRH
-
-// These instructions very much mirror the "unscaled immediate" loads, but since
-// there are no floating-point variants we need to split them out into their own
-// section to avoid instantiation of "ldtr d0, [sp]" etc.
-
-multiclass A64I_LDTRSTTR<bits<2> size, string asmsuffix, RegisterClass GPR,
- string prefix> {
- def _UnPriv_STR : A64I_LSunpriv<size, 0b0, 0b00,
- (outs), (ins GPR:$Rt, GPR64xsp:$Rn, simm9:$SImm9),
- "sttr" # asmsuffix # "\t$Rt, [$Rn, $SImm9]",
- [], NoItinerary>,
- Sched<[WriteLd, ReadLd]> {
- let mayStore = 1;
- }
-
- def : InstAlias<"sttr" # asmsuffix # " $Rt, [$Rn]",
- (!cast<Instruction>(prefix # "_UnPriv_STR") GPR:$Rt, GPR64xsp:$Rn, 0)>;
-
- def _UnPriv_LDR : A64I_LSunpriv<size, 0b0, 0b01,
- (outs GPR:$Rt), (ins GPR64xsp:$Rn, simm9:$SImm9),
- "ldtr" # asmsuffix # "\t$Rt, [$Rn, $SImm9]",
- [], NoItinerary>,
- Sched<[WriteLd, ReadLd]> {
- let mayLoad = 1;
- }
-
- def : InstAlias<"ldtr" # asmsuffix # " $Rt, [$Rn]",
- (!cast<Instruction>(prefix # "_UnPriv_LDR") GPR:$Rt, GPR64xsp:$Rn, 0)>;
-
-}
-
-// STTRB/LDTRB: First define the instructions
-defm LS8 : A64I_LDTRSTTR<0b00, "b", GPR32, "LS8">;
-
-// STTRH/LDTRH
-defm LS16 : A64I_LDTRSTTR<0b01, "h", GPR32, "LS16">;
-
-// STTR/LDTR to/from a W register
-defm LS32 : A64I_LDTRSTTR<0b10, "", GPR32, "LS32">;
-
-// STTR/LDTR to/from an X register
-defm LS64 : A64I_LDTRSTTR<0b11, "", GPR64, "LS64">;
-
-// Now a class for the signed instructions that can go to either 32 or 64
-// bits...
-multiclass A64I_LDTR_signed<bits<2> size, string asmopcode, string prefix> {
- let mayLoad = 1 in {
- def w : A64I_LSunpriv<size, 0b0, 0b11,
- (outs GPR32:$Rt),
- (ins GPR64xsp:$Rn, simm9:$SImm9),
- "ldtrs" # asmopcode # "\t$Rt, [$Rn, $SImm9]",
- [], NoItinerary>,
- Sched<[WriteLd, ReadLd]>;
-
- def x : A64I_LSunpriv<size, 0b0, 0b10,
- (outs GPR64:$Rt),
- (ins GPR64xsp:$Rn, simm9:$SImm9),
- "ldtrs" # asmopcode # "\t$Rt, [$Rn, $SImm9]",
- [], NoItinerary>,
- Sched<[WriteLd, ReadLd]>;
- }
-
- def : InstAlias<"ldtrs" # asmopcode # " $Rt, [$Rn]",
- (!cast<Instruction>(prefix # "w") GPR32:$Rt, GPR64xsp:$Rn, 0)>;
-
- def : InstAlias<"ldtrs" # asmopcode # " $Rt, [$Rn]",
- (!cast<Instruction>(prefix # "x") GPR64:$Rt, GPR64xsp:$Rn, 0)>;
-
-}
-// LDTRSB
-defm LDTRSB : A64I_LDTR_signed<0b00, "b", "LDTRSB">;
-// LDTRSH
-defm LDTRSH : A64I_LDTR_signed<0b01, "h", "LDTRSH">;
-
-// And finally LDTRSW which only goes to 64 bits.
-def LDTRSWx : A64I_LSunpriv<0b10, 0b0, 0b10,
- (outs GPR64:$Rt),
- (ins GPR64xsp:$Rn, simm9:$SImm9),
- "ldtrsw\t$Rt, [$Rn, $SImm9]",
- [], NoItinerary>,
- Sched<[WriteLd, ReadLd]> {
- let mayLoad = 1;
+defm FCVTAS : FPToIntegerUnscaled<0b00, 0b100, "fcvtas", int_aarch64_neon_fcvtas>;
+defm FCVTAU : FPToIntegerUnscaled<0b00, 0b101, "fcvtau", int_aarch64_neon_fcvtau>;
+defm FCVTMS : FPToIntegerUnscaled<0b10, 0b000, "fcvtms", int_aarch64_neon_fcvtms>;
+defm FCVTMU : FPToIntegerUnscaled<0b10, 0b001, "fcvtmu", int_aarch64_neon_fcvtmu>;
+defm FCVTNS : FPToIntegerUnscaled<0b00, 0b000, "fcvtns", int_aarch64_neon_fcvtns>;
+defm FCVTNU : FPToIntegerUnscaled<0b00, 0b001, "fcvtnu", int_aarch64_neon_fcvtnu>;
+defm FCVTPS : FPToIntegerUnscaled<0b01, 0b000, "fcvtps", int_aarch64_neon_fcvtps>;
+defm FCVTPU : FPToIntegerUnscaled<0b01, 0b001, "fcvtpu", int_aarch64_neon_fcvtpu>;
+defm FCVTZS : FPToIntegerUnscaled<0b11, 0b000, "fcvtzs", fp_to_sint>;
+defm FCVTZU : FPToIntegerUnscaled<0b11, 0b001, "fcvtzu", fp_to_uint>;
+defm FCVTZS : FPToIntegerScaled<0b11, 0b000, "fcvtzs", fp_to_sint>;
+defm FCVTZU : FPToIntegerScaled<0b11, 0b001, "fcvtzu", fp_to_uint>;
+let isCodeGenOnly = 1 in {
+defm FCVTZS_Int : FPToIntegerUnscaled<0b11, 0b000, "fcvtzs", int_aarch64_neon_fcvtzs>;
+defm FCVTZU_Int : FPToIntegerUnscaled<0b11, 0b001, "fcvtzu", int_aarch64_neon_fcvtzu>;
+defm FCVTZS_Int : FPToIntegerScaled<0b11, 0b000, "fcvtzs", int_aarch64_neon_fcvtzs>;
+defm FCVTZU_Int : FPToIntegerScaled<0b11, 0b001, "fcvtzu", int_aarch64_neon_fcvtzu>;
}
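+
+// The scaled forms take a fixed-point #fbits operand: the input is multiplied
+// by 2^fbits before truncation, e.g. "fcvtzs w0, s1, #4" computes
+// (int)(s1 * 16).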
-def : InstAlias<"ldtrsw $Rt, [$Rn]", (LDTRSWx GPR64:$Rt, GPR64xsp:$Rn, 0)>;
//===----------------------------------------------------------------------===//
-// Load-store register pair (offset) instructions
+// Scaled integer to floating point conversion instructions.
//===----------------------------------------------------------------------===//
-//
-// and
-//
-//===----------------------------------------------------------------------===//
-// Load-store register pair (post-indexed) instructions
-//===----------------------------------------------------------------------===//
-// Contains: STP, LDP, LDPSW
-//
-// and
-//
-//===----------------------------------------------------------------------===//
-// Load-store register pair (pre-indexed) instructions
-//===----------------------------------------------------------------------===//
-// Contains: STP, LDP, LDPSW
-//
-// and
-//
-//===----------------------------------------------------------------------===//
-// Load-store non-temporal register pair (offset) instructions
-//===----------------------------------------------------------------------===//
-// Contains: STNP, LDNP
-
-
-// Anything that creates an MCInst (Decoding, selection and AsmParsing) has to
-// know the access size via some means. An isolated operand does not have this
-// information unless told from here, which means we need separate tablegen
-// Operands for each access size. This multiclass takes care of instantiating
-// the correct template functions in the rest of the backend.
-
-multiclass offsets_simm7<string MemSize, string prefix> {
- // The bare signed 7-bit immediate is used in post-indexed instructions, but
- // because of the scaling performed a generic "simm7" operand isn't
- // appropriate here either.
- def simm7_asmoperand : AsmOperandClass {
- let Name = "SImm7_Scaled" # MemSize;
- let PredicateMethod = "isSImm7Scaled<" # MemSize # ">";
- let RenderMethod = "addSImm7ScaledOperands<" # MemSize # ">";
- let DiagnosticType = "LoadStoreSImm7_" # MemSize;
- }
-
- def simm7 : Operand<i64> {
- let PrintMethod = "printSImm7ScaledOperand<" # MemSize # ">";
- let ParserMatchClass = !cast<AsmOperandClass>(prefix # "simm7_asmoperand");
- }
-}
-
-defm word_ : offsets_simm7<"4", "word_">;
-defm dword_ : offsets_simm7<"8", "dword_">;
-defm qword_ : offsets_simm7<"16", "qword_">;
-
-multiclass A64I_LSPsimple<bits<2> opc, bit v, RegisterClass SomeReg,
- Operand simm7, string prefix> {
- def _STR : A64I_LSPoffset<opc, v, 0b0, (outs),
- (ins SomeReg:$Rt, SomeReg:$Rt2, GPR64xsp:$Rn, simm7:$SImm7),
- "stp\t$Rt, $Rt2, [$Rn, $SImm7]", [], NoItinerary>,
- Sched<[WriteLd, ReadLd]> {
- let mayStore = 1;
- let DecoderMethod = "DecodeLDSTPairInstruction";
- }
- def : InstAlias<"stp $Rt, $Rt2, [$Rn]",
- (!cast<Instruction>(prefix # "_STR") SomeReg:$Rt,
- SomeReg:$Rt2, GPR64xsp:$Rn, 0)>;
-
- def _LDR : A64I_LSPoffset<opc, v, 0b1,
- (outs SomeReg:$Rt, SomeReg:$Rt2),
- (ins GPR64xsp:$Rn, simm7:$SImm7),
- "ldp\t$Rt, $Rt2, [$Rn, $SImm7]", [], NoItinerary>,
- Sched<[WriteLd, WriteLd, ReadLd]> {
- let mayLoad = 1;
- let DecoderMethod = "DecodeLDSTPairInstruction";
- }
- def : InstAlias<"ldp $Rt, $Rt2, [$Rn]",
- (!cast<Instruction>(prefix # "_LDR") SomeReg:$Rt,
- SomeReg:$Rt2, GPR64xsp:$Rn, 0)>;
-
- def _PostInd_STR : A64I_LSPpostind<opc, v, 0b0,
- (outs GPR64xsp:$Rn_wb),
- (ins SomeReg:$Rt, SomeReg:$Rt2,
- GPR64xsp:$Rn,
- simm7:$SImm7),
- "stp\t$Rt, $Rt2, [$Rn], $SImm7",
- [], NoItinerary>,
- Sched<[WriteSt, ReadSt, ReadSt, ReadSt]> {
- let mayStore = 1;
- let Constraints = "$Rn = $Rn_wb";
-
- // Decoder only needed for unpredictability checking (FIXME).
- let DecoderMethod = "DecodeLDSTPairInstruction";
- }
-
- def _PostInd_LDR : A64I_LSPpostind<opc, v, 0b1,
- (outs SomeReg:$Rt, SomeReg:$Rt2, GPR64xsp:$Rn_wb),
- (ins GPR64xsp:$Rn, simm7:$SImm7),
- "ldp\t$Rt, $Rt2, [$Rn], $SImm7",
- [], NoItinerary>,
- Sched<[WriteLd, WriteLd, WriteLd, ReadLd]> {
- let mayLoad = 1;
- let Constraints = "$Rn = $Rn_wb";
- let DecoderMethod = "DecodeLDSTPairInstruction";
- }
-
- def _PreInd_STR : A64I_LSPpreind<opc, v, 0b0, (outs GPR64xsp:$Rn_wb),
- (ins SomeReg:$Rt, SomeReg:$Rt2, GPR64xsp:$Rn, simm7:$SImm7),
- "stp\t$Rt, $Rt2, [$Rn, $SImm7]!",
- [], NoItinerary>,
- Sched<[WriteSt, ReadSt, ReadSt, ReadSt]> {
- let mayStore = 1;
- let Constraints = "$Rn = $Rn_wb";
- let DecoderMethod = "DecodeLDSTPairInstruction";
- }
-
- def _PreInd_LDR : A64I_LSPpreind<opc, v, 0b1,
- (outs SomeReg:$Rt, SomeReg:$Rt2, GPR64xsp:$Rn_wb),
- (ins GPR64xsp:$Rn, simm7:$SImm7),
- "ldp\t$Rt, $Rt2, [$Rn, $SImm7]!",
- [], NoItinerary>,
- Sched<[WriteLd, WriteLd, WriteLd, ReadLd]> {
- let mayLoad = 1;
- let Constraints = "$Rn = $Rn_wb";
- let DecoderMethod = "DecodeLDSTPairInstruction";
- }
- def _NonTemp_STR : A64I_LSPnontemp<opc, v, 0b0, (outs),
- (ins SomeReg:$Rt, SomeReg:$Rt2, GPR64xsp:$Rn, simm7:$SImm7),
- "stnp\t$Rt, $Rt2, [$Rn, $SImm7]", [], NoItinerary>,
- Sched<[WriteSt, ReadSt, ReadSt, ReadSt]> {
- let mayStore = 1;
- let DecoderMethod = "DecodeLDSTPairInstruction";
- }
- def : InstAlias<"stnp $Rt, $Rt2, [$Rn]",
- (!cast<Instruction>(prefix # "_NonTemp_STR") SomeReg:$Rt,
- SomeReg:$Rt2, GPR64xsp:$Rn, 0)>;
-
- def _NonTemp_LDR : A64I_LSPnontemp<opc, v, 0b1,
- (outs SomeReg:$Rt, SomeReg:$Rt2),
- (ins GPR64xsp:$Rn, simm7:$SImm7),
- "ldnp\t$Rt, $Rt2, [$Rn, $SImm7]", [], NoItinerary>,
- Sched<[WriteLd, WriteLd, ReadLd]> {
- let mayLoad = 1;
- let DecoderMethod = "DecodeLDSTPairInstruction";
- }
- def : InstAlias<"ldnp $Rt, $Rt2, [$Rn]",
- (!cast<Instruction>(prefix # "_NonTemp_LDR") SomeReg:$Rt,
- SomeReg:$Rt2, GPR64xsp:$Rn, 0)>;
-
-}
-
-
-defm LSPair32 : A64I_LSPsimple<0b00, 0b0, GPR32, word_simm7, "LSPair32">;
-defm LSPair64 : A64I_LSPsimple<0b10, 0b0, GPR64, dword_simm7, "LSPair64">;
-
-let Predicates = [HasFPARMv8] in {
-defm LSFPPair32 : A64I_LSPsimple<0b00, 0b1, FPR32, word_simm7, "LSFPPair32">;
-defm LSFPPair64 : A64I_LSPsimple<0b01, 0b1, FPR64, dword_simm7, "LSFPPair64">;
-defm LSFPPair128 : A64I_LSPsimple<0b10, 0b1, FPR128, qword_simm7,
- "LSFPPair128">;
-}
-
-
-def LDPSWx : A64I_LSPoffset<0b01, 0b0, 0b1,
- (outs GPR64:$Rt, GPR64:$Rt2),
- (ins GPR64xsp:$Rn, word_simm7:$SImm7),
- "ldpsw\t$Rt, $Rt2, [$Rn, $SImm7]", [], NoItinerary>,
- Sched<[WriteLd, WriteLd, ReadLd]> {
- let mayLoad = 1;
- let DecoderMethod = "DecodeLDSTPairInstruction";
-}
-def : InstAlias<"ldpsw $Rt, $Rt2, [$Rn]",
- (LDPSWx GPR64:$Rt, GPR64:$Rt2, GPR64xsp:$Rn, 0)>;
-
-def LDPSWx_PostInd : A64I_LSPpostind<0b01, 0b0, 0b1,
- (outs GPR64:$Rt, GPR64:$Rt2, GPR64:$Rn_wb),
- (ins GPR64xsp:$Rn, word_simm7:$SImm7),
- "ldpsw\t$Rt, $Rt2, [$Rn], $SImm7",
- [], NoItinerary>,
- Sched<[WriteLd, WriteLd, WriteLd, ReadLd]> {
- let mayLoad = 1;
- let Constraints = "$Rn = $Rn_wb";
- let DecoderMethod = "DecodeLDSTPairInstruction";
-}
-
-def LDPSWx_PreInd : A64I_LSPpreind<0b01, 0b0, 0b1,
- (outs GPR64:$Rt, GPR64:$Rt2, GPR64:$Rn_wb),
- (ins GPR64xsp:$Rn, word_simm7:$SImm7),
- "ldpsw\t$Rt, $Rt2, [$Rn, $SImm7]!",
- [], NoItinerary>,
- Sched<[WriteLd, WriteLd, WriteLd, ReadLd]> {
- let mayLoad = 1;
- let Constraints = "$Rn = $Rn_wb";
- let DecoderMethod = "DecodeLDSTPairInstruction";
-}
+defm SCVTF : IntegerToFP<0, "scvtf", sint_to_fp>;
+defm UCVTF : IntegerToFP<1, "ucvtf", uint_to_fp>;
//===----------------------------------------------------------------------===//
-// Logical (immediate) instructions
+// Unscaled integer to floating point conversion instruction.
//===----------------------------------------------------------------------===//
-// Contains: AND, ORR, EOR, ANDS, + aliases TST, MOV
-multiclass logical_imm_operands<string prefix, string note,
- int size, ValueType VT> {
- def _asmoperand : AsmOperandClass {
- let Name = "LogicalImm" # note # size;
- let PredicateMethod = "isLogicalImm" # note # "<" # size # ">";
- let RenderMethod = "addLogicalImmOperands<" # size # ">";
- let DiagnosticType = "LogicalSecondSource";
- }
+defm FMOV : UnscaledConversion<"fmov">;
- def _operand
- : Operand<VT>, ComplexPattern<VT, 1, "SelectLogicalImm", [imm]> {
- let ParserMatchClass = !cast<AsmOperandClass>(prefix # "_asmoperand");
- let PrintMethod = "printLogicalImmOperand<" # size # ">";
- let DecoderMethod = "DecodeLogicalImmOperand<" # size # ">";
- }
-}
-
-defm logical_imm32 : logical_imm_operands<"logical_imm32", "", 32, i32>;
-defm logical_imm64 : logical_imm_operands<"logical_imm64", "", 64, i64>;
-
-// The mov versions only differ in assembly parsing, where they
-// exclude values representable with either MOVZ or MOVN.
-defm logical_imm32_mov
- : logical_imm_operands<"logical_imm32_mov", "MOV", 32, i32>;
-defm logical_imm64_mov
- : logical_imm_operands<"logical_imm64_mov", "MOV", 64, i64>;
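// For illustration (not part of this patch): "mov x0, #0x5555555555555555"
// has no MOVZ or MOVN encoding but is a valid 64-bit logical immediate, so it
// matches logical_imm64_mov and is emitted as an ORR with XZR (see the "mov"
// InstAlias below).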
-
-
-multiclass A64I_logimmSizes<bits<2> opc, string asmop, SDNode opnode> {
- def wwi : A64I_logicalimm<0b0, opc, (outs GPR32wsp:$Rd),
- (ins GPR32:$Rn, logical_imm32_operand:$Imm),
- !strconcat(asmop, "\t$Rd, $Rn, $Imm"),
- [(set i32:$Rd,
- (opnode i32:$Rn, logical_imm32_operand:$Imm))],
- NoItinerary>,
- Sched<[WriteALU, ReadALU]>;
-
- def xxi : A64I_logicalimm<0b1, opc, (outs GPR64xsp:$Rd),
- (ins GPR64:$Rn, logical_imm64_operand:$Imm),
- !strconcat(asmop, "\t$Rd, $Rn, $Imm"),
- [(set i64:$Rd,
- (opnode i64:$Rn, logical_imm64_operand:$Imm))],
- NoItinerary>,
- Sched<[WriteALU, ReadALU]>;
-}
-
-defm AND : A64I_logimmSizes<0b00, "and", and>;
-defm ORR : A64I_logimmSizes<0b01, "orr", or>;
-defm EOR : A64I_logimmSizes<0b10, "eor", xor>;
-
-let Defs = [NZCV] in {
- def ANDSwwi : A64I_logicalimm<0b0, 0b11, (outs GPR32:$Rd),
- (ins GPR32:$Rn, logical_imm32_operand:$Imm),
- "ands\t$Rd, $Rn, $Imm",
- [], NoItinerary>,
- Sched<[WriteALU, ReadALU]>;
-
- def ANDSxxi : A64I_logicalimm<0b1, 0b11, (outs GPR64:$Rd),
- (ins GPR64:$Rn, logical_imm64_operand:$Imm),
- "ands\t$Rd, $Rn, $Imm",
- [], NoItinerary>,
- Sched<[WriteALU, ReadALU]>;
-}
-
-
-def : InstAlias<"tst $Rn, $Imm",
- (ANDSwwi WZR, GPR32:$Rn, logical_imm32_operand:$Imm)>;
-def : InstAlias<"tst $Rn, $Imm",
- (ANDSxxi XZR, GPR64:$Rn, logical_imm64_operand:$Imm)>;
-def : InstAlias<"mov $Rd, $Imm",
- (ORRwwi GPR32wsp:$Rd, WZR, logical_imm32_mov_operand:$Imm)>;
-def : InstAlias<"mov $Rd, $Imm",
- (ORRxxi GPR64xsp:$Rd, XZR, logical_imm64_mov_operand:$Imm)>;
+def : Pat<(f32 (fpimm0)), (FMOVWSr WZR)>, Requires<[NoZCZ]>;
+def : Pat<(f64 (fpimm0)), (FMOVXDr XZR)>, Requires<[NoZCZ]>;
//===----------------------------------------------------------------------===//
-// Logical (shifted register) instructions
+// Floating point conversion instruction.
//===----------------------------------------------------------------------===//
-// Contains: AND, BIC, ORR, ORN, EOR, EON, ANDS, BICS + aliases TST, MVN, MOV
-
-// Operand for optimizing (icmp (and LHS, RHS), 0, SomeCode). In theory "ANDS"
-// behaves differently for unsigned comparisons, so we defensively only allow
-// signed or n/a as the operand. In practice "unsigned greater than 0" is "not
-// equal to 0" and LLVM gives us this.
-def signed_cond : PatLeaf<(cond), [{
- return !isUnsignedIntSetCC(N->get());
-}]>;
+defm FCVT : FPConversion<"fcvt">;
-// These instructions share their "shift" operands with add/sub (shifted
-// register instructions). They are defined there.
-
-// N.b. the commutable parameter is just !N. It will be first against the wall
-// when the revolution comes.
-multiclass logical_shifts<string prefix, bit sf, bits<2> opc,
- bit N, bit commutable,
- string asmop, SDPatternOperator opfrag, ValueType ty,
- RegisterClass GPR, list<Register> defs> {
- let isCommutable = commutable, Defs = defs in {
- def _lsl : A64I_logicalshift<sf, opc, 0b00, N,
- (outs GPR:$Rd),
- (ins GPR:$Rn, GPR:$Rm,
- !cast<Operand>("lsl_operand_" # ty):$Imm6),
- !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Imm6"),
- [(set ty:$Rd, (opfrag ty:$Rn, (shl ty:$Rm,
- !cast<Operand>("lsl_operand_" # ty):$Imm6))
- )],
- NoItinerary>,
- Sched<[WriteALU, ReadALU, ReadALU]>;
-
- def _lsr : A64I_logicalshift<sf, opc, 0b01, N,
- (outs GPR:$Rd),
- (ins GPR:$Rn, GPR:$Rm,
- !cast<Operand>("lsr_operand_" # ty):$Imm6),
- !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Imm6"),
- [(set ty:$Rd, (opfrag ty:$Rn, (srl ty:$Rm,
- !cast<Operand>("lsr_operand_" # ty):$Imm6))
- )],
- NoItinerary>,
- Sched<[WriteALU, ReadALU, ReadALU]>;
-
- def _asr : A64I_logicalshift<sf, opc, 0b10, N,
- (outs GPR:$Rd),
- (ins GPR:$Rn, GPR:$Rm,
- !cast<Operand>("asr_operand_" # ty):$Imm6),
- !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Imm6"),
- [(set ty:$Rd, (opfrag ty:$Rn, (sra ty:$Rm,
- !cast<Operand>("asr_operand_" # ty):$Imm6))
- )],
- NoItinerary>,
- Sched<[WriteALU, ReadALU, ReadALU]>;
-
- def _ror : A64I_logicalshift<sf, opc, 0b11, N,
- (outs GPR:$Rd),
- (ins GPR:$Rn, GPR:$Rm,
- !cast<Operand>("ror_operand_" # ty):$Imm6),
- !strconcat(asmop, "\t$Rd, $Rn, $Rm, $Imm6"),
- [(set ty:$Rd, (opfrag ty:$Rn, (rotr ty:$Rm,
- !cast<Operand>("ror_operand_" # ty):$Imm6))
- )],
- NoItinerary>,
- Sched<[WriteALU, ReadALU, ReadALU]>;
- }
+def : Pat<(f32_to_f16 FPR32:$Rn),
+ (i32 (COPY_TO_REGCLASS
+ (f32 (SUBREG_TO_REG (i32 0), (FCVTHSr FPR32:$Rn), hsub)),
+ GPR32))>;
- def _noshift
- : InstAlias<!strconcat(asmop, " $Rd, $Rn, $Rm"),
- (!cast<Instruction>(prefix # "_lsl") GPR:$Rd, GPR:$Rn,
- GPR:$Rm, 0)>;
-
- def : Pat<(opfrag ty:$Rn, ty:$Rm),
- (!cast<Instruction>(prefix # "_lsl") $Rn, $Rm, 0)>;
-}
-
-multiclass logical_sizes<string prefix, bits<2> opc, bit N, bit commutable,
- string asmop, SDPatternOperator opfrag,
- list<Register> defs> {
- defm xxx : logical_shifts<prefix # "xxx", 0b1, opc, N,
- commutable, asmop, opfrag, i64, GPR64, defs>;
- defm www : logical_shifts<prefix # "www", 0b0, opc, N,
- commutable, asmop, opfrag, i32, GPR32, defs>;
-}
-
-
-defm AND : logical_sizes<"AND", 0b00, 0b0, 0b1, "and", and, []>;
-defm ORR : logical_sizes<"ORR", 0b01, 0b0, 0b1, "orr", or, []>;
-defm EOR : logical_sizes<"EOR", 0b10, 0b0, 0b1, "eor", xor, []>;
-defm ANDS : logical_sizes<"ANDS", 0b11, 0b0, 0b1, "ands",
- PatFrag<(ops node:$lhs, node:$rhs), (and node:$lhs, node:$rhs),
- [{ (void)N; return false; }]>,
- [NZCV]>;
-
-defm BIC : logical_sizes<"BIC", 0b00, 0b1, 0b0, "bic",
- PatFrag<(ops node:$lhs, node:$rhs),
- (and node:$lhs, (not node:$rhs))>, []>;
-defm ORN : logical_sizes<"ORN", 0b01, 0b1, 0b0, "orn",
- PatFrag<(ops node:$lhs, node:$rhs),
- (or node:$lhs, (not node:$rhs))>, []>;
-defm EON : logical_sizes<"EON", 0b10, 0b1, 0b0, "eon",
- PatFrag<(ops node:$lhs, node:$rhs),
- (xor node:$lhs, (not node:$rhs))>, []>;
-defm BICS : logical_sizes<"BICS", 0b11, 0b1, 0b0, "bics",
- PatFrag<(ops node:$lhs, node:$rhs),
- (and node:$lhs, (not node:$rhs)),
- [{ (void)N; return false; }]>,
- [NZCV]>;
-
-multiclass tst_shifts<string prefix, bit sf, ValueType ty, RegisterClass GPR> {
- let isCommutable = 1, Rd = 0b11111, Defs = [NZCV] in {
- def _lsl : A64I_logicalshift<sf, 0b11, 0b00, 0b0,
- (outs),
- (ins GPR:$Rn, GPR:$Rm,
- !cast<Operand>("lsl_operand_" # ty):$Imm6),
- "tst\t$Rn, $Rm, $Imm6",
- [(set NZCV, (A64setcc (and ty:$Rn, (shl ty:$Rm,
- !cast<Operand>("lsl_operand_" # ty):$Imm6)),
- 0, signed_cond))],
- NoItinerary>,
- Sched<[WriteALU, ReadALU, ReadALU]>;
-
-
- def _lsr : A64I_logicalshift<sf, 0b11, 0b01, 0b0,
- (outs),
- (ins GPR:$Rn, GPR:$Rm,
- !cast<Operand>("lsr_operand_" # ty):$Imm6),
- "tst\t$Rn, $Rm, $Imm6",
- [(set NZCV, (A64setcc (and ty:$Rn, (srl ty:$Rm,
- !cast<Operand>("lsr_operand_" # ty):$Imm6)),
- 0, signed_cond))],
- NoItinerary>,
- Sched<[WriteALU, ReadALU, ReadALU]>;
-
- def _asr : A64I_logicalshift<sf, 0b11, 0b10, 0b0,
- (outs),
- (ins GPR:$Rn, GPR:$Rm,
- !cast<Operand>("asr_operand_" # ty):$Imm6),
- "tst\t$Rn, $Rm, $Imm6",
- [(set NZCV, (A64setcc (and ty:$Rn, (sra ty:$Rm,
- !cast<Operand>("asr_operand_" # ty):$Imm6)),
- 0, signed_cond))],
- NoItinerary>,
- Sched<[WriteALU, ReadALU, ReadALU]>;
-
- def _ror : A64I_logicalshift<sf, 0b11, 0b11, 0b0,
- (outs),
- (ins GPR:$Rn, GPR:$Rm,
- !cast<Operand>("ror_operand_" # ty):$Imm6),
- "tst\t$Rn, $Rm, $Imm6",
- [(set NZCV, (A64setcc (and ty:$Rn, (rotr ty:$Rm,
- !cast<Operand>("ror_operand_" # ty):$Imm6)),
- 0, signed_cond))],
- NoItinerary>,
- Sched<[WriteALU, ReadALU, ReadALU]>;
- }
-
- def _noshift : InstAlias<"tst $Rn, $Rm",
- (!cast<Instruction>(prefix # "_lsl") GPR:$Rn, GPR:$Rm, 0)>;
-
- def : Pat<(A64setcc (and ty:$Rn, ty:$Rm), 0, signed_cond),
- (!cast<Instruction>(prefix # "_lsl") $Rn, $Rm, 0)>;
-}
-
-defm TSTxx : tst_shifts<"TSTxx", 0b1, i64, GPR64>;
-defm TSTww : tst_shifts<"TSTww", 0b0, i32, GPR32>;
-
-
-multiclass mvn_shifts<string prefix, bit sf, ValueType ty, RegisterClass GPR> {
- let isCommutable = 0, Rn = 0b11111 in {
- def _lsl : A64I_logicalshift<sf, 0b01, 0b00, 0b1,
- (outs GPR:$Rd),
- (ins GPR:$Rm,
- !cast<Operand>("lsl_operand_" # ty):$Imm6),
- "mvn\t$Rd, $Rm, $Imm6",
- [(set ty:$Rd, (not (shl ty:$Rm,
- !cast<Operand>("lsl_operand_" # ty):$Imm6)))],
- NoItinerary>,
- Sched<[WriteALU, ReadALU, ReadALU]>;
-
-
- def _lsr : A64I_logicalshift<sf, 0b01, 0b01, 0b1,
- (outs GPR:$Rd),
- (ins GPR:$Rm,
- !cast<Operand>("lsr_operand_" # ty):$Imm6),
- "mvn\t$Rd, $Rm, $Imm6",
- [(set ty:$Rd, (not (srl ty:$Rm,
- !cast<Operand>("lsr_operand_" # ty):$Imm6)))],
- NoItinerary>,
- Sched<[WriteALU, ReadALU, ReadALU]>;
-
- def _asr : A64I_logicalshift<sf, 0b01, 0b10, 0b1,
- (outs GPR:$Rd),
- (ins GPR:$Rm,
- !cast<Operand>("asr_operand_" # ty):$Imm6),
- "mvn\t$Rd, $Rm, $Imm6",
- [(set ty:$Rd, (not (sra ty:$Rm,
- !cast<Operand>("asr_operand_" # ty):$Imm6)))],
- NoItinerary>,
- Sched<[WriteALU, ReadALU, ReadALU]>;
-
- def _ror : A64I_logicalshift<sf, 0b01, 0b11, 0b1,
- (outs GPR:$Rd),
- (ins GPR:$Rm,
- !cast<Operand>("ror_operand_" # ty):$Imm6),
- "mvn\t$Rd, $Rm, $Imm6",
- [(set ty:$Rd, (not (rotr ty:$Rm,
-                                 !cast<Operand>("ror_operand_" # ty):$Imm6)))],
- NoItinerary>,
- Sched<[WriteALU, ReadALU, ReadALU]>;
- }
-
- def _noshift : InstAlias<"mvn $Rn, $Rm",
- (!cast<Instruction>(prefix # "_lsl") GPR:$Rn, GPR:$Rm, 0)>;
-
- def : Pat<(not ty:$Rm),
- (!cast<Instruction>(prefix # "_lsl") $Rm, 0)>;
-}
-
-defm MVNxx : mvn_shifts<"MVNxx", 0b1, i64, GPR64>;
-defm MVNww : mvn_shifts<"MVNww", 0b0, i32, GPR32>;
-
-def MOVxx :InstAlias<"mov $Rd, $Rm", (ORRxxx_lsl GPR64:$Rd, XZR, GPR64:$Rm, 0)>;
-def MOVww :InstAlias<"mov $Rd, $Rm", (ORRwww_lsl GPR32:$Rd, WZR, GPR32:$Rm, 0)>;
+def FCVTSHpseudo : Pseudo<(outs FPR32:$Rd), (ins FPR32:$Rn),
+ [(set (f32 FPR32:$Rd), (f16_to_f32 i32:$Rn))]>;
//===----------------------------------------------------------------------===//
-// Move wide (immediate) instructions
+// Floating point single operand instructions.
//===----------------------------------------------------------------------===//
-// Contains: MOVN, MOVZ, MOVK + MOV aliases
-
-// A wide variety of different relocations are needed for variants of these
-// instructions, so it turns out that we need a different operand for all of
-// them.
-multiclass movw_operands<string prefix, string instname, int width> {
- def _imm_asmoperand : AsmOperandClass {
- let Name = instname # width # "Shifted" # shift;
- let PredicateMethod = "is" # instname # width # "Imm";
- let RenderMethod = "addMoveWideImmOperands";
- let ParserMethod = "ParseImmWithLSLOperand";
- let DiagnosticType = "MOVWUImm16";
- }
- def _imm : Operand<i64> {
- let ParserMatchClass = !cast<AsmOperandClass>(prefix # "_imm_asmoperand");
- let PrintMethod = "printMoveWideImmOperand";
- let EncoderMethod = "getMoveWideImmOpValue";
- let DecoderMethod = "DecodeMoveWideImmOperand<" # width # ">";
+defm FABS : SingleOperandFPData<0b0001, "fabs", fabs>;
+defm FMOV : SingleOperandFPData<0b0000, "fmov">;
+defm FNEG : SingleOperandFPData<0b0010, "fneg", fneg>;
+defm FRINTA : SingleOperandFPData<0b1100, "frinta", frnd>;
+defm FRINTI : SingleOperandFPData<0b1111, "frinti", fnearbyint>;
+defm FRINTM : SingleOperandFPData<0b1010, "frintm", ffloor>;
+defm FRINTN : SingleOperandFPData<0b1000, "frintn", int_aarch64_neon_frintn>;
+defm FRINTP : SingleOperandFPData<0b1001, "frintp", fceil>;
- let MIOperandInfo = (ops uimm16:$UImm16, imm:$Shift);
- }
-}
+def : Pat<(v1f64 (int_aarch64_neon_frintn (v1f64 FPR64:$Rn))),
+ (FRINTNDr FPR64:$Rn)>;
-defm movn32 : movw_operands<"movn32", "MOVN", 32>;
-defm movn64 : movw_operands<"movn64", "MOVN", 64>;
-defm movz32 : movw_operands<"movz32", "MOVZ", 32>;
-defm movz64 : movw_operands<"movz64", "MOVZ", 64>;
-defm movk32 : movw_operands<"movk32", "MOVK", 32>;
-defm movk64 : movw_operands<"movk64", "MOVK", 64>;
-
-multiclass A64I_movwSizes<bits<2> opc, string asmop, dag ins32bit,
- dag ins64bit> {
-
- def wii : A64I_movw<0b0, opc, (outs GPR32:$Rd), ins32bit,
- !strconcat(asmop, "\t$Rd, $FullImm"),
- [], NoItinerary>,
- Sched<[WriteALU]> {
- bits<18> FullImm;
- let UImm16 = FullImm{15-0};
- let Shift = FullImm{17-16};
- }
-
- def xii : A64I_movw<0b1, opc, (outs GPR64:$Rd), ins64bit,
- !strconcat(asmop, "\t$Rd, $FullImm"),
- [], NoItinerary>,
- Sched<[WriteALU]> {
- bits<18> FullImm;
- let UImm16 = FullImm{15-0};
- let Shift = FullImm{17-16};
- }
+// FRINTX is inserted to set the flags as required by FENV_ACCESS ON behavior
+// in the C spec. Setting hasSideEffects ensures it is not DCE'd.
+// <rdar://problem/13715968>
+// TODO: We should really model the FPSR flags correctly. This is really ugly.
+let hasSideEffects = 1 in {
+defm FRINTX : SingleOperandFPData<0b1110, "frintx", frint>;
}
-let isMoveImm = 1, isReMaterializable = 1,
- isAsCheapAsAMove = 1, hasSideEffects = 0 in {
- defm MOVN : A64I_movwSizes<0b00, "movn",
- (ins movn32_imm:$FullImm),
- (ins movn64_imm:$FullImm)>;
-
- // Some relocations are able to convert between a MOVZ and a MOVN. If these
- // are applied the instruction must be emitted with the corresponding bits as
- // 0, which means a MOVZ needs to override that bit from the default.
- let PostEncoderMethod = "fixMOVZ" in
- defm MOVZ : A64I_movwSizes<0b10, "movz",
- (ins movz32_imm:$FullImm),
- (ins movz64_imm:$FullImm)>;
-}
-
-let Constraints = "$src = $Rd",
- SchedRW = [WriteALU, ReadALU] in
-defm MOVK : A64I_movwSizes<0b11, "movk",
- (ins GPR32:$src, movk32_imm:$FullImm),
- (ins GPR64:$src, movk64_imm:$FullImm)>;
-
+defm FRINTZ : SingleOperandFPData<0b1011, "frintz", ftrunc>;
-// And now the "MOV" aliases. These also need their own operands because what
-// they accept is completely different to what the base instructions accept.
-multiclass movalias_operand<string prefix, string basename,
- string immpredicate, int width> {
- def _asmoperand : AsmOperandClass {
- let Name = basename # width # "MovAlias";
- let PredicateMethod
- = "isMoveWideMovAlias<" # width # ", A64Imms::" # immpredicate # ">";
- let RenderMethod
- = "addMoveWideMovAliasOperands<" # width # ", "
- # "A64Imms::" # immpredicate # ">";
- }
-
- def _movimm : Operand<i64> {
- let ParserMatchClass = !cast<AsmOperandClass>(prefix # "_asmoperand");
-
- let MIOperandInfo = (ops uimm16:$UImm16, imm:$Shift);
- }
+let SchedRW = [WriteFDiv] in {
+defm FSQRT : SingleOperandFPData<0b0011, "fsqrt", fsqrt>;
}
-defm movz32 : movalias_operand<"movz32", "MOVZ", "isMOVZImm", 32>;
-defm movz64 : movalias_operand<"movz64", "MOVZ", "isMOVZImm", 64>;
-defm movn32 : movalias_operand<"movn32", "MOVN", "isOnlyMOVNImm", 32>;
-defm movn64 : movalias_operand<"movn64", "MOVN", "isOnlyMOVNImm", 64>;
-
-// FIXME: these are officially canonical aliases, but TableGen is too limited to
-// print them at the moment. I believe an "AliasPredicate" method will need to
-// be implemented to allow it, as well as the more generally useful handling of
-// non-register, non-constant operands.
-class movalias<Instruction INST, RegisterClass GPR, Operand operand>
- : InstAlias<"mov $Rd, $FullImm", (INST GPR:$Rd, operand:$FullImm)>;
-
-def : movalias<MOVZwii, GPR32, movz32_movimm>;
-def : movalias<MOVZxii, GPR64, movz64_movimm>;
-def : movalias<MOVNwii, GPR32, movn32_movimm>;
-def : movalias<MOVNxii, GPR64, movn64_movimm>;
-
-def movw_addressref_g0 : ComplexPattern<i64, 2, "SelectMOVWAddressRef<0>">;
-def movw_addressref_g1 : ComplexPattern<i64, 2, "SelectMOVWAddressRef<1>">;
-def movw_addressref_g2 : ComplexPattern<i64, 2, "SelectMOVWAddressRef<2>">;
-def movw_addressref_g3 : ComplexPattern<i64, 2, "SelectMOVWAddressRef<3>">;
-
-def : Pat<(A64WrapperLarge movw_addressref_g3:$G3, movw_addressref_g2:$G2,
- movw_addressref_g1:$G1, movw_addressref_g0:$G0),
- (MOVKxii (MOVKxii (MOVKxii (MOVZxii movw_addressref_g3:$G3),
- movw_addressref_g2:$G2),
- movw_addressref_g1:$G1),
- movw_addressref_g0:$G0)>;
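// For illustration (a sketch, not part of this patch): the pattern above turns
// a large-model address into a MOVZ/MOVK chain, e.g. for a symbol "sym":
//
//   movz x0, #:abs_g3:sym        // bits [63:48]
//   movk x0, #:abs_g2_nc:sym     // bits [47:32]
//   movk x0, #:abs_g1_nc:sym     // bits [31:16]
//   movk x0, #:abs_g0_nc:sym     // bits [15:0]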
-
//===----------------------------------------------------------------------===//
-// PC-relative addressing instructions
+// Floating point two operand instructions.
//===----------------------------------------------------------------------===//
-// Contains: ADR, ADRP
-
-def adr_label : Operand<i64> {
- let EncoderMethod = "getLabelOpValue<AArch64::fixup_a64_adr_prel>";
- // This label is a 21-bit offset from PC, unscaled
- let PrintMethod = "printLabelOperand<21, 1>";
- let ParserMatchClass = label_asmoperand<21, 1>;
- let OperandType = "OPERAND_PCREL";
-}
-
-def adrp_label_asmoperand : AsmOperandClass {
- let Name = "AdrpLabel";
- let RenderMethod = "addLabelOperands<21, 4096>";
- let DiagnosticType = "Label";
-}
-
-def adrp_label : Operand<i64> {
- let EncoderMethod = "getAdrpLabelOpValue";
-
- // This label is a 21-bit offset from PC, scaled by the page-size: 4096.
- let PrintMethod = "printLabelOperand<21, 4096>";
- let ParserMatchClass = adrp_label_asmoperand;
- let OperandType = "OPERAND_PCREL";
-}
-
-let hasSideEffects = 0 in {
- def ADRxi : A64I_PCADR<0b0, (outs GPR64:$Rd), (ins adr_label:$Label),
- "adr\t$Rd, $Label", [], NoItinerary>,
- Sched<[WriteALUs]>;
-
- def ADRPxi : A64I_PCADR<0b1, (outs GPR64:$Rd), (ins adrp_label:$Label),
- "adrp\t$Rd, $Label", [], NoItinerary>,
- Sched<[WriteALUs]>;
-}
+defm FADD : TwoOperandFPData<0b0010, "fadd", fadd>;
+let SchedRW = [WriteFDiv] in {
+defm FDIV : TwoOperandFPData<0b0001, "fdiv", fdiv>;
+}
+defm FMAXNM : TwoOperandFPData<0b0110, "fmaxnm", int_aarch64_neon_fmaxnm>;
+defm FMAX : TwoOperandFPData<0b0100, "fmax", AArch64fmax>;
+defm FMINNM : TwoOperandFPData<0b0111, "fminnm", int_aarch64_neon_fminnm>;
+defm FMIN : TwoOperandFPData<0b0101, "fmin", AArch64fmin>;
+let SchedRW = [WriteFMul] in {
+defm FMUL : TwoOperandFPData<0b0000, "fmul", fmul>;
+defm FNMUL : TwoOperandFPDataNeg<0b1000, "fnmul", fmul>;
+}
+defm FSUB : TwoOperandFPData<0b0011, "fsub", fsub>;
+
+def : Pat<(v1f64 (AArch64fmax (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
+ (FMAXDrr FPR64:$Rn, FPR64:$Rm)>;
+def : Pat<(v1f64 (AArch64fmin (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
+ (FMINDrr FPR64:$Rn, FPR64:$Rm)>;
+def : Pat<(v1f64 (int_aarch64_neon_fmaxnm (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
+ (FMAXNMDrr FPR64:$Rn, FPR64:$Rm)>;
+def : Pat<(v1f64 (int_aarch64_neon_fminnm (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
+ (FMINNMDrr FPR64:$Rn, FPR64:$Rm)>;
//===----------------------------------------------------------------------===//
-// System instructions
+// Floating point three operand instructions.
//===----------------------------------------------------------------------===//
-// Contains: HINT, CLREX, DSB, DMB, ISB, MSR, SYS, SYSL, MRS
-// + aliases IC, DC, AT, TLBI, NOP, YIELD, WFE, WFI, SEV, SEVL
-// Op1 and Op2 fields are sometimes simple 3-bit unsigned immediate values.
-def uimm3_asmoperand : AsmOperandClass {
- let Name = "UImm3";
- let PredicateMethod = "isUImm<3>";
- let RenderMethod = "addImmOperands";
- let DiagnosticType = "UImm3";
-}
+defm FMADD : ThreeOperandFPData<0, 0, "fmadd", fma>;
+defm FMSUB : ThreeOperandFPData<0, 1, "fmsub",
+ TriOpFrag<(fma node:$LHS, (fneg node:$MHS), node:$RHS)> >;
+defm FNMADD : ThreeOperandFPData<1, 0, "fnmadd",
+ TriOpFrag<(fneg (fma node:$LHS, node:$MHS, node:$RHS))> >;
+defm FNMSUB : ThreeOperandFPData<1, 1, "fnmsub",
+ TriOpFrag<(fma node:$LHS, node:$MHS, (fneg node:$RHS))> >;
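// For reference (illustrative, not part of this patch), the scalar semantics
// of the four definitions above, for "op d0, d1, d2, d3" (Rd, Rn, Rm, Ra):
//   fmadd:  d0 =   d3 + d1 * d2
//   fmsub:  d0 =   d3 - d1 * d2
//   fnmadd: d0 = -(d3 + d1 * d2)
//   fnmsub: d0 =   d1 * d2 - d3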
-def uimm3 : Operand<i32> {
- let ParserMatchClass = uimm3_asmoperand;
-}
+// The following def pats catch the case where the LHS of an FMA is negated.
+// The TriOpFrag above catches the case where the middle operand is negated.
-// The HINT alias can accept a simple unsigned 7-bit immediate.
-def uimm7_asmoperand : AsmOperandClass {
- let Name = "UImm7";
- let PredicateMethod = "isUImm<7>";
- let RenderMethod = "addImmOperands";
- let DiagnosticType = "UImm7";
-}
+// N.b. FMSUB etc have the accumulator at the *end* of (outs), unlike
+// the NEON variant.
+def : Pat<(f32 (fma (fneg FPR32:$Rn), FPR32:$Rm, FPR32:$Ra)),
+ (FMSUBSrrr FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;
-def uimm7 : Operand<i32> {
- let ParserMatchClass = uimm7_asmoperand;
-}
+def : Pat<(f64 (fma (fneg FPR64:$Rn), FPR64:$Rm, FPR64:$Ra)),
+ (FMSUBDrrr FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;
-// The namedimm multiclass is defined alongside the prefetch operands. Most of
-// these fit into the NamedImmMapper scheme well: they either accept a named
-// operand or any immediate below a particular value (which may be 0, implying
-// no immediate is allowed).
-defm dbarrier : namedimm<"dbarrier", "A64DB::DBarrierMapper">;
-defm isb : namedimm<"isb", "A64ISB::ISBMapper">;
-defm ic : namedimm<"ic", "A64IC::ICMapper">;
-defm dc : namedimm<"dc", "A64DC::DCMapper">;
-defm at : namedimm<"at", "A64AT::ATMapper">;
-defm tlbi : namedimm<"tlbi", "A64TLBI::TLBIMapper">;
-
-// However, MRS and MSR are more complicated for a few reasons:
-// * There are ~1000 generic names S3_<op1>_<CRn>_<CRm>_<Op2> which have an
-// implementation-defined effect
-// * Most registers are shared, but some are read-only or write-only.
-// * There is a variant of MSR which accepts the same register name (SPSel),
-// but which would have a different encoding.
-
-// In principle these could be resolved with more complicated subclasses of
-// NamedImmMapper; however, that imposes an overhead on other "named
-// immediates", both in concrete terms (virtual tables) and in unnecessary
-// abstraction.
-
-// The solution adopted here is to take the MRS/MSR Mappers out of the usual
-// hierarchy (they're not derived from NamedImmMapper) and to add logic for
-// their special situation.
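// For illustration (not part of this patch) of the overlap described above:
// "msr spsel, x0" must take the system-register form while "msr spsel, #1"
// must take the pstate form, with different encodings, so the parser cannot
// classify the first operand until it has seen the second.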
-def mrs_asmoperand : AsmOperandClass {
- let Name = "MRS";
- let ParserMethod = "ParseSysRegOperand";
- let DiagnosticType = "MRS";
-}
+// We handled -(a + b*c) for FNMADD above, now it's time for "(-a) + (-b)*c" and
+// "(-a) + b*(-c)".
+def : Pat<(f32 (fma (fneg FPR32:$Rn), FPR32:$Rm, (fneg FPR32:$Ra))),
+ (FNMADDSrrr FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;
-def mrs_op : Operand<i32> {
- let ParserMatchClass = mrs_asmoperand;
- let PrintMethod = "printMRSOperand";
- let DecoderMethod = "DecodeMRSOperand";
-}
+def : Pat<(f64 (fma (fneg FPR64:$Rn), FPR64:$Rm, (fneg FPR64:$Ra))),
+ (FNMADDDrrr FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;
-def msr_asmoperand : AsmOperandClass {
- let Name = "MSRWithReg";
+def : Pat<(f32 (fma FPR32:$Rn, (fneg FPR32:$Rm), (fneg FPR32:$Ra))),
+ (FNMADDSrrr FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;
- // Note that SPSel is valid for both this and the pstate operands, but with
- // different immediate encodings. This is why these operands provide a string
- // AArch64Operand rather than an immediate. The overlap is small enough that
- // it could be resolved with hackery now, but who can say in future?
- let ParserMethod = "ParseSysRegOperand";
- let DiagnosticType = "MSR";
-}
-
-def msr_op : Operand<i32> {
- let ParserMatchClass = msr_asmoperand;
- let PrintMethod = "printMSROperand";
- let DecoderMethod = "DecodeMSROperand";
-}
-
-def pstate_asmoperand : AsmOperandClass {
- let Name = "MSRPState";
- // See comment above about parser.
- let ParserMethod = "ParseSysRegOperand";
- let DiagnosticType = "MSR";
-}
-
-def pstate_op : Operand<i32> {
- let ParserMatchClass = pstate_asmoperand;
- let PrintMethod = "printNamedImmOperand<A64PState::PStateMapper>";
- let DecoderMethod = "DecodeNamedImmOperand<A64PState::PStateMapper>";
-}
-
-// When <CRn> is specified, an assembler should accept something like "C4", not
-// the usual "#4" immediate.
-def CRx_asmoperand : AsmOperandClass {
- let Name = "CRx";
- let PredicateMethod = "isUImm<4>";
- let RenderMethod = "addImmOperands";
- let ParserMethod = "ParseCRxOperand";
- // Diagnostics are handled in all cases by ParseCRxOperand.
-}
-
-def CRx : Operand<i32> {
- let ParserMatchClass = CRx_asmoperand;
- let PrintMethod = "printCRxOperand";
-}
-
-
-// Finally, we can start defining the instructions.
-
-// HINT is straightforward, with a few aliases.
-def HINTi : A64I_system<0b0, (outs), (ins uimm7:$UImm7), "hint\t$UImm7",
- [], NoItinerary> {
- bits<7> UImm7;
- let CRm = UImm7{6-3};
- let Op2 = UImm7{2-0};
-
- let Op0 = 0b00;
- let Op1 = 0b011;
- let CRn = 0b0010;
- let Rt = 0b11111;
-}
-
-def : InstAlias<"nop", (HINTi 0)>;
-def : InstAlias<"yield", (HINTi 1)>;
-def : InstAlias<"wfe", (HINTi 2)>;
-def : InstAlias<"wfi", (HINTi 3)>;
-def : InstAlias<"sev", (HINTi 4)>;
-def : InstAlias<"sevl", (HINTi 5)>;
-
-// Quite a few instructions then follow a similar pattern of fixing common
-// fields in the bitpattern, we'll define a helper-class for them.
-class simple_sys<bits<2> op0, bits<3> op1, bits<4> crn, bits<3> op2,
- Operand operand, string asmop>
- : A64I_system<0b0, (outs), (ins operand:$CRm), !strconcat(asmop, "\t$CRm"),
- [], NoItinerary> {
- let Op0 = op0;
- let Op1 = op1;
- let CRn = crn;
- let Op2 = op2;
- let Rt = 0b11111;
-}
-
-
-def CLREXi : simple_sys<0b00, 0b011, 0b0011, 0b010, uimm4, "clrex">;
-def DSBi : simple_sys<0b00, 0b011, 0b0011, 0b100, dbarrier_op, "dsb">;
-def DMBi : simple_sys<0b00, 0b011, 0b0011, 0b101, dbarrier_op, "dmb">;
-def ISBi : simple_sys<0b00, 0b011, 0b0011, 0b110, isb_op, "isb">;
-
-def : InstAlias<"clrex", (CLREXi 0b1111)>;
-def : InstAlias<"isb", (ISBi 0b1111)>;
-
-// (DMBi 0xb) is a "DMB ISH" instruction, appropriate at least for Linux SMP
-// configurations.
-def : Pat<(atomic_fence imm, imm), (DMBi 0xb)>;
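// (Illustrative note, not part of this patch: 0xb is the CRm encoding of the
// ISH option, so an LLVM IR "fence seq_cst" matched by the pattern above is
// emitted as "dmb ish".)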
-
-// Any SYS bitpattern can be represented with a complex and opaque "SYS"
-// instruction.
-def SYSiccix : A64I_system<0b0, (outs),
- (ins uimm3:$Op1, CRx:$CRn, CRx:$CRm,
- uimm3:$Op2, GPR64:$Rt),
- "sys\t$Op1, $CRn, $CRm, $Op2, $Rt",
- [], NoItinerary> {
- let Op0 = 0b01;
-}
-
-// The Xt argument may be omitted (whether or not doing so makes sense) for the
-// generic SYS instruction.
-def : InstAlias<"sys $Op1, $CRn, $CRm, $Op2",
- (SYSiccix uimm3:$Op1, CRx:$CRn, CRx:$CRm, uimm3:$Op2, XZR)>;
-
-
-// But many have aliases, which obviously don't fit into this general scheme.
-class SYSalias<dag ins, string asmstring>
- : A64I_system<0b0, (outs), ins, asmstring, [], NoItinerary> {
- let isAsmParserOnly = 1;
-
- bits<14> SysOp;
- let Op0 = 0b01;
- let Op1 = SysOp{13-11};
- let CRn = SysOp{10-7};
- let CRm = SysOp{6-3};
- let Op2 = SysOp{2-0};
-}
-
-def ICix : SYSalias<(ins ic_op:$SysOp, GPR64:$Rt), "ic\t$SysOp, $Rt">;
-
-def ICi : SYSalias<(ins ic_op:$SysOp), "ic\t$SysOp"> {
- let Rt = 0b11111;
-}
-
-def DCix : SYSalias<(ins dc_op:$SysOp, GPR64:$Rt), "dc\t$SysOp, $Rt">;
-def ATix : SYSalias<(ins at_op:$SysOp, GPR64:$Rt), "at\t$SysOp, $Rt">;
-
-def TLBIix : SYSalias<(ins tlbi_op:$SysOp, GPR64:$Rt), "tlbi\t$SysOp, $Rt">;
-
-def TLBIi : SYSalias<(ins tlbi_op:$SysOp), "tlbi\t$SysOp"> {
- let Rt = 0b11111;
-}
-
-
-def SYSLxicci : A64I_system<0b1, (outs GPR64:$Rt),
- (ins uimm3:$Op1, CRx:$CRn, CRx:$CRm, uimm3:$Op2),
- "sysl\t$Rt, $Op1, $CRn, $CRm, $Op2",
- [], NoItinerary> {
- let Op0 = 0b01;
-}
-
-// The instructions themselves are rather simple for MSR and MRS.
-def MSRix : A64I_system<0b0, (outs), (ins msr_op:$SysReg, GPR64:$Rt),
- "msr\t$SysReg, $Rt", [], NoItinerary> {
- bits<16> SysReg;
- let Op0 = SysReg{15-14};
- let Op1 = SysReg{13-11};
- let CRn = SysReg{10-7};
- let CRm = SysReg{6-3};
- let Op2 = SysReg{2-0};
-}
-
-def MRSxi : A64I_system<0b1, (outs GPR64:$Rt), (ins mrs_op:$SysReg),
- "mrs\t$Rt, $SysReg", [], NoItinerary> {
- bits<16> SysReg;
- let Op0 = SysReg{15-14};
- let Op1 = SysReg{13-11};
- let CRn = SysReg{10-7};
- let CRm = SysReg{6-3};
- let Op2 = SysReg{2-0};
-}
-
-def MSRii : A64I_system<0b0, (outs), (ins pstate_op:$PState, uimm4:$CRm),
- "msr\t$PState, $CRm", [], NoItinerary> {
- bits<6> PState;
-
- let Op0 = 0b00;
- let Op1 = PState{5-3};
- let CRn = 0b0100;
- let Op2 = PState{2-0};
- let Rt = 0b11111;
-}
+def : Pat<(f64 (fma FPR64:$Rn, (fneg FPR64:$Rm), (fneg FPR64:$Ra))),
+ (FNMADDDrrr FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;
//===----------------------------------------------------------------------===//
-// Test & branch (immediate) instructions
+// Floating point comparison instructions.
//===----------------------------------------------------------------------===//
-// Contains: TBZ, TBNZ
-// The bit to test is a simple unsigned 6-bit immediate in the X-register
-// versions.
-def uimm6 : Operand<i64> {
- let ParserMatchClass = uimm6_asmoperand;
-}
-
-def label_wid14_scal4_asmoperand : label_asmoperand<14, 4>;
-
-def tbimm_target : Operand<OtherVT> {
- let EncoderMethod = "getLabelOpValue<AArch64::fixup_a64_tstbr>";
-
- // This label is a 14-bit offset from PC, scaled by the instruction-width: 4.
- let PrintMethod = "printLabelOperand<14, 4>";
- let ParserMatchClass = label_wid14_scal4_asmoperand;
-
- let OperandType = "OPERAND_PCREL";
-}
-
-def A64eq : ImmLeaf<i32, [{ return Imm == A64CC::EQ; }]>;
-def A64ne : ImmLeaf<i32, [{ return Imm == A64CC::NE; }]>;
-
-// These instructions correspond to patterns involving "and" with a power of
-// two, which we need to be able to select.
-def tstb64_pat : ComplexPattern<i64, 1, "SelectTSTBOperand<64>">;
-def tstb32_pat : ComplexPattern<i32, 1, "SelectTSTBOperand<32>">;
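// For illustration (not part of this patch): these ComplexPatterns let a
// single-bit test select tbz/tbnz directly, so C like "if (x & 8) f();" can
// become
//   tbnz x0, #3, .Lcall
// rather than an and/cmp/b.cond sequence.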
-
-let isBranch = 1, isTerminator = 1 in {
- def TBZxii : A64I_TBimm<0b0, (outs),
- (ins GPR64:$Rt, uimm6:$Imm, tbimm_target:$Label),
- "tbz\t$Rt, $Imm, $Label",
- [(A64br_cc (A64cmp (and i64:$Rt, tstb64_pat:$Imm), 0),
- A64eq, bb:$Label)],
- NoItinerary>,
- Sched<[WriteBr]>;
-
- def TBNZxii : A64I_TBimm<0b1, (outs),
- (ins GPR64:$Rt, uimm6:$Imm, tbimm_target:$Label),
- "tbnz\t$Rt, $Imm, $Label",
- [(A64br_cc (A64cmp (and i64:$Rt, tstb64_pat:$Imm), 0),
- A64ne, bb:$Label)],
- NoItinerary>,
- Sched<[WriteBr]>;
-
-
-  // Note: these instructions overlap with the above 64-bit patterns. This is
-  // intentional: "tbz x3, #1, somewhere" and "tbz w3, #1, somewhere" do the
-  // same thing, and both are permitted assembly. They also both have sensible
-  // DAG patterns.
- def TBZwii : A64I_TBimm<0b0, (outs),
- (ins GPR32:$Rt, uimm5:$Imm, tbimm_target:$Label),
- "tbz\t$Rt, $Imm, $Label",
- [(A64br_cc (A64cmp (and i32:$Rt, tstb32_pat:$Imm), 0),
- A64eq, bb:$Label)],
- NoItinerary>,
- Sched<[WriteBr]> {
- let Imm{5} = 0b0;
- }
-
- def TBNZwii : A64I_TBimm<0b1, (outs),
- (ins GPR32:$Rt, uimm5:$Imm, tbimm_target:$Label),
- "tbnz\t$Rt, $Imm, $Label",
- [(A64br_cc (A64cmp (and i32:$Rt, tstb32_pat:$Imm), 0),
- A64ne, bb:$Label)],
- NoItinerary>,
- Sched<[WriteBr]> {
- let Imm{5} = 0b0;
- }
-}
+defm FCMPE : FPComparison<1, "fcmpe">;
+defm FCMP : FPComparison<0, "fcmp", AArch64fcmp>;
//===----------------------------------------------------------------------===//
-// Unconditional branch (immediate) instructions
+// Floating point conditional comparison instructions.
//===----------------------------------------------------------------------===//
-// Contains: B, BL
-
-def label_wid26_scal4_asmoperand : label_asmoperand<26, 4>;
-
-def bimm_target : Operand<OtherVT> {
- let EncoderMethod = "getLabelOpValue<AArch64::fixup_a64_uncondbr>";
- // This label is a 26-bit offset from PC, scaled by the instruction-width: 4.
- let PrintMethod = "printLabelOperand<26, 4>";
- let ParserMatchClass = label_wid26_scal4_asmoperand;
-
- let OperandType = "OPERAND_PCREL";
-}
-
-def blimm_target : Operand<i64> {
- let EncoderMethod = "getLabelOpValue<AArch64::fixup_a64_call>";
-
- // This label is a 26-bit offset from PC, scaled by the instruction-width: 4.
- let PrintMethod = "printLabelOperand<26, 4>";
- let ParserMatchClass = label_wid26_scal4_asmoperand;
-
- let OperandType = "OPERAND_PCREL";
-}
+defm FCCMPE : FPCondComparison<1, "fccmpe">;
+defm FCCMP : FPCondComparison<0, "fccmp">;
-class A64I_BimmImpl<bit op, string asmop, list<dag> patterns, Operand lbl_type>
- : A64I_Bimm<op, (outs), (ins lbl_type:$Label),
- !strconcat(asmop, "\t$Label"), patterns,
- NoItinerary>,
- Sched<[WriteBr]>;
+//===----------------------------------------------------------------------===//
+// Floating point conditional select instruction.
+//===----------------------------------------------------------------------===//
-let isBranch = 1 in {
- def Bimm : A64I_BimmImpl<0b0, "b", [(br bb:$Label)], bimm_target> {
- let isTerminator = 1;
- let isBarrier = 1;
- }
+defm FCSEL : FPCondSelect<"fcsel">;
- let SchedRW = [WriteBrL] in {
- def BLimm : A64I_BimmImpl<0b1, "bl",
- [(AArch64Call tglobaladdr:$Label)], blimm_target> {
- let isCall = 1;
- let Defs = [X30];
- }
- }
+// CSEL instructions providing f128 types need to be handled by a
+// pseudo-instruction since the eventual code will need to introduce basic
+// blocks and control flow.
+def F128CSEL : Pseudo<(outs FPR128:$Rd),
+ (ins FPR128:$Rn, FPR128:$Rm, ccode:$cond),
+ [(set (f128 FPR128:$Rd),
+ (AArch64csel FPR128:$Rn, FPR128:$Rm,
+ (i32 imm:$cond), NZCV))]> {
+ let Uses = [NZCV];
+ let usesCustomInserter = 1;
}
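// (Illustrative sketch, not part of this patch: the custom inserter later
// expands F128CSEL into real control flow, roughly a conditional branch
// selecting between copies of $Rn and $Rm, since no single instruction can
// conditionally select an f128 value.)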
-def : Pat<(AArch64Call texternalsym:$Label), (BLimm texternalsym:$Label)>;
//===----------------------------------------------------------------------===//
-// Unconditional branch (register) instructions
+// Floating point immediate move.
//===----------------------------------------------------------------------===//
-// Contains: BR, BLR, RET, ERET, DRP.
-
-// Most of the notional opcode fields in the A64I_Breg format are fixed in A64
-// at the moment.
-class A64I_BregImpl<bits<4> opc,
- dag outs, dag ins, string asmstr, list<dag> patterns,
- InstrItinClass itin = NoItinerary>
- : A64I_Breg<opc, 0b11111, 0b000000, 0b00000,
- outs, ins, asmstr, patterns, itin>,
- Sched<[WriteBr]> {
- let isBranch = 1;
- let isIndirectBranch = 1;
-}
-
-// Note that these are not marked isCall or isReturn because as far as LLVM is
-// concerned they're not. "ret" is just another jump unless it has been selected
-// by LLVM as the function's return.
-let isBranch = 1 in {
- def BRx : A64I_BregImpl<0b0000,(outs), (ins GPR64:$Rn),
- "br\t$Rn", [(brind i64:$Rn)]> {
- let isBarrier = 1;
- let isTerminator = 1;
- }
-
- let SchedRW = [WriteBrL] in {
- def BLRx : A64I_BregImpl<0b0001, (outs), (ins GPR64:$Rn),
- "blr\t$Rn", [(AArch64Call i64:$Rn)]> {
- let isBarrier = 0;
- let isCall = 1;
- let Defs = [X30];
- }
- }
-
- def RETx : A64I_BregImpl<0b0010, (outs), (ins GPR64:$Rn),
- "ret\t$Rn", []> {
- let isBarrier = 1;
- let isTerminator = 1;
- let isReturn = 1;
- }
-
- // Create a separate pseudo-instruction for codegen to use so that we don't
- // flag x30 as used in every function. It'll be restored before the RET by the
- // epilogue if it's legitimately used.
- def RET : A64PseudoExpand<(outs), (ins), [(A64ret)], (RETx (ops X30))> {
- let isTerminator = 1;
- let isBarrier = 1;
- let isReturn = 1;
- }
-
- def ERET : A64I_BregImpl<0b0100, (outs), (ins), "eret", []> {
- let Rn = 0b11111;
- let isBarrier = 1;
- let isTerminator = 1;
- let isReturn = 1;
- }
-
- def DRPS : A64I_BregImpl<0b0101, (outs), (ins), "drps", []> {
- let Rn = 0b11111;
- let isBarrier = 1;
- }
+let isReMaterializable = 1 in {
+defm FMOV : FPMoveImmediate<"fmov">;
}
-def RETAlias : InstAlias<"ret", (RETx X30)>;
-
-
//===----------------------------------------------------------------------===//
-// Address generation patterns
+// Advanced SIMD two vector instructions.
//===----------------------------------------------------------------------===//
-// Primary method of address generation for the small/absolute memory model is
-// an ADRP/ADR pair:
-// ADRP x0, some_variable
-// ADD x0, x0, #:lo12:some_variable
-//
-// The load/store elision of the ADD is accomplished when selecting
-// addressing-modes. This just mops up the cases where that doesn't work and we
-// really need an address in some register.
-
-// This wrapper applies a LO12 modifier to the address. Otherwise we could just
-// use the same address.
-
-class ADRP_ADD<SDNode Wrapper, SDNode addrop>
- : Pat<(Wrapper addrop:$Hi, addrop:$Lo12, (i32 imm)),
- (ADDxxi_lsl0_s (ADRPxi addrop:$Hi), addrop:$Lo12)>;
-
-def : ADRP_ADD<A64WrapperSmall, tblockaddress>;
-def : ADRP_ADD<A64WrapperSmall, texternalsym>;
-def : ADRP_ADD<A64WrapperSmall, tglobaladdr>;
-def : ADRP_ADD<A64WrapperSmall, tglobaltlsaddr>;
-def : ADRP_ADD<A64WrapperSmall, tjumptable>;
-def : ADRP_ADD<A64WrapperSmall, tconstpool>;
+defm ABS : SIMDTwoVectorBHSD<0, 0b01011, "abs", int_aarch64_neon_abs>;
+defm CLS : SIMDTwoVectorBHS<0, 0b00100, "cls", int_aarch64_neon_cls>;
+defm CLZ : SIMDTwoVectorBHS<1, 0b00100, "clz", ctlz>;
+defm CMEQ : SIMDCmpTwoVector<0, 0b01001, "cmeq", AArch64cmeqz>;
+defm CMGE : SIMDCmpTwoVector<1, 0b01000, "cmge", AArch64cmgez>;
+defm CMGT : SIMDCmpTwoVector<0, 0b01000, "cmgt", AArch64cmgtz>;
+defm CMLE : SIMDCmpTwoVector<1, 0b01001, "cmle", AArch64cmlez>;
+defm CMLT : SIMDCmpTwoVector<0, 0b01010, "cmlt", AArch64cmltz>;
+defm CNT : SIMDTwoVectorB<0, 0b00, 0b00101, "cnt", ctpop>;
+defm FABS : SIMDTwoVectorFP<0, 1, 0b01111, "fabs", fabs>;
+
+defm FCMEQ : SIMDFPCmpTwoVector<0, 1, 0b01101, "fcmeq", AArch64fcmeqz>;
+defm FCMGE : SIMDFPCmpTwoVector<1, 1, 0b01100, "fcmge", AArch64fcmgez>;
+defm FCMGT : SIMDFPCmpTwoVector<0, 1, 0b01100, "fcmgt", AArch64fcmgtz>;
+defm FCMLE : SIMDFPCmpTwoVector<1, 1, 0b01101, "fcmle", AArch64fcmlez>;
+defm FCMLT : SIMDFPCmpTwoVector<0, 1, 0b01110, "fcmlt", AArch64fcmltz>;
+defm FCVTAS : SIMDTwoVectorFPToInt<0,0,0b11100, "fcvtas",int_aarch64_neon_fcvtas>;
+defm FCVTAU : SIMDTwoVectorFPToInt<1,0,0b11100, "fcvtau",int_aarch64_neon_fcvtau>;
+defm FCVTL : SIMDFPWidenTwoVector<0, 0, 0b10111, "fcvtl">;
+def : Pat<(v4f32 (int_aarch64_neon_vcvthf2fp (v4i16 V64:$Rn))),
+ (FCVTLv4i16 V64:$Rn)>;
+def : Pat<(v4f32 (int_aarch64_neon_vcvthf2fp (extract_subvector (v8i16 V128:$Rn),
+ (i64 4)))),
+ (FCVTLv8i16 V128:$Rn)>;
+def : Pat<(v2f64 (fextend (v2f32 V64:$Rn))), (FCVTLv2i32 V64:$Rn)>;
+def : Pat<(v2f64 (fextend (v2f32 (extract_subvector (v4f32 V128:$Rn),
+ (i64 2))))),
+ (FCVTLv4i32 V128:$Rn)>;
+
+defm FCVTMS : SIMDTwoVectorFPToInt<0,0,0b11011, "fcvtms",int_aarch64_neon_fcvtms>;
+defm FCVTMU : SIMDTwoVectorFPToInt<1,0,0b11011, "fcvtmu",int_aarch64_neon_fcvtmu>;
+defm FCVTNS : SIMDTwoVectorFPToInt<0,0,0b11010, "fcvtns",int_aarch64_neon_fcvtns>;
+defm FCVTNU : SIMDTwoVectorFPToInt<1,0,0b11010, "fcvtnu",int_aarch64_neon_fcvtnu>;
+defm FCVTN : SIMDFPNarrowTwoVector<0, 0, 0b10110, "fcvtn">;
+def : Pat<(v4i16 (int_aarch64_neon_vcvtfp2hf (v4f32 V128:$Rn))),
+ (FCVTNv4i16 V128:$Rn)>;
+def : Pat<(concat_vectors V64:$Rd,
+ (v4i16 (int_aarch64_neon_vcvtfp2hf (v4f32 V128:$Rn)))),
+ (FCVTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>;
+def : Pat<(v2f32 (fround (v2f64 V128:$Rn))), (FCVTNv2i32 V128:$Rn)>;
+def : Pat<(concat_vectors V64:$Rd, (v2f32 (fround (v2f64 V128:$Rn)))),
+ (FCVTNv4i32 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>;
+defm FCVTPS : SIMDTwoVectorFPToInt<0,1,0b11010, "fcvtps",int_aarch64_neon_fcvtps>;
+defm FCVTPU : SIMDTwoVectorFPToInt<1,1,0b11010, "fcvtpu",int_aarch64_neon_fcvtpu>;
+defm FCVTXN : SIMDFPInexactCvtTwoVector<1, 0, 0b10110, "fcvtxn",
+ int_aarch64_neon_fcvtxn>;
+defm FCVTZS : SIMDTwoVectorFPToInt<0, 1, 0b11011, "fcvtzs", fp_to_sint>;
+defm FCVTZU : SIMDTwoVectorFPToInt<1, 1, 0b11011, "fcvtzu", fp_to_uint>;
+let isCodeGenOnly = 1 in {
+defm FCVTZS_Int : SIMDTwoVectorFPToInt<0, 1, 0b11011, "fcvtzs",
+ int_aarch64_neon_fcvtzs>;
+defm FCVTZU_Int : SIMDTwoVectorFPToInt<1, 1, 0b11011, "fcvtzu",
+ int_aarch64_neon_fcvtzu>;
+}
+defm FNEG : SIMDTwoVectorFP<1, 1, 0b01111, "fneg", fneg>;
+defm FRECPE : SIMDTwoVectorFP<0, 1, 0b11101, "frecpe", int_aarch64_neon_frecpe>;
+defm FRINTA : SIMDTwoVectorFP<1, 0, 0b11000, "frinta", frnd>;
+defm FRINTI : SIMDTwoVectorFP<1, 1, 0b11001, "frinti", fnearbyint>;
+defm FRINTM : SIMDTwoVectorFP<0, 0, 0b11001, "frintm", ffloor>;
+defm FRINTN : SIMDTwoVectorFP<0, 0, 0b11000, "frintn", int_aarch64_neon_frintn>;
+defm FRINTP : SIMDTwoVectorFP<0, 1, 0b11000, "frintp", fceil>;
+defm FRINTX : SIMDTwoVectorFP<1, 0, 0b11001, "frintx", frint>;
+defm FRINTZ : SIMDTwoVectorFP<0, 1, 0b11001, "frintz", ftrunc>;
+defm FRSQRTE: SIMDTwoVectorFP<1, 1, 0b11101, "frsqrte", int_aarch64_neon_frsqrte>;
+defm FSQRT : SIMDTwoVectorFP<1, 1, 0b11111, "fsqrt", fsqrt>;
+defm NEG : SIMDTwoVectorBHSD<1, 0b01011, "neg",
+ UnOpFrag<(sub immAllZerosV, node:$LHS)> >;
+defm NOT : SIMDTwoVectorB<1, 0b00, 0b00101, "not", vnot>;
+// Aliases for MVN -> NOT.
+def : InstAlias<"mvn{ $Vd.8b, $Vn.8b|.8b $Vd, $Vn}",
+ (NOTv8i8 V64:$Vd, V64:$Vn)>;
+def : InstAlias<"mvn{ $Vd.16b, $Vn.16b|.16b $Vd, $Vn}",
+ (NOTv16i8 V128:$Vd, V128:$Vn)>;
+
+def : Pat<(AArch64neg (v8i8 V64:$Rn)), (NEGv8i8 V64:$Rn)>;
+def : Pat<(AArch64neg (v16i8 V128:$Rn)), (NEGv16i8 V128:$Rn)>;
+def : Pat<(AArch64neg (v4i16 V64:$Rn)), (NEGv4i16 V64:$Rn)>;
+def : Pat<(AArch64neg (v8i16 V128:$Rn)), (NEGv8i16 V128:$Rn)>;
+def : Pat<(AArch64neg (v2i32 V64:$Rn)), (NEGv2i32 V64:$Rn)>;
+def : Pat<(AArch64neg (v4i32 V128:$Rn)), (NEGv4i32 V128:$Rn)>;
+def : Pat<(AArch64neg (v2i64 V128:$Rn)), (NEGv2i64 V128:$Rn)>;
+
+def : Pat<(AArch64not (v8i8 V64:$Rn)), (NOTv8i8 V64:$Rn)>;
+def : Pat<(AArch64not (v16i8 V128:$Rn)), (NOTv16i8 V128:$Rn)>;
+def : Pat<(AArch64not (v4i16 V64:$Rn)), (NOTv8i8 V64:$Rn)>;
+def : Pat<(AArch64not (v8i16 V128:$Rn)), (NOTv16i8 V128:$Rn)>;
+def : Pat<(AArch64not (v2i32 V64:$Rn)), (NOTv8i8 V64:$Rn)>;
+def : Pat<(AArch64not (v1i64 V64:$Rn)), (NOTv8i8 V64:$Rn)>;
+def : Pat<(AArch64not (v4i32 V128:$Rn)), (NOTv16i8 V128:$Rn)>;
+def : Pat<(AArch64not (v2i64 V128:$Rn)), (NOTv16i8 V128:$Rn)>;
+
+def : Pat<(vnot (v4i16 V64:$Rn)), (NOTv8i8 V64:$Rn)>;
+def : Pat<(vnot (v8i16 V128:$Rn)), (NOTv16i8 V128:$Rn)>;
+def : Pat<(vnot (v2i32 V64:$Rn)), (NOTv8i8 V64:$Rn)>;
+def : Pat<(vnot (v4i32 V128:$Rn)), (NOTv16i8 V128:$Rn)>;
+def : Pat<(vnot (v2i64 V128:$Rn)), (NOTv16i8 V128:$Rn)>;
+
+defm RBIT : SIMDTwoVectorB<1, 0b01, 0b00101, "rbit", int_aarch64_neon_rbit>;
+defm REV16 : SIMDTwoVectorB<0, 0b00, 0b00001, "rev16", AArch64rev16>;
+defm REV32 : SIMDTwoVectorBH<1, 0b00000, "rev32", AArch64rev32>;
+defm REV64 : SIMDTwoVectorBHS<0, 0b00000, "rev64", AArch64rev64>;
+defm SADALP : SIMDLongTwoVectorTied<0, 0b00110, "sadalp",
+ BinOpFrag<(add node:$LHS, (int_aarch64_neon_saddlp node:$RHS))> >;
+defm SADDLP : SIMDLongTwoVector<0, 0b00010, "saddlp", int_aarch64_neon_saddlp>;
+defm SCVTF : SIMDTwoVectorIntToFP<0, 0, 0b11101, "scvtf", sint_to_fp>;
+defm SHLL : SIMDVectorLShiftLongBySizeBHS;
+defm SQABS : SIMDTwoVectorBHSD<0, 0b00111, "sqabs", int_aarch64_neon_sqabs>;
+defm SQNEG : SIMDTwoVectorBHSD<1, 0b00111, "sqneg", int_aarch64_neon_sqneg>;
+defm SQXTN : SIMDMixedTwoVector<0, 0b10100, "sqxtn", int_aarch64_neon_sqxtn>;
+defm SQXTUN : SIMDMixedTwoVector<1, 0b10010, "sqxtun", int_aarch64_neon_sqxtun>;
+defm SUQADD : SIMDTwoVectorBHSDTied<0, 0b00011, "suqadd",int_aarch64_neon_suqadd>;
+defm UADALP : SIMDLongTwoVectorTied<1, 0b00110, "uadalp",
+ BinOpFrag<(add node:$LHS, (int_aarch64_neon_uaddlp node:$RHS))> >;
+defm UADDLP : SIMDLongTwoVector<1, 0b00010, "uaddlp",
+ int_aarch64_neon_uaddlp>;
+defm UCVTF : SIMDTwoVectorIntToFP<1, 0, 0b11101, "ucvtf", uint_to_fp>;
+defm UQXTN : SIMDMixedTwoVector<1, 0b10100, "uqxtn", int_aarch64_neon_uqxtn>;
+defm URECPE : SIMDTwoVectorS<0, 1, 0b11100, "urecpe", int_aarch64_neon_urecpe>;
+defm URSQRTE: SIMDTwoVectorS<1, 1, 0b11100, "ursqrte", int_aarch64_neon_ursqrte>;
+defm USQADD : SIMDTwoVectorBHSDTied<1, 0b00011, "usqadd",int_aarch64_neon_usqadd>;
+defm XTN : SIMDMixedTwoVector<0, 0b10010, "xtn", trunc>;
+
+def : Pat<(v2f32 (AArch64rev64 V64:$Rn)), (REV64v2i32 V64:$Rn)>;
+def : Pat<(v4f32 (AArch64rev64 V128:$Rn)), (REV64v4i32 V128:$Rn)>;
+
+// Patterns for vector long shift (by element width). These need to match all
+// three of zext, sext and anyext so it's easier to pull the patterns out of the
+// definition.
+multiclass SIMDVectorLShiftLongBySizeBHSPats<SDPatternOperator ext> {
+ def : Pat<(AArch64vshl (v8i16 (ext (v8i8 V64:$Rn))), (i32 8)),
+ (SHLLv8i8 V64:$Rn)>;
+ def : Pat<(AArch64vshl (v8i16 (ext (extract_high_v16i8 V128:$Rn))), (i32 8)),
+ (SHLLv16i8 V128:$Rn)>;
+ def : Pat<(AArch64vshl (v4i32 (ext (v4i16 V64:$Rn))), (i32 16)),
+ (SHLLv4i16 V64:$Rn)>;
+ def : Pat<(AArch64vshl (v4i32 (ext (extract_high_v8i16 V128:$Rn))), (i32 16)),
+ (SHLLv8i16 V128:$Rn)>;
+ def : Pat<(AArch64vshl (v2i64 (ext (v2i32 V64:$Rn))), (i32 32)),
+ (SHLLv2i32 V64:$Rn)>;
+ def : Pat<(AArch64vshl (v2i64 (ext (extract_high_v4i32 V128:$Rn))), (i32 32)),
+ (SHLLv4i32 V128:$Rn)>;
+}
+
+defm : SIMDVectorLShiftLongBySizeBHSPats<anyext>;
+defm : SIMDVectorLShiftLongBySizeBHSPats<zext>;
+defm : SIMDVectorLShiftLongBySizeBHSPats<sext>;
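// For illustration (not part of this patch): with these patterns a widening
// shift such as (v8i16 (zext (v8i8 V64:$Rn))) << 8 selects
//   shll v0.8h, v1.8b, #8
// and the same instruction also covers the sext and anyext forms, which is
// why the patterns are stamped out once per extension kind.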
//===----------------------------------------------------------------------===//
-// GOT access patterns
+// Advanced SIMD three vector instructions.
//===----------------------------------------------------------------------===//
-class GOTLoadSmall<SDNode addrfrag>
- : Pat<(A64GOTLoad (A64WrapperSmall addrfrag:$Hi, addrfrag:$Lo12, 8)),
- (LS64_LDR (ADRPxi addrfrag:$Hi), addrfrag:$Lo12)>;
-
-def : GOTLoadSmall<texternalsym>;
-def : GOTLoadSmall<tglobaladdr>;
-def : GOTLoadSmall<tglobaltlsaddr>;
+defm ADD : SIMDThreeSameVector<0, 0b10000, "add", add>;
+defm ADDP : SIMDThreeSameVector<0, 0b10111, "addp", int_aarch64_neon_addp>;
+defm CMEQ : SIMDThreeSameVector<1, 0b10001, "cmeq", AArch64cmeq>;
+defm CMGE : SIMDThreeSameVector<0, 0b00111, "cmge", AArch64cmge>;
+defm CMGT : SIMDThreeSameVector<0, 0b00110, "cmgt", AArch64cmgt>;
+defm CMHI : SIMDThreeSameVector<1, 0b00110, "cmhi", AArch64cmhi>;
+defm CMHS : SIMDThreeSameVector<1, 0b00111, "cmhs", AArch64cmhs>;
+defm CMTST : SIMDThreeSameVector<0, 0b10001, "cmtst", AArch64cmtst>;
+defm FABD : SIMDThreeSameVectorFP<1,1,0b11010,"fabd", int_aarch64_neon_fabd>;
+defm FACGE : SIMDThreeSameVectorFPCmp<1,0,0b11101,"facge",int_aarch64_neon_facge>;
+defm FACGT : SIMDThreeSameVectorFPCmp<1,1,0b11101,"facgt",int_aarch64_neon_facgt>;
+defm FADDP : SIMDThreeSameVectorFP<1,0,0b11010,"faddp",int_aarch64_neon_addp>;
+defm FADD : SIMDThreeSameVectorFP<0,0,0b11010,"fadd", fadd>;
+defm FCMEQ : SIMDThreeSameVectorFPCmp<0, 0, 0b11100, "fcmeq", AArch64fcmeq>;
+defm FCMGE : SIMDThreeSameVectorFPCmp<1, 0, 0b11100, "fcmge", AArch64fcmge>;
+defm FCMGT : SIMDThreeSameVectorFPCmp<1, 1, 0b11100, "fcmgt", AArch64fcmgt>;
+defm FDIV : SIMDThreeSameVectorFP<1,0,0b11111,"fdiv", fdiv>;
+defm FMAXNMP : SIMDThreeSameVectorFP<1,0,0b11000,"fmaxnmp", int_aarch64_neon_fmaxnmp>;
+defm FMAXNM : SIMDThreeSameVectorFP<0,0,0b11000,"fmaxnm", int_aarch64_neon_fmaxnm>;
+defm FMAXP : SIMDThreeSameVectorFP<1,0,0b11110,"fmaxp", int_aarch64_neon_fmaxp>;
+defm FMAX : SIMDThreeSameVectorFP<0,0,0b11110,"fmax", AArch64fmax>;
+defm FMINNMP : SIMDThreeSameVectorFP<1,1,0b11000,"fminnmp", int_aarch64_neon_fminnmp>;
+defm FMINNM : SIMDThreeSameVectorFP<0,1,0b11000,"fminnm", int_aarch64_neon_fminnm>;
+defm FMINP : SIMDThreeSameVectorFP<1,1,0b11110,"fminp", int_aarch64_neon_fminp>;
+defm FMIN : SIMDThreeSameVectorFP<0,1,0b11110,"fmin", AArch64fmin>;
+
+// NOTE: The operands of the PatFrag are reordered on FMLA/FMLS because the
+// instruction expects the addend first, while the fma intrinsic puts it last.
+defm FMLA : SIMDThreeSameVectorFPTied<0, 0, 0b11001, "fmla",
+ TriOpFrag<(fma node:$RHS, node:$MHS, node:$LHS)> >;
+defm FMLS : SIMDThreeSameVectorFPTied<0, 1, 0b11001, "fmls",
+ TriOpFrag<(fma node:$MHS, (fneg node:$RHS), node:$LHS)> >;
+
+// The following def pats catch the case where the LHS of an FMA is negated.
+// The TriOpFrag above catches the case where the middle operand is negated.
+def : Pat<(v2f32 (fma (fneg V64:$Rn), V64:$Rm, V64:$Rd)),
+ (FMLSv2f32 V64:$Rd, V64:$Rn, V64:$Rm)>;
+
+def : Pat<(v4f32 (fma (fneg V128:$Rn), V128:$Rm, V128:$Rd)),
+ (FMLSv4f32 V128:$Rd, V128:$Rn, V128:$Rm)>;
+
+def : Pat<(v2f64 (fma (fneg V128:$Rn), V128:$Rm, V128:$Rd)),
+ (FMLSv2f64 V128:$Rd, V128:$Rn, V128:$Rm)>;
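// For illustration (not part of this patch): "fmla v0.2s, v1.2s, v2.2s"
// computes v0 += v1 * v2 with the tied accumulator first in the instruction,
// whereas fma(a, b, c) = a * b + c puts the addend last; the TriOpFrag and
// the three patterns above reconcile the two orderings.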
+
+defm FMULX : SIMDThreeSameVectorFP<0,0,0b11011,"fmulx", int_aarch64_neon_fmulx>;
+defm FMUL : SIMDThreeSameVectorFP<1,0,0b11011,"fmul", fmul>;
+defm FRECPS : SIMDThreeSameVectorFP<0,0,0b11111,"frecps", int_aarch64_neon_frecps>;
+defm FRSQRTS : SIMDThreeSameVectorFP<0,1,0b11111,"frsqrts", int_aarch64_neon_frsqrts>;
+defm FSUB : SIMDThreeSameVectorFP<0,1,0b11010,"fsub", fsub>;
+defm MLA : SIMDThreeSameVectorBHSTied<0, 0b10010, "mla",
+ TriOpFrag<(add node:$LHS, (mul node:$MHS, node:$RHS))> >;
+defm MLS : SIMDThreeSameVectorBHSTied<1, 0b10010, "mls",
+ TriOpFrag<(sub node:$LHS, (mul node:$MHS, node:$RHS))> >;
+defm MUL : SIMDThreeSameVectorBHS<0, 0b10011, "mul", mul>;
+defm PMUL : SIMDThreeSameVectorB<1, 0b10011, "pmul", int_aarch64_neon_pmul>;
+defm SABA : SIMDThreeSameVectorBHSTied<0, 0b01111, "saba",
+ TriOpFrag<(add node:$LHS, (int_aarch64_neon_sabd node:$MHS, node:$RHS))> >;
+defm SABD : SIMDThreeSameVectorBHS<0,0b01110,"sabd", int_aarch64_neon_sabd>;
+defm SHADD : SIMDThreeSameVectorBHS<0,0b00000,"shadd", int_aarch64_neon_shadd>;
+defm SHSUB : SIMDThreeSameVectorBHS<0,0b00100,"shsub", int_aarch64_neon_shsub>;
+defm SMAXP : SIMDThreeSameVectorBHS<0,0b10100,"smaxp", int_aarch64_neon_smaxp>;
+defm SMAX : SIMDThreeSameVectorBHS<0,0b01100,"smax", int_aarch64_neon_smax>;
+defm SMINP : SIMDThreeSameVectorBHS<0,0b10101,"sminp", int_aarch64_neon_sminp>;
+defm SMIN : SIMDThreeSameVectorBHS<0,0b01101,"smin", int_aarch64_neon_smin>;
+defm SQADD : SIMDThreeSameVector<0,0b00001,"sqadd", int_aarch64_neon_sqadd>;
+defm SQDMULH : SIMDThreeSameVectorHS<0,0b10110,"sqdmulh",int_aarch64_neon_sqdmulh>;
+defm SQRDMULH : SIMDThreeSameVectorHS<1,0b10110,"sqrdmulh",int_aarch64_neon_sqrdmulh>;
+defm SQRSHL : SIMDThreeSameVector<0,0b01011,"sqrshl", int_aarch64_neon_sqrshl>;
+defm SQSHL : SIMDThreeSameVector<0,0b01001,"sqshl", int_aarch64_neon_sqshl>;
+defm SQSUB : SIMDThreeSameVector<0,0b00101,"sqsub", int_aarch64_neon_sqsub>;
+defm SRHADD : SIMDThreeSameVectorBHS<0,0b00010,"srhadd",int_aarch64_neon_srhadd>;
+defm SRSHL : SIMDThreeSameVector<0,0b01010,"srshl", int_aarch64_neon_srshl>;
+defm SSHL : SIMDThreeSameVector<0,0b01000,"sshl", int_aarch64_neon_sshl>;
+defm SUB : SIMDThreeSameVector<1,0b10000,"sub", sub>;
+defm UABA : SIMDThreeSameVectorBHSTied<1, 0b01111, "uaba",
+ TriOpFrag<(add node:$LHS, (int_aarch64_neon_uabd node:$MHS, node:$RHS))> >;
+defm UABD : SIMDThreeSameVectorBHS<1,0b01110,"uabd", int_aarch64_neon_uabd>;
+defm UHADD : SIMDThreeSameVectorBHS<1,0b00000,"uhadd", int_aarch64_neon_uhadd>;
+defm UHSUB : SIMDThreeSameVectorBHS<1,0b00100,"uhsub", int_aarch64_neon_uhsub>;
+defm UMAXP : SIMDThreeSameVectorBHS<1,0b10100,"umaxp", int_aarch64_neon_umaxp>;
+defm UMAX : SIMDThreeSameVectorBHS<1,0b01100,"umax", int_aarch64_neon_umax>;
+defm UMINP : SIMDThreeSameVectorBHS<1,0b10101,"uminp", int_aarch64_neon_uminp>;
+defm UMIN : SIMDThreeSameVectorBHS<1,0b01101,"umin", int_aarch64_neon_umin>;
+defm UQADD : SIMDThreeSameVector<1,0b00001,"uqadd", int_aarch64_neon_uqadd>;
+defm UQRSHL : SIMDThreeSameVector<1,0b01011,"uqrshl", int_aarch64_neon_uqrshl>;
+defm UQSHL : SIMDThreeSameVector<1,0b01001,"uqshl", int_aarch64_neon_uqshl>;
+defm UQSUB : SIMDThreeSameVector<1,0b00101,"uqsub", int_aarch64_neon_uqsub>;
+defm URHADD : SIMDThreeSameVectorBHS<1,0b00010,"urhadd", int_aarch64_neon_urhadd>;
+defm URSHL : SIMDThreeSameVector<1,0b01010,"urshl", int_aarch64_neon_urshl>;
+defm USHL : SIMDThreeSameVector<1,0b01000,"ushl", int_aarch64_neon_ushl>;
+
+defm AND : SIMDLogicalThreeVector<0, 0b00, "and", and>;
+defm BIC : SIMDLogicalThreeVector<0, 0b01, "bic",
+ BinOpFrag<(and node:$LHS, (vnot node:$RHS))> >;
+defm BIF : SIMDLogicalThreeVector<1, 0b11, "bif">;
+defm BIT : SIMDLogicalThreeVectorTied<1, 0b10, "bit", AArch64bit>;
+defm BSL : SIMDLogicalThreeVectorTied<1, 0b01, "bsl",
+ TriOpFrag<(or (and node:$LHS, node:$MHS), (and (vnot node:$LHS), node:$RHS))>>;
+defm EOR : SIMDLogicalThreeVector<1, 0b00, "eor", xor>;
+defm ORN : SIMDLogicalThreeVector<0, 0b11, "orn",
+ BinOpFrag<(or node:$LHS, (vnot node:$RHS))> >;
+defm ORR : SIMDLogicalThreeVector<0, 0b10, "orr", or>;
+
+def : Pat<(AArch64bsl (v8i8 V64:$Rd), V64:$Rn, V64:$Rm),
+ (BSLv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>;
+def : Pat<(AArch64bsl (v4i16 V64:$Rd), V64:$Rn, V64:$Rm),
+ (BSLv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>;
+def : Pat<(AArch64bsl (v2i32 V64:$Rd), V64:$Rn, V64:$Rm),
+ (BSLv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>;
+def : Pat<(AArch64bsl (v1i64 V64:$Rd), V64:$Rn, V64:$Rm),
+ (BSLv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>;
+
+def : Pat<(AArch64bsl (v16i8 V128:$Rd), V128:$Rn, V128:$Rm),
+ (BSLv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>;
+def : Pat<(AArch64bsl (v8i16 V128:$Rd), V128:$Rn, V128:$Rm),
+ (BSLv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>;
+def : Pat<(AArch64bsl (v4i32 V128:$Rd), V128:$Rn, V128:$Rm),
+ (BSLv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>;
+def : Pat<(AArch64bsl (v2i64 V128:$Rd), V128:$Rn, V128:$Rm),
+ (BSLv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>;
+
+def : InstAlias<"mov{\t$dst.16b, $src.16b|.16b\t$dst, $src}",
+ (ORRv16i8 V128:$dst, V128:$src, V128:$src), 1>;
+def : InstAlias<"mov{\t$dst.8h, $src.8h|.8h\t$dst, $src}",
+ (ORRv16i8 V128:$dst, V128:$src, V128:$src), 0>;
+def : InstAlias<"mov{\t$dst.4s, $src.4s|.4s\t$dst, $src}",
+ (ORRv16i8 V128:$dst, V128:$src, V128:$src), 0>;
+def : InstAlias<"mov{\t$dst.2d, $src.2d|.2d\t$dst, $src}",
+ (ORRv16i8 V128:$dst, V128:$src, V128:$src), 0>;
+
+def : InstAlias<"mov{\t$dst.8b, $src.8b|.8b\t$dst, $src}",
+ (ORRv8i8 V64:$dst, V64:$src, V64:$src), 1>;
+def : InstAlias<"mov{\t$dst.4h, $src.4h|.4h\t$dst, $src}",
+ (ORRv8i8 V64:$dst, V64:$src, V64:$src), 0>;
+def : InstAlias<"mov{\t$dst.2s, $src.2s|.2s\t$dst, $src}",
+ (ORRv8i8 V64:$dst, V64:$src, V64:$src), 0>;
+def : InstAlias<"mov{\t$dst.1d, $src.1d|.1d\t$dst, $src}",
+ (ORRv8i8 V64:$dst, V64:$src, V64:$src), 0>;
+
+def : InstAlias<"{cmls\t$dst.8b, $src1.8b, $src2.8b" #
+ "|cmls.8b\t$dst, $src1, $src2}",
+ (CMHSv8i8 V64:$dst, V64:$src2, V64:$src1), 0>;
+def : InstAlias<"{cmls\t$dst.16b, $src1.16b, $src2.16b" #
+ "|cmls.16b\t$dst, $src1, $src2}",
+ (CMHSv16i8 V128:$dst, V128:$src2, V128:$src1), 0>;
+def : InstAlias<"{cmls\t$dst.4h, $src1.4h, $src2.4h" #
+ "|cmls.4h\t$dst, $src1, $src2}",
+ (CMHSv4i16 V64:$dst, V64:$src2, V64:$src1), 0>;
+def : InstAlias<"{cmls\t$dst.8h, $src1.8h, $src2.8h" #
+ "|cmls.8h\t$dst, $src1, $src2}",
+ (CMHSv8i16 V128:$dst, V128:$src2, V128:$src1), 0>;
+def : InstAlias<"{cmls\t$dst.2s, $src1.2s, $src2.2s" #
+ "|cmls.2s\t$dst, $src1, $src2}",
+ (CMHSv2i32 V64:$dst, V64:$src2, V64:$src1), 0>;
+def : InstAlias<"{cmls\t$dst.4s, $src1.4s, $src2.4s" #
+ "|cmls.4s\t$dst, $src1, $src2}",
+ (CMHSv4i32 V128:$dst, V128:$src2, V128:$src1), 0>;
+def : InstAlias<"{cmls\t$dst.2d, $src1.2d, $src2.2d" #
+ "|cmls.2d\t$dst, $src1, $src2}",
+ (CMHSv2i64 V128:$dst, V128:$src2, V128:$src1), 0>;
+
+def : InstAlias<"{cmlo\t$dst.8b, $src1.8b, $src2.8b" #
+ "|cmlo.8b\t$dst, $src1, $src2}",
+ (CMHIv8i8 V64:$dst, V64:$src2, V64:$src1), 0>;
+def : InstAlias<"{cmlo\t$dst.16b, $src1.16b, $src2.16b" #
+ "|cmlo.16b\t$dst, $src1, $src2}",
+ (CMHIv16i8 V128:$dst, V128:$src2, V128:$src1), 0>;
+def : InstAlias<"{cmlo\t$dst.4h, $src1.4h, $src2.4h" #
+ "|cmlo.4h\t$dst, $src1, $src2}",
+ (CMHIv4i16 V64:$dst, V64:$src2, V64:$src1), 0>;
+def : InstAlias<"{cmlo\t$dst.8h, $src1.8h, $src2.8h" #
+ "|cmlo.8h\t$dst, $src1, $src2}",
+ (CMHIv8i16 V128:$dst, V128:$src2, V128:$src1), 0>;
+def : InstAlias<"{cmlo\t$dst.2s, $src1.2s, $src2.2s" #
+ "|cmlo.2s\t$dst, $src1, $src2}",
+ (CMHIv2i32 V64:$dst, V64:$src2, V64:$src1), 0>;
+def : InstAlias<"{cmlo\t$dst.4s, $src1.4s, $src2.4s" #
+ "|cmlo.4s\t$dst, $src1, $src2}",
+ (CMHIv4i32 V128:$dst, V128:$src2, V128:$src1), 0>;
+def : InstAlias<"{cmlo\t$dst.2d, $src1.2d, $src2.2d" #
+ "|cmlo.2d\t$dst, $src1, $src2}",
+ (CMHIv2i64 V128:$dst, V128:$src2, V128:$src1), 0>;
+
+def : InstAlias<"{cmle\t$dst.8b, $src1.8b, $src2.8b" #
+ "|cmle.8b\t$dst, $src1, $src2}",
+ (CMGEv8i8 V64:$dst, V64:$src2, V64:$src1), 0>;
+def : InstAlias<"{cmle\t$dst.16b, $src1.16b, $src2.16b" #
+ "|cmle.16b\t$dst, $src1, $src2}",
+ (CMGEv16i8 V128:$dst, V128:$src2, V128:$src1), 0>;
+def : InstAlias<"{cmle\t$dst.4h, $src1.4h, $src2.4h" #
+ "|cmle.4h\t$dst, $src1, $src2}",
+ (CMGEv4i16 V64:$dst, V64:$src2, V64:$src1), 0>;
+def : InstAlias<"{cmle\t$dst.8h, $src1.8h, $src2.8h" #
+ "|cmle.8h\t$dst, $src1, $src2}",
+ (CMGEv8i16 V128:$dst, V128:$src2, V128:$src1), 0>;
+def : InstAlias<"{cmle\t$dst.2s, $src1.2s, $src2.2s" #
+ "|cmle.2s\t$dst, $src1, $src2}",
+ (CMGEv2i32 V64:$dst, V64:$src2, V64:$src1), 0>;
+def : InstAlias<"{cmle\t$dst.4s, $src1.4s, $src2.4s" #
+ "|cmle.4s\t$dst, $src1, $src2}",
+ (CMGEv4i32 V128:$dst, V128:$src2, V128:$src1), 0>;
+def : InstAlias<"{cmle\t$dst.2d, $src1.2d, $src2.2d" #
+ "|cmle.2d\t$dst, $src1, $src2}",
+ (CMGEv2i64 V128:$dst, V128:$src2, V128:$src1), 0>;
+
+def : InstAlias<"{cmlt\t$dst.8b, $src1.8b, $src2.8b" #
+ "|cmlt.8b\t$dst, $src1, $src2}",
+ (CMGTv8i8 V64:$dst, V64:$src2, V64:$src1), 0>;
+def : InstAlias<"{cmlt\t$dst.16b, $src1.16b, $src2.16b" #
+ "|cmlt.16b\t$dst, $src1, $src2}",
+ (CMGTv16i8 V128:$dst, V128:$src2, V128:$src1), 0>;
+def : InstAlias<"{cmlt\t$dst.4h, $src1.4h, $src2.4h" #
+ "|cmlt.4h\t$dst, $src1, $src2}",
+ (CMGTv4i16 V64:$dst, V64:$src2, V64:$src1), 0>;
+def : InstAlias<"{cmlt\t$dst.8h, $src1.8h, $src2.8h" #
+ "|cmlt.8h\t$dst, $src1, $src2}",
+ (CMGTv8i16 V128:$dst, V128:$src2, V128:$src1), 0>;
+def : InstAlias<"{cmlt\t$dst.2s, $src1.2s, $src2.2s" #
+ "|cmlt.2s\t$dst, $src1, $src2}",
+ (CMGTv2i32 V64:$dst, V64:$src2, V64:$src1), 0>;
+def : InstAlias<"{cmlt\t$dst.4s, $src1.4s, $src2.4s" #
+ "|cmlt.4s\t$dst, $src1, $src2}",
+ (CMGTv4i32 V128:$dst, V128:$src2, V128:$src1), 0>;
+def : InstAlias<"{cmlt\t$dst.2d, $src1.2d, $src2.2d" #
+ "|cmlt.2d\t$dst, $src1, $src2}",
+ (CMGTv2i64 V128:$dst, V128:$src2, V128:$src1), 0>;
+
+def : InstAlias<"{fcmle\t$dst.2s, $src1.2s, $src2.2s" #
+ "|fcmle.2s\t$dst, $src1, $src2}",
+ (FCMGEv2f32 V64:$dst, V64:$src2, V64:$src1), 0>;
+def : InstAlias<"{fcmle\t$dst.4s, $src1.4s, $src2.4s" #
+ "|fcmle.4s\t$dst, $src1, $src2}",
+ (FCMGEv4f32 V128:$dst, V128:$src2, V128:$src1), 0>;
+def : InstAlias<"{fcmle\t$dst.2d, $src1.2d, $src2.2d" #
+ "|fcmle.2d\t$dst, $src1, $src2}",
+ (FCMGEv2f64 V128:$dst, V128:$src2, V128:$src1), 0>;
+
+def : InstAlias<"{fcmlt\t$dst.2s, $src1.2s, $src2.2s" #
+ "|fcmlt.2s\t$dst, $src1, $src2}",
+ (FCMGTv2f32 V64:$dst, V64:$src2, V64:$src1), 0>;
+def : InstAlias<"{fcmlt\t$dst.4s, $src1.4s, $src2.4s" #
+ "|fcmlt.4s\t$dst, $src1, $src2}",
+ (FCMGTv4f32 V128:$dst, V128:$src2, V128:$src1), 0>;
+def : InstAlias<"{fcmlt\t$dst.2d, $src1.2d, $src2.2d" #
+ "|fcmlt.2d\t$dst, $src1, $src2}",
+ (FCMGTv2f64 V128:$dst, V128:$src2, V128:$src1), 0>;
+
+def : InstAlias<"{facle\t$dst.2s, $src1.2s, $src2.2s" #
+ "|facle.2s\t$dst, $src1, $src2}",
+ (FACGEv2f32 V64:$dst, V64:$src2, V64:$src1), 0>;
+def : InstAlias<"{facle\t$dst.4s, $src1.4s, $src2.4s" #
+ "|facle.4s\t$dst, $src1, $src2}",
+ (FACGEv4f32 V128:$dst, V128:$src2, V128:$src1), 0>;
+def : InstAlias<"{facle\t$dst.2d, $src1.2d, $src2.2d" #
+ "|facle.2d\t$dst, $src1, $src2}",
+ (FACGEv2f64 V128:$dst, V128:$src2, V128:$src1), 0>;
+
+def : InstAlias<"{faclt\t$dst.2s, $src1.2s, $src2.2s" #
+ "|faclt.2s\t$dst, $src1, $src2}",
+ (FACGTv2f32 V64:$dst, V64:$src2, V64:$src1), 0>;
+def : InstAlias<"{faclt\t$dst.4s, $src1.4s, $src2.4s" #
+ "|faclt.4s\t$dst, $src1, $src2}",
+ (FACGTv4f32 V128:$dst, V128:$src2, V128:$src1), 0>;
+def : InstAlias<"{faclt\t$dst.2d, $src1.2d, $src2.2d" #
+ "|faclt.2d\t$dst, $src1, $src2}",
+ (FACGTv2f64 V128:$dst, V128:$src2, V128:$src1), 0>;
//===----------------------------------------------------------------------===//
-// Tail call handling
+// Advanced SIMD three scalar instructions.
//===----------------------------------------------------------------------===//
-let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [XSP] in {
- def TC_RETURNdi
- : PseudoInst<(outs), (ins i64imm:$dst, i32imm:$FPDiff),
- [(AArch64tcret tglobaladdr:$dst, (i32 timm:$FPDiff))]>;
-
- def TC_RETURNxi
- : PseudoInst<(outs), (ins tcGPR64:$dst, i32imm:$FPDiff),
- [(AArch64tcret i64:$dst, (i32 timm:$FPDiff))]>;
-}
-
-let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
- Uses = [XSP] in {
- def TAIL_Bimm : A64PseudoExpand<(outs), (ins bimm_target:$Label), [],
- (Bimm bimm_target:$Label)>;
-
- def TAIL_BRx : A64PseudoExpand<(outs), (ins tcGPR64:$Rd), [],
- (BRx GPR64:$Rd)>;
-}
-
-
-def : Pat<(AArch64tcret texternalsym:$dst, (i32 timm:$FPDiff)),
- (TC_RETURNdi texternalsym:$dst, imm:$FPDiff)>;
+defm ADD : SIMDThreeScalarD<0, 0b10000, "add", add>;
+defm CMEQ : SIMDThreeScalarD<1, 0b10001, "cmeq", AArch64cmeq>;
+defm CMGE : SIMDThreeScalarD<0, 0b00111, "cmge", AArch64cmge>;
+defm CMGT : SIMDThreeScalarD<0, 0b00110, "cmgt", AArch64cmgt>;
+defm CMHI : SIMDThreeScalarD<1, 0b00110, "cmhi", AArch64cmhi>;
+defm CMHS : SIMDThreeScalarD<1, 0b00111, "cmhs", AArch64cmhs>;
+defm CMTST : SIMDThreeScalarD<0, 0b10001, "cmtst", AArch64cmtst>;
+defm FABD : SIMDThreeScalarSD<1, 1, 0b11010, "fabd", int_aarch64_sisd_fabd>;
+def : Pat<(v1f64 (int_aarch64_neon_fabd (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
+ (FABD64 FPR64:$Rn, FPR64:$Rm)>;
+defm FACGE : SIMDThreeScalarFPCmp<1, 0, 0b11101, "facge",
+ int_aarch64_neon_facge>;
+defm FACGT : SIMDThreeScalarFPCmp<1, 1, 0b11101, "facgt",
+ int_aarch64_neon_facgt>;
+defm FCMEQ : SIMDThreeScalarFPCmp<0, 0, 0b11100, "fcmeq", AArch64fcmeq>;
+defm FCMGE : SIMDThreeScalarFPCmp<1, 0, 0b11100, "fcmge", AArch64fcmge>;
+defm FCMGT : SIMDThreeScalarFPCmp<1, 1, 0b11100, "fcmgt", AArch64fcmgt>;
+defm FMULX : SIMDThreeScalarSD<0, 0, 0b11011, "fmulx", int_aarch64_neon_fmulx>;
+defm FRECPS : SIMDThreeScalarSD<0, 0, 0b11111, "frecps", int_aarch64_neon_frecps>;
+defm FRSQRTS : SIMDThreeScalarSD<0, 1, 0b11111, "frsqrts", int_aarch64_neon_frsqrts>;
+defm SQADD : SIMDThreeScalarBHSD<0, 0b00001, "sqadd", int_aarch64_neon_sqadd>;
+defm SQDMULH : SIMDThreeScalarHS< 0, 0b10110, "sqdmulh", int_aarch64_neon_sqdmulh>;
+defm SQRDMULH : SIMDThreeScalarHS< 1, 0b10110, "sqrdmulh", int_aarch64_neon_sqrdmulh>;
+defm SQRSHL : SIMDThreeScalarBHSD<0, 0b01011, "sqrshl",int_aarch64_neon_sqrshl>;
+defm SQSHL : SIMDThreeScalarBHSD<0, 0b01001, "sqshl", int_aarch64_neon_sqshl>;
+defm SQSUB : SIMDThreeScalarBHSD<0, 0b00101, "sqsub", int_aarch64_neon_sqsub>;
+defm SRSHL : SIMDThreeScalarD< 0, 0b01010, "srshl", int_aarch64_neon_srshl>;
+defm SSHL : SIMDThreeScalarD< 0, 0b01000, "sshl", int_aarch64_neon_sshl>;
+defm SUB : SIMDThreeScalarD< 1, 0b10000, "sub", sub>;
+defm UQADD : SIMDThreeScalarBHSD<1, 0b00001, "uqadd", int_aarch64_neon_uqadd>;
+defm UQRSHL : SIMDThreeScalarBHSD<1, 0b01011, "uqrshl",int_aarch64_neon_uqrshl>;
+defm UQSHL : SIMDThreeScalarBHSD<1, 0b01001, "uqshl", int_aarch64_neon_uqshl>;
+defm UQSUB : SIMDThreeScalarBHSD<1, 0b00101, "uqsub", int_aarch64_neon_uqsub>;
+defm URSHL : SIMDThreeScalarD< 1, 0b01010, "urshl", int_aarch64_neon_urshl>;
+defm USHL : SIMDThreeScalarD< 1, 0b01000, "ushl", int_aarch64_neon_ushl>;
+
+def : InstAlias<"cmls $dst, $src1, $src2",
+ (CMHSv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
+def : InstAlias<"cmle $dst, $src1, $src2",
+ (CMGEv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
+def : InstAlias<"cmlo $dst, $src1, $src2",
+ (CMHIv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
+def : InstAlias<"cmlt $dst, $src1, $src2",
+ (CMGTv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
+def : InstAlias<"fcmle $dst, $src1, $src2",
+ (FCMGE32 FPR32:$dst, FPR32:$src2, FPR32:$src1), 0>;
+def : InstAlias<"fcmle $dst, $src1, $src2",
+ (FCMGE64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
+def : InstAlias<"fcmlt $dst, $src1, $src2",
+ (FCMGT32 FPR32:$dst, FPR32:$src2, FPR32:$src1), 0>;
+def : InstAlias<"fcmlt $dst, $src1, $src2",
+ (FCMGT64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
+def : InstAlias<"facle $dst, $src1, $src2",
+ (FACGE32 FPR32:$dst, FPR32:$src2, FPR32:$src1), 0>;
+def : InstAlias<"facle $dst, $src1, $src2",
+ (FACGE64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
+def : InstAlias<"faclt $dst, $src1, $src2",
+ (FACGT32 FPR32:$dst, FPR32:$src2, FPR32:$src1), 0>;
+def : InstAlias<"faclt $dst, $src1, $src2",
+ (FACGT64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
//===----------------------------------------------------------------------===//
-// Thread local storage
+// Advanced SIMD three scalar instructions (mixed operands).
//===----------------------------------------------------------------------===//
+defm SQDMULL : SIMDThreeScalarMixedHS<0, 0b11010, "sqdmull",
+ int_aarch64_neon_sqdmulls_scalar>;
+defm SQDMLAL : SIMDThreeScalarMixedTiedHS<0, 0b10010, "sqdmlal">;
+defm SQDMLSL : SIMDThreeScalarMixedTiedHS<0, 0b10110, "sqdmlsl">;
+
+def : Pat<(i64 (int_aarch64_neon_sqadd (i64 FPR64:$Rd),
+ (i64 (int_aarch64_neon_sqdmulls_scalar (i32 FPR32:$Rn),
+ (i32 FPR32:$Rm))))),
+ (SQDMLALi32 FPR64:$Rd, FPR32:$Rn, FPR32:$Rm)>;
+def : Pat<(i64 (int_aarch64_neon_sqsub (i64 FPR64:$Rd),
+ (i64 (int_aarch64_neon_sqdmulls_scalar (i32 FPR32:$Rn),
+ (i32 FPR32:$Rm))))),
+ (SQDMLSLi32 FPR64:$Rd, FPR32:$Rn, FPR32:$Rm)>;
-// This is a pseudo-instruction representing the ".tlsdesccall" directive in
-// assembly. Its effect is to insert an R_AARCH64_TLSDESC_CALL relocation at the
-// current location. It should always be immediately followed by a BLR
-// instruction, and is intended solely for relaxation by the linker.
-
-def : Pat<(A64threadpointer), (MRSxi 0xde82)>;
-
-def TLSDESCCALL : PseudoInst<(outs), (ins i64imm:$Lbl), []> {
- let hasSideEffects = 1;
-}
-
-def TLSDESC_BLRx : PseudoInst<(outs), (ins GPR64:$Rn, i64imm:$Var),
- [(A64tlsdesc_blr i64:$Rn, tglobaltlsaddr:$Var)]> {
- let isCall = 1;
- let Defs = [X30];
-}
+//===----------------------------------------------------------------------===//
+// Advanced SIMD two scalar instructions.
+//===----------------------------------------------------------------------===//
-def : Pat<(A64tlsdesc_blr i64:$Rn, texternalsym:$Var),
- (TLSDESC_BLRx $Rn, texternalsym:$Var)>;
+defm ABS : SIMDTwoScalarD< 0, 0b01011, "abs", int_aarch64_neon_abs>;
+defm CMEQ : SIMDCmpTwoScalarD< 0, 0b01001, "cmeq", AArch64cmeqz>;
+defm CMGE : SIMDCmpTwoScalarD< 1, 0b01000, "cmge", AArch64cmgez>;
+defm CMGT : SIMDCmpTwoScalarD< 0, 0b01000, "cmgt", AArch64cmgtz>;
+defm CMLE : SIMDCmpTwoScalarD< 1, 0b01001, "cmle", AArch64cmlez>;
+defm CMLT : SIMDCmpTwoScalarD< 0, 0b01010, "cmlt", AArch64cmltz>;
+defm FCMEQ : SIMDCmpTwoScalarSD<0, 1, 0b01101, "fcmeq", AArch64fcmeqz>;
+defm FCMGE : SIMDCmpTwoScalarSD<1, 1, 0b01100, "fcmge", AArch64fcmgez>;
+defm FCMGT : SIMDCmpTwoScalarSD<0, 1, 0b01100, "fcmgt", AArch64fcmgtz>;
+defm FCMLE : SIMDCmpTwoScalarSD<1, 1, 0b01101, "fcmle", AArch64fcmlez>;
+defm FCMLT : SIMDCmpTwoScalarSD<0, 1, 0b01110, "fcmlt", AArch64fcmltz>;
+defm FCVTAS : SIMDTwoScalarSD< 0, 0, 0b11100, "fcvtas">;
+defm FCVTAU : SIMDTwoScalarSD< 1, 0, 0b11100, "fcvtau">;
+defm FCVTMS : SIMDTwoScalarSD< 0, 0, 0b11011, "fcvtms">;
+defm FCVTMU : SIMDTwoScalarSD< 1, 0, 0b11011, "fcvtmu">;
+defm FCVTNS : SIMDTwoScalarSD< 0, 0, 0b11010, "fcvtns">;
+defm FCVTNU : SIMDTwoScalarSD< 1, 0, 0b11010, "fcvtnu">;
+defm FCVTPS : SIMDTwoScalarSD< 0, 1, 0b11010, "fcvtps">;
+defm FCVTPU : SIMDTwoScalarSD< 1, 1, 0b11010, "fcvtpu">;
+def FCVTXNv1i64 : SIMDInexactCvtTwoScalar<0b10110, "fcvtxn">;
+defm FCVTZS : SIMDTwoScalarSD< 0, 1, 0b11011, "fcvtzs">;
+defm FCVTZU : SIMDTwoScalarSD< 1, 1, 0b11011, "fcvtzu">;
+defm FRECPE : SIMDTwoScalarSD< 0, 1, 0b11101, "frecpe">;
+defm FRECPX : SIMDTwoScalarSD< 0, 1, 0b11111, "frecpx">;
+defm FRSQRTE : SIMDTwoScalarSD< 1, 1, 0b11101, "frsqrte">;
+defm NEG : SIMDTwoScalarD< 1, 0b01011, "neg",
+ UnOpFrag<(sub immAllZerosV, node:$LHS)> >;
+defm SCVTF : SIMDTwoScalarCVTSD< 0, 0, 0b11101, "scvtf", AArch64sitof>;
+defm SQABS : SIMDTwoScalarBHSD< 0, 0b00111, "sqabs", int_aarch64_neon_sqabs>;
+defm SQNEG : SIMDTwoScalarBHSD< 1, 0b00111, "sqneg", int_aarch64_neon_sqneg>;
+defm SQXTN : SIMDTwoScalarMixedBHS< 0, 0b10100, "sqxtn", int_aarch64_neon_scalar_sqxtn>;
+defm SQXTUN : SIMDTwoScalarMixedBHS< 1, 0b10010, "sqxtun", int_aarch64_neon_scalar_sqxtun>;
+defm SUQADD : SIMDTwoScalarBHSDTied< 0, 0b00011, "suqadd",
+ int_aarch64_neon_suqadd>;
+defm UCVTF : SIMDTwoScalarCVTSD< 1, 0, 0b11101, "ucvtf", AArch64uitof>;
+defm UQXTN : SIMDTwoScalarMixedBHS<1, 0b10100, "uqxtn", int_aarch64_neon_scalar_uqxtn>;
+defm USQADD : SIMDTwoScalarBHSDTied< 1, 0b00011, "usqadd",
+ int_aarch64_neon_usqadd>;
+
+def : Pat<(AArch64neg (v1i64 V64:$Rn)), (NEGv1i64 V64:$Rn)>;
+
+def : Pat<(v1i64 (int_aarch64_neon_fcvtas (v1f64 FPR64:$Rn))),
+ (FCVTASv1i64 FPR64:$Rn)>;
+def : Pat<(v1i64 (int_aarch64_neon_fcvtau (v1f64 FPR64:$Rn))),
+ (FCVTAUv1i64 FPR64:$Rn)>;
+def : Pat<(v1i64 (int_aarch64_neon_fcvtms (v1f64 FPR64:$Rn))),
+ (FCVTMSv1i64 FPR64:$Rn)>;
+def : Pat<(v1i64 (int_aarch64_neon_fcvtmu (v1f64 FPR64:$Rn))),
+ (FCVTMUv1i64 FPR64:$Rn)>;
+def : Pat<(v1i64 (int_aarch64_neon_fcvtns (v1f64 FPR64:$Rn))),
+ (FCVTNSv1i64 FPR64:$Rn)>;
+def : Pat<(v1i64 (int_aarch64_neon_fcvtnu (v1f64 FPR64:$Rn))),
+ (FCVTNUv1i64 FPR64:$Rn)>;
+def : Pat<(v1i64 (int_aarch64_neon_fcvtps (v1f64 FPR64:$Rn))),
+ (FCVTPSv1i64 FPR64:$Rn)>;
+def : Pat<(v1i64 (int_aarch64_neon_fcvtpu (v1f64 FPR64:$Rn))),
+ (FCVTPUv1i64 FPR64:$Rn)>;
+
+def : Pat<(f32 (int_aarch64_neon_frecpe (f32 FPR32:$Rn))),
+ (FRECPEv1i32 FPR32:$Rn)>;
+def : Pat<(f64 (int_aarch64_neon_frecpe (f64 FPR64:$Rn))),
+ (FRECPEv1i64 FPR64:$Rn)>;
+def : Pat<(v1f64 (int_aarch64_neon_frecpe (v1f64 FPR64:$Rn))),
+ (FRECPEv1i64 FPR64:$Rn)>;
+
+def : Pat<(f32 (int_aarch64_neon_frecpx (f32 FPR32:$Rn))),
+ (FRECPXv1i32 FPR32:$Rn)>;
+def : Pat<(f64 (int_aarch64_neon_frecpx (f64 FPR64:$Rn))),
+ (FRECPXv1i64 FPR64:$Rn)>;
+
+def : Pat<(f32 (int_aarch64_neon_frsqrte (f32 FPR32:$Rn))),
+ (FRSQRTEv1i32 FPR32:$Rn)>;
+def : Pat<(f64 (int_aarch64_neon_frsqrte (f64 FPR64:$Rn))),
+ (FRSQRTEv1i64 FPR64:$Rn)>;
+def : Pat<(v1f64 (int_aarch64_neon_frsqrte (v1f64 FPR64:$Rn))),
+ (FRSQRTEv1i64 FPR64:$Rn)>;
+
+// If an integer is about to be converted to a floating-point value, just
+// load it on the floating-point unit instead of going through a GPR.
+// Here are the patterns for 8- and 16-bit to float.
+// 8-bit -> float.
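+// For example (an illustrative sketch): converting a loaded uint8_t to float
+// can be emitted as
+//   ldr   b0, [x0, x1]    // load the byte straight into a SIMD register
+//   ucvtf s0, s0          // convert in place
+// rather than a GPR load followed by a GPR -> FPR transfer.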
+multiclass UIntToFPROLoadPat<ValueType DstTy, ValueType SrcTy,
+ SDPatternOperator loadop, Instruction UCVTF,
+ ROAddrMode ro, Instruction LDRW, Instruction LDRX,
+ SubRegIndex sub> {
+ def : Pat<(DstTy (uint_to_fp (SrcTy
+ (loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm,
+ ro.Wext:$extend))))),
+ (UCVTF (INSERT_SUBREG (DstTy (IMPLICIT_DEF)),
+ (LDRW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend),
+ sub))>;
+
+ def : Pat<(DstTy (uint_to_fp (SrcTy
+ (loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm,
+ ro.Wext:$extend))))),
+ (UCVTF (INSERT_SUBREG (DstTy (IMPLICIT_DEF)),
+ (LDRX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend),
+ sub))>;
+}
+
+defm : UIntToFPROLoadPat<f32, i32, zextloadi8,
+ UCVTFv1i32, ro8, LDRBroW, LDRBroX, bsub>;
+def : Pat <(f32 (uint_to_fp (i32
+ (zextloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))),
+ (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)),
+ (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub))>;
+def : Pat <(f32 (uint_to_fp (i32
+ (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))))),
+ (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)),
+ (LDURBi GPR64sp:$Rn, simm9:$offset), bsub))>;
+// 16-bit -> float.
+defm : UIntToFPROLoadPat<f32, i32, zextloadi16,
+ UCVTFv1i32, ro16, LDRHroW, LDRHroX, hsub>;
+def : Pat <(f32 (uint_to_fp (i32
+ (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))),
+ (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)),
+ (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub))>;
+def : Pat <(f32 (uint_to_fp (i32
+ (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))))),
+ (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)),
+ (LDURHi GPR64sp:$Rn, simm9:$offset), hsub))>;
+// The 32-bit case is handled in the target-specific DAG combine
+// performIntToFpCombine.
+// 64-bit integer to 32-bit floating point is not possible with UCVTF on
+// floating-point registers, since both source and destination must have the
+// same size.
+
+// Here are the patterns for 8-, 16-, 32- and 64-bit to double.
+// 8-bit -> double.
+defm : UIntToFPROLoadPat<f64, i32, zextloadi8,
+ UCVTFv1i64, ro8, LDRBroW, LDRBroX, bsub>;
+def : Pat <(f64 (uint_to_fp (i32
+ (zextloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))),
+ (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
+ (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub))>;
+def : Pat <(f64 (uint_to_fp (i32
+ (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))))),
+ (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
+ (LDURBi GPR64sp:$Rn, simm9:$offset), bsub))>;
+// 16-bit -> double.
+defm : UIntToFPROLoadPat<f64, i32, zextloadi16,
+ UCVTFv1i64, ro16, LDRHroW, LDRHroX, hsub>;
+def : Pat <(f64 (uint_to_fp (i32
+ (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))),
+ (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
+ (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub))>;
+def : Pat <(f64 (uint_to_fp (i32
+ (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))))),
+ (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
+ (LDURHi GPR64sp:$Rn, simm9:$offset), hsub))>;
+// 32-bit -> double.
+defm : UIntToFPROLoadPat<f64, i32, load,
+ UCVTFv1i64, ro32, LDRSroW, LDRSroX, ssub>;
+def : Pat <(f64 (uint_to_fp (i32
+ (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))))),
+ (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
+ (LDRSui GPR64sp:$Rn, uimm12s4:$offset), ssub))>;
+def : Pat <(f64 (uint_to_fp (i32
+ (load (am_unscaled32 GPR64sp:$Rn, simm9:$offset))))),
+ (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
+ (LDURSi GPR64sp:$Rn, simm9:$offset), ssub))>;
+// 64-bit -> double is handled in the target-specific DAG combine
+// performIntToFpCombine.
//===----------------------------------------------------------------------===//
-// Bitfield patterns
+// Advanced SIMD three different-sized vector instructions.
//===----------------------------------------------------------------------===//
-def bfi32_lsb_to_immr : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant((32 - N->getZExtValue()) % 32, MVT::i64);
+defm ADDHN : SIMDNarrowThreeVectorBHS<0,0b0100,"addhn", int_aarch64_neon_addhn>;
+defm SUBHN : SIMDNarrowThreeVectorBHS<0,0b0110,"subhn", int_aarch64_neon_subhn>;
+defm RADDHN : SIMDNarrowThreeVectorBHS<1,0b0100,"raddhn",int_aarch64_neon_raddhn>;
+defm RSUBHN : SIMDNarrowThreeVectorBHS<1,0b0110,"rsubhn",int_aarch64_neon_rsubhn>;
+defm PMULL : SIMDDifferentThreeVectorBD<0,0b1110,"pmull",int_aarch64_neon_pmull>;
+defm SABAL : SIMDLongThreeVectorTiedBHSabal<0,0b0101,"sabal",
+ int_aarch64_neon_sabd>;
+defm SABDL : SIMDLongThreeVectorBHSabdl<0, 0b0111, "sabdl",
+ int_aarch64_neon_sabd>;
+defm SADDL : SIMDLongThreeVectorBHS< 0, 0b0000, "saddl",
+ BinOpFrag<(add (sext node:$LHS), (sext node:$RHS))>>;
+defm SADDW : SIMDWideThreeVectorBHS< 0, 0b0001, "saddw",
+ BinOpFrag<(add node:$LHS, (sext node:$RHS))>>;
+defm SMLAL : SIMDLongThreeVectorTiedBHS<0, 0b1000, "smlal",
+ TriOpFrag<(add node:$LHS, (int_aarch64_neon_smull node:$MHS, node:$RHS))>>;
+defm SMLSL : SIMDLongThreeVectorTiedBHS<0, 0b1010, "smlsl",
+ TriOpFrag<(sub node:$LHS, (int_aarch64_neon_smull node:$MHS, node:$RHS))>>;
+defm SMULL : SIMDLongThreeVectorBHS<0, 0b1100, "smull", int_aarch64_neon_smull>;
+defm SQDMLAL : SIMDLongThreeVectorSQDMLXTiedHS<0, 0b1001, "sqdmlal",
+ int_aarch64_neon_sqadd>;
+defm SQDMLSL : SIMDLongThreeVectorSQDMLXTiedHS<0, 0b1011, "sqdmlsl",
+ int_aarch64_neon_sqsub>;
+defm SQDMULL : SIMDLongThreeVectorHS<0, 0b1101, "sqdmull",
+ int_aarch64_neon_sqdmull>;
+defm SSUBL : SIMDLongThreeVectorBHS<0, 0b0010, "ssubl",
+ BinOpFrag<(sub (sext node:$LHS), (sext node:$RHS))>>;
+defm SSUBW : SIMDWideThreeVectorBHS<0, 0b0011, "ssubw",
+ BinOpFrag<(sub node:$LHS, (sext node:$RHS))>>;
+defm UABAL : SIMDLongThreeVectorTiedBHSabal<1, 0b0101, "uabal",
+ int_aarch64_neon_uabd>;
+defm UABDL : SIMDLongThreeVectorBHSabdl<1, 0b0111, "uabdl",
+ int_aarch64_neon_uabd>;
+defm UADDL : SIMDLongThreeVectorBHS<1, 0b0000, "uaddl",
+ BinOpFrag<(add (zext node:$LHS), (zext node:$RHS))>>;
+defm UADDW : SIMDWideThreeVectorBHS<1, 0b0001, "uaddw",
+ BinOpFrag<(add node:$LHS, (zext node:$RHS))>>;
+defm UMLAL : SIMDLongThreeVectorTiedBHS<1, 0b1000, "umlal",
+ TriOpFrag<(add node:$LHS, (int_aarch64_neon_umull node:$MHS, node:$RHS))>>;
+defm UMLSL : SIMDLongThreeVectorTiedBHS<1, 0b1010, "umlsl",
+ TriOpFrag<(sub node:$LHS, (int_aarch64_neon_umull node:$MHS, node:$RHS))>>;
+defm UMULL : SIMDLongThreeVectorBHS<1, 0b1100, "umull", int_aarch64_neon_umull>;
+defm USUBL : SIMDLongThreeVectorBHS<1, 0b0010, "usubl",
+ BinOpFrag<(sub (zext node:$LHS), (zext node:$RHS))>>;
+defm USUBW : SIMDWideThreeVectorBHS< 1, 0b0011, "usubw",
+ BinOpFrag<(sub node:$LHS, (zext node:$RHS))>>;
+
+// Patterns for 64-bit pmull
+def : Pat<(int_aarch64_neon_pmull64 V64:$Rn, V64:$Rm),
+ (PMULLv1i64 V64:$Rn, V64:$Rm)>;
+def : Pat<(int_aarch64_neon_pmull64 (vector_extract (v2i64 V128:$Rn), (i64 1)),
+ (vector_extract (v2i64 V128:$Rm), (i64 1))),
+ (PMULLv2i64 V128:$Rn, V128:$Rm)>;
+
+// CodeGen patterns for addhn and subhn instructions, which can actually be
+// written in LLVM IR without too much difficulty.
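+// For instance, addhn.8b matches IR of roughly this shape (a sketch):
+//   %sum = add <8 x i16> %a, %b
+//   %hi  = lshr <8 x i16> %sum, <i16 8, i16 8, ...>   ; splat of 8
+//   %res = trunc <8 x i16> %hi to <8 x i8>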
+
+// ADDHN
+def : Pat<(v8i8 (trunc (v8i16 (AArch64vlshr (add V128:$Rn, V128:$Rm), (i32 8))))),
+ (ADDHNv8i16_v8i8 V128:$Rn, V128:$Rm)>;
+def : Pat<(v4i16 (trunc (v4i32 (AArch64vlshr (add V128:$Rn, V128:$Rm),
+ (i32 16))))),
+ (ADDHNv4i32_v4i16 V128:$Rn, V128:$Rm)>;
+def : Pat<(v2i32 (trunc (v2i64 (AArch64vlshr (add V128:$Rn, V128:$Rm),
+ (i32 32))))),
+ (ADDHNv2i64_v2i32 V128:$Rn, V128:$Rm)>;
+def : Pat<(concat_vectors (v8i8 V64:$Rd),
+ (trunc (v8i16 (AArch64vlshr (add V128:$Rn, V128:$Rm),
+ (i32 8))))),
+ (ADDHNv8i16_v16i8 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
+ V128:$Rn, V128:$Rm)>;
+def : Pat<(concat_vectors (v4i16 V64:$Rd),
+ (trunc (v4i32 (AArch64vlshr (add V128:$Rn, V128:$Rm),
+ (i32 16))))),
+ (ADDHNv4i32_v8i16 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
+ V128:$Rn, V128:$Rm)>;
+def : Pat<(concat_vectors (v2i32 V64:$Rd),
+ (trunc (v2i64 (AArch64vlshr (add V128:$Rn, V128:$Rm),
+ (i32 32))))),
+ (ADDHNv2i64_v4i32 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
+ V128:$Rn, V128:$Rm)>;
+
+// SUBHN
+def : Pat<(v8i8 (trunc (v8i16 (AArch64vlshr (sub V128:$Rn, V128:$Rm), (i32 8))))),
+ (SUBHNv8i16_v8i8 V128:$Rn, V128:$Rm)>;
+def : Pat<(v4i16 (trunc (v4i32 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
+ (i32 16))))),
+ (SUBHNv4i32_v4i16 V128:$Rn, V128:$Rm)>;
+def : Pat<(v2i32 (trunc (v2i64 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
+ (i32 32))))),
+ (SUBHNv2i64_v2i32 V128:$Rn, V128:$Rm)>;
+def : Pat<(concat_vectors (v8i8 V64:$Rd),
+ (trunc (v8i16 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
+ (i32 8))))),
+ (SUBHNv8i16_v16i8 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
+ V128:$Rn, V128:$Rm)>;
+def : Pat<(concat_vectors (v4i16 V64:$Rd),
+ (trunc (v4i32 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
+ (i32 16))))),
+ (SUBHNv4i32_v8i16 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
+ V128:$Rn, V128:$Rm)>;
+def : Pat<(concat_vectors (v2i32 V64:$Rd),
+ (trunc (v2i64 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
+ (i32 32))))),
+ (SUBHNv2i64_v4i32 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
+ V128:$Rn, V128:$Rm)>;
+
+//----------------------------------------------------------------------------
+// AdvSIMD bitwise extract from vector instruction.
+//----------------------------------------------------------------------------
+
+defm EXT : SIMDBitwiseExtract<"ext">;
+
+def : Pat<(v4i16 (AArch64ext V64:$Rn, V64:$Rm, (i32 imm:$imm))),
+ (EXTv8i8 V64:$Rn, V64:$Rm, imm:$imm)>;
+def : Pat<(v8i16 (AArch64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))),
+ (EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>;
+def : Pat<(v2i32 (AArch64ext V64:$Rn, V64:$Rm, (i32 imm:$imm))),
+ (EXTv8i8 V64:$Rn, V64:$Rm, imm:$imm)>;
+def : Pat<(v2f32 (AArch64ext V64:$Rn, V64:$Rm, (i32 imm:$imm))),
+ (EXTv8i8 V64:$Rn, V64:$Rm, imm:$imm)>;
+def : Pat<(v4i32 (AArch64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))),
+ (EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>;
+def : Pat<(v4f32 (AArch64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))),
+ (EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>;
+def : Pat<(v2i64 (AArch64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))),
+ (EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>;
+def : Pat<(v2f64 (AArch64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))),
+ (EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>;
+
+// We use EXT to handle extract_subvector to copy the upper 64 bits of a
+// 128-bit vector.
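+// For example, taking the high v8i8 half of a v16i8 value becomes roughly
+//   ext v0.16b, v1.16b, v1.16b, #8
+// which rotates the upper 64 bits into the lower half, after which the dsub
+// subregister extraction is free.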
+def : Pat<(v8i8 (extract_subvector V128:$Rn, (i64 8))),
+ (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>;
+def : Pat<(v4i16 (extract_subvector V128:$Rn, (i64 4))),
+ (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>;
+def : Pat<(v2i32 (extract_subvector V128:$Rn, (i64 2))),
+ (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>;
+def : Pat<(v1i64 (extract_subvector V128:$Rn, (i64 1))),
+ (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>;
+def : Pat<(v2f32 (extract_subvector V128:$Rn, (i64 2))),
+ (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>;
+def : Pat<(v1f64 (extract_subvector V128:$Rn, (i64 1))),
+ (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>;
+
+
+//----------------------------------------------------------------------------
+// AdvSIMD zip vector
+//----------------------------------------------------------------------------
+
+defm TRN1 : SIMDZipVector<0b010, "trn1", AArch64trn1>;
+defm TRN2 : SIMDZipVector<0b110, "trn2", AArch64trn2>;
+defm UZP1 : SIMDZipVector<0b001, "uzp1", AArch64uzp1>;
+defm UZP2 : SIMDZipVector<0b101, "uzp2", AArch64uzp2>;
+defm ZIP1 : SIMDZipVector<0b011, "zip1", AArch64zip1>;
+defm ZIP2 : SIMDZipVector<0b111, "zip2", AArch64zip2>;
+
+//----------------------------------------------------------------------------
+// AdvSIMD TBL/TBX instructions
+//----------------------------------------------------------------------------
+
+defm TBL : SIMDTableLookup< 0, "tbl">;
+defm TBX : SIMDTableLookupTied<1, "tbx">;
+
+def : Pat<(v8i8 (int_aarch64_neon_tbl1 (v16i8 VecListOne128:$Rn), (v8i8 V64:$Ri))),
+ (TBLv8i8One VecListOne128:$Rn, V64:$Ri)>;
+def : Pat<(v16i8 (int_aarch64_neon_tbl1 (v16i8 V128:$Ri), (v16i8 V128:$Rn))),
+ (TBLv16i8One V128:$Ri, V128:$Rn)>;
+
+def : Pat<(v8i8 (int_aarch64_neon_tbx1 (v8i8 V64:$Rd),
+ (v16i8 VecListOne128:$Rn), (v8i8 V64:$Ri))),
+ (TBXv8i8One V64:$Rd, VecListOne128:$Rn, V64:$Ri)>;
+def : Pat<(v16i8 (int_aarch64_neon_tbx1 (v16i8 V128:$Rd),
+ (v16i8 V128:$Ri), (v16i8 V128:$Rn))),
+ (TBXv16i8One V128:$Rd, V128:$Ri, V128:$Rn)>;
+
+
+//----------------------------------------------------------------------------
+// AdvSIMD scalar CPY instruction
+//----------------------------------------------------------------------------
+
+defm CPY : SIMDScalarCPY<"cpy">;
+
+//----------------------------------------------------------------------------
+// AdvSIMD scalar pairwise instructions
+//----------------------------------------------------------------------------
+
+defm ADDP : SIMDPairwiseScalarD<0, 0b11011, "addp">;
+defm FADDP : SIMDPairwiseScalarSD<1, 0, 0b01101, "faddp">;
+defm FMAXNMP : SIMDPairwiseScalarSD<1, 0, 0b01100, "fmaxnmp">;
+defm FMAXP : SIMDPairwiseScalarSD<1, 0, 0b01111, "fmaxp">;
+defm FMINNMP : SIMDPairwiseScalarSD<1, 1, 0b01100, "fminnmp">;
+defm FMINP : SIMDPairwiseScalarSD<1, 1, 0b01111, "fminp">;
+def : Pat<(i64 (int_aarch64_neon_saddv (v2i64 V128:$Rn))),
+ (ADDPv2i64p V128:$Rn)>;
+def : Pat<(i64 (int_aarch64_neon_uaddv (v2i64 V128:$Rn))),
+ (ADDPv2i64p V128:$Rn)>;
+def : Pat<(f32 (int_aarch64_neon_faddv (v2f32 V64:$Rn))),
+ (FADDPv2i32p V64:$Rn)>;
+def : Pat<(f32 (int_aarch64_neon_faddv (v4f32 V128:$Rn))),
+ (FADDPv2i32p (EXTRACT_SUBREG (FADDPv4f32 V128:$Rn, V128:$Rn), dsub))>;
+def : Pat<(f64 (int_aarch64_neon_faddv (v2f64 V128:$Rn))),
+ (FADDPv2i64p V128:$Rn)>;
+def : Pat<(f32 (int_aarch64_neon_fmaxnmv (v2f32 V64:$Rn))),
+ (FMAXNMPv2i32p V64:$Rn)>;
+def : Pat<(f64 (int_aarch64_neon_fmaxnmv (v2f64 V128:$Rn))),
+ (FMAXNMPv2i64p V128:$Rn)>;
+def : Pat<(f32 (int_aarch64_neon_fmaxv (v2f32 V64:$Rn))),
+ (FMAXPv2i32p V64:$Rn)>;
+def : Pat<(f64 (int_aarch64_neon_fmaxv (v2f64 V128:$Rn))),
+ (FMAXPv2i64p V128:$Rn)>;
+def : Pat<(f32 (int_aarch64_neon_fminnmv (v2f32 V64:$Rn))),
+ (FMINNMPv2i32p V64:$Rn)>;
+def : Pat<(f64 (int_aarch64_neon_fminnmv (v2f64 V128:$Rn))),
+ (FMINNMPv2i64p V128:$Rn)>;
+def : Pat<(f32 (int_aarch64_neon_fminv (v2f32 V64:$Rn))),
+ (FMINPv2i32p V64:$Rn)>;
+def : Pat<(f64 (int_aarch64_neon_fminv (v2f64 V128:$Rn))),
+ (FMINPv2i64p V128:$Rn)>;
+
+//----------------------------------------------------------------------------
+// AdvSIMD INS/DUP instructions
+//----------------------------------------------------------------------------
+
+def DUPv8i8gpr : SIMDDupFromMain<0, 0b00001, ".8b", v8i8, V64, GPR32>;
+def DUPv16i8gpr : SIMDDupFromMain<1, 0b00001, ".16b", v16i8, V128, GPR32>;
+def DUPv4i16gpr : SIMDDupFromMain<0, 0b00010, ".4h", v4i16, V64, GPR32>;
+def DUPv8i16gpr : SIMDDupFromMain<1, 0b00010, ".8h", v8i16, V128, GPR32>;
+def DUPv2i32gpr : SIMDDupFromMain<0, 0b00100, ".2s", v2i32, V64, GPR32>;
+def DUPv4i32gpr : SIMDDupFromMain<1, 0b00100, ".4s", v4i32, V128, GPR32>;
+def DUPv2i64gpr : SIMDDupFromMain<1, 0b01000, ".2d", v2i64, V128, GPR64>;
+
+def DUPv2i64lane : SIMDDup64FromElement;
+def DUPv2i32lane : SIMDDup32FromElement<0, ".2s", v2i32, V64>;
+def DUPv4i32lane : SIMDDup32FromElement<1, ".4s", v4i32, V128>;
+def DUPv4i16lane : SIMDDup16FromElement<0, ".4h", v4i16, V64>;
+def DUPv8i16lane : SIMDDup16FromElement<1, ".8h", v8i16, V128>;
+def DUPv8i8lane : SIMDDup8FromElement <0, ".8b", v8i8, V64>;
+def DUPv16i8lane : SIMDDup8FromElement <1, ".16b", v16i8, V128>;
+
+def : Pat<(v2f32 (AArch64dup (f32 FPR32:$Rn))),
+ (v2f32 (DUPv2i32lane
+ (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rn, ssub),
+ (i64 0)))>;
+def : Pat<(v4f32 (AArch64dup (f32 FPR32:$Rn))),
+ (v4f32 (DUPv4i32lane
+ (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rn, ssub),
+ (i64 0)))>;
+def : Pat<(v2f64 (AArch64dup (f64 FPR64:$Rn))),
+ (v2f64 (DUPv2i64lane
+ (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR64:$Rn, dsub),
+ (i64 0)))>;
+
+def : Pat<(v2f32 (AArch64duplane32 (v4f32 V128:$Rn), VectorIndexS:$imm)),
+ (DUPv2i32lane V128:$Rn, VectorIndexS:$imm)>;
+def : Pat<(v4f32 (AArch64duplane32 (v4f32 V128:$Rn), VectorIndexS:$imm)),
+ (DUPv4i32lane V128:$Rn, VectorIndexS:$imm)>;
+def : Pat<(v2f64 (AArch64duplane64 (v2f64 V128:$Rn), VectorIndexD:$imm)),
+ (DUPv2i64lane V128:$Rn, VectorIndexD:$imm)>;
+
+// If there's an (AArch64dup (vector_extract ...) ...), we can use a duplane
+// instruction even if the types don't match: we just have to remap the lane
+// carefully. N.b. this trick only applies to truncations.
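+// E.g. (a sketch) duplicating the truncated h-lane 2 of a v8i16 as bytes can
+// be emitted as
+//   dup v0.8b, v1.b[4]
+// because the low byte of h-lane 2 is b-lane 4; the VecIndex_x* transforms
+// below compute that remapped index.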
+def VecIndex_x2 : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(2 * N->getZExtValue(), MVT::i64);
}]>;
-
-def bfi64_lsb_to_immr : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant((64 - N->getZExtValue()) % 64, MVT::i64);
+def VecIndex_x4 : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(4 * N->getZExtValue(), MVT::i64);
}]>;
-
-def bfi_width_to_imms : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(N->getZExtValue() - 1, MVT::i64);
+def VecIndex_x8 : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(8 * N->getZExtValue(), MVT::i64);
}]>;
+multiclass DUPWithTruncPats<ValueType ResVT, ValueType Src64VT,
+ ValueType Src128VT, ValueType ScalVT,
+ Instruction DUP, SDNodeXForm IdxXFORM> {
+ def : Pat<(ResVT (AArch64dup (ScalVT (vector_extract (Src128VT V128:$Rn),
+ imm:$idx)))),
+ (DUP V128:$Rn, (IdxXFORM imm:$idx))>;
+
+ def : Pat<(ResVT (AArch64dup (ScalVT (vector_extract (Src64VT V64:$Rn),
+ imm:$idx)))),
+ (DUP (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), (IdxXFORM imm:$idx))>;
+}
+
+defm : DUPWithTruncPats<v8i8, v4i16, v8i16, i32, DUPv8i8lane, VecIndex_x2>;
+defm : DUPWithTruncPats<v8i8, v2i32, v4i32, i32, DUPv8i8lane, VecIndex_x4>;
+defm : DUPWithTruncPats<v4i16, v2i32, v4i32, i32, DUPv4i16lane, VecIndex_x2>;
+
+defm : DUPWithTruncPats<v16i8, v4i16, v8i16, i32, DUPv16i8lane, VecIndex_x2>;
+defm : DUPWithTruncPats<v16i8, v2i32, v4i32, i32, DUPv16i8lane, VecIndex_x4>;
+defm : DUPWithTruncPats<v8i16, v2i32, v4i32, i32, DUPv8i16lane, VecIndex_x2>;
+
+multiclass DUPWithTrunci64Pats<ValueType ResVT, Instruction DUP,
+ SDNodeXForm IdxXFORM> {
+ def : Pat<(ResVT (AArch64dup (i32 (trunc (vector_extract (v2i64 V128:$Rn),
+ imm:$idx))))),
+ (DUP V128:$Rn, (IdxXFORM imm:$idx))>;
+
+ def : Pat<(ResVT (AArch64dup (i32 (trunc (vector_extract (v1i64 V64:$Rn),
+ imm:$idx))))),
+ (DUP (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), (IdxXFORM imm:$idx))>;
+}
+
+defm : DUPWithTrunci64Pats<v8i8, DUPv8i8lane, VecIndex_x8>;
+defm : DUPWithTrunci64Pats<v4i16, DUPv4i16lane, VecIndex_x4>;
+defm : DUPWithTrunci64Pats<v2i32, DUPv2i32lane, VecIndex_x2>;
+
+defm : DUPWithTrunci64Pats<v16i8, DUPv16i8lane, VecIndex_x8>;
+defm : DUPWithTrunci64Pats<v8i16, DUPv8i16lane, VecIndex_x4>;
+defm : DUPWithTrunci64Pats<v4i32, DUPv4i32lane, VecIndex_x2>;
+
+// SMOV and UMOV definitions, with some extra patterns for convenience
+defm SMOV : SMov;
+defm UMOV : UMov;
+
+def : Pat<(sext_inreg (vector_extract (v16i8 V128:$Rn), VectorIndexB:$idx), i8),
+ (i32 (SMOVvi8to32 V128:$Rn, VectorIndexB:$idx))>;
+def : Pat<(sext_inreg (vector_extract (v16i8 V128:$Rn), VectorIndexB:$idx), i8),
+ (i64 (SMOVvi8to64 V128:$Rn, VectorIndexB:$idx))>;
+def : Pat<(sext_inreg (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx),i16),
+ (i32 (SMOVvi16to32 V128:$Rn, VectorIndexH:$idx))>;
+def : Pat<(sext_inreg (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx),i16),
+ (i64 (SMOVvi16to64 V128:$Rn, VectorIndexH:$idx))>;
+def : Pat<(sext (i32 (vector_extract (v4i32 V128:$Rn), VectorIndexS:$idx))),
+ (i64 (SMOVvi32to64 V128:$Rn, VectorIndexS:$idx))>;
+
+// Extracting i8 or i16 elements will have the zero-extend transformed to
+// an 'and' mask by type legalization, since neither i8 nor i16 is a legal
+// type for AArch64. Match these patterns here since UMOV already zeroes out
+// the high bits of the destination register.
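+// E.g. "umov w0, v0.b[3]" already leaves bits 31:8 of w0 clear, so the
+// (and ..., 0xff) introduced by legalization is folded into the UMOV rather
+// than emitted as a separate instruction.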
+def : Pat<(and (vector_extract (v16i8 V128:$Rn), VectorIndexB:$idx),
+ (i32 0xff)),
+ (i32 (UMOVvi8 V128:$Rn, VectorIndexB:$idx))>;
+def : Pat<(and (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx),
+ (i32 0xffff)),
+ (i32 (UMOVvi16 V128:$Rn, VectorIndexH:$idx))>;
+
+defm INS : SIMDIns;
+
+def : Pat<(v16i8 (scalar_to_vector GPR32:$Rn)),
+ (SUBREG_TO_REG (i32 0),
+ (f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>;
+def : Pat<(v8i8 (scalar_to_vector GPR32:$Rn)),
+ (SUBREG_TO_REG (i32 0),
+ (f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>;
+
+def : Pat<(v8i16 (scalar_to_vector GPR32:$Rn)),
+ (SUBREG_TO_REG (i32 0),
+ (f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>;
+def : Pat<(v4i16 (scalar_to_vector GPR32:$Rn)),
+ (SUBREG_TO_REG (i32 0),
+ (f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>;
+
+def : Pat<(v2i32 (scalar_to_vector (i32 FPR32:$Rn))),
+ (v2i32 (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)),
+ (i32 FPR32:$Rn), ssub))>;
+def : Pat<(v4i32 (scalar_to_vector (i32 FPR32:$Rn))),
+ (v4i32 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
+ (i32 FPR32:$Rn), ssub))>;
+def : Pat<(v2i64 (scalar_to_vector (i64 FPR64:$Rn))),
+ (v2i64 (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)),
+ (i64 FPR64:$Rn), dsub))>;
+
+def : Pat<(v4f32 (scalar_to_vector (f32 FPR32:$Rn))),
+ (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR32:$Rn, ssub)>;
+def : Pat<(v2f32 (scalar_to_vector (f32 FPR32:$Rn))),
+ (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), FPR32:$Rn, ssub)>;
+def : Pat<(v2f64 (scalar_to_vector (f64 FPR64:$Rn))),
+ (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FPR64:$Rn, dsub)>;
+
+def : Pat<(v2f32 (vector_insert (v2f32 V64:$Rn),
+ (f32 FPR32:$Rm), (i64 VectorIndexS:$imm))),
+ (EXTRACT_SUBREG
+ (INSvi32lane
+ (v4f32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), V64:$Rn, dsub)),
+ VectorIndexS:$imm,
+ (v4f32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR32:$Rm, ssub)),
+ (i64 0)),
+ dsub)>;
+def : Pat<(v4f32 (vector_insert (v4f32 V128:$Rn),
+ (f32 FPR32:$Rm), (i64 VectorIndexS:$imm))),
+ (INSvi32lane
+ V128:$Rn, VectorIndexS:$imm,
+ (v4f32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR32:$Rm, ssub)),
+ (i64 0))>;
+def : Pat<(v2f64 (vector_insert (v2f64 V128:$Rn),
+ (f64 FPR64:$Rm), (i64 VectorIndexD:$imm))),
+ (INSvi64lane
+ V128:$Rn, VectorIndexD:$imm,
+ (v2f64 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FPR64:$Rm, dsub)),
+ (i64 0))>;
+
+// Copy an element at a constant index in one vector into a constant-indexed
+// element of another.
+// FIXME: refactor to a shared class/def parameterized on vector type, vector
+// index type and INS extension.
+def : Pat<(v16i8 (int_aarch64_neon_vcopy_lane
+ (v16i8 V128:$Vd), VectorIndexB:$idx, (v16i8 V128:$Vs),
+ VectorIndexB:$idx2)),
+ (v16i8 (INSvi8lane
+ V128:$Vd, VectorIndexB:$idx, V128:$Vs, VectorIndexB:$idx2)
+ )>;
+def : Pat<(v8i16 (int_aarch64_neon_vcopy_lane
+ (v8i16 V128:$Vd), VectorIndexH:$idx, (v8i16 V128:$Vs),
+ VectorIndexH:$idx2)),
+ (v8i16 (INSvi16lane
+ V128:$Vd, VectorIndexH:$idx, V128:$Vs, VectorIndexH:$idx2)
+ )>;
+def : Pat<(v4i32 (int_aarch64_neon_vcopy_lane
+ (v4i32 V128:$Vd), VectorIndexS:$idx, (v4i32 V128:$Vs),
+ VectorIndexS:$idx2)),
+ (v4i32 (INSvi32lane
+ V128:$Vd, VectorIndexS:$idx, V128:$Vs, VectorIndexS:$idx2)
+ )>;
+def : Pat<(v2i64 (int_aarch64_neon_vcopy_lane
+ (v2i64 V128:$Vd), VectorIndexD:$idx, (v2i64 V128:$Vs),
+ VectorIndexD:$idx2)),
+ (v2i64 (INSvi64lane
+ V128:$Vd, VectorIndexD:$idx, V128:$Vs, VectorIndexD:$idx2)
+ )>;
+
+multiclass Neon_INS_elt_pattern<ValueType VT128, ValueType VT64,
+ ValueType VTScal, Instruction INS> {
+ def : Pat<(VT128 (vector_insert V128:$src,
+ (VTScal (vector_extract (VT128 V128:$Rn), imm:$Immn)),
+ imm:$Immd)),
+ (INS V128:$src, imm:$Immd, V128:$Rn, imm:$Immn)>;
+
+ def : Pat<(VT128 (vector_insert V128:$src,
+ (VTScal (vector_extract (VT64 V64:$Rn), imm:$Immn)),
+ imm:$Immd)),
+ (INS V128:$src, imm:$Immd,
+ (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), imm:$Immn)>;
+
+ def : Pat<(VT64 (vector_insert V64:$src,
+ (VTScal (vector_extract (VT128 V128:$Rn), imm:$Immn)),
+ imm:$Immd)),
+ (EXTRACT_SUBREG (INS (SUBREG_TO_REG (i64 0), V64:$src, dsub),
+ imm:$Immd, V128:$Rn, imm:$Immn),
+ dsub)>;
+
+ def : Pat<(VT64 (vector_insert V64:$src,
+ (VTScal (vector_extract (VT64 V64:$Rn), imm:$Immn)),
+ imm:$Immd)),
+ (EXTRACT_SUBREG
+ (INS (SUBREG_TO_REG (i64 0), V64:$src, dsub), imm:$Immd,
+ (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), imm:$Immn),
+ dsub)>;
+}
+
+defm : Neon_INS_elt_pattern<v4f32, v2f32, f32, INSvi32lane>;
+defm : Neon_INS_elt_pattern<v2f64, v1f64, f64, INSvi64lane>;
+defm : Neon_INS_elt_pattern<v16i8, v8i8, i32, INSvi8lane>;
+defm : Neon_INS_elt_pattern<v8i16, v4i16, i32, INSvi16lane>;
+defm : Neon_INS_elt_pattern<v4i32, v2i32, i32, INSvi32lane>;
+defm : Neon_INS_elt_pattern<v2i64, v1i64, i64, INSvi64lane>;
+
+
+// Floating-point vector extractions are codegen'd as a sequence of
+// subregister extractions, possibly fed by an INS if the lane number is
+// anything other than zero.
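+// E.g. lane 0 of a v2f64 is just the dsub subregister, while lane 1 becomes
+// roughly
+//   ins v0.d[0], v1.d[1]
+// followed by a free dsub read of the result.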
+def : Pat<(vector_extract (v2f64 V128:$Rn), 0),
+ (f64 (EXTRACT_SUBREG V128:$Rn, dsub))>;
+def : Pat<(vector_extract (v4f32 V128:$Rn), 0),
+ (f32 (EXTRACT_SUBREG V128:$Rn, ssub))>;
+def : Pat<(vector_extract (v2f64 V128:$Rn), VectorIndexD:$idx),
+ (f64 (EXTRACT_SUBREG
+ (INSvi64lane (v2f64 (IMPLICIT_DEF)), 0,
+ V128:$Rn, VectorIndexD:$idx),
+ dsub))>;
+def : Pat<(vector_extract (v4f32 V128:$Rn), VectorIndexS:$idx),
+ (f32 (EXTRACT_SUBREG
+ (INSvi32lane (v4f32 (IMPLICIT_DEF)), 0,
+ V128:$Rn, VectorIndexS:$idx),
+ ssub))>;
+
+// All concat_vectors operations are canonicalised to act on i64 vectors for
+// AArch64. In the general case we need an instruction, which might as well
+// be INS.
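+// E.g. concatenating two v2f32 registers is roughly a dsub subregister copy
+// of the low half followed by
+//   ins v0.d[1], v1.d[0]
+// to fill the high half.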
+class ConcatPat<ValueType DstTy, ValueType SrcTy>
+ : Pat<(DstTy (concat_vectors (SrcTy V64:$Rd), V64:$Rn)),
+ (INSvi64lane (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), 1,
+ (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rn, dsub), 0)>;
+
+def : ConcatPat<v2i64, v1i64>;
+def : ConcatPat<v2f64, v1f64>;
+def : ConcatPat<v4i32, v2i32>;
+def : ConcatPat<v4f32, v2f32>;
+def : ConcatPat<v8i16, v4i16>;
+def : ConcatPat<v16i8, v8i8>;
+
+// If the high lanes are undef, though, we can just ignore them:
+class ConcatUndefPat<ValueType DstTy, ValueType SrcTy>
+ : Pat<(DstTy (concat_vectors (SrcTy V64:$Rn), undef)),
+ (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rn, dsub)>;
+
+def : ConcatUndefPat<v2i64, v1i64>;
+def : ConcatUndefPat<v2f64, v1f64>;
+def : ConcatUndefPat<v4i32, v2i32>;
+def : ConcatUndefPat<v4f32, v2f32>;
+def : ConcatUndefPat<v8i16, v4i16>;
+def : ConcatUndefPat<v16i8, v8i8>;
+
+//----------------------------------------------------------------------------
+// AdvSIMD across lanes instructions
+//----------------------------------------------------------------------------
+
+defm ADDV : SIMDAcrossLanesBHS<0, 0b11011, "addv">;
+defm SMAXV : SIMDAcrossLanesBHS<0, 0b01010, "smaxv">;
+defm SMINV : SIMDAcrossLanesBHS<0, 0b11010, "sminv">;
+defm UMAXV : SIMDAcrossLanesBHS<1, 0b01010, "umaxv">;
+defm UMINV : SIMDAcrossLanesBHS<1, 0b11010, "uminv">;
+defm SADDLV : SIMDAcrossLanesHSD<0, 0b00011, "saddlv">;
+defm UADDLV : SIMDAcrossLanesHSD<1, 0b00011, "uaddlv">;
+defm FMAXNMV : SIMDAcrossLanesS<0b01100, 0, "fmaxnmv", int_aarch64_neon_fmaxnmv>;
+defm FMAXV : SIMDAcrossLanesS<0b01111, 0, "fmaxv", int_aarch64_neon_fmaxv>;
+defm FMINNMV : SIMDAcrossLanesS<0b01100, 1, "fminnmv", int_aarch64_neon_fminnmv>;
+defm FMINV : SIMDAcrossLanesS<0b01111, 1, "fminv", int_aarch64_neon_fminv>;
+
+multiclass SIMDAcrossLanesSignedIntrinsic<string baseOpc, Intrinsic intOp> {
+// If there is a sign extension after this intrinsic, consume it, since smov
+// already performed it.
+ def : Pat<(i32 (sext_inreg (i32 (intOp (v8i8 V64:$Rn))), i8)),
+ (i32 (SMOVvi8to32
+ (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub),
+ (i64 0)))>;
+ def : Pat<(i32 (intOp (v8i8 V64:$Rn))),
+ (i32 (SMOVvi8to32
+ (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub),
+ (i64 0)))>;
+// If there is a sign extension after this intrinsic, consume it, since smov
+// already performed it.
+def : Pat<(i32 (sext_inreg (i32 (intOp (v16i8 V128:$Rn))), i8)),
+ (i32 (SMOVvi8to32
+ (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub),
+ (i64 0)))>;
+def : Pat<(i32 (intOp (v16i8 V128:$Rn))),
+ (i32 (SMOVvi8to32
+ (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub),
+ (i64 0)))>;
+// If there is a sign extension after this intrinsic, consume it, since smov
+// already performed it.
+def : Pat<(i32 (sext_inreg (i32 (intOp (v4i16 V64:$Rn))), i16)),
+ (i32 (SMOVvi16to32
+ (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub),
+ (i64 0)))>;
+def : Pat<(i32 (intOp (v4i16 V64:$Rn))),
+ (i32 (SMOVvi16to32
+ (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub),
+ (i64 0)))>;
+// If there is a sign extension after this intrinsic, consume it, since smov
+// already performed it.
+def : Pat<(i32 (sext_inreg (i32 (intOp (v8i16 V128:$Rn))), i16)),
+ (i32 (SMOVvi16to32
+ (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub),
+ (i64 0)))>;
+def : Pat<(i32 (intOp (v8i16 V128:$Rn))),
+ (i32 (SMOVvi16to32
+ (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub),
+ (i64 0)))>;
+
+def : Pat<(i32 (intOp (v4i32 V128:$Rn))),
+ (i32 (EXTRACT_SUBREG
+ (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), ssub),
+ ssub))>;
+}
+
+multiclass SIMDAcrossLanesUnsignedIntrinsic<string baseOpc, Intrinsic intOp> {
+// If there is a masking operation keeping only what has actually been
+// generated, consume it.
+ def : Pat<(i32 (and (i32 (intOp (v8i8 V64:$Rn))), maski8_or_more)),
+ (i32 (EXTRACT_SUBREG
+ (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub),
+ ssub))>;
+ def : Pat<(i32 (intOp (v8i8 V64:$Rn))),
+ (i32 (EXTRACT_SUBREG
+ (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub),
+ ssub))>;
+// If there is a masking operation keeping only what has actually been
+// generated, consume it.
+def : Pat<(i32 (and (i32 (intOp (v16i8 V128:$Rn))), maski8_or_more)),
+ (i32 (EXTRACT_SUBREG
+ (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub),
+ ssub))>;
+def : Pat<(i32 (intOp (v16i8 V128:$Rn))),
+ (i32 (EXTRACT_SUBREG
+ (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub),
+ ssub))>;
+
+// If there is a masking operation keeping only what has actually been
+// generated, consume it.
+def : Pat<(i32 (and (i32 (intOp (v4i16 V64:$Rn))), maski16_or_more)),
+ (i32 (EXTRACT_SUBREG
+ (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub),
+ ssub))>;
+def : Pat<(i32 (intOp (v4i16 V64:$Rn))),
+ (i32 (EXTRACT_SUBREG
+ (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub),
+ ssub))>;
+// If there is a masking operation keeping only what has actually been
+// generated, consume it.
+def : Pat<(i32 (and (i32 (intOp (v8i16 V128:$Rn))), maski16_or_more)),
+ (i32 (EXTRACT_SUBREG
+ (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub),
+ ssub))>;
+def : Pat<(i32 (intOp (v8i16 V128:$Rn))),
+ (i32 (EXTRACT_SUBREG
+ (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub),
+ ssub))>;
+
+def : Pat<(i32 (intOp (v4i32 V128:$Rn))),
+ (i32 (EXTRACT_SUBREG
+ (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), ssub),
+ ssub))>;
+
+}
+
+multiclass SIMDAcrossLanesSignedLongIntrinsic<string baseOpc, Intrinsic intOp> {
+ def : Pat<(i32 (intOp (v8i8 V64:$Rn))),
+ (i32 (SMOVvi16to32
+ (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), hsub),
+ (i64 0)))>;
+def : Pat<(i32 (intOp (v16i8 V128:$Rn))),
+ (i32 (SMOVvi16to32
+ (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), hsub),
+ (i64 0)))>;
+
+def : Pat<(i32 (intOp (v4i16 V64:$Rn))),
+ (i32 (EXTRACT_SUBREG
+ (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), ssub),
+ ssub))>;
+def : Pat<(i32 (intOp (v8i16 V128:$Rn))),
+ (i32 (EXTRACT_SUBREG
+ (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), ssub),
+ ssub))>;
+
+def : Pat<(i64 (intOp (v4i32 V128:$Rn))),
+ (i64 (EXTRACT_SUBREG
+ (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), dsub),
+ dsub))>;
+}
+
+multiclass SIMDAcrossLanesUnsignedLongIntrinsic<string baseOpc,
+ Intrinsic intOp> {
+ def : Pat<(i32 (intOp (v8i8 V64:$Rn))),
+ (i32 (EXTRACT_SUBREG
+ (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), hsub),
+ ssub))>;
+def : Pat<(i32 (intOp (v16i8 V128:$Rn))),
+ (i32 (EXTRACT_SUBREG
+ (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), hsub),
+ ssub))>;
+
+def : Pat<(i32 (intOp (v4i16 V64:$Rn))),
+ (i32 (EXTRACT_SUBREG
+ (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), ssub),
+ ssub))>;
+def : Pat<(i32 (intOp (v8i16 V128:$Rn))),
+ (i32 (EXTRACT_SUBREG
+ (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), ssub),
+ ssub))>;
+
+def : Pat<(i64 (intOp (v4i32 V128:$Rn))),
+ (i64 (EXTRACT_SUBREG
+ (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), dsub),
+ dsub))>;
+}
+
+defm : SIMDAcrossLanesSignedIntrinsic<"ADDV", int_aarch64_neon_saddv>;
+// vaddv_[su]32 is special: it lowers to "ADDP Vd.2s, Vn.2s, Vm.2s" with
+// Vn == Vm, and the result is read from Vd.s[0].
+def : Pat<(i32 (int_aarch64_neon_saddv (v2i32 V64:$Rn))),
+ (EXTRACT_SUBREG (ADDPv2i32 V64:$Rn, V64:$Rn), ssub)>;
+
+defm : SIMDAcrossLanesUnsignedIntrinsic<"ADDV", int_aarch64_neon_uaddv>;
+// vaddv_[su]32 is special: it lowers to "ADDP Vd.2s, Vn.2s, Vm.2s" with
+// Vn == Vm, and the result is read from Vd.s[0].
+def : Pat<(i32 (int_aarch64_neon_uaddv (v2i32 V64:$Rn))),
+ (EXTRACT_SUBREG (ADDPv2i32 V64:$Rn, V64:$Rn), ssub)>;
+
+defm : SIMDAcrossLanesSignedIntrinsic<"SMAXV", int_aarch64_neon_smaxv>;
+def : Pat<(i32 (int_aarch64_neon_smaxv (v2i32 V64:$Rn))),
+ (EXTRACT_SUBREG (SMAXPv2i32 V64:$Rn, V64:$Rn), ssub)>;
+
+defm : SIMDAcrossLanesSignedIntrinsic<"SMINV", int_aarch64_neon_sminv>;
+def : Pat<(i32 (int_aarch64_neon_sminv (v2i32 V64:$Rn))),
+ (EXTRACT_SUBREG (SMINPv2i32 V64:$Rn, V64:$Rn), ssub)>;
+
+defm : SIMDAcrossLanesUnsignedIntrinsic<"UMAXV", int_aarch64_neon_umaxv>;
+def : Pat<(i32 (int_aarch64_neon_umaxv (v2i32 V64:$Rn))),
+ (EXTRACT_SUBREG (UMAXPv2i32 V64:$Rn, V64:$Rn), ssub)>;
+
+defm : SIMDAcrossLanesUnsignedIntrinsic<"UMINV", int_aarch64_neon_uminv>;
+def : Pat<(i32 (int_aarch64_neon_uminv (v2i32 V64:$Rn))),
+ (EXTRACT_SUBREG (UMINPv2i32 V64:$Rn, V64:$Rn), ssub)>;
+
+defm : SIMDAcrossLanesSignedLongIntrinsic<"SADDLV", int_aarch64_neon_saddlv>;
+defm : SIMDAcrossLanesUnsignedLongIntrinsic<"UADDLV", int_aarch64_neon_uaddlv>;
+
+// The vaddlv_s32 intrinsic gets mapped to SADDLP.
+def : Pat<(i64 (int_aarch64_neon_saddlv (v2i32 V64:$Rn))),
+ (i64 (EXTRACT_SUBREG
+ (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (SADDLPv2i32_v1i64 V64:$Rn), dsub),
+ dsub))>;
+// The vaddlv_u32 intrinsic gets mapped to UADDLP.
+def : Pat<(i64 (int_aarch64_neon_uaddlv (v2i32 V64:$Rn))),
+ (i64 (EXTRACT_SUBREG
+ (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (UADDLPv2i32_v1i64 V64:$Rn), dsub),
+ dsub))>;
+
+//------------------------------------------------------------------------------
+// AdvSIMD modified immediate instructions
+//------------------------------------------------------------------------------
+
+// AdvSIMD BIC
+defm BIC : SIMDModifiedImmVectorShiftTied<1, 0b11, 0b01, "bic", AArch64bici>;
+// AdvSIMD ORR
+defm ORR : SIMDModifiedImmVectorShiftTied<0, 0b11, 0b01, "orr", AArch64orri>;
+
+def : InstAlias<"bic $Vd.4h, $imm", (BICv4i16 V64:$Vd, imm0_255:$imm, 0)>;
+def : InstAlias<"bic $Vd.8h, $imm", (BICv8i16 V128:$Vd, imm0_255:$imm, 0)>;
+def : InstAlias<"bic $Vd.2s, $imm", (BICv2i32 V64:$Vd, imm0_255:$imm, 0)>;
+def : InstAlias<"bic $Vd.4s, $imm", (BICv4i32 V128:$Vd, imm0_255:$imm, 0)>;
+
+def : InstAlias<"bic.4h $Vd, $imm", (BICv4i16 V64:$Vd, imm0_255:$imm, 0), 0>;
+def : InstAlias<"bic.8h $Vd, $imm", (BICv8i16 V128:$Vd, imm0_255:$imm, 0), 0>;
+def : InstAlias<"bic.2s $Vd, $imm", (BICv2i32 V64:$Vd, imm0_255:$imm, 0), 0>;
+def : InstAlias<"bic.4s $Vd, $imm", (BICv4i32 V128:$Vd, imm0_255:$imm, 0), 0>;
+
+def : InstAlias<"orr $Vd.4h, $imm", (ORRv4i16 V64:$Vd, imm0_255:$imm, 0)>;
+def : InstAlias<"orr $Vd.8h, $imm", (ORRv8i16 V128:$Vd, imm0_255:$imm, 0)>;
+def : InstAlias<"orr $Vd.2s, $imm", (ORRv2i32 V64:$Vd, imm0_255:$imm, 0)>;
+def : InstAlias<"orr $Vd.4s, $imm", (ORRv4i32 V128:$Vd, imm0_255:$imm, 0)>;
+
+def : InstAlias<"orr.4h $Vd, $imm", (ORRv4i16 V64:$Vd, imm0_255:$imm, 0), 0>;
+def : InstAlias<"orr.8h $Vd, $imm", (ORRv8i16 V128:$Vd, imm0_255:$imm, 0), 0>;
+def : InstAlias<"orr.2s $Vd, $imm", (ORRv2i32 V64:$Vd, imm0_255:$imm, 0), 0>;
+def : InstAlias<"orr.4s $Vd, $imm", (ORRv4i32 V128:$Vd, imm0_255:$imm, 0), 0>;
+
+// AdvSIMD FMOV
+def FMOVv2f64_ns : SIMDModifiedImmVectorNoShift<1, 1, 0b1111, V128, fpimm8,
+ "fmov", ".2d",
+ [(set (v2f64 V128:$Rd), (AArch64fmov imm0_255:$imm8))]>;
+def FMOVv2f32_ns : SIMDModifiedImmVectorNoShift<0, 0, 0b1111, V64, fpimm8,
+ "fmov", ".2s",
+ [(set (v2f32 V64:$Rd), (AArch64fmov imm0_255:$imm8))]>;
+def FMOVv4f32_ns : SIMDModifiedImmVectorNoShift<1, 0, 0b1111, V128, fpimm8,
+ "fmov", ".4s",
+ [(set (v4f32 V128:$Rd), (AArch64fmov imm0_255:$imm8))]>;
+
+// AdvSIMD MOVI
+
+// EDIT byte mask: scalar
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in
+def MOVID : SIMDModifiedImmScalarNoShift<0, 1, 0b1110, "movi",
+ [(set FPR64:$Rd, simdimmtype10:$imm8)]>;
+// The movi_edit node has the immediate value already encoded, so we use
+// a plain imm0_255 here.
+def : Pat<(f64 (AArch64movi_edit imm0_255:$shift)),
+ (MOVID imm0_255:$shift)>;
+
+def : Pat<(v1i64 immAllZerosV), (MOVID (i32 0))>;
+def : Pat<(v2i32 immAllZerosV), (MOVID (i32 0))>;
+def : Pat<(v4i16 immAllZerosV), (MOVID (i32 0))>;
+def : Pat<(v8i8 immAllZerosV), (MOVID (i32 0))>;
+
+def : Pat<(v1i64 immAllOnesV), (MOVID (i32 255))>;
+def : Pat<(v2i32 immAllOnesV), (MOVID (i32 255))>;
+def : Pat<(v4i16 immAllOnesV), (MOVID (i32 255))>;
+def : Pat<(v8i8 immAllOnesV), (MOVID (i32 255))>;
+
+// EDIT byte mask: 2d
+
+// The movi_edit node has the immediate value already encoded, so we use
+// a plain imm0_255 in the pattern
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in
+def MOVIv2d_ns : SIMDModifiedImmVectorNoShift<1, 1, 0b1110, V128,
+ simdimmtype10,
+ "movi", ".2d",
+ [(set (v2i64 V128:$Rd), (AArch64movi_edit imm0_255:$imm8))]>;
+
+
+// Use movi.2d to materialize 0.0 if the HW does zero-cycle zeroing.
+// Complexity is added to break a tie with a plain MOVI.
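+// E.g. on such cores "movi v0.2d, #0" followed by reading s0 or d0 is
+// expected to be cheaper than "fmov s0, wzr", since the vector zeroing
+// itself is free.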
+let AddedComplexity = 1 in {
+def : Pat<(f32 fpimm0),
+ (f32 (EXTRACT_SUBREG (v2i64 (MOVIv2d_ns (i32 0))), ssub))>,
+ Requires<[HasZCZ]>;
+def : Pat<(f64 fpimm0),
+ (f64 (EXTRACT_SUBREG (v2i64 (MOVIv2d_ns (i32 0))), dsub))>,
+ Requires<[HasZCZ]>;
+}
+
+def : Pat<(v2i64 immAllZerosV), (MOVIv2d_ns (i32 0))>;
+def : Pat<(v4i32 immAllZerosV), (MOVIv2d_ns (i32 0))>;
+def : Pat<(v8i16 immAllZerosV), (MOVIv2d_ns (i32 0))>;
+def : Pat<(v16i8 immAllZerosV), (MOVIv2d_ns (i32 0))>;
+
+def : Pat<(v2i64 immAllOnesV), (MOVIv2d_ns (i32 255))>;
+def : Pat<(v4i32 immAllOnesV), (MOVIv2d_ns (i32 255))>;
+def : Pat<(v8i16 immAllOnesV), (MOVIv2d_ns (i32 255))>;
+def : Pat<(v16i8 immAllOnesV), (MOVIv2d_ns (i32 255))>;
+
+def : Pat<(v2f64 (AArch64dup (f64 fpimm0))), (MOVIv2d_ns (i32 0))>;
+def : Pat<(v4f32 (AArch64dup (f32 fpimm0))), (MOVIv2d_ns (i32 0))>;
+
+// EDIT per word & halfword: 2s, 4h, 4s, & 8h
+defm MOVI : SIMDModifiedImmVectorShift<0, 0b10, 0b00, "movi">;
+
+def : InstAlias<"movi $Vd.4h, $imm", (MOVIv4i16 V64:$Vd, imm0_255:$imm, 0), 0>;
+def : InstAlias<"movi $Vd.8h, $imm", (MOVIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>;
+def : InstAlias<"movi $Vd.2s, $imm", (MOVIv2i32 V64:$Vd, imm0_255:$imm, 0), 0>;
+def : InstAlias<"movi $Vd.4s, $imm", (MOVIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>;
+
+def : InstAlias<"movi.4h $Vd, $imm", (MOVIv4i16 V64:$Vd, imm0_255:$imm, 0), 0>;
+def : InstAlias<"movi.8h $Vd, $imm", (MOVIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>;
+def : InstAlias<"movi.2s $Vd, $imm", (MOVIv2i32 V64:$Vd, imm0_255:$imm, 0), 0>;
+def : InstAlias<"movi.4s $Vd, $imm", (MOVIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>;
+
+def : Pat<(v2i32 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))),
+ (MOVIv2i32 imm0_255:$imm8, imm:$shift)>;
+def : Pat<(v4i32 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))),
+ (MOVIv4i32 imm0_255:$imm8, imm:$shift)>;
+def : Pat<(v4i16 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))),
+ (MOVIv4i16 imm0_255:$imm8, imm:$shift)>;
+def : Pat<(v8i16 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))),
+ (MOVIv8i16 imm0_255:$imm8, imm:$shift)>;
+
+// EDIT per word: 2s & 4s with MSL shifter
+def MOVIv2s_msl : SIMDModifiedImmMoveMSL<0, 0, {1,1,0,?}, V64, "movi", ".2s",
+ [(set (v2i32 V64:$Rd),
+ (AArch64movi_msl imm0_255:$imm8, (i32 imm:$shift)))]>;
+def MOVIv4s_msl : SIMDModifiedImmMoveMSL<1, 0, {1,1,0,?}, V128, "movi", ".4s",
+ [(set (v4i32 V128:$Rd),
+ (AArch64movi_msl imm0_255:$imm8, (i32 imm:$shift)))]>;
+
+// Per byte: 8b & 16b
+def MOVIv8b_ns : SIMDModifiedImmVectorNoShift<0, 0, 0b1110, V64, imm0_255,
+ "movi", ".8b",
+ [(set (v8i8 V64:$Rd), (AArch64movi imm0_255:$imm8))]>;
+def MOVIv16b_ns : SIMDModifiedImmVectorNoShift<1, 0, 0b1110, V128, imm0_255,
+ "movi", ".16b",
+ [(set (v16i8 V128:$Rd), (AArch64movi imm0_255:$imm8))]>;
+
+// AdvSIMD MVNI
+
+// EDIT per word & halfword: 2s, 4h, 4s, & 8h
+defm MVNI : SIMDModifiedImmVectorShift<1, 0b10, 0b00, "mvni">;
+
+def : InstAlias<"mvni $Vd.4h, $imm", (MVNIv4i16 V64:$Vd, imm0_255:$imm, 0), 0>;
+def : InstAlias<"mvni $Vd.8h, $imm", (MVNIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>;
+def : InstAlias<"mvni $Vd.2s, $imm", (MVNIv2i32 V64:$Vd, imm0_255:$imm, 0), 0>;
+def : InstAlias<"mvni $Vd.4s, $imm", (MVNIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>;
+
+def : InstAlias<"mvni.4h $Vd, $imm", (MVNIv4i16 V64:$Vd, imm0_255:$imm, 0), 0>;
+def : InstAlias<"mvni.8h $Vd, $imm", (MVNIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>;
+def : InstAlias<"mvni.2s $Vd, $imm", (MVNIv2i32 V64:$Vd, imm0_255:$imm, 0), 0>;
+def : InstAlias<"mvni.4s $Vd, $imm", (MVNIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>;
+
+def : Pat<(v2i32 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))),
+ (MVNIv2i32 imm0_255:$imm8, imm:$shift)>;
+def : Pat<(v4i32 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))),
+ (MVNIv4i32 imm0_255:$imm8, imm:$shift)>;
+def : Pat<(v4i16 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))),
+ (MVNIv4i16 imm0_255:$imm8, imm:$shift)>;
+def : Pat<(v8i16 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))),
+ (MVNIv8i16 imm0_255:$imm8, imm:$shift)>;
+
+// EDIT per word: 2s & 4s with MSL shifter
+def MVNIv2s_msl : SIMDModifiedImmMoveMSL<0, 1, {1,1,0,?}, V64, "mvni", ".2s",
+ [(set (v2i32 V64:$Rd),
+ (AArch64mvni_msl imm0_255:$imm8, (i32 imm:$shift)))]>;
+def MVNIv4s_msl : SIMDModifiedImmMoveMSL<1, 1, {1,1,0,?}, V128, "mvni", ".4s",
+ [(set (v4i32 V128:$Rd),
+ (AArch64mvni_msl imm0_255:$imm8, (i32 imm:$shift)))]>;
+
+//----------------------------------------------------------------------------
+// AdvSIMD indexed element
+//----------------------------------------------------------------------------
+
+let neverHasSideEffects = 1 in {
+ defm FMLA : SIMDFPIndexedSDTied<0, 0b0001, "fmla">;
+ defm FMLS : SIMDFPIndexedSDTied<0, 0b0101, "fmls">;
+}
+
+// NOTE: Operands are reordered in the FMLA/FMLS PatFrags because the
+// instruction expects the addend first, while the intrinsic expects it last.
+
+// On the other hand, there are quite a few valid combinatorial options due to
+// the commutativity of multiplication and the fact that (-x) * y = x * (-y).
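+// For example, both multiplicand orderings below bind the addend to $LHS,
+// which becomes the tied accumulator operand, so
+// "fmla v0.4s, v1.4s, v2.s[1]" computes v0 + (v1 * v2.s[1]) into v0.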
+defm : SIMDFPIndexedSDTiedPatterns<"FMLA",
+ TriOpFrag<(fma node:$RHS, node:$MHS, node:$LHS)>>;
+defm : SIMDFPIndexedSDTiedPatterns<"FMLA",
+ TriOpFrag<(fma node:$MHS, node:$RHS, node:$LHS)>>;
+
+defm : SIMDFPIndexedSDTiedPatterns<"FMLS",
+ TriOpFrag<(fma node:$MHS, (fneg node:$RHS), node:$LHS)> >;
+defm : SIMDFPIndexedSDTiedPatterns<"FMLS",
+ TriOpFrag<(fma node:$RHS, (fneg node:$MHS), node:$LHS)> >;
+defm : SIMDFPIndexedSDTiedPatterns<"FMLS",
+ TriOpFrag<(fma (fneg node:$RHS), node:$MHS, node:$LHS)> >;
+defm : SIMDFPIndexedSDTiedPatterns<"FMLS",
+ TriOpFrag<(fma (fneg node:$MHS), node:$RHS, node:$LHS)> >;
+
+multiclass FMLSIndexedAfterNegPatterns<SDPatternOperator OpNode> {
+ // 3 variants for the .2s version: DUPLANE from 128-bit, DUPLANE from 64-bit
+ // and DUP scalar.
+ def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn),
+ (AArch64duplane32 (v4f32 (fneg V128:$Rm)),
+ VectorIndexS:$idx))),
+ (FMLSv2i32_indexed V64:$Rd, V64:$Rn, V128:$Rm, VectorIndexS:$idx)>;
+ def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn),
+ (v2f32 (AArch64duplane32
+ (v4f32 (insert_subvector undef,
+ (v2f32 (fneg V64:$Rm)),
+ (i32 0))),
+ VectorIndexS:$idx)))),
+ (FMLSv2i32_indexed V64:$Rd, V64:$Rn,
+ (SUBREG_TO_REG (i32 0), V64:$Rm, dsub),
+ VectorIndexS:$idx)>;
+ def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn),
+ (AArch64dup (f32 (fneg FPR32Op:$Rm))))),
+ (FMLSv2i32_indexed V64:$Rd, V64:$Rn,
+ (SUBREG_TO_REG (i32 0), FPR32Op:$Rm, ssub), (i64 0))>;
+
+ // 3 variants for the .4s version: DUPLANE from 128-bit, DUPLANE from 64-bit
+ // and DUP scalar.
+ def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn),
+ (AArch64duplane32 (v4f32 (fneg V128:$Rm)),
+ VectorIndexS:$idx))),
+ (FMLSv4i32_indexed V128:$Rd, V128:$Rn, V128:$Rm,
+ VectorIndexS:$idx)>;
+ def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn),
+ (v4f32 (AArch64duplane32
+ (v4f32 (insert_subvector undef,
+ (v2f32 (fneg V64:$Rm)),
+ (i32 0))),
+ VectorIndexS:$idx)))),
+ (FMLSv4i32_indexed V128:$Rd, V128:$Rn,
+ (SUBREG_TO_REG (i32 0), V64:$Rm, dsub),
+ VectorIndexS:$idx)>;
+ def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn),
+ (AArch64dup (f32 (fneg FPR32Op:$Rm))))),
+ (FMLSv4i32_indexed V128:$Rd, V128:$Rn,
+ (SUBREG_TO_REG (i32 0), FPR32Op:$Rm, ssub), (i64 0))>;
+
+ // 2 variants for the .2d version: DUPLANE from 128-bit, and DUP scalar
+ // (DUPLANE from 64-bit would be trivial).
+ def : Pat<(v2f64 (OpNode (v2f64 V128:$Rd), (v2f64 V128:$Rn),
+ (AArch64duplane64 (v2f64 (fneg V128:$Rm)),
+ VectorIndexD:$idx))),
+ (FMLSv2i64_indexed
+                V128:$Rd, V128:$Rn, V128:$Rm, VectorIndexD:$idx)>;
+ def : Pat<(v2f64 (OpNode (v2f64 V128:$Rd), (v2f64 V128:$Rn),
+ (AArch64dup (f64 (fneg FPR64Op:$Rm))))),
+ (FMLSv2i64_indexed V128:$Rd, V128:$Rn,
+ (SUBREG_TO_REG (i32 0), FPR64Op:$Rm, dsub), (i64 0))>;
+
+ // 2 variants for 32-bit scalar version: extract from .2s or from .4s
+ def : Pat<(f32 (OpNode (f32 FPR32:$Rd), (f32 FPR32:$Rn),
+ (vector_extract (v4f32 (fneg V128:$Rm)),
+ VectorIndexS:$idx))),
+ (FMLSv1i32_indexed FPR32:$Rd, FPR32:$Rn,
+ V128:$Rm, VectorIndexS:$idx)>;
+ def : Pat<(f32 (OpNode (f32 FPR32:$Rd), (f32 FPR32:$Rn),
+ (vector_extract (v2f32 (fneg V64:$Rm)),
+ VectorIndexS:$idx))),
+ (FMLSv1i32_indexed FPR32:$Rd, FPR32:$Rn,
+ (SUBREG_TO_REG (i32 0), V64:$Rm, dsub), VectorIndexS:$idx)>;
+
+  // 1 variant for the 64-bit scalar version: extract from .2d
+  // (extracting from .1d would be trivial).
+ def : Pat<(f64 (OpNode (f64 FPR64:$Rd), (f64 FPR64:$Rn),
+                         (vector_extract (v2f64 (fneg V128:$Rm)),
+                                         VectorIndexD:$idx))),
+            (FMLSv1i64_indexed FPR64:$Rd, FPR64:$Rn,
+                V128:$Rm, VectorIndexD:$idx)>;
+}
+
+defm : FMLSIndexedAfterNegPatterns<
+ TriOpFrag<(fma node:$RHS, node:$MHS, node:$LHS)> >;
+defm : FMLSIndexedAfterNegPatterns<
+ TriOpFrag<(fma node:$MHS, node:$RHS, node:$LHS)> >;
+
+defm FMULX : SIMDFPIndexedSD<1, 0b1001, "fmulx", int_aarch64_neon_fmulx>;
+defm FMUL : SIMDFPIndexedSD<0, 0b1001, "fmul", fmul>;
+
+def : Pat<(v2f32 (fmul V64:$Rn, (AArch64dup (f32 FPR32:$Rm)))),
+ (FMULv2i32_indexed V64:$Rn,
+ (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rm, ssub),
+ (i64 0))>;
+def : Pat<(v4f32 (fmul V128:$Rn, (AArch64dup (f32 FPR32:$Rm)))),
+ (FMULv4i32_indexed V128:$Rn,
+ (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rm, ssub),
+ (i64 0))>;
+def : Pat<(v2f64 (fmul V128:$Rn, (AArch64dup (f64 FPR64:$Rm)))),
+ (FMULv2i64_indexed V128:$Rn,
+ (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR64:$Rm, dsub),
+ (i64 0))>;
+
+defm SQDMULH : SIMDIndexedHS<0, 0b1100, "sqdmulh", int_aarch64_neon_sqdmulh>;
+defm SQRDMULH : SIMDIndexedHS<0, 0b1101, "sqrdmulh", int_aarch64_neon_sqrdmulh>;
+defm MLA : SIMDVectorIndexedHSTied<1, 0b0000, "mla",
+ TriOpFrag<(add node:$LHS, (mul node:$MHS, node:$RHS))>>;
+defm MLS : SIMDVectorIndexedHSTied<1, 0b0100, "mls",
+ TriOpFrag<(sub node:$LHS, (mul node:$MHS, node:$RHS))>>;
+defm MUL : SIMDVectorIndexedHS<0, 0b1000, "mul", mul>;
+defm SMLAL : SIMDVectorIndexedLongSDTied<0, 0b0010, "smlal",
+ TriOpFrag<(add node:$LHS, (int_aarch64_neon_smull node:$MHS, node:$RHS))>>;
+defm SMLSL : SIMDVectorIndexedLongSDTied<0, 0b0110, "smlsl",
+ TriOpFrag<(sub node:$LHS, (int_aarch64_neon_smull node:$MHS, node:$RHS))>>;
+defm SMULL : SIMDVectorIndexedLongSD<0, 0b1010, "smull",
+ int_aarch64_neon_smull>;
+defm SQDMLAL : SIMDIndexedLongSQDMLXSDTied<0, 0b0011, "sqdmlal",
+ int_aarch64_neon_sqadd>;
+defm SQDMLSL : SIMDIndexedLongSQDMLXSDTied<0, 0b0111, "sqdmlsl",
+ int_aarch64_neon_sqsub>;
+defm SQDMULL : SIMDIndexedLongSD<0, 0b1011, "sqdmull", int_aarch64_neon_sqdmull>;
+defm UMLAL : SIMDVectorIndexedLongSDTied<1, 0b0010, "umlal",
+ TriOpFrag<(add node:$LHS, (int_aarch64_neon_umull node:$MHS, node:$RHS))>>;
+defm UMLSL : SIMDVectorIndexedLongSDTied<1, 0b0110, "umlsl",
+ TriOpFrag<(sub node:$LHS, (int_aarch64_neon_umull node:$MHS, node:$RHS))>>;
+defm UMULL : SIMDVectorIndexedLongSD<1, 0b1010, "umull",
+ int_aarch64_neon_umull>;
+
+// A scalar sqdmull with the second operand being a vector lane can be
+// handled directly with the indexed instruction encoding.
+def : Pat<(int_aarch64_neon_sqdmulls_scalar (i32 FPR32:$Rn),
+ (vector_extract (v4i32 V128:$Vm),
+ VectorIndexS:$idx)),
+ (SQDMULLv1i64_indexed FPR32:$Rn, V128:$Vm, VectorIndexS:$idx)>;
+
+//----------------------------------------------------------------------------
+// AdvSIMD scalar shift instructions
+//----------------------------------------------------------------------------
+defm FCVTZS : SIMDScalarRShiftSD<0, 0b11111, "fcvtzs">;
+defm FCVTZU : SIMDScalarRShiftSD<1, 0b11111, "fcvtzu">;
+defm SCVTF : SIMDScalarRShiftSD<0, 0b11100, "scvtf">;
+defm UCVTF : SIMDScalarRShiftSD<1, 0b11100, "ucvtf">;
+// Codegen patterns for the above. We don't put these directly on the
+// instructions because TableGen's type inference can't handle the truth.
+// Having the same base pattern for fp <--> int totally freaks it out.
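+// (Both directions share the source shape (intrinsic FPR32:$Rn,
+// vecshiftR32:$imm); only the result type distinguishes fcvtzs-style
+// fp->int from scvtf-style int->fp, which is what the inference chokes on.)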
+def : Pat<(int_aarch64_neon_vcvtfp2fxs FPR32:$Rn, vecshiftR32:$imm),
+ (FCVTZSs FPR32:$Rn, vecshiftR32:$imm)>;
+def : Pat<(int_aarch64_neon_vcvtfp2fxu FPR32:$Rn, vecshiftR32:$imm),
+ (FCVTZUs FPR32:$Rn, vecshiftR32:$imm)>;
+def : Pat<(i64 (int_aarch64_neon_vcvtfp2fxs (f64 FPR64:$Rn), vecshiftR64:$imm)),
+ (FCVTZSd FPR64:$Rn, vecshiftR64:$imm)>;
+def : Pat<(i64 (int_aarch64_neon_vcvtfp2fxu (f64 FPR64:$Rn), vecshiftR64:$imm)),
+ (FCVTZUd FPR64:$Rn, vecshiftR64:$imm)>;
+def : Pat<(v1i64 (int_aarch64_neon_vcvtfp2fxs (v1f64 FPR64:$Rn),
+ vecshiftR64:$imm)),
+ (FCVTZSd FPR64:$Rn, vecshiftR64:$imm)>;
+def : Pat<(v1i64 (int_aarch64_neon_vcvtfp2fxu (v1f64 FPR64:$Rn),
+ vecshiftR64:$imm)),
+ (FCVTZUd FPR64:$Rn, vecshiftR64:$imm)>;
+def : Pat<(int_aarch64_neon_vcvtfxs2fp FPR32:$Rn, vecshiftR32:$imm),
+ (SCVTFs FPR32:$Rn, vecshiftR32:$imm)>;
+def : Pat<(int_aarch64_neon_vcvtfxu2fp FPR32:$Rn, vecshiftR32:$imm),
+ (UCVTFs FPR32:$Rn, vecshiftR32:$imm)>;
+def : Pat<(f64 (int_aarch64_neon_vcvtfxs2fp (i64 FPR64:$Rn), vecshiftR64:$imm)),
+ (SCVTFd FPR64:$Rn, vecshiftR64:$imm)>;
+def : Pat<(f64 (int_aarch64_neon_vcvtfxu2fp (i64 FPR64:$Rn), vecshiftR64:$imm)),
+ (UCVTFd FPR64:$Rn, vecshiftR64:$imm)>;
+def : Pat<(v1f64 (int_aarch64_neon_vcvtfxs2fp (v1i64 FPR64:$Rn),
+ vecshiftR64:$imm)),
+ (SCVTFd FPR64:$Rn, vecshiftR64:$imm)>;
+def : Pat<(v1f64 (int_aarch64_neon_vcvtfxu2fp (v1i64 FPR64:$Rn),
+ vecshiftR64:$imm)),
+ (UCVTFd FPR64:$Rn, vecshiftR64:$imm)>;
+
+defm SHL : SIMDScalarLShiftD< 0, 0b01010, "shl", AArch64vshl>;
+defm SLI : SIMDScalarLShiftDTied<1, 0b01010, "sli">;
+defm SQRSHRN : SIMDScalarRShiftBHS< 0, 0b10011, "sqrshrn",
+ int_aarch64_neon_sqrshrn>;
+defm SQRSHRUN : SIMDScalarRShiftBHS< 1, 0b10001, "sqrshrun",
+ int_aarch64_neon_sqrshrun>;
+defm SQSHLU : SIMDScalarLShiftBHSD<1, 0b01100, "sqshlu", AArch64sqshlui>;
+defm SQSHL : SIMDScalarLShiftBHSD<0, 0b01110, "sqshl", AArch64sqshli>;
+defm SQSHRN : SIMDScalarRShiftBHS< 0, 0b10010, "sqshrn",
+ int_aarch64_neon_sqshrn>;
+defm SQSHRUN : SIMDScalarRShiftBHS< 1, 0b10000, "sqshrun",
+ int_aarch64_neon_sqshrun>;
+defm SRI : SIMDScalarRShiftDTied< 1, 0b01000, "sri">;
+defm SRSHR : SIMDScalarRShiftD< 0, 0b00100, "srshr", AArch64srshri>;
+defm SRSRA : SIMDScalarRShiftDTied< 0, 0b00110, "srsra",
+ TriOpFrag<(add node:$LHS,
+ (AArch64srshri node:$MHS, node:$RHS))>>;
+defm SSHR : SIMDScalarRShiftD< 0, 0b00000, "sshr", AArch64vashr>;
+defm SSRA : SIMDScalarRShiftDTied< 0, 0b00010, "ssra",
+ TriOpFrag<(add node:$LHS,
+ (AArch64vashr node:$MHS, node:$RHS))>>;
+defm UQRSHRN : SIMDScalarRShiftBHS< 1, 0b10011, "uqrshrn",
+ int_aarch64_neon_uqrshrn>;
+defm UQSHL : SIMDScalarLShiftBHSD<1, 0b01110, "uqshl", AArch64uqshli>;
+defm UQSHRN : SIMDScalarRShiftBHS< 1, 0b10010, "uqshrn",
+ int_aarch64_neon_uqshrn>;
+defm URSHR : SIMDScalarRShiftD< 1, 0b00100, "urshr", AArch64urshri>;
+defm URSRA : SIMDScalarRShiftDTied< 1, 0b00110, "ursra",
+ TriOpFrag<(add node:$LHS,
+ (AArch64urshri node:$MHS, node:$RHS))>>;
+defm USHR : SIMDScalarRShiftD< 1, 0b00000, "ushr", AArch64vlshr>;
+defm USRA : SIMDScalarRShiftDTied< 1, 0b00010, "usra",
+ TriOpFrag<(add node:$LHS,
+ (AArch64vlshr node:$MHS, node:$RHS))>>;
+
+//----------------------------------------------------------------------------
+// AdvSIMD vector shift instructions
+//----------------------------------------------------------------------------
+defm FCVTZS:SIMDVectorRShiftSD<0, 0b11111, "fcvtzs", int_aarch64_neon_vcvtfp2fxs>;
+defm FCVTZU:SIMDVectorRShiftSD<1, 0b11111, "fcvtzu", int_aarch64_neon_vcvtfp2fxu>;
+defm SCVTF: SIMDVectorRShiftSDToFP<0, 0b11100, "scvtf",
+ int_aarch64_neon_vcvtfxs2fp>;
+defm RSHRN : SIMDVectorRShiftNarrowBHS<0, 0b10001, "rshrn",
+ int_aarch64_neon_rshrn>;
+defm SHL : SIMDVectorLShiftBHSD<0, 0b01010, "shl", AArch64vshl>;
+defm SHRN : SIMDVectorRShiftNarrowBHS<0, 0b10000, "shrn",
+ BinOpFrag<(trunc (AArch64vashr node:$LHS, node:$RHS))>>;
+defm SLI : SIMDVectorLShiftBHSDTied<1, 0b01010, "sli", int_aarch64_neon_vsli>;
+def : Pat<(v1i64 (int_aarch64_neon_vsli (v1i64 FPR64:$Rd), (v1i64 FPR64:$Rn),
+ (i32 vecshiftL64:$imm))),
+ (SLId FPR64:$Rd, FPR64:$Rn, vecshiftL64:$imm)>;
+defm SQRSHRN : SIMDVectorRShiftNarrowBHS<0, 0b10011, "sqrshrn",
+ int_aarch64_neon_sqrshrn>;
+defm SQRSHRUN: SIMDVectorRShiftNarrowBHS<1, 0b10001, "sqrshrun",
+ int_aarch64_neon_sqrshrun>;
+defm SQSHLU : SIMDVectorLShiftBHSD<1, 0b01100, "sqshlu", AArch64sqshlui>;
+defm SQSHL : SIMDVectorLShiftBHSD<0, 0b01110, "sqshl", AArch64sqshli>;
+defm SQSHRN : SIMDVectorRShiftNarrowBHS<0, 0b10010, "sqshrn",
+ int_aarch64_neon_sqshrn>;
+defm SQSHRUN : SIMDVectorRShiftNarrowBHS<1, 0b10000, "sqshrun",
+ int_aarch64_neon_sqshrun>;
+defm SRI : SIMDVectorRShiftBHSDTied<1, 0b01000, "sri", int_aarch64_neon_vsri>;
+def : Pat<(v1i64 (int_aarch64_neon_vsri (v1i64 FPR64:$Rd), (v1i64 FPR64:$Rn),
+ (i32 vecshiftR64:$imm))),
+ (SRId FPR64:$Rd, FPR64:$Rn, vecshiftR64:$imm)>;
+defm SRSHR : SIMDVectorRShiftBHSD<0, 0b00100, "srshr", AArch64srshri>;
+defm SRSRA : SIMDVectorRShiftBHSDTied<0, 0b00110, "srsra",
+ TriOpFrag<(add node:$LHS,
+ (AArch64srshri node:$MHS, node:$RHS))> >;
+defm SSHLL : SIMDVectorLShiftLongBHSD<0, 0b10100, "sshll",
+ BinOpFrag<(AArch64vshl (sext node:$LHS), node:$RHS)>>;
+
+defm SSHR : SIMDVectorRShiftBHSD<0, 0b00000, "sshr", AArch64vashr>;
+defm SSRA : SIMDVectorRShiftBHSDTied<0, 0b00010, "ssra",
+ TriOpFrag<(add node:$LHS, (AArch64vashr node:$MHS, node:$RHS))>>;
+defm UCVTF : SIMDVectorRShiftSDToFP<1, 0b11100, "ucvtf",
+ int_aarch64_neon_vcvtfxu2fp>;
+defm UQRSHRN : SIMDVectorRShiftNarrowBHS<1, 0b10011, "uqrshrn",
+ int_aarch64_neon_uqrshrn>;
+defm UQSHL : SIMDVectorLShiftBHSD<1, 0b01110, "uqshl", AArch64uqshli>;
+defm UQSHRN : SIMDVectorRShiftNarrowBHS<1, 0b10010, "uqshrn",
+ int_aarch64_neon_uqshrn>;
+defm URSHR : SIMDVectorRShiftBHSD<1, 0b00100, "urshr", AArch64urshri>;
+defm URSRA : SIMDVectorRShiftBHSDTied<1, 0b00110, "ursra",
+ TriOpFrag<(add node:$LHS,
+ (AArch64urshri node:$MHS, node:$RHS))> >;
+defm USHLL : SIMDVectorLShiftLongBHSD<1, 0b10100, "ushll",
+ BinOpFrag<(AArch64vshl (zext node:$LHS), node:$RHS)>>;
+defm USHR : SIMDVectorRShiftBHSD<1, 0b00000, "ushr", AArch64vlshr>;
+defm USRA : SIMDVectorRShiftBHSDTied<1, 0b00010, "usra",
+ TriOpFrag<(add node:$LHS, (AArch64vlshr node:$MHS, node:$RHS))> >;
+
+// SHRN patterns for when a logical right shift was used instead of an
+// arithmetic one (the immediate guarantees no sign bits actually end up in
+// the result, so it doesn't matter which was used).
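+// (For the v8i16 source, for instance, vecshiftR16Narrow limits the shift to
+// 1-8, so every bit where a logical and an arithmetic shift differ lands at
+// bit 8 or above of its 16-bit lane and is discarded by the truncation.)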
+def : Pat<(v8i8 (trunc (AArch64vlshr (v8i16 V128:$Rn), vecshiftR16Narrow:$imm))),
+ (SHRNv8i8_shift V128:$Rn, vecshiftR16Narrow:$imm)>;
+def : Pat<(v4i16 (trunc (AArch64vlshr (v4i32 V128:$Rn), vecshiftR32Narrow:$imm))),
+ (SHRNv4i16_shift V128:$Rn, vecshiftR32Narrow:$imm)>;
+def : Pat<(v2i32 (trunc (AArch64vlshr (v2i64 V128:$Rn), vecshiftR64Narrow:$imm))),
+ (SHRNv2i32_shift V128:$Rn, vecshiftR64Narrow:$imm)>;
+
+def : Pat<(v16i8 (concat_vectors (v8i8 V64:$Rd),
+ (trunc (AArch64vlshr (v8i16 V128:$Rn),
+ vecshiftR16Narrow:$imm)))),
+ (SHRNv16i8_shift (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub),
+ V128:$Rn, vecshiftR16Narrow:$imm)>;
+def : Pat<(v8i16 (concat_vectors (v4i16 V64:$Rd),
+ (trunc (AArch64vlshr (v4i32 V128:$Rn),
+ vecshiftR32Narrow:$imm)))),
+ (SHRNv8i16_shift (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub),
+ V128:$Rn, vecshiftR32Narrow:$imm)>;
+def : Pat<(v4i32 (concat_vectors (v2i32 V64:$Rd),
+ (trunc (AArch64vlshr (v2i64 V128:$Rn),
+ vecshiftR64Narrow:$imm)))),
+ (SHRNv4i32_shift (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub),
+                           V128:$Rn, vecshiftR64Narrow:$imm)>;
+
+// Vector sign and zero extensions are implemented with SSHLL and USHLL.
+// Anyexts are implemented as zexts.
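+// (A shift-left-long by #0 just widens each lane, so SSHLL/USHLL with a zero
+// shift act as pure extensions; the sxtl/uxtl aliases further down cover
+// exactly these zero-shift forms.)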
+def : Pat<(v8i16 (sext (v8i8 V64:$Rn))), (SSHLLv8i8_shift V64:$Rn, (i32 0))>;
+def : Pat<(v8i16 (zext (v8i8 V64:$Rn))), (USHLLv8i8_shift V64:$Rn, (i32 0))>;
+def : Pat<(v8i16 (anyext (v8i8 V64:$Rn))), (USHLLv8i8_shift V64:$Rn, (i32 0))>;
+def : Pat<(v4i32 (sext (v4i16 V64:$Rn))), (SSHLLv4i16_shift V64:$Rn, (i32 0))>;
+def : Pat<(v4i32 (zext (v4i16 V64:$Rn))), (USHLLv4i16_shift V64:$Rn, (i32 0))>;
+def : Pat<(v4i32 (anyext (v4i16 V64:$Rn))), (USHLLv4i16_shift V64:$Rn, (i32 0))>;
+def : Pat<(v2i64 (sext (v2i32 V64:$Rn))), (SSHLLv2i32_shift V64:$Rn, (i32 0))>;
+def : Pat<(v2i64 (zext (v2i32 V64:$Rn))), (USHLLv2i32_shift V64:$Rn, (i32 0))>;
+def : Pat<(v2i64 (anyext (v2i32 V64:$Rn))), (USHLLv2i32_shift V64:$Rn, (i32 0))>;
+// Also match an extend from the upper half of a 128-bit source register.
+def : Pat<(v8i16 (anyext (v8i8 (extract_subvector V128:$Rn, (i64 8)) ))),
+ (USHLLv16i8_shift V128:$Rn, (i32 0))>;
+def : Pat<(v8i16 (zext (v8i8 (extract_subvector V128:$Rn, (i64 8)) ))),
+ (USHLLv16i8_shift V128:$Rn, (i32 0))>;
+def : Pat<(v8i16 (sext (v8i8 (extract_subvector V128:$Rn, (i64 8)) ))),
+ (SSHLLv16i8_shift V128:$Rn, (i32 0))>;
+def : Pat<(v4i32 (anyext (v4i16 (extract_subvector V128:$Rn, (i64 4)) ))),
+ (USHLLv8i16_shift V128:$Rn, (i32 0))>;
+def : Pat<(v4i32 (zext (v4i16 (extract_subvector V128:$Rn, (i64 4)) ))),
+ (USHLLv8i16_shift V128:$Rn, (i32 0))>;
+def : Pat<(v4i32 (sext (v4i16 (extract_subvector V128:$Rn, (i64 4)) ))),
+ (SSHLLv8i16_shift V128:$Rn, (i32 0))>;
+def : Pat<(v2i64 (anyext (v2i32 (extract_subvector V128:$Rn, (i64 2)) ))),
+ (USHLLv4i32_shift V128:$Rn, (i32 0))>;
+def : Pat<(v2i64 (zext (v2i32 (extract_subvector V128:$Rn, (i64 2)) ))),
+ (USHLLv4i32_shift V128:$Rn, (i32 0))>;
+def : Pat<(v2i64 (sext (v2i32 (extract_subvector V128:$Rn, (i64 2)) ))),
+ (SSHLLv4i32_shift V128:$Rn, (i32 0))>;
+
+// Vector shift sxtl aliases
+def : InstAlias<"sxtl.8h $dst, $src1",
+ (SSHLLv8i8_shift V128:$dst, V64:$src1, 0)>;
+def : InstAlias<"sxtl $dst.8h, $src1.8b",
+ (SSHLLv8i8_shift V128:$dst, V64:$src1, 0)>;
+def : InstAlias<"sxtl.4s $dst, $src1",
+ (SSHLLv4i16_shift V128:$dst, V64:$src1, 0)>;
+def : InstAlias<"sxtl $dst.4s, $src1.4h",
+ (SSHLLv4i16_shift V128:$dst, V64:$src1, 0)>;
+def : InstAlias<"sxtl.2d $dst, $src1",
+ (SSHLLv2i32_shift V128:$dst, V64:$src1, 0)>;
+def : InstAlias<"sxtl $dst.2d, $src1.2s",
+ (SSHLLv2i32_shift V128:$dst, V64:$src1, 0)>;
+
+// Vector shift sxtl2 aliases
+def : InstAlias<"sxtl2.8h $dst, $src1",
+ (SSHLLv16i8_shift V128:$dst, V128:$src1, 0)>;
+def : InstAlias<"sxtl2 $dst.8h, $src1.16b",
+ (SSHLLv16i8_shift V128:$dst, V128:$src1, 0)>;
+def : InstAlias<"sxtl2.4s $dst, $src1",
+ (SSHLLv8i16_shift V128:$dst, V128:$src1, 0)>;
+def : InstAlias<"sxtl2 $dst.4s, $src1.8h",
+ (SSHLLv8i16_shift V128:$dst, V128:$src1, 0)>;
+def : InstAlias<"sxtl2.2d $dst, $src1",
+ (SSHLLv4i32_shift V128:$dst, V128:$src1, 0)>;
+def : InstAlias<"sxtl2 $dst.2d, $src1.4s",
+ (SSHLLv4i32_shift V128:$dst, V128:$src1, 0)>;
+
+// Vector shift uxtl aliases
+def : InstAlias<"uxtl.8h $dst, $src1",
+ (USHLLv8i8_shift V128:$dst, V64:$src1, 0)>;
+def : InstAlias<"uxtl $dst.8h, $src1.8b",
+ (USHLLv8i8_shift V128:$dst, V64:$src1, 0)>;
+def : InstAlias<"uxtl.4s $dst, $src1",
+ (USHLLv4i16_shift V128:$dst, V64:$src1, 0)>;
+def : InstAlias<"uxtl $dst.4s, $src1.4h",
+ (USHLLv4i16_shift V128:$dst, V64:$src1, 0)>;
+def : InstAlias<"uxtl.2d $dst, $src1",
+ (USHLLv2i32_shift V128:$dst, V64:$src1, 0)>;
+def : InstAlias<"uxtl $dst.2d, $src1.2s",
+ (USHLLv2i32_shift V128:$dst, V64:$src1, 0)>;
+
+// Vector shift uxtl2 aliases
+def : InstAlias<"uxtl2.8h $dst, $src1",
+ (USHLLv16i8_shift V128:$dst, V128:$src1, 0)>;
+def : InstAlias<"uxtl2 $dst.8h, $src1.16b",
+ (USHLLv16i8_shift V128:$dst, V128:$src1, 0)>;
+def : InstAlias<"uxtl2.4s $dst, $src1",
+ (USHLLv8i16_shift V128:$dst, V128:$src1, 0)>;
+def : InstAlias<"uxtl2 $dst.4s, $src1.8h",
+ (USHLLv8i16_shift V128:$dst, V128:$src1, 0)>;
+def : InstAlias<"uxtl2.2d $dst, $src1",
+ (USHLLv4i32_shift V128:$dst, V128:$src1, 0)>;
+def : InstAlias<"uxtl2 $dst.2d, $src1.4s",
+ (USHLLv4i32_shift V128:$dst, V128:$src1, 0)>;
+
+// If an integer is about to be converted to a floating point value,
+// just load it on the floating point unit.
+// These patterns are more complex because floating point loads do not
+// support sign extension.
+// The sign extension has to be explicitly added and is only supported for
+// one step: byte-to-half, half-to-word, word-to-doubleword.
+// SCVTF GPR -> FPR is 9 cycles.
+// SCVTF FPR -> FPR is 4 cycles.
+// (sign extension with lengthen) SXTL FPR -> FPR is 2 cycles.
+// Therefore, we can do 2 sign extensions and one SCVTF FPR -> FPR
+// and still be faster.
+// However, this is not good for code size.
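+// (Worked out for the 8-bit -> float path below: two SXTLs at 2 cycles each
+// plus one 4-cycle SCVTF FPR -> FPR is 8 cycles, against 9 for the single
+// SCVTF GPR -> FPR.)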
+// 8-bit -> float: 2 size steps up.
+class SExtLoadi8CVTf32Pat<dag addrmode, dag INST>
+ : Pat<(f32 (sint_to_fp (i32 (sextloadi8 addrmode)))),
+ (SCVTFv1i32 (f32 (EXTRACT_SUBREG
+ (SSHLLv4i16_shift
+ (f64
+ (EXTRACT_SUBREG
+ (SSHLLv8i8_shift
+ (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
+ INST,
+ bsub),
+ 0),
+ dsub)),
+ 0),
+ ssub)))>, Requires<[NotForCodeSize]>;
+
+def : SExtLoadi8CVTf32Pat<(ro8.Wpat GPR64sp:$Rn, GPR32:$Rm, ro8.Wext:$ext),
+ (LDRBroW GPR64sp:$Rn, GPR32:$Rm, ro8.Wext:$ext)>;
+def : SExtLoadi8CVTf32Pat<(ro8.Xpat GPR64sp:$Rn, GPR64:$Rm, ro8.Xext:$ext),
+ (LDRBroX GPR64sp:$Rn, GPR64:$Rm, ro8.Xext:$ext)>;
+def : SExtLoadi8CVTf32Pat<(am_indexed8 GPR64sp:$Rn, uimm12s1:$offset),
+ (LDRBui GPR64sp:$Rn, uimm12s1:$offset)>;
+def : SExtLoadi8CVTf32Pat<(am_unscaled8 GPR64sp:$Rn, simm9:$offset),
+ (LDURBi GPR64sp:$Rn, simm9:$offset)>;
+
+// 16-bit -> float: 1 size step up.
+class SExtLoadi16CVTf32Pat<dag addrmode, dag INST>
+ : Pat<(f32 (sint_to_fp (i32 (sextloadi16 addrmode)))),
+ (SCVTFv1i32 (f32 (EXTRACT_SUBREG
+ (SSHLLv4i16_shift
+ (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
+ INST,
+ hsub),
+ 0),
+ ssub)))>, Requires<[NotForCodeSize]>;
+
+def : SExtLoadi16CVTf32Pat<(ro16.Wpat GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext),
+ (LDRHroW GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext)>;
+def : SExtLoadi16CVTf32Pat<(ro16.Xpat GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext),
+ (LDRHroX GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext)>;
+def : SExtLoadi16CVTf32Pat<(am_indexed16 GPR64sp:$Rn, uimm12s2:$offset),
+ (LDRHui GPR64sp:$Rn, uimm12s2:$offset)>;
+def : SExtLoadi16CVTf32Pat<(am_unscaled16 GPR64sp:$Rn, simm9:$offset),
+ (LDURHi GPR64sp:$Rn, simm9:$offset)>;
+
+// 32-bit to 32-bit is handled in the target-specific dag combine:
+// performIntToFpCombine.
+// 64-bit integer to 32-bit floating point is not possible with
+// SCVTF on floating point registers (both source and destination
+// must have the same size).
+
+// Here are the patterns for 8, 16, 32, and 64-bit to double.
+// 8-bit -> double: 3 size steps up, so give up and use the GPR path.
+// 16-bit -> double: 2 size steps up.
+class SExtLoadi16CVTf64Pat<dag addrmode, dag INST>
+ : Pat <(f64 (sint_to_fp (i32 (sextloadi16 addrmode)))),
+ (SCVTFv1i64 (f64 (EXTRACT_SUBREG
+ (SSHLLv2i32_shift
+ (f64
+ (EXTRACT_SUBREG
+ (SSHLLv4i16_shift
+ (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
+ INST,
+ hsub),
+ 0),
+ dsub)),
+ 0),
+ dsub)))>, Requires<[NotForCodeSize]>;
+
+def : SExtLoadi16CVTf64Pat<(ro16.Wpat GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext),
+ (LDRHroW GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext)>;
+def : SExtLoadi16CVTf64Pat<(ro16.Xpat GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext),
+ (LDRHroX GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext)>;
+def : SExtLoadi16CVTf64Pat<(am_indexed16 GPR64sp:$Rn, uimm12s2:$offset),
+ (LDRHui GPR64sp:$Rn, uimm12s2:$offset)>;
+def : SExtLoadi16CVTf64Pat<(am_unscaled16 GPR64sp:$Rn, simm9:$offset),
+ (LDURHi GPR64sp:$Rn, simm9:$offset)>;
+// 32-bit -> double: 1 size step up.
+class SExtLoadi32CVTf64Pat<dag addrmode, dag INST>
+ : Pat <(f64 (sint_to_fp (i32 (load addrmode)))),
+ (SCVTFv1i64 (f64 (EXTRACT_SUBREG
+ (SSHLLv2i32_shift
+ (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
+ INST,
+ ssub),
+ 0),
+ dsub)))>, Requires<[NotForCodeSize]>;
+
+def : SExtLoadi32CVTf64Pat<(ro32.Wpat GPR64sp:$Rn, GPR32:$Rm, ro32.Wext:$ext),
+ (LDRSroW GPR64sp:$Rn, GPR32:$Rm, ro32.Wext:$ext)>;
+def : SExtLoadi32CVTf64Pat<(ro32.Xpat GPR64sp:$Rn, GPR64:$Rm, ro32.Xext:$ext),
+ (LDRSroX GPR64sp:$Rn, GPR64:$Rm, ro32.Xext:$ext)>;
+def : SExtLoadi32CVTf64Pat<(am_indexed32 GPR64sp:$Rn, uimm12s4:$offset),
+ (LDRSui GPR64sp:$Rn, uimm12s4:$offset)>;
+def : SExtLoadi32CVTf64Pat<(am_unscaled32 GPR64sp:$Rn, simm9:$offset),
+ (LDURSi GPR64sp:$Rn, simm9:$offset)>;
+
+// 64-bit -> double is handled in the target-specific dag combine:
+// performIntToFpCombine.
+
+
+//----------------------------------------------------------------------------
+// AdvSIMD Load-Store Structure
+//----------------------------------------------------------------------------
+defm LD1 : SIMDLd1Multiple<"ld1">;
+defm LD2 : SIMDLd2Multiple<"ld2">;
+defm LD3 : SIMDLd3Multiple<"ld3">;
+defm LD4 : SIMDLd4Multiple<"ld4">;
+
+defm ST1 : SIMDSt1Multiple<"st1">;
+defm ST2 : SIMDSt2Multiple<"st2">;
+defm ST3 : SIMDSt3Multiple<"st3">;
+defm ST4 : SIMDSt4Multiple<"st4">;
+
+class Ld1Pat<ValueType ty, Instruction INST>
+ : Pat<(ty (load GPR64sp:$Rn)), (INST GPR64sp:$Rn)>;
+
+def : Ld1Pat<v16i8, LD1Onev16b>;
+def : Ld1Pat<v8i16, LD1Onev8h>;
+def : Ld1Pat<v4i32, LD1Onev4s>;
+def : Ld1Pat<v2i64, LD1Onev2d>;
+def : Ld1Pat<v8i8, LD1Onev8b>;
+def : Ld1Pat<v4i16, LD1Onev4h>;
+def : Ld1Pat<v2i32, LD1Onev2s>;
+def : Ld1Pat<v1i64, LD1Onev1d>;
+
+class St1Pat<ValueType ty, Instruction INST>
+ : Pat<(store ty:$Vt, GPR64sp:$Rn),
+ (INST ty:$Vt, GPR64sp:$Rn)>;
+
+def : St1Pat<v16i8, ST1Onev16b>;
+def : St1Pat<v8i16, ST1Onev8h>;
+def : St1Pat<v4i32, ST1Onev4s>;
+def : St1Pat<v2i64, ST1Onev2d>;
+def : St1Pat<v8i8, ST1Onev8b>;
+def : St1Pat<v4i16, ST1Onev4h>;
+def : St1Pat<v2i32, ST1Onev2s>;
+def : St1Pat<v1i64, ST1Onev1d>;
+
+//---
+// Single-element
+//---
+
+defm LD1R : SIMDLdR<0, 0b110, 0, "ld1r", "One", 1, 2, 4, 8>;
+defm LD2R : SIMDLdR<1, 0b110, 0, "ld2r", "Two", 2, 4, 8, 16>;
+defm LD3R : SIMDLdR<0, 0b111, 0, "ld3r", "Three", 3, 6, 12, 24>;
+defm LD4R : SIMDLdR<1, 0b111, 0, "ld4r", "Four", 4, 8, 16, 32>;
+let mayLoad = 1, neverHasSideEffects = 1 in {
+defm LD1 : SIMDLdSingleBTied<0, 0b000, "ld1", VecListOneb, GPR64pi1>;
+defm LD1 : SIMDLdSingleHTied<0, 0b010, 0, "ld1", VecListOneh, GPR64pi2>;
+defm LD1 : SIMDLdSingleSTied<0, 0b100, 0b00, "ld1", VecListOnes, GPR64pi4>;
+defm LD1 : SIMDLdSingleDTied<0, 0b100, 0b01, "ld1", VecListOned, GPR64pi8>;
+defm LD2 : SIMDLdSingleBTied<1, 0b000, "ld2", VecListTwob, GPR64pi2>;
+defm LD2 : SIMDLdSingleHTied<1, 0b010, 0, "ld2", VecListTwoh, GPR64pi4>;
+defm LD2 : SIMDLdSingleSTied<1, 0b100, 0b00, "ld2", VecListTwos, GPR64pi8>;
+defm LD2 : SIMDLdSingleDTied<1, 0b100, 0b01, "ld2", VecListTwod, GPR64pi16>;
+defm LD3 : SIMDLdSingleBTied<0, 0b001, "ld3", VecListThreeb, GPR64pi3>;
+defm LD3 : SIMDLdSingleHTied<0, 0b011, 0, "ld3", VecListThreeh, GPR64pi6>;
+defm LD3 : SIMDLdSingleSTied<0, 0b101, 0b00, "ld3", VecListThrees, GPR64pi12>;
+defm LD3 : SIMDLdSingleDTied<0, 0b101, 0b01, "ld3", VecListThreed, GPR64pi24>;
+defm LD4 : SIMDLdSingleBTied<1, 0b001, "ld4", VecListFourb, GPR64pi4>;
+defm LD4 : SIMDLdSingleHTied<1, 0b011, 0, "ld4", VecListFourh, GPR64pi8>;
+defm LD4 : SIMDLdSingleSTied<1, 0b101, 0b00, "ld4", VecListFours, GPR64pi16>;
+defm LD4 : SIMDLdSingleDTied<1, 0b101, 0b01, "ld4", VecListFourd, GPR64pi32>;
+}
+
+def : Pat<(v8i8 (AArch64dup (i32 (extloadi8 GPR64sp:$Rn)))),
+ (LD1Rv8b GPR64sp:$Rn)>;
+def : Pat<(v16i8 (AArch64dup (i32 (extloadi8 GPR64sp:$Rn)))),
+ (LD1Rv16b GPR64sp:$Rn)>;
+def : Pat<(v4i16 (AArch64dup (i32 (extloadi16 GPR64sp:$Rn)))),
+ (LD1Rv4h GPR64sp:$Rn)>;
+def : Pat<(v8i16 (AArch64dup (i32 (extloadi16 GPR64sp:$Rn)))),
+ (LD1Rv8h GPR64sp:$Rn)>;
+def : Pat<(v2i32 (AArch64dup (i32 (load GPR64sp:$Rn)))),
+ (LD1Rv2s GPR64sp:$Rn)>;
+def : Pat<(v4i32 (AArch64dup (i32 (load GPR64sp:$Rn)))),
+ (LD1Rv4s GPR64sp:$Rn)>;
+def : Pat<(v2i64 (AArch64dup (i64 (load GPR64sp:$Rn)))),
+ (LD1Rv2d GPR64sp:$Rn)>;
+def : Pat<(v1i64 (AArch64dup (i64 (load GPR64sp:$Rn)))),
+ (LD1Rv1d GPR64sp:$Rn)>;
+// Grab the floating point version too
+def : Pat<(v2f32 (AArch64dup (f32 (load GPR64sp:$Rn)))),
+ (LD1Rv2s GPR64sp:$Rn)>;
+def : Pat<(v4f32 (AArch64dup (f32 (load GPR64sp:$Rn)))),
+ (LD1Rv4s GPR64sp:$Rn)>;
+def : Pat<(v2f64 (AArch64dup (f64 (load GPR64sp:$Rn)))),
+ (LD1Rv2d GPR64sp:$Rn)>;
+def : Pat<(v1f64 (AArch64dup (f64 (load GPR64sp:$Rn)))),
+ (LD1Rv1d GPR64sp:$Rn)>;
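+// (Each of these folds a dup-of-load into one load-and-replicate, e.g.
+// "ld1r { v0.4s }, [x0]" in place of an ldr followed by a dup.)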
+
+class Ld1Lane128Pat<SDPatternOperator scalar_load, Operand VecIndex,
+ ValueType VTy, ValueType STy, Instruction LD1>
+ : Pat<(vector_insert (VTy VecListOne128:$Rd),
+ (STy (scalar_load GPR64sp:$Rn)), VecIndex:$idx),
+ (LD1 VecListOne128:$Rd, VecIndex:$idx, GPR64sp:$Rn)>;
+
+def : Ld1Lane128Pat<extloadi8, VectorIndexB, v16i8, i32, LD1i8>;
+def : Ld1Lane128Pat<extloadi16, VectorIndexH, v8i16, i32, LD1i16>;
+def : Ld1Lane128Pat<load, VectorIndexS, v4i32, i32, LD1i32>;
+def : Ld1Lane128Pat<load, VectorIndexS, v4f32, f32, LD1i32>;
+def : Ld1Lane128Pat<load, VectorIndexD, v2i64, i64, LD1i64>;
+def : Ld1Lane128Pat<load, VectorIndexD, v2f64, f64, LD1i64>;
+
+class Ld1Lane64Pat<SDPatternOperator scalar_load, Operand VecIndex,
+ ValueType VTy, ValueType STy, Instruction LD1>
+ : Pat<(vector_insert (VTy VecListOne64:$Rd),
+ (STy (scalar_load GPR64sp:$Rn)), VecIndex:$idx),
+ (EXTRACT_SUBREG
+ (LD1 (SUBREG_TO_REG (i32 0), VecListOne64:$Rd, dsub),
+ VecIndex:$idx, GPR64sp:$Rn),
+ dsub)>;
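+// (The single-lane LD1 only addresses lanes of a full 128-bit register, hence
+// the round trip above: widen the 64-bit vector with SUBREG_TO_REG, load the
+// lane, then take the low half back with EXTRACT_SUBREG.)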
+
+def : Ld1Lane64Pat<extloadi8, VectorIndexB, v8i8, i32, LD1i8>;
+def : Ld1Lane64Pat<extloadi16, VectorIndexH, v4i16, i32, LD1i16>;
+def : Ld1Lane64Pat<load, VectorIndexS, v2i32, i32, LD1i32>;
+def : Ld1Lane64Pat<load, VectorIndexS, v2f32, f32, LD1i32>;
+
+
+defm LD1 : SIMDLdSt1SingleAliases<"ld1">;
+defm LD2 : SIMDLdSt2SingleAliases<"ld2">;
+defm LD3 : SIMDLdSt3SingleAliases<"ld3">;
+defm LD4 : SIMDLdSt4SingleAliases<"ld4">;
+
+// Stores
+defm ST1 : SIMDStSingleB<0, 0b000, "st1", VecListOneb, GPR64pi1>;
+defm ST1 : SIMDStSingleH<0, 0b010, 0, "st1", VecListOneh, GPR64pi2>;
+defm ST1 : SIMDStSingleS<0, 0b100, 0b00, "st1", VecListOnes, GPR64pi4>;
+defm ST1 : SIMDStSingleD<0, 0b100, 0b01, "st1", VecListOned, GPR64pi8>;
+
+let AddedComplexity = 15 in
+class St1Lane128Pat<SDPatternOperator scalar_store, Operand VecIndex,
+ ValueType VTy, ValueType STy, Instruction ST1>
+ : Pat<(scalar_store
+ (STy (vector_extract (VTy VecListOne128:$Vt), VecIndex:$idx)),
+ GPR64sp:$Rn),
+ (ST1 VecListOne128:$Vt, VecIndex:$idx, GPR64sp:$Rn)>;
+
+def : St1Lane128Pat<truncstorei8, VectorIndexB, v16i8, i32, ST1i8>;
+def : St1Lane128Pat<truncstorei16, VectorIndexH, v8i16, i32, ST1i16>;
+def : St1Lane128Pat<store, VectorIndexS, v4i32, i32, ST1i32>;
+def : St1Lane128Pat<store, VectorIndexS, v4f32, f32, ST1i32>;
+def : St1Lane128Pat<store, VectorIndexD, v2i64, i64, ST1i64>;
+def : St1Lane128Pat<store, VectorIndexD, v2f64, f64, ST1i64>;
+
+let AddedComplexity = 15 in
+class St1Lane64Pat<SDPatternOperator scalar_store, Operand VecIndex,
+ ValueType VTy, ValueType STy, Instruction ST1>
+ : Pat<(scalar_store
+ (STy (vector_extract (VTy VecListOne64:$Vt), VecIndex:$idx)),
+ GPR64sp:$Rn),
+ (ST1 (SUBREG_TO_REG (i32 0), VecListOne64:$Vt, dsub),
+ VecIndex:$idx, GPR64sp:$Rn)>;
+
+def : St1Lane64Pat<truncstorei8, VectorIndexB, v8i8, i32, ST1i8>;
+def : St1Lane64Pat<truncstorei16, VectorIndexH, v4i16, i32, ST1i16>;
+def : St1Lane64Pat<store, VectorIndexS, v2i32, i32, ST1i32>;
+def : St1Lane64Pat<store, VectorIndexS, v2f32, f32, ST1i32>;
+
+multiclass St1LanePost64Pat<SDPatternOperator scalar_store, Operand VecIndex,
+ ValueType VTy, ValueType STy, Instruction ST1,
+ int offset> {
+ def : Pat<(scalar_store
+ (STy (vector_extract (VTy VecListOne64:$Vt), VecIndex:$idx)),
+ GPR64sp:$Rn, offset),
+ (ST1 (SUBREG_TO_REG (i32 0), VecListOne64:$Vt, dsub),
+ VecIndex:$idx, GPR64sp:$Rn, XZR)>;
+
+ def : Pat<(scalar_store
+ (STy (vector_extract (VTy VecListOne64:$Vt), VecIndex:$idx)),
+ GPR64sp:$Rn, GPR64:$Rm),
+ (ST1 (SUBREG_TO_REG (i32 0), VecListOne64:$Vt, dsub),
+ VecIndex:$idx, GPR64sp:$Rn, $Rm)>;
+}
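+// (The first pattern is a post-increment by the fixed element size, which the
+// instruction encodes as the immediate form with XZR in the Rm slot; any
+// other increment needs the register form with $Rm.)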
+
+defm : St1LanePost64Pat<post_truncsti8, VectorIndexB, v8i8, i32, ST1i8_POST, 1>;
+defm : St1LanePost64Pat<post_truncsti16, VectorIndexH, v4i16, i32, ST1i16_POST,
+ 2>;
+defm : St1LanePost64Pat<post_store, VectorIndexS, v2i32, i32, ST1i32_POST, 4>;
+defm : St1LanePost64Pat<post_store, VectorIndexS, v2f32, f32, ST1i32_POST, 4>;
+defm : St1LanePost64Pat<post_store, VectorIndexD, v1i64, i64, ST1i64_POST, 8>;
+defm : St1LanePost64Pat<post_store, VectorIndexD, v1f64, f64, ST1i64_POST, 8>;
+
+multiclass St1LanePost128Pat<SDPatternOperator scalar_store, Operand VecIndex,
+ ValueType VTy, ValueType STy, Instruction ST1,
+ int offset> {
+ def : Pat<(scalar_store
+ (STy (vector_extract (VTy VecListOne128:$Vt), VecIndex:$idx)),
+ GPR64sp:$Rn, offset),
+ (ST1 VecListOne128:$Vt, VecIndex:$idx, GPR64sp:$Rn, XZR)>;
+
+ def : Pat<(scalar_store
+ (STy (vector_extract (VTy VecListOne128:$Vt), VecIndex:$idx)),
+ GPR64sp:$Rn, GPR64:$Rm),
+ (ST1 VecListOne128:$Vt, VecIndex:$idx, GPR64sp:$Rn, $Rm)>;
+}
+
+defm : St1LanePost128Pat<post_truncsti8, VectorIndexB, v16i8, i32, ST1i8_POST,
+ 1>;
+defm : St1LanePost128Pat<post_truncsti16, VectorIndexH, v8i16, i32, ST1i16_POST,
+ 2>;
+defm : St1LanePost128Pat<post_store, VectorIndexS, v4i32, i32, ST1i32_POST, 4>;
+defm : St1LanePost128Pat<post_store, VectorIndexS, v4f32, f32, ST1i32_POST, 4>;
+defm : St1LanePost128Pat<post_store, VectorIndexD, v2i64, i64, ST1i64_POST, 8>;
+defm : St1LanePost128Pat<post_store, VectorIndexD, v2f64, f64, ST1i64_POST, 8>;
+
+let mayStore = 1, neverHasSideEffects = 1 in {
+defm ST2 : SIMDStSingleB<1, 0b000, "st2", VecListTwob, GPR64pi2>;
+defm ST2 : SIMDStSingleH<1, 0b010, 0, "st2", VecListTwoh, GPR64pi4>;
+defm ST2 : SIMDStSingleS<1, 0b100, 0b00, "st2", VecListTwos, GPR64pi8>;
+defm ST2 : SIMDStSingleD<1, 0b100, 0b01, "st2", VecListTwod, GPR64pi16>;
+defm ST3 : SIMDStSingleB<0, 0b001, "st3", VecListThreeb, GPR64pi3>;
+defm ST3 : SIMDStSingleH<0, 0b011, 0, "st3", VecListThreeh, GPR64pi6>;
+defm ST3 : SIMDStSingleS<0, 0b101, 0b00, "st3", VecListThrees, GPR64pi12>;
+defm ST3 : SIMDStSingleD<0, 0b101, 0b01, "st3", VecListThreed, GPR64pi24>;
+defm ST4 : SIMDStSingleB<1, 0b001, "st4", VecListFourb, GPR64pi4>;
+defm ST4 : SIMDStSingleH<1, 0b011, 0, "st4", VecListFourh, GPR64pi8>;
+defm ST4 : SIMDStSingleS<1, 0b101, 0b00, "st4", VecListFours, GPR64pi16>;
+defm ST4 : SIMDStSingleD<1, 0b101, 0b01, "st4", VecListFourd, GPR64pi32>;
+}
+
+defm ST1 : SIMDLdSt1SingleAliases<"st1">;
+defm ST2 : SIMDLdSt2SingleAliases<"st2">;
+defm ST3 : SIMDLdSt3SingleAliases<"st3">;
+defm ST4 : SIMDLdSt4SingleAliases<"st4">;
+
+//----------------------------------------------------------------------------
+// Crypto extensions
+//----------------------------------------------------------------------------
+
+def AESErr : AESTiedInst<0b0100, "aese", int_aarch64_crypto_aese>;
+def AESDrr : AESTiedInst<0b0101, "aesd", int_aarch64_crypto_aesd>;
+def AESMCrr : AESInst< 0b0110, "aesmc", int_aarch64_crypto_aesmc>;
+def AESIMCrr : AESInst< 0b0111, "aesimc", int_aarch64_crypto_aesimc>;
+
+def SHA1Crrr : SHATiedInstQSV<0b000, "sha1c", int_aarch64_crypto_sha1c>;
+def SHA1Prrr : SHATiedInstQSV<0b001, "sha1p", int_aarch64_crypto_sha1p>;
+def SHA1Mrrr : SHATiedInstQSV<0b010, "sha1m", int_aarch64_crypto_sha1m>;
+def SHA1SU0rrr : SHATiedInstVVV<0b011, "sha1su0", int_aarch64_crypto_sha1su0>;
+def SHA256Hrrr : SHATiedInstQQV<0b100, "sha256h", int_aarch64_crypto_sha256h>;
+def SHA256H2rrr : SHATiedInstQQV<0b101, "sha256h2",int_aarch64_crypto_sha256h2>;
+def SHA256SU1rrr :SHATiedInstVVV<0b110, "sha256su1",int_aarch64_crypto_sha256su1>;
+
+def SHA1Hrr : SHAInstSS< 0b0000, "sha1h", int_aarch64_crypto_sha1h>;
+def SHA1SU1rr : SHATiedInstVV<0b0001, "sha1su1", int_aarch64_crypto_sha1su1>;
+def SHA256SU0rr : SHATiedInstVV<0b0010, "sha256su0",int_aarch64_crypto_sha256su0>;
+
+//----------------------------------------------------------------------------
+// Compiler-pseudos
+//----------------------------------------------------------------------------
+// FIXME: Like for X86, these should go in their own separate .td file.
+
+// Any instruction that defines a 32-bit result zeroes the high half of the
+// register. Truncate can be lowered to EXTRACT_SUBREG, and CopyFromReg may
+// be copying from a truncate, so those are excluded. But any other 32-bit
+// operation will zero-extend up to 64 bits.
+// FIXME: X86 also checks for CMOV here. Do we need something similar?
+def def32 : PatLeaf<(i32 GPR32:$src), [{
+ return N->getOpcode() != ISD::TRUNCATE &&
+ N->getOpcode() != TargetOpcode::EXTRACT_SUBREG &&
+ N->getOpcode() != ISD::CopyFromReg;
+}]>;
-// The simpler patterns deal with cases where no AND mask is actually needed
-// (either all bits are used or the low 32 bits are used).
-let AddedComplexity = 10 in {
-
-def : Pat<(A64Bfi i64:$src, i64:$Rn, imm:$ImmR, imm:$ImmS),
- (BFIxxii $src, $Rn,
- (bfi64_lsb_to_immr (i64 imm:$ImmR)),
- (bfi_width_to_imms (i64 imm:$ImmS)))>;
-
-def : Pat<(A64Bfi i32:$src, i32:$Rn, imm:$ImmR, imm:$ImmS),
- (BFIwwii $src, $Rn,
- (bfi32_lsb_to_immr (i64 imm:$ImmR)),
- (bfi_width_to_imms (i64 imm:$ImmS)))>;
-
-
-def : Pat<(and (A64Bfi i64:$src, i64:$Rn, imm:$ImmR, imm:$ImmS),
- (i64 4294967295)),
- (SUBREG_TO_REG (i64 0),
- (BFIwwii (EXTRACT_SUBREG $src, sub_32),
- (EXTRACT_SUBREG $Rn, sub_32),
- (bfi32_lsb_to_immr (i64 imm:$ImmR)),
- (bfi_width_to_imms (i64 imm:$ImmS))),
- sub_32)>;
-
-}
-
-//===----------------------------------------------------------------------===//
-// Miscellaneous patterns
-//===----------------------------------------------------------------------===//
-
-// Truncation from 64 to 32-bits just involves renaming your register.
-def : Pat<(i32 (trunc i64:$val)), (EXTRACT_SUBREG $val, sub_32)>;
-
-// Similarly, extension where we don't care about the high bits is
-// just a rename.
-def : Pat<(i64 (anyext i32:$val)),
- (INSERT_SUBREG (IMPLICIT_DEF), $val, sub_32)>;
-
-// SELECT instructions providing f128 types need to be handled by a
-// pseudo-instruction since the eventual code will need to introduce basic
-// blocks and control flow.
-def F128CSEL : PseudoInst<(outs FPR128:$Rd),
- (ins FPR128:$Rn, FPR128:$Rm, cond_code_op:$Cond),
- [(set f128:$Rd, (simple_select f128:$Rn, f128:$Rm))]> {
- let Uses = [NZCV];
- let usesCustomInserter = 1;
-}
-
-//===----------------------------------------------------------------------===//
-// Load/store patterns
-//===----------------------------------------------------------------------===//
-
-// There are lots of patterns here, because we need to allow at least three
-// parameters to vary independently.
-// 1. Instruction: "ldrb w9, [sp]", "ldrh w9, [sp]", ...
-// 2. LLVM source: zextloadi8, anyextloadi8, ...
-// 3. Address-generation: A64Wrapper, (add BASE, OFFSET), ...
+// In the case of a 32-bit def that is known to implicitly zero-extend,
+// we can use a SUBREG_TO_REG.
+def : Pat<(i64 (zext def32:$src)), (SUBREG_TO_REG (i64 0), GPR32:$src, sub_32)>;
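+// For example, (i64 (zext (i32 (add GPR32:$a, GPR32:$b)))) matches here: the
+// 32-bit add has already cleared bits [63:32], so the zero extension costs
+// only a register-class change.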
+
+// For an anyext, we don't care what the high bits are, so we can perform an
+// INSERT_SUBREG into an IMPLICIT_DEF.
+def : Pat<(i64 (anyext GPR32:$src)),
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$src, sub_32)>;
+
+// When we need to explicitly zero-extend, we use an unsigned bitfield move
+// instruction (UBFM) on the enclosing super-reg.
+def : Pat<(i64 (zext GPR32:$src)),
+ (UBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$src, sub_32), 0, 31)>;
+
+// To sign extend, we use a signed bitfield move instruction (SBFM) on the
+// containing super-reg.
+def : Pat<(i64 (sext GPR32:$src)),
+ (SBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$src, sub_32), 0, 31)>;
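+// (SBFMXri with immr = 0, imms = 31 copies bits 31:0 sign-extended to 64
+// bits; the assembler prints this form as sxtw.)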
+def : Pat<(i64 (sext_inreg GPR64:$src, i32)), (SBFMXri GPR64:$src, 0, 31)>;
+def : Pat<(i64 (sext_inreg GPR64:$src, i16)), (SBFMXri GPR64:$src, 0, 15)>;
+def : Pat<(i64 (sext_inreg GPR64:$src, i8)), (SBFMXri GPR64:$src, 0, 7)>;
+def : Pat<(i64 (sext_inreg GPR64:$src, i1)), (SBFMXri GPR64:$src, 0, 0)>;
+def : Pat<(i32 (sext_inreg GPR32:$src, i16)), (SBFMWri GPR32:$src, 0, 15)>;
+def : Pat<(i32 (sext_inreg GPR32:$src, i8)), (SBFMWri GPR32:$src, 0, 7)>;
+def : Pat<(i32 (sext_inreg GPR32:$src, i1)), (SBFMWri GPR32:$src, 0, 0)>;
+
+def : Pat<(shl (sext_inreg GPR32:$Rn, i8), (i64 imm0_31:$imm)),
+ (SBFMWri GPR32:$Rn, (i64 (i32shift_a imm0_31:$imm)),
+ (i64 (i32shift_sext_i8 imm0_31:$imm)))>;
+def : Pat<(shl (sext_inreg GPR64:$Rn, i8), (i64 imm0_63:$imm)),
+ (SBFMXri GPR64:$Rn, (i64 (i64shift_a imm0_63:$imm)),
+ (i64 (i64shift_sext_i8 imm0_63:$imm)))>;
+
+def : Pat<(shl (sext_inreg GPR32:$Rn, i16), (i64 imm0_31:$imm)),
+ (SBFMWri GPR32:$Rn, (i64 (i32shift_a imm0_31:$imm)),
+ (i64 (i32shift_sext_i16 imm0_31:$imm)))>;
+def : Pat<(shl (sext_inreg GPR64:$Rn, i16), (i64 imm0_63:$imm)),
+ (SBFMXri GPR64:$Rn, (i64 (i64shift_a imm0_63:$imm)),
+ (i64 (i64shift_sext_i16 imm0_63:$imm)))>;
+
+def : Pat<(shl (i64 (sext GPR32:$Rn)), (i64 imm0_63:$imm)),
+ (SBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$Rn, sub_32),
+ (i64 (i64shift_a imm0_63:$imm)),
+ (i64 (i64shift_sext_i32 imm0_63:$imm)))>;
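+// As a worked instance of the XForms above: (shl (sext_inreg GPR32:$Rn, i8),
+// (i64 3)) selects to SBFMWri $Rn, 29, 7, i.e. "sbfiz wd, wn, #3, #8",
+// since immr = (32 - 3) & 31 = 29 and imms = 8 - 1 = 7 (clamped when the
+// shift would push copied bits past bit 31).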
+
+// sra patterns have an AddedComplexity of 10, so make sure we have a higher
+// AddedComplexity for the following patterns since we want to match sext + sra
+// patterns before we attempt to match a single sra node.
+let AddedComplexity = 20 in {
+// We support every sext + sra combination that preserves at least one bit of
+// the original value being sign extended, i.e. shifts of up to bitwidth-1
+// bits.
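+// For instance (sra (sext_inreg GPR32:$Rn, i8), (i64 5)) selects to
+// SBFMWri $Rn, 5, 7, i.e. "sbfx wd, wn, #5, #3": three significant bits
+// survive the shift.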
+def : Pat<(sra (sext_inreg GPR32:$Rn, i8), (i64 imm0_7:$imm)),
+ (SBFMWri GPR32:$Rn, (i64 imm0_7:$imm), 7)>;
+def : Pat<(sra (sext_inreg GPR64:$Rn, i8), (i64 imm0_7:$imm)),
+ (SBFMXri GPR64:$Rn, (i64 imm0_7:$imm), 7)>;
+
+def : Pat<(sra (sext_inreg GPR32:$Rn, i16), (i64 imm0_15:$imm)),
+ (SBFMWri GPR32:$Rn, (i64 imm0_15:$imm), 15)>;
+def : Pat<(sra (sext_inreg GPR64:$Rn, i16), (i64 imm0_15:$imm)),
+ (SBFMXri GPR64:$Rn, (i64 imm0_15:$imm), 15)>;
+
+def : Pat<(sra (i64 (sext GPR32:$Rn)), (i64 imm0_31:$imm)),
+ (SBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$Rn, sub_32),
+ (i64 imm0_31:$imm), 31)>;
+} // AddedComplexity = 20
+
+// To truncate, we can simply extract from a subregister.
+def : Pat<(i32 (trunc GPR64sp:$src)),
+ (i32 (EXTRACT_SUBREG GPR64sp:$src, sub_32))>;
+
+// __builtin_trap() uses the BRK instruction on AArch64.
+def : Pat<(trap), (BRK 1)>;
+
+// Conversions within AdvSIMD types in the same register size are free.
+// But because we need a consistent lane ordering, in big endian many
+// conversions require one or more REV instructions.
//
-// The biggest problem turns out to be the address-generation variable. At the
-// point of instantiation we need to produce two DAGs, one for the pattern and
-// one for the instruction. Doing this at the lowest level of classes doesn't
-// work.
+// Consider a simple memory load followed by a bitconvert then a store.
+// v0 = load v2i32
+// v1 = BITCAST v2i32 v0 to v4i16
+// store v4i16 v1
//
-// Consider the simple uimm12 addressing mode, and the desire to match both (add
-// GPR64xsp:$Rn, uimm12:$Offset) and GPR64xsp:$Rn, particularly on the
-// instruction side. We'd need to insert either "GPR64xsp" and "uimm12" or
-// "GPR64xsp" and "0" into an unknown dag. !subst is not capable of this
-// operation, and PatFrags are for selection not output.
+// In big endian mode every memory access has an implicit byte swap. LDR and
+// STR do a 64-bit byte swap, whereas LD1/ST1 do a byte swap per lane - that
+// is, they treat the vector as a sequence of elements to be byte-swapped.
+// The two pairs of instructions are fundamentally incompatible. We've decided
+// to use LD1/ST1 only to simplify compiler implementation.
//
-// As a result, the address-generation patterns are the final
-// instantiations. However, we do still need to vary the operand for the address
-// further down (At the point we're deciding A64WrapperSmall, we don't know
-// the memory width of the operation).
-
-//===------------------------------
-// 1. Basic infrastructural defs
-//===------------------------------
-
-// First, some simple classes for !foreach and !subst to use:
-class Decls {
- dag pattern;
-}
-
-def decls : Decls;
-def ALIGN;
-def INST;
-def OFFSET;
-def SHIFT;
-
-// You can't use !subst on an actual immediate, but you *can* use it on an
-// operand record that happens to match a single immediate. So we do.
-def imm_eq0 : ImmLeaf<i64, [{ return Imm == 0; }]>;
-def imm_eq1 : ImmLeaf<i64, [{ return Imm == 1; }]>;
-def imm_eq2 : ImmLeaf<i64, [{ return Imm == 2; }]>;
-def imm_eq3 : ImmLeaf<i64, [{ return Imm == 3; }]>;
-def imm_eq4 : ImmLeaf<i64, [{ return Imm == 4; }]>;
-
-// If the low bits of a pointer are known to be 0 then an "or" is just as good
-// as addition for computing an offset. This fragment forwards that check for
-// TableGen's use.
-def add_like_or : PatFrag<(ops node:$lhs, node:$rhs), (or node:$lhs, node:$rhs),
-[{
- return CurDAG->isBaseWithConstantOffset(SDValue(N, 0));
-}]>;
-
-// Load/store (unsigned immediate) operations with relocations against global
-// symbols (for lo12) are only valid if those symbols have correct alignment
-// (since the immediate offset is divided by the access scale, it can't have a
-// remainder).
+// LD1/ST1 perform the equivalent of a sequence of LDR/STR + REV. This turns
+// the original code sequence into:
+// v0 = load v2i32
+// v1 = REV v2i32 v0 (implicit)
+// v2 = BITCAST v2i32 v1 to v4i16
+// v3 = REV v4i16 v2 (implicit)
+// store v4i16 v3
//
-// The guaranteed alignment is provided as part of the WrapperSmall
-// operation, and checked against one of these.
-def any_align : ImmLeaf<i32, [{ (void)Imm; return true; }]>;
-def min_align2 : ImmLeaf<i32, [{ return Imm >= 2; }]>;
-def min_align4 : ImmLeaf<i32, [{ return Imm >= 4; }]>;
-def min_align8 : ImmLeaf<i32, [{ return Imm >= 8; }]>;
-def min_align16 : ImmLeaf<i32, [{ return Imm >= 16; }]>;
-
-// "Normal" load/store instructions can be used on atomic operations, provided
-// the ordering parameter is at most "monotonic". Anything above that needs
-// special handling with acquire/release instructions.
-class simple_load<PatFrag base>
- : PatFrag<(ops node:$ptr), (base node:$ptr), [{
- return cast<AtomicSDNode>(N)->getOrdering() <= Monotonic;
-}]>;
-
-def atomic_load_simple_i8 : simple_load<atomic_load_8>;
-def atomic_load_simple_i16 : simple_load<atomic_load_16>;
-def atomic_load_simple_i32 : simple_load<atomic_load_32>;
-def atomic_load_simple_i64 : simple_load<atomic_load_64>;
-
-class simple_store<PatFrag base>
- : PatFrag<(ops node:$ptr, node:$val), (base node:$ptr, node:$val), [{
- return cast<AtomicSDNode>(N)->getOrdering() <= Monotonic;
-}]>;
-
-def atomic_store_simple_i8 : simple_store<atomic_store_8>;
-def atomic_store_simple_i16 : simple_store<atomic_store_16>;
-def atomic_store_simple_i32 : simple_store<atomic_store_32>;
-def atomic_store_simple_i64 : simple_store<atomic_store_64>;
-
-//===------------------------------
-// 2. UImm12 and SImm9
-//===------------------------------
-
-// These instructions have two operands providing the address so they can be
-// treated similarly for most purposes.
-
-//===------------------------------
-// 2.1 Base patterns covering extend/truncate semantics
-//===------------------------------
-
-// Atomic patterns can be shared between integer operations of all sizes; a
-// quick multiclass here allows reuse.
-multiclass ls_atomic_pats<Instruction LOAD, Instruction STORE, dag Base,
- dag Offset, dag address, ValueType transty,
- ValueType sty> {
- def : Pat<(!cast<PatFrag>("atomic_load_simple_" # sty) address),
- (LOAD Base, Offset)>;
-
- def : Pat<(!cast<PatFrag>("atomic_store_simple_" # sty) address, transty:$Rt),
- (STORE $Rt, Base, Offset)>;
-}
-
-// Instructions accessing a memory chunk smaller than a register (or, in a
-// pinch, the same size) have a characteristic set of patterns they want to
-// match: extending loads and truncating stores. This class deals with the
-// sign-neutral version of those patterns.
+// But this is now broken - the value stored is different from the value
+// loaded due to lane reordering. To fix this, on every BITCAST we must
+// perform two more REVs:
+// v0 = load v2i32
+// v1 = REV v2i32 v0 (implicit)
+// v2 = REV v2i32 v1
+// v3 = BITCAST v2i32 v2 to v4i16
+// v4 = REV v4i16 v3
+// v5 = REV v4i16 v4 (implicit)
+// store v4i16 v5
//
-// It will be instantiated across multiple addressing-modes.
-multiclass ls_small_pats<Instruction LOAD, Instruction STORE,
- dag Base, dag Offset,
- dag address, ValueType sty>
- : ls_atomic_pats<LOAD, STORE, Base, Offset, address, i32, sty> {
- def : Pat<(!cast<SDNode>(zextload # sty) address), (LOAD Base, Offset)>;
-
- def : Pat<(!cast<SDNode>(extload # sty) address), (LOAD Base, Offset)>;
-
- // For zero-extension to 64-bits we have to tell LLVM that the whole 64-bit
- // register was actually set.
- def : Pat<(i64 (!cast<SDNode>(zextload # sty) address)),
- (SUBREG_TO_REG (i64 0), (LOAD Base, Offset), sub_32)>;
-
- def : Pat<(i64 (!cast<SDNode>(extload # sty) address)),
- (SUBREG_TO_REG (i64 0), (LOAD Base, Offset), sub_32)>;
-
- def : Pat<(!cast<SDNode>(truncstore # sty) i32:$Rt, address),
- (STORE $Rt, Base, Offset)>;
-
- // For truncating store from 64-bits, we have to manually tell LLVM to
- // ignore the high bits of the x register.
- def : Pat<(!cast<SDNode>(truncstore # sty) i64:$Rt, address),
- (STORE (EXTRACT_SUBREG $Rt, sub_32), Base, Offset)>;
-}
-
-// Next come patterns for sign-extending loads.
-multiclass load_signed_pats<string T, string U, dag Base, dag Offset,
- dag address, ValueType sty> {
- def : Pat<(i32 (!cast<SDNode>("sextload" # sty) address)),
- (!cast<Instruction>("LDRS" # T # "w" # U) Base, Offset)>;
-
- def : Pat<(i64 (!cast<SDNode>("sextload" # sty) address)),
- (!cast<Instruction>("LDRS" # T # "x" # U) Base, Offset)>;
-
-}
-
-// Finally, "natural-width" loads and stores come next.
-multiclass ls_neutral_pats<Instruction LOAD, Instruction STORE, dag Base,
- dag Offset, dag address, ValueType sty> {
- def : Pat<(sty (load address)), (LOAD Base, Offset)>;
- def : Pat<(store sty:$Rt, address), (STORE $Rt, Base, Offset)>;
-}
-
-// Integer operations also get atomic instructions to select for.
-multiclass ls_int_neutral_pats<Instruction LOAD, Instruction STORE, dag Base,
- dag Offset, dag address, ValueType sty>
- : ls_neutral_pats<LOAD, STORE, Base, Offset, address, sty>,
- ls_atomic_pats<LOAD, STORE, Base, Offset, address, sty, sty>;
-
-//===------------------------------
-// 2.2. Addressing-mode instantiations
-//===------------------------------
-
-multiclass uimm12_pats<dag address, dag Base, dag Offset> {
- defm : ls_small_pats<LS8_LDR, LS8_STR, Base,
- !foreach(decls.pattern, Offset,
- !subst(OFFSET, byte_uimm12, decls.pattern)),
- !foreach(decls.pattern, address,
- !subst(OFFSET, byte_uimm12,
- !subst(ALIGN, any_align, decls.pattern))),
- i8>;
- defm : ls_small_pats<LS16_LDR, LS16_STR, Base,
- !foreach(decls.pattern, Offset,
- !subst(OFFSET, hword_uimm12, decls.pattern)),
- !foreach(decls.pattern, address,
- !subst(OFFSET, hword_uimm12,
- !subst(ALIGN, min_align2, decls.pattern))),
- i16>;
- defm : ls_small_pats<LS32_LDR, LS32_STR, Base,
- !foreach(decls.pattern, Offset,
- !subst(OFFSET, word_uimm12, decls.pattern)),
- !foreach(decls.pattern, address,
- !subst(OFFSET, word_uimm12,
- !subst(ALIGN, min_align4, decls.pattern))),
- i32>;
-
- defm : ls_int_neutral_pats<LS32_LDR, LS32_STR, Base,
- !foreach(decls.pattern, Offset,
- !subst(OFFSET, word_uimm12, decls.pattern)),
- !foreach(decls.pattern, address,
- !subst(OFFSET, word_uimm12,
- !subst(ALIGN, min_align4, decls.pattern))),
- i32>;
-
- defm : ls_int_neutral_pats<LS64_LDR, LS64_STR, Base,
- !foreach(decls.pattern, Offset,
- !subst(OFFSET, dword_uimm12, decls.pattern)),
- !foreach(decls.pattern, address,
- !subst(OFFSET, dword_uimm12,
- !subst(ALIGN, min_align8, decls.pattern))),
- i64>;
-
- defm : ls_neutral_pats<LSFP16_LDR, LSFP16_STR, Base,
- !foreach(decls.pattern, Offset,
- !subst(OFFSET, hword_uimm12, decls.pattern)),
- !foreach(decls.pattern, address,
- !subst(OFFSET, hword_uimm12,
- !subst(ALIGN, min_align2, decls.pattern))),
- f16>;
-
- defm : ls_neutral_pats<LSFP32_LDR, LSFP32_STR, Base,
- !foreach(decls.pattern, Offset,
- !subst(OFFSET, word_uimm12, decls.pattern)),
- !foreach(decls.pattern, address,
- !subst(OFFSET, word_uimm12,
- !subst(ALIGN, min_align4, decls.pattern))),
- f32>;
-
- defm : ls_neutral_pats<LSFP64_LDR, LSFP64_STR, Base,
- !foreach(decls.pattern, Offset,
- !subst(OFFSET, dword_uimm12, decls.pattern)),
- !foreach(decls.pattern, address,
- !subst(OFFSET, dword_uimm12,
- !subst(ALIGN, min_align8, decls.pattern))),
- f64>;
-
- defm : ls_neutral_pats<LSFP128_LDR, LSFP128_STR, Base,
- !foreach(decls.pattern, Offset,
- !subst(OFFSET, qword_uimm12, decls.pattern)),
- !foreach(decls.pattern, address,
- !subst(OFFSET, qword_uimm12,
- !subst(ALIGN, min_align16, decls.pattern))),
- f128>;
-
- defm : load_signed_pats<"B", "", Base,
- !foreach(decls.pattern, Offset,
- !subst(OFFSET, byte_uimm12, decls.pattern)),
- !foreach(decls.pattern, address,
- !subst(OFFSET, byte_uimm12,
- !subst(ALIGN, any_align, decls.pattern))),
- i8>;
-
- defm : load_signed_pats<"H", "", Base,
- !foreach(decls.pattern, Offset,
- !subst(OFFSET, hword_uimm12, decls.pattern)),
- !foreach(decls.pattern, address,
- !subst(OFFSET, hword_uimm12,
- !subst(ALIGN, min_align2, decls.pattern))),
- i16>;
-
- def : Pat<(sextloadi32 !foreach(decls.pattern, address,
- !subst(OFFSET, word_uimm12,
- !subst(ALIGN, min_align4, decls.pattern)))),
- (LDRSWx Base, !foreach(decls.pattern, Offset,
- !subst(OFFSET, word_uimm12, decls.pattern)))>;
-}
-
-// Straightforward patterns of last resort: a pointer with or without an
-// appropriate offset.
-defm : uimm12_pats<(i64 i64:$Rn), (i64 i64:$Rn), (i64 0)>;
-defm : uimm12_pats<(add i64:$Rn, OFFSET:$UImm12),
- (i64 i64:$Rn), (i64 OFFSET:$UImm12)>;
-
-// The offset could be hidden behind an "or", of course:
-defm : uimm12_pats<(add_like_or i64:$Rn, OFFSET:$UImm12),
- (i64 i64:$Rn), (i64 OFFSET:$UImm12)>;
-
-// Global addresses under the small-absolute model should use these
-// instructions. There are ELF relocations specifically for this purpose.
-defm : uimm12_pats<(A64WrapperSmall tglobaladdr:$Hi, tglobaladdr:$Lo12, ALIGN),
- (ADRPxi tglobaladdr:$Hi), (i64 tglobaladdr:$Lo12)>;
-
-defm : uimm12_pats<(A64WrapperSmall tglobaltlsaddr:$Hi, tglobaltlsaddr:$Lo12,
- ALIGN),
- (ADRPxi tglobaltlsaddr:$Hi), (i64 tglobaltlsaddr:$Lo12)>;
-
-// External symbols that make it this far should also get standard relocations.
-defm : uimm12_pats<(A64WrapperSmall texternalsym:$Hi, texternalsym:$Lo12,
- ALIGN),
- (ADRPxi texternalsym:$Hi), (i64 texternalsym:$Lo12)>;
-
-defm : uimm12_pats<(A64WrapperSmall tconstpool:$Hi, tconstpool:$Lo12, ALIGN),
- (ADRPxi tconstpool:$Hi), (i64 tconstpool:$Lo12)>;
-
-// We also want to use uimm12 instructions for local variables at the moment.
-def tframeindex_XFORM : SDNodeXForm<frameindex, [{
- int FI = cast<FrameIndexSDNode>(N)->getIndex();
- return CurDAG->getTargetFrameIndex(FI, MVT::i64);
-}]>;
-
-defm : uimm12_pats<(i64 frameindex:$Rn),
- (tframeindex_XFORM tframeindex:$Rn), (i64 0)>;
-
-// These can be much simpler than uimm12 because we don't need to change the
-// operand type (e.g. LDURB and LDURH take the same operands).
-multiclass simm9_pats<dag address, dag Base, dag Offset> {
- defm : ls_small_pats<LS8_LDUR, LS8_STUR, Base, Offset, address, i8>;
- defm : ls_small_pats<LS16_LDUR, LS16_STUR, Base, Offset, address, i16>;
-
- defm : ls_int_neutral_pats<LS32_LDUR, LS32_STUR, Base, Offset, address, i32>;
- defm : ls_int_neutral_pats<LS64_LDUR, LS64_STUR, Base, Offset, address, i64>;
-
- defm : ls_neutral_pats<LSFP16_LDUR, LSFP16_STUR, Base, Offset, address, f16>;
- defm : ls_neutral_pats<LSFP32_LDUR, LSFP32_STUR, Base, Offset, address, f32>;
- defm : ls_neutral_pats<LSFP64_LDUR, LSFP64_STUR, Base, Offset, address, f64>;
- defm : ls_neutral_pats<LSFP128_LDUR, LSFP128_STUR, Base, Offset, address,
- f128>;
-
- def : Pat<(i64 (zextloadi32 address)),
- (SUBREG_TO_REG (i64 0), (LS32_LDUR Base, Offset), sub_32)>;
-
- def : Pat<(truncstorei32 i64:$Rt, address),
- (LS32_STUR (EXTRACT_SUBREG $Rt, sub_32), Base, Offset)>;
-
- defm : load_signed_pats<"B", "_U", Base, Offset, address, i8>;
- defm : load_signed_pats<"H", "_U", Base, Offset, address, i16>;
- def : Pat<(sextloadi32 address), (LDURSWx Base, Offset)>;
-}
-
-defm : simm9_pats<(add i64:$Rn, simm9:$SImm9),
- (i64 $Rn), (SDXF_simm9 simm9:$SImm9)>;
-
-defm : simm9_pats<(add_like_or i64:$Rn, simm9:$SImm9),
- (i64 $Rn), (SDXF_simm9 simm9:$SImm9)>;
-
-
-//===------------------------------
-// 3. Register offset patterns
-//===------------------------------
-
-// Atomic patterns can be shared between integer operations of all sizes; a
-// quick multiclass here allows reuse.
-multiclass ro_atomic_pats<Instruction LOAD, Instruction STORE, dag Base,
- dag Offset, dag Extend, dag address,
- ValueType transty, ValueType sty> {
- def : Pat<(!cast<PatFrag>("atomic_load_simple_" # sty) address),
- (LOAD Base, Offset, Extend)>;
-
- def : Pat<(!cast<PatFrag>("atomic_store_simple_" # sty) address, transty:$Rt),
- (STORE $Rt, Base, Offset, Extend)>;
-}
-
-// The register-offset instructions take three operands (base, offset and
-// extend), and there is an annoying split between instructions where Rm is
-// 32-bit and 64-bit, so we need a special hierarchy to describe them. Other
-// than that, the same operations should be supported as for simm9 and uimm12
-// addressing.
-
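-// For example, instantiating with Rm = "Wm" below selects the 32-bit-offset
-// family (LS32_Wm_RegOffset_LDR and friends), while Rm = "Xm" selects the
-// 64-bit-offset family; regoff_pats glues the right names together via !cast.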
-multiclass ro_small_pats<Instruction LOAD, Instruction STORE,
- dag Base, dag Offset, dag Extend,
- dag address, ValueType sty>
- : ro_atomic_pats<LOAD, STORE, Base, Offset, Extend, address, i32, sty> {
- def : Pat<(!cast<SDNode>(zextload # sty) address),
- (LOAD Base, Offset, Extend)>;
-
- def : Pat<(!cast<SDNode>(extload # sty) address),
- (LOAD Base, Offset, Extend)>;
-
- // For zero-extension to 64-bits we have to tell LLVM that the whole 64-bit
- // register was actually set.
- def : Pat<(i64 (!cast<SDNode>(zextload # sty) address)),
- (SUBREG_TO_REG (i64 0), (LOAD Base, Offset, Extend), sub_32)>;
-
- def : Pat<(i64 (!cast<SDNode>(extload # sty) address)),
- (SUBREG_TO_REG (i64 0), (LOAD Base, Offset, Extend), sub_32)>;
-
- def : Pat<(!cast<SDNode>(truncstore # sty) i32:$Rt, address),
- (STORE $Rt, Base, Offset, Extend)>;
-
- // For truncating store from 64-bits, we have to manually tell LLVM to
- // ignore the high bits of the x register.
- def : Pat<(!cast<SDNode>(truncstore # sty) i64:$Rt, address),
- (STORE (EXTRACT_SUBREG $Rt, sub_32), Base, Offset, Extend)>;
-
-}
-
-// Next come patterns for sign-extending loads.
-multiclass ro_signed_pats<string T, string Rm, dag Base, dag Offset, dag Extend,
- dag address, ValueType sty> {
- def : Pat<(i32 (!cast<SDNode>("sextload" # sty) address)),
- (!cast<Instruction>("LDRS" # T # "w_" # Rm # "_RegOffset")
- Base, Offset, Extend)>;
-
- def : Pat<(i64 (!cast<SDNode>("sextload" # sty) address)),
- (!cast<Instruction>("LDRS" # T # "x_" # Rm # "_RegOffset")
- Base, Offset, Extend)>;
-}
-
-// And finally, "natural-width" loads and stores.
-multiclass ro_neutral_pats<Instruction LOAD, Instruction STORE,
- dag Base, dag Offset, dag Extend, dag address,
- ValueType sty> {
- def : Pat<(sty (load address)), (LOAD Base, Offset, Extend)>;
- def : Pat<(store sty:$Rt, address),
- (STORE $Rt, Base, Offset, Extend)>;
-}
-
-multiclass ro_int_neutral_pats<Instruction LOAD, Instruction STORE,
- dag Base, dag Offset, dag Extend, dag address,
- ValueType sty>
- : ro_neutral_pats<LOAD, STORE, Base, Offset, Extend, address, sty>,
- ro_atomic_pats<LOAD, STORE, Base, Offset, Extend, address, sty, sty>;
-
-multiclass regoff_pats<string Rm, dag address, dag Base, dag Offset,
- dag Extend> {
- defm : ro_small_pats<!cast<Instruction>("LS8_" # Rm # "_RegOffset_LDR"),
- !cast<Instruction>("LS8_" # Rm # "_RegOffset_STR"),
- Base, Offset, Extend,
- !foreach(decls.pattern, address,
- !subst(SHIFT, imm_eq0, decls.pattern)),
- i8>;
- defm : ro_small_pats<!cast<Instruction>("LS16_" # Rm # "_RegOffset_LDR"),
- !cast<Instruction>("LS16_" # Rm # "_RegOffset_STR"),
- Base, Offset, Extend,
- !foreach(decls.pattern, address,
- !subst(SHIFT, imm_eq1, decls.pattern)),
- i16>;
- defm : ro_small_pats<!cast<Instruction>("LS32_" # Rm # "_RegOffset_LDR"),
- !cast<Instruction>("LS32_" # Rm # "_RegOffset_STR"),
- Base, Offset, Extend,
- !foreach(decls.pattern, address,
- !subst(SHIFT, imm_eq2, decls.pattern)),
- i32>;
-
- defm : ro_int_neutral_pats<
- !cast<Instruction>("LS32_" # Rm # "_RegOffset_LDR"),
- !cast<Instruction>("LS32_" # Rm # "_RegOffset_STR"),
- Base, Offset, Extend,
- !foreach(decls.pattern, address,
- !subst(SHIFT, imm_eq2, decls.pattern)),
- i32>;
-
- defm : ro_int_neutral_pats<
- !cast<Instruction>("LS64_" # Rm # "_RegOffset_LDR"),
- !cast<Instruction>("LS64_" # Rm # "_RegOffset_STR"),
- Base, Offset, Extend,
- !foreach(decls.pattern, address,
- !subst(SHIFT, imm_eq3, decls.pattern)),
- i64>;
-
- defm : ro_neutral_pats<!cast<Instruction>("LSFP16_" # Rm # "_RegOffset_LDR"),
- !cast<Instruction>("LSFP16_" # Rm # "_RegOffset_STR"),
- Base, Offset, Extend,
- !foreach(decls.pattern, address,
- !subst(SHIFT, imm_eq1, decls.pattern)),
- f16>;
-
- defm : ro_neutral_pats<!cast<Instruction>("LSFP32_" # Rm # "_RegOffset_LDR"),
- !cast<Instruction>("LSFP32_" # Rm # "_RegOffset_STR"),
- Base, Offset, Extend,
- !foreach(decls.pattern, address,
- !subst(SHIFT, imm_eq2, decls.pattern)),
- f32>;
-
- defm : ro_neutral_pats<!cast<Instruction>("LSFP64_" # Rm # "_RegOffset_LDR"),
- !cast<Instruction>("LSFP64_" # Rm # "_RegOffset_STR"),
- Base, Offset, Extend,
- !foreach(decls.pattern, address,
- !subst(SHIFT, imm_eq3, decls.pattern)),
- f64>;
-
- defm : ro_neutral_pats<!cast<Instruction>("LSFP128_" # Rm # "_RegOffset_LDR"),
- !cast<Instruction>("LSFP128_" # Rm # "_RegOffset_STR"),
- Base, Offset, Extend,
- !foreach(decls.pattern, address,
- !subst(SHIFT, imm_eq4, decls.pattern)),
- f128>;
-
- defm : ro_signed_pats<"B", Rm, Base, Offset, Extend,
- !foreach(decls.pattern, address,
- !subst(SHIFT, imm_eq0, decls.pattern)),
- i8>;
-
- defm : ro_signed_pats<"H", Rm, Base, Offset, Extend,
- !foreach(decls.pattern, address,
- !subst(SHIFT, imm_eq1, decls.pattern)),
- i16>;
-
- def : Pat<(sextloadi32 !foreach(decls.pattern, address,
- !subst(SHIFT, imm_eq2, decls.pattern))),
- (!cast<Instruction>("LDRSWx_" # Rm # "_RegOffset")
- Base, Offset, Extend)>;
-}
-
-
-// Finally, we're in a position to tell LLVM exactly what addresses are reachable
-// using register-offset instructions. Essentially a base plus a possibly
-// extended, possibly shifted (by access size) offset.
-
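-// The trailing immediate in each instantiation below selects the extend/shift
-// operand: the sext patterns use values 6/7 and the zext and plain-i64
-// patterns 2/3. The precise meaning of those encodings is assumed to live
-// with the extend-operand definition, not here.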
-defm : regoff_pats<"Wm", (add i64:$Rn, (sext i32:$Rm)),
- (i64 i64:$Rn), (i32 i32:$Rm), (i64 6)>;
-
-defm : regoff_pats<"Wm", (add i64:$Rn, (shl (sext i32:$Rm), SHIFT)),
- (i64 i64:$Rn), (i32 i32:$Rm), (i64 7)>;
-
-defm : regoff_pats<"Wm", (add i64:$Rn, (zext i32:$Rm)),
- (i64 i64:$Rn), (i32 i32:$Rm), (i64 2)>;
-
-defm : regoff_pats<"Wm", (add i64:$Rn, (shl (zext i32:$Rm), SHIFT)),
- (i64 i64:$Rn), (i32 i32:$Rm), (i64 3)>;
-
-defm : regoff_pats<"Xm", (add i64:$Rn, i64:$Rm),
- (i64 i64:$Rn), (i64 i64:$Rm), (i64 2)>;
-
-defm : regoff_pats<"Xm", (add i64:$Rn, (shl i64:$Rm, SHIFT)),
- (i64 i64:$Rn), (i64 i64:$Rm), (i64 3)>;
-
-//===----------------------------------------------------------------------===//
-// Advanced SIMD (NEON) Support
+// This means two extra instructions, though in most cases the two REV
+// instructions can be combined into one. For example:
+// (REV64_2s (REV64_4h X)) === (REV32_4h X)
+//
+// There is also no 128-bit REV instruction. This must be synthesized with an
+// EXT instruction.
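+// (Concretely, the f128 and v2i64 patterns below use EXTv16i8 of a value
+// with itself and a byte offset of 8 to swap the two 64-bit halves.)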
//
+// Most bitconverts require some sort of conversion. The only exceptions are:
+// a) Identity conversions - vNfX <-> vNiX
+// b) Single-lane-to-scalar - v1fX <-> fX or v1iX <-> iX
+//
+
+let Predicates = [IsLE] in {
+def : Pat<(v8i8 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
+def : Pat<(v4i16 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
+def : Pat<(v2i32 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
+def : Pat<(v2f32 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
+
+def : Pat<(i64 (bitconvert (v8i8 V64:$Vn))),
+ (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
+def : Pat<(i64 (bitconvert (v4i16 V64:$Vn))),
+ (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
+def : Pat<(i64 (bitconvert (v2i32 V64:$Vn))),
+ (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
+def : Pat<(i64 (bitconvert (v2f32 V64:$Vn))),
+ (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
+def : Pat<(i64 (bitconvert (v1f64 V64:$Vn))),
+ (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
+}
+let Predicates = [IsBE] in {
+def : Pat<(v8i8 (bitconvert GPR64:$Xn)),
+ (REV64v8i8 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>;
+def : Pat<(v4i16 (bitconvert GPR64:$Xn)),
+ (REV64v4i16 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>;
+def : Pat<(v2i32 (bitconvert GPR64:$Xn)),
+ (REV64v2i32 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>;
+def : Pat<(v2f32 (bitconvert GPR64:$Xn)),
+ (REV64v2i32 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>;
+
+def : Pat<(i64 (bitconvert (v8i8 V64:$Vn))),
+ (REV64v8i8 (COPY_TO_REGCLASS V64:$Vn, GPR64))>;
+def : Pat<(i64 (bitconvert (v4i16 V64:$Vn))),
+ (REV64v4i16 (COPY_TO_REGCLASS V64:$Vn, GPR64))>;
+def : Pat<(i64 (bitconvert (v2i32 V64:$Vn))),
+ (REV64v2i32 (COPY_TO_REGCLASS V64:$Vn, GPR64))>;
+def : Pat<(i64 (bitconvert (v2f32 V64:$Vn))),
+ (REV64v2i32 (COPY_TO_REGCLASS V64:$Vn, GPR64))>;
+}
+def : Pat<(v1i64 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
+def : Pat<(v1f64 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
+def : Pat<(i64 (bitconvert (v1i64 V64:$Vn))),
+ (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
+def : Pat<(v1i64 (scalar_to_vector GPR64:$Xn)),
+ (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
+def : Pat<(v1f64 (scalar_to_vector GPR64:$Xn)),
+ (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
+def : Pat<(v1f64 (scalar_to_vector (f64 FPR64:$Xn))), (v1f64 FPR64:$Xn)>;
+
+def : Pat<(f32 (bitconvert (i32 GPR32:$Xn))),
+ (COPY_TO_REGCLASS GPR32:$Xn, FPR32)>;
+def : Pat<(i32 (bitconvert (f32 FPR32:$Xn))),
+ (COPY_TO_REGCLASS FPR32:$Xn, GPR32)>;
+def : Pat<(f64 (bitconvert (i64 GPR64:$Xn))),
+ (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
+def : Pat<(i64 (bitconvert (f64 FPR64:$Xn))),
+ (COPY_TO_REGCLASS FPR64:$Xn, GPR64)>;
+def : Pat<(i64 (bitconvert (v1f64 V64:$Vn))),
+ (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
+
+let Predicates = [IsLE] in {
+def : Pat<(v1i64 (bitconvert (v2i32 FPR64:$src))), (v1i64 FPR64:$src)>;
+def : Pat<(v1i64 (bitconvert (v4i16 FPR64:$src))), (v1i64 FPR64:$src)>;
+def : Pat<(v1i64 (bitconvert (v8i8 FPR64:$src))), (v1i64 FPR64:$src)>;
+def : Pat<(v1i64 (bitconvert (v2f32 FPR64:$src))), (v1i64 FPR64:$src)>;
+}
+let Predicates = [IsBE] in {
+def : Pat<(v1i64 (bitconvert (v2i32 FPR64:$src))),
+ (v1i64 (REV64v2i32 FPR64:$src))>;
+def : Pat<(v1i64 (bitconvert (v4i16 FPR64:$src))),
+ (v1i64 (REV64v4i16 FPR64:$src))>;
+def : Pat<(v1i64 (bitconvert (v8i8 FPR64:$src))),
+ (v1i64 (REV64v8i8 FPR64:$src))>;
+def : Pat<(v1i64 (bitconvert (v2f32 FPR64:$src))),
+ (v1i64 (REV64v2i32 FPR64:$src))>;
+}
+def : Pat<(v1i64 (bitconvert (v1f64 FPR64:$src))), (v1i64 FPR64:$src)>;
+def : Pat<(v1i64 (bitconvert (f64 FPR64:$src))), (v1i64 FPR64:$src)>;
+
+let Predicates = [IsLE] in {
+def : Pat<(v2i32 (bitconvert (v1i64 FPR64:$src))), (v2i32 FPR64:$src)>;
+def : Pat<(v2i32 (bitconvert (v4i16 FPR64:$src))), (v2i32 FPR64:$src)>;
+def : Pat<(v2i32 (bitconvert (v8i8 FPR64:$src))), (v2i32 FPR64:$src)>;
+def : Pat<(v2i32 (bitconvert (f64 FPR64:$src))), (v2i32 FPR64:$src)>;
+def : Pat<(v2i32 (bitconvert (v1f64 FPR64:$src))), (v2i32 FPR64:$src)>;
+}
+let Predicates = [IsBE] in {
+def : Pat<(v2i32 (bitconvert (v1i64 FPR64:$src))),
+ (v2i32 (REV64v2i32 FPR64:$src))>;
+def : Pat<(v2i32 (bitconvert (v4i16 FPR64:$src))),
+ (v2i32 (REV32v4i16 FPR64:$src))>;
+def : Pat<(v2i32 (bitconvert (v8i8 FPR64:$src))),
+ (v2i32 (REV32v8i8 FPR64:$src))>;
+def : Pat<(v2i32 (bitconvert (f64 FPR64:$src))),
+ (v2i32 (REV64v2i32 FPR64:$src))>;
+def : Pat<(v2i32 (bitconvert (v1f64 FPR64:$src))),
+ (v2i32 (REV64v2i32 FPR64:$src))>;
+}
+def : Pat<(v2i32 (bitconvert (v2f32 FPR64:$src))), (v2i32 FPR64:$src)>;
+
+let Predicates = [IsLE] in {
+def : Pat<(v4i16 (bitconvert (v1i64 FPR64:$src))), (v4i16 FPR64:$src)>;
+def : Pat<(v4i16 (bitconvert (v2i32 FPR64:$src))), (v4i16 FPR64:$src)>;
+def : Pat<(v4i16 (bitconvert (v8i8 FPR64:$src))), (v4i16 FPR64:$src)>;
+def : Pat<(v4i16 (bitconvert (f64 FPR64:$src))), (v4i16 FPR64:$src)>;
+def : Pat<(v4i16 (bitconvert (v2f32 FPR64:$src))), (v4i16 FPR64:$src)>;
+def : Pat<(v4i16 (bitconvert (v1f64 FPR64:$src))), (v4i16 FPR64:$src)>;
+}
+let Predicates = [IsBE] in {
+def : Pat<(v4i16 (bitconvert (v1i64 FPR64:$src))),
+ (v4i16 (REV64v4i16 FPR64:$src))>;
+def : Pat<(v4i16 (bitconvert (v2i32 FPR64:$src))),
+ (v4i16 (REV32v4i16 FPR64:$src))>;
+def : Pat<(v4i16 (bitconvert (v8i8 FPR64:$src))),
+ (v4i16 (REV16v8i8 FPR64:$src))>;
+def : Pat<(v4i16 (bitconvert (f64 FPR64:$src))),
+ (v4i16 (REV64v4i16 FPR64:$src))>;
+def : Pat<(v4i16 (bitconvert (v2f32 FPR64:$src))),
+ (v4i16 (REV32v4i16 FPR64:$src))>;
+def : Pat<(v4i16 (bitconvert (v1f64 FPR64:$src))),
+ (v4i16 (REV64v4i16 FPR64:$src))>;
+}
+
+let Predicates = [IsLE] in {
+def : Pat<(v8i8 (bitconvert (v1i64 FPR64:$src))), (v8i8 FPR64:$src)>;
+def : Pat<(v8i8 (bitconvert (v2i32 FPR64:$src))), (v8i8 FPR64:$src)>;
+def : Pat<(v8i8 (bitconvert (v4i16 FPR64:$src))), (v8i8 FPR64:$src)>;
+def : Pat<(v8i8 (bitconvert (f64 FPR64:$src))), (v8i8 FPR64:$src)>;
+def : Pat<(v8i8 (bitconvert (v2f32 FPR64:$src))), (v8i8 FPR64:$src)>;
+def : Pat<(v8i8 (bitconvert (v1f64 FPR64:$src))), (v8i8 FPR64:$src)>;
+}
+let Predicates = [IsBE] in {
+def : Pat<(v8i8 (bitconvert (v1i64 FPR64:$src))),
+ (v8i8 (REV64v8i8 FPR64:$src))>;
+def : Pat<(v8i8 (bitconvert (v2i32 FPR64:$src))),
+ (v8i8 (REV32v8i8 FPR64:$src))>;
+def : Pat<(v8i8 (bitconvert (v4i16 FPR64:$src))),
+ (v8i8 (REV16v8i8 FPR64:$src))>;
+def : Pat<(v8i8 (bitconvert (f64 FPR64:$src))),
+ (v8i8 (REV64v8i8 FPR64:$src))>;
+def : Pat<(v8i8 (bitconvert (v2f32 FPR64:$src))),
+ (v8i8 (REV32v8i8 FPR64:$src))>;
+def : Pat<(v8i8 (bitconvert (v1f64 FPR64:$src))),
+ (v8i8 (REV64v8i8 FPR64:$src))>;
+}
+
+let Predicates = [IsLE] in {
+def : Pat<(f64 (bitconvert (v2i32 FPR64:$src))), (f64 FPR64:$src)>;
+def : Pat<(f64 (bitconvert (v4i16 FPR64:$src))), (f64 FPR64:$src)>;
+def : Pat<(f64 (bitconvert (v2f32 FPR64:$src))), (f64 FPR64:$src)>;
+def : Pat<(f64 (bitconvert (v8i8 FPR64:$src))), (f64 FPR64:$src)>;
+}
+let Predicates = [IsBE] in {
+def : Pat<(f64 (bitconvert (v2i32 FPR64:$src))),
+ (f64 (REV64v2i32 FPR64:$src))>;
+def : Pat<(f64 (bitconvert (v4i16 FPR64:$src))),
+ (f64 (REV64v4i16 FPR64:$src))>;
+def : Pat<(f64 (bitconvert (v2f32 FPR64:$src))),
+ (f64 (REV64v2i32 FPR64:$src))>;
+def : Pat<(f64 (bitconvert (v8i8 FPR64:$src))),
+ (f64 (REV64v8i8 FPR64:$src))>;
+}
+def : Pat<(f64 (bitconvert (v1i64 FPR64:$src))), (f64 FPR64:$src)>;
+def : Pat<(f64 (bitconvert (v1f64 FPR64:$src))), (f64 FPR64:$src)>;
+
+let Predicates = [IsLE] in {
+def : Pat<(v1f64 (bitconvert (v2i32 FPR64:$src))), (v1f64 FPR64:$src)>;
+def : Pat<(v1f64 (bitconvert (v4i16 FPR64:$src))), (v1f64 FPR64:$src)>;
+def : Pat<(v1f64 (bitconvert (v8i8 FPR64:$src))), (v1f64 FPR64:$src)>;
+def : Pat<(v1f64 (bitconvert (v2f32 FPR64:$src))), (v1f64 FPR64:$src)>;
+}
+let Predicates = [IsBE] in {
+def : Pat<(v1f64 (bitconvert (v2i32 FPR64:$src))),
+ (v1f64 (REV64v2i32 FPR64:$src))>;
+def : Pat<(v1f64 (bitconvert (v4i16 FPR64:$src))),
+ (v1f64 (REV64v4i16 FPR64:$src))>;
+def : Pat<(v1f64 (bitconvert (v8i8 FPR64:$src))),
+ (v1f64 (REV64v8i8 FPR64:$src))>;
+def : Pat<(v1f64 (bitconvert (v2f32 FPR64:$src))),
+ (v1f64 (REV64v2i32 FPR64:$src))>;
+}
+def : Pat<(v1f64 (bitconvert (v1i64 FPR64:$src))), (v1f64 FPR64:$src)>;
+def : Pat<(v1f64 (bitconvert (f64 FPR64:$src))), (v1f64 FPR64:$src)>;
+
+let Predicates = [IsLE] in {
+def : Pat<(v2f32 (bitconvert (v1i64 FPR64:$src))), (v2f32 FPR64:$src)>;
+def : Pat<(v2f32 (bitconvert (v4i16 FPR64:$src))), (v2f32 FPR64:$src)>;
+def : Pat<(v2f32 (bitconvert (v8i8 FPR64:$src))), (v2f32 FPR64:$src)>;
+def : Pat<(v2f32 (bitconvert (v1f64 FPR64:$src))), (v2f32 FPR64:$src)>;
+def : Pat<(v2f32 (bitconvert (f64 FPR64:$src))), (v2f32 FPR64:$src)>;
+}
+let Predicates = [IsBE] in {
+def : Pat<(v2f32 (bitconvert (v1i64 FPR64:$src))),
+ (v2f32 (REV64v2i32 FPR64:$src))>;
+def : Pat<(v2f32 (bitconvert (v4i16 FPR64:$src))),
+ (v2f32 (REV32v4i16 FPR64:$src))>;
+def : Pat<(v2f32 (bitconvert (v8i8 FPR64:$src))),
+ (v2f32 (REV32v8i8 FPR64:$src))>;
+def : Pat<(v2f32 (bitconvert (v1f64 FPR64:$src))),
+ (v2f32 (REV64v2i32 FPR64:$src))>;
+def : Pat<(v2f32 (bitconvert (f64 FPR64:$src))),
+ (v2f32 (REV64v2i32 FPR64:$src))>;
+}
+def : Pat<(v2f32 (bitconvert (v2i32 FPR64:$src))), (v2f32 FPR64:$src)>;
+
+let Predicates = [IsLE] in {
+def : Pat<(f128 (bitconvert (v2i64 FPR128:$src))), (f128 FPR128:$src)>;
+def : Pat<(f128 (bitconvert (v4i32 FPR128:$src))), (f128 FPR128:$src)>;
+def : Pat<(f128 (bitconvert (v8i16 FPR128:$src))), (f128 FPR128:$src)>;
+def : Pat<(f128 (bitconvert (v2f64 FPR128:$src))), (f128 FPR128:$src)>;
+def : Pat<(f128 (bitconvert (v4f32 FPR128:$src))), (f128 FPR128:$src)>;
+def : Pat<(f128 (bitconvert (v16i8 FPR128:$src))), (f128 FPR128:$src)>;
+}
+let Predicates = [IsBE] in {
+def : Pat<(f128 (bitconvert (v2i64 FPR128:$src))),
+ (f128 (EXTv16i8 FPR128:$src, FPR128:$src, (i32 8)))>;
+def : Pat<(f128 (bitconvert (v4i32 FPR128:$src))),
+ (f128 (EXTv16i8 (REV64v4i32 FPR128:$src),
+ (REV64v4i32 FPR128:$src), (i32 8)))>;
+def : Pat<(f128 (bitconvert (v8i16 FPR128:$src))),
+ (f128 (EXTv16i8 (REV64v8i16 FPR128:$src),
+ (REV64v8i16 FPR128:$src), (i32 8)))>;
+def : Pat<(f128 (bitconvert (v2f64 FPR128:$src))),
+ (f128 (EXTv16i8 FPR128:$src, FPR128:$src, (i32 8)))>;
+def : Pat<(f128 (bitconvert (v4f32 FPR128:$src))),
+ (f128 (EXTv16i8 (REV64v4i32 FPR128:$src),
+ (REV64v4i32 FPR128:$src), (i32 8)))>;
+def : Pat<(f128 (bitconvert (v16i8 FPR128:$src))),
+ (f128 (EXTv16i8 (REV64v16i8 FPR128:$src),
+ (REV64v16i8 FPR128:$src), (i32 8)))>;
+}
+
+let Predicates = [IsLE] in {
+def : Pat<(v2f64 (bitconvert (f128 FPR128:$src))), (v2f64 FPR128:$src)>;
+def : Pat<(v2f64 (bitconvert (v4i32 FPR128:$src))), (v2f64 FPR128:$src)>;
+def : Pat<(v2f64 (bitconvert (v8i16 FPR128:$src))), (v2f64 FPR128:$src)>;
+def : Pat<(v2f64 (bitconvert (v16i8 FPR128:$src))), (v2f64 FPR128:$src)>;
+def : Pat<(v2f64 (bitconvert (v4f32 FPR128:$src))), (v2f64 FPR128:$src)>;
+}
+let Predicates = [IsBE] in {
+def : Pat<(v2f64 (bitconvert (f128 FPR128:$src))),
+ (v2f64 (EXTv16i8 FPR128:$src,
+ FPR128:$src, (i32 8)))>;
+def : Pat<(v2f64 (bitconvert (v4i32 FPR128:$src))),
+ (v2f64 (REV64v4i32 FPR128:$src))>;
+def : Pat<(v2f64 (bitconvert (v8i16 FPR128:$src))),
+ (v2f64 (REV64v8i16 FPR128:$src))>;
+def : Pat<(v2f64 (bitconvert (v16i8 FPR128:$src))),
+ (v2f64 (REV64v16i8 FPR128:$src))>;
+def : Pat<(v2f64 (bitconvert (v4f32 FPR128:$src))),
+ (v2f64 (REV64v4i32 FPR128:$src))>;
+}
+def : Pat<(v2f64 (bitconvert (v2i64 FPR128:$src))), (v2f64 FPR128:$src)>;
+
+let Predicates = [IsLE] in {
+def : Pat<(v4f32 (bitconvert (f128 FPR128:$src))), (v4f32 FPR128:$src)>;
+def : Pat<(v4f32 (bitconvert (v8i16 FPR128:$src))), (v4f32 FPR128:$src)>;
+def : Pat<(v4f32 (bitconvert (v16i8 FPR128:$src))), (v4f32 FPR128:$src)>;
+def : Pat<(v4f32 (bitconvert (v2i64 FPR128:$src))), (v4f32 FPR128:$src)>;
+def : Pat<(v4f32 (bitconvert (v2f64 FPR128:$src))), (v4f32 FPR128:$src)>;
+}
+let Predicates = [IsBE] in {
+def : Pat<(v4f32 (bitconvert (f128 FPR128:$src))),
+ (v4f32 (EXTv16i8 (REV64v4i32 FPR128:$src),
+ (REV64v4i32 FPR128:$src), (i32 8)))>;
+def : Pat<(v4f32 (bitconvert (v8i16 FPR128:$src))),
+ (v4f32 (REV32v8i16 FPR128:$src))>;
+def : Pat<(v4f32 (bitconvert (v16i8 FPR128:$src))),
+ (v4f32 (REV32v16i8 FPR128:$src))>;
+def : Pat<(v4f32 (bitconvert (v2i64 FPR128:$src))),
+ (v4f32 (REV64v4i32 FPR128:$src))>;
+def : Pat<(v4f32 (bitconvert (v2f64 FPR128:$src))),
+ (v4f32 (REV64v4i32 FPR128:$src))>;
+}
+def : Pat<(v4f32 (bitconvert (v4i32 FPR128:$src))), (v4f32 FPR128:$src)>;
+
+let Predicates = [IsLE] in {
+def : Pat<(v2i64 (bitconvert (f128 FPR128:$src))), (v2i64 FPR128:$src)>;
+def : Pat<(v2i64 (bitconvert (v4i32 FPR128:$src))), (v2i64 FPR128:$src)>;
+def : Pat<(v2i64 (bitconvert (v8i16 FPR128:$src))), (v2i64 FPR128:$src)>;
+def : Pat<(v2i64 (bitconvert (v16i8 FPR128:$src))), (v2i64 FPR128:$src)>;
+def : Pat<(v2i64 (bitconvert (v4f32 FPR128:$src))), (v2i64 FPR128:$src)>;
+}
+let Predicates = [IsBE] in {
+def : Pat<(v2i64 (bitconvert (f128 FPR128:$src))),
+ (v2i64 (EXTv16i8 FPR128:$src,
+ FPR128:$src, (i32 8)))>;
+def : Pat<(v2i64 (bitconvert (v4i32 FPR128:$src))),
+ (v2i64 (REV64v4i32 FPR128:$src))>;
+def : Pat<(v2i64 (bitconvert (v8i16 FPR128:$src))),
+ (v2i64 (REV64v8i16 FPR128:$src))>;
+def : Pat<(v2i64 (bitconvert (v16i8 FPR128:$src))),
+ (v2i64 (REV64v16i8 FPR128:$src))>;
+def : Pat<(v2i64 (bitconvert (v4f32 FPR128:$src))),
+ (v2i64 (REV64v4i32 FPR128:$src))>;
+}
+def : Pat<(v2i64 (bitconvert (v2f64 FPR128:$src))), (v2i64 FPR128:$src)>;
+
+let Predicates = [IsLE] in {
+def : Pat<(v4i32 (bitconvert (f128 FPR128:$src))), (v4i32 FPR128:$src)>;
+def : Pat<(v4i32 (bitconvert (v2i64 FPR128:$src))), (v4i32 FPR128:$src)>;
+def : Pat<(v4i32 (bitconvert (v8i16 FPR128:$src))), (v4i32 FPR128:$src)>;
+def : Pat<(v4i32 (bitconvert (v16i8 FPR128:$src))), (v4i32 FPR128:$src)>;
+def : Pat<(v4i32 (bitconvert (v2f64 FPR128:$src))), (v4i32 FPR128:$src)>;
+}
+let Predicates = [IsBE] in {
+def : Pat<(v4i32 (bitconvert (f128 FPR128:$src))),
+ (v4i32 (EXTv16i8 (REV64v4i32 FPR128:$src),
+ (REV64v4i32 FPR128:$src),
+ (i32 8)))>;
+def : Pat<(v4i32 (bitconvert (v2i64 FPR128:$src))),
+ (v4i32 (REV64v4i32 FPR128:$src))>;
+def : Pat<(v4i32 (bitconvert (v8i16 FPR128:$src))),
+ (v4i32 (REV32v8i16 FPR128:$src))>;
+def : Pat<(v4i32 (bitconvert (v16i8 FPR128:$src))),
+ (v4i32 (REV32v16i8 FPR128:$src))>;
+def : Pat<(v4i32 (bitconvert (v2f64 FPR128:$src))),
+ (v4i32 (REV64v4i32 FPR128:$src))>;
+}
+def : Pat<(v4i32 (bitconvert (v4f32 FPR128:$src))), (v4i32 FPR128:$src)>;
+
+let Predicates = [IsLE] in {
+def : Pat<(v8i16 (bitconvert (f128 FPR128:$src))), (v8i16 FPR128:$src)>;
+def : Pat<(v8i16 (bitconvert (v2i64 FPR128:$src))), (v8i16 FPR128:$src)>;
+def : Pat<(v8i16 (bitconvert (v4i32 FPR128:$src))), (v8i16 FPR128:$src)>;
+def : Pat<(v8i16 (bitconvert (v16i8 FPR128:$src))), (v8i16 FPR128:$src)>;
+def : Pat<(v8i16 (bitconvert (v2f64 FPR128:$src))), (v8i16 FPR128:$src)>;
+def : Pat<(v8i16 (bitconvert (v4f32 FPR128:$src))), (v8i16 FPR128:$src)>;
+}
+let Predicates = [IsBE] in {
+def : Pat<(v8i16 (bitconvert (f128 FPR128:$src))),
+ (v8i16 (EXTv16i8 (REV64v8i16 FPR128:$src),
+ (REV64v8i16 FPR128:$src),
+ (i32 8)))>;
+def : Pat<(v8i16 (bitconvert (v2i64 FPR128:$src))),
+ (v8i16 (REV64v8i16 FPR128:$src))>;
+def : Pat<(v8i16 (bitconvert (v4i32 FPR128:$src))),
+ (v8i16 (REV32v8i16 FPR128:$src))>;
+def : Pat<(v8i16 (bitconvert (v16i8 FPR128:$src))),
+ (v8i16 (REV16v16i8 FPR128:$src))>;
+def : Pat<(v8i16 (bitconvert (v2f64 FPR128:$src))),
+ (v8i16 (REV64v8i16 FPR128:$src))>;
+def : Pat<(v8i16 (bitconvert (v4f32 FPR128:$src))),
+ (v8i16 (REV32v8i16 FPR128:$src))>;
+}
+
+let Predicates = [IsLE] in {
+def : Pat<(v16i8 (bitconvert (f128 FPR128:$src))), (v16i8 FPR128:$src)>;
+def : Pat<(v16i8 (bitconvert (v2i64 FPR128:$src))), (v16i8 FPR128:$src)>;
+def : Pat<(v16i8 (bitconvert (v4i32 FPR128:$src))), (v16i8 FPR128:$src)>;
+def : Pat<(v16i8 (bitconvert (v8i16 FPR128:$src))), (v16i8 FPR128:$src)>;
+def : Pat<(v16i8 (bitconvert (v2f64 FPR128:$src))), (v16i8 FPR128:$src)>;
+def : Pat<(v16i8 (bitconvert (v4f32 FPR128:$src))), (v16i8 FPR128:$src)>;
+}
+let Predicates = [IsBE] in {
+def : Pat<(v16i8 (bitconvert (f128 FPR128:$src))),
+ (v16i8 (EXTv16i8 (REV64v16i8 FPR128:$src),
+ (REV64v16i8 FPR128:$src),
+ (i32 8)))>;
+def : Pat<(v16i8 (bitconvert (v2i64 FPR128:$src))),
+ (v16i8 (REV64v16i8 FPR128:$src))>;
+def : Pat<(v16i8 (bitconvert (v4i32 FPR128:$src))),
+ (v16i8 (REV32v16i8 FPR128:$src))>;
+def : Pat<(v16i8 (bitconvert (v8i16 FPR128:$src))),
+ (v16i8 (REV16v16i8 FPR128:$src))>;
+def : Pat<(v16i8 (bitconvert (v2f64 FPR128:$src))),
+ (v16i8 (REV64v16i8 FPR128:$src))>;
+def : Pat<(v16i8 (bitconvert (v4f32 FPR128:$src))),
+ (v16i8 (REV32v16i8 FPR128:$src))>;
+}
+
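+// Extract the high 64-bit half of a 128-bit vector: DUP the high doubleword
+// lane into a fresh vector, then take its D subregister.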
+def : Pat<(v8i8 (extract_subvector (v16i8 FPR128:$Rn), (i64 1))),
+ (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>;
+def : Pat<(v4i16 (extract_subvector (v8i16 FPR128:$Rn), (i64 1))),
+ (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>;
+def : Pat<(v2i32 (extract_subvector (v4i32 FPR128:$Rn), (i64 1))),
+ (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>;
+def : Pat<(v1i64 (extract_subvector (v2i64 FPR128:$Rn), (i64 1))),
+ (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>;
+
+// A 64-bit subvector insert to the first 128-bit vector position
+// is a subregister copy that needs no instruction.
+def : Pat<(insert_subvector undef, (v1i64 FPR64:$src), (i32 0)),
+ (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
+def : Pat<(insert_subvector undef, (v1f64 FPR64:$src), (i32 0)),
+ (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
+def : Pat<(insert_subvector undef, (v2i32 FPR64:$src), (i32 0)),
+ (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
+def : Pat<(insert_subvector undef, (v2f32 FPR64:$src), (i32 0)),
+ (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
+def : Pat<(insert_subvector undef, (v4i16 FPR64:$src), (i32 0)),
+ (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
+def : Pat<(insert_subvector undef, (v8i8 FPR64:$src), (i32 0)),
+ (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
+
+// Use pair-wise add instructions when summing up the lanes for v2f64, v2i64
+// or v2f32.
+def : Pat<(i64 (add (vector_extract (v2i64 FPR128:$Rn), (i64 0)),
+ (vector_extract (v2i64 FPR128:$Rn), (i64 1)))),
+ (i64 (ADDPv2i64p (v2i64 FPR128:$Rn)))>;
+def : Pat<(f64 (fadd (vector_extract (v2f64 FPR128:$Rn), (i64 0)),
+ (vector_extract (v2f64 FPR128:$Rn), (i64 1)))),
+ (f64 (FADDPv2i64p (v2f64 FPR128:$Rn)))>;
+// vector_extract on 64-bit vectors gets promoted to a 128-bit vector, so we
+// match on v4f32 here, not v2f32. This will also catch adding the low two
+// lanes of a true v4f32 vector.
+def : Pat<(fadd (vector_extract (v4f32 FPR128:$Rn), (i64 0)),
+ (vector_extract (v4f32 FPR128:$Rn), (i64 1))),
+ (f32 (FADDPv2i32p (EXTRACT_SUBREG FPR128:$Rn, dsub)))>;
+
+// Scalar 64-bit shifts in FPR64 registers.
+def : Pat<(i64 (int_aarch64_neon_sshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))),
+ (SSHLv1i64 FPR64:$Rn, FPR64:$Rm)>;
+def : Pat<(i64 (int_aarch64_neon_ushl (i64 FPR64:$Rn), (i64 FPR64:$Rm))),
+ (USHLv1i64 FPR64:$Rn, FPR64:$Rm)>;
+def : Pat<(i64 (int_aarch64_neon_srshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))),
+ (SRSHLv1i64 FPR64:$Rn, FPR64:$Rm)>;
+def : Pat<(i64 (int_aarch64_neon_urshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))),
+ (URSHLv1i64 FPR64:$Rn, FPR64:$Rm)>;
+
+// Tail call return handling. These are all compiler pseudo-instructions, so
+// they carry no encoding information.
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [SP] in {
+ def TCRETURNdi : Pseudo<(outs), (ins i64imm:$dst, i32imm:$FPDiff),[]>;
+ def TCRETURNri : Pseudo<(outs), (ins tcGPR64:$dst, i32imm:$FPDiff), []>;
+}
+
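+// $FPDiff is, per the usual LLVM tail-call convention, the byte delta between
+// the caller's and callee's stack-argument areas; the patterns below simply
+// forward it.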
+def : Pat<(AArch64tcret tcGPR64:$dst, (i32 timm:$FPDiff)),
+ (TCRETURNri tcGPR64:$dst, imm:$FPDiff)>;
+def : Pat<(AArch64tcret tglobaladdr:$dst, (i32 timm:$FPDiff)),
+ (TCRETURNdi texternalsym:$dst, imm:$FPDiff)>;
+def : Pat<(AArch64tcret texternalsym:$dst, (i32 timm:$FPDiff)),
+ (TCRETURNdi texternalsym:$dst, imm:$FPDiff)>;
-include "AArch64InstrNEON.td"
+include "AArch64InstrAtomics.td"
diff --git a/lib/Target/AArch64/AArch64InstrNEON.td b/lib/Target/AArch64/AArch64InstrNEON.td
deleted file mode 100644
index 0b97e3b..0000000
--- a/lib/Target/AArch64/AArch64InstrNEON.td
+++ /dev/null
@@ -1,9476 +0,0 @@
-//===-- AArch64InstrNEON.td - NEON support for AArch64 -----*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file describes the AArch64 NEON instruction set.
-//
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// NEON-specific DAG Nodes.
-//===----------------------------------------------------------------------===//
-
-// (outs Result), (ins Imm, OpCmode)
-def SDT_Neon_movi : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVT<1, i32>]>;
-
-def Neon_movi : SDNode<"AArch64ISD::NEON_MOVIMM", SDT_Neon_movi>;
-
-def Neon_mvni : SDNode<"AArch64ISD::NEON_MVNIMM", SDT_Neon_movi>;
-
-// (outs Result), (ins Imm)
-def Neon_fmovi : SDNode<"AArch64ISD::NEON_FMOVIMM", SDTypeProfile<1, 1,
- [SDTCisVec<0>, SDTCisVT<1, i32>]>>;
-
-// (outs Result), (ins LHS, RHS, CondCode)
-def Neon_cmp : SDNode<"AArch64ISD::NEON_CMP", SDTypeProfile<1, 3,
- [SDTCisVec<0>, SDTCisSameAs<1, 2>]>>;
-
-// (outs Result), (ins LHS, 0/0.0 constant, CondCode)
-def Neon_cmpz : SDNode<"AArch64ISD::NEON_CMPZ", SDTypeProfile<1, 3,
- [SDTCisVec<0>, SDTCisVec<1>]>>;
-
-// (outs Result), (ins LHS, RHS)
-def Neon_tst : SDNode<"AArch64ISD::NEON_TST", SDTypeProfile<1, 2,
- [SDTCisVec<0>, SDTCisSameAs<1, 2>]>>;
-
-def SDTARMVSH : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
- SDTCisVT<2, i32>]>;
-def Neon_sqrshlImm : SDNode<"AArch64ISD::NEON_QSHLs", SDTARMVSH>;
-def Neon_uqrshlImm : SDNode<"AArch64ISD::NEON_QSHLu", SDTARMVSH>;
-
-def SDTPERMUTE : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
- SDTCisSameAs<0, 2>]>;
-def Neon_uzp1 : SDNode<"AArch64ISD::NEON_UZP1", SDTPERMUTE>;
-def Neon_uzp2 : SDNode<"AArch64ISD::NEON_UZP2", SDTPERMUTE>;
-def Neon_zip1 : SDNode<"AArch64ISD::NEON_ZIP1", SDTPERMUTE>;
-def Neon_zip2 : SDNode<"AArch64ISD::NEON_ZIP2", SDTPERMUTE>;
-def Neon_trn1 : SDNode<"AArch64ISD::NEON_TRN1", SDTPERMUTE>;
-def Neon_trn2 : SDNode<"AArch64ISD::NEON_TRN2", SDTPERMUTE>;
-
-def SDTVSHUF : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0, 1>]>;
-def Neon_rev64 : SDNode<"AArch64ISD::NEON_REV64", SDTVSHUF>;
-def Neon_rev32 : SDNode<"AArch64ISD::NEON_REV32", SDTVSHUF>;
-def Neon_rev16 : SDNode<"AArch64ISD::NEON_REV16", SDTVSHUF>;
-def Neon_vdup : SDNode<"AArch64ISD::NEON_VDUP", SDTypeProfile<1, 1,
- [SDTCisVec<0>]>>;
-def Neon_vduplane : SDNode<"AArch64ISD::NEON_VDUPLANE", SDTypeProfile<1, 2,
- [SDTCisVec<0>, SDTCisVec<1>, SDTCisVT<2, i64>]>>;
-def Neon_vextract : SDNode<"AArch64ISD::NEON_VEXTRACT", SDTypeProfile<1, 3,
- [SDTCisVec<0>, SDTCisSameAs<0, 1>,
- SDTCisSameAs<0, 2>, SDTCisVT<3, i64>]>>;
-
-//===----------------------------------------------------------------------===//
-// Addressing-mode instantiations
-//===----------------------------------------------------------------------===//
-
-multiclass ls_64_pats<dag address, dag Base, dag Offset, ValueType Ty> {
-defm : ls_neutral_pats<LSFP64_LDR, LSFP64_STR, Base,
- !foreach(decls.pattern, Offset,
- !subst(OFFSET, dword_uimm12, decls.pattern)),
- !foreach(decls.pattern, address,
- !subst(OFFSET, dword_uimm12,
- !subst(ALIGN, min_align8, decls.pattern))),
- Ty>;
-}
-
-multiclass ls_128_pats<dag address, dag Base, dag Offset, ValueType Ty> {
-defm : ls_neutral_pats<LSFP128_LDR, LSFP128_STR, Base,
- !foreach(decls.pattern, Offset,
- !subst(OFFSET, qword_uimm12, decls.pattern)),
- !foreach(decls.pattern, address,
- !subst(OFFSET, qword_uimm12,
- !subst(ALIGN, min_align16, decls.pattern))),
- Ty>;
-}
-
-multiclass uimm12_neon_pats<dag address, dag Base, dag Offset> {
- defm : ls_64_pats<address, Base, Offset, v8i8>;
- defm : ls_64_pats<address, Base, Offset, v4i16>;
- defm : ls_64_pats<address, Base, Offset, v2i32>;
- defm : ls_64_pats<address, Base, Offset, v1i64>;
- defm : ls_64_pats<address, Base, Offset, v2f32>;
- defm : ls_64_pats<address, Base, Offset, v1f64>;
-
- defm : ls_128_pats<address, Base, Offset, v16i8>;
- defm : ls_128_pats<address, Base, Offset, v8i16>;
- defm : ls_128_pats<address, Base, Offset, v4i32>;
- defm : ls_128_pats<address, Base, Offset, v2i64>;
- defm : ls_128_pats<address, Base, Offset, v4f32>;
- defm : ls_128_pats<address, Base, Offset, v2f64>;
-}
-
-defm : uimm12_neon_pats<(A64WrapperSmall
- tconstpool:$Hi, tconstpool:$Lo12, ALIGN),
- (ADRPxi tconstpool:$Hi), (i64 tconstpool:$Lo12)>;
-
-//===----------------------------------------------------------------------===//
-// Multiclasses
-//===----------------------------------------------------------------------===//
-
-multiclass NeonI_3VSame_B_sizes<bit u, bits<2> size, bits<5> opcode,
- string asmop, SDPatternOperator opnode8B,
- SDPatternOperator opnode16B,
- bit Commutable = 0> {
- let isCommutable = Commutable in {
- def _8B : NeonI_3VSame<0b0, u, size, opcode,
- (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
- asmop # "\t$Rd.8b, $Rn.8b, $Rm.8b",
- [(set (v8i8 VPR64:$Rd),
- (v8i8 (opnode8B (v8i8 VPR64:$Rn), (v8i8 VPR64:$Rm))))],
- NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
-
- def _16B : NeonI_3VSame<0b1, u, size, opcode,
- (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
- asmop # "\t$Rd.16b, $Rn.16b, $Rm.16b",
- [(set (v16i8 VPR128:$Rd),
- (v16i8 (opnode16B (v16i8 VPR128:$Rn), (v16i8 VPR128:$Rm))))],
- NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
- }
-
-}
-
-multiclass NeonI_3VSame_HS_sizes<bit u, bits<5> opcode,
- string asmop, SDPatternOperator opnode,
- bit Commutable = 0> {
- let isCommutable = Commutable in {
- def _4H : NeonI_3VSame<0b0, u, 0b01, opcode,
- (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
- asmop # "\t$Rd.4h, $Rn.4h, $Rm.4h",
- [(set (v4i16 VPR64:$Rd),
- (v4i16 (opnode (v4i16 VPR64:$Rn), (v4i16 VPR64:$Rm))))],
- NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
-
- def _8H : NeonI_3VSame<0b1, u, 0b01, opcode,
- (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
- asmop # "\t$Rd.8h, $Rn.8h, $Rm.8h",
- [(set (v8i16 VPR128:$Rd),
- (v8i16 (opnode (v8i16 VPR128:$Rn), (v8i16 VPR128:$Rm))))],
- NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
-
- def _2S : NeonI_3VSame<0b0, u, 0b10, opcode,
- (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
- asmop # "\t$Rd.2s, $Rn.2s, $Rm.2s",
- [(set (v2i32 VPR64:$Rd),
- (v2i32 (opnode (v2i32 VPR64:$Rn), (v2i32 VPR64:$Rm))))],
- NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
-
- def _4S : NeonI_3VSame<0b1, u, 0b10, opcode,
- (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
- asmop # "\t$Rd.4s, $Rn.4s, $Rm.4s",
- [(set (v4i32 VPR128:$Rd),
- (v4i32 (opnode (v4i32 VPR128:$Rn), (v4i32 VPR128:$Rm))))],
- NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
- }
-}
-multiclass NeonI_3VSame_BHS_sizes<bit u, bits<5> opcode,
- string asmop, SDPatternOperator opnode,
- bit Commutable = 0>
- : NeonI_3VSame_HS_sizes<u, opcode, asmop, opnode, Commutable> {
- let isCommutable = Commutable in {
- def _8B : NeonI_3VSame<0b0, u, 0b00, opcode,
- (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
- asmop # "\t$Rd.8b, $Rn.8b, $Rm.8b",
- [(set (v8i8 VPR64:$Rd),
- (v8i8 (opnode (v8i8 VPR64:$Rn), (v8i8 VPR64:$Rm))))],
- NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
-
- def _16B : NeonI_3VSame<0b1, u, 0b00, opcode,
- (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
- asmop # "\t$Rd.16b, $Rn.16b, $Rm.16b",
- [(set (v16i8 VPR128:$Rd),
- (v16i8 (opnode (v16i8 VPR128:$Rn), (v16i8 VPR128:$Rm))))],
- NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
- }
-}
-
-multiclass NeonI_3VSame_BHSD_sizes<bit u, bits<5> opcode,
- string asmop, SDPatternOperator opnode,
- bit Commutable = 0>
- : NeonI_3VSame_BHS_sizes<u, opcode, asmop, opnode, Commutable> {
- let isCommutable = Commutable in {
- def _2D : NeonI_3VSame<0b1, u, 0b11, opcode,
- (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
- asmop # "\t$Rd.2d, $Rn.2d, $Rm.2d",
- [(set (v2i64 VPR128:$Rd),
- (v2i64 (opnode (v2i64 VPR128:$Rn), (v2i64 VPR128:$Rm))))],
- NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
- }
-}
-
-// Multiclass NeonI_3VSame_SD_sizes: operand types are floating-point types,
-// but result types can be either integer or floating-point types.
-multiclass NeonI_3VSame_SD_sizes<bit u, bit size, bits<5> opcode,
- string asmop, SDPatternOperator opnode,
- ValueType ResTy2S, ValueType ResTy4S,
- ValueType ResTy2D, bit Commutable = 0> {
- let isCommutable = Commutable in {
- def _2S : NeonI_3VSame<0b0, u, {size, 0b0}, opcode,
- (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
- asmop # "\t$Rd.2s, $Rn.2s, $Rm.2s",
- [(set (ResTy2S VPR64:$Rd),
- (ResTy2S (opnode (v2f32 VPR64:$Rn), (v2f32 VPR64:$Rm))))],
- NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
-
- def _4S : NeonI_3VSame<0b1, u, {size, 0b0}, opcode,
- (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
- asmop # "\t$Rd.4s, $Rn.4s, $Rm.4s",
- [(set (ResTy4S VPR128:$Rd),
- (ResTy4S (opnode (v4f32 VPR128:$Rn), (v4f32 VPR128:$Rm))))],
- NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
-
- def _2D : NeonI_3VSame<0b1, u, {size, 0b1}, opcode,
- (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
- asmop # "\t$Rd.2d, $Rn.2d, $Rm.2d",
- [(set (ResTy2D VPR128:$Rd),
- (ResTy2D (opnode (v2f64 VPR128:$Rn), (v2f64 VPR128:$Rm))))],
- NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
- }
-}
-
-//===----------------------------------------------------------------------===//
-// Instruction Definitions
-//===----------------------------------------------------------------------===//
-
-// Vector Arithmetic Instructions
-
-// Vector Add (Integer and Floating-Point)
-
-defm ADDvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b10000, "add", add, 1>;
-defm FADDvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11010, "fadd", fadd,
- v2f32, v4f32, v2f64, 1>;
-
-// Patterns to match add of v1i8/v1i16/v1i32 types
-def : Pat<(v1i8 (add FPR8:$Rn, FPR8:$Rm)),
- (EXTRACT_SUBREG
- (ADDvvv_8B (SUBREG_TO_REG (i64 0), FPR8:$Rn, sub_8),
- (SUBREG_TO_REG (i64 0), FPR8:$Rm, sub_8)),
- sub_8)>;
-def : Pat<(v1i16 (add FPR16:$Rn, FPR16:$Rm)),
- (EXTRACT_SUBREG
- (ADDvvv_4H (SUBREG_TO_REG (i64 0), FPR16:$Rn, sub_16),
- (SUBREG_TO_REG (i64 0), FPR16:$Rm, sub_16)),
- sub_16)>;
-def : Pat<(v1i32 (add FPR32:$Rn, FPR32:$Rm)),
- (EXTRACT_SUBREG
- (ADDvvv_2S (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32),
- (SUBREG_TO_REG (i64 0), FPR32:$Rm, sub_32)),
- sub_32)>;
-
-// Vector Sub (Integer and Floating-Point)
-
-defm SUBvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b10000, "sub", sub, 0>;
-defm FSUBvvv : NeonI_3VSame_SD_sizes<0b0, 0b1, 0b11010, "fsub", fsub,
- v2f32, v4f32, v2f64, 0>;
-
-// Patterns to match sub of v1i8/v1i16/v1i32 types
-def : Pat<(v1i8 (sub FPR8:$Rn, FPR8:$Rm)),
- (EXTRACT_SUBREG
- (SUBvvv_8B (SUBREG_TO_REG (i64 0), FPR8:$Rn, sub_8),
- (SUBREG_TO_REG (i64 0), FPR8:$Rm, sub_8)),
- sub_8)>;
-def : Pat<(v1i16 (sub FPR16:$Rn, FPR16:$Rm)),
- (EXTRACT_SUBREG
- (SUBvvv_4H (SUBREG_TO_REG (i64 0), FPR16:$Rn, sub_16),
- (SUBREG_TO_REG (i64 0), FPR16:$Rm, sub_16)),
- sub_16)>;
-def : Pat<(v1i32 (sub FPR32:$Rn, FPR32:$Rm)),
- (EXTRACT_SUBREG
- (SUBvvv_2S (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32),
- (SUBREG_TO_REG (i64 0), FPR32:$Rm, sub_32)),
- sub_32)>;
-
-// Vector Multiply (Integer and Floating-Point)
-
-let SchedRW = [WriteFPMul, ReadFPMul, ReadFPMul] in {
-defm MULvvv : NeonI_3VSame_BHS_sizes<0b0, 0b10011, "mul", mul, 1>;
-defm FMULvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11011, "fmul", fmul,
- v2f32, v4f32, v2f64, 1>;
-}
-
-// Patterns to match mul of v1i8/v1i16/v1i32 types
-def : Pat<(v1i8 (mul FPR8:$Rn, FPR8:$Rm)),
- (EXTRACT_SUBREG
- (MULvvv_8B (SUBREG_TO_REG (i64 0), FPR8:$Rn, sub_8),
- (SUBREG_TO_REG (i64 0), FPR8:$Rm, sub_8)),
- sub_8)>;
-def : Pat<(v1i16 (mul FPR16:$Rn, FPR16:$Rm)),
- (EXTRACT_SUBREG
- (MULvvv_4H (SUBREG_TO_REG (i64 0), FPR16:$Rn, sub_16),
- (SUBREG_TO_REG (i64 0), FPR16:$Rm, sub_16)),
- sub_16)>;
-def : Pat<(v1i32 (mul FPR32:$Rn, FPR32:$Rm)),
- (EXTRACT_SUBREG
- (MULvvv_2S (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32),
- (SUBREG_TO_REG (i64 0), FPR32:$Rm, sub_32)),
- sub_32)>;
-
-// Vector Multiply (Polynomial)
-
-let SchedRW = [WriteFPMul, ReadFPMul, ReadFPMul] in {
-defm PMULvvv : NeonI_3VSame_B_sizes<0b1, 0b00, 0b10011, "pmul",
- int_arm_neon_vmulp, int_arm_neon_vmulp, 1>;
-}
-
-// Vector Multiply-accumulate and Multiply-subtract (Integer)
-
-// class NeonI_3VSame_Constraint_impl: NeonI_3VSame with no data type and a
-// tied-operand constraint ($src = $Rd).
-class NeonI_3VSame_Constraint_impl<string asmop, string asmlane,
- RegisterOperand VPRC, ValueType OpTy, bit q, bit u, bits<2> size,
- bits<5> opcode, SDPatternOperator opnode>
- : NeonI_3VSame<q, u, size, opcode,
- (outs VPRC:$Rd), (ins VPRC:$src, VPRC:$Rn, VPRC:$Rm),
- asmop # "\t$Rd" # asmlane # ", $Rn" # asmlane # ", $Rm" # asmlane,
- [(set (OpTy VPRC:$Rd),
- (OpTy (opnode (OpTy VPRC:$src), (OpTy VPRC:$Rn), (OpTy VPRC:$Rm))))],
- NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU, ReadFPALU, ReadFPALU]> {
- let Constraints = "$src = $Rd";
-}
-
-def Neon_mla : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
- (add node:$Ra, (mul node:$Rn, node:$Rm))>;
-
-def Neon_mls : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
- (sub node:$Ra, (mul node:$Rn, node:$Rm))>;
-
-
-let SchedRW = [WriteFPMAC, ReadFPMAC, ReadFPMAC] in {
-def MLAvvv_8B: NeonI_3VSame_Constraint_impl<"mla", ".8b", VPR64, v8i8,
- 0b0, 0b0, 0b00, 0b10010, Neon_mla>;
-def MLAvvv_16B: NeonI_3VSame_Constraint_impl<"mla", ".16b", VPR128, v16i8,
- 0b1, 0b0, 0b00, 0b10010, Neon_mla>;
-def MLAvvv_4H: NeonI_3VSame_Constraint_impl<"mla", ".4h", VPR64, v4i16,
- 0b0, 0b0, 0b01, 0b10010, Neon_mla>;
-def MLAvvv_8H: NeonI_3VSame_Constraint_impl<"mla", ".8h", VPR128, v8i16,
- 0b1, 0b0, 0b01, 0b10010, Neon_mla>;
-def MLAvvv_2S: NeonI_3VSame_Constraint_impl<"mla", ".2s", VPR64, v2i32,
- 0b0, 0b0, 0b10, 0b10010, Neon_mla>;
-def MLAvvv_4S: NeonI_3VSame_Constraint_impl<"mla", ".4s", VPR128, v4i32,
- 0b1, 0b0, 0b10, 0b10010, Neon_mla>;
-
-def MLSvvv_8B: NeonI_3VSame_Constraint_impl<"mls", ".8b", VPR64, v8i8,
- 0b0, 0b1, 0b00, 0b10010, Neon_mls>;
-def MLSvvv_16B: NeonI_3VSame_Constraint_impl<"mls", ".16b", VPR128, v16i8,
- 0b1, 0b1, 0b00, 0b10010, Neon_mls>;
-def MLSvvv_4H: NeonI_3VSame_Constraint_impl<"mls", ".4h", VPR64, v4i16,
- 0b0, 0b1, 0b01, 0b10010, Neon_mls>;
-def MLSvvv_8H: NeonI_3VSame_Constraint_impl<"mls", ".8h", VPR128, v8i16,
- 0b1, 0b1, 0b01, 0b10010, Neon_mls>;
-def MLSvvv_2S: NeonI_3VSame_Constraint_impl<"mls", ".2s", VPR64, v2i32,
- 0b0, 0b1, 0b10, 0b10010, Neon_mls>;
-def MLSvvv_4S: NeonI_3VSame_Constraint_impl<"mls", ".4s", VPR128, v4i32,
- 0b1, 0b1, 0b10, 0b10010, Neon_mls>;
-}
-
-// Vector Multiply-accumulate and Multiply-subtract (Floating Point)
-
-def Neon_fmla : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
- (fadd node:$Ra, (fmul_su node:$Rn, node:$Rm))>;
-
-def Neon_fmls : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
- (fsub node:$Ra, (fmul_su node:$Rn, node:$Rm))>;
-
-let Predicates = [HasNEON, UseFusedMAC],
- SchedRW = [WriteFPMAC, ReadFPMAC, ReadFPMAC] in {
-def FMLAvvv_2S: NeonI_3VSame_Constraint_impl<"fmla", ".2s", VPR64, v2f32,
- 0b0, 0b0, 0b00, 0b11001, Neon_fmla>;
-def FMLAvvv_4S: NeonI_3VSame_Constraint_impl<"fmla", ".4s", VPR128, v4f32,
- 0b1, 0b0, 0b00, 0b11001, Neon_fmla>;
-def FMLAvvv_2D: NeonI_3VSame_Constraint_impl<"fmla", ".2d", VPR128, v2f64,
- 0b1, 0b0, 0b01, 0b11001, Neon_fmla>;
-
-def FMLSvvv_2S: NeonI_3VSame_Constraint_impl<"fmls", ".2s", VPR64, v2f32,
- 0b0, 0b0, 0b10, 0b11001, Neon_fmls>;
-def FMLSvvv_4S: NeonI_3VSame_Constraint_impl<"fmls", ".4s", VPR128, v4f32,
- 0b1, 0b0, 0b10, 0b11001, Neon_fmls>;
-def FMLSvvv_2D: NeonI_3VSame_Constraint_impl<"fmls", ".2d", VPR128, v2f64,
- 0b1, 0b0, 0b11, 0b11001, Neon_fmls>;
-}
-
-// We're also allowed to match the fma instruction regardless of compiler
-// options.
-def : Pat<(v2f32 (fma VPR64:$Rn, VPR64:$Rm, VPR64:$Ra)),
- (FMLAvvv_2S VPR64:$Ra, VPR64:$Rn, VPR64:$Rm)>;
-def : Pat<(v4f32 (fma VPR128:$Rn, VPR128:$Rm, VPR128:$Ra)),
- (FMLAvvv_4S VPR128:$Ra, VPR128:$Rn, VPR128:$Rm)>;
-def : Pat<(v2f64 (fma VPR128:$Rn, VPR128:$Rm, VPR128:$Ra)),
- (FMLAvvv_2D VPR128:$Ra, VPR128:$Rn, VPR128:$Rm)>;
-
-def : Pat<(v2f32 (fma (fneg VPR64:$Rn), VPR64:$Rm, VPR64:$Ra)),
- (FMLSvvv_2S VPR64:$Ra, VPR64:$Rn, VPR64:$Rm)>;
-def : Pat<(v4f32 (fma (fneg VPR128:$Rn), VPR128:$Rm, VPR128:$Ra)),
- (FMLSvvv_4S VPR128:$Ra, VPR128:$Rn, VPR128:$Rm)>;
-def : Pat<(v2f64 (fma (fneg VPR128:$Rn), VPR128:$Rm, VPR128:$Ra)),
- (FMLSvvv_2D VPR128:$Ra, VPR128:$Rn, VPR128:$Rm)>;
-
-// Vector Divide (Floating-Point)
-
-let SchedRW = [WriteFPDiv, ReadFPDiv, ReadFPDiv] in {
-defm FDIVvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11111, "fdiv", fdiv,
- v2f32, v4f32, v2f64, 0>;
-}
-
-// Vector Bitwise Operations
-
-// Vector Bitwise AND
-
-defm ANDvvv : NeonI_3VSame_B_sizes<0b0, 0b00, 0b00011, "and", and, and, 1>;
-
-// Vector Bitwise Exclusive OR
-
-defm EORvvv : NeonI_3VSame_B_sizes<0b1, 0b00, 0b00011, "eor", xor, xor, 1>;
-
-// Vector Bitwise OR
-
-defm ORRvvv : NeonI_3VSame_B_sizes<0b0, 0b10, 0b00011, "orr", or, or, 1>;
-
-// ORR is disassembled as MOV if Vn == Vm.
-
-// Vector Move - register
-// Alias for ORR if Vn == Vm.
-// FIXME: This is actually the preferred syntax but TableGen can't deal with
-// custom printing of aliases.
-def : NeonInstAlias<"mov $Rd.8b, $Rn.8b",
- (ORRvvv_8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rn), 0>;
-def : NeonInstAlias<"mov $Rd.16b, $Rn.16b",
- (ORRvvv_16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rn), 0>;
-
-// The MOVI instruction takes two immediate operands. The first is the
-// immediate encoding, while the second is the cmode. A cmode of 14, or
-// 0b1110, produces a MOVI operation rather than an MVNI, ORR, or BIC.
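-// For example, Neon_AllOne below pairs immediate 255 with cmode 14, which
-// replicates 0xff into every byte lane: an all-ones vector.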
-def Neon_AllZero : PatFrag<(ops), (Neon_movi (i32 0), (i32 14))>;
-def Neon_AllOne : PatFrag<(ops), (Neon_movi (i32 255), (i32 14))>;
-
-def Neon_not8B : PatFrag<(ops node:$in),
- (xor node:$in, (bitconvert (v8i8 Neon_AllOne)))>;
-def Neon_not16B : PatFrag<(ops node:$in),
- (xor node:$in, (bitconvert (v16i8 Neon_AllOne)))>;
-
-def Neon_orn8B : PatFrag<(ops node:$Rn, node:$Rm),
- (or node:$Rn, (Neon_not8B node:$Rm))>;
-
-def Neon_orn16B : PatFrag<(ops node:$Rn, node:$Rm),
- (or node:$Rn, (Neon_not16B node:$Rm))>;
-
-def Neon_bic8B : PatFrag<(ops node:$Rn, node:$Rm),
- (and node:$Rn, (Neon_not8B node:$Rm))>;
-
-def Neon_bic16B : PatFrag<(ops node:$Rn, node:$Rm),
- (and node:$Rn, (Neon_not16B node:$Rm))>;
-
-
-// Vector Bitwise OR NOT - register
-
-defm ORNvvv : NeonI_3VSame_B_sizes<0b0, 0b11, 0b00011, "orn",
- Neon_orn8B, Neon_orn16B, 0>;
-
-// Vector Bitwise Bit Clear (AND NOT) - register
-
-defm BICvvv : NeonI_3VSame_B_sizes<0b0, 0b01, 0b00011, "bic",
- Neon_bic8B, Neon_bic16B, 0>;
-
-multiclass Neon_bitwise2V_patterns<SDPatternOperator opnode8B,
- SDPatternOperator opnode16B,
- Instruction INST8B,
- Instruction INST16B> {
- def : Pat<(v2i32 (opnode8B VPR64:$Rn, VPR64:$Rm)),
- (INST8B VPR64:$Rn, VPR64:$Rm)>;
- def : Pat<(v4i16 (opnode8B VPR64:$Rn, VPR64:$Rm)),
- (INST8B VPR64:$Rn, VPR64:$Rm)>;
- def : Pat<(v1i64 (opnode8B VPR64:$Rn, VPR64:$Rm)),
- (INST8B VPR64:$Rn, VPR64:$Rm)>;
- def : Pat<(v4i32 (opnode16B VPR128:$Rn, VPR128:$Rm)),
- (INST16B VPR128:$Rn, VPR128:$Rm)>;
- def : Pat<(v8i16 (opnode16B VPR128:$Rn, VPR128:$Rm)),
- (INST16B VPR128:$Rn, VPR128:$Rm)>;
- def : Pat<(v2i64 (opnode16B VPR128:$Rn, VPR128:$Rm)),
- (INST16B VPR128:$Rn, VPR128:$Rm)>;
-}
-
-// Additional patterns for bitwise instructions AND, EOR, ORR, BIC, ORN
-defm : Neon_bitwise2V_patterns<and, and, ANDvvv_8B, ANDvvv_16B>;
-defm : Neon_bitwise2V_patterns<or, or, ORRvvv_8B, ORRvvv_16B>;
-defm : Neon_bitwise2V_patterns<xor, xor, EORvvv_8B, EORvvv_16B>;
-defm : Neon_bitwise2V_patterns<Neon_bic8B, Neon_bic16B, BICvvv_8B, BICvvv_16B>;
-defm : Neon_bitwise2V_patterns<Neon_orn8B, Neon_orn16B, ORNvvv_8B, ORNvvv_16B>;
-
-// Vector Bitwise Select
-def BSLvvv_8B : NeonI_3VSame_Constraint_impl<"bsl", ".8b", VPR64, v8i8,
- 0b0, 0b1, 0b01, 0b00011, vselect>;
-
-def BSLvvv_16B : NeonI_3VSame_Constraint_impl<"bsl", ".16b", VPR128, v16i8,
- 0b1, 0b1, 0b01, 0b00011, vselect>;
-
-multiclass Neon_bitwise3V_patterns<SDPatternOperator opnode,
- Instruction INST8B,
- Instruction INST16B> {
- // Disassociate type from instruction definition
- def : Pat<(v8i8 (opnode (v8i8 VPR64:$src), VPR64:$Rn, VPR64:$Rm)),
- (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
- def : Pat<(v2i32 (opnode (v2i32 VPR64:$src), VPR64:$Rn, VPR64:$Rm)),
- (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
- def : Pat<(v2f32 (opnode (v2i32 VPR64:$src), VPR64:$Rn, VPR64:$Rm)),
- (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
- def : Pat<(v4i16 (opnode (v4i16 VPR64:$src), VPR64:$Rn, VPR64:$Rm)),
- (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
- def : Pat<(v1i64 (opnode (v1i64 VPR64:$src), VPR64:$Rn, VPR64:$Rm)),
- (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
- def : Pat<(v1f64 (opnode (v1i64 VPR64:$src), VPR64:$Rn, VPR64:$Rm)),
- (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
- def : Pat<(v16i8 (opnode (v16i8 VPR128:$src), VPR128:$Rn, VPR128:$Rm)),
- (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
- def : Pat<(v4i32 (opnode (v4i32 VPR128:$src), VPR128:$Rn, VPR128:$Rm)),
- (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
- def : Pat<(v8i16 (opnode (v8i16 VPR128:$src), VPR128:$Rn, VPR128:$Rm)),
- (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
- def : Pat<(v2i64 (opnode (v2i64 VPR128:$src), VPR128:$Rn, VPR128:$Rm)),
- (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
- def : Pat<(v2f64 (opnode (v2i64 VPR128:$src), VPR128:$Rn, VPR128:$Rm)),
- (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
- def : Pat<(v4f32 (opnode (v4i32 VPR128:$src), VPR128:$Rn, VPR128:$Rm)),
- (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
-
-  // Allow matching the BSL instruction pattern with a non-constant operand.
- def : Pat<(v8i8 (or (and VPR64:$Rn, VPR64:$Rd),
- (and VPR64:$Rm, (Neon_not8B VPR64:$Rd)))),
- (INST8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rm)>;
- def : Pat<(v4i16 (or (and VPR64:$Rn, VPR64:$Rd),
- (and VPR64:$Rm, (Neon_not8B VPR64:$Rd)))),
- (INST8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rm)>;
- def : Pat<(v2i32 (or (and VPR64:$Rn, VPR64:$Rd),
- (and VPR64:$Rm, (Neon_not8B VPR64:$Rd)))),
- (INST8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rm)>;
- def : Pat<(v1i64 (or (and VPR64:$Rn, VPR64:$Rd),
- (and VPR64:$Rm, (Neon_not8B VPR64:$Rd)))),
- (INST8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rm)>;
- def : Pat<(v16i8 (or (and VPR128:$Rn, VPR128:$Rd),
- (and VPR128:$Rm, (Neon_not16B VPR128:$Rd)))),
- (INST16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rm)>;
- def : Pat<(v8i16 (or (and VPR128:$Rn, VPR128:$Rd),
- (and VPR128:$Rm, (Neon_not16B VPR128:$Rd)))),
- (INST16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rm)>;
- def : Pat<(v4i32 (or (and VPR128:$Rn, VPR128:$Rd),
- (and VPR128:$Rm, (Neon_not16B VPR128:$Rd)))),
- (INST16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rm)>;
- def : Pat<(v2i64 (or (and VPR128:$Rn, VPR128:$Rd),
- (and VPR128:$Rm, (Neon_not16B VPR128:$Rd)))),
- (INST16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rm)>;
-
-  // Allow matching the llvm.arm.* intrinsics.
- def : Pat<(v8i8 (int_arm_neon_vbsl (v8i8 VPR64:$src),
- (v8i8 VPR64:$Rn), (v8i8 VPR64:$Rm))),
- (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
- def : Pat<(v4i16 (int_arm_neon_vbsl (v4i16 VPR64:$src),
- (v4i16 VPR64:$Rn), (v4i16 VPR64:$Rm))),
- (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
- def : Pat<(v2i32 (int_arm_neon_vbsl (v2i32 VPR64:$src),
- (v2i32 VPR64:$Rn), (v2i32 VPR64:$Rm))),
- (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
- def : Pat<(v1i64 (int_arm_neon_vbsl (v1i64 VPR64:$src),
- (v1i64 VPR64:$Rn), (v1i64 VPR64:$Rm))),
- (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
- def : Pat<(v2f32 (int_arm_neon_vbsl (v2f32 VPR64:$src),
- (v2f32 VPR64:$Rn), (v2f32 VPR64:$Rm))),
- (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
- def : Pat<(v1f64 (int_arm_neon_vbsl (v1f64 VPR64:$src),
- (v1f64 VPR64:$Rn), (v1f64 VPR64:$Rm))),
- (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>;
- def : Pat<(v16i8 (int_arm_neon_vbsl (v16i8 VPR128:$src),
- (v16i8 VPR128:$Rn), (v16i8 VPR128:$Rm))),
- (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
- def : Pat<(v8i16 (int_arm_neon_vbsl (v8i16 VPR128:$src),
- (v8i16 VPR128:$Rn), (v8i16 VPR128:$Rm))),
- (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
- def : Pat<(v4i32 (int_arm_neon_vbsl (v4i32 VPR128:$src),
- (v4i32 VPR128:$Rn), (v4i32 VPR128:$Rm))),
- (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
- def : Pat<(v2i64 (int_arm_neon_vbsl (v2i64 VPR128:$src),
- (v2i64 VPR128:$Rn), (v2i64 VPR128:$Rm))),
- (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
- def : Pat<(v4f32 (int_arm_neon_vbsl (v4f32 VPR128:$src),
- (v4f32 VPR128:$Rn), (v4f32 VPR128:$Rm))),
- (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
- def : Pat<(v2f64 (int_arm_neon_vbsl (v2f64 VPR128:$src),
- (v2f64 VPR128:$Rn), (v2f64 VPR128:$Rm))),
- (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>;
-}
-
-// Additional patterns for bitwise instruction BSL
-defm : Neon_bitwise3V_patterns<vselect, BSLvvv_8B, BSLvvv_16B>;
-
-def Neon_NoBSLop : PatFrag<(ops node:$src, node:$Rn, node:$Rm),
- (vselect node:$src, node:$Rn, node:$Rm),
- [{ (void)N; return false; }]>;
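-// The predicate above never matches, so BIT and BIF below are not produced by
-// ISel pattern matching; they exist for the assembler and disassembler.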
-
-// Vector Bitwise Insert if True
-
-def BITvvv_8B : NeonI_3VSame_Constraint_impl<"bit", ".8b", VPR64, v8i8,
- 0b0, 0b1, 0b10, 0b00011, Neon_NoBSLop>;
-def BITvvv_16B : NeonI_3VSame_Constraint_impl<"bit", ".16b", VPR128, v16i8,
- 0b1, 0b1, 0b10, 0b00011, Neon_NoBSLop>;
-
-// Vector Bitwise Insert if False
-
-def BIFvvv_8B : NeonI_3VSame_Constraint_impl<"bif", ".8b", VPR64, v8i8,
- 0b0, 0b1, 0b11, 0b00011, Neon_NoBSLop>;
-def BIFvvv_16B : NeonI_3VSame_Constraint_impl<"bif", ".16b", VPR128, v16i8,
- 0b1, 0b1, 0b11, 0b00011, Neon_NoBSLop>;
-
-// Vector Absolute Difference and Accumulate (Signed, Unsigned)
-
-def Neon_uaba : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
- (add node:$Ra, (int_arm_neon_vabdu node:$Rn, node:$Rm))>;
-def Neon_saba : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
- (add node:$Ra, (int_arm_neon_vabds node:$Rn, node:$Rm))>;
-
-// Vector Absolute Difference and Accumulate (Unsigned)
-def UABAvvv_8B : NeonI_3VSame_Constraint_impl<"uaba", ".8b", VPR64, v8i8,
- 0b0, 0b1, 0b00, 0b01111, Neon_uaba>;
-def UABAvvv_16B : NeonI_3VSame_Constraint_impl<"uaba", ".16b", VPR128, v16i8,
- 0b1, 0b1, 0b00, 0b01111, Neon_uaba>;
-def UABAvvv_4H : NeonI_3VSame_Constraint_impl<"uaba", ".4h", VPR64, v4i16,
- 0b0, 0b1, 0b01, 0b01111, Neon_uaba>;
-def UABAvvv_8H : NeonI_3VSame_Constraint_impl<"uaba", ".8h", VPR128, v8i16,
- 0b1, 0b1, 0b01, 0b01111, Neon_uaba>;
-def UABAvvv_2S : NeonI_3VSame_Constraint_impl<"uaba", ".2s", VPR64, v2i32,
- 0b0, 0b1, 0b10, 0b01111, Neon_uaba>;
-def UABAvvv_4S : NeonI_3VSame_Constraint_impl<"uaba", ".4s", VPR128, v4i32,
- 0b1, 0b1, 0b10, 0b01111, Neon_uaba>;
-
-// Vector Absolute Difference and Accumulate (Signed)
-def SABAvvv_8B : NeonI_3VSame_Constraint_impl<"saba", ".8b", VPR64, v8i8,
- 0b0, 0b0, 0b00, 0b01111, Neon_saba>;
-def SABAvvv_16B : NeonI_3VSame_Constraint_impl<"saba", ".16b", VPR128, v16i8,
- 0b1, 0b0, 0b00, 0b01111, Neon_saba>;
-def SABAvvv_4H : NeonI_3VSame_Constraint_impl<"saba", ".4h", VPR64, v4i16,
- 0b0, 0b0, 0b01, 0b01111, Neon_saba>;
-def SABAvvv_8H : NeonI_3VSame_Constraint_impl<"saba", ".8h", VPR128, v8i16,
- 0b1, 0b0, 0b01, 0b01111, Neon_saba>;
-def SABAvvv_2S : NeonI_3VSame_Constraint_impl<"saba", ".2s", VPR64, v2i32,
- 0b0, 0b0, 0b10, 0b01111, Neon_saba>;
-def SABAvvv_4S : NeonI_3VSame_Constraint_impl<"saba", ".4s", VPR128, v4i32,
- 0b1, 0b0, 0b10, 0b01111, Neon_saba>;
-
-
-// Vector Absolute Difference (Signed, Unsigned)
-defm UABDvvv : NeonI_3VSame_BHS_sizes<0b1, 0b01110, "uabd", int_arm_neon_vabdu, 0>;
-defm SABDvvv : NeonI_3VSame_BHS_sizes<0b0, 0b01110, "sabd", int_arm_neon_vabds, 0>;
-
-// Vector Absolute Difference (Floating Point)
-defm FABDvvv : NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11010, "fabd",
- int_arm_neon_vabds, v2f32, v4f32, v2f64, 0>;
-
-// Vector Reciprocal Step (Floating Point)
-defm FRECPSvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11111, "frecps",
- int_arm_neon_vrecps,
- v2f32, v4f32, v2f64, 0>;
-
-// Vector Reciprocal Square Root Step (Floating Point)
-defm FRSQRTSvvv : NeonI_3VSame_SD_sizes<0b0, 0b1, 0b11111, "frsqrts",
- int_arm_neon_vrsqrts,
- v2f32, v4f32, v2f64, 0>;
-
-// Vector Comparisons
-
-def Neon_cmeq : PatFrag<(ops node:$lhs, node:$rhs),
- (Neon_cmp node:$lhs, node:$rhs, SETEQ)>;
-def Neon_cmphs : PatFrag<(ops node:$lhs, node:$rhs),
- (Neon_cmp node:$lhs, node:$rhs, SETUGE)>;
-def Neon_cmge : PatFrag<(ops node:$lhs, node:$rhs),
- (Neon_cmp node:$lhs, node:$rhs, SETGE)>;
-def Neon_cmhi : PatFrag<(ops node:$lhs, node:$rhs),
- (Neon_cmp node:$lhs, node:$rhs, SETUGT)>;
-def Neon_cmgt : PatFrag<(ops node:$lhs, node:$rhs),
- (Neon_cmp node:$lhs, node:$rhs, SETGT)>;
-
-// NeonI_compare_aliases class: swaps register operands to implement
-// comparison aliases, e.g., CMLE is an alias for CMGE with operands reversed.
-class NeonI_compare_aliases<string asmop, string asmlane,
- Instruction inst, RegisterOperand VPRC>
- : NeonInstAlias<asmop # "\t$Rd" # asmlane #", $Rn" # asmlane #
- ", $Rm" # asmlane,
- (inst VPRC:$Rd, VPRC:$Rm, VPRC:$Rn), 0b0>;
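-// For example, "cmls v0.8b, v1.8b, v2.8b" is accepted and encoded as
-// "cmhs v0.8b, v2.8b, v1.8b"; note $Rm and $Rn are swapped in the result.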
-
-// Vector Comparisons (Integer)
-
-// Vector Compare Mask Equal (Integer)
-let isCommutable = 1 in {
-defm CMEQvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b10001, "cmeq", Neon_cmeq, 0>;
-}
-
-// Vector Compare Mask Higher or Same (Unsigned Integer)
-defm CMHSvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b00111, "cmhs", Neon_cmphs, 0>;
-
-// Vector Compare Mask Greater Than or Equal (Integer)
-defm CMGEvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b00111, "cmge", Neon_cmge, 0>;
-
-// Vector Compare Mask Higher (Unsigned Integer)
-defm CMHIvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b00110, "cmhi", Neon_cmhi, 0>;
-
-// Vector Compare Mask Greater Than (Integer)
-defm CMGTvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b00110, "cmgt", Neon_cmgt, 0>;
-
-// Vector Compare Mask Bitwise Test (Integer)
-defm CMTSTvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b10001, "cmtst", Neon_tst, 0>;
-
-// Vector Compare Mask Less or Same (Unsigned Integer)
-// CMLS is an alias for CMHS with operands reversed.
-def CMLSvvv_8B : NeonI_compare_aliases<"cmls", ".8b", CMHSvvv_8B, VPR64>;
-def CMLSvvv_16B : NeonI_compare_aliases<"cmls", ".16b", CMHSvvv_16B, VPR128>;
-def CMLSvvv_4H : NeonI_compare_aliases<"cmls", ".4h", CMHSvvv_4H, VPR64>;
-def CMLSvvv_8H : NeonI_compare_aliases<"cmls", ".8h", CMHSvvv_8H, VPR128>;
-def CMLSvvv_2S : NeonI_compare_aliases<"cmls", ".2s", CMHSvvv_2S, VPR64>;
-def CMLSvvv_4S : NeonI_compare_aliases<"cmls", ".4s", CMHSvvv_4S, VPR128>;
-def CMLSvvv_2D : NeonI_compare_aliases<"cmls", ".2d", CMHSvvv_2D, VPR128>;
-
-// Vector Compare Mask Less Than or Equal (Integer)
-// CMLE is an alias for CMGE with operands reversed.
-def CMLEvvv_8B : NeonI_compare_aliases<"cmle", ".8b", CMGEvvv_8B, VPR64>;
-def CMLEvvv_16B : NeonI_compare_aliases<"cmle", ".16b", CMGEvvv_16B, VPR128>;
-def CMLEvvv_4H : NeonI_compare_aliases<"cmle", ".4h", CMGEvvv_4H, VPR64>;
-def CMLEvvv_8H : NeonI_compare_aliases<"cmle", ".8h", CMGEvvv_8H, VPR128>;
-def CMLEvvv_2S : NeonI_compare_aliases<"cmle", ".2s", CMGEvvv_2S, VPR64>;
-def CMLEvvv_4S : NeonI_compare_aliases<"cmle", ".4s", CMGEvvv_4S, VPR128>;
-def CMLEvvv_2D : NeonI_compare_aliases<"cmle", ".2d", CMGEvvv_2D, VPR128>;
-
-// Vector Compare Mask Lower (Unsigned Integer)
-// CMLO is an alias for CMHI with operands reversed.
-def CMLOvvv_8B : NeonI_compare_aliases<"cmlo", ".8b", CMHIvvv_8B, VPR64>;
-def CMLOvvv_16B : NeonI_compare_aliases<"cmlo", ".16b", CMHIvvv_16B, VPR128>;
-def CMLOvvv_4H : NeonI_compare_aliases<"cmlo", ".4h", CMHIvvv_4H, VPR64>;
-def CMLOvvv_8H : NeonI_compare_aliases<"cmlo", ".8h", CMHIvvv_8H, VPR128>;
-def CMLOvvv_2S : NeonI_compare_aliases<"cmlo", ".2s", CMHIvvv_2S, VPR64>;
-def CMLOvvv_4S : NeonI_compare_aliases<"cmlo", ".4s", CMHIvvv_4S, VPR128>;
-def CMLOvvv_2D : NeonI_compare_aliases<"cmlo", ".2d", CMHIvvv_2D, VPR128>;
-
-// Vector Compare Mask Less Than (Integer)
-// CMLT is an alias for CMGT with operands reversed.
-def CMLTvvv_8B : NeonI_compare_aliases<"cmlt", ".8b", CMGTvvv_8B, VPR64>;
-def CMLTvvv_16B : NeonI_compare_aliases<"cmlt", ".16b", CMGTvvv_16B, VPR128>;
-def CMLTvvv_4H : NeonI_compare_aliases<"cmlt", ".4h", CMGTvvv_4H, VPR64>;
-def CMLTvvv_8H : NeonI_compare_aliases<"cmlt", ".8h", CMGTvvv_8H, VPR128>;
-def CMLTvvv_2S : NeonI_compare_aliases<"cmlt", ".2s", CMGTvvv_2S, VPR64>;
-def CMLTvvv_4S : NeonI_compare_aliases<"cmlt", ".4s", CMGTvvv_4S, VPR128>;
-def CMLTvvv_2D : NeonI_compare_aliases<"cmlt", ".2d", CMGTvvv_2D, VPR128>;
-
-
-def neon_uimm0_asmoperand : AsmOperandClass
-{
- let Name = "UImm0";
- let PredicateMethod = "isUImm<0>";
- let RenderMethod = "addImmOperands";
-}
-
-def neon_uimm0 : Operand<i32>, ImmLeaf<i32, [{return Imm == 0;}]> {
- let ParserMatchClass = neon_uimm0_asmoperand;
- let PrintMethod = "printNeonUImm0Operand";
-}
-
-multiclass NeonI_cmpz_sizes<bit u, bits<5> opcode, string asmop, CondCode CC>
-{
- def _8B : NeonI_2VMisc<0b0, u, 0b00, opcode,
- (outs VPR64:$Rd), (ins VPR64:$Rn, neon_uimm0:$Imm),
- asmop # "\t$Rd.8b, $Rn.8b, $Imm",
- [(set (v8i8 VPR64:$Rd),
- (v8i8 (Neon_cmpz (v8i8 VPR64:$Rn), (i32 imm:$Imm), CC)))],
- NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU]>;
-
- def _16B : NeonI_2VMisc<0b1, u, 0b00, opcode,
- (outs VPR128:$Rd), (ins VPR128:$Rn, neon_uimm0:$Imm),
- asmop # "\t$Rd.16b, $Rn.16b, $Imm",
- [(set (v16i8 VPR128:$Rd),
- (v16i8 (Neon_cmpz (v16i8 VPR128:$Rn), (i32 imm:$Imm), CC)))],
- NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU]>;
-
- def _4H : NeonI_2VMisc<0b0, u, 0b01, opcode,
- (outs VPR64:$Rd), (ins VPR64:$Rn, neon_uimm0:$Imm),
- asmop # "\t$Rd.4h, $Rn.4h, $Imm",
- [(set (v4i16 VPR64:$Rd),
- (v4i16 (Neon_cmpz (v4i16 VPR64:$Rn), (i32 imm:$Imm), CC)))],
- NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU]>;
-
- def _8H : NeonI_2VMisc<0b1, u, 0b01, opcode,
- (outs VPR128:$Rd), (ins VPR128:$Rn, neon_uimm0:$Imm),
- asmop # "\t$Rd.8h, $Rn.8h, $Imm",
- [(set (v8i16 VPR128:$Rd),
- (v8i16 (Neon_cmpz (v8i16 VPR128:$Rn), (i32 imm:$Imm), CC)))],
- NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU]>;
-
- def _2S : NeonI_2VMisc<0b0, u, 0b10, opcode,
- (outs VPR64:$Rd), (ins VPR64:$Rn, neon_uimm0:$Imm),
- asmop # "\t$Rd.2s, $Rn.2s, $Imm",
- [(set (v2i32 VPR64:$Rd),
- (v2i32 (Neon_cmpz (v2i32 VPR64:$Rn), (i32 imm:$Imm), CC)))],
- NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU]>;
-
- def _4S : NeonI_2VMisc<0b1, u, 0b10, opcode,
- (outs VPR128:$Rd), (ins VPR128:$Rn, neon_uimm0:$Imm),
- asmop # "\t$Rd.4s, $Rn.4s, $Imm",
- [(set (v4i32 VPR128:$Rd),
- (v4i32 (Neon_cmpz (v4i32 VPR128:$Rn), (i32 imm:$Imm), CC)))],
- NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU]>;
-
- def _2D : NeonI_2VMisc<0b1, u, 0b11, opcode,
- (outs VPR128:$Rd), (ins VPR128:$Rn, neon_uimm0:$Imm),
- asmop # "\t$Rd.2d, $Rn.2d, $Imm",
- [(set (v2i64 VPR128:$Rd),
- (v2i64 (Neon_cmpz (v2i64 VPR128:$Rn), (i32 imm:$Imm), CC)))],
- NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU]>;
-}
-
-// Vector Compare Mask Equal to Zero (Integer)
-defm CMEQvvi : NeonI_cmpz_sizes<0b0, 0b01001, "cmeq", SETEQ>;
-
-// Vector Compare Mask Greater Than or Equal to Zero (Signed Integer)
-defm CMGEvvi : NeonI_cmpz_sizes<0b1, 0b01000, "cmge", SETGE>;
-
-// Vector Compare Mask Greater Than Zero (Signed Integer)
-defm CMGTvvi : NeonI_cmpz_sizes<0b0, 0b01000, "cmgt", SETGT>;
-
-// Vector Compare Mask Less Than or Equal To Zero (Signed Integer)
-defm CMLEvvi : NeonI_cmpz_sizes<0b1, 0b01001, "cmle", SETLE>;
-
-// Vector Compare Mask Less Than Zero (Signed Integer)
-defm CMLTvvi : NeonI_cmpz_sizes<0b0, 0b01010, "cmlt", SETLT>;
-
-// Vector Comparisons (Floating Point)
-
-// Vector Compare Mask Equal (Floating Point)
-let isCommutable = 1 in {
-defm FCMEQvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11100, "fcmeq", Neon_cmeq,
- v2i32, v4i32, v2i64, 0>;
-}
-
-// Vector Compare Mask Greater Than Or Equal (Floating Point)
-defm FCMGEvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11100, "fcmge", Neon_cmge,
- v2i32, v4i32, v2i64, 0>;
-
-// Vector Compare Mask Greater Than (Floating Point)
-defm FCMGTvvv : NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11100, "fcmgt", Neon_cmgt,
- v2i32, v4i32, v2i64, 0>;
-
-// Vector Compare Mask Less Than Or Equal (Floating Point)
-// FCMLE is an alias for FCMGE with operands reversed.
-def FCMLEvvv_2S : NeonI_compare_aliases<"fcmle", ".2s", FCMGEvvv_2S, VPR64>;
-def FCMLEvvv_4S : NeonI_compare_aliases<"fcmle", ".4s", FCMGEvvv_4S, VPR128>;
-def FCMLEvvv_2D : NeonI_compare_aliases<"fcmle", ".2d", FCMGEvvv_2D, VPR128>;
-
-// Vector Compare Mask Less Than (Floating Point)
-// FCMLT is an alias for FCMGT with operands reversed.
-def FCMLTvvv_2S : NeonI_compare_aliases<"fcmlt", ".2s", FCMGTvvv_2S, VPR64>;
-def FCMLTvvv_4S : NeonI_compare_aliases<"fcmlt", ".4s", FCMGTvvv_4S, VPR128>;
-def FCMLTvvv_2D : NeonI_compare_aliases<"fcmlt", ".2d", FCMGTvvv_2D, VPR128>;
-
-def fpzero_izero_asmoperand : AsmOperandClass {
- let Name = "FPZeroIZero";
- let ParserMethod = "ParseFPImm0AndImm0Operand";
- let DiagnosticType = "FPZero";
-}
-
-def fpzz32 : Operand<f32>,
- ComplexPattern<f32, 1, "SelectFPZeroOperand", [fpimm]> {
- let ParserMatchClass = fpzero_izero_asmoperand;
- let PrintMethod = "printFPZeroOperand";
- let DecoderMethod = "DecodeFPZeroOperand";
-}
-
-multiclass NeonI_fpcmpz_sizes<bit u, bit size, bits<5> opcode,
- string asmop, CondCode CC>
-{
- def _2S : NeonI_2VMisc<0b0, u, {size, 0b0}, opcode,
- (outs VPR64:$Rd), (ins VPR64:$Rn, fpzz32:$FPImm),
- asmop # "\t$Rd.2s, $Rn.2s, $FPImm",
- [(set (v2i32 VPR64:$Rd),
- (v2i32 (Neon_cmpz (v2f32 VPR64:$Rn), (f32 fpzz32:$FPImm), CC)))],
- NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU]>;
-
- def _4S : NeonI_2VMisc<0b1, u, {size, 0b0}, opcode,
- (outs VPR128:$Rd), (ins VPR128:$Rn, fpzz32:$FPImm),
- asmop # "\t$Rd.4s, $Rn.4s, $FPImm",
- [(set (v4i32 VPR128:$Rd),
- (v4i32 (Neon_cmpz (v4f32 VPR128:$Rn), (f32 fpzz32:$FPImm), CC)))],
- NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU]>;
-
- def _2D : NeonI_2VMisc<0b1, u, {size, 0b1}, opcode,
- (outs VPR128:$Rd), (ins VPR128:$Rn, fpzz32:$FPImm),
- asmop # "\t$Rd.2d, $Rn.2d, $FPImm",
- [(set (v2i64 VPR128:$Rd),
- (v2i64 (Neon_cmpz (v2f64 VPR128:$Rn), (f32 fpzz32:$FPImm), CC)))],
- NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU]>;
-}
-
-// Vector Compare Mask Equal to Zero (Floating Point)
-defm FCMEQvvi : NeonI_fpcmpz_sizes<0b0, 0b1, 0b01101, "fcmeq", SETEQ>;
-
-// Vector Compare Mask Greater Than or Equal to Zero (Floating Point)
-defm FCMGEvvi : NeonI_fpcmpz_sizes<0b1, 0b1, 0b01100, "fcmge", SETGE>;
-
-// Vector Compare Mask Greater Than Zero (Floating Point)
-defm FCMGTvvi : NeonI_fpcmpz_sizes<0b0, 0b1, 0b01100, "fcmgt", SETGT>;
-
-// Vector Compare Mask Less Than or Equal To Zero (Floating Point)
-defm FCMLEvvi : NeonI_fpcmpz_sizes<0b1, 0b1, 0b01101, "fcmle", SETLE>;
-
-// Vector Compare Mask Less Than Zero (Floating Point)
-defm FCMLTvvi : NeonI_fpcmpz_sizes<0b0, 0b1, 0b01110, "fcmlt", SETLT>;
-
-// Vector Absolute Comparisons (Floating Point)
-
-// Vector Absolute Compare Mask Greater Than Or Equal (Floating Point)
-defm FACGEvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11101, "facge",
- int_arm_neon_vacge,
- v2i32, v4i32, v2i64, 0>;
-
-// Vector Absolute Compare Mask Greater Than (Floating Point)
-defm FACGTvvv : NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11101, "facgt",
- int_arm_neon_vacgt,
- v2i32, v4i32, v2i64, 0>;
-
-// Vector Absolute Compare Mask Less Than Or Equal (Floating Point)
-// FACLE is an alias for FACGE with operands reversed.
-def FACLEvvv_2S : NeonI_compare_aliases<"facle", ".2s", FACGEvvv_2S, VPR64>;
-def FACLEvvv_4S : NeonI_compare_aliases<"facle", ".4s", FACGEvvv_4S, VPR128>;
-def FACLEvvv_2D : NeonI_compare_aliases<"facle", ".2d", FACGEvvv_2D, VPR128>;
-
-// Vector Absolute Compare Mask Less Than (Floating Point)
-// FACLT is an alias for FACGT with operands reversed.
-def FACLTvvv_2S : NeonI_compare_aliases<"faclt", ".2s", FACGTvvv_2S, VPR64>;
-def FACLTvvv_4S : NeonI_compare_aliases<"faclt", ".4s", FACGTvvv_4S, VPR128>;
-def FACLTvvv_2D : NeonI_compare_aliases<"faclt", ".2d", FACGTvvv_2D, VPR128>;
-
-// Vector halving add (Integer Signed, Unsigned)
-defm SHADDvvv : NeonI_3VSame_BHS_sizes<0b0, 0b00000, "shadd",
- int_arm_neon_vhadds, 1>;
-defm UHADDvvv : NeonI_3VSame_BHS_sizes<0b1, 0b00000, "uhadd",
- int_arm_neon_vhaddu, 1>;
-
-// Vector halving sub (Integer Signed, Unsigned)
-defm SHSUBvvv : NeonI_3VSame_BHS_sizes<0b0, 0b00100, "shsub",
- int_arm_neon_vhsubs, 0>;
-defm UHSUBvvv : NeonI_3VSame_BHS_sizes<0b1, 0b00100, "uhsub",
- int_arm_neon_vhsubu, 0>;
-
-// Vector rounding halving add (Integer Signed, Unsigned)
-defm SRHADDvvv : NeonI_3VSame_BHS_sizes<0b0, 0b00010, "srhadd",
- int_arm_neon_vrhadds, 1>;
-defm URHADDvvv : NeonI_3VSame_BHS_sizes<0b1, 0b00010, "urhadd",
- int_arm_neon_vrhaddu, 1>;
-
-// Vector Saturating add (Integer Signed, Unsigned)
-defm SQADDvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b00001, "sqadd",
- int_arm_neon_vqadds, 1>;
-defm UQADDvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b00001, "uqadd",
- int_arm_neon_vqaddu, 1>;
-
-// Vector Saturating sub (Integer Signed, Unsigned)
-defm SQSUBvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b00101, "sqsub",
- int_arm_neon_vqsubs, 1>;
-defm UQSUBvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b00101, "uqsub",
- int_arm_neon_vqsubu, 1>;
-
-// Vector Shift Left (Signed and Unsigned Integer)
-defm SSHLvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b01000, "sshl",
- int_arm_neon_vshifts, 1>;
-defm USHLvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b01000, "ushl",
- int_arm_neon_vshiftu, 1>;
-
-// Vector Saturating Shift Left (Signed and Unsigned Integer)
-defm SQSHLvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b01001, "sqshl",
- int_arm_neon_vqshifts, 1>;
-defm UQSHLvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b01001, "uqshl",
- int_arm_neon_vqshiftu, 1>;
-
-// Vector Rounding Shift Left (Signed and Unsigned Integer)
-defm SRSHLvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b01010, "srshl",
- int_arm_neon_vrshifts, 1>;
-defm URSHLvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b01010, "urshl",
- int_arm_neon_vrshiftu, 1>;
-
-// Vector Saturating Rounding Shift Left (Signed and Unsigned Integer)
-defm SQRSHLvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b01011, "sqrshl",
- int_arm_neon_vqrshifts, 1>;
-defm UQRSHLvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b01011, "uqrshl",
- int_arm_neon_vqrshiftu, 1>;
-
-// Vector Maximum (Signed and Unsigned Integer)
-defm SMAXvvv : NeonI_3VSame_BHS_sizes<0b0, 0b01100, "smax", int_arm_neon_vmaxs, 1>;
-defm UMAXvvv : NeonI_3VSame_BHS_sizes<0b1, 0b01100, "umax", int_arm_neon_vmaxu, 1>;
-
-// Vector Minimum (Signed and Unsigned Integer)
-defm SMINvvv : NeonI_3VSame_BHS_sizes<0b0, 0b01101, "smin", int_arm_neon_vmins, 1>;
-defm UMINvvv : NeonI_3VSame_BHS_sizes<0b1, 0b01101, "umin", int_arm_neon_vminu, 1>;
-
-// Vector Maximum (Floating Point)
-defm FMAXvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11110, "fmax",
- int_arm_neon_vmaxs,
- v2f32, v4f32, v2f64, 1>;
-
-// Vector Minimum (Floating Point)
-defm FMINvvv : NeonI_3VSame_SD_sizes<0b0, 0b1, 0b11110, "fmin",
- int_arm_neon_vmins,
- v2f32, v4f32, v2f64, 1>;
-
-// Vector maxNum (Floating Point) - prefer a number over a quiet NaN
-defm FMAXNMvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11000, "fmaxnm",
- int_aarch64_neon_vmaxnm,
- v2f32, v4f32, v2f64, 1>;
-
-// Vector minNum (Floating Point) - prefer a number over a quiet NaN
-defm FMINNMvvv : NeonI_3VSame_SD_sizes<0b0, 0b1, 0b11000, "fminnm",
- int_aarch64_neon_vminnm,
- v2f32, v4f32, v2f64, 1>;
-
-// Vector Maximum Pairwise (Signed and Unsigned Integer)
-defm SMAXPvvv : NeonI_3VSame_BHS_sizes<0b0, 0b10100, "smaxp", int_arm_neon_vpmaxs, 1>;
-defm UMAXPvvv : NeonI_3VSame_BHS_sizes<0b1, 0b10100, "umaxp", int_arm_neon_vpmaxu, 1>;
-
-// Vector Minimum Pairwise (Signed and Unsigned Integer)
-defm SMINPvvv : NeonI_3VSame_BHS_sizes<0b0, 0b10101, "sminp", int_arm_neon_vpmins, 1>;
-defm UMINPvvv : NeonI_3VSame_BHS_sizes<0b1, 0b10101, "uminp", int_arm_neon_vpminu, 1>;
-
-// Vector Maximum Pairwise (Floating Point)
-defm FMAXPvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11110, "fmaxp",
- int_arm_neon_vpmaxs, v2f32, v4f32, v2f64, 1>;
-
-// Vector Minimum Pairwise (Floating Point)
-defm FMINPvvv : NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11110, "fminp",
- int_arm_neon_vpmins, v2f32, v4f32, v2f64, 1>;
-
-// Vector maxNum Pairwise (Floating Point) - prefer a number over a quiet NaN
-defm FMAXNMPvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11000, "fmaxnmp",
- int_aarch64_neon_vpmaxnm,
- v2f32, v4f32, v2f64, 1>;
-
-// Vector minNum Pairwise (Floating Point) - prefer a number over a quiet NaN
-defm FMINNMPvvv : NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11000, "fminnmp",
- int_aarch64_neon_vpminnm,
- v2f32, v4f32, v2f64, 1>;
-
-// Vector Addition Pairwise (Integer)
-defm ADDP : NeonI_3VSame_BHSD_sizes<0b0, 0b10111, "addp", int_arm_neon_vpadd, 1>;
-
-// Vector Addition Pairwise (Floating Point)
-defm FADDP : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11010, "faddp",
- int_arm_neon_vpadd,
- v2f32, v4f32, v2f64, 1>;
-
-let SchedRW = [WriteFPMul, ReadFPMul, ReadFPMul] in {
-// Vector Saturating Doubling Multiply High
-defm SQDMULHvvv : NeonI_3VSame_HS_sizes<0b0, 0b10110, "sqdmulh",
- int_arm_neon_vqdmulh, 1>;
-
-// Vector Saturating Rounding Doubling Multiply High
-defm SQRDMULHvvv : NeonI_3VSame_HS_sizes<0b1, 0b10110, "sqrdmulh",
- int_arm_neon_vqrdmulh, 1>;
-
-// Vector Multiply Extended (Floating Point)
-defm FMULXvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11011, "fmulx",
- int_aarch64_neon_vmulx,
- v2f32, v4f32, v2f64, 1>;
-}
-
-// Patterns to match the llvm.aarch64.* intrinsics for
-// ADDP, SMINP, UMINP, SMAXP and UMAXP when they produce an i32 result.
-class Neon_VectorPair_v2i32_pattern<SDPatternOperator opnode, Instruction INST>
- : Pat<(v1i32 (opnode (v2i32 VPR64:$Rn))),
- (EXTRACT_SUBREG
- (v2i32 (INST (v2i32 VPR64:$Rn), (v2i32 VPR64:$Rn))),
- sub_32)>;
-
-def : Neon_VectorPair_v2i32_pattern<int_aarch64_neon_sminv, SMINPvvv_2S>;
-def : Neon_VectorPair_v2i32_pattern<int_aarch64_neon_uminv, UMINPvvv_2S>;
-def : Neon_VectorPair_v2i32_pattern<int_aarch64_neon_smaxv, SMAXPvvv_2S>;
-def : Neon_VectorPair_v2i32_pattern<int_aarch64_neon_umaxv, UMAXPvvv_2S>;
-def : Neon_VectorPair_v2i32_pattern<int_aarch64_neon_vaddv, ADDP_2S>;
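-// For example, (v1i32 (int_aarch64_neon_sminv (v2i32 V))) is selected as
-// "sminp" of V with itself, followed by extracting element 0 via sub_32.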
-
-// Vector Immediate Instructions
-
-multiclass neon_mov_imm_shift_asmoperands<string PREFIX>
-{
- def _asmoperand : AsmOperandClass
- {
- let Name = "NeonMovImmShift" # PREFIX;
- let RenderMethod = "addNeonMovImmShift" # PREFIX # "Operands";
- let PredicateMethod = "isNeonMovImmShift" # PREFIX;
- }
-}
-
-// Definitions of the vector immediate shift operands
-
-// The selectable use cases extract the shift operation
-// information from the OpCmode field encoded in the immediate.
-def neon_mod_shift_imm_XFORM : SDNodeXForm<imm, [{
- uint64_t OpCmode = N->getZExtValue();
- unsigned ShiftImm;
- unsigned ShiftOnesIn;
- unsigned HasShift =
- A64Imms::decodeNeonModShiftImm(OpCmode, ShiftImm, ShiftOnesIn);
- if (!HasShift) return SDValue();
- return CurDAG->getTargetConstant(ShiftImm, MVT::i32);
-}]>;
-
-// Vector immediate shift operands that accept LSL and MSL
-// shift operators, with the shift amount in the range 0, 8, 16, 24 (LSL),
-// 0, 8 (LSLH) or 8, 16 (MSL).
-defm neon_mov_imm_LSL : neon_mov_imm_shift_asmoperands<"LSL">;
-defm neon_mov_imm_MSL : neon_mov_imm_shift_asmoperands<"MSL">;
-// LSLH restricts the shift amount to 0 or 8 out of 0, 8, 16, 24
-defm neon_mov_imm_LSLH : neon_mov_imm_shift_asmoperands<"LSLH">;
-
-multiclass neon_mov_imm_shift_operands<string PREFIX,
- string HALF, string ISHALF, code pred>
-{
- def _operand : Operand<i32>, ImmLeaf<i32, pred, neon_mod_shift_imm_XFORM>
- {
- let PrintMethod =
- "printNeonMovImmShiftOperand<A64SE::" # PREFIX # ", " # ISHALF # ">";
- let DecoderMethod =
- "DecodeNeonMovImmShiftOperand<A64SE::" # PREFIX # ", " # ISHALF # ">";
- let ParserMatchClass =
- !cast<AsmOperandClass>("neon_mov_imm_" # PREFIX # HALF # "_asmoperand");
- }
-}
-
-defm neon_mov_imm_LSL : neon_mov_imm_shift_operands<"LSL", "", "false", [{
- unsigned ShiftImm;
- unsigned ShiftOnesIn;
- unsigned HasShift =
- A64Imms::decodeNeonModShiftImm(Imm, ShiftImm, ShiftOnesIn);
- return (HasShift && !ShiftOnesIn);
-}]>;
-
-defm neon_mov_imm_MSL : neon_mov_imm_shift_operands<"MSL", "", "false", [{
- unsigned ShiftImm;
- unsigned ShiftOnesIn;
- unsigned HasShift =
- A64Imms::decodeNeonModShiftImm(Imm, ShiftImm, ShiftOnesIn);
- return (HasShift && ShiftOnesIn);
-}]>;
-
-defm neon_mov_imm_LSLH : neon_mov_imm_shift_operands<"LSL", "H", "true", [{
- unsigned ShiftImm;
- unsigned ShiftOnesIn;
- unsigned HasShift =
- A64Imms::decodeNeonModShiftImm(Imm, ShiftImm, ShiftOnesIn);
- return (HasShift && !ShiftOnesIn);
-}]>;
-
-def neon_uimm1_asmoperand : AsmOperandClass
-{
- let Name = "UImm1";
- let PredicateMethod = "isUImm<1>";
- let RenderMethod = "addImmOperands";
-}
-
-def neon_uimm2_asmoperand : AsmOperandClass
-{
- let Name = "UImm2";
- let PredicateMethod = "isUImm<2>";
- let RenderMethod = "addImmOperands";
-}
-
-def neon_uimm8_asmoperand : AsmOperandClass
-{
- let Name = "UImm8";
- let PredicateMethod = "isUImm<8>";
- let RenderMethod = "addImmOperands";
-}
-
-def neon_uimm8 : Operand<i32>, ImmLeaf<i32, [{(void)Imm; return true;}]> {
- let ParserMatchClass = neon_uimm8_asmoperand;
- let PrintMethod = "printUImmHexOperand";
-}
-
-def neon_uimm64_mask_asmoperand : AsmOperandClass
-{
- let Name = "NeonUImm64Mask";
- let PredicateMethod = "isNeonUImm64Mask";
- let RenderMethod = "addNeonUImm64MaskOperands";
-}
-
-// An MCOperand for a 64-bit bytemask, in which each byte is either 0x00 or
-// 0xff; the mask is encoded as an unsigned 8-bit value, one bit per byte.
-def neon_uimm64_mask : Operand<i32>, ImmLeaf<i32, [{(void)Imm; return true;}]> {
- let ParserMatchClass = neon_uimm64_mask_asmoperand;
- let PrintMethod = "printNeonUImm64MaskOperand";
-}
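-// A worked example, assuming the usual one-bit-per-byte expansion for this
-// cmode: the bytemask 0xff00ff00ff00ff00 corresponds to imm8 = 0b10101010
-// (0xaa), with the most significant bit selecting the most significant byte.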
-
-multiclass NeonI_mov_imm_lsl_sizes<string asmop, bit op,
- SDPatternOperator opnode>
-{
- // shift zeros, per word
- def _2S : NeonI_1VModImm<0b0, op,
- (outs VPR64:$Rd),
- (ins neon_uimm8:$Imm,
- neon_mov_imm_LSL_operand:$Simm),
- !strconcat(asmop, "\t$Rd.2s, $Imm$Simm"),
- [(set (v2i32 VPR64:$Rd),
- (v2i32 (opnode (timm:$Imm),
- (neon_mov_imm_LSL_operand:$Simm))))],
- NoItinerary>,
- Sched<[WriteFPALU]> {
- bits<2> Simm;
- let cmode = {0b0, Simm{1}, Simm{0}, 0b0};
- }
-
- def _4S : NeonI_1VModImm<0b1, op,
- (outs VPR128:$Rd),
- (ins neon_uimm8:$Imm,
- neon_mov_imm_LSL_operand:$Simm),
- !strconcat(asmop, "\t$Rd.4s, $Imm$Simm"),
- [(set (v4i32 VPR128:$Rd),
- (v4i32 (opnode (timm:$Imm),
- (neon_mov_imm_LSL_operand:$Simm))))],
- NoItinerary>,
- Sched<[WriteFPALU]> {
- bits<2> Simm;
- let cmode = {0b0, Simm{1}, Simm{0}, 0b0};
- }
-
- // shift zeros, per halfword
- def _4H : NeonI_1VModImm<0b0, op,
- (outs VPR64:$Rd),
- (ins neon_uimm8:$Imm,
- neon_mov_imm_LSLH_operand:$Simm),
- !strconcat(asmop, "\t$Rd.4h, $Imm$Simm"),
- [(set (v4i16 VPR64:$Rd),
- (v4i16 (opnode (timm:$Imm),
- (neon_mov_imm_LSLH_operand:$Simm))))],
- NoItinerary>,
- Sched<[WriteFPALU]> {
- bit Simm;
- let cmode = {0b1, 0b0, Simm, 0b0};
- }
-
- def _8H : NeonI_1VModImm<0b1, op,
- (outs VPR128:$Rd),
- (ins neon_uimm8:$Imm,
- neon_mov_imm_LSLH_operand:$Simm),
- !strconcat(asmop, "\t$Rd.8h, $Imm$Simm"),
- [(set (v8i16 VPR128:$Rd),
- (v8i16 (opnode (timm:$Imm),
- (neon_mov_imm_LSLH_operand:$Simm))))],
- NoItinerary>,
- Sched<[WriteFPALU]> {
- bit Simm;
- let cmode = {0b1, 0b0, Simm, 0b0};
- }
-}
-
-multiclass NeonI_mov_imm_with_constraint_lsl_sizes<string asmop, bit op,
- SDPatternOperator opnode,
- SDPatternOperator neonopnode>
-{
- let Constraints = "$src = $Rd" in {
- // shift zeros, per word
- def _2S : NeonI_1VModImm<0b0, op,
- (outs VPR64:$Rd),
- (ins VPR64:$src, neon_uimm8:$Imm,
- neon_mov_imm_LSL_operand:$Simm),
- !strconcat(asmop, "\t$Rd.2s, $Imm$Simm"),
- [(set (v2i32 VPR64:$Rd),
- (v2i32 (opnode (v2i32 VPR64:$src),
- (v2i32 (neonopnode timm:$Imm,
- neon_mov_imm_LSL_operand:$Simm)))))],
- NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU]> {
- bits<2> Simm;
- let cmode = {0b0, Simm{1}, Simm{0}, 0b1};
- }
-
- def _4S : NeonI_1VModImm<0b1, op,
- (outs VPR128:$Rd),
- (ins VPR128:$src, neon_uimm8:$Imm,
- neon_mov_imm_LSL_operand:$Simm),
- !strconcat(asmop, "\t$Rd.4s, $Imm$Simm"),
- [(set (v4i32 VPR128:$Rd),
- (v4i32 (opnode (v4i32 VPR128:$src),
- (v4i32 (neonopnode timm:$Imm,
- neon_mov_imm_LSL_operand:$Simm)))))],
- NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU]> {
- bits<2> Simm;
- let cmode = {0b0, Simm{1}, Simm{0}, 0b1};
- }
-
- // shift zeros, per halfword
- def _4H : NeonI_1VModImm<0b0, op,
- (outs VPR64:$Rd),
- (ins VPR64:$src, neon_uimm8:$Imm,
- neon_mov_imm_LSLH_operand:$Simm),
- !strconcat(asmop, "\t$Rd.4h, $Imm$Simm"),
- [(set (v4i16 VPR64:$Rd),
- (v4i16 (opnode (v4i16 VPR64:$src),
- (v4i16 (neonopnode timm:$Imm,
- neon_mov_imm_LSL_operand:$Simm)))))],
- NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU]> {
- bit Simm;
- let cmode = {0b1, 0b0, Simm, 0b1};
- }
-
- def _8H : NeonI_1VModImm<0b1, op,
- (outs VPR128:$Rd),
- (ins VPR128:$src, neon_uimm8:$Imm,
- neon_mov_imm_LSLH_operand:$Simm),
- !strconcat(asmop, "\t$Rd.8h, $Imm$Simm"),
- [(set (v8i16 VPR128:$Rd),
- (v8i16 (opnode (v8i16 VPR128:$src),
- (v8i16 (neonopnode timm:$Imm,
- neon_mov_imm_LSL_operand:$Simm)))))],
- NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU]> {
- bit Simm;
- let cmode = {0b1, 0b0, Simm, 0b1};
- }
- }
-}
-
-multiclass NeonI_mov_imm_msl_sizes<string asmop, bit op,
- SDPatternOperator opnode>
-{
- // shift ones, per word
- def _2S : NeonI_1VModImm<0b0, op,
- (outs VPR64:$Rd),
- (ins neon_uimm8:$Imm,
- neon_mov_imm_MSL_operand:$Simm),
- !strconcat(asmop, "\t$Rd.2s, $Imm$Simm"),
- [(set (v2i32 VPR64:$Rd),
- (v2i32 (opnode (timm:$Imm),
- (neon_mov_imm_MSL_operand:$Simm))))],
- NoItinerary>,
- Sched<[WriteFPALU]> {
- bit Simm;
- let cmode = {0b1, 0b1, 0b0, Simm};
- }
-
- def _4S : NeonI_1VModImm<0b1, op,
- (outs VPR128:$Rd),
- (ins neon_uimm8:$Imm,
- neon_mov_imm_MSL_operand:$Simm),
- !strconcat(asmop, "\t$Rd.4s, $Imm$Simm"),
- [(set (v4i32 VPR128:$Rd),
- (v4i32 (opnode (timm:$Imm),
- (neon_mov_imm_MSL_operand:$Simm))))],
- NoItinerary>,
- Sched<[WriteFPALU]> {
- bit Simm;
- let cmode = {0b1, 0b1, 0b0, Simm};
- }
-}
-
-// Vector Move Immediate Shifted
-let isReMaterializable = 1 in {
-defm MOVIvi_lsl : NeonI_mov_imm_lsl_sizes<"movi", 0b0, Neon_movi>;
-}
-
-// Vector Move Inverted Immediate Shifted
-let isReMaterializable = 1 in {
-defm MVNIvi_lsl : NeonI_mov_imm_lsl_sizes<"mvni", 0b1, Neon_mvni>;
-}
-
-// Vector Bitwise Bit Clear (AND NOT) - immediate
-let isReMaterializable = 1 in {
-defm BICvi_lsl : NeonI_mov_imm_with_constraint_lsl_sizes<"bic", 0b1,
- and, Neon_mvni>;
-}
-
-// Vector Bitwise OR - immediate
-
-let isReMaterializable = 1 in {
-defm ORRvi_lsl : NeonI_mov_imm_with_constraint_lsl_sizes<"orr", 0b0,
- or, Neon_movi>;
-}
-
-// Additional patterns for Vector Bitwise Bit Clear (AND NOT) - immediate.
-// LowerBUILD_VECTOR favors lowering MOVI over MVNI, so selecting the BIC
-// immediate instructions requires additional patterns to transform
-// Neon_movi operands into BIC immediate operands.
-
-def neon_mov_imm_LSLH_transform_XFORM : SDNodeXForm<imm, [{
- uint64_t OpCmode = N->getZExtValue();
- unsigned ShiftImm;
- unsigned ShiftOnesIn;
- (void)A64Imms::decodeNeonModShiftImm(OpCmode, ShiftImm, ShiftOnesIn);
- // LSLH restricts the shift amount to 0 or 8, which are encoded as 0 and 1.
- // Transform the encoded shift amount from 0 to 1 and from 1 to 0.
- return CurDAG->getTargetConstant(!ShiftImm, MVT::i32);
-}]>;
-
-def neon_mov_imm_LSLH_transform_operand
- : ImmLeaf<i32, [{
- unsigned ShiftImm;
- unsigned ShiftOnesIn;
- unsigned HasShift =
- A64Imms::decodeNeonModShiftImm(Imm, ShiftImm, ShiftOnesIn);
- return (HasShift && !ShiftOnesIn); }],
- neon_mov_imm_LSLH_transform_XFORM>;
-
-// Transform (and A, (4h Neon_movi 0xff)) -> BIC 4h (A, 0xff, LSL #8)
-// Transform (and A, (4h Neon_movi 0xff LSL #8)) -> BIC 4h (A, 0xff)
-def : Pat<(v4i16 (and VPR64:$src,
- (v4i16 (Neon_movi 255,
- neon_mov_imm_LSLH_transform_operand:$Simm)))),
- (BICvi_lsl_4H VPR64:$src, 255,
- neon_mov_imm_LSLH_transform_operand:$Simm)>;
-
-// Transform (and A, (8h Neon_movi 0xff)) -> BIC 8h (A, 0xff, LSL #8)
-// Transform (and A, (8h Neon_movi 0xff LSL #8)) -> BIC 8h (A, 0xff)
-def : Pat<(v8i16 (and VPR128:$src,
- (v8i16 (Neon_movi 255,
- neon_mov_imm_LSLH_transform_operand:$Simm)))),
- (BICvi_lsl_8H VPR128:$src, 255,
- neon_mov_imm_LSLH_transform_operand:$Simm)>;
-
-def : Pat<(v8i8 (and VPR64:$src,
- (bitconvert(v4i16 (Neon_movi 255,
- neon_mov_imm_LSLH_transform_operand:$Simm))))),
- (BICvi_lsl_4H VPR64:$src, 255,
- neon_mov_imm_LSLH_transform_operand:$Simm)>;
-def : Pat<(v2i32 (and VPR64:$src,
- (bitconvert(v4i16 (Neon_movi 255,
- neon_mov_imm_LSLH_transform_operand:$Simm))))),
- (BICvi_lsl_4H VPR64:$src, 255,
- neon_mov_imm_LSLH_transform_operand:$Simm)>;
-def : Pat<(v1i64 (and VPR64:$src,
- (bitconvert(v4i16 (Neon_movi 255,
- neon_mov_imm_LSLH_transform_operand:$Simm))))),
- (BICvi_lsl_4H VPR64:$src, 255,
- neon_mov_imm_LSLH_transform_operand:$Simm)>;
-
-def : Pat<(v16i8 (and VPR128:$src,
- (bitconvert(v8i16 (Neon_movi 255,
- neon_mov_imm_LSLH_transform_operand:$Simm))))),
- (BICvi_lsl_8H VPR128:$src, 255,
- neon_mov_imm_LSLH_transform_operand:$Simm)>;
-def : Pat<(v4i32 (and VPR128:$src,
- (bitconvert(v8i16 (Neon_movi 255,
- neon_mov_imm_LSLH_transform_operand:$Simm))))),
- (BICvi_lsl_8H VPR128:$src, 255,
- neon_mov_imm_LSLH_transform_operand:$Simm)>;
-def : Pat<(v2i64 (and VPR128:$src,
- (bitconvert(v8i16 (Neon_movi 255,
- neon_mov_imm_LSLH_transform_operand:$Simm))))),
- (BICvi_lsl_8H VPR128:$src, 255,
- neon_mov_imm_LSLH_transform_operand:$Simm)>;
-
-multiclass Neon_bitwiseVi_patterns<SDPatternOperator opnode,
- SDPatternOperator neonopnode,
- Instruction INST4H,
- Instruction INST8H,
- Instruction INST2S,
- Instruction INST4S> {
- def : Pat<(v8i8 (opnode VPR64:$src,
- (bitconvert(v4i16 (neonopnode timm:$Imm,
- neon_mov_imm_LSLH_operand:$Simm))))),
- (INST4H VPR64:$src, neon_uimm8:$Imm,
- neon_mov_imm_LSLH_operand:$Simm)>;
- def : Pat<(v2i32 (opnode VPR64:$src,
- (bitconvert(v4i16 (neonopnode timm:$Imm,
- neon_mov_imm_LSLH_operand:$Simm))))),
- (INST4H VPR64:$src, neon_uimm8:$Imm,
- neon_mov_imm_LSLH_operand:$Simm)>;
- def : Pat<(v1i64 (opnode VPR64:$src,
- (bitconvert(v4i16 (neonopnode timm:$Imm,
- neon_mov_imm_LSLH_operand:$Simm))))),
- (INST4H VPR64:$src, neon_uimm8:$Imm,
- neon_mov_imm_LSLH_operand:$Simm)>;
-
- def : Pat<(v16i8 (opnode VPR128:$src,
- (bitconvert(v8i16 (neonopnode timm:$Imm,
- neon_mov_imm_LSLH_operand:$Simm))))),
- (INST8H VPR128:$src, neon_uimm8:$Imm,
- neon_mov_imm_LSLH_operand:$Simm)>;
- def : Pat<(v4i32 (opnode VPR128:$src,
- (bitconvert(v8i16 (neonopnode timm:$Imm,
- neon_mov_imm_LSLH_operand:$Simm))))),
- (INST8H VPR128:$src, neon_uimm8:$Imm,
- neon_mov_imm_LSLH_operand:$Simm)>;
- def : Pat<(v2i64 (opnode VPR128:$src,
- (bitconvert(v8i16 (neonopnode timm:$Imm,
- neon_mov_imm_LSLH_operand:$Simm))))),
- (INST8H VPR128:$src, neon_uimm8:$Imm,
- neon_mov_imm_LSLH_operand:$Simm)>;
-
- def : Pat<(v8i8 (opnode VPR64:$src,
- (bitconvert(v2i32 (neonopnode timm:$Imm,
- neon_mov_imm_LSLH_operand:$Simm))))),
- (INST2S VPR64:$src, neon_uimm8:$Imm,
- neon_mov_imm_LSLH_operand:$Simm)>;
- def : Pat<(v4i16 (opnode VPR64:$src,
- (bitconvert(v2i32 (neonopnode timm:$Imm,
- neon_mov_imm_LSLH_operand:$Simm))))),
- (INST2S VPR64:$src, neon_uimm8:$Imm,
- neon_mov_imm_LSLH_operand:$Simm)>;
- def : Pat<(v1i64 (opnode VPR64:$src,
- (bitconvert(v2i32 (neonopnode timm:$Imm,
- neon_mov_imm_LSLH_operand:$Simm))))),
- (INST2S VPR64:$src, neon_uimm8:$Imm,
- neon_mov_imm_LSLH_operand:$Simm)>;
-
- def : Pat<(v16i8 (opnode VPR128:$src,
- (bitconvert(v4i32 (neonopnode timm:$Imm,
- neon_mov_imm_LSLH_operand:$Simm))))),
- (INST4S VPR128:$src, neon_uimm8:$Imm,
- neon_mov_imm_LSLH_operand:$Simm)>;
- def : Pat<(v8i16 (opnode VPR128:$src,
- (bitconvert(v4i32 (neonopnode timm:$Imm,
- neon_mov_imm_LSLH_operand:$Simm))))),
- (INST4S VPR128:$src, neon_uimm8:$Imm,
- neon_mov_imm_LSLH_operand:$Simm)>;
- def : Pat<(v2i64 (opnode VPR128:$src,
- (bitconvert(v4i32 (neonopnode timm:$Imm,
- neon_mov_imm_LSLH_operand:$Simm))))),
- (INST4S VPR128:$src, neon_uimm8:$Imm,
- neon_mov_imm_LSLH_operand:$Simm)>;
-}
-
-// Additional patterns for Vector Bitwise Bit Clear (AND NOT) - immediate
-defm : Neon_bitwiseVi_patterns<and, Neon_mvni, BICvi_lsl_4H, BICvi_lsl_8H,
- BICvi_lsl_2S, BICvi_lsl_4S>;
-
-// Additional patterns for Vector Bitwise OR - immediate
-defm : Neon_bitwiseVi_patterns<or, Neon_movi, ORRvi_lsl_4H, ORRvi_lsl_8H,
- ORRvi_lsl_2S, ORRvi_lsl_4S>;
-
-
-// Vector Move Immediate Masked
-let isReMaterializable = 1 in {
-defm MOVIvi_msl : NeonI_mov_imm_msl_sizes<"movi", 0b0, Neon_movi>;
-}
-
-// Vector Move Inverted Immediate Masked
-let isReMaterializable = 1 in {
-defm MVNIvi_msl : NeonI_mov_imm_msl_sizes<"mvni", 0b1, Neon_mvni>;
-}
-
-class NeonI_mov_imm_lsl_aliases<string asmop, string asmlane,
- Instruction inst, RegisterOperand VPRC>
- : NeonInstAlias<!strconcat(asmop, "\t$Rd" # asmlane # ", $Imm"),
- (inst VPRC:$Rd, neon_uimm8:$Imm, 0), 0b0>;
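-// For example, "movi v0.2s, #42" is accepted as shorthand for
-// "movi v0.2s, #42, lsl #0" (the shift operand defaults to 0).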
-
-// Aliases for Vector Move Immediate Shifted
-def : NeonI_mov_imm_lsl_aliases<"movi", ".2s", MOVIvi_lsl_2S, VPR64>;
-def : NeonI_mov_imm_lsl_aliases<"movi", ".4s", MOVIvi_lsl_4S, VPR128>;
-def : NeonI_mov_imm_lsl_aliases<"movi", ".4h", MOVIvi_lsl_4H, VPR64>;
-def : NeonI_mov_imm_lsl_aliases<"movi", ".8h", MOVIvi_lsl_8H, VPR128>;
-
-// Aliases for Vector Move Inverted Immediate Shifted
-def : NeonI_mov_imm_lsl_aliases<"mvni", ".2s", MVNIvi_lsl_2S, VPR64>;
-def : NeonI_mov_imm_lsl_aliases<"mvni", ".4s", MVNIvi_lsl_4S, VPR128>;
-def : NeonI_mov_imm_lsl_aliases<"mvni", ".4h", MVNIvi_lsl_4H, VPR64>;
-def : NeonI_mov_imm_lsl_aliases<"mvni", ".8h", MVNIvi_lsl_8H, VPR128>;
-
-// Aliases for Vector Bitwise Bit Clear (AND NOT) - immediate
-def : NeonI_mov_imm_lsl_aliases<"bic", ".2s", BICvi_lsl_2S, VPR64>;
-def : NeonI_mov_imm_lsl_aliases<"bic", ".4s", BICvi_lsl_4S, VPR128>;
-def : NeonI_mov_imm_lsl_aliases<"bic", ".4h", BICvi_lsl_4H, VPR64>;
-def : NeonI_mov_imm_lsl_aliases<"bic", ".8h", BICvi_lsl_8H, VPR128>;
-
-// Aliases for Vector Bitwise OR - immediate
-def : NeonI_mov_imm_lsl_aliases<"orr", ".2s", ORRvi_lsl_2S, VPR64>;
-def : NeonI_mov_imm_lsl_aliases<"orr", ".4s", ORRvi_lsl_4S, VPR128>;
-def : NeonI_mov_imm_lsl_aliases<"orr", ".4h", ORRvi_lsl_4H, VPR64>;
-def : NeonI_mov_imm_lsl_aliases<"orr", ".8h", ORRvi_lsl_8H, VPR128>;
-
-// Vector Move Immediate - per byte
-let isReMaterializable = 1 in {
-def MOVIvi_8B : NeonI_1VModImm<0b0, 0b0,
- (outs VPR64:$Rd), (ins neon_uimm8:$Imm),
- "movi\t$Rd.8b, $Imm",
- [(set (v8i8 VPR64:$Rd),
- (v8i8 (Neon_movi (timm:$Imm), (i32 imm))))],
- NoItinerary>,
- Sched<[WriteFPALU]> {
- let cmode = 0b1110;
-}
-
-def MOVIvi_16B : NeonI_1VModImm<0b1, 0b0,
- (outs VPR128:$Rd), (ins neon_uimm8:$Imm),
- "movi\t$Rd.16b, $Imm",
- [(set (v16i8 VPR128:$Rd),
- (v16i8 (Neon_movi (timm:$Imm), (i32 imm))))],
- NoItinerary>,
- Sched<[WriteFPALU]> {
- let cmode = 0b1110;
-}
-}
-
-// Vector Move Immediate - bytemask, per double word
-let isReMaterializable = 1 in {
-def MOVIvi_2D : NeonI_1VModImm<0b1, 0b1,
- (outs VPR128:$Rd), (ins neon_uimm64_mask:$Imm),
- "movi\t $Rd.2d, $Imm",
- [(set (v2i64 VPR128:$Rd),
- (v2i64 (Neon_movi (timm:$Imm), (i32 imm))))],
- NoItinerary>,
- Sched<[WriteFPALU]> {
- let cmode = 0b1110;
-}
-}
-
-// Vector Move Immediate - bytemask, one doubleword
-
-let isReMaterializable = 1 in {
-def MOVIdi : NeonI_1VModImm<0b0, 0b1,
- (outs FPR64:$Rd), (ins neon_uimm64_mask:$Imm),
- "movi\t $Rd, $Imm",
- [(set (v1i64 FPR64:$Rd),
- (v1i64 (Neon_movi (timm:$Imm), (i32 imm))))],
- NoItinerary>,
- Sched<[WriteFPALU]> {
- let cmode = 0b1110;
-}
-}
-
-// Vector Floating Point Move Immediate
-
-class NeonI_FMOV_impl<string asmlane, RegisterOperand VPRC, ValueType OpTy,
- Operand immOpType, bit q, bit op>
- : NeonI_1VModImm<q, op,
- (outs VPRC:$Rd), (ins immOpType:$Imm),
- "fmov\t$Rd" # asmlane # ", $Imm",
- [(set (OpTy VPRC:$Rd),
- (OpTy (Neon_fmovi (timm:$Imm))))],
- NoItinerary>,
- Sched<[WriteFPALU]> {
- let cmode = 0b1111;
- }
-
-let isReMaterializable = 1 in {
-def FMOVvi_2S : NeonI_FMOV_impl<".2s", VPR64, v2f32, fmov32_operand, 0b0, 0b0>;
-def FMOVvi_4S : NeonI_FMOV_impl<".4s", VPR128, v4f32, fmov32_operand, 0b1, 0b0>;
-def FMOVvi_2D : NeonI_FMOV_impl<".2d", VPR128, v2f64, fmov64_operand, 0b1, 0b1>;
-}
-
-// Vector Shift (Immediate)
-
-// Shift Right/Left Immediate - The immh:immb field of these shifts is encoded
-// as follows:
-//
-// Offset Encoding
-// 8 immh:immb<6:3> = '0001xxx', <imm> is encoded in immh:immb<2:0>
-// 16 immh:immb<6:4> = '001xxxx', <imm> is encoded in immh:immb<3:0>
-// 32 immh:immb<6:5> = '01xxxxx', <imm> is encoded in immh:immb<4:0>
-// 64 immh:immb<6> = '1xxxxxx', <imm> is encoded in immh:immb<5:0>
-//
-// The shift right immediate amount, in the range 1 to element bits, is computed
-// as (2 * Offset) - UInt(immh:immb). The shift left immediate amount, in the
-// range 0 to element bits - 1, is computed as UInt(immh:immb) - Offset.
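-//
-// A worked example following these rules: for 16-bit elements (Offset 16), a
-// shift right by 3 is encoded as immh:immb = 32 - 3 = 0b0011101, and a shift
-// left by 3 is encoded as immh:immb = 16 + 3 = 0b0010011.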
-
-class shr_imm_asmoperands<string OFFSET> : AsmOperandClass {
- let Name = "ShrImm" # OFFSET;
- let RenderMethod = "addImmOperands";
- let DiagnosticType = "ShrImm" # OFFSET;
-}
-
-class shr_imm<string OFFSET> : Operand<i32> {
- let EncoderMethod = "getShiftRightImm" # OFFSET;
- let DecoderMethod = "DecodeShiftRightImm" # OFFSET;
- let ParserMatchClass =
- !cast<AsmOperandClass>("shr_imm" # OFFSET # "_asmoperand");
-}
-
-def shr_imm8_asmoperand : shr_imm_asmoperands<"8">;
-def shr_imm16_asmoperand : shr_imm_asmoperands<"16">;
-def shr_imm32_asmoperand : shr_imm_asmoperands<"32">;
-def shr_imm64_asmoperand : shr_imm_asmoperands<"64">;
-
-def shr_imm8 : shr_imm<"8">, ImmLeaf<i32, [{return Imm > 0 && Imm <= 8;}]>;
-def shr_imm16 : shr_imm<"16">, ImmLeaf<i32, [{return Imm > 0 && Imm <= 16;}]>;
-def shr_imm32 : shr_imm<"32">, ImmLeaf<i32, [{return Imm > 0 && Imm <= 32;}]>;
-def shr_imm64 : shr_imm<"64">, ImmLeaf<i32, [{return Imm > 0 && Imm <= 64;}]>;
-
-class shl_imm_asmoperands<string OFFSET> : AsmOperandClass {
- let Name = "ShlImm" # OFFSET;
- let RenderMethod = "addImmOperands";
- let DiagnosticType = "ShlImm" # OFFSET;
-}
-
-class shl_imm<string OFFSET> : Operand<i32> {
- let EncoderMethod = "getShiftLeftImm" # OFFSET;
- let DecoderMethod = "DecodeShiftLeftImm" # OFFSET;
- let ParserMatchClass =
- !cast<AsmOperandClass>("shl_imm" # OFFSET # "_asmoperand");
-}
-
-def shl_imm8_asmoperand : shl_imm_asmoperands<"8">;
-def shl_imm16_asmoperand : shl_imm_asmoperands<"16">;
-def shl_imm32_asmoperand : shl_imm_asmoperands<"32">;
-def shl_imm64_asmoperand : shl_imm_asmoperands<"64">;
-
-def shl_imm8 : shl_imm<"8">, ImmLeaf<i32, [{return Imm >= 0 && Imm < 8;}]>;
-def shl_imm16 : shl_imm<"16">, ImmLeaf<i32, [{return Imm >= 0 && Imm < 16;}]>;
-def shl_imm32 : shl_imm<"32">, ImmLeaf<i32, [{return Imm >= 0 && Imm < 32;}]>;
-def shl_imm64 : shl_imm<"64">, ImmLeaf<i32, [{return Imm >= 0 && Imm < 64;}]>;
-
-class N2VShift<bit q, bit u, bits<5> opcode, string asmop, string T,
- RegisterOperand VPRC, ValueType Ty, Operand ImmTy, SDNode OpNode>
- : NeonI_2VShiftImm<q, u, opcode,
- (outs VPRC:$Rd), (ins VPRC:$Rn, ImmTy:$Imm),
- asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
- [(set (Ty VPRC:$Rd),
- (Ty (OpNode (Ty VPRC:$Rn),
- (Ty (Neon_vdup (i32 ImmTy:$Imm))))))],
- NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU]>;
-
-multiclass NeonI_N2VShL<bit u, bits<5> opcode, string asmop> {
- // 64-bit vector types.
- def _8B : N2VShift<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shl_imm8, shl> {
- let Inst{22-19} = 0b0001; // immh:immb = 0001xxx
- }
-
- def _4H : N2VShift<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shl_imm16, shl> {
- let Inst{22-20} = 0b001; // immh:immb = 001xxxx
- }
-
- def _2S : N2VShift<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shl_imm32, shl> {
- let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
- }
-
- // 128-bit vector types.
- def _16B : N2VShift<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shl_imm8, shl> {
- let Inst{22-19} = 0b0001; // immh:immb = 0001xxx
- }
-
- def _8H : N2VShift<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shl_imm16, shl> {
- let Inst{22-20} = 0b001; // immh:immb = 001xxxx
- }
-
- def _4S : N2VShift<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shl_imm32, shl> {
- let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
- }
-
- def _2D : N2VShift<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shl_imm64, shl> {
- let Inst{22} = 0b1; // immh:immb = 1xxxxxx
- }
-}
-
-multiclass NeonI_N2VShR<bit u, bits<5> opcode, string asmop, SDNode OpNode> {
- def _8B : N2VShift<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8,
- OpNode> {
- let Inst{22-19} = 0b0001;
- }
-
- def _4H : N2VShift<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16,
- OpNode> {
- let Inst{22-20} = 0b001;
- }
-
- def _2S : N2VShift<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32,
- OpNode> {
- let Inst{22-21} = 0b01;
- }
-
- def _16B : N2VShift<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8,
- OpNode> {
- let Inst{22-19} = 0b0001;
- }
-
- def _8H : N2VShift<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16,
- OpNode> {
- let Inst{22-20} = 0b001;
- }
-
- def _4S : N2VShift<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32,
- OpNode> {
- let Inst{22-21} = 0b01;
- }
-
- def _2D : N2VShift<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64,
- OpNode> {
- let Inst{22} = 0b1;
- }
-}
-
-// Shift left
-
-defm SHLvvi : NeonI_N2VShL<0b0, 0b01010, "shl">;
-
-// Additional patterns to match vector shift left by immediate.
-// (v1i8/v1i16/v1i32 types)
-def : Pat<(v1i8 (shl (v1i8 FPR8:$Rn),
- (v1i8 (Neon_vdup (i32 (shl_imm8:$Imm)))))),
- (EXTRACT_SUBREG
- (SHLvvi_8B (SUBREG_TO_REG (i64 0), FPR8:$Rn, sub_8),
- shl_imm8:$Imm),
- sub_8)>;
-def : Pat<(v1i16 (shl (v1i16 FPR16:$Rn),
- (v1i16 (Neon_vdup (i32 (shl_imm16:$Imm)))))),
- (EXTRACT_SUBREG
- (SHLvvi_4H (SUBREG_TO_REG (i64 0), FPR16:$Rn, sub_16),
- shl_imm16:$Imm),
- sub_16)>;
-def : Pat<(v1i32 (shl (v1i32 FPR32:$Rn),
- (v1i32 (Neon_vdup (i32 (shl_imm32:$Imm)))))),
- (EXTRACT_SUBREG
- (SHLvvi_2S (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32),
- shl_imm32:$Imm),
- sub_32)>;
-
-// Shift right
-defm SSHRvvi : NeonI_N2VShR<0b0, 0b00000, "sshr", sra>;
-defm USHRvvi : NeonI_N2VShR<0b1, 0b00000, "ushr", srl>;
-
-// Additional patterns to match vector shift right by immediate.
-// (v1i8/v1i16/v1i32 types)
-def : Pat<(v1i8 (sra (v1i8 FPR8:$Rn),
- (v1i8 (Neon_vdup (i32 (shr_imm8:$Imm)))))),
- (EXTRACT_SUBREG
- (SSHRvvi_8B (SUBREG_TO_REG (i64 0), FPR8:$Rn, sub_8),
- shr_imm8:$Imm),
- sub_8)>;
-def : Pat<(v1i16 (sra (v1i16 FPR16:$Rn),
- (v1i16 (Neon_vdup (i32 (shr_imm16:$Imm)))))),
- (EXTRACT_SUBREG
- (SSHRvvi_4H (SUBREG_TO_REG (i64 0), FPR16:$Rn, sub_16),
- shr_imm16:$Imm),
- sub_16)>;
-def : Pat<(v1i32 (sra (v1i32 FPR32:$Rn),
- (v1i32 (Neon_vdup (i32 (shr_imm32:$Imm)))))),
- (EXTRACT_SUBREG
- (SSHRvvi_2S (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32),
- shr_imm32:$Imm),
- sub_32)>;
-def : Pat<(v1i8 (srl (v1i8 FPR8:$Rn),
- (v1i8 (Neon_vdup (i32 (shr_imm8:$Imm)))))),
- (EXTRACT_SUBREG
- (USHRvvi_8B (SUBREG_TO_REG (i64 0), FPR8:$Rn, sub_8),
- shr_imm8:$Imm),
- sub_8)>;
-def : Pat<(v1i16 (srl (v1i16 FPR16:$Rn),
- (v1i16 (Neon_vdup (i32 (shr_imm16:$Imm)))))),
- (EXTRACT_SUBREG
- (USHRvvi_4H (SUBREG_TO_REG (i64 0), FPR16:$Rn, sub_16),
- shr_imm16:$Imm),
- sub_16)>;
-def : Pat<(v1i32 (srl (v1i32 FPR32:$Rn),
- (v1i32 (Neon_vdup (i32 (shr_imm32:$Imm)))))),
- (EXTRACT_SUBREG
- (USHRvvi_2S (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32),
- shr_imm32:$Imm),
- sub_32)>;
-
-def Neon_High16B : PatFrag<(ops node:$in),
- (extract_subvector (v16i8 node:$in), (iPTR 8))>;
-def Neon_High8H : PatFrag<(ops node:$in),
- (extract_subvector (v8i16 node:$in), (iPTR 4))>;
-def Neon_High4S : PatFrag<(ops node:$in),
- (extract_subvector (v4i32 node:$in), (iPTR 2))>;
-def Neon_High2D : PatFrag<(ops node:$in),
- (extract_subvector (v2i64 node:$in), (iPTR 1))>;
-def Neon_High4float : PatFrag<(ops node:$in),
- (extract_subvector (v4f32 node:$in), (iPTR 2))>;
-def Neon_High2double : PatFrag<(ops node:$in),
- (extract_subvector (v2f64 node:$in), (iPTR 1))>;
-
-def Neon_Low16B : PatFrag<(ops node:$in),
- (v8i8 (extract_subvector (v16i8 node:$in),
- (iPTR 0)))>;
-def Neon_Low8H : PatFrag<(ops node:$in),
- (v4i16 (extract_subvector (v8i16 node:$in),
- (iPTR 0)))>;
-def Neon_Low4S : PatFrag<(ops node:$in),
- (v2i32 (extract_subvector (v4i32 node:$in),
- (iPTR 0)))>;
-def Neon_Low2D : PatFrag<(ops node:$in),
- (v1i64 (extract_subvector (v2i64 node:$in),
- (iPTR 0)))>;
-def Neon_Low4float : PatFrag<(ops node:$in),
- (v2f32 (extract_subvector (v4f32 node:$in),
- (iPTR 0)))>;
-def Neon_Low2double : PatFrag<(ops node:$in),
- (v1f64 (extract_subvector (v2f64 node:$in),
- (iPTR 0)))>;
-
-class N2VShiftLong<bit q, bit u, bits<5> opcode, string asmop, string DestT,
- string SrcT, ValueType DestTy, ValueType SrcTy,
- Operand ImmTy, SDPatternOperator ExtOp>
- : NeonI_2VShiftImm<q, u, opcode, (outs VPR128:$Rd),
- (ins VPR64:$Rn, ImmTy:$Imm),
- asmop # "\t$Rd." # DestT # ", $Rn." # SrcT # ", $Imm",
- [(set (DestTy VPR128:$Rd),
- (DestTy (shl
- (DestTy (ExtOp (SrcTy VPR64:$Rn))),
- (DestTy (Neon_vdup (i32 ImmTy:$Imm))))))],
- NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU]>;
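-// For example, "sshll Vd.8h, Vn.8b, #2" is modelled by the pattern above as
-// a sign extension of the 8b source to 8h followed by a per-lane left shift
-// by the immediate.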
-
-class N2VShiftLongHigh<bit q, bit u, bits<5> opcode, string asmop, string DestT,
- string SrcT, ValueType DestTy, ValueType SrcTy,
- int StartIndex, Operand ImmTy,
- SDPatternOperator ExtOp, PatFrag getTop>
- : NeonI_2VShiftImm<q, u, opcode, (outs VPR128:$Rd),
- (ins VPR128:$Rn, ImmTy:$Imm),
- asmop # "2\t$Rd." # DestT # ", $Rn." # SrcT # ", $Imm",
- [(set (DestTy VPR128:$Rd),
- (DestTy (shl
- (DestTy (ExtOp
- (SrcTy (getTop VPR128:$Rn)))),
- (DestTy (Neon_vdup (i32 ImmTy:$Imm))))))],
- NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU]>;
-
-multiclass NeonI_N2VShLL<string prefix, bit u, bits<5> opcode, string asmop,
- SDNode ExtOp> {
- // 64-bit vector types.
- def _8B : N2VShiftLong<0b0, u, opcode, asmop, "8h", "8b", v8i16, v8i8,
- shl_imm8, ExtOp> {
- let Inst{22-19} = 0b0001; // immh:immb = 0001xxx
- }
-
- def _4H : N2VShiftLong<0b0, u, opcode, asmop, "4s", "4h", v4i32, v4i16,
- shl_imm16, ExtOp> {
- let Inst{22-20} = 0b001; // immh:immb = 001xxxx
- }
-
- def _2S : N2VShiftLong<0b0, u, opcode, asmop, "2d", "2s", v2i64, v2i32,
- shl_imm32, ExtOp> {
- let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
- }
-
- // 128-bit vector types
- def _16B : N2VShiftLongHigh<0b1, u, opcode, asmop, "8h", "16b", v8i16, v8i8,
- 8, shl_imm8, ExtOp, Neon_High16B> {
- let Inst{22-19} = 0b0001; // immh:immb = 0001xxx
- }
-
- def _8H : N2VShiftLongHigh<0b1, u, opcode, asmop, "4s", "8h", v4i32, v4i16,
- 4, shl_imm16, ExtOp, Neon_High8H> {
- let Inst{22-20} = 0b001; // immh:immb = 001xxxx
- }
-
- def _4S : N2VShiftLongHigh<0b1, u, opcode, asmop, "2d", "4s", v2i64, v2i32,
- 2, shl_imm32, ExtOp, Neon_High4S> {
- let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
- }
-
- // Use other patterns to match when the immediate is 0.
- def : Pat<(v8i16 (ExtOp (v8i8 VPR64:$Rn))),
- (!cast<Instruction>(prefix # "_8B") VPR64:$Rn, 0)>;
-
- def : Pat<(v4i32 (ExtOp (v4i16 VPR64:$Rn))),
- (!cast<Instruction>(prefix # "_4H") VPR64:$Rn, 0)>;
-
- def : Pat<(v2i64 (ExtOp (v2i32 VPR64:$Rn))),
- (!cast<Instruction>(prefix # "_2S") VPR64:$Rn, 0)>;
-
- def : Pat<(v8i16 (ExtOp (v8i8 (Neon_High16B VPR128:$Rn)))),
- (!cast<Instruction>(prefix # "_16B") VPR128:$Rn, 0)>;
-
- def : Pat<(v4i32 (ExtOp (v4i16 (Neon_High8H VPR128:$Rn)))),
- (!cast<Instruction>(prefix # "_8H") VPR128:$Rn, 0)>;
-
- def : Pat<(v2i64 (ExtOp (v2i32 (Neon_High4S VPR128:$Rn)))),
- (!cast<Instruction>(prefix # "_4S") VPR128:$Rn, 0)>;
-}
-
-// Shift left long
-defm SSHLLvvi : NeonI_N2VShLL<"SSHLLvvi", 0b0, 0b10100, "sshll", sext>;
-defm USHLLvvi : NeonI_N2VShLL<"USHLLvvi", 0b1, 0b10100, "ushll", zext>;
-
-class NeonI_ext_len_alias<string asmop, string lane, string laneOp,
- Instruction inst, RegisterOperand VPRC,
- RegisterOperand VPRCOp>
- : NeonInstAlias<asmop # "\t$Rd" # lane #", $Rn" # laneOp,
- (inst VPRC:$Rd, VPRCOp:$Rn, 0), 0b0>;
-
-// Signed integer lengthen (vector) is an alias for SSHLL Vd, Vn, #0
-// Signed integer lengthen (vector, second part) is an alias for SSHLL2 Vd, Vn, #0
-// FIXME: This is actually the preferred syntax but TableGen can't deal with
-// custom printing of aliases.
-def SXTLvv_8B : NeonI_ext_len_alias<"sxtl", ".8h", ".8b", SSHLLvvi_8B, VPR128, VPR64>;
-def SXTLvv_4H : NeonI_ext_len_alias<"sxtl", ".4s", ".4h", SSHLLvvi_4H, VPR128, VPR64>;
-def SXTLvv_2S : NeonI_ext_len_alias<"sxtl", ".2d", ".2s", SSHLLvvi_2S, VPR128, VPR64>;
-def SXTL2vv_16B : NeonI_ext_len_alias<"sxtl2", ".8h", ".16b", SSHLLvvi_16B, VPR128, VPR128>;
-def SXTL2vv_8H : NeonI_ext_len_alias<"sxtl2", ".4s", ".8h", SSHLLvvi_8H, VPR128, VPR128>;
-def SXTL2vv_4S : NeonI_ext_len_alias<"sxtl2", ".2d", ".4s", SSHLLvvi_4S, VPR128, VPR128>;
-
-// Unsigned integer lengthen (vector) is an alias for USHLL Vd, Vn, #0
-// Unsigned integer lengthen (vector, second part) is an alias for USHLL2 Vd, Vn, #0
-// FIXME: This is actually the preferred syntax but TableGen can't deal with
-// custom printing of aliases.
-def UXTLvv_8B : NeonI_ext_len_alias<"uxtl", ".8h", ".8b", USHLLvvi_8B, VPR128, VPR64>;
-def UXTLvv_4H : NeonI_ext_len_alias<"uxtl", ".4s", ".4h", USHLLvvi_4H, VPR128, VPR64>;
-def UXTLvv_2S : NeonI_ext_len_alias<"uxtl", ".2d", ".2s", USHLLvvi_2S, VPR128, VPR64>;
-def UXTL2vv_16B : NeonI_ext_len_alias<"uxtl2", ".8h", ".16b", USHLLvvi_16B, VPR128, VPR128>;
-def UXTL2vv_8H : NeonI_ext_len_alias<"uxtl2", ".4s", ".8h", USHLLvvi_8H, VPR128, VPR128>;
-def UXTL2vv_4S : NeonI_ext_len_alias<"uxtl2", ".2d", ".4s", USHLLvvi_4S, VPR128, VPR128>;
-
-def : Pat<(v8i16 (anyext (v8i8 VPR64:$Rn))), (USHLLvvi_8B VPR64:$Rn, 0)>;
-def : Pat<(v4i32 (anyext (v4i16 VPR64:$Rn))), (USHLLvvi_4H VPR64:$Rn, 0)>;
-def : Pat<(v2i64 (anyext (v2i32 VPR64:$Rn))), (USHLLvvi_2S VPR64:$Rn, 0)>;
-
-// Rounding/Saturating shift
-class N2VShift_RQ<bit q, bit u, bits<5> opcode, string asmop, string T,
- RegisterOperand VPRC, ValueType Ty, Operand ImmTy,
- SDPatternOperator OpNode>
- : NeonI_2VShiftImm<q, u, opcode,
- (outs VPRC:$Rd), (ins VPRC:$Rn, ImmTy:$Imm),
- asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
- [(set (Ty VPRC:$Rd), (Ty (OpNode (Ty VPRC:$Rn),
- (i32 ImmTy:$Imm))))],
- NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU]>;
-
-// shift right (vector by immediate)
-multiclass NeonI_N2VShR_RQ<bit u, bits<5> opcode, string asmop,
- SDPatternOperator OpNode> {
- def _8B : N2VShift_RQ<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8,
- OpNode> {
- let Inst{22-19} = 0b0001;
- }
-
- def _4H : N2VShift_RQ<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16,
- OpNode> {
- let Inst{22-20} = 0b001;
- }
-
- def _2S : N2VShift_RQ<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32,
- OpNode> {
- let Inst{22-21} = 0b01;
- }
-
- def _16B : N2VShift_RQ<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8,
- OpNode> {
- let Inst{22-19} = 0b0001;
- }
-
- def _8H : N2VShift_RQ<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16,
- OpNode> {
- let Inst{22-20} = 0b001;
- }
-
- def _4S : N2VShift_RQ<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32,
- OpNode> {
- let Inst{22-21} = 0b01;
- }
-
- def _2D : N2VShift_RQ<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64,
- OpNode> {
- let Inst{22} = 0b1;
- }
-}
-
-multiclass NeonI_N2VShL_Q<bit u, bits<5> opcode, string asmop,
- SDPatternOperator OpNode> {
- // 64-bit vector types.
- def _8B : N2VShift_RQ<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shl_imm8,
- OpNode> {
- let Inst{22-19} = 0b0001;
- }
-
- def _4H : N2VShift_RQ<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shl_imm16,
- OpNode> {
- let Inst{22-20} = 0b001;
- }
-
- def _2S : N2VShift_RQ<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shl_imm32,
- OpNode> {
- let Inst{22-21} = 0b01;
- }
-
- // 128-bit vector types.
- def _16B : N2VShift_RQ<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shl_imm8,
- OpNode> {
- let Inst{22-19} = 0b0001;
- }
-
- def _8H : N2VShift_RQ<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shl_imm16,
- OpNode> {
- let Inst{22-20} = 0b001;
- }
-
- def _4S : N2VShift_RQ<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shl_imm32,
- OpNode> {
- let Inst{22-21} = 0b01;
- }
-
- def _2D : N2VShift_RQ<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shl_imm64,
- OpNode> {
- let Inst{22} = 0b1;
- }
-}
-
-// Rounding shift right
-defm SRSHRvvi : NeonI_N2VShR_RQ<0b0, 0b00100, "srshr",
- int_aarch64_neon_vsrshr>;
-defm URSHRvvi : NeonI_N2VShR_RQ<0b1, 0b00100, "urshr",
- int_aarch64_neon_vurshr>;
-
-// Saturating shift left unsigned
-defm SQSHLUvvi : NeonI_N2VShL_Q<0b1, 0b01100, "sqshlu", int_aarch64_neon_vsqshlu>;
-
-// Saturating shift left
-defm SQSHLvvi : NeonI_N2VShL_Q<0b0, 0b01110, "sqshl", Neon_sqrshlImm>;
-defm UQSHLvvi : NeonI_N2VShL_Q<0b1, 0b01110, "uqshl", Neon_uqrshlImm>;
-
-class N2VShiftAdd<bit q, bit u, bits<5> opcode, string asmop, string T,
- RegisterOperand VPRC, ValueType Ty, Operand ImmTy,
- SDNode OpNode>
- : NeonI_2VShiftImm<q, u, opcode,
- (outs VPRC:$Rd), (ins VPRC:$src, VPRC:$Rn, ImmTy:$Imm),
- asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
- [(set (Ty VPRC:$Rd), (Ty (add (Ty VPRC:$src),
- (Ty (OpNode (Ty VPRC:$Rn),
- (Ty (Neon_vdup (i32 ImmTy:$Imm))))))))],
- NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU, ReadFPALU]> {
- let Constraints = "$src = $Rd";
-}
-
-// Shift Right accumulate
-multiclass NeonI_N2VShRAdd<bit u, bits<5> opcode, string asmop, SDNode OpNode> {
- def _8B : N2VShiftAdd<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8,
- OpNode> {
- let Inst{22-19} = 0b0001;
- }
-
- def _4H : N2VShiftAdd<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16,
- OpNode> {
- let Inst{22-20} = 0b001;
- }
-
- def _2S : N2VShiftAdd<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32,
- OpNode> {
- let Inst{22-21} = 0b01;
- }
-
- def _16B : N2VShiftAdd<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8,
- OpNode> {
- let Inst{22-19} = 0b0001;
- }
-
- def _8H : N2VShiftAdd<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16,
- OpNode> {
- let Inst{22-20} = 0b001;
- }
-
- def _4S : N2VShiftAdd<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32,
- OpNode> {
- let Inst{22-21} = 0b01;
- }
-
- def _2D : N2VShiftAdd<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64,
- OpNode> {
- let Inst{22} = 0b1;
- }
-}
-
-// Shift right and accumulate
-defm SSRAvvi : NeonI_N2VShRAdd<0, 0b00010, "ssra", sra>;
-defm USRAvvi : NeonI_N2VShRAdd<1, 0b00010, "usra", srl>;
-
-// Rounding shift accumulate
-class N2VShiftAdd_R<bit q, bit u, bits<5> opcode, string asmop, string T,
- RegisterOperand VPRC, ValueType Ty, Operand ImmTy,
- SDPatternOperator OpNode>
- : NeonI_2VShiftImm<q, u, opcode,
- (outs VPRC:$Rd), (ins VPRC:$src, VPRC:$Rn, ImmTy:$Imm),
- asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
- [(set (Ty VPRC:$Rd), (Ty (add (Ty VPRC:$src),
- (Ty (OpNode (Ty VPRC:$Rn), (i32 ImmTy:$Imm))))))],
- NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU, ReadFPALU]> {
- let Constraints = "$src = $Rd";
-}
-
-multiclass NeonI_N2VShRAdd_R<bit u, bits<5> opcode, string asmop,
- SDPatternOperator OpNode> {
- def _8B : N2VShiftAdd_R<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8,
- OpNode> {
- let Inst{22-19} = 0b0001;
- }
-
- def _4H : N2VShiftAdd_R<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16,
- OpNode> {
- let Inst{22-20} = 0b001;
- }
-
- def _2S : N2VShiftAdd_R<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32,
- OpNode> {
- let Inst{22-21} = 0b01;
- }
-
- def _16B : N2VShiftAdd_R<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8,
- OpNode> {
- let Inst{22-19} = 0b0001;
- }
-
- def _8H : N2VShiftAdd_R<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16,
- OpNode> {
- let Inst{22-20} = 0b001;
- }
-
- def _4S : N2VShiftAdd_R<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32,
- OpNode> {
- let Inst{22-21} = 0b01;
- }
-
- def _2D : N2VShiftAdd_R<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64,
- OpNode> {
- let Inst{22} = 0b1;
- }
-}
-
-// Rounding shift right and accumulate
-defm SRSRAvvi : NeonI_N2VShRAdd_R<0, 0b00110, "srsra", int_aarch64_neon_vsrshr>;
-defm URSRAvvi : NeonI_N2VShRAdd_R<1, 0b00110, "ursra", int_aarch64_neon_vurshr>;
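-
-// The rounding variants add a rounding constant before shifting; informally,
-//   srsra: d[i] += ((int)n[i] + (1 << (imm - 1))) >> imm;
-// Plain IR has no rounding shift, hence the vsrshr/vurshr intrinsics above
-// instead of sra/srl.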
-
-// Shift insert by immediate
-class N2VShiftIns<bit q, bit u, bits<5> opcode, string asmop, string T,
- RegisterOperand VPRC, ValueType Ty, Operand ImmTy,
- SDPatternOperator OpNode>
- : NeonI_2VShiftImm<q, u, opcode,
- (outs VPRC:$Rd), (ins VPRC:$src, VPRC:$Rn, ImmTy:$Imm),
- asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
- [(set (Ty VPRC:$Rd), (Ty (OpNode (Ty VPRC:$src), (Ty VPRC:$Rn),
- (i32 ImmTy:$Imm))))],
- NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU, ReadFPALU]> {
- let Constraints = "$src = $Rd";
-}
-
-// shift left insert (vector by immediate)
-multiclass NeonI_N2VShLIns<bit u, bits<5> opcode, string asmop> {
- def _8B : N2VShiftIns<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shl_imm8,
- int_aarch64_neon_vsli> {
- let Inst{22-19} = 0b0001;
- }
-
- def _4H : N2VShiftIns<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shl_imm16,
- int_aarch64_neon_vsli> {
- let Inst{22-20} = 0b001;
- }
-
- def _2S : N2VShiftIns<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shl_imm32,
- int_aarch64_neon_vsli> {
- let Inst{22-21} = 0b01;
- }
-
- // 128-bit vector types
- def _16B : N2VShiftIns<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shl_imm8,
- int_aarch64_neon_vsli> {
- let Inst{22-19} = 0b0001;
- }
-
- def _8H : N2VShiftIns<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shl_imm16,
- int_aarch64_neon_vsli> {
- let Inst{22-20} = 0b001;
- }
-
- def _4S : N2VShiftIns<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shl_imm32,
- int_aarch64_neon_vsli> {
- let Inst{22-21} = 0b01;
- }
-
- def _2D : N2VShiftIns<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shl_imm64,
- int_aarch64_neon_vsli> {
- let Inst{22} = 0b1;
- }
-}
-
-// shift right insert (vector by immediate)
-multiclass NeonI_N2VShRIns<bit u, bits<5> opcode, string asmop> {
- // 64-bit vector types.
- def _8B : N2VShiftIns<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8,
- int_aarch64_neon_vsri> {
- let Inst{22-19} = 0b0001;
- }
-
- def _4H : N2VShiftIns<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16,
- int_aarch64_neon_vsri> {
- let Inst{22-20} = 0b001;
- }
-
- def _2S : N2VShiftIns<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32,
- int_aarch64_neon_vsri> {
- let Inst{22-21} = 0b01;
- }
-
- // 128-bit vector types
- def _16B : N2VShiftIns<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8,
- int_aarch64_neon_vsri> {
- let Inst{22-19} = 0b0001;
- }
-
- def _8H : N2VShiftIns<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16,
- int_aarch64_neon_vsri> {
- let Inst{22-20} = 0b001;
- }
-
- def _4S : N2VShiftIns<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32,
- int_aarch64_neon_vsri> {
- let Inst{22-21} = 0b01;
- }
-
- def _2D : N2VShiftIns<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64,
- int_aarch64_neon_vsri> {
- let Inst{22} = 0b1;
- }
-}
-
-// Shift left and insert
-defm SLIvvi : NeonI_N2VShLIns<0b1, 0b01010, "sli">;
-
-// Shift right and insert
-defm SRIvvi : NeonI_N2VShRIns<0b1, 0b01000, "sri">;
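-
-// A rough per-lane sketch of the insert semantics (mask widths depend on the
-// element size; this is informal, not the architectural pseudocode):
-//   sli #imm: d = (d & ((1u << imm) - 1)) | (n << imm); // low imm bits of d kept
-//   sri #imm: d = (d & ~(~0u >> imm)) | (n >> imm);     // high imm bits of d kept
-// The preserved destination bits are why N2VShiftIns ties "$src = $Rd".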
-
-class N2VShR_Narrow<bit q, bit u, bits<5> opcode, string asmop, string DestT,
- string SrcT, Operand ImmTy>
- : NeonI_2VShiftImm<q, u, opcode,
- (outs VPR64:$Rd), (ins VPR128:$Rn, ImmTy:$Imm),
- asmop # "\t$Rd." # DestT # ", $Rn." # SrcT # ", $Imm",
- [], NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU]>;
-
-class N2VShR_Narrow_Hi<bit q, bit u, bits<5> opcode, string asmop, string DestT,
- string SrcT, Operand ImmTy>
- : NeonI_2VShiftImm<q, u, opcode, (outs VPR128:$Rd),
- (ins VPR128:$src, VPR128:$Rn, ImmTy:$Imm),
- asmop # "\t$Rd." # DestT # ", $Rn." # SrcT # ", $Imm",
- [], NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU, ReadFPALU]> {
- let Constraints = "$src = $Rd";
-}
-
-// Shift right narrow (vector by immediate)
-multiclass NeonI_N2VShR_Narrow<bit u, bits<5> opcode, string asmop> {
- def _8B : N2VShR_Narrow<0b0, u, opcode, asmop, "8b", "8h", shr_imm8> {
- let Inst{22-19} = 0b0001;
- }
-
- def _4H : N2VShR_Narrow<0b0, u, opcode, asmop, "4h", "4s", shr_imm16> {
- let Inst{22-20} = 0b001;
- }
-
- def _2S : N2VShR_Narrow<0b0, u, opcode, asmop, "2s", "2d", shr_imm32> {
- let Inst{22-21} = 0b01;
- }
-
- // Shift Narrow High
- def _16B : N2VShR_Narrow_Hi<0b1, u, opcode, asmop # "2", "16b", "8h",
- shr_imm8> {
- let Inst{22-19} = 0b0001;
- }
-
- def _8H : N2VShR_Narrow_Hi<0b1, u, opcode, asmop # "2", "8h", "4s",
- shr_imm16> {
- let Inst{22-20} = 0b001;
- }
-
- def _4S : N2VShR_Narrow_Hi<0b1, u, opcode, asmop # "2", "4s", "2d",
- shr_imm32> {
- let Inst{22-21} = 0b01;
- }
-}
-
-// Shift right narrow
-defm SHRNvvi : NeonI_N2VShR_Narrow<0b0, 0b10000, "shrn">;
-
-// Shift right narrow (prefix Q is saturating, prefix R is rounding)
-defm QSHRUNvvi :NeonI_N2VShR_Narrow<0b1, 0b10000, "sqshrun">;
-defm RSHRNvvi : NeonI_N2VShR_Narrow<0b0, 0b10001, "rshrn">;
-defm QRSHRUNvvi : NeonI_N2VShR_Narrow<0b1, 0b10001, "sqrshrun">;
-defm SQSHRNvvi : NeonI_N2VShR_Narrow<0b0, 0b10010, "sqshrn">;
-defm UQSHRNvvi : NeonI_N2VShR_Narrow<0b1, 0b10010, "uqshrn">;
-defm SQRSHRNvvi : NeonI_N2VShR_Narrow<0b0, 0b10011, "sqrshrn">;
-defm UQRSHRNvvi : NeonI_N2VShR_Narrow<0b1, 0b10011, "uqrshrn">;
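-
-// E.g. "shrn v0.8b, v1.8h, #4" computes trunc(n[i] >> 4) per lane, halving
-// the element size. The Q forms saturate instead of truncating, the R forms
-// round before shifting, and the "2" (_Hi) forms write into the high half of
-// the destination while the tied low half is preserved.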
-
-def Neon_combine_2D : PatFrag<(ops node:$Rm, node:$Rn),
- (v2i64 (concat_vectors (v1i64 node:$Rm),
- (v1i64 node:$Rn)))>;
-def Neon_combine_8H : PatFrag<(ops node:$Rm, node:$Rn),
- (v8i16 (concat_vectors (v4i16 node:$Rm),
- (v4i16 node:$Rn)))>;
-def Neon_combine_4S : PatFrag<(ops node:$Rm, node:$Rn),
- (v4i32 (concat_vectors (v2i32 node:$Rm),
- (v2i32 node:$Rn)))>;
-def Neon_combine_4f : PatFrag<(ops node:$Rm, node:$Rn),
- (v4f32 (concat_vectors (v2f32 node:$Rm),
- (v2f32 node:$Rn)))>;
-def Neon_combine_2d : PatFrag<(ops node:$Rm, node:$Rn),
- (v2f64 (concat_vectors (v1f64 node:$Rm),
- (v1f64 node:$Rn)))>;
-
-def Neon_lshrImm8H : PatFrag<(ops node:$lhs, node:$rhs),
- (v8i16 (srl (v8i16 node:$lhs),
- (v8i16 (Neon_vdup (i32 node:$rhs)))))>;
-def Neon_lshrImm4S : PatFrag<(ops node:$lhs, node:$rhs),
- (v4i32 (srl (v4i32 node:$lhs),
- (v4i32 (Neon_vdup (i32 node:$rhs)))))>;
-def Neon_lshrImm2D : PatFrag<(ops node:$lhs, node:$rhs),
- (v2i64 (srl (v2i64 node:$lhs),
- (v2i64 (Neon_vdup (i32 node:$rhs)))))>;
-def Neon_ashrImm8H : PatFrag<(ops node:$lhs, node:$rhs),
- (v8i16 (sra (v8i16 node:$lhs),
- (v8i16 (Neon_vdup (i32 node:$rhs)))))>;
-def Neon_ashrImm4S : PatFrag<(ops node:$lhs, node:$rhs),
- (v4i32 (sra (v4i32 node:$lhs),
- (v4i32 (Neon_vdup (i32 node:$rhs)))))>;
-def Neon_ashrImm2D : PatFrag<(ops node:$lhs, node:$rhs),
- (v2i64 (sra (v2i64 node:$lhs),
- (v2i64 (Neon_vdup (i32 node:$rhs)))))>;
-
-// Normal shift right narrow is matched by IR (srl/sra, trunc, concat_vectors)
-multiclass Neon_shiftNarrow_patterns<string shr> {
- def : Pat<(v8i8 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm8H") VPR128:$Rn,
- (i32 shr_imm8:$Imm)))),
- (SHRNvvi_8B VPR128:$Rn, imm:$Imm)>;
- def : Pat<(v4i16 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm4S") VPR128:$Rn,
- (i32 shr_imm16:$Imm)))),
- (SHRNvvi_4H VPR128:$Rn, imm:$Imm)>;
- def : Pat<(v2i32 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm2D") VPR128:$Rn,
- (i32 shr_imm32:$Imm)))),
- (SHRNvvi_2S VPR128:$Rn, imm:$Imm)>;
-
- def : Pat<(Neon_combine_2D (v1i64 VPR64:$src), (v1i64 (bitconvert
- (v8i8 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm8H")
- VPR128:$Rn, (i32 shr_imm8:$Imm))))))),
- (SHRNvvi_16B (v2i64 (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64)),
- VPR128:$Rn, imm:$Imm)>;
- def : Pat<(Neon_combine_2D (v1i64 VPR64:$src), (v1i64 (bitconvert
- (v4i16 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm4S")
- VPR128:$Rn, (i32 shr_imm16:$Imm))))))),
- (SHRNvvi_8H (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
- VPR128:$Rn, imm:$Imm)>;
- def : Pat<(Neon_combine_2D (v1i64 VPR64:$src), (v1i64 (bitconvert
- (v2i32 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm2D")
- VPR128:$Rn, (i32 shr_imm32:$Imm))))))),
- (SHRNvvi_4S (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
- VPR128:$Rn, imm:$Imm)>;
-}
-
-multiclass Neon_shiftNarrow_QR_patterns<SDPatternOperator op, string prefix> {
- def : Pat<(v8i8 (op (v8i16 VPR128:$Rn), shr_imm8:$Imm)),
- (!cast<Instruction>(prefix # "_8B") VPR128:$Rn, imm:$Imm)>;
- def : Pat<(v4i16 (op (v4i32 VPR128:$Rn), shr_imm16:$Imm)),
- (!cast<Instruction>(prefix # "_4H") VPR128:$Rn, imm:$Imm)>;
- def : Pat<(v2i32 (op (v2i64 VPR128:$Rn), shr_imm32:$Imm)),
- (!cast<Instruction>(prefix # "_2S") VPR128:$Rn, imm:$Imm)>;
-
- def : Pat<(Neon_combine_2D (v1i64 VPR64:$src),
- (v1i64 (bitconvert (v8i8
- (op (v8i16 VPR128:$Rn), shr_imm8:$Imm))))),
- (!cast<Instruction>(prefix # "_16B")
- (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
- VPR128:$Rn, imm:$Imm)>;
- def : Pat<(Neon_combine_2D (v1i64 VPR64:$src),
- (v1i64 (bitconvert (v4i16
- (op (v4i32 VPR128:$Rn), shr_imm16:$Imm))))),
- (!cast<Instruction>(prefix # "_8H")
- (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
- VPR128:$Rn, imm:$Imm)>;
- def : Pat<(Neon_combine_2D (v1i64 VPR64:$src),
- (v1i64 (bitconvert (v2i32
- (op (v2i64 VPR128:$Rn), shr_imm32:$Imm))))),
- (!cast<Instruction>(prefix # "_4S")
- (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
- VPR128:$Rn, imm:$Imm)>;
-}
-
-defm : Neon_shiftNarrow_patterns<"lshr">;
-defm : Neon_shiftNarrow_patterns<"ashr">;
-
-defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vsqshrun, "QSHRUNvvi">;
-defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vrshrn, "RSHRNvvi">;
-defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vsqrshrun, "QRSHRUNvvi">;
-defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vsqshrn, "SQSHRNvvi">;
-defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vuqshrn, "UQSHRNvvi">;
-defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vsqrshrn, "SQRSHRNvvi">;
-defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vuqrshrn, "UQRSHRNvvi">;
-
-// Conversions between fixed-point and floating-point
-class N2VCvt_Fx<bit q, bit u, bits<5> opcode, string asmop, string T,
- RegisterOperand VPRC, ValueType DestTy, ValueType SrcTy,
- Operand ImmTy, SDPatternOperator IntOp>
- : NeonI_2VShiftImm<q, u, opcode,
- (outs VPRC:$Rd), (ins VPRC:$Rn, ImmTy:$Imm),
- asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
- [(set (DestTy VPRC:$Rd), (DestTy (IntOp (SrcTy VPRC:$Rn),
- (i32 ImmTy:$Imm))))],
- NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU]>;
-
-multiclass NeonI_N2VCvt_Fx2fp<bit u, bits<5> opcode, string asmop,
- SDPatternOperator IntOp> {
- def _2S : N2VCvt_Fx<0, u, opcode, asmop, "2s", VPR64, v2f32, v2i32,
- shr_imm32, IntOp> {
- let Inst{22-21} = 0b01;
- }
-
- def _4S : N2VCvt_Fx<1, u, opcode, asmop, "4s", VPR128, v4f32, v4i32,
- shr_imm32, IntOp> {
- let Inst{22-21} = 0b01;
- }
-
- def _2D : N2VCvt_Fx<1, u, opcode, asmop, "2d", VPR128, v2f64, v2i64,
- shr_imm64, IntOp> {
- let Inst{22} = 0b1;
- }
-}
-
-multiclass NeonI_N2VCvt_Fp2fx<bit u, bits<5> opcode, string asmop,
- SDPatternOperator IntOp> {
- def _2S : N2VCvt_Fx<0, u, opcode, asmop, "2s", VPR64, v2i32, v2f32,
- shr_imm32, IntOp> {
- let Inst{22-21} = 0b01;
- }
-
- def _4S : N2VCvt_Fx<1, u, opcode, asmop, "4s", VPR128, v4i32, v4f32,
- shr_imm32, IntOp> {
- let Inst{22-21} = 0b01;
- }
-
- def _2D : N2VCvt_Fx<1, u, opcode, asmop, "2d", VPR128, v2i64, v2f64,
- shr_imm64, IntOp> {
- let Inst{22} = 0b1;
- }
-}
-
-// Convert fixed-point to floating-point
-defm VCVTxs2f : NeonI_N2VCvt_Fx2fp<0, 0b11100, "scvtf",
- int_arm_neon_vcvtfxs2fp>;
-defm VCVTxu2f : NeonI_N2VCvt_Fx2fp<1, 0b11100, "ucvtf",
- int_arm_neon_vcvtfxu2fp>;
-
-// Convert floating-point to fixed-point
-defm VCVTf2xs : NeonI_N2VCvt_Fp2fx<0, 0b11111, "fcvtzs",
- int_arm_neon_vcvtfp2fxs>;
-defm VCVTf2xu : NeonI_N2VCvt_Fp2fx<1, 0b11111, "fcvtzu",
- int_arm_neon_vcvtfp2fxu>;
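-
-// The #Imm operand is the number of fraction bits; informally, per lane:
-//   scvtf  v0.2s, v1.2s, #8  =>  f[i] = (float)x[i] / 256.0f;
-//   fcvtzs v0.2s, v1.2s, #8  =>  x[i] = (int)(f[i] * 256.0f); // toward zero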
-
-multiclass Neon_sshll2_0<SDNode ext>
-{
- def _v8i8 : PatFrag<(ops node:$Rn),
- (v8i16 (ext (v8i8 (Neon_High16B node:$Rn))))>;
- def _v4i16 : PatFrag<(ops node:$Rn),
- (v4i32 (ext (v4i16 (Neon_High8H node:$Rn))))>;
- def _v2i32 : PatFrag<(ops node:$Rn),
- (v2i64 (ext (v2i32 (Neon_High4S node:$Rn))))>;
-}
-
-defm NI_sext_high : Neon_sshll2_0<sext>;
-defm NI_zext_high : Neon_sshll2_0<zext>;
-
-
-//===----------------------------------------------------------------------===//
-// Multiclasses for NeonI_Across
-//===----------------------------------------------------------------------===//
-
-// Variant 1
-
-multiclass NeonI_2VAcross_1<bit u, bits<5> opcode,
- string asmop, SDPatternOperator opnode>
-{
- def _1h8b: NeonI_2VAcross<0b0, u, 0b00, opcode,
- (outs FPR16:$Rd), (ins VPR64:$Rn),
- asmop # "\t$Rd, $Rn.8b",
- [(set (v1i16 FPR16:$Rd),
- (v1i16 (opnode (v8i8 VPR64:$Rn))))],
- NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU]>;
-
- def _1h16b: NeonI_2VAcross<0b1, u, 0b00, opcode,
- (outs FPR16:$Rd), (ins VPR128:$Rn),
- asmop # "\t$Rd, $Rn.16b",
- [(set (v1i16 FPR16:$Rd),
- (v1i16 (opnode (v16i8 VPR128:$Rn))))],
- NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU]>;
-
- def _1s4h: NeonI_2VAcross<0b0, u, 0b01, opcode,
- (outs FPR32:$Rd), (ins VPR64:$Rn),
- asmop # "\t$Rd, $Rn.4h",
- [(set (v1i32 FPR32:$Rd),
- (v1i32 (opnode (v4i16 VPR64:$Rn))))],
- NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU]>;
-
- def _1s8h: NeonI_2VAcross<0b1, u, 0b01, opcode,
- (outs FPR32:$Rd), (ins VPR128:$Rn),
- asmop # "\t$Rd, $Rn.8h",
- [(set (v1i32 FPR32:$Rd),
- (v1i32 (opnode (v8i16 VPR128:$Rn))))],
- NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU]>;
-
- // _1d2s doesn't exist!
-
- def _1d4s: NeonI_2VAcross<0b1, u, 0b10, opcode,
- (outs FPR64:$Rd), (ins VPR128:$Rn),
- asmop # "\t$Rd, $Rn.4s",
- [(set (v1i64 FPR64:$Rd),
- (v1i64 (opnode (v4i32 VPR128:$Rn))))],
- NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU]>;
-}
-
-defm SADDLV : NeonI_2VAcross_1<0b0, 0b00011, "saddlv", int_aarch64_neon_saddlv>;
-defm UADDLV : NeonI_2VAcross_1<0b1, 0b00011, "uaddlv", int_aarch64_neon_uaddlv>;
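-
-// E.g. "saddlv h0, v1.8b" adds all eight signed byte lanes into one widened
-// 16-bit scalar, which is why the results above live in FPR16/FPR32/FPR64
-// rather than in vector lists.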
-
-// Variant 2
-
-multiclass NeonI_2VAcross_2<bit u, bits<5> opcode,
- string asmop, SDPatternOperator opnode>
-{
- def _1b8b: NeonI_2VAcross<0b0, u, 0b00, opcode,
- (outs FPR8:$Rd), (ins VPR64:$Rn),
- asmop # "\t$Rd, $Rn.8b",
- [(set (v1i8 FPR8:$Rd),
- (v1i8 (opnode (v8i8 VPR64:$Rn))))],
- NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU]>;
-
- def _1b16b: NeonI_2VAcross<0b1, u, 0b00, opcode,
- (outs FPR8:$Rd), (ins VPR128:$Rn),
- asmop # "\t$Rd, $Rn.16b",
- [(set (v1i8 FPR8:$Rd),
- (v1i8 (opnode (v16i8 VPR128:$Rn))))],
- NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU]>;
-
- def _1h4h: NeonI_2VAcross<0b0, u, 0b01, opcode,
- (outs FPR16:$Rd), (ins VPR64:$Rn),
- asmop # "\t$Rd, $Rn.4h",
- [(set (v1i16 FPR16:$Rd),
- (v1i16 (opnode (v4i16 VPR64:$Rn))))],
- NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU]>;
-
- def _1h8h: NeonI_2VAcross<0b1, u, 0b01, opcode,
- (outs FPR16:$Rd), (ins VPR128:$Rn),
- asmop # "\t$Rd, $Rn.8h",
- [(set (v1i16 FPR16:$Rd),
- (v1i16 (opnode (v8i16 VPR128:$Rn))))],
- NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU]>;
-
- // _1s2s doesn't exist!
-
- def _1s4s: NeonI_2VAcross<0b1, u, 0b10, opcode,
- (outs FPR32:$Rd), (ins VPR128:$Rn),
- asmop # "\t$Rd, $Rn.4s",
- [(set (v1i32 FPR32:$Rd),
- (v1i32 (opnode (v4i32 VPR128:$Rn))))],
- NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU]>;
-}
-
-defm SMAXV : NeonI_2VAcross_2<0b0, 0b01010, "smaxv", int_aarch64_neon_smaxv>;
-defm UMAXV : NeonI_2VAcross_2<0b1, 0b01010, "umaxv", int_aarch64_neon_umaxv>;
-
-defm SMINV : NeonI_2VAcross_2<0b0, 0b11010, "sminv", int_aarch64_neon_sminv>;
-defm UMINV : NeonI_2VAcross_2<0b1, 0b11010, "uminv", int_aarch64_neon_uminv>;
-
-defm ADDV : NeonI_2VAcross_2<0b0, 0b11011, "addv", int_aarch64_neon_vaddv>;
-
-// Variant 3
-
-multiclass NeonI_2VAcross_3<bit u, bits<5> opcode, bits<2> size,
- string asmop, SDPatternOperator opnode> {
- def _1s4s: NeonI_2VAcross<0b1, u, size, opcode,
- (outs FPR32:$Rd), (ins VPR128:$Rn),
- asmop # "\t$Rd, $Rn.4s",
- [(set (f32 FPR32:$Rd),
- (f32 (opnode (v4f32 VPR128:$Rn))))],
- NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU]>;
-}
-
-defm FMAXNMV : NeonI_2VAcross_3<0b1, 0b01100, 0b00, "fmaxnmv",
- int_aarch64_neon_vmaxnmv>;
-defm FMINNMV : NeonI_2VAcross_3<0b1, 0b01100, 0b10, "fminnmv",
- int_aarch64_neon_vminnmv>;
-
-defm FMAXV : NeonI_2VAcross_3<0b1, 0b01111, 0b00, "fmaxv",
- int_aarch64_neon_vmaxv>;
-defm FMINV : NeonI_2VAcross_3<0b1, 0b01111, 0b10, "fminv",
- int_aarch64_neon_vminv>;
-
-// The following definitions are for the instruction class (Perm)
-
-class NeonI_Permute<bit q, bits<2> size, bits<3> opcode,
- string asmop, RegisterOperand OpVPR, string OpS,
- SDPatternOperator opnode, ValueType Ty>
- : NeonI_Perm<q, size, opcode,
- (outs OpVPR:$Rd), (ins OpVPR:$Rn, OpVPR:$Rm),
- asmop # "\t$Rd." # OpS # ", $Rn." # OpS # ", $Rm." # OpS,
- [(set (Ty OpVPR:$Rd),
- (Ty (opnode (Ty OpVPR:$Rn), (Ty OpVPR:$Rm))))],
- NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
-
-multiclass NeonI_Perm_pat<bits<3> opcode, string asmop,
- SDPatternOperator opnode> {
- def _8b : NeonI_Permute<0b0, 0b00, opcode, asmop,
- VPR64, "8b", opnode, v8i8>;
- def _16b : NeonI_Permute<0b1, 0b00, opcode, asmop,
- VPR128, "16b",opnode, v16i8>;
- def _4h : NeonI_Permute<0b0, 0b01, opcode, asmop,
- VPR64, "4h", opnode, v4i16>;
- def _8h : NeonI_Permute<0b1, 0b01, opcode, asmop,
- VPR128, "8h", opnode, v8i16>;
- def _2s : NeonI_Permute<0b0, 0b10, opcode, asmop,
- VPR64, "2s", opnode, v2i32>;
- def _4s : NeonI_Permute<0b1, 0b10, opcode, asmop,
- VPR128, "4s", opnode, v4i32>;
- def _2d : NeonI_Permute<0b1, 0b11, opcode, asmop,
- VPR128, "2d", opnode, v2i64>;
-}
-
-defm UZP1vvv : NeonI_Perm_pat<0b001, "uzp1", Neon_uzp1>;
-defm TRN1vvv : NeonI_Perm_pat<0b010, "trn1", Neon_trn1>;
-defm ZIP1vvv : NeonI_Perm_pat<0b011, "zip1", Neon_zip1>;
-defm UZP2vvv : NeonI_Perm_pat<0b101, "uzp2", Neon_uzp2>;
-defm TRN2vvv : NeonI_Perm_pat<0b110, "trn2", Neon_trn2>;
-defm ZIP2vvv : NeonI_Perm_pat<0b111, "zip2", Neon_zip2>;
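-
-// Informally, for n = {n0,n1,...} and m = {m0,m1,...}:
-//   zip1: {n0,m0,n1,m1,...}     (interleave low halves; zip2 the high halves)
-//   uzp1: {n0,n2,...,m0,m2,...} (even lanes; uzp2 takes the odd lanes)
-//   trn1: {n0,m0,n2,m2,...}     (even-lane pairs; trn2 the odd-lane pairs)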
-
-multiclass NeonI_Perm_float_pat<string INS, SDPatternOperator opnode> {
- def : Pat<(v2f32 (opnode (v2f32 VPR64:$Rn), (v2f32 VPR64:$Rm))),
- (!cast<Instruction>(INS # "_2s") VPR64:$Rn, VPR64:$Rm)>;
-
- def : Pat<(v4f32 (opnode (v4f32 VPR128:$Rn), (v4f32 VPR128:$Rm))),
- (!cast<Instruction>(INS # "_4s") VPR128:$Rn, VPR128:$Rm)>;
-
- def : Pat<(v2f64 (opnode (v2f64 VPR128:$Rn), (v2f64 VPR128:$Rm))),
- (!cast<Instruction>(INS # "_2d") VPR128:$Rn, VPR128:$Rm)>;
-}
-
-defm : NeonI_Perm_float_pat<"UZP1vvv", Neon_uzp1>;
-defm : NeonI_Perm_float_pat<"UZP2vvv", Neon_uzp2>;
-defm : NeonI_Perm_float_pat<"ZIP1vvv", Neon_zip1>;
-defm : NeonI_Perm_float_pat<"ZIP2vvv", Neon_zip2>;
-defm : NeonI_Perm_float_pat<"TRN1vvv", Neon_trn1>;
-defm : NeonI_Perm_float_pat<"TRN2vvv", Neon_trn2>;
-
-// The following definitions are for the instruction class (3V Diff)
-
-// normal long/long2 pattern
-class NeonI_3VDL<bit q, bit u, bits<2> size, bits<4> opcode,
- string asmop, string ResS, string OpS,
- SDPatternOperator opnode, SDPatternOperator ext,
- RegisterOperand OpVPR,
- ValueType ResTy, ValueType OpTy>
- : NeonI_3VDiff<q, u, size, opcode,
- (outs VPR128:$Rd), (ins OpVPR:$Rn, OpVPR:$Rm),
- asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
- [(set (ResTy VPR128:$Rd),
- (ResTy (opnode (ResTy (ext (OpTy OpVPR:$Rn))),
- (ResTy (ext (OpTy OpVPR:$Rm))))))],
- NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
-
-multiclass NeonI_3VDL_s<bit u, bits<4> opcode,
- string asmop, SDPatternOperator opnode,
- bit Commutable = 0> {
- let isCommutable = Commutable in {
- def _8h8b : NeonI_3VDL<0b0, u, 0b00, opcode, asmop, "8h", "8b",
- opnode, sext, VPR64, v8i16, v8i8>;
- def _4s4h : NeonI_3VDL<0b0, u, 0b01, opcode, asmop, "4s", "4h",
- opnode, sext, VPR64, v4i32, v4i16>;
- def _2d2s : NeonI_3VDL<0b0, u, 0b10, opcode, asmop, "2d", "2s",
- opnode, sext, VPR64, v2i64, v2i32>;
- }
-}
-
-multiclass NeonI_3VDL2_s<bit u, bits<4> opcode, string asmop,
- SDPatternOperator opnode, bit Commutable = 0> {
- let isCommutable = Commutable in {
- def _8h16b : NeonI_3VDL<0b1, u, 0b00, opcode, asmop, "8h", "16b",
- opnode, NI_sext_high_v8i8, VPR128, v8i16, v16i8>;
- def _4s8h : NeonI_3VDL<0b1, u, 0b01, opcode, asmop, "4s", "8h",
- opnode, NI_sext_high_v4i16, VPR128, v4i32, v8i16>;
- def _2d4s : NeonI_3VDL<0b1, u, 0b10, opcode, asmop, "2d", "4s",
- opnode, NI_sext_high_v2i32, VPR128, v2i64, v4i32>;
- }
-}
-
-multiclass NeonI_3VDL_u<bit u, bits<4> opcode, string asmop,
- SDPatternOperator opnode, bit Commutable = 0> {
- let isCommutable = Commutable in {
- def _8h8b : NeonI_3VDL<0b0, u, 0b00, opcode, asmop, "8h", "8b",
- opnode, zext, VPR64, v8i16, v8i8>;
- def _4s4h : NeonI_3VDL<0b0, u, 0b01, opcode, asmop, "4s", "4h",
- opnode, zext, VPR64, v4i32, v4i16>;
- def _2d2s : NeonI_3VDL<0b0, u, 0b10, opcode, asmop, "2d", "2s",
- opnode, zext, VPR64, v2i64, v2i32>;
- }
-}
-
-multiclass NeonI_3VDL2_u<bit u, bits<4> opcode, string asmop,
- SDPatternOperator opnode, bit Commutable = 0> {
- let isCommutable = Commutable in {
- def _8h16b : NeonI_3VDL<0b1, u, 0b00, opcode, asmop, "8h", "16b",
- opnode, NI_zext_high_v8i8, VPR128, v8i16, v16i8>;
- def _4s8h : NeonI_3VDL<0b1, u, 0b01, opcode, asmop, "4s", "8h",
- opnode, NI_zext_high_v4i16, VPR128, v4i32, v8i16>;
- def _2d4s : NeonI_3VDL<0b1, u, 0b10, opcode, asmop, "2d", "4s",
- opnode, NI_zext_high_v2i32, VPR128, v2i64, v4i32>;
- }
-}
-
-defm SADDLvvv : NeonI_3VDL_s<0b0, 0b0000, "saddl", add, 1>;
-defm UADDLvvv : NeonI_3VDL_u<0b1, 0b0000, "uaddl", add, 1>;
-
-defm SADDL2vvv : NeonI_3VDL2_s<0b0, 0b0000, "saddl2", add, 1>;
-defm UADDL2vvv : NeonI_3VDL2_u<0b1, 0b0000, "uaddl2", add, 1>;
-
-defm SSUBLvvv : NeonI_3VDL_s<0b0, 0b0010, "ssubl", sub, 0>;
-defm USUBLvvv : NeonI_3VDL_u<0b1, 0b0010, "usubl", sub, 0>;
-
-defm SSUBL2vvv : NeonI_3VDL2_s<0b0, 0b0010, "ssubl2", sub, 0>;
-defm USUBL2vvv : NeonI_3VDL2_u<0b1, 0b0010, "usubl2", sub, 0>;
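-
-// E.g. "saddl v0.8h, v1.8b, v2.8b" computes d[i] = sext(n[i]) + sext(m[i]),
-// widening 8x8-bit lanes to 8x16-bit; the "2" forms apply the same operation
-// to the high halves of 128-bit sources via NI_sext_high/NI_zext_high.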
-
-// normal wide/wide2 pattern
-class NeonI_3VDW<bit q, bit u, bits<2> size, bits<4> opcode,
- string asmop, string ResS, string OpS,
- SDPatternOperator opnode, SDPatternOperator ext,
- RegisterOperand OpVPR,
- ValueType ResTy, ValueType OpTy>
- : NeonI_3VDiff<q, u, size, opcode,
- (outs VPR128:$Rd), (ins VPR128:$Rn, OpVPR:$Rm),
- asmop # "\t$Rd." # ResS # ", $Rn." # ResS # ", $Rm." # OpS,
- [(set (ResTy VPR128:$Rd),
- (ResTy (opnode (ResTy VPR128:$Rn),
- (ResTy (ext (OpTy OpVPR:$Rm))))))],
- NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
-
-multiclass NeonI_3VDW_s<bit u, bits<4> opcode, string asmop,
- SDPatternOperator opnode> {
- def _8h8b : NeonI_3VDW<0b0, u, 0b00, opcode, asmop, "8h", "8b",
- opnode, sext, VPR64, v8i16, v8i8>;
- def _4s4h : NeonI_3VDW<0b0, u, 0b01, opcode, asmop, "4s", "4h",
- opnode, sext, VPR64, v4i32, v4i16>;
- def _2d2s : NeonI_3VDW<0b0, u, 0b10, opcode, asmop, "2d", "2s",
- opnode, sext, VPR64, v2i64, v2i32>;
-}
-
-defm SADDWvvv : NeonI_3VDW_s<0b0, 0b0001, "saddw", add>;
-defm SSUBWvvv : NeonI_3VDW_s<0b0, 0b0011, "ssubw", sub>;
-
-multiclass NeonI_3VDW2_s<bit u, bits<4> opcode, string asmop,
- SDPatternOperator opnode> {
- def _8h16b : NeonI_3VDW<0b1, u, 0b00, opcode, asmop, "8h", "16b",
- opnode, NI_sext_high_v8i8, VPR128, v8i16, v16i8>;
- def _4s8h : NeonI_3VDW<0b1, u, 0b01, opcode, asmop, "4s", "8h",
- opnode, NI_sext_high_v4i16, VPR128, v4i32, v8i16>;
- def _2d4s : NeonI_3VDW<0b1, u, 0b10, opcode, asmop, "2d", "4s",
- opnode, NI_sext_high_v2i32, VPR128, v2i64, v4i32>;
-}
-
-defm SADDW2vvv : NeonI_3VDW2_s<0b0, 0b0001, "saddw2", add>;
-defm SSUBW2vvv : NeonI_3VDW2_s<0b0, 0b0011, "ssubw2", sub>;
-
-multiclass NeonI_3VDW_u<bit u, bits<4> opcode, string asmop,
- SDPatternOperator opnode> {
- def _8h8b : NeonI_3VDW<0b0, u, 0b00, opcode, asmop, "8h", "8b",
- opnode, zext, VPR64, v8i16, v8i8>;
- def _4s4h : NeonI_3VDW<0b0, u, 0b01, opcode, asmop, "4s", "4h",
- opnode, zext, VPR64, v4i32, v4i16>;
- def _2d2s : NeonI_3VDW<0b0, u, 0b10, opcode, asmop, "2d", "2s",
- opnode, zext, VPR64, v2i64, v2i32>;
-}
-
-defm UADDWvvv : NeonI_3VDW_u<0b1, 0b0001, "uaddw", add>;
-defm USUBWvvv : NeonI_3VDW_u<0b1, 0b0011, "usubw", sub>;
-
-multiclass NeonI_3VDW2_u<bit u, bits<4> opcode, string asmop,
- SDPatternOperator opnode> {
- def _8h16b : NeonI_3VDW<0b1, u, 0b00, opcode, asmop, "8h", "16b",
- opnode, NI_zext_high_v8i8, VPR128, v8i16, v16i8>;
- def _4s8h : NeonI_3VDW<0b1, u, 0b01, opcode, asmop, "4s", "8h",
- opnode, NI_zext_high_v4i16, VPR128, v4i32, v8i16>;
- def _2d4s : NeonI_3VDW<0b1, u, 0b10, opcode, asmop, "2d", "4s",
- opnode, NI_zext_high_v2i32, VPR128, v2i64, v4i32>;
-}
-
-defm UADDW2vvv : NeonI_3VDW2_u<0b1, 0b0001, "uaddw2", add>;
-defm USUBW2vvv : NeonI_3VDW2_u<0b1, 0b0011, "usubw2", sub>;
-
-// Extract the high half of each vector element.
-multiclass NeonI_get_high {
- def _8h : PatFrag<(ops node:$Rn),
- (v8i8 (trunc (v8i16 (srl (v8i16 node:$Rn),
- (v8i16 (Neon_vdup (i32 8)))))))>;
- def _4s : PatFrag<(ops node:$Rn),
- (v4i16 (trunc (v4i32 (srl (v4i32 node:$Rn),
- (v4i32 (Neon_vdup (i32 16)))))))>;
- def _2d : PatFrag<(ops node:$Rn),
- (v2i32 (trunc (v2i64 (srl (v2i64 node:$Rn),
- (v2i64 (Neon_vdup (i32 32)))))))>;
-}
-
-defm NI_get_hi : NeonI_get_high;
-
-// pattern for addhn/subhn with 2 operands
-class NeonI_3VDN_addhn_2Op<bit q, bit u, bits<2> size, bits<4> opcode,
- string asmop, string ResS, string OpS,
- SDPatternOperator opnode, SDPatternOperator get_hi,
- ValueType ResTy, ValueType OpTy>
- : NeonI_3VDiff<q, u, size, opcode,
- (outs VPR64:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
- asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
- [(set (ResTy VPR64:$Rd),
- (ResTy (get_hi
- (OpTy (opnode (OpTy VPR128:$Rn),
- (OpTy VPR128:$Rm))))))],
- NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
-
-multiclass NeonI_3VDN_addhn_2Op<bit u, bits<4> opcode, string asmop,
- SDPatternOperator opnode, bit Commutable = 0> {
- let isCommutable = Commutable in {
- def _8b8h : NeonI_3VDN_addhn_2Op<0b0, u, 0b00, opcode, asmop, "8b", "8h",
- opnode, NI_get_hi_8h, v8i8, v8i16>;
- def _4h4s : NeonI_3VDN_addhn_2Op<0b0, u, 0b01, opcode, asmop, "4h", "4s",
- opnode, NI_get_hi_4s, v4i16, v4i32>;
- def _2s2d : NeonI_3VDN_addhn_2Op<0b0, u, 0b10, opcode, asmop, "2s", "2d",
- opnode, NI_get_hi_2d, v2i32, v2i64>;
- }
-}
-
-defm ADDHNvvv : NeonI_3VDN_addhn_2Op<0b0, 0b0100, "addhn", add, 1>;
-defm SUBHNvvv : NeonI_3VDN_addhn_2Op<0b0, 0b0110, "subhn", sub, 0>;
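-
-// E.g. "addhn v0.8b, v1.8h, v2.8h" computes d[i] = (n[i] + m[i]) >> 8,
-// keeping only the high half of each 16-bit sum; the NI_get_hi_* fragments
-// above spell out that "shift by half the width, then truncate" step.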
-
-// pattern for operation with 2 operands
-class NeonI_3VD_2Op<bit q, bit u, bits<2> size, bits<4> opcode,
- string asmop, string ResS, string OpS,
- SDPatternOperator opnode,
- RegisterOperand ResVPR, RegisterOperand OpVPR,
- ValueType ResTy, ValueType OpTy>
- : NeonI_3VDiff<q, u, size, opcode,
- (outs ResVPR:$Rd), (ins OpVPR:$Rn, OpVPR:$Rm),
- asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
- [(set (ResTy ResVPR:$Rd),
- (ResTy (opnode (OpTy OpVPR:$Rn), (OpTy OpVPR:$Rm))))],
- NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
-
-// normal narrow pattern
-multiclass NeonI_3VDN_2Op<bit u, bits<4> opcode, string asmop,
- SDPatternOperator opnode, bit Commutable = 0> {
- let isCommutable = Commutable in {
- def _8b8h : NeonI_3VD_2Op<0b0, u, 0b00, opcode, asmop, "8b", "8h",
- opnode, VPR64, VPR128, v8i8, v8i16>;
- def _4h4s : NeonI_3VD_2Op<0b0, u, 0b01, opcode, asmop, "4h", "4s",
- opnode, VPR64, VPR128, v4i16, v4i32>;
- def _2s2d : NeonI_3VD_2Op<0b0, u, 0b10, opcode, asmop, "2s", "2d",
- opnode, VPR64, VPR128, v2i32, v2i64>;
- }
-}
-
-defm RADDHNvvv : NeonI_3VDN_2Op<0b1, 0b0100, "raddhn", int_arm_neon_vraddhn, 1>;
-defm RSUBHNvvv : NeonI_3VDN_2Op<0b1, 0b0110, "rsubhn", int_arm_neon_vrsubhn, 0>;
-
-// pattern for ACLE intrinsics with 3 operands
-class NeonI_3VDN_3Op<bit q, bit u, bits<2> size, bits<4> opcode,
- string asmop, string ResS, string OpS>
- : NeonI_3VDiff<q, u, size, opcode,
- (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn, VPR128:$Rm),
- asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
- [], NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU, ReadFPALU, ReadFPALU]> {
- let Constraints = "$src = $Rd";
- let neverHasSideEffects = 1;
-}
-
-multiclass NeonI_3VDN_3Op_v1<bit u, bits<4> opcode, string asmop> {
- def _16b8h : NeonI_3VDN_3Op<0b1, u, 0b00, opcode, asmop, "16b", "8h">;
- def _8h4s : NeonI_3VDN_3Op<0b1, u, 0b01, opcode, asmop, "8h", "4s">;
- def _4s2d : NeonI_3VDN_3Op<0b1, u, 0b10, opcode, asmop, "4s", "2d">;
-}
-
-defm ADDHN2vvv : NeonI_3VDN_3Op_v1<0b0, 0b0100, "addhn2">;
-defm SUBHN2vvv : NeonI_3VDN_3Op_v1<0b0, 0b0110, "subhn2">;
-
-defm RADDHN2vvv : NeonI_3VDN_3Op_v1<0b1, 0b0100, "raddhn2">;
-defm RSUBHN2vvv : NeonI_3VDN_3Op_v1<0b1, 0b0110, "rsubhn2">;
-
-// Patterns have to be separate because there's a SUBREG_TO_REG in the output
-// part.
-class NarrowHighHalfPat<Instruction INST, ValueType DstTy, ValueType SrcTy,
- SDPatternOperator coreop>
- : Pat<(Neon_combine_2D (v1i64 VPR64:$src),
- (v1i64 (bitconvert (DstTy (coreop (SrcTy VPR128:$Rn),
- (SrcTy VPR128:$Rm)))))),
- (INST (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
- VPR128:$Rn, VPR128:$Rm)>;
-
-// addhn2 patterns
-def : NarrowHighHalfPat<ADDHN2vvv_16b8h, v8i8, v8i16,
- BinOpFrag<(NI_get_hi_8h (add node:$LHS, node:$RHS))>>;
-def : NarrowHighHalfPat<ADDHN2vvv_8h4s, v4i16, v4i32,
- BinOpFrag<(NI_get_hi_4s (add node:$LHS, node:$RHS))>>;
-def : NarrowHighHalfPat<ADDHN2vvv_4s2d, v2i32, v2i64,
- BinOpFrag<(NI_get_hi_2d (add node:$LHS, node:$RHS))>>;
-
-// subhn2 patterns
-def : NarrowHighHalfPat<SUBHN2vvv_16b8h, v8i8, v8i16,
- BinOpFrag<(NI_get_hi_8h (sub node:$LHS, node:$RHS))>>;
-def : NarrowHighHalfPat<SUBHN2vvv_8h4s, v4i16, v4i32,
- BinOpFrag<(NI_get_hi_4s (sub node:$LHS, node:$RHS))>>;
-def : NarrowHighHalfPat<SUBHN2vvv_4s2d, v2i32, v2i64,
- BinOpFrag<(NI_get_hi_2d (sub node:$LHS, node:$RHS))>>;
-
-// raddhn2 patterns
-def : NarrowHighHalfPat<RADDHN2vvv_16b8h, v8i8, v8i16, int_arm_neon_vraddhn>;
-def : NarrowHighHalfPat<RADDHN2vvv_8h4s, v4i16, v4i32, int_arm_neon_vraddhn>;
-def : NarrowHighHalfPat<RADDHN2vvv_4s2d, v2i32, v2i64, int_arm_neon_vraddhn>;
-
-// rsubhn2 patterns
-def : NarrowHighHalfPat<RSUBHN2vvv_16b8h, v8i8, v8i16, int_arm_neon_vrsubhn>;
-def : NarrowHighHalfPat<RSUBHN2vvv_8h4s, v4i16, v4i32, int_arm_neon_vrsubhn>;
-def : NarrowHighHalfPat<RSUBHN2vvv_4s2d, v2i32, v2i64, int_arm_neon_vrsubhn>;
-
-// pattern that needs to extend the result
-class NeonI_3VDL_Ext<bit q, bit u, bits<2> size, bits<4> opcode,
- string asmop, string ResS, string OpS,
- SDPatternOperator opnode,
- RegisterOperand OpVPR,
- ValueType ResTy, ValueType OpTy, ValueType OpSTy>
- : NeonI_3VDiff<q, u, size, opcode,
- (outs VPR128:$Rd), (ins OpVPR:$Rn, OpVPR:$Rm),
- asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
- [(set (ResTy VPR128:$Rd),
- (ResTy (zext (OpSTy (opnode (OpTy OpVPR:$Rn),
- (OpTy OpVPR:$Rm))))))],
- NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
-
-multiclass NeonI_3VDL_zext<bit u, bits<4> opcode, string asmop,
- SDPatternOperator opnode, bit Commutable = 0> {
- let isCommutable = Commutable in {
- def _8h8b : NeonI_3VDL_Ext<0b0, u, 0b00, opcode, asmop, "8h", "8b",
- opnode, VPR64, v8i16, v8i8, v8i8>;
- def _4s4h : NeonI_3VDL_Ext<0b0, u, 0b01, opcode, asmop, "4s", "4h",
- opnode, VPR64, v4i32, v4i16, v4i16>;
- def _2d2s : NeonI_3VDL_Ext<0b0, u, 0b10, opcode, asmop, "2d", "2s",
- opnode, VPR64, v2i64, v2i32, v2i32>;
- }
-}
-
-defm SABDLvvv : NeonI_3VDL_zext<0b0, 0b0111, "sabdl", int_arm_neon_vabds, 1>;
-defm UABDLvvv : NeonI_3VDL_zext<0b1, 0b0111, "uabdl", int_arm_neon_vabdu, 1>;
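-
-// E.g. "sabdl v0.8h, v1.8b, v2.8b" computes d[i] = |n[i] - m[i]| widened to
-// 16 bits. An absolute difference is never negative, so both the signed and
-// unsigned forms widen with zext, as NeonI_3VDL_Ext encodes above.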
-
-multiclass NeonI_Op_High<SDPatternOperator op> {
- def _16B : PatFrag<(ops node:$Rn, node:$Rm),
- (op (v8i8 (Neon_High16B node:$Rn)),
- (v8i8 (Neon_High16B node:$Rm)))>;
- def _8H : PatFrag<(ops node:$Rn, node:$Rm),
- (op (v4i16 (Neon_High8H node:$Rn)),
- (v4i16 (Neon_High8H node:$Rm)))>;
- def _4S : PatFrag<(ops node:$Rn, node:$Rm),
- (op (v2i32 (Neon_High4S node:$Rn)),
- (v2i32 (Neon_High4S node:$Rm)))>;
-}
-
-defm NI_sabdl_hi : NeonI_Op_High<int_arm_neon_vabds>;
-defm NI_uabdl_hi : NeonI_Op_High<int_arm_neon_vabdu>;
-defm NI_smull_hi : NeonI_Op_High<int_arm_neon_vmulls>;
-defm NI_umull_hi : NeonI_Op_High<int_arm_neon_vmullu>;
-defm NI_qdmull_hi : NeonI_Op_High<int_arm_neon_vqdmull>;
-defm NI_pmull_hi : NeonI_Op_High<int_arm_neon_vmullp>;
-
-multiclass NeonI_3VDL_Abd_u<bit u, bits<4> opcode, string asmop, string opnode,
- bit Commutable = 0> {
- let isCommutable = Commutable in {
- def _8h8b : NeonI_3VDL_Ext<0b1, u, 0b00, opcode, asmop, "8h", "16b",
- !cast<PatFrag>(opnode # "_16B"),
- VPR128, v8i16, v16i8, v8i8>;
- def _4s4h : NeonI_3VDL_Ext<0b1, u, 0b01, opcode, asmop, "4s", "8h",
- !cast<PatFrag>(opnode # "_8H"),
- VPR128, v4i32, v8i16, v4i16>;
- def _2d2s : NeonI_3VDL_Ext<0b1, u, 0b10, opcode, asmop, "2d", "4s",
- !cast<PatFrag>(opnode # "_4S"),
- VPR128, v2i64, v4i32, v2i32>;
- }
-}
-
-defm SABDL2vvv : NeonI_3VDL_Abd_u<0b0, 0b0111, "sabdl2", "NI_sabdl_hi", 1>;
-defm UABDL2vvv : NeonI_3VDL_Abd_u<0b1, 0b0111, "uabdl2", "NI_uabdl_hi", 1>;
-
-// For patterns that need two operators chained together.
-class NeonI_3VDL_Aba<bit q, bit u, bits<2> size, bits<4> opcode,
- string asmop, string ResS, string OpS,
- SDPatternOperator opnode, SDPatternOperator subop,
- RegisterOperand OpVPR,
- ValueType ResTy, ValueType OpTy, ValueType OpSTy>
- : NeonI_3VDiff<q, u, size, opcode,
- (outs VPR128:$Rd), (ins VPR128:$src, OpVPR:$Rn, OpVPR:$Rm),
- asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
- [(set (ResTy VPR128:$Rd),
- (ResTy (opnode
- (ResTy VPR128:$src),
- (ResTy (zext (OpSTy (subop (OpTy OpVPR:$Rn),
- (OpTy OpVPR:$Rm))))))))],
- NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU, ReadFPALU, ReadFPALU]> {
- let Constraints = "$src = $Rd";
-}
-
-multiclass NeonI_3VDL_Aba_v1<bit u, bits<4> opcode, string asmop,
- SDPatternOperator opnode, SDPatternOperator subop>{
- def _8h8b : NeonI_3VDL_Aba<0b0, u, 0b00, opcode, asmop, "8h", "8b",
- opnode, subop, VPR64, v8i16, v8i8, v8i8>;
- def _4s4h : NeonI_3VDL_Aba<0b0, u, 0b01, opcode, asmop, "4s", "4h",
- opnode, subop, VPR64, v4i32, v4i16, v4i16>;
- def _2d2s : NeonI_3VDL_Aba<0b0, u, 0b10, opcode, asmop, "2d", "2s",
- opnode, subop, VPR64, v2i64, v2i32, v2i32>;
-}
-
-defm SABALvvv : NeonI_3VDL_Aba_v1<0b0, 0b0101, "sabal",
- add, int_arm_neon_vabds>;
-defm UABALvvv : NeonI_3VDL_Aba_v1<0b1, 0b0101, "uabal",
- add, int_arm_neon_vabdu>;
-
-multiclass NeonI_3VDL2_Aba_v1<bit u, bits<4> opcode, string asmop,
- SDPatternOperator opnode, string subop> {
- def _8h8b : NeonI_3VDL_Aba<0b1, u, 0b00, opcode, asmop, "8h", "16b",
- opnode, !cast<PatFrag>(subop # "_16B"),
- VPR128, v8i16, v16i8, v8i8>;
- def _4s4h : NeonI_3VDL_Aba<0b1, u, 0b01, opcode, asmop, "4s", "8h",
- opnode, !cast<PatFrag>(subop # "_8H"),
- VPR128, v4i32, v8i16, v4i16>;
- def _2d2s : NeonI_3VDL_Aba<0b1, u, 0b10, opcode, asmop, "2d", "4s",
- opnode, !cast<PatFrag>(subop # "_4S"),
- VPR128, v2i64, v4i32, v2i32>;
-}
-
-defm SABAL2vvv : NeonI_3VDL2_Aba_v1<0b0, 0b0101, "sabal2", add,
- "NI_sabdl_hi">;
-defm UABAL2vvv : NeonI_3VDL2_Aba_v1<0b1, 0b0101, "uabal2", add,
- "NI_uabdl_hi">;
-
-// Long pattern with 2 operands
-multiclass NeonI_3VDL_2Op<bit u, bits<4> opcode, string asmop,
- SDPatternOperator opnode, bit Commutable = 0> {
- let isCommutable = Commutable,
- SchedRW = [WriteFPMul, ReadFPMul, ReadFPMul] in {
- def _8h8b : NeonI_3VD_2Op<0b0, u, 0b00, opcode, asmop, "8h", "8b",
- opnode, VPR128, VPR64, v8i16, v8i8>;
- def _4s4h : NeonI_3VD_2Op<0b0, u, 0b01, opcode, asmop, "4s", "4h",
- opnode, VPR128, VPR64, v4i32, v4i16>;
- def _2d2s : NeonI_3VD_2Op<0b0, u, 0b10, opcode, asmop, "2d", "2s",
- opnode, VPR128, VPR64, v2i64, v2i32>;
- }
-}
-
-defm SMULLvvv : NeonI_3VDL_2Op<0b0, 0b1100, "smull", int_arm_neon_vmulls, 1>;
-defm UMULLvvv : NeonI_3VDL_2Op<0b1, 0b1100, "umull", int_arm_neon_vmullu, 1>;
-
-class NeonI_3VDL2_2Op_mull<bit q, bit u, bits<2> size, bits<4> opcode,
- string asmop, string ResS, string OpS,
- SDPatternOperator opnode,
- ValueType ResTy, ValueType OpTy>
- : NeonI_3VDiff<q, u, size, opcode,
- (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
- asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
- [(set (ResTy VPR128:$Rd),
- (ResTy (opnode (OpTy VPR128:$Rn), (OpTy VPR128:$Rm))))],
- NoItinerary>,
- Sched<[WriteFPMul, ReadFPMul, ReadFPMul]>;
-
-multiclass NeonI_3VDL2_2Op_mull_v1<bit u, bits<4> opcode, string asmop,
- string opnode, bit Commutable = 0> {
- let isCommutable = Commutable in {
- def _8h16b : NeonI_3VDL2_2Op_mull<0b1, u, 0b00, opcode, asmop, "8h", "16b",
- !cast<PatFrag>(opnode # "_16B"),
- v8i16, v16i8>;
- def _4s8h : NeonI_3VDL2_2Op_mull<0b1, u, 0b01, opcode, asmop, "4s", "8h",
- !cast<PatFrag>(opnode # "_8H"),
- v4i32, v8i16>;
- def _2d4s : NeonI_3VDL2_2Op_mull<0b1, u, 0b10, opcode, asmop, "2d", "4s",
- !cast<PatFrag>(opnode # "_4S"),
- v2i64, v4i32>;
- }
-}
-
-defm SMULL2vvv : NeonI_3VDL2_2Op_mull_v1<0b0, 0b1100, "smull2",
- "NI_smull_hi", 1>;
-defm UMULL2vvv : NeonI_3VDL2_2Op_mull_v1<0b1, 0b1100, "umull2",
- "NI_umull_hi", 1>;
-
-// Long pattern with 3 operands
-class NeonI_3VDL_3Op<bit q, bit u, bits<2> size, bits<4> opcode,
- string asmop, string ResS, string OpS,
- SDPatternOperator opnode,
- ValueType ResTy, ValueType OpTy>
- : NeonI_3VDiff<q, u, size, opcode,
- (outs VPR128:$Rd), (ins VPR128:$src, VPR64:$Rn, VPR64:$Rm),
- asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
- [(set (ResTy VPR128:$Rd),
- (ResTy (opnode
- (ResTy VPR128:$src),
- (OpTy VPR64:$Rn), (OpTy VPR64:$Rm))))],
- NoItinerary>,
- Sched<[WriteFPMAC, ReadFPMAC, ReadFPMAC, ReadFPMAC]> {
- let Constraints = "$src = $Rd";
-}
-
-multiclass NeonI_3VDL_3Op_v1<bit u, bits<4> opcode, string asmop,
- SDPatternOperator opnode> {
- def _8h8b : NeonI_3VDL_3Op<0b0, u, 0b00, opcode, asmop, "8h", "8b",
- opnode, v8i16, v8i8>;
- def _4s4h : NeonI_3VDL_3Op<0b0, u, 0b01, opcode, asmop, "4s", "4h",
- opnode, v4i32, v4i16>;
- def _2d2s : NeonI_3VDL_3Op<0b0, u, 0b10, opcode, asmop, "2d", "2s",
- opnode, v2i64, v2i32>;
-}
-
-def Neon_smlal : PatFrag<(ops node:$Rd, node:$Rn, node:$Rm),
- (add node:$Rd,
- (int_arm_neon_vmulls node:$Rn, node:$Rm))>;
-
-def Neon_umlal : PatFrag<(ops node:$Rd, node:$Rn, node:$Rm),
- (add node:$Rd,
- (int_arm_neon_vmullu node:$Rn, node:$Rm))>;
-
-def Neon_smlsl : PatFrag<(ops node:$Rd, node:$Rn, node:$Rm),
- (sub node:$Rd,
- (int_arm_neon_vmulls node:$Rn, node:$Rm))>;
-
-def Neon_umlsl : PatFrag<(ops node:$Rd, node:$Rn, node:$Rm),
- (sub node:$Rd,
- (int_arm_neon_vmullu node:$Rn, node:$Rm))>;
-
-defm SMLALvvv : NeonI_3VDL_3Op_v1<0b0, 0b1000, "smlal", Neon_smlal>;
-defm UMLALvvv : NeonI_3VDL_3Op_v1<0b1, 0b1000, "umlal", Neon_umlal>;
-
-defm SMLSLvvv : NeonI_3VDL_3Op_v1<0b0, 0b1010, "smlsl", Neon_smlsl>;
-defm UMLSLvvv : NeonI_3VDL_3Op_v1<0b1, 0b1010, "umlsl", Neon_umlsl>;
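-
-// E.g. "smlal v0.8h, v1.8b, v2.8b" computes d[i] += sext(n[i]) * sext(m[i]).
-// The Neon_smlal/umlsl fragments above decompose this into a plain add/sub
-// around the widening-multiply intrinsic, so the tied-accumulator instruction
-// can be selected from two separate IR operations.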
-
-class NeonI_3VDL2_3Op_mlas<bit q, bit u, bits<2> size, bits<4> opcode,
- string asmop, string ResS, string OpS,
- SDPatternOperator subop, SDPatternOperator opnode,
- RegisterOperand OpVPR,
- ValueType ResTy, ValueType OpTy>
- : NeonI_3VDiff<q, u, size, opcode,
- (outs VPR128:$Rd), (ins VPR128:$src, OpVPR:$Rn, OpVPR:$Rm),
- asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
- [(set (ResTy VPR128:$Rd),
- (ResTy (subop
- (ResTy VPR128:$src),
- (ResTy (opnode (OpTy OpVPR:$Rn), (OpTy OpVPR:$Rm))))))],
- NoItinerary>,
- Sched<[WriteFPMAC, ReadFPMAC, ReadFPMAC, ReadFPMAC]> {
- let Constraints = "$src = $Rd";
-}
-
-multiclass NeonI_3VDL2_3Op_mlas_v1<bit u, bits<4> opcode, string asmop,
- SDPatternOperator subop, string opnode> {
- def _8h16b : NeonI_3VDL2_3Op_mlas<0b1, u, 0b00, opcode, asmop, "8h", "16b",
- subop, !cast<PatFrag>(opnode # "_16B"),
- VPR128, v8i16, v16i8>;
- def _4s8h : NeonI_3VDL2_3Op_mlas<0b1, u, 0b01, opcode, asmop, "4s", "8h",
- subop, !cast<PatFrag>(opnode # "_8H"),
- VPR128, v4i32, v8i16>;
- def _2d4s : NeonI_3VDL2_3Op_mlas<0b1, u, 0b10, opcode, asmop, "2d", "4s",
- subop, !cast<PatFrag>(opnode # "_4S"),
- VPR128, v2i64, v4i32>;
-}
-
-defm SMLAL2vvv : NeonI_3VDL2_3Op_mlas_v1<0b0, 0b1000, "smlal2",
- add, "NI_smull_hi">;
-defm UMLAL2vvv : NeonI_3VDL2_3Op_mlas_v1<0b1, 0b1000, "umlal2",
- add, "NI_umull_hi">;
-
-defm SMLSL2vvv : NeonI_3VDL2_3Op_mlas_v1<0b0, 0b1010, "smlsl2",
- sub, "NI_smull_hi">;
-defm UMLSL2vvv : NeonI_3VDL2_3Op_mlas_v1<0b1, 0b1010, "umlsl2",
- sub, "NI_umull_hi">;
-
-multiclass NeonI_3VDL_qdmlal_3Op_v2<bit u, bits<4> opcode, string asmop,
- SDPatternOperator opnode> {
- def _4s4h : NeonI_3VDL2_3Op_mlas<0b0, u, 0b01, opcode, asmop, "4s", "4h",
- opnode, int_arm_neon_vqdmull,
- VPR64, v4i32, v4i16>;
- def _2d2s : NeonI_3VDL2_3Op_mlas<0b0, u, 0b10, opcode, asmop, "2d", "2s",
- opnode, int_arm_neon_vqdmull,
- VPR64, v2i64, v2i32>;
-}
-
-defm SQDMLALvvv : NeonI_3VDL_qdmlal_3Op_v2<0b0, 0b1001, "sqdmlal",
- int_arm_neon_vqadds>;
-defm SQDMLSLvvv : NeonI_3VDL_qdmlal_3Op_v2<0b0, 0b1011, "sqdmlsl",
- int_arm_neon_vqsubs>;
-
-multiclass NeonI_3VDL_v2<bit u, bits<4> opcode, string asmop,
- SDPatternOperator opnode, bit Commutable = 0> {
- let isCommutable = Commutable in {
- def _4s4h : NeonI_3VD_2Op<0b0, u, 0b01, opcode, asmop, "4s", "4h",
- opnode, VPR128, VPR64, v4i32, v4i16>;
- def _2d2s : NeonI_3VD_2Op<0b0, u, 0b10, opcode, asmop, "2d", "2s",
- opnode, VPR128, VPR64, v2i64, v2i32>;
- }
-}
-
-let SchedRW = [WriteFPMul, ReadFPMul, ReadFPMul] in {
-defm SQDMULLvvv : NeonI_3VDL_v2<0b0, 0b1101, "sqdmull",
- int_arm_neon_vqdmull, 1>;
-}
-
-multiclass NeonI_3VDL2_2Op_mull_v2<bit u, bits<4> opcode, string asmop,
- string opnode, bit Commutable = 0> {
- let isCommutable = Commutable in {
- def _4s8h : NeonI_3VDL2_2Op_mull<0b1, u, 0b01, opcode, asmop, "4s", "8h",
- !cast<PatFrag>(opnode # "_8H"),
- v4i32, v8i16>;
- def _2d4s : NeonI_3VDL2_2Op_mull<0b1, u, 0b10, opcode, asmop, "2d", "4s",
- !cast<PatFrag>(opnode # "_4S"),
- v2i64, v4i32>;
- }
-}
-
-defm SQDMULL2vvv : NeonI_3VDL2_2Op_mull_v2<0b0, 0b1101, "sqdmull2",
- "NI_qdmull_hi", 1>;
-
-multiclass NeonI_3VDL2_3Op_qdmlal_v2<bit u, bits<4> opcode, string asmop,
- SDPatternOperator opnode> {
- def _4s8h : NeonI_3VDL2_3Op_mlas<0b1, u, 0b01, opcode, asmop, "4s", "8h",
- opnode, NI_qdmull_hi_8H,
- VPR128, v4i32, v8i16>;
- def _2d4s : NeonI_3VDL2_3Op_mlas<0b1, u, 0b10, opcode, asmop, "2d", "4s",
- opnode, NI_qdmull_hi_4S,
- VPR128, v2i64, v4i32>;
-}
-
-defm SQDMLAL2vvv : NeonI_3VDL2_3Op_qdmlal_v2<0b0, 0b1001, "sqdmlal2",
- int_arm_neon_vqadds>;
-defm SQDMLSL2vvv : NeonI_3VDL2_3Op_qdmlal_v2<0b0, 0b1011, "sqdmlsl2",
- int_arm_neon_vqsubs>;
-
-multiclass NeonI_3VDL_v3<bit u, bits<4> opcode, string asmop,
- SDPatternOperator opnode_8h8b,
- SDPatternOperator opnode_1q1d, bit Commutable = 0> {
- let isCommutable = Commutable in {
- def _8h8b : NeonI_3VD_2Op<0b0, u, 0b00, opcode, asmop, "8h", "8b",
- opnode_8h8b, VPR128, VPR64, v8i16, v8i8>;
-
- def _1q1d : NeonI_3VD_2Op<0b0, u, 0b11, opcode, asmop, "1q", "1d",
- opnode_1q1d, VPR128, VPR64, v16i8, v1i64>;
- }
-}
-
-let SchedRW = [WriteFPMul, ReadFPMul, ReadFPMul] in
-defm PMULLvvv : NeonI_3VDL_v3<0b0, 0b1110, "pmull", int_arm_neon_vmullp,
- int_aarch64_neon_vmull_p64, 1>;
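-
-// pmull is a carry-less (polynomial, GF(2)) multiply: partial products are
-// combined with XOR rather than addition. The 1q/1d form multiplies two
-// 64-bit polynomials into a single 128-bit result, the building block of
-// GHASH (AES-GCM) and of some CRC implementations.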
-
-multiclass NeonI_3VDL2_2Op_mull_v3<bit u, bits<4> opcode, string asmop,
- string opnode, bit Commutable = 0> {
- let isCommutable = Commutable in {
- def _8h16b : NeonI_3VDL2_2Op_mull<0b1, u, 0b00, opcode, asmop, "8h", "16b",
- !cast<PatFrag>(opnode # "_16B"),
- v8i16, v16i8>;
-
- def _1q2d :
- NeonI_3VDiff<0b1, u, 0b11, opcode,
- (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
- asmop # "\t$Rd.1q, $Rn.2d, $Rm.2d",
- [(set (v16i8 VPR128:$Rd),
- (v16i8 (int_aarch64_neon_vmull_p64
- (v1i64 (scalar_to_vector
- (i64 (vector_extract (v2i64 VPR128:$Rn), 1)))),
- (v1i64 (scalar_to_vector
- (i64 (vector_extract (v2i64 VPR128:$Rm), 1)))))))],
- NoItinerary>,
- Sched<[WriteFPMul, ReadFPMul, ReadFPMul]>;
- }
-
- def : Pat<(v16i8 (int_aarch64_neon_vmull_p64
- (v1i64 (extract_subvector (v2i64 VPR128:$Rn), (i64 1))),
- (v1i64 (extract_subvector (v2i64 VPR128:$Rm), (i64 1))))),
- (!cast<Instruction>(NAME # "_1q2d") VPR128:$Rn, VPR128:$Rm)>;
-}
-
-defm PMULL2vvv : NeonI_3VDL2_2Op_mull_v3<0b0, 0b1110, "pmull2", "NI_pmull_hi",
- 1>;
-
-// End of implementation for instruction class (3V Diff)
-
-// The following definitions are for the vector load/store multiple N-element
-// structure instructions (class SIMD lselem).
-
-// ld1: load multiple 1-element structures into 1/2/3/4 registers.
-// ld2/ld3/ld4: load multiple N-element structures into N registers
-// (N = 2, 3, 4). Each structure consists of a sequence of sets of N values.
-// The first element of the structure is placed in the first lane
-// of the first vector, the second element in the first lane
-// of the second vector, and so on.
-// E.g. LD1_3V_2S will load 32-bit elements {A, B, C, D, E, F} sequentially into
-// the three 64-bit vectors list {BA, DC, FE}.
-// E.g. LD3_2S will load 32-bit elements {A, B, C, D, E, F} into the three
-// 64-bit vectors list {DA, EB, FC}.
-// Store instructions mirror the loads, storing multiple structures from N
-// registers to memory.
-
-
-class NeonI_LDVList<bit q, bits<4> opcode, bits<2> size,
- RegisterOperand VecList, string asmop>
- : NeonI_LdStMult<q, 1, opcode, size,
- (outs VecList:$Rt), (ins GPR64xsp:$Rn),
- asmop # "\t$Rt, [$Rn]",
- [],
- NoItinerary>,
- Sched<[WriteVecLd, ReadVecLd]> {
- let mayLoad = 1;
- let neverHasSideEffects = 1;
-}
-
-multiclass LDVList_BHSD<bits<4> opcode, string List, string asmop> {
- def _8B : NeonI_LDVList<0, opcode, 0b00,
- !cast<RegisterOperand>(List # "8B_operand"), asmop>;
-
- def _4H : NeonI_LDVList<0, opcode, 0b01,
- !cast<RegisterOperand>(List # "4H_operand"), asmop>;
-
- def _2S : NeonI_LDVList<0, opcode, 0b10,
- !cast<RegisterOperand>(List # "2S_operand"), asmop>;
-
- def _16B : NeonI_LDVList<1, opcode, 0b00,
- !cast<RegisterOperand>(List # "16B_operand"), asmop>;
-
- def _8H : NeonI_LDVList<1, opcode, 0b01,
- !cast<RegisterOperand>(List # "8H_operand"), asmop>;
-
- def _4S : NeonI_LDVList<1, opcode, 0b10,
- !cast<RegisterOperand>(List # "4S_operand"), asmop>;
-
- def _2D : NeonI_LDVList<1, opcode, 0b11,
- !cast<RegisterOperand>(List # "2D_operand"), asmop>;
-}
-
-// Load multiple N-element structures into N consecutive registers (N = 1,2,3,4)
-defm LD1 : LDVList_BHSD<0b0111, "VOne", "ld1">;
-def LD1_1D : NeonI_LDVList<0, 0b0111, 0b11, VOne1D_operand, "ld1">;
-
-defm LD2 : LDVList_BHSD<0b1000, "VPair", "ld2">;
-
-defm LD3 : LDVList_BHSD<0b0100, "VTriple", "ld3">;
-
-defm LD4 : LDVList_BHSD<0b0000, "VQuad", "ld4">;
-
-// Load multiple 1-element structures into N consecutive registers (N = 2,3,4)
-defm LD1x2 : LDVList_BHSD<0b1010, "VPair", "ld1">;
-def LD1x2_1D : NeonI_LDVList<0, 0b1010, 0b11, VPair1D_operand, "ld1">;
-
-defm LD1x3 : LDVList_BHSD<0b0110, "VTriple", "ld1">;
-def LD1x3_1D : NeonI_LDVList<0, 0b0110, 0b11, VTriple1D_operand, "ld1">;
-
-defm LD1x4 : LDVList_BHSD<0b0010, "VQuad", "ld1">;
-def LD1x4_1D : NeonI_LDVList<0, 0b0010, 0b11, VQuad1D_operand, "ld1">;
-
-class NeonI_STVList<bit q, bits<4> opcode, bits<2> size,
- RegisterOperand VecList, string asmop>
- : NeonI_LdStMult<q, 0, opcode, size,
- (outs), (ins GPR64xsp:$Rn, VecList:$Rt),
- asmop # "\t$Rt, [$Rn]",
- [],
- NoItinerary>,
- Sched<[WriteVecSt, ReadVecSt, ReadVecSt]> {
- let mayStore = 1;
- let neverHasSideEffects = 1;
-}
-
-multiclass STVList_BHSD<bits<4> opcode, string List, string asmop> {
- def _8B : NeonI_STVList<0, opcode, 0b00,
- !cast<RegisterOperand>(List # "8B_operand"), asmop>;
-
- def _4H : NeonI_STVList<0, opcode, 0b01,
- !cast<RegisterOperand>(List # "4H_operand"), asmop>;
-
- def _2S : NeonI_STVList<0, opcode, 0b10,
- !cast<RegisterOperand>(List # "2S_operand"), asmop>;
-
- def _16B : NeonI_STVList<1, opcode, 0b00,
- !cast<RegisterOperand>(List # "16B_operand"), asmop>;
-
- def _8H : NeonI_STVList<1, opcode, 0b01,
- !cast<RegisterOperand>(List # "8H_operand"), asmop>;
-
- def _4S : NeonI_STVList<1, opcode, 0b10,
- !cast<RegisterOperand>(List # "4S_operand"), asmop>;
-
- def _2D : NeonI_STVList<1, opcode, 0b11,
- !cast<RegisterOperand>(List # "2D_operand"), asmop>;
-}
-
-// Store multiple N-element structures from N registers (N = 1,2,3,4)
-defm ST1 : STVList_BHSD<0b0111, "VOne", "st1">;
-def ST1_1D : NeonI_STVList<0, 0b0111, 0b11, VOne1D_operand, "st1">;
-
-defm ST2 : STVList_BHSD<0b1000, "VPair", "st2">;
-
-defm ST3 : STVList_BHSD<0b0100, "VTriple", "st3">;
-
-defm ST4 : STVList_BHSD<0b0000, "VQuad", "st4">;
-
-// Store multiple 1-element structures from N consecutive registers (N = 2,3,4)
-defm ST1x2 : STVList_BHSD<0b1010, "VPair", "st1">;
-def ST1x2_1D : NeonI_STVList<0, 0b1010, 0b11, VPair1D_operand, "st1">;
-
-defm ST1x3 : STVList_BHSD<0b0110, "VTriple", "st1">;
-def ST1x3_1D : NeonI_STVList<0, 0b0110, 0b11, VTriple1D_operand, "st1">;
-
-defm ST1x4 : STVList_BHSD<0b0010, "VQuad", "st1">;
-def ST1x4_1D : NeonI_STVList<0, 0b0010, 0b11, VQuad1D_operand, "st1">;
-
-def : Pat<(v2f64 (load GPR64xsp:$addr)), (LD1_2D GPR64xsp:$addr)>;
-def : Pat<(v2i64 (load GPR64xsp:$addr)), (LD1_2D GPR64xsp:$addr)>;
-
-def : Pat<(v4f32 (load GPR64xsp:$addr)), (LD1_4S GPR64xsp:$addr)>;
-def : Pat<(v4i32 (load GPR64xsp:$addr)), (LD1_4S GPR64xsp:$addr)>;
-
-def : Pat<(v8i16 (load GPR64xsp:$addr)), (LD1_8H GPR64xsp:$addr)>;
-def : Pat<(v16i8 (load GPR64xsp:$addr)), (LD1_16B GPR64xsp:$addr)>;
-
-def : Pat<(v1f64 (load GPR64xsp:$addr)), (LD1_1D GPR64xsp:$addr)>;
-def : Pat<(v1i64 (load GPR64xsp:$addr)), (LD1_1D GPR64xsp:$addr)>;
-
-def : Pat<(v2f32 (load GPR64xsp:$addr)), (LD1_2S GPR64xsp:$addr)>;
-def : Pat<(v2i32 (load GPR64xsp:$addr)), (LD1_2S GPR64xsp:$addr)>;
-
-def : Pat<(v4i16 (load GPR64xsp:$addr)), (LD1_4H GPR64xsp:$addr)>;
-def : Pat<(v8i8 (load GPR64xsp:$addr)), (LD1_8B GPR64xsp:$addr)>;
-
-def : Pat<(store (v2i64 VPR128:$value), GPR64xsp:$addr),
- (ST1_2D GPR64xsp:$addr, VPR128:$value)>;
-def : Pat<(store (v2f64 VPR128:$value), GPR64xsp:$addr),
- (ST1_2D GPR64xsp:$addr, VPR128:$value)>;
-
-def : Pat<(store (v4i32 VPR128:$value), GPR64xsp:$addr),
- (ST1_4S GPR64xsp:$addr, VPR128:$value)>;
-def : Pat<(store (v4f32 VPR128:$value), GPR64xsp:$addr),
- (ST1_4S GPR64xsp:$addr, VPR128:$value)>;
-
-def : Pat<(store (v8i16 VPR128:$value), GPR64xsp:$addr),
- (ST1_8H GPR64xsp:$addr, VPR128:$value)>;
-def : Pat<(store (v16i8 VPR128:$value), GPR64xsp:$addr),
- (ST1_16B GPR64xsp:$addr, VPR128:$value)>;
-
-def : Pat<(store (v1i64 VPR64:$value), GPR64xsp:$addr),
- (ST1_1D GPR64xsp:$addr, VPR64:$value)>;
-def : Pat<(store (v1f64 VPR64:$value), GPR64xsp:$addr),
- (ST1_1D GPR64xsp:$addr, VPR64:$value)>;
-
-def : Pat<(store (v2i32 VPR64:$value), GPR64xsp:$addr),
- (ST1_2S GPR64xsp:$addr, VPR64:$value)>;
-def : Pat<(store (v2f32 VPR64:$value), GPR64xsp:$addr),
- (ST1_2S GPR64xsp:$addr, VPR64:$value)>;
-
-def : Pat<(store (v4i16 VPR64:$value), GPR64xsp:$addr),
- (ST1_4H GPR64xsp:$addr, VPR64:$value)>;
-def : Pat<(store (v8i8 VPR64:$value), GPR64xsp:$addr),
- (ST1_8B GPR64xsp:$addr, VPR64:$value)>;
-
-// Match load/store of v1i8/v1i16/v1i32 type to FPR8/FPR16/FPR32 load/store.
-// FIXME: v1i8, v1i16 and v1i32 are currently legal types; if they are made
-// illegal, these patterns will no longer be needed.
-def : Pat<(v1i8 (load GPR64xsp:$addr)), (LSFP8_LDR $addr, 0)>;
-def : Pat<(v1i16 (load GPR64xsp:$addr)), (LSFP16_LDR $addr, 0)>;
-def : Pat<(v1i32 (load GPR64xsp:$addr)), (LSFP32_LDR $addr, 0)>;
-
-def : Pat<(store (v1i8 FPR8:$value), GPR64xsp:$addr),
- (LSFP8_STR $value, $addr, 0)>;
-def : Pat<(store (v1i16 FPR16:$value), GPR64xsp:$addr),
- (LSFP16_STR $value, $addr, 0)>;
-def : Pat<(store (v1i32 FPR32:$value), GPR64xsp:$addr),
- (LSFP32_STR $value, $addr, 0)>;
-
-
-// End of vector load/store multiple N-element structure (class SIMD lselem)
-
-// The following definitions are for the post-index vector load/store multiple
-// N-element structure instructions (class SIMD lselem-post).
-def exact1_asmoperand : AsmOperandClass {
- let Name = "Exact1";
- let PredicateMethod = "isExactImm<1>";
- let RenderMethod = "addImmOperands";
-}
-def uimm_exact1 : Operand<i32>, ImmLeaf<i32, [{return Imm == 1;}]> {
- let ParserMatchClass = exact1_asmoperand;
-}
-
-def exact2_asmoperand : AsmOperandClass {
- let Name = "Exact2";
- let PredicateMethod = "isExactImm<2>";
- let RenderMethod = "addImmOperands";
-}
-def uimm_exact2 : Operand<i32>, ImmLeaf<i32, [{return Imm == 2;}]> {
- let ParserMatchClass = exact2_asmoperand;
-}
-
-def exact3_asmoperand : AsmOperandClass {
- let Name = "Exact3";
- let PredicateMethod = "isExactImm<3>";
- let RenderMethod = "addImmOperands";
-}
-def uimm_exact3 : Operand<i32>, ImmLeaf<i32, [{return Imm == 3;}]> {
- let ParserMatchClass = exact3_asmoperand;
-}
-
-def exact4_asmoperand : AsmOperandClass {
- let Name = "Exact4";
- let PredicateMethod = "isExactImm<4>";
- let RenderMethod = "addImmOperands";
-}
-def uimm_exact4 : Operand<i32>, ImmLeaf<i32, [{return Imm == 4;}]> {
- let ParserMatchClass = exact4_asmoperand;
-}
-
-def exact6_asmoperand : AsmOperandClass {
- let Name = "Exact6";
- let PredicateMethod = "isExactImm<6>";
- let RenderMethod = "addImmOperands";
-}
-def uimm_exact6 : Operand<i32>, ImmLeaf<i32, [{return Imm == 6;}]> {
- let ParserMatchClass = exact6_asmoperand;
-}
-
-def exact8_asmoperand : AsmOperandClass {
- let Name = "Exact8";
- let PredicateMethod = "isExactImm<8>";
- let RenderMethod = "addImmOperands";
-}
-def uimm_exact8 : Operand<i32>, ImmLeaf<i32, [{return Imm == 8;}]> {
- let ParserMatchClass = exact8_asmoperand;
-}
-
-def exact12_asmoperand : AsmOperandClass {
- let Name = "Exact12";
- let PredicateMethod = "isExactImm<12>";
- let RenderMethod = "addImmOperands";
-}
-def uimm_exact12 : Operand<i32>, ImmLeaf<i32, [{return Imm == 12;}]> {
- let ParserMatchClass = exact12_asmoperand;
-}
-
-def exact16_asmoperand : AsmOperandClass {
- let Name = "Exact16";
- let PredicateMethod = "isExactImm<16>";
- let RenderMethod = "addImmOperands";
-}
-def uimm_exact16 : Operand<i32>, ImmLeaf<i32, [{return Imm == 16;}]> {
- let ParserMatchClass = exact16_asmoperand;
-}
-
-def exact24_asmoperand : AsmOperandClass {
- let Name = "Exact24";
- let PredicateMethod = "isExactImm<24>";
- let RenderMethod = "addImmOperands";
-}
-def uimm_exact24 : Operand<i32>, ImmLeaf<i32, [{return Imm == 24;}]> {
- let ParserMatchClass = exact24_asmoperand;
-}
-
-def exact32_asmoperand : AsmOperandClass {
- let Name = "Exact32";
- let PredicateMethod = "isExactImm<32>";
- let RenderMethod = "addImmOperands";
-}
-def uimm_exact32 : Operand<i32>, ImmLeaf<i32, [{return Imm == 32;}]> {
- let ParserMatchClass = exact32_asmoperand;
-}
-
-def exact48_asmoperand : AsmOperandClass {
- let Name = "Exact48";
- let PredicateMethod = "isExactImm<48>";
- let RenderMethod = "addImmOperands";
-}
-def uimm_exact48 : Operand<i32>, ImmLeaf<i32, [{return Imm == 48;}]> {
- let ParserMatchClass = exact48_asmoperand;
-}
-
-def exact64_asmoperand : AsmOperandClass {
- let Name = "Exact64";
- let PredicateMethod = "isExactImm<64>";
- let RenderMethod = "addImmOperands";
-}
-def uimm_exact64 : Operand<i32>, ImmLeaf<i32, [{return Imm == 64;}]> {
- let ParserMatchClass = exact64_asmoperand;
-}
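-
-// These "exact" operands exist because the immediate post-index form must
-// advance the base register by exactly the number of bytes transferred,
-// e.g. (illustrative assembly, one 8-byte register vs. four 16-byte ones):
-//   ld1 {v0.8b}, [x0], #8
-//   ld4 {v0.16b, v1.16b, v2.16b, v3.16b}, [x0], #64
-// Any other immediate is rejected by the isExactImm<N> parser predicate.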
-
-multiclass NeonI_LDWB_VList<bit q, bits<4> opcode, bits<2> size,
- RegisterOperand VecList, Operand ImmTy,
- string asmop> {
- let Constraints = "$Rn = $wb", mayLoad = 1, neverHasSideEffects = 1,
- DecoderMethod = "DecodeVLDSTPostInstruction" in {
- def _fixed : NeonI_LdStMult_Post<q, 1, opcode, size,
- (outs VecList:$Rt, GPR64xsp:$wb),
- (ins GPR64xsp:$Rn, ImmTy:$amt),
- asmop # "\t$Rt, [$Rn], $amt",
- [],
- NoItinerary>,
- Sched<[WriteVecLd, WriteVecLd, ReadVecLd]> {
- let Rm = 0b11111;
- }
-
- def _register : NeonI_LdStMult_Post<q, 1, opcode, size,
- (outs VecList:$Rt, GPR64xsp:$wb),
- (ins GPR64xsp:$Rn, GPR64noxzr:$Rm),
- asmop # "\t$Rt, [$Rn], $Rm",
- [],
- NoItinerary>,
- Sched<[WriteVecLd, WriteVecLd, ReadVecLd, ReadVecLd]>;
- }
-}
-
-multiclass LDWB_VList_BHSD<bits<4> opcode, string List, Operand ImmTy,
- Operand ImmTy2, string asmop> {
- defm _8B : NeonI_LDWB_VList<0, opcode, 0b00,
- !cast<RegisterOperand>(List # "8B_operand"),
- ImmTy, asmop>;
-
- defm _4H : NeonI_LDWB_VList<0, opcode, 0b01,
- !cast<RegisterOperand>(List # "4H_operand"),
- ImmTy, asmop>;
-
- defm _2S : NeonI_LDWB_VList<0, opcode, 0b10,
- !cast<RegisterOperand>(List # "2S_operand"),
- ImmTy, asmop>;
-
- defm _16B : NeonI_LDWB_VList<1, opcode, 0b00,
- !cast<RegisterOperand>(List # "16B_operand"),
- ImmTy2, asmop>;
-
- defm _8H : NeonI_LDWB_VList<1, opcode, 0b01,
- !cast<RegisterOperand>(List # "8H_operand"),
- ImmTy2, asmop>;
-
- defm _4S : NeonI_LDWB_VList<1, opcode, 0b10,
- !cast<RegisterOperand>(List # "4S_operand"),
- ImmTy2, asmop>;
-
- defm _2D : NeonI_LDWB_VList<1, opcode, 0b11,
- !cast<RegisterOperand>(List # "2D_operand"),
- ImmTy2, asmop>;
-}
-
-// Post-index load multiple N-element structures into N registers (N = 1,2,3,4)
-defm LD1WB : LDWB_VList_BHSD<0b0111, "VOne", uimm_exact8, uimm_exact16, "ld1">;
-defm LD1WB_1D : NeonI_LDWB_VList<0, 0b0111, 0b11, VOne1D_operand, uimm_exact8,
- "ld1">;
-
-defm LD2WB : LDWB_VList_BHSD<0b1000, "VPair", uimm_exact16, uimm_exact32, "ld2">;
-
-defm LD3WB : LDWB_VList_BHSD<0b0100, "VTriple", uimm_exact24, uimm_exact48,
- "ld3">;
-
-defm LD4WB : LDWB_VList_BHSD<0b0000, "VQuad", uimm_exact32, uimm_exact64, "ld4">;
-
-// Post-index load multiple 1-element structures into N consecutive registers
-// (N = 2,3,4)
-defm LD1x2WB : LDWB_VList_BHSD<0b1010, "VPair", uimm_exact16, uimm_exact32,
- "ld1">;
-defm LD1x2WB_1D : NeonI_LDWB_VList<0, 0b1010, 0b11, VPair1D_operand,
- uimm_exact16, "ld1">;
-
-defm LD1x3WB : LDWB_VList_BHSD<0b0110, "VTriple", uimm_exact24, uimm_exact48,
- "ld1">;
-defm LD1x3WB_1D : NeonI_LDWB_VList<0, 0b0110, 0b11, VTriple1D_operand,
- uimm_exact24, "ld1">;
-
-defm LD1x4WB : LDWB_VList_BHSD<0b0010, "VQuad", uimm_exact32, uimm_exact64,
- "ld1">;
-defm LD1x4WB_1D : NeonI_LDWB_VList<0, 0b0010, 0b11, VQuad1D_operand,
- uimm_exact32, "ld1">;
-
-multiclass NeonI_STWB_VList<bit q, bits<4> opcode, bits<2> size,
- RegisterOperand VecList, Operand ImmTy,
- string asmop> {
- let Constraints = "$Rn = $wb", mayStore = 1, neverHasSideEffects = 1,
- DecoderMethod = "DecodeVLDSTPostInstruction" in {
- def _fixed : NeonI_LdStMult_Post<q, 0, opcode, size,
- (outs GPR64xsp:$wb),
- (ins GPR64xsp:$Rn, ImmTy:$amt, VecList:$Rt),
- asmop # "\t$Rt, [$Rn], $amt",
- [],
- NoItinerary>,
- Sched<[WriteVecSt, ReadVecSt, ReadVecSt]> {
- let Rm = 0b11111;
- }
-
- def _register : NeonI_LdStMult_Post<q, 0, opcode, size,
- (outs GPR64xsp:$wb),
- (ins GPR64xsp:$Rn, GPR64noxzr:$Rm, VecList:$Rt),
- asmop # "\t$Rt, [$Rn], $Rm",
- [],
- NoItinerary>,
- Sched<[WriteVecSt, ReadVecSt, ReadVecSt, ReadVecSt]>;
- }
-}
-
-multiclass STWB_VList_BHSD<bits<4> opcode, string List, Operand ImmTy,
- Operand ImmTy2, string asmop> {
- defm _8B : NeonI_STWB_VList<0, opcode, 0b00,
- !cast<RegisterOperand>(List # "8B_operand"), ImmTy, asmop>;
-
- defm _4H : NeonI_STWB_VList<0, opcode, 0b01,
- !cast<RegisterOperand>(List # "4H_operand"),
- ImmTy, asmop>;
-
- defm _2S : NeonI_STWB_VList<0, opcode, 0b10,
- !cast<RegisterOperand>(List # "2S_operand"),
- ImmTy, asmop>;
-
- defm _16B : NeonI_STWB_VList<1, opcode, 0b00,
- !cast<RegisterOperand>(List # "16B_operand"),
- ImmTy2, asmop>;
-
- defm _8H : NeonI_STWB_VList<1, opcode, 0b01,
- !cast<RegisterOperand>(List # "8H_operand"),
- ImmTy2, asmop>;
-
- defm _4S : NeonI_STWB_VList<1, opcode, 0b10,
- !cast<RegisterOperand>(List # "4S_operand"),
- ImmTy2, asmop>;
-
- defm _2D : NeonI_STWB_VList<1, opcode, 0b11,
- !cast<RegisterOperand>(List # "2D_operand"),
- ImmTy2, asmop>;
-}
-
-// Post-index store multiple N-element structures from N registers (N = 1,2,3,4)
-defm ST1WB : STWB_VList_BHSD<0b0111, "VOne", uimm_exact8, uimm_exact16, "st1">;
-defm ST1WB_1D : NeonI_STWB_VList<0, 0b0111, 0b11, VOne1D_operand, uimm_exact8,
- "st1">;
-
-defm ST2WB : STWB_VList_BHSD<0b1000, "VPair", uimm_exact16, uimm_exact32, "st2">;
-
-defm ST3WB : STWB_VList_BHSD<0b0100, "VTriple", uimm_exact24, uimm_exact48,
- "st3">;
-
-defm ST4WB : STWB_VList_BHSD<0b0000, "VQuad", uimm_exact32, uimm_exact64, "st4">;
-
-// Post-index store multiple 1-element structures from N consecutive registers
-// (N = 2,3,4)
-defm ST1x2WB : STWB_VList_BHSD<0b1010, "VPair", uimm_exact16, uimm_exact32,
- "st1">;
-defm ST1x2WB_1D : NeonI_STWB_VList<0, 0b1010, 0b11, VPair1D_operand,
- uimm_exact16, "st1">;
-
-defm ST1x3WB : STWB_VList_BHSD<0b0110, "VTriple", uimm_exact24, uimm_exact48,
- "st1">;
-defm ST1x3WB_1D : NeonI_STWB_VList<0, 0b0110, 0b11, VTriple1D_operand,
- uimm_exact24, "st1">;
-
-defm ST1x4WB : STWB_VList_BHSD<0b0010, "VQuad", uimm_exact32, uimm_exact64,
- "st1">;
-defm ST1x4WB_1D : NeonI_STWB_VList<0, 0b0010, 0b11, VQuad1D_operand,
- uimm_exact32, "st1">;
-
-// End of post-index vector load/store multiple N-element structure
-// (class SIMD lselem-post)
-
-// The following are vector load/store single N-element structure
-// instructions (class SIMD lsone).
-def neon_uimm0_bare : Operand<i64>,
- ImmLeaf<i64, [{return Imm == 0;}]> {
- let ParserMatchClass = neon_uimm0_asmoperand;
- let PrintMethod = "printUImmBareOperand";
-}
-
-def neon_uimm1_bare : Operand<i64>,
- ImmLeaf<i64, [{return Imm < 2;}]> {
- let ParserMatchClass = neon_uimm1_asmoperand;
- let PrintMethod = "printUImmBareOperand";
-}
-
-def neon_uimm2_bare : Operand<i64>,
- ImmLeaf<i64, [{return Imm < 4;}]> {
- let ParserMatchClass = neon_uimm2_asmoperand;
- let PrintMethod = "printUImmBareOperand";
-}
-
-def neon_uimm3_bare : Operand<i64>,
- ImmLeaf<i64, [{return Imm < 8;}]> {
- let ParserMatchClass = uimm3_asmoperand;
- let PrintMethod = "printUImmBareOperand";
-}
-
-def neon_uimm4_bare : Operand<i64>,
- ImmLeaf<i64, [{return Imm < 16;}]> {
- let ParserMatchClass = uimm4_asmoperand;
- let PrintMethod = "printUImmBareOperand";
-}
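-
-// The *_bare operands above are lane indices, printed by
-// printUImmBareOperand without the usual "#" prefix: 4 bits for a B lane
-// (0-15), 3 for H (0-7), 2 for S (0-3) and 1 for D (0-1), as in
-// "ld1 {v0.b}[15], [x0]".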
-
-class NeonI_LDN_Dup<bit q, bit r, bits<3> opcode, bits<2> size,
- RegisterOperand VecList, string asmop>
- : NeonI_LdOne_Dup<q, r, opcode, size,
- (outs VecList:$Rt), (ins GPR64xsp:$Rn),
- asmop # "\t$Rt, [$Rn]",
- [],
- NoItinerary>,
- Sched<[WriteVecLd, ReadVecLd]> {
- let mayLoad = 1;
- let neverHasSideEffects = 1;
-}
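-
-// The ld1r/ld2r/ld3r/ld4r forms load one element per register in the list
-// and replicate it to every lane, e.g. "ld1r {v0.8b}, [x0]" reads a single
-// byte and broadcasts it to all eight lanes of v0.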
-
-multiclass LDN_Dup_BHSD<bit r, bits<3> opcode, string List, string asmop> {
- def _8B : NeonI_LDN_Dup<0, r, opcode, 0b00,
- !cast<RegisterOperand>(List # "8B_operand"), asmop>;
-
- def _4H : NeonI_LDN_Dup<0, r, opcode, 0b01,
- !cast<RegisterOperand>(List # "4H_operand"), asmop>;
-
- def _2S : NeonI_LDN_Dup<0, r, opcode, 0b10,
- !cast<RegisterOperand>(List # "2S_operand"), asmop>;
-
- def _1D : NeonI_LDN_Dup<0, r, opcode, 0b11,
- !cast<RegisterOperand>(List # "1D_operand"), asmop>;
-
- def _16B : NeonI_LDN_Dup<1, r, opcode, 0b00,
- !cast<RegisterOperand>(List # "16B_operand"), asmop>;
-
- def _8H : NeonI_LDN_Dup<1, r, opcode, 0b01,
- !cast<RegisterOperand>(List # "8H_operand"), asmop>;
-
- def _4S : NeonI_LDN_Dup<1, r, opcode, 0b10,
- !cast<RegisterOperand>(List # "4S_operand"), asmop>;
-
- def _2D : NeonI_LDN_Dup<1, r, opcode, 0b11,
- !cast<RegisterOperand>(List # "2D_operand"), asmop>;
-}
-
-// Load single 1-element structure to all lanes of 1 register
-defm LD1R : LDN_Dup_BHSD<0b0, 0b110, "VOne", "ld1r">;
-
-// Load single N-element structure to all lanes of N consecutive
-// registers (N = 2,3,4)
-defm LD2R : LDN_Dup_BHSD<0b1, 0b110, "VPair", "ld2r">;
-defm LD3R : LDN_Dup_BHSD<0b0, 0b111, "VTriple", "ld3r">;
-defm LD4R : LDN_Dup_BHSD<0b1, 0b111, "VQuad", "ld4r">;
-
-
-class LD1R_pattern <ValueType VTy, ValueType DTy, PatFrag LoadOp,
- Instruction INST>
- : Pat<(VTy (Neon_vdup (DTy (LoadOp GPR64xsp:$Rn)))),
- (VTy (INST GPR64xsp:$Rn))>;
-
-// Match all LD1R instructions
-def : LD1R_pattern<v8i8, i32, extloadi8, LD1R_8B>;
-
-def : LD1R_pattern<v16i8, i32, extloadi8, LD1R_16B>;
-
-def : LD1R_pattern<v4i16, i32, extloadi16, LD1R_4H>;
-
-def : LD1R_pattern<v8i16, i32, extloadi16, LD1R_8H>;
-
-def : LD1R_pattern<v2i32, i32, load, LD1R_2S>;
-def : LD1R_pattern<v2f32, f32, load, LD1R_2S>;
-
-def : LD1R_pattern<v4i32, i32, load, LD1R_4S>;
-def : LD1R_pattern<v4f32, f32, load, LD1R_4S>;
-
-def : LD1R_pattern<v2i64, i64, load, LD1R_2D>;
-def : LD1R_pattern<v2f64, f64, load, LD1R_2D>;
-
-class LD1R_pattern_v1 <ValueType VTy, ValueType DTy, PatFrag LoadOp,
- Instruction INST>
- : Pat<(VTy (scalar_to_vector (DTy (LoadOp GPR64xsp:$Rn)))),
- (VTy (INST GPR64xsp:$Rn))>;
-
-def : LD1R_pattern_v1<v1i64, i64, load, LD1R_1D>;
-def : LD1R_pattern_v1<v1f64, f64, load, LD1R_1D>;
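-
-// For reference, the first LD1R_pattern instantiation above expands to the
-// following Pat (illustrative only):
-//   def : Pat<(v8i8 (Neon_vdup (i32 (extloadi8 GPR64xsp:$Rn)))),
-//             (v8i8 (LD1R_8B GPR64xsp:$Rn))>;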
-
-multiclass VectorList_Bare_BHSD<string PREFIX, int Count,
- RegisterClass RegList> {
- defm B : VectorList_operands<PREFIX, "B", Count, RegList>;
- defm H : VectorList_operands<PREFIX, "H", Count, RegList>;
- defm S : VectorList_operands<PREFIX, "S", Count, RegList>;
- defm D : VectorList_operands<PREFIX, "D", Count, RegList>;
-}
-
-// Special vector list operands of 128-bit vectors with bare layout,
-// i.e. the asm syntax shows only ".b", ".h", ".s", ".d".
-defm VOne : VectorList_Bare_BHSD<"VOne", 1, FPR128>;
-defm VPair : VectorList_Bare_BHSD<"VPair", 2, QPair>;
-defm VTriple : VectorList_Bare_BHSD<"VTriple", 3, QTriple>;
-defm VQuad : VectorList_Bare_BHSD<"VQuad", 4, QQuad>;
-
-class NeonI_LDN_Lane<bit r, bits<2> op2_1, bit op0, RegisterOperand VList,
- Operand ImmOp, string asmop>
- : NeonI_LdStOne_Lane<1, r, op2_1, op0,
- (outs VList:$Rt),
- (ins GPR64xsp:$Rn, VList:$src, ImmOp:$lane),
- asmop # "\t$Rt[$lane], [$Rn]",
- [],
- NoItinerary>,
- Sched<[WriteVecLd, ReadVecLd, ReadVecLd]> {
- let mayLoad = 1;
- let neverHasSideEffects = 1;
- let hasExtraDefRegAllocReq = 1;
- let Constraints = "$src = $Rt";
-}
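-
-// The "$src = $Rt" constraint ties the input and output vector lists: a
-// load to one lane overwrites only that lane, so the instruction must also
-// read the old register value. For example, "ld2 {v0.b, v1.b}[3], [x0]"
-// updates lane 3 of v0 and v1 and leaves the other lanes intact.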
-
-multiclass LDN_Lane_BHSD<bit r, bit op0, string List, string asmop> {
- def _B : NeonI_LDN_Lane<r, 0b00, op0,
- !cast<RegisterOperand>(List # "B_operand"),
- neon_uimm4_bare, asmop> {
- let Inst{12-10} = lane{2-0};
- let Inst{30} = lane{3};
- }
-
- def _H : NeonI_LDN_Lane<r, 0b01, op0,
- !cast<RegisterOperand>(List # "H_operand"),
- neon_uimm3_bare, asmop> {
- let Inst{12-10} = {lane{1}, lane{0}, 0b0};
- let Inst{30} = lane{2};
- }
-
- def _S : NeonI_LDN_Lane<r, 0b10, op0,
- !cast<RegisterOperand>(List # "S_operand"),
- neon_uimm2_bare, asmop> {
- let Inst{12-10} = {lane{0}, 0b0, 0b0};
- let Inst{30} = lane{1};
- }
-
- def _D : NeonI_LDN_Lane<r, 0b10, op0,
- !cast<RegisterOperand>(List # "D_operand"),
- neon_uimm1_bare, asmop> {
- let Inst{12-10} = 0b001;
- let Inst{30} = lane{0};
- }
-}
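-
-// The lane index is split across the encoding: its top bit goes to the Q
-// bit (Inst{30}) and the remaining bits to Inst{12-10}. Worked example for
-// a B lane: lane 13 = 0b1101, so Inst{30} = 1 and Inst{12-10} = 0b101.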
-
-// Load single 1-element structure to one lane of 1 register.
-defm LD1LN : LDN_Lane_BHSD<0b0, 0b0, "VOne", "ld1">;
-
-// Load single N-element structure to one lane of N consecutive registers
-// (N = 2,3,4)
-defm LD2LN : LDN_Lane_BHSD<0b1, 0b0, "VPair", "ld2">;
-defm LD3LN : LDN_Lane_BHSD<0b0, 0b1, "VTriple", "ld3">;
-defm LD4LN : LDN_Lane_BHSD<0b1, 0b1, "VQuad", "ld4">;
-
-multiclass LD1LN_patterns<ValueType VTy, ValueType VTy2, ValueType DTy,
- Operand ImmOp, Operand ImmOp2, PatFrag LoadOp,
- Instruction INST> {
- def : Pat<(VTy (vector_insert (VTy VPR64:$src),
- (DTy (LoadOp GPR64xsp:$Rn)), (ImmOp:$lane))),
- (VTy (EXTRACT_SUBREG
- (INST GPR64xsp:$Rn,
- (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
- ImmOp:$lane),
- sub_64))>;
-
- def : Pat<(VTy2 (vector_insert (VTy2 VPR128:$src),
- (DTy (LoadOp GPR64xsp:$Rn)), (ImmOp2:$lane))),
- (VTy2 (INST GPR64xsp:$Rn, VPR128:$src, ImmOp2:$lane))>;
-}
-
-// Match all LD1LN instructions
-defm : LD1LN_patterns<v8i8, v16i8, i32, neon_uimm3_bare, neon_uimm4_bare,
- extloadi8, LD1LN_B>;
-
-defm : LD1LN_patterns<v4i16, v8i16, i32, neon_uimm2_bare, neon_uimm3_bare,
- extloadi16, LD1LN_H>;
-
-defm : LD1LN_patterns<v2i32, v4i32, i32, neon_uimm1_bare, neon_uimm2_bare,
- load, LD1LN_S>;
-defm : LD1LN_patterns<v2f32, v4f32, f32, neon_uimm1_bare, neon_uimm2_bare,
- load, LD1LN_S>;
-
-defm : LD1LN_patterns<v1i64, v2i64, i64, neon_uimm0_bare, neon_uimm1_bare,
- load, LD1LN_D>;
-defm : LD1LN_patterns<v1f64, v2f64, f64, neon_uimm0_bare, neon_uimm1_bare,
- load, LD1LN_D>;
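-
-// The 64-bit patterns above widen VPR64 to the 128-bit lane instruction
-// and narrow the result back, since the lane loads only operate on 128-bit
-// registers. A minimal sketch of the v8i8 expansion (illustrative only):
-//   def : Pat<(v8i8 (vector_insert (v8i8 VPR64:$src),
-//                   (i32 (extloadi8 GPR64xsp:$Rn)), neon_uimm3_bare:$lane)),
-//             (v8i8 (EXTRACT_SUBREG
-//                     (LD1LN_B GPR64xsp:$Rn,
-//                       (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
-//                       neon_uimm3_bare:$lane),
-//                     sub_64))>;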
-
-class NeonI_STN_Lane<bit r, bits<2> op2_1, bit op0, RegisterOperand VList,
- Operand ImmOp, string asmop>
- : NeonI_LdStOne_Lane<0, r, op2_1, op0,
- (outs), (ins GPR64xsp:$Rn, VList:$Rt, ImmOp:$lane),
- asmop # "\t$Rt[$lane], [$Rn]",
- [],
- NoItinerary>,
- Sched<[WriteVecSt, ReadVecSt, ReadVecSt]> {
- let mayStore = 1;
- let neverHasSideEffects = 1;
- let hasExtraDefRegAllocReq = 1;
-}
-
-multiclass STN_Lane_BHSD<bit r, bit op0, string List, string asmop> {
- def _B : NeonI_STN_Lane<r, 0b00, op0,
- !cast<RegisterOperand>(List # "B_operand"),
- neon_uimm4_bare, asmop> {
- let Inst{12-10} = lane{2-0};
- let Inst{30} = lane{3};
- }
-
- def _H : NeonI_STN_Lane<r, 0b01, op0,
- !cast<RegisterOperand>(List # "H_operand"),
- neon_uimm3_bare, asmop> {
- let Inst{12-10} = {lane{1}, lane{0}, 0b0};
- let Inst{30} = lane{2};
- }
-
- def _S : NeonI_STN_Lane<r, 0b10, op0,
- !cast<RegisterOperand>(List # "S_operand"),
- neon_uimm2_bare, asmop> {
- let Inst{12-10} = {lane{0}, 0b0, 0b0};
- let Inst{30} = lane{1};
- }
-
- def _D : NeonI_STN_Lane<r, 0b10, op0,
- !cast<RegisterOperand>(List # "D_operand"),
-                          neon_uimm1_bare, asmop> {
- let Inst{12-10} = 0b001;
- let Inst{30} = lane{0};
- }
-}
-
-// Store single 1-element structure from one lane of 1 register.
-defm ST1LN : STN_Lane_BHSD<0b0, 0b0, "VOne", "st1">;
-
-// Store single N-element structure from one lane of N consecutive registers
-// (N = 2,3,4)
-defm ST2LN : STN_Lane_BHSD<0b1, 0b0, "VPair", "st2">;
-defm ST3LN : STN_Lane_BHSD<0b0, 0b1, "VTriple", "st3">;
-defm ST4LN : STN_Lane_BHSD<0b1, 0b1, "VQuad", "st4">;
-
-multiclass ST1LN_patterns<ValueType VTy, ValueType VTy2, ValueType DTy,
- Operand ImmOp, Operand ImmOp2, PatFrag StoreOp,
- Instruction INST> {
- def : Pat<(StoreOp (DTy (vector_extract (VTy VPR64:$Rt), ImmOp:$lane)),
- GPR64xsp:$Rn),
- (INST GPR64xsp:$Rn,
- (SUBREG_TO_REG (i64 0), VPR64:$Rt, sub_64),
- ImmOp:$lane)>;
-
- def : Pat<(StoreOp (DTy (vector_extract (VTy2 VPR128:$Rt), ImmOp2:$lane)),
- GPR64xsp:$Rn),
- (INST GPR64xsp:$Rn, VPR128:$Rt, ImmOp2:$lane)>;
-}
-
-// Match all ST1LN instructions
-defm : ST1LN_patterns<v8i8, v16i8, i32, neon_uimm3_bare, neon_uimm4_bare,
- truncstorei8, ST1LN_B>;
-
-defm : ST1LN_patterns<v4i16, v8i16, i32, neon_uimm2_bare, neon_uimm3_bare,
- truncstorei16, ST1LN_H>;
-
-defm : ST1LN_patterns<v2i32, v4i32, i32, neon_uimm1_bare, neon_uimm2_bare,
- store, ST1LN_S>;
-defm : ST1LN_patterns<v2f32, v4f32, f32, neon_uimm1_bare, neon_uimm2_bare,
- store, ST1LN_S>;
-
-defm : ST1LN_patterns<v1i64, v2i64, i64, neon_uimm0_bare, neon_uimm1_bare,
- store, ST1LN_D>;
-defm : ST1LN_patterns<v1f64, v2f64, f64, neon_uimm0_bare, neon_uimm1_bare,
- store, ST1LN_D>;
-
-// End of vector load/store single N-element structure (class SIMD lsone).
-
-
-// The following are post-index load/store single N-element instructions
-// (class SIMD lsone-post)
-
-multiclass NeonI_LDN_WB_Dup<bit q, bit r, bits<3> opcode, bits<2> size,
- RegisterOperand VecList, Operand ImmTy,
- string asmop> {
- let mayLoad = 1, neverHasSideEffects = 1, Constraints = "$wb = $Rn",
- DecoderMethod = "DecodeVLDSTLanePostInstruction" in {
- def _fixed : NeonI_LdOne_Dup_Post<q, r, opcode, size,
- (outs VecList:$Rt, GPR64xsp:$wb),
- (ins GPR64xsp:$Rn, ImmTy:$amt),
- asmop # "\t$Rt, [$Rn], $amt",
- [],
- NoItinerary>,
- Sched<[WriteVecLd, WriteVecLd, ReadVecLd]> {
- let Rm = 0b11111;
- }
-
- def _register : NeonI_LdOne_Dup_Post<q, r, opcode, size,
- (outs VecList:$Rt, GPR64xsp:$wb),
- (ins GPR64xsp:$Rn, GPR64noxzr:$Rm),
- asmop # "\t$Rt, [$Rn], $Rm",
- [],
- NoItinerary>,
- Sched<[WriteVecLd, WriteVecLd, ReadVecLd, ReadVecLd]>;
- }
-}
-
-multiclass LDWB_Dup_BHSD<bit r, bits<3> opcode, string List, string asmop,
- Operand uimm_b, Operand uimm_h,
- Operand uimm_s, Operand uimm_d> {
- defm _8B : NeonI_LDN_WB_Dup<0, r, opcode, 0b00,
- !cast<RegisterOperand>(List # "8B_operand"),
- uimm_b, asmop>;
-
- defm _4H : NeonI_LDN_WB_Dup<0, r, opcode, 0b01,
- !cast<RegisterOperand>(List # "4H_operand"),
- uimm_h, asmop>;
-
- defm _2S : NeonI_LDN_WB_Dup<0, r, opcode, 0b10,
- !cast<RegisterOperand>(List # "2S_operand"),
- uimm_s, asmop>;
-
- defm _1D : NeonI_LDN_WB_Dup<0, r, opcode, 0b11,
- !cast<RegisterOperand>(List # "1D_operand"),
- uimm_d, asmop>;
-
- defm _16B : NeonI_LDN_WB_Dup<1, r, opcode, 0b00,
- !cast<RegisterOperand>(List # "16B_operand"),
- uimm_b, asmop>;
-
- defm _8H : NeonI_LDN_WB_Dup<1, r, opcode, 0b01,
- !cast<RegisterOperand>(List # "8H_operand"),
- uimm_h, asmop>;
-
- defm _4S : NeonI_LDN_WB_Dup<1, r, opcode, 0b10,
- !cast<RegisterOperand>(List # "4S_operand"),
- uimm_s, asmop>;
-
- defm _2D : NeonI_LDN_WB_Dup<1, r, opcode, 0b11,
- !cast<RegisterOperand>(List # "2D_operand"),
- uimm_d, asmop>;
-}
-
-// Post-index load single 1-element structure to all lanes of 1 register
-defm LD1R_WB : LDWB_Dup_BHSD<0b0, 0b110, "VOne", "ld1r", uimm_exact1,
- uimm_exact2, uimm_exact4, uimm_exact8>;
-
-// Post-index load single N-element structure to all lanes of N consecutive
-// registers (N = 2,3,4)
-defm LD2R_WB : LDWB_Dup_BHSD<0b1, 0b110, "VPair", "ld2r", uimm_exact2,
- uimm_exact4, uimm_exact8, uimm_exact16>;
-defm LD3R_WB : LDWB_Dup_BHSD<0b0, 0b111, "VTriple", "ld3r", uimm_exact3,
- uimm_exact6, uimm_exact12, uimm_exact24>;
-defm LD4R_WB : LDWB_Dup_BHSD<0b1, 0b111, "VQuad", "ld4r", uimm_exact4,
- uimm_exact8, uimm_exact16, uimm_exact32>;
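-
-// The fixed writeback step equals element count times element size: ld4r
-// reads four elements, so its .4s form steps by 4 x 4 = 16 bytes
-// (uimm_exact16) and its .2d form by 4 x 8 = 32 bytes (uimm_exact32).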
-
-let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1,
- Constraints = "$Rn = $wb, $Rt = $src",
- DecoderMethod = "DecodeVLDSTLanePostInstruction" in {
- class LDN_WBFx_Lane<bit r, bits<2> op2_1, bit op0, RegisterOperand VList,
- Operand ImmTy, Operand ImmOp, string asmop>
- : NeonI_LdStOne_Lane_Post<1, r, op2_1, op0,
- (outs VList:$Rt, GPR64xsp:$wb),
- (ins GPR64xsp:$Rn, ImmTy:$amt,
- VList:$src, ImmOp:$lane),
- asmop # "\t$Rt[$lane], [$Rn], $amt",
- [],
- NoItinerary>,
- Sched<[WriteVecLd, WriteVecLd, ReadVecLd, ReadVecLd]> {
- let Rm = 0b11111;
- }
-
- class LDN_WBReg_Lane<bit r, bits<2> op2_1, bit op0, RegisterOperand VList,
- Operand ImmTy, Operand ImmOp, string asmop>
- : NeonI_LdStOne_Lane_Post<1, r, op2_1, op0,
- (outs VList:$Rt, GPR64xsp:$wb),
- (ins GPR64xsp:$Rn, GPR64noxzr:$Rm,
- VList:$src, ImmOp:$lane),
- asmop # "\t$Rt[$lane], [$Rn], $Rm",
- [],
- NoItinerary>,
- Sched<[WriteVecLd, WriteVecLd, ReadVecLd, ReadVecLd, ReadVecLd]>;
-}
-
-multiclass LD_Lane_WB_BHSD<bit r, bit op0, string List, string asmop,
- Operand uimm_b, Operand uimm_h,
- Operand uimm_s, Operand uimm_d> {
- def _B_fixed : LDN_WBFx_Lane<r, 0b00, op0,
- !cast<RegisterOperand>(List # "B_operand"),
- uimm_b, neon_uimm4_bare, asmop> {
- let Inst{12-10} = lane{2-0};
- let Inst{30} = lane{3};
- }
-
- def _B_register : LDN_WBReg_Lane<r, 0b00, op0,
- !cast<RegisterOperand>(List # "B_operand"),
- uimm_b, neon_uimm4_bare, asmop> {
- let Inst{12-10} = lane{2-0};
- let Inst{30} = lane{3};
- }
-
- def _H_fixed : LDN_WBFx_Lane<r, 0b01, op0,
- !cast<RegisterOperand>(List # "H_operand"),
- uimm_h, neon_uimm3_bare, asmop> {
- let Inst{12-10} = {lane{1}, lane{0}, 0b0};
- let Inst{30} = lane{2};
- }
-
- def _H_register : LDN_WBReg_Lane<r, 0b01, op0,
- !cast<RegisterOperand>(List # "H_operand"),
- uimm_h, neon_uimm3_bare, asmop> {
- let Inst{12-10} = {lane{1}, lane{0}, 0b0};
- let Inst{30} = lane{2};
- }
-
- def _S_fixed : LDN_WBFx_Lane<r, 0b10, op0,
- !cast<RegisterOperand>(List # "S_operand"),
- uimm_s, neon_uimm2_bare, asmop> {
- let Inst{12-10} = {lane{0}, 0b0, 0b0};
- let Inst{30} = lane{1};
- }
-
- def _S_register : LDN_WBReg_Lane<r, 0b10, op0,
- !cast<RegisterOperand>(List # "S_operand"),
- uimm_s, neon_uimm2_bare, asmop> {
- let Inst{12-10} = {lane{0}, 0b0, 0b0};
- let Inst{30} = lane{1};
- }
-
- def _D_fixed : LDN_WBFx_Lane<r, 0b10, op0,
- !cast<RegisterOperand>(List # "D_operand"),
- uimm_d, neon_uimm1_bare, asmop> {
- let Inst{12-10} = 0b001;
- let Inst{30} = lane{0};
- }
-
- def _D_register : LDN_WBReg_Lane<r, 0b10, op0,
- !cast<RegisterOperand>(List # "D_operand"),
- uimm_d, neon_uimm1_bare, asmop> {
- let Inst{12-10} = 0b001;
- let Inst{30} = lane{0};
- }
-}
-
-// Post-index load single 1-element structure to one lane of 1 register.
-defm LD1LN_WB : LD_Lane_WB_BHSD<0b0, 0b0, "VOne", "ld1", uimm_exact1,
- uimm_exact2, uimm_exact4, uimm_exact8>;
-
-// Post-index load single N-element structure to one lane of N consecutive
-// registers
-// (N = 2,3,4)
-defm LD2LN_WB : LD_Lane_WB_BHSD<0b1, 0b0, "VPair", "ld2", uimm_exact2,
- uimm_exact4, uimm_exact8, uimm_exact16>;
-defm LD3LN_WB : LD_Lane_WB_BHSD<0b0, 0b1, "VTriple", "ld3", uimm_exact3,
- uimm_exact6, uimm_exact12, uimm_exact24>;
-defm LD4LN_WB : LD_Lane_WB_BHSD<0b1, 0b1, "VQuad", "ld4", uimm_exact4,
- uimm_exact8, uimm_exact16, uimm_exact32>;
-
-let mayStore = 1, neverHasSideEffects = 1,
- hasExtraDefRegAllocReq = 1, Constraints = "$Rn = $wb",
- DecoderMethod = "DecodeVLDSTLanePostInstruction" in {
- class STN_WBFx_Lane<bit r, bits<2> op2_1, bit op0, RegisterOperand VList,
- Operand ImmTy, Operand ImmOp, string asmop>
- : NeonI_LdStOne_Lane_Post<0, r, op2_1, op0,
- (outs GPR64xsp:$wb),
- (ins GPR64xsp:$Rn, ImmTy:$amt,
- VList:$Rt, ImmOp:$lane),
- asmop # "\t$Rt[$lane], [$Rn], $amt",
- [],
- NoItinerary>,
- Sched<[WriteVecSt, ReadVecSt, ReadVecSt]> {
- let Rm = 0b11111;
- }
-
- class STN_WBReg_Lane<bit r, bits<2> op2_1, bit op0, RegisterOperand VList,
- Operand ImmTy, Operand ImmOp, string asmop>
- : NeonI_LdStOne_Lane_Post<0, r, op2_1, op0,
- (outs GPR64xsp:$wb),
- (ins GPR64xsp:$Rn, GPR64noxzr:$Rm, VList:$Rt,
- ImmOp:$lane),
- asmop # "\t$Rt[$lane], [$Rn], $Rm",
- [],
- NoItinerary>,
- Sched<[WriteVecSt, ReadVecSt, ReadVecSt, ReadVecSt]>;
-}
-
-multiclass ST_Lane_WB_BHSD<bit r, bit op0, string List, string asmop,
- Operand uimm_b, Operand uimm_h,
- Operand uimm_s, Operand uimm_d> {
- def _B_fixed : STN_WBFx_Lane<r, 0b00, op0,
- !cast<RegisterOperand>(List # "B_operand"),
- uimm_b, neon_uimm4_bare, asmop> {
- let Inst{12-10} = lane{2-0};
- let Inst{30} = lane{3};
- }
-
- def _B_register : STN_WBReg_Lane<r, 0b00, op0,
- !cast<RegisterOperand>(List # "B_operand"),
- uimm_b, neon_uimm4_bare, asmop> {
- let Inst{12-10} = lane{2-0};
- let Inst{30} = lane{3};
- }
-
- def _H_fixed : STN_WBFx_Lane<r, 0b01, op0,
- !cast<RegisterOperand>(List # "H_operand"),
- uimm_h, neon_uimm3_bare, asmop> {
- let Inst{12-10} = {lane{1}, lane{0}, 0b0};
- let Inst{30} = lane{2};
- }
-
- def _H_register : STN_WBReg_Lane<r, 0b01, op0,
- !cast<RegisterOperand>(List # "H_operand"),
- uimm_h, neon_uimm3_bare, asmop> {
- let Inst{12-10} = {lane{1}, lane{0}, 0b0};
- let Inst{30} = lane{2};
- }
-
- def _S_fixed : STN_WBFx_Lane<r, 0b10, op0,
- !cast<RegisterOperand>(List # "S_operand"),
- uimm_s, neon_uimm2_bare, asmop> {
- let Inst{12-10} = {lane{0}, 0b0, 0b0};
- let Inst{30} = lane{1};
- }
-
- def _S_register : STN_WBReg_Lane<r, 0b10, op0,
- !cast<RegisterOperand>(List # "S_operand"),
- uimm_s, neon_uimm2_bare, asmop> {
- let Inst{12-10} = {lane{0}, 0b0, 0b0};
- let Inst{30} = lane{1};
- }
-
- def _D_fixed : STN_WBFx_Lane<r, 0b10, op0,
- !cast<RegisterOperand>(List # "D_operand"),
- uimm_d, neon_uimm1_bare, asmop> {
- let Inst{12-10} = 0b001;
- let Inst{30} = lane{0};
- }
-
- def _D_register : STN_WBReg_Lane<r, 0b10, op0,
- !cast<RegisterOperand>(List # "D_operand"),
- uimm_d, neon_uimm1_bare, asmop> {
- let Inst{12-10} = 0b001;
- let Inst{30} = lane{0};
- }
-}
-
-// Post-index store single 1-element structure from one lane of 1 register.
-defm ST1LN_WB : ST_Lane_WB_BHSD<0b0, 0b0, "VOne", "st1", uimm_exact1,
- uimm_exact2, uimm_exact4, uimm_exact8>;
-
-// Post-index store single N-element structure from one lane of N consecutive
-// registers (N = 2,3,4)
-defm ST2LN_WB : ST_Lane_WB_BHSD<0b1, 0b0, "VPair", "st2", uimm_exact2,
- uimm_exact4, uimm_exact8, uimm_exact16>;
-defm ST3LN_WB : ST_Lane_WB_BHSD<0b0, 0b1, "VTriple", "st3", uimm_exact3,
- uimm_exact6, uimm_exact12, uimm_exact24>;
-defm ST4LN_WB : ST_Lane_WB_BHSD<0b1, 0b1, "VQuad", "st4", uimm_exact4,
- uimm_exact8, uimm_exact16, uimm_exact32>;
-
-// End of post-index load/store single N-element instructions
-// (class SIMD lsone-post)
-
-// Neon Scalar instruction implementations
-// Scalar Three Same
-
-class NeonI_Scalar3Same_size<bit u, bits<2> size, bits<5> opcode, string asmop,
- RegisterClass FPRC>
- : NeonI_Scalar3Same<u, size, opcode,
- (outs FPRC:$Rd), (ins FPRC:$Rn, FPRC:$Rm),
- !strconcat(asmop, "\t$Rd, $Rn, $Rm"),
- [],
- NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
-
-class NeonI_Scalar3Same_D_size<bit u, bits<5> opcode, string asmop>
- : NeonI_Scalar3Same_size<u, 0b11, opcode, asmop, FPR64>;
-
-multiclass NeonI_Scalar3Same_HS_sizes<bit u, bits<5> opcode, string asmop,
- bit Commutable = 0> {
- let isCommutable = Commutable in {
- def hhh : NeonI_Scalar3Same_size<u, 0b01, opcode, asmop, FPR16>;
- def sss : NeonI_Scalar3Same_size<u, 0b10, opcode, asmop, FPR32>;
- }
-}
-
-multiclass NeonI_Scalar3Same_SD_sizes<bit u, bit size_high, bits<5> opcode,
- string asmop, bit Commutable = 0> {
- let isCommutable = Commutable in {
- def sss : NeonI_Scalar3Same_size<u, {size_high, 0b0}, opcode, asmop, FPR32>;
- def ddd : NeonI_Scalar3Same_size<u, {size_high, 0b1}, opcode, asmop, FPR64>;
- }
-}
-
-multiclass NeonI_Scalar3Same_BHSD_sizes<bit u, bits<5> opcode,
- string asmop, bit Commutable = 0> {
- let isCommutable = Commutable in {
- def bbb : NeonI_Scalar3Same_size<u, 0b00, opcode, asmop, FPR8>;
- def hhh : NeonI_Scalar3Same_size<u, 0b01, opcode, asmop, FPR16>;
- def sss : NeonI_Scalar3Same_size<u, 0b10, opcode, asmop, FPR32>;
- def ddd : NeonI_Scalar3Same_size<u, 0b11, opcode, asmop, FPR64>;
- }
-}
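-
-// Instruction suffixes name the operand register classes, one letter per
-// operand: "ddd" is an all-FPR64 form, while the three-different "dss"
-// below writes an FPR64 result from two FPR32 sources.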
-
-multiclass Neon_Scalar3Same_D_size_patterns<SDPatternOperator opnode,
- Instruction INSTD> {
- def : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn), (v1i64 FPR64:$Rm))),
- (INSTD FPR64:$Rn, FPR64:$Rm)>;
-}
-
-multiclass Neon_Scalar3Same_BHSD_size_patterns<SDPatternOperator opnode,
- Instruction INSTB,
- Instruction INSTH,
- Instruction INSTS,
- Instruction INSTD>
- : Neon_Scalar3Same_D_size_patterns<opnode, INSTD> {
-  def : Pat<(v1i8 (opnode (v1i8 FPR8:$Rn), (v1i8 FPR8:$Rm))),
-            (INSTB FPR8:$Rn, FPR8:$Rm)>;
-  def : Pat<(v1i16 (opnode (v1i16 FPR16:$Rn), (v1i16 FPR16:$Rm))),
-            (INSTH FPR16:$Rn, FPR16:$Rm)>;
-  def : Pat<(v1i32 (opnode (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))),
-            (INSTS FPR32:$Rn, FPR32:$Rm)>;
-}
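-
-// A minimal sketch of what one instantiation of these pattern multiclasses
-// provides (illustrative only, using the SQADD defs that appear below):
-//   defm : Neon_Scalar3Same_BHSD_size_patterns<int_arm_neon_vqadds, SQADDbbb,
-//                                              SQADDhhh, SQADDsss, SQADDddd>;
-// expands to one Pat per element size, such as
-//   def : Pat<(v1i8 (int_arm_neon_vqadds (v1i8 FPR8:$Rn), (v1i8 FPR8:$Rm))),
-//             (SQADDbbb FPR8:$Rn, FPR8:$Rm)>;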
-
-multiclass Neon_Scalar3Same_HS_size_patterns<SDPatternOperator opnode,
- Instruction INSTH,
- Instruction INSTS> {
- def : Pat<(v1i16 (opnode (v1i16 FPR16:$Rn), (v1i16 FPR16:$Rm))),
- (INSTH FPR16:$Rn, FPR16:$Rm)>;
- def : Pat<(v1i32 (opnode (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))),
- (INSTS FPR32:$Rn, FPR32:$Rm)>;
-}
-
-multiclass Neon_Scalar3Same_SD_size_patterns<SDPatternOperator opnode,
- ValueType SResTy, ValueType STy,
- Instruction INSTS, ValueType DResTy,
- ValueType DTy, Instruction INSTD> {
- def : Pat<(SResTy (opnode (STy FPR32:$Rn), (STy FPR32:$Rm))),
- (INSTS FPR32:$Rn, FPR32:$Rm)>;
- def : Pat<(DResTy (opnode (DTy FPR64:$Rn), (DTy FPR64:$Rm))),
- (INSTD FPR64:$Rn, FPR64:$Rm)>;
-}
-
-class Neon_Scalar3Same_cmp_V1_D_size_patterns<CondCode CC,
- Instruction INSTD>
- : Pat<(v1i64 (Neon_cmp (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm), CC)),
- (INSTD FPR64:$Rn, FPR64:$Rm)>;
-
-// Scalar Three Different
-
-class NeonI_Scalar3Diff_size<bit u, bits<2> size, bits<4> opcode, string asmop,
- RegisterClass FPRCD, RegisterClass FPRCS>
- : NeonI_Scalar3Diff<u, size, opcode,
- (outs FPRCD:$Rd), (ins FPRCS:$Rn, FPRCS:$Rm),
- !strconcat(asmop, "\t$Rd, $Rn, $Rm"),
- [],
- NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
-
-multiclass NeonI_Scalar3Diff_HS_size<bit u, bits<4> opcode, string asmop> {
- def shh : NeonI_Scalar3Diff_size<u, 0b01, opcode, asmop, FPR32, FPR16>;
- def dss : NeonI_Scalar3Diff_size<u, 0b10, opcode, asmop, FPR64, FPR32>;
-}
-
-multiclass NeonI_Scalar3Diff_ml_HS_size<bit u, bits<4> opcode, string asmop> {
- let Constraints = "$Src = $Rd" in {
- def shh : NeonI_Scalar3Diff<u, 0b01, opcode,
- (outs FPR32:$Rd), (ins FPR32:$Src, FPR16:$Rn, FPR16:$Rm),
- !strconcat(asmop, "\t$Rd, $Rn, $Rm"),
- [],
- NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU, ReadFPALU, ReadFPALU]>;
- def dss : NeonI_Scalar3Diff<u, 0b10, opcode,
- (outs FPR64:$Rd), (ins FPR64:$Src, FPR32:$Rn, FPR32:$Rm),
- !strconcat(asmop, "\t$Rd, $Rn, $Rm"),
- [],
- NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU, ReadFPALU, ReadFPALU]>;
- }
-}
-
-multiclass Neon_Scalar3Diff_HS_size_patterns<SDPatternOperator opnode,
- Instruction INSTH,
- Instruction INSTS> {
- def : Pat<(v1i32 (opnode (v1i16 FPR16:$Rn), (v1i16 FPR16:$Rm))),
- (INSTH FPR16:$Rn, FPR16:$Rm)>;
- def : Pat<(v1i64 (opnode (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))),
- (INSTS FPR32:$Rn, FPR32:$Rm)>;
-}
-
-multiclass Neon_Scalar3Diff_ml_HS_size_patterns<SDPatternOperator opnode,
- Instruction INSTH,
- Instruction INSTS> {
- def : Pat<(v1i32 (opnode (v1i32 FPR32:$Src), (v1i16 FPR16:$Rn), (v1i16 FPR16:$Rm))),
- (INSTH FPR32:$Src, FPR16:$Rn, FPR16:$Rm)>;
- def : Pat<(v1i64 (opnode (v1i64 FPR64:$Src), (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))),
- (INSTS FPR64:$Src, FPR32:$Rn, FPR32:$Rm)>;
-}
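-
-// The _ml variants tie $Src to $Rd, so the destination doubles as the
-// accumulator; e.g. sqdmlal below computes, roughly,
-// Rd = sat(Rd + sat(2 * Rn * Rm)) with a widened product.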
-
-// Scalar Two-Register Miscellaneous
-
-class NeonI_Scalar2SameMisc_size<bit u, bits<2> size, bits<5> opcode, string asmop,
- RegisterClass FPRCD, RegisterClass FPRCS>
- : NeonI_Scalar2SameMisc<u, size, opcode,
- (outs FPRCD:$Rd), (ins FPRCS:$Rn),
- !strconcat(asmop, "\t$Rd, $Rn"),
- [],
- NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU]>;
-
-multiclass NeonI_Scalar2SameMisc_SD_size<bit u, bit size_high, bits<5> opcode,
- string asmop> {
- def ss : NeonI_Scalar2SameMisc_size<u, {size_high, 0b0}, opcode, asmop, FPR32,
- FPR32>;
- def dd : NeonI_Scalar2SameMisc_size<u, {size_high, 0b1}, opcode, asmop, FPR64,
- FPR64>;
-}
-
-multiclass NeonI_Scalar2SameMisc_D_size<bit u, bits<5> opcode, string asmop> {
- def dd : NeonI_Scalar2SameMisc_size<u, 0b11, opcode, asmop, FPR64, FPR64>;
-}
-
-multiclass NeonI_Scalar2SameMisc_BHSD_size<bit u, bits<5> opcode, string asmop>
- : NeonI_Scalar2SameMisc_D_size<u, opcode, asmop> {
- def bb : NeonI_Scalar2SameMisc_size<u, 0b00, opcode, asmop, FPR8, FPR8>;
- def hh : NeonI_Scalar2SameMisc_size<u, 0b01, opcode, asmop, FPR16, FPR16>;
- def ss : NeonI_Scalar2SameMisc_size<u, 0b10, opcode, asmop, FPR32, FPR32>;
-}
-
-class NeonI_Scalar2SameMisc_fcvtxn_D_size<bit u, bits<5> opcode, string asmop>
- : NeonI_Scalar2SameMisc_size<u, 0b01, opcode, asmop, FPR32, FPR64>;
-
-multiclass NeonI_Scalar2SameMisc_narrow_HSD_size<bit u, bits<5> opcode,
- string asmop> {
- def bh : NeonI_Scalar2SameMisc_size<u, 0b00, opcode, asmop, FPR8, FPR16>;
- def hs : NeonI_Scalar2SameMisc_size<u, 0b01, opcode, asmop, FPR16, FPR32>;
- def sd : NeonI_Scalar2SameMisc_size<u, 0b10, opcode, asmop, FPR32, FPR64>;
-}
-
-class NeonI_Scalar2SameMisc_accum_size<bit u, bits<2> size, bits<5> opcode,
- string asmop, RegisterClass FPRC>
- : NeonI_Scalar2SameMisc<u, size, opcode,
- (outs FPRC:$Rd), (ins FPRC:$Src, FPRC:$Rn),
- !strconcat(asmop, "\t$Rd, $Rn"),
- [],
- NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
-
-multiclass NeonI_Scalar2SameMisc_accum_BHSD_size<bit u, bits<5> opcode,
- string asmop> {
- let Constraints = "$Src = $Rd" in {
- def bb : NeonI_Scalar2SameMisc_accum_size<u, 0b00, opcode, asmop, FPR8>;
- def hh : NeonI_Scalar2SameMisc_accum_size<u, 0b01, opcode, asmop, FPR16>;
- def ss : NeonI_Scalar2SameMisc_accum_size<u, 0b10, opcode, asmop, FPR32>;
- def dd : NeonI_Scalar2SameMisc_accum_size<u, 0b11, opcode, asmop, FPR64>;
- }
-}
-
-class Neon_Scalar2SameMisc_fcvtxn_D_size_patterns<SDPatternOperator opnode,
- Instruction INSTD>
- : Pat<(f32 (opnode (f64 FPR64:$Rn))),
- (INSTD FPR64:$Rn)>;
-
-multiclass Neon_Scalar2SameMisc_fcvt_SD_size_patterns<SDPatternOperator opnode,
- Instruction INSTS,
- Instruction INSTD> {
- def : Pat<(v1i32 (opnode (f32 FPR32:$Rn))),
- (INSTS FPR32:$Rn)>;
- def : Pat<(v1i64 (opnode (f64 FPR64:$Rn))),
- (INSTD FPR64:$Rn)>;
-}
-
-class Neon_Scalar2SameMisc_vcvt_D_size_patterns<SDPatternOperator opnode,
- Instruction INSTD>
- : Pat<(v1i64 (opnode (v1f64 FPR64:$Rn))),
- (INSTD FPR64:$Rn)>;
-
-multiclass Neon_Scalar2SameMisc_cvt_SD_size_patterns<SDPatternOperator opnode,
- Instruction INSTS,
- Instruction INSTD> {
- def : Pat<(f32 (opnode (v1i32 FPR32:$Rn))),
- (INSTS FPR32:$Rn)>;
- def : Pat<(f64 (opnode (v1i64 FPR64:$Rn))),
- (INSTD FPR64:$Rn)>;
-}
-
-multiclass Neon_Scalar2SameMisc_SD_size_patterns<SDPatternOperator opnode,
- Instruction INSTS,
- Instruction INSTD> {
- def : Pat<(f32 (opnode (f32 FPR32:$Rn))),
- (INSTS FPR32:$Rn)>;
- def : Pat<(f64 (opnode (f64 FPR64:$Rn))),
- (INSTD FPR64:$Rn)>;
-}
-
-class Neon_Scalar2SameMisc_V1_D_size_patterns<SDPatternOperator opnode,
- Instruction INSTD>
- : Pat<(v1f64 (opnode (v1f64 FPR64:$Rn))),
- (INSTD FPR64:$Rn)>;
-
-class NeonI_Scalar2SameMisc_cmpz_D_size<bit u, bits<5> opcode, string asmop>
- : NeonI_Scalar2SameMisc<u, 0b11, opcode,
- (outs FPR64:$Rd), (ins FPR64:$Rn, neon_uimm0:$Imm),
- !strconcat(asmop, "\t$Rd, $Rn, $Imm"),
- [],
- NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU]>;
-
-multiclass NeonI_Scalar2SameMisc_cmpz_SD_size<bit u, bits<5> opcode,
- string asmop> {
- def ssi : NeonI_Scalar2SameMisc<u, 0b10, opcode,
- (outs FPR32:$Rd), (ins FPR32:$Rn, fpzz32:$FPImm),
- !strconcat(asmop, "\t$Rd, $Rn, $FPImm"),
- [],
- NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU]>;
- def ddi : NeonI_Scalar2SameMisc<u, 0b11, opcode,
- (outs FPR64:$Rd), (ins FPR64:$Rn, fpzz32:$FPImm),
- !strconcat(asmop, "\t$Rd, $Rn, $FPImm"),
- [],
- NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU]>;
-}
-
-class Neon_Scalar2SameMisc_cmpz_D_size_patterns<SDPatternOperator opnode,
- Instruction INSTD>
- : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn),
- (v1i64 (bitconvert (v8i8 Neon_AllZero))))),
- (INSTD FPR64:$Rn, 0)>;
-
-class Neon_Scalar2SameMisc_cmpz_D_V1_size_patterns<CondCode CC,
- Instruction INSTD>
- : Pat<(v1i64 (Neon_cmpz (v1i64 FPR64:$Rn),
- (i32 neon_uimm0:$Imm), CC)),
- (INSTD FPR64:$Rn, neon_uimm0:$Imm)>;
-
-multiclass Neon_Scalar2SameMisc_cmpz_SD_size_patterns<SDPatternOperator opnode,
- CondCode CC,
- Instruction INSTS,
- Instruction INSTD> {
- def : Pat<(v1i32 (opnode (f32 FPR32:$Rn), (f32 fpzz32:$FPImm))),
- (INSTS FPR32:$Rn, fpzz32:$FPImm)>;
- def : Pat<(v1i64 (opnode (f64 FPR64:$Rn), (f32 fpzz32:$FPImm))),
- (INSTD FPR64:$Rn, fpzz32:$FPImm)>;
- def : Pat<(v1i64 (Neon_cmpz (v1f64 FPR64:$Rn), (f32 fpzz32:$FPImm), CC)),
- (INSTD FPR64:$Rn, fpzz32:$FPImm)>;
-}
-
-multiclass Neon_Scalar2SameMisc_D_size_patterns<SDPatternOperator opnode,
- Instruction INSTD> {
- def : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn))),
- (INSTD FPR64:$Rn)>;
-}
-
-multiclass Neon_Scalar2SameMisc_BHSD_size_patterns<SDPatternOperator opnode,
- Instruction INSTB,
- Instruction INSTH,
- Instruction INSTS,
- Instruction INSTD>
- : Neon_Scalar2SameMisc_D_size_patterns<opnode, INSTD> {
- def : Pat<(v1i8 (opnode (v1i8 FPR8:$Rn))),
- (INSTB FPR8:$Rn)>;
- def : Pat<(v1i16 (opnode (v1i16 FPR16:$Rn))),
- (INSTH FPR16:$Rn)>;
- def : Pat<(v1i32 (opnode (v1i32 FPR32:$Rn))),
- (INSTS FPR32:$Rn)>;
-}
-
-multiclass Neon_Scalar2SameMisc_narrow_HSD_size_patterns<
- SDPatternOperator opnode,
- Instruction INSTH,
- Instruction INSTS,
- Instruction INSTD> {
- def : Pat<(v1i8 (opnode (v1i16 FPR16:$Rn))),
- (INSTH FPR16:$Rn)>;
- def : Pat<(v1i16 (opnode (v1i32 FPR32:$Rn))),
- (INSTS FPR32:$Rn)>;
- def : Pat<(v1i32 (opnode (v1i64 FPR64:$Rn))),
- (INSTD FPR64:$Rn)>;
-}
-
-multiclass Neon_Scalar2SameMisc_accum_BHSD_size_patterns<
- SDPatternOperator opnode,
- Instruction INSTB,
- Instruction INSTH,
- Instruction INSTS,
- Instruction INSTD> {
- def : Pat<(v1i8 (opnode (v1i8 FPR8:$Src), (v1i8 FPR8:$Rn))),
- (INSTB FPR8:$Src, FPR8:$Rn)>;
- def : Pat<(v1i16 (opnode (v1i16 FPR16:$Src), (v1i16 FPR16:$Rn))),
- (INSTH FPR16:$Src, FPR16:$Rn)>;
- def : Pat<(v1i32 (opnode (v1i32 FPR32:$Src), (v1i32 FPR32:$Rn))),
- (INSTS FPR32:$Src, FPR32:$Rn)>;
- def : Pat<(v1i64 (opnode (v1i64 FPR64:$Src), (v1i64 FPR64:$Rn))),
- (INSTD FPR64:$Src, FPR64:$Rn)>;
-}
-
-// Scalar Shift By Immediate
-
-class NeonI_ScalarShiftImm_size<bit u, bits<5> opcode, string asmop,
- RegisterClass FPRC, Operand ImmTy>
- : NeonI_ScalarShiftImm<u, opcode,
- (outs FPRC:$Rd), (ins FPRC:$Rn, ImmTy:$Imm),
- !strconcat(asmop, "\t$Rd, $Rn, $Imm"),
- [], NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU]>;
-
-multiclass NeonI_ScalarShiftRightImm_D_size<bit u, bits<5> opcode,
- string asmop> {
- def ddi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR64, shr_imm64> {
- bits<6> Imm;
- let Inst{22} = 0b1; // immh:immb = 1xxxxxx
- let Inst{21-16} = Imm;
- }
-}
-
-multiclass NeonI_ScalarShiftRightImm_BHSD_size<bit u, bits<5> opcode,
- string asmop>
- : NeonI_ScalarShiftRightImm_D_size<u, opcode, asmop> {
- def bbi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR8, shr_imm8> {
- bits<3> Imm;
- let Inst{22-19} = 0b0001; // immh:immb = 0001xxx
- let Inst{18-16} = Imm;
- }
- def hhi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR16, shr_imm16> {
- bits<4> Imm;
- let Inst{22-20} = 0b001; // immh:immb = 001xxxx
- let Inst{19-16} = Imm;
- }
- def ssi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR32, shr_imm32> {
- bits<5> Imm;
- let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
- let Inst{20-16} = Imm;
- }
-}
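-
-// In immh:immb (Inst{22-16}) the position of the leading 1 in immh selects
-// the element size and the remaining bits encode the shift amount.
-// Assuming the standard AArch64 right-shift encoding, shift = 2 * esize -
-// UInt(immh:immb), so "sshr d0, d0, #3" encodes immh:immb = 128 - 3 =
-// 0b1111101.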
-
-multiclass NeonI_ScalarShiftLeftImm_D_size<bit u, bits<5> opcode,
- string asmop> {
- def ddi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR64, shl_imm64> {
- bits<6> Imm;
- let Inst{22} = 0b1; // immh:immb = 1xxxxxx
- let Inst{21-16} = Imm;
- }
-}
-
-multiclass NeonI_ScalarShiftLeftImm_BHSD_size<bit u, bits<5> opcode,
- string asmop>
- : NeonI_ScalarShiftLeftImm_D_size<u, opcode, asmop> {
- def bbi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR8, shl_imm8> {
- bits<3> Imm;
- let Inst{22-19} = 0b0001; // immh:immb = 0001xxx
- let Inst{18-16} = Imm;
- }
- def hhi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR16, shl_imm16> {
- bits<4> Imm;
- let Inst{22-20} = 0b001; // immh:immb = 001xxxx
- let Inst{19-16} = Imm;
- }
- def ssi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR32, shl_imm32> {
- bits<5> Imm;
- let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
- let Inst{20-16} = Imm;
- }
-}
-
-class NeonI_ScalarShiftRightImm_accum_D_size<bit u, bits<5> opcode, string asmop>
- : NeonI_ScalarShiftImm<u, opcode,
- (outs FPR64:$Rd),
- (ins FPR64:$Src, FPR64:$Rn, shr_imm64:$Imm),
- !strconcat(asmop, "\t$Rd, $Rn, $Imm"),
- [], NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU, ReadFPALU]> {
- bits<6> Imm;
- let Inst{22} = 0b1; // immh:immb = 1xxxxxx
- let Inst{21-16} = Imm;
- let Constraints = "$Src = $Rd";
-}
-
-class NeonI_ScalarShiftLeftImm_accum_D_size<bit u, bits<5> opcode, string asmop>
- : NeonI_ScalarShiftImm<u, opcode,
- (outs FPR64:$Rd),
- (ins FPR64:$Src, FPR64:$Rn, shl_imm64:$Imm),
- !strconcat(asmop, "\t$Rd, $Rn, $Imm"),
- [], NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU, ReadFPALU]> {
- bits<6> Imm;
- let Inst{22} = 0b1; // immh:immb = 1xxxxxx
- let Inst{21-16} = Imm;
- let Constraints = "$Src = $Rd";
-}
-
-class NeonI_ScalarShiftImm_narrow_size<bit u, bits<5> opcode, string asmop,
- RegisterClass FPRCD, RegisterClass FPRCS,
- Operand ImmTy>
- : NeonI_ScalarShiftImm<u, opcode,
- (outs FPRCD:$Rd), (ins FPRCS:$Rn, ImmTy:$Imm),
- !strconcat(asmop, "\t$Rd, $Rn, $Imm"),
- [], NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU]>;
-
-multiclass NeonI_ScalarShiftImm_narrow_HSD_size<bit u, bits<5> opcode,
- string asmop> {
- def bhi : NeonI_ScalarShiftImm_narrow_size<u, opcode, asmop, FPR8, FPR16,
- shr_imm8> {
- bits<3> Imm;
- let Inst{22-19} = 0b0001; // immh:immb = 0001xxx
- let Inst{18-16} = Imm;
- }
- def hsi : NeonI_ScalarShiftImm_narrow_size<u, opcode, asmop, FPR16, FPR32,
- shr_imm16> {
- bits<4> Imm;
- let Inst{22-20} = 0b001; // immh:immb = 001xxxx
- let Inst{19-16} = Imm;
- }
- def sdi : NeonI_ScalarShiftImm_narrow_size<u, opcode, asmop, FPR32, FPR64,
- shr_imm32> {
- bits<5> Imm;
- let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
- let Inst{20-16} = Imm;
- }
-}
-
-multiclass NeonI_ScalarShiftImm_cvt_SD_size<bit u, bits<5> opcode, string asmop> {
- def ssi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR32, shr_imm32> {
- bits<5> Imm;
- let Inst{22-21} = 0b01; // immh:immb = 01xxxxx
- let Inst{20-16} = Imm;
- }
- def ddi : NeonI_ScalarShiftImm_size<u, opcode, asmop, FPR64, shr_imm64> {
- bits<6> Imm;
- let Inst{22} = 0b1; // immh:immb = 1xxxxxx
- let Inst{21-16} = Imm;
- }
-}
-
-multiclass Neon_ScalarShiftRImm_D_size_patterns<SDPatternOperator opnode,
- Instruction INSTD> {
- def ddi : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn), (i32 shr_imm64:$Imm))),
- (INSTD FPR64:$Rn, imm:$Imm)>;
-}
-
-multiclass Neon_ScalarShiftLImm_D_size_patterns<SDPatternOperator opnode,
- Instruction INSTD> {
- def ddi : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn), (i32 shl_imm64:$Imm))),
- (INSTD FPR64:$Rn, imm:$Imm)>;
-}
-
-class Neon_ScalarShiftLImm_V1_D_size_patterns<SDPatternOperator opnode,
- Instruction INSTD>
- : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn),
- (v1i64 (Neon_vdup (i32 shl_imm64:$Imm))))),
- (INSTD FPR64:$Rn, imm:$Imm)>;
-
-class Neon_ScalarShiftRImm_V1_D_size_patterns<SDPatternOperator opnode,
- Instruction INSTD>
- : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn),
- (v1i64 (Neon_vdup (i32 shr_imm64:$Imm))))),
- (INSTD FPR64:$Rn, imm:$Imm)>;
-
-multiclass Neon_ScalarShiftLImm_BHSD_size_patterns<SDPatternOperator opnode,
- Instruction INSTB,
- Instruction INSTH,
- Instruction INSTS,
- Instruction INSTD>
- : Neon_ScalarShiftLImm_D_size_patterns<opnode, INSTD> {
- def bbi : Pat<(v1i8 (opnode (v1i8 FPR8:$Rn), (i32 shl_imm8:$Imm))),
- (INSTB FPR8:$Rn, imm:$Imm)>;
- def hhi : Pat<(v1i16 (opnode (v1i16 FPR16:$Rn), (i32 shl_imm16:$Imm))),
- (INSTH FPR16:$Rn, imm:$Imm)>;
- def ssi : Pat<(v1i32 (opnode (v1i32 FPR32:$Rn), (i32 shl_imm32:$Imm))),
- (INSTS FPR32:$Rn, imm:$Imm)>;
-}
-
-class Neon_ScalarShiftLImm_accum_D_size_patterns<SDPatternOperator opnode,
- Instruction INSTD>
- : Pat<(v1i64 (opnode (v1i64 FPR64:$Src), (v1i64 FPR64:$Rn),
- (i32 shl_imm64:$Imm))),
- (INSTD FPR64:$Src, FPR64:$Rn, imm:$Imm)>;
-
-class Neon_ScalarShiftRImm_accum_D_size_patterns<SDPatternOperator opnode,
- Instruction INSTD>
- : Pat<(v1i64 (opnode (v1i64 FPR64:$Src), (v1i64 FPR64:$Rn),
- (i32 shr_imm64:$Imm))),
- (INSTD FPR64:$Src, FPR64:$Rn, imm:$Imm)>;
-
-multiclass Neon_ScalarShiftImm_narrow_HSD_size_patterns<
- SDPatternOperator opnode,
- Instruction INSTH,
- Instruction INSTS,
- Instruction INSTD> {
- def bhi : Pat<(v1i8 (opnode (v1i16 FPR16:$Rn), (i32 shr_imm16:$Imm))),
- (INSTH FPR16:$Rn, imm:$Imm)>;
- def hsi : Pat<(v1i16 (opnode (v1i32 FPR32:$Rn), (i32 shr_imm32:$Imm))),
- (INSTS FPR32:$Rn, imm:$Imm)>;
- def sdi : Pat<(v1i32 (opnode (v1i64 FPR64:$Rn), (i32 shr_imm64:$Imm))),
- (INSTD FPR64:$Rn, imm:$Imm)>;
-}
-
-multiclass Neon_ScalarShiftImm_scvtf_SD_size_patterns<SDPatternOperator opnode,
- Instruction INSTS,
- Instruction INSTD> {
- def ssi : Pat<(f32 (opnode (v1i32 FPR32:$Rn), (i32 shr_imm32:$Imm))),
- (INSTS FPR32:$Rn, imm:$Imm)>;
- def ddi : Pat<(f64 (opnode (v1i64 FPR64:$Rn), (i32 shr_imm64:$Imm))),
- (INSTD FPR64:$Rn, imm:$Imm)>;
-}
-
-multiclass Neon_ScalarShiftImm_fcvts_SD_size_patterns<SDPatternOperator opnode,
- Instruction INSTS,
- Instruction INSTD> {
- def ssi : Pat<(v1i32 (opnode (f32 FPR32:$Rn), (i32 shr_imm32:$Imm))),
- (INSTS FPR32:$Rn, imm:$Imm)>;
- def ddi : Pat<(v1i64 (opnode (f64 FPR64:$Rn), (i32 shr_imm64:$Imm))),
- (INSTD FPR64:$Rn, imm:$Imm)>;
-}
-
-// Scalar Signed Shift Right (Immediate)
-defm SSHR : NeonI_ScalarShiftRightImm_D_size<0b0, 0b00000, "sshr">;
-defm : Neon_ScalarShiftRImm_D_size_patterns<int_aarch64_neon_vshrds_n, SSHRddi>;
-// Pattern to match the generic sra node on v1i64 (splatted shift amount).
-def : Neon_ScalarShiftRImm_V1_D_size_patterns<sra, SSHRddi>;
-
-// Scalar Unsigned Shift Right (Immediate)
-defm USHR : NeonI_ScalarShiftRightImm_D_size<0b1, 0b00000, "ushr">;
-defm : Neon_ScalarShiftRImm_D_size_patterns<int_aarch64_neon_vshrdu_n, USHRddi>;
-// Pattern to match the generic srl node on v1i64 (splatted shift amount).
-def : Neon_ScalarShiftRImm_V1_D_size_patterns<srl, USHRddi>;
-
-// Scalar Signed Rounding Shift Right (Immediate)
-defm SRSHR : NeonI_ScalarShiftRightImm_D_size<0b0, 0b00100, "srshr">;
-defm : Neon_ScalarShiftRImm_D_size_patterns<int_aarch64_neon_vsrshr, SRSHRddi>;
-
-// Scalar Unsigned Rounding Shift Right (Immediate)
-defm URSHR : NeonI_ScalarShiftRightImm_D_size<0b1, 0b00100, "urshr">;
-defm : Neon_ScalarShiftRImm_D_size_patterns<int_aarch64_neon_vurshr, URSHRddi>;
-
-// Scalar Signed Shift Right and Accumulate (Immediate)
-def SSRA : NeonI_ScalarShiftRightImm_accum_D_size<0b0, 0b00010, "ssra">;
-def : Neon_ScalarShiftRImm_accum_D_size_patterns
- <int_aarch64_neon_vsrads_n, SSRA>;
-
-// Scalar Unsigned Shift Right and Accumulate (Immediate)
-def USRA : NeonI_ScalarShiftRightImm_accum_D_size<0b1, 0b00010, "usra">;
-def : Neon_ScalarShiftRImm_accum_D_size_patterns
- <int_aarch64_neon_vsradu_n, USRA>;
-
-// Scalar Signed Rounding Shift Right and Accumulate (Immediate)
-def SRSRA : NeonI_ScalarShiftRightImm_accum_D_size<0b0, 0b00110, "srsra">;
-def : Neon_ScalarShiftRImm_accum_D_size_patterns
- <int_aarch64_neon_vrsrads_n, SRSRA>;
-
-// Scalar Unsigned Rounding Shift Right and Accumulate (Immediate)
-def URSRA : NeonI_ScalarShiftRightImm_accum_D_size<0b1, 0b00110, "ursra">;
-def : Neon_ScalarShiftRImm_accum_D_size_patterns
- <int_aarch64_neon_vrsradu_n, URSRA>;
-
-// Scalar Shift Left (Immediate)
-defm SHL : NeonI_ScalarShiftLeftImm_D_size<0b0, 0b01010, "shl">;
-defm : Neon_ScalarShiftLImm_D_size_patterns<int_aarch64_neon_vshld_n, SHLddi>;
-// Pattern to match the generic shl node on v1i64 (splatted shift amount).
-def : Neon_ScalarShiftLImm_V1_D_size_patterns<shl, SHLddi>;
-
-// Signed Saturating Shift Left (Immediate)
-defm SQSHL : NeonI_ScalarShiftLeftImm_BHSD_size<0b0, 0b01110, "sqshl">;
-defm : Neon_ScalarShiftLImm_BHSD_size_patterns<int_aarch64_neon_vqshls_n,
- SQSHLbbi, SQSHLhhi,
- SQSHLssi, SQSHLddi>;
-// Pattern to match llvm.arm.* intrinsic.
-defm : Neon_ScalarShiftLImm_D_size_patterns<Neon_sqrshlImm, SQSHLddi>;
-
-// Unsigned Saturating Shift Left (Immediate)
-defm UQSHL : NeonI_ScalarShiftLeftImm_BHSD_size<0b1, 0b01110, "uqshl">;
-defm : Neon_ScalarShiftLImm_BHSD_size_patterns<int_aarch64_neon_vqshlu_n,
- UQSHLbbi, UQSHLhhi,
- UQSHLssi, UQSHLddi>;
-// Pattern to match llvm.arm.* intrinsic.
-defm : Neon_ScalarShiftLImm_D_size_patterns<Neon_uqrshlImm, UQSHLddi>;
-
-// Signed Saturating Shift Left Unsigned (Immediate)
-defm SQSHLU : NeonI_ScalarShiftLeftImm_BHSD_size<0b1, 0b01100, "sqshlu">;
-defm : Neon_ScalarShiftLImm_BHSD_size_patterns<int_aarch64_neon_vsqshlu,
- SQSHLUbbi, SQSHLUhhi,
- SQSHLUssi, SQSHLUddi>;
-
-// Shift Right And Insert (Immediate)
-def SRI : NeonI_ScalarShiftRightImm_accum_D_size<0b1, 0b01000, "sri">;
-def : Neon_ScalarShiftRImm_accum_D_size_patterns
- <int_aarch64_neon_vsri, SRI>;
-
-// Shift Left And Insert (Immediate)
-def SLI : NeonI_ScalarShiftLeftImm_accum_D_size<0b1, 0b01010, "sli">;
-def : Neon_ScalarShiftLImm_accum_D_size_patterns
- <int_aarch64_neon_vsli, SLI>;
-
-// Signed Saturating Shift Right Narrow (Immediate)
-defm SQSHRN : NeonI_ScalarShiftImm_narrow_HSD_size<0b0, 0b10010, "sqshrn">;
-defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns<int_aarch64_neon_vsqshrn,
- SQSHRNbhi, SQSHRNhsi,
- SQSHRNsdi>;
-
-// Unsigned Saturating Shift Right Narrow (Immediate)
-defm UQSHRN : NeonI_ScalarShiftImm_narrow_HSD_size<0b1, 0b10010, "uqshrn">;
-defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns<int_aarch64_neon_vuqshrn,
- UQSHRNbhi, UQSHRNhsi,
- UQSHRNsdi>;
-
-// Signed Saturating Rounded Shift Right Narrow (Immediate)
-defm SQRSHRN : NeonI_ScalarShiftImm_narrow_HSD_size<0b0, 0b10011, "sqrshrn">;
-defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns<int_aarch64_neon_vsqrshrn,
- SQRSHRNbhi, SQRSHRNhsi,
- SQRSHRNsdi>;
-
-// Unsigned Saturating Rounded Shift Right Narrow (Immediate)
-defm UQRSHRN : NeonI_ScalarShiftImm_narrow_HSD_size<0b1, 0b10011, "uqrshrn">;
-defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns<int_aarch64_neon_vuqrshrn,
- UQRSHRNbhi, UQRSHRNhsi,
- UQRSHRNsdi>;
-
-// Signed Saturating Shift Right Unsigned Narrow (Immediate)
-defm SQSHRUN : NeonI_ScalarShiftImm_narrow_HSD_size<0b1, 0b10000, "sqshrun">;
-defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns<int_aarch64_neon_vsqshrun,
- SQSHRUNbhi, SQSHRUNhsi,
- SQSHRUNsdi>;
-
-// Signed Saturating Rounded Shift Right Unsigned Narrow (Immediate)
-defm SQRSHRUN : NeonI_ScalarShiftImm_narrow_HSD_size<0b1, 0b10001, "sqrshrun">;
-defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns<int_aarch64_neon_vsqrshrun,
- SQRSHRUNbhi, SQRSHRUNhsi,
- SQRSHRUNsdi>;
-
-// Scalar Signed Fixed-point Convert To Floating-Point (Immediate)
-defm SCVTF_N : NeonI_ScalarShiftImm_cvt_SD_size<0b0, 0b11100, "scvtf">;
-defm : Neon_ScalarShiftImm_scvtf_SD_size_patterns<int_aarch64_neon_vcvtfxs2fp_n,
- SCVTF_Nssi, SCVTF_Nddi>;
-
-// Scalar Unsigned Fixed-point Convert To Floating-Point (Immediate)
-defm UCVTF_N : NeonI_ScalarShiftImm_cvt_SD_size<0b1, 0b11100, "ucvtf">;
-defm : Neon_ScalarShiftImm_scvtf_SD_size_patterns<int_aarch64_neon_vcvtfxu2fp_n,
- UCVTF_Nssi, UCVTF_Nddi>;
-
-// Scalar Floating-point Convert To Signed Fixed-point (Immediate)
-defm FCVTZS_N : NeonI_ScalarShiftImm_cvt_SD_size<0b0, 0b11111, "fcvtzs">;
-defm : Neon_ScalarShiftImm_fcvts_SD_size_patterns<int_aarch64_neon_vcvtfp2fxs_n,
- FCVTZS_Nssi, FCVTZS_Nddi>;
-
-// Scalar Floating-point Convert To Unsigned Fixed-point (Immediate)
-defm FCVTZU_N : NeonI_ScalarShiftImm_cvt_SD_size<0b1, 0b11111, "fcvtzu">;
-defm : Neon_ScalarShiftImm_fcvts_SD_size_patterns<int_aarch64_neon_vcvtfp2fxu_n,
- FCVTZU_Nssi, FCVTZU_Nddi>;
-
-// Patterns For Convert Instructions Between v1f64 and v1i64
-class Neon_ScalarShiftImm_cvtf_v1f64_pattern<SDPatternOperator opnode,
- Instruction INST>
- : Pat<(v1f64 (opnode (v1i64 FPR64:$Rn), (i32 shr_imm64:$Imm))),
- (INST FPR64:$Rn, imm:$Imm)>;
-
-class Neon_ScalarShiftImm_fcvt_v1f64_pattern<SDPatternOperator opnode,
- Instruction INST>
- : Pat<(v1i64 (opnode (v1f64 FPR64:$Rn), (i32 shr_imm64:$Imm))),
- (INST FPR64:$Rn, imm:$Imm)>;
-
-def : Neon_ScalarShiftImm_cvtf_v1f64_pattern<int_arm_neon_vcvtfxs2fp,
- SCVTF_Nddi>;
-
-def : Neon_ScalarShiftImm_cvtf_v1f64_pattern<int_arm_neon_vcvtfxu2fp,
- UCVTF_Nddi>;
-
-def : Neon_ScalarShiftImm_fcvt_v1f64_pattern<int_arm_neon_vcvtfp2fxs,
- FCVTZS_Nddi>;
-
-def : Neon_ScalarShiftImm_fcvt_v1f64_pattern<int_arm_neon_vcvtfp2fxu,
- FCVTZU_Nddi>;
-
-// Scalar Integer Add
-let isCommutable = 1 in {
-def ADDddd : NeonI_Scalar3Same_D_size<0b0, 0b10000, "add">;
-}
-
-// Scalar Integer Sub
-def SUBddd : NeonI_Scalar3Same_D_size<0b1, 0b10000, "sub">;
-
-// Patterns for Scalar Integer Add and Sub with D register only
-defm : Neon_Scalar3Same_D_size_patterns<add, ADDddd>;
-defm : Neon_Scalar3Same_D_size_patterns<sub, SUBddd>;
-
-// Patterns to match llvm.aarch64.* intrinsic for Scalar Add, Sub
-defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vaddds, ADDddd>;
-defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vadddu, ADDddd>;
-defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vsubds, SUBddd>;
-defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vsubdu, SUBddd>;
-
-// Scalar Integer Saturating Add (Signed, Unsigned)
-defm SQADD : NeonI_Scalar3Same_BHSD_sizes<0b0, 0b00001, "sqadd", 1>;
-defm UQADD : NeonI_Scalar3Same_BHSD_sizes<0b1, 0b00001, "uqadd", 1>;
-
-// Scalar Integer Saturating Sub (Signed, Unsigned)
-defm SQSUB : NeonI_Scalar3Same_BHSD_sizes<0b0, 0b00101, "sqsub", 0>;
-defm UQSUB : NeonI_Scalar3Same_BHSD_sizes<0b1, 0b00101, "uqsub", 0>;
-
-
-// Patterns to match llvm.aarch64.* intrinsic for
-// Scalar Integer Saturating Add, Sub (Signed, Unsigned)
-defm : Neon_Scalar3Same_BHSD_size_patterns<int_arm_neon_vqadds, SQADDbbb,
- SQADDhhh, SQADDsss, SQADDddd>;
-defm : Neon_Scalar3Same_BHSD_size_patterns<int_arm_neon_vqaddu, UQADDbbb,
- UQADDhhh, UQADDsss, UQADDddd>;
-defm : Neon_Scalar3Same_BHSD_size_patterns<int_arm_neon_vqsubs, SQSUBbbb,
- SQSUBhhh, SQSUBsss, SQSUBddd>;
-defm : Neon_Scalar3Same_BHSD_size_patterns<int_arm_neon_vqsubu, UQSUBbbb,
- UQSUBhhh, UQSUBsss, UQSUBddd>;
-
-// Scalar Integer Saturating Doubling Multiply Half High
-let SchedRW = [WriteFPMul, ReadFPMul, ReadFPMul, ReadFPMul] in
-defm SQDMULH : NeonI_Scalar3Same_HS_sizes<0b0, 0b10110, "sqdmulh", 1>;
-
-// Scalar Integer Saturating Rounding Doubling Multiply Half High
-let SchedRW = [WriteFPMul, ReadFPMul, ReadFPMul] in {
-defm SQRDMULH : NeonI_Scalar3Same_HS_sizes<0b1, 0b10110, "sqrdmulh", 1>;
-}
-
-// Patterns to match llvm.arm.* intrinsic for
-// Scalar Integer Saturating Doubling Multiply Half High and
-// Scalar Integer Saturating Rounding Doubling Multiply Half High
-defm : Neon_Scalar3Same_HS_size_patterns<int_arm_neon_vqdmulh, SQDMULHhhh,
- SQDMULHsss>;
-defm : Neon_Scalar3Same_HS_size_patterns<int_arm_neon_vqrdmulh, SQRDMULHhhh,
- SQRDMULHsss>;
-
-let SchedRW = [WriteFPMul, ReadFPMul, ReadFPMul, ReadFPMul] in {
-// Scalar Floating-point Multiply Extended
-defm FMULX : NeonI_Scalar3Same_SD_sizes<0b0, 0b0, 0b11011, "fmulx", 1>;
-}
-
-// Scalar Floating-point Reciprocal Step
-defm FRECPS : NeonI_Scalar3Same_SD_sizes<0b0, 0b0, 0b11111, "frecps", 0>;
-defm : Neon_Scalar3Same_SD_size_patterns<int_aarch64_neon_vrecps, f32, f32,
- FRECPSsss, f64, f64, FRECPSddd>;
-def : Pat<(v1f64 (int_arm_neon_vrecps (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
- (FRECPSddd FPR64:$Rn, FPR64:$Rm)>;
-
-// Scalar Floating-point Reciprocal Square Root Step
-defm FRSQRTS : NeonI_Scalar3Same_SD_sizes<0b0, 0b1, 0b11111, "frsqrts", 0>;
-defm : Neon_Scalar3Same_SD_size_patterns<int_aarch64_neon_vrsqrts, f32, f32,
- FRSQRTSsss, f64, f64, FRSQRTSddd>;
-def : Pat<(v1f64 (int_arm_neon_vrsqrts (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
- (FRSQRTSddd FPR64:$Rn, FPR64:$Rm)>;
-def : Pat<(v1f64 (fsqrt (v1f64 FPR64:$Rn))), (FSQRTdd FPR64:$Rn)>;
-
-// Patterns to match llvm.aarch64.* intrinsic for
-// Scalar Floating-point Multiply Extended.
-multiclass Neon_Scalar3Same_MULX_SD_size_patterns<SDPatternOperator opnode,
- Instruction INSTS,
- Instruction INSTD> {
- def : Pat<(f32 (opnode (f32 FPR32:$Rn), (f32 FPR32:$Rm))),
- (INSTS FPR32:$Rn, FPR32:$Rm)>;
- def : Pat<(f64 (opnode (f64 FPR64:$Rn), (f64 FPR64:$Rm))),
- (INSTD FPR64:$Rn, FPR64:$Rm)>;
-}
-
-defm : Neon_Scalar3Same_MULX_SD_size_patterns<int_aarch64_neon_vmulx,
- FMULXsss, FMULXddd>;
-def : Pat<(v1f64 (int_aarch64_neon_vmulx (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
- (FMULXddd FPR64:$Rn, FPR64:$Rm)>;
-
-// Scalar Integer Shift Left (Signed, Unsigned)
-def SSHLddd : NeonI_Scalar3Same_D_size<0b0, 0b01000, "sshl">;
-def USHLddd : NeonI_Scalar3Same_D_size<0b1, 0b01000, "ushl">;
-
-// Patterns to match llvm.arm.* intrinsic for
-// Scalar Integer Shift Left (Signed, Unsigned)
-defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vshifts, SSHLddd>;
-defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vshiftu, USHLddd>;
-
-// Patterns to match llvm.aarch64.* intrinsic for
-// Scalar Integer Shift Left (Signed, Unsigned)
-defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vshlds, SSHLddd>;
-defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vshldu, USHLddd>;
-
-// Scalar Integer Saturating Shift Left (Signed, Unsigned)
-defm SQSHL: NeonI_Scalar3Same_BHSD_sizes<0b0, 0b01001, "sqshl", 0>;
-defm UQSHL: NeonI_Scalar3Same_BHSD_sizes<0b1, 0b01001, "uqshl", 0>;
-
-// Patterns to match llvm.aarch64.* intrinsic for
-// Scalar Integer Saturating Shift Left (Signed, Unsigned)
-defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqshls, SQSHLbbb,
- SQSHLhhh, SQSHLsss, SQSHLddd>;
-defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqshlu, UQSHLbbb,
- UQSHLhhh, UQSHLsss, UQSHLddd>;
-
-// Patterns to match llvm.arm.* intrinsic for
-// Scalar Integer Saturating Shift Left (Signed, Unsigned)
-defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqshifts, SQSHLddd>;
-defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqshiftu, UQSHLddd>;
-
-// Scalar Integer Rounding Shift Left (Signed, Unsigned)
-def SRSHLddd: NeonI_Scalar3Same_D_size<0b0, 0b01010, "srshl">;
-def URSHLddd: NeonI_Scalar3Same_D_size<0b1, 0b01010, "urshl">;
-
-// Patterns to match llvm.aarch64.* intrinsic for
-// Scalar Integer Rounding Shift Left (Signed, Unsigned)
-defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vrshlds, SRSHLddd>;
-defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vrshldu, URSHLddd>;
-
-// Patterns to match llvm.arm.* intrinsic for
-// Scalar Integer Rounding Shift Left (Signed, Unsigned)
-defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vrshifts, SRSHLddd>;
-defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vrshiftu, URSHLddd>;
-
-// Scalar Integer Saturating Rounding Shift Left (Signed, Unsigned)
-defm SQRSHL: NeonI_Scalar3Same_BHSD_sizes<0b0, 0b01011, "sqrshl", 0>;
-defm UQRSHL: NeonI_Scalar3Same_BHSD_sizes<0b1, 0b01011, "uqrshl", 0>;
-
-// Patterns to match llvm.aarch64.* intrinsic for
-// Scalar Integer Saturating Rounding Shift Left (Signed, Unsigned)
-defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqrshls, SQRSHLbbb,
- SQRSHLhhh, SQRSHLsss, SQRSHLddd>;
-defm : Neon_Scalar3Same_BHSD_size_patterns<int_aarch64_neon_vqrshlu, UQRSHLbbb,
- UQRSHLhhh, UQRSHLsss, UQRSHLddd>;
-
-// Patterns to match llvm.arm.* intrinsic for
-// Scalar Integer Saturating Rounding Shift Left (Signed, Unsigned)
-defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqrshifts, SQRSHLddd>;
-defm : Neon_Scalar3Same_D_size_patterns<int_arm_neon_vqrshiftu, UQRSHLddd>;
-
-let SchedRW = [WriteFPMAC, ReadFPMAC, ReadFPMAC, ReadFPMAC] in {
-// Signed Saturating Doubling Multiply-Add Long
-defm SQDMLAL : NeonI_Scalar3Diff_ml_HS_size<0b0, 0b1001, "sqdmlal">;
-}
-defm : Neon_Scalar3Diff_ml_HS_size_patterns<int_aarch64_neon_vqdmlal,
- SQDMLALshh, SQDMLALdss>;
-
-// Signed Saturating Doubling Multiply-Subtract Long
-let SchedRW = [WriteFPMAC, ReadFPMAC, ReadFPMAC, ReadFPMAC] in {
-defm SQDMLSL : NeonI_Scalar3Diff_ml_HS_size<0b0, 0b1011, "sqdmlsl">;
-}
-defm : Neon_Scalar3Diff_ml_HS_size_patterns<int_aarch64_neon_vqdmlsl,
- SQDMLSLshh, SQDMLSLdss>;
-
-// Signed Saturating Doubling Multiply Long
-let SchedRW = [WriteFPMul, ReadFPMul, ReadFPMul, ReadFPMul] in {
-defm SQDMULL : NeonI_Scalar3Diff_HS_size<0b0, 0b1101, "sqdmull">;
-}
-defm : Neon_Scalar3Diff_HS_size_patterns<int_arm_neon_vqdmull,
- SQDMULLshh, SQDMULLdss>;
-
-// Scalar Signed Integer Convert To Floating-point
-defm SCVTF : NeonI_Scalar2SameMisc_SD_size<0b0, 0b0, 0b11101, "scvtf">;
-defm : Neon_Scalar2SameMisc_cvt_SD_size_patterns<int_aarch64_neon_vcvtint2fps,
- SCVTFss, SCVTFdd>;
-
-// Scalar Unsigned Integer Convert To Floating-point
-defm UCVTF : NeonI_Scalar2SameMisc_SD_size<0b1, 0b0, 0b11101, "ucvtf">;
-defm : Neon_Scalar2SameMisc_cvt_SD_size_patterns<int_aarch64_neon_vcvtint2fpu,
- UCVTFss, UCVTFdd>;
-
-// Scalar Floating-point Converts
-def FCVTXN : NeonI_Scalar2SameMisc_fcvtxn_D_size<0b1, 0b10110, "fcvtxn">;
-def : Neon_Scalar2SameMisc_fcvtxn_D_size_patterns<int_aarch64_neon_fcvtxn,
- FCVTXN>;
-
-defm FCVTNS : NeonI_Scalar2SameMisc_SD_size<0b0, 0b0, 0b11010, "fcvtns">;
-defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtns,
- FCVTNSss, FCVTNSdd>;
-def : Neon_Scalar2SameMisc_vcvt_D_size_patterns<int_arm_neon_vcvtns, FCVTNSdd>;
-
-defm FCVTNU : NeonI_Scalar2SameMisc_SD_size<0b1, 0b0, 0b11010, "fcvtnu">;
-defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtnu,
- FCVTNUss, FCVTNUdd>;
-def : Neon_Scalar2SameMisc_vcvt_D_size_patterns<int_arm_neon_vcvtnu, FCVTNUdd>;
-
-defm FCVTMS : NeonI_Scalar2SameMisc_SD_size<0b0, 0b0, 0b11011, "fcvtms">;
-defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtms,
- FCVTMSss, FCVTMSdd>;
-def : Neon_Scalar2SameMisc_vcvt_D_size_patterns<int_arm_neon_vcvtms, FCVTMSdd>;
-
-defm FCVTMU : NeonI_Scalar2SameMisc_SD_size<0b1, 0b0, 0b11011, "fcvtmu">;
-defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtmu,
- FCVTMUss, FCVTMUdd>;
-def : Neon_Scalar2SameMisc_vcvt_D_size_patterns<int_arm_neon_vcvtmu, FCVTMUdd>;
-
-defm FCVTAS : NeonI_Scalar2SameMisc_SD_size<0b0, 0b0, 0b11100, "fcvtas">;
-defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtas,
- FCVTASss, FCVTASdd>;
-def : Neon_Scalar2SameMisc_vcvt_D_size_patterns<int_arm_neon_vcvtas, FCVTASdd>;
-
-defm FCVTAU : NeonI_Scalar2SameMisc_SD_size<0b1, 0b0, 0b11100, "fcvtau">;
-defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtau,
- FCVTAUss, FCVTAUdd>;
-def : Neon_Scalar2SameMisc_vcvt_D_size_patterns<int_arm_neon_vcvtau, FCVTAUdd>;
-
-defm FCVTPS : NeonI_Scalar2SameMisc_SD_size<0b0, 0b1, 0b11010, "fcvtps">;
-defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtps,
- FCVTPSss, FCVTPSdd>;
-def : Neon_Scalar2SameMisc_vcvt_D_size_patterns<int_arm_neon_vcvtps, FCVTPSdd>;
-
-defm FCVTPU : NeonI_Scalar2SameMisc_SD_size<0b1, 0b1, 0b11010, "fcvtpu">;
-defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtpu,
- FCVTPUss, FCVTPUdd>;
-def : Neon_Scalar2SameMisc_vcvt_D_size_patterns<int_arm_neon_vcvtpu, FCVTPUdd>;
-
-defm FCVTZS : NeonI_Scalar2SameMisc_SD_size<0b0, 0b1, 0b11011, "fcvtzs">;
-defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtzs,
- FCVTZSss, FCVTZSdd>;
-def : Neon_Scalar2SameMisc_vcvt_D_size_patterns<int_aarch64_neon_vcvtzs,
- FCVTZSdd>;
-
-defm FCVTZU : NeonI_Scalar2SameMisc_SD_size<0b1, 0b1, 0b11011, "fcvtzu">;
-defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns<int_aarch64_neon_fcvtzu,
- FCVTZUss, FCVTZUdd>;
-def : Neon_Scalar2SameMisc_vcvt_D_size_patterns<int_aarch64_neon_vcvtzu,
- FCVTZUdd>;
-
-// Patterns For Convert Instructions Between v1f64 and v1i64
-class Neon_Scalar2SameMisc_cvtf_v1f64_pattern<SDPatternOperator opnode,
- Instruction INST>
- : Pat<(v1f64 (opnode (v1i64 FPR64:$Rn))), (INST FPR64:$Rn)>;
-
-class Neon_Scalar2SameMisc_fcvt_v1f64_pattern<SDPatternOperator opnode,
- Instruction INST>
- : Pat<(v1i64 (opnode (v1f64 FPR64:$Rn))), (INST FPR64:$Rn)>;
-
-def : Neon_Scalar2SameMisc_cvtf_v1f64_pattern<sint_to_fp, SCVTFdd>;
-def : Neon_Scalar2SameMisc_cvtf_v1f64_pattern<uint_to_fp, UCVTFdd>;
-
-def : Neon_Scalar2SameMisc_fcvt_v1f64_pattern<fp_to_sint, FCVTZSdd>;
-def : Neon_Scalar2SameMisc_fcvt_v1f64_pattern<fp_to_uint, FCVTZUdd>;
-
-// Scalar Floating-point Reciprocal Estimate
-defm FRECPE : NeonI_Scalar2SameMisc_SD_size<0b0, 0b1, 0b11101, "frecpe">;
-defm : Neon_Scalar2SameMisc_SD_size_patterns<int_aarch64_neon_vrecpe,
- FRECPEss, FRECPEdd>;
-def : Neon_Scalar2SameMisc_V1_D_size_patterns<int_arm_neon_vrecpe,
- FRECPEdd>;
-
-// Scalar Floating-point Reciprocal Exponent
-defm FRECPX : NeonI_Scalar2SameMisc_SD_size<0b0, 0b1, 0b11111, "frecpx">;
-defm : Neon_Scalar2SameMisc_SD_size_patterns<int_aarch64_neon_vrecpx,
- FRECPXss, FRECPXdd>;
-
-// Scalar Floating-point Reciprocal Square Root Estimate
-defm FRSQRTE: NeonI_Scalar2SameMisc_SD_size<0b1, 0b1, 0b11101, "frsqrte">;
-defm : Neon_Scalar2SameMisc_SD_size_patterns<int_aarch64_neon_vrsqrte,
- FRSQRTEss, FRSQRTEdd>;
-def : Neon_Scalar2SameMisc_V1_D_size_patterns<int_arm_neon_vrsqrte,
- FRSQRTEdd>;
-
-// Scalar Floating-point Round
-class Neon_ScalarFloatRound_pattern<SDPatternOperator opnode, Instruction INST>
- : Pat<(v1f64 (opnode (v1f64 FPR64:$Rn))), (INST FPR64:$Rn)>;
-
-def : Neon_ScalarFloatRound_pattern<fceil, FRINTPdd>;
-def : Neon_ScalarFloatRound_pattern<ffloor, FRINTMdd>;
-def : Neon_ScalarFloatRound_pattern<ftrunc, FRINTZdd>;
-def : Neon_ScalarFloatRound_pattern<frint, FRINTXdd>;
-def : Neon_ScalarFloatRound_pattern<fnearbyint, FRINTIdd>;
-def : Neon_ScalarFloatRound_pattern<frnd, FRINTAdd>;
-def : Neon_ScalarFloatRound_pattern<int_aarch64_neon_frintn, FRINTNdd>;
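-// For example, a (v1f64 (fceil ...)) node selects FRINTPdd because FRINTP
-// rounds toward plus infinity; ffloor likewise maps to FRINTM (toward minus
-// infinity) and ftrunc to FRINTZ (toward zero).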
-
-// Scalar Integer Compare
-
-// Scalar Compare Bitwise Equal
-def CMEQddd: NeonI_Scalar3Same_D_size<0b1, 0b10001, "cmeq">;
-defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vceq, CMEQddd>;
-
-class Neon_Scalar3Same_cmp_D_size_v1_patterns<SDPatternOperator opnode,
- Instruction INSTD,
- CondCode CC>
- : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn), (v1i64 FPR64:$Rm), CC)),
- (INSTD FPR64:$Rn, FPR64:$Rm)>;
-
-def : Neon_Scalar3Same_cmp_D_size_v1_patterns<Neon_cmp, CMEQddd, SETEQ>;
-
-// Scalar Compare Signed Greater Than Or Equal
-def CMGEddd: NeonI_Scalar3Same_D_size<0b0, 0b00111, "cmge">;
-defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vcge, CMGEddd>;
-def : Neon_Scalar3Same_cmp_D_size_v1_patterns<Neon_cmp, CMGEddd, SETGE>;
-
-// Scalar Compare Unsigned Higher Or Same
-def CMHSddd: NeonI_Scalar3Same_D_size<0b1, 0b00111, "cmhs">;
-defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vchs, CMHSddd>;
-def : Neon_Scalar3Same_cmp_D_size_v1_patterns<Neon_cmp, CMHSddd, SETUGE>;
-
-// Scalar Compare Unsigned Higher
-def CMHIddd: NeonI_Scalar3Same_D_size<0b1, 0b00110, "cmhi">;
-defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vchi, CMHIddd>;
-def : Neon_Scalar3Same_cmp_D_size_v1_patterns<Neon_cmp, CMHIddd, SETUGT>;
-
-// Scalar Compare Signed Greater Than
-def CMGTddd: NeonI_Scalar3Same_D_size<0b0, 0b00110, "cmgt">;
-defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vcgt, CMGTddd>;
-def : Neon_Scalar3Same_cmp_D_size_v1_patterns<Neon_cmp, CMGTddd, SETGT>;
-
-// Scalar Compare Bitwise Test Bits
-def CMTSTddd: NeonI_Scalar3Same_D_size<0b0, 0b10001, "cmtst">;
-defm : Neon_Scalar3Same_D_size_patterns<int_aarch64_neon_vtstd, CMTSTddd>;
-defm : Neon_Scalar3Same_D_size_patterns<Neon_tst, CMTSTddd>;
-
-// Scalar Compare Bitwise Equal To Zero
-def CMEQddi: NeonI_Scalar2SameMisc_cmpz_D_size<0b0, 0b01001, "cmeq">;
-def : Neon_Scalar2SameMisc_cmpz_D_size_patterns<int_aarch64_neon_vceq,
- CMEQddi>;
-def : Neon_Scalar2SameMisc_cmpz_D_V1_size_patterns<SETEQ, CMEQddi>;
-
-// Scalar Compare Signed Greater Than Or Equal To Zero
-def CMGEddi: NeonI_Scalar2SameMisc_cmpz_D_size<0b1, 0b01000, "cmge">;
-def : Neon_Scalar2SameMisc_cmpz_D_size_patterns<int_aarch64_neon_vcge,
- CMGEddi>;
-def : Neon_Scalar2SameMisc_cmpz_D_V1_size_patterns<SETGE, CMGEddi>;
-
-// Scalar Compare Signed Greater Than Zero
-def CMGTddi: NeonI_Scalar2SameMisc_cmpz_D_size<0b0, 0b01000, "cmgt">;
-def : Neon_Scalar2SameMisc_cmpz_D_size_patterns<int_aarch64_neon_vcgt,
- CMGTddi>;
-def : Neon_Scalar2SameMisc_cmpz_D_V1_size_patterns<SETGT, CMGTddi>;
-
-// Scalar Compare Signed Less Than Or Equal To Zero
-def CMLEddi: NeonI_Scalar2SameMisc_cmpz_D_size<0b1, 0b01001, "cmle">;
-def : Neon_Scalar2SameMisc_cmpz_D_size_patterns<int_aarch64_neon_vclez,
- CMLEddi>;
-def : Neon_Scalar2SameMisc_cmpz_D_V1_size_patterns<SETLE, CMLEddi>;
-
-// Scalar Compare Signed Less Than Zero
-def CMLTddi: NeonI_Scalar2SameMisc_cmpz_D_size<0b0, 0b01010, "cmlt">;
-def : Neon_Scalar2SameMisc_cmpz_D_size_patterns<int_aarch64_neon_vcltz,
- CMLTddi>;
-def : Neon_Scalar2SameMisc_cmpz_D_V1_size_patterns<SETLT, CMLTddi>;
-
-// Scalar Floating-point Compare
-
-// Scalar Floating-point Compare Mask Equal
-defm FCMEQ: NeonI_Scalar3Same_SD_sizes<0b0, 0b0, 0b11100, "fcmeq">;
-defm : Neon_Scalar3Same_SD_size_patterns<int_aarch64_neon_fceq, v1i32, f32,
- FCMEQsss, v1i64, f64, FCMEQddd>;
-def : Neon_Scalar3Same_cmp_V1_D_size_patterns<SETEQ, FCMEQddd>;
-
-// Scalar Floating-point Compare Mask Equal To Zero
-defm FCMEQZ: NeonI_Scalar2SameMisc_cmpz_SD_size<0b0, 0b01101, "fcmeq">;
-defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns<int_aarch64_neon_fceq, SETEQ,
- FCMEQZssi, FCMEQZddi>;
-
-// Scalar Floating-point Compare Mask Greater Than Or Equal
-defm FCMGE: NeonI_Scalar3Same_SD_sizes<0b1, 0b0, 0b11100, "fcmge">;
-defm : Neon_Scalar3Same_SD_size_patterns<int_aarch64_neon_fcge, v1i32, f32,
- FCMGEsss, v1i64, f64, FCMGEddd>;
-def : Neon_Scalar3Same_cmp_V1_D_size_patterns<SETGE, FCMGEddd>;
-
-// Scalar Floating-point Compare Mask Greater Than Or Equal To Zero
-defm FCMGEZ: NeonI_Scalar2SameMisc_cmpz_SD_size<0b1, 0b01100, "fcmge">;
-defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns<int_aarch64_neon_fcge, SETGE,
- FCMGEZssi, FCMGEZddi>;
-
-// Scalar Floating-point Compare Mask Greater Than
-defm FCMGT: NeonI_Scalar3Same_SD_sizes<0b1, 0b1, 0b11100, "fcmgt">;
-defm : Neon_Scalar3Same_SD_size_patterns<int_aarch64_neon_fcgt, v1i32, f32,
- FCMGTsss, v1i64, f64, FCMGTddd>;
-def : Neon_Scalar3Same_cmp_V1_D_size_patterns<SETGT, FCMGTddd>;
-
-// Scalar Floating-point Compare Mask Greater Than Zero
-defm FCMGTZ: NeonI_Scalar2SameMisc_cmpz_SD_size<0b0, 0b01100, "fcmgt">;
-defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns<int_aarch64_neon_fcgt, SETGT,
- FCMGTZssi, FCMGTZddi>;
-
-// Scalar Floating-point Compare Mask Less Than Or Equal To Zero
-defm FCMLEZ: NeonI_Scalar2SameMisc_cmpz_SD_size<0b1, 0b01101, "fcmle">;
-defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns<int_aarch64_neon_fclez, SETLE,
- FCMLEZssi, FCMLEZddi>;
-
-// Scalar Floating-point Compare Mask Less Than Zero
-defm FCMLTZ: NeonI_Scalar2SameMisc_cmpz_SD_size<0b0, 0b01110, "fcmlt">;
-defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns<int_aarch64_neon_fcltz, SETLT,
- FCMLTZssi, FCMLTZddi>;
-
-// Scalar Floating-point Absolute Compare Mask Greater Than Or Equal
-defm FACGE: NeonI_Scalar3Same_SD_sizes<0b1, 0b0, 0b11101, "facge">;
-defm : Neon_Scalar3Same_SD_size_patterns<int_aarch64_neon_fcage, v1i32, f32,
- FACGEsss, v1i64, f64, FACGEddd>;
-def : Pat<(v1i64 (int_arm_neon_vacge (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
- (FACGEddd FPR64:$Rn, FPR64:$Rm)>;
-
-// Scalar Floating-point Absolute Compare Mask Greater Than
-defm FACGT: NeonI_Scalar3Same_SD_sizes<0b1, 0b1, 0b11101, "facgt">;
-defm : Neon_Scalar3Same_SD_size_patterns<int_aarch64_neon_fcagt, v1i32, f32,
- FACGTsss, v1i64, f64, FACGTddd>;
-def : Pat<(v1i64 (int_arm_neon_vacgt (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
- (FACGTddd FPR64:$Rn, FPR64:$Rm)>;
-
-// Scalar Floating-point Absolute Difference
-defm FABD: NeonI_Scalar3Same_SD_sizes<0b1, 0b1, 0b11010, "fabd">;
-defm : Neon_Scalar3Same_SD_size_patterns<int_aarch64_neon_vabd, f32, f32,
- FABDsss, f64, f64, FABDddd>;
-
-// Scalar Absolute Value
-defm ABS : NeonI_Scalar2SameMisc_D_size<0b0, 0b01011, "abs">;
-defm : Neon_Scalar2SameMisc_D_size_patterns<int_aarch64_neon_vabs, ABSdd>;
-
-// Scalar Signed Saturating Absolute Value
-defm SQABS : NeonI_Scalar2SameMisc_BHSD_size<0b0, 0b00111, "sqabs">;
-defm : Neon_Scalar2SameMisc_BHSD_size_patterns<int_arm_neon_vqabs,
- SQABSbb, SQABShh, SQABSss, SQABSdd>;
-
-// Scalar Negate
-defm NEG : NeonI_Scalar2SameMisc_D_size<0b1, 0b01011, "neg">;
-defm : Neon_Scalar2SameMisc_D_size_patterns<int_aarch64_neon_vneg, NEGdd>;
-
-// Scalar Signed Saturating Negate
-defm SQNEG : NeonI_Scalar2SameMisc_BHSD_size<0b1, 0b00111, "sqneg">;
-defm : Neon_Scalar2SameMisc_BHSD_size_patterns<int_arm_neon_vqneg,
- SQNEGbb, SQNEGhh, SQNEGss, SQNEGdd>;
-
-// Scalar Signed Saturating Accumulate of Unsigned Value
-defm SUQADD : NeonI_Scalar2SameMisc_accum_BHSD_size<0b0, 0b00011, "suqadd">;
-defm : Neon_Scalar2SameMisc_accum_BHSD_size_patterns<int_aarch64_neon_vuqadd,
- SUQADDbb, SUQADDhh,
- SUQADDss, SUQADDdd>;
-
-// Scalar Unsigned Saturating Accumulate of Signed Value
-defm USQADD : NeonI_Scalar2SameMisc_accum_BHSD_size<0b1, 0b00011, "usqadd">;
-defm : Neon_Scalar2SameMisc_accum_BHSD_size_patterns<int_aarch64_neon_vsqadd,
- USQADDbb, USQADDhh,
- USQADDss, USQADDdd>;
-
-def : Pat<(v1i64 (int_aarch64_neon_suqadd (v1i64 FPR64:$Src),
- (v1i64 FPR64:$Rn))),
- (SUQADDdd FPR64:$Src, FPR64:$Rn)>;
-
-def : Pat<(v1i64 (int_aarch64_neon_usqadd (v1i64 FPR64:$Src),
- (v1i64 FPR64:$Rn))),
- (USQADDdd FPR64:$Src, FPR64:$Rn)>;
-
-def : Pat<(v1i64 (int_arm_neon_vabs (v1i64 FPR64:$Rn))),
- (ABSdd FPR64:$Rn)>;
-
-def : Pat<(v1i64 (int_arm_neon_vqabs (v1i64 FPR64:$Rn))),
- (SQABSdd FPR64:$Rn)>;
-
-def : Pat<(v1i64 (int_arm_neon_vqneg (v1i64 FPR64:$Rn))),
- (SQNEGdd FPR64:$Rn)>;
-
-def : Pat<(v1i64 (sub (v1i64 (bitconvert (v8i8 Neon_AllZero))),
- (v1i64 FPR64:$Rn))),
- (NEGdd FPR64:$Rn)>;
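-// The sub-from-zero form above is how a v1i64 integer negate reaches
-// instruction selection (there is no dedicated vector neg node), so it is
-// matched here directly to the single NEGdd instruction.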
-
-// Scalar Signed Saturating Extract Unsigned Narrow
-defm SQXTUN : NeonI_Scalar2SameMisc_narrow_HSD_size<0b1, 0b10010, "sqxtun">;
-defm : Neon_Scalar2SameMisc_narrow_HSD_size_patterns<int_arm_neon_vqmovnsu,
- SQXTUNbh, SQXTUNhs,
- SQXTUNsd>;
-
-// Scalar Signed Saturating Extract Narrow
-defm SQXTN : NeonI_Scalar2SameMisc_narrow_HSD_size<0b0, 0b10100, "sqxtn">;
-defm : Neon_Scalar2SameMisc_narrow_HSD_size_patterns<int_arm_neon_vqmovns,
- SQXTNbh, SQXTNhs,
- SQXTNsd>;
-
-// Scalar Unsigned Saturating Extract Narrow
-defm UQXTN : NeonI_Scalar2SameMisc_narrow_HSD_size<0b1, 0b10100, "uqxtn">;
-defm : Neon_Scalar2SameMisc_narrow_HSD_size_patterns<int_arm_neon_vqmovnu,
- UQXTNbh, UQXTNhs,
- UQXTNsd>;
-
-// Scalar Reduce Pairwise
-
-multiclass NeonI_ScalarPair_D_sizes<bit u, bit size, bits<5> opcode,
- string asmop, bit Commutable = 0> {
- let isCommutable = Commutable in {
- def _D_2D : NeonI_ScalarPair<u, {size, 0b1}, opcode,
- (outs FPR64:$Rd), (ins VPR128:$Rn),
- !strconcat(asmop, "\t$Rd, $Rn.2d"),
- [],
- NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU]>;
- }
-}
-
-multiclass NeonI_ScalarPair_SD_sizes<bit u, bit size, bits<5> opcode,
- string asmop, bit Commutable = 0>
- : NeonI_ScalarPair_D_sizes<u, size, opcode, asmop, Commutable> {
- let isCommutable = Commutable in {
- def _S_2S : NeonI_ScalarPair<u, {size, 0b0}, opcode,
- (outs FPR32:$Rd), (ins VPR64:$Rn),
- !strconcat(asmop, "\t$Rd, $Rn.2s"),
- [],
- NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU]>;
- }
-}
-
-// Scalar Reduce Addition Pairwise (Integer)
-defm ADDPvv : NeonI_ScalarPair_D_sizes<0b0, 0b1, 0b11011, "addp", 0>;
-
-// Pattern to match llvm.aarch64.* intrinsic for
-// Scalar Reduce Addition Pairwise (Integer)
-def : Pat<(v1i64 (int_aarch64_neon_vpadd (v2i64 VPR128:$Rn))),
- (ADDPvv_D_2D VPR128:$Rn)>;
-def : Pat<(v1i64 (int_aarch64_neon_vaddv (v2i64 VPR128:$Rn))),
- (ADDPvv_D_2D VPR128:$Rn)>;
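-// Both intrinsics reduce to the same instruction: "addp d0, v0.2d" adds the
-// two 64-bit lanes of the source, so it serves as both a pairwise add and an
-// across-vector add reduction for v2i64.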
-
-// Scalar Reduce Addition Pairwise (Floating Point)
-defm FADDPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b0, 0b01101, "faddp", 0>;
-
-// Scalar Reduce Maximum Pairwise (Floating Point)
-defm FMAXPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b0, 0b01111, "fmaxp", 0>;
-
-// Scalar Reduce Minimum Pairwise (Floating Point)
-defm FMINPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b1, 0b01111, "fminp", 0>;
-
-// Scalar Reduce maxNum Pairwise (Floating Point)
-defm FMAXNMPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b0, 0b01100, "fmaxnmp", 0>;
-
-// Scalar Reduce minNum Pairwise (Floating Point)
-defm FMINNMPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b1, 0b01100, "fminnmp", 0>;
-
-multiclass Neon_ScalarPair_SD_size_patterns<SDPatternOperator opnode,
- Instruction INSTS,
- Instruction INSTD> {
- def : Pat<(f32 (opnode (v2f32 VPR64:$Rn))),
- (INSTS VPR64:$Rn)>;
- def : Pat<(f64 (opnode (v2f64 VPR128:$Rn))),
- (INSTD VPR128:$Rn)>;
-}
-
-// Patterns to match llvm.aarch64.* intrinsic for
-// Scalar Reduce Add, Max, Min, MaxNum, MinNum Pairwise (Floating Point)
-defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpfadd,
- FADDPvv_S_2S, FADDPvv_D_2D>;
-
-defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpmax,
- FMAXPvv_S_2S, FMAXPvv_D_2D>;
-
-defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpmin,
- FMINPvv_S_2S, FMINPvv_D_2D>;
-
-defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpfmaxnm,
- FMAXNMPvv_S_2S, FMAXNMPvv_D_2D>;
-
-defm : Neon_ScalarPair_SD_size_patterns<int_aarch64_neon_vpfminnm,
- FMINNMPvv_S_2S, FMINNMPvv_D_2D>;
-
-def : Pat<(f32 (int_aarch64_neon_vpfadd (v4f32 VPR128:$Rn))),
- (FADDPvv_S_2S (v2f32
- (EXTRACT_SUBREG
- (v4f32 (FADDP_4S (v4f32 VPR128:$Rn), (v4f32 VPR128:$Rn))),
- sub_64)))>;
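-// The v4f32 case needs two steps: FADDP_4S first adds adjacent pairs within
-// the 128-bit source, then the scalar FADDPvv_S_2S adds the two partial sums
-// left in the low 64 bits.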
-
-// Scalar by element Arithmetic
-
-class NeonI_ScalarXIndexedElemArith<string asmop, bits<4> opcode,
- string rmlane, bit u, bit szhi, bit szlo,
- RegisterClass ResFPR, RegisterClass OpFPR,
- RegisterOperand OpVPR, Operand OpImm>
- : NeonI_ScalarXIndexedElem<u, szhi, szlo, opcode,
- (outs ResFPR:$Rd),
- (ins OpFPR:$Rn, OpVPR:$MRm, OpImm:$Imm),
- asmop # "\t$Rd, $Rn, $MRm" # rmlane # "[$Imm]",
- [],
- NoItinerary>,
- Sched<[WriteFPMul, ReadFPMul, ReadFPMul]> {
- bits<3> Imm;
- bits<5> MRm;
-}
-
-class NeonI_ScalarXIndexedElemArith_Constraint_Impl<string asmop, bits<4> opcode,
- string rmlane,
- bit u, bit szhi, bit szlo,
- RegisterClass ResFPR,
- RegisterClass OpFPR,
- RegisterOperand OpVPR,
- Operand OpImm>
- : NeonI_ScalarXIndexedElem<u, szhi, szlo, opcode,
- (outs ResFPR:$Rd),
- (ins ResFPR:$src, OpFPR:$Rn, OpVPR:$MRm, OpImm:$Imm),
- asmop # "\t$Rd, $Rn, $MRm" # rmlane # "[$Imm]",
- [],
- NoItinerary>,
- Sched<[WriteFPMAC, ReadFPMAC, ReadFPMAC, ReadFPMAC]> {
- let Constraints = "$src = $Rd";
- bits<3> Imm;
- bits<5> MRm;
-}
-
-// Scalar Floating Point multiply (scalar, by element)
-def FMULssv_4S : NeonI_ScalarXIndexedElemArith<"fmul",
- 0b1001, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> {
- let Inst{11} = Imm{1}; // h
- let Inst{21} = Imm{0}; // l
- let Inst{20-16} = MRm;
-}
-def FMULddv_2D : NeonI_ScalarXIndexedElemArith<"fmul",
- 0b1001, ".d", 0b0, 0b1, 0b1, FPR64, FPR64, VPR128, neon_uimm1_bare> {
- let Inst{11} = Imm{0}; // h
- let Inst{21} = 0b0; // l
- let Inst{20-16} = MRm;
-}
-
-// Scalar Floating Point multiply extended (scalar, by element)
-def FMULXssv_4S : NeonI_ScalarXIndexedElemArith<"fmulx",
- 0b1001, ".s", 0b1, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> {
- let Inst{11} = Imm{1}; // h
- let Inst{21} = Imm{0}; // l
- let Inst{20-16} = MRm;
-}
-def FMULXddv_2D : NeonI_ScalarXIndexedElemArith<"fmulx",
- 0b1001, ".d", 0b1, 0b1, 0b1, FPR64, FPR64, VPR128, neon_uimm1_bare> {
- let Inst{11} = Imm{0}; // h
- let Inst{21} = 0b0; // l
- let Inst{20-16} = MRm;
-}
-
-multiclass Neon_ScalarXIndexedElem_MUL_MULX_Patterns<
- SDPatternOperator opnode,
- Instruction INST,
- ValueType ResTy, RegisterClass FPRC, ValueType OpTy, Operand OpImm,
- ValueType OpNTy, ValueType ExTy, Operand OpNImm> {
-
- def : Pat<(ResTy (opnode (ResTy FPRC:$Rn),
- (ResTy (vector_extract (OpTy VPR128:$MRm), OpImm:$Imm)))),
- (ResTy (INST (ResTy FPRC:$Rn), (OpTy VPR128:$MRm), OpImm:$Imm))>;
-
- def : Pat<(ResTy (opnode (ResTy FPRC:$Rn),
- (ResTy (vector_extract (OpNTy VPR64:$MRm), OpNImm:$Imm)))),
- (ResTy (INST (ResTy FPRC:$Rn),
- (ExTy (SUBREG_TO_REG (i64 0), VPR64:$MRm, sub_64)),
- OpNImm:$Imm))>;
-
- // swapped operands
- def : Pat<(ResTy (opnode
- (ResTy (vector_extract (OpTy VPR128:$MRm), OpImm:$Imm)),
- (ResTy FPRC:$Rn))),
- (ResTy (INST (ResTy FPRC:$Rn), (OpTy VPR128:$MRm), OpImm:$Imm))>;
-
- def : Pat<(ResTy (opnode
- (ResTy (vector_extract (OpNTy VPR64:$MRm), OpNImm:$Imm)),
- (ResTy FPRC:$Rn))),
- (ResTy (INST (ResTy FPRC:$Rn),
- (ExTy (SUBREG_TO_REG (i64 0), VPR64:$MRm, sub_64)),
- OpNImm:$Imm))>;
-}
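-// When the element comes from a 64-bit vector, the patterns above first widen
-// $MRm with SUBREG_TO_REG so the by-element instruction, which encodes a
-// 128-bit register plus lane index, can still be used; the lane number is
-// valid unchanged because only the low lanes exist in the 64-bit source.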
-
-// Patterns for Scalar Floating Point multiply (scalar, by element)
-defm : Neon_ScalarXIndexedElem_MUL_MULX_Patterns<fmul, FMULssv_4S,
- f32, FPR32, v4f32, neon_uimm2_bare, v2f32, v4f32, neon_uimm1_bare>;
-defm : Neon_ScalarXIndexedElem_MUL_MULX_Patterns<fmul, FMULddv_2D,
- f64, FPR64, v2f64, neon_uimm1_bare, v1f64, v2f64, neon_uimm0_bare>;
-
-// Patterns for Scalar Floating Point multiply extended (scalar, by element)
-defm : Neon_ScalarXIndexedElem_MUL_MULX_Patterns<int_aarch64_neon_vmulx,
- FMULXssv_4S, f32, FPR32, v4f32, neon_uimm2_bare,
- v2f32, v4f32, neon_uimm1_bare>;
-defm : Neon_ScalarXIndexedElem_MUL_MULX_Patterns<int_aarch64_neon_vmulx,
- FMULXddv_2D, f64, FPR64, v2f64, neon_uimm1_bare,
- v1f64, v2f64, neon_uimm0_bare>;
-
-// Scalar Floating Point fused multiply-add (scalar, by element)
-def FMLAssv_4S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"fmla",
- 0b0001, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> {
- let Inst{11} = Imm{1}; // h
- let Inst{21} = Imm{0}; // l
- let Inst{20-16} = MRm;
-}
-def FMLAddv_2D : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"fmla",
- 0b0001, ".d", 0b0, 0b1, 0b1, FPR64, FPR64, VPR128, neon_uimm1_bare> {
- let Inst{11} = Imm{0}; // h
- let Inst{21} = 0b0; // l
- let Inst{20-16} = MRm;
-}
-
-// Scalar Floating Point fused multiply-subtract (scalar, by element)
-def FMLSssv_4S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"fmls",
- 0b0101, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> {
- let Inst{11} = Imm{1}; // h
- let Inst{21} = Imm{0}; // l
- let Inst{20-16} = MRm;
-}
-def FMLSddv_2D : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"fmls",
- 0b0101, ".d", 0b0, 0b1, 0b1, FPR64, FPR64, VPR128, neon_uimm1_bare> {
- let Inst{11} = Imm{0}; // h
- let Inst{21} = 0b0; // l
- let Inst{20-16} = MRm;
-}
-// We are allowed to match the fma instruction regardless of compile options.
-multiclass Neon_ScalarXIndexedElem_FMA_Patterns<
- Instruction FMLAI, Instruction FMLSI,
- ValueType ResTy, RegisterClass FPRC, ValueType OpTy, Operand OpImm,
- ValueType OpNTy, ValueType ExTy, Operand OpNImm> {
- // fmla
- def : Pat<(ResTy (fma (ResTy FPRC:$Rn),
- (ResTy (vector_extract (OpTy VPR128:$MRm), OpImm:$Imm)),
- (ResTy FPRC:$Ra))),
- (ResTy (FMLAI (ResTy FPRC:$Ra),
- (ResTy FPRC:$Rn), (OpTy VPR128:$MRm), OpImm:$Imm))>;
-
- def : Pat<(ResTy (fma (ResTy FPRC:$Rn),
- (ResTy (vector_extract (OpNTy VPR64:$MRm), OpNImm:$Imm)),
- (ResTy FPRC:$Ra))),
- (ResTy (FMLAI (ResTy FPRC:$Ra),
- (ResTy FPRC:$Rn),
- (ExTy (SUBREG_TO_REG (i64 0), VPR64:$MRm, sub_64)),
- OpNImm:$Imm))>;
-
- // swapped fmla operands
- def : Pat<(ResTy (fma
- (ResTy (vector_extract (OpTy VPR128:$MRm), OpImm:$Imm)),
- (ResTy FPRC:$Rn),
- (ResTy FPRC:$Ra))),
- (ResTy (FMLAI (ResTy FPRC:$Ra),
- (ResTy FPRC:$Rn), (OpTy VPR128:$MRm), OpImm:$Imm))>;
-
- def : Pat<(ResTy (fma
- (ResTy (vector_extract (OpNTy VPR64:$MRm), OpNImm:$Imm)),
- (ResTy FPRC:$Rn),
- (ResTy FPRC:$Ra))),
- (ResTy (FMLAI (ResTy FPRC:$Ra),
- (ResTy FPRC:$Rn),
- (ExTy (SUBREG_TO_REG (i64 0), VPR64:$MRm, sub_64)),
- OpNImm:$Imm))>;
-
- // fmls
- def : Pat<(ResTy (fma (ResTy FPRC:$Rn),
- (fneg (ResTy (vector_extract (OpTy VPR128:$MRm), OpImm:$Imm))),
- (ResTy FPRC:$Ra))),
- (ResTy (FMLSI (ResTy FPRC:$Ra),
- (ResTy FPRC:$Rn), (OpTy VPR128:$MRm), OpImm:$Imm))>;
-
- def : Pat<(ResTy (fma (ResTy FPRC:$Rn),
- (fneg (ResTy (vector_extract (OpNTy VPR64:$MRm), OpNImm:$Imm))),
- (ResTy FPRC:$Ra))),
- (ResTy (FMLSI (ResTy FPRC:$Ra),
- (ResTy FPRC:$Rn),
- (ExTy (SUBREG_TO_REG (i64 0), VPR64:$MRm, sub_64)),
- OpNImm:$Imm))>;
-
- // swapped fmls operands
- def : Pat<(ResTy (fma
- (fneg (ResTy (vector_extract (OpTy VPR128:$MRm), OpImm:$Imm))),
- (ResTy FPRC:$Rn),
- (ResTy FPRC:$Ra))),
- (ResTy (FMLSI (ResTy FPRC:$Ra),
- (ResTy FPRC:$Rn), (OpTy VPR128:$MRm), OpImm:$Imm))>;
-
- def : Pat<(ResTy (fma
- (fneg (ResTy (vector_extract (OpNTy VPR64:$MRm), OpNImm:$Imm))),
- (ResTy FPRC:$Rn),
- (ResTy FPRC:$Ra))),
- (ResTy (FMLSI (ResTy FPRC:$Ra),
- (ResTy FPRC:$Rn),
- (ExTy (SUBREG_TO_REG (i64 0), VPR64:$MRm, sub_64)),
- OpNImm:$Imm))>;
-}
-
-// Scalar Floating Point fused multiply-add and
-// multiply-subtract (scalar, by element)
-defm : Neon_ScalarXIndexedElem_FMA_Patterns<FMLAssv_4S, FMLSssv_4S,
- f32, FPR32, v4f32, neon_uimm2_bare, v2f32, v4f32, neon_uimm1_bare>;
-defm : Neon_ScalarXIndexedElem_FMA_Patterns<FMLAddv_2D, FMLSddv_2D,
- f64, FPR64, v2f64, neon_uimm1_bare, v1f64, v2f64, neon_uimm0_bare>;
-
-// Scalar Signed saturating doubling multiply long (scalar, by element)
-def SQDMULLshv_4H : NeonI_ScalarXIndexedElemArith<"sqdmull",
- 0b1011, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR64Lo, neon_uimm2_bare> {
- let Inst{11} = 0b0; // h
- let Inst{21} = Imm{1}; // l
- let Inst{20} = Imm{0}; // m
- let Inst{19-16} = MRm{3-0};
-}
-def SQDMULLshv_8H : NeonI_ScalarXIndexedElemArith<"sqdmull",
- 0b1011, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR128Lo, neon_uimm3_bare> {
- let Inst{11} = Imm{2}; // h
- let Inst{21} = Imm{1}; // l
- let Inst{20} = Imm{0}; // m
- let Inst{19-16} = MRm{3-0};
-}
-def SQDMULLdsv_2S : NeonI_ScalarXIndexedElemArith<"sqdmull",
- 0b1011, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR64, neon_uimm1_bare> {
- let Inst{11} = 0b0; // h
- let Inst{21} = Imm{0}; // l
- let Inst{20-16} = MRm;
-}
-def SQDMULLdsv_4S : NeonI_ScalarXIndexedElemArith<"sqdmull",
- 0b1011, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR128, neon_uimm2_bare> {
- let Inst{11} = Imm{1}; // h
- let Inst{21} = Imm{0}; // l
- let Inst{20-16} = MRm;
-}
-
-multiclass Neon_ScalarXIndexedElem_MUL_Patterns<
- SDPatternOperator opnode,
- Instruction INST,
- ValueType ResTy, RegisterClass FPRC,
- ValueType OpVTy, ValueType OpTy,
- ValueType VecOpTy, ValueType ExTy, RegisterOperand VPRC, Operand OpImm> {
-
- def : Pat<(ResTy (opnode (OpVTy FPRC:$Rn),
- (OpVTy (scalar_to_vector
- (ExTy (vector_extract (VecOpTy VPRC:$MRm), OpImm:$Imm)))))),
- (ResTy (INST (OpVTy FPRC:$Rn), (VecOpTy VPRC:$MRm), OpImm:$Imm))>;
-
- def : Pat<(ResTy (opnode (OpVTy FPRC:$Rn),
- (OpVTy (extract_subvector (VecOpTy VPRC:$MRm), OpImm:$Imm)))),
- (ResTy (INST (OpVTy FPRC:$Rn), (VecOpTy VPRC:$MRm), OpImm:$Imm))>;
-
- // swapped operands
- def : Pat<(ResTy (opnode
- (OpVTy (scalar_to_vector
- (ExTy (vector_extract (VecOpTy VPRC:$MRm), OpImm:$Imm)))),
- (OpVTy FPRC:$Rn))),
- (ResTy (INST (OpVTy FPRC:$Rn), (VecOpTy VPRC:$MRm), OpImm:$Imm))>;
-
- def : Pat<(ResTy (opnode
- (OpVTy (extract_subvector (VecOpTy VPRC:$MRm), OpImm:$Imm)),
- (OpVTy FPRC:$Rn))),
- (ResTy (INST (OpVTy FPRC:$Rn), (VecOpTy VPRC:$MRm), OpImm:$Imm))>;
-}
-
-
-// Patterns for Scalar Signed saturating doubling
-// multiply long (scalar, by element)
-defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqdmull,
- SQDMULLshv_4H, v1i32, FPR16, v1i16, i16, v4i16,
- i32, VPR64Lo, neon_uimm2_bare>;
-defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqdmull,
- SQDMULLshv_8H, v1i32, FPR16, v1i16, i16, v8i16,
- i32, VPR128Lo, neon_uimm3_bare>;
-defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqdmull,
- SQDMULLdsv_2S, v1i64, FPR32, v1i32, i32, v2i32,
- i32, VPR64Lo, neon_uimm1_bare>;
-defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqdmull,
- SQDMULLdsv_4S, v1i64, FPR32, v1i32, i32, v4i32,
- i32, VPR128Lo, neon_uimm2_bare>;
-
-// Scalar Signed saturating doubling multiply-add long (scalar, by element)
-def SQDMLALshv_4H : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlal",
- 0b0011, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR64Lo, neon_uimm2_bare> {
- let Inst{11} = 0b0; // h
- let Inst{21} = Imm{1}; // l
- let Inst{20} = Imm{0}; // m
- let Inst{19-16} = MRm{3-0};
-}
-def SQDMLALshv_8H : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlal",
- 0b0011, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR128Lo, neon_uimm3_bare> {
- let Inst{11} = Imm{2}; // h
- let Inst{21} = Imm{1}; // l
- let Inst{20} = Imm{0}; // m
- let Inst{19-16} = MRm{3-0};
-}
-def SQDMLALdsv_2S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlal",
- 0b0011, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR64, neon_uimm1_bare> {
- let Inst{11} = 0b0; // h
- let Inst{21} = Imm{0}; // l
- let Inst{20-16} = MRm;
-}
-def SQDMLALdsv_4S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlal",
- 0b0011, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR128, neon_uimm2_bare> {
- let Inst{11} = Imm{1}; // h
- let Inst{21} = Imm{0}; // l
- let Inst{20-16} = MRm;
-}
-
-// Scalar Signed saturating doubling
-// multiply-subtract long (scalar, by element)
-def SQDMLSLshv_4H : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlsl",
- 0b0111, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR64Lo, neon_uimm2_bare> {
- let Inst{11} = 0b0; // h
- let Inst{21} = Imm{1}; // l
- let Inst{20} = Imm{0}; // m
- let Inst{19-16} = MRm{3-0};
-}
-def SQDMLSLshv_8H : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlsl",
- 0b0111, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR128Lo, neon_uimm3_bare> {
- let Inst{11} = Imm{2}; // h
- let Inst{21} = Imm{1}; // l
- let Inst{20} = Imm{0}; // m
- let Inst{19-16} = MRm{3-0};
-}
-def SQDMLSLdsv_2S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlsl",
- 0b0111, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR64, neon_uimm1_bare> {
- let Inst{11} = 0b0; // h
- let Inst{21} = Imm{0}; // l
- let Inst{20-16} = MRm;
-}
-def SQDMLSLdsv_4S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlsl",
- 0b0111, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR128, neon_uimm2_bare> {
- let Inst{11} = Imm{1}; // h
- let Inst{21} = Imm{0}; // l
- let Inst{20-16} = MRm;
-}
-
-multiclass Neon_ScalarXIndexedElem_MLAL_Patterns<
- SDPatternOperator opnode,
- SDPatternOperator coreopnode,
- Instruction INST,
- ValueType ResTy, RegisterClass ResFPRC, RegisterClass FPRC,
- ValueType OpTy,
- ValueType OpVTy, ValueType ExTy, RegisterOperand VPRC, Operand OpImm> {
-
- def : Pat<(ResTy (opnode
- (ResTy ResFPRC:$Ra),
- (ResTy (coreopnode (OpTy FPRC:$Rn),
- (OpTy (scalar_to_vector
- (ExTy (vector_extract (OpVTy VPRC:$MRm), OpImm:$Imm)))))))),
- (ResTy (INST (ResTy ResFPRC:$Ra),
- (OpTy FPRC:$Rn), (OpVTy VPRC:$MRm), OpImm:$Imm))>;
-
- def : Pat<(ResTy (opnode
- (ResTy ResFPRC:$Ra),
- (ResTy (coreopnode (OpTy FPRC:$Rn),
- (OpTy (extract_subvector (OpVTy VPRC:$MRm), OpImm:$Imm)))))),
- (ResTy (INST (ResTy ResFPRC:$Ra),
- (OpTy FPRC:$Rn), (OpVTy VPRC:$MRm), OpImm:$Imm))>;
-
- // swapped operands
- def : Pat<(ResTy (opnode
- (ResTy ResFPRC:$Ra),
- (ResTy (coreopnode
- (OpTy (scalar_to_vector
- (ExTy (vector_extract (OpVTy VPRC:$MRm), OpImm:$Imm)))),
- (OpTy FPRC:$Rn))))),
- (ResTy (INST (ResTy ResFPRC:$Ra),
- (OpTy FPRC:$Rn), (OpVTy VPRC:$MRm), OpImm:$Imm))>;
-
- def : Pat<(ResTy (opnode
- (ResTy ResFPRC:$Ra),
- (ResTy (coreopnode
- (OpTy (extract_subvector (OpVTy VPRC:$MRm), OpImm:$Imm)),
- (OpTy FPRC:$Rn))))),
- (ResTy (INST (ResTy ResFPRC:$Ra),
- (OpTy FPRC:$Rn), (OpVTy VPRC:$MRm), OpImm:$Imm))>;
-}
-
-// Patterns for Scalar Signed saturating
-// doubling multiply-add long (scalar, by element)
-defm : Neon_ScalarXIndexedElem_MLAL_Patterns<int_arm_neon_vqadds,
- int_arm_neon_vqdmull, SQDMLALshv_4H, v1i32, FPR32, FPR16, v1i16, v4i16,
- i32, VPR64Lo, neon_uimm2_bare>;
-defm : Neon_ScalarXIndexedElem_MLAL_Patterns<int_arm_neon_vqadds,
- int_arm_neon_vqdmull, SQDMLALshv_8H, v1i32, FPR32, FPR16, v1i16, v8i16,
- i32, VPR128Lo, neon_uimm3_bare>;
-defm : Neon_ScalarXIndexedElem_MLAL_Patterns<int_arm_neon_vqadds,
- int_arm_neon_vqdmull, SQDMLALdsv_2S, v1i64, FPR64, FPR32, v1i32, v2i32,
- i32, VPR64Lo, neon_uimm1_bare>;
-defm : Neon_ScalarXIndexedElem_MLAL_Patterns<int_arm_neon_vqadds,
- int_arm_neon_vqdmull, SQDMLALdsv_4S, v1i64, FPR64, FPR32, v1i32, v4i32,
- i32, VPR128Lo, neon_uimm2_bare>;
-
-// Patterns for Scalar Signed saturating
-// doubling multiply-sub long (scalar, by element)
-defm : Neon_ScalarXIndexedElem_MLAL_Patterns<int_arm_neon_vqsubs,
- int_arm_neon_vqdmull, SQDMLSLshv_4H, v1i32, FPR32, FPR16, v1i16, v4i16,
- i32, VPR64Lo, neon_uimm2_bare>;
-defm : Neon_ScalarXIndexedElem_MLAL_Patterns<int_arm_neon_vqsubs,
- int_arm_neon_vqdmull, SQDMLSLshv_8H, v1i32, FPR32, FPR16, v1i16, v8i16,
- i32, VPR128Lo, neon_uimm3_bare>;
-defm : Neon_ScalarXIndexedElem_MLAL_Patterns<int_arm_neon_vqsubs,
- int_arm_neon_vqdmull, SQDMLSLdsv_2S, v1i64, FPR64, FPR32, v1i32, v2i32,
- i32, VPR64Lo, neon_uimm1_bare>;
-defm : Neon_ScalarXIndexedElem_MLAL_Patterns<int_arm_neon_vqsubs,
- int_arm_neon_vqdmull, SQDMLSLdsv_4S, v1i64, FPR64, FPR32, v1i32, v4i32,
- i32, VPR128Lo, neon_uimm2_bare>;
-
-// Scalar Signed saturating doubling multiply returning
-// high half (scalar, by element)
-def SQDMULHhhv_4H : NeonI_ScalarXIndexedElemArith<"sqdmulh",
- 0b1100, ".h", 0b0, 0b0, 0b1, FPR16, FPR16, VPR64Lo, neon_uimm2_bare> {
- let Inst{11} = 0b0; // h
- let Inst{21} = Imm{1}; // l
- let Inst{20} = Imm{0}; // m
- let Inst{19-16} = MRm{3-0};
-}
-def SQDMULHhhv_8H : NeonI_ScalarXIndexedElemArith<"sqdmulh",
- 0b1100, ".h", 0b0, 0b0, 0b1, FPR16, FPR16, VPR128Lo, neon_uimm3_bare> {
- let Inst{11} = Imm{2}; // h
- let Inst{21} = Imm{1}; // l
- let Inst{20} = Imm{0}; // m
- let Inst{19-16} = MRm{3-0};
-}
-def SQDMULHssv_2S : NeonI_ScalarXIndexedElemArith<"sqdmulh",
- 0b1100, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR64, neon_uimm1_bare> {
- let Inst{11} = 0b0; // h
- let Inst{21} = Imm{0}; // l
- let Inst{20-16} = MRm;
-}
-def SQDMULHssv_4S : NeonI_ScalarXIndexedElemArith<"sqdmulh",
- 0b1100, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> {
- let Inst{11} = Imm{1}; // h
- let Inst{21} = Imm{0}; // l
- let Inst{20-16} = MRm;
-}
-
-// Patterns for Scalar Signed saturating doubling multiply returning
-// high half (scalar, by element)
-defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqdmulh,
- SQDMULHhhv_4H, v1i16, FPR16, v1i16, i16, v4i16,
- i32, VPR64Lo, neon_uimm2_bare>;
-defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqdmulh,
- SQDMULHhhv_8H, v1i16, FPR16, v1i16, i16, v8i16,
- i32, VPR128Lo, neon_uimm3_bare>;
-defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqdmulh,
- SQDMULHssv_2S, v1i32, FPR32, v1i32, i32, v2i32,
- i32, VPR64Lo, neon_uimm1_bare>;
-defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqdmulh,
- SQDMULHssv_4S, v1i32, FPR32, v1i32, i32, v4i32,
- i32, VPR128Lo, neon_uimm2_bare>;
-
-// Scalar Signed saturating rounding doubling multiply
-// returning high half (scalar, by element)
-def SQRDMULHhhv_4H : NeonI_ScalarXIndexedElemArith<"sqrdmulh",
- 0b1101, ".h", 0b0, 0b0, 0b1, FPR16, FPR16, VPR64Lo, neon_uimm2_bare> {
- let Inst{11} = 0b0; // h
- let Inst{21} = Imm{1}; // l
- let Inst{20} = Imm{0}; // m
- let Inst{19-16} = MRm{3-0};
-}
-def SQRDMULHhhv_8H : NeonI_ScalarXIndexedElemArith<"sqrdmulh",
- 0b1101, ".h", 0b0, 0b0, 0b1, FPR16, FPR16, VPR128Lo, neon_uimm3_bare> {
- let Inst{11} = Imm{2}; // h
- let Inst{21} = Imm{1}; // l
- let Inst{20} = Imm{0}; // m
- let Inst{19-16} = MRm{3-0};
-}
-def SQRDMULHssv_2S : NeonI_ScalarXIndexedElemArith<"sqrdmulh",
- 0b1101, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR64, neon_uimm1_bare> {
- let Inst{11} = 0b0; // h
- let Inst{21} = Imm{0}; // l
- let Inst{20-16} = MRm;
-}
-def SQRDMULHssv_4S : NeonI_ScalarXIndexedElemArith<"sqrdmulh",
- 0b1101, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> {
- let Inst{11} = Imm{1}; // h
- let Inst{21} = Imm{0}; // l
- let Inst{20-16} = MRm;
-}
-
-defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqrdmulh,
- SQRDMULHhhv_4H, v1i16, FPR16, v1i16, i16, v4i16, i32,
- VPR64Lo, neon_uimm2_bare>;
-defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqrdmulh,
- SQRDMULHhhv_8H, v1i16, FPR16, v1i16, i16, v8i16, i32,
- VPR128Lo, neon_uimm3_bare>;
-defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqrdmulh,
- SQRDMULHssv_2S, v1i32, FPR32, v1i32, i32, v2i32, i32,
- VPR64Lo, neon_uimm1_bare>;
-defm : Neon_ScalarXIndexedElem_MUL_Patterns<int_arm_neon_vqrdmulh,
- SQRDMULHssv_4S, v1i32, FPR32, v1i32, i32, v4i32, i32,
- VPR128Lo, neon_uimm2_bare>;
-
-// Scalar general arithmetic operation
-class Neon_Scalar_GeneralMath2D_pattern<SDPatternOperator opnode,
- Instruction INST>
- : Pat<(v1f64 (opnode (v1f64 FPR64:$Rn))), (INST FPR64:$Rn)>;
-
-class Neon_Scalar_GeneralMath3D_pattern<SDPatternOperator opnode,
- Instruction INST>
- : Pat<(v1f64 (opnode (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
- (INST FPR64:$Rn, FPR64:$Rm)>;
-
-class Neon_Scalar_GeneralMath4D_pattern<SDPatternOperator opnode,
- Instruction INST>
- : Pat<(v1f64 (opnode (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm),
- (v1f64 FPR64:$Ra))),
- (INST FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;
-
-def : Neon_Scalar_GeneralMath3D_pattern<fadd, FADDddd>;
-def : Neon_Scalar_GeneralMath3D_pattern<fmul, FMULddd>;
-def : Neon_Scalar_GeneralMath3D_pattern<fsub, FSUBddd>;
-def : Neon_Scalar_GeneralMath3D_pattern<fdiv, FDIVddd>;
-def : Neon_Scalar_GeneralMath3D_pattern<int_arm_neon_vabds, FABDddd>;
-def : Neon_Scalar_GeneralMath3D_pattern<int_arm_neon_vmaxs, FMAXddd>;
-def : Neon_Scalar_GeneralMath3D_pattern<int_arm_neon_vmins, FMINddd>;
-def : Neon_Scalar_GeneralMath3D_pattern<int_aarch64_neon_vmaxnm, FMAXNMddd>;
-def : Neon_Scalar_GeneralMath3D_pattern<int_aarch64_neon_vminnm, FMINNMddd>;
-
-def : Neon_Scalar_GeneralMath2D_pattern<fabs, FABSdd>;
-def : Neon_Scalar_GeneralMath2D_pattern<fneg, FNEGdd>;
-
-def : Neon_Scalar_GeneralMath4D_pattern<fma, FMADDdddd>;
-def : Neon_Scalar_GeneralMath4D_pattern<fmsub, FMSUBdddd>;
-
-// Scalar Copy - DUP element to scalar
-class NeonI_Scalar_DUP<string asmop, string asmlane,
- RegisterClass ResRC, RegisterOperand VPRC,
- Operand OpImm>
- : NeonI_ScalarCopy<(outs ResRC:$Rd), (ins VPRC:$Rn, OpImm:$Imm),
- asmop # "\t$Rd, $Rn." # asmlane # "[$Imm]",
- [],
- NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU]> {
- bits<4> Imm;
-}
-
-def DUPbv_B : NeonI_Scalar_DUP<"dup", "b", FPR8, VPR128, neon_uimm4_bare> {
- let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
-}
-def DUPhv_H : NeonI_Scalar_DUP<"dup", "h", FPR16, VPR128, neon_uimm3_bare> {
- let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
-}
-def DUPsv_S : NeonI_Scalar_DUP<"dup", "s", FPR32, VPR128, neon_uimm2_bare> {
- let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0};
-}
-def DUPdv_D : NeonI_Scalar_DUP<"dup", "d", FPR64, VPR128, neon_uimm1_bare> {
- let Inst{20-16} = {Imm, 0b1, 0b0, 0b0, 0b0};
-}
-
-def : Pat<(f32 (vector_extract (v4f32 VPR128:$Rn), 0)),
- (f32 (EXTRACT_SUBREG (v4f32 VPR128:$Rn), sub_32))>;
-def : Pat<(f32 (vector_extract (v4f32 VPR128:$Rn), 1)),
- (f32 (DUPsv_S (v4f32 VPR128:$Rn), 1))>;
-def : Pat<(f32 (vector_extract (v4f32 VPR128:$Rn), 2)),
- (f32 (DUPsv_S (v4f32 VPR128:$Rn), 2))>;
-def : Pat<(f32 (vector_extract (v4f32 VPR128:$Rn), 3)),
- (f32 (DUPsv_S (v4f32 VPR128:$Rn), 3))>;
-
-def : Pat<(f64 (vector_extract (v2f64 VPR128:$Rn), 0)),
- (f64 (EXTRACT_SUBREG (v2f64 VPR128:$Rn), sub_64))>;
-def : Pat<(f64 (vector_extract (v2f64 VPR128:$Rn), 1)),
- (f64 (DUPdv_D (v2f64 VPR128:$Rn), 1))>;
-
-def : Pat<(f32 (vector_extract (v2f32 VPR64:$Rn), 0)),
- (f32 (EXTRACT_SUBREG (v2f32 VPR64:$Rn), sub_32))>;
-def : Pat<(f32 (vector_extract (v2f32 VPR64:$Rn), 1)),
- (f32 (DUPsv_S (v4f32 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
- 1))>;
-
-def : Pat<(f64 (vector_extract (v1f64 VPR64:$Rn), 0)),
- (f64 (EXTRACT_SUBREG (v1f64 VPR64:$Rn), sub_64))>;
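-// Note the asymmetry above: lane 0 extracts are free subregister copies of
-// the low 32/64 bits, while non-zero lanes need a real scalar DUP (e.g.
-// "dup s0, v0.s[1]") to move the element into place.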
-
-multiclass NeonI_Scalar_DUP_Ext_Vec_pattern<Instruction DUPI,
- ValueType ResTy, ValueType OpTy,Operand OpLImm,
- ValueType NOpTy, ValueType ExTy, Operand OpNImm> {
-
- def : Pat<(ResTy (extract_subvector (OpTy VPR128:$Rn), OpLImm:$Imm)),
- (ResTy (DUPI VPR128:$Rn, OpLImm:$Imm))>;
-
- def : Pat<(ResTy (extract_subvector (NOpTy VPR64:$Rn), OpNImm:$Imm)),
- (ResTy (DUPI
- (ExTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
- OpNImm:$Imm))>;
-}
-
-// Patterns for extract subvectors of v1ix data using scalar DUP instructions.
-defm : NeonI_Scalar_DUP_Ext_Vec_pattern<DUPbv_B, v1i8, v16i8, neon_uimm4_bare,
- v8i8, v16i8, neon_uimm3_bare>;
-defm : NeonI_Scalar_DUP_Ext_Vec_pattern<DUPhv_H, v1i16, v8i16, neon_uimm3_bare,
- v4i16, v8i16, neon_uimm2_bare>;
-defm : NeonI_Scalar_DUP_Ext_Vec_pattern<DUPsv_S, v1i32, v4i32, neon_uimm2_bare,
- v2i32, v4i32, neon_uimm1_bare>;
-
-multiclass NeonI_Scalar_DUP_Copy_pattern1<Instruction DUPI, ValueType ResTy,
- ValueType OpTy, ValueType ElemTy,
- Operand OpImm, ValueType OpNTy,
- ValueType ExTy, Operand OpNImm> {
-
- def : Pat<(ResTy (vector_insert (ResTy undef),
- (ElemTy (vector_extract (OpTy VPR128:$Rn), OpImm:$Imm)),
- (neon_uimm0_bare:$Imm))),
- (ResTy (DUPI (OpTy VPR128:$Rn), OpImm:$Imm))>;
-
- def : Pat<(ResTy (vector_insert (ResTy undef),
- (ElemTy (vector_extract (OpNTy VPR64:$Rn), OpNImm:$Imm)),
- (OpNImm:$Imm))),
- (ResTy (DUPI
- (ExTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
- OpNImm:$Imm))>;
-}
-
-multiclass NeonI_Scalar_DUP_Copy_pattern2<Instruction DUPI, ValueType ResTy,
- ValueType OpTy, ValueType ElemTy,
- Operand OpImm, ValueType OpNTy,
- ValueType ExTy, Operand OpNImm> {
-
- def : Pat<(ResTy (scalar_to_vector
- (ElemTy (vector_extract (OpTy VPR128:$Rn), OpImm:$Imm)))),
- (ResTy (DUPI (OpTy VPR128:$Rn), OpImm:$Imm))>;
-
- def : Pat<(ResTy (scalar_to_vector
- (ElemTy (vector_extract (OpNTy VPR64:$Rn), OpNImm:$Imm)))),
- (ResTy (DUPI
- (ExTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
- OpNImm:$Imm))>;
-}
-
-// Patterns for vector copy to v1ix and v1fx vectors using scalar DUP
-// instructions.
-defm : NeonI_Scalar_DUP_Copy_pattern1<DUPdv_D,
- v1i64, v2i64, i64, neon_uimm1_bare,
- v1i64, v2i64, neon_uimm0_bare>;
-defm : NeonI_Scalar_DUP_Copy_pattern1<DUPsv_S,
- v1i32, v4i32, i32, neon_uimm2_bare,
- v2i32, v4i32, neon_uimm1_bare>;
-defm : NeonI_Scalar_DUP_Copy_pattern1<DUPhv_H,
- v1i16, v8i16, i32, neon_uimm3_bare,
- v4i16, v8i16, neon_uimm2_bare>;
-defm : NeonI_Scalar_DUP_Copy_pattern1<DUPbv_B,
- v1i8, v16i8, i32, neon_uimm4_bare,
- v8i8, v16i8, neon_uimm3_bare>;
-defm : NeonI_Scalar_DUP_Copy_pattern2<DUPdv_D,
- v1i64, v2i64, i64, neon_uimm1_bare,
- v1i64, v2i64, neon_uimm0_bare>;
-defm : NeonI_Scalar_DUP_Copy_pattern2<DUPsv_S,
- v1i32, v4i32, i32, neon_uimm2_bare,
- v2i32, v4i32, neon_uimm1_bare>;
-defm : NeonI_Scalar_DUP_Copy_pattern2<DUPhv_H,
- v1i16, v8i16, i32, neon_uimm3_bare,
- v4i16, v8i16, neon_uimm2_bare>;
-defm : NeonI_Scalar_DUP_Copy_pattern2<DUPbv_B,
- v1i8, v16i8, i32, neon_uimm4_bare,
- v8i8, v16i8, neon_uimm3_bare>;
-
-multiclass NeonI_Scalar_DUP_alias<string asmop, string asmlane,
- Instruction DUPI, Operand OpImm,
- RegisterClass ResRC> {
- def : NeonInstAlias<!strconcat(asmop, "$Rd, $Rn" # asmlane # "[$Imm]"),
- (DUPI ResRC:$Rd, VPR128:$Rn, OpImm:$Imm), 0b0>;
-}
-
-// Aliases for Scalar copy - DUP element (scalar)
-// FIXME: This is actually the preferred syntax but TableGen can't deal with
-// custom printing of aliases.
-defm : NeonI_Scalar_DUP_alias<"mov", ".b", DUPbv_B, neon_uimm4_bare, FPR8>;
-defm : NeonI_Scalar_DUP_alias<"mov", ".h", DUPhv_H, neon_uimm3_bare, FPR16>;
-defm : NeonI_Scalar_DUP_alias<"mov", ".s", DUPsv_S, neon_uimm2_bare, FPR32>;
-defm : NeonI_Scalar_DUP_alias<"mov", ".d", DUPdv_D, neon_uimm1_bare, FPR64>;
-
-multiclass NeonI_SDUP<PatFrag GetLow, PatFrag GetHigh, ValueType ResTy,
- ValueType OpTy> {
- def : Pat<(ResTy (GetLow VPR128:$Rn)),
- (ResTy (EXTRACT_SUBREG (OpTy VPR128:$Rn), sub_64))>;
- def : Pat<(ResTy (GetHigh VPR128:$Rn)),
- (ResTy (DUPdv_D (OpTy VPR128:$Rn), 1))>;
-}
-
-defm : NeonI_SDUP<Neon_Low16B, Neon_High16B, v8i8, v16i8>;
-defm : NeonI_SDUP<Neon_Low8H, Neon_High8H, v4i16, v8i16>;
-defm : NeonI_SDUP<Neon_Low4S, Neon_High4S, v2i32, v4i32>;
-defm : NeonI_SDUP<Neon_Low2D, Neon_High2D, v1i64, v2i64>;
-defm : NeonI_SDUP<Neon_Low4float, Neon_High4float, v2f32, v4f32>;
-defm : NeonI_SDUP<Neon_Low2double, Neon_High2double, v1f64, v2f64>;
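-// Taking the low half of a 128-bit register is a plain subregister copy,
-// while the high half is materialized with DUPdv_D at lane 1 (e.g.
-// "dup d0, v0.d[1]").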
-
-// The following patterns sign-/zero-extend from one v1ix type to a wider one
-multiclass NeonI_ext<string prefix, SDNode ExtOp> {
- // v1i32 -> v1i64
- def : Pat<(v1i64 (ExtOp (v1i32 FPR32:$Rn))),
- (EXTRACT_SUBREG
- (v2i64 (!cast<Instruction>(prefix # "_2S")
- (v2i32 (SUBREG_TO_REG (i64 0), $Rn, sub_32)), 0)),
- sub_64)>;
-
- // v1i16 -> v1i32
- def : Pat<(v1i32 (ExtOp (v1i16 FPR16:$Rn))),
- (EXTRACT_SUBREG
- (v4i32 (!cast<Instruction>(prefix # "_4H")
- (v4i16 (SUBREG_TO_REG (i64 0), $Rn, sub_16)), 0)),
- sub_32)>;
-
- // v1i8 -> v1i16
- def : Pat<(v1i16 (ExtOp (v1i8 FPR8:$Rn))),
- (EXTRACT_SUBREG
- (v8i16 (!cast<Instruction>(prefix # "_8B")
- (v8i8 (SUBREG_TO_REG (i64 0), $Rn, sub_8)), 0)),
- sub_16)>;
-}
-
-defm NeonI_zext : NeonI_ext<"USHLLvvi", zext>;
-defm NeonI_sext : NeonI_ext<"SSHLLvvi", sext>;
-
-// zext v1i8 -> v1i32
-def : Pat<(v1i32 (zext (v1i8 FPR8:$Rn))),
- (v1i32 (EXTRACT_SUBREG
- (v1i64 (SUBREG_TO_REG (i64 0),
- (v1i8 (DUPbv_B
- (v16i8 (SUBREG_TO_REG (i64 0), $Rn, sub_8)),
- 0)),
- sub_8)),
- sub_32))>;
-
-// zext v1i8 -> v1i64
-def : Pat<(v1i64 (zext (v1i8 FPR8:$Rn))),
- (v1i64 (SUBREG_TO_REG (i64 0),
- (v1i8 (DUPbv_B
- (v16i8 (SUBREG_TO_REG (i64 0), $Rn, sub_8)),
- 0)),
- sub_8))>;
-
-// zext v1i16 -> v1i64
-def : Pat<(v1i64 (zext (v1i16 FPR16:$Rn))),
- (v1i64 (SUBREG_TO_REG (i64 0),
- (v1i16 (DUPhv_H
- (v8i16 (SUBREG_TO_REG (i64 0), $Rn, sub_16)),
- 0)),
- sub_16))>;
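-// The zext patterns above exploit the fact that a scalar DUP, like other
-// scalar FP/SIMD writes, zeroes the unwritten upper bits of its destination
-// register, so SUBREG_TO_REG can then assert the wider value is already
-// zero-extended and no masking instruction is needed.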
-
-// sext v1i8 -> v1i32
-def : Pat<(v1i32 (sext (v1i8 FPR8:$Rn))),
- (EXTRACT_SUBREG
- (v4i32 (SSHLLvvi_4H
- (v4i16 (SUBREG_TO_REG (i64 0),
- (v1i16 (EXTRACT_SUBREG
- (v8i16 (SSHLLvvi_8B
- (v8i8 (SUBREG_TO_REG (i64 0), $Rn, sub_8)), 0)),
- sub_16)),
- sub_16)), 0)),
- sub_32)>;
-
-// sext v1i8 -> v1i64
-def : Pat<(v1i64 (sext (v1i8 FPR8:$Rn))),
- (EXTRACT_SUBREG
- (v2i64 (SSHLLvvi_2S
- (v2i32 (SUBREG_TO_REG (i64 0),
- (v1i32 (EXTRACT_SUBREG
- (v4i32 (SSHLLvvi_4H
- (v4i16 (SUBREG_TO_REG (i64 0),
- (v1i16 (EXTRACT_SUBREG
- (v8i16 (SSHLLvvi_8B
- (v8i8 (SUBREG_TO_REG (i64 0), $Rn, sub_8)), 0)),
- sub_16)),
- sub_16)), 0)),
- sub_32)),
- sub_32)), 0)),
- sub_64)>;
-
-
-// sext v1i16 -> v1i64
-def : Pat<(v1i64 (sext (v1i16 FPR16:$Rn))),
- (EXTRACT_SUBREG
- (v2i64 (SSHLLvvi_2S
- (v2i32 (SUBREG_TO_REG (i64 0),
- (v1i32 (EXTRACT_SUBREG
- (v4i32 (SSHLLvvi_4H
- (v4i16 (SUBREG_TO_REG (i64 0), $Rn, sub_16)), 0)),
- sub_32)),
- sub_32)), 0)),
- sub_64)>;
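-// There is no single scalar sign-extend here, so the sext patterns chain
-// SSHLL-by-#0 widening shifts (8->16->32->64 bits as needed), pulling the
-// low lane back out with EXTRACT_SUBREG between steps.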
-
-//===----------------------------------------------------------------------===//
-// Non-Instruction Patterns
-//===----------------------------------------------------------------------===//
-
-// 64-bit vector bitcasts...
-
-def : Pat<(v1i64 (bitconvert (v8i8 VPR64:$src))), (v1i64 VPR64:$src)>;
-def : Pat<(v2f32 (bitconvert (v8i8 VPR64:$src))), (v2f32 VPR64:$src)>;
-def : Pat<(v2i32 (bitconvert (v8i8 VPR64:$src))), (v2i32 VPR64:$src)>;
-def : Pat<(v4i16 (bitconvert (v8i8 VPR64:$src))), (v4i16 VPR64:$src)>;
-
-def : Pat<(v1i64 (bitconvert (v4i16 VPR64:$src))), (v1i64 VPR64:$src)>;
-def : Pat<(v2i32 (bitconvert (v4i16 VPR64:$src))), (v2i32 VPR64:$src)>;
-def : Pat<(v2f32 (bitconvert (v4i16 VPR64:$src))), (v2f32 VPR64:$src)>;
-def : Pat<(v8i8 (bitconvert (v4i16 VPR64:$src))), (v8i8 VPR64:$src)>;
-
-def : Pat<(v1i64 (bitconvert (v2i32 VPR64:$src))), (v1i64 VPR64:$src)>;
-def : Pat<(v2f32 (bitconvert (v2i32 VPR64:$src))), (v2f32 VPR64:$src)>;
-def : Pat<(v4i16 (bitconvert (v2i32 VPR64:$src))), (v4i16 VPR64:$src)>;
-def : Pat<(v8i8 (bitconvert (v2i32 VPR64:$src))), (v8i8 VPR64:$src)>;
-
-def : Pat<(v1i64 (bitconvert (v2f32 VPR64:$src))), (v1i64 VPR64:$src)>;
-def : Pat<(v2i32 (bitconvert (v2f32 VPR64:$src))), (v2i32 VPR64:$src)>;
-def : Pat<(v4i16 (bitconvert (v2f32 VPR64:$src))), (v4i16 VPR64:$src)>;
-def : Pat<(v8i8 (bitconvert (v2f32 VPR64:$src))), (v8i8 VPR64:$src)>;
-
-def : Pat<(v2f32 (bitconvert (v1i64 VPR64:$src))), (v2f32 VPR64:$src)>;
-def : Pat<(v2i32 (bitconvert (v1i64 VPR64:$src))), (v2i32 VPR64:$src)>;
-def : Pat<(v4i16 (bitconvert (v1i64 VPR64:$src))), (v4i16 VPR64:$src)>;
-def : Pat<(v8i8 (bitconvert (v1i64 VPR64:$src))), (v8i8 VPR64:$src)>;
-
-def : Pat<(v1i64 (bitconvert (v1f64 VPR64:$src))), (v1i64 VPR64:$src)>;
-def : Pat<(v2f32 (bitconvert (v1f64 VPR64:$src))), (v2f32 VPR64:$src)>;
-def : Pat<(v2i32 (bitconvert (v1f64 VPR64:$src))), (v2i32 VPR64:$src)>;
-def : Pat<(v4i16 (bitconvert (v1f64 VPR64:$src))), (v4i16 VPR64:$src)>;
-def : Pat<(v8i8 (bitconvert (v1f64 VPR64:$src))), (v8i8 VPR64:$src)>;
-def : Pat<(f64 (bitconvert (v1f64 VPR64:$src))), (f64 VPR64:$src)>;
-
-def : Pat<(v1f64 (bitconvert (v1i64 VPR64:$src))), (v1f64 VPR64:$src)>;
-def : Pat<(v1f64 (bitconvert (v2f32 VPR64:$src))), (v1f64 VPR64:$src)>;
-def : Pat<(v1f64 (bitconvert (v2i32 VPR64:$src))), (v1f64 VPR64:$src)>;
-def : Pat<(v1f64 (bitconvert (v4i16 VPR64:$src))), (v1f64 VPR64:$src)>;
-def : Pat<(v1f64 (bitconvert (v8i8 VPR64:$src))), (v1f64 VPR64:$src)>;
-def : Pat<(v1f64 (bitconvert (f64 VPR64:$src))), (v1f64 VPR64:$src)>;
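-// None of the bitcasts above emit an instruction: source and destination
-// live in the same 64-bit FP/SIMD register, so each pattern simply reuses
-// the input register with a new type.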
-
-// ...and 128-bit vector bitcasts...
-
-def : Pat<(v2f64 (bitconvert (v16i8 VPR128:$src))), (v2f64 VPR128:$src)>;
-def : Pat<(v2i64 (bitconvert (v16i8 VPR128:$src))), (v2i64 VPR128:$src)>;
-def : Pat<(v4f32 (bitconvert (v16i8 VPR128:$src))), (v4f32 VPR128:$src)>;
-def : Pat<(v4i32 (bitconvert (v16i8 VPR128:$src))), (v4i32 VPR128:$src)>;
-def : Pat<(v8i16 (bitconvert (v16i8 VPR128:$src))), (v8i16 VPR128:$src)>;
-
-def : Pat<(v2f64 (bitconvert (v8i16 VPR128:$src))), (v2f64 VPR128:$src)>;
-def : Pat<(v2i64 (bitconvert (v8i16 VPR128:$src))), (v2i64 VPR128:$src)>;
-def : Pat<(v4i32 (bitconvert (v8i16 VPR128:$src))), (v4i32 VPR128:$src)>;
-def : Pat<(v4f32 (bitconvert (v8i16 VPR128:$src))), (v4f32 VPR128:$src)>;
-def : Pat<(v16i8 (bitconvert (v8i16 VPR128:$src))), (v16i8 VPR128:$src)>;
-
-def : Pat<(v2f64 (bitconvert (v4i32 VPR128:$src))), (v2f64 VPR128:$src)>;
-def : Pat<(v2i64 (bitconvert (v4i32 VPR128:$src))), (v2i64 VPR128:$src)>;
-def : Pat<(v4f32 (bitconvert (v4i32 VPR128:$src))), (v4f32 VPR128:$src)>;
-def : Pat<(v8i16 (bitconvert (v4i32 VPR128:$src))), (v8i16 VPR128:$src)>;
-def : Pat<(v16i8 (bitconvert (v4i32 VPR128:$src))), (v16i8 VPR128:$src)>;
-
-def : Pat<(v2f64 (bitconvert (v4f32 VPR128:$src))), (v2f64 VPR128:$src)>;
-def : Pat<(v2i64 (bitconvert (v4f32 VPR128:$src))), (v2i64 VPR128:$src)>;
-def : Pat<(v4i32 (bitconvert (v4f32 VPR128:$src))), (v4i32 VPR128:$src)>;
-def : Pat<(v8i16 (bitconvert (v4f32 VPR128:$src))), (v8i16 VPR128:$src)>;
-def : Pat<(v16i8 (bitconvert (v4f32 VPR128:$src))), (v16i8 VPR128:$src)>;
-
-def : Pat<(v2f64 (bitconvert (v2i64 VPR128:$src))), (v2f64 VPR128:$src)>;
-def : Pat<(v4f32 (bitconvert (v2i64 VPR128:$src))), (v4f32 VPR128:$src)>;
-def : Pat<(v4i32 (bitconvert (v2i64 VPR128:$src))), (v4i32 VPR128:$src)>;
-def : Pat<(v8i16 (bitconvert (v2i64 VPR128:$src))), (v8i16 VPR128:$src)>;
-def : Pat<(v16i8 (bitconvert (v2i64 VPR128:$src))), (v16i8 VPR128:$src)>;
-
-def : Pat<(v2i64 (bitconvert (v2f64 VPR128:$src))), (v2i64 VPR128:$src)>;
-def : Pat<(v4f32 (bitconvert (v2f64 VPR128:$src))), (v4f32 VPR128:$src)>;
-def : Pat<(v4i32 (bitconvert (v2f64 VPR128:$src))), (v4i32 VPR128:$src)>;
-def : Pat<(v8i16 (bitconvert (v2f64 VPR128:$src))), (v8i16 VPR128:$src)>;
-def : Pat<(v16i8 (bitconvert (v2f64 VPR128:$src))), (v16i8 VPR128:$src)>;
-
-// ...and scalar bitcasts...
-def : Pat<(f16 (bitconvert (v1i16 FPR16:$src))), (f16 FPR16:$src)>;
-def : Pat<(f32 (bitconvert (v1i32 FPR32:$src))), (f32 FPR32:$src)>;
-def : Pat<(f64 (bitconvert (v1i64 FPR64:$src))), (f64 FPR64:$src)>;
-def : Pat<(f64 (bitconvert (v1f64 FPR64:$src))), (f64 FPR64:$src)>;
-
-def : Pat<(i64 (bitconvert (v1i64 FPR64:$src))), (FMOVxd $src)>;
-def : Pat<(i64 (bitconvert (v1f64 FPR64:$src))), (FMOVxd $src)>;
-def : Pat<(i64 (bitconvert (v2i32 FPR64:$src))), (FMOVxd $src)>;
-def : Pat<(i64 (bitconvert (v2f32 FPR64:$src))), (FMOVxd $src)>;
-def : Pat<(i64 (bitconvert (v4i16 FPR64:$src))), (FMOVxd $src)>;
-def : Pat<(i64 (bitconvert (v8i8 FPR64:$src))), (FMOVxd $src)>;
-
-def : Pat<(i32 (bitconvert (v1i32 FPR32:$src))), (FMOVws $src)>;
-
-def : Pat<(v8i8 (bitconvert (v1i64 VPR64:$src))), (v8i8 VPR64:$src)>;
-def : Pat<(v4i16 (bitconvert (v1i64 VPR64:$src))), (v4i16 VPR64:$src)>;
-def : Pat<(v2i32 (bitconvert (v1i64 VPR64:$src))), (v2i32 VPR64:$src)>;
-
-def : Pat<(f64 (bitconvert (v8i8 VPR64:$src))), (f64 VPR64:$src)>;
-def : Pat<(f64 (bitconvert (v4i16 VPR64:$src))), (f64 VPR64:$src)>;
-def : Pat<(f64 (bitconvert (v2i32 VPR64:$src))), (f64 VPR64:$src)>;
-def : Pat<(f64 (bitconvert (v2f32 VPR64:$src))), (f64 VPR64:$src)>;
-def : Pat<(f64 (bitconvert (v1i64 VPR64:$src))), (f64 VPR64:$src)>;
-
-def : Pat<(f128 (bitconvert (v16i8 VPR128:$src))), (f128 VPR128:$src)>;
-def : Pat<(f128 (bitconvert (v8i16 VPR128:$src))), (f128 VPR128:$src)>;
-def : Pat<(f128 (bitconvert (v4i32 VPR128:$src))), (f128 VPR128:$src)>;
-def : Pat<(f128 (bitconvert (v2i64 VPR128:$src))), (f128 VPR128:$src)>;
-def : Pat<(f128 (bitconvert (v4f32 VPR128:$src))), (f128 VPR128:$src)>;
-def : Pat<(f128 (bitconvert (v2f64 VPR128:$src))), (f128 VPR128:$src)>;
-
-def : Pat<(v1i16 (bitconvert (f16 FPR16:$src))), (v1i16 FPR16:$src)>;
-def : Pat<(v1i32 (bitconvert (f32 FPR32:$src))), (v1i32 FPR32:$src)>;
-def : Pat<(v1i64 (bitconvert (f64 FPR64:$src))), (v1i64 FPR64:$src)>;
-def : Pat<(v1f64 (bitconvert (f64 FPR64:$src))), (v1f64 FPR64:$src)>;
-
-def : Pat<(v1i64 (bitconvert (i64 GPR64:$src))), (FMOVdx $src)>;
-def : Pat<(v1f64 (bitconvert (i64 GPR64:$src))), (FMOVdx $src)>;
-def : Pat<(v2i32 (bitconvert (i64 GPR64:$src))), (FMOVdx $src)>;
-def : Pat<(v2f32 (bitconvert (i64 GPR64:$src))), (FMOVdx $src)>;
-def : Pat<(v4i16 (bitconvert (i64 GPR64:$src))), (FMOVdx $src)>;
-def : Pat<(v8i8 (bitconvert (i64 GPR64:$src))), (FMOVdx $src)>;
-
-def : Pat<(v1i32 (bitconvert (i32 GPR32:$src))), (FMOVsw $src)>;
-
-def : Pat<(v8i8 (bitconvert (f64 FPR64:$src))), (v8i8 FPR64:$src)>;
-def : Pat<(v4i16 (bitconvert (f64 FPR64:$src))), (v4i16 FPR64:$src)>;
-def : Pat<(v2i32 (bitconvert (f64 FPR64:$src))), (v2i32 FPR64:$src)>;
-def : Pat<(v2f32 (bitconvert (f64 FPR64:$src))), (v2f32 FPR64:$src)>;
-def : Pat<(v1i64 (bitconvert (f64 FPR64:$src))), (v1i64 FPR64:$src)>;
-
-def : Pat<(v16i8 (bitconvert (f128 FPR128:$src))), (v16i8 FPR128:$src)>;
-def : Pat<(v8i16 (bitconvert (f128 FPR128:$src))), (v8i16 FPR128:$src)>;
-def : Pat<(v4i32 (bitconvert (f128 FPR128:$src))), (v4i32 FPR128:$src)>;
-def : Pat<(v2i64 (bitconvert (f128 FPR128:$src))), (v2i64 FPR128:$src)>;
-def : Pat<(v4f32 (bitconvert (f128 FPR128:$src))), (v4f32 FPR128:$src)>;
-def : Pat<(v2f64 (bitconvert (f128 FPR128:$src))), (v2f64 FPR128:$src)>;
-
-// Scalar Three Same
-
-def neon_uimm3 : Operand<i64>,
- ImmLeaf<i64, [{return Imm < 8;}]> {
- let ParserMatchClass = uimm3_asmoperand;
- let PrintMethod = "printUImmHexOperand";
-}
-
-def neon_uimm4 : Operand<i64>,
- ImmLeaf<i64, [{return Imm < 16;}]> {
- let ParserMatchClass = uimm4_asmoperand;
- let PrintMethod = "printUImmHexOperand";
-}
-
-// Bitwise Extract
-class NeonI_Extract<bit q, bits<2> op2, string asmop,
- string OpS, RegisterOperand OpVPR, Operand OpImm>
- : NeonI_BitExtract<q, op2, (outs OpVPR:$Rd),
- (ins OpVPR:$Rn, OpVPR:$Rm, OpImm:$Index),
- asmop # "\t$Rd." # OpS # ", $Rn." # OpS #
- ", $Rm." # OpS # ", $Index",
- [],
- NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>{
- bits<4> Index;
-}
-
-def EXTvvvi_8b : NeonI_Extract<0b0, 0b00, "ext", "8b",
- VPR64, neon_uimm3> {
- let Inst{14-11} = {0b0, Index{2}, Index{1}, Index{0}};
-}
-
-def EXTvvvi_16b: NeonI_Extract<0b1, 0b00, "ext", "16b",
- VPR128, neon_uimm4> {
- let Inst{14-11} = Index;
-}
-
-class NI_Extract<ValueType OpTy, RegisterOperand OpVPR, Instruction INST,
- Operand OpImm>
- : Pat<(OpTy (Neon_vextract (OpTy OpVPR:$Rn), (OpTy OpVPR:$Rm),
- (i64 OpImm:$Imm))),
- (INST OpVPR:$Rn, OpVPR:$Rm, OpImm:$Imm)>;
-
-def : NI_Extract<v8i8, VPR64, EXTvvvi_8b, neon_uimm3>;
-def : NI_Extract<v4i16, VPR64, EXTvvvi_8b, neon_uimm3>;
-def : NI_Extract<v2i32, VPR64, EXTvvvi_8b, neon_uimm3>;
-def : NI_Extract<v1i64, VPR64, EXTvvvi_8b, neon_uimm3>;
-def : NI_Extract<v2f32, VPR64, EXTvvvi_8b, neon_uimm3>;
-def : NI_Extract<v1f64, VPR64, EXTvvvi_8b, neon_uimm3>;
-def : NI_Extract<v16i8, VPR128, EXTvvvi_16b, neon_uimm4>;
-def : NI_Extract<v8i16, VPR128, EXTvvvi_16b, neon_uimm4>;
-def : NI_Extract<v4i32, VPR128, EXTvvvi_16b, neon_uimm4>;
-def : NI_Extract<v2i64, VPR128, EXTvvvi_16b, neon_uimm4>;
-def : NI_Extract<v4f32, VPR128, EXTvvvi_16b, neon_uimm4>;
-def : NI_Extract<v2f64, VPR128, EXTvvvi_16b, neon_uimm4>;
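-// EXT works on bytes regardless of element type: it extracts a contiguous
-// run starting at byte $Index from the concatenation of the sources, with
-// $Rn supplying the low bytes. E.g. "ext v0.8b, v1.8b, v2.8b, #3" yields
-// bytes 3-7 of v1 followed by bytes 0-2 of v2, which is why one
-// byte-granular instruction covers every element type listed above.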
-
-// Table lookup
-class NI_TBL<bit q, bits<2> op2, bits<2> len, bit op,
- string asmop, string OpS, RegisterOperand OpVPR,
- RegisterOperand VecList>
- : NeonI_TBL<q, op2, len, op,
- (outs OpVPR:$Rd), (ins VecList:$Rn, OpVPR:$Rm),
- asmop # "\t$Rd." # OpS # ", $Rn, $Rm." # OpS,
- [],
- NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
-
-// The vectors in the lookup table are always 16b
-multiclass NI_TBL_pat<bits<2> len, bit op, string asmop, string List> {
- def _8b : NI_TBL<0, 0b00, len, op, asmop, "8b", VPR64,
- !cast<RegisterOperand>(List # "16B_operand")>;
-
- def _16b : NI_TBL<1, 0b00, len, op, asmop, "16b", VPR128,
- !cast<RegisterOperand>(List # "16B_operand")>;
-}
-
-defm TBL1 : NI_TBL_pat<0b00, 0b0, "tbl", "VOne">;
-defm TBL2 : NI_TBL_pat<0b01, 0b0, "tbl", "VPair">;
-defm TBL3 : NI_TBL_pat<0b10, 0b0, "tbl", "VTriple">;
-defm TBL4 : NI_TBL_pat<0b11, 0b0, "tbl", "VQuad">;
-
-// Table lookup extension
-class NI_TBX<bit q, bits<2> op2, bits<2> len, bit op,
- string asmop, string OpS, RegisterOperand OpVPR,
- RegisterOperand VecList>
- : NeonI_TBL<q, op2, len, op,
- (outs OpVPR:$Rd), (ins OpVPR:$src, VecList:$Rn, OpVPR:$Rm),
- asmop # "\t$Rd." # OpS # ", $Rn, $Rm." # OpS,
- [],
- NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU, ReadFPALU, ReadFPALU]> {
- let Constraints = "$src = $Rd";
-}
-
-// The vectors in the lookup table are always 16b
-multiclass NI_TBX_pat<bits<2> len, bit op, string asmop, string List> {
- def _8b : NI_TBX<0, 0b00, len, op, asmop, "8b", VPR64,
- !cast<RegisterOperand>(List # "16B_operand")>;
-
- def _16b : NI_TBX<1, 0b00, len, op, asmop, "16b", VPR128,
- !cast<RegisterOperand>(List # "16B_operand")>;
-}
-
-defm TBX1 : NI_TBX_pat<0b00, 0b1, "tbx", "VOne">;
-defm TBX2 : NI_TBX_pat<0b01, 0b1, "tbx", "VPair">;
-defm TBX3 : NI_TBX_pat<0b10, 0b1, "tbx", "VTriple">;
-defm TBX4 : NI_TBX_pat<0b11, 0b1, "tbx", "VQuad">;
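-
-// TBX differs from TBL only in that an out-of-range index leaves the
-// corresponding byte of the accumulator unchanged, hence the tied
-// "$src = $Rd" constraint.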
-
-class NeonI_INS_main<string asmop, string Res, ValueType ResTy,
- RegisterClass OpGPR, ValueType OpTy, Operand OpImm>
- : NeonI_copy<0b1, 0b0, 0b0011,
- (outs VPR128:$Rd), (ins VPR128:$src, OpGPR:$Rn, OpImm:$Imm),
- asmop # "\t$Rd." # Res # "[$Imm], $Rn",
- [(set (ResTy VPR128:$Rd),
- (ResTy (vector_insert
- (ResTy VPR128:$src),
- (OpTy OpGPR:$Rn),
- (OpImm:$Imm))))],
- NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU, ReadFPALU]> {
- bits<4> Imm;
- let Constraints = "$src = $Rd";
-}
-
-// Insert element (vector, from main)
-def INSbw : NeonI_INS_main<"ins", "b", v16i8, GPR32, i32,
- neon_uimm4_bare> {
- let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
-}
-def INShw : NeonI_INS_main<"ins", "h", v8i16, GPR32, i32,
- neon_uimm3_bare> {
- let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
-}
-def INSsw : NeonI_INS_main<"ins", "s", v4i32, GPR32, i32,
- neon_uimm2_bare> {
- let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0};
-}
-def INSdx : NeonI_INS_main<"ins", "d", v2i64, GPR64, i64,
- neon_uimm1_bare> {
- let Inst{20-16} = {Imm, 0b1, 0b0, 0b0, 0b0};
-}
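-
-// The imm5 field (Inst{20-16}) encodes both the element size and the lane:
-// the position of the lowest set bit gives the size (b/h/s/d) and the bits
-// above it hold the index.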
-
-def : NeonInstAlias<"mov $Rd.b[$Imm], $Rn",
- (INSbw VPR128:$Rd, GPR32:$Rn, neon_uimm4_bare:$Imm), 0>;
-def : NeonInstAlias<"mov $Rd.h[$Imm], $Rn",
- (INShw VPR128:$Rd, GPR32:$Rn, neon_uimm3_bare:$Imm), 0>;
-def : NeonInstAlias<"mov $Rd.s[$Imm], $Rn",
- (INSsw VPR128:$Rd, GPR32:$Rn, neon_uimm2_bare:$Imm), 0>;
-def : NeonInstAlias<"mov $Rd.d[$Imm], $Rn",
- (INSdx VPR128:$Rd, GPR64:$Rn, neon_uimm1_bare:$Imm), 0>;
-
-class Neon_INS_main_pattern <ValueType ResTy,ValueType ExtResTy,
- RegisterClass OpGPR, ValueType OpTy,
- Operand OpImm, Instruction INS>
- : Pat<(ResTy (vector_insert
- (ResTy VPR64:$src),
- (OpTy OpGPR:$Rn),
- (OpImm:$Imm))),
- (ResTy (EXTRACT_SUBREG
- (ExtResTy (INS (ExtResTy (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64)),
- OpGPR:$Rn, OpImm:$Imm)), sub_64))>;
-
-def INSbw_pattern : Neon_INS_main_pattern<v8i8, v16i8, GPR32, i32,
- neon_uimm3_bare, INSbw>;
-def INShw_pattern : Neon_INS_main_pattern<v4i16, v8i16, GPR32, i32,
- neon_uimm2_bare, INShw>;
-def INSsw_pattern : Neon_INS_main_pattern<v2i32, v4i32, GPR32, i32,
- neon_uimm1_bare, INSsw>;
-def INSdx_pattern : Neon_INS_main_pattern<v1i64, v2i64, GPR64, i64,
- neon_uimm0_bare, INSdx>;
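-
-// A 64-bit vector insert is performed on the full 128-bit register:
-// SUBREG_TO_REG widens the source, INS writes the lane, and EXTRACT_SUBREG
-// narrows the result back to its low 64 bits.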
-
-class NeonI_INS_element<string asmop, string Res, Operand ResImm>
- : NeonI_insert<0b1, 0b1,
- (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn,
- ResImm:$Immd, ResImm:$Immn),
- asmop # "\t$Rd." # Res # "[$Immd], $Rn." # Res # "[$Immn]",
- [],
- NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU, ReadFPALU]> {
- let Constraints = "$src = $Rd";
- bits<4> Immd;
- bits<4> Immn;
-}
-
-// Insert element (vector, from element)
-def INSELb : NeonI_INS_element<"ins", "b", neon_uimm4_bare> {
- let Inst{20-16} = {Immd{3}, Immd{2}, Immd{1}, Immd{0}, 0b1};
- let Inst{14-11} = {Immn{3}, Immn{2}, Immn{1}, Immn{0}};
-}
-def INSELh : NeonI_INS_element<"ins", "h", neon_uimm3_bare> {
- let Inst{20-16} = {Immd{2}, Immd{1}, Immd{0}, 0b1, 0b0};
- let Inst{14-11} = {Immn{2}, Immn{1}, Immn{0}, 0b0};
- // bit 11 is unspecified, but should be set to zero.
-}
-def INSELs : NeonI_INS_element<"ins", "s", neon_uimm2_bare> {
- let Inst{20-16} = {Immd{1}, Immd{0}, 0b1, 0b0, 0b0};
- let Inst{14-11} = {Immn{1}, Immn{0}, 0b0, 0b0};
- // bits 11-12 are unspecified, but should be set to zero.
-}
-def INSELd : NeonI_INS_element<"ins", "d", neon_uimm1_bare> {
- let Inst{20-16} = {Immd, 0b1, 0b0, 0b0, 0b0};
- let Inst{14-11} = {Immn{0}, 0b0, 0b0, 0b0};
- // bits 11-13 are unspecified, but should be set to zero.
-}
-
-def : NeonInstAlias<"mov $Rd.b[$Immd], $Rn.b[$Immn]",
- (INSELb VPR128:$Rd, VPR128:$Rn,
- neon_uimm4_bare:$Immd, neon_uimm4_bare:$Immn), 0>;
-def : NeonInstAlias<"mov $Rd.h[$Immd], $Rn.h[$Immn]",
- (INSELh VPR128:$Rd, VPR128:$Rn,
- neon_uimm3_bare:$Immd, neon_uimm3_bare:$Immn), 0>;
-def : NeonInstAlias<"mov $Rd.s[$Immd], $Rn.s[$Immn]",
- (INSELs VPR128:$Rd, VPR128:$Rn,
- neon_uimm2_bare:$Immd, neon_uimm2_bare:$Immn), 0>;
-def : NeonInstAlias<"mov $Rd.d[$Immd], $Rn.d[$Immn]",
- (INSELd VPR128:$Rd, VPR128:$Rn,
- neon_uimm1_bare:$Immd, neon_uimm1_bare:$Immn), 0>;
-
-multiclass Neon_INS_elt_pattern<ValueType ResTy, ValueType NaTy,
- ValueType MidTy, Operand StImm, Operand NaImm,
- Instruction INS> {
-def : Pat<(ResTy (vector_insert
- (ResTy VPR128:$src),
- (MidTy (vector_extract
- (ResTy VPR128:$Rn),
- (StImm:$Immn))),
- (StImm:$Immd))),
- (INS (ResTy VPR128:$src), (ResTy VPR128:$Rn),
- StImm:$Immd, StImm:$Immn)>;
-
-def : Pat <(ResTy (vector_insert
- (ResTy VPR128:$src),
- (MidTy (vector_extract
- (NaTy VPR64:$Rn),
- (NaImm:$Immn))),
- (StImm:$Immd))),
- (INS (ResTy VPR128:$src),
- (ResTy (SUBREG_TO_REG (i64 0), (NaTy VPR64:$Rn), sub_64)),
- StImm:$Immd, NaImm:$Immn)>;
-
-def : Pat <(NaTy (vector_insert
- (NaTy VPR64:$src),
- (MidTy (vector_extract
- (ResTy VPR128:$Rn),
- (StImm:$Immn))),
- (NaImm:$Immd))),
- (NaTy (EXTRACT_SUBREG
- (ResTy (INS
- (ResTy (SUBREG_TO_REG (i64 0), (NaTy VPR64:$src), sub_64)),
- (ResTy VPR128:$Rn),
- NaImm:$Immd, StImm:$Immn)),
- sub_64))>;
-
-def : Pat <(NaTy (vector_insert
- (NaTy VPR64:$src),
- (MidTy (vector_extract
- (NaTy VPR64:$Rn),
- (NaImm:$Immn))),
- (NaImm:$Immd))),
- (NaTy (EXTRACT_SUBREG
- (ResTy (INS
- (ResTy (SUBREG_TO_REG (i64 0), (NaTy VPR64:$src), sub_64)),
- (ResTy (SUBREG_TO_REG (i64 0), (NaTy VPR64:$Rn), sub_64)),
- NaImm:$Immd, NaImm:$Immn)),
- sub_64))>;
-}
-
-defm : Neon_INS_elt_pattern<v4f32, v2f32, f32, neon_uimm2_bare,
- neon_uimm1_bare, INSELs>;
-defm : Neon_INS_elt_pattern<v2f64, v1f64, f64, neon_uimm1_bare,
- neon_uimm0_bare, INSELd>;
-defm : Neon_INS_elt_pattern<v16i8, v8i8, i32, neon_uimm4_bare,
- neon_uimm3_bare, INSELb>;
-defm : Neon_INS_elt_pattern<v8i16, v4i16, i32, neon_uimm3_bare,
- neon_uimm2_bare, INSELh>;
-defm : Neon_INS_elt_pattern<v4i32, v2i32, i32, neon_uimm2_bare,
- neon_uimm1_bare, INSELs>;
-defm : Neon_INS_elt_pattern<v2i64, v1i64, i64, neon_uimm1_bare,
- neon_uimm0_bare, INSELd>;
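-
-// The four patterns per element type above cover every 64-/128-bit
-// combination of source and destination vector, widening 64-bit operands
-// before the INS and narrowing the result afterwards when needed.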
-
-multiclass Neon_INS_elt_float_pattern<ValueType ResTy, ValueType NaTy,
- ValueType MidTy,
- RegisterClass OpFPR, Operand ResImm,
- SubRegIndex SubIndex, Instruction INS> {
-def : Pat <(ResTy (vector_insert
- (ResTy VPR128:$src),
- (MidTy OpFPR:$Rn),
- (ResImm:$Imm))),
- (INS (ResTy VPR128:$src),
- (ResTy (SUBREG_TO_REG (i64 0), OpFPR:$Rn, SubIndex)),
- ResImm:$Imm,
- (i64 0))>;
-
-def : Pat <(NaTy (vector_insert
- (NaTy VPR64:$src),
- (MidTy OpFPR:$Rn),
- (ResImm:$Imm))),
- (NaTy (EXTRACT_SUBREG
- (ResTy (INS
- (ResTy (SUBREG_TO_REG (i64 0), (NaTy VPR64:$src), sub_64)),
- (ResTy (SUBREG_TO_REG (i64 0), (MidTy OpFPR:$Rn), SubIndex)),
- ResImm:$Imm,
- (i64 0))),
- sub_64))>;
-}
-
-defm : Neon_INS_elt_float_pattern<v4f32, v2f32, f32, FPR32, neon_uimm2_bare,
- sub_32, INSELs>;
-defm : Neon_INS_elt_float_pattern<v2f64, v1f64, f64, FPR64, neon_uimm1_bare,
- sub_64, INSELd>;
-
-class NeonI_SMOV<string asmop, string Res, bit Q,
- ValueType OpTy, ValueType eleTy,
- Operand OpImm, RegisterClass ResGPR, ValueType ResTy>
- : NeonI_copy<Q, 0b0, 0b0101,
- (outs ResGPR:$Rd), (ins VPR128:$Rn, OpImm:$Imm),
- asmop # "\t$Rd, $Rn." # Res # "[$Imm]",
- [(set (ResTy ResGPR:$Rd),
- (ResTy (sext_inreg
- (ResTy (vector_extract
- (OpTy VPR128:$Rn), (OpImm:$Imm))),
- eleTy)))],
- NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU]> {
- bits<4> Imm;
-}
-
-// Signed integer move (main, from element)
-def SMOVwb : NeonI_SMOV<"smov", "b", 0b0, v16i8, i8, neon_uimm4_bare,
- GPR32, i32> {
- let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
-}
-def SMOVwh : NeonI_SMOV<"smov", "h", 0b0, v8i16, i16, neon_uimm3_bare,
- GPR32, i32> {
- let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
-}
-def SMOVxb : NeonI_SMOV<"smov", "b", 0b1, v16i8, i8, neon_uimm4_bare,
- GPR64, i64> {
- let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
-}
-def SMOVxh : NeonI_SMOV<"smov", "h", 0b1, v8i16, i16, neon_uimm3_bare,
- GPR64, i64> {
- let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
-}
-def SMOVxs : NeonI_SMOV<"smov", "s", 0b1, v4i32, i32, neon_uimm2_bare,
- GPR64, i64> {
- let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0};
-}
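-
-// SMOV sign-extends the selected lane into the GPR. There are no s-to-W or
-// d-to-X forms: those widths already match the destination, so the plain
-// move UMOV is used instead.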
-
-multiclass Neon_SMOVx_pattern <ValueType StTy, ValueType NaTy,
- ValueType eleTy, Operand StImm, Operand NaImm,
- Instruction SMOVI> {
- def : Pat<(i64 (sext_inreg
- (i64 (anyext
- (i32 (vector_extract
- (StTy VPR128:$Rn), (StImm:$Imm))))),
- eleTy)),
- (SMOVI VPR128:$Rn, StImm:$Imm)>;
-
- def : Pat<(i64 (sext
- (i32 (vector_extract
- (StTy VPR128:$Rn), (StImm:$Imm))))),
- (SMOVI VPR128:$Rn, StImm:$Imm)>;
-
- def : Pat<(i64 (sext_inreg
- (i64 (vector_extract
- (NaTy VPR64:$Rn), (NaImm:$Imm))),
- eleTy)),
- (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
- NaImm:$Imm)>;
-
- def : Pat<(i64 (sext_inreg
- (i64 (anyext
- (i32 (vector_extract
- (NaTy VPR64:$Rn), (NaImm:$Imm))))),
- eleTy)),
- (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
- NaImm:$Imm)>;
-
- def : Pat<(i64 (sext
- (i32 (vector_extract
- (NaTy VPR64:$Rn), (NaImm:$Imm))))),
- (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
- NaImm:$Imm)>;
-}
-
-defm : Neon_SMOVx_pattern<v16i8, v8i8, i8, neon_uimm4_bare,
- neon_uimm3_bare, SMOVxb>;
-defm : Neon_SMOVx_pattern<v8i16, v4i16, i16, neon_uimm3_bare,
- neon_uimm2_bare, SMOVxh>;
-defm : Neon_SMOVx_pattern<v4i32, v2i32, i32, neon_uimm2_bare,
- neon_uimm1_bare, SMOVxs>;
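-
-// The DAG can express a sign-extended lane extract in several forms (plain
-// sext, sext_inreg of an anyext, sext_inreg of a widened extract); the
-// patterns above map each of them onto SMOV, widening 64-bit sources first.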
-
-class Neon_SMOVw_pattern <ValueType StTy, ValueType NaTy,
- ValueType eleTy, Operand StImm, Operand NaImm,
- Instruction SMOVI>
- : Pat<(i32 (sext_inreg
- (i32 (vector_extract
- (NaTy VPR64:$Rn), (NaImm:$Imm))),
- eleTy)),
- (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
- NaImm:$Imm)>;
-
-def : Neon_SMOVw_pattern<v16i8, v8i8, i8, neon_uimm4_bare,
- neon_uimm3_bare, SMOVwb>;
-def : Neon_SMOVw_pattern<v8i16, v4i16, i16, neon_uimm3_bare,
- neon_uimm2_bare, SMOVwh>;
-
-class NeonI_UMOV<string asmop, string Res, bit Q,
- ValueType OpTy, Operand OpImm,
- RegisterClass ResGPR, ValueType ResTy>
- : NeonI_copy<Q, 0b0, 0b0111,
- (outs ResGPR:$Rd), (ins VPR128:$Rn, OpImm:$Imm),
- asmop # "\t$Rd, $Rn." # Res # "[$Imm]",
- [(set (ResTy ResGPR:$Rd),
- (ResTy (vector_extract
- (OpTy VPR128:$Rn), (OpImm:$Imm))))],
- NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU]> {
- bits<4> Imm;
-}
-
-// Unsigned integer move (main, from element)
-def UMOVwb : NeonI_UMOV<"umov", "b", 0b0, v16i8, neon_uimm4_bare,
- GPR32, i32> {
- let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
-}
-def UMOVwh : NeonI_UMOV<"umov", "h", 0b0, v8i16, neon_uimm3_bare,
- GPR32, i32> {
- let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
-}
-def UMOVws : NeonI_UMOV<"umov", "s", 0b0, v4i32, neon_uimm2_bare,
- GPR32, i32> {
- let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0};
-}
-def UMOVxd : NeonI_UMOV<"umov", "d", 0b1, v2i64, neon_uimm1_bare,
- GPR64, i64> {
- let Inst{20-16} = {Imm, 0b1, 0b0, 0b0, 0b0};
-}
-
-def : NeonInstAlias<"mov $Rd, $Rn.s[$Imm]",
- (UMOVws GPR32:$Rd, VPR128:$Rn, neon_uimm2_bare:$Imm), 0>;
-def : NeonInstAlias<"mov $Rd, $Rn.d[$Imm]",
- (UMOVxd GPR64:$Rd, VPR128:$Rn, neon_uimm1_bare:$Imm), 0>;
-
-class Neon_UMOV_pattern <ValueType StTy, ValueType NaTy, ValueType ResTy,
- Operand StImm, Operand NaImm,
- Instruction UMOVI>
- : Pat<(ResTy (vector_extract
- (NaTy VPR64:$Rn), NaImm:$Imm)),
- (UMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
- NaImm:$Imm)>;
-
-def : Neon_UMOV_pattern<v16i8, v8i8, i32, neon_uimm4_bare,
- neon_uimm3_bare, UMOVwb>;
-def : Neon_UMOV_pattern<v8i16, v4i16, i32, neon_uimm3_bare,
- neon_uimm2_bare, UMOVwh>;
-def : Neon_UMOV_pattern<v4i32, v2i32, i32, neon_uimm2_bare,
- neon_uimm1_bare, UMOVws>;
-
-def : Pat<(i32 (and
- (i32 (vector_extract
- (v16i8 VPR128:$Rn), (neon_uimm4_bare:$Imm))),
- 255)),
- (UMOVwb VPR128:$Rn, neon_uimm4_bare:$Imm)>;
-
-def : Pat<(i32 (and
- (i32 (vector_extract
- (v8i16 VPR128:$Rn), (neon_uimm3_bare:$Imm))),
- 65535)),
- (UMOVwh VPR128:$Rn, neon_uimm3_bare:$Imm)>;
-
-def : Pat<(i64 (zext
- (i32 (vector_extract
- (v2i64 VPR128:$Rn), (neon_uimm1_bare:$Imm))))),
- (UMOVxd VPR128:$Rn, neon_uimm1_bare:$Imm)>;
-
-def : Pat<(i32 (and
- (i32 (vector_extract
- (v8i8 VPR64:$Rn), (neon_uimm3_bare:$Imm))),
- 255)),
- (UMOVwb (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64),
- neon_uimm3_bare:$Imm)>;
-
-def : Pat<(i32 (and
- (i32 (vector_extract
- (v4i16 VPR64:$Rn), (neon_uimm2_bare:$Imm))),
- 65535)),
- (UMOVwh (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64),
- neon_uimm2_bare:$Imm)>;
-
-def : Pat<(i64 (zext
- (i32 (vector_extract
- (v1i64 VPR64:$Rn), (neon_uimm0_bare:$Imm))))),
- (UMOVxd (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64),
- neon_uimm0_bare:$Imm)>;
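-
-// Masking an extracted lane with 255/65535 (or zero-extending it) is
-// exactly what UMOV already produces, so the mask is folded into the move.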
-
-// Additional copy patterns for scalar types
-def : Pat<(i32 (vector_extract (v1i8 FPR8:$Rn), (i64 0))),
- (UMOVwb (v16i8
- (SUBREG_TO_REG (i64 0), FPR8:$Rn, sub_8)), (i64 0))>;
-
-def : Pat<(i32 (vector_extract (v1i16 FPR16:$Rn), (i64 0))),
- (UMOVwh (v8i16
- (SUBREG_TO_REG (i64 0), FPR16:$Rn, sub_16)), (i64 0))>;
-
-def : Pat<(i32 (vector_extract (v1i32 FPR32:$Rn), (i64 0))),
- (FMOVws FPR32:$Rn)>;
-
-def : Pat<(i64 (vector_extract (v1i64 FPR64:$Rn), (i64 0))),
- (FMOVxd FPR64:$Rn)>;
-
-def : Pat<(f64 (vector_extract (v1f64 FPR64:$Rn), (i64 0))),
- (f64 FPR64:$Rn)>;
-
-def : Pat<(v1i8 (scalar_to_vector GPR32:$Rn)),
- (v1i8 (EXTRACT_SUBREG (v16i8
- (INSbw (v16i8 (IMPLICIT_DEF)), $Rn, (i64 0))),
- sub_8))>;
-
-def : Pat<(v1i16 (scalar_to_vector GPR32:$Rn)),
- (v1i16 (EXTRACT_SUBREG (v8i16
- (INShw (v8i16 (IMPLICIT_DEF)), $Rn, (i64 0))),
- sub_16))>;
-
-def : Pat<(v1i32 (scalar_to_vector GPR32:$src)),
- (FMOVsw $src)>;
-
-def : Pat<(v1i64 (scalar_to_vector GPR64:$src)),
- (FMOVdx $src)>;
-
-def : Pat<(v8i8 (scalar_to_vector GPR32:$Rn)),
- (v8i8 (EXTRACT_SUBREG (v16i8
- (INSbw (v16i8 (IMPLICIT_DEF)), $Rn, (i64 0))),
- sub_64))>;
-
-def : Pat<(v4i16 (scalar_to_vector GPR32:$Rn)),
- (v4i16 (EXTRACT_SUBREG (v8i16
- (INShw (v8i16 (IMPLICIT_DEF)), $Rn, (i64 0))),
- sub_64))>;
-
-def : Pat<(v2i32 (scalar_to_vector GPR32:$Rn)),
- (v2i32 (EXTRACT_SUBREG (v4i32
- (INSsw (v4i32 (IMPLICIT_DEF)), $Rn, (i64 0))),
- sub_64))>;
-
-def : Pat<(v16i8 (scalar_to_vector GPR32:$Rn)),
- (INSbw (v16i8 (IMPLICIT_DEF)), $Rn, (i64 0))>;
-
-def : Pat<(v8i16 (scalar_to_vector GPR32:$Rn)),
- (INShw (v8i16 (IMPLICIT_DEF)), $Rn, (i64 0))>;
-
-def : Pat<(v4i32 (scalar_to_vector GPR32:$Rn)),
- (INSsw (v4i32 (IMPLICIT_DEF)), $Rn, (i64 0))>;
-
-def : Pat<(v2i64 (scalar_to_vector GPR64:$Rn)),
- (INSdx (v2i64 (IMPLICIT_DEF)), $Rn, (i64 0))>;
-
-def : Pat<(v2f32 (scalar_to_vector (f32 FPR32:$Rn))),
- (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32)>;
-def : Pat<(v4f32 (scalar_to_vector (f32 FPR32:$Rn))),
- (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32)>;
-
-def : Pat<(v1f64 (scalar_to_vector (f64 FPR64:$Rn))),
- (v1f64 FPR64:$Rn)>;
-
-def : Pat<(v2f64 (scalar_to_vector (f64 FPR64:$src))),
- (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)),
- (f64 FPR64:$src), sub_64)>;
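-
-// Only lane 0 of a scalar_to_vector result is defined, so an INS into
-// IMPLICIT_DEF (or, for FP scalars, a plain subregister copy) suffices.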
-
-class NeonI_DUP_Elt<bit Q, string asmop, string rdlane, string rnlane,
- RegisterOperand ResVPR, Operand OpImm>
- : NeonI_copy<Q, 0b0, 0b0000, (outs ResVPR:$Rd),
- (ins VPR128:$Rn, OpImm:$Imm),
- asmop # "\t$Rd" # rdlane # ", $Rn" # rnlane # "[$Imm]",
- [],
- NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU]> {
- bits<4> Imm;
-}
-
-def DUPELT16b : NeonI_DUP_Elt<0b1, "dup", ".16b", ".b", VPR128,
- neon_uimm4_bare> {
- let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
-}
-
-def DUPELT8h : NeonI_DUP_Elt<0b1, "dup", ".8h", ".h", VPR128,
- neon_uimm3_bare> {
- let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
-}
-
-def DUPELT4s : NeonI_DUP_Elt<0b1, "dup", ".4s", ".s", VPR128,
- neon_uimm2_bare> {
- let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0};
-}
-
-def DUPELT2d : NeonI_DUP_Elt<0b1, "dup", ".2d", ".d", VPR128,
- neon_uimm1_bare> {
- let Inst{20-16} = {Imm, 0b1, 0b0, 0b0, 0b0};
-}
-
-def DUPELT8b : NeonI_DUP_Elt<0b0, "dup", ".8b", ".b", VPR64,
- neon_uimm4_bare> {
- let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1};
-}
-
-def DUPELT4h : NeonI_DUP_Elt<0b0, "dup", ".4h", ".h", VPR64,
- neon_uimm3_bare> {
- let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0};
-}
-
-def DUPELT2s : NeonI_DUP_Elt<0b0, "dup", ".2s", ".s", VPR64,
- neon_uimm2_bare> {
- let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0};
-}
-
-multiclass NeonI_DUP_Elt_pattern<Instruction DUPELT, ValueType ResTy,
- ValueType OpTy,ValueType NaTy,
- ValueType ExTy, Operand OpLImm,
- Operand OpNImm> {
-def : Pat<(ResTy (Neon_vduplane (OpTy VPR128:$Rn), OpLImm:$Imm)),
- (ResTy (DUPELT (OpTy VPR128:$Rn), OpLImm:$Imm))>;
-
-def : Pat<(ResTy (Neon_vduplane
- (NaTy VPR64:$Rn), OpNImm:$Imm)),
- (ResTy (DUPELT
- (ExTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)), OpNImm:$Imm))>;
-}
-defm : NeonI_DUP_Elt_pattern<DUPELT16b, v16i8, v16i8, v8i8, v16i8,
- neon_uimm4_bare, neon_uimm3_bare>;
-defm : NeonI_DUP_Elt_pattern<DUPELT8b, v8i8, v16i8, v8i8, v16i8,
- neon_uimm4_bare, neon_uimm3_bare>;
-defm : NeonI_DUP_Elt_pattern<DUPELT8h, v8i16, v8i16, v4i16, v8i16,
- neon_uimm3_bare, neon_uimm2_bare>;
-defm : NeonI_DUP_Elt_pattern<DUPELT4h, v4i16, v8i16, v4i16, v8i16,
- neon_uimm3_bare, neon_uimm2_bare>;
-defm : NeonI_DUP_Elt_pattern<DUPELT4s, v4i32, v4i32, v2i32, v4i32,
- neon_uimm2_bare, neon_uimm1_bare>;
-defm : NeonI_DUP_Elt_pattern<DUPELT2s, v2i32, v4i32, v2i32, v4i32,
- neon_uimm2_bare, neon_uimm1_bare>;
-defm : NeonI_DUP_Elt_pattern<DUPELT2d, v2i64, v2i64, v1i64, v2i64,
- neon_uimm1_bare, neon_uimm0_bare>;
-defm : NeonI_DUP_Elt_pattern<DUPELT4s, v4f32, v4f32, v2f32, v4f32,
- neon_uimm2_bare, neon_uimm1_bare>;
-defm : NeonI_DUP_Elt_pattern<DUPELT2s, v2f32, v4f32, v2f32, v4f32,
- neon_uimm2_bare, neon_uimm1_bare>;
-defm : NeonI_DUP_Elt_pattern<DUPELT2d, v2f64, v2f64, v1f64, v2f64,
- neon_uimm1_bare, neon_uimm0_bare>;
-
-def : Pat<(v2f32 (Neon_vdup (f32 FPR32:$Rn))),
- (v2f32 (DUPELT2s
- (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32),
- (i64 0)))>;
-def : Pat<(v4f32 (Neon_vdup (f32 FPR32:$Rn))),
- (v4f32 (DUPELT4s
- (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32),
- (i64 0)))>;
-def : Pat<(v2f64 (Neon_vdup (f64 FPR64:$Rn))),
- (v2f64 (DUPELT2d
- (SUBREG_TO_REG (i64 0), FPR64:$Rn, sub_64),
- (i64 0)))>;
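-
-// Broadcasting an FP scalar needs no GPR round trip: the scalar already
-// sits in lane 0 of a vector register, so DUPELT from index 0 does the
-// splat.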
-
-multiclass NeonI_DUP_pattern<Instruction DUPELT, ValueType ResTy,
- ValueType OpTy, RegisterClass OpRC,
- Operand OpNImm, SubRegIndex SubIndex> {
-def : Pat<(ResTy (Neon_vduplane (OpTy OpRC:$Rn), OpNImm:$Imm)),
- (ResTy (DUPELT
- (SUBREG_TO_REG (i64 0), OpRC:$Rn, SubIndex), OpNImm:$Imm))>;
-}
-
-defm : NeonI_DUP_pattern<DUPELT4h, v4i16, v1i16, FPR16, neon_uimm2_bare, sub_16>;
-defm : NeonI_DUP_pattern<DUPELT4s, v4i32, v1i32, FPR32, neon_uimm2_bare, sub_32>;
-defm : NeonI_DUP_pattern<DUPELT8b, v8i8, v1i8, FPR8, neon_uimm3_bare, sub_8>;
-defm : NeonI_DUP_pattern<DUPELT8h, v8i16, v1i16, FPR16, neon_uimm3_bare, sub_16>;
-defm : NeonI_DUP_pattern<DUPELT16b, v16i8, v1i8, FPR8, neon_uimm4_bare, sub_8>;
-
-class NeonI_DUP<bit Q, string asmop, string rdlane,
- RegisterOperand ResVPR, ValueType ResTy,
- RegisterClass OpGPR, ValueType OpTy>
- : NeonI_copy<Q, 0b0, 0b0001, (outs ResVPR:$Rd), (ins OpGPR:$Rn),
- asmop # "\t$Rd" # rdlane # ", $Rn",
- [(set (ResTy ResVPR:$Rd),
- (ResTy (Neon_vdup (OpTy OpGPR:$Rn))))],
- NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU]>;
-
-def DUP16b : NeonI_DUP<0b1, "dup", ".16b", VPR128, v16i8, GPR32, i32> {
- let Inst{20-16} = 0b00001;
- // bits 17-20 are unspecified, but should be set to zero.
-}
-
-def DUP8h : NeonI_DUP<0b1, "dup", ".8h", VPR128, v8i16, GPR32, i32> {
- let Inst{20-16} = 0b00010;
- // bits 18-20 are unspecified, but should be set to zero.
-}
-
-def DUP4s : NeonI_DUP<0b1, "dup", ".4s", VPR128, v4i32, GPR32, i32> {
- let Inst{20-16} = 0b00100;
- // bits 19-20 are unspecified, but should be set to zero.
-}
-
-def DUP2d : NeonI_DUP<0b1, "dup", ".2d", VPR128, v2i64, GPR64, i64> {
- let Inst{20-16} = 0b01000;
- // bit 20 is unspecified, but should be set to zero.
-}
-
-def DUP8b : NeonI_DUP<0b0, "dup", ".8b", VPR64, v8i8, GPR32, i32> {
- let Inst{20-16} = 0b00001;
- // bits 17-20 are unspecified, but should be set to zero.
-}
-
-def DUP4h : NeonI_DUP<0b0, "dup", ".4h", VPR64, v4i16, GPR32, i32> {
- let Inst{20-16} = 0b00010;
- // bits 18-20 are unspecified, but should be set to zero.
-}
-
-def DUP2s : NeonI_DUP<0b0, "dup", ".2s", VPR64, v2i32, GPR32, i32> {
- let Inst{20-16} = 0b00100;
- // bits 19-20 are unspecified, but should be set to zero.
-}
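-
-// With no lane to encode, DUP from a GPR uses imm5 as the bare one-hot size
-// marker: 0b00001 = b, 0b00010 = h, 0b00100 = s, 0b01000 = d.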
-
-// Patterns for CONCAT_VECTORS
-multiclass Concat_Vector_Pattern<ValueType ResTy, ValueType OpTy> {
-def : Pat<(ResTy (concat_vectors (OpTy VPR64:$Rn), undef)),
- (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)>;
-def : Pat<(ResTy (concat_vectors (OpTy VPR64:$Rn), (OpTy VPR64:$Rm))),
- (INSELd
- (v2i64 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
- (v2i64 (SUBREG_TO_REG (i64 0), VPR64:$Rm, sub_64)),
- (i64 1),
- (i64 0))>;
-def : Pat<(ResTy (concat_vectors (OpTy VPR64:$Rn), (OpTy VPR64:$Rn))),
- (DUPELT2d
- (v2i64 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
- (i64 0))>;
-}
-
-defm : Concat_Vector_Pattern<v16i8, v8i8>;
-defm : Concat_Vector_Pattern<v8i16, v4i16>;
-defm : Concat_Vector_Pattern<v4i32, v2i32>;
-defm : Concat_Vector_Pattern<v2i64, v1i64>;
-defm : Concat_Vector_Pattern<v4f32, v2f32>;
-defm : Concat_Vector_Pattern<v2f64, v1f64>;
-
-def : Pat<(v2i32 (concat_vectors (v1i32 FPR32:$Rn), undef)),
- (v2i32 (SUBREG_TO_REG(i64 0), $Rn, sub_32))>;
-def : Pat<(v2i32 (concat_vectors (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))),
- (EXTRACT_SUBREG
- (v4i32 (INSELs
- (v4i32 (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32)),
- (v4i32 (SUBREG_TO_REG (i64 0), FPR32:$Rm, sub_32)),
- (i64 1),
- (i64 0))),
- sub_64)>;
-def : Pat<(v2i32 (concat_vectors (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rn))),
- (DUPELT2s (v4i32 (SUBREG_TO_REG(i64 0), $Rn, sub_32)), 0)>;
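-
-// concat_vectors with undef is a free subregister widening; with a second
-// operand it is an INS of that operand into the high half; with the same
-// operand twice it is a DUP of lane 0.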
-
-// Patterns for EXTRACT_SUBVECTOR
-def : Pat<(v8i8 (extract_subvector (v16i8 VPR128:$Rn), (i64 0))),
- (v8i8 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>;
-def : Pat<(v4i16 (extract_subvector (v8i16 VPR128:$Rn), (i64 0))),
- (v4i16 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>;
-def : Pat<(v2i32 (extract_subvector (v4i32 VPR128:$Rn), (i64 0))),
- (v2i32 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>;
-def : Pat<(v1i64 (extract_subvector (v2i64 VPR128:$Rn), (i64 0))),
- (v1i64 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>;
-def : Pat<(v2f32 (extract_subvector (v4f32 VPR128:$Rn), (i64 0))),
- (v2f32 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>;
-def : Pat<(v1f64 (extract_subvector (v2f64 VPR128:$Rn), (i64 0))),
- (v1f64 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>;
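-
-// Extracting the low half of a 128-bit vector is a plain EXTRACT_SUBREG of
-// sub_64 and costs no instruction.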
-
-// The following is the implementation for instruction class (3V Elem)
-
-// Variant 1
-
-class NI_2VE<bit q, bit u, bits<2> size, bits<4> opcode,
- string asmop, string ResS, string OpS, string EleOpS,
- Operand OpImm, RegisterOperand ResVPR,
- RegisterOperand OpVPR, RegisterOperand EleOpVPR>
- : NeonI_2VElem<q, u, size, opcode,
- (outs ResVPR:$Rd), (ins ResVPR:$src, OpVPR:$Rn,
- EleOpVPR:$Re, OpImm:$Index),
- asmop # "\t$Rd." # ResS # ", $Rn." # OpS #
- ", $Re." # EleOpS # "[$Index]",
- [],
- NoItinerary>,
- Sched<[WriteFPMAC, ReadFPMAC, ReadFPMAC, ReadFPMAC]> {
- bits<3> Index;
- bits<5> Re;
-
- let Constraints = "$src = $Rd";
-}
-
-multiclass NI_2VE_v1<bit u, bits<4> opcode, string asmop> {
- // The element vector register class is always 128-bit to cover the max index.
- def _2s4s : NI_2VE<0b0, u, 0b10, opcode, asmop, "2s", "2s", "s",
- neon_uimm2_bare, VPR64, VPR64, VPR128> {
- let Inst{11} = {Index{1}};
- let Inst{21} = {Index{0}};
- let Inst{20-16} = Re;
- }
-
- def _4s4s : NI_2VE<0b1, u, 0b10, opcode, asmop, "4s", "4s", "s",
- neon_uimm2_bare, VPR128, VPR128, VPR128> {
- let Inst{11} = {Index{1}};
- let Inst{21} = {Index{0}};
- let Inst{20-16} = Re;
- }
-
- // Index operations on 16-bit (H) elements are restricted to using v0-v15.
- def _4h8h : NI_2VE<0b0, u, 0b01, opcode, asmop, "4h", "4h", "h",
- neon_uimm3_bare, VPR64, VPR64, VPR128Lo> {
- let Inst{11} = {Index{2}};
- let Inst{21} = {Index{1}};
- let Inst{20} = {Index{0}};
- let Inst{19-16} = Re{3-0};
- }
-
- def _8h8h : NI_2VE<0b1, u, 0b01, opcode, asmop, "8h", "8h", "h",
- neon_uimm3_bare, VPR128, VPR128, VPR128Lo> {
- let Inst{11} = {Index{2}};
- let Inst{21} = {Index{1}};
- let Inst{20} = {Index{0}};
- let Inst{19-16} = Re{3-0};
- }
-}
-
-defm MLAvve : NI_2VE_v1<0b1, 0b0000, "mla">;
-defm MLSvve : NI_2VE_v1<0b1, 0b0100, "mls">;
-
-// Pattern for lane in 128-bit vector
-class NI_2VE_laneq<Instruction INST, Operand OpImm, SDPatternOperator op,
- RegisterOperand ResVPR, RegisterOperand OpVPR,
- RegisterOperand EleOpVPR, ValueType ResTy, ValueType OpTy,
- ValueType EleOpTy>
- : Pat<(ResTy (op (ResTy ResVPR:$src), (OpTy OpVPR:$Rn),
- (OpTy (Neon_vduplane (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
- (INST ResVPR:$src, OpVPR:$Rn, EleOpVPR:$Re, OpImm:$Index)>;
-
-// Pattern for lane in 64-bit vector
-class NI_2VE_lane<Instruction INST, Operand OpImm, SDPatternOperator op,
- RegisterOperand ResVPR, RegisterOperand OpVPR,
- RegisterOperand EleOpVPR, ValueType ResTy, ValueType OpTy,
- ValueType EleOpTy>
- : Pat<(ResTy (op (ResTy ResVPR:$src), (OpTy OpVPR:$Rn),
- (OpTy (Neon_vduplane (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
- (INST ResVPR:$src, OpVPR:$Rn,
- (SUBREG_TO_REG (i64 0), EleOpVPR:$Re, sub_64), OpImm:$Index)>;
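-
-// A lane in a 64-bit vector still fits the 128-bit-register encoding, so
-// $Re is widened with SUBREG_TO_REG and the same instruction is reused.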
-
-multiclass NI_2VE_v1_pat<string subop, SDPatternOperator op> {
- def : NI_2VE_laneq<!cast<Instruction>(subop # "_2s4s"), neon_uimm2_bare,
- op, VPR64, VPR64, VPR128, v2i32, v2i32, v4i32>;
-
- def : NI_2VE_laneq<!cast<Instruction>(subop # "_4s4s"), neon_uimm2_bare,
- op, VPR128, VPR128, VPR128, v4i32, v4i32, v4i32>;
-
- def : NI_2VE_laneq<!cast<Instruction>(subop # "_4h8h"), neon_uimm3_bare,
- op, VPR64, VPR64, VPR128Lo, v4i16, v4i16, v8i16>;
-
- def : NI_2VE_laneq<!cast<Instruction>(subop # "_8h8h"), neon_uimm3_bare,
- op, VPR128, VPR128, VPR128Lo, v8i16, v8i16, v8i16>;
-
- // The index can only be half of the max value for a lane in a 64-bit vector.
-
- def : NI_2VE_lane<!cast<Instruction>(subop # "_2s4s"), neon_uimm1_bare,
- op, VPR64, VPR64, VPR64, v2i32, v2i32, v2i32>;
-
- def : NI_2VE_lane<!cast<Instruction>(subop # "_4h8h"), neon_uimm2_bare,
- op, VPR64, VPR64, VPR64Lo, v4i16, v4i16, v4i16>;
-}
-
-defm MLA_lane_v1 : NI_2VE_v1_pat<"MLAvve", Neon_mla>;
-defm MLS_lane_v1 : NI_2VE_v1_pat<"MLSvve", Neon_mls>;
-
-class NI_2VE_2op<bit q, bit u, bits<2> size, bits<4> opcode,
- string asmop, string ResS, string OpS, string EleOpS,
- Operand OpImm, RegisterOperand ResVPR,
- RegisterOperand OpVPR, RegisterOperand EleOpVPR>
- : NeonI_2VElem<q, u, size, opcode,
- (outs ResVPR:$Rd), (ins OpVPR:$Rn,
- EleOpVPR:$Re, OpImm:$Index),
- asmop # "\t$Rd." # ResS # ", $Rn." # OpS #
- ", $Re." # EleOpS # "[$Index]",
- [],
- NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU, ReadFPALU]> {
- bits<3> Index;
- bits<5> Re;
-}
-
-multiclass NI_2VE_v1_2op<bit u, bits<4> opcode, string asmop> {
- // The element vector register class is always 128-bit to cover the max index.
- def _2s4s : NI_2VE_2op<0b0, u, 0b10, opcode, asmop, "2s", "2s", "s",
- neon_uimm2_bare, VPR64, VPR64, VPR128> {
- let Inst{11} = {Index{1}};
- let Inst{21} = {Index{0}};
- let Inst{20-16} = Re;
- }
-
- def _4s4s : NI_2VE_2op<0b1, u, 0b10, opcode, asmop, "4s", "4s", "s",
- neon_uimm2_bare, VPR128, VPR128, VPR128> {
- let Inst{11} = {Index{1}};
- let Inst{21} = {Index{0}};
- let Inst{20-16} = Re;
- }
-
- // Index operations on 16-bit (H) elements are restricted to using v0-v15.
- def _4h8h : NI_2VE_2op<0b0, u, 0b01, opcode, asmop, "4h", "4h", "h",
- neon_uimm3_bare, VPR64, VPR64, VPR128Lo> {
- let Inst{11} = {Index{2}};
- let Inst{21} = {Index{1}};
- let Inst{20} = {Index{0}};
- let Inst{19-16} = Re{3-0};
- }
-
- def _8h8h : NI_2VE_2op<0b1, u, 0b01, opcode, asmop, "8h", "8h", "h",
- neon_uimm3_bare, VPR128, VPR128, VPR128Lo> {
- let Inst{11} = {Index{2}};
- let Inst{21} = {Index{1}};
- let Inst{20} = {Index{0}};
- let Inst{19-16} = Re{3-0};
- }
-}
-
-let SchedRW = [WriteFPMul, ReadFPMul, ReadFPMul] in {
-defm MULve : NI_2VE_v1_2op<0b0, 0b1000, "mul">;
-defm SQDMULHve : NI_2VE_v1_2op<0b0, 0b1100, "sqdmulh">;
-defm SQRDMULHve : NI_2VE_v1_2op<0b0, 0b1101, "sqrdmulh">;
-}
-
-// Pattern for lane in 128-bit vector
-class NI_2VE_mul_laneq<Instruction INST, Operand OpImm, SDPatternOperator op,
- RegisterOperand OpVPR, RegisterOperand EleOpVPR,
- ValueType ResTy, ValueType OpTy, ValueType EleOpTy>
- : Pat<(ResTy (op (OpTy OpVPR:$Rn),
- (OpTy (Neon_vduplane (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
- (INST OpVPR:$Rn, EleOpVPR:$Re, OpImm:$Index)>;
-
-// Pattern for lane in 64-bit vector
-class NI_2VE_mul_lane<Instruction INST, Operand OpImm, SDPatternOperator op,
- RegisterOperand OpVPR, RegisterOperand EleOpVPR,
- ValueType ResTy, ValueType OpTy, ValueType EleOpTy>
- : Pat<(ResTy (op (OpTy OpVPR:$Rn),
- (OpTy (Neon_vduplane (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
- (INST OpVPR:$Rn,
- (SUBREG_TO_REG (i64 0), EleOpVPR:$Re, sub_64), OpImm:$Index)>;
-
-multiclass NI_2VE_mul_v1_pat<string subop, SDPatternOperator op> {
- def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_2s4s"), neon_uimm2_bare,
- op, VPR64, VPR128, v2i32, v2i32, v4i32>;
-
- def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_4s4s"), neon_uimm2_bare,
- op, VPR128, VPR128, v4i32, v4i32, v4i32>;
-
- def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_4h8h"), neon_uimm3_bare,
- op, VPR64, VPR128Lo, v4i16, v4i16, v8i16>;
-
- def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_8h8h"), neon_uimm3_bare,
- op, VPR128, VPR128Lo, v8i16, v8i16, v8i16>;
-
- // The index can only be half of the max value for a lane in a 64-bit vector.
-
- def : NI_2VE_mul_lane<!cast<Instruction>(subop # "_2s4s"), neon_uimm1_bare,
- op, VPR64, VPR64, v2i32, v2i32, v2i32>;
-
- def : NI_2VE_mul_lane<!cast<Instruction>(subop # "_4h8h"), neon_uimm2_bare,
- op, VPR64, VPR64Lo, v4i16, v4i16, v4i16>;
-}
-
-defm MUL_lane_v1 : NI_2VE_mul_v1_pat<"MULve", mul>;
-defm SQDMULH_lane_v1 : NI_2VE_mul_v1_pat<"SQDMULHve", int_arm_neon_vqdmulh>;
-defm SQRDMULH_lane_v1 : NI_2VE_mul_v1_pat<"SQRDMULHve", int_arm_neon_vqrdmulh>;
-
-// Variant 2
-
-multiclass NI_2VE_v2_2op<bit u, bits<4> opcode, string asmop> {
- // The element vector register class is always 128-bit to cover the max index.
- def _2s4s : NI_2VE_2op<0b0, u, 0b10, opcode, asmop, "2s", "2s", "s",
- neon_uimm2_bare, VPR64, VPR64, VPR128> {
- let Inst{11} = {Index{1}};
- let Inst{21} = {Index{0}};
- let Inst{20-16} = Re;
- }
-
- def _4s4s : NI_2VE_2op<0b1, u, 0b10, opcode, asmop, "4s", "4s", "s",
- neon_uimm2_bare, VPR128, VPR128, VPR128> {
- let Inst{11} = {Index{1}};
- let Inst{21} = {Index{0}};
- let Inst{20-16} = Re;
- }
-
- // _1d2d doesn't exist!
-
- def _2d2d : NI_2VE_2op<0b1, u, 0b11, opcode, asmop, "2d", "2d", "d",
- neon_uimm1_bare, VPR128, VPR128, VPR128> {
- let Inst{11} = {Index{0}};
- let Inst{21} = 0b0;
- let Inst{20-16} = Re;
- }
-}
-
-let SchedRW = [WriteFPMul, ReadFPMul, ReadFPMul] in {
-defm FMULve : NI_2VE_v2_2op<0b0, 0b1001, "fmul">;
-defm FMULXve : NI_2VE_v2_2op<0b1, 0b1001, "fmulx">;
-}
-
-class NI_2VE_mul_lane_2d<Instruction INST, Operand OpImm, SDPatternOperator op,
- RegisterOperand OpVPR, RegisterOperand EleOpVPR,
- ValueType ResTy, ValueType OpTy, ValueType EleOpTy,
- SDPatternOperator coreop>
- : Pat<(ResTy (op (OpTy OpVPR:$Rn),
- (OpTy (coreop (EleOpTy EleOpVPR:$Re), (EleOpTy EleOpVPR:$Re))))),
- (INST OpVPR:$Rn,
- (SUBREG_TO_REG (i64 0), EleOpVPR:$Re, sub_64), 0)>;
-
-multiclass NI_2VE_mul_v2_pat<string subop, SDPatternOperator op> {
- def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_2s4s"), neon_uimm2_bare,
- op, VPR64, VPR128, v2f32, v2f32, v4f32>;
-
- def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_4s4s"), neon_uimm2_bare,
- op, VPR128, VPR128, v4f32, v4f32, v4f32>;
-
- def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_2d2d"), neon_uimm1_bare,
- op, VPR128, VPR128, v2f64, v2f64, v2f64>;
-
- // The index can only be half of the max value for a lane in a 64-bit vector.
-
- def : NI_2VE_mul_lane<!cast<Instruction>(subop # "_2s4s"), neon_uimm1_bare,
- op, VPR64, VPR64, v2f32, v2f32, v2f32>;
-
- def : NI_2VE_mul_lane_2d<!cast<Instruction>(subop # "_2d2d"), neon_uimm1_bare,
- op, VPR128, VPR64, v2f64, v2f64, v1f64,
- BinOpFrag<(Neon_combine_2d node:$LHS, node:$RHS)>>;
-}
-
-defm FMUL_lane_v2 : NI_2VE_mul_v2_pat<"FMULve", fmul>;
-defm FMULX_lane_v2 : NI_2VE_mul_v2_pat<"FMULXve", int_aarch64_neon_vmulx>;
-
-def : Pat<(v2f32 (fmul (v2f32 (Neon_vdup (f32 FPR32:$Re))),
- (v2f32 VPR64:$Rn))),
- (FMULve_2s4s VPR64:$Rn, (SUBREG_TO_REG (i32 0), $Re, sub_32), 0)>;
-
-def : Pat<(v4f32 (fmul (v4f32 (Neon_vdup (f32 FPR32:$Re))),
- (v4f32 VPR128:$Rn))),
- (FMULve_4s4s VPR128:$Rn, (SUBREG_TO_REG (i32 0), $Re, sub_32), 0)>;
-
-def : Pat<(v2f64 (fmul (v2f64 (Neon_vdup (f64 FPR64:$Re))),
- (v2f64 VPR128:$Rn))),
- (FMULve_2d2d VPR128:$Rn, (SUBREG_TO_REG (i64 0), $Re, sub_64), 0)>;
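-
-// A multiply by a dup'd FP scalar folds into the by-element form at index
-// 0, saving the explicit DUP.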
-
-// The following patterns use fma; -ffp-contract=fast generates fma.
-
-multiclass NI_2VE_v2<bit u, bits<4> opcode, string asmop> {
- // The element vector register class is always 128-bit to cover the max index.
- def _2s4s : NI_2VE<0b0, u, 0b10, opcode, asmop, "2s", "2s", "s",
- neon_uimm2_bare, VPR64, VPR64, VPR128> {
- let Inst{11} = {Index{1}};
- let Inst{21} = {Index{0}};
- let Inst{20-16} = Re;
- }
-
- def _4s4s : NI_2VE<0b1, u, 0b10, opcode, asmop, "4s", "4s", "s",
- neon_uimm2_bare, VPR128, VPR128, VPR128> {
- let Inst{11} = {Index{1}};
- let Inst{21} = {Index{0}};
- let Inst{20-16} = Re;
- }
-
- // _1d2d doesn't exist!
-
- def _2d2d : NI_2VE<0b1, u, 0b11, opcode, asmop, "2d", "2d", "d",
- neon_uimm1_bare, VPR128, VPR128, VPR128> {
- let Inst{11} = {Index{0}};
- let Inst{21} = 0b0;
- let Inst{20-16} = Re;
- }
-}
-
-defm FMLAvve : NI_2VE_v2<0b0, 0b0001, "fmla">;
-defm FMLSvve : NI_2VE_v2<0b0, 0b0101, "fmls">;
-
-// Pattern for lane in 128-bit vector
-class NI_2VEswap_laneq<Instruction INST, Operand OpImm, SDPatternOperator op,
- RegisterOperand ResVPR, RegisterOperand OpVPR,
- ValueType ResTy, ValueType OpTy,
- SDPatternOperator coreop>
- : Pat<(ResTy (op (ResTy (coreop (OpTy OpVPR:$Re), (i64 OpImm:$Index))),
- (ResTy ResVPR:$Rn), (ResTy ResVPR:$src))),
- (INST ResVPR:$src, ResVPR:$Rn, OpVPR:$Re, OpImm:$Index)>;
-
-// Pattern for lane 0
-class NI_2VEfma_lane0<Instruction INST, SDPatternOperator op,
- RegisterOperand ResVPR, ValueType ResTy>
- : Pat<(ResTy (op (ResTy ResVPR:$Rn),
- (ResTy (Neon_vdup (f32 FPR32:$Re))),
- (ResTy ResVPR:$src))),
- (INST ResVPR:$src, ResVPR:$Rn,
- (SUBREG_TO_REG (i32 0), $Re, sub_32), 0)>;
-
-// Pattern for lane in 64-bit vector
-class NI_2VEswap_lane<Instruction INST, Operand OpImm, SDPatternOperator op,
- RegisterOperand ResVPR, RegisterOperand OpVPR,
- ValueType ResTy, ValueType OpTy,
- SDPatternOperator coreop>
- : Pat<(ResTy (op (ResTy (coreop (OpTy OpVPR:$Re), (i64 OpImm:$Index))),
- (ResTy ResVPR:$Rn), (ResTy ResVPR:$src))),
- (INST ResVPR:$src, ResVPR:$Rn,
- (SUBREG_TO_REG (i64 0), OpVPR:$Re, sub_64), OpImm:$Index)>;
-
-// Pattern for lane in 64-bit vector
-class NI_2VEswap_lane_2d2d<Instruction INST, Operand OpImm,
- SDPatternOperator op,
- RegisterOperand ResVPR, RegisterOperand OpVPR,
- ValueType ResTy, ValueType OpTy,
- SDPatternOperator coreop>
- : Pat<(ResTy (op (ResTy (coreop (OpTy OpVPR:$Re), (OpTy OpVPR:$Re))),
- (ResTy ResVPR:$Rn), (ResTy ResVPR:$src))),
- (INST ResVPR:$src, ResVPR:$Rn,
- (SUBREG_TO_REG (i64 0), OpVPR:$Re, sub_64), 0)>;
-
-multiclass NI_2VE_fma_v2_pat<string subop, SDPatternOperator op> {
- def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_2s4s"),
- neon_uimm2_bare, op, VPR64, VPR128, v2f32, v4f32,
- BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
-
- def : NI_2VEfma_lane0<!cast<Instruction>(subop # "_2s4s"),
- op, VPR64, v2f32>;
-
- def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_4s4s"),
- neon_uimm2_bare, op, VPR128, VPR128, v4f32, v4f32,
- BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
-
- def : NI_2VEfma_lane0<!cast<Instruction>(subop # "_4s4s"),
- op, VPR128, v4f32>;
-
- def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_2d2d"),
- neon_uimm1_bare, op, VPR128, VPR128, v2f64, v2f64,
- BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
-
- // The index can only be half of the max value for a lane in a 64-bit vector.
-
- def : NI_2VEswap_lane<!cast<Instruction>(subop # "_2s4s"),
- neon_uimm1_bare, op, VPR64, VPR64, v2f32, v2f32,
- BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>;
-
- def : NI_2VEswap_lane_2d2d<!cast<Instruction>(subop # "_2d2d"),
- neon_uimm1_bare, op, VPR128, VPR64, v2f64, v1f64,
- BinOpFrag<(Neon_combine_2d node:$LHS, node:$RHS)>>;
-}
-
-defm FMLA_lane_v2_s : NI_2VE_fma_v2_pat<"FMLAvve", fma>;
-
-// Pattern for lane 0
-class NI_2VEfms_lane0<Instruction INST, SDPatternOperator op,
- RegisterOperand ResVPR, ValueType ResTy>
- : Pat<(ResTy (op (ResTy (fneg ResVPR:$Rn)),
- (ResTy (Neon_vdup (f32 FPR32:$Re))),
- (ResTy ResVPR:$src))),
- (INST ResVPR:$src, ResVPR:$Rn,
- (SUBREG_TO_REG (i32 0), $Re, sub_32), 0)>;
-
-multiclass NI_2VE_fms_v2_pat<string subop, SDPatternOperator op> {
- def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_2s4s"),
- neon_uimm2_bare, op, VPR64, VPR128, v2f32, v4f32,
- BinOpFrag<(fneg (Neon_vduplane node:$LHS, node:$RHS))>>;
-
- def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_2s4s"),
- neon_uimm2_bare, op, VPR64, VPR128, v2f32, v4f32,
- BinOpFrag<(Neon_vduplane
- (fneg node:$LHS), node:$RHS)>>;
-
- def : NI_2VEfms_lane0<!cast<Instruction>(subop # "_2s4s"),
- op, VPR64, v2f32>;
-
- def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_4s4s"),
- neon_uimm2_bare, op, VPR128, VPR128, v4f32, v4f32,
- BinOpFrag<(fneg (Neon_vduplane
- node:$LHS, node:$RHS))>>;
-
- def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_4s4s"),
- neon_uimm2_bare, op, VPR128, VPR128, v4f32, v4f32,
- BinOpFrag<(Neon_vduplane
- (fneg node:$LHS), node:$RHS)>>;
-
- def : NI_2VEfms_lane0<!cast<Instruction>(subop # "_4s4s"),
- op, VPR128, v4f32>;
-
- def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_2d2d"),
- neon_uimm1_bare, op, VPR128, VPR128, v2f64, v2f64,
- BinOpFrag<(fneg (Neon_vduplane
- node:$LHS, node:$RHS))>>;
-
- def : NI_2VEswap_laneq<!cast<Instruction>(subop # "_2d2d"),
- neon_uimm1_bare, op, VPR128, VPR128, v2f64, v2f64,
- BinOpFrag<(Neon_vduplane
- (fneg node:$LHS), node:$RHS)>>;
-
- // The index can only be half of the max value for a lane in a 64-bit vector.
-
- def : NI_2VEswap_lane<!cast<Instruction>(subop # "_2s4s"),
- neon_uimm1_bare, op, VPR64, VPR64, v2f32, v2f32,
- BinOpFrag<(fneg (Neon_vduplane
- node:$LHS, node:$RHS))>>;
-
- def : NI_2VEswap_lane<!cast<Instruction>(subop # "_2s4s"),
- neon_uimm1_bare, op, VPR64, VPR64, v2f32, v2f32,
- BinOpFrag<(Neon_vduplane
- (fneg node:$LHS), node:$RHS)>>;
-
- def : NI_2VEswap_lane<!cast<Instruction>(subop # "_4s4s"),
- neon_uimm1_bare, op, VPR128, VPR64, v4f32, v2f32,
- BinOpFrag<(fneg (Neon_vduplane node:$LHS, node:$RHS))>>;
-
- def : NI_2VEswap_lane<!cast<Instruction>(subop # "_4s4s"),
- neon_uimm1_bare, op, VPR128, VPR64, v4f32, v2f32,
- BinOpFrag<(Neon_vduplane (fneg node:$LHS), node:$RHS)>>;
-
- def : NI_2VEswap_lane_2d2d<!cast<Instruction>(subop # "_2d2d"),
- neon_uimm1_bare, op, VPR128, VPR64, v2f64, v1f64,
- BinOpFrag<(fneg (Neon_combine_2d
- node:$LHS, node:$RHS))>>;
-
- def : NI_2VEswap_lane_2d2d<!cast<Instruction>(subop # "_2d2d"),
- neon_uimm1_bare, op, VPR128, VPR64, v2f64, v1f64,
- BinOpFrag<(Neon_combine_2d
- (fneg node:$LHS), (fneg node:$RHS))>>;
-}
-
-defm FMLS_lane_v2_s : NI_2VE_fms_v2_pat<"FMLSvve", fma>;
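-
-// fmls has no node of its own: it is fma with one multiplicand negated, and
-// the patterns above match fneg whether it sits on the dup'd lane vector,
-// inside the dup, or on the other operand.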
-
-// Variant 3: Long type
-// E.g. SMLAL:  4S/4H/H (v0-v15), 2D/2S/S
-// SMLAL2: 4S/8H/H (v0-v15), 2D/4S/S
-
-multiclass NI_2VE_v3<bit u, bits<4> opcode, string asmop> {
- // The element vector register class is always 128-bit to cover the max index.
- def _2d2s : NI_2VE<0b0, u, 0b10, opcode, asmop, "2d", "2s", "s",
- neon_uimm2_bare, VPR128, VPR64, VPR128> {
- let Inst{11} = {Index{1}};
- let Inst{21} = {Index{0}};
- let Inst{20-16} = Re;
- }
-
- def _2d4s : NI_2VE<0b1, u, 0b10, opcode, asmop # "2", "2d", "4s", "s",
- neon_uimm2_bare, VPR128, VPR128, VPR128> {
- let Inst{11} = {Index{1}};
- let Inst{21} = {Index{0}};
- let Inst{20-16} = Re;
- }
-
- // Index operations on 16-bit (H) elements are restricted to using v0-v15.
- def _4s8h : NI_2VE<0b1, u, 0b01, opcode, asmop # "2", "4s", "8h", "h",
- neon_uimm3_bare, VPR128, VPR128, VPR128Lo> {
- let Inst{11} = {Index{2}};
- let Inst{21} = {Index{1}};
- let Inst{20} = {Index{0}};
- let Inst{19-16} = Re{3-0};
- }
-
- def _4s4h : NI_2VE<0b0, u, 0b01, opcode, asmop, "4s", "4h", "h",
- neon_uimm3_bare, VPR128, VPR64, VPR128Lo> {
- let Inst{11} = {Index{2}};
- let Inst{21} = {Index{1}};
- let Inst{20} = {Index{0}};
- let Inst{19-16} = Re{3-0};
- }
-}
-
-defm SMLALvve : NI_2VE_v3<0b0, 0b0010, "smlal">;
-defm UMLALvve : NI_2VE_v3<0b1, 0b0010, "umlal">;
-defm SMLSLvve : NI_2VE_v3<0b0, 0b0110, "smlsl">;
-defm UMLSLvve : NI_2VE_v3<0b1, 0b0110, "umlsl">;
-defm SQDMLALvve : NI_2VE_v3<0b0, 0b0011, "sqdmlal">;
-defm SQDMLSLvve : NI_2VE_v3<0b0, 0b0111, "sqdmlsl">;
-
-multiclass NI_2VE_v3_2op<bit u, bits<4> opcode, string asmop> {
- // The element vector register class is always 128-bit to cover the max index.
- def _2d2s : NI_2VE_2op<0b0, u, 0b10, opcode, asmop, "2d", "2s", "s",
- neon_uimm2_bare, VPR128, VPR64, VPR128> {
- let Inst{11} = {Index{1}};
- let Inst{21} = {Index{0}};
- let Inst{20-16} = Re;
- }
-
- def _2d4s : NI_2VE_2op<0b1, u, 0b10, opcode, asmop # "2", "2d", "4s", "s",
- neon_uimm2_bare, VPR128, VPR128, VPR128> {
- let Inst{11} = {Index{1}};
- let Inst{21} = {Index{0}};
- let Inst{20-16} = Re;
- }
-
- // Index operations on 16-bit (H) elements are restricted to using v0-v15.
- def _4s8h : NI_2VE_2op<0b1, u, 0b01, opcode, asmop # "2", "4s", "8h", "h",
- neon_uimm3_bare, VPR128, VPR128, VPR128Lo> {
- let Inst{11} = {Index{2}};
- let Inst{21} = {Index{1}};
- let Inst{20} = {Index{0}};
- let Inst{19-16} = Re{3-0};
- }
-
- def _4s4h : NI_2VE_2op<0b0, u, 0b01, opcode, asmop, "4s", "4h", "h",
- neon_uimm3_bare, VPR128, VPR64, VPR128Lo> {
- let Inst{11} = {Index{2}};
- let Inst{21} = {Index{1}};
- let Inst{20} = {Index{0}};
- let Inst{19-16} = Re{3-0};
- }
-}
-
-let SchedRW = [WriteFPMul, ReadFPMul, ReadFPMul] in {
-defm SMULLve : NI_2VE_v3_2op<0b0, 0b1010, "smull">;
-defm UMULLve : NI_2VE_v3_2op<0b1, 0b1010, "umull">;
-defm SQDMULLve : NI_2VE_v3_2op<0b0, 0b1011, "sqdmull">;
-}
-
-def : Pat<(v1f64 (scalar_to_vector (f64 FPR64:$src))),
- (FMOVdd $src)>;
-
-// Pattern for lane in 128-bit vector
-class NI_2VEL2_laneq<Instruction INST, Operand OpImm, SDPatternOperator op,
- RegisterOperand EleOpVPR, ValueType ResTy,
- ValueType OpTy, ValueType EleOpTy, ValueType HalfOpTy,
- SDPatternOperator hiop>
- : Pat<(ResTy (op (ResTy VPR128:$src),
- (HalfOpTy (hiop (OpTy VPR128:$Rn))),
- (HalfOpTy (Neon_vduplane
- (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
- (INST VPR128:$src, VPR128:$Rn, EleOpVPR:$Re, OpImm:$Index)>;
-
-// Pattern for lane in 64-bit vector
-class NI_2VEL2_lane<Instruction INST, Operand OpImm, SDPatternOperator op,
- RegisterOperand EleOpVPR, ValueType ResTy,
- ValueType OpTy, ValueType EleOpTy, ValueType HalfOpTy,
- SDPatternOperator hiop>
- : Pat<(ResTy (op (ResTy VPR128:$src),
- (HalfOpTy (hiop (OpTy VPR128:$Rn))),
- (HalfOpTy (Neon_vduplane
- (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
- (INST VPR128:$src, VPR128:$Rn,
- (SUBREG_TO_REG (i64 0), EleOpVPR:$Re, sub_64), OpImm:$Index)>;
-
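-// Pattern for fixed lane 0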
-class NI_2VEL2_lane0<Instruction INST, SDPatternOperator op,
- ValueType ResTy, ValueType OpTy, ValueType HalfOpTy,
- SDPatternOperator hiop, Instruction DupInst>
- : Pat<(ResTy (op (ResTy VPR128:$src),
- (HalfOpTy (hiop (OpTy VPR128:$Rn))),
- (HalfOpTy (Neon_vdup (i32 GPR32:$Re))))),
- (INST VPR128:$src, VPR128:$Rn, (DupInst $Re), 0)>;
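-
-// The "2" (high-half) variants feed the top half of $Rn through hiop
-// (Neon_High8H/Neon_High4S) before the widening operation.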
-
-multiclass NI_2VEL_v3_pat<string subop, SDPatternOperator op> {
- def : NI_2VE_laneq<!cast<Instruction>(subop # "_4s4h"), neon_uimm3_bare,
- op, VPR128, VPR64, VPR128Lo, v4i32, v4i16, v8i16>;
-
- def : NI_2VE_laneq<!cast<Instruction>(subop # "_2d2s"), neon_uimm2_bare,
- op, VPR128, VPR64, VPR128, v2i64, v2i32, v4i32>;
-
- def : NI_2VEL2_laneq<!cast<Instruction>(subop # "_4s8h"), neon_uimm3_bare,
- op, VPR128Lo, v4i32, v8i16, v8i16, v4i16, Neon_High8H>;
-
- def : NI_2VEL2_laneq<!cast<Instruction>(subop # "_2d4s"), neon_uimm2_bare,
- op, VPR128, v2i64, v4i32, v4i32, v2i32, Neon_High4S>;
-
- def : NI_2VEL2_lane0<!cast<Instruction>(subop # "_4s8h"),
- op, v4i32, v8i16, v4i16, Neon_High8H, DUP8h>;
-
- def : NI_2VEL2_lane0<!cast<Instruction>(subop # "_2d4s"),
- op, v2i64, v4i32, v2i32, Neon_High4S, DUP4s>;
-
- // The index can only be half of the max value for a lane in a 64-bit vector.
-
- def : NI_2VE_lane<!cast<Instruction>(subop # "_4s4h"), neon_uimm2_bare,
- op, VPR128, VPR64, VPR64Lo, v4i32, v4i16, v4i16>;
-
- def : NI_2VE_lane<!cast<Instruction>(subop # "_2d2s"), neon_uimm1_bare,
- op, VPR128, VPR64, VPR64, v2i64, v2i32, v2i32>;
-
- def : NI_2VEL2_lane<!cast<Instruction>(subop # "_4s8h"), neon_uimm2_bare,
- op, VPR64Lo, v4i32, v8i16, v4i16, v4i16, Neon_High8H>;
-
- def : NI_2VEL2_lane<!cast<Instruction>(subop # "_2d4s"), neon_uimm1_bare,
- op, VPR64, v2i64, v4i32, v2i32, v2i32, Neon_High4S>;
-}
-
-defm SMLAL_lane_v3 : NI_2VEL_v3_pat<"SMLALvve", Neon_smlal>;
-defm UMLAL_lane_v3 : NI_2VEL_v3_pat<"UMLALvve", Neon_umlal>;
-defm SMLSL_lane_v3 : NI_2VEL_v3_pat<"SMLSLvve", Neon_smlsl>;
-defm UMLSL_lane_v3 : NI_2VEL_v3_pat<"UMLSLvve", Neon_umlsl>;
-
-// Pattern for lane in 128-bit vector
-class NI_2VEL2_mul_laneq<Instruction INST, Operand OpImm, SDPatternOperator op,
- RegisterOperand EleOpVPR, ValueType ResTy,
- ValueType OpTy, ValueType EleOpTy, ValueType HalfOpTy,
- SDPatternOperator hiop>
- : Pat<(ResTy (op
- (HalfOpTy (hiop (OpTy VPR128:$Rn))),
- (HalfOpTy (Neon_vduplane
- (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
- (INST VPR128:$Rn, EleOpVPR:$Re, OpImm:$Index)>;
-
-// Pattern for lane in 64-bit vector
-class NI_2VEL2_mul_lane<Instruction INST, Operand OpImm, SDPatternOperator op,
- RegisterOperand EleOpVPR, ValueType ResTy,
- ValueType OpTy, ValueType EleOpTy, ValueType HalfOpTy,
- SDPatternOperator hiop>
- : Pat<(ResTy (op
- (HalfOpTy (hiop (OpTy VPR128:$Rn))),
- (HalfOpTy (Neon_vduplane
- (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))),
- (INST VPR128:$Rn,
- (SUBREG_TO_REG (i64 0), EleOpVPR:$Re, sub_64), OpImm:$Index)>;
-
-// Pattern for fixed lane 0
-class NI_2VEL2_mul_lane0<Instruction INST, SDPatternOperator op,
- ValueType ResTy, ValueType OpTy, ValueType HalfOpTy,
- SDPatternOperator hiop, Instruction DupInst>
- : Pat<(ResTy (op
- (HalfOpTy (hiop (OpTy VPR128:$Rn))),
- (HalfOpTy (Neon_vdup (i32 GPR32:$Re))))),
- (INST VPR128:$Rn, (DupInst $Re), 0)>;
-
-multiclass NI_2VEL_mul_v3_pat<string subop, SDPatternOperator op> {
- def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_4s4h"), neon_uimm3_bare,
- op, VPR64, VPR128Lo, v4i32, v4i16, v8i16>;
-
- def : NI_2VE_mul_laneq<!cast<Instruction>(subop # "_2d2s"), neon_uimm2_bare,
- op, VPR64, VPR128, v2i64, v2i32, v4i32>;
-
- def : NI_2VEL2_mul_laneq<!cast<Instruction>(subop # "_4s8h"), neon_uimm3_bare,
- op, VPR128Lo, v4i32, v8i16, v8i16, v4i16, Neon_High8H>;
-
- def : NI_2VEL2_mul_laneq<!cast<Instruction>(subop # "_2d4s"), neon_uimm2_bare,
- op, VPR128, v2i64, v4i32, v4i32, v2i32, Neon_High4S>;
-
- def : NI_2VEL2_mul_lane0<!cast<Instruction>(subop # "_4s8h"),
- op, v4i32, v8i16, v4i16, Neon_High8H, DUP8h>;
-
- def : NI_2VEL2_mul_lane0<!cast<Instruction>(subop # "_2d4s"),
- op, v2i64, v4i32, v2i32, Neon_High4S, DUP4s>;
-
- // The index can only be half of the max value for a lane in a 64-bit vector.
-
- def : NI_2VE_mul_lane<!cast<Instruction>(subop # "_4s4h"), neon_uimm2_bare,
- op, VPR64, VPR64Lo, v4i32, v4i16, v4i16>;
-
- def : NI_2VE_mul_lane<!cast<Instruction>(subop # "_2d2s"), neon_uimm1_bare,
- op, VPR64, VPR64, v2i64, v2i32, v2i32>;
-
- def : NI_2VEL2_mul_lane<!cast<Instruction>(subop # "_4s8h"), neon_uimm2_bare,
- op, VPR64Lo, v4i32, v8i16, v4i16, v4i16, Neon_High8H>;
-
- def : NI_2VEL2_mul_lane<!cast<Instruction>(subop # "_2d4s"), neon_uimm1_bare,
- op, VPR64, v2i64, v4i32, v2i32, v2i32, Neon_High4S>;
-}
-
-defm SMULL_lane_v3 : NI_2VEL_mul_v3_pat<"SMULLve", int_arm_neon_vmulls>;
-defm UMULL_lane_v3 : NI_2VEL_mul_v3_pat<"UMULLve", int_arm_neon_vmullu>;
-defm SQDMULL_lane_v3 : NI_2VEL_mul_v3_pat<"SQDMULLve", int_arm_neon_vqdmull>;
-
-multiclass NI_qdma<SDPatternOperator op> {
- def _4s : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
- (op node:$Ra,
- (v4i32 (int_arm_neon_vqdmull node:$Rn, node:$Rm)))>;
-
- def _2d : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm),
- (op node:$Ra,
- (v2i64 (int_arm_neon_vqdmull node:$Rn, node:$Rm)))>;
-}
-
-defm Neon_qdmlal : NI_qdma<int_arm_neon_vqadds>;
-defm Neon_qdmlsl : NI_qdma<int_arm_neon_vqsubs>;
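-
-// There is no single sqdmlal/sqdmlsl intrinsic here: the PatFrags compose
-// int_arm_neon_vqdmull with the saturating add/sub intrinsics instead.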
-
-multiclass NI_2VEL_v3_qdma_pat<string subop, string op> {
- def : NI_2VE_laneq<!cast<Instruction>(subop # "_4s4h"), neon_uimm3_bare,
- !cast<PatFrag>(op # "_4s"), VPR128, VPR64, VPR128Lo,
- v4i32, v4i16, v8i16>;
-
- def : NI_2VE_laneq<!cast<Instruction>(subop # "_2d2s"), neon_uimm2_bare,
- !cast<PatFrag>(op # "_2d"), VPR128, VPR64, VPR128,
- v2i64, v2i32, v4i32>;
-
- def : NI_2VEL2_laneq<!cast<Instruction>(subop # "_4s8h"), neon_uimm3_bare,
- !cast<PatFrag>(op # "_4s"), VPR128Lo,
- v4i32, v8i16, v8i16, v4i16, Neon_High8H>;
-
- def : NI_2VEL2_laneq<!cast<Instruction>(subop # "_2d4s"), neon_uimm2_bare,
- !cast<PatFrag>(op # "_2d"), VPR128,
- v2i64, v4i32, v4i32, v2i32, Neon_High4S>;
-
- def : NI_2VEL2_lane0<!cast<Instruction>(subop # "_4s8h"),
- !cast<PatFrag>(op # "_4s"),
- v4i32, v8i16, v4i16, Neon_High8H, DUP8h>;
-
- def : NI_2VEL2_lane0<!cast<Instruction>(subop # "_2d4s"),
- !cast<PatFrag>(op # "_2d"),
- v2i64, v4i32, v2i32, Neon_High4S, DUP4s>;
-
- // The index can only be half of the max value for a lane in a 64-bit vector.
-
- def : NI_2VE_lane<!cast<Instruction>(subop # "_4s4h"), neon_uimm2_bare,
- !cast<PatFrag>(op # "_4s"), VPR128, VPR64, VPR64Lo,
- v4i32, v4i16, v4i16>;
-
- def : NI_2VE_lane<!cast<Instruction>(subop # "_2d2s"), neon_uimm1_bare,
- !cast<PatFrag>(op # "_2d"), VPR128, VPR64, VPR64,
- v2i64, v2i32, v2i32>;
-
- def : NI_2VEL2_lane<!cast<Instruction>(subop # "_4s8h"), neon_uimm2_bare,
- !cast<PatFrag>(op # "_4s"), VPR64Lo,
- v4i32, v8i16, v4i16, v4i16, Neon_High8H>;
-
- def : NI_2VEL2_lane<!cast<Instruction>(subop # "_2d4s"), neon_uimm1_bare,
- !cast<PatFrag>(op # "_2d"), VPR64,
- v2i64, v4i32, v2i32, v2i32, Neon_High4S>;
-}
-
-defm SQDMLAL_lane_v3 : NI_2VEL_v3_qdma_pat<"SQDMLALvve", "Neon_qdmlal">;
-defm SQDMLSL_lane_v3 : NI_2VEL_v3_qdma_pat<"SQDMLSLvve", "Neon_qdmlsl">;
-
-// End of implementation for instruction class (3V Elem)
-
-class NeonI_REV<string asmop, string Res, bits<2> size, bit Q, bit U,
- bits<5> opcode, RegisterOperand ResVPR, ValueType ResTy,
- SDPatternOperator Neon_Rev>
- : NeonI_2VMisc<Q, U, size, opcode,
- (outs ResVPR:$Rd), (ins ResVPR:$Rn),
- asmop # "\t$Rd." # Res # ", $Rn." # Res,
- [(set (ResTy ResVPR:$Rd),
- (ResTy (Neon_Rev (ResTy ResVPR:$Rn))))],
- NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU]>;
-
-def REV64_16b : NeonI_REV<"rev64", "16b", 0b00, 0b1, 0b0, 0b00000, VPR128,
- v16i8, Neon_rev64>;
-def REV64_8h : NeonI_REV<"rev64", "8h", 0b01, 0b1, 0b0, 0b00000, VPR128,
- v8i16, Neon_rev64>;
-def REV64_4s : NeonI_REV<"rev64", "4s", 0b10, 0b1, 0b0, 0b00000, VPR128,
- v4i32, Neon_rev64>;
-def REV64_8b : NeonI_REV<"rev64", "8b", 0b00, 0b0, 0b0, 0b00000, VPR64,
- v8i8, Neon_rev64>;
-def REV64_4h : NeonI_REV<"rev64", "4h", 0b01, 0b0, 0b0, 0b00000, VPR64,
- v4i16, Neon_rev64>;
-def REV64_2s : NeonI_REV<"rev64", "2s", 0b10, 0b0, 0b0, 0b00000, VPR64,
- v2i32, Neon_rev64>;
-
-def : Pat<(v4f32 (Neon_rev64 (v4f32 VPR128:$Rn))), (REV64_4s VPR128:$Rn)>;
-def : Pat<(v2f32 (Neon_rev64 (v2f32 VPR64:$Rn))), (REV64_2s VPR64:$Rn)>;
-
-def REV32_16b : NeonI_REV<"rev32", "16b", 0b00, 0b1, 0b1, 0b00000, VPR128,
- v16i8, Neon_rev32>;
-def REV32_8h : NeonI_REV<"rev32", "8h", 0b01, 0b1, 0b1, 0b00000, VPR128,
- v8i16, Neon_rev32>;
-def REV32_8b : NeonI_REV<"rev32", "8b", 0b00, 0b0, 0b1, 0b00000, VPR64,
- v8i8, Neon_rev32>;
-def REV32_4h : NeonI_REV<"rev32", "4h", 0b01, 0b0, 0b1, 0b00000, VPR64,
- v4i16, Neon_rev32>;
-
-def REV16_16b : NeonI_REV<"rev16", "16b", 0b00, 0b1, 0b0, 0b00001, VPR128,
- v16i8, Neon_rev16>;
-def REV16_8b : NeonI_REV<"rev16", "8b", 0b00, 0b0, 0b0, 0b00001, VPR64,
- v8i8, Neon_rev16>;
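-
-// REV64/REV32/REV16 reverse the order of the elements within each 64-bit,
-// 32-bit or 16-bit container, respectively.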
-
-multiclass NeonI_PairwiseAdd<string asmop, bit U, bits<5> opcode,
- SDPatternOperator Neon_Padd> {
- def 16b8h : NeonI_2VMisc<0b1, U, 0b00, opcode,
- (outs VPR128:$Rd), (ins VPR128:$Rn),
- asmop # "\t$Rd.8h, $Rn.16b",
- [(set (v8i16 VPR128:$Rd),
- (v8i16 (Neon_Padd (v16i8 VPR128:$Rn))))],
- NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU]>;
-
- def 8b4h : NeonI_2VMisc<0b0, U, 0b00, opcode,
- (outs VPR64:$Rd), (ins VPR64:$Rn),
- asmop # "\t$Rd.4h, $Rn.8b",
- [(set (v4i16 VPR64:$Rd),
- (v4i16 (Neon_Padd (v8i8 VPR64:$Rn))))],
- NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU]>;
-
- def 8h4s : NeonI_2VMisc<0b1, U, 0b01, opcode,
- (outs VPR128:$Rd), (ins VPR128:$Rn),
- asmop # "\t$Rd.4s, $Rn.8h",
- [(set (v4i32 VPR128:$Rd),
- (v4i32 (Neon_Padd (v8i16 VPR128:$Rn))))],
- NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU]>;
-
- def 4h2s : NeonI_2VMisc<0b0, U, 0b01, opcode,
- (outs VPR64:$Rd), (ins VPR64:$Rn),
- asmop # "\t$Rd.2s, $Rn.4h",
- [(set (v2i32 VPR64:$Rd),
- (v2i32 (Neon_Padd (v4i16 VPR64:$Rn))))],
- NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU]>;
-
- def 4s2d : NeonI_2VMisc<0b1, U, 0b10, opcode,
- (outs VPR128:$Rd), (ins VPR128:$Rn),
- asmop # "\t$Rd.2d, $Rn.4s",
- [(set (v2i64 VPR128:$Rd),
- (v2i64 (Neon_Padd (v4i32 VPR128:$Rn))))],
- NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU]>;
-
- def 2s1d : NeonI_2VMisc<0b0, U, 0b10, opcode,
- (outs VPR64:$Rd), (ins VPR64:$Rn),
- asmop # "\t$Rd.1d, $Rn.2s",
- [(set (v1i64 VPR64:$Rd),
- (v1i64 (Neon_Padd (v2i32 VPR64:$Rn))))],
- NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU]>;
-}
-
-defm SADDLP : NeonI_PairwiseAdd<"saddlp", 0b0, 0b00010,
- int_arm_neon_vpaddls>;
-defm UADDLP : NeonI_PairwiseAdd<"uaddlp", 0b1, 0b00010,
- int_arm_neon_vpaddlu>;
-
-def : Pat<(v1i64 (int_aarch64_neon_saddlv (v2i32 VPR64:$Rn))),
- (SADDLP2s1d $Rn)>;
-def : Pat<(v1i64 (int_aarch64_neon_uaddlv (v2i32 VPR64:$Rn))),
- (UADDLP2s1d $Rn)>;
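-
-// On a v2i32 input the long pairwise add already reduces everything to one
-// element, so the cross-lane saddlv/uaddlv intrinsics map directly to
-// SADDLP/UADDLP.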
-
-multiclass NeonI_PairwiseAddAcc<string asmop, bit U, bits<5> opcode,
- SDPatternOperator Neon_Padd> {
- let Constraints = "$src = $Rd" in {
- def 16b8h : NeonI_2VMisc<0b1, U, 0b00, opcode,
- (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
- asmop # "\t$Rd.8h, $Rn.16b",
- [(set (v8i16 VPR128:$Rd),
- (v8i16 (Neon_Padd
- (v8i16 VPR128:$src), (v16i8 VPR128:$Rn))))],
- NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
-
- def 8b4h : NeonI_2VMisc<0b0, U, 0b00, opcode,
- (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn),
- asmop # "\t$Rd.4h, $Rn.8b",
- [(set (v4i16 VPR64:$Rd),
- (v4i16 (Neon_Padd
- (v4i16 VPR64:$src), (v8i8 VPR64:$Rn))))],
- NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
-
- def 8h4s : NeonI_2VMisc<0b1, U, 0b01, opcode,
- (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
- asmop # "\t$Rd.4s, $Rn.8h",
- [(set (v4i32 VPR128:$Rd),
- (v4i32 (Neon_Padd
- (v4i32 VPR128:$src), (v8i16 VPR128:$Rn))))],
- NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
-
- def 4h2s : NeonI_2VMisc<0b0, U, 0b01, opcode,
- (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn),
- asmop # "\t$Rd.2s, $Rn.4h",
- [(set (v2i32 VPR64:$Rd),
- (v2i32 (Neon_Padd
- (v2i32 VPR64:$src), (v4i16 VPR64:$Rn))))],
- NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
-
- def 4s2d : NeonI_2VMisc<0b1, U, 0b10, opcode,
- (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
- asmop # "\t$Rd.2d, $Rn.4s",
- [(set (v2i64 VPR128:$Rd),
- (v2i64 (Neon_Padd
- (v2i64 VPR128:$src), (v4i32 VPR128:$Rn))))],
- NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
-
- def 2s1d : NeonI_2VMisc<0b0, U, 0b10, opcode,
- (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn),
- asmop # "\t$Rd.1d, $Rn.2s",
- [(set (v1i64 VPR64:$Rd),
- (v1i64 (Neon_Padd
- (v1i64 VPR64:$src), (v2i32 VPR64:$Rn))))],
- NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
- }
-}
-
-defm SADALP : NeonI_PairwiseAddAcc<"sadalp", 0b0, 0b00110,
- int_arm_neon_vpadals>;
-defm UADALP : NeonI_PairwiseAddAcc<"uadalp", 0b1, 0b00110,
- int_arm_neon_vpadalu>;
-
-multiclass NeonI_2VMisc_BHSDsize_1Arg<string asmop, bit U, bits<5> opcode> {
- def 16b : NeonI_2VMisc<0b1, U, 0b00, opcode,
- (outs VPR128:$Rd), (ins VPR128:$Rn),
- asmop # "\t$Rd.16b, $Rn.16b",
- [], NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU]>;
-
- def 8h : NeonI_2VMisc<0b1, U, 0b01, opcode,
- (outs VPR128:$Rd), (ins VPR128:$Rn),
- asmop # "\t$Rd.8h, $Rn.8h",
- [], NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU]>;
-
- def 4s : NeonI_2VMisc<0b1, U, 0b10, opcode,
- (outs VPR128:$Rd), (ins VPR128:$Rn),
- asmop # "\t$Rd.4s, $Rn.4s",
- [], NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU]>;
-
- def 2d : NeonI_2VMisc<0b1, U, 0b11, opcode,
- (outs VPR128:$Rd), (ins VPR128:$Rn),
- asmop # "\t$Rd.2d, $Rn.2d",
- [], NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU]>;
-
- def 8b : NeonI_2VMisc<0b0, U, 0b00, opcode,
- (outs VPR64:$Rd), (ins VPR64:$Rn),
- asmop # "\t$Rd.8b, $Rn.8b",
- [], NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU]>;
-
- def 4h : NeonI_2VMisc<0b0, U, 0b01, opcode,
- (outs VPR64:$Rd), (ins VPR64:$Rn),
- asmop # "\t$Rd.4h, $Rn.4h",
- [], NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU]>;
-
- def 2s : NeonI_2VMisc<0b0, U, 0b10, opcode,
- (outs VPR64:$Rd), (ins VPR64:$Rn),
- asmop # "\t$Rd.2s, $Rn.2s",
- [], NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU]>;
-}
-
-defm SQABS : NeonI_2VMisc_BHSDsize_1Arg<"sqabs", 0b0, 0b00111>;
-defm SQNEG : NeonI_2VMisc_BHSDsize_1Arg<"sqneg", 0b1, 0b00111>;
-defm ABS : NeonI_2VMisc_BHSDsize_1Arg<"abs", 0b0, 0b01011>;
-defm NEG : NeonI_2VMisc_BHSDsize_1Arg<"neg", 0b1, 0b01011>;
-
-multiclass NeonI_2VMisc_BHSD_1Arg_Pattern<string Prefix,
- SDPatternOperator Neon_Op> {
- def : Pat<(v16i8 (Neon_Op (v16i8 VPR128:$Rn))),
- (v16i8 (!cast<Instruction>(Prefix # 16b) (v16i8 VPR128:$Rn)))>;
-
- def : Pat<(v8i16 (Neon_Op (v8i16 VPR128:$Rn))),
- (v8i16 (!cast<Instruction>(Prefix # 8h) (v8i16 VPR128:$Rn)))>;
-
- def : Pat<(v4i32 (Neon_Op (v4i32 VPR128:$Rn))),
- (v4i32 (!cast<Instruction>(Prefix # 4s) (v4i32 VPR128:$Rn)))>;
-
- def : Pat<(v2i64 (Neon_Op (v2i64 VPR128:$Rn))),
- (v2i64 (!cast<Instruction>(Prefix # 2d) (v2i64 VPR128:$Rn)))>;
-
- def : Pat<(v8i8 (Neon_Op (v8i8 VPR64:$Rn))),
- (v8i8 (!cast<Instruction>(Prefix # 8b) (v8i8 VPR64:$Rn)))>;
-
- def : Pat<(v4i16 (Neon_Op (v4i16 VPR64:$Rn))),
- (v4i16 (!cast<Instruction>(Prefix # 4h) (v4i16 VPR64:$Rn)))>;
-
- def : Pat<(v2i32 (Neon_Op (v2i32 VPR64:$Rn))),
- (v2i32 (!cast<Instruction>(Prefix # 2s) (v2i32 VPR64:$Rn)))>;
-}
-
-defm : NeonI_2VMisc_BHSD_1Arg_Pattern<"SQABS", int_arm_neon_vqabs>;
-defm : NeonI_2VMisc_BHSD_1Arg_Pattern<"SQNEG", int_arm_neon_vqneg>;
-defm : NeonI_2VMisc_BHSD_1Arg_Pattern<"ABS", int_arm_neon_vabs>;
-
-def : Pat<(v16i8 (sub
- (v16i8 Neon_AllZero),
- (v16i8 VPR128:$Rn))),
- (v16i8 (NEG16b (v16i8 VPR128:$Rn)))>;
-def : Pat<(v8i8 (sub
- (v8i8 Neon_AllZero),
- (v8i8 VPR64:$Rn))),
- (v8i8 (NEG8b (v8i8 VPR64:$Rn)))>;
-def : Pat<(v8i16 (sub
- (v8i16 (bitconvert (v16i8 Neon_AllZero))),
- (v8i16 VPR128:$Rn))),
- (v8i16 (NEG8h (v8i16 VPR128:$Rn)))>;
-def : Pat<(v4i16 (sub
- (v4i16 (bitconvert (v8i8 Neon_AllZero))),
- (v4i16 VPR64:$Rn))),
- (v4i16 (NEG4h (v4i16 VPR64:$Rn)))>;
-def : Pat<(v4i32 (sub
- (v4i32 (bitconvert (v16i8 Neon_AllZero))),
- (v4i32 VPR128:$Rn))),
- (v4i32 (NEG4s (v4i32 VPR128:$Rn)))>;
-def : Pat<(v2i32 (sub
- (v2i32 (bitconvert (v8i8 Neon_AllZero))),
- (v2i32 VPR64:$Rn))),
- (v2i32 (NEG2s (v2i32 VPR64:$Rn)))>;
-def : Pat<(v2i64 (sub
- (v2i64 (bitconvert (v16i8 Neon_AllZero))),
- (v2i64 VPR128:$Rn))),
- (v2i64 (NEG2d (v2i64 VPR128:$Rn)))>;
-
-multiclass NeonI_2VMisc_BHSDsize_2Args<string asmop, bit U, bits<5> opcode> {
- let Constraints = "$src = $Rd" in {
- def 16b : NeonI_2VMisc<0b1, U, 0b00, opcode,
- (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
- asmop # "\t$Rd.16b, $Rn.16b",
- [], NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
-
- def 8h : NeonI_2VMisc<0b1, U, 0b01, opcode,
- (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
- asmop # "\t$Rd.8h, $Rn.8h",
- [], NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
-
- def 4s : NeonI_2VMisc<0b1, U, 0b10, opcode,
- (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
- asmop # "\t$Rd.4s, $Rn.4s",
- [], NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
-
- def 2d : NeonI_2VMisc<0b1, U, 0b11, opcode,
- (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
- asmop # "\t$Rd.2d, $Rn.2d",
- [], NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
-
- def 8b : NeonI_2VMisc<0b0, U, 0b00, opcode,
- (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn),
- asmop # "\t$Rd.8b, $Rn.8b",
- [], NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
-
- def 4h : NeonI_2VMisc<0b0, U, 0b01, opcode,
- (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn),
- asmop # "\t$Rd.4h, $Rn.4h",
- [], NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
-
- def 2s : NeonI_2VMisc<0b0, U, 0b10, opcode,
- (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn),
- asmop # "\t$Rd.2s, $Rn.2s",
- [], NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
- }
-}
-
-defm SUQADD : NeonI_2VMisc_BHSDsize_2Args<"suqadd", 0b0, 0b00011>;
-defm USQADD : NeonI_2VMisc_BHSDsize_2Args<"usqadd", 0b1, 0b00011>;
-
-multiclass NeonI_2VMisc_BHSD_2Args_Pattern<string Prefix,
- SDPatternOperator Neon_Op> {
- def : Pat<(v16i8 (Neon_Op (v16i8 VPR128:$src), (v16i8 VPR128:$Rn))),
- (v16i8 (!cast<Instruction>(Prefix # 16b)
- (v16i8 VPR128:$src), (v16i8 VPR128:$Rn)))>;
-
- def : Pat<(v8i16 (Neon_Op (v8i16 VPR128:$src), (v8i16 VPR128:$Rn))),
- (v8i16 (!cast<Instruction>(Prefix # 8h)
- (v8i16 VPR128:$src), (v8i16 VPR128:$Rn)))>;
-
- def : Pat<(v4i32 (Neon_Op (v4i32 VPR128:$src), (v4i32 VPR128:$Rn))),
- (v4i32 (!cast<Instruction>(Prefix # 4s)
- (v4i32 VPR128:$src), (v4i32 VPR128:$Rn)))>;
-
- def : Pat<(v2i64 (Neon_Op (v2i64 VPR128:$src), (v2i64 VPR128:$Rn))),
- (v2i64 (!cast<Instruction>(Prefix # 2d)
- (v2i64 VPR128:$src), (v2i64 VPR128:$Rn)))>;
-
- def : Pat<(v8i8 (Neon_Op (v8i8 VPR64:$src), (v8i8 VPR64:$Rn))),
- (v8i8 (!cast<Instruction>(Prefix # 8b)
- (v8i8 VPR64:$src), (v8i8 VPR64:$Rn)))>;
-
- def : Pat<(v4i16 (Neon_Op (v4i16 VPR64:$src), (v4i16 VPR64:$Rn))),
- (v4i16 (!cast<Instruction>(Prefix # 4h)
- (v4i16 VPR64:$src), (v4i16 VPR64:$Rn)))>;
-
- def : Pat<(v2i32 (Neon_Op (v2i32 VPR64:$src), (v2i32 VPR64:$Rn))),
- (v2i32 (!cast<Instruction>(Prefix # 2s)
- (v2i32 VPR64:$src), (v2i32 VPR64:$Rn)))>;
-}
-
-defm : NeonI_2VMisc_BHSD_2Args_Pattern<"SUQADD", int_aarch64_neon_suqadd>;
-defm : NeonI_2VMisc_BHSD_2Args_Pattern<"USQADD", int_aarch64_neon_usqadd>;
-
-multiclass NeonI_2VMisc_BHSsizes<string asmop, bit U,
- SDPatternOperator Neon_Op> {
- def 16b : NeonI_2VMisc<0b1, U, 0b00, 0b00100,
- (outs VPR128:$Rd), (ins VPR128:$Rn),
- asmop # "\t$Rd.16b, $Rn.16b",
- [(set (v16i8 VPR128:$Rd),
- (v16i8 (Neon_Op (v16i8 VPR128:$Rn))))],
- NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU]>;
-
- def 8h : NeonI_2VMisc<0b1, U, 0b01, 0b00100,
- (outs VPR128:$Rd), (ins VPR128:$Rn),
- asmop # "\t$Rd.8h, $Rn.8h",
- [(set (v8i16 VPR128:$Rd),
- (v8i16 (Neon_Op (v8i16 VPR128:$Rn))))],
- NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU]>;
-
- def 4s : NeonI_2VMisc<0b1, U, 0b10, 0b00100,
- (outs VPR128:$Rd), (ins VPR128:$Rn),
- asmop # "\t$Rd.4s, $Rn.4s",
- [(set (v4i32 VPR128:$Rd),
- (v4i32 (Neon_Op (v4i32 VPR128:$Rn))))],
- NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU]>;
-
- def 8b : NeonI_2VMisc<0b0, U, 0b00, 0b00100,
- (outs VPR64:$Rd), (ins VPR64:$Rn),
- asmop # "\t$Rd.8b, $Rn.8b",
- [(set (v8i8 VPR64:$Rd),
- (v8i8 (Neon_Op (v8i8 VPR64:$Rn))))],
- NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU]>;
-
- def 4h : NeonI_2VMisc<0b0, U, 0b01, 0b00100,
- (outs VPR64:$Rd), (ins VPR64:$Rn),
- asmop # "\t$Rd.4h, $Rn.4h",
- [(set (v4i16 VPR64:$Rd),
- (v4i16 (Neon_Op (v4i16 VPR64:$Rn))))],
- NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU]>;
-
- def 2s : NeonI_2VMisc<0b0, U, 0b10, 0b00100,
- (outs VPR64:$Rd), (ins VPR64:$Rn),
- asmop # "\t$Rd.2s, $Rn.2s",
- [(set (v2i32 VPR64:$Rd),
- (v2i32 (Neon_Op (v2i32 VPR64:$Rn))))],
- NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU]>;
-}
-
-defm CLS : NeonI_2VMisc_BHSsizes<"cls", 0b0, int_arm_neon_vcls>;
-defm CLZ : NeonI_2VMisc_BHSsizes<"clz", 0b1, ctlz>;
-
-multiclass NeonI_2VMisc_Bsize<string asmop, bit U, bits<2> size,
- bits<5> Opcode> {
- def 16b : NeonI_2VMisc<0b1, U, size, Opcode,
- (outs VPR128:$Rd), (ins VPR128:$Rn),
- asmop # "\t$Rd.16b, $Rn.16b",
- [], NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU]>;
-
- def 8b : NeonI_2VMisc<0b0, U, size, Opcode,
- (outs VPR64:$Rd), (ins VPR64:$Rn),
- asmop # "\t$Rd.8b, $Rn.8b",
- [], NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU]>;
-}
-
-defm CNT : NeonI_2VMisc_Bsize<"cnt", 0b0, 0b00, 0b00101>;
-defm NOT : NeonI_2VMisc_Bsize<"not", 0b1, 0b00, 0b00101>;
-defm RBIT : NeonI_2VMisc_Bsize<"rbit", 0b1, 0b01, 0b00101>;
-
-def : NeonInstAlias<"mvn $Rd.16b, $Rn.16b",
- (NOT16b VPR128:$Rd, VPR128:$Rn), 0>;
-def : NeonInstAlias<"mvn $Rd.8b, $Rn.8b",
- (NOT8b VPR64:$Rd, VPR64:$Rn), 0>;
-
-def : Pat<(v16i8 (ctpop (v16i8 VPR128:$Rn))),
- (v16i8 (CNT16b (v16i8 VPR128:$Rn)))>;
-def : Pat<(v8i8 (ctpop (v8i8 VPR64:$Rn))),
- (v8i8 (CNT8b (v8i8 VPR64:$Rn)))>;
-
-def : Pat<(v16i8 (xor
- (v16i8 VPR128:$Rn),
- (v16i8 Neon_AllOne))),
- (v16i8 (NOT16b (v16i8 VPR128:$Rn)))>;
-def : Pat<(v8i8 (xor
- (v8i8 VPR64:$Rn),
- (v8i8 Neon_AllOne))),
- (v8i8 (NOT8b (v8i8 VPR64:$Rn)))>;
-def : Pat<(v8i16 (xor
- (v8i16 VPR128:$Rn),
- (v8i16 (bitconvert (v16i8 Neon_AllOne))))),
- (NOT16b VPR128:$Rn)>;
-def : Pat<(v4i16 (xor
- (v4i16 VPR64:$Rn),
- (v4i16 (bitconvert (v8i8 Neon_AllOne))))),
- (NOT8b VPR64:$Rn)>;
-def : Pat<(v4i32 (xor
- (v4i32 VPR128:$Rn),
- (v4i32 (bitconvert (v16i8 Neon_AllOne))))),
- (NOT16b VPR128:$Rn)>;
-def : Pat<(v2i32 (xor
- (v2i32 VPR64:$Rn),
- (v2i32 (bitconvert (v8i8 Neon_AllOne))))),
- (NOT8b VPR64:$Rn)>;
-def : Pat<(v2i64 (xor
- (v2i64 VPR128:$Rn),
- (v2i64 (bitconvert (v16i8 Neon_AllOne))))),
- (NOT16b VPR128:$Rn)>;
-
-def : Pat<(v16i8 (int_aarch64_neon_rbit (v16i8 VPR128:$Rn))),
- (v16i8 (RBIT16b (v16i8 VPR128:$Rn)))>;
-def : Pat<(v8i8 (int_aarch64_neon_rbit (v8i8 VPR64:$Rn))),
- (v8i8 (RBIT8b (v8i8 VPR64:$Rn)))>;
-
-multiclass NeonI_2VMisc_SDsizes<string asmop, bit U, bits<5> opcode,
- SDPatternOperator Neon_Op> {
- def 4s : NeonI_2VMisc<0b1, U, 0b10, opcode,
- (outs VPR128:$Rd), (ins VPR128:$Rn),
- asmop # "\t$Rd.4s, $Rn.4s",
- [(set (v4f32 VPR128:$Rd),
- (v4f32 (Neon_Op (v4f32 VPR128:$Rn))))],
- NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU]>;
-
- def 2d : NeonI_2VMisc<0b1, U, 0b11, opcode,
- (outs VPR128:$Rd), (ins VPR128:$Rn),
- asmop # "\t$Rd.2d, $Rn.2d",
- [(set (v2f64 VPR128:$Rd),
- (v2f64 (Neon_Op (v2f64 VPR128:$Rn))))],
- NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU]>;
-
- def 2s : NeonI_2VMisc<0b0, U, 0b10, opcode,
- (outs VPR64:$Rd), (ins VPR64:$Rn),
- asmop # "\t$Rd.2s, $Rn.2s",
- [(set (v2f32 VPR64:$Rd),
- (v2f32 (Neon_Op (v2f32 VPR64:$Rn))))],
- NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU]>;
-}
-
-defm FABS : NeonI_2VMisc_SDsizes<"fabs", 0b0, 0b01111, fabs>;
-defm FNEG : NeonI_2VMisc_SDsizes<"fneg", 0b1, 0b01111, fneg>;
-
-multiclass NeonI_2VMisc_HSD_Narrow<string asmop, bit U, bits<5> opcode> {
- def 8h8b : NeonI_2VMisc<0b0, U, 0b00, opcode,
- (outs VPR64:$Rd), (ins VPR128:$Rn),
- asmop # "\t$Rd.8b, $Rn.8h",
- [], NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU]>;
-
- def 4s4h : NeonI_2VMisc<0b0, U, 0b01, opcode,
- (outs VPR64:$Rd), (ins VPR128:$Rn),
- asmop # "\t$Rd.4h, $Rn.4s",
- [], NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU]>;
-
- def 2d2s : NeonI_2VMisc<0b0, U, 0b10, opcode,
- (outs VPR64:$Rd), (ins VPR128:$Rn),
- asmop # "\t$Rd.2s, $Rn.2d",
- [], NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU]>;
-
- let Constraints = "$Rd = $src" in {
- def 8h16b : NeonI_2VMisc<0b1, U, 0b00, opcode,
- (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
- asmop # "2\t$Rd.16b, $Rn.8h",
- [], NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
-
- def 4s8h : NeonI_2VMisc<0b1, U, 0b01, opcode,
- (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
- asmop # "2\t$Rd.8h, $Rn.4s",
- [], NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
-
- def 2d4s : NeonI_2VMisc<0b1, U, 0b10, opcode,
- (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
- asmop # "2\t$Rd.4s, $Rn.2d",
- [], NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
- }
-}
-
-defm XTN : NeonI_2VMisc_HSD_Narrow<"xtn", 0b0, 0b10010>;
-defm SQXTUN : NeonI_2VMisc_HSD_Narrow<"sqxtun", 0b1, 0b10010>;
-defm SQXTN : NeonI_2VMisc_HSD_Narrow<"sqxtn", 0b0, 0b10100>;
-defm UQXTN : NeonI_2VMisc_HSD_Narrow<"uqxtn", 0b1, 0b10100>;
-
-multiclass NeonI_2VMisc_Narrow_Patterns<string Prefix,
- SDPatternOperator Neon_Op> {
- def : Pat<(v8i8 (Neon_Op (v8i16 VPR128:$Rn))),
- (v8i8 (!cast<Instruction>(Prefix # 8h8b) (v8i16 VPR128:$Rn)))>;
-
- def : Pat<(v4i16 (Neon_Op (v4i32 VPR128:$Rn))),
- (v4i16 (!cast<Instruction>(Prefix # 4s4h) (v4i32 VPR128:$Rn)))>;
-
- def : Pat<(v2i32 (Neon_Op (v2i64 VPR128:$Rn))),
- (v2i32 (!cast<Instruction>(Prefix # 2d2s) (v2i64 VPR128:$Rn)))>;
-
- def : Pat<(v16i8 (concat_vectors
- (v8i8 VPR64:$src),
- (v8i8 (Neon_Op (v8i16 VPR128:$Rn))))),
- (!cast<Instruction>(Prefix # 8h16b)
- (SUBREG_TO_REG (i32 0), VPR64:$src, sub_64),
- VPR128:$Rn)>;
-
- def : Pat<(v8i16 (concat_vectors
- (v4i16 VPR64:$src),
- (v4i16 (Neon_Op (v4i32 VPR128:$Rn))))),
- (!cast<Instruction>(Prefix # 4s8h)
- (SUBREG_TO_REG (i32 0), VPR64:$src, sub_64),
- VPR128:$Rn)>;
-
- def : Pat<(v4i32 (concat_vectors
- (v2i32 VPR64:$src),
- (v2i32 (Neon_Op (v2i64 VPR128:$Rn))))),
- (!cast<Instruction>(Prefix # 2d4s)
- (SUBREG_TO_REG (i32 0), VPR64:$src, sub_64),
- VPR128:$Rn)>;
-}
-
-defm : NeonI_2VMisc_Narrow_Patterns<"XTN", trunc>;
-defm : NeonI_2VMisc_Narrow_Patterns<"SQXTUN", int_arm_neon_vqmovnsu>;
-defm : NeonI_2VMisc_Narrow_Patterns<"SQXTN", int_arm_neon_vqmovns>;
-defm : NeonI_2VMisc_Narrow_Patterns<"UQXTN", int_arm_neon_vqmovnu>;
-
-multiclass NeonI_2VMisc_SHIFT<string asmop, bit U, bits<5> opcode> {
- let DecoderMethod = "DecodeSHLLInstruction" in {
- def 8b8h : NeonI_2VMisc<0b0, U, 0b00, opcode,
- (outs VPR128:$Rd),
- (ins VPR64:$Rn, uimm_exact8:$Imm),
- asmop # "\t$Rd.8h, $Rn.8b, $Imm",
- [], NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU]>;
-
- def 4h4s : NeonI_2VMisc<0b0, U, 0b01, opcode,
- (outs VPR128:$Rd),
- (ins VPR64:$Rn, uimm_exact16:$Imm),
- asmop # "\t$Rd.4s, $Rn.4h, $Imm",
- [], NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU]>;
-
- def 2s2d : NeonI_2VMisc<0b0, U, 0b10, opcode,
- (outs VPR128:$Rd),
- (ins VPR64:$Rn, uimm_exact32:$Imm),
- asmop # "\t$Rd.2d, $Rn.2s, $Imm",
- [], NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU]>;
-
- def 16b8h : NeonI_2VMisc<0b1, U, 0b00, opcode,
- (outs VPR128:$Rd),
- (ins VPR128:$Rn, uimm_exact8:$Imm),
- asmop # "2\t$Rd.8h, $Rn.16b, $Imm",
- [], NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU]>;
-
- def 8h4s : NeonI_2VMisc<0b1, U, 0b01, opcode,
- (outs VPR128:$Rd),
- (ins VPR128:$Rn, uimm_exact16:$Imm),
- asmop # "2\t$Rd.4s, $Rn.8h, $Imm",
- [], NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU]>;
-
- def 4s2d : NeonI_2VMisc<0b1, U, 0b10, opcode,
- (outs VPR128:$Rd),
- (ins VPR128:$Rn, uimm_exact32:$Imm),
- asmop # "2\t$Rd.2d, $Rn.4s, $Imm",
- [], NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU]>;
- }
-}
-
-defm SHLL : NeonI_2VMisc_SHIFT<"shll", 0b1, 0b10011>;
-
-class NeonI_SHLL_Patterns<ValueType OpTy, ValueType DesTy,
- SDPatternOperator ExtOp, Operand Neon_Imm,
- string suffix>
- : Pat<(DesTy (shl
- (DesTy (ExtOp (OpTy VPR64:$Rn))),
- (DesTy (Neon_vdup
- (i32 Neon_Imm:$Imm))))),
- (!cast<Instruction>("SHLL" # suffix) VPR64:$Rn, Neon_Imm:$Imm)>;
-
-class NeonI_SHLL_High_Patterns<ValueType OpTy, ValueType DesTy,
- SDPatternOperator ExtOp, Operand Neon_Imm,
- string suffix, PatFrag GetHigh>
- : Pat<(DesTy (shl
- (DesTy (ExtOp
- (OpTy (GetHigh VPR128:$Rn)))),
- (DesTy (Neon_vdup
- (i32 Neon_Imm:$Imm))))),
- (!cast<Instruction>("SHLL" # suffix) VPR128:$Rn, Neon_Imm:$Imm)>;
-
-def : NeonI_SHLL_Patterns<v8i8, v8i16, zext, uimm_exact8, "8b8h">;
-def : NeonI_SHLL_Patterns<v8i8, v8i16, sext, uimm_exact8, "8b8h">;
-def : NeonI_SHLL_Patterns<v4i16, v4i32, zext, uimm_exact16, "4h4s">;
-def : NeonI_SHLL_Patterns<v4i16, v4i32, sext, uimm_exact16, "4h4s">;
-def : NeonI_SHLL_Patterns<v2i32, v2i64, zext, uimm_exact32, "2s2d">;
-def : NeonI_SHLL_Patterns<v2i32, v2i64, sext, uimm_exact32, "2s2d">;
-def : NeonI_SHLL_High_Patterns<v8i8, v8i16, zext, uimm_exact8, "16b8h",
- Neon_High16B>;
-def : NeonI_SHLL_High_Patterns<v8i8, v8i16, sext, uimm_exact8, "16b8h",
- Neon_High16B>;
-def : NeonI_SHLL_High_Patterns<v4i16, v4i32, zext, uimm_exact16, "8h4s",
- Neon_High8H>;
-def : NeonI_SHLL_High_Patterns<v4i16, v4i32, sext, uimm_exact16, "8h4s",
- Neon_High8H>;
-def : NeonI_SHLL_High_Patterns<v2i32, v2i64, zext, uimm_exact32, "4s2d",
- Neon_High4S>;
-def : NeonI_SHLL_High_Patterns<v2i32, v2i64, sext, uimm_exact32, "4s2d",
- Neon_High4S>;
-
-multiclass NeonI_2VMisc_SD_Narrow<string asmop, bit U, bits<5> opcode> {
- def 4s4h : NeonI_2VMisc<0b0, U, 0b00, opcode,
- (outs VPR64:$Rd), (ins VPR128:$Rn),
- asmop # "\t$Rd.4h, $Rn.4s",
- [], NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU]>;
-
- def 2d2s : NeonI_2VMisc<0b0, U, 0b01, opcode,
- (outs VPR64:$Rd), (ins VPR128:$Rn),
- asmop # "\t$Rd.2s, $Rn.2d",
- [], NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU]>;
-
- let Constraints = "$src = $Rd" in {
- def 4s8h : NeonI_2VMisc<0b1, U, 0b00, opcode,
- (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
- asmop # "2\t$Rd.8h, $Rn.4s",
- [], NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
-
- def 2d4s : NeonI_2VMisc<0b1, U, 0b01, opcode,
- (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
- asmop # "2\t$Rd.4s, $Rn.2d",
- [], NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU, ReadFPALU]>;
- }
-}
-
-defm FCVTN : NeonI_2VMisc_SD_Narrow<"fcvtn", 0b0, 0b10110>;
-
-multiclass NeonI_2VMisc_Narrow_Pattern<string prefix,
- SDPatternOperator f32_to_f16_Op,
- SDPatternOperator f64_to_f32_Op> {
-
- def : Pat<(v4i16 (f32_to_f16_Op (v4f32 VPR128:$Rn))),
- (!cast<Instruction>(prefix # "4s4h") (v4f32 VPR128:$Rn))>;
-
- def : Pat<(v8i16 (concat_vectors
- (v4i16 VPR64:$src),
- (v4i16 (f32_to_f16_Op (v4f32 VPR128:$Rn))))),
- (!cast<Instruction>(prefix # "4s8h")
- (v4f32 (SUBREG_TO_REG (i32 0), VPR64:$src, sub_64)),
- (v4f32 VPR128:$Rn))>;
-
- def : Pat<(v2f32 (f64_to_f32_Op (v2f64 VPR128:$Rn))),
- (!cast<Instruction>(prefix # "2d2s") (v2f64 VPR128:$Rn))>;
-
- def : Pat<(v4f32 (concat_vectors
- (v2f32 VPR64:$src),
- (v2f32 (f64_to_f32_Op (v2f64 VPR128:$Rn))))),
- (!cast<Instruction>(prefix # "2d4s")
- (v4f32 (SUBREG_TO_REG (i32 0), VPR64:$src, sub_64)),
- (v2f64 VPR128:$Rn))>;
-}
-
-defm : NeonI_2VMisc_Narrow_Pattern<"FCVTN", int_arm_neon_vcvtfp2hf, fround>;
-
-multiclass NeonI_2VMisc_D_Narrow<string asmop, string prefix, bit U,
- bits<5> opcode> {
- def 2d2s : NeonI_2VMisc<0b0, U, 0b01, opcode,
- (outs VPR64:$Rd), (ins VPR128:$Rn),
- asmop # "\t$Rd.2s, $Rn.2d",
- [], NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU]>;
-
- def 2d4s : NeonI_2VMisc<0b1, U, 0b01, opcode,
- (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
- asmop # "2\t$Rd.4s, $Rn.2d",
- [], NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU, ReadFPALU]> {
- let Constraints = "$src = $Rd";
- }
-
- def : Pat<(v2f32 (int_aarch64_neon_vcvtxn (v2f64 VPR128:$Rn))),
- (!cast<Instruction>(prefix # "2d2s") VPR128:$Rn)>;
-
- def : Pat<(v4f32 (concat_vectors
- (v2f32 VPR64:$src),
- (v2f32 (int_aarch64_neon_vcvtxn (v2f64 VPR128:$Rn))))),
- (!cast<Instruction>(prefix # "2d4s")
- (v4f32 (SUBREG_TO_REG (i32 0), VPR64:$src, sub_64)),
- VPR128:$Rn)>;
-}
-
-defm FCVTXN : NeonI_2VMisc_D_Narrow<"fcvtxn","FCVTXN", 0b1, 0b10110>;
-
-def Neon_High4Float : PatFrag<(ops node:$in),
- (extract_subvector (v4f32 node:$in), (iPTR 2))>;
-
-multiclass NeonI_2VMisc_HS_Extend<string asmop, bit U, bits<5> opcode> {
- def 4h4s : NeonI_2VMisc<0b0, U, 0b00, opcode,
- (outs VPR128:$Rd), (ins VPR64:$Rn),
- asmop # "\t$Rd.4s, $Rn.4h",
- [], NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU]>;
-
- def 2s2d : NeonI_2VMisc<0b0, U, 0b01, opcode,
- (outs VPR128:$Rd), (ins VPR64:$Rn),
- asmop # "\t$Rd.2d, $Rn.2s",
- [], NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU]>;
-
- def 8h4s : NeonI_2VMisc<0b1, U, 0b00, opcode,
- (outs VPR128:$Rd), (ins VPR128:$Rn),
- asmop # "2\t$Rd.4s, $Rn.8h",
- [], NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU]>;
-
- def 4s2d : NeonI_2VMisc<0b1, U, 0b01, opcode,
- (outs VPR128:$Rd), (ins VPR128:$Rn),
- asmop # "2\t$Rd.2d, $Rn.4s",
- [], NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU]>;
-}
-
-defm FCVTL : NeonI_2VMisc_HS_Extend<"fcvtl", 0b0, 0b10111>;
-
-multiclass NeonI_2VMisc_Extend_Pattern<string prefix> {
- def : Pat<(v4f32 (int_arm_neon_vcvthf2fp (v4i16 VPR64:$Rn))),
- (!cast<Instruction>(prefix # "4h4s") VPR64:$Rn)>;
-
- def : Pat<(v4f32 (int_arm_neon_vcvthf2fp
- (v4i16 (Neon_High8H
- (v8i16 VPR128:$Rn))))),
- (!cast<Instruction>(prefix # "8h4s") VPR128:$Rn)>;
-
- def : Pat<(v2f64 (fextend (v2f32 VPR64:$Rn))),
- (!cast<Instruction>(prefix # "2s2d") VPR64:$Rn)>;
-
- def : Pat<(v2f64 (fextend
- (v2f32 (Neon_High4Float
- (v4f32 VPR128:$Rn))))),
- (!cast<Instruction>(prefix # "4s2d") VPR128:$Rn)>;
-}
-
-defm : NeonI_2VMisc_Extend_Pattern<"FCVTL">;
-
-multiclass NeonI_2VMisc_SD_Conv<string asmop, bit Size, bit U, bits<5> opcode,
- ValueType ResTy4s, ValueType OpTy4s,
- ValueType ResTy2d, ValueType OpTy2d,
- ValueType ResTy2s, ValueType OpTy2s,
- SDPatternOperator Neon_Op> {
-
- def 4s : NeonI_2VMisc<0b1, U, {Size, 0b0}, opcode,
- (outs VPR128:$Rd), (ins VPR128:$Rn),
- asmop # "\t$Rd.4s, $Rn.4s",
- [(set (ResTy4s VPR128:$Rd),
- (ResTy4s (Neon_Op (OpTy4s VPR128:$Rn))))],
- NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU]>;
-
- def 2d : NeonI_2VMisc<0b1, U, {Size, 0b1}, opcode,
- (outs VPR128:$Rd), (ins VPR128:$Rn),
- asmop # "\t$Rd.2d, $Rn.2d",
- [(set (ResTy2d VPR128:$Rd),
- (ResTy2d (Neon_Op (OpTy2d VPR128:$Rn))))],
- NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU]>;
-
- def 2s : NeonI_2VMisc<0b0, U, {Size, 0b0}, opcode,
- (outs VPR64:$Rd), (ins VPR64:$Rn),
- asmop # "\t$Rd.2s, $Rn.2s",
- [(set (ResTy2s VPR64:$Rd),
- (ResTy2s (Neon_Op (OpTy2s VPR64:$Rn))))],
- NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU]>;
-}
-
-multiclass NeonI_2VMisc_fp_to_int<string asmop, bit Size, bit U,
- bits<5> opcode, SDPatternOperator Neon_Op> {
- defm _ : NeonI_2VMisc_SD_Conv<asmop, Size, U, opcode, v4i32, v4f32, v2i64,
- v2f64, v2i32, v2f32, Neon_Op>;
-}
-
-defm FCVTNS : NeonI_2VMisc_fp_to_int<"fcvtns", 0b0, 0b0, 0b11010,
- int_arm_neon_vcvtns>;
-defm FCVTNU : NeonI_2VMisc_fp_to_int<"fcvtnu", 0b0, 0b1, 0b11010,
- int_arm_neon_vcvtnu>;
-defm FCVTPS : NeonI_2VMisc_fp_to_int<"fcvtps", 0b1, 0b0, 0b11010,
- int_arm_neon_vcvtps>;
-defm FCVTPU : NeonI_2VMisc_fp_to_int<"fcvtpu", 0b1, 0b1, 0b11010,
- int_arm_neon_vcvtpu>;
-defm FCVTMS : NeonI_2VMisc_fp_to_int<"fcvtms", 0b0, 0b0, 0b11011,
- int_arm_neon_vcvtms>;
-defm FCVTMU : NeonI_2VMisc_fp_to_int<"fcvtmu", 0b0, 0b1, 0b11011,
- int_arm_neon_vcvtmu>;
-defm FCVTZS : NeonI_2VMisc_fp_to_int<"fcvtzs", 0b1, 0b0, 0b11011, fp_to_sint>;
-defm FCVTZU : NeonI_2VMisc_fp_to_int<"fcvtzu", 0b1, 0b1, 0b11011, fp_to_uint>;
-defm FCVTAS : NeonI_2VMisc_fp_to_int<"fcvtas", 0b0, 0b0, 0b11100,
- int_arm_neon_vcvtas>;
-defm FCVTAU : NeonI_2VMisc_fp_to_int<"fcvtau", 0b0, 0b1, 0b11100,
- int_arm_neon_vcvtau>;
-
-multiclass NeonI_2VMisc_int_to_fp<string asmop, bit Size, bit U,
- bits<5> opcode, SDPatternOperator Neon_Op> {
- defm _ : NeonI_2VMisc_SD_Conv<asmop, Size, U, opcode, v4f32, v4i32, v2f64,
- v2i64, v2f32, v2i32, Neon_Op>;
-}
-
-defm SCVTF : NeonI_2VMisc_int_to_fp<"scvtf", 0b0, 0b0, 0b11101, sint_to_fp>;
-defm UCVTF : NeonI_2VMisc_int_to_fp<"ucvtf", 0b0, 0b1, 0b11101, uint_to_fp>;
-
-multiclass NeonI_2VMisc_fp_to_fp<string asmop, bit Size, bit U,
- bits<5> opcode, SDPatternOperator Neon_Op> {
- defm _ : NeonI_2VMisc_SD_Conv<asmop, Size, U, opcode, v4f32, v4f32, v2f64,
- v2f64, v2f32, v2f32, Neon_Op>;
-}
-
-defm FRINTN : NeonI_2VMisc_fp_to_fp<"frintn", 0b0, 0b0, 0b11000,
- int_aarch64_neon_frintn>;
-defm FRINTA : NeonI_2VMisc_fp_to_fp<"frinta", 0b0, 0b1, 0b11000, frnd>;
-defm FRINTP : NeonI_2VMisc_fp_to_fp<"frintp", 0b1, 0b0, 0b11000, fceil>;
-defm FRINTM : NeonI_2VMisc_fp_to_fp<"frintm", 0b0, 0b0, 0b11001, ffloor>;
-defm FRINTX : NeonI_2VMisc_fp_to_fp<"frintx", 0b0, 0b1, 0b11001, frint>;
-defm FRINTZ : NeonI_2VMisc_fp_to_fp<"frintz", 0b1, 0b0, 0b11001, ftrunc>;
-defm FRINTI : NeonI_2VMisc_fp_to_fp<"frinti", 0b1, 0b1, 0b11001, fnearbyint>;
-defm FRECPE : NeonI_2VMisc_fp_to_fp<"frecpe", 0b1, 0b0, 0b11101,
- int_arm_neon_vrecpe>;
-defm FRSQRTE : NeonI_2VMisc_fp_to_fp<"frsqrte", 0b1, 0b1, 0b11101,
- int_arm_neon_vrsqrte>;
-let SchedRW = [WriteFPSqrt, ReadFPSqrt] in {
-defm FSQRT : NeonI_2VMisc_fp_to_fp<"fsqrt", 0b1, 0b1, 0b11111, fsqrt>;
-}
-
-multiclass NeonI_2VMisc_S_Conv<string asmop, bit Size, bit U,
- bits<5> opcode, SDPatternOperator Neon_Op> {
- def 4s : NeonI_2VMisc<0b1, U, {Size, 0b0}, opcode,
- (outs VPR128:$Rd), (ins VPR128:$Rn),
- asmop # "\t$Rd.4s, $Rn.4s",
- [(set (v4i32 VPR128:$Rd),
- (v4i32 (Neon_Op (v4i32 VPR128:$Rn))))],
- NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU]>;
-
- def 2s : NeonI_2VMisc<0b0, U, {Size, 0b0}, opcode,
- (outs VPR64:$Rd), (ins VPR64:$Rn),
- asmop # "\t$Rd.2s, $Rn.2s",
- [(set (v2i32 VPR64:$Rd),
- (v2i32 (Neon_Op (v2i32 VPR64:$Rn))))],
- NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU]>;
-}
-
-defm URECPE : NeonI_2VMisc_S_Conv<"urecpe", 0b1, 0b0, 0b11100,
- int_arm_neon_vrecpe>;
-defm URSQRTE : NeonI_2VMisc_S_Conv<"ursqrte", 0b1, 0b1, 0b11100,
- int_arm_neon_vrsqrte>;
-
-// Crypto Class
-class NeonI_Cryptoaes_2v<bits<2> size, bits<5> opcode,
- string asmop, SDPatternOperator opnode>
- : NeonI_Crypto_AES<size, opcode,
- (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
- asmop # "\t$Rd.16b, $Rn.16b",
- [(set (v16i8 VPR128:$Rd),
- (v16i8 (opnode (v16i8 VPR128:$src),
- (v16i8 VPR128:$Rn))))],
- NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU, ReadFPALU]> {
- let Constraints = "$src = $Rd";
- let Predicates = [HasNEON, HasCrypto];
-}
-
-def AESE : NeonI_Cryptoaes_2v<0b00, 0b00100, "aese", int_arm_neon_aese>;
-def AESD : NeonI_Cryptoaes_2v<0b00, 0b00101, "aesd", int_arm_neon_aesd>;
-
-class NeonI_Cryptoaes<bits<2> size, bits<5> opcode,
- string asmop, SDPatternOperator opnode>
- : NeonI_Crypto_AES<size, opcode,
- (outs VPR128:$Rd), (ins VPR128:$Rn),
- asmop # "\t$Rd.16b, $Rn.16b",
- [(set (v16i8 VPR128:$Rd),
- (v16i8 (opnode (v16i8 VPR128:$Rn))))],
- NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU]>;
-
-def AESMC : NeonI_Cryptoaes<0b00, 0b00110, "aesmc", int_arm_neon_aesmc>;
-def AESIMC : NeonI_Cryptoaes<0b00, 0b00111, "aesimc", int_arm_neon_aesimc>;
-
-class NeonI_Cryptosha_vv<bits<2> size, bits<5> opcode,
- string asmop, SDPatternOperator opnode>
- : NeonI_Crypto_SHA<size, opcode,
- (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn),
- asmop # "\t$Rd.4s, $Rn.4s",
- [(set (v4i32 VPR128:$Rd),
- (v4i32 (opnode (v4i32 VPR128:$src),
- (v4i32 VPR128:$Rn))))],
- NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU, ReadFPALU]> {
- let Constraints = "$src = $Rd";
- let Predicates = [HasNEON, HasCrypto];
-}
-
-def SHA1SU1 : NeonI_Cryptosha_vv<0b00, 0b00001, "sha1su1",
- int_arm_neon_sha1su1>;
-def SHA256SU0 : NeonI_Cryptosha_vv<0b00, 0b00010, "sha256su0",
- int_arm_neon_sha256su0>;
-
-class NeonI_Cryptosha_ss<bits<2> size, bits<5> opcode,
- string asmop, SDPatternOperator opnode>
- : NeonI_Crypto_SHA<size, opcode,
- (outs FPR32:$Rd), (ins FPR32:$Rn),
- asmop # "\t$Rd, $Rn",
- [], NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU]> {
- let Predicates = [HasNEON, HasCrypto];
- let hasSideEffects = 0;
-}
-
-def SHA1H : NeonI_Cryptosha_ss<0b00, 0b00000, "sha1h", int_arm_neon_sha1h>;
-def : Pat<(i32 (int_arm_neon_sha1h i32:$Rn)),
- (COPY_TO_REGCLASS (SHA1H (COPY_TO_REGCLASS i32:$Rn, FPR32)), GPR32)>;
-
-
-class NeonI_Cryptosha3_vvv<bits<2> size, bits<3> opcode, string asmop,
- SDPatternOperator opnode>
- : NeonI_Crypto_3VSHA<size, opcode,
- (outs VPR128:$Rd),
- (ins VPR128:$src, VPR128:$Rn, VPR128:$Rm),
- asmop # "\t$Rd.4s, $Rn.4s, $Rm.4s",
- [(set (v4i32 VPR128:$Rd),
- (v4i32 (opnode (v4i32 VPR128:$src),
- (v4i32 VPR128:$Rn),
- (v4i32 VPR128:$Rm))))],
- NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU, ReadFPALU, ReadFPALU]> {
- let Constraints = "$src = $Rd";
- let Predicates = [HasNEON, HasCrypto];
-}
-
-def SHA1SU0 : NeonI_Cryptosha3_vvv<0b00, 0b011, "sha1su0",
- int_arm_neon_sha1su0>;
-def SHA256SU1 : NeonI_Cryptosha3_vvv<0b00, 0b110, "sha256su1",
- int_arm_neon_sha256su1>;
-
-class NeonI_Cryptosha3_qqv<bits<2> size, bits<3> opcode, string asmop,
- SDPatternOperator opnode>
- : NeonI_Crypto_3VSHA<size, opcode,
- (outs FPR128:$Rd),
- (ins FPR128:$src, FPR128:$Rn, VPR128:$Rm),
- asmop # "\t$Rd, $Rn, $Rm.4s",
- [(set (v4i32 FPR128:$Rd),
- (v4i32 (opnode (v4i32 FPR128:$src),
- (v4i32 FPR128:$Rn),
- (v4i32 VPR128:$Rm))))],
- NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU, ReadFPALU, ReadFPALU]> {
- let Constraints = "$src = $Rd";
- let Predicates = [HasNEON, HasCrypto];
-}
-
-def SHA256H : NeonI_Cryptosha3_qqv<0b00, 0b100, "sha256h",
- int_arm_neon_sha256h>;
-def SHA256H2 : NeonI_Cryptosha3_qqv<0b00, 0b101, "sha256h2",
- int_arm_neon_sha256h2>;
-
-class NeonI_Cryptosha3_qsv<bits<2> size, bits<3> opcode, string asmop>
- : NeonI_Crypto_3VSHA<size, opcode,
- (outs FPR128:$Rd),
- (ins FPR128:$src, FPR32:$Rn, VPR128:$Rm),
- asmop # "\t$Rd, $Rn, $Rm.4s",
- [], NoItinerary>,
- Sched<[WriteFPALU, ReadFPALU, ReadFPALU, ReadFPALU]> {
- let Constraints = "$src = $Rd";
- let hasSideEffects = 0;
- let Predicates = [HasNEON, HasCrypto];
-}
-
-def SHA1C : NeonI_Cryptosha3_qsv<0b00, 0b000, "sha1c">;
-def SHA1P : NeonI_Cryptosha3_qsv<0b00, 0b001, "sha1p">;
-def SHA1M : NeonI_Cryptosha3_qsv<0b00, 0b010, "sha1m">;
-
-def : Pat<(int_arm_neon_sha1c v4i32:$hash_abcd, i32:$hash_e, v4i32:$wk),
- (SHA1C v4i32:$hash_abcd,
- (COPY_TO_REGCLASS i32:$hash_e, FPR32), v4i32:$wk)>;
-def : Pat<(int_arm_neon_sha1m v4i32:$hash_abcd, i32:$hash_e, v4i32:$wk),
- (SHA1M v4i32:$hash_abcd,
- (COPY_TO_REGCLASS i32:$hash_e, FPR32), v4i32:$wk)>;
-def : Pat<(int_arm_neon_sha1p v4i32:$hash_abcd, i32:$hash_e, v4i32:$wk),
- (SHA1P v4i32:$hash_abcd,
- (COPY_TO_REGCLASS i32:$hash_e, FPR32), v4i32:$wk)>;
-
-// Additional patterns to match shl to USHL.
-def : Pat<(v8i8 (shl (v8i8 VPR64:$Rn), (v8i8 VPR64:$Rm))),
- (USHLvvv_8B $Rn, $Rm)>;
-def : Pat<(v4i16 (shl (v4i16 VPR64:$Rn), (v4i16 VPR64:$Rm))),
- (USHLvvv_4H $Rn, $Rm)>;
-def : Pat<(v2i32 (shl (v2i32 VPR64:$Rn), (v2i32 VPR64:$Rm))),
- (USHLvvv_2S $Rn, $Rm)>;
-def : Pat<(v1i64 (shl (v1i64 FPR64:$Rn), (v1i64 FPR64:$Rm))),
- (USHLddd $Rn, $Rm)>;
-def : Pat<(v16i8 (shl (v16i8 VPR128:$Rn), (v16i8 VPR128:$Rm))),
- (USHLvvv_16B $Rn, $Rm)>;
-def : Pat<(v8i16 (shl (v8i16 VPR128:$Rn), (v8i16 VPR128:$Rm))),
- (USHLvvv_8H $Rn, $Rm)>;
-def : Pat<(v4i32 (shl (v4i32 VPR128:$Rn), (v4i32 VPR128:$Rm))),
- (USHLvvv_4S $Rn, $Rm)>;
-def : Pat<(v2i64 (shl (v2i64 VPR128:$Rn), (v2i64 VPR128:$Rm))),
- (USHLvvv_2D $Rn, $Rm)>;
-
-def : Pat<(v1i8 (shl (v1i8 FPR8:$Rn), (v1i8 FPR8:$Rm))),
- (EXTRACT_SUBREG
- (USHLvvv_8B (SUBREG_TO_REG (i64 0), FPR8:$Rn, sub_8),
- (SUBREG_TO_REG (i64 0), FPR8:$Rm, sub_8)),
- sub_8)>;
-def : Pat<(v1i16 (shl (v1i16 FPR16:$Rn), (v1i16 FPR16:$Rm))),
- (EXTRACT_SUBREG
- (USHLvvv_4H (SUBREG_TO_REG (i64 0), FPR16:$Rn, sub_16),
- (SUBREG_TO_REG (i64 0), FPR16:$Rm, sub_16)),
- sub_16)>;
-def : Pat<(v1i32 (shl (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))),
- (EXTRACT_SUBREG
- (USHLvvv_2S (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32),
- (SUBREG_TO_REG (i64 0), FPR32:$Rm, sub_32)),
- sub_32)>;
-
-// Additional patterns to match sra, srl.
-// For a vector right shift by a vector of per-lane amounts, the shift
-// amounts passed to SSHL/USHL must be negative, so negate the
-// shift-amount vector first.
-def : Pat<(v8i8 (srl (v8i8 VPR64:$Rn), (v8i8 VPR64:$Rm))),
- (USHLvvv_8B $Rn, (NEG8b $Rm))>;
-def : Pat<(v4i16 (srl (v4i16 VPR64:$Rn), (v4i16 VPR64:$Rm))),
- (USHLvvv_4H $Rn, (NEG4h $Rm))>;
-def : Pat<(v2i32 (srl (v2i32 VPR64:$Rn), (v2i32 VPR64:$Rm))),
- (USHLvvv_2S $Rn, (NEG2s $Rm))>;
-def : Pat<(v1i64 (srl (v1i64 FPR64:$Rn), (v1i64 FPR64:$Rm))),
- (USHLddd $Rn, (NEGdd $Rm))>;
-def : Pat<(v16i8 (srl (v16i8 VPR128:$Rn), (v16i8 VPR128:$Rm))),
- (USHLvvv_16B $Rn, (NEG16b $Rm))>;
-def : Pat<(v8i16 (srl (v8i16 VPR128:$Rn), (v8i16 VPR128:$Rm))),
- (USHLvvv_8H $Rn, (NEG8h $Rm))>;
-def : Pat<(v4i32 (srl (v4i32 VPR128:$Rn), (v4i32 VPR128:$Rm))),
- (USHLvvv_4S $Rn, (NEG4s $Rm))>;
-def : Pat<(v2i64 (srl (v2i64 VPR128:$Rn), (v2i64 VPR128:$Rm))),
- (USHLvvv_2D $Rn, (NEG2d $Rm))>;
-
-def : Pat<(v1i8 (srl (v1i8 FPR8:$Rn), (v1i8 FPR8:$Rm))),
- (EXTRACT_SUBREG
- (USHLvvv_8B (SUBREG_TO_REG (i64 0), FPR8:$Rn, sub_8),
- (NEG8b (SUBREG_TO_REG (i64 0), FPR8:$Rm, sub_8))),
- sub_8)>;
-def : Pat<(v1i16 (srl (v1i16 FPR16:$Rn), (v1i16 FPR16:$Rm))),
- (EXTRACT_SUBREG
- (USHLvvv_4H (SUBREG_TO_REG (i64 0), FPR16:$Rn, sub_16),
- (NEG4h (SUBREG_TO_REG (i64 0), FPR16:$Rm, sub_16))),
- sub_16)>;
-def : Pat<(v1i32 (srl (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))),
- (EXTRACT_SUBREG
- (USHLvvv_2S (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32),
- (NEG2s (SUBREG_TO_REG (i64 0), FPR32:$Rm, sub_32))),
- sub_32)>;
-
-def : Pat<(v8i8 (sra (v8i8 VPR64:$Rn), (v8i8 VPR64:$Rm))),
- (SSHLvvv_8B $Rn, (NEG8b $Rm))>;
-def : Pat<(v4i16 (sra (v4i16 VPR64:$Rn), (v4i16 VPR64:$Rm))),
- (SSHLvvv_4H $Rn, (NEG4h $Rm))>;
-def : Pat<(v2i32 (sra (v2i32 VPR64:$Rn), (v2i32 VPR64:$Rm))),
- (SSHLvvv_2S $Rn, (NEG2s $Rm))>;
-def : Pat<(v1i64 (sra (v1i64 FPR64:$Rn), (v1i64 FPR64:$Rm))),
- (SSHLddd $Rn, (NEGdd $Rm))>;
-def : Pat<(v16i8 (sra (v16i8 VPR128:$Rn), (v16i8 VPR128:$Rm))),
- (SSHLvvv_16B $Rn, (NEG16b $Rm))>;
-def : Pat<(v8i16 (sra (v8i16 VPR128:$Rn), (v8i16 VPR128:$Rm))),
- (SSHLvvv_8H $Rn, (NEG8h $Rm))>;
-def : Pat<(v4i32 (sra (v4i32 VPR128:$Rn), (v4i32 VPR128:$Rm))),
- (SSHLvvv_4S $Rn, (NEG4s $Rm))>;
-def : Pat<(v2i64 (sra (v2i64 VPR128:$Rn), (v2i64 VPR128:$Rm))),
- (SSHLvvv_2D $Rn, (NEG2d $Rm))>;
-
-def : Pat<(v1i8 (sra (v1i8 FPR8:$Rn), (v1i8 FPR8:$Rm))),
- (EXTRACT_SUBREG
- (SSHLvvv_8B (SUBREG_TO_REG (i64 0), FPR8:$Rn, sub_8),
- (NEG8b (SUBREG_TO_REG (i64 0), FPR8:$Rm, sub_8))),
- sub_8)>;
-def : Pat<(v1i16 (sra (v1i16 FPR16:$Rn), (v1i16 FPR16:$Rm))),
- (EXTRACT_SUBREG
- (SSHLvvv_4H (SUBREG_TO_REG (i64 0), FPR16:$Rn, sub_16),
- (NEG4h (SUBREG_TO_REG (i64 0), FPR16:$Rm, sub_16))),
- sub_16)>;
-def : Pat<(v1i32 (sra (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))),
- (EXTRACT_SUBREG
- (SSHLvvv_2S (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32),
- (NEG2s (SUBREG_TO_REG (i64 0), FPR32:$Rm, sub_32))),
- sub_32)>;
-
-//
-// Patterns for handling half-precision values
-//
-
-// Convert between f16 value and f32 value
-def : Pat<(f32 (f16_to_f32 (i32 GPR32:$Rn))),
- (FCVTsh (EXTRACT_SUBREG (FMOVsw $Rn), sub_16))>;
-def : Pat<(i32 (f32_to_f16 (f32 FPR32:$Rn))),
- (FMOVws (SUBREG_TO_REG (i64 0), (f16 (FCVThs $Rn)), sub_16))>;
-
-// Convert f16 value coming in as i16 value to f32
-def : Pat<(f32 (f16_to_f32 (i32 (and (i32 GPR32:$Rn), 65535)))),
- (FCVTsh (EXTRACT_SUBREG (FMOVsw GPR32:$Rn), sub_16))>;
-def : Pat<(f32 (f16_to_f32 (i32 (assertzext GPR32:$Rn)))),
- (FCVTsh (EXTRACT_SUBREG (FMOVsw GPR32:$Rn), sub_16))>;
-
-def : Pat<(f32 (f16_to_f32 (i32 (assertzext (i32 (
- f32_to_f16 (f32 FPR32:$Rn))))))),
- (f32 FPR32:$Rn)>;
-
-// Patterns for vector extract of half-precision FP value in i16 storage type
-def : Pat<(f32 (f16_to_f32 ( i32 (and (i32 (vector_extract
- (v4i16 VPR64:$Rn), neon_uimm2_bare:$Imm)), 65535)))),
- (FCVTsh (f16 (DUPhv_H
- (v8i16 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
- neon_uimm2_bare:$Imm)))>;
-
-def : Pat<(f32 (f16_to_f32 ( i32 (and (i32 (vector_extract
- (v8i16 VPR128:$Rn), neon_uimm3_bare:$Imm)), 65535)))),
- (FCVTsh (f16 (DUPhv_H (v8i16 VPR128:$Rn), neon_uimm3_bare:$Imm)))>;
-
-// Patterns for vector insert of half-precision FP value 0 in i16 storage type
-def : Pat<(v8i16 (vector_insert (v8i16 VPR128:$Rn),
- (i32 (assertsext (i32 (fp_to_sint(f32 (f16_to_f32 (i32 0))))))),
- (neon_uimm3_bare:$Imm))),
- (v8i16 (INSELh (v8i16 VPR128:$Rn),
- (v8i16 (SUBREG_TO_REG (i64 0),
- (f16 (EXTRACT_SUBREG (f32 (FMOVsw (i32 WZR))), sub_16)),
- sub_16)),
- neon_uimm3_bare:$Imm, 0))>;
-
-def : Pat<(v4i16 (vector_insert (v4i16 VPR64:$Rn),
- (i32 (assertsext (i32 (fp_to_sint(f32 (f16_to_f32 (i32 0))))))),
- (neon_uimm2_bare:$Imm))),
- (v4i16 (EXTRACT_SUBREG
- (v8i16 (INSELh
- (v8i16 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
- (v8i16 (SUBREG_TO_REG (i64 0),
- (f16 (EXTRACT_SUBREG (f32 (FMOVsw (i32 WZR))), sub_16)),
- sub_16)),
- neon_uimm2_bare:$Imm, 0)),
- sub_64))>;
-
-// Patterns for vector insert of half-precision FP value in i16 storage type
-def : Pat<(v8i16 (vector_insert (v8i16 VPR128:$Rn),
- (i32 (assertsext (i32 (fp_to_sint
- (f32 (f16_to_f32 (i32 (and (i32 GPR32:$src), 65535)))))))),
- (neon_uimm3_bare:$Imm))),
- (v8i16 (INSELh (v8i16 VPR128:$Rn),
- (v8i16 (SUBREG_TO_REG (i64 0),
- (f16 (EXTRACT_SUBREG (f32 (FMOVsw (i32 GPR32:$src))), sub_16)),
- sub_16)),
- neon_uimm3_bare:$Imm, 0))>;
-
-def : Pat<(v4i16 (vector_insert (v4i16 VPR64:$Rn),
- (i32 (assertsext (i32 (fp_to_sint
- (f32 (f16_to_f32 (i32 (and (i32 GPR32:$src), 65535)))))))),
- (neon_uimm2_bare:$Imm))),
- (v4i16 (EXTRACT_SUBREG
- (v8i16 (INSELh
- (v8i16 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
- (v8i16 (SUBREG_TO_REG (i64 0),
- (f16 (EXTRACT_SUBREG (f32 (FMOVsw (i32 GPR32:$src))), sub_16)),
- sub_16)),
- neon_uimm2_bare:$Imm, 0)),
- sub_64))>;
-
-def : Pat<(v8i16 (vector_insert (v8i16 VPR128:$Rn),
- (i32 (vector_extract (v8i16 VPR128:$src), neon_uimm3_bare:$Imm2)),
- (neon_uimm3_bare:$Imm1))),
- (v8i16 (INSELh (v8i16 VPR128:$Rn), (v8i16 VPR128:$src),
- neon_uimm3_bare:$Imm1, neon_uimm3_bare:$Imm2))>;
-
-// Patterns for vector copy of half-precision FP value in i16 storage type
-def : Pat<(v8i16 (vector_insert (v8i16 VPR128:$Rn),
- (i32 (assertsext (i32 (fp_to_sint(f32 (f16_to_f32 (i32 (and (i32
- (vector_extract (v8i16 VPR128:$src), neon_uimm3_bare:$Imm2)),
- 65535)))))))),
- (neon_uimm3_bare:$Imm1))),
- (v8i16 (INSELh (v8i16 VPR128:$Rn), (v8i16 VPR128:$src),
- neon_uimm3_bare:$Imm1, neon_uimm3_bare:$Imm2))>;
-
-def : Pat<(v4i16 (vector_insert (v4i16 VPR64:$Rn),
- (i32 (assertsext (i32 (fp_to_sint(f32 (f16_to_f32 (i32 (and (i32
- (vector_extract (v4i16 VPR64:$src), neon_uimm3_bare:$Imm2)),
- 65535)))))))),
- (neon_uimm3_bare:$Imm1))),
- (v4i16 (EXTRACT_SUBREG
- (v8i16 (INSELh
- (v8i16 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
- (v8i16 (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64)),
- neon_uimm3_bare:$Imm1, neon_uimm3_bare:$Imm2)),
- sub_64))>;
-
-
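The deleted patterns above lower vector srl/sra to USHL/SSHL by negating the shift-amount vector, since those instructions shift left for positive per-lane amounts and right for negative ones. A minimal C++ sketch of one lane's semantics, under that simplified model (ushl_lane is a hypothetical helper for illustration, not an LLVM or ACLE API):

    #include <cstdint>
    #include <cassert>

    // Model of one 8-bit lane of USHL: positive amounts shift left,
    // negative amounts shift (logically) right. This is why the srl
    // patterns above wrap $Rm in NEG8b/NEG4h/... before using USHL.
    static uint8_t ushl_lane(uint8_t val, int8_t amt) {
      return amt >= 0 ? (uint8_t)(val << amt) : (uint8_t)(val >> -amt);
    }

    int main() {
      assert(ushl_lane(0x01, 3) == 0x08);  // shl by 3
      assert(ushl_lane(0x80, -3) == 0x10); // srl by 3, via negated amount
      return 0;
    }

Seen this way, a single shift-by-register instruction covers both directions once the amount is negated, with SSHL playing the same role for the arithmetic (sra) case.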
diff --git a/lib/Target/ARM64/ARM64LoadStoreOptimizer.cpp b/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
index c0031a4..e7454be 100644
--- a/lib/Target/ARM64/ARM64LoadStoreOptimizer.cpp
+++ b/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
@@ -1,4 +1,4 @@
-//===-- ARM64LoadStoreOptimizer.cpp - ARM64 load/store opt. pass --*- C++ -*-=//
+//=- AArch64LoadStoreOptimizer.cpp - AArch64 load/store opt. pass -*- C++ -*-=//
//
// The LLVM Compiler Infrastructure
//
@@ -12,9 +12,8 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "arm64-ldst-opt"
-#include "ARM64InstrInfo.h"
-#include "MCTargetDesc/ARM64AddressingModes.h"
+#include "AArch64InstrInfo.h"
+#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -30,7 +29,9 @@
#include "llvm/ADT/Statistic.h"
using namespace llvm;
-/// ARM64AllocLoadStoreOpt - Post-register allocation pass to combine
+#define DEBUG_TYPE "aarch64-ldst-opt"
+
+/// AArch64AllocLoadStoreOpt - Post-register allocation pass to combine
/// load / store instructions to form ldp / stp instructions.
STATISTIC(NumPairCreated, "Number of load/store pair instructions generated");
@@ -39,23 +40,21 @@ STATISTIC(NumPreFolded, "Number of pre-index updates folded");
STATISTIC(NumUnscaledPairCreated,
"Number of load/store from unscaled generated");
-static cl::opt<bool> DoLoadStoreOpt("arm64-load-store-opt", cl::init(true),
- cl::Hidden);
-static cl::opt<unsigned> ScanLimit("arm64-load-store-scan-limit", cl::init(20),
+static cl::opt<unsigned> ScanLimit("aarch64-load-store-scan-limit", cl::init(20),
cl::Hidden);
// Placeholder while testing unscaled load/store combining
static cl::opt<bool>
-EnableARM64UnscaledMemOp("arm64-unscaled-mem-op", cl::Hidden,
- cl::desc("Allow ARM64 unscaled load/store combining"),
+EnableAArch64UnscaledMemOp("aarch64-unscaled-mem-op", cl::Hidden,
+ cl::desc("Allow AArch64 unscaled load/store combining"),
cl::init(true));
namespace {
-struct ARM64LoadStoreOpt : public MachineFunctionPass {
+struct AArch64LoadStoreOpt : public MachineFunctionPass {
static char ID;
- ARM64LoadStoreOpt() : MachineFunctionPass(ID) {}
+ AArch64LoadStoreOpt() : MachineFunctionPass(ID) {}
- const ARM64InstrInfo *TII;
+ const AArch64InstrInfo *TII;
const TargetRegisterInfo *TRI;
// Scan the instructions looking for a load/store that can be combined
@@ -70,7 +69,7 @@ struct ARM64LoadStoreOpt : public MachineFunctionPass {
// Merge the two instructions indicated into a single pair-wise instruction.
// If mergeForward is true, erase the first instruction and fold its
// operation into the second. If false, the reverse. Return the instruction
- // following the first instruction (which may change during proecessing).
+ // following the first instruction (which may change during processing).
MachineBasicBlock::iterator
mergePairedInsns(MachineBasicBlock::iterator I,
MachineBasicBlock::iterator Paired, bool mergeForward);
@@ -100,79 +99,79 @@ struct ARM64LoadStoreOpt : public MachineFunctionPass {
bool optimizeBlock(MachineBasicBlock &MBB);
- virtual bool runOnMachineFunction(MachineFunction &Fn);
+ bool runOnMachineFunction(MachineFunction &Fn) override;
- virtual const char *getPassName() const {
- return "ARM64 load / store optimization pass";
+ const char *getPassName() const override {
+ return "AArch64 load / store optimization pass";
}
private:
int getMemSize(MachineInstr *MemMI);
};
-char ARM64LoadStoreOpt::ID = 0;
+char AArch64LoadStoreOpt::ID = 0;
}
static bool isUnscaledLdst(unsigned Opc) {
switch (Opc) {
default:
return false;
- case ARM64::STURSi:
+ case AArch64::STURSi:
return true;
- case ARM64::STURDi:
+ case AArch64::STURDi:
return true;
- case ARM64::STURQi:
+ case AArch64::STURQi:
return true;
- case ARM64::STURWi:
+ case AArch64::STURWi:
return true;
- case ARM64::STURXi:
+ case AArch64::STURXi:
return true;
- case ARM64::LDURSi:
+ case AArch64::LDURSi:
return true;
- case ARM64::LDURDi:
+ case AArch64::LDURDi:
return true;
- case ARM64::LDURQi:
+ case AArch64::LDURQi:
return true;
- case ARM64::LDURWi:
+ case AArch64::LDURWi:
return true;
- case ARM64::LDURXi:
+ case AArch64::LDURXi:
return true;
}
}
// Size in bytes of the data moved by an unscaled load or store
-int ARM64LoadStoreOpt::getMemSize(MachineInstr *MemMI) {
+int AArch64LoadStoreOpt::getMemSize(MachineInstr *MemMI) {
switch (MemMI->getOpcode()) {
default:
llvm_unreachable("Opcode has has unknown size!");
- case ARM64::STRSui:
- case ARM64::STURSi:
+ case AArch64::STRSui:
+ case AArch64::STURSi:
return 4;
- case ARM64::STRDui:
- case ARM64::STURDi:
+ case AArch64::STRDui:
+ case AArch64::STURDi:
return 8;
- case ARM64::STRQui:
- case ARM64::STURQi:
+ case AArch64::STRQui:
+ case AArch64::STURQi:
return 16;
- case ARM64::STRWui:
- case ARM64::STURWi:
+ case AArch64::STRWui:
+ case AArch64::STURWi:
return 4;
- case ARM64::STRXui:
- case ARM64::STURXi:
+ case AArch64::STRXui:
+ case AArch64::STURXi:
return 8;
- case ARM64::LDRSui:
- case ARM64::LDURSi:
+ case AArch64::LDRSui:
+ case AArch64::LDURSi:
return 4;
- case ARM64::LDRDui:
- case ARM64::LDURDi:
+ case AArch64::LDRDui:
+ case AArch64::LDURDi:
return 8;
- case ARM64::LDRQui:
- case ARM64::LDURQi:
+ case AArch64::LDRQui:
+ case AArch64::LDURQi:
return 16;
- case ARM64::LDRWui:
- case ARM64::LDURWi:
+ case AArch64::LDRWui:
+ case AArch64::LDURWi:
return 4;
- case ARM64::LDRXui:
- case ARM64::LDURXi:
+ case AArch64::LDRXui:
+ case AArch64::LDURXi:
return 8;
}
}
@@ -181,36 +180,36 @@ static unsigned getMatchingPairOpcode(unsigned Opc) {
switch (Opc) {
default:
llvm_unreachable("Opcode has no pairwise equivalent!");
- case ARM64::STRSui:
- case ARM64::STURSi:
- return ARM64::STPSi;
- case ARM64::STRDui:
- case ARM64::STURDi:
- return ARM64::STPDi;
- case ARM64::STRQui:
- case ARM64::STURQi:
- return ARM64::STPQi;
- case ARM64::STRWui:
- case ARM64::STURWi:
- return ARM64::STPWi;
- case ARM64::STRXui:
- case ARM64::STURXi:
- return ARM64::STPXi;
- case ARM64::LDRSui:
- case ARM64::LDURSi:
- return ARM64::LDPSi;
- case ARM64::LDRDui:
- case ARM64::LDURDi:
- return ARM64::LDPDi;
- case ARM64::LDRQui:
- case ARM64::LDURQi:
- return ARM64::LDPQi;
- case ARM64::LDRWui:
- case ARM64::LDURWi:
- return ARM64::LDPWi;
- case ARM64::LDRXui:
- case ARM64::LDURXi:
- return ARM64::LDPXi;
+ case AArch64::STRSui:
+ case AArch64::STURSi:
+ return AArch64::STPSi;
+ case AArch64::STRDui:
+ case AArch64::STURDi:
+ return AArch64::STPDi;
+ case AArch64::STRQui:
+ case AArch64::STURQi:
+ return AArch64::STPQi;
+ case AArch64::STRWui:
+ case AArch64::STURWi:
+ return AArch64::STPWi;
+ case AArch64::STRXui:
+ case AArch64::STURXi:
+ return AArch64::STPXi;
+ case AArch64::LDRSui:
+ case AArch64::LDURSi:
+ return AArch64::LDPSi;
+ case AArch64::LDRDui:
+ case AArch64::LDURDi:
+ return AArch64::LDPDi;
+ case AArch64::LDRQui:
+ case AArch64::LDURQi:
+ return AArch64::LDPQi;
+ case AArch64::LDRWui:
+ case AArch64::LDURWi:
+ return AArch64::LDPWi;
+ case AArch64::LDRXui:
+ case AArch64::LDURXi:
+ return AArch64::LDPXi;
}
}
@@ -218,16 +217,16 @@ static unsigned getPreIndexedOpcode(unsigned Opc) {
switch (Opc) {
default:
llvm_unreachable("Opcode has no pre-indexed equivalent!");
- case ARM64::STRSui: return ARM64::STRSpre;
- case ARM64::STRDui: return ARM64::STRDpre;
- case ARM64::STRQui: return ARM64::STRQpre;
- case ARM64::STRWui: return ARM64::STRWpre;
- case ARM64::STRXui: return ARM64::STRXpre;
- case ARM64::LDRSui: return ARM64::LDRSpre;
- case ARM64::LDRDui: return ARM64::LDRDpre;
- case ARM64::LDRQui: return ARM64::LDRQpre;
- case ARM64::LDRWui: return ARM64::LDRWpre;
- case ARM64::LDRXui: return ARM64::LDRXpre;
+ case AArch64::STRSui: return AArch64::STRSpre;
+ case AArch64::STRDui: return AArch64::STRDpre;
+ case AArch64::STRQui: return AArch64::STRQpre;
+ case AArch64::STRWui: return AArch64::STRWpre;
+ case AArch64::STRXui: return AArch64::STRXpre;
+ case AArch64::LDRSui: return AArch64::LDRSpre;
+ case AArch64::LDRDui: return AArch64::LDRDpre;
+ case AArch64::LDRQui: return AArch64::LDRQpre;
+ case AArch64::LDRWui: return AArch64::LDRWpre;
+ case AArch64::LDRXui: return AArch64::LDRXpre;
}
}
@@ -235,33 +234,33 @@ static unsigned getPostIndexedOpcode(unsigned Opc) {
switch (Opc) {
default:
llvm_unreachable("Opcode has no post-indexed wise equivalent!");
- case ARM64::STRSui:
- return ARM64::STRSpost;
- case ARM64::STRDui:
- return ARM64::STRDpost;
- case ARM64::STRQui:
- return ARM64::STRQpost;
- case ARM64::STRWui:
- return ARM64::STRWpost;
- case ARM64::STRXui:
- return ARM64::STRXpost;
- case ARM64::LDRSui:
- return ARM64::LDRSpost;
- case ARM64::LDRDui:
- return ARM64::LDRDpost;
- case ARM64::LDRQui:
- return ARM64::LDRQpost;
- case ARM64::LDRWui:
- return ARM64::LDRWpost;
- case ARM64::LDRXui:
- return ARM64::LDRXpost;
+ case AArch64::STRSui:
+ return AArch64::STRSpost;
+ case AArch64::STRDui:
+ return AArch64::STRDpost;
+ case AArch64::STRQui:
+ return AArch64::STRQpost;
+ case AArch64::STRWui:
+ return AArch64::STRWpost;
+ case AArch64::STRXui:
+ return AArch64::STRXpost;
+ case AArch64::LDRSui:
+ return AArch64::LDRSpost;
+ case AArch64::LDRDui:
+ return AArch64::LDRDpost;
+ case AArch64::LDRQui:
+ return AArch64::LDRQpost;
+ case AArch64::LDRWui:
+ return AArch64::LDRWpost;
+ case AArch64::LDRXui:
+ return AArch64::LDRXpost;
}
}
MachineBasicBlock::iterator
-ARM64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
- MachineBasicBlock::iterator Paired,
- bool mergeForward) {
+AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
+ MachineBasicBlock::iterator Paired,
+ bool mergeForward) {
MachineBasicBlock::iterator NextI = I;
++NextI;
// If NextI is the second of the two instructions to be merged, we need
@@ -272,7 +271,8 @@ ARM64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
++NextI;
bool IsUnscaled = isUnscaledLdst(I->getOpcode());
- int OffsetStride = IsUnscaled && EnableARM64UnscaledMemOp ? getMemSize(I) : 1;
+ int OffsetStride =
+ IsUnscaled && EnableAArch64UnscaledMemOp ? getMemSize(I) : 1;
unsigned NewOpc = getMatchingPairOpcode(I->getOpcode());
// Insert our new paired instruction after whichever of the paired
@@ -295,7 +295,7 @@ ARM64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
}
// Handle Unscaled
int OffsetImm = RtMI->getOperand(2).getImm();
- if (IsUnscaled && EnableARM64UnscaledMemOp)
+ if (IsUnscaled && EnableAArch64UnscaledMemOp)
OffsetImm /= OffsetStride;
// Construct the new instruction.
@@ -373,8 +373,8 @@ static int alignTo(int Num, int PowOf2) {
/// findMatchingInsn - Scan the instructions looking for a load/store that can
/// be combined with the current instruction into a load/store pair.
MachineBasicBlock::iterator
-ARM64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
- bool &mergeForward, unsigned Limit) {
+AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
+ bool &mergeForward, unsigned Limit) {
MachineBasicBlock::iterator E = I->getParent()->end();
MachineBasicBlock::iterator MBBI = I;
MachineInstr *FirstMI = I;
@@ -395,7 +395,7 @@ ARM64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
if (FirstMI->modifiesRegister(BaseReg, TRI))
return E;
int OffsetStride =
- IsUnscaled && EnableARM64UnscaledMemOp ? getMemSize(FirstMI) : 1;
+ IsUnscaled && EnableAArch64UnscaledMemOp ? getMemSize(FirstMI) : 1;
if (!inBoundsForPair(IsUnscaled, Offset, OffsetStride))
return E;
@@ -445,7 +445,7 @@ ARM64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
// If the alignment requirements of the paired (scaled) instruction
// can't express the offset of the unscaled input, bail and keep
// looking.
- if (IsUnscaled && EnableARM64UnscaledMemOp &&
+ if (IsUnscaled && EnableAArch64UnscaledMemOp &&
(alignTo(MinOffset, OffsetStride) != MinOffset)) {
trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI);
continue;
@@ -508,10 +508,10 @@ ARM64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
}
MachineBasicBlock::iterator
-ARM64LoadStoreOpt::mergePreIdxUpdateInsn(MachineBasicBlock::iterator I,
- MachineBasicBlock::iterator Update) {
- assert((Update->getOpcode() == ARM64::ADDXri ||
- Update->getOpcode() == ARM64::SUBXri) &&
+AArch64LoadStoreOpt::mergePreIdxUpdateInsn(MachineBasicBlock::iterator I,
+ MachineBasicBlock::iterator Update) {
+ assert((Update->getOpcode() == AArch64::ADDXri ||
+ Update->getOpcode() == AArch64::SUBXri) &&
"Unexpected base register update instruction to merge!");
MachineBasicBlock::iterator NextI = I;
// Return the instruction following the merged instruction, which is
@@ -521,14 +521,15 @@ ARM64LoadStoreOpt::mergePreIdxUpdateInsn(MachineBasicBlock::iterator I,
++NextI;
int Value = Update->getOperand(2).getImm();
- assert(ARM64_AM::getShiftValue(Update->getOperand(3).getImm()) == 0 &&
+ assert(AArch64_AM::getShiftValue(Update->getOperand(3).getImm()) == 0 &&
"Can't merge 1 << 12 offset into pre-indexed load / store");
- if (Update->getOpcode() == ARM64::SUBXri)
+ if (Update->getOpcode() == AArch64::SUBXri)
Value = -Value;
unsigned NewOpc = getPreIndexedOpcode(I->getOpcode());
MachineInstrBuilder MIB =
BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc))
+ .addOperand(Update->getOperand(0))
.addOperand(I->getOperand(0))
.addOperand(I->getOperand(1))
.addImm(Value);
@@ -550,11 +551,10 @@ ARM64LoadStoreOpt::mergePreIdxUpdateInsn(MachineBasicBlock::iterator I,
return NextI;
}
-MachineBasicBlock::iterator
-ARM64LoadStoreOpt::mergePostIdxUpdateInsn(MachineBasicBlock::iterator I,
- MachineBasicBlock::iterator Update) {
- assert((Update->getOpcode() == ARM64::ADDXri ||
- Update->getOpcode() == ARM64::SUBXri) &&
+MachineBasicBlock::iterator AArch64LoadStoreOpt::mergePostIdxUpdateInsn(
+ MachineBasicBlock::iterator I, MachineBasicBlock::iterator Update) {
+ assert((Update->getOpcode() == AArch64::ADDXri ||
+ Update->getOpcode() == AArch64::SUBXri) &&
"Unexpected base register update instruction to merge!");
MachineBasicBlock::iterator NextI = I;
// Return the instruction following the merged instruction, which is
@@ -564,14 +564,15 @@ ARM64LoadStoreOpt::mergePostIdxUpdateInsn(MachineBasicBlock::iterator I,
++NextI;
int Value = Update->getOperand(2).getImm();
- assert(ARM64_AM::getShiftValue(Update->getOperand(3).getImm()) == 0 &&
+ assert(AArch64_AM::getShiftValue(Update->getOperand(3).getImm()) == 0 &&
"Can't merge 1 << 12 offset into post-indexed load / store");
- if (Update->getOpcode() == ARM64::SUBXri)
+ if (Update->getOpcode() == AArch64::SUBXri)
Value = -Value;
unsigned NewOpc = getPostIndexedOpcode(I->getOpcode());
MachineInstrBuilder MIB =
BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(NewOpc))
+ .addOperand(Update->getOperand(0))
.addOperand(I->getOperand(0))
.addOperand(I->getOperand(1))
.addImm(Value);
@@ -598,17 +599,17 @@ static bool isMatchingUpdateInsn(MachineInstr *MI, unsigned BaseReg,
switch (MI->getOpcode()) {
default:
break;
- case ARM64::SUBXri:
+ case AArch64::SUBXri:
// Negate the offset for a SUB instruction.
Offset *= -1;
// FALLTHROUGH
- case ARM64::ADDXri:
+ case AArch64::ADDXri:
// Make sure it's a vanilla immediate operand, not a relocation or
// anything else we can't handle.
if (!MI->getOperand(2).isImm())
break;
// Watch out for 1 << 12 shifted value.
- if (ARM64_AM::getShiftValue(MI->getOperand(3).getImm()))
+ if (AArch64_AM::getShiftValue(MI->getOperand(3).getImm()))
break;
// If the instruction has the base register as source and dest and the
// immediate will fit in a signed 9-bit integer, then we have a match.
@@ -626,9 +627,8 @@ static bool isMatchingUpdateInsn(MachineInstr *MI, unsigned BaseReg,
return false;
}
-MachineBasicBlock::iterator
-ARM64LoadStoreOpt::findMatchingUpdateInsnForward(MachineBasicBlock::iterator I,
- unsigned Limit, int Value) {
+MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnForward(
+ MachineBasicBlock::iterator I, unsigned Limit, int Value) {
MachineBasicBlock::iterator E = I->getParent()->end();
MachineInstr *MemMI = I;
MachineBasicBlock::iterator MBBI = I;
@@ -681,9 +681,8 @@ ARM64LoadStoreOpt::findMatchingUpdateInsnForward(MachineBasicBlock::iterator I,
return E;
}
-MachineBasicBlock::iterator
-ARM64LoadStoreOpt::findMatchingUpdateInsnBackward(MachineBasicBlock::iterator I,
- unsigned Limit) {
+MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward(
+ MachineBasicBlock::iterator I, unsigned Limit) {
MachineBasicBlock::iterator B = I->getParent()->begin();
MachineBasicBlock::iterator E = I->getParent()->end();
MachineInstr *MemMI = I;
@@ -735,7 +734,7 @@ ARM64LoadStoreOpt::findMatchingUpdateInsnBackward(MachineBasicBlock::iterator I,
return E;
}
-bool ARM64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB) {
+bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB) {
bool Modified = false;
// Two transformations to do here:
// 1) Find loads and stores that can be merged into a single load or store
@@ -761,27 +760,27 @@ bool ARM64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB) {
// Just move on to the next instruction.
++MBBI;
break;
- case ARM64::STRSui:
- case ARM64::STRDui:
- case ARM64::STRQui:
- case ARM64::STRXui:
- case ARM64::STRWui:
- case ARM64::LDRSui:
- case ARM64::LDRDui:
- case ARM64::LDRQui:
- case ARM64::LDRXui:
- case ARM64::LDRWui:
+ case AArch64::STRSui:
+ case AArch64::STRDui:
+ case AArch64::STRQui:
+ case AArch64::STRXui:
+ case AArch64::STRWui:
+ case AArch64::LDRSui:
+ case AArch64::LDRDui:
+ case AArch64::LDRQui:
+ case AArch64::LDRXui:
+ case AArch64::LDRWui:
// do the unscaled versions as well
- case ARM64::STURSi:
- case ARM64::STURDi:
- case ARM64::STURQi:
- case ARM64::STURWi:
- case ARM64::STURXi:
- case ARM64::LDURSi:
- case ARM64::LDURDi:
- case ARM64::LDURQi:
- case ARM64::LDURWi:
- case ARM64::LDURXi: {
+ case AArch64::STURSi:
+ case AArch64::STURDi:
+ case AArch64::STURQi:
+ case AArch64::STURWi:
+ case AArch64::STURXi:
+ case AArch64::LDURSi:
+ case AArch64::LDURDi:
+ case AArch64::LDURQi:
+ case AArch64::LDURWi:
+ case AArch64::LDURXi: {
// If this is a volatile load/store, don't mess with it.
if (MI->hasOrderedMemoryRef()) {
++MBBI;
@@ -793,7 +792,7 @@ bool ARM64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB) {
break;
}
// Check if this load/store has a hint to avoid pair formation.
- // MachineMemOperands hints are set by the ARM64StorePairSuppress pass.
+ // MachineMemOperands hints are set by the AArch64StorePairSuppress pass.
if (TII->isLdStPairSuppressed(MI)) {
++MBBI;
break;
@@ -832,27 +831,27 @@ bool ARM64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB) {
// Just move on to the next instruction.
++MBBI;
break;
- case ARM64::STRSui:
- case ARM64::STRDui:
- case ARM64::STRQui:
- case ARM64::STRXui:
- case ARM64::STRWui:
- case ARM64::LDRSui:
- case ARM64::LDRDui:
- case ARM64::LDRQui:
- case ARM64::LDRXui:
- case ARM64::LDRWui:
+ case AArch64::STRSui:
+ case AArch64::STRDui:
+ case AArch64::STRQui:
+ case AArch64::STRXui:
+ case AArch64::STRWui:
+ case AArch64::LDRSui:
+ case AArch64::LDRDui:
+ case AArch64::LDRQui:
+ case AArch64::LDRXui:
+ case AArch64::LDRWui:
// do the unscaled versions as well
- case ARM64::STURSi:
- case ARM64::STURDi:
- case ARM64::STURQi:
- case ARM64::STURWi:
- case ARM64::STURXi:
- case ARM64::LDURSi:
- case ARM64::LDURDi:
- case ARM64::LDURQi:
- case ARM64::LDURWi:
- case ARM64::LDURXi: {
+ case AArch64::STURSi:
+ case AArch64::STURDi:
+ case AArch64::STURQi:
+ case AArch64::STURWi:
+ case AArch64::STURXi:
+ case AArch64::LDURSi:
+ case AArch64::LDURDi:
+ case AArch64::LDURQi:
+ case AArch64::LDURWi:
+ case AArch64::LDURXi: {
// Make sure this is a reg+imm (as opposed to an address reloc).
if (!MI->getOperand(2).isImm()) {
++MBBI;
@@ -893,7 +892,7 @@ bool ARM64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB) {
// ldr x1, [x0, #64]
// add x0, x0, #64
// merged into:
- // ldr x1, [x0], #64
+ // ldr x1, [x0, #64]!
// The immediate in the load/store is scaled by the size of the register
// being loaded. The immediate in the add we're looking for,
@@ -921,13 +920,9 @@ bool ARM64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB) {
return Modified;
}
-bool ARM64LoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
- // Early exit if pass disabled.
- if (!DoLoadStoreOpt)
- return false;
-
+bool AArch64LoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
const TargetMachine &TM = Fn.getTarget();
- TII = static_cast<const ARM64InstrInfo *>(TM.getInstrInfo());
+ TII = static_cast<const AArch64InstrInfo *>(TM.getInstrInfo());
TRI = TM.getRegisterInfo();
bool Modified = false;
@@ -942,6 +937,6 @@ bool ARM64LoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
/// createAArch64LoadStoreOptimizationPass - returns an instance of the load / store
/// optimization pass.
-FunctionPass *llvm::createARM64LoadStoreOptimizationPass() {
- return new ARM64LoadStoreOpt();
+FunctionPass *llvm::createAArch64LoadStoreOptimizationPass() {
+ return new AArch64LoadStoreOpt();
}
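
For readers skimming the rename, the matching logic this pass relies on is worth a sketch. The following is a minimal, self-contained approximation of what findMatchingUpdateInsnForward does, with an invented Insn record standing in for MachineInstr; none of these names come from the patch itself:

struct Insn {
  bool IsAddSubImm; // an ADD/SUB of an immediate
  int DefReg, BaseReg, Imm;
};

// Walk at most Limit instructions after the memory op at MemIdx, looking for
// `add/sub Base, Base, #Value` that can be folded into a post-indexed form
// such as `ldr x1, [x0], #Value`. Give up once the base register is read or
// written by anything else, since folding past that would change semantics.
int findUpdateForward(const Insn *I, int N, int MemIdx, int Base, int Value,
                      int Limit) {
  for (int Idx = MemIdx + 1, Seen = 0; Idx < N && Seen < Limit; ++Idx, ++Seen) {
    if (I[Idx].IsAddSubImm && I[Idx].DefReg == Base &&
        I[Idx].BaseReg == Base && I[Idx].Imm == Value)
      return Idx; // fold the update into the load/store
    if (I[Idx].DefReg == Base || I[Idx].BaseReg == Base)
      return -1;  // base used or clobbered first: no merge
  }
  return -1;      // scan limit or block end reached
}

The real pass additionally rejects volatile accesses, suppressed pairs, and non-reg+imm addressing, as the switch statements above show.
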
diff --git a/lib/Target/AArch64/AArch64MCInstLower.cpp b/lib/Target/AArch64/AArch64MCInstLower.cpp
index 3842bfd..ab6d375 100644
--- a/lib/Target/AArch64/AArch64MCInstLower.cpp
+++ b/lib/Target/AArch64/AArch64MCInstLower.cpp
@@ -1,4 +1,4 @@
-//===-- AArch64MCInstLower.cpp - Convert AArch64 MachineInstr to an MCInst -==//
+//==-- AArch64MCInstLower.cpp - Convert AArch64 MachineInstr to an MCInst --==//
//
// The LLVM Compiler Infrastructure
//
@@ -12,146 +12,191 @@
//
//===----------------------------------------------------------------------===//
-#include "AArch64AsmPrinter.h"
-#include "AArch64TargetMachine.h"
+#include "AArch64MCInstLower.h"
#include "MCTargetDesc/AArch64MCExpr.h"
#include "Utils/AArch64BaseInfo.h"
-#include "llvm/ADT/SmallString.h"
#include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/IR/Mangler.h"
-#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
-
+#include "llvm/Support/CodeGen.h"
+#include "llvm/Target/TargetMachine.h"
using namespace llvm;
-MCOperand
-AArch64AsmPrinter::lowerSymbolOperand(const MachineOperand &MO,
- const MCSymbol *Sym) const {
- const MCExpr *Expr = 0;
+AArch64MCInstLower::AArch64MCInstLower(MCContext &ctx, Mangler &mang,
+ AsmPrinter &printer)
+ : Ctx(ctx), Printer(printer), TargetTriple(printer.getTargetTriple()) {}
- Expr = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_None, OutContext);
+MCSymbol *
+AArch64MCInstLower::GetGlobalAddressSymbol(const MachineOperand &MO) const {
+ return Printer.getSymbol(MO.getGlobal());
+}
- switch (MO.getTargetFlags()) {
- case AArch64II::MO_GOT:
- Expr = AArch64MCExpr::CreateGOT(Expr, OutContext);
- break;
- case AArch64II::MO_GOT_LO12:
- Expr = AArch64MCExpr::CreateGOTLo12(Expr, OutContext);
- break;
- case AArch64II::MO_LO12:
- Expr = AArch64MCExpr::CreateLo12(Expr, OutContext);
- break;
- case AArch64II::MO_DTPREL_G1:
- Expr = AArch64MCExpr::CreateDTPREL_G1(Expr, OutContext);
- break;
- case AArch64II::MO_DTPREL_G0_NC:
- Expr = AArch64MCExpr::CreateDTPREL_G0_NC(Expr, OutContext);
- break;
- case AArch64II::MO_GOTTPREL:
- Expr = AArch64MCExpr::CreateGOTTPREL(Expr, OutContext);
- break;
- case AArch64II::MO_GOTTPREL_LO12:
- Expr = AArch64MCExpr::CreateGOTTPRELLo12(Expr, OutContext);
- break;
- case AArch64II::MO_TLSDESC:
- Expr = AArch64MCExpr::CreateTLSDesc(Expr, OutContext);
- break;
- case AArch64II::MO_TLSDESC_LO12:
- Expr = AArch64MCExpr::CreateTLSDescLo12(Expr, OutContext);
- break;
- case AArch64II::MO_TPREL_G1:
- Expr = AArch64MCExpr::CreateTPREL_G1(Expr, OutContext);
- break;
- case AArch64II::MO_TPREL_G0_NC:
- Expr = AArch64MCExpr::CreateTPREL_G0_NC(Expr, OutContext);
- break;
- case AArch64II::MO_ABS_G3:
- Expr = AArch64MCExpr::CreateABS_G3(Expr, OutContext);
- break;
- case AArch64II::MO_ABS_G2_NC:
- Expr = AArch64MCExpr::CreateABS_G2_NC(Expr, OutContext);
- break;
- case AArch64II::MO_ABS_G1_NC:
- Expr = AArch64MCExpr::CreateABS_G1_NC(Expr, OutContext);
- break;
- case AArch64II::MO_ABS_G0_NC:
- Expr = AArch64MCExpr::CreateABS_G0_NC(Expr, OutContext);
- break;
- case AArch64II::MO_NO_FLAG:
- // Expr is already correct
- break;
- default:
- llvm_unreachable("Unexpected MachineOperand flag");
+MCSymbol *
+AArch64MCInstLower::GetExternalSymbolSymbol(const MachineOperand &MO) const {
+ return Printer.GetExternalSymbolSymbol(MO.getSymbolName());
+}
+
+MCOperand AArch64MCInstLower::lowerSymbolOperandDarwin(const MachineOperand &MO,
+ MCSymbol *Sym) const {
+ // FIXME: We would like an efficient form for this, so we don't have to do a
+ // lot of extra uniquing.
+ MCSymbolRefExpr::VariantKind RefKind = MCSymbolRefExpr::VK_None;
+ if ((MO.getTargetFlags() & AArch64II::MO_GOT) != 0) {
+ if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) == AArch64II::MO_PAGE)
+ RefKind = MCSymbolRefExpr::VK_GOTPAGE;
+ else if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) ==
+ AArch64II::MO_PAGEOFF)
+ RefKind = MCSymbolRefExpr::VK_GOTPAGEOFF;
+ else
+ assert(0 && "Unexpected target flags with MO_GOT on GV operand");
+ } else if ((MO.getTargetFlags() & AArch64II::MO_TLS) != 0) {
+ if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) == AArch64II::MO_PAGE)
+ RefKind = MCSymbolRefExpr::VK_TLVPPAGE;
+ else if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) ==
+ AArch64II::MO_PAGEOFF)
+ RefKind = MCSymbolRefExpr::VK_TLVPPAGEOFF;
+ else
+ llvm_unreachable("Unexpected target flags with MO_TLS on GV operand");
+ } else {
+ if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) == AArch64II::MO_PAGE)
+ RefKind = MCSymbolRefExpr::VK_PAGE;
+ else if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) ==
+ AArch64II::MO_PAGEOFF)
+ RefKind = MCSymbolRefExpr::VK_PAGEOFF;
}
+ const MCExpr *Expr = MCSymbolRefExpr::Create(Sym, RefKind, Ctx);
+ if (!MO.isJTI() && MO.getOffset())
+ Expr = MCBinaryExpr::CreateAdd(
+ Expr, MCConstantExpr::Create(MO.getOffset(), Ctx), Ctx);
+ return MCOperand::CreateExpr(Expr);
+}
+
+MCOperand AArch64MCInstLower::lowerSymbolOperandELF(const MachineOperand &MO,
+ MCSymbol *Sym) const {
+ uint32_t RefFlags = 0;
+ if (MO.getTargetFlags() & AArch64II::MO_GOT)
+ RefFlags |= AArch64MCExpr::VK_GOT;
+ else if (MO.getTargetFlags() & AArch64II::MO_TLS) {
+ TLSModel::Model Model;
+ if (MO.isGlobal()) {
+ const GlobalValue *GV = MO.getGlobal();
+ Model = Printer.TM.getTLSModel(GV);
+ } else {
+ assert(MO.isSymbol() &&
+ StringRef(MO.getSymbolName()) == "_TLS_MODULE_BASE_" &&
+ "unexpected external TLS symbol");
+ Model = TLSModel::GeneralDynamic;
+ }
+ switch (Model) {
+ case TLSModel::InitialExec:
+ RefFlags |= AArch64MCExpr::VK_GOTTPREL;
+ break;
+ case TLSModel::LocalExec:
+ RefFlags |= AArch64MCExpr::VK_TPREL;
+ break;
+ case TLSModel::LocalDynamic:
+ RefFlags |= AArch64MCExpr::VK_DTPREL;
+ break;
+ case TLSModel::GeneralDynamic:
+ RefFlags |= AArch64MCExpr::VK_TLSDESC;
+ break;
+ }
+ } else {
+ // No modifier means this is a generic reference, classified as absolute for
+ // the cases where it matters (:abs_g0: etc).
+ RefFlags |= AArch64MCExpr::VK_ABS;
+ }
+
+ if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) == AArch64II::MO_PAGE)
+ RefFlags |= AArch64MCExpr::VK_PAGE;
+ else if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) ==
+ AArch64II::MO_PAGEOFF)
+ RefFlags |= AArch64MCExpr::VK_PAGEOFF;
+ else if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) == AArch64II::MO_G3)
+ RefFlags |= AArch64MCExpr::VK_G3;
+ else if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) == AArch64II::MO_G2)
+ RefFlags |= AArch64MCExpr::VK_G2;
+ else if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) == AArch64II::MO_G1)
+ RefFlags |= AArch64MCExpr::VK_G1;
+ else if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) == AArch64II::MO_G0)
+ RefFlags |= AArch64MCExpr::VK_G0;
+
+ if (MO.getTargetFlags() & AArch64II::MO_NC)
+ RefFlags |= AArch64MCExpr::VK_NC;
+
+ const MCExpr *Expr =
+ MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_None, Ctx);
if (!MO.isJTI() && MO.getOffset())
- Expr = MCBinaryExpr::CreateAdd(Expr,
- MCConstantExpr::Create(MO.getOffset(),
- OutContext),
- OutContext);
+ Expr = MCBinaryExpr::CreateAdd(
+ Expr, MCConstantExpr::Create(MO.getOffset(), Ctx), Ctx);
+
+ AArch64MCExpr::VariantKind RefKind;
+ RefKind = static_cast<AArch64MCExpr::VariantKind>(RefFlags);
+ Expr = AArch64MCExpr::Create(Expr, RefKind, Ctx);
return MCOperand::CreateExpr(Expr);
}
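
To make the flag composition concrete: for a local-exec TLS access to the low 12 bits of a page (roughly `add x0, x0, :tprel_lo12_nc:var`), the ELF path above ends up combining flags as follows. This is an illustrative walk-through using only names visible in this patch, not an excerpt from the function:

uint32_t RefFlags = 0;
RefFlags |= AArch64MCExpr::VK_TPREL;   // TLSModel::LocalExec
RefFlags |= AArch64MCExpr::VK_PAGEOFF; // MO_FRAGMENT == MO_PAGEOFF
RefFlags |= AArch64MCExpr::VK_NC;      // MO_NC: no overflow check wanted
const MCExpr *Expr =
    MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_None, Ctx);
Expr = AArch64MCExpr::Create(
    Expr, static_cast<AArch64MCExpr::VariantKind>(RefFlags), Ctx);

Sym and Ctx stand for the same values as in lowerSymbolOperandELF; the Darwin path instead folds the whole choice into a single MCSymbolRefExpr variant kind such as VK_TLVPPAGEOFF.
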
-bool AArch64AsmPrinter::lowerOperand(const MachineOperand &MO,
- MCOperand &MCOp) const {
+MCOperand AArch64MCInstLower::LowerSymbolOperand(const MachineOperand &MO,
+ MCSymbol *Sym) const {
+ if (TargetTriple.isOSDarwin())
+ return lowerSymbolOperandDarwin(MO, Sym);
+
+ assert(TargetTriple.isOSBinFormatELF() && "Expect Darwin or ELF target");
+ return lowerSymbolOperandELF(MO, Sym);
+}
+
+bool AArch64MCInstLower::lowerOperand(const MachineOperand &MO,
+ MCOperand &MCOp) const {
switch (MO.getType()) {
- default: llvm_unreachable("unknown operand type");
+ default:
+ assert(0 && "unknown operand type");
case MachineOperand::MO_Register:
+ // Ignore all implicit register operands.
if (MO.isImplicit())
return false;
- assert(!MO.getSubReg() && "Subregs should be eliminated!");
MCOp = MCOperand::CreateReg(MO.getReg());
break;
+ case MachineOperand::MO_RegisterMask:
+ // Regmasks are like implicit defs.
+ return false;
case MachineOperand::MO_Immediate:
MCOp = MCOperand::CreateImm(MO.getImm());
break;
- case MachineOperand::MO_FPImmediate: {
- assert(MO.getFPImm()->isZero() && "Only fp imm 0.0 is supported");
- MCOp = MCOperand::CreateFPImm(0.0);
- break;
- }
- case MachineOperand::MO_BlockAddress:
- MCOp = lowerSymbolOperand(MO, GetBlockAddressSymbol(MO.getBlockAddress()));
- break;
- case MachineOperand::MO_ExternalSymbol:
- MCOp = lowerSymbolOperand(MO, GetExternalSymbolSymbol(MO.getSymbolName()));
+ case MachineOperand::MO_MachineBasicBlock:
+ MCOp = MCOperand::CreateExpr(
+ MCSymbolRefExpr::Create(MO.getMBB()->getSymbol(), Ctx));
break;
case MachineOperand::MO_GlobalAddress:
- MCOp = lowerSymbolOperand(MO, getSymbol(MO.getGlobal()));
+ MCOp = LowerSymbolOperand(MO, GetGlobalAddressSymbol(MO));
break;
- case MachineOperand::MO_MachineBasicBlock:
- MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create(
- MO.getMBB()->getSymbol(), OutContext));
+ case MachineOperand::MO_ExternalSymbol:
+ MCOp = LowerSymbolOperand(MO, GetExternalSymbolSymbol(MO));
break;
case MachineOperand::MO_JumpTableIndex:
- MCOp = lowerSymbolOperand(MO, GetJTISymbol(MO.getIndex()));
+ MCOp = LowerSymbolOperand(MO, Printer.GetJTISymbol(MO.getIndex()));
break;
case MachineOperand::MO_ConstantPoolIndex:
- MCOp = lowerSymbolOperand(MO, GetCPISymbol(MO.getIndex()));
+ MCOp = LowerSymbolOperand(MO, Printer.GetCPISymbol(MO.getIndex()));
+ break;
+ case MachineOperand::MO_BlockAddress:
+ MCOp = LowerSymbolOperand(
+ MO, Printer.GetBlockAddressSymbol(MO.getBlockAddress()));
break;
- case MachineOperand::MO_RegisterMask:
- // Ignore call clobbers
- return false;
-
}
-
return true;
}
-void llvm::LowerAArch64MachineInstrToMCInst(const MachineInstr *MI,
- MCInst &OutMI,
- AArch64AsmPrinter &AP) {
+void AArch64MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
OutMI.setOpcode(MI->getOpcode());
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
-
MCOperand MCOp;
- if (AP.lowerOperand(MO, MCOp))
+ if (lowerOperand(MI->getOperand(i), MCOp))
OutMI.addOperand(MCOp);
}
}
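
A hedged usage sketch of the new class: an AsmPrinter subclass would typically own one lowering object and drive it per instruction, along the lines below. MyAsmPrinter and the MCInstLowering member are assumptions for illustration, not names from this commit:

void MyAsmPrinter::EmitInstruction(const MachineInstr *MI) {
  MCInst TmpInst;
  MCInstLowering.Lower(MI, TmpInst); // the AArch64MCInstLower member
  EmitToStreamer(OutStreamer, TmpInst);
}
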
diff --git a/lib/Target/ARM64/ARM64MCInstLower.h b/lib/Target/AArch64/AArch64MCInstLower.h
index 7e3a2c8..ba50ba9 100644
--- a/lib/Target/ARM64/ARM64MCInstLower.h
+++ b/lib/Target/AArch64/AArch64MCInstLower.h
@@ -1,4 +1,4 @@
-//===-- ARM64MCInstLower.h - Lower MachineInstr to MCInst ----------------===//
+//===-- AArch64MCInstLower.h - Lower MachineInstr to MCInst ---------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef ARM64_MCINSTLOWER_H
-#define ARM64_MCINSTLOWER_H
+#ifndef AArch64_MCINSTLOWER_H
+#define AArch64_MCINSTLOWER_H
#include "llvm/ADT/Triple.h"
#include "llvm/Support/Compiler.h"
@@ -25,15 +25,15 @@ class MachineModuleInfoMachO;
class MachineOperand;
class Mangler;
-/// ARM64MCInstLower - This class is used to lower an MachineInstr
+/// AArch64MCInstLower - This class is used to lower a MachineInstr
/// into an MCInst.
-class LLVM_LIBRARY_VISIBILITY ARM64MCInstLower {
+class LLVM_LIBRARY_VISIBILITY AArch64MCInstLower {
MCContext &Ctx;
AsmPrinter &Printer;
Triple TargetTriple;
public:
- ARM64MCInstLower(MCContext &ctx, Mangler &mang, AsmPrinter &printer);
+ AArch64MCInstLower(MCContext &ctx, Mangler &mang, AsmPrinter &printer);
bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp) const;
void Lower(const MachineInstr *MI, MCInst &OutMI) const;
diff --git a/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp b/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp
deleted file mode 100644
index f45d8f7..0000000
--- a/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp
+++ /dev/null
@@ -1,18 +0,0 @@
-//===-- AArch64MachineFuctionInfo.cpp - AArch64 machine function info -----===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file just contains the anchor for the AArch64MachineFunctionInfo to
-// force vtable emission.
-//
-//===----------------------------------------------------------------------===//
-#include "AArch64MachineFunctionInfo.h"
-
-using namespace llvm;
-
-void AArch64MachineFunctionInfo::anchor() { }
diff --git a/lib/Target/AArch64/AArch64MachineFunctionInfo.h b/lib/Target/AArch64/AArch64MachineFunctionInfo.h
index 33da54f..7c257ba 100644
--- a/lib/Target/AArch64/AArch64MachineFunctionInfo.h
+++ b/lib/Target/AArch64/AArch64MachineFunctionInfo.h
@@ -1,4 +1,4 @@
-//=- AArch64MachineFuctionInfo.h - AArch64 machine function info -*- C++ -*-==//
+//=- AArch64MachineFunctionInfo.h - AArch64 machine function info -*- C++ -*-=//
//
// The LLVM Compiler Infrastructure
//
@@ -11,17 +11,19 @@
//
//===----------------------------------------------------------------------===//
-#ifndef AARCH64MACHINEFUNCTIONINFO_H
-#define AARCH64MACHINEFUNCTIONINFO_H
+#ifndef AArch64MACHINEFUNCTIONINFO_H
+#define AArch64MACHINEFUNCTIONINFO_H
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/MC/MCLinkerOptimizationHint.h"
namespace llvm {
-/// This class is derived from MachineFunctionInfo and contains private AArch64
-/// target-specific information for each MachineFunction.
-class AArch64MachineFunctionInfo : public MachineFunctionInfo {
- virtual void anchor();
+/// AArch64FunctionInfo - This class is derived from MachineFunctionInfo and
+/// contains private AArch64-specific information for each MachineFunction.
+class AArch64FunctionInfo : public MachineFunctionInfo {
/// Number of bytes of arguments this function has on the stack. If the callee
/// is expected to restore the argument stack this should be a multiple of 16,
@@ -39,111 +41,123 @@ class AArch64MachineFunctionInfo : public MachineFunctionInfo {
/// callee is expected to pop the args.
unsigned ArgumentStackToRestore;
- /// If the stack needs to be adjusted on frame entry in two stages, this
- /// records the size of the first adjustment just prior to storing
- /// callee-saved registers. The callee-saved slots are addressed assuming
- /// SP == <incoming-SP> - InitialStackAdjust.
- unsigned InitialStackAdjust;
+ /// HasStackFrame - True if this function has a stack frame. Set by
+ /// processFunctionBeforeCalleeSavedScan().
+ bool HasStackFrame;
- /// Number of local-dynamic TLS accesses.
- unsigned NumLocalDynamics;
+ /// \brief Size of the local stack frame, not including callee-saved registers.
+ unsigned LocalStackSize;
- /// @see AArch64 Procedure Call Standard, B.3
- ///
- /// The Frame index of the area where LowerFormalArguments puts the
- /// general-purpose registers that might contain variadic parameters.
- int VariadicGPRIdx;
+ /// \brief Number of TLS accesses using the special (combinable)
+ /// _TLS_MODULE_BASE_ symbol.
+ unsigned NumLocalDynamicTLSAccesses;
- /// @see AArch64 Procedure Call Standard, B.3
- ///
- /// The size of the frame object used to store the general-purpose registers
- /// which might contain variadic arguments. This is the offset from
- /// VariadicGPRIdx to what's stored in __gr_top.
- unsigned VariadicGPRSize;
+ /// \brief FrameIndex for start of varargs area for arguments passed on the
+ /// stack.
+ int VarArgsStackIndex;
- /// @see AArch64 Procedure Call Standard, B.3
- ///
- /// The Frame index of the area where LowerFormalArguments puts the
- /// floating-point registers that might contain variadic parameters.
- int VariadicFPRIdx;
+ /// \brief FrameIndex for start of varargs area for arguments passed in
+ /// general purpose registers.
+ int VarArgsGPRIndex;
- /// @see AArch64 Procedure Call Standard, B.3
- ///
- /// The size of the frame object used to store the floating-point registers
- /// which might contain variadic arguments. This is the offset from
- /// VariadicFPRIdx to what's stored in __vr_top.
- unsigned VariadicFPRSize;
+ /// \brief Size of the varargs area for arguments passed in general purpose
+ /// registers.
+ unsigned VarArgsGPRSize;
- /// @see AArch64 Procedure Call Standard, B.3
- ///
- /// The Frame index of an object pointing just past the last known stacked
- /// argument on entry to a variadic function. This goes into the __stack field
- /// of the va_list type.
- int VariadicStackIdx;
+ /// \brief FrameIndex for start of varargs area for arguments passed in
+ /// floating-point registers.
+ int VarArgsFPRIndex;
- /// The offset of the frame pointer from the stack pointer on function
- /// entry. This is expected to be negative.
- int FramePointerOffset;
+ /// \brief Size of the varargs area for arguments passed in floating-point
+ /// registers.
+ unsigned VarArgsFPRSize;
public:
- AArch64MachineFunctionInfo()
- : BytesInStackArgArea(0),
- ArgumentStackToRestore(0),
- InitialStackAdjust(0),
- NumLocalDynamics(0),
- VariadicGPRIdx(0),
- VariadicGPRSize(0),
- VariadicFPRIdx(0),
- VariadicFPRSize(0),
- VariadicStackIdx(0),
- FramePointerOffset(0) {}
-
- explicit AArch64MachineFunctionInfo(MachineFunction &MF)
- : BytesInStackArgArea(0),
- ArgumentStackToRestore(0),
- InitialStackAdjust(0),
- NumLocalDynamics(0),
- VariadicGPRIdx(0),
- VariadicGPRSize(0),
- VariadicFPRIdx(0),
- VariadicFPRSize(0),
- VariadicStackIdx(0),
- FramePointerOffset(0) {}
+ AArch64FunctionInfo()
+ : BytesInStackArgArea(0), ArgumentStackToRestore(0), HasStackFrame(false),
+ NumLocalDynamicTLSAccesses(0), VarArgsStackIndex(0), VarArgsGPRIndex(0),
+ VarArgsGPRSize(0), VarArgsFPRIndex(0), VarArgsFPRSize(0) {}
+
+ explicit AArch64FunctionInfo(MachineFunction &MF)
+ : BytesInStackArgArea(0), ArgumentStackToRestore(0), HasStackFrame(false),
+ NumLocalDynamicTLSAccesses(0), VarArgsStackIndex(0), VarArgsGPRIndex(0),
+ VarArgsGPRSize(0), VarArgsFPRIndex(0), VarArgsFPRSize(0) {
+ (void)MF;
+ }
unsigned getBytesInStackArgArea() const { return BytesInStackArgArea; }
- void setBytesInStackArgArea (unsigned bytes) { BytesInStackArgArea = bytes;}
+ void setBytesInStackArgArea(unsigned bytes) { BytesInStackArgArea = bytes; }
unsigned getArgumentStackToRestore() const { return ArgumentStackToRestore; }
void setArgumentStackToRestore(unsigned bytes) {
ArgumentStackToRestore = bytes;
}
- unsigned getInitialStackAdjust() const { return InitialStackAdjust; }
- void setInitialStackAdjust(unsigned bytes) { InitialStackAdjust = bytes; }
+ bool hasStackFrame() const { return HasStackFrame; }
+ void setHasStackFrame(bool s) { HasStackFrame = s; }
- unsigned getNumLocalDynamicTLSAccesses() const { return NumLocalDynamics; }
- void incNumLocalDynamicTLSAccesses() { ++NumLocalDynamics; }
+ void setLocalStackSize(unsigned Size) { LocalStackSize = Size; }
+ unsigned getLocalStackSize() const { return LocalStackSize; }
- int getVariadicGPRIdx() const { return VariadicGPRIdx; }
- void setVariadicGPRIdx(int Idx) { VariadicGPRIdx = Idx; }
+ void incNumLocalDynamicTLSAccesses() { ++NumLocalDynamicTLSAccesses; }
+ unsigned getNumLocalDynamicTLSAccesses() const {
+ return NumLocalDynamicTLSAccesses;
+ }
- unsigned getVariadicGPRSize() const { return VariadicGPRSize; }
- void setVariadicGPRSize(unsigned Size) { VariadicGPRSize = Size; }
+ int getVarArgsStackIndex() const { return VarArgsStackIndex; }
+ void setVarArgsStackIndex(int Index) { VarArgsStackIndex = Index; }
- int getVariadicFPRIdx() const { return VariadicFPRIdx; }
- void setVariadicFPRIdx(int Idx) { VariadicFPRIdx = Idx; }
+ int getVarArgsGPRIndex() const { return VarArgsGPRIndex; }
+ void setVarArgsGPRIndex(int Index) { VarArgsGPRIndex = Index; }
- unsigned getVariadicFPRSize() const { return VariadicFPRSize; }
- void setVariadicFPRSize(unsigned Size) { VariadicFPRSize = Size; }
+ unsigned getVarArgsGPRSize() const { return VarArgsGPRSize; }
+ void setVarArgsGPRSize(unsigned Size) { VarArgsGPRSize = Size; }
- int getVariadicStackIdx() const { return VariadicStackIdx; }
- void setVariadicStackIdx(int Idx) { VariadicStackIdx = Idx; }
+ int getVarArgsFPRIndex() const { return VarArgsFPRIndex; }
+ void setVarArgsFPRIndex(int Index) { VarArgsFPRIndex = Index; }
- int getFramePointerOffset() const { return FramePointerOffset; }
- void setFramePointerOffset(int Idx) { FramePointerOffset = Idx; }
+ unsigned getVarArgsFPRSize() const { return VarArgsFPRSize; }
+ void setVarArgsFPRSize(unsigned Size) { VarArgsFPRSize = Size; }
-};
+ typedef SmallPtrSet<const MachineInstr *, 16> SetOfInstructions;
+
+ const SetOfInstructions &getLOHRelated() const { return LOHRelated; }
+
+ // Shortcuts for LOH related types.
+ class MILOHDirective {
+ MCLOHType Kind;
+ /// Arguments of this directive. Order matters.
+ SmallVector<const MachineInstr *, 3> Args;
+
+ public:
+ typedef SmallVectorImpl<const MachineInstr *> LOHArgs;
+
+ MILOHDirective(MCLOHType Kind, const LOHArgs &Args)
+ : Kind(Kind), Args(Args.begin(), Args.end()) {
+ assert(isValidMCLOHType(Kind) && "Invalid LOH directive type!");
+ }
+
+ MCLOHType getKind() const { return Kind; }
+ const LOHArgs &getArgs() const { return Args; }
+ };
+
+ typedef MILOHDirective::LOHArgs MILOHArgs;
+ typedef SmallVector<MILOHDirective, 32> MILOHContainer;
+
+ const MILOHContainer &getLOHContainer() const { return LOHContainerSet; }
+
+ /// Add a LOH directive of the given @p Kind with the given @p Args.
+ void addLOHDirective(MCLOHType Kind, const MILOHArgs &Args) {
+ LOHContainerSet.push_back(MILOHDirective(Kind, Args));
+ LOHRelated.insert(Args.begin(), Args.end());
+ }
+
+private:
+ // Hold the lists of LOHs.
+ MILOHContainer LOHContainerSet;
+ SetOfInstructions LOHRelated;
+};
} // End llvm namespace
-#endif
+#endif // AArch64MACHINEFUNCTIONINFO_H
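
To show the intended use of the new LOH plumbing: a pass that has proven an ADRP/LDR pair safe to contract could record the hint roughly as follows. AdrpMI and LdrMI are hypothetical MachineInstr pointers; MCLOH_AdrpLdr comes from MCLinkerOptimizationHint.h, which this header now pulls in:

AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
SmallVector<const MachineInstr *, 2> Args;
Args.push_back(AdrpMI); // the ADRP computing the page address
Args.push_back(LdrMI);  // the dependent LDR using the page offset
AFI->addLOHDirective(MCLOH_AdrpLdr, Args); // also populates LOHRelated
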
diff --git a/lib/Target/ARM64/ARM64PerfectShuffle.h b/lib/Target/AArch64/AArch64PerfectShuffle.h
index 6759236..b22fa24 100644
--- a/lib/Target/ARM64/ARM64PerfectShuffle.h
+++ b/lib/Target/AArch64/AArch64PerfectShuffle.h
@@ -1,4 +1,4 @@
-//===-- ARM64PerfectShuffle.h - AdvSIMD Perfect Shuffle Table -------------===//
+//===-- AArch64PerfectShuffle.h - AdvSIMD Perfect Shuffle Table -----------===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/ARM64/ARM64PromoteConstant.cpp b/lib/Target/AArch64/AArch64PromoteConstant.cpp
index 9fbaedb..4723cc4 100644
--- a/lib/Target/ARM64/ARM64PromoteConstant.cpp
+++ b/lib/Target/AArch64/AArch64PromoteConstant.cpp
@@ -1,5 +1,4 @@
-
-//===-- ARM64PromoteConstant.cpp --- Promote constant to global for ARM64 -===//
+//=- AArch64PromoteConstant.cpp --- Promote constant to global for AArch64 -==//
//
// The LLVM Compiler Infrastructure
//
@@ -8,23 +7,20 @@
//
//===----------------------------------------------------------------------===//
//
-// This file implements the ARM64PromoteConstant pass which promotes constant
-// to global variables when this is likely to be more efficient.
-// Currently only types related to constant vector (i.e., constant vector, array
-// of constant vectors, constant structure with a constant vector field, etc.)
-// are promoted to global variables.
-// Indeed, constant vector are likely to be lowered in target constant pool
-// during instruction selection.
-// Therefore, the access will remain the same (memory load), but the structures
-// types are not split into different constant pool accesses for each field.
-// The bonus side effect is that created globals may be merged by the global
-// merge pass.
+// This file implements the AArch64PromoteConstant pass which promotes constants
+// to global variables when this is likely to be more efficient. Currently only
+// types related to constant vector (i.e., constant vector, array of constant
+// vectors, constant structure with a constant vector field, etc.) are promoted
+// to global variables. Constant vectors are likely to be lowered in target
+// constant pool during instruction selection already; therefore, the access
+// will remain the same (memory load), but the structure types are not split
+// into different constant pool accesses for each field. A bonus side effect is
+// that created globals may be merged by the global merge pass.
//
// FIXME: This pass may be useful for other targets too.
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "arm64-promote-const"
-#include "ARM64.h"
+#include "AArch64.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallSet.h"
@@ -44,15 +40,17 @@
using namespace llvm;
+#define DEBUG_TYPE "aarch64-promote-const"
+
// Stress testing mode - disable heuristics.
-static cl::opt<bool> Stress("arm64-stress-promote-const", cl::Hidden,
+static cl::opt<bool> Stress("aarch64-stress-promote-const", cl::Hidden,
cl::desc("Promote all vector constants"));
STATISTIC(NumPromoted, "Number of promoted constants");
STATISTIC(NumPromotedUses, "Number of promoted constants uses");
//===----------------------------------------------------------------------===//
-// ARM64PromoteConstant
+// AArch64PromoteConstant
//===----------------------------------------------------------------------===//
namespace {
@@ -74,26 +72,28 @@ namespace {
/// return ret;
/// }
///
-/// The constants in that example are folded into the uses. Thus, 4 different
+/// The constants in this example are folded into the uses. Thus, 4 different
/// constants are created.
+///
/// As their type is a vector, the cheapest way to create them is to load them
/// from memory.
-/// Therefore the final assembly final has 4 different load.
-/// With this pass enabled, only one load is issued for the constants.
-class ARM64PromoteConstant : public ModulePass {
+///
+/// Therefore the final assembly has 4 different loads. With this pass
+/// enabled, only one load is issued for the constants.
+class AArch64PromoteConstant : public ModulePass {
public:
static char ID;
- ARM64PromoteConstant() : ModulePass(ID) {}
+ AArch64PromoteConstant() : ModulePass(ID) {}
- virtual const char *getPassName() const { return "ARM64 Promote Constant"; }
+ const char *getPassName() const override { return "AArch64 Promote Constant"; }
/// Iterate over the functions and promote the interesting constants into
/// global variables with module scope.
- bool runOnModule(Module &M) {
+ bool runOnModule(Module &M) override {
DEBUG(dbgs() << getPassName() << '\n');
bool Changed = false;
- for (auto &MF: M) {
+ for (auto &MF : M) {
Changed |= runOnFunction(MF);
}
return Changed;
@@ -106,18 +106,18 @@ private:
bool runOnFunction(Function &F);
// This transformation requires dominator info
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
AU.addRequired<DominatorTreeWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
}
- /// Type to store a list of User
+ /// Type to store a list of User.
typedef SmallVector<Value::user_iterator, 4> Users;
/// Map an insertion point to all the uses it dominates.
typedef DenseMap<Instruction *, Users> InsertionPoints;
/// Map a function to the required insertion point of load for a
- /// global variable
+ /// global variable.
typedef DenseMap<Function *, InsertionPoints> InsertionPointsPerFunc;
/// Find the closest point that dominates the given Use.
@@ -187,34 +187,34 @@ private:
Value::user_iterator &UseIt,
InsertionPoints::iterator &IPI,
InsertionPoints &InsertPts) {
- // Record the dominated use
+ // Record the dominated use.
IPI->second.push_back(UseIt);
// Transfer the dominated uses of IPI to NewPt
// Inserting into the DenseMap may invalidate existing iterator.
// Keep a copy of the key to find the iterator to erase.
Instruction *OldInstr = IPI->first;
InsertPts.insert(InsertionPoints::value_type(NewPt, IPI->second));
- // Erase IPI
+ // Erase IPI.
IPI = InsertPts.find(OldInstr);
InsertPts.erase(IPI);
}
};
} // end anonymous namespace
-char ARM64PromoteConstant::ID = 0;
+char AArch64PromoteConstant::ID = 0;
namespace llvm {
-void initializeARM64PromoteConstantPass(PassRegistry &);
+void initializeAArch64PromoteConstantPass(PassRegistry &);
}
-INITIALIZE_PASS_BEGIN(ARM64PromoteConstant, "arm64-promote-const",
- "ARM64 Promote Constant Pass", false, false)
+INITIALIZE_PASS_BEGIN(AArch64PromoteConstant, "aarch64-promote-const",
+ "AArch64 Promote Constant Pass", false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_END(ARM64PromoteConstant, "arm64-promote-const",
- "ARM64 Promote Constant Pass", false, false)
+INITIALIZE_PASS_END(AArch64PromoteConstant, "aarch64-promote-const",
+ "AArch64 Promote Constant Pass", false, false)
-ModulePass *llvm::createARM64PromoteConstantPass() {
- return new ARM64PromoteConstant();
+ModulePass *llvm::createAArch64PromoteConstantPass() {
+ return new AArch64PromoteConstant();
}
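
For completeness, the pass would be scheduled through this creation hook in the usual way; a minimal sketch, assuming a legacy pass manager already set up elsewhere:

legacy::PassManager PM;                           // hypothetical driver
PM.add(llvm::createAArch64PromoteConstantPass());
PM.run(M);                                        // M: the Module to optimize
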
/// Check if the given type uses a vector type.
@@ -243,26 +243,26 @@ static bool shouldConvertUse(const Constant *Cst, const Instruction *Instr,
if (isa<const ShuffleVectorInst>(Instr) && OpIdx == 2)
return false;
- // extractvalue instruction expects a const idx
+ // extractvalue instruction expects a const idx.
if (isa<const ExtractValueInst>(Instr) && OpIdx > 0)
return false;
- // extractvalue instruction expects a const idx
+ // extractvalue instruction expects a const idx.
if (isa<const InsertValueInst>(Instr) && OpIdx > 1)
return false;
if (isa<const AllocaInst>(Instr) && OpIdx > 0)
return false;
- // Alignment argument must be constant
+ // Alignment argument must be constant.
if (isa<const LoadInst>(Instr) && OpIdx > 0)
return false;
- // Alignment argument must be constant
+ // Alignment argument must be constant.
if (isa<const StoreInst>(Instr) && OpIdx > 1)
return false;
- // Index must be constant
+ // Index must be constant.
if (isa<const GetElementPtrInst>(Instr) && OpIdx > 0)
return false;
@@ -271,19 +271,19 @@ static bool shouldConvertUse(const Constant *Cst, const Instruction *Instr,
if (isa<const LandingPadInst>(Instr))
return false;
- // switch instruction expects constants to compare to
+ // Switch instruction expects constants to compare to.
if (isa<const SwitchInst>(Instr))
return false;
- // Expected address must be a constant
+ // Expected address must be a constant.
if (isa<const IndirectBrInst>(Instr))
return false;
- // Do not mess with intrinsic
+ // Do not mess with intrinsics.
if (isa<const IntrinsicInst>(Instr))
return false;
- // Do not mess with inline asm
+ // Do not mess with inline asm.
const CallInst *CI = dyn_cast<const CallInst>(Instr);
if (CI && isa<const InlineAsm>(CI->getCalledValue()))
return false;
@@ -329,9 +329,9 @@ static bool shouldConvert(const Constant *Cst) {
}
Instruction *
-ARM64PromoteConstant::findInsertionPoint(Value::user_iterator &Use) {
+AArch64PromoteConstant::findInsertionPoint(Value::user_iterator &Use) {
// If this user is a phi, the insertion point is in the related
- // incoming basic block
+ // incoming basic block.
PHINode *PhiInst = dyn_cast<PHINode>(*Use);
Instruction *InsertionPoint;
if (PhiInst)
@@ -343,46 +343,43 @@ ARM64PromoteConstant::findInsertionPoint(Value::user_iterator &Use) {
return InsertionPoint;
}
-bool ARM64PromoteConstant::isDominated(Instruction *NewPt,
- Value::user_iterator &UseIt,
- InsertionPoints &InsertPts) {
+bool AArch64PromoteConstant::isDominated(Instruction *NewPt,
+ Value::user_iterator &UseIt,
+ InsertionPoints &InsertPts) {
DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>(
*NewPt->getParent()->getParent()).getDomTree();
- // Traverse all the existing insertion point and check if one is dominating
- // NewPt
- for (InsertionPoints::iterator IPI = InsertPts.begin(),
- EndIPI = InsertPts.end();
- IPI != EndIPI; ++IPI) {
- if (NewPt == IPI->first || DT.dominates(IPI->first, NewPt) ||
- // When IPI->first is a terminator instruction, DT may think that
+ // Traverse all the existing insertion points and check if one is dominating
+ // NewPt. If it is, remember that.
+ for (auto &IPI : InsertPts) {
+ if (NewPt == IPI.first || DT.dominates(IPI.first, NewPt) ||
+ // When IPI.first is a terminator instruction, DT may think that
// the result is defined on the edge.
// Here we are testing the insertion point, not the definition.
- (IPI->first->getParent() != NewPt->getParent() &&
- DT.dominates(IPI->first->getParent(), NewPt->getParent()))) {
- // No need to insert this point
- // Record the dominated use
+ (IPI.first->getParent() != NewPt->getParent() &&
+ DT.dominates(IPI.first->getParent(), NewPt->getParent()))) {
+ // No need to insert this point. Just record the dominated use.
DEBUG(dbgs() << "Insertion point dominated by:\n");
- DEBUG(IPI->first->print(dbgs()));
+ DEBUG(IPI.first->print(dbgs()));
DEBUG(dbgs() << '\n');
- IPI->second.push_back(UseIt);
+ IPI.second.push_back(UseIt);
return true;
}
}
return false;
}
-bool ARM64PromoteConstant::tryAndMerge(Instruction *NewPt,
- Value::user_iterator &UseIt,
- InsertionPoints &InsertPts) {
+bool AArch64PromoteConstant::tryAndMerge(Instruction *NewPt,
+ Value::user_iterator &UseIt,
+ InsertionPoints &InsertPts) {
DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>(
*NewPt->getParent()->getParent()).getDomTree();
BasicBlock *NewBB = NewPt->getParent();
// Traverse all the existing insertion points and check if one is dominated by
// NewPt and thus useless or can be combined with NewPt into a common
- // dominator
+ // dominator.
for (InsertionPoints::iterator IPI = InsertPts.begin(),
EndIPI = InsertPts.end();
IPI != EndIPI; ++IPI) {
@@ -400,19 +397,19 @@ bool ARM64PromoteConstant::tryAndMerge(Instruction *NewPt,
// Look for a common dominator
BasicBlock *CommonDominator = DT.findNearestCommonDominator(NewBB, CurBB);
- // If none exists, we cannot merge these two points
+ // If none exists, we cannot merge these two points.
if (!CommonDominator)
continue;
if (CommonDominator != NewBB) {
- // By construction, the CommonDominator cannot be CurBB
+ // By construction, the CommonDominator cannot be CurBB.
assert(CommonDominator != CurBB &&
"Instruction has not been rejected during isDominated check!");
// Take the last instruction of the CommonDominator as insertion point
NewPt = CommonDominator->getTerminator();
}
// else, CommonDominator is the block of NewBB, hence NewBB is the last
- // possible insertion point in that block
+ // possible insertion point in that block.
DEBUG(dbgs() << "Merge insertion point with:\n");
DEBUG(IPI->first->print(dbgs()));
DEBUG(dbgs() << '\n');
@@ -424,17 +421,17 @@ bool ARM64PromoteConstant::tryAndMerge(Instruction *NewPt,
return false;
}
-void ARM64PromoteConstant::computeInsertionPoints(
+void AArch64PromoteConstant::computeInsertionPoints(
Constant *Val, InsertionPointsPerFunc &InsPtsPerFunc) {
DEBUG(dbgs() << "** Compute insertion points **\n");
for (Value::user_iterator UseIt = Val->user_begin(),
EndUseIt = Val->user_end();
UseIt != EndUseIt; ++UseIt) {
- // If the user is not an Instruction, we cannot modify it
+ // If the user is not an Instruction, we cannot modify it.
if (!isa<Instruction>(*UseIt))
continue;
- // Filter out uses that should not be converted
+ // Filter out uses that should not be converted.
if (!shouldConvertUse(Val, cast<Instruction>(*UseIt), UseIt.getOperandNo()))
continue;
@@ -466,19 +463,18 @@ void ARM64PromoteConstant::computeInsertionPoints(
}
}
-bool
-ARM64PromoteConstant::insertDefinitions(Constant *Cst,
- InsertionPointsPerFunc &InsPtsPerFunc) {
- // We will create one global variable per Module
+bool AArch64PromoteConstant::insertDefinitions(
+ Constant *Cst, InsertionPointsPerFunc &InsPtsPerFunc) {
+ // We will create one global variable per Module.
DenseMap<Module *, GlobalVariable *> ModuleToMergedGV;
bool HasChanged = false;
- // Traverse all insertion points in all the function
+ // Traverse all insertion points in all the functions.
for (InsertionPointsPerFunc::iterator FctToInstPtsIt = InsPtsPerFunc.begin(),
EndIt = InsPtsPerFunc.end();
FctToInstPtsIt != EndIt; ++FctToInstPtsIt) {
InsertionPoints &InsertPts = FctToInstPtsIt->second;
-// Do more check for debug purposes
+// Do more checking for debug purposes.
#ifndef NDEBUG
DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>(
*FctToInstPtsIt->first).getDomTree();
@@ -491,8 +487,8 @@ ARM64PromoteConstant::insertDefinitions(Constant *Cst,
ModuleToMergedGV.find(M);
if (MapIt == ModuleToMergedGV.end()) {
PromotedGV = new GlobalVariable(
- *M, Cst->getType(), true, GlobalValue::InternalLinkage, 0,
- "_PromotedConst", 0, GlobalVariable::NotThreadLocal);
+ *M, Cst->getType(), true, GlobalValue::InternalLinkage, nullptr,
+ "_PromotedConst", nullptr, GlobalVariable::NotThreadLocal);
PromotedGV->setInitializer(Cst);
ModuleToMergedGV[M] = PromotedGV;
DEBUG(dbgs() << "Global replacement: ");
@@ -507,7 +503,7 @@ ARM64PromoteConstant::insertDefinitions(Constant *Cst,
for (InsertionPoints::iterator IPI = InsertPts.begin(),
EndIPI = InsertPts.end();
IPI != EndIPI; ++IPI) {
- // Create the load of the global variable
+ // Create the load of the global variable.
IRBuilder<> Builder(IPI->first->getParent(), IPI->first);
LoadInst *LoadedCst = Builder.CreateLoad(PromotedGV);
DEBUG(dbgs() << "**********\n");
@@ -515,21 +511,19 @@ ARM64PromoteConstant::insertDefinitions(Constant *Cst,
DEBUG(LoadedCst->print(dbgs()));
DEBUG(dbgs() << '\n');
- // Update the dominated uses
+ // Update the dominated uses.
Users &DominatedUsers = IPI->second;
- for (Users::iterator UseIt = DominatedUsers.begin(),
- EndIt = DominatedUsers.end();
- UseIt != EndIt; ++UseIt) {
+ for (Value::user_iterator Use : DominatedUsers) {
#ifndef NDEBUG
- assert((DT.dominates(LoadedCst, cast<Instruction>(**UseIt)) ||
- (isa<PHINode>(**UseIt) &&
- DT.dominates(LoadedCst, findInsertionPoint(*UseIt)))) &&
+ assert((DT.dominates(LoadedCst, cast<Instruction>(*Use)) ||
+ (isa<PHINode>(*Use) &&
+ DT.dominates(LoadedCst, findInsertionPoint(Use)))) &&
"Inserted definition does not dominate all its uses!");
#endif
- DEBUG(dbgs() << "Use to update " << UseIt->getOperandNo() << ":");
- DEBUG((*UseIt)->print(dbgs()));
+ DEBUG(dbgs() << "Use to update " << Use.getOperandNo() << ":");
+ DEBUG(Use->print(dbgs()));
DEBUG(dbgs() << '\n');
- (*UseIt)->setOperand(UseIt->getOperandNo(), LoadedCst);
+ Use->setOperand(Use.getOperandNo(), LoadedCst);
++NumPromotedUses;
}
}
@@ -537,13 +531,13 @@ ARM64PromoteConstant::insertDefinitions(Constant *Cst,
return HasChanged;
}
-bool ARM64PromoteConstant::computeAndInsertDefinitions(Constant *Val) {
+bool AArch64PromoteConstant::computeAndInsertDefinitions(Constant *Val) {
InsertionPointsPerFunc InsertPtsPerFunc;
computeInsertionPoints(Val, InsertPtsPerFunc);
return insertDefinitions(Val, InsertPtsPerFunc);
}
-bool ARM64PromoteConstant::promoteConstant(Constant *Cst) {
+bool AArch64PromoteConstant::promoteConstant(Constant *Cst) {
assert(Cst && "Given variable is not a valid constant.");
if (!shouldConvert(Cst))
@@ -557,24 +551,23 @@ bool ARM64PromoteConstant::promoteConstant(Constant *Cst) {
return computeAndInsertDefinitions(Cst);
}
-bool ARM64PromoteConstant::runOnFunction(Function &F) {
- // Look for instructions using constant vector
- // Promote that constant to a global variable.
- // Create as few load of this variable as possible and update the uses
- // accordingly
+bool AArch64PromoteConstant::runOnFunction(Function &F) {
+ // Look for instructions using constant vector. Promote that constant to a
+ // global variable. Create as few loads of this variable as possible and
+ // update the uses accordingly.
bool LocalChange = false;
SmallSet<Constant *, 8> AlreadyChecked;
for (auto &MBB : F) {
- for (auto &MI: MBB) {
- // Traverse the operand, looking for constant vectors
- // Replace them by a load of a global variable of type constant vector
+ for (auto &MI : MBB) {
+ // Traverse the operands, looking for constant vectors. Replace them by a
+ // load of a global variable of constant vector type.
for (unsigned OpIdx = 0, EndOpIdx = MI.getNumOperands();
OpIdx != EndOpIdx; ++OpIdx) {
Constant *Cst = dyn_cast<Constant>(MI.getOperand(OpIdx));
- // There is no point is promoting global value, they are already global.
- // Do not promote constant expression, as they may require some code
- // expansion.
+ // There is no point in promoting global values as they are already
+ // global. Do not promote constant expressions either, as they may
+ // require some code expansion.
if (Cst && !isa<GlobalValue>(Cst) && !isa<ConstantExpr>(Cst) &&
AlreadyChecked.insert(Cst))
LocalChange |= promoteConstant(Cst);
diff --git a/lib/Target/AArch64/AArch64RegisterInfo.cpp b/lib/Target/AArch64/AArch64RegisterInfo.cpp
index 06e1ffb..01b9587 100644
--- a/lib/Target/AArch64/AArch64RegisterInfo.cpp
+++ b/lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -12,175 +12,393 @@
//
//===----------------------------------------------------------------------===//
-
#include "AArch64RegisterInfo.h"
#include "AArch64FrameLowering.h"
-#include "AArch64MachineFunctionInfo.h"
-#include "AArch64TargetMachine.h"
-#include "MCTargetDesc/AArch64MCTargetDesc.h"
+#include "AArch64InstrInfo.h"
+#include "AArch64Subtarget.h"
+#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetOptions.h"
+
+using namespace llvm;
#define GET_REGINFO_TARGET_DESC
#include "AArch64GenRegisterInfo.inc"
-using namespace llvm;
+AArch64RegisterInfo::AArch64RegisterInfo(const AArch64InstrInfo *tii,
+ const AArch64Subtarget *sti)
+ : AArch64GenRegisterInfo(AArch64::LR), TII(tii), STI(sti) {}
-AArch64RegisterInfo::AArch64RegisterInfo()
- : AArch64GenRegisterInfo(AArch64::X30) {
-}
-
-const uint16_t *
+const MCPhysReg *
AArch64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
- return CSR_PCS_SaveList;
-}
-
-const uint32_t*
-AArch64RegisterInfo::getCallPreservedMask(CallingConv::ID) const {
- return CSR_PCS_RegMask;
+ assert(MF && "Invalid MachineFunction pointer.");
+ if (MF->getFunction()->getCallingConv() == CallingConv::AnyReg)
+ return CSR_AArch64_AllRegs_SaveList;
+ else
+ return CSR_AArch64_AAPCS_SaveList;
}
-const uint32_t *AArch64RegisterInfo::getTLSDescCallPreservedMask() const {
- return TLSDesc_RegMask;
+const uint32_t *
+AArch64RegisterInfo::getCallPreservedMask(CallingConv::ID CC) const {
+ if (CC == CallingConv::AnyReg)
+ return CSR_AArch64_AllRegs_RegMask;
+ else
+ return CSR_AArch64_AAPCS_RegMask;
}
-const TargetRegisterClass *
-AArch64RegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const {
- if (RC == &AArch64::FlagClassRegClass)
- return &AArch64::GPR64RegClass;
+const uint32_t *AArch64RegisterInfo::getTLSCallPreservedMask() const {
+ if (STI->isTargetDarwin())
+ return CSR_AArch64_TLS_Darwin_RegMask;
- return RC;
+ assert(STI->isTargetELF() && "only expect Darwin or ELF TLS");
+ return CSR_AArch64_TLS_ELF_RegMask;
}
-
+const uint32_t *
+AArch64RegisterInfo::getThisReturnPreservedMask(CallingConv::ID) const {
+ // This should return a register mask that is the same as that returned by
+ // getCallPreservedMask but that additionally preserves the register used for
+ // the first i64 argument (which must also be the register used to return a
+ // single i64 return value)
+ //
+ // If the calling convention does not use the same register for both, the
+ // function should return NULL (this does not currently apply).
+ return CSR_AArch64_AAPCS_ThisReturn_RegMask;
+}
BitVector
AArch64RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
- BitVector Reserved(getNumRegs());
const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
- Reserved.set(AArch64::XSP);
- Reserved.set(AArch64::WSP);
-
+ // FIXME: avoid re-calculating this every time.
+ BitVector Reserved(getNumRegs());
+ Reserved.set(AArch64::SP);
Reserved.set(AArch64::XZR);
+ Reserved.set(AArch64::WSP);
Reserved.set(AArch64::WZR);
- if (TFI->hasFP(MF)) {
- Reserved.set(AArch64::X29);
+ if (TFI->hasFP(MF) || STI->isTargetDarwin()) {
+ Reserved.set(AArch64::FP);
Reserved.set(AArch64::W29);
}
+ if (STI->isTargetDarwin()) {
+ Reserved.set(AArch64::X18); // Platform register
+ Reserved.set(AArch64::W18);
+ }
+
+ if (hasBasePointer(MF)) {
+ Reserved.set(AArch64::X19);
+ Reserved.set(AArch64::W19);
+ }
+
return Reserved;
}
-static bool hasFrameOffset(int opcode) {
- return opcode != AArch64::LD1x2_8B && opcode != AArch64::LD1x3_8B &&
- opcode != AArch64::LD1x4_8B && opcode != AArch64::ST1x2_8B &&
- opcode != AArch64::ST1x3_8B && opcode != AArch64::ST1x4_8B &&
- opcode != AArch64::LD1x2_16B && opcode != AArch64::LD1x3_16B &&
- opcode != AArch64::LD1x4_16B && opcode != AArch64::ST1x2_16B &&
- opcode != AArch64::ST1x3_16B && opcode != AArch64::ST1x4_16B;
-}
+bool AArch64RegisterInfo::isReservedReg(const MachineFunction &MF,
+ unsigned Reg) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
-void
-AArch64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MBBI,
- int SPAdj,
- unsigned FIOperandNum,
- RegScavenger *RS) const {
- assert(SPAdj == 0 && "Cannot deal with nonzero SPAdj yet");
- MachineInstr &MI = *MBBI;
- MachineBasicBlock &MBB = *MI.getParent();
- MachineFunction &MF = *MBB.getParent();
- MachineFrameInfo *MFI = MF.getFrameInfo();
- const AArch64FrameLowering *TFI =
- static_cast<const AArch64FrameLowering *>(MF.getTarget().getFrameLowering());
-
- // In order to work out the base and offset for addressing, the FrameLowering
- // code needs to know (sometimes) whether the instruction is storing/loading a
- // callee-saved register, or whether it's a more generic
- // operation. Fortunately the frame indices are used *only* for that purpose
- // and are contiguous, so we can check here.
- const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
- int MinCSFI = 0;
- int MaxCSFI = -1;
-
- if (CSI.size()) {
- MinCSFI = CSI[0].getFrameIdx();
- MaxCSFI = CSI[CSI.size() - 1].getFrameIdx();
+ switch (Reg) {
+ default:
+ break;
+ case AArch64::SP:
+ case AArch64::XZR:
+ case AArch64::WSP:
+ case AArch64::WZR:
+ return true;
+ case AArch64::X18:
+ case AArch64::W18:
+ return STI->isTargetDarwin();
+ case AArch64::FP:
+ case AArch64::W29:
+ return TFI->hasFP(MF) || STI->isTargetDarwin();
+ case AArch64::W19:
+ case AArch64::X19:
+ return hasBasePointer(MF);
}
- int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
- bool IsCalleeSaveOp = FrameIndex >= MinCSFI && FrameIndex <= MaxCSFI;
+ return false;
+}
- unsigned FrameReg;
- int64_t Offset;
- Offset = TFI->resolveFrameIndexReference(MF, FrameIndex, FrameReg, SPAdj,
- IsCalleeSaveOp);
- // A vector load/store instruction doesn't have an offset operand.
- bool HasOffsetOp = hasFrameOffset(MI.getOpcode());
- if (HasOffsetOp)
- Offset += MI.getOperand(FIOperandNum + 1).getImm();
+const TargetRegisterClass *
+AArch64RegisterInfo::getPointerRegClass(const MachineFunction &MF,
+ unsigned Kind) const {
+ return &AArch64::GPR64RegClass;
+}
- // DBG_VALUE instructions have no real restrictions so they can be handled
- // easily.
- if (MI.isDebugValue()) {
- MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, /*isDef=*/ false);
- MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
- return;
- }
+const TargetRegisterClass *
+AArch64RegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const {
+ if (RC == &AArch64::CCRRegClass)
+ return &AArch64::GPR64RegClass; // Only MSR & MRS copy NZCV.
+ return RC;
+}
- const AArch64InstrInfo &TII =
- *static_cast<const AArch64InstrInfo*>(MF.getTarget().getInstrInfo());
- int MinOffset, MaxOffset, OffsetScale;
- if (MI.getOpcode() == AArch64::ADDxxi_lsl0_s || !HasOffsetOp) {
- MinOffset = 0;
- MaxOffset = 0xfff;
- OffsetScale = 1;
- } else {
- // Load/store of a stack object
- TII.getAddressConstraints(MI, OffsetScale, MinOffset, MaxOffset);
- }
+unsigned AArch64RegisterInfo::getBaseRegister() const { return AArch64::X19; }
- // There are two situations we don't use frame + offset directly in the
- // instruction:
- // (1) The offset can't really be scaled
- // (2) Can't encode offset as it doesn't have an offset operand
- if ((Offset % OffsetScale != 0 || Offset < MinOffset || Offset > MaxOffset) ||
- (!HasOffsetOp && Offset != 0)) {
- unsigned BaseReg =
- MF.getRegInfo().createVirtualRegister(&AArch64::GPR64RegClass);
- emitRegUpdate(MBB, MBBI, MBBI->getDebugLoc(), TII,
- BaseReg, FrameReg, BaseReg, Offset);
- FrameReg = BaseReg;
- Offset = 0;
- }
+bool AArch64RegisterInfo::hasBasePointer(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
- // Negative offsets are expected if we address from FP, but for
- // now this checks nothing has gone horribly wrong.
- assert(Offset >= 0 && "Unexpected negative offset from SP");
+ // In the presence of variable sized objects, if the fixed stack size is
+ // large enough that referencing from the FP won't result in things being
+ // in range relatively often, we can use a base pointer to allow access
+ // from the other direction like the SP normally works.
+ if (MFI->hasVarSizedObjects()) {
+ // Conservatively estimate whether the negative offset from the frame
+ // pointer will be sufficient to reach. If a function has a smallish
+ // frame, it's less likely to have lots of spills and callee saved
+ // space, so it's all more likely to be within range of the frame pointer.
+ // If it's wrong, we'll materialize the constant and still get to the
+ // object; it's just suboptimal. Negative offsets use the unscaled
+ // load/store instructions, which have a 9-bit signed immediate.
+ if (MFI->getLocalFrameSize() < 256)
+ return false;
+ return true;
+ }
- MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, false, false, true);
- if (HasOffsetOp)
- MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset / OffsetScale);
+ return false;
}
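
The 256-byte cutoff above comes from the 9-bit signed immediate of the unscaled LDUR/STUR forms mentioned in the comment; a compile-time sanity check of that range, purely for illustration:

// A 9-bit signed field covers [-256, 255].
static_assert(-(1 << 8) == -256, "lower bound of 9-bit signed immediate");
static_assert((1 << 8) - 1 == 255, "upper bound of 9-bit signed immediate");
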
unsigned
AArch64RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
- if (TFI->hasFP(MF))
- return AArch64::X29;
- else
- return AArch64::XSP;
+ return TFI->hasFP(MF) ? AArch64::FP : AArch64::SP;
+}
+
+bool AArch64RegisterInfo::requiresRegisterScavenging(
+ const MachineFunction &MF) const {
+ return true;
+}
+
+bool AArch64RegisterInfo::requiresVirtualBaseRegisters(
+ const MachineFunction &MF) const {
+ return true;
}
bool
AArch64RegisterInfo::useFPForScavengingIndex(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ // AArch64FrameLowering::resolveFrameIndexReference() can always fall back
+ // to the stack pointer, so only put the emergency spill slot next to the
+ // FP when there's no better way to access it (SP or base pointer).
+ return MFI->hasVarSizedObjects() && !hasBasePointer(MF);
+}
+
+bool AArch64RegisterInfo::requiresFrameIndexScavenging(
+ const MachineFunction &MF) const {
+ return true;
+}
+
+bool
+AArch64RegisterInfo::cannotEliminateFrame(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ // Only consider eliminating leaf frames.
+ if (MFI->hasCalls() || (MF.getTarget().Options.DisableFramePointerElim(MF) &&
+ MFI->adjustsStack()))
+ return true;
+ return MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken();
+}
+
+/// needsFrameBaseReg - Returns true if the instruction's frame index
+/// reference would be better served by a base register other than FP
+/// or SP. Used by LocalStackFrameAllocation to determine which frame index
+/// references it should create new base registers for.
+bool AArch64RegisterInfo::needsFrameBaseReg(MachineInstr *MI,
+ int64_t Offset) const {
+ for (unsigned i = 0; !MI->getOperand(i).isFI(); ++i)
+ assert(i < MI->getNumOperands() &&
+ "Instr doesn't have FrameIndex operand!");
+
+ // It's the load/store FI references that cause issues, as it can be difficult
+ // to materialize the offset if it won't fit in the literal field. Estimate
+ // based on the size of the local frame and some conservative assumptions
+ // about the rest of the stack frame (note, this is pre-regalloc, so
+ // we don't know everything for certain yet) whether this offset is likely
+ // to be out of range of the immediate. Return true if so.
+
+ // We only generate virtual base registers for loads and stores, so
+ // return false for everything else.
+ if (!MI->mayLoad() && !MI->mayStore())
+ return false;
+
+ // Without a virtual base register, if the function has variable sized
+ // objects, all fixed-size local references will be via the frame pointer.
+ // Approximate the offset and see if it's legal for the instruction.
+ // Note that the incoming offset is based on the SP value at function entry,
+ // so it'll be negative.
+ MachineFunction &MF = *MI->getParent()->getParent();
const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
- const AArch64FrameLowering *AFI
- = static_cast<const AArch64FrameLowering*>(TFI);
- return AFI->useFPForAddressing(MF);
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ // Estimate an offset from the frame pointer.
+ // Conservatively assume all GPR callee-saved registers get pushed.
+ // FP, LR, X19-X28, D8-D15. 64-bits each.
+ int64_t FPOffset = Offset - 16 * 20;
+ // Estimate an offset from the stack pointer.
+ // The incoming offset is relative to the SP at the start of the function,
+ // but when we access the local it'll be relative to the SP after local
+ // allocation, so adjust our SP-relative offset by that allocation size.
+ Offset += MFI->getLocalFrameSize();
+ // Assume that we'll have at least some spill slots allocated.
+ // FIXME: This is a total SWAG number. We should run some statistics
+ // and pick a real one.
+ Offset += 128; // 128 bytes of spill slots
+
+ // If there is a frame pointer, try using it.
+ // The FP is only available if there is no dynamic realignment. We
+ // don't know for sure yet whether we'll need that, so we guess based
+ // on whether there are any local variables that would trigger it.
+ if (TFI->hasFP(MF) && isFrameOffsetLegal(MI, FPOffset))
+ return false;
+
+ // If we can reference via the stack pointer or base pointer, try that.
+ // FIXME: This (and the code that resolves the references) can be improved
+ // to only disallow SP relative references in the live range of
+ // the VLA(s). In practice, it's unclear how much difference that
+ // would make, but it may be worth doing.
+ if (isFrameOffsetLegal(MI, Offset))
+ return false;
+
+ // The offset likely isn't legal; we want to allocate a virtual base register.
+ return true;
+}
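
To make the heuristic above concrete, here is a minimal standalone sketch of the two estimates, using an assumed entry-SP-relative offset of -16 and an assumed 512-byte local frame (both inputs are illustrative, not taken from the code):

    // Mirrors the conservative arithmetic in needsFrameBaseReg; the inputs
    // are made up for illustration.
    #include <cstdint>
    #include <cstdio>

    int main() {
      int64_t Offset = -16;         // assumed SP-relative offset at entry
      int64_t LocalFrameSize = 512; // assumed MFI->getLocalFrameSize()
      // FP-relative guess: all of FP, LR, X19-X28, D8-D15 assumed saved.
      int64_t FPOffset = Offset - 16 * 20;              // -336
      // SP-relative guess: local allocation plus 128 bytes of spill slots.
      int64_t SPOffset = Offset + LocalFrameSize + 128; // 624
      std::printf("FP guess %lld, SP guess %lld\n", (long long)FPOffset,
                  (long long)SPOffset);
      // Each guess is then checked with isFrameOffsetLegal; only if neither
      // fits the instruction's immediate field is a base register requested.
    }
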
+
+bool AArch64RegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
+ int64_t Offset) const {
+ assert(Offset <= INT_MAX && "Offset too big to fit in int.");
+ assert(MI && "Unable to get the legal offset for nil instruction.");
+ int SaveOffset = Offset;
+ return isAArch64FrameOffsetLegal(*MI, SaveOffset) & AArch64FrameOffsetIsLegal;
+}
+
+/// Insert defining instruction(s) for BaseReg to be a pointer to FrameIdx
+/// at the beginning of the basic block.
+void AArch64RegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
+ unsigned BaseReg,
+ int FrameIdx,
+ int64_t Offset) const {
+ MachineBasicBlock::iterator Ins = MBB->begin();
+ DebugLoc DL; // Defaults to "unknown"
+ if (Ins != MBB->end())
+ DL = Ins->getDebugLoc();
+
+ const MCInstrDesc &MCID = TII->get(AArch64::ADDXri);
+ MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
+ const MachineFunction &MF = *MBB->getParent();
+ MRI.constrainRegClass(BaseReg, TII->getRegClass(MCID, 0, this, MF));
+ unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0);
+
+ BuildMI(*MBB, Ins, DL, MCID, BaseReg)
+ .addFrameIndex(FrameIdx)
+ .addImm(Offset)
+ .addImm(Shifter);
}
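
In other words, the base register is defined by a single ADDXri at the top of the block, computing BaseReg = FrameIdx + Offset with LSL #0, and the frame index operand is left for later resolution; resolveFrameIndex below then rewrites the individual loads and stores to address off BaseReg.
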
+
+void AArch64RegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
+ int64_t Offset) const {
+ int Off = Offset; // AArch64 doesn't need the general 64-bit offsets
+ unsigned i = 0;
+
+ while (!MI.getOperand(i).isFI()) {
+ ++i;
+ assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
+ }
+ bool Done = rewriteAArch64FrameIndex(MI, i, BaseReg, Off, TII);
+ assert(Done && "Unable to resolve frame index!");
+ (void)Done;
+}
+
+void AArch64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, unsigned FIOperandNum,
+ RegScavenger *RS) const {
+ assert(SPAdj == 0 && "Unexpected");
+
+ MachineInstr &MI = *II;
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ const AArch64FrameLowering *TFI = static_cast<const AArch64FrameLowering *>(
+ MF.getTarget().getFrameLowering());
+
+ int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
+ unsigned FrameReg;
+ int Offset;
+
+ // Special handling of dbg_value, stackmap and patchpoint instructions.
+ if (MI.isDebugValue() || MI.getOpcode() == TargetOpcode::STACKMAP ||
+ MI.getOpcode() == TargetOpcode::PATCHPOINT) {
+ Offset = TFI->resolveFrameIndexReference(MF, FrameIndex, FrameReg,
+ /*PreferFP=*/true);
+ Offset += MI.getOperand(FIOperandNum + 1).getImm();
+ MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, false /*isDef*/);
+ MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
+ return;
+ }
+
+ // Modify MI as necessary to handle as much of 'Offset' as possible
+ Offset = TFI->resolveFrameIndexReference(MF, FrameIndex, FrameReg);
+ if (rewriteAArch64FrameIndex(MI, FIOperandNum, FrameReg, Offset, TII))
+ return;
+
+ assert((!RS || !RS->isScavengingFrameIndex(FrameIndex)) &&
+ "Emergency spill slot is out of reach");
+
+ // If we get here, the immediate doesn't fit into the instruction. We folded
+ // as much as possible above. Handle the rest, providing a register that is
+ // SP+LargeImm.
+ unsigned ScratchReg =
+ MF.getRegInfo().createVirtualRegister(&AArch64::GPR64RegClass);
+ emitFrameOffset(MBB, II, MI.getDebugLoc(), ScratchReg, FrameReg, Offset, TII);
+ MI.getOperand(FIOperandNum).ChangeToRegister(ScratchReg, false, false, true);
+}
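
The fold-or-scavenge split above hinges on whether the remaining offset fits the instruction's immediate field. As a rough illustration (the real check is isAArch64FrameOffsetLegal, which also handles the signed 9-bit unscaled forms), AArch64's common unsigned-offset loads and stores take a 12-bit immediate scaled by the access size:

    #include <cstdint>
    #include <cstdio>

    // Illustrative only: the unsigned scaled 12-bit immediate used by
    // LDR/STR (immediate). Offsets outside this range force the
    // scratch-register path at the end of eliminateFrameIndex.
    bool fitsUnsignedScaled(int64_t Off, unsigned AccessSize) {
      return Off >= 0 && Off % AccessSize == 0 && Off / AccessSize <= 4095;
    }

    int main() {
      std::printf("%d\n", fitsUnsignedScaled(32760, 8)); // 4095 * 8: fits
      std::printf("%d\n", fitsUnsignedScaled(32768, 8)); // one slot past: no
    }
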
+
+namespace llvm {
+
+unsigned AArch64RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
+ MachineFunction &MF) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ switch (RC->getID()) {
+ default:
+ return 0;
+ case AArch64::GPR32RegClassID:
+ case AArch64::GPR32spRegClassID:
+ case AArch64::GPR32allRegClassID:
+ case AArch64::GPR64spRegClassID:
+ case AArch64::GPR64allRegClassID:
+ case AArch64::GPR64RegClassID:
+ case AArch64::GPR32commonRegClassID:
+ case AArch64::GPR64commonRegClassID:
+ return 32 - 1 // XZR/SP
+ - (TFI->hasFP(MF) || STI->isTargetDarwin()) // FP
+ - STI->isTargetDarwin() // X18 reserved as platform register
+ - hasBasePointer(MF); // X19
+ case AArch64::FPR8RegClassID:
+ case AArch64::FPR16RegClassID:
+ case AArch64::FPR32RegClassID:
+ case AArch64::FPR64RegClassID:
+ case AArch64::FPR128RegClassID:
+ return 32;
+
+ case AArch64::DDRegClassID:
+ case AArch64::DDDRegClassID:
+ case AArch64::DDDDRegClassID:
+ case AArch64::QQRegClassID:
+ case AArch64::QQQRegClassID:
+ case AArch64::QQQQRegClassID:
+ return 32;
+
+ case AArch64::FPR128_loRegClassID:
+ return 16;
+ }
+}
+
+} // namespace llvm
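
As a worked instance of the GPR arithmetic above (illustrative flag values; the real ones come from TFI, STI, and hasBasePointer):

    #include <cstdio>

    int main() {
      bool HasFP = true, IsDarwin = true, HasBasePtr = true;
      int Limit = 32 - 1                // XZR/SP
                  - (HasFP || IsDarwin) // FP (x29)
                  - IsDarwin            // X18 platform register
                  - HasBasePtr;         // X19 base pointer
      std::printf("%d\n", Limit); // 28; a non-Darwin FP-less leaf gets 31
    }
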
diff --git a/lib/Target/AArch64/AArch64RegisterInfo.h b/lib/Target/AArch64/AArch64RegisterInfo.h
index 4d67943..76af1ed 100644
--- a/lib/Target/AArch64/AArch64RegisterInfo.h
+++ b/lib/Target/AArch64/AArch64RegisterInfo.h
@@ -1,4 +1,4 @@
-//==- AArch64RegisterInfo.h - AArch64 Register Information Impl -*- C++ -*-===//
+//==- AArch64RegisterInfo.h - AArch64 Register Information Impl --*- C++ -*-==//
//
// The LLVM Compiler Infrastructure
//
@@ -7,14 +7,12 @@
//
//===----------------------------------------------------------------------===//
//
-// This file contains the AArch64 implementation of the MCRegisterInfo class.
+// This file contains the AArch64 implementation of the TargetRegisterInfo class.
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TARGET_AARCH64REGISTERINFO_H
-#define LLVM_TARGET_AARCH64REGISTERINFO_H
-
-#include "llvm/Target/TargetRegisterInfo.h"
+#ifndef LLVM_TARGET_AArch64REGISTERINFO_H
+#define LLVM_TARGET_AArch64REGISTERINFO_H
#define GET_REGINFO_HEADER
#include "AArch64GenRegisterInfo.inc"
@@ -23,49 +21,81 @@ namespace llvm {
class AArch64InstrInfo;
class AArch64Subtarget;
+class MachineFunction;
+class RegScavenger;
+class TargetRegisterClass;
struct AArch64RegisterInfo : public AArch64GenRegisterInfo {
- AArch64RegisterInfo();
+private:
+ const AArch64InstrInfo *TII;
+ const AArch64Subtarget *STI;
- const uint16_t *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
- const uint32_t *getCallPreservedMask(CallingConv::ID) const;
+public:
+ AArch64RegisterInfo(const AArch64InstrInfo *tii, const AArch64Subtarget *sti);
- const uint32_t *getTLSDescCallPreservedMask() const;
+ bool isReservedReg(const MachineFunction &MF, unsigned Reg) const;
- BitVector getReservedRegs(const MachineFunction &MF) const;
- unsigned getFrameRegister(const MachineFunction &MF) const;
+ /// Code Generation virtual methods...
+ const MCPhysReg *
+ getCalleeSavedRegs(const MachineFunction *MF = nullptr) const override;
+ const uint32_t *getCallPreservedMask(CallingConv::ID) const override;
- void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
- unsigned FIOperandNum,
- RegScavenger *Rs = NULL) const;
+ unsigned getCSRFirstUseCost() const override {
+ // The cost will be compared against BlockFrequency where entry has the
+ // value of 1 << 14. A value of 5 will choose to spill or split a really
+ // cold path instead of using a callee-saved register (a worked reading of
+ // this constant follows this header's diff).
+ return 5;
+ }
- /// getCrossCopyRegClass - Returns a legal register class to copy a register
- /// in the specified class to or from. Returns original class if it is
- /// possible to copy between a two registers of the specified class.
+ // Calls involved in thread-local variable lookup save more registers than
+ // normal calls, so they need a different mask to represent this.
+ const uint32_t *getTLSCallPreservedMask() const;
+
+ /// getThisReturnPreservedMask - Returns a call preserved mask specific to the
+ /// case that 'returned' is on an i64 first argument if the calling convention
+ /// is one that can (partially) model this attribute with a preserved mask
+ /// (i.e. it is a calling convention that uses the same register for the first
+ /// i64 argument and an i64 return value)
+ ///
+ /// Should return NULL in the case that the calling convention does not have
+ /// this property
+ const uint32_t *getThisReturnPreservedMask(CallingConv::ID) const;
+
+ BitVector getReservedRegs(const MachineFunction &MF) const override;
const TargetRegisterClass *
- getCrossCopyRegClass(const TargetRegisterClass *RC) const;
-
- /// getLargestLegalSuperClass - Returns the largest super class of RC that is
- /// legal to use in the current sub-target and has the same spill size.
- const TargetRegisterClass*
- getLargestLegalSuperClass(const TargetRegisterClass *RC) const {
- if (RC == &AArch64::tcGPR64RegClass)
- return &AArch64::GPR64RegClass;
-
- return RC;
- }
+ getPointerRegClass(const MachineFunction &MF,
+ unsigned Kind = 0) const override;
+ const TargetRegisterClass *
+ getCrossCopyRegClass(const TargetRegisterClass *RC) const override;
+
+ bool requiresRegisterScavenging(const MachineFunction &MF) const override;
+ bool useFPForScavengingIndex(const MachineFunction &MF) const override;
+ bool requiresFrameIndexScavenging(const MachineFunction &MF) const override;
+
+ bool needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const override;
+ bool isFrameOffsetLegal(const MachineInstr *MI,
+ int64_t Offset) const override;
+ void materializeFrameBaseRegister(MachineBasicBlock *MBB, unsigned BaseReg,
+ int FrameIdx,
+ int64_t Offset) const override;
+ void resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
+ int64_t Offset) const override;
+ void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
+ unsigned FIOperandNum,
+ RegScavenger *RS = nullptr) const override;
+ bool cannotEliminateFrame(const MachineFunction &MF) const;
- bool requiresRegisterScavenging(const MachineFunction &MF) const {
- return true;
- }
+ bool requiresVirtualBaseRegisters(const MachineFunction &MF) const override;
+ bool hasBasePointer(const MachineFunction &MF) const;
+ unsigned getBaseRegister() const;
- bool requiresFrameIndexScavenging(const MachineFunction &MF) const {
- return true;
- }
+ // Debug information queries.
+ unsigned getFrameRegister(const MachineFunction &MF) const override;
- bool useFPForScavengingIndex(const MachineFunction &MF) const;
+ unsigned getRegPressureLimit(const TargetRegisterClass *RC,
+ MachineFunction &MF) const override;
};
} // end namespace llvm
-#endif // LLVM_TARGET_AARCH64REGISTERINFO_H
+#endif // LLVM_TARGET_AArch64REGISTERINFO_H
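
A rough way to read the getCSRFirstUseCost constant declared above, assuming the comparison really is a plain threshold against the normalized block frequency (entry = 1 << 14):

    #include <cstdio>

    int main() {
      unsigned EntryFreq = 1u << 14; // entry block frequency, per the comment
      unsigned CSRCost = 5;
      // Spilling/splitting is preferred over first use of a callee-saved
      // register in blocks colder than roughly 0.03% of entry frequency.
      std::printf("threshold ~= %.4f%% of entry\n",
                  100.0 * CSRCost / EntryFreq);
    }
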
diff --git a/lib/Target/AArch64/AArch64RegisterInfo.td b/lib/Target/AArch64/AArch64RegisterInfo.td
index 9de7abd..21c927f 100644
--- a/lib/Target/AArch64/AArch64RegisterInfo.td
+++ b/lib/Target/AArch64/AArch64RegisterInfo.td
@@ -1,4 +1,4 @@
-//===- AArch64RegisterInfo.td - ARM Register defs ----------*- tablegen -*-===//
+//=- AArch64RegisterInfo.td - Describe the AArch64 Registers -*- tablegen -*-=//
//
// The LLVM Compiler Infrastructure
//
@@ -7,284 +7,587 @@
//
//===----------------------------------------------------------------------===//
//
-// This file contains declarations that describe the AArch64 register file
//
//===----------------------------------------------------------------------===//
-let Namespace = "AArch64" in {
-def sub_128 : SubRegIndex<128>;
-def sub_64 : SubRegIndex<64>;
-def sub_32 : SubRegIndex<32>;
-def sub_16 : SubRegIndex<16>;
-def sub_8 : SubRegIndex<8>;
-
-// Note: Code depends on these having consecutive numbers.
-def qqsub : SubRegIndex<256, 256>;
-
-def qsub_0 : SubRegIndex<128>;
-def qsub_1 : SubRegIndex<128, 128>;
-def qsub_2 : ComposedSubRegIndex<qqsub, qsub_0>;
-def qsub_3 : ComposedSubRegIndex<qqsub, qsub_1>;
-
-def dsub_0 : SubRegIndex<64>;
-def dsub_1 : SubRegIndex<64, 64>;
-def dsub_2 : ComposedSubRegIndex<qsub_1, dsub_0>;
-def dsub_3 : ComposedSubRegIndex<qsub_1, dsub_1>;
-}
-// Registers are identified with 5-bit ID numbers.
-class AArch64Reg<bits<16> enc, string n> : Register<n> {
+class AArch64Reg<bits<16> enc, string n, list<Register> subregs = [],
+ list<string> altNames = []>
+ : Register<n, altNames> {
let HWEncoding = enc;
let Namespace = "AArch64";
+ let SubRegs = subregs;
}
-class AArch64RegWithSubs<bits<16> enc, string n, list<Register> subregs = [],
- list<SubRegIndex> inds = []>
- : AArch64Reg<enc, n> {
- let SubRegs = subregs;
- let SubRegIndices = inds;
+let Namespace = "AArch64" in {
+ def sub_32 : SubRegIndex<32>;
+
+ def bsub : SubRegIndex<8>;
+ def hsub : SubRegIndex<16>;
+ def ssub : SubRegIndex<32>;
+ def dsub : SubRegIndex<64>;
+ def qhisub : SubRegIndex<64>;
+ def qsub : SubRegIndex<64>;
+ // Note: Code depends on these having consecutive numbers
+ def dsub0 : SubRegIndex<64>;
+ def dsub1 : SubRegIndex<64>;
+ def dsub2 : SubRegIndex<64>;
+ def dsub3 : SubRegIndex<64>;
+ // Note: Code depends on these having consecutive numbers
+ def qsub0 : SubRegIndex<128>;
+ def qsub1 : SubRegIndex<128>;
+ def qsub2 : SubRegIndex<128>;
+ def qsub3 : SubRegIndex<128>;
+}
+
+let Namespace = "AArch64" in {
+ def vreg : RegAltNameIndex;
+ def vlist1 : RegAltNameIndex;
}
//===----------------------------------------------------------------------===//
-// Integer registers: w0-w30, wzr, wsp, x0-x30, xzr, sp
+// Registers
//===----------------------------------------------------------------------===//
-
-foreach Index = 0-30 in {
- def W#Index : AArch64Reg< Index, "w"#Index>, DwarfRegNum<[Index]>;
+def W0 : AArch64Reg<0, "w0" >, DwarfRegNum<[0]>;
+def W1 : AArch64Reg<1, "w1" >, DwarfRegNum<[1]>;
+def W2 : AArch64Reg<2, "w2" >, DwarfRegNum<[2]>;
+def W3 : AArch64Reg<3, "w3" >, DwarfRegNum<[3]>;
+def W4 : AArch64Reg<4, "w4" >, DwarfRegNum<[4]>;
+def W5 : AArch64Reg<5, "w5" >, DwarfRegNum<[5]>;
+def W6 : AArch64Reg<6, "w6" >, DwarfRegNum<[6]>;
+def W7 : AArch64Reg<7, "w7" >, DwarfRegNum<[7]>;
+def W8 : AArch64Reg<8, "w8" >, DwarfRegNum<[8]>;
+def W9 : AArch64Reg<9, "w9" >, DwarfRegNum<[9]>;
+def W10 : AArch64Reg<10, "w10">, DwarfRegNum<[10]>;
+def W11 : AArch64Reg<11, "w11">, DwarfRegNum<[11]>;
+def W12 : AArch64Reg<12, "w12">, DwarfRegNum<[12]>;
+def W13 : AArch64Reg<13, "w13">, DwarfRegNum<[13]>;
+def W14 : AArch64Reg<14, "w14">, DwarfRegNum<[14]>;
+def W15 : AArch64Reg<15, "w15">, DwarfRegNum<[15]>;
+def W16 : AArch64Reg<16, "w16">, DwarfRegNum<[16]>;
+def W17 : AArch64Reg<17, "w17">, DwarfRegNum<[17]>;
+def W18 : AArch64Reg<18, "w18">, DwarfRegNum<[18]>;
+def W19 : AArch64Reg<19, "w19">, DwarfRegNum<[19]>;
+def W20 : AArch64Reg<20, "w20">, DwarfRegNum<[20]>;
+def W21 : AArch64Reg<21, "w21">, DwarfRegNum<[21]>;
+def W22 : AArch64Reg<22, "w22">, DwarfRegNum<[22]>;
+def W23 : AArch64Reg<23, "w23">, DwarfRegNum<[23]>;
+def W24 : AArch64Reg<24, "w24">, DwarfRegNum<[24]>;
+def W25 : AArch64Reg<25, "w25">, DwarfRegNum<[25]>;
+def W26 : AArch64Reg<26, "w26">, DwarfRegNum<[26]>;
+def W27 : AArch64Reg<27, "w27">, DwarfRegNum<[27]>;
+def W28 : AArch64Reg<28, "w28">, DwarfRegNum<[28]>;
+def W29 : AArch64Reg<29, "w29">, DwarfRegNum<[29]>;
+def W30 : AArch64Reg<30, "w30">, DwarfRegNum<[30]>;
+def WSP : AArch64Reg<31, "wsp">, DwarfRegNum<[31]>;
+def WZR : AArch64Reg<31, "wzr">, DwarfRegAlias<WSP>;
+
+let SubRegIndices = [sub_32] in {
+def X0 : AArch64Reg<0, "x0", [W0]>, DwarfRegAlias<W0>;
+def X1 : AArch64Reg<1, "x1", [W1]>, DwarfRegAlias<W1>;
+def X2 : AArch64Reg<2, "x2", [W2]>, DwarfRegAlias<W2>;
+def X3 : AArch64Reg<3, "x3", [W3]>, DwarfRegAlias<W3>;
+def X4 : AArch64Reg<4, "x4", [W4]>, DwarfRegAlias<W4>;
+def X5 : AArch64Reg<5, "x5", [W5]>, DwarfRegAlias<W5>;
+def X6 : AArch64Reg<6, "x6", [W6]>, DwarfRegAlias<W6>;
+def X7 : AArch64Reg<7, "x7", [W7]>, DwarfRegAlias<W7>;
+def X8 : AArch64Reg<8, "x8", [W8]>, DwarfRegAlias<W8>;
+def X9 : AArch64Reg<9, "x9", [W9]>, DwarfRegAlias<W9>;
+def X10 : AArch64Reg<10, "x10", [W10]>, DwarfRegAlias<W10>;
+def X11 : AArch64Reg<11, "x11", [W11]>, DwarfRegAlias<W11>;
+def X12 : AArch64Reg<12, "x12", [W12]>, DwarfRegAlias<W12>;
+def X13 : AArch64Reg<13, "x13", [W13]>, DwarfRegAlias<W13>;
+def X14 : AArch64Reg<14, "x14", [W14]>, DwarfRegAlias<W14>;
+def X15 : AArch64Reg<15, "x15", [W15]>, DwarfRegAlias<W15>;
+def X16 : AArch64Reg<16, "x16", [W16]>, DwarfRegAlias<W16>;
+def X17 : AArch64Reg<17, "x17", [W17]>, DwarfRegAlias<W17>;
+def X18 : AArch64Reg<18, "x18", [W18]>, DwarfRegAlias<W18>;
+def X19 : AArch64Reg<19, "x19", [W19]>, DwarfRegAlias<W19>;
+def X20 : AArch64Reg<20, "x20", [W20]>, DwarfRegAlias<W20>;
+def X21 : AArch64Reg<21, "x21", [W21]>, DwarfRegAlias<W21>;
+def X22 : AArch64Reg<22, "x22", [W22]>, DwarfRegAlias<W22>;
+def X23 : AArch64Reg<23, "x23", [W23]>, DwarfRegAlias<W23>;
+def X24 : AArch64Reg<24, "x24", [W24]>, DwarfRegAlias<W24>;
+def X25 : AArch64Reg<25, "x25", [W25]>, DwarfRegAlias<W25>;
+def X26 : AArch64Reg<26, "x26", [W26]>, DwarfRegAlias<W26>;
+def X27 : AArch64Reg<27, "x27", [W27]>, DwarfRegAlias<W27>;
+def X28 : AArch64Reg<28, "x28", [W28]>, DwarfRegAlias<W28>;
+def FP : AArch64Reg<29, "x29", [W29]>, DwarfRegAlias<W29>;
+def LR : AArch64Reg<30, "x30", [W30]>, DwarfRegAlias<W30>;
+def SP : AArch64Reg<31, "sp", [WSP]>, DwarfRegAlias<WSP>;
+def XZR : AArch64Reg<31, "xzr", [WZR]>, DwarfRegAlias<WSP>;
}
-def WSP : AArch64Reg<31, "wsp">, DwarfRegNum<[31]>;
-def WZR : AArch64Reg<31, "wzr">;
+// Condition code register.
+def NZCV : AArch64Reg<0, "nzcv">;
-// Could be combined with previous loop, but this way leaves w and x registers
-// consecutive as LLVM register numbers, which makes for easier debugging.
-foreach Index = 0-30 in {
- def X#Index : AArch64RegWithSubs<Index, "x"#Index,
- [!cast<Register>("W"#Index)], [sub_32]>,
- DwarfRegNum<[Index]>;
+// GPR register classes with the intersections of GPR32/GPR32sp and
+// GPR64/GPR64sp for use by the coalescer.
+def GPR32common : RegisterClass<"AArch64", [i32], 32, (sequence "W%u", 0, 30)> {
+ let AltOrders = [(rotl GPR32common, 8)];
+ let AltOrderSelect = [{ return 1; }];
+}
+def GPR64common : RegisterClass<"AArch64", [i64], 64,
+ (add (sequence "X%u", 0, 28), FP, LR)> {
+ let AltOrders = [(rotl GPR64common, 8)];
+ let AltOrderSelect = [{ return 1; }];
+}
+// GPR register classes which exclude SP/WSP.
+def GPR32 : RegisterClass<"AArch64", [i32], 32, (add GPR32common, WZR)> {
+ let AltOrders = [(rotl GPR32, 8)];
+ let AltOrderSelect = [{ return 1; }];
+}
+def GPR64 : RegisterClass<"AArch64", [i64], 64, (add GPR64common, XZR)> {
+ let AltOrders = [(rotl GPR64, 8)];
+ let AltOrderSelect = [{ return 1; }];
}
-def XSP : AArch64RegWithSubs<31, "sp", [WSP], [sub_32]>, DwarfRegNum<[31]>;
-def XZR : AArch64RegWithSubs<31, "xzr", [WZR], [sub_32]>;
+// GPR register classes which include SP/WSP.
+def GPR32sp : RegisterClass<"AArch64", [i32], 32, (add GPR32common, WSP)> {
+ let AltOrders = [(rotl GPR32sp, 8)];
+ let AltOrderSelect = [{ return 1; }];
+}
+def GPR64sp : RegisterClass<"AArch64", [i64], 64, (add GPR64common, SP)> {
+ let AltOrders = [(rotl GPR64sp, 8)];
+ let AltOrderSelect = [{ return 1; }];
+}
-// Most instructions treat register 31 as zero for reads and a black-hole for
-// writes.
+def GPR32sponly : RegisterClass<"AArch64", [i32], 32, (add WSP)>;
+def GPR64sponly : RegisterClass<"AArch64", [i64], 64, (add SP)>;
-// Note that the order of registers is important for the Disassembler here:
-// tablegen uses it to form MCRegisterClass::getRegister, which we assume can
-// take an encoding value.
-def GPR32 : RegisterClass<"AArch64", [i32], 32,
- (add (sequence "W%u", 0, 30), WZR)> {
+def GPR64spPlus0Operand : AsmOperandClass {
+ let Name = "GPR64sp0";
+ let RenderMethod = "addRegOperands";
+ let ParserMethod = "tryParseGPR64sp0Operand";
}
-def GPR64 : RegisterClass<"AArch64", [i64], 64,
- (add (sequence "X%u", 0, 30), XZR)> {
+def GPR64sp0 : RegisterOperand<GPR64sp> {
+ let ParserMatchClass = GPR64spPlus0Operand;
}
-def GPR32nowzr : RegisterClass<"AArch64", [i32], 32,
- (sequence "W%u", 0, 30)> {
+// GPR register classes which include WZR/XZR AND SP/WSP. This is not a
+// constraint used by any instructions, it is used as a common super-class.
+def GPR32all : RegisterClass<"AArch64", [i32], 32, (add GPR32common, WZR, WSP)>;
+def GPR64all : RegisterClass<"AArch64", [i64], 64, (add GPR64common, XZR, SP)>;
+
+// For tail calls, we can't use callee-saved registers, as they are restored
+// to the saved value before the tail call, which would clobber a call address.
+// This class is used by indirect tail calls to hold the destination address.
+def tcGPR64 : RegisterClass<"AArch64", [i64], 64, (sub GPR64common, X19, X20, X21,
+ X22, X23, X24, X25, X26,
+ X27, X28)>;
+
+// GPR register operands for the post-increment amount of vector load/store
+// instructions; when Rm=31 they print as a constant immediate equal to the
+// total number of bytes transferred.
+
+// FIXME: TableGen *should* be able to do these itself now. There appears to be
+// a bug in counting how many operands a post-indexed MCInst should have, which
+// means the aliases don't trigger.
+def GPR64pi1 : RegisterOperand<GPR64, "printPostIncOperand<1>">;
+def GPR64pi2 : RegisterOperand<GPR64, "printPostIncOperand<2>">;
+def GPR64pi3 : RegisterOperand<GPR64, "printPostIncOperand<3>">;
+def GPR64pi4 : RegisterOperand<GPR64, "printPostIncOperand<4>">;
+def GPR64pi6 : RegisterOperand<GPR64, "printPostIncOperand<6>">;
+def GPR64pi8 : RegisterOperand<GPR64, "printPostIncOperand<8>">;
+def GPR64pi12 : RegisterOperand<GPR64, "printPostIncOperand<12>">;
+def GPR64pi16 : RegisterOperand<GPR64, "printPostIncOperand<16>">;
+def GPR64pi24 : RegisterOperand<GPR64, "printPostIncOperand<24>">;
+def GPR64pi32 : RegisterOperand<GPR64, "printPostIncOperand<32>">;
+def GPR64pi48 : RegisterOperand<GPR64, "printPostIncOperand<48>">;
+def GPR64pi64 : RegisterOperand<GPR64, "printPostIncOperand<64>">;
+
+// Condition code regclass.
+def CCR : RegisterClass<"AArch64", [i32], 32, (add NZCV)> {
+ let CopyCost = -1; // Don't allow copying of status registers.
+
+ // CCR is not allocatable.
+ let isAllocatable = 0;
}
-def GPR64noxzr : RegisterClass<"AArch64", [i64], 64,
- (sequence "X%u", 0, 30)> {
-}
+//===----------------------------------------------------------------------===//
+// Floating Point Scalar Registers
+//===----------------------------------------------------------------------===//
-// For tail calls, we can't use callee-saved registers or the structure-return
-// register, as they are supposed to be live across function calls and may be
-// clobbered by the epilogue.
-def tcGPR64 : RegisterClass<"AArch64", [i64], 64,
- (add (sequence "X%u", 0, 7),
- (sequence "X%u", 9, 18))> {
+def B0 : AArch64Reg<0, "b0">, DwarfRegNum<[64]>;
+def B1 : AArch64Reg<1, "b1">, DwarfRegNum<[65]>;
+def B2 : AArch64Reg<2, "b2">, DwarfRegNum<[66]>;
+def B3 : AArch64Reg<3, "b3">, DwarfRegNum<[67]>;
+def B4 : AArch64Reg<4, "b4">, DwarfRegNum<[68]>;
+def B5 : AArch64Reg<5, "b5">, DwarfRegNum<[69]>;
+def B6 : AArch64Reg<6, "b6">, DwarfRegNum<[70]>;
+def B7 : AArch64Reg<7, "b7">, DwarfRegNum<[71]>;
+def B8 : AArch64Reg<8, "b8">, DwarfRegNum<[72]>;
+def B9 : AArch64Reg<9, "b9">, DwarfRegNum<[73]>;
+def B10 : AArch64Reg<10, "b10">, DwarfRegNum<[74]>;
+def B11 : AArch64Reg<11, "b11">, DwarfRegNum<[75]>;
+def B12 : AArch64Reg<12, "b12">, DwarfRegNum<[76]>;
+def B13 : AArch64Reg<13, "b13">, DwarfRegNum<[77]>;
+def B14 : AArch64Reg<14, "b14">, DwarfRegNum<[78]>;
+def B15 : AArch64Reg<15, "b15">, DwarfRegNum<[79]>;
+def B16 : AArch64Reg<16, "b16">, DwarfRegNum<[80]>;
+def B17 : AArch64Reg<17, "b17">, DwarfRegNum<[81]>;
+def B18 : AArch64Reg<18, "b18">, DwarfRegNum<[82]>;
+def B19 : AArch64Reg<19, "b19">, DwarfRegNum<[83]>;
+def B20 : AArch64Reg<20, "b20">, DwarfRegNum<[84]>;
+def B21 : AArch64Reg<21, "b21">, DwarfRegNum<[85]>;
+def B22 : AArch64Reg<22, "b22">, DwarfRegNum<[86]>;
+def B23 : AArch64Reg<23, "b23">, DwarfRegNum<[87]>;
+def B24 : AArch64Reg<24, "b24">, DwarfRegNum<[88]>;
+def B25 : AArch64Reg<25, "b25">, DwarfRegNum<[89]>;
+def B26 : AArch64Reg<26, "b26">, DwarfRegNum<[90]>;
+def B27 : AArch64Reg<27, "b27">, DwarfRegNum<[91]>;
+def B28 : AArch64Reg<28, "b28">, DwarfRegNum<[92]>;
+def B29 : AArch64Reg<29, "b29">, DwarfRegNum<[93]>;
+def B30 : AArch64Reg<30, "b30">, DwarfRegNum<[94]>;
+def B31 : AArch64Reg<31, "b31">, DwarfRegNum<[95]>;
+
+let SubRegIndices = [bsub] in {
+def H0 : AArch64Reg<0, "h0", [B0]>, DwarfRegAlias<B0>;
+def H1 : AArch64Reg<1, "h1", [B1]>, DwarfRegAlias<B1>;
+def H2 : AArch64Reg<2, "h2", [B2]>, DwarfRegAlias<B2>;
+def H3 : AArch64Reg<3, "h3", [B3]>, DwarfRegAlias<B3>;
+def H4 : AArch64Reg<4, "h4", [B4]>, DwarfRegAlias<B4>;
+def H5 : AArch64Reg<5, "h5", [B5]>, DwarfRegAlias<B5>;
+def H6 : AArch64Reg<6, "h6", [B6]>, DwarfRegAlias<B6>;
+def H7 : AArch64Reg<7, "h7", [B7]>, DwarfRegAlias<B7>;
+def H8 : AArch64Reg<8, "h8", [B8]>, DwarfRegAlias<B8>;
+def H9 : AArch64Reg<9, "h9", [B9]>, DwarfRegAlias<B9>;
+def H10 : AArch64Reg<10, "h10", [B10]>, DwarfRegAlias<B10>;
+def H11 : AArch64Reg<11, "h11", [B11]>, DwarfRegAlias<B11>;
+def H12 : AArch64Reg<12, "h12", [B12]>, DwarfRegAlias<B12>;
+def H13 : AArch64Reg<13, "h13", [B13]>, DwarfRegAlias<B13>;
+def H14 : AArch64Reg<14, "h14", [B14]>, DwarfRegAlias<B14>;
+def H15 : AArch64Reg<15, "h15", [B15]>, DwarfRegAlias<B15>;
+def H16 : AArch64Reg<16, "h16", [B16]>, DwarfRegAlias<B16>;
+def H17 : AArch64Reg<17, "h17", [B17]>, DwarfRegAlias<B17>;
+def H18 : AArch64Reg<18, "h18", [B18]>, DwarfRegAlias<B18>;
+def H19 : AArch64Reg<19, "h19", [B19]>, DwarfRegAlias<B19>;
+def H20 : AArch64Reg<20, "h20", [B20]>, DwarfRegAlias<B20>;
+def H21 : AArch64Reg<21, "h21", [B21]>, DwarfRegAlias<B21>;
+def H22 : AArch64Reg<22, "h22", [B22]>, DwarfRegAlias<B22>;
+def H23 : AArch64Reg<23, "h23", [B23]>, DwarfRegAlias<B23>;
+def H24 : AArch64Reg<24, "h24", [B24]>, DwarfRegAlias<B24>;
+def H25 : AArch64Reg<25, "h25", [B25]>, DwarfRegAlias<B25>;
+def H26 : AArch64Reg<26, "h26", [B26]>, DwarfRegAlias<B26>;
+def H27 : AArch64Reg<27, "h27", [B27]>, DwarfRegAlias<B27>;
+def H28 : AArch64Reg<28, "h28", [B28]>, DwarfRegAlias<B28>;
+def H29 : AArch64Reg<29, "h29", [B29]>, DwarfRegAlias<B29>;
+def H30 : AArch64Reg<30, "h30", [B30]>, DwarfRegAlias<B30>;
+def H31 : AArch64Reg<31, "h31", [B31]>, DwarfRegAlias<B31>;
}
+let SubRegIndices = [hsub] in {
+def S0 : AArch64Reg<0, "s0", [H0]>, DwarfRegAlias<B0>;
+def S1 : AArch64Reg<1, "s1", [H1]>, DwarfRegAlias<B1>;
+def S2 : AArch64Reg<2, "s2", [H2]>, DwarfRegAlias<B2>;
+def S3 : AArch64Reg<3, "s3", [H3]>, DwarfRegAlias<B3>;
+def S4 : AArch64Reg<4, "s4", [H4]>, DwarfRegAlias<B4>;
+def S5 : AArch64Reg<5, "s5", [H5]>, DwarfRegAlias<B5>;
+def S6 : AArch64Reg<6, "s6", [H6]>, DwarfRegAlias<B6>;
+def S7 : AArch64Reg<7, "s7", [H7]>, DwarfRegAlias<B7>;
+def S8 : AArch64Reg<8, "s8", [H8]>, DwarfRegAlias<B8>;
+def S9 : AArch64Reg<9, "s9", [H9]>, DwarfRegAlias<B9>;
+def S10 : AArch64Reg<10, "s10", [H10]>, DwarfRegAlias<B10>;
+def S11 : AArch64Reg<11, "s11", [H11]>, DwarfRegAlias<B11>;
+def S12 : AArch64Reg<12, "s12", [H12]>, DwarfRegAlias<B12>;
+def S13 : AArch64Reg<13, "s13", [H13]>, DwarfRegAlias<B13>;
+def S14 : AArch64Reg<14, "s14", [H14]>, DwarfRegAlias<B14>;
+def S15 : AArch64Reg<15, "s15", [H15]>, DwarfRegAlias<B15>;
+def S16 : AArch64Reg<16, "s16", [H16]>, DwarfRegAlias<B16>;
+def S17 : AArch64Reg<17, "s17", [H17]>, DwarfRegAlias<B17>;
+def S18 : AArch64Reg<18, "s18", [H18]>, DwarfRegAlias<B18>;
+def S19 : AArch64Reg<19, "s19", [H19]>, DwarfRegAlias<B19>;
+def S20 : AArch64Reg<20, "s20", [H20]>, DwarfRegAlias<B20>;
+def S21 : AArch64Reg<21, "s21", [H21]>, DwarfRegAlias<B21>;
+def S22 : AArch64Reg<22, "s22", [H22]>, DwarfRegAlias<B22>;
+def S23 : AArch64Reg<23, "s23", [H23]>, DwarfRegAlias<B23>;
+def S24 : AArch64Reg<24, "s24", [H24]>, DwarfRegAlias<B24>;
+def S25 : AArch64Reg<25, "s25", [H25]>, DwarfRegAlias<B25>;
+def S26 : AArch64Reg<26, "s26", [H26]>, DwarfRegAlias<B26>;
+def S27 : AArch64Reg<27, "s27", [H27]>, DwarfRegAlias<B27>;
+def S28 : AArch64Reg<28, "s28", [H28]>, DwarfRegAlias<B28>;
+def S29 : AArch64Reg<29, "s29", [H29]>, DwarfRegAlias<B29>;
+def S30 : AArch64Reg<30, "s30", [H30]>, DwarfRegAlias<B30>;
+def S31 : AArch64Reg<31, "s31", [H31]>, DwarfRegAlias<B31>;
+}
-// Certain addressing-useful instructions accept sp directly. Again the order of
-// registers is important to the Disassembler.
-def GPR32wsp : RegisterClass<"AArch64", [i32], 32,
- (add (sequence "W%u", 0, 30), WSP)> {
+let SubRegIndices = [ssub], RegAltNameIndices = [vreg, vlist1] in {
+def D0 : AArch64Reg<0, "d0", [S0], ["v0", ""]>, DwarfRegAlias<B0>;
+def D1 : AArch64Reg<1, "d1", [S1], ["v1", ""]>, DwarfRegAlias<B1>;
+def D2 : AArch64Reg<2, "d2", [S2], ["v2", ""]>, DwarfRegAlias<B2>;
+def D3 : AArch64Reg<3, "d3", [S3], ["v3", ""]>, DwarfRegAlias<B3>;
+def D4 : AArch64Reg<4, "d4", [S4], ["v4", ""]>, DwarfRegAlias<B4>;
+def D5 : AArch64Reg<5, "d5", [S5], ["v5", ""]>, DwarfRegAlias<B5>;
+def D6 : AArch64Reg<6, "d6", [S6], ["v6", ""]>, DwarfRegAlias<B6>;
+def D7 : AArch64Reg<7, "d7", [S7], ["v7", ""]>, DwarfRegAlias<B7>;
+def D8 : AArch64Reg<8, "d8", [S8], ["v8", ""]>, DwarfRegAlias<B8>;
+def D9 : AArch64Reg<9, "d9", [S9], ["v9", ""]>, DwarfRegAlias<B9>;
+def D10 : AArch64Reg<10, "d10", [S10], ["v10", ""]>, DwarfRegAlias<B10>;
+def D11 : AArch64Reg<11, "d11", [S11], ["v11", ""]>, DwarfRegAlias<B11>;
+def D12 : AArch64Reg<12, "d12", [S12], ["v12", ""]>, DwarfRegAlias<B12>;
+def D13 : AArch64Reg<13, "d13", [S13], ["v13", ""]>, DwarfRegAlias<B13>;
+def D14 : AArch64Reg<14, "d14", [S14], ["v14", ""]>, DwarfRegAlias<B14>;
+def D15 : AArch64Reg<15, "d15", [S15], ["v15", ""]>, DwarfRegAlias<B15>;
+def D16 : AArch64Reg<16, "d16", [S16], ["v16", ""]>, DwarfRegAlias<B16>;
+def D17 : AArch64Reg<17, "d17", [S17], ["v17", ""]>, DwarfRegAlias<B17>;
+def D18 : AArch64Reg<18, "d18", [S18], ["v18", ""]>, DwarfRegAlias<B18>;
+def D19 : AArch64Reg<19, "d19", [S19], ["v19", ""]>, DwarfRegAlias<B19>;
+def D20 : AArch64Reg<20, "d20", [S20], ["v20", ""]>, DwarfRegAlias<B20>;
+def D21 : AArch64Reg<21, "d21", [S21], ["v21", ""]>, DwarfRegAlias<B21>;
+def D22 : AArch64Reg<22, "d22", [S22], ["v22", ""]>, DwarfRegAlias<B22>;
+def D23 : AArch64Reg<23, "d23", [S23], ["v23", ""]>, DwarfRegAlias<B23>;
+def D24 : AArch64Reg<24, "d24", [S24], ["v24", ""]>, DwarfRegAlias<B24>;
+def D25 : AArch64Reg<25, "d25", [S25], ["v25", ""]>, DwarfRegAlias<B25>;
+def D26 : AArch64Reg<26, "d26", [S26], ["v26", ""]>, DwarfRegAlias<B26>;
+def D27 : AArch64Reg<27, "d27", [S27], ["v27", ""]>, DwarfRegAlias<B27>;
+def D28 : AArch64Reg<28, "d28", [S28], ["v28", ""]>, DwarfRegAlias<B28>;
+def D29 : AArch64Reg<29, "d29", [S29], ["v29", ""]>, DwarfRegAlias<B29>;
+def D30 : AArch64Reg<30, "d30", [S30], ["v30", ""]>, DwarfRegAlias<B30>;
+def D31 : AArch64Reg<31, "d31", [S31], ["v31", ""]>, DwarfRegAlias<B31>;
}
-def GPR64xsp : RegisterClass<"AArch64", [i64], 64,
- (add (sequence "X%u", 0, 30), XSP)> {
+let SubRegIndices = [dsub], RegAltNameIndices = [vreg, vlist1] in {
+def Q0 : AArch64Reg<0, "q0", [D0], ["v0", ""]>, DwarfRegAlias<B0>;
+def Q1 : AArch64Reg<1, "q1", [D1], ["v1", ""]>, DwarfRegAlias<B1>;
+def Q2 : AArch64Reg<2, "q2", [D2], ["v2", ""]>, DwarfRegAlias<B2>;
+def Q3 : AArch64Reg<3, "q3", [D3], ["v3", ""]>, DwarfRegAlias<B3>;
+def Q4 : AArch64Reg<4, "q4", [D4], ["v4", ""]>, DwarfRegAlias<B4>;
+def Q5 : AArch64Reg<5, "q5", [D5], ["v5", ""]>, DwarfRegAlias<B5>;
+def Q6 : AArch64Reg<6, "q6", [D6], ["v6", ""]>, DwarfRegAlias<B6>;
+def Q7 : AArch64Reg<7, "q7", [D7], ["v7", ""]>, DwarfRegAlias<B7>;
+def Q8 : AArch64Reg<8, "q8", [D8], ["v8", ""]>, DwarfRegAlias<B8>;
+def Q9 : AArch64Reg<9, "q9", [D9], ["v9", ""]>, DwarfRegAlias<B9>;
+def Q10 : AArch64Reg<10, "q10", [D10], ["v10", ""]>, DwarfRegAlias<B10>;
+def Q11 : AArch64Reg<11, "q11", [D11], ["v11", ""]>, DwarfRegAlias<B11>;
+def Q12 : AArch64Reg<12, "q12", [D12], ["v12", ""]>, DwarfRegAlias<B12>;
+def Q13 : AArch64Reg<13, "q13", [D13], ["v13", ""]>, DwarfRegAlias<B13>;
+def Q14 : AArch64Reg<14, "q14", [D14], ["v14", ""]>, DwarfRegAlias<B14>;
+def Q15 : AArch64Reg<15, "q15", [D15], ["v15", ""]>, DwarfRegAlias<B15>;
+def Q16 : AArch64Reg<16, "q16", [D16], ["v16", ""]>, DwarfRegAlias<B16>;
+def Q17 : AArch64Reg<17, "q17", [D17], ["v17", ""]>, DwarfRegAlias<B17>;
+def Q18 : AArch64Reg<18, "q18", [D18], ["v18", ""]>, DwarfRegAlias<B18>;
+def Q19 : AArch64Reg<19, "q19", [D19], ["v19", ""]>, DwarfRegAlias<B19>;
+def Q20 : AArch64Reg<20, "q20", [D20], ["v20", ""]>, DwarfRegAlias<B20>;
+def Q21 : AArch64Reg<21, "q21", [D21], ["v21", ""]>, DwarfRegAlias<B21>;
+def Q22 : AArch64Reg<22, "q22", [D22], ["v22", ""]>, DwarfRegAlias<B22>;
+def Q23 : AArch64Reg<23, "q23", [D23], ["v23", ""]>, DwarfRegAlias<B23>;
+def Q24 : AArch64Reg<24, "q24", [D24], ["v24", ""]>, DwarfRegAlias<B24>;
+def Q25 : AArch64Reg<25, "q25", [D25], ["v25", ""]>, DwarfRegAlias<B25>;
+def Q26 : AArch64Reg<26, "q26", [D26], ["v26", ""]>, DwarfRegAlias<B26>;
+def Q27 : AArch64Reg<27, "q27", [D27], ["v27", ""]>, DwarfRegAlias<B27>;
+def Q28 : AArch64Reg<28, "q28", [D28], ["v28", ""]>, DwarfRegAlias<B28>;
+def Q29 : AArch64Reg<29, "q29", [D29], ["v29", ""]>, DwarfRegAlias<B29>;
+def Q30 : AArch64Reg<30, "q30", [D30], ["v30", ""]>, DwarfRegAlias<B30>;
+def Q31 : AArch64Reg<31, "q31", [D31], ["v31", ""]>, DwarfRegAlias<B31>;
}
-// Some aliases *only* apply to SP (e.g. MOV uses different encoding for SP and
-// non-SP variants). We can't use a bare register in those patterns because
-// TableGen doesn't like it, so we need a class containing just stack registers
-def Rxsp : RegisterClass<"AArch64", [i64], 64,
- (add XSP)> {
+def FPR8 : RegisterClass<"AArch64", [untyped], 8, (sequence "B%u", 0, 31)> {
+ let Size = 8;
}
+def FPR16 : RegisterClass<"AArch64", [f16], 16, (sequence "H%u", 0, 31)> {
+ let Size = 16;
+}
+def FPR32 : RegisterClass<"AArch64", [f32, i32], 32, (sequence "S%u", 0, 31)>;
+def FPR64 : RegisterClass<"AArch64", [f64, i64, v2f32, v1f64, v8i8, v4i16, v2i32,
+ v1i64],
+ 64, (sequence "D%u", 0, 31)>;
+// We don't (yet) have an f128 legal type, so don't use that here. We
+// normalize 128-bit vectors to v2f64 for arg passing and such, so use
+// that here.
+def FPR128 : RegisterClass<"AArch64",
+ [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64, f128],
+ 128, (sequence "Q%u", 0, 31)>;
-def Rwsp : RegisterClass<"AArch64", [i32], 32,
- (add WSP)> {
+// The lower 16 vector registers. Some instructions can only take registers
+// in this range.
+def FPR128_lo : RegisterClass<"AArch64",
+ [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ 128, (trunc FPR128, 16)>;
+
+// Pairs, triples, and quads of 64-bit vector registers.
+def DSeqPairs : RegisterTuples<[dsub0, dsub1], [(rotl FPR64, 0), (rotl FPR64, 1)]>;
+def DSeqTriples : RegisterTuples<[dsub0, dsub1, dsub2],
+ [(rotl FPR64, 0), (rotl FPR64, 1),
+ (rotl FPR64, 2)]>;
+def DSeqQuads : RegisterTuples<[dsub0, dsub1, dsub2, dsub3],
+ [(rotl FPR64, 0), (rotl FPR64, 1),
+ (rotl FPR64, 2), (rotl FPR64, 3)]>;
+def DD : RegisterClass<"AArch64", [untyped], 64, (add DSeqPairs)> {
+ let Size = 128;
+}
+def DDD : RegisterClass<"AArch64", [untyped], 64, (add DSeqTriples)> {
+ let Size = 192; // 3 x 64 bits
+}
+def DDDD : RegisterClass<"AArch64", [untyped], 64, (add DSeqQuads)> {
+ let Size = 256;
}
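
The rotl lists above give each tuple class one sequence per starting register, with wraparound: DSeqPairs is D0_D1, D1_D2, ..., D31_D0, and the triples and quads extend the same pattern, exactly as the deleted Tuples2D/3D/4D comments spelled out for the old classes.
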
-//===----------------------------------------------------------------------===//
-// Scalar registers in the vector unit:
-// b0-b31, h0-h31, s0-s31, d0-d31, q0-q31
-//===----------------------------------------------------------------------===//
+// Pairs, triples, and quads of 128-bit vector registers.
+def QSeqPairs : RegisterTuples<[qsub0, qsub1], [(rotl FPR128, 0), (rotl FPR128, 1)]>;
+def QSeqTriples : RegisterTuples<[qsub0, qsub1, qsub2],
+ [(rotl FPR128, 0), (rotl FPR128, 1),
+ (rotl FPR128, 2)]>;
+def QSeqQuads : RegisterTuples<[qsub0, qsub1, qsub2, qsub3],
+ [(rotl FPR128, 0), (rotl FPR128, 1),
+ (rotl FPR128, 2), (rotl FPR128, 3)]>;
+def QQ : RegisterClass<"AArch64", [untyped], 128, (add QSeqPairs)> {
+ let Size = 256;
+}
+def QQQ : RegisterClass<"AArch64", [untyped], 128, (add QSeqTriples)> {
+ let Size = 384;
+}
+def QQQQ : RegisterClass<"AArch64", [untyped], 128, (add QSeqQuads)> {
+ let Size = 512;
+}
-foreach Index = 0-31 in {
- def B # Index : AArch64Reg< Index, "b" # Index>,
- DwarfRegNum<[!add(Index, 64)]>;
- def H # Index : AArch64RegWithSubs<Index, "h" # Index,
- [!cast<Register>("B" # Index)], [sub_8]>,
- DwarfRegNum<[!add(Index, 64)]>;
+// Vector operand versions of the FP registers. Alternate name printing and
+// assembler matching.
+def VectorReg64AsmOperand : AsmOperandClass {
+ let Name = "VectorReg64";
+ let PredicateMethod = "isVectorReg";
+}
+def VectorReg128AsmOperand : AsmOperandClass {
+ let Name = "VectorReg128";
+ let PredicateMethod = "isVectorReg";
+}
- def S # Index : AArch64RegWithSubs<Index, "s" # Index,
- [!cast<Register>("H" # Index)], [sub_16]>,
- DwarfRegNum<[!add(Index, 64)]>;
+def V64 : RegisterOperand<FPR64, "printVRegOperand"> {
+ let ParserMatchClass = VectorReg64AsmOperand;
+}
- def D # Index : AArch64RegWithSubs<Index, "d" # Index,
- [!cast<Register>("S" # Index)], [sub_32]>,
- DwarfRegNum<[!add(Index, 64)]>;
+def V128 : RegisterOperand<FPR128, "printVRegOperand"> {
+ let ParserMatchClass = VectorReg128AsmOperand;
+}
- def Q # Index : AArch64RegWithSubs<Index, "q" # Index,
- [!cast<Register>("D" # Index)], [sub_64]>,
- DwarfRegNum<[!add(Index, 64)]>;
+def VectorRegLoAsmOperand : AsmOperandClass { let Name = "VectorRegLo"; }
+def V128_lo : RegisterOperand<FPR128_lo, "printVRegOperand"> {
+ let ParserMatchClass = VectorRegLoAsmOperand;
}
+class TypedVecListAsmOperand<int count, int regsize, int lanes, string kind>
+ : AsmOperandClass {
+ let Name = "TypedVectorList" # count # "_" # lanes # kind;
-def FPR8 : RegisterClass<"AArch64", [v1i8], 8,
- (sequence "B%u", 0, 31)> {
+ let PredicateMethod
+ = "isTypedVectorList<" # count # ", " # lanes # ", '" # kind # "'>";
+ let RenderMethod = "addVectorList" # regsize # "Operands<" # count # ">";
}
-def FPR16 : RegisterClass<"AArch64", [f16, v1i16], 16,
- (sequence "H%u", 0, 31)> {
-}
+class TypedVecListRegOperand<RegisterClass Reg, int lanes, string kind>
+ : RegisterOperand<Reg, "printTypedVectorList<" # lanes # ", '"
+ # kind # "'>">;
-def FPR32 : RegisterClass<"AArch64", [f32, v1i32], 32,
- (sequence "S%u", 0, 31)> {
-}
+multiclass VectorList<int count, RegisterClass Reg64, RegisterClass Reg128> {
+ // With implicit types (probably on instruction instead). E.g. { v0, v1 }
+ def _64AsmOperand : AsmOperandClass {
+ let Name = NAME # "64";
+ let PredicateMethod = "isImplicitlyTypedVectorList<" # count # ">";
+ let RenderMethod = "addVectorList64Operands<" # count # ">";
+ }
-def FPR64 : RegisterClass<"AArch64",
- [f64, v2f32, v2i32, v4i16, v8i8, v1i64, v1f64],
- 64, (sequence "D%u", 0, 31)>;
+ def "64" : RegisterOperand<Reg64, "printImplicitlyTypedVectorList"> {
+ let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_64AsmOperand");
+ }
-def FPR128 : RegisterClass<"AArch64",
- [f128, v2f64, v2i64, v4f32, v4i32, v8i16, v16i8],
- 128, (sequence "Q%u", 0, 31)>;
+ def _128AsmOperand : AsmOperandClass {
+ let Name = NAME # "128";
+ let PredicateMethod = "isImplicitlyTypedVectorList<" # count # ">";
+ let RenderMethod = "addVectorList128Operands<" # count # ">";
+ }
+
+ def "128" : RegisterOperand<Reg128, "printImplicitlyTypedVectorList"> {
+ let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_128AsmOperand");
+ }
-def FPR64Lo : RegisterClass<"AArch64",
- [f64, v2f32, v2i32, v4i16, v8i8, v1i64, v1f64],
- 64, (sequence "D%u", 0, 15)>;
+ // 64-bit register lists with explicit type.
-def FPR128Lo : RegisterClass<"AArch64",
- [f128, v2f64, v2i64, v4f32, v4i32, v8i16, v16i8],
- 128, (sequence "Q%u", 0, 15)>;
+ // { v0.8b, v1.8b }
+ def _8bAsmOperand : TypedVecListAsmOperand<count, 64, 8, "b">;
+ def "8b" : TypedVecListRegOperand<Reg64, 8, "b"> {
+ let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_8bAsmOperand");
+ }
-//===----------------------------------------------------------------------===//
-// Vector registers:
-//===----------------------------------------------------------------------===//
+ // { v0.4h, v1.4h }
+ def _4hAsmOperand : TypedVecListAsmOperand<count, 64, 4, "h">;
+ def "4h" : TypedVecListRegOperand<Reg64, 4, "h"> {
+ let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_4hAsmOperand");
+ }
-def VPR64AsmOperand : AsmOperandClass {
- let Name = "VPR";
- let PredicateMethod = "isReg";
- let RenderMethod = "addRegOperands";
-}
+ // { v0.2s, v1.2s }
+ def _2sAsmOperand : TypedVecListAsmOperand<count, 64, 2, "s">;
+ def "2s" : TypedVecListRegOperand<Reg64, 2, "s"> {
+ let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_2sAsmOperand");
+ }
+
+ // { v0.1d, v1.1d }
+ def _1dAsmOperand : TypedVecListAsmOperand<count, 64, 1, "d">;
+ def "1d" : TypedVecListRegOperand<Reg64, 1, "d"> {
+ let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_1dAsmOperand");
+ }
-def VPR64 : RegisterOperand<FPR64, "printVPRRegister">;
+ // 128-bit register lists with explicit type
-def VPR128 : RegisterOperand<FPR128, "printVPRRegister">;
+ // { v0.16b, v1.16b }
+ def _16bAsmOperand : TypedVecListAsmOperand<count, 128, 16, "b">;
+ def "16b" : TypedVecListRegOperand<Reg128, 16, "b"> {
+ let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_16bAsmOperand");
+ }
-def VPR64Lo : RegisterOperand<FPR64Lo, "printVPRRegister">;
+ // { v0.8h, v1.8h }
+ def _8hAsmOperand : TypedVecListAsmOperand<count, 128, 8, "h">;
+ def "8h" : TypedVecListRegOperand<Reg128, 8, "h"> {
+ let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_8hAsmOperand");
+ }
-def VPR128Lo : RegisterOperand<FPR128Lo, "printVPRRegister">;
+ // { v0.4s, v1.4s }
+ def _4sAsmOperand : TypedVecListAsmOperand<count, 128, 4, "s">;
+ def "4s" : TypedVecListRegOperand<Reg128, 4, "s"> {
+ let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_4sAsmOperand");
+ }
-// Flags register
-def NZCV : Register<"nzcv"> {
- let Namespace = "AArch64";
-}
+ // { v0.2d, v1.2d }
+ def _2dAsmOperand : TypedVecListAsmOperand<count, 128, 2, "d">;
+ def "2d" : TypedVecListRegOperand<Reg128, 2, "d"> {
+ let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_2dAsmOperand");
+ }
-def FlagClass : RegisterClass<"AArch64", [i32], 32, (add NZCV)> {
- let CopyCost = -1;
- let isAllocatable = 0;
-}
+ // { v0.b, v1.b }
+ def _bAsmOperand : TypedVecListAsmOperand<count, 128, 0, "b">;
+ def "b" : TypedVecListRegOperand<Reg128, 0, "b"> {
+ let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_bAsmOperand");
+ }
-//===----------------------------------------------------------------------===//
-// Consecutive vector registers
-//===----------------------------------------------------------------------===//
-// 2 Consecutive 64-bit registers: D0_D1, D1_D2, ..., D31_D0
-def Tuples2D : RegisterTuples<[dsub_0, dsub_1],
- [(rotl FPR64, 0), (rotl FPR64, 1)]>;
-
-// 3 Consecutive 64-bit registers: D0_D1_D2, ..., D31_D0_D1
-def Tuples3D : RegisterTuples<[dsub_0, dsub_1, dsub_2],
- [(rotl FPR64, 0), (rotl FPR64, 1),
- (rotl FPR64, 2)]>;
-
-// 4 Consecutive 64-bit registers: D0_D1_D2_D3, ..., D31_D0_D1_D2
-def Tuples4D : RegisterTuples<[dsub_0, dsub_1, dsub_2, dsub_3],
- [(rotl FPR64, 0), (rotl FPR64, 1),
- (rotl FPR64, 2), (rotl FPR64, 3)]>;
-
-// 2 Consecutive 128-bit registers: Q0_Q1, Q1_Q2, ..., Q30_Q31
-def Tuples2Q : RegisterTuples<[qsub_0, qsub_1],
- [(rotl FPR128, 0), (rotl FPR128, 1)]>;
-
-// 3 Consecutive 128-bit registers: Q0_Q1_Q2, ..., Q31_Q0_Q1
-def Tuples3Q : RegisterTuples<[qsub_0, qsub_1, qsub_2],
- [(rotl FPR128, 0), (rotl FPR128, 1),
- (rotl FPR128, 2)]>;
-
-// 4 Consecutive 128-bit registers: Q0_Q1_Q2_Q3, ..., Q31_Q0_Q1_Q2
-def Tuples4Q : RegisterTuples<[qsub_0, qsub_1, qsub_2, qsub_3],
- [(rotl FPR128, 0), (rotl FPR128, 1),
- (rotl FPR128, 2), (rotl FPR128, 3)]>;
-
-// The followings are super register classes to model 2/3/4 consecutive
-// 64-bit/128-bit registers.
-
-def DPair : RegisterClass<"AArch64", [v2i64], 64, (add Tuples2D)>;
-
-def DTriple : RegisterClass<"AArch64", [untyped], 64, (add Tuples3D)> {
- let Size = 192; // 3 x 64 bits, we have no predefined type of that size.
-}
-
-def DQuad : RegisterClass<"AArch64", [v4i64], 64, (add Tuples4D)>;
-
-def QPair : RegisterClass<"AArch64", [v4i64], 128, (add Tuples2Q)>;
-
-def QTriple : RegisterClass<"AArch64", [untyped], 128, (add Tuples3Q)> {
- let Size = 384; // 3 x 128 bits, we have no predefined type of that size.
-}
-
-def QQuad : RegisterClass<"AArch64", [v8i64], 128, (add Tuples4Q)>;
-
-
-// The followings are vector list operands
-multiclass VectorList_operands<string PREFIX, string LAYOUT, int Count,
- RegisterClass RegList> {
- def _asmoperand : AsmOperandClass {
- let Name = PREFIX # LAYOUT # Count;
- let RenderMethod = "addVectorListOperands";
- let PredicateMethod =
- "isVectorList<A64Layout::VL_" # LAYOUT # ", " # Count # ">";
- let ParserMethod = "ParseVectorList";
+ // { v0.h, v1.h }
+ def _hAsmOperand : TypedVecListAsmOperand<count, 128, 0, "h">;
+ def "h" : TypedVecListRegOperand<Reg128, 0, "h"> {
+ let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_hAsmOperand");
}
- def _operand : RegisterOperand<RegList,
- "printVectorList<A64Layout::VL_" # LAYOUT # ", " # Count # ">"> {
- let ParserMatchClass =
- !cast<AsmOperandClass>(PREFIX # LAYOUT # "_asmoperand");
+ // { v0.s, v1.s }
+ def _sAsmOperand : TypedVecListAsmOperand<count, 128, 0, "s">;
+ def "s" : TypedVecListRegOperand<Reg128, 0, "s"> {
+ let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_sAsmOperand");
}
-}
-multiclass VectorList_BHSD<string PREFIX, int Count, RegisterClass DRegList,
- RegisterClass QRegList> {
- defm 8B : VectorList_operands<PREFIX, "8B", Count, DRegList>;
- defm 4H : VectorList_operands<PREFIX, "4H", Count, DRegList>;
- defm 2S : VectorList_operands<PREFIX, "2S", Count, DRegList>;
- defm 1D : VectorList_operands<PREFIX, "1D", Count, DRegList>;
- defm 16B : VectorList_operands<PREFIX, "16B", Count, QRegList>;
- defm 8H : VectorList_operands<PREFIX, "8H", Count, QRegList>;
- defm 4S : VectorList_operands<PREFIX, "4S", Count, QRegList>;
- defm 2D : VectorList_operands<PREFIX, "2D", Count, QRegList>;
+ // { v0.d, v1.d }
+ def _dAsmOperand : TypedVecListAsmOperand<count, 128, 0, "d">;
+ def "d" : TypedVecListRegOperand<Reg128, 0, "d"> {
+ let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_dAsmOperand");
+ }
+
+
}
-// Vector list operand with 1/2/3/4 registers: VOne8B_operand,..., VQuad2D_operand
-defm VOne : VectorList_BHSD<"VOne", 1, FPR64, FPR128>;
-defm VPair : VectorList_BHSD<"VPair", 2, DPair, QPair>;
-defm VTriple : VectorList_BHSD<"VTriple", 3, DTriple, QTriple>;
-defm VQuad : VectorList_BHSD<"VQuad", 4, DQuad, QQuad>;
+defm VecListOne : VectorList<1, FPR64, FPR128>;
+defm VecListTwo : VectorList<2, DD, QQ>;
+defm VecListThree : VectorList<3, DDD, QQQ>;
+defm VecListFour : VectorList<4, DDDD, QQQQ>;
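
Each defm above stamps out the whole family from the multiclass: VecListTwo, for instance, yields the implicitly typed VecListTwo64 and VecListTwo128 operands, the typed VecListTwo8b/4h/2s/1d and VecListTwo16b/8h/4s/2d variants, and the element-kind-only b/h/s/d forms.
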
+
+
+// Register operand versions of the scalar FP registers.
+def FPR16Op : RegisterOperand<FPR16, "printOperand">;
+def FPR32Op : RegisterOperand<FPR32, "printOperand">;
+def FPR64Op : RegisterOperand<FPR64, "printOperand">;
+def FPR128Op : RegisterOperand<FPR128, "printOperand">;
diff --git a/lib/Target/AArch64/AArch64SchedA53.td b/lib/Target/AArch64/AArch64SchedA53.td
new file mode 100644
index 0000000..0c3949e
--- /dev/null
+++ b/lib/Target/AArch64/AArch64SchedA53.td
@@ -0,0 +1,291 @@
+//==- AArch64SchedA53.td - Cortex-A53 Scheduling Definitions -*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the machine model for the ARM Cortex-A53 processors.
+//
+//===----------------------------------------------------------------------===//
+
+// ===---------------------------------------------------------------------===//
+// The following definitions describe the simpler per-operand machine model.
+// This works with MachineScheduler. See MCSchedModel.h for details.
+
+// Cortex-A53 machine model for scheduling and other instruction cost heuristics.
+def CortexA53Model : SchedMachineModel {
+ let MicroOpBufferSize = 0; // Explicitly set to zero since A53 is in-order.
+ let IssueWidth = 2; // 2 micro-ops are dispatched per cycle.
+ let MinLatency = 1; // OperandCycles are interpreted as MinLatency.
+ let LoadLatency = 3; // Optimistic load latency assuming bypass.
+ // This is overridden by OperandCycles if the
+ // Itineraries are queried instead.
+ let MispredictPenalty = 9; // Based on "Cortex-A53 Software Optimisation
+ // Specification - Instruction Timings"
+ // v 1.0 Spreadsheet
+}
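
TableGen folds these let-bindings into the generated llvm::MCSchedModel; here is a minimal sketch of a consumer reading them back (field names per MCSchedModel.h, as the comment above notes):

    #include "llvm/MC/MCSchedule.h"
    #include <cstdio>

    // Sketch: the CortexA53Model fields as they appear to scheduler code.
    void dumpModel(const llvm::MCSchedModel &SM) {
      std::printf("issue width %u\n", SM.IssueWidth);        // 2
      std::printf("uop buffer  %u\n", SM.MicroOpBufferSize); // 0: in-order
      std::printf("load lat    %u\n", SM.LoadLatency);       // 3
      std::printf("mispredict  %u\n", SM.MispredictPenalty); // 9
    }
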
+
+
+//===----------------------------------------------------------------------===//
+// Define each kind of processor resource and number available.
+
+// Model each pipeline as a ProcResource with BufferSize = 0, since
+// Cortex-A53 is in-order.
+
+def A53UnitALU : ProcResource<2> { let BufferSize = 0; } // Int ALU
+def A53UnitMAC : ProcResource<1> { let BufferSize = 0; } // Int MAC
+def A53UnitDiv : ProcResource<1> { let BufferSize = 0; } // Int Division
+def A53UnitLdSt : ProcResource<1> { let BufferSize = 0; } // Load/Store
+def A53UnitB : ProcResource<1> { let BufferSize = 0; } // Branch
+def A53UnitFPALU : ProcResource<1> { let BufferSize = 0; } // FP ALU
+def A53UnitFPMDS : ProcResource<1> { let BufferSize = 0; } // FP Mult/Div/Sqrt
+
+
+//===----------------------------------------------------------------------===//
+// Subtarget-specific SchedWrite types which both map the ProcResources and
+// set the latency.
+
+let SchedModel = CortexA53Model in {
+
+// ALU - Despite having a full latency of 4, most of the ALU instructions can
+// forward a cycle earlier and then two cycles earlier in the case of a
+// shift-only instruction. These latencies will be incorrect when the
+// result cannot be forwarded, but modeling isn't rocket surgery.
+def : WriteRes<WriteImm, [A53UnitALU]> { let Latency = 3; }
+def : WriteRes<WriteI, [A53UnitALU]> { let Latency = 3; }
+def : WriteRes<WriteISReg, [A53UnitALU]> { let Latency = 3; }
+def : WriteRes<WriteIEReg, [A53UnitALU]> { let Latency = 3; }
+def : WriteRes<WriteIS, [A53UnitALU]> { let Latency = 2; }
+def : WriteRes<WriteExtr, [A53UnitALU]> { let Latency = 3; }
+
+// MAC
+def : WriteRes<WriteIM32, [A53UnitMAC]> { let Latency = 4; }
+def : WriteRes<WriteIM64, [A53UnitMAC]> { let Latency = 4; }
+
+// Div
+def : WriteRes<WriteID32, [A53UnitDiv]> { let Latency = 4; }
+def : WriteRes<WriteID64, [A53UnitDiv]> { let Latency = 4; }
+
+// Load
+def : WriteRes<WriteLD, [A53UnitLdSt]> { let Latency = 4; }
+def : WriteRes<WriteLDIdx, [A53UnitLdSt]> { let Latency = 4; }
+def : WriteRes<WriteLDHi, [A53UnitLdSt]> { let Latency = 4; }
+
+// Vector Load - Vector loads take 1-5 cycles to issue. For the WriteVLD
+// default below, we choose the median of 3, which makes the latency 6.
+// We may model this more carefully in the future. The remaining
+// A53WriteVLD# types represent the 1-5 cycle issues explicitly.
+def : WriteRes<WriteVLD, [A53UnitLdSt]> { let Latency = 6;
+ let ResourceCycles = [3]; }
+def A53WriteVLD1 : SchedWriteRes<[A53UnitLdSt]> { let Latency = 4; }
+def A53WriteVLD2 : SchedWriteRes<[A53UnitLdSt]> { let Latency = 5;
+ let ResourceCycles = [2]; }
+def A53WriteVLD3 : SchedWriteRes<[A53UnitLdSt]> { let Latency = 6;
+ let ResourceCycles = [3]; }
+def A53WriteVLD4 : SchedWriteRes<[A53UnitLdSt]> { let Latency = 7;
+ let ResourceCycles = [4]; }
+def A53WriteVLD5 : SchedWriteRes<[A53UnitLdSt]> { let Latency = 8;
+ let ResourceCycles = [5]; }
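
The numbers follow one pattern: each A53WriteVLDn pairs n issue cycles (ResourceCycles) with a latency of n + 3, the extra 3 presumably being the load result (LoadLatency above); the WriteVLD default is just the n = 3 midpoint, as the comment says.
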
+
+// Pre/Post Indexing - Performed as part of address generation, which is
+// already accounted for in the WriteST* latencies below.
+def : WriteRes<WriteAdr, []> { let Latency = 0; }
+
+// Store
+def : WriteRes<WriteST, [A53UnitLdSt]> { let Latency = 4; }
+def : WriteRes<WriteSTP, [A53UnitLdSt]> { let Latency = 4; }
+def : WriteRes<WriteSTIdx, [A53UnitLdSt]> { let Latency = 4; }
+def : WriteRes<WriteSTX, [A53UnitLdSt]> { let Latency = 4; }
+
+// Vector Store - Similar to vector loads, can take 1-3 cycles to issue.
+def : WriteRes<WriteVST, [A53UnitLdSt]> { let Latency = 5;
+ let ResourceCycles = [2];}
+def A53WriteVST1 : SchedWriteRes<[A53UnitLdSt]> { let Latency = 4; }
+def A53WriteVST2 : SchedWriteRes<[A53UnitLdSt]> { let Latency = 5;
+ let ResourceCycles = [2]; }
+def A53WriteVST3 : SchedWriteRes<[A53UnitLdSt]> { let Latency = 6;
+ let ResourceCycles = [3]; }
+
+// Branch
+def : WriteRes<WriteBr, [A53UnitB]>;
+def : WriteRes<WriteBrReg, [A53UnitB]>;
+def : WriteRes<WriteSys, [A53UnitB]>;
+def : WriteRes<WriteBarrier, [A53UnitB]>;
+def : WriteRes<WriteHint, [A53UnitB]>;
+
+// FP ALU
+def : WriteRes<WriteF, [A53UnitFPALU]> { let Latency = 6; }
+def : WriteRes<WriteFCmp, [A53UnitFPALU]> { let Latency = 6; }
+def : WriteRes<WriteFCvt, [A53UnitFPALU]> { let Latency = 6; }
+def : WriteRes<WriteFCopy, [A53UnitFPALU]> { let Latency = 6; }
+def : WriteRes<WriteFImm, [A53UnitFPALU]> { let Latency = 6; }
+def : WriteRes<WriteV, [A53UnitFPALU]> { let Latency = 6; }
+
+// FP Mul, Div, Sqrt
+def : WriteRes<WriteFMul, [A53UnitFPMDS]> { let Latency = 6; }
+def : WriteRes<WriteFDiv, [A53UnitFPMDS]> { let Latency = 33;
+ let ResourceCycles = [29]; }
+def A53WriteFMAC : SchedWriteRes<[A53UnitFPMDS]> { let Latency = 10; }
+def A53WriteFDivSP : SchedWriteRes<[A53UnitFPMDS]> { let Latency = 18;
+ let ResourceCycles = [14]; }
+def A53WriteFDivDP : SchedWriteRes<[A53UnitFPMDS]> { let Latency = 33;
+ let ResourceCycles = [29]; }
+def A53WriteFSqrtSP : SchedWriteRes<[A53UnitFPMDS]> { let Latency = 17;
+ let ResourceCycles = [13]; }
+def A53WriteFSqrtDP : SchedWriteRes<[A53UnitFPMDS]> { let Latency = 32;
+ let ResourceCycles = [28]; }
+
+//===----------------------------------------------------------------------===//
+// Subtarget-specific SchedRead types.
+
+// No forwarding for these reads.
+def : ReadAdvance<ReadExtrHi, 0>;
+def : ReadAdvance<ReadAdrBase, 0>;
+def : ReadAdvance<ReadVLD, 0>;
+
+// ALU - Most operands in the ALU pipes are not needed for two cycles. Shiftable
+// operands are needed one cycle later if and only if they are to be
+// shifted. Otherwise, they too are needed two cycles later. This same
+// ReadAdvance applies to extended registers as well, even though there is
+// a separate SchedPredicate for them.
+def : ReadAdvance<ReadI, 2, [WriteImm,WriteI,
+ WriteISReg, WriteIEReg,WriteIS,
+ WriteID32,WriteID64,
+ WriteIM32,WriteIM64]>;
+def A53ReadShifted : SchedReadAdvance<1, [WriteImm,WriteI,
+ WriteISReg, WriteIEReg,WriteIS,
+ WriteID32,WriteID64,
+ WriteIM32,WriteIM64]>;
+def A53ReadNotShifted : SchedReadAdvance<2, [WriteImm,WriteI,
+ WriteISReg, WriteIEReg,WriteIS,
+ WriteID32,WriteID64,
+ WriteIM32,WriteIM64]>;
+def A53ReadISReg : SchedReadVariant<[
+ SchedVar<RegShiftedPred, [A53ReadShifted]>,
+ SchedVar<NoSchedPred, [A53ReadNotShifted]>]>;
+def : SchedAlias<ReadISReg, A53ReadISReg>;
+
+def A53ReadIEReg : SchedReadVariant<[
+ SchedVar<RegExtendedPred, [A53ReadShifted]>,
+ SchedVar<NoSchedPred, [A53ReadNotShifted]>]>;
+def : SchedAlias<ReadIEReg, A53ReadIEReg>;
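
The effect of the ReadAdvance values above on a dependent pair can be worked out directly: the scheduler subtracts the consumer's ReadAdvance from the producer's write latency (a sketch, with the clamp at zero assumed):

    #include <algorithm>
    #include <cstdio>

    int main() {
      int WriteISRegLat = 3; // shifted-register ALU write, defined above
      // A dependent ALU op that does not shift the operand reads it with
      // A53ReadNotShifted (advance 2); one that shifts it reads with
      // A53ReadShifted (advance 1).
      std::printf("unshifted use: %d cycle(s)\n",
                  std::max(0, WriteISRegLat - 2)); // 1
      std::printf("shifted use:   %d cycle(s)\n",
                  std::max(0, WriteISRegLat - 1)); // 2
    }
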
+
+// MAC - Operands are generally needed one cycle later in the MAC pipe.
+// Accumulator operands are needed two cycles later.
+def : ReadAdvance<ReadIM, 1, [WriteImm,WriteI,
+ WriteISReg, WriteIEReg,WriteIS,
+ WriteID32,WriteID64,
+ WriteIM32,WriteIM64]>;
+def : ReadAdvance<ReadIMA, 2, [WriteImm,WriteI,
+ WriteISReg, WriteIEReg,WriteIS,
+ WriteID32,WriteID64,
+ WriteIM32,WriteIM64]>;
+
+// Div
+def : ReadAdvance<ReadID, 1, [WriteImm,WriteI,
+ WriteISReg, WriteIEReg,WriteIS,
+ WriteID32,WriteID64,
+ WriteIM32,WriteIM64]>;
+
+//===----------------------------------------------------------------------===//
+// Subtarget-specific InstRWs.
+
+//---
+// Miscellaneous
+//---
+def : InstRW<[WriteI], (instrs COPY)>;
+
+//---
+// Vector Loads
+//---
+def : InstRW<[A53WriteVLD1], (instregex "LD1i(8|16|32|64)$")>;
+def : InstRW<[A53WriteVLD1], (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[A53WriteVLD1], (instregex "LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[A53WriteVLD2], (instregex "LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[A53WriteVLD3], (instregex "LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[A53WriteVLD4], (instregex "LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[A53WriteVLD1, WriteAdr], (instregex "LD1i(8|16|32|64)_POST$")>;
+def : InstRW<[A53WriteVLD1, WriteAdr], (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[A53WriteVLD1, WriteAdr], (instregex "LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[A53WriteVLD2, WriteAdr], (instregex "LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[A53WriteVLD3, WriteAdr], (instregex "LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[A53WriteVLD4, WriteAdr], (instregex "LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[A53WriteVLD1], (instregex "LD2i(8|16|32|64)$")>;
+def : InstRW<[A53WriteVLD1], (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[A53WriteVLD2], (instregex "LD2Twov(8b|4h|2s)$")>;
+def : InstRW<[A53WriteVLD4], (instregex "LD2Twov(16b|8h|4s|2d)$")>;
+def : InstRW<[A53WriteVLD1, WriteAdr], (instregex "LD2i(8|16|32|64)_POST$")>;
+def : InstRW<[A53WriteVLD1, WriteAdr], (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[A53WriteVLD2, WriteAdr], (instregex "LD2Twov(8b|4h|2s)_POST$")>;
+def : InstRW<[A53WriteVLD4, WriteAdr], (instregex "LD2Twov(16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[A53WriteVLD2], (instregex "LD3i(8|16|32|64)$")>;
+def : InstRW<[A53WriteVLD2], (instregex "LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[A53WriteVLD4], (instregex "LD3Threev(8b|4h|2s|1d|16b|8h|4s)$")>;
+def : InstRW<[A53WriteVLD3], (instregex "LD3Threev(2d)$")>;
+def : InstRW<[A53WriteVLD2, WriteAdr], (instregex "LD3i(8|16|32|64)_POST$")>;
+def : InstRW<[A53WriteVLD2, WriteAdr], (instregex "LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[A53WriteVLD4, WriteAdr], (instregex "LD3Threev(8b|4h|2s|1d|16b|8h|4s)_POST$")>;
+def : InstRW<[A53WriteVLD3, WriteAdr], (instregex "LD3Threev(2d)_POST$")>;
+
+def : InstRW<[A53WriteVLD2], (instregex "LD4i(8|16|32|64)$")>;
+def : InstRW<[A53WriteVLD2], (instregex "LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[A53WriteVLD5], (instregex "LD4Fourv(8b|4h|2s|1d|16b|8h|4s)$")>;
+def : InstRW<[A53WriteVLD4], (instregex "LD4Fourv(2d)$")>;
+def : InstRW<[A53WriteVLD2, WriteAdr], (instregex "LD4i(8|16|32|64)_POST$")>;
+def : InstRW<[A53WriteVLD2, WriteAdr], (instregex "LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[A53WriteVLD5, WriteAdr], (instregex "LD4Fourv(8b|4h|2s|1d|16b|8h|4s)_POST$")>;
+def : InstRW<[A53WriteVLD4, WriteAdr], (instregex "LD4Fourv(2d)_POST$")>;
+
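Since each instregex above must pick out exactly one opcode family, it helps to sanity-check a pattern against concrete opcode names. A small C++ check using the same patterns (std::regex_match anchors at both ends, which matches instregex behavior for these $-terminated patterns):

#include <cstdio>
#include <regex>

int main() {
  // Same patterns as the InstRW definitions above.
  const std::regex ld1Two("LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$");
  const std::regex ld4Four("LD4Fourv(8b|4h|2s|1d|16b|8h|4s)$");
  std::printf("LD1Twov16b -> A53WriteVLD2? %d\n",
              (int)std::regex_match("LD1Twov16b", ld1Two)); // 1
  std::printf("LD4Fourv4s -> A53WriteVLD5? %d\n",
              (int)std::regex_match("LD4Fourv4s", ld4Four)); // 1
  std::printf("LD4Fourv2d -> A53WriteVLD5? %d\n",
              (int)std::regex_match("LD4Fourv2d", ld4Four)); // 0: 2d is VLD4
  return 0;
}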
+//---
+// Vector Stores
+//---
+def : InstRW<[A53WriteVST1], (instregex "ST1i(8|16|32|64)$")>;
+def : InstRW<[A53WriteVST1], (instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[A53WriteVST1], (instregex "ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[A53WriteVST2], (instregex "ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[A53WriteVST2], (instregex "ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[A53WriteVST1, WriteAdr], (instregex "ST1i(8|16|32|64)_POST$")>;
+def : InstRW<[A53WriteVST1, WriteAdr], (instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[A53WriteVST1, WriteAdr], (instregex "ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[A53WriteVST2, WriteAdr], (instregex "ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[A53WriteVST2, WriteAdr], (instregex "ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[A53WriteVST1], (instregex "ST2i(8|16|32|64)$")>;
+def : InstRW<[A53WriteVST1], (instregex "ST2Twov(8b|4h|2s)$")>;
+def : InstRW<[A53WriteVST2], (instregex "ST2Twov(16b|8h|4s|2d)$")>;
+def : InstRW<[A53WriteVST1, WriteAdr], (instregex "ST2i(8|16|32|64)_POST$")>;
+def : InstRW<[A53WriteVST1, WriteAdr], (instregex "ST2Twov(8b|4h|2s)_POST$")>;
+def : InstRW<[A53WriteVST2, WriteAdr], (instregex "ST2Twov(16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[A53WriteVST2], (instregex "ST3i(8|16|32|64)$")>;
+def : InstRW<[A53WriteVST3], (instregex "ST3Threev(8b|4h|2s|1d|16b|8h|4s)$")>;
+def : InstRW<[A53WriteVST2], (instregex "ST3Threev(2d)$")>;
+def : InstRW<[A53WriteVST2, WriteAdr], (instregex "ST3i(8|16|32|64)_POST$")>;
+def : InstRW<[A53WriteVST3, WriteAdr], (instregex "ST3Threev(8b|4h|2s|1d|16b|8h|4s)_POST$")>;
+def : InstRW<[A53WriteVST2, WriteAdr], (instregex "ST3Threev(2d)_POST$")>;
+
+def : InstRW<[A53WriteVST2], (instregex "ST4i(8|16|32|64)$")>;
+def : InstRW<[A53WriteVST3], (instregex "ST4Fourv(8b|4h|2s|1d|16b|8h|4s)$")>;
+def : InstRW<[A53WriteVST2], (instregex "ST4Fourv(2d)$")>;
+def : InstRW<[A53WriteVST2, WriteAdr], (instregex "ST4i(8|16|32|64)_POST$")>;
+def : InstRW<[A53WriteVST3, WriteAdr], (instregex "ST4Fourv(8b|4h|2s|1d|16b|8h|4s)_POST$")>;
+def : InstRW<[A53WriteVST2, WriteAdr], (instregex "ST4Fourv(2d)_POST$")>;
+
+//---
+// Floating Point MAC, DIV, SQRT
+//---
+def : InstRW<[A53WriteFMAC], (instregex "^FN?M(ADD|SUB).*")>;
+def : InstRW<[A53WriteFMAC], (instregex "^FML(A|S).*")>;
+def : InstRW<[A53WriteFDivSP], (instrs FDIVSrr)>;
+def : InstRW<[A53WriteFDivDP], (instrs FDIVDrr)>;
+def : InstRW<[A53WriteFDivSP], (instregex "^FDIVv.*32$")>;
+def : InstRW<[A53WriteFDivDP], (instregex "^FDIVv.*64$")>;
+def : InstRW<[A53WriteFSqrtSP], (instregex "^.*SQRT.*32$")>;
+def : InstRW<[A53WriteFSqrtDP], (instregex "^.*SQRT.*64$")>;
+
+}
diff --git a/lib/Target/ARM64/ARM64SchedCyclone.td b/lib/Target/AArch64/AArch64SchedCyclone.td
index 65c68b3..a2a1802 100644
--- a/lib/Target/ARM64/ARM64SchedCyclone.td
+++ b/lib/Target/AArch64/AArch64SchedCyclone.td
@@ -1,4 +1,4 @@
-//=- ARMSchedCyclone.td - ARM64 Cyclone Scheduling Defs ------*- tablegen -*-=//
+//=- AArch64SchedCyclone.td - AArch64 Cyclone Scheduling Defs -*- tablegen -*-=//
//
// The LLVM Compiler Infrastructure
//
@@ -7,7 +7,7 @@
//
//===----------------------------------------------------------------------===//
//
-// This file defines the machine model for ARM64 Cyclone to support
+// This file defines the machine model for AArch64 Cyclone to support
// instruction scheduling and other instruction cost heuristics.
//
//===----------------------------------------------------------------------===//
@@ -239,13 +239,13 @@ def : WriteRes<WriteST, [CyUnitLS]> {
def CyWriteLDIdx : SchedWriteVariant<[
SchedVar<ScaledIdxPred, [WriteIS, WriteLD]>, // Load from scaled register.
SchedVar<NoSchedPred, [WriteLD]>]>; // Load from register offset.
-def : SchedAlias<WriteLDIdx, CyWriteLDIdx>; // Map ARM64->Cyclone type.
+def : SchedAlias<WriteLDIdx, CyWriteLDIdx>; // Map AArch64->Cyclone type.
// EXAMPLE: STR Xn, Xm [, lsl 3]
def CyWriteSTIdx : SchedWriteVariant<[
SchedVar<ScaledIdxPred, [WriteIS, WriteST]>, // Store to scaled register.
SchedVar<NoSchedPred, [WriteST]>]>; // Store to register offset.
-def : SchedAlias<WriteSTIdx, CyWriteSTIdx>; // Map ARM64->Cyclone type.
+def : SchedAlias<WriteSTIdx, CyWriteSTIdx>; // Map AArch64->Cyclone type.
// Read the (unshifted) base register Xn in the second micro-op one cycle later.
// EXAMPLE: LDR Xn, Xm [, lsl 3]
@@ -253,7 +253,7 @@ def ReadBaseRS : SchedReadAdvance<1>;
def CyReadAdrBase : SchedReadVariant<[
SchedVar<ScaledIdxPred, [ReadBaseRS]>, // Read base reg after shifting offset.
SchedVar<NoSchedPred, [ReadDefault]>]>; // Read base reg with no shift.
-def : SchedAlias<ReadAdrBase, CyReadAdrBase>; // Map ARM64->Cyclone type.
+def : SchedAlias<ReadAdrBase, CyReadAdrBase>; // Map AArch64->Cyclone type.
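The two SchedWriteVariants above boil down to: pay for the shift micro-op only when the offset register is actually scaled. A C++ analogue of that selection (the 1- and 4-cycle costs are assumptions for illustration):

#include <cstdio>

struct LoadIdx { bool scaledOffset; }; // stand-in for the MachineInstr query

// Mirrors CyWriteLDIdx: ScaledIdxPred selects [WriteIS, WriteLD],
// otherwise plain [WriteLD].
int loadIdxCycles(const LoadIdx &mi, int shiftCycles, int loadCycles) {
  return mi.scaledOffset ? shiftCycles + loadCycles : loadCycles;
}

int main() {
  std::printf("LDR x0, [x1, x2, lsl #3] -> %d\n", loadIdxCycles({true}, 1, 4));
  std::printf("LDR x0, [x1, x2]         -> %d\n", loadIdxCycles({false}, 1, 4));
  return 0;
}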
//---
// 7.8.9,7.8.11. Load/Store, paired
@@ -342,7 +342,9 @@ def : InstRW<[WriteVMov], (instrs ORRv16i8)>;
// INS V[x],V[y] is a WriteV.
// FMOVWSr,FMOVXDr,FMOVXDHighr
-def : SchedAlias<WriteFCopy, WriteVLD>;
+def : WriteRes<WriteFCopy, [CyUnitLS]> {
+ let Latency = 5;
+}
// FMOVSWr,FMOVDXr
def : InstRW<[WriteLD], (instrs FMOVSWr,FMOVDXr,FMOVDXHighr)>;
@@ -849,4 +851,15 @@ def : InstRW<[WriteAdr, WriteVSTPairShuffle], (instregex "ST4i(8|16|32)_POST")>;
def : InstRW<[WriteVSTShuffle, WriteVSTShuffle], (instrs ST4i64)>;
def : InstRW<[WriteAdr, WriteVSTShuffle, WriteVSTShuffle],(instrs ST4i64_POST)>;
+//---
+// Unused SchedRead types
+//---
+
+def : ReadAdvance<ReadI, 0>;
+def : ReadAdvance<ReadISReg, 0>;
+def : ReadAdvance<ReadIEReg, 0>;
+def : ReadAdvance<ReadIM, 0>;
+def : ReadAdvance<ReadIMA, 0>;
+def : ReadAdvance<ReadID, 0>;
+
} // SchedModel = CycloneModel
diff --git a/lib/Target/AArch64/AArch64Schedule.td b/lib/Target/AArch64/AArch64Schedule.td
index ec8450b..eaa9110 100644
--- a/lib/Target/AArch64/AArch64Schedule.td
+++ b/lib/Target/AArch64/AArch64Schedule.td
@@ -1,4 +1,4 @@
-//===- AArch64Schedule.td - AArch64 Scheduling Definitions -*- tablegen -*-===//
+//==-- AArch64Schedule.td - AArch64 Scheduling Definitions -*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,74 +7,98 @@
//
//===----------------------------------------------------------------------===//
-//===----------------------------------------------------------------------===//
-// Generic processor itineraries for legacy compatibility.
-
-def GenericItineraries : ProcessorItineraries<[], [], []>;
-
-
-//===----------------------------------------------------------------------===//
-// Base SchedReadWrite types
-
-// Basic ALU
-def WriteALU : SchedWrite; // Generic: may contain shift and/or ALU operation
-def WriteALUs : SchedWrite; // Shift only with no ALU operation
-def ReadALU : SchedRead; // Operand not needed for shifting
-def ReadALUs : SchedRead; // Operand needed for shifting
-
-// Multiply with optional accumulate
-def WriteMAC : SchedWrite;
-def ReadMAC : SchedRead;
-
-// Compares
-def WriteCMP : SchedWrite;
-def ReadCMP : SchedRead;
-
-// Division
-def WriteDiv : SchedWrite;
-def ReadDiv : SchedRead;
-
-// Loads
-def WriteLd : SchedWrite;
-def WritePreLd : SchedWrite;
-def WriteVecLd : SchedWrite;
-def ReadLd : SchedRead;
-def ReadPreLd : SchedRead;
-def ReadVecLd : SchedRead;
-
-// Stores
-def WriteSt : SchedWrite;
-def WriteVecSt : SchedWrite;
-def ReadSt : SchedRead;
-def ReadVecSt : SchedRead;
-
-// Branches
-def WriteBr : SchedWrite;
-def WriteBrL : SchedWrite;
-def ReadBr : SchedRead;
-
-// Floating Point ALU
-def WriteFPALU : SchedWrite;
-def ReadFPALU : SchedRead;
-
-// Floating Point MAC, Mul, Div, Sqrt
-// Most processors will simply send all of these down a dedicated pipe, but
-// they're explicitly seperated here for flexibility of modeling later. May
-// consider consolidating them into a single WriteFPXXXX type in the future.
-def WriteFPMAC : SchedWrite;
-def WriteFPMul : SchedWrite;
-def WriteFPDiv : SchedWrite;
-def WriteFPSqrt : SchedWrite;
-def ReadFPMAC : SchedRead;
-def ReadFPMul : SchedRead;
-def ReadFPDiv : SchedRead;
-def ReadFPSqrt : SchedRead;
-
-// Noop
-def WriteNoop : SchedWrite;
-
-
-//===----------------------------------------------------------------------===//
-// Subtarget specific Machine Models.
-
-include "AArch64ScheduleA53.td"
+// Define TII for use in SchedVariant Predicates.
+// const MachineInstr *MI and const TargetSchedModel *SchedModel
+// are defined by default.
+def : PredicateProlog<[{
+ const AArch64InstrInfo *TII =
+ static_cast<const AArch64InstrInfo*>(SchedModel->getInstrInfo());
+ (void)TII;
+}]>;
+
+// AArch64 Scheduler Definitions
+
+def WriteImm : SchedWrite; // MOVN, MOVZ
+// TODO: Provide variants for MOV32/64imm Pseudos that dynamically
+// select the correct sequence of WriteImms.
+
+def WriteI : SchedWrite; // ALU
+def WriteISReg : SchedWrite; // ALU of Shifted-Reg
+def WriteIEReg : SchedWrite; // ALU of Extended-Reg
+def ReadI : SchedRead; // ALU
+def ReadISReg : SchedRead; // ALU of Shifted-Reg
+def ReadIEReg : SchedRead; // ALU of Extended-Reg
+def WriteExtr : SchedWrite; // EXTR shifts a reg pair
+def ReadExtrHi : SchedRead; // Read the high reg of the EXTR pair
+def WriteIS : SchedWrite; // Shift/Scale
+def WriteID32 : SchedWrite; // 32-bit Divide
+def WriteID64 : SchedWrite; // 64-bit Divide
+def ReadID : SchedRead; // 32/64-bit Divide
+def WriteIM32 : SchedWrite; // 32-bit Multiply
+def WriteIM64 : SchedWrite; // 64-bit Multiply
+def ReadIM : SchedRead; // 32/64-bit Multiply
+def ReadIMA : SchedRead; // 32/64-bit Multiply Accumulate
+def WriteBr : SchedWrite; // Branch
+def WriteBrReg : SchedWrite; // Indirect Branch
+
+def WriteLD : SchedWrite; // Load from base addr plus immediate offset
+def WriteST : SchedWrite; // Store to base addr plus immediate offset
+def WriteSTP : SchedWrite; // Store a register pair.
+def WriteAdr : SchedWrite; // Address pre/post increment.
+
+def WriteLDIdx : SchedWrite; // Load from a register index (maybe scaled).
+def WriteSTIdx : SchedWrite; // Store to a register index (maybe scaled).
+def ReadAdrBase : SchedRead; // Read the base register of a reg-offset LD/ST.
+
+// Predicate for determining when a shiftable register is shifted.
+def RegShiftedPred : SchedPredicate<[{TII->hasShiftedReg(MI)}]>;
+
+// Predicate for determining when an extendable register is extended.
+def RegExtendedPred : SchedPredicate<[{TII->hasExtendedReg(MI)}]>;
+
+// ScaledIdxPred is true if a WriteLDIdx operand will be
+// scaled. Subtargets can use this to dynamically select resources and
+// latency for WriteLDIdx and ReadAdrBase.
+def ScaledIdxPred : SchedPredicate<[{TII->isScaledAddr(MI)}]>;
+
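The TII hooks named in these predicates (hasShiftedReg, hasExtendedReg, isScaledAddr) live in AArch64InstrInfo; the real code inspects the actual opcodes and shifter-operand encodings. Purely as a shape sketch, using a hypothetical stand-in type:

// Hypothetical sketch only -- not the real AArch64InstrInfo implementation.
struct FakeInstr {
  unsigned shiftAmount; // 0 means the shiftable operand is unshifted
};

// RegShiftedPred should be true only when the optional shift is used;
// that is what selects A53ReadShifted (advance 1) over A53ReadNotShifted
// (advance 2) in the A53 model above.
bool hasShiftedReg(const FakeInstr &mi) { return mi.shiftAmount != 0; }

int main() { return hasShiftedReg({3}) ? 0 : 1; }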
+// Serialized two-level address load.
+// EXAMPLE: LOADGot
+def WriteLDAdr : WriteSequence<[WriteAdr, WriteLD]>;
+
+// Serialized two-level address lookup.
+// EXAMPLE: MOVaddr...
+def WriteAdrAdr : WriteSequence<[WriteAdr, WriteAdr]>;
+
+// The second register of a load-pair.
+// LDP,LDPSW,LDNP,LDXP,LDAXP
+def WriteLDHi : SchedWrite;
+
+// Store-exclusive is a store followed by a dependent load.
+def WriteSTX : WriteSequence<[WriteST, WriteLD]>;
+
+def WriteSys : SchedWrite; // Long, variable latency system ops.
+def WriteBarrier : SchedWrite; // Memory barrier.
+def WriteHint : SchedWrite; // Hint instruction.
+
+def WriteF : SchedWrite; // General floating-point ops.
+def WriteFCmp : SchedWrite; // Floating-point compare.
+def WriteFCvt : SchedWrite; // Float conversion.
+def WriteFCopy : SchedWrite; // Float-int register copy.
+def WriteFImm : SchedWrite; // Floating-point immediate.
+def WriteFMul : SchedWrite; // Floating-point multiply.
+def WriteFDiv : SchedWrite; // Floating-point division.
+
+def WriteV : SchedWrite; // Vector ops.
+def WriteVLD : SchedWrite; // Vector loads.
+def WriteVST : SchedWrite; // Vector stores.
+
+// Read the unwritten lanes of the VLD's destination registers.
+def ReadVLD : SchedRead;
+
+// Sequential vector load and shuffle.
+def WriteVLDShuffle : WriteSequence<[WriteVLD, WriteV]>;
+def WriteVLDPairShuffle : WriteSequence<[WriteVLD, WriteV, WriteV]>;
+
+// Store a shuffled vector.
+def WriteVSTShuffle : WriteSequence<[WriteV, WriteVST]>;
+def WriteVSTPairShuffle : WriteSequence<[WriteV, WriteV, WriteVST]>;
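A WriteSequence serializes its component writes, so its overall latency is roughly the sum of the component latencies. A sketch with made-up per-write latencies:

#include <cstdio>
#include <initializer_list>

int seqLatency(std::initializer_list<int> parts) {
  int total = 0;
  for (int cycles : parts)
    total += cycles; // writes in a sequence complete back to back
  return total;
}

int main() {
  const int WriteAdr = 1, WriteLD = 4, WriteV = 3, WriteVST = 4; // assumed
  std::printf("WriteLDAdr          = %d\n", seqLatency({WriteAdr, WriteLD}));
  std::printf("WriteVSTPairShuffle = %d\n",
              seqLatency({WriteV, WriteV, WriteVST}));
  return 0;
}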
diff --git a/lib/Target/AArch64/AArch64ScheduleA53.td b/lib/Target/AArch64/AArch64ScheduleA53.td
deleted file mode 100644
index 20a14e7..0000000
--- a/lib/Target/AArch64/AArch64ScheduleA53.td
+++ /dev/null
@@ -1,144 +0,0 @@
-//=- AArch64ScheduleA53.td - ARM Cortex-A53 Scheduling Definitions -*- tablegen -*-=//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the itinerary class data for the ARM Cortex A53 processors.
-//
-//===----------------------------------------------------------------------===//
-
-// ===---------------------------------------------------------------------===//
-// The following definitions describe the simpler per-operand machine model.
-// This works with MachineScheduler. See MCSchedModel.h for details.
-
-// Cortex-A53 machine model for scheduling and other instruction cost heuristics.
-def CortexA53Model : SchedMachineModel {
- let IssueWidth = 2; // 2 micro-ops are dispatched per cycle.
- let MinLatency = 1 ; // OperandCycles are interpreted as MinLatency.
- let LoadLatency = 2; // Optimistic load latency assuming bypass.
- // This is overriden by OperandCycles if the
- // Itineraries are queried instead.
- let MispredictPenalty = 9; // Based on "Cortex-A53 Software Optimisation
- // Specification - Instruction Timings"
- // v 1.0 Spreadsheet
-}
-
-
-//===----------------------------------------------------------------------===//
-// Define each kind of processor resource and number available.
-
-// Modeling each pipeline as a ProcResource using the default BufferSize = -1.
-// Cortex-A53 is in-order and therefore should be using BufferSize = 0. The
-// current configuration performs better with the basic latencies provided so
-// far. Will revisit BufferSize once the latency information is more accurate.
-
-let SchedModel = CortexA53Model in {
-
-def A53UnitALU : ProcResource<2>; // Int ALU
-def A53UnitMAC : ProcResource<1>; // Int MAC
-def A53UnitDiv : ProcResource<1>; // Int Division
-def A53UnitLdSt : ProcResource<1>; // Load/Store
-def A53UnitB : ProcResource<1>; // Branch
-def A53UnitFPALU : ProcResource<1>; // FP ALU
-def A53UnitFPMDS : ProcResource<1>; // FP Mult/Div/Sqrt
-
-
-//===----------------------------------------------------------------------===//
-// Subtarget-specific SchedWrite types which both map the ProcResources and
-// set the latency.
-
-// Issue - Every instruction must consume an A53WriteIssue. Optionally,
-// instructions that cannot be dual-issued will also include the
-// A53WriteIssue2nd in their SchedRW list. That second WriteRes will
-// ensure that a second issue slot is consumed.
-def A53WriteIssue : SchedWriteRes<[]>;
-def A53WriteIssue2nd : SchedWriteRes<[]> { let Latency = 0; }
-
-// ALU - These are reduced to 1 despite a true latency of 4 in order to easily
-// model forwarding logic. Once forwarding is properly modelled, then
-// they'll be corrected.
-def : WriteRes<WriteALU, [A53UnitALU]> { let Latency = 1; }
-def : WriteRes<WriteALUs, [A53UnitALU]> { let Latency = 1; }
-def : WriteRes<WriteCMP, [A53UnitALU]> { let Latency = 1; }
-
-// MAC
-def : WriteRes<WriteMAC, [A53UnitMAC]> { let Latency = 4; }
-
-// Div
-def : WriteRes<WriteDiv, [A53UnitDiv]> { let Latency = 4; }
-
-// Load - Note: Vector loads take 1-5 cycles to issue. For the WriteVecLd below,
-// choosing the median of 3 which makes the latency 6. May model this more
-// carefully in the future.
-def : WriteRes<WriteLd, [A53UnitLdSt]> { let Latency = 4; }
-def : WriteRes<WritePreLd, [A53UnitLdSt]> { let Latency = 4; }
-def : WriteRes<WriteVecLd, [A53UnitLdSt]> { let Latency = 6; }
-
-// Store - Note: Vector stores take 1-3 cycles to issue. For the ReadVecSt below,
-// choosing the median of 2 which makes the latency 5. May model this more
-// carefully in the future.
-def : WriteRes<WriteSt, [A53UnitLdSt]> { let Latency = 4; }
-def : WriteRes<WriteVecSt, [A53UnitLdSt]> { let Latency = 5; }
-
-// Branch
-def : WriteRes<WriteBr, [A53UnitB]>;
-def : WriteRes<WriteBrL, [A53UnitB]>;
-
-// FP ALU
-def : WriteRes<WriteFPALU, [A53UnitFPALU]> {let Latency = 6; }
-
-// FP MAC, Mul, Div, Sqrt
-// Using Double Precision numbers for now as a worst case. Additionally, not
-// modeling the exact hazard but instead treating the whole pipe as a hazard.
-// As an example VMUL, VMLA, and others are actually pipelined. VDIV and VSQRT
-// have a total latency of 33 and 32 respectively but only a hazard of 29 and
-// 28 (double-prescion example).
-def : WriteRes<WriteFPMAC, [A53UnitFPMDS]> { let Latency = 10; }
-def : WriteRes<WriteFPMul, [A53UnitFPMDS]> { let Latency = 6; }
-def : WriteRes<WriteFPDiv, [A53UnitFPMDS]> { let Latency = 33;
- let ResourceCycles = [29]; }
-def : WriteRes<WriteFPSqrt, [A53UnitFPMDS]> { let Latency = 32;
- let ResourceCycles = [28]; }
-
-
-//===----------------------------------------------------------------------===//
-// Subtarget-specific SchedRead types.
-
-// No forwarding defined for ReadALU yet.
-def : ReadAdvance<ReadALU, 0>;
-
-// No forwarding defined for ReadCMP yet.
-def : ReadAdvance<ReadCMP, 0>;
-
-// No forwarding defined for ReadBr yet.
-def : ReadAdvance<ReadBr, 0>;
-
-// No forwarding defined for ReadMAC yet.
-def : ReadAdvance<ReadMAC, 0>;
-
-// No forwarding defined for ReadDiv yet.
-def : ReadAdvance<ReadDiv, 0>;
-
-// No forwarding defined for ReadLd, ReadPreLd, ReadVecLd yet.
-def : ReadAdvance<ReadLd, 0>;
-def : ReadAdvance<ReadPreLd, 0>;
-def : ReadAdvance<ReadVecLd, 0>;
-
-// No forwarding defined for ReadSt and ReadVecSt yet.
-def : ReadAdvance<ReadSt, 0>;
-def : ReadAdvance<ReadVecSt, 0>;
-
-// No forwarding defined for ReadFPALU yet.
-def : ReadAdvance<ReadFPALU, 0>;
-
-// No forwarding defined for ReadFPMAC/Mul/Div/Sqrt yet.
-def : ReadAdvance<ReadFPMAC, 0>;
-def : ReadAdvance<ReadFPMul, 0>;
-def : ReadAdvance<ReadFPDiv, 0>;
-def : ReadAdvance<ReadFPSqrt, 0>;
-
-}
diff --git a/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp b/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
index 6bbe075..5c65b75 100644
--- a/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
+++ b/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
@@ -11,15 +11,49 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "arm-selectiondag-info"
#include "AArch64TargetMachine.h"
-#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;
-AArch64SelectionDAGInfo::AArch64SelectionDAGInfo(const AArch64TargetMachine &TM)
- : TargetSelectionDAGInfo(TM),
- Subtarget(&TM.getSubtarget<AArch64Subtarget>()) {
-}
+#define DEBUG_TYPE "aarch64-selectiondag-info"
+
+AArch64SelectionDAGInfo::AArch64SelectionDAGInfo(const TargetMachine &TM)
+ : TargetSelectionDAGInfo(TM),
+ Subtarget(&TM.getSubtarget<AArch64Subtarget>()) {}
+
+AArch64SelectionDAGInfo::~AArch64SelectionDAGInfo() {}
+
+SDValue AArch64SelectionDAGInfo::EmitTargetCodeForMemset(
+ SelectionDAG &DAG, SDLoc dl, SDValue Chain, SDValue Dst, SDValue Src,
+ SDValue Size, unsigned Align, bool isVolatile,
+ MachinePointerInfo DstPtrInfo) const {
+ // Check to see if there is a specialized entry-point for memory zeroing.
+ ConstantSDNode *V = dyn_cast<ConstantSDNode>(Src);
+ ConstantSDNode *SizeValue = dyn_cast<ConstantSDNode>(Size);
+ const char *bzeroEntry =
+ (V && V->isNullValue()) ? Subtarget->getBZeroEntry() : nullptr;
+ // For small size (< 256), it is not beneficial to use bzero
+ // instead of memset.
+ if (bzeroEntry && (!SizeValue || SizeValue->getZExtValue() > 256)) {
+ const AArch64TargetLowering &TLI =
+ *static_cast<const AArch64TargetLowering *>(
+ DAG.getTarget().getTargetLowering());
-AArch64SelectionDAGInfo::~AArch64SelectionDAGInfo() {
+ EVT IntPtr = TLI.getPointerTy();
+ Type *IntPtrTy = getDataLayout()->getIntPtrType(*DAG.getContext());
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ Entry.Node = Dst;
+ Entry.Ty = IntPtrTy;
+ Args.push_back(Entry);
+ Entry.Node = Size;
+ Args.push_back(Entry);
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(dl).setChain(Chain)
+ .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()),
+ DAG.getExternalSymbol(bzeroEntry, IntPtr), &Args, 0)
+ .setDiscardResult();
+ std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
+ return CallResult.second;
+ }
+ return SDValue();
}
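At the source level, the rule this lowering implements is simple: zeroing stores of more than 256 bytes (or of a size unknown at compile time) go to the platform bzero entry point when one exists; everything else stays memset. A runtime analogue of the same decision (sketch only; the DAG version emits the call directly):

#include <cstring>
#include <strings.h> // bzero (POSIX)

void lowerMemset(void *dst, int value, size_t size) {
  if (value == 0 && size > 256) {
    bzero(dst, size);            // specialized zeroing entry point
    return;
  }
  std::memset(dst, value, size); // small or non-zero fill: plain memset
}

int main() {
  char buf[512];
  lowerMemset(buf, 0, sizeof buf); // routed to bzero
  lowerMemset(buf, 0, 64);         // stays memset
  return 0;
}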
diff --git a/lib/Target/AArch64/AArch64SelectionDAGInfo.h b/lib/Target/AArch64/AArch64SelectionDAGInfo.h
index d412ed2..8381f99 100644
--- a/lib/Target/AArch64/AArch64SelectionDAGInfo.h
+++ b/lib/Target/AArch64/AArch64SelectionDAGInfo.h
@@ -11,22 +11,27 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_AARCH64SELECTIONDAGINFO_H
-#define LLVM_AARCH64SELECTIONDAGINFO_H
+#ifndef AArch64SELECTIONDAGINFO_H
+#define AArch64SELECTIONDAGINFO_H
#include "llvm/Target/TargetSelectionDAGInfo.h"
namespace llvm {
-class AArch64TargetMachine;
-
class AArch64SelectionDAGInfo : public TargetSelectionDAGInfo {
+ /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
+ /// make the right decision when generating code for different targets.
const AArch64Subtarget *Subtarget;
+
public:
- explicit AArch64SelectionDAGInfo(const AArch64TargetMachine &TM);
+ explicit AArch64SelectionDAGInfo(const TargetMachine &TM);
~AArch64SelectionDAGInfo();
-};
+ SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl, SDValue Chain,
+ SDValue Dst, SDValue Src, SDValue Size,
+ unsigned Align, bool isVolatile,
+ MachinePointerInfo DstPtrInfo) const override;
+};
}
#endif
diff --git a/lib/Target/ARM64/ARM64StorePairSuppress.cpp b/lib/Target/AArch64/AArch64StorePairSuppress.cpp
index 6521d13..45f8ddb 100644
--- a/lib/Target/ARM64/ARM64StorePairSuppress.cpp
+++ b/lib/Target/AArch64/AArch64StorePairSuppress.cpp
@@ -1,4 +1,4 @@
-//===---- ARM64StorePairSuppress.cpp --- Suppress store pair formation ----===//
+//===--- AArch64StorePairSuppress.cpp --- Suppress store pair formation ---===//
//
// The LLVM Compiler Infrastructure
//
@@ -11,22 +11,23 @@
// store pairs. Later we may do the same for floating point loads.
// ===---------------------------------------------------------------------===//
-#define DEBUG_TYPE "arm64-stp-suppress"
-#include "ARM64InstrInfo.h"
+#include "AArch64InstrInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineTraceMetrics.h"
-#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
using namespace llvm;
+#define DEBUG_TYPE "aarch64-stp-suppress"
+
namespace {
-class ARM64StorePairSuppress : public MachineFunctionPass {
- const ARM64InstrInfo *TII;
+class AArch64StorePairSuppress : public MachineFunctionPass {
+ const AArch64InstrInfo *TII;
const TargetRegisterInfo *TRI;
const MachineRegisterInfo *MRI;
MachineFunction *MF;
@@ -36,10 +37,10 @@ class ARM64StorePairSuppress : public MachineFunctionPass {
public:
static char ID;
- ARM64StorePairSuppress() : MachineFunctionPass(ID) {}
+ AArch64StorePairSuppress() : MachineFunctionPass(ID) {}
virtual const char *getPassName() const override {
- return "ARM64 Store Pair Suppression";
+ return "AArch64 Store Pair Suppression";
}
bool runOnMachineFunction(MachineFunction &F) override;
@@ -56,11 +57,11 @@ private:
MachineFunctionPass::getAnalysisUsage(AU);
}
};
-char ARM64StorePairSuppress::ID = 0;
+char AArch64StorePairSuppress::ID = 0;
} // anonymous
-FunctionPass *llvm::createARM64StorePairSuppressPass() {
- return new ARM64StorePairSuppress();
+FunctionPass *llvm::createAArch64StorePairSuppressPass() {
+ return new AArch64StorePairSuppress();
}
/// Return true if an STP can be added to this block without increasing the
@@ -69,7 +70,7 @@ FunctionPass *llvm::createARM64StorePairSuppressPass() {
/// critical path. If the critical path is longer than the resource height, the
/// extra vector ops can limit physreg renaming. Otherwise, it could simply
/// oversaturate the vector units.
-bool ARM64StorePairSuppress::shouldAddSTPToBlock(const MachineBasicBlock *BB) {
+bool AArch64StorePairSuppress::shouldAddSTPToBlock(const MachineBasicBlock *BB) {
if (!MinInstr)
MinInstr = Traces->getEnsemble(MachineTraceMetrics::TS_MinInstrCount);
@@ -78,7 +79,7 @@ bool ARM64StorePairSuppress::shouldAddSTPToBlock(const MachineBasicBlock *BB) {
// Get the machine model's scheduling class for STPQi.
// Bypass TargetSchedule's SchedClass resolution since we only have an opcode.
- unsigned SCIdx = TII->get(ARM64::STPDi).getSchedClass();
+ unsigned SCIdx = TII->get(AArch64::STPDi).getSchedClass();
const MCSchedClassDesc *SCDesc =
SchedModel.getMCSchedModel()->getSchedClassDesc(SCIdx);
@@ -102,22 +103,22 @@ bool ARM64StorePairSuppress::shouldAddSTPToBlock(const MachineBasicBlock *BB) {
/// tell us if it's profitable with no cpu knowledge here.
///
/// FIXME: We plan to develop a decent Target abstraction for simple loads and
-/// stores. Until then use a nasty switch similar to ARM64LoadStoreOptimizer.
-bool ARM64StorePairSuppress::isNarrowFPStore(const MachineInstr &MI) {
+/// stores. Until then use a nasty switch similar to AArch64LoadStoreOptimizer.
+bool AArch64StorePairSuppress::isNarrowFPStore(const MachineInstr &MI) {
switch (MI.getOpcode()) {
default:
return false;
- case ARM64::STRSui:
- case ARM64::STRDui:
- case ARM64::STURSi:
- case ARM64::STURDi:
+ case AArch64::STRSui:
+ case AArch64::STRDui:
+ case AArch64::STURSi:
+ case AArch64::STURDi:
return true;
}
}
-bool ARM64StorePairSuppress::runOnMachineFunction(MachineFunction &mf) {
+bool AArch64StorePairSuppress::runOnMachineFunction(MachineFunction &mf) {
MF = &mf;
- TII = static_cast<const ARM64InstrInfo *>(MF->getTarget().getInstrInfo());
+ TII = static_cast<const AArch64InstrInfo *>(MF->getTarget().getInstrInfo());
TRI = MF->getTarget().getRegisterInfo();
MRI = &MF->getRegInfo();
const TargetSubtargetInfo &ST =
@@ -125,7 +126,7 @@ bool ARM64StorePairSuppress::runOnMachineFunction(MachineFunction &mf) {
SchedModel.init(*ST.getSchedModel(), &ST, TII);
Traces = &getAnalysis<MachineTraceMetrics>();
- MinInstr = 0;
+ MinInstr = nullptr;
DEBUG(dbgs() << "*** " << getPassName() << ": " << MF->getName() << '\n');
@@ -138,10 +139,10 @@ bool ARM64StorePairSuppress::runOnMachineFunction(MachineFunction &mf) {
// precisely determine whether a store pair can be formed. But we do want to
// filter out most situations where we can't form store pairs to avoid
// computing trace metrics in those cases.
- for (auto &MBB: *MF) {
+ for (auto &MBB : *MF) {
bool SuppressSTP = false;
unsigned PrevBaseReg = 0;
- for (auto &MI: MBB) {
+ for (auto &MI : MBB) {
if (!isNarrowFPStore(MI))
continue;
unsigned BaseReg;
diff --git a/lib/Target/AArch64/AArch64Subtarget.cpp b/lib/Target/AArch64/AArch64Subtarget.cpp
index 9140bbd..cd69994 100644
--- a/lib/Target/AArch64/AArch64Subtarget.cpp
+++ b/lib/Target/AArch64/AArch64Subtarget.cpp
@@ -1,4 +1,4 @@
-//===-- AArch64Subtarget.cpp - AArch64 Subtarget Information --------------===//
+//===-- AArch64Subtarget.cpp - AArch64 Subtarget Information ----*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,57 +7,110 @@
//
//===----------------------------------------------------------------------===//
//
-// This file implements the AArch64 specific subclass of TargetSubtargetInfo.
+// This file implements the AArch64 specific subclass of TargetSubtarget.
//
//===----------------------------------------------------------------------===//
+#include "AArch64InstrInfo.h"
#include "AArch64Subtarget.h"
-#include "AArch64RegisterInfo.h"
-#include "MCTargetDesc/AArch64MCTargetDesc.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/IR/GlobalValue.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/Support/TargetRegistry.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "aarch64-subtarget"
-#define GET_SUBTARGETINFO_TARGET_DESC
#define GET_SUBTARGETINFO_CTOR
+#define GET_SUBTARGETINFO_TARGET_DESC
#include "AArch64GenSubtargetInfo.inc"
-using namespace llvm;
-
-// Pin the vtable to this file.
-void AArch64Subtarget::anchor() {}
+static cl::opt<bool>
+EnableEarlyIfConvert("aarch64-early-ifcvt", cl::desc("Enable the early if "
+ "converter pass"), cl::init(true), cl::Hidden);
-AArch64Subtarget::AArch64Subtarget(StringRef TT, StringRef CPU, StringRef FS,
- bool LittleEndian)
+AArch64Subtarget::AArch64Subtarget(const std::string &TT,
+ const std::string &CPU,
+ const std::string &FS, bool LittleEndian)
: AArch64GenSubtargetInfo(TT, CPU, FS), ARMProcFamily(Others),
- HasFPARMv8(false), HasNEON(false), HasCrypto(false), TargetTriple(TT),
- CPUString(CPU), IsLittleEndian(LittleEndian) {
+ HasFPARMv8(false), HasNEON(false), HasCrypto(false), HasCRC(false),
+ HasZeroCycleRegMove(false), HasZeroCycleZeroing(false), CPUString(CPU),
+ TargetTriple(TT), IsLittleEndian(LittleEndian) {
+ // Determine default and user-specified characteristics
+
+ if (CPUString.empty())
+ CPUString = "generic";
- initializeSubtargetFeatures(CPU, FS);
+ ParseSubtargetFeatures(CPUString, FS);
}
-void AArch64Subtarget::initializeSubtargetFeatures(StringRef CPU,
- StringRef FS) {
- if (CPU.empty())
- CPUString = "generic";
+/// ClassifyGlobalReference - Find the target operand flags that describe
+/// how a global value should be referenced for the current subtarget.
+unsigned char
+AArch64Subtarget::ClassifyGlobalReference(const GlobalValue *GV,
+ const TargetMachine &TM) const {
+
+ // Determine whether this is a reference to a definition or a declaration.
+ // Materializable GVs (in JIT lazy compilation mode) do not require an extra
+ // load from stub.
+ bool isDecl = GV->hasAvailableExternallyLinkage();
+ if (GV->isDeclaration() && !GV->isMaterializable())
+ isDecl = true;
+
+ // MachO large model always goes via a GOT, simply to get a single 8-byte
+ // absolute relocation on all global addresses.
+ if (TM.getCodeModel() == CodeModel::Large && isTargetMachO())
+ return AArch64II::MO_GOT;
+
+ // The small code model's direct accesses use ADRP, which cannot necessarily
+ // produce the value 0 (if the code is above 4GB). Therefore they must use the
+ // GOT.
+ if (TM.getCodeModel() == CodeModel::Small && GV->isWeakForLinker() && isDecl)
+ return AArch64II::MO_GOT;
+
+ // If symbol visibility is hidden, the extra load is not needed if
+ // the symbol is definitely defined in the current translation unit.
- std::string FullFS = FS;
- if (CPUString == "generic") {
- // Enable FP by default.
- if (FullFS.empty())
- FullFS = "+fp-armv8";
+ // The handling of non-hidden symbols in PIC mode is rather target-dependent:
+ // + On MachO, if the symbol is defined in this module the GOT can be
+ // skipped.
+ // + On ELF, the R_AARCH64_COPY relocation means that even symbols actually
+ // defined could end up in unexpected places. Use a GOT.
+ if (TM.getRelocationModel() != Reloc::Static && GV->hasDefaultVisibility()) {
+ if (isTargetMachO())
+ return (isDecl || GV->isWeakForLinker()) ? AArch64II::MO_GOT
+ : AArch64II::MO_NO_FLAG;
else
- FullFS = "+fp-armv8," + FullFS;
+ // No need to go through the GOT for local symbols on ELF.
+ return GV->hasLocalLinkage() ? AArch64II::MO_NO_FLAG : AArch64II::MO_GOT;
}
- ParseSubtargetFeatures(CPU, FullFS);
+ return AArch64II::MO_NO_FLAG;
}
-bool AArch64Subtarget::GVIsIndirectSymbol(const GlobalValue *GV,
- Reloc::Model RelocM) const {
- if (RelocM == Reloc::Static)
- return false;
+/// This function returns the name of a function which has an interface
+/// like the non-standard bzero function, if such a function exists on
+/// the current subtarget and it is considered preferable over
+/// memset with zero passed as the second argument. Otherwise it
+/// returns null.
+const char *AArch64Subtarget::getBZeroEntry() const {
+ // Prefer bzero on Darwin only.
+ if (isTargetDarwin())
+ return "bzero";
+
+ return nullptr;
+}
+
+void AArch64Subtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
+ MachineInstr *begin, MachineInstr *end,
+ unsigned NumRegionInstrs) const {
+ // LNT run (at least on Cyclone) showed reasonably significant gains for
+ // bi-directional scheduling. 253.perlbmk.
+ Policy.OnlyTopDown = false;
+ Policy.OnlyBottomUp = false;
+}
- return !GV->hasLocalLinkage() && !GV->hasHiddenVisibility();
+bool AArch64Subtarget::enableEarlyIfConversion() const {
+ return EnableEarlyIfConvert;
}
diff --git a/lib/Target/AArch64/AArch64Subtarget.h b/lib/Target/AArch64/AArch64Subtarget.h
index 68c6c4b..590ea05 100644
--- a/lib/Target/AArch64/AArch64Subtarget.h
+++ b/lib/Target/AArch64/AArch64Subtarget.h
@@ -1,4 +1,4 @@
-//==-- AArch64Subtarget.h - Define Subtarget for the AArch64 ---*- C++ -*--===//
+//===--- AArch64Subtarget.h - Define Subtarget for the AArch64 -*- C++ -*--===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,29 +7,27 @@
//
//===----------------------------------------------------------------------===//
//
-// This file declares the AArch64 specific subclass of TargetSubtargetInfo.
+// This file declares the AArch64 specific subclass of TargetSubtarget.
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TARGET_AARCH64_SUBTARGET_H
-#define LLVM_TARGET_AARCH64_SUBTARGET_H
+#ifndef AArch64SUBTARGET_H
+#define AArch64SUBTARGET_H
-#include "llvm/ADT/Triple.h"
#include "llvm/Target/TargetSubtargetInfo.h"
+#include "AArch64RegisterInfo.h"
+#include <string>
#define GET_SUBTARGETINFO_HEADER
#include "AArch64GenSubtargetInfo.inc"
-#include <string>
-
namespace llvm {
-class StringRef;
class GlobalValue;
+class StringRef;
class AArch64Subtarget : public AArch64GenSubtargetInfo {
- virtual void anchor();
protected:
- enum ARMProcFamilyEnum {Others, CortexA53, CortexA57};
+ enum ARMProcFamilyEnum {Others, CortexA53, CortexA57, Cyclone};
/// ARMProcFamily - ARM processor family: Cortex-A53, Cortex-A57, and others.
ARMProcFamilyEnum ARMProcFamily;
@@ -37,47 +35,76 @@ protected:
bool HasFPARMv8;
bool HasNEON;
bool HasCrypto;
+ bool HasCRC;
- /// TargetTriple - What processor and OS we're targeting.
- Triple TargetTriple;
+ // HasZeroCycleRegMove - Has zero-cycle register mov instructions.
+ bool HasZeroCycleRegMove;
+
+ // HasZeroCycleZeroing - Has zero-cycle zeroing instructions.
+ bool HasZeroCycleZeroing;
/// CPUString - String name of used CPU.
std::string CPUString;
- /// IsLittleEndian - The target is Little Endian
- bool IsLittleEndian;
+ /// TargetTriple - What processor and OS we're targeting.
+ Triple TargetTriple;
-private:
- void initializeSubtargetFeatures(StringRef CPU, StringRef FS);
+ /// IsLittleEndian - Is the target little endian?
+ bool IsLittleEndian;
public:
/// This constructor initializes the data members to match that
/// of the specified triple.
- ///
- AArch64Subtarget(StringRef TT, StringRef CPU, StringRef FS,
- bool LittleEndian);
+ AArch64Subtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS, bool LittleEndian);
- virtual bool enableMachineScheduler() const {
- return true;
- }
-
- /// ParseSubtargetFeatures - Parses features string setting specified
- /// subtarget options. Definition of function is auto generated by tblgen.
- void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+ bool enableMachineScheduler() const override { return true; }
- bool GVIsIndirectSymbol(const GlobalValue *GV, Reloc::Model RelocM) const;
+ bool hasZeroCycleRegMove() const { return HasZeroCycleRegMove; }
- bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); }
- bool isTargetLinux() const { return TargetTriple.isOSLinux(); }
+ bool hasZeroCycleZeroing() const { return HasZeroCycleZeroing; }
bool hasFPARMv8() const { return HasFPARMv8; }
bool hasNEON() const { return HasNEON; }
bool hasCrypto() const { return HasCrypto; }
+ bool hasCRC() const { return HasCRC; }
+
+ bool isLittleEndian() const { return IsLittleEndian; }
+
+ bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); }
+
+ bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); }
+
+ bool isTargetMachO() const { return TargetTriple.isOSBinFormatMachO(); }
+
+ bool isCyclone() const { return CPUString == "cyclone"; }
+
+ /// getMaxInlineSizeThreshold - Returns the maximum memset / memcpy size
+ /// that still makes it profitable to inline the call.
+ unsigned getMaxInlineSizeThreshold() const { return 64; }
+
+ /// ParseSubtargetFeatures - Parses features string setting specified
+ /// subtarget options. Definition of function is auto generated by tblgen.
+ void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+
+ /// ClassifyGlobalReference - Find the target operand flags that describe
+ /// how a global value should be referenced for the current subtarget.
+ unsigned char ClassifyGlobalReference(const GlobalValue *GV,
+ const TargetMachine &TM) const;
+
+ /// This function returns the name of a function which has an interface
+ /// like the non-standard bzero function, if such a function exists on
+ /// the current subtarget and it is considered preferable over
+ /// memset with zero passed as the second argument. Otherwise it
+ /// returns null.
+ const char *getBZeroEntry() const;
- bool isLittle() const { return IsLittleEndian; }
+ void overrideSchedPolicy(MachineSchedPolicy &Policy, MachineInstr *begin,
+ MachineInstr *end,
+ unsigned NumRegionInstrs) const override;
- const std::string & getCPUString() const { return CPUString; }
+ bool enableEarlyIfConversion() const override;
};
} // End llvm namespace
-#endif // LLVM_TARGET_AARCH64_SUBTARGET_H
+#endif // AArch64SUBTARGET_H
diff --git a/lib/Target/AArch64/AArch64TargetMachine.cpp b/lib/Target/AArch64/AArch64TargetMachine.cpp
index d9c990d..0b5dd2f 100644
--- a/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -7,41 +7,80 @@
//
//===----------------------------------------------------------------------===//
//
-// This file contains the implementation of the AArch64TargetMachine
-// methods. Principally just setting up the passes needed to generate correct
-// code on this architecture.
//
//===----------------------------------------------------------------------===//
#include "AArch64.h"
#include "AArch64TargetMachine.h"
-#include "MCTargetDesc/AArch64MCTargetDesc.h"
-#include "llvm/CodeGen/Passes.h"
#include "llvm/PassManager.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/TargetRegistry.h"
-
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Transforms/Scalar.h"
using namespace llvm;
+static cl::opt<bool>
+EnableCCMP("aarch64-ccmp", cl::desc("Enable the CCMP formation pass"),
+ cl::init(true), cl::Hidden);
+
+static cl::opt<bool>
+EnableStPairSuppress("aarch64-stp-suppress", cl::desc("Suppress STP for AArch64"),
+ cl::init(true), cl::Hidden);
+
+static cl::opt<bool>
+EnableAdvSIMDScalar("aarch64-simd-scalar", cl::desc("Enable use of AdvSIMD scalar"
+ " integer instructions"), cl::init(false), cl::Hidden);
+
+static cl::opt<bool>
+EnablePromoteConstant("aarch64-promote-const", cl::desc("Enable the promote "
+ "constant pass"), cl::init(true), cl::Hidden);
+
+static cl::opt<bool>
+EnableCollectLOH("aarch64-collect-loh", cl::desc("Enable the pass that emits the"
+ " linker optimization hints (LOH)"), cl::init(true),
+ cl::Hidden);
+
+static cl::opt<bool>
+EnableDeadRegisterElimination("aarch64-dead-def-elimination", cl::Hidden,
+ cl::desc("Enable the pass that removes dead"
+ " definitons and replaces stores to"
+ " them with stores to the zero"
+ " register"),
+ cl::init(true));
+
+static cl::opt<bool>
+EnableLoadStoreOpt("aarch64-load-store-opt", cl::desc("Enable the load/store pair"
+ " optimization pass"), cl::init(true), cl::Hidden);
+
extern "C" void LLVMInitializeAArch64Target() {
+ // Register the target.
RegisterTargetMachine<AArch64leTargetMachine> X(TheAArch64leTarget);
RegisterTargetMachine<AArch64beTargetMachine> Y(TheAArch64beTarget);
+
+ RegisterTargetMachine<AArch64leTargetMachine> Z(TheARM64leTarget);
+ RegisterTargetMachine<AArch64beTargetMachine> W(TheARM64beTarget);
}
+/// TargetMachine ctor - Create an AArch64 architecture model.
+///
AArch64TargetMachine::AArch64TargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL,
bool LittleEndian)
- : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
- Subtarget(TT, CPU, FS, LittleEndian),
- InstrInfo(Subtarget),
- DL(LittleEndian ?
- "e-m:e-i64:64-i128:128-n32:64-S128" :
- "E-m:e-i64:64-i128:128-n32:64-S128"),
- TLInfo(*this),
- TSInfo(*this),
- FrameLowering(Subtarget) {
+ : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+ Subtarget(TT, CPU, FS, LittleEndian),
+ // This nested ternary is horrible, but DL needs to be properly
+ // initialized before TLInfo is constructed.
+ DL(Subtarget.isTargetMachO()
+ ? "e-m:o-i64:64-i128:128-n32:64-S128"
+ : (LittleEndian ? "e-m:e-i64:64-i128:128-n32:64-S128"
+ : "E-m:e-i64:64-i128:128-n32:64-S128")),
+ InstrInfo(Subtarget), TLInfo(*this), FrameLowering(*this, Subtarget),
+ TSInfo(*this) {
initAsmInfo();
}
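The three datalayout strings differ only in endianness and mangling. Decoded per the LLVM DataLayout string syntax:

// "e-m:o-i64:64-i128:128-n32:64-S128"  (MachO variant chosen above)
//   e         little endian ("E" in the big-endian string)
//   m:o       MachO name mangling ("m:e" = ELF in the other two strings)
//   i64:64    i64 is ABI-aligned to 64 bits
//   i128:128  i128 is ABI-aligned to 128 bits
//   n32:64    native integer widths are 32 and 64 bits
//   S128      the stack is 128-bit aligned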
@@ -63,50 +102,107 @@ AArch64beTargetMachine(const Target &T, StringRef TT,
CodeGenOpt::Level OL)
: AArch64TargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {}
-void AArch64TargetMachine::addAnalysisPasses(PassManagerBase &PM) {
- // Add first the target-independent BasicTTI pass, then our AArch64 pass. This
- // allows the AArch64 pass to delegate to the target independent layer when
- // appropriate.
- PM.add(createBasicTargetTransformInfoPass(this));
- PM.add(createAArch64TargetTransformInfoPass(this));
-}
-
namespace {
/// AArch64 Code Generator Pass Configuration Options.
class AArch64PassConfig : public TargetPassConfig {
public:
AArch64PassConfig(AArch64TargetMachine *TM, PassManagerBase &PM)
- : TargetPassConfig(TM, PM) {}
+ : TargetPassConfig(TM, PM) {}
AArch64TargetMachine &getAArch64TargetMachine() const {
return getTM<AArch64TargetMachine>();
}
- const AArch64Subtarget &getAArch64Subtarget() const {
- return *getAArch64TargetMachine().getSubtargetImpl();
- }
-
- virtual bool addInstSelector();
- virtual bool addPreEmitPass();
+ bool addPreISel() override;
+ bool addInstSelector() override;
+ bool addILPOpts() override;
+ bool addPreRegAlloc() override;
+ bool addPostRegAlloc() override;
+ bool addPreSched2() override;
+ bool addPreEmitPass() override;
};
} // namespace
+void AArch64TargetMachine::addAnalysisPasses(PassManagerBase &PM) {
+ // Add first the target-independent BasicTTI pass, then our AArch64 pass. This
+ // allows the AArch64 pass to delegate to the target independent layer when
+ // appropriate.
+ PM.add(createBasicTargetTransformInfoPass(this));
+ PM.add(createAArch64TargetTransformInfoPass(this));
+}
+
TargetPassConfig *AArch64TargetMachine::createPassConfig(PassManagerBase &PM) {
return new AArch64PassConfig(this, PM);
}
-bool AArch64PassConfig::addPreEmitPass() {
- addPass(&UnpackMachineBundlesID);
- addPass(createAArch64BranchFixupPass());
- return true;
+// Pass Pipeline Configuration
+bool AArch64PassConfig::addPreISel() {
+ // Run promote constant before global merge, so that the promoted constants
+ // get a chance to be merged
+ if (TM->getOptLevel() != CodeGenOpt::None && EnablePromoteConstant)
+ addPass(createAArch64PromoteConstantPass());
+ if (TM->getOptLevel() != CodeGenOpt::None)
+ addPass(createGlobalMergePass(TM));
+ if (TM->getOptLevel() != CodeGenOpt::None)
+ addPass(createAArch64AddressTypePromotionPass());
+
+ // Always expand atomic operations, we don't deal with atomicrmw or cmpxchg
+ // ourselves.
+ addPass(createAtomicExpandLoadLinkedPass(TM));
+
+ return false;
}
bool AArch64PassConfig::addInstSelector() {
- addPass(createAArch64ISelDAG(getAArch64TargetMachine(), getOptLevel()));
+ addPass(createAArch64ISelDag(getAArch64TargetMachine(), getOptLevel()));
- // For ELF, cleanup any local-dynamic TLS accesses.
- if (getAArch64Subtarget().isTargetELF() && getOptLevel() != CodeGenOpt::None)
+ // For ELF, clean up any local-dynamic TLS accesses (i.e. combine as many
+ // references to _TLS_MODULE_BASE_ as possible).
+ if (TM->getSubtarget<AArch64Subtarget>().isTargetELF() &&
+ getOptLevel() != CodeGenOpt::None)
addPass(createAArch64CleanupLocalDynamicTLSPass());
return false;
}
+
+bool AArch64PassConfig::addILPOpts() {
+ if (EnableCCMP)
+ addPass(createAArch64ConditionalCompares());
+ addPass(&EarlyIfConverterID);
+ if (EnableStPairSuppress)
+ addPass(createAArch64StorePairSuppressPass());
+ return true;
+}
+
+bool AArch64PassConfig::addPreRegAlloc() {
+ // Use AdvSIMD scalar instructions whenever profitable.
+ if (TM->getOptLevel() != CodeGenOpt::None && EnableAdvSIMDScalar)
+ addPass(createAArch64AdvSIMDScalar());
+ return true;
+}
+
+bool AArch64PassConfig::addPostRegAlloc() {
+ // Change dead register definitions to refer to the zero register.
+ if (TM->getOptLevel() != CodeGenOpt::None && EnableDeadRegisterElimination)
+ addPass(createAArch64DeadRegisterDefinitions());
+ return true;
+}
+
+bool AArch64PassConfig::addPreSched2() {
+ // Expand some pseudo instructions to allow proper scheduling.
+ addPass(createAArch64ExpandPseudoPass());
+ // Use load/store pair instructions when possible.
+ if (TM->getOptLevel() != CodeGenOpt::None && EnableLoadStoreOpt)
+ addPass(createAArch64LoadStoreOptimizationPass());
+ return true;
+}
+
+bool AArch64PassConfig::addPreEmitPass() {
+ // Relax conditional branch instructions if they're otherwise out of
+ // range of their destination.
+ addPass(createAArch64BranchRelaxation());
+ if (TM->getOptLevel() != CodeGenOpt::None && EnableCollectLOH &&
+ TM->getSubtarget<AArch64Subtarget>().isTargetMachO())
+ addPass(createAArch64CollectLOHPass());
+ return true;
+}
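Put together, the hooks above yield roughly this pipeline at -O2 with the default flag values (a sketch read off the code, not tool output):

// PreISel:      AArch64PromoteConstant -> GlobalMerge
//               -> AArch64AddressTypePromotion -> AtomicExpandLoadLinked
// ISel:         AArch64ISelDag (+ local-dynamic TLS cleanup on ELF)
// ILP opts:     AArch64ConditionalCompares -> EarlyIfConversion
//               -> AArch64StorePairSuppress
// PreRegAlloc:  AArch64AdvSIMDScalar (only with -aarch64-simd-scalar)
// PostRegAlloc: AArch64DeadRegisterDefinitions
// PreSched2:    AArch64ExpandPseudo -> AArch64LoadStoreOpt
// PreEmit:      AArch64BranchRelaxation (+ AArch64CollectLOH on MachO)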
diff --git a/lib/Target/AArch64/AArch64TargetMachine.h b/lib/Target/AArch64/AArch64TargetMachine.h
index 4297c92..079b19b 100644
--- a/lib/Target/AArch64/AArch64TargetMachine.h
+++ b/lib/Target/AArch64/AArch64TargetMachine.h
@@ -1,4 +1,4 @@
-//=== AArch64TargetMachine.h - Define TargetMachine for AArch64 -*- C++ -*-===//
+//==-- AArch64TargetMachine.h - Define TargetMachine for AArch64 -*- C++ -*-==//
//
// The LLVM Compiler Infrastructure
//
@@ -11,60 +11,60 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_AARCH64TARGETMACHINE_H
-#define LLVM_AARCH64TARGETMACHINE_H
+#ifndef AArch64TARGETMACHINE_H
+#define AArch64TARGETMACHINE_H
-#include "AArch64FrameLowering.h"
-#include "AArch64ISelLowering.h"
#include "AArch64InstrInfo.h"
-#include "AArch64SelectionDAGInfo.h"
+#include "AArch64ISelLowering.h"
#include "AArch64Subtarget.h"
+#include "AArch64FrameLowering.h"
+#include "AArch64SelectionDAGInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/MC/MCStreamer.h"
namespace llvm {
class AArch64TargetMachine : public LLVMTargetMachine {
- AArch64Subtarget Subtarget;
- AArch64InstrInfo InstrInfo;
- const DataLayout DL;
- AArch64TargetLowering TLInfo;
- AArch64SelectionDAGInfo TSInfo;
- AArch64FrameLowering FrameLowering;
+protected:
+ AArch64Subtarget Subtarget;
+
+private:
+ const DataLayout DL;
+ AArch64InstrInfo InstrInfo;
+ AArch64TargetLowering TLInfo;
+ AArch64FrameLowering FrameLowering;
+ AArch64SelectionDAGInfo TSInfo;
public:
AArch64TargetMachine(const Target &T, StringRef TT, StringRef CPU,
StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
- CodeGenOpt::Level OL,
- bool LittleEndian);
+ CodeGenOpt::Level OL, bool IsLittleEndian);
- const AArch64InstrInfo *getInstrInfo() const {
- return &InstrInfo;
+ const AArch64Subtarget *getSubtargetImpl() const override {
+ return &Subtarget;
}
-
- const AArch64FrameLowering *getFrameLowering() const {
+ const AArch64TargetLowering *getTargetLowering() const override {
+ return &TLInfo;
+ }
+ const DataLayout *getDataLayout() const override { return &DL; }
+ const AArch64FrameLowering *getFrameLowering() const override {
return &FrameLowering;
}
-
- const AArch64TargetLowering *getTargetLowering() const {
- return &TLInfo;
+ const AArch64InstrInfo *getInstrInfo() const override { return &InstrInfo; }
+ const AArch64RegisterInfo *getRegisterInfo() const override {
+ return &InstrInfo.getRegisterInfo();
}
-
- const AArch64SelectionDAGInfo *getSelectionDAGInfo() const {
+ const AArch64SelectionDAGInfo *getSelectionDAGInfo() const override {
return &TSInfo;
}
- const AArch64Subtarget *getSubtargetImpl() const { return &Subtarget; }
-
- const DataLayout *getDataLayout() const { return &DL; }
-
- const TargetRegisterInfo *getRegisterInfo() const {
- return &InstrInfo.getRegisterInfo();
- }
- TargetPassConfig *createPassConfig(PassManagerBase &PM);
+ // Pass Pipeline Configuration
+ TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
- virtual void addAnalysisPasses(PassManagerBase &PM);
+ /// \brief Register AArch64 analysis passes with a pass manager.
+ void addAnalysisPasses(PassManagerBase &PM) override;
};
// AArch64leTargetMachine - AArch64 little endian target machine.
@@ -72,8 +72,8 @@ public:
class AArch64leTargetMachine : public AArch64TargetMachine {
virtual void anchor();
public:
- AArch64leTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS, const TargetOptions &Options,
+ AArch64leTargetMachine(const Target &T, StringRef TT, StringRef CPU,
+ StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL);
};
@@ -83,12 +83,12 @@ public:
class AArch64beTargetMachine : public AArch64TargetMachine {
virtual void anchor();
public:
- AArch64beTargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS, const TargetOptions &Options,
+ AArch64beTargetMachine(const Target &T, StringRef TT, StringRef CPU,
+ StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL);
};
-} // End llvm namespace
+} // end namespace llvm
#endif
diff --git a/lib/Target/AArch64/AArch64TargetObjectFile.cpp b/lib/Target/AArch64/AArch64TargetObjectFile.cpp
index 663d619..4069038 100644
--- a/lib/Target/AArch64/AArch64TargetObjectFile.cpp
+++ b/lib/Target/AArch64/AArch64TargetObjectFile.cpp
@@ -6,19 +6,47 @@
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
-//
-// This file deals with any AArch64 specific requirements on object files.
-//
-//===----------------------------------------------------------------------===//
-
#include "AArch64TargetObjectFile.h"
-
+#include "AArch64TargetMachine.h"
+#include "llvm/IR/Mangler.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/Support/Dwarf.h"
using namespace llvm;
+using namespace dwarf;
-void
-AArch64ElfTargetObjectFile::Initialize(MCContext &Ctx,
- const TargetMachine &TM) {
+void AArch64_ELFTargetObjectFile::Initialize(MCContext &Ctx,
+ const TargetMachine &TM) {
TargetLoweringObjectFileELF::Initialize(Ctx, TM);
InitializeELF(TM.Options.UseInitArray);
}
+
+const MCExpr *AArch64_MachoTargetObjectFile::getTTypeGlobalReference(
+ const GlobalValue *GV, unsigned Encoding, Mangler &Mang,
+ const TargetMachine &TM, MachineModuleInfo *MMI,
+ MCStreamer &Streamer) const {
+ // On Darwin, we can reference dwarf symbols with foo@GOT-., which
+ // is an indirect pc-relative reference. The default implementation
+ // won't reference using the GOT, so we need this target-specific
+ // version.
+ if (Encoding & (DW_EH_PE_indirect | DW_EH_PE_pcrel)) {
+ const MCSymbol *Sym = TM.getSymbol(GV, Mang);
+ const MCExpr *Res =
+ MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_GOT, getContext());
+ MCSymbol *PCSym = getContext().CreateTempSymbol();
+ Streamer.EmitLabel(PCSym);
+ const MCExpr *PC = MCSymbolRefExpr::Create(PCSym, getContext());
+ return MCBinaryExpr::CreateSub(Res, PC, getContext());
+ }
+
+ return TargetLoweringObjectFileMachO::getTTypeGlobalReference(
+ GV, Encoding, Mang, TM, MMI, Streamer);
+}
+
+MCSymbol *AArch64_MachoTargetObjectFile::getCFIPersonalitySymbol(
+ const GlobalValue *GV, Mangler &Mang, const TargetMachine &TM,
+ MachineModuleInfo *MMI) const {
+ return TM.getSymbol(GV, Mang);
+}
diff --git a/lib/Target/AArch64/AArch64TargetObjectFile.h b/lib/Target/AArch64/AArch64TargetObjectFile.h
index 0f00a78..de63cb4 100644
--- a/lib/Target/AArch64/AArch64TargetObjectFile.h
+++ b/lib/Target/AArch64/AArch64TargetObjectFile.h
@@ -1,4 +1,4 @@
-//===-- AArch64TargetObjectFile.h - AArch64 Object Info ---------*- C++ -*-===//
+//===-- AArch64TargetObjectFile.h - AArch64 Object Info -*- C++ ---------*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -6,25 +6,34 @@
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
-//
-// This file deals with any AArch64 specific requirements on object files.
-//
-//===----------------------------------------------------------------------===//
-#ifndef LLVM_TARGET_AARCH64_TARGETOBJECTFILE_H
-#define LLVM_TARGET_AARCH64_TARGETOBJECTFILE_H
+#ifndef LLVM_TARGET_AArch64_TARGETOBJECTFILE_H
+#define LLVM_TARGET_AArch64_TARGETOBJECTFILE_H
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
-#include "llvm/Target/TargetMachine.h"
namespace llvm {
+class AArch64TargetMachine;
+
+/// This implementation is used for AArch64 ELF targets (Linux in particular).
+class AArch64_ELFTargetObjectFile : public TargetLoweringObjectFileELF {
+ void Initialize(MCContext &Ctx, const TargetMachine &TM) override;
+};
+
+/// AArch64_MachoTargetObjectFile - This TLOF implementation is used for Darwin.
+class AArch64_MachoTargetObjectFile : public TargetLoweringObjectFileMachO {
+public:
+ const MCExpr *getTTypeGlobalReference(const GlobalValue *GV,
+ unsigned Encoding, Mangler &Mang,
+ const TargetMachine &TM,
+ MachineModuleInfo *MMI,
+ MCStreamer &Streamer) const override;
- /// AArch64ElfTargetObjectFile - This implementation is used for ELF
- /// AArch64 targets.
- class AArch64ElfTargetObjectFile : public TargetLoweringObjectFileELF {
- virtual void Initialize(MCContext &Ctx, const TargetMachine &TM);
- };
+ MCSymbol *getCFIPersonalitySymbol(const GlobalValue *GV, Mangler &Mang,
+ const TargetMachine &TM,
+ MachineModuleInfo *MMI) const override;
+};
} // end namespace llvm
diff --git a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index e2a1647..33e482a 100644
--- a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -1,4 +1,4 @@
-//===- AArch64TargetTransformInfo.cpp - AArch64 specific TTI pass ---------===//
+//===-- AArch64TargetTransformInfo.cpp - AArch64 specific TTI pass --------===//
//
// The LLVM Compiler Infrastructure
//
@@ -14,15 +14,18 @@
///
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "aarch64tti"
#include "AArch64.h"
#include "AArch64TargetMachine.h"
+#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/CostTable.h"
#include "llvm/Target/TargetLowering.h"
+#include <algorithm>
using namespace llvm;
+#define DEBUG_TYPE "aarch64tti"
+
// Declare the pass initialization routine locally as target-specific passes
// don't have a target-wide initialization entry point, and so we rely on the
// pass constructor initialization.
@@ -33,25 +36,28 @@ void initializeAArch64TTIPass(PassRegistry &);
namespace {
class AArch64TTI final : public ImmutablePass, public TargetTransformInfo {
+ const AArch64TargetMachine *TM;
const AArch64Subtarget *ST;
const AArch64TargetLowering *TLI;
+ /// Estimate the overhead of scalarizing an instruction. Insert and Extract
+ /// are set if the result needs to be inserted and/or extracted from vectors.
+ unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const;
+
public:
- AArch64TTI() : ImmutablePass(ID), ST(0), TLI(0) {
+ AArch64TTI() : ImmutablePass(ID), TM(nullptr), ST(nullptr), TLI(nullptr) {
llvm_unreachable("This pass cannot be directly constructed");
}
AArch64TTI(const AArch64TargetMachine *TM)
- : ImmutablePass(ID), ST(TM->getSubtargetImpl()),
+ : ImmutablePass(ID), TM(TM), ST(TM->getSubtargetImpl()),
TLI(TM->getTargetLowering()) {
initializeAArch64TTIPass(*PassRegistry::getPassRegistry());
}
- virtual void initializePass() override {
- pushTTIStack(this);
- }
+ void initializePass() override { pushTTIStack(this); }
- virtual void getAnalysisUsage(AnalysisUsage &AU) const override {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
TargetTransformInfo::getAnalysisUsage(AU);
}
@@ -59,31 +65,37 @@ public:
static char ID;
/// Provide necessary pointer adjustments for the two base classes.
- virtual void *getAdjustedAnalysisPointer(const void *ID) override {
+ void *getAdjustedAnalysisPointer(const void *ID) override {
if (ID == &TargetTransformInfo::ID)
- return (TargetTransformInfo*)this;
+ return (TargetTransformInfo *)this;
return this;
}
/// \name Scalar TTI Implementations
/// @{
+ unsigned getIntImmCost(int64_t Val) const;
+ unsigned getIntImmCost(const APInt &Imm, Type *Ty) const override;
+ unsigned getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
+ Type *Ty) const override;
+ unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
+ Type *Ty) const override;
+ PopcntSupportKind getPopcntSupport(unsigned TyWidth) const override;
/// @}
-
/// \name Vector TTI Implementations
/// @{
- unsigned getNumberOfRegisters(bool Vector) const {
+ unsigned getNumberOfRegisters(bool Vector) const override {
if (Vector) {
if (ST->hasNEON())
return 32;
return 0;
}
- return 32;
+ return 31;
}
- unsigned getRegisterBitWidth(bool Vector) const {
+ unsigned getRegisterBitWidth(bool Vector) const override {
if (Vector) {
if (ST->hasNEON())
return 128;
@@ -92,6 +104,26 @@ public:
return 64;
}
+ unsigned getMaximumUnrollFactor() const override { return 2; }
+
+ unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const
+ override;
+
+ unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) const
+ override;
+
+ unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty,
+ OperandValueKind Opd1Info = OK_AnyValue,
+ OperandValueKind Opd2Info = OK_AnyValue) const
+ override;
+
+ unsigned getAddressComputationCost(Type *Ty, bool IsComplex) const override;
+
+ unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) const
+ override;
+
+ unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
+ unsigned AddressSpace) const override;
/// @}
};
@@ -105,3 +137,328 @@ ImmutablePass *
llvm::createAArch64TargetTransformInfoPass(const AArch64TargetMachine *TM) {
return new AArch64TTI(TM);
}
+
+/// \brief Calculate the cost of materializing a 64-bit value. This helper
+/// method might only calculate a fraction of a larger immediate. Therefore it
+/// is valid to return a cost of ZERO.
+unsigned AArch64TTI::getIntImmCost(int64_t Val) const {
+ // Check if the immediate can be encoded within an instruction.
+ if (Val == 0 || AArch64_AM::isLogicalImmediate(Val, 64))
+ return 0;
+
+ if (Val < 0)
+ Val = ~Val;
+
+ // Calculate how many moves we will need to materialize this constant.
+ unsigned LZ = countLeadingZeros((uint64_t)Val);
+ return (64 - LZ + 15) / 16;
+}
+
+/// \brief Calculate the cost of materializing the given constant.
+unsigned AArch64TTI::getIntImmCost(const APInt &Imm, Type *Ty) const {
+ assert(Ty->isIntegerTy());
+
+ unsigned BitSize = Ty->getPrimitiveSizeInBits();
+ if (BitSize == 0)
+ return ~0U;
+
+ // Sign-extend all constants to a multiple of 64-bit.
+ APInt ImmVal = Imm;
+ if (BitSize & 0x3f)
+ ImmVal = Imm.sext((BitSize + 63) & ~0x3fU);
+
+ // Split the constant into 64-bit chunks and calculate the cost for each
+ // chunk.
+ unsigned Cost = 0;
+ for (unsigned ShiftVal = 0; ShiftVal < BitSize; ShiftVal += 64) {
+ APInt Tmp = ImmVal.ashr(ShiftVal).sextOrTrunc(64);
+ int64_t Val = Tmp.getSExtValue();
+ Cost += getIntImmCost(Val);
+ }
+  // We need at least one instruction to materialize the constant.
+ return std::max(1U, Cost);
+}
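
Reviewer note: the two functions above implement the cost model's chunking scheme. A 64-bit chunk costs one MOVZ/MOVN plus one MOVK per additional occupied 16-bit slice, and wide constants sum the per-chunk costs. A minimal standalone sketch of that arithmetic (it omits the logical-immediate shortcut, which needs AArch64_AM::isLogicalImmediate, and assumes a C++20 compiler for <bit>):

    #include <algorithm>
    #include <bit>
    #include <cstdint>

    // Cost of one 64-bit chunk: zero if the value is trivially free here
    // (the real code also treats encodable logical immediates as free),
    // otherwise one move instruction per occupied 16-bit slice.
    unsigned chunkCost(int64_t Val) {
      if (Val == 0)
        return 0;
      if (Val < 0)
        Val = ~Val; // MOVN materializes the bitwise complement
      unsigned LZ = std::countl_zero(static_cast<uint64_t>(Val));
      return (64 - LZ + 15) / 16; // 16-bit slices that must be written
    }

    // Sum per-chunk costs for a wide constant; at least one instruction
    // is always required.
    unsigned wideConstCost(const int64_t *Chunks, unsigned NumChunks) {
      unsigned Cost = 0;
      for (unsigned I = 0; I != NumChunks; ++I)
        Cost += chunkCost(Chunks[I]);
      return std::max(1u, Cost);
    }

For example, chunkCost(0xffff) == 1 (a single MOVZ) and chunkCost(0x1234567890) == 3 (MOVZ plus two MOVKs).
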
+
+unsigned AArch64TTI::getIntImmCost(unsigned Opcode, unsigned Idx,
+ const APInt &Imm, Type *Ty) const {
+ assert(Ty->isIntegerTy());
+
+ unsigned BitSize = Ty->getPrimitiveSizeInBits();
+ // There is no cost model for constants with a bit size of 0. Return TCC_Free
+ // here, so that constant hoisting will ignore this constant.
+ if (BitSize == 0)
+ return TCC_Free;
+
+ unsigned ImmIdx = ~0U;
+ switch (Opcode) {
+ default:
+ return TCC_Free;
+ case Instruction::GetElementPtr:
+ // Always hoist the base address of a GetElementPtr.
+ if (Idx == 0)
+ return 2 * TCC_Basic;
+ return TCC_Free;
+ case Instruction::Store:
+ ImmIdx = 0;
+ break;
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Mul:
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ case Instruction::ICmp:
+ ImmIdx = 1;
+ break;
+ // Always return TCC_Free for the shift value of a shift instruction.
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ if (Idx == 1)
+ return TCC_Free;
+ break;
+ case Instruction::Trunc:
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::IntToPtr:
+ case Instruction::PtrToInt:
+ case Instruction::BitCast:
+ case Instruction::PHI:
+ case Instruction::Call:
+ case Instruction::Select:
+ case Instruction::Ret:
+ case Instruction::Load:
+ break;
+ }
+
+ if (Idx == ImmIdx) {
+ unsigned NumConstants = (BitSize + 63) / 64;
+ unsigned Cost = AArch64TTI::getIntImmCost(Imm, Ty);
+ return (Cost <= NumConstants * TCC_Basic)
+ ? static_cast<unsigned>(TCC_Free) : Cost;
+ }
+ return AArch64TTI::getIntImmCost(Imm, Ty);
+}
+
+unsigned AArch64TTI::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
+ const APInt &Imm, Type *Ty) const {
+ assert(Ty->isIntegerTy());
+
+ unsigned BitSize = Ty->getPrimitiveSizeInBits();
+ // There is no cost model for constants with a bit size of 0. Return TCC_Free
+ // here, so that constant hoisting will ignore this constant.
+ if (BitSize == 0)
+ return TCC_Free;
+
+ switch (IID) {
+ default:
+ return TCC_Free;
+ case Intrinsic::sadd_with_overflow:
+ case Intrinsic::uadd_with_overflow:
+ case Intrinsic::ssub_with_overflow:
+ case Intrinsic::usub_with_overflow:
+ case Intrinsic::smul_with_overflow:
+ case Intrinsic::umul_with_overflow:
+ if (Idx == 1) {
+ unsigned NumConstants = (BitSize + 63) / 64;
+ unsigned Cost = AArch64TTI::getIntImmCost(Imm, Ty);
+ return (Cost <= NumConstants * TCC_Basic)
+ ? static_cast<unsigned>(TCC_Free) : Cost;
+ }
+ break;
+ case Intrinsic::experimental_stackmap:
+ if ((Idx < 2) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
+ return TCC_Free;
+ break;
+ case Intrinsic::experimental_patchpoint_void:
+ case Intrinsic::experimental_patchpoint_i64:
+ if ((Idx < 4) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
+ return TCC_Free;
+ break;
+ }
+ return AArch64TTI::getIntImmCost(Imm, Ty);
+}
+
+AArch64TTI::PopcntSupportKind
+AArch64TTI::getPopcntSupport(unsigned TyWidth) const {
+ assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
+ if (TyWidth == 32 || TyWidth == 64)
+ return PSK_FastHardware;
+ // TODO: AArch64TargetLowering::LowerCTPOP() supports 128bit popcount.
+ return PSK_Software;
+}
+
+unsigned AArch64TTI::getCastInstrCost(unsigned Opcode, Type *Dst,
+ Type *Src) const {
+ int ISD = TLI->InstructionOpcodeToISD(Opcode);
+ assert(ISD && "Invalid opcode");
+
+ EVT SrcTy = TLI->getValueType(Src);
+ EVT DstTy = TLI->getValueType(Dst);
+
+ if (!SrcTy.isSimple() || !DstTy.isSimple())
+ return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src);
+
+ static const TypeConversionCostTblEntry<MVT> ConversionTbl[] = {
+ // LowerVectorINT_TO_FP:
+ { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 },
+ { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i8, 1 },
+ { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i16, 1 },
+ { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 1 },
+ { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i64, 1 },
+ { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 },
+ { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i8, 1 },
+ { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i16, 1 },
+ { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 1 },
+ { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 1 },
+ // LowerVectorFP_TO_INT
+ { ISD::FP_TO_SINT, MVT::v4i32, MVT::v4f32, 1 },
+ { ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f64, 1 },
+ { ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f32, 1 },
+ { ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f64, 1 },
+ { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f64, 1 },
+ { ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 1 },
+ { ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f32, 4 },
+ { ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f32, 4 },
+ { ISD::FP_TO_UINT, MVT::v4i16, MVT::v4f32, 4 },
+ { ISD::FP_TO_SINT, MVT::v4i16, MVT::v4f32, 4 },
+ { ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f64, 4 },
+ { ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f64, 4 },
+ };
+
+ int Idx = ConvertCostTableLookup<MVT>(
+ ConversionTbl, array_lengthof(ConversionTbl), ISD, DstTy.getSimpleVT(),
+ SrcTy.getSimpleVT());
+ if (Idx != -1)
+ return ConversionTbl[Idx].Cost;
+
+ return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src);
+}
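
Reviewer note: the table-plus-fallback pattern above (TypeConversionCostTblEntry plus ConvertCostTableLookup) is a linear scan over (opcode, dst, src) keys. A self-contained sketch of the same mechanism, with illustrative stand-in enums rather than LLVM's ISD/MVT values:

    struct ConversionCostEntry { int Opcode; int Dst; int Src; unsigned Cost; };

    // Illustrative keys; in the real table these are ISD opcodes and MVTs.
    enum { OpSIntToFP = 1, OpFPToSInt = 2 };
    enum { TyV2I32 = 1, TyV2F32 = 2, TyV4F32 = 3, TyV4I16 = 4 };

    static const ConversionCostEntry Table[] = {
        {OpSIntToFP, TyV2F32, TyV2I32, 1}, // maps to a single native convert
        {OpFPToSInt, TyV4I16, TyV4F32, 4}, // needs an extra narrowing step
    };

    // Returns the table cost, or -1 so the caller can fall back to the
    // target-independent estimate (as getCastInstrCost does above).
    int lookupCost(int Opcode, int Dst, int Src) {
      for (const ConversionCostEntry &E : Table)
        if (E.Opcode == Opcode && E.Dst == Dst && E.Src == Src)
          return static_cast<int>(E.Cost);
      return -1;
    }
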
+
+unsigned AArch64TTI::getVectorInstrCost(unsigned Opcode, Type *Val,
+ unsigned Index) const {
+ assert(Val->isVectorTy() && "This must be a vector type");
+
+ if (Index != -1U) {
+ // Legalize the type.
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Val);
+
+ // This type is legalized to a scalar type.
+ if (!LT.second.isVector())
+ return 0;
+
+ // The type may be split. Normalize the index to the new type.
+ unsigned Width = LT.second.getVectorNumElements();
+ Index = Index % Width;
+
+ // The element at index zero is already inside the vector.
+ if (Index == 0)
+ return 0;
+ }
+
+ // All other insert/extracts cost this much.
+ return 2;
+}
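
Reviewer note: the interesting step above is the normalization. When legalization splits a wide vector into registers of Width lanes, a constant lane index only matters modulo Width, and lane 0 of any register is free to access. A standalone sketch of that decision, with the legalization result flattened into plain parameters:

    // LaneCount: lanes per legal register after splitting; pass
    // LegalizedToScalar = true when the type legalizes to a scalar.
    unsigned insertExtractCost(unsigned Index, unsigned LaneCount,
                               bool LegalizedToScalar) {
      if (LegalizedToScalar)
        return 0; // the element already lives in a scalar register
      if (Index != ~0u) {   // a known, constant lane
        Index %= LaneCount; // position within its subvector
        if (Index == 0)
          return 0;         // lane 0 is directly addressable
      }
      return 2; // every other insert/extract costs 2 in this model
    }
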
+
+unsigned AArch64TTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
+ OperandValueKind Opd1Info,
+ OperandValueKind Opd2Info) const {
+ // Legalize the type.
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Ty);
+
+ int ISD = TLI->InstructionOpcodeToISD(Opcode);
+
+ switch (ISD) {
+ default:
+ return TargetTransformInfo::getArithmeticInstrCost(Opcode, Ty, Opd1Info,
+ Opd2Info);
+ case ISD::ADD:
+ case ISD::MUL:
+ case ISD::XOR:
+ case ISD::OR:
+ case ISD::AND:
+ // These nodes are marked as 'custom' for combining purposes only.
+ // We know that they are legal. See LowerAdd in ISelLowering.
+ return 1 * LT.first;
+ }
+}
+
+unsigned AArch64TTI::getAddressComputationCost(Type *Ty, bool IsComplex) const {
+ // Address computations in vectorized code with non-consecutive addresses will
+ // likely result in more instructions compared to scalar code where the
+ // computation can more often be merged into the index mode. The resulting
+ // extra micro-ops can significantly decrease throughput.
+ unsigned NumVectorInstToHideOverhead = 10;
+
+ if (Ty->isVectorTy() && IsComplex)
+ return NumVectorInstToHideOverhead;
+
+ // In many cases the address computation is not merged into the instruction
+ // addressing mode.
+ return 1;
+}
+
+unsigned AArch64TTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
+ Type *CondTy) const {
+
+ int ISD = TLI->InstructionOpcodeToISD(Opcode);
+  // We don't lower vector selects wider than the register width well.
+ if (ValTy->isVectorTy() && ISD == ISD::SELECT) {
+ // We would need this many instructions to hide the scalarization happening.
+ unsigned AmortizationCost = 20;
+ static const TypeConversionCostTblEntry<MVT::SimpleValueType>
+ VectorSelectTbl[] = {
+ { ISD::SELECT, MVT::v16i1, MVT::v16i16, 16 * AmortizationCost },
+ { ISD::SELECT, MVT::v8i1, MVT::v8i32, 8 * AmortizationCost },
+ { ISD::SELECT, MVT::v16i1, MVT::v16i32, 16 * AmortizationCost },
+ { ISD::SELECT, MVT::v4i1, MVT::v4i64, 4 * AmortizationCost },
+ { ISD::SELECT, MVT::v8i1, MVT::v8i64, 8 * AmortizationCost },
+ { ISD::SELECT, MVT::v16i1, MVT::v16i64, 16 * AmortizationCost }
+ };
+
+ EVT SelCondTy = TLI->getValueType(CondTy);
+ EVT SelValTy = TLI->getValueType(ValTy);
+ if (SelCondTy.isSimple() && SelValTy.isSimple()) {
+ int Idx =
+ ConvertCostTableLookup(VectorSelectTbl, ISD, SelCondTy.getSimpleVT(),
+ SelValTy.getSimpleVT());
+ if (Idx != -1)
+ return VectorSelectTbl[Idx].Cost;
+ }
+ }
+ return TargetTransformInfo::getCmpSelInstrCost(Opcode, ValTy, CondTy);
+}
+
+unsigned AArch64TTI::getMemoryOpCost(unsigned Opcode, Type *Src,
+ unsigned Alignment,
+ unsigned AddressSpace) const {
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src);
+
+ if (Opcode == Instruction::Store && Src->isVectorTy() && Alignment != 16 &&
+ Src->getVectorElementType()->isIntegerTy(64)) {
+    // Unaligned stores are extremely inefficient. We don't split
+    // unaligned v2i64 stores because of the negative impact that splitting
+    // has shown in practice on inlined memcpy code.
+ // We make v2i64 stores expensive so that we will only vectorize if there
+ // are 6 other instructions getting vectorized.
+ unsigned AmortizationCost = 6;
+
+ return LT.first * 2 * AmortizationCost;
+ }
+
+ if (Src->isVectorTy() && Src->getVectorElementType()->isIntegerTy(8) &&
+ Src->getVectorNumElements() < 8) {
+    // We scalarize the loads/stores because there is no v.4b register and we
+ // have to promote the elements to v.4h.
+ unsigned NumVecElts = Src->getVectorNumElements();
+ unsigned NumVectorizableInstsToAmortize = NumVecElts * 2;
+ // We generate 2 instructions per vector element.
+ return NumVectorizableInstsToAmortize * NumVecElts * 2;
+ }
+
+ return LT.first;
+}
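
Reviewer note: both special cases above are pure arithmetic once the type queries are done. A standalone sketch with the type-system inputs flattened into parameters (NumLegalOps stands in for LT.first, the number of legal-typed operations the access splits into):

    unsigned memoryOpCost(bool IsStore, bool IsVector, unsigned ElemBits,
                          unsigned NumElts, unsigned Alignment,
                          unsigned NumLegalOps) {
      // Unaligned v2i64 stores stay whole and are priced so vectorization
      // only pays off alongside ~6 other vectorized instructions.
      if (IsStore && IsVector && ElemBits == 64 && Alignment != 16) {
        const unsigned AmortizationCost = 6;
        return NumLegalOps * 2 * AmortizationCost;
      }
      // Short i8 vectors have no v.4b register; each element is promoted
      // and handled separately, roughly two instructions per element.
      if (IsVector && ElemBits == 8 && NumElts < 8) {
        unsigned InstsToAmortize = NumElts * 2;
        return InstsToAmortize * NumElts * 2;
      }
      return NumLegalOps; // the common case: one op per legal-typed piece
    }
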
diff --git a/lib/Target/AArch64/Android.mk b/lib/Target/AArch64/Android.mk
index 144c2d3..d0a50da 100644
--- a/lib/Target/AArch64/Android.mk
+++ b/lib/Target/AArch64/Android.mk
@@ -3,31 +3,41 @@ LOCAL_PATH := $(call my-dir)
arm64_codegen_TBLGEN_TABLES := \
AArch64GenRegisterInfo.inc \
AArch64GenInstrInfo.inc \
- AArch64GenCodeEmitter.inc \
- AArch64GenMCCodeEmitter.inc \
- AArch64GenMCPseudoLowering.inc \
AArch64GenAsmWriter.inc \
- AArch64GenAsmMatcher.inc \
+ AArch64GenAsmWriter1.inc \
AArch64GenDAGISel.inc \
- AArch64GenFastISel.inc \
AArch64GenCallingConv.inc \
+ AArch64GenAsmMatcher.inc \
AArch64GenSubtargetInfo.inc \
- AArch64GenDisassemblerTables.inc
+ AArch64GenMCCodeEmitter.inc \
+ AArch64GenFastISel.inc \
+ AArch64GenDisassemblerTables.inc \
+ AArch64GenMCPseudoLowering.inc \
arm64_codegen_SRC_FILES := \
+ AArch64AddressTypePromotion.cpp \
+ AArch64AdvSIMDScalarPass.cpp \
AArch64AsmPrinter.cpp \
+ AArch64BranchRelaxation.cpp \
+ AArch64CleanupLocalDynamicTLSPass.cpp \
+ AArch64CollectLOH.cpp \
+ AArch64ConditionalCompares.cpp \
+ AArch64DeadRegisterDefinitionsPass.cpp \
+ AArch64ExpandPseudoInsts.cpp \
+ AArch64FastISel.cpp \
AArch64FrameLowering.cpp \
- AArch64ISelDAGToDAG.cpp \
- AArch64MachineFunctionInfo.cpp \
- AArch64RegisterInfo.cpp \
- AArch64Subtarget.cpp \
- AArch64TargetObjectFile.cpp \
- AArch64BranchFixupPass.cpp \
AArch64InstrInfo.cpp \
+ AArch64ISelDAGToDAG.cpp \
AArch64ISelLowering.cpp \
+ AArch64LoadStoreOptimizer.cpp \
AArch64MCInstLower.cpp \
+ AArch64PromoteConstant.cpp \
+ AArch64RegisterInfo.cpp \
AArch64SelectionDAGInfo.cpp \
+ AArch64StorePairSuppress.cpp \
+ AArch64Subtarget.cpp \
AArch64TargetMachine.cpp \
+ AArch64TargetObjectFile.cpp \
AArch64TargetTransformInfo.cpp
# For the host
diff --git a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
index e933ec1..65b77c5 100644
--- a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -6,34 +6,31 @@
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
-//
-// This file contains the (GNU-style) assembly parser for the AArch64
-// architecture.
-//
-//===----------------------------------------------------------------------===//
-
-#include "MCTargetDesc/AArch64MCTargetDesc.h"
+#include "MCTargetDesc/AArch64AddressingModes.h"
#include "MCTargetDesc/AArch64MCExpr.h"
#include "Utils/AArch64BaseInfo.h"
-#include "llvm/ADT/APFloat.h"
-#include "llvm/ADT/APInt.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/StringSwitch.h"
-#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCTargetAsmParser.h"
-#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Twine.h"
+#include <cstdio>
using namespace llvm;
namespace {
@@ -41,21 +38,74 @@ namespace {
class AArch64Operand;
class AArch64AsmParser : public MCTargetAsmParser {
+public:
+ typedef SmallVectorImpl<MCParsedAsmOperand *> OperandVector;
+
+private:
+ StringRef Mnemonic; ///< Instruction mnemonic.
MCSubtargetInfo &STI;
MCAsmParser &Parser;
+ MCAsmParser &getParser() const { return Parser; }
+ MCAsmLexer &getLexer() const { return Parser.getLexer(); }
+
+ SMLoc getLoc() const { return Parser.getTok().getLoc(); }
+
+ bool parseSysAlias(StringRef Name, SMLoc NameLoc, OperandVector &Operands);
+ AArch64CC::CondCode parseCondCodeString(StringRef Cond);
+ bool parseCondCode(OperandVector &Operands, bool invertCondCode);
+ int tryParseRegister();
+ int tryMatchVectorRegister(StringRef &Kind, bool expected);
+ bool parseRegister(OperandVector &Operands);
+ bool parseSymbolicImmVal(const MCExpr *&ImmVal);
+ bool parseVectorList(OperandVector &Operands);
+ bool parseOperand(OperandVector &Operands, bool isCondCode,
+ bool invertCondCode);
+
+ void Warning(SMLoc L, const Twine &Msg) { Parser.Warning(L, Msg); }
+ bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); }
+ bool showMatchError(SMLoc Loc, unsigned ErrCode);
+
+ bool parseDirectiveWord(unsigned Size, SMLoc L);
+ bool parseDirectiveTLSDescCall(SMLoc L);
+
+ bool parseDirectiveLOH(StringRef LOH, SMLoc L);
+
+ bool validateInstruction(MCInst &Inst, SmallVectorImpl<SMLoc> &Loc);
+ bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
+ OperandVector &Operands, MCStreamer &Out,
+ unsigned &ErrorInfo,
+ bool MatchingInlineAsm) override;
+/// @name Auto-generated Match Functions
+/// {
+
#define GET_ASSEMBLER_HEADER
#include "AArch64GenAsmMatcher.inc"
+ /// }
+
+ OperandMatchResultTy tryParseOptionalShiftExtend(OperandVector &Operands);
+ OperandMatchResultTy tryParseBarrierOperand(OperandVector &Operands);
+ OperandMatchResultTy tryParseMRSSystemRegister(OperandVector &Operands);
+ OperandMatchResultTy tryParseSysReg(OperandVector &Operands);
+ OperandMatchResultTy tryParseSysCROperand(OperandVector &Operands);
+ OperandMatchResultTy tryParsePrefetch(OperandVector &Operands);
+ OperandMatchResultTy tryParseAdrpLabel(OperandVector &Operands);
+ OperandMatchResultTy tryParseAdrLabel(OperandVector &Operands);
+ OperandMatchResultTy tryParseFPImm(OperandVector &Operands);
+ OperandMatchResultTy tryParseAddSubImm(OperandVector &Operands);
+ OperandMatchResultTy tryParseGPR64sp0Operand(OperandVector &Operands);
+ bool tryParseVectorRegister(OperandVector &Operands);
+
public:
enum AArch64MatchResultTy {
- Match_FirstAArch64 = FIRST_TARGET_MATCH_RESULT_TY,
+ Match_InvalidSuffix = FIRST_TARGET_MATCH_RESULT_TY,
#define GET_OPERAND_DIAGNOSTIC_TYPES
#include "AArch64GenAsmMatcher.inc"
};
-
AArch64AsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser,
- const MCInstrInfo &MII)
+ const MCInstrInfo &MII,
+ const MCTargetOptions &Options)
: MCTargetAsmParser(), STI(_STI), Parser(_Parser) {
MCAsmParserExtension::Initialize(_Parser);
@@ -63,191 +113,197 @@ public:
setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
}
- // These are the public interface of the MCTargetAsmParser
- bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc);
bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
- SMLoc NameLoc,
- SmallVectorImpl<MCParsedAsmOperand*> &Operands);
-
- bool ParseDirective(AsmToken DirectiveID);
- bool ParseDirectiveTLSDescCall(SMLoc L);
- bool ParseDirectiveWord(unsigned Size, SMLoc L);
-
- bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
- SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- MCStreamer&Out, unsigned &ErrorInfo,
- bool MatchingInlineAsm);
-
- // The rest of the sub-parsers have more freedom over interface: they return
- // an OperandMatchResultTy because it's less ambiguous than true/false or
- // -1/0/1 even if it is more verbose
- OperandMatchResultTy
- ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- StringRef Mnemonic);
-
- OperandMatchResultTy ParseImmediate(const MCExpr *&ExprVal);
-
- OperandMatchResultTy ParseRelocPrefix(AArch64MCExpr::VariantKind &RefKind);
-
- OperandMatchResultTy
- ParseNEONLane(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- uint32_t NumLanes);
-
- OperandMatchResultTy
- ParseRegister(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- uint32_t &NumLanes);
-
- OperandMatchResultTy
- ParseImmWithLSLOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
-
- OperandMatchResultTy
- ParseCondCodeOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
-
- OperandMatchResultTy
- ParseCRxOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
-
- OperandMatchResultTy
- ParseFPImmOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
-
- OperandMatchResultTy
- ParseFPImm0AndImm0Operand( SmallVectorImpl<MCParsedAsmOperand*> &Operands);
-
- template<typename SomeNamedImmMapper> OperandMatchResultTy
- ParseNamedImmOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
- return ParseNamedImmOperand(SomeNamedImmMapper(), Operands);
- }
-
- OperandMatchResultTy
- ParseNamedImmOperand(const NamedImmMapper &Mapper,
- SmallVectorImpl<MCParsedAsmOperand*> &Operands);
-
- OperandMatchResultTy
- ParseLSXAddressOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
-
- OperandMatchResultTy
- ParseShiftExtend(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
-
- OperandMatchResultTy
- ParseSysRegOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
-
- bool TryParseVector(uint32_t &RegNum, SMLoc &RegEndLoc, StringRef &Layout,
- SMLoc &LayoutLoc);
-
- OperandMatchResultTy ParseVectorList(SmallVectorImpl<MCParsedAsmOperand *> &);
-
- bool validateInstruction(MCInst &Inst,
- const SmallVectorImpl<MCParsedAsmOperand*> &Operands);
-
- /// Scan the next token (which had better be an identifier) and determine
- /// whether it represents a general-purpose or vector register. It returns
- /// true if an identifier was found and populates its reference arguments. It
- /// does not consume the token.
- bool
- IdentifyRegister(unsigned &RegNum, SMLoc &RegEndLoc, StringRef &LayoutSpec,
- SMLoc &LayoutLoc) const;
-
+ SMLoc NameLoc, OperandVector &Operands) override;
+ bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
+ bool ParseDirective(AsmToken DirectiveID) override;
+ unsigned validateTargetOperandClass(MCParsedAsmOperand *Op,
+ unsigned Kind) override;
+
+ static bool classifySymbolRef(const MCExpr *Expr,
+ AArch64MCExpr::VariantKind &ELFRefKind,
+ MCSymbolRefExpr::VariantKind &DarwinRefKind,
+ int64_t &Addend);
};
-
-}
+} // end anonymous namespace
namespace {
-/// Instances of this class represent a parsed AArch64 machine instruction.
+/// AArch64Operand - Instances of this class represent a parsed AArch64 machine
+/// instruction.
class AArch64Operand : public MCParsedAsmOperand {
private:
enum KindTy {
- k_ImmWithLSL, // #uimm {, LSL #amt }
- k_CondCode, // eq/ne/...
- k_FPImmediate, // Limited-precision floating-point imm
- k_Immediate, // Including expressions referencing symbols
+ k_Immediate,
+ k_ShiftedImm,
+ k_CondCode,
k_Register,
+ k_VectorList,
+ k_VectorIndex,
+ k_Token,
+ k_SysReg,
+ k_SysCR,
+ k_Prefetch,
k_ShiftExtend,
- k_VectorList, // A sequential list of 1 to 4 registers.
- k_SysReg, // The register operand of MRS and MSR instructions
- k_Token, // The mnemonic; other raw tokens the auto-generated
- k_WrappedRegister // Load/store exclusive permit a wrapped register.
+ k_FPImm,
+ k_Barrier
} Kind;
SMLoc StartLoc, EndLoc;
- struct ImmWithLSLOp {
- const MCExpr *Val;
- unsigned ShiftAmount;
- bool ImplicitAmount;
+ struct TokOp {
+ const char *Data;
+ unsigned Length;
+ bool IsSuffix; // Is the operand actually a suffix on the mnemonic.
};
- struct CondCodeOp {
- A64CC::CondCodes Code;
+ struct RegOp {
+ unsigned RegNum;
+ bool isVector;
};
- struct FPImmOp {
- double Val;
+ struct VectorListOp {
+ unsigned RegNum;
+ unsigned Count;
+ unsigned NumElements;
+ unsigned ElementKind;
+ };
+
+ struct VectorIndexOp {
+ unsigned Val;
};
struct ImmOp {
const MCExpr *Val;
};
- struct RegOp {
- unsigned RegNum;
+ struct ShiftedImmOp {
+ const MCExpr *Val;
+ unsigned ShiftAmount;
};
- struct ShiftExtendOp {
- A64SE::ShiftExtSpecifiers ShiftType;
- unsigned Amount;
- bool ImplicitAmount;
+ struct CondCodeOp {
+ AArch64CC::CondCode Code;
};
- // A vector register list is a sequential list of 1 to 4 registers.
- struct VectorListOp {
- unsigned RegNum;
- unsigned Count;
- A64Layout::VectorLayout Layout;
+ struct FPImmOp {
+ unsigned Val; // Encoded 8-bit representation.
+ };
+
+ struct BarrierOp {
+ unsigned Val; // Not the enum since not all values have names.
};
struct SysRegOp {
const char *Data;
unsigned Length;
+ uint64_t FeatureBits; // We need to pass through information about which
+ // core we are compiling for so that the SysReg
+ // Mappers can appropriately conditionalize.
};
- struct TokOp {
- const char *Data;
- unsigned Length;
+ struct SysCRImmOp {
+ unsigned Val;
+ };
+
+ struct PrefetchOp {
+ unsigned Val;
+ };
+
+ struct ShiftExtendOp {
+ AArch64_AM::ShiftExtendType Type;
+ unsigned Amount;
+ bool HasExplicitAmount;
+ };
+
+ struct ExtendOp {
+ unsigned Val;
};
union {
- struct ImmWithLSLOp ImmWithLSL;
- struct CondCodeOp CondCode;
- struct FPImmOp FPImm;
- struct ImmOp Imm;
+ struct TokOp Tok;
struct RegOp Reg;
- struct ShiftExtendOp ShiftExtend;
struct VectorListOp VectorList;
+ struct VectorIndexOp VectorIndex;
+ struct ImmOp Imm;
+ struct ShiftedImmOp ShiftedImm;
+ struct CondCodeOp CondCode;
+ struct FPImmOp FPImm;
+ struct BarrierOp Barrier;
struct SysRegOp SysReg;
- struct TokOp Tok;
+ struct SysCRImmOp SysCRImm;
+ struct PrefetchOp Prefetch;
+ struct ShiftExtendOp ShiftExtend;
};
- AArch64Operand(KindTy K, SMLoc S, SMLoc E)
- : MCParsedAsmOperand(), Kind(K), StartLoc(S), EndLoc(E) {}
+  // Keep the MCContext around as the MCExprs may need to be manipulated during
+ // the add<>Operands() calls.
+ MCContext &Ctx;
+
+ AArch64Operand(KindTy K, MCContext &_Ctx)
+ : MCParsedAsmOperand(), Kind(K), Ctx(_Ctx) {}
public:
- AArch64Operand(const AArch64Operand &o) : MCParsedAsmOperand() {
+ AArch64Operand(const AArch64Operand &o) : MCParsedAsmOperand(), Ctx(o.Ctx) {
+ Kind = o.Kind;
+ StartLoc = o.StartLoc;
+ EndLoc = o.EndLoc;
+ switch (Kind) {
+ case k_Token:
+ Tok = o.Tok;
+ break;
+ case k_Immediate:
+ Imm = o.Imm;
+ break;
+ case k_ShiftedImm:
+ ShiftedImm = o.ShiftedImm;
+ break;
+ case k_CondCode:
+ CondCode = o.CondCode;
+ break;
+ case k_FPImm:
+ FPImm = o.FPImm;
+ break;
+ case k_Barrier:
+ Barrier = o.Barrier;
+ break;
+ case k_Register:
+ Reg = o.Reg;
+ break;
+ case k_VectorList:
+ VectorList = o.VectorList;
+ break;
+ case k_VectorIndex:
+ VectorIndex = o.VectorIndex;
+ break;
+ case k_SysReg:
+ SysReg = o.SysReg;
+ break;
+ case k_SysCR:
+ SysCRImm = o.SysCRImm;
+ break;
+ case k_Prefetch:
+ Prefetch = o.Prefetch;
+ break;
+ case k_ShiftExtend:
+ ShiftExtend = o.ShiftExtend;
+ break;
+ }
}
- SMLoc getStartLoc() const { return StartLoc; }
- SMLoc getEndLoc() const { return EndLoc; }
- void print(raw_ostream&) const;
- void dump() const;
+ /// getStartLoc - Get the location of the first token of this operand.
+ SMLoc getStartLoc() const override { return StartLoc; }
+ /// getEndLoc - Get the location of the last token of this operand.
+ SMLoc getEndLoc() const override { return EndLoc; }
StringRef getToken() const {
assert(Kind == k_Token && "Invalid access!");
return StringRef(Tok.Data, Tok.Length);
}
- unsigned getReg() const {
- assert((Kind == k_Register || Kind == k_WrappedRegister)
- && "Invalid access!");
- return Reg.RegNum;
+ bool isTokenSuffix() const {
+ assert(Kind == k_Token && "Invalid access!");
+ return Tok.IsSuffix;
}
const MCExpr *getImm() const {
@@ -255,1234 +311,1778 @@ public:
return Imm.Val;
}
- A64CC::CondCodes getCondCode() const {
- assert(Kind == k_CondCode && "Invalid access!");
- return CondCode.Code;
+ const MCExpr *getShiftedImmVal() const {
+ assert(Kind == k_ShiftedImm && "Invalid access!");
+ return ShiftedImm.Val;
}
- static bool isNonConstantExpr(const MCExpr *E,
- AArch64MCExpr::VariantKind &Variant) {
- if (const AArch64MCExpr *A64E = dyn_cast<AArch64MCExpr>(E)) {
- Variant = A64E->getKind();
- return true;
- } else if (!isa<MCConstantExpr>(E)) {
- Variant = AArch64MCExpr::VK_AARCH64_None;
- return true;
- }
-
- return false;
+ unsigned getShiftedImmShift() const {
+ assert(Kind == k_ShiftedImm && "Invalid access!");
+ return ShiftedImm.ShiftAmount;
}
- bool isCondCode() const { return Kind == k_CondCode; }
- bool isToken() const { return Kind == k_Token; }
- bool isReg() const { return Kind == k_Register; }
- bool isImm() const { return Kind == k_Immediate; }
- bool isMem() const { return false; }
- bool isFPImm() const { return Kind == k_FPImmediate; }
- bool isShiftOrExtend() const { return Kind == k_ShiftExtend; }
- bool isSysReg() const { return Kind == k_SysReg; }
- bool isImmWithLSL() const { return Kind == k_ImmWithLSL; }
- bool isWrappedReg() const { return Kind == k_WrappedRegister; }
-
- bool isAddSubImmLSL0() const {
- if (!isImmWithLSL()) return false;
- if (ImmWithLSL.ShiftAmount != 0) return false;
-
- AArch64MCExpr::VariantKind Variant;
- if (isNonConstantExpr(ImmWithLSL.Val, Variant)) {
- return Variant == AArch64MCExpr::VK_AARCH64_LO12
- || Variant == AArch64MCExpr::VK_AARCH64_DTPREL_LO12
- || Variant == AArch64MCExpr::VK_AARCH64_DTPREL_LO12_NC
- || Variant == AArch64MCExpr::VK_AARCH64_TPREL_LO12
- || Variant == AArch64MCExpr::VK_AARCH64_TPREL_LO12_NC
- || Variant == AArch64MCExpr::VK_AARCH64_TLSDESC_LO12;
- }
-
- // Otherwise it should be a real immediate in range:
- const MCConstantExpr *CE = cast<MCConstantExpr>(ImmWithLSL.Val);
- return CE->getValue() >= 0 && CE->getValue() <= 0xfff;
+ AArch64CC::CondCode getCondCode() const {
+ assert(Kind == k_CondCode && "Invalid access!");
+ return CondCode.Code;
}
- bool isAddSubImmLSL12() const {
- if (!isImmWithLSL()) return false;
- if (ImmWithLSL.ShiftAmount != 12) return false;
-
- AArch64MCExpr::VariantKind Variant;
- if (isNonConstantExpr(ImmWithLSL.Val, Variant)) {
- return Variant == AArch64MCExpr::VK_AARCH64_DTPREL_HI12
- || Variant == AArch64MCExpr::VK_AARCH64_TPREL_HI12;
- }
-
- // Otherwise it should be a real immediate in range:
- const MCConstantExpr *CE = cast<MCConstantExpr>(ImmWithLSL.Val);
- return CE->getValue() >= 0 && CE->getValue() <= 0xfff;
+ unsigned getFPImm() const {
+ assert(Kind == k_FPImm && "Invalid access!");
+ return FPImm.Val;
}
- template<unsigned MemSize, unsigned RmSize> bool isAddrRegExtend() const {
- if (!isShiftOrExtend()) return false;
-
- A64SE::ShiftExtSpecifiers Ext = ShiftExtend.ShiftType;
- if (RmSize == 32 && !(Ext == A64SE::UXTW || Ext == A64SE::SXTW))
- return false;
-
- if (RmSize == 64 && !(Ext == A64SE::LSL || Ext == A64SE::SXTX))
- return false;
-
- return ShiftExtend.Amount == Log2_32(MemSize) || ShiftExtend.Amount == 0;
+ unsigned getBarrier() const {
+ assert(Kind == k_Barrier && "Invalid access!");
+ return Barrier.Val;
}
- bool isAdrpLabel() const {
- if (!isImm()) return false;
-
- AArch64MCExpr::VariantKind Variant;
- if (isNonConstantExpr(getImm(), Variant)) {
- return Variant == AArch64MCExpr::VK_AARCH64_None
- || Variant == AArch64MCExpr::VK_AARCH64_GOT
- || Variant == AArch64MCExpr::VK_AARCH64_GOTTPREL
- || Variant == AArch64MCExpr::VK_AARCH64_TLSDESC;
- }
-
- return isLabel<21, 4096>();
+ unsigned getReg() const override {
+ assert(Kind == k_Register && "Invalid access!");
+ return Reg.RegNum;
}
- template<unsigned RegWidth> bool isBitfieldWidth() const {
- if (!isImm()) return false;
-
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- if (!CE) return false;
-
- return CE->getValue() >= 1 && CE->getValue() <= RegWidth;
+ unsigned getVectorListStart() const {
+ assert(Kind == k_VectorList && "Invalid access!");
+ return VectorList.RegNum;
}
- template<int RegWidth>
- bool isCVTFixedPos() const {
- if (!isImm()) return false;
-
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- if (!CE) return false;
-
- return CE->getValue() >= 1 && CE->getValue() <= RegWidth;
+ unsigned getVectorListCount() const {
+ assert(Kind == k_VectorList && "Invalid access!");
+ return VectorList.Count;
}
- bool isFMOVImm() const {
- if (!isFPImm()) return false;
-
- APFloat RealVal(FPImm.Val);
- uint32_t ImmVal;
- return A64Imms::isFPImm(RealVal, ImmVal);
+ unsigned getVectorIndex() const {
+ assert(Kind == k_VectorIndex && "Invalid access!");
+ return VectorIndex.Val;
}
- bool isFPZero() const {
- if (!isFPImm()) return false;
+ StringRef getSysReg() const {
+ assert(Kind == k_SysReg && "Invalid access!");
+ return StringRef(SysReg.Data, SysReg.Length);
+ }
- APFloat RealVal(FPImm.Val);
- return RealVal.isPosZero();
+ uint64_t getSysRegFeatureBits() const {
+ assert(Kind == k_SysReg && "Invalid access!");
+ return SysReg.FeatureBits;
}
- template<unsigned field_width, unsigned scale>
- bool isLabel() const {
- if (!isImm()) return false;
+ unsigned getSysCR() const {
+ assert(Kind == k_SysCR && "Invalid access!");
+ return SysCRImm.Val;
+ }
- if (dyn_cast<MCSymbolRefExpr>(Imm.Val)) {
- return true;
- } else if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Imm.Val)) {
- int64_t Val = CE->getValue();
- int64_t Min = - (scale * (1LL << (field_width - 1)));
- int64_t Max = scale * ((1LL << (field_width - 1)) - 1);
- return (Val % scale) == 0 && Val >= Min && Val <= Max;
- }
+ unsigned getPrefetch() const {
+ assert(Kind == k_Prefetch && "Invalid access!");
+ return Prefetch.Val;
+ }
- // N.b. this disallows explicit relocation specifications via an
- // AArch64MCExpr. Users needing that behaviour
- return false;
+ AArch64_AM::ShiftExtendType getShiftExtendType() const {
+ assert(Kind == k_ShiftExtend && "Invalid access!");
+ return ShiftExtend.Type;
}
- bool isLane1() const {
- if (!isImm()) return false;
+ unsigned getShiftExtendAmount() const {
+ assert(Kind == k_ShiftExtend && "Invalid access!");
+ return ShiftExtend.Amount;
+ }
- // Because it's come through custom assembly parsing, it must always be a
- // constant expression.
- return cast<MCConstantExpr>(getImm())->getValue() == 1;
+ bool hasShiftExtendAmount() const {
+ assert(Kind == k_ShiftExtend && "Invalid access!");
+ return ShiftExtend.HasExplicitAmount;
}
- bool isLoadLitLabel() const {
- if (!isImm()) return false;
+ bool isImm() const override { return Kind == k_Immediate; }
+ bool isMem() const override { return false; }
+ bool isSImm9() const {
+ if (!isImm())
+ return false;
+ const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
+ if (!MCE)
+ return false;
+ int64_t Val = MCE->getValue();
+ return (Val >= -256 && Val < 256);
+ }
+ bool isSImm7s4() const {
+ if (!isImm())
+ return false;
+ const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
+ if (!MCE)
+ return false;
+ int64_t Val = MCE->getValue();
+ return (Val >= -256 && Val <= 252 && (Val & 3) == 0);
+ }
+ bool isSImm7s8() const {
+ if (!isImm())
+ return false;
+ const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
+ if (!MCE)
+ return false;
+ int64_t Val = MCE->getValue();
+ return (Val >= -512 && Val <= 504 && (Val & 7) == 0);
+ }
+ bool isSImm7s16() const {
+ if (!isImm())
+ return false;
+ const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
+ if (!MCE)
+ return false;
+ int64_t Val = MCE->getValue();
+ return (Val >= -1024 && Val <= 1008 && (Val & 15) == 0);
+ }
+
+ bool isSymbolicUImm12Offset(const MCExpr *Expr, unsigned Scale) const {
+ AArch64MCExpr::VariantKind ELFRefKind;
+ MCSymbolRefExpr::VariantKind DarwinRefKind;
+ int64_t Addend;
+ if (!AArch64AsmParser::classifySymbolRef(Expr, ELFRefKind, DarwinRefKind,
+ Addend)) {
+ // If we don't understand the expression, assume the best and
+ // let the fixup and relocation code deal with it.
+ return true;
+ }
- AArch64MCExpr::VariantKind Variant;
- if (isNonConstantExpr(getImm(), Variant)) {
- return Variant == AArch64MCExpr::VK_AARCH64_None
- || Variant == AArch64MCExpr::VK_AARCH64_GOTTPREL;
+ if (DarwinRefKind == MCSymbolRefExpr::VK_PAGEOFF ||
+ ELFRefKind == AArch64MCExpr::VK_LO12 ||
+ ELFRefKind == AArch64MCExpr::VK_GOT_LO12 ||
+ ELFRefKind == AArch64MCExpr::VK_DTPREL_LO12 ||
+ ELFRefKind == AArch64MCExpr::VK_DTPREL_LO12_NC ||
+ ELFRefKind == AArch64MCExpr::VK_TPREL_LO12 ||
+ ELFRefKind == AArch64MCExpr::VK_TPREL_LO12_NC ||
+ ELFRefKind == AArch64MCExpr::VK_GOTTPREL_LO12_NC ||
+ ELFRefKind == AArch64MCExpr::VK_TLSDESC_LO12) {
+ // Note that we don't range-check the addend. It's adjusted modulo page
+ // size when converted, so there is no "out of range" condition when using
+ // @pageoff.
+ return Addend >= 0 && (Addend % Scale) == 0;
+ } else if (DarwinRefKind == MCSymbolRefExpr::VK_GOTPAGEOFF ||
+ DarwinRefKind == MCSymbolRefExpr::VK_TLVPPAGEOFF) {
+ // @gotpageoff/@tlvppageoff can only be used directly, not with an addend.
+ return Addend == 0;
}
- return isLabel<19, 4>();
+ return false;
}
- template<unsigned RegWidth> bool isLogicalImm() const {
- if (!isImm()) return false;
+ template <int Scale> bool isUImm12Offset() const {
+ if (!isImm())
+ return false;
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Imm.Val);
- if (!CE) return false;
+ const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
+ if (!MCE)
+ return isSymbolicUImm12Offset(getImm(), Scale);
- uint32_t Bits;
- return A64Imms::isLogicalImm(RegWidth, CE->getValue(), Bits);
+ int64_t Val = MCE->getValue();
+ return (Val % Scale) == 0 && Val >= 0 && (Val / Scale) < 0x1000;
}
- template<unsigned RegWidth> bool isLogicalImmMOV() const {
- if (!isLogicalImm<RegWidth>()) return false;
-
- const MCConstantExpr *CE = cast<MCConstantExpr>(Imm.Val);
-
- // The move alias for ORR is only valid if the immediate cannot be
- // represented with a move (immediate) instruction; they take priority.
- int UImm16, Shift;
- return !A64Imms::isMOVZImm(RegWidth, CE->getValue(), UImm16, Shift)
- && !A64Imms::isMOVNImm(RegWidth, CE->getValue(), UImm16, Shift);
+ bool isImm0_7() const {
+ if (!isImm())
+ return false;
+ const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
+ if (!MCE)
+ return false;
+ int64_t Val = MCE->getValue();
+ return (Val >= 0 && Val < 8);
+ }
+ bool isImm1_8() const {
+ if (!isImm())
+ return false;
+ const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
+ if (!MCE)
+ return false;
+ int64_t Val = MCE->getValue();
+ return (Val > 0 && Val < 9);
+ }
+ bool isImm0_15() const {
+ if (!isImm())
+ return false;
+ const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
+ if (!MCE)
+ return false;
+ int64_t Val = MCE->getValue();
+ return (Val >= 0 && Val < 16);
+ }
+ bool isImm1_16() const {
+ if (!isImm())
+ return false;
+ const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
+ if (!MCE)
+ return false;
+ int64_t Val = MCE->getValue();
+ return (Val > 0 && Val < 17);
+ }
+ bool isImm0_31() const {
+ if (!isImm())
+ return false;
+ const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
+ if (!MCE)
+ return false;
+ int64_t Val = MCE->getValue();
+ return (Val >= 0 && Val < 32);
+ }
+ bool isImm1_31() const {
+ if (!isImm())
+ return false;
+ const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
+ if (!MCE)
+ return false;
+ int64_t Val = MCE->getValue();
+ return (Val >= 1 && Val < 32);
+ }
+ bool isImm1_32() const {
+ if (!isImm())
+ return false;
+ const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
+ if (!MCE)
+ return false;
+ int64_t Val = MCE->getValue();
+ return (Val >= 1 && Val < 33);
+ }
+ bool isImm0_63() const {
+ if (!isImm())
+ return false;
+ const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
+ if (!MCE)
+ return false;
+ int64_t Val = MCE->getValue();
+ return (Val >= 0 && Val < 64);
+ }
+ bool isImm1_63() const {
+ if (!isImm())
+ return false;
+ const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
+ if (!MCE)
+ return false;
+ int64_t Val = MCE->getValue();
+ return (Val >= 1 && Val < 64);
+ }
+ bool isImm1_64() const {
+ if (!isImm())
+ return false;
+ const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
+ if (!MCE)
+ return false;
+ int64_t Val = MCE->getValue();
+ return (Val >= 1 && Val < 65);
+ }
+ bool isImm0_127() const {
+ if (!isImm())
+ return false;
+ const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
+ if (!MCE)
+ return false;
+ int64_t Val = MCE->getValue();
+ return (Val >= 0 && Val < 128);
+ }
+ bool isImm0_255() const {
+ if (!isImm())
+ return false;
+ const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
+ if (!MCE)
+ return false;
+ int64_t Val = MCE->getValue();
+ return (Val >= 0 && Val < 256);
+ }
+ bool isImm0_65535() const {
+ if (!isImm())
+ return false;
+ const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
+ if (!MCE)
+ return false;
+ int64_t Val = MCE->getValue();
+ return (Val >= 0 && Val < 65536);
+ }
+ bool isImm32_63() const {
+ if (!isImm())
+ return false;
+ const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
+ if (!MCE)
+ return false;
+ int64_t Val = MCE->getValue();
+ return (Val >= 32 && Val < 64);
+ }
+ bool isLogicalImm32() const {
+ if (!isImm())
+ return false;
+ const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
+ if (!MCE)
+ return false;
+ return AArch64_AM::isLogicalImmediate(MCE->getValue(), 32);
}
+ bool isLogicalImm64() const {
+ if (!isImm())
+ return false;
+ const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
+ if (!MCE)
+ return false;
+ return AArch64_AM::isLogicalImmediate(MCE->getValue(), 64);
+ }
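
Reviewer note: AArch64_AM::isLogicalImmediate accepts exactly the values expressible as a pattern of element size 2/4/8/16/32/64 replicated across the register, where each element is a rotated contiguous run of ones. A compact standalone checker in the same spirit (the transition-counting formulation here is ours, not LLVM's; C++20 for std::popcount):

    #include <bit>
    #include <cstdint>

    static uint64_t rotr1(uint64_t V, unsigned Size) {
      uint64_t Mask = (Size == 64) ? ~0ULL : ((1ULL << Size) - 1);
      V &= Mask;
      return ((V >> 1) | (V << (Size - 1))) & Mask;
    }

    bool isLogicalImm(uint64_t Val, unsigned RegSize) {
      if (RegSize == 32) {
        Val &= 0xffffffffULL;
        Val |= Val << 32; // replicate to the full 64 bits
      }
      if (Val == 0 || Val == ~0ULL)
        return false; // all-zeros/all-ones are not encodable
      // Find the smallest power-of-two element size that replicates.
      unsigned Size = 64;
      while (Size > 2) {
        unsigned Half = Size / 2;
        uint64_t Mask = (1ULL << Half) - 1;
        if ((Val & Mask) != ((Val >> Half) & Mask))
          break;
        Size = Half;
      }
      // The element must be one contiguous circular run of ones, i.e.
      // exactly two 0<->1 transitions when scanned circularly.
      uint64_t Mask = (Size == 64) ? ~0ULL : ((1ULL << Size) - 1);
      uint64_t Elt = Val & Mask;
      return std::popcount((Elt ^ rotr1(Elt, Size)) & Mask) == 2;
    }

For instance 0x5555555555555555 (element 0b01 at size 2) and 0x0000ffff0000ffff (a 16-bit run at size 32) pass, while 0xb fails because its set bits do not form one circular run.
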
+ bool isShiftedImm() const { return Kind == k_ShiftedImm; }
+ bool isAddSubImm() const {
+ if (!isShiftedImm() && !isImm())
+ return false;
- template<int MemSize>
- bool isOffsetUImm12() const {
- if (!isImm()) return false;
+ const MCExpr *Expr;
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ // An ADD/SUB shifter is either 'lsl #0' or 'lsl #12'.
+ if (isShiftedImm()) {
+ unsigned Shift = ShiftedImm.ShiftAmount;
+ Expr = ShiftedImm.Val;
+ if (Shift != 0 && Shift != 12)
+ return false;
+ } else {
+ Expr = getImm();
+ }
- // Assume they know what they're doing for now if they've given us a
- // non-constant expression. In principle we could check for ridiculous
- // things that can't possibly work or relocations that would almost
- // certainly break resulting code.
- if (!CE)
+ AArch64MCExpr::VariantKind ELFRefKind;
+ MCSymbolRefExpr::VariantKind DarwinRefKind;
+ int64_t Addend;
+ if (AArch64AsmParser::classifySymbolRef(Expr, ELFRefKind,
+ DarwinRefKind, Addend)) {
+ return DarwinRefKind == MCSymbolRefExpr::VK_PAGEOFF
+ || DarwinRefKind == MCSymbolRefExpr::VK_TLVPPAGEOFF
+ || (DarwinRefKind == MCSymbolRefExpr::VK_GOTPAGEOFF && Addend == 0)
+ || ELFRefKind == AArch64MCExpr::VK_LO12
+ || ELFRefKind == AArch64MCExpr::VK_DTPREL_HI12
+ || ELFRefKind == AArch64MCExpr::VK_DTPREL_LO12
+ || ELFRefKind == AArch64MCExpr::VK_DTPREL_LO12_NC
+ || ELFRefKind == AArch64MCExpr::VK_TPREL_HI12
+ || ELFRefKind == AArch64MCExpr::VK_TPREL_LO12
+ || ELFRefKind == AArch64MCExpr::VK_TPREL_LO12_NC
+ || ELFRefKind == AArch64MCExpr::VK_TLSDESC_LO12;
+ }
+
+ // Otherwise it should be a real immediate in range:
+ const MCConstantExpr *CE = cast<MCConstantExpr>(Expr);
+ return CE->getValue() >= 0 && CE->getValue() <= 0xfff;
+ }
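
Reviewer note: for the constant case this boils down to one encoding fact: the value must fit a 12-bit immediate, optionally pre-shifted left by 12. A standalone sketch of that test (the operand plumbing above additionally admits the relocation cases):

    #include <cstdint>

    bool fitsAddSubImm(uint64_t V) {
      return (V & ~0xfffULL) == 0 ||        // imm12, lsl #0
             (V & ~(0xfffULL << 12)) == 0;  // imm12, lsl #12
    }
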
+ bool isCondCode() const { return Kind == k_CondCode; }
+ bool isSIMDImmType10() const {
+ if (!isImm())
+ return false;
+ const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
+ if (!MCE)
+ return false;
+ return AArch64_AM::isAdvSIMDModImmType10(MCE->getValue());
+ }
+ bool isBranchTarget26() const {
+ if (!isImm())
+ return false;
+ const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
+ if (!MCE)
+ return true;
+ int64_t Val = MCE->getValue();
+ if (Val & 0x3)
+ return false;
+ return (Val >= -(0x2000000 << 2) && Val <= (0x1ffffff << 2));
+ }
+ bool isPCRelLabel19() const {
+ if (!isImm())
+ return false;
+ const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
+ if (!MCE)
+ return true;
+ int64_t Val = MCE->getValue();
+ if (Val & 0x3)
+ return false;
+ return (Val >= -(0x40000 << 2) && Val <= (0x3ffff << 2));
+ }
+ bool isBranchTarget14() const {
+ if (!isImm())
+ return false;
+ const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
+ if (!MCE)
return true;
+ int64_t Val = MCE->getValue();
+ if (Val & 0x3)
+ return false;
+ return (Val >= -(0x2000 << 2) && Val <= (0x1fff << 2));
+ }
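
Reviewer note: the three branch-target predicates differ only in field width; a Bits-wide signed field encodes a word (4-byte) offset. A generic standalone sketch, where Bits = 26 matches isBranchTarget26 (B/BL), 19 matches isPCRelLabel19, and 14 matches isBranchTarget14 (TBZ/TBNZ):

    #include <cstdint>

    bool fitsBranchTarget(int64_t ByteOffset, unsigned Bits) {
      if (ByteOffset & 0x3)
        return false; // targets must be 4-byte aligned
      int64_t Min = -(int64_t(1) << (Bits - 1)) * 4;
      int64_t Max = ((int64_t(1) << (Bits - 1)) - 1) * 4;
      return ByteOffset >= Min && ByteOffset <= Max;
    }
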
- int64_t Val = CE->getValue();
+ bool
+ isMovWSymbol(ArrayRef<AArch64MCExpr::VariantKind> AllowedModifiers) const {
+ if (!isImm())
+ return false;
- // Must be a multiple of the access size in bytes.
- if ((Val & (MemSize - 1)) != 0) return false;
+ AArch64MCExpr::VariantKind ELFRefKind;
+ MCSymbolRefExpr::VariantKind DarwinRefKind;
+ int64_t Addend;
+ if (!AArch64AsmParser::classifySymbolRef(getImm(), ELFRefKind,
+ DarwinRefKind, Addend)) {
+ return false;
+ }
+ if (DarwinRefKind != MCSymbolRefExpr::VK_None)
+ return false;
- // Must be 12-bit unsigned
- return Val >= 0 && Val <= 0xfff * MemSize;
- }
+ for (unsigned i = 0; i != AllowedModifiers.size(); ++i) {
+ if (ELFRefKind == AllowedModifiers[i])
+ return Addend == 0;
+ }
- template<A64SE::ShiftExtSpecifiers SHKind, bool is64Bit>
- bool isShift() const {
- if (!isShiftOrExtend()) return false;
+ return false;
+ }
- if (ShiftExtend.ShiftType != SHKind)
- return false;
+ bool isMovZSymbolG3() const {
+ static AArch64MCExpr::VariantKind Variants[] = { AArch64MCExpr::VK_ABS_G3 };
+ return isMovWSymbol(Variants);
+ }
- return is64Bit ? ShiftExtend.Amount <= 63 : ShiftExtend.Amount <= 31;
+ bool isMovZSymbolG2() const {
+ static AArch64MCExpr::VariantKind Variants[] = {
+ AArch64MCExpr::VK_ABS_G2, AArch64MCExpr::VK_ABS_G2_S,
+ AArch64MCExpr::VK_TPREL_G2, AArch64MCExpr::VK_DTPREL_G2};
+ return isMovWSymbol(Variants);
}
- bool isMOVN32Imm() const {
- static const AArch64MCExpr::VariantKind PermittedModifiers[] = {
- AArch64MCExpr::VK_AARCH64_SABS_G0,
- AArch64MCExpr::VK_AARCH64_SABS_G1,
- AArch64MCExpr::VK_AARCH64_DTPREL_G1,
- AArch64MCExpr::VK_AARCH64_DTPREL_G0,
- AArch64MCExpr::VK_AARCH64_GOTTPREL_G1,
- AArch64MCExpr::VK_AARCH64_TPREL_G1,
- AArch64MCExpr::VK_AARCH64_TPREL_G0,
+ bool isMovZSymbolG1() const {
+ static AArch64MCExpr::VariantKind Variants[] = {
+ AArch64MCExpr::VK_ABS_G1, AArch64MCExpr::VK_ABS_G1_S,
+ AArch64MCExpr::VK_GOTTPREL_G1, AArch64MCExpr::VK_TPREL_G1,
+ AArch64MCExpr::VK_DTPREL_G1,
};
- const unsigned NumModifiers = llvm::array_lengthof(PermittedModifiers);
-
- return isMoveWideImm(32, PermittedModifiers, NumModifiers);
- }
-
- bool isMOVN64Imm() const {
- static const AArch64MCExpr::VariantKind PermittedModifiers[] = {
- AArch64MCExpr::VK_AARCH64_SABS_G0,
- AArch64MCExpr::VK_AARCH64_SABS_G1,
- AArch64MCExpr::VK_AARCH64_SABS_G2,
- AArch64MCExpr::VK_AARCH64_DTPREL_G2,
- AArch64MCExpr::VK_AARCH64_DTPREL_G1,
- AArch64MCExpr::VK_AARCH64_DTPREL_G0,
- AArch64MCExpr::VK_AARCH64_GOTTPREL_G1,
- AArch64MCExpr::VK_AARCH64_TPREL_G2,
- AArch64MCExpr::VK_AARCH64_TPREL_G1,
- AArch64MCExpr::VK_AARCH64_TPREL_G0,
- };
- const unsigned NumModifiers = llvm::array_lengthof(PermittedModifiers);
-
- return isMoveWideImm(64, PermittedModifiers, NumModifiers);
+ return isMovWSymbol(Variants);
}
+ bool isMovZSymbolG0() const {
+ static AArch64MCExpr::VariantKind Variants[] = {
+ AArch64MCExpr::VK_ABS_G0, AArch64MCExpr::VK_ABS_G0_S,
+ AArch64MCExpr::VK_TPREL_G0, AArch64MCExpr::VK_DTPREL_G0};
+ return isMovWSymbol(Variants);
+ }
- bool isMOVZ32Imm() const {
- static const AArch64MCExpr::VariantKind PermittedModifiers[] = {
- AArch64MCExpr::VK_AARCH64_ABS_G0,
- AArch64MCExpr::VK_AARCH64_ABS_G1,
- AArch64MCExpr::VK_AARCH64_SABS_G0,
- AArch64MCExpr::VK_AARCH64_SABS_G1,
- AArch64MCExpr::VK_AARCH64_DTPREL_G1,
- AArch64MCExpr::VK_AARCH64_DTPREL_G0,
- AArch64MCExpr::VK_AARCH64_GOTTPREL_G1,
- AArch64MCExpr::VK_AARCH64_TPREL_G1,
- AArch64MCExpr::VK_AARCH64_TPREL_G0,
- };
- const unsigned NumModifiers = llvm::array_lengthof(PermittedModifiers);
-
- return isMoveWideImm(32, PermittedModifiers, NumModifiers);
- }
-
- bool isMOVZ64Imm() const {
- static const AArch64MCExpr::VariantKind PermittedModifiers[] = {
- AArch64MCExpr::VK_AARCH64_ABS_G0,
- AArch64MCExpr::VK_AARCH64_ABS_G1,
- AArch64MCExpr::VK_AARCH64_ABS_G2,
- AArch64MCExpr::VK_AARCH64_ABS_G3,
- AArch64MCExpr::VK_AARCH64_SABS_G0,
- AArch64MCExpr::VK_AARCH64_SABS_G1,
- AArch64MCExpr::VK_AARCH64_SABS_G2,
- AArch64MCExpr::VK_AARCH64_DTPREL_G2,
- AArch64MCExpr::VK_AARCH64_DTPREL_G1,
- AArch64MCExpr::VK_AARCH64_DTPREL_G0,
- AArch64MCExpr::VK_AARCH64_GOTTPREL_G1,
- AArch64MCExpr::VK_AARCH64_TPREL_G2,
- AArch64MCExpr::VK_AARCH64_TPREL_G1,
- AArch64MCExpr::VK_AARCH64_TPREL_G0,
- };
- const unsigned NumModifiers = llvm::array_lengthof(PermittedModifiers);
+ bool isMovKSymbolG3() const {
+ static AArch64MCExpr::VariantKind Variants[] = { AArch64MCExpr::VK_ABS_G3 };
+ return isMovWSymbol(Variants);
+ }
- return isMoveWideImm(64, PermittedModifiers, NumModifiers);
+ bool isMovKSymbolG2() const {
+ static AArch64MCExpr::VariantKind Variants[] = {
+ AArch64MCExpr::VK_ABS_G2_NC};
+ return isMovWSymbol(Variants);
}
- bool isMOVK32Imm() const {
- static const AArch64MCExpr::VariantKind PermittedModifiers[] = {
- AArch64MCExpr::VK_AARCH64_ABS_G0_NC,
- AArch64MCExpr::VK_AARCH64_ABS_G1_NC,
- AArch64MCExpr::VK_AARCH64_DTPREL_G1_NC,
- AArch64MCExpr::VK_AARCH64_DTPREL_G0_NC,
- AArch64MCExpr::VK_AARCH64_GOTTPREL_G0_NC,
- AArch64MCExpr::VK_AARCH64_TPREL_G1_NC,
- AArch64MCExpr::VK_AARCH64_TPREL_G0_NC,
- };
- const unsigned NumModifiers = llvm::array_lengthof(PermittedModifiers);
-
- return isMoveWideImm(32, PermittedModifiers, NumModifiers);
- }
-
- bool isMOVK64Imm() const {
- static const AArch64MCExpr::VariantKind PermittedModifiers[] = {
- AArch64MCExpr::VK_AARCH64_ABS_G0_NC,
- AArch64MCExpr::VK_AARCH64_ABS_G1_NC,
- AArch64MCExpr::VK_AARCH64_ABS_G2_NC,
- AArch64MCExpr::VK_AARCH64_ABS_G3,
- AArch64MCExpr::VK_AARCH64_DTPREL_G1_NC,
- AArch64MCExpr::VK_AARCH64_DTPREL_G0_NC,
- AArch64MCExpr::VK_AARCH64_GOTTPREL_G0_NC,
- AArch64MCExpr::VK_AARCH64_TPREL_G1_NC,
- AArch64MCExpr::VK_AARCH64_TPREL_G0_NC,
+ bool isMovKSymbolG1() const {
+ static AArch64MCExpr::VariantKind Variants[] = {
+ AArch64MCExpr::VK_ABS_G1_NC, AArch64MCExpr::VK_TPREL_G1_NC,
+ AArch64MCExpr::VK_DTPREL_G1_NC
};
- const unsigned NumModifiers = llvm::array_lengthof(PermittedModifiers);
-
- return isMoveWideImm(64, PermittedModifiers, NumModifiers);
+ return isMovWSymbol(Variants);
}
- bool isMoveWideImm(unsigned RegWidth,
- const AArch64MCExpr::VariantKind *PermittedModifiers,
- unsigned NumModifiers) const {
- if (!isImmWithLSL()) return false;
+ bool isMovKSymbolG0() const {
+ static AArch64MCExpr::VariantKind Variants[] = {
+ AArch64MCExpr::VK_ABS_G0_NC, AArch64MCExpr::VK_GOTTPREL_G0_NC,
+ AArch64MCExpr::VK_TPREL_G0_NC, AArch64MCExpr::VK_DTPREL_G0_NC
+ };
+ return isMovWSymbol(Variants);
+ }
- if (ImmWithLSL.ShiftAmount % 16 != 0) return false;
- if (ImmWithLSL.ShiftAmount >= RegWidth) return false;
+ template<int RegWidth, int Shift>
+ bool isMOVZMovAlias() const {
+ if (!isImm()) return false;
- AArch64MCExpr::VariantKind Modifier;
- if (isNonConstantExpr(ImmWithLSL.Val, Modifier)) {
- // E.g. "#:abs_g0:sym, lsl #16" makes no sense.
- if (!ImmWithLSL.ImplicitAmount) return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ uint64_t Value = CE->getValue();
- for (unsigned i = 0; i < NumModifiers; ++i)
- if (PermittedModifiers[i] == Modifier) return true;
+ if (RegWidth == 32)
+ Value &= 0xffffffffULL;
+ // "lsl #0" takes precedence: in practice this only affects "#0, lsl #0".
+ if (Value == 0 && Shift != 0)
return false;
- }
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(ImmWithLSL.Val);
- return CE && CE->getValue() >= 0 && CE->getValue() <= 0xffff;
+ return (Value & ~(0xffffULL << Shift)) == 0;
}
- template<int RegWidth, bool (*isValidImm)(int, uint64_t, int&, int&)>
- bool isMoveWideMovAlias() const {
+ template<int RegWidth, int Shift>
+ bool isMOVNMovAlias() const {
if (!isImm()) return false;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
if (!CE) return false;
-
- int UImm16, Shift;
uint64_t Value = CE->getValue();
- // If this is a 32-bit instruction then all bits above 32 should be the
- // same: either of these is fine because signed/unsigned values should be
- // permitted.
- if (RegWidth == 32) {
- if ((Value >> 32) != 0 && (Value >> 32) != 0xffffffff)
+ // MOVZ takes precedence over MOVN.
+ for (int MOVZShift = 0; MOVZShift <= 48; MOVZShift += 16)
+ if ((Value & ~(0xffffULL << MOVZShift)) == 0)
return false;
+ Value = ~Value;
+ if (RegWidth == 32)
Value &= 0xffffffffULL;
- }
- return isValidImm(RegWidth, Value, UImm16, Shift);
+ return (Value & ~(0xffffULL << Shift)) == 0;
}
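
Reviewer note: stripped of the MC plumbing, the two alias tests above are: MOVZ wants everything outside one 16-bit chunk at the given shift to be zero (with "#0, lsl #0" canonical), and MOVN applies the same test to the complement, but only when no MOVZ form exists. A standalone sketch with the template parameters turned into arguments:

    #include <cstdint>

    bool isMOVZAlias(uint64_t Value, unsigned RegWidth, unsigned Shift) {
      if (RegWidth == 32)
        Value &= 0xffffffffULL;
      // "lsl #0" takes precedence, so "#0, lsl #16" is rejected here.
      if (Value == 0 && Shift != 0)
        return false;
      return (Value & ~(0xffffULL << Shift)) == 0;
    }

    bool isMOVNAlias(uint64_t Value, unsigned RegWidth, unsigned Shift) {
      // MOVZ takes precedence over MOVN.
      for (int MOVZShift = 0; MOVZShift <= 48; MOVZShift += 16)
        if ((Value & ~(0xffffULL << MOVZShift)) == 0)
          return false;
      Value = ~Value; // MOVN materializes the complement
      if (RegWidth == 32)
        Value &= 0xffffffffULL;
      return (Value & ~(0xffffULL << Shift)) == 0;
    }
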
- bool isMSRWithReg() const {
+ bool isFPImm() const { return Kind == k_FPImm; }
+ bool isBarrier() const { return Kind == k_Barrier; }
+ bool isSysReg() const { return Kind == k_SysReg; }
+ bool isMRSSystemRegister() const {
if (!isSysReg()) return false;
bool IsKnownRegister;
- StringRef Name(SysReg.Data, SysReg.Length);
- A64SysReg::MSRMapper().fromString(Name, IsKnownRegister);
+ auto Mapper = AArch64SysReg::MRSMapper(getSysRegFeatureBits());
+ Mapper.fromString(getSysReg(), IsKnownRegister);
return IsKnownRegister;
}
-
- bool isMSRPState() const {
+ bool isMSRSystemRegister() const {
if (!isSysReg()) return false;
bool IsKnownRegister;
- StringRef Name(SysReg.Data, SysReg.Length);
- A64PState::PStateMapper().fromString(Name, IsKnownRegister);
+ auto Mapper = AArch64SysReg::MSRMapper(getSysRegFeatureBits());
+ Mapper.fromString(getSysReg(), IsKnownRegister);
return IsKnownRegister;
}
-
- bool isMRS() const {
+ bool isSystemPStateField() const {
if (!isSysReg()) return false;
- // First check against specific MSR-only (write-only) registers
bool IsKnownRegister;
- StringRef Name(SysReg.Data, SysReg.Length);
- A64SysReg::MRSMapper().fromString(Name, IsKnownRegister);
+ AArch64PState::PStateMapper().fromString(getSysReg(), IsKnownRegister);
return IsKnownRegister;
}
+ bool isReg() const override { return Kind == k_Register && !Reg.isVector; }
+ bool isVectorReg() const { return Kind == k_Register && Reg.isVector; }
+ bool isVectorRegLo() const {
+ return Kind == k_Register && Reg.isVector &&
+ AArch64MCRegisterClasses[AArch64::FPR128_loRegClassID].contains(
+ Reg.RegNum);
+ }
+ bool isGPR32as64() const {
+ return Kind == k_Register && !Reg.isVector &&
+ AArch64MCRegisterClasses[AArch64::GPR64RegClassID].contains(Reg.RegNum);
+ }
- bool isPRFM() const {
- if (!isImm()) return false;
+ bool isGPR64sp0() const {
+ return Kind == k_Register && !Reg.isVector &&
+ AArch64MCRegisterClasses[AArch64::GPR64spRegClassID].contains(Reg.RegNum);
+ }
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ /// Is this a vector list with the type implicit (presumably attached to the
+ /// instruction itself)?
+ template <unsigned NumRegs> bool isImplicitlyTypedVectorList() const {
+ return Kind == k_VectorList && VectorList.Count == NumRegs &&
+ !VectorList.ElementKind;
+ }
- if (!CE)
+ template <unsigned NumRegs, unsigned NumElements, char ElementKind>
+ bool isTypedVectorList() const {
+ if (Kind != k_VectorList)
return false;
-
- return CE->getValue() >= 0 && CE->getValue() <= 31;
+ if (VectorList.Count != NumRegs)
+ return false;
+ if (VectorList.ElementKind != ElementKind)
+ return false;
+ return VectorList.NumElements == NumElements;
}
- template<A64SE::ShiftExtSpecifiers SHKind> bool isRegExtend() const {
- if (!isShiftOrExtend()) return false;
-
- if (ShiftExtend.ShiftType != SHKind)
+ bool isVectorIndex1() const {
+ return Kind == k_VectorIndex && VectorIndex.Val == 1;
+ }
+ bool isVectorIndexB() const {
+ return Kind == k_VectorIndex && VectorIndex.Val < 16;
+ }
+ bool isVectorIndexH() const {
+ return Kind == k_VectorIndex && VectorIndex.Val < 8;
+ }
+ bool isVectorIndexS() const {
+ return Kind == k_VectorIndex && VectorIndex.Val < 4;
+ }
+ bool isVectorIndexD() const {
+ return Kind == k_VectorIndex && VectorIndex.Val < 2;
+ }
+ bool isToken() const override { return Kind == k_Token; }
+ bool isTokenEqual(StringRef Str) const {
+ return Kind == k_Token && getToken() == Str;
+ }
+ bool isSysCR() const { return Kind == k_SysCR; }
+ bool isPrefetch() const { return Kind == k_Prefetch; }
+ bool isShiftExtend() const { return Kind == k_ShiftExtend; }
+ bool isShifter() const {
+ if (!isShiftExtend())
return false;
- return ShiftExtend.Amount <= 4;
+ AArch64_AM::ShiftExtendType ST = getShiftExtendType();
+ return (ST == AArch64_AM::LSL || ST == AArch64_AM::LSR ||
+ ST == AArch64_AM::ASR || ST == AArch64_AM::ROR ||
+ ST == AArch64_AM::MSL);
}
-
- bool isRegExtendLSL() const {
- if (!isShiftOrExtend()) return false;
-
- if (ShiftExtend.ShiftType != A64SE::LSL)
+ bool isExtend() const {
+ if (!isShiftExtend())
return false;
- return !ShiftExtend.ImplicitAmount && ShiftExtend.Amount <= 4;
+ AArch64_AM::ShiftExtendType ET = getShiftExtendType();
+ return (ET == AArch64_AM::UXTB || ET == AArch64_AM::SXTB ||
+ ET == AArch64_AM::UXTH || ET == AArch64_AM::SXTH ||
+ ET == AArch64_AM::UXTW || ET == AArch64_AM::SXTW ||
+ ET == AArch64_AM::UXTX || ET == AArch64_AM::SXTX ||
+ ET == AArch64_AM::LSL) &&
+ getShiftExtendAmount() <= 4;
}
- // if 0 < value <= w, return true
- bool isShrFixedWidth(int w) const {
- if (!isImm())
+ bool isExtend64() const {
+ if (!isExtend())
return false;
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- if (!CE)
+ // UXTX and SXTX require a 64-bit source register (the ExtendLSL64 class).
+ AArch64_AM::ShiftExtendType ET = getShiftExtendType();
+ return ET != AArch64_AM::UXTX && ET != AArch64_AM::SXTX;
+ }
+ bool isExtendLSL64() const {
+ if (!isExtend())
return false;
- int64_t Value = CE->getValue();
- return Value > 0 && Value <= w;
+ AArch64_AM::ShiftExtendType ET = getShiftExtendType();
+ return (ET == AArch64_AM::UXTX || ET == AArch64_AM::SXTX ||
+ ET == AArch64_AM::LSL) &&
+ getShiftExtendAmount() <= 4;
}
- bool isShrImm8() const { return isShrFixedWidth(8); }
-
- bool isShrImm16() const { return isShrFixedWidth(16); }
-
- bool isShrImm32() const { return isShrFixedWidth(32); }
-
- bool isShrImm64() const { return isShrFixedWidth(64); }
-
- // if 0 <= value < w, return true
- bool isShlFixedWidth(int w) const {
- if (!isImm())
- return false;
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- if (!CE)
+ template<int Width> bool isMemXExtend() const {
+ if (!isExtend())
return false;
- int64_t Value = CE->getValue();
- return Value >= 0 && Value < w;
+ AArch64_AM::ShiftExtendType ET = getShiftExtendType();
+ return (ET == AArch64_AM::LSL || ET == AArch64_AM::SXTX) &&
+ (getShiftExtendAmount() == Log2_32(Width / 8) ||
+ getShiftExtendAmount() == 0);
}
- bool isShlImm8() const { return isShlFixedWidth(8); }
-
- bool isShlImm16() const { return isShlFixedWidth(16); }
-
- bool isShlImm32() const { return isShlFixedWidth(32); }
-
- bool isShlImm64() const { return isShlFixedWidth(64); }
-
- bool isNeonMovImmShiftLSL() const {
- if (!isShiftOrExtend())
+ template<int Width> bool isMemWExtend() const {
+ if (!isExtend())
return false;
+ AArch64_AM::ShiftExtendType ET = getShiftExtendType();
+ return (ET == AArch64_AM::UXTW || ET == AArch64_AM::SXTW) &&
+ (getShiftExtendAmount() == Log2_32(Width / 8) ||
+ getShiftExtendAmount() == 0);
+ }
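+ // E.g. for a 64-bit load, both "ldr x0, [x1, x2, lsl #3]" (amount ==
+ // log2(8)) and "ldr x0, [x1, x2, sxtx]" (amount 0) satisfy
+ // isMemXExtend<64>, matching the scaling rules for register offsets.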
- if (ShiftExtend.ShiftType != A64SE::LSL)
+ template <unsigned width>
+ bool isArithmeticShifter() const {
+ if (!isShifter())
return false;
- // Valid shift amount is 0, 8, 16 and 24.
- return ShiftExtend.Amount % 8 == 0 && ShiftExtend.Amount <= 24;
+ // An arithmetic shifter is LSL, LSR, or ASR.
+ AArch64_AM::ShiftExtendType ST = getShiftExtendType();
+ return (ST == AArch64_AM::LSL || ST == AArch64_AM::LSR ||
+ ST == AArch64_AM::ASR) && getShiftExtendAmount() < width;
}
- bool isNeonMovImmShiftLSLH() const {
- if (!isShiftOrExtend())
+ template <unsigned width>
+ bool isLogicalShifter() const {
+ if (!isShifter())
return false;
- if (ShiftExtend.ShiftType != A64SE::LSL)
+ // A logical shifter is LSL, LSR, ASR, or ROR.
+ AArch64_AM::ShiftExtendType ST = getShiftExtendType();
+ return (ST == AArch64_AM::LSL || ST == AArch64_AM::LSR ||
+ ST == AArch64_AM::ASR || ST == AArch64_AM::ROR) &&
+ getShiftExtendAmount() < width;
+ }
+
+ bool isMovImm32Shifter() const {
+ if (!isShifter())
return false;
- // Valid shift amount is 0 and 8.
- return ShiftExtend.Amount == 0 || ShiftExtend.Amount == 8;
+ // A 32-bit MOVi shifter is LSL of 0 or 16.
+ AArch64_AM::ShiftExtendType ST = getShiftExtendType();
+ if (ST != AArch64_AM::LSL)
+ return false;
+ uint64_t Val = getShiftExtendAmount();
+ return (Val == 0 || Val == 16);
}
- bool isNeonMovImmShiftMSL() const {
- if (!isShiftOrExtend())
+ bool isMovImm64Shifter() const {
+ if (!isShifter())
return false;
- if (ShiftExtend.ShiftType != A64SE::MSL)
+ // A 64-bit MOVi shifter is LSL of 0, 16, 32, or 48.
+ AArch64_AM::ShiftExtendType ST = getShiftExtendType();
+ if (ST != AArch64_AM::LSL)
return false;
-
- // Valid shift amount is 8 and 16.
- return ShiftExtend.Amount == 8 || ShiftExtend.Amount == 16;
+ uint64_t Val = getShiftExtendAmount();
+ return (Val == 0 || Val == 16 || Val == 32 || Val == 48);
}
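+ // E.g. "movz w0, #1, lsl #16" uses the 32-bit shifter; "lsl #32" and
+ // "lsl #48" are only reachable through the 64-bit form.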
- template <A64Layout::VectorLayout Layout, unsigned Count>
- bool isVectorList() const {
- return Kind == k_VectorList && VectorList.Layout == Layout &&
- VectorList.Count == Count;
+ bool isLogicalVecShifter() const {
+ if (!isShifter())
+ return false;
+
+ // A logical vector shifter is a left shift by 0, 8, 16, or 24.
+ unsigned Shift = getShiftExtendAmount();
+ return getShiftExtendType() == AArch64_AM::LSL &&
+ (Shift == 0 || Shift == 8 || Shift == 16 || Shift == 24);
}
- template <int MemSize> bool isSImm7Scaled() const {
- if (!isImm())
+ bool isLogicalVecHalfWordShifter() const {
+ if (!isLogicalVecShifter())
return false;
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- if (!CE) return false;
+ // A logical vector half-word shifter is a left shift by 0 or 8.
+ unsigned Shift = getShiftExtendAmount();
+ return getShiftExtendType() == AArch64_AM::LSL &&
+ (Shift == 0 || Shift == 8);
+ }
- int64_t Val = CE->getValue();
- if (Val % MemSize != 0) return false;
+ bool isMoveVecShifter() const {
+ if (!isShiftExtend())
+ return false;
- Val /= MemSize;
+ // A move vector shifter is an MSL shift by 8 or 16.
+ unsigned Shift = getShiftExtendAmount();
+ return getShiftExtendType() == AArch64_AM::MSL &&
+ (Shift == 8 || Shift == 16);
+ }
- return Val >= -64 && Val < 64;
+ // Fallback unscaled operands are for aliases of LDR/STR that fall back
+ // to LDUR/STUR when the offset is not legal for the former but is for
+ // the latter. As such, in addition to checking for being a legal unscaled
+ // address, also check that it is not a legal scaled address. This avoids
+ // ambiguity in the matcher.
+ template<int Width>
+ bool isSImm9OffsetFB() const {
+ return isSImm9() && !isUImm12Offset<Width / 8>();
}
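+ // E.g. with Width == 64, "ldr x0, [x1, #1]" has an offset that is not a
+ // multiple of 8, so it matches only this fallback operand and becomes
+ // LDUR; "ldr x0, [x1, #8]" is a legal scaled offset and stays LDR.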
- template<int BitWidth>
- bool isSImm() const {
- if (!isImm()) return false;
+ bool isAdrpLabel() const {
+ // Validation was handled during parsing, so we just sanity check that
+ // something didn't go haywire.
+ if (!isImm())
+ return false;
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- if (!CE) return false;
+ if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Imm.Val)) {
+ int64_t Val = CE->getValue();
+ int64_t Min = -(4096 * (1LL << (21 - 1)));
+ int64_t Max = 4096 * ((1LL << (21 - 1)) - 1);
+ return (Val % 4096) == 0 && Val >= Min && Val <= Max;
+ }
- return CE->getValue() >= -(1LL << (BitWidth - 1))
- && CE->getValue() < (1LL << (BitWidth - 1));
+ return true;
}
- template<int bitWidth>
- bool isUImm() const {
- if (!isImm()) return false;
+ bool isAdrLabel() const {
+ // Validation was handled during parsing, so we just sanity check that
+ // something didn't go haywire.
+ if (!isImm())
+ return false;
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- if (!CE) return false;
+ if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Imm.Val)) {
+ int64_t Val = CE->getValue();
+ int64_t Min = - (1LL << (21 - 1));
+ int64_t Max = ((1LL << (21 - 1)) - 1);
+ return Val >= Min && Val <= Max;
+ }
- return CE->getValue() >= 0 && CE->getValue() < (1LL << bitWidth);
+ return true;
}
- bool isUImm() const {
- if (!isImm()) return false;
+ void addExpr(MCInst &Inst, const MCExpr *Expr) const {
+ // Add as immediates when possible. Null MCExpr = 0.
+ if (!Expr)
+ Inst.addOperand(MCOperand::CreateImm(0));
+ else if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr))
+ Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
+ else
+ Inst.addOperand(MCOperand::CreateExpr(Expr));
+ }
- return isa<MCConstantExpr>(getImm());
+ void addRegOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(getReg()));
}
- bool isNeonUImm64Mask() const {
- if (!isImm())
- return false;
+ void addGPR32as64Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ assert(
+ AArch64MCRegisterClasses[AArch64::GPR64RegClassID].contains(getReg()));
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- if (!CE)
- return false;
+ const MCRegisterInfo *RI = Ctx.getRegisterInfo();
+ uint32_t Reg = RI->getRegClass(AArch64::GPR32RegClassID).getRegister(
+ RI->getEncodingValue(getReg()));
- uint64_t Value = CE->getValue();
+ Inst.addOperand(MCOperand::CreateReg(Reg));
+ }
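+ // E.g. if the assembly names x3 but the encoding wants the 32-bit view,
+ // the encoding value of x3 indexes GPR32 and yields w3.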
- // i64 value with each byte being either 0x00 or 0xff.
- for (unsigned i = 0; i < 8; ++i, Value >>= 8)
- if ((Value & 0xff) != 0 && (Value & 0xff) != 0xff)
- return false;
- return true;
+ void addVectorReg64Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ assert(
+ AArch64MCRegisterClasses[AArch64::FPR128RegClassID].contains(getReg()));
+ Inst.addOperand(MCOperand::CreateReg(AArch64::D0 + getReg() - AArch64::Q0));
}
- // if value == N, return true
- template<int N>
- bool isExactImm() const {
- if (!isImm()) return false;
+ void addVectorReg128Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ assert(
+ AArch64MCRegisterClasses[AArch64::FPR128RegClassID].contains(getReg()));
+ Inst.addOperand(MCOperand::CreateReg(getReg()));
+ }
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- if (!CE) return false;
+ void addVectorRegLoOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(getReg()));
+ }
- return CE->getValue() == N;
+ template <unsigned NumRegs>
+ void addVectorList64Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ static unsigned FirstRegs[] = { AArch64::D0, AArch64::D0_D1,
+ AArch64::D0_D1_D2, AArch64::D0_D1_D2_D3 };
+ unsigned FirstReg = FirstRegs[NumRegs - 1];
+
+ Inst.addOperand(
+ MCOperand::CreateReg(FirstReg + getVectorListStart() - AArch64::Q0));
}
- bool isFPZeroIZero() const {
- return isFPZero();
+ template <unsigned NumRegs>
+ void addVectorList128Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ static unsigned FirstRegs[] = { AArch64::Q0, AArch64::Q0_Q1,
+ AArch64::Q0_Q1_Q2, AArch64::Q0_Q1_Q2_Q3 };
+ unsigned FirstReg = FirstRegs[NumRegs - 1];
+
+ Inst.addOperand(
+ MCOperand::CreateReg(FirstReg + getVectorListStart() - AArch64::Q0));
}
- static AArch64Operand *CreateImmWithLSL(const MCExpr *Val,
- unsigned ShiftAmount,
- bool ImplicitAmount,
- SMLoc S,SMLoc E) {
- AArch64Operand *Op = new AArch64Operand(k_ImmWithLSL, S, E);
- Op->ImmWithLSL.Val = Val;
- Op->ImmWithLSL.ShiftAmount = ShiftAmount;
- Op->ImmWithLSL.ImplicitAmount = ImplicitAmount;
- return Op;
+ void addVectorIndex1Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateImm(getVectorIndex()));
}
- static AArch64Operand *CreateCondCode(A64CC::CondCodes Code,
- SMLoc S, SMLoc E) {
- AArch64Operand *Op = new AArch64Operand(k_CondCode, S, E);
- Op->CondCode.Code = Code;
- return Op;
+ void addVectorIndexBOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateImm(getVectorIndex()));
}
- static AArch64Operand *CreateFPImm(double Val,
- SMLoc S, SMLoc E) {
- AArch64Operand *Op = new AArch64Operand(k_FPImmediate, S, E);
- Op->FPImm.Val = Val;
- return Op;
+ void addVectorIndexHOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateImm(getVectorIndex()));
}
- static AArch64Operand *CreateImm(const MCExpr *Val, SMLoc S, SMLoc E) {
- AArch64Operand *Op = new AArch64Operand(k_Immediate, S, E);
- Op->Imm.Val = Val;
- return Op;
+ void addVectorIndexSOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateImm(getVectorIndex()));
}
- static AArch64Operand *CreateReg(unsigned RegNum, SMLoc S, SMLoc E) {
- AArch64Operand *Op = new AArch64Operand(k_Register, S, E);
- Op->Reg.RegNum = RegNum;
- return Op;
+ void addVectorIndexDOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateImm(getVectorIndex()));
}
- static AArch64Operand *CreateWrappedReg(unsigned RegNum, SMLoc S, SMLoc E) {
- AArch64Operand *Op = new AArch64Operand(k_WrappedRegister, S, E);
- Op->Reg.RegNum = RegNum;
- return Op;
+ void addImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ // If this is a pageoff symrefexpr with an addend, adjust the addend
+ // to be only the page-offset portion. Otherwise, just add the expr
+ // as-is.
+ addExpr(Inst, getImm());
}
- static AArch64Operand *CreateShiftExtend(A64SE::ShiftExtSpecifiers ShiftTyp,
- unsigned Amount,
- bool ImplicitAmount,
- SMLoc S, SMLoc E) {
- AArch64Operand *Op = new AArch64Operand(k_ShiftExtend, S, E);
- Op->ShiftExtend.ShiftType = ShiftTyp;
- Op->ShiftExtend.Amount = Amount;
- Op->ShiftExtend.ImplicitAmount = ImplicitAmount;
- return Op;
+ void addAddSubImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ if (isShiftedImm()) {
+ addExpr(Inst, getShiftedImmVal());
+ Inst.addOperand(MCOperand::CreateImm(getShiftedImmShift()));
+ } else {
+ addExpr(Inst, getImm());
+ Inst.addOperand(MCOperand::CreateImm(0));
+ }
}
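+ // E.g. "add x0, x1, #1, lsl #12" emits the operand pair (1, 12); the
+ // plain "add x0, x1, #1" form emits (1, 0).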
- static AArch64Operand *CreateSysReg(StringRef Str, SMLoc S) {
- AArch64Operand *Op = new AArch64Operand(k_SysReg, S, S);
- Op->Tok.Data = Str.data();
- Op->Tok.Length = Str.size();
- return Op;
+ void addCondCodeOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateImm(getCondCode()));
}
- static AArch64Operand *CreateVectorList(unsigned RegNum, unsigned Count,
- A64Layout::VectorLayout Layout,
- SMLoc S, SMLoc E) {
- AArch64Operand *Op = new AArch64Operand(k_VectorList, S, E);
- Op->VectorList.RegNum = RegNum;
- Op->VectorList.Count = Count;
- Op->VectorList.Layout = Layout;
- Op->StartLoc = S;
- Op->EndLoc = E;
- return Op;
+ void addAdrpLabelOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
+ if (!MCE)
+ addExpr(Inst, getImm());
+ else
+ Inst.addOperand(MCOperand::CreateImm(MCE->getValue() >> 12));
}
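+ // E.g. a constant ADRP operand of #0x12000 is emitted as page index
+ // 0x12 (value >> 12); symbolic references are left as expressions for
+ // the fixup machinery.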
- static AArch64Operand *CreateToken(StringRef Str, SMLoc S) {
- AArch64Operand *Op = new AArch64Operand(k_Token, S, S);
- Op->Tok.Data = Str.data();
- Op->Tok.Length = Str.size();
- return Op;
+ void addAdrLabelOperands(MCInst &Inst, unsigned N) const {
+ addImmOperands(Inst, N);
}
+ template<int Scale>
+ void addUImm12OffsetOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
- void addExpr(MCInst &Inst, const MCExpr *Expr) const {
- // Add as immediates when possible.
- if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr))
- Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
- else
- Inst.addOperand(MCOperand::CreateExpr(Expr));
+ if (!MCE) {
+ Inst.addOperand(MCOperand::CreateExpr(getImm()));
+ return;
+ }
+ Inst.addOperand(MCOperand::CreateImm(MCE->getValue() / Scale));
}
- template<unsigned RegWidth>
- void addBFILSBOperands(MCInst &Inst, unsigned N) const {
+ void addSImm9Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- const MCConstantExpr *CE = cast<MCConstantExpr>(getImm());
- unsigned EncodedVal = (RegWidth - CE->getValue()) % RegWidth;
- Inst.addOperand(MCOperand::CreateImm(EncodedVal));
+ const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
+ assert(MCE && "Invalid constant immediate operand!");
+ Inst.addOperand(MCOperand::CreateImm(MCE->getValue()));
}
- void addBFIWidthOperands(MCInst &Inst, unsigned N) const {
+ void addSImm7s4Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- const MCConstantExpr *CE = cast<MCConstantExpr>(getImm());
- Inst.addOperand(MCOperand::CreateImm(CE->getValue() - 1));
+ const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
+ assert(MCE && "Invalid constant immediate operand!");
+ Inst.addOperand(MCOperand::CreateImm(MCE->getValue() / 4));
}
- void addBFXWidthOperands(MCInst &Inst, unsigned N) const {
+ void addSImm7s8Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
-
- uint64_t LSB = Inst.getOperand(Inst.getNumOperands()-1).getImm();
- const MCConstantExpr *CE = cast<MCConstantExpr>(getImm());
-
- Inst.addOperand(MCOperand::CreateImm(LSB + CE->getValue() - 1));
+ const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
+ assert(MCE && "Invalid constant immediate operand!");
+ Inst.addOperand(MCOperand::CreateImm(MCE->getValue() / 8));
}
- void addCondCodeOperands(MCInst &Inst, unsigned N) const {
+ void addSImm7s16Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateImm(getCondCode()));
+ const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
+ assert(MCE && "Invalid constant immediate operand!");
+ Inst.addOperand(MCOperand::CreateImm(MCE->getValue() / 16));
}
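+ // The three scaled forms mirror the LDP/STP addressing modes: e.g.
+ // "ldp w0, w1, [x2, #8]" stores 8 / 4 == 2 in the signed 7-bit field;
+ // the 8- and 16-byte variants divide by 8 and 16 respectively.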
- void addCVTFixedPosOperands(MCInst &Inst, unsigned N) const {
+ void addImm0_7Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
-
- const MCConstantExpr *CE = cast<MCConstantExpr>(getImm());
- Inst.addOperand(MCOperand::CreateImm(64 - CE->getValue()));
+ const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
+ assert(MCE && "Invalid constant immediate operand!");
+ Inst.addOperand(MCOperand::CreateImm(MCE->getValue()));
}
- void addFMOVImmOperands(MCInst &Inst, unsigned N) const {
+ void addImm1_8Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
+ const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
+ assert(MCE && "Invalid constant immediate operand!");
+ Inst.addOperand(MCOperand::CreateImm(MCE->getValue()));
+ }
- APFloat RealVal(FPImm.Val);
- uint32_t ImmVal;
- A64Imms::isFPImm(RealVal, ImmVal);
+ void addImm0_15Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
+ assert(MCE && "Invalid constant immediate operand!");
+ Inst.addOperand(MCOperand::CreateImm(MCE->getValue()));
+ }
- Inst.addOperand(MCOperand::CreateImm(ImmVal));
+ void addImm1_16Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
+ assert(MCE && "Invalid constant immediate operand!");
+ Inst.addOperand(MCOperand::CreateImm(MCE->getValue()));
}
- void addFPZeroOperands(MCInst &Inst, unsigned N) const {
- assert(N == 1 && "Invalid number of operands");
- Inst.addOperand(MCOperand::CreateImm(0));
+ void addImm0_31Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
+ assert(MCE && "Invalid constant immediate operand!");
+ Inst.addOperand(MCOperand::CreateImm(MCE->getValue()));
}
- void addFPZeroIZeroOperands(MCInst &Inst, unsigned N) const {
- addFPZeroOperands(Inst, N);
+ void addImm1_31Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
+ assert(MCE && "Invalid constant immediate operand!");
+ Inst.addOperand(MCOperand::CreateImm(MCE->getValue()));
}
- void addInvCondCodeOperands(MCInst &Inst, unsigned N) const {
+ void addImm1_32Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- unsigned Encoded = A64InvertCondCode(getCondCode());
- Inst.addOperand(MCOperand::CreateImm(Encoded));
+ const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
+ assert(MCE && "Invalid constant immediate operand!");
+ Inst.addOperand(MCOperand::CreateImm(MCE->getValue()));
}
- void addRegOperands(MCInst &Inst, unsigned N) const {
+ void addImm0_63Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateReg(getReg()));
+ const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
+ assert(MCE && "Invalid constant immediate operand!");
+ Inst.addOperand(MCOperand::CreateImm(MCE->getValue()));
}
- void addImmOperands(MCInst &Inst, unsigned N) const {
+ void addImm1_63Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- addExpr(Inst, getImm());
+ const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
+ assert(MCE && "Invalid constant immediate operand!");
+ Inst.addOperand(MCOperand::CreateImm(MCE->getValue()));
}
- template<int MemSize>
- void addSImm7ScaledOperands(MCInst &Inst, unsigned N) const {
+ void addImm1_64Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
+ const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
+ assert(MCE && "Invalid constant immediate operand!");
+ Inst.addOperand(MCOperand::CreateImm(MCE->getValue()));
+ }
- const MCConstantExpr *CE = cast<MCConstantExpr>(getImm());
- uint64_t Val = CE->getValue() / MemSize;
- Inst.addOperand(MCOperand::CreateImm(Val & 0x7f));
+ void addImm0_127Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
+ assert(MCE && "Invalid constant immediate operand!");
+ Inst.addOperand(MCOperand::CreateImm(MCE->getValue()));
}
- template<int BitWidth>
- void addSImmOperands(MCInst &Inst, unsigned N) const {
+ void addImm0_255Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
+ const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
+ assert(MCE && "Invalid constant immediate operand!");
+ Inst.addOperand(MCOperand::CreateImm(MCE->getValue()));
+ }
- const MCConstantExpr *CE = cast<MCConstantExpr>(getImm());
- uint64_t Val = CE->getValue();
- Inst.addOperand(MCOperand::CreateImm(Val & ((1ULL << BitWidth) - 1)));
+ void addImm0_65535Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
+ assert(MCE && "Invalid constant immediate operand!");
+ Inst.addOperand(MCOperand::CreateImm(MCE->getValue()));
}
- void addImmWithLSLOperands(MCInst &Inst, unsigned N) const {
- assert (N == 1 && "Invalid number of operands!");
+ void addImm32_63Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
+ assert(MCE && "Invalid constant immediate operand!");
+ Inst.addOperand(MCOperand::CreateImm(MCE->getValue()));
+ }
- addExpr(Inst, ImmWithLSL.Val);
+ void addLogicalImm32Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
+ assert(MCE && "Invalid logical immediate operand!");
+ uint64_t encoding = AArch64_AM::encodeLogicalImmediate(MCE->getValue(), 32);
+ Inst.addOperand(MCOperand::CreateImm(encoding));
}
- template<unsigned field_width, unsigned scale>
- void addLabelOperands(MCInst &Inst, unsigned N) const {
+ void addLogicalImm64Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
+ const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
+ assert(MCE && "Invalid logical immediate operand!");
+ uint64_t encoding = AArch64_AM::encodeLogicalImmediate(MCE->getValue(), 64);
+ Inst.addOperand(MCOperand::CreateImm(encoding));
+ }
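+ // E.g. "and w0, w1, #0xff" carries a valid 32-bit logical immediate;
+ // the (N, immr, imms) encoding from encodeLogicalImmediate is what ends
+ // up in the instruction, not the raw value.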
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Imm.Val);
+ void addSIMDImmType10Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
+ assert(MCE && "Invalid immediate operand!");
+ uint64_t encoding = AArch64_AM::encodeAdvSIMDModImmType10(MCE->getValue());
+ Inst.addOperand(MCOperand::CreateImm(encoding));
+ }
- if (!CE) {
- addExpr(Inst, Imm.Val);
+ void addBranchTarget26Operands(MCInst &Inst, unsigned N) const {
+ // Branch operands don't encode the low bits, so shift them off
+ // here. If it's a label, however, just put it on directly as there's
+ // not enough information now to do anything.
+ assert(N == 1 && "Invalid number of operands!");
+ const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
+ if (!MCE) {
+ addExpr(Inst, getImm());
return;
}
-
- int64_t Val = CE->getValue();
- assert(Val % scale == 0 && "Unaligned immediate in instruction");
- Val /= scale;
-
- Inst.addOperand(MCOperand::CreateImm(Val & ((1LL << field_width) - 1)));
+ assert(MCE && "Invalid constant immediate operand!");
+ Inst.addOperand(MCOperand::CreateImm(MCE->getValue() >> 2));
}
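+ // E.g. a resolved branch distance of #8 is emitted as 2: the low two
+ // bits are implicitly zero because A64 instructions are 4-byte aligned.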
- template<int MemSize>
- void addOffsetUImm12Operands(MCInst &Inst, unsigned N) const {
+ void addPCRelLabel19Operands(MCInst &Inst, unsigned N) const {
+ // Branch operands don't encode the low bits, so shift them off
+ // here. If it's a label, however, just put it on directly as there's
+ // not enough information now to do anything.
assert(N == 1 && "Invalid number of operands!");
-
- if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm())) {
- Inst.addOperand(MCOperand::CreateImm(CE->getValue() / MemSize));
- } else {
- Inst.addOperand(MCOperand::CreateExpr(getImm()));
+ const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
+ if (!MCE) {
+ addExpr(Inst, getImm());
+ return;
}
+ assert(MCE && "Invalid constant immediate operand!");
+ Inst.addOperand(MCOperand::CreateImm(MCE->getValue() >> 2));
}
- template<unsigned RegWidth>
- void addLogicalImmOperands(MCInst &Inst, unsigned N) const {
- assert(N == 1 && "Invalid number of operands");
- const MCConstantExpr *CE = cast<MCConstantExpr>(Imm.Val);
+ void addBranchTarget14Operands(MCInst &Inst, unsigned N) const {
+ // Branch operands don't encode the low bits, so shift them off
+ // here. If it's a label, however, just put it on directly as there's
+ // not enough information now to do anything.
+ assert(N == 1 && "Invalid number of operands!");
+ const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
+ if (!MCE) {
+ addExpr(Inst, getImm());
+ return;
+ }
+ assert(MCE && "Invalid constant immediate operand!");
+ Inst.addOperand(MCOperand::CreateImm(MCE->getValue() >> 2));
+ }
- uint32_t Bits;
- A64Imms::isLogicalImm(RegWidth, CE->getValue(), Bits);
+ void addFPImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateImm(getFPImm()));
+ }
- Inst.addOperand(MCOperand::CreateImm(Bits));
+ void addBarrierOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateImm(getBarrier()));
}
- void addMRSOperands(MCInst &Inst, unsigned N) const {
+ void addMRSSystemRegisterOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
bool Valid;
- StringRef Name(SysReg.Data, SysReg.Length);
- uint32_t Bits = A64SysReg::MRSMapper().fromString(Name, Valid);
+ auto Mapper = AArch64SysReg::MRSMapper(getSysRegFeatureBits());
+ uint32_t Bits = Mapper.fromString(getSysReg(), Valid);
Inst.addOperand(MCOperand::CreateImm(Bits));
}
- void addMSRWithRegOperands(MCInst &Inst, unsigned N) const {
+ void addMSRSystemRegisterOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
bool Valid;
- StringRef Name(SysReg.Data, SysReg.Length);
- uint32_t Bits = A64SysReg::MSRMapper().fromString(Name, Valid);
+ auto Mapper = AArch64SysReg::MSRMapper(getSysRegFeatureBits());
+ uint32_t Bits = Mapper.fromString(getSysReg(), Valid);
Inst.addOperand(MCOperand::CreateImm(Bits));
}
- void addMSRPStateOperands(MCInst &Inst, unsigned N) const {
+ void addSystemPStateFieldOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
bool Valid;
- StringRef Name(SysReg.Data, SysReg.Length);
- uint32_t Bits = A64PState::PStateMapper().fromString(Name, Valid);
+ uint32_t Bits =
+ AArch64PState::PStateMapper().fromString(getSysReg(), Valid);
Inst.addOperand(MCOperand::CreateImm(Bits));
}
- void addMoveWideImmOperands(MCInst &Inst, unsigned N) const {
- assert(N == 2 && "Invalid number of operands!");
-
- addExpr(Inst, ImmWithLSL.Val);
-
- AArch64MCExpr::VariantKind Variant;
- if (!isNonConstantExpr(ImmWithLSL.Val, Variant)) {
- Inst.addOperand(MCOperand::CreateImm(ImmWithLSL.ShiftAmount / 16));
- return;
- }
-
- // We know it's relocated
- switch (Variant) {
- case AArch64MCExpr::VK_AARCH64_ABS_G0:
- case AArch64MCExpr::VK_AARCH64_ABS_G0_NC:
- case AArch64MCExpr::VK_AARCH64_SABS_G0:
- case AArch64MCExpr::VK_AARCH64_DTPREL_G0:
- case AArch64MCExpr::VK_AARCH64_DTPREL_G0_NC:
- case AArch64MCExpr::VK_AARCH64_GOTTPREL_G0_NC:
- case AArch64MCExpr::VK_AARCH64_TPREL_G0:
- case AArch64MCExpr::VK_AARCH64_TPREL_G0_NC:
- Inst.addOperand(MCOperand::CreateImm(0));
- break;
- case AArch64MCExpr::VK_AARCH64_ABS_G1:
- case AArch64MCExpr::VK_AARCH64_ABS_G1_NC:
- case AArch64MCExpr::VK_AARCH64_SABS_G1:
- case AArch64MCExpr::VK_AARCH64_DTPREL_G1:
- case AArch64MCExpr::VK_AARCH64_DTPREL_G1_NC:
- case AArch64MCExpr::VK_AARCH64_GOTTPREL_G1:
- case AArch64MCExpr::VK_AARCH64_TPREL_G1:
- case AArch64MCExpr::VK_AARCH64_TPREL_G1_NC:
- Inst.addOperand(MCOperand::CreateImm(1));
- break;
- case AArch64MCExpr::VK_AARCH64_ABS_G2:
- case AArch64MCExpr::VK_AARCH64_ABS_G2_NC:
- case AArch64MCExpr::VK_AARCH64_SABS_G2:
- case AArch64MCExpr::VK_AARCH64_DTPREL_G2:
- case AArch64MCExpr::VK_AARCH64_TPREL_G2:
- Inst.addOperand(MCOperand::CreateImm(2));
- break;
- case AArch64MCExpr::VK_AARCH64_ABS_G3:
- Inst.addOperand(MCOperand::CreateImm(3));
- break;
- default: llvm_unreachable("Inappropriate move wide relocation");
- }
+ void addSysCROperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateImm(getSysCR()));
}
- template<int RegWidth, bool isValidImm(int, uint64_t, int&, int&)>
- void addMoveWideMovAliasOperands(MCInst &Inst, unsigned N) const {
- assert(N == 2 && "Invalid number of operands!");
- int UImm16, Shift;
-
- const MCConstantExpr *CE = cast<MCConstantExpr>(getImm());
- uint64_t Value = CE->getValue();
-
- if (RegWidth == 32) {
- Value &= 0xffffffffULL;
- }
+ void addPrefetchOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateImm(getPrefetch()));
+ }
- bool Valid = isValidImm(RegWidth, Value, UImm16, Shift);
- (void)Valid;
- assert(Valid && "Invalid immediates should have been weeded out by now");
+ void addShifterOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ unsigned Imm =
+ AArch64_AM::getShifterImm(getShiftExtendType(), getShiftExtendAmount());
+ Inst.addOperand(MCOperand::CreateImm(Imm));
+ }
- Inst.addOperand(MCOperand::CreateImm(UImm16));
- Inst.addOperand(MCOperand::CreateImm(Shift));
+ void addExtendOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ AArch64_AM::ShiftExtendType ET = getShiftExtendType();
+ if (ET == AArch64_AM::LSL) ET = AArch64_AM::UXTW;
+ unsigned Imm = AArch64_AM::getArithExtendImm(ET, getShiftExtendAmount());
+ Inst.addOperand(MCOperand::CreateImm(Imm));
}
- void addPRFMOperands(MCInst &Inst, unsigned N) const {
+ void addExtend64Operands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
+ AArch64_AM::ShiftExtendType ET = getShiftExtendType();
+ if (ET == AArch64_AM::LSL) ET = AArch64_AM::UXTX;
+ unsigned Imm = AArch64_AM::getArithExtendImm(ET, getShiftExtendAmount());
+ Inst.addOperand(MCOperand::CreateImm(Imm));
+ }
- const MCConstantExpr *CE = cast<MCConstantExpr>(getImm());
- assert(CE->getValue() >= 0 && CE->getValue() <= 31
- && "PRFM operand should be 5-bits");
+ void addMemExtendOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ AArch64_AM::ShiftExtendType ET = getShiftExtendType();
+ bool IsSigned = ET == AArch64_AM::SXTW || ET == AArch64_AM::SXTX;
+ Inst.addOperand(MCOperand::CreateImm(IsSigned));
+ Inst.addOperand(MCOperand::CreateImm(getShiftExtendAmount() != 0));
+ }
- Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
+ // For 8-bit load/store instructions with a register offset, both the
+ // "DoShift" and "NoShift" variants have a shift of 0. Because of this,
+ // they're disambiguated by whether the shift was explicit or implicit rather
+ // than its size.
+ void addMemExtend8Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 2 && "Invalid number of operands!");
+ AArch64_AM::ShiftExtendType ET = getShiftExtendType();
+ bool IsSigned = ET == AArch64_AM::SXTW || ET == AArch64_AM::SXTX;
+ Inst.addOperand(MCOperand::CreateImm(IsSigned));
+ Inst.addOperand(MCOperand::CreateImm(hasShiftExtendAmount()));
}
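+ // E.g. "ldrb w0, [x1, x2, lsl #0]" and "ldrb w0, [x1, x2]" have the
+ // same shift amount; only the explicit "lsl #0" selects the DoShift
+ // variant.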
- // For Add-sub (extended register) operands.
- void addRegExtendOperands(MCInst &Inst, unsigned N) const {
+ template<int Shift>
+ void addMOVZMovAliasOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateImm(ShiftExtend.Amount));
+ const MCConstantExpr *CE = cast<MCConstantExpr>(getImm());
+ uint64_t Value = CE->getValue();
+ Inst.addOperand(MCOperand::CreateImm((Value >> Shift) & 0xffff));
}
- // For Vector Immediates shifted imm operands.
- void addNeonMovImmShiftLSLOperands(MCInst &Inst, unsigned N) const {
+ template<int Shift>
+ void addMOVNMovAliasOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- if (ShiftExtend.Amount % 8 != 0 || ShiftExtend.Amount > 24)
- llvm_unreachable("Invalid shift amount for vector immediate inst.");
-
- // Encode LSL shift amount 0, 8, 16, 24 as 0, 1, 2, 3.
- int64_t Imm = ShiftExtend.Amount / 8;
- Inst.addOperand(MCOperand::CreateImm(Imm));
+ const MCConstantExpr *CE = cast<MCConstantExpr>(getImm());
+ uint64_t Value = CE->getValue();
+ Inst.addOperand(MCOperand::CreateImm((~Value >> Shift) & 0xffff));
}
- void addNeonMovImmShiftLSLHOperands(MCInst &Inst, unsigned N) const {
- assert(N == 1 && "Invalid number of operands!");
+ void print(raw_ostream &OS) const override;
- if (ShiftExtend.Amount != 0 && ShiftExtend.Amount != 8)
- llvm_unreachable("Invalid shift amount for vector immediate inst.");
-
- // Encode LSLH shift amount 0, 8 as 0, 1.
- int64_t Imm = ShiftExtend.Amount / 8;
- Inst.addOperand(MCOperand::CreateImm(Imm));
+ static AArch64Operand *CreateToken(StringRef Str, bool IsSuffix, SMLoc S,
+ MCContext &Ctx) {
+ AArch64Operand *Op = new AArch64Operand(k_Token, Ctx);
+ Op->Tok.Data = Str.data();
+ Op->Tok.Length = Str.size();
+ Op->Tok.IsSuffix = IsSuffix;
+ Op->StartLoc = S;
+ Op->EndLoc = S;
+ return Op;
}
- void addNeonMovImmShiftMSLOperands(MCInst &Inst, unsigned N) const {
- assert(N == 1 && "Invalid number of operands!");
-
- if (ShiftExtend.Amount != 8 && ShiftExtend.Amount != 16)
- llvm_unreachable("Invalid shift amount for vector immediate inst.");
+ static AArch64Operand *CreateReg(unsigned RegNum, bool isVector, SMLoc S,
+ SMLoc E, MCContext &Ctx) {
+ AArch64Operand *Op = new AArch64Operand(k_Register, Ctx);
+ Op->Reg.RegNum = RegNum;
+ Op->Reg.isVector = isVector;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
- // Encode MSL shift amount 8, 16 as 0, 1.
- int64_t Imm = ShiftExtend.Amount / 8 - 1;
- Inst.addOperand(MCOperand::CreateImm(Imm));
+ static AArch64Operand *CreateVectorList(unsigned RegNum, unsigned Count,
+ unsigned NumElements, char ElementKind,
+ SMLoc S, SMLoc E, MCContext &Ctx) {
+ AArch64Operand *Op = new AArch64Operand(k_VectorList, Ctx);
+ Op->VectorList.RegNum = RegNum;
+ Op->VectorList.Count = Count;
+ Op->VectorList.NumElements = NumElements;
+ Op->VectorList.ElementKind = ElementKind;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
}
- // For the extend in load-store (register offset) instructions.
- template<unsigned MemSize>
- void addAddrRegExtendOperands(MCInst &Inst, unsigned N) const {
- addAddrRegExtendOperands(Inst, N, MemSize);
+ static AArch64Operand *CreateVectorIndex(unsigned Idx, SMLoc S, SMLoc E,
+ MCContext &Ctx) {
+ AArch64Operand *Op = new AArch64Operand(k_VectorIndex, Ctx);
+ Op->VectorIndex.Val = Idx;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
}
- void addAddrRegExtendOperands(MCInst &Inst, unsigned N,
- unsigned MemSize) const {
- assert(N == 1 && "Invalid number of operands!");
+ static AArch64Operand *CreateImm(const MCExpr *Val, SMLoc S, SMLoc E,
+ MCContext &Ctx) {
+ AArch64Operand *Op = new AArch64Operand(k_Immediate, Ctx);
+ Op->Imm.Val = Val;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
- // First bit of Option is set in instruction classes, the high two bits are
- // as follows:
- unsigned OptionHi = 0;
- switch (ShiftExtend.ShiftType) {
- case A64SE::UXTW:
- case A64SE::LSL:
- OptionHi = 1;
- break;
- case A64SE::SXTW:
- case A64SE::SXTX:
- OptionHi = 3;
- break;
- default:
- llvm_unreachable("Invalid extend type for register offset");
- }
+ static AArch64Operand *CreateShiftedImm(const MCExpr *Val,
+ unsigned ShiftAmount, SMLoc S,
+ SMLoc E, MCContext &Ctx) {
+ AArch64Operand *Op = new AArch64Operand(k_ShiftedImm, Ctx);
+ Op->ShiftedImm.Val = Val;
+ Op->ShiftedImm.ShiftAmount = ShiftAmount;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
- unsigned S = 0;
- if (MemSize == 1 && !ShiftExtend.ImplicitAmount)
- S = 1;
- else if (MemSize != 1 && ShiftExtend.Amount != 0)
- S = 1;
+ static AArch64Operand *CreateCondCode(AArch64CC::CondCode Code, SMLoc S,
+ SMLoc E, MCContext &Ctx) {
+ AArch64Operand *Op = new AArch64Operand(k_CondCode, Ctx);
+ Op->CondCode.Code = Code;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
- Inst.addOperand(MCOperand::CreateImm((OptionHi << 1) | S));
+ static AArch64Operand *CreateFPImm(unsigned Val, SMLoc S, MCContext &Ctx) {
+ AArch64Operand *Op = new AArch64Operand(k_FPImm, Ctx);
+ Op->FPImm.Val = Val;
+ Op->StartLoc = S;
+ Op->EndLoc = S;
+ return Op;
}
- void addShiftOperands(MCInst &Inst, unsigned N) const {
- assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateImm(ShiftExtend.Amount));
+ static AArch64Operand *CreateBarrier(unsigned Val, SMLoc S, MCContext &Ctx) {
+ AArch64Operand *Op = new AArch64Operand(k_Barrier, Ctx);
+ Op->Barrier.Val = Val;
+ Op->StartLoc = S;
+ Op->EndLoc = S;
+ return Op;
}
- void addNeonUImm64MaskOperands(MCInst &Inst, unsigned N) const {
- assert(N == 1 && "Invalid number of operands!");
+ static AArch64Operand *CreateSysReg(StringRef Str, SMLoc S,
+ uint64_t FeatureBits, MCContext &Ctx) {
+ AArch64Operand *Op = new AArch64Operand(k_SysReg, Ctx);
+ Op->SysReg.Data = Str.data();
+ Op->SysReg.Length = Str.size();
+ Op->SysReg.FeatureBits = FeatureBits;
+ Op->StartLoc = S;
+ Op->EndLoc = S;
+ return Op;
+ }
- // A bit from each byte in the constant forms the encoded immediate
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- uint64_t Value = CE->getValue();
+ static AArch64Operand *CreateSysCR(unsigned Val, SMLoc S, SMLoc E,
+ MCContext &Ctx) {
+ AArch64Operand *Op = new AArch64Operand(k_SysCR, Ctx);
+ Op->SysCRImm.Val = Val;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
- unsigned Imm = 0;
- for (unsigned i = 0; i < 8; ++i, Value >>= 8) {
- Imm |= (Value & 1) << i;
- }
- Inst.addOperand(MCOperand::CreateImm(Imm));
+ static AArch64Operand *CreatePrefetch(unsigned Val, SMLoc S, MCContext &Ctx) {
+ AArch64Operand *Op = new AArch64Operand(k_Prefetch, Ctx);
+ Op->Prefetch.Val = Val;
+ Op->StartLoc = S;
+ Op->EndLoc = S;
+ return Op;
}
- void addVectorListOperands(MCInst &Inst, unsigned N) const {
- assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateReg(VectorList.RegNum));
+ static AArch64Operand *CreateShiftExtend(AArch64_AM::ShiftExtendType ShOp,
+ unsigned Val, bool HasExplicitAmount,
+ SMLoc S, SMLoc E, MCContext &Ctx) {
+ AArch64Operand *Op = new AArch64Operand(k_ShiftExtend, Ctx);
+ Op->ShiftExtend.Type = ShOp;
+ Op->ShiftExtend.Amount = Val;
+ Op->ShiftExtend.HasExplicitAmount = HasExplicitAmount;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
}
};
} // end anonymous namespace.
-AArch64AsmParser::OperandMatchResultTy
-AArch64AsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- StringRef Mnemonic) {
+void AArch64Operand::print(raw_ostream &OS) const {
+ switch (Kind) {
+ case k_FPImm:
+ OS << "<fpimm " << getFPImm() << "("
+ << AArch64_AM::getFPImmFloat(getFPImm()) << ") >";
+ break;
+ case k_Barrier: {
+ bool Valid;
+ StringRef Name = AArch64DB::DBarrierMapper().toString(getBarrier(), Valid);
+ if (Valid)
+ OS << "<barrier " << Name << ">";
+ else
+ OS << "<barrier invalid #" << getBarrier() << ">";
+ break;
+ }
+ case k_Immediate:
+ getImm()->print(OS);
+ break;
+ case k_ShiftedImm: {
+ unsigned Shift = getShiftedImmShift();
+ OS << "<shiftedimm ";
+ getShiftedImmVal()->print(OS);
+ OS << ", lsl #" << AArch64_AM::getShiftValue(Shift) << ">";
+ break;
+ }
+ case k_CondCode:
+ OS << "<condcode " << getCondCode() << ">";
+ break;
+ case k_Register:
+ OS << "<register " << getReg() << ">";
+ break;
+ case k_VectorList: {
+ OS << "<vectorlist ";
+ unsigned Reg = getVectorListStart();
+ for (unsigned i = 0, e = getVectorListCount(); i != e; ++i)
+ OS << Reg + i << " ";
+ OS << ">";
+ break;
+ }
+ case k_VectorIndex:
+ OS << "<vectorindex " << getVectorIndex() << ">";
+ break;
+ case k_SysReg:
+ OS << "<sysreg: " << getSysReg() << '>';
+ break;
+ case k_Token:
+ OS << "'" << getToken() << "'";
+ break;
+ case k_SysCR:
+ OS << "c" << getSysCR();
+ break;
+ case k_Prefetch: {
+ bool Valid;
+ StringRef Name = AArch64PRFM::PRFMMapper().toString(getPrefetch(), Valid);
+ if (Valid)
+ OS << "<prfop " << Name << ">";
+ else
+ OS << "<prfop invalid #" << getPrefetch() << ">";
+ break;
+ }
+ case k_ShiftExtend: {
+ OS << "<" << AArch64_AM::getShiftExtendName(getShiftExtendType()) << " #"
+ << getShiftExtendAmount();
+ if (!hasShiftExtendAmount())
+ OS << "<imp>";
+ OS << '>';
+ break;
+ }
+ }
+}
- // See if the operand has a custom parser
- OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
+/// @name Auto-generated Match Functions
+/// {
- // It could either succeed, fail or just not care.
- if (ResTy != MatchOperand_NoMatch)
- return ResTy;
+static unsigned MatchRegisterName(StringRef Name);
- switch (getLexer().getKind()) {
- default:
- Error(Parser.getTok().getLoc(), "unexpected token in operand");
- return MatchOperand_ParseFail;
- case AsmToken::Identifier: {
- // It might be in the LSL/UXTB family ...
- OperandMatchResultTy GotShift = ParseShiftExtend(Operands);
+/// }
+
+static unsigned matchVectorRegName(StringRef Name) {
+ return StringSwitch<unsigned>(Name)
+ .Case("v0", AArch64::Q0)
+ .Case("v1", AArch64::Q1)
+ .Case("v2", AArch64::Q2)
+ .Case("v3", AArch64::Q3)
+ .Case("v4", AArch64::Q4)
+ .Case("v5", AArch64::Q5)
+ .Case("v6", AArch64::Q6)
+ .Case("v7", AArch64::Q7)
+ .Case("v8", AArch64::Q8)
+ .Case("v9", AArch64::Q9)
+ .Case("v10", AArch64::Q10)
+ .Case("v11", AArch64::Q11)
+ .Case("v12", AArch64::Q12)
+ .Case("v13", AArch64::Q13)
+ .Case("v14", AArch64::Q14)
+ .Case("v15", AArch64::Q15)
+ .Case("v16", AArch64::Q16)
+ .Case("v17", AArch64::Q17)
+ .Case("v18", AArch64::Q18)
+ .Case("v19", AArch64::Q19)
+ .Case("v20", AArch64::Q20)
+ .Case("v21", AArch64::Q21)
+ .Case("v22", AArch64::Q22)
+ .Case("v23", AArch64::Q23)
+ .Case("v24", AArch64::Q24)
+ .Case("v25", AArch64::Q25)
+ .Case("v26", AArch64::Q26)
+ .Case("v27", AArch64::Q27)
+ .Case("v28", AArch64::Q28)
+ .Case("v29", AArch64::Q29)
+ .Case("v30", AArch64::Q30)
+ .Case("v31", AArch64::Q31)
+ .Default(0);
+}
- // We can only continue if no tokens were eaten.
- if (GotShift != MatchOperand_NoMatch)
- return GotShift;
+static bool isValidVectorKind(StringRef Name) {
+ return StringSwitch<bool>(Name.lower())
+ .Case(".8b", true)
+ .Case(".16b", true)
+ .Case(".4h", true)
+ .Case(".8h", true)
+ .Case(".2s", true)
+ .Case(".4s", true)
+ .Case(".1d", true)
+ .Case(".2d", true)
+ .Case(".1q", true)
+ // Accept the width-neutral ones, too, for verbose syntax. If those
+ // aren't used in the right places, the token operand won't match so
+ // all will work out.
+ .Case(".b", true)
+ .Case(".h", true)
+ .Case(".s", true)
+ .Case(".d", true)
+ .Default(false);
+}
- // ... or it might be a register ...
- uint32_t NumLanes = 0;
- OperandMatchResultTy GotReg = ParseRegister(Operands, NumLanes);
- assert(GotReg != MatchOperand_ParseFail
- && "register parsing shouldn't partially succeed");
-
- if (GotReg == MatchOperand_Success) {
- if (Parser.getTok().is(AsmToken::LBrac))
- return ParseNEONLane(Operands, NumLanes);
- else
- return MatchOperand_Success;
- }
- // ... or it might be a symbolish thing
- }
- // Fall through
- case AsmToken::LParen: // E.g. (strcmp-4)
- case AsmToken::Integer: // 1f, 2b labels
- case AsmToken::String: // quoted labels
- case AsmToken::Dot: // . is Current location
- case AsmToken::Dollar: // $ is PC
- case AsmToken::Colon: {
- SMLoc StartLoc = Parser.getTok().getLoc();
- SMLoc EndLoc;
- const MCExpr *ImmVal = 0;
-
- if (ParseImmediate(ImmVal) != MatchOperand_Success)
- return MatchOperand_ParseFail;
+static void parseValidVectorKind(StringRef Name, unsigned &NumElements,
+ char &ElementKind) {
+ assert(isValidVectorKind(Name));
- EndLoc = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
- Operands.push_back(AArch64Operand::CreateImm(ImmVal, StartLoc, EndLoc));
- return MatchOperand_Success;
- }
- case AsmToken::Hash: { // Immediates
- SMLoc StartLoc = Parser.getTok().getLoc();
- SMLoc EndLoc;
- const MCExpr *ImmVal = 0;
- Parser.Lex();
+ ElementKind = Name.lower()[Name.size() - 1];
+ NumElements = 0;
- if (ParseImmediate(ImmVal) != MatchOperand_Success)
- return MatchOperand_ParseFail;
+ if (Name.size() == 2)
+ return;
- EndLoc = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
- Operands.push_back(AArch64Operand::CreateImm(ImmVal, StartLoc, EndLoc));
- return MatchOperand_Success;
+ // Parse the lane count
+ Name = Name.drop_front();
+ while (isdigit(Name.front())) {
+ NumElements = 10 * NumElements + (Name.front() - '0');
+ Name = Name.drop_front();
}
- case AsmToken::LBrac: {
- SMLoc Loc = Parser.getTok().getLoc();
- Operands.push_back(AArch64Operand::CreateToken("[", Loc));
- Parser.Lex(); // Eat '['
+}
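+// E.g. "v0.4s" splits into register v0 and kind ".4s" (NumElements == 4,
+// ElementKind == 's'); a width-neutral kind like ".s" leaves NumElements
+// as 0 for contexts where the instruction implies the count.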
- // There's no comma after a '[', so we can parse the next operand
- // immediately.
- return ParseOperand(Operands, Mnemonic);
- }
- // The following will likely be useful later, but not in very early cases
- case AsmToken::LCurly: // SIMD vector list is not parsed here
- llvm_unreachable("Don't know how to deal with '{' in operand");
- return MatchOperand_ParseFail;
+bool AArch64AsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
+ SMLoc &EndLoc) {
+ StartLoc = getLoc();
+ RegNo = tryParseRegister();
+ EndLoc = SMLoc::getFromPointer(getLoc().getPointer() - 1);
+ return (RegNo == (unsigned)-1);
+}
+
+/// tryParseRegister - Try to parse a register name. The token must be an
+/// Identifier when called, and if it is a register name the token is eaten
+/// and the register number is returned.
+int AArch64AsmParser::tryParseRegister() {
+ const AsmToken &Tok = Parser.getTok();
+ assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier");
+
+ std::string lowerCase = Tok.getString().lower();
+ unsigned RegNum = MatchRegisterName(lowerCase);
+ // Also handle a few aliases of registers.
+ if (RegNum == 0)
+ RegNum = StringSwitch<unsigned>(lowerCase)
+ .Case("fp", AArch64::FP)
+ .Case("lr", AArch64::LR)
+ .Case("x31", AArch64::XZR)
+ .Case("w31", AArch64::WZR)
+ .Default(0);
+
+ if (RegNum == 0)
+ return -1;
+
+ Parser.Lex(); // Eat identifier token.
+ return RegNum;
+}
+
+/// tryMatchVectorRegister - Try to parse a vector register name with optional
+/// kind specifier. If it is a register specifier, eat the token and return
+/// the register number.
+int AArch64AsmParser::tryMatchVectorRegister(StringRef &Kind, bool expected) {
+ if (Parser.getTok().isNot(AsmToken::Identifier)) {
+ TokError("vector register expected");
+ return -1;
+ }
+
+ StringRef Name = Parser.getTok().getString();
+ // If there is a kind specifier, it's separated from the register name by
+ // a '.'.
+ size_t Start = 0, Next = Name.find('.');
+ StringRef Head = Name.slice(Start, Next);
+ unsigned RegNum = matchVectorRegName(Head);
+ if (RegNum) {
+ if (Next != StringRef::npos) {
+ Kind = Name.slice(Next, StringRef::npos);
+ if (!isValidVectorKind(Kind)) {
+ TokError("invalid vector kind qualifier");
+ return -1;
+ }
+ }
+ Parser.Lex(); // Eat the register token.
+ return RegNum;
}
+
+ if (expected)
+ TokError("vector register expected");
+ return -1;
}
+/// tryParseSysCROperand - Try to parse a system instruction CR operand name.
AArch64AsmParser::OperandMatchResultTy
-AArch64AsmParser::ParseImmediate(const MCExpr *&ExprVal) {
- if (getLexer().is(AsmToken::Colon)) {
- AArch64MCExpr::VariantKind RefKind;
+AArch64AsmParser::tryParseSysCROperand(OperandVector &Operands) {
+ SMLoc S = getLoc();
- OperandMatchResultTy ResTy = ParseRelocPrefix(RefKind);
- if (ResTy != MatchOperand_Success)
- return ResTy;
+ if (Parser.getTok().isNot(AsmToken::Identifier)) {
+ Error(S, "Expected cN operand where 0 <= N <= 15");
+ return MatchOperand_ParseFail;
+ }
- const MCExpr *SubExprVal;
- if (getParser().parseExpression(SubExprVal))
- return MatchOperand_ParseFail;
+ StringRef Tok = Parser.getTok().getIdentifier();
+ if (Tok[0] != 'c' && Tok[0] != 'C') {
+ Error(S, "Expected cN operand where 0 <= N <= 15");
+ return MatchOperand_ParseFail;
+ }
- ExprVal = AArch64MCExpr::Create(RefKind, SubExprVal, getContext());
- return MatchOperand_Success;
+ uint32_t CRNum;
+ bool BadNum = Tok.drop_front().getAsInteger(10, CRNum);
+ if (BadNum || CRNum > 15) {
+ Error(S, "Expected cN operand where 0 <= N <= 15");
+ return MatchOperand_ParseFail;
}
- // No weird AArch64MCExpr prefix
- return getParser().parseExpression(ExprVal)
- ? MatchOperand_ParseFail : MatchOperand_Success;
+ Parser.Lex(); // Eat identifier token.
+ Operands.push_back(
+ AArch64Operand::CreateSysCR(CRNum, S, getLoc(), getContext()));
+ return MatchOperand_Success;
}
-// A lane attached to a NEON register. "[N]", which should yield three tokens:
-// '[', N, ']'. A hash is not allowed to precede the immediate here.
+/// tryParsePrefetch - Try to parse a prefetch operand.
AArch64AsmParser::OperandMatchResultTy
-AArch64AsmParser::ParseNEONLane(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- uint32_t NumLanes) {
- SMLoc Loc = Parser.getTok().getLoc();
+AArch64AsmParser::tryParsePrefetch(OperandVector &Operands) {
+ SMLoc S = getLoc();
+ const AsmToken &Tok = Parser.getTok();
+ // Either an identifier for named values or a 5-bit immediate.
+ bool Hash = Tok.is(AsmToken::Hash);
+ if (Hash || Tok.is(AsmToken::Integer)) {
+ if (Hash)
+ Parser.Lex(); // Eat hash token.
+ const MCExpr *ImmVal;
+ if (getParser().parseExpression(ImmVal))
+ return MatchOperand_ParseFail;
- assert(Parser.getTok().is(AsmToken::LBrac) && "inappropriate operand");
- Operands.push_back(AArch64Operand::CreateToken("[", Loc));
- Parser.Lex(); // Eat '['
+ const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(ImmVal);
+ if (!MCE) {
+ TokError("immediate value expected for prefetch operand");
+ return MatchOperand_ParseFail;
+ }
+ unsigned prfop = MCE->getValue();
+ if (prfop > 31) {
+ TokError("prefetch operand out of range, [0,31] expected");
+ return MatchOperand_ParseFail;
+ }
- if (Parser.getTok().isNot(AsmToken::Integer)) {
- Error(Parser.getTok().getLoc(), "expected lane number");
+ Operands.push_back(AArch64Operand::CreatePrefetch(prfop, S, getContext()));
+ return MatchOperand_Success;
+ }
+
+ if (Tok.isNot(AsmToken::Identifier)) {
+ TokError("pre-fetch hint expected");
return MatchOperand_ParseFail;
}
- if (Parser.getTok().getIntVal() >= NumLanes) {
- Error(Parser.getTok().getLoc(), "lane number incompatible with layout");
+ bool Valid;
+ unsigned prfop = AArch64PRFM::PRFMMapper().fromString(Tok.getString(), Valid);
+ if (!Valid) {
+ TokError("pre-fetch hint expected");
return MatchOperand_ParseFail;
}
- const MCExpr *Lane = MCConstantExpr::Create(Parser.getTok().getIntVal(),
- getContext());
- SMLoc S = Parser.getTok().getLoc();
- Parser.Lex(); // Eat actual lane
- SMLoc E = Parser.getTok().getLoc();
- Operands.push_back(AArch64Operand::CreateImm(Lane, S, E));
+ Parser.Lex(); // Eat identifier token.
+ Operands.push_back(AArch64Operand::CreatePrefetch(prfop, S, getContext()));
+ return MatchOperand_Success;
+}
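+// E.g. both "prfm pldl1keep, [x0]" and the raw form "prfm #0, [x0]" are
+// accepted here: named hints go through the PRFM mapper, raw values are
+// range-checked against [0, 31].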
+/// tryParseAdrpLabel - Parse and validate a source label for the ADRP
+/// instruction.
+AArch64AsmParser::OperandMatchResultTy
+AArch64AsmParser::tryParseAdrpLabel(OperandVector &Operands) {
+ SMLoc S = getLoc();
+ const MCExpr *Expr;
- if (Parser.getTok().isNot(AsmToken::RBrac)) {
- Error(Parser.getTok().getLoc(), "expected ']' after lane");
+ if (Parser.getTok().is(AsmToken::Hash)) {
+ Parser.Lex(); // Eat hash token.
+ }
+
+ if (parseSymbolicImmVal(Expr))
return MatchOperand_ParseFail;
+
+ AArch64MCExpr::VariantKind ELFRefKind;
+ MCSymbolRefExpr::VariantKind DarwinRefKind;
+ int64_t Addend;
+ if (classifySymbolRef(Expr, ELFRefKind, DarwinRefKind, Addend)) {
+ if (DarwinRefKind == MCSymbolRefExpr::VK_None &&
+ ELFRefKind == AArch64MCExpr::VK_INVALID) {
+ // No modifier was specified at all; this is the syntax for an ELF basic
+ // ADRP relocation (unfortunately).
+ Expr =
+ AArch64MCExpr::Create(Expr, AArch64MCExpr::VK_ABS_PAGE, getContext());
+ } else if ((DarwinRefKind == MCSymbolRefExpr::VK_GOTPAGE ||
+ DarwinRefKind == MCSymbolRefExpr::VK_TLVPPAGE) &&
+ Addend != 0) {
+ Error(S, "gotpage label reference not allowed an addend");
+ return MatchOperand_ParseFail;
+ } else if (DarwinRefKind != MCSymbolRefExpr::VK_PAGE &&
+ DarwinRefKind != MCSymbolRefExpr::VK_GOTPAGE &&
+ DarwinRefKind != MCSymbolRefExpr::VK_TLVPPAGE &&
+ ELFRefKind != AArch64MCExpr::VK_GOT_PAGE &&
+ ELFRefKind != AArch64MCExpr::VK_GOTTPREL_PAGE &&
+ ELFRefKind != AArch64MCExpr::VK_TLSDESC_PAGE) {
+ // The operand must be an @page or @gotpage qualified symbolref.
+ Error(S, "page or gotpage label reference expected");
+ return MatchOperand_ParseFail;
+ }
}
- Operands.push_back(AArch64Operand::CreateToken("]", Loc));
- Parser.Lex(); // Eat ']'
+ // We have either a label reference possibly with addend or an immediate. The
+ // addend is a raw value here. The linker will adjust it to only reference the
+ // page.
+ SMLoc E = SMLoc::getFromPointer(getLoc().getPointer() - 1);
+ Operands.push_back(AArch64Operand::CreateImm(Expr, S, E, getContext()));
return MatchOperand_Success;
}
+/// tryParseAdrLabel - Parse and validate a source label for the ADR
+/// instruction.
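+/// Unlike ADRP, no page qualifier is checked here; a bare label or
+/// expression, e.g. "adr x0, lbl", is parsed as a plain immediate.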
AArch64AsmParser::OperandMatchResultTy
-AArch64AsmParser::ParseRelocPrefix(AArch64MCExpr::VariantKind &RefKind) {
- assert(getLexer().is(AsmToken::Colon) && "expected a ':'");
- Parser.Lex();
+AArch64AsmParser::tryParseAdrLabel(OperandVector &Operands) {
+ SMLoc S = getLoc();
+ const MCExpr *Expr;
- if (getLexer().isNot(AsmToken::Identifier)) {
- Error(Parser.getTok().getLoc(),
- "expected relocation specifier in operand after ':'");
- return MatchOperand_ParseFail;
+ if (Parser.getTok().is(AsmToken::Hash)) {
+ Parser.Lex(); // Eat hash token.
}
- std::string LowerCase = Parser.getTok().getIdentifier().lower();
- RefKind = StringSwitch<AArch64MCExpr::VariantKind>(LowerCase)
- .Case("got", AArch64MCExpr::VK_AARCH64_GOT)
- .Case("got_lo12", AArch64MCExpr::VK_AARCH64_GOT_LO12)
- .Case("lo12", AArch64MCExpr::VK_AARCH64_LO12)
- .Case("abs_g0", AArch64MCExpr::VK_AARCH64_ABS_G0)
- .Case("abs_g0_nc", AArch64MCExpr::VK_AARCH64_ABS_G0_NC)
- .Case("abs_g1", AArch64MCExpr::VK_AARCH64_ABS_G1)
- .Case("abs_g1_nc", AArch64MCExpr::VK_AARCH64_ABS_G1_NC)
- .Case("abs_g2", AArch64MCExpr::VK_AARCH64_ABS_G2)
- .Case("abs_g2_nc", AArch64MCExpr::VK_AARCH64_ABS_G2_NC)
- .Case("abs_g3", AArch64MCExpr::VK_AARCH64_ABS_G3)
- .Case("abs_g0_s", AArch64MCExpr::VK_AARCH64_SABS_G0)
- .Case("abs_g1_s", AArch64MCExpr::VK_AARCH64_SABS_G1)
- .Case("abs_g2_s", AArch64MCExpr::VK_AARCH64_SABS_G2)
- .Case("dtprel_g2", AArch64MCExpr::VK_AARCH64_DTPREL_G2)
- .Case("dtprel_g1", AArch64MCExpr::VK_AARCH64_DTPREL_G1)
- .Case("dtprel_g1_nc", AArch64MCExpr::VK_AARCH64_DTPREL_G1_NC)
- .Case("dtprel_g0", AArch64MCExpr::VK_AARCH64_DTPREL_G0)
- .Case("dtprel_g0_nc", AArch64MCExpr::VK_AARCH64_DTPREL_G0_NC)
- .Case("dtprel_hi12", AArch64MCExpr::VK_AARCH64_DTPREL_HI12)
- .Case("dtprel_lo12", AArch64MCExpr::VK_AARCH64_DTPREL_LO12)
- .Case("dtprel_lo12_nc", AArch64MCExpr::VK_AARCH64_DTPREL_LO12_NC)
- .Case("gottprel_g1", AArch64MCExpr::VK_AARCH64_GOTTPREL_G1)
- .Case("gottprel_g0_nc", AArch64MCExpr::VK_AARCH64_GOTTPREL_G0_NC)
- .Case("gottprel", AArch64MCExpr::VK_AARCH64_GOTTPREL)
- .Case("gottprel_lo12", AArch64MCExpr::VK_AARCH64_GOTTPREL_LO12)
- .Case("tprel_g2", AArch64MCExpr::VK_AARCH64_TPREL_G2)
- .Case("tprel_g1", AArch64MCExpr::VK_AARCH64_TPREL_G1)
- .Case("tprel_g1_nc", AArch64MCExpr::VK_AARCH64_TPREL_G1_NC)
- .Case("tprel_g0", AArch64MCExpr::VK_AARCH64_TPREL_G0)
- .Case("tprel_g0_nc", AArch64MCExpr::VK_AARCH64_TPREL_G0_NC)
- .Case("tprel_hi12", AArch64MCExpr::VK_AARCH64_TPREL_HI12)
- .Case("tprel_lo12", AArch64MCExpr::VK_AARCH64_TPREL_LO12)
- .Case("tprel_lo12_nc", AArch64MCExpr::VK_AARCH64_TPREL_LO12_NC)
- .Case("tlsdesc", AArch64MCExpr::VK_AARCH64_TLSDESC)
- .Case("tlsdesc_lo12", AArch64MCExpr::VK_AARCH64_TLSDESC_LO12)
- .Default(AArch64MCExpr::VK_AARCH64_None);
-
- if (RefKind == AArch64MCExpr::VK_AARCH64_None) {
- Error(Parser.getTok().getLoc(),
- "expected relocation specifier in operand after ':'");
+ if (getParser().parseExpression(Expr))
return MatchOperand_ParseFail;
- }
- Parser.Lex(); // Eat identifier
- if (getLexer().isNot(AsmToken::Colon)) {
- Error(Parser.getTok().getLoc(),
- "expected ':' after relocation specifier");
- return MatchOperand_ParseFail;
- }
- Parser.Lex();
+ SMLoc E = SMLoc::getFromPointer(getLoc().getPointer() - 1);
+ Operands.push_back(AArch64Operand::CreateImm(Expr, S, E, getContext()));
+
return MatchOperand_Success;
}
+/// tryParseFPImm - A floating point immediate expression operand.
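+/// Accepts a real literal representable as an 8-bit AArch64 FP immediate,
+/// e.g. "fmov d0, #1.0", or a raw "0x"-prefixed encoding in [0,255]. Zero is
+/// let through for later rewriting to use the zero register.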
AArch64AsmParser::OperandMatchResultTy
-AArch64AsmParser::ParseImmWithLSLOperand(
- SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+AArch64AsmParser::tryParseFPImm(OperandVector &Operands) {
+ SMLoc S = getLoc();
- SMLoc S = Parser.getTok().getLoc();
+ bool Hash = false;
+ if (Parser.getTok().is(AsmToken::Hash)) {
+ Parser.Lex(); // Eat '#'
+ Hash = true;
+ }
+
+ // Handle negation, as that still comes through as a separate token.
+ bool isNegative = false;
+ if (Parser.getTok().is(AsmToken::Minus)) {
+ isNegative = true;
+ Parser.Lex();
+ }
+ const AsmToken &Tok = Parser.getTok();
+ if (Tok.is(AsmToken::Real)) {
+ APFloat RealVal(APFloat::IEEEdouble, Tok.getString());
+ uint64_t IntVal = RealVal.bitcastToAPInt().getZExtValue();
+ // If we had a '-' in front, toggle the sign bit.
+ IntVal ^= (uint64_t)isNegative << 63;
+ int Val = AArch64_AM::getFP64Imm(APInt(64, IntVal));
+ Parser.Lex(); // Eat the token.
+ // Check for out of range values. As an exception, we let Zero through,
+ // as we handle that special case in post-processing before matching in
+ // order to use the zero register for it.
+ if (Val == -1 && !RealVal.isZero()) {
+ TokError("expected compatible register or floating-point constant");
+ return MatchOperand_ParseFail;
+ }
+ Operands.push_back(AArch64Operand::CreateFPImm(Val, S, getContext()));
+ return MatchOperand_Success;
+ }
+ if (Tok.is(AsmToken::Integer)) {
+ int64_t Val;
+ if (!isNegative && Tok.getString().startswith("0x")) {
+ Val = Tok.getIntVal();
+ if (Val > 255 || Val < 0) {
+ TokError("encoded floating point value out of range");
+ return MatchOperand_ParseFail;
+ }
+ } else {
+ APFloat RealVal(APFloat::IEEEdouble, Tok.getString());
+ uint64_t IntVal = RealVal.bitcastToAPInt().getZExtValue();
+ // If we had a '-' in front, toggle the sign bit.
+ IntVal ^= (uint64_t)isNegative << 63;
+ Val = AArch64_AM::getFP64Imm(APInt(64, IntVal));
+ }
+ Parser.Lex(); // Eat the token.
+ Operands.push_back(AArch64Operand::CreateFPImm(Val, S, getContext()));
+ return MatchOperand_Success;
+ }
+
+ if (!Hash)
+ return MatchOperand_NoMatch;
+
+ TokError("invalid floating point immediate");
+ return MatchOperand_ParseFail;
+}
+
+/// tryParseAddSubImm - Parse ADD/SUB shifted immediate operand
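+/// e.g. "add x0, x1, #4096" is folded below into the equivalent
+/// "#1, lsl #12" form, since the field is a 12-bit immediate with an
+/// optional 12-bit left shift.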
+AArch64AsmParser::OperandMatchResultTy
+AArch64AsmParser::tryParseAddSubImm(OperandVector &Operands) {
+ SMLoc S = getLoc();
if (Parser.getTok().is(AsmToken::Hash))
Parser.Lex(); // Eat '#'
@@ -1491,11 +2091,21 @@ AArch64AsmParser::ParseImmWithLSLOperand(
return MatchOperand_NoMatch;
const MCExpr *Imm;
- if (ParseImmediate(Imm) != MatchOperand_Success)
+ if (parseSymbolicImmVal(Imm))
return MatchOperand_ParseFail;
else if (Parser.getTok().isNot(AsmToken::Comma)) {
+ uint64_t ShiftAmount = 0;
+ const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Imm);
+ if (MCE) {
+ int64_t Val = MCE->getValue();
+ if (Val > 0xfff && (Val & 0xfff) == 0) {
+ Imm = MCConstantExpr::Create(Val >> 12, getContext());
+ ShiftAmount = 12;
+ }
+ }
SMLoc E = Parser.getTok().getLoc();
- Operands.push_back(AArch64Operand::CreateImmWithLSL(Imm, 0, true, S, E));
+ Operands.push_back(AArch64Operand::CreateShiftedImm(Imm, ShiftAmount, S, E,
+ getContext()));
return MatchOperand_Success;
}
@@ -1503,18 +2113,22 @@ AArch64AsmParser::ParseImmWithLSLOperand(
Parser.Lex();
// The optional operand must be "lsl #N" where N is non-negative.
- if (Parser.getTok().is(AsmToken::Identifier)
- && Parser.getTok().getIdentifier().equals_lower("lsl")) {
- Parser.Lex();
+ if (!Parser.getTok().is(AsmToken::Identifier) ||
+ !Parser.getTok().getIdentifier().equals_lower("lsl")) {
+ Error(Parser.getTok().getLoc(), "only 'lsl #+N' valid after immediate");
+ return MatchOperand_ParseFail;
+ }
- if (Parser.getTok().is(AsmToken::Hash)) {
- Parser.Lex();
+ // Eat 'lsl'
+ Parser.Lex();
- if (Parser.getTok().isNot(AsmToken::Integer)) {
- Error(Parser.getTok().getLoc(), "only 'lsl #+N' valid after immediate");
- return MatchOperand_ParseFail;
- }
- }
+ if (Parser.getTok().is(AsmToken::Hash)) {
+ Parser.Lex();
+ }
+
+ if (Parser.getTok().isNot(AsmToken::Integer)) {
+ Error(Parser.getTok().getLoc(), "only 'lsl #+N' valid after immediate");
+ return MatchOperand_ParseFail;
}
int64_t ShiftAmount = Parser.getTok().getIntVal();
@@ -1526,791 +2140,977 @@ AArch64AsmParser::ParseImmWithLSLOperand(
Parser.Lex(); // Eat the number
SMLoc E = Parser.getTok().getLoc();
- Operands.push_back(AArch64Operand::CreateImmWithLSL(Imm, ShiftAmount,
- false, S, E));
+ Operands.push_back(AArch64Operand::CreateShiftedImm(Imm, ShiftAmount,
+ S, E, getContext()));
return MatchOperand_Success;
}
+/// parseCondCodeString - Parse a Condition Code string.
+AArch64CC::CondCode AArch64AsmParser::parseCondCodeString(StringRef Cond) {
+ AArch64CC::CondCode CC = StringSwitch<AArch64CC::CondCode>(Cond.lower())
+ .Case("eq", AArch64CC::EQ)
+ .Case("ne", AArch64CC::NE)
+ .Case("cs", AArch64CC::HS)
+ .Case("hs", AArch64CC::HS)
+ .Case("cc", AArch64CC::LO)
+ .Case("lo", AArch64CC::LO)
+ .Case("mi", AArch64CC::MI)
+ .Case("pl", AArch64CC::PL)
+ .Case("vs", AArch64CC::VS)
+ .Case("vc", AArch64CC::VC)
+ .Case("hi", AArch64CC::HI)
+ .Case("ls", AArch64CC::LS)
+ .Case("ge", AArch64CC::GE)
+ .Case("lt", AArch64CC::LT)
+ .Case("gt", AArch64CC::GT)
+ .Case("le", AArch64CC::LE)
+ .Case("al", AArch64CC::AL)
+ .Case("nv", AArch64CC::NV)
+ .Default(AArch64CC::Invalid);
+ return CC;
+}
+
+/// parseCondCode - Parse a Condition Code operand.
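+/// When invertCondCode is set (used for alias forms such as "cinc", which
+/// encode the inverted condition), the parsed code is inverted before being
+/// added to the operand list.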
+bool AArch64AsmParser::parseCondCode(OperandVector &Operands,
+ bool invertCondCode) {
+ SMLoc S = getLoc();
+ const AsmToken &Tok = Parser.getTok();
+ assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier");
+
+ StringRef Cond = Tok.getString();
+ AArch64CC::CondCode CC = parseCondCodeString(Cond);
+ if (CC == AArch64CC::Invalid)
+ return TokError("invalid condition code");
+ Parser.Lex(); // Eat identifier token.
+
+ if (invertCondCode)
+ CC = AArch64CC::getInvertedCondCode(AArch64CC::CondCode(CC));
+
+ Operands.push_back(
+ AArch64Operand::CreateCondCode(CC, S, getLoc(), getContext()));
+ return false;
+}
+/// tryParseOptionalShiftExtend - Some operands take an optional shift or
+/// extend argument. Parse them if present.
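+/// e.g. "add x0, x1, x2, lsl #4" (shift) or "add x0, x1, w2, uxtb" (extend,
+/// where a "#0" amount is implicit). Plain shifts require an explicit
+/// immediate; extends do not.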
AArch64AsmParser::OperandMatchResultTy
-AArch64AsmParser::ParseCondCodeOperand(
- SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
- if (Parser.getTok().isNot(AsmToken::Identifier))
+AArch64AsmParser::tryParseOptionalShiftExtend(OperandVector &Operands) {
+ const AsmToken &Tok = Parser.getTok();
+ std::string LowerID = Tok.getString().lower();
+ AArch64_AM::ShiftExtendType ShOp =
+ StringSwitch<AArch64_AM::ShiftExtendType>(LowerID)
+ .Case("lsl", AArch64_AM::LSL)
+ .Case("lsr", AArch64_AM::LSR)
+ .Case("asr", AArch64_AM::ASR)
+ .Case("ror", AArch64_AM::ROR)
+ .Case("msl", AArch64_AM::MSL)
+ .Case("uxtb", AArch64_AM::UXTB)
+ .Case("uxth", AArch64_AM::UXTH)
+ .Case("uxtw", AArch64_AM::UXTW)
+ .Case("uxtx", AArch64_AM::UXTX)
+ .Case("sxtb", AArch64_AM::SXTB)
+ .Case("sxth", AArch64_AM::SXTH)
+ .Case("sxtw", AArch64_AM::SXTW)
+ .Case("sxtx", AArch64_AM::SXTX)
+ .Default(AArch64_AM::InvalidShiftExtend);
+
+ if (ShOp == AArch64_AM::InvalidShiftExtend)
return MatchOperand_NoMatch;
- StringRef Tok = Parser.getTok().getIdentifier();
- A64CC::CondCodes CondCode = A64StringToCondCode(Tok);
+ SMLoc S = Tok.getLoc();
+ Parser.Lex();
- if (CondCode == A64CC::Invalid)
- return MatchOperand_NoMatch;
+ bool Hash = getLexer().is(AsmToken::Hash);
+ if (!Hash && getLexer().isNot(AsmToken::Integer)) {
+ if (ShOp == AArch64_AM::LSL || ShOp == AArch64_AM::LSR ||
+ ShOp == AArch64_AM::ASR || ShOp == AArch64_AM::ROR ||
+ ShOp == AArch64_AM::MSL) {
+ // We expect a number here.
+ TokError("expected #imm after shift specifier");
+ return MatchOperand_ParseFail;
+ }
- SMLoc S = Parser.getTok().getLoc();
- Parser.Lex(); // Eat condition code
- SMLoc E = Parser.getTok().getLoc();
+ // "extend" type operatoins don't need an immediate, #0 is implicit.
+ SMLoc E = SMLoc::getFromPointer(getLoc().getPointer() - 1);
+ Operands.push_back(
+ AArch64Operand::CreateShiftExtend(ShOp, 0, false, S, E, getContext()));
+ return MatchOperand_Success;
+ }
- Operands.push_back(AArch64Operand::CreateCondCode(CondCode, S, E));
- return MatchOperand_Success;
-}
+ if (Hash)
+ Parser.Lex(); // Eat the '#'.
-AArch64AsmParser::OperandMatchResultTy
-AArch64AsmParser::ParseCRxOperand(
- SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
- SMLoc S = Parser.getTok().getLoc();
- if (Parser.getTok().isNot(AsmToken::Identifier)) {
- Error(S, "Expected cN operand where 0 <= N <= 15");
+ // Make sure we do actually have a number
+ if (!Parser.getTok().is(AsmToken::Integer)) {
+ Error(Parser.getTok().getLoc(),
+ "expected integer shift amount");
return MatchOperand_ParseFail;
}
- StringRef Tok = Parser.getTok().getIdentifier();
- if (Tok[0] != 'c' && Tok[0] != 'C') {
- Error(S, "Expected cN operand where 0 <= N <= 15");
+ const MCExpr *ImmVal;
+ if (getParser().parseExpression(ImmVal))
return MatchOperand_ParseFail;
- }
- uint32_t CRNum;
- bool BadNum = Tok.drop_front().getAsInteger(10, CRNum);
- if (BadNum || CRNum > 15) {
- Error(S, "Expected cN operand where 0 <= N <= 15");
+ const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(ImmVal);
+ if (!MCE) {
+ TokError("expected #imm after shift specifier");
return MatchOperand_ParseFail;
}
- const MCExpr *CRImm = MCConstantExpr::Create(CRNum, getContext());
-
- Parser.Lex();
- SMLoc E = Parser.getTok().getLoc();
-
- Operands.push_back(AArch64Operand::CreateImm(CRImm, S, E));
+ SMLoc E = SMLoc::getFromPointer(getLoc().getPointer() - 1);
+ Operands.push_back(AArch64Operand::CreateShiftExtend(
+ ShOp, MCE->getValue(), true, S, E, getContext()));
return MatchOperand_Success;
}
-AArch64AsmParser::OperandMatchResultTy
-AArch64AsmParser::ParseFPImmOperand(
- SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+/// parseSysAlias - The IC, DC, AT, and TLBI instructions are simple aliases for
+/// the SYS instruction. Parse them specially so that we create a SYS MCInst.
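+/// e.g. "ic ialluis" assembles as "sys #0, c7, c1, #0", and "dc zva, x0" as
+/// "sys #3, c7, c4, #1, x0".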
+bool AArch64AsmParser::parseSysAlias(StringRef Name, SMLoc NameLoc,
+ OperandVector &Operands) {
+ if (Name.find('.') != StringRef::npos)
+ return TokError("invalid operand");
- SMLoc S = Parser.getTok().getLoc();
+ Mnemonic = Name;
+ Operands.push_back(
+ AArch64Operand::CreateToken("sys", false, NameLoc, getContext()));
- bool Hash = false;
- if (Parser.getTok().is(AsmToken::Hash)) {
- Parser.Lex(); // Eat '#'
- Hash = true;
- }
+ const AsmToken &Tok = Parser.getTok();
+ StringRef Op = Tok.getString();
+ SMLoc S = Tok.getLoc();
- bool Negative = false;
- if (Parser.getTok().is(AsmToken::Minus)) {
- Negative = true;
- Parser.Lex(); // Eat '-'
- } else if (Parser.getTok().is(AsmToken::Plus)) {
- Parser.Lex(); // Eat '+'
+ const MCExpr *Expr = nullptr;
+
+#define SYS_ALIAS(op1, Cn, Cm, op2) \
+ do { \
+ Expr = MCConstantExpr::Create(op1, getContext()); \
+ Operands.push_back( \
+ AArch64Operand::CreateImm(Expr, S, getLoc(), getContext())); \
+ Operands.push_back( \
+ AArch64Operand::CreateSysCR(Cn, S, getLoc(), getContext())); \
+ Operands.push_back( \
+ AArch64Operand::CreateSysCR(Cm, S, getLoc(), getContext())); \
+ Expr = MCConstantExpr::Create(op2, getContext()); \
+ Operands.push_back( \
+ AArch64Operand::CreateImm(Expr, S, getLoc(), getContext())); \
+ } while (0)
+
+ if (Mnemonic == "ic") {
+ if (!Op.compare_lower("ialluis")) {
+ // SYS #0, C7, C1, #0
+ SYS_ALIAS(0, 7, 1, 0);
+ } else if (!Op.compare_lower("iallu")) {
+ // SYS #0, C7, C5, #0
+ SYS_ALIAS(0, 7, 5, 0);
+ } else if (!Op.compare_lower("ivau")) {
+ // SYS #3, C7, C5, #1
+ SYS_ALIAS(3, 7, 5, 1);
+ } else {
+ return TokError("invalid operand for IC instruction");
+ }
+ } else if (Mnemonic == "dc") {
+ if (!Op.compare_lower("zva")) {
+ // SYS #3, C7, C4, #1
+ SYS_ALIAS(3, 7, 4, 1);
+ } else if (!Op.compare_lower("ivac")) {
+ // SYS #0, C7, C6, #1
+ SYS_ALIAS(0, 7, 6, 1);
+ } else if (!Op.compare_lower("isw")) {
+ // SYS #0, C7, C6, #2
+ SYS_ALIAS(0, 7, 6, 2);
+ } else if (!Op.compare_lower("cvac")) {
+ // SYS #3, C7, C10, #1
+ SYS_ALIAS(3, 7, 10, 1);
+ } else if (!Op.compare_lower("csw")) {
+ // SYS #0, C7, C10, #2
+ SYS_ALIAS(0, 7, 10, 2);
+ } else if (!Op.compare_lower("cvau")) {
+ // SYS #3, C7, C11, #1
+ SYS_ALIAS(3, 7, 11, 1);
+ } else if (!Op.compare_lower("civac")) {
+ // SYS #3, C7, C14, #1
+ SYS_ALIAS(3, 7, 14, 1);
+ } else if (!Op.compare_lower("cisw")) {
+ // SYS #0, C7, C14, #2
+ SYS_ALIAS(0, 7, 14, 2);
+ } else {
+ return TokError("invalid operand for DC instruction");
+ }
+ } else if (Mnemonic == "at") {
+ if (!Op.compare_lower("s1e1r")) {
+ // SYS #0, C7, C8, #0
+ SYS_ALIAS(0, 7, 8, 0);
+ } else if (!Op.compare_lower("s1e2r")) {
+ // SYS #4, C7, C8, #0
+ SYS_ALIAS(4, 7, 8, 0);
+ } else if (!Op.compare_lower("s1e3r")) {
+ // SYS #6, C7, C8, #0
+ SYS_ALIAS(6, 7, 8, 0);
+ } else if (!Op.compare_lower("s1e1w")) {
+ // SYS #0, C7, C8, #1
+ SYS_ALIAS(0, 7, 8, 1);
+ } else if (!Op.compare_lower("s1e2w")) {
+ // SYS #4, C7, C8, #1
+ SYS_ALIAS(4, 7, 8, 1);
+ } else if (!Op.compare_lower("s1e3w")) {
+ // SYS #6, C7, C8, #1
+ SYS_ALIAS(6, 7, 8, 1);
+ } else if (!Op.compare_lower("s1e0r")) {
+ // SYS #0, C7, C8, #2
+ SYS_ALIAS(0, 7, 8, 2);
+ } else if (!Op.compare_lower("s1e0w")) {
+ // SYS #0, C7, C8, #3
+ SYS_ALIAS(0, 7, 8, 3);
+ } else if (!Op.compare_lower("s12e1r")) {
+ // SYS #4, C7, C8, #4
+ SYS_ALIAS(4, 7, 8, 4);
+ } else if (!Op.compare_lower("s12e1w")) {
+ // SYS #4, C7, C8, #5
+ SYS_ALIAS(4, 7, 8, 5);
+ } else if (!Op.compare_lower("s12e0r")) {
+ // SYS #4, C7, C8, #6
+ SYS_ALIAS(4, 7, 8, 6);
+ } else if (!Op.compare_lower("s12e0w")) {
+ // SYS #4, C7, C8, #7
+ SYS_ALIAS(4, 7, 8, 7);
+ } else {
+ return TokError("invalid operand for AT instruction");
+ }
+ } else if (Mnemonic == "tlbi") {
+ if (!Op.compare_lower("vmalle1is")) {
+ // SYS #0, C8, C3, #0
+ SYS_ALIAS(0, 8, 3, 0);
+ } else if (!Op.compare_lower("alle2is")) {
+ // SYS #4, C8, C3, #0
+ SYS_ALIAS(4, 8, 3, 0);
+ } else if (!Op.compare_lower("alle3is")) {
+ // SYS #6, C8, C3, #0
+ SYS_ALIAS(6, 8, 3, 0);
+ } else if (!Op.compare_lower("vae1is")) {
+ // SYS #0, C8, C3, #1
+ SYS_ALIAS(0, 8, 3, 1);
+ } else if (!Op.compare_lower("vae2is")) {
+ // SYS #4, C8, C3, #1
+ SYS_ALIAS(4, 8, 3, 1);
+ } else if (!Op.compare_lower("vae3is")) {
+ // SYS #6, C8, C3, #1
+ SYS_ALIAS(6, 8, 3, 1);
+ } else if (!Op.compare_lower("aside1is")) {
+ // SYS #0, C8, C3, #2
+ SYS_ALIAS(0, 8, 3, 2);
+ } else if (!Op.compare_lower("vaae1is")) {
+ // SYS #0, C8, C3, #3
+ SYS_ALIAS(0, 8, 3, 3);
+ } else if (!Op.compare_lower("alle1is")) {
+ // SYS #4, C8, C3, #4
+ SYS_ALIAS(4, 8, 3, 4);
+ } else if (!Op.compare_lower("vale1is")) {
+ // SYS #0, C8, C3, #5
+ SYS_ALIAS(0, 8, 3, 5);
+ } else if (!Op.compare_lower("vaale1is")) {
+ // SYS #0, C8, C3, #7
+ SYS_ALIAS(0, 8, 3, 7);
+ } else if (!Op.compare_lower("vmalle1")) {
+ // SYS #0, C8, C7, #0
+ SYS_ALIAS(0, 8, 7, 0);
+ } else if (!Op.compare_lower("alle2")) {
+ // SYS #4, C8, C7, #0
+ SYS_ALIAS(4, 8, 7, 0);
+ } else if (!Op.compare_lower("vale2is")) {
+ // SYS #4, C8, C3, #5
+ SYS_ALIAS(4, 8, 3, 5);
+ } else if (!Op.compare_lower("vale3is")) {
+ // SYS #6, C8, C3, #5
+ SYS_ALIAS(6, 8, 3, 5);
+ } else if (!Op.compare_lower("alle3")) {
+ // SYS #6, C8, C7, #0
+ SYS_ALIAS(6, 8, 7, 0);
+ } else if (!Op.compare_lower("vae1")) {
+ // SYS #0, C8, C7, #1
+ SYS_ALIAS(0, 8, 7, 1);
+ } else if (!Op.compare_lower("vae2")) {
+ // SYS #4, C8, C7, #1
+ SYS_ALIAS(4, 8, 7, 1);
+ } else if (!Op.compare_lower("vae3")) {
+ // SYS #6, C8, C7, #1
+ SYS_ALIAS(6, 8, 7, 1);
+ } else if (!Op.compare_lower("aside1")) {
+ // SYS #0, C8, C7, #2
+ SYS_ALIAS(0, 8, 7, 2);
+ } else if (!Op.compare_lower("vaae1")) {
+ // SYS #0, C8, C7, #3
+ SYS_ALIAS(0, 8, 7, 3);
+ } else if (!Op.compare_lower("alle1")) {
+ // SYS #4, C8, C7, #4
+ SYS_ALIAS(4, 8, 7, 4);
+ } else if (!Op.compare_lower("vale1")) {
+ // SYS #0, C8, C7, #5
+ SYS_ALIAS(0, 8, 7, 5);
+ } else if (!Op.compare_lower("vale2")) {
+ // SYS #4, C8, C7, #5
+ SYS_ALIAS(4, 8, 7, 5);
+ } else if (!Op.compare_lower("vale3")) {
+ // SYS #6, C8, C7, #5
+ SYS_ALIAS(6, 8, 7, 5);
+ } else if (!Op.compare_lower("vaale1")) {
+ // SYS #0, C8, C7, #7
+ SYS_ALIAS(0, 8, 7, 7);
+ } else if (!Op.compare_lower("ipas2e1")) {
+ // SYS #4, C8, C4, #1
+ SYS_ALIAS(4, 8, 4, 1);
+ } else if (!Op.compare_lower("ipas2le1")) {
+ // SYS #4, C8, C4, #5
+ SYS_ALIAS(4, 8, 4, 5);
+ } else if (!Op.compare_lower("ipas2e1is")) {
+ // SYS #4, C8, C0, #1
+ SYS_ALIAS(4, 8, 0, 1);
+ } else if (!Op.compare_lower("ipas2le1is")) {
+ // SYS #4, C8, C0, #5
+ SYS_ALIAS(4, 8, 0, 5);
+ } else if (!Op.compare_lower("vmalls12e1")) {
+ // SYS #4, C8, C7, #6
+ SYS_ALIAS(4, 8, 7, 6);
+ } else if (!Op.compare_lower("vmalls12e1is")) {
+ // SYS #4, C8, C3, #6
+ SYS_ALIAS(4, 8, 3, 6);
+ } else {
+ return TokError("invalid operand for TLBI instruction");
+ }
}
- if (Parser.getTok().isNot(AsmToken::Real)) {
- if (!Hash)
- return MatchOperand_NoMatch;
- Error(S, "Expected floating-point immediate");
- return MatchOperand_ParseFail;
- }
+#undef SYS_ALIAS
- APFloat RealVal(APFloat::IEEEdouble, Parser.getTok().getString());
- if (Negative) RealVal.changeSign();
- double DblVal = RealVal.convertToDouble();
+ Parser.Lex(); // Eat operand.
- Parser.Lex(); // Eat real number
- SMLoc E = Parser.getTok().getLoc();
+ bool ExpectRegister = (Op.lower().find("all") == StringRef::npos);
+ bool HasRegister = false;
- Operands.push_back(AArch64Operand::CreateFPImm(DblVal, S, E));
- return MatchOperand_Success;
-}
+ // Check for the optional register operand.
+ if (getLexer().is(AsmToken::Comma)) {
+ Parser.Lex(); // Eat comma.
-AArch64AsmParser::OperandMatchResultTy
-AArch64AsmParser::ParseFPImm0AndImm0Operand(
- SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ if (Tok.isNot(AsmToken::Identifier) || parseRegister(Operands))
+ return TokError("expected register operand");
- SMLoc S = Parser.getTok().getLoc();
+ HasRegister = true;
+ }
- bool Hash = false;
- if (Parser.getTok().is(AsmToken::Hash)) {
- Parser.Lex(); // Eat '#'
- Hash = true;
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ Parser.eatToEndOfStatement();
+ return TokError("unexpected token in argument list");
+ }
+
+ if (ExpectRegister && !HasRegister) {
+ return TokError("specified " + Mnemonic + " op requires a register");
+ } else if (!ExpectRegister && HasRegister) {
+ return TokError("specified " + Mnemonic + " op does not use a register");
}
- APFloat RealVal(0.0);
- if (Parser.getTok().is(AsmToken::Real)) {
- if(Parser.getTok().getString() != "0.0") {
- Error(S, "only #0.0 is acceptable as immediate");
+ Parser.Lex(); // Consume the EndOfStatement
+ return false;
+}
+
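+/// tryParseBarrierOperand - Parse a barrier option for DMB/DSB/ISB: either a
+/// named option, e.g. "dmb ish", or a 4-bit immediate in [0,15], e.g.
+/// "dmb #11".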
+AArch64AsmParser::OperandMatchResultTy
+AArch64AsmParser::tryParseBarrierOperand(OperandVector &Operands) {
+ const AsmToken &Tok = Parser.getTok();
+
+ // Can be either a #imm style literal or an option name
+ bool Hash = Tok.is(AsmToken::Hash);
+ if (Hash || Tok.is(AsmToken::Integer)) {
+ // Immediate operand.
+ if (Hash)
+ Parser.Lex(); // Eat the '#'
+ const MCExpr *ImmVal;
+ SMLoc ExprLoc = getLoc();
+ if (getParser().parseExpression(ImmVal))
+ return MatchOperand_ParseFail;
+ const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(ImmVal);
+ if (!MCE) {
+ Error(ExprLoc, "immediate value expected for barrier operand");
return MatchOperand_ParseFail;
}
- }
- else if (Parser.getTok().is(AsmToken::Integer)) {
- if(Parser.getTok().getIntVal() != 0) {
- Error(S, "only #0.0 is acceptable as immediate");
+ if (MCE->getValue() < 0 || MCE->getValue() > 15) {
+ Error(ExprLoc, "barrier operand out of range");
return MatchOperand_ParseFail;
}
+ Operands.push_back(
+ AArch64Operand::CreateBarrier(MCE->getValue(), ExprLoc, getContext()));
+ return MatchOperand_Success;
}
- else {
- if (!Hash)
- return MatchOperand_NoMatch;
- Error(S, "only #0.0 is acceptable as immediate");
+
+ if (Tok.isNot(AsmToken::Identifier)) {
+ TokError("invalid operand for instruction");
return MatchOperand_ParseFail;
}
- Parser.Lex(); // Eat real number
- SMLoc E = Parser.getTok().getLoc();
+ bool Valid;
+ unsigned Opt = AArch64DB::DBarrierMapper().fromString(Tok.getString(), Valid);
+ if (!Valid) {
+ TokError("invalid barrier option name");
+ return MatchOperand_ParseFail;
+ }
+
+ // The only valid named option for ISB is 'sy'
+ if (Mnemonic == "isb" && Opt != AArch64DB::SY) {
+ TokError("'sy' or #imm operand expected");
+ return MatchOperand_ParseFail;
+ }
+
+ Operands.push_back(
+ AArch64Operand::CreateBarrier(Opt, getLoc(), getContext()));
+ Parser.Lex(); // Consume the option
- Operands.push_back(AArch64Operand::CreateFPImm(0.0, S, E));
return MatchOperand_Success;
}
-// Automatically generated
-static unsigned MatchRegisterName(StringRef Name);
-
-bool
-AArch64AsmParser::IdentifyRegister(unsigned &RegNum, SMLoc &RegEndLoc,
- StringRef &Layout,
- SMLoc &LayoutLoc) const {
+AArch64AsmParser::OperandMatchResultTy
+AArch64AsmParser::tryParseSysReg(OperandVector &Operands) {
const AsmToken &Tok = Parser.getTok();
if (Tok.isNot(AsmToken::Identifier))
- return false;
+ return MatchOperand_NoMatch;
- std::string LowerReg = Tok.getString().lower();
- size_t DotPos = LowerReg.find('.');
+ Operands.push_back(AArch64Operand::CreateSysReg(Tok.getString(), getLoc(),
+ STI.getFeatureBits(), getContext()));
+ Parser.Lex(); // Eat identifier
- bool IsVec128 = false;
- SMLoc S = Tok.getLoc();
- RegEndLoc = SMLoc::getFromPointer(S.getPointer() + DotPos);
+ return MatchOperand_Success;
+}
- if (DotPos == std::string::npos) {
- Layout = StringRef();
- } else {
- // Everything afterwards needs to be a literal token, expected to be
- // '.2d','.b' etc for vector registers.
-
- // This StringSwitch validates the input and (perhaps more importantly)
- // gives us a permanent string to use in the token (a pointer into LowerReg
- // would go out of scope when we return).
- LayoutLoc = SMLoc::getFromPointer(S.getPointer() + DotPos + 1);
- StringRef LayoutText = StringRef(LowerReg).substr(DotPos);
-
- // See if it's a 128-bit layout first.
- Layout = StringSwitch<const char *>(LayoutText)
- .Case(".q", ".q").Case(".1q", ".1q")
- .Case(".d", ".d").Case(".2d", ".2d")
- .Case(".s", ".s").Case(".4s", ".4s")
- .Case(".h", ".h").Case(".8h", ".8h")
- .Case(".b", ".b").Case(".16b", ".16b")
- .Default("");
-
- if (Layout.size() != 0)
- IsVec128 = true;
- else {
- Layout = StringSwitch<const char *>(LayoutText)
- .Case(".1d", ".1d")
- .Case(".2s", ".2s")
- .Case(".4h", ".4h")
- .Case(".8b", ".8b")
- .Default("");
+/// tryParseVectorRegister - Parse a vector register operand.
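+/// e.g. "v0.4s", optionally followed by an element index such as "v3.d[1]",
+/// which is pushed as a separate vector-index operand.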
+bool AArch64AsmParser::tryParseVectorRegister(OperandVector &Operands) {
+ if (Parser.getTok().isNot(AsmToken::Identifier))
+ return true;
+
+ SMLoc S = getLoc();
+ // Check for a vector register specifier first.
+ StringRef Kind;
+ int64_t Reg = tryMatchVectorRegister(Kind, false);
+ if (Reg == -1)
+ return true;
+ Operands.push_back(
+ AArch64Operand::CreateReg(Reg, true, S, getLoc(), getContext()));
+ // If there was an explicit qualifier, that goes on as a literal text
+ // operand.
+ if (!Kind.empty())
+ Operands.push_back(
+ AArch64Operand::CreateToken(Kind, false, S, getContext()));
+
+ // If there is an index specifier following the register, parse that too.
+ if (Parser.getTok().is(AsmToken::LBrac)) {
+ SMLoc SIdx = getLoc();
+ Parser.Lex(); // Eat left bracket token.
+
+ const MCExpr *ImmVal;
+ if (getParser().parseExpression(ImmVal))
+ return false;
+ const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(ImmVal);
+ if (!MCE) {
+ TokError("immediate value expected for vector index");
+ return false;
}
- if (Layout.size() == 0) {
- // If we've still not pinned it down the register is malformed.
+ SMLoc E = getLoc();
+ if (Parser.getTok().isNot(AsmToken::RBrac)) {
+ Error(E, "']' expected");
return false;
}
- }
- RegNum = MatchRegisterName(LowerReg.substr(0, DotPos));
- if (RegNum == AArch64::NoRegister) {
- RegNum = StringSwitch<unsigned>(LowerReg.substr(0, DotPos))
- .Case("ip0", AArch64::X16)
- .Case("ip1", AArch64::X17)
- .Case("fp", AArch64::X29)
- .Case("lr", AArch64::X30)
- .Case("v0", IsVec128 ? AArch64::Q0 : AArch64::D0)
- .Case("v1", IsVec128 ? AArch64::Q1 : AArch64::D1)
- .Case("v2", IsVec128 ? AArch64::Q2 : AArch64::D2)
- .Case("v3", IsVec128 ? AArch64::Q3 : AArch64::D3)
- .Case("v4", IsVec128 ? AArch64::Q4 : AArch64::D4)
- .Case("v5", IsVec128 ? AArch64::Q5 : AArch64::D5)
- .Case("v6", IsVec128 ? AArch64::Q6 : AArch64::D6)
- .Case("v7", IsVec128 ? AArch64::Q7 : AArch64::D7)
- .Case("v8", IsVec128 ? AArch64::Q8 : AArch64::D8)
- .Case("v9", IsVec128 ? AArch64::Q9 : AArch64::D9)
- .Case("v10", IsVec128 ? AArch64::Q10 : AArch64::D10)
- .Case("v11", IsVec128 ? AArch64::Q11 : AArch64::D11)
- .Case("v12", IsVec128 ? AArch64::Q12 : AArch64::D12)
- .Case("v13", IsVec128 ? AArch64::Q13 : AArch64::D13)
- .Case("v14", IsVec128 ? AArch64::Q14 : AArch64::D14)
- .Case("v15", IsVec128 ? AArch64::Q15 : AArch64::D15)
- .Case("v16", IsVec128 ? AArch64::Q16 : AArch64::D16)
- .Case("v17", IsVec128 ? AArch64::Q17 : AArch64::D17)
- .Case("v18", IsVec128 ? AArch64::Q18 : AArch64::D18)
- .Case("v19", IsVec128 ? AArch64::Q19 : AArch64::D19)
- .Case("v20", IsVec128 ? AArch64::Q20 : AArch64::D20)
- .Case("v21", IsVec128 ? AArch64::Q21 : AArch64::D21)
- .Case("v22", IsVec128 ? AArch64::Q22 : AArch64::D22)
- .Case("v23", IsVec128 ? AArch64::Q23 : AArch64::D23)
- .Case("v24", IsVec128 ? AArch64::Q24 : AArch64::D24)
- .Case("v25", IsVec128 ? AArch64::Q25 : AArch64::D25)
- .Case("v26", IsVec128 ? AArch64::Q26 : AArch64::D26)
- .Case("v27", IsVec128 ? AArch64::Q27 : AArch64::D27)
- .Case("v28", IsVec128 ? AArch64::Q28 : AArch64::D28)
- .Case("v29", IsVec128 ? AArch64::Q29 : AArch64::D29)
- .Case("v30", IsVec128 ? AArch64::Q30 : AArch64::D30)
- .Case("v31", IsVec128 ? AArch64::Q31 : AArch64::D31)
- .Default(AArch64::NoRegister);
- }
- if (RegNum == AArch64::NoRegister)
- return false;
+ Parser.Lex(); // Eat right bracket token.
- return true;
+ Operands.push_back(AArch64Operand::CreateVectorIndex(MCE->getValue(), SIdx,
+ E, getContext()));
+ }
+
+ return false;
}
-AArch64AsmParser::OperandMatchResultTy
-AArch64AsmParser::ParseRegister(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- uint32_t &NumLanes) {
- unsigned RegNum;
- StringRef Layout;
- SMLoc RegEndLoc, LayoutLoc;
- SMLoc S = Parser.getTok().getLoc();
-
- if (!IdentifyRegister(RegNum, RegEndLoc, Layout, LayoutLoc))
- return MatchOperand_NoMatch;
+/// parseRegister - Parse a register operand, either vector or scalar.
+bool AArch64AsmParser::parseRegister(OperandVector &Operands) {
+ SMLoc S = getLoc();
+ // Try for a vector register.
+ if (!tryParseVectorRegister(Operands))
+ return false;
- Operands.push_back(AArch64Operand::CreateReg(RegNum, S, RegEndLoc));
+ // Try for a scalar register.
+ int64_t Reg = tryParseRegister();
+ if (Reg == -1)
+ return true;
+ Operands.push_back(
+ AArch64Operand::CreateReg(Reg, false, S, getLoc(), getContext()));
- if (Layout.size() != 0) {
- unsigned long long TmpLanes = 0;
- llvm::getAsUnsignedInteger(Layout.substr(1), 10, TmpLanes);
- if (TmpLanes != 0) {
- NumLanes = TmpLanes;
- } else {
- // If the number of lanes isn't specified explicitly, a valid instruction
- // will have an element specifier and be capable of acting on the entire
- // vector register.
- switch (Layout.back()) {
- default: llvm_unreachable("Invalid layout specifier");
- case 'b': NumLanes = 16; break;
- case 'h': NumLanes = 8; break;
- case 's': NumLanes = 4; break;
- case 'd': NumLanes = 2; break;
- case 'q': NumLanes = 1; break;
+ // A small number of instructions (FMOVXDhighr, for example) have "[1]"
+ // as a string token in the instruction itself.
+ if (getLexer().getKind() == AsmToken::LBrac) {
+ SMLoc LBracS = getLoc();
+ Parser.Lex();
+ const AsmToken &Tok = Parser.getTok();
+ if (Tok.is(AsmToken::Integer)) {
+ SMLoc IntS = getLoc();
+ int64_t Val = Tok.getIntVal();
+ if (Val == 1) {
+ Parser.Lex();
+ if (getLexer().getKind() == AsmToken::RBrac) {
+ SMLoc RBracS = getLoc();
+ Parser.Lex();
+ Operands.push_back(
+ AArch64Operand::CreateToken("[", false, LBracS, getContext()));
+ Operands.push_back(
+ AArch64Operand::CreateToken("1", false, IntS, getContext()));
+ Operands.push_back(
+ AArch64Operand::CreateToken("]", false, RBracS, getContext()));
+ return false;
+ }
}
}
-
- Operands.push_back(AArch64Operand::CreateToken(Layout, LayoutLoc));
}
- Parser.Lex();
- return MatchOperand_Success;
-}
-
-bool
-AArch64AsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
- SMLoc &EndLoc) {
- // This callback is used for things like DWARF frame directives in
- // assembly. They don't care about things like NEON layouts or lanes, they
- // just want to be able to produce the DWARF register number.
- StringRef LayoutSpec;
- SMLoc RegEndLoc, LayoutLoc;
- StartLoc = Parser.getTok().getLoc();
-
- if (!IdentifyRegister(RegNo, RegEndLoc, LayoutSpec, LayoutLoc))
- return true;
-
- Parser.Lex();
- EndLoc = Parser.getTok().getLoc();
-
return false;
}
-AArch64AsmParser::OperandMatchResultTy
-AArch64AsmParser::ParseNamedImmOperand(const NamedImmMapper &Mapper,
- SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
- // Since these operands occur in very limited circumstances, without
- // alternatives, we actually signal an error if there is no match. If relaxing
- // this, beware of unintended consequences: an immediate will be accepted
- // during matching, no matter how it gets into the AArch64Operand.
- const AsmToken &Tok = Parser.getTok();
- SMLoc S = Tok.getLoc();
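+/// parseSymbolicImmVal - Parse an immediate expression, optionally prefixed
+/// by an ELF relocation specifier such as ":lo12:sym" or ":abs_g1:sym".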
+bool AArch64AsmParser::parseSymbolicImmVal(const MCExpr *&ImmVal) {
+ bool HasELFModifier = false;
+ AArch64MCExpr::VariantKind RefKind;
- if (Tok.is(AsmToken::Identifier)) {
- bool ValidName;
- uint32_t Code = Mapper.fromString(Tok.getString().lower(), ValidName);
+ if (Parser.getTok().is(AsmToken::Colon)) {
+ Parser.Lex(); // Eat ':"
+ HasELFModifier = true;
- if (!ValidName) {
- Error(S, "operand specifier not recognised");
- return MatchOperand_ParseFail;
+ if (Parser.getTok().isNot(AsmToken::Identifier)) {
+ Error(Parser.getTok().getLoc(),
+ "expect relocation specifier in operand after ':'");
+ return true;
}
- Parser.Lex(); // We're done with the identifier. Eat it
-
- SMLoc E = Parser.getTok().getLoc();
- const MCExpr *Imm = MCConstantExpr::Create(Code, getContext());
- Operands.push_back(AArch64Operand::CreateImm(Imm, S, E));
- return MatchOperand_Success;
- } else if (Tok.is(AsmToken::Hash)) {
- Parser.Lex();
+ std::string LowerCase = Parser.getTok().getIdentifier().lower();
+ RefKind = StringSwitch<AArch64MCExpr::VariantKind>(LowerCase)
+ .Case("lo12", AArch64MCExpr::VK_LO12)
+ .Case("abs_g3", AArch64MCExpr::VK_ABS_G3)
+ .Case("abs_g2", AArch64MCExpr::VK_ABS_G2)
+ .Case("abs_g2_s", AArch64MCExpr::VK_ABS_G2_S)
+ .Case("abs_g2_nc", AArch64MCExpr::VK_ABS_G2_NC)
+ .Case("abs_g1", AArch64MCExpr::VK_ABS_G1)
+ .Case("abs_g1_s", AArch64MCExpr::VK_ABS_G1_S)
+ .Case("abs_g1_nc", AArch64MCExpr::VK_ABS_G1_NC)
+ .Case("abs_g0", AArch64MCExpr::VK_ABS_G0)
+ .Case("abs_g0_s", AArch64MCExpr::VK_ABS_G0_S)
+ .Case("abs_g0_nc", AArch64MCExpr::VK_ABS_G0_NC)
+ .Case("dtprel_g2", AArch64MCExpr::VK_DTPREL_G2)
+ .Case("dtprel_g1", AArch64MCExpr::VK_DTPREL_G1)
+ .Case("dtprel_g1_nc", AArch64MCExpr::VK_DTPREL_G1_NC)
+ .Case("dtprel_g0", AArch64MCExpr::VK_DTPREL_G0)
+ .Case("dtprel_g0_nc", AArch64MCExpr::VK_DTPREL_G0_NC)
+ .Case("dtprel_hi12", AArch64MCExpr::VK_DTPREL_HI12)
+ .Case("dtprel_lo12", AArch64MCExpr::VK_DTPREL_LO12)
+ .Case("dtprel_lo12_nc", AArch64MCExpr::VK_DTPREL_LO12_NC)
+ .Case("tprel_g2", AArch64MCExpr::VK_TPREL_G2)
+ .Case("tprel_g1", AArch64MCExpr::VK_TPREL_G1)
+ .Case("tprel_g1_nc", AArch64MCExpr::VK_TPREL_G1_NC)
+ .Case("tprel_g0", AArch64MCExpr::VK_TPREL_G0)
+ .Case("tprel_g0_nc", AArch64MCExpr::VK_TPREL_G0_NC)
+ .Case("tprel_hi12", AArch64MCExpr::VK_TPREL_HI12)
+ .Case("tprel_lo12", AArch64MCExpr::VK_TPREL_LO12)
+ .Case("tprel_lo12_nc", AArch64MCExpr::VK_TPREL_LO12_NC)
+ .Case("tlsdesc_lo12", AArch64MCExpr::VK_TLSDESC_LO12)
+ .Case("got", AArch64MCExpr::VK_GOT_PAGE)
+ .Case("got_lo12", AArch64MCExpr::VK_GOT_LO12)
+ .Case("gottprel", AArch64MCExpr::VK_GOTTPREL_PAGE)
+ .Case("gottprel_lo12", AArch64MCExpr::VK_GOTTPREL_LO12_NC)
+ .Case("gottprel_g1", AArch64MCExpr::VK_GOTTPREL_G1)
+ .Case("gottprel_g0_nc", AArch64MCExpr::VK_GOTTPREL_G0_NC)
+ .Case("tlsdesc", AArch64MCExpr::VK_TLSDESC_PAGE)
+ .Default(AArch64MCExpr::VK_INVALID);
+
+ if (RefKind == AArch64MCExpr::VK_INVALID) {
+ Error(Parser.getTok().getLoc(),
+ "expect relocation specifier in operand after ':'");
+ return true;
+ }
- const MCExpr *ImmVal;
- if (ParseImmediate(ImmVal) != MatchOperand_Success)
- return MatchOperand_ParseFail;
+ Parser.Lex(); // Eat identifier
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(ImmVal);
- if (!CE || CE->getValue() < 0 || !Mapper.validImm(CE->getValue())) {
- Error(S, "Invalid immediate for instruction");
- return MatchOperand_ParseFail;
+ if (Parser.getTok().isNot(AsmToken::Colon)) {
+ Error(Parser.getTok().getLoc(), "expect ':' after relocation specifier");
+ return true;
}
-
- SMLoc E = Parser.getTok().getLoc();
- Operands.push_back(AArch64Operand::CreateImm(ImmVal, S, E));
- return MatchOperand_Success;
+ Parser.Lex(); // Eat ':'
}
- Error(S, "unexpected operand for instruction");
- return MatchOperand_ParseFail;
-}
-
-AArch64AsmParser::OperandMatchResultTy
-AArch64AsmParser::ParseSysRegOperand(
- SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
- const AsmToken &Tok = Parser.getTok();
-
- // Any MSR/MRS operand will be an identifier, and we want to store it as some
- // kind of string: SPSel is valid for two different forms of MSR with two
- // different encodings. There's no collision at the moment, but the potential
- // is there.
- if (!Tok.is(AsmToken::Identifier)) {
- return MatchOperand_NoMatch;
- }
+ if (getParser().parseExpression(ImmVal))
+ return true;
- SMLoc S = Tok.getLoc();
- Operands.push_back(AArch64Operand::CreateSysReg(Tok.getString(), S));
- Parser.Lex(); // Eat identifier
+ if (HasELFModifier)
+ ImmVal = AArch64MCExpr::Create(ImmVal, RefKind, getContext());
- return MatchOperand_Success;
+ return false;
}
-AArch64AsmParser::OperandMatchResultTy
-AArch64AsmParser::ParseLSXAddressOperand(
- SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
- SMLoc S = Parser.getTok().getLoc();
-
- unsigned RegNum;
- SMLoc RegEndLoc, LayoutLoc;
- StringRef Layout;
- if(!IdentifyRegister(RegNum, RegEndLoc, Layout, LayoutLoc)
- || !AArch64MCRegisterClasses[AArch64::GPR64xspRegClassID].contains(RegNum)
- || Layout.size() != 0) {
- // Check Layout.size because we don't want to let "x3.4s" or similar
- // through.
- return MatchOperand_NoMatch;
- }
- Parser.Lex(); // Eat register
+/// parseVectorList - Parse a vector list operand for AdvSIMD instructions.
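+/// Accepts both list syntaxes, e.g. "{v0.4s, v1.4s}" and "{v0.4s-v3.4s}",
+/// with an optional trailing lane index such as "{v0.b, v1.b}[3]".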
+bool AArch64AsmParser::parseVectorList(OperandVector &Operands) {
+ assert(Parser.getTok().is(AsmToken::LCurly) && "Token is not a Left Curly Brace");
+ SMLoc S = getLoc();
+ Parser.Lex(); // Eat left bracket token.
+ StringRef Kind;
+ int64_t FirstReg = tryMatchVectorRegister(Kind, true);
+ if (FirstReg == -1)
+ return true;
+ int64_t PrevReg = FirstReg;
+ unsigned Count = 1;
- if (Parser.getTok().is(AsmToken::RBrac)) {
- // We're done
- SMLoc E = Parser.getTok().getLoc();
- Operands.push_back(AArch64Operand::CreateWrappedReg(RegNum, S, E));
- return MatchOperand_Success;
- }
+ if (Parser.getTok().is(AsmToken::Minus)) {
+ Parser.Lex(); // Eat the minus.
- // Otherwise, only ", #0" is valid
+ SMLoc Loc = getLoc();
+ StringRef NextKind;
+ int64_t Reg = tryMatchVectorRegister(NextKind, true);
+ if (Reg == -1)
+ return true;
+ // The kind suffix must match on all regs in the list.
+ if (Kind != NextKind)
+ return Error(Loc, "mismatched register size suffix");
- if (Parser.getTok().isNot(AsmToken::Comma)) {
- Error(Parser.getTok().getLoc(), "expected ',' or ']' after register");
- return MatchOperand_ParseFail;
- }
- Parser.Lex(); // Eat ','
+ unsigned Space = (PrevReg < Reg) ? (Reg - PrevReg) : (Reg + 32 - PrevReg);
- if (Parser.getTok().isNot(AsmToken::Hash)) {
- Error(Parser.getTok().getLoc(), "expected '#0'");
- return MatchOperand_ParseFail;
- }
- Parser.Lex(); // Eat '#'
+ if (Space == 0 || Space > 3) {
+ return Error(Loc, "invalid number of vectors");
+ }
- if (Parser.getTok().isNot(AsmToken::Integer)
- || Parser.getTok().getIntVal() != 0 ) {
- Error(Parser.getTok().getLoc(), "expected '#0'");
- return MatchOperand_ParseFail;
+ Count += Space;
}
- Parser.Lex(); // Eat '0'
-
- SMLoc E = Parser.getTok().getLoc();
- Operands.push_back(AArch64Operand::CreateWrappedReg(RegNum, S, E));
- return MatchOperand_Success;
-}
+ else {
+ while (Parser.getTok().is(AsmToken::Comma)) {
+ Parser.Lex(); // Eat the comma token.
-AArch64AsmParser::OperandMatchResultTy
-AArch64AsmParser::ParseShiftExtend(
- SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
- StringRef IDVal = Parser.getTok().getIdentifier();
- std::string LowerID = IDVal.lower();
-
- A64SE::ShiftExtSpecifiers Spec =
- StringSwitch<A64SE::ShiftExtSpecifiers>(LowerID)
- .Case("lsl", A64SE::LSL)
- .Case("msl", A64SE::MSL)
- .Case("lsr", A64SE::LSR)
- .Case("asr", A64SE::ASR)
- .Case("ror", A64SE::ROR)
- .Case("uxtb", A64SE::UXTB)
- .Case("uxth", A64SE::UXTH)
- .Case("uxtw", A64SE::UXTW)
- .Case("uxtx", A64SE::UXTX)
- .Case("sxtb", A64SE::SXTB)
- .Case("sxth", A64SE::SXTH)
- .Case("sxtw", A64SE::SXTW)
- .Case("sxtx", A64SE::SXTX)
- .Default(A64SE::Invalid);
-
- if (Spec == A64SE::Invalid)
- return MatchOperand_NoMatch;
+ SMLoc Loc = getLoc();
+ StringRef NextKind;
+ int64_t Reg = tryMatchVectorRegister(NextKind, true);
+ if (Reg == -1)
+ return true;
+ // The kind suffix must match on all regs in the list.
+ if (Kind != NextKind)
+ return Error(Loc, "mismatched register size suffix");
- // Eat the shift
- SMLoc S, E;
- S = Parser.getTok().getLoc();
- Parser.Lex();
+ // Registers must be sequential, incrementing by one (with wraparound at 31)
+ if (getContext().getRegisterInfo()->getEncodingValue(Reg) !=
+ (getContext().getRegisterInfo()->getEncodingValue(PrevReg) + 1) % 32)
+ return Error(Loc, "registers must be sequential");
- if (Spec != A64SE::LSL && Spec != A64SE::LSR && Spec != A64SE::ASR &&
- Spec != A64SE::ROR && Spec != A64SE::MSL) {
- // The shift amount can be omitted for the extending versions, but not real
- // shifts:
- // add x0, x0, x0, uxtb
- // is valid, and equivalent to
- // add x0, x0, x0, uxtb #0
-
- if (Parser.getTok().is(AsmToken::Comma) ||
- Parser.getTok().is(AsmToken::EndOfStatement) ||
- Parser.getTok().is(AsmToken::RBrac)) {
- Operands.push_back(AArch64Operand::CreateShiftExtend(Spec, 0, true,
- S, E));
- return MatchOperand_Success;
+ PrevReg = Reg;
+ ++Count;
}
}
- // Eat # at beginning of immediate
- if (!Parser.getTok().is(AsmToken::Hash)) {
- Error(Parser.getTok().getLoc(),
- "expected #imm after shift specifier");
- return MatchOperand_ParseFail;
- }
- Parser.Lex();
-
- // Make sure we do actually have a number
- if (!Parser.getTok().is(AsmToken::Integer)) {
- Error(Parser.getTok().getLoc(),
- "expected integer shift amount");
- return MatchOperand_ParseFail;
- }
- unsigned Amount = Parser.getTok().getIntVal();
- Parser.Lex();
- E = Parser.getTok().getLoc();
+ if (Parser.getTok().isNot(AsmToken::RCurly))
+ return Error(getLoc(), "'}' expected");
+ Parser.Lex(); // Eat the '}' token.
- Operands.push_back(AArch64Operand::CreateShiftExtend(Spec, Amount, false,
- S, E));
+ if (Count > 4)
+ return Error(S, "invalid number of vectors");
- return MatchOperand_Success;
-}
+ unsigned NumElements = 0;
+ char ElementKind = 0;
+ if (!Kind.empty())
+ parseValidVectorKind(Kind, NumElements, ElementKind);
-/// Try to parse a vector register token, If it is a vector register,
-/// the token is eaten and return true. Otherwise return false.
-bool AArch64AsmParser::TryParseVector(uint32_t &RegNum, SMLoc &RegEndLoc,
- StringRef &Layout, SMLoc &LayoutLoc) {
- bool IsVector = true;
-
- if (!IdentifyRegister(RegNum, RegEndLoc, Layout, LayoutLoc))
- IsVector = false;
- else if (!AArch64MCRegisterClasses[AArch64::FPR64RegClassID]
- .contains(RegNum) &&
- !AArch64MCRegisterClasses[AArch64::FPR128RegClassID]
- .contains(RegNum))
- IsVector = false;
- else if (Layout.size() == 0)
- IsVector = false;
-
- if (!IsVector)
- Error(Parser.getTok().getLoc(), "expected vector type register");
-
- Parser.Lex(); // Eat this token.
- return IsVector;
-}
+ Operands.push_back(AArch64Operand::CreateVectorList(
+ FirstReg, Count, NumElements, ElementKind, S, getLoc(), getContext()));
+ // If there is an index specifier following the list, parse that too.
+ if (Parser.getTok().is(AsmToken::LBrac)) {
+ SMLoc SIdx = getLoc();
+ Parser.Lex(); // Eat left bracket token.
-// A vector list contains 1-4 consecutive registers.
-// Now there are two kinds of vector list when number of vector > 1:
-// (1) {Vn.layout, Vn+1.layout, ... , Vm.layout}
-// (2) {Vn.layout - Vm.layout}
-// If the layout is like .b/.h/.s/.d, also parse the lane.
-AArch64AsmParser::OperandMatchResultTy AArch64AsmParser::ParseVectorList(
- SmallVectorImpl<MCParsedAsmOperand *> &Operands) {
- if (Parser.getTok().isNot(AsmToken::LCurly)) {
- Error(Parser.getTok().getLoc(), "'{' expected");
- return MatchOperand_ParseFail;
- }
- SMLoc SLoc = Parser.getTok().getLoc();
- Parser.Lex(); // Eat '{' token.
+ const MCExpr *ImmVal;
+ if (getParser().parseExpression(ImmVal))
+ return false;
+ const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(ImmVal);
+ if (!MCE) {
+ TokError("immediate value expected for vector index");
+ return false;
+ }
- unsigned Reg, Count = 1;
- StringRef LayoutStr;
- SMLoc RegEndLoc, LayoutLoc;
- if (!TryParseVector(Reg, RegEndLoc, LayoutStr, LayoutLoc))
- return MatchOperand_ParseFail;
+ SMLoc E = getLoc();
+ if (Parser.getTok().isNot(AsmToken::RBrac)) {
+ Error(E, "']' expected");
+ return false;
+ }
- if (Parser.getTok().is(AsmToken::Minus)) {
- Parser.Lex(); // Eat the minus.
+ Parser.Lex(); // Eat right bracket token.
- unsigned Reg2;
- StringRef LayoutStr2;
- SMLoc RegEndLoc2, LayoutLoc2;
- SMLoc RegLoc2 = Parser.getTok().getLoc();
+ Operands.push_back(AArch64Operand::CreateVectorIndex(MCE->getValue(), SIdx,
+ E, getContext()));
+ }
+ return false;
+}
- if (!TryParseVector(Reg2, RegEndLoc2, LayoutStr2, LayoutLoc2))
- return MatchOperand_ParseFail;
- unsigned Space = (Reg < Reg2) ? (Reg2 - Reg) : (Reg2 + 32 - Reg);
+AArch64AsmParser::OperandMatchResultTy
+AArch64AsmParser::tryParseGPR64sp0Operand(OperandVector &Operands) {
+ const AsmToken &Tok = Parser.getTok();
+ if (!Tok.is(AsmToken::Identifier))
+ return MatchOperand_NoMatch;
- if (LayoutStr != LayoutStr2) {
- Error(LayoutLoc2, "expected the same vector layout");
- return MatchOperand_ParseFail;
- }
- if (Space == 0 || Space > 3) {
- Error(RegLoc2, "invalid number of vectors");
- return MatchOperand_ParseFail;
- }
+ unsigned RegNum = MatchRegisterName(Tok.getString().lower());
- Count += Space;
- } else {
- unsigned LastReg = Reg;
- while (Parser.getTok().is(AsmToken::Comma)) {
- Parser.Lex(); // Eat the comma.
- unsigned Reg2;
- StringRef LayoutStr2;
- SMLoc RegEndLoc2, LayoutLoc2;
- SMLoc RegLoc2 = Parser.getTok().getLoc();
+ MCContext &Ctx = getContext();
+ const MCRegisterInfo *RI = Ctx.getRegisterInfo();
+ if (!RI->getRegClass(AArch64::GPR64spRegClassID).contains(RegNum))
+ return MatchOperand_NoMatch;
- if (!TryParseVector(Reg2, RegEndLoc2, LayoutStr2, LayoutLoc2))
- return MatchOperand_ParseFail;
- unsigned Space = (LastReg < Reg2) ? (Reg2 - LastReg)
- : (Reg2 + 32 - LastReg);
- Count++;
-
- // The space between two vectors should be 1. And they should have the same layout.
- // Total count shouldn't be great than 4
- if (Space != 1) {
- Error(RegLoc2, "invalid space between two vectors");
- return MatchOperand_ParseFail;
- }
- if (LayoutStr != LayoutStr2) {
- Error(LayoutLoc2, "expected the same vector layout");
- return MatchOperand_ParseFail;
- }
- if (Count > 4) {
- Error(RegLoc2, "invalid number of vectors");
- return MatchOperand_ParseFail;
- }
+ SMLoc S = getLoc();
+ Parser.Lex(); // Eat register
- LastReg = Reg2;
- }
+ if (Parser.getTok().isNot(AsmToken::Comma)) {
+ Operands.push_back(
+ AArch64Operand::CreateReg(RegNum, false, S, getLoc(), Ctx));
+ return MatchOperand_Success;
}
+ Parser.Lex(); // Eat comma.
- if (Parser.getTok().isNot(AsmToken::RCurly)) {
- Error(Parser.getTok().getLoc(), "'}' expected");
+ if (Parser.getTok().is(AsmToken::Hash))
+ Parser.Lex(); // Eat hash
+
+ if (Parser.getTok().isNot(AsmToken::Integer)) {
+ Error(getLoc(), "index must be absent or #0");
return MatchOperand_ParseFail;
}
- SMLoc ELoc = Parser.getTok().getLoc();
- Parser.Lex(); // Eat '}' token.
- A64Layout::VectorLayout Layout = A64StringToVectorLayout(LayoutStr);
- if (Count > 1) { // If count > 1, create vector list using super register.
- bool IsVec64 = (Layout < A64Layout::VL_16B);
- static unsigned SupRegIDs[3][2] = {
- { AArch64::QPairRegClassID, AArch64::DPairRegClassID },
- { AArch64::QTripleRegClassID, AArch64::DTripleRegClassID },
- { AArch64::QQuadRegClassID, AArch64::DQuadRegClassID }
- };
- unsigned SupRegID = SupRegIDs[Count - 2][static_cast<int>(IsVec64)];
- unsigned Sub0 = IsVec64 ? AArch64::dsub_0 : AArch64::qsub_0;
- const MCRegisterInfo *MRI = getContext().getRegisterInfo();
- Reg = MRI->getMatchingSuperReg(Reg, Sub0,
- &AArch64MCRegisterClasses[SupRegID]);
+ const MCExpr *ImmVal;
+ if (Parser.parseExpression(ImmVal) || !isa<MCConstantExpr>(ImmVal) ||
+ cast<MCConstantExpr>(ImmVal)->getValue() != 0) {
+ Error(getLoc(), "index must be absent or #0");
+ return MatchOperand_ParseFail;
}
- Operands.push_back(
- AArch64Operand::CreateVectorList(Reg, Count, Layout, SLoc, ELoc));
- if (Parser.getTok().is(AsmToken::LBrac)) {
- uint32_t NumLanes = 0;
- switch(Layout) {
- case A64Layout::VL_B : NumLanes = 16; break;
- case A64Layout::VL_H : NumLanes = 8; break;
- case A64Layout::VL_S : NumLanes = 4; break;
- case A64Layout::VL_D : NumLanes = 2; break;
- default:
- SMLoc Loc = getLexer().getLoc();
- Error(Loc, "expected comma before next operand");
- return MatchOperand_ParseFail;
- }
- return ParseNEONLane(Operands, NumLanes);
- } else {
- return MatchOperand_Success;
- }
+ Operands.push_back(
+ AArch64Operand::CreateReg(RegNum, false, S, getLoc(), Ctx));
+ return MatchOperand_Success;
}
-// FIXME: We would really like to be able to tablegen'erate this.
-bool AArch64AsmParser::
-validateInstruction(MCInst &Inst,
- const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
- switch (Inst.getOpcode()) {
- case AArch64::BFIwwii:
- case AArch64::BFIxxii:
- case AArch64::SBFIZwwii:
- case AArch64::SBFIZxxii:
- case AArch64::UBFIZwwii:
- case AArch64::UBFIZxxii: {
- unsigned ImmOps = Inst.getNumOperands() - 2;
- int64_t ImmR = Inst.getOperand(ImmOps).getImm();
- int64_t ImmS = Inst.getOperand(ImmOps+1).getImm();
-
- if (ImmR != 0 && ImmS >= ImmR) {
- return Error(Operands[4]->getStartLoc(),
- "requested insert overflows register");
- }
+/// parseOperand - Parse an AArch64 instruction operand. For now this parses
+/// the operand regardless of the mnemonic.
+bool AArch64AsmParser::parseOperand(OperandVector &Operands, bool isCondCode,
+ bool invertCondCode) {
+ // Check if the current operand has a custom associated parser, if so, try to
+ // custom parse the operand, or fallback to the general approach.
+ OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
+ if (ResTy == MatchOperand_Success)
return false;
- }
- case AArch64::BFXILwwii:
- case AArch64::BFXILxxii:
- case AArch64::SBFXwwii:
- case AArch64::SBFXxxii:
- case AArch64::UBFXwwii:
- case AArch64::UBFXxxii: {
- unsigned ImmOps = Inst.getNumOperands() - 2;
- int64_t ImmR = Inst.getOperand(ImmOps).getImm();
- int64_t ImmS = Inst.getOperand(ImmOps+1).getImm();
- int64_t RegWidth = 0;
- switch (Inst.getOpcode()) {
- case AArch64::SBFXxxii: case AArch64::UBFXxxii: case AArch64::BFXILxxii:
- RegWidth = 64;
- break;
- case AArch64::SBFXwwii: case AArch64::UBFXwwii: case AArch64::BFXILwwii:
- RegWidth = 32;
- break;
- }
+ // If there wasn't a custom match, try the generic matcher below. Otherwise,
+ // there was a match, but an error occurred, in which case, just return that
+ // the operand parsing failed.
+ if (ResTy == MatchOperand_ParseFail)
+ return true;
- if (ImmS >= RegWidth || ImmS < ImmR) {
- return Error(Operands[4]->getStartLoc(),
- "requested extract overflows register");
- }
+ // Nothing custom, so do general case parsing.
+ SMLoc S, E;
+ switch (getLexer().getKind()) {
+ default: {
+ SMLoc S = getLoc();
+ const MCExpr *Expr;
+ if (parseSymbolicImmVal(Expr))
+ return Error(S, "invalid operand");
+
+ SMLoc E = SMLoc::getFromPointer(getLoc().getPointer() - 1);
+ Operands.push_back(AArch64Operand::CreateImm(Expr, S, E, getContext()));
return false;
}
- case AArch64::ICix: {
- int64_t ImmVal = Inst.getOperand(0).getImm();
- A64IC::ICValues ICOp = static_cast<A64IC::ICValues>(ImmVal);
- if (!A64IC::NeedsRegister(ICOp)) {
- return Error(Operands[1]->getStartLoc(),
- "specified IC op does not use a register");
- }
- return false;
+ case AsmToken::LBrac: {
+ SMLoc Loc = Parser.getTok().getLoc();
+ Operands.push_back(AArch64Operand::CreateToken("[", false, Loc,
+ getContext()));
+ Parser.Lex(); // Eat '['
+
+ // There's no comma after a '[', so we can parse the next operand
+ // immediately.
+ return parseOperand(Operands, false, false);
}
- case AArch64::ICi: {
- int64_t ImmVal = Inst.getOperand(0).getImm();
- A64IC::ICValues ICOp = static_cast<A64IC::ICValues>(ImmVal);
- if (A64IC::NeedsRegister(ICOp)) {
- return Error(Operands[1]->getStartLoc(),
- "specified IC op requires a register");
- }
+ case AsmToken::LCurly:
+ return parseVectorList(Operands);
+ case AsmToken::Identifier: {
+ // If we're expecting a Condition Code operand, then just parse that.
+ if (isCondCode)
+ return parseCondCode(Operands, invertCondCode);
+
+ // If it's a register name, parse it.
+ if (!parseRegister(Operands))
+ return false;
+
+ // This could be an optional "shift" or "extend" operand.
+ OperandMatchResultTy GotShift = tryParseOptionalShiftExtend(Operands);
+ // We can only continue if no tokens were eaten.
+ if (GotShift != MatchOperand_NoMatch)
+ return GotShift;
+
+ // This was not a register so parse other operands that start with an
+ // identifier (like labels) as expressions and create them as immediates.
+ const MCExpr *IdVal;
+ S = getLoc();
+ if (getParser().parseExpression(IdVal))
+ return true;
+
+ E = SMLoc::getFromPointer(getLoc().getPointer() - 1);
+ Operands.push_back(AArch64Operand::CreateImm(IdVal, S, E, getContext()));
return false;
}
- case AArch64::TLBIix: {
- int64_t ImmVal = Inst.getOperand(0).getImm();
- A64TLBI::TLBIValues TLBIOp = static_cast<A64TLBI::TLBIValues>(ImmVal);
- if (!A64TLBI::NeedsRegister(TLBIOp)) {
- return Error(Operands[1]->getStartLoc(),
- "specified TLBI op does not use a register");
+ case AsmToken::Integer:
+ case AsmToken::Real:
+ case AsmToken::Hash: {
+ // #42 -> immediate.
+ S = getLoc();
+ if (getLexer().is(AsmToken::Hash))
+ Parser.Lex();
+
+ // Parse a negative sign
+ bool isNegative = false;
+ if (Parser.getTok().is(AsmToken::Minus)) {
+ isNegative = true;
+ // We need to consume this token only when we have a Real, otherwise
+ // we let parseSymbolicImmVal take care of it
+ if (Parser.getLexer().peekTok().is(AsmToken::Real))
+ Parser.Lex();
}
- return false;
- }
- case AArch64::TLBIi: {
- int64_t ImmVal = Inst.getOperand(0).getImm();
- A64TLBI::TLBIValues TLBIOp = static_cast<A64TLBI::TLBIValues>(ImmVal);
- if (A64TLBI::NeedsRegister(TLBIOp)) {
- return Error(Operands[1]->getStartLoc(),
- "specified TLBI op requires a register");
+
+ // The only Real that should come through here is a literal #0.0 for
+ // the fcmp[e] r, #0.0 instructions. They expect raw token operands,
+ // so convert the value.
+ const AsmToken &Tok = Parser.getTok();
+ if (Tok.is(AsmToken::Real)) {
+ APFloat RealVal(APFloat::IEEEdouble, Tok.getString());
+ uint64_t IntVal = RealVal.bitcastToAPInt().getZExtValue();
+ if (Mnemonic != "fcmp" && Mnemonic != "fcmpe" && Mnemonic != "fcmeq" &&
+ Mnemonic != "fcmge" && Mnemonic != "fcmgt" && Mnemonic != "fcmle" &&
+ Mnemonic != "fcmlt")
+ return TokError("unexpected floating point literal");
+ else if (IntVal != 0 || isNegative)
+ return TokError("expected floating-point constant #0.0");
+ Parser.Lex(); // Eat the token.
+
+ Operands.push_back(
+ AArch64Operand::CreateToken("#0", false, S, getContext()));
+ Operands.push_back(
+ AArch64Operand::CreateToken(".0", false, S, getContext()));
+ return false;
}
+
+ const MCExpr *ImmVal;
+ if (parseSymbolicImmVal(ImmVal))
+ return true;
+
+ E = SMLoc::getFromPointer(getLoc().getPointer() - 1);
+ Operands.push_back(AArch64Operand::CreateImm(ImmVal, S, E, getContext()));
return false;
}
}
-
- return false;
}
-
-// Parses the instruction *together with* all operands, appending each parsed
-// operand to the "Operands" list
+/// ParseInstruction - Parse an AArch64 instruction mnemonic followed by its
+/// operands.
bool AArch64AsmParser::ParseInstruction(ParseInstructionInfo &Info,
StringRef Name, SMLoc NameLoc,
- SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
- StringRef PatchedName = StringSwitch<StringRef>(Name.lower())
- .Case("beq", "b.eq")
- .Case("bne", "b.ne")
- .Case("bhs", "b.hs")
- .Case("bcs", "b.cs")
- .Case("blo", "b.lo")
- .Case("bcc", "b.cc")
- .Case("bmi", "b.mi")
- .Case("bpl", "b.pl")
- .Case("bvs", "b.vs")
- .Case("bvc", "b.vc")
- .Case("bhi", "b.hi")
- .Case("bls", "b.ls")
- .Case("bge", "b.ge")
- .Case("blt", "b.lt")
- .Case("bgt", "b.gt")
- .Case("ble", "b.le")
- .Case("bal", "b.al")
- .Case("bnv", "b.nv")
- .Default(Name);
-
- size_t CondCodePos = PatchedName.find('.');
-
- StringRef Mnemonic = PatchedName.substr(0, CondCodePos);
- Operands.push_back(AArch64Operand::CreateToken(Mnemonic, NameLoc));
-
- if (CondCodePos != StringRef::npos) {
- // We have a condition code
- SMLoc S = SMLoc::getFromPointer(NameLoc.getPointer() + CondCodePos + 1);
- StringRef CondStr = PatchedName.substr(CondCodePos + 1, StringRef::npos);
- A64CC::CondCodes Code;
-
- Code = A64StringToCondCode(CondStr);
-
- if (Code == A64CC::Invalid) {
- Error(S, "invalid condition code");
+ OperandVector &Operands) {
+ Name = StringSwitch<StringRef>(Name.lower())
+ .Case("beq", "b.eq")
+ .Case("bne", "b.ne")
+ .Case("bhs", "b.hs")
+ .Case("bcs", "b.cs")
+ .Case("blo", "b.lo")
+ .Case("bcc", "b.cc")
+ .Case("bmi", "b.mi")
+ .Case("bpl", "b.pl")
+ .Case("bvs", "b.vs")
+ .Case("bvc", "b.vc")
+ .Case("bhi", "b.hi")
+ .Case("bls", "b.ls")
+ .Case("bge", "b.ge")
+ .Case("blt", "b.lt")
+ .Case("bgt", "b.gt")
+ .Case("ble", "b.le")
+ .Case("bal", "b.al")
+ .Case("bnv", "b.nv")
+ .Default(Name);
+
+ // Create the leading tokens for the mnemonic, split by '.' characters.
+ size_t Start = 0, Next = Name.find('.');
+ StringRef Head = Name.slice(Start, Next);
+
+ // IC, DC, AT, and TLBI instructions are aliases for the SYS instruction.
+ if (Head == "ic" || Head == "dc" || Head == "at" || Head == "tlbi") {
+ bool IsError = parseSysAlias(Head, NameLoc, Operands);
+ if (IsError && getLexer().isNot(AsmToken::EndOfStatement))
Parser.eatToEndOfStatement();
- return true;
- }
-
- SMLoc DotL = SMLoc::getFromPointer(NameLoc.getPointer() + CondCodePos);
-
- Operands.push_back(AArch64Operand::CreateToken(".", DotL));
- SMLoc E = SMLoc::getFromPointer(NameLoc.getPointer() + CondCodePos + 3);
- Operands.push_back(AArch64Operand::CreateCondCode(Code, S, E));
+ return IsError;
}
- // Now we parse the operands of this instruction
+ Operands.push_back(
+ AArch64Operand::CreateToken(Head, false, NameLoc, getContext()));
+ Mnemonic = Head;
+
+ // Handle condition codes for a branch mnemonic
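+  // e.g. "b.ne" (or "bne", canonicalised above) splits into Head == "b"
+  // with the ".ne" suffix parsed as a condition-code operand.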
+ if (Head == "b" && Next != StringRef::npos) {
+ Start = Next;
+ Next = Name.find('.', Start + 1);
+ Head = Name.slice(Start + 1, Next);
+
+ SMLoc SuffixLoc = SMLoc::getFromPointer(NameLoc.getPointer() +
+ (Head.data() - Name.data()));
+ AArch64CC::CondCode CC = parseCondCodeString(Head);
+ if (CC == AArch64CC::Invalid)
+ return Error(SuffixLoc, "invalid condition code");
+ Operands.push_back(
+ AArch64Operand::CreateToken(".", true, SuffixLoc, getContext()));
+ Operands.push_back(
+ AArch64Operand::CreateCondCode(CC, NameLoc, NameLoc, getContext()));
+ }
+
+ // Add the remaining tokens in the mnemonic.
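+  // e.g. short-form NEON "fadd.2s" yields the token "fadd" plus the suffix
+  // token ".2s".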
+ while (Next != StringRef::npos) {
+ Start = Next;
+ Next = Name.find('.', Start + 1);
+ Head = Name.slice(Start, Next);
+ SMLoc SuffixLoc = SMLoc::getFromPointer(NameLoc.getPointer() +
+ (Head.data() - Name.data()) + 1);
+ Operands.push_back(
+ AArch64Operand::CreateToken(Head, true, SuffixLoc, getContext()));
+ }
+
+ // Conditional compare instructions have a Condition Code operand, which needs
+ // to be parsed and an immediate operand created.
+ bool condCodeFourthOperand =
+ (Head == "ccmp" || Head == "ccmn" || Head == "fccmp" ||
+ Head == "fccmpe" || Head == "fcsel" || Head == "csel" ||
+ Head == "csinc" || Head == "csinv" || Head == "csneg");
+
+ // These instructions are aliases to some of the conditional select
+ // instructions. However, the condition code is inverted in the aliased
+ // instruction.
+ //
+ // FIXME: Is this the correct way to handle these? Or should the parser
+ // generate the aliased instructions directly?
+ bool condCodeSecondOperand = (Head == "cset" || Head == "csetm");
+ bool condCodeThirdOperand =
+ (Head == "cinc" || Head == "cinv" || Head == "cneg");
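+  // e.g. "cinc w0, w1, eq" means "csinc w0, w1, w1, ne": the alias carries
+  // the inverse of the condition encoded in the underlying instruction,
+  // hence the invertCondCode flag passed to parseOperand below.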
+
+ // Read the remaining operands.
if (getLexer().isNot(AsmToken::EndOfStatement)) {
// Read the first operand.
- if (ParseOperand(Operands, Mnemonic)) {
+ if (parseOperand(Operands, false, false)) {
Parser.eatToEndOfStatement();
return true;
}
+ unsigned N = 2;
while (getLexer().is(AsmToken::Comma)) {
- Parser.Lex(); // Eat the comma.
+ Parser.Lex(); // Eat the comma.
// Parse and remember the operand.
- if (ParseOperand(Operands, Mnemonic)) {
+ if (parseOperand(Operands, (N == 4 && condCodeFourthOperand) ||
+ (N == 3 && condCodeThirdOperand) ||
+ (N == 2 && condCodeSecondOperand),
+ condCodeSecondOperand || condCodeThirdOperand)) {
Parser.eatToEndOfStatement();
return true;
}
-
// After successfully parsing some operands there are two special cases to
// consider (i.e. notional operands not separated by commas). Both are due
// to memory specifiers:
@@ -2321,52 +3121,716 @@ bool AArch64AsmParser::ParseInstruction(ParseInstructionInfo &Info,
// in the given context!
if (Parser.getTok().is(AsmToken::RBrac)) {
SMLoc Loc = Parser.getTok().getLoc();
- Operands.push_back(AArch64Operand::CreateToken("]", Loc));
+ Operands.push_back(AArch64Operand::CreateToken("]", false, Loc,
+ getContext()));
Parser.Lex();
}
if (Parser.getTok().is(AsmToken::Exclaim)) {
SMLoc Loc = Parser.getTok().getLoc();
- Operands.push_back(AArch64Operand::CreateToken("!", Loc));
+ Operands.push_back(AArch64Operand::CreateToken("!", false, Loc,
+ getContext()));
Parser.Lex();
}
+
+ ++N;
}
}
if (getLexer().isNot(AsmToken::EndOfStatement)) {
- SMLoc Loc = getLexer().getLoc();
+ SMLoc Loc = Parser.getTok().getLoc();
Parser.eatToEndOfStatement();
- return Error(Loc, "expected comma before next operand");
+ return Error(Loc, "unexpected token in argument list");
}
- // Eat the EndOfStatement
- Parser.Lex();
-
+ Parser.Lex(); // Consume the EndOfStatement
return false;
}
+// FIXME: This entire function is a giant hack to provide us with decent
+// operand range validation/diagnostics until TableGen/MC can be extended
+// to support autogeneration of this kind of validation.
+bool AArch64AsmParser::validateInstruction(MCInst &Inst,
+ SmallVectorImpl<SMLoc> &Loc) {
+ const MCRegisterInfo *RI = getContext().getRegisterInfo();
+  // Check for indexed addressing modes where the base register is the
+  // same as a destination/source register, and for paired loads where
+  // Rt == Rt2. All of those are undefined behaviour.
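+  // e.g. "ldp x0, x1, [x0], #16" is rejected because the post-index
+  // writeback updates x0, which is also a load destination.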
+ switch (Inst.getOpcode()) {
+ case AArch64::LDPSWpre:
+ case AArch64::LDPWpost:
+ case AArch64::LDPWpre:
+ case AArch64::LDPXpost:
+ case AArch64::LDPXpre: {
+ unsigned Rt = Inst.getOperand(1).getReg();
+ unsigned Rt2 = Inst.getOperand(2).getReg();
+ unsigned Rn = Inst.getOperand(3).getReg();
+ if (RI->isSubRegisterEq(Rn, Rt))
+ return Error(Loc[0], "unpredictable LDP instruction, writeback base "
+ "is also a destination");
+ if (RI->isSubRegisterEq(Rn, Rt2))
+ return Error(Loc[1], "unpredictable LDP instruction, writeback base "
+ "is also a destination");
+ // FALLTHROUGH
+ }
+ case AArch64::LDPDi:
+ case AArch64::LDPQi:
+ case AArch64::LDPSi:
+ case AArch64::LDPSWi:
+ case AArch64::LDPWi:
+ case AArch64::LDPXi: {
+ unsigned Rt = Inst.getOperand(0).getReg();
+ unsigned Rt2 = Inst.getOperand(1).getReg();
+ if (Rt == Rt2)
+ return Error(Loc[1], "unpredictable LDP instruction, Rt2==Rt");
+ break;
+ }
+ case AArch64::LDPDpost:
+ case AArch64::LDPDpre:
+ case AArch64::LDPQpost:
+ case AArch64::LDPQpre:
+ case AArch64::LDPSpost:
+ case AArch64::LDPSpre:
+ case AArch64::LDPSWpost: {
+ unsigned Rt = Inst.getOperand(1).getReg();
+ unsigned Rt2 = Inst.getOperand(2).getReg();
+ if (Rt == Rt2)
+ return Error(Loc[1], "unpredictable LDP instruction, Rt2==Rt");
+ break;
+ }
+ case AArch64::STPDpost:
+ case AArch64::STPDpre:
+ case AArch64::STPQpost:
+ case AArch64::STPQpre:
+ case AArch64::STPSpost:
+ case AArch64::STPSpre:
+ case AArch64::STPWpost:
+ case AArch64::STPWpre:
+ case AArch64::STPXpost:
+ case AArch64::STPXpre: {
+ unsigned Rt = Inst.getOperand(1).getReg();
+ unsigned Rt2 = Inst.getOperand(2).getReg();
+ unsigned Rn = Inst.getOperand(3).getReg();
+ if (RI->isSubRegisterEq(Rn, Rt))
+ return Error(Loc[0], "unpredictable STP instruction, writeback base "
+ "is also a source");
+ if (RI->isSubRegisterEq(Rn, Rt2))
+ return Error(Loc[1], "unpredictable STP instruction, writeback base "
+ "is also a source");
+ break;
+ }
+ case AArch64::LDRBBpre:
+ case AArch64::LDRBpre:
+ case AArch64::LDRHHpre:
+ case AArch64::LDRHpre:
+ case AArch64::LDRSBWpre:
+ case AArch64::LDRSBXpre:
+ case AArch64::LDRSHWpre:
+ case AArch64::LDRSHXpre:
+ case AArch64::LDRSWpre:
+ case AArch64::LDRWpre:
+ case AArch64::LDRXpre:
+ case AArch64::LDRBBpost:
+ case AArch64::LDRBpost:
+ case AArch64::LDRHHpost:
+ case AArch64::LDRHpost:
+ case AArch64::LDRSBWpost:
+ case AArch64::LDRSBXpost:
+ case AArch64::LDRSHWpost:
+ case AArch64::LDRSHXpost:
+ case AArch64::LDRSWpost:
+ case AArch64::LDRWpost:
+ case AArch64::LDRXpost: {
+ unsigned Rt = Inst.getOperand(1).getReg();
+ unsigned Rn = Inst.getOperand(2).getReg();
+ if (RI->isSubRegisterEq(Rn, Rt))
+ return Error(Loc[0], "unpredictable LDR instruction, writeback base "
+ "is also a source");
+ break;
+ }
+ case AArch64::STRBBpost:
+ case AArch64::STRBpost:
+ case AArch64::STRHHpost:
+ case AArch64::STRHpost:
+ case AArch64::STRWpost:
+ case AArch64::STRXpost:
+ case AArch64::STRBBpre:
+ case AArch64::STRBpre:
+ case AArch64::STRHHpre:
+ case AArch64::STRHpre:
+ case AArch64::STRWpre:
+ case AArch64::STRXpre: {
+ unsigned Rt = Inst.getOperand(1).getReg();
+ unsigned Rn = Inst.getOperand(2).getReg();
+ if (RI->isSubRegisterEq(Rn, Rt))
+ return Error(Loc[0], "unpredictable STR instruction, writeback base "
+ "is also a source");
+ break;
+ }
+ }
+
+ // Now check immediate ranges. Separate from the above as there is overlap
+ // in the instructions being checked and this keeps the nested conditionals
+ // to a minimum.
+ switch (Inst.getOpcode()) {
+ case AArch64::ADDSWri:
+ case AArch64::ADDSXri:
+ case AArch64::ADDWri:
+ case AArch64::ADDXri:
+ case AArch64::SUBSWri:
+ case AArch64::SUBSXri:
+ case AArch64::SUBWri:
+ case AArch64::SUBXri: {
+ // Annoyingly we can't do this in the isAddSubImm predicate, so there is
+ // some slight duplication here.
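+    // e.g. "add x0, x1, :lo12:var" arrives here with the :lo12: modifier as
+    // a symbolic expression in the immediate slot.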
+ if (Inst.getOperand(2).isExpr()) {
+ const MCExpr *Expr = Inst.getOperand(2).getExpr();
+ AArch64MCExpr::VariantKind ELFRefKind;
+ MCSymbolRefExpr::VariantKind DarwinRefKind;
+ int64_t Addend;
+ if (!classifySymbolRef(Expr, ELFRefKind, DarwinRefKind, Addend)) {
+ return Error(Loc[2], "invalid immediate expression");
+ }
+
+ // Only allow these with ADDXri.
+ if ((DarwinRefKind == MCSymbolRefExpr::VK_PAGEOFF ||
+ DarwinRefKind == MCSymbolRefExpr::VK_TLVPPAGEOFF) &&
+ Inst.getOpcode() == AArch64::ADDXri)
+ return false;
+
+ // Only allow these with ADDXri/ADDWri
+ if ((ELFRefKind == AArch64MCExpr::VK_LO12 ||
+ ELFRefKind == AArch64MCExpr::VK_DTPREL_HI12 ||
+ ELFRefKind == AArch64MCExpr::VK_DTPREL_LO12 ||
+ ELFRefKind == AArch64MCExpr::VK_DTPREL_LO12_NC ||
+ ELFRefKind == AArch64MCExpr::VK_TPREL_HI12 ||
+ ELFRefKind == AArch64MCExpr::VK_TPREL_LO12 ||
+ ELFRefKind == AArch64MCExpr::VK_TPREL_LO12_NC ||
+ ELFRefKind == AArch64MCExpr::VK_TLSDESC_LO12) &&
+ (Inst.getOpcode() == AArch64::ADDXri ||
+ Inst.getOpcode() == AArch64::ADDWri))
+ return false;
+
+ // Don't allow expressions in the immediate field otherwise
+ return Error(Loc[2], "invalid immediate expression");
+ }
+ return false;
+ }
+ default:
+ return false;
+ }
+}
+
+bool AArch64AsmParser::showMatchError(SMLoc Loc, unsigned ErrCode) {
+ switch (ErrCode) {
+ case Match_MissingFeature:
+ return Error(Loc,
+ "instruction requires a CPU feature not currently enabled");
+ case Match_InvalidOperand:
+ return Error(Loc, "invalid operand for instruction");
+ case Match_InvalidSuffix:
+ return Error(Loc, "invalid type suffix for instruction");
+ case Match_InvalidCondCode:
+ return Error(Loc, "expected AArch64 condition code");
+ case Match_AddSubRegExtendSmall:
+ return Error(Loc,
+ "expected '[su]xt[bhw]' or 'lsl' with optional integer in range [0, 4]");
+ case Match_AddSubRegExtendLarge:
+ return Error(Loc,
+ "expected 'sxtx' 'uxtx' or 'lsl' with optional integer in range [0, 4]");
+ case Match_AddSubSecondSource:
+ return Error(Loc,
+ "expected compatible register, symbol or integer in range [0, 4095]");
+ case Match_LogicalSecondSource:
+ return Error(Loc, "expected compatible register or logical immediate");
+ case Match_InvalidMovImm32Shift:
+ return Error(Loc, "expected 'lsl' with optional integer 0 or 16");
+ case Match_InvalidMovImm64Shift:
+ return Error(Loc, "expected 'lsl' with optional integer 0, 16, 32 or 48");
+ case Match_AddSubRegShift32:
+ return Error(Loc,
+ "expected 'lsl', 'lsr' or 'asr' with optional integer in range [0, 31]");
+ case Match_AddSubRegShift64:
+ return Error(Loc,
+ "expected 'lsl', 'lsr' or 'asr' with optional integer in range [0, 63]");
+ case Match_InvalidFPImm:
+ return Error(Loc,
+ "expected compatible register or floating-point constant");
+ case Match_InvalidMemoryIndexedSImm9:
+ return Error(Loc, "index must be an integer in range [-256, 255].");
+ case Match_InvalidMemoryIndexed4SImm7:
+ return Error(Loc, "index must be a multiple of 4 in range [-256, 252].");
+ case Match_InvalidMemoryIndexed8SImm7:
+ return Error(Loc, "index must be a multiple of 8 in range [-512, 504].");
+ case Match_InvalidMemoryIndexed16SImm7:
+ return Error(Loc, "index must be a multiple of 16 in range [-1024, 1008].");
+ case Match_InvalidMemoryWExtend8:
+ return Error(Loc,
+ "expected 'uxtw' or 'sxtw' with optional shift of #0");
+ case Match_InvalidMemoryWExtend16:
+ return Error(Loc,
+ "expected 'uxtw' or 'sxtw' with optional shift of #0 or #1");
+ case Match_InvalidMemoryWExtend32:
+ return Error(Loc,
+ "expected 'uxtw' or 'sxtw' with optional shift of #0 or #2");
+ case Match_InvalidMemoryWExtend64:
+ return Error(Loc,
+ "expected 'uxtw' or 'sxtw' with optional shift of #0 or #3");
+ case Match_InvalidMemoryWExtend128:
+ return Error(Loc,
+ "expected 'uxtw' or 'sxtw' with optional shift of #0 or #4");
+ case Match_InvalidMemoryXExtend8:
+ return Error(Loc,
+ "expected 'lsl' or 'sxtx' with optional shift of #0");
+ case Match_InvalidMemoryXExtend16:
+ return Error(Loc,
+ "expected 'lsl' or 'sxtx' with optional shift of #0 or #1");
+ case Match_InvalidMemoryXExtend32:
+ return Error(Loc,
+ "expected 'lsl' or 'sxtx' with optional shift of #0 or #2");
+ case Match_InvalidMemoryXExtend64:
+ return Error(Loc,
+ "expected 'lsl' or 'sxtx' with optional shift of #0 or #3");
+ case Match_InvalidMemoryXExtend128:
+ return Error(Loc,
+ "expected 'lsl' or 'sxtx' with optional shift of #0 or #4");
+ case Match_InvalidMemoryIndexed1:
+ return Error(Loc, "index must be an integer in range [0, 4095].");
+ case Match_InvalidMemoryIndexed2:
+ return Error(Loc, "index must be a multiple of 2 in range [0, 8190].");
+ case Match_InvalidMemoryIndexed4:
+ return Error(Loc, "index must be a multiple of 4 in range [0, 16380].");
+ case Match_InvalidMemoryIndexed8:
+ return Error(Loc, "index must be a multiple of 8 in range [0, 32760].");
+ case Match_InvalidMemoryIndexed16:
+ return Error(Loc, "index must be a multiple of 16 in range [0, 65520].");
+ case Match_InvalidImm0_7:
+ return Error(Loc, "immediate must be an integer in range [0, 7].");
+ case Match_InvalidImm0_15:
+ return Error(Loc, "immediate must be an integer in range [0, 15].");
+ case Match_InvalidImm0_31:
+ return Error(Loc, "immediate must be an integer in range [0, 31].");
+ case Match_InvalidImm0_63:
+ return Error(Loc, "immediate must be an integer in range [0, 63].");
+ case Match_InvalidImm0_127:
+ return Error(Loc, "immediate must be an integer in range [0, 127].");
+ case Match_InvalidImm0_65535:
+ return Error(Loc, "immediate must be an integer in range [0, 65535].");
+ case Match_InvalidImm1_8:
+ return Error(Loc, "immediate must be an integer in range [1, 8].");
+ case Match_InvalidImm1_16:
+ return Error(Loc, "immediate must be an integer in range [1, 16].");
+ case Match_InvalidImm1_32:
+ return Error(Loc, "immediate must be an integer in range [1, 32].");
+ case Match_InvalidImm1_64:
+ return Error(Loc, "immediate must be an integer in range [1, 64].");
+ case Match_InvalidIndex1:
+ return Error(Loc, "expected lane specifier '[1]'");
+ case Match_InvalidIndexB:
+ return Error(Loc, "vector lane must be an integer in range [0, 15].");
+ case Match_InvalidIndexH:
+ return Error(Loc, "vector lane must be an integer in range [0, 7].");
+ case Match_InvalidIndexS:
+ return Error(Loc, "vector lane must be an integer in range [0, 3].");
+ case Match_InvalidIndexD:
+ return Error(Loc, "vector lane must be an integer in range [0, 1].");
+ case Match_InvalidLabel:
+ return Error(Loc, "expected label or encodable integer pc offset");
+ case Match_MRS:
+ return Error(Loc, "expected readable system register");
+ case Match_MSR:
+ return Error(Loc, "expected writable system register or pstate");
+ case Match_MnemonicFail:
+ return Error(Loc, "unrecognized instruction mnemonic");
+ default:
+ assert(0 && "unexpected error code!");
+ return Error(Loc, "invalid instruction format");
+ }
+}
+
+static const char *getSubtargetFeatureName(unsigned Val);
+
+bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
+ OperandVector &Operands,
+ MCStreamer &Out,
+ unsigned &ErrorInfo,
+ bool MatchingInlineAsm) {
+  assert(!Operands.empty() && "Unexpected empty operand list!");
+ AArch64Operand *Op = static_cast<AArch64Operand *>(Operands[0]);
+ assert(Op->isToken() && "Leading operand should always be a mnemonic!");
+
+ StringRef Tok = Op->getToken();
+ unsigned NumOperands = Operands.size();
+
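+  // "lsl Rd, Rn, #shift" with an immediate is an alias for UBFM; e.g.
+  // "lsl x0, x1, #3" becomes "ubfm x0, x1, #61, #60"
+  // (immr = (64 - 3) & 0x3f, imms = 63 - 3), which is the rewrite below.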
+ if (NumOperands == 4 && Tok == "lsl") {
+ AArch64Operand *Op2 = static_cast<AArch64Operand *>(Operands[2]);
+ AArch64Operand *Op3 = static_cast<AArch64Operand *>(Operands[3]);
+ if (Op2->isReg() && Op3->isImm()) {
+ const MCConstantExpr *Op3CE = dyn_cast<MCConstantExpr>(Op3->getImm());
+ if (Op3CE) {
+ uint64_t Op3Val = Op3CE->getValue();
+ uint64_t NewOp3Val = 0;
+ uint64_t NewOp4Val = 0;
+ if (AArch64MCRegisterClasses[AArch64::GPR32allRegClassID].contains(
+ Op2->getReg())) {
+ NewOp3Val = (32 - Op3Val) & 0x1f;
+ NewOp4Val = 31 - Op3Val;
+ } else {
+ NewOp3Val = (64 - Op3Val) & 0x3f;
+ NewOp4Val = 63 - Op3Val;
+ }
+
+ const MCExpr *NewOp3 = MCConstantExpr::Create(NewOp3Val, getContext());
+ const MCExpr *NewOp4 = MCConstantExpr::Create(NewOp4Val, getContext());
+
+ Operands[0] = AArch64Operand::CreateToken(
+ "ubfm", false, Op->getStartLoc(), getContext());
+ Operands[3] = AArch64Operand::CreateImm(NewOp3, Op3->getStartLoc(),
+ Op3->getEndLoc(), getContext());
+ Operands.push_back(AArch64Operand::CreateImm(
+ NewOp4, Op3->getStartLoc(), Op3->getEndLoc(), getContext()));
+ delete Op3;
+ delete Op;
+ }
+ }
+ } else if (NumOperands == 5) {
+ // FIXME: Horrible hack to handle the BFI -> BFM, SBFIZ->SBFM, and
+ // UBFIZ -> UBFM aliases.
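+    // e.g. "bfi x0, x1, #8, #16" becomes "bfm x0, x1, #56, #15":
+    // immr = (64 - lsb) & 0x3f and imms = width - 1.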
+ if (Tok == "bfi" || Tok == "sbfiz" || Tok == "ubfiz") {
+ AArch64Operand *Op1 = static_cast<AArch64Operand *>(Operands[1]);
+ AArch64Operand *Op3 = static_cast<AArch64Operand *>(Operands[3]);
+ AArch64Operand *Op4 = static_cast<AArch64Operand *>(Operands[4]);
+
+ if (Op1->isReg() && Op3->isImm() && Op4->isImm()) {
+ const MCConstantExpr *Op3CE = dyn_cast<MCConstantExpr>(Op3->getImm());
+ const MCConstantExpr *Op4CE = dyn_cast<MCConstantExpr>(Op4->getImm());
+
+ if (Op3CE && Op4CE) {
+ uint64_t Op3Val = Op3CE->getValue();
+ uint64_t Op4Val = Op4CE->getValue();
+
+ uint64_t RegWidth = 0;
+ if (AArch64MCRegisterClasses[AArch64::GPR64allRegClassID].contains(
+ Op1->getReg()))
+ RegWidth = 64;
+ else
+ RegWidth = 32;
+
+          if (Op3Val >= RegWidth)
+            return Error(Op3->getStartLoc(),
+                         "expected integer in range [0, " +
+                             Twine(RegWidth - 1) + "]");
+          if (Op4Val < 1 || Op4Val > RegWidth)
+            return Error(Op4->getStartLoc(),
+                         "expected integer in range [1, " + Twine(RegWidth) +
+                             "]");
+
+ uint64_t NewOp3Val = 0;
+ if (AArch64MCRegisterClasses[AArch64::GPR32allRegClassID].contains(
+ Op1->getReg()))
+ NewOp3Val = (32 - Op3Val) & 0x1f;
+ else
+ NewOp3Val = (64 - Op3Val) & 0x3f;
+
+ uint64_t NewOp4Val = Op4Val - 1;
+
+ if (NewOp3Val != 0 && NewOp4Val >= NewOp3Val)
+ return Error(Op4->getStartLoc(),
+ "requested insert overflows register");
+
+ const MCExpr *NewOp3 =
+ MCConstantExpr::Create(NewOp3Val, getContext());
+ const MCExpr *NewOp4 =
+ MCConstantExpr::Create(NewOp4Val, getContext());
+ Operands[3] = AArch64Operand::CreateImm(
+ NewOp3, Op3->getStartLoc(), Op3->getEndLoc(), getContext());
+ Operands[4] = AArch64Operand::CreateImm(
+ NewOp4, Op4->getStartLoc(), Op4->getEndLoc(), getContext());
+ if (Tok == "bfi")
+ Operands[0] = AArch64Operand::CreateToken(
+ "bfm", false, Op->getStartLoc(), getContext());
+ else if (Tok == "sbfiz")
+ Operands[0] = AArch64Operand::CreateToken(
+ "sbfm", false, Op->getStartLoc(), getContext());
+ else if (Tok == "ubfiz")
+ Operands[0] = AArch64Operand::CreateToken(
+ "ubfm", false, Op->getStartLoc(), getContext());
+ else
+ llvm_unreachable("No valid mnemonic for alias?");
+
+ delete Op;
+ delete Op3;
+ delete Op4;
+ }
+ }
+
+ // FIXME: Horrible hack to handle the BFXIL->BFM, SBFX->SBFM, and
+ // UBFX -> UBFM aliases.
+ } else if (NumOperands == 5 &&
+ (Tok == "bfxil" || Tok == "sbfx" || Tok == "ubfx")) {
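+      // e.g. "ubfx x0, x1, #8, #16" becomes "ubfm x0, x1, #8, #23":
+      // imms = lsb + width - 1 while immr stays at the lsb.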
+ AArch64Operand *Op1 = static_cast<AArch64Operand *>(Operands[1]);
+ AArch64Operand *Op3 = static_cast<AArch64Operand *>(Operands[3]);
+ AArch64Operand *Op4 = static_cast<AArch64Operand *>(Operands[4]);
+
+ if (Op1->isReg() && Op3->isImm() && Op4->isImm()) {
+ const MCConstantExpr *Op3CE = dyn_cast<MCConstantExpr>(Op3->getImm());
+ const MCConstantExpr *Op4CE = dyn_cast<MCConstantExpr>(Op4->getImm());
+
+ if (Op3CE && Op4CE) {
+ uint64_t Op3Val = Op3CE->getValue();
+ uint64_t Op4Val = Op4CE->getValue();
+
+ uint64_t RegWidth = 0;
+ if (AArch64MCRegisterClasses[AArch64::GPR64allRegClassID].contains(
+ Op1->getReg()))
+ RegWidth = 64;
+ else
+ RegWidth = 32;
+
+          if (Op3Val >= RegWidth)
+            return Error(Op3->getStartLoc(),
+                         "expected integer in range [0, " +
+                             Twine(RegWidth - 1) + "]");
+          if (Op4Val < 1 || Op4Val > RegWidth)
+            return Error(Op4->getStartLoc(),
+                         "expected integer in range [1, " + Twine(RegWidth) +
+                             "]");
+
+ uint64_t NewOp4Val = Op3Val + Op4Val - 1;
+
+ if (NewOp4Val >= RegWidth || NewOp4Val < Op3Val)
+ return Error(Op4->getStartLoc(),
+ "requested extract overflows register");
+
+ const MCExpr *NewOp4 =
+ MCConstantExpr::Create(NewOp4Val, getContext());
+ Operands[4] = AArch64Operand::CreateImm(
+ NewOp4, Op4->getStartLoc(), Op4->getEndLoc(), getContext());
+ if (Tok == "bfxil")
+ Operands[0] = AArch64Operand::CreateToken(
+ "bfm", false, Op->getStartLoc(), getContext());
+ else if (Tok == "sbfx")
+ Operands[0] = AArch64Operand::CreateToken(
+ "sbfm", false, Op->getStartLoc(), getContext());
+ else if (Tok == "ubfx")
+ Operands[0] = AArch64Operand::CreateToken(
+ "ubfm", false, Op->getStartLoc(), getContext());
+ else
+ llvm_unreachable("No valid mnemonic for alias?");
+
+ delete Op;
+ delete Op4;
+ }
+ }
+ }
+ }
+ // FIXME: Horrible hack for sxtw and uxtw with Wn src and Xd dst operands.
+ // InstAlias can't quite handle this since the reg classes aren't
+ // subclasses.
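+  // e.g. for "sxtw x0, w1" the matcher wants a GPR64 source, so rewrite
+  // w1 to its x1 super-register before matching.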
+ if (NumOperands == 3 && (Tok == "sxtw" || Tok == "uxtw")) {
+ // The source register can be Wn here, but the matcher expects a
+ // GPR64. Twiddle it here if necessary.
+ AArch64Operand *Op = static_cast<AArch64Operand *>(Operands[2]);
+ if (Op->isReg()) {
+ unsigned Reg = getXRegFromWReg(Op->getReg());
+ Operands[2] = AArch64Operand::CreateReg(Reg, false, Op->getStartLoc(),
+ Op->getEndLoc(), getContext());
+ delete Op;
+ }
+ }
+  // FIXME: Likewise for sxt[bh] with an Xd dst operand
+ else if (NumOperands == 3 && (Tok == "sxtb" || Tok == "sxth")) {
+ AArch64Operand *Op = static_cast<AArch64Operand *>(Operands[1]);
+ if (Op->isReg() &&
+ AArch64MCRegisterClasses[AArch64::GPR64allRegClassID].contains(
+ Op->getReg())) {
+ // The source register can be Wn here, but the matcher expects a
+ // GPR64. Twiddle it here if necessary.
+ AArch64Operand *Op = static_cast<AArch64Operand *>(Operands[2]);
+ if (Op->isReg()) {
+ unsigned Reg = getXRegFromWReg(Op->getReg());
+ Operands[2] = AArch64Operand::CreateReg(Reg, false, Op->getStartLoc(),
+ Op->getEndLoc(), getContext());
+ delete Op;
+ }
+ }
+ }
+  // FIXME: Likewise for uxt[bh] with an Xd dst operand
+ else if (NumOperands == 3 && (Tok == "uxtb" || Tok == "uxth")) {
+ AArch64Operand *Op = static_cast<AArch64Operand *>(Operands[1]);
+ if (Op->isReg() &&
+ AArch64MCRegisterClasses[AArch64::GPR64allRegClassID].contains(
+ Op->getReg())) {
+ // The source register can be Wn here, but the matcher expects a
+ // GPR32. Twiddle it here if necessary.
+ AArch64Operand *Op = static_cast<AArch64Operand *>(Operands[1]);
+ if (Op->isReg()) {
+ unsigned Reg = getWRegFromXReg(Op->getReg());
+ Operands[1] = AArch64Operand::CreateReg(Reg, false, Op->getStartLoc(),
+ Op->getEndLoc(), getContext());
+ delete Op;
+ }
+ }
+ }
+
+ // Yet another horrible hack to handle FMOV Rd, #0.0 using [WX]ZR.
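+  // e.g. "fmov s0, #0.0" is matched as "fmov s0, wzr", since the FMOV
+  // immediate encoding cannot represent an exact zero.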
+ if (NumOperands == 3 && Tok == "fmov") {
+ AArch64Operand *RegOp = static_cast<AArch64Operand *>(Operands[1]);
+ AArch64Operand *ImmOp = static_cast<AArch64Operand *>(Operands[2]);
+ if (RegOp->isReg() && ImmOp->isFPImm() &&
+ ImmOp->getFPImm() == (unsigned)-1) {
+ unsigned zreg =
+ AArch64MCRegisterClasses[AArch64::FPR32RegClassID].contains(
+ RegOp->getReg())
+ ? AArch64::WZR
+ : AArch64::XZR;
+ Operands[2] = AArch64Operand::CreateReg(zreg, false, Op->getStartLoc(),
+ Op->getEndLoc(), getContext());
+ delete ImmOp;
+ }
+ }
+
+ MCInst Inst;
+ // First try to match against the secondary set of tables containing the
+ // short-form NEON instructions (e.g. "fadd.2s v0, v1, v2").
+ unsigned MatchResult =
+ MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm, 1);
+
+ // If that fails, try against the alternate table containing long-form NEON:
+ // "fadd v0.2s, v1.2s, v2.2s"
+ if (MatchResult != Match_Success)
+ MatchResult =
+ MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm, 0);
+
+ switch (MatchResult) {
+ case Match_Success: {
+ // Perform range checking and other semantic validations
+ SmallVector<SMLoc, 8> OperandLocs;
+ NumOperands = Operands.size();
+ for (unsigned i = 1; i < NumOperands; ++i)
+ OperandLocs.push_back(Operands[i]->getStartLoc());
+ if (validateInstruction(Inst, OperandLocs))
+ return true;
+
+ Inst.setLoc(IDLoc);
+ Out.EmitInstruction(Inst, STI);
+ return false;
+ }
+ case Match_MissingFeature: {
+ assert(ErrorInfo && "Unknown missing feature!");
+    // Special case the error message for the very common case where only
+    // a single subtarget feature is missing (e.g. NEON).
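+    // ErrorInfo here is a bit mask of missing features; walk its set bits
+    // and append each name, producing e.g. "instruction requires: neon".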
+ std::string Msg = "instruction requires:";
+ unsigned Mask = 1;
+ for (unsigned i = 0; i < (sizeof(ErrorInfo)*8-1); ++i) {
+ if (ErrorInfo & Mask) {
+ Msg += " ";
+ Msg += getSubtargetFeatureName(ErrorInfo & Mask);
+ }
+ Mask <<= 1;
+ }
+ return Error(IDLoc, Msg);
+ }
+ case Match_MnemonicFail:
+ return showMatchError(IDLoc, MatchResult);
+ case Match_InvalidOperand: {
+ SMLoc ErrorLoc = IDLoc;
+ if (ErrorInfo != ~0U) {
+ if (ErrorInfo >= Operands.size())
+ return Error(IDLoc, "too few operands for instruction");
+
+ ErrorLoc = ((AArch64Operand *)Operands[ErrorInfo])->getStartLoc();
+ if (ErrorLoc == SMLoc())
+ ErrorLoc = IDLoc;
+
+      // If the match failed on a suffix token operand, tweak the diagnostic
+      // accordingly. Keep this inside the ErrorInfo check so we never index
+      // Operands with ~0U.
+      if (((AArch64Operand *)Operands[ErrorInfo])->isToken() &&
+          ((AArch64Operand *)Operands[ErrorInfo])->isTokenSuffix())
+        MatchResult = Match_InvalidSuffix;
+    }
+
+    return showMatchError(ErrorLoc, MatchResult);
+ }
+ case Match_InvalidMemoryIndexed1:
+ case Match_InvalidMemoryIndexed2:
+ case Match_InvalidMemoryIndexed4:
+ case Match_InvalidMemoryIndexed8:
+ case Match_InvalidMemoryIndexed16:
+ case Match_InvalidCondCode:
+ case Match_AddSubRegExtendSmall:
+ case Match_AddSubRegExtendLarge:
+ case Match_AddSubSecondSource:
+ case Match_LogicalSecondSource:
+ case Match_AddSubRegShift32:
+ case Match_AddSubRegShift64:
+ case Match_InvalidMovImm32Shift:
+ case Match_InvalidMovImm64Shift:
+ case Match_InvalidFPImm:
+ case Match_InvalidMemoryWExtend8:
+ case Match_InvalidMemoryWExtend16:
+ case Match_InvalidMemoryWExtend32:
+ case Match_InvalidMemoryWExtend64:
+ case Match_InvalidMemoryWExtend128:
+ case Match_InvalidMemoryXExtend8:
+ case Match_InvalidMemoryXExtend16:
+ case Match_InvalidMemoryXExtend32:
+ case Match_InvalidMemoryXExtend64:
+ case Match_InvalidMemoryXExtend128:
+ case Match_InvalidMemoryIndexed4SImm7:
+ case Match_InvalidMemoryIndexed8SImm7:
+ case Match_InvalidMemoryIndexed16SImm7:
+ case Match_InvalidMemoryIndexedSImm9:
+ case Match_InvalidImm0_7:
+ case Match_InvalidImm0_15:
+ case Match_InvalidImm0_31:
+ case Match_InvalidImm0_63:
+ case Match_InvalidImm0_127:
+ case Match_InvalidImm0_65535:
+ case Match_InvalidImm1_8:
+ case Match_InvalidImm1_16:
+ case Match_InvalidImm1_32:
+ case Match_InvalidImm1_64:
+ case Match_InvalidIndex1:
+ case Match_InvalidIndexB:
+ case Match_InvalidIndexH:
+ case Match_InvalidIndexS:
+ case Match_InvalidIndexD:
+ case Match_InvalidLabel:
+ case Match_MSR:
+ case Match_MRS: {
+ // Any time we get here, there's nothing fancy to do. Just get the
+ // operand SMLoc and display the diagnostic.
+ SMLoc ErrorLoc = ((AArch64Operand *)Operands[ErrorInfo])->getStartLoc();
+ if (ErrorLoc == SMLoc())
+ ErrorLoc = IDLoc;
+ return showMatchError(ErrorLoc, MatchResult);
+ }
+ }
+
+ llvm_unreachable("Implement any new match types added!");
+ return true;
+}
+
+/// ParseDirective parses the AArch64-specific directives.
bool AArch64AsmParser::ParseDirective(AsmToken DirectiveID) {
StringRef IDVal = DirectiveID.getIdentifier();
+ SMLoc Loc = DirectiveID.getLoc();
if (IDVal == ".hword")
- return ParseDirectiveWord(2, DirectiveID.getLoc());
- else if (IDVal == ".word")
- return ParseDirectiveWord(4, DirectiveID.getLoc());
- else if (IDVal == ".xword")
- return ParseDirectiveWord(8, DirectiveID.getLoc());
- else if (IDVal == ".tlsdesccall")
- return ParseDirectiveTLSDescCall(DirectiveID.getLoc());
-
- return true;
+ return parseDirectiveWord(2, Loc);
+ if (IDVal == ".word")
+ return parseDirectiveWord(4, Loc);
+ if (IDVal == ".xword")
+ return parseDirectiveWord(8, Loc);
+ if (IDVal == ".tlsdesccall")
+ return parseDirectiveTLSDescCall(Loc);
+
+ return parseDirectiveLOH(IDVal, Loc);
}
/// parseDirectiveWord
/// ::= .word [ expression (, expression)* ]
-bool AArch64AsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
+bool AArch64AsmParser::parseDirectiveWord(unsigned Size, SMLoc L) {
if (getLexer().isNot(AsmToken::EndOfStatement)) {
for (;;) {
const MCExpr *Value;
if (getParser().parseExpression(Value))
- return false;
+ return true;
getParser().getStreamer().EmitValue(Value, Size);
@@ -2374,10 +3838,8 @@ bool AArch64AsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
break;
// FIXME: Improve diagnostic.
- if (getLexer().isNot(AsmToken::Comma)) {
- Error(L, "unexpected token in directive");
- return false;
- }
+ if (getLexer().isNot(AsmToken::Comma))
+ return Error(L, "unexpected token in directive");
Parser.Lex();
}
}
@@ -2388,15 +3850,14 @@ bool AArch64AsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
// parseDirectiveTLSDescCall:
// ::= .tlsdesccall symbol
-bool AArch64AsmParser::ParseDirectiveTLSDescCall(SMLoc L) {
+bool AArch64AsmParser::parseDirectiveTLSDescCall(SMLoc L) {
StringRef Name;
- if (getParser().parseIdentifier(Name)) {
- Error(L, "expected symbol after directive");
- return false;
- }
+ if (getParser().parseIdentifier(Name))
+ return Error(L, "expected symbol after directive");
MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);
- const MCSymbolRefExpr *Expr = MCSymbolRefExpr::Create(Sym, getContext());
+ const MCExpr *Expr = MCSymbolRefExpr::Create(Sym, getContext());
+ Expr = AArch64MCExpr::Create(Expr, AArch64MCExpr::VK_TLSDESC, getContext());
MCInst Inst;
Inst.setOpcode(AArch64::TLSDESCCALL);
@@ -2406,271 +3867,181 @@ bool AArch64AsmParser::ParseDirectiveTLSDescCall(SMLoc L) {
return false;
}
+/// ::= .loh <lohName | lohId> label1, ..., labelN
+/// The number of arguments depends on the loh identifier.
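+/// e.g. ".loh AdrpAdd Lpage, Ladd" ties an adrp/add pair together for the
+/// linker (the label names here are illustrative).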
+bool AArch64AsmParser::parseDirectiveLOH(StringRef IDVal, SMLoc Loc) {
+ if (IDVal != MCLOHDirectiveName())
+ return true;
+ MCLOHType Kind;
+ if (getParser().getTok().isNot(AsmToken::Identifier)) {
+ if (getParser().getTok().isNot(AsmToken::Integer))
+ return TokError("expected an identifier or a number in directive");
+    // We successfully got a numeric value for the identifier.
+    // Check that it is valid.
+ int64_t Id = getParser().getTok().getIntVal();
+ Kind = (MCLOHType)Id;
+ // Check that Id does not overflow MCLOHType.
+ if (!isValidMCLOHType(Kind) || Id != Kind)
+ return TokError("invalid numeric identifier in directive");
+ } else {
+ StringRef Name = getTok().getIdentifier();
+    // We successfully parsed an identifier.
+    // Check whether it is a recognized one.
+ int Id = MCLOHNameToId(Name);
+
+ if (Id == -1)
+ return TokError("invalid identifier in directive");
+ Kind = (MCLOHType)Id;
+ }
+ // Consume the identifier.
+ Lex();
+ // Get the number of arguments of this LOH.
+ int NbArgs = MCLOHIdToNbArgs(Kind);
+
+ assert(NbArgs != -1 && "Invalid number of arguments");
+
+ SmallVector<MCSymbol *, 3> Args;
+ for (int Idx = 0; Idx < NbArgs; ++Idx) {
+ StringRef Name;
+ if (getParser().parseIdentifier(Name))
+ return TokError("expected identifier in directive");
+ Args.push_back(getContext().GetOrCreateSymbol(Name));
+
+ if (Idx + 1 == NbArgs)
+ break;
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("unexpected token in '" + Twine(IDVal) + "' directive");
+ Lex();
+ }
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '" + Twine(IDVal) + "' directive");
-bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
- SmallVectorImpl<MCParsedAsmOperand*> &Operands,
- MCStreamer &Out, unsigned &ErrorInfo,
- bool MatchingInlineAsm) {
- MCInst Inst;
- unsigned MatchResult;
- MatchResult = MatchInstructionImpl(Operands, Inst, ErrorInfo,
- MatchingInlineAsm);
+ getStreamer().EmitLOHDirective((MCLOHType)Kind, Args);
+ return false;
+}
- if (ErrorInfo != ~0U && ErrorInfo >= Operands.size())
- return Error(IDLoc, "too few operands for instruction");
+bool
+AArch64AsmParser::classifySymbolRef(const MCExpr *Expr,
+ AArch64MCExpr::VariantKind &ELFRefKind,
+ MCSymbolRefExpr::VariantKind &DarwinRefKind,
+ int64_t &Addend) {
+ ELFRefKind = AArch64MCExpr::VK_INVALID;
+ DarwinRefKind = MCSymbolRefExpr::VK_None;
+ Addend = 0;
+
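+  // e.g. for ":lo12:var + 4" this reports ELFRefKind = VK_LO12,
+  // DarwinRefKind = VK_None and Addend = 4, and returns true.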
+ if (const AArch64MCExpr *AE = dyn_cast<AArch64MCExpr>(Expr)) {
+ ELFRefKind = AE->getKind();
+ Expr = AE->getSubExpr();
+ }
+
+ const MCSymbolRefExpr *SE = dyn_cast<MCSymbolRefExpr>(Expr);
+ if (SE) {
+ // It's a simple symbol reference with no addend.
+ DarwinRefKind = SE->getKind();
+ return true;
+ }
- switch (MatchResult) {
- default: break;
- case Match_Success:
- if (validateInstruction(Inst, Operands))
- return true;
+ const MCBinaryExpr *BE = dyn_cast<MCBinaryExpr>(Expr);
+ if (!BE)
+ return false;
- Out.EmitInstruction(Inst, STI);
+ SE = dyn_cast<MCSymbolRefExpr>(BE->getLHS());
+ if (!SE)
return false;
- case Match_MissingFeature:
- Error(IDLoc, "instruction requires a CPU feature not currently enabled");
- return true;
- case Match_InvalidOperand: {
- SMLoc ErrorLoc = IDLoc;
- if (ErrorInfo != ~0U) {
- ErrorLoc = ((AArch64Operand*)Operands[ErrorInfo])->getStartLoc();
- if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc;
- }
+ DarwinRefKind = SE->getKind();
- return Error(ErrorLoc, "invalid operand for instruction");
- }
- case Match_MnemonicFail:
- return Error(IDLoc, "invalid instruction");
+ if (BE->getOpcode() != MCBinaryExpr::Add &&
+ BE->getOpcode() != MCBinaryExpr::Sub)
+ return false;
- case Match_AddSubRegExtendSmall:
- return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
- "expected '[su]xt[bhw]' or 'lsl' with optional integer in range [0, 4]");
- case Match_AddSubRegExtendLarge:
- return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
- "expected 'sxtx' 'uxtx' or 'lsl' with optional integer in range [0, 4]");
- case Match_AddSubRegShift32:
- return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
- "expected 'lsl', 'lsr' or 'asr' with optional integer in range [0, 31]");
- case Match_AddSubRegShift64:
- return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
- "expected 'lsl', 'lsr' or 'asr' with optional integer in range [0, 63]");
- case Match_AddSubSecondSource:
- return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
- "expected compatible register, symbol or integer in range [0, 4095]");
- case Match_CVTFixedPos32:
- return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
- "expected integer in range [1, 32]");
- case Match_CVTFixedPos64:
- return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
- "expected integer in range [1, 64]");
- case Match_CondCode:
- return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
- "expected AArch64 condition code");
- case Match_FPImm:
- // Any situation which allows a nontrivial floating-point constant also
- // allows a register.
- return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
- "expected compatible register or floating-point constant");
- case Match_FPZero:
- return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
- "expected floating-point constant #0.0 or invalid register type");
- case Match_Label:
- return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
- "expected label or encodable integer pc offset");
- case Match_Lane1:
- return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
- "expected lane specifier '[1]'");
- case Match_LoadStoreExtend32_1:
- return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
- "expected 'uxtw' or 'sxtw' with optional shift of #0");
- case Match_LoadStoreExtend32_2:
- return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
- "expected 'uxtw' or 'sxtw' with optional shift of #0 or #1");
- case Match_LoadStoreExtend32_4:
- return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
- "expected 'uxtw' or 'sxtw' with optional shift of #0 or #2");
- case Match_LoadStoreExtend32_8:
- return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
- "expected 'uxtw' or 'sxtw' with optional shift of #0 or #3");
- case Match_LoadStoreExtend32_16:
- return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
- "expected 'lsl' or 'sxtw' with optional shift of #0 or #4");
- case Match_LoadStoreExtend64_1:
- return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
- "expected 'lsl' or 'sxtx' with optional shift of #0");
- case Match_LoadStoreExtend64_2:
- return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
- "expected 'lsl' or 'sxtx' with optional shift of #0 or #1");
- case Match_LoadStoreExtend64_4:
- return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
- "expected 'lsl' or 'sxtx' with optional shift of #0 or #2");
- case Match_LoadStoreExtend64_8:
- return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
- "expected 'lsl' or 'sxtx' with optional shift of #0 or #3");
- case Match_LoadStoreExtend64_16:
- return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
- "expected 'lsl' or 'sxtx' with optional shift of #0 or #4");
- case Match_LoadStoreSImm7_4:
- return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
- "expected integer multiple of 4 in range [-256, 252]");
- case Match_LoadStoreSImm7_8:
- return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
- "expected integer multiple of 8 in range [-512, 504]");
- case Match_LoadStoreSImm7_16:
- return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
- "expected integer multiple of 16 in range [-1024, 1008]");
- case Match_LoadStoreSImm9:
- return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
- "expected integer in range [-256, 255]");
- case Match_LoadStoreUImm12_1:
- return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
- "expected symbolic reference or integer in range [0, 4095]");
- case Match_LoadStoreUImm12_2:
- return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
- "expected symbolic reference or integer in range [0, 8190]");
- case Match_LoadStoreUImm12_4:
- return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
- "expected symbolic reference or integer in range [0, 16380]");
- case Match_LoadStoreUImm12_8:
- return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
- "expected symbolic reference or integer in range [0, 32760]");
- case Match_LoadStoreUImm12_16:
- return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
- "expected symbolic reference or integer in range [0, 65520]");
- case Match_LogicalSecondSource:
- return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
- "expected compatible register or logical immediate");
- case Match_MOVWUImm16:
- return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
- "expected relocated symbol or integer in range [0, 65535]");
- case Match_MRS:
- return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
- "expected readable system register");
- case Match_MSR:
- return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
- "expected writable system register or pstate");
- case Match_NamedImm_at:
- return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
- "expected symbolic 'at' operand: s1e[0-3][rw] or s12e[01][rw]");
- case Match_NamedImm_dbarrier:
- return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
- "expected integer in range [0, 15] or symbolic barrier operand");
- case Match_NamedImm_dc:
- return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
- "expected symbolic 'dc' operand");
- case Match_NamedImm_ic:
- return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
- "expected 'ic' operand: 'ialluis', 'iallu' or 'ivau'");
- case Match_NamedImm_isb:
- return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
- "expected integer in range [0, 15] or 'sy'");
- case Match_NamedImm_prefetch:
- return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
- "expected prefetch hint: p(ld|st|i)l[123](strm|keep)");
- case Match_NamedImm_tlbi:
- return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
- "expected translation buffer invalidation operand");
- case Match_UImm16:
- return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
- "expected integer in range [0, 65535]");
- case Match_UImm3:
- return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
- "expected integer in range [0, 7]");
- case Match_UImm4:
- return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
- "expected integer in range [0, 15]");
- case Match_UImm5:
- return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
- "expected integer in range [0, 31]");
- case Match_UImm6:
- return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
- "expected integer in range [0, 63]");
- case Match_UImm7:
- return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
- "expected integer in range [0, 127]");
- case Match_Width32:
- return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
- "expected integer in range [<lsb>, 31]");
- case Match_Width64:
- return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(),
- "expected integer in range [<lsb>, 63]");
- case Match_ShrImm8:
- return Error(((AArch64Operand *)Operands[ErrorInfo])->getStartLoc(),
- "expected integer in range [1, 8]");
- case Match_ShrImm16:
- return Error(((AArch64Operand *)Operands[ErrorInfo])->getStartLoc(),
- "expected integer in range [1, 16]");
- case Match_ShrImm32:
- return Error(((AArch64Operand *)Operands[ErrorInfo])->getStartLoc(),
- "expected integer in range [1, 32]");
- case Match_ShrImm64:
- return Error(((AArch64Operand *)Operands[ErrorInfo])->getStartLoc(),
- "expected integer in range [1, 64]");
- case Match_ShlImm8:
- return Error(((AArch64Operand *)Operands[ErrorInfo])->getStartLoc(),
- "expected integer in range [0, 7]");
- case Match_ShlImm16:
- return Error(((AArch64Operand *)Operands[ErrorInfo])->getStartLoc(),
- "expected integer in range [0, 15]");
- case Match_ShlImm32:
- return Error(((AArch64Operand *)Operands[ErrorInfo])->getStartLoc(),
- "expected integer in range [0, 31]");
- case Match_ShlImm64:
- return Error(((AArch64Operand *)Operands[ErrorInfo])->getStartLoc(),
- "expected integer in range [0, 63]");
- }
+  // See if the addend is a constant; otherwise there's more going
+  // on here than we can deal with.
+ auto AddendExpr = dyn_cast<MCConstantExpr>(BE->getRHS());
+ if (!AddendExpr)
+ return false;
- llvm_unreachable("Implement any new match types added!");
- return true;
+ Addend = AddendExpr->getValue();
+ if (BE->getOpcode() == MCBinaryExpr::Sub)
+ Addend = -Addend;
+
+ // It's some symbol reference + a constant addend, but really
+ // shouldn't use both Darwin and ELF syntax.
+ return ELFRefKind == AArch64MCExpr::VK_INVALID ||
+ DarwinRefKind == MCSymbolRefExpr::VK_None;
}
-void AArch64Operand::print(raw_ostream &OS) const {
+/// Force static initialization.
+extern "C" void LLVMInitializeAArch64AsmParser() {
+ RegisterMCAsmParser<AArch64AsmParser> X(TheAArch64leTarget);
+ RegisterMCAsmParser<AArch64AsmParser> Y(TheAArch64beTarget);
+
+ RegisterMCAsmParser<AArch64AsmParser> Z(TheARM64leTarget);
+ RegisterMCAsmParser<AArch64AsmParser> W(TheARM64beTarget);
+}
+
+#define GET_REGISTER_MATCHER
+#define GET_SUBTARGET_FEATURE_NAME
+#define GET_MATCHER_IMPLEMENTATION
+#include "AArch64GenAsmMatcher.inc"
+
+// Define this matcher function after the auto-generated include so we
+// have the match class enum definitions.
+unsigned AArch64AsmParser::validateTargetOperandClass(MCParsedAsmOperand *AsmOp,
+ unsigned Kind) {
+ AArch64Operand *Op = static_cast<AArch64Operand *>(AsmOp);
+ // If the kind is a token for a literal immediate, check if our asm
+ // operand matches. This is for InstAliases which have a fixed-value
+ // immediate in the syntax.
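+  // Match classes like MCK__35_4 stand for the literal token "#4": TableGen
+  // mangles '#' to "_35_", its ASCII code.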
+ int64_t ExpectedVal;
switch (Kind) {
- case k_CondCode:
- OS << "<CondCode: " << CondCode.Code << ">";
+ default:
+ return Match_InvalidOperand;
+ case MCK__35_0:
+ ExpectedVal = 0;
break;
- case k_FPImmediate:
- OS << "<fpimm: " << FPImm.Val << ">";
+ case MCK__35_1:
+ ExpectedVal = 1;
break;
- case k_ImmWithLSL:
- OS << "<immwithlsl: imm=" << ImmWithLSL.Val
- << ", shift=" << ImmWithLSL.ShiftAmount << ">";
+ case MCK__35_12:
+ ExpectedVal = 12;
break;
- case k_Immediate:
- getImm()->print(OS);
+ case MCK__35_16:
+ ExpectedVal = 16;
break;
- case k_Register:
- OS << "<register " << getReg() << '>';
+ case MCK__35_2:
+ ExpectedVal = 2;
break;
- case k_Token:
- OS << '\'' << getToken() << '\'';
+ case MCK__35_24:
+ ExpectedVal = 24;
break;
- case k_ShiftExtend:
- OS << "<shift: type=" << ShiftExtend.ShiftType
- << ", amount=" << ShiftExtend.Amount << ">";
+ case MCK__35_3:
+ ExpectedVal = 3;
break;
- case k_SysReg: {
- StringRef Name(SysReg.Data, SysReg.Length);
- OS << "<sysreg: " << Name << '>';
+ case MCK__35_32:
+ ExpectedVal = 32;
break;
- }
- default:
- llvm_unreachable("No idea how to print this kind of operand");
+ case MCK__35_4:
+ ExpectedVal = 4;
+ break;
+ case MCK__35_48:
+ ExpectedVal = 48;
+ break;
+ case MCK__35_6:
+ ExpectedVal = 6;
+ break;
+ case MCK__35_64:
+ ExpectedVal = 64;
+ break;
+ case MCK__35_8:
+ ExpectedVal = 8;
break;
}
+ if (!Op->isImm())
+ return Match_InvalidOperand;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Op->getImm());
+ if (!CE)
+ return Match_InvalidOperand;
+ if (CE->getValue() == ExpectedVal)
+ return Match_Success;
+ return Match_InvalidOperand;
}
-
-void AArch64Operand::dump() const {
- print(errs());
-}
-
-
-/// Force static initialization.
-extern "C" void LLVMInitializeAArch64AsmParser() {
- RegisterMCAsmParser<AArch64AsmParser> X(TheAArch64leTarget);
- RegisterMCAsmParser<AArch64AsmParser> Y(TheAArch64beTarget);
-}
-
-#define GET_REGISTER_MATCHER
-#define GET_MATCHER_IMPLEMENTATION
-#include "AArch64GenAsmMatcher.inc"
diff --git a/lib/Target/AArch64/AsmParser/CMakeLists.txt b/lib/Target/AArch64/AsmParser/CMakeLists.txt
index e81ec70..cc0a9d8 100644
--- a/lib/Target/AArch64/AsmParser/CMakeLists.txt
+++ b/lib/Target/AArch64/AsmParser/CMakeLists.txt
@@ -1,3 +1,6 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
add_llvm_library(LLVMAArch64AsmParser
AArch64AsmParser.cpp
)
+
diff --git a/lib/Target/AArch64/AsmParser/LLVMBuild.txt b/lib/Target/AArch64/AsmParser/LLVMBuild.txt
index 2d8f632..11eb9d5 100644
--- a/lib/Target/AArch64/AsmParser/LLVMBuild.txt
+++ b/lib/Target/AArch64/AsmParser/LLVMBuild.txt
@@ -1,4 +1,4 @@
-;===- ./lib/Target/AArch64/AsmParser/LLVMBuild.txt -------------*- Conf -*--===;
+;===- ./lib/Target/AArch64/AsmParser/LLVMBuild.txt ---------------*- Conf -*--===;
;
; The LLVM Compiler Infrastructure
;
diff --git a/lib/Target/AArch64/AsmParser/Makefile b/lib/Target/AArch64/AsmParser/Makefile
index 56c9ef5..00268c7 100644
--- a/lib/Target/AArch64/AsmParser/Makefile
+++ b/lib/Target/AArch64/AsmParser/Makefile
@@ -9,7 +9,7 @@
LEVEL = ../../../..
LIBRARYNAME = LLVMAArch64AsmParser
-# Hack: we need to include 'main' target directory to grab private headers
+# Hack: we need to include 'main' ARM target directory to grab private headers
CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
include $(LEVEL)/Makefile.common
diff --git a/lib/Target/AArch64/CMakeLists.txt b/lib/Target/AArch64/CMakeLists.txt
index dfc10af..789d549 100644
--- a/lib/Target/AArch64/CMakeLists.txt
+++ b/lib/Target/AArch64/CMakeLists.txt
@@ -1,37 +1,51 @@
set(LLVM_TARGET_DEFINITIONS AArch64.td)
-tablegen(LLVM AArch64GenAsmMatcher.inc -gen-asm-matcher)
-tablegen(LLVM AArch64GenAsmWriter.inc -gen-asm-writer)
-tablegen(LLVM AArch64GenCallingConv.inc -gen-callingconv)
-tablegen(LLVM AArch64GenDisassemblerTables.inc -gen-disassembler)
+tablegen(LLVM AArch64GenRegisterInfo.inc -gen-register-info)
tablegen(LLVM AArch64GenInstrInfo.inc -gen-instr-info)
tablegen(LLVM AArch64GenMCCodeEmitter.inc -gen-emitter -mc-emitter)
tablegen(LLVM AArch64GenMCPseudoLowering.inc -gen-pseudo-lowering)
-tablegen(LLVM AArch64GenRegisterInfo.inc -gen-register-info)
+tablegen(LLVM AArch64GenAsmWriter.inc -gen-asm-writer)
+tablegen(LLVM AArch64GenAsmWriter1.inc -gen-asm-writer -asmwriternum=1)
+tablegen(LLVM AArch64GenAsmMatcher.inc -gen-asm-matcher)
tablegen(LLVM AArch64GenDAGISel.inc -gen-dag-isel)
+tablegen(LLVM AArch64GenFastISel.inc -gen-fast-isel)
+tablegen(LLVM AArch64GenCallingConv.inc -gen-callingconv)
tablegen(LLVM AArch64GenSubtargetInfo.inc -gen-subtarget)
+tablegen(LLVM AArch64GenDisassemblerTables.inc -gen-disassembler)
add_public_tablegen_target(AArch64CommonTableGen)
add_llvm_target(AArch64CodeGen
+ AArch64AddressTypePromotion.cpp
+ AArch64AdvSIMDScalarPass.cpp
AArch64AsmPrinter.cpp
- AArch64BranchFixupPass.cpp
+ AArch64BranchRelaxation.cpp
+ AArch64CleanupLocalDynamicTLSPass.cpp
+ AArch64CollectLOH.cpp
+ AArch64ConditionalCompares.cpp
+ AArch64DeadRegisterDefinitionsPass.cpp
+ AArch64ExpandPseudoInsts.cpp
+ AArch64FastISel.cpp
AArch64FrameLowering.cpp
AArch64ISelDAGToDAG.cpp
AArch64ISelLowering.cpp
AArch64InstrInfo.cpp
- AArch64MachineFunctionInfo.cpp
+ AArch64LoadStoreOptimizer.cpp
AArch64MCInstLower.cpp
+ AArch64PromoteConstant.cpp
AArch64RegisterInfo.cpp
AArch64SelectionDAGInfo.cpp
+ AArch64StorePairSuppress.cpp
AArch64Subtarget.cpp
AArch64TargetMachine.cpp
AArch64TargetObjectFile.cpp
AArch64TargetTransformInfo.cpp
- )
+)
+add_dependencies(LLVMAArch64CodeGen intrinsics_gen)
+
+add_subdirectory(TargetInfo)
add_subdirectory(AsmParser)
add_subdirectory(Disassembler)
add_subdirectory(InstPrinter)
add_subdirectory(MCTargetDesc)
-add_subdirectory(TargetInfo)
add_subdirectory(Utils)
diff --git a/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
index 9bd363a..6de27d6 100644
--- a/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
+++ b/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
@@ -1,4 +1,4 @@
-//===- AArch64Disassembler.cpp - Disassembler for AArch64 ISA -------------===//
+//===- AArch64Disassembler.cpp - Disassembler for AArch64 -------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,244 +7,169 @@
//
//===----------------------------------------------------------------------===//
//
-// This file contains the functions necessary to decode AArch64 instruction
-// bitpatterns into MCInsts (with the help of TableGenerated information from
-// the instruction definitions).
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "arm-disassembler"
-
-#include "AArch64.h"
-#include "AArch64RegisterInfo.h"
+#include "AArch64Disassembler.h"
+#include "AArch64ExternalSymbolizer.h"
#include "AArch64Subtarget.h"
+#include "MCTargetDesc/AArch64AddressingModes.h"
#include "Utils/AArch64BaseInfo.h"
-#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCDisassembler.h"
-#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCFixedLenDisassembler.h"
#include "llvm/MC/MCInst.h"
-#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCFixedLenDisassembler.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MemoryObject.h"
#include "llvm/Support/TargetRegistry.h"
-#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/ErrorHandling.h"
using namespace llvm;
-typedef MCDisassembler::DecodeStatus DecodeStatus;
-
-namespace {
-/// AArch64 disassembler for all AArch64 platforms.
-class AArch64Disassembler : public MCDisassembler {
- OwningPtr<const MCRegisterInfo> RegInfo;
-public:
- /// Initializes the disassembler.
- ///
- AArch64Disassembler(const MCSubtargetInfo &STI, const MCRegisterInfo *Info)
- : MCDisassembler(STI), RegInfo(Info) {
- }
-
- ~AArch64Disassembler() {}
+#define DEBUG_TYPE "aarch64-disassembler"
- /// See MCDisassembler.
- DecodeStatus getInstruction(MCInst &instr,
- uint64_t &size,
- const MemoryObject &region,
- uint64_t address,
- raw_ostream &vStream,
- raw_ostream &cStream) const;
+// Pull DecodeStatus and its enum values into the global namespace.
+typedef llvm::MCDisassembler::DecodeStatus DecodeStatus;
- const MCRegisterInfo *getRegInfo() const { return RegInfo.get(); }
-};
-
-}
-
-// Forward-declarations used in the auto-generated files.
-static DecodeStatus DecodeGPR64RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
- uint64_t Address, const void *Decoder);
-static DecodeStatus
-DecodeGPR64xspRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
- uint64_t Address, const void *Decoder);
-
-static DecodeStatus DecodeGPR32RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
- uint64_t Address, const void *Decoder);
-static DecodeStatus
-DecodeGPR32wspRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
- uint64_t Address, const void *Decoder);
-
-static DecodeStatus DecodeFPR8RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
- uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeFPR16RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
- uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeFPR32RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
- uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeFPR64RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
- uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeFPR64LoRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
- uint64_t Address, const void *Decoder);
+// Forward declare these because the autogenerated code will reference them.
+// Definitions are further down.
static DecodeStatus DecodeFPR128RegisterClass(llvm::MCInst &Inst,
unsigned RegNo, uint64_t Address,
const void *Decoder);
-static DecodeStatus DecodeFPR128LoRegisterClass(llvm::MCInst &Inst,
- unsigned RegNo, uint64_t Address,
- const void *Decoder);
-
-static DecodeStatus DecodeGPR64noxzrRegisterClass(llvm::MCInst &Inst,
- unsigned RegNo,
- uint64_t Address,
- const void *Decoder);
-
-static DecodeStatus DecodeDPairRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+static DecodeStatus DecodeFPR128_loRegisterClass(llvm::MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder);
+static DecodeStatus DecodeFPR64RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
uint64_t Address,
const void *Decoder);
-static DecodeStatus DecodeQPairRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+static DecodeStatus DecodeFPR32RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
uint64_t Address,
const void *Decoder);
-static DecodeStatus DecodeDTripleRegisterClass(llvm::MCInst &Inst,
- unsigned RegNo, uint64_t Address,
- const void *Decoder);
-static DecodeStatus DecodeQTripleRegisterClass(llvm::MCInst &Inst,
- unsigned RegNo, uint64_t Address,
- const void *Decoder);
-static DecodeStatus DecodeDQuadRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+static DecodeStatus DecodeFPR16RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
uint64_t Address,
const void *Decoder);
-static DecodeStatus DecodeQQuadRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+static DecodeStatus DecodeFPR8RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder);
+static DecodeStatus DecodeGPR64RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
uint64_t Address,
const void *Decoder);
-
-static DecodeStatus DecodeAddrRegExtendOperand(llvm::MCInst &Inst,
- unsigned OptionHiS,
- uint64_t Address,
- const void *Decoder);
-
-
-static DecodeStatus DecodeBitfield32ImmOperand(llvm::MCInst &Inst,
- unsigned Imm6Bits,
- uint64_t Address,
+static DecodeStatus DecodeGPR64spRegisterClass(llvm::MCInst &Inst,
+ unsigned RegNo, uint64_t Address,
const void *Decoder);
-
-static DecodeStatus DecodeCVT32FixedPosOperand(llvm::MCInst &Inst,
- unsigned Imm6Bits,
- uint64_t Address,
+static DecodeStatus DecodeGPR32RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder);
+static DecodeStatus DecodeGPR32spRegisterClass(llvm::MCInst &Inst,
+ unsigned RegNo, uint64_t Address,
const void *Decoder);
-
-static DecodeStatus DecodeFPZeroOperand(llvm::MCInst &Inst,
- unsigned RmBits,
- uint64_t Address,
- const void *Decoder);
-
-static DecodeStatus DecodeShiftRightImm8(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeShiftRightImm16(MCInst &Inst, unsigned Val,
- uint64_t Address,
- const void *Decoder);
-static DecodeStatus DecodeShiftRightImm32(MCInst &Inst, unsigned Val,
- uint64_t Address,
- const void *Decoder);
-static DecodeStatus DecodeShiftRightImm64(MCInst &Inst, unsigned Val,
+static DecodeStatus DecodeQQRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
uint64_t Address,
const void *Decoder);
-
-static DecodeStatus DecodeShiftLeftImm8(MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeShiftLeftImm16(MCInst &Inst, unsigned Val,
- uint64_t Address,
- const void *Decoder);
-static DecodeStatus DecodeShiftLeftImm32(MCInst &Inst, unsigned Val,
- uint64_t Address,
- const void *Decoder);
-static DecodeStatus DecodeShiftLeftImm64(MCInst &Inst, unsigned Val,
- uint64_t Address,
- const void *Decoder);
-
-template<int RegWidth>
-static DecodeStatus DecodeMoveWideImmOperand(llvm::MCInst &Inst,
- unsigned FullImm,
- uint64_t Address,
- const void *Decoder);
-
-template<int RegWidth>
-static DecodeStatus DecodeLogicalImmOperand(llvm::MCInst &Inst,
- unsigned Bits,
+static DecodeStatus DecodeQQQRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder);
+static DecodeStatus DecodeQQQQRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
uint64_t Address,
const void *Decoder);
-
-static DecodeStatus DecodeRegExtendOperand(llvm::MCInst &Inst,
- unsigned ShiftAmount,
+static DecodeStatus DecodeDDRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder);
+static DecodeStatus DecodeDDDRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
uint64_t Address,
const void *Decoder);
-template <A64SE::ShiftExtSpecifiers Ext, bool IsHalf>
-static DecodeStatus
-DecodeNeonMovImmShiftOperand(llvm::MCInst &Inst, unsigned ShiftAmount,
- uint64_t Address, const void *Decoder);
-
-static DecodeStatus Decode32BitShiftOperand(llvm::MCInst &Inst,
- unsigned ShiftAmount,
+static DecodeStatus DecodeDDDDRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
uint64_t Address,
const void *Decoder);
-static DecodeStatus DecodeBitfieldInstruction(llvm::MCInst &Inst, unsigned Insn,
+
+static DecodeStatus DecodeFixedPointScaleImm32(llvm::MCInst &Inst, unsigned Imm,
+ uint64_t Address,
+ const void *Decoder);
+static DecodeStatus DecodeFixedPointScaleImm64(llvm::MCInst &Inst, unsigned Imm,
+ uint64_t Address,
+ const void *Decoder);
+static DecodeStatus DecodePCRelLabel19(llvm::MCInst &Inst, unsigned Imm,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeMemExtend(llvm::MCInst &Inst, unsigned Imm,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeMRSSystemRegister(llvm::MCInst &Inst, unsigned Imm,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeMSRSystemRegister(llvm::MCInst &Inst, unsigned Imm,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeThreeAddrSRegInstruction(llvm::MCInst &Inst,
+ uint32_t insn,
+ uint64_t Address,
+ const void *Decoder);
+static DecodeStatus DecodeMoveImmInstruction(llvm::MCInst &Inst, uint32_t insn,
+ uint64_t Address,
+ const void *Decoder);
+static DecodeStatus DecodeUnsignedLdStInstruction(llvm::MCInst &Inst,
+ uint32_t insn,
+ uint64_t Address,
+ const void *Decoder);
+static DecodeStatus DecodeSignedLdStInstruction(llvm::MCInst &Inst,
+ uint32_t insn, uint64_t Address,
+ const void *Decoder);
+static DecodeStatus DecodeExclusiveLdStInstruction(llvm::MCInst &Inst,
+ uint32_t insn,
+ uint64_t Address,
+ const void *Decoder);
+static DecodeStatus DecodePairLdStInstruction(llvm::MCInst &Inst, uint32_t insn,
uint64_t Address,
const void *Decoder);
-
-static DecodeStatus DecodeFMOVLaneInstruction(llvm::MCInst &Inst, unsigned Insn,
+static DecodeStatus DecodeAddSubERegInstruction(llvm::MCInst &Inst,
+ uint32_t insn, uint64_t Address,
+ const void *Decoder);
+static DecodeStatus DecodeLogicalImmInstruction(llvm::MCInst &Inst,
+ uint32_t insn, uint64_t Address,
+ const void *Decoder);
+static DecodeStatus DecodeModImmInstruction(llvm::MCInst &Inst, uint32_t insn,
+ uint64_t Address,
+ const void *Decoder);
+static DecodeStatus DecodeModImmTiedInstruction(llvm::MCInst &Inst,
+ uint32_t insn, uint64_t Address,
+ const void *Decoder);
+static DecodeStatus DecodeAdrInstruction(llvm::MCInst &Inst, uint32_t insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeBaseAddSubImm(llvm::MCInst &Inst, uint32_t insn,
+ uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeUnconditionalBranch(llvm::MCInst &Inst, uint32_t insn,
uint64_t Address,
const void *Decoder);
+static DecodeStatus DecodeSystemPStateInstruction(llvm::MCInst &Inst,
+ uint32_t insn,
+ uint64_t Address,
+ const void *Decoder);
+static DecodeStatus DecodeTestAndBranch(llvm::MCInst &Inst, uint32_t insn,
+ uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeLDSTPairInstruction(llvm::MCInst &Inst,
- unsigned Insn,
+static DecodeStatus DecodeFMOVLaneInstruction(llvm::MCInst &Inst, unsigned Insn,
uint64_t Address,
const void *Decoder);
-
-static DecodeStatus DecodeLoadPairExclusiveInstruction(llvm::MCInst &Inst,
- unsigned Val,
- uint64_t Address,
- const void *Decoder);
-
-template<typename SomeNamedImmMapper>
-static DecodeStatus DecodeNamedImmOperand(llvm::MCInst &Inst,
- unsigned Val,
- uint64_t Address,
- const void *Decoder);
-
-static DecodeStatus
-DecodeSysRegOperand(const A64SysReg::SysRegMapper &InstMapper,
- llvm::MCInst &Inst, unsigned Val,
- uint64_t Address, const void *Decoder);
-
-static DecodeStatus DecodeMRSOperand(llvm::MCInst &Inst,
- unsigned Val,
- uint64_t Address,
- const void *Decoder);
-
-static DecodeStatus DecodeMSROperand(llvm::MCInst &Inst,
- unsigned Val,
- uint64_t Address,
- const void *Decoder);
-
-
-static DecodeStatus DecodeSingleIndexedInstruction(llvm::MCInst &Inst,
- unsigned Val,
- uint64_t Address,
- const void *Decoder);
-
-static DecodeStatus DecodeVLDSTPostInstruction(MCInst &Inst, unsigned Val,
- uint64_t Address,
+static DecodeStatus DecodeVecShiftR64Imm(llvm::MCInst &Inst, unsigned Imm,
+ uint64_t Addr, const void *Decoder);
+static DecodeStatus DecodeVecShiftR64ImmNarrow(llvm::MCInst &Inst, unsigned Imm,
+ uint64_t Addr,
const void *Decoder);
-
-static DecodeStatus DecodeVLDSTLanePostInstruction(MCInst &Inst, unsigned Insn,
- uint64_t Address,
- const void *Decoder);
-
-static DecodeStatus DecodeSHLLInstruction(MCInst &Inst, unsigned Insn,
- uint64_t Address,
- const void *Decoder);
-
-static bool Check(DecodeStatus &Out, DecodeStatus In);
-
-#include "AArch64GenDisassemblerTables.inc"
+static DecodeStatus DecodeVecShiftR32Imm(llvm::MCInst &Inst, unsigned Imm,
+ uint64_t Addr, const void *Decoder);
+static DecodeStatus DecodeVecShiftR32ImmNarrow(llvm::MCInst &Inst, unsigned Imm,
+ uint64_t Addr,
+ const void *Decoder);
+static DecodeStatus DecodeVecShiftR16Imm(llvm::MCInst &Inst, unsigned Imm,
+ uint64_t Addr, const void *Decoder);
+static DecodeStatus DecodeVecShiftR16ImmNarrow(llvm::MCInst &Inst, unsigned Imm,
+ uint64_t Addr,
+ const void *Decoder);
+static DecodeStatus DecodeVecShiftR8Imm(llvm::MCInst &Inst, unsigned Imm,
+ uint64_t Addr, const void *Decoder);
+static DecodeStatus DecodeVecShiftL64Imm(llvm::MCInst &Inst, unsigned Imm,
+ uint64_t Addr, const void *Decoder);
+static DecodeStatus DecodeVecShiftL32Imm(llvm::MCInst &Inst, unsigned Imm,
+ uint64_t Addr, const void *Decoder);
+static DecodeStatus DecodeVecShiftL16Imm(llvm::MCInst &Inst, unsigned Imm,
+ uint64_t Addr, const void *Decoder);
+static DecodeStatus DecodeVecShiftL8Imm(llvm::MCInst &Inst, unsigned Imm,
+ uint64_t Addr, const void *Decoder);
static bool Check(DecodeStatus &Out, DecodeStatus In) {
switch (In) {
@@ -261,486 +186,479 @@ static bool Check(DecodeStatus &Out, DecodeStatus In) {
llvm_unreachable("Invalid DecodeStatus!");
}
+#include "AArch64GenDisassemblerTables.inc"
+#include "AArch64GenInstrInfo.inc"
+
+#define Success llvm::MCDisassembler::Success
+#define Fail llvm::MCDisassembler::Fail
+#define SoftFail llvm::MCDisassembler::SoftFail
+
+static MCDisassembler *createAArch64Disassembler(const Target &T,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx) {
+ return new AArch64Disassembler(STI, Ctx);
+}
+
DecodeStatus AArch64Disassembler::getInstruction(MCInst &MI, uint64_t &Size,
- const MemoryObject &Region,
- uint64_t Address,
- raw_ostream &os,
- raw_ostream &cs) const {
+ const MemoryObject &Region,
+ uint64_t Address,
+ raw_ostream &os,
+ raw_ostream &cs) const {
CommentStream = &cs;
uint8_t bytes[4];
+ Size = 0;
// We want to read exactly 4 bytes of data.
- if (Region.readBytes(Address, 4, bytes) == -1) {
- Size = 0;
- return MCDisassembler::Fail;
- }
+ if (Region.readBytes(Address, 4, (uint8_t *)bytes) == -1)
+ return Fail;
+ Size = 4;
// Encoded as a little-endian 32-bit word in the stream.
- uint32_t insn = (bytes[3] << 24) |
- (bytes[2] << 16) |
- (bytes[1] << 8) |
- (bytes[0] << 0);
+ uint32_t insn =
+ (bytes[3] << 24) | (bytes[2] << 16) | (bytes[1] << 8) | (bytes[0] << 0);
// Calling the auto-generated decoder function.
- DecodeStatus result = decodeInstruction(DecoderTableA6432, MI, insn, Address,
- this, STI);
- if (result != MCDisassembler::Fail) {
- Size = 4;
- return result;
- }
-
- MI.clear();
- Size = 0;
- return MCDisassembler::Fail;
+ return decodeInstruction(DecoderTable32, MI, insn, Address, this, STI);
}
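
// A minimal standalone sketch of the byte-to-word assembly above: AArch64
// instructions are stored little-endian, so the lowest-addressed byte is the
// least significant. The NOP encoding 0xd503201f appears in the stream as
// 1f 20 03 d5.
#include <cstdint>

constexpr uint32_t assembleLE(uint8_t b0, uint8_t b1, uint8_t b2, uint8_t b3) {
  return (uint32_t(b3) << 24) | (uint32_t(b2) << 16) |
         (uint32_t(b1) << 8) | (uint32_t(b0) << 0);
}

static_assert(assembleLE(0x1f, 0x20, 0x03, 0xd5) == 0xd503201f,
              "stream bytes 1f 20 03 d5 are the NOP word");
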
-static unsigned getReg(const void *D, unsigned RC, unsigned RegNo) {
- const AArch64Disassembler *Dis = static_cast<const AArch64Disassembler*>(D);
- return Dis->getRegInfo()->getRegClass(RC).getRegister(RegNo);
+static MCSymbolizer *
+createAArch64ExternalSymbolizer(StringRef TT, LLVMOpInfoCallback GetOpInfo,
+ LLVMSymbolLookupCallback SymbolLookUp,
+ void *DisInfo, MCContext *Ctx,
+ MCRelocationInfo *RelInfo) {
+ return new llvm::AArch64ExternalSymbolizer(
+ *Ctx,
+ std::unique_ptr<MCRelocationInfo>(RelInfo),
+ GetOpInfo, SymbolLookUp, DisInfo);
}
-static DecodeStatus DecodeGPR64RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
- uint64_t Address, const void *Decoder) {
- if (RegNo > 31)
- return MCDisassembler::Fail;
+extern "C" void LLVMInitializeAArch64Disassembler() {
+ TargetRegistry::RegisterMCDisassembler(TheAArch64leTarget,
+ createAArch64Disassembler);
+ TargetRegistry::RegisterMCDisassembler(TheAArch64beTarget,
+ createAArch64Disassembler);
+ TargetRegistry::RegisterMCSymbolizer(TheAArch64leTarget,
+ createAArch64ExternalSymbolizer);
+ TargetRegistry::RegisterMCSymbolizer(TheAArch64beTarget,
+ createAArch64ExternalSymbolizer);
- uint16_t Register = getReg(Decoder, AArch64::GPR64RegClassID, RegNo);
- Inst.addOperand(MCOperand::CreateReg(Register));
- return MCDisassembler::Success;
+ TargetRegistry::RegisterMCDisassembler(TheARM64leTarget,
+ createAArch64Disassembler);
+ TargetRegistry::RegisterMCDisassembler(TheARM64beTarget,
+ createAArch64Disassembler);
+ TargetRegistry::RegisterMCSymbolizer(TheARM64leTarget,
+ createAArch64ExternalSymbolizer);
+ TargetRegistry::RegisterMCSymbolizer(TheARM64beTarget,
+ createAArch64ExternalSymbolizer);
}
-static DecodeStatus
-DecodeGPR64xspRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
- uint64_t Address, const void *Decoder) {
- if (RegNo > 31)
- return MCDisassembler::Fail;
-
- uint16_t Register = getReg(Decoder, AArch64::GPR64xspRegClassID, RegNo);
- Inst.addOperand(MCOperand::CreateReg(Register));
- return MCDisassembler::Success;
-}
+static const unsigned FPR128DecoderTable[] = {
+ AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4,
+ AArch64::Q5, AArch64::Q6, AArch64::Q7, AArch64::Q8, AArch64::Q9,
+ AArch64::Q10, AArch64::Q11, AArch64::Q12, AArch64::Q13, AArch64::Q14,
+ AArch64::Q15, AArch64::Q16, AArch64::Q17, AArch64::Q18, AArch64::Q19,
+ AArch64::Q20, AArch64::Q21, AArch64::Q22, AArch64::Q23, AArch64::Q24,
+ AArch64::Q25, AArch64::Q26, AArch64::Q27, AArch64::Q28, AArch64::Q29,
+ AArch64::Q30, AArch64::Q31
+};
-static DecodeStatus DecodeGPR32RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
- uint64_t Address,
- const void *Decoder) {
+static DecodeStatus DecodeFPR128RegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Addr,
+ const void *Decoder) {
if (RegNo > 31)
- return MCDisassembler::Fail;
+ return Fail;
- uint16_t Register = getReg(Decoder, AArch64::GPR32RegClassID, RegNo);
+ unsigned Register = FPR128DecoderTable[RegNo];
Inst.addOperand(MCOperand::CreateReg(Register));
- return MCDisassembler::Success;
+ return Success;
}
-static DecodeStatus
-DecodeGPR32wspRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
- uint64_t Address, const void *Decoder) {
- if (RegNo > 31)
- return MCDisassembler::Fail;
-
- uint16_t Register = getReg(Decoder, AArch64::GPR32wspRegClassID, RegNo);
- Inst.addOperand(MCOperand::CreateReg(Register));
- return MCDisassembler::Success;
+static DecodeStatus DecodeFPR128_loRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Addr,
+ const void *Decoder) {
+ if (RegNo > 15)
+ return Fail;
+ return DecodeFPR128RegisterClass(Inst, RegNo, Addr, Decoder);
}
-static DecodeStatus
-DecodeFPR8RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
- uint64_t Address, const void *Decoder) {
- if (RegNo > 31)
- return MCDisassembler::Fail;
-
- uint16_t Register = getReg(Decoder, AArch64::FPR8RegClassID, RegNo);
- Inst.addOperand(MCOperand::CreateReg(Register));
- return MCDisassembler::Success;
-}
+static const unsigned FPR64DecoderTable[] = {
+ AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4,
+ AArch64::D5, AArch64::D6, AArch64::D7, AArch64::D8, AArch64::D9,
+ AArch64::D10, AArch64::D11, AArch64::D12, AArch64::D13, AArch64::D14,
+ AArch64::D15, AArch64::D16, AArch64::D17, AArch64::D18, AArch64::D19,
+ AArch64::D20, AArch64::D21, AArch64::D22, AArch64::D23, AArch64::D24,
+ AArch64::D25, AArch64::D26, AArch64::D27, AArch64::D28, AArch64::D29,
+ AArch64::D30, AArch64::D31
+};
-static DecodeStatus
-DecodeFPR16RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
- uint64_t Address, const void *Decoder) {
+static DecodeStatus DecodeFPR64RegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Addr,
+ const void *Decoder) {
if (RegNo > 31)
- return MCDisassembler::Fail;
+ return Fail;
- uint16_t Register = getReg(Decoder, AArch64::FPR16RegClassID, RegNo);
+ unsigned Register = FPR64DecoderTable[RegNo];
Inst.addOperand(MCOperand::CreateReg(Register));
- return MCDisassembler::Success;
+ return Success;
}
+static const unsigned FPR32DecoderTable[] = {
+ AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4,
+ AArch64::S5, AArch64::S6, AArch64::S7, AArch64::S8, AArch64::S9,
+ AArch64::S10, AArch64::S11, AArch64::S12, AArch64::S13, AArch64::S14,
+ AArch64::S15, AArch64::S16, AArch64::S17, AArch64::S18, AArch64::S19,
+ AArch64::S20, AArch64::S21, AArch64::S22, AArch64::S23, AArch64::S24,
+ AArch64::S25, AArch64::S26, AArch64::S27, AArch64::S28, AArch64::S29,
+ AArch64::S30, AArch64::S31
+};
-static DecodeStatus
-DecodeFPR32RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
- uint64_t Address, const void *Decoder) {
+static DecodeStatus DecodeFPR32RegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Addr,
+ const void *Decoder) {
if (RegNo > 31)
- return MCDisassembler::Fail;
+ return Fail;
- uint16_t Register = getReg(Decoder, AArch64::FPR32RegClassID, RegNo);
+ unsigned Register = FPR32DecoderTable[RegNo];
Inst.addOperand(MCOperand::CreateReg(Register));
- return MCDisassembler::Success;
+ return Success;
}
-static DecodeStatus
-DecodeFPR64RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
- uint64_t Address, const void *Decoder) {
+static const unsigned FPR16DecoderTable[] = {
+ AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4,
+ AArch64::H5, AArch64::H6, AArch64::H7, AArch64::H8, AArch64::H9,
+ AArch64::H10, AArch64::H11, AArch64::H12, AArch64::H13, AArch64::H14,
+ AArch64::H15, AArch64::H16, AArch64::H17, AArch64::H18, AArch64::H19,
+ AArch64::H20, AArch64::H21, AArch64::H22, AArch64::H23, AArch64::H24,
+ AArch64::H25, AArch64::H26, AArch64::H27, AArch64::H28, AArch64::H29,
+ AArch64::H30, AArch64::H31
+};
+
+static DecodeStatus DecodeFPR16RegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Addr,
+ const void *Decoder) {
if (RegNo > 31)
- return MCDisassembler::Fail;
+ return Fail;
- uint16_t Register = getReg(Decoder, AArch64::FPR64RegClassID, RegNo);
+ unsigned Register = FPR16DecoderTable[RegNo];
Inst.addOperand(MCOperand::CreateReg(Register));
- return MCDisassembler::Success;
+ return Success;
}
-static DecodeStatus
-DecodeFPR64LoRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
- uint64_t Address, const void *Decoder) {
- if (RegNo > 15)
- return MCDisassembler::Fail;
-
- return DecodeFPR64RegisterClass(Inst, RegNo, Address, Decoder);
-}
+static const unsigned FPR8DecoderTable[] = {
+ AArch64::B0, AArch64::B1, AArch64::B2, AArch64::B3, AArch64::B4,
+ AArch64::B5, AArch64::B6, AArch64::B7, AArch64::B8, AArch64::B9,
+ AArch64::B10, AArch64::B11, AArch64::B12, AArch64::B13, AArch64::B14,
+ AArch64::B15, AArch64::B16, AArch64::B17, AArch64::B18, AArch64::B19,
+ AArch64::B20, AArch64::B21, AArch64::B22, AArch64::B23, AArch64::B24,
+ AArch64::B25, AArch64::B26, AArch64::B27, AArch64::B28, AArch64::B29,
+ AArch64::B30, AArch64::B31
+};
-static DecodeStatus
-DecodeFPR128RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
- uint64_t Address, const void *Decoder) {
+static DecodeStatus DecodeFPR8RegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Addr,
+ const void *Decoder) {
if (RegNo > 31)
- return MCDisassembler::Fail;
+ return Fail;
- uint16_t Register = getReg(Decoder, AArch64::FPR128RegClassID, RegNo);
+ unsigned Register = FPR8DecoderTable[RegNo];
Inst.addOperand(MCOperand::CreateReg(Register));
- return MCDisassembler::Success;
+ return Success;
}
-static DecodeStatus
-DecodeFPR128LoRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
- uint64_t Address, const void *Decoder) {
- if (RegNo > 15)
- return MCDisassembler::Fail;
-
- return DecodeFPR128RegisterClass(Inst, RegNo, Address, Decoder);
-}
+static const unsigned GPR64DecoderTable[] = {
+ AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4,
+ AArch64::X5, AArch64::X6, AArch64::X7, AArch64::X8, AArch64::X9,
+ AArch64::X10, AArch64::X11, AArch64::X12, AArch64::X13, AArch64::X14,
+ AArch64::X15, AArch64::X16, AArch64::X17, AArch64::X18, AArch64::X19,
+ AArch64::X20, AArch64::X21, AArch64::X22, AArch64::X23, AArch64::X24,
+ AArch64::X25, AArch64::X26, AArch64::X27, AArch64::X28, AArch64::FP,
+ AArch64::LR, AArch64::XZR
+};
-static DecodeStatus DecodeGPR64noxzrRegisterClass(llvm::MCInst &Inst,
- unsigned RegNo,
- uint64_t Address,
- const void *Decoder) {
- if (RegNo > 30)
- return MCDisassembler::Fail;
+static DecodeStatus DecodeGPR64RegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Addr,
+ const void *Decoder) {
+ if (RegNo > 31)
+ return Fail;
- uint16_t Register = getReg(Decoder, AArch64::GPR64noxzrRegClassID, RegNo);
+ unsigned Register = GPR64DecoderTable[RegNo];
Inst.addOperand(MCOperand::CreateReg(Register));
- return MCDisassembler::Success;
+ return Success;
}
-static DecodeStatus DecodeRegisterClassByID(llvm::MCInst &Inst, unsigned RegNo,
- unsigned RegID,
- const void *Decoder) {
+static DecodeStatus DecodeGPR64spRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Addr,
+ const void *Decoder) {
if (RegNo > 31)
- return MCDisassembler::Fail;
-
- uint16_t Register = getReg(Decoder, RegID, RegNo);
+ return Fail;
+ unsigned Register = GPR64DecoderTable[RegNo];
+ if (Register == AArch64::XZR)
+ Register = AArch64::SP;
Inst.addOperand(MCOperand::CreateReg(Register));
- return MCDisassembler::Success;
+ return Success;
}
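
// A standalone sketch of why GPR64 and GPR64sp share one table: encoding 31
// names XZR in data operands but SP in base/destination contexts, so only
// that entry differs (illustrative enum, not the LLVM register numbering).
#include <cstdint>

enum class X64 : uint8_t { X0 = 0, /* ...X1-X28, FP, LR... */ XZR = 31, SP = 32 };

constexpr X64 gpr64(unsigned N) { return X64(N); }
constexpr X64 gpr64sp(unsigned N) { return N == 31 ? X64::SP : X64(N); }

// "ADD X0, SP, #0" and "ORR X0, XZR, X1" both place 31 in a register field;
// only the operand's register class tells the decoder which one is meant.
static_assert(gpr64(31) == X64::XZR && gpr64sp(31) == X64::SP,
              "encoding 31 is context-dependent");
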
-static DecodeStatus DecodeDPairRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
- uint64_t Address,
- const void *Decoder) {
- return DecodeRegisterClassByID(Inst, RegNo, AArch64::DPairRegClassID,
- Decoder);
-}
+static const unsigned GPR32DecoderTable[] = {
+ AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4,
+ AArch64::W5, AArch64::W6, AArch64::W7, AArch64::W8, AArch64::W9,
+ AArch64::W10, AArch64::W11, AArch64::W12, AArch64::W13, AArch64::W14,
+ AArch64::W15, AArch64::W16, AArch64::W17, AArch64::W18, AArch64::W19,
+ AArch64::W20, AArch64::W21, AArch64::W22, AArch64::W23, AArch64::W24,
+ AArch64::W25, AArch64::W26, AArch64::W27, AArch64::W28, AArch64::W29,
+ AArch64::W30, AArch64::WZR
+};
-static DecodeStatus DecodeQPairRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
- uint64_t Address,
+static DecodeStatus DecodeGPR32RegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Addr,
const void *Decoder) {
- return DecodeRegisterClassByID(Inst, RegNo, AArch64::QPairRegClassID,
- Decoder);
-}
+ if (RegNo > 31)
+ return Fail;
-static DecodeStatus DecodeDTripleRegisterClass(llvm::MCInst &Inst,
- unsigned RegNo, uint64_t Address,
- const void *Decoder) {
- return DecodeRegisterClassByID(Inst, RegNo, AArch64::DTripleRegClassID,
- Decoder);
+ unsigned Register = GPR32DecoderTable[RegNo];
+ Inst.addOperand(MCOperand::CreateReg(Register));
+ return Success;
}
-static DecodeStatus DecodeQTripleRegisterClass(llvm::MCInst &Inst,
- unsigned RegNo, uint64_t Address,
+static DecodeStatus DecodeGPR32spRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Addr,
const void *Decoder) {
- return DecodeRegisterClassByID(Inst, RegNo, AArch64::QTripleRegClassID,
- Decoder);
-}
+ if (RegNo > 31)
+ return Fail;
-static DecodeStatus DecodeDQuadRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
- uint64_t Address,
- const void *Decoder) {
- return DecodeRegisterClassByID(Inst, RegNo, AArch64::DQuadRegClassID,
- Decoder);
+ unsigned Register = GPR32DecoderTable[RegNo];
+ if (Register == AArch64::WZR)
+ Register = AArch64::WSP;
+ Inst.addOperand(MCOperand::CreateReg(Register));
+ return Success;
}
-static DecodeStatus DecodeQQuadRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
- uint64_t Address,
- const void *Decoder) {
- return DecodeRegisterClassByID(Inst, RegNo, AArch64::QQuadRegClassID,
- Decoder);
-}
+static const unsigned VectorDecoderTable[] = {
+ AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4,
+ AArch64::Q5, AArch64::Q6, AArch64::Q7, AArch64::Q8, AArch64::Q9,
+ AArch64::Q10, AArch64::Q11, AArch64::Q12, AArch64::Q13, AArch64::Q14,
+ AArch64::Q15, AArch64::Q16, AArch64::Q17, AArch64::Q18, AArch64::Q19,
+ AArch64::Q20, AArch64::Q21, AArch64::Q22, AArch64::Q23, AArch64::Q24,
+ AArch64::Q25, AArch64::Q26, AArch64::Q27, AArch64::Q28, AArch64::Q29,
+ AArch64::Q30, AArch64::Q31
+};
-static DecodeStatus DecodeAddrRegExtendOperand(llvm::MCInst &Inst,
- unsigned OptionHiS,
- uint64_t Address,
- const void *Decoder) {
- // Option{1} must be 1. OptionHiS is made up of {Option{2}, Option{1},
- // S}. Hence we want to check bit 1.
- if (!(OptionHiS & 2))
- return MCDisassembler::Fail;
+static DecodeStatus DecodeVectorRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Addr,
+ const void *Decoder) {
+ if (RegNo > 31)
+ return Fail;
- Inst.addOperand(MCOperand::CreateImm(OptionHiS));
- return MCDisassembler::Success;
+ unsigned Register = VectorDecoderTable[RegNo];
+ Inst.addOperand(MCOperand::CreateReg(Register));
+ return Success;
}
-static DecodeStatus DecodeBitfield32ImmOperand(llvm::MCInst &Inst,
- unsigned Imm6Bits,
- uint64_t Address,
- const void *Decoder) {
- // In the 32-bit variant, bit 6 must be zero. I.e. the immediate must be
- // between 0 and 31.
- if (Imm6Bits > 31)
- return MCDisassembler::Fail;
+static const unsigned QQDecoderTable[] = {
+ AArch64::Q0_Q1, AArch64::Q1_Q2, AArch64::Q2_Q3, AArch64::Q3_Q4,
+ AArch64::Q4_Q5, AArch64::Q5_Q6, AArch64::Q6_Q7, AArch64::Q7_Q8,
+ AArch64::Q8_Q9, AArch64::Q9_Q10, AArch64::Q10_Q11, AArch64::Q11_Q12,
+ AArch64::Q12_Q13, AArch64::Q13_Q14, AArch64::Q14_Q15, AArch64::Q15_Q16,
+ AArch64::Q16_Q17, AArch64::Q17_Q18, AArch64::Q18_Q19, AArch64::Q19_Q20,
+ AArch64::Q20_Q21, AArch64::Q21_Q22, AArch64::Q22_Q23, AArch64::Q23_Q24,
+ AArch64::Q24_Q25, AArch64::Q25_Q26, AArch64::Q26_Q27, AArch64::Q27_Q28,
+ AArch64::Q28_Q29, AArch64::Q29_Q30, AArch64::Q30_Q31, AArch64::Q31_Q0
+};
- Inst.addOperand(MCOperand::CreateImm(Imm6Bits));
- return MCDisassembler::Success;
+static DecodeStatus DecodeQQRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Addr, const void *Decoder) {
+ if (RegNo > 31)
+ return Fail;
+ unsigned Register = QQDecoderTable[RegNo];
+ Inst.addOperand(MCOperand::CreateReg(Register));
+ return Success;
}
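
// A sketch of the tuple-table shape: a register-list operand names NumVecs
// consecutive Q registers starting at Rt, wrapping modulo 32, which is why
// the final QQ entry above is Q31_Q0 (assumed standalone arithmetic).
constexpr unsigned tupleElem(unsigned Rt, unsigned Idx) {
  return (Rt + Idx) & 31; // consecutive registers, modulo the register file
}

static_assert(tupleElem(31, 1) == 0, "the pair starting at Q31 ends at Q0");
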
-static DecodeStatus DecodeCVT32FixedPosOperand(llvm::MCInst &Inst,
- unsigned Imm6Bits,
- uint64_t Address,
- const void *Decoder) {
- // 1 <= Imm <= 32. Encoded as 64 - Imm so: 63 >= Encoded >= 32.
- if (Imm6Bits < 32)
- return MCDisassembler::Fail;
-
- Inst.addOperand(MCOperand::CreateImm(Imm6Bits));
- return MCDisassembler::Success;
-}
+static const unsigned QQQDecoderTable[] = {
+ AArch64::Q0_Q1_Q2, AArch64::Q1_Q2_Q3, AArch64::Q2_Q3_Q4,
+ AArch64::Q3_Q4_Q5, AArch64::Q4_Q5_Q6, AArch64::Q5_Q6_Q7,
+ AArch64::Q6_Q7_Q8, AArch64::Q7_Q8_Q9, AArch64::Q8_Q9_Q10,
+ AArch64::Q9_Q10_Q11, AArch64::Q10_Q11_Q12, AArch64::Q11_Q12_Q13,
+ AArch64::Q12_Q13_Q14, AArch64::Q13_Q14_Q15, AArch64::Q14_Q15_Q16,
+ AArch64::Q15_Q16_Q17, AArch64::Q16_Q17_Q18, AArch64::Q17_Q18_Q19,
+ AArch64::Q18_Q19_Q20, AArch64::Q19_Q20_Q21, AArch64::Q20_Q21_Q22,
+ AArch64::Q21_Q22_Q23, AArch64::Q22_Q23_Q24, AArch64::Q23_Q24_Q25,
+ AArch64::Q24_Q25_Q26, AArch64::Q25_Q26_Q27, AArch64::Q26_Q27_Q28,
+ AArch64::Q27_Q28_Q29, AArch64::Q28_Q29_Q30, AArch64::Q29_Q30_Q31,
+ AArch64::Q30_Q31_Q0, AArch64::Q31_Q0_Q1
+};
-static DecodeStatus DecodeFPZeroOperand(llvm::MCInst &Inst,
- unsigned RmBits,
- uint64_t Address,
- const void *Decoder) {
- // Any bits are valid in the instruction (they're architecturally ignored),
- // but a code generator should insert 0.
- Inst.addOperand(MCOperand::CreateImm(0));
- return MCDisassembler::Success;
+static DecodeStatus DecodeQQQRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Addr, const void *Decoder) {
+ if (RegNo > 31)
+ return Fail;
+ unsigned Register = QQQDecoderTable[RegNo];
+ Inst.addOperand(MCOperand::CreateReg(Register));
+ return Success;
}
-static DecodeStatus DecodeShiftRightImm8(MCInst &Inst, unsigned Val,
- uint64_t Address,
- const void *Decoder) {
- Inst.addOperand(MCOperand::CreateImm(8 - Val));
- return MCDisassembler::Success;
-}
+static const unsigned QQQQDecoderTable[] = {
+ AArch64::Q0_Q1_Q2_Q3, AArch64::Q1_Q2_Q3_Q4, AArch64::Q2_Q3_Q4_Q5,
+ AArch64::Q3_Q4_Q5_Q6, AArch64::Q4_Q5_Q6_Q7, AArch64::Q5_Q6_Q7_Q8,
+ AArch64::Q6_Q7_Q8_Q9, AArch64::Q7_Q8_Q9_Q10, AArch64::Q8_Q9_Q10_Q11,
+ AArch64::Q9_Q10_Q11_Q12, AArch64::Q10_Q11_Q12_Q13, AArch64::Q11_Q12_Q13_Q14,
+ AArch64::Q12_Q13_Q14_Q15, AArch64::Q13_Q14_Q15_Q16, AArch64::Q14_Q15_Q16_Q17,
+ AArch64::Q15_Q16_Q17_Q18, AArch64::Q16_Q17_Q18_Q19, AArch64::Q17_Q18_Q19_Q20,
+ AArch64::Q18_Q19_Q20_Q21, AArch64::Q19_Q20_Q21_Q22, AArch64::Q20_Q21_Q22_Q23,
+ AArch64::Q21_Q22_Q23_Q24, AArch64::Q22_Q23_Q24_Q25, AArch64::Q23_Q24_Q25_Q26,
+ AArch64::Q24_Q25_Q26_Q27, AArch64::Q25_Q26_Q27_Q28, AArch64::Q26_Q27_Q28_Q29,
+ AArch64::Q27_Q28_Q29_Q30, AArch64::Q28_Q29_Q30_Q31, AArch64::Q29_Q30_Q31_Q0,
+ AArch64::Q30_Q31_Q0_Q1, AArch64::Q31_Q0_Q1_Q2
+};
-static DecodeStatus DecodeShiftRightImm16(MCInst &Inst, unsigned Val,
- uint64_t Address,
- const void *Decoder) {
- Inst.addOperand(MCOperand::CreateImm(16 - Val));
- return MCDisassembler::Success;
+static DecodeStatus DecodeQQQQRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Addr,
+ const void *Decoder) {
+ if (RegNo > 31)
+ return Fail;
+ unsigned Register = QQQQDecoderTable[RegNo];
+ Inst.addOperand(MCOperand::CreateReg(Register));
+ return Success;
}
-static DecodeStatus DecodeShiftRightImm32(MCInst &Inst, unsigned Val,
- uint64_t Address,
- const void *Decoder) {
- Inst.addOperand(MCOperand::CreateImm(32 - Val));
- return MCDisassembler::Success;
-}
+static const unsigned DDDecoderTable[] = {
+ AArch64::D0_D1, AArch64::D1_D2, AArch64::D2_D3, AArch64::D3_D4,
+ AArch64::D4_D5, AArch64::D5_D6, AArch64::D6_D7, AArch64::D7_D8,
+ AArch64::D8_D9, AArch64::D9_D10, AArch64::D10_D11, AArch64::D11_D12,
+ AArch64::D12_D13, AArch64::D13_D14, AArch64::D14_D15, AArch64::D15_D16,
+ AArch64::D16_D17, AArch64::D17_D18, AArch64::D18_D19, AArch64::D19_D20,
+ AArch64::D20_D21, AArch64::D21_D22, AArch64::D22_D23, AArch64::D23_D24,
+ AArch64::D24_D25, AArch64::D25_D26, AArch64::D26_D27, AArch64::D27_D28,
+ AArch64::D28_D29, AArch64::D29_D30, AArch64::D30_D31, AArch64::D31_D0
+};
-static DecodeStatus DecodeShiftRightImm64(MCInst &Inst, unsigned Val,
- uint64_t Address,
- const void *Decoder) {
- Inst.addOperand(MCOperand::CreateImm(64 - Val));
- return MCDisassembler::Success;
+static DecodeStatus DecodeDDRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Addr, const void *Decoder) {
+ if (RegNo > 31)
+ return Fail;
+ unsigned Register = DDDecoderTable[RegNo];
+ Inst.addOperand(MCOperand::CreateReg(Register));
+ return Success;
}
-static DecodeStatus DecodeShiftLeftImm8(MCInst &Inst, unsigned Val,
- uint64_t Address,
- const void *Decoder) {
- if (Val > 7)
- return MCDisassembler::Fail;
+static const unsigned DDDDecoderTable[] = {
+ AArch64::D0_D1_D2, AArch64::D1_D2_D3, AArch64::D2_D3_D4,
+ AArch64::D3_D4_D5, AArch64::D4_D5_D6, AArch64::D5_D6_D7,
+ AArch64::D6_D7_D8, AArch64::D7_D8_D9, AArch64::D8_D9_D10,
+ AArch64::D9_D10_D11, AArch64::D10_D11_D12, AArch64::D11_D12_D13,
+ AArch64::D12_D13_D14, AArch64::D13_D14_D15, AArch64::D14_D15_D16,
+ AArch64::D15_D16_D17, AArch64::D16_D17_D18, AArch64::D17_D18_D19,
+ AArch64::D18_D19_D20, AArch64::D19_D20_D21, AArch64::D20_D21_D22,
+ AArch64::D21_D22_D23, AArch64::D22_D23_D24, AArch64::D23_D24_D25,
+ AArch64::D24_D25_D26, AArch64::D25_D26_D27, AArch64::D26_D27_D28,
+ AArch64::D27_D28_D29, AArch64::D28_D29_D30, AArch64::D29_D30_D31,
+ AArch64::D30_D31_D0, AArch64::D31_D0_D1
+};
- Inst.addOperand(MCOperand::CreateImm(Val));
- return MCDisassembler::Success;
+static DecodeStatus DecodeDDDRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Addr, const void *Decoder) {
+ if (RegNo > 31)
+ return Fail;
+ unsigned Register = DDDDecoderTable[RegNo];
+ Inst.addOperand(MCOperand::CreateReg(Register));
+ return Success;
}
-static DecodeStatus DecodeShiftLeftImm16(MCInst &Inst, unsigned Val,
- uint64_t Address,
- const void *Decoder) {
- if (Val > 15)
- return MCDisassembler::Fail;
+static const unsigned DDDDDecoderTable[] = {
+ AArch64::D0_D1_D2_D3, AArch64::D1_D2_D3_D4, AArch64::D2_D3_D4_D5,
+ AArch64::D3_D4_D5_D6, AArch64::D4_D5_D6_D7, AArch64::D5_D6_D7_D8,
+ AArch64::D6_D7_D8_D9, AArch64::D7_D8_D9_D10, AArch64::D8_D9_D10_D11,
+ AArch64::D9_D10_D11_D12, AArch64::D10_D11_D12_D13, AArch64::D11_D12_D13_D14,
+ AArch64::D12_D13_D14_D15, AArch64::D13_D14_D15_D16, AArch64::D14_D15_D16_D17,
+ AArch64::D15_D16_D17_D18, AArch64::D16_D17_D18_D19, AArch64::D17_D18_D19_D20,
+ AArch64::D18_D19_D20_D21, AArch64::D19_D20_D21_D22, AArch64::D20_D21_D22_D23,
+ AArch64::D21_D22_D23_D24, AArch64::D22_D23_D24_D25, AArch64::D23_D24_D25_D26,
+ AArch64::D24_D25_D26_D27, AArch64::D25_D26_D27_D28, AArch64::D26_D27_D28_D29,
+ AArch64::D27_D28_D29_D30, AArch64::D28_D29_D30_D31, AArch64::D29_D30_D31_D0,
+ AArch64::D30_D31_D0_D1, AArch64::D31_D0_D1_D2
+};
- Inst.addOperand(MCOperand::CreateImm(Val));
- return MCDisassembler::Success;
+static DecodeStatus DecodeDDDDRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Addr,
+ const void *Decoder) {
+ if (RegNo > 31)
+ return Fail;
+ unsigned Register = DDDDDecoderTable[RegNo];
+ Inst.addOperand(MCOperand::CreateReg(Register));
+ return Success;
}
-static DecodeStatus DecodeShiftLeftImm32(MCInst &Inst, unsigned Val,
- uint64_t Address,
- const void *Decoder) {
- if (Val > 31)
- return MCDisassembler::Fail;
-
- Inst.addOperand(MCOperand::CreateImm(Val));
- return MCDisassembler::Success;
+static DecodeStatus DecodeFixedPointScaleImm32(llvm::MCInst &Inst, unsigned Imm,
+ uint64_t Addr,
+ const void *Decoder) {
+ // scale{5} is asserted as 1 in tblgen.
+ Imm |= 0x20;
+ Inst.addOperand(MCOperand::CreateImm(64 - Imm));
+ return Success;
}
-static DecodeStatus DecodeShiftLeftImm64(MCInst &Inst, unsigned Val,
- uint64_t Address,
- const void *Decoder) {
- if (Val > 63)
- return MCDisassembler::Fail;
-
- Inst.addOperand(MCOperand::CreateImm(Val));
- return MCDisassembler::Success;
+static DecodeStatus DecodeFixedPointScaleImm64(llvm::MCInst &Inst, unsigned Imm,
+ uint64_t Addr,
+ const void *Decoder) {
+ Inst.addOperand(MCOperand::CreateImm(64 - Imm));
+ return Success;
}
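
// A sketch of the scale-to-fbits arithmetic above, assuming Imm is the raw
// scale field with the constant-1 bit already stripped for the 32-bit form:
// the encoding stores 64 - fbits, so decoding subtracts from 64.
constexpr unsigned fbits32(unsigned Imm5) { return 64 - (Imm5 | 0x20); }
constexpr unsigned fbits64(unsigned Imm6) { return 64 - Imm6; }

static_assert(fbits32(31) == 1, "scale 63 -> 1 fractional bit");
static_assert(fbits32(0) == 32, "scale 32 -> 32 fractional bits");
static_assert(fbits64(1) == 63, "scale 1 -> 63 fractional bits");
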
-template<int RegWidth>
-static DecodeStatus DecodeMoveWideImmOperand(llvm::MCInst &Inst,
- unsigned FullImm,
- uint64_t Address,
- const void *Decoder) {
- unsigned Imm16 = FullImm & 0xffff;
- unsigned Shift = FullImm >> 16;
+static DecodeStatus DecodePCRelLabel19(llvm::MCInst &Inst, unsigned Imm,
+ uint64_t Addr, const void *Decoder) {
+ int64_t ImmVal = Imm;
+ const AArch64Disassembler *Dis =
+ static_cast<const AArch64Disassembler *>(Decoder);
+
+ // Sign-extend 19-bit immediate.
+ if (ImmVal & (1 << (19 - 1)))
+ ImmVal |= ~((1LL << 19) - 1);
- if (RegWidth == 32 && Shift > 1) return MCDisassembler::Fail;
+ if (!Dis->tryAddingSymbolicOperand(Inst, ImmVal << 2, Addr,
+ Inst.getOpcode() != AArch64::LDRXl, 0, 4))
+ Inst.addOperand(MCOperand::CreateImm(ImmVal));
+ return Success;
+}
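
// A standalone sketch of the 19-bit sign extension above (assumed helper,
// mirroring the arithmetic rather than the LLVM API). Branch labels are
// word-aligned, so the extended value is scaled by 4 to get a byte offset.
#include <cstdint>

constexpr int64_t signExtend19(uint32_t Imm) {
  int64_t V = Imm & 0x7ffff;        // 19 encoded bits
  if (V & (1LL << 18))              // sign bit set?
    V |= ~((1LL << 19) - 1);        // propagate ones through bit 63
  return V;
}

static_assert(signExtend19(0x7ffff) == -1, "all-ones field is -1");
static_assert(signExtend19(0x7ffff) * 4 == -4, "one word back = -4 bytes");
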
- Inst.addOperand(MCOperand::CreateImm(Imm16));
- Inst.addOperand(MCOperand::CreateImm(Shift));
- return MCDisassembler::Success;
+static DecodeStatus DecodeMemExtend(llvm::MCInst &Inst, unsigned Imm,
+ uint64_t Address, const void *Decoder) {
+ Inst.addOperand(MCOperand::CreateImm((Imm >> 1) & 1));
+ Inst.addOperand(MCOperand::CreateImm(Imm & 1));
+ return Success;
}
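
// A sketch of the two-bit split above, on the hedged reading that the pair
// feeds the operand printer as (sign-extend?, do-shift?): bit 1 selects
// SXTW/SXTX over UXTW/LSL, bit 0 says whether the extend also shifts the
// offset by the access size.
constexpr bool extIsSigned(unsigned Imm) { return (Imm >> 1) & 1; }
constexpr bool extDoesShift(unsigned Imm) { return Imm & 1; }

static_assert(extIsSigned(0b11) && extDoesShift(0b11), "e.g. sxtw #2");
static_assert(!extIsSigned(0b00) && !extDoesShift(0b00), "plain uxtw");
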
-template<int RegWidth>
-static DecodeStatus DecodeLogicalImmOperand(llvm::MCInst &Inst,
- unsigned Bits,
+static DecodeStatus DecodeMRSSystemRegister(llvm::MCInst &Inst, unsigned Imm,
uint64_t Address,
const void *Decoder) {
- uint64_t Imm;
- if (!A64Imms::isLogicalImmBits(RegWidth, Bits, Imm))
- return MCDisassembler::Fail;
-
- Inst.addOperand(MCOperand::CreateImm(Bits));
- return MCDisassembler::Success;
-}
+ const AArch64Disassembler *Dis =
+ static_cast<const AArch64Disassembler *>(Decoder);
+ const MCSubtargetInfo &STI = Dis->getSubtargetInfo();
+ Imm |= 0x8000;
+ Inst.addOperand(MCOperand::CreateImm(Imm));
-static DecodeStatus DecodeRegExtendOperand(llvm::MCInst &Inst,
- unsigned ShiftAmount,
- uint64_t Address,
- const void *Decoder) {
- // Only values 0-4 are valid for this 3-bit field
- if (ShiftAmount > 4)
- return MCDisassembler::Fail;
+ bool ValidNamed;
+ (void)AArch64SysReg::MRSMapper(STI.getFeatureBits())
+ .toString(Imm, ValidNamed);
- Inst.addOperand(MCOperand::CreateImm(ShiftAmount));
- return MCDisassembler::Success;
+ return ValidNamed ? Success : Fail;
}
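
// A sketch of the 0x8000 fix-up above: the instruction carries only 15 bits
// of the system-register number (o0:op1:CRn:CRm:op2) and the top bit of op0
// is architecturally always 1, so OR-ing in 0x8000 rebuilds the 16-bit value
// the mapper expects. NZCV (S3_3_C4_C2_0) as a worked case:
constexpr unsigned sysRegEnc(unsigned Op0, unsigned Op1, unsigned CRn,
                             unsigned CRm, unsigned Op2) {
  return (Op0 << 14) | (Op1 << 11) | (CRn << 7) | (CRm << 3) | Op2;
}

static_assert(sysRegEnc(3, 3, 4, 2, 0) == 0xda10, "NZCV");
static_assert(((sysRegEnc(3, 3, 4, 2, 0) & 0x7fff) | 0x8000) == 0xda10,
              "15 encoded bits plus the implied op0 bit round-trip");
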
-static DecodeStatus Decode32BitShiftOperand(llvm::MCInst &Inst,
- unsigned ShiftAmount,
+static DecodeStatus DecodeMSRSystemRegister(llvm::MCInst &Inst, unsigned Imm,
uint64_t Address,
const void *Decoder) {
- // Only values below 32 are valid for a 32-bit register
- if (ShiftAmount > 31)
- return MCDisassembler::Fail;
-
- Inst.addOperand(MCOperand::CreateImm(ShiftAmount));
- return MCDisassembler::Success;
-}
-
-static DecodeStatus DecodeBitfieldInstruction(llvm::MCInst &Inst, unsigned Insn,
- uint64_t Address,
- const void *Decoder) {
- unsigned Rd = fieldFromInstruction(Insn, 0, 5);
- unsigned Rn = fieldFromInstruction(Insn, 5, 5);
- unsigned ImmS = fieldFromInstruction(Insn, 10, 6);
- unsigned ImmR = fieldFromInstruction(Insn, 16, 6);
- unsigned SF = fieldFromInstruction(Insn, 31, 1);
-
- // Undef for 0b11 just in case it occurs. Don't want the compiler to optimise
- // out assertions that it thinks should never be hit.
- enum OpcTypes { SBFM = 0, BFM, UBFM, Undef } Opc;
- Opc = (OpcTypes)fieldFromInstruction(Insn, 29, 2);
-
- if (!SF) {
- // ImmR and ImmS must be between 0 and 31 for 32-bit instructions.
- if (ImmR > 31 || ImmS > 31)
- return MCDisassembler::Fail;
- }
+ const AArch64Disassembler *Dis =
+ static_cast<const AArch64Disassembler *>(Decoder);
+ const MCSubtargetInfo &STI = Dis->getSubtargetInfo();
- if (SF) {
- DecodeGPR64RegisterClass(Inst, Rd, Address, Decoder);
- // BFM MCInsts use Rd as a source too.
- if (Opc == BFM) DecodeGPR64RegisterClass(Inst, Rd, Address, Decoder);
- DecodeGPR64RegisterClass(Inst, Rn, Address, Decoder);
- } else {
- DecodeGPR32RegisterClass(Inst, Rd, Address, Decoder);
- // BFM MCInsts use Rd as a source too.
- if (Opc == BFM) DecodeGPR32RegisterClass(Inst, Rd, Address, Decoder);
- DecodeGPR32RegisterClass(Inst, Rn, Address, Decoder);
- }
-
- // ASR and LSR have more specific patterns so they won't get here:
- assert(!(ImmS == 31 && !SF && Opc != BFM)
- && "shift should have used auto decode");
- assert(!(ImmS == 63 && SF && Opc != BFM)
- && "shift should have used auto decode");
-
- // Extension instructions similarly:
- if (Opc == SBFM && ImmR == 0) {
- assert((ImmS != 7 && ImmS != 15) && "extension got here");
- assert((ImmS != 31 || SF == 0) && "extension got here");
- } else if (Opc == UBFM && ImmR == 0) {
- assert((SF != 0 || (ImmS != 7 && ImmS != 15)) && "extension got here");
- }
+ Imm |= 0x8000;
+ Inst.addOperand(MCOperand::CreateImm(Imm));
- if (Opc == UBFM) {
- // It might be a LSL instruction, which actually takes the shift amount
- // itself as an MCInst operand.
- if (SF && (ImmS + 1) % 64 == ImmR) {
- Inst.setOpcode(AArch64::LSLxxi);
- Inst.addOperand(MCOperand::CreateImm(63 - ImmS));
- return MCDisassembler::Success;
- } else if (!SF && (ImmS + 1) % 32 == ImmR) {
- Inst.setOpcode(AArch64::LSLwwi);
- Inst.addOperand(MCOperand::CreateImm(31 - ImmS));
- return MCDisassembler::Success;
- }
- }
-
- // Otherwise it's definitely either an extract or an insert depending on which
- // of ImmR or ImmS is larger.
- unsigned ExtractOp, InsertOp;
- switch (Opc) {
- default: llvm_unreachable("unexpected instruction trying to decode bitfield");
- case SBFM:
- ExtractOp = SF ? AArch64::SBFXxxii : AArch64::SBFXwwii;
- InsertOp = SF ? AArch64::SBFIZxxii : AArch64::SBFIZwwii;
- break;
- case BFM:
- ExtractOp = SF ? AArch64::BFXILxxii : AArch64::BFXILwwii;
- InsertOp = SF ? AArch64::BFIxxii : AArch64::BFIwwii;
- break;
- case UBFM:
- ExtractOp = SF ? AArch64::UBFXxxii : AArch64::UBFXwwii;
- InsertOp = SF ? AArch64::UBFIZxxii : AArch64::UBFIZwwii;
- break;
- }
-
- // Otherwise it's a boring insert or extract
- Inst.addOperand(MCOperand::CreateImm(ImmR));
- Inst.addOperand(MCOperand::CreateImm(ImmS));
-
-
- if (ImmS < ImmR)
- Inst.setOpcode(InsertOp);
- else
- Inst.setOpcode(ExtractOp);
+ bool ValidNamed;
+ (void)AArch64SysReg::MSRMapper(STI.getFeatureBits())
+ .toString(Imm, ValidNamed);
- return MCDisassembler::Success;
+ return ValidNamed ? Success : Fail;
}
static DecodeStatus DecodeFMOVLaneInstruction(llvm::MCInst &Inst, unsigned Insn,
@@ -763,811 +681,879 @@ static DecodeStatus DecodeFMOVLaneInstruction(llvm::MCInst &Inst, unsigned Insn,
// Add the lane
Inst.addOperand(MCOperand::CreateImm(1));
- return MCDisassembler::Success;
+ return Success;
}
+static DecodeStatus DecodeVecShiftRImm(llvm::MCInst &Inst, unsigned Imm,
+ unsigned Add) {
+ Inst.addOperand(MCOperand::CreateImm(Add - Imm));
+ return Success;
+}
-static DecodeStatus DecodeLDSTPairInstruction(llvm::MCInst &Inst,
- unsigned Insn,
- uint64_t Address,
- const void *Decoder) {
- DecodeStatus Result = MCDisassembler::Success;
- unsigned Rt = fieldFromInstruction(Insn, 0, 5);
- unsigned Rn = fieldFromInstruction(Insn, 5, 5);
- unsigned Rt2 = fieldFromInstruction(Insn, 10, 5);
- unsigned SImm7 = fieldFromInstruction(Insn, 15, 7);
- unsigned L = fieldFromInstruction(Insn, 22, 1);
- unsigned V = fieldFromInstruction(Insn, 26, 1);
- unsigned Opc = fieldFromInstruction(Insn, 30, 2);
-
- // Not an official name, but it turns out that bit 23 distinguishes indexed
- // from non-indexed operations.
- unsigned Indexed = fieldFromInstruction(Insn, 23, 1);
-
- if (Indexed && L == 0) {
- // The MCInst for an indexed store has an out operand and 4 ins:
- // Rn_wb, Rt, Rt2, Rn, Imm
- DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder);
- }
-
- // You shouldn't load to the same register twice in an instruction...
- if (L && Rt == Rt2)
- Result = MCDisassembler::SoftFail;
-
- // ... or do any operation that writes-back to a transfer register. But note
- // that "stp xzr, xzr, [sp], #4" is fine because xzr and sp are different.
- if (Indexed && V == 0 && Rn != 31 && (Rt == Rn || Rt2 == Rn))
- Result = MCDisassembler::SoftFail;
-
- // Exactly how we decode the MCInst's registers depends on the Opc and V
- // fields of the instruction. These also obviously determine the size of the
- // operation so we can fill in that information while we're at it.
- if (V) {
- // The instruction operates on the FP/SIMD registers
- switch (Opc) {
- default: return MCDisassembler::Fail;
- case 0:
- DecodeFPR32RegisterClass(Inst, Rt, Address, Decoder);
- DecodeFPR32RegisterClass(Inst, Rt2, Address, Decoder);
- break;
- case 1:
- DecodeFPR64RegisterClass(Inst, Rt, Address, Decoder);
- DecodeFPR64RegisterClass(Inst, Rt2, Address, Decoder);
- break;
- case 2:
- DecodeFPR128RegisterClass(Inst, Rt, Address, Decoder);
- DecodeFPR128RegisterClass(Inst, Rt2, Address, Decoder);
- break;
- }
- } else {
- switch (Opc) {
- default: return MCDisassembler::Fail;
- case 0:
- DecodeGPR32RegisterClass(Inst, Rt, Address, Decoder);
- DecodeGPR32RegisterClass(Inst, Rt2, Address, Decoder);
- break;
- case 1:
- assert(L && "unexpected \"store signed\" attempt");
- DecodeGPR64RegisterClass(Inst, Rt, Address, Decoder);
- DecodeGPR64RegisterClass(Inst, Rt2, Address, Decoder);
- break;
- case 2:
- DecodeGPR64RegisterClass(Inst, Rt, Address, Decoder);
- DecodeGPR64RegisterClass(Inst, Rt2, Address, Decoder);
- break;
- }
- }
-
- if (Indexed && L == 1) {
- // The MCInst for an indexed load has 3 out operands and an 3 ins:
- // Rt, Rt2, Rn_wb, Rt2, Rn, Imm
- DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder);
- }
-
-
- DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder);
- Inst.addOperand(MCOperand::CreateImm(SImm7));
+static DecodeStatus DecodeVecShiftLImm(llvm::MCInst &Inst, unsigned Imm,
+ unsigned Add) {
+ Inst.addOperand(MCOperand::CreateImm((Imm + Add) & (Add - 1)));
+ return Success;
+}
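
// A sketch of the two shift-immediate mappings, assuming Imm is the encoded
// immh:immb field (with the constant leading 1 already stripped for the
// right-shift forms): right shifts store 2*esize - amount, left shifts
// store esize + amount.
constexpr unsigned vecShiftR(unsigned Imm, unsigned ESize) {
  return ESize - Imm;
}
constexpr unsigned vecShiftL(unsigned Imm, unsigned ESize) {
  return (Imm + ESize) & (ESize - 1);
}

// A 64-bit SSHR by 1 arrives as Imm = 63; a 32-bit SHL by 3 as Imm = 35.
static_assert(vecShiftR(63, 64) == 1, "sshr d, d, #1");
static_assert(vecShiftL(35, 32) == 3, "shl s, s, #3");
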
- return Result;
+static DecodeStatus DecodeVecShiftR64Imm(llvm::MCInst &Inst, unsigned Imm,
+ uint64_t Addr, const void *Decoder) {
+ return DecodeVecShiftRImm(Inst, Imm, 64);
}
-static DecodeStatus DecodeLoadPairExclusiveInstruction(llvm::MCInst &Inst,
- uint32_t Val,
- uint64_t Address,
- const void *Decoder) {
- unsigned Rt = fieldFromInstruction(Val, 0, 5);
- unsigned Rn = fieldFromInstruction(Val, 5, 5);
- unsigned Rt2 = fieldFromInstruction(Val, 10, 5);
- unsigned MemSize = fieldFromInstruction(Val, 30, 2);
-
- DecodeStatus S = MCDisassembler::Success;
- if (Rt == Rt2) S = MCDisassembler::SoftFail;
-
- switch (MemSize) {
- case 2:
- if (!Check(S, DecodeGPR32RegisterClass(Inst, Rt, Address, Decoder)))
- return MCDisassembler::Fail;
- if (!Check(S, DecodeGPR32RegisterClass(Inst, Rt2, Address, Decoder)))
- return MCDisassembler::Fail;
- break;
- case 3:
- if (!Check(S, DecodeGPR64RegisterClass(Inst, Rt, Address, Decoder)))
- return MCDisassembler::Fail;
- if (!Check(S, DecodeGPR64RegisterClass(Inst, Rt2, Address, Decoder)))
- return MCDisassembler::Fail;
- break;
- default:
- llvm_unreachable("Invalid MemSize in DecodeLoadPairExclusiveInstruction");
- }
+static DecodeStatus DecodeVecShiftR64ImmNarrow(llvm::MCInst &Inst, unsigned Imm,
+ uint64_t Addr,
+ const void *Decoder) {
+ return DecodeVecShiftRImm(Inst, Imm | 0x20, 64);
+}
- if (!Check(S, DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder)))
- return MCDisassembler::Fail;
+static DecodeStatus DecodeVecShiftR32Imm(llvm::MCInst &Inst, unsigned Imm,
+ uint64_t Addr, const void *Decoder) {
+ return DecodeVecShiftRImm(Inst, Imm, 32);
+}
- return S;
+static DecodeStatus DecodeVecShiftR32ImmNarrow(llvm::MCInst &Inst, unsigned Imm,
+ uint64_t Addr,
+ const void *Decoder) {
+ return DecodeVecShiftRImm(Inst, Imm | 0x10, 32);
}
-template<typename SomeNamedImmMapper>
-static DecodeStatus DecodeNamedImmOperand(llvm::MCInst &Inst,
- unsigned Val,
- uint64_t Address,
- const void *Decoder) {
- SomeNamedImmMapper Mapper;
- bool ValidNamed;
- Mapper.toString(Val, ValidNamed);
- if (ValidNamed || Mapper.validImm(Val)) {
- Inst.addOperand(MCOperand::CreateImm(Val));
- return MCDisassembler::Success;
- }
+static DecodeStatus DecodeVecShiftR16Imm(llvm::MCInst &Inst, unsigned Imm,
+ uint64_t Addr, const void *Decoder) {
+ return DecodeVecShiftRImm(Inst, Imm, 16);
+}
- return MCDisassembler::Fail;
+static DecodeStatus DecodeVecShiftR16ImmNarrow(llvm::MCInst &Inst, unsigned Imm,
+ uint64_t Addr,
+ const void *Decoder) {
+ return DecodeVecShiftRImm(Inst, Imm | 0x8, 16);
}
-static DecodeStatus DecodeSysRegOperand(const A64SysReg::SysRegMapper &Mapper,
- llvm::MCInst &Inst,
- unsigned Val,
- uint64_t Address,
- const void *Decoder) {
- bool ValidNamed;
- Mapper.toString(Val, ValidNamed);
+static DecodeStatus DecodeVecShiftR8Imm(llvm::MCInst &Inst, unsigned Imm,
+ uint64_t Addr, const void *Decoder) {
+ return DecodeVecShiftRImm(Inst, Imm, 8);
+}
- Inst.addOperand(MCOperand::CreateImm(Val));
+static DecodeStatus DecodeVecShiftL64Imm(llvm::MCInst &Inst, unsigned Imm,
+ uint64_t Addr, const void *Decoder) {
+ return DecodeVecShiftLImm(Inst, Imm, 64);
+}
- return ValidNamed ? MCDisassembler::Success : MCDisassembler::Fail;
+static DecodeStatus DecodeVecShiftL32Imm(llvm::MCInst &Inst, unsigned Imm,
+ uint64_t Addr, const void *Decoder) {
+ return DecodeVecShiftLImm(Inst, Imm, 32);
}
-static DecodeStatus DecodeMRSOperand(llvm::MCInst &Inst,
- unsigned Val,
- uint64_t Address,
- const void *Decoder) {
- return DecodeSysRegOperand(A64SysReg::MRSMapper(), Inst, Val, Address,
- Decoder);
+static DecodeStatus DecodeVecShiftL16Imm(llvm::MCInst &Inst, unsigned Imm,
+ uint64_t Addr, const void *Decoder) {
+ return DecodeVecShiftLImm(Inst, Imm, 16);
}
-static DecodeStatus DecodeMSROperand(llvm::MCInst &Inst,
- unsigned Val,
- uint64_t Address,
- const void *Decoder) {
- return DecodeSysRegOperand(A64SysReg::MSRMapper(), Inst, Val, Address,
- Decoder);
+static DecodeStatus DecodeVecShiftL8Imm(llvm::MCInst &Inst, unsigned Imm,
+ uint64_t Addr, const void *Decoder) {
+ return DecodeVecShiftLImm(Inst, Imm, 8);
}
-static DecodeStatus DecodeSingleIndexedInstruction(llvm::MCInst &Inst,
- unsigned Insn,
- uint64_t Address,
+static DecodeStatus DecodeThreeAddrSRegInstruction(llvm::MCInst &Inst,
+ uint32_t insn, uint64_t Addr,
const void *Decoder) {
- unsigned Rt = fieldFromInstruction(Insn, 0, 5);
- unsigned Rn = fieldFromInstruction(Insn, 5, 5);
- unsigned Imm9 = fieldFromInstruction(Insn, 12, 9);
-
- unsigned Opc = fieldFromInstruction(Insn, 22, 2);
- unsigned V = fieldFromInstruction(Insn, 26, 1);
- unsigned Size = fieldFromInstruction(Insn, 30, 2);
-
- if (Opc == 0 || (V == 1 && Opc == 2)) {
- // It's a store, the MCInst gets: Rn_wb, Rt, Rn, Imm
- DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder);
+ unsigned Rd = fieldFromInstruction(insn, 0, 5);
+ unsigned Rn = fieldFromInstruction(insn, 5, 5);
+ unsigned Rm = fieldFromInstruction(insn, 16, 5);
+ unsigned shiftHi = fieldFromInstruction(insn, 22, 2);
+ unsigned shiftLo = fieldFromInstruction(insn, 10, 6);
+ unsigned shift = (shiftHi << 6) | shiftLo;
+ switch (Inst.getOpcode()) {
+ default:
+ return Fail;
+ case AArch64::ADDWrs:
+ case AArch64::ADDSWrs:
+ case AArch64::SUBWrs:
+ case AArch64::SUBSWrs:
+ // if shift == '11' then ReservedValue()
+ if (shiftHi == 0x3)
+ return Fail;
+ // Deliberate fallthrough
+ case AArch64::ANDWrs:
+ case AArch64::ANDSWrs:
+ case AArch64::BICWrs:
+ case AArch64::BICSWrs:
+ case AArch64::ORRWrs:
+ case AArch64::ORNWrs:
+ case AArch64::EORWrs:
+ case AArch64::EONWrs: {
+ // if sf == '0' and imm6<5> == '1' then ReservedValue()
+ if (shiftLo >> 5 == 1)
+ return Fail;
+ DecodeGPR32RegisterClass(Inst, Rd, Addr, Decoder);
+ DecodeGPR32RegisterClass(Inst, Rn, Addr, Decoder);
+ DecodeGPR32RegisterClass(Inst, Rm, Addr, Decoder);
+ break;
}
-
- if (V == 0 && (Opc == 2 || Size == 3)) {
- DecodeGPR64RegisterClass(Inst, Rt, Address, Decoder);
- } else if (V == 0) {
- DecodeGPR32RegisterClass(Inst, Rt, Address, Decoder);
- } else if (V == 1 && (Opc & 2)) {
- DecodeFPR128RegisterClass(Inst, Rt, Address, Decoder);
- } else {
- switch (Size) {
- case 0:
- DecodeFPR8RegisterClass(Inst, Rt, Address, Decoder);
- break;
- case 1:
- DecodeFPR16RegisterClass(Inst, Rt, Address, Decoder);
- break;
- case 2:
- DecodeFPR32RegisterClass(Inst, Rt, Address, Decoder);
- break;
- case 3:
- DecodeFPR64RegisterClass(Inst, Rt, Address, Decoder);
- break;
- }
+ case AArch64::ADDXrs:
+ case AArch64::ADDSXrs:
+ case AArch64::SUBXrs:
+ case AArch64::SUBSXrs:
+ // if shift == '11' then ReservedValue()
+ if (shiftHi == 0x3)
+ return Fail;
+ // Deliberate fallthrough
+ case AArch64::ANDXrs:
+ case AArch64::ANDSXrs:
+ case AArch64::BICXrs:
+ case AArch64::BICSXrs:
+ case AArch64::ORRXrs:
+ case AArch64::ORNXrs:
+ case AArch64::EORXrs:
+ case AArch64::EONXrs:
+ DecodeGPR64RegisterClass(Inst, Rd, Addr, Decoder);
+ DecodeGPR64RegisterClass(Inst, Rn, Addr, Decoder);
+ DecodeGPR64RegisterClass(Inst, Rm, Addr, Decoder);
+ break;
}
- if (Opc != 0 && (V != 1 || Opc != 2)) {
- // It's a load, the MCInst gets: Rt, Rn_wb, Rn, Imm
- DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder);
- }
+ Inst.addOperand(MCOperand::CreateImm(shift));
+ return Success;
+}
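
// A sketch of the packed shifter operand built above: two type bits (LSL=0,
// LSR=1, ASR=2, ROR=3) ride over six amount bits, matching the layout of
// AArch64_AM::getShifterImm in the addressing-modes header included above.
constexpr unsigned shifterImm(unsigned Type, unsigned Amount) {
  return (Type << 6) | (Amount & 0x3f);
}

// ADD W0, W1, W2, LSR #5 -> type 1, amount 5; type 0b11 (ROR) is the
// reserved case the add/sub opcodes reject above.
static_assert(shifterImm(1, 5) == 0x45, "LSR #5");
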
- DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder);
+static DecodeStatus DecodeMoveImmInstruction(llvm::MCInst &Inst, uint32_t insn,
+ uint64_t Addr,
+ const void *Decoder) {
+ unsigned Rd = fieldFromInstruction(insn, 0, 5);
+ unsigned imm = fieldFromInstruction(insn, 5, 16);
+ unsigned shift = fieldFromInstruction(insn, 21, 2);
+ shift <<= 4;
+ switch (Inst.getOpcode()) {
+ default:
+ return Fail;
+ case AArch64::MOVZWi:
+ case AArch64::MOVNWi:
+ case AArch64::MOVKWi:
+ if (shift & (1U << 5))
+ return Fail;
+ DecodeGPR32RegisterClass(Inst, Rd, Addr, Decoder);
+ break;
+ case AArch64::MOVZXi:
+ case AArch64::MOVNXi:
+ case AArch64::MOVKXi:
+ DecodeGPR64RegisterClass(Inst, Rd, Addr, Decoder);
+ break;
+ }
- Inst.addOperand(MCOperand::CreateImm(Imm9));
+ if (Inst.getOpcode() == AArch64::MOVKWi ||
+ Inst.getOpcode() == AArch64::MOVKXi)
+ Inst.addOperand(Inst.getOperand(0));
- // N.b. The official documentation says unpredictable if Rt == Rn, but this
- // takes place at the architectural rather than encoding level:
- //
- // "STR xzr, [sp], #4" is perfectly valid.
- if (V == 0 && Rt == Rn && Rn != 31)
- return MCDisassembler::SoftFail;
- else
- return MCDisassembler::Success;
+ Inst.addOperand(MCOperand::CreateImm(imm));
+ Inst.addOperand(MCOperand::CreateImm(shift));
+ return Success;
}
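
// A sketch of the hw-field scaling above: the 2-bit hw selects which 16-bit
// lane the immediate occupies, so the operand records hw*16 as an LSL
// amount; MOVK additionally ties Rd in as a source, hence the repeated
// operand 0.
#include <cstdint>

constexpr uint64_t movzValue(uint16_t Imm, unsigned Hw) {
  return uint64_t(Imm) << (Hw * 16); // MOVZ materializes one 16-bit lane
}

// MOVZ X0, #0xbeef, LSL #32; W-form shifts may only be 0 or 16, which is
// the bit-5 check above.
static_assert(movzValue(0xbeef, 2) == 0x0000beef00000000ULL, "lane 2");
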
-static MCDisassembler *createAArch64Disassembler(const Target &T,
- const MCSubtargetInfo &STI) {
- return new AArch64Disassembler(STI, T.createMCRegInfo(""));
-}
+static DecodeStatus DecodeUnsignedLdStInstruction(llvm::MCInst &Inst,
+ uint32_t insn, uint64_t Addr,
+ const void *Decoder) {
+ unsigned Rt = fieldFromInstruction(insn, 0, 5);
+ unsigned Rn = fieldFromInstruction(insn, 5, 5);
+ unsigned offset = fieldFromInstruction(insn, 10, 12);
+ const AArch64Disassembler *Dis =
+ static_cast<const AArch64Disassembler *>(Decoder);
-extern "C" void LLVMInitializeAArch64Disassembler() {
- TargetRegistry::RegisterMCDisassembler(TheAArch64leTarget,
- createAArch64Disassembler);
- TargetRegistry::RegisterMCDisassembler(TheAArch64beTarget,
- createAArch64Disassembler);
-}
+ switch (Inst.getOpcode()) {
+ default:
+ return Fail;
+ case AArch64::PRFMui:
+ // Rt is an immediate in prefetch.
+ Inst.addOperand(MCOperand::CreateImm(Rt));
+ break;
+ case AArch64::STRBBui:
+ case AArch64::LDRBBui:
+ case AArch64::LDRSBWui:
+ case AArch64::STRHHui:
+ case AArch64::LDRHHui:
+ case AArch64::LDRSHWui:
+ case AArch64::STRWui:
+ case AArch64::LDRWui:
+ DecodeGPR32RegisterClass(Inst, Rt, Addr, Decoder);
+ break;
+ case AArch64::LDRSBXui:
+ case AArch64::LDRSHXui:
+ case AArch64::LDRSWui:
+ case AArch64::STRXui:
+ case AArch64::LDRXui:
+ DecodeGPR64RegisterClass(Inst, Rt, Addr, Decoder);
+ break;
+ case AArch64::LDRQui:
+ case AArch64::STRQui:
+ DecodeFPR128RegisterClass(Inst, Rt, Addr, Decoder);
+ break;
+ case AArch64::LDRDui:
+ case AArch64::STRDui:
+ DecodeFPR64RegisterClass(Inst, Rt, Addr, Decoder);
+ break;
+ case AArch64::LDRSui:
+ case AArch64::STRSui:
+ DecodeFPR32RegisterClass(Inst, Rt, Addr, Decoder);
+ break;
+ case AArch64::LDRHui:
+ case AArch64::STRHui:
+ DecodeFPR16RegisterClass(Inst, Rt, Addr, Decoder);
+ break;
+ case AArch64::LDRBui:
+ case AArch64::STRBui:
+ DecodeFPR8RegisterClass(Inst, Rt, Addr, Decoder);
+ break;
+ }
-template <A64SE::ShiftExtSpecifiers Ext, bool IsHalf>
-static DecodeStatus
-DecodeNeonMovImmShiftOperand(llvm::MCInst &Inst, unsigned ShiftAmount,
- uint64_t Address, const void *Decoder) {
- bool IsLSL = false;
- if (Ext == A64SE::LSL)
- IsLSL = true;
- else if (Ext != A64SE::MSL)
- return MCDisassembler::Fail;
-
- // MSL and LSLH accepts encoded shift amount 0 or 1.
- if ((!IsLSL || (IsLSL && IsHalf)) && ShiftAmount != 0 && ShiftAmount != 1)
- return MCDisassembler::Fail;
-
- // LSL accepts encoded shift amount 0, 1, 2 or 3.
- if (IsLSL && ShiftAmount > 3)
- return MCDisassembler::Fail;
-
- Inst.addOperand(MCOperand::CreateImm(ShiftAmount));
- return MCDisassembler::Success;
+ DecodeGPR64spRegisterClass(Inst, Rn, Addr, Decoder);
+ if (!Dis->tryAddingSymbolicOperand(Inst, offset, Addr, Fail, 0, 4))
+ Inst.addOperand(MCOperand::CreateImm(offset));
+ return Success;
}
-// Decode post-index vector load/store instructions.
-// This is necessary as we need to decode Rm: if Rm == 0b11111, the last
-// operand is an immediate equal the the length of vector list in bytes,
-// or Rm is decoded to a GPR64noxzr register.
-static DecodeStatus DecodeVLDSTPostInstruction(MCInst &Inst, unsigned Insn,
- uint64_t Address,
- const void *Decoder) {
- unsigned Rt = fieldFromInstruction(Insn, 0, 5);
- unsigned Rn = fieldFromInstruction(Insn, 5, 5);
- unsigned Rm = fieldFromInstruction(Insn, 16, 5);
- unsigned Opcode = fieldFromInstruction(Insn, 12, 4);
- unsigned IsLoad = fieldFromInstruction(Insn, 22, 1);
- // 0 for 64bit vector list, 1 for 128bit vector list
- unsigned Is128BitVec = fieldFromInstruction(Insn, 30, 1);
+static DecodeStatus DecodeSignedLdStInstruction(llvm::MCInst &Inst,
+ uint32_t insn, uint64_t Addr,
+ const void *Decoder) {
+ unsigned Rt = fieldFromInstruction(insn, 0, 5);
+ unsigned Rn = fieldFromInstruction(insn, 5, 5);
+ int64_t offset = fieldFromInstruction(insn, 12, 9);
- unsigned NumVecs;
- switch (Opcode) {
- case 0: // ld4/st4
- case 2: // ld1/st1 with 4 vectors
- NumVecs = 4; break;
- case 4: // ld3/st3
- case 6: // ld1/st1 with 3 vectors
- NumVecs = 3; break;
- case 7: // ld1/st1 with 1 vector
- NumVecs = 1; break;
- case 8: // ld2/st2
- case 10: // ld1/st1 with 2 vectors
- NumVecs = 2; break;
+ // offset is a 9-bit signed immediate, so sign-extend it to
+ // fill the full 64-bit value.
+ if (offset & (1 << (9 - 1)))
+ offset |= ~((1LL << 9) - 1);
+
+ // First operand is always the writeback to the address register, if needed.
+ switch (Inst.getOpcode()) {
default:
- llvm_unreachable("Invalid opcode for post-index load/store instructions");
+ break;
+ case AArch64::LDRSBWpre:
+ case AArch64::LDRSHWpre:
+ case AArch64::STRBBpre:
+ case AArch64::LDRBBpre:
+ case AArch64::STRHHpre:
+ case AArch64::LDRHHpre:
+ case AArch64::STRWpre:
+ case AArch64::LDRWpre:
+ case AArch64::LDRSBWpost:
+ case AArch64::LDRSHWpost:
+ case AArch64::STRBBpost:
+ case AArch64::LDRBBpost:
+ case AArch64::STRHHpost:
+ case AArch64::LDRHHpost:
+ case AArch64::STRWpost:
+ case AArch64::LDRWpost:
+ case AArch64::LDRSBXpre:
+ case AArch64::LDRSHXpre:
+ case AArch64::STRXpre:
+ case AArch64::LDRSWpre:
+ case AArch64::LDRXpre:
+ case AArch64::LDRSBXpost:
+ case AArch64::LDRSHXpost:
+ case AArch64::STRXpost:
+ case AArch64::LDRSWpost:
+ case AArch64::LDRXpost:
+ case AArch64::LDRQpre:
+ case AArch64::STRQpre:
+ case AArch64::LDRQpost:
+ case AArch64::STRQpost:
+ case AArch64::LDRDpre:
+ case AArch64::STRDpre:
+ case AArch64::LDRDpost:
+ case AArch64::STRDpost:
+ case AArch64::LDRSpre:
+ case AArch64::STRSpre:
+ case AArch64::LDRSpost:
+ case AArch64::STRSpost:
+ case AArch64::LDRHpre:
+ case AArch64::STRHpre:
+ case AArch64::LDRHpost:
+ case AArch64::STRHpost:
+ case AArch64::LDRBpre:
+ case AArch64::STRBpre:
+ case AArch64::LDRBpost:
+ case AArch64::STRBpost:
+ DecodeGPR64spRegisterClass(Inst, Rn, Addr, Decoder);
+ break;
}
- // Decode vector list of 1/2/3/4 vectors for load instructions.
- if (IsLoad) {
- switch (NumVecs) {
- case 1:
- Is128BitVec ? DecodeFPR128RegisterClass(Inst, Rt, Address, Decoder)
- : DecodeFPR64RegisterClass(Inst, Rt, Address, Decoder);
- break;
- case 2:
- Is128BitVec ? DecodeQPairRegisterClass(Inst, Rt, Address, Decoder)
- : DecodeDPairRegisterClass(Inst, Rt, Address, Decoder);
- break;
- case 3:
- Is128BitVec ? DecodeQTripleRegisterClass(Inst, Rt, Address, Decoder)
- : DecodeDTripleRegisterClass(Inst, Rt, Address, Decoder);
- break;
- case 4:
- Is128BitVec ? DecodeQQuadRegisterClass(Inst, Rt, Address, Decoder)
- : DecodeDQuadRegisterClass(Inst, Rt, Address, Decoder);
- break;
- }
+ switch (Inst.getOpcode()) {
+ default:
+ return Fail;
+ case AArch64::PRFUMi:
+ // Rt is an immediate in prefetch.
+ Inst.addOperand(MCOperand::CreateImm(Rt));
+ break;
+ case AArch64::STURBBi:
+ case AArch64::LDURBBi:
+ case AArch64::LDURSBWi:
+ case AArch64::STURHHi:
+ case AArch64::LDURHHi:
+ case AArch64::LDURSHWi:
+ case AArch64::STURWi:
+ case AArch64::LDURWi:
+ case AArch64::LDTRSBWi:
+ case AArch64::LDTRSHWi:
+ case AArch64::STTRWi:
+ case AArch64::LDTRWi:
+ case AArch64::STTRHi:
+ case AArch64::LDTRHi:
+ case AArch64::LDTRBi:
+ case AArch64::STTRBi:
+ case AArch64::LDRSBWpre:
+ case AArch64::LDRSHWpre:
+ case AArch64::STRBBpre:
+ case AArch64::LDRBBpre:
+ case AArch64::STRHHpre:
+ case AArch64::LDRHHpre:
+ case AArch64::STRWpre:
+ case AArch64::LDRWpre:
+ case AArch64::LDRSBWpost:
+ case AArch64::LDRSHWpost:
+ case AArch64::STRBBpost:
+ case AArch64::LDRBBpost:
+ case AArch64::STRHHpost:
+ case AArch64::LDRHHpost:
+ case AArch64::STRWpost:
+ case AArch64::LDRWpost:
+ DecodeGPR32RegisterClass(Inst, Rt, Addr, Decoder);
+ break;
+ case AArch64::LDURSBXi:
+ case AArch64::LDURSHXi:
+ case AArch64::LDURSWi:
+ case AArch64::STURXi:
+ case AArch64::LDURXi:
+ case AArch64::LDTRSBXi:
+ case AArch64::LDTRSHXi:
+ case AArch64::LDTRSWi:
+ case AArch64::STTRXi:
+ case AArch64::LDTRXi:
+ case AArch64::LDRSBXpre:
+ case AArch64::LDRSHXpre:
+ case AArch64::STRXpre:
+ case AArch64::LDRSWpre:
+ case AArch64::LDRXpre:
+ case AArch64::LDRSBXpost:
+ case AArch64::LDRSHXpost:
+ case AArch64::STRXpost:
+ case AArch64::LDRSWpost:
+ case AArch64::LDRXpost:
+ DecodeGPR64RegisterClass(Inst, Rt, Addr, Decoder);
+ break;
+ case AArch64::LDURQi:
+ case AArch64::STURQi:
+ case AArch64::LDRQpre:
+ case AArch64::STRQpre:
+ case AArch64::LDRQpost:
+ case AArch64::STRQpost:
+ DecodeFPR128RegisterClass(Inst, Rt, Addr, Decoder);
+ break;
+ case AArch64::LDURDi:
+ case AArch64::STURDi:
+ case AArch64::LDRDpre:
+ case AArch64::STRDpre:
+ case AArch64::LDRDpost:
+ case AArch64::STRDpost:
+ DecodeFPR64RegisterClass(Inst, Rt, Addr, Decoder);
+ break;
+ case AArch64::LDURSi:
+ case AArch64::STURSi:
+ case AArch64::LDRSpre:
+ case AArch64::STRSpre:
+ case AArch64::LDRSpost:
+ case AArch64::STRSpost:
+ DecodeFPR32RegisterClass(Inst, Rt, Addr, Decoder);
+ break;
+ case AArch64::LDURHi:
+ case AArch64::STURHi:
+ case AArch64::LDRHpre:
+ case AArch64::STRHpre:
+ case AArch64::LDRHpost:
+ case AArch64::STRHpost:
+ DecodeFPR16RegisterClass(Inst, Rt, Addr, Decoder);
+ break;
+ case AArch64::LDURBi:
+ case AArch64::STURBi:
+ case AArch64::LDRBpre:
+ case AArch64::STRBpre:
+ case AArch64::LDRBpost:
+ case AArch64::STRBpost:
+ DecodeFPR8RegisterClass(Inst, Rt, Addr, Decoder);
+ break;
}
- // Decode write back register, which is equal to Rn.
- DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder);
- DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder);
-
- if (Rm == 31) // If Rm is 0x11111, add the vector list length in byte
- Inst.addOperand(MCOperand::CreateImm(NumVecs * (Is128BitVec ? 16 : 8)));
- else // Decode Rm
- DecodeGPR64noxzrRegisterClass(Inst, Rm, Address, Decoder);
-
- // Decode vector list of 1/2/3/4 vectors for load instructions.
- if (!IsLoad) {
- switch (NumVecs) {
- case 1:
- Is128BitVec ? DecodeFPR128RegisterClass(Inst, Rt, Address, Decoder)
- : DecodeFPR64RegisterClass(Inst, Rt, Address, Decoder);
- break;
- case 2:
- Is128BitVec ? DecodeQPairRegisterClass(Inst, Rt, Address, Decoder)
- : DecodeDPairRegisterClass(Inst, Rt, Address, Decoder);
- break;
- case 3:
- Is128BitVec ? DecodeQTripleRegisterClass(Inst, Rt, Address, Decoder)
- : DecodeDTripleRegisterClass(Inst, Rt, Address, Decoder);
- break;
- case 4:
- Is128BitVec ? DecodeQQuadRegisterClass(Inst, Rt, Address, Decoder)
- : DecodeDQuadRegisterClass(Inst, Rt, Address, Decoder);
- break;
- }
- }
+ DecodeGPR64spRegisterClass(Inst, Rn, Addr, Decoder);
+ Inst.addOperand(MCOperand::CreateImm(offset));
- return MCDisassembler::Success;
+ bool IsLoad = fieldFromInstruction(insn, 22, 1);
+ bool IsIndexed = fieldFromInstruction(insn, 10, 2) != 0;
+ bool IsFP = fieldFromInstruction(insn, 26, 1);
+
+ // Cannot write back to a transfer register (but xzr != sp).
+ if (IsLoad && IsIndexed && !IsFP && Rn != 31 && Rt == Rn)
+ return SoftFail;
+
+ return Success;
}
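
The 9-bit offset here, like the 7-bit pair offset further down, is sign-extended with the same mask-and-OR idiom. A minimal generic version with worked values (the helper name is made up for illustration):

```cpp
#include <cassert>
#include <cstdint>

// Sign-extend the low `Bits` bits of `x`, exactly as the decoders above do:
// if the sign bit of the field is set, OR in every higher bit.
static int64_t signExtend(int64_t x, unsigned Bits) {
  if (x & (1LL << (Bits - 1)))
    x |= ~((1LL << Bits) - 1);
  return x;
}

int main() {
  assert(signExtend(0x1FF, 9) == -1);   // all-ones 9-bit field
  assert(signExtend(0x100, 9) == -256); // just the sign bit
  assert(signExtend(0x0FF, 9) == 255);  // sign bit clear: unchanged
  assert(signExtend(0x40, 7) == -64);   // the 7-bit pair-offset case
}
```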
-// Decode post-index vector load/store lane instructions.
-// This is necessary as we need to decode Rm: if Rm == 0b11111, the last
- // operand is an immediate equal to the length of the changed bytes,
-// or Rm is decoded to a GPR64noxzr register.
-static DecodeStatus DecodeVLDSTLanePostInstruction(MCInst &Inst, unsigned Insn,
- uint64_t Address,
+static DecodeStatus DecodeExclusiveLdStInstruction(llvm::MCInst &Inst,
+ uint32_t insn, uint64_t Addr,
const void *Decoder) {
- bool Is64bitVec = false;
- bool IsLoadDup = false;
- bool IsLoad = false;
- // The total number of bytes transferred.
- // TransferBytes = NumVecs * OneLaneBytes
- unsigned TransferBytes = 0;
- unsigned NumVecs = 0;
- unsigned Opc = Inst.getOpcode();
- switch (Opc) {
- case AArch64::LD1R_WB_8B_fixed: case AArch64::LD1R_WB_8B_register:
- case AArch64::LD1R_WB_4H_fixed: case AArch64::LD1R_WB_4H_register:
- case AArch64::LD1R_WB_2S_fixed: case AArch64::LD1R_WB_2S_register:
- case AArch64::LD1R_WB_1D_fixed: case AArch64::LD1R_WB_1D_register: {
- switch (Opc) {
- case AArch64::LD1R_WB_8B_fixed: case AArch64::LD1R_WB_8B_register:
- TransferBytes = 1; break;
- case AArch64::LD1R_WB_4H_fixed: case AArch64::LD1R_WB_4H_register:
- TransferBytes = 2; break;
- case AArch64::LD1R_WB_2S_fixed: case AArch64::LD1R_WB_2S_register:
- TransferBytes = 4; break;
- case AArch64::LD1R_WB_1D_fixed: case AArch64::LD1R_WB_1D_register:
- TransferBytes = 8; break;
- }
- Is64bitVec = true;
- IsLoadDup = true;
- NumVecs = 1;
- break;
- }
+ unsigned Rt = fieldFromInstruction(insn, 0, 5);
+ unsigned Rn = fieldFromInstruction(insn, 5, 5);
+ unsigned Rt2 = fieldFromInstruction(insn, 10, 5);
+ unsigned Rs = fieldFromInstruction(insn, 16, 5);
- case AArch64::LD1R_WB_16B_fixed: case AArch64::LD1R_WB_16B_register:
- case AArch64::LD1R_WB_8H_fixed: case AArch64::LD1R_WB_8H_register:
- case AArch64::LD1R_WB_4S_fixed: case AArch64::LD1R_WB_4S_register:
- case AArch64::LD1R_WB_2D_fixed: case AArch64::LD1R_WB_2D_register: {
- switch (Opc) {
- case AArch64::LD1R_WB_16B_fixed: case AArch64::LD1R_WB_16B_register:
- TransferBytes = 1; break;
- case AArch64::LD1R_WB_8H_fixed: case AArch64::LD1R_WB_8H_register:
- TransferBytes = 2; break;
- case AArch64::LD1R_WB_4S_fixed: case AArch64::LD1R_WB_4S_register:
- TransferBytes = 4; break;
- case AArch64::LD1R_WB_2D_fixed: case AArch64::LD1R_WB_2D_register:
- TransferBytes = 8; break;
- }
- IsLoadDup = true;
- NumVecs = 1;
+ unsigned Opcode = Inst.getOpcode();
+ switch (Opcode) {
+ default:
+ return Fail;
+ case AArch64::STLXRW:
+ case AArch64::STLXRB:
+ case AArch64::STLXRH:
+ case AArch64::STXRW:
+ case AArch64::STXRB:
+ case AArch64::STXRH:
+ DecodeGPR32RegisterClass(Inst, Rs, Addr, Decoder);
+ // FALLTHROUGH
+ case AArch64::LDARW:
+ case AArch64::LDARB:
+ case AArch64::LDARH:
+ case AArch64::LDAXRW:
+ case AArch64::LDAXRB:
+ case AArch64::LDAXRH:
+ case AArch64::LDXRW:
+ case AArch64::LDXRB:
+ case AArch64::LDXRH:
+ case AArch64::STLRW:
+ case AArch64::STLRB:
+ case AArch64::STLRH:
+ DecodeGPR32RegisterClass(Inst, Rt, Addr, Decoder);
break;
- }
-
- case AArch64::LD2R_WB_8B_fixed: case AArch64::LD2R_WB_8B_register:
- case AArch64::LD2R_WB_4H_fixed: case AArch64::LD2R_WB_4H_register:
- case AArch64::LD2R_WB_2S_fixed: case AArch64::LD2R_WB_2S_register:
- case AArch64::LD2R_WB_1D_fixed: case AArch64::LD2R_WB_1D_register: {
- switch (Opc) {
- case AArch64::LD2R_WB_8B_fixed: case AArch64::LD2R_WB_8B_register:
- TransferBytes = 2; break;
- case AArch64::LD2R_WB_4H_fixed: case AArch64::LD2R_WB_4H_register:
- TransferBytes = 4; break;
- case AArch64::LD2R_WB_2S_fixed: case AArch64::LD2R_WB_2S_register:
- TransferBytes = 8; break;
- case AArch64::LD2R_WB_1D_fixed: case AArch64::LD2R_WB_1D_register:
- TransferBytes = 16; break;
- }
- Is64bitVec = true;
- IsLoadDup = true;
- NumVecs = 2;
+ case AArch64::STLXRX:
+ case AArch64::STXRX:
+ DecodeGPR32RegisterClass(Inst, Rs, Addr, Decoder);
+ // FALLTHROUGH
+ case AArch64::LDARX:
+ case AArch64::LDAXRX:
+ case AArch64::LDXRX:
+ case AArch64::STLRX:
+ DecodeGPR64RegisterClass(Inst, Rt, Addr, Decoder);
break;
- }
-
- case AArch64::LD2R_WB_16B_fixed: case AArch64::LD2R_WB_16B_register:
- case AArch64::LD2R_WB_8H_fixed: case AArch64::LD2R_WB_8H_register:
- case AArch64::LD2R_WB_4S_fixed: case AArch64::LD2R_WB_4S_register:
- case AArch64::LD2R_WB_2D_fixed: case AArch64::LD2R_WB_2D_register: {
- switch (Opc) {
- case AArch64::LD2R_WB_16B_fixed: case AArch64::LD2R_WB_16B_register:
- TransferBytes = 2; break;
- case AArch64::LD2R_WB_8H_fixed: case AArch64::LD2R_WB_8H_register:
- TransferBytes = 4; break;
- case AArch64::LD2R_WB_4S_fixed: case AArch64::LD2R_WB_4S_register:
- TransferBytes = 8; break;
- case AArch64::LD2R_WB_2D_fixed: case AArch64::LD2R_WB_2D_register:
- TransferBytes = 16; break;
- }
- IsLoadDup = true;
- NumVecs = 2;
+ case AArch64::STLXPW:
+ case AArch64::STXPW:
+ DecodeGPR32RegisterClass(Inst, Rs, Addr, Decoder);
+ // FALLTHROUGH
+ case AArch64::LDAXPW:
+ case AArch64::LDXPW:
+ DecodeGPR32RegisterClass(Inst, Rt, Addr, Decoder);
+ DecodeGPR32RegisterClass(Inst, Rt2, Addr, Decoder);
break;
- }
-
- case AArch64::LD3R_WB_8B_fixed: case AArch64::LD3R_WB_8B_register:
- case AArch64::LD3R_WB_4H_fixed: case AArch64::LD3R_WB_4H_register:
- case AArch64::LD3R_WB_2S_fixed: case AArch64::LD3R_WB_2S_register:
- case AArch64::LD3R_WB_1D_fixed: case AArch64::LD3R_WB_1D_register: {
- switch (Opc) {
- case AArch64::LD3R_WB_8B_fixed: case AArch64::LD3R_WB_8B_register:
- TransferBytes = 3; break;
- case AArch64::LD3R_WB_4H_fixed: case AArch64::LD3R_WB_4H_register:
- TransferBytes = 6; break;
- case AArch64::LD3R_WB_2S_fixed: case AArch64::LD3R_WB_2S_register:
- TransferBytes = 12; break;
- case AArch64::LD3R_WB_1D_fixed: case AArch64::LD3R_WB_1D_register:
- TransferBytes = 24; break;
- }
- Is64bitVec = true;
- IsLoadDup = true;
- NumVecs = 3;
+ case AArch64::STLXPX:
+ case AArch64::STXPX:
+ DecodeGPR32RegisterClass(Inst, Rs, Addr, Decoder);
+ // FALLTHROUGH
+ case AArch64::LDAXPX:
+ case AArch64::LDXPX:
+ DecodeGPR64RegisterClass(Inst, Rt, Addr, Decoder);
+ DecodeGPR64RegisterClass(Inst, Rt2, Addr, Decoder);
break;
}
- case AArch64::LD3R_WB_16B_fixed: case AArch64::LD3R_WB_16B_register:
- case AArch64::LD3R_WB_4S_fixed: case AArch64::LD3R_WB_8H_register:
- case AArch64::LD3R_WB_8H_fixed: case AArch64::LD3R_WB_4S_register:
- case AArch64::LD3R_WB_2D_fixed: case AArch64::LD3R_WB_2D_register: {
- switch (Opc) {
- case AArch64::LD3R_WB_16B_fixed: case AArch64::LD3R_WB_16B_register:
- TransferBytes = 3; break;
- case AArch64::LD3R_WB_8H_fixed: case AArch64::LD3R_WB_8H_register:
- TransferBytes = 6; break;
- case AArch64::LD3R_WB_4S_fixed: case AArch64::LD3R_WB_4S_register:
- TransferBytes = 12; break;
- case AArch64::LD3R_WB_2D_fixed: case AArch64::LD3R_WB_2D_register:
- TransferBytes = 24; break;
- }
- IsLoadDup = true;
- NumVecs = 3;
- break;
- }
+ DecodeGPR64spRegisterClass(Inst, Rn, Addr, Decoder);
- case AArch64::LD4R_WB_8B_fixed: case AArch64::LD4R_WB_8B_register:
- case AArch64::LD4R_WB_4H_fixed: case AArch64::LD4R_WB_4H_register:
- case AArch64::LD4R_WB_2S_fixed: case AArch64::LD4R_WB_2S_register:
- case AArch64::LD4R_WB_1D_fixed: case AArch64::LD4R_WB_1D_register: {
- switch (Opc) {
- case AArch64::LD4R_WB_8B_fixed: case AArch64::LD4R_WB_8B_register:
- TransferBytes = 4; break;
- case AArch64::LD4R_WB_4H_fixed: case AArch64::LD4R_WB_4H_register:
- TransferBytes = 8; break;
- case AArch64::LD4R_WB_2S_fixed: case AArch64::LD4R_WB_2S_register:
- TransferBytes = 16; break;
- case AArch64::LD4R_WB_1D_fixed: case AArch64::LD4R_WB_1D_register:
- TransferBytes = 32; break;
- }
- Is64bitVec = true;
- IsLoadDup = true;
- NumVecs = 4;
- break;
- }
+ // You shouldn't load to the same register twice in an instruction...
+ if ((Opcode == AArch64::LDAXPW || Opcode == AArch64::LDXPW ||
+ Opcode == AArch64::LDAXPX || Opcode == AArch64::LDXPX) &&
+ Rt == Rt2)
+ return SoftFail;
- case AArch64::LD4R_WB_16B_fixed: case AArch64::LD4R_WB_16B_register:
- case AArch64::LD4R_WB_4S_fixed: case AArch64::LD4R_WB_8H_register:
- case AArch64::LD4R_WB_8H_fixed: case AArch64::LD4R_WB_4S_register:
- case AArch64::LD4R_WB_2D_fixed: case AArch64::LD4R_WB_2D_register: {
- switch (Opc) {
- case AArch64::LD4R_WB_16B_fixed: case AArch64::LD4R_WB_16B_register:
- TransferBytes = 4; break;
- case AArch64::LD4R_WB_8H_fixed: case AArch64::LD4R_WB_8H_register:
- TransferBytes = 8; break;
- case AArch64::LD4R_WB_4S_fixed: case AArch64::LD4R_WB_4S_register:
- TransferBytes = 16; break;
- case AArch64::LD4R_WB_2D_fixed: case AArch64::LD4R_WB_2D_register:
- TransferBytes = 32; break;
- }
- IsLoadDup = true;
- NumVecs = 4;
- break;
- }
+ return Success;
+}
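
The SoftFail above marks the instruction as decodable but architecturally unpredictable, so a client can still print it with a warning instead of rejecting the bytes outright. A sketch of the three-valued status and the usual worst-status-wins combination, assuming the conventional MCDisassembler numeric values (Fail = 0, SoftFail = 1, Success = 3):

```cpp
#include <algorithm>
#include <cassert>

// Assumed to mirror llvm::MCDisassembler::DecodeStatus; the values are
// ordered so that folding two statuses together is simply std::min.
enum DecodeStatus { Fail = 0, SoftFail = 1, Success = 3 };

static DecodeStatus combine(DecodeStatus A, DecodeStatus B) {
  return std::min(A, B); // the worse status wins
}

int main() {
  assert(combine(Success, SoftFail) == SoftFail); // warning is preserved
  assert(combine(SoftFail, Fail) == Fail);        // hard failure dominates
  assert(combine(Success, Success) == Success);
}
```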
- case AArch64::LD1LN_WB_B_fixed: case AArch64::LD1LN_WB_B_register:
- case AArch64::LD1LN_WB_H_fixed: case AArch64::LD1LN_WB_H_register:
- case AArch64::LD1LN_WB_S_fixed: case AArch64::LD1LN_WB_S_register:
- case AArch64::LD1LN_WB_D_fixed: case AArch64::LD1LN_WB_D_register: {
- switch (Opc) {
- case AArch64::LD1LN_WB_B_fixed: case AArch64::LD1LN_WB_B_register:
- TransferBytes = 1; break;
- case AArch64::LD1LN_WB_H_fixed: case AArch64::LD1LN_WB_H_register:
- TransferBytes = 2; break;
- case AArch64::LD1LN_WB_S_fixed: case AArch64::LD1LN_WB_S_register:
- TransferBytes = 4; break;
- case AArch64::LD1LN_WB_D_fixed: case AArch64::LD1LN_WB_D_register:
- TransferBytes = 8; break;
- }
- IsLoad = true;
- NumVecs = 1;
- break;
- }
+static DecodeStatus DecodePairLdStInstruction(llvm::MCInst &Inst, uint32_t insn,
+ uint64_t Addr,
+ const void *Decoder) {
+ unsigned Rt = fieldFromInstruction(insn, 0, 5);
+ unsigned Rn = fieldFromInstruction(insn, 5, 5);
+ unsigned Rt2 = fieldFromInstruction(insn, 10, 5);
+ int64_t offset = fieldFromInstruction(insn, 15, 7);
+ bool IsLoad = fieldFromInstruction(insn, 22, 1);
- case AArch64::LD2LN_WB_B_fixed: case AArch64::LD2LN_WB_B_register:
- case AArch64::LD2LN_WB_H_fixed: case AArch64::LD2LN_WB_H_register:
- case AArch64::LD2LN_WB_S_fixed: case AArch64::LD2LN_WB_S_register:
- case AArch64::LD2LN_WB_D_fixed: case AArch64::LD2LN_WB_D_register: {
- switch (Opc) {
- case AArch64::LD2LN_WB_B_fixed: case AArch64::LD2LN_WB_B_register:
- TransferBytes = 2; break;
- case AArch64::LD2LN_WB_H_fixed: case AArch64::LD2LN_WB_H_register:
- TransferBytes = 4; break;
- case AArch64::LD2LN_WB_S_fixed: case AArch64::LD2LN_WB_S_register:
- TransferBytes = 8; break;
- case AArch64::LD2LN_WB_D_fixed: case AArch64::LD2LN_WB_D_register:
- TransferBytes = 16; break;
- }
- IsLoad = true;
- NumVecs = 2;
- break;
- }
+ // offset is a 7-bit signed immediate, so sign-extend it to
+ // fill the full 64-bit value.
+ if (offset & (1 << (7 - 1)))
+ offset |= ~((1LL << 7) - 1);
- case AArch64::LD3LN_WB_B_fixed: case AArch64::LD3LN_WB_B_register:
- case AArch64::LD3LN_WB_H_fixed: case AArch64::LD3LN_WB_H_register:
- case AArch64::LD3LN_WB_S_fixed: case AArch64::LD3LN_WB_S_register:
- case AArch64::LD3LN_WB_D_fixed: case AArch64::LD3LN_WB_D_register: {
- switch (Opc) {
- case AArch64::LD3LN_WB_B_fixed: case AArch64::LD3LN_WB_B_register:
- TransferBytes = 3; break;
- case AArch64::LD3LN_WB_H_fixed: case AArch64::LD3LN_WB_H_register:
- TransferBytes = 6; break;
- case AArch64::LD3LN_WB_S_fixed: case AArch64::LD3LN_WB_S_register:
- TransferBytes = 12; break;
- case AArch64::LD3LN_WB_D_fixed: case AArch64::LD3LN_WB_D_register:
- TransferBytes = 24; break;
- }
- IsLoad = true;
- NumVecs = 3;
- break;
- }
+ unsigned Opcode = Inst.getOpcode();
+ bool NeedsDisjointWritebackTransfer = false;
- case AArch64::LD4LN_WB_B_fixed: case AArch64::LD4LN_WB_B_register:
- case AArch64::LD4LN_WB_H_fixed: case AArch64::LD4LN_WB_H_register:
- case AArch64::LD4LN_WB_S_fixed: case AArch64::LD4LN_WB_S_register:
- case AArch64::LD4LN_WB_D_fixed: case AArch64::LD4LN_WB_D_register: {
- switch (Opc) {
- case AArch64::LD4LN_WB_B_fixed: case AArch64::LD4LN_WB_B_register:
- TransferBytes = 4; break;
- case AArch64::LD4LN_WB_H_fixed: case AArch64::LD4LN_WB_H_register:
- TransferBytes = 8; break;
- case AArch64::LD4LN_WB_S_fixed: case AArch64::LD4LN_WB_S_register:
- TransferBytes = 16; break;
- case AArch64::LD4LN_WB_D_fixed: case AArch64::LD4LN_WB_D_register:
- TransferBytes = 32; break;
- }
- IsLoad = true;
- NumVecs = 4;
+ // First operand is always writeback of base register.
+ switch (Opcode) {
+ default:
break;
- }
-
- case AArch64::ST1LN_WB_B_fixed: case AArch64::ST1LN_WB_B_register:
- case AArch64::ST1LN_WB_H_fixed: case AArch64::ST1LN_WB_H_register:
- case AArch64::ST1LN_WB_S_fixed: case AArch64::ST1LN_WB_S_register:
- case AArch64::ST1LN_WB_D_fixed: case AArch64::ST1LN_WB_D_register: {
- switch (Opc) {
- case AArch64::ST1LN_WB_B_fixed: case AArch64::ST1LN_WB_B_register:
- TransferBytes = 1; break;
- case AArch64::ST1LN_WB_H_fixed: case AArch64::ST1LN_WB_H_register:
- TransferBytes = 2; break;
- case AArch64::ST1LN_WB_S_fixed: case AArch64::ST1LN_WB_S_register:
- TransferBytes = 4; break;
- case AArch64::ST1LN_WB_D_fixed: case AArch64::ST1LN_WB_D_register:
- TransferBytes = 8; break;
- }
- NumVecs = 1;
+ case AArch64::LDPXpost:
+ case AArch64::STPXpost:
+ case AArch64::LDPSWpost:
+ case AArch64::LDPXpre:
+ case AArch64::STPXpre:
+ case AArch64::LDPSWpre:
+ case AArch64::LDPWpost:
+ case AArch64::STPWpost:
+ case AArch64::LDPWpre:
+ case AArch64::STPWpre:
+ case AArch64::LDPQpost:
+ case AArch64::STPQpost:
+ case AArch64::LDPQpre:
+ case AArch64::STPQpre:
+ case AArch64::LDPDpost:
+ case AArch64::STPDpost:
+ case AArch64::LDPDpre:
+ case AArch64::STPDpre:
+ case AArch64::LDPSpost:
+ case AArch64::STPSpost:
+ case AArch64::LDPSpre:
+ case AArch64::STPSpre:
+ DecodeGPR64spRegisterClass(Inst, Rn, Addr, Decoder);
break;
}
- case AArch64::ST2LN_WB_B_fixed: case AArch64::ST2LN_WB_B_register:
- case AArch64::ST2LN_WB_H_fixed: case AArch64::ST2LN_WB_H_register:
- case AArch64::ST2LN_WB_S_fixed: case AArch64::ST2LN_WB_S_register:
- case AArch64::ST2LN_WB_D_fixed: case AArch64::ST2LN_WB_D_register: {
- switch (Opc) {
- case AArch64::ST2LN_WB_B_fixed: case AArch64::ST2LN_WB_B_register:
- TransferBytes = 2; break;
- case AArch64::ST2LN_WB_H_fixed: case AArch64::ST2LN_WB_H_register:
- TransferBytes = 4; break;
- case AArch64::ST2LN_WB_S_fixed: case AArch64::ST2LN_WB_S_register:
- TransferBytes = 8; break;
- case AArch64::ST2LN_WB_D_fixed: case AArch64::ST2LN_WB_D_register:
- TransferBytes = 16; break;
- }
- NumVecs = 2;
+ switch (Opcode) {
+ default:
+ return Fail;
+ case AArch64::LDPXpost:
+ case AArch64::STPXpost:
+ case AArch64::LDPSWpost:
+ case AArch64::LDPXpre:
+ case AArch64::STPXpre:
+ case AArch64::LDPSWpre:
+ NeedsDisjointWritebackTransfer = true;
+ // Fallthrough
+ case AArch64::LDNPXi:
+ case AArch64::STNPXi:
+ case AArch64::LDPXi:
+ case AArch64::STPXi:
+ case AArch64::LDPSWi:
+ DecodeGPR64RegisterClass(Inst, Rt, Addr, Decoder);
+ DecodeGPR64RegisterClass(Inst, Rt2, Addr, Decoder);
break;
- }
-
- case AArch64::ST3LN_WB_B_fixed: case AArch64::ST3LN_WB_B_register:
- case AArch64::ST3LN_WB_H_fixed: case AArch64::ST3LN_WB_H_register:
- case AArch64::ST3LN_WB_S_fixed: case AArch64::ST3LN_WB_S_register:
- case AArch64::ST3LN_WB_D_fixed: case AArch64::ST3LN_WB_D_register: {
- switch (Opc) {
- case AArch64::ST3LN_WB_B_fixed: case AArch64::ST3LN_WB_B_register:
- TransferBytes = 3; break;
- case AArch64::ST3LN_WB_H_fixed: case AArch64::ST3LN_WB_H_register:
- TransferBytes = 6; break;
- case AArch64::ST3LN_WB_S_fixed: case AArch64::ST3LN_WB_S_register:
- TransferBytes = 12; break;
- case AArch64::ST3LN_WB_D_fixed: case AArch64::ST3LN_WB_D_register:
- TransferBytes = 24; break;
- }
- NumVecs = 3;
+ case AArch64::LDPWpost:
+ case AArch64::STPWpost:
+ case AArch64::LDPWpre:
+ case AArch64::STPWpre:
+ NeedsDisjointWritebackTransfer = true;
+ // Fallthrough
+ case AArch64::LDNPWi:
+ case AArch64::STNPWi:
+ case AArch64::LDPWi:
+ case AArch64::STPWi:
+ DecodeGPR32RegisterClass(Inst, Rt, Addr, Decoder);
+ DecodeGPR32RegisterClass(Inst, Rt2, Addr, Decoder);
break;
- }
-
- case AArch64::ST4LN_WB_B_fixed: case AArch64::ST4LN_WB_B_register:
- case AArch64::ST4LN_WB_H_fixed: case AArch64::ST4LN_WB_H_register:
- case AArch64::ST4LN_WB_S_fixed: case AArch64::ST4LN_WB_S_register:
- case AArch64::ST4LN_WB_D_fixed: case AArch64::ST4LN_WB_D_register: {
- switch (Opc) {
- case AArch64::ST4LN_WB_B_fixed: case AArch64::ST4LN_WB_B_register:
- TransferBytes = 4; break;
- case AArch64::ST4LN_WB_H_fixed: case AArch64::ST4LN_WB_H_register:
- TransferBytes = 8; break;
- case AArch64::ST4LN_WB_S_fixed: case AArch64::ST4LN_WB_S_register:
- TransferBytes = 16; break;
- case AArch64::ST4LN_WB_D_fixed: case AArch64::ST4LN_WB_D_register:
- TransferBytes = 32; break;
- }
- NumVecs = 4;
+ case AArch64::LDNPQi:
+ case AArch64::STNPQi:
+ case AArch64::LDPQpost:
+ case AArch64::STPQpost:
+ case AArch64::LDPQi:
+ case AArch64::STPQi:
+ case AArch64::LDPQpre:
+ case AArch64::STPQpre:
+ DecodeFPR128RegisterClass(Inst, Rt, Addr, Decoder);
+ DecodeFPR128RegisterClass(Inst, Rt2, Addr, Decoder);
+ break;
+ case AArch64::LDNPDi:
+ case AArch64::STNPDi:
+ case AArch64::LDPDpost:
+ case AArch64::STPDpost:
+ case AArch64::LDPDi:
+ case AArch64::STPDi:
+ case AArch64::LDPDpre:
+ case AArch64::STPDpre:
+ DecodeFPR64RegisterClass(Inst, Rt, Addr, Decoder);
+ DecodeFPR64RegisterClass(Inst, Rt2, Addr, Decoder);
+ break;
+ case AArch64::LDNPSi:
+ case AArch64::STNPSi:
+ case AArch64::LDPSpost:
+ case AArch64::STPSpost:
+ case AArch64::LDPSi:
+ case AArch64::STPSi:
+ case AArch64::LDPSpre:
+ case AArch64::STPSpre:
+ DecodeFPR32RegisterClass(Inst, Rt, Addr, Decoder);
+ DecodeFPR32RegisterClass(Inst, Rt2, Addr, Decoder);
break;
}
- default:
- return MCDisassembler::Fail;
- } // End of switch (Opc)
+ DecodeGPR64spRegisterClass(Inst, Rn, Addr, Decoder);
+ Inst.addOperand(MCOperand::CreateImm(offset));
- unsigned Rt = fieldFromInstruction(Insn, 0, 5);
- unsigned Rn = fieldFromInstruction(Insn, 5, 5);
- unsigned Rm = fieldFromInstruction(Insn, 16, 5);
-
- // Decode post-index of load duplicate lane
- if (IsLoadDup) {
- switch (NumVecs) {
- case 1:
- Is64bitVec ? DecodeFPR64RegisterClass(Inst, Rt, Address, Decoder)
- : DecodeFPR128RegisterClass(Inst, Rt, Address, Decoder);
- break;
- case 2:
- Is64bitVec ? DecodeDPairRegisterClass(Inst, Rt, Address, Decoder)
- : DecodeQPairRegisterClass(Inst, Rt, Address, Decoder);
- break;
- case 3:
- Is64bitVec ? DecodeDTripleRegisterClass(Inst, Rt, Address, Decoder)
- : DecodeQTripleRegisterClass(Inst, Rt, Address, Decoder);
- break;
- case 4:
- Is64bitVec ? DecodeDQuadRegisterClass(Inst, Rt, Address, Decoder)
- : DecodeQQuadRegisterClass(Inst, Rt, Address, Decoder);
- }
-
- // Decode write back register, which is equal to Rn.
- DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder);
- DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder);
-
- if (Rm == 31) // If Rm is 0x11111, add the number of transferred bytes
- Inst.addOperand(MCOperand::CreateImm(TransferBytes));
- else // Decode Rm
- DecodeGPR64noxzrRegisterClass(Inst, Rm, Address, Decoder);
-
- return MCDisassembler::Success;
- }
+ // You shouldn't load to the same register twice in an instruction...
+ if (IsLoad && Rt == Rt2)
+ return SoftFail;
- // Decode post-index of load/store lane
- // Loads have a vector list as output.
- if (IsLoad) {
- switch (NumVecs) {
- case 1:
- DecodeFPR128RegisterClass(Inst, Rt, Address, Decoder);
- break;
- case 2:
- DecodeQPairRegisterClass(Inst, Rt, Address, Decoder);
- break;
- case 3:
- DecodeQTripleRegisterClass(Inst, Rt, Address, Decoder);
- break;
- case 4:
- DecodeQQuadRegisterClass(Inst, Rt, Address, Decoder);
- }
- }
+ // ... or do any operation that writes back to a transfer register. But note
+ // that "stp xzr, xzr, [sp], #4" is fine because xzr and sp are different.
+ if (NeedsDisjointWritebackTransfer && Rn != 31 && (Rt == Rn || Rt2 == Rn))
+ return SoftFail;
+
+ return Success;
+}
- // Decode write back register, which is equal to Rn.
- DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder);
- DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder);
+static DecodeStatus DecodeAddSubERegInstruction(llvm::MCInst &Inst,
+ uint32_t insn, uint64_t Addr,
+ const void *Decoder) {
+ unsigned Rd = fieldFromInstruction(insn, 0, 5);
+ unsigned Rn = fieldFromInstruction(insn, 5, 5);
+ unsigned Rm = fieldFromInstruction(insn, 16, 5);
+ unsigned extend = fieldFromInstruction(insn, 10, 6);
- if (Rm == 31) // If Rm is 0x11111, add the number of transferred bytes
- Inst.addOperand(MCOperand::CreateImm(TransferBytes));
- else // Decode Rm
- DecodeGPR64noxzrRegisterClass(Inst, Rm, Address, Decoder);
+ unsigned shift = extend & 0x7;
+ if (shift > 4)
+ return Fail;
- // Decode the source vector list.
- switch (NumVecs) {
- case 1:
- DecodeFPR128RegisterClass(Inst, Rt, Address, Decoder);
- break;
- case 2:
- DecodeQPairRegisterClass(Inst, Rt, Address, Decoder);
+ switch (Inst.getOpcode()) {
+ default:
+ return Fail;
+ case AArch64::ADDWrx:
+ case AArch64::SUBWrx:
+ DecodeGPR32spRegisterClass(Inst, Rd, Addr, Decoder);
+ DecodeGPR32spRegisterClass(Inst, Rn, Addr, Decoder);
+ DecodeGPR32RegisterClass(Inst, Rm, Addr, Decoder);
break;
- case 3:
- DecodeQTripleRegisterClass(Inst, Rt, Address, Decoder);
+ case AArch64::ADDSWrx:
+ case AArch64::SUBSWrx:
+ DecodeGPR32RegisterClass(Inst, Rd, Addr, Decoder);
+ DecodeGPR32spRegisterClass(Inst, Rn, Addr, Decoder);
+ DecodeGPR32RegisterClass(Inst, Rm, Addr, Decoder);
break;
- case 4:
- DecodeQQuadRegisterClass(Inst, Rt, Address, Decoder);
- }
-
- // Decode lane
- unsigned Q = fieldFromInstruction(Insn, 30, 1);
- unsigned S = fieldFromInstruction(Insn, 10, 3);
- unsigned lane = 0;
- // Calculate the number of lanes by number of vectors and transferred bytes.
- // NumLanes = 16 bytes / bytes of each lane
- unsigned NumLanes = 16 / (TransferBytes / NumVecs);
- switch (NumLanes) {
- case 16: // A vector has 16 lanes, each lane is 1 byte.
- lane = (Q << 3) | S;
+ case AArch64::ADDXrx:
+ case AArch64::SUBXrx:
+ DecodeGPR64spRegisterClass(Inst, Rd, Addr, Decoder);
+ DecodeGPR64spRegisterClass(Inst, Rn, Addr, Decoder);
+ DecodeGPR32RegisterClass(Inst, Rm, Addr, Decoder);
break;
- case 8:
- lane = (Q << 2) | (S >> 1);
+ case AArch64::ADDSXrx:
+ case AArch64::SUBSXrx:
+ DecodeGPR64RegisterClass(Inst, Rd, Addr, Decoder);
+ DecodeGPR64spRegisterClass(Inst, Rn, Addr, Decoder);
+ DecodeGPR32RegisterClass(Inst, Rm, Addr, Decoder);
break;
- case 4:
- lane = (Q << 1) | (S >> 2);
+ case AArch64::ADDXrx64:
+ case AArch64::SUBXrx64:
+ DecodeGPR64spRegisterClass(Inst, Rd, Addr, Decoder);
+ DecodeGPR64spRegisterClass(Inst, Rn, Addr, Decoder);
+ DecodeGPR64RegisterClass(Inst, Rm, Addr, Decoder);
break;
- case 2:
- lane = Q;
+ case AArch64::SUBSXrx64:
+ case AArch64::ADDSXrx64:
+ DecodeGPR64RegisterClass(Inst, Rd, Addr, Decoder);
+ DecodeGPR64spRegisterClass(Inst, Rn, Addr, Decoder);
+ DecodeGPR64RegisterClass(Inst, Rm, Addr, Decoder);
break;
}
- Inst.addOperand(MCOperand::CreateImm(lane));
- return MCDisassembler::Success;
+ Inst.addOperand(MCOperand::CreateImm(extend));
+ return Success;
}
-static DecodeStatus DecodeSHLLInstruction(MCInst &Inst, unsigned Insn,
- uint64_t Address,
- const void *Decoder) {
- unsigned Rd = fieldFromInstruction(Insn, 0, 5);
- unsigned Rn = fieldFromInstruction(Insn, 5, 5);
- unsigned size = fieldFromInstruction(Insn, 22, 2);
- unsigned Q = fieldFromInstruction(Insn, 30, 1);
+static DecodeStatus DecodeLogicalImmInstruction(llvm::MCInst &Inst,
+ uint32_t insn, uint64_t Addr,
+ const void *Decoder) {
+ unsigned Rd = fieldFromInstruction(insn, 0, 5);
+ unsigned Rn = fieldFromInstruction(insn, 5, 5);
+ unsigned Datasize = fieldFromInstruction(insn, 31, 1);
+ unsigned imm;
+
+ if (Datasize) {
+ if (Inst.getOpcode() == AArch64::ANDSXri)
+ DecodeGPR64RegisterClass(Inst, Rd, Addr, Decoder);
+ else
+ DecodeGPR64spRegisterClass(Inst, Rd, Addr, Decoder);
+ DecodeGPR64RegisterClass(Inst, Rn, Addr, Decoder);
+ imm = fieldFromInstruction(insn, 10, 13);
+ if (!AArch64_AM::isValidDecodeLogicalImmediate(imm, 64))
+ return Fail;
+ } else {
+ if (Inst.getOpcode() == AArch64::ANDSWri)
+ DecodeGPR32RegisterClass(Inst, Rd, Addr, Decoder);
+ else
+ DecodeGPR32spRegisterClass(Inst, Rd, Addr, Decoder);
+ DecodeGPR32RegisterClass(Inst, Rn, Addr, Decoder);
+ imm = fieldFromInstruction(insn, 10, 12);
+ if (!AArch64_AM::isValidDecodeLogicalImmediate(imm, 32))
+ return Fail;
+ }
+ Inst.addOperand(MCOperand::CreateImm(imm));
+ return Success;
+}
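
isValidDecodeLogicalImmediate rejects N:immr:imms triples with no defined expansion. For intuition, here is a self-contained sketch of the expansion itself, written from the architectural description (an element of S+1 ones rotated right by R, then replicated across the register) rather than copied from AArch64AddressingModes.h; it assumes the GCC/Clang __builtin_clz intrinsic:

```cpp
#include <cassert>
#include <cstdint>

// Rotate the low `size` bits of `v` right by one position.
static uint64_t ror1(uint64_t v, unsigned size) {
  return ((v & 1) << (size - 1)) | (v >> 1);
}

// Expand an N:immr:imms logical-immediate field for a 32- or 64-bit
// register. Returns false for the undefined encodings the decoder rejects.
static bool expandLogicalImm(unsigned N, unsigned immr, unsigned imms,
                             unsigned regSize, uint64_t &out) {
  // The element size (2..64) is picked by the highest set bit of N:NOT(imms).
  unsigned lenBits = (N << 6) | (~imms & 0x3F);
  if (lenBits == 0)
    return false;
  unsigned len = 31 - __builtin_clz(lenBits);
  unsigned size = 1u << len;
  if (size > regSize)
    return false;
  unsigned R = immr & (size - 1);
  unsigned S = imms & (size - 1);
  if (S == size - 1) // an all-ones element is not encodable
    return false;
  uint64_t pattern = (1ULL << (S + 1)) - 1; // S+1 ones...
  for (unsigned i = 0; i != R; ++i)
    pattern = ror1(pattern, size);          // ...rotated right by R...
  while (size != regSize) {                 // ...replicated to the register.
    pattern |= pattern << size;
    size *= 2;
  }
  out = pattern;
  return true;
}

int main() {
  uint64_t v;
  // N=1, immr=0, imms=0: one set bit in a 64-bit element.
  assert(expandLogicalImm(1, 0, 0, 64, v) && v == 0x1);
  // N=0, imms=0b111100: 2-bit elements of "01".
  assert(expandLogicalImm(0, 0, 0x3C, 64, v) && v == 0x5555555555555555ULL);
  // S == size-1 has no encoding; this is what the decoder's check catches.
  assert(!expandLogicalImm(1, 0, 63, 64, v));
}
```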
- DecodeFPR128RegisterClass(Inst, Rd, Address, Decoder);
+static DecodeStatus DecodeModImmInstruction(llvm::MCInst &Inst, uint32_t insn,
+ uint64_t Addr,
+ const void *Decoder) {
+ unsigned Rd = fieldFromInstruction(insn, 0, 5);
+ unsigned cmode = fieldFromInstruction(insn, 12, 4);
+ unsigned imm = fieldFromInstruction(insn, 16, 3) << 5;
+ imm |= fieldFromInstruction(insn, 5, 5);
- if(Q)
- DecodeFPR128RegisterClass(Inst, Rn, Address, Decoder);
+ if (Inst.getOpcode() == AArch64::MOVID)
+ DecodeFPR64RegisterClass(Inst, Rd, Addr, Decoder);
else
- DecodeFPR64RegisterClass(Inst, Rn, Address, Decoder);
+ DecodeVectorRegisterClass(Inst, Rd, Addr, Decoder);
+
+ Inst.addOperand(MCOperand::CreateImm(imm));
- switch (size) {
- case 0:
- Inst.addOperand(MCOperand::CreateImm(8));
+ switch (Inst.getOpcode()) {
+ default:
break;
- case 1:
- Inst.addOperand(MCOperand::CreateImm(16));
+ case AArch64::MOVIv4i16:
+ case AArch64::MOVIv8i16:
+ case AArch64::MVNIv4i16:
+ case AArch64::MVNIv8i16:
+ case AArch64::MOVIv2i32:
+ case AArch64::MOVIv4i32:
+ case AArch64::MVNIv2i32:
+ case AArch64::MVNIv4i32:
+ Inst.addOperand(MCOperand::CreateImm((cmode & 6) << 2));
break;
- case 2:
- Inst.addOperand(MCOperand::CreateImm(32));
+ case AArch64::MOVIv2s_msl:
+ case AArch64::MOVIv4s_msl:
+ case AArch64::MVNIv2s_msl:
+ case AArch64::MVNIv4s_msl:
+ Inst.addOperand(MCOperand::CreateImm(cmode & 1 ? 0x110 : 0x108));
break;
- default :
- return MCDisassembler::Fail;
}
- return MCDisassembler::Success;
+
+ return Success;
}
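
The 0x108 and 0x110 constants fall out of how the backend appears to pack a shifter into a single immediate, (shift-type << 6) | amount: with LSL = 0 that is just the amount, so (cmode & 6) << 2 yields lsl #0/#8/#16/#24, while MSL = 4 gives (4 << 6) | 8 = 0x108 and (4 << 6) | 16 = 0x110. A tiny sketch of that assumed packing:

```cpp
#include <cassert>

// Assumed to match the AArch64 backend's shifter packing, given the
// 0x108/0x110 constants above: (type << 6) | amount.
enum ShiftType { LSL = 0, MSL = 4 };

static unsigned getShifterImm(ShiftType ST, unsigned Amount) {
  return (unsigned(ST) << 6) | Amount;
}

int main() {
  // Shifted MOVI/MVNI: bits 2:1 of cmode, scaled by 8, as an lsl amount.
  for (unsigned cmode = 0; cmode < 8; ++cmode)
    assert(getShifterImm(LSL, (cmode & 6) << 2) == ((cmode & 6) << 2));
  // Shifting-ones MOVI/MVNI: cmode bit 0 selects msl #8 vs. msl #16.
  assert(getShifterImm(MSL, 8) == 0x108);
  assert(getShifterImm(MSL, 16) == 0x110);
}
```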
+static DecodeStatus DecodeModImmTiedInstruction(llvm::MCInst &Inst,
+ uint32_t insn, uint64_t Addr,
+ const void *Decoder) {
+ unsigned Rd = fieldFromInstruction(insn, 0, 5);
+ unsigned cmode = fieldFromInstruction(insn, 12, 4);
+ unsigned imm = fieldFromInstruction(insn, 16, 3) << 5;
+ imm |= fieldFromInstruction(insn, 5, 5);
+
+ // Tied operands added twice.
+ DecodeVectorRegisterClass(Inst, Rd, Addr, Decoder);
+ DecodeVectorRegisterClass(Inst, Rd, Addr, Decoder);
+
+ Inst.addOperand(MCOperand::CreateImm(imm));
+ Inst.addOperand(MCOperand::CreateImm((cmode & 6) << 2));
+
+ return Success;
+}
+
+static DecodeStatus DecodeAdrInstruction(llvm::MCInst &Inst, uint32_t insn,
+ uint64_t Addr, const void *Decoder) {
+ unsigned Rd = fieldFromInstruction(insn, 0, 5);
+ int64_t imm = fieldFromInstruction(insn, 5, 19) << 2;
+ imm |= fieldFromInstruction(insn, 29, 2);
+ const AArch64Disassembler *Dis =
+ static_cast<const AArch64Disassembler *>(Decoder);
+
+ // Sign-extend the 21-bit immediate.
+ if (imm & (1 << (21 - 1)))
+ imm |= ~((1LL << 21) - 1);
+
+ DecodeGPR64RegisterClass(Inst, Rd, Addr, Decoder);
+ if (!Dis->tryAddingSymbolicOperand(Inst, imm, Addr, Fail, 0, 4))
+ Inst.addOperand(MCOperand::CreateImm(imm));
+
+ return Success;
+}
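
ADR's 21-bit immediate is split across the instruction word as immhi (bits 23:5) and immlo (bits 30:29); the decoder glues the halves back together before sign-extending. A worked round trip under those field positions (the 0x10000000 base opcode is per the A64 encoding; the helper name is illustrative):

```cpp
#include <cassert>
#include <cstdint>

static uint32_t field(uint32_t insn, unsigned lo, unsigned n) {
  return (insn >> lo) & ((1u << n) - 1);
}

int main() {
  // Encode `adr x0, #-4`: imm21 = -4 splits into immhi:immlo.
  int64_t imm21 = -4;
  uint32_t immlo = imm21 & 0x3;
  uint32_t immhi = (imm21 >> 2) & 0x7FFFF;
  uint32_t insn = 0x10000000 | (immlo << 29) | (immhi << 5) | /*Rd=*/0;

  // Decode exactly as DecodeAdrInstruction does.
  int64_t imm = (int64_t)field(insn, 5, 19) << 2;
  imm |= field(insn, 29, 2);
  if (imm & (1LL << 20)) // sign bit of the 21-bit value
    imm |= ~((1LL << 21) - 1);
  assert(imm == -4); // the ADR target is then Addr + imm
}
```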
+
+static DecodeStatus DecodeBaseAddSubImm(llvm::MCInst &Inst, uint32_t insn,
+ uint64_t Addr, const void *Decoder) {
+ unsigned Rd = fieldFromInstruction(insn, 0, 5);
+ unsigned Rn = fieldFromInstruction(insn, 5, 5);
+ unsigned Imm = fieldFromInstruction(insn, 10, 14);
+ unsigned S = fieldFromInstruction(insn, 29, 1);
+ unsigned Datasize = fieldFromInstruction(insn, 31, 1);
+
+ unsigned ShifterVal = (Imm >> 12) & 3;
+ unsigned ImmVal = Imm & 0xFFF;
+ const AArch64Disassembler *Dis =
+ static_cast<const AArch64Disassembler *>(Decoder);
+
+ if (ShifterVal != 0 && ShifterVal != 1)
+ return Fail;
+
+ if (Datasize) {
+ if (Rd == 31 && !S)
+ DecodeGPR64spRegisterClass(Inst, Rd, Addr, Decoder);
+ else
+ DecodeGPR64RegisterClass(Inst, Rd, Addr, Decoder);
+ DecodeGPR64spRegisterClass(Inst, Rn, Addr, Decoder);
+ } else {
+ if (Rd == 31 && !S)
+ DecodeGPR32spRegisterClass(Inst, Rd, Addr, Decoder);
+ else
+ DecodeGPR32RegisterClass(Inst, Rd, Addr, Decoder);
+ DecodeGPR32spRegisterClass(Inst, Rn, Addr, Decoder);
+ }
+
+ if (!Dis->tryAddingSymbolicOperand(Inst, Imm, Addr, Fail, 0, 4))
+ Inst.addOperand(MCOperand::CreateImm(ImmVal));
+ Inst.addOperand(MCOperand::CreateImm(12 * ShifterVal));
+ return Success;
+}
+
+static DecodeStatus DecodeUnconditionalBranch(llvm::MCInst &Inst, uint32_t insn,
+ uint64_t Addr,
+ const void *Decoder) {
+ int64_t imm = fieldFromInstruction(insn, 0, 26);
+ const AArch64Disassembler *Dis =
+ static_cast<const AArch64Disassembler *>(Decoder);
+
+ // Sign-extend the 26-bit immediate.
+ if (imm & (1 << (26 - 1)))
+ imm |= ~((1LL << 26) - 1);
+
+ if (!Dis->tryAddingSymbolicOperand(Inst, imm << 2, Addr, true, 0, 4))
+ Inst.addOperand(MCOperand::CreateImm(imm));
+
+ return Success;
+}
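
The 26-bit branch immediate counts words, which is why imm << 2 is handed to the symbolizer as a byte offset while the raw imm becomes the operand. A small worked check of the target arithmetic (values are illustrative):

```cpp
#include <cassert>
#include <cstdint>

int main() {
  // A `b` at 0x1000 whose imm26 field is all ones, i.e. -1 words.
  int64_t imm = 0x3FFFFFF;
  if (imm & (1LL << 25)) // sign-extend the 26-bit field
    imm |= ~((1LL << 26) - 1);
  uint64_t Addr = 0x1000;
  uint64_t Target = Addr + (imm << 2); // byte offset = words * 4
  assert(imm == -1 && Target == 0xFFC);
}
```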
+
+static DecodeStatus DecodeSystemPStateInstruction(llvm::MCInst &Inst,
+ uint32_t insn, uint64_t Addr,
+ const void *Decoder) {
+ uint64_t op1 = fieldFromInstruction(insn, 16, 3);
+ uint64_t op2 = fieldFromInstruction(insn, 5, 3);
+ uint64_t crm = fieldFromInstruction(insn, 8, 4);
+
+ uint64_t pstate_field = (op1 << 3) | op2;
+
+ Inst.addOperand(MCOperand::CreateImm(pstate_field));
+ Inst.addOperand(MCOperand::CreateImm(crm));
+
+ bool ValidNamed;
+ (void)AArch64PState::PStateMapper().toString(pstate_field, ValidNamed);
+
+ return ValidNamed ? Success : Fail;
+}
+
+static DecodeStatus DecodeTestAndBranch(llvm::MCInst &Inst, uint32_t insn,
+ uint64_t Addr, const void *Decoder) {
+ uint64_t Rt = fieldFromInstruction(insn, 0, 5);
+ uint64_t bit = fieldFromInstruction(insn, 31, 1) << 5;
+ bit |= fieldFromInstruction(insn, 19, 5);
+ int64_t dst = fieldFromInstruction(insn, 5, 14);
+ const AArch64Disassembler *Dis =
+ static_cast<const AArch64Disassembler *>(Decoder);
+
+ // Sign-extend 14-bit immediate.
+ if (dst & (1 << (14 - 1)))
+ dst |= ~((1LL << 14) - 1);
+
+ if (fieldFromInstruction(insn, 31, 1) == 0)
+ DecodeGPR32RegisterClass(Inst, Rt, Addr, Decoder);
+ else
+ DecodeGPR64RegisterClass(Inst, Rt, Addr, Decoder);
+ Inst.addOperand(MCOperand::CreateImm(bit));
+ if (!Dis->tryAddingSymbolicOperand(Inst, dst << 2, Addr, true, 0, 4))
+ Inst.addOperand(MCOperand::CreateImm(dst));
+
+ return Success;
+}
diff --git a/lib/Target/AArch64/Disassembler/AArch64Disassembler.h b/lib/Target/AArch64/Disassembler/AArch64Disassembler.h
new file mode 100644
index 0000000..68d4867
--- /dev/null
+++ b/lib/Target/AArch64/Disassembler/AArch64Disassembler.h
@@ -0,0 +1,40 @@
+//===- AArch64Disassembler.h - Disassembler for AArch64 ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the AArch64 instruction disassembler.
+//===----------------------------------------------------------------------===//
+
+#ifndef AArch64DISASSEMBLER_H
+#define AArch64DISASSEMBLER_H
+
+#include "llvm/MC/MCDisassembler.h"
+
+namespace llvm {
+
+class MCInst;
+class MemoryObject;
+class raw_ostream;
+
+class AArch64Disassembler : public MCDisassembler {
+public:
+ AArch64Disassembler(const MCSubtargetInfo &STI, MCContext &Ctx)
+ : MCDisassembler(STI, Ctx) {}
+
+ ~AArch64Disassembler() {}
+
+ /// getInstruction - See MCDisassembler.
+ MCDisassembler::DecodeStatus
+ getInstruction(MCInst &instr, uint64_t &size, const MemoryObject &region,
+ uint64_t address, raw_ostream &vStream,
+ raw_ostream &cStream) const override;
+};
+
+} // namespace llvm
+
+#endif
diff --git a/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp b/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp
new file mode 100644
index 0000000..2466368
--- /dev/null
+++ b/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp
@@ -0,0 +1,221 @@
+//===- AArch64ExternalSymbolizer.cpp - Symbolizer for AArch64 ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AArch64ExternalSymbolizer.h"
+#include "AArch64Subtarget.h"
+#include "MCTargetDesc/AArch64AddressingModes.h"
+#include "Utils/AArch64BaseInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "aarch64-disassembler"
+
+static MCSymbolRefExpr::VariantKind
+getVariant(uint64_t LLVMDisassembler_VariantKind) {
+ switch (LLVMDisassembler_VariantKind) {
+ case LLVMDisassembler_VariantKind_None:
+ return MCSymbolRefExpr::VK_None;
+ case LLVMDisassembler_VariantKind_ARM64_PAGE:
+ return MCSymbolRefExpr::VK_PAGE;
+ case LLVMDisassembler_VariantKind_ARM64_PAGEOFF:
+ return MCSymbolRefExpr::VK_PAGEOFF;
+ case LLVMDisassembler_VariantKind_ARM64_GOTPAGE:
+ return MCSymbolRefExpr::VK_GOTPAGE;
+ case LLVMDisassembler_VariantKind_ARM64_GOTPAGEOFF:
+ return MCSymbolRefExpr::VK_GOTPAGEOFF;
+ case LLVMDisassembler_VariantKind_ARM64_TLVP:
+ case LLVMDisassembler_VariantKind_ARM64_TLVOFF:
+ default:
+ assert(0 && "bad LLVMDisassembler_VariantKind");
+ return MCSymbolRefExpr::VK_None;
+ }
+}
+
+/// tryAddingSymbolicOperand - tryAddingSymbolicOperand tries to add a symbolic
+/// operand in place of the immediate Value in the MCInst. The immediate
+/// Value has not had any PC adjustment made by the caller. If the instruction
+/// is a branch that adds the PC to the immediate Value then isBranch is
+/// true, else false. If GetOpInfo is non-null, then it is called to get any
+/// symbolic information at the Address for this instruction. If that returns
+/// non-zero then the symbolic information it returns is used to create an
+/// MCExpr and that is added as an operand to the MCInst. If GetOpInfo()
+/// returns zero and isBranch is true then a symbol lookup for
+/// Address + Value is done and if a symbol is found an MCExpr is created with
+/// that, else an MCExpr with Address + Value is created. If GetOpInfo()
+/// returns zero and isBranch is false then the Opcode of the MCInst is
+/// tested, and for ADRP and other instructions that help load pointers,
+/// a symbol lookup is done to see if it returns a specific reference type
+/// to add to the comment stream. This function returns true if it adds
+/// an operand to the MCInst and false otherwise.
+bool AArch64ExternalSymbolizer::tryAddingSymbolicOperand(
+ MCInst &MI, raw_ostream &CommentStream, int64_t Value, uint64_t Address,
+ bool IsBranch, uint64_t Offset, uint64_t InstSize) {
+ // FIXME: This method shares a lot of code with
+ // MCExternalSymbolizer::tryAddingSymbolicOperand. It may be possible
+ // refactor the MCExternalSymbolizer interface to allow more of this
+ // implementation to be shared.
+ //
+ struct LLVMOpInfo1 SymbolicOp;
+ memset(&SymbolicOp, '\0', sizeof(struct LLVMOpInfo1));
+ SymbolicOp.Value = Value;
+ uint64_t ReferenceType;
+ const char *ReferenceName;
+ if (!GetOpInfo ||
+ !GetOpInfo(DisInfo, Address, 0 /* Offset */, InstSize, 1, &SymbolicOp)) {
+ if (IsBranch) {
+ ReferenceType = LLVMDisassembler_ReferenceType_In_Branch;
+ const char *Name = SymbolLookUp(DisInfo, Address + Value, &ReferenceType,
+ Address, &ReferenceName);
+ if (Name) {
+ SymbolicOp.AddSymbol.Name = Name;
+ SymbolicOp.AddSymbol.Present = true;
+ SymbolicOp.Value = 0;
+ } else {
+ SymbolicOp.Value = Address + Value;
+ }
+ if (ReferenceType == LLVMDisassembler_ReferenceType_Out_SymbolStub)
+ CommentStream << "symbol stub for: " << ReferenceName;
+ else if (ReferenceType ==
+ LLVMDisassembler_ReferenceType_Out_Objc_Message)
+ CommentStream << "Objc message: " << ReferenceName;
+ } else if (MI.getOpcode() == AArch64::ADRP) {
+ ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_ADRP;
+ // otool expects the fully encoded ADRP instruction to be passed in as
+ // the value here, so reconstruct it:
+ const MCRegisterInfo &MCRI = *Ctx.getRegisterInfo();
+ uint32_t EncodedInst = 0x90000000;
+ EncodedInst |= (Value & 0x3) << 29; // immlo
+ EncodedInst |= ((Value >> 2) & 0x7FFFF) << 5; // immhi
+ EncodedInst |= MCRI.getEncodingValue(MI.getOperand(0).getReg()); // reg
+ SymbolLookUp(DisInfo, EncodedInst, &ReferenceType, Address,
+ &ReferenceName);
+ CommentStream << format("0x%llx",
+ 0xfffffffffffff000LL & (Address + Value));
+ } else if (MI.getOpcode() == AArch64::ADDXri ||
+ MI.getOpcode() == AArch64::LDRXui ||
+ MI.getOpcode() == AArch64::LDRXl ||
+ MI.getOpcode() == AArch64::ADR) {
+ if (MI.getOpcode() == AArch64::ADDXri)
+ ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_ADDXri;
+ else if (MI.getOpcode() == AArch64::LDRXui)
+ ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_LDRXui;
+ if (MI.getOpcode() == AArch64::LDRXl) {
+ ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_LDRXl;
+ SymbolLookUp(DisInfo, Address + Value, &ReferenceType, Address,
+ &ReferenceName);
+ } else if (MI.getOpcode() == AArch64::ADR) {
+ ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_ADR;
+ SymbolLookUp(DisInfo, Address + Value, &ReferenceType, Address,
+ &ReferenceName);
+ } else {
+ const MCRegisterInfo &MCRI = *Ctx.getRegisterInfo();
+ // otool expects the fully encoded ADD/LDR instruction to be passed in
+ // as the value here, so reconstruct it:
+ unsigned EncodedInst =
+ MI.getOpcode() == AArch64::ADDXri ? 0x91000000: 0xF9400000;
+ EncodedInst |= Value << 10; // imm12 [+ shift:2 for ADD]
+ EncodedInst |=
+ MCRI.getEncodingValue(MI.getOperand(1).getReg()) << 5; // Rn
+ EncodedInst |= MCRI.getEncodingValue(MI.getOperand(0).getReg()); // Rd
+
+ SymbolLookUp(DisInfo, EncodedInst, &ReferenceType, Address,
+ &ReferenceName);
+ }
+ if (ReferenceType == LLVMDisassembler_ReferenceType_Out_LitPool_SymAddr)
+ CommentStream << "literal pool symbol address: " << ReferenceName;
+ else if (ReferenceType ==
+ LLVMDisassembler_ReferenceType_Out_LitPool_CstrAddr)
+ CommentStream << "literal pool for: \"" << ReferenceName << "\"";
+ else if (ReferenceType ==
+ LLVMDisassembler_ReferenceType_Out_Objc_CFString_Ref)
+ CommentStream << "Objc cfstring ref: @\"" << ReferenceName << "\"";
+ else if (ReferenceType ==
+ LLVMDisassembler_ReferenceType_Out_Objc_Message)
+ CommentStream << "Objc message: " << ReferenceName;
+ else if (ReferenceType ==
+ LLVMDisassembler_ReferenceType_Out_Objc_Message_Ref)
+ CommentStream << "Objc message ref: " << ReferenceName;
+ else if (ReferenceType ==
+ LLVMDisassembler_ReferenceType_Out_Objc_Selector_Ref)
+ CommentStream << "Objc selector ref: " << ReferenceName;
+ else if (ReferenceType ==
+ LLVMDisassembler_ReferenceType_Out_Objc_Class_Ref)
+ CommentStream << "Objc class ref: " << ReferenceName;
+ // For these instructions, the SymbolLookUp() above is just to get the
+ // ReferenceType and ReferenceName. We want to make sure not to fall
+ // through and build an MCExpr; the disassembly of these instructions'
+ // immediate values is left to the InstPrinter.
+ return false;
+ } else {
+ return false;
+ }
+ }
+
+ const MCExpr *Add = nullptr;
+ if (SymbolicOp.AddSymbol.Present) {
+ if (SymbolicOp.AddSymbol.Name) {
+ StringRef Name(SymbolicOp.AddSymbol.Name);
+ MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name);
+ MCSymbolRefExpr::VariantKind Variant = getVariant(SymbolicOp.VariantKind);
+ if (Variant != MCSymbolRefExpr::VK_None)
+ Add = MCSymbolRefExpr::Create(Sym, Variant, Ctx);
+ else
+ Add = MCSymbolRefExpr::Create(Sym, Ctx);
+ } else {
+ Add = MCConstantExpr::Create(SymbolicOp.AddSymbol.Value, Ctx);
+ }
+ }
+
+ const MCExpr *Sub = nullptr;
+ if (SymbolicOp.SubtractSymbol.Present) {
+ if (SymbolicOp.SubtractSymbol.Name) {
+ StringRef Name(SymbolicOp.SubtractSymbol.Name);
+ MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name);
+ Sub = MCSymbolRefExpr::Create(Sym, Ctx);
+ } else {
+ Sub = MCConstantExpr::Create(SymbolicOp.SubtractSymbol.Value, Ctx);
+ }
+ }
+
+ const MCExpr *Off = nullptr;
+ if (SymbolicOp.Value != 0)
+ Off = MCConstantExpr::Create(SymbolicOp.Value, Ctx);
+
+ const MCExpr *Expr;
+ if (Sub) {
+ const MCExpr *LHS;
+ if (Add)
+ LHS = MCBinaryExpr::CreateSub(Add, Sub, Ctx);
+ else
+ LHS = MCUnaryExpr::CreateMinus(Sub, Ctx);
+ if (Off)
+ Expr = MCBinaryExpr::CreateAdd(LHS, Off, Ctx);
+ else
+ Expr = LHS;
+ } else if (Add) {
+ if (Off)
+ Expr = MCBinaryExpr::CreateAdd(Add, Off, Ctx);
+ else
+ Expr = Add;
+ } else {
+ if (Off)
+ Expr = Off;
+ else
+ Expr = MCConstantExpr::Create(0, Ctx);
+ }
+
+ MI.addOperand(MCOperand::CreateExpr(Expr));
+
+ return true;
+}
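
GetOpInfo and SymbolLookUp are the client callbacks from llvm-c/Disassembler.h, supplied when the disassembler context is created. A hedged sketch of a client wiring in only a symbol-lookup callback (the triple, the toy symbol table, and the skipped error handling are placeholders; the C API calls are believed to exist in this form in this LLVM era):

```cpp
#include <cstdint>
#include <cstdio>
#include "llvm-c/Disassembler.h"
#include "llvm-c/Target.h"

// Called with, e.g., Address + Value for branches. Returning a name lets the
// symbolizer attach an MCSymbolRefExpr instead of a bare immediate.
static const char *lookup(void *DisInfo, uint64_t ReferenceValue,
                          uint64_t *ReferenceType, uint64_t ReferencePC,
                          const char **ReferenceName) {
  *ReferenceType = LLVMDisassembler_ReferenceType_InOut_None;
  *ReferenceName = nullptr;
  return ReferenceValue == 0x1000 ? "_main" : nullptr; // toy symbol table
}

int main() {
  LLVMInitializeAllTargetInfos();
  LLVMInitializeAllTargetMCs();
  LLVMInitializeAllDisassemblers();

  LLVMDisasmContextRef DC =
      LLVMCreateDisasm("aarch64-linux-gnu", nullptr, 0, nullptr, lookup);
  uint8_t bytes[] = {0x00, 0x00, 0x00, 0x94}; // bl . (imm26 == 0)
  char text[128];
  if (DC && LLVMDisasmInstruction(DC, bytes, sizeof(bytes), 0x1000, text,
                                  sizeof(text)))
    std::printf("%s\n", text); // expect the branch to resolve to _main
  if (DC)
    LLVMDisasmDispose(DC);
}
```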
diff --git a/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.h b/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.h
new file mode 100644
index 0000000..171d31c
--- /dev/null
+++ b/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.h
@@ -0,0 +1,38 @@
+//===- AArch64ExternalSymbolizer.h - Symbolizer for AArch64 -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Symbolize AArch64 assembly code during disassembly using callbacks.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef AArch64EXTERNALSYMBOLIZER_H
+#define AArch64EXTERNALSYMBOLIZER_H
+
+#include "llvm/MC/MCExternalSymbolizer.h"
+
+namespace llvm {
+
+class AArch64ExternalSymbolizer : public MCExternalSymbolizer {
+public:
+ AArch64ExternalSymbolizer(MCContext &Ctx,
+ std::unique_ptr<MCRelocationInfo> RelInfo,
+ LLVMOpInfoCallback GetOpInfo,
+ LLVMSymbolLookupCallback SymbolLookUp,
+ void *DisInfo)
+ : MCExternalSymbolizer(Ctx, std::move(RelInfo), GetOpInfo, SymbolLookUp,
+ DisInfo) {}
+
+ bool tryAddingSymbolicOperand(MCInst &MI, raw_ostream &CommentStream,
+ int64_t Value, uint64_t Address, bool IsBranch,
+ uint64_t Offset, uint64_t InstSize) override;
+};
+
+} // namespace llvm
+
+#endif
diff --git a/lib/Target/AArch64/Disassembler/Android.mk b/lib/Target/AArch64/Disassembler/Android.mk
index fcc53ad..b89538d 100644
--- a/lib/Target/AArch64/Disassembler/Android.mk
+++ b/lib/Target/AArch64/Disassembler/Android.mk
@@ -7,7 +7,8 @@ arm64_disassembler_TBLGEN_TABLES := \
AArch64GenRegisterInfo.inc
arm64_disassembler_SRC_FILES := \
- AArch64Disassembler.cpp
+ AArch64Disassembler.cpp \
+ AArch64ExternalSymbolizer.cpp
# For the device
# =====================================================
diff --git a/lib/Target/AArch64/Disassembler/CMakeLists.txt b/lib/Target/AArch64/Disassembler/CMakeLists.txt
index 21baf25..be4ccad 100644
--- a/lib/Target/AArch64/Disassembler/CMakeLists.txt
+++ b/lib/Target/AArch64/Disassembler/CMakeLists.txt
@@ -1,3 +1,14 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
add_llvm_library(LLVMAArch64Disassembler
AArch64Disassembler.cpp
+ AArch64ExternalSymbolizer.cpp
)
+# workaround for hanging compilation on MSVC8, 9 and 10
+#if( MSVC_VERSION EQUAL 1400 OR MSVC_VERSION EQUAL 1500 OR MSVC_VERSION EQUAL 1600 )
+#set_property(
+# SOURCE ARMDisassembler.cpp
+# PROPERTY COMPILE_FLAGS "/Od"
+# )
+#endif()
+add_dependencies(LLVMAArch64Disassembler AArch64CommonTableGen)
diff --git a/lib/Target/AArch64/Disassembler/LLVMBuild.txt b/lib/Target/AArch64/Disassembler/LLVMBuild.txt
index 05c4ed1..a4224f4 100644
--- a/lib/Target/AArch64/Disassembler/LLVMBuild.txt
+++ b/lib/Target/AArch64/Disassembler/LLVMBuild.txt
@@ -1,4 +1,4 @@
-;===- ./lib/Target/AArch64/Disassembler/LLVMBuild.txt ----------*- Conf -*--===;
+;===- ./lib/Target/AArch64/Disassembler/LLVMBuild.txt ------------*- Conf -*--===;
;
; The LLVM Compiler Infrastructure
;
diff --git a/lib/Target/AArch64/Disassembler/Makefile b/lib/Target/AArch64/Disassembler/Makefile
index 5c86120..741bb81 100644
--- a/lib/Target/AArch64/Disassembler/Makefile
+++ b/lib/Target/AArch64/Disassembler/Makefile
@@ -10,7 +10,7 @@
LEVEL = ../../../..
LIBRARYNAME = LLVMAArch64Disassembler
-# Hack: we need to include 'main' target directory to grab private headers
+# Hack: we need to include 'main' arm target directory to grab private headers
CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
include $(LEVEL)/Makefile.common
diff --git a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp
index fd3f009..f484a5b 100644
--- a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp
+++ b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp
@@ -11,529 +11,1306 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "asm-printer"
#include "AArch64InstPrinter.h"
-#include "MCTargetDesc/AArch64MCTargetDesc.h"
+#include "MCTargetDesc/AArch64AddressingModes.h"
#include "Utils/AArch64BaseInfo.h"
-#include "llvm/MC/MCExpr.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCRegisterInfo.h"
-#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
-
using namespace llvm;
+#define DEBUG_TYPE "asm-printer"
+
#define GET_INSTRUCTION_NAME
#define PRINT_ALIAS_INSTR
#include "AArch64GenAsmWriter.inc"
-
-static int64_t unpackSignedImm(int BitWidth, uint64_t Value) {
- assert(!(Value & ~((1ULL << BitWidth)-1)) && "immediate not n-bit");
- if (Value & (1ULL << (BitWidth - 1)))
- return static_cast<int64_t>(Value) - (1LL << BitWidth);
- else
- return Value;
-}
+#define GET_INSTRUCTION_NAME
+#define PRINT_ALIAS_INSTR
+#include "AArch64GenAsmWriter1.inc"
AArch64InstPrinter::AArch64InstPrinter(const MCAsmInfo &MAI,
const MCInstrInfo &MII,
const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI) :
- MCInstPrinter(MAI, MII, MRI) {
+ const MCSubtargetInfo &STI)
+ : MCInstPrinter(MAI, MII, MRI) {
// Initialize the set of available features.
setAvailableFeatures(STI.getFeatureBits());
}
+AArch64AppleInstPrinter::AArch64AppleInstPrinter(const MCAsmInfo &MAI,
+ const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI,
+ const MCSubtargetInfo &STI)
+ : AArch64InstPrinter(MAI, MII, MRI, STI) {}
+
void AArch64InstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
+ // This is for .cfi directives.
OS << getRegisterName(RegNo);
}
-void
-AArch64InstPrinter::printOffsetSImm9Operand(const MCInst *MI,
- unsigned OpNum, raw_ostream &O) {
- const MCOperand &MOImm = MI->getOperand(OpNum);
- int32_t Imm = unpackSignedImm(9, MOImm.getImm());
-
- O << '#' << Imm;
-}
-
-void
-AArch64InstPrinter::printAddrRegExtendOperand(const MCInst *MI, unsigned OpNum,
- raw_ostream &O, unsigned MemSize,
- unsigned RmSize) {
- unsigned ExtImm = MI->getOperand(OpNum).getImm();
- unsigned OptionHi = ExtImm >> 1;
- unsigned S = ExtImm & 1;
- bool IsLSL = OptionHi == 1 && RmSize == 64;
-
- const char *Ext;
- switch (OptionHi) {
- case 1:
- Ext = (RmSize == 32) ? "uxtw" : "lsl";
- break;
- case 3:
- Ext = (RmSize == 32) ? "sxtw" : "sxtx";
- break;
- default:
- llvm_unreachable("Incorrect Option on load/store (reg offset)");
- }
- O << Ext;
+void AArch64InstPrinter::printInst(const MCInst *MI, raw_ostream &O,
+ StringRef Annot) {
+ // Check for special encodings and print the canonical alias instead.
- if (S) {
- unsigned ShiftAmt = Log2_32(MemSize);
- O << " #" << ShiftAmt;
- } else if (IsLSL) {
- O << " #0";
- }
-}
+ unsigned Opcode = MI->getOpcode();
-void
-AArch64InstPrinter::printAddSubImmLSL0Operand(const MCInst *MI,
- unsigned OpNum, raw_ostream &O) {
- const MCOperand &Imm12Op = MI->getOperand(OpNum);
+ if (Opcode == AArch64::SYSxt)
+ if (printSysAlias(MI, O)) {
+ printAnnotation(O, Annot);
+ return;
+ }
- if (Imm12Op.isImm()) {
- int64_t Imm12 = Imm12Op.getImm();
- assert(Imm12 >= 0 && "Invalid immediate for add/sub imm");
- O << "#" << Imm12;
- } else {
- assert(Imm12Op.isExpr() && "Unexpected shift operand type");
- O << "#" << *Imm12Op.getExpr();
- }
-}
+ // SBFM/UBFM should print to a nicer aliased form if possible.
+ if (Opcode == AArch64::SBFMXri || Opcode == AArch64::SBFMWri ||
+ Opcode == AArch64::UBFMXri || Opcode == AArch64::UBFMWri) {
+ const MCOperand &Op0 = MI->getOperand(0);
+ const MCOperand &Op1 = MI->getOperand(1);
+ const MCOperand &Op2 = MI->getOperand(2);
+ const MCOperand &Op3 = MI->getOperand(3);
+
+ bool IsSigned = (Opcode == AArch64::SBFMXri || Opcode == AArch64::SBFMWri);
+ bool Is64Bit = (Opcode == AArch64::SBFMXri || Opcode == AArch64::UBFMXri);
+ if (Op2.isImm() && Op2.getImm() == 0 && Op3.isImm()) {
+ const char *AsmMnemonic = nullptr;
+
+ switch (Op3.getImm()) {
+ default:
+ break;
+ case 7:
+ if (IsSigned)
+ AsmMnemonic = "sxtb";
+ else if (!Is64Bit)
+ AsmMnemonic = "uxtb";
+ break;
+ case 15:
+ if (IsSigned)
+ AsmMnemonic = "sxth";
+ else if (!Is64Bit)
+ AsmMnemonic = "uxth";
+ break;
+ case 31:
+ // *xtw is only valid for signed 64-bit operations.
+ if (Is64Bit && IsSigned)
+ AsmMnemonic = "sxtw";
+ break;
+ }
+
+ if (AsmMnemonic) {
+ O << '\t' << AsmMnemonic << '\t' << getRegisterName(Op0.getReg())
+ << ", " << getRegisterName(getWRegFromXReg(Op1.getReg()));
+ printAnnotation(O, Annot);
+ return;
+ }
+ }
-void
-AArch64InstPrinter::printAddSubImmLSL12Operand(const MCInst *MI, unsigned OpNum,
- raw_ostream &O) {
+ // All immediate shifts are aliases, implemented using the Bitfield
+ // instruction. In all cases the immediate shift amount must be in
+ // the range 0 to (reg.size - 1).
+ if (Op2.isImm() && Op3.isImm()) {
+ const char *AsmMnemonic = nullptr;
+ int shift = 0;
+ int64_t immr = Op2.getImm();
+ int64_t imms = Op3.getImm();
+ if (Opcode == AArch64::UBFMWri && imms != 0x1F && ((imms + 1) == immr)) {
+ AsmMnemonic = "lsl";
+ shift = 31 - imms;
+ } else if (Opcode == AArch64::UBFMXri && imms != 0x3f &&
+ ((imms + 1 == immr))) {
+ AsmMnemonic = "lsl";
+ shift = 63 - imms;
+ } else if (Opcode == AArch64::UBFMWri && imms == 0x1f) {
+ AsmMnemonic = "lsr";
+ shift = immr;
+ } else if (Opcode == AArch64::UBFMXri && imms == 0x3f) {
+ AsmMnemonic = "lsr";
+ shift = immr;
+ } else if (Opcode == AArch64::SBFMWri && imms == 0x1f) {
+ AsmMnemonic = "asr";
+ shift = immr;
+ } else if (Opcode == AArch64::SBFMXri && imms == 0x3f) {
+ AsmMnemonic = "asr";
+ shift = immr;
+ }
+ if (AsmMnemonic) {
+ O << '\t' << AsmMnemonic << '\t' << getRegisterName(Op0.getReg())
+ << ", " << getRegisterName(Op1.getReg()) << ", #" << shift;
+ printAnnotation(O, Annot);
+ return;
+ }
+ }
- printAddSubImmLSL0Operand(MI, OpNum, O);
+ // SBFIZ/UBFIZ aliases
+ if (Op2.getImm() > Op3.getImm()) {
+ O << '\t' << (IsSigned ? "sbfiz" : "ubfiz") << '\t'
+ << getRegisterName(Op0.getReg()) << ", " << getRegisterName(Op1.getReg())
+ << ", #" << (Is64Bit ? 64 : 32) - Op2.getImm() << ", #" << Op3.getImm() + 1;
+ printAnnotation(O, Annot);
+ return;
+ }
- O << ", lsl #12";
-}
+ // Otherwise SBFX/UBFX is the preferred form
+ O << '\t' << (IsSigned ? "sbfx" : "ubfx") << '\t'
+ << getRegisterName(Op0.getReg()) << ", " << getRegisterName(Op1.getReg())
+ << ", #" << Op2.getImm() << ", #" << Op3.getImm() - Op2.getImm() + 1;
+ printAnnotation(O, Annot);
+ return;
+ }
-void
-AArch64InstPrinter::printBareImmOperand(const MCInst *MI, unsigned OpNum,
- raw_ostream &O) {
- const MCOperand &MO = MI->getOperand(OpNum);
- O << MO.getImm();
-}
+ if (Opcode == AArch64::BFMXri || Opcode == AArch64::BFMWri) {
+ const MCOperand &Op0 = MI->getOperand(0); // Op1 == Op0
+ const MCOperand &Op2 = MI->getOperand(2);
+ int ImmR = MI->getOperand(3).getImm();
+ int ImmS = MI->getOperand(4).getImm();
+
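+ // ImmS < ImmR encodes BFI: insert Width = ImmS+1 bits at
+ // LSB = (size - ImmR) % size; otherwise BFXIL extracts ImmS-ImmR+1 bits
+ // starting at bit ImmR.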
+ // BFI alias
+ if (ImmS < ImmR) {
+ int BitWidth = Opcode == AArch64::BFMXri ? 64 : 32;
+ int LSB = (BitWidth - ImmR) % BitWidth;
+ int Width = ImmS + 1;
+ O << "\tbfi\t" << getRegisterName(Op0.getReg()) << ", "
+ << getRegisterName(Op2.getReg()) << ", #" << LSB << ", #" << Width;
+ printAnnotation(O, Annot);
+ return;
+ }
-template<unsigned RegWidth> void
-AArch64InstPrinter::printBFILSBOperand(const MCInst *MI, unsigned OpNum,
- raw_ostream &O) {
- const MCOperand &ImmROp = MI->getOperand(OpNum);
- unsigned LSB = ImmROp.getImm() == 0 ? 0 : RegWidth - ImmROp.getImm();
+ int LSB = ImmR;
+ int Width = ImmS - ImmR + 1;
+ // Otherwise BFXIL is the preferred form.
+ O << "\tbfxil\t"
+ << getRegisterName(Op0.getReg()) << ", " << getRegisterName(Op2.getReg())
+ << ", #" << LSB << ", #" << Width;
+ printAnnotation(O, Annot);
+ return;
+ }
- O << '#' << LSB;
-}
+ // Symbolic operands for MOVZ, MOVN and MOVK already imply a shift
+ // (e.g. :gottprel_g1: is always going to be "lsl #16") so it should not be
+ // printed.
+ if ((Opcode == AArch64::MOVZXi || Opcode == AArch64::MOVZWi ||
+ Opcode == AArch64::MOVNXi || Opcode == AArch64::MOVNWi) &&
+ MI->getOperand(1).isExpr()) {
+ if (Opcode == AArch64::MOVZXi || Opcode == AArch64::MOVZWi)
+ O << "\tmovz\t";
+ else
+ O << "\tmovn\t";
-void AArch64InstPrinter::printBFIWidthOperand(const MCInst *MI, unsigned OpNum,
- raw_ostream &O) {
- const MCOperand &ImmSOp = MI->getOperand(OpNum);
- unsigned Width = ImmSOp.getImm() + 1;
+ O << getRegisterName(MI->getOperand(0).getReg()) << ", #"
+ << *MI->getOperand(1).getExpr();
+ return;
+ }
- O << '#' << Width;
-}
+ if ((Opcode == AArch64::MOVKXi || Opcode == AArch64::MOVKWi) &&
+ MI->getOperand(2).isExpr()) {
+ O << "\tmovk\t" << getRegisterName(MI->getOperand(0).getReg()) << ", #"
+ << *MI->getOperand(2).getExpr();
+ return;
+ }
-void
-AArch64InstPrinter::printBFXWidthOperand(const MCInst *MI, unsigned OpNum,
- raw_ostream &O) {
- const MCOperand &ImmSOp = MI->getOperand(OpNum);
- const MCOperand &ImmROp = MI->getOperand(OpNum - 1);
+ if (!printAliasInstr(MI, O))
+ printInstruction(MI, O);
- unsigned ImmR = ImmROp.getImm();
- unsigned ImmS = ImmSOp.getImm();
+ printAnnotation(O, Annot);
+}
- assert(ImmS >= ImmR && "Invalid ImmR, ImmS combination for bitfield extract");
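+// Recognize the TBL/TBX family, reporting the arrangement suffix and whether
+// the instruction is TBX (which leaves out-of-range lanes unchanged).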
+static bool isTblTbxInstruction(unsigned Opcode, StringRef &Layout,
+ bool &IsTbx) {
+ switch (Opcode) {
+ case AArch64::TBXv8i8One:
+ case AArch64::TBXv8i8Two:
+ case AArch64::TBXv8i8Three:
+ case AArch64::TBXv8i8Four:
+ IsTbx = true;
+ Layout = ".8b";
+ return true;
+ case AArch64::TBLv8i8One:
+ case AArch64::TBLv8i8Two:
+ case AArch64::TBLv8i8Three:
+ case AArch64::TBLv8i8Four:
+ IsTbx = false;
+ Layout = ".8b";
+ return true;
+ case AArch64::TBXv16i8One:
+ case AArch64::TBXv16i8Two:
+ case AArch64::TBXv16i8Three:
+ case AArch64::TBXv16i8Four:
+ IsTbx = true;
+ Layout = ".16b";
+ return true;
+ case AArch64::TBLv16i8One:
+ case AArch64::TBLv16i8Two:
+ case AArch64::TBLv16i8Three:
+ case AArch64::TBLv16i8Four:
+ IsTbx = false;
+ Layout = ".16b";
+ return true;
+ default:
+ return false;
+ }
+}
- O << '#' << (ImmS - ImmR + 1);
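+// Describes one LD1-LD4/ST1-ST4 form: which operand holds the register list,
+// whether a lane index follows it, and the fixed post-index step implied when
+// the written-back offset register is XZR.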
+struct LdStNInstrDesc {
+ unsigned Opcode;
+ const char *Mnemonic;
+ const char *Layout;
+ int ListOperand;
+ bool HasLane;
+ int NaturalOffset;
+};
+
+static LdStNInstrDesc LdStNInstInfo[] = {
+ { AArch64::LD1i8, "ld1", ".b", 1, true, 0 },
+ { AArch64::LD1i16, "ld1", ".h", 1, true, 0 },
+ { AArch64::LD1i32, "ld1", ".s", 1, true, 0 },
+ { AArch64::LD1i64, "ld1", ".d", 1, true, 0 },
+ { AArch64::LD1i8_POST, "ld1", ".b", 2, true, 1 },
+ { AArch64::LD1i16_POST, "ld1", ".h", 2, true, 2 },
+ { AArch64::LD1i32_POST, "ld1", ".s", 2, true, 4 },
+ { AArch64::LD1i64_POST, "ld1", ".d", 2, true, 8 },
+ { AArch64::LD1Rv16b, "ld1r", ".16b", 0, false, 0 },
+ { AArch64::LD1Rv8h, "ld1r", ".8h", 0, false, 0 },
+ { AArch64::LD1Rv4s, "ld1r", ".4s", 0, false, 0 },
+ { AArch64::LD1Rv2d, "ld1r", ".2d", 0, false, 0 },
+ { AArch64::LD1Rv8b, "ld1r", ".8b", 0, false, 0 },
+ { AArch64::LD1Rv4h, "ld1r", ".4h", 0, false, 0 },
+ { AArch64::LD1Rv2s, "ld1r", ".2s", 0, false, 0 },
+ { AArch64::LD1Rv1d, "ld1r", ".1d", 0, false, 0 },
+ { AArch64::LD1Rv16b_POST, "ld1r", ".16b", 1, false, 1 },
+ { AArch64::LD1Rv8h_POST, "ld1r", ".8h", 1, false, 2 },
+ { AArch64::LD1Rv4s_POST, "ld1r", ".4s", 1, false, 4 },
+ { AArch64::LD1Rv2d_POST, "ld1r", ".2d", 1, false, 8 },
+ { AArch64::LD1Rv8b_POST, "ld1r", ".8b", 1, false, 1 },
+ { AArch64::LD1Rv4h_POST, "ld1r", ".4h", 1, false, 2 },
+ { AArch64::LD1Rv2s_POST, "ld1r", ".2s", 1, false, 4 },
+ { AArch64::LD1Rv1d_POST, "ld1r", ".1d", 1, false, 8 },
+ { AArch64::LD1Onev16b, "ld1", ".16b", 0, false, 0 },
+ { AArch64::LD1Onev8h, "ld1", ".8h", 0, false, 0 },
+ { AArch64::LD1Onev4s, "ld1", ".4s", 0, false, 0 },
+ { AArch64::LD1Onev2d, "ld1", ".2d", 0, false, 0 },
+ { AArch64::LD1Onev8b, "ld1", ".8b", 0, false, 0 },
+ { AArch64::LD1Onev4h, "ld1", ".4h", 0, false, 0 },
+ { AArch64::LD1Onev2s, "ld1", ".2s", 0, false, 0 },
+ { AArch64::LD1Onev1d, "ld1", ".1d", 0, false, 0 },
+ { AArch64::LD1Onev16b_POST, "ld1", ".16b", 1, false, 16 },
+ { AArch64::LD1Onev8h_POST, "ld1", ".8h", 1, false, 16 },
+ { AArch64::LD1Onev4s_POST, "ld1", ".4s", 1, false, 16 },
+ { AArch64::LD1Onev2d_POST, "ld1", ".2d", 1, false, 16 },
+ { AArch64::LD1Onev8b_POST, "ld1", ".8b", 1, false, 8 },
+ { AArch64::LD1Onev4h_POST, "ld1", ".4h", 1, false, 8 },
+ { AArch64::LD1Onev2s_POST, "ld1", ".2s", 1, false, 8 },
+ { AArch64::LD1Onev1d_POST, "ld1", ".1d", 1, false, 8 },
+ { AArch64::LD1Twov16b, "ld1", ".16b", 0, false, 0 },
+ { AArch64::LD1Twov8h, "ld1", ".8h", 0, false, 0 },
+ { AArch64::LD1Twov4s, "ld1", ".4s", 0, false, 0 },
+ { AArch64::LD1Twov2d, "ld1", ".2d", 0, false, 0 },
+ { AArch64::LD1Twov8b, "ld1", ".8b", 0, false, 0 },
+ { AArch64::LD1Twov4h, "ld1", ".4h", 0, false, 0 },
+ { AArch64::LD1Twov2s, "ld1", ".2s", 0, false, 0 },
+ { AArch64::LD1Twov1d, "ld1", ".1d", 0, false, 0 },
+ { AArch64::LD1Twov16b_POST, "ld1", ".16b", 1, false, 32 },
+ { AArch64::LD1Twov8h_POST, "ld1", ".8h", 1, false, 32 },
+ { AArch64::LD1Twov4s_POST, "ld1", ".4s", 1, false, 32 },
+ { AArch64::LD1Twov2d_POST, "ld1", ".2d", 1, false, 32 },
+ { AArch64::LD1Twov8b_POST, "ld1", ".8b", 1, false, 16 },
+ { AArch64::LD1Twov4h_POST, "ld1", ".4h", 1, false, 16 },
+ { AArch64::LD1Twov2s_POST, "ld1", ".2s", 1, false, 16 },
+ { AArch64::LD1Twov1d_POST, "ld1", ".1d", 1, false, 16 },
+ { AArch64::LD1Threev16b, "ld1", ".16b", 0, false, 0 },
+ { AArch64::LD1Threev8h, "ld1", ".8h", 0, false, 0 },
+ { AArch64::LD1Threev4s, "ld1", ".4s", 0, false, 0 },
+ { AArch64::LD1Threev2d, "ld1", ".2d", 0, false, 0 },
+ { AArch64::LD1Threev8b, "ld1", ".8b", 0, false, 0 },
+ { AArch64::LD1Threev4h, "ld1", ".4h", 0, false, 0 },
+ { AArch64::LD1Threev2s, "ld1", ".2s", 0, false, 0 },
+ { AArch64::LD1Threev1d, "ld1", ".1d", 0, false, 0 },
+ { AArch64::LD1Threev16b_POST, "ld1", ".16b", 1, false, 48 },
+ { AArch64::LD1Threev8h_POST, "ld1", ".8h", 1, false, 48 },
+ { AArch64::LD1Threev4s_POST, "ld1", ".4s", 1, false, 48 },
+ { AArch64::LD1Threev2d_POST, "ld1", ".2d", 1, false, 48 },
+ { AArch64::LD1Threev8b_POST, "ld1", ".8b", 1, false, 24 },
+ { AArch64::LD1Threev4h_POST, "ld1", ".4h", 1, false, 24 },
+ { AArch64::LD1Threev2s_POST, "ld1", ".2s", 1, false, 24 },
+ { AArch64::LD1Threev1d_POST, "ld1", ".1d", 1, false, 24 },
+ { AArch64::LD1Fourv16b, "ld1", ".16b", 0, false, 0 },
+ { AArch64::LD1Fourv8h, "ld1", ".8h", 0, false, 0 },
+ { AArch64::LD1Fourv4s, "ld1", ".4s", 0, false, 0 },
+ { AArch64::LD1Fourv2d, "ld1", ".2d", 0, false, 0 },
+ { AArch64::LD1Fourv8b, "ld1", ".8b", 0, false, 0 },
+ { AArch64::LD1Fourv4h, "ld1", ".4h", 0, false, 0 },
+ { AArch64::LD1Fourv2s, "ld1", ".2s", 0, false, 0 },
+ { AArch64::LD1Fourv1d, "ld1", ".1d", 0, false, 0 },
+ { AArch64::LD1Fourv16b_POST, "ld1", ".16b", 1, false, 64 },
+ { AArch64::LD1Fourv8h_POST, "ld1", ".8h", 1, false, 64 },
+ { AArch64::LD1Fourv4s_POST, "ld1", ".4s", 1, false, 64 },
+ { AArch64::LD1Fourv2d_POST, "ld1", ".2d", 1, false, 64 },
+ { AArch64::LD1Fourv8b_POST, "ld1", ".8b", 1, false, 32 },
+ { AArch64::LD1Fourv4h_POST, "ld1", ".4h", 1, false, 32 },
+ { AArch64::LD1Fourv2s_POST, "ld1", ".2s", 1, false, 32 },
+ { AArch64::LD1Fourv1d_POST, "ld1", ".1d", 1, false, 32 },
+ { AArch64::LD2i8, "ld2", ".b", 1, true, 0 },
+ { AArch64::LD2i16, "ld2", ".h", 1, true, 0 },
+ { AArch64::LD2i32, "ld2", ".s", 1, true, 0 },
+ { AArch64::LD2i64, "ld2", ".d", 1, true, 0 },
+ { AArch64::LD2i8_POST, "ld2", ".b", 2, true, 2 },
+ { AArch64::LD2i16_POST, "ld2", ".h", 2, true, 4 },
+ { AArch64::LD2i32_POST, "ld2", ".s", 2, true, 8 },
+ { AArch64::LD2i64_POST, "ld2", ".d", 2, true, 16 },
+ { AArch64::LD2Rv16b, "ld2r", ".16b", 0, false, 0 },
+ { AArch64::LD2Rv8h, "ld2r", ".8h", 0, false, 0 },
+ { AArch64::LD2Rv4s, "ld2r", ".4s", 0, false, 0 },
+ { AArch64::LD2Rv2d, "ld2r", ".2d", 0, false, 0 },
+ { AArch64::LD2Rv8b, "ld2r", ".8b", 0, false, 0 },
+ { AArch64::LD2Rv4h, "ld2r", ".4h", 0, false, 0 },
+ { AArch64::LD2Rv2s, "ld2r", ".2s", 0, false, 0 },
+ { AArch64::LD2Rv1d, "ld2r", ".1d", 0, false, 0 },
+ { AArch64::LD2Rv16b_POST, "ld2r", ".16b", 1, false, 2 },
+ { AArch64::LD2Rv8h_POST, "ld2r", ".8h", 1, false, 4 },
+ { AArch64::LD2Rv4s_POST, "ld2r", ".4s", 1, false, 8 },
+ { AArch64::LD2Rv2d_POST, "ld2r", ".2d", 1, false, 16 },
+ { AArch64::LD2Rv8b_POST, "ld2r", ".8b", 1, false, 2 },
+ { AArch64::LD2Rv4h_POST, "ld2r", ".4h", 1, false, 4 },
+ { AArch64::LD2Rv2s_POST, "ld2r", ".2s", 1, false, 8 },
+ { AArch64::LD2Rv1d_POST, "ld2r", ".1d", 1, false, 16 },
+ { AArch64::LD2Twov16b, "ld2", ".16b", 0, false, 0 },
+ { AArch64::LD2Twov8h, "ld2", ".8h", 0, false, 0 },
+ { AArch64::LD2Twov4s, "ld2", ".4s", 0, false, 0 },
+ { AArch64::LD2Twov2d, "ld2", ".2d", 0, false, 0 },
+ { AArch64::LD2Twov8b, "ld2", ".8b", 0, false, 0 },
+ { AArch64::LD2Twov4h, "ld2", ".4h", 0, false, 0 },
+ { AArch64::LD2Twov2s, "ld2", ".2s", 0, false, 0 },
+ { AArch64::LD2Twov16b_POST, "ld2", ".16b", 1, false, 32 },
+ { AArch64::LD2Twov8h_POST, "ld2", ".8h", 1, false, 32 },
+ { AArch64::LD2Twov4s_POST, "ld2", ".4s", 1, false, 32 },
+ { AArch64::LD2Twov2d_POST, "ld2", ".2d", 1, false, 32 },
+ { AArch64::LD2Twov8b_POST, "ld2", ".8b", 1, false, 16 },
+ { AArch64::LD2Twov4h_POST, "ld2", ".4h", 1, false, 16 },
+ { AArch64::LD2Twov2s_POST, "ld2", ".2s", 1, false, 16 },
+ { AArch64::LD3i8, "ld3", ".b", 1, true, 0 },
+ { AArch64::LD3i16, "ld3", ".h", 1, true, 0 },
+ { AArch64::LD3i32, "ld3", ".s", 1, true, 0 },
+ { AArch64::LD3i64, "ld3", ".d", 1, true, 0 },
+ { AArch64::LD3i8_POST, "ld3", ".b", 2, true, 3 },
+ { AArch64::LD3i16_POST, "ld3", ".h", 2, true, 6 },
+ { AArch64::LD3i32_POST, "ld3", ".s", 2, true, 12 },
+ { AArch64::LD3i64_POST, "ld3", ".d", 2, true, 24 },
+ { AArch64::LD3Rv16b, "ld3r", ".16b", 0, false, 0 },
+ { AArch64::LD3Rv8h, "ld3r", ".8h", 0, false, 0 },
+ { AArch64::LD3Rv4s, "ld3r", ".4s", 0, false, 0 },
+ { AArch64::LD3Rv2d, "ld3r", ".2d", 0, false, 0 },
+ { AArch64::LD3Rv8b, "ld3r", ".8b", 0, false, 0 },
+ { AArch64::LD3Rv4h, "ld3r", ".4h", 0, false, 0 },
+ { AArch64::LD3Rv2s, "ld3r", ".2s", 0, false, 0 },
+ { AArch64::LD3Rv1d, "ld3r", ".1d", 0, false, 0 },
+ { AArch64::LD3Rv16b_POST, "ld3r", ".16b", 1, false, 3 },
+ { AArch64::LD3Rv8h_POST, "ld3r", ".8h", 1, false, 6 },
+ { AArch64::LD3Rv4s_POST, "ld3r", ".4s", 1, false, 12 },
+ { AArch64::LD3Rv2d_POST, "ld3r", ".2d", 1, false, 24 },
+ { AArch64::LD3Rv8b_POST, "ld3r", ".8b", 1, false, 3 },
+ { AArch64::LD3Rv4h_POST, "ld3r", ".4h", 1, false, 6 },
+ { AArch64::LD3Rv2s_POST, "ld3r", ".2s", 1, false, 12 },
+ { AArch64::LD3Rv1d_POST, "ld3r", ".1d", 1, false, 24 },
+ { AArch64::LD3Threev16b, "ld3", ".16b", 0, false, 0 },
+ { AArch64::LD3Threev8h, "ld3", ".8h", 0, false, 0 },
+ { AArch64::LD3Threev4s, "ld3", ".4s", 0, false, 0 },
+ { AArch64::LD3Threev2d, "ld3", ".2d", 0, false, 0 },
+ { AArch64::LD3Threev8b, "ld3", ".8b", 0, false, 0 },
+ { AArch64::LD3Threev4h, "ld3", ".4h", 0, false, 0 },
+ { AArch64::LD3Threev2s, "ld3", ".2s", 0, false, 0 },
+ { AArch64::LD3Threev16b_POST, "ld3", ".16b", 1, false, 48 },
+ { AArch64::LD3Threev8h_POST, "ld3", ".8h", 1, false, 48 },
+ { AArch64::LD3Threev4s_POST, "ld3", ".4s", 1, false, 48 },
+ { AArch64::LD3Threev2d_POST, "ld3", ".2d", 1, false, 48 },
+ { AArch64::LD3Threev8b_POST, "ld3", ".8b", 1, false, 24 },
+ { AArch64::LD3Threev4h_POST, "ld3", ".4h", 1, false, 24 },
+ { AArch64::LD3Threev2s_POST, "ld3", ".2s", 1, false, 24 },
+ { AArch64::LD4i8, "ld4", ".b", 1, true, 0 },
+ { AArch64::LD4i16, "ld4", ".h", 1, true, 0 },
+ { AArch64::LD4i32, "ld4", ".s", 1, true, 0 },
+ { AArch64::LD4i64, "ld4", ".d", 1, true, 0 },
+ { AArch64::LD4i8_POST, "ld4", ".b", 2, true, 4 },
+ { AArch64::LD4i16_POST, "ld4", ".h", 2, true, 8 },
+ { AArch64::LD4i32_POST, "ld4", ".s", 2, true, 16 },
+ { AArch64::LD4i64_POST, "ld4", ".d", 2, true, 32 },
+ { AArch64::LD4Rv16b, "ld4r", ".16b", 0, false, 0 },
+ { AArch64::LD4Rv8h, "ld4r", ".8h", 0, false, 0 },
+ { AArch64::LD4Rv4s, "ld4r", ".4s", 0, false, 0 },
+ { AArch64::LD4Rv2d, "ld4r", ".2d", 0, false, 0 },
+ { AArch64::LD4Rv8b, "ld4r", ".8b", 0, false, 0 },
+ { AArch64::LD4Rv4h, "ld4r", ".4h", 0, false, 0 },
+ { AArch64::LD4Rv2s, "ld4r", ".2s", 0, false, 0 },
+ { AArch64::LD4Rv1d, "ld4r", ".1d", 0, false, 0 },
+ { AArch64::LD4Rv16b_POST, "ld4r", ".16b", 1, false, 4 },
+ { AArch64::LD4Rv8h_POST, "ld4r", ".8h", 1, false, 8 },
+ { AArch64::LD4Rv4s_POST, "ld4r", ".4s", 1, false, 16 },
+ { AArch64::LD4Rv2d_POST, "ld4r", ".2d", 1, false, 32 },
+ { AArch64::LD4Rv8b_POST, "ld4r", ".8b", 1, false, 4 },
+ { AArch64::LD4Rv4h_POST, "ld4r", ".4h", 1, false, 8 },
+ { AArch64::LD4Rv2s_POST, "ld4r", ".2s", 1, false, 16 },
+ { AArch64::LD4Rv1d_POST, "ld4r", ".1d", 1, false, 32 },
+ { AArch64::LD4Fourv16b, "ld4", ".16b", 0, false, 0 },
+ { AArch64::LD4Fourv8h, "ld4", ".8h", 0, false, 0 },
+ { AArch64::LD4Fourv4s, "ld4", ".4s", 0, false, 0 },
+ { AArch64::LD4Fourv2d, "ld4", ".2d", 0, false, 0 },
+ { AArch64::LD4Fourv8b, "ld4", ".8b", 0, false, 0 },
+ { AArch64::LD4Fourv4h, "ld4", ".4h", 0, false, 0 },
+ { AArch64::LD4Fourv2s, "ld4", ".2s", 0, false, 0 },
+ { AArch64::LD4Fourv16b_POST, "ld4", ".16b", 1, false, 64 },
+ { AArch64::LD4Fourv8h_POST, "ld4", ".8h", 1, false, 64 },
+ { AArch64::LD4Fourv4s_POST, "ld4", ".4s", 1, false, 64 },
+ { AArch64::LD4Fourv2d_POST, "ld4", ".2d", 1, false, 64 },
+ { AArch64::LD4Fourv8b_POST, "ld4", ".8b", 1, false, 32 },
+ { AArch64::LD4Fourv4h_POST, "ld4", ".4h", 1, false, 32 },
+ { AArch64::LD4Fourv2s_POST, "ld4", ".2s", 1, false, 32 },
+ { AArch64::ST1i8, "st1", ".b", 0, true, 0 },
+ { AArch64::ST1i16, "st1", ".h", 0, true, 0 },
+ { AArch64::ST1i32, "st1", ".s", 0, true, 0 },
+ { AArch64::ST1i64, "st1", ".d", 0, true, 0 },
+ { AArch64::ST1i8_POST, "st1", ".b", 1, true, 1 },
+ { AArch64::ST1i16_POST, "st1", ".h", 1, true, 2 },
+ { AArch64::ST1i32_POST, "st1", ".s", 1, true, 4 },
+ { AArch64::ST1i64_POST, "st1", ".d", 1, true, 8 },
+ { AArch64::ST1Onev16b, "st1", ".16b", 0, false, 0 },
+ { AArch64::ST1Onev8h, "st1", ".8h", 0, false, 0 },
+ { AArch64::ST1Onev4s, "st1", ".4s", 0, false, 0 },
+ { AArch64::ST1Onev2d, "st1", ".2d", 0, false, 0 },
+ { AArch64::ST1Onev8b, "st1", ".8b", 0, false, 0 },
+ { AArch64::ST1Onev4h, "st1", ".4h", 0, false, 0 },
+ { AArch64::ST1Onev2s, "st1", ".2s", 0, false, 0 },
+ { AArch64::ST1Onev1d, "st1", ".1d", 0, false, 0 },
+ { AArch64::ST1Onev16b_POST, "st1", ".16b", 1, false, 16 },
+ { AArch64::ST1Onev8h_POST, "st1", ".8h", 1, false, 16 },
+ { AArch64::ST1Onev4s_POST, "st1", ".4s", 1, false, 16 },
+ { AArch64::ST1Onev2d_POST, "st1", ".2d", 1, false, 16 },
+ { AArch64::ST1Onev8b_POST, "st1", ".8b", 1, false, 8 },
+ { AArch64::ST1Onev4h_POST, "st1", ".4h", 1, false, 8 },
+ { AArch64::ST1Onev2s_POST, "st1", ".2s", 1, false, 8 },
+ { AArch64::ST1Onev1d_POST, "st1", ".1d", 1, false, 8 },
+ { AArch64::ST1Twov16b, "st1", ".16b", 0, false, 0 },
+ { AArch64::ST1Twov8h, "st1", ".8h", 0, false, 0 },
+ { AArch64::ST1Twov4s, "st1", ".4s", 0, false, 0 },
+ { AArch64::ST1Twov2d, "st1", ".2d", 0, false, 0 },
+ { AArch64::ST1Twov8b, "st1", ".8b", 0, false, 0 },
+ { AArch64::ST1Twov4h, "st1", ".4h", 0, false, 0 },
+ { AArch64::ST1Twov2s, "st1", ".2s", 0, false, 0 },
+ { AArch64::ST1Twov1d, "st1", ".1d", 0, false, 0 },
+ { AArch64::ST1Twov16b_POST, "st1", ".16b", 1, false, 32 },
+ { AArch64::ST1Twov8h_POST, "st1", ".8h", 1, false, 32 },
+ { AArch64::ST1Twov4s_POST, "st1", ".4s", 1, false, 32 },
+ { AArch64::ST1Twov2d_POST, "st1", ".2d", 1, false, 32 },
+ { AArch64::ST1Twov8b_POST, "st1", ".8b", 1, false, 16 },
+ { AArch64::ST1Twov4h_POST, "st1", ".4h", 1, false, 16 },
+ { AArch64::ST1Twov2s_POST, "st1", ".2s", 1, false, 16 },
+ { AArch64::ST1Twov1d_POST, "st1", ".1d", 1, false, 16 },
+ { AArch64::ST1Threev16b, "st1", ".16b", 0, false, 0 },
+ { AArch64::ST1Threev8h, "st1", ".8h", 0, false, 0 },
+ { AArch64::ST1Threev4s, "st1", ".4s", 0, false, 0 },
+ { AArch64::ST1Threev2d, "st1", ".2d", 0, false, 0 },
+ { AArch64::ST1Threev8b, "st1", ".8b", 0, false, 0 },
+ { AArch64::ST1Threev4h, "st1", ".4h", 0, false, 0 },
+ { AArch64::ST1Threev2s, "st1", ".2s", 0, false, 0 },
+ { AArch64::ST1Threev1d, "st1", ".1d", 0, false, 0 },
+ { AArch64::ST1Threev16b_POST, "st1", ".16b", 1, false, 48 },
+ { AArch64::ST1Threev8h_POST, "st1", ".8h", 1, false, 48 },
+ { AArch64::ST1Threev4s_POST, "st1", ".4s", 1, false, 48 },
+ { AArch64::ST1Threev2d_POST, "st1", ".2d", 1, false, 48 },
+ { AArch64::ST1Threev8b_POST, "st1", ".8b", 1, false, 24 },
+ { AArch64::ST1Threev4h_POST, "st1", ".4h", 1, false, 24 },
+ { AArch64::ST1Threev2s_POST, "st1", ".2s", 1, false, 24 },
+ { AArch64::ST1Threev1d_POST, "st1", ".1d", 1, false, 24 },
+ { AArch64::ST1Fourv16b, "st1", ".16b", 0, false, 0 },
+ { AArch64::ST1Fourv8h, "st1", ".8h", 0, false, 0 },
+ { AArch64::ST1Fourv4s, "st1", ".4s", 0, false, 0 },
+ { AArch64::ST1Fourv2d, "st1", ".2d", 0, false, 0 },
+ { AArch64::ST1Fourv8b, "st1", ".8b", 0, false, 0 },
+ { AArch64::ST1Fourv4h, "st1", ".4h", 0, false, 0 },
+ { AArch64::ST1Fourv2s, "st1", ".2s", 0, false, 0 },
+ { AArch64::ST1Fourv1d, "st1", ".1d", 0, false, 0 },
+ { AArch64::ST1Fourv16b_POST, "st1", ".16b", 1, false, 64 },
+ { AArch64::ST1Fourv8h_POST, "st1", ".8h", 1, false, 64 },
+ { AArch64::ST1Fourv4s_POST, "st1", ".4s", 1, false, 64 },
+ { AArch64::ST1Fourv2d_POST, "st1", ".2d", 1, false, 64 },
+ { AArch64::ST1Fourv8b_POST, "st1", ".8b", 1, false, 32 },
+ { AArch64::ST1Fourv4h_POST, "st1", ".4h", 1, false, 32 },
+ { AArch64::ST1Fourv2s_POST, "st1", ".2s", 1, false, 32 },
+ { AArch64::ST1Fourv1d_POST, "st1", ".1d", 1, false, 32 },
+ { AArch64::ST2i8, "st2", ".b", 0, true, 0 },
+ { AArch64::ST2i16, "st2", ".h", 0, true, 0 },
+ { AArch64::ST2i32, "st2", ".s", 0, true, 0 },
+ { AArch64::ST2i64, "st2", ".d", 0, true, 0 },
+ { AArch64::ST2i8_POST, "st2", ".b", 1, true, 2 },
+ { AArch64::ST2i16_POST, "st2", ".h", 1, true, 4 },
+ { AArch64::ST2i32_POST, "st2", ".s", 1, true, 8 },
+ { AArch64::ST2i64_POST, "st2", ".d", 1, true, 16 },
+ { AArch64::ST2Twov16b, "st2", ".16b", 0, false, 0 },
+ { AArch64::ST2Twov8h, "st2", ".8h", 0, false, 0 },
+ { AArch64::ST2Twov4s, "st2", ".4s", 0, false, 0 },
+ { AArch64::ST2Twov2d, "st2", ".2d", 0, false, 0 },
+ { AArch64::ST2Twov8b, "st2", ".8b", 0, false, 0 },
+ { AArch64::ST2Twov4h, "st2", ".4h", 0, false, 0 },
+ { AArch64::ST2Twov2s, "st2", ".2s", 0, false, 0 },
+ { AArch64::ST2Twov16b_POST, "st2", ".16b", 1, false, 32 },
+ { AArch64::ST2Twov8h_POST, "st2", ".8h", 1, false, 32 },
+ { AArch64::ST2Twov4s_POST, "st2", ".4s", 1, false, 32 },
+ { AArch64::ST2Twov2d_POST, "st2", ".2d", 1, false, 32 },
+ { AArch64::ST2Twov8b_POST, "st2", ".8b", 1, false, 16 },
+ { AArch64::ST2Twov4h_POST, "st2", ".4h", 1, false, 16 },
+ { AArch64::ST2Twov2s_POST, "st2", ".2s", 1, false, 16 },
+ { AArch64::ST3i8, "st3", ".b", 0, true, 0 },
+ { AArch64::ST3i16, "st3", ".h", 0, true, 0 },
+ { AArch64::ST3i32, "st3", ".s", 0, true, 0 },
+ { AArch64::ST3i64, "st3", ".d", 0, true, 0 },
+ { AArch64::ST3i8_POST, "st3", ".b", 1, true, 3 },
+ { AArch64::ST3i16_POST, "st3", ".h", 1, true, 6 },
+ { AArch64::ST3i32_POST, "st3", ".s", 1, true, 12 },
+ { AArch64::ST3i64_POST, "st3", ".d", 1, true, 24 },
+ { AArch64::ST3Threev16b, "st3", ".16b", 0, false, 0 },
+ { AArch64::ST3Threev8h, "st3", ".8h", 0, false, 0 },
+ { AArch64::ST3Threev4s, "st3", ".4s", 0, false, 0 },
+ { AArch64::ST3Threev2d, "st3", ".2d", 0, false, 0 },
+ { AArch64::ST3Threev8b, "st3", ".8b", 0, false, 0 },
+ { AArch64::ST3Threev4h, "st3", ".4h", 0, false, 0 },
+ { AArch64::ST3Threev2s, "st3", ".2s", 0, false, 0 },
+ { AArch64::ST3Threev16b_POST, "st3", ".16b", 1, false, 48 },
+ { AArch64::ST3Threev8h_POST, "st3", ".8h", 1, false, 48 },
+ { AArch64::ST3Threev4s_POST, "st3", ".4s", 1, false, 48 },
+ { AArch64::ST3Threev2d_POST, "st3", ".2d", 1, false, 48 },
+ { AArch64::ST3Threev8b_POST, "st3", ".8b", 1, false, 24 },
+ { AArch64::ST3Threev4h_POST, "st3", ".4h", 1, false, 24 },
+ { AArch64::ST3Threev2s_POST, "st3", ".2s", 1, false, 24 },
+ { AArch64::ST4i8, "st4", ".b", 0, true, 0 },
+ { AArch64::ST4i16, "st4", ".h", 0, true, 0 },
+ { AArch64::ST4i32, "st4", ".s", 0, true, 0 },
+ { AArch64::ST4i64, "st4", ".d", 0, true, 0 },
+ { AArch64::ST4i8_POST, "st4", ".b", 1, true, 4 },
+ { AArch64::ST4i16_POST, "st4", ".h", 1, true, 8 },
+ { AArch64::ST4i32_POST, "st4", ".s", 1, true, 16 },
+ { AArch64::ST4i64_POST, "st4", ".d", 1, true, 32 },
+ { AArch64::ST4Fourv16b, "st4", ".16b", 0, false, 0 },
+ { AArch64::ST4Fourv8h, "st4", ".8h", 0, false, 0 },
+ { AArch64::ST4Fourv4s, "st4", ".4s", 0, false, 0 },
+ { AArch64::ST4Fourv2d, "st4", ".2d", 0, false, 0 },
+ { AArch64::ST4Fourv8b, "st4", ".8b", 0, false, 0 },
+ { AArch64::ST4Fourv4h, "st4", ".4h", 0, false, 0 },
+ { AArch64::ST4Fourv2s, "st4", ".2s", 0, false, 0 },
+ { AArch64::ST4Fourv16b_POST, "st4", ".16b", 1, false, 64 },
+ { AArch64::ST4Fourv8h_POST, "st4", ".8h", 1, false, 64 },
+ { AArch64::ST4Fourv4s_POST, "st4", ".4s", 1, false, 64 },
+ { AArch64::ST4Fourv2d_POST, "st4", ".2d", 1, false, 64 },
+ { AArch64::ST4Fourv8b_POST, "st4", ".8b", 1, false, 32 },
+ { AArch64::ST4Fourv4h_POST, "st4", ".4h", 1, false, 32 },
+ { AArch64::ST4Fourv2s_POST, "st4", ".2s", 1, false, 32 },
+};
+
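+// Linearly scan the table above for an entry matching Opcode.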
+static LdStNInstrDesc *getLdStNInstrDesc(unsigned Opcode) {
+ for (unsigned Idx = 0; Idx != array_lengthof(LdStNInstInfo); ++Idx)
+ if (LdStNInstInfo[Idx].Opcode == Opcode)
+ return &LdStNInstInfo[Idx];
+
+ return nullptr;
}
-void
-AArch64InstPrinter::printCRxOperand(const MCInst *MI, unsigned OpNum,
- raw_ostream &O) {
- const MCOperand &CRx = MI->getOperand(OpNum);
+void AArch64AppleInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
+ StringRef Annot) {
+ unsigned Opcode = MI->getOpcode();
+ StringRef Layout;
- O << 'c' << CRx.getImm();
-}
+ bool IsTbx;
+ if (isTblTbxInstruction(Opcode, Layout, IsTbx)) {
+ O << "\t" << (IsTbx ? "tbx" : "tbl") << Layout << '\t'
+ << getRegisterName(MI->getOperand(0).getReg(), AArch64::vreg) << ", ";
+ unsigned ListOpNum = IsTbx ? 2 : 1;
+ printVectorList(MI, ListOpNum, O, "");
-void
-AArch64InstPrinter::printCVTFixedPosOperand(const MCInst *MI, unsigned OpNum,
- raw_ostream &O) {
- const MCOperand &ScaleOp = MI->getOperand(OpNum);
+ O << ", "
+ << getRegisterName(MI->getOperand(ListOpNum + 1).getReg(), AArch64::vreg);
+ printAnnotation(O, Annot);
+ return;
+ }
- O << '#' << (64 - ScaleOp.getImm());
-}
+ if (LdStNInstrDesc *LdStDesc = getLdStNInstrDesc(Opcode)) {
+ O << "\t" << LdStDesc->Mnemonic << LdStDesc->Layout << '\t';
+
+ // Now onto the operands: first the vector list with a possible lane
+ // specifier, e.g. { v0 }[2].
+ int OpNum = LdStDesc->ListOperand;
+ printVectorList(MI, OpNum++, O, "");
+
+ if (LdStDesc->HasLane)
+ O << '[' << MI->getOperand(OpNum++).getImm() << ']';
+
+ // Next the address: [xN]
+ unsigned AddrReg = MI->getOperand(OpNum++).getReg();
+ O << ", [" << getRegisterName(AddrReg) << ']';
+
+ // Finally, there might be a post-indexed offset.
+ if (LdStDesc->NaturalOffset != 0) {
+ unsigned Reg = MI->getOperand(OpNum++).getReg();
+ if (Reg != AArch64::XZR)
+ O << ", " << getRegisterName(Reg);
+ else {
+ assert(LdStDesc->NaturalOffset && "no offset on post-inc instruction?");
+ O << ", #" << LdStDesc->NaturalOffset;
+ }
+ }
+ printAnnotation(O, Annot);
+ return;
+ }
-void AArch64InstPrinter::printFPImmOperand(const MCInst *MI, unsigned OpNum,
- raw_ostream &o) {
- const MCOperand &MOImm8 = MI->getOperand(OpNum);
+ AArch64InstPrinter::printInst(MI, O, Annot);
+}
- assert(MOImm8.isImm()
- && "Immediate operand required for floating-point immediate inst");
+bool AArch64InstPrinter::printSysAlias(const MCInst *MI, raw_ostream &O) {
+#ifndef NDEBUG
+ unsigned Opcode = MI->getOpcode();
+ assert(Opcode == AArch64::SYSxt && "Invalid opcode for SYS alias!");
+#endif
+
+ const char *Asm = nullptr;
+ const MCOperand &Op1 = MI->getOperand(0);
+ const MCOperand &Cn = MI->getOperand(1);
+ const MCOperand &Cm = MI->getOperand(2);
+ const MCOperand &Op2 = MI->getOperand(3);
+
+ unsigned Op1Val = Op1.getImm();
+ unsigned CnVal = Cn.getImm();
+ unsigned CmVal = Cm.getImm();
+ unsigned Op2Val = Op2.getImm();
+
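+ // SYS #op1, Cn, Cm, #op2 {, Xt}: the IC, DC and AT aliases live under
+ // Cn == 7, the TLBI aliases under Cn == 8.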
+ if (CnVal == 7) {
+ switch (CmVal) {
+ default:
+ break;
+
+ // IC aliases
+ case 1:
+ if (Op1Val == 0 && Op2Val == 0)
+ Asm = "ic\tialluis";
+ break;
+ case 5:
+ if (Op1Val == 0 && Op2Val == 0)
+ Asm = "ic\tiallu";
+ else if (Op1Val == 3 && Op2Val == 1)
+ Asm = "ic\tivau";
+ break;
+
+ // DC aliases
+ case 4:
+ if (Op1Val == 3 && Op2Val == 1)
+ Asm = "dc\tzva";
+ break;
+ case 6:
+ if (Op1Val == 0 && Op2Val == 1)
+ Asm = "dc\tivac";
+ if (Op1Val == 0 && Op2Val == 2)
+ Asm = "dc\tisw";
+ break;
+ case 10:
+ if (Op1Val == 3 && Op2Val == 1)
+ Asm = "dc\tcvac";
+ else if (Op1Val == 0 && Op2Val == 2)
+ Asm = "dc\tcsw";
+ break;
+ case 11:
+ if (Op1Val == 3 && Op2Val == 1)
+ Asm = "dc\tcvau";
+ break;
+ case 14:
+ if (Op1Val == 3 && Op2Val == 1)
+ Asm = "dc\tcivac";
+ else if (Op1Val == 0 && Op2Val == 2)
+ Asm = "dc\tcisw";
+ break;
+
+ // AT aliases
+ case 8:
+ switch (Op1Val) {
+ default:
+ break;
+ case 0:
+ switch (Op2Val) {
+ default:
+ break;
+ case 0: Asm = "at\ts1e1r"; break;
+ case 1: Asm = "at\ts1e1w"; break;
+ case 2: Asm = "at\ts1e0r"; break;
+ case 3: Asm = "at\ts1e0w"; break;
+ }
+ break;
+ case 4:
+ switch (Op2Val) {
+ default:
+ break;
+ case 0: Asm = "at\ts1e2r"; break;
+ case 1: Asm = "at\ts1e2w"; break;
+ case 4: Asm = "at\ts12e1r"; break;
+ case 5: Asm = "at\ts12e1w"; break;
+ case 6: Asm = "at\ts12e0r"; break;
+ case 7: Asm = "at\ts12e0w"; break;
+ }
+ break;
+ case 6:
+ switch (Op2Val) {
+ default:
+ break;
+ case 0: Asm = "at\ts1e3r"; break;
+ case 1: Asm = "at\ts1e3w"; break;
+ }
+ break;
+ }
+ break;
+ }
+ } else if (CnVal == 8) {
+ // TLBI aliases
+ switch (CmVal) {
+ default:
+ break;
+ case 3:
+ switch (Op1Val) {
+ default:
+ break;
+ case 0:
+ switch (Op2Val) {
+ default:
+ break;
+ case 0: Asm = "tlbi\tvmalle1is"; break;
+ case 1: Asm = "tlbi\tvae1is"; break;
+ case 2: Asm = "tlbi\taside1is"; break;
+ case 3: Asm = "tlbi\tvaae1is"; break;
+ case 5: Asm = "tlbi\tvale1is"; break;
+ case 7: Asm = "tlbi\tvaale1is"; break;
+ }
+ break;
+ case 4:
+ switch (Op2Val) {
+ default:
+ break;
+ case 0: Asm = "tlbi\talle2is"; break;
+ case 1: Asm = "tlbi\tvae2is"; break;
+ case 4: Asm = "tlbi\talle1is"; break;
+ case 5: Asm = "tlbi\tvale2is"; break;
+ case 6: Asm = "tlbi\tvmalls12e1is"; break;
+ }
+ break;
+ case 6:
+ switch (Op2Val) {
+ default:
+ break;
+ case 0: Asm = "tlbi\talle3is"; break;
+ case 1: Asm = "tlbi\tvae3is"; break;
+ case 5: Asm = "tlbi\tvale3is"; break;
+ }
+ break;
+ }
+ break;
+ case 0:
+ switch (Op1Val) {
+ default:
+ break;
+ case 4:
+ switch (Op2Val) {
+ default:
+ break;
+ case 1: Asm = "tlbi\tipas2e1is"; break;
+ case 5: Asm = "tlbi\tipas2le1is"; break;
+ }
+ break;
+ }
+ break;
+ case 4:
+ switch (Op1Val) {
+ default:
+ break;
+ case 4:
+ switch (Op2Val) {
+ default:
+ break;
+ case 1: Asm = "tlbi\tipas2e1"; break;
+ case 5: Asm = "tlbi\tipas2le1"; break;
+ }
+ break;
+ }
+ break;
+ case 7:
+ switch (Op1Val) {
+ default:
+ break;
+ case 0:
+ switch (Op2Val) {
+ default:
+ break;
+ case 0: Asm = "tlbi\tvmalle1"; break;
+ case 1: Asm = "tlbi\tvae1"; break;
+ case 2: Asm = "tlbi\taside1"; break;
+ case 3: Asm = "tlbi\tvaae1"; break;
+ case 5: Asm = "tlbi\tvale1"; break;
+ case 7: Asm = "tlbi\tvaale1"; break;
+ }
+ break;
+ case 4:
+ switch (Op2Val) {
+ default:
+ break;
+ case 0: Asm = "tlbi\talle2"; break;
+ case 1: Asm = "tlbi\tvae2"; break;
+ case 4: Asm = "tlbi\talle1"; break;
+ case 5: Asm = "tlbi\tvale2"; break;
+ case 6: Asm = "tlbi\tvmalls12e1"; break;
+ }
+ break;
+ case 6:
+ switch (Op2Val) {
+ default:
+ break;
+ case 0: Asm = "tlbi\talle3"; break;
+ case 1: Asm = "tlbi\tvae3"; break;
+ case 5: Asm = "tlbi\tvale3"; break;
+ }
+ break;
+ }
+ break;
+ }
+ }
+
+ if (Asm) {
+ unsigned Reg = MI->getOperand(4).getReg();
- uint32_t Imm8 = MOImm8.getImm();
- uint32_t Fraction = Imm8 & 0xf;
- uint32_t Exponent = (Imm8 >> 4) & 0x7;
- uint32_t Negative = (Imm8 >> 7) & 0x1;
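+ // Aliases that act on all entries (e.g. "ic iallu") take no register
+ // operand; the rest print Xt.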
+ O << '\t' << Asm;
+ if (StringRef(Asm).lower().find("all") == StringRef::npos)
+ O << ", " << getRegisterName(Reg);
+ }
- float Val = 1.0f + Fraction / 16.0f;
+ return Asm != nullptr;
+}
- // That is:
- // 000 -> 2^1, 001 -> 2^2, 010 -> 2^3, 011 -> 2^4,
- // 100 -> 2^-3, 101 -> 2^-2, 110 -> 2^-1, 111 -> 2^0
- if (Exponent & 0x4) {
- Val /= 1 << (7 - Exponent);
+void AArch64InstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ const MCOperand &Op = MI->getOperand(OpNo);
+ if (Op.isReg()) {
+ unsigned Reg = Op.getReg();
+ O << getRegisterName(Reg);
+ } else if (Op.isImm()) {
+ O << '#' << Op.getImm();
} else {
- Val *= 1 << (Exponent + 1);
+ assert(Op.isExpr() && "unknown operand kind in printOperand");
+ O << *Op.getExpr();
}
+}
- Val = Negative ? -Val : Val;
-
- o << '#' << format("%.8f", Val);
+void AArch64InstPrinter::printHexImm(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ const MCOperand &Op = MI->getOperand(OpNo);
+ O << format("#%#llx", Op.getImm());
}
-void AArch64InstPrinter::printFPZeroOperand(const MCInst *MI, unsigned OpNum,
- raw_ostream &o) {
- o << "#0.0";
+void AArch64InstPrinter::printPostIncOperand(const MCInst *MI, unsigned OpNo,
+ unsigned Imm, raw_ostream &O) {
+ const MCOperand &Op = MI->getOperand(OpNo);
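+ // XZR in the offset slot selects the immediate post-increment form, so
+ // print the fixed offset instead of a register.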
+ if (Op.isReg()) {
+ unsigned Reg = Op.getReg();
+ if (Reg == AArch64::XZR)
+ O << "#" << Imm;
+ else
+ O << getRegisterName(Reg);
+ } else
+ assert(0 && "unknown operand kind in printPostIncOperand64");
}
-void
-AArch64InstPrinter::printCondCodeOperand(const MCInst *MI, unsigned OpNum,
- raw_ostream &O) {
- const MCOperand &MO = MI->getOperand(OpNum);
+void AArch64InstPrinter::printVRegOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ const MCOperand &Op = MI->getOperand(OpNo);
+ assert(Op.isReg() && "Non-register vreg operand!");
+ unsigned Reg = Op.getReg();
+ O << getRegisterName(Reg, AArch64::vreg);
+}
- O << A64CondCodeToString(static_cast<A64CC::CondCodes>(MO.getImm()));
+void AArch64InstPrinter::printSysCROperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ const MCOperand &Op = MI->getOperand(OpNo);
+ assert(Op.isImm() && "System instruction C[nm] operands must be immediates!");
+ O << "c" << Op.getImm();
}
-template <unsigned field_width, unsigned scale> void
-AArch64InstPrinter::printLabelOperand(const MCInst *MI, unsigned OpNum,
- raw_ostream &O) {
+void AArch64InstPrinter::printAddSubImm(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
const MCOperand &MO = MI->getOperand(OpNum);
-
- if (!MO.isImm()) {
- printOperand(MI, OpNum, O);
- return;
+ if (MO.isImm()) {
+ unsigned Val = (MO.getImm() & 0xfff);
+ assert(Val == MO.getImm() && "Add/sub immediate out of range!");
+ unsigned Shift =
+ AArch64_AM::getShiftValue(MI->getOperand(OpNum + 1).getImm());
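+ // Print the raw 12-bit payload; any non-zero shift is appended below.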
+ O << '#' << Val;
+ if (Shift != 0)
+ printShifter(MI, OpNum + 1, O);
+
+ if (CommentStream)
+ *CommentStream << '=' << (Val << Shift) << '\n';
+ } else {
+ assert(MO.isExpr() && "Unexpected operand type!");
+ O << *MO.getExpr();
+ printShifter(MI, OpNum + 1, O);
}
+}
- // The immediate of LDR (lit) instructions is a signed 19-bit immediate, which
- // is multiplied by 4 (because all A64 instructions are 32-bits wide).
- uint64_t UImm = MO.getImm();
- uint64_t Sign = UImm & (1LL << (field_width - 1));
- int64_t SImm = scale * ((UImm & ~Sign) - Sign);
-
- O << "#" << SImm;
+void AArch64InstPrinter::printLogicalImm32(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ uint64_t Val = MI->getOperand(OpNum).getImm();
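+ // Expand the encoded (N, immr, imms) pattern into the actual 32-bit mask.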
+ O << "#0x";
+ O.write_hex(AArch64_AM::decodeLogicalImmediate(Val, 32));
}
-template<unsigned RegWidth> void
-AArch64InstPrinter::printLogicalImmOperand(const MCInst *MI, unsigned OpNum,
+void AArch64InstPrinter::printLogicalImm64(const MCInst *MI, unsigned OpNum,
raw_ostream &O) {
- const MCOperand &MO = MI->getOperand(OpNum);
- uint64_t Val;
- A64Imms::isLogicalImmBits(RegWidth, MO.getImm(), Val);
+ uint64_t Val = MI->getOperand(OpNum).getImm();
O << "#0x";
- O.write_hex(Val);
+ O.write_hex(AArch64_AM::decodeLogicalImmediate(Val, 64));
}
-void
-AArch64InstPrinter::printOffsetUImm12Operand(const MCInst *MI, unsigned OpNum,
- raw_ostream &O, int MemSize) {
- const MCOperand &MOImm = MI->getOperand(OpNum);
+void AArch64InstPrinter::printShifter(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ unsigned Val = MI->getOperand(OpNum).getImm();
+ // LSL #0 should not be printed.
+ if (AArch64_AM::getShiftType(Val) == AArch64_AM::LSL &&
+ AArch64_AM::getShiftValue(Val) == 0)
+ return;
+ O << ", " << AArch64_AM::getShiftExtendName(AArch64_AM::getShiftType(Val))
+ << " #" << AArch64_AM::getShiftValue(Val);
+}
- if (MOImm.isImm()) {
- uint32_t Imm = MOImm.getImm() * MemSize;
+void AArch64InstPrinter::printShiftedRegister(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ O << getRegisterName(MI->getOperand(OpNum).getReg());
+ printShifter(MI, OpNum + 1, O);
+}
- O << "#" << Imm;
- } else {
- O << "#" << *MOImm.getExpr();
+void AArch64InstPrinter::printExtendedRegister(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ O << getRegisterName(MI->getOperand(OpNum).getReg());
+ printArithExtend(MI, OpNum + 1, O);
+}
+
+void AArch64InstPrinter::printArithExtend(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ unsigned Val = MI->getOperand(OpNum).getImm();
+ AArch64_AM::ShiftExtendType ExtType = AArch64_AM::getArithExtendType(Val);
+ unsigned ShiftVal = AArch64_AM::getArithShiftValue(Val);
+
+ // If the destination or first source register operand is [W]SP, print
+ // UXTW/UXTX as LSL, and if the shift amount is also zero, print nothing at
+ // all.
+ if (ExtType == AArch64_AM::UXTW || ExtType == AArch64_AM::UXTX) {
+ unsigned Dest = MI->getOperand(0).getReg();
+ unsigned Src1 = MI->getOperand(1).getReg();
+ if (((Dest == AArch64::SP || Src1 == AArch64::SP) &&
+ ExtType == AArch64_AM::UXTX) ||
+ ((Dest == AArch64::WSP || Src1 == AArch64::WSP) &&
+ ExtType == AArch64_AM::UXTW)) {
+ if (ShiftVal != 0)
+ O << ", lsl #" << ShiftVal;
+ return;
+ }
}
+ O << ", " << AArch64_AM::getShiftExtendName(ExtType);
+ if (ShiftVal != 0)
+ O << " #" << ShiftVal;
}
-void
-AArch64InstPrinter::printShiftOperand(const MCInst *MI, unsigned OpNum,
- raw_ostream &O,
- A64SE::ShiftExtSpecifiers Shift) {
- const MCOperand &MO = MI->getOperand(OpNum);
+void AArch64InstPrinter::printMemExtend(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O, char SrcRegKind,
+ unsigned Width) {
+ unsigned SignExtend = MI->getOperand(OpNum).getImm();
+ unsigned DoShift = MI->getOperand(OpNum + 1).getImm();
- // LSL #0 is not printed
- if (Shift == A64SE::LSL && MO.isImm() && MO.getImm() == 0)
- return;
+ // sxtw, sxtx, uxtw or lsl (== uxtx)
+ bool IsLSL = !SignExtend && SrcRegKind == 'x';
+ if (IsLSL)
+ O << "lsl";
+ else
+ O << (SignExtend ? 's' : 'u') << "xt" << SrcRegKind;
- switch (Shift) {
- case A64SE::LSL: O << "lsl"; break;
- case A64SE::LSR: O << "lsr"; break;
- case A64SE::ASR: O << "asr"; break;
- case A64SE::ROR: O << "ror"; break;
- default: llvm_unreachable("Invalid shift specifier in logical instruction");
- }
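+ // The explicit shift amount, when printed, is log2 of the access width in
+ // bytes.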
+ if (DoShift || IsLSL)
+ O << " #" << Log2_32(Width / 8);
+}
- O << " #" << MO.getImm();
+void AArch64InstPrinter::printCondCode(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ AArch64CC::CondCode CC = (AArch64CC::CondCode)MI->getOperand(OpNum).getImm();
+ O << AArch64CC::getCondCodeName(CC);
}
-void
-AArch64InstPrinter::printMoveWideImmOperand(const MCInst *MI, unsigned OpNum,
- raw_ostream &O) {
- const MCOperand &UImm16MO = MI->getOperand(OpNum);
- const MCOperand &ShiftMO = MI->getOperand(OpNum + 1);
+void AArch64InstPrinter::printInverseCondCode(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ AArch64CC::CondCode CC = (AArch64CC::CondCode)MI->getOperand(OpNum).getImm();
+ O << AArch64CC::getCondCodeName(AArch64CC::getInvertedCondCode(CC));
+}
- if (UImm16MO.isImm()) {
- O << '#' << UImm16MO.getImm();
+void AArch64InstPrinter::printAMNoIndex(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ O << '[' << getRegisterName(MI->getOperand(OpNum).getReg()) << ']';
+}
- if (ShiftMO.getImm() != 0)
- O << ", lsl #" << (ShiftMO.getImm() * 16);
+template<int Scale>
+void AArch64InstPrinter::printImmScale(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ O << '#' << Scale * MI->getOperand(OpNum).getImm();
+}
- return;
+void AArch64InstPrinter::printUImm12Offset(const MCInst *MI, unsigned OpNum,
+ unsigned Scale, raw_ostream &O) {
+ const MCOperand &MO = MI->getOperand(OpNum);
+ if (MO.isImm()) {
+ O << "#" << (MO.getImm() * Scale);
+ } else {
+ assert(MO.isExpr() && "Unexpected operand type!");
+ O << *MO.getExpr();
}
-
- O << "#" << *UImm16MO.getExpr();
}
-void AArch64InstPrinter::printNamedImmOperand(const NamedImmMapper &Mapper,
- const MCInst *MI, unsigned OpNum,
- raw_ostream &O) {
- bool ValidName;
- const MCOperand &MO = MI->getOperand(OpNum);
- StringRef Name = Mapper.toString(MO.getImm(), ValidName);
+void AArch64InstPrinter::printAMIndexedWB(const MCInst *MI, unsigned OpNum,
+ unsigned Scale, raw_ostream &O) {
+ const MCOperand &MO1 = MI->getOperand(OpNum + 1);
+ O << '[' << getRegisterName(MI->getOperand(OpNum).getReg());
+ if (MO1.isImm()) {
+ O << ", #" << (MO1.getImm() * Scale);
+ } else {
+ assert(MO1.isExpr() && "Unexpected operand type!");
+ O << ", " << *MO1.getExpr();
+ }
+ O << ']';
+}
- if (ValidName)
+void AArch64InstPrinter::printPrefetchOp(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ unsigned prfop = MI->getOperand(OpNum).getImm();
+ bool Valid;
+ StringRef Name = AArch64PRFM::PRFMMapper().toString(prfop, Valid);
+ if (Valid)
O << Name;
else
- O << '#' << MO.getImm();
+ O << '#' << prfop;
}
-void
-AArch64InstPrinter::printSysRegOperand(const A64SysReg::SysRegMapper &Mapper,
- const MCInst *MI, unsigned OpNum,
- raw_ostream &O) {
+void AArch64InstPrinter::printFPImmOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
const MCOperand &MO = MI->getOperand(OpNum);
+ float FPImm =
+ MO.isFPImm() ? MO.getFPImm() : AArch64_AM::getFPImmFloat(MO.getImm());
- bool ValidName;
- std::string Name = Mapper.toString(MO.getImm(), ValidName);
- if (ValidName) {
- O << Name;
- return;
- }
+ // 8 decimal places are enough to perfectly represent permitted floats.
+ O << format("#%.8f", FPImm);
}
+static unsigned getNextVectorRegister(unsigned Reg, unsigned Stride = 1) {
+ while (Stride--) {
+ switch (Reg) {
+ default:
+ assert(0 && "Vector register expected!");
+ case AArch64::Q0: Reg = AArch64::Q1; break;
+ case AArch64::Q1: Reg = AArch64::Q2; break;
+ case AArch64::Q2: Reg = AArch64::Q3; break;
+ case AArch64::Q3: Reg = AArch64::Q4; break;
+ case AArch64::Q4: Reg = AArch64::Q5; break;
+ case AArch64::Q5: Reg = AArch64::Q6; break;
+ case AArch64::Q6: Reg = AArch64::Q7; break;
+ case AArch64::Q7: Reg = AArch64::Q8; break;
+ case AArch64::Q8: Reg = AArch64::Q9; break;
+ case AArch64::Q9: Reg = AArch64::Q10; break;
+ case AArch64::Q10: Reg = AArch64::Q11; break;
+ case AArch64::Q11: Reg = AArch64::Q12; break;
+ case AArch64::Q12: Reg = AArch64::Q13; break;
+ case AArch64::Q13: Reg = AArch64::Q14; break;
+ case AArch64::Q14: Reg = AArch64::Q15; break;
+ case AArch64::Q15: Reg = AArch64::Q16; break;
+ case AArch64::Q16: Reg = AArch64::Q17; break;
+ case AArch64::Q17: Reg = AArch64::Q18; break;
+ case AArch64::Q18: Reg = AArch64::Q19; break;
+ case AArch64::Q19: Reg = AArch64::Q20; break;
+ case AArch64::Q20: Reg = AArch64::Q21; break;
+ case AArch64::Q21: Reg = AArch64::Q22; break;
+ case AArch64::Q22: Reg = AArch64::Q23; break;
+ case AArch64::Q23: Reg = AArch64::Q24; break;
+ case AArch64::Q24: Reg = AArch64::Q25; break;
+ case AArch64::Q25: Reg = AArch64::Q26; break;
+ case AArch64::Q26: Reg = AArch64::Q27; break;
+ case AArch64::Q27: Reg = AArch64::Q28; break;
+ case AArch64::Q28: Reg = AArch64::Q29; break;
+ case AArch64::Q29: Reg = AArch64::Q30; break;
+ case AArch64::Q30: Reg = AArch64::Q31; break;
+ // Vector lists can wrap around.
+ case AArch64::Q31:
+ Reg = AArch64::Q0;
+ break;
+ }
+ }
+ return Reg;
+}
-void AArch64InstPrinter::printRegExtendOperand(const MCInst *MI,
- unsigned OpNum,
- raw_ostream &O,
- A64SE::ShiftExtSpecifiers Ext) {
- // FIXME: In principle TableGen should be able to detect this itself far more
- // easily. We will only accumulate more of these hacks.
- unsigned Reg0 = MI->getOperand(0).getReg();
- unsigned Reg1 = MI->getOperand(1).getReg();
-
- if (isStackReg(Reg0) || isStackReg(Reg1)) {
- A64SE::ShiftExtSpecifiers LSLEquiv;
-
- if (Reg0 == AArch64::XSP || Reg1 == AArch64::XSP)
- LSLEquiv = A64SE::UXTX;
- else
- LSLEquiv = A64SE::UXTW;
+void AArch64InstPrinter::printVectorList(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O,
+ StringRef LayoutSuffix) {
+ unsigned Reg = MI->getOperand(OpNum).getReg();
- if (Ext == LSLEquiv) {
- O << "lsl #" << MI->getOperand(OpNum).getImm();
- return;
- }
+ O << "{ ";
+
+ // Work out how many registers there are in the list (if there is an actual
+ // list).
+ unsigned NumRegs = 1;
+ if (MRI.getRegClass(AArch64::DDRegClassID).contains(Reg) ||
+ MRI.getRegClass(AArch64::QQRegClassID).contains(Reg))
+ NumRegs = 2;
+ else if (MRI.getRegClass(AArch64::DDDRegClassID).contains(Reg) ||
+ MRI.getRegClass(AArch64::QQQRegClassID).contains(Reg))
+ NumRegs = 3;
+ else if (MRI.getRegClass(AArch64::DDDDRegClassID).contains(Reg) ||
+ MRI.getRegClass(AArch64::QQQQRegClassID).contains(Reg))
+ NumRegs = 4;
+
+ // Now forget about the list and find out what the first register is.
+ if (unsigned FirstReg = MRI.getSubReg(Reg, AArch64::dsub0))
+ Reg = FirstReg;
+ else if (unsigned FirstReg = MRI.getSubReg(Reg, AArch64::qsub0))
+ Reg = FirstReg;
+
+ // If it's a D-reg, we need to promote it to the equivalent Q-reg before
+ // printing (otherwise getRegisterName fails).
+ if (MRI.getRegClass(AArch64::FPR64RegClassID).contains(Reg)) {
+ const MCRegisterClass &FPR128RC =
+ MRI.getRegClass(AArch64::FPR128RegClassID);
+ Reg = MRI.getMatchingSuperReg(Reg, AArch64::dsub, &FPR128RC);
}
- switch (Ext) {
- case A64SE::UXTB: O << "uxtb"; break;
- case A64SE::UXTH: O << "uxth"; break;
- case A64SE::UXTW: O << "uxtw"; break;
- case A64SE::UXTX: O << "uxtx"; break;
- case A64SE::SXTB: O << "sxtb"; break;
- case A64SE::SXTH: O << "sxth"; break;
- case A64SE::SXTW: O << "sxtw"; break;
- case A64SE::SXTX: O << "sxtx"; break;
- default: llvm_unreachable("Unexpected shift type for printing");
+ for (unsigned i = 0; i < NumRegs; ++i, Reg = getNextVectorRegister(Reg)) {
+ O << getRegisterName(Reg, AArch64::vreg) << LayoutSuffix;
+ if (i + 1 != NumRegs)
+ O << ", ";
}
- const MCOperand &MO = MI->getOperand(OpNum);
- if (MO.getImm() != 0)
- O << " #" << MO.getImm();
+ O << " }";
}
-template<int MemScale> void
-AArch64InstPrinter::printSImm7ScaledOperand(const MCInst *MI, unsigned OpNum,
- raw_ostream &O) {
- const MCOperand &MOImm = MI->getOperand(OpNum);
- int32_t Imm = unpackSignedImm(7, MOImm.getImm());
+void AArch64InstPrinter::printImplicitlyTypedVectorList(const MCInst *MI,
+ unsigned OpNum,
+ raw_ostream &O) {
+ printVectorList(MI, OpNum, O, "");
+}
+
+template <unsigned NumLanes, char LaneKind>
+void AArch64InstPrinter::printTypedVectorList(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ std::string Suffix(".");
+ if (NumLanes)
+ Suffix += itostr(NumLanes) + LaneKind;
+ else
+ Suffix += LaneKind;
- O << "#" << (Imm * MemScale);
+ printVectorList(MI, OpNum, O, Suffix);
}
-void AArch64InstPrinter::printVPRRegister(const MCInst *MI, unsigned OpNo,
+void AArch64InstPrinter::printVectorIndex(const MCInst *MI, unsigned OpNum,
raw_ostream &O) {
- unsigned Reg = MI->getOperand(OpNo).getReg();
- std::string Name = getRegisterName(Reg);
- Name[0] = 'v';
- O << Name;
+ O << "[" << MI->getOperand(OpNum).getImm() << "]";
}
-void AArch64InstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
- const MCOperand &Op = MI->getOperand(OpNo);
- if (Op.isReg()) {
- unsigned Reg = Op.getReg();
- O << getRegisterName(Reg);
- } else if (Op.isImm()) {
- O << '#' << Op.getImm();
+void AArch64InstPrinter::printAlignedLabel(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &Op = MI->getOperand(OpNum);
+
+ // If the label has already been resolved to an immediate offset (say, when
+ // we're running the disassembler), just print the immediate.
+ if (Op.isImm()) {
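+ // The encoded offset counts 32-bit words; scale it back to bytes.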
+ O << "#" << (Op.getImm() << 2);
+ return;
+ }
+
+ // If the branch target is simply an address then print it in hex.
+ const MCConstantExpr *BranchTarget = dyn_cast<MCConstantExpr>(Op.getExpr());
+ int64_t Address;
+ if (BranchTarget && BranchTarget->EvaluateAsAbsolute(Address)) {
+ O << "0x";
+ O.write_hex(Address);
} else {
- assert(Op.isExpr() && "unknown operand kind in printOperand");
- // If a symbolic branch target was added as a constant expression then print
- // that address in hex.
- const MCConstantExpr *BranchTarget = dyn_cast<MCConstantExpr>(Op.getExpr());
- int64_t Address;
- if (BranchTarget && BranchTarget->EvaluateAsAbsolute(Address)) {
- O << "0x";
- O.write_hex(Address);
- }
- else {
- // Otherwise, just print the expression.
- O << *Op.getExpr();
- }
+ // Otherwise, just print the expression.
+ O << *Op.getExpr();
}
}
+void AArch64InstPrinter::printAdrpLabel(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MCOperand &Op = MI->getOperand(OpNum);
-void AArch64InstPrinter::printInst(const MCInst *MI, raw_ostream &O,
- StringRef Annot) {
- if (MI->getOpcode() == AArch64::TLSDESCCALL) {
- // This is a special assembler directive which applies an
- // R_AARCH64_TLSDESC_CALL to the following (BLR) instruction. It has a fixed
- // form outside the normal TableGenerated scheme.
- O << "\t.tlsdesccall " << *MI->getOperand(0).getExpr();
- } else if (!printAliasInstr(MI, O))
- printInstruction(MI, O);
+ // If the label has already been resolved to an immediate offset (say, when
+ // we're running the disassembler), just print the immediate.
+ if (Op.isImm()) {
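+ // ADRP immediates count 4KiB pages; scale back to bytes.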
+ O << "#" << (Op.getImm() << 12);
+ return;
+ }
- printAnnotation(O, Annot);
+ // Otherwise, just print the expression.
+ O << *Op.getExpr();
}
-template <A64SE::ShiftExtSpecifiers Ext, bool isHalf>
-void AArch64InstPrinter::printNeonMovImmShiftOperand(const MCInst *MI,
- unsigned OpNum,
- raw_ostream &O) {
- const MCOperand &MO = MI->getOperand(OpNum);
-
- assert(MO.isImm() &&
- "Immediate operand required for Neon vector immediate inst.");
-
- bool IsLSL = false;
- if (Ext == A64SE::LSL)
- IsLSL = true;
- else if (Ext != A64SE::MSL)
- llvm_unreachable("Invalid shift specifier in movi instruction");
-
- int64_t Imm = MO.getImm();
-
- // MSL and LSLH accepts encoded shift amount 0 or 1.
- if ((!IsLSL || (IsLSL && isHalf)) && Imm != 0 && Imm != 1)
- llvm_unreachable("Invalid shift amount in movi instruction");
-
- // LSH accepts encoded shift amount 0, 1, 2 or 3.
- if (IsLSL && (Imm < 0 || Imm > 3))
- llvm_unreachable("Invalid shift amount in movi instruction");
-
- // Print shift amount as multiple of 8 with MSL encoded shift amount
- // 0 and 1 printed as 8 and 16.
- if (!IsLSL)
- Imm++;
- Imm *= 8;
-
- // LSL #0 is not printed
- if (IsLSL) {
- if (Imm == 0)
- return;
- O << ", lsl";
- } else
- O << ", msl";
-
- O << " #" << Imm;
-}
+void AArch64InstPrinter::printBarrierOption(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ unsigned Val = MI->getOperand(OpNo).getImm();
+ unsigned Opcode = MI->getOpcode();
-void AArch64InstPrinter::printNeonUImm0Operand(const MCInst *MI, unsigned OpNum,
- raw_ostream &o) {
- o << "#0x0";
+ bool Valid;
+ StringRef Name;
+ if (Opcode == AArch64::ISB)
+ Name = AArch64ISB::ISBMapper().toString(Val, Valid);
+ else
+ Name = AArch64DB::DBarrierMapper().toString(Val, Valid);
+ if (Valid)
+ O << Name;
+ else
+ O << "#" << Val;
}
-void AArch64InstPrinter::printUImmHexOperand(const MCInst *MI, unsigned OpNum,
- raw_ostream &O) {
- const MCOperand &MOUImm = MI->getOperand(OpNum);
-
- assert(MOUImm.isImm() &&
- "Immediate operand required for Neon vector immediate inst.");
+void AArch64InstPrinter::printMRSSystemRegister(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ unsigned Val = MI->getOperand(OpNo).getImm();
- unsigned Imm = MOUImm.getImm();
+ bool Valid;
+ auto Mapper = AArch64SysReg::MRSMapper(getAvailableFeatures());
+ std::string Name = Mapper.toString(Val, Valid);
- O << "#0x";
- O.write_hex(Imm);
+ if (Valid)
+ O << StringRef(Name).upper();
}
-void AArch64InstPrinter::printUImmBareOperand(const MCInst *MI,
- unsigned OpNum,
- raw_ostream &O) {
- const MCOperand &MOUImm = MI->getOperand(OpNum);
+void AArch64InstPrinter::printMSRSystemRegister(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ unsigned Val = MI->getOperand(OpNo).getImm();
- assert(MOUImm.isImm()
- && "Immediate operand required for Neon vector immediate inst.");
+ bool Valid;
+ auto Mapper = AArch64SysReg::MSRMapper(getAvailableFeatures());
+ std::string Name = Mapper.toString(Val, Valid);
- unsigned Imm = MOUImm.getImm();
- O << Imm;
+ if (Valid)
+ O << StringRef(Name).upper();
}
-void AArch64InstPrinter::printNeonUImm64MaskOperand(const MCInst *MI,
- unsigned OpNum,
- raw_ostream &O) {
- const MCOperand &MOUImm8 = MI->getOperand(OpNum);
-
- assert(MOUImm8.isImm() &&
- "Immediate operand required for Neon vector immediate bytemask inst.");
+void AArch64InstPrinter::printSystemPStateField(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ unsigned Val = MI->getOperand(OpNo).getImm();
- uint32_t UImm8 = MOUImm8.getImm();
- uint64_t Mask = 0;
-
- // Replicates 0x00 or 0xff byte in a 64-bit vector
- for (unsigned ByteNum = 0; ByteNum < 8; ++ByteNum) {
- if ((UImm8 >> ByteNum) & 1)
- Mask |= (uint64_t)0xff << (8 * ByteNum);
- }
-
- O << "#0x";
- O.write_hex(Mask);
+ bool Valid;
+ StringRef Name = AArch64PState::PStateMapper().toString(Val, Valid);
+ if (Valid)
+ O << Name.upper();
+ else
+ O << "#" << Val;
}
-// If Count > 1, there are two valid kinds of vector list:
-// (1) {Vn.layout, Vn+1.layout, ... , Vm.layout}
-// (2) {Vn.layout - Vm.layout}
-// We choose the first kind as output.
-template <A64Layout::VectorLayout Layout, unsigned Count>
-void AArch64InstPrinter::printVectorList(const MCInst *MI, unsigned OpNum,
- raw_ostream &O) {
- assert(Count >= 1 && Count <= 4 && "Invalid Number of Vectors");
-
- unsigned Reg = MI->getOperand(OpNum).getReg();
- std::string LayoutStr = A64VectorLayoutToString(Layout);
- O << "{";
- if (Count > 1) { // Print sub registers separately
- bool IsVec64 = (Layout < A64Layout::VL_16B);
- unsigned SubRegIdx = IsVec64 ? AArch64::dsub_0 : AArch64::qsub_0;
- for (unsigned I = 0; I < Count; I++) {
- std::string Name = getRegisterName(MRI.getSubReg(Reg, SubRegIdx++));
- Name[0] = 'v';
- O << Name << LayoutStr;
- if (I != Count - 1)
- O << ", ";
- }
- } else { // Print the register directly when NumVecs is 1.
- std::string Name = getRegisterName(Reg);
- Name[0] = 'v';
- O << Name << LayoutStr;
- }
- O << "}";
+void AArch64InstPrinter::printSIMDType10Operand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ unsigned RawVal = MI->getOperand(OpNo).getImm();
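+ // Type 10 expands each of the 8 immediate bits into an all-ones or
+ // all-zeros byte of the printed 64-bit value.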
+ uint64_t Val = AArch64_AM::decodeAdvSIMDModImmType10(RawVal);
+ O << format("#%#016llx", Val);
}
diff --git a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h
index 37b7273..fe7666e 100644
--- a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h
+++ b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h
@@ -11,11 +11,11 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_AARCH64INSTPRINTER_H
-#define LLVM_AARCH64INSTPRINTER_H
+#ifndef AArch64INSTPRINTER_H
+#define AArch64INSTPRINTER_H
#include "MCTargetDesc/AArch64MCTargetDesc.h"
-#include "Utils/AArch64BaseInfo.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCInstPrinter.h"
#include "llvm/MC/MCSubtargetInfo.h"
@@ -28,154 +28,112 @@ public:
AArch64InstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
const MCRegisterInfo &MRI, const MCSubtargetInfo &STI);
- // Autogenerated by tblgen
- void printInstruction(const MCInst *MI, raw_ostream &O);
- bool printAliasInstr(const MCInst *MI, raw_ostream &O);
- static const char *getRegisterName(unsigned RegNo);
- static const char *getInstructionName(unsigned Opcode);
+ void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot) override;
+ void printRegName(raw_ostream &OS, unsigned RegNo) const override;
- void printRegName(raw_ostream &O, unsigned RegNum) const;
-
- template<unsigned MemSize, unsigned RmSize>
- void printAddrRegExtendOperand(const MCInst *MI, unsigned OpNum,
- raw_ostream &O) {
- printAddrRegExtendOperand(MI, OpNum, O, MemSize, RmSize);
+ // Autogenerated by tblgen.
+ virtual void printInstruction(const MCInst *MI, raw_ostream &O);
+ virtual bool printAliasInstr(const MCInst *MI, raw_ostream &O);
+ virtual void printCustomAliasOperand(const MCInst *MI, unsigned OpIdx,
+ unsigned PrintMethodIdx, raw_ostream &O);
+ virtual StringRef getRegName(unsigned RegNo) const {
+ return getRegisterName(RegNo);
}
+ static const char *getRegisterName(unsigned RegNo,
+ unsigned AltIdx = AArch64::NoRegAltName);
-
- void printAddrRegExtendOperand(const MCInst *MI, unsigned OpNum,
- raw_ostream &O, unsigned MemSize,
- unsigned RmSize);
-
- void printAddSubImmLSL0Operand(const MCInst *MI,
- unsigned OpNum, raw_ostream &O);
- void printAddSubImmLSL12Operand(const MCInst *MI,
- unsigned OpNum, raw_ostream &O);
-
- void printBareImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
-
- template<unsigned RegWidth>
- void printBFILSBOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printBFIWidthOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printBFXWidthOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
-
-
- void printCondCodeOperand(const MCInst *MI, unsigned OpNum,
- raw_ostream &O);
-
- void printCRxOperand(const MCInst *MI, unsigned OpNum,
- raw_ostream &O);
-
- void printCVTFixedPosOperand(const MCInst *MI, unsigned OpNum,
- raw_ostream &O);
-
- void printFPImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &o);
-
- void printFPZeroOperand(const MCInst *MI, unsigned OpNum, raw_ostream &o);
-
- template<int MemScale>
- void printOffsetUImm12Operand(const MCInst *MI,
- unsigned OpNum, raw_ostream &o) {
- printOffsetUImm12Operand(MI, OpNum, o, MemScale);
+protected:
+ bool printSysAlias(const MCInst *MI, raw_ostream &O);
+ // Operand printers
+ void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printHexImm(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printPostIncOperand(const MCInst *MI, unsigned OpNo, unsigned Imm,
+ raw_ostream &O);
+ template<int Amount>
+ void printPostIncOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ printPostIncOperand(MI, OpNo, Amount, O);
}
- void printOffsetUImm12Operand(const MCInst *MI, unsigned OpNum,
- raw_ostream &o, int MemScale);
-
- template<unsigned field_width, unsigned scale>
- void printLabelOperand(const MCInst *MI, unsigned OpNum,
- raw_ostream &O);
-
- template<unsigned RegWidth>
- void printLogicalImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
-
- template<typename SomeNamedImmMapper>
- void printNamedImmOperand(const MCInst *MI, unsigned OpNum,
- raw_ostream &O) {
- printNamedImmOperand(SomeNamedImmMapper(), MI, OpNum, O);
+ void printVRegOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printSysCROperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printAddSubImm(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printLogicalImm32(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printLogicalImm64(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printShifter(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printShiftedRegister(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printExtendedRegister(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printArithExtend(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+
+ void printMemExtend(const MCInst *MI, unsigned OpNum, raw_ostream &O,
+ char SrcRegKind, unsigned Width);
+ template <char SrcRegKind, unsigned Width>
+ void printMemExtend(const MCInst *MI, unsigned OpNum, raw_ostream &O) {
+ printMemExtend(MI, OpNum, O, SrcRegKind, Width);
}
- void printNamedImmOperand(const NamedImmMapper &Mapper,
- const MCInst *MI, unsigned OpNum,
- raw_ostream &O);
-
- void printSysRegOperand(const A64SysReg::SysRegMapper &Mapper,
- const MCInst *MI, unsigned OpNum,
- raw_ostream &O);
+ void printCondCode(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printInverseCondCode(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printAlignedLabel(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printUImm12Offset(const MCInst *MI, unsigned OpNum, unsigned Scale,
+ raw_ostream &O);
+ void printAMIndexedWB(const MCInst *MI, unsigned OpNum, unsigned Scale,
+ raw_ostream &O);
- void printMRSOperand(const MCInst *MI, unsigned OpNum,
- raw_ostream &O) {
- printSysRegOperand(A64SysReg::MRSMapper(), MI, OpNum, O);
+ template<int Scale>
+ void printUImm12Offset(const MCInst *MI, unsigned OpNum, raw_ostream &O) {
+ printUImm12Offset(MI, OpNum, Scale, O);
}
- void printMSROperand(const MCInst *MI, unsigned OpNum,
- raw_ostream &O) {
- printSysRegOperand(A64SysReg::MSRMapper(), MI, OpNum, O);
+ template<int BitWidth>
+ void printAMIndexedWB(const MCInst *MI, unsigned OpNum, raw_ostream &O) {
+ printAMIndexedWB(MI, OpNum, BitWidth / 8, O);
}
- void printShiftOperand(const char *name, const MCInst *MI,
- unsigned OpIdx, raw_ostream &O);
-
- void printLSLOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printAMNoIndex(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printLSROperand(const MCInst *MI, unsigned OpNum, raw_ostream &O) {
- printShiftOperand("lsr", MI, OpNum, O);
- }
- void printASROperand(const MCInst *MI, unsigned OpNum, raw_ostream &O) {
- printShiftOperand("asr", MI, OpNum, O);
- }
- void printROROperand(const MCInst *MI, unsigned OpNum, raw_ostream &O) {
- printShiftOperand("ror", MI, OpNum, O);
- }
+ template<int Scale>
+ void printImmScale(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- template<A64SE::ShiftExtSpecifiers Shift>
- void printShiftOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O) {
- printShiftOperand(MI, OpNum, O, Shift);
- }
+ void printPrefetchOp(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printShiftOperand(const MCInst *MI, unsigned OpNum,
- raw_ostream &O, A64SE::ShiftExtSpecifiers Sh);
+ void printFPImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printVectorList(const MCInst *MI, unsigned OpNum, raw_ostream &O,
+ StringRef LayoutSuffix);
- void printMoveWideImmOperand(const MCInst *MI, unsigned OpNum,
- raw_ostream &O);
+ /// Print a list of vector registers where the type suffix is implicit
+ /// (i.e. attached to the instruction rather than the registers).
+ void printImplicitlyTypedVectorList(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O);
- template<int MemSize> void
- printSImm7ScaledOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ template <unsigned NumLanes, char LaneKind>
+ void printTypedVectorList(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printOffsetSImm9Operand(const MCInst *MI, unsigned OpNum,
- raw_ostream &O);
-
- void printPRFMOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
-
- template<A64SE::ShiftExtSpecifiers EXT>
- void printRegExtendOperand(const MCInst *MI, unsigned OpNum,
- raw_ostream &O) {
- printRegExtendOperand(MI, OpNum, O, EXT);
- }
+ void printVectorIndex(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printAdrpLabel(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printBarrierOption(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printMSRSystemRegister(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printMRSSystemRegister(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printSystemPStateField(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printSIMDType10Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+};
- void printRegExtendOperand(const MCInst *MI, unsigned OpNum,
- raw_ostream &O, A64SE::ShiftExtSpecifiers Ext);
+class AArch64AppleInstPrinter : public AArch64InstPrinter {
+public:
+ AArch64AppleInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI, const MCSubtargetInfo &STI);
- void printVPRRegister(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot);
+ void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot) override;
- bool isStackReg(unsigned RegNo) {
- return RegNo == AArch64::XSP || RegNo == AArch64::WSP;
+ void printInstruction(const MCInst *MI, raw_ostream &O) override;
+ bool printAliasInstr(const MCInst *MI, raw_ostream &O) override;
+ virtual void printCustomAliasOperand(const MCInst *MI, unsigned OpIdx,
+ unsigned PrintMethodIdx, raw_ostream &O);
+ StringRef getRegName(unsigned RegNo) const override {
+ return getRegisterName(RegNo);
}
-
- template <A64SE::ShiftExtSpecifiers Ext, bool IsHalf>
- void printNeonMovImmShiftOperand(const MCInst *MI, unsigned OpNum,
- raw_ostream &O);
- void printNeonUImm0Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printUImmHexOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printUImmBareOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printNeonUImm64MaskOperand(const MCInst *MI, unsigned OpNum,
- raw_ostream &O);
-
- template <A64Layout::VectorLayout Layout, unsigned Count>
- void printVectorList(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ static const char *getRegisterName(unsigned RegNo,
+ unsigned AltIdx = AArch64::NoRegAltName);
};
}
diff --git a/lib/Target/AArch64/InstPrinter/Android.mk b/lib/Target/AArch64/InstPrinter/Android.mk
index ac9b0df..de6aa89 100644
--- a/lib/Target/AArch64/InstPrinter/Android.mk
+++ b/lib/Target/AArch64/InstPrinter/Android.mk
@@ -2,6 +2,7 @@ LOCAL_PATH := $(call my-dir)
arm64_asm_printer_TBLGEN_TABLES := \
AArch64GenAsmWriter.inc \
+ AArch64GenAsmWriter1.inc \
AArch64GenRegisterInfo.inc \
AArch64GenSubtargetInfo.inc \
AArch64GenInstrInfo.inc
diff --git a/lib/Target/AArch64/InstPrinter/CMakeLists.txt b/lib/Target/AArch64/InstPrinter/CMakeLists.txt
index 3db56e4..363f502 100644
--- a/lib/Target/AArch64/InstPrinter/CMakeLists.txt
+++ b/lib/Target/AArch64/InstPrinter/CMakeLists.txt
@@ -1,3 +1,7 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
add_llvm_library(LLVMAArch64AsmPrinter
AArch64InstPrinter.cpp
)
+
+add_dependencies(LLVMAArch64AsmPrinter AArch64CommonTableGen)
diff --git a/lib/Target/AArch64/InstPrinter/LLVMBuild.txt b/lib/Target/AArch64/InstPrinter/LLVMBuild.txt
index 4836c7c..a13e842 100644
--- a/lib/Target/AArch64/InstPrinter/LLVMBuild.txt
+++ b/lib/Target/AArch64/InstPrinter/LLVMBuild.txt
@@ -1,4 +1,4 @@
-;===- ./lib/Target/AArch64/InstPrinter/LLVMBuild.txt -----------*- Conf -*--===;
+;===- ./lib/Target/AArch64/InstPrinter/LLVMBuild.txt -------------*- Conf -*--===;
;
; The LLVM Compiler Infrastructure
;
diff --git a/lib/Target/AArch64/InstPrinter/Makefile b/lib/Target/AArch64/InstPrinter/Makefile
index 1c36a8d..b17e8d0 100644
--- a/lib/Target/AArch64/InstPrinter/Makefile
+++ b/lib/Target/AArch64/InstPrinter/Makefile
@@ -9,7 +9,7 @@
LEVEL = ../../../..
LIBRARYNAME = LLVMAArch64AsmPrinter
-# Hack: we need to include 'main' target directory to grab private headers
+# Hack: we need to include 'main' arm target directory to grab private headers
CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
include $(LEVEL)/Makefile.common
diff --git a/lib/Target/AArch64/LLVMBuild.txt b/lib/Target/AArch64/LLVMBuild.txt
index 4c8f101..642c183 100644
--- a/lib/Target/AArch64/LLVMBuild.txt
+++ b/lib/Target/AArch64/LLVMBuild.txt
@@ -1,4 +1,4 @@
-;===- ./lib/Target/AArch64/LLVMBuild.txt -----------------------*- Conf -*--===;
+;===- ./lib/Target/AArch64/LLVMBuild.txt -------------------------*- Conf -*--===;
;
; The LLVM Compiler Infrastructure
;
@@ -31,5 +31,5 @@ has_jit = 1
type = Library
name = AArch64CodeGen
parent = AArch64
-required_libraries = AArch64AsmPrinter AArch64Desc AArch64Info AArch64Utils AsmPrinter CodeGen Core MC SelectionDAG Support Target
+required_libraries = AArch64AsmPrinter AArch64Desc AArch64Info AArch64Utils Analysis AsmPrinter CodeGen Core MC Scalar SelectionDAG Support Target
add_to_library_groups = AArch64
diff --git a/lib/Target/ARM64/MCTargetDesc/ARM64AddressingModes.h b/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h
index 7717743..8b1e44e 100644
--- a/lib/Target/ARM64/MCTargetDesc/ARM64AddressingModes.h
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h
@@ -1,4 +1,4 @@
-//===- ARM64AddressingModes.h - ARM64 Addressing Modes ----------*- C++ -*-===//
+//===- AArch64AddressingModes.h - AArch64 Addressing Modes ------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,12 +7,12 @@
//
//===----------------------------------------------------------------------===//
//
-// This file contains the ARM64 addressing mode implementation stuff.
+// This file contains the AArch64 addressing mode implementation stuff.
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TARGET_ARM64_ARM64ADDRESSINGMODES_H
-#define LLVM_TARGET_ARM64_ARM64ADDRESSINGMODES_H
+#ifndef LLVM_TARGET_AArch64_AArch64ADDRESSINGMODES_H
+#define LLVM_TARGET_AArch64_AArch64ADDRESSINGMODES_H
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
@@ -22,38 +22,63 @@
namespace llvm {
-/// ARM64_AM - ARM64 Addressing Mode Stuff
-namespace ARM64_AM {
+/// AArch64_AM - AArch64 Addressing Mode Stuff
+namespace AArch64_AM {
//===----------------------------------------------------------------------===//
// Shifts
//
-enum ShiftType {
- InvalidShift = -1,
+enum ShiftExtendType {
+ InvalidShiftExtend = -1,
LSL = 0,
- LSR = 1,
- ASR = 2,
- ROR = 3,
- MSL = 4
+ LSR,
+ ASR,
+ ROR,
+ MSL,
+
+ UXTB,
+ UXTH,
+ UXTW,
+ UXTX,
+
+ SXTB,
+ SXTH,
+ SXTW,
+ SXTX,
};
/// getShiftName - Get the string encoding for the shift type.
-static inline const char *getShiftName(ARM64_AM::ShiftType ST) {
+static inline const char *getShiftExtendName(AArch64_AM::ShiftExtendType ST) {
switch (ST) {
default: assert(false && "unhandled shift type!");
- case ARM64_AM::LSL: return "lsl";
- case ARM64_AM::LSR: return "lsr";
- case ARM64_AM::ASR: return "asr";
- case ARM64_AM::ROR: return "ror";
- case ARM64_AM::MSL: return "msl";
+ case AArch64_AM::LSL: return "lsl";
+ case AArch64_AM::LSR: return "lsr";
+ case AArch64_AM::ASR: return "asr";
+ case AArch64_AM::ROR: return "ror";
+ case AArch64_AM::MSL: return "msl";
+ case AArch64_AM::UXTB: return "uxtb";
+ case AArch64_AM::UXTH: return "uxth";
+ case AArch64_AM::UXTW: return "uxtw";
+ case AArch64_AM::UXTX: return "uxtx";
+ case AArch64_AM::SXTB: return "sxtb";
+ case AArch64_AM::SXTH: return "sxth";
+ case AArch64_AM::SXTW: return "sxtw";
+ case AArch64_AM::SXTX: return "sxtx";
}
- return 0;
+ return nullptr;
}
/// getShiftType - Extract the shift type.
-static inline ARM64_AM::ShiftType getShiftType(unsigned Imm) {
- return ARM64_AM::ShiftType((Imm >> 6) & 0x7);
+static inline AArch64_AM::ShiftExtendType getShiftType(unsigned Imm) {
+ switch ((Imm >> 6) & 0x7) {
+ default: return AArch64_AM::InvalidShiftExtend;
+ case 0: return AArch64_AM::LSL;
+ case 1: return AArch64_AM::LSR;
+ case 2: return AArch64_AM::ASR;
+ case 3: return AArch64_AM::ROR;
+ case 4: return AArch64_AM::MSL;
+ }
}
/// getShiftValue - Extract the shift value.
@@ -70,56 +95,51 @@ static inline unsigned getShiftValue(unsigned Imm) {
/// 100 ==> msl
/// {8-6} = shifter
/// {5-0} = imm
-static inline unsigned getShifterImm(ARM64_AM::ShiftType ST, unsigned Imm) {
+static inline unsigned getShifterImm(AArch64_AM::ShiftExtendType ST,
+ unsigned Imm) {
  assert((Imm & 0x3f) == Imm && "Illegal shifted immediate value!");
- return (unsigned(ST) << 6) | (Imm & 0x3f);
+ unsigned STEnc = 0;
+ switch (ST) {
+ default: llvm_unreachable("Invalid shift requested");
+ case AArch64_AM::LSL: STEnc = 0; break;
+ case AArch64_AM::LSR: STEnc = 1; break;
+ case AArch64_AM::ASR: STEnc = 2; break;
+ case AArch64_AM::ROR: STEnc = 3; break;
+ case AArch64_AM::MSL: STEnc = 4; break;
+ }
+ return (STEnc << 6) | (Imm & 0x3f);
}
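
Under the encoding documented above ({8-6} = shifter, {5-0} = imm), the
encode and decode helpers round-trip; a small sanity-check sketch,
assuming a translation unit that includes this header:

// LSR #12: LSR encodes as 1 in bits {8-6}.
unsigned Enc = AArch64_AM::getShifterImm(AArch64_AM::LSR, 12);
assert(Enc == ((1u << 6) | 12));
assert(AArch64_AM::getShiftType(Enc) == AArch64_AM::LSR);
assert(AArch64_AM::getShiftValue(Enc) == 12);   // Imm & 0x3f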
//===----------------------------------------------------------------------===//
// Extends
//
-enum ExtendType {
- InvalidExtend = -1,
- UXTB = 0,
- UXTH = 1,
- UXTW = 2,
- UXTX = 3,
- SXTB = 4,
- SXTH = 5,
- SXTW = 6,
- SXTX = 7
-};
-
-/// getExtendName - Get the string encoding for the extend type.
-static inline const char *getExtendName(ARM64_AM::ExtendType ET) {
- switch (ET) {
- default: assert(false && "unhandled extend type!");
- case ARM64_AM::UXTB: return "uxtb";
- case ARM64_AM::UXTH: return "uxth";
- case ARM64_AM::UXTW: return "uxtw";
- case ARM64_AM::UXTX: return "uxtx";
- case ARM64_AM::SXTB: return "sxtb";
- case ARM64_AM::SXTH: return "sxth";
- case ARM64_AM::SXTW: return "sxtw";
- case ARM64_AM::SXTX: return "sxtx";
- }
- return 0;
-}
-
/// getArithShiftValue - get the arithmetic shift value.
static inline unsigned getArithShiftValue(unsigned Imm) {
return Imm & 0x7;
}
/// getExtendType - Extract the extend type for operands of arithmetic ops.
-static inline ARM64_AM::ExtendType getArithExtendType(unsigned Imm) {
- return ARM64_AM::ExtendType((Imm >> 3) & 0x7);
+static inline AArch64_AM::ShiftExtendType getExtendType(unsigned Imm) {
+ assert((Imm & 0x7) == Imm && "invalid immediate!");
+ switch (Imm) {
+ default: llvm_unreachable("Compiler bug!");
+ case 0: return AArch64_AM::UXTB;
+ case 1: return AArch64_AM::UXTH;
+ case 2: return AArch64_AM::UXTW;
+ case 3: return AArch64_AM::UXTX;
+ case 4: return AArch64_AM::SXTB;
+ case 5: return AArch64_AM::SXTH;
+ case 6: return AArch64_AM::SXTW;
+ case 7: return AArch64_AM::SXTX;
+ }
}
-/// getArithExtendImm - Encode the extend type and shift amount for an
-/// arithmetic instruction:
-/// imm: 3-bit extend amount
+static inline AArch64_AM::ShiftExtendType getArithExtendType(unsigned Imm) {
+ return getExtendType((Imm >> 3) & 0x7);
+}
+
+/// Mapping from extend bits to required operation:
/// shifter: 000 ==> uxtb
/// 001 ==> uxth
/// 010 ==> uxtw
@@ -128,12 +148,29 @@ static inline ARM64_AM::ExtendType getArithExtendType(unsigned Imm) {
/// 101 ==> sxth
/// 110 ==> sxtw
/// 111 ==> sxtx
+inline unsigned getExtendEncoding(AArch64_AM::ShiftExtendType ET) {
+ switch (ET) {
+ default: llvm_unreachable("Invalid extend type requested");
+ case AArch64_AM::UXTB: return 0; break;
+ case AArch64_AM::UXTH: return 1; break;
+ case AArch64_AM::UXTW: return 2; break;
+ case AArch64_AM::UXTX: return 3; break;
+ case AArch64_AM::SXTB: return 4; break;
+ case AArch64_AM::SXTH: return 5; break;
+ case AArch64_AM::SXTW: return 6; break;
+ case AArch64_AM::SXTX: return 7; break;
+ }
+}
+
+/// getArithExtendImm - Encode the extend type and shift amount for an
+/// arithmetic instruction:
+/// imm: 3-bit extend amount
/// {5-3} = shifter
/// {2-0} = imm3
-static inline unsigned getArithExtendImm(ARM64_AM::ExtendType ET,
+static inline unsigned getArithExtendImm(AArch64_AM::ShiftExtendType ET,
unsigned Imm) {
  assert((Imm & 0x7) == Imm && "Illegal shifted immediate value!");
- return (unsigned(ET) << 3) | (Imm & 0x7);
+ return (getExtendEncoding(ET) << 3) | (Imm & 0x7);
}
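
Combining the helpers above, an arithmetic extend operand round-trips as
in this sketch ("uxtw #2"; UXTW encodes as 2):

unsigned Enc = AArch64_AM::getArithExtendImm(AArch64_AM::UXTW, 2);
assert(Enc == ((2u << 3) | 2));                       // {5-3}=2, {2-0}=2
assert(AArch64_AM::getArithExtendType(Enc) == AArch64_AM::UXTW);
assert(AArch64_AM::getArithShiftValue(Enc) == 2);     // Imm & 0x7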
/// getMemDoShift - Extract the "do shift" flag value for load/store
@@ -144,8 +181,8 @@ static inline bool getMemDoShift(unsigned Imm) {
/// getExtendType - Extract the extend type for the offset operand of
/// loads/stores.
-static inline ARM64_AM::ExtendType getMemExtendType(unsigned Imm) {
- return ARM64_AM::ExtendType((Imm >> 1) & 0x7);
+static inline AArch64_AM::ShiftExtendType getMemExtendType(unsigned Imm) {
+ return getExtendType((Imm >> 1) & 0x7);
}
/// getExtendImm - Encode the extend type and amount for a load/store inst:
@@ -160,66 +197,9 @@ static inline ARM64_AM::ExtendType getMemExtendType(unsigned Imm) {
/// 111 ==> sxtx
/// {3-1} = shifter
/// {0} = doshift
-static inline unsigned getMemExtendImm(ARM64_AM::ExtendType ET, bool DoShift) {
- return (unsigned(ET) << 1) | unsigned(DoShift);
-}
-
-//===----------------------------------------------------------------------===//
-// Prefetch
-//
-
-/// Pre-fetch operator names.
-/// The enum values match the encoding values:
-/// prfop<4:3> 00=preload data, 10=prepare for store
-/// prfop<2:1> 00=target L1 cache, 01=target L2 cache, 10=target L3 cache,
-/// prfop<0> 0=non-streaming (temporal), 1=streaming (non-temporal)
-enum PrefetchOp {
- InvalidPrefetchOp = -1,
- PLDL1KEEP = 0x00,
- PLDL1STRM = 0x01,
- PLDL2KEEP = 0x02,
- PLDL2STRM = 0x03,
- PLDL3KEEP = 0x04,
- PLDL3STRM = 0x05,
- PSTL1KEEP = 0x10,
- PSTL1STRM = 0x11,
- PSTL2KEEP = 0x12,
- PSTL2STRM = 0x13,
- PSTL3KEEP = 0x14,
- PSTL3STRM = 0x15
-};
-
-/// isNamedPrefetchOp - Check if the prefetch-op 5-bit value has a name.
-static inline bool isNamedPrefetchOp(unsigned prfop) {
- switch (prfop) {
- default: return false;
- case ARM64_AM::PLDL1KEEP: case ARM64_AM::PLDL1STRM: case ARM64_AM::PLDL2KEEP:
- case ARM64_AM::PLDL2STRM: case ARM64_AM::PLDL3KEEP: case ARM64_AM::PLDL3STRM:
- case ARM64_AM::PSTL1KEEP: case ARM64_AM::PSTL1STRM: case ARM64_AM::PSTL2KEEP:
- case ARM64_AM::PSTL2STRM: case ARM64_AM::PSTL3KEEP: case ARM64_AM::PSTL3STRM:
- return true;
- }
-}
-
-
-/// getPrefetchOpName - Get the string encoding for the prefetch operator.
-static inline const char *getPrefetchOpName(ARM64_AM::PrefetchOp prfop) {
- switch (prfop) {
- default: assert(false && "unhandled prefetch-op type!");
- case ARM64_AM::PLDL1KEEP: return "pldl1keep";
- case ARM64_AM::PLDL1STRM: return "pldl1strm";
- case ARM64_AM::PLDL2KEEP: return "pldl2keep";
- case ARM64_AM::PLDL2STRM: return "pldl2strm";
- case ARM64_AM::PLDL3KEEP: return "pldl3keep";
- case ARM64_AM::PLDL3STRM: return "pldl3strm";
- case ARM64_AM::PSTL1KEEP: return "pstl1keep";
- case ARM64_AM::PSTL1STRM: return "pstl1strm";
- case ARM64_AM::PSTL2KEEP: return "pstl2keep";
- case ARM64_AM::PSTL2STRM: return "pstl2strm";
- case ARM64_AM::PSTL3KEEP: return "pstl3keep";
- case ARM64_AM::PSTL3STRM: return "pstl3strm";
- }
- return 0;
+static inline unsigned getMemExtendImm(AArch64_AM::ShiftExtendType ET,
+ bool DoShift) {
+ return (getExtendEncoding(ET) << 1) | unsigned(DoShift);
}
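
The load/store form packs the same extend encoding one bit higher, with
the "do shift" flag in bit 0; a companion sketch ("sxtw" with the shift
applied; SXTW encodes as 6):

unsigned Enc = AArch64_AM::getMemExtendImm(AArch64_AM::SXTW, /*DoShift=*/true);
assert(Enc == ((6u << 1) | 1));
assert(AArch64_AM::getMemExtendType(Enc) == AArch64_AM::SXTW);
assert(AArch64_AM::getMemDoShift(Enc));               // bit 0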
static inline uint64_t ror(uint64_t elt, unsigned size) {
@@ -751,7 +731,7 @@ static inline uint64_t decodeAdvSIMDModImmType12(uint8_t Imm) {
return (EncVal << 32) | EncVal;
}
-} // end namespace ARM64_AM
+} // end namespace AArch64_AM
} // end namespace llvm
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
index f1452ab..d8900d4 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
@@ -6,168 +6,57 @@
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
-//
-// This file contains the AArch64 implementation of the MCAsmBackend class,
-// which is principally concerned with relaxation of the various fixup kinds.
-//
-//===----------------------------------------------------------------------===//
+#include "AArch64.h"
+#include "AArch64RegisterInfo.h"
#include "MCTargetDesc/AArch64FixupKinds.h"
-#include "MCTargetDesc/AArch64MCTargetDesc.h"
+#include "llvm/ADT/Triple.h"
#include "llvm/MC/MCAsmBackend.h"
-#include "llvm/MC/MCELFObjectWriter.h"
+#include "llvm/MC/MCDirectives.h"
#include "llvm/MC/MCFixupKindInfo.h"
#include "llvm/MC/MCObjectWriter.h"
-#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/Support/ELF.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCSectionELF.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/MachO.h"
using namespace llvm;
namespace {
-class AArch64AsmBackend : public MCAsmBackend {
- const MCSubtargetInfo* STI;
-public:
- AArch64AsmBackend(const Target &T, const StringRef TT)
- : MCAsmBackend(),
- STI(AArch64_MC::createAArch64MCSubtargetInfo(TT, "", ""))
- {}
-
-
- ~AArch64AsmBackend() {
- delete STI;
- }
-
- bool writeNopData(uint64_t Count, MCObjectWriter *OW) const;
-
- virtual void processFixupValue(const MCAssembler &Asm,
- const MCAsmLayout &Layout,
- const MCFixup &Fixup, const MCFragment *DF,
- const MCValue &Target, uint64_t &Value,
- bool &IsResolved);
-};
-} // end anonymous namespace
-
-void AArch64AsmBackend::processFixupValue(const MCAssembler &Asm,
- const MCAsmLayout &Layout,
- const MCFixup &Fixup,
- const MCFragment *DF,
- const MCValue &Target,
- uint64_t &Value, bool &IsResolved) {
- // The ADRP instruction adds some multiple of 0x1000 to the current PC &
- // ~0xfff. This means that the required offset to reach a symbol can vary by
- // up to one step depending on where the ADRP is in memory. For example:
- //
- // ADRP x0, there
- // there:
- //
- // If the ADRP occurs at address 0xffc then "there" will be at 0x1000 and
- // we'll need that as an offset. At any other address "there" will be in the
- // same page as the ADRP and the instruction should encode 0x0. Assuming the
- // section isn't 0x1000-aligned, we therefore need to delegate this decision
- // to the linker -- a relocation!
- if ((uint32_t)Fixup.getKind() == AArch64::fixup_a64_adr_prel_page ||
- (uint32_t)Fixup.getKind() == AArch64::fixup_a64_adr_prel_got_page ||
- (uint32_t)Fixup.getKind() == AArch64::fixup_a64_adr_gottprel_page ||
- (uint32_t)Fixup.getKind() == AArch64::fixup_a64_tlsdesc_adr_page)
- IsResolved = false;
-}
-
-static uint64_t adjustFixupValue(unsigned Kind, uint64_t Value);
-
-namespace {
+class AArch64AsmBackend : public MCAsmBackend {
+ static const unsigned PCRelFlagVal =
+ MCFixupKindInfo::FKF_IsAlignedDownTo32Bits | MCFixupKindInfo::FKF_IsPCRel;
-class ELFAArch64AsmBackend : public AArch64AsmBackend {
- uint8_t OSABI;
- bool IsLittle; // Big or little endian
public:
- ELFAArch64AsmBackend(const Target &T, const StringRef TT,
- uint8_t _OSABI, bool isLittle)
- : AArch64AsmBackend(T, TT), OSABI(_OSABI), IsLittle(isLittle) { }
+ AArch64AsmBackend(const Target &T) : MCAsmBackend() {}
- bool fixupNeedsRelaxation(const MCFixup &Fixup,
- uint64_t Value,
- const MCRelaxableFragment *DF,
- const MCAsmLayout &Layout) const;
-
- unsigned int getNumFixupKinds() const {
+ unsigned getNumFixupKinds() const override {
return AArch64::NumTargetFixupKinds;
}
- const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const {
+ const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override {
const static MCFixupKindInfo Infos[AArch64::NumTargetFixupKinds] = {
-// This table *must* be in the order that the fixup_* kinds are defined in
-// AArch64FixupKinds.h.
-//
-// Name Offset (bits) Size (bits) Flags
-{ "fixup_a64_ld_prel", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
-{ "fixup_a64_adr_prel", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
-{ "fixup_a64_adr_prel_page", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
-{ "fixup_a64_add_lo12", 0, 32, 0 },
-{ "fixup_a64_ldst8_lo12", 0, 32, 0 },
-{ "fixup_a64_ldst16_lo12", 0, 32, 0 },
-{ "fixup_a64_ldst32_lo12", 0, 32, 0 },
-{ "fixup_a64_ldst64_lo12", 0, 32, 0 },
-{ "fixup_a64_ldst128_lo12", 0, 32, 0 },
-{ "fixup_a64_tstbr", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
-{ "fixup_a64_condbr", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
-{ "fixup_a64_uncondbr", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
-{ "fixup_a64_call", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
-{ "fixup_a64_movw_uabs_g0", 0, 32, 0 },
-{ "fixup_a64_movw_uabs_g0_nc", 0, 32, 0 },
-{ "fixup_a64_movw_uabs_g1", 0, 32, 0 },
-{ "fixup_a64_movw_uabs_g1_nc", 0, 32, 0 },
-{ "fixup_a64_movw_uabs_g2", 0, 32, 0 },
-{ "fixup_a64_movw_uabs_g2_nc", 0, 32, 0 },
-{ "fixup_a64_movw_uabs_g3", 0, 32, 0 },
-{ "fixup_a64_movw_sabs_g0", 0, 32, 0 },
-{ "fixup_a64_movw_sabs_g1", 0, 32, 0 },
-{ "fixup_a64_movw_sabs_g2", 0, 32, 0 },
-{ "fixup_a64_adr_prel_got_page", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
-{ "fixup_a64_ld64_got_lo12_nc", 0, 32, 0 },
-{ "fixup_a64_movw_dtprel_g2", 0, 32, 0 },
-{ "fixup_a64_movw_dtprel_g1", 0, 32, 0 },
-{ "fixup_a64_movw_dtprel_g1_nc", 0, 32, 0 },
-{ "fixup_a64_movw_dtprel_g0", 0, 32, 0 },
-{ "fixup_a64_movw_dtprel_g0_nc", 0, 32, 0 },
-{ "fixup_a64_add_dtprel_hi12", 0, 32, 0 },
-{ "fixup_a64_add_dtprel_lo12", 0, 32, 0 },
-{ "fixup_a64_add_dtprel_lo12_nc", 0, 32, 0 },
-{ "fixup_a64_ldst8_dtprel_lo12", 0, 32, 0 },
-{ "fixup_a64_ldst8_dtprel_lo12_nc", 0, 32, 0 },
-{ "fixup_a64_ldst16_dtprel_lo12", 0, 32, 0 },
-{ "fixup_a64_ldst16_dtprel_lo12_nc", 0, 32, 0 },
-{ "fixup_a64_ldst32_dtprel_lo12", 0, 32, 0 },
-{ "fixup_a64_ldst32_dtprel_lo12_nc", 0, 32, 0 },
-{ "fixup_a64_ldst64_dtprel_lo12", 0, 32, 0 },
-{ "fixup_a64_ldst64_dtprel_lo12_nc", 0, 32, 0 },
-{ "fixup_a64_movw_gottprel_g1", 0, 32, 0 },
-{ "fixup_a64_movw_gottprel_g0_nc", 0, 32, 0 },
-{ "fixup_a64_adr_gottprel_page", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
-{ "fixup_a64_ld64_gottprel_lo12_nc", 0, 32, 0 },
-{ "fixup_a64_ld_gottprel_prel19", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
-{ "fixup_a64_movw_tprel_g2", 0, 32, 0 },
-{ "fixup_a64_movw_tprel_g1", 0, 32, 0 },
-{ "fixup_a64_movw_tprel_g1_nc", 0, 32, 0 },
-{ "fixup_a64_movw_tprel_g0", 0, 32, 0 },
-{ "fixup_a64_movw_tprel_g0_nc", 0, 32, 0 },
-{ "fixup_a64_add_tprel_hi12", 0, 32, 0 },
-{ "fixup_a64_add_tprel_lo12", 0, 32, 0 },
-{ "fixup_a64_add_tprel_lo12_nc", 0, 32, 0 },
-{ "fixup_a64_ldst8_tprel_lo12", 0, 32, 0 },
-{ "fixup_a64_ldst8_tprel_lo12_nc", 0, 32, 0 },
-{ "fixup_a64_ldst16_tprel_lo12", 0, 32, 0 },
-{ "fixup_a64_ldst16_tprel_lo12_nc", 0, 32, 0 },
-{ "fixup_a64_ldst32_tprel_lo12", 0, 32, 0 },
-{ "fixup_a64_ldst32_tprel_lo12_nc", 0, 32, 0 },
-{ "fixup_a64_ldst64_tprel_lo12", 0, 32, 0 },
-{ "fixup_a64_ldst64_tprel_lo12_nc", 0, 32, 0 },
-{ "fixup_a64_tlsdesc_adr_page", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
-{ "fixup_a64_tlsdesc_ld64_lo12_nc", 0, 32, 0 },
-{ "fixup_a64_tlsdesc_add_lo12_nc", 0, 32, 0 },
-{ "fixup_a64_tlsdesc_call", 0, 0, 0 }
+ // This table *must* be in the order that the fixup_* kinds are defined in
+ // AArch64FixupKinds.h.
+ //
+ // Name Offset (bits) Size (bits) Flags
+ { "fixup_aarch64_pcrel_adr_imm21", 0, 32, PCRelFlagVal },
+ { "fixup_aarch64_pcrel_adrp_imm21", 0, 32, PCRelFlagVal },
+ { "fixup_aarch64_add_imm12", 10, 12, 0 },
+ { "fixup_aarch64_ldst_imm12_scale1", 10, 12, 0 },
+ { "fixup_aarch64_ldst_imm12_scale2", 10, 12, 0 },
+ { "fixup_aarch64_ldst_imm12_scale4", 10, 12, 0 },
+ { "fixup_aarch64_ldst_imm12_scale8", 10, 12, 0 },
+ { "fixup_aarch64_ldst_imm12_scale16", 10, 12, 0 },
+ { "fixup_aarch64_ldr_pcrel_imm19", 5, 19, PCRelFlagVal },
+ { "fixup_aarch64_movw", 5, 16, 0 },
+ { "fixup_aarch64_pcrel_branch14", 5, 14, PCRelFlagVal },
+ { "fixup_aarch64_pcrel_branch19", 5, 19, PCRelFlagVal },
+ { "fixup_aarch64_pcrel_branch26", 0, 26, PCRelFlagVal },
+ { "fixup_aarch64_pcrel_call26", 0, 26, PCRelFlagVal },
+ { "fixup_aarch64_tlsdesc_call", 0, 0, 0 }
};
+
if (Kind < FirstTargetFixupKind)
return MCAsmBackend::getFixupKindInfo(Kind);
@@ -177,417 +66,501 @@ public:
}
void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
- uint64_t Value, bool IsPCRel) const {
- unsigned NumBytes = getFixupKindInfo(Fixup.getKind()).TargetSize / 8;
- Value = adjustFixupValue(Fixup.getKind(), Value);
- if (!Value) return; // Doesn't change encoding.
-
- unsigned Offset = Fixup.getOffset();
- assert(Offset + NumBytes <= DataSize && "Invalid fixup offset!");
-
- // For each byte of the fragment that the fixup touches, mask in the bits
- // from the fixup value.
- for (unsigned i = 0; i != NumBytes; ++i) {
- Data[Offset + i] |= uint8_t((Value >> (i * 8)) & 0xff);
- }
- }
+ uint64_t Value, bool IsPCRel) const override;
- bool mayNeedRelaxation(const MCInst&) const {
- return false;
- }
+ bool mayNeedRelaxation(const MCInst &Inst) const override;
+ bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
+ const MCRelaxableFragment *DF,
+ const MCAsmLayout &Layout) const override;
+ void relaxInstruction(const MCInst &Inst, MCInst &Res) const override;
+ bool writeNopData(uint64_t Count, MCObjectWriter *OW) const override;
- void relaxInstruction(const MCInst&, llvm::MCInst&) const {
- llvm_unreachable("Cannot relax instructions");
- }
+ void HandleAssemblerFlag(MCAssemblerFlag Flag) {}
- MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
- return createAArch64ELFObjectWriter(OS, OSABI, IsLittle);
- }
+ unsigned getPointerSize() const { return 8; }
};
} // end anonymous namespace
-bool
-ELFAArch64AsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup,
- uint64_t Value,
- const MCRelaxableFragment *DF,
- const MCAsmLayout &Layout) const {
- // Correct for now. With all instructions 32-bit only very low-level
- // considerations could make you select something which may fail.
- return false;
-}
+/// \brief The number of bytes the fixup may change.
+static unsigned getFixupKindNumBytes(unsigned Kind) {
+ switch (Kind) {
+ default:
+ assert(0 && "Unknown fixup kind!");
+ case AArch64::fixup_aarch64_tlsdesc_call:
+ return 0;
-bool AArch64AsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
- // Can't emit NOP with size not multiple of 32-bits
- if (Count % 4 != 0)
- return false;
+ case FK_Data_1:
+ return 1;
- uint64_t NumNops = Count / 4;
- for (uint64_t i = 0; i != NumNops; ++i)
- OW->Write32(0xd503201f);
+ case FK_Data_2:
+ case AArch64::fixup_aarch64_movw:
+ return 2;
+
+ case AArch64::fixup_aarch64_pcrel_branch14:
+ case AArch64::fixup_aarch64_add_imm12:
+ case AArch64::fixup_aarch64_ldst_imm12_scale1:
+ case AArch64::fixup_aarch64_ldst_imm12_scale2:
+ case AArch64::fixup_aarch64_ldst_imm12_scale4:
+ case AArch64::fixup_aarch64_ldst_imm12_scale8:
+ case AArch64::fixup_aarch64_ldst_imm12_scale16:
+ case AArch64::fixup_aarch64_ldr_pcrel_imm19:
+ case AArch64::fixup_aarch64_pcrel_branch19:
+ return 3;
+
+ case AArch64::fixup_aarch64_pcrel_adr_imm21:
+ case AArch64::fixup_aarch64_pcrel_adrp_imm21:
+ case AArch64::fixup_aarch64_pcrel_branch26:
+ case AArch64::fixup_aarch64_pcrel_call26:
+ case FK_Data_4:
+ return 4;
- return true;
+ case FK_Data_8:
+ return 8;
+ }
}
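
These byte counts pair with the (offset, size) table in getFixupKindInfo:
applyFixup shifts the value left by TargetOffset, so only the low
ceil((offset + size) / 8) bytes of the instruction word can change. Two
worked examples:

// fixup_aarch64_ldr_pcrel_imm19: offset 5, size 19 -> top bit 23,
//   bytes 0..2 touched -> NumBytes == 3.
// fixup_aarch64_pcrel_branch26:  offset 0, size 26 -> top bit 25,
//   bytes 0..3 touched -> NumBytes == 4.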
-static unsigned ADRImmBits(unsigned Value) {
+static unsigned AdrImmBits(unsigned Value) {
unsigned lo2 = Value & 0x3;
- unsigned hi19 = (Value & 0x1fffff) >> 2;
-
+ unsigned hi19 = (Value & 0x1ffffc) >> 2;
return (hi19 << 5) | (lo2 << 29);
}
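
AdrImmBits scatters a 21-bit value into the ADR/ADRP instruction layout:
the low two bits land in immlo (bits 30:29) and the remaining nineteen in
immhi (bits 23:5). A worked example:

// Value = 0x12345:
//   lo2  = 0x12345 & 0x3             = 0x1
//   hi19 = (0x12345 & 0x1ffffc) >> 2 = 0x48d1
//   AdrImmBits(0x12345) = (0x48d1 << 5) | (0x1 << 29) = 0x20091a20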
static uint64_t adjustFixupValue(unsigned Kind, uint64_t Value) {
+ int64_t SignedValue = static_cast<int64_t>(Value);
switch (Kind) {
default:
- llvm_unreachable("Unknown fixup kind!");
- case FK_Data_2:
- assert((int64_t)Value >= -32768 &&
- (int64_t)Value <= 65536 &&
- "Out of range ABS16 fixup");
+ assert(false && "Unknown fixup kind!");
+ case AArch64::fixup_aarch64_pcrel_adr_imm21:
+ if (SignedValue > 2097151 || SignedValue < -2097152)
+ report_fatal_error("fixup value out of range");
+ return AdrImmBits(Value & 0x1fffffULL);
+ case AArch64::fixup_aarch64_pcrel_adrp_imm21:
+ return AdrImmBits((Value & 0x1fffff000ULL) >> 12);
+ case AArch64::fixup_aarch64_ldr_pcrel_imm19:
+ case AArch64::fixup_aarch64_pcrel_branch19:
+ // Signed 21-bit immediate
+ if (SignedValue > 2097151 || SignedValue < -2097152)
+ report_fatal_error("fixup value out of range");
+ // Low two bits are not encoded.
+ return (Value >> 2) & 0x7ffff;
+ case AArch64::fixup_aarch64_add_imm12:
+ case AArch64::fixup_aarch64_ldst_imm12_scale1:
+ // Unsigned 12-bit immediate
+ if (Value >= 0x1000)
+ report_fatal_error("invalid imm12 fixup value");
return Value;
- case FK_Data_4:
- assert((int64_t)Value >= -(1LL << 31) &&
- (int64_t)Value <= (1LL << 32) - 1 &&
- "Out of range ABS32 fixup");
+ case AArch64::fixup_aarch64_ldst_imm12_scale2:
+ // Unsigned 12-bit immediate which gets multiplied by 2
+ if (Value & 1 || Value >= 0x2000)
+ report_fatal_error("invalid imm12 fixup value");
+ return Value >> 1;
+ case AArch64::fixup_aarch64_ldst_imm12_scale4:
+ // Unsigned 12-bit immediate which gets multiplied by 4
+ if (Value & 3 || Value >= 0x4000)
+ report_fatal_error("invalid imm12 fixup value");
+ return Value >> 2;
+ case AArch64::fixup_aarch64_ldst_imm12_scale8:
+ // Unsigned 12-bit immediate which gets multiplied by 8
+ if (Value & 7 || Value >= 0x8000)
+ report_fatal_error("invalid imm12 fixup value");
+ return Value >> 3;
+ case AArch64::fixup_aarch64_ldst_imm12_scale16:
+ // Unsigned 12-bit immediate which gets multiplied by 16
+ if (Value & 15 || Value >= 0x10000)
+ report_fatal_error("invalid imm12 fixup value");
+ return Value >> 4;
+ case AArch64::fixup_aarch64_movw:
+ report_fatal_error("no resolvable MOVZ/MOVK fixups supported yet");
return Value;
+ case AArch64::fixup_aarch64_pcrel_branch14:
+ // Signed 16-bit immediate
+ if (SignedValue > 32767 || SignedValue < -32768)
+ report_fatal_error("fixup value out of range");
+ // Low two bits are not encoded (4-byte alignment assumed).
+ if (Value & 0x3)
+ report_fatal_error("fixup not sufficiently aligned");
+ return (Value >> 2) & 0x3fff;
+ case AArch64::fixup_aarch64_pcrel_branch26:
+ case AArch64::fixup_aarch64_pcrel_call26:
+ // Signed 28-bit immediate
+ if (SignedValue > 134217727 || SignedValue < -134217728)
+ report_fatal_error("fixup value out of range");
+ // Low two bits are not encoded (4-byte alignment assumed).
+ if (Value & 0x3)
+ report_fatal_error("fixup not sufficiently aligned");
+ return (Value >> 2) & 0x3ffffff;
+ case FK_Data_1:
+ case FK_Data_2:
+ case FK_Data_4:
case FK_Data_8:
return Value;
+ }
+}
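
For the scaled load/store fixups above, the resolved byte offset must be a
multiple of the access size and fit the 12-bit field once scaled; a worked
example for the scale-8 case:

// fixup_aarch64_ldst_imm12_scale8 with a byte offset of 0x150:
//   0x150 & 7 == 0 and 0x150 < 0x8000, so it is accepted and encoded as
//   0x150 >> 3 == 0x2a (the imm12 field counts doublewords).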
- case AArch64::fixup_a64_ld_gottprel_prel19:
- // R_AARCH64_LD_GOTTPREL_PREL19: Set a load-literal immediate to bits 1F
- // FFFC of G(TPREL(S+A)) - P; check -2^20 <= X < 2^20.
- case AArch64::fixup_a64_ld_prel:
- // R_AARCH64_LD_PREL_LO19: Sets a load-literal (immediate) value to bits
- // 1F FFFC of S+A-P, checking that -2^20 <= S+A-P < 2^20.
- assert((int64_t)Value >= -(1LL << 20) &&
- (int64_t)Value < (1LL << 20) && "Out of range LDR (lit) fixup");
- return (Value & 0x1ffffc) << 3;
-
- case AArch64::fixup_a64_adr_prel:
- // R_AARCH64_ADR_PREL_LO21: Sets an ADR immediate value to bits 1F FFFF of
- // the result of S+A-P, checking that -2^20 <= S+A-P < 2^20.
- assert((int64_t)Value >= -(1LL << 20) &&
- (int64_t)Value < (1LL << 20) && "Out of range ADR fixup");
- return ADRImmBits(Value & 0x1fffff);
-
- case AArch64::fixup_a64_adr_prel_page:
- // R_AARCH64_ADR_PREL_PG_HI21: Sets an ADRP immediate value to bits 1 FFFF
- // F000 of the result of the operation, checking that -2^32 <= result <
- // 2^32.
- assert((int64_t)Value >= -(1LL << 32) &&
- (int64_t)Value < (1LL << 32) && "Out of range ADRP fixup");
- return ADRImmBits((Value & 0x1fffff000ULL) >> 12);
-
- case AArch64::fixup_a64_add_dtprel_hi12:
- // R_AARCH64_TLSLD_ADD_DTPREL_LO12: Set an ADD immediate field to bits
- // FF F000 of DTPREL(S+A), check 0 <= X < 2^24.
- case AArch64::fixup_a64_add_tprel_hi12:
- // R_AARCH64_TLSLD_ADD_TPREL_LO12: Set an ADD immediate field to bits
- // FF F000 of TPREL(S+A), check 0 <= X < 2^24.
- assert((int64_t)Value >= 0 &&
- (int64_t)Value < (1LL << 24) && "Out of range ADD fixup");
- return (Value & 0xfff000) >> 2;
-
- case AArch64::fixup_a64_add_dtprel_lo12:
- // R_AARCH64_TLSLD_ADD_DTPREL_LO12: Set an ADD immediate field to bits
- // FFF of DTPREL(S+A), check 0 <= X < 2^12.
- case AArch64::fixup_a64_add_tprel_lo12:
- // R_AARCH64_TLSLD_ADD_TPREL_LO12: Set an ADD immediate field to bits
- // FFF of TPREL(S+A), check 0 <= X < 2^12.
- assert((int64_t)Value >= 0 &&
- (int64_t)Value < (1LL << 12) && "Out of range ADD fixup");
- // ... fallthrough to no-checking versions ...
- case AArch64::fixup_a64_add_dtprel_lo12_nc:
- // R_AARCH64_TLSLD_ADD_DTPREL_LO12_NC: Set an ADD immediate field to bits
- // FFF of DTPREL(S+A) with no overflow check.
- case AArch64::fixup_a64_add_tprel_lo12_nc:
- // R_AARCH64_TLSLD_ADD_TPREL_LO12_NC: Set an ADD immediate field to bits
- // FFF of TPREL(S+A) with no overflow check.
- case AArch64::fixup_a64_tlsdesc_add_lo12_nc:
- // R_AARCH64_TLSDESC_ADD_LO12_NC: Set an ADD immediate field to bits
- // FFF of G(TLSDESC(S+A)), with no overflow check.
- case AArch64::fixup_a64_add_lo12:
- // R_AARCH64_ADD_ABS_LO12_NC: Sets an ADD immediate value to bits FFF of
- // S+A, with no overflow check.
- return (Value & 0xfff) << 10;
-
- case AArch64::fixup_a64_ldst8_dtprel_lo12:
- // R_AARCH64_TLSLD_LDST8_DTPREL_LO12: Set an LD/ST offset field to bits FFF
- // of DTPREL(S+A), check 0 <= X < 2^12.
- case AArch64::fixup_a64_ldst8_tprel_lo12:
- // R_AARCH64_TLSLE_LDST8_TPREL_LO12: Set an LD/ST offset field to bits FFF
- // of DTPREL(S+A), check 0 <= X < 2^12.
- assert((int64_t) Value >= 0 &&
- (int64_t) Value < (1LL << 12) && "Out of range LD/ST fixup");
- // ... fallthrough to no-checking versions ...
- case AArch64::fixup_a64_ldst8_dtprel_lo12_nc:
- // R_AARCH64_TLSLD_LDST8_DTPREL_LO12: Set an LD/ST offset field to bits FFF
- // of DTPREL(S+A), with no overflow check.
- case AArch64::fixup_a64_ldst8_tprel_lo12_nc:
- // R_AARCH64_TLSLD_LDST8_TPREL_LO12: Set an LD/ST offset field to bits FFF
- // of TPREL(S+A), with no overflow check.
- case AArch64::fixup_a64_ldst8_lo12:
- // R_AARCH64_LDST8_ABS_LO12_NC: Sets an LD/ST immediate value to bits FFF
- // of S+A, with no overflow check.
- return (Value & 0xfff) << 10;
-
- case AArch64::fixup_a64_ldst16_dtprel_lo12:
- // R_AARCH64_TLSLD_LDST16_DTPREL_LO12: Set an LD/ST offset field to bits FFE
- // of DTPREL(S+A), check 0 <= X < 2^12.
- case AArch64::fixup_a64_ldst16_tprel_lo12:
- // R_AARCH64_TLSLE_LDST16_TPREL_LO12: Set an LD/ST offset field to bits FFE
- // of DTPREL(S+A), check 0 <= X < 2^12.
- assert((int64_t) Value >= 0 &&
- (int64_t) Value < (1LL << 12) && "Out of range LD/ST fixup");
- // ... fallthrough to no-checking versions ...
- case AArch64::fixup_a64_ldst16_dtprel_lo12_nc:
- // R_AARCH64_TLSLD_LDST16_DTPREL_LO12: Set an LD/ST offset field to bits FFE
- // of DTPREL(S+A), with no overflow check.
- case AArch64::fixup_a64_ldst16_tprel_lo12_nc:
- // R_AARCH64_TLSLD_LDST16_TPREL_LO12: Set an LD/ST offset field to bits FFE
- // of TPREL(S+A), with no overflow check.
- case AArch64::fixup_a64_ldst16_lo12:
- // R_AARCH64_LDST16_ABS_LO12_NC: Sets an LD/ST immediate value to bits FFE
- // of S+A, with no overflow check.
- return (Value & 0xffe) << 9;
-
- case AArch64::fixup_a64_ldst32_dtprel_lo12:
- // R_AARCH64_TLSLD_LDST32_DTPREL_LO12: Set an LD/ST offset field to bits FFC
- // of DTPREL(S+A), check 0 <= X < 2^12.
- case AArch64::fixup_a64_ldst32_tprel_lo12:
- // R_AARCH64_TLSLE_LDST32_TPREL_LO12: Set an LD/ST offset field to bits FFC
- // of DTPREL(S+A), check 0 <= X < 2^12.
- assert((int64_t) Value >= 0 &&
- (int64_t) Value < (1LL << 12) && "Out of range LD/ST fixup");
- // ... fallthrough to no-checking versions ...
- case AArch64::fixup_a64_ldst32_dtprel_lo12_nc:
- // R_AARCH64_TLSLD_LDST32_DTPREL_LO12: Set an LD/ST offset field to bits FFC
- // of DTPREL(S+A), with no overflow check.
- case AArch64::fixup_a64_ldst32_tprel_lo12_nc:
- // R_AARCH64_TLSLD_LDST32_TPREL_LO12: Set an LD/ST offset field to bits FFC
- // of TPREL(S+A), with no overflow check.
- case AArch64::fixup_a64_ldst32_lo12:
- // R_AARCH64_LDST32_ABS_LO12_NC: Sets an LD/ST immediate value to bits FFC
- // of S+A, with no overflow check.
- return (Value & 0xffc) << 8;
-
- case AArch64::fixup_a64_ldst64_dtprel_lo12:
- // R_AARCH64_TLSLD_LDST64_DTPREL_LO12: Set an LD/ST offset field to bits FF8
- // of DTPREL(S+A), check 0 <= X < 2^12.
- case AArch64::fixup_a64_ldst64_tprel_lo12:
- // R_AARCH64_TLSLE_LDST64_TPREL_LO12: Set an LD/ST offset field to bits FF8
- // of DTPREL(S+A), check 0 <= X < 2^12.
- assert((int64_t) Value >= 0 &&
- (int64_t) Value < (1LL << 12) && "Out of range LD/ST fixup");
- // ... fallthrough to no-checking versions ...
- case AArch64::fixup_a64_ldst64_dtprel_lo12_nc:
- // R_AARCH64_TLSLD_LDST64_DTPREL_LO12: Set an LD/ST offset field to bits FF8
- // of DTPREL(S+A), with no overflow check.
- case AArch64::fixup_a64_ldst64_tprel_lo12_nc:
- // R_AARCH64_TLSLD_LDST64_TPREL_LO12: Set an LD/ST offset field to bits FF8
- // of TPREL(S+A), with no overflow check.
- case AArch64::fixup_a64_ldst64_lo12:
- // R_AARCH64_LDST64_ABS_LO12_NC: Sets an LD/ST immediate value to bits FF8
- // of S+A, with no overflow check.
- return (Value & 0xff8) << 7;
-
- case AArch64::fixup_a64_ldst128_lo12:
- // R_AARCH64_LDST128_ABS_LO12_NC: Sets an LD/ST immediate value to bits FF0
- // of S+A, with no overflow check.
- return (Value & 0xff0) << 6;
-
- case AArch64::fixup_a64_movw_uabs_g0:
- // R_AARCH64_MOVW_UABS_G0: Sets a MOVZ immediate field to bits FFFF of S+A
- // with a check that S+A < 2^16
- assert(Value <= 0xffff && "Out of range move wide fixup");
- return (Value & 0xffff) << 5;
-
- case AArch64::fixup_a64_movw_dtprel_g0_nc:
- // R_AARCH64_TLSLD_MOVW_DTPREL_G0_NC: Sets a MOVK immediate field to bits
- // FFFF of DTPREL(S+A) with no overflow check.
- case AArch64::fixup_a64_movw_gottprel_g0_nc:
- // R_AARCH64_TLSIE_MOVW_GOTTPREL_G0_NC: Sets a MOVK immediate field to bits
- // FFFF of G(TPREL(S+A)) - GOT with no overflow check.
- case AArch64::fixup_a64_movw_tprel_g0_nc:
- // R_AARCH64_TLSLE_MOVW_TPREL_G0_NC: Sets a MOVK immediate field to bits
- // FFFF of TPREL(S+A) with no overflow check.
- case AArch64::fixup_a64_movw_uabs_g0_nc:
- // R_AARCH64_MOVW_UABS_G0_NC: Sets a MOVK immediate field to bits FFFF of
- // S+A with no overflow check.
- return (Value & 0xffff) << 5;
-
- case AArch64::fixup_a64_movw_uabs_g1:
- // R_AARCH64_MOVW_UABS_G1: Sets a MOVZ immediate field to bits FFFF0000 of
- // S+A with a check that S+A < 2^32
- assert(Value <= 0xffffffffull && "Out of range move wide fixup");
- return ((Value >> 16) & 0xffff) << 5;
-
- case AArch64::fixup_a64_movw_dtprel_g1_nc:
- // R_AARCH64_TLSLD_MOVW_DTPREL_G1_NC: Set a MOVK immediate field
- // to bits FFFF0000 of DTPREL(S+A), with no overflow check.
- case AArch64::fixup_a64_movw_tprel_g1_nc:
- // R_AARCH64_TLSLD_MOVW_TPREL_G1_NC: Set a MOVK immediate field
- // to bits FFFF0000 of TPREL(S+A), with no overflow check.
- case AArch64::fixup_a64_movw_uabs_g1_nc:
- // R_AARCH64_MOVW_UABS_G1_NC: Sets a MOVK immediate field to bits
- // FFFF0000 of S+A with no overflow check.
- return ((Value >> 16) & 0xffff) << 5;
-
- case AArch64::fixup_a64_movw_uabs_g2:
- // R_AARCH64_MOVW_UABS_G2: Sets a MOVZ immediate field to bits FFFF 0000
- // 0000 of S+A with a check that S+A < 2^48
- assert(Value <= 0xffffffffffffull && "Out of range move wide fixup");
- return ((Value >> 32) & 0xffff) << 5;
-
- case AArch64::fixup_a64_movw_uabs_g2_nc:
- // R_AARCH64_MOVW_UABS_G2: Sets a MOVK immediate field to bits FFFF 0000
- // 0000 of S+A with no overflow check.
- return ((Value >> 32) & 0xffff) << 5;
-
- case AArch64::fixup_a64_movw_uabs_g3:
- // R_AARCH64_MOVW_UABS_G3: Sets a MOVZ immediate field to bits FFFF 0000
- // 0000 0000 of S+A (no overflow check needed)
- return ((Value >> 48) & 0xffff) << 5;
-
- case AArch64::fixup_a64_movw_dtprel_g0:
- // R_AARCH64_TLSLD_MOVW_DTPREL_G0: Set a MOV[NZ] immediate field
- // to bits FFFF of DTPREL(S+A).
- case AArch64::fixup_a64_movw_tprel_g0:
- // R_AARCH64_TLSLE_MOVW_TPREL_G0: Set a MOV[NZ] immediate field to
- // bits FFFF of TPREL(S+A).
- case AArch64::fixup_a64_movw_sabs_g0: {
- // R_AARCH64_MOVW_SABS_G0: Sets MOV[NZ] immediate field using bits FFFF of
- // S+A (see notes below); check -2^16 <= S+A < 2^16. (notes say that we
- // should convert between MOVN and MOVZ to achieve our goals).
- int64_t Signed = Value;
- assert(Signed >= -(1LL << 16) && Signed < (1LL << 16)
- && "Out of range move wide fixup");
- if (Signed >= 0) {
- Value = (Value & 0xffff) << 5;
- // Bit 30 converts the MOVN encoding into a MOVZ
- Value |= 1 << 30;
- } else {
- // MCCodeEmitter should have encoded a MOVN, which is fine.
- Value = (~Value & 0xffff) << 5;
- }
- return Value;
+void AArch64AsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
+ unsigned DataSize, uint64_t Value,
+ bool IsPCRel) const {
+ unsigned NumBytes = getFixupKindNumBytes(Fixup.getKind());
+ if (!Value)
+ return; // Doesn't change encoding.
+ MCFixupKindInfo Info = getFixupKindInfo(Fixup.getKind());
+ // Apply any target-specific value adjustments.
+ Value = adjustFixupValue(Fixup.getKind(), Value);
+
+ // Shift the value into position.
+ Value <<= Info.TargetOffset;
+
+ unsigned Offset = Fixup.getOffset();
+ assert(Offset + NumBytes <= DataSize && "Invalid fixup offset!");
+
+ // For each byte of the fragment that the fixup touches, mask in the
+ // bits from the fixup value.
+ for (unsigned i = 0; i != NumBytes; ++i)
+ Data[Offset + i] |= uint8_t((Value >> (i * 8)) & 0xff);
+}
+
+bool AArch64AsmBackend::mayNeedRelaxation(const MCInst &Inst) const {
+ return false;
+}
+
+bool AArch64AsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup,
+ uint64_t Value,
+ const MCRelaxableFragment *DF,
+ const MCAsmLayout &Layout) const {
+ // FIXME: This isn't correct for AArch64. Just moving the "generic" logic
+ // into the targets for now.
+ //
+ // Relax if the value is too big for a (signed) i8.
+ return int64_t(Value) != int64_t(int8_t(Value));
+}
+
+void AArch64AsmBackend::relaxInstruction(const MCInst &Inst,
+ MCInst &Res) const {
+ assert(false && "AArch64AsmBackend::relaxInstruction() unimplemented");
+}
+
+bool AArch64AsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
+ // If the count is not 4-byte aligned, we must be writing data into the text
+ // section (otherwise we have unaligned instructions, and thus have far
+ // bigger problems), so just write zeros instead.
+ if ((Count & 3) != 0) {
+ for (uint64_t i = 0, e = (Count & 3); i != e; ++i)
+ OW->Write8(0);
}
- case AArch64::fixup_a64_movw_dtprel_g1:
- // R_AARCH64_TLSLD_MOVW_DTPREL_G1: Set a MOV[NZ] immediate field
- // to bits FFFF0000 of DTPREL(S+A).
- case AArch64::fixup_a64_movw_gottprel_g1:
- // R_AARCH64_TLSIE_MOVW_GOTTPREL_G1: Set a MOV[NZ] immediate field
- // to bits FFFF0000 of G(TPREL(S+A)) - GOT.
- case AArch64::fixup_a64_movw_tprel_g1:
- // R_AARCH64_TLSLE_MOVW_TPREL_G1: Set a MOV[NZ] immediate field to
- // bits FFFF0000 of TPREL(S+A).
- case AArch64::fixup_a64_movw_sabs_g1: {
- // R_AARCH64_MOVW_SABS_G1: Sets MOV[NZ] immediate field using bits FFFF 0000
- // of S+A (see notes below); check -2^32 <= S+A < 2^32. (notes say that we
- // should convert between MOVN and MOVZ to achieve our goals).
- int64_t Signed = Value;
- assert(Signed >= -(1LL << 32) && Signed < (1LL << 32)
- && "Out of range move wide fixup");
- if (Signed >= 0) {
- Value = ((Value >> 16) & 0xffff) << 5;
- // Bit 30 converts the MOVN encoding into a MOVZ
- Value |= 1 << 30;
- } else {
- Value = ((~Value >> 16) & 0xffff) << 5;
- }
- return Value;
+ // We are properly aligned, so write NOPs as requested.
+ Count /= 4;
+ for (uint64_t i = 0; i != Count; ++i)
+ OW->Write32(0xd503201f);
+ return true;
+}
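
The padding rule above mixes byte zeros and 4-byte NOP words; for example,
a 10-byte request is satisfied as:

// Count == 10:
//   10 & 3 == 2 -> two 0x00 bytes first,
//   10 / 4 == 2 -> then two NOP words (0xd503201f): 2 + 8 == 10 bytes.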
+
+namespace {
+
+namespace CU {
+
+/// \brief Compact unwind encoding values.
+enum CompactUnwindEncodings {
+ /// \brief A "frameless" leaf function, where no non-volatile registers are
+ /// saved. The return remains in LR throughout the function.
+ UNWIND_AArch64_MODE_FRAMELESS = 0x02000000,
+
+ /// \brief No compact unwind encoding available. Instead the low 23-bits of
+ /// the compact unwind encoding is the offset of the DWARF FDE in the
+ /// __eh_frame section. This mode is never used in object files. It is only
+ /// generated by the linker in final linked images, which have only DWARF info
+ /// for a function.
+ UNWIND_AArch64_MODE_DWARF = 0x03000000,
+
+ /// \brief This is a standard arm64 prologue where FP/LR are immediately
+ /// pushed on the stack, then SP is copied to FP. If there are any
+ /// non-volatile registers saved, they are copied into the stack frame in pairs
+ /// in a contiguous range right below the saved FP/LR pair. Any subset of the
+ /// five X pairs and four D pairs can be saved, but the memory layout must be
+ /// in register number order.
+ UNWIND_AArch64_MODE_FRAME = 0x04000000,
+
+ /// \brief Frame register pair encodings.
+ UNWIND_AArch64_FRAME_X19_X20_PAIR = 0x00000001,
+ UNWIND_AArch64_FRAME_X21_X22_PAIR = 0x00000002,
+ UNWIND_AArch64_FRAME_X23_X24_PAIR = 0x00000004,
+ UNWIND_AArch64_FRAME_X25_X26_PAIR = 0x00000008,
+ UNWIND_AArch64_FRAME_X27_X28_PAIR = 0x00000010,
+ UNWIND_AArch64_FRAME_D8_D9_PAIR = 0x00000100,
+ UNWIND_AArch64_FRAME_D10_D11_PAIR = 0x00000200,
+ UNWIND_AArch64_FRAME_D12_D13_PAIR = 0x00000400,
+ UNWIND_AArch64_FRAME_D14_D15_PAIR = 0x00000800
+};
+
+} // end CU namespace
+
+// FIXME: This should be in a separate file.
+class DarwinAArch64AsmBackend : public AArch64AsmBackend {
+ const MCRegisterInfo &MRI;
+
+ /// \brief Encode compact unwind stack adjustment for frameless functions.
+ /// See UNWIND_AArch64_FRAMELESS_STACK_SIZE_MASK in compact_unwind_encoding.h.
+ /// The stack size always needs to be 16 byte aligned.
+ uint32_t encodeStackAdjustment(uint32_t StackSize) const {
+ return (StackSize / 16) << 12;
+ }
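
encodeStackAdjustment places the 16-byte-scaled stack size into bits 23:12
of the encoding, which is also where the 65520-byte ceiling checked in
generateCompactUnwindEncoding comes from (0xFFF * 16 == 65520):

// StackSize == 64    -> (64 / 16) << 12    == 0x4000
// StackSize == 65520 -> (65520 / 16) << 12 == 0xFFF000 (the maximum)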
+
+public:
+ DarwinAArch64AsmBackend(const Target &T, const MCRegisterInfo &MRI)
+ : AArch64AsmBackend(T), MRI(MRI) {}
+
+ MCObjectWriter *createObjectWriter(raw_ostream &OS) const override {
+ return createAArch64MachObjectWriter(OS, MachO::CPU_TYPE_ARM64,
+ MachO::CPU_SUBTYPE_ARM64_ALL);
+ }
+
+ bool doesSectionRequireSymbols(const MCSection &Section) const override {
+ // Any section for which the linker breaks things into atoms needs to
+ // preserve symbols, including assembler local symbols, to identify
+ // those atoms. These sections are:
+ // Sections of type:
+ //
+ // S_CSTRING_LITERALS (e.g. __cstring)
+ // S_LITERAL_POINTERS (e.g. objc selector pointers)
+ // S_16BYTE_LITERALS, S_8BYTE_LITERALS, S_4BYTE_LITERALS
+ //
+ // Sections named:
+ //
+ // __TEXT,__eh_frame
+ // __TEXT,__ustring
+ // __DATA,__cfstring
+ // __DATA,__objc_classrefs
+ // __DATA,__objc_catlist
+ //
+ // FIXME: It would be better if the compiler used actual linker local
+ // symbols for each of these sections rather than preserving what
+ // are ostensibly assembler local symbols.
+ const MCSectionMachO &SMO = static_cast<const MCSectionMachO &>(Section);
+ return (SMO.getType() == MachO::S_CSTRING_LITERALS ||
+ SMO.getType() == MachO::S_4BYTE_LITERALS ||
+ SMO.getType() == MachO::S_8BYTE_LITERALS ||
+ SMO.getType() == MachO::S_16BYTE_LITERALS ||
+ SMO.getType() == MachO::S_LITERAL_POINTERS ||
+ (SMO.getSegmentName() == "__TEXT" &&
+ (SMO.getSectionName() == "__eh_frame" ||
+ SMO.getSectionName() == "__ustring")) ||
+ (SMO.getSegmentName() == "__DATA" &&
+ (SMO.getSectionName() == "__cfstring" ||
+ SMO.getSectionName() == "__objc_classrefs" ||
+ SMO.getSectionName() == "__objc_catlist")));
}
- case AArch64::fixup_a64_movw_dtprel_g2:
- // R_AARCH64_TLSLD_MOVW_DTPREL_G2: Set a MOV[NZ] immediate field
- // to bits FFFF 0000 0000 of DTPREL(S+A).
- case AArch64::fixup_a64_movw_tprel_g2:
- // R_AARCH64_TLSLE_MOVW_TPREL_G2: Set a MOV[NZ] immediate field to
- // bits FFFF 0000 0000 of TPREL(S+A).
- case AArch64::fixup_a64_movw_sabs_g2: {
- // R_AARCH64_MOVW_SABS_G2: Sets MOV[NZ] immediate field using bits FFFF 0000
- // 0000 of S+A (see notes below); check -2^48 <= S+A < 2^48. (notes say that
- // we should convert between MOVN and MOVZ to achieve our goals).
- int64_t Signed = Value;
- assert(Signed >= -(1LL << 48) && Signed < (1LL << 48)
- && "Out of range move wide fixup");
- if (Signed >= 0) {
- Value = ((Value >> 32) & 0xffff) << 5;
- // Bit 30 converts the MOVN encoding into a MOVZ
- Value |= 1 << 30;
- } else {
- Value = ((~Value >> 32) & 0xffff) << 5;
+ /// \brief Generate the compact unwind encoding from the CFI directives.
+ uint32_t generateCompactUnwindEncoding(
+ ArrayRef<MCCFIInstruction> Instrs) const override {
+ if (Instrs.empty())
+ return CU::UNWIND_AArch64_MODE_FRAMELESS;
+
+ bool HasFP = false;
+ unsigned StackSize = 0;
+
+ uint32_t CompactUnwindEncoding = 0;
+ for (size_t i = 0, e = Instrs.size(); i != e; ++i) {
+ const MCCFIInstruction &Inst = Instrs[i];
+
+ switch (Inst.getOperation()) {
+ default:
+ // Cannot handle this directive: bail out.
+ return CU::UNWIND_AArch64_MODE_DWARF;
+ case MCCFIInstruction::OpDefCfa: {
+ // Defines a frame pointer.
+ assert(getXRegFromWReg(MRI.getLLVMRegNum(Inst.getRegister(), true)) ==
+ AArch64::FP &&
+ "Invalid frame pointer!");
+ assert(i + 2 < e && "Insufficient CFI instructions to define a frame!");
+
+ const MCCFIInstruction &LRPush = Instrs[++i];
+ assert(LRPush.getOperation() == MCCFIInstruction::OpOffset &&
+ "Link register not pushed!");
+ const MCCFIInstruction &FPPush = Instrs[++i];
+ assert(FPPush.getOperation() == MCCFIInstruction::OpOffset &&
+ "Frame pointer not pushed!");
+
+ unsigned LRReg = MRI.getLLVMRegNum(LRPush.getRegister(), true);
+ unsigned FPReg = MRI.getLLVMRegNum(FPPush.getRegister(), true);
+
+ LRReg = getXRegFromWReg(LRReg);
+ FPReg = getXRegFromWReg(FPReg);
+
+ assert(LRReg == AArch64::LR && FPReg == AArch64::FP &&
+ "Pushing invalid registers for frame!");
+
+ // Indicate that the function has a frame.
+ CompactUnwindEncoding |= CU::UNWIND_AArch64_MODE_FRAME;
+ HasFP = true;
+ break;
+ }
+ case MCCFIInstruction::OpDefCfaOffset: {
+ assert(StackSize == 0 && "We already have the CFA offset!");
+ StackSize = std::abs(Inst.getOffset());
+ break;
+ }
+ case MCCFIInstruction::OpOffset: {
+ // Registers are saved in pairs. We expect there to be two consecutive
+ // `.cfi_offset' instructions with the appropriate registers specified.
+ unsigned Reg1 = MRI.getLLVMRegNum(Inst.getRegister(), true);
+ if (i + 1 == e)
+ return CU::UNWIND_AArch64_MODE_DWARF;
+
+ const MCCFIInstruction &Inst2 = Instrs[++i];
+ if (Inst2.getOperation() != MCCFIInstruction::OpOffset)
+ return CU::UNWIND_AArch64_MODE_DWARF;
+ unsigned Reg2 = MRI.getLLVMRegNum(Inst2.getRegister(), true);
+
+ // N.B. The encodings must be in register number order, and the X
+ // registers before the D registers.
+
+ // X19/X20 pair = 0x00000001,
+ // X21/X22 pair = 0x00000002,
+ // X23/X24 pair = 0x00000004,
+ // X25/X26 pair = 0x00000008,
+ // X27/X28 pair = 0x00000010
+ Reg1 = getXRegFromWReg(Reg1);
+ Reg2 = getXRegFromWReg(Reg2);
+
+ if (Reg1 == AArch64::X19 && Reg2 == AArch64::X20 &&
+ (CompactUnwindEncoding & 0xF1E) == 0)
+ CompactUnwindEncoding |= CU::UNWIND_AArch64_FRAME_X19_X20_PAIR;
+ else if (Reg1 == AArch64::X21 && Reg2 == AArch64::X22 &&
+ (CompactUnwindEncoding & 0xF1C) == 0)
+ CompactUnwindEncoding |= CU::UNWIND_AArch64_FRAME_X21_X22_PAIR;
+ else if (Reg1 == AArch64::X23 && Reg2 == AArch64::X24 &&
+ (CompactUnwindEncoding & 0xF18) == 0)
+ CompactUnwindEncoding |= CU::UNWIND_AArch64_FRAME_X23_X24_PAIR;
+ else if (Reg1 == AArch64::X25 && Reg2 == AArch64::X26 &&
+ (CompactUnwindEncoding & 0xF10) == 0)
+ CompactUnwindEncoding |= CU::UNWIND_AArch64_FRAME_X25_X26_PAIR;
+ else if (Reg1 == AArch64::X27 && Reg2 == AArch64::X28 &&
+ (CompactUnwindEncoding & 0xF00) == 0)
+ CompactUnwindEncoding |= CU::UNWIND_AArch64_FRAME_X27_X28_PAIR;
+ else {
+ Reg1 = getDRegFromBReg(Reg1);
+ Reg2 = getDRegFromBReg(Reg2);
+
+ // D8/D9 pair = 0x00000100,
+ // D10/D11 pair = 0x00000200,
+ // D12/D13 pair = 0x00000400,
+ // D14/D15 pair = 0x00000800
+ if (Reg1 == AArch64::D8 && Reg2 == AArch64::D9 &&
+ (CompactUnwindEncoding & 0xE00) == 0)
+ CompactUnwindEncoding |= CU::UNWIND_AArch64_FRAME_D8_D9_PAIR;
+ else if (Reg1 == AArch64::D10 && Reg2 == AArch64::D11 &&
+ (CompactUnwindEncoding & 0xC00) == 0)
+ CompactUnwindEncoding |= CU::UNWIND_AArch64_FRAME_D10_D11_PAIR;
+ else if (Reg1 == AArch64::D12 && Reg2 == AArch64::D13 &&
+ (CompactUnwindEncoding & 0x800) == 0)
+ CompactUnwindEncoding |= CU::UNWIND_AArch64_FRAME_D12_D13_PAIR;
+ else if (Reg1 == AArch64::D14 && Reg2 == AArch64::D15)
+ CompactUnwindEncoding |= CU::UNWIND_AArch64_FRAME_D14_D15_PAIR;
+ else
+ // A pair was pushed which we cannot handle.
+ return CU::UNWIND_AArch64_MODE_DWARF;
+ }
+
+ break;
+ }
+ }
}
- return Value;
+
+ if (!HasFP) {
+ // With compact unwind info we can only represent stack adjustments of up
+ // to 65520 bytes.
+ if (StackSize > 65520)
+ return CU::UNWIND_AArch64_MODE_DWARF;
+
+ CompactUnwindEncoding |= CU::UNWIND_AArch64_MODE_FRAMELESS;
+ CompactUnwindEncoding |= encodeStackAdjustment(StackSize);
+ }
+
+ return CompactUnwindEncoding;
}
+};
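
The mask tests in the pair-matching chain above implement the ordering rule from the comment: a pair is only accepted while no higher-numbered pair bit is already set, which forces register-number order and X pairs before D pairs. A standalone sketch using the pair values listed in the comments (illustrative, not the LLVM API):

#include <cassert>
#include <cstdint>

int main() {
  // Pair bits as documented above.
  const uint32_t X19_X20 = 0x001, X21_X22 = 0x002, D8_D9 = 0x100;
  uint32_t Enc = 0;

  // Out of order: once X21/X22 is set, the X19/X20 guard (Enc & 0xF1E) == 0
  // fails, so the encoder above would fall back to DWARF.
  Enc |= X21_X22;
  assert((Enc & 0xF1E) != 0);

  // In order: X19/X20, then X21/X22, then a D pair; every guard passes.
  Enc = 0;
  assert((Enc & 0xF1E) == 0); Enc |= X19_X20;
  assert((Enc & 0xF1C) == 0); Enc |= X21_X22;
  assert((Enc & 0xE00) == 0); Enc |= D8_D9;
  assert(Enc == 0x103);
}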
- case AArch64::fixup_a64_tstbr:
- // R_AARCH64_TSTBR14: Sets the immediate field of a TBZ/TBNZ instruction to
- // bits FFFC of S+A-P, checking -2^15 <= S+A-P < 2^15.
- assert((int64_t)Value >= -(1LL << 15) &&
- (int64_t)Value < (1LL << 15) && "Out of range TBZ/TBNZ fixup");
- return (Value & 0xfffc) << (5 - 2);
-
- case AArch64::fixup_a64_condbr:
- // R_AARCH64_CONDBR19: Sets the immediate field of a conditional branch
- // instruction to bits 1FFFFC of S+A-P, checking -2^20 <= S+A-P < 2^20.
- assert((int64_t)Value >= -(1LL << 20) &&
- (int64_t)Value < (1LL << 20) && "Out of range B.cond fixup");
- return (Value & 0x1ffffc) << (5 - 2);
-
- case AArch64::fixup_a64_uncondbr:
- // R_AARCH64_JUMP26 same as below (except to a linker, possibly).
- case AArch64::fixup_a64_call:
- // R_AARCH64_CALL26: Sets a CALL immediate field to bits FFFFFFC of S+A-P,
- // checking that -2^27 <= S+A-P < 2^27.
- assert((int64_t)Value >= -(1LL << 27) &&
- (int64_t)Value < (1LL << 27) && "Out of range branch fixup");
- return (Value & 0xffffffc) >> 2;
-
- case AArch64::fixup_a64_adr_gottprel_page:
- // R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21: Set an ADRP immediate field to bits
- // 1FFFFF000 of Page(G(TPREL(S+A))) - Page(P); check -2^32 <= X < 2^32.
- case AArch64::fixup_a64_tlsdesc_adr_page:
- // R_AARCH64_TLSDESC_ADR_PAGE: Set an ADRP immediate field to bits 1FFFFF000
- // of Page(G(TLSDESC(S+A))) - Page(P); check -2^32 <= X < 2^32.
- case AArch64::fixup_a64_adr_prel_got_page:
- // R_AARCH64_ADR_GOT_PAGE: Sets the immediate value of an ADRP to bits
- // 1FFFFF000 of the operation, checking that -2^32 < Page(G(S))-Page(GOT) <
- // 2^32.
- assert((int64_t)Value >= -(1LL << 32) &&
- (int64_t)Value < (1LL << 32) && "Out of range ADRP fixup");
- return ADRImmBits((Value & 0x1fffff000ULL) >> 12);
-
- case AArch64::fixup_a64_ld64_gottprel_lo12_nc:
- // R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC: Set an LD offset field to bits FF8
- // of X, with no overflow check. Check that X & 7 == 0.
- case AArch64::fixup_a64_tlsdesc_ld64_lo12_nc:
- // R_AARCH64_TLSDESC_LD64_LO12_NC: Set an LD offset field to bits FF8 of
- // G(TLSDESC(S+A)), with no overflow check. Check that X & 7 == 0.
- case AArch64::fixup_a64_ld64_got_lo12_nc:
- // R_AARCH64_LD64_GOT_LO12_NC: Sets the LD/ST immediate field to bits FF8 of
- // G(S) with no overflow check. Check X & 7 == 0
- assert(((int64_t)Value & 7) == 0 && "Misaligned fixup");
- return (Value & 0xff8) << 7;
-
- case AArch64::fixup_a64_tlsdesc_call:
- // R_AARCH64_TLSDESC_CALL: For relaxation only.
- return 0;
+} // end anonymous namespace
+
+namespace {
+
+class ELFAArch64AsmBackend : public AArch64AsmBackend {
+public:
+ uint8_t OSABI;
+ bool IsLittleEndian;
+
+ ELFAArch64AsmBackend(const Target &T, uint8_t OSABI, bool IsLittleEndian)
+ : AArch64AsmBackend(T), OSABI(OSABI), IsLittleEndian(IsLittleEndian) {}
+
+ MCObjectWriter *createObjectWriter(raw_ostream &OS) const override {
+ return createAArch64ELFObjectWriter(OS, OSABI, IsLittleEndian);
}
+
+ void processFixupValue(const MCAssembler &Asm, const MCAsmLayout &Layout,
+ const MCFixup &Fixup, const MCFragment *DF,
+ const MCValue &Target, uint64_t &Value,
+ bool &IsResolved) override;
+
+ void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
+ uint64_t Value, bool IsPCRel) const override;
+};
+
+void ELFAArch64AsmBackend::processFixupValue(
+ const MCAssembler &Asm, const MCAsmLayout &Layout, const MCFixup &Fixup,
+ const MCFragment *DF, const MCValue &Target, uint64_t &Value,
+ bool &IsResolved) {
+ // The ADRP instruction adds some multiple of 0x1000 to the current PC &
+ // ~0xfff. This means that the required offset to reach a symbol can vary by
+ // up to one step depending on where the ADRP is in memory. For example:
+ //
+ // ADRP x0, there
+ // there:
+ //
+ // If the ADRP occurs at address 0xffc then "there" will be at 0x1000 and
+ // we'll need that as an offset. At any other address "there" will be in the
+ // same page as the ADRP and the instruction should encode 0x0. Assuming the
+ // section isn't 0x1000-aligned, we therefore need to delegate this decision
+ // to the linker -- a relocation!
+ if ((uint32_t)Fixup.getKind() == AArch64::fixup_aarch64_pcrel_adrp_imm21)
+ IsResolved = false;
+}
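
The page-offset hazard described in the comment is easy to verify numerically; a minimal sketch (addresses taken from the example above):

#include <cassert>
#include <cstdint>

// Page(X) = X & ~0xfff. An ADRP encodes Page(target) - Page(pc), so the
// immediate needed to reach the very next instruction depends on where the
// ADRP itself sits.
static uint64_t page(uint64_t X) { return X & ~0xfffULL; }

int main() {
  // ADRP at 0xffc: "there" lands at 0x1000, one page away.
  assert(page(0x1000) - page(0xffc) == 0x1000);
  // ADRP at 0x1000: "there" lands at 0x1004, in the same page.
  assert(page(0x1004) - page(0x1000) == 0);
}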
+
+void ELFAArch64AsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
+ unsigned DataSize, uint64_t Value,
+ bool IsPCRel) const {
+ // Store fixups in the .eh_frame section in big-endian order.
+ if (!IsLittleEndian && Fixup.getKind() == FK_Data_4) {
+ const MCSection *Sec = Fixup.getValue()->FindAssociatedSection();
+ const MCSectionELF *SecELF = static_cast<const MCSectionELF *>(Sec);
+ if (SecELF->getSectionName() == ".eh_frame")
+ Value = ByteSwap_32(unsigned(Value));
+ }
+ AArch64AsmBackend::applyFixup(Fixup, Data, DataSize, Value, IsPCRel);
+}
}
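
For reference, the transform ByteSwap_32 applies to those .eh_frame words looks like this (an illustrative reimplementation, not the LLVM helper itself):

#include <cassert>
#include <cstdint>

static uint32_t bswap32(uint32_t V) {
  return (V >> 24) | ((V >> 8) & 0xff00U) | ((V << 8) & 0xff0000U) |
         (V << 24);
}

int main() { assert(bswap32(0x11223344U) == 0x44332211U); }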
-MCAsmBackend *
-llvm::createAArch64leAsmBackend(const Target &T, const MCRegisterInfo &MRI,
- StringRef TT, StringRef CPU) {
+MCAsmBackend *llvm::createAArch64leAsmBackend(const Target &T,
+ const MCRegisterInfo &MRI,
+ StringRef TT, StringRef CPU) {
Triple TheTriple(TT);
- return new ELFAArch64AsmBackend(T, TT, TheTriple.getOS(), /*isLittle*/ true);
+
+ if (TheTriple.isOSDarwin())
+ return new DarwinAArch64AsmBackend(T, MRI);
+
+ assert(TheTriple.isOSBinFormatELF() && "Expect either MachO or ELF target");
+ return new ELFAArch64AsmBackend(T, TheTriple.getOS(), /*IsLittleEndian=*/true);
}
-MCAsmBackend *
-llvm::createAArch64beAsmBackend(const Target &T, const MCRegisterInfo &MRI,
- StringRef TT, StringRef CPU) {
+MCAsmBackend *llvm::createAArch64beAsmBackend(const Target &T,
+ const MCRegisterInfo &MRI,
+ StringRef TT, StringRef CPU) {
Triple TheTriple(TT);
- return new ELFAArch64AsmBackend(T, TT, TheTriple.getOS(), /*isLittle*/ false);
+
+ assert(TheTriple.isOSBinFormatELF() &&
+ "Big endian is only supported for ELF targets!");
+ return new ELFAArch64AsmBackend(T, TheTriple.getOS(),
+ /*IsLittleEndian=*/false);
}
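
On the 65520-byte cap in the frameless path above: 65520 == 0xFFF * 16, i.e. the compact encoding stores the stack size as a 12-bit count of 16-byte units. A hedged sketch of that packing (encodeStackAdjustment is defined earlier in this file; the exact field position used here is an assumption for illustration):

#include <cassert>
#include <cstdint>

static uint32_t encodeStackAdjustmentSketch(uint32_t StackSize) {
  // 12-bit count of 16-byte units; anything larger must go to DWARF.
  assert(StackSize % 16 == 0 && StackSize <= 65520);
  return (StackSize / 16) << 12; // field position assumed
}

int main() { assert(encodeStackAdjustmentSketch(65520) == 0xFFF000); }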
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
index a5fe914..e05191e 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
@@ -13,6 +13,7 @@
//===----------------------------------------------------------------------===//
#include "MCTargetDesc/AArch64FixupKinds.h"
+#include "MCTargetDesc/AArch64MCExpr.h"
#include "MCTargetDesc/AArch64MCTargetDesc.h"
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCValue.h"
@@ -35,257 +36,222 @@ private:
};
}
-AArch64ELFObjectWriter::AArch64ELFObjectWriter(uint8_t OSABI, bool IsLittleEndian)
- : MCELFObjectTargetWriter(/*Is64Bit*/ true, OSABI, ELF::EM_AARCH64,
- /*HasRelocationAddend*/ true)
-{}
+AArch64ELFObjectWriter::AArch64ELFObjectWriter(uint8_t OSABI,
+ bool IsLittleEndian)
+ : MCELFObjectTargetWriter(/*Is64Bit*/ true, OSABI, ELF::EM_AARCH64,
+ /*HasRelocationAddend*/ true) {}
-AArch64ELFObjectWriter::~AArch64ELFObjectWriter()
-{}
+AArch64ELFObjectWriter::~AArch64ELFObjectWriter() {}
unsigned AArch64ELFObjectWriter::GetRelocType(const MCValue &Target,
- const MCFixup &Fixup,
- bool IsPCRel) const {
- unsigned Type;
+ const MCFixup &Fixup,
+ bool IsPCRel) const {
+ AArch64MCExpr::VariantKind RefKind =
+ static_cast<AArch64MCExpr::VariantKind>(Target.getRefKind());
+ AArch64MCExpr::VariantKind SymLoc = AArch64MCExpr::getSymbolLoc(RefKind);
+ bool IsNC = AArch64MCExpr::isNotChecked(RefKind);
+
+ assert((!Target.getSymA() ||
+ Target.getSymA()->getKind() == MCSymbolRefExpr::VK_None) &&
+ "Should only be expression-level modifiers here");
+
+ assert((!Target.getSymB() ||
+ Target.getSymB()->getKind() == MCSymbolRefExpr::VK_None) &&
+ "Should only be expression-level modifiers here");
+
if (IsPCRel) {
switch ((unsigned)Fixup.getKind()) {
- default:
- llvm_unreachable("Unimplemented fixup -> relocation");
- case FK_Data_8:
- return ELF::R_AARCH64_PREL64;
- case FK_Data_4:
- return ELF::R_AARCH64_PREL32;
case FK_Data_2:
return ELF::R_AARCH64_PREL16;
- case AArch64::fixup_a64_ld_prel:
- Type = ELF::R_AARCH64_LD_PREL_LO19;
- break;
- case AArch64::fixup_a64_adr_prel:
- Type = ELF::R_AARCH64_ADR_PREL_LO21;
- break;
- case AArch64::fixup_a64_adr_prel_page:
- Type = ELF::R_AARCH64_ADR_PREL_PG_HI21;
- break;
- case AArch64::fixup_a64_adr_prel_got_page:
- Type = ELF::R_AARCH64_ADR_GOT_PAGE;
- break;
- case AArch64::fixup_a64_tstbr:
- Type = ELF::R_AARCH64_TSTBR14;
- break;
- case AArch64::fixup_a64_condbr:
- Type = ELF::R_AARCH64_CONDBR19;
- break;
- case AArch64::fixup_a64_uncondbr:
- Type = ELF::R_AARCH64_JUMP26;
- break;
- case AArch64::fixup_a64_call:
- Type = ELF::R_AARCH64_CALL26;
- break;
- case AArch64::fixup_a64_adr_gottprel_page:
- Type = ELF::R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21;
- break;
- case AArch64::fixup_a64_ld_gottprel_prel19:
- Type = ELF::R_AARCH64_TLSIE_LD_GOTTPREL_PREL19;
- break;
- case AArch64::fixup_a64_tlsdesc_adr_page:
- Type = ELF::R_AARCH64_TLSDESC_ADR_PAGE;
- break;
+ case FK_Data_4:
+ return ELF::R_AARCH64_PREL32;
+ case FK_Data_8:
+ return ELF::R_AARCH64_PREL64;
+ case AArch64::fixup_aarch64_pcrel_adr_imm21:
+ assert(SymLoc == AArch64MCExpr::VK_NONE && "unexpected ADR relocation");
+ return ELF::R_AARCH64_ADR_PREL_LO21;
+ case AArch64::fixup_aarch64_pcrel_adrp_imm21:
+ if (SymLoc == AArch64MCExpr::VK_ABS && !IsNC)
+ return ELF::R_AARCH64_ADR_PREL_PG_HI21;
+ if (SymLoc == AArch64MCExpr::VK_GOT && !IsNC)
+ return ELF::R_AARCH64_ADR_GOT_PAGE;
+ if (SymLoc == AArch64MCExpr::VK_GOTTPREL && !IsNC)
+ return ELF::R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21;
+ if (SymLoc == AArch64MCExpr::VK_TLSDESC && !IsNC)
+ return ELF::R_AARCH64_TLSDESC_ADR_PAGE;
+ llvm_unreachable("invalid symbol kind for ADRP relocation");
+ case AArch64::fixup_aarch64_pcrel_branch26:
+ return ELF::R_AARCH64_JUMP26;
+ case AArch64::fixup_aarch64_pcrel_call26:
+ return ELF::R_AARCH64_CALL26;
+ case AArch64::fixup_aarch64_ldr_pcrel_imm19:
+ if (SymLoc == AArch64MCExpr::VK_GOTTPREL)
+ return ELF::R_AARCH64_TLSIE_LD_GOTTPREL_PREL19;
+ return ELF::R_AARCH64_LD_PREL_LO19;
+ case AArch64::fixup_aarch64_pcrel_branch14:
+ return ELF::R_AARCH64_TSTBR14;
+ case AArch64::fixup_aarch64_pcrel_branch19:
+ return ELF::R_AARCH64_CONDBR19;
+ default:
+ llvm_unreachable("Unsupported pc-relative fixup kind");
}
} else {
switch ((unsigned)Fixup.getKind()) {
- default:
- llvm_unreachable("Unimplemented fixup -> relocation");
- case FK_Data_8:
- return ELF::R_AARCH64_ABS64;
- case FK_Data_4:
- return ELF::R_AARCH64_ABS32;
case FK_Data_2:
return ELF::R_AARCH64_ABS16;
- case AArch64::fixup_a64_add_lo12:
- Type = ELF::R_AARCH64_ADD_ABS_LO12_NC;
- break;
- case AArch64::fixup_a64_ld64_got_lo12_nc:
- Type = ELF::R_AARCH64_LD64_GOT_LO12_NC;
- break;
- case AArch64::fixup_a64_ldst8_lo12:
- Type = ELF::R_AARCH64_LDST8_ABS_LO12_NC;
- break;
- case AArch64::fixup_a64_ldst16_lo12:
- Type = ELF::R_AARCH64_LDST16_ABS_LO12_NC;
- break;
- case AArch64::fixup_a64_ldst32_lo12:
- Type = ELF::R_AARCH64_LDST32_ABS_LO12_NC;
- break;
- case AArch64::fixup_a64_ldst64_lo12:
- Type = ELF::R_AARCH64_LDST64_ABS_LO12_NC;
- break;
- case AArch64::fixup_a64_ldst128_lo12:
- Type = ELF::R_AARCH64_LDST128_ABS_LO12_NC;
- break;
- case AArch64::fixup_a64_movw_uabs_g0:
- Type = ELF::R_AARCH64_MOVW_UABS_G0;
- break;
- case AArch64::fixup_a64_movw_uabs_g0_nc:
- Type = ELF::R_AARCH64_MOVW_UABS_G0_NC;
- break;
- case AArch64::fixup_a64_movw_uabs_g1:
- Type = ELF::R_AARCH64_MOVW_UABS_G1;
- break;
- case AArch64::fixup_a64_movw_uabs_g1_nc:
- Type = ELF::R_AARCH64_MOVW_UABS_G1_NC;
- break;
- case AArch64::fixup_a64_movw_uabs_g2:
- Type = ELF::R_AARCH64_MOVW_UABS_G2;
- break;
- case AArch64::fixup_a64_movw_uabs_g2_nc:
- Type = ELF::R_AARCH64_MOVW_UABS_G2_NC;
- break;
- case AArch64::fixup_a64_movw_uabs_g3:
- Type = ELF::R_AARCH64_MOVW_UABS_G3;
- break;
- case AArch64::fixup_a64_movw_sabs_g0:
- Type = ELF::R_AARCH64_MOVW_SABS_G0;
- break;
- case AArch64::fixup_a64_movw_sabs_g1:
- Type = ELF::R_AARCH64_MOVW_SABS_G1;
- break;
- case AArch64::fixup_a64_movw_sabs_g2:
- Type = ELF::R_AARCH64_MOVW_SABS_G2;
- break;
+ case FK_Data_4:
+ return ELF::R_AARCH64_ABS32;
+ case FK_Data_8:
+ return ELF::R_AARCH64_ABS64;
+ case AArch64::fixup_aarch64_add_imm12:
+ if (RefKind == AArch64MCExpr::VK_DTPREL_HI12)
+ return ELF::R_AARCH64_TLSLD_ADD_DTPREL_HI12;
+ if (RefKind == AArch64MCExpr::VK_TPREL_HI12)
+ return ELF::R_AARCH64_TLSLE_ADD_TPREL_HI12;
+ if (RefKind == AArch64MCExpr::VK_DTPREL_LO12_NC)
+ return ELF::R_AARCH64_TLSLD_ADD_DTPREL_LO12_NC;
+ if (RefKind == AArch64MCExpr::VK_DTPREL_LO12)
+ return ELF::R_AARCH64_TLSLD_ADD_DTPREL_LO12;
+ if (RefKind == AArch64MCExpr::VK_TPREL_LO12_NC)
+ return ELF::R_AARCH64_TLSLE_ADD_TPREL_LO12_NC;
+ if (RefKind == AArch64MCExpr::VK_TPREL_LO12)
+ return ELF::R_AARCH64_TLSLE_ADD_TPREL_LO12;
+ if (RefKind == AArch64MCExpr::VK_TLSDESC_LO12)
+ return ELF::R_AARCH64_TLSDESC_ADD_LO12_NC;
+ if (SymLoc == AArch64MCExpr::VK_ABS && IsNC)
+ return ELF::R_AARCH64_ADD_ABS_LO12_NC;
- // TLS Local-dynamic block
- case AArch64::fixup_a64_movw_dtprel_g2:
- Type = ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G2;
- break;
- case AArch64::fixup_a64_movw_dtprel_g1:
- Type = ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G1;
- break;
- case AArch64::fixup_a64_movw_dtprel_g1_nc:
- Type = ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G1_NC;
- break;
- case AArch64::fixup_a64_movw_dtprel_g0:
- Type = ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G0;
- break;
- case AArch64::fixup_a64_movw_dtprel_g0_nc:
- Type = ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G0_NC;
- break;
- case AArch64::fixup_a64_add_dtprel_hi12:
- Type = ELF::R_AARCH64_TLSLD_ADD_DTPREL_HI12;
- break;
- case AArch64::fixup_a64_add_dtprel_lo12:
- Type = ELF::R_AARCH64_TLSLD_ADD_DTPREL_LO12;
- break;
- case AArch64::fixup_a64_add_dtprel_lo12_nc:
- Type = ELF::R_AARCH64_TLSLD_ADD_DTPREL_LO12_NC;
- break;
- case AArch64::fixup_a64_ldst8_dtprel_lo12:
- Type = ELF::R_AARCH64_TLSLD_LDST8_DTPREL_LO12;
- break;
- case AArch64::fixup_a64_ldst8_dtprel_lo12_nc:
- Type = ELF::R_AARCH64_TLSLD_LDST8_DTPREL_LO12_NC;
- break;
- case AArch64::fixup_a64_ldst16_dtprel_lo12:
- Type = ELF::R_AARCH64_TLSLD_LDST16_DTPREL_LO12;
- break;
- case AArch64::fixup_a64_ldst16_dtprel_lo12_nc:
- Type = ELF::R_AARCH64_TLSLD_LDST16_DTPREL_LO12_NC;
- break;
- case AArch64::fixup_a64_ldst32_dtprel_lo12:
- Type = ELF::R_AARCH64_TLSLD_LDST32_DTPREL_LO12;
- break;
- case AArch64::fixup_a64_ldst32_dtprel_lo12_nc:
- Type = ELF::R_AARCH64_TLSLD_LDST32_DTPREL_LO12_NC;
- break;
- case AArch64::fixup_a64_ldst64_dtprel_lo12:
- Type = ELF::R_AARCH64_TLSLD_LDST64_DTPREL_LO12;
- break;
- case AArch64::fixup_a64_ldst64_dtprel_lo12_nc:
- Type = ELF::R_AARCH64_TLSLD_LDST64_DTPREL_LO12_NC;
- break;
+ report_fatal_error("invalid fixup for add (uimm12) instruction");
+ return 0;
+ case AArch64::fixup_aarch64_ldst_imm12_scale1:
+ if (SymLoc == AArch64MCExpr::VK_ABS && IsNC)
+ return ELF::R_AARCH64_LDST8_ABS_LO12_NC;
+ if (SymLoc == AArch64MCExpr::VK_DTPREL && !IsNC)
+ return ELF::R_AARCH64_TLSLD_LDST8_DTPREL_LO12;
+ if (SymLoc == AArch64MCExpr::VK_DTPREL && IsNC)
+ return ELF::R_AARCH64_TLSLD_LDST8_DTPREL_LO12_NC;
+ if (SymLoc == AArch64MCExpr::VK_TPREL && !IsNC)
+ return ELF::R_AARCH64_TLSLE_LDST8_TPREL_LO12;
+ if (SymLoc == AArch64MCExpr::VK_TPREL && IsNC)
+ return ELF::R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC;
- // TLS initial-exec block
- case AArch64::fixup_a64_movw_gottprel_g1:
- Type = ELF::R_AARCH64_TLSIE_MOVW_GOTTPREL_G1;
- break;
- case AArch64::fixup_a64_movw_gottprel_g0_nc:
- Type = ELF::R_AARCH64_TLSIE_MOVW_GOTTPREL_G0_NC;
- break;
- case AArch64::fixup_a64_ld64_gottprel_lo12_nc:
- Type = ELF::R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC;
- break;
+ report_fatal_error("invalid fixup for 8-bit load/store instruction");
+ return 0;
+ case AArch64::fixup_aarch64_ldst_imm12_scale2:
+ if (SymLoc == AArch64MCExpr::VK_ABS && IsNC)
+ return ELF::R_AARCH64_LDST16_ABS_LO12_NC;
+ if (SymLoc == AArch64MCExpr::VK_DTPREL && !IsNC)
+ return ELF::R_AARCH64_TLSLD_LDST16_DTPREL_LO12;
+ if (SymLoc == AArch64MCExpr::VK_DTPREL && IsNC)
+ return ELF::R_AARCH64_TLSLD_LDST16_DTPREL_LO12_NC;
+ if (SymLoc == AArch64MCExpr::VK_TPREL && !IsNC)
+ return ELF::R_AARCH64_TLSLE_LDST16_TPREL_LO12;
+ if (SymLoc == AArch64MCExpr::VK_TPREL && IsNC)
+ return ELF::R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC;
- // TLS local-exec block
- case AArch64::fixup_a64_movw_tprel_g2:
- Type = ELF::R_AARCH64_TLSLE_MOVW_TPREL_G2;
- break;
- case AArch64::fixup_a64_movw_tprel_g1:
- Type = ELF::R_AARCH64_TLSLE_MOVW_TPREL_G1;
- break;
- case AArch64::fixup_a64_movw_tprel_g1_nc:
- Type = ELF::R_AARCH64_TLSLE_MOVW_TPREL_G1_NC;
- break;
- case AArch64::fixup_a64_movw_tprel_g0:
- Type = ELF::R_AARCH64_TLSLE_MOVW_TPREL_G0;
- break;
- case AArch64::fixup_a64_movw_tprel_g0_nc:
- Type = ELF::R_AARCH64_TLSLE_MOVW_TPREL_G0_NC;
- break;
- case AArch64::fixup_a64_add_tprel_hi12:
- Type = ELF::R_AARCH64_TLSLE_ADD_TPREL_HI12;
- break;
- case AArch64::fixup_a64_add_tprel_lo12:
- Type = ELF::R_AARCH64_TLSLE_ADD_TPREL_LO12;
- break;
- case AArch64::fixup_a64_add_tprel_lo12_nc:
- Type = ELF::R_AARCH64_TLSLE_ADD_TPREL_LO12_NC;
- break;
- case AArch64::fixup_a64_ldst8_tprel_lo12:
- Type = ELF::R_AARCH64_TLSLE_LDST8_TPREL_LO12;
- break;
- case AArch64::fixup_a64_ldst8_tprel_lo12_nc:
- Type = ELF::R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC;
- break;
- case AArch64::fixup_a64_ldst16_tprel_lo12:
- Type = ELF::R_AARCH64_TLSLE_LDST16_TPREL_LO12;
- break;
- case AArch64::fixup_a64_ldst16_tprel_lo12_nc:
- Type = ELF::R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC;
- break;
- case AArch64::fixup_a64_ldst32_tprel_lo12:
- Type = ELF::R_AARCH64_TLSLE_LDST32_TPREL_LO12;
- break;
- case AArch64::fixup_a64_ldst32_tprel_lo12_nc:
- Type = ELF::R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC;
- break;
- case AArch64::fixup_a64_ldst64_tprel_lo12:
- Type = ELF::R_AARCH64_TLSLE_LDST64_TPREL_LO12;
- break;
- case AArch64::fixup_a64_ldst64_tprel_lo12_nc:
- Type = ELF::R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC;
- break;
+ report_fatal_error("invalid fixup for 16-bit load/store instruction");
+ return 0;
+ case AArch64::fixup_aarch64_ldst_imm12_scale4:
+ if (SymLoc == AArch64MCExpr::VK_ABS && IsNC)
+ return ELF::R_AARCH64_LDST32_ABS_LO12_NC;
+ if (SymLoc == AArch64MCExpr::VK_DTPREL && !IsNC)
+ return ELF::R_AARCH64_TLSLD_LDST32_DTPREL_LO12;
+ if (SymLoc == AArch64MCExpr::VK_DTPREL && IsNC)
+ return ELF::R_AARCH64_TLSLD_LDST32_DTPREL_LO12_NC;
+ if (SymLoc == AArch64MCExpr::VK_TPREL && !IsNC)
+ return ELF::R_AARCH64_TLSLE_LDST32_TPREL_LO12;
+ if (SymLoc == AArch64MCExpr::VK_TPREL && IsNC)
+ return ELF::R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC;
- // TLS general-dynamic block
- case AArch64::fixup_a64_tlsdesc_adr_page:
- Type = ELF::R_AARCH64_TLSDESC_ADR_PAGE;
- break;
- case AArch64::fixup_a64_tlsdesc_ld64_lo12_nc:
- Type = ELF::R_AARCH64_TLSDESC_LD64_LO12_NC;
- break;
- case AArch64::fixup_a64_tlsdesc_add_lo12_nc:
- Type = ELF::R_AARCH64_TLSDESC_ADD_LO12_NC;
- break;
- case AArch64::fixup_a64_tlsdesc_call:
- Type = ELF::R_AARCH64_TLSDESC_CALL;
- break;
+ report_fatal_error("invalid fixup for 32-bit load/store instruction");
+ return 0;
+ case AArch64::fixup_aarch64_ldst_imm12_scale8:
+ if (SymLoc == AArch64MCExpr::VK_ABS && IsNC)
+ return ELF::R_AARCH64_LDST64_ABS_LO12_NC;
+ if (SymLoc == AArch64MCExpr::VK_GOT && IsNC)
+ return ELF::R_AARCH64_LD64_GOT_LO12_NC;
+ if (SymLoc == AArch64MCExpr::VK_DTPREL && !IsNC)
+ return ELF::R_AARCH64_TLSLD_LDST64_DTPREL_LO12;
+ if (SymLoc == AArch64MCExpr::VK_DTPREL && IsNC)
+ return ELF::R_AARCH64_TLSLD_LDST64_DTPREL_LO12_NC;
+ if (SymLoc == AArch64MCExpr::VK_TPREL && !IsNC)
+ return ELF::R_AARCH64_TLSLE_LDST64_TPREL_LO12;
+ if (SymLoc == AArch64MCExpr::VK_TPREL && IsNC)
+ return ELF::R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC;
+ if (SymLoc == AArch64MCExpr::VK_GOTTPREL && IsNC)
+ return ELF::R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC;
+ if (SymLoc == AArch64MCExpr::VK_TLSDESC && IsNC)
+ return ELF::R_AARCH64_TLSDESC_LD64_LO12_NC;
+
+ report_fatal_error("invalid fixup for 64-bit load/store instruction");
+ return 0;
+ case AArch64::fixup_aarch64_ldst_imm12_scale16:
+ if (SymLoc == AArch64MCExpr::VK_ABS && IsNC)
+ return ELF::R_AARCH64_LDST128_ABS_LO12_NC;
+
+ report_fatal_error("invalid fixup for 128-bit load/store instruction");
+ return 0;
+ case AArch64::fixup_aarch64_movw:
+ if (RefKind == AArch64MCExpr::VK_ABS_G3)
+ return ELF::R_AARCH64_MOVW_UABS_G3;
+ if (RefKind == AArch64MCExpr::VK_ABS_G2)
+ return ELF::R_AARCH64_MOVW_UABS_G2;
+ if (RefKind == AArch64MCExpr::VK_ABS_G2_S)
+ return ELF::R_AARCH64_MOVW_SABS_G2;
+ if (RefKind == AArch64MCExpr::VK_ABS_G2_NC)
+ return ELF::R_AARCH64_MOVW_UABS_G2_NC;
+ if (RefKind == AArch64MCExpr::VK_ABS_G1)
+ return ELF::R_AARCH64_MOVW_UABS_G1;
+ if (RefKind == AArch64MCExpr::VK_ABS_G1_S)
+ return ELF::R_AARCH64_MOVW_SABS_G1;
+ if (RefKind == AArch64MCExpr::VK_ABS_G1_NC)
+ return ELF::R_AARCH64_MOVW_UABS_G1_NC;
+ if (RefKind == AArch64MCExpr::VK_ABS_G0)
+ return ELF::R_AARCH64_MOVW_UABS_G0;
+ if (RefKind == AArch64MCExpr::VK_ABS_G0_S)
+ return ELF::R_AARCH64_MOVW_SABS_G0;
+ if (RefKind == AArch64MCExpr::VK_ABS_G0_NC)
+ return ELF::R_AARCH64_MOVW_UABS_G0_NC;
+ if (RefKind == AArch64MCExpr::VK_DTPREL_G2)
+ return ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G2;
+ if (RefKind == AArch64MCExpr::VK_DTPREL_G1)
+ return ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G1;
+ if (RefKind == AArch64MCExpr::VK_DTPREL_G1_NC)
+ return ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G1_NC;
+ if (RefKind == AArch64MCExpr::VK_DTPREL_G0)
+ return ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G0;
+ if (RefKind == AArch64MCExpr::VK_DTPREL_G0_NC)
+ return ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G0_NC;
+ if (RefKind == AArch64MCExpr::VK_TPREL_G2)
+ return ELF::R_AARCH64_TLSLE_MOVW_TPREL_G2;
+ if (RefKind == AArch64MCExpr::VK_TPREL_G1)
+ return ELF::R_AARCH64_TLSLE_MOVW_TPREL_G1;
+ if (RefKind == AArch64MCExpr::VK_TPREL_G1_NC)
+ return ELF::R_AARCH64_TLSLE_MOVW_TPREL_G1_NC;
+ if (RefKind == AArch64MCExpr::VK_TPREL_G0)
+ return ELF::R_AARCH64_TLSLE_MOVW_TPREL_G0;
+ if (RefKind == AArch64MCExpr::VK_TPREL_G0_NC)
+ return ELF::R_AARCH64_TLSLE_MOVW_TPREL_G0_NC;
+ if (RefKind == AArch64MCExpr::VK_GOTTPREL_G1)
+ return ELF::R_AARCH64_TLSIE_MOVW_GOTTPREL_G1;
+ if (RefKind == AArch64MCExpr::VK_GOTTPREL_G0_NC)
+ return ELF::R_AARCH64_TLSIE_MOVW_GOTTPREL_G0_NC;
+ report_fatal_error("invalid fixup for movz/movk instruction");
+ return 0;
+ case AArch64::fixup_aarch64_tlsdesc_call:
+ return ELF::R_AARCH64_TLSDESC_CALL;
+ default:
+ llvm_unreachable("Unknown ELF relocation type");
}
}
- return Type;
+ llvm_unreachable("Unimplemented fixup -> relocation");
}
MCObjectWriter *llvm::createAArch64ELFObjectWriter(raw_ostream &OS,
- uint8_t OSABI,
- bool IsLittleEndian) {
- MCELFObjectTargetWriter *MOTW = new AArch64ELFObjectWriter(OSABI, IsLittleEndian);
- return createELFObjectWriter(MOTW, OS, IsLittleEndian);
+ uint8_t OSABI,
+ bool IsLittleEndian) {
+ MCELFObjectTargetWriter *MOTW =
+ new AArch64ELFObjectWriter(OSABI, IsLittleEndian);
+ return createELFObjectWriter(MOTW, OS, IsLittleEndian);
}
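
The rewritten GetRelocType above keys the relocation off the pair (fixup kind, expression modifier) instead of one fixup kind per relocation. A toy model of that dispatch (enumerators are illustrative stand-ins, not the LLVM ones):

#include <cstdio>

enum Fixup { LdstScale8, AddImm12 };
enum SymLoc { ABS, GOT };

static const char *relocFor(Fixup F, SymLoc S, bool IsNC) {
  if (F == LdstScale8 && S == GOT && IsNC)
    return "R_AARCH64_LD64_GOT_LO12_NC";
  if (F == AddImm12 && S == ABS && IsNC)
    return "R_AARCH64_ADD_ABS_LO12_NC";
  return "(fatal: unhandled combination)"; // mirrors report_fatal_error above
}

int main() {
  // ldr x0, [x1, :got_lo12:sym]  ->  R_AARCH64_LD64_GOT_LO12_NC
  std::printf("%s\n", relocFor(LdstScale8, GOT, /*IsNC=*/true));
  // add x0, x0, :lo12:sym        ->  R_AARCH64_ADD_ABS_LO12_NC
  std::printf("%s\n", relocFor(AddImm12, ABS, /*IsNC=*/true));
}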
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
index 473b7dd..a79406d 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
@@ -56,14 +56,14 @@ namespace {
class AArch64ELFStreamer : public MCELFStreamer {
public:
AArch64ELFStreamer(MCContext &Context, MCAsmBackend &TAB, raw_ostream &OS,
- MCCodeEmitter *Emitter)
+ MCCodeEmitter *Emitter)
: MCELFStreamer(Context, TAB, OS, Emitter), MappingSymbolCounter(0),
LastEMS(EMS_None) {}
~AArch64ELFStreamer() {}
- virtual void ChangeSection(const MCSection *Section,
- const MCExpr *Subsection) {
+ void ChangeSection(const MCSection *Section,
+ const MCExpr *Subsection) override {
// We have to keep track of the mapping symbol state of any sections we
// use. Each one should start off as EMS_None, which is provided as the
// default constructor by DenseMap::lookup.
@@ -76,7 +76,8 @@ public:
/// This function is the one used to emit instruction data into the ELF
/// streamer. We override it to add the appropriate mapping symbol if
/// necessary.
- virtual void EmitInstruction(const MCInst& Inst, const MCSubtargetInfo &STI) {
+ void EmitInstruction(const MCInst &Inst,
+ const MCSubtargetInfo &STI) override {
EmitA64MappingSymbol();
MCELFStreamer::EmitInstruction(Inst, STI);
}
@@ -84,7 +85,7 @@ public:
/// This is one of the functions used to emit data into an ELF section, so the
/// AArch64 streamer overrides it to add the appropriate mapping symbol ($d)
/// if necessary.
- virtual void EmitBytes(StringRef Data) {
+ void EmitBytes(StringRef Data) override {
EmitDataMappingSymbol();
MCELFStreamer::EmitBytes(Data);
}
@@ -92,7 +93,8 @@ public:
/// This is one of the functions used to emit data into an ELF section, so the
/// AArch64 streamer overrides it to add the appropriate mapping symbol ($d)
/// if necessary.
- virtual void EmitValueImpl(const MCExpr *Value, unsigned Size) {
+ void EmitValueImpl(const MCExpr *Value, unsigned Size,
+ const SMLoc &Loc) override {
EmitDataMappingSymbol();
MCELFStreamer::EmitValueImpl(Value, Size);
}
@@ -105,13 +107,15 @@ private:
};
void EmitDataMappingSymbol() {
- if (LastEMS == EMS_Data) return;
+ if (LastEMS == EMS_Data)
+ return;
EmitMappingSymbol("$d");
LastEMS = EMS_Data;
}
void EmitA64MappingSymbol() {
- if (LastEMS == EMS_A64) return;
+ if (LastEMS == EMS_A64)
+ return;
EmitMappingSymbol("$x");
LastEMS = EMS_A64;
}
@@ -120,15 +124,14 @@ private:
MCSymbol *Start = getContext().CreateTempSymbol();
EmitLabel(Start);
- MCSymbol *Symbol =
- getContext().GetOrCreateSymbol(Name + "." +
- Twine(MappingSymbolCounter++));
+ MCSymbol *Symbol = getContext().GetOrCreateSymbol(
+ Name + "." + Twine(MappingSymbolCounter++));
MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
MCELF::SetType(SD, ELF::STT_NOTYPE);
MCELF::SetBinding(SD, ELF::STB_LOCAL);
SD.setExternal(false);
- AssignSection(Symbol, getCurrentSection().first);
+ Symbol->setSection(*getCurrentSection().first);
const MCExpr *Value = MCSymbolRefExpr::Create(Start, getContext());
Symbol->setVariableValue(Value);
@@ -144,16 +147,14 @@ private:
}
namespace llvm {
- MCELFStreamer* createAArch64ELFStreamer(MCContext &Context, MCAsmBackend &TAB,
- raw_ostream &OS, MCCodeEmitter *Emitter,
- bool RelaxAll, bool NoExecStack) {
- AArch64ELFStreamer *S = new AArch64ELFStreamer(Context, TAB, OS, Emitter);
- if (RelaxAll)
- S->getAssembler().setRelaxAll(true);
- if (NoExecStack)
- S->getAssembler().setNoExecStack(true);
- return S;
- }
+MCELFStreamer *createAArch64ELFStreamer(MCContext &Context, MCAsmBackend &TAB,
+ raw_ostream &OS, MCCodeEmitter *Emitter,
+ bool RelaxAll, bool NoExecStack) {
+ AArch64ELFStreamer *S = new AArch64ELFStreamer(Context, TAB, OS, Emitter);
+ if (RelaxAll)
+ S->getAssembler().setRelaxAll(true);
+ if (NoExecStack)
+ S->getAssembler().setNoExecStack(true);
+ return S;
+}
}
-
-
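
The streamer above emits a mapping symbol only when a section transitions between code and data, tracked via LastEMS. A toy model of that state machine (names mirror the streamer; the model itself is illustrative):

#include <cstdio>

enum EMS { EMS_None, EMS_A64, EMS_Data };

struct MappingTracker {
  EMS Last = EMS_None;
  void code() { if (Last != EMS_A64)  { std::puts("$x"); Last = EMS_A64; } }
  void data() { if (Last != EMS_Data) { std::puts("$d"); Last = EMS_Data; } }
};

int main() {
  MappingTracker T;
  T.code(); T.code(); // one "$x" covers the whole run of instructions
  T.data();           // "$d" before emitted data, e.g. a literal pool
  T.code();           // back to code: another "$x"
}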
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h
index 5a89ca5..bc6973b 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h
@@ -18,10 +18,9 @@
namespace llvm {
- MCELFStreamer* createAArch64ELFStreamer(MCContext &Context, MCAsmBackend &TAB,
- raw_ostream &OS,
- MCCodeEmitter *Emitter,
- bool RelaxAll, bool NoExecStack);
+MCELFStreamer *createAArch64ELFStreamer(MCContext &Context, MCAsmBackend &TAB,
+ raw_ostream &OS, MCCodeEmitter *Emitter,
+ bool RelaxAll, bool NoExecStack);
}
#endif // AArch64_ELF_STREAMER_H
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64FixupKinds.h b/lib/Target/AArch64/MCTargetDesc/AArch64FixupKinds.h
index eeb122d..bf405fb 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64FixupKinds.h
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64FixupKinds.h
@@ -1,4 +1,4 @@
-//=- AArch64/AArch64FixupKinds.h - AArch64 Specific Fixup Entries -*- C++ -*-=//
+//===-- AArch64FixupKinds.h - AArch64 Specific Fixup Entries ----*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -6,108 +6,71 @@
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
-//
-// This file describes the LLVM fixups applied to MCInsts in the AArch64
-// backend.
-//
-//===----------------------------------------------------------------------===//
-#ifndef LLVM_AARCH64_AARCH64FIXUPKINDS_H
-#define LLVM_AARCH64_AARCH64FIXUPKINDS_H
+#ifndef LLVM_AArch64FIXUPKINDS_H
+#define LLVM_AArch64FIXUPKINDS_H
#include "llvm/MC/MCFixup.h"
namespace llvm {
- namespace AArch64 {
- enum Fixups {
- fixup_a64_ld_prel = FirstTargetFixupKind,
- fixup_a64_adr_prel,
- fixup_a64_adr_prel_page,
-
- fixup_a64_add_lo12,
-
- fixup_a64_ldst8_lo12,
- fixup_a64_ldst16_lo12,
- fixup_a64_ldst32_lo12,
- fixup_a64_ldst64_lo12,
- fixup_a64_ldst128_lo12,
-
- fixup_a64_tstbr,
- fixup_a64_condbr,
- fixup_a64_uncondbr,
- fixup_a64_call,
-
- fixup_a64_movw_uabs_g0,
- fixup_a64_movw_uabs_g0_nc,
- fixup_a64_movw_uabs_g1,
- fixup_a64_movw_uabs_g1_nc,
- fixup_a64_movw_uabs_g2,
- fixup_a64_movw_uabs_g2_nc,
- fixup_a64_movw_uabs_g3,
-
- fixup_a64_movw_sabs_g0,
- fixup_a64_movw_sabs_g1,
- fixup_a64_movw_sabs_g2,
-
- fixup_a64_adr_prel_got_page,
- fixup_a64_ld64_got_lo12_nc,
-
- // Produce offsets relative to the module's dynamic TLS area.
- fixup_a64_movw_dtprel_g2,
- fixup_a64_movw_dtprel_g1,
- fixup_a64_movw_dtprel_g1_nc,
- fixup_a64_movw_dtprel_g0,
- fixup_a64_movw_dtprel_g0_nc,
- fixup_a64_add_dtprel_hi12,
- fixup_a64_add_dtprel_lo12,
- fixup_a64_add_dtprel_lo12_nc,
- fixup_a64_ldst8_dtprel_lo12,
- fixup_a64_ldst8_dtprel_lo12_nc,
- fixup_a64_ldst16_dtprel_lo12,
- fixup_a64_ldst16_dtprel_lo12_nc,
- fixup_a64_ldst32_dtprel_lo12,
- fixup_a64_ldst32_dtprel_lo12_nc,
- fixup_a64_ldst64_dtprel_lo12,
- fixup_a64_ldst64_dtprel_lo12_nc,
-
- // Produce the GOT entry containing a variable's address in TLS's
- // initial-exec mode.
- fixup_a64_movw_gottprel_g1,
- fixup_a64_movw_gottprel_g0_nc,
- fixup_a64_adr_gottprel_page,
- fixup_a64_ld64_gottprel_lo12_nc,
- fixup_a64_ld_gottprel_prel19,
-
- // Produce offsets relative to the thread pointer: TPIDR_EL0.
- fixup_a64_movw_tprel_g2,
- fixup_a64_movw_tprel_g1,
- fixup_a64_movw_tprel_g1_nc,
- fixup_a64_movw_tprel_g0,
- fixup_a64_movw_tprel_g0_nc,
- fixup_a64_add_tprel_hi12,
- fixup_a64_add_tprel_lo12,
- fixup_a64_add_tprel_lo12_nc,
- fixup_a64_ldst8_tprel_lo12,
- fixup_a64_ldst8_tprel_lo12_nc,
- fixup_a64_ldst16_tprel_lo12,
- fixup_a64_ldst16_tprel_lo12_nc,
- fixup_a64_ldst32_tprel_lo12,
- fixup_a64_ldst32_tprel_lo12_nc,
- fixup_a64_ldst64_tprel_lo12,
- fixup_a64_ldst64_tprel_lo12_nc,
-
- // Produce the special fixups used by the general-dynamic TLS model.
- fixup_a64_tlsdesc_adr_page,
- fixup_a64_tlsdesc_ld64_lo12_nc,
- fixup_a64_tlsdesc_add_lo12_nc,
- fixup_a64_tlsdesc_call,
-
-
- // Marker
- LastTargetFixupKind,
- NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
- };
- }
-}
+namespace AArch64 {
+
+enum Fixups {
+ // fixup_aarch64_pcrel_adr_imm21 - A 21-bit pc-relative immediate inserted into
+ // an ADR instruction.
+ fixup_aarch64_pcrel_adr_imm21 = FirstTargetFixupKind,
+
+ // fixup_aarch64_pcrel_adrp_imm21 - A 21-bit pc-relative immediate inserted into
+ // an ADRP instruction.
+ fixup_aarch64_pcrel_adrp_imm21,
+
+ // fixup_aarch64_add_imm12 - 12-bit fixup for add/sub instructions.
+ // No alignment adjustment. All value bits are encoded.
+ fixup_aarch64_add_imm12,
+
+ // fixup_aarch64_ldst_imm12_* - unsigned 12-bit fixups for load and
+ // store instructions.
+ fixup_aarch64_ldst_imm12_scale1,
+ fixup_aarch64_ldst_imm12_scale2,
+ fixup_aarch64_ldst_imm12_scale4,
+ fixup_aarch64_ldst_imm12_scale8,
+ fixup_aarch64_ldst_imm12_scale16,
+
+ // fixup_aarch64_ldr_pcrel_imm19 - The high 19 bits of a 21-bit pc-relative
+ // immediate, stored in bits 5-23 of the instruction. Used by pc-relative
+ // loads and generates relocations directly when necessary.
+ fixup_aarch64_ldr_pcrel_imm19,
+
+ // FIXME: comment
+ fixup_aarch64_movw,
+
+ // fixup_aarch64_pcrel_branch14 - The high 14 bits of a 16-bit pc-relative
+ // immediate (TBZ/TBNZ).
+ fixup_aarch64_pcrel_branch14,
+
+ // fixup_aarch64_pcrel_branch19 - The high 19 bits of a 21-bit pc-relative
+ // immediate. Same encoding as fixup_aarch64_ldr_pcrel_imm19, except this is
+ // used by b.cc and generates relocations directly when necessary.
+ fixup_aarch64_pcrel_branch19,
+
+ // fixup_aarch64_pcrel_branch26 - The high 26 bits of a 28-bit pc-relative
+ // immediate.
+ fixup_aarch64_pcrel_branch26,
+
+ // fixup_aarch64_pcrel_call26 - The high 26 bits of a 28-bit pc-relative
+ // immediate. Distinguished from branch26 only on ELF.
+ fixup_aarch64_pcrel_call26,
+
+ // fixup_aarch64_tlsdesc_call - zero-space placeholder for the ELF
+ // R_AARCH64_TLSDESC_CALL relocation.
+ fixup_aarch64_tlsdesc_call,
+
+ // Marker
+ LastTargetFixupKind,
+ NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
+};
+
+} // end namespace AArch64
+} // end namespace llvm
#endif
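
The bit widths above translate directly into branch reach once the 4-byte instruction scaling is applied. A quick arithmetic check (a sketch, not part of the backend):

#include <cassert>
#include <cstdint>

int main() {
  // branch26/call26: 26 immediate bits << 2 -> roughly +/- 128 MiB.
  assert((((1LL << 25) - 1) << 2) == 134217724);
  assert(-((1LL << 25) << 2) == -134217728);
  // branch14 (TBZ/TBNZ): 14 bits << 2 -> roughly +/- 32 KiB.
  assert((((1LL << 13) - 1) << 2) == 32764);
}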
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
index b090a55..dc4a8bf 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
@@ -13,26 +13,82 @@
#include "AArch64MCAsmInfo.h"
#include "llvm/ADT/Triple.h"
-
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/Support/CommandLine.h"
using namespace llvm;
-AArch64ELFMCAsmInfo::AArch64ELFMCAsmInfo(StringRef TT) {
- Triple TheTriple(TT);
- if (TheTriple.getArch() == Triple::aarch64_be)
+enum AsmWriterVariantTy {
+ Default = -1,
+ Generic = 0,
+ Apple = 1
+};
+
+static cl::opt<AsmWriterVariantTy> AsmWriterVariant(
+ "aarch64-neon-syntax", cl::init(Default),
+ cl::desc("Choose style of NEON code to emit from AArch64 backend:"),
+ cl::values(clEnumValN(Generic, "generic", "Emit generic NEON assembly"),
+ clEnumValN(Apple, "apple", "Emit Apple-style NEON assembly"),
+ clEnumValEnd));
+
+AArch64MCAsmInfoDarwin::AArch64MCAsmInfoDarwin() {
+ // We prefer NEON instructions to be printed in the short form.
+ AssemblerDialect = AsmWriterVariant == Default ? 1 : AsmWriterVariant;
+
+ PrivateGlobalPrefix = "L";
+ SeparatorString = "%%";
+ CommentString = ";";
+ PointerSize = CalleeSaveStackSlotSize = 8;
+
+ AlignmentIsInBytes = false;
+ UsesELFSectionDirectiveForBSS = true;
+ SupportsDebugInformation = true;
+ UseDataRegionDirectives = true;
+
+ ExceptionsType = ExceptionHandling::DwarfCFI;
+}
+
+const MCExpr *AArch64MCAsmInfoDarwin::getExprForPersonalitySymbol(
+ const MCSymbol *Sym, unsigned Encoding, MCStreamer &Streamer) const {
+ // On Darwin, we can reference dwarf symbols with foo@GOT-., which
+ // is an indirect pc-relative reference. The default implementation
+ // won't reference using the GOT, so we need this target-specific
+ // version.
+ MCContext &Context = Streamer.getContext();
+ const MCExpr *Res =
+ MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_GOT, Context);
+ MCSymbol *PCSym = Context.CreateTempSymbol();
+ Streamer.EmitLabel(PCSym);
+ const MCExpr *PC = MCSymbolRefExpr::Create(PCSym, Context);
+ return MCBinaryExpr::CreateSub(Res, PC, Context);
+}
+
+AArch64MCAsmInfoELF::AArch64MCAsmInfoELF(StringRef TT) {
+ Triple T(TT);
+ if (T.getArch() == Triple::arm64_be || T.getArch() == Triple::aarch64_be)
IsLittleEndian = false;
+ // We prefer NEON instructions to be printed in the short form.
+ AssemblerDialect = AsmWriterVariant == Default ? 0 : AsmWriterVariant;
+
PointerSize = 8;
// ".comm align is in bytes but .align is pow-2."
AlignmentIsInBytes = false;
CommentString = "//";
+ PrivateGlobalPrefix = ".L";
Code32Directive = ".code\t32";
Data16bitsDirective = "\t.hword\t";
Data32bitsDirective = "\t.word\t";
Data64bitsDirective = "\t.xword\t";
+ UseDataRegionDirectives = false;
+
+ WeakRefDirective = "\t.weak\t";
+
HasLEB128 = true;
SupportsDebugInformation = true;
@@ -41,6 +97,3 @@ AArch64ELFMCAsmInfo::AArch64ELFMCAsmInfo(StringRef TT) {
UseIntegratedAssembler = true;
}
-
-// Pin the vtable to this file.
-void AArch64ELFMCAsmInfo::anchor() {}
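
Both constructors above resolve the tri-state -aarch64-neon-syntax option the same way, differing only in the per-target default (Apple syntax on Darwin, generic on ELF). A sketch of that resolution (values mirror the enum above):

#include <cassert>

enum AsmWriterVariantTy { Default = -1, Generic = 0, Apple = 1 };

static int resolveDialect(AsmWriterVariantTy Opt, int TargetDefault) {
  return Opt == Default ? TargetDefault : Opt;
}

int main() {
  assert(resolveDialect(Default, /*Darwin*/ 1) == 1);
  assert(resolveDialect(Default, /*ELF*/ 0) == 0);
  assert(resolveDialect(Generic, /*Darwin*/ 1) == 0); // explicit flag wins
}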
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h
index 43c0e47..42a031d 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h
@@ -1,4 +1,4 @@
-//==-- AArch64MCAsmInfo.h - AArch64 asm properties -------------*- C++ -*--===//
+//=====-- AArch64MCAsmInfo.h - AArch64 asm properties ---------*- C++ -*--====//
//
// The LLVM Compiler Infrastructure
//
@@ -11,17 +11,24 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_AARCH64TARGETASMINFO_H
-#define LLVM_AARCH64TARGETASMINFO_H
+#ifndef AArch64TARGETASMINFO_H
+#define AArch64TARGETASMINFO_H
-#include "llvm/MC/MCAsmInfoELF.h"
+#include "llvm/MC/MCAsmInfoDarwin.h"
namespace llvm {
+class Target;
+class StringRef;
+class MCStreamer;
+struct AArch64MCAsmInfoDarwin : public MCAsmInfoDarwin {
+ explicit AArch64MCAsmInfoDarwin();
+ const MCExpr *
+ getExprForPersonalitySymbol(const MCSymbol *Sym, unsigned Encoding,
+ MCStreamer &Streamer) const override;
+};
-struct AArch64ELFMCAsmInfo : public MCAsmInfoELF {
- explicit AArch64ELFMCAsmInfo(StringRef TT);
-private:
- virtual void anchor();
+struct AArch64MCAsmInfoELF : public MCAsmInfo {
+ explicit AArch64MCAsmInfoELF(StringRef TT);
};
} // namespace llvm
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
index b9a61ef..464a18c 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
@@ -1,4 +1,4 @@
-//=- AArch64/AArch64MCCodeEmitter.cpp - Convert AArch64 code to machine code =//
+//=- AArch64/AArch64MCCodeEmitter.cpp - Convert AArch64 code to machine code-=//
//
// The LLVM Compiler Infrastructure
//
@@ -11,10 +11,9 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "mccodeemitter"
+#include "MCTargetDesc/AArch64AddressingModes.h"
#include "MCTargetDesc/AArch64FixupKinds.h"
#include "MCTargetDesc/AArch64MCExpr.h"
-#include "MCTargetDesc/AArch64MCTargetDesc.h"
#include "Utils/AArch64BaseInfo.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
@@ -22,524 +21,562 @@
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/Support/ErrorHandling.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/Support/raw_ostream.h"
-
using namespace llvm;
+#define DEBUG_TYPE "mccodeemitter"
+
+STATISTIC(MCNumEmitted, "Number of MC instructions emitted.");
+STATISTIC(MCNumFixups, "Number of MC fixups created.");
+
namespace {
+
class AArch64MCCodeEmitter : public MCCodeEmitter {
- AArch64MCCodeEmitter(const AArch64MCCodeEmitter &) LLVM_DELETED_FUNCTION;
- void operator=(const AArch64MCCodeEmitter &) LLVM_DELETED_FUNCTION;
MCContext &Ctx;
+ AArch64MCCodeEmitter(const AArch64MCCodeEmitter &); // DO NOT IMPLEMENT
+ void operator=(const AArch64MCCodeEmitter &); // DO NOT IMPLEMENT
public:
- AArch64MCCodeEmitter(MCContext &ctx) : Ctx(ctx) {}
+ AArch64MCCodeEmitter(const MCInstrInfo &mcii, const MCSubtargetInfo &sti,
+ MCContext &ctx)
+ : Ctx(ctx) {}
~AArch64MCCodeEmitter() {}
- unsigned getAddSubImmOpValue(const MCInst &MI, unsigned OpIdx,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
+ // getBinaryCodeForInstr - TableGen'erated function for getting the
+ // binary encoding for an instruction.
+ uint64_t getBinaryCodeForInstr(const MCInst &MI,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ /// getMachineOpValue - Return binary encoding of operand. If the machine
+ /// operand requires relocation, record the relocation and return zero.
+ unsigned getMachineOpValue(const MCInst &MI, const MCOperand &MO,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
- unsigned getAdrpLabelOpValue(const MCInst &MI, unsigned OpIdx,
+ /// getLdStUImm12OpValue - Return encoding info for 12-bit unsigned immediate
+ /// attached to a load, store or prfm instruction. If operand requires a
+ /// relocation, record it and return zero in that part of the encoding.
+ template <uint32_t FixupKind>
+ uint32_t getLdStUImm12OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ /// getAdrLabelOpValue - Return encoding info for 21-bit immediate ADR label
+ /// target.
+ uint32_t getAdrLabelOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ /// getAddSubImmOpValue - Return encoding for the 12-bit immediate value and
+ /// the 2-bit shift field.
+ uint32_t getAddSubImmOpValue(const MCInst &MI, unsigned OpIdx,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
- template<int MemSize>
- unsigned getOffsetUImm12OpValue(const MCInst &MI, unsigned OpIdx,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
- return getOffsetUImm12OpValue(MI, OpIdx, Fixups, STI, MemSize);
- }
+ /// getCondBranchTargetOpValue - Return the encoded value for a conditional
+ /// branch target.
+ uint32_t getCondBranchTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
- unsigned getOffsetUImm12OpValue(const MCInst &MI, unsigned OpIdx,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI,
- int MemSize) const;
+ /// getLoadLiteralOpValue - Return the encoded value for a load-literal
+ /// pc-relative address.
+ uint32_t getLoadLiteralOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
- unsigned getBitfield32LSLOpValue(const MCInst &MI, unsigned OpIdx,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
- unsigned getBitfield64LSLOpValue(const MCInst &MI, unsigned OpIdx,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
+ /// getMemExtendOpValue - Return the encoded value for a reg-extend load/store
+ /// instruction: bit 0 is whether a shift is present, bit 1 is whether the
+ /// operation is a sign extend (as opposed to a zero extend).
+ uint32_t getMemExtendOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
- unsigned getShiftRightImm8(const MCInst &MI, unsigned Op,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
- unsigned getShiftRightImm16(const MCInst &MI, unsigned Op,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
- unsigned getShiftRightImm32(const MCInst &MI, unsigned Op,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
- unsigned getShiftRightImm64(const MCInst &MI, unsigned Op,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
+ /// getTestBranchTargetOpValue - Return the encoded value for a test-bit-and-
+ /// branch target.
+ uint32_t getTestBranchTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
- unsigned getShiftLeftImm8(const MCInst &MI, unsigned Op,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
- unsigned getShiftLeftImm16(const MCInst &MI, unsigned Op,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
- unsigned getShiftLeftImm32(const MCInst &MI, unsigned Op,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
- unsigned getShiftLeftImm64(const MCInst &MI, unsigned Op,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
+ /// getBranchTargetOpValue - Return the encoded value for an unconditional
+ /// branch target.
+ uint32_t getBranchTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
- // Labels are handled mostly the same way: a symbol is needed, and
- // just gets some fixup attached.
- template<AArch64::Fixups fixupDesired>
- unsigned getLabelOpValue(const MCInst &MI, unsigned OpIdx,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
+ /// getMoveWideImmOpValue - Return the encoded value for the immediate operand
+ /// of a MOVZ or MOVK instruction.
+ uint32_t getMoveWideImmOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
- unsigned getLoadLitLabelOpValue(const MCInst &MI, unsigned OpIdx,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
+ /// getVecShifterOpValue - Return the encoded value for the vector shifter.
+ uint32_t getVecShifterOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ /// getMoveVecShifterOpValue - Return the encoded value for the vector move
+ /// shifter (MSL).
+ uint32_t getMoveVecShifterOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ /// getFixedPointScaleOpValue - Return the encoded value for the
+ /// FP-to-fixed-point scale factor.
+ uint32_t getFixedPointScaleOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
- unsigned getMoveWideImmOpValue(const MCInst &MI, unsigned OpIdx,
+ uint32_t getVecShiftR64OpValue(const MCInst &MI, unsigned OpIdx,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
+ uint32_t getVecShiftR32OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ uint32_t getVecShiftR16OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ uint32_t getVecShiftR8OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ uint32_t getVecShiftL64OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ uint32_t getVecShiftL32OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ uint32_t getVecShiftL16OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ uint32_t getVecShiftL8OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ /// getSIMDShift64OpValue - Return the encoded value for the
+ /// shift-by-immediate AdvSIMD instructions.
+ uint32_t getSIMDShift64OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
- unsigned getAddressWithFixup(const MCOperand &MO,
- unsigned FixupKind,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
-
+ uint32_t getSIMDShift64_32OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
- // getBinaryCodeForInstr - TableGen'erated function for getting the
- // binary encoding for an instruction.
- uint64_t getBinaryCodeForInstr(const MCInst &MI,
+ uint32_t getSIMDShift32OpValue(const MCInst &MI, unsigned OpIdx,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
- /// getMachineOpValue - Return binary encoding of operand. If the machine
- /// operand requires relocation, record the relocation and return zero.
- unsigned getMachineOpValue(const MCInst &MI,const MCOperand &MO,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
+ uint32_t getSIMDShift16OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ unsigned fixMOVZ(const MCInst &MI, unsigned EncodedValue,
+ const MCSubtargetInfo &STI) const;
- void EmitByte(unsigned char C, raw_ostream &OS) const {
- OS << (char)C;
- }
+ void EmitByte(unsigned char C, raw_ostream &OS) const { OS << (char)C; }
- void EmitInstruction(uint32_t Val, raw_ostream &OS) const {
+ void EmitConstant(uint64_t Val, unsigned Size, raw_ostream &OS) const {
// Output the constant in little endian byte order.
- for (unsigned i = 0; i != 4; ++i) {
- EmitByte(Val & 0xff, OS);
+ for (unsigned i = 0; i != Size; ++i) {
+ EmitByte(Val & 255, OS);
Val >>= 8;
}
}
-
void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
-
- template<int hasRs, int hasRt2> unsigned
- fixLoadStoreExclusive(const MCInst &MI, unsigned EncodedValue,
- const MCSubtargetInfo &STI) const;
-
- unsigned fixMOVZ(const MCInst &MI, unsigned EncodedValue,
- const MCSubtargetInfo &STI) const;
+ const MCSubtargetInfo &STI) const override;
unsigned fixMulHigh(const MCInst &MI, unsigned EncodedValue,
const MCSubtargetInfo &STI) const;
+ template<int hasRs, int hasRt2> unsigned
+ fixLoadStoreExclusive(const MCInst &MI, unsigned EncodedValue,
+ const MCSubtargetInfo &STI) const;
+ unsigned fixOneOperandFPComparison(const MCInst &MI, unsigned EncodedValue,
+ const MCSubtargetInfo &STI) const;
};
} // end anonymous namespace
-unsigned AArch64MCCodeEmitter::getAddressWithFixup(const MCOperand &MO,
- unsigned FixupKind,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
- if (!MO.isExpr()) {
- // This can occur for manually decoded or constructed MCInsts, but neither
- // the assembly-parser nor instruction selection will currently produce an
- // MCInst that's not a symbol reference.
- assert(MO.isImm() && "Unexpected address requested");
- return MO.getImm();
- }
+MCCodeEmitter *llvm::createAArch64MCCodeEmitter(const MCInstrInfo &MCII,
+ const MCRegisterInfo &MRI,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx) {
+ return new AArch64MCCodeEmitter(MCII, STI, Ctx);
+}
- const MCExpr *Expr = MO.getExpr();
- MCFixupKind Kind = MCFixupKind(FixupKind);
- Fixups.push_back(MCFixup::Create(0, Expr, Kind));
+/// getMachineOpValue - Return binary encoding of operand. If the machine
+/// operand requires relocation, record the relocation and return zero.
+unsigned
+AArch64MCCodeEmitter::getMachineOpValue(const MCInst &MI, const MCOperand &MO,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ if (MO.isReg())
+ return Ctx.getRegisterInfo()->getEncodingValue(MO.getReg());
+ else {
+ assert(MO.isImm() && "did not expect relocated expression");
+ return static_cast<unsigned>(MO.getImm());
+ }
+ assert(0 && "Unable to encode MCOperand!");
return 0;
}
-unsigned AArch64MCCodeEmitter::
-getOffsetUImm12OpValue(const MCInst &MI, unsigned OpIdx,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI,
- int MemSize) const {
- const MCOperand &ImmOp = MI.getOperand(OpIdx);
- if (ImmOp.isImm())
- return ImmOp.getImm();
-
- assert(ImmOp.isExpr() && "Unexpected operand type");
- const AArch64MCExpr *Expr = cast<AArch64MCExpr>(ImmOp.getExpr());
- unsigned FixupKind;
-
-
- switch (Expr->getKind()) {
- default: llvm_unreachable("Unexpected operand modifier");
- case AArch64MCExpr::VK_AARCH64_LO12: {
- static const unsigned FixupsBySize[] = { AArch64::fixup_a64_ldst8_lo12,
- AArch64::fixup_a64_ldst16_lo12,
- AArch64::fixup_a64_ldst32_lo12,
- AArch64::fixup_a64_ldst64_lo12,
- AArch64::fixup_a64_ldst128_lo12 };
- assert(MemSize <= 16 && "Invalid fixup for operation");
- FixupKind = FixupsBySize[Log2_32(MemSize)];
- break;
- }
- case AArch64MCExpr::VK_AARCH64_GOT_LO12:
- assert(MemSize == 8 && "Invalid fixup for operation");
- FixupKind = AArch64::fixup_a64_ld64_got_lo12_nc;
- break;
- case AArch64MCExpr::VK_AARCH64_DTPREL_LO12: {
- static const unsigned FixupsBySize[] = {
- AArch64::fixup_a64_ldst8_dtprel_lo12,
- AArch64::fixup_a64_ldst16_dtprel_lo12,
- AArch64::fixup_a64_ldst32_dtprel_lo12,
- AArch64::fixup_a64_ldst64_dtprel_lo12
- };
- assert(MemSize <= 8 && "Invalid fixup for operation");
- FixupKind = FixupsBySize[Log2_32(MemSize)];
- break;
- }
- case AArch64MCExpr::VK_AARCH64_DTPREL_LO12_NC: {
- static const unsigned FixupsBySize[] = {
- AArch64::fixup_a64_ldst8_dtprel_lo12_nc,
- AArch64::fixup_a64_ldst16_dtprel_lo12_nc,
- AArch64::fixup_a64_ldst32_dtprel_lo12_nc,
- AArch64::fixup_a64_ldst64_dtprel_lo12_nc
- };
- assert(MemSize <= 8 && "Invalid fixup for operation");
- FixupKind = FixupsBySize[Log2_32(MemSize)];
- break;
- }
- case AArch64MCExpr::VK_AARCH64_GOTTPREL_LO12:
- assert(MemSize == 8 && "Invalid fixup for operation");
- FixupKind = AArch64::fixup_a64_ld64_gottprel_lo12_nc;
- break;
- case AArch64MCExpr::VK_AARCH64_TPREL_LO12:{
- static const unsigned FixupsBySize[] = {
- AArch64::fixup_a64_ldst8_tprel_lo12,
- AArch64::fixup_a64_ldst16_tprel_lo12,
- AArch64::fixup_a64_ldst32_tprel_lo12,
- AArch64::fixup_a64_ldst64_tprel_lo12
- };
- assert(MemSize <= 8 && "Invalid fixup for operation");
- FixupKind = FixupsBySize[Log2_32(MemSize)];
- break;
- }
- case AArch64MCExpr::VK_AARCH64_TPREL_LO12_NC: {
- static const unsigned FixupsBySize[] = {
- AArch64::fixup_a64_ldst8_tprel_lo12_nc,
- AArch64::fixup_a64_ldst16_tprel_lo12_nc,
- AArch64::fixup_a64_ldst32_tprel_lo12_nc,
- AArch64::fixup_a64_ldst64_tprel_lo12_nc
- };
- assert(MemSize <= 8 && "Invalid fixup for operation");
- FixupKind = FixupsBySize[Log2_32(MemSize)];
- break;
- }
- case AArch64MCExpr::VK_AARCH64_TLSDESC_LO12:
- assert(MemSize == 8 && "Invalid fixup for operation");
- FixupKind = AArch64::fixup_a64_tlsdesc_ld64_lo12_nc;
- break;
+template<unsigned FixupKind> uint32_t
+AArch64MCCodeEmitter::getLdStUImm12OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ uint32_t ImmVal = 0;
+
+ if (MO.isImm())
+ ImmVal = static_cast<uint32_t>(MO.getImm());
+ else {
+ assert(MO.isExpr() && "unable to encode load/store imm operand");
+ MCFixupKind Kind = MCFixupKind(FixupKind);
+ Fixups.push_back(MCFixup::Create(0, MO.getExpr(), Kind, MI.getLoc()));
+ ++MCNumFixups;
}
- return getAddressWithFixup(ImmOp, FixupKind, Fixups, STI);
+ return ImmVal;
}
-unsigned
-AArch64MCCodeEmitter::getAddSubImmOpValue(const MCInst &MI, unsigned OpIdx,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
+/// getAdrLabelOpValue - Return encoding info for 21-bit immediate ADR label
+/// target.
+uint32_t
+AArch64MCCodeEmitter::getAdrLabelOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
const MCOperand &MO = MI.getOperand(OpIdx);
+
+ // If the destination is an immediate, we have nothing to do.
if (MO.isImm())
- return static_cast<unsigned>(MO.getImm());
+ return MO.getImm();
+ assert(MO.isExpr() && "Unexpected target type!");
+ const MCExpr *Expr = MO.getExpr();
- assert(MO.isExpr());
-
- unsigned FixupKind = 0;
- switch(cast<AArch64MCExpr>(MO.getExpr())->getKind()) {
- default: llvm_unreachable("Invalid expression modifier");
- case AArch64MCExpr::VK_AARCH64_LO12:
- FixupKind = AArch64::fixup_a64_add_lo12; break;
- case AArch64MCExpr::VK_AARCH64_DTPREL_HI12:
- FixupKind = AArch64::fixup_a64_add_dtprel_hi12; break;
- case AArch64MCExpr::VK_AARCH64_DTPREL_LO12:
- FixupKind = AArch64::fixup_a64_add_dtprel_lo12; break;
- case AArch64MCExpr::VK_AARCH64_DTPREL_LO12_NC:
- FixupKind = AArch64::fixup_a64_add_dtprel_lo12_nc; break;
- case AArch64MCExpr::VK_AARCH64_TPREL_HI12:
- FixupKind = AArch64::fixup_a64_add_tprel_hi12; break;
- case AArch64MCExpr::VK_AARCH64_TPREL_LO12:
- FixupKind = AArch64::fixup_a64_add_tprel_lo12; break;
- case AArch64MCExpr::VK_AARCH64_TPREL_LO12_NC:
- FixupKind = AArch64::fixup_a64_add_tprel_lo12_nc; break;
- case AArch64MCExpr::VK_AARCH64_TLSDESC_LO12:
- FixupKind = AArch64::fixup_a64_tlsdesc_add_lo12_nc; break;
- }
+ MCFixupKind Kind = MI.getOpcode() == AArch64::ADR
+ ? MCFixupKind(AArch64::fixup_aarch64_pcrel_adr_imm21)
+ : MCFixupKind(AArch64::fixup_aarch64_pcrel_adrp_imm21);
+ Fixups.push_back(MCFixup::Create(0, Expr, Kind, MI.getLoc()));
- return getAddressWithFixup(MO, FixupKind, Fixups, STI);
-}
+ MCNumFixups += 1;
-unsigned
-AArch64MCCodeEmitter::getAdrpLabelOpValue(const MCInst &MI, unsigned OpIdx,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
+ // All of the information is in the fixup.
+ return 0;
+}
+/// getAddSubImmOpValue - Return the encoding for the 12-bit immediate value
+/// and the shift field. The immediate occupies bits [11:0] of the return
+/// value; bit 12 is set when the shift is LSL #12, the only non-zero shift
+/// permitted here.
+uint32_t
+AArch64MCCodeEmitter::getAddSubImmOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ // Suboperands are [imm, shifter].
const MCOperand &MO = MI.getOperand(OpIdx);
+ const MCOperand &MO1 = MI.getOperand(OpIdx + 1);
+ assert(AArch64_AM::getShiftType(MO1.getImm()) == AArch64_AM::LSL &&
+ "unexpected shift type for add/sub immediate");
+ unsigned ShiftVal = AArch64_AM::getShiftValue(MO1.getImm());
+ assert((ShiftVal == 0 || ShiftVal == 12) &&
+ "unexpected shift value for add/sub immediate");
if (MO.isImm())
- return static_cast<unsigned>(MO.getImm());
-
- assert(MO.isExpr());
+ return MO.getImm() | (ShiftVal == 0 ? 0 : (1 << 12));
+ assert(MO.isExpr() && "Unable to encode MCOperand!");
+ const MCExpr *Expr = MO.getExpr();
- unsigned Modifier = AArch64MCExpr::VK_AARCH64_None;
- if (const AArch64MCExpr *Expr = dyn_cast<AArch64MCExpr>(MO.getExpr()))
- Modifier = Expr->getKind();
+ // Encode the 12 bits of the fixup.
+ MCFixupKind Kind = MCFixupKind(AArch64::fixup_aarch64_add_imm12);
+ Fixups.push_back(MCFixup::Create(0, Expr, Kind, MI.getLoc()));
- unsigned FixupKind = 0;
- switch(Modifier) {
- case AArch64MCExpr::VK_AARCH64_None:
- FixupKind = AArch64::fixup_a64_adr_prel_page;
- break;
- case AArch64MCExpr::VK_AARCH64_GOT:
- FixupKind = AArch64::fixup_a64_adr_prel_got_page;
- break;
- case AArch64MCExpr::VK_AARCH64_GOTTPREL:
- FixupKind = AArch64::fixup_a64_adr_gottprel_page;
- break;
- case AArch64MCExpr::VK_AARCH64_TLSDESC:
- FixupKind = AArch64::fixup_a64_tlsdesc_adr_page;
- break;
- default:
- llvm_unreachable("Unknown symbol reference kind for ADRP instruction");
- }
+ ++MCNumFixups;
- return getAddressWithFixup(MO, FixupKind, Fixups, STI);
+ return 0;
}
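
A worked example of the value computed above (minimal standalone sketch; the helper name and inputs are illustrative): #0x123 shifted by LSL #12 encodes as 0x123 | (1 << 12) == 0x1123.

    #include <cassert>
    #include <cstdint>

    uint32_t encodeAddSubImm(uint32_t Imm12, unsigned ShiftVal) {
      assert(Imm12 < (1u << 12) && (ShiftVal == 0 || ShiftVal == 12));
      return Imm12 | (ShiftVal == 0 ? 0 : (1u << 12));
    }

    int main() {
      assert(encodeAddSubImm(0x123, 12) == 0x1123); // ADD ..., #0x123, lsl #12
      assert(encodeAddSubImm(0x123, 0) == 0x123);   // ADD ..., #0x123
    }
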
-unsigned
-AArch64MCCodeEmitter::getBitfield32LSLOpValue(const MCInst &MI, unsigned OpIdx,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
-
+/// getCondBranchTargetOpValue - Return the encoded value for a conditional
+/// branch target.
+uint32_t AArch64MCCodeEmitter::getCondBranchTargetOpValue(
+ const MCInst &MI, unsigned OpIdx, SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
const MCOperand &MO = MI.getOperand(OpIdx);
- assert(MO.isImm() && "Only immediate expected for shift");
- return ((32 - MO.getImm()) & 0x1f) | (31 - MO.getImm()) << 6;
-}
+ // If the destination is an immediate, we have nothing to do.
+ if (MO.isImm())
+ return MO.getImm();
+ assert(MO.isExpr() && "Unexpected target type!");
-unsigned
-AArch64MCCodeEmitter::getBitfield64LSLOpValue(const MCInst &MI, unsigned OpIdx,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
+ MCFixupKind Kind = MCFixupKind(AArch64::fixup_aarch64_pcrel_branch19);
+ Fixups.push_back(MCFixup::Create(0, MO.getExpr(), Kind, MI.getLoc()));
- const MCOperand &MO = MI.getOperand(OpIdx);
- assert(MO.isImm() && "Only immediate expected for shift");
+ ++MCNumFixups;
- return ((64 - MO.getImm()) & 0x3f) | (63 - MO.getImm()) << 6;
+ // All of the information is in the fixup.
+ return 0;
}
-unsigned AArch64MCCodeEmitter::getShiftRightImm8(
- const MCInst &MI, unsigned Op, SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
- return 8 - MI.getOperand(Op).getImm();
-}
+/// getLoadLiteralOpValue - Return the encoded value for a load-literal
+/// pc-relative address.
+uint32_t
+AArch64MCCodeEmitter::getLoadLiteralOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ const MCOperand &MO = MI.getOperand(OpIdx);
-unsigned AArch64MCCodeEmitter::getShiftRightImm16(
- const MCInst &MI, unsigned Op, SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
- return 16 - MI.getOperand(Op).getImm();
-}
+ // If the destination is an immediate, we have nothing to do.
+ if (MO.isImm())
+ return MO.getImm();
+ assert(MO.isExpr() && "Unexpected target type!");
-unsigned AArch64MCCodeEmitter::getShiftRightImm32(
- const MCInst &MI, unsigned Op, SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
- return 32 - MI.getOperand(Op).getImm();
-}
+ MCFixupKind Kind = MCFixupKind(AArch64::fixup_aarch64_ldr_pcrel_imm19);
+ Fixups.push_back(MCFixup::Create(0, MO.getExpr(), Kind, MI.getLoc()));
-unsigned AArch64MCCodeEmitter::getShiftRightImm64(
- const MCInst &MI, unsigned Op, SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
- return 64 - MI.getOperand(Op).getImm();
-}
+ ++MCNumFixups;
-unsigned AArch64MCCodeEmitter::getShiftLeftImm8(
- const MCInst &MI, unsigned Op, SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
- return MI.getOperand(Op).getImm() - 8;
+ // All of the information is in the fixup.
+ return 0;
}
-unsigned AArch64MCCodeEmitter::getShiftLeftImm16(
- const MCInst &MI, unsigned Op, SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
- return MI.getOperand(Op).getImm() - 16;
+uint32_t
+AArch64MCCodeEmitter::getMemExtendOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ unsigned SignExtend = MI.getOperand(OpIdx).getImm();
+ unsigned DoShift = MI.getOperand(OpIdx + 1).getImm();
+ return (SignExtend << 1) | DoShift;
}
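
The two-bit field packed above can be checked with a tiny standalone sketch (hypothetical helper name):

    #include <cassert>

    unsigned encodeMemExtend(unsigned SignExtend, unsigned DoShift) {
      return (SignExtend << 1) | DoShift; // bit 1 = extend, bit 0 = shift
    }

    int main() {
      assert(encodeMemExtend(1, 1) == 3); // sign-extended and shifted
      assert(encodeMemExtend(0, 0) == 0); // neither
    }
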
-unsigned AArch64MCCodeEmitter::getShiftLeftImm32(
- const MCInst &MI, unsigned Op, SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
- return MI.getOperand(Op).getImm() - 32;
-}
+uint32_t
+AArch64MCCodeEmitter::getMoveWideImmOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ const MCOperand &MO = MI.getOperand(OpIdx);
-unsigned AArch64MCCodeEmitter::getShiftLeftImm64(
- const MCInst &MI, unsigned Op, SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
- return MI.getOperand(Op).getImm() - 64;
+ if (MO.isImm())
+ return MO.getImm();
+ assert(MO.isExpr() && "Unexpected movz/movk immediate");
+
+ Fixups.push_back(MCFixup::Create(
+ 0, MO.getExpr(), MCFixupKind(AArch64::fixup_aarch64_movw), MI.getLoc()));
+
+ ++MCNumFixups;
+
+ return 0;
}
-template<AArch64::Fixups fixupDesired> unsigned
-AArch64MCCodeEmitter::getLabelOpValue(const MCInst &MI,
- unsigned OpIdx,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
+/// getTestBranchTargetOpValue - Return the encoded value for a test-bit-and-
+/// branch target.
+uint32_t AArch64MCCodeEmitter::getTestBranchTargetOpValue(
+ const MCInst &MI, unsigned OpIdx, SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
const MCOperand &MO = MI.getOperand(OpIdx);
- if (MO.isExpr())
- return getAddressWithFixup(MO, fixupDesired, Fixups, STI);
+ // If the destination is an immediate, we have nothing to do.
+ if (MO.isImm())
+ return MO.getImm();
+ assert(MO.isExpr() && "Unexpected ADR target type!");
- assert(MO.isImm());
- return MO.getImm();
+ MCFixupKind Kind = MCFixupKind(AArch64::fixup_aarch64_pcrel_branch14);
+ Fixups.push_back(MCFixup::Create(0, MO.getExpr(), Kind, MI.getLoc()));
+
+ ++MCNumFixups;
+
+ // All of the information is in the fixup.
+ return 0;
}
-unsigned
-AArch64MCCodeEmitter::getLoadLitLabelOpValue(const MCInst &MI,
- unsigned OpIdx,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
+/// getBranchTargetOpValue - Return the encoded value for an unconditional
+/// branch target.
+uint32_t
+AArch64MCCodeEmitter::getBranchTargetOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
const MCOperand &MO = MI.getOperand(OpIdx);
+ // If the destination is an immediate, we have nothing to do.
if (MO.isImm())
return MO.getImm();
+ assert(MO.isExpr() && "Unexpected ADR target type!");
- assert(MO.isExpr());
+ MCFixupKind Kind = MI.getOpcode() == AArch64::BL
+ ? MCFixupKind(AArch64::fixup_aarch64_pcrel_call26)
+ : MCFixupKind(AArch64::fixup_aarch64_pcrel_branch26);
+ Fixups.push_back(MCFixup::Create(0, MO.getExpr(), Kind, MI.getLoc()));
- unsigned FixupKind;
- if (isa<AArch64MCExpr>(MO.getExpr())) {
- assert(dyn_cast<AArch64MCExpr>(MO.getExpr())->getKind()
- == AArch64MCExpr::VK_AARCH64_GOTTPREL
- && "Invalid symbol modifier for literal load");
- FixupKind = AArch64::fixup_a64_ld_gottprel_prel19;
- } else {
- FixupKind = AArch64::fixup_a64_ld_prel;
- }
+ ++MCNumFixups;
- return getAddressWithFixup(MO, FixupKind, Fixups, STI);
+ // All of the information is in the fixup.
+ return 0;
}
+/// getVecShifterOpValue - Return the encoded value for the vector shifter:
+///
+/// 00 -> 0
+/// 01 -> 8
+/// 10 -> 16
+/// 11 -> 24
+uint32_t
+AArch64MCCodeEmitter::getVecShifterOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ assert(MO.isImm() && "Expected an immediate value for the shift amount!");
-unsigned
-AArch64MCCodeEmitter::getMachineOpValue(const MCInst &MI,
- const MCOperand &MO,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
- if (MO.isReg()) {
- return Ctx.getRegisterInfo()->getEncodingValue(MO.getReg());
- } else if (MO.isImm()) {
- return static_cast<unsigned>(MO.getImm());
+ switch (MO.getImm()) {
+ default:
+ break;
+ case 0:
+ return 0;
+ case 8:
+ return 1;
+ case 16:
+ return 2;
+ case 24:
+ return 3;
}
- llvm_unreachable("Unable to encode MCOperand!");
+ assert(false && "Invalid value for vector shift amount!");
return 0;
}
-unsigned
-AArch64MCCodeEmitter::getMoveWideImmOpValue(const MCInst &MI, unsigned OpIdx,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
- const MCOperand &UImm16MO = MI.getOperand(OpIdx);
- const MCOperand &ShiftMO = MI.getOperand(OpIdx + 1);
+uint32_t
+AArch64MCCodeEmitter::getSIMDShift64OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ assert(MO.isImm() && "Expected an immediate value for the shift amount!");
+ return 64 - (MO.getImm());
+}
- unsigned Result = static_cast<unsigned>(ShiftMO.getImm()) << 16;
+uint32_t AArch64MCCodeEmitter::getSIMDShift64_32OpValue(
+ const MCInst &MI, unsigned OpIdx, SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ assert(MO.isImm() && "Expected an immediate value for the shift amount!");
+ return 64 - (MO.getImm() | 32);
+}
- if (UImm16MO.isImm()) {
- Result |= UImm16MO.getImm();
- return Result;
- }
+uint32_t
+AArch64MCCodeEmitter::getSIMDShift32OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ assert(MO.isImm() && "Expected an immediate value for the shift amount!");
+ return 32 - (MO.getImm() | 16);
+}
- const AArch64MCExpr *A64E = cast<AArch64MCExpr>(UImm16MO.getExpr());
- AArch64::Fixups requestedFixup;
- switch (A64E->getKind()) {
- default: llvm_unreachable("unexpected expression modifier");
- case AArch64MCExpr::VK_AARCH64_ABS_G0:
- requestedFixup = AArch64::fixup_a64_movw_uabs_g0; break;
- case AArch64MCExpr::VK_AARCH64_ABS_G0_NC:
- requestedFixup = AArch64::fixup_a64_movw_uabs_g0_nc; break;
- case AArch64MCExpr::VK_AARCH64_ABS_G1:
- requestedFixup = AArch64::fixup_a64_movw_uabs_g1; break;
- case AArch64MCExpr::VK_AARCH64_ABS_G1_NC:
- requestedFixup = AArch64::fixup_a64_movw_uabs_g1_nc; break;
- case AArch64MCExpr::VK_AARCH64_ABS_G2:
- requestedFixup = AArch64::fixup_a64_movw_uabs_g2; break;
- case AArch64MCExpr::VK_AARCH64_ABS_G2_NC:
- requestedFixup = AArch64::fixup_a64_movw_uabs_g2_nc; break;
- case AArch64MCExpr::VK_AARCH64_ABS_G3:
- requestedFixup = AArch64::fixup_a64_movw_uabs_g3; break;
- case AArch64MCExpr::VK_AARCH64_SABS_G0:
- requestedFixup = AArch64::fixup_a64_movw_sabs_g0; break;
- case AArch64MCExpr::VK_AARCH64_SABS_G1:
- requestedFixup = AArch64::fixup_a64_movw_sabs_g1; break;
- case AArch64MCExpr::VK_AARCH64_SABS_G2:
- requestedFixup = AArch64::fixup_a64_movw_sabs_g2; break;
- case AArch64MCExpr::VK_AARCH64_DTPREL_G2:
- requestedFixup = AArch64::fixup_a64_movw_dtprel_g2; break;
- case AArch64MCExpr::VK_AARCH64_DTPREL_G1:
- requestedFixup = AArch64::fixup_a64_movw_dtprel_g1; break;
- case AArch64MCExpr::VK_AARCH64_DTPREL_G1_NC:
- requestedFixup = AArch64::fixup_a64_movw_dtprel_g1_nc; break;
- case AArch64MCExpr::VK_AARCH64_DTPREL_G0:
- requestedFixup = AArch64::fixup_a64_movw_dtprel_g0; break;
- case AArch64MCExpr::VK_AARCH64_DTPREL_G0_NC:
- requestedFixup = AArch64::fixup_a64_movw_dtprel_g0_nc; break;
- case AArch64MCExpr::VK_AARCH64_GOTTPREL_G1:
- requestedFixup = AArch64::fixup_a64_movw_gottprel_g1; break;
- case AArch64MCExpr::VK_AARCH64_GOTTPREL_G0_NC:
- requestedFixup = AArch64::fixup_a64_movw_gottprel_g0_nc; break;
- case AArch64MCExpr::VK_AARCH64_TPREL_G2:
- requestedFixup = AArch64::fixup_a64_movw_tprel_g2; break;
- case AArch64MCExpr::VK_AARCH64_TPREL_G1:
- requestedFixup = AArch64::fixup_a64_movw_tprel_g1; break;
- case AArch64MCExpr::VK_AARCH64_TPREL_G1_NC:
- requestedFixup = AArch64::fixup_a64_movw_tprel_g1_nc; break;
- case AArch64MCExpr::VK_AARCH64_TPREL_G0:
- requestedFixup = AArch64::fixup_a64_movw_tprel_g0; break;
- case AArch64MCExpr::VK_AARCH64_TPREL_G0_NC:
- requestedFixup = AArch64::fixup_a64_movw_tprel_g0_nc; break;
- }
+uint32_t
+AArch64MCCodeEmitter::getSIMDShift16OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ assert(MO.isImm() && "Expected an immediate value for the shift amount!");
+ return 16 - (MO.getImm() | 8);
+}
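
In the right-shift encoders above the OR acts as addition, since the shift amount is always smaller than the OR'd constant; a minimal check with illustrative values (the 64_32 case, shift amount 5):

    #include <cassert>

    int main() {
      unsigned Imm = 5;                    // hypothetical shift, 0 <= Imm < 32
      assert((Imm | 32) == Imm + 32);      // OR == add while Imm < 32
      assert(64 - (Imm | 32) == 32 - Imm); // encoded field for this family
    }
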
- return Result | getAddressWithFixup(UImm16MO, requestedFixup, Fixups, STI);
+/// getFixedPointScaleOpValue - Return the encoded value for the
+/// FP-to-fixed-point scale factor.
+uint32_t AArch64MCCodeEmitter::getFixedPointScaleOpValue(
+ const MCInst &MI, unsigned OpIdx, SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ assert(MO.isImm() && "Expected an immediate value for the scale amount!");
+ return 64 - MO.getImm();
}
-template<int hasRs, int hasRt2> unsigned
-AArch64MCCodeEmitter::fixLoadStoreExclusive(const MCInst &MI,
- unsigned EncodedValue,
+uint32_t
+AArch64MCCodeEmitter::getVecShiftR64OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
- if (!hasRs) EncodedValue |= 0x001F0000;
- if (!hasRt2) EncodedValue |= 0x00007C00;
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ assert(MO.isImm() && "Expected an immediate value for the scale amount!");
+ return 64 - MO.getImm();
+}
- return EncodedValue;
+uint32_t
+AArch64MCCodeEmitter::getVecShiftR32OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ assert(MO.isImm() && "Expected an immediate value for the scale amount!");
+ return 32 - MO.getImm();
}
-unsigned
-AArch64MCCodeEmitter::fixMOVZ(const MCInst &MI, unsigned EncodedValue,
- const MCSubtargetInfo &STI) const {
+uint32_t
+AArch64MCCodeEmitter::getVecShiftR16OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ assert(MO.isImm() && "Expected an immediate value for the scale amount!");
+ return 16 - MO.getImm();
+}
+
+uint32_t
+AArch64MCCodeEmitter::getVecShiftR8OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ assert(MO.isImm() && "Expected an immediate value for the scale amount!");
+ return 8 - MO.getImm();
+}
+
+uint32_t
+AArch64MCCodeEmitter::getVecShiftL64OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ assert(MO.isImm() && "Expected an immediate value for the scale amount!");
+ return MO.getImm() - 64;
+}
+
+uint32_t
+AArch64MCCodeEmitter::getVecShiftL32OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ assert(MO.isImm() && "Expected an immediate value for the scale amount!");
+ return MO.getImm() - 32;
+}
+
+uint32_t
+AArch64MCCodeEmitter::getVecShiftL16OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ assert(MO.isImm() && "Expected an immediate value for the scale amount!");
+ return MO.getImm() - 16;
+}
+
+uint32_t
+AArch64MCCodeEmitter::getVecShiftL8OpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ assert(MO.isImm() && "Expected an immediate value for the scale amount!");
+ return MO.getImm() - 8;
+}
+
+/// getMoveVecShifterOpValue - Return the encoded value for the vector move
+/// shifter (MSL).
+uint32_t AArch64MCCodeEmitter::getMoveVecShifterOpValue(
+ const MCInst &MI, unsigned OpIdx, SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ const MCOperand &MO = MI.getOperand(OpIdx);
+ assert(MO.isImm() &&
+ "Expected an immediate value for the move shift amount!");
+ unsigned ShiftVal = AArch64_AM::getShiftValue(MO.getImm());
+ assert((ShiftVal == 8 || ShiftVal == 16) && "Invalid shift amount!");
+ return ShiftVal == 8 ? 0 : 1;
+}
+
+unsigned AArch64MCCodeEmitter::fixMOVZ(const MCInst &MI, unsigned EncodedValue,
+ const MCSubtargetInfo &STI) const {
// If one of the signed fixup kinds is applied to a MOVZ instruction, the
// eventual result could be either a MOVZ or a MOVN. It's the MCCodeEmitter's
// job to ensure that any bits possibly affected by this are 0. This means we
@@ -552,23 +589,38 @@ AArch64MCCodeEmitter::fixMOVZ(const MCInst &MI, unsigned EncodedValue,
const AArch64MCExpr *A64E = cast<AArch64MCExpr>(UImm16MO.getExpr());
switch (A64E->getKind()) {
- case AArch64MCExpr::VK_AARCH64_SABS_G0:
- case AArch64MCExpr::VK_AARCH64_SABS_G1:
- case AArch64MCExpr::VK_AARCH64_SABS_G2:
- case AArch64MCExpr::VK_AARCH64_DTPREL_G2:
- case AArch64MCExpr::VK_AARCH64_DTPREL_G1:
- case AArch64MCExpr::VK_AARCH64_DTPREL_G0:
- case AArch64MCExpr::VK_AARCH64_GOTTPREL_G1:
- case AArch64MCExpr::VK_AARCH64_TPREL_G2:
- case AArch64MCExpr::VK_AARCH64_TPREL_G1:
- case AArch64MCExpr::VK_AARCH64_TPREL_G0:
+ case AArch64MCExpr::VK_DTPREL_G2:
+ case AArch64MCExpr::VK_DTPREL_G1:
+ case AArch64MCExpr::VK_DTPREL_G0:
+ case AArch64MCExpr::VK_GOTTPREL_G1:
+ case AArch64MCExpr::VK_TPREL_G2:
+ case AArch64MCExpr::VK_TPREL_G1:
+ case AArch64MCExpr::VK_TPREL_G0:
return EncodedValue & ~(1u << 30);
default:
// Nothing to do for an unsigned fixup.
return EncodedValue;
}
- llvm_unreachable("Should have returned by now");
+
+ return EncodedValue & ~(1u << 30);
+}
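
The bit cleared above is the one distinguishing MOVN from MOVZ, so masking it guarantees the signed-fixup result starts from the MOVZ form; a minimal illustration (the starting constant is a hypothetical placeholder, not a real encoding):

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t Encoded = 0x12345678u | (1u << 30); // pretend MOVN-flavoured bits
      uint32_t Fixed = Encoded & ~(1u << 30);      // force the MOVZ form
      assert((Fixed & (1u << 30)) == 0);
    }
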
+
+void AArch64MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ if (MI.getOpcode() == AArch64::TLSDESCCALL) {
+ // This is a directive which applies an R_AARCH64_TLSDESC_CALL to the
+ // following (BLR) instruction. It doesn't emit any code itself so it
+ // doesn't go through the normal TableGenerated channels.
+ MCFixupKind Fixup = MCFixupKind(AArch64::fixup_aarch64_tlsdesc_call);
+ Fixups.push_back(MCFixup::Create(0, MI.getOperand(0).getExpr(), Fixup));
+ return;
+ }
+
+ uint64_t Binary = getBinaryCodeForInstr(MI, Fixups, STI);
+ EmitConstant(Binary, 4, OS);
+ ++MCNumEmitted; // Keep track of the number of MCInsts emitted.
}
unsigned
@@ -581,32 +633,22 @@ AArch64MCCodeEmitter::fixMulHigh(const MCInst &MI,
return EncodedValue;
}
-MCCodeEmitter *llvm::createAArch64MCCodeEmitter(const MCInstrInfo &MCII,
- const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI,
- MCContext &Ctx) {
- return new AArch64MCCodeEmitter(Ctx);
-}
-
-void AArch64MCCodeEmitter::
-EncodeInstruction(const MCInst &MI, raw_ostream &OS,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
- if (MI.getOpcode() == AArch64::TLSDESCCALL) {
- // This is a directive which applies an R_AARCH64_TLSDESC_CALL to the
- // following (BLR) instruction. It doesn't emit any code itself so it
- // doesn't go through the normal TableGenerated channels.
- MCFixupKind Fixup = MCFixupKind(AArch64::fixup_a64_tlsdesc_call);
- const MCExpr *Expr;
- Expr = AArch64MCExpr::CreateTLSDesc(MI.getOperand(0).getExpr(), Ctx);
- Fixups.push_back(MCFixup::Create(0, Expr, Fixup));
- return;
- }
-
- uint32_t Binary = getBinaryCodeForInstr(MI, Fixups, STI);
+template<int hasRs, int hasRt2> unsigned
+AArch64MCCodeEmitter::fixLoadStoreExclusive(const MCInst &MI,
+ unsigned EncodedValue,
+ const MCSubtargetInfo &STI) const {
+ if (!hasRs) EncodedValue |= 0x001F0000;
+ if (!hasRt2) EncodedValue |= 0x00007C00;
- EmitInstruction(Binary, OS);
+ return EncodedValue;
}
+unsigned AArch64MCCodeEmitter::fixOneOperandFPComparison(
+ const MCInst &MI, unsigned EncodedValue, const MCSubtargetInfo &STI) const {
+ // The Rm field of FCMP and friends is unused - it should be assembled
+ // as 0, but is ignored by the processor.
+ EncodedValue &= ~(0x1f << 16);
+ return EncodedValue;
+}
#include "AArch64GenMCCodeEmitter.inc"
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp
index c7ccaee..85c3ec7 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp
@@ -12,74 +12,121 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "aarch64mcexpr"
#include "AArch64MCExpr.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCELF.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCValue.h"
#include "llvm/Object/ELF.h"
+#include "llvm/Support/ErrorHandling.h"
using namespace llvm;
-const AArch64MCExpr*
-AArch64MCExpr::Create(VariantKind Kind, const MCExpr *Expr,
- MCContext &Ctx) {
- return new (Ctx) AArch64MCExpr(Kind, Expr);
+#define DEBUG_TYPE "aarch64symbolrefexpr"
+
+const AArch64MCExpr *AArch64MCExpr::Create(const MCExpr *Expr, VariantKind Kind,
+ MCContext &Ctx) {
+ return new (Ctx) AArch64MCExpr(Expr, Kind);
+}
+
+StringRef AArch64MCExpr::getVariantKindName() const {
+ switch (static_cast<uint32_t>(getKind())) {
+ case VK_CALL: return "";
+ case VK_LO12: return ":lo12:";
+ case VK_ABS_G3: return ":abs_g3:";
+ case VK_ABS_G2: return ":abs_g2:";
+ case VK_ABS_G2_S: return ":abs_g2_s:";
+ case VK_ABS_G2_NC: return ":abs_g2_nc:";
+ case VK_ABS_G1: return ":abs_g1:";
+ case VK_ABS_G1_S: return ":abs_g1_s:";
+ case VK_ABS_G1_NC: return ":abs_g1_nc:";
+ case VK_ABS_G0: return ":abs_g0:";
+ case VK_ABS_G0_S: return ":abs_g0_s:";
+ case VK_ABS_G0_NC: return ":abs_g0_nc:";
+ case VK_DTPREL_G2: return ":dtprel_g2:";
+ case VK_DTPREL_G1: return ":dtprel_g1:";
+ case VK_DTPREL_G1_NC: return ":dtprel_g1_nc:";
+ case VK_DTPREL_G0: return ":dtprel_g0:";
+ case VK_DTPREL_G0_NC: return ":dtprel_g0_nc:";
+ case VK_DTPREL_HI12: return ":dtprel_hi12:";
+ case VK_DTPREL_LO12: return ":dtprel_lo12:";
+ case VK_DTPREL_LO12_NC: return ":dtprel_lo12_nc:";
+ case VK_TPREL_G2: return ":tprel_g2:";
+ case VK_TPREL_G1: return ":tprel_g1:";
+ case VK_TPREL_G1_NC: return ":tprel_g1_nc:";
+ case VK_TPREL_G0: return ":tprel_g0:";
+ case VK_TPREL_G0_NC: return ":tprel_g0_nc:";
+ case VK_TPREL_HI12: return ":tprel_hi12:";
+ case VK_TPREL_LO12: return ":tprel_lo12:";
+ case VK_TPREL_LO12_NC: return ":tprel_lo12_nc:";
+ case VK_TLSDESC_LO12: return ":tlsdesc_lo12:";
+ case VK_ABS_PAGE: return "";
+ case VK_GOT_PAGE: return ":got:";
+ case VK_GOT_LO12: return ":got_lo12:";
+ case VK_GOTTPREL_PAGE: return ":gottprel:";
+ case VK_GOTTPREL_LO12_NC: return ":gottprel_lo12:";
+ case VK_GOTTPREL_G1: return ":gottprel_g1:";
+ case VK_GOTTPREL_G0_NC: return ":gottprel_g0_nc:";
+ case VK_TLSDESC: return "";
+ case VK_TLSDESC_PAGE: return ":tlsdesc:";
+ default:
+ llvm_unreachable("Invalid ELF symbol kind");
+ }
}
void AArch64MCExpr::PrintImpl(raw_ostream &OS) const {
- switch (Kind) {
- default: llvm_unreachable("Invalid kind!");
- case VK_AARCH64_GOT: OS << ":got:"; break;
- case VK_AARCH64_GOT_LO12: OS << ":got_lo12:"; break;
- case VK_AARCH64_LO12: OS << ":lo12:"; break;
- case VK_AARCH64_ABS_G0: OS << ":abs_g0:"; break;
- case VK_AARCH64_ABS_G0_NC: OS << ":abs_g0_nc:"; break;
- case VK_AARCH64_ABS_G1: OS << ":abs_g1:"; break;
- case VK_AARCH64_ABS_G1_NC: OS << ":abs_g1_nc:"; break;
- case VK_AARCH64_ABS_G2: OS << ":abs_g2:"; break;
- case VK_AARCH64_ABS_G2_NC: OS << ":abs_g2_nc:"; break;
- case VK_AARCH64_ABS_G3: OS << ":abs_g3:"; break;
- case VK_AARCH64_SABS_G0: OS << ":abs_g0_s:"; break;
- case VK_AARCH64_SABS_G1: OS << ":abs_g1_s:"; break;
- case VK_AARCH64_SABS_G2: OS << ":abs_g2_s:"; break;
- case VK_AARCH64_DTPREL_G2: OS << ":dtprel_g2:"; break;
- case VK_AARCH64_DTPREL_G1: OS << ":dtprel_g1:"; break;
- case VK_AARCH64_DTPREL_G1_NC: OS << ":dtprel_g1_nc:"; break;
- case VK_AARCH64_DTPREL_G0: OS << ":dtprel_g0:"; break;
- case VK_AARCH64_DTPREL_G0_NC: OS << ":dtprel_g0_nc:"; break;
- case VK_AARCH64_DTPREL_HI12: OS << ":dtprel_hi12:"; break;
- case VK_AARCH64_DTPREL_LO12: OS << ":dtprel_lo12:"; break;
- case VK_AARCH64_DTPREL_LO12_NC: OS << ":dtprel_lo12_nc:"; break;
- case VK_AARCH64_GOTTPREL_G1: OS << ":gottprel_g1:"; break;
- case VK_AARCH64_GOTTPREL_G0_NC: OS << ":gottprel_g0_nc:"; break;
- case VK_AARCH64_GOTTPREL: OS << ":gottprel:"; break;
- case VK_AARCH64_GOTTPREL_LO12: OS << ":gottprel_lo12:"; break;
- case VK_AARCH64_TPREL_G2: OS << ":tprel_g2:"; break;
- case VK_AARCH64_TPREL_G1: OS << ":tprel_g1:"; break;
- case VK_AARCH64_TPREL_G1_NC: OS << ":tprel_g1_nc:"; break;
- case VK_AARCH64_TPREL_G0: OS << ":tprel_g0:"; break;
- case VK_AARCH64_TPREL_G0_NC: OS << ":tprel_g0_nc:"; break;
- case VK_AARCH64_TPREL_HI12: OS << ":tprel_hi12:"; break;
- case VK_AARCH64_TPREL_LO12: OS << ":tprel_lo12:"; break;
- case VK_AARCH64_TPREL_LO12_NC: OS << ":tprel_lo12_nc:"; break;
- case VK_AARCH64_TLSDESC: OS << ":tlsdesc:"; break;
- case VK_AARCH64_TLSDESC_LO12: OS << ":tlsdesc_lo12:"; break;
+ if (getKind() != VK_NONE)
+ OS << getVariantKindName();
+ OS << *Expr;
+}
+
+// FIXME: This basically copies MCObjectStreamer::AddValueSymbols. Perhaps
+// that method should be made public?
+// FIXME: Really do the above now that two backends are using it.
+static void AddValueSymbolsImpl(const MCExpr *Value, MCAssembler *Asm) {
+ switch (Value->getKind()) {
+ case MCExpr::Target:
+ llvm_unreachable("Can't handle nested target expr!");
+ break;
+
+ case MCExpr::Constant:
+ break;
+ case MCExpr::Binary: {
+ const MCBinaryExpr *BE = cast<MCBinaryExpr>(Value);
+ AddValueSymbolsImpl(BE->getLHS(), Asm);
+ AddValueSymbolsImpl(BE->getRHS(), Asm);
+ break;
+ }
+
+ case MCExpr::SymbolRef:
+ Asm->getOrCreateSymbolData(cast<MCSymbolRefExpr>(Value)->getSymbol());
+ break;
+
+ case MCExpr::Unary:
+ AddValueSymbolsImpl(cast<MCUnaryExpr>(Value)->getSubExpr(), Asm);
+ break;
}
+}
+
+void AArch64MCExpr::AddValueSymbols(MCAssembler *Asm) const {
+ AddValueSymbolsImpl(getSubExpr(), Asm);
+}
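
The recursion above is a plain expression-tree walk; the same shape as a self-contained sketch (hypothetical mini expression type, not the MCExpr API):

    #include <vector>

    struct MiniExpr {
      enum Kind { Sym, Unary, Binary } K;
      const MiniExpr *LHS;
      const MiniExpr *RHS;
      int SymId;
    };

    void visitSymbols(const MiniExpr *E, std::vector<int> &Out) {
      switch (E->K) {
      case MiniExpr::Sym:    Out.push_back(E->SymId); break;
      case MiniExpr::Unary:  visitSymbols(E->LHS, Out); break;
      case MiniExpr::Binary: visitSymbols(E->LHS, Out);
                             visitSymbols(E->RHS, Out); break;
      }
    }

    int main() {
      MiniExpr A{MiniExpr::Sym, nullptr, nullptr, 1};
      MiniExpr B{MiniExpr::Sym, nullptr, nullptr, 2};
      MiniExpr Add{MiniExpr::Binary, &A, &B, -1};
      std::vector<int> Out;
      visitSymbols(&Add, Out); // Out == {1, 2}
    }
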
- const MCExpr *Expr = getSubExpr();
- if (Expr->getKind() != MCExpr::SymbolRef)
- OS << '(';
- Expr->print(OS);
- if (Expr->getKind() != MCExpr::SymbolRef)
- OS << ')';
+const MCSection *AArch64MCExpr::FindAssociatedSection() const {
+ llvm_unreachable("FIXME: what goes here?");
}
-bool
-AArch64MCExpr::EvaluateAsRelocatableImpl(MCValue &Res,
- const MCAsmLayout *Layout) const {
- return getSubExpr()->EvaluateAsRelocatable(Res, Layout);
+bool AArch64MCExpr::EvaluateAsRelocatableImpl(MCValue &Res,
+ const MCAsmLayout *Layout) const {
+ if (!getSubExpr()->EvaluateAsRelocatable(Res, Layout))
+ return false;
+
+ Res =
+ MCValue::get(Res.getSymA(), Res.getSymB(), Res.getConstant(), getKind());
+
+ return true;
}
static void fixELFSymbolsInTLSFixupsImpl(const MCExpr *Expr, MCAssembler &Asm) {
@@ -113,66 +160,15 @@ static void fixELFSymbolsInTLSFixupsImpl(const MCExpr *Expr, MCAssembler &Asm) {
}
void AArch64MCExpr::fixELFSymbolsInTLSFixups(MCAssembler &Asm) const {
- switch (getKind()) {
+ switch (getSymbolLoc(Kind)) {
default:
return;
- case VK_AARCH64_DTPREL_G2:
- case VK_AARCH64_DTPREL_G1:
- case VK_AARCH64_DTPREL_G1_NC:
- case VK_AARCH64_DTPREL_G0:
- case VK_AARCH64_DTPREL_G0_NC:
- case VK_AARCH64_DTPREL_HI12:
- case VK_AARCH64_DTPREL_LO12:
- case VK_AARCH64_DTPREL_LO12_NC:
- case VK_AARCH64_GOTTPREL_G1:
- case VK_AARCH64_GOTTPREL_G0_NC:
- case VK_AARCH64_GOTTPREL:
- case VK_AARCH64_GOTTPREL_LO12:
- case VK_AARCH64_TPREL_G2:
- case VK_AARCH64_TPREL_G1:
- case VK_AARCH64_TPREL_G1_NC:
- case VK_AARCH64_TPREL_G0:
- case VK_AARCH64_TPREL_G0_NC:
- case VK_AARCH64_TPREL_HI12:
- case VK_AARCH64_TPREL_LO12:
- case VK_AARCH64_TPREL_LO12_NC:
- case VK_AARCH64_TLSDESC:
- case VK_AARCH64_TLSDESC_LO12:
+ case VK_DTPREL:
+ case VK_GOTTPREL:
+ case VK_TPREL:
+ case VK_TLSDESC:
break;
}
fixELFSymbolsInTLSFixupsImpl(getSubExpr(), Asm);
}
-
-// FIXME: This basically copies MCObjectStreamer::AddValueSymbols. Perhaps
-// that method should be made public?
-// FIXME: really do above: now that two backends are using it.
-static void AddValueSymbolsImpl(const MCExpr *Value, MCAssembler *Asm) {
- switch (Value->getKind()) {
- case MCExpr::Target:
- llvm_unreachable("Can't handle nested target expr!");
- break;
-
- case MCExpr::Constant:
- break;
-
- case MCExpr::Binary: {
- const MCBinaryExpr *BE = cast<MCBinaryExpr>(Value);
- AddValueSymbolsImpl(BE->getLHS(), Asm);
- AddValueSymbolsImpl(BE->getRHS(), Asm);
- break;
- }
-
- case MCExpr::SymbolRef:
- Asm->getOrCreateSymbolData(cast<MCSymbolRefExpr>(Value)->getSymbol());
- break;
-
- case MCExpr::Unary:
- AddValueSymbolsImpl(cast<MCUnaryExpr>(Value)->getSubExpr(), Asm);
- break;
- }
-}
-
-void AArch64MCExpr::AddValueSymbols(MCAssembler *Asm) const {
- AddValueSymbolsImpl(getSubExpr(), Asm);
-}
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h
index d9798ae..e869ed0 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h
@@ -1,4 +1,4 @@
-//==- AArch64MCExpr.h - AArch64 specific MC expression classes --*- C++ -*-===//
+//===-- AArch64MCExpr.h - AArch64 specific MC expression classes -*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -12,168 +12,149 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_AARCH64MCEXPR_H
-#define LLVM_AARCH64MCEXPR_H
+#ifndef LLVM_AArch64MCEXPR_H
+#define LLVM_AArch64MCEXPR_H
#include "llvm/MC/MCExpr.h"
+#include "llvm/Support/ErrorHandling.h"
namespace llvm {
class AArch64MCExpr : public MCTargetExpr {
public:
enum VariantKind {
- VK_AARCH64_None,
- VK_AARCH64_GOT, // :got: modifier in assembly
- VK_AARCH64_GOT_LO12, // :got_lo12:
- VK_AARCH64_LO12, // :lo12:
-
- VK_AARCH64_ABS_G0, // :abs_g0:
- VK_AARCH64_ABS_G0_NC, // :abs_g0_nc:
- VK_AARCH64_ABS_G1,
- VK_AARCH64_ABS_G1_NC,
- VK_AARCH64_ABS_G2,
- VK_AARCH64_ABS_G2_NC,
- VK_AARCH64_ABS_G3,
-
- VK_AARCH64_SABS_G0, // :abs_g0_s:
- VK_AARCH64_SABS_G1,
- VK_AARCH64_SABS_G2,
-
- VK_AARCH64_DTPREL_G2, // :dtprel_g2:
- VK_AARCH64_DTPREL_G1,
- VK_AARCH64_DTPREL_G1_NC,
- VK_AARCH64_DTPREL_G0,
- VK_AARCH64_DTPREL_G0_NC,
- VK_AARCH64_DTPREL_HI12,
- VK_AARCH64_DTPREL_LO12,
- VK_AARCH64_DTPREL_LO12_NC,
-
- VK_AARCH64_GOTTPREL_G1, // :gottprel:
- VK_AARCH64_GOTTPREL_G0_NC,
- VK_AARCH64_GOTTPREL,
- VK_AARCH64_GOTTPREL_LO12,
-
- VK_AARCH64_TPREL_G2, // :tprel:
- VK_AARCH64_TPREL_G1,
- VK_AARCH64_TPREL_G1_NC,
- VK_AARCH64_TPREL_G0,
- VK_AARCH64_TPREL_G0_NC,
- VK_AARCH64_TPREL_HI12,
- VK_AARCH64_TPREL_LO12,
- VK_AARCH64_TPREL_LO12_NC,
-
- VK_AARCH64_TLSDESC, // :tlsdesc:
- VK_AARCH64_TLSDESC_LO12
+ VK_NONE = 0x000,
+
+ // Symbol locations specifying (roughly speaking) what calculation should be
+ // performed to construct the final address for the relocated
+ // symbol. E.g. direct, via the GOT, ...
+ VK_ABS = 0x001,
+ VK_SABS = 0x002,
+ VK_GOT = 0x003,
+ VK_DTPREL = 0x004,
+ VK_GOTTPREL = 0x005,
+ VK_TPREL = 0x006,
+ VK_TLSDESC = 0x007,
+ VK_SymLocBits = 0x00f,
+
+ // Variants specifying which part of the final address calculation is
+ // used. E.g. the low 12 bits for an ADD/LDR, the middle 16 bits for a
+ // MOVZ/MOVK.
+ VK_PAGE = 0x010,
+ VK_PAGEOFF = 0x020,
+ VK_HI12 = 0x030,
+ VK_G0 = 0x040,
+ VK_G1 = 0x050,
+ VK_G2 = 0x060,
+ VK_G3 = 0x070,
+ VK_AddressFragBits = 0x0f0,
+
+ // Whether the final relocation is a checked one (where a linker should
+ // perform a range-check on the final address) or not. Note that this field
+ // is unfortunately sometimes omitted from the assembly syntax. E.g. :lo12:
+ // on its own is a non-checked relocation. We side with ELF on being
+ // explicit about this!
+ VK_NC = 0x100,
+
+ // Convenience definitions for referring to specific textual representations
+ // of relocation specifiers. Note that this means the "_NC" is sometimes
+ // omitted in line with assembly syntax here (VK_LO12 rather than VK_LO12_NC
+ // since a user would write ":lo12:").
+ VK_CALL = VK_ABS,
+ VK_ABS_PAGE = VK_ABS | VK_PAGE,
+ VK_ABS_G3 = VK_ABS | VK_G3,
+ VK_ABS_G2 = VK_ABS | VK_G2,
+ VK_ABS_G2_S = VK_SABS | VK_G2,
+ VK_ABS_G2_NC = VK_ABS | VK_G2 | VK_NC,
+ VK_ABS_G1 = VK_ABS | VK_G1,
+ VK_ABS_G1_S = VK_SABS | VK_G1,
+ VK_ABS_G1_NC = VK_ABS | VK_G1 | VK_NC,
+ VK_ABS_G0 = VK_ABS | VK_G0,
+ VK_ABS_G0_S = VK_SABS | VK_G0,
+ VK_ABS_G0_NC = VK_ABS | VK_G0 | VK_NC,
+ VK_LO12 = VK_ABS | VK_PAGEOFF | VK_NC,
+ VK_GOT_LO12 = VK_GOT | VK_PAGEOFF | VK_NC,
+ VK_GOT_PAGE = VK_GOT | VK_PAGE,
+ VK_DTPREL_G2 = VK_DTPREL | VK_G2,
+ VK_DTPREL_G1 = VK_DTPREL | VK_G1,
+ VK_DTPREL_G1_NC = VK_DTPREL | VK_G1 | VK_NC,
+ VK_DTPREL_G0 = VK_DTPREL | VK_G0,
+ VK_DTPREL_G0_NC = VK_DTPREL | VK_G0 | VK_NC,
+ VK_DTPREL_HI12 = VK_DTPREL | VK_HI12,
+ VK_DTPREL_LO12 = VK_DTPREL | VK_PAGEOFF,
+ VK_DTPREL_LO12_NC = VK_DTPREL | VK_PAGEOFF | VK_NC,
+ VK_GOTTPREL_PAGE = VK_GOTTPREL | VK_PAGE,
+ VK_GOTTPREL_LO12_NC = VK_GOTTPREL | VK_PAGEOFF | VK_NC,
+ VK_GOTTPREL_G1 = VK_GOTTPREL | VK_G1,
+ VK_GOTTPREL_G0_NC = VK_GOTTPREL | VK_G0 | VK_NC,
+ VK_TPREL_G2 = VK_TPREL | VK_G2,
+ VK_TPREL_G1 = VK_TPREL | VK_G1,
+ VK_TPREL_G1_NC = VK_TPREL | VK_G1 | VK_NC,
+ VK_TPREL_G0 = VK_TPREL | VK_G0,
+ VK_TPREL_G0_NC = VK_TPREL | VK_G0 | VK_NC,
+ VK_TPREL_HI12 = VK_TPREL | VK_HI12,
+ VK_TPREL_LO12 = VK_TPREL | VK_PAGEOFF,
+ VK_TPREL_LO12_NC = VK_TPREL | VK_PAGEOFF | VK_NC,
+ VK_TLSDESC_LO12 = VK_TLSDESC | VK_PAGEOFF | VK_NC,
+ VK_TLSDESC_PAGE = VK_TLSDESC | VK_PAGE,
+
+ VK_INVALID = 0xfff
};
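
The composition of these bit-fields can be verified with a small standalone sketch (constants copied from the enum above):

    #include <cassert>

    int main() {
      const unsigned VK_DTPREL = 0x004, VK_PAGEOFF = 0x020, VK_NC = 0x100;
      const unsigned Kind = VK_DTPREL | VK_PAGEOFF | VK_NC; // VK_DTPREL_LO12_NC
      assert((Kind & 0x00f) == VK_DTPREL);  // getSymbolLoc
      assert((Kind & 0x0f0) == VK_PAGEOFF); // getAddressFrag
      assert((Kind & VK_NC) != 0);          // isNotChecked
    }
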
private:
- const VariantKind Kind;
const MCExpr *Expr;
+ const VariantKind Kind;
- explicit AArch64MCExpr(VariantKind _Kind, const MCExpr *_Expr)
- : Kind(_Kind), Expr(_Expr) {}
+ explicit AArch64MCExpr(const MCExpr *Expr, VariantKind Kind)
+ : Expr(Expr), Kind(Kind) {}
public:
/// @name Construction
/// @{
- static const AArch64MCExpr *Create(VariantKind Kind, const MCExpr *Expr,
- MCContext &Ctx);
-
- static const AArch64MCExpr *CreateLo12(const MCExpr *Expr, MCContext &Ctx) {
- return Create(VK_AARCH64_LO12, Expr, Ctx);
- }
-
- static const AArch64MCExpr *CreateGOT(const MCExpr *Expr, MCContext &Ctx) {
- return Create(VK_AARCH64_GOT, Expr, Ctx);
- }
-
- static const AArch64MCExpr *CreateGOTLo12(const MCExpr *Expr,
- MCContext &Ctx) {
- return Create(VK_AARCH64_GOT_LO12, Expr, Ctx);
- }
-
- static const AArch64MCExpr *CreateDTPREL_G1(const MCExpr *Expr,
- MCContext &Ctx) {
- return Create(VK_AARCH64_DTPREL_G1, Expr, Ctx);
- }
-
- static const AArch64MCExpr *CreateDTPREL_G0_NC(const MCExpr *Expr,
- MCContext &Ctx) {
- return Create(VK_AARCH64_DTPREL_G0_NC, Expr, Ctx);
- }
-
- static const AArch64MCExpr *CreateGOTTPREL(const MCExpr *Expr,
- MCContext &Ctx) {
- return Create(VK_AARCH64_GOTTPREL, Expr, Ctx);
- }
-
- static const AArch64MCExpr *CreateGOTTPRELLo12(const MCExpr *Expr,
- MCContext &Ctx) {
- return Create(VK_AARCH64_GOTTPREL_LO12, Expr, Ctx);
- }
-
- static const AArch64MCExpr *CreateTLSDesc(const MCExpr *Expr,
- MCContext &Ctx) {
- return Create(VK_AARCH64_TLSDESC, Expr, Ctx);
- }
+ static const AArch64MCExpr *Create(const MCExpr *Expr, VariantKind Kind,
+ MCContext &Ctx);
- static const AArch64MCExpr *CreateTLSDescLo12(const MCExpr *Expr,
- MCContext &Ctx) {
- return Create(VK_AARCH64_TLSDESC_LO12, Expr, Ctx);
- }
+ /// @}
+ /// @name Accessors
+ /// @{
- static const AArch64MCExpr *CreateTPREL_G1(const MCExpr *Expr,
- MCContext &Ctx) {
- return Create(VK_AARCH64_TPREL_G1, Expr, Ctx);
- }
+ /// Get the kind of this expression.
+ VariantKind getKind() const { return static_cast<VariantKind>(Kind); }
- static const AArch64MCExpr *CreateTPREL_G0_NC(const MCExpr *Expr,
- MCContext &Ctx) {
- return Create(VK_AARCH64_TPREL_G0_NC, Expr, Ctx);
- }
+ /// Get the expression this modifier applies to.
+ const MCExpr *getSubExpr() const { return Expr; }
- static const AArch64MCExpr *CreateABS_G3(const MCExpr *Expr,
- MCContext &Ctx) {
- return Create(VK_AARCH64_ABS_G3, Expr, Ctx);
- }
+ /// @}
+ /// @name VariantKind information extractors.
+ /// @{
- static const AArch64MCExpr *CreateABS_G2_NC(const MCExpr *Expr,
- MCContext &Ctx) {
- return Create(VK_AARCH64_ABS_G2_NC, Expr, Ctx);
+ static VariantKind getSymbolLoc(VariantKind Kind) {
+ return static_cast<VariantKind>(Kind & VK_SymLocBits);
}
- static const AArch64MCExpr *CreateABS_G1_NC(const MCExpr *Expr,
- MCContext &Ctx) {
- return Create(VK_AARCH64_ABS_G1_NC, Expr, Ctx);
+ static VariantKind getAddressFrag(VariantKind Kind) {
+ return static_cast<VariantKind>(Kind & VK_AddressFragBits);
}
- static const AArch64MCExpr *CreateABS_G0_NC(const MCExpr *Expr,
- MCContext &Ctx) {
- return Create(VK_AARCH64_ABS_G0_NC, Expr, Ctx);
- }
+ static bool isNotChecked(VariantKind Kind) { return Kind & VK_NC; }
/// @}
- /// @name Accessors
- /// @{
- /// getOpcode - Get the kind of this expression.
- VariantKind getKind() const { return Kind; }
+ /// Convert the variant kind into an ELF-appropriate modifier
+ /// (e.g. ":got:", ":lo12:").
+ StringRef getVariantKindName() const;
- /// getSubExpr - Get the child of this expression.
- const MCExpr *getSubExpr() const { return Expr; }
+ void PrintImpl(raw_ostream &OS) const override;
- /// @}
+ void AddValueSymbols(MCAssembler *) const override;
+
+ const MCSection *FindAssociatedSection() const override;
- void PrintImpl(raw_ostream &OS) const;
bool EvaluateAsRelocatableImpl(MCValue &Res,
- const MCAsmLayout *Layout) const;
- void AddValueSymbols(MCAssembler *) const;
- const MCSection *FindAssociatedSection() const {
- return getSubExpr()->FindAssociatedSection();
- }
+ const MCAsmLayout *Layout) const override;
- void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const;
+ void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const override;
static bool classof(const MCExpr *E) {
return E->getKind() == MCExpr::Target;
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
index 3d19e42..ae698c5 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
@@ -1,4 +1,4 @@
-//===-- AArch64MCTargetDesc.cpp - AArch64 Target Descriptions -------------===//
+//===-- AArch64MCTargetDesc.cpp - AArch64 Target Descriptions ---*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -15,9 +15,7 @@
#include "AArch64ELFStreamer.h"
#include "AArch64MCAsmInfo.h"
#include "InstPrinter/AArch64InstPrinter.h"
-#include "llvm/ADT/APInt.h"
#include "llvm/MC/MCCodeGenInfo.h"
-#include "llvm/MC/MCInstrAnalysis.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
@@ -25,8 +23,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
-#define GET_REGINFO_MC_DESC
-#include "AArch64GenRegisterInfo.inc"
+using namespace llvm;
#define GET_INSTRINFO_MC_DESC
#include "AArch64GenInstrInfo.inc"
@@ -34,26 +31,29 @@
#define GET_SUBTARGETINFO_MC_DESC
#include "AArch64GenSubtargetInfo.inc"
-using namespace llvm;
+#define GET_REGINFO_MC_DESC
+#include "AArch64GenRegisterInfo.inc"
-MCSubtargetInfo *AArch64_MC::createAArch64MCSubtargetInfo(StringRef TT,
- StringRef CPU,
- StringRef FS) {
- MCSubtargetInfo *X = new MCSubtargetInfo();
- InitAArch64MCSubtargetInfo(X, TT, CPU, FS);
+static MCInstrInfo *createAArch64MCInstrInfo() {
+ MCInstrInfo *X = new MCInstrInfo();
+ InitAArch64MCInstrInfo(X);
return X;
}
+static MCSubtargetInfo *
+createAArch64MCSubtargetInfo(StringRef TT, StringRef CPU, StringRef FS) {
+ MCSubtargetInfo *X = new MCSubtargetInfo();
-static MCInstrInfo *createAArch64MCInstrInfo() {
- MCInstrInfo *X = new MCInstrInfo();
- InitAArch64MCInstrInfo(X);
+ if (CPU.empty())
+ CPU = "generic";
+
+ InitAArch64MCSubtargetInfo(X, TT, CPU, FS);
return X;
}
static MCRegisterInfo *createAArch64MCRegisterInfo(StringRef Triple) {
MCRegisterInfo *X = new MCRegisterInfo();
- InitAArch64MCRegisterInfo(X, AArch64::X30);
+ InitAArch64MCRegisterInfo(X, AArch64::LR);
return X;
}
@@ -61,9 +61,17 @@ static MCAsmInfo *createAArch64MCAsmInfo(const MCRegisterInfo &MRI,
StringRef TT) {
Triple TheTriple(TT);
- MCAsmInfo *MAI = new AArch64ELFMCAsmInfo(TT);
- unsigned Reg = MRI.getDwarfRegNum(AArch64::XSP, true);
- MCCFIInstruction Inst = MCCFIInstruction::createDefCfa(0, Reg, 0);
+ MCAsmInfo *MAI;
+ if (TheTriple.isOSDarwin())
+ MAI = new AArch64MCAsmInfoDarwin();
+ else {
+ assert(TheTriple.isOSBinFormatELF() && "Only expect Darwin or ELF");
+ MAI = new AArch64MCAsmInfoELF(TT);
+ }
+
+ // Initial state of the frame pointer is SP.
+ unsigned Reg = MRI.getDwarfRegNum(AArch64::SP, true);
+ MCCFIInstruction Inst = MCCFIInstruction::createDefCfa(nullptr, Reg, 0);
MAI->addInitialFrameState(Inst);
return MAI;
@@ -72,40 +80,35 @@ static MCAsmInfo *createAArch64MCAsmInfo(const MCRegisterInfo &MRI,
static MCCodeGenInfo *createAArch64MCCodeGenInfo(StringRef TT, Reloc::Model RM,
CodeModel::Model CM,
CodeGenOpt::Level OL) {
- MCCodeGenInfo *X = new MCCodeGenInfo();
- if (RM == Reloc::Default || RM == Reloc::DynamicNoPIC) {
- // On ELF platforms the default static relocation model has a smart enough
- // linker to cope with referencing external symbols defined in a shared
- // library. Hence DynamicNoPIC doesn't need to be promoted to PIC.
- RM = Reloc::Static;
- }
+ Triple TheTriple(TT);
+ assert((TheTriple.isOSBinFormatELF() || TheTriple.isOSBinFormatMachO()) &&
+ "Only expect Darwin and ELF targets");
if (CM == CodeModel::Default)
CM = CodeModel::Small;
- else if (CM == CodeModel::JITDefault) {
- // The default MCJIT memory managers make no guarantees about where they can
- // find an executable page; JITed code needs to be able to refer to globals
- // no matter how far away they are.
+ // The default MCJIT memory managers make no guarantees about where they can
+ // find an executable page; JITed code needs to be able to refer to globals
+ // no matter how far away they are.
+ else if (CM == CodeModel::JITDefault)
CM = CodeModel::Large;
- }
+ else if (CM != CodeModel::Small && CM != CodeModel::Large)
+ report_fatal_error(
+ "Only small and large code models are allowed on AArch64");
+
+ // AArch64 Darwin is always PIC.
+ if (TheTriple.isOSDarwin())
+ RM = Reloc::PIC_;
+ // On ELF platforms the default static relocation model has a smart enough
+ // linker to cope with referencing external symbols defined in a shared
+ // library. Hence DynamicNoPIC doesn't need to be promoted to PIC.
+ else if (RM == Reloc::Default || RM == Reloc::DynamicNoPIC)
+ RM = Reloc::Static;
+ MCCodeGenInfo *X = new MCCodeGenInfo();
X->InitMCCodeGenInfo(RM, CM, OL);
return X;
}
-static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
- MCContext &Ctx, MCAsmBackend &MAB,
- raw_ostream &OS,
- MCCodeEmitter *Emitter,
- const MCSubtargetInfo &STI,
- bool RelaxAll,
- bool NoExecStack) {
- Triple TheTriple(TT);
-
- return createAArch64ELFStreamer(Ctx, MAB, OS, Emitter, RelaxAll, NoExecStack);
-}
-
-
static MCInstPrinter *createAArch64MCInstPrinter(const Target &T,
unsigned SyntaxVariant,
const MCAsmInfo &MAI,
@@ -114,108 +117,109 @@ static MCInstPrinter *createAArch64MCInstPrinter(const Target &T,
const MCSubtargetInfo &STI) {
if (SyntaxVariant == 0)
return new AArch64InstPrinter(MAI, MII, MRI, STI);
- return 0;
-}
-
-namespace {
-
-class AArch64MCInstrAnalysis : public MCInstrAnalysis {
-public:
- AArch64MCInstrAnalysis(const MCInstrInfo *Info) : MCInstrAnalysis(Info) {}
-
- virtual bool isUnconditionalBranch(const MCInst &Inst) const {
- if (Inst.getOpcode() == AArch64::Bcc
- && Inst.getOperand(0).getImm() == A64CC::AL)
- return true;
- return MCInstrAnalysis::isUnconditionalBranch(Inst);
- }
-
- virtual bool isConditionalBranch(const MCInst &Inst) const {
- if (Inst.getOpcode() == AArch64::Bcc
- && Inst.getOperand(0).getImm() == A64CC::AL)
- return false;
- return MCInstrAnalysis::isConditionalBranch(Inst);
- }
-
- bool evaluateBranch(const MCInst &Inst, uint64_t Addr,
- uint64_t Size, uint64_t &Target) const {
- unsigned LblOperand = Inst.getOpcode() == AArch64::Bcc ? 1 : 0;
- // FIXME: We only handle PCRel branches for now.
- if (Info->get(Inst.getOpcode()).OpInfo[LblOperand].OperandType
- != MCOI::OPERAND_PCREL)
- return false;
-
- int64_t Imm = Inst.getOperand(LblOperand).getImm();
- Target = Addr + Imm;
- return true;
- }
-};
+ if (SyntaxVariant == 1)
+ return new AArch64AppleInstPrinter(MAI, MII, MRI, STI);
+ return nullptr;
}
-static MCInstrAnalysis *createAArch64MCInstrAnalysis(const MCInstrInfo *Info) {
- return new AArch64MCInstrAnalysis(Info);
-}
+static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
+ MCContext &Ctx, MCAsmBackend &TAB,
+ raw_ostream &OS, MCCodeEmitter *Emitter,
+ const MCSubtargetInfo &STI, bool RelaxAll,
+ bool NoExecStack) {
+ Triple TheTriple(TT);
+ if (TheTriple.isOSDarwin())
+ return createMachOStreamer(Ctx, TAB, OS, Emitter, RelaxAll,
+ /*LabelSections*/ true);
+ return createAArch64ELFStreamer(Ctx, TAB, OS, Emitter, RelaxAll, NoExecStack);
+}
+// Force static initialization.
extern "C" void LLVMInitializeAArch64TargetMC() {
// Register the MC asm info.
- RegisterMCAsmInfoFn A(TheAArch64leTarget, createAArch64MCAsmInfo);
- RegisterMCAsmInfoFn B(TheAArch64beTarget, createAArch64MCAsmInfo);
+ RegisterMCAsmInfoFn X(TheAArch64leTarget, createAArch64MCAsmInfo);
+ RegisterMCAsmInfoFn Y(TheAArch64beTarget, createAArch64MCAsmInfo);
+ RegisterMCAsmInfoFn Z(TheARM64leTarget, createAArch64MCAsmInfo);
+ RegisterMCAsmInfoFn W(TheARM64beTarget, createAArch64MCAsmInfo);
// Register the MC codegen info.
TargetRegistry::RegisterMCCodeGenInfo(TheAArch64leTarget,
createAArch64MCCodeGenInfo);
TargetRegistry::RegisterMCCodeGenInfo(TheAArch64beTarget,
createAArch64MCCodeGenInfo);
+ TargetRegistry::RegisterMCCodeGenInfo(TheARM64leTarget,
+ createAArch64MCCodeGenInfo);
+ TargetRegistry::RegisterMCCodeGenInfo(TheARM64beTarget,
+ createAArch64MCCodeGenInfo);
// Register the MC instruction info.
TargetRegistry::RegisterMCInstrInfo(TheAArch64leTarget,
createAArch64MCInstrInfo);
TargetRegistry::RegisterMCInstrInfo(TheAArch64beTarget,
createAArch64MCInstrInfo);
+ TargetRegistry::RegisterMCInstrInfo(TheARM64leTarget,
+ createAArch64MCInstrInfo);
+ TargetRegistry::RegisterMCInstrInfo(TheARM64beTarget,
+ createAArch64MCInstrInfo);
// Register the MC register info.
TargetRegistry::RegisterMCRegInfo(TheAArch64leTarget,
createAArch64MCRegisterInfo);
TargetRegistry::RegisterMCRegInfo(TheAArch64beTarget,
createAArch64MCRegisterInfo);
+ TargetRegistry::RegisterMCRegInfo(TheARM64leTarget,
+ createAArch64MCRegisterInfo);
+ TargetRegistry::RegisterMCRegInfo(TheARM64beTarget,
+ createAArch64MCRegisterInfo);
// Register the MC subtarget info.
- using AArch64_MC::createAArch64MCSubtargetInfo;
TargetRegistry::RegisterMCSubtargetInfo(TheAArch64leTarget,
createAArch64MCSubtargetInfo);
TargetRegistry::RegisterMCSubtargetInfo(TheAArch64beTarget,
createAArch64MCSubtargetInfo);
+ TargetRegistry::RegisterMCSubtargetInfo(TheARM64leTarget,
+ createAArch64MCSubtargetInfo);
+ TargetRegistry::RegisterMCSubtargetInfo(TheARM64beTarget,
+ createAArch64MCSubtargetInfo);
- // Register the MC instruction analyzer.
- TargetRegistry::RegisterMCInstrAnalysis(TheAArch64leTarget,
- createAArch64MCInstrAnalysis);
- TargetRegistry::RegisterMCInstrAnalysis(TheAArch64beTarget,
- createAArch64MCInstrAnalysis);
+ // Register the asm backend.
+ TargetRegistry::RegisterMCAsmBackend(TheAArch64leTarget,
+ createAArch64leAsmBackend);
+ TargetRegistry::RegisterMCAsmBackend(TheAArch64beTarget,
+ createAArch64beAsmBackend);
+ TargetRegistry::RegisterMCAsmBackend(TheARM64leTarget,
+ createAArch64leAsmBackend);
+ TargetRegistry::RegisterMCAsmBackend(TheARM64beTarget,
+ createAArch64beAsmBackend);
// Register the MC Code Emitter
TargetRegistry::RegisterMCCodeEmitter(TheAArch64leTarget,
createAArch64MCCodeEmitter);
TargetRegistry::RegisterMCCodeEmitter(TheAArch64beTarget,
createAArch64MCCodeEmitter);
-
- // Register the asm backend.
- TargetRegistry::RegisterMCAsmBackend(TheAArch64leTarget,
- createAArch64leAsmBackend);
- TargetRegistry::RegisterMCAsmBackend(TheAArch64beTarget,
- createAArch64beAsmBackend);
+ TargetRegistry::RegisterMCCodeEmitter(TheARM64leTarget,
+ createAArch64MCCodeEmitter);
+ TargetRegistry::RegisterMCCodeEmitter(TheARM64beTarget,
+ createAArch64MCCodeEmitter);
// Register the object streamer.
TargetRegistry::RegisterMCObjectStreamer(TheAArch64leTarget,
createMCStreamer);
TargetRegistry::RegisterMCObjectStreamer(TheAArch64beTarget,
createMCStreamer);
+ TargetRegistry::RegisterMCObjectStreamer(TheARM64leTarget, createMCStreamer);
+ TargetRegistry::RegisterMCObjectStreamer(TheARM64beTarget, createMCStreamer);
// Register the MCInstPrinter.
TargetRegistry::RegisterMCInstPrinter(TheAArch64leTarget,
createAArch64MCInstPrinter);
TargetRegistry::RegisterMCInstPrinter(TheAArch64beTarget,
createAArch64MCInstPrinter);
+ TargetRegistry::RegisterMCInstPrinter(TheARM64leTarget,
+ createAArch64MCInstPrinter);
+ TargetRegistry::RegisterMCInstPrinter(TheARM64beTarget,
+ createAArch64MCInstPrinter);
}
diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h
index bd8beaf..d886ea2 100644
--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h
@@ -11,18 +11,19 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_AARCH64MCTARGETDESC_H
-#define LLVM_AARCH64MCTARGETDESC_H
+#ifndef AArch64MCTARGETDESC_H
+#define AArch64MCTARGETDESC_H
#include "llvm/Support/DataTypes.h"
+#include <string>
namespace llvm {
class MCAsmBackend;
class MCCodeEmitter;
class MCContext;
class MCInstrInfo;
-class MCObjectWriter;
class MCRegisterInfo;
+class MCObjectWriter;
class MCSubtargetInfo;
class StringRef;
class Target;
@@ -30,28 +31,25 @@ class raw_ostream;
extern Target TheAArch64leTarget;
extern Target TheAArch64beTarget;
-
-namespace AArch64_MC {
- MCSubtargetInfo *createAArch64MCSubtargetInfo(StringRef TT, StringRef CPU,
- StringRef FS);
-}
+extern Target TheARM64leTarget;
+extern Target TheARM64beTarget;
MCCodeEmitter *createAArch64MCCodeEmitter(const MCInstrInfo &MCII,
- const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI,
- MCContext &Ctx);
+ const MCRegisterInfo &MRI,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx);
+MCAsmBackend *createAArch64leAsmBackend(const Target &T,
+ const MCRegisterInfo &MRI, StringRef TT,
+ StringRef CPU);
+MCAsmBackend *createAArch64beAsmBackend(const Target &T,
+ const MCRegisterInfo &MRI, StringRef TT,
+ StringRef CPU);
-MCObjectWriter *createAArch64ELFObjectWriter(raw_ostream &OS,
- uint8_t OSABI,
+MCObjectWriter *createAArch64ELFObjectWriter(raw_ostream &OS, uint8_t OSABI,
bool IsLittleEndian);
-MCAsmBackend *createAArch64leAsmBackend(const Target &T,
- const MCRegisterInfo &MRI,
- StringRef TT, StringRef CPU);
-
-MCAsmBackend *createAArch64beAsmBackend(const Target &T,
- const MCRegisterInfo &MRI,
- StringRef TT, StringRef CPU);
+MCObjectWriter *createAArch64MachObjectWriter(raw_ostream &OS, uint32_t CPUType,
+ uint32_t CPUSubtype);
} // End llvm namespace
diff --git a/lib/Target/ARM64/MCTargetDesc/ARM64MachObjectWriter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp
index 1733dc5..5c86189 100644
--- a/lib/Target/ARM64/MCTargetDesc/ARM64MachObjectWriter.cpp
+++ b/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp
@@ -1,4 +1,4 @@
-//===-- ARMMachObjectWriter.cpp - ARM Mach Object Writer ------------------===//
+//===-- AArch64MachObjectWriter.cpp - AArch64 Mach Object Writer ----------===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#include "MCTargetDesc/ARM64FixupKinds.h"
-#include "MCTargetDesc/ARM64MCTargetDesc.h"
+#include "MCTargetDesc/AArch64FixupKinds.h"
+#include "MCTargetDesc/AArch64MCTargetDesc.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCAsmLayout.h"
#include "llvm/MC/MCContext.h"
@@ -23,24 +23,24 @@
using namespace llvm;
namespace {
-class ARM64MachObjectWriter : public MCMachObjectTargetWriter {
- bool getARM64FixupKindMachOInfo(const MCFixup &Fixup, unsigned &RelocType,
+class AArch64MachObjectWriter : public MCMachObjectTargetWriter {
+ bool getAArch64FixupKindMachOInfo(const MCFixup &Fixup, unsigned &RelocType,
const MCSymbolRefExpr *Sym,
unsigned &Log2Size, const MCAssembler &Asm);
public:
- ARM64MachObjectWriter(uint32_t CPUType, uint32_t CPUSubtype)
+ AArch64MachObjectWriter(uint32_t CPUType, uint32_t CPUSubtype)
: MCMachObjectTargetWriter(true /* is64Bit */, CPUType, CPUSubtype,
/*UseAggressiveSymbolFolding=*/true) {}
void RecordRelocation(MachObjectWriter *Writer, const MCAssembler &Asm,
const MCAsmLayout &Layout, const MCFragment *Fragment,
const MCFixup &Fixup, MCValue Target,
- uint64_t &FixedValue);
+ uint64_t &FixedValue) override;
};
}
-bool ARM64MachObjectWriter::getARM64FixupKindMachOInfo(
+bool AArch64MachObjectWriter::getAArch64FixupKindMachOInfo(
const MCFixup &Fixup, unsigned &RelocType, const MCSymbolRefExpr *Sym,
unsigned &Log2Size, const MCAssembler &Asm) {
RelocType = unsigned(MachO::ARM64_RELOC_UNSIGNED);
@@ -66,12 +66,12 @@ bool ARM64MachObjectWriter::getARM64FixupKindMachOInfo(
if (Sym->getKind() == MCSymbolRefExpr::VK_GOT)
RelocType = unsigned(MachO::ARM64_RELOC_POINTER_TO_GOT);
return true;
- case ARM64::fixup_arm64_add_imm12:
- case ARM64::fixup_arm64_ldst_imm12_scale1:
- case ARM64::fixup_arm64_ldst_imm12_scale2:
- case ARM64::fixup_arm64_ldst_imm12_scale4:
- case ARM64::fixup_arm64_ldst_imm12_scale8:
- case ARM64::fixup_arm64_ldst_imm12_scale16:
+ case AArch64::fixup_aarch64_add_imm12:
+ case AArch64::fixup_aarch64_ldst_imm12_scale1:
+ case AArch64::fixup_aarch64_ldst_imm12_scale2:
+ case AArch64::fixup_aarch64_ldst_imm12_scale4:
+ case AArch64::fixup_aarch64_ldst_imm12_scale8:
+ case AArch64::fixup_aarch64_ldst_imm12_scale16:
Log2Size = llvm::Log2_32(4);
switch (Sym->getKind()) {
default:
@@ -86,7 +86,7 @@ bool ARM64MachObjectWriter::getARM64FixupKindMachOInfo(
RelocType = unsigned(MachO::ARM64_RELOC_TLVP_LOAD_PAGEOFF12);
return true;
}
- case ARM64::fixup_arm64_pcrel_adrp_imm21:
+ case AArch64::fixup_aarch64_pcrel_adrp_imm21:
Log2Size = llvm::Log2_32(4);
// This encompasses the relocation for the whole 21-bit value.
switch (Sym->getKind()) {
@@ -104,15 +104,15 @@ bool ARM64MachObjectWriter::getARM64FixupKindMachOInfo(
return true;
}
return true;
- case ARM64::fixup_arm64_pcrel_branch26:
- case ARM64::fixup_arm64_pcrel_call26:
+ case AArch64::fixup_aarch64_pcrel_branch26:
+ case AArch64::fixup_aarch64_pcrel_call26:
Log2Size = llvm::Log2_32(4);
RelocType = unsigned(MachO::ARM64_RELOC_BRANCH26);
return true;
}
}
-void ARM64MachObjectWriter::RecordRelocation(
+void AArch64MachObjectWriter::RecordRelocation(
MachObjectWriter *Writer, const MCAssembler &Asm, const MCAsmLayout &Layout,
const MCFragment *Fragment, const MCFixup &Fixup, MCValue Target,
uint64_t &FixedValue) {
@@ -129,21 +129,20 @@ void ARM64MachObjectWriter::RecordRelocation(
FixupOffset += Fixup.getOffset();
- // ARM64 pcrel relocation addends do not include the section offset.
+ // AArch64 pcrel relocation addends do not include the section offset.
if (IsPCRel)
FixedValue += FixupOffset;
// ADRP fixups use relocations for the whole symbol value and only
// put the addend in the instruction itself. Clear out any value the
// generic code figured out from the symbol definition.
- if (Kind == ARM64::fixup_arm64_pcrel_adrp_imm21 ||
- Kind == ARM64::fixup_arm64_pcrel_imm19)
+ if (Kind == AArch64::fixup_aarch64_pcrel_adrp_imm21)
FixedValue = 0;
// imm19 relocations are for conditional branches, which require
// assembler local symbols. If we got here, that's not what we have,
// so complain loudly.
- if (Kind == ARM64::fixup_arm64_pcrel_imm19) {
+ if (Kind == AArch64::fixup_aarch64_pcrel_branch19) {
Asm.getContext().FatalError(Fixup.getLoc(),
"conditional branch requires assembler-local"
" label. '" +
@@ -154,15 +153,15 @@ void ARM64MachObjectWriter::RecordRelocation(
// 14-bit branch relocations should only target internal labels, and so
// should never get here.
- if (Kind == ARM64::fixup_arm64_pcrel_branch14) {
+ if (Kind == AArch64::fixup_aarch64_pcrel_branch14) {
Asm.getContext().FatalError(Fixup.getLoc(),
"Invalid relocation on conditional branch!");
return;
}
- if (!getARM64FixupKindMachOInfo(Fixup, Type, Target.getSymA(), Log2Size,
+ if (!getAArch64FixupKindMachOInfo(Fixup, Type, Target.getSymA(), Log2Size,
Asm)) {
- Asm.getContext().FatalError(Fixup.getLoc(), "unknown ARM64 fixup kind!");
+ Asm.getContext().FatalError(Fixup.getLoc(), "unknown AArch64 fixup kind!");
return;
}
@@ -184,11 +183,11 @@ void ARM64MachObjectWriter::RecordRelocation(
}
} else if (Target.getSymB()) { // A - B + constant
const MCSymbol *A = &Target.getSymA()->getSymbol();
- MCSymbolData &A_SD = Asm.getSymbolData(*A);
+ const MCSymbolData &A_SD = Asm.getSymbolData(*A);
const MCSymbolData *A_Base = Asm.getAtom(&A_SD);
const MCSymbol *B = &Target.getSymB()->getSymbol();
- MCSymbolData &B_SD = Asm.getSymbolData(*B);
+ const MCSymbolData &B_SD = Asm.getSymbolData(*B);
const MCSymbolData *B_Base = Asm.getAtom(&B_SD);
// Check for "_foo@got - .", which comes through here as:
@@ -221,8 +220,8 @@ void ARM64MachObjectWriter::RecordRelocation(
"unsupported pc-relative relocation of "
"difference");
- // ARM64 always uses external relocations. If there is no symbol to use as
- // a base address (a local symbol with no preceeding non-local symbol),
+ // AArch64 always uses external relocations. If there is no symbol to use as
+ // a base address (a local symbol with no preceding non-local symbol),
// error out.
//
// FIXME: We should probably just synthesize an external symbol and use
@@ -242,14 +241,14 @@ void ARM64MachObjectWriter::RecordRelocation(
Asm.getContext().FatalError(Fixup.getLoc(),
"unsupported relocation with identical base");
- Value += (A_SD.getFragment() == NULL ? 0 : Writer->getSymbolAddress(
- &A_SD, Layout)) -
- (A_Base == NULL || A_Base->getFragment() == NULL
+ Value += (!A_SD.getFragment() ? 0
+ : Writer->getSymbolAddress(&A_SD, Layout)) -
+ (!A_Base || !A_Base->getFragment()
? 0
: Writer->getSymbolAddress(A_Base, Layout));
- Value -= (B_SD.getFragment() == NULL ? 0 : Writer->getSymbolAddress(
- &B_SD, Layout)) -
- (B_Base == NULL || B_Base->getFragment() == NULL
+ Value -= (!B_SD.getFragment() ? 0
+ : Writer->getSymbolAddress(&B_SD, Layout)) -
+ (!B_Base || !B_Base->getFragment()
? 0
: Writer->getSymbolAddress(B_Base, Layout));
@@ -268,7 +267,7 @@ void ARM64MachObjectWriter::RecordRelocation(
Type = MachO::ARM64_RELOC_SUBTRACTOR;
} else { // A + constant
const MCSymbol *Symbol = &Target.getSymA()->getSymbol();
- MCSymbolData &SD = Asm.getSymbolData(*Symbol);
+ const MCSymbolData &SD = Asm.getSymbolData(*Symbol);
const MCSymbolData *Base = Asm.getAtom(&SD);
const MCSectionMachO &Section = static_cast<const MCSectionMachO &>(
Fragment->getParent()->getSection());
@@ -303,12 +302,12 @@ void ARM64MachObjectWriter::RecordRelocation(
// have already been fixed up.
if (Symbol->isInSection()) {
if (Section.hasAttribute(MachO::S_ATTR_DEBUG))
- Base = 0;
+ Base = nullptr;
}
- // ARM64 uses external relocations as much as possible. For debug sections,
- // and for pointer-sized relocations (.quad), we allow section relocations.
- // It's code sections that run into trouble.
+ // AArch64 uses external relocations as much as possible. For debug
+ // sections, and for pointer-sized relocations (.quad), we allow section
+ // relocations. It's code sections that run into trouble.
if (Base) {
Index = Base->getIndex();
IsExtern = 1;
@@ -388,9 +387,10 @@ void ARM64MachObjectWriter::RecordRelocation(
Writer->addRelocation(Fragment->getParent(), MRE);
}
-MCObjectWriter *llvm::createARM64MachObjectWriter(raw_ostream &OS,
+MCObjectWriter *llvm::createAArch64MachObjectWriter(raw_ostream &OS,
uint32_t CPUType,
uint32_t CPUSubtype) {
- return createMachObjectWriter(new ARM64MachObjectWriter(CPUType, CPUSubtype),
- OS, /*IsLittleEndian=*/true);
+ return createMachObjectWriter(
+ new AArch64MachObjectWriter(CPUType, CPUSubtype), OS,
+ /*IsLittleEndian=*/true);
}
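
Editor's note: for context, this factory is what a Darwin asm backend hands to the MC layer. A minimal sketch of the expected call site follows (the backend class name is an assumption; only the two create* functions come from the patched sources, and MachO::CPU_TYPE_ARM64 / MachO::CPU_SUBTYPE_ARM64_ALL are the constants from llvm/Support/MachO.h):

    // Assumed call site: a Darwin asm backend returning the Mach-O writer.
    MCObjectWriter *
    DarwinAArch64AsmBackend::createObjectWriter(raw_ostream &OS) const {
      return createAArch64MachObjectWriter(OS, MachO::CPU_TYPE_ARM64,
                                           MachO::CPU_SUBTYPE_ARM64_ALL);
    }
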
diff --git a/lib/Target/AArch64/MCTargetDesc/Android.mk b/lib/Target/AArch64/MCTargetDesc/Android.mk
index edcf1f2..c0cdb2b 100644
--- a/lib/Target/AArch64/MCTargetDesc/Android.mk
+++ b/lib/Target/AArch64/MCTargetDesc/Android.mk
@@ -10,6 +10,7 @@ arm64_mc_desc_SRC_FILES := \
AArch64AsmBackend.cpp \
AArch64ELFObjectWriter.cpp \
AArch64ELFStreamer.cpp \
+ AArch64MachObjectWriter.cpp \
AArch64MCAsmInfo.cpp \
AArch64MCCodeEmitter.cpp \
AArch64MCExpr.cpp \
diff --git a/lib/Target/AArch64/MCTargetDesc/CMakeLists.txt b/lib/Target/AArch64/MCTargetDesc/CMakeLists.txt
index 54c4465..7d5bced 100644
--- a/lib/Target/AArch64/MCTargetDesc/CMakeLists.txt
+++ b/lib/Target/AArch64/MCTargetDesc/CMakeLists.txt
@@ -6,4 +6,9 @@ add_llvm_library(LLVMAArch64Desc
AArch64MCCodeEmitter.cpp
AArch64MCExpr.cpp
AArch64MCTargetDesc.cpp
- )
+ AArch64MachObjectWriter.cpp
+)
+add_dependencies(LLVMAArch64Desc AArch64CommonTableGen)
+
+# Hack: we need to include 'main' target directory to grab private headers
+include_directories(${CMAKE_CURRENT_SOURCE_DIR}/.. ${CMAKE_CURRENT_BINARY_DIR}/..)
diff --git a/lib/Target/AArch64/MCTargetDesc/LLVMBuild.txt b/lib/Target/AArch64/MCTargetDesc/LLVMBuild.txt
index 37c8035..70cff0b 100644
--- a/lib/Target/AArch64/MCTargetDesc/LLVMBuild.txt
+++ b/lib/Target/AArch64/MCTargetDesc/LLVMBuild.txt
@@ -1,4 +1,4 @@
-;===- ./lib/Target/AArch64/MCTargetDesc/LLVMBuild.txt ----------*- Conf -*--===;
+;===- ./lib/Target/AArch64/MCTargetDesc/LLVMBuild.txt ------------*- Conf -*--===;
;
; The LLVM Compiler Infrastructure
;
diff --git a/lib/Target/AArch64/Makefile b/lib/Target/AArch64/Makefile
index 641bb83..f356c58 100644
--- a/lib/Target/AArch64/Makefile
+++ b/lib/Target/AArch64/Makefile
@@ -12,19 +12,14 @@ LIBRARYNAME = LLVMAArch64CodeGen
TARGET = AArch64
# Make sure that tblgen is run, first thing.
-BUILT_SOURCES = AArch64GenAsmMatcher.inc \
- AArch64GenAsmWriter.inc \
- AArch64GenCallingConv.inc \
- AArch64GenDAGISel.inc \
- AArch64GenDisassemblerTables.inc \
- AArch64GenInstrInfo.inc \
- AArch64GenMCCodeEmitter.inc \
- AArch64GenMCPseudoLowering.inc \
- AArch64GenRegisterInfo.inc \
- AArch64GenSubtargetInfo.inc
+BUILT_SOURCES = AArch64GenRegisterInfo.inc AArch64GenInstrInfo.inc \
+ AArch64GenAsmWriter.inc AArch64GenAsmWriter1.inc \
+ AArch64GenDAGISel.inc \
+ AArch64GenCallingConv.inc AArch64GenAsmMatcher.inc \
+ AArch64GenSubtargetInfo.inc AArch64GenMCCodeEmitter.inc \
+ AArch64GenFastISel.inc AArch64GenDisassemblerTables.inc \
+ AArch64GenMCPseudoLowering.inc
-DIRS = InstPrinter AsmParser Disassembler TargetInfo MCTargetDesc Utils
+DIRS = TargetInfo InstPrinter AsmParser Disassembler MCTargetDesc Utils
include $(LEVEL)/Makefile.common
-
-
diff --git a/lib/Target/AArch64/README.txt b/lib/Target/AArch64/README.txt
deleted file mode 100644
index 601990f..0000000
--- a/lib/Target/AArch64/README.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-This file will contain changes that need to be made before AArch64 can become an
-officially supported target. Currently a placeholder.
diff --git a/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp b/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp
index 9281e4e..3a382c1 100644
--- a/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp
+++ b/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp
@@ -1,4 +1,4 @@
-//===-- AArch64TargetInfo.cpp - AArch64 Target Implementation -------------===//
+//===-- AArch64TargetInfo.cpp - AArch64 Target Implementation -----------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -6,22 +6,26 @@
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
-//
-// This file contains the key registration step for the architecture.
-//
-//===----------------------------------------------------------------------===//
-#include "AArch64.h"
-#include "llvm/IR/Module.h"
+#include "llvm/ADT/Triple.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
-Target llvm::TheAArch64leTarget;
-Target llvm::TheAArch64beTarget;
+namespace llvm {
+Target TheAArch64leTarget;
+Target TheAArch64beTarget;
+Target TheARM64leTarget;
+Target TheARM64beTarget;
+} // end namespace llvm
extern "C" void LLVMInitializeAArch64TargetInfo() {
- RegisterTarget<Triple::aarch64, /*HasJIT=*/true>
- X(TheAArch64leTarget, "aarch64", "AArch64 (ARM 64-bit little endian target)");
- RegisterTarget<Triple::aarch64_be, /*HasJIT=*/true>
- Y(TheAArch64beTarget, "aarch64_be", "AArch64 (ARM 64-bit big endian target)");
+ RegisterTarget<Triple::arm64, /*HasJIT=*/true> X(TheARM64leTarget, "arm64",
+ "AArch64 (little endian)");
+ RegisterTarget<Triple::arm64_be, /*HasJIT=*/true> Y(TheARM64beTarget, "arm64_be",
+ "AArch64 (big endian)");
+
+ RegisterTarget<Triple::aarch64, /*HasJIT=*/true> Z(
+ TheAArch64leTarget, "aarch64", "AArch64 (little endian)");
+ RegisterTarget<Triple::aarch64_be, /*HasJIT=*/true> W(
+ TheAArch64beTarget, "aarch64_be", "AArch64 (big endian)");
}
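
Editor's note: with both spellings registered, either triple resolves to an AArch64 target. A hedged usage sketch (tool-side code, not part of this patch; assumes llvm/Support/TargetRegistry.h is included):

    std::string Error;
    const Target *ByAlias =
        TargetRegistry::lookupTarget("arm64-apple-darwin", Error);  // new alias
    const Target *ByName =
        TargetRegistry::lookupTarget("aarch64-linux-gnu", Error);   // original name
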
diff --git a/lib/Target/AArch64/TargetInfo/CMakeLists.txt b/lib/Target/AArch64/TargetInfo/CMakeLists.txt
index ee734c6..e236eed 100644
--- a/lib/Target/AArch64/TargetInfo/CMakeLists.txt
+++ b/lib/Target/AArch64/TargetInfo/CMakeLists.txt
@@ -1,3 +1,7 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
add_llvm_library(LLVMAArch64Info
AArch64TargetInfo.cpp
)
+
+add_dependencies(LLVMAArch64Info AArch64CommonTableGen)
diff --git a/lib/Target/AArch64/TargetInfo/LLVMBuild.txt b/lib/Target/AArch64/TargetInfo/LLVMBuild.txt
index 6429172..93c5407 100644
--- a/lib/Target/AArch64/TargetInfo/LLVMBuild.txt
+++ b/lib/Target/AArch64/TargetInfo/LLVMBuild.txt
@@ -1,4 +1,4 @@
-;===- ./lib/Target/AArch64/TargetInfo/LLVMBuild.txt ------------*- Conf -*--===;
+;===- ./lib/Target/AArch64/TargetInfo/LLVMBuild.txt --------------*- Conf -*--===;
;
; The LLVM Compiler Infrastructure
;
diff --git a/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp b/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp
index 2a97cd6..3c24bb3 100644
--- a/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp
+++ b/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp
@@ -18,7 +18,7 @@
using namespace llvm;
-StringRef NamedImmMapper::toString(uint32_t Value, bool &Valid) const {
+StringRef AArch64NamedImmMapper::toString(uint32_t Value, bool &Valid) const {
for (unsigned i = 0; i < NumPairs; ++i) {
if (Pairs[i].Value == Value) {
Valid = true;
@@ -30,7 +30,7 @@ StringRef NamedImmMapper::toString(uint32_t Value, bool &Valid) const {
return StringRef();
}
-uint32_t NamedImmMapper::fromString(StringRef Name, bool &Valid) const {
+uint32_t AArch64NamedImmMapper::fromString(StringRef Name, bool &Valid) const {
std::string LowerCaseName = Name.lower();
for (unsigned i = 0; i < NumPairs; ++i) {
if (Pairs[i].Name == LowerCaseName) {
@@ -43,11 +43,11 @@ uint32_t NamedImmMapper::fromString(StringRef Name, bool &Valid) const {
return -1;
}
-bool NamedImmMapper::validImm(uint32_t Value) const {
+bool AArch64NamedImmMapper::validImm(uint32_t Value) const {
return Value < TooBigImm;
}
-const NamedImmMapper::Mapping A64AT::ATMapper::ATPairs[] = {
+const AArch64NamedImmMapper::Mapping AArch64AT::ATMapper::ATPairs[] = {
{"s1e1r", S1E1R},
{"s1e2r", S1E2R},
{"s1e3r", S1E3R},
@@ -62,10 +62,10 @@ const NamedImmMapper::Mapping A64AT::ATMapper::ATPairs[] = {
{"s12e0w", S12E0W},
};
-A64AT::ATMapper::ATMapper()
- : NamedImmMapper(ATPairs, 0) {}
+AArch64AT::ATMapper::ATMapper()
+ : AArch64NamedImmMapper(ATPairs, 0) {}
-const NamedImmMapper::Mapping A64DB::DBarrierMapper::DBarrierPairs[] = {
+const AArch64NamedImmMapper::Mapping AArch64DB::DBarrierMapper::DBarrierPairs[] = {
{"oshld", OSHLD},
{"oshst", OSHST},
{"osh", OSH},
@@ -80,10 +80,10 @@ const NamedImmMapper::Mapping A64DB::DBarrierMapper::DBarrierPairs[] = {
{"sy", SY}
};
-A64DB::DBarrierMapper::DBarrierMapper()
- : NamedImmMapper(DBarrierPairs, 16u) {}
+AArch64DB::DBarrierMapper::DBarrierMapper()
+ : AArch64NamedImmMapper(DBarrierPairs, 16u) {}
-const NamedImmMapper::Mapping A64DC::DCMapper::DCPairs[] = {
+const AArch64NamedImmMapper::Mapping AArch64DC::DCMapper::DCPairs[] = {
{"zva", ZVA},
{"ivac", IVAC},
{"isw", ISW},
@@ -94,26 +94,26 @@ const NamedImmMapper::Mapping A64DC::DCMapper::DCPairs[] = {
{"cisw", CISW}
};
-A64DC::DCMapper::DCMapper()
- : NamedImmMapper(DCPairs, 0) {}
+AArch64DC::DCMapper::DCMapper()
+ : AArch64NamedImmMapper(DCPairs, 0) {}
-const NamedImmMapper::Mapping A64IC::ICMapper::ICPairs[] = {
+const AArch64NamedImmMapper::Mapping AArch64IC::ICMapper::ICPairs[] = {
{"ialluis", IALLUIS},
{"iallu", IALLU},
{"ivau", IVAU}
};
-A64IC::ICMapper::ICMapper()
- : NamedImmMapper(ICPairs, 0) {}
+AArch64IC::ICMapper::ICMapper()
+ : AArch64NamedImmMapper(ICPairs, 0) {}
-const NamedImmMapper::Mapping A64ISB::ISBMapper::ISBPairs[] = {
+const AArch64NamedImmMapper::Mapping AArch64ISB::ISBMapper::ISBPairs[] = {
{"sy", SY},
};
-A64ISB::ISBMapper::ISBMapper()
- : NamedImmMapper(ISBPairs, 16) {}
+AArch64ISB::ISBMapper::ISBMapper()
+ : AArch64NamedImmMapper(ISBPairs, 16) {}
-const NamedImmMapper::Mapping A64PRFM::PRFMMapper::PRFMPairs[] = {
+const AArch64NamedImmMapper::Mapping AArch64PRFM::PRFMMapper::PRFMPairs[] = {
{"pldl1keep", PLDL1KEEP},
{"pldl1strm", PLDL1STRM},
{"pldl2keep", PLDL2KEEP},
@@ -134,19 +134,19 @@ const NamedImmMapper::Mapping A64PRFM::PRFMMapper::PRFMPairs[] = {
{"pstl3strm", PSTL3STRM}
};
-A64PRFM::PRFMMapper::PRFMMapper()
- : NamedImmMapper(PRFMPairs, 32) {}
+AArch64PRFM::PRFMMapper::PRFMMapper()
+ : AArch64NamedImmMapper(PRFMPairs, 32) {}
-const NamedImmMapper::Mapping A64PState::PStateMapper::PStatePairs[] = {
+const AArch64NamedImmMapper::Mapping AArch64PState::PStateMapper::PStatePairs[] = {
{"spsel", SPSel},
{"daifset", DAIFSet},
{"daifclr", DAIFClr}
};
-A64PState::PStateMapper::PStateMapper()
- : NamedImmMapper(PStatePairs, 0) {}
+AArch64PState::PStateMapper::PStateMapper()
+ : AArch64NamedImmMapper(PStatePairs, 0) {}
-const NamedImmMapper::Mapping A64SysReg::MRSMapper::MRSPairs[] = {
+const AArch64NamedImmMapper::Mapping AArch64SysReg::MRSMapper::MRSPairs[] = {
{"mdccsr_el0", MDCCSR_EL0},
{"dbgdtrrx_el0", DBGDTRRX_EL0},
{"mdrar_el1", MDRAR_EL1},
@@ -176,16 +176,16 @@ const NamedImmMapper::Mapping A64SysReg::MRSMapper::MRSPairs[] = {
{"id_isar3_el1", ID_ISAR3_EL1},
{"id_isar4_el1", ID_ISAR4_EL1},
{"id_isar5_el1", ID_ISAR5_EL1},
- {"id_aa64pfr0_el1", ID_AA64PFR0_EL1},
- {"id_aa64pfr1_el1", ID_AA64PFR1_EL1},
- {"id_aa64dfr0_el1", ID_AA64DFR0_EL1},
- {"id_aa64dfr1_el1", ID_AA64DFR1_EL1},
- {"id_aa64afr0_el1", ID_AA64AFR0_EL1},
- {"id_aa64afr1_el1", ID_AA64AFR1_EL1},
- {"id_aa64isar0_el1", ID_AA64ISAR0_EL1},
- {"id_aa64isar1_el1", ID_AA64ISAR1_EL1},
- {"id_aa64mmfr0_el1", ID_AA64MMFR0_EL1},
- {"id_aa64mmfr1_el1", ID_AA64MMFR1_EL1},
+ {"id_aa64pfr0_el1", ID_A64PFR0_EL1},
+ {"id_aa64pfr1_el1", ID_A64PFR1_EL1},
+ {"id_aa64dfr0_el1", ID_A64DFR0_EL1},
+ {"id_aa64dfr1_el1", ID_A64DFR1_EL1},
+ {"id_aa64afr0_el1", ID_A64AFR0_EL1},
+ {"id_aa64afr1_el1", ID_A64AFR1_EL1},
+ {"id_aa64isar0_el1", ID_A64ISAR0_EL1},
+ {"id_aa64isar1_el1", ID_A64ISAR1_EL1},
+ {"id_aa64mmfr0_el1", ID_A64MMFR0_EL1},
+ {"id_aa64mmfr1_el1", ID_A64MMFR1_EL1},
{"mvfr0_el1", MVFR0_EL1},
{"mvfr1_el1", MVFR1_EL1},
{"mvfr2_el1", MVFR2_EL1},
@@ -245,12 +245,13 @@ const NamedImmMapper::Mapping A64SysReg::MRSMapper::MRSPairs[] = {
{"ich_elsr_el2", ICH_ELSR_EL2}
};
-A64SysReg::MRSMapper::MRSMapper() {
+AArch64SysReg::MRSMapper::MRSMapper(uint64_t FeatureBits)
+ : SysRegMapper(FeatureBits) {
InstPairs = &MRSPairs[0];
NumInstPairs = llvm::array_lengthof(MRSPairs);
}
-const NamedImmMapper::Mapping A64SysReg::MSRMapper::MSRPairs[] = {
+const AArch64NamedImmMapper::Mapping AArch64SysReg::MSRMapper::MSRPairs[] = {
{"dbgdtrtx_el0", DBGDTRTX_EL0},
{"oslar_el1", OSLAR_EL1},
{"pmswinc_el0", PMSWINC_EL0},
@@ -268,13 +269,14 @@ const NamedImmMapper::Mapping A64SysReg::MSRMapper::MSRPairs[] = {
{"icc_sgi0r_el1", ICC_SGI0R_EL1}
};
-A64SysReg::MSRMapper::MSRMapper() {
+AArch64SysReg::MSRMapper::MSRMapper(uint64_t FeatureBits)
+ : SysRegMapper(FeatureBits) {
InstPairs = &MSRPairs[0];
NumInstPairs = llvm::array_lengthof(MSRPairs);
}
-const NamedImmMapper::Mapping A64SysReg::SysRegMapper::SysRegPairs[] = {
+const AArch64NamedImmMapper::Mapping AArch64SysReg::SysRegMapper::SysRegPairs[] = {
{"osdtrrx_el1", OSDTRRX_EL1},
{"osdtrtx_el1", OSDTRTX_EL1},
{"teecr32_el1", TEECR32_EL1},
@@ -753,10 +755,16 @@ const NamedImmMapper::Mapping A64SysReg::SysRegMapper::SysRegPairs[] = {
{"ich_lr15_el2", ICH_LR15_EL2}
};
+const AArch64NamedImmMapper::Mapping
+AArch64SysReg::SysRegMapper::CycloneSysRegPairs[] = {
+ {"cpm_ioacc_ctl_el3", CPM_IOACC_CTL_EL3}
+};
+
uint32_t
-A64SysReg::SysRegMapper::fromString(StringRef Name, bool &Valid) const {
- // First search the registers shared by all
+AArch64SysReg::SysRegMapper::fromString(StringRef Name, bool &Valid) const {
std::string NameLower = Name.lower();
+
+ // First search the registers shared by all
for (unsigned i = 0; i < array_lengthof(SysRegPairs); ++i) {
if (SysRegPairs[i].Name == NameLower) {
Valid = true;
@@ -764,6 +772,16 @@ A64SysReg::SysRegMapper::fromString(StringRef Name, bool &Valid) const {
}
}
+ // Next search for target specific registers
+ if (FeatureBits & AArch64::ProcCyclone) {
+ for (unsigned i = 0; i < array_lengthof(CycloneSysRegPairs); ++i) {
+ if (CycloneSysRegPairs[i].Name == NameLower) {
+ Valid = true;
+ return CycloneSysRegPairs[i].Value;
+ }
+ }
+ }
+
// Now try the instruction-specific registers (either read-only or
// write-only).
for (unsigned i = 0; i < NumInstPairs; ++i) {
@@ -796,7 +814,8 @@ A64SysReg::SysRegMapper::fromString(StringRef Name, bool &Valid) const {
}
std::string
-A64SysReg::SysRegMapper::toString(uint32_t Bits, bool &Valid) const {
+AArch64SysReg::SysRegMapper::toString(uint32_t Bits, bool &Valid) const {
+ // First search the registers shared by all
for (unsigned i = 0; i < array_lengthof(SysRegPairs); ++i) {
if (SysRegPairs[i].Value == Bits) {
Valid = true;
@@ -804,6 +823,18 @@ A64SysReg::SysRegMapper::toString(uint32_t Bits, bool &Valid) const {
}
}
+ // Next search for target specific registers
+ if (FeatureBits & AArch64::ProcCyclone) {
+ for (unsigned i = 0; i < array_lengthof(CycloneSysRegPairs); ++i) {
+ if (CycloneSysRegPairs[i].Value == Bits) {
+ Valid = true;
+ return CycloneSysRegPairs[i].Name;
+ }
+ }
+ }
+
+ // Now try the instruction-specific registers (either read-only or
+ // write-only).
for (unsigned i = 0; i < NumInstPairs; ++i) {
if (InstPairs[i].Value == Bits) {
Valid = true;
@@ -831,7 +862,7 @@ A64SysReg::SysRegMapper::toString(uint32_t Bits, bool &Valid) const {
+ "_c" + utostr(CRm) + "_" + utostr(Op2);
}
-const NamedImmMapper::Mapping A64TLBI::TLBIMapper::TLBIPairs[] = {
+const AArch64NamedImmMapper::Mapping AArch64TLBI::TLBIMapper::TLBIPairs[] = {
{"ipas2e1is", IPAS2E1IS},
{"ipas2le1is", IPAS2LE1IS},
{"vmalle1is", VMALLE1IS},
@@ -866,308 +897,5 @@ const NamedImmMapper::Mapping A64TLBI::TLBIMapper::TLBIPairs[] = {
{"vaale1", VAALE1}
};
-A64TLBI::TLBIMapper::TLBIMapper()
- : NamedImmMapper(TLBIPairs, 0) {}
-
-bool A64Imms::isFPImm(const APFloat &Val, uint32_t &Imm8Bits) {
- const fltSemantics &Sem = Val.getSemantics();
- unsigned FracBits = APFloat::semanticsPrecision(Sem) - 1;
-
- uint32_t ExpMask;
- switch (FracBits) {
- case 10: // IEEE half-precision
- ExpMask = 0x1f;
- break;
- case 23: // IEEE single-precision
- ExpMask = 0xff;
- break;
- case 52: // IEEE double-precision
- ExpMask = 0x7ff;
- break;
- case 112: // IEEE quad-precision
-    // No immediates are valid for quad precision.
- return false;
- default:
- llvm_unreachable("Only half, single and double precision supported");
- }
-
- uint32_t ExpStart = FracBits;
- uint64_t FracMask = (1ULL << FracBits) - 1;
-
- uint32_t Sign = Val.isNegative();
-
- uint64_t Bits= Val.bitcastToAPInt().getLimitedValue();
- uint64_t Fraction = Bits & FracMask;
- int32_t Exponent = ((Bits >> ExpStart) & ExpMask);
- Exponent -= ExpMask >> 1;
-
- // S[d] = imm8<7>:NOT(imm8<6>):Replicate(imm8<6>, 5):imm8<5:0>:Zeros(19)
- // D[d] = imm8<7>:NOT(imm8<6>):Replicate(imm8<6>, 8):imm8<5:0>:Zeros(48)
- // This translates to: only 4 bits of fraction; -3 <= exp <= 4.
- uint64_t A64FracStart = FracBits - 4;
- uint64_t A64FracMask = 0xf;
-
- // Are there too many fraction bits?
- if (Fraction & ~(A64FracMask << A64FracStart))
- return false;
-
- if (Exponent < -3 || Exponent > 4)
- return false;
-
- uint32_t PackedFraction = (Fraction >> A64FracStart) & A64FracMask;
- uint32_t PackedExp = (Exponent + 7) & 0x7;
-
- Imm8Bits = (Sign << 7) | (PackedExp << 4) | PackedFraction;
- return true;
-}
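
Editor's note: the imm8 layout in the comments above can be checked by hand. For 1.0f the sign is 0, the unbiased exponent is 0 and the fraction is empty, so PackedExp = (0 + 7) & 0x7 = 7 and Imm8Bits = (0 << 7) | (7 << 4) | 0 = 0x70, the value FMOV uses for #1.0. An illustrative check against the removed helper:

    uint32_t Imm8;
    bool OK = A64Imms::isFPImm(APFloat(1.0f), Imm8); // OK == true, Imm8 == 0x70
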
-
-// Encoding of the immediate for logical (immediate) instructions:
-//
-// | N | imms | immr | size | R | S |
-// |---+--------+--------+------+--------------+--------------|
-// | 1 | ssssss | rrrrrr | 64 | UInt(rrrrrr) | UInt(ssssss) |
-// | 0 | 0sssss | xrrrrr | 32 | UInt(rrrrr) | UInt(sssss) |
-// | 0 | 10ssss | xxrrrr | 16 | UInt(rrrr) | UInt(ssss) |
-// | 0 | 110sss | xxxrrr | 8 | UInt(rrr) | UInt(sss) |
-// | 0 | 1110ss | xxxxrr | 4 | UInt(rr) | UInt(ss) |
-// | 0 | 11110s | xxxxxr | 2 | UInt(r) | UInt(s) |
-// | 0 | 11111x | - | | UNALLOCATED | |
-//
-// Columns 'R', 'S' and 'size' specify a "bitmask immediate" of size bits in
-// which the lower S+1 bits are ones and the remaining bits are zero, then
-// rotated right by R bits, which is then replicated across the datapath.
-//
-// + Values of 'N', 'imms' and 'immr' which do not match the above table are
-// RESERVED.
-// + If all 's' bits in the imms field are set then the instruction is
-// RESERVED.
-// + The 'x' bits in the 'immr' field are IGNORED.
-
-bool A64Imms::isLogicalImm(unsigned RegWidth, uint64_t Imm, uint32_t &Bits) {
- int RepeatWidth;
- int Rotation = 0;
- int Num1s = 0;
-
- // Because there are S+1 ones in the replicated mask, an immediate of all
- // zeros is not allowed. Filtering it here is probably more efficient.
- if (Imm == 0) return false;
-
- for (RepeatWidth = RegWidth; RepeatWidth > 1; RepeatWidth /= 2) {
- uint64_t RepeatMask = RepeatWidth == 64 ? -1 : (1ULL << RepeatWidth) - 1;
- uint64_t ReplicatedMask = Imm & RepeatMask;
-
- if (ReplicatedMask == 0) continue;
-
- // First we have to make sure the mask is actually repeated in each slot for
- // this width-specifier.
- bool IsReplicatedMask = true;
- for (unsigned i = RepeatWidth; i < RegWidth; i += RepeatWidth) {
- if (((Imm >> i) & RepeatMask) != ReplicatedMask) {
- IsReplicatedMask = false;
- break;
- }
- }
- if (!IsReplicatedMask) continue;
-
- // Now we have to work out the amount of rotation needed. The first part of
- // this calculation is actually independent of RepeatWidth, but the complex
- // case will depend on it.
- Rotation = countTrailingZeros(Imm);
- if (Rotation == 0) {
- // There were no leading zeros, which means it's either in place or there
- // are 1s at each end (e.g. 0x8003 needs rotating).
- Rotation = RegWidth == 64 ? CountLeadingOnes_64(Imm)
- : CountLeadingOnes_32(Imm);
- Rotation = RepeatWidth - Rotation;
- }
-
- uint64_t ReplicatedOnes = ReplicatedMask;
- if (Rotation != 0 && Rotation != 64)
- ReplicatedOnes = (ReplicatedMask >> Rotation)
- | ((ReplicatedMask << (RepeatWidth - Rotation)) & RepeatMask);
-
- // Of course, they may not actually be ones, so we have to check that:
- if (!isMask_64(ReplicatedOnes))
- continue;
-
- Num1s = CountTrailingOnes_64(ReplicatedOnes);
-
- // We know we've got an almost valid encoding (certainly, if this is invalid
- // no other parameters would work).
- break;
- }
-
- // The encodings which would produce all 1s are RESERVED.
- if (RepeatWidth == 1 || Num1s == RepeatWidth) return false;
-
- uint32_t N = RepeatWidth == 64;
- uint32_t ImmR = RepeatWidth - Rotation;
- uint32_t ImmS = Num1s - 1;
-
- switch (RepeatWidth) {
- default: break; // No action required for other valid rotations.
- case 16: ImmS |= 0x20; break; // 10ssss
- case 8: ImmS |= 0x30; break; // 110sss
- case 4: ImmS |= 0x38; break; // 1110ss
- case 2: ImmS |= 0x3c; break; // 11110s
- }
-
- Bits = ImmS | (ImmR << 6) | (N << 12);
-
- return true;
-}
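
Editor's note: a worked instance of the table above. Imm = 0x5555555555555555 replicates the 2-bit pattern 01, so RepeatWidth = 2 and Num1s = 1; the rotation works out to 2 and cancels against RepeatWidth, giving N = 0, ImmR = 0b000000 and ImmS = 0b111100 (the 11110s row with s = 0), i.e. Bits = 0x03c. An illustrative check against the removed encoder:

    uint32_t Bits;
    bool OK = A64Imms::isLogicalImm(64, 0x5555555555555555ULL, Bits);
    // OK == true, Bits == 0x03c
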
-
-
-bool A64Imms::isLogicalImmBits(unsigned RegWidth, uint32_t Bits,
- uint64_t &Imm) {
- uint32_t N = Bits >> 12;
- uint32_t ImmR = (Bits >> 6) & 0x3f;
- uint32_t ImmS = Bits & 0x3f;
-
- // N=1 encodes a 64-bit replication and is invalid for the 32-bit
- // instructions.
- if (RegWidth == 32 && N != 0) return false;
-
- int Width = 0;
- if (N == 1)
- Width = 64;
- else if ((ImmS & 0x20) == 0)
- Width = 32;
- else if ((ImmS & 0x10) == 0)
- Width = 16;
- else if ((ImmS & 0x08) == 0)
- Width = 8;
- else if ((ImmS & 0x04) == 0)
- Width = 4;
- else if ((ImmS & 0x02) == 0)
- Width = 2;
- else {
- // ImmS is 0b11111x: UNALLOCATED
- return false;
- }
-
- int Num1s = (ImmS & (Width - 1)) + 1;
-
- // All encodings which would map to -1 (signed) are RESERVED.
- if (Num1s == Width) return false;
-
- int Rotation = (ImmR & (Width - 1));
- uint64_t Mask = (1ULL << Num1s) - 1;
- uint64_t WidthMask = Width == 64 ? -1 : (1ULL << Width) - 1;
- if (Rotation != 0 && Rotation != 64)
- Mask = (Mask >> Rotation)
- | ((Mask << (Width - Rotation)) & WidthMask);
-
- Imm = Mask;
- for (unsigned i = 1; i < RegWidth / Width; ++i) {
- Mask <<= Width;
- Imm |= Mask;
- }
-
- return true;
-}
-
-bool A64Imms::isMOVZImm(int RegWidth, uint64_t Value, int &UImm16, int &Shift) {
- // If high bits are set then a 32-bit MOVZ can't possibly work.
- if (RegWidth == 32 && (Value & ~0xffffffffULL))
- return false;
-
- for (int i = 0; i < RegWidth; i += 16) {
- // If the value is 0 when we mask out all the bits that could be set with
- // the current LSL value then it's representable.
- if ((Value & ~(0xffffULL << i)) == 0) {
- Shift = i / 16;
- UImm16 = (Value >> i) & 0xffff;
- return true;
- }
- }
- return false;
-}
-
-bool A64Imms::isMOVNImm(int RegWidth, uint64_t Value, int &UImm16, int &Shift) {
- // MOVN is defined to set its register to NOT(LSL(imm16, shift)).
-
- // We have to be a little careful about a 32-bit register: 0xffff_1234 *is*
- // representable, but ~0xffff_1234 == 0xffff_ffff_0000_edcb which is not
- // a valid input for isMOVZImm.
- if (RegWidth == 32 && (Value & ~0xffffffffULL))
- return false;
-
- uint64_t MOVZEquivalent = RegWidth == 32 ? ~Value & 0xffffffff : ~Value;
-
- return isMOVZImm(RegWidth, MOVZEquivalent, UImm16, Shift);
-}
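
Editor's note: the 0xffff_1234 case mentioned in the comment works out as follows. MOVZEquivalent = ~0xffff1234 & 0xffffffff = 0x0000edcb, which isMOVZImm accepts with Shift = 0, so MOVN w0, #0xedcb materializes the value. An illustrative check against the removed helper:

    int UImm16, Shift;
    bool OK = A64Imms::isMOVNImm(32, 0xffff1234ULL, UImm16, Shift);
    // OK == true, UImm16 == 0xedcb, Shift == 0
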
-
-bool A64Imms::isOnlyMOVNImm(int RegWidth, uint64_t Value,
- int &UImm16, int &Shift) {
- if (isMOVZImm(RegWidth, Value, UImm16, Shift))
- return false;
-
- return isMOVNImm(RegWidth, Value, UImm16, Shift);
-}
-
-// decodeNeonModShiftImm - Decode a Neon OpCmode value into the
-// the shift amount and the shift type (shift zeros or ones in) and
-// returns whether the OpCmode value implies a shift operation.
-bool A64Imms::decodeNeonModShiftImm(unsigned OpCmode, unsigned &ShiftImm,
- unsigned &ShiftOnesIn) {
- ShiftImm = 0;
- ShiftOnesIn = false;
- bool HasShift = true;
-
- if (OpCmode == 0xe) {
- // movi byte
- HasShift = false;
- } else if (OpCmode == 0x1e) {
- // movi 64-bit bytemask
- HasShift = false;
- } else if ((OpCmode & 0xc) == 0x8) {
- // shift zeros, per halfword
- ShiftImm = ((OpCmode & 0x2) >> 1);
- } else if ((OpCmode & 0x8) == 0) {
- // shift zeros, per word
- ShiftImm = ((OpCmode & 0x6) >> 1);
- } else if ((OpCmode & 0xe) == 0xc) {
- // shift ones, per word
- ShiftOnesIn = true;
- ShiftImm = (OpCmode & 0x1);
- } else {
- // per byte, per bytemask
- llvm_unreachable("Unsupported Neon modified immediate");
- }
-
- return HasShift;
-}
-
-// decodeNeonModImm - Decode a NEON modified immediate and OpCmode values
-// into the element value and the element size in bits.
-uint64_t A64Imms::decodeNeonModImm(unsigned Val, unsigned OpCmode,
- unsigned &EltBits) {
- uint64_t DecodedVal = Val;
- EltBits = 0;
-
- if (OpCmode == 0xe) {
- // movi byte
- EltBits = 8;
- } else if (OpCmode == 0x1e) {
- // movi 64-bit bytemask
- DecodedVal = 0;
- for (unsigned ByteNum = 0; ByteNum < 8; ++ByteNum) {
- if ((Val >> ByteNum) & 1)
- DecodedVal |= (uint64_t)0xff << (8 * ByteNum);
- }
- EltBits = 64;
- } else if ((OpCmode & 0xc) == 0x8) {
- // shift zeros, per halfword
- EltBits = 16;
- } else if ((OpCmode & 0x8) == 0) {
- // shift zeros, per word
- EltBits = 32;
- } else if ((OpCmode & 0xe) == 0xc) {
- // shift ones, per word
- EltBits = 32;
- } else {
- llvm_unreachable("Unsupported Neon modified immediate");
- }
- return DecodedVal;
-}
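
Editor's note: for the 64-bit bytemask case (OpCmode 0x1e) each bit of Val expands to a full byte, so Val = 0x81 sets bytes 0 and 7. An illustrative check against the removed helper:

    unsigned EltBits;
    uint64_t V = A64Imms::decodeNeonModImm(0x81, 0x1e, EltBits);
    // V == 0xff000000000000ff, EltBits == 64
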
+AArch64TLBI::TLBIMapper::TLBIMapper()
+ : AArch64NamedImmMapper(TLBIPairs, 0) {}
diff --git a/lib/Target/AArch64/Utils/AArch64BaseInfo.h b/lib/Target/AArch64/Utils/AArch64BaseInfo.h
index 39b042b..9e4c389 100644
--- a/lib/Target/AArch64/Utils/AArch64BaseInfo.h
+++ b/lib/Target/AArch64/Utils/AArch64BaseInfo.h
@@ -1,4 +1,4 @@
-//===-- AArch64BaseInfo.h - Top level definitions for AArch64- --*- C++ -*-===//
+//===-- AArch64BaseInfo.h - Top level definitions for AArch64 ---*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -14,96 +14,271 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_AARCH64_BASEINFO_H
-#define LLVM_AARCH64_BASEINFO_H
+#ifndef AArch64BASEINFO_H
+#define AArch64BASEINFO_H
+// FIXME: Is it easiest to fix this layering violation by moving the .inc
+// #includes from AArch64MCTargetDesc.h to here?
+#include "MCTargetDesc/AArch64MCTargetDesc.h" // For AArch64::X0 and friends.
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/Support/ErrorHandling.h"
namespace llvm {
-// Enums corresponding to AArch64 condition codes
-namespace A64CC {
- // The CondCodes constants map directly to the 4-bit encoding of the
- // condition field for predicated instructions.
- enum CondCodes { // Meaning (integer) Meaning (floating-point)
- EQ = 0, // Equal Equal
- NE, // Not equal Not equal, or unordered
- HS, // Unsigned higher or same >, ==, or unordered
-    LO,  // Unsigned lower              Less than
- MI, // Minus, negative Less than
- PL, // Plus, positive or zero >, ==, or unordered
- VS, // Overflow Unordered
- VC, // No overflow Ordered
- HI, // Unsigned higher Greater than, or unordered
- LS, // Unsigned lower or same Less than or equal
- GE, // Greater than or equal Greater than or equal
- LT, // Less than Less than, or unordered
- GT, // Signed greater than Greater than
- LE, // Signed less than or equal <, ==, or unordered
- AL, // Always (unconditional) Always (unconditional)
- NV, // Always (unconditional) Always (unconditional)
- // Note the NV exists purely to disassemble 0b1111. Execution
- // is "always".
- Invalid
- };
+inline static unsigned getWRegFromXReg(unsigned Reg) {
+ switch (Reg) {
+ case AArch64::X0: return AArch64::W0;
+ case AArch64::X1: return AArch64::W1;
+ case AArch64::X2: return AArch64::W2;
+ case AArch64::X3: return AArch64::W3;
+ case AArch64::X4: return AArch64::W4;
+ case AArch64::X5: return AArch64::W5;
+ case AArch64::X6: return AArch64::W6;
+ case AArch64::X7: return AArch64::W7;
+ case AArch64::X8: return AArch64::W8;
+ case AArch64::X9: return AArch64::W9;
+ case AArch64::X10: return AArch64::W10;
+ case AArch64::X11: return AArch64::W11;
+ case AArch64::X12: return AArch64::W12;
+ case AArch64::X13: return AArch64::W13;
+ case AArch64::X14: return AArch64::W14;
+ case AArch64::X15: return AArch64::W15;
+ case AArch64::X16: return AArch64::W16;
+ case AArch64::X17: return AArch64::W17;
+ case AArch64::X18: return AArch64::W18;
+ case AArch64::X19: return AArch64::W19;
+ case AArch64::X20: return AArch64::W20;
+ case AArch64::X21: return AArch64::W21;
+ case AArch64::X22: return AArch64::W22;
+ case AArch64::X23: return AArch64::W23;
+ case AArch64::X24: return AArch64::W24;
+ case AArch64::X25: return AArch64::W25;
+ case AArch64::X26: return AArch64::W26;
+ case AArch64::X27: return AArch64::W27;
+ case AArch64::X28: return AArch64::W28;
+ case AArch64::FP: return AArch64::W29;
+ case AArch64::LR: return AArch64::W30;
+ case AArch64::SP: return AArch64::WSP;
+ case AArch64::XZR: return AArch64::WZR;
+ }
+ // For anything else, return it unchanged.
+ return Reg;
+}
-} // namespace A64CC
+inline static unsigned getXRegFromWReg(unsigned Reg) {
+ switch (Reg) {
+ case AArch64::W0: return AArch64::X0;
+ case AArch64::W1: return AArch64::X1;
+ case AArch64::W2: return AArch64::X2;
+ case AArch64::W3: return AArch64::X3;
+ case AArch64::W4: return AArch64::X4;
+ case AArch64::W5: return AArch64::X5;
+ case AArch64::W6: return AArch64::X6;
+ case AArch64::W7: return AArch64::X7;
+ case AArch64::W8: return AArch64::X8;
+ case AArch64::W9: return AArch64::X9;
+ case AArch64::W10: return AArch64::X10;
+ case AArch64::W11: return AArch64::X11;
+ case AArch64::W12: return AArch64::X12;
+ case AArch64::W13: return AArch64::X13;
+ case AArch64::W14: return AArch64::X14;
+ case AArch64::W15: return AArch64::X15;
+ case AArch64::W16: return AArch64::X16;
+ case AArch64::W17: return AArch64::X17;
+ case AArch64::W18: return AArch64::X18;
+ case AArch64::W19: return AArch64::X19;
+ case AArch64::W20: return AArch64::X20;
+ case AArch64::W21: return AArch64::X21;
+ case AArch64::W22: return AArch64::X22;
+ case AArch64::W23: return AArch64::X23;
+ case AArch64::W24: return AArch64::X24;
+ case AArch64::W25: return AArch64::X25;
+ case AArch64::W26: return AArch64::X26;
+ case AArch64::W27: return AArch64::X27;
+ case AArch64::W28: return AArch64::X28;
+ case AArch64::W29: return AArch64::FP;
+ case AArch64::W30: return AArch64::LR;
+ case AArch64::WSP: return AArch64::SP;
+ case AArch64::WZR: return AArch64::XZR;
+ }
+ // For anything else, return it unchanged.
+ return Reg;
+}
-inline static const char *A64CondCodeToString(A64CC::CondCodes CC) {
- switch (CC) {
- default: llvm_unreachable("Unknown condition code");
- case A64CC::EQ: return "eq";
- case A64CC::NE: return "ne";
- case A64CC::HS: return "hs";
- case A64CC::LO: return "lo";
- case A64CC::MI: return "mi";
- case A64CC::PL: return "pl";
- case A64CC::VS: return "vs";
- case A64CC::VC: return "vc";
- case A64CC::HI: return "hi";
- case A64CC::LS: return "ls";
- case A64CC::GE: return "ge";
- case A64CC::LT: return "lt";
- case A64CC::GT: return "gt";
- case A64CC::LE: return "le";
- case A64CC::AL: return "al";
- case A64CC::NV: return "nv";
+static inline unsigned getBRegFromDReg(unsigned Reg) {
+ switch (Reg) {
+ case AArch64::D0: return AArch64::B0;
+ case AArch64::D1: return AArch64::B1;
+ case AArch64::D2: return AArch64::B2;
+ case AArch64::D3: return AArch64::B3;
+ case AArch64::D4: return AArch64::B4;
+ case AArch64::D5: return AArch64::B5;
+ case AArch64::D6: return AArch64::B6;
+ case AArch64::D7: return AArch64::B7;
+ case AArch64::D8: return AArch64::B8;
+ case AArch64::D9: return AArch64::B9;
+ case AArch64::D10: return AArch64::B10;
+ case AArch64::D11: return AArch64::B11;
+ case AArch64::D12: return AArch64::B12;
+ case AArch64::D13: return AArch64::B13;
+ case AArch64::D14: return AArch64::B14;
+ case AArch64::D15: return AArch64::B15;
+ case AArch64::D16: return AArch64::B16;
+ case AArch64::D17: return AArch64::B17;
+ case AArch64::D18: return AArch64::B18;
+ case AArch64::D19: return AArch64::B19;
+ case AArch64::D20: return AArch64::B20;
+ case AArch64::D21: return AArch64::B21;
+ case AArch64::D22: return AArch64::B22;
+ case AArch64::D23: return AArch64::B23;
+ case AArch64::D24: return AArch64::B24;
+ case AArch64::D25: return AArch64::B25;
+ case AArch64::D26: return AArch64::B26;
+ case AArch64::D27: return AArch64::B27;
+ case AArch64::D28: return AArch64::B28;
+ case AArch64::D29: return AArch64::B29;
+ case AArch64::D30: return AArch64::B30;
+ case AArch64::D31: return AArch64::B31;
}
+ // For anything else, return it unchanged.
+ return Reg;
}
-inline static A64CC::CondCodes A64StringToCondCode(StringRef CondStr) {
- return StringSwitch<A64CC::CondCodes>(CondStr.lower())
- .Case("eq", A64CC::EQ)
- .Case("ne", A64CC::NE)
- .Case("ne", A64CC::NE)
- .Case("hs", A64CC::HS)
- .Case("cs", A64CC::HS)
- .Case("lo", A64CC::LO)
- .Case("cc", A64CC::LO)
- .Case("mi", A64CC::MI)
- .Case("pl", A64CC::PL)
- .Case("vs", A64CC::VS)
- .Case("vc", A64CC::VC)
- .Case("hi", A64CC::HI)
- .Case("ls", A64CC::LS)
- .Case("ge", A64CC::GE)
- .Case("lt", A64CC::LT)
- .Case("gt", A64CC::GT)
- .Case("le", A64CC::LE)
- .Case("al", A64CC::AL)
- .Case("nv", A64CC::NV)
- .Default(A64CC::Invalid);
+
+static inline unsigned getDRegFromBReg(unsigned Reg) {
+ switch (Reg) {
+ case AArch64::B0: return AArch64::D0;
+ case AArch64::B1: return AArch64::D1;
+ case AArch64::B2: return AArch64::D2;
+ case AArch64::B3: return AArch64::D3;
+ case AArch64::B4: return AArch64::D4;
+ case AArch64::B5: return AArch64::D5;
+ case AArch64::B6: return AArch64::D6;
+ case AArch64::B7: return AArch64::D7;
+ case AArch64::B8: return AArch64::D8;
+ case AArch64::B9: return AArch64::D9;
+ case AArch64::B10: return AArch64::D10;
+ case AArch64::B11: return AArch64::D11;
+ case AArch64::B12: return AArch64::D12;
+ case AArch64::B13: return AArch64::D13;
+ case AArch64::B14: return AArch64::D14;
+ case AArch64::B15: return AArch64::D15;
+ case AArch64::B16: return AArch64::D16;
+ case AArch64::B17: return AArch64::D17;
+ case AArch64::B18: return AArch64::D18;
+ case AArch64::B19: return AArch64::D19;
+ case AArch64::B20: return AArch64::D20;
+ case AArch64::B21: return AArch64::D21;
+ case AArch64::B22: return AArch64::D22;
+ case AArch64::B23: return AArch64::D23;
+ case AArch64::B24: return AArch64::D24;
+ case AArch64::B25: return AArch64::D25;
+ case AArch64::B26: return AArch64::D26;
+ case AArch64::B27: return AArch64::D27;
+ case AArch64::B28: return AArch64::D28;
+ case AArch64::B29: return AArch64::D29;
+ case AArch64::B30: return AArch64::D30;
+ case AArch64::B31: return AArch64::D31;
+ }
+ // For anything else, return it unchanged.
+ return Reg;
}
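
Editor's note: these alias helpers are mutual inverses on the named registers and pass anything else through unchanged. An illustrative spot-check (assert-style, not part of the patch; AArch64::NZCV stands in for any register outside the mapped set):

    assert(getXRegFromWReg(getWRegFromXReg(AArch64::X7)) == AArch64::X7);
    assert(getWRegFromXReg(AArch64::NZCV) == AArch64::NZCV); // not an X reg, unchanged
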
-inline static A64CC::CondCodes A64InvertCondCode(A64CC::CondCodes CC) {
- // It turns out that the condition codes have been designed so that in order
- // to reverse the intent of the condition you only have to invert the low bit:
+namespace AArch64CC {
+
+// The CondCodes constants map directly to the 4-bit encoding of the condition
+// field for predicated instructions.
+enum CondCode { // Meaning (integer) Meaning (floating-point)
+ EQ = 0x0, // Equal Equal
+ NE = 0x1, // Not equal Not equal, or unordered
+ HS = 0x2, // Unsigned higher or same >, ==, or unordered
+ LO = 0x3, // Unsigned lower Less than
+ MI = 0x4, // Minus, negative Less than
+ PL = 0x5, // Plus, positive or zero >, ==, or unordered
+ VS = 0x6, // Overflow Unordered
+ VC = 0x7, // No overflow Not unordered
+ HI = 0x8, // Unsigned higher Greater than, or unordered
+ LS = 0x9, // Unsigned lower or same Less than or equal
+ GE = 0xa, // Greater than or equal Greater than or equal
+ LT = 0xb, // Less than Less than, or unordered
+ GT = 0xc, // Greater than Greater than
+ LE = 0xd, // Less than or equal <, ==, or unordered
+ AL = 0xe, // Always (unconditional) Always (unconditional)
+ NV = 0xf, // Always (unconditional) Always (unconditional)
+ // Note the NV exists purely to disassemble 0b1111. Execution is "always".
+ Invalid
+};
- return static_cast<A64CC::CondCodes>(static_cast<unsigned>(CC) ^ 0x1);
+inline static const char *getCondCodeName(CondCode Code) {
+ switch (Code) {
+ default: llvm_unreachable("Unknown condition code");
+ case EQ: return "eq";
+ case NE: return "ne";
+ case HS: return "hs";
+ case LO: return "lo";
+ case MI: return "mi";
+ case PL: return "pl";
+ case VS: return "vs";
+ case VC: return "vc";
+ case HI: return "hi";
+ case LS: return "ls";
+ case GE: return "ge";
+ case LT: return "lt";
+ case GT: return "gt";
+ case LE: return "le";
+ case AL: return "al";
+ case NV: return "nv";
+ }
+}
+
+inline static CondCode getInvertedCondCode(CondCode Code) {
+ switch (Code) {
+ default: llvm_unreachable("Unknown condition code");
+ case EQ: return NE;
+ case NE: return EQ;
+ case HS: return LO;
+ case LO: return HS;
+ case MI: return PL;
+ case PL: return MI;
+ case VS: return VC;
+ case VC: return VS;
+ case HI: return LS;
+ case LS: return HI;
+ case GE: return LT;
+ case LT: return GE;
+ case GT: return LE;
+ case LE: return GT;
+ }
}
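
Editor's note: the removed A64InvertCondCode relied on the encodings pairing up so that inversion just flips the low bit; the explicit switch above preserves that property, which can be spot-checked (illustrative only):

    for (unsigned CC = AArch64CC::EQ; CC <= AArch64CC::LE; ++CC)
      assert(AArch64CC::getInvertedCondCode(AArch64CC::CondCode(CC)) ==
             AArch64CC::CondCode(CC ^ 0x1));
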
+/// Given a condition code, return NZCV flags that would satisfy that condition.
+/// The flag bits are in the format expected by the ccmp instructions.
+/// Note that many different flag settings can satisfy a given condition code;
+/// this function just returns one of them.
+inline static unsigned getNZCVToSatisfyCondCode(CondCode Code) {
+ // NZCV flags encoded as expected by ccmp instructions, ARMv8 ISA 5.5.7.
+ enum { N = 8, Z = 4, C = 2, V = 1 };
+ switch (Code) {
+ default: llvm_unreachable("Unknown condition code");
+ case EQ: return Z; // Z == 1
+ case NE: return 0; // Z == 0
+ case HS: return C; // C == 1
+ case LO: return 0; // C == 0
+ case MI: return N; // N == 1
+ case PL: return 0; // N == 0
+ case VS: return V; // V == 1
+ case VC: return 0; // V == 0
+ case HI: return C; // C == 1 && Z == 0
+ case LS: return 0; // C == 0 || Z == 1
+ case GE: return 0; // N == V
+ case LT: return N; // N != V
+ case GT: return 0; // Z == 0 && N == V
+ case LE: return Z; // Z == 1 || N != V
+ }
+}
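
Editor's note: for example, EQ needs Z set, so with the N=8/Z=4/C=2/V=1 encoding above the function returns 4. An illustrative call:

    unsigned Flags = AArch64CC::getNZCVToSatisfyCondCode(AArch64CC::EQ); // == 4 (Z)
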
+} // end namespace AArch64CC
+
/// Instances of this class can perform bidirectional mapping from random
/// identifier strings to operand encodings. For example "MSR" takes a named
/// system-register which must be encoded somehow and decoded for printing. This
@@ -115,14 +290,14 @@ inline static A64CC::CondCodes A64InvertCondCode(A64CC::CondCodes CC) {
/// out just how often these instructions are emitted before working on it. It
/// might even be optimal to just reorder the tables for the common instructions
/// rather than changing the algorithm.
-struct NamedImmMapper {
+struct AArch64NamedImmMapper {
struct Mapping {
const char *Name;
uint32_t Value;
};
template<int N>
- NamedImmMapper(const Mapping (&Pairs)[N], uint32_t TooBigImm)
+ AArch64NamedImmMapper(const Mapping (&Pairs)[N], uint32_t TooBigImm)
: Pairs(&Pairs[0]), NumPairs(N), TooBigImm(TooBigImm) {}
StringRef toString(uint32_t Value, bool &Valid) const;
@@ -138,7 +313,7 @@ protected:
uint32_t TooBigImm;
};
-namespace A64AT {
+namespace AArch64AT {
enum ATValues {
Invalid = -1, // Op0 Op1 CRn CRm Op2
S1E1R = 0x43c0, // 01 000 0111 1000 000
@@ -155,14 +330,14 @@ namespace A64AT {
S12E0W = 0x63c7 // 01 100 0111 1000 111
};
- struct ATMapper : NamedImmMapper {
+ struct ATMapper : AArch64NamedImmMapper {
const static Mapping ATPairs[];
ATMapper();
};
}
-namespace A64DB {
+namespace AArch64DB {
enum DBValues {
Invalid = -1,
OSHLD = 0x1,
@@ -179,14 +354,14 @@ namespace A64DB {
SY = 0xf
};
- struct DBarrierMapper : NamedImmMapper {
+ struct DBarrierMapper : AArch64NamedImmMapper {
const static Mapping DBarrierPairs[];
DBarrierMapper();
};
}
-namespace A64DC {
+namespace AArch64DC {
enum DCValues {
Invalid = -1, // Op1 CRn CRm Op2
ZVA = 0x5ba1, // 01 011 0111 0100 001
@@ -199,7 +374,7 @@ namespace A64DC {
CISW = 0x43f2 // 01 000 0111 1110 010
};
- struct DCMapper : NamedImmMapper {
+ struct DCMapper : AArch64NamedImmMapper {
const static Mapping DCPairs[];
DCMapper();
@@ -207,7 +382,7 @@ namespace A64DC {
}
-namespace A64IC {
+namespace AArch64IC {
enum ICValues {
Invalid = -1, // Op1 CRn CRm Op2
IALLUIS = 0x0388, // 000 0111 0001 000
@@ -216,7 +391,7 @@ namespace A64IC {
};
- struct ICMapper : NamedImmMapper {
+ struct ICMapper : AArch64NamedImmMapper {
const static Mapping ICPairs[];
ICMapper();
@@ -227,19 +402,19 @@ namespace A64IC {
}
}
-namespace A64ISB {
+namespace AArch64ISB {
enum ISBValues {
Invalid = -1,
SY = 0xf
};
- struct ISBMapper : NamedImmMapper {
+ struct ISBMapper : AArch64NamedImmMapper {
const static Mapping ISBPairs[];
ISBMapper();
};
}
-namespace A64PRFM {
+namespace AArch64PRFM {
enum PRFMValues {
Invalid = -1,
PLDL1KEEP = 0x00,
@@ -262,14 +437,14 @@ namespace A64PRFM {
PSTL3STRM = 0x15
};
- struct PRFMMapper : NamedImmMapper {
+ struct PRFMMapper : AArch64NamedImmMapper {
const static Mapping PRFMPairs[];
PRFMMapper();
};
}
-namespace A64PState {
+namespace AArch64PState {
enum PStateValues {
Invalid = -1,
SPSel = 0x05,
@@ -277,7 +452,7 @@ namespace A64PState {
DAIFClr = 0x1f
};
- struct PStateMapper : NamedImmMapper {
+ struct PStateMapper : AArch64NamedImmMapper {
const static Mapping PStatePairs[];
PStateMapper();
@@ -285,7 +460,7 @@ namespace A64PState {
}
-namespace A64SE {
+namespace AArch64SE {
enum ShiftExtSpecifiers {
Invalid = -1,
LSL,
@@ -306,7 +481,7 @@ namespace A64SE {
};
}
-namespace A64Layout {
+namespace AArch64Layout {
enum VectorLayout {
Invalid = -1,
VL_8B,
@@ -329,43 +504,43 @@ namespace A64Layout {
}
inline static const char *
-A64VectorLayoutToString(A64Layout::VectorLayout Layout) {
+AArch64VectorLayoutToString(AArch64Layout::VectorLayout Layout) {
switch (Layout) {
- case A64Layout::VL_8B: return ".8b";
- case A64Layout::VL_4H: return ".4h";
- case A64Layout::VL_2S: return ".2s";
- case A64Layout::VL_1D: return ".1d";
- case A64Layout::VL_16B: return ".16b";
- case A64Layout::VL_8H: return ".8h";
- case A64Layout::VL_4S: return ".4s";
- case A64Layout::VL_2D: return ".2d";
- case A64Layout::VL_B: return ".b";
- case A64Layout::VL_H: return ".h";
- case A64Layout::VL_S: return ".s";
- case A64Layout::VL_D: return ".d";
+ case AArch64Layout::VL_8B: return ".8b";
+ case AArch64Layout::VL_4H: return ".4h";
+ case AArch64Layout::VL_2S: return ".2s";
+ case AArch64Layout::VL_1D: return ".1d";
+ case AArch64Layout::VL_16B: return ".16b";
+ case AArch64Layout::VL_8H: return ".8h";
+ case AArch64Layout::VL_4S: return ".4s";
+ case AArch64Layout::VL_2D: return ".2d";
+ case AArch64Layout::VL_B: return ".b";
+ case AArch64Layout::VL_H: return ".h";
+ case AArch64Layout::VL_S: return ".s";
+ case AArch64Layout::VL_D: return ".d";
default: llvm_unreachable("Unknown Vector Layout");
}
}
-inline static A64Layout::VectorLayout
-A64StringToVectorLayout(StringRef LayoutStr) {
- return StringSwitch<A64Layout::VectorLayout>(LayoutStr)
- .Case(".8b", A64Layout::VL_8B)
- .Case(".4h", A64Layout::VL_4H)
- .Case(".2s", A64Layout::VL_2S)
- .Case(".1d", A64Layout::VL_1D)
- .Case(".16b", A64Layout::VL_16B)
- .Case(".8h", A64Layout::VL_8H)
- .Case(".4s", A64Layout::VL_4S)
- .Case(".2d", A64Layout::VL_2D)
- .Case(".b", A64Layout::VL_B)
- .Case(".h", A64Layout::VL_H)
- .Case(".s", A64Layout::VL_S)
- .Case(".d", A64Layout::VL_D)
- .Default(A64Layout::Invalid);
+inline static AArch64Layout::VectorLayout
+AArch64StringToVectorLayout(StringRef LayoutStr) {
+ return StringSwitch<AArch64Layout::VectorLayout>(LayoutStr)
+ .Case(".8b", AArch64Layout::VL_8B)
+ .Case(".4h", AArch64Layout::VL_4H)
+ .Case(".2s", AArch64Layout::VL_2S)
+ .Case(".1d", AArch64Layout::VL_1D)
+ .Case(".16b", AArch64Layout::VL_16B)
+ .Case(".8h", AArch64Layout::VL_8H)
+ .Case(".4s", AArch64Layout::VL_4S)
+ .Case(".2d", AArch64Layout::VL_2D)
+ .Case(".b", AArch64Layout::VL_B)
+ .Case(".h", AArch64Layout::VL_H)
+ .Case(".s", AArch64Layout::VL_S)
+ .Case(".d", AArch64Layout::VL_D)
+ .Default(AArch64Layout::Invalid);
}
-namespace A64SysReg {
+namespace AArch64SysReg {
enum SysRegROValues {
MDCCSR_EL0 = 0x9808, // 10 011 0000 0001 000
DBGDTRRX_EL0 = 0x9828, // 10 011 0000 0101 000
@@ -396,16 +571,16 @@ namespace A64SysReg {
ID_ISAR3_EL1 = 0xc013, // 11 000 0000 0010 011
ID_ISAR4_EL1 = 0xc014, // 11 000 0000 0010 100
ID_ISAR5_EL1 = 0xc015, // 11 000 0000 0010 101
- ID_AA64PFR0_EL1 = 0xc020, // 11 000 0000 0100 000
- ID_AA64PFR1_EL1 = 0xc021, // 11 000 0000 0100 001
- ID_AA64DFR0_EL1 = 0xc028, // 11 000 0000 0101 000
- ID_AA64DFR1_EL1 = 0xc029, // 11 000 0000 0101 001
- ID_AA64AFR0_EL1 = 0xc02c, // 11 000 0000 0101 100
- ID_AA64AFR1_EL1 = 0xc02d, // 11 000 0000 0101 101
- ID_AA64ISAR0_EL1 = 0xc030, // 11 000 0000 0110 000
- ID_AA64ISAR1_EL1 = 0xc031, // 11 000 0000 0110 001
- ID_AA64MMFR0_EL1 = 0xc038, // 11 000 0000 0111 000
- ID_AA64MMFR1_EL1 = 0xc039, // 11 000 0000 0111 001
+ ID_A64PFR0_EL1 = 0xc020, // 11 000 0000 0100 000
+ ID_A64PFR1_EL1 = 0xc021, // 11 000 0000 0100 001
+ ID_A64DFR0_EL1 = 0xc028, // 11 000 0000 0101 000
+ ID_A64DFR1_EL1 = 0xc029, // 11 000 0000 0101 001
+ ID_A64AFR0_EL1 = 0xc02c, // 11 000 0000 0101 100
+ ID_A64AFR1_EL1 = 0xc02d, // 11 000 0000 0101 101
+ ID_A64ISAR0_EL1 = 0xc030, // 11 000 0000 0110 000
+ ID_A64ISAR1_EL1 = 0xc031, // 11 000 0000 0110 001
+ ID_A64MMFR0_EL1 = 0xc038, // 11 000 0000 0111 000
+ ID_A64MMFR1_EL1 = 0xc039, // 11 000 0000 0111 001
MVFR0_EL1 = 0xc018, // 11 000 0000 0011 000
MVFR1_EL1 = 0xc019, // 11 000 0000 0011 001
MVFR2_EL1 = 0xc01a, // 11 000 0000 0011 010
@@ -960,38 +1135,45 @@ namespace A64SysReg {
ICH_LR12_EL2 = 0xe66c, // 11 100 1100 1101 100
ICH_LR13_EL2 = 0xe66d, // 11 100 1100 1101 101
ICH_LR14_EL2 = 0xe66e, // 11 100 1100 1101 110
- ICH_LR15_EL2 = 0xe66f // 11 100 1100 1101 111
+ ICH_LR15_EL2 = 0xe66f, // 11 100 1100 1101 111
+ };
+
+ // Cyclone specific system registers
+ enum CycloneSysRegValues {
+ CPM_IOACC_CTL_EL3 = 0xff90
};
- // Note that these do not inherit from NamedImmMapper. This class is
+ // Note that these do not inherit from AArch64NamedImmMapper. This class is
// sufficiently different in its behaviour that I don't believe it's worth
- // burdening the common NamedImmMapper with abstractions only needed in
+ // burdening the common AArch64NamedImmMapper with abstractions only needed in
// this one case.
struct SysRegMapper {
- static const NamedImmMapper::Mapping SysRegPairs[];
+ static const AArch64NamedImmMapper::Mapping SysRegPairs[];
+ static const AArch64NamedImmMapper::Mapping CycloneSysRegPairs[];
- const NamedImmMapper::Mapping *InstPairs;
+ const AArch64NamedImmMapper::Mapping *InstPairs;
size_t NumInstPairs;
+ uint64_t FeatureBits;
- SysRegMapper() {}
+ SysRegMapper(uint64_t FeatureBits) : FeatureBits(FeatureBits) { }
uint32_t fromString(StringRef Name, bool &Valid) const;
std::string toString(uint32_t Bits, bool &Valid) const;
};
struct MSRMapper : SysRegMapper {
- static const NamedImmMapper::Mapping MSRPairs[];
- MSRMapper();
+ static const AArch64NamedImmMapper::Mapping MSRPairs[];
+ MSRMapper(uint64_t FeatureBits);
};
struct MRSMapper : SysRegMapper {
- static const NamedImmMapper::Mapping MRSPairs[];
- MRSMapper();
+ static const AArch64NamedImmMapper::Mapping MRSPairs[];
+ MRSMapper(uint64_t FeatureBits);
};
uint32_t ParseGenericRegister(StringRef Name, bool &Valid);
}
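
Editor's note: with FeatureBits threaded through, Cyclone-only registers resolve only when the feature is present. A minimal sketch of the new mapper API (FeatureBits is an assumed input taken from the subtarget):

    AArch64SysReg::MRSMapper Mapper(FeatureBits);
    bool Valid;
    uint32_t Reg = Mapper.fromString("cpm_ioacc_ctl_el3", Valid);
    // Valid only when (FeatureBits & AArch64::ProcCyclone) is set.
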
-namespace A64TLBI {
+namespace AArch64TLBI {
enum TLBIValues {
Invalid = -1, // Op0 Op1 CRn CRm Op2
IPAS2E1IS = 0x6401, // 01 100 1000 0000 001
@@ -1028,7 +1210,7 @@ namespace A64TLBI {
VAALE1 = 0x443f // 01 000 1000 0111 111
};
- struct TLBIMapper : NamedImmMapper {
+ struct TLBIMapper : AArch64NamedImmMapper {
const static Mapping TLBIPairs[];
TLBIMapper();
@@ -1051,88 +1233,62 @@ namespace A64TLBI {
return true;
}
}
-}
+}
namespace AArch64II {
-
+ /// Target Operand Flag enum.
enum TOF {
- //===--------------------------------------------------------------===//
+ //===------------------------------------------------------------------===//
// AArch64 Specific MachineOperand flags.
MO_NO_FLAG,
- // MO_GOT - Represents a relocation referring to the GOT entry of a given
- // symbol. Used in adrp.
- MO_GOT,
-
- // MO_GOT_LO12 - Represents a relocation referring to the low 12 bits of the
- // GOT entry of a given symbol. Used in ldr only.
- MO_GOT_LO12,
-
- // MO_DTPREL_* - Represents a relocation referring to the offset from a
- // module's dynamic thread pointer. Used in the local-dynamic TLS access
- // model.
- MO_DTPREL_G1,
- MO_DTPREL_G0_NC,
-
- // MO_GOTTPREL_* - Represents a relocation referring to a GOT entry
- // providing the offset of a variable from the thread-pointer. Used in
- // initial-exec TLS model where this offset is assigned in the static thread
- // block and thus known by the dynamic linker.
- MO_GOTTPREL,
- MO_GOTTPREL_LO12,
-
- // MO_TLSDESC_* - Represents a relocation referring to a GOT entry providing
- // a TLS descriptor chosen by the dynamic linker. Used for the
-    // general-dynamic and local-dynamic TLS access models where very little is
- // known at link-time.
- MO_TLSDESC,
- MO_TLSDESC_LO12,
-
- // MO_TPREL_* - Represents a relocation referring to the offset of a
- // variable from the thread pointer itself. Used in the local-exec TLS
- // access model.
- MO_TPREL_G1,
- MO_TPREL_G0_NC,
-
- // MO_LO12 - On a symbol operand, this represents a relocation containing
- // lower 12 bits of the address. Used in add/sub/ldr/str.
- MO_LO12,
-
- // MO_ABS_G* - Represent the 16-bit granules of an absolute reference using
- // movz/movk instructions.
- MO_ABS_G3,
- MO_ABS_G2_NC,
- MO_ABS_G1_NC,
- MO_ABS_G0_NC
+ MO_FRAGMENT = 0x7,
+
+ /// MO_PAGE - A symbol operand with this flag represents the pc-relative
+ /// offset of the 4K page containing the symbol. This is used with the
+ /// ADRP instruction.
+ MO_PAGE = 1,
+
+ /// MO_PAGEOFF - A symbol operand with this flag represents the offset of
+ /// that symbol within a 4K page. This offset is added to the page address
+ /// to produce the complete address.
+ MO_PAGEOFF = 2,
+
+ /// MO_G3 - A symbol operand with this flag (granule 3) represents the high
+ /// 16-bits of a 64-bit address, used in a MOVZ or MOVK instruction
+ MO_G3 = 3,
+
+ /// MO_G2 - A symbol operand with this flag (granule 2) represents the bits
+ /// 32-47 of a 64-bit address, used in a MOVZ or MOVK instruction
+ MO_G2 = 4,
+
+ /// MO_G1 - A symbol operand with this flag (granule 1) represents the bits
+ /// 16-31 of a 64-bit address, used in a MOVZ or MOVK instruction
+ MO_G1 = 5,
+
+ /// MO_G0 - A symbol operand with this flag (granule 0) represents the bits
+ /// 0-15 of a 64-bit address, used in a MOVZ or MOVK instruction
+ MO_G0 = 6,
+
+ /// MO_GOT - This flag indicates that a symbol operand represents the
+ /// address of the GOT entry for the symbol, rather than the address of
+ /// the symbol itself.
+ MO_GOT = 8,
+
+ /// MO_NC - Indicates whether the linker is expected to check the symbol
+ /// reference for overflow. For example in an ADRP/ADD pair of relocations
+ /// the ADRP usually does check, but not the ADD.
+ MO_NC = 0x10,
+
+ /// MO_TLS - Indicates that the operand being accessed is some kind of
+ /// thread-local symbol. On Darwin, only one type of thread-local access
+ /// exists (pre linker-relaxation), but on ELF the TLSModel used for the
+ /// referee will affect interpretation.
+ MO_TLS = 0x20
};
-}
-
-class APFloat;
-
-namespace A64Imms {
- bool isFPImm(const APFloat &Val, uint32_t &Imm8Bits);
-
- inline bool isFPImm(const APFloat &Val) {
- uint32_t Imm8;
- return isFPImm(Val, Imm8);
- }
-
- bool isLogicalImm(unsigned RegWidth, uint64_t Imm, uint32_t &Bits);
- bool isLogicalImmBits(unsigned RegWidth, uint32_t Bits, uint64_t &Imm);
-
- bool isMOVZImm(int RegWidth, uint64_t Value, int &UImm16, int &Shift);
- bool isMOVNImm(int RegWidth, uint64_t Value, int &UImm16, int &Shift);
-
- // We sometimes want to know whether the immediate is representable with a
- // MOVN but *not* with a MOVZ (because that would take priority).
- bool isOnlyMOVNImm(int RegWidth, uint64_t Value, int &UImm16, int &Shift);
-
- uint64_t decodeNeonModImm(unsigned Val, unsigned OpCmode, unsigned &EltBits);
- bool decodeNeonModShiftImm(unsigned OpCmode, unsigned &ShiftImm,
- unsigned &ShiftOnesIn);
- }
+} // end namespace AArch64II
-} // end namespace llvm;
+} // end namespace llvm
#endif
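The new AArch64II encoding above packs a three-bit address-fragment selector (MO_PAGE through MO_G0) into the low bits of an operand's target flags and keeps MO_GOT, MO_NC and MO_TLS as independent modifier bits. A minimal standalone sketch (plain C++, not part of the patch; the enumerator values mirror the ones above) of how a consumer composes and decomposes such flags:

    #include <cassert>
    #include <cstdint>

    // Mirrors the AArch64II layout above: the low three bits select an
    // address fragment, the higher bits are independent modifiers.
    enum : uint8_t {
      MO_FRAGMENT = 0x7,
      MO_PAGE = 1, MO_PAGEOFF = 2, MO_G3 = 3, MO_G2 = 4, MO_G1 = 5, MO_G0 = 6,
      MO_GOT = 8, MO_NC = 0x10, MO_TLS = 0x20,
    };

    int main() {
      // An ADD materializing the low 12 bits, with overflow checks suppressed:
      uint8_t Flags = MO_PAGEOFF | MO_NC;
      assert((Flags & MO_FRAGMENT) == MO_PAGEOFF); // extract the fragment
      assert(Flags & MO_NC);                       // "no check" modifier set
      assert(!(Flags & MO_TLS));                   // not thread-local
      return 0;
    }

Because the fragment is a field rather than a mask, consumers must compare (Flags & MO_FRAGMENT) against a value; a bare bit test on MO_PAGE alone would also match MO_G3.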
diff --git a/lib/Target/AArch64/Utils/Android.mk b/lib/Target/AArch64/Utils/Android.mk
index b8bf795..3c1d194 100644
--- a/lib/Target/AArch64/Utils/Android.mk
+++ b/lib/Target/AArch64/Utils/Android.mk
@@ -1,5 +1,10 @@
LOCAL_PATH := $(call my-dir)
+arm64_utils_TBLGEN_TABLES := \
+ AArch64GenRegisterInfo.inc \
+ AArch64GenInstrInfo.inc \
+ AArch64GenSubtargetInfo.inc
+
arm64_utils_SRC_FILES := \
AArch64BaseInfo.cpp
@@ -16,7 +21,12 @@ LOCAL_MODULE:= libLLVMARM64Utils
LOCAL_MODULE_TAGS := optional
+TBLGEN_TD_DIR := $(LOCAL_PATH)/..
+TBLGEN_TABLES := $(arm64_utils_TBLGEN_TABLES)
+
include $(LLVM_DEVICE_BUILD_MK)
+include $(LLVM_TBLGEN_RULES_MK)
+include $(LLVM_GEN_INTRINSICS_MK)
include $(BUILD_STATIC_LIBRARY)
endif
@@ -32,5 +42,10 @@ LOCAL_MODULE:= libLLVMARM64Utils
LOCAL_MODULE_TAGS := optional
+TBLGEN_TD_DIR := $(LOCAL_PATH)/..
+TBLGEN_TABLES := $(arm64_utils_TBLGEN_TABLES)
+
include $(LLVM_HOST_BUILD_MK)
+include $(LLVM_TBLGEN_RULES_MK)
+include $(LLVM_GEN_INTRINSICS_MK)
include $(BUILD_HOST_STATIC_LIBRARY)
diff --git a/lib/Target/AArch64/Utils/LLVMBuild.txt b/lib/Target/AArch64/Utils/LLVMBuild.txt
index 4acecc9..bcefeb6 100644
--- a/lib/Target/AArch64/Utils/LLVMBuild.txt
+++ b/lib/Target/AArch64/Utils/LLVMBuild.txt
@@ -1,4 +1,4 @@
-;===- ./lib/Target/AArch646/Utils/LLVMBuild.txt ----------------*- Conf -*--===;
+;===- ./lib/Target/AArch64/Utils/LLVMBuild.txt ----------------*- Conf -*--===;
;
; The LLVM Compiler Infrastructure
;
diff --git a/lib/Target/AArch64/Utils/Makefile b/lib/Target/AArch64/Utils/Makefile
index 0f4a645..0b80f82 100644
--- a/lib/Target/AArch64/Utils/Makefile
+++ b/lib/Target/AArch64/Utils/Makefile
@@ -9,7 +9,8 @@
LEVEL = ../../../..
LIBRARYNAME = LLVMAArch64Utils
-# Hack: we need to include 'main' AArch64 target directory to grab private headers
-#CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+# Hack: we need to include 'main' AArch64 target directory to grab private
+# headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
include $(LEVEL)/Makefile.common
diff --git a/lib/Target/ARM/A15SDOptimizer.cpp b/lib/Target/ARM/A15SDOptimizer.cpp
index 28ea879..94faf6f 100644
--- a/lib/Target/ARM/A15SDOptimizer.cpp
+++ b/lib/Target/ARM/A15SDOptimizer.cpp
@@ -24,7 +24,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "a15-sd-optimizer"
#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
@@ -39,6 +38,8 @@
using namespace llvm;
+#define DEBUG_TYPE "a15-sd-optimizer"
+
namespace {
struct A15SDOptimizer : public MachineFunctionPass {
static char ID;
@@ -90,7 +91,7 @@ namespace {
unsigned createImplicitDef(MachineBasicBlock &MBB,
MachineBasicBlock::iterator InsertBefore,
DebugLoc DL);
-
+
//
// Various property checkers
//
@@ -259,7 +260,7 @@ unsigned A15SDOptimizer::optimizeSDPattern(MachineInstr *MI) {
if (DPRMI && SPRMI) {
// See if the first operand of this insert_subreg is IMPLICIT_DEF
MachineInstr *ECDef = elideCopies(DPRMI);
- if (ECDef != 0 && ECDef->isImplicitDef()) {
+ if (ECDef && ECDef->isImplicitDef()) {
// Another corner case - if we're inserting something that is purely
// a subreg copy of a DPR, just use that DPR.
@@ -348,10 +349,10 @@ MachineInstr *A15SDOptimizer::elideCopies(MachineInstr *MI) {
if (!MI->isFullCopy())
return MI;
if (!TRI->isVirtualRegister(MI->getOperand(1).getReg()))
- return NULL;
+ return nullptr;
MachineInstr *Def = MRI->getVRegDef(MI->getOperand(1).getReg());
if (!Def)
- return NULL;
+ return nullptr;
return elideCopies(Def);
}
@@ -435,7 +436,7 @@ A15SDOptimizer::createDupLane(MachineBasicBlock &MBB,
Out)
.addReg(Reg)
.addImm(Lane));
-
+
return Out;
}
@@ -601,7 +602,7 @@ bool A15SDOptimizer::runOnInstruction(MachineInstr *MI) {
// * INSERT_SUBREG: * If the SPR value was originally in another DPR/QPR
// lane, and the other lane(s) of the DPR/QPR register
// that we are inserting in are undefined, use the
- // original DPR/QPR value.
+ // original DPR/QPR value.
// * Otherwise, fall back on the same strategy as COPY.
//
// * REG_SEQUENCE: * If all except one of the input operands are
@@ -693,7 +694,7 @@ bool A15SDOptimizer::runOnMachineFunction(MachineFunction &Fn) {
MI != ME;) {
Modified |= runOnInstruction(MI++);
}
-
+
}
for (std::set<MachineInstr *>::iterator I = DeadInstr.begin(),
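elideCopies above chases a chain of full COPY instructions back to the underlying definition, giving up (returning nullptr) as soon as the chain leaves virtual registers or loses its def. A standalone sketch of the same chase, iterative rather than recursive, over a toy def map (the Instr type and map are hypothetical, not LLVM's):

    #include <unordered_map>

    struct Instr {
      bool IsFullCopy = false;
      int SrcVReg = -1; // source operand, meaningful when IsFullCopy
    };

    // VRegDef maps a virtual register to its defining instruction.
    Instr *chaseCopies(Instr *I,
                       const std::unordered_map<int, Instr *> &VRegDef) {
      while (I && I->IsFullCopy) {
        auto It = VRegDef.find(I->SrcVReg);
        I = (It == VRegDef.end()) ? nullptr : It->second; // chain broken
      }
      return I; // the real definition, or nullptr
    }

    int main() {
      std::unordered_map<int, Instr *> Defs;
      Instr Def;           // the underlying definition
      Instr Copy{true, 0}; // a full copy of vreg 0
      Defs[0] = &Def;
      return chaseCopies(&Copy, Defs) == &Def ? 0 : 1;
    }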
diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h
index 4412b45..55df29c 100644
--- a/lib/Target/ARM/ARM.h
+++ b/lib/Target/ARM/ARM.h
@@ -49,8 +49,6 @@ FunctionPass *createThumb2SizeReductionPass();
/// \brief Creates an ARM-specific Target Transformation Info pass.
ImmutablePass *createARMTargetTransformInfoPass(const ARMBaseTargetMachine *TM);
-FunctionPass *createARMAtomicExpandPass(const TargetMachine *TM);
-
void LowerARMMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
ARMAsmPrinter &AP);
diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp
index 0fa865f..55e9fe5 100644
--- a/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -12,7 +12,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "asm-printer"
#include "ARMAsmPrinter.h"
#include "ARM.h"
#include "ARMConstantPoolValue.h"
@@ -45,6 +44,7 @@
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/ARMBuildAttributes.h"
+#include "llvm/Support/COFF.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ELF.h"
@@ -55,6 +55,8 @@
#include <cctype>
using namespace llvm;
+#define DEBUG_TYPE "asm-printer"
+
void ARMAsmPrinter::EmitFunctionBodyEnd() {
// Make sure to terminate any constant pools that were at the end
// of the function.
@@ -85,7 +87,7 @@ void ARMAsmPrinter::EmitXXStructor(const Constant *CV) {
? MCSymbolRefExpr::VK_ARM_TARGET1
: MCSymbolRefExpr::VK_None),
OutContext);
-
+
OutStreamer.EmitValue(E, Size);
}
@@ -96,7 +98,28 @@ bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
AFI = MF.getInfo<ARMFunctionInfo>();
MCP = MF.getConstantPool();
- return AsmPrinter::runOnMachineFunction(MF);
+ SetupMachineFunction(MF);
+
+ if (Subtarget->isTargetCOFF()) {
+ bool Internal = MF.getFunction()->hasInternalLinkage();
+ COFF::SymbolStorageClass Scl = Internal ? COFF::IMAGE_SYM_CLASS_STATIC
+ : COFF::IMAGE_SYM_CLASS_EXTERNAL;
+ int Type = COFF::IMAGE_SYM_DTYPE_FUNCTION << COFF::SCT_COMPLEX_TYPE_SHIFT;
+
+ OutStreamer.BeginCOFFSymbolDef(CurrentFnSym);
+ OutStreamer.EmitCOFFSymbolStorageClass(Scl);
+ OutStreamer.EmitCOFFSymbolType(Type);
+ OutStreamer.EndCOFFSymbolDef();
+ }
+
+ // Have common code print out the function header with linkage info etc.
+ EmitFunctionHeader();
+
+ // Emit the rest of the function body.
+ EmitFunctionBody();
+
+ // We didn't modify anything.
+ return false;
}
void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
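The new COFF path in runOnMachineFunction classifies the function symbol before anything is printed: the storage class follows linkage (static for internal, external otherwise), and the symbol type carries "function" in its complex-type bits. A standalone sketch with the COFF constants written out numerically (IMAGE_SYM_CLASS_EXTERNAL = 2, IMAGE_SYM_CLASS_STATIC = 3, IMAGE_SYM_DTYPE_FUNCTION = 2, SCT_COMPLEX_TYPE_SHIFT = 4; these values come from the COFF spec, not from this patch):

    #include <cstdio>

    enum { IMAGE_SYM_CLASS_EXTERNAL = 2, IMAGE_SYM_CLASS_STATIC = 3 };
    enum { IMAGE_SYM_DTYPE_FUNCTION = 2, SCT_COMPLEX_TYPE_SHIFT = 4 };

    int main() {
      bool Internal = false; // stands in for hasInternalLinkage()
      int Scl = Internal ? IMAGE_SYM_CLASS_STATIC : IMAGE_SYM_CLASS_EXTERNAL;
      int Type = IMAGE_SYM_DTYPE_FUNCTION << SCT_COMPLEX_TYPE_SHIFT; // 0x20
      std::printf(".def _fn; .scl %d; .type %#x; .endef\n", Scl, Type);
      return 0;
    }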
@@ -239,7 +262,7 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
if (ARM::GPRPairRegClass.contains(RegBegin)) {
const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo();
unsigned Reg0 = TRI->getSubReg(RegBegin, ARM::gsub_0);
- O << ARMInstPrinter::getRegisterName(Reg0) << ", ";;
+ O << ARMInstPrinter::getRegisterName(Reg0) << ", ";
RegBegin = TRI->getSubReg(RegBegin, ARM::gsub_1);
}
O << ARMInstPrinter::getRegisterName(RegBegin);
@@ -383,7 +406,7 @@ void ARMAsmPrinter::emitInlineAsmEnd(const MCSubtargetInfo &StartInfo,
// If either end mode is unknown (EndInfo == NULL) or different than
// the start mode, then restore the start mode.
const bool WasThumb = isThumb(StartInfo);
- if (EndInfo == NULL || WasThumb != isThumb(*EndInfo)) {
+ if (!EndInfo || WasThumb != isThumb(*EndInfo)) {
OutStreamer.EmitAssemblerFlag(WasThumb ? MCAF_Code16 : MCAF_Code32);
}
}
@@ -456,6 +479,29 @@ void ARMAsmPrinter::EmitStartOfAsmFile(Module &M) {
emitAttributes();
}
+static void
+emitNonLazySymbolPointer(MCStreamer &OutStreamer, MCSymbol *StubLabel,
+ MachineModuleInfoImpl::StubValueTy &MCSym) {
+ // L_foo$stub:
+ OutStreamer.EmitLabel(StubLabel);
+ // .indirect_symbol _foo
+ OutStreamer.EmitSymbolAttribute(MCSym.getPointer(), MCSA_IndirectSymbol);
+
+ if (MCSym.getInt())
+ // External to current translation unit.
+ OutStreamer.EmitIntValue(0, 4/*size*/);
+ else
+ // Internal to current translation unit.
+ //
+ // When we place the LSDA into the TEXT section, the type info
+ // pointers need to be indirect and pc-rel. We accomplish this by
+ // using NLPs; however, sometimes the types are local to the file.
+ // We need to fill in the value for the NLP in those cases.
+ OutStreamer.EmitValue(
+ MCSymbolRefExpr::Create(MCSym.getPointer(), OutStreamer.getContext()),
+ 4 /*size*/);
+}
+
void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) {
if (Subtarget->isTargetMachO()) {
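The hoisted emitNonLazySymbolPointer helper writes one pointer-sized stub per symbol: externals get a zero placeholder that dyld fills at load time through the .indirect_symbol entry, while file-local symbols are written directly because their value is known at link time. An illustrative standalone printer for the two shapes (symbol names hypothetical):

    #include <cstdio>

    // The two shapes of a Mach-O non-lazy symbol pointer entry.
    void printNLP(const char *Stub, const char *Sym, bool External) {
      std::printf("%s:\n\t.indirect_symbol %s\n", Stub, Sym);
      if (External)
        std::printf("\t.long 0\n");       // placeholder, filled by dyld
      else
        std::printf("\t.long %s\n", Sym); // local: value fixed at link time
    }

    int main() {
      printNLP("L_foo$non_lazy_ptr", "_foo", true);  // external case
      printNLP("L_bar$non_lazy_ptr", "_bar", false); // file-local case
      return 0;
    }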
@@ -472,27 +518,9 @@ void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) {
// Switch with ".non_lazy_symbol_pointer" directive.
OutStreamer.SwitchSection(TLOFMacho.getNonLazySymbolPointerSection());
EmitAlignment(2);
- for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
- // L_foo$stub:
- OutStreamer.EmitLabel(Stubs[i].first);
- // .indirect_symbol _foo
- MachineModuleInfoImpl::StubValueTy &MCSym = Stubs[i].second;
- OutStreamer.EmitSymbolAttribute(MCSym.getPointer(),MCSA_IndirectSymbol);
-
- if (MCSym.getInt())
- // External to current translation unit.
- OutStreamer.EmitIntValue(0, 4/*size*/);
- else
- // Internal to current translation unit.
- //
- // When we place the LSDA into the TEXT section, the type info
- // pointers need to be indirect and pc-rel. We accomplish this by
- // using NLPs; however, sometimes the types are local to the file.
- // We need to fill in the value for the NLP in those cases.
- OutStreamer.EmitValue(MCSymbolRefExpr::Create(MCSym.getPointer(),
- OutContext),
- 4/*size*/);
- }
+
+ for (auto &Stub : Stubs)
+ emitNonLazySymbolPointer(OutStreamer, Stub.first, Stub.second);
Stubs.clear();
OutStreamer.AddBlankLine();
@@ -500,17 +528,11 @@ void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) {
Stubs = MMIMacho.GetHiddenGVStubList();
if (!Stubs.empty()) {
- OutStreamer.SwitchSection(getObjFileLowering().getDataSection());
+ OutStreamer.SwitchSection(TLOFMacho.getNonLazySymbolPointerSection());
EmitAlignment(2);
- for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
- // L_foo$stub:
- OutStreamer.EmitLabel(Stubs[i].first);
- // .long _foo
- OutStreamer.EmitValue(MCSymbolRefExpr::
- Create(Stubs[i].second.getPointer(),
- OutContext),
- 4/*size*/);
- }
+
+ for (auto &Stub : Stubs)
+ emitNonLazySymbolPointer(OutStreamer, Stub.first, Stub.second);
Stubs.clear();
OutStreamer.AddBlankLine();
@@ -523,6 +545,28 @@ void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) {
// generates code that does this, it is always safe to set.
OutStreamer.EmitAssemblerFlag(MCAF_SubsectionsViaSymbols);
}
+
+ // Emit a .data.rel section containing any stubs that were created.
+ if (Subtarget->isTargetELF()) {
+ const TargetLoweringObjectFileELF &TLOFELF =
+ static_cast<const TargetLoweringObjectFileELF &>(getObjFileLowering());
+
+ MachineModuleInfoELF &MMIELF = MMI->getObjFileInfo<MachineModuleInfoELF>();
+
+ // Output stubs for external and common global variables.
+ MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList();
+ if (!Stubs.empty()) {
+ OutStreamer.SwitchSection(TLOFELF.getDataRelSection());
+ const DataLayout *TD = TM.getDataLayout();
+
+ for (auto &stub: Stubs) {
+ OutStreamer.EmitLabel(stub.first);
+ OutStreamer.EmitSymbolValue(stub.second.getPointer(),
+ TD->getPointerSize(0));
+ }
+ Stubs.clear();
+ }
+ }
}
//===----------------------------------------------------------------------===//
@@ -575,7 +619,7 @@ void ARMAsmPrinter::emitAttributes() {
getArchForCPU(CPUString, Subtarget));
// Tag_CPU_arch_profile must have the default value of 0 when "Architecture
- // profile is not applicable (e.g. pre v7, or cross-profile code)".
+ // profile is not applicable (e.g. pre v7, or cross-profile code)".
if (Subtarget->hasV7Ops()) {
if (Subtarget->isAClass()) {
ATS.emitAttribute(ARMBuildAttrs::CPU_arch_profile,
@@ -627,6 +671,20 @@ void ARMAsmPrinter::emitAttributes() {
ATS.emitFPU(ARM::VFPV2);
}
+ if (TM.getRelocationModel() == Reloc::PIC_) {
+ // PIC specific attributes.
+ ATS.emitAttribute(ARMBuildAttrs::ABI_PCS_RW_data,
+ ARMBuildAttrs::AddressRWPCRel);
+ ATS.emitAttribute(ARMBuildAttrs::ABI_PCS_RO_data,
+ ARMBuildAttrs::AddressROPCRel);
+ ATS.emitAttribute(ARMBuildAttrs::ABI_PCS_GOT_use,
+ ARMBuildAttrs::AddressGOT);
+ } else {
+ // Allow direct addressing of imported data for all other relocation models.
+ ATS.emitAttribute(ARMBuildAttrs::ABI_PCS_GOT_use,
+ ARMBuildAttrs::AddressDirect);
+ }
+
// Signal various FP modes.
if (!TM.Options.UnsafeFPMath) {
ATS.emitAttribute(ARMBuildAttrs::ABI_FP_denormal, ARMBuildAttrs::Allowed);
@@ -723,7 +781,7 @@ MCSymbol *ARMAsmPrinter::GetARMGVSymbol(const GlobalValue *GV,
MachineModuleInfoImpl::StubValueTy &StubSym =
GV->hasHiddenVisibility() ? MMIMachO.getHiddenGVStubEntry(MCSym) :
MMIMachO.getGVStubEntry(MCSym);
- if (StubSym.getPointer() == 0)
+ if (!StubSym.getPointer())
StubSym = MachineModuleInfoImpl::
StubValueTy(getSymbol(GV), !GV->hasInternalLinkage());
return MCSym;
@@ -971,7 +1029,8 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
RegList.push_back(SrcReg);
break;
}
- ATS.emitRegSave(RegList, Opc == ARM::VSTMDDB_UPD);
+ if (MAI->getExceptionHandlingType() == ExceptionHandling::ARM)
+ ATS.emitRegSave(RegList, Opc == ARM::VSTMDDB_UPD);
} else {
// Changes of stack / frame pointer.
if (SrcReg == ARM::SP) {
@@ -1016,18 +1075,20 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
}
}
- if (DstReg == FramePtr && FramePtr != ARM::SP)
- // Set-up of the frame pointer. Positive values correspond to "add"
- // instruction.
- ATS.emitSetFP(FramePtr, ARM::SP, -Offset);
- else if (DstReg == ARM::SP) {
- // Change of SP by an offset. Positive values correspond to "sub"
- // instruction.
- ATS.emitPad(Offset);
- } else {
- // Move of SP to a register. Positive values correspond to an "add"
- // instruction.
- ATS.emitMovSP(DstReg, -Offset);
+ if (MAI->getExceptionHandlingType() == ExceptionHandling::ARM) {
+ if (DstReg == FramePtr && FramePtr != ARM::SP)
+ // Set-up of the frame pointer. Positive values correspond to "add"
+ // instruction.
+ ATS.emitSetFP(FramePtr, ARM::SP, -Offset);
+ else if (DstReg == ARM::SP) {
+ // Change of SP by an offset. Positive values correspond to "sub"
+ // instruction.
+ ATS.emitPad(Offset);
+ } else {
+ // Move of SP to a register. Positive values correspond to an "add"
+ // instruction.
+ ATS.emitMovSP(DstReg, -Offset);
+ }
}
} else if (DstReg == ARM::SP) {
MI->dump();
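The unwinding change above gates the EHABI directives on the exception-handling model: .save/.setfp/.pad/.movsp are only meaningful when ARM EHABI tables are being produced, so other models now emit nothing. A much-simplified standalone sketch of the dispatch (registers and sign conventions reduced to plain ints; treat the exact signs as an assumption):

    #include <cstdio>

    enum class EH { None, ARM };
    constexpr int SP = 13, FP = 11;

    // Only ARM EHABI wants these directives; everything else stays silent.
    void emitUnwind(EH Model, int DstReg, int Offset) {
      if (Model != EH::ARM)
        return;
      if (DstReg == FP)
        std::printf(".setfp r11, sp, #%d\n", -Offset); // frame pointer set-up
      else if (DstReg == SP)
        std::printf(".pad #%d\n", Offset);             // SP adjusted by Offset
      else
        std::printf(".movsp r%d\n", DstReg);           // SP moved to a register
    }

    int main() {
      emitUnwind(EH::ARM, FP, -8); // e.g. after "add r11, sp, #8"
      emitUnwind(EH::ARM, SP, 16); // e.g. after "sub sp, sp, #16"
      return 0;
    }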
diff --git a/lib/Target/ARM/ARMAsmPrinter.h b/lib/Target/ARM/ARMAsmPrinter.h
index 46c2626..7c103c6 100644
--- a/lib/Target/ARM/ARMAsmPrinter.h
+++ b/lib/Target/ARM/ARMAsmPrinter.h
@@ -47,16 +47,17 @@ class LLVM_LIBRARY_VISIBILITY ARMAsmPrinter : public AsmPrinter {
bool InConstantPool;
public:
explicit ARMAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
- : AsmPrinter(TM, Streamer), AFI(NULL), MCP(NULL), InConstantPool(false) {
- Subtarget = &TM.getSubtarget<ARMSubtarget>();
- }
+ : AsmPrinter(TM, Streamer), AFI(nullptr), MCP(nullptr),
+ InConstantPool(false) {
+ Subtarget = &TM.getSubtarget<ARMSubtarget>();
+ }
const char *getPassName() const override {
return "ARM Assembly / Object Emitter";
}
void printOperand(const MachineInstr *MI, int OpNum, raw_ostream &O,
- const char *Modifier = 0);
+ const char *Modifier = nullptr);
bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
unsigned AsmVariant, const char *ExtraCode,
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 47f5bf9..bc266e8 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -37,11 +37,13 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "arm-instrinfo"
+
#define GET_INSTRINFO_CTOR_DTOR
#include "ARMGenInstrInfo.inc"
-using namespace llvm;
-
static cl::opt<bool>
EnableARM3Addr("enable-arm-3-addr-conv", cl::Hidden,
cl::desc("Enable ARM 2-addr to 3-addr conv"));
@@ -125,14 +127,14 @@ ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
// FIXME: Thumb2 support.
if (!EnableARM3Addr)
- return NULL;
+ return nullptr;
MachineInstr *MI = MBBI;
MachineFunction &MF = *MI->getParent()->getParent();
uint64_t TSFlags = MI->getDesc().TSFlags;
bool isPre = false;
switch ((TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift) {
- default: return NULL;
+ default: return nullptr;
case ARMII::IndexModePre:
isPre = true;
break;
@@ -144,10 +146,10 @@ ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
// operation.
unsigned MemOpc = getUnindexedOpcode(MI->getOpcode());
if (MemOpc == 0)
- return NULL;
+ return nullptr;
- MachineInstr *UpdateMI = NULL;
- MachineInstr *MemMI = NULL;
+ MachineInstr *UpdateMI = nullptr;
+ MachineInstr *MemMI = nullptr;
unsigned AddrMode = (TSFlags & ARMII::AddrModeMask);
const MCInstrDesc &MCID = MI->getDesc();
unsigned NumOps = MCID.getNumOperands();
@@ -169,7 +171,7 @@ ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
if (ARM_AM::getSOImmVal(Amt) == -1)
// Can't encode it in a so_imm operand. This transformation will
// add more than 1 instruction. Abandon!
- return NULL;
+ return nullptr;
UpdateMI = BuildMI(MF, MI->getDebugLoc(),
get(isSub ? ARM::SUBri : ARM::ADDri), WBReg)
.addReg(BaseReg).addImm(Amt)
@@ -273,8 +275,8 @@ ARMBaseInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
MachineBasicBlock *&FBB,
SmallVectorImpl<MachineOperand> &Cond,
bool AllowModify) const {
- TBB = 0;
- FBB = 0;
+ TBB = nullptr;
+ FBB = nullptr;
MachineBasicBlock::iterator I = MBB.end();
if (I == MBB.begin())
@@ -331,7 +333,7 @@ ARMBaseInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
I->isReturn())) {
// Forget any previous condition branch information - it no longer applies.
Cond.clear();
- FBB = 0;
+ FBB = nullptr;
// If we can modify the function, delete everything below this
// unconditional branch.
@@ -405,7 +407,7 @@ ARMBaseInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
assert((Cond.size() == 2 || Cond.size() == 0) &&
"ARM branch conditions have two components!");
- if (FBB == 0) {
+ if (!FBB) {
if (Cond.empty()) { // Unconditional branch?
if (isThumb)
BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB).addImm(ARMCC::AL).addReg(0);
@@ -535,7 +537,8 @@ bool ARMBaseInstrInfo::isPredicable(MachineInstr *MI) const {
return true;
}
-template<> bool IsCPSRDead<MachineInstr>(MachineInstr* MI) {
+namespace llvm {
+template <> bool IsCPSRDead<MachineInstr>(MachineInstr *MI) {
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
if (!MO.isReg() || MO.isUndef() || MO.isUse())
@@ -548,6 +551,7 @@ template<> bool IsCPSRDead<MachineInstr>(MachineInstr* MI) {
// all definitions of CPSR are dead
return true;
}
+}
/// FIXME: Works around a gcc miscompilation with -fstrict-aliasing.
LLVM_ATTRIBUTE_NOINLINE
@@ -620,7 +624,7 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
MI->getOperand(NumOps - (MI->isPredicable() ? 3 : 2));
unsigned JTI = JTOP.getIndex();
const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
- assert(MJTI != 0);
+ assert(MJTI != nullptr);
const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
assert(JTI < JT.size());
// Thumb instructions are 2 byte aligned, but JT entries are 4 byte
@@ -1248,7 +1252,7 @@ static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI) {
static_cast<ARMConstantPoolValue*>(MCPE.Val.MachineCPVal);
unsigned PCLabelId = AFI->createPICLabelUId();
- ARMConstantPoolValue *NewCPV = 0;
+ ARMConstantPoolValue *NewCPV = nullptr;
// FIXME: The below assumes PIC relocation model and that the function
// is Thumb mode (t1 or t2). PCAdjustment would be 8 for ARM mode PIC, and
@@ -1659,10 +1663,10 @@ ARMBaseInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
ARMCC::CondCodes CC = getInstrPredicate(MI, PredReg);
// MOVCC AL can't be inverted. Shouldn't happen.
if (CC == ARMCC::AL || PredReg != ARM::CPSR)
- return NULL;
+ return nullptr;
MI = TargetInstrInfo::commuteInstruction(MI, NewMI);
if (!MI)
- return NULL;
+ return nullptr;
// After swapping the MOVCC operands, also invert the condition.
MI->getOperand(MI->findFirstPredOperandIdx())
.setImm(ARMCC::getOppositeCondition(CC));
@@ -1678,35 +1682,36 @@ static MachineInstr *canFoldIntoMOVCC(unsigned Reg,
const MachineRegisterInfo &MRI,
const TargetInstrInfo *TII) {
if (!TargetRegisterInfo::isVirtualRegister(Reg))
- return 0;
+ return nullptr;
if (!MRI.hasOneNonDBGUse(Reg))
- return 0;
+ return nullptr;
MachineInstr *MI = MRI.getVRegDef(Reg);
if (!MI)
- return 0;
+ return nullptr;
// MI is folded into the MOVCC by predicating it.
if (!MI->isPredicable())
- return 0;
+ return nullptr;
// Check if MI has any non-dead defs or physreg uses. This also detects
// predicated instructions which will be reading CPSR.
for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
// Reject frame index operands, PEI can't handle the predicated pseudos.
if (MO.isFI() || MO.isCPI() || MO.isJTI())
- return 0;
+ return nullptr;
if (!MO.isReg())
continue;
// MI can't have any tied operands, that would conflict with predication.
if (MO.isTied())
- return 0;
+ return nullptr;
if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
- return 0;
+ return nullptr;
if (MO.isDef() && !MO.isDead())
- return 0;
+ return nullptr;
}
bool DontMoveAcrossStores = true;
- if (!MI->isSafeToMove(TII, /* AliasAnalysis = */ 0, DontMoveAcrossStores))
- return 0;
+ if (!MI->isSafeToMove(TII, /* AliasAnalysis = */ nullptr,
+ DontMoveAcrossStores))
+ return nullptr;
return MI;
}
@@ -1741,14 +1746,14 @@ MachineInstr *ARMBaseInstrInfo::optimizeSelect(MachineInstr *MI,
if (!DefMI)
DefMI = canFoldIntoMOVCC(MI->getOperand(1).getReg(), MRI, this);
if (!DefMI)
- return 0;
+ return nullptr;
// Find new register class to use.
MachineOperand FalseReg = MI->getOperand(Invert ? 2 : 1);
unsigned DestReg = MI->getOperand(0).getReg();
const TargetRegisterClass *PreviousClass = MRI.getRegClass(FalseReg.getReg());
if (!MRI.constrainRegClass(DestReg, PreviousClass))
- return 0;
+ return nullptr;
// Create a new predicated version of DefMI.
// Rfalse is the first use.
@@ -2254,7 +2259,7 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2,
// Masked compares sometimes use the same register as the corresponding 'and'.
if (CmpMask != ~0) {
if (!isSuitableForMask(MI, SrcReg, CmpMask, false) || isPredicated(MI)) {
- MI = 0;
+ MI = nullptr;
for (MachineRegisterInfo::use_instr_iterator
UI = MRI->use_instr_begin(SrcReg), UE = MRI->use_instr_end();
UI != UE; ++UI) {
@@ -2281,17 +2286,17 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2,
// One is MI, the other is a SUB instruction.
// For CMPrr(r1,r2), we are looking for SUB(r1,r2) or SUB(r2,r1).
// For CMPri(r1, CmpValue), we are looking for SUBri(r1, CmpValue).
- MachineInstr *Sub = NULL;
+ MachineInstr *Sub = nullptr;
if (SrcReg2 != 0)
// MI is not a candidate for CMPrr.
- MI = NULL;
+ MI = nullptr;
else if (MI->getParent() != CmpInstr->getParent() || CmpValue != 0) {
// Conservatively refuse to convert an instruction which isn't in the same
// BB as the comparison.
// For CMPri, we need to check Sub, thus we can't return here.
if (CmpInstr->getOpcode() == ARM::CMPri ||
CmpInstr->getOpcode() == ARM::t2CMPri)
- MI = NULL;
+ MI = nullptr;
else
return false;
}
@@ -3295,7 +3300,7 @@ static const MachineInstr *getBundledUseMI(const TargetRegisterInfo *TRI,
if (Idx == -1) {
Dist = 0;
- return 0;
+ return nullptr;
}
UseIdx = Idx;
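The IsCPSRDead change earlier in this file is a pure C++ conformance fix: an explicit specialization must be declared in the namespace that owns the primary template, so the definition is now wrapped in namespace llvm. A minimal standalone illustration of the rule:

    namespace lib {
      template <typename T> bool isDead(T *); // primary template lives in lib
    }

    namespace lib {
      // Correct: the explicit specialization is declared inside the
      // namespace that owns the primary template.
      template <> bool isDead<int>(int *) { return true; }
    }

    // Defining it at global scope without naming the namespace, as the old
    // code effectively did, does not match lib::isDead and is ill-formed.

    int main() {
      int x = 0;
      return lib::isDead(&x) ? 0 : 1;
    }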
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h
index 3ddddcb..4b3e740 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -261,7 +261,7 @@ private:
unsigned getInstrLatency(const InstrItineraryData *ItinData,
const MachineInstr *MI,
- unsigned *PredCost = 0) const override;
+ unsigned *PredCost = nullptr) const override;
int getInstrLatency(const InstrItineraryData *ItinData,
SDNode *Node) const override;
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index 8130a2d..a2eee9f 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -44,14 +44,18 @@
using namespace llvm;
ARMBaseRegisterInfo::ARMBaseRegisterInfo(const ARMSubtarget &sti)
- : ARMGenRegisterInfo(ARM::LR, 0, 0, ARM::PC), STI(sti),
- FramePtr((STI.isTargetMachO() || STI.isThumb()) ? ARM::R7 : ARM::R11),
- BasePtr(ARM::R6) {
+ : ARMGenRegisterInfo(ARM::LR, 0, 0, ARM::PC), STI(sti), BasePtr(ARM::R6) {
+ if (STI.isTargetMachO())
+ FramePtr = ARM::R7;
+ else if (STI.isTargetWindows())
+ FramePtr = ARM::R11;
+ else // ARM EABI
+ FramePtr = STI.isThumb() ? ARM::R7 : ARM::R11;
}
-const uint16_t*
+const MCPhysReg*
ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
- const uint16_t *RegList = (STI.isTargetIOS() && !STI.isAAPCS_ABI())
+ const MCPhysReg *RegList = (STI.isTargetIOS() && !STI.isAAPCS_ABI())
? CSR_iOS_SaveList
: CSR_AAPCS_SaveList;
@@ -107,7 +111,7 @@ ARMBaseRegisterInfo::getThisReturnPreservedMask(CallingConv::ID CC) const {
// should return NULL
if (CC == CallingConv::GHC)
// This is academic because all GHC calls are (supposed to be) tail calls
- return NULL;
+ return nullptr;
return (STI.isTargetIOS() && !STI.isAAPCS_ABI())
? CSR_iOS_ThisReturn_RegMask : CSR_AAPCS_ThisReturn_RegMask;
}
@@ -173,7 +177,7 @@ ARMBaseRegisterInfo::getPointerRegClass(const MachineFunction &MF, unsigned Kind
const TargetRegisterClass *
ARMBaseRegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const {
if (RC == &ARM::CCRRegClass)
- return 0; // Can't copy CCR registers.
+ return nullptr; // Can't copy CCR registers.
return RC;
}
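The constructor change above replaces one ternary with an explicit per-OS frame-pointer choice: Mach-O targets always use r7, Windows always uses r11, and ARM EABI keeps the old Thumb-vs-ARM split. Restated as a standalone sketch:

    enum Reg { R7 = 7, R11 = 11 };

    // Restates the ARMBaseRegisterInfo constructor logic (illustrative only).
    Reg pickFramePointer(bool IsMachO, bool IsWindows, bool IsThumb) {
      if (IsMachO)
        return R7;  // Darwin fixes the frame pointer to r7
      if (IsWindows)
        return R11; // Windows on ARM requires r11
      return IsThumb ? R7 : R11; // ARM EABI: depends on the instruction set
    }

    int main() { return pickFramePointer(false, true, false) == R11 ? 0 : 1; }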
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.h b/lib/Target/ARM/ARMBaseRegisterInfo.h
index 66b3c82..91df565 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.h
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.h
@@ -100,8 +100,8 @@ protected:
public:
/// Code Generation virtual methods...
- const uint16_t *
- getCalleeSavedRegs(const MachineFunction *MF = 0) const override;
+ const MCPhysReg *
+ getCalleeSavedRegs(const MachineFunction *MF = nullptr) const override;
const uint32_t *getCallPreservedMask(CallingConv::ID) const override;
const uint32_t *getNoPreservedMask() const;
@@ -186,7 +186,7 @@ public:
void eliminateFrameIndex(MachineBasicBlock::iterator II,
int SPAdj, unsigned FIOperandNum,
- RegScavenger *RS = NULL) const override;
+ RegScavenger *RS = nullptr) const override;
};
} // end namespace llvm
diff --git a/lib/Target/ARM/ARMCallingConv.h b/lib/Target/ARM/ARMCallingConv.h
index 4f94ad2..dc41c1c 100644
--- a/lib/Target/ARM/ARMCallingConv.h
+++ b/lib/Target/ARM/ARMCallingConv.h
@@ -28,7 +28,7 @@ namespace llvm {
static bool f64AssignAPCS(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
CCValAssign::LocInfo &LocInfo,
CCState &State, bool CanFail) {
- static const uint16_t RegList[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 };
+ static const MCPhysReg RegList[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 };
// Try to get the first register.
if (unsigned Reg = State.AllocateReg(RegList, 4))
@@ -71,10 +71,10 @@ static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
static bool f64AssignAAPCS(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
CCValAssign::LocInfo &LocInfo,
CCState &State, bool CanFail) {
- static const uint16_t HiRegList[] = { ARM::R0, ARM::R2 };
- static const uint16_t LoRegList[] = { ARM::R1, ARM::R3 };
- static const uint16_t ShadowRegList[] = { ARM::R0, ARM::R1 };
- static const uint16_t GPRArgRegs[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 };
+ static const MCPhysReg HiRegList[] = { ARM::R0, ARM::R2 };
+ static const MCPhysReg LoRegList[] = { ARM::R1, ARM::R3 };
+ static const MCPhysReg ShadowRegList[] = { ARM::R0, ARM::R1 };
+ static const MCPhysReg GPRArgRegs[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 };
unsigned Reg = State.AllocateReg(HiRegList, ShadowRegList, 2);
if (Reg == 0) {
@@ -123,8 +123,8 @@ static bool CC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
static bool f64RetAssign(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
CCValAssign::LocInfo &LocInfo, CCState &State) {
- static const uint16_t HiRegList[] = { ARM::R0, ARM::R2 };
- static const uint16_t LoRegList[] = { ARM::R1, ARM::R3 };
+ static const MCPhysReg HiRegList[] = { ARM::R0, ARM::R2 };
+ static const MCPhysReg LoRegList[] = { ARM::R1, ARM::R3 };
unsigned Reg = State.AllocateReg(HiRegList, LoRegList, 2);
if (Reg == 0)
@@ -160,6 +160,105 @@ static bool RetCC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
State);
}
+static const uint16_t SRegList[] = { ARM::S0, ARM::S1, ARM::S2, ARM::S3,
+ ARM::S4, ARM::S5, ARM::S6, ARM::S7,
+ ARM::S8, ARM::S9, ARM::S10, ARM::S11,
+ ARM::S12, ARM::S13, ARM::S14, ARM::S15 };
+static const uint16_t DRegList[] = { ARM::D0, ARM::D1, ARM::D2, ARM::D3,
+ ARM::D4, ARM::D5, ARM::D6, ARM::D7 };
+static const uint16_t QRegList[] = { ARM::Q0, ARM::Q1, ARM::Q2, ARM::Q3 };
+
+// Allocate part of an AAPCS HFA or HVA. We assume that each member of the HA
+// has InConsecutiveRegs set, and that the last member also has
+// InConsecutiveRegsLast set. We must process all members of the HA before
+// we can allocate it, as we need to know the total number of registers that
+// will be needed in order to (attempt to) allocate a contiguous block.
+static bool CC_ARM_AAPCS_Custom_HA(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags, CCState &State) {
+ SmallVectorImpl<CCValAssign> &PendingHAMembers = State.getPendingLocs();
+ // AAPCS HFAs must have 1-4 elements, all of the same type
+ assert(PendingHAMembers.size() < 8);
+ if (PendingHAMembers.size() > 0)
+ assert(PendingHAMembers[0].getLocVT() == LocVT);
+
+ // Add the argument to the list to be allocated once we know the size of the
+ // HA
+ PendingHAMembers.push_back(
+ CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
+
+ if (ArgFlags.isInConsecutiveRegsLast()) {
+ assert(PendingHAMembers.size() > 0 && PendingHAMembers.size() <= 8 &&
+ "Homogeneous aggregates must have between 1 and 4 members");
+
+ // Try to allocate a contiguous block of registers, each of the correct
+ // size to hold one member.
+ const uint16_t *RegList;
+ unsigned NumRegs;
+ switch (LocVT.SimpleTy) {
+ case MVT::i32:
+ case MVT::f32:
+ RegList = SRegList;
+ NumRegs = 16;
+ break;
+ case MVT::f64:
+ RegList = DRegList;
+ NumRegs = 8;
+ break;
+ case MVT::v2f64:
+ RegList = QRegList;
+ NumRegs = 4;
+ break;
+ default:
+ llvm_unreachable("Unexpected member type for HA");
+ break;
+ }
+
+ unsigned RegResult =
+ State.AllocateRegBlock(RegList, NumRegs, PendingHAMembers.size());
+
+ if (RegResult) {
+ for (SmallVectorImpl<CCValAssign>::iterator It = PendingHAMembers.begin();
+ It != PendingHAMembers.end(); ++It) {
+ It->convertToReg(RegResult);
+ State.addLoc(*It);
+ ++RegResult;
+ }
+ PendingHAMembers.clear();
+ return true;
+ }
+
+ // Register allocation failed, fall back to the stack
+
+ // Mark all VFP regs as unavailable (AAPCS rule C.2.vfp)
+ for (unsigned regNo = 0; regNo < 16; ++regNo)
+ State.AllocateReg(SRegList[regNo]);
+
+ unsigned Size = LocVT.getSizeInBits() / 8;
+ unsigned Align = Size;
+
+ if (LocVT.SimpleTy == MVT::v2f64 || LocVT.SimpleTy == MVT::i32) {
+ // Vectors are always aligned to 8 bytes. If we've seen an i32 here
+ // it's because it's been split from a larger type, also with align 8.
+ Align = 8;
+ }
+
+ for (auto It : PendingHAMembers) {
+ It.convertToMem(State.AllocateStack(Size, Align));
+ State.addLoc(It);
+
+ // Only the first member needs to be aligned.
+ Align = 1;
+ }
+
+ // All pending members have now been allocated
+ PendingHAMembers.clear();
+ }
+
+ // This will be allocated by the last member of the HA
+ return true;
+}
+
} // End llvm namespace
#endif
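CC_ARM_AAPCS_Custom_HA above defers each homogeneous-aggregate member until the last one arrives, then asks for a contiguous run of S, D or Q registers of the member type; if no such run exists it marks every VFP register unavailable (AAPCS rule C.2.vfp) and falls back to the stack. For intuition, a standalone stand-in for the contiguous-block search (a toy model, not CCState::AllocateRegBlock):

    #include <bitset>
    #include <cstdio>

    // Toy stand-in for CCState::AllocateRegBlock: first index of NumNeeded
    // consecutive free registers out of NumRegs, or -1 on failure.
    int allocRegBlock(std::bitset<16> &Taken, int NumRegs, int NumNeeded) {
      for (int First = 0; First + NumNeeded <= NumRegs; ++First) {
        bool Free = true;
        for (int i = 0; i < NumNeeded; ++i)
          Free = Free && !Taken[First + i];
        if (!Free)
          continue;
        for (int i = 0; i < NumNeeded; ++i)
          Taken[First + i] = true; // claim the whole block
        return First;
      }
      return -1; // caller falls back to the stack
    }

    int main() {
      std::bitset<16> SRegs; // S-register availability, initially all free
      SRegs[1] = true;       // s1 already holds an earlier argument
      std::printf("HFA of 3 floats starts at s%d\n",
                  allocRegBlock(SRegs, 16, 3)); // prints s2
      return 0;
    }

So an HFA of three floats normally lands in s0-s2, but once any register in the needed run is taken the whole aggregate moves to memory as a block.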
diff --git a/lib/Target/ARM/ARMCallingConv.td b/lib/Target/ARM/ARMCallingConv.td
index 7cffd82..526089b 100644
--- a/lib/Target/ARM/ARMCallingConv.td
+++ b/lib/Target/ARM/ARMCallingConv.td
@@ -174,6 +174,9 @@ def CC_ARM_AAPCS_VFP : CallingConv<[
CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+ // HFAs are passed in a contiguous block of registers, or on the stack
+ CCIfConsecutiveRegs<CCCustom<"CC_ARM_AAPCS_Custom_HA">>,
+
CCIfType<[v2f64], CCAssignToReg<[Q0, Q1, Q2, Q3]>>,
CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7, S8,
diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp
index 7359a11..2fd7edd 100644
--- a/lib/Target/ARM/ARMCodeEmitter.cpp
+++ b/lib/Target/ARM/ARMCodeEmitter.cpp
@@ -12,7 +12,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "jit"
#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMConstantPoolValue.h"
@@ -40,6 +39,8 @@
#endif
using namespace llvm;
+#define DEBUG_TYPE "jit"
+
STATISTIC(NumEmitted, "Number of machine instructions emitted");
namespace {
@@ -65,10 +66,10 @@ namespace {
static char ID;
public:
ARMCodeEmitter(TargetMachine &tm, JITCodeEmitter &mce)
- : MachineFunctionPass(ID), JTI(0),
+ : MachineFunctionPass(ID), JTI(nullptr),
II((const ARMBaseInstrInfo *)tm.getInstrInfo()),
TD(tm.getDataLayout()), TM(tm),
- MCE(mce), MCPEs(0), MJTEs(0),
+ MCE(mce), MCPEs(nullptr), MJTEs(nullptr),
IsPIC(TM.getRelocationModel() == Reloc::PIC_), IsThumb(false) {}
/// getBinaryCodeForInstr - This function, generated by the
@@ -373,7 +374,7 @@ bool ARMCodeEmitter::runOnMachineFunction(MachineFunction &MF) {
Subtarget = &TM.getSubtarget<ARMSubtarget>();
MCPEs = &MF.getConstantPool()->getConstants();
- MJTEs = 0;
+ MJTEs = nullptr;
if (MF.getJumpTableInfo()) MJTEs = &MF.getJumpTableInfo()->getJumpTables();
IsPIC = TM.getRelocationModel() == Reloc::PIC_;
IsThumb = MF.getInfo<ARMFunctionInfo>()->isThumbFunction();
diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp
index ba05171..ce264ee 100644
--- a/lib/Target/ARM/ARMConstantIslandPass.cpp
+++ b/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -13,7 +13,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "arm-cp-islands"
#include "ARM.h"
#include "ARMMachineFunctionInfo.h"
#include "MCTargetDesc/ARMAddressingModes.h"
@@ -36,6 +35,8 @@
#include <algorithm>
using namespace llvm;
+#define DEBUG_TYPE "arm-cp-islands"
+
STATISTIC(NumCPEs, "Number of constpool entries");
STATISTIC(NumSplit, "Number of uncond branches inserted");
STATISTIC(NumCBrFixed, "Number of cond branches fixed");
@@ -593,7 +594,7 @@ ARMConstantIslands::CPEntry
if (CPEs[i].CPEMI == CPEMI)
return &CPEs[i];
}
- return NULL;
+ return nullptr;
}
/// getCPELogAlign - Returns the required alignment of the constant pool entry
@@ -1102,7 +1103,7 @@ bool ARMConstantIslands::decrementCPEReferenceCount(unsigned CPI,
assert(CPE && "Unexpected!");
if (--CPE->RefCount == 0) {
removeDeadCPEMI(CPEMI);
- CPE->CPEMI = NULL;
+ CPE->CPEMI = nullptr;
--NumCPEs;
return true;
}
@@ -1135,7 +1136,7 @@ int ARMConstantIslands::findInRangeCPEntry(CPUser& U, unsigned UserOffset)
if (CPEs[i].CPEMI == CPEMI)
continue;
// Removing CPEs can leave empty entries, skip
- if (CPEs[i].CPEMI == NULL)
+ if (CPEs[i].CPEMI == nullptr)
continue;
if (isCPEntryInRange(UserMI, UserOffset, CPEs[i].CPEMI, U.getMaxDisp(),
U.NegOk)) {
@@ -1317,7 +1318,7 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex,
++MI;
unsigned CPUIndex = CPUserIndex+1;
unsigned NumCPUsers = CPUsers.size();
- MachineInstr *LastIT = 0;
+ MachineInstr *LastIT = nullptr;
for (unsigned Offset = UserOffset+TII->GetInstSizeInBytes(UserMI);
Offset < BaseInsertOffset;
Offset += TII->GetInstSizeInBytes(MI), MI = std::next(MI)) {
@@ -1491,7 +1492,7 @@ bool ARMConstantIslands::removeUnusedCPEntries() {
for (unsigned j = 0, ee = CPEs.size(); j != ee; ++j) {
if (CPEs[j].RefCount == 0 && CPEs[j].CPEMI) {
removeDeadCPEMI(CPEs[j].CPEMI);
- CPEs[j].CPEMI = NULL;
+ CPEs[j].CPEMI = nullptr;
MadeChange = true;
}
}
@@ -1844,7 +1845,7 @@ bool ARMConstantIslands::optimizeThumb2JumpTables() {
// FIXME: After the tables are shrunk, can we get rid some of the
// constantpool tables?
MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
- if (MJTI == 0) return false;
+ if (!MJTI) return false;
const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
for (unsigned i = 0, e = T2JumpTables.size(); i != e; ++i) {
@@ -1970,7 +1971,7 @@ bool ARMConstantIslands::reorderThumb2JumpTables() {
bool MadeChange = false;
MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
- if (MJTI == 0) return false;
+ if (!MJTI) return false;
const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
for (unsigned i = 0, e = T2JumpTables.size(); i != e; ++i) {
@@ -2012,7 +2013,7 @@ adjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB) {
// try to move it; otherwise, create a new block following the jump
// table that branches back to the actual target. This is a very simple
// heuristic. FIXME: We can definitely improve it.
- MachineBasicBlock *TBB = 0, *FBB = 0;
+ MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
SmallVector<MachineOperand, 4> Cond;
SmallVector<MachineOperand, 4> CondPrior;
MachineFunction::iterator BBi = BB;
@@ -2032,7 +2033,7 @@ adjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB) {
// Update numbering to account for the block being moved.
MF->RenumberBlocks();
++NumJTMoved;
- return NULL;
+ return nullptr;
}
// Create a new MBB for the code after the jump BB.
diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index bd4ee44..6045738 100644
--- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -14,7 +14,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "arm-pseudo"
#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
@@ -23,6 +22,7 @@
#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBundle.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/Support/CommandLine.h"
@@ -31,6 +31,8 @@
#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
+#define DEBUG_TYPE "arm-pseudo"
+
static cl::opt<bool>
VerifyARMPseudo("verify-arm-pseudo-expand", cl::Hidden,
cl::desc("Verify machine code after expanding ARM pseudos"));
@@ -345,7 +347,7 @@ static const NEONLdStTableEntry *LookupNEONLdSt(unsigned Opcode) {
std::lower_bound(NEONLdStTable, NEONLdStTable + NumEntries, Opcode);
if (I != NEONLdStTable + NumEntries && I->PseudoOpc == Opcode)
return I;
- return NULL;
+ return nullptr;
}
/// GetDSubRegs - Get 4 D subregisters of a Q, QQ, or QQQQ register,
@@ -614,6 +616,39 @@ void ARMExpandPseudo::ExpandVTBL(MachineBasicBlock::iterator &MBBI,
MI.eraseFromParent();
}
+static bool IsAnAddressOperand(const MachineOperand &MO) {
+ // This check is overly conservative. Unless we are certain that the machine
+ // operand is not a symbol reference, we return that it is a symbol reference.
+ // This is important as the load pair may not be split up on Windows.
+ switch (MO.getType()) {
+ case MachineOperand::MO_Register:
+ case MachineOperand::MO_Immediate:
+ case MachineOperand::MO_CImmediate:
+ case MachineOperand::MO_FPImmediate:
+ return false;
+ case MachineOperand::MO_MachineBasicBlock:
+ return true;
+ case MachineOperand::MO_FrameIndex:
+ return false;
+ case MachineOperand::MO_ConstantPoolIndex:
+ case MachineOperand::MO_TargetIndex:
+ case MachineOperand::MO_JumpTableIndex:
+ case MachineOperand::MO_ExternalSymbol:
+ case MachineOperand::MO_GlobalAddress:
+ case MachineOperand::MO_BlockAddress:
+ return true;
+ case MachineOperand::MO_RegisterMask:
+ case MachineOperand::MO_RegisterLiveOut:
+ return false;
+ case MachineOperand::MO_Metadata:
+ case MachineOperand::MO_MCSymbol:
+ return true;
+ case MachineOperand::MO_CFIIndex:
+ return false;
+ }
+ llvm_unreachable("unhandled machine operand type");
+}
+
void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI) {
MachineInstr &MI = *MBBI;
@@ -624,10 +659,14 @@ void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB,
bool DstIsDead = MI.getOperand(0).isDead();
bool isCC = Opcode == ARM::MOVCCi32imm || Opcode == ARM::t2MOVCCi32imm;
const MachineOperand &MO = MI.getOperand(isCC ? 2 : 1);
+ bool RequiresBundling = STI->isTargetWindows() && IsAnAddressOperand(MO);
MachineInstrBuilder LO16, HI16;
if (!STI->hasV6T2Ops() &&
(Opcode == ARM::MOVi32imm || Opcode == ARM::MOVCCi32imm)) {
+ // FIXME Windows CE supports older ARM CPUs
+ assert(!STI->isTargetWindows() && "Windows on ARM requires ARMv7+");
+
// Expand into a movi + orr.
LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVi), DstReg);
HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::ORRri))
@@ -664,17 +703,29 @@ void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB,
.addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
.addReg(DstReg);
- if (MO.isImm()) {
+ switch (MO.getType()) {
+ case MachineOperand::MO_Immediate: {
unsigned Imm = MO.getImm();
unsigned Lo16 = Imm & 0xffff;
unsigned Hi16 = (Imm >> 16) & 0xffff;
LO16 = LO16.addImm(Lo16);
HI16 = HI16.addImm(Hi16);
- } else {
+ break;
+ }
+ case MachineOperand::MO_ExternalSymbol: {
+ const char *ES = MO.getSymbolName();
+ unsigned TF = MO.getTargetFlags();
+ LO16 = LO16.addExternalSymbol(ES, TF | ARMII::MO_LO16);
+ HI16 = HI16.addExternalSymbol(ES, TF | ARMII::MO_HI16);
+ break;
+ }
+ default: {
const GlobalValue *GV = MO.getGlobal();
unsigned TF = MO.getTargetFlags();
LO16 = LO16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_LO16);
HI16 = HI16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_HI16);
+ break;
+ }
}
LO16->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
@@ -682,6 +733,9 @@ void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB,
LO16.addImm(Pred).addReg(PredReg);
HI16.addImm(Pred).addReg(PredReg);
+ if (RequiresBundling)
+ finalizeBundle(MBB, &*LO16, &*MBBI);
+
TransferImpOps(MI, LO16, HI16);
MI.eraseFromParent();
}
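ExpandMOV32BitImm now switches on the operand kind, attaching ARMII::MO_LO16/MO_HI16 target flags to external symbols just as it already did for globals, and on Windows it bundles the MOVW/MOVT pair whenever the operand might be a symbol reference so later passes cannot separate the paired relocation. The plain-immediate split it performs, as a standalone sketch:

    #include <cstdint>
    #include <cstdio>

    // Splits a 32-bit value the way the MOVW/MOVT expansion above does.
    void splitImm32(uint32_t Imm) {
      uint32_t Lo16 = Imm & 0xffff;
      uint32_t Hi16 = (Imm >> 16) & 0xffff;
      std::printf("movw r0, #%u\n", Lo16); // writes low half, zeroes the top
      std::printf("movt r0, #%u\n", Hi16); // patches in the high half
    }

    int main() {
      splitImm32(0xdeadbeef); // movw r0, #48879 / movt r0, #57005
      return 0;
    }

For a symbol, the same two instructions carry the symbol plus MO_LO16 or MO_HI16 instead of a literal, which is why the pair must stay adjacent when the object format resolves them together.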
diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp
index c442444..6f8fb1a 100644
--- a/lib/Target/ARM/ARMFastISel.cpp
+++ b/lib/Target/ARM/ARMFastISel.cpp
@@ -166,8 +166,6 @@ class ARMFastISel final : public FastISel {
// Utility routines.
private:
- unsigned constrainOperandRegClass(const MCInstrDesc &II, unsigned OpNum,
- unsigned Op);
bool isTypeLegal(Type *Ty, MVT &VT);
bool isLoadTypeLegal(Type *Ty, MVT &VT);
bool ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
@@ -191,6 +189,8 @@ class ARMFastISel final : public FastISel {
unsigned ARMSelectCallOp(bool UseReg);
unsigned ARMLowerPICELF(const GlobalValue *GV, unsigned Align, MVT VT);
+ const TargetLowering *getTargetLowering() { return TM.getTargetLowering(); }
+
// Call handling routines.
private:
CCAssignFn *CCAssignFnForCall(CallingConv::ID CC,
@@ -283,23 +283,6 @@ ARMFastISel::AddOptionalDefs(const MachineInstrBuilder &MIB) {
return MIB;
}
-unsigned ARMFastISel::constrainOperandRegClass(const MCInstrDesc &II,
- unsigned Op, unsigned OpNum) {
- if (TargetRegisterInfo::isVirtualRegister(Op)) {
- const TargetRegisterClass *RegClass =
- TII.getRegClass(II, OpNum, &TRI, *FuncInfo.MF);
- if (!MRI.constrainRegClass(Op, RegClass)) {
- // If it's not legal to COPY between the register classes, something
- // has gone very wrong before we got here.
- unsigned NewOp = createResultReg(RegClass);
- AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(TargetOpcode::COPY), NewOp).addReg(Op));
- return NewOp;
- }
- }
- return Op;
-}
-
unsigned ARMFastISel::FastEmitInst_r(unsigned MachineInstOpcode,
const TargetRegisterClass *RC,
unsigned Op0, bool Op0IsKill) {
@@ -769,7 +752,7 @@ bool ARMFastISel::isLoadTypeLegal(Type *Ty, MVT &VT) {
// Computes the address to get to an object.
bool ARMFastISel::ARMComputeAddress(const Value *Obj, Address &Addr) {
// Some boilerplate from the X86 FastISel.
- const User *U = NULL;
+ const User *U = nullptr;
unsigned Opcode = Instruction::UserOp1;
if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
// Don't walk into other basic blocks unless the object is an alloca from
@@ -1400,7 +1383,7 @@ bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
const APInt &CIVal = ConstInt->getValue();
Imm = (isZExt) ? (int)CIVal.getZExtValue() : (int)CIVal.getSExtValue();
// For INT_MIN/LONG_MIN (i.e., 0x80000000) we need to use a cmp, rather
- // then a cmn, because there is no way to represent 2147483648 as a
+ // than a cmn, because there is no way to represent 2147483648 as a
// signed 32-bit int.
if (Imm < 0 && Imm != (int)0x80000000) {
isNegativeImm = true;
@@ -2182,7 +2165,8 @@ unsigned ARMFastISel::getLibcallReg(const Twine &Name) {
if (!LCREVT.isSimple()) return 0;
GlobalValue *GV = new GlobalVariable(M, Type::getInt32Ty(*Context), false,
- GlobalValue::ExternalLinkage, 0, Name);
+ GlobalValue::ExternalLinkage, nullptr,
+ Name);
assert(GV->getType() == GVTy && "We miscomputed the type for the global!");
return ARMMaterializeGV(GV, LCREVT.getSimpleVT());
}
@@ -2286,7 +2270,7 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) {
}
bool ARMFastISel::SelectCall(const Instruction *I,
- const char *IntrMemName = 0) {
+ const char *IntrMemName = nullptr) {
const CallInst *CI = cast<CallInst>(I);
const Value *Callee = CI->getCalledValue();
@@ -3092,6 +3076,6 @@ namespace llvm {
TM.Options.NoFramePointerElim = true;
return new ARMFastISel(funcInfo, libInfo);
}
- return 0;
+ return nullptr;
}
}
diff --git a/lib/Target/ARM/ARMFeatures.h b/lib/Target/ARM/ARMFeatures.h
index a30f4cd..e191a3c 100644
--- a/lib/Target/ARM/ARMFeatures.h
+++ b/lib/Target/ARM/ARMFeatures.h
@@ -1,4 +1,4 @@
-//===-- ARMFeatures.h - Checks for ARM instruction features ------*- C++ -*-===//
+//===-- ARMFeatures.h - Checks for ARM instruction features -----*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -16,11 +16,11 @@
#include "MCTargetDesc/ARMMCTargetDesc.h"
+namespace llvm {
+
template<typename InstrType> // could be MachineInstr or MCInst
bool IsCPSRDead(InstrType *Instr);
-namespace llvm {
-
template<typename InstrType> // could be MachineInstr or MCInst
inline bool isV8EligibleForIT(InstrType *Instr) {
switch (Instr->getOpcode()) {
diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp
index 36ecfca..0caf4bf 100644
--- a/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/lib/Target/ARM/ARMFrameLowering.cpp
@@ -87,7 +87,7 @@ ARMFrameLowering::canSimplifyCallFramePseudos(const MachineFunction &MF) const {
static bool isCSRestore(MachineInstr *MI,
const ARMBaseInstrInfo &TII,
- const uint16_t *CSRegs) {
+ const MCPhysReg *CSRegs) {
// Integer spill area is handled with "pop".
if (isPopOpcode(MI->getOpcode())) {
// The first two operands are predicates. The last two are
@@ -142,6 +142,14 @@ static int sizeOfSPAdjustment(const MachineInstr *MI) {
return count;
}
+static bool WindowsRequiresStackProbe(const MachineFunction &MF,
+ size_t StackSizeInBytes) {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ if (MFI->getStackProtectorIndex() > 0)
+ return StackSizeInBytes >= 4080;
+ return StackSizeInBytes >= 4096;
+}
+
void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
MachineBasicBlock &MBB = MF.front();
MachineBasicBlock::iterator MBBI = MBB.begin();
@@ -149,15 +157,16 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
MachineModuleInfo &MMI = MF.getMMI();
MCContext &Context = MMI.getContext();
+ const TargetMachine &TM = MF.getTarget();
const MCRegisterInfo *MRI = Context.getRegisterInfo();
const ARMBaseRegisterInfo *RegInfo =
- static_cast<const ARMBaseRegisterInfo*>(MF.getTarget().getRegisterInfo());
+ static_cast<const ARMBaseRegisterInfo*>(TM.getRegisterInfo());
const ARMBaseInstrInfo &TII =
- *static_cast<const ARMBaseInstrInfo*>(MF.getTarget().getInstrInfo());
+ *static_cast<const ARMBaseInstrInfo*>(TM.getInstrInfo());
assert(!AFI->isThumb1OnlyFunction() &&
"This emitPrologue does not support Thumb1!");
bool isARM = !AFI->isThumbFunction();
- unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment();
+ unsigned Align = TM.getFrameLowering()->getStackAlignment();
unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(Align);
unsigned NumBytes = MFI->getStackSize();
const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
@@ -187,7 +196,8 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
.addCFIIndex(CFIIndex);
}
- if (!AFI->hasStackFrame()) {
+ if (!AFI->hasStackFrame() &&
+ (!STI.isTargetWindows() || !WindowsRequiresStackProbe(MF, NumBytes))) {
if (NumBytes - ArgRegsSaveSize != 0) {
emitSPUpdate(isARM, MBB, MBBI, dl, TII, -(NumBytes - ArgRegsSaveSize),
MachineInstr::FrameSetup);
@@ -284,6 +294,51 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
} else
NumBytes = DPRCSOffset;
+ if (STI.isTargetWindows() && WindowsRequiresStackProbe(MF, NumBytes)) {
+ uint32_t NumWords = NumBytes >> 2;
+
+ if (NumWords < 65536)
+ AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi16), ARM::R4)
+ .addImm(NumWords)
+ .setMIFlags(MachineInstr::FrameSetup));
+ else
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi32imm), ARM::R4)
+ .addImm(NumWords)
+ .setMIFlags(MachineInstr::FrameSetup);
+
+ switch (TM.getCodeModel()) {
+ case CodeModel::Small:
+ case CodeModel::Medium:
+ case CodeModel::Default:
+ case CodeModel::Kernel:
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tBL))
+ .addImm((unsigned)ARMCC::AL).addReg(0)
+ .addExternalSymbol("__chkstk")
+ .addReg(ARM::R4, RegState::Implicit)
+ .setMIFlags(MachineInstr::FrameSetup);
+ break;
+ case CodeModel::Large:
+ case CodeModel::JITDefault:
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi32imm), ARM::R12)
+ .addExternalSymbol("__chkstk")
+ .setMIFlags(MachineInstr::FrameSetup);
+
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tBLXr))
+ .addImm((unsigned)ARMCC::AL).addReg(0)
+ .addReg(ARM::R12, RegState::Kill)
+ .addReg(ARM::R4, RegState::Implicit)
+ .setMIFlags(MachineInstr::FrameSetup);
+ break;
+ }
+
+ AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::t2SUBrr),
+ ARM::SP)
+ .addReg(ARM::SP, RegState::Define)
+ .addReg(ARM::R4, RegState::Kill)
+ .setMIFlags(MachineInstr::FrameSetup)));
+ NumBytes = 0;
+ }
+
unsigned adjustedGPRCS1Size = GPRCS1Size;
if (NumBytes) {
// Adjust SP after all the callee-save spills.
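On Windows, frames of a page or more (4096 bytes, or 4080 once a stack-protector slot is present) must be probed: the prologue loads the size in 32-bit words into r4 and calls __chkstk, and the final t2SUBrr commits the adjustment, which implies __chkstk leaves the byte count in r4 (an inference from the code above, not stated in the patch). The threshold test as a standalone sketch:

    #include <cstdint>
    #include <cstdio>

    // Mirrors WindowsRequiresStackProbe above: one guard slot is reserved
    // when stack protection is enabled, lowering the threshold to 4080.
    bool needsStackProbe(uint64_t StackSizeInBytes, bool HasStackProtector) {
      return StackSizeInBytes >= (HasStackProtector ? 4080u : 4096u);
    }

    int main() {
      uint64_t NumBytes = 16384;
      if (needsStackProbe(NumBytes, false))
        std::printf("movw r4, #%llu\nbl __chkstk\nsub sp, sp, r4\n",
                    (unsigned long long)(NumBytes >> 2)); // size in words
      return 0;
    }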
@@ -316,10 +371,9 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
MachineBasicBlock::iterator Pos = ++GPRCS1Push;
BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex);
- for (std::vector<CalleeSavedInfo>::const_iterator I = CSI.begin(),
- E = CSI.end(); I != E; ++I) {
- unsigned Reg = I->getReg();
- int FI = I->getFrameIdx();
+ for (const auto &Entry : CSI) {
+ unsigned Reg = Entry.getReg();
+ int FI = Entry.getFrameIdx();
switch (Reg) {
case ARM::R8:
case ARM::R9:
@@ -382,10 +436,9 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex);
}
- for (std::vector<CalleeSavedInfo>::const_iterator I = CSI.begin(),
- E = CSI.end(); I != E; ++I) {
- unsigned Reg = I->getReg();
- int FI = I->getFrameIdx();
+ for (const auto &Entry : CSI) {
+ unsigned Reg = Entry.getReg();
+ int FI = Entry.getFrameIdx();
switch (Reg) {
case ARM::R8:
case ARM::R9:
@@ -411,7 +464,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
do {
MachineBasicBlock::iterator Push = DPRCSPush++;
if (!HasFP) {
- CFAOffset -= sizeOfSPAdjustment(Push);;
+ CFAOffset -= sizeOfSPAdjustment(Push);
unsigned CFIIndex = MMI.addFrameInst(
MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset));
BuildMI(MBB, DPRCSPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
@@ -419,10 +472,9 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
}
} while (DPRCSPush->getOpcode() == ARM::VSTMDDB_UPD);
- for (std::vector<CalleeSavedInfo>::const_iterator I = CSI.begin(),
- E = CSI.end(); I != E; ++I) {
- unsigned Reg = I->getReg();
- int FI = I->getFrameIdx();
+ for (const auto &Entry : CSI) {
+ unsigned Reg = Entry.getReg();
+ int FI = Entry.getFrameIdx();
if ((Reg >= ARM::D0 && Reg <= ARM::D31) &&
(Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs())) {
unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
@@ -540,7 +592,7 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes - ArgRegsSaveSize);
} else {
// Unwind MBBI to point to first LDR / VLDRD.
- const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
+ const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
if (MBBI != MBB.begin()) {
do {
--MBBI;
@@ -1205,12 +1257,9 @@ bool ARMFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
static unsigned GetFunctionSizeInBytes(const MachineFunction &MF,
const ARMBaseInstrInfo &TII) {
unsigned FnSize = 0;
- for (MachineFunction::const_iterator MBBI = MF.begin(), E = MF.end();
- MBBI != E; ++MBBI) {
- const MachineBasicBlock &MBB = *MBBI;
- for (MachineBasicBlock::const_iterator I = MBB.begin(),E = MBB.end();
- I != E; ++I)
- FnSize += TII.GetInstSizeInBytes(I);
+ for (auto &MBB : MF) {
+ for (auto &MI : MBB)
+ FnSize += TII.GetInstSizeInBytes(&MI);
}
return FnSize;
}
@@ -1223,21 +1272,21 @@ static unsigned estimateRSStackSizeLimit(MachineFunction &MF,
const TargetFrameLowering *TFI) {
const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
unsigned Limit = (1 << 12) - 1;
- for (MachineFunction::iterator BB = MF.begin(),E = MF.end(); BB != E; ++BB) {
- for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end();
- I != E; ++I) {
- for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
- if (!I->getOperand(i).isFI()) continue;
+ for (auto &MBB : MF) {
+ for (auto &MI : MBB) {
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ if (!MI.getOperand(i).isFI())
+ continue;
// When using ADDri to get the address of a stack object, 255 is the
// largest offset guaranteed to fit in the immediate offset.
- if (I->getOpcode() == ARM::ADDri) {
+ if (MI.getOpcode() == ARM::ADDri) {
Limit = std::min(Limit, (1U << 8) - 1);
break;
}
// Otherwise check the addressing mode.
- switch (I->getDesc().TSFlags & ARMII::AddrModeMask) {
+ switch (MI.getDesc().TSFlags & ARMII::AddrModeMask) {
case ARMII::AddrMode3:
case ARMII::AddrModeT2_i8:
Limit = std::min(Limit, (1U << 8) - 1);
@@ -1374,7 +1423,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
// Don't spill FP if the frame can be eliminated. This is determined
// by scanning the callee-save registers to see if any is used.
- const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
+ const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
for (unsigned i = 0; CSRegs[i]; ++i) {
unsigned Reg = CSRegs[i];
bool Spilled = false;
@@ -1486,6 +1535,10 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
if (hasFP(MF)) {
MRI.setPhysRegUsed(FramePtr);
+ auto FPPos = std::find(UnspilledCS1GPRs.begin(), UnspilledCS1GPRs.end(),
+ FramePtr);
+ if (FPPos != UnspilledCS1GPRs.end())
+ UnspilledCS1GPRs.erase(FPPos);
NumGPRSpills++;
}
@@ -1681,7 +1734,7 @@ void ARMFrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
if (MF.getFunction()->isVarArg())
report_fatal_error("Segmented stacks do not support vararg functions.");
if (!ST->isTargetAndroid() && !ST->isTargetLinux())
- report_fatal_error("Segmented stacks not supported on this platfrom.");
+ report_fatal_error("Segmented stacks not supported on this platform.");
MachineBasicBlock &prologueMBB = MF.front();
MachineFrameInfo *MFI = MF.getFrameInfo();
@@ -1693,6 +1746,12 @@ void ARMFrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
ARMFunctionInfo *ARMFI = MF.getInfo<ARMFunctionInfo>();
DebugLoc DL;
+ uint64_t StackSize = MFI->getStackSize();
+
+ // Do not generate a prologue for functions with a stack of size zero
+ if (StackSize == 0)
+ return;
+
// Use R4 and R5 as scratch registers.
// We save R4 and R5 before use and restore them before leaving the function.
unsigned ScratchReg0 = ARM::R4;
@@ -1722,8 +1781,6 @@ void ARMFrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
MF.push_front(PrevStackMBB);
// The required stack size that is aligned to ARM constant criterion.
- uint64_t StackSize = MFI->getStackSize();
-
AlignedStackSize = alignToARMConstant(StackSize);
// When the frame size is less than 256 we just compare the stack
diff --git a/lib/Target/ARM/ARMFrameLowering.h b/lib/Target/ARM/ARMFrameLowering.h
index 524ee36..981d320 100644
--- a/lib/Target/ARM/ARMFrameLowering.h
+++ b/lib/Target/ARM/ARMFrameLowering.h
@@ -57,7 +57,7 @@ public:
void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
RegScavenger *RS) const override;
- void adjustForSegmentedStacks(MachineFunction &MF) const;
+ void adjustForSegmentedStacks(MachineFunction &MF) const override;
private:
void emitPushInst(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
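
adjustForSegmentedStacks gains an `override` now that the base TargetFrameLowering declares it virtual. The keyword costs nothing at runtime and turns a silent signature mismatch into a compile error; the shape, with illustrative names:

    struct Base {
      virtual void adjustForSegmentedStacks() const {}
      virtual ~Base() = default;
    };

    struct Derived : Base {
      // With 'override', dropping 'const' or misspelling the name no longer
      // silently introduces a brand-new virtual -- it fails to compile.
      void adjustForSegmentedStacks() const override {}
    };
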
diff --git a/lib/Target/ARM/ARMHazardRecognizer.cpp b/lib/Target/ARM/ARMHazardRecognizer.cpp
index 61d4e12..0885c4e 100644
--- a/lib/Target/ARM/ARMHazardRecognizer.cpp
+++ b/lib/Target/ARM/ARMHazardRecognizer.cpp
@@ -77,7 +77,7 @@ ARMHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
}
void ARMHazardRecognizer::Reset() {
- LastMI = 0;
+ LastMI = nullptr;
FpMLxStalls = 0;
ScoreboardHazardRecognizer::Reset();
}
@@ -95,7 +95,7 @@ void ARMHazardRecognizer::EmitInstruction(SUnit *SU) {
void ARMHazardRecognizer::AdvanceCycle() {
if (FpMLxStalls && --FpMLxStalls == 0)
// Stalled for 4 cycles but still can't schedule any other instructions.
- LastMI = 0;
+ LastMI = nullptr;
ScoreboardHazardRecognizer::AdvanceCycle();
}
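
This file joins the tree-wide 0/NULL-to-nullptr sweep. Beyond style, nullptr has its own type (std::nullptr_t), so it cannot be confused with an integer during overload resolution or template deduction:

    #include <iostream>

    void f(int)    { std::cout << "int overload\n"; }
    void f(void *) { std::cout << "pointer overload\n"; }

    int main() {
      f(0);        // literal 0 is an int first: picks f(int)
      f(nullptr);  // std::nullptr_t converts only to pointers: picks f(void *)
      // f(NULL); // implementation-defined expansion; may even be ambiguous
    }
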
diff --git a/lib/Target/ARM/ARMHazardRecognizer.h b/lib/Target/ARM/ARMHazardRecognizer.h
index e88cd0d..a8198e2 100644
--- a/lib/Target/ARM/ARMHazardRecognizer.h
+++ b/lib/Target/ARM/ARMHazardRecognizer.h
@@ -35,7 +35,7 @@ public:
ARMHazardRecognizer(const InstrItineraryData *ItinData,
const ScheduleDAG *DAG)
: ScoreboardHazardRecognizer(ItinData, DAG, "post-RA-sched"),
- LastMI(0) {}
+ LastMI(nullptr) {}
HazardType getHazardType(SUnit *SU, int Stalls) override;
void Reset() override;
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 70e11c5..08d598d 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -11,7 +11,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "arm-isel"
#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMTargetMachine.h"
@@ -37,6 +36,8 @@
using namespace llvm;
+#define DEBUG_TYPE "arm-isel"
+
static cl::opt<bool>
DisableShifterOp("disable-shifter-op", cl::Hidden,
cl::desc("Disable isel of shifter-op"),
@@ -72,6 +73,13 @@ public:
Subtarget(&TM.getSubtarget<ARMSubtarget>()) {
}
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ // Reset the subtarget each time through.
+ Subtarget = &TM.getSubtarget<ARMSubtarget>();
+ SelectionDAGISel::runOnMachineFunction(MF);
+ return true;
+ }
+
const char *getPassName() const override {
return "ARM Instruction Selection";
}
@@ -397,7 +405,7 @@ void ARMDAGToDAGISel::PreprocessISelDAG() {
N1 = CurDAG->getNode(ISD::SHL, SDLoc(N1), MVT::i32,
N1, CurDAG->getConstant(TZ, MVT::i32));
CurDAG->UpdateNodeOperands(N, N0, N1);
- }
+ }
}
/// hasNoVMLxHazardUse - Return true if it's desirable to select a FP MLA / MLS
@@ -1440,7 +1448,7 @@ SDNode *ARMDAGToDAGISel::SelectARMIndexedLoad(SDNode *N) {
LoadSDNode *LD = cast<LoadSDNode>(N);
ISD::MemIndexedMode AM = LD->getAddressingMode();
if (AM == ISD::UNINDEXED)
- return NULL;
+ return nullptr;
EVT LoadedVT = LD->getMemoryVT();
SDValue Offset, AMOpc;
@@ -1506,14 +1514,14 @@ SDNode *ARMDAGToDAGISel::SelectARMIndexedLoad(SDNode *N) {
}
}
- return NULL;
+ return nullptr;
}
SDNode *ARMDAGToDAGISel::SelectT2IndexedLoad(SDNode *N) {
LoadSDNode *LD = cast<LoadSDNode>(N);
ISD::MemIndexedMode AM = LD->getAddressingMode();
if (AM == ISD::UNINDEXED)
- return NULL;
+ return nullptr;
EVT LoadedVT = LD->getMemoryVT();
bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
@@ -1540,7 +1548,7 @@ SDNode *ARMDAGToDAGISel::SelectT2IndexedLoad(SDNode *N) {
Opcode = isPre ? ARM::t2LDRB_PRE : ARM::t2LDRB_POST;
break;
default:
- return NULL;
+ return nullptr;
}
Match = true;
}
@@ -1554,7 +1562,7 @@ SDNode *ARMDAGToDAGISel::SelectT2IndexedLoad(SDNode *N) {
MVT::Other, Ops);
}
- return NULL;
+ return nullptr;
}
/// \brief Form a GPRPair pseudo register from a pair of GPR regs.
@@ -1699,10 +1707,10 @@ static bool isVSTfixed(unsigned Opc)
case ARM::VST1d16wb_fixed : return true;
case ARM::VST1d32wb_fixed : return true;
case ARM::VST1d64wb_fixed : return true;
- case ARM::VST1q8wb_fixed : return true;
- case ARM::VST1q16wb_fixed : return true;
- case ARM::VST1q32wb_fixed : return true;
- case ARM::VST1q64wb_fixed : return true;
+ case ARM::VST1q8wb_fixed : return true;
+ case ARM::VST1q16wb_fixed : return true;
+ case ARM::VST1q32wb_fixed : return true;
+ case ARM::VST1q64wb_fixed : return true;
case ARM::VST1d64TPseudoWB_fixed : return true;
case ARM::VST1d64QPseudoWB_fixed : return true;
case ARM::VST2d8wb_fixed : return true;
@@ -1776,7 +1784,7 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
SDValue MemAddr, Align;
unsigned AddrOpIdx = isUpdating ? 1 : 2;
if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
- return NULL;
+ return nullptr;
SDValue Chain = N->getOperand(0);
EVT VT = N->getValueType(0);
@@ -1895,7 +1903,7 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1));
if (isUpdating)
ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2));
- return NULL;
+ return nullptr;
}
SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
@@ -1909,7 +1917,7 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
unsigned AddrOpIdx = isUpdating ? 1 : 2;
unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
- return NULL;
+ return nullptr;
MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
@@ -2055,7 +2063,7 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad,
unsigned AddrOpIdx = isUpdating ? 1 : 2;
unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
- return NULL;
+ return nullptr;
MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
@@ -2160,7 +2168,7 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad,
ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, 1));
if (isUpdating)
ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdLn, 2));
- return NULL;
+ return nullptr;
}
SDNode *ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating,
@@ -2171,7 +2179,7 @@ SDNode *ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating,
SDValue MemAddr, Align;
if (!SelectAddrMode6(N, N->getOperand(1), MemAddr, Align))
- return NULL;
+ return nullptr;
MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
@@ -2243,7 +2251,7 @@ SDNode *ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating,
ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1));
if (isUpdating)
ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2));
- return NULL;
+ return nullptr;
}
SDNode *ARMDAGToDAGISel::SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs,
@@ -2282,7 +2290,7 @@ SDNode *ARMDAGToDAGISel::SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs,
SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N,
bool isSigned) {
if (!Subtarget->hasV6T2Ops())
- return NULL;
+ return nullptr;
unsigned Opc = isSigned
? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX)
@@ -2295,7 +2303,7 @@ SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N,
// The immediate is a mask of the low bits iff imm & (imm+1) == 0
if (And_imm & (And_imm + 1))
- return NULL;
+ return nullptr;
unsigned Srl_imm = 0;
if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL,
@@ -2315,7 +2323,7 @@ SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N,
SDValue Ops[] = { N->getOperand(0).getOperand(0),
CurDAG->getTargetConstant(LSB, MVT::i32),
getAL(CurDAG), Reg0, Reg0 };
- return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5);
+ return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
}
// ARM models shift instructions as MOVsi with shifter operand.
@@ -2325,17 +2333,17 @@ SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N,
MVT::i32);
SDValue Ops[] = { N->getOperand(0).getOperand(0), ShOpc,
getAL(CurDAG), Reg0, Reg0 };
- return CurDAG->SelectNodeTo(N, ARM::MOVsi, MVT::i32, Ops, 5);
+ return CurDAG->SelectNodeTo(N, ARM::MOVsi, MVT::i32, Ops);
}
SDValue Ops[] = { N->getOperand(0).getOperand(0),
CurDAG->getTargetConstant(LSB, MVT::i32),
CurDAG->getTargetConstant(Width, MVT::i32),
- getAL(CurDAG), Reg0 };
- return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5);
+ getAL(CurDAG), Reg0 };
+ return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
}
}
- return NULL;
+ return nullptr;
}
// Otherwise, we're looking for a shift of a shift
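
Every `SelectNodeTo(N, Opc, VT, Ops, 5)` in this file loses its trailing count because the SelectionDAG entry points now take ArrayRef-style views that deduce the length from the array itself, removing a class of stale-count bugs. A minimal version of such a view, with a hypothetical callee:

    #include <cstddef>
    #include <iostream>

    // Minimal ArrayRef-like view: pointer + length, with the length deduced
    // from a C array so call sites cannot pass a mismatched count.
    template <typename T> class ArrayView {
      const T *Data;
      size_t Len;
    public:
      template <size_t N>
      ArrayView(const T (&A)[N]) : Data(A), Len(N) {}
      size_t size() const { return Len; }
      const T *begin() const { return Data; }
      const T *end() const { return Data + Len; }
    };

    void selectNodeTo(ArrayView<int> Ops) {  // was: (const int *Ops, unsigned N)
      std::cout << Ops.size() << " operands\n";
    }

    int main() {
      int Ops[] = {1, 2, 3, 4, 5};
      selectNodeTo(Ops);                     // count deduced: no ", 5" to keep in sync
    }

Passing `Ops` alone keeps the operand count correct even when an initializer is added or removed later.
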
@@ -2349,16 +2357,16 @@ SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N,
unsigned Width = 32 - Srl_imm - 1;
int LSB = Srl_imm - Shl_imm;
if (LSB < 0)
- return NULL;
+ return nullptr;
SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
SDValue Ops[] = { N->getOperand(0).getOperand(0),
CurDAG->getTargetConstant(LSB, MVT::i32),
CurDAG->getTargetConstant(Width, MVT::i32),
getAL(CurDAG), Reg0 };
- return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5);
+ return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
}
}
- return NULL;
+ return nullptr;
}
/// Target-specific DAG combining for ISD::XOR.
@@ -2377,10 +2385,10 @@ SDNode *ARMDAGToDAGISel::SelectABSOp(SDNode *N){
EVT VT = N->getValueType(0);
if (Subtarget->isThumb1Only())
- return NULL;
+ return nullptr;
if (XORSrc0.getOpcode() != ISD::ADD || XORSrc1.getOpcode() != ISD::SRA)
- return NULL;
+ return nullptr;
SDValue ADDSrc0 = XORSrc0.getOperand(0);
SDValue ADDSrc1 = XORSrc0.getOperand(1);
@@ -2391,13 +2399,13 @@ SDNode *ARMDAGToDAGISel::SelectABSOp(SDNode *N){
unsigned Size = XType.getSizeInBits() - 1;
if (ADDSrc1 == XORSrc1 && ADDSrc0 == SRASrc0 &&
- XType.isInteger() && SRAConstant != NULL &&
+ XType.isInteger() && SRAConstant != nullptr &&
Size == SRAConstant->getZExtValue()) {
unsigned Opcode = Subtarget->isThumb2() ? ARM::t2ABS : ARM::ABS;
return CurDAG->SelectNodeTo(N, Opcode, VT, ADDSrc0);
}
- return NULL;
+ return nullptr;
}
SDNode *ARMDAGToDAGISel::SelectConcatVector(SDNode *N) {
@@ -2414,7 +2422,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
if (N->isMachineOpcode()) {
N->setNodeId(-1);
- return NULL; // Already selected.
+ return nullptr; // Already selected.
}
switch (N->getOpcode()) {
@@ -2478,7 +2486,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
Ops);
}
ReplaceUses(SDValue(N, 0), SDValue(ResNode, 0));
- return NULL;
+ return nullptr;
}
// Other cases are autogenerated.
@@ -2492,14 +2500,14 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
if (Subtarget->isThumb1Only()) {
SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, MVT::i32),
getAL(CurDAG), CurDAG->getRegister(0, MVT::i32) };
- return CurDAG->SelectNodeTo(N, ARM::tADDrSPi, MVT::i32, Ops, 4);
+ return CurDAG->SelectNodeTo(N, ARM::tADDrSPi, MVT::i32, Ops);
} else {
unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ?
ARM::t2ADDri : ARM::ADDri);
SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, MVT::i32),
getAL(CurDAG), CurDAG->getRegister(0, MVT::i32),
CurDAG->getRegister(0, MVT::i32) };
- return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5);
+ return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
}
}
case ISD::SRL:
@@ -2526,10 +2534,10 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
if (Subtarget->isThumb()) {
SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG), Reg0, Reg0 };
- return CurDAG->SelectNodeTo(N, ARM::t2ADDrs, MVT::i32, Ops, 6);
+ return CurDAG->SelectNodeTo(N, ARM::t2ADDrs, MVT::i32, Ops);
} else {
SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG), Reg0, Reg0 };
- return CurDAG->SelectNodeTo(N, ARM::ADDrsi, MVT::i32, Ops, 7);
+ return CurDAG->SelectNodeTo(N, ARM::ADDrsi, MVT::i32, Ops);
}
}
if (isPowerOf2_32(RHSV+1)) { // 2^n-1?
@@ -2542,10 +2550,10 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
if (Subtarget->isThumb()) {
SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG), Reg0, Reg0 };
- return CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops, 6);
+ return CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops);
} else {
SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG), Reg0, Reg0 };
- return CurDAG->SelectNodeTo(N, ARM::RSBrsi, MVT::i32, Ops, 7);
+ return CurDAG->SelectNodeTo(N, ARM::RSBrsi, MVT::i32, Ops);
}
}
}
@@ -2660,7 +2668,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
}
}
case ISD::LOAD: {
- SDNode *ResNode = 0;
+ SDNode *ResNode = nullptr;
if (Subtarget->isThumb() && Subtarget->hasThumb2())
ResNode = SelectT2IndexedLoad(N);
else
@@ -2707,13 +2715,13 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
}
ReplaceUses(SDValue(N, 0),
SDValue(Chain.getNode(), Chain.getResNo()));
- return NULL;
+ return nullptr;
}
case ARMISD::VZIP: {
unsigned Opc = 0;
EVT VT = N->getValueType(0);
switch (VT.getSimpleVT().SimpleTy) {
- default: return NULL;
+ default: return nullptr;
case MVT::v8i8: Opc = ARM::VZIPd8; break;
case MVT::v4i16: Opc = ARM::VZIPd16; break;
case MVT::v2f32:
@@ -2733,7 +2741,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
unsigned Opc = 0;
EVT VT = N->getValueType(0);
switch (VT.getSimpleVT().SimpleTy) {
- default: return NULL;
+ default: return nullptr;
case MVT::v8i8: Opc = ARM::VUZPd8; break;
case MVT::v4i16: Opc = ARM::VUZPd16; break;
case MVT::v2f32:
@@ -2753,7 +2761,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
unsigned Opc = 0;
EVT VT = N->getValueType(0);
switch (VT.getSimpleVT().SimpleTy) {
- default: return NULL;
+ default: return nullptr;
case MVT::v8i8: Opc = ARM::VTRNd8; break;
case MVT::v4i16: Opc = ARM::VTRNd16; break;
case MVT::v2f32:
@@ -2834,7 +2842,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
ARM::VLD1q16wb_fixed,
ARM::VLD1q32wb_fixed,
ARM::VLD1q64wb_fixed };
- return SelectVLD(N, true, 1, DOpcodes, QOpcodes, 0);
+ return SelectVLD(N, true, 1, DOpcodes, QOpcodes, nullptr);
}
case ARMISD::VLD2_UPD: {
@@ -2845,7 +2853,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
static const uint16_t QOpcodes[] = { ARM::VLD2q8PseudoWB_fixed,
ARM::VLD2q16PseudoWB_fixed,
ARM::VLD2q32PseudoWB_fixed };
- return SelectVLD(N, true, 2, DOpcodes, QOpcodes, 0);
+ return SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr);
}
case ARMISD::VLD3_UPD: {
@@ -2912,7 +2920,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
ARM::VST1q16wb_fixed,
ARM::VST1q32wb_fixed,
ARM::VST1q64wb_fixed };
- return SelectVST(N, true, 1, DOpcodes, QOpcodes, 0);
+ return SelectVST(N, true, 1, DOpcodes, QOpcodes, nullptr);
}
case ARMISD::VST2_UPD: {
@@ -2923,7 +2931,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
static const uint16_t QOpcodes[] = { ARM::VST2q8PseudoWB_fixed,
ARM::VST2q16PseudoWB_fixed,
ARM::VST2q32PseudoWB_fixed };
- return SelectVST(N, true, 2, DOpcodes, QOpcodes, 0);
+ return SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr);
}
case ARMISD::VST3_UPD: {
@@ -3047,7 +3055,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
ReplaceUses(SDValue(N, 1), Result);
}
ReplaceUses(SDValue(N, 2), OutChain);
- return NULL;
+ return nullptr;
}
case Intrinsic::arm_stlexd:
case Intrinsic::arm_strexd: {
@@ -3093,7 +3101,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
ARM::VLD1d32, ARM::VLD1d64 };
static const uint16_t QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
ARM::VLD1q32, ARM::VLD1q64};
- return SelectVLD(N, false, 1, DOpcodes, QOpcodes, 0);
+ return SelectVLD(N, false, 1, DOpcodes, QOpcodes, nullptr);
}
case Intrinsic::arm_neon_vld2: {
@@ -3101,7 +3109,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
ARM::VLD2d32, ARM::VLD1q64 };
static const uint16_t QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo,
ARM::VLD2q32Pseudo };
- return SelectVLD(N, false, 2, DOpcodes, QOpcodes, 0);
+ return SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);
}
case Intrinsic::arm_neon_vld3: {
@@ -3164,7 +3172,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
ARM::VST1d32, ARM::VST1d64 };
static const uint16_t QOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
ARM::VST1q32, ARM::VST1q64 };
- return SelectVST(N, false, 1, DOpcodes, QOpcodes, 0);
+ return SelectVST(N, false, 1, DOpcodes, QOpcodes, nullptr);
}
case Intrinsic::arm_neon_vst2: {
@@ -3172,7 +3180,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
ARM::VST2d32, ARM::VST1q64 };
static uint16_t QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo,
ARM::VST2q32Pseudo };
- return SelectVST(N, false, 2, DOpcodes, QOpcodes, 0);
+ return SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr);
}
case Intrinsic::arm_neon_vst3: {
@@ -3306,7 +3314,8 @@ SDNode *ARMDAGToDAGISel::SelectInlineAsm(SDNode *N){
// them into a GPRPair.
SDLoc dl(N);
- SDValue Glue = N->getGluedNode() ? N->getOperand(NumOps-1) : SDValue(0,0);
+ SDValue Glue = N->getGluedNode() ? N->getOperand(NumOps-1)
+ : SDValue(nullptr,0);
SmallVector<bool, 8> OpChanged;
// Glue node will be appended late.
@@ -3388,7 +3397,7 @@ SDNode *ARMDAGToDAGISel::SelectInlineAsm(SDNode *N){
// Update the original glue user.
std::vector<SDValue> Ops(GU->op_begin(), GU->op_end()-1);
Ops.push_back(T1.getValue(1));
- CurDAG->UpdateNodeOperands(GU, &Ops[0], Ops.size());
+ CurDAG->UpdateNodeOperands(GU, Ops);
GU = T1.getNode();
}
else {
@@ -3435,11 +3444,10 @@ SDNode *ARMDAGToDAGISel::SelectInlineAsm(SDNode *N){
if (Glue.getNode())
AsmNodeOperands.push_back(Glue);
if (!Changed)
- return NULL;
+ return nullptr;
SDValue New = CurDAG->getNode(ISD::INLINEASM, SDLoc(N),
- CurDAG->getVTList(MVT::Other, MVT::Glue), &AsmNodeOperands[0],
- AsmNodeOperands.size());
+ CurDAG->getVTList(MVT::Other, MVT::Glue), AsmNodeOperands);
New->setNodeId(-1);
return New.getNode();
}
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 2ebad8e..00d07e8 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -12,7 +12,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "arm-isel"
#include "ARMISelLowering.h"
#include "ARMCallingConv.h"
#include "ARMConstantPoolValue.h"
@@ -37,18 +36,22 @@
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Type.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetOptions.h"
#include <utility>
using namespace llvm;
+#define DEBUG_TYPE "arm-isel"
+
STATISTIC(NumTailCalls, "Number of tail calls");
STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt");
STATISTIC(NumLoopByVals, "Number of loops generated for byval arguments");
@@ -79,7 +82,7 @@ namespace {
}
// The APCS parameter registers.
-static const uint16_t GPRArgRegs[] = {
+static const MCPhysReg GPRArgRegs[] = {
ARM::R0, ARM::R1, ARM::R2, ARM::R3
};
@@ -155,7 +158,8 @@ void ARMTargetLowering::addQRTypeForNEON(MVT VT) {
static TargetLoweringObjectFile *createTLOF(TargetMachine &TM) {
if (TM.getSubtarget<ARMSubtarget>().isTargetMachO())
return new TargetLoweringObjectFileMachO();
-
+ if (TM.getSubtarget<ARMSubtarget>().isTargetWindows())
+ return new TargetLoweringObjectFileCOFF();
return new ARMElfTargetObjectFile();
}
@@ -170,7 +174,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
if (Subtarget->isTargetMachO()) {
// Uses VFP for Thumb libfuncs if available.
if (Subtarget->isThumb() && Subtarget->hasVFP2() &&
- Subtarget->hasARMOps()) {
+ Subtarget->hasARMOps() && !TM.Options.UseSoftFloat) {
// Single-precision floating-point arithmetic.
setLibcallName(RTLIB::ADD_F32, "__addsf3vfp");
setLibcallName(RTLIB::SUB_F32, "__subsf3vfp");
@@ -246,173 +250,134 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
}
// These libcalls are not available in 32-bit.
- setLibcallName(RTLIB::SHL_I128, 0);
- setLibcallName(RTLIB::SRL_I128, 0);
- setLibcallName(RTLIB::SRA_I128, 0);
+ setLibcallName(RTLIB::SHL_I128, nullptr);
+ setLibcallName(RTLIB::SRL_I128, nullptr);
+ setLibcallName(RTLIB::SRA_I128, nullptr);
if (Subtarget->isAAPCS_ABI() && !Subtarget->isTargetMachO() &&
!Subtarget->isTargetWindows()) {
- // Double-precision floating-point arithmetic helper functions
- // RTABI chapter 4.1.2, Table 2
- setLibcallName(RTLIB::ADD_F64, "__aeabi_dadd");
- setLibcallName(RTLIB::DIV_F64, "__aeabi_ddiv");
- setLibcallName(RTLIB::MUL_F64, "__aeabi_dmul");
- setLibcallName(RTLIB::SUB_F64, "__aeabi_dsub");
- setLibcallCallingConv(RTLIB::ADD_F64, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::DIV_F64, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::MUL_F64, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::SUB_F64, CallingConv::ARM_AAPCS);
-
- // Double-precision floating-point comparison helper functions
- // RTABI chapter 4.1.2, Table 3
- setLibcallName(RTLIB::OEQ_F64, "__aeabi_dcmpeq");
- setCmpLibcallCC(RTLIB::OEQ_F64, ISD::SETNE);
- setLibcallName(RTLIB::UNE_F64, "__aeabi_dcmpeq");
- setCmpLibcallCC(RTLIB::UNE_F64, ISD::SETEQ);
- setLibcallName(RTLIB::OLT_F64, "__aeabi_dcmplt");
- setCmpLibcallCC(RTLIB::OLT_F64, ISD::SETNE);
- setLibcallName(RTLIB::OLE_F64, "__aeabi_dcmple");
- setCmpLibcallCC(RTLIB::OLE_F64, ISD::SETNE);
- setLibcallName(RTLIB::OGE_F64, "__aeabi_dcmpge");
- setCmpLibcallCC(RTLIB::OGE_F64, ISD::SETNE);
- setLibcallName(RTLIB::OGT_F64, "__aeabi_dcmpgt");
- setCmpLibcallCC(RTLIB::OGT_F64, ISD::SETNE);
- setLibcallName(RTLIB::UO_F64, "__aeabi_dcmpun");
- setCmpLibcallCC(RTLIB::UO_F64, ISD::SETNE);
- setLibcallName(RTLIB::O_F64, "__aeabi_dcmpun");
- setCmpLibcallCC(RTLIB::O_F64, ISD::SETEQ);
- setLibcallCallingConv(RTLIB::OEQ_F64, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::UNE_F64, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::OLT_F64, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::OLE_F64, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::OGE_F64, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::OGT_F64, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::UO_F64, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::O_F64, CallingConv::ARM_AAPCS);
-
- // Single-precision floating-point arithmetic helper functions
- // RTABI chapter 4.1.2, Table 4
- setLibcallName(RTLIB::ADD_F32, "__aeabi_fadd");
- setLibcallName(RTLIB::DIV_F32, "__aeabi_fdiv");
- setLibcallName(RTLIB::MUL_F32, "__aeabi_fmul");
- setLibcallName(RTLIB::SUB_F32, "__aeabi_fsub");
- setLibcallCallingConv(RTLIB::ADD_F32, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::DIV_F32, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::MUL_F32, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::SUB_F32, CallingConv::ARM_AAPCS);
-
- // Single-precision floating-point comparison helper functions
- // RTABI chapter 4.1.2, Table 5
- setLibcallName(RTLIB::OEQ_F32, "__aeabi_fcmpeq");
- setCmpLibcallCC(RTLIB::OEQ_F32, ISD::SETNE);
- setLibcallName(RTLIB::UNE_F32, "__aeabi_fcmpeq");
- setCmpLibcallCC(RTLIB::UNE_F32, ISD::SETEQ);
- setLibcallName(RTLIB::OLT_F32, "__aeabi_fcmplt");
- setCmpLibcallCC(RTLIB::OLT_F32, ISD::SETNE);
- setLibcallName(RTLIB::OLE_F32, "__aeabi_fcmple");
- setCmpLibcallCC(RTLIB::OLE_F32, ISD::SETNE);
- setLibcallName(RTLIB::OGE_F32, "__aeabi_fcmpge");
- setCmpLibcallCC(RTLIB::OGE_F32, ISD::SETNE);
- setLibcallName(RTLIB::OGT_F32, "__aeabi_fcmpgt");
- setCmpLibcallCC(RTLIB::OGT_F32, ISD::SETNE);
- setLibcallName(RTLIB::UO_F32, "__aeabi_fcmpun");
- setCmpLibcallCC(RTLIB::UO_F32, ISD::SETNE);
- setLibcallName(RTLIB::O_F32, "__aeabi_fcmpun");
- setCmpLibcallCC(RTLIB::O_F32, ISD::SETEQ);
- setLibcallCallingConv(RTLIB::OEQ_F32, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::UNE_F32, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::OLT_F32, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::OLE_F32, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::OGE_F32, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::OGT_F32, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::UO_F32, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::O_F32, CallingConv::ARM_AAPCS);
-
- // Floating-point to integer conversions.
- // RTABI chapter 4.1.2, Table 6
- setLibcallName(RTLIB::FPTOSINT_F64_I32, "__aeabi_d2iz");
- setLibcallName(RTLIB::FPTOUINT_F64_I32, "__aeabi_d2uiz");
- setLibcallName(RTLIB::FPTOSINT_F64_I64, "__aeabi_d2lz");
- setLibcallName(RTLIB::FPTOUINT_F64_I64, "__aeabi_d2ulz");
- setLibcallName(RTLIB::FPTOSINT_F32_I32, "__aeabi_f2iz");
- setLibcallName(RTLIB::FPTOUINT_F32_I32, "__aeabi_f2uiz");
- setLibcallName(RTLIB::FPTOSINT_F32_I64, "__aeabi_f2lz");
- setLibcallName(RTLIB::FPTOUINT_F32_I64, "__aeabi_f2ulz");
- setLibcallCallingConv(RTLIB::FPTOSINT_F64_I32, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::FPTOUINT_F64_I32, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::FPTOSINT_F64_I64, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::FPTOUINT_F64_I64, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::FPTOSINT_F32_I32, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::FPTOUINT_F32_I32, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::FPTOSINT_F32_I64, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::FPTOUINT_F32_I64, CallingConv::ARM_AAPCS);
-
- // Conversions between floating types.
- // RTABI chapter 4.1.2, Table 7
- setLibcallName(RTLIB::FPROUND_F64_F32, "__aeabi_d2f");
- setLibcallName(RTLIB::FPEXT_F32_F64, "__aeabi_f2d");
- setLibcallCallingConv(RTLIB::FPROUND_F64_F32, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::FPEXT_F32_F64, CallingConv::ARM_AAPCS);
-
- // Integer to floating-point conversions.
- // RTABI chapter 4.1.2, Table 8
- setLibcallName(RTLIB::SINTTOFP_I32_F64, "__aeabi_i2d");
- setLibcallName(RTLIB::UINTTOFP_I32_F64, "__aeabi_ui2d");
- setLibcallName(RTLIB::SINTTOFP_I64_F64, "__aeabi_l2d");
- setLibcallName(RTLIB::UINTTOFP_I64_F64, "__aeabi_ul2d");
- setLibcallName(RTLIB::SINTTOFP_I32_F32, "__aeabi_i2f");
- setLibcallName(RTLIB::UINTTOFP_I32_F32, "__aeabi_ui2f");
- setLibcallName(RTLIB::SINTTOFP_I64_F32, "__aeabi_l2f");
- setLibcallName(RTLIB::UINTTOFP_I64_F32, "__aeabi_ul2f");
- setLibcallCallingConv(RTLIB::SINTTOFP_I32_F64, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::UINTTOFP_I32_F64, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::SINTTOFP_I64_F64, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::UINTTOFP_I64_F64, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::SINTTOFP_I32_F32, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::UINTTOFP_I32_F32, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::SINTTOFP_I64_F32, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::UINTTOFP_I64_F32, CallingConv::ARM_AAPCS);
-
- // Long long helper functions
- // RTABI chapter 4.2, Table 9
- setLibcallName(RTLIB::MUL_I64, "__aeabi_lmul");
- setLibcallName(RTLIB::SHL_I64, "__aeabi_llsl");
- setLibcallName(RTLIB::SRL_I64, "__aeabi_llsr");
- setLibcallName(RTLIB::SRA_I64, "__aeabi_lasr");
- setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::SHL_I64, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::SRL_I64, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::SRA_I64, CallingConv::ARM_AAPCS);
-
- // Integer division functions
- // RTABI chapter 4.3.1
- setLibcallName(RTLIB::SDIV_I8, "__aeabi_idiv");
- setLibcallName(RTLIB::SDIV_I16, "__aeabi_idiv");
- setLibcallName(RTLIB::SDIV_I32, "__aeabi_idiv");
- setLibcallName(RTLIB::SDIV_I64, "__aeabi_ldivmod");
- setLibcallName(RTLIB::UDIV_I8, "__aeabi_uidiv");
- setLibcallName(RTLIB::UDIV_I16, "__aeabi_uidiv");
- setLibcallName(RTLIB::UDIV_I32, "__aeabi_uidiv");
- setLibcallName(RTLIB::UDIV_I64, "__aeabi_uldivmod");
- setLibcallCallingConv(RTLIB::SDIV_I8, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::SDIV_I16, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::SDIV_I32, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::UDIV_I8, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::UDIV_I16, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::UDIV_I32, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::ARM_AAPCS);
-
- // Memory operations
- // RTABI chapter 4.3.4
- setLibcallName(RTLIB::MEMCPY, "__aeabi_memcpy");
- setLibcallName(RTLIB::MEMMOVE, "__aeabi_memmove");
- setLibcallName(RTLIB::MEMSET, "__aeabi_memset");
- setLibcallCallingConv(RTLIB::MEMCPY, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::MEMMOVE, CallingConv::ARM_AAPCS);
- setLibcallCallingConv(RTLIB::MEMSET, CallingConv::ARM_AAPCS);
+ static const struct {
+ const RTLIB::Libcall Op;
+ const char * const Name;
+ const CallingConv::ID CC;
+ const ISD::CondCode Cond;
+ } LibraryCalls[] = {
+ // Double-precision floating-point arithmetic helper functions
+ // RTABI chapter 4.1.2, Table 2
+ { RTLIB::ADD_F64, "__aeabi_dadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::DIV_F64, "__aeabi_ddiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::MUL_F64, "__aeabi_dmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::SUB_F64, "__aeabi_dsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+
+ // Double-precision floating-point comparison helper functions
+ // RTABI chapter 4.1.2, Table 3
+ { RTLIB::OEQ_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
+ { RTLIB::UNE_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
+ { RTLIB::OLT_F64, "__aeabi_dcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
+ { RTLIB::OLE_F64, "__aeabi_dcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
+ { RTLIB::OGE_F64, "__aeabi_dcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
+ { RTLIB::OGT_F64, "__aeabi_dcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
+ { RTLIB::UO_F64, "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
+ { RTLIB::O_F64, "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETEQ },
+
+ // Single-precision floating-point arithmetic helper functions
+ // RTABI chapter 4.1.2, Table 4
+ { RTLIB::ADD_F32, "__aeabi_fadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::DIV_F32, "__aeabi_fdiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::MUL_F32, "__aeabi_fmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::SUB_F32, "__aeabi_fsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+
+ // Single-precision floating-point comparison helper functions
+ // RTABI chapter 4.1.2, Table 5
+ { RTLIB::OEQ_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
+ { RTLIB::UNE_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
+ { RTLIB::OLT_F32, "__aeabi_fcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
+ { RTLIB::OLE_F32, "__aeabi_fcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
+ { RTLIB::OGE_F32, "__aeabi_fcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
+ { RTLIB::OGT_F32, "__aeabi_fcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
+ { RTLIB::UO_F32, "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
+ { RTLIB::O_F32, "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETEQ },
+
+ // Floating-point to integer conversions.
+ // RTABI chapter 4.1.2, Table 6
+ { RTLIB::FPTOSINT_F64_I32, "__aeabi_d2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::FPTOUINT_F64_I32, "__aeabi_d2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::FPTOSINT_F64_I64, "__aeabi_d2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::FPTOUINT_F64_I64, "__aeabi_d2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::FPTOSINT_F32_I32, "__aeabi_f2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::FPTOUINT_F32_I32, "__aeabi_f2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::FPTOSINT_F32_I64, "__aeabi_f2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::FPTOUINT_F32_I64, "__aeabi_f2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+
+ // Conversions between floating types.
+ // RTABI chapter 4.1.2, Table 7
+ { RTLIB::FPROUND_F64_F32, "__aeabi_d2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::FPEXT_F32_F64, "__aeabi_f2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+
+ // Integer to floating-point conversions.
+ // RTABI chapter 4.1.2, Table 8
+ { RTLIB::SINTTOFP_I32_F64, "__aeabi_i2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::UINTTOFP_I32_F64, "__aeabi_ui2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::SINTTOFP_I64_F64, "__aeabi_l2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::UINTTOFP_I64_F64, "__aeabi_ul2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::SINTTOFP_I32_F32, "__aeabi_i2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::UINTTOFP_I32_F32, "__aeabi_ui2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::SINTTOFP_I64_F32, "__aeabi_l2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::UINTTOFP_I64_F32, "__aeabi_ul2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+
+ // Long long helper functions
+ // RTABI chapter 4.2, Table 9
+ { RTLIB::MUL_I64, "__aeabi_lmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::SHL_I64, "__aeabi_llsl", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::SRL_I64, "__aeabi_llsr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::SRA_I64, "__aeabi_lasr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+
+ // Integer division functions
+ // RTABI chapter 4.3.1
+ { RTLIB::SDIV_I8, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::SDIV_I16, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::SDIV_I32, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::SDIV_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::UDIV_I8, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::UDIV_I16, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::UDIV_I32, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::UDIV_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+
+ // Memory operations
+ // RTABI chapter 4.3.4
+ { RTLIB::MEMCPY, "__aeabi_memcpy", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::MEMMOVE, "__aeabi_memmove", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ { RTLIB::MEMSET, "__aeabi_memset", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
+ };
+
+ for (const auto &LC : LibraryCalls) {
+ setLibcallName(LC.Op, LC.Name);
+ setLibcallCallingConv(LC.Op, LC.CC);
+ if (LC.Cond != ISD::SETCC_INVALID)
+ setCmpLibcallCC(LC.Op, LC.Cond);
+ }
+ }
+
+ if (Subtarget->isTargetWindows()) {
+ static const struct {
+ const RTLIB::Libcall Op;
+ const char * const Name;
+ const CallingConv::ID CC;
+ } LibraryCalls[] = {
+ { RTLIB::FPTOSINT_F32_I64, "__stoi64", CallingConv::ARM_AAPCS_VFP },
+ { RTLIB::FPTOSINT_F64_I64, "__dtoi64", CallingConv::ARM_AAPCS_VFP },
+ { RTLIB::FPTOUINT_F32_I64, "__stou64", CallingConv::ARM_AAPCS_VFP },
+ { RTLIB::FPTOUINT_F64_I64, "__dtou64", CallingConv::ARM_AAPCS_VFP },
+ { RTLIB::SINTTOFP_I64_F32, "__i64tos", CallingConv::ARM_AAPCS_VFP },
+ { RTLIB::SINTTOFP_I64_F64, "__i64tod", CallingConv::ARM_AAPCS_VFP },
+ { RTLIB::UINTTOFP_I64_F32, "__u64tos", CallingConv::ARM_AAPCS_VFP },
+ { RTLIB::UINTTOFP_I64_F64, "__u64tod", CallingConv::ARM_AAPCS_VFP },
+ };
+
+ for (const auto &LC : LibraryCalls) {
+ setLibcallName(LC.Op, LC.Name);
+ setLibcallCallingConv(LC.Op, LC.CC);
+ }
}
// Use divmod compiler-rt calls for iOS 5.0 and later.
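
The long run of repetitive setLibcallName/setLibcallCallingConv/setCmpLibcallCC calls collapses into one static table plus a loop, so each AEABI helper is a single row and its three properties can no longer drift apart. The shape of that pattern with stand-in types:

    #include <map>
    #include <string>

    enum class Libcall { AddF64, OeqF64 };
    enum class CC { AAPCS };
    enum class Cond { Invalid, SetNE };

    std::map<Libcall, std::string> Names;
    std::map<Libcall, CC> Convs;
    std::map<Libcall, Cond> CmpCCs;

    void registerLibcalls() {
      static const struct {
        Libcall Op;
        const char *Name;
        CC Conv;
        Cond C;                      // Cond::Invalid: not a comparison helper
      } LibraryCalls[] = {
        {Libcall::AddF64, "__aeabi_dadd",   CC::AAPCS, Cond::Invalid},
        {Libcall::OeqF64, "__aeabi_dcmpeq", CC::AAPCS, Cond::SetNE},
      };

      for (const auto &LC : LibraryCalls) {
        Names[LC.Op] = LC.Name;
        Convs[LC.Op] = LC.Conv;
        if (LC.C != Cond::Invalid)   // comparisons also record a condition
          CmpCCs[LC.Op] = LC.C;
      }
    }

Adding a new helper becomes a one-line table entry instead of two or three scattered calls.
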
@@ -444,6 +409,13 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setLoadExtAction(ISD::SEXTLOAD, (MVT::SimpleValueType)VT, Expand);
setLoadExtAction(ISD::ZEXTLOAD, (MVT::SimpleValueType)VT, Expand);
setLoadExtAction(ISD::EXTLOAD, (MVT::SimpleValueType)VT, Expand);
+
+ setOperationAction(ISD::MULHS, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::SMUL_LOHI, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::MULHU, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::UMUL_LOHI, (MVT::SimpleValueType)VT, Expand);
+
+ setOperationAction(ISD::BSWAP, (MVT::SimpleValueType)VT, Expand);
}
setOperationAction(ISD::ConstantFP, MVT::f32, Custom);
@@ -631,6 +603,11 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
}
}
+ setOperationAction(ISD::SADDO, MVT::i32, Custom);
+ setOperationAction(ISD::UADDO, MVT::i32, Custom);
+ setOperationAction(ISD::SSUBO, MVT::i32, Custom);
+ setOperationAction(ISD::USUBO, MVT::i32, Custom);
+
// i64 operation support.
setOperationAction(ISD::MUL, MVT::i64, Expand);
setOperationAction(ISD::MULHU, MVT::i32, Expand);
@@ -850,7 +827,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setOperationAction(ISD::FP32_TO_FP16, MVT::i32, Expand);
}
}
-
+
// Combine sin / cos into one node or libcall if possible.
if (Subtarget->hasSinCos()) {
setLibcallName(RTLIB::SINCOS_F32, "sincosf");
@@ -913,7 +890,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
// and extractions.
std::pair<const TargetRegisterClass*, uint8_t>
ARMTargetLowering::findRepresentativeClass(MVT VT) const{
- const TargetRegisterClass *RRC = 0;
+ const TargetRegisterClass *RRC = nullptr;
uint8_t Cost = 1;
switch (VT.SimpleTy) {
default:
@@ -950,7 +927,7 @@ ARMTargetLowering::findRepresentativeClass(MVT VT) const{
const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
switch (Opcode) {
- default: return 0;
+ default: return nullptr;
case ARMISD::Wrapper: return "ARMISD::Wrapper";
case ARMISD::WrapperPIC: return "ARMISD::WrapperPIC";
case ARMISD::WrapperJT: return "ARMISD::WrapperJT";
@@ -1204,40 +1181,58 @@ static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
#include "ARMGenCallingConv.inc"
-/// CCAssignFnForNode - Selects the correct CCAssignFn for a the
-/// given CallingConvention value.
-CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
- bool Return,
- bool isVarArg) const {
+/// getEffectiveCallingConv - Get the effective calling convention, taking into
+/// account presence of floating point hardware and calling convention
+/// limitations, such as support for variadic functions.
+CallingConv::ID
+ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC,
+ bool isVarArg) const {
switch (CC) {
default:
llvm_unreachable("Unsupported calling convention");
- case CallingConv::Fast:
- if (Subtarget->hasVFP2() && !isVarArg) {
- if (!Subtarget->isAAPCS_ABI())
- return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
- // For AAPCS ABI targets, just use VFP variant of the calling convention.
- return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
- }
- // Fallthrough
- case CallingConv::C: {
- // Use target triple & subtarget features to do actual dispatch.
+ case CallingConv::ARM_AAPCS:
+ case CallingConv::ARM_APCS:
+ case CallingConv::GHC:
+ return CC;
+ case CallingConv::ARM_AAPCS_VFP:
+ return isVarArg ? CallingConv::ARM_AAPCS : CallingConv::ARM_AAPCS_VFP;
+ case CallingConv::C:
if (!Subtarget->isAAPCS_ABI())
- return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
+ return CallingConv::ARM_APCS;
else if (Subtarget->hasVFP2() &&
getTargetMachine().Options.FloatABIType == FloatABI::Hard &&
!isVarArg)
- return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
- return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
+ return CallingConv::ARM_AAPCS_VFP;
+ else
+ return CallingConv::ARM_AAPCS;
+ case CallingConv::Fast:
+ if (!Subtarget->isAAPCS_ABI()) {
+ if (Subtarget->hasVFP2() && !isVarArg)
+ return CallingConv::Fast;
+ return CallingConv::ARM_APCS;
+ } else if (Subtarget->hasVFP2() && !isVarArg)
+ return CallingConv::ARM_AAPCS_VFP;
+ else
+ return CallingConv::ARM_AAPCS;
}
- case CallingConv::ARM_AAPCS_VFP:
- if (!isVarArg)
- return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
- // Fallthrough
- case CallingConv::ARM_AAPCS:
- return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
+}
+
+/// CCAssignFnForNode - Selects the correct CCAssignFn for the given
+/// CallingConvention.
+CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
+ bool Return,
+ bool isVarArg) const {
+ switch (getEffectiveCallingConv(CC, isVarArg)) {
+ default:
+ llvm_unreachable("Unsupported calling convention");
case CallingConv::ARM_APCS:
return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
+ case CallingConv::ARM_AAPCS:
+ return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
+ case CallingConv::ARM_AAPCS_VFP:
+ return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
+ case CallingConv::Fast:
+ return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
case CallingConv::GHC:
return (Return ? RetCC_ARM_APCS : CC_ARM_APCS_GHC);
}
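
The refactor splits one big switch into two: getEffectiveCallingConv folds the target facts (FP hardware, ABI, variadic-ness) into a concrete convention, and CCAssignFnForNode becomes a pure convention-to-function lookup. A simplified sketch of the two-step shape; the real dispatch also distinguishes APCS targets and GHC:

    #include <stdexcept>

    enum class CC { C, Fast, AAPCS, AAPCS_VFP, APCS };
    using AssignFn = int (*)();          // stand-in for CCAssignFn

    int aapcs()    { return 0; }
    int aapcsVFP() { return 1; }
    int apcs()     { return 2; }

    // Step 1: fold target facts into one effective convention.
    CC effectiveCC(CC In, bool HasVFP, bool IsVarArg) {
      switch (In) {
      case CC::AAPCS_VFP: return IsVarArg ? CC::AAPCS : CC::AAPCS_VFP;
      case CC::C:
      case CC::Fast:      return (HasVFP && !IsVarArg) ? CC::AAPCS_VFP
                                                       : CC::AAPCS;
      default:            return In;
      }
    }

    // Step 2: a pure table from effective convention to implementation.
    AssignFn assignFnFor(CC Eff) {
      switch (Eff) {
      case CC::AAPCS:     return aapcs;
      case CC::AAPCS_VFP: return aapcsVFP;
      case CC::APCS:      return apcs;
      default:            throw std::logic_error("unsupported convention");
      }
    }
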
@@ -1286,6 +1281,8 @@ ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
InFlag);
Chain = Hi.getValue(1);
InFlag = Hi.getValue(2);
+ if (!Subtarget->isLittle())
+ std::swap (Lo, Hi);
Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
if (VA.getLocVT() == MVT::v2f64) {
@@ -1301,6 +1298,8 @@ ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
Chain = Hi.getValue(1);
InFlag = Hi.getValue(2);
+ if (!Subtarget->isLittle())
+ std::swap (Lo, Hi);
Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
DAG.getConstant(1, MVT::i32));
@@ -1351,16 +1350,17 @@ void ARMTargetLowering::PassF64ArgInRegs(SDLoc dl, SelectionDAG &DAG,
SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
DAG.getVTList(MVT::i32, MVT::i32), Arg);
- RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd));
+ unsigned id = Subtarget->isLittle() ? 0 : 1;
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd.getValue(id)));
if (NextVA.isRegLoc())
- RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1)));
+ RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1-id)));
else {
assert(NextVA.isMemLoc());
- if (StackPtr.getNode() == 0)
+ if (!StackPtr.getNode())
StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy());
- MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, fmrrd.getValue(1),
+ MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, fmrrd.getValue(1-id),
dl, DAG, NextVA,
Flags));
}
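
These hunks add big-endian support for f64 values split across two GPRs: VMOVRRD still yields (lo, hi) halves, but on a big-endian target they must be exchanged before being assigned to consecutive registers, hence the getValue(isLittle ? 0 : 1) / std::swap pairs. The underlying half-swap, sketched:

    #include <cstdint>
    #include <cstring>
    #include <utility>

    // Split a 64-bit payload into the two 32-bit "registers" in which it
    // would be passed, honoring byte order the way the patch does.
    std::pair<uint32_t, uint32_t> splitToRegs(double D, bool IsLittleEndian) {
      uint64_t Bits;
      std::memcpy(&Bits, &D, sizeof Bits);   // bit-cast without aliasing UB
      uint32_t Lo = static_cast<uint32_t>(Bits);
      uint32_t Hi = static_cast<uint32_t>(Bits >> 32);
      if (!IsLittleEndian)
        std::swap(Lo, Hi);                   // big-endian: halves trade places
      return {Lo, Hi};
    }
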
@@ -1398,6 +1398,9 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
isVarArg, isStructRet, MF.getFunction()->hasStructRetAttr(),
Outs, OutVals, Ins, DAG);
+ if (!isTailCall && CLI.CS && CLI.CS->isMustTailCall())
+ report_fatal_error("failed to perform tail call elimination on a call "
+ "site marked musttail");
// We don't support GuaranteedTailCallOpt for ARM, only automatically
// detected sibcalls.
if (isTailCall) {
@@ -1542,7 +1545,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue Ops[] = { Chain, Dst, Src, SizeNode, AlignNode};
MemOpChains.push_back(DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs,
- Ops, array_lengthof(Ops)));
+ Ops));
}
} else if (!isSibCall) {
assert(VA.isMemLoc());
@@ -1553,8 +1556,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
}
if (!MemOpChains.empty())
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- &MemOpChains[0], MemOpChains.size());
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
// Build a sequence of copy-to-reg nodes chained together with token chain
// and flag operands which copy the outgoing args into the appropriate regs.
@@ -1741,10 +1743,10 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
if (isTailCall)
- return DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, &Ops[0], Ops.size());
+ return DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, Ops);
// Returns a chain and a flag for retval copy to use.
- Chain = DAG.getNode(CallOpc, dl, NodeTys, &Ops[0], Ops.size());
+ Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
InFlag = Chain.getValue(1);
Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
@@ -2049,8 +2051,7 @@ static SDValue LowerInterruptReturn(SmallVectorImpl<SDValue> &RetOps,
RetOps.insert(RetOps.begin() + 1, DAG.getConstant(LROffset, MVT::i32, false));
- return DAG.getNode(ARMISD::INTRET_FLAG, DL, MVT::Other,
- RetOps.data(), RetOps.size());
+ return DAG.getNode(ARMISD::INTRET_FLAG, DL, MVT::Other, RetOps);
}
SDValue
@@ -2074,6 +2075,7 @@ ARMTargetLowering::LowerReturn(SDValue Chain,
SDValue Flag;
SmallVector<SDValue, 4> RetOps;
RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
+ bool isLittleEndian = Subtarget->isLittle();
// Copy the result values into the output registers.
for (unsigned i = 0, realRVLocIdx = 0;
@@ -2100,12 +2102,15 @@ ARMTargetLowering::LowerReturn(SDValue Chain,
SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl,
DAG.getVTList(MVT::i32, MVT::i32), Half);
- Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), HalfGPRs, Flag);
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
+ HalfGPRs.getValue(isLittleEndian ? 0 : 1),
+ Flag);
Flag = Chain.getValue(1);
RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
VA = RVLocs[++i]; // skip ahead to next loc
Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
- HalfGPRs.getValue(1), Flag);
+ HalfGPRs.getValue(isLittleEndian ? 1 : 0),
+ Flag);
Flag = Chain.getValue(1);
RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
VA = RVLocs[++i]; // skip ahead to next loc
@@ -2117,12 +2122,15 @@ ARMTargetLowering::LowerReturn(SDValue Chain,
// Legalize ret f64 -> ret 2 x i32. We always have fmrrd if f64 is
// available.
SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
- DAG.getVTList(MVT::i32, MVT::i32), &Arg, 1);
- Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd, Flag);
+ DAG.getVTList(MVT::i32, MVT::i32), Arg);
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
+ fmrrd.getValue(isLittleEndian ? 0 : 1),
+ Flag);
Flag = Chain.getValue(1);
RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
VA = RVLocs[++i]; // skip ahead to next loc
- Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd.getValue(1),
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
+ fmrrd.getValue(isLittleEndian ? 1 : 0),
Flag);
} else
Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
@@ -2151,8 +2159,7 @@ ARMTargetLowering::LowerReturn(SDValue Chain,
return LowerInterruptReturn(RetOps, dl, DAG);
}
- return DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other,
- RetOps.data(), RetOps.size());
+ return DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, RetOps);
}
bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
@@ -2314,13 +2321,13 @@ ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
Entry.Node = Argument;
Entry.Ty = (Type *) Type::getInt32Ty(*DAG.getContext());
Args.push_back(Entry);
+
// FIXME: is there useful debug info available here?
- TargetLowering::CallLoweringInfo CLI(Chain,
- (Type *) Type::getInt32Ty(*DAG.getContext()),
- false, false, false, false,
- 0, CallingConv::C, /*isTailCall=*/false,
- /*doesNotRet=*/false, /*isReturnValueUsed=*/true,
- DAG.getExternalSymbol("__tls_get_addr", PtrVT), Args, DAG, dl);
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(dl).setChain(Chain)
+ .setCallee(CallingConv::C, Type::getInt32Ty(*DAG.getContext()),
+ DAG.getExternalSymbol("__tls_get_addr", PtrVT), &Args, 0);
+
std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
return CallResult.first;
}
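
CallLoweringInfo construction moves from a twelve-positional-argument constructor to a fluent builder; each setter returns the object so the call site reads as named, optional steps. The idiom in miniature, with hypothetical names:

    #include <string>
    #include <utility>

    // Fluent builder in the spirit of CallLoweringInfo above: every setter
    // returns *this so calls can be chained at the use site.
    struct CallInfo {
      std::string Callee;
      int NumArgs = 0;
      bool TailCall = false;

      CallInfo &setCallee(std::string N) { Callee = std::move(N); return *this; }
      CallInfo &setNumArgs(int N)        { NumArgs = N;           return *this; }
      CallInfo &setTailCall(bool TC)     { TailCall = TC;         return *this; }
    };

    CallInfo makeCall() {
      CallInfo CLI;
      // Named steps replace a wall of positional bools and zeros.
      CLI.setCallee("__tls_get_addr").setNumArgs(1).setTailCall(false);
      return CLI;
    }
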
@@ -2466,6 +2473,23 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
return Result;
}
+SDValue ARMTargetLowering::LowerGlobalAddressWindows(SDValue Op,
+ SelectionDAG &DAG) const {
+ assert(Subtarget->isTargetWindows() && "non-Windows COFF is not supported");
+ assert(Subtarget->useMovt() && "Windows on ARM expects to use movw/movt");
+
+ const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+ EVT PtrVT = getPointerTy();
+ SDLoc DL(Op);
+
+ ++NumMovwMovt;
+
+ // FIXME: Once remat is capable of dealing with instructions with register
+ // operands, expand this into two nodes.
+ return DAG.getNode(ARMISD::Wrapper, DL, PtrVT,
+ DAG.getTargetGlobalAddress(GV, DL, PtrVT));
+}
+
SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op,
SelectionDAG &DAG) const {
assert(Subtarget->isTargetELF() &&
@@ -2654,7 +2678,8 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
}
-
+ if (!Subtarget->isLittle())
+ std::swap (ArgValue, ArgValue2);
return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2);
}
@@ -2803,8 +2828,7 @@ ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG,
AFI->setArgRegsSaveSize(ArgRegsSaveSize + AFI->getArgRegsSaveSize());
if (!MemOps.empty())
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- &MemOps[0], MemOps.size());
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
return FrameIndex;
} else {
if (ArgSize == 0) {
@@ -2834,8 +2858,9 @@ ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
// If there is no regs to be stored, just point address after last
// argument passed via stack.
int FrameIndex =
- StoreByValRegs(CCInfo, DAG, dl, Chain, 0, CCInfo.getInRegsParamsCount(),
- 0, ArgOffset, 0, ForceMutable, 0, TotalArgRegsSaveSize);
+ StoreByValRegs(CCInfo, DAG, dl, Chain, nullptr,
+ CCInfo.getInRegsParamsCount(), 0, ArgOffset, 0, ForceMutable,
+ 0, TotalArgRegsSaveSize);
AFI->setVarArgsFrameIndex(FrameIndex);
}
@@ -3166,11 +3191,96 @@ ARMTargetLowering::duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const {
return DAG.getNode(ARMISD::FMSTAT, DL, MVT::Glue, Cmp);
}
+std::pair<SDValue, SDValue>
+ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG,
+ SDValue &ARMcc) const {
+ assert(Op.getValueType() == MVT::i32 && "Unsupported value type");
+
+ SDValue Value, OverflowCmp;
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+
+
+ // FIXME: We are currently always generating CMPs because we don't support
+ // generating CMN through the backend. This is not as good as the natural
+ // CMP case because it causes a register dependency and cannot be folded
+ // later.
+
+ switch (Op.getOpcode()) {
+ default:
+ llvm_unreachable("Unknown overflow instruction!");
+ case ISD::SADDO:
+ ARMcc = DAG.getConstant(ARMCC::VC, MVT::i32);
+ Value = DAG.getNode(ISD::ADD, SDLoc(Op), Op.getValueType(), LHS, RHS);
+ OverflowCmp = DAG.getNode(ARMISD::CMP, SDLoc(Op), MVT::Glue, Value, LHS);
+ break;
+ case ISD::UADDO:
+ ARMcc = DAG.getConstant(ARMCC::HS, MVT::i32);
+ Value = DAG.getNode(ISD::ADD, SDLoc(Op), Op.getValueType(), LHS, RHS);
+ OverflowCmp = DAG.getNode(ARMISD::CMP, SDLoc(Op), MVT::Glue, Value, LHS);
+ break;
+ case ISD::SSUBO:
+ ARMcc = DAG.getConstant(ARMCC::VC, MVT::i32);
+ Value = DAG.getNode(ISD::SUB, SDLoc(Op), Op.getValueType(), LHS, RHS);
+ OverflowCmp = DAG.getNode(ARMISD::CMP, SDLoc(Op), MVT::Glue, LHS, RHS);
+ break;
+ case ISD::USUBO:
+ ARMcc = DAG.getConstant(ARMCC::HS, MVT::i32);
+ Value = DAG.getNode(ISD::SUB, SDLoc(Op), Op.getValueType(), LHS, RHS);
+ OverflowCmp = DAG.getNode(ARMISD::CMP, SDLoc(Op), MVT::Glue, LHS, RHS);
+ break;
+ } // switch (...)
+
+ return std::make_pair(Value, OverflowCmp);
+}
+
+
+SDValue
+ARMTargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const {
+ // Let legalize expand this if it isn't a legal type yet.
+ if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
+ return SDValue();
+
+ SDValue Value, OverflowCmp;
+ SDValue ARMcc;
+ std::tie(Value, OverflowCmp) = getARMXALUOOp(Op, DAG, ARMcc);
+ SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
+ // We use 0 and 1 as false and true values.
+ SDValue TVal = DAG.getConstant(1, MVT::i32);
+ SDValue FVal = DAG.getConstant(0, MVT::i32);
+ EVT VT = Op.getValueType();
+
+ SDValue Overflow = DAG.getNode(ARMISD::CMOV, SDLoc(Op), VT, TVal, FVal,
+ ARMcc, CCR, OverflowCmp);
+
+ SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
+ return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op), VTs, Value, Overflow);
+}
+
+
SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
SDValue Cond = Op.getOperand(0);
SDValue SelectTrue = Op.getOperand(1);
SDValue SelectFalse = Op.getOperand(2);
SDLoc dl(Op);
+ unsigned Opc = Cond.getOpcode();
+
+ if (Cond.getResNo() == 1 &&
+ (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
+ Opc == ISD::USUBO)) {
+ if (!DAG.getTargetLoweringInfo().isTypeLegal(Cond->getValueType(0)))
+ return SDValue();
+
+ SDValue Value, OverflowCmp;
+ SDValue ARMcc;
+ std::tie(Value, OverflowCmp) = getARMXALUOOp(Cond, DAG, ARMcc);
+ SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
+ EVT VT = Op.getValueType();
+
+ return DAG.getNode(ARMISD::CMOV, SDLoc(Op), VT, SelectTrue, SelectFalse,
+ ARMcc, CCR, OverflowCmp);
+
+ }
// Convert:
//
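
getARMXALUOOp computes the arithmetic result and the overflow comparison together and hands them back as a pair for std::tie, letting both LowerXALUO and the SELECT path above share one implementation. For the unsigned-add case the flag it materializes is exactly "the sum wrapped below an operand", as in this sketch:

    #include <cstdint>
    #include <tuple>
    #include <utility>

    // Value and overflow flag computed together, mirroring getARMXALUOOp's
    // pair return. Unsigned add overflows exactly when the sum wraps below
    // an operand -- the property the "CMP Value, LHS" (HS/LO) test checks.
    std::pair<uint32_t, bool> uaddo(uint32_t LHS, uint32_t RHS) {
      uint32_t Value = LHS + RHS;      // well-defined wraparound for unsigned
      bool Overflow = Value < LHS;
      return {Value, Overflow};
    }

    int main() {
      uint32_t Value;
      bool Overflow;
      std::tie(Value, Overflow) = uaddo(0xFFFFFFFFu, 2u);  // unpack as the patch does
      return Overflow ? 0 : 1;         // 0xFFFFFFFF + 2 wraps to 1: overflow
    }
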
@@ -3472,7 +3582,7 @@ ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const {
ARMcc = DAG.getConstant(CondCode, MVT::i32);
SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue Ops[] = { Chain, ARMcc, LHS1, LHS2, RHS1, RHS2, Dest };
- return DAG.getNode(ARMISD::BCC_i64, dl, VTList, Ops, 7);
+ return DAG.getNode(ARMISD::BCC_i64, dl, VTList, Ops);
}
return SDValue();
@@ -3512,11 +3622,11 @@ SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue Ops[] = { Chain, Dest, ARMcc, CCR, Cmp };
- SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5);
+ SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops);
if (CondCode2 != ARMCC::AL) {
ARMcc = DAG.getConstant(CondCode2, MVT::i32);
SDValue Ops[] = { Res, Dest, ARMcc, CCR, Res.getValue(1) };
- Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5);
+ Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops);
}
return Res;
}
@@ -3713,7 +3823,7 @@ SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
// Bitcast operand 1 to i32.
if (SrcVT == MVT::f64)
Tmp1 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
- &Tmp1, 1).getValue(1);
+ Tmp1).getValue(1);
Tmp1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp1);
// Or in the signbit with integer operations.
@@ -3729,7 +3839,7 @@ SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
// f64: Or the high part with signbit and then combine two parts.
Tmp0 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
- &Tmp0, 1);
+ Tmp0);
SDValue Lo = Tmp0.getValue(0);
SDValue Hi = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp0.getValue(1), Mask2);
Hi = DAG.getNode(ISD::OR, dl, MVT::i32, Hi, Tmp1);
@@ -3761,14 +3871,16 @@ SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{
}
SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
- MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ const ARMBaseRegisterInfo &ARI =
+ *static_cast<const ARMBaseRegisterInfo*>(RegInfo);
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
MFI->setFrameAddressIsTaken(true);
EVT VT = Op.getValueType();
SDLoc dl(Op); // FIXME probably not meaningful
unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
- unsigned FrameReg = (Subtarget->isThumb() || Subtarget->isTargetMachO())
- ? ARM::R7 : ARM::R11;
+ unsigned FrameReg = ARI.getFrameRegister(MF);
SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
while (Depth--)
FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
@@ -3777,6 +3889,18 @@ SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
return FrameAddr;
}
+// FIXME? Maybe this could be a TableGen attribute on some registers and
+// this table could be generated automatically from RegInfo.
+unsigned ARMTargetLowering::getRegisterByName(const char* RegName,
+ EVT VT) const {
+ unsigned Reg = StringSwitch<unsigned>(RegName)
+ .Case("sp", ARM::SP)
+ .Default(0);
+ if (Reg)
+ return Reg;
+ report_fatal_error("Invalid register name global variable");
+}
+
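getRegisterByName is the hook behind the named-register read/write intrinsics (llvm.read_register / llvm.write_register); for ARM only "sp" is recognised so far, and any other name aborts through report_fatal_error. A hedged usage sketch — a GNU global named register variable is one frontend construct that reaches this path; the snippet is illustrative, not from the patch:

// Illustrative only: reading the global register variable is emitted as
// llvm.read_register("sp"), which getRegisterByName above resolves to
// ARM::SP. Any other register name takes the fatal-error path.
register unsigned long stack_ptr asm("sp");

unsigned long current_sp(void) { return stack_ptr; }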
/// ExpandBITCAST - If the target supports VFP, this function is called to
/// expand a bit convert where either the source or destination type is i64 to
/// use a VMOVDRR or VMOVRRD node. This should not be done when the non-i64
@@ -3806,8 +3930,15 @@ static SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG) {
// Turn f64->i64 into VMOVRRD.
if (DstVT == MVT::i64 && TLI.isTypeLegal(SrcVT)) {
- SDValue Cvt = DAG.getNode(ARMISD::VMOVRRD, dl,
- DAG.getVTList(MVT::i32, MVT::i32), &Op, 1);
+ SDValue Cvt;
+ if (TLI.isBigEndian() && SrcVT.isVector() &&
+ SrcVT.getVectorNumElements() > 1)
+ Cvt = DAG.getNode(ARMISD::VMOVRRD, dl,
+ DAG.getVTList(MVT::i32, MVT::i32),
+ DAG.getNode(ARMISD::VREV64, dl, SrcVT, Op));
+ else
+ Cvt = DAG.getNode(ARMISD::VMOVRRD, dl,
+ DAG.getVTList(MVT::i32, MVT::i32), Op);
// Merge the pieces into a single i64 value.
return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Cvt, Cvt.getValue(1));
}
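The big-endian leg added above is needed because VMOVRRD merely splits the D register into two i32 halves; for a multi-element vector the lanes are reversed first (VREV64) so that element 0 ends up in the most significant bits of the i64, which is what the memory-layout semantics of a big-endian bitcast require. A portable model of the required result, assuming the usual bytes-in-memory definition of bitcast (illustrative, not from the patch):

#include <cstdint>

// On a big-endian target, bitcasting <4 x i16> to i64 must place lane 0
// (the element at the lowest address) in the top 16 bits of the integer.
uint64_t bitcast_v4i16_bigendian(const uint16_t lanes[4]) {
  uint64_t r = 0;
  for (int i = 0; i < 4; ++i)
    r = (r << 16) | lanes[i]; // lane 0 shifts up to the most significant bits
  return r;
}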
@@ -3863,7 +3994,7 @@ SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op,
CCR, Cmp);
SDValue Ops[2] = { Lo, Hi };
- return DAG.getMergeValues(Ops, 2, dl);
+ return DAG.getMergeValues(Ops, dl);
}
/// LowerShiftLeftParts - Lower SHL_PARTS, which returns two
@@ -3897,7 +4028,7 @@ SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op,
CCR, Cmp);
SDValue Ops[2] = { Lo, Hi };
- return DAG.getMergeValues(Ops, 2, dl);
+ return DAG.getMergeValues(Ops, dl);
}
SDValue ARMTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
@@ -4102,7 +4233,7 @@ static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG,
// First, build a SRA_FLAG/SRL_FLAG op, which shifts the top part by one and
// captures the result into a carry flag.
unsigned Opc = N->getOpcode() == ISD::SRL ? ARMISD::SRL_FLAG:ARMISD::SRA_FLAG;
- Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, MVT::Glue), &Hi, 1);
+ Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, MVT::Glue), Hi);
// The low part is an ARMISD::RRX operand, which shifts the carry in.
Lo = DAG.getNode(ARMISD::RRX, dl, MVT::i32, Lo, Hi.getValue(1));
@@ -4859,7 +4990,7 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
Ops.push_back(N);
Ops.push_back(Op.getOperand(I));
Ops.push_back(DAG.getConstant(I, MVT::i32));
- N = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, &Ops[0], 3);
+ N = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Ops);
}
}
return N;
@@ -4870,7 +5001,7 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
Ops.push_back(DAG.getNode(ISD::BITCAST, dl, MVT::i32,
Op.getOperand(i)));
EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts);
- SDValue Val = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, &Ops[0], NumElts);
+ SDValue Val = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, Ops);
Val = LowerBUILD_VECTOR(Val, DAG, ST);
if (Val.getNode())
return DAG.getNode(ISD::BITCAST, dl, VT, Val);
@@ -4906,7 +5037,7 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
SmallVector<SDValue, 8> Ops;
for (unsigned i = 0; i < NumElts; ++i)
Ops.push_back(DAG.getNode(ISD::BITCAST, dl, EltVT, Op.getOperand(i)));
- SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, &Ops[0],NumElts);
+ SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, Ops);
return DAG.getNode(ISD::BITCAST, dl, VT, Val);
}
@@ -5213,12 +5344,10 @@ static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op,
if (V2.getNode()->getOpcode() == ISD::UNDEF)
return DAG.getNode(ARMISD::VTBL1, DL, MVT::v8i8, V1,
- DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i8,
- &VTBLMask[0], 8));
+ DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i8, VTBLMask));
return DAG.getNode(ARMISD::VTBL2, DL, MVT::v8i8, V1, V2,
- DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i8,
- &VTBLMask[0], 8));
+ DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i8, VTBLMask));
}
static SDValue LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(SDValue Op,
@@ -5371,7 +5500,7 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
DAG.getConstant(ShuffleMask[i] & (NumElts-1),
MVT::i32)));
}
- SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, &Ops[0],NumElts);
+ SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, Ops);
return DAG.getNode(ISD::BITCAST, dl, VT, Val);
}
@@ -5608,7 +5737,7 @@ static SDValue SkipExtensionForVMULL(SDNode *N, SelectionDAG &DAG) {
Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), MVT::i32));
}
return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N),
- MVT::getVectorVT(TruncVT, NumElts), Ops.data(), NumElts);
+ MVT::getVectorVT(TruncVT, NumElts), Ops);
}
static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG) {
@@ -5946,12 +6075,12 @@ SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {
? "__sincos_stret" : "__sincosf_stret";
SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy());
- TargetLowering::
- CallLoweringInfo CLI(DAG.getEntryNode(), Type::getVoidTy(*DAG.getContext()),
- false, false, false, false, 0,
- CallingConv::C, /*isTaillCall=*/false,
- /*doesNotRet=*/false, /*isReturnValueUsed*/false,
- Callee, Args, DAG, dl);
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(dl).setChain(DAG.getEntryNode())
+ .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()), Callee,
+ &Args, 0)
+ .setDiscardResult();
+
std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
SDValue LoadSin = DAG.getLoad(ArgVT, dl, CallResult.second, SRet,
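This hunk is part of a tree-wide migration from the long positional CallLoweringInfo constructor (visible in the removed lines, flag soup included) to a fluent builder. A minimal sketch of the new shape, using only setters that appear in this diff; Chain, RetTy, Callee, Args and dl are placeholders assumed to be prepared by the caller:

// Sketch under stated assumptions; each setter returns *this, so the
// configuration chains and unused options simply stay at their defaults.
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(dl)
   .setChain(Chain)
   .setCallee(CallingConv::C, RetTy, Callee, &Args, 0)
   .setDiscardResult();                       // drop the return value
std::pair<SDValue, SDValue> Result = LowerCallTo(CLI);
// Result.first is the return value (if any), Result.second the out-chain.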
@@ -5998,8 +6127,7 @@ static void ReplaceREADCYCLECOUNTER(SDNode *N,
};
Cycles32 = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL,
- DAG.getVTList(MVT::i32, MVT::Other), &Ops[0],
- array_lengthof(Ops));
+ DAG.getVTList(MVT::i32, MVT::Other), Ops);
OutChain = Cycles32.getValue(1);
} else {
// Intrinsic is defined to return 0 on unsupported platforms. Technically
@@ -6022,8 +6150,15 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
case ISD::GlobalAddress:
- return Subtarget->isTargetMachO() ? LowerGlobalAddressDarwin(Op, DAG) :
- LowerGlobalAddressELF(Op, DAG);
+ switch (Subtarget->getTargetTriple().getObjectFormat()) {
+ default: llvm_unreachable("unknown object format");
+ case Triple::COFF:
+ return LowerGlobalAddressWindows(Op, DAG);
+ case Triple::ELF:
+ return LowerGlobalAddressELF(Op, DAG);
+ case Triple::MachO:
+ return LowerGlobalAddressDarwin(Op, DAG);
+ }
case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
case ISD::SELECT: return LowerSELECT(Op, DAG);
case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
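GlobalAddress lowering now dispatches on the triple's object-file format instead of the old isTargetMachO() test, which makes room for the new COFF/Windows path. The triples below are examples only, not from the patch:

// Illustrative only: getObjectFormat() derives the format from the
// OS/environment components of the triple.
Triple ELFTriple("armv7--linux-gnueabi");   // getObjectFormat() == Triple::ELF
Triple COFFTriple("thumbv7--windows-msvc"); // getObjectFormat() == Triple::COFF
Triple MachOTriple("armv7-apple-ios");      // getObjectFormat() == Triple::MachO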
@@ -6068,6 +6203,11 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::ADDE:
case ISD::SUBC:
case ISD::SUBE: return LowerADDC_ADDE_SUBC_SUBE(Op, DAG);
+ case ISD::SADDO:
+ case ISD::UADDO:
+ case ISD::SSUBO:
+ case ISD::USUBO:
+ return LowerXALUO(Op, DAG);
case ISD::ATOMIC_LOAD:
case ISD::ATOMIC_STORE: return LowerAtomicLoadStore(Op, DAG);
case ISD::FSINCOS: return LowerFSINCOS(Op, DAG);
@@ -6558,7 +6698,7 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const {
}
// N.B. the order the invoke BBs are processed in doesn't matter here.
- const uint16_t *SavedRegs = RI.getCalleeSavedRegs(MF);
+ const MCPhysReg *SavedRegs = RI.getCalleeSavedRegs(MF);
SmallVector<MachineBasicBlock*, 64> MBBLPads;
for (SmallPtrSet<MachineBasicBlock*, 64>::iterator
I = InvokeBBs.begin(), E = InvokeBBs.end(); I != E; ++I) {
@@ -6755,8 +6895,8 @@ ARMTargetLowering::EmitStructByval(MachineInstr *MI,
MachineFunction *MF = BB->getParent();
MachineRegisterInfo &MRI = MF->getRegInfo();
unsigned UnitSize = 0;
- const TargetRegisterClass *TRC = 0;
- const TargetRegisterClass *VecTRC = 0;
+ const TargetRegisterClass *TRC = nullptr;
+ const TargetRegisterClass *VecTRC = nullptr;
bool IsThumb1 = Subtarget->isThumb1Only();
bool IsThumb2 = Subtarget->isThumb2();
@@ -6790,7 +6930,7 @@ ARMTargetLowering::EmitStructByval(MachineInstr *MI,
? (const TargetRegisterClass *)&ARM::DPairRegClass
: UnitSize == 8
? (const TargetRegisterClass *)&ARM::DPRRegClass
- : 0;
+ : nullptr;
unsigned BytesLeft = SizeVal % UnitSize;
unsigned LoopSize = SizeVal - BytesLeft;
@@ -7520,8 +7660,7 @@ static SDValue AddCombineToVPADDL(SDNode *N, SDValue N0, SDValue N1,
llvm_unreachable("Invalid vector element type for padd optimization.");
}
- SDValue tmp = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N),
- widenType, &Ops[0], Ops.size());
+ SDValue tmp = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N), widenType, Ops);
unsigned ExtOp = VT.bitsGT(tmp.getValueType()) ? ISD::ANY_EXTEND : ISD::TRUNCATE;
return DAG.getNode(ExtOp, SDLoc(N), VT, tmp);
}
@@ -7581,7 +7720,7 @@ static SDValue AddCombineTo64bitMLAL(SDNode *AddcNode,
// Look for the glued ADDE.
SDNode* AddeNode = AddcNode->getGluedUser();
- if (AddeNode == NULL)
+ if (!AddeNode)
return SDValue();
// Make sure it is really an ADDE.
@@ -7616,9 +7755,9 @@ static SDValue AddCombineTo64bitMLAL(SDNode *AddcNode,
// Figure out the high and low input values to the MLAL node.
SDValue* HiMul = &MULOp;
- SDValue* HiAdd = NULL;
- SDValue* LoMul = NULL;
- SDValue* LowAdd = NULL;
+ SDValue* HiAdd = nullptr;
+ SDValue* LoMul = nullptr;
+ SDValue* LowAdd = nullptr;
if (IsLeftOperandMUL)
HiAdd = &AddeOp1;
@@ -7635,7 +7774,7 @@ static SDValue AddCombineTo64bitMLAL(SDNode *AddcNode,
LowAdd = &AddcOp0;
}
- if (LoMul == NULL)
+ if (!LoMul)
return SDValue();
if (LoMul->getNode() != HiMul->getNode())
@@ -7652,8 +7791,7 @@ static SDValue AddCombineTo64bitMLAL(SDNode *AddcNode,
Ops.push_back(*HiAdd);
SDValue MLALNode = DAG.getNode(FinalOpc, SDLoc(AddcNode),
- DAG.getVTList(MVT::i32, MVT::i32),
- &Ops[0], Ops.size());
+ DAG.getVTList(MVT::i32, MVT::i32), Ops);
// Replace the ADDs' nodes uses by the MLA node's values.
SDValue HiMLALResult(MLALNode.getNode(), 1);
@@ -8290,8 +8428,7 @@ static SDValue PerformSTORECombine(SDNode *N,
Increment);
Chains.push_back(Ch);
}
- return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, &Chains[0],
- Chains.size());
+ return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
}
if (!ISD::isNormalStore(St))
@@ -8302,16 +8439,18 @@ static SDValue PerformSTORECombine(SDNode *N,
if (StVal.getNode()->getOpcode() == ARMISD::VMOVDRR &&
StVal.getNode()->hasOneUse()) {
SelectionDAG &DAG = DCI.DAG;
+ bool isBigEndian = DAG.getTargetLoweringInfo().isBigEndian();
SDLoc DL(St);
SDValue BasePtr = St->getBasePtr();
SDValue NewST1 = DAG.getStore(St->getChain(), DL,
- StVal.getNode()->getOperand(0), BasePtr,
- St->getPointerInfo(), St->isVolatile(),
+ StVal.getNode()->getOperand(isBigEndian ? 1 : 0),
+ BasePtr, St->getPointerInfo(), St->isVolatile(),
St->isNonTemporal(), St->getAlignment());
SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,
DAG.getConstant(4, MVT::i32));
- return DAG.getStore(NewST1.getValue(0), DL, StVal.getNode()->getOperand(1),
+ return DAG.getStore(NewST1.getValue(0), DL,
+ StVal.getNode()->getOperand(isBigEndian ? 0 : 1),
OffsetPtr, St->getPointerInfo(), St->isVolatile(),
St->isNonTemporal(),
std::min(4U, St->getAlignment() / 2));
@@ -8387,7 +8526,7 @@ static SDValue PerformBUILD_VECTORCombine(SDNode *N,
DCI.AddToWorklist(V.getNode());
}
EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64, NumElts);
- SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, FloatVT, Ops.data(), NumElts);
+ SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, FloatVT, Ops);
return DAG.getNode(ISD::BITCAST, dl, VT, BV);
}
@@ -8470,7 +8609,7 @@ PerformARMBUILD_VECTORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
// Fold obvious case.
V = V.getOperand(0);
else {
- V = DAG.getNode(ISD::BITCAST, SDLoc(V), MVT::i32, V);
+ V = DAG.getNode(ISD::BITCAST, SDLoc(V), MVT::i32, V);
// Make the DAGCombiner fold the bitcasts.
DCI.AddToWorklist(V.getNode());
}
@@ -8666,7 +8805,7 @@ static SDValue CombineBaseUpdate(SDNode *N,
Tys[n] = VecTy;
Tys[n++] = MVT::i32;
Tys[n] = MVT::Other;
- SDVTList SDTys = DAG.getVTList(Tys, NumResultVecs+2);
+ SDVTList SDTys = DAG.getVTList(ArrayRef<EVT>(Tys, NumResultVecs+2));
SmallVector<SDValue, 8> Ops;
Ops.push_back(N->getOperand(0)); // incoming chain
Ops.push_back(N->getOperand(AddrOpIdx));
@@ -8676,8 +8815,7 @@ static SDValue CombineBaseUpdate(SDNode *N,
}
MemIntrinsicSDNode *MemInt = cast<MemIntrinsicSDNode>(N);
SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, SDLoc(N), SDTys,
- Ops.data(), Ops.size(),
- MemInt->getMemoryVT(),
+ Ops, MemInt->getMemoryVT(),
MemInt->getMemOperand());
// Update the uses.
@@ -8746,11 +8884,11 @@ static bool CombineVLDDUP(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
for (n = 0; n < NumVecs; ++n)
Tys[n] = VT;
Tys[n] = MVT::Other;
- SDVTList SDTys = DAG.getVTList(Tys, NumVecs+1);
+ SDVTList SDTys = DAG.getVTList(ArrayRef<EVT>(Tys, NumVecs+1));
SDValue Ops[] = { VLD->getOperand(0), VLD->getOperand(2) };
MemIntrinsicSDNode *VLDMemInt = cast<MemIntrinsicSDNode>(VLD);
SDValue VLDDup = DAG.getMemIntrinsicNode(NewOpc, SDLoc(VLD), SDTys,
- Ops, 2, VLDMemInt->getMemoryVT(),
+ Ops, VLDMemInt->getMemoryVT(),
VLDMemInt->getMemOperand());
// Update the uses.
@@ -9348,7 +9486,7 @@ ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const {
if (Res.getNode()) {
APInt KnownZero, KnownOne;
- DAG.ComputeMaskedBits(SDValue(N,0), KnownZero, KnownOne);
+ DAG.computeKnownBits(SDValue(N,0), KnownZero, KnownOne);
// Capture demanded bits information that would be otherwise lost.
if (KnownZero == 0xfffffffe)
Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
@@ -9935,11 +10073,11 @@ bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
return true;
}
-void ARMTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
- APInt &KnownZero,
- APInt &KnownOne,
- const SelectionDAG &DAG,
- unsigned Depth) const {
+void ARMTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth) const {
unsigned BitWidth = KnownOne.getBitWidth();
KnownZero = KnownOne = APInt(BitWidth, 0);
switch (Op.getOpcode()) {
@@ -9955,11 +10093,11 @@ void ARMTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
break;
case ARMISD::CMOV: {
// Bits are known zero/one if known on the LHS and RHS.
- DAG.ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
+ DAG.computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
if (KnownZero == 0 && KnownOne == 0) return;
APInt KnownZeroRHS, KnownOneRHS;
- DAG.ComputeMaskedBits(Op.getOperand(1), KnownZeroRHS, KnownOneRHS, Depth+1);
+ DAG.computeKnownBits(Op.getOperand(1), KnownZeroRHS, KnownOneRHS, Depth+1);
KnownZero &= KnownZeroRHS;
KnownOne &= KnownOneRHS;
return;
@@ -10053,7 +10191,7 @@ ARMTargetLowering::getSingleConstraintMatchWeight(
Value *CallOperandVal = info.CallOperandVal;
// If we don't have a value, we can't do a match,
// but allow it at the lowest weight.
- if (CallOperandVal == NULL)
+ if (!CallOperandVal)
return CW_Default;
Type *type = CallOperandVal->getType();
// Look at the constraint type.
@@ -10132,7 +10270,7 @@ void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
std::string &Constraint,
std::vector<SDValue>&Ops,
SelectionDAG &DAG) const {
- SDValue Result(0, 0);
+ SDValue Result;
// Currently only support length 1 constraints.
if (Constraint.length() != 1) return;
@@ -10331,13 +10469,12 @@ SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const {
Type *RetTy = (Type*)StructType::get(Ty, Ty, NULL);
SDLoc dl(Op);
- TargetLowering::
- CallLoweringInfo CLI(InChain, RetTy, isSigned, !isSigned, false, true,
- 0, getLibcallCallingConv(LC), /*isTailCall=*/false,
- /*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
- Callee, Args, DAG, dl);
- std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI);
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(dl).setChain(InChain)
+ .setCallee(getLibcallCallingConv(LC), RetTy, Callee, &Args, 0)
+ .setInRegister().setSExtResult(isSigned).setZExtResult(!isSigned);
+ std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI);
return CallInfo.first;
}
@@ -10494,3 +10631,160 @@ bool ARMTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
return false;
return true;
}
+
+bool ARMTargetLowering::shouldExpandAtomicInIR(Instruction *Inst) const {
+ // Loads and stores of fewer than 64 bits are already atomic; ones above that
+ // are doomed anyway, so defer to the default libcall and blame the OS when
+ // things go wrong:
+ if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
+ return SI->getValueOperand()->getType()->getPrimitiveSizeInBits() == 64;
+ else if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
+ return LI->getType()->getPrimitiveSizeInBits() == 64;
+
+ // For the real atomic operations, we have ldrex/strex up to 64 bits.
+ return Inst->getType()->getPrimitiveSizeInBits() <= 64;
+}
+
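shouldExpandAtomicInIR answers for three cases: 64-bit atomic loads and stores (true — they need an exclusive pair), narrower loads and stores (false — a plain LDR/STR is already atomic), and the remaining atomic operations up to 64 bits (true — expanded to an exclusive loop). A sketch of how an expansion pass is assumed to consult the hook; expandAtomicToLLSC is a hypothetical helper, not an LLVM API:

// Sketch under stated assumptions: walk a function and expand whatever
// the target asks for. TLI is the ARMTargetLowering instance.
for (BasicBlock &BB : F)
  for (Instruction &I : BB)
    if (I.isAtomic() && TLI->shouldExpandAtomicInIR(&I))
      expandAtomicToLLSC(&I);   // hypothetical helper building the loop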
+Value *ARMTargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
+ AtomicOrdering Ord) const {
+ Module *M = Builder.GetInsertBlock()->getParent()->getParent();
+ Type *ValTy = cast<PointerType>(Addr->getType())->getElementType();
+ bool IsAcquire =
+ Ord == Acquire || Ord == AcquireRelease || Ord == SequentiallyConsistent;
+
+ // Since i64 isn't legal and intrinsics don't get type-lowered, the ldrexd
+ // intrinsic must return {i32, i32} and we have to recombine them into a
+ // single i64 here.
+ if (ValTy->getPrimitiveSizeInBits() == 64) {
+ Intrinsic::ID Int =
+ IsAcquire ? Intrinsic::arm_ldaexd : Intrinsic::arm_ldrexd;
+ Function *Ldrex = llvm::Intrinsic::getDeclaration(M, Int);
+
+ Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext()));
+ Value *LoHi = Builder.CreateCall(Ldrex, Addr, "lohi");
+
+ Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
+ Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
+ if (!Subtarget->isLittle())
+ std::swap(Lo, Hi);
+ Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
+ Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
+ return Builder.CreateOr(
+ Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 32)), "val64");
+ }
+
+ Type *Tys[] = { Addr->getType() };
+ Intrinsic::ID Int = IsAcquire ? Intrinsic::arm_ldaex : Intrinsic::arm_ldrex;
+ Function *Ldrex = llvm::Intrinsic::getDeclaration(M, Int, Tys);
+
+ return Builder.CreateTruncOrBitCast(
+ Builder.CreateCall(Ldrex, Addr),
+ cast<PointerType>(Addr->getType())->getElementType());
+}
+
+Value *ARMTargetLowering::emitStoreConditional(IRBuilder<> &Builder, Value *Val,
+ Value *Addr,
+ AtomicOrdering Ord) const {
+ Module *M = Builder.GetInsertBlock()->getParent()->getParent();
+ bool IsRelease =
+ Ord == Release || Ord == AcquireRelease || Ord == SequentiallyConsistent;
+
+ // Since the intrinsics must have legal type, the i64 intrinsics take two
+ // parameters: "i32, i32". We must marshal Val into the appropriate form
+ // before the call.
+ if (Val->getType()->getPrimitiveSizeInBits() == 64) {
+ Intrinsic::ID Int =
+ IsRelease ? Intrinsic::arm_stlexd : Intrinsic::arm_strexd;
+ Function *Strex = Intrinsic::getDeclaration(M, Int);
+ Type *Int32Ty = Type::getInt32Ty(M->getContext());
+
+ Value *Lo = Builder.CreateTrunc(Val, Int32Ty, "lo");
+ Value *Hi = Builder.CreateTrunc(Builder.CreateLShr(Val, 32), Int32Ty, "hi");
+ if (!Subtarget->isLittle())
+ std::swap(Lo, Hi);
+ Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext()));
+ return Builder.CreateCall3(Strex, Lo, Hi, Addr);
+ }
+
+ Intrinsic::ID Int = IsRelease ? Intrinsic::arm_stlex : Intrinsic::arm_strex;
+ Type *Tys[] = { Addr->getType() };
+ Function *Strex = Intrinsic::getDeclaration(M, Int, Tys);
+
+ return Builder.CreateCall2(
+ Strex, Builder.CreateZExtOrBitCast(
+ Val, Strex->getFunctionType()->getParamType(0)),
+ Addr);
+}
+
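Taken together, the two hooks let a generic expansion pass build the classic load-exclusive/store-exclusive retry loop in IR. A minimal sketch for "atomicrmw add i32"; the control-flow plumbing is elided, and Builder, Addr, Val and Ord are assumed to be positioned inside the loop body by the caller:

// strex returns 0 on success and non-zero when the exclusive monitor was
// lost, so the loop retries while the status value is non-zero.
Value *Old  = TLI->emitLoadLinked(Builder, Addr, Ord);             // ldrex
Value *New  = Builder.CreateAdd(Old, Val, "new");
Value *Fail = TLI->emitStoreConditional(Builder, New, Addr, Ord);  // strex
Value *Retry = Builder.CreateICmpNE(
    Fail, ConstantInt::get(Fail->getType(), 0), "retry");
// Builder.CreateCondBr(Retry, LoopBB, DoneBB) then closes the loop.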
+enum HABaseType {
+ HA_UNKNOWN = 0,
+ HA_FLOAT,
+ HA_DOUBLE,
+ HA_VECT64,
+ HA_VECT128
+};
+
+static bool isHomogeneousAggregate(Type *Ty, HABaseType &Base,
+ uint64_t &Members) {
+ if (const StructType *ST = dyn_cast<StructType>(Ty)) {
+ for (unsigned i = 0; i < ST->getNumElements(); ++i) {
+ uint64_t SubMembers = 0;
+ if (!isHomogeneousAggregate(ST->getElementType(i), Base, SubMembers))
+ return false;
+ Members += SubMembers;
+ }
+ } else if (const ArrayType *AT = dyn_cast<ArrayType>(Ty)) {
+ uint64_t SubMembers = 0;
+ if (!isHomogeneousAggregate(AT->getElementType(), Base, SubMembers))
+ return false;
+ Members += SubMembers * AT->getNumElements();
+ } else if (Ty->isFloatTy()) {
+ if (Base != HA_UNKNOWN && Base != HA_FLOAT)
+ return false;
+ Members = 1;
+ Base = HA_FLOAT;
+ } else if (Ty->isDoubleTy()) {
+ if (Base != HA_UNKNOWN && Base != HA_DOUBLE)
+ return false;
+ Members = 1;
+ Base = HA_DOUBLE;
+ } else if (const VectorType *VT = dyn_cast<VectorType>(Ty)) {
+ Members = 1;
+ switch (Base) {
+ case HA_FLOAT:
+ case HA_DOUBLE:
+ return false;
+ case HA_VECT64:
+ return VT->getBitWidth() == 64;
+ case HA_VECT128:
+ return VT->getBitWidth() == 128;
+ case HA_UNKNOWN:
+ switch (VT->getBitWidth()) {
+ case 64:
+ Base = HA_VECT64;
+ return true;
+ case 128:
+ Base = HA_VECT128;
+ return true;
+ default:
+ return false;
+ }
+ }
+ }
+
+ return (Members > 0 && Members <= 4);
+}
+
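Worked examples for the classifier above: under AAPCS-VFP a homogeneous aggregate is one to four members of a single base type, recursing through nested structs and arrays. The source-level types below are illustrative, not from the patch:

struct HA1 { float x, y, z; };       // HA: Base = HA_FLOAT,  Members = 3
struct HA2 { double d[4]; };         // HA: Base = HA_DOUBLE, Members = 4
struct No1 { float f; double d; };   // not an HA: mixed base types
struct No2 { float f[5]; };          // not an HA: Members > 4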
+/// \brief Return true if a type is an AAPCS-VFP homogeneous aggregate.
+bool ARMTargetLowering::functionArgumentNeedsConsecutiveRegisters(
+ Type *Ty, CallingConv::ID CallConv, bool isVarArg) const {
+ if (getEffectiveCallingConv(CallConv, isVarArg) !=
+ CallingConv::ARM_AAPCS_VFP)
+ return false;
+
+ HABaseType Base = HA_UNKNOWN;
+ uint64_t Members = 0;
+ bool result = isHomogeneousAggregate(Ty, Base, Members);
+ DEBUG(dbgs() << "isHA: " << result << " "; Ty->dump(); dbgs() << "\n");
+ return result;
+}
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index f33e6db..c15305c 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -313,10 +313,10 @@ namespace llvm {
SDValue &Offset, ISD::MemIndexedMode &AM,
SelectionDAG &DAG) const override;
- void computeMaskedBitsForTargetNode(const SDValue Op, APInt &KnownZero,
- APInt &KnownOne,
- const SelectionDAG &DAG,
- unsigned Depth) const override;
+ void computeKnownBitsForTargetNode(const SDValue Op, APInt &KnownZero,
+ APInt &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth) const override;
bool ExpandInlineAsm(CallInst *CI) const override;
@@ -384,6 +384,18 @@ namespace llvm {
bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
Type *Ty) const override;
+ /// \brief Returns true if an argument of type Ty needs to be passed in a
+ /// contiguous block of registers in calling convention CallConv.
+ bool functionArgumentNeedsConsecutiveRegisters(
+ Type *Ty, CallingConv::ID CallConv, bool isVarArg) const override;
+
+ Value *emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
+ AtomicOrdering Ord) const override;
+ Value *emitStoreConditional(IRBuilder<> &Builder, Value *Val,
+ Value *Addr, AtomicOrdering Ord) const override;
+
+ bool shouldExpandAtomicInIR(Instruction *Inst) const override;
+
protected:
std::pair<const TargetRegisterClass*, uint8_t>
findRepresentativeClass(MVT VT) const override;
@@ -404,6 +416,7 @@ namespace llvm {
void addTypeForNEON(MVT VT, MVT PromotedLdStVT, MVT PromotedBitwiseVT);
void addDRTypeForNEON(MVT VT);
void addQRTypeForNEON(MVT VT);
+ std::pair<SDValue, SDValue> getARMXALUOOp(SDValue Op, SelectionDAG &DAG, SDValue &ARMcc) const;
typedef SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPassVector;
void PassF64ArgInRegs(SDLoc dl, SelectionDAG &DAG,
@@ -417,6 +430,8 @@ namespace llvm {
SDValue &Root, SelectionDAG &DAG,
SDLoc dl) const;
+ CallingConv::ID getEffectiveCallingConv(CallingConv::ID CC,
+ bool isVarArg) const;
CCAssignFn *CCAssignFnForNode(CallingConv::ID CC, bool Return,
bool isVarArg) const;
SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg,
@@ -430,6 +445,7 @@ namespace llvm {
SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerGlobalAddressDarwin(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerGlobalAddressELF(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerGlobalAddressWindows(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
SelectionDAG &DAG) const;
@@ -438,6 +454,7 @@ namespace llvm {
TLSModel::Model model) const;
SDValue LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
@@ -454,6 +471,8 @@ namespace llvm {
SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerDivRem(SDValue Op, SelectionDAG &DAG) const;
+ unsigned getRegisterByName(const char* RegName, EVT VT) const override;
+
/// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster
/// than a pair of fmul and fadd instructions. fmuladd intrinsics will be
/// expanded to FMAs when this method returns true, otherwise fmuladd is
@@ -567,7 +586,6 @@ namespace llvm {
OtherModImm
};
-
namespace ARM {
FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
const TargetLibraryInfo *libInfo);
diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td
index aafff98..59e9260 100644
--- a/lib/Target/ARM/ARMInstrFormats.td
+++ b/lib/Target/ARM/ARMInstrFormats.td
@@ -2029,7 +2029,7 @@ class N2V<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16,
// Same as N2V but not predicated.
class N2Vnp<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op7, bit op6,
dag oops, dag iops, InstrItinClass itin, string OpcodeStr,
- string Dt, ValueType ResTy, ValueType OpTy, list<dag> pattern>
+ string Dt, list<dag> pattern>
: NeonInp<oops, iops, AddrModeNone, IndexModeNone, N2RegFrm, itin,
OpcodeStr, Dt, "$Vd, $Vm", "", pattern> {
bits<5> Vd;
@@ -2138,8 +2138,7 @@ class N3V<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op6, bit op4,
class N3Vnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
bit op4, dag oops, dag iops,Format f, InstrItinClass itin,
- string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
- SDPatternOperator IntOp, bit Commutable, list<dag> pattern>
+ string OpcodeStr, string Dt, list<dag> pattern>
: NeonInp<oops, iops, AddrModeNone, IndexModeNone, f, itin, OpcodeStr,
Dt, "$Vd, $Vn, $Vm", "", pattern> {
bits<5> Vd;
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index 75a109e..718d5da 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -991,6 +991,81 @@ def addrmode6oneL32 : Operand<i32>,
let EncoderMethod = "getAddrMode6OneLane32AddressOpValue";
}
+// Base class for addrmode6 with specific alignment restrictions.
+class AddrMode6Align : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectAddrMode6", [], [SDNPWantParent]> {
+ let PrintMethod = "printAddrMode6Operand";
+ let MIOperandInfo = (ops GPR:$addr, i32imm:$align);
+ let EncoderMethod = "getAddrMode6AddressOpValue";
+ let DecoderMethod = "DecodeAddrMode6Operand";
+}
+
+// Special version of addrmode6 for VLD/VST instructions that allow no
+// alignment encoding; checks that no alignment is specified.
+def AddrMode6AlignNoneAsmOperand : AsmOperandClass {
+ let Name = "AlignedMemoryNone";
+ let DiagnosticType = "AlignedMemoryRequiresNone";
+}
+def addrmode6alignNone : AddrMode6Align {
+ // The alignment specifier can only be omitted.
+ let ParserMatchClass = AddrMode6AlignNoneAsmOperand;
+}
+
+// Special version of addrmode6 to handle 16-bit alignment encoding for
+// VLD/VST instructions and check the alignment value.
+def AddrMode6Align16AsmOperand : AsmOperandClass {
+ let Name = "AlignedMemory16";
+ let DiagnosticType = "AlignedMemoryRequires16";
+}
+def addrmode6align16 : AddrMode6Align {
+ // The alignment specifier can only be 16 or omitted.
+ let ParserMatchClass = AddrMode6Align16AsmOperand;
+}
+
+// Special version of addrmode6 to handle 32-bit alignment encoding for
+// VLD/VST instructions and check the alignment value.
+def AddrMode6Align32AsmOperand : AsmOperandClass {
+ let Name = "AlignedMemory32";
+ let DiagnosticType = "AlignedMemoryRequires32";
+}
+def addrmode6align32 : AddrMode6Align {
+ // The alignment specifier can only be 32 or omitted.
+ let ParserMatchClass = AddrMode6Align32AsmOperand;
+}
+
+// Special version of addrmode6 to handle 64-bit alignment encoding for
+// VLD/VST instructions and check the alignment value.
+def AddrMode6Align64AsmOperand : AsmOperandClass {
+ let Name = "AlignedMemory64";
+ let DiagnosticType = "AlignedMemoryRequires64";
+}
+def addrmode6align64 : AddrMode6Align {
+ // The alignment specifier can only be 64 or omitted.
+ let ParserMatchClass = AddrMode6Align64AsmOperand;
+}
+
+// Special version of addrmode6 to handle 64-bit or 128-bit alignment encoding
+// for VLD/VST instructions and check the alignment value.
+def AddrMode6Align64or128AsmOperand : AsmOperandClass {
+ let Name = "AlignedMemory64or128";
+ let DiagnosticType = "AlignedMemoryRequires64or128";
+}
+def addrmode6align64or128 : AddrMode6Align {
+ // The alignment specifier can only be 64, 128 or omitted.
+ let ParserMatchClass = AddrMode6Align64or128AsmOperand;
+}
+
+// Special version of addrmode6 to handle 64-bit, 128-bit or 256-bit alignment
+// encoding for VLD/VST instructions and check the alignment value.
+def AddrMode6Align64or128or256AsmOperand : AsmOperandClass {
+ let Name = "AlignedMemory64or128or256";
+ let DiagnosticType = "AlignedMemoryRequires64or128or256";
+}
+def addrmode6align64or128or256 : AddrMode6Align {
+ // The alignment specifier can only be 64, 128, 256 or omitted.
+ let ParserMatchClass = AddrMode6Align64or128or256AsmOperand;
+}
+
// Special version of addrmode6 to handle alignment encoding for VLD-dup
// instructions, specifically VLD4-dup.
def addrmode6dup : Operand<i32>,
@@ -1003,6 +1078,69 @@ def addrmode6dup : Operand<i32>,
let ParserMatchClass = AddrMode6AsmOperand;
}
+// Base class for addrmode6dup with specific alignment restrictions.
+class AddrMode6DupAlign : Operand<i32>,
+ ComplexPattern<i32, 2, "SelectAddrMode6", [], [SDNPWantParent]> {
+ let PrintMethod = "printAddrMode6Operand";
+ let MIOperandInfo = (ops GPR:$addr, i32imm);
+ let EncoderMethod = "getAddrMode6DupAddressOpValue";
+}
+
+// Special version of addrmode6 for VLD-dup instructions that allow no
+// alignment encoding; checks that no alignment is specified.
+def AddrMode6dupAlignNoneAsmOperand : AsmOperandClass {
+ let Name = "DupAlignedMemoryNone";
+ let DiagnosticType = "DupAlignedMemoryRequiresNone";
+}
+def addrmode6dupalignNone : AddrMode6DupAlign {
+ // The alignment specifier can only be omitted.
+ let ParserMatchClass = AddrMode6dupAlignNoneAsmOperand;
+}
+
+// Special version of addrmode6 to handle 16-bit alignment encoding for VLD-dup
+// instructions and check the alignment value.
+def AddrMode6dupAlign16AsmOperand : AsmOperandClass {
+ let Name = "DupAlignedMemory16";
+ let DiagnosticType = "DupAlignedMemoryRequires16";
+}
+def addrmode6dupalign16 : AddrMode6DupAlign {
+ // The alignment specifier can only be 16 or omitted.
+ let ParserMatchClass = AddrMode6dupAlign16AsmOperand;
+}
+
+// Special version of addrmode6 to handle 32-bit alignment encoding for VLD-dup
+// instructions and check the alignment value.
+def AddrMode6dupAlign32AsmOperand : AsmOperandClass {
+ let Name = "DupAlignedMemory32";
+ let DiagnosticType = "DupAlignedMemoryRequires32";
+}
+def addrmode6dupalign32 : AddrMode6DupAlign {
+ // The alignment specifier can only be 32 or omitted.
+ let ParserMatchClass = AddrMode6dupAlign32AsmOperand;
+}
+
+// Special version of addrmode6 to handle 64-bit alignment encoding for VLD
+// instructions and check the alignment value.
+def AddrMode6dupAlign64AsmOperand : AsmOperandClass {
+ let Name = "DupAlignedMemory64";
+ let DiagnosticType = "DupAlignedMemoryRequires64";
+}
+def addrmode6dupalign64 : AddrMode6DupAlign {
+ // The alignment specifier can only be 64 or omitted.
+ let ParserMatchClass = AddrMode6dupAlign64AsmOperand;
+}
+
+// Special version of addrmode6 to handle 64-bit or 128-bit alignment encoding
+// for VLD instructions and check the alignment value.
+def AddrMode6dupAlign64or128AsmOperand : AsmOperandClass {
+ let Name = "DupAlignedMemory64or128";
+ let DiagnosticType = "DupAlignedMemoryRequires64or128";
+}
+def addrmode6dupalign64or128 : AddrMode6DupAlign {
+ // The alignment specifier can only be 64, 128 or omitted.
+ let ParserMatchClass = AddrMode6dupAlign64or128AsmOperand;
+}
+
// addrmodepc := pc + reg
//
def addrmodepc : Operand<i32>,
@@ -1689,7 +1827,8 @@ PseudoInst<(outs), (ins i32imm:$amt, pred:$p), NoItinerary,
}
def HINT : AI<(outs), (ins imm0_239:$imm), MiscFrm, NoItinerary,
- "hint", "\t$imm", []>, Requires<[IsARM, HasV6]> {
+ "hint", "\t$imm", [(int_arm_hint imm0_239:$imm)]>,
+ Requires<[IsARM, HasV6]> {
bits<8> imm;
let Inst{27-8} = 0b00110010000011110000;
let Inst{7-0} = imm;
@@ -1702,8 +1841,6 @@ def : InstAlias<"wfi$p", (HINT 3, pred:$p)>, Requires<[IsARM, HasV6T2]>;
def : InstAlias<"sev$p", (HINT 4, pred:$p)>, Requires<[IsARM, HasV6T2]>;
def : InstAlias<"sevl$p", (HINT 5, pred:$p)>, Requires<[IsARM, HasV8]>;
-def : Pat<(int_arm_sevl), (HINT 5)>;
-
def SEL : AI<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm, NoItinerary, "sel",
"\t$Rd, $Rn, $Rm", []>, Requires<[IsARM, HasV6]> {
bits<4> Rd;
@@ -1830,6 +1967,18 @@ def DBG : AI<(outs), (ins imm0_15:$opt), MiscFrm, NoItinerary, "dbg", "\t$opt",
let Inst{3-0} = opt;
}
+// A8.8.247 UDF - Undefined (Encoding A1)
+def UDF : AInoP<(outs), (ins imm0_65535:$imm16), MiscFrm, NoItinerary,
+ "udf", "\t$imm16", [(int_arm_undefined imm0_65535:$imm16)]> {
+ bits<16> imm16;
+ let Inst{31-28} = 0b1110; // AL
+ let Inst{27-25} = 0b011;
+ let Inst{24-20} = 0b11111;
+ let Inst{19-8} = imm16{15-4};
+ let Inst{7-4} = 0b1111;
+ let Inst{3-0} = imm16{3-0};
+}
+
/*
* A5.4 Permanently UNDEFINED instructions.
*
@@ -2282,12 +2431,6 @@ let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
def LDRD : AI3ld<0b1101, 0, (outs GPR:$Rt, GPR:$Rt2), (ins addrmode3:$addr),
LdMiscFrm, IIC_iLoad_d_r, "ldrd", "\t$Rt, $Rt2, $addr", []>,
Requires<[IsARM, HasV5TE]>;
-
- // GNU Assembler extension (compatibility)
- let isAsmParserOnly = 1 in
- def LDRD_PAIR : AI3ld<0b1101, 0, (outs GPRPairOp:$Rt), (ins addrmode3:$addr),
- LdMiscFrm, IIC_iLoad_d_r, "ldrd", "\t$Rt, $addr", []>,
- Requires<[IsARM, HasV5TE]>;
}
def LDA : AIldracq<0b00, (outs GPR:$Rt), (ins addr_offset_none:$addr),
@@ -2557,14 +2700,6 @@ let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {
Requires<[IsARM, HasV5TE]> {
let Inst{21} = 0;
}
-
- // GNU Assembler extension (compatibility)
- let isAsmParserOnly = 1 in
- def STRD_PAIR : AI3str<0b1111, (outs), (ins GPRPairOp:$Rt, addrmode3:$addr),
- StMiscFrm, IIC_iStore_d_r, "strd", "\t$Rt, $addr", []>,
- Requires<[IsARM, HasV5TE]> {
- let Inst{21} = 0;
- }
}
// Indexed stores
@@ -3999,6 +4134,11 @@ def REV16 : AMiscA1I<0b01101011, 0b1011, (outs GPR:$Rd), (ins GPR:$Rm),
Requires<[IsARM, HasV6]>,
Sched<[WriteALU]>;
+def : ARMV6Pat<(srl (bswap (extloadi16 addrmode3:$addr)), (i32 16)),
+ (REV16 (LDRH addrmode3:$addr))>;
+def : ARMV6Pat<(truncstorei16 (srl (bswap GPR:$Rn), (i32 16)), addrmode3:$addr),
+ (STRH (REV16 GPR:$Rn), addrmode3:$addr)>;
+
let AddedComplexity = 5 in
def REVSH : AMiscA1I<0b01101111, 0b1011, (outs GPR:$Rd), (ins GPR:$Rm),
IIC_iUNAr, "revsh", "\t$Rd, $Rm",
@@ -4816,7 +4956,7 @@ def MCR2 : MovRCopro2<"mcr2", 0 /* from ARM core register to coprocessor */,
[(int_arm_mcr2 imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn,
imm:$CRm, imm:$opc2)]>,
Requires<[PreV8]>;
-def : ARMInstAlias<"mcr2$ $cop, $opc1, $Rt, $CRn, $CRm",
+def : ARMInstAlias<"mcr2 $cop, $opc1, $Rt, $CRn, $CRm",
(MCR2 p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
c_imm:$CRm, 0)>;
def MRC2 : MovRCopro2<"mrc2", 1 /* from coprocessor to ARM core register */,
@@ -4824,7 +4964,7 @@ def MRC2 : MovRCopro2<"mrc2", 1 /* from coprocessor to ARM core register */,
(ins p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, c_imm:$CRm,
imm0_7:$opc2), []>,
Requires<[PreV8]>;
-def : ARMInstAlias<"mrc2$ $cop, $opc1, $Rt, $CRn, $CRm",
+def : ARMInstAlias<"mrc2 $cop, $opc1, $Rt, $CRn, $CRm",
(MRC2 GPRwithAPSR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn,
c_imm:$CRm, 0)>;
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index 0d46c49..b32b5d2 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -39,6 +39,49 @@ def nImmVMOVI32 : Operand<i32> {
let PrintMethod = "printNEONModImmOperand";
let ParserMatchClass = nImmVMOVI32AsmOperand;
}
+
+def nImmVMOVI16AsmOperandByteReplicate :
+ AsmOperandClass {
+ let Name = "NEONi16vmovByteReplicate";
+ let PredicateMethod = "isNEONi16ByteReplicate";
+ let RenderMethod = "addNEONvmovByteReplicateOperands";
+}
+def nImmVMOVI32AsmOperandByteReplicate :
+ AsmOperandClass {
+ let Name = "NEONi32vmovByteReplicate";
+ let PredicateMethod = "isNEONi32ByteReplicate";
+ let RenderMethod = "addNEONvmovByteReplicateOperands";
+}
+def nImmVMVNI16AsmOperandByteReplicate :
+ AsmOperandClass {
+ let Name = "NEONi16invByteReplicate";
+ let PredicateMethod = "isNEONi16ByteReplicate";
+ let RenderMethod = "addNEONinvByteReplicateOperands";
+}
+def nImmVMVNI32AsmOperandByteReplicate :
+ AsmOperandClass {
+ let Name = "NEONi32invByteReplicate";
+ let PredicateMethod = "isNEONi32ByteReplicate";
+ let RenderMethod = "addNEONinvByteReplicateOperands";
+}
+
+def nImmVMOVI16ByteReplicate : Operand<i32> {
+ let PrintMethod = "printNEONModImmOperand";
+ let ParserMatchClass = nImmVMOVI16AsmOperandByteReplicate;
+}
+def nImmVMOVI32ByteReplicate : Operand<i32> {
+ let PrintMethod = "printNEONModImmOperand";
+ let ParserMatchClass = nImmVMOVI32AsmOperandByteReplicate;
+}
+def nImmVMVNI16ByteReplicate : Operand<i32> {
+ let PrintMethod = "printNEONModImmOperand";
+ let ParserMatchClass = nImmVMVNI16AsmOperandByteReplicate;
+}
+def nImmVMVNI32ByteReplicate : Operand<i32> {
+ let PrintMethod = "printNEONModImmOperand";
+ let ParserMatchClass = nImmVMVNI32AsmOperandByteReplicate;
+}
+
def nImmVMOVI32NegAsmOperand : AsmOperandClass { let Name = "NEONi32vmovNeg"; }
def nImmVMOVI32Neg : Operand<i32> {
let PrintMethod = "printNEONModImmOperand";
@@ -617,37 +660,37 @@ class VLDQQQQWBPseudo<InstrItinClass itin>
let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
// VLD1 : Vector Load (multiple single elements)
-class VLD1D<bits<4> op7_4, string Dt>
+class VLD1D<bits<4> op7_4, string Dt, Operand AddrMode>
: NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd),
- (ins addrmode6:$Rn), IIC_VLD1,
+ (ins AddrMode:$Rn), IIC_VLD1,
"vld1", Dt, "$Vd, $Rn", "", []> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLDST1Instruction";
}
-class VLD1Q<bits<4> op7_4, string Dt>
+class VLD1Q<bits<4> op7_4, string Dt, Operand AddrMode>
: NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd),
- (ins addrmode6:$Rn), IIC_VLD1x2,
+ (ins AddrMode:$Rn), IIC_VLD1x2,
"vld1", Dt, "$Vd, $Rn", "", []> {
let Rm = 0b1111;
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDST1Instruction";
}
-def VLD1d8 : VLD1D<{0,0,0,?}, "8">;
-def VLD1d16 : VLD1D<{0,1,0,?}, "16">;
-def VLD1d32 : VLD1D<{1,0,0,?}, "32">;
-def VLD1d64 : VLD1D<{1,1,0,?}, "64">;
+def VLD1d8 : VLD1D<{0,0,0,?}, "8", addrmode6align64>;
+def VLD1d16 : VLD1D<{0,1,0,?}, "16", addrmode6align64>;
+def VLD1d32 : VLD1D<{1,0,0,?}, "32", addrmode6align64>;
+def VLD1d64 : VLD1D<{1,1,0,?}, "64", addrmode6align64>;
-def VLD1q8 : VLD1Q<{0,0,?,?}, "8">;
-def VLD1q16 : VLD1Q<{0,1,?,?}, "16">;
-def VLD1q32 : VLD1Q<{1,0,?,?}, "32">;
-def VLD1q64 : VLD1Q<{1,1,?,?}, "64">;
+def VLD1q8 : VLD1Q<{0,0,?,?}, "8", addrmode6align64or128>;
+def VLD1q16 : VLD1Q<{0,1,?,?}, "16", addrmode6align64or128>;
+def VLD1q32 : VLD1Q<{1,0,?,?}, "32", addrmode6align64or128>;
+def VLD1q64 : VLD1Q<{1,1,?,?}, "64", addrmode6align64or128>;
// ...with address register writeback:
-multiclass VLD1DWB<bits<4> op7_4, string Dt> {
+multiclass VLD1DWB<bits<4> op7_4, string Dt, Operand AddrMode> {
def _fixed : NLdSt<0,0b10, 0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb),
- (ins addrmode6:$Rn), IIC_VLD1u,
+ (ins AddrMode:$Rn), IIC_VLD1u,
"vld1", Dt, "$Vd, $Rn!",
"$Rn.addr = $wb", []> {
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
@@ -655,16 +698,16 @@ multiclass VLD1DWB<bits<4> op7_4, string Dt> {
let DecoderMethod = "DecodeVLDST1Instruction";
}
def _register : NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb),
- (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1u,
+ (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1u,
"vld1", Dt, "$Vd, $Rn, $Rm",
"$Rn.addr = $wb", []> {
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLDST1Instruction";
}
}
-multiclass VLD1QWB<bits<4> op7_4, string Dt> {
+multiclass VLD1QWB<bits<4> op7_4, string Dt, Operand AddrMode> {
def _fixed : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd, GPR:$wb),
- (ins addrmode6:$Rn), IIC_VLD1x2u,
+ (ins AddrMode:$Rn), IIC_VLD1x2u,
"vld1", Dt, "$Vd, $Rn!",
"$Rn.addr = $wb", []> {
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
@@ -672,7 +715,7 @@ multiclass VLD1QWB<bits<4> op7_4, string Dt> {
let DecoderMethod = "DecodeVLDST1Instruction";
}
def _register : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd, GPR:$wb),
- (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1x2u,
+ (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1x2u,
"vld1", Dt, "$Vd, $Rn, $Rm",
"$Rn.addr = $wb", []> {
let Inst{5-4} = Rn{5-4};
@@ -680,27 +723,27 @@ multiclass VLD1QWB<bits<4> op7_4, string Dt> {
}
}
-defm VLD1d8wb : VLD1DWB<{0,0,0,?}, "8">;
-defm VLD1d16wb : VLD1DWB<{0,1,0,?}, "16">;
-defm VLD1d32wb : VLD1DWB<{1,0,0,?}, "32">;
-defm VLD1d64wb : VLD1DWB<{1,1,0,?}, "64">;
-defm VLD1q8wb : VLD1QWB<{0,0,?,?}, "8">;
-defm VLD1q16wb : VLD1QWB<{0,1,?,?}, "16">;
-defm VLD1q32wb : VLD1QWB<{1,0,?,?}, "32">;
-defm VLD1q64wb : VLD1QWB<{1,1,?,?}, "64">;
+defm VLD1d8wb : VLD1DWB<{0,0,0,?}, "8", addrmode6align64>;
+defm VLD1d16wb : VLD1DWB<{0,1,0,?}, "16", addrmode6align64>;
+defm VLD1d32wb : VLD1DWB<{1,0,0,?}, "32", addrmode6align64>;
+defm VLD1d64wb : VLD1DWB<{1,1,0,?}, "64", addrmode6align64>;
+defm VLD1q8wb : VLD1QWB<{0,0,?,?}, "8", addrmode6align64or128>;
+defm VLD1q16wb : VLD1QWB<{0,1,?,?}, "16", addrmode6align64or128>;
+defm VLD1q32wb : VLD1QWB<{1,0,?,?}, "32", addrmode6align64or128>;
+defm VLD1q64wb : VLD1QWB<{1,1,?,?}, "64", addrmode6align64or128>;
// ...with 3 registers
-class VLD1D3<bits<4> op7_4, string Dt>
+class VLD1D3<bits<4> op7_4, string Dt, Operand AddrMode>
: NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd),
- (ins addrmode6:$Rn), IIC_VLD1x3, "vld1", Dt,
+ (ins AddrMode:$Rn), IIC_VLD1x3, "vld1", Dt,
"$Vd, $Rn", "", []> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLDST1Instruction";
}
-multiclass VLD1D3WB<bits<4> op7_4, string Dt> {
+multiclass VLD1D3WB<bits<4> op7_4, string Dt, Operand AddrMode> {
def _fixed : NLdSt<0,0b10,0b0110, op7_4, (outs VecListThreeD:$Vd, GPR:$wb),
- (ins addrmode6:$Rn), IIC_VLD1x2u,
+ (ins AddrMode:$Rn), IIC_VLD1x2u,
"vld1", Dt, "$Vd, $Rn!",
"$Rn.addr = $wb", []> {
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
@@ -708,7 +751,7 @@ multiclass VLD1D3WB<bits<4> op7_4, string Dt> {
let DecoderMethod = "DecodeVLDST1Instruction";
}
def _register : NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd, GPR:$wb),
- (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1x2u,
+ (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1x2u,
"vld1", Dt, "$Vd, $Rn, $Rm",
"$Rn.addr = $wb", []> {
let Inst{4} = Rn{4};
@@ -716,32 +759,32 @@ multiclass VLD1D3WB<bits<4> op7_4, string Dt> {
}
}
-def VLD1d8T : VLD1D3<{0,0,0,?}, "8">;
-def VLD1d16T : VLD1D3<{0,1,0,?}, "16">;
-def VLD1d32T : VLD1D3<{1,0,0,?}, "32">;
-def VLD1d64T : VLD1D3<{1,1,0,?}, "64">;
+def VLD1d8T : VLD1D3<{0,0,0,?}, "8", addrmode6align64>;
+def VLD1d16T : VLD1D3<{0,1,0,?}, "16", addrmode6align64>;
+def VLD1d32T : VLD1D3<{1,0,0,?}, "32", addrmode6align64>;
+def VLD1d64T : VLD1D3<{1,1,0,?}, "64", addrmode6align64>;
-defm VLD1d8Twb : VLD1D3WB<{0,0,0,?}, "8">;
-defm VLD1d16Twb : VLD1D3WB<{0,1,0,?}, "16">;
-defm VLD1d32Twb : VLD1D3WB<{1,0,0,?}, "32">;
-defm VLD1d64Twb : VLD1D3WB<{1,1,0,?}, "64">;
+defm VLD1d8Twb : VLD1D3WB<{0,0,0,?}, "8", addrmode6align64>;
+defm VLD1d16Twb : VLD1D3WB<{0,1,0,?}, "16", addrmode6align64>;
+defm VLD1d32Twb : VLD1D3WB<{1,0,0,?}, "32", addrmode6align64>;
+defm VLD1d64Twb : VLD1D3WB<{1,1,0,?}, "64", addrmode6align64>;
def VLD1d64TPseudo : VLDQQPseudo<IIC_VLD1x3>;
def VLD1d64TPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD1x3>;
def VLD1d64TPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x3>;
// ...with 4 registers
-class VLD1D4<bits<4> op7_4, string Dt>
+class VLD1D4<bits<4> op7_4, string Dt, Operand AddrMode>
: NLdSt<0, 0b10, 0b0010, op7_4, (outs VecListFourD:$Vd),
- (ins addrmode6:$Rn), IIC_VLD1x4, "vld1", Dt,
+ (ins AddrMode:$Rn), IIC_VLD1x4, "vld1", Dt,
"$Vd, $Rn", "", []> {
let Rm = 0b1111;
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDST1Instruction";
}
-multiclass VLD1D4WB<bits<4> op7_4, string Dt> {
+multiclass VLD1D4WB<bits<4> op7_4, string Dt, Operand AddrMode> {
def _fixed : NLdSt<0,0b10,0b0010, op7_4, (outs VecListFourD:$Vd, GPR:$wb),
- (ins addrmode6:$Rn), IIC_VLD1x2u,
+ (ins AddrMode:$Rn), IIC_VLD1x2u,
"vld1", Dt, "$Vd, $Rn!",
"$Rn.addr = $wb", []> {
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
@@ -749,7 +792,7 @@ multiclass VLD1D4WB<bits<4> op7_4, string Dt> {
let DecoderMethod = "DecodeVLDST1Instruction";
}
def _register : NLdSt<0,0b10,0b0010,op7_4, (outs VecListFourD:$Vd, GPR:$wb),
- (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1x2u,
+ (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1x2u,
"vld1", Dt, "$Vd, $Rn, $Rm",
"$Rn.addr = $wb", []> {
let Inst{5-4} = Rn{5-4};
@@ -757,15 +800,15 @@ multiclass VLD1D4WB<bits<4> op7_4, string Dt> {
}
}
-def VLD1d8Q : VLD1D4<{0,0,?,?}, "8">;
-def VLD1d16Q : VLD1D4<{0,1,?,?}, "16">;
-def VLD1d32Q : VLD1D4<{1,0,?,?}, "32">;
-def VLD1d64Q : VLD1D4<{1,1,?,?}, "64">;
+def VLD1d8Q : VLD1D4<{0,0,?,?}, "8", addrmode6align64or128or256>;
+def VLD1d16Q : VLD1D4<{0,1,?,?}, "16", addrmode6align64or128or256>;
+def VLD1d32Q : VLD1D4<{1,0,?,?}, "32", addrmode6align64or128or256>;
+def VLD1d64Q : VLD1D4<{1,1,?,?}, "64", addrmode6align64or128or256>;
-defm VLD1d8Qwb : VLD1D4WB<{0,0,?,?}, "8">;
-defm VLD1d16Qwb : VLD1D4WB<{0,1,?,?}, "16">;
-defm VLD1d32Qwb : VLD1D4WB<{1,0,?,?}, "32">;
-defm VLD1d64Qwb : VLD1D4WB<{1,1,?,?}, "64">;
+defm VLD1d8Qwb : VLD1D4WB<{0,0,?,?}, "8", addrmode6align64or128or256>;
+defm VLD1d16Qwb : VLD1D4WB<{0,1,?,?}, "16", addrmode6align64or128or256>;
+defm VLD1d32Qwb : VLD1D4WB<{1,0,?,?}, "32", addrmode6align64or128or256>;
+defm VLD1d64Qwb : VLD1D4WB<{1,1,?,?}, "64", addrmode6align64or128or256>;
def VLD1d64QPseudo : VLDQQPseudo<IIC_VLD1x4>;
def VLD1d64QPseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD1x4>;
@@ -773,22 +816,28 @@ def VLD1d64QPseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD1x4>;
// VLD2 : Vector Load (multiple 2-element structures)
class VLD2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy,
- InstrItinClass itin>
+ InstrItinClass itin, Operand AddrMode>
: NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd),
- (ins addrmode6:$Rn), itin,
+ (ins AddrMode:$Rn), itin,
"vld2", Dt, "$Vd, $Rn", "", []> {
let Rm = 0b1111;
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDST2Instruction";
}
-def VLD2d8 : VLD2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2>;
-def VLD2d16 : VLD2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2>;
-def VLD2d32 : VLD2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2>;
+def VLD2d8 : VLD2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2,
+ addrmode6align64or128>;
+def VLD2d16 : VLD2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2,
+ addrmode6align64or128>;
+def VLD2d32 : VLD2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2,
+ addrmode6align64or128>;
-def VLD2q8 : VLD2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2>;
-def VLD2q16 : VLD2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2>;
-def VLD2q32 : VLD2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2>;
+def VLD2q8 : VLD2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2,
+ addrmode6align64or128or256>;
+def VLD2q16 : VLD2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2,
+ addrmode6align64or128or256>;
+def VLD2q32 : VLD2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2,
+ addrmode6align64or128or256>;
def VLD2q8Pseudo : VLDQQPseudo<IIC_VLD2x2>;
def VLD2q16Pseudo : VLDQQPseudo<IIC_VLD2x2>;
@@ -796,9 +845,9 @@ def VLD2q32Pseudo : VLDQQPseudo<IIC_VLD2x2>;
// ...with address register writeback:
multiclass VLD2WB<bits<4> op11_8, bits<4> op7_4, string Dt,
- RegisterOperand VdTy, InstrItinClass itin> {
+ RegisterOperand VdTy, InstrItinClass itin, Operand AddrMode> {
def _fixed : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb),
- (ins addrmode6:$Rn), itin,
+ (ins AddrMode:$Rn), itin,
"vld2", Dt, "$Vd, $Rn!",
"$Rn.addr = $wb", []> {
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
@@ -806,7 +855,7 @@ multiclass VLD2WB<bits<4> op11_8, bits<4> op7_4, string Dt,
let DecoderMethod = "DecodeVLDST2Instruction";
}
def _register : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb),
- (ins addrmode6:$Rn, rGPR:$Rm), itin,
+ (ins AddrMode:$Rn, rGPR:$Rm), itin,
"vld2", Dt, "$Vd, $Rn, $Rm",
"$Rn.addr = $wb", []> {
let Inst{5-4} = Rn{5-4};
@@ -814,13 +863,19 @@ multiclass VLD2WB<bits<4> op11_8, bits<4> op7_4, string Dt,
}
}
-defm VLD2d8wb : VLD2WB<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2u>;
-defm VLD2d16wb : VLD2WB<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2u>;
-defm VLD2d32wb : VLD2WB<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2u>;
+defm VLD2d8wb : VLD2WB<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2u,
+ addrmode6align64or128>;
+defm VLD2d16wb : VLD2WB<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2u,
+ addrmode6align64or128>;
+defm VLD2d32wb : VLD2WB<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2u,
+ addrmode6align64or128>;
-defm VLD2q8wb : VLD2WB<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2u>;
-defm VLD2q16wb : VLD2WB<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2u>;
-defm VLD2q32wb : VLD2WB<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2u>;
+defm VLD2q8wb : VLD2WB<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2u,
+ addrmode6align64or128or256>;
+defm VLD2q16wb : VLD2WB<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2u,
+ addrmode6align64or128or256>;
+defm VLD2q32wb : VLD2WB<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2u,
+ addrmode6align64or128or256>;
def VLD2q8PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>;
def VLD2q16PseudoWB_fixed : VLDQQWBfixedPseudo<IIC_VLD2x2u>;
@@ -830,12 +885,18 @@ def VLD2q16PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>;
def VLD2q32PseudoWB_register : VLDQQWBregisterPseudo<IIC_VLD2x2u>;
// ...with double-spaced registers
-def VLD2b8 : VLD2<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VLD2>;
-def VLD2b16 : VLD2<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VLD2>;
-def VLD2b32 : VLD2<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VLD2>;
-defm VLD2b8wb : VLD2WB<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VLD2u>;
-defm VLD2b16wb : VLD2WB<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VLD2u>;
-defm VLD2b32wb : VLD2WB<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VLD2u>;
+def VLD2b8 : VLD2<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VLD2,
+ addrmode6align64or128>;
+def VLD2b16 : VLD2<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VLD2,
+ addrmode6align64or128>;
+def VLD2b32 : VLD2<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VLD2,
+ addrmode6align64or128>;
+defm VLD2b8wb : VLD2WB<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VLD2u,
+ addrmode6align64or128>;
+defm VLD2b16wb : VLD2WB<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VLD2u,
+ addrmode6align64or128>;
+defm VLD2b32wb : VLD2WB<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VLD2u,
+ addrmode6align64or128>;
// VLD3 : Vector Load (multiple 3-element structures)
class VLD3D<bits<4> op11_8, bits<4> op7_4, string Dt>
@@ -1293,47 +1354,55 @@ def VLD4LNq32Pseudo_UPD : VLDQQQQLNWBPseudo<IIC_VLD4lnu>;
} // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1
// VLD1DUP : Vector Load (single element to all lanes)
-class VLD1DUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp>
+class VLD1DUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp,
+ Operand AddrMode>
: NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListOneDAllLanes:$Vd),
- (ins addrmode6dup:$Rn),
+ (ins AddrMode:$Rn),
IIC_VLD1dup, "vld1", Dt, "$Vd, $Rn", "",
[(set VecListOneDAllLanes:$Vd,
- (Ty (NEONvdup (i32 (LoadOp addrmode6dup:$Rn)))))]> {
+ (Ty (NEONvdup (i32 (LoadOp AddrMode:$Rn)))))]> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLD1DupInstruction";
}
-def VLD1DUPd8 : VLD1DUP<{0,0,0,?}, "8", v8i8, extloadi8>;
-def VLD1DUPd16 : VLD1DUP<{0,1,0,?}, "16", v4i16, extloadi16>;
-def VLD1DUPd32 : VLD1DUP<{1,0,0,?}, "32", v2i32, load>;
+def VLD1DUPd8 : VLD1DUP<{0,0,0,?}, "8", v8i8, extloadi8,
+ addrmode6dupalignNone>;
+def VLD1DUPd16 : VLD1DUP<{0,1,0,?}, "16", v4i16, extloadi16,
+ addrmode6dupalign16>;
+def VLD1DUPd32 : VLD1DUP<{1,0,0,?}, "32", v2i32, load,
+ addrmode6dupalign32>;
def : Pat<(v2f32 (NEONvdup (f32 (load addrmode6dup:$addr)))),
(VLD1DUPd32 addrmode6:$addr)>;
-class VLD1QDUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp>
+class VLD1QDUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp,
+ Operand AddrMode>
: NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListDPairAllLanes:$Vd),
- (ins addrmode6dup:$Rn), IIC_VLD1dup,
+ (ins AddrMode:$Rn), IIC_VLD1dup,
"vld1", Dt, "$Vd, $Rn", "",
[(set VecListDPairAllLanes:$Vd,
- (Ty (NEONvdup (i32 (LoadOp addrmode6dup:$Rn)))))]> {
+ (Ty (NEONvdup (i32 (LoadOp AddrMode:$Rn)))))]> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLD1DupInstruction";
}
-def VLD1DUPq8 : VLD1QDUP<{0,0,1,0}, "8", v16i8, extloadi8>;
-def VLD1DUPq16 : VLD1QDUP<{0,1,1,?}, "16", v8i16, extloadi16>;
-def VLD1DUPq32 : VLD1QDUP<{1,0,1,?}, "32", v4i32, load>;
+def VLD1DUPq8 : VLD1QDUP<{0,0,1,0}, "8", v16i8, extloadi8,
+ addrmode6dupalignNone>;
+def VLD1DUPq16 : VLD1QDUP<{0,1,1,?}, "16", v8i16, extloadi16,
+ addrmode6dupalign16>;
+def VLD1DUPq32 : VLD1QDUP<{1,0,1,?}, "32", v4i32, load,
+ addrmode6dupalign32>;
def : Pat<(v4f32 (NEONvdup (f32 (load addrmode6dup:$addr)))),
(VLD1DUPq32 addrmode6:$addr)>;
let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
// ...with address register writeback:
-multiclass VLD1DUPWB<bits<4> op7_4, string Dt> {
+multiclass VLD1DUPWB<bits<4> op7_4, string Dt, Operand AddrMode> {
def _fixed : NLdSt<1, 0b10, 0b1100, op7_4,
(outs VecListOneDAllLanes:$Vd, GPR:$wb),
- (ins addrmode6dup:$Rn), IIC_VLD1dupu,
+ (ins AddrMode:$Rn), IIC_VLD1dupu,
"vld1", Dt, "$Vd, $Rn!",
"$Rn.addr = $wb", []> {
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
@@ -1342,17 +1411,17 @@ multiclass VLD1DUPWB<bits<4> op7_4, string Dt> {
}
def _register : NLdSt<1, 0b10, 0b1100, op7_4,
(outs VecListOneDAllLanes:$Vd, GPR:$wb),
- (ins addrmode6dup:$Rn, rGPR:$Rm), IIC_VLD1dupu,
+ (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1dupu,
"vld1", Dt, "$Vd, $Rn, $Rm",
"$Rn.addr = $wb", []> {
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLD1DupInstruction";
}
}
-multiclass VLD1QDUPWB<bits<4> op7_4, string Dt> {
+multiclass VLD1QDUPWB<bits<4> op7_4, string Dt, Operand AddrMode> {
def _fixed : NLdSt<1, 0b10, 0b1100, op7_4,
(outs VecListDPairAllLanes:$Vd, GPR:$wb),
- (ins addrmode6dup:$Rn), IIC_VLD1dupu,
+ (ins AddrMode:$Rn), IIC_VLD1dupu,
"vld1", Dt, "$Vd, $Rn!",
"$Rn.addr = $wb", []> {
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
@@ -1361,7 +1430,7 @@ multiclass VLD1QDUPWB<bits<4> op7_4, string Dt> {
}
def _register : NLdSt<1, 0b10, 0b1100, op7_4,
(outs VecListDPairAllLanes:$Vd, GPR:$wb),
- (ins addrmode6dup:$Rn, rGPR:$Rm), IIC_VLD1dupu,
+ (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1dupu,
"vld1", Dt, "$Vd, $Rn, $Rm",
"$Rn.addr = $wb", []> {
let Inst{4} = Rn{4};
@@ -1369,38 +1438,47 @@ multiclass VLD1QDUPWB<bits<4> op7_4, string Dt> {
}
}
-defm VLD1DUPd8wb : VLD1DUPWB<{0,0,0,0}, "8">;
-defm VLD1DUPd16wb : VLD1DUPWB<{0,1,0,?}, "16">;
-defm VLD1DUPd32wb : VLD1DUPWB<{1,0,0,?}, "32">;
+defm VLD1DUPd8wb : VLD1DUPWB<{0,0,0,0}, "8", addrmode6dupalignNone>;
+defm VLD1DUPd16wb : VLD1DUPWB<{0,1,0,?}, "16", addrmode6dupalign16>;
+defm VLD1DUPd32wb : VLD1DUPWB<{1,0,0,?}, "32", addrmode6dupalign32>;
-defm VLD1DUPq8wb : VLD1QDUPWB<{0,0,1,0}, "8">;
-defm VLD1DUPq16wb : VLD1QDUPWB<{0,1,1,?}, "16">;
-defm VLD1DUPq32wb : VLD1QDUPWB<{1,0,1,?}, "32">;
+defm VLD1DUPq8wb : VLD1QDUPWB<{0,0,1,0}, "8", addrmode6dupalignNone>;
+defm VLD1DUPq16wb : VLD1QDUPWB<{0,1,1,?}, "16", addrmode6dupalign16>;
+defm VLD1DUPq32wb : VLD1QDUPWB<{1,0,1,?}, "32", addrmode6dupalign32>;
// VLD2DUP : Vector Load (single 2-element structure to all lanes)
-class VLD2DUP<bits<4> op7_4, string Dt, RegisterOperand VdTy>
+class VLD2DUP<bits<4> op7_4, string Dt, RegisterOperand VdTy, Operand AddrMode>
: NLdSt<1, 0b10, 0b1101, op7_4, (outs VdTy:$Vd),
- (ins addrmode6dup:$Rn), IIC_VLD2dup,
+ (ins AddrMode:$Rn), IIC_VLD2dup,
"vld2", Dt, "$Vd, $Rn", "", []> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLD2DupInstruction";
}
-def VLD2DUPd8 : VLD2DUP<{0,0,0,?}, "8", VecListDPairAllLanes>;
-def VLD2DUPd16 : VLD2DUP<{0,1,0,?}, "16", VecListDPairAllLanes>;
-def VLD2DUPd32 : VLD2DUP<{1,0,0,?}, "32", VecListDPairAllLanes>;
+def VLD2DUPd8 : VLD2DUP<{0,0,0,?}, "8", VecListDPairAllLanes,
+ addrmode6dupalign16>;
+def VLD2DUPd16 : VLD2DUP<{0,1,0,?}, "16", VecListDPairAllLanes,
+ addrmode6dupalign32>;
+def VLD2DUPd32 : VLD2DUP<{1,0,0,?}, "32", VecListDPairAllLanes,
+ addrmode6dupalign64>;
+// HACK: VLD2DUPd8x2 must be changed at the same time as VLD2b8, or
+// "vld2.8 {d0[], d2[]}, [r4:32]" will become "vld2.8 {d0, d2}, [r4:32]".
// ...with double-spaced registers
-def VLD2DUPd8x2 : VLD2DUP<{0,0,1,?}, "8", VecListDPairSpacedAllLanes>;
-def VLD2DUPd16x2 : VLD2DUP<{0,1,1,?}, "16", VecListDPairSpacedAllLanes>;
-def VLD2DUPd32x2 : VLD2DUP<{1,0,1,?}, "32", VecListDPairSpacedAllLanes>;
+def VLD2DUPd8x2 : VLD2DUP<{0,0,1,?}, "8", VecListDPairSpacedAllLanes,
+ addrmode6dupalign16>;
+def VLD2DUPd16x2 : VLD2DUP<{0,1,1,?}, "16", VecListDPairSpacedAllLanes,
+ addrmode6dupalign32>;
+def VLD2DUPd32x2 : VLD2DUP<{1,0,1,?}, "32", VecListDPairSpacedAllLanes,
+ addrmode6dupalign64>;
// ...with address register writeback:
-multiclass VLD2DUPWB<bits<4> op7_4, string Dt, RegisterOperand VdTy> {
+multiclass VLD2DUPWB<bits<4> op7_4, string Dt, RegisterOperand VdTy,
+ Operand AddrMode> {
def _fixed : NLdSt<1, 0b10, 0b1101, op7_4,
(outs VdTy:$Vd, GPR:$wb),
- (ins addrmode6dup:$Rn), IIC_VLD2dupu,
+ (ins AddrMode:$Rn), IIC_VLD2dupu,
"vld2", Dt, "$Vd, $Rn!",
"$Rn.addr = $wb", []> {
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
@@ -1409,7 +1487,7 @@ multiclass VLD2DUPWB<bits<4> op7_4, string Dt, RegisterOperand VdTy> {
}
def _register : NLdSt<1, 0b10, 0b1101, op7_4,
(outs VdTy:$Vd, GPR:$wb),
- (ins addrmode6dup:$Rn, rGPR:$Rm), IIC_VLD2dupu,
+ (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD2dupu,
"vld2", Dt, "$Vd, $Rn, $Rm",
"$Rn.addr = $wb", []> {
let Inst{4} = Rn{4};
@@ -1417,13 +1495,19 @@ multiclass VLD2DUPWB<bits<4> op7_4, string Dt, RegisterOperand VdTy> {
}
}
-defm VLD2DUPd8wb : VLD2DUPWB<{0,0,0,0}, "8", VecListDPairAllLanes>;
-defm VLD2DUPd16wb : VLD2DUPWB<{0,1,0,?}, "16", VecListDPairAllLanes>;
-defm VLD2DUPd32wb : VLD2DUPWB<{1,0,0,?}, "32", VecListDPairAllLanes>;
+defm VLD2DUPd8wb : VLD2DUPWB<{0,0,0,0}, "8", VecListDPairAllLanes,
+ addrmode6dupalign16>;
+defm VLD2DUPd16wb : VLD2DUPWB<{0,1,0,?}, "16", VecListDPairAllLanes,
+ addrmode6dupalign32>;
+defm VLD2DUPd32wb : VLD2DUPWB<{1,0,0,?}, "32", VecListDPairAllLanes,
+ addrmode6dupalign64>;
-defm VLD2DUPd8x2wb : VLD2DUPWB<{0,0,1,0}, "8", VecListDPairSpacedAllLanes>;
-defm VLD2DUPd16x2wb : VLD2DUPWB<{0,1,1,?}, "16", VecListDPairSpacedAllLanes>;
-defm VLD2DUPd32x2wb : VLD2DUPWB<{1,0,1,?}, "32", VecListDPairSpacedAllLanes>;
+defm VLD2DUPd8x2wb : VLD2DUPWB<{0,0,1,0}, "8", VecListDPairSpacedAllLanes,
+ addrmode6dupalign16>;
+defm VLD2DUPd16x2wb : VLD2DUPWB<{0,1,1,?}, "16", VecListDPairSpacedAllLanes,
+ addrmode6dupalign32>;
+defm VLD2DUPd32x2wb : VLD2DUPWB<{1,0,1,?}, "32", VecListDPairSpacedAllLanes,
+ addrmode6dupalign64>;
// VLD3DUP : Vector Load (single 3-element structure to all lanes)
class VLD3DUP<bits<4> op7_4, string Dt>
@@ -1449,22 +1533,22 @@ def VLD3DUPq16 : VLD3DUP<{0,1,1,?}, "16">;
def VLD3DUPq32 : VLD3DUP<{1,0,1,?}, "32">;
// ...with address register writeback:
-class VLD3DUPWB<bits<4> op7_4, string Dt>
+class VLD3DUPWB<bits<4> op7_4, string Dt, Operand AddrMode>
: NLdSt<1, 0b10, 0b1110, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
- (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD3dupu,
+ (ins AddrMode:$Rn, am6offset:$Rm), IIC_VLD3dupu,
"vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn$Rm",
"$Rn.addr = $wb", []> {
let Inst{4} = 0;
let DecoderMethod = "DecodeVLD3DupInstruction";
}
-def VLD3DUPd8_UPD : VLD3DUPWB<{0,0,0,0}, "8">;
-def VLD3DUPd16_UPD : VLD3DUPWB<{0,1,0,?}, "16">;
-def VLD3DUPd32_UPD : VLD3DUPWB<{1,0,0,?}, "32">;
+def VLD3DUPd8_UPD : VLD3DUPWB<{0,0,0,0}, "8", addrmode6dupalign64>;
+def VLD3DUPd16_UPD : VLD3DUPWB<{0,1,0,?}, "16", addrmode6dupalign64>;
+def VLD3DUPd32_UPD : VLD3DUPWB<{1,0,0,?}, "32", addrmode6dupalign64>;
-def VLD3DUPq8_UPD : VLD3DUPWB<{0,0,1,0}, "8">;
-def VLD3DUPq16_UPD : VLD3DUPWB<{0,1,1,?}, "16">;
-def VLD3DUPq32_UPD : VLD3DUPWB<{1,0,1,?}, "32">;
+def VLD3DUPq8_UPD : VLD3DUPWB<{0,0,1,0}, "8", addrmode6dupalign64>;
+def VLD3DUPq16_UPD : VLD3DUPWB<{0,1,1,?}, "16", addrmode6dupalign64>;
+def VLD3DUPq32_UPD : VLD3DUPWB<{1,0,1,?}, "32", addrmode6dupalign64>;
def VLD3DUPd8Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>;
def VLD3DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>;
@@ -1560,35 +1644,35 @@ class VSTQQQQWBPseudo<InstrItinClass itin>
"$addr.addr = $wb">;
// VST1 : Vector Store (multiple single elements)
-class VST1D<bits<4> op7_4, string Dt>
- : NLdSt<0,0b00,0b0111,op7_4, (outs), (ins addrmode6:$Rn, VecListOneD:$Vd),
+class VST1D<bits<4> op7_4, string Dt, Operand AddrMode>
+ : NLdSt<0,0b00,0b0111,op7_4, (outs), (ins AddrMode:$Rn, VecListOneD:$Vd),
IIC_VST1, "vst1", Dt, "$Vd, $Rn", "", []> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLDST1Instruction";
}
-class VST1Q<bits<4> op7_4, string Dt>
- : NLdSt<0,0b00,0b1010,op7_4, (outs), (ins addrmode6:$Rn, VecListDPair:$Vd),
+class VST1Q<bits<4> op7_4, string Dt, Operand AddrMode>
+ : NLdSt<0,0b00,0b1010,op7_4, (outs), (ins AddrMode:$Rn, VecListDPair:$Vd),
IIC_VST1x2, "vst1", Dt, "$Vd, $Rn", "", []> {
let Rm = 0b1111;
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDST1Instruction";
}
-def VST1d8 : VST1D<{0,0,0,?}, "8">;
-def VST1d16 : VST1D<{0,1,0,?}, "16">;
-def VST1d32 : VST1D<{1,0,0,?}, "32">;
-def VST1d64 : VST1D<{1,1,0,?}, "64">;
+def VST1d8 : VST1D<{0,0,0,?}, "8", addrmode6align64>;
+def VST1d16 : VST1D<{0,1,0,?}, "16", addrmode6align64>;
+def VST1d32 : VST1D<{1,0,0,?}, "32", addrmode6align64>;
+def VST1d64 : VST1D<{1,1,0,?}, "64", addrmode6align64>;
-def VST1q8 : VST1Q<{0,0,?,?}, "8">;
-def VST1q16 : VST1Q<{0,1,?,?}, "16">;
-def VST1q32 : VST1Q<{1,0,?,?}, "32">;
-def VST1q64 : VST1Q<{1,1,?,?}, "64">;
+def VST1q8 : VST1Q<{0,0,?,?}, "8", addrmode6align64or128>;
+def VST1q16 : VST1Q<{0,1,?,?}, "16", addrmode6align64or128>;
+def VST1q32 : VST1Q<{1,0,?,?}, "32", addrmode6align64or128>;
+def VST1q64 : VST1Q<{1,1,?,?}, "64", addrmode6align64or128>;
// ...with address register writeback:
-multiclass VST1DWB<bits<4> op7_4, string Dt> {
+multiclass VST1DWB<bits<4> op7_4, string Dt, Operand AddrMode> {
def _fixed : NLdSt<0,0b00, 0b0111,op7_4, (outs GPR:$wb),
- (ins addrmode6:$Rn, VecListOneD:$Vd), IIC_VLD1u,
+ (ins AddrMode:$Rn, VecListOneD:$Vd), IIC_VLD1u,
"vst1", Dt, "$Vd, $Rn!",
"$Rn.addr = $wb", []> {
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
@@ -1596,7 +1680,7 @@ multiclass VST1DWB<bits<4> op7_4, string Dt> {
let DecoderMethod = "DecodeVLDST1Instruction";
}
def _register : NLdSt<0,0b00,0b0111,op7_4, (outs GPR:$wb),
- (ins addrmode6:$Rn, rGPR:$Rm, VecListOneD:$Vd),
+ (ins AddrMode:$Rn, rGPR:$Rm, VecListOneD:$Vd),
IIC_VLD1u,
"vst1", Dt, "$Vd, $Rn, $Rm",
"$Rn.addr = $wb", []> {
@@ -1604,9 +1688,9 @@ multiclass VST1DWB<bits<4> op7_4, string Dt> {
let DecoderMethod = "DecodeVLDST1Instruction";
}
}
-multiclass VST1QWB<bits<4> op7_4, string Dt> {
+multiclass VST1QWB<bits<4> op7_4, string Dt, Operand AddrMode> {
def _fixed : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb),
- (ins addrmode6:$Rn, VecListDPair:$Vd), IIC_VLD1x2u,
+ (ins AddrMode:$Rn, VecListDPair:$Vd), IIC_VLD1x2u,
"vst1", Dt, "$Vd, $Rn!",
"$Rn.addr = $wb", []> {
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
@@ -1614,7 +1698,7 @@ multiclass VST1QWB<bits<4> op7_4, string Dt> {
let DecoderMethod = "DecodeVLDST1Instruction";
}
def _register : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb),
- (ins addrmode6:$Rn, rGPR:$Rm, VecListDPair:$Vd),
+ (ins AddrMode:$Rn, rGPR:$Rm, VecListDPair:$Vd),
IIC_VLD1x2u,
"vst1", Dt, "$Vd, $Rn, $Rm",
"$Rn.addr = $wb", []> {
@@ -1623,28 +1707,28 @@ multiclass VST1QWB<bits<4> op7_4, string Dt> {
}
}
-defm VST1d8wb : VST1DWB<{0,0,0,?}, "8">;
-defm VST1d16wb : VST1DWB<{0,1,0,?}, "16">;
-defm VST1d32wb : VST1DWB<{1,0,0,?}, "32">;
-defm VST1d64wb : VST1DWB<{1,1,0,?}, "64">;
+defm VST1d8wb : VST1DWB<{0,0,0,?}, "8", addrmode6align64>;
+defm VST1d16wb : VST1DWB<{0,1,0,?}, "16", addrmode6align64>;
+defm VST1d32wb : VST1DWB<{1,0,0,?}, "32", addrmode6align64>;
+defm VST1d64wb : VST1DWB<{1,1,0,?}, "64", addrmode6align64>;
-defm VST1q8wb : VST1QWB<{0,0,?,?}, "8">;
-defm VST1q16wb : VST1QWB<{0,1,?,?}, "16">;
-defm VST1q32wb : VST1QWB<{1,0,?,?}, "32">;
-defm VST1q64wb : VST1QWB<{1,1,?,?}, "64">;
+defm VST1q8wb : VST1QWB<{0,0,?,?}, "8", addrmode6align64or128>;
+defm VST1q16wb : VST1QWB<{0,1,?,?}, "16", addrmode6align64or128>;
+defm VST1q32wb : VST1QWB<{1,0,?,?}, "32", addrmode6align64or128>;
+defm VST1q64wb : VST1QWB<{1,1,?,?}, "64", addrmode6align64or128>;
// ...with 3 registers
-class VST1D3<bits<4> op7_4, string Dt>
+class VST1D3<bits<4> op7_4, string Dt, Operand AddrMode>
: NLdSt<0, 0b00, 0b0110, op7_4, (outs),
- (ins addrmode6:$Rn, VecListThreeD:$Vd),
+ (ins AddrMode:$Rn, VecListThreeD:$Vd),
IIC_VST1x3, "vst1", Dt, "$Vd, $Rn", "", []> {
let Rm = 0b1111;
let Inst{4} = Rn{4};
let DecoderMethod = "DecodeVLDST1Instruction";
}
-multiclass VST1D3WB<bits<4> op7_4, string Dt> {
+multiclass VST1D3WB<bits<4> op7_4, string Dt, Operand AddrMode> {
def _fixed : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb),
- (ins addrmode6:$Rn, VecListThreeD:$Vd), IIC_VLD1x3u,
+ (ins AddrMode:$Rn, VecListThreeD:$Vd), IIC_VLD1x3u,
"vst1", Dt, "$Vd, $Rn!",
"$Rn.addr = $wb", []> {
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
@@ -1652,7 +1736,7 @@ multiclass VST1D3WB<bits<4> op7_4, string Dt> {
let DecoderMethod = "DecodeVLDST1Instruction";
}
def _register : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb),
- (ins addrmode6:$Rn, rGPR:$Rm, VecListThreeD:$Vd),
+ (ins AddrMode:$Rn, rGPR:$Rm, VecListThreeD:$Vd),
IIC_VLD1x3u,
"vst1", Dt, "$Vd, $Rn, $Rm",
"$Rn.addr = $wb", []> {
@@ -1661,33 +1745,33 @@ multiclass VST1D3WB<bits<4> op7_4, string Dt> {
}
}
-def VST1d8T : VST1D3<{0,0,0,?}, "8">;
-def VST1d16T : VST1D3<{0,1,0,?}, "16">;
-def VST1d32T : VST1D3<{1,0,0,?}, "32">;
-def VST1d64T : VST1D3<{1,1,0,?}, "64">;
+def VST1d8T : VST1D3<{0,0,0,?}, "8", addrmode6align64>;
+def VST1d16T : VST1D3<{0,1,0,?}, "16", addrmode6align64>;
+def VST1d32T : VST1D3<{1,0,0,?}, "32", addrmode6align64>;
+def VST1d64T : VST1D3<{1,1,0,?}, "64", addrmode6align64>;
-defm VST1d8Twb : VST1D3WB<{0,0,0,?}, "8">;
-defm VST1d16Twb : VST1D3WB<{0,1,0,?}, "16">;
-defm VST1d32Twb : VST1D3WB<{1,0,0,?}, "32">;
-defm VST1d64Twb : VST1D3WB<{1,1,0,?}, "64">;
+defm VST1d8Twb : VST1D3WB<{0,0,0,?}, "8", addrmode6align64>;
+defm VST1d16Twb : VST1D3WB<{0,1,0,?}, "16", addrmode6align64>;
+defm VST1d32Twb : VST1D3WB<{1,0,0,?}, "32", addrmode6align64>;
+defm VST1d64Twb : VST1D3WB<{1,1,0,?}, "64", addrmode6align64>;
def VST1d64TPseudo : VSTQQPseudo<IIC_VST1x3>;
def VST1d64TPseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST1x3u>;
def VST1d64TPseudoWB_register : VSTQQWBPseudo<IIC_VST1x3u>;
// ...with 4 registers
-class VST1D4<bits<4> op7_4, string Dt>
+class VST1D4<bits<4> op7_4, string Dt, Operand AddrMode>
: NLdSt<0, 0b00, 0b0010, op7_4, (outs),
- (ins addrmode6:$Rn, VecListFourD:$Vd),
+ (ins AddrMode:$Rn, VecListFourD:$Vd),
IIC_VST1x4, "vst1", Dt, "$Vd, $Rn", "",
[]> {
let Rm = 0b1111;
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDST1Instruction";
}
-multiclass VST1D4WB<bits<4> op7_4, string Dt> {
+multiclass VST1D4WB<bits<4> op7_4, string Dt, Operand AddrMode> {
def _fixed : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb),
- (ins addrmode6:$Rn, VecListFourD:$Vd), IIC_VLD1x4u,
+ (ins AddrMode:$Rn, VecListFourD:$Vd), IIC_VLD1x4u,
"vst1", Dt, "$Vd, $Rn!",
"$Rn.addr = $wb", []> {
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
@@ -1695,7 +1779,7 @@ multiclass VST1D4WB<bits<4> op7_4, string Dt> {
let DecoderMethod = "DecodeVLDST1Instruction";
}
def _register : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb),
- (ins addrmode6:$Rn, rGPR:$Rm, VecListFourD:$Vd),
+ (ins AddrMode:$Rn, rGPR:$Rm, VecListFourD:$Vd),
IIC_VLD1x4u,
"vst1", Dt, "$Vd, $Rn, $Rm",
"$Rn.addr = $wb", []> {
@@ -1704,15 +1788,15 @@ multiclass VST1D4WB<bits<4> op7_4, string Dt> {
}
}
-def VST1d8Q : VST1D4<{0,0,?,?}, "8">;
-def VST1d16Q : VST1D4<{0,1,?,?}, "16">;
-def VST1d32Q : VST1D4<{1,0,?,?}, "32">;
-def VST1d64Q : VST1D4<{1,1,?,?}, "64">;
+def VST1d8Q : VST1D4<{0,0,?,?}, "8", addrmode6align64or128or256>;
+def VST1d16Q : VST1D4<{0,1,?,?}, "16", addrmode6align64or128or256>;
+def VST1d32Q : VST1D4<{1,0,?,?}, "32", addrmode6align64or128or256>;
+def VST1d64Q : VST1D4<{1,1,?,?}, "64", addrmode6align64or128or256>;
-defm VST1d8Qwb : VST1D4WB<{0,0,?,?}, "8">;
-defm VST1d16Qwb : VST1D4WB<{0,1,?,?}, "16">;
-defm VST1d32Qwb : VST1D4WB<{1,0,?,?}, "32">;
-defm VST1d64Qwb : VST1D4WB<{1,1,?,?}, "64">;
+defm VST1d8Qwb : VST1D4WB<{0,0,?,?}, "8", addrmode6align64or128or256>;
+defm VST1d16Qwb : VST1D4WB<{0,1,?,?}, "16", addrmode6align64or128or256>;
+defm VST1d32Qwb : VST1D4WB<{1,0,?,?}, "32", addrmode6align64or128or256>;
+defm VST1d64Qwb : VST1D4WB<{1,1,?,?}, "64", addrmode6align64or128or256>;
def VST1d64QPseudo : VSTQQPseudo<IIC_VST1x4>;
def VST1d64QPseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST1x4u>;
@@ -1720,21 +1804,27 @@ def VST1d64QPseudoWB_register : VSTQQWBPseudo<IIC_VST1x4u>;
// VST2 : Vector Store (multiple 2-element structures)
class VST2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy,
- InstrItinClass itin>
- : NLdSt<0, 0b00, op11_8, op7_4, (outs), (ins addrmode6:$Rn, VdTy:$Vd),
+ InstrItinClass itin, Operand AddrMode>
+ : NLdSt<0, 0b00, op11_8, op7_4, (outs), (ins AddrMode:$Rn, VdTy:$Vd),
itin, "vst2", Dt, "$Vd, $Rn", "", []> {
let Rm = 0b1111;
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDST2Instruction";
}
-def VST2d8 : VST2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VST2>;
-def VST2d16 : VST2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VST2>;
-def VST2d32 : VST2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VST2>;
+def VST2d8 : VST2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VST2,
+ addrmode6align64or128>;
+def VST2d16 : VST2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VST2,
+ addrmode6align64or128>;
+def VST2d32 : VST2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VST2,
+ addrmode6align64or128>;
-def VST2q8 : VST2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VST2x2>;
-def VST2q16 : VST2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VST2x2>;
-def VST2q32 : VST2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VST2x2>;
+def VST2q8 : VST2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VST2x2,
+ addrmode6align64or128or256>;
+def VST2q16 : VST2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VST2x2,
+ addrmode6align64or128or256>;
+def VST2q32 : VST2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VST2x2,
+ addrmode6align64or128or256>;
def VST2q8Pseudo : VSTQQPseudo<IIC_VST2x2>;
def VST2q16Pseudo : VSTQQPseudo<IIC_VST2x2>;
@@ -1742,9 +1832,9 @@ def VST2q32Pseudo : VSTQQPseudo<IIC_VST2x2>;
// ...with address register writeback:
multiclass VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt,
- RegisterOperand VdTy> {
+ RegisterOperand VdTy, Operand AddrMode> {
def _fixed : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
- (ins addrmode6:$Rn, VdTy:$Vd), IIC_VLD1u,
+ (ins AddrMode:$Rn, VdTy:$Vd), IIC_VLD1u,
"vst2", Dt, "$Vd, $Rn!",
"$Rn.addr = $wb", []> {
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
@@ -1752,16 +1842,16 @@ multiclass VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt,
let DecoderMethod = "DecodeVLDST2Instruction";
}
def _register : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
- (ins addrmode6:$Rn, rGPR:$Rm, VdTy:$Vd), IIC_VLD1u,
+ (ins AddrMode:$Rn, rGPR:$Rm, VdTy:$Vd), IIC_VLD1u,
"vst2", Dt, "$Vd, $Rn, $Rm",
"$Rn.addr = $wb", []> {
let Inst{5-4} = Rn{5-4};
let DecoderMethod = "DecodeVLDST2Instruction";
}
}
-multiclass VST2QWB<bits<4> op7_4, string Dt> {
+multiclass VST2QWB<bits<4> op7_4, string Dt, Operand AddrMode> {
def _fixed : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb),
- (ins addrmode6:$Rn, VecListFourD:$Vd), IIC_VLD1u,
+ (ins AddrMode:$Rn, VecListFourD:$Vd), IIC_VLD1u,
"vst2", Dt, "$Vd, $Rn!",
"$Rn.addr = $wb", []> {
let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
@@ -1769,7 +1859,7 @@ multiclass VST2QWB<bits<4> op7_4, string Dt> {
let DecoderMethod = "DecodeVLDST2Instruction";
}
def _register : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb),
- (ins addrmode6:$Rn, rGPR:$Rm, VecListFourD:$Vd),
+ (ins AddrMode:$Rn, rGPR:$Rm, VecListFourD:$Vd),
IIC_VLD1u,
"vst2", Dt, "$Vd, $Rn, $Rm",
"$Rn.addr = $wb", []> {
@@ -1778,13 +1868,16 @@ multiclass VST2QWB<bits<4> op7_4, string Dt> {
}
}
-defm VST2d8wb : VST2DWB<0b1000, {0,0,?,?}, "8", VecListDPair>;
-defm VST2d16wb : VST2DWB<0b1000, {0,1,?,?}, "16", VecListDPair>;
-defm VST2d32wb : VST2DWB<0b1000, {1,0,?,?}, "32", VecListDPair>;
+defm VST2d8wb : VST2DWB<0b1000, {0,0,?,?}, "8", VecListDPair,
+ addrmode6align64or128>;
+defm VST2d16wb : VST2DWB<0b1000, {0,1,?,?}, "16", VecListDPair,
+ addrmode6align64or128>;
+defm VST2d32wb : VST2DWB<0b1000, {1,0,?,?}, "32", VecListDPair,
+ addrmode6align64or128>;
-defm VST2q8wb : VST2QWB<{0,0,?,?}, "8">;
-defm VST2q16wb : VST2QWB<{0,1,?,?}, "16">;
-defm VST2q32wb : VST2QWB<{1,0,?,?}, "32">;
+defm VST2q8wb : VST2QWB<{0,0,?,?}, "8", addrmode6align64or128or256>;
+defm VST2q16wb : VST2QWB<{0,1,?,?}, "16", addrmode6align64or128or256>;
+defm VST2q32wb : VST2QWB<{1,0,?,?}, "32", addrmode6align64or128or256>;
def VST2q8PseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST2x2u>;
def VST2q16PseudoWB_fixed : VSTQQWBfixedPseudo<IIC_VST2x2u>;
@@ -1794,12 +1887,18 @@ def VST2q16PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>;
def VST2q32PseudoWB_register : VSTQQWBregisterPseudo<IIC_VST2x2u>;
// ...with double-spaced registers
-def VST2b8 : VST2<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VST2>;
-def VST2b16 : VST2<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VST2>;
-def VST2b32 : VST2<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VST2>;
-defm VST2b8wb : VST2DWB<0b1001, {0,0,?,?}, "8", VecListDPairSpaced>;
-defm VST2b16wb : VST2DWB<0b1001, {0,1,?,?}, "16", VecListDPairSpaced>;
-defm VST2b32wb : VST2DWB<0b1001, {1,0,?,?}, "32", VecListDPairSpaced>;
+def VST2b8 : VST2<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VST2,
+ addrmode6align64or128>;
+def VST2b16 : VST2<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VST2,
+ addrmode6align64or128>;
+def VST2b32 : VST2<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VST2,
+ addrmode6align64or128>;
+defm VST2b8wb : VST2DWB<0b1001, {0,0,?,?}, "8", VecListDPairSpaced,
+ addrmode6align64or128>;
+defm VST2b16wb : VST2DWB<0b1001, {0,1,?,?}, "16", VecListDPairSpaced,
+ addrmode6align64or128>;
+defm VST2b32wb : VST2DWB<0b1001, {1,0,?,?}, "32", VecListDPairSpaced,
+ addrmode6align64or128>;
// VST3 : Vector Store (multiple 3-element structures)
class VST3D<bits<4> op11_8, bits<4> op7_4, string Dt>
@@ -2267,9 +2366,9 @@ def : Pat<(v2f64 (dword_alignedload addrmode6:$addr)),
def : Pat<(dword_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
(VST1q64 addrmode6:$addr, QPR:$value)>;
def : Pat<(v2f64 (word_alignedload addrmode6:$addr)),
- (VLD1q32 addrmode6:$addr)>;
+ (VLD1q32 addrmode6:$addr)>, Requires<[IsLE]>;
def : Pat<(word_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
- (VST1q32 addrmode6:$addr, QPR:$value)>;
+ (VST1q32 addrmode6:$addr, QPR:$value)>, Requires<[IsLE]>;
def : Pat<(v2f64 (hword_alignedload addrmode6:$addr)),
(VLD1q16 addrmode6:$addr)>, Requires<[IsLE]>;
def : Pat<(hword_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
@@ -2357,14 +2456,14 @@ class N2VDIntnp<bits<2> op17_16, bits<3> op10_8, bit op7,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
: N2Vnp<0b10, op17_16, op10_8, op7, 0, (outs DPR:$Vd), (ins DPR:$Vm),
- itin, OpcodeStr, Dt, ResTy, OpTy,
+ itin, OpcodeStr, Dt,
[(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>;
class N2VQIntnp<bits<2> op17_16, bits<3> op10_8, bit op7,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
: N2Vnp<0b10, op17_16, op10_8, op7, 1, (outs QPR:$Vd), (ins QPR:$Vm),
- itin, OpcodeStr, Dt, ResTy, OpTy,
+ itin, OpcodeStr, Dt,
[(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;
// Similar to NV2VQIntnp with some more encoding bits exposed (crypto).
@@ -2372,7 +2471,7 @@ class N2VQIntXnp<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op6,
bit op7, InstrItinClass itin, string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
: N2Vnp<op19_18, op17_16, op10_8, op7, op6, (outs QPR:$Vd), (ins QPR:$Vm),
- itin, OpcodeStr, Dt, ResTy, OpTy,
+ itin, OpcodeStr, Dt,
[(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>;
// Same as N2VQIntXnp but with Vd as a src register.
@@ -2381,7 +2480,7 @@ class N2VQIntX2np<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op6,
ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp>
: N2Vnp<op19_18, op17_16, op10_8, op7, op6,
(outs QPR:$Vd), (ins QPR:$src, QPR:$Vm),
- itin, OpcodeStr, Dt, ResTy, OpTy,
+ itin, OpcodeStr, Dt,
[(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$src), (OpTy QPR:$Vm))))]> {
let Constraints = "$src = $Vd";
}
@@ -2555,7 +2654,6 @@ class N3VDIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
SDPatternOperator IntOp, bit Commutable>
: N3Vnp<op27_23, op21_20, op11_8, op6, op4,
(outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin, OpcodeStr, Dt,
- ResTy, OpTy, IntOp, Commutable,
[(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>;
class N3VDIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
@@ -2609,7 +2707,6 @@ class N3VQIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
SDPatternOperator IntOp, bit Commutable>
: N3Vnp<op27_23, op21_20, op11_8, op6, op4,
(outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), f, itin, OpcodeStr, Dt,
- ResTy, OpTy, IntOp, Commutable,
[(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>;
// Same as N3VQIntnp but with Vd as a src register.
@@ -2618,8 +2715,8 @@ class N3VQInt3np<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
string Dt, ValueType ResTy, ValueType OpTy,
SDPatternOperator IntOp, bit Commutable>
: N3Vnp<op27_23, op21_20, op11_8, op6, op4,
- (outs QPR:$Vd), (ins QPR:$src, QPR:$Vn, QPR:$Vm), f, itin, OpcodeStr,
- Dt, ResTy, OpTy, IntOp, Commutable,
+ (outs QPR:$Vd), (ins QPR:$src, QPR:$Vn, QPR:$Vm),
+ f, itin, OpcodeStr, Dt,
[(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$src), (OpTy QPR:$Vn),
(OpTy QPR:$Vm))))]> {
let Constraints = "$src = $Vd";
@@ -2939,7 +3036,6 @@ class N3VLIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6,
SDPatternOperator IntOp, bit Commutable>
: N3Vnp<op27_23, op21_20, op11_8, op6, op4,
(outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin, OpcodeStr, Dt,
- ResTy, OpTy, IntOp, Commutable,
[(set QPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>;
class N3VLIntSL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
@@ -5245,6 +5341,35 @@ def VMOVv4f32 : N1ModImm<1, 0b000, 0b1111, 0, 1, 0, 1, (outs QPR:$Vd),
[(set QPR:$Vd, (v4f32 (NEONvmovFPImm timm:$SIMM)))]>;
} // isReMaterializable
+// Add support for the byte-replication feature, for GAS compatibility.
+// E.g. the instructions below:
+// "vmov.i32 d0, 0xffffffff"
+// "vmov.i32 d0, 0xabababab"
+// "vmov.i16 d0, 0xabab"
+// are not strictly valid, but we can handle such cases.
+// For the last two instructions, for example, the assembler should emit:
+// "vmov.i8 d0, 0xab"
+def : NEONInstAlias<"vmov${p}.i16 $Vd, $Vm",
+ (VMOVv8i8 DPR:$Vd, nImmVMOVI16ByteReplicate:$Vm, pred:$p)>;
+def : NEONInstAlias<"vmov${p}.i32 $Vd, $Vm",
+ (VMOVv8i8 DPR:$Vd, nImmVMOVI32ByteReplicate:$Vm, pred:$p)>;
+def : NEONInstAlias<"vmov${p}.i16 $Vd, $Vm",
+ (VMOVv16i8 QPR:$Vd, nImmVMOVI16ByteReplicate:$Vm, pred:$p)>;
+def : NEONInstAlias<"vmov${p}.i32 $Vd, $Vm",
+ (VMOVv16i8 QPR:$Vd, nImmVMOVI32ByteReplicate:$Vm, pred:$p)>;
+
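A minimal C++ sketch of the property these aliases depend on (illustration only; the helper name isByteReplicated is hypothetical and not LLVM's actual asm-parser code): an i16/i32 immediate qualifies for the rewrite exactly when every byte equals its lowest byte.

    // Hypothetical helper, not from LLVM: true iff every byte of the
    // immediate equals its lowest byte, so "vmov.i32 d0, 0xabababab"
    // can be rewritten as "vmov.i8 d0, 0xab".
    #include <cassert>
    #include <cstdint>

    static bool isByteReplicated(uint32_t Imm, unsigned Bytes) {
      uint8_t Lo = Imm & 0xff; // candidate replicated byte
      for (unsigned i = 1; i != Bytes; ++i)
        if (((Imm >> (8 * i)) & 0xff) != Lo)
          return false;
      return true;
    }

    int main() {
      assert(isByteReplicated(0xabababab, 4)); // vmov.i32 -> vmov.i8 d0, 0xab
      assert(isByteReplicated(0xabab, 2));     // vmov.i16 -> vmov.i8 d0, 0xab
      assert(!isByteReplicated(0xab00abab, 4)); // no rewrite possible
    }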
+// Also add the same support for VMVN instructions, so that the instruction:
+// "vmvn.i32 d0, 0xabababab"
+// actually means:
+// "vmov.i8 d0, 0x54"
+def : NEONInstAlias<"vmvn${p}.i16 $Vd, $Vm",
+ (VMOVv8i8 DPR:$Vd, nImmVMVNI16ByteReplicate:$Vm, pred:$p)>;
+def : NEONInstAlias<"vmvn${p}.i32 $Vd, $Vm",
+ (VMOVv8i8 DPR:$Vd, nImmVMVNI32ByteReplicate:$Vm, pred:$p)>;
+def : NEONInstAlias<"vmvn${p}.i16 $Vd, $Vm",
+ (VMOVv16i8 QPR:$Vd, nImmVMVNI16ByteReplicate:$Vm, pred:$p)>;
+def : NEONInstAlias<"vmvn${p}.i32 $Vd, $Vm",
+ (VMOVv16i8 QPR:$Vd, nImmVMVNI32ByteReplicate:$Vm, pred:$p)>;
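For the VMVN aliases the replicated byte is simply the bitwise complement of the written one; a one-line C++ check of the example above:

    // ~0xab, truncated to a byte, is 0x54, so "vmvn.i32 d0, 0xabababab"
    // is emitted as "vmov.i8 d0, 0x54".
    static_assert((~0xabu & 0xffu) == 0x54u, "complement of 0xab is 0x54");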
// On some CPUs the two instructions "vmov.i32 dD, #0" and "vmov.i32 qD, #0"
// require zero cycles to execute so they should be used wherever possible for
@@ -5617,22 +5742,22 @@ def VCVTxu2fq : N2VCvtQ<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32",
v4f32, v4i32, int_arm_neon_vcvtfxu2fp>;
}
-def : NEONInstAlias<"vcvt${p}.s32.f32 $Dd, $Dm, #0",
+def : NEONInstAlias<"vcvt${p}.s32.f32 $Dd, $Dm, #0",
(VCVTf2sd DPR:$Dd, DPR:$Dm, pred:$p)>;
-def : NEONInstAlias<"vcvt${p}.u32.f32 $Dd, $Dm, #0",
+def : NEONInstAlias<"vcvt${p}.u32.f32 $Dd, $Dm, #0",
(VCVTf2ud DPR:$Dd, DPR:$Dm, pred:$p)>;
-def : NEONInstAlias<"vcvt${p}.f32.s32 $Dd, $Dm, #0",
+def : NEONInstAlias<"vcvt${p}.f32.s32 $Dd, $Dm, #0",
(VCVTs2fd DPR:$Dd, DPR:$Dm, pred:$p)>;
-def : NEONInstAlias<"vcvt${p}.f32.u32 $Dd, $Dm, #0",
+def : NEONInstAlias<"vcvt${p}.f32.u32 $Dd, $Dm, #0",
(VCVTu2fd DPR:$Dd, DPR:$Dm, pred:$p)>;
-def : NEONInstAlias<"vcvt${p}.s32.f32 $Qd, $Qm, #0",
+def : NEONInstAlias<"vcvt${p}.s32.f32 $Qd, $Qm, #0",
(VCVTf2sq QPR:$Qd, QPR:$Qm, pred:$p)>;
-def : NEONInstAlias<"vcvt${p}.u32.f32 $Qd, $Qm, #0",
+def : NEONInstAlias<"vcvt${p}.u32.f32 $Qd, $Qm, #0",
(VCVTf2uq QPR:$Qd, QPR:$Qm, pred:$p)>;
-def : NEONInstAlias<"vcvt${p}.f32.s32 $Qd, $Qm, #0",
+def : NEONInstAlias<"vcvt${p}.f32.s32 $Qd, $Qm, #0",
(VCVTs2fq QPR:$Qd, QPR:$Qm, pred:$p)>;
-def : NEONInstAlias<"vcvt${p}.f32.u32 $Qd, $Qm, #0",
+def : NEONInstAlias<"vcvt${p}.f32.u32 $Qd, $Qm, #0",
(VCVTu2fq QPR:$Qd, QPR:$Qm, pred:$p)>;
@@ -6051,67 +6176,145 @@ def : Pat<(f32 (bitconvert GPR:$a)),
//===----------------------------------------------------------------------===//
// bit_convert
-def : Pat<(v1i64 (bitconvert (v2i32 DPR:$src))), (v1i64 DPR:$src)>;
-def : Pat<(v1i64 (bitconvert (v4i16 DPR:$src))), (v1i64 DPR:$src)>;
-def : Pat<(v1i64 (bitconvert (v8i8 DPR:$src))), (v1i64 DPR:$src)>;
+let Predicates = [IsLE] in {
+ def : Pat<(v1i64 (bitconvert (v2i32 DPR:$src))), (v1i64 DPR:$src)>;
+ def : Pat<(v1i64 (bitconvert (v4i16 DPR:$src))), (v1i64 DPR:$src)>;
+ def : Pat<(v1i64 (bitconvert (v8i8 DPR:$src))), (v1i64 DPR:$src)>;
+}
def : Pat<(v1i64 (bitconvert (f64 DPR:$src))), (v1i64 DPR:$src)>;
-def : Pat<(v1i64 (bitconvert (v2f32 DPR:$src))), (v1i64 DPR:$src)>;
-def : Pat<(v2i32 (bitconvert (v1i64 DPR:$src))), (v2i32 DPR:$src)>;
-def : Pat<(v2i32 (bitconvert (v4i16 DPR:$src))), (v2i32 DPR:$src)>;
-def : Pat<(v2i32 (bitconvert (v8i8 DPR:$src))), (v2i32 DPR:$src)>;
-def : Pat<(v2i32 (bitconvert (f64 DPR:$src))), (v2i32 DPR:$src)>;
+let Predicates = [IsLE] in {
+ def : Pat<(v1i64 (bitconvert (v2f32 DPR:$src))), (v1i64 DPR:$src)>;
+ def : Pat<(v2i32 (bitconvert (v1i64 DPR:$src))), (v2i32 DPR:$src)>;
+ def : Pat<(v2i32 (bitconvert (v4i16 DPR:$src))), (v2i32 DPR:$src)>;
+ def : Pat<(v2i32 (bitconvert (v8i8 DPR:$src))), (v2i32 DPR:$src)>;
+ def : Pat<(v2i32 (bitconvert (f64 DPR:$src))), (v2i32 DPR:$src)>;
+}
def : Pat<(v2i32 (bitconvert (v2f32 DPR:$src))), (v2i32 DPR:$src)>;
-def : Pat<(v4i16 (bitconvert (v1i64 DPR:$src))), (v4i16 DPR:$src)>;
-def : Pat<(v4i16 (bitconvert (v2i32 DPR:$src))), (v4i16 DPR:$src)>;
-def : Pat<(v4i16 (bitconvert (v8i8 DPR:$src))), (v4i16 DPR:$src)>;
-def : Pat<(v4i16 (bitconvert (f64 DPR:$src))), (v4i16 DPR:$src)>;
-def : Pat<(v4i16 (bitconvert (v2f32 DPR:$src))), (v4i16 DPR:$src)>;
-def : Pat<(v8i8 (bitconvert (v1i64 DPR:$src))), (v8i8 DPR:$src)>;
-def : Pat<(v8i8 (bitconvert (v2i32 DPR:$src))), (v8i8 DPR:$src)>;
-def : Pat<(v8i8 (bitconvert (v4i16 DPR:$src))), (v8i8 DPR:$src)>;
-def : Pat<(v8i8 (bitconvert (f64 DPR:$src))), (v8i8 DPR:$src)>;
-def : Pat<(v8i8 (bitconvert (v2f32 DPR:$src))), (v8i8 DPR:$src)>;
+let Predicates = [IsLE] in {
+ def : Pat<(v4i16 (bitconvert (v1i64 DPR:$src))), (v4i16 DPR:$src)>;
+ def : Pat<(v4i16 (bitconvert (v2i32 DPR:$src))), (v4i16 DPR:$src)>;
+ def : Pat<(v4i16 (bitconvert (v8i8 DPR:$src))), (v4i16 DPR:$src)>;
+ def : Pat<(v4i16 (bitconvert (f64 DPR:$src))), (v4i16 DPR:$src)>;
+ def : Pat<(v4i16 (bitconvert (v2f32 DPR:$src))), (v4i16 DPR:$src)>;
+ def : Pat<(v8i8 (bitconvert (v1i64 DPR:$src))), (v8i8 DPR:$src)>;
+ def : Pat<(v8i8 (bitconvert (v2i32 DPR:$src))), (v8i8 DPR:$src)>;
+ def : Pat<(v8i8 (bitconvert (v4i16 DPR:$src))), (v8i8 DPR:$src)>;
+ def : Pat<(v8i8 (bitconvert (f64 DPR:$src))), (v8i8 DPR:$src)>;
+ def : Pat<(v8i8 (bitconvert (v2f32 DPR:$src))), (v8i8 DPR:$src)>;
+}
def : Pat<(f64 (bitconvert (v1i64 DPR:$src))), (f64 DPR:$src)>;
-def : Pat<(f64 (bitconvert (v2i32 DPR:$src))), (f64 DPR:$src)>;
-def : Pat<(f64 (bitconvert (v4i16 DPR:$src))), (f64 DPR:$src)>;
-def : Pat<(f64 (bitconvert (v8i8 DPR:$src))), (f64 DPR:$src)>;
-def : Pat<(f64 (bitconvert (v2f32 DPR:$src))), (f64 DPR:$src)>;
-def : Pat<(v2f32 (bitconvert (f64 DPR:$src))), (v2f32 DPR:$src)>;
-def : Pat<(v2f32 (bitconvert (v1i64 DPR:$src))), (v2f32 DPR:$src)>;
+let Predicates = [IsLE] in {
+ def : Pat<(f64 (bitconvert (v2i32 DPR:$src))), (f64 DPR:$src)>;
+ def : Pat<(f64 (bitconvert (v4i16 DPR:$src))), (f64 DPR:$src)>;
+ def : Pat<(f64 (bitconvert (v8i8 DPR:$src))), (f64 DPR:$src)>;
+ def : Pat<(f64 (bitconvert (v2f32 DPR:$src))), (f64 DPR:$src)>;
+ def : Pat<(v2f32 (bitconvert (f64 DPR:$src))), (v2f32 DPR:$src)>;
+ def : Pat<(v2f32 (bitconvert (v1i64 DPR:$src))), (v2f32 DPR:$src)>;
+}
def : Pat<(v2f32 (bitconvert (v2i32 DPR:$src))), (v2f32 DPR:$src)>;
-def : Pat<(v2f32 (bitconvert (v4i16 DPR:$src))), (v2f32 DPR:$src)>;
-def : Pat<(v2f32 (bitconvert (v8i8 DPR:$src))), (v2f32 DPR:$src)>;
+let Predicates = [IsLE] in {
+ def : Pat<(v2f32 (bitconvert (v4i16 DPR:$src))), (v2f32 DPR:$src)>;
+ def : Pat<(v2f32 (bitconvert (v8i8 DPR:$src))), (v2f32 DPR:$src)>;
+}
-def : Pat<(v2i64 (bitconvert (v4i32 QPR:$src))), (v2i64 QPR:$src)>;
-def : Pat<(v2i64 (bitconvert (v8i16 QPR:$src))), (v2i64 QPR:$src)>;
-def : Pat<(v2i64 (bitconvert (v16i8 QPR:$src))), (v2i64 QPR:$src)>;
+let Predicates = [IsLE] in {
+ def : Pat<(v2i64 (bitconvert (v4i32 QPR:$src))), (v2i64 QPR:$src)>;
+ def : Pat<(v2i64 (bitconvert (v8i16 QPR:$src))), (v2i64 QPR:$src)>;
+ def : Pat<(v2i64 (bitconvert (v16i8 QPR:$src))), (v2i64 QPR:$src)>;
+}
def : Pat<(v2i64 (bitconvert (v2f64 QPR:$src))), (v2i64 QPR:$src)>;
-def : Pat<(v2i64 (bitconvert (v4f32 QPR:$src))), (v2i64 QPR:$src)>;
-def : Pat<(v4i32 (bitconvert (v2i64 QPR:$src))), (v4i32 QPR:$src)>;
-def : Pat<(v4i32 (bitconvert (v8i16 QPR:$src))), (v4i32 QPR:$src)>;
-def : Pat<(v4i32 (bitconvert (v16i8 QPR:$src))), (v4i32 QPR:$src)>;
-def : Pat<(v4i32 (bitconvert (v2f64 QPR:$src))), (v4i32 QPR:$src)>;
+let Predicates = [IsLE] in {
+ def : Pat<(v2i64 (bitconvert (v4f32 QPR:$src))), (v2i64 QPR:$src)>;
+ def : Pat<(v4i32 (bitconvert (v2i64 QPR:$src))), (v4i32 QPR:$src)>;
+ def : Pat<(v4i32 (bitconvert (v8i16 QPR:$src))), (v4i32 QPR:$src)>;
+ def : Pat<(v4i32 (bitconvert (v16i8 QPR:$src))), (v4i32 QPR:$src)>;
+ def : Pat<(v4i32 (bitconvert (v2f64 QPR:$src))), (v4i32 QPR:$src)>;
+}
def : Pat<(v4i32 (bitconvert (v4f32 QPR:$src))), (v4i32 QPR:$src)>;
-def : Pat<(v8i16 (bitconvert (v2i64 QPR:$src))), (v8i16 QPR:$src)>;
-def : Pat<(v8i16 (bitconvert (v4i32 QPR:$src))), (v8i16 QPR:$src)>;
-def : Pat<(v8i16 (bitconvert (v16i8 QPR:$src))), (v8i16 QPR:$src)>;
-def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (v8i16 QPR:$src)>;
-def : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (v8i16 QPR:$src)>;
-def : Pat<(v16i8 (bitconvert (v2i64 QPR:$src))), (v16i8 QPR:$src)>;
-def : Pat<(v16i8 (bitconvert (v4i32 QPR:$src))), (v16i8 QPR:$src)>;
-def : Pat<(v16i8 (bitconvert (v8i16 QPR:$src))), (v16i8 QPR:$src)>;
-def : Pat<(v16i8 (bitconvert (v2f64 QPR:$src))), (v16i8 QPR:$src)>;
-def : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (v16i8 QPR:$src)>;
-def : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (v4f32 QPR:$src)>;
+let Predicates = [IsLE] in {
+ def : Pat<(v8i16 (bitconvert (v2i64 QPR:$src))), (v8i16 QPR:$src)>;
+ def : Pat<(v8i16 (bitconvert (v4i32 QPR:$src))), (v8i16 QPR:$src)>;
+ def : Pat<(v8i16 (bitconvert (v16i8 QPR:$src))), (v8i16 QPR:$src)>;
+ def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (v8i16 QPR:$src)>;
+ def : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (v8i16 QPR:$src)>;
+ def : Pat<(v16i8 (bitconvert (v2i64 QPR:$src))), (v16i8 QPR:$src)>;
+ def : Pat<(v16i8 (bitconvert (v4i32 QPR:$src))), (v16i8 QPR:$src)>;
+ def : Pat<(v16i8 (bitconvert (v8i16 QPR:$src))), (v16i8 QPR:$src)>;
+ def : Pat<(v16i8 (bitconvert (v2f64 QPR:$src))), (v16i8 QPR:$src)>;
+ def : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (v16i8 QPR:$src)>;
+ def : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (v4f32 QPR:$src)>;
+}
def : Pat<(v4f32 (bitconvert (v4i32 QPR:$src))), (v4f32 QPR:$src)>;
-def : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (v4f32 QPR:$src)>;
-def : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (v4f32 QPR:$src)>;
-def : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (v4f32 QPR:$src)>;
+let Predicates = [IsLE] in {
+ def : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (v4f32 QPR:$src)>;
+ def : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (v4f32 QPR:$src)>;
+ def : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (v4f32 QPR:$src)>;
+}
def : Pat<(v2f64 (bitconvert (v2i64 QPR:$src))), (v2f64 QPR:$src)>;
-def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (v2f64 QPR:$src)>;
-def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (v2f64 QPR:$src)>;
-def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (v2f64 QPR:$src)>;
-def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (v2f64 QPR:$src)>;
+let Predicates = [IsLE] in {
+ def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (v2f64 QPR:$src)>;
+ def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (v2f64 QPR:$src)>;
+ def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (v2f64 QPR:$src)>;
+ def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (v2f64 QPR:$src)>;
+}
+
+let Predicates = [IsBE] in {
+ // 64 bit conversions
+ def : Pat<(v1i64 (bitconvert (v2i32 DPR:$src))), (VREV64d32 DPR:$src)>;
+ def : Pat<(v1i64 (bitconvert (v4i16 DPR:$src))), (VREV64d16 DPR:$src)>;
+ def : Pat<(v1i64 (bitconvert (v8i8 DPR:$src))), (VREV64d8 DPR:$src)>;
+ def : Pat<(v1i64 (bitconvert (v2f32 DPR:$src))), (VREV64d32 DPR:$src)>;
+ def : Pat<(v2i32 (bitconvert (v1i64 DPR:$src))), (VREV64d32 DPR:$src)>;
+ def : Pat<(v2i32 (bitconvert (v4i16 DPR:$src))), (VREV32d16 DPR:$src)>;
+ def : Pat<(v2i32 (bitconvert (v8i8 DPR:$src))), (VREV32d8 DPR:$src)>;
+ def : Pat<(v2i32 (bitconvert (f64 DPR:$src))), (VREV64d32 DPR:$src)>;
+ def : Pat<(v4i16 (bitconvert (v1i64 DPR:$src))), (VREV64d16 DPR:$src)>;
+ def : Pat<(v4i16 (bitconvert (v2i32 DPR:$src))), (VREV32d16 DPR:$src)>;
+ def : Pat<(v4i16 (bitconvert (v8i8 DPR:$src))), (VREV16d8 DPR:$src)>;
+ def : Pat<(v4i16 (bitconvert (f64 DPR:$src))), (VREV64d16 DPR:$src)>;
+ def : Pat<(v4i16 (bitconvert (v2f32 DPR:$src))), (VREV32d16 DPR:$src)>;
+ def : Pat<(v8i8 (bitconvert (v1i64 DPR:$src))), (VREV64d8 DPR:$src)>;
+ def : Pat<(v8i8 (bitconvert (v2i32 DPR:$src))), (VREV32d8 DPR:$src)>;
+ def : Pat<(v8i8 (bitconvert (v4i16 DPR:$src))), (VREV16d8 DPR:$src)>;
+ def : Pat<(v8i8 (bitconvert (f64 DPR:$src))), (VREV64d8 DPR:$src)>;
+ def : Pat<(v8i8 (bitconvert (v2f32 DPR:$src))), (VREV32d8 DPR:$src)>;
+ def : Pat<(f64 (bitconvert (v2i32 DPR:$src))), (VREV64d32 DPR:$src)>;
+ def : Pat<(f64 (bitconvert (v4i16 DPR:$src))), (VREV64d16 DPR:$src)>;
+ def : Pat<(f64 (bitconvert (v8i8 DPR:$src))), (VREV64d8 DPR:$src)>;
+ def : Pat<(f64 (bitconvert (v2f32 DPR:$src))), (VREV64d32 DPR:$src)>;
+ def : Pat<(v2f32 (bitconvert (f64 DPR:$src))), (VREV64d32 DPR:$src)>;
+ def : Pat<(v2f32 (bitconvert (v1i64 DPR:$src))), (VREV64d32 DPR:$src)>;
+ def : Pat<(v2f32 (bitconvert (v4i16 DPR:$src))), (VREV32d16 DPR:$src)>;
+ def : Pat<(v2f32 (bitconvert (v8i8 DPR:$src))), (VREV32d8 DPR:$src)>;
+
+ // 128 bit conversions
+ def : Pat<(v2i64 (bitconvert (v4i32 QPR:$src))), (VREV64q32 QPR:$src)>;
+ def : Pat<(v2i64 (bitconvert (v8i16 QPR:$src))), (VREV64q16 QPR:$src)>;
+ def : Pat<(v2i64 (bitconvert (v16i8 QPR:$src))), (VREV64q8 QPR:$src)>;
+ def : Pat<(v2i64 (bitconvert (v4f32 QPR:$src))), (VREV64q32 QPR:$src)>;
+ def : Pat<(v4i32 (bitconvert (v2i64 QPR:$src))), (VREV64q32 QPR:$src)>;
+ def : Pat<(v4i32 (bitconvert (v8i16 QPR:$src))), (VREV32q16 QPR:$src)>;
+ def : Pat<(v4i32 (bitconvert (v16i8 QPR:$src))), (VREV32q8 QPR:$src)>;
+ def : Pat<(v4i32 (bitconvert (v2f64 QPR:$src))), (VREV64q32 QPR:$src)>;
+ def : Pat<(v8i16 (bitconvert (v2i64 QPR:$src))), (VREV64q16 QPR:$src)>;
+ def : Pat<(v8i16 (bitconvert (v4i32 QPR:$src))), (VREV32q16 QPR:$src)>;
+ def : Pat<(v8i16 (bitconvert (v16i8 QPR:$src))), (VREV16q8 QPR:$src)>;
+ def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (VREV64q16 QPR:$src)>;
+ def : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (VREV32q16 QPR:$src)>;
+ def : Pat<(v16i8 (bitconvert (v2i64 QPR:$src))), (VREV64q8 QPR:$src)>;
+ def : Pat<(v16i8 (bitconvert (v4i32 QPR:$src))), (VREV32q8 QPR:$src)>;
+ def : Pat<(v16i8 (bitconvert (v8i16 QPR:$src))), (VREV16q8 QPR:$src)>;
+ def : Pat<(v16i8 (bitconvert (v2f64 QPR:$src))), (VREV64q8 QPR:$src)>;
+ def : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (VREV32q8 QPR:$src)>;
+ def : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (VREV64q32 QPR:$src)>;
+ def : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (VREV32q16 QPR:$src)>;
+ def : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (VREV32q8 QPR:$src)>;
+ def : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (VREV64q32 QPR:$src)>;
+ def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (VREV64q32 QPR:$src)>;
+ def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (VREV64q16 QPR:$src)>;
+ def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (VREV64q8 QPR:$src)>;
+ def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (VREV64q32 QPR:$src)>;
+}
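Why the big-endian patterns above need a VREV while the little-endian ones are free (a host-level illustration only, not target codegen): a bitconvert is defined by memory byte layout, and on a big-endian target the two i32 lanes land in the opposite halves of the 64-bit container, which is exactly what VREV64.32 corrects.

    // Illustration: reinterpret v2i32 as i64 the way bitconvert defines it.
    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    int main() {
      uint32_t Lanes[2] = {0x11223344, 0x55667788}; // v2i32, lane 0 first
      uint64_t AsI64;
      std::memcpy(&AsI64, Lanes, sizeof AsI64);     // bitconvert semantics
      // Little-endian host: 0x5566778811223344 (lane 0 in the low half,
      // matching the NEON register view, so the cast costs nothing).
      // Big-endian host: 0x1122334455667788 (halves swapped; VREV64.32
      // restores the expected lane order).
      std::printf("0x%016llx\n", (unsigned long long)AsI64);
    }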
// Fold extracting an element out of a v2i32 into a vfp register.
def : Pat<(f32 (bitconvert (i32 (extractelt (v2i32 DPR:$src), imm:$lane)))),
@@ -6120,7 +6323,7 @@ def : Pat<(f32 (bitconvert (i32 (extractelt (v2i32 DPR:$src), imm:$lane)))),
// Vector lengthening move with load, matching extending loads.
// extload, zextload and sextload for a standard lengthening load. Example:
-// Lengthen_Single<"8", "i16", "8"> =
+// Lengthen_Single<"8", "i16", "8"> =
// Pat<(v8i16 (extloadvi8 addrmode6:$addr))
// (VMOVLuv8i16 (VLD1d8 addrmode6:$addr,
// (f64 (IMPLICIT_DEF)), (i32 0)))>;
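In scalar terms (a sketch of the expansion documented above, not part of the patch), Lengthen_Single<"8", "i16", "8"> does the following: load eight bytes (VLD1.8) and widen each to 16 bits (VMOVL.U8).

    // Scalar model of the v8i8 -> v8i16 lengthening load pattern.
    #include <cstdint>

    static void lengthen_v8i8_to_v8i16(const uint8_t *Src, uint16_t Dst[8]) {
      for (int i = 0; i != 8; ++i)
        Dst[i] = Src[i]; // zero-extend, as VMOVLuv8i16 does for zext/extload
    }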
@@ -6147,7 +6350,7 @@ multiclass Lengthen_Single<string DestLanes, string DestTy, string SrcTy> {
// half the lanes available. Example:
// Lengthen_HalfSingle<"4", "i16", "8", "i16", "i8"> =
// Pat<(v4i16 (extloadvi8 addrmode6oneL32:$addr)),
-// (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd32 addrmode6oneL32:$addr,
+// (EXTRACT_SUBREG (VMOVLuv8i16 (VLD1LNd32 addrmode6oneL32:$addr,
// (f64 (IMPLICIT_DEF)), (i32 0))),
// dsub_0)>;
multiclass Lengthen_HalfSingle<string DestLanes, string DestTy, string SrcTy,
@@ -6257,7 +6460,7 @@ defm : Lengthen_Double<"2", "i64", "i16", "4", "i32", "2", "i64">;
// Triple lengthening - v2i8 -> v2i16 -> v2i32 -> v2i64
def : Pat<(v2i64 (extloadvi8 addrmode6:$addr)),
(VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
- (VLD1LNd16 addrmode6:$addr,
+ (VLD1LNd16 addrmode6:$addr,
(f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>;
def : Pat<(v2i64 (zextloadvi8 addrmode6:$addr)),
(VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
@@ -6311,379 +6514,442 @@ defm : NEONDTAnyInstAlias<"vorr${p}", "$Vdn, $Vm",
// VLD1 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
def VLD1LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr",
- (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr,
+ pred:$p)>;
def VLD1LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr",
- (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr,
+ pred:$p)>;
def VLD1LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr",
- (ins VecListOneDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr,
+ pred:$p)>;
def VLD1LNdWB_fixed_Asm_8 :
NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr!",
- (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr,
+ pred:$p)>;
def VLD1LNdWB_fixed_Asm_16 :
NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr!",
- (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr,
+ pred:$p)>;
def VLD1LNdWB_fixed_Asm_32 :
NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr!",
- (ins VecListOneDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr,
+ pred:$p)>;
def VLD1LNdWB_register_Asm_8 :
NEONDataTypeAsmPseudoInst<"vld1${p}", ".8", "$list, $addr, $Rm",
- (ins VecListOneDByteIndexed:$list, addrmode6:$addr,
+ (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr,
rGPR:$Rm, pred:$p)>;
def VLD1LNdWB_register_Asm_16 :
NEONDataTypeAsmPseudoInst<"vld1${p}", ".16", "$list, $addr, $Rm",
- (ins VecListOneDHWordIndexed:$list, addrmode6:$addr,
+ (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr,
rGPR:$Rm, pred:$p)>;
def VLD1LNdWB_register_Asm_32 :
NEONDataTypeAsmPseudoInst<"vld1${p}", ".32", "$list, $addr, $Rm",
- (ins VecListOneDWordIndexed:$list, addrmode6:$addr,
+ (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr,
rGPR:$Rm, pred:$p)>;
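For reference, the lane-index semantics these pseudos exist to parse (scalar illustration only): "vld1.32 {d0[1]}, [r0]" loads a single 32-bit element into lane 1 and leaves the other lane untouched.

    // Scalar model of a single-lane VLD1: only the indexed lane is written.
    #include <cstdint>

    static void vld1_lane_32(uint32_t Lanes[2], const uint32_t *Addr,
                             unsigned Lane) {
      Lanes[Lane] = *Addr;
    }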
// VST1 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
def VST1LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr",
- (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr,
+ pred:$p)>;
def VST1LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr",
- (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr,
+ pred:$p)>;
def VST1LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr",
- (ins VecListOneDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr,
+ pred:$p)>;
def VST1LNdWB_fixed_Asm_8 :
NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr!",
- (ins VecListOneDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr,
+ pred:$p)>;
def VST1LNdWB_fixed_Asm_16 :
NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr!",
- (ins VecListOneDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr,
+ pred:$p)>;
def VST1LNdWB_fixed_Asm_32 :
NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr!",
- (ins VecListOneDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr,
+ pred:$p)>;
def VST1LNdWB_register_Asm_8 :
NEONDataTypeAsmPseudoInst<"vst1${p}", ".8", "$list, $addr, $Rm",
- (ins VecListOneDByteIndexed:$list, addrmode6:$addr,
+ (ins VecListOneDByteIndexed:$list, addrmode6alignNone:$addr,
rGPR:$Rm, pred:$p)>;
def VST1LNdWB_register_Asm_16 :
NEONDataTypeAsmPseudoInst<"vst1${p}", ".16", "$list, $addr, $Rm",
- (ins VecListOneDHWordIndexed:$list, addrmode6:$addr,
+ (ins VecListOneDHWordIndexed:$list, addrmode6align16:$addr,
rGPR:$Rm, pred:$p)>;
def VST1LNdWB_register_Asm_32 :
NEONDataTypeAsmPseudoInst<"vst1${p}", ".32", "$list, $addr, $Rm",
- (ins VecListOneDWordIndexed:$list, addrmode6:$addr,
+ (ins VecListOneDWordIndexed:$list, addrmode6align32:$addr,
rGPR:$Rm, pred:$p)>;
// VLD2 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
def VLD2LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr",
- (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr,
+ pred:$p)>;
def VLD2LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr",
- (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr,
+ pred:$p)>;
def VLD2LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr",
- (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr, pred:$p)>;
def VLD2LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr",
- (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr,
+ pred:$p)>;
def VLD2LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr",
- (ins VecListTwoQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr,
+ pred:$p)>;
def VLD2LNdWB_fixed_Asm_8 :
NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr!",
- (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr,
+ pred:$p)>;
def VLD2LNdWB_fixed_Asm_16 :
NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr!",
- (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr,
+ pred:$p)>;
def VLD2LNdWB_fixed_Asm_32 :
NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr!",
- (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr,
+ pred:$p)>;
def VLD2LNqWB_fixed_Asm_16 :
NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr!",
- (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr,
+ pred:$p)>;
def VLD2LNqWB_fixed_Asm_32 :
NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr!",
- (ins VecListTwoQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr,
+ pred:$p)>;
def VLD2LNdWB_register_Asm_8 :
NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr, $Rm",
- (ins VecListTwoDByteIndexed:$list, addrmode6:$addr,
+ (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr,
rGPR:$Rm, pred:$p)>;
def VLD2LNdWB_register_Asm_16 :
NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr, $Rm",
- (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr,
+ (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr,
rGPR:$Rm, pred:$p)>;
def VLD2LNdWB_register_Asm_32 :
NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr, $Rm",
- (ins VecListTwoDWordIndexed:$list, addrmode6:$addr,
+ (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr,
rGPR:$Rm, pred:$p)>;
def VLD2LNqWB_register_Asm_16 :
NEONDataTypeAsmPseudoInst<"vld2${p}", ".16", "$list, $addr, $Rm",
- (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr,
+ (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr,
rGPR:$Rm, pred:$p)>;
def VLD2LNqWB_register_Asm_32 :
NEONDataTypeAsmPseudoInst<"vld2${p}", ".32", "$list, $addr, $Rm",
- (ins VecListTwoQWordIndexed:$list, addrmode6:$addr,
+ (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr,
rGPR:$Rm, pred:$p)>;
// VST2 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
def VST2LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr",
- (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr,
+ pred:$p)>;
def VST2LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr",
- (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr,
+ pred:$p)>;
def VST2LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr",
- (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr,
+ pred:$p)>;
def VST2LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr",
- (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr,
+ pred:$p)>;
def VST2LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr",
- (ins VecListTwoQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr,
+ pred:$p)>;
def VST2LNdWB_fixed_Asm_8 :
NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr!",
- (ins VecListTwoDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr,
+ pred:$p)>;
def VST2LNdWB_fixed_Asm_16 :
NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr!",
- (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr,
+ pred:$p)>;
def VST2LNdWB_fixed_Asm_32 :
NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr!",
- (ins VecListTwoDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr,
+ pred:$p)>;
def VST2LNqWB_fixed_Asm_16 :
NEONDataTypeAsmPseudoInst<"vst2${p}", ".16", "$list, $addr!",
- (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr,
+ pred:$p)>;
def VST2LNqWB_fixed_Asm_32 :
NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr!",
- (ins VecListTwoQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr,
+ pred:$p)>;
def VST2LNdWB_register_Asm_8 :
NEONDataTypeAsmPseudoInst<"vst2${p}", ".8", "$list, $addr, $Rm",
- (ins VecListTwoDByteIndexed:$list, addrmode6:$addr,
+ (ins VecListTwoDByteIndexed:$list, addrmode6align16:$addr,
rGPR:$Rm, pred:$p)>;
def VST2LNdWB_register_Asm_16 :
NEONDataTypeAsmPseudoInst<"vst2${p}", ".16","$list, $addr, $Rm",
- (ins VecListTwoDHWordIndexed:$list, addrmode6:$addr,
+ (ins VecListTwoDHWordIndexed:$list, addrmode6align32:$addr,
rGPR:$Rm, pred:$p)>;
def VST2LNdWB_register_Asm_32 :
NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr, $Rm",
- (ins VecListTwoDWordIndexed:$list, addrmode6:$addr,
+ (ins VecListTwoDWordIndexed:$list, addrmode6align64:$addr,
rGPR:$Rm, pred:$p)>;
def VST2LNqWB_register_Asm_16 :
NEONDataTypeAsmPseudoInst<"vst2${p}", ".16","$list, $addr, $Rm",
- (ins VecListTwoQHWordIndexed:$list, addrmode6:$addr,
+ (ins VecListTwoQHWordIndexed:$list, addrmode6align32:$addr,
rGPR:$Rm, pred:$p)>;
def VST2LNqWB_register_Asm_32 :
NEONDataTypeAsmPseudoInst<"vst2${p}", ".32", "$list, $addr, $Rm",
- (ins VecListTwoQWordIndexed:$list, addrmode6:$addr,
+ (ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr,
rGPR:$Rm, pred:$p)>;
// VLD3 all-lanes pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
def VLD3DUPdAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
- (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
+ pred:$p)>;
def VLD3DUPdAsm_16: NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
- (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
+ pred:$p)>;
def VLD3DUPdAsm_32: NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
- (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
+ pred:$p)>;
def VLD3DUPqAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
- (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
+ pred:$p)>;
def VLD3DUPqAsm_16: NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
- (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
+ pred:$p)>;
def VLD3DUPqAsm_32: NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
- (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
+ pred:$p)>;
def VLD3DUPdWB_fixed_Asm_8 :
NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
- (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
+ pred:$p)>;
def VLD3DUPdWB_fixed_Asm_16 :
NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
- (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
+ pred:$p)>;
def VLD3DUPdWB_fixed_Asm_32 :
NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
- (ins VecListThreeDAllLanes:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
+ pred:$p)>;
def VLD3DUPqWB_fixed_Asm_8 :
NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
- (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
+ pred:$p)>;
def VLD3DUPqWB_fixed_Asm_16 :
NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
- (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
+ pred:$p)>;
def VLD3DUPqWB_fixed_Asm_32 :
NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
- (ins VecListThreeQAllLanes:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
+ pred:$p)>;
def VLD3DUPdWB_register_Asm_8 :
NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
- (ins VecListThreeDAllLanes:$list, addrmode6:$addr,
+ (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
rGPR:$Rm, pred:$p)>;
def VLD3DUPdWB_register_Asm_16 :
NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
- (ins VecListThreeDAllLanes:$list, addrmode6:$addr,
+ (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
rGPR:$Rm, pred:$p)>;
def VLD3DUPdWB_register_Asm_32 :
NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
- (ins VecListThreeDAllLanes:$list, addrmode6:$addr,
+ (ins VecListThreeDAllLanes:$list, addrmode6dupalignNone:$addr,
rGPR:$Rm, pred:$p)>;
def VLD3DUPqWB_register_Asm_8 :
NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
- (ins VecListThreeQAllLanes:$list, addrmode6:$addr,
+ (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
rGPR:$Rm, pred:$p)>;
def VLD3DUPqWB_register_Asm_16 :
NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
- (ins VecListThreeQAllLanes:$list, addrmode6:$addr,
+ (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
rGPR:$Rm, pred:$p)>;
def VLD3DUPqWB_register_Asm_32 :
NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
- (ins VecListThreeQAllLanes:$list, addrmode6:$addr,
+ (ins VecListThreeQAllLanes:$list, addrmode6dupalignNone:$addr,
rGPR:$Rm, pred:$p)>;
// VLD3 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
def VLD3LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
- (ins VecListThreeDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr,
+ pred:$p)>;
def VLD3LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
- (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListThreeDHWordIndexed:$list, addrmode6alignNone:$addr,
+ pred:$p)>;
def VLD3LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
- (ins VecListThreeDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr,
+ pred:$p)>;
def VLD3LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
- (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListThreeQHWordIndexed:$list, addrmode6alignNone:$addr,
+ pred:$p)>;
def VLD3LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
- (ins VecListThreeQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr,
+ pred:$p)>;
def VLD3LNdWB_fixed_Asm_8 :
NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
- (ins VecListThreeDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr,
+ pred:$p)>;
def VLD3LNdWB_fixed_Asm_16 :
NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
- (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListThreeDHWordIndexed:$list, addrmode6alignNone:$addr,
+ pred:$p)>;
def VLD3LNdWB_fixed_Asm_32 :
NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
- (ins VecListThreeDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr,
+ pred:$p)>;
def VLD3LNqWB_fixed_Asm_16 :
NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
- (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListThreeQHWordIndexed:$list, addrmode6alignNone:$addr,
+ pred:$p)>;
def VLD3LNqWB_fixed_Asm_32 :
NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
- (ins VecListThreeQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr,
+ pred:$p)>;
def VLD3LNdWB_register_Asm_8 :
NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
- (ins VecListThreeDByteIndexed:$list, addrmode6:$addr,
+ (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr,
rGPR:$Rm, pred:$p)>;
def VLD3LNdWB_register_Asm_16 :
NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
- (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr,
- rGPR:$Rm, pred:$p)>;
+ (ins VecListThreeDHWordIndexed:$list,
+ addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>;
def VLD3LNdWB_register_Asm_32 :
NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
- (ins VecListThreeDWordIndexed:$list, addrmode6:$addr,
+ (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr,
rGPR:$Rm, pred:$p)>;
def VLD3LNqWB_register_Asm_16 :
NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
- (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr,
- rGPR:$Rm, pred:$p)>;
+ (ins VecListThreeQHWordIndexed:$list,
+ addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>;
def VLD3LNqWB_register_Asm_32 :
NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
- (ins VecListThreeQWordIndexed:$list, addrmode6:$addr,
+ (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr,
rGPR:$Rm, pred:$p)>;
// VLD3 multiple structure pseudo-instructions. These need special handling for
// the vector operands that the normal instructions don't yet model.
// FIXME: Remove these when the register classes and instructions are updated.
def VLD3dAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
- (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3dAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
- (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3dAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
- (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3qAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
- (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3qAsm_16 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr",
- (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3qAsm_32 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr",
- (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3dWB_fixed_Asm_8 :
NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
- (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3dWB_fixed_Asm_16 :
NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
- (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3dWB_fixed_Asm_32 :
NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
- (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3qWB_fixed_Asm_8 :
NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr!",
- (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3qWB_fixed_Asm_16 :
NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr!",
- (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3qWB_fixed_Asm_32 :
NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr!",
- (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VLD3dWB_register_Asm_8 :
NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
- (ins VecListThreeD:$list, addrmode6:$addr,
+ (ins VecListThreeD:$list, addrmode6align64:$addr,
rGPR:$Rm, pred:$p)>;
def VLD3dWB_register_Asm_16 :
NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
- (ins VecListThreeD:$list, addrmode6:$addr,
+ (ins VecListThreeD:$list, addrmode6align64:$addr,
rGPR:$Rm, pred:$p)>;
def VLD3dWB_register_Asm_32 :
NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
- (ins VecListThreeD:$list, addrmode6:$addr,
+ (ins VecListThreeD:$list, addrmode6align64:$addr,
rGPR:$Rm, pred:$p)>;
def VLD3qWB_register_Asm_8 :
NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr, $Rm",
- (ins VecListThreeQ:$list, addrmode6:$addr,
+ (ins VecListThreeQ:$list, addrmode6align64:$addr,
rGPR:$Rm, pred:$p)>;
def VLD3qWB_register_Asm_16 :
NEONDataTypeAsmPseudoInst<"vld3${p}", ".16", "$list, $addr, $Rm",
- (ins VecListThreeQ:$list, addrmode6:$addr,
+ (ins VecListThreeQ:$list, addrmode6align64:$addr,
rGPR:$Rm, pred:$p)>;
def VLD3qWB_register_Asm_32 :
NEONDataTypeAsmPseudoInst<"vld3${p}", ".32", "$list, $addr, $Rm",
- (ins VecListThreeQ:$list, addrmode6:$addr,
+ (ins VecListThreeQ:$list, addrmode6align64:$addr,
rGPR:$Rm, pred:$p)>;
// VST3 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
def VST3LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr",
- (ins VecListThreeDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr,
+ pred:$p)>;
def VST3LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
- (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListThreeDHWordIndexed:$list, addrmode6alignNone:$addr,
+ pred:$p)>;
def VST3LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
- (ins VecListThreeDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr,
+ pred:$p)>;
def VST3LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
- (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListThreeQHWordIndexed:$list, addrmode6alignNone:$addr,
+ pred:$p)>;
def VST3LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
- (ins VecListThreeQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr,
+ pred:$p)>;
def VST3LNdWB_fixed_Asm_8 :
NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!",
- (ins VecListThreeDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr,
+ pred:$p)>;
def VST3LNdWB_fixed_Asm_16 :
NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
- (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListThreeDHWordIndexed:$list, addrmode6alignNone:$addr,
+ pred:$p)>;
def VST3LNdWB_fixed_Asm_32 :
NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
- (ins VecListThreeDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr,
+ pred:$p)>;
def VST3LNqWB_fixed_Asm_16 :
NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
- (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListThreeQHWordIndexed:$list, addrmode6alignNone:$addr,
+ pred:$p)>;
def VST3LNqWB_fixed_Asm_32 :
NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
- (ins VecListThreeQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr,
+ pred:$p)>;
def VST3LNdWB_register_Asm_8 :
NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm",
- (ins VecListThreeDByteIndexed:$list, addrmode6:$addr,
+ (ins VecListThreeDByteIndexed:$list, addrmode6alignNone:$addr,
rGPR:$Rm, pred:$p)>;
def VST3LNdWB_register_Asm_16 :
NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
- (ins VecListThreeDHWordIndexed:$list, addrmode6:$addr,
- rGPR:$Rm, pred:$p)>;
+ (ins VecListThreeDHWordIndexed:$list,
+ addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>;
def VST3LNdWB_register_Asm_32 :
NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
- (ins VecListThreeDWordIndexed:$list, addrmode6:$addr,
+ (ins VecListThreeDWordIndexed:$list, addrmode6alignNone:$addr,
rGPR:$Rm, pred:$p)>;
def VST3LNqWB_register_Asm_16 :
NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
- (ins VecListThreeQHWordIndexed:$list, addrmode6:$addr,
- rGPR:$Rm, pred:$p)>;
+ (ins VecListThreeQHWordIndexed:$list,
+ addrmode6alignNone:$addr, rGPR:$Rm, pred:$p)>;
def VST3LNqWB_register_Asm_32 :
NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
- (ins VecListThreeQWordIndexed:$list, addrmode6:$addr,
+ (ins VecListThreeQWordIndexed:$list, addrmode6alignNone:$addr,
rGPR:$Rm, pred:$p)>;
@@ -6691,168 +6957,190 @@ def VST3LNqWB_register_Asm_32 :
// VST3 multiple structure pseudo-instructions. These need special handling for
// the vector operands that the normal instructions don't yet model.
// FIXME: Remove these when the register classes and instructions are updated.
def VST3dAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr",
- (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VST3dAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
- (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VST3dAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
- (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VST3qAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr",
- (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VST3qAsm_16 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr",
- (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VST3qAsm_32 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr",
- (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VST3dWB_fixed_Asm_8 :
NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!",
- (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VST3dWB_fixed_Asm_16 :
NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
- (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VST3dWB_fixed_Asm_32 :
NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
- (ins VecListThreeD:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListThreeD:$list, addrmode6align64:$addr, pred:$p)>;
def VST3qWB_fixed_Asm_8 :
NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr!",
- (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VST3qWB_fixed_Asm_16 :
NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr!",
- (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VST3qWB_fixed_Asm_32 :
NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr!",
- (ins VecListThreeQ:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListThreeQ:$list, addrmode6align64:$addr, pred:$p)>;
def VST3dWB_register_Asm_8 :
NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm",
- (ins VecListThreeD:$list, addrmode6:$addr,
+ (ins VecListThreeD:$list, addrmode6align64:$addr,
rGPR:$Rm, pred:$p)>;
def VST3dWB_register_Asm_16 :
NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
- (ins VecListThreeD:$list, addrmode6:$addr,
+ (ins VecListThreeD:$list, addrmode6align64:$addr,
rGPR:$Rm, pred:$p)>;
def VST3dWB_register_Asm_32 :
NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
- (ins VecListThreeD:$list, addrmode6:$addr,
+ (ins VecListThreeD:$list, addrmode6align64:$addr,
rGPR:$Rm, pred:$p)>;
def VST3qWB_register_Asm_8 :
NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr, $Rm",
- (ins VecListThreeQ:$list, addrmode6:$addr,
+ (ins VecListThreeQ:$list, addrmode6align64:$addr,
rGPR:$Rm, pred:$p)>;
def VST3qWB_register_Asm_16 :
NEONDataTypeAsmPseudoInst<"vst3${p}", ".16", "$list, $addr, $Rm",
- (ins VecListThreeQ:$list, addrmode6:$addr,
+ (ins VecListThreeQ:$list, addrmode6align64:$addr,
rGPR:$Rm, pred:$p)>;
def VST3qWB_register_Asm_32 :
NEONDataTypeAsmPseudoInst<"vst3${p}", ".32", "$list, $addr, $Rm",
- (ins VecListThreeQ:$list, addrmode6:$addr,
+ (ins VecListThreeQ:$list, addrmode6align64:$addr,
rGPR:$Rm, pred:$p)>;
// VLD4 all-lanes pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
def VLD4DUPdAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
- (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListFourDAllLanes:$list, addrmode6dupalign32:$addr,
+ pred:$p)>;
def VLD4DUPdAsm_16: NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
- (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListFourDAllLanes:$list, addrmode6dupalign64:$addr,
+ pred:$p)>;
def VLD4DUPdAsm_32: NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
- (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListFourDAllLanes:$list, addrmode6dupalign64or128:$addr,
+ pred:$p)>;
def VLD4DUPqAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
- (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListFourQAllLanes:$list, addrmode6dupalign32:$addr,
+ pred:$p)>;
def VLD4DUPqAsm_16: NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
- (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListFourQAllLanes:$list, addrmode6dupalign64:$addr,
+ pred:$p)>;
def VLD4DUPqAsm_32: NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
- (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListFourQAllLanes:$list, addrmode6dupalign64or128:$addr,
+ pred:$p)>;
def VLD4DUPdWB_fixed_Asm_8 :
NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
- (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListFourDAllLanes:$list, addrmode6dupalign32:$addr,
+ pred:$p)>;
def VLD4DUPdWB_fixed_Asm_16 :
NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
- (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListFourDAllLanes:$list, addrmode6dupalign64:$addr,
+ pred:$p)>;
def VLD4DUPdWB_fixed_Asm_32 :
NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
- (ins VecListFourDAllLanes:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListFourDAllLanes:$list, addrmode6dupalign64or128:$addr,
+ pred:$p)>;
def VLD4DUPqWB_fixed_Asm_8 :
NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
- (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListFourQAllLanes:$list, addrmode6dupalign32:$addr,
+ pred:$p)>;
def VLD4DUPqWB_fixed_Asm_16 :
NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
- (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListFourQAllLanes:$list, addrmode6dupalign64:$addr,
+ pred:$p)>;
def VLD4DUPqWB_fixed_Asm_32 :
NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
- (ins VecListFourQAllLanes:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListFourQAllLanes:$list, addrmode6dupalign64or128:$addr,
+ pred:$p)>;
def VLD4DUPdWB_register_Asm_8 :
NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
- (ins VecListFourDAllLanes:$list, addrmode6:$addr,
+ (ins VecListFourDAllLanes:$list, addrmode6dupalign32:$addr,
rGPR:$Rm, pred:$p)>;
def VLD4DUPdWB_register_Asm_16 :
NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
- (ins VecListFourDAllLanes:$list, addrmode6:$addr,
+ (ins VecListFourDAllLanes:$list, addrmode6dupalign64:$addr,
rGPR:$Rm, pred:$p)>;
def VLD4DUPdWB_register_Asm_32 :
NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
- (ins VecListFourDAllLanes:$list, addrmode6:$addr,
- rGPR:$Rm, pred:$p)>;
+ (ins VecListFourDAllLanes:$list,
+ addrmode6dupalign64or128:$addr, rGPR:$Rm, pred:$p)>;
def VLD4DUPqWB_register_Asm_8 :
NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
- (ins VecListFourQAllLanes:$list, addrmode6:$addr,
+ (ins VecListFourQAllLanes:$list, addrmode6dupalign32:$addr,
rGPR:$Rm, pred:$p)>;
def VLD4DUPqWB_register_Asm_16 :
NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
- (ins VecListFourQAllLanes:$list, addrmode6:$addr,
+ (ins VecListFourQAllLanes:$list, addrmode6dupalign64:$addr,
rGPR:$Rm, pred:$p)>;
def VLD4DUPqWB_register_Asm_32 :
NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
- (ins VecListFourQAllLanes:$list, addrmode6:$addr,
- rGPR:$Rm, pred:$p)>;
+ (ins VecListFourQAllLanes:$list,
+ addrmode6dupalign64or128:$addr, rGPR:$Rm, pred:$p)>;
// VLD4 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
def VLD4LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
- (ins VecListFourDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
+ pred:$p)>;
def VLD4LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
- (ins VecListFourDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
+ pred:$p)>;
def VLD4LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
- (ins VecListFourDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr,
+ pred:$p)>;
def VLD4LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
- (ins VecListFourQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
+ pred:$p)>;
def VLD4LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
- (ins VecListFourQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr,
+ pred:$p)>;
def VLD4LNdWB_fixed_Asm_8 :
NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
- (ins VecListFourDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
+ pred:$p)>;
def VLD4LNdWB_fixed_Asm_16 :
NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
- (ins VecListFourDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
+ pred:$p)>;
def VLD4LNdWB_fixed_Asm_32 :
NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
- (ins VecListFourDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr,
+ pred:$p)>;
def VLD4LNqWB_fixed_Asm_16 :
NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
- (ins VecListFourQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
+ pred:$p)>;
def VLD4LNqWB_fixed_Asm_32 :
NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
- (ins VecListFourQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr,
+ pred:$p)>;
def VLD4LNdWB_register_Asm_8 :
NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
- (ins VecListFourDByteIndexed:$list, addrmode6:$addr,
+ (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
rGPR:$Rm, pred:$p)>;
def VLD4LNdWB_register_Asm_16 :
NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
- (ins VecListFourDHWordIndexed:$list, addrmode6:$addr,
+ (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
rGPR:$Rm, pred:$p)>;
def VLD4LNdWB_register_Asm_32 :
NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
- (ins VecListFourDWordIndexed:$list, addrmode6:$addr,
- rGPR:$Rm, pred:$p)>;
+ (ins VecListFourDWordIndexed:$list,
+ addrmode6align64or128:$addr, rGPR:$Rm, pred:$p)>;
def VLD4LNqWB_register_Asm_16 :
NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
- (ins VecListFourQHWordIndexed:$list, addrmode6:$addr,
+ (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
rGPR:$Rm, pred:$p)>;
def VLD4LNqWB_register_Asm_32 :
NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
- (ins VecListFourQWordIndexed:$list, addrmode6:$addr,
- rGPR:$Rm, pred:$p)>;
+ (ins VecListFourQWordIndexed:$list,
+ addrmode6align64or128:$addr, rGPR:$Rm, pred:$p)>;
@@ -6860,168 +7148,202 @@ def VLD4LNqWB_register_Asm_32 :
// VLD4 multiple structure pseudo-instructions. These need special handling for
// the vector operands that the normal instructions don't yet model.
// FIXME: Remove these when the register classes and instructions are updated.
def VLD4dAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
- (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
+ pred:$p)>;
def VLD4dAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
- (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
+ pred:$p)>;
def VLD4dAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
- (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
+ pred:$p)>;
def VLD4qAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
- (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
+ pred:$p)>;
def VLD4qAsm_16 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr",
- (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
+ pred:$p)>;
def VLD4qAsm_32 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr",
- (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
+ pred:$p)>;
def VLD4dWB_fixed_Asm_8 :
NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
- (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
+ pred:$p)>;
def VLD4dWB_fixed_Asm_16 :
NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
- (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
+ pred:$p)>;
def VLD4dWB_fixed_Asm_32 :
NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
- (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
+ pred:$p)>;
def VLD4qWB_fixed_Asm_8 :
NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr!",
- (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
+ pred:$p)>;
def VLD4qWB_fixed_Asm_16 :
NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr!",
- (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
+ pred:$p)>;
def VLD4qWB_fixed_Asm_32 :
NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr!",
- (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
+ pred:$p)>;
def VLD4dWB_register_Asm_8 :
NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
- (ins VecListFourD:$list, addrmode6:$addr,
+ (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
rGPR:$Rm, pred:$p)>;
def VLD4dWB_register_Asm_16 :
NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
- (ins VecListFourD:$list, addrmode6:$addr,
+ (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
rGPR:$Rm, pred:$p)>;
def VLD4dWB_register_Asm_32 :
NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
- (ins VecListFourD:$list, addrmode6:$addr,
+ (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
rGPR:$Rm, pred:$p)>;
def VLD4qWB_register_Asm_8 :
NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr, $Rm",
- (ins VecListFourQ:$list, addrmode6:$addr,
+ (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
rGPR:$Rm, pred:$p)>;
def VLD4qWB_register_Asm_16 :
NEONDataTypeAsmPseudoInst<"vld4${p}", ".16", "$list, $addr, $Rm",
- (ins VecListFourQ:$list, addrmode6:$addr,
+ (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
rGPR:$Rm, pred:$p)>;
def VLD4qWB_register_Asm_32 :
NEONDataTypeAsmPseudoInst<"vld4${p}", ".32", "$list, $addr, $Rm",
- (ins VecListFourQ:$list, addrmode6:$addr,
+ (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
rGPR:$Rm, pred:$p)>;
// VST4 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
def VST4LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr",
- (ins VecListFourDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
+ pred:$p)>;
def VST4LNdAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
- (ins VecListFourDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
+ pred:$p)>;
def VST4LNdAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
- (ins VecListFourDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr,
+ pred:$p)>;
def VST4LNqAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
- (ins VecListFourQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
+ pred:$p)>;
def VST4LNqAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
- (ins VecListFourQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr,
+ pred:$p)>;
def VST4LNdWB_fixed_Asm_8 :
NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!",
- (ins VecListFourDByteIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
+ pred:$p)>;
def VST4LNdWB_fixed_Asm_16 :
NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
- (ins VecListFourDHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
+ pred:$p)>;
def VST4LNdWB_fixed_Asm_32 :
NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
- (ins VecListFourDWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListFourDWordIndexed:$list, addrmode6align64or128:$addr,
+ pred:$p)>;
def VST4LNqWB_fixed_Asm_16 :
NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
- (ins VecListFourQHWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
+ pred:$p)>;
def VST4LNqWB_fixed_Asm_32 :
NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
- (ins VecListFourQWordIndexed:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListFourQWordIndexed:$list, addrmode6align64or128:$addr,
+ pred:$p)>;
def VST4LNdWB_register_Asm_8 :
NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm",
- (ins VecListFourDByteIndexed:$list, addrmode6:$addr,
+ (ins VecListFourDByteIndexed:$list, addrmode6align32:$addr,
rGPR:$Rm, pred:$p)>;
def VST4LNdWB_register_Asm_16 :
NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
- (ins VecListFourDHWordIndexed:$list, addrmode6:$addr,
+ (ins VecListFourDHWordIndexed:$list, addrmode6align64:$addr,
rGPR:$Rm, pred:$p)>;
def VST4LNdWB_register_Asm_32 :
NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
- (ins VecListFourDWordIndexed:$list, addrmode6:$addr,
- rGPR:$Rm, pred:$p)>;
+ (ins VecListFourDWordIndexed:$list,
+ addrmode6align64or128:$addr, rGPR:$Rm, pred:$p)>;
def VST4LNqWB_register_Asm_16 :
NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
- (ins VecListFourQHWordIndexed:$list, addrmode6:$addr,
+ (ins VecListFourQHWordIndexed:$list, addrmode6align64:$addr,
rGPR:$Rm, pred:$p)>;
def VST4LNqWB_register_Asm_32 :
NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
- (ins VecListFourQWordIndexed:$list, addrmode6:$addr,
- rGPR:$Rm, pred:$p)>;
+ (ins VecListFourQWordIndexed:$list,
+ addrmode6align64or128:$addr, rGPR:$Rm, pred:$p)>;
// VST4 multiple structure pseudo-instructions. These need special handling for
// the vector operands that the normal instructions don't yet model.
// FIXME: Remove these when the register classes and instructions are updated.
def VST4dAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr",
- (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
+ pred:$p)>;
def VST4dAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
- (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
+ pred:$p)>;
def VST4dAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
- (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
+ pred:$p)>;
def VST4qAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr",
- (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
+ pred:$p)>;
def VST4qAsm_16 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr",
- (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
+ pred:$p)>;
def VST4qAsm_32 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr",
- (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
+ pred:$p)>;
def VST4dWB_fixed_Asm_8 :
NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!",
- (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
+ pred:$p)>;
def VST4dWB_fixed_Asm_16 :
NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
- (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
+ pred:$p)>;
def VST4dWB_fixed_Asm_32 :
NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
- (ins VecListFourD:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
+ pred:$p)>;
def VST4qWB_fixed_Asm_8 :
NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr!",
- (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
+ pred:$p)>;
def VST4qWB_fixed_Asm_16 :
NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr!",
- (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
+ pred:$p)>;
def VST4qWB_fixed_Asm_32 :
NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr!",
- (ins VecListFourQ:$list, addrmode6:$addr, pred:$p)>;
+ (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
+ pred:$p)>;
def VST4dWB_register_Asm_8 :
NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm",
- (ins VecListFourD:$list, addrmode6:$addr,
+ (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
rGPR:$Rm, pred:$p)>;
def VST4dWB_register_Asm_16 :
NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
- (ins VecListFourD:$list, addrmode6:$addr,
+ (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
rGPR:$Rm, pred:$p)>;
def VST4dWB_register_Asm_32 :
NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
- (ins VecListFourD:$list, addrmode6:$addr,
+ (ins VecListFourD:$list, addrmode6align64or128or256:$addr,
rGPR:$Rm, pred:$p)>;
def VST4qWB_register_Asm_8 :
NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr, $Rm",
- (ins VecListFourQ:$list, addrmode6:$addr,
+ (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
rGPR:$Rm, pred:$p)>;
def VST4qWB_register_Asm_16 :
NEONDataTypeAsmPseudoInst<"vst4${p}", ".16", "$list, $addr, $Rm",
- (ins VecListFourQ:$list, addrmode6:$addr,
+ (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
rGPR:$Rm, pred:$p)>;
def VST4qWB_register_Asm_32 :
NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
- (ins VecListFourQ:$list, addrmode6:$addr,
+ (ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
rGPR:$Rm, pred:$p)>;
// VMOV/VMVN takes an optional datatype suffix
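Every VLD/VST pseudo-instruction in the hunks above trades the generic addrmode6 operand for an alignment-specific variant (addrmode6alignNone, addrmode6align64, addrmode6dupalign64or128, and so on), so the assembler can reject an alignment suffix the encoding cannot represent instead of silently accepting it. As a rough sketch only, with a made-up helper name rather than the actual ARMAsmParser code, each operand class amounts to a membership test on the parsed alignment:

#include <initializer_list>

// Hypothetical model of an operand class such as addrmode6align64or128:
// accept either no alignment suffix at all, or one of the listed widths.
static bool isAcceptableNEONAlignment(unsigned AlignInBits,
                                      std::initializer_list<unsigned> Allowed) {
  if (AlignInBits == 0)
    return true;                          // no ":64"-style suffix was written
  for (unsigned Width : Allowed)
    if (Width == AlignInBits)
      return true;
  return false;
}

Under this model, "vld4.8 {d0[],d1[],d2[],d3[]}, [r0:32]" parses against addrmode6dupalign32, while "vld3.8 {d0,d1,d2}, [r0:128]" is diagnosed, since the VLD3 multiple-structure operands above only allow a 64-bit alignment.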
diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td
index 754295f..e17f73a 100644
--- a/lib/Target/ARM/ARMInstrThumb.td
+++ b/lib/Target/ARM/ARMInstrThumb.td
@@ -269,7 +269,8 @@ class T1SystemEncoding<bits<8> opc>
let Inst{7-0} = opc;
}
-def tHINT : T1pI<(outs), (ins imm0_15:$imm), NoItinerary, "hint", "\t$imm", []>,
+def tHINT : T1pI<(outs), (ins imm0_15:$imm), NoItinerary, "hint", "\t$imm",
+ [(int_arm_hint imm0_15:$imm)]>,
T1SystemEncoding<0x00>,
Requires<[IsThumb, HasV6M]> {
bits<4> imm;
@@ -288,7 +289,6 @@ def : tHintAlias<"sev$p", (tHINT 4, pred:$p)>; // A8.6.157
def : tInstAlias<"sevl$p", (tHINT 5, pred:$p)> {
let Predicates = [IsThumb2, HasV8];
}
-def : T2Pat<(int_arm_sevl), (tHINT 5)>;
// The imm operand $val can be used by a debugger to store more information
// about the breakpoint.
@@ -1193,6 +1193,15 @@ def tTST : // A8.6.230
[(ARMcmpZ (and_su tGPR:$Rn, tGPR:$Rm), 0)]>,
Sched<[WriteALU]>;
+// A8.8.247 UDF - Undefined (Encoding T1)
+def tUDF : TI<(outs), (ins imm0_255:$imm8), IIC_Br, "udf\t$imm8",
+ [(int_arm_undefined imm0_255:$imm8)]>, Encoding16 {
+ bits<8> imm8;
+ let Inst{15-12} = 0b1101;
+ let Inst{11-8} = 0b1110;
+ let Inst{7-0} = imm8;
+}
+
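From the bit layout of tUDF above (Inst{15-12} = 0b1101, Inst{11-8} = 0b1110, Inst{7-0} = imm8), the T1 encoding is 0xDE00 plus the 8-bit immediate. A minimal sketch of that arithmetic, assuming nothing beyond the bits shown:

#include <cassert>
#include <cstdint>

// Assemble the 16-bit Thumb1 UDF encoding from the bit layout given above.
static uint16_t encodeT1UDF(uint8_t Imm8) {
  return static_cast<uint16_t>(0xDE00u | Imm8);  // 1101 1110 iiii iiii
}

int main() {
  assert(encodeT1UDF(0x00) == 0xDE00);  // udf #0
  assert(encodeT1UDF(0xFF) == 0xDEFF);  // udf #255
}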
// Zero-extend byte
def tUXTB : // A8.6.262
T1pIMiscEncode<{0,0,1,0,1,1,?}, (outs tGPR:$Rd), (ins tGPR:$Rm),
@@ -1308,6 +1317,18 @@ def : T1Pat<(addc tGPR:$lhs, imm8_255_neg:$rhs),
def : T1Pat<(subc tGPR:$lhs, tGPR:$rhs),
(tSUBrr tGPR:$lhs, tGPR:$rhs)>;
+// Bswap 16 with load/store
+def : T1Pat<(srl (bswap (extloadi16 t_addrmode_rrs2:$addr)), (i32 16)),
+ (tREV16 (tLDRHr t_addrmode_rrs2:$addr))>;
+def : T1Pat<(srl (bswap (extloadi16 t_addrmode_is2:$addr)), (i32 16)),
+ (tREV16 (tLDRHi t_addrmode_is2:$addr))>;
+def : T1Pat<(truncstorei16 (srl (bswap tGPR:$Rn), (i32 16)),
+ t_addrmode_rrs2:$addr),
+ (tSTRHr (tREV16 tGPR:$Rn), t_addrmode_rrs2:$addr)>;
+def : T1Pat<(truncstorei16 (srl (bswap tGPR:$Rn), (i32 16)),
+ t_addrmode_is2:$addr),
+            (tSTRHi (tREV16 tGPR:$Rn), t_addrmode_is2:$addr)>;
+
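These patterns fold a 16-bit byte swap through a halfword load or store: for an i32 holding a zero-extended halfword, (srl (bswap x), (i32 16)) is exactly the byte-swapped halfword that REV16 produces after an LDRH. A small sketch of the arithmetic, using the GCC/Clang bswap builtin and an invented helper name:

#include <cassert>
#include <cstdint>

// For a halfword zero-extended into 32 bits, bswap then shift right by 16
// swaps the two low bytes, matching REV16 on the loaded halfword.
static uint32_t bswap16ViaI32(uint32_t ZExtHalf) {
  return __builtin_bswap32(ZExtHalf) >> 16;
}

int main() {
  assert(bswap16ViaI32(0x0000A1B2u) == 0x0000B2A1u);
}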
// ConstantPool
def : T1Pat<(ARMWrapper tconstpool :$dst), (tLEApcrel tconstpool :$dst)>;
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index 387bd60..c30d6ab 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -1445,7 +1445,7 @@ defm t2STRH:T2I_st<0b01,"strh", IIC_iStore_bh_i, IIC_iStore_bh_si,
// Store doubleword
let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in
def t2STRDi8 : T2Ii8s4<1, 0, 0, (outs),
- (ins GPR:$Rt, GPR:$Rt2, t2addrmode_imm8s4:$addr),
+ (ins rGPR:$Rt, rGPR:$Rt2, t2addrmode_imm8s4:$addr),
IIC_iStore_d_r, "strd", "\t$Rt, $Rt2, $addr", "", []>;
// Indexed stores
@@ -1676,7 +1676,7 @@ defm t2PLI : T2Ipl<0, 1, "pli">, Requires<[IsThumb2,HasV7]>;
// pci variant is very similar to i12, but supports negative offsets
// from the PC. Only PLD and PLI have pci variants (not PLDW)
class T2Iplpci<bits<1> inst, string opc> : T2Iso<(outs), (ins t2ldrlabel:$addr),
- IIC_Preload, opc, "\t$addr",
+ IIC_Preload, opc, "\t$addr",
[(ARMPreload (ARMWrapper tconstpool:$addr),
(i32 0), (i32 inst))]>, Sched<[WritePreLd]> {
let Inst{31-25} = 0b1111100;
@@ -1918,7 +1918,7 @@ def t2MOVi16 : T2I<(outs rGPR:$Rd), (ins imm0_65535_expr:$imm), IIC_iMOVi,
let DecoderMethod = "DecodeT2MOVTWInstruction";
}
-def : t2InstAlias<"mov${p} $Rd, $imm",
+def : t2InstAlias<"mov${p} $Rd, $imm",
(t2MOVi16 rGPR:$Rd, imm256_65535_expr:$imm, pred:$p)>;
def t2MOVi16_ga_pcrel : PseudoInst<(outs rGPR:$Rd),
@@ -2407,6 +2407,19 @@ def t2UBFX: T2TwoRegBitFI<
let Inst{15} = 0;
}
+// A8.8.247 UDF - Undefined (Encoding T2)
+def t2UDF : T2XI<(outs), (ins imm0_65535:$imm16), IIC_Br, "udf.w\t$imm16",
+ [(int_arm_undefined imm0_65535:$imm16)]> {
+ bits<16> imm16;
+ let Inst{31-29} = 0b111;
+ let Inst{28-27} = 0b10;
+ let Inst{26-20} = 0b1111111;
+ let Inst{19-16} = imm16{15-12};
+ let Inst{15} = 0b1;
+ let Inst{14-12} = 0b010;
+ let Inst{11-0} = imm16{11-0};
+}
+
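The T2 encoding splits imm16 across the instruction word: imm16{15-12} lands in Inst{19-16} and imm16{11-0} in Inst{11-0}, around fixed opcode bits. Reassembling the word from the layout above, as a sketch only:

#include <cassert>
#include <cstdint>

// Assemble the 32-bit Thumb2 UDF.W encoding from the bit layout given above.
static uint32_t encodeT2UDF(uint16_t Imm16) {
  uint32_t Inst = 0xF7F0A000u;              // all of the fixed opcode bits
  Inst |= uint32_t(Imm16 >> 12) << 16;      // Inst{19-16} = imm16{15-12}
  Inst |= Imm16 & 0x0FFFu;                  // Inst{11-0}  = imm16{11-0}
  return Inst;
}

int main() {
  assert(encodeT2UDF(0x0000) == 0xF7F0A000u);  // udf.w #0
  assert(encodeT2UDF(0xFFFF) == 0xF7FFAFFFu);  // udf.w #65535
}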
// A8.6.18 BFI - Bitfield insert (Encoding T1)
let Constraints = "$src = $Rd" in {
def t2BFI : T2TwoRegBitFI<(outs rGPR:$Rd),
@@ -3495,8 +3508,8 @@ def t2B : T2I<(outs), (ins uncondbrtarget:$target), IIC_Br,
let Inst{25-16} = target{20-11};
let Inst{10-0} = target{10-0};
let DecoderMethod = "DecodeT2BInstruction";
- let AsmMatchConverter = "cvtThumbBranches";
-}
+ let AsmMatchConverter = "cvtThumbBranches";
+}
let isNotDuplicable = 1, isIndirectBranch = 1 in {
def t2BR_JT : t2PseudoInst<(outs),
@@ -3671,7 +3684,8 @@ def : t2InstAlias<"cps.w $mode", (t2CPS1p imm0_31:$mode), 0>;
// A6.3.4 Branches and miscellaneous control
// Table A6-14 Change Processor State, and hint instructions
-def t2HINT : T2I<(outs), (ins imm0_239:$imm), NoItinerary, "hint", ".w\t$imm",[]> {
+def t2HINT : T2I<(outs), (ins imm0_239:$imm), NoItinerary, "hint", ".w\t$imm",
+ [(int_arm_hint imm0_239:$imm)]> {
bits<8> imm;
let Inst{31-3} = 0b11110011101011111000000000000;
let Inst{7-0} = imm;
@@ -3698,7 +3712,7 @@ def t2DBG : T2I<(outs), (ins imm0_15:$opt), NoItinerary, "dbg", "\t$opt", []> {
// Secure Monitor Call is a system instruction.
// Option = Inst{19-16}
-def t2SMC : T2I<(outs), (ins imm0_15:$opt), NoItinerary, "smc", "\t$opt",
+def t2SMC : T2I<(outs), (ins imm0_15:$opt), NoItinerary, "smc", "\t$opt",
[]>, Requires<[IsThumb2, HasTrustZone]> {
let Inst{31-27} = 0b11110;
let Inst{26-20} = 0b1111111;
@@ -4278,7 +4292,7 @@ def : t2InstAlias<"sbc${s}${p} $Rd, $Rn, $ShiftedRm",
// Aliases for ADD without the ".w" optional width specifier.
def : t2InstAlias<"add${s}${p} $Rd, $Rn, $imm",
- (t2ADDri GPRnopc:$Rd, GPRnopc:$Rn, t2_so_imm:$imm, pred:$p,
+ (t2ADDri GPRnopc:$Rd, GPRnopc:$Rn, t2_so_imm:$imm, pred:$p,
cc_out:$s)>;
def : t2InstAlias<"add${p} $Rd, $Rn, $imm",
(t2ADDri12 GPRnopc:$Rd, GPR:$Rn, imm0_4095:$imm, pred:$p)>;
diff --git a/lib/Target/ARM/ARMJITInfo.cpp b/lib/Target/ARM/ARMJITInfo.cpp
index 73c6eb7..8821c2d 100644
--- a/lib/Target/ARM/ARMJITInfo.cpp
+++ b/lib/Target/ARM/ARMJITInfo.cpp
@@ -11,7 +11,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "jit"
#include "ARMJITInfo.h"
#include "ARMConstantPoolValue.h"
#include "ARMRelocations.h"
@@ -25,6 +24,8 @@
#include <cstdlib>
using namespace llvm;
+#define DEBUG_TYPE "jit"
+
void ARMJITInfo::replaceMachineCodeForFunction(void *Old, void *New) {
report_fatal_error("ARMJITInfo::replaceMachineCodeForFunction");
}
@@ -319,13 +320,13 @@ void ARMJITInfo::relocate(void *Function, MachineRelocation *MR,
break;
}
case ARM::reloc_arm_movw: {
- ResultPtr = ResultPtr & 0xFFFF;
+ ResultPtr = ResultPtr & 0xFFFF;
*((intptr_t*)RelocPos) |= ResultPtr & 0xFFF;
*((intptr_t*)RelocPos) |= ((ResultPtr >> 12) & 0xF) << 16;
break;
}
case ARM::reloc_arm_movt: {
- ResultPtr = (ResultPtr >> 16) & 0xFFFF;
+ ResultPtr = (ResultPtr >> 16) & 0xFFFF;
*((intptr_t*)RelocPos) |= ResultPtr & 0xFFF;
*((intptr_t*)RelocPos) |= ((ResultPtr >> 12) & 0xF) << 16;
break;
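Both relocation cases reduce ResultPtr to a 16-bit value and scatter it into the MOVW/MOVT immediate fields, imm12 in bits 11-0 and imm4 in bits 19-16. The same field split as a standalone sketch, not the JIT's actual types:

#include <cassert>
#include <cstdint>

// Insert a 16-bit value into the MOVW/MOVT immediate fields of an ARM
// instruction word: imm12 in bits 11-0, imm4 in bits 19-16.
static uint32_t insertMovImm16(uint32_t Inst, uint32_t Value16) {
  Inst |= Value16 & 0xFFFu;                 // imm12
  Inst |= ((Value16 >> 12) & 0xFu) << 16;   // imm4
  return Inst;
}

int main() {
  assert(insertMovImm16(0, 0xABCD) == 0xA0BCDu);
}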
diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index 48e0bd7..ee7df54 100644
--- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -12,13 +12,14 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "arm-ldst-opt"
#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
+#include "ARMISelLowering.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMSubtarget.h"
#include "MCTargetDesc/ARMAddressingModes.h"
+#include "Thumb1RegisterInfo.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
@@ -42,6 +43,8 @@
#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
+#define DEBUG_TYPE "arm-ldst-opt"
+
STATISTIC(NumLDMGened , "Number of ldm instructions generated");
STATISTIC(NumSTMGened , "Number of stm instructions generated");
STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
@@ -65,9 +68,10 @@ namespace {
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
const ARMSubtarget *STI;
+ const TargetLowering *TL;
ARMFunctionInfo *AFI;
RegScavenger *RS;
- bool isThumb2;
+ bool isThumb1, isThumb2;
bool runOnMachineFunction(MachineFunction &Fn) override;
@@ -93,7 +97,10 @@ namespace {
void findUsesOfImpDef(SmallVectorImpl<MachineOperand *> &UsesOfImpDefs,
const MemOpQueue &MemOps, unsigned DefReg,
unsigned RangeBegin, unsigned RangeEnd);
-
+ void UpdateBaseRegUses(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ DebugLoc dl, unsigned Base, unsigned WordOffset,
+ ARMCC::CondCodes Pred, unsigned PredReg);
bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
int Offset, unsigned Base, bool BaseKill, int Opcode,
ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
@@ -119,7 +126,6 @@ namespace {
ARMCC::CondCodes Pred, unsigned PredReg,
unsigned Scratch, MemOpQueue &MemOps,
SmallVectorImpl<MachineBasicBlock::iterator> &Merges);
-
void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI);
@@ -159,6 +165,21 @@ static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
case ARM_AM::db: return ARM::STMDB;
case ARM_AM::ib: return ARM::STMIB;
}
+ case ARM::tLDRi:
+    // tLDMIA is writeback-only, except when the base register is in the
+    // input reglist.
+ ++NumLDMGened;
+ switch (Mode) {
+ default: llvm_unreachable("Unhandled submode!");
+ case ARM_AM::ia: return ARM::tLDMIA;
+ }
+ case ARM::tSTRi:
+ // There is no non-writeback tSTMIA either.
+ ++NumSTMGened;
+ switch (Mode) {
+ default: llvm_unreachable("Unhandled submode!");
+ case ARM_AM::ia: return ARM::tSTMIA_UPD;
+ }
case ARM::t2LDRi8:
case ARM::t2LDRi12:
++NumLDMGened;
@@ -217,6 +238,9 @@ AMSubMode getLoadStoreMultipleSubMode(int Opcode) {
case ARM::LDMIA_UPD:
case ARM::STMIA:
case ARM::STMIA_UPD:
+ case ARM::tLDMIA:
+ case ARM::tLDMIA_UPD:
+ case ARM::tSTMIA_UPD:
case ARM::t2LDMIA_RET:
case ARM::t2LDMIA:
case ARM::t2LDMIA_UPD:
@@ -263,12 +287,20 @@ AMSubMode getLoadStoreMultipleSubMode(int Opcode) {
} // end namespace ARM_AM
} // end namespace llvm
+static bool isT1i32Load(unsigned Opc) {
+ return Opc == ARM::tLDRi;
+}
+
static bool isT2i32Load(unsigned Opc) {
return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
}
static bool isi32Load(unsigned Opc) {
- return Opc == ARM::LDRi12 || isT2i32Load(Opc);
+  return Opc == ARM::LDRi12 || isT1i32Load(Opc) || isT2i32Load(Opc);
+}
+
+static bool isT1i32Store(unsigned Opc) {
+ return Opc == ARM::tSTRi;
}
static bool isT2i32Store(unsigned Opc) {
@@ -276,7 +308,102 @@ static bool isT2i32Store(unsigned Opc) {
}
static bool isi32Store(unsigned Opc) {
- return Opc == ARM::STRi12 || isT2i32Store(Opc);
+ return Opc == ARM::STRi12 || isT1i32Store(Opc) || isT2i32Store(Opc);
+}
+
+static unsigned getImmScale(unsigned Opc) {
+ switch (Opc) {
+ default: llvm_unreachable("Unhandled opcode!");
+ case ARM::tLDRi:
+ case ARM::tSTRi:
+ return 1;
+ case ARM::tLDRHi:
+ case ARM::tSTRHi:
+ return 2;
+ case ARM::tLDRBi:
+ case ARM::tSTRBi:
+ return 4;
+ }
+}
+
+/// Update future uses of the base register with the offset introduced
+/// due to writeback. This function only works on Thumb1.
+void
+ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ DebugLoc dl, unsigned Base,
+ unsigned WordOffset,
+ ARMCC::CondCodes Pred, unsigned PredReg) {
+ assert(isThumb1 && "Can only update base register uses for Thumb1!");
+
+ // Start updating any instructions with immediate offsets. Insert a sub before
+ // the first non-updateable instruction (if any).
+ for (; MBBI != MBB.end(); ++MBBI) {
+ if (MBBI->readsRegister(Base)) {
+ unsigned Opc = MBBI->getOpcode();
+ int Offset;
+ bool InsertSub = false;
+
+ if (Opc == ARM::tLDRi || Opc == ARM::tSTRi ||
+ Opc == ARM::tLDRHi || Opc == ARM::tSTRHi ||
+ Opc == ARM::tLDRBi || Opc == ARM::tSTRBi) {
+ // Loads and stores with immediate offsets can be updated, but only if
+ // the new offset isn't negative.
+ // The MachineOperand containing the offset immediate is the last one
+ // before predicates.
+ MachineOperand &MO =
+ MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
+ // The offsets are scaled by 1, 2 or 4 depending on the Opcode
+ Offset = MO.getImm() - WordOffset * getImmScale(Opc);
+ if (Offset >= 0)
+ MO.setImm(Offset);
+ else
+ InsertSub = true;
+
+ } else if (Opc == ARM::tSUBi8 || Opc == ARM::tADDi8) {
+ // SUB/ADD using this register. Merge it with the update.
+ // If the merged offset is too large, insert a new sub instead.
+ MachineOperand &MO =
+ MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
+ Offset = (Opc == ARM::tSUBi8) ?
+ MO.getImm() + WordOffset * 4 :
+          MO.getImm() - WordOffset * 4;
+ if (TL->isLegalAddImmediate(Offset)) {
+ MO.setImm(Offset);
+ // The base register has now been reset, so exit early.
+ return;
+ } else {
+ InsertSub = true;
+ }
+
+ } else {
+ // Can't update the instruction.
+ InsertSub = true;
+ }
+
+ if (InsertSub) {
+ // An instruction above couldn't be updated, so insert a sub.
+ AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(ARM::tSUBi8), Base))
+ .addReg(Base, getKillRegState(true)).addImm(WordOffset * 4)
+ .addImm(Pred).addReg(PredReg);
+ return;
+ }
+ }
+
+ if (MBBI->killsRegister(Base))
+ // Register got killed. Stop updating.
+ return;
+ }
+
+ // The end of the block was reached. This means register liveness escapes the
+ // block, and it's necessary to insert a sub before the last instruction.
+ if (MBB.succ_size() > 0)
+ // But only insert the SUB if there is actually a successor block.
+ // FIXME: Check more carefully if register is live at this point, e.g. by
+ // also examining the successor block's register liveness information.
+ AddDefaultT1CC(BuildMI(MBB, --MBBI, dl, TII->get(ARM::tSUBi8), Base))
+ .addReg(Base, getKillRegState(true)).addImm(WordOffset * 4)
+ .addImm(Pred).addReg(PredReg);
}
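UpdateBaseRegUses rewrites each later use of the written-back base: immediate loads and stores lose the writeback amount from their offset (in the offset's own units, hence getImmScale), a following ADD/SUB of the base is merged when the combined immediate stays legal, and anything else forces a compensating SUB. The offset rule as a minimal standalone sketch, with an invented helper:

#include <cassert>

// A Thumb1 immediate offset is stored in units of the access size, so a base
// advanced by WordOffset words costs WordOffset * getImmScale(Opc) units.
static bool rewriteThumb1Offset(int &ImmUnits, unsigned WordOffset,
                                unsigned Scale) {
  int NewImm = ImmUnits - int(WordOffset * Scale);
  if (NewImm < 0)
    return false;                  // went negative: caller inserts a SUB
  ImmUnits = NewImm;
  return true;
}

int main() {
  int WordImm = 5;                 // ldr rX, [base, #20] is 5 word units
  assert(rewriteThumb1Offset(WordImm, 3, 1) && WordImm == 2);
  int ByteImm = 5;                 // ldrb rX, [base, #5] is 5 byte units
  assert(!rewriteThumb1Offset(ByteImm, 3, 4));  // 5 - 12 < 0
}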
/// MergeOps - Create and insert a LDM or STM with Base as base register and
@@ -296,18 +423,19 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
return false;
ARM_AM::AMSubMode Mode = ARM_AM::ia;
- // VFP and Thumb2 do not support IB or DA modes.
+ // VFP and Thumb2 do not support IB or DA modes. Thumb1 only supports IA.
bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
- bool haveIBAndDA = isNotVFP && !isThumb2;
- if (Offset == 4 && haveIBAndDA)
+ bool haveIBAndDA = isNotVFP && !isThumb2 && !isThumb1;
+
+ if (Offset == 4 && haveIBAndDA) {
Mode = ARM_AM::ib;
- else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA)
+ } else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA) {
Mode = ARM_AM::da;
- else if (Offset == -4 * (int)NumRegs && isNotVFP)
+ } else if (Offset == -4 * (int)NumRegs && isNotVFP && !isThumb1) {
// VLDM/VSTM do not support DB mode without also updating the base reg.
Mode = ARM_AM::db;
- else if (Offset != 0) {
- // Check if this is a supported opcode before we insert instructions to
+ } else if (Offset != 0) {
+ // Check if this is a supported opcode before inserting instructions to
// calculate a new base register.
if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return false;
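The submode choice above, restated as a sketch (byte offsets; the Thumb1 and VFP special cases are omitted and the helper is invented for illustration):

#include <cassert>

enum class AMSubMode { ia, ib, da, db };

// Offset is in bytes relative to the base, NumRegs is the register list
// length; IB and DA exist only for ARM-mode integer loads/stores.
static AMSubMode pickSubMode(int Offset, int NumRegs, bool HaveIBAndDA) {
  if (Offset == 4 && HaveIBAndDA)
    return AMSubMode::ib;
  if (Offset == -4 * NumRegs + 4 && HaveIBAndDA)
    return AMSubMode::da;
  if (Offset == -4 * NumRegs)
    return AMSubMode::db;
  return AMSubMode::ia;            // Offset 0, or a new base gets computed
}

int main() {
  assert(pickSubMode(4, 3, true) == AMSubMode::ib);
  assert(pickSubMode(-8, 3, true) == AMSubMode::da);    // -4 * 3 + 4
  assert(pickSubMode(-12, 3, true) == AMSubMode::db);   // -4 * 3
}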
@@ -318,41 +446,98 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
return false;
unsigned NewBase;
- if (isi32Load(Opcode))
+ if (isi32Load(Opcode)) {
      // If it is a load, then just use one of the destination registers
      // as the new base.
NewBase = Regs[NumRegs-1].first;
- else {
+ } else {
      // Use the scratch register as the new base.
NewBase = Scratch;
if (NewBase == 0)
return false;
}
- int BaseOpc = !isThumb2 ? ARM::ADDri : ARM::t2ADDri;
+
+ int BaseOpc =
+ isThumb2 ? ARM::t2ADDri :
+ isThumb1 ? ARM::tADDi8 : ARM::ADDri;
+
if (Offset < 0) {
- BaseOpc = !isThumb2 ? ARM::SUBri : ARM::t2SUBri;
+ BaseOpc =
+ isThumb2 ? ARM::t2SUBri :
+ isThumb1 ? ARM::tSUBi8 : ARM::SUBri;
Offset = - Offset;
}
- int ImmedOffset = isThumb2
- ? ARM_AM::getT2SOImmVal(Offset) : ARM_AM::getSOImmVal(Offset);
- if (ImmedOffset == -1)
- // FIXME: Try t2ADDri12 or t2SUBri12?
- return false; // Probably not worth it then.
-
- BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
- .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
- .addImm(Pred).addReg(PredReg).addReg(0);
+
+ if (!TL->isLegalAddImmediate(Offset))
+ // FIXME: Try add with register operand?
+ return false; // Probably not worth it then.
+
+ if (isThumb1) {
+ if (Base != NewBase) {
+ // Need to insert a MOV to the new base first.
+ // FIXME: If the immediate fits in 3 bits, use ADD instead.
+ BuildMI(MBB, MBBI, dl, TII->get(ARM::tMOVr), NewBase)
+ .addReg(Base, getKillRegState(BaseKill))
+ .addImm(Pred).addReg(PredReg);
+ }
+ AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase))
+ .addReg(NewBase, getKillRegState(true)).addImm(Offset)
+ .addImm(Pred).addReg(PredReg);
+ } else {
+ BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
+ .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
+ .addImm(Pred).addReg(PredReg).addReg(0);
+ }
+
Base = NewBase;
- BaseKill = true; // New base is always killed right its use.
+ BaseKill = true; // New base is always killed straight away.
}
bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
Opcode == ARM::VLDRD);
+
+ // Get LS multiple opcode. Note that for Thumb1 this might be an opcode with
+ // base register writeback.
Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
if (!Opcode) return false;
- MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode))
- .addReg(Base, getKillRegState(BaseKill))
- .addImm(Pred).addReg(PredReg);
+
+ bool Writeback = isThumb1; // Thumb1 LDM/STM have base reg writeback.
+
+ // Exception: If the base register is in the input reglist, Thumb1 LDM is
+ // non-writeback. Check for this.
+ if (Opcode == ARM::tLDRi && isThumb1)
+ for (unsigned I = 0; I < NumRegs; ++I)
+ if (Base == Regs[I].first) {
+ Writeback = false;
+ break;
+ }
+
+ MachineInstrBuilder MIB;
+
+ if (Writeback) {
+ if (Opcode == ARM::tLDMIA)
+ // Update tLDMIA with writeback if necessary.
+ Opcode = ARM::tLDMIA_UPD;
+
+ // The base isn't dead after a merged instruction with writeback. Update
+ // future uses of the base with the added offset (if possible), or reset
+ // the base register as necessary.
+ if (!BaseKill)
+ UpdateBaseRegUses(MBB, MBBI, dl, Base, NumRegs, Pred, PredReg);
+
+ MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode));
+
+ // Thumb1: we might need to set base writeback when building the MI.
+ MIB.addReg(Base, getDefRegState(true))
+ .addReg(Base, getKillRegState(BaseKill));
+ } else {
+ // No writeback, simply build the MachineInstr.
+ MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode));
+ MIB.addReg(Base, getKillRegState(BaseKill));
+ }
+
+ MIB.addImm(Pred).addReg(PredReg);
+
for (unsigned i = 0; i != NumRegs; ++i)
MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
| getKillRegState(Regs[i].second));
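The writeback exception above mirrors the architecture: Thumb1 LDM always writes the base back unless the base register itself appears in the register list, so "ldm r0, {r0, r1}" does not write the incremented address back (r0 receives a loaded value instead), while "ldm r0!, {r1, r2}" advances r0. Reduced to a standalone predicate, for illustration only:

#include <algorithm>
#include <cassert>
#include <vector>

// Thumb1 LDM writes the base register back exactly when the base is not
// also one of the loaded registers.
static bool thumb1LDMWritesBack(unsigned Base,
                                const std::vector<unsigned> &Regs) {
  return std::find(Regs.begin(), Regs.end(), Base) == Regs.end();
}

int main() {
  assert(!thumb1LDMWritesBack(0, {0, 1}));   // ldm r0, {r0, r1}
  assert(thumb1LDMWritesBack(0, {1, 2}));    // ldm r0!, {r1, r2}
}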
@@ -492,7 +677,7 @@ void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
// affected uses.
for (SmallVectorImpl<MachineOperand *>::iterator I = UsesOfImpDefs.begin(),
E = UsesOfImpDefs.end();
- I != E; ++I)
+ I != E; ++I)
(*I)->setIsUndef();
for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
@@ -589,7 +774,6 @@ ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1;
MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset,
Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
- return;
}
static bool definesCPSR(MachineInstr *MI) {
@@ -616,6 +800,7 @@ static bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
bool CheckCPSRDef = false;
switch (MI->getOpcode()) {
default: return false;
+ case ARM::tSUBi8:
case ARM::t2SUBri:
case ARM::SUBri:
CheckCPSRDef = true;
@@ -628,10 +813,11 @@ static bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
if (Bytes == 0 || (Limit && Bytes >= Limit))
return false;
- unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME
+ unsigned Scale = (MI->getOpcode() == ARM::tSUBspi ||
+ MI->getOpcode() == ARM::tSUBi8) ? 4 : 1; // FIXME
if (!(MI->getOperand(0).getReg() == Base &&
MI->getOperand(1).getReg() == Base &&
- (MI->getOperand(2).getImm()*Scale) == Bytes &&
+ (MI->getOperand(2).getImm() * Scale) == Bytes &&
getInstrPredicate(MI, MyPredReg) == Pred &&
MyPredReg == PredReg))
return false;
@@ -649,6 +835,7 @@ static bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
bool CheckCPSRDef = false;
switch (MI->getOpcode()) {
default: return false;
+ case ARM::tADDi8:
case ARM::t2ADDri:
case ARM::ADDri:
CheckCPSRDef = true;
@@ -661,10 +848,11 @@ static bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
// Make sure the offset fits in 8 bits.
return false;
- unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME
+ unsigned Scale = (MI->getOpcode() == ARM::tADDspi ||
+ MI->getOpcode() == ARM::tADDi8) ? 4 : 1; // FIXME
if (!(MI->getOperand(0).getReg() == Base &&
MI->getOperand(1).getReg() == Base &&
- (MI->getOperand(2).getImm()*Scale) == Bytes &&
+ (MI->getOperand(2).getImm() * Scale) == Bytes &&
getInstrPredicate(MI, MyPredReg) == Pred &&
MyPredReg == PredReg))
return false;
@@ -677,6 +865,8 @@ static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
default: return 0;
case ARM::LDRi12:
case ARM::STRi12:
+ case ARM::tLDRi:
+ case ARM::tSTRi:
case ARM::t2LDRi8:
case ARM::t2LDRi12:
case ARM::t2STRi8:
@@ -695,6 +885,9 @@ static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
case ARM::STMDA:
case ARM::STMDB:
case ARM::STMIB:
+ case ARM::tLDMIA:
+ case ARM::tLDMIA_UPD:
+ case ARM::tSTMIA_UPD:
case ARM::t2LDMIA:
case ARM::t2LDMDB:
case ARM::t2STMIA:
@@ -791,6 +984,9 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
bool &Advance,
MachineBasicBlock::iterator &I) {
+ // Thumb1 is already using updating loads/stores.
+ if (isThumb1) return false;
+
MachineInstr *MI = MBBI;
unsigned Base = MI->getOperand(0).getReg();
bool BaseKill = MI->getOperand(0).isKill();
@@ -927,6 +1123,10 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
const TargetInstrInfo *TII,
bool &Advance,
MachineBasicBlock::iterator &I) {
+ // Thumb1 doesn't have updating LDR/STR.
+ // FIXME: Use LDM/STM with single register instead.
+ if (isThumb1) return false;
+
MachineInstr *MI = MBBI;
unsigned Base = MI->getOperand(1).getReg();
bool BaseKill = MI->getOperand(1).isKill();
@@ -1002,7 +1202,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
return false;
if (isAM5) {
- // VLDM[SD}_UPD, VSTM[SD]_UPD
+ // VLDM[SD]_UPD, VSTM[SD]_UPD
// (There are no base-updating versions of VLDR/VSTR instructions, but the
// updating load/store-multiple instructions can be used with only one
// register.)
@@ -1100,6 +1300,8 @@ static bool isMemoryOp(const MachineInstr *MI) {
return MI->getOperand(1).isReg();
case ARM::LDRi12:
case ARM::STRi12:
+ case ARM::tLDRi:
+ case ARM::tSTRi:
case ARM::t2LDRi8:
case ARM::t2LDRi12:
case ARM::t2STRi8:
@@ -1137,6 +1339,10 @@ static int getMemoryOpOffset(const MachineInstr *MI) {
Opcode == ARM::LDRi12 || Opcode == ARM::STRi12)
return OffField;
+ // Thumb1 immediate offsets are scaled by 4
+ if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi)
+ return OffField * 4;
+
int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
: ARM_AM::getAM5Offset(OffField) * 4;
if (isAM3) {
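Since tLDRi/tSTRi store a word-scaled immediate, the optimizer has to multiply by 4 to recover the byte offset before it can check that a chain of accesses is contiguous. A tiny self-contained illustration of that arithmetic (the immediate values are made up):

    #include <cassert>

    int main() {
      int EncodedImms[] = {0, 1, 2};       // hypothetical tLDRi offset fields
      int PrevOffset = -4;
      for (int Imm : EncodedImms) {
        int ByteOffset = Imm * 4;          // the scaling added in this hunk
        assert(ByteOffset == PrevOffset + 4); // consecutive words: mergeable
        PrevOffset = ByteOffset;
      }
      return 0;
    }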
@@ -1408,16 +1614,20 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
if (MBBI == E)
// Reached the end of the block; try merging the memory instructions.
TryMerge = true;
- } else
+ } else {
TryMerge = true;
+ }
if (TryMerge) {
if (NumMemOps > 1) {
// Try to find a free register to use as a new base in case it's needed.
// First advance to the instruction just before the start of the chain.
AdvanceRS(MBB, MemOps);
+
// Find a scratch register.
- unsigned Scratch = RS->FindUnusedReg(&ARM::GPRRegClass);
+ unsigned Scratch =
+ RS->FindUnusedReg(isThumb1 ? &ARM::tGPRRegClass : &ARM::GPRRegClass);
+
// Process the load / store instructions.
RS->forward(std::prev(MBBI));
@@ -1483,6 +1693,8 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
/// =>
/// ldmfd sp!, {..., pc}
bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
+ // Thumb1 LDM doesn't allow high registers.
+ if (isThumb1) return false;
if (MBB.empty()) return false;
MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
@@ -1513,12 +1725,14 @@ bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
const TargetMachine &TM = Fn.getTarget();
+ TL = TM.getTargetLowering();
AFI = Fn.getInfo<ARMFunctionInfo>();
TII = TM.getInstrInfo();
TRI = TM.getRegisterInfo();
STI = &TM.getSubtarget<ARMSubtarget>();
RS = new RegScavenger();
isThumb2 = AFI->isThumb2Function();
+ isThumb1 = AFI->isThumbFunction() && !isThumb2;
bool Modified = false;
for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
@@ -1666,11 +1880,11 @@ ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
// FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
unsigned Scale = 1;
unsigned Opcode = Op0->getOpcode();
- if (Opcode == ARM::LDRi12)
+ if (Opcode == ARM::LDRi12) {
NewOpc = ARM::LDRD;
- else if (Opcode == ARM::STRi12)
+ } else if (Opcode == ARM::STRi12) {
NewOpc = ARM::STRD;
- else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
+ } else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
NewOpc = ARM::t2LDRDi8;
Scale = 4;
isT2 = true;
@@ -1678,8 +1892,9 @@ ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
NewOpc = ARM::t2STRDi8;
Scale = 4;
isT2 = true;
- } else
+ } else {
return false;
+ }
// Make sure the base address satisfies i64 ld / st alignment requirement.
// At the moment, we ignore the memoryoperand's value.
@@ -1746,8 +1961,8 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
while (Ops.size() > 1) {
unsigned FirstLoc = ~0U;
unsigned LastLoc = 0;
- MachineInstr *FirstOp = 0;
- MachineInstr *LastOp = 0;
+ MachineInstr *FirstOp = nullptr;
+ MachineInstr *LastOp = nullptr;
int LastOffset = 0;
unsigned LastOpcode = 0;
unsigned LastBytes = 0;
diff --git a/lib/Target/ARM/ARMOptimizeBarriersPass.cpp b/lib/Target/ARM/ARMOptimizeBarriersPass.cpp
index 20619fa..2a49255 100644
--- a/lib/Target/ARM/ARMOptimizeBarriersPass.cpp
+++ b/lib/Target/ARM/ARMOptimizeBarriersPass.cpp
@@ -8,8 +8,6 @@
//
//===------------------------------------------------------------------------------------------===//
-#define DEBUG_TYPE "double barriers"
-
#include "ARM.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMInstrInfo.h"
@@ -17,6 +15,8 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
using namespace llvm;
+#define DEBUG_TYPE "double barriers"
+
STATISTIC(NumDMBsRemoved, "Number of DMBs removed");
namespace {
@@ -25,9 +25,9 @@ public:
static char ID;
ARMOptimizeBarriersPass() : MachineFunctionPass(ID) {}
- virtual bool runOnMachineFunction(MachineFunction &Fn);
+ bool runOnMachineFunction(MachineFunction &Fn) override;
- virtual const char *getPassName() const {
+ const char *getPassName() const override {
return "optimise barriers pass";
}
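Several files in this commit get the same mechanical fix-up: DEBUG_TYPE is now defined after the includes (and after `using namespace llvm`), so the macro cannot leak into or clash with headers. A minimal sketch of the resulting layout, with a placeholder file and pass name:

    // FooPass.cpp -- sketch of the post-patch convention (names are placeholders).
    #include "llvm/Support/Debug.h"   // headers are included first...

    using namespace llvm;

    #define DEBUG_TYPE "foo-pass"     // ...and only then is DEBUG_TYPE defined.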
diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td
index 7f0fe05..b290e7f 100644
--- a/lib/Target/ARM/ARMRegisterInfo.td
+++ b/lib/Target/ARM/ARMRegisterInfo.td
@@ -116,13 +116,13 @@ def D15 : ARMReg<15, "d15", [S30, S31]>, DwarfRegNum<[271]>;
}
// VFP3 defines 16 additional double registers
-def D16 : ARMFReg<16, "d16">, DwarfRegNum<[272]>;
+def D16 : ARMFReg<16, "d16">, DwarfRegNum<[272]>;
def D17 : ARMFReg<17, "d17">, DwarfRegNum<[273]>;
def D18 : ARMFReg<18, "d18">, DwarfRegNum<[274]>;
def D19 : ARMFReg<19, "d19">, DwarfRegNum<[275]>;
def D20 : ARMFReg<20, "d20">, DwarfRegNum<[276]>;
def D21 : ARMFReg<21, "d21">, DwarfRegNum<[277]>;
-def D22 : ARMFReg<22, "d22">, DwarfRegNum<[278]>;
+def D22 : ARMFReg<22, "d22">, DwarfRegNum<[278]>;
def D23 : ARMFReg<23, "d23">, DwarfRegNum<[279]>;
def D24 : ARMFReg<24, "d24">, DwarfRegNum<[280]>;
def D25 : ARMFReg<25, "d25">, DwarfRegNum<[281]>;
@@ -158,11 +158,11 @@ def Q15 : ARMReg<15, "q15", [D30, D31]>;
// Current Program Status Register.
// We model fpscr with two registers: FPSCR models the control bits and will be
// reserved. FPSCR_NZCV models the flag bits and will be unreserved. APSR_NZCV
-// models the APSR when it's accessed by some special instructions. In such cases
+// models the APSR when it's accessed by some special instructions. In such cases
// it has the same encoding as PC.
def CPSR : ARMReg<0, "cpsr">;
def APSR : ARMReg<1, "apsr">;
-def APSR_NZCV : ARMReg<15, "apsr_nzcv">;
+def APSR_NZCV : ARMReg<15, "apsr_nzcv">;
def SPSR : ARMReg<2, "spsr">;
def FPSCR : ARMReg<3, "fpscr">;
def FPSCR_NZCV : ARMReg<3, "fpscr_nzcv"> {
diff --git a/lib/Target/ARM/ARMScheduleV6.td b/lib/Target/ARM/ARMScheduleV6.td
index 0ace9bc..57d0bfb 100644
--- a/lib/Target/ARM/ARMScheduleV6.td
+++ b/lib/Target/ARM/ARMScheduleV6.td
@@ -93,7 +93,7 @@ def ARMV6Itineraries : ProcessorItineraries<
InstrItinData<IIC_iMAC32 , [InstrStage<2, [V6_Pipe]>], [5, 1, 1, 2]>,
InstrItinData<IIC_iMUL64 , [InstrStage<3, [V6_Pipe]>], [6, 1, 1]>,
InstrItinData<IIC_iMAC64 , [InstrStage<3, [V6_Pipe]>], [6, 1, 1, 2]>,
-
+
// Integer load pipeline
//
// Immediate offset
@@ -181,7 +181,7 @@ def ARMV6Itineraries : ProcessorItineraries<
//
// Store multiple + update
InstrItinData<IIC_iStore_mu , [InstrStage<3, [V6_Pipe]>], [2]>,
-
+
// Branch
//
// no delay slots, so the latency of a branch is unimportant
diff --git a/lib/Target/ARM/ARMSelectionDAGInfo.cpp b/lib/Target/ARM/ARMSelectionDAGInfo.cpp
index ba3cf4d..008ad64 100644
--- a/lib/Target/ARM/ARMSelectionDAGInfo.cpp
+++ b/lib/Target/ARM/ARMSelectionDAGInfo.cpp
@@ -11,12 +11,13 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "arm-selectiondag-info"
#include "ARMTargetMachine.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/IR/DerivedTypes.h"
using namespace llvm;
+#define DEBUG_TYPE "arm-selectiondag-info"
+
ARMSelectionDAGInfo::ARMSelectionDAGInfo(const TargetMachine &TM)
: TargetSelectionDAGInfo(TM),
Subtarget(&TM.getSubtarget<ARMSubtarget>()) {
@@ -52,9 +53,10 @@ ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl,
EVT VT = MVT::i32;
unsigned VTSize = 4;
unsigned i = 0;
- const unsigned MAX_LOADS_IN_LDM = 6;
- SDValue TFOps[MAX_LOADS_IN_LDM];
- SDValue Loads[MAX_LOADS_IN_LDM];
+ // Emit a maximum of 4 loads in Thumb1, since we have fewer registers.
+ const unsigned MAX_LOADS_IN_LDM = Subtarget->isThumb1Only() ? 4 : 6;
+ SDValue TFOps[6];
+ SDValue Loads[6];
uint64_t SrcOff = 0, DstOff = 0;
// Emit up to MAX_LOADS_IN_LDM loads, then a TokenFactor barrier, then the
@@ -71,7 +73,8 @@ ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl,
TFOps[i] = Loads[i].getValue(1);
SrcOff += VTSize;
}
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ makeArrayRef(TFOps, i));
for (i = 0;
i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) {
@@ -82,7 +85,8 @@ ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl,
isVolatile, false, 0);
DstOff += VTSize;
}
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ makeArrayRef(TFOps, i));
EmittedNumMemOps += i;
}
@@ -112,7 +116,8 @@ ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl,
SrcOff += VTSize;
BytesLeft -= VTSize;
}
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ makeArrayRef(TFOps, i));
i = 0;
BytesLeft = BytesLeftSave;
@@ -133,7 +138,8 @@ ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl,
DstOff += VTSize;
BytesLeft -= VTSize;
}
- return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ makeArrayRef(TFOps, i));
}
// Adjust parameters for memset, EABI uses format (ptr, size, value),
@@ -146,7 +152,8 @@ EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl,
unsigned Align, bool isVolatile,
MachinePointerInfo DstPtrInfo) const {
// Use default for non-AAPCS (or MachO) subtargets
- if (!Subtarget->isAAPCS_ABI() || Subtarget->isTargetMachO())
+ if (!Subtarget->isAAPCS_ABI() || Subtarget->isTargetMachO() ||
+ Subtarget->isTargetWindows())
return SDValue();
const ARMTargetLowering &TLI =
@@ -179,22 +186,14 @@ EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl,
Args.push_back(Entry);
// Emit __eabi_memset call
- TargetLowering::CallLoweringInfo CLI(Chain,
- Type::getVoidTy(*DAG.getContext()), // return type
- false, // return sign ext
- false, // return zero ext
- false, // is var arg
- false, // is in regs
- 0, // number of fixed arguments
- TLI.getLibcallCallingConv(RTLIB::MEMSET), // call conv
- false, // is tail call
- false, // does not return
- false, // is return val used
- DAG.getExternalSymbol(TLI.getLibcallName(RTLIB::MEMSET),
- TLI.getPointerTy()), // callee
- Args, DAG, dl);
- std::pair<SDValue,SDValue> CallResult =
- TLI.LowerCallTo(CLI);
-
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(dl).setChain(Chain)
+ .setCallee(TLI.getLibcallCallingConv(RTLIB::MEMSET),
+ Type::getVoidTy(*DAG.getContext()),
+ DAG.getExternalSymbol(TLI.getLibcallName(RTLIB::MEMSET),
+ TLI.getPointerTy()), &Args, 0)
+ .setDiscardResult();
+
+ std::pair<SDValue,SDValue> CallResult = TLI.LowerCallTo(CLI);
return CallResult.second;
}
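For a rough sense of what the Thumb1-specific MAX_LOADS_IN_LDM limit does to the inline memcpy expansion, assume a 64-byte copy lowered to i32 operations (my arithmetic, not compiler output):

    #include <cstdio>

    int main() {
      const unsigned CopyBytes = 64;            // assumed copy size
      const unsigned NumMemOps = CopyBytes / 4; // 16 word-sized loads/stores
      for (int Thumb1 = 0; Thumb1 <= 1; ++Thumb1) {
        unsigned MaxLoads = Thumb1 ? 4 : 6;     // the limits from this hunk
        unsigned Batches = (NumMemOps + MaxLoads - 1) / MaxLoads;
        printf("%s: %u load batches\n",
               Thumb1 ? "Thumb1" : "ARM/Thumb2", Batches);
      }
      return 0; // prints 3 batches for ARM/Thumb2, 4 for Thumb1
    }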
diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp
index 73e2018..5b204f6 100644
--- a/lib/Target/ARM/ARMSubtarget.cpp
+++ b/lib/Target/ARM/ARMSubtarget.cpp
@@ -21,12 +21,14 @@
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetOptions.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "arm-subtarget"
+
#define GET_SUBTARGETINFO_TARGET_DESC
#define GET_SUBTARGETINFO_CTOR
#include "ARMGenSubtargetInfo.inc"
-using namespace llvm;
-
static cl::opt<bool>
ReserveR9("arm-reserve-r9", cl::Hidden,
cl::desc("Reserve R9, making it unavailable as GPR"));
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h
index 3855419..38536b2 100644
--- a/lib/Target/ARM/ARMSubtarget.h
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -31,7 +31,7 @@ class TargetOptions;
class ARMSubtarget : public ARMGenSubtargetInfo {
protected:
enum ARMProcFamilyEnum {
- Others, CortexA5, CortexA7, CortexA8, CortexA9, CortexA12, CortexA15,
+ Others, CortexA5, CortexA7, CortexA8, CortexA9, CortexA12, CortexA15,
CortexR5, Swift, CortexA53, CortexA57, Krait
};
enum ARMProcClassEnum {
@@ -242,9 +242,7 @@ protected:
/// getMaxInlineSizeThreshold - Returns the maximum memset / memcpy size
/// that still makes it profitable to inline the call.
unsigned getMaxInlineSizeThreshold() const {
- // FIXME: For now, we don't lower memcpy's to loads / stores for Thumb1.
- // Change this once Thumb1 ldmia / stmia support is added.
- return isThumb1Only() ? 0 : 64;
+ return 64;
}
/// ParseSubtargetFeatures - Parses features string setting specified
/// subtarget options. Definition of function is auto generated by tblgen.
@@ -396,7 +394,7 @@ public:
bool isLittle() const { return IsLittle; }
unsigned getMispredictionPenalty() const;
-
+
/// This function returns true if the target has sincos() routine in its
/// compiler runtime or math libraries.
bool hasSinCos() const;
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
index 4ae539a..8876227 100644
--- a/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -228,7 +228,7 @@ TargetPassConfig *ARMBaseTargetMachine::createPassConfig(PassManagerBase &PM) {
bool ARMPassConfig::addPreISel() {
const ARMSubtarget *Subtarget = &getARMSubtarget();
if (Subtarget->hasAnyDataBarrier() && !Subtarget->isThumb1Only())
- addPass(createARMAtomicExpandPass(TM));
+ addPass(createAtomicExpandLoadLinkedPass(TM));
if (TM->getOptLevel() != CodeGenOpt::None)
addPass(createGlobalMergePass(TM));
@@ -247,8 +247,7 @@ bool ARMPassConfig::addInstSelector() {
}
bool ARMPassConfig::addPreRegAlloc() {
- // FIXME: temporarily disabling load / store optimization pass for Thumb1.
- if (getOptLevel() != CodeGenOpt::None && !getARMSubtarget().isThumb1Only())
+ if (getOptLevel() != CodeGenOpt::None)
addPass(createARMLoadStoreOptimizationPass(true));
if (getOptLevel() != CodeGenOpt::None && getARMSubtarget().isCortexA9())
addPass(createMLxExpansionPass());
@@ -262,12 +261,10 @@ bool ARMPassConfig::addPreRegAlloc() {
}
bool ARMPassConfig::addPreSched2() {
- // FIXME: temporarily disabling load / store optimization pass for Thumb1.
if (getOptLevel() != CodeGenOpt::None) {
- if (!getARMSubtarget().isThumb1Only()) {
- addPass(createARMLoadStoreOptimizationPass());
- printAndVerify("After ARM load / store optimizer");
- }
+ addPass(createARMLoadStoreOptimizationPass());
+ printAndVerify("After ARM load / store optimizer");
+
if (getARMSubtarget().hasNEON())
addPass(createExecutionDependencyFixPass(&ARM::DPRRegClass));
}
diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h
index 0c80a95..664c992 100644
--- a/lib/Target/ARM/ARMTargetMachine.h
+++ b/lib/Target/ARM/ARMTargetMachine.h
@@ -23,7 +23,6 @@
#include "Thumb1FrameLowering.h"
#include "Thumb1InstrInfo.h"
#include "Thumb2InstrInfo.h"
-#include "llvm/ADT/OwningPtr.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/Target/TargetMachine.h"
@@ -102,7 +101,7 @@ class ARMTargetMachine : public ARMBaseTargetMachine {
/// ARMLETargetMachine - ARM little endian target machine.
///
class ARMLETargetMachine : public ARMTargetMachine {
- virtual void anchor();
+ void anchor() override;
public:
ARMLETargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS, const TargetOptions &Options,
@@ -113,7 +112,7 @@ public:
/// ARMBETargetMachine - ARM big endian target machine.
///
class ARMBETargetMachine : public ARMTargetMachine {
- virtual void anchor();
+ void anchor() override;
public:
ARMBETargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS, const TargetOptions &Options,
@@ -128,12 +127,12 @@ public:
class ThumbTargetMachine : public ARMBaseTargetMachine {
virtual void anchor();
// Either Thumb1InstrInfo or Thumb2InstrInfo.
- OwningPtr<ARMBaseInstrInfo> InstrInfo;
+ std::unique_ptr<ARMBaseInstrInfo> InstrInfo;
const DataLayout DL; // Calculates type size & alignment
ARMTargetLowering TLInfo;
ARMSelectionDAGInfo TSInfo;
// Either Thumb1FrameLowering or ARMFrameLowering.
- OwningPtr<ARMFrameLowering> FrameLowering;
+ std::unique_ptr<ARMFrameLowering> FrameLowering;
public:
ThumbTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
@@ -169,7 +168,7 @@ public:
/// ThumbLETargetMachine - Thumb little endian target machine.
///
class ThumbLETargetMachine : public ThumbTargetMachine {
- virtual void anchor();
+ void anchor() override;
public:
ThumbLETargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS, const TargetOptions &Options,
@@ -180,10 +179,10 @@ public:
/// ThumbBETargetMachine - Thumb big endian target machine.
///
class ThumbBETargetMachine : public ThumbTargetMachine {
- virtual void anchor();
+ void anchor() override;
public:
- ThumbBETargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS, const TargetOptions &Options,
+ ThumbBETargetMachine(const Target &T, StringRef TT, StringRef CPU,
+ StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL);
};
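The OwningPtr occurrences in this header follow the tree-wide migration to std::unique_ptr; ownership semantics are unchanged, only the type (and the take()/release() spelling) differs. A tiny sketch with a placeholder type:

    #include <memory>

    struct InstrInfoStub {};  // placeholder for ARMBaseInstrInfo etc.

    int main() {
      // Before: OwningPtr<InstrInfoStub> II(new InstrInfoStub());
      std::unique_ptr<InstrInfoStub> II(new InstrInfoStub());
      InstrInfoStub *Raw = II.release(); // OwningPtr spelled this take()
      delete Raw;
      return 0;
    }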
diff --git a/lib/Target/ARM/ARMTargetObjectFile.cpp b/lib/Target/ARM/ARMTargetObjectFile.cpp
index 3379f85..48238bf 100644
--- a/lib/Target/ARM/ARMTargetObjectFile.cpp
+++ b/lib/Target/ARM/ARMTargetObjectFile.cpp
@@ -11,6 +11,7 @@
#include "ARMSubtarget.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/IR/Mangler.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCSectionELF.h"
@@ -31,7 +32,7 @@ void ARMElfTargetObjectFile::Initialize(MCContext &Ctx,
InitializeELF(isAAPCS_ABI);
if (isAAPCS_ABI) {
- LSDASection = NULL;
+ LSDASection = nullptr;
}
AttributesSection =
@@ -45,6 +46,10 @@ const MCExpr *ARMElfTargetObjectFile::getTTypeGlobalReference(
const GlobalValue *GV, unsigned Encoding, Mangler &Mang,
const TargetMachine &TM, MachineModuleInfo *MMI,
MCStreamer &Streamer) const {
+ if (TM.getMCAsmInfo()->getExceptionHandlingType() != ExceptionHandling::ARM)
+ return TargetLoweringObjectFileELF::getTTypeGlobalReference(
+ GV, Encoding, Mang, TM, MMI, Streamer);
+
assert(Encoding == DW_EH_PE_absptr && "Can handle absptr encoding only");
return MCSymbolRefExpr::Create(TM.getSymbol(GV, Mang),
diff --git a/lib/Target/ARM/ARMTargetObjectFile.h b/lib/Target/ARM/ARMTargetObjectFile.h
index 5f8d612..c926421 100644
--- a/lib/Target/ARM/ARMTargetObjectFile.h
+++ b/lib/Target/ARM/ARMTargetObjectFile.h
@@ -23,7 +23,7 @@ protected:
public:
ARMElfTargetObjectFile() :
TargetLoweringObjectFileELF(),
- AttributesSection(NULL)
+ AttributesSection(nullptr)
{}
void Initialize(MCContext &Ctx, const TargetMachine &TM) override;
diff --git a/lib/Target/ARM/ARMTargetTransformInfo.cpp b/lib/Target/ARM/ARMTargetTransformInfo.cpp
index d3b43cd..57df7da 100644
--- a/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -14,7 +14,6 @@
///
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "armtti"
#include "ARM.h"
#include "ARMTargetMachine.h"
#include "llvm/Analysis/TargetTransformInfo.h"
@@ -23,8 +22,10 @@
#include "llvm/Target/TargetLowering.h"
using namespace llvm;
+#define DEBUG_TYPE "armtti"
+
// Declare the pass initialization routine locally as target-specific passes
-// don't havve a target-wide initialization entry point, and so we rely on the
+// don't have a target-wide initialization entry point, and so we rely on the
// pass constructor initialization.
namespace llvm {
void initializeARMTTIPass(PassRegistry &);
@@ -42,7 +43,7 @@ class ARMTTI final : public ImmutablePass, public TargetTransformInfo {
unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const;
public:
- ARMTTI() : ImmutablePass(ID), TM(0), ST(0), TLI(0) {
+ ARMTTI() : ImmutablePass(ID), TM(nullptr), ST(nullptr), TLI(nullptr) {
llvm_unreachable("This pass cannot be directly constructed");
}
diff --git a/lib/Target/ARM/Android.mk b/lib/Target/ARM/Android.mk
index 4be95aa..095955b 100644
--- a/lib/Target/ARM/Android.mk
+++ b/lib/Target/ARM/Android.mk
@@ -17,7 +17,6 @@ arm_codegen_TBLGEN_TABLES := \
arm_codegen_SRC_FILES := \
A15SDOptimizer.cpp \
ARMAsmPrinter.cpp \
- ARMAtomicExpandPass.cpp \
ARMBaseInstrInfo.cpp \
ARMBaseRegisterInfo.cpp \
ARMCodeEmitter.cpp \
diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index 9c57a24..5cdf394 100644
--- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -13,7 +13,6 @@
#include "MCTargetDesc/ARMArchName.h"
#include "MCTargetDesc/ARMBaseInfo.h"
#include "MCTargetDesc/ARMMCExpr.h"
-#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
@@ -23,9 +22,7 @@
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler.h"
-#include "llvm/MC/MCELF.h"
#include "llvm/MC/MCELFStreamer.h"
-#include "llvm/MC/MCELFSymbolFlags.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrDesc.h"
@@ -345,7 +342,8 @@ public:
};
ARMAsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser,
- const MCInstrInfo &MII)
+ const MCInstrInfo &MII,
+ const MCTargetOptions &Options)
: MCTargetAsmParser(), STI(_STI), Parser(_Parser), MII(MII), UC(_Parser) {
MCAsmParserExtension::Initialize(_Parser);
@@ -416,7 +414,7 @@ class ARMOperand : public MCParsedAsmOperand {
k_Token
} Kind;
- SMLoc StartLoc, EndLoc;
+ SMLoc StartLoc, EndLoc, AlignmentLoc;
SmallVector<unsigned, 8> Registers;
struct CCOp {
@@ -633,6 +631,12 @@ public:
/// operand.
SMRange getLocRange() const { return SMRange(StartLoc, EndLoc); }
+ /// getAlignmentLoc - Get the location of the Alignment token of this operand.
+ SMLoc getAlignmentLoc() const {
+ assert(Kind == k_Memory && "Invalid access!");
+ return AlignmentLoc;
+ }
+
ARMCC::CondCodes getCondCode() const {
assert(Kind == k_CondCode && "Invalid access!");
return CC.Val;
@@ -1089,12 +1093,12 @@ public:
bool isPostIdxReg() const {
return Kind == k_PostIndexRegister && PostIdxReg.ShiftTy ==ARM_AM::no_shift;
}
- bool isMemNoOffset(bool alignOK = false) const {
+ bool isMemNoOffset(bool alignOK = false, unsigned Alignment = 0) const {
if (!isMem())
return false;
// No offset of any kind.
- return Memory.OffsetRegNum == 0 && Memory.OffsetImm == 0 &&
- (alignOK || Memory.Alignment == 0);
+ return Memory.OffsetRegNum == 0 && Memory.OffsetImm == nullptr &&
+ (alignOK || Memory.Alignment == Alignment);
}
bool isMemPCRelImm12() const {
if (!isMem() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0)
@@ -1110,6 +1114,65 @@ public:
bool isAlignedMemory() const {
return isMemNoOffset(true);
}
+ bool isAlignedMemoryNone() const {
+ return isMemNoOffset(false, 0);
+ }
+ bool isDupAlignedMemoryNone() const {
+ return isMemNoOffset(false, 0);
+ }
+ bool isAlignedMemory16() const {
+ if (isMemNoOffset(false, 2)) // alignment in bytes for 16 bits is 2.
+ return true;
+ return isMemNoOffset(false, 0);
+ }
+ bool isDupAlignedMemory16() const {
+ if (isMemNoOffset(false, 2)) // alignment in bytes for 16 bits is 2.
+ return true;
+ return isMemNoOffset(false, 0);
+ }
+ bool isAlignedMemory32() const {
+ if (isMemNoOffset(false, 4)) // alignment in bytes for 32 bits is 4.
+ return true;
+ return isMemNoOffset(false, 0);
+ }
+ bool isDupAlignedMemory32() const {
+ if (isMemNoOffset(false, 4)) // alignment in bytes for 32 bits is 4.
+ return true;
+ return isMemNoOffset(false, 0);
+ }
+ bool isAlignedMemory64() const {
+ if (isMemNoOffset(false, 8)) // alignment in bytes for 64 bits is 8.
+ return true;
+ return isMemNoOffset(false, 0);
+ }
+ bool isDupAlignedMemory64() const {
+ if (isMemNoOffset(false, 8)) // alignment in bytes for 64 bits is 8.
+ return true;
+ return isMemNoOffset(false, 0);
+ }
+ bool isAlignedMemory64or128() const {
+ if (isMemNoOffset(false, 8)) // alignment in bytes for 64 bits is 8.
+ return true;
+ if (isMemNoOffset(false, 16)) // alignment in bytes for 128 bits is 16.
+ return true;
+ return isMemNoOffset(false, 0);
+ }
+ bool isDupAlignedMemory64or128() const {
+ if (isMemNoOffset(false, 8)) // alignment in bytes for 64 bits is 8.
+ return true;
+ if (isMemNoOffset(false, 16)) // alignment in bytes for 128 bits is 16.
+ return true;
+ return isMemNoOffset(false, 0);
+ }
+ bool isAlignedMemory64or128or256() const {
+ if (isMemNoOffset(false, 8)) // alignment in bytes for 64 bits is 8.
+ return true;
+ if (isMemNoOffset(false, 16)) // alignment in bytes for 128 bits is 16.
+ return true;
+ if (isMemNoOffset(false, 32)) // alignment in bytes for 256 bits is 32.
+ return true;
+ return isMemNoOffset(false, 0);
+ }
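All of the new is(Dup)AlignedMemoryN predicates reduce to "alignment omitted, or exactly the byte form of one of the allowed bit widths". A standalone restatement of that rule (illustrative names, not the parser's API):

    #include <cassert>

    // An operand alignment of N bits is written as N/8 bytes; 0 means omitted.
    static bool alignmentOK(unsigned AlignBytes, const unsigned *AllowedBits,
                            unsigned NumAllowed) {
      if (AlignBytes == 0)
        return true; // alignment may always be omitted
      for (unsigned i = 0; i < NumAllowed; ++i)
        if (AlignBytes == AllowedBits[i] / 8)
          return true;
      return false;
    }

    int main() {
      const unsigned Bits64or128[] = {64, 128};
      assert(alignmentOK(8, Bits64or128, 2));   // :64 accepted
      assert(alignmentOK(16, Bits64or128, 2));  // :128 accepted
      assert(!alignmentOK(4, Bits64or128, 2));  // :32 rejected
      return 0;
    }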
bool isAddrMode2() const {
if (!isMem() || Memory.Alignment != 0) return false;
// Check for register offset.
@@ -1545,7 +1608,10 @@ public:
}
bool isNEONi16splat() const {
- if (!isImm()) return false;
+ if (isNEONByteReplicate(2))
+ return false; // Leave that for byte replication and forbid it by default.
+ if (!isImm())
+ return false;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
// Must be a constant.
if (!CE) return false;
@@ -1555,7 +1621,10 @@ public:
}
bool isNEONi32splat() const {
- if (!isImm()) return false;
+ if (isNEONByteReplicate(4))
+ return false; // Leave that for byte replication and forbid it by default.
+ if (!isImm())
+ return false;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
// Must be a constant.
if (!CE) return false;
@@ -1567,11 +1636,36 @@ public:
(Value >= 0x01000000 && Value <= 0xff000000);
}
+ bool isNEONByteReplicate(unsigned NumBytes) const {
+ if (!isImm())
+ return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ // Must be a constant.
+ if (!CE)
+ return false;
+ int64_t Value = CE->getValue();
+ if (!Value)
+ return false; // Don't bother with zero.
+
+ unsigned char B = Value & 0xff;
+ for (unsigned i = 1; i < NumBytes; ++i) {
+ Value >>= 8;
+ if ((Value & 0xff) != B)
+ return false;
+ }
+ return true;
+ }
+ bool isNEONi16ByteReplicate() const { return isNEONByteReplicate(2); }
+ bool isNEONi32ByteReplicate() const { return isNEONByteReplicate(4); }
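isNEONByteReplicate boils down to "every byte of the constant equals its lowest byte, and the constant is nonzero". A quick standalone version with example values (a sketch mirroring the predicate above, not the operand class itself):

    #include <cassert>
    #include <cstdint>

    static bool isByteReplicate(int64_t Value, unsigned NumBytes) {
      if (!Value)
        return false; // zero is left to the ordinary splat forms
      unsigned char B = Value & 0xff;
      for (unsigned i = 1; i < NumBytes; ++i) {
        Value >>= 8;
        if ((Value & 0xff) != B)
          return false;
      }
      return true;
    }

    int main() {
      assert(isByteReplicate(0x4242, 2));     // i16 splat of 0x42
      assert(isByteReplicate(0x37373737, 4)); // i32 splat of 0x37
      assert(!isByteReplicate(0x1234, 2));    // bytes differ
      return 0;
    }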
bool isNEONi32vmov() const {
- if (!isImm()) return false;
+ if (isNEONByteReplicate(4))
+ return false; // Let it be classified as a byte-replicate case.
+ if (!isImm())
+ return false;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
// Must be a constant.
- if (!CE) return false;
+ if (!CE)
+ return false;
int64_t Value = CE->getValue();
// i32 value with set bits only in one byte X000, 0X00, 00X0, or 000X,
// for VMOV/VMVN only, 00Xf or 0Xff are also accepted.
@@ -1612,7 +1706,7 @@ public:
void addExpr(MCInst &Inst, const MCExpr *Expr) const {
// Add as immediates when possible. Null MCExpr = 0.
- if (Expr == 0)
+ if (!Expr)
Inst.addOperand(MCOperand::CreateImm(0));
else if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr))
Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
@@ -1926,6 +2020,50 @@ public:
Inst.addOperand(MCOperand::CreateImm(Memory.Alignment));
}
+ void addDupAlignedMemoryNoneOperands(MCInst &Inst, unsigned N) const {
+ addAlignedMemoryOperands(Inst, N);
+ }
+
+ void addAlignedMemoryNoneOperands(MCInst &Inst, unsigned N) const {
+ addAlignedMemoryOperands(Inst, N);
+ }
+
+ void addAlignedMemory16Operands(MCInst &Inst, unsigned N) const {
+ addAlignedMemoryOperands(Inst, N);
+ }
+
+ void addDupAlignedMemory16Operands(MCInst &Inst, unsigned N) const {
+ addAlignedMemoryOperands(Inst, N);
+ }
+
+ void addAlignedMemory32Operands(MCInst &Inst, unsigned N) const {
+ addAlignedMemoryOperands(Inst, N);
+ }
+
+ void addDupAlignedMemory32Operands(MCInst &Inst, unsigned N) const {
+ addAlignedMemoryOperands(Inst, N);
+ }
+
+ void addAlignedMemory64Operands(MCInst &Inst, unsigned N) const {
+ addAlignedMemoryOperands(Inst, N);
+ }
+
+ void addDupAlignedMemory64Operands(MCInst &Inst, unsigned N) const {
+ addAlignedMemoryOperands(Inst, N);
+ }
+
+ void addAlignedMemory64or128Operands(MCInst &Inst, unsigned N) const {
+ addAlignedMemoryOperands(Inst, N);
+ }
+
+ void addDupAlignedMemory64or128Operands(MCInst &Inst, unsigned N) const {
+ addAlignedMemoryOperands(Inst, N);
+ }
+
+ void addAlignedMemory64or128or256Operands(MCInst &Inst, unsigned N) const {
+ addAlignedMemoryOperands(Inst, N);
+ }
+
void addAddrMode2Operands(MCInst &Inst, unsigned N) const {
assert(N == 3 && "Invalid number of operands!");
int32_t Val = Memory.OffsetImm ? Memory.OffsetImm->getValue() : 0;
@@ -2275,6 +2413,19 @@ public:
Inst.addOperand(MCOperand::CreateImm(Value));
}
+ void addNEONinvByteReplicateOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ // The immediate encodes the type of constant as well as the value.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ unsigned Value = CE->getValue();
+ assert((Inst.getOpcode() == ARM::VMOVv8i8 ||
+ Inst.getOpcode() == ARM::VMOVv16i8) &&
+ "All vmvn instructions that wants to replicate non-zero byte "
+ "always must be replaced with VMOVv8i8 or VMOVv16i8.");
+ unsigned B = ((~Value) & 0xff);
+ B |= 0xe00; // cmode = 0b1110
+ Inst.addOperand(MCOperand::CreateImm(B));
+ }
void addNEONi32vmovOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
// The immediate encodes the type of constant as well as the value.
@@ -2289,6 +2440,19 @@ public:
Inst.addOperand(MCOperand::CreateImm(Value));
}
+ void addNEONvmovByteReplicateOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ // The immediate encodes the type of constant as well as the value.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ unsigned Value = CE->getValue();
+ assert((Inst.getOpcode() == ARM::VMOVv8i8 ||
+ Inst.getOpcode() == ARM::VMOVv16i8) &&
+ "All instructions that wants to replicate non-zero byte "
+ "always must be replaced with VMOVv8i8 or VMOVv16i8.");
+ unsigned B = Value & 0xff;
+ B |= 0xe00; // cmode = 0b1110
+ Inst.addOperand(MCOperand::CreateImm(B));
+ }
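Both byte-replicate addOperands helpers emit the (possibly inverted) byte with cmode 0b1110 ORed into bits 11:8; the VMVN path inverts the byte first. Worked values under that reading (hand-computed, not taken from the patch):

    #include <cassert>

    int main() {
      unsigned Value = 0x37373737;               // byte-replicated constant
      unsigned Vmov = (Value & 0xff) | 0xe00;    // VMOV path
      unsigned Vmvn = ((~Value) & 0xff) | 0xe00; // VMVN path inverts first
      assert(Vmov == 0xe37);
      assert(Vmvn == 0xec8);                     // ~0x37 & 0xff == 0xc8
      return 0;
    }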
void addNEONi32vmovNegOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
// The immediate encodes the type of constant as well as the value.
@@ -2523,7 +2687,8 @@ public:
unsigned ShiftImm,
unsigned Alignment,
bool isNegative,
- SMLoc S, SMLoc E) {
+ SMLoc S, SMLoc E,
+ SMLoc AlignmentLoc = SMLoc()) {
ARMOperand *Op = new ARMOperand(k_Memory);
Op->Memory.BaseRegNum = BaseRegNum;
Op->Memory.OffsetImm = OffsetImm;
@@ -2534,6 +2699,7 @@ public:
Op->Memory.isNegative = isNegative;
Op->StartLoc = S;
Op->EndLoc = E;
+ Op->AlignmentLoc = AlignmentLoc;
return Op;
}
@@ -2806,7 +2972,7 @@ int ARMAsmParser::tryParseShiftRegister(
// The source register for the shift has already been added to the
// operand list, so we need to pop it off and combine it into the shifted
// register operand instead.
- OwningPtr<ARMOperand> PrevOp((ARMOperand*)Operands.pop_back_val());
+ std::unique_ptr<ARMOperand> PrevOp((ARMOperand*)Operands.pop_back_val());
if (!PrevOp->isReg())
return Error(PrevOp->getStartLoc(), "shift must be of a register");
int SrcReg = PrevOp->getReg();
@@ -2825,7 +2991,7 @@ int ARMAsmParser::tryParseShiftRegister(
Parser.getTok().is(AsmToken::Dollar)) {
Parser.Lex(); // Eat hash.
SMLoc ImmLoc = Parser.getTok().getLoc();
- const MCExpr *ShiftExpr = 0;
+ const MCExpr *ShiftExpr = nullptr;
if (getParser().parseExpression(ShiftExpr, EndLoc)) {
Error(ImmLoc, "invalid immediate shift value");
return -1;
@@ -2855,12 +3021,12 @@ int ARMAsmParser::tryParseShiftRegister(
EndLoc = Parser.getTok().getEndLoc();
ShiftReg = tryParseRegister();
if (ShiftReg == -1) {
- Error (L, "expected immediate or register in shift operand");
+ Error(L, "expected immediate or register in shift operand");
return -1;
}
} else {
- Error (Parser.getTok().getLoc(),
- "expected immediate or register in shift operand");
+ Error(Parser.getTok().getLoc(),
+ "expected immediate or register in shift operand");
return -1;
}
}
@@ -4323,8 +4489,9 @@ parseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
E = Tok.getEndLoc();
Parser.Lex(); // Eat right bracket token.
- Operands.push_back(ARMOperand::CreateMem(BaseRegNum, 0, 0, ARM_AM::no_shift,
- 0, 0, false, S, E));
+ Operands.push_back(ARMOperand::CreateMem(BaseRegNum, nullptr, 0,
+ ARM_AM::no_shift, 0, 0, false,
+ S, E));
// If there's a pre-indexing writeback marker, '!', just add it as a token
// operand. It's rather odd, but syntactically valid.
@@ -4346,6 +4513,7 @@ parseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
if (Parser.getTok().is(AsmToken::Colon)) {
Parser.Lex(); // Eat the ':'.
E = Parser.getTok().getLoc();
+ SMLoc AlignmentLoc = Tok.getLoc();
const MCExpr *Expr;
if (getParser().parseExpression(Expr))
@@ -4378,9 +4546,9 @@ parseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
// Don't worry about range checking the value here. That's handled by
// the is*() predicates.
- Operands.push_back(ARMOperand::CreateMem(BaseRegNum, 0, 0,
+ Operands.push_back(ARMOperand::CreateMem(BaseRegNum, nullptr, 0,
ARM_AM::no_shift, 0, Align,
- false, S, E));
+ false, S, E, AlignmentLoc));
// If there's a pre-indexing writeback marker, '!', just add it as a token
// operand.
@@ -4471,7 +4639,7 @@ parseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
E = Parser.getTok().getEndLoc();
Parser.Lex(); // Eat right bracket token.
- Operands.push_back(ARMOperand::CreateMem(BaseRegNum, 0, OffsetRegNum,
+ Operands.push_back(ARMOperand::CreateMem(BaseRegNum, nullptr, OffsetRegNum,
ShiftType, ShiftImm, 0, isNegative,
S, E));
@@ -4926,8 +5094,9 @@ getMnemonicAcceptInfo(StringRef Mnemonic, StringRef FullInst,
if (Mnemonic == "bkpt" || Mnemonic == "cbnz" || Mnemonic == "setend" ||
Mnemonic == "cps" || Mnemonic == "it" || Mnemonic == "cbz" ||
- Mnemonic == "trap" || Mnemonic == "hlt" || Mnemonic.startswith("crc32") ||
- Mnemonic.startswith("cps") || Mnemonic.startswith("vsel") ||
+ Mnemonic == "trap" || Mnemonic == "hlt" || Mnemonic == "udf" ||
+ Mnemonic.startswith("crc32") || Mnemonic.startswith("cps") ||
+ Mnemonic.startswith("vsel") ||
Mnemonic == "vmaxnm" || Mnemonic == "vminnm" || Mnemonic == "vcvta" ||
Mnemonic == "vcvtn" || Mnemonic == "vcvtp" || Mnemonic == "vcvtm" ||
Mnemonic == "vrinta" || Mnemonic == "vrintn" || Mnemonic == "vrintp" ||
@@ -5404,21 +5573,24 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
}
// GNU Assembler extension (compatibility)
- if ((Mnemonic == "ldrd" || Mnemonic == "strd") && !isThumb() &&
- Operands.size() == 4) {
- ARMOperand *Op = static_cast<ARMOperand *>(Operands[2]);
- assert(Op->isReg() && "expected register argument");
+ if ((Mnemonic == "ldrd" || Mnemonic == "strd")) {
+ ARMOperand *Op2 = static_cast<ARMOperand *>(Operands[2]);
+ ARMOperand *Op3 = static_cast<ARMOperand *>(Operands[3]);
+ if (Op3->isMem()) {
+ assert(Op2->isReg() && "expected register argument");
- unsigned SuperReg = MRI->getMatchingSuperReg(
- Op->getReg(), ARM::gsub_0, &MRI->getRegClass(ARM::GPRPairRegClassID));
+ unsigned SuperReg = MRI->getMatchingSuperReg(
+ Op2->getReg(), ARM::gsub_0, &MRI->getRegClass(ARM::GPRPairRegClassID));
- assert(SuperReg && "expected register pair");
+ assert(SuperReg && "expected register pair");
- unsigned PairedReg = MRI->getSubReg(SuperReg, ARM::gsub_1);
+ unsigned PairedReg = MRI->getSubReg(SuperReg, ARM::gsub_1);
- Operands.insert(Operands.begin() + 3,
- ARMOperand::CreateReg(PairedReg, Op->getStartLoc(),
- Op->getEndLoc()));
+ Operands.insert(Operands.begin() + 3,
+ ARMOperand::CreateReg(PairedReg,
+ Op2->getStartLoc(),
+ Op2->getEndLoc()));
+ }
}
// FIXME: As said above, this is all a pretty gross hack. This instruction
@@ -5748,6 +5920,30 @@ validateInstruction(MCInst &Inst,
return Error(Operands[Op]->getStartLoc(), "branch target out of range");
break;
}
+ case ARM::MOVi16:
+ case ARM::t2MOVi16:
+ case ARM::t2MOVTi16:
+ {
+ // We want to avoid misleadingly allowing something like "mov r0, <symbol>",
+ // especially when we turn it into a movw and the expression <symbol> does
+ // not have a :lower16: or :upper16: prefix. We don't want to silently
+ // truncate the value, which can be unexpected and lead to bugs that are
+ // difficult to find, since this is an easy mistake to make.
+ int i = (Operands[3]->isImm()) ? 3 : 4;
+ ARMOperand *Op = static_cast<ARMOperand*>(Operands[i]);
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Op->getImm());
+ if (CE) break;
+ const MCExpr *E = dyn_cast<MCExpr>(Op->getImm());
+ if (!E) break;
+ const ARMMCExpr *ARM16Expr = dyn_cast<ARMMCExpr>(E);
+ if (!ARM16Expr || (ARM16Expr->getKind() != ARMMCExpr::VK_ARM_HI16 &&
+ ARM16Expr->getKind() != ARMMCExpr::VK_ARM_LO16)) {
+ return Error(Op->getStartLoc(),
+ "immediate expression for mov requires :lower16: or :upper16");
+ break;
+ }
+ }
}
return false;
@@ -5898,7 +6094,7 @@ static unsigned getRealVLDOpcode(unsigned Opc, unsigned &Spacing) {
case ARM::VLD3DUPdWB_fixed_Asm_16: Spacing = 1; return ARM::VLD3DUPd16_UPD;
case ARM::VLD3DUPdWB_fixed_Asm_32: Spacing = 1; return ARM::VLD3DUPd32_UPD;
case ARM::VLD3DUPqWB_fixed_Asm_8: Spacing = 1; return ARM::VLD3DUPq8_UPD;
- case ARM::VLD3DUPqWB_fixed_Asm_16: Spacing = 1; return ARM::VLD3DUPq16_UPD;
+ case ARM::VLD3DUPqWB_fixed_Asm_16: Spacing = 2; return ARM::VLD3DUPq16_UPD;
case ARM::VLD3DUPqWB_fixed_Asm_32: Spacing = 2; return ARM::VLD3DUPq32_UPD;
case ARM::VLD3DUPdWB_register_Asm_8: Spacing = 1; return ARM::VLD3DUPd8_UPD;
case ARM::VLD3DUPdWB_register_Asm_16: Spacing = 1; return ARM::VLD3DUPd16_UPD;
@@ -7860,9 +8056,11 @@ unsigned ARMAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
return Match_Success;
}
-template<> inline bool IsCPSRDead<MCInst>(MCInst* Instr) {
+namespace llvm {
+template <> inline bool IsCPSRDead<MCInst>(MCInst *Instr) {
return true; // In an assembly source, no need to second-guess
}
+}
static const char *getSubtargetFeatureName(unsigned Val);
bool ARMAsmParser::
@@ -7965,6 +8163,42 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc;
return Error(ErrorLoc, "immediate operand must be in the range [0,239]");
}
+ case Match_AlignedMemoryRequiresNone:
+ case Match_DupAlignedMemoryRequiresNone:
+ case Match_AlignedMemoryRequires16:
+ case Match_DupAlignedMemoryRequires16:
+ case Match_AlignedMemoryRequires32:
+ case Match_DupAlignedMemoryRequires32:
+ case Match_AlignedMemoryRequires64:
+ case Match_DupAlignedMemoryRequires64:
+ case Match_AlignedMemoryRequires64or128:
+ case Match_DupAlignedMemoryRequires64or128:
+ case Match_AlignedMemoryRequires64or128or256:
+ {
+ SMLoc ErrorLoc = ((ARMOperand*)Operands[ErrorInfo])->getAlignmentLoc();
+ if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc;
+ switch (MatchResult) {
+ default:
+ llvm_unreachable("Missing Match_Aligned type");
+ case Match_AlignedMemoryRequiresNone:
+ case Match_DupAlignedMemoryRequiresNone:
+ return Error(ErrorLoc, "alignment must be omitted");
+ case Match_AlignedMemoryRequires16:
+ case Match_DupAlignedMemoryRequires16:
+ return Error(ErrorLoc, "alignment must be 16 or omitted");
+ case Match_AlignedMemoryRequires32:
+ case Match_DupAlignedMemoryRequires32:
+ return Error(ErrorLoc, "alignment must be 32 or omitted");
+ case Match_AlignedMemoryRequires64:
+ case Match_DupAlignedMemoryRequires64:
+ return Error(ErrorLoc, "alignment must be 64 or omitted");
+ case Match_AlignedMemoryRequires64or128:
+ case Match_DupAlignedMemoryRequires64or128:
+ return Error(ErrorLoc, "alignment must be 64, 128 or omitted");
+ case Match_AlignedMemoryRequires64or128or256:
+ return Error(ErrorLoc, "alignment must be 64, 128, 256 or omitted");
+ }
+ }
}
llvm_unreachable("Implement any new match types added!");
@@ -7972,6 +8206,10 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
/// parseDirective parses the arm specific directives
bool ARMAsmParser::ParseDirective(AsmToken DirectiveID) {
+ const MCObjectFileInfo::Environment Format =
+ getContext().getObjectFileInfo()->getObjectFileType();
+ bool IsMachO = Format == MCObjectFileInfo::IsMachO;
+
StringRef IDVal = DirectiveID.getIdentifier();
if (IDVal == ".word")
return parseLiteralValues(4, DirectiveID.getLoc());
@@ -7989,16 +8227,6 @@ bool ARMAsmParser::ParseDirective(AsmToken DirectiveID) {
return parseDirectiveSyntax(DirectiveID.getLoc());
else if (IDVal == ".unreq")
return parseDirectiveUnreq(DirectiveID.getLoc());
- else if (IDVal == ".arch")
- return parseDirectiveArch(DirectiveID.getLoc());
- else if (IDVal == ".eabi_attribute")
- return parseDirectiveEabiAttr(DirectiveID.getLoc());
- else if (IDVal == ".cpu")
- return parseDirectiveCPU(DirectiveID.getLoc());
- else if (IDVal == ".fpu")
- return parseDirectiveFPU(DirectiveID.getLoc());
- else if (IDVal == ".fnstart")
- return parseDirectiveFnStart(DirectiveID.getLoc());
else if (IDVal == ".fnend")
return parseDirectiveFnEnd(DirectiveID.getLoc());
else if (IDVal == ".cantunwind")
@@ -8015,12 +8243,6 @@ bool ARMAsmParser::ParseDirective(AsmToken DirectiveID) {
return parseDirectiveRegSave(DirectiveID.getLoc(), false);
else if (IDVal == ".vsave")
return parseDirectiveRegSave(DirectiveID.getLoc(), true);
- else if (IDVal == ".inst")
- return parseDirectiveInst(DirectiveID.getLoc());
- else if (IDVal == ".inst.n")
- return parseDirectiveInst(DirectiveID.getLoc(), 'n');
- else if (IDVal == ".inst.w")
- return parseDirectiveInst(DirectiveID.getLoc(), 'w');
else if (IDVal == ".ltorg" || IDVal == ".pool")
return parseDirectiveLtorg(DirectiveID.getLoc());
else if (IDVal == ".even")
@@ -8029,18 +8251,38 @@ bool ARMAsmParser::ParseDirective(AsmToken DirectiveID) {
return parseDirectivePersonalityIndex(DirectiveID.getLoc());
else if (IDVal == ".unwind_raw")
return parseDirectiveUnwindRaw(DirectiveID.getLoc());
- else if (IDVal == ".tlsdescseq")
- return parseDirectiveTLSDescSeq(DirectiveID.getLoc());
else if (IDVal == ".movsp")
return parseDirectiveMovSP(DirectiveID.getLoc());
- else if (IDVal == ".object_arch")
- return parseDirectiveObjectArch(DirectiveID.getLoc());
else if (IDVal == ".arch_extension")
return parseDirectiveArchExtension(DirectiveID.getLoc());
else if (IDVal == ".align")
return parseDirectiveAlign(DirectiveID.getLoc());
else if (IDVal == ".thumb_set")
return parseDirectiveThumbSet(DirectiveID.getLoc());
+
+ if (!IsMachO) {
+ if (IDVal == ".arch")
+ return parseDirectiveArch(DirectiveID.getLoc());
+ else if (IDVal == ".cpu")
+ return parseDirectiveCPU(DirectiveID.getLoc());
+ else if (IDVal == ".eabi_attribute")
+ return parseDirectiveEabiAttr(DirectiveID.getLoc());
+ else if (IDVal == ".fpu")
+ return parseDirectiveFPU(DirectiveID.getLoc());
+ else if (IDVal == ".fnstart")
+ return parseDirectiveFnStart(DirectiveID.getLoc());
+ else if (IDVal == ".inst")
+ return parseDirectiveInst(DirectiveID.getLoc());
+ else if (IDVal == ".inst.n")
+ return parseDirectiveInst(DirectiveID.getLoc(), 'n');
+ else if (IDVal == ".inst.w")
+ return parseDirectiveInst(DirectiveID.getLoc(), 'w');
+ else if (IDVal == ".object_arch")
+ return parseDirectiveObjectArch(DirectiveID.getLoc());
+ else if (IDVal == ".tlsdescseq")
+ return parseDirectiveTLSDescSeq(DirectiveID.getLoc());
+ }
+
return true;
}
@@ -8121,32 +8363,6 @@ void ARMAsmParser::onLabelParsed(MCSymbol *Symbol) {
if (NextSymbolIsThumb) {
getParser().getStreamer().EmitThumbFunc(Symbol);
NextSymbolIsThumb = false;
- return;
- }
-
- if (!isThumb())
- return;
-
- const MCObjectFileInfo::Environment Format =
- getContext().getObjectFileInfo()->getObjectFileType();
- switch (Format) {
- case MCObjectFileInfo::IsCOFF: {
- const MCSymbolData &SD =
- getParser().getStreamer().getOrCreateSymbolData(Symbol);
- char Type = COFF::IMAGE_SYM_DTYPE_FUNCTION << COFF::SCT_COMPLEX_TYPE_SHIFT;
- if (SD.getFlags() & (Type << COFF::SF_TypeShift))
- getParser().getStreamer().EmitThumbFunc(Symbol);
- break;
- }
- case MCObjectFileInfo::IsELF: {
- const MCSymbolData &SD =
- getParser().getStreamer().getOrCreateSymbolData(Symbol);
- if (MCELF::GetType(SD) & (ELF::STT_FUNC << ELF_STT_Shift))
- getParser().getStreamer().EmitThumbFunc(Symbol);
- break;
- }
- case MCObjectFileInfo::IsMachO:
- break;
}
}
@@ -8303,14 +8519,6 @@ bool ARMAsmParser::parseDirectiveUnreq(SMLoc L) {
/// parseDirectiveArch
/// ::= .arch token
bool ARMAsmParser::parseDirectiveArch(SMLoc L) {
- const MCAsmInfo *MAI = getParser().getStreamer().getContext().getAsmInfo();
- bool isMachO = MAI->hasSubsectionsViaSymbols();
- if (isMachO) {
- Error(L, ".arch directive not valid for Mach-O");
- Parser.eatToEndOfStatement();
- return false;
- }
-
StringRef Arch = getParser().parseStringToEndOfStatement().trim();
unsigned ID = StringSwitch<unsigned>(Arch)
@@ -8334,14 +8542,6 @@ bool ARMAsmParser::parseDirectiveArch(SMLoc L) {
/// ::= .eabi_attribute int, int [, "str"]
/// ::= .eabi_attribute Tag_name, int [, "str"]
bool ARMAsmParser::parseDirectiveEabiAttr(SMLoc L) {
- const MCAsmInfo *MAI = getParser().getStreamer().getContext().getAsmInfo();
- bool isMachO = MAI->hasSubsectionsViaSymbols();
- if (isMachO) {
- Error(L, ".eabi_attribute directive not valid for Mach-O");
- Parser.eatToEndOfStatement();
- return false;
- }
-
int64_t Tag;
SMLoc TagLoc;
TagLoc = Parser.getTok().getLoc();
@@ -8447,14 +8647,6 @@ bool ARMAsmParser::parseDirectiveEabiAttr(SMLoc L) {
/// parseDirectiveCPU
/// ::= .cpu str
bool ARMAsmParser::parseDirectiveCPU(SMLoc L) {
- const MCAsmInfo *MAI = getParser().getStreamer().getContext().getAsmInfo();
- bool isMachO = MAI->hasSubsectionsViaSymbols();
- if (isMachO) {
- Error(L, ".cpu directive not valid for Mach-O");
- Parser.eatToEndOfStatement();
- return false;
- }
-
StringRef CPU = getParser().parseStringToEndOfStatement().trim();
getTargetStreamer().emitTextAttribute(ARMBuildAttrs::CPU_name, CPU);
return false;
@@ -8463,14 +8655,6 @@ bool ARMAsmParser::parseDirectiveCPU(SMLoc L) {
/// parseDirectiveFPU
/// ::= .fpu str
bool ARMAsmParser::parseDirectiveFPU(SMLoc L) {
- const MCAsmInfo *MAI = getParser().getStreamer().getContext().getAsmInfo();
- bool isMachO = MAI->hasSubsectionsViaSymbols();
- if (isMachO) {
- Error(L, ".fpu directive not valid for Mach-O");
- Parser.eatToEndOfStatement();
- return false;
- }
-
StringRef FPU = getParser().parseStringToEndOfStatement().trim();
unsigned ID = StringSwitch<unsigned>(FPU)
@@ -8490,14 +8674,6 @@ bool ARMAsmParser::parseDirectiveFPU(SMLoc L) {
/// parseDirectiveFnStart
/// ::= .fnstart
bool ARMAsmParser::parseDirectiveFnStart(SMLoc L) {
- const MCAsmInfo *MAI = getParser().getStreamer().getContext().getAsmInfo();
- bool isMachO = MAI->hasSubsectionsViaSymbols();
- if (isMachO) {
- Error(L, ".fnstart directive not valid for Mach-O");
- Parser.eatToEndOfStatement();
- return false;
- }
-
if (UC.hasFnStart()) {
Error(L, ".fnstart starts before the end of previous one");
UC.emitFnStartLocNotes();
@@ -8777,14 +8953,6 @@ bool ARMAsmParser::parseDirectiveRegSave(SMLoc L, bool IsVector) {
/// ::= .inst.n opcode [, ...]
/// ::= .inst.w opcode [, ...]
bool ARMAsmParser::parseDirectiveInst(SMLoc Loc, char Suffix) {
- const MCAsmInfo *MAI = getParser().getStreamer().getContext().getAsmInfo();
- bool isMachO = MAI->hasSubsectionsViaSymbols();
- if (isMachO) {
- Error(Loc, ".inst directive not valid for Mach-O");
- Parser.eatToEndOfStatement();
- return false;
- }
-
int Width;
if (isThumb()) {
@@ -9033,14 +9201,6 @@ bool ARMAsmParser::parseDirectiveUnwindRaw(SMLoc L) {
/// parseDirectiveTLSDescSeq
/// ::= .tlsdescseq tls-variable
bool ARMAsmParser::parseDirectiveTLSDescSeq(SMLoc L) {
- const MCAsmInfo *MAI = getParser().getStreamer().getContext().getAsmInfo();
- bool isMachO = MAI->hasSubsectionsViaSymbols();
- if (isMachO) {
- Error(L, ".tlsdescseq directive not valid for Mach-O");
- Parser.eatToEndOfStatement();
- return false;
- }
-
if (getLexer().isNot(AsmToken::Identifier)) {
TokError("expected variable after '.tlsdescseq' directive");
Parser.eatToEndOfStatement();
@@ -9128,14 +9288,6 @@ bool ARMAsmParser::parseDirectiveMovSP(SMLoc L) {
/// parseDirectiveObjectArch
/// ::= .object_arch name
bool ARMAsmParser::parseDirectiveObjectArch(SMLoc L) {
- const MCAsmInfo *MAI = getParser().getStreamer().getContext().getAsmInfo();
- bool isMachO = MAI->hasSubsectionsViaSymbols();
- if (isMachO) {
- Error(L, ".object_arch directive not valid for Mach-O");
- Parser.eatToEndOfStatement();
- return false;
- }
-
if (getLexer().isNot(AsmToken::Identifier)) {
Error(getLexer().getLoc(), "unexpected token");
Parser.eatToEndOfStatement();
@@ -9221,36 +9373,7 @@ bool ARMAsmParser::parseDirectiveThumbSet(SMLoc L) {
Lex();
MCSymbol *Alias = getContext().GetOrCreateSymbol(Name);
- if (const MCSymbolRefExpr *SRE = dyn_cast<MCSymbolRefExpr>(Value)) {
- MCSymbol *Sym = getContext().LookupSymbol(SRE->getSymbol().getName());
- if (!Sym->isDefined()) {
- getStreamer().EmitSymbolAttribute(Sym, MCSA_Global);
- getStreamer().EmitAssignment(Alias, Value);
- return false;
- }
-
- const MCObjectFileInfo::Environment Format =
- getContext().getObjectFileInfo()->getObjectFileType();
- switch (Format) {
- case MCObjectFileInfo::IsCOFF: {
- char Type = COFF::IMAGE_SYM_DTYPE_FUNCTION << COFF::SCT_COMPLEX_TYPE_SHIFT;
- getStreamer().EmitCOFFSymbolType(Type);
- // .set values are always local in COFF
- getStreamer().EmitSymbolAttribute(Alias, MCSA_Local);
- break;
- }
- case MCObjectFileInfo::IsELF:
- getStreamer().EmitSymbolAttribute(Alias, MCSA_ELF_TypeFunction);
- break;
- case MCObjectFileInfo::IsMachO:
- break;
- }
- }
-
- // FIXME: set the function as being a thumb function via the assembler
- getStreamer().EmitThumbFunc(Alias);
- getStreamer().EmitAssignment(Alias, Value);
-
+ getTargetStreamer().emitThumbSet(Alias, Value);
return false;
}
@@ -9365,8 +9488,8 @@ unsigned ARMAsmParser::validateTargetOperandClass(MCParsedAsmOperand *AsmOp,
int64_t Value;
if (!SOExpr->EvaluateAsAbsolute(Value))
return Match_Success;
- assert((Value >= INT32_MIN && Value <= INT32_MAX) &&
- "expression value must be representiable in 32 bits");
+ assert((Value >= INT32_MIN && Value <= UINT32_MAX) &&
+ "expression value must be representable in 32 bits");
}
break;
case MCK_GPRPair:
diff --git a/lib/Target/ARM/CMakeLists.txt b/lib/Target/ARM/CMakeLists.txt
index 8e14883..9b5fa75 100644
--- a/lib/Target/ARM/CMakeLists.txt
+++ b/lib/Target/ARM/CMakeLists.txt
@@ -17,7 +17,6 @@ add_public_tablegen_target(ARMCommonTableGen)
add_llvm_target(ARMCodeGen
A15SDOptimizer.cpp
ARMAsmPrinter.cpp
- ARMAtomicExpandPass.cpp
ARMBaseInstrInfo.cpp
ARMBaseRegisterInfo.cpp
ARMCodeEmitter.cpp
diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
index 9e40381..4d4038d 100644
--- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
+++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -7,8 +7,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "arm-disassembler"
-
#include "llvm/MC/MCDisassembler.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "MCTargetDesc/ARMBaseInfo.h"
@@ -29,6 +27,8 @@
using namespace llvm;
+#define DEBUG_TYPE "arm-disassembler"
+
typedef MCDisassembler::DecodeStatus DecodeStatus;
namespace {
@@ -90,8 +90,8 @@ class ARMDisassembler : public MCDisassembler {
public:
/// Constructor - Initializes the disassembler.
///
- ARMDisassembler(const MCSubtargetInfo &STI) :
- MCDisassembler(STI) {
+ ARMDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx) :
+ MCDisassembler(STI, Ctx) {
}
~ARMDisassembler() {
@@ -109,8 +109,8 @@ class ThumbDisassembler : public MCDisassembler {
public:
/// Constructor - Initializes the disassembler.
///
- ThumbDisassembler(const MCSubtargetInfo &STI) :
- MCDisassembler(STI) {
+ ThumbDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx) :
+ MCDisassembler(STI, Ctx) {
}
~ThumbDisassembler() {
@@ -400,12 +400,16 @@ static DecodeStatus DecodeMRRC2(llvm::MCInst &Inst, unsigned Val,
uint64_t Address, const void *Decoder);
#include "ARMGenDisassemblerTables.inc"
-static MCDisassembler *createARMDisassembler(const Target &T, const MCSubtargetInfo &STI) {
- return new ARMDisassembler(STI);
+static MCDisassembler *createARMDisassembler(const Target &T,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx) {
+ return new ARMDisassembler(STI, Ctx);
}
-static MCDisassembler *createThumbDisassembler(const Target &T, const MCSubtargetInfo &STI) {
- return new ThumbDisassembler(STI);
+static MCDisassembler *createThumbDisassembler(const Target &T,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx) {
+ return new ThumbDisassembler(STI, Ctx);
}
DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
index da3fe01..e4b785d 100644
--- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
+++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
@@ -11,7 +11,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "asm-printer"
#include "ARMInstPrinter.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "MCTargetDesc/ARMBaseInfo.h"
@@ -23,6 +22,8 @@
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
+#define DEBUG_TYPE "asm-printer"
+
#include "ARMGenAsmWriter.inc"
/// translateShiftImm - Convert shift immediate from 0-31 to 1-32 for printing.
diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
index 1db517f..7acd9cc 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
@@ -306,8 +306,36 @@ bool ARMAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
return true;
}
+static uint32_t swapHalfWords(uint32_t Value, bool IsLittleEndian) {
+ if (IsLittleEndian) {
+ // Note that the halfwords are stored high first and low second in Thumb,
+ // so we need to swap the fixup value here to map properly.
+ uint32_t Swapped = (Value & 0xFFFF0000) >> 16;
+ Swapped |= (Value & 0x0000FFFF) << 16;
+ return Swapped;
+ }
+ return Value;
+}
+
+static uint32_t joinHalfWords(uint32_t FirstHalf, uint32_t SecondHalf,
+ bool IsLittleEndian) {
+ uint32_t Value;
+
+ if (IsLittleEndian) {
+ Value = (SecondHalf & 0xFFFF) << 16;
+ Value |= (FirstHalf & 0xFFFF);
+ } else {
+ Value = (SecondHalf & 0xFFFF);
+ Value |= (FirstHalf & 0xFFFF) << 16;
+ }
+
+ return Value;
+}
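swapHalfWords and joinHalfWords factor out the little-endian halfword shuffling that was previously open-coded at each Thumb fixup. Sample values under the little-endian branches (hand-computed):

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t Value = 0x12345678;
      uint32_t Swapped =
          ((Value & 0xFFFF0000) >> 16) | ((Value & 0x0000FFFF) << 16);
      assert(Swapped == 0x56781234); // halfwords exchanged

      // joinHalfWords, little-endian: second half lands in the high halfword.
      uint32_t First = 0x1234, Second = 0x5678;
      uint32_t Joined = ((Second & 0xFFFF) << 16) | (First & 0xFFFF);
      assert(Joined == 0x56781234);
      return 0;
    }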
+
static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
- bool IsPCRel, MCContext *Ctx) {
+ bool IsPCRel, MCContext *Ctx,
+ bool IsLittleEndian) {
unsigned Kind = Fixup.getKind();
switch (Kind) {
default:
@@ -316,6 +344,10 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
case FK_Data_2:
case FK_Data_4:
return Value;
+ case FK_SecRel_2:
+ return Value;
+ case FK_SecRel_4:
+ return Value;
case ARM::fixup_arm_movt_hi16:
if (!IsPCRel)
Value >>= 16;
@@ -342,9 +374,7 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
// inst{14-12} = Mid3;
// inst{7-0} = Lo8;
Value = (Hi4 << 16) | (i << 26) | (Mid3 << 12) | (Lo8);
- uint64_t swapped = (Value & 0xFFFF0000) >> 16;
- swapped |= (Value & 0x0000FFFF) << 16;
- return swapped;
+ return swapHalfWords(Value, IsLittleEndian);
}
case ARM::fixup_arm_ldst_pcrel_12:
// ARM PC-relative values are offset by 8.
@@ -364,11 +394,8 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
// Same addressing mode as fixup_arm_pcrel_10,
// but with 16-bit halfwords swapped.
- if (Kind == ARM::fixup_t2_ldst_pcrel_12) {
- uint64_t swapped = (Value & 0xFFFF0000) >> 16;
- swapped |= (Value & 0x0000FFFF) << 16;
- return swapped;
- }
+ if (Kind == ARM::fixup_t2_ldst_pcrel_12)
+ return swapHalfWords(Value, IsLittleEndian);
return Value;
}
@@ -401,9 +428,7 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
out |= (Value & 0x700) << 4;
out |= (Value & 0x0FF);
- uint64_t swapped = (out & 0xFFFF0000) >> 16;
- swapped |= (out & 0x0000FFFF) << 16;
- return swapped;
+ return swapHalfWords(out, IsLittleEndian);
}
case ARM::fixup_arm_condbranch:
@@ -434,9 +459,7 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
out |= (Value & 0x1FF800) << 5; // imm6 field
out |= (Value & 0x0007FF); // imm11 field
- uint64_t swapped = (out & 0xFFFF0000) >> 16;
- swapped |= (out & 0x0000FFFF) << 16;
- return swapped;
+ return swapHalfWords(out, IsLittleEndian);
}
case ARM::fixup_t2_condbranch: {
Value = Value - 4;
@@ -449,9 +472,7 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
out |= (Value & 0x1F800) << 5; // imm6 field
out |= (Value & 0x007FF); // imm11 field
- uint32_t swapped = (out & 0xFFFF0000) >> 16;
- swapped |= (out & 0x0000FFFF) << 16;
- return swapped;
+ return swapHalfWords(out, IsLittleEndian);
}
case ARM::fixup_arm_thumb_bl: {
// The value doesn't encode the low bit (always zero) and is offset by
@@ -475,13 +496,10 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
uint32_t imm10Bits = (offset & 0x1FF800) >> 11;
uint32_t imm11Bits = (offset & 0x000007FF);
- uint32_t Binary = 0;
- uint32_t firstHalf = (((uint16_t)signBit << 10) | (uint16_t)imm10Bits);
- uint32_t secondHalf = (((uint16_t)J1Bit << 13) | ((uint16_t)J2Bit << 11) |
+ uint32_t FirstHalf = (((uint16_t)signBit << 10) | (uint16_t)imm10Bits);
+ uint32_t SecondHalf = (((uint16_t)J1Bit << 13) | ((uint16_t)J2Bit << 11) |
(uint16_t)imm11Bits);
- Binary |= secondHalf << 16;
- Binary |= firstHalf;
- return Binary;
+ return joinHalfWords(FirstHalf, SecondHalf, IsLittleEndian);
}
case ARM::fixup_arm_thumb_blx: {
// The value doesn't encode the low two bits (always zero) and is offset by
@@ -508,13 +526,10 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
uint32_t imm10HBits = (offset & 0xFFC00) >> 10;
uint32_t imm10LBits = (offset & 0x3FF);
- uint32_t Binary = 0;
- uint32_t firstHalf = (((uint16_t)signBit << 10) | (uint16_t)imm10HBits);
- uint32_t secondHalf = (((uint16_t)J1Bit << 13) | ((uint16_t)J2Bit << 11) |
+ uint32_t FirstHalf = (((uint16_t)signBit << 10) | (uint16_t)imm10HBits);
+ uint32_t SecondHalf = (((uint16_t)J1Bit << 13) | ((uint16_t)J2Bit << 11) |
((uint16_t)imm10LBits) << 1);
- Binary |= secondHalf << 16;
- Binary |= firstHalf;
- return Binary;
+ return joinHalfWords(FirstHalf, SecondHalf, IsLittleEndian);
}
case ARM::fixup_arm_thumb_cp:
// Offset by 4, and don't encode the low two bits. Two bytes of that
@@ -566,11 +581,8 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
// Same addressing mode as fixup_arm_pcrel_10, but with 16-bit halfwords
// swapped.
- if (Kind == ARM::fixup_t2_pcrel_10) {
- uint32_t swapped = (Value & 0xFFFF0000) >> 16;
- swapped |= (Value & 0x0000FFFF) << 16;
- return swapped;
- }
+ if (Kind == ARM::fixup_t2_pcrel_10)
+ return swapHalfWords(Value, IsLittleEndian);
return Value;
}
@@ -603,7 +615,7 @@ void ARMAsmBackend::processFixupValue(const MCAssembler &Asm,
// the offset when the destination has the same MCFragment.
if (A && (unsigned)Fixup.getKind() == ARM::fixup_arm_thumb_bl) {
const MCSymbol &Sym = A->getSymbol().AliasedSymbol();
- MCSymbolData &SymData = Asm.getSymbolData(Sym);
+ const MCSymbolData &SymData = Asm.getSymbolData(Sym);
IsResolved = (SymData.getFragment() == DF);
}
// We must always generate a relocation for BL/BLX instructions if we have
@@ -618,7 +630,8 @@ void ARMAsmBackend::processFixupValue(const MCAssembler &Asm,
// Try to get the encoded value for the fixup as-if we're mapping it into
// the instruction. This allows adjustFixupValue() to issue a diagnostic
// if the value isn't valid.
- (void)adjustFixupValue(Fixup, Value, false, &Asm.getContext());
+ (void)adjustFixupValue(Fixup, Value, false, &Asm.getContext(),
+ IsLittleEndian);
}
/// getFixupKindNumBytes - The number of bytes the fixup may change.
@@ -662,6 +675,11 @@ static unsigned getFixupKindNumBytes(unsigned Kind) {
case ARM::fixup_t2_movt_hi16:
case ARM::fixup_t2_movw_lo16:
return 4;
+
+ case FK_SecRel_2:
+ return 2;
+ case FK_SecRel_4:
+ return 4;
}
}
@@ -716,7 +734,7 @@ void ARMAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
unsigned DataSize, uint64_t Value,
bool IsPCRel) const {
unsigned NumBytes = getFixupKindNumBytes(Fixup.getKind());
- Value = adjustFixupValue(Fixup, Value, IsPCRel, nullptr);
+ Value = adjustFixupValue(Fixup, Value, IsPCRel, nullptr, IsLittleEndian);
if (!Value) return; // Doesn't change encoding.
unsigned Offset = Fixup.getOffset();
@@ -724,8 +742,11 @@ void ARMAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
// Used to point to big endian bytes.
unsigned FullSizeBytes;
- if (!IsLittleEndian)
+ if (!IsLittleEndian) {
FullSizeBytes = getFixupKindContainerSizeBytes(Fixup.getKind());
+ assert((Offset + FullSizeBytes) <= DataSize && "Invalid fixup size!");
+ assert(NumBytes <= FullSizeBytes && "Invalid fixup size!");
+ }
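+ // On big-endian targets the fixup value is laid out from the most
+ // significant byte of the containing word, so the container size is needed
+ // to locate each byte; the asserts catch fixups that would otherwise write
+ // past the end of the fragment's data.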
// For each byte of the fragment that the fixup touches, mask in the bits from
// the fixup value. The Value has been "split up" into the appropriate
@@ -737,6 +758,15 @@ void ARMAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
}
namespace {
+// FIXME: This should be in a separate file.
+class ARMWinCOFFAsmBackend : public ARMAsmBackend {
+public:
+ ARMWinCOFFAsmBackend(const Target &T, const StringRef &Triple)
+ : ARMAsmBackend(T, Triple, true) { }
+ MCObjectWriter *createObjectWriter(raw_ostream &OS) const override {
+ return createARMWinCOFFObjectWriter(OS, /*Is64Bit=*/false);
+ }
+};
// FIXME: This should be in a separate file.
// ELF is an ELF of course...
@@ -777,7 +807,9 @@ MCAsmBackend *llvm::createARMAsmBackend(const Target &T,
bool isLittle) {
Triple TheTriple(TT);
- if (TheTriple.isOSBinFormatMachO()) {
+ switch (TheTriple.getObjectFormat()) {
+ default: llvm_unreachable("unsupported object format");
+ case Triple::MachO: {
MachO::CPUSubTypeARM CS =
StringSwitch<MachO::CPUSubTypeARM>(TheTriple.getArchName())
.Cases("armv4t", "thumbv4t", MachO::CPU_SUBTYPE_ARM_V4T)
@@ -792,15 +824,14 @@ MCAsmBackend *llvm::createARMAsmBackend(const Target &T,
return new DarwinARMAsmBackend(T, TT, CS);
}
-
-#if 0
- // FIXME: Introduce yet another checker but assert(0).
- if (TheTriple.isOSBinFormatCOFF())
- assert(0 && "Windows not supported on ARM");
-#endif
-
- uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(Triple(TT).getOS());
- return new ELFARMAsmBackend(T, TT, OSABI, isLittle);
+ case Triple::COFF:
+ assert(TheTriple.isOSWindows() && "non-Windows ARM COFF is not supported");
+ return new ARMWinCOFFAsmBackend(T, TT);
+ case Triple::ELF:
+ assert(TheTriple.isOSBinFormatELF() && "using ELF for non-ELF target");
+ uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(Triple(TT).getOS());
+ return new ELFARMAsmBackend(T, TT, OSABI, isLittle);
+ }
}
MCAsmBackend *llvm::createARMLEAsmBackend(const Target &T,
diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
index a4661b1..1c84263 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
@@ -74,7 +74,7 @@ unsigned ARMELFObjectWriter::GetRelocType(const MCValue &Target,
unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target,
const MCFixup &Fixup,
bool IsPCRel) const {
- MCSymbolRefExpr::VariantKind Modifier = Fixup.getAccessVariant();
+ MCSymbolRefExpr::VariantKind Modifier = Target.getAccessVariant();
unsigned Type = 0;
if (IsPCRel) {
@@ -91,6 +91,9 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target,
case MCSymbolRefExpr::VK_GOTTPOFF:
Type = ELF::R_ARM_TLS_IE32;
break;
+ case MCSymbolRefExpr::VK_GOTPCREL:
+ Type = ELF::R_ARM_GOT_PREL;
+ break;
}
break;
case ARM::fixup_arm_blx:
@@ -167,6 +170,9 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target,
case MCSymbolRefExpr::VK_GOTOFF:
Type = ELF::R_ARM_GOTOFF32;
break;
+ case MCSymbolRefExpr::VK_GOTPCREL:
+ Type = ELF::R_ARM_GOT_PREL;
+ break;
case MCSymbolRefExpr::VK_ARM_TARGET1:
Type = ELF::R_ARM_TARGET1;
break;
diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
index 5a01d26..a4d13ed 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
@@ -30,6 +30,7 @@
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCObjectStreamer.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSection.h"
@@ -62,7 +63,7 @@ static const char *GetFPUName(unsigned ID) {
#define ARM_FPU_NAME(NAME, ID) case ARM::ID: return NAME;
#include "ARMFPUName.def"
}
- return NULL;
+ return nullptr;
}
static const char *GetArchName(unsigned ID) {
@@ -75,7 +76,7 @@ static const char *GetArchName(unsigned ID) {
#define ARM_ARCH_ALIAS(NAME, ID) /* empty */
#include "ARMArchName.def"
}
- return NULL;
+ return nullptr;
}
static const char *GetArchDefaultCPUName(unsigned ID) {
@@ -88,7 +89,7 @@ static const char *GetArchDefaultCPUName(unsigned ID) {
#define ARM_ARCH_ALIAS(NAME, ID) /* empty */
#include "ARMArchName.def"
}
- return NULL;
+ return nullptr;
}
static unsigned GetArchDefaultCPUArch(unsigned ID) {
@@ -139,6 +140,7 @@ class ARMTargetAsmStreamer : public ARMTargetStreamer {
void finishAttributeSection() override;
void AnnotateTLSDescriptorSequence(const MCSymbolRefExpr *SRE) override;
+ void emitThumbSet(MCSymbol *Symbol, const MCExpr *Value) override;
public:
ARMTargetAsmStreamer(MCStreamer &S, formatted_raw_ostream &OS,
@@ -260,6 +262,10 @@ ARMTargetAsmStreamer::AnnotateTLSDescriptorSequence(const MCSymbolRefExpr *S) {
OS << "\t.tlsdescseq\t" << S->getSymbol().getName();
}
+void ARMTargetAsmStreamer::emitThumbSet(MCSymbol *Symbol, const MCExpr *Value) {
+ OS << "\t.thumb_set\t" << *Symbol << ", " << *Value << '\n';
+}
+
void ARMTargetAsmStreamer::emitInst(uint32_t Inst, char Suffix) {
OS << "\t.inst";
if (Suffix)
@@ -310,7 +316,7 @@ private:
for (size_t i = 0; i < Contents.size(); ++i)
if (Contents[i].Tag == Attribute)
return &Contents[i];
- return 0;
+ return nullptr;
}
void setAttributeItem(unsigned Attribute, unsigned Value,
@@ -406,8 +412,10 @@ private:
void emitFPU(unsigned FPU) override;
void emitInst(uint32_t Inst, char Suffix = '\0') override;
void finishAttributeSection() override;
+ void emitLabel(MCSymbol *Symbol) override;
void AnnotateTLSDescriptorSequence(const MCSymbolRefExpr *SRE) override;
+ void emitThumbSet(MCSymbol *Symbol, const MCExpr *Value) override;
size_t calculateContentSize() const;
@@ -415,7 +423,7 @@ public:
ARMTargetELFStreamer(MCStreamer &S)
: ARMTargetStreamer(S), CurrentVendor("aeabi"), FPU(ARM::INVALID_FPU),
Arch(ARM::INVALID_ARCH), EmittedArch(ARM::INVALID_ARCH),
- AttributeSection(0) {}
+ AttributeSection(nullptr) {}
};
/// Extend the generic ELFStreamer class so that it can emit mapping symbols at
@@ -531,7 +539,8 @@ public:
/// This is one of the functions used to emit data into an ELF section, so the
/// ARM streamer overrides it to add the appropriate mapping symbol ($d) if
/// necessary.
- void EmitValueImpl(const MCExpr *Value, unsigned Size) override {
+ void EmitValueImpl(const MCExpr *Value, unsigned Size,
+ const SMLoc &Loc) override {
EmitDataMappingSymbol();
- MCELFStreamer::EmitValueImpl(Value, Size);
+ MCELFStreamer::EmitValueImpl(Value, Size, Loc);
}
@@ -600,12 +609,8 @@ private:
}
void EmitThumbFunc(MCSymbol *Func) override {
- // FIXME: Anything needed here to flag the function as thumb?
-
getAssembler().setIsThumbFunc(Func);
-
- MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Func);
- SD.setFlags(SD.getFlags() | ELF_Other_ThumbFunc);
+ EmitSymbolAttribute(Func, MCSA_ELF_TypeFunction);
}
// Helper functions for ARM exception handling directives
@@ -980,10 +985,35 @@ void ARMTargetELFStreamer::finishAttributeSection() {
Contents.clear();
FPU = ARM::INVALID_FPU;
}
+
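+// When assembling Thumb code, a label that already carries function type
+// (e.g. from a .type directive) must also have the Thumb bit recorded, so
+// that calls and address captures through it resolve to the Thumb entry
+// point; EmitThumbFunc() takes care of marking the symbol.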
+void ARMTargetELFStreamer::emitLabel(MCSymbol *Symbol) {
+ ARMELFStreamer &Streamer = getStreamer();
+ if (!Streamer.IsThumb)
+ return;
+
+ const MCSymbolData &SD = Streamer.getOrCreateSymbolData(Symbol);
+ if (MCELF::GetType(SD) & (ELF::STT_FUNC << ELF_STT_Shift))
+ Streamer.EmitThumbFunc(Symbol);
+}
+
void
ARMTargetELFStreamer::AnnotateTLSDescriptorSequence(const MCSymbolRefExpr *S) {
getStreamer().EmitFixup(S, FK_Data_4);
}
+
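+// .thumb_set behaves like .set, but additionally marks the alias as a Thumb
+// function. If the aliased expression refers to a symbol that is not yet
+// defined, its Thumb-ness cannot be determined at this point, so only the
+// plain assignment is emitted in that case.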
+void ARMTargetELFStreamer::emitThumbSet(MCSymbol *Symbol, const MCExpr *Value) {
+ if (const MCSymbolRefExpr *SRE = dyn_cast<MCSymbolRefExpr>(Value)) {
+ const MCSymbol &Sym = SRE->getSymbol();
+ if (!Sym.isDefined()) {
+ getStreamer().EmitAssignment(Symbol, Value);
+ return;
+ }
+ }
+
+ getStreamer().EmitThumbFunc(Symbol);
+ getStreamer().EmitAssignment(Symbol, Value);
+}
+
void ARMTargetELFStreamer::emitInst(uint32_t Inst, char Suffix) {
getStreamer().emitInst(Inst, Suffix);
}
@@ -1012,7 +1042,7 @@ inline void ARMELFStreamer::SwitchToEHSection(const char *Prefix,
}
// Get .ARM.extab or .ARM.exidx section
- const MCSectionELF *EHSection = NULL;
+ const MCSectionELF *EHSection = nullptr;
if (const MCSymbol *Group = FnSection.getGroup()) {
EHSection = getContext().getELFSection(
EHSecName, Type, Flags | ELF::SHF_GROUP, Kind,
@@ -1049,9 +1079,9 @@ void ARMELFStreamer::EmitFixup(const MCExpr *Expr, MCFixupKind Kind) {
}
void ARMELFStreamer::Reset() {
- ExTab = NULL;
- FnStart = NULL;
- Personality = NULL;
+ ExTab = nullptr;
+ FnStart = nullptr;
+ Personality = nullptr;
PersonalityIndex = ARM::EHABI::NUM_PERSONALITY_INDEX;
FPReg = ARM::SP;
FPOffset = 0;
@@ -1065,7 +1095,7 @@ void ARMELFStreamer::Reset() {
}
void ARMELFStreamer::emitFnStart() {
- assert(FnStart == 0);
+ assert(FnStart == nullptr);
FnStart = getContext().CreateTempSymbol();
EmitLabel(FnStart);
}
@@ -1104,11 +1134,14 @@ void ARMELFStreamer::emitFnEnd() {
// the second word of exception index table entry. The size of the unwind
// opcodes should always be 4 bytes.
assert(PersonalityIndex == ARM::EHABI::AEABI_UNWIND_CPP_PR0 &&
- "Compact model must use __aeabi_cpp_unwind_pr0 as personality");
+ "Compact model must use __aeabi_unwind_cpp_pr0 as personality");
assert(Opcodes.size() == 4u &&
- "Unwind opcode size for __aeabi_cpp_unwind_pr0 must be equal to 4");
- EmitBytes(StringRef(reinterpret_cast<const char*>(Opcodes.data()),
- Opcodes.size()));
+ "Unwind opcode size for __aeabi_unwind_cpp_pr0 must be equal to 4");
+ uint64_t Intval = Opcodes[0] |
+ Opcodes[1] << 8 |
+ Opcodes[2] << 16 |
+ Opcodes[3] << 24;
+ EmitIntValue(Intval, Opcodes.size());
}
// Switch to the section containing FnStart
@@ -1180,8 +1213,15 @@ void ARMELFStreamer::FlushUnwindOpcodes(bool NoHandlerData) {
}
// Emit unwind opcodes
- EmitBytes(StringRef(reinterpret_cast<const char *>(Opcodes.data()),
- Opcodes.size()));
+ assert((Opcodes.size() % 4) == 0 &&
+ "Unwind opcode size for __aeabi_unwind_cpp_pr0 must be multiple of 4");
+ for (unsigned I = 0; I != Opcodes.size(); I += 4) {
+ uint64_t Intval = Opcodes[I] |
+ Opcodes[I + 1] << 8 |
+ Opcodes[I + 2] << 16 |
+ Opcodes[I + 3] << 24;
+ EmitIntValue(Intval, 4);
+ }
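+ // For example, an opcode buffer of four 0xB0 ("finish") opcodes packs into
+ // the single word 0xB0B0B0B0, which EmitIntValue() emits in the target's
+ // byte order rather than as raw bytes.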
// According to ARM EHABI section 9.2, if the __aeabi_unwind_cpp_pr1() or
// __aeabi_unwind_cpp_pr2() is used, then the handler data must be emitted
@@ -1283,13 +1323,11 @@ void ARMELFStreamer::emitUnwindRaw(int64_t Offset,
namespace llvm {
MCStreamer *createMCAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS,
- bool isVerboseAsm, bool useCFI,
- bool useDwarfDirectory,
+ bool isVerboseAsm, bool useDwarfDirectory,
MCInstPrinter *InstPrint, MCCodeEmitter *CE,
MCAsmBackend *TAB, bool ShowInst) {
- MCStreamer *S =
- llvm::createAsmStreamer(Ctx, OS, isVerboseAsm, useCFI, useDwarfDirectory,
- InstPrint, CE, TAB, ShowInst);
+ MCStreamer *S = llvm::createAsmStreamer(
+ Ctx, OS, isVerboseAsm, useDwarfDirectory, InstPrint, CE, TAB, ShowInst);
new ARMTargetAsmStreamer(*S, OS, *InstPrint, isVerboseAsm);
return S;
}
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
index b7f96e0..7a19208 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
@@ -25,7 +25,7 @@ ARMMCAsmInfoDarwin::ARMMCAsmInfoDarwin(StringRef TT) {
(TheTriple.getArch() == Triple::thumbeb))
IsLittleEndian = false;
- Data64bitsDirective = 0;
+ Data64bitsDirective = nullptr;
CommentString = "@";
Code16Directive = ".code\t16";
Code32Directive = ".code\t32";
@@ -50,7 +50,7 @@ ARMELFMCAsmInfo::ARMELFMCAsmInfo(StringRef TT) {
// ".comm align is in bytes but .align is pow-2."
AlignmentIsInBytes = false;
- Data64bitsDirective = 0;
+ Data64bitsDirective = nullptr;
CommentString = "@";
Code16Directive = ".code\t16";
Code32Directive = ".code\t32";
@@ -59,7 +59,14 @@ ARMELFMCAsmInfo::ARMELFMCAsmInfo(StringRef TT) {
SupportsDebugInformation = true;
// Exceptions handling
- ExceptionsType = ExceptionHandling::ARM;
+ switch (TheTriple.getOS()) {
+ case Triple::NetBSD:
+ ExceptionsType = ExceptionHandling::DwarfCFI;
+ break;
+ default:
+ ExceptionsType = ExceptionHandling::ARM;
+ break;
+ }
// foo(plt) instead of foo@plt
UseParensForSymbolVariant = true;
@@ -89,6 +96,7 @@ void ARMCOFFMCAsmInfoGNU::anchor() { }
ARMCOFFMCAsmInfoGNU::ARMCOFFMCAsmInfoGNU() {
AlignmentIsInBytes = false;
+ HasSingleParameterDotFile = true;
CommentString = "@";
Code16Directive = ".code\t16";
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h
index beaf6a4..51cfa0a 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h
@@ -35,13 +35,13 @@ namespace llvm {
};
class ARMCOFFMCAsmInfoMicrosoft : public MCAsmInfoMicrosoft {
- void anchor();
+ void anchor() override;
public:
explicit ARMCOFFMCAsmInfoMicrosoft();
};
class ARMCOFFMCAsmInfoGNU : public MCAsmInfoGNUCOFF {
- void anchor();
+ void anchor() override;
public:
explicit ARMCOFFMCAsmInfoGNU();
};
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
index 5564e0a..5b51a52 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
@@ -11,7 +11,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "mccodeemitter"
#include "MCTargetDesc/ARMMCTargetDesc.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "MCTargetDesc/ARMBaseInfo.h"
@@ -31,6 +30,8 @@
using namespace llvm;
+#define DEBUG_TYPE "mccodeemitter"
+
STATISTIC(MCNumEmitted, "Number of MC instructions emitted.");
STATISTIC(MCNumCPRelocations, "Number of constant pool relocations created.");
@@ -1036,16 +1037,17 @@ ARMMCCodeEmitter::getHiLo16ImmOpValue(const MCInst &MI, unsigned OpIdx,
: ARM::fixup_arm_movw_lo16);
break;
}
+
Fixups.push_back(MCFixup::Create(0, E, Kind, MI.getLoc()));
return 0;
}
// If the expression doesn't have :upper16: or :lower16: on it,
- // it's just a plain immediate expression, and those evaluate to
+ // it's just a plain immediate expression, previously those evaluated to
// the lower 16 bits of the expression regardless of whether
- // we have a movt or a movw.
- Kind = MCFixupKind(isThumb2(STI) ? ARM::fixup_t2_movw_lo16
- : ARM::fixup_arm_movw_lo16);
- Fixups.push_back(MCFixup::Create(0, E, Kind, MI.getLoc()));
+ // we have a movt or a movw, but that led to misleading results.
+ // This is now disallowed in the AsmParser in validateInstruction(),
+ // so this should never happen.
+ llvm_unreachable("expression without :upper16: or :lower16:");
return 0;
}
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp
index fc8505b..87ea875 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp
@@ -7,12 +7,13 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "armmcexpr"
#include "ARMMCExpr.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
using namespace llvm;
+#define DEBUG_TYPE "armmcexpr"
+
const ARMMCExpr*
ARMMCExpr::Create(VariantKind Kind, const MCExpr *Expr,
MCContext &Ctx) {
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
index 949a3d5..04d63a7 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
@@ -21,6 +21,7 @@
#include "llvm/MC/MCInstrAnalysis.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
@@ -106,9 +107,11 @@ std::string ARM_MC::ParseARMTriple(StringRef TT, StringRef CPU) {
unsigned SubVer = TT[Idx];
if (SubVer == '8') {
if (NoCPU)
- // v8a: FeatureDB, FeatureFPARMv8, FeatureNEON, FeatureDSPThumb2, FeatureMP,
- // FeatureHWDiv, FeatureHWDivARM, FeatureTrustZone, FeatureT2XtPk, FeatureCrypto, FeatureCRC
- ARMArchFeature = "+v8,+db,+fp-armv8,+neon,+t2dsp,+mp,+hwdiv,+hwdiv-arm,+trustzone,+t2xtpk,+crypto,+crc";
+ // v8a: FeatureDB, FeatureFPARMv8, FeatureNEON, FeatureDSPThumb2,
+ // FeatureMP, FeatureHWDiv, FeatureHWDivARM, FeatureTrustZone,
+ // FeatureT2XtPk, FeatureCrypto, FeatureCRC
+ ARMArchFeature = "+v8,+db,+fp-armv8,+neon,+t2dsp,+mp,+hwdiv,+hwdiv-arm,"
+ "+trustzone,+t2xtpk,+crypto,+crc";
else
// Use CPU to figure out the exact features
ARMArchFeature = "+v8";
@@ -245,7 +248,7 @@ static MCAsmInfo *createARMMCAsmInfo(const MCRegisterInfo &MRI, StringRef TT) {
}
unsigned Reg = MRI.getDwarfRegNum(ARM::SP, true);
- MAI->addInitialFrameState(MCCFIInstruction::createDefCfa(0, Reg, 0));
+ MAI->addInitialFrameState(MCCFIInstruction::createDefCfa(nullptr, Reg, 0));
return MAI;
}
@@ -273,18 +276,20 @@ static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
bool NoExecStack) {
Triple TheTriple(TT);
- if (TheTriple.isOSBinFormatMachO()) {
+ switch (TheTriple.getObjectFormat()) {
+ default: llvm_unreachable("unsupported object format");
+ case Triple::MachO: {
MCStreamer *S = createMachOStreamer(Ctx, MAB, OS, Emitter, false);
new ARMTargetStreamer(*S);
return S;
}
-
- if (TheTriple.isOSWindows()) {
- llvm_unreachable("ARM does not support Windows COFF format");
+ case Triple::COFF:
+ assert(TheTriple.isOSWindows() && "non-Windows ARM COFF is not supported");
+ return createARMWinCOFFStreamer(Ctx, MAB, *Emitter, OS);
+ case Triple::ELF:
+ return createARMELFStreamer(Ctx, MAB, OS, Emitter, false, NoExecStack,
+ TheTriple.getArch() == Triple::thumb);
}
-
- return createARMELFStreamer(Ctx, MAB, OS, Emitter, false, NoExecStack,
- TheTriple.getArch() == Triple::thumb);
}
static MCInstPrinter *createARMMCInstPrinter(const Target &T,
@@ -295,7 +300,7 @@ static MCInstPrinter *createARMMCInstPrinter(const Target &T,
const MCSubtargetInfo &STI) {
if (SyntaxVariant == 0)
return new ARMInstPrinter(MAI, MII, MRI, STI);
- return 0;
+ return nullptr;
}
static MCRelocationInfo *createARMMCRelocationInfo(StringRef TT,
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
index e81876f..8853a8c 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
@@ -47,8 +47,7 @@ namespace ARM_MC {
}
MCStreamer *createMCAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS,
- bool isVerboseAsm, bool useCFI,
- bool useDwarfDirectory,
+ bool isVerboseAsm, bool useDwarfDirectory,
MCInstPrinter *InstPrint, MCCodeEmitter *CE,
MCAsmBackend *TAB, bool ShowInst);
@@ -78,6 +77,11 @@ MCAsmBackend *createThumbLEAsmBackend(const Target &T, const MCRegisterInfo &MRI
MCAsmBackend *createThumbBEAsmBackend(const Target &T, const MCRegisterInfo &MRI,
StringRef TT, StringRef CPU);
+/// createARMWinCOFFStreamer - Construct a PE/COFF machine code streamer which
+/// will generate a PE/COFF object file.
+MCStreamer *createARMWinCOFFStreamer(MCContext &Context, MCAsmBackend &MAB,
+ MCCodeEmitter &Emitter, raw_ostream &OS);
+
/// createARMELFObjectWriter - Construct an ARM ELF object writer.
MCObjectWriter *createARMELFObjectWriter(raw_ostream &OS,
uint8_t OSABI,
@@ -89,6 +93,8 @@ MCObjectWriter *createARMMachObjectWriter(raw_ostream &OS,
uint32_t CPUType,
uint32_t CPUSubtype);
+/// createARMWinCOFFObjectWriter - Construct an ARM PE/COFF object writer.
+MCObjectWriter *createARMWinCOFFObjectWriter(raw_ostream &OS, bool Is64Bit);
/// createARMMachORelocationInfo - Construct ARM Mach-O relocation info.
MCRelocationInfo *createARMMachORelocationInfo(MCContext &Ctx);
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
index 3bf5cf1..ecfa4e5 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
@@ -156,7 +156,7 @@ RecordARMScatteredHalfRelocation(MachObjectWriter *Writer,
// See <reloc.h>.
const MCSymbol *A = &Target.getSymA()->getSymbol();
- MCSymbolData *A_SD = &Asm.getSymbolData(*A);
+ const MCSymbolData *A_SD = &Asm.getSymbolData(*A);
if (!A_SD->getFragment())
Asm.getContext().FatalError(Fixup.getLoc(),
@@ -170,7 +170,7 @@ RecordARMScatteredHalfRelocation(MachObjectWriter *Writer,
FixedValue += SecAddr;
if (const MCSymbolRefExpr *B = Target.getSymB()) {
- MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol());
+ const MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol());
if (!B_SD->getFragment())
Asm.getContext().FatalError(Fixup.getLoc(),
@@ -206,11 +206,11 @@ RecordARMScatteredHalfRelocation(MachObjectWriter *Writer,
// The thumb bit shouldn't be set in the 'other-half' bit of the
// relocation, but it will be set in FixedValue if the base symbol
// is a thumb function. Clear it out here.
- if (A_SD->getFlags() & SF_ThumbFunc)
+ if (Asm.isThumbFunc(A))
FixedValue &= 0xfffffffe;
break;
case ARM::fixup_t2_movt_hi16:
- if (A_SD->getFlags() & SF_ThumbFunc)
+ if (Asm.isThumbFunc(A))
FixedValue &= 0xfffffffe;
MovtBit = 1;
// Fallthrough
@@ -259,7 +259,7 @@ void ARMMachObjectWriter::RecordARMScatteredRelocation(MachObjectWriter *Writer,
// See <reloc.h>.
const MCSymbol *A = &Target.getSymA()->getSymbol();
- MCSymbolData *A_SD = &Asm.getSymbolData(*A);
+ const MCSymbolData *A_SD = &Asm.getSymbolData(*A);
if (!A_SD->getFragment())
Asm.getContext().FatalError(Fixup.getLoc(),
@@ -272,7 +272,7 @@ void ARMMachObjectWriter::RecordARMScatteredRelocation(MachObjectWriter *Writer,
uint32_t Value2 = 0;
if (const MCSymbolRefExpr *B = Target.getSymB()) {
- MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol());
+ const MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol());
if (!B_SD->getFragment())
Asm.getContext().FatalError(Fixup.getLoc(),
@@ -378,7 +378,7 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer,
}
// Get the symbol data, if any.
- MCSymbolData *SD = 0;
+ const MCSymbolData *SD = nullptr;
if (Target.getSymA())
SD = &Asm.getSymbolData(Target.getSymA()->getSymbol());
diff --git a/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
index fdc0ed7..e3cfb05 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
@@ -109,7 +109,7 @@ ConstantPool *
AssemblerConstantPools::getConstantPool(const MCSection *Section) {
ConstantPoolMapTy::iterator CP = ConstantPools.find(Section);
if (CP == ConstantPools.end())
- return 0;
+ return nullptr;
return &CP->second;
}
@@ -246,3 +246,7 @@ void ARMTargetStreamer::AnnotateTLSDescriptorSequence(
const MCSymbolRefExpr *SRE) {
llvm_unreachable("unimplemented");
}
+
+void ARMTargetStreamer::emitThumbSet(MCSymbol *Symbol, const MCExpr *Value) {
+ llvm_unreachable("unimplemented");
+}
diff --git a/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp
new file mode 100644
index 0000000..d31f1f4
--- /dev/null
+++ b/lib/Target/ARM/MCTargetDesc/ARMWinCOFFObjectWriter.cpp
@@ -0,0 +1,82 @@
+//===-- ARMWinCOFFObjectWriter.cpp - ARM Windows COFF Object Writer -- C++ -==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/ARMFixupKinds.h"
+#include "llvm/MC/MCFixup.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/MC/MCWinCOFFObjectWriter.h"
+#include "llvm/Support/COFF.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+namespace {
+class ARMWinCOFFObjectWriter : public MCWinCOFFObjectTargetWriter {
+public:
+ ARMWinCOFFObjectWriter(bool Is64Bit)
+ : MCWinCOFFObjectTargetWriter(COFF::IMAGE_FILE_MACHINE_ARMNT) {
+ assert(!Is64Bit && "AArch64 support not yet implemented");
+ }
+ virtual ~ARMWinCOFFObjectWriter() { }
+
+ unsigned getRelocType(const MCValue &Target, const MCFixup &Fixup,
+ bool IsCrossSection) const override;
+
+ bool recordRelocation(const MCFixup &) const override;
+};
+
+unsigned ARMWinCOFFObjectWriter::getRelocType(const MCValue &Target,
+ const MCFixup &Fixup,
+ bool IsCrossSection) const {
+ assert(getMachine() == COFF::IMAGE_FILE_MACHINE_ARMNT &&
+ "AArch64 support not yet implemented");
+
+ MCSymbolRefExpr::VariantKind Modifier =
+ Target.isAbsolute() ? MCSymbolRefExpr::VK_None : Target.getSymA()->getKind();
+
+ switch (static_cast<unsigned>(Fixup.getKind())) {
+ default: llvm_unreachable("unsupported relocation type");
+ case FK_Data_4:
+ switch (Modifier) {
+ case MCSymbolRefExpr::VK_COFF_IMGREL32:
+ return COFF::IMAGE_REL_ARM_ADDR32NB;
+ case MCSymbolRefExpr::VK_SECREL:
+ return COFF::IMAGE_REL_ARM_SECREL;
+ default:
+ return COFF::IMAGE_REL_ARM_ADDR32;
+ }
+ case FK_SecRel_2:
+ return COFF::IMAGE_REL_ARM_SECTION;
+ case FK_SecRel_4:
+ return COFF::IMAGE_REL_ARM_SECREL;
+ case ARM::fixup_t2_condbranch:
+ return COFF::IMAGE_REL_ARM_BRANCH20T;
+ case ARM::fixup_t2_uncondbranch:
+ return COFF::IMAGE_REL_ARM_BRANCH24T;
+ case ARM::fixup_arm_thumb_bl:
+ case ARM::fixup_arm_thumb_blx:
+ return COFF::IMAGE_REL_ARM_BLX23T;
+ case ARM::fixup_t2_movw_lo16:
+ case ARM::fixup_t2_movt_hi16:
+ return COFF::IMAGE_REL_ARM_MOV32T;
+ }
+}
+
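+// A single IMAGE_REL_ARM_MOV32T relocation covers a whole movw/movt pair, so
+// the trailing movt fixup is filtered out here and only the leading movw
+// fixup produces a relocation record.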
+bool ARMWinCOFFObjectWriter::recordRelocation(const MCFixup &Fixup) const {
+ return static_cast<unsigned>(Fixup.getKind()) != ARM::fixup_t2_movt_hi16;
+}
+}
+
+namespace llvm {
+MCObjectWriter *createARMWinCOFFObjectWriter(raw_ostream &OS, bool Is64Bit) {
+ MCWinCOFFObjectTargetWriter *MOTW = new ARMWinCOFFObjectWriter(Is64Bit);
+ return createWinCOFFObjectWriter(MOTW, OS);
+}
+}
+
diff --git a/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp
new file mode 100644
index 0000000..b344ced
--- /dev/null
+++ b/lib/Target/ARM/MCTargetDesc/ARMWinCOFFStreamer.cpp
@@ -0,0 +1,46 @@
+//===-- ARMWinCOFFStreamer.cpp - ARM Target WinCOFF Streamer ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMMCTargetDesc.h"
+#include "llvm/MC/MCWinCOFFStreamer.h"
+
+using namespace llvm;
+
+namespace {
+class ARMWinCOFFStreamer : public MCWinCOFFStreamer {
+public:
+ ARMWinCOFFStreamer(MCContext &C, MCAsmBackend &AB, MCCodeEmitter &CE,
+ raw_ostream &OS)
+ : MCWinCOFFStreamer(C, AB, CE, OS) { }
+
+ void EmitAssemblerFlag(MCAssemblerFlag Flag) override;
+ void EmitThumbFunc(MCSymbol *Symbol) override;
+};
+
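+// Windows on ARM is always Thumb-2 with unified syntax, so the .syntax and
+// .code 16 assembler flags carry no extra information for the COFF streamer
+// and are accepted as no-ops; any other flag is unexpected here.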
+void ARMWinCOFFStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) {
+ switch (Flag) {
+ default: llvm_unreachable("not implemented");
+ case MCAF_SyntaxUnified:
+ case MCAF_Code16:
+ break;
+ }
+}
+
+void ARMWinCOFFStreamer::EmitThumbFunc(MCSymbol *Symbol) {
+ getAssembler().setIsThumbFunc(Symbol);
+}
+}
+
+namespace llvm {
+MCStreamer *createARMWinCOFFStreamer(MCContext &Context, MCAsmBackend &MAB,
+ MCCodeEmitter &Emitter, raw_ostream &OS) {
+ return new ARMWinCOFFStreamer(Context, MAB, Emitter, OS);
+}
+}
+
diff --git a/lib/Target/ARM/MCTargetDesc/Android.mk b/lib/Target/ARM/MCTargetDesc/Android.mk
index 074d29e..a5827f7 100644
--- a/lib/Target/ARM/MCTargetDesc/Android.mk
+++ b/lib/Target/ARM/MCTargetDesc/Android.mk
@@ -17,7 +17,9 @@ arm_mc_desc_SRC_FILES := \
ARMMachObjectWriter.cpp \
ARMMachORelocationInfo.cpp \
ARMTargetStreamer.cpp \
- ARMUnwindOpAsm.cpp
+ ARMUnwindOpAsm.cpp \
+ ARMWinCOFFObjectWriter.cpp \
+ ARMWinCOFFStreamer.cpp \
# For the host
# =====================================================
diff --git a/lib/Target/ARM/MCTargetDesc/CMakeLists.txt b/lib/Target/ARM/MCTargetDesc/CMakeLists.txt
index 06812d4..9582e8c 100644
--- a/lib/Target/ARM/MCTargetDesc/CMakeLists.txt
+++ b/lib/Target/ARM/MCTargetDesc/CMakeLists.txt
@@ -1,14 +1,15 @@
add_llvm_library(LLVMARMDesc
ARMAsmBackend.cpp
ARMELFObjectWriter.cpp
ARMELFStreamer.cpp
+ ARMMachObjectWriter.cpp
+ ARMMachORelocationInfo.cpp
ARMMCAsmInfo.cpp
ARMMCCodeEmitter.cpp
ARMMCExpr.cpp
ARMMCTargetDesc.cpp
- ARMMachObjectWriter.cpp
- ARMELFObjectWriter.cpp
ARMTargetStreamer.cpp
ARMUnwindOpAsm.cpp
- ARMMachORelocationInfo.cpp
+ ARMWinCOFFObjectWriter.cpp
+ ARMWinCOFFStreamer.cpp
)
diff --git a/lib/Target/ARM/MLxExpansionPass.cpp b/lib/Target/ARM/MLxExpansionPass.cpp
index 80af859..f6d24e9 100644
--- a/lib/Target/ARM/MLxExpansionPass.cpp
+++ b/lib/Target/ARM/MLxExpansionPass.cpp
@@ -12,7 +12,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "mlx-expansion"
#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMSubtarget.h"
@@ -28,6 +27,8 @@
#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
+#define DEBUG_TYPE "mlx-expansion"
+
static cl::opt<bool>
ForceExapnd("expand-all-fp-mlx", cl::init(false), cl::Hidden);
static cl::opt<unsigned>
@@ -73,7 +74,7 @@ namespace {
}
void MLxExpansion::clearStack() {
- std::fill(LastMIs, LastMIs + 4, (MachineInstr*)0);
+ std::fill(LastMIs, LastMIs + 4, nullptr);
MIIdx = 0;
}
@@ -88,7 +89,7 @@ MachineInstr *MLxExpansion::getAccDefMI(MachineInstr *MI) const {
// real definition MI. This is important for _sfp instructions.
unsigned Reg = MI->getOperand(1).getReg();
if (TargetRegisterInfo::isPhysicalRegister(Reg))
- return 0;
+ return nullptr;
MachineBasicBlock *MBB = MI->getParent();
MachineInstr *DefMI = MRI->getVRegDef(Reg);
@@ -352,7 +353,7 @@ bool MLxExpansion::ExpandFPMLxInstructions(MachineBasicBlock &MBB) {
if (Domain == ARMII::DomainGeneral) {
if (++Skip == 2)
// Assume dual issues of non-VFP / NEON instructions.
- pushStack(0);
+ pushStack(nullptr);
} else {
Skip = 0;
diff --git a/lib/Target/ARM/README-Thumb.txt b/lib/Target/ARM/README-Thumb.txt
index a64707e..f4d9be3 100644
--- a/lib/Target/ARM/README-Thumb.txt
+++ b/lib/Target/ARM/README-Thumb.txt
@@ -215,10 +215,6 @@ etc. Almost all Thumb instructions clobber condition code.
//===---------------------------------------------------------------------===//
-Add ldmia, stmia support.
-
-//===---------------------------------------------------------------------===//
-
Thumb load / store address mode offsets are scaled. The values kept in the
instruction operands are pre-scale values. This probably ought to be changed
to avoid extra work when we convert Thumb2 instructions to Thumb1 instructions.
diff --git a/lib/Target/ARM/Thumb1FrameLowering.cpp b/lib/Target/ARM/Thumb1FrameLowering.cpp
index 2224652..be29dc5 100644
--- a/lib/Target/ARM/Thumb1FrameLowering.cpp
+++ b/lib/Target/ARM/Thumb1FrameLowering.cpp
@@ -293,7 +293,7 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const {
AFI->setShouldRestoreSPFromFP(true);
}
-static bool isCSRestore(MachineInstr *MI, const uint16_t *CSRegs) {
+static bool isCSRestore(MachineInstr *MI, const MCPhysReg *CSRegs) {
if (MI->getOpcode() == ARM::tLDRspi &&
MI->getOperand(1).isFI() &&
isCalleeSavedRegister(MI->getOperand(0).getReg(), CSRegs))
@@ -328,7 +328,7 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF,
int NumBytes = (int)MFI->getStackSize();
assert((unsigned)NumBytes >= ArgRegsSaveSize &&
"ArgRegsSaveSize is included in NumBytes");
- const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs();
+ const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs();
unsigned FramePtr = RegInfo->getFrameRegister(MF);
if (!AFI->hasStackFrame()) {
diff --git a/lib/Target/ARM/Thumb1RegisterInfo.h b/lib/Target/ARM/Thumb1RegisterInfo.h
index 93e2b5a..0c0abbe 100644
--- a/lib/Target/ARM/Thumb1RegisterInfo.h
+++ b/lib/Target/ARM/Thumb1RegisterInfo.h
@@ -56,7 +56,7 @@ public:
unsigned Reg) const override;
void eliminateFrameIndex(MachineBasicBlock::iterator II,
int SPAdj, unsigned FIOperandNum,
- RegScavenger *RS = NULL) const override;
+ RegScavenger *RS = nullptr) const override;
};
}
diff --git a/lib/Target/ARM/Thumb2ITBlockPass.cpp b/lib/Target/ARM/Thumb2ITBlockPass.cpp
index 406dbe0..edb9ff3 100644
--- a/lib/Target/ARM/Thumb2ITBlockPass.cpp
+++ b/lib/Target/ARM/Thumb2ITBlockPass.cpp
@@ -7,7 +7,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "thumb2-it"
#include "ARM.h"
#include "ARMMachineFunctionInfo.h"
#include "Thumb2InstrInfo.h"
@@ -19,6 +18,8 @@
#include "llvm/CodeGen/MachineInstrBundle.h"
using namespace llvm;
+#define DEBUG_TYPE "thumb2-it"
+
STATISTIC(NumITs, "Number of IT blocks inserted");
STATISTIC(NumMovedInsts, "Number of predicated instructions moved");
diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp
index 04b83fb..6267ecf 100644
--- a/lib/Target/ARM/Thumb2SizeReduction.cpp
+++ b/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -7,7 +7,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "t2-reduce-size"
#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMSubtarget.h"
@@ -25,6 +24,8 @@
#include "llvm/Target/TargetMachine.h"
using namespace llvm;
+#define DEBUG_TYPE "t2-reduce-size"
+
STATISTIC(NumNarrows, "Number of 32-bit instrs reduced to 16-bit ones");
STATISTIC(Num2Addrs, "Number of 32-bit instrs reduced to 2addr 16-bit ones");
STATISTIC(NumLdSts, "Number of 32-bit load / store reduced to 16-bit ones");
@@ -915,15 +916,14 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
// Yes, CPSR could be livein.
bool LiveCPSR = MBB.isLiveIn(ARM::CPSR);
- MachineInstr *BundleMI = 0;
+ MachineInstr *BundleMI = nullptr;
- CPSRDef = 0;
+ CPSRDef = nullptr;
HighLatencyCPSR = false;
// Check predecessors for the latest CPSRDef.
- for (MachineBasicBlock::pred_iterator
- I = MBB.pred_begin(), E = MBB.pred_end(); I != E; ++I) {
- const MBBInfo &PInfo = BlockInfo[(*I)->getNumber()];
+ for (auto *Pred : MBB.predecessors()) {
+ const MBBInfo &PInfo = BlockInfo[Pred->getNumber()];
if (!PInfo.Visited) {
// Since blocks are visited in RPO, this must be a back-edge.
continue;
@@ -984,7 +984,7 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
LiveCPSR = UpdateCPSRDef(*MI, LiveCPSR, DefCPSR);
if (MI->isCall()) {
// Calls don't really set CPSR.
- CPSRDef = 0;
+ CPSRDef = nullptr;
HighLatencyCPSR = false;
IsSelfLoop = false;
} else if (DefCPSR) {
diff --git a/lib/Target/ARM64/ARM64.h b/lib/Target/ARM64/ARM64.h
deleted file mode 100644
index f2c5e60..0000000
--- a/lib/Target/ARM64/ARM64.h
+++ /dev/null
@@ -1,48 +0,0 @@
-//===-- ARM64.h - Top-level interface for ARM64 representation --*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the entry points for global functions defined in the LLVM
-// ARM64 back-end.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef TARGET_ARM64_H
-#define TARGET_ARM64_H
-
-#include "MCTargetDesc/ARM64BaseInfo.h"
-#include "MCTargetDesc/ARM64MCTargetDesc.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Support/DataTypes.h"
-
-namespace llvm {
-
-class ARM64TargetMachine;
-class FunctionPass;
-class MachineFunctionPass;
-
-FunctionPass *createARM64DeadRegisterDefinitions();
-FunctionPass *createARM64ConditionalCompares();
-FunctionPass *createARM64AdvSIMDScalar();
-FunctionPass *createARM64BranchRelaxation();
-FunctionPass *createARM64ISelDag(ARM64TargetMachine &TM,
- CodeGenOpt::Level OptLevel);
-FunctionPass *createARM64StorePairSuppressPass();
-FunctionPass *createARM64ExpandPseudoPass();
-FunctionPass *createARM64LoadStoreOptimizationPass();
-ModulePass *createARM64PromoteConstantPass();
-FunctionPass *createARM64AddressTypePromotionPass();
-/// \brief Creates an ARM-specific Target Transformation Info pass.
-ImmutablePass *createARM64TargetTransformInfoPass(const ARM64TargetMachine *TM);
-
-FunctionPass *createARM64CleanupLocalDynamicTLSPass();
-
-FunctionPass *createARM64CollectLOHPass();
-} // end namespace llvm
-
-#endif
diff --git a/lib/Target/ARM64/ARM64.td b/lib/Target/ARM64/ARM64.td
deleted file mode 100644
index 3eef8b2..0000000
--- a/lib/Target/ARM64/ARM64.td
+++ /dev/null
@@ -1,95 +0,0 @@
-//===- ARM64.td - Describe the ARM64 Target Machine --------*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-//
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// Target-independent interfaces which we are implementing
-//===----------------------------------------------------------------------===//
-
-include "llvm/Target/Target.td"
-
-//===----------------------------------------------------------------------===//
-// ARM64 Subtarget features.
-//
-
-/// Cyclone has register move instructions which are "free".
-def FeatureZCRegMove : SubtargetFeature<"zcm", "HasZeroCycleRegMove", "true",
- "Has zero-cycle register moves">;
-
-/// Cyclone has instructions which zero registers for "free".
-def FeatureZCZeroing : SubtargetFeature<"zcz", "HasZeroCycleZeroing", "true",
- "Has zero-cycle zeroing instructions">;
-
-//===----------------------------------------------------------------------===//
-// Register File Description
-//===----------------------------------------------------------------------===//
-
-include "ARM64RegisterInfo.td"
-include "ARM64CallingConvention.td"
-
-//===----------------------------------------------------------------------===//
-// Instruction Descriptions
-//===----------------------------------------------------------------------===//
-
-include "ARM64Schedule.td"
-include "ARM64InstrInfo.td"
-
-def ARM64InstrInfo : InstrInfo;
-
-//===----------------------------------------------------------------------===//
-// ARM64 Processors supported.
-//
-include "ARM64SchedCyclone.td"
-
-def : ProcessorModel<"arm64-generic", NoSchedModel, []>;
-
-def : ProcessorModel<"cyclone", CycloneModel, [FeatureZCRegMove, FeatureZCZeroing]>;
-
-//===----------------------------------------------------------------------===//
-// Assembly parser
-//===----------------------------------------------------------------------===//
-
-def GenericAsmParserVariant : AsmParserVariant {
- int Variant = 0;
- string Name = "generic";
-}
-
-def AppleAsmParserVariant : AsmParserVariant {
- int Variant = 1;
- string Name = "apple-neon";
-}
-
-//===----------------------------------------------------------------------===//
-// Assembly printer
-//===----------------------------------------------------------------------===//
-// ARM64 Uses the MC printer for asm output, so make sure the TableGen
-// AsmWriter bits get associated with the correct class.
-def GenericAsmWriter : AsmWriter {
- string AsmWriterClassName = "InstPrinter";
- int Variant = 0;
- bit isMCAsmWriter = 1;
-}
-
-def AppleAsmWriter : AsmWriter {
- let AsmWriterClassName = "AppleInstPrinter";
- int Variant = 1;
- int isMCAsmWriter = 1;
-}
-
-//===----------------------------------------------------------------------===//
-// Target Declaration
-//===----------------------------------------------------------------------===//
-
-def ARM64 : Target {
- let InstructionSet = ARM64InstrInfo;
- let AssemblyParserVariants = [GenericAsmParserVariant, AppleAsmParserVariant];
- let AssemblyWriters = [GenericAsmWriter, AppleAsmWriter];
-}
diff --git a/lib/Target/ARM64/ARM64AsmPrinter.cpp b/lib/Target/ARM64/ARM64AsmPrinter.cpp
deleted file mode 100644
index d0aa6af..0000000
--- a/lib/Target/ARM64/ARM64AsmPrinter.cpp
+++ /dev/null
@@ -1,563 +0,0 @@
-//===-- ARM64AsmPrinter.cpp - ARM64 LLVM assembly writer ------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains a printer that converts from our internal representation
-// of machine-dependent LLVM code to the ARM64 assembly language.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "asm-printer"
-#include "ARM64.h"
-#include "ARM64MachineFunctionInfo.h"
-#include "ARM64MCInstLower.h"
-#include "ARM64RegisterInfo.h"
-#include "InstPrinter/ARM64InstPrinter.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/StringSwitch.h"
-#include "llvm/ADT/Twine.h"
-#include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/StackMaps.h"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/DebugInfo.h"
-#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCInst.h"
-#include "llvm/MC/MCInstBuilder.h"
-#include "llvm/MC/MCLinkerOptimizationHint.h"
-#include "llvm/MC/MCStreamer.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/TargetRegistry.h"
-using namespace llvm;
-
-namespace {
-
-class ARM64AsmPrinter : public AsmPrinter {
- ARM64MCInstLower MCInstLowering;
- StackMaps SM;
-
-public:
- ARM64AsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
- : AsmPrinter(TM, Streamer), MCInstLowering(OutContext, *Mang, *this),
- SM(*this), ARM64FI(NULL), LOHLabelCounter(0) {}
-
- virtual const char *getPassName() const { return "ARM64 Assembly Printer"; }
-
- /// \brief Wrapper for MCInstLowering.lowerOperand() for the
- /// tblgen'erated pseudo lowering.
- bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp) const {
- return MCInstLowering.lowerOperand(MO, MCOp);
- }
-
- void LowerSTACKMAP(MCStreamer &OutStreamer, StackMaps &SM,
- const MachineInstr &MI);
- void LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM,
- const MachineInstr &MI);
- /// \brief tblgen'erated driver function for lowering simple MI->MC
- /// pseudo instructions.
- bool emitPseudoExpansionLowering(MCStreamer &OutStreamer,
- const MachineInstr *MI);
-
- void EmitInstruction(const MachineInstr *MI);
-
- void getAnalysisUsage(AnalysisUsage &AU) const {
- AsmPrinter::getAnalysisUsage(AU);
- AU.setPreservesAll();
- }
-
- bool runOnMachineFunction(MachineFunction &F) {
- ARM64FI = F.getInfo<ARM64FunctionInfo>();
- return AsmPrinter::runOnMachineFunction(F);
- }
-
-private:
- MachineLocation getDebugValueLocation(const MachineInstr *MI) const;
- void printOperand(const MachineInstr *MI, unsigned OpNum, raw_ostream &O);
- bool printAsmMRegister(const MachineOperand &MO, char Mode, raw_ostream &O);
- bool printAsmRegInClass(const MachineOperand &MO,
- const TargetRegisterClass *RC, bool isVector,
- raw_ostream &O);
-
- bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
- unsigned AsmVariant, const char *ExtraCode,
- raw_ostream &O);
- bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNum,
- unsigned AsmVariant, const char *ExtraCode,
- raw_ostream &O);
-
- void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS);
-
- void EmitFunctionBodyEnd();
-
- MCSymbol *GetCPISymbol(unsigned CPID) const;
- void EmitEndOfAsmFile(Module &M);
- ARM64FunctionInfo *ARM64FI;
-
- /// \brief Emit the LOHs contained in ARM64FI.
- void EmitLOHs();
-
- typedef std::map<const MachineInstr *, MCSymbol *> MInstToMCSymbol;
- MInstToMCSymbol LOHInstToLabel;
- unsigned LOHLabelCounter;
-};
-
-} // end of anonymous namespace
-
-//===----------------------------------------------------------------------===//
-
-void ARM64AsmPrinter::EmitEndOfAsmFile(Module &M) {
- // Funny Darwin hack: This flag tells the linker that no global symbols
- // contain code that falls through to other global symbols (e.g. the obvious
- // implementation of multiple entry points). If this doesn't occur, the
- // linker can safely perform dead code stripping. Since LLVM never
- // generates code that does this, it is always safe to set.
- OutStreamer.EmitAssemblerFlag(MCAF_SubsectionsViaSymbols);
- SM.serializeToStackMapSection();
-}
-
-MachineLocation
-ARM64AsmPrinter::getDebugValueLocation(const MachineInstr *MI) const {
- MachineLocation Location;
- assert(MI->getNumOperands() == 4 && "Invalid no. of machine operands!");
- // Frame address. Currently handles register +- offset only.
- if (MI->getOperand(0).isReg() && MI->getOperand(1).isImm())
- Location.set(MI->getOperand(0).getReg(), MI->getOperand(1).getImm());
- else {
- DEBUG(dbgs() << "DBG_VALUE instruction ignored! " << *MI << "\n");
- }
- return Location;
-}
-
-void ARM64AsmPrinter::EmitLOHs() {
- SmallVector<MCSymbol *, 3> MCArgs;
-
- for (const auto &D : ARM64FI->getLOHContainer()) {
- for (const MachineInstr *MI : D.getArgs()) {
- MInstToMCSymbol::iterator LabelIt = LOHInstToLabel.find(MI);
- assert(LabelIt != LOHInstToLabel.end() &&
- "Label hasn't been inserted for LOH related instruction");
- MCArgs.push_back(LabelIt->second);
- }
- OutStreamer.EmitLOHDirective(D.getKind(), MCArgs);
- MCArgs.clear();
- }
-}
-
-void ARM64AsmPrinter::EmitFunctionBodyEnd() {
- if (!ARM64FI->getLOHRelated().empty())
- EmitLOHs();
-}
-
-/// GetCPISymbol - Return the symbol for the specified constant pool entry.
-MCSymbol *ARM64AsmPrinter::GetCPISymbol(unsigned CPID) const {
- // Darwin uses a linker-private symbol name for constant-pools (to
- // avoid addends on the relocation?), ELF has no such concept and
- // uses a normal private symbol.
- if (getDataLayout().getLinkerPrivateGlobalPrefix()[0])
- return OutContext.GetOrCreateSymbol(
- Twine(getDataLayout().getLinkerPrivateGlobalPrefix()) + "CPI" +
- Twine(getFunctionNumber()) + "_" + Twine(CPID));
-
- return OutContext.GetOrCreateSymbol(
- Twine(getDataLayout().getPrivateGlobalPrefix()) + "CPI" +
- Twine(getFunctionNumber()) + "_" + Twine(CPID));
-}
-
-void ARM64AsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNum,
- raw_ostream &O) {
- const MachineOperand &MO = MI->getOperand(OpNum);
- switch (MO.getType()) {
- default:
- assert(0 && "<unknown operand type>");
- case MachineOperand::MO_Register: {
- unsigned Reg = MO.getReg();
- assert(TargetRegisterInfo::isPhysicalRegister(Reg));
- assert(!MO.getSubReg() && "Subregs should be eliminated!");
- O << ARM64InstPrinter::getRegisterName(Reg);
- break;
- }
- case MachineOperand::MO_Immediate: {
- int64_t Imm = MO.getImm();
- O << '#' << Imm;
- break;
- }
- }
-}
-
-bool ARM64AsmPrinter::printAsmMRegister(const MachineOperand &MO, char Mode,
- raw_ostream &O) {
- unsigned Reg = MO.getReg();
- switch (Mode) {
- default:
- return true; // Unknown mode.
- case 'w':
- Reg = getWRegFromXReg(Reg);
- break;
- case 'x':
- Reg = getXRegFromWReg(Reg);
- break;
- }
-
- O << ARM64InstPrinter::getRegisterName(Reg);
- return false;
-}
-
-// Prints the register in MO using class RC using the offset in the
-// new register class. This should not be used for cross class
-// printing.
-bool ARM64AsmPrinter::printAsmRegInClass(const MachineOperand &MO,
- const TargetRegisterClass *RC,
- bool isVector, raw_ostream &O) {
- assert(MO.isReg() && "Should only get here with a register!");
- const ARM64RegisterInfo *RI =
- static_cast<const ARM64RegisterInfo *>(TM.getRegisterInfo());
- unsigned Reg = MO.getReg();
- unsigned RegToPrint = RC->getRegister(RI->getEncodingValue(Reg));
- assert(RI->regsOverlap(RegToPrint, Reg));
- O << ARM64InstPrinter::getRegisterName(
- RegToPrint, isVector ? ARM64::vreg : ARM64::NoRegAltName);
- return false;
-}
-
-bool ARM64AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
- unsigned AsmVariant,
- const char *ExtraCode, raw_ostream &O) {
- const MachineOperand &MO = MI->getOperand(OpNum);
- // Does this asm operand have a single letter operand modifier?
- if (ExtraCode && ExtraCode[0]) {
- if (ExtraCode[1] != 0)
- return true; // Unknown modifier.
-
- switch (ExtraCode[0]) {
- default:
- return true; // Unknown modifier.
- case 'w': // Print W register
- case 'x': // Print X register
- if (MO.isReg())
- return printAsmMRegister(MO, ExtraCode[0], O);
- if (MO.isImm() && MO.getImm() == 0) {
- unsigned Reg = ExtraCode[0] == 'w' ? ARM64::WZR : ARM64::XZR;
- O << ARM64InstPrinter::getRegisterName(Reg);
- return false;
- }
- printOperand(MI, OpNum, O);
- return false;
- case 'b': // Print B register.
- case 'h': // Print H register.
- case 's': // Print S register.
- case 'd': // Print D register.
- case 'q': // Print Q register.
- if (MO.isReg()) {
- const TargetRegisterClass *RC;
- switch (ExtraCode[0]) {
- case 'b':
- RC = &ARM64::FPR8RegClass;
- break;
- case 'h':
- RC = &ARM64::FPR16RegClass;
- break;
- case 's':
- RC = &ARM64::FPR32RegClass;
- break;
- case 'd':
- RC = &ARM64::FPR64RegClass;
- break;
- case 'q':
- RC = &ARM64::FPR128RegClass;
- break;
- default:
- return true;
- }
- return printAsmRegInClass(MO, RC, false /* vector */, O);
- }
- printOperand(MI, OpNum, O);
- return false;
- }
- }
-
- // According to ARM, we should emit x and v registers unless we have a
- // modifier.
- if (MO.isReg()) {
- unsigned Reg = MO.getReg();
-
- // If this is a w or x register, print an x register.
- if (ARM64::GPR32allRegClass.contains(Reg) ||
- ARM64::GPR64allRegClass.contains(Reg))
- return printAsmMRegister(MO, 'x', O);
-
- // If this is a b, h, s, d, or q register, print it as a v register.
- return printAsmRegInClass(MO, &ARM64::FPR128RegClass, true /* vector */, O);
- }
-
- printOperand(MI, OpNum, O);
- return false;
-}
-
-bool ARM64AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
- unsigned OpNum, unsigned AsmVariant,
- const char *ExtraCode,
- raw_ostream &O) {
- if (ExtraCode && ExtraCode[0])
- return true; // Unknown modifier.
-
- const MachineOperand &MO = MI->getOperand(OpNum);
- assert(MO.isReg() && "unexpected inline asm memory operand");
- O << "[" << ARM64InstPrinter::getRegisterName(MO.getReg()) << "]";
- return false;
-}
-
-void ARM64AsmPrinter::PrintDebugValueComment(const MachineInstr *MI,
- raw_ostream &OS) {
- unsigned NOps = MI->getNumOperands();
- assert(NOps == 4);
- OS << '\t' << MAI->getCommentString() << "DEBUG_VALUE: ";
- // cast away const; DIetc do not take const operands for some reason.
- DIVariable V(const_cast<MDNode *>(MI->getOperand(NOps - 1).getMetadata()));
- OS << V.getName();
- OS << " <- ";
- // Frame address. Currently handles register +- offset only.
- assert(MI->getOperand(0).isReg() && MI->getOperand(1).isImm());
- OS << '[';
- printOperand(MI, 0, OS);
- OS << '+';
- printOperand(MI, 1, OS);
- OS << ']';
- OS << "+";
- printOperand(MI, NOps - 2, OS);
-}
-
-void ARM64AsmPrinter::LowerSTACKMAP(MCStreamer &OutStreamer, StackMaps &SM,
- const MachineInstr &MI) {
- unsigned NumNOPBytes = MI.getOperand(1).getImm();
-
- SM.recordStackMap(MI);
- // Emit padding.
- assert(NumNOPBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
- for (unsigned i = 0; i < NumNOPBytes; i += 4)
- EmitToStreamer(OutStreamer, MCInstBuilder(ARM64::HINT).addImm(0));
-}
-
-// Lower a patchpoint of the form:
-// [<def>], <id>, <numBytes>, <target>, <numArgs>
-void ARM64AsmPrinter::LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM,
- const MachineInstr &MI) {
- SM.recordPatchPoint(MI);
-
- PatchPointOpers Opers(&MI);
-
- int64_t CallTarget = Opers.getMetaOper(PatchPointOpers::TargetPos).getImm();
- unsigned EncodedBytes = 0;
- if (CallTarget) {
- assert((CallTarget & 0xFFFFFFFFFFFF) == CallTarget &&
- "High 16 bits of call target should be zero.");
- unsigned ScratchReg = MI.getOperand(Opers.getNextScratchIdx()).getReg();
- EncodedBytes = 16;
- // Materialize the jump address:
- EmitToStreamer(OutStreamer, MCInstBuilder(ARM64::MOVZWi)
- .addReg(ScratchReg)
- .addImm((CallTarget >> 32) & 0xFFFF)
- .addImm(32));
- EmitToStreamer(OutStreamer, MCInstBuilder(ARM64::MOVKWi)
- .addReg(ScratchReg)
- .addReg(ScratchReg)
- .addImm((CallTarget >> 16) & 0xFFFF)
- .addImm(16));
- EmitToStreamer(OutStreamer, MCInstBuilder(ARM64::MOVKWi)
- .addReg(ScratchReg)
- .addReg(ScratchReg)
- .addImm(CallTarget & 0xFFFF)
- .addImm(0));
- EmitToStreamer(OutStreamer, MCInstBuilder(ARM64::BLR).addReg(ScratchReg));
- }
- // Emit padding.
- unsigned NumBytes = Opers.getMetaOper(PatchPointOpers::NBytesPos).getImm();
- assert(NumBytes >= EncodedBytes &&
- "Patchpoint can't request size less than the length of a call.");
- assert((NumBytes - EncodedBytes) % 4 == 0 &&
- "Invalid number of NOP bytes requested!");
- for (unsigned i = EncodedBytes; i < NumBytes; i += 4)
- EmitToStreamer(OutStreamer, MCInstBuilder(ARM64::HINT).addImm(0));
-}
-
-// Simple pseudo-instructions have their lowering (with expansion to real
-// instructions) auto-generated.
-#include "ARM64GenMCPseudoLowering.inc"
-
-static unsigned getRealIndexedOpcode(unsigned Opc) {
- switch (Opc) {
- case ARM64::LDRXpre_isel: return ARM64::LDRXpre;
- case ARM64::LDRWpre_isel: return ARM64::LDRWpre;
- case ARM64::LDRDpre_isel: return ARM64::LDRDpre;
- case ARM64::LDRSpre_isel: return ARM64::LDRSpre;
- case ARM64::LDRBBpre_isel: return ARM64::LDRBBpre;
- case ARM64::LDRHHpre_isel: return ARM64::LDRHHpre;
- case ARM64::LDRSBWpre_isel: return ARM64::LDRSBWpre;
- case ARM64::LDRSBXpre_isel: return ARM64::LDRSBXpre;
- case ARM64::LDRSHWpre_isel: return ARM64::LDRSHWpre;
- case ARM64::LDRSHXpre_isel: return ARM64::LDRSHXpre;
- case ARM64::LDRSWpre_isel: return ARM64::LDRSWpre;
-
- case ARM64::LDRDpost_isel: return ARM64::LDRDpost;
- case ARM64::LDRSpost_isel: return ARM64::LDRSpost;
- case ARM64::LDRXpost_isel: return ARM64::LDRXpost;
- case ARM64::LDRWpost_isel: return ARM64::LDRWpost;
- case ARM64::LDRHHpost_isel: return ARM64::LDRHHpost;
- case ARM64::LDRBBpost_isel: return ARM64::LDRBBpost;
- case ARM64::LDRSWpost_isel: return ARM64::LDRSWpost;
- case ARM64::LDRSHWpost_isel: return ARM64::LDRSHWpost;
- case ARM64::LDRSHXpost_isel: return ARM64::LDRSHXpost;
- case ARM64::LDRSBWpost_isel: return ARM64::LDRSBWpost;
- case ARM64::LDRSBXpost_isel: return ARM64::LDRSBXpost;
-
- case ARM64::STRXpre_isel: return ARM64::STRXpre;
- case ARM64::STRWpre_isel: return ARM64::STRWpre;
- case ARM64::STRHHpre_isel: return ARM64::STRHHpre;
- case ARM64::STRBBpre_isel: return ARM64::STRBBpre;
- case ARM64::STRDpre_isel: return ARM64::STRDpre;
- case ARM64::STRSpre_isel: return ARM64::STRSpre;
- }
- llvm_unreachable("Unexpected pre-indexed opcode!");
-}
-
-void ARM64AsmPrinter::EmitInstruction(const MachineInstr *MI) {
- // Do any auto-generated pseudo lowerings.
- if (emitPseudoExpansionLowering(OutStreamer, MI))
- return;
-
- if (ARM64FI->getLOHRelated().count(MI)) {
- // Generate a label for LOH related instruction
- MCSymbol *LOHLabel = GetTempSymbol("loh", LOHLabelCounter++);
- // Associate the instruction with the label
- LOHInstToLabel[MI] = LOHLabel;
- OutStreamer.EmitLabel(LOHLabel);
- }
-
- // Do any manual lowerings.
- switch (MI->getOpcode()) {
- default:
- break;
- case ARM64::DBG_VALUE: {
- if (isVerbose() && OutStreamer.hasRawTextSupport()) {
- SmallString<128> TmpStr;
- raw_svector_ostream OS(TmpStr);
- PrintDebugValueComment(MI, OS);
- OutStreamer.EmitRawText(StringRef(OS.str()));
- }
- return;
- }
- // Indexed loads and stores use a pseudo to handle complex operand
- // tricks and writeback to the base register. We strip off the writeback
- // operand and switch the opcode here. Post-indexed stores were handled by the
- // tablegen'erated pseudos above. (The complex operand <--> simple
- // operand isel is beyond tablegen's ability, so we do these manually).
- case ARM64::LDRHHpre_isel:
- case ARM64::LDRBBpre_isel:
- case ARM64::LDRXpre_isel:
- case ARM64::LDRWpre_isel:
- case ARM64::LDRDpre_isel:
- case ARM64::LDRSpre_isel:
- case ARM64::LDRSBWpre_isel:
- case ARM64::LDRSBXpre_isel:
- case ARM64::LDRSHWpre_isel:
- case ARM64::LDRSHXpre_isel:
- case ARM64::LDRSWpre_isel:
- case ARM64::LDRDpost_isel:
- case ARM64::LDRSpost_isel:
- case ARM64::LDRXpost_isel:
- case ARM64::LDRWpost_isel:
- case ARM64::LDRHHpost_isel:
- case ARM64::LDRBBpost_isel:
- case ARM64::LDRSWpost_isel:
- case ARM64::LDRSHWpost_isel:
- case ARM64::LDRSHXpost_isel:
- case ARM64::LDRSBWpost_isel:
- case ARM64::LDRSBXpost_isel: {
- MCInst TmpInst;
- // For loads, the writeback operand to be skipped is the second.
- TmpInst.setOpcode(getRealIndexedOpcode(MI->getOpcode()));
- TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
- TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(2).getReg()));
- TmpInst.addOperand(MCOperand::CreateImm(MI->getOperand(3).getImm()));
- EmitToStreamer(OutStreamer, TmpInst);
- return;
- }
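-
- // As an illustration (operands invented), an LDRXpre_isel carrying
- // (x0 dst, x1 writeback, x1 base, #16 offset) is re-emitted as the
- // three-operand MCInst for LDRXpre, i.e. "ldr x0, [x1, #16]!", with the
- // explicit writeback operand dropped.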
- case ARM64::STRXpre_isel:
- case ARM64::STRWpre_isel:
- case ARM64::STRHHpre_isel:
- case ARM64::STRBBpre_isel:
- case ARM64::STRDpre_isel:
- case ARM64::STRSpre_isel: {
- MCInst TmpInst;
- // For stores, the writeback operand to be skipped is the first.
- TmpInst.setOpcode(getRealIndexedOpcode(MI->getOpcode()));
- TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(1).getReg()));
- TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(2).getReg()));
- TmpInst.addOperand(MCOperand::CreateImm(MI->getOperand(3).getImm()));
- EmitToStreamer(OutStreamer, TmpInst);
- return;
- }
-
- // Tail calls use pseudo instructions so they have the proper code-gen
- // attributes (isCall, isReturn, etc.). We lower them to the real
- // instruction here.
- case ARM64::TCRETURNri: {
- MCInst TmpInst;
- TmpInst.setOpcode(ARM64::BR);
- TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
- EmitToStreamer(OutStreamer, TmpInst);
- return;
- }
- case ARM64::TCRETURNdi: {
- MCOperand Dest;
- MCInstLowering.lowerOperand(MI->getOperand(0), Dest);
- MCInst TmpInst;
- TmpInst.setOpcode(ARM64::B);
- TmpInst.addOperand(Dest);
- EmitToStreamer(OutStreamer, TmpInst);
- return;
- }
- case ARM64::TLSDESC_BLR: {
- MCOperand Callee, Sym;
- MCInstLowering.lowerOperand(MI->getOperand(0), Callee);
- MCInstLowering.lowerOperand(MI->getOperand(1), Sym);
-
- // First emit a relocation annotation. This expands to no code, but requests
- // that the following instruction get an R_AARCH64_TLSDESC_CALL relocation.
- MCInst TLSDescCall;
- TLSDescCall.setOpcode(ARM64::TLSDESCCALL);
- TLSDescCall.addOperand(Sym);
- EmitToStreamer(OutStreamer, TLSDescCall);
-
- // Other than that it's just a normal indirect call to the function loaded
- // from the descriptor.
- MCInst BLR;
- BLR.setOpcode(ARM64::BLR);
- BLR.addOperand(Callee);
- EmitToStreamer(OutStreamer, BLR);
-
- return;
- }
-
- case TargetOpcode::STACKMAP:
- return LowerSTACKMAP(OutStreamer, SM, *MI);
-
- case TargetOpcode::PATCHPOINT:
- return LowerPATCHPOINT(OutStreamer, SM, *MI);
- }
-
- // Finally, do the automated lowerings for everything else.
- MCInst TmpInst;
- MCInstLowering.Lower(MI, TmpInst);
- EmitToStreamer(OutStreamer, TmpInst);
-}
-
-// Force static initialization.
-extern "C" void LLVMInitializeARM64AsmPrinter() {
- RegisterAsmPrinter<ARM64AsmPrinter> X(TheARM64Target);
-}
diff --git a/lib/Target/ARM64/ARM64CallingConv.h b/lib/Target/ARM64/ARM64CallingConv.h
deleted file mode 100644
index 0128236..0000000
--- a/lib/Target/ARM64/ARM64CallingConv.h
+++ /dev/null
@@ -1,94 +0,0 @@
-//=== ARM64CallingConv.h - Custom Calling Convention Routines -*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the custom routines for the ARM64 Calling Convention that
-// aren't done by tablegen.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef ARM64CALLINGCONV_H
-#define ARM64CALLINGCONV_H
-
-#include "ARM64InstrInfo.h"
-#include "llvm/IR/CallingConv.h"
-#include "llvm/CodeGen/CallingConvLower.h"
-#include "llvm/Target/TargetInstrInfo.h"
-
-namespace llvm {
-
-/// CC_ARM64_Custom_i1i8i16_Reg - customized handling of passing i1/i8/i16 via
-/// register. Here, ValVT is i1/i8/i16 if the argument has not been promoted
-/// yet, or i32 if it has; LocVT is always i1/i8/i16. We only promote the
-/// argument to i32 once we are sure it will be passed in a register.
-static bool CC_ARM64_Custom_i1i8i16_Reg(unsigned ValNo, MVT ValVT, MVT LocVT,
- CCValAssign::LocInfo LocInfo,
- ISD::ArgFlagsTy ArgFlags,
- CCState &State,
- bool IsWebKitJS = false) {
- static const uint16_t RegList1[] = { ARM64::W0, ARM64::W1, ARM64::W2,
- ARM64::W3, ARM64::W4, ARM64::W5,
- ARM64::W6, ARM64::W7 };
- static const uint16_t RegList2[] = { ARM64::X0, ARM64::X1, ARM64::X2,
- ARM64::X3, ARM64::X4, ARM64::X5,
- ARM64::X6, ARM64::X7 };
- static const uint16_t WebKitRegList1[] = { ARM64::W0 };
- static const uint16_t WebKitRegList2[] = { ARM64::X0 };
-
- const uint16_t *List1 = IsWebKitJS ? WebKitRegList1 : RegList1;
- const uint16_t *List2 = IsWebKitJS ? WebKitRegList2 : RegList2;
-
- if (unsigned Reg = State.AllocateReg(List1, List2, 8)) {
- // Customized extra section for handling i1/i8/i16:
- // We need to promote the argument to i32 if it is not done already.
- if (ValVT != MVT::i32) {
- if (ArgFlags.isSExt())
- LocInfo = CCValAssign::SExt;
- else if (ArgFlags.isZExt())
- LocInfo = CCValAssign::ZExt;
- else
- LocInfo = CCValAssign::AExt;
- ValVT = MVT::i32;
- }
- // Set LocVT to i32 as well if passing via register.
- LocVT = MVT::i32;
- State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
- return true;
- }
- return false;
-}
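-
-// For example (slot assignment illustrative), a sign-extended i8 argument
-// that gets the first free slot is recorded as W0 with ValVT and LocVT
-// promoted to i32 and LocInfo set to SExt; if all eight registers are taken,
-// the routine returns false and the argument falls through to the stack
-// handling below.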
-
-/// CC_ARM64_WebKit_JS_i1i8i16_Reg - customized handling of passing i1/i8/i16
-/// via register. This behaves the same as CC_ARM64_Custom_i1i8i16_Reg, but only
-/// uses the first register.
-static bool CC_ARM64_WebKit_JS_i1i8i16_Reg(unsigned ValNo, MVT ValVT, MVT LocVT,
- CCValAssign::LocInfo LocInfo,
- ISD::ArgFlagsTy ArgFlags,
- CCState &State) {
- return CC_ARM64_Custom_i1i8i16_Reg(ValNo, ValVT, LocVT, LocInfo, ArgFlags,
- State, true);
-}
-
-/// CC_ARM64_Custom_i1i8i16_Stack - customized handling of passing i1/i8/i16 on
-/// stack. Here, ValVT can be i1/i8/i16 or i32 depending on whether the argument
-/// is already promoted and LocVT is i1/i8/i16. If ValVT is already promoted,
-/// it will be truncated back to i1/i8/i16.
-static bool CC_ARM64_Custom_i1i8i16_Stack(unsigned ValNo, MVT ValVT, MVT LocVT,
- CCValAssign::LocInfo LocInfo,
- ISD::ArgFlagsTy ArgFlags,
- CCState &State) {
- unsigned Space = ((LocVT == MVT::i1 || LocVT == MVT::i8) ? 1 : 2);
- unsigned Offset12 = State.AllocateStack(Space, Space);
- ValVT = LocVT;
- State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset12, LocVT, LocInfo));
- return true;
-}
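-
-// For example, an i8 argument (LocVT == MVT::i8) reserves 1 byte at 1-byte
-// alignment and an i16 reserves 2 bytes at 2-byte alignment; the offset
-// returned by AllocateStack becomes the memory location in the CCValAssign.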
-
-} // End llvm namespace
-
-#endif
diff --git a/lib/Target/ARM64/ARM64DeadRegisterDefinitionsPass.cpp b/lib/Target/ARM64/ARM64DeadRegisterDefinitionsPass.cpp
deleted file mode 100644
index 3e410e5..0000000
--- a/lib/Target/ARM64/ARM64DeadRegisterDefinitionsPass.cpp
+++ /dev/null
@@ -1,104 +0,0 @@
-//===-- ARM64DeadRegisterDefinitions.cpp - Replace dead defs w/ zero reg --===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-// When allowed by the instruction, replace a dead definition of a GPR with
-// the zero register. This makes the code a bit friendlier towards the
-// hardware's register renamer.
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "arm64-dead-defs"
-#include "ARM64.h"
-#include "ARM64RegisterInfo.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-using namespace llvm;
-
-STATISTIC(NumDeadDefsReplaced, "Number of dead definitions replaced");
-
-namespace {
-class ARM64DeadRegisterDefinitions : public MachineFunctionPass {
-private:
- bool processMachineBasicBlock(MachineBasicBlock *MBB);
-
-public:
- static char ID; // Pass identification, replacement for typeid.
- explicit ARM64DeadRegisterDefinitions() : MachineFunctionPass(ID) {}
-
- virtual bool runOnMachineFunction(MachineFunction &F);
-
- const char *getPassName() const { return "Dead register definitions"; }
-
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesCFG();
- MachineFunctionPass::getAnalysisUsage(AU);
- }
-};
-char ARM64DeadRegisterDefinitions::ID = 0;
-} // end anonymous namespace
-
-bool
-ARM64DeadRegisterDefinitions::processMachineBasicBlock(MachineBasicBlock *MBB) {
- bool Changed = false;
- for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;
- ++I) {
- MachineInstr *MI = I;
- for (int i = 0, e = MI->getDesc().getNumDefs(); i != e; ++i) {
- MachineOperand &MO = MI->getOperand(i);
- if (MO.isReg() && MO.isDead() && MO.isDef()) {
- assert(!MO.isImplicit() && "Unexpected implicit def!");
- DEBUG(dbgs() << " Dead def operand #" << i << " in:\n ";
- MI->print(dbgs()));
- // Be careful not to change the register if it's a tied operand.
- if (MI->isRegTiedToUseOperand(i)) {
- DEBUG(dbgs() << " Ignoring, def is tied operand.\n");
- continue;
- }
- // Make sure the instruction takes a register class that contains
- // the zero register, and replace the register if so.
- unsigned NewReg;
- switch (MI->getDesc().OpInfo[i].RegClass) {
- default:
- DEBUG(dbgs() << " Ignoring, register is not a GPR.\n");
- continue;
- case ARM64::GPR32RegClassID:
- NewReg = ARM64::WZR;
- break;
- case ARM64::GPR64RegClassID:
- NewReg = ARM64::XZR;
- break;
- }
- DEBUG(dbgs() << " Replacing with zero register. New:\n ");
- MO.setReg(NewReg);
- Changed = true;
- DEBUG(MI->print(dbgs()));
- ++NumDeadDefsReplaced;
- }
- }
- }
- return Changed;
-}
-
-// Scan the function for instructions that have a dead definition of a
-// register. Replace that register with the zero register when possible.
-bool ARM64DeadRegisterDefinitions::runOnMachineFunction(MachineFunction &mf) {
- MachineFunction *MF = &mf;
- bool Changed = false;
- DEBUG(dbgs() << "***** ARM64DeadRegisterDefinitions *****\n");
-
- for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I)
- if (processMachineBasicBlock(I))
- Changed = true;
- return Changed;
-}
-
-FunctionPass *llvm::createARM64DeadRegisterDefinitions() {
- return new ARM64DeadRegisterDefinitions();
-}
diff --git a/lib/Target/ARM64/ARM64FrameLowering.cpp b/lib/Target/ARM64/ARM64FrameLowering.cpp
deleted file mode 100644
index 798986c..0000000
--- a/lib/Target/ARM64/ARM64FrameLowering.cpp
+++ /dev/null
@@ -1,816 +0,0 @@
-//===- ARM64FrameLowering.cpp - ARM64 Frame Lowering -----------*- C++ -*-====//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the ARM64 implementation of TargetFrameLowering class.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "frame-info"
-#include "ARM64FrameLowering.h"
-#include "ARM64InstrInfo.h"
-#include "ARM64MachineFunctionInfo.h"
-#include "ARM64Subtarget.h"
-#include "ARM64TargetMachine.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/Function.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/RegisterScavenging.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/raw_ostream.h"
-
-using namespace llvm;
-
-static cl::opt<bool> EnableRedZone("arm64-redzone",
- cl::desc("enable use of redzone on ARM64"),
- cl::init(false), cl::Hidden);
-
-STATISTIC(NumRedZoneFunctions, "Number of functions using red zone");
-
-static unsigned estimateStackSize(MachineFunction &MF) {
- const MachineFrameInfo *FFI = MF.getFrameInfo();
- int Offset = 0;
- for (int i = FFI->getObjectIndexBegin(); i != 0; ++i) {
- int FixedOff = -FFI->getObjectOffset(i);
- if (FixedOff > Offset)
- Offset = FixedOff;
- }
- for (unsigned i = 0, e = FFI->getObjectIndexEnd(); i != e; ++i) {
- if (FFI->isDeadObjectIndex(i))
- continue;
- Offset += FFI->getObjectSize(i);
- unsigned Align = FFI->getObjectAlignment(i);
- // Adjust to alignment boundary
- Offset = (Offset + Align - 1) / Align * Align;
- }
- // This does not include the 16 bytes used for fp and lr.
- return (unsigned)Offset;
-}
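-
-// Worked example: two 12-byte objects with 8-byte alignment accumulate as
-// 0 + 12, rounded to 16, then 16 + 12 = 28, rounded to 32, so the estimate
-// returned is 32 (again excluding the 16 bytes for fp and lr).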
-
-bool ARM64FrameLowering::canUseRedZone(const MachineFunction &MF) const {
- if (!EnableRedZone)
- return false;
- // Don't use the red zone if the function explicitly asks us not to.
- // This is typically used for kernel code.
- if (MF.getFunction()->getAttributes().hasAttribute(
- AttributeSet::FunctionIndex, Attribute::NoRedZone))
- return false;
-
- const MachineFrameInfo *MFI = MF.getFrameInfo();
- const ARM64FunctionInfo *AFI = MF.getInfo<ARM64FunctionInfo>();
- unsigned NumBytes = AFI->getLocalStackSize();
-
- // Note: currently hasFP() is always true for hasCalls(), but that's an
- // implementation detail of the current code, not a strict requirement,
- // so stay safe here and check both.
- if (MFI->hasCalls() || hasFP(MF) || NumBytes > 128)
- return false;
- return true;
-}
-
-/// hasFP - Return true if the specified function should have a dedicated frame
-/// pointer register.
-bool ARM64FrameLowering::hasFP(const MachineFunction &MF) const {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
-
-#ifndef NDEBUG
- const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo();
- assert(!RegInfo->needsStackRealignment(MF) &&
- "No stack realignment on ARM64!");
-#endif
-
- return (MFI->hasCalls() || MFI->hasVarSizedObjects() ||
- MFI->isFrameAddressTaken());
-}
-
-/// hasReservedCallFrame - Under normal circumstances, when a frame pointer is
-/// not required, we reserve argument space for call sites in the function
-/// immediately on entry to the current function. This eliminates the need for
-/// add/sub sp brackets around call sites. Returns true if the call frame is
-/// included as part of the stack frame.
-bool ARM64FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
- return !MF.getFrameInfo()->hasVarSizedObjects();
-}
-
-void ARM64FrameLowering::eliminateCallFramePseudoInstr(
- MachineFunction &MF, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const {
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
- const ARM64InstrInfo *TII =
- static_cast<const ARM64InstrInfo *>(MF.getTarget().getInstrInfo());
- if (!TFI->hasReservedCallFrame(MF)) {
- // If we have alloca, convert as follows:
- // ADJCALLSTACKDOWN -> sub sp, sp, amount
- // ADJCALLSTACKUP   -> add sp, sp, amount
- MachineInstr *Old = I;
- DebugLoc DL = Old->getDebugLoc();
- unsigned Amount = Old->getOperand(0).getImm();
- if (Amount != 0) {
- // We need to keep the stack aligned properly. To do this, we round the
- // amount of space needed for the outgoing arguments up to the next
- // alignment boundary.
- unsigned Align = TFI->getStackAlignment();
- Amount = (Amount + Align - 1) / Align * Align;
-
- // Replace the pseudo instruction with a new instruction...
- unsigned Opc = Old->getOpcode();
- if (Opc == ARM64::ADJCALLSTACKDOWN) {
- emitFrameOffset(MBB, I, DL, ARM64::SP, ARM64::SP, -Amount, TII);
- } else {
- assert(Opc == ARM64::ADJCALLSTACKUP && "expected ADJCALLSTACKUP");
- emitFrameOffset(MBB, I, DL, ARM64::SP, ARM64::SP, Amount, TII);
- }
- }
- }
- MBB.erase(I);
-}
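-
-// For example, with the 16-byte stack alignment used here, an
-// "ADJCALLSTACKDOWN 24" in a function with variable-sized objects rounds 24
-// up to 32 and becomes "sub sp, sp, #32"; the matching ADJCALLSTACKUP
-// becomes "add sp, sp, #32".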
-
-void
-ARM64FrameLowering::emitCalleeSavedFrameMoves(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- unsigned FramePtr) const {
- MachineFunction &MF = *MBB.getParent();
- MachineFrameInfo *MFI = MF.getFrameInfo();
- MachineModuleInfo &MMI = MF.getMMI();
- const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
- const ARM64InstrInfo *TII = TM.getInstrInfo();
- DebugLoc DL = MBB.findDebugLoc(MBBI);
-
- // Add callee saved registers to move list.
- const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
- if (CSI.empty())
- return;
-
- const DataLayout *TD = MF.getTarget().getDataLayout();
- bool HasFP = hasFP(MF);
-
- // Calculate the number of bytes used for storing the return address.
- int stackGrowth = -TD->getPointerSize(0);
-
- // Calculate offsets.
- int64_t saveAreaOffset = (HasFP ? 2 : 1) * stackGrowth;
- unsigned TotalSkipped = 0;
- for (const auto &Info : CSI) {
- unsigned Reg = Info.getReg();
- int64_t Offset = MFI->getObjectOffset(Info.getFrameIdx()) -
- getOffsetOfLocalArea() + saveAreaOffset;
-
- // Don't output a new CFI directive if we're re-saving the frame pointer or
- // link register. This happens when the PrologEpilogInserter has inserted an
- // extra "STP" of the frame pointer and link register -- the "emitPrologue"
- // method automatically generates the directives when frame pointers are
- // used. If we generate CFI directives for the extra "STP"s, the linker will
- // lose track of the correct values for the frame pointer and link register.
- if (HasFP && (FramePtr == Reg || Reg == ARM64::LR)) {
- TotalSkipped += stackGrowth;
- continue;
- }
-
- unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
- unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset(
- nullptr, DwarfReg, Offset - TotalSkipped));
- BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
- }
-}
-
-void ARM64FrameLowering::emitPrologue(MachineFunction &MF) const {
- MachineBasicBlock &MBB = MF.front(); // Prologue goes in entry BB.
- MachineBasicBlock::iterator MBBI = MBB.begin();
- const MachineFrameInfo *MFI = MF.getFrameInfo();
- const Function *Fn = MF.getFunction();
- const ARM64RegisterInfo *RegInfo = TM.getRegisterInfo();
- const ARM64InstrInfo *TII = TM.getInstrInfo();
- MachineModuleInfo &MMI = MF.getMMI();
- ARM64FunctionInfo *AFI = MF.getInfo<ARM64FunctionInfo>();
- bool needsFrameMoves = MMI.hasDebugInfo() || Fn->needsUnwindTableEntry();
- bool HasFP = hasFP(MF);
- DebugLoc DL = MBB.findDebugLoc(MBBI);
-
- int NumBytes = (int)MFI->getStackSize();
- if (!AFI->hasStackFrame()) {
- assert(!HasFP && "unexpected function without stack frame but with FP");
-
- // All of the stack allocation is for locals.
- AFI->setLocalStackSize(NumBytes);
-
- // Label used to tie together the PROLOG_LABEL and the MachineMoves.
- MCSymbol *FrameLabel = MMI.getContext().CreateTempSymbol();
-
- // REDZONE: If the stack size is less than 128 bytes, we don't need
- // to actually allocate.
- if (NumBytes && !canUseRedZone(MF)) {
- emitFrameOffset(MBB, MBBI, DL, ARM64::SP, ARM64::SP, -NumBytes, TII,
- MachineInstr::FrameSetup);
-
- // Encode the stack size of the leaf function.
- unsigned CFIIndex = MMI.addFrameInst(
- MCCFIInstruction::createDefCfaOffset(FrameLabel, -NumBytes));
- BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
- } else if (NumBytes) {
- ++NumRedZoneFunctions;
- }
-
- return;
- }
-
- // Only set up FP if we actually need to.
- int FPOffset = 0;
- if (HasFP) {
- // First instruction must a) allocate the stack and b) have an immediate
- // that is negative and even (register pairs are stored in 8-byte units).
- assert((MBBI->getOpcode() == ARM64::STPXpre ||
- MBBI->getOpcode() == ARM64::STPDpre) &&
- MBBI->getOperand(2).getReg() == ARM64::SP &&
- MBBI->getOperand(3).getImm() < 0 &&
- (MBBI->getOperand(3).getImm() & 1) == 0);
-
- // Frame pointer is fp = sp - 16. Since the STPXpre subtracts the space
- // required for the callee saved register area we get the frame pointer
- // by adding that offset - 16 = -getImm()*8 - 2*8 = -(getImm() + 2) * 8.
- FPOffset = -(MBBI->getOperand(3).getImm() + 2) * 8;
- assert(FPOffset >= 0 && "Bad Framepointer Offset");
- }
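-
- // For example (register pair invented), if the allocating store is
- // "stp x20, x19, [sp, #-32]!" then getImm() is -4 (in 8-byte units) and
- // FPOffset = -(-4 + 2) * 8 = 16, so fp is set 16 bytes above the
- // post-allocation sp.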
-
- // Move past the saves of the callee-saved registers.
- while (MBBI->getOpcode() == ARM64::STPXi ||
- MBBI->getOpcode() == ARM64::STPDi ||
- MBBI->getOpcode() == ARM64::STPXpre ||
- MBBI->getOpcode() == ARM64::STPDpre) {
- ++MBBI;
- NumBytes -= 16;
- }
- assert(NumBytes >= 0 && "Negative stack allocation size!?");
- if (HasFP) {
- // Issue sub fp, sp, FPOffset or
- // mov fp,sp when FPOffset is zero.
- // Note: All stores of callee-saved registers are marked as "FrameSetup".
- // This code marks the instruction(s) that set the FP also.
- emitFrameOffset(MBB, MBBI, DL, ARM64::FP, ARM64::SP, FPOffset, TII,
- MachineInstr::FrameSetup);
- }
-
- // All of the remaining stack allocations are for locals.
- AFI->setLocalStackSize(NumBytes);
-
- // Allocate space for the rest of the frame.
- if (NumBytes) {
- // If we're a leaf function, try using the red zone.
- if (!canUseRedZone(MF))
- emitFrameOffset(MBB, MBBI, DL, ARM64::SP, ARM64::SP, -NumBytes, TII,
- MachineInstr::FrameSetup);
- }
-
- // If we need a base pointer, set it up here. It's whatever the value of the
- // stack pointer is at this point. Any variable size objects will be allocated
- // after this, so we can still use the base pointer to reference locals.
- //
- // FIXME: Clarify FrameSetup flags here.
- // Note: Use emitFrameOffset() like above for FP if the FrameSetup flag is
- // needed.
- //
- if (RegInfo->hasBasePointer(MF))
- TII->copyPhysReg(MBB, MBBI, DL, ARM64::X19, ARM64::SP, false);
-
- if (needsFrameMoves) {
- const DataLayout *TD = MF.getTarget().getDataLayout();
- const int StackGrowth = -TD->getPointerSize(0);
- unsigned FramePtr = RegInfo->getFrameRegister(MF);
-
- // An example of the prologue:
- //
- // .globl __foo
- // .align 2
- // __foo:
- // Ltmp0:
- // .cfi_startproc
- // .cfi_personality 155, ___gxx_personality_v0
- // Leh_func_begin:
- // .cfi_lsda 16, Lexception33
- //
- // stp x<a>, x<b>, [sp, #-offset]!
- // ...
- // stp x28, x27, [sp, #offset-32]
- // stp fp, lr, [sp, #offset-16]
- // add fp, sp, #offset - 16
- // sub sp, sp, #1360
- //
- // The Stack:
- // +-------------------------------------------+
- // 10000 | ........ | ........ | ........ | ........ |
- // 10004 | ........ | ........ | ........ | ........ |
- // +-------------------------------------------+
- // 10008 | ........ | ........ | ........ | ........ |
- // 1000c | ........ | ........ | ........ | ........ |
- // +===========================================+
- // 10010 | X28 Register |
- // 10014 | X28 Register |
- // +-------------------------------------------+
- // 10018 | X27 Register |
- // 1001c | X27 Register |
- // +===========================================+
- // 10020 | Frame Pointer |
- // 10024 | Frame Pointer |
- // +-------------------------------------------+
- // 10028 | Link Register |
- // 1002c | Link Register |
- // +===========================================+
- // 10030 | ........ | ........ | ........ | ........ |
- // 10034 | ........ | ........ | ........ | ........ |
- // +-------------------------------------------+
- // 10038 | ........ | ........ | ........ | ........ |
- // 1003c | ........ | ........ | ........ | ........ |
- // +-------------------------------------------+
- //
- // [sp] = 10030 :: >>initial value<<
- // sp = 10020 :: stp fp, lr, [sp, #-16]!
- // fp = sp == 10020 :: mov fp, sp
- // [sp] == 10020 :: stp x28, x27, [sp, #-16]!
- // sp == 10010 :: >>final value<<
- //
- // The frame pointer (w29) points to address 10020. If we use an offset of
- // '16' from 'w29', we get the CFI offsets of -8 for w30, -16 for w29, -24
- // for w27, and -32 for w28:
- //
- // Ltmp1:
- // .cfi_def_cfa w29, 16
- // Ltmp2:
- // .cfi_offset w30, -8
- // Ltmp3:
- // .cfi_offset w29, -16
- // Ltmp4:
- // .cfi_offset w27, -24
- // Ltmp5:
- // .cfi_offset w28, -32
-
- if (HasFP) {
- // Define the current CFA rule to use the provided FP.
- unsigned Reg = RegInfo->getDwarfRegNum(FramePtr, true);
- unsigned CFIIndex = MMI.addFrameInst(
- MCCFIInstruction::createDefCfa(nullptr, Reg, 2 * StackGrowth));
- BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
-
- // Record the location of the stored LR
- unsigned LR = RegInfo->getDwarfRegNum(ARM64::LR, true);
- CFIIndex = MMI.addFrameInst(
- MCCFIInstruction::createOffset(nullptr, LR, StackGrowth));
- BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
-
- // Record the location of the stored FP
- CFIIndex = MMI.addFrameInst(
- MCCFIInstruction::createOffset(nullptr, Reg, 2 * StackGrowth));
- BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
- } else {
- // Encode the stack size of the leaf function.
- unsigned CFIIndex = MMI.addFrameInst(
- MCCFIInstruction::createDefCfaOffset(nullptr, -MFI->getStackSize()));
- BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex);
- }
-
- // Now emit the moves for whatever callee saved regs we have.
- emitCalleeSavedFrameMoves(MBB, MBBI, FramePtr);
- }
-}
-
-static bool isCalleeSavedRegister(unsigned Reg, const uint16_t *CSRegs) {
- for (unsigned i = 0; CSRegs[i]; ++i)
- if (Reg == CSRegs[i])
- return true;
- return false;
-}
-
-static bool isCSRestore(MachineInstr *MI, const uint16_t *CSRegs) {
- if (MI->getOpcode() == ARM64::LDPXpost ||
- MI->getOpcode() == ARM64::LDPDpost || MI->getOpcode() == ARM64::LDPXi ||
- MI->getOpcode() == ARM64::LDPDi) {
- if (!isCalleeSavedRegister(MI->getOperand(0).getReg(), CSRegs) ||
- !isCalleeSavedRegister(MI->getOperand(1).getReg(), CSRegs) ||
- MI->getOperand(2).getReg() != ARM64::SP)
- return false;
- return true;
- }
-
- return false;
-}
-
-void ARM64FrameLowering::emitEpilogue(MachineFunction &MF,
- MachineBasicBlock &MBB) const {
- MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
- assert(MBBI->isReturn() && "Can only insert epilog into returning blocks");
- MachineFrameInfo *MFI = MF.getFrameInfo();
- const ARM64InstrInfo *TII =
- static_cast<const ARM64InstrInfo *>(MF.getTarget().getInstrInfo());
- const ARM64RegisterInfo *RegInfo =
- static_cast<const ARM64RegisterInfo *>(MF.getTarget().getRegisterInfo());
- DebugLoc DL = MBBI->getDebugLoc();
-
- int NumBytes = MFI->getStackSize();
- unsigned NumRestores = 0;
- // Move past the restores of the callee-saved registers.
- MachineBasicBlock::iterator LastPopI = MBBI;
- const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
- if (LastPopI != MBB.begin()) {
- do {
- ++NumRestores;
- --LastPopI;
- } while (LastPopI != MBB.begin() && isCSRestore(LastPopI, CSRegs));
- if (!isCSRestore(LastPopI, CSRegs)) {
- ++LastPopI;
- --NumRestores;
- }
- }
- NumBytes -= NumRestores * 16;
- assert(NumBytes >= 0 && "Negative stack allocation size!?");
-
- if (!hasFP(MF)) {
- // If this was a redzone leaf function, we don't need to restore the
- // stack pointer.
- if (!canUseRedZone(MF))
- emitFrameOffset(MBB, LastPopI, DL, ARM64::SP, ARM64::SP, NumBytes, TII);
- return;
- }
-
- // Restore the original stack pointer.
- // FIXME: Rather than doing the math here, we should instead just use
- // non-post-indexed loads for the restores if we aren't actually going to
- // be able to save any instructions.
- if (NumBytes || MFI->hasVarSizedObjects())
- emitFrameOffset(MBB, LastPopI, DL, ARM64::SP, ARM64::FP,
- -(NumRestores - 1) * 16, TII, MachineInstr::NoFlags);
-}
-
-/// getFrameIndexOffset - Returns the displacement from the frame register to
-/// the stack frame of the specified index.
-int ARM64FrameLowering::getFrameIndexOffset(const MachineFunction &MF,
- int FI) const {
- unsigned FrameReg;
- return getFrameIndexReference(MF, FI, FrameReg);
-}
-
-/// getFrameIndexReference - Provide a base+offset reference to an FI slot for
-/// debug info. It's the same as what we use for resolving the code-gen
-/// references for now. FIXME: This can go wrong when references are
-/// SP-relative and simple call frames aren't used.
-int ARM64FrameLowering::getFrameIndexReference(const MachineFunction &MF,
- int FI,
- unsigned &FrameReg) const {
- return resolveFrameIndexReference(MF, FI, FrameReg);
-}
-
-int ARM64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF,
- int FI, unsigned &FrameReg,
- bool PreferFP) const {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
- const ARM64RegisterInfo *RegInfo =
- static_cast<const ARM64RegisterInfo *>(MF.getTarget().getRegisterInfo());
- const ARM64FunctionInfo *AFI = MF.getInfo<ARM64FunctionInfo>();
- int FPOffset = MFI->getObjectOffset(FI) + 16;
- int Offset = MFI->getObjectOffset(FI) + MFI->getStackSize();
- bool isFixed = MFI->isFixedObjectIndex(FI);
-
- // Use frame pointer to reference fixed objects. Use it for locals if
- // there are VLAs (and thus the SP isn't reliable as a base).
- // Make sure useFPForScavengingIndex() does the right thing for the emergency
- // spill slot.
- bool UseFP = false;
- if (AFI->hasStackFrame()) {
- // Note: Keeping the following as multiple 'if' statements rather than
- // merging to a single expression for readability.
- //
- // Argument access should always use the FP.
- if (isFixed) {
- UseFP = hasFP(MF);
- } else if (hasFP(MF) && !RegInfo->hasBasePointer(MF)) {
- // Use SP or FP, whichever gives us the best chance of the offset
- // being in range for direct access. If the FPOffset is positive,
- // that'll always be best, as the SP will be even further away.
- // If the FPOffset is negative, we have to keep in mind that the
- // available offset range for negative offsets is smaller than for
- // positive ones. If we have variable sized objects, we're stuck with
- // using the FP regardless, though, as the SP offset is unknown
- // and we don't have a base pointer available. If an offset is
- // available via the FP and the SP, use whichever is closest.
- if (PreferFP || MFI->hasVarSizedObjects() || FPOffset >= 0 ||
- (FPOffset >= -256 && Offset > -FPOffset))
- UseFP = true;
- }
- }
-
- if (UseFP) {
- FrameReg = RegInfo->getFrameRegister(MF);
- return FPOffset;
- }
-
- // Use the base pointer if we have one.
- if (RegInfo->hasBasePointer(MF))
- FrameReg = RegInfo->getBaseRegister();
- else {
- FrameReg = ARM64::SP;
- // If we're using the red zone for this function, the SP won't actually
- // be adjusted, so the offsets will be negative. They're also all
- // within range of the signed 9-bit immediate instructions.
- if (canUseRedZone(MF))
- Offset -= AFI->getLocalStackSize();
- }
-
- return Offset;
-}
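-
-// For example (offsets invented), in a red-zone leaf function with a local
-// stack size of 32, a local at object offset -24 gives Offset = -24 + 32 = 8;
-// subtracting the local stack size yields an SP-relative offset of -24,
-// within range of the signed 9-bit immediate forms mentioned above.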
-
-static unsigned getPrologueDeath(MachineFunction &MF, unsigned Reg) {
- if (Reg != ARM64::LR)
- return getKillRegState(true);
-
- // LR may be referred to later by an @llvm.returnaddress intrinsic.
- bool LRLiveIn = MF.getRegInfo().isLiveIn(ARM64::LR);
- bool LRKill = !(LRLiveIn && MF.getFrameInfo()->isReturnAddressTaken());
- return getKillRegState(LRKill);
-}
-
-bool ARM64FrameLowering::spillCalleeSavedRegisters(
- MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const {
- MachineFunction &MF = *MBB.getParent();
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
- unsigned Count = CSI.size();
- DebugLoc DL;
- assert((Count & 1) == 0 && "Odd number of callee-saved regs to spill!");
-
- if (MI != MBB.end())
- DL = MI->getDebugLoc();
-
- for (unsigned i = 0; i < Count; i += 2) {
- unsigned idx = Count - i - 2;
- unsigned Reg1 = CSI[idx].getReg();
- unsigned Reg2 = CSI[idx + 1].getReg();
- // GPRs and FPRs are saved in pairs of 64-bit regs. We expect the CSI
- // list to come in sorted by frame index so that we can issue the store
- // pair instructions directly. Assert if we see anything else.
- //
- // The order of the registers in the list is controlled by
- // getCalleeSavedRegs(), so they will always be in-order, as well.
- assert(CSI[idx].getFrameIdx() + 1 == CSI[idx + 1].getFrameIdx() &&
- "Out of order callee saved regs!");
- unsigned StrOpc;
- assert((Count & 1) == 0 && "Odd number of callee-saved regs to spill!");
- assert((i & 1) == 0 && "Odd index for callee-saved reg spill!");
- // Issue a sequence of CSR spills where only the first store pre-increments
- // SP: that first spill allocates the whole stack area.
- // For example:
- // stp x22, x21, [sp, #-48]! // addImm(-6)
- // stp x20, x19, [sp, #16] // addImm(+2)
- // stp fp, lr, [sp, #32] // addImm(+4)
- // Rationale: This sequence saves uop updates compared to a sequence of
- // pre-increment spills like stp xi,xj,[sp,#-16]!
- // Note: Similar rationale and sequence for restores in the epilogue.
- if (ARM64::GPR64RegClass.contains(Reg1)) {
- assert(ARM64::GPR64RegClass.contains(Reg2) &&
- "Expected GPR64 callee-saved register pair!");
- // For first spill use pre-increment store.
- if (i == 0)
- StrOpc = ARM64::STPXpre;
- else
- StrOpc = ARM64::STPXi;
- } else if (ARM64::FPR64RegClass.contains(Reg1)) {
- assert(ARM64::FPR64RegClass.contains(Reg2) &&
- "Expected FPR64 callee-saved register pair!");
- // For first spill use pre-increment store.
- if (i == 0)
- StrOpc = ARM64::STPDpre;
- else
- StrOpc = ARM64::STPDi;
- } else
- llvm_unreachable("Unexpected callee saved register!");
- DEBUG(dbgs() << "CSR spill: (" << TRI->getName(Reg1) << ", "
- << TRI->getName(Reg2) << ") -> fi#(" << CSI[idx].getFrameIdx()
- << ", " << CSI[idx + 1].getFrameIdx() << ")\n");
- // Compute the offset in 8-byte units: the first pair (i == 0)
- // pre-decrements by the whole area (-Count); every later pair is stored
- // at offset i.
- const int Offset = (i == 0) ? -Count : i;
- assert((Offset >= -64 && Offset <= 63) &&
- "Offset out of bounds for STP immediate");
- BuildMI(MBB, MI, DL, TII.get(StrOpc))
- .addReg(Reg2, getPrologueDeath(MF, Reg2))
- .addReg(Reg1, getPrologueDeath(MF, Reg1))
- .addReg(ARM64::SP)
- .addImm(Offset) // [sp, #offset * 8], where factor * 8 is implicit
- .setMIFlag(MachineInstr::FrameSetup);
- }
- return true;
-}
-
-bool ARM64FrameLowering::restoreCalleeSavedRegisters(
- MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const {
- MachineFunction &MF = *MBB.getParent();
- const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
- unsigned Count = CSI.size();
- DebugLoc DL;
- assert((Count & 1) == 0 && "Odd number of callee-saved regs to spill!");
-
- if (MI != MBB.end())
- DL = MI->getDebugLoc();
-
- for (unsigned i = 0; i < Count; i += 2) {
- unsigned Reg1 = CSI[i].getReg();
- unsigned Reg2 = CSI[i + 1].getReg();
- // GPRs and FPRs are saved in pairs of 64-bit regs. We expect the CSI
- // list to come in sorted by frame index so that we can issue the load
- // pair instructions directly. Assert if we see anything else.
- assert(CSI[i].getFrameIdx() + 1 == CSI[i + 1].getFrameIdx() &&
- "Out of order callee saved regs!");
- // Issue a sequence of CSR restores where only the last load post-increments
- // SP: that final load deallocates the whole stack area:
- // For example:
- // ldp fp, lr, [sp, #32] // addImm(+4)
- // ldp x20, x19, [sp, #16] // addImm(+2)
- // ldp x22, x21, [sp], #48 // addImm(+6)
- // Note: see comment in spillCalleeSavedRegisters()
- unsigned LdrOpc;
-
- assert((Count & 1) == 0 && "Odd number of callee-saved regs to spill!");
- assert((i & 1) == 0 && "Odd index for callee-saved reg spill!");
- if (ARM64::GPR64RegClass.contains(Reg1)) {
- assert(ARM64::GPR64RegClass.contains(Reg2) &&
- "Expected GPR64 callee-saved register pair!");
- if (i == Count - 2)
- LdrOpc = ARM64::LDPXpost;
- else
- LdrOpc = ARM64::LDPXi;
- } else if (ARM64::FPR64RegClass.contains(Reg1)) {
- assert(ARM64::FPR64RegClass.contains(Reg2) &&
- "Expected FPR64 callee-saved register pair!");
- if (i == Count - 2)
- LdrOpc = ARM64::LDPDpost;
- else
- LdrOpc = ARM64::LDPDi;
- } else
- llvm_unreachable("Unexpected callee saved register!");
- DEBUG(dbgs() << "CSR restore: (" << TRI->getName(Reg1) << ", "
- << TRI->getName(Reg2) << ") -> fi#(" << CSI[i].getFrameIdx()
- << ", " << CSI[i + 1].getFrameIdx() << ")\n");
-
- // Compute the offset in 8-byte units: each pair is restored at
- // Count - i - 2, except the final pair, which post-increments by the whole
- // area (+Count).
- const int Offset = (i == Count - 2) ? Count : Count - i - 2;
- assert((Offset >= -64 && Offset <= 63) &&
- "Offset out of bounds for LDP immediate");
- BuildMI(MBB, MI, DL, TII.get(LdrOpc))
- .addReg(Reg2, getDefRegState(true))
- .addReg(Reg1, getDefRegState(true))
- .addReg(ARM64::SP)
- .addImm(Offset); // [sp], #offset * 8 or [sp, #offset * 8]
- // where the factor * 8 is implicit
- }
- return true;
-}
-
-void ARM64FrameLowering::processFunctionBeforeCalleeSavedScan(
- MachineFunction &MF, RegScavenger *RS) const {
- const ARM64RegisterInfo *RegInfo =
- static_cast<const ARM64RegisterInfo *>(MF.getTarget().getRegisterInfo());
- ARM64FunctionInfo *AFI = MF.getInfo<ARM64FunctionInfo>();
- MachineRegisterInfo *MRI = &MF.getRegInfo();
- SmallVector<unsigned, 4> UnspilledCSGPRs;
- SmallVector<unsigned, 4> UnspilledCSFPRs;
-
- // The frame record needs to be created by saving the appropriate registers
- if (hasFP(MF)) {
- MRI->setPhysRegUsed(ARM64::FP);
- MRI->setPhysRegUsed(ARM64::LR);
- }
-
- // Spill the BasePtr if it's used. Do this first thing so that the
- // getCalleeSavedRegs() below will get the right answer.
- if (RegInfo->hasBasePointer(MF))
- MRI->setPhysRegUsed(RegInfo->getBaseRegister());
-
- // If any callee-saved registers are used, the frame cannot be eliminated.
- unsigned NumGPRSpilled = 0;
- unsigned NumFPRSpilled = 0;
- bool ExtraCSSpill = false;
- bool CanEliminateFrame = true;
- DEBUG(dbgs() << "*** processFunctionBeforeCalleeSavedScan\nUsed CSRs:");
- const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
-
- // Check pairs of consecutive callee-saved registers.
- for (unsigned i = 0; CSRegs[i]; i += 2) {
- assert(CSRegs[i + 1] && "Odd number of callee-saved registers!");
-
- const unsigned OddReg = CSRegs[i];
- const unsigned EvenReg = CSRegs[i + 1];
- assert((ARM64::GPR64RegClass.contains(OddReg) &&
- ARM64::GPR64RegClass.contains(EvenReg)) ^
- (ARM64::FPR64RegClass.contains(OddReg) &&
- ARM64::FPR64RegClass.contains(EvenReg)) &&
- "Register class mismatch!");
-
- const bool OddRegUsed = MRI->isPhysRegUsed(OddReg);
- const bool EvenRegUsed = MRI->isPhysRegUsed(EvenReg);
-
- // Early exit if none of the registers in the register pair is actually
- // used.
- if (!OddRegUsed && !EvenRegUsed) {
- if (ARM64::GPR64RegClass.contains(OddReg)) {
- UnspilledCSGPRs.push_back(OddReg);
- UnspilledCSGPRs.push_back(EvenReg);
- } else {
- UnspilledCSFPRs.push_back(OddReg);
- UnspilledCSFPRs.push_back(EvenReg);
- }
- continue;
- }
-
- unsigned Reg = ARM64::NoRegister;
- // If only one of the registers of the register pair is used, make sure to
- // mark the other one as used as well.
- if (OddRegUsed ^ EvenRegUsed) {
- // Find out which register is the additional spill.
- Reg = OddRegUsed ? EvenReg : OddReg;
- MRI->setPhysRegUsed(Reg);
- }
-
- DEBUG(dbgs() << ' ' << PrintReg(OddReg, RegInfo));
- DEBUG(dbgs() << ' ' << PrintReg(EvenReg, RegInfo));
-
- assert(((OddReg == ARM64::LR && EvenReg == ARM64::FP) ||
- (RegInfo->getEncodingValue(OddReg) + 1 ==
- RegInfo->getEncodingValue(EvenReg))) &&
- "Register pair of non-adjacent registers!");
- if (ARM64::GPR64RegClass.contains(OddReg)) {
- NumGPRSpilled += 2;
- // If it's not a reserved register, we can use it in lieu of an
- // emergency spill slot for the register scavenger.
- // FIXME: It would be better to instead keep looking and choose another
- // unspilled register that isn't reserved, if there is one.
- if (Reg != ARM64::NoRegister && !RegInfo->isReservedReg(MF, Reg))
- ExtraCSSpill = true;
- } else
- NumFPRSpilled += 2;
-
- CanEliminateFrame = false;
- }
-
- // FIXME: Set BigStack if any stack slot references may be out of range.
- // For now, just conservatively guesstimate based on unscaled indexing
- // range. We'll end up allocating an unnecessary spill slot a lot, but
- // realistically that's not a big deal at this stage of the game.
- // The CSR spill slots have not been allocated yet, so estimateStackSize
- // won't include them.
- MachineFrameInfo *MFI = MF.getFrameInfo();
- unsigned CFSize = estimateStackSize(MF) + 8 * (NumGPRSpilled + NumFPRSpilled);
- DEBUG(dbgs() << "Estimated stack frame size: " << CFSize << " bytes.\n");
- bool BigStack = (CFSize >= 256);
- if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF))
- AFI->setHasStackFrame(true);
-
- // Estimate if we might need to scavenge a register at some point in order
- // to materialize a stack offset. If so, either spill one additional
- // callee-saved register or reserve a special spill slot to facilitate
- // register scavenging. If we already spilled an extra callee-saved register
- // above to keep the number of spills even, we don't need to do anything else
- // here.
- if (BigStack && !ExtraCSSpill) {
-
- // If we're adding a register to spill here, we have to add two of them
- // to keep the number of regs to spill even.
- assert(((UnspilledCSGPRs.size() & 1) == 0) && "Odd number of registers!");
- unsigned Count = 0;
- while (!UnspilledCSGPRs.empty() && Count < 2) {
- unsigned Reg = UnspilledCSGPRs.back();
- UnspilledCSGPRs.pop_back();
- DEBUG(dbgs() << "Spilling " << PrintReg(Reg, RegInfo)
- << " to get a scratch register.\n");
- MRI->setPhysRegUsed(Reg);
- ExtraCSSpill = true;
- ++Count;
- }
-
- // If we didn't find an extra callee-saved register to spill, create
- // an emergency spill slot.
- if (!ExtraCSSpill) {
- const TargetRegisterClass *RC = &ARM64::GPR64RegClass;
- int FI = MFI->CreateStackObject(RC->getSize(), RC->getAlignment(), false);
- RS->addScavengingFrameIndex(FI);
- DEBUG(dbgs() << "No available CS registers, allocated fi#" << FI
- << " as the emergency spill slot.\n");
- }
- }
-}
diff --git a/lib/Target/ARM64/ARM64FrameLowering.h b/lib/Target/ARM64/ARM64FrameLowering.h
deleted file mode 100644
index 02edcdb..0000000
--- a/lib/Target/ARM64/ARM64FrameLowering.h
+++ /dev/null
@@ -1,75 +0,0 @@
-//===-- ARM64FrameLowering.h - TargetFrameLowering for ARM64 ----*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-//
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef ARM64_FRAMELOWERING_H
-#define ARM64_FRAMELOWERING_H
-
-#include "llvm/Target/TargetFrameLowering.h"
-
-namespace llvm {
-
-class ARM64Subtarget;
-class ARM64TargetMachine;
-
-class ARM64FrameLowering : public TargetFrameLowering {
- const ARM64TargetMachine &TM;
-
-public:
- explicit ARM64FrameLowering(const ARM64TargetMachine &TM,
- const ARM64Subtarget &STI)
- : TargetFrameLowering(StackGrowsDown, 16, 0, 16,
- false /*StackRealignable*/),
- TM(TM) {}
-
- void emitCalleeSavedFrameMoves(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- unsigned FramePtr) const;
-
- void eliminateCallFramePseudoInstr(MachineFunction &MF,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const;
-
- /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
- /// the function.
- void emitPrologue(MachineFunction &MF) const;
- void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
-
- int getFrameIndexOffset(const MachineFunction &MF, int FI) const;
- int getFrameIndexReference(const MachineFunction &MF, int FI,
- unsigned &FrameReg) const;
- int resolveFrameIndexReference(const MachineFunction &MF, int FI,
- unsigned &FrameReg,
- bool PreferFP = false) const;
- bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const;
-
- bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const;
-
- /// \brief Can this function use the red zone for local allocations.
- bool canUseRedZone(const MachineFunction &MF) const;
-
- bool hasFP(const MachineFunction &MF) const;
- bool hasReservedCallFrame(const MachineFunction &MF) const;
-
- void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
- RegScavenger *RS) const;
-};
-
-} // End llvm namespace
-
-#endif
diff --git a/lib/Target/ARM64/ARM64ISelDAGToDAG.cpp b/lib/Target/ARM64/ARM64ISelDAGToDAG.cpp
deleted file mode 100644
index 2e234c9..0000000
--- a/lib/Target/ARM64/ARM64ISelDAGToDAG.cpp
+++ /dev/null
@@ -1,2381 +0,0 @@
-//===-- ARM64ISelDAGToDAG.cpp - A dag to dag inst selector for ARM64 ------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines an instruction selector for the ARM64 target.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "arm64-isel"
-#include "ARM64TargetMachine.h"
-#include "MCTargetDesc/ARM64AddressingModes.h"
-#include "llvm/CodeGen/SelectionDAGISel.h"
-#include "llvm/IR/Function.h" // To access function attributes.
-#include "llvm/IR/GlobalValue.h"
-#include "llvm/IR/Intrinsics.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/raw_ostream.h"
-
-using namespace llvm;
-
-//===--------------------------------------------------------------------===//
-/// ARM64DAGToDAGISel - ARM64 specific code to select ARM64 machine
-/// instructions for SelectionDAG operations.
-///
-namespace {
-
-class ARM64DAGToDAGISel : public SelectionDAGISel {
- ARM64TargetMachine &TM;
-
- /// Subtarget - Keep a pointer to the ARM64Subtarget around so that we can
- /// make the right decision when generating code for different targets.
- const ARM64Subtarget *Subtarget;
-
- bool ForCodeSize;
-
-public:
- explicit ARM64DAGToDAGISel(ARM64TargetMachine &tm, CodeGenOpt::Level OptLevel)
- : SelectionDAGISel(tm, OptLevel), TM(tm),
- Subtarget(&TM.getSubtarget<ARM64Subtarget>()), ForCodeSize(false) {}
-
- virtual const char *getPassName() const {
- return "ARM64 Instruction Selection";
- }
-
- virtual bool runOnMachineFunction(MachineFunction &MF) {
- AttributeSet FnAttrs = MF.getFunction()->getAttributes();
- ForCodeSize =
- FnAttrs.hasAttribute(AttributeSet::FunctionIndex,
- Attribute::OptimizeForSize) ||
- FnAttrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::MinSize);
- return SelectionDAGISel::runOnMachineFunction(MF);
- }
-
- SDNode *Select(SDNode *Node);
-
- /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
- /// inline asm expressions.
- virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
- char ConstraintCode,
- std::vector<SDValue> &OutOps);
-
- SDNode *SelectMLAV64LaneV128(SDNode *N);
- SDNode *SelectMULLV64LaneV128(unsigned IntNo, SDNode *N);
- bool SelectArithExtendedRegister(SDValue N, SDValue &Reg, SDValue &Shift);
- bool SelectArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
- bool SelectNegArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
- bool SelectArithShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
- return SelectShiftedRegister(N, false, Reg, Shift);
- }
- bool SelectLogicalShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
- return SelectShiftedRegister(N, true, Reg, Shift);
- }
- bool SelectAddrModeIndexed8(SDValue N, SDValue &Base, SDValue &OffImm) {
- return SelectAddrModeIndexed(N, 1, Base, OffImm);
- }
- bool SelectAddrModeIndexed16(SDValue N, SDValue &Base, SDValue &OffImm) {
- return SelectAddrModeIndexed(N, 2, Base, OffImm);
- }
- bool SelectAddrModeIndexed32(SDValue N, SDValue &Base, SDValue &OffImm) {
- return SelectAddrModeIndexed(N, 4, Base, OffImm);
- }
- bool SelectAddrModeIndexed64(SDValue N, SDValue &Base, SDValue &OffImm) {
- return SelectAddrModeIndexed(N, 8, Base, OffImm);
- }
- bool SelectAddrModeIndexed128(SDValue N, SDValue &Base, SDValue &OffImm) {
- return SelectAddrModeIndexed(N, 16, Base, OffImm);
- }
- bool SelectAddrModeUnscaled8(SDValue N, SDValue &Base, SDValue &OffImm) {
- return SelectAddrModeUnscaled(N, 1, Base, OffImm);
- }
- bool SelectAddrModeUnscaled16(SDValue N, SDValue &Base, SDValue &OffImm) {
- return SelectAddrModeUnscaled(N, 2, Base, OffImm);
- }
- bool SelectAddrModeUnscaled32(SDValue N, SDValue &Base, SDValue &OffImm) {
- return SelectAddrModeUnscaled(N, 4, Base, OffImm);
- }
- bool SelectAddrModeUnscaled64(SDValue N, SDValue &Base, SDValue &OffImm) {
- return SelectAddrModeUnscaled(N, 8, Base, OffImm);
- }
- bool SelectAddrModeUnscaled128(SDValue N, SDValue &Base, SDValue &OffImm) {
- return SelectAddrModeUnscaled(N, 16, Base, OffImm);
- }
-
- bool SelectAddrModeRO8(SDValue N, SDValue &Base, SDValue &Offset,
- SDValue &Imm) {
- return SelectAddrModeRO(N, 1, Base, Offset, Imm);
- }
- bool SelectAddrModeRO16(SDValue N, SDValue &Base, SDValue &Offset,
- SDValue &Imm) {
- return SelectAddrModeRO(N, 2, Base, Offset, Imm);
- }
- bool SelectAddrModeRO32(SDValue N, SDValue &Base, SDValue &Offset,
- SDValue &Imm) {
- return SelectAddrModeRO(N, 4, Base, Offset, Imm);
- }
- bool SelectAddrModeRO64(SDValue N, SDValue &Base, SDValue &Offset,
- SDValue &Imm) {
- return SelectAddrModeRO(N, 8, Base, Offset, Imm);
- }
- bool SelectAddrModeRO128(SDValue N, SDValue &Base, SDValue &Offset,
- SDValue &Imm) {
- return SelectAddrModeRO(N, 16, Base, Offset, Imm);
- }
- bool SelectAddrModeNoIndex(SDValue N, SDValue &Val);
-
- /// Form sequences of consecutive 64/128-bit registers for use in NEON
- /// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have
- /// between 1 and 4 elements. If it contains a single element, that element is
- /// returned unchanged; otherwise a REG_SEQUENCE value is returned.
- SDValue createDTuple(ArrayRef<SDValue> Vecs);
- SDValue createQTuple(ArrayRef<SDValue> Vecs);
-
- /// Generic helper for the createDTuple/createQTuple
- /// functions. Those should almost always be called instead.
- SDValue createTuple(ArrayRef<SDValue> Vecs, unsigned RegClassIDs[],
- unsigned SubRegs[]);
-
- SDNode *SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, bool isExt);
-
- SDNode *SelectIndexedLoad(SDNode *N, bool &Done);
-
- SDNode *SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
- unsigned SubRegIdx);
- SDNode *SelectLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
-
- SDNode *SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc);
- SDNode *SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
-
- SDNode *SelectSIMDAddSubNarrowing(unsigned IntNo, SDNode *Node);
- SDNode *SelectSIMDXtnNarrowing(unsigned IntNo, SDNode *Node);
-
- SDNode *SelectAtomic(SDNode *Node, unsigned Op8, unsigned Op16, unsigned Op32,
- unsigned Op64);
-
- SDNode *SelectBitfieldExtractOp(SDNode *N);
- SDNode *SelectBitfieldInsertOp(SDNode *N);
-
- SDNode *SelectLIBM(SDNode *N);
-
-// Include the pieces autogenerated from the target description.
-#include "ARM64GenDAGISel.inc"
-
-private:
- bool SelectShiftedRegister(SDValue N, bool AllowROR, SDValue &Reg,
- SDValue &Shift);
- bool SelectAddrModeIndexed(SDValue N, unsigned Size, SDValue &Base,
- SDValue &OffImm);
- bool SelectAddrModeUnscaled(SDValue N, unsigned Size, SDValue &Base,
- SDValue &OffImm);
- bool SelectAddrModeRO(SDValue N, unsigned Size, SDValue &Base,
- SDValue &Offset, SDValue &Imm);
- bool isWorthFolding(SDValue V) const;
- bool SelectExtendedSHL(SDValue N, unsigned Size, SDValue &Offset,
- SDValue &Imm);
-};
-} // end anonymous namespace
-
-/// isIntImmediate - This method tests to see if the node is a constant
-/// operand. If so, Imm will receive the value.
-static bool isIntImmediate(const SDNode *N, uint64_t &Imm) {
- if (const ConstantSDNode *C = dyn_cast<const ConstantSDNode>(N)) {
- Imm = C->getZExtValue();
- return true;
- }
- return false;
-}
-
-// isIntImmediate - This method tests to see if the operand is a constant.
-// If so, Imm will receive the value.
-static bool isIntImmediate(SDValue N, uint64_t &Imm) {
- return isIntImmediate(N.getNode(), Imm);
-}
-
-// isOpcWithIntImmediate - This method tests to see if the node has a specific
-// opcode and an immediate integer right operand.
-// If so, Imm will receive the value.
-static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc,
- uint64_t &Imm) {
- return N->getOpcode() == Opc &&
- isIntImmediate(N->getOperand(1).getNode(), Imm);
-}
-
-bool ARM64DAGToDAGISel::SelectAddrModeNoIndex(SDValue N, SDValue &Val) {
- EVT ValTy = N.getValueType();
- if (ValTy != MVT::i64)
- return false;
- Val = N;
- return true;
-}
-
-bool ARM64DAGToDAGISel::SelectInlineAsmMemoryOperand(
- const SDValue &Op, char ConstraintCode, std::vector<SDValue> &OutOps) {
- assert(ConstraintCode == 'm' && "unexpected asm memory constraint");
- // Require the address to be in a register. That is safe for all ARM64
- // variants and it is hard to do anything much smarter without knowing
- // how the operand is used.
- OutOps.push_back(Op);
- return false;
-}
-
-/// SelectArithImmed - Select an immediate value that can be represented as
-/// a 12-bit value shifted left by either 0 or 12. If so, return true with
-/// Val set to the 12-bit value and Shift set to the shifter operand.
-bool ARM64DAGToDAGISel::SelectArithImmed(SDValue N, SDValue &Val,
- SDValue &Shift) {
- // This function is called from the addsub_shifted_imm ComplexPattern,
- // which lists [imm] as the list of opcodes it's interested in; however,
- // we still need to check whether the operand is actually an immediate
- // here because the ComplexPattern opcode list is only used in
- // root-level opcode matching.
- if (!isa<ConstantSDNode>(N.getNode()))
- return false;
-
- uint64_t Immed = cast<ConstantSDNode>(N.getNode())->getZExtValue();
- unsigned ShiftAmt;
-
- if (Immed >> 12 == 0) {
- ShiftAmt = 0;
- } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
- ShiftAmt = 12;
- Immed = Immed >> 12;
- } else
- return false;
-
- unsigned ShVal = ARM64_AM::getShifterImm(ARM64_AM::LSL, ShiftAmt);
- Val = CurDAG->getTargetConstant(Immed, MVT::i32);
- Shift = CurDAG->getTargetConstant(ShVal, MVT::i32);
- return true;
-}
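
The legality test above can be restated as a tiny standalone sketch; isLegalArithImmed is a hypothetical name for illustration, not an LLVM API:

    #include <cassert>
    #include <cstdint>

    // Sketch: an arithmetic immediate is a 12-bit value shifted left by 0 or 12.
    static bool isLegalArithImmed(uint64_t Imm, unsigned &Val, unsigned &ShiftAmt) {
      if (Imm >> 12 == 0) {
        ShiftAmt = 0;
      } else if ((Imm & 0xfff) == 0 && Imm >> 24 == 0) {
        ShiftAmt = 12;
        Imm >>= 12;
      } else {
        return false;
      }
      Val = (unsigned)Imm;
      return true;
    }

    int main() {
      unsigned V, S;
      assert(isLegalArithImmed(0xabc, V, S) && S == 0);     // plain 12-bit value
      assert(isLegalArithImmed(0xabc000, V, S) && S == 12); // 12-bit value << 12
      assert(!isLegalArithImmed(0xabc001, V, S));           // low bits set, too wide
    }
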
-
-/// SelectNegArithImmed - As above, but negates the value before trying to
-/// select it.
-bool ARM64DAGToDAGISel::SelectNegArithImmed(SDValue N, SDValue &Val,
- SDValue &Shift) {
- // This function is called from the addsub_shifted_imm ComplexPattern,
- // which lists [imm] as the list of opcodes it's interested in; however,
- // we still need to check whether the operand is actually an immediate
- // here because the ComplexPattern opcode list is only used in
- // root-level opcode matching.
- if (!isa<ConstantSDNode>(N.getNode()))
- return false;
-
- // The immediate operand must be a 24-bit zero-extended immediate.
- uint64_t Immed = cast<ConstantSDNode>(N.getNode())->getZExtValue();
-
- // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
- // have the opposite effect on the C flag, so this pattern mustn't match under
- // those circumstances.
- if (Immed == 0)
- return false;
-
- if (N.getValueType() == MVT::i32)
- Immed = ~((uint32_t)Immed) + 1;
- else
- Immed = ~Immed + 1ULL;
- if (Immed & 0xFFFFFFFFFF000000ULL)
- return false;
-
- Immed &= 0xFFFFFFULL;
- return SelectArithImmed(CurDAG->getConstant(Immed, MVT::i32), Val, Shift);
-}
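
A minimal trace of the negation path, assuming (as in the DAG) that the i32 constant arrives zero-extended to 64 bits:

    #include <cassert>
    #include <cstdint>

    int main() {
      uint64_t Immed = (uint32_t)-16;      // i32 constant -16, zero-extended: 0xFFFFFFF0
      Immed = ~((uint32_t)Immed) + 1;      // two's-complement negate in 32 bits: 16
      assert((Immed & 0xFFFFFFFFFF000000ULL) == 0); // fits the 24-bit window
      assert((Immed & 0xFFFFFFULL) == 16); // so e.g. ADD #-16 can become SUB #16
    }
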
-
-/// getShiftTypeForNode - Translate a shift node to the corresponding
-/// ShiftType value.
-static ARM64_AM::ShiftType getShiftTypeForNode(SDValue N) {
- switch (N.getOpcode()) {
- default:
- return ARM64_AM::InvalidShift;
- case ISD::SHL:
- return ARM64_AM::LSL;
- case ISD::SRL:
- return ARM64_AM::LSR;
- case ISD::SRA:
- return ARM64_AM::ASR;
- case ISD::ROTR:
- return ARM64_AM::ROR;
- }
-}
-
-/// \brief Determine whether it is worth folding V into an extended register.
-bool ARM64DAGToDAGISel::isWorthFolding(SDValue V) const {
- // It hurts if a value is used at least twice, unless we are optimizing
- // for code size.
- if (ForCodeSize || V.hasOneUse())
- return true;
- return false;
-}
-
-/// SelectShiftedRegister - Select a "shifted register" operand. If the value
-/// is not shifted, set the Shift operand to the default of "LSL 0". The logical
-/// instructions allow the shifted register to be rotated, but the arithmetic
-/// instructions do not. The AllowROR parameter specifies whether ROR is
-/// supported.
-bool ARM64DAGToDAGISel::SelectShiftedRegister(SDValue N, bool AllowROR,
- SDValue &Reg, SDValue &Shift) {
- ARM64_AM::ShiftType ShType = getShiftTypeForNode(N);
- if (ShType == ARM64_AM::InvalidShift)
- return false;
- if (!AllowROR && ShType == ARM64_AM::ROR)
- return false;
-
- if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
- unsigned BitSize = N.getValueType().getSizeInBits();
- unsigned Val = RHS->getZExtValue() & (BitSize - 1);
- unsigned ShVal = ARM64_AM::getShifterImm(ShType, Val);
-
- Reg = N.getOperand(0);
- Shift = CurDAG->getTargetConstant(ShVal, MVT::i32);
- return isWorthFolding(N);
- }
-
- return false;
-}
-
-/// getExtendTypeForNode - Translate an extend node to the corresponding
-/// ExtendType value.
-static ARM64_AM::ExtendType getExtendTypeForNode(SDValue N,
- bool IsLoadStore = false) {
- if (N.getOpcode() == ISD::SIGN_EXTEND ||
- N.getOpcode() == ISD::SIGN_EXTEND_INREG) {
- EVT SrcVT;
- if (N.getOpcode() == ISD::SIGN_EXTEND_INREG)
- SrcVT = cast<VTSDNode>(N.getOperand(1))->getVT();
- else
- SrcVT = N.getOperand(0).getValueType();
-
- if (!IsLoadStore && SrcVT == MVT::i8)
- return ARM64_AM::SXTB;
- else if (!IsLoadStore && SrcVT == MVT::i16)
- return ARM64_AM::SXTH;
- else if (SrcVT == MVT::i32)
- return ARM64_AM::SXTW;
- else if (SrcVT == MVT::i64)
- return ARM64_AM::SXTX;
-
- return ARM64_AM::InvalidExtend;
- } else if (N.getOpcode() == ISD::ZERO_EXTEND ||
- N.getOpcode() == ISD::ANY_EXTEND) {
- EVT SrcVT = N.getOperand(0).getValueType();
- if (!IsLoadStore && SrcVT == MVT::i8)
- return ARM64_AM::UXTB;
- else if (!IsLoadStore && SrcVT == MVT::i16)
- return ARM64_AM::UXTH;
- else if (SrcVT == MVT::i32)
- return ARM64_AM::UXTW;
- else if (SrcVT == MVT::i64)
- return ARM64_AM::UXTX;
-
- return ARM64_AM::InvalidExtend;
- } else if (N.getOpcode() == ISD::AND) {
- ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
- if (!CSD)
- return ARM64_AM::InvalidExtend;
- uint64_t AndMask = CSD->getZExtValue();
-
- switch (AndMask) {
- default:
- return ARM64_AM::InvalidExtend;
- case 0xFF:
- return !IsLoadStore ? ARM64_AM::UXTB : ARM64_AM::InvalidExtend;
- case 0xFFFF:
- return !IsLoadStore ? ARM64_AM::UXTH : ARM64_AM::InvalidExtend;
- case 0xFFFFFFFF:
- return ARM64_AM::UXTW;
- }
- }
-
- return ARM64_AM::InvalidExtend;
-}
-
-// Helper for SelectMLAV64LaneV128 - Recognize high lane extracts.
-static bool checkHighLaneIndex(SDNode *DL, SDValue &LaneOp, int &LaneIdx) {
- if (DL->getOpcode() != ARM64ISD::DUPLANE16 &&
- DL->getOpcode() != ARM64ISD::DUPLANE32)
- return false;
-
- SDValue SV = DL->getOperand(0);
- if (SV.getOpcode() != ISD::INSERT_SUBVECTOR)
- return false;
-
- SDValue EV = SV.getOperand(1);
- if (EV.getOpcode() != ISD::EXTRACT_SUBVECTOR)
- return false;
-
- ConstantSDNode *DLidx = cast<ConstantSDNode>(DL->getOperand(1).getNode());
- ConstantSDNode *EVidx = cast<ConstantSDNode>(EV.getOperand(1).getNode());
- LaneIdx = DLidx->getSExtValue() + EVidx->getSExtValue();
- LaneOp = EV.getOperand(0);
-
- return true;
-}
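
The lane arithmetic can be illustrated with a hypothetical helper; combinedLaneIndex is not an LLVM function:

    #include <cassert>

    // The lane in the original 128-bit vector is the DUPLANE index plus the
    // start of the extracted subvector.
    static int combinedLaneIndex(int DupLaneIdx, int ExtractStart) {
      return DupLaneIdx + ExtractStart;
    }

    int main() {
      // Extract elements [4..7] of a v8i16, then DUPLANE16 lane 1 of that half:
      assert(combinedLaneIndex(1, 4) == 5); // really lane 5 of the full vector
    }
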
-
-// Helper for SelectOpcV64LaneV128 - Recognize operations where one operand is a
-// high lane extract.
-static bool checkV64LaneV128(SDValue Op0, SDValue Op1, SDValue &StdOp,
- SDValue &LaneOp, int &LaneIdx) {
-
- if (!checkHighLaneIndex(Op0.getNode(), LaneOp, LaneIdx)) {
- std::swap(Op0, Op1);
- if (!checkHighLaneIndex(Op0.getNode(), LaneOp, LaneIdx))
- return false;
- }
- StdOp = Op1;
- return true;
-}
-
-/// SelectMLAV64LaneV128 - ARM64 supports 64-bit vector MLAs (v4i16 and v2i32)
-/// where one multiplicand is a lane in the upper half of a 128-bit vector.
-/// Recognize and select this so that we don't emit unnecessary lane extracts.
-SDNode *ARM64DAGToDAGISel::SelectMLAV64LaneV128(SDNode *N) {
- SDValue Op0 = N->getOperand(0);
- SDValue Op1 = N->getOperand(1);
- SDValue MLAOp1; // Will hold ordinary multiplicand for MLA.
- SDValue MLAOp2; // Will hold lane-accessed multiplicand for MLA.
- int LaneIdx = -1; // Will hold the lane index.
-
- if (Op1.getOpcode() != ISD::MUL ||
- !checkV64LaneV128(Op1.getOperand(0), Op1.getOperand(1), MLAOp1, MLAOp2,
- LaneIdx)) {
- std::swap(Op0, Op1);
- if (Op1.getOpcode() != ISD::MUL ||
- !checkV64LaneV128(Op1.getOperand(0), Op1.getOperand(1), MLAOp1, MLAOp2,
- LaneIdx))
- return 0;
- }
-
- SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, MVT::i64);
-
- SDValue Ops[] = { Op0, MLAOp1, MLAOp2, LaneIdxVal };
-
- unsigned MLAOpc = ~0U;
-
- switch (N->getSimpleValueType(0).SimpleTy) {
- default:
- llvm_unreachable("Unrecognized MLA.");
- case MVT::v4i16:
- MLAOpc = ARM64::MLAv4i16_indexed;
- break;
- case MVT::v2i32:
- MLAOpc = ARM64::MLAv2i32_indexed;
- break;
- }
-
- return CurDAG->getMachineNode(MLAOpc, SDLoc(N), N->getValueType(0), Ops);
-}
-
-SDNode *ARM64DAGToDAGISel::SelectMULLV64LaneV128(unsigned IntNo, SDNode *N) {
- SDValue SMULLOp0;
- SDValue SMULLOp1;
- int LaneIdx;
-
- if (!checkV64LaneV128(N->getOperand(1), N->getOperand(2), SMULLOp0, SMULLOp1,
- LaneIdx))
- return 0;
-
- SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, MVT::i64);
-
- SDValue Ops[] = { SMULLOp0, SMULLOp1, LaneIdxVal };
-
- unsigned SMULLOpc = ~0U;
-
- if (IntNo == Intrinsic::arm64_neon_smull) {
- switch (N->getSimpleValueType(0).SimpleTy) {
- default:
- llvm_unreachable("Unrecognized SMULL.");
- case MVT::v4i32:
- SMULLOpc = ARM64::SMULLv4i16_indexed;
- break;
- case MVT::v2i64:
- SMULLOpc = ARM64::SMULLv2i32_indexed;
- break;
- }
- } else if (IntNo == Intrinsic::arm64_neon_umull) {
- switch (N->getSimpleValueType(0).SimpleTy) {
- default:
- llvm_unreachable("Unrecognized SMULL.");
- case MVT::v4i32:
- SMULLOpc = ARM64::UMULLv4i16_indexed;
- break;
- case MVT::v2i64:
- SMULLOpc = ARM64::UMULLv2i32_indexed;
- break;
- }
- } else
- llvm_unreachable("Unrecognized intrinsic.");
-
- return CurDAG->getMachineNode(SMULLOpc, SDLoc(N), N->getValueType(0), Ops);
-}
-
-/// SelectArithExtendedRegister - Select an "extended register" operand. This
-/// operand folds in an extend followed by an optional left shift.
-bool ARM64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg,
- SDValue &Shift) {
- unsigned ShiftVal = 0;
- ARM64_AM::ExtendType Ext;
-
- if (N.getOpcode() == ISD::SHL) {
- ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
- if (!CSD)
- return false;
- ShiftVal = CSD->getZExtValue();
- if ((ShiftVal & 0x3) != ShiftVal)
- return false;
-
- Ext = getExtendTypeForNode(N.getOperand(0));
- if (Ext == ARM64_AM::InvalidExtend)
- return false;
-
- Reg = N.getOperand(0).getOperand(0);
- } else {
- Ext = getExtendTypeForNode(N);
- if (Ext == ARM64_AM::InvalidExtend)
- return false;
-
- Reg = N.getOperand(0);
- }
-
- // ARM64 mandates that the RHS of the operation must use the smallest
- // register class that could contain the size being extended from. Thus,
- // if we're folding a (sext i8), we need the RHS to be a GPR32, even though
- // there might not be an actual 32-bit value in the program. We can
- // (harmlessly) synthesize one by injecting an EXTRACT_SUBREG here.
- if (Reg.getValueType() == MVT::i64 && Ext != ARM64_AM::UXTX &&
- Ext != ARM64_AM::SXTX) {
- SDValue SubReg = CurDAG->getTargetConstant(ARM64::sub_32, MVT::i32);
- MachineSDNode *Node = CurDAG->getMachineNode(
- TargetOpcode::EXTRACT_SUBREG, SDLoc(N), MVT::i32, Reg, SubReg);
- Reg = SDValue(Node, 0);
- }
-
- Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), MVT::i32);
- return isWorthFolding(N);
-}
-
-/// SelectAddrModeIndexed - Select a "register plus scaled unsigned 12-bit
-/// immediate" address. The "Size" argument is the size in bytes of the memory
-/// reference, which determines the scale.
-bool ARM64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size,
- SDValue &Base, SDValue &OffImm) {
- const TargetLowering *TLI = getTargetLowering();
- if (N.getOpcode() == ISD::FrameIndex) {
- int FI = cast<FrameIndexSDNode>(N)->getIndex();
- Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
- OffImm = CurDAG->getTargetConstant(0, MVT::i64);
- return true;
- }
-
- if (N.getOpcode() == ARM64ISD::ADDlow) {
- GlobalAddressSDNode *GAN =
- dyn_cast<GlobalAddressSDNode>(N.getOperand(1).getNode());
- Base = N.getOperand(0);
- OffImm = N.getOperand(1);
- if (!GAN)
- return true;
-
- const GlobalValue *GV = GAN->getGlobal();
- unsigned Alignment = GV->getAlignment();
- const DataLayout *DL = TLI->getDataLayout();
- if (Alignment == 0 && !Subtarget->isTargetDarwin())
- Alignment = DL->getABITypeAlignment(GV->getType()->getElementType());
-
- if (Alignment >= Size)
- return true;
- }
-
- if (CurDAG->isBaseWithConstantOffset(N)) {
- if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
- int64_t RHSC = (int64_t)RHS->getZExtValue();
- unsigned Scale = Log2_32(Size);
- if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
- Base = N.getOperand(0);
- if (Base.getOpcode() == ISD::FrameIndex) {
- int FI = cast<FrameIndexSDNode>(Base)->getIndex();
- Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
- }
- OffImm = CurDAG->getTargetConstant(RHSC >> Scale, MVT::i64);
- return true;
- }
- }
- }
-
- // Before falling back to our general case, check if the unscaled
- // instructions can handle this. If so, that's preferable.
- if (SelectAddrModeUnscaled(N, Size, Base, OffImm))
- return false;
-
- // Base only. The address will be materialized into a register before
- // the memory is accessed.
- // add x0, Xbase, #offset
- // ldr x0, [x0]
- Base = N;
- OffImm = CurDAG->getTargetConstant(0, MVT::i64);
- return true;
-}
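
A self-contained sketch of the scaled-offset test (isLegalScaledImm and log2u are assumed names, not LLVM helpers): the offset must be non-negative, aligned to the access size, and fit in 12 bits after scaling.

    #include <cassert>
    #include <cstdint>

    static unsigned log2u(unsigned V) { unsigned L = 0; while (V >>= 1) ++L; return L; }

    static bool isLegalScaledImm(int64_t Off, unsigned Size) {
      unsigned Scale = log2u(Size);
      return (Off & (Size - 1)) == 0 && Off >= 0 && (Off >> Scale) < 0x1000;
    }

    int main() {
      assert(isLegalScaledImm(32760, 8));  // 0xFFF * 8: largest 8-byte offset
      assert(!isLegalScaledImm(32768, 8)); // 0x1000 * 8: one step too far
      assert(!isLegalScaledImm(4, 8));     // misaligned for an 8-byte access
    }
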
-
-/// SelectAddrModeUnscaled - Select a "register plus unscaled signed 9-bit
-/// immediate" address. This should only match when there is an offset that
-/// is not valid for a scaled immediate addressing mode. The "Size" argument
-/// is the size in bytes of the memory reference, which is needed here to know
-/// what is valid for a scaled immediate.
-bool ARM64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size,
- SDValue &Base, SDValue &OffImm) {
- if (!CurDAG->isBaseWithConstantOffset(N))
- return false;
- if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
- int64_t RHSC = RHS->getSExtValue();
- // If the offset is valid as a scaled immediate, don't match here.
- if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 &&
- RHSC < (0x1000 << Log2_32(Size)))
- return false;
- if (RHSC >= -256 && RHSC < 256) {
- Base = N.getOperand(0);
- if (Base.getOpcode() == ISD::FrameIndex) {
- int FI = cast<FrameIndexSDNode>(Base)->getIndex();
- const TargetLowering *TLI = getTargetLowering();
- Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
- }
- OffImm = CurDAG->getTargetConstant(RHSC, MVT::i64);
- return true;
- }
- }
- return false;
-}
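
The complementary unscaled test, again as an assumed standalone helper: a signed 9-bit byte offset, used only when the scaled 12-bit form above does not apply.

    #include <cassert>
    #include <cstdint>

    static bool isLegalUnscaledImm(int64_t Off) { return Off >= -256 && Off < 256; }

    int main() {
      assert(isLegalUnscaledImm(-256) && isLegalUnscaledImm(255));
      assert(!isLegalUnscaledImm(256)); // out of the LDUR/STUR range
    }
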
-
-static SDValue Widen(SelectionDAG *CurDAG, SDValue N) {
- SDValue SubReg = CurDAG->getTargetConstant(ARM64::sub_32, MVT::i32);
- SDValue ImpDef = SDValue(
- CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, SDLoc(N), MVT::i64),
- 0);
- MachineSDNode *Node = CurDAG->getMachineNode(
- TargetOpcode::INSERT_SUBREG, SDLoc(N), MVT::i64, ImpDef, N, SubReg);
- return SDValue(Node, 0);
-}
-
-static SDValue WidenIfNeeded(SelectionDAG *CurDAG, SDValue N) {
- if (N.getValueType() == MVT::i32) {
- return Widen(CurDAG, N);
- }
-
- return N;
-}
-
-/// \brief Check if the given SHL node (\p N) can be used to form an
-/// extended register for an addressing mode.
-bool ARM64DAGToDAGISel::SelectExtendedSHL(SDValue N, unsigned Size,
- SDValue &Offset, SDValue &Imm) {
- assert(N.getOpcode() == ISD::SHL && "Invalid opcode.");
- ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
- if (CSD && (CSD->getZExtValue() & 0x7) == CSD->getZExtValue()) {
-
- ARM64_AM::ExtendType Ext = getExtendTypeForNode(N.getOperand(0), true);
- if (Ext == ARM64_AM::InvalidExtend) {
- Ext = ARM64_AM::UXTX;
- Offset = WidenIfNeeded(CurDAG, N.getOperand(0));
- } else {
- Offset = WidenIfNeeded(CurDAG, N.getOperand(0).getOperand(0));
- }
-
- unsigned LegalShiftVal = Log2_32(Size);
- unsigned ShiftVal = CSD->getZExtValue();
-
- if (ShiftVal != 0 && ShiftVal != LegalShiftVal)
- return false;
-
- Imm = CurDAG->getTargetConstant(
- ARM64_AM::getMemExtendImm(Ext, ShiftVal != 0), MVT::i32);
- if (isWorthFolding(N))
- return true;
- }
- return false;
-}
-
-bool ARM64DAGToDAGISel::SelectAddrModeRO(SDValue N, unsigned Size,
- SDValue &Base, SDValue &Offset,
- SDValue &Imm) {
- if (N.getOpcode() != ISD::ADD)
- return false;
- SDValue LHS = N.getOperand(0);
- SDValue RHS = N.getOperand(1);
-
- // We don't want to match immediate adds here, because they are better lowered
- // to the register-immediate addressing modes.
- if (isa<ConstantSDNode>(LHS) || isa<ConstantSDNode>(RHS))
- return false;
-
- // Check if this particular node is reused in any non-memory related
- // operation. If yes, do not try to fold this node into the address
- // computation, since the computation will be kept.
- const SDNode *Node = N.getNode();
- for (SDNode::use_iterator UI = Node->use_begin(), UE = Node->use_end();
- UI != UE; ++UI) {
- if (!isa<MemSDNode>(*UI))
- return false;
- }
-
- // Remember if it is worth folding N when it produces an extended register.
- bool IsExtendedRegisterWorthFolding = isWorthFolding(N);
-
- // Try to match a shifted extend on the RHS.
- if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
- SelectExtendedSHL(RHS, Size, Offset, Imm)) {
- Base = LHS;
- return true;
- }
-
- // Try to match a shifted extend on the LHS.
- if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
- SelectExtendedSHL(LHS, Size, Offset, Imm)) {
- Base = RHS;
- return true;
- }
-
- ARM64_AM::ExtendType Ext = ARM64_AM::UXTX;
- // Try to match an unshifted extend on the LHS.
- if (IsExtendedRegisterWorthFolding &&
- (Ext = getExtendTypeForNode(LHS, true)) != ARM64_AM::InvalidExtend) {
- Base = RHS;
- Offset = WidenIfNeeded(CurDAG, LHS.getOperand(0));
- Imm = CurDAG->getTargetConstant(ARM64_AM::getMemExtendImm(Ext, false),
- MVT::i32);
- if (isWorthFolding(LHS))
- return true;
- }
-
- // Try to match an unshifted extend on the RHS.
- if (IsExtendedRegisterWorthFolding &&
- (Ext = getExtendTypeForNode(RHS, true)) != ARM64_AM::InvalidExtend) {
- Base = LHS;
- Offset = WidenIfNeeded(CurDAG, RHS.getOperand(0));
- Imm = CurDAG->getTargetConstant(ARM64_AM::getMemExtendImm(Ext, false),
- MVT::i32);
- if (isWorthFolding(RHS))
- return true;
- }
-
- // Match any non-shifted, non-extend, non-immediate add expression.
- Base = LHS;
- Offset = WidenIfNeeded(CurDAG, RHS);
- Ext = ARM64_AM::UXTX;
- Imm = CurDAG->getTargetConstant(ARM64_AM::getMemExtendImm(Ext, false),
- MVT::i32);
- // Reg1 + Reg2 is free: no check needed.
- return true;
-}
-
-SDValue ARM64DAGToDAGISel::createDTuple(ArrayRef<SDValue> Regs) {
- static unsigned RegClassIDs[] = { ARM64::DDRegClassID, ARM64::DDDRegClassID,
- ARM64::DDDDRegClassID };
- static unsigned SubRegs[] = { ARM64::dsub0, ARM64::dsub1,
- ARM64::dsub2, ARM64::dsub3 };
-
- return createTuple(Regs, RegClassIDs, SubRegs);
-}
-
-SDValue ARM64DAGToDAGISel::createQTuple(ArrayRef<SDValue> Regs) {
- static unsigned RegClassIDs[] = { ARM64::QQRegClassID, ARM64::QQQRegClassID,
- ARM64::QQQQRegClassID };
- static unsigned SubRegs[] = { ARM64::qsub0, ARM64::qsub1,
- ARM64::qsub2, ARM64::qsub3 };
-
- return createTuple(Regs, RegClassIDs, SubRegs);
-}
-
-SDValue ARM64DAGToDAGISel::createTuple(ArrayRef<SDValue> Regs,
- unsigned RegClassIDs[],
- unsigned SubRegs[]) {
- // There's no special register-class for a vector-list of 1 element: it's just
- // a vector.
- if (Regs.size() == 1)
- return Regs[0];
-
- assert(Regs.size() >= 2 && Regs.size() <= 4);
-
- SDLoc DL(Regs[0].getNode());
-
- SmallVector<SDValue, 4> Ops;
-
- // First operand of REG_SEQUENCE is the desired RegClass.
- Ops.push_back(
- CurDAG->getTargetConstant(RegClassIDs[Regs.size() - 2], MVT::i32));
-
- // Then we get pairs of source & subregister-position for the components.
- for (unsigned i = 0; i < Regs.size(); ++i) {
- Ops.push_back(Regs[i]);
- Ops.push_back(CurDAG->getTargetConstant(SubRegs[i], MVT::i32));
- }
-
- SDNode *N =
- CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops);
- return SDValue(N, 0);
-}
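
Purely for illustration, the operand list createTuple builds for three D registers can be dumped like this (register and subregister names are symbolic, not real LLVM objects):

    #include <cstdio>

    // Prints: REG_SEQUENCE DDDRegClassID, v0, dsub0, v1, dsub1, v2, dsub2
    int main() {
      const char *Regs[] = {"v0", "v1", "v2"};
      printf("REG_SEQUENCE DDDRegClassID");
      for (unsigned i = 0; i < 3; ++i)
        printf(", %s, dsub%u", Regs[i], i);
      printf("\n");
    }
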
-
-SDNode *ARM64DAGToDAGISel::SelectTable(SDNode *N, unsigned NumVecs,
- unsigned Opc, bool isExt) {
- SDLoc dl(N);
- EVT VT = N->getValueType(0);
-
- unsigned ExtOff = isExt;
-
- // Form a REG_SEQUENCE to force register allocation.
- unsigned Vec0Off = ExtOff + 1;
- SmallVector<SDValue, 4> Regs(N->op_begin() + Vec0Off,
- N->op_begin() + Vec0Off + NumVecs);
- SDValue RegSeq = createQTuple(Regs);
-
- SmallVector<SDValue, 6> Ops;
- if (isExt)
- Ops.push_back(N->getOperand(1));
- Ops.push_back(RegSeq);
- Ops.push_back(N->getOperand(NumVecs + ExtOff + 1));
- return CurDAG->getMachineNode(Opc, dl, VT, Ops);
-}
-
-SDNode *ARM64DAGToDAGISel::SelectIndexedLoad(SDNode *N, bool &Done) {
- LoadSDNode *LD = cast<LoadSDNode>(N);
- if (LD->isUnindexed())
- return NULL;
- EVT VT = LD->getMemoryVT();
- EVT DstVT = N->getValueType(0);
- ISD::MemIndexedMode AM = LD->getAddressingMode();
- bool IsPre = AM == ISD::PRE_INC || AM == ISD::PRE_DEC;
-
- // We're not doing validity checking here. That was done when checking
- // if we should mark the load as indexed or not. We're just selecting
- // the right instruction.
- unsigned Opcode = 0;
-
- ISD::LoadExtType ExtType = LD->getExtensionType();
- bool InsertTo64 = false;
- if (VT == MVT::i64)
- Opcode = IsPre ? ARM64::LDRXpre_isel : ARM64::LDRXpost_isel;
- else if (VT == MVT::i32) {
- if (ExtType == ISD::NON_EXTLOAD)
- Opcode = IsPre ? ARM64::LDRWpre_isel : ARM64::LDRWpost_isel;
- else if (ExtType == ISD::SEXTLOAD)
- Opcode = IsPre ? ARM64::LDRSWpre_isel : ARM64::LDRSWpost_isel;
- else {
- Opcode = IsPre ? ARM64::LDRWpre_isel : ARM64::LDRWpost_isel;
- InsertTo64 = true;
- // The result of the load is only i32. It's the subreg_to_reg that makes
- // it into an i64.
- DstVT = MVT::i32;
- }
- } else if (VT == MVT::i16) {
- if (ExtType == ISD::SEXTLOAD) {
- if (DstVT == MVT::i64)
- Opcode = IsPre ? ARM64::LDRSHXpre_isel : ARM64::LDRSHXpost_isel;
- else
- Opcode = IsPre ? ARM64::LDRSHWpre_isel : ARM64::LDRSHWpost_isel;
- } else {
- Opcode = IsPre ? ARM64::LDRHHpre_isel : ARM64::LDRHHpost_isel;
- InsertTo64 = DstVT == MVT::i64;
- // The result of the load is only i32. It's the subreg_to_reg that makes
- // it into an i64.
- DstVT = MVT::i32;
- }
- } else if (VT == MVT::i8) {
- if (ExtType == ISD::SEXTLOAD) {
- if (DstVT == MVT::i64)
- Opcode = IsPre ? ARM64::LDRSBXpre_isel : ARM64::LDRSBXpost_isel;
- else
- Opcode = IsPre ? ARM64::LDRSBWpre_isel : ARM64::LDRSBWpost_isel;
- } else {
- Opcode = IsPre ? ARM64::LDRBBpre_isel : ARM64::LDRBBpost_isel;
- InsertTo64 = DstVT == MVT::i64;
- // The result of the load is only i32. It's the subreg_to_reg that makes
- // it into an i64.
- DstVT = MVT::i32;
- }
- } else if (VT == MVT::f32) {
- Opcode = IsPre ? ARM64::LDRSpre_isel : ARM64::LDRSpost_isel;
- } else if (VT == MVT::f64) {
- Opcode = IsPre ? ARM64::LDRDpre_isel : ARM64::LDRDpost_isel;
- } else
- return NULL;
- SDValue Chain = LD->getChain();
- SDValue Base = LD->getBasePtr();
- ConstantSDNode *OffsetOp = cast<ConstantSDNode>(LD->getOffset());
- int OffsetVal = (int)OffsetOp->getZExtValue();
- SDValue Offset = CurDAG->getTargetConstant(OffsetVal, MVT::i64);
- SDValue Ops[] = { Base, Offset, Chain };
- SDNode *Res = CurDAG->getMachineNode(Opcode, SDLoc(N), DstVT, MVT::i64,
- MVT::Other, Ops);
- // Either way, we're replacing the node, so tell the caller that.
- Done = true;
- if (InsertTo64) {
- SDValue SubReg = CurDAG->getTargetConstant(ARM64::sub_32, MVT::i32);
- SDNode *Sub = CurDAG->getMachineNode(
- ARM64::SUBREG_TO_REG, SDLoc(N), MVT::i64,
- CurDAG->getTargetConstant(0, MVT::i64), SDValue(Res, 0), SubReg);
- ReplaceUses(SDValue(N, 0), SDValue(Sub, 0));
- ReplaceUses(SDValue(N, 1), SDValue(Res, 1));
- ReplaceUses(SDValue(N, 2), SDValue(Res, 2));
- return 0;
- }
- return Res;
-}
-
-SDNode *ARM64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
- unsigned SubRegIdx) {
- SDLoc dl(N);
- EVT VT = N->getValueType(0);
- SDValue Chain = N->getOperand(0);
-
- SmallVector<SDValue, 6> Ops;
- Ops.push_back(N->getOperand(2)); // Mem operand;
- Ops.push_back(Chain);
-
- std::vector<EVT> ResTys;
- ResTys.push_back(MVT::Untyped);
- ResTys.push_back(MVT::Other);
-
- SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
- SDValue SuperReg = SDValue(Ld, 0);
-
- // MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- // MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
- // cast<MachineSDNode>(Ld)->setMemRefs(MemOp, MemOp + 1);
-
- switch (NumVecs) {
- case 4:
- ReplaceUses(SDValue(N, 3), CurDAG->getTargetExtractSubreg(SubRegIdx + 3, dl,
- VT, SuperReg));
- // FALLTHROUGH
- case 3:
- ReplaceUses(SDValue(N, 2), CurDAG->getTargetExtractSubreg(SubRegIdx + 2, dl,
- VT, SuperReg));
- // FALLTHROUGH
- case 2:
- ReplaceUses(SDValue(N, 1), CurDAG->getTargetExtractSubreg(SubRegIdx + 1, dl,
- VT, SuperReg));
- ReplaceUses(SDValue(N, 0),
- CurDAG->getTargetExtractSubreg(SubRegIdx, dl, VT, SuperReg));
- break;
- case 1:
- ReplaceUses(SDValue(N, 0), SuperReg);
- break;
- }
-
- ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
-
- return 0;
-}
-
-SDNode *ARM64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs,
- unsigned Opc) {
- SDLoc dl(N);
- EVT VT = N->getOperand(2)->getValueType(0);
-
- // Form a REG_SEQUENCE to force register allocation.
- bool Is128Bit = VT.getSizeInBits() == 128;
- SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
- SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
-
- SmallVector<SDValue, 6> Ops;
- Ops.push_back(RegSeq);
- Ops.push_back(N->getOperand(NumVecs + 2));
- Ops.push_back(N->getOperand(0));
- SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);
-
- return St;
-}
-
-/// WidenVector - Given a value in the V64 register class, produce the
-/// equivalent value in the V128 register class.
-class WidenVector {
- SelectionDAG &DAG;
-
-public:
- WidenVector(SelectionDAG &DAG) : DAG(DAG) {}
-
- SDValue operator()(SDValue V64Reg) {
- EVT VT = V64Reg.getValueType();
- unsigned NarrowSize = VT.getVectorNumElements();
- MVT EltTy = VT.getVectorElementType().getSimpleVT();
- MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize);
- SDLoc DL(V64Reg);
-
- SDValue Undef =
- SDValue(DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, WideTy), 0);
- return DAG.getTargetInsertSubreg(ARM64::dsub, DL, WideTy, Undef, V64Reg);
- }
-};
-
-/// NarrowVector - Given a value in the V128 register class, produce the
-/// equivalent value in the V64 register class.
-static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) {
- EVT VT = V128Reg.getValueType();
- unsigned WideSize = VT.getVectorNumElements();
- MVT EltTy = VT.getVectorElementType().getSimpleVT();
- MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2);
-
- return DAG.getTargetExtractSubreg(ARM64::dsub, SDLoc(V128Reg), NarrowTy,
- V128Reg);
-}
-
-SDNode *ARM64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs,
- unsigned Opc) {
- SDLoc dl(N);
- EVT VT = N->getValueType(0);
- bool Narrow = VT.getSizeInBits() == 64;
-
- // Form a REG_SEQUENCE to force register allocation.
- SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
-
- if (Narrow)
- std::transform(Regs.begin(), Regs.end(), Regs.begin(),
- WidenVector(*CurDAG));
-
- SDValue RegSeq = createQTuple(Regs);
-
- std::vector<EVT> ResTys;
- ResTys.push_back(MVT::Untyped);
- ResTys.push_back(MVT::Other);
-
- unsigned LaneNo =
- cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue();
-
- SmallVector<SDValue, 6> Ops;
- Ops.push_back(RegSeq);
- Ops.push_back(CurDAG->getTargetConstant(LaneNo, MVT::i64));
- Ops.push_back(N->getOperand(NumVecs + 3));
- Ops.push_back(N->getOperand(0));
- SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
- SDValue SuperReg = SDValue(Ld, 0);
-
- EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
- switch (NumVecs) {
- case 4: {
- SDValue NV3 =
- CurDAG->getTargetExtractSubreg(ARM64::qsub3, dl, WideVT, SuperReg);
- if (Narrow)
- ReplaceUses(SDValue(N, 3), NarrowVector(NV3, *CurDAG));
- else
- ReplaceUses(SDValue(N, 3), NV3);
- }
- // FALLTHROUGH
- case 3: {
- SDValue NV2 =
- CurDAG->getTargetExtractSubreg(ARM64::qsub2, dl, WideVT, SuperReg);
- if (Narrow)
- ReplaceUses(SDValue(N, 2), NarrowVector(NV2, *CurDAG));
- else
- ReplaceUses(SDValue(N, 2), NV2);
- }
- // FALLTHROUGH
- case 2: {
- SDValue NV1 =
- CurDAG->getTargetExtractSubreg(ARM64::qsub1, dl, WideVT, SuperReg);
- SDValue NV0 =
- CurDAG->getTargetExtractSubreg(ARM64::qsub0, dl, WideVT, SuperReg);
- if (Narrow) {
- ReplaceUses(SDValue(N, 1), NarrowVector(NV1, *CurDAG));
- ReplaceUses(SDValue(N, 0), NarrowVector(NV0, *CurDAG));
- } else {
- ReplaceUses(SDValue(N, 1), NV1);
- ReplaceUses(SDValue(N, 0), NV0);
- }
- break;
- }
- }
-
- ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
-
- return Ld;
-}
-
-SDNode *ARM64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs,
- unsigned Opc) {
- SDLoc dl(N);
- EVT VT = N->getOperand(2)->getValueType(0);
- bool Narrow = VT.getSizeInBits() == 64;
-
- // Form a REG_SEQUENCE to force register allocation.
- SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
-
- if (Narrow)
- std::transform(Regs.begin(), Regs.end(), Regs.begin(),
- WidenVector(*CurDAG));
-
- SDValue RegSeq = createQTuple(Regs);
-
- unsigned LaneNo =
- cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue();
-
- SmallVector<SDValue, 6> Ops;
- Ops.push_back(RegSeq);
- Ops.push_back(CurDAG->getTargetConstant(LaneNo, MVT::i64));
- Ops.push_back(N->getOperand(NumVecs + 3));
- Ops.push_back(N->getOperand(0));
- SDNode *St = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
-
- // Transfer memoperands.
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
- cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1);
-
- return St;
-}
-
-SDNode *ARM64DAGToDAGISel::SelectAtomic(SDNode *Node, unsigned Op8,
- unsigned Op16, unsigned Op32,
- unsigned Op64) {
- // Mostly direct translation to the given operations, except that we preserve
- // the AtomicOrdering for use later on.
- AtomicSDNode *AN = cast<AtomicSDNode>(Node);
- EVT VT = AN->getMemoryVT();
-
- unsigned Op;
- if (VT == MVT::i8)
- Op = Op8;
- else if (VT == MVT::i16)
- Op = Op16;
- else if (VT == MVT::i32)
- Op = Op32;
- else if (VT == MVT::i64)
- Op = Op64;
- else
- llvm_unreachable("Unexpected atomic operation");
-
- SmallVector<SDValue, 4> Ops;
- for (unsigned i = 1; i < AN->getNumOperands(); ++i)
- Ops.push_back(AN->getOperand(i));
-
- Ops.push_back(CurDAG->getTargetConstant(AN->getOrdering(), MVT::i32));
- Ops.push_back(AN->getOperand(0)); // Chain moves to the end
-
- return CurDAG->SelectNodeTo(Node, Op, AN->getValueType(0), MVT::Other,
- &Ops[0], Ops.size());
-}
-
-static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N,
- unsigned &Opc, SDValue &Opd0,
- unsigned &LSB, unsigned &MSB,
- unsigned NumberOfIgnoredLowBits,
- bool BiggerPattern) {
- assert(N->getOpcode() == ISD::AND &&
- "N must be a AND operation to call this function");
-
- EVT VT = N->getValueType(0);
-
- // Here we could test the type of VT and return false when the type does
- // not match, but since that check is done prior to this call in the current
- // context, we turned it into an assert to avoid redundant code.
- assert((VT == MVT::i32 || VT == MVT::i64) &&
- "Type checking must have been done before calling this function");
-
- // FIXME: simplify-demanded-bits in DAGCombine will probably have
- // changed the AND node to a 32-bit mask operation. We'll have to
- // undo that as part of the transform here if we want to catch all
- // the opportunities.
- // Currently the NumberOfIgnoredLowBits argument helps to recover
- // from these situations when matching the bigger pattern (bitfield insert).
-
- // For unsigned extracts, check for a shift right and mask
- uint64_t And_imm = 0;
- if (!isOpcWithIntImmediate(N, ISD::AND, And_imm))
- return false;
-
- const SDNode *Op0 = N->getOperand(0).getNode();
-
- // Because of simplify-demanded-bits in DAGCombine, the mask may have been
- // simplified. Try to undo that
- And_imm |= (1 << NumberOfIgnoredLowBits) - 1;
-
- // The immediate is a mask of the low bits iff imm & (imm+1) == 0
- if (And_imm & (And_imm + 1))
- return false;
-
- bool ClampMSB = false;
- uint64_t Srl_imm = 0;
- // Handle the SRL + ANY_EXTEND case.
- if (VT == MVT::i64 && Op0->getOpcode() == ISD::ANY_EXTEND &&
- isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, Srl_imm)) {
- // Extend the incoming operand of the SRL to 64-bit.
- Opd0 = Widen(CurDAG, Op0->getOperand(0).getOperand(0));
- // Make sure to clamp the MSB so that we preserve the semantics of the
- // original operations.
- ClampMSB = true;
- } else if (isOpcWithIntImmediate(Op0, ISD::SRL, Srl_imm)) {
- Opd0 = Op0->getOperand(0);
- } else if (BiggerPattern) {
- // Let's pretend a 0 shift right has been performed.
- // The resulting code will be at least as good as the original one,
- // plus it may expose more opportunities for the bitfield insert pattern.
- // FIXME: Currently we limit this to the bigger pattern, because
- // some optimizations expect AND and not UBFM
- Opd0 = N->getOperand(0);
- } else
- return false;
-
- assert((BiggerPattern || (Srl_imm > 0 && Srl_imm < VT.getSizeInBits())) &&
- "bad amount in shift node!");
-
- LSB = Srl_imm;
- MSB = Srl_imm + (VT == MVT::i32 ? CountTrailingOnes_32(And_imm)
- : CountTrailingOnes_64(And_imm)) -
- 1;
- if (ClampMSB)
- // Since we're moving the extend before the right shift operation, we need
- // to clamp the MSB to make sure we don't shift in undefined bits instead of
- // the zeros which would get shifted in with the original right shift
- // operation.
- MSB = MSB > 31 ? 31 : MSB;
-
- Opc = VT == MVT::i32 ? ARM64::UBFMWri : ARM64::UBFMXri;
- return true;
-}
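
A worked instance of the LSB/MSB computation above; countTrailingOnes is a local stand-in for the LLVM helper:

    #include <cassert>
    #include <cstdint>

    static unsigned countTrailingOnes(uint64_t V) {
      unsigned N = 0;
      while (V & 1) { ++N; V >>= 1; }
      return N;
    }

    int main() {
      // (x >> 3) & 0x1f extracts bits [7:3] of x.
      uint64_t AndImm = 0x1f;
      unsigned SrlImm = 3;
      unsigned LSB = SrlImm;
      unsigned MSB = SrlImm + countTrailingOnes(AndImm) - 1;
      assert(LSB == 3 && MSB == 7); // selected as UBFM x, 3, 7
    }
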
-
-static bool isOneBitExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0,
- unsigned &LSB, unsigned &MSB) {
- // We are looking for the following pattern which basically extracts a single
- // bit from the source value and places it in the LSB of the destination
- // value; all other bits of the destination value are set to zero:
- //
- // Value2 = AND Value, MaskImm
- // SRL Value2, ShiftImm
- //
- // with MaskImm >> ShiftImm == 1.
- //
- // This gets selected into a single UBFM:
- //
- // UBFM Value, ShiftImm, ShiftImm
- //
-
- if (N->getOpcode() != ISD::SRL)
- return false;
-
- uint64_t And_mask = 0;
- if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, And_mask))
- return false;
-
- Opd0 = N->getOperand(0).getOperand(0);
-
- uint64_t Srl_imm = 0;
- if (!isIntImmediate(N->getOperand(1), Srl_imm))
- return false;
-
- // Check whether we really have a one bit extract here.
- if (And_mask >> Srl_imm == 0x1) {
- if (N->getValueType(0) == MVT::i32)
- Opc = ARM64::UBFMWri;
- else
- Opc = ARM64::UBFMXri;
-
- LSB = MSB = Srl_imm;
-
- return true;
- }
-
- return false;
-}
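
The single-bit case in miniature:

    #include <cassert>
    #include <cstdint>

    int main() {
      // (x & 0x10) >> 4 isolates bit 4 of x.
      uint64_t MaskImm = 0x10, ShiftImm = 4;
      assert(MaskImm >> ShiftImm == 1); // so LSB == MSB == 4: UBFM x, 4, 4
    }
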
-
-static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0,
- unsigned &LSB, unsigned &MSB,
- bool BiggerPattern) {
- assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) &&
- "N must be a SHR/SRA operation to call this function");
-
- EVT VT = N->getValueType(0);
-
- // Here we could test the type of VT and return false when the type does
- // not match, but since that check is done prior to this call in the current
- // context, we turned it into an assert to avoid redundant code.
- assert((VT == MVT::i32 || VT == MVT::i64) &&
- "Type checking must have been done before calling this function");
-
- // Check for AND + SRL doing a one bit extract.
- if (isOneBitExtractOpFromShr(N, Opc, Opd0, LSB, MSB))
- return true;
-
- // We're looking for a shift of a shift.
- uint64_t Shl_imm = 0;
- if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) {
- Opd0 = N->getOperand(0).getOperand(0);
- } else if (BiggerPattern) {
- // Let's pretend a 0 shift left has been performed.
- // FIXME: Currently we limit this to the bigger pattern case,
- // because some optimizations expect AND and not UBFM
- Opd0 = N->getOperand(0);
- } else
- return false;
-
- assert(Shl_imm < VT.getSizeInBits() && "bad amount in shift node!");
- uint64_t Srl_imm = 0;
- if (!isIntImmediate(N->getOperand(1), Srl_imm))
- return false;
-
- assert(Srl_imm > 0 && Srl_imm < VT.getSizeInBits() &&
- "bad amount in shift node!");
- // Note: The width operand is encoded as width-1.
- unsigned Width = VT.getSizeInBits() - Srl_imm - 1;
- int sLSB = Srl_imm - Shl_imm;
- if (sLSB < 0)
- return false;
- LSB = sLSB;
- MSB = LSB + Width;
- // SRA requires a signed extraction
- if (VT == MVT::i32)
- Opc = N->getOpcode() == ISD::SRA ? ARM64::SBFMWri : ARM64::UBFMWri;
- else
- Opc = N->getOpcode() == ISD::SRA ? ARM64::SBFMXri : ARM64::UBFMXri;
- return true;
-}
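
A worked instance of the shift-of-shift math above, for i32:

    #include <cassert>

    int main() {
      // (x << 8) >> 20 keeps bits [23:12] of x.
      unsigned BitSize = 32, ShlImm = 8, SrlImm = 20;
      unsigned Width = BitSize - SrlImm - 1; // encoded as width - 1
      int LSB = SrlImm - ShlImm;             // rejected above if negative
      unsigned MSB = LSB + Width;
      assert(LSB == 12 && MSB == 23);        // UBFM x, 12, 23 (LSR-style extract)
    }
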
-
-static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc,
- SDValue &Opd0, unsigned &LSB, unsigned &MSB,
- unsigned NumberOfIgnoredLowBits = 0,
- bool BiggerPattern = false) {
- if (N->getValueType(0) != MVT::i32 && N->getValueType(0) != MVT::i64)
- return false;
-
- switch (N->getOpcode()) {
- default:
- if (!N->isMachineOpcode())
- return false;
- break;
- case ISD::AND:
- return isBitfieldExtractOpFromAnd(CurDAG, N, Opc, Opd0, LSB, MSB,
- NumberOfIgnoredLowBits, BiggerPattern);
- case ISD::SRL:
- case ISD::SRA:
- return isBitfieldExtractOpFromShr(N, Opc, Opd0, LSB, MSB, BiggerPattern);
- }
-
- unsigned NOpc = N->getMachineOpcode();
- switch (NOpc) {
- default:
- return false;
- case ARM64::SBFMWri:
- case ARM64::UBFMWri:
- case ARM64::SBFMXri:
- case ARM64::UBFMXri:
- Opc = NOpc;
- Opd0 = N->getOperand(0);
- LSB = cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue();
- MSB = cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue();
- return true;
- }
- // Unreachable
- return false;
-}
-
-SDNode *ARM64DAGToDAGISel::SelectBitfieldExtractOp(SDNode *N) {
- unsigned Opc, LSB, MSB;
- SDValue Opd0;
- if (!isBitfieldExtractOp(CurDAG, N, Opc, Opd0, LSB, MSB))
- return NULL;
-
- EVT VT = N->getValueType(0);
- SDValue Ops[] = { Opd0, CurDAG->getTargetConstant(LSB, VT),
- CurDAG->getTargetConstant(MSB, VT) };
- return CurDAG->SelectNodeTo(N, Opc, VT, Ops, 3);
-}
-
-// Is Mask an i32 or i64 binary sequence 1..10..0 with
-// CountTrailingZeros(Mask) == ExpectedTrailingZeros?
-static bool isHighMask(uint64_t Mask, unsigned ExpectedTrailingZeros,
- unsigned NumberOfIgnoredHighBits, EVT VT) {
- assert((VT == MVT::i32 || VT == MVT::i64) &&
- "i32 or i64 mask type expected!");
-
- uint64_t ExpectedMask;
- if (VT == MVT::i32) {
- uint32_t ExpectedMaski32 = ~0 << ExpectedTrailingZeros;
- ExpectedMask = ExpectedMaski32;
- if (NumberOfIgnoredHighBits) {
- uint32_t highMask = ~0 << (32 - NumberOfIgnoredHighBits);
- Mask |= highMask;
- }
- } else {
- ExpectedMask = ((uint64_t) ~0) << ExpectedTrailingZeros;
- if (NumberOfIgnoredHighBits)
- Mask |= ((uint64_t) ~0) << (64 - NumberOfIgnoredHighBits);
- }
-
- return Mask == ExpectedMask;
-}
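
A concrete instance of the mask check, with and without ignored high bits:

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t Expected = ~0u << 8;    // i32 high mask, 8 trailing zeros: 0xFFFFFF00
      assert(Expected == 0xFFFFFF00u);
      uint32_t Mask = 0x0FFFFF00;      // top nibble already proven useless
      Mask |= ~0u << (32 - 4);         // fold in 4 ignored high bits
      assert(Mask == Expected);        // so it still counts as a high mask
    }
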
-
-// Look for bits that will be useful for later uses.
-// A bit is considered useless as soon as it is dropped and never used
-// before it has been dropped.
-// E.g., looking for the useful bits of x:
-// 1. y = x & 0x7
-// 2. z = y >> 2
-// After #1, the useful bits of x are 0x7; they live on through y.
-// After #2, the useful bits of x are 0x4.
-// However, if x is used by an unpredictable instruction, then all of its
-// bits are useful.
-// E.g.
-// 1. y = x & 0x7
-// 2. z = y >> 2
-// 3. str x, [@x]
-static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth = 0);
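
The example from the comment above, traced by hand with plain unsigned arithmetic standing in for APInt:

    #include <cassert>

    int main() {
      // y = x & 0x7, z = y >> 2.
      unsigned UsefulOfX = ~0u; // initially, every bit of x may matter
      UsefulOfX &= 0x7;         // through the AND, only bits [2:0] of x feed y
      UsefulOfX &= ~0u << 2;    // the shift drops bits [1:0] of y
      assert(UsefulOfX == 0x4); // "After #2, the useful bits of x are 0x4"
    }
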
-
-static void getUsefulBitsFromAndWithImmediate(SDValue Op, APInt &UsefulBits,
- unsigned Depth) {
- uint64_t Imm =
- cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
- Imm = ARM64_AM::decodeLogicalImmediate(Imm, UsefulBits.getBitWidth());
- UsefulBits &= APInt(UsefulBits.getBitWidth(), Imm);
- getUsefulBits(Op, UsefulBits, Depth + 1);
-}
-
-static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits,
- uint64_t Imm, uint64_t MSB,
- unsigned Depth) {
- // inherit the bitwidth value
- APInt OpUsefulBits(UsefulBits);
- OpUsefulBits = 1;
-
- if (MSB >= Imm) {
- OpUsefulBits = OpUsefulBits.shl(MSB - Imm + 1);
- --OpUsefulBits;
- // The interesting part will be in the lower part of the result
- getUsefulBits(Op, OpUsefulBits, Depth + 1);
- // The interesting part was starting at Imm in the argument
- OpUsefulBits = OpUsefulBits.shl(Imm);
- } else {
- OpUsefulBits = OpUsefulBits.shl(MSB + 1);
- --OpUsefulBits;
- // The interesting part will be shifted in the result
- OpUsefulBits = OpUsefulBits.shl(OpUsefulBits.getBitWidth() - Imm);
- getUsefulBits(Op, OpUsefulBits, Depth + 1);
- // The interesting part was at zero in the argument
- OpUsefulBits = OpUsefulBits.lshr(OpUsefulBits.getBitWidth() - Imm);
- }
-
- UsefulBits &= OpUsefulBits;
-}
-
-static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits,
- unsigned Depth) {
- uint64_t Imm =
- cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
- uint64_t MSB =
- cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
-
- getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth);
-}
-
-static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits,
- unsigned Depth) {
- uint64_t ShiftTypeAndValue =
- cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
- APInt Mask(UsefulBits);
- Mask.clearAllBits();
- Mask.flipAllBits();
-
- if (ARM64_AM::getShiftType(ShiftTypeAndValue) == ARM64_AM::LSL) {
- // Shift Left
- uint64_t ShiftAmt = ARM64_AM::getShiftValue(ShiftTypeAndValue);
- Mask = Mask.shl(ShiftAmt);
- getUsefulBits(Op, Mask, Depth + 1);
- Mask = Mask.lshr(ShiftAmt);
- } else if (ARM64_AM::getShiftType(ShiftTypeAndValue) == ARM64_AM::LSR) {
- // Shift Right
- // We do not handle ARM64_AM::ASR, because the sign will change the
- // number of useful bits
- uint64_t ShiftAmt = ARM64_AM::getShiftValue(ShiftTypeAndValue);
- Mask = Mask.lshr(ShiftAmt);
- getUsefulBits(Op, Mask, Depth + 1);
- Mask = Mask.shl(ShiftAmt);
- } else
- return;
-
- UsefulBits &= Mask;
-}
-
-static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits,
- unsigned Depth) {
- uint64_t Imm =
- cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
- uint64_t MSB =
- cast<const ConstantSDNode>(Op.getOperand(3).getNode())->getZExtValue();
-
- if (Op.getOperand(1) == Orig)
- return getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth);
-
- APInt OpUsefulBits(UsefulBits);
- OpUsefulBits = 1;
-
- if (MSB >= Imm) {
- OpUsefulBits = OpUsefulBits.shl(MSB - Imm + 1);
- --OpUsefulBits;
- UsefulBits &= ~OpUsefulBits;
- getUsefulBits(Op, UsefulBits, Depth + 1);
- } else {
- OpUsefulBits = OpUsefulBits.shl(MSB + 1);
- --OpUsefulBits;
- UsefulBits = ~(OpUsefulBits.shl(OpUsefulBits.getBitWidth() - Imm));
- getUsefulBits(Op, UsefulBits, Depth + 1);
- }
-}
-
-static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits,
- SDValue Orig, unsigned Depth) {
-
- // Users of this node should have already been instruction selected
- // FIXME: Can we turn that into an assert?
- if (!UserNode->isMachineOpcode())
- return;
-
- switch (UserNode->getMachineOpcode()) {
- default:
- return;
- case ARM64::ANDSWri:
- case ARM64::ANDSXri:
- case ARM64::ANDWri:
- case ARM64::ANDXri:
- // We increment Depth only when we call getUsefulBits
- return getUsefulBitsFromAndWithImmediate(SDValue(UserNode, 0), UsefulBits,
- Depth);
- case ARM64::UBFMWri:
- case ARM64::UBFMXri:
- return getUsefulBitsFromUBFM(SDValue(UserNode, 0), UsefulBits, Depth);
-
- case ARM64::ORRWrs:
- case ARM64::ORRXrs:
- if (UserNode->getOperand(1) != Orig)
- return;
- return getUsefulBitsFromOrWithShiftedReg(SDValue(UserNode, 0), UsefulBits,
- Depth);
- case ARM64::BFMWri:
- case ARM64::BFMXri:
- return getUsefulBitsFromBFM(SDValue(UserNode, 0), Orig, UsefulBits, Depth);
- }
-}
-
-static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth) {
- if (Depth >= 6)
- return;
- // Initialize UsefulBits
- if (!Depth) {
- unsigned Bitwidth = Op.getValueType().getScalarType().getSizeInBits();
- // At the beginning, assume every produced bit is useful
- UsefulBits = APInt(Bitwidth, 0);
- UsefulBits.flipAllBits();
- }
- APInt UsersUsefulBits(UsefulBits.getBitWidth(), 0);
-
- for (SDNode::use_iterator UseIt = Op.getNode()->use_begin(),
- UseEnd = Op.getNode()->use_end();
- UseIt != UseEnd; ++UseIt) {
- // A use cannot produce useful bits
- APInt UsefulBitsForUse = APInt(UsefulBits);
- getUsefulBitsForUse(*UseIt, UsefulBitsForUse, Op, Depth);
- UsersUsefulBits |= UsefulBitsForUse;
- }
- // UsefulBits contains the produced bits that are meaningful for the
- // current definition, thus a user cannot make a bit meaningful at
- // this point
- UsefulBits &= UsersUsefulBits;
-}
-
-// Given an OR operation, check if we have the following pattern:
-// ubfm c, b, imm, imm2 (or something that does the same job, see
-// isBitfieldExtractOp)
-// d = e & mask2 ; where mask2 is a binary sequence of 1..10..0 and
-// countTrailingZeros(mask2) == imm2 - imm + 1
-// f = d | c
-// If yes, the given reference arguments will be updated so that one can
-// replace the OR instruction with:
-// f = Opc Opd0, Opd1, LSB, MSB ; where Opc is a BFM, LSB = imm, and MSB = imm2
-static bool isBitfieldInsertOpFromOr(SDNode *N, unsigned &Opc, SDValue &Opd0,
- SDValue &Opd1, unsigned &LSB,
- unsigned &MSB, SelectionDAG *CurDAG) {
- assert(N->getOpcode() == ISD::OR && "Expect a OR operation");
-
- // Set Opc
- EVT VT = N->getValueType(0);
- if (VT == MVT::i32)
- Opc = ARM64::BFMWri;
- else if (VT == MVT::i64)
- Opc = ARM64::BFMXri;
- else
- return false;
-
- // Because of simplify-demanded-bits in DAGCombine, involved masks may not
- // have the expected shape. Try to undo that.
- APInt UsefulBits;
- getUsefulBits(SDValue(N, 0), UsefulBits);
-
- unsigned NumberOfIgnoredLowBits = UsefulBits.countTrailingZeros();
- unsigned NumberOfIgnoredHighBits = UsefulBits.countLeadingZeros();
-
- // OR is commutative; check both possibilities (does LLVM provide a
- // way to do that directly, e.g., via a code matcher?)
- SDValue OrOpd1Val = N->getOperand(1);
- SDNode *OrOpd0 = N->getOperand(0).getNode();
- SDNode *OrOpd1 = N->getOperand(1).getNode();
- for (int i = 0; i < 2;
- ++i, std::swap(OrOpd0, OrOpd1), OrOpd1Val = N->getOperand(0)) {
- unsigned BFXOpc;
- // Set Opd1, LSB and MSB arguments by looking for
- // c = ubfm b, imm, imm2
- if (!isBitfieldExtractOp(CurDAG, OrOpd0, BFXOpc, Opd1, LSB, MSB,
- NumberOfIgnoredLowBits, true))
- continue;
-
- // Check that the returned opcode is compatible with the pattern,
- // i.e., same type and zero extended (U and not S)
- if ((BFXOpc != ARM64::UBFMXri && VT == MVT::i64) ||
- (BFXOpc != ARM64::UBFMWri && VT == MVT::i32))
- continue;
-
- // Compute the width of the bitfield insertion
- int sMSB = MSB - LSB + 1;
- // FIXME: This constraint is to catch bitfield insertion; we may
- // want to widen the pattern if we want to grab the general bitfield
- // move case.
- if (sMSB <= 0)
- continue;
-
- // Check the second part of the pattern
- EVT VT = OrOpd1->getValueType(0);
- if (VT != MVT::i32 && VT != MVT::i64)
- continue;
-
- // Compute the known-zero bits for the candidate first operand.
- // This allows us to catch more general cases than just looking for an
- // AND with an immediate. Indeed, simplify-demanded-bits may have removed
- // the AND instruction because it proved it was useless.
- APInt KnownZero, KnownOne;
- CurDAG->ComputeMaskedBits(OrOpd1Val, KnownZero, KnownOne);
-
- // Check if there is enough room for the second operand to appear
- // in the first one
- if (KnownZero.countTrailingOnes() < (unsigned)sMSB)
- continue;
-
- // Set the first operand
- uint64_t Imm;
- if (isOpcWithIntImmediate(OrOpd1, ISD::AND, Imm) &&
- isHighMask(Imm, sMSB, NumberOfIgnoredHighBits, VT))
- // In that case, we can eliminate the AND
- Opd0 = OrOpd1->getOperand(0);
- else
- // Maybe the AND has been removed by simplify-demanded-bits
- // or is useful because it discards more bits
- Opd0 = OrOpd1Val;
-
- // both parts match
- return true;
- }
-
- return false;
-}
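
An end-to-end trace of the pattern with concrete numbers: c extracts bits [23:16] of b (width 8), mask2 has exactly 8 trailing zeros, and the OR becomes a single BFM:

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t b = 0xAABBCCDD, e = 0x11223344;
      uint32_t c = (b >> 16) & 0xFF;   // ubfm c, b, 16, 23
      uint32_t d = e & 0xFFFFFF00;     // countTrailingZeros(mask2) == 23 - 16 + 1
      uint32_t f = d | c;              // selected as BFM e, b, 16, 23
      assert(f == 0x112233BB);
    }
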
-
-SDNode *ARM64DAGToDAGISel::SelectBitfieldInsertOp(SDNode *N) {
- if (N->getOpcode() != ISD::OR)
- return NULL;
-
- unsigned Opc;
- unsigned LSB, MSB;
- SDValue Opd0, Opd1;
-
- if (!isBitfieldInsertOpFromOr(N, Opc, Opd0, Opd1, LSB, MSB, CurDAG))
- return NULL;
-
- EVT VT = N->getValueType(0);
- SDValue Ops[] = { Opd0,
- Opd1,
- CurDAG->getTargetConstant(LSB, VT),
- CurDAG->getTargetConstant(MSB, VT) };
- return CurDAG->SelectNodeTo(N, Opc, VT, Ops, 4);
-}
-
-SDNode *ARM64DAGToDAGISel::SelectLIBM(SDNode *N) {
- EVT VT = N->getValueType(0);
- unsigned Variant;
- unsigned Opc;
- unsigned FRINTXOpcs[] = { ARM64::FRINTXSr, ARM64::FRINTXDr };
-
- if (VT == MVT::f32) {
- Variant = 0;
- } else if (VT == MVT::f64) {
- Variant = 1;
- } else
- return 0; // Unrecognized argument type. Fall back on default codegen.
-
- // Pick the FRINTX variant needed to set the flags.
- unsigned FRINTXOpc = FRINTXOpcs[Variant];
-
- switch (N->getOpcode()) {
- default:
- return 0; // Unrecognized libm ISD node. Fall back on default codegen.
- case ISD::FCEIL: {
- unsigned FRINTPOpcs[] = { ARM64::FRINTPSr, ARM64::FRINTPDr };
- Opc = FRINTPOpcs[Variant];
- break;
- }
- case ISD::FFLOOR: {
- unsigned FRINTMOpcs[] = { ARM64::FRINTMSr, ARM64::FRINTMDr };
- Opc = FRINTMOpcs[Variant];
- break;
- }
- case ISD::FTRUNC: {
- unsigned FRINTZOpcs[] = { ARM64::FRINTZSr, ARM64::FRINTZDr };
- Opc = FRINTZOpcs[Variant];
- break;
- }
- case ISD::FROUND: {
- unsigned FRINTAOpcs[] = { ARM64::FRINTASr, ARM64::FRINTADr };
- Opc = FRINTAOpcs[Variant];
- break;
- }
- }
-
- SDLoc dl(N);
- SDValue In = N->getOperand(0);
- SmallVector<SDValue, 2> Ops;
- Ops.push_back(In);
-
- if (!TM.Options.UnsafeFPMath) {
- SDNode *FRINTX = CurDAG->getMachineNode(FRINTXOpc, dl, VT, MVT::Glue, In);
- Ops.push_back(SDValue(FRINTX, 1));
- }
-
- return CurDAG->getMachineNode(Opc, dl, VT, Ops);
-}
-
-SDNode *ARM64DAGToDAGISel::Select(SDNode *Node) {
- // Dump information about the Node being selected
- DEBUG(errs() << "Selecting: ");
- DEBUG(Node->dump(CurDAG));
- DEBUG(errs() << "\n");
-
- // If we have a custom node, we already have selected!
- if (Node->isMachineOpcode()) {
- DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n");
- Node->setNodeId(-1);
- return NULL;
- }
-
- // A few custom selection cases.
- SDNode *ResNode = 0;
- EVT VT = Node->getValueType(0);
-
- switch (Node->getOpcode()) {
- default:
- break;
-
- case ISD::ADD:
- if (SDNode *I = SelectMLAV64LaneV128(Node))
- return I;
- break;
-
- case ISD::ATOMIC_LOAD_ADD:
- return SelectAtomic(Node, ARM64::ATOMIC_LOAD_ADD_I8,
- ARM64::ATOMIC_LOAD_ADD_I16, ARM64::ATOMIC_LOAD_ADD_I32,
- ARM64::ATOMIC_LOAD_ADD_I64);
- case ISD::ATOMIC_LOAD_SUB:
- return SelectAtomic(Node, ARM64::ATOMIC_LOAD_SUB_I8,
- ARM64::ATOMIC_LOAD_SUB_I16, ARM64::ATOMIC_LOAD_SUB_I32,
- ARM64::ATOMIC_LOAD_SUB_I64);
- case ISD::ATOMIC_LOAD_AND:
- return SelectAtomic(Node, ARM64::ATOMIC_LOAD_AND_I8,
- ARM64::ATOMIC_LOAD_AND_I16, ARM64::ATOMIC_LOAD_AND_I32,
- ARM64::ATOMIC_LOAD_AND_I64);
- case ISD::ATOMIC_LOAD_OR:
- return SelectAtomic(Node, ARM64::ATOMIC_LOAD_OR_I8,
- ARM64::ATOMIC_LOAD_OR_I16, ARM64::ATOMIC_LOAD_OR_I32,
- ARM64::ATOMIC_LOAD_OR_I64);
- case ISD::ATOMIC_LOAD_XOR:
- return SelectAtomic(Node, ARM64::ATOMIC_LOAD_XOR_I8,
- ARM64::ATOMIC_LOAD_XOR_I16, ARM64::ATOMIC_LOAD_XOR_I32,
- ARM64::ATOMIC_LOAD_XOR_I64);
- case ISD::ATOMIC_LOAD_NAND:
- return SelectAtomic(
- Node, ARM64::ATOMIC_LOAD_NAND_I8, ARM64::ATOMIC_LOAD_NAND_I16,
- ARM64::ATOMIC_LOAD_NAND_I32, ARM64::ATOMIC_LOAD_NAND_I64);
- case ISD::ATOMIC_LOAD_MIN:
- return SelectAtomic(Node, ARM64::ATOMIC_LOAD_MIN_I8,
- ARM64::ATOMIC_LOAD_MIN_I16, ARM64::ATOMIC_LOAD_MIN_I32,
- ARM64::ATOMIC_LOAD_MIN_I64);
- case ISD::ATOMIC_LOAD_MAX:
- return SelectAtomic(Node, ARM64::ATOMIC_LOAD_MAX_I8,
- ARM64::ATOMIC_LOAD_MAX_I16, ARM64::ATOMIC_LOAD_MAX_I32,
- ARM64::ATOMIC_LOAD_MAX_I64);
- case ISD::ATOMIC_LOAD_UMIN:
- return SelectAtomic(
- Node, ARM64::ATOMIC_LOAD_UMIN_I8, ARM64::ATOMIC_LOAD_UMIN_I16,
- ARM64::ATOMIC_LOAD_UMIN_I32, ARM64::ATOMIC_LOAD_UMIN_I64);
- case ISD::ATOMIC_LOAD_UMAX:
- return SelectAtomic(
- Node, ARM64::ATOMIC_LOAD_UMAX_I8, ARM64::ATOMIC_LOAD_UMAX_I16,
- ARM64::ATOMIC_LOAD_UMAX_I32, ARM64::ATOMIC_LOAD_UMAX_I64);
- case ISD::ATOMIC_SWAP:
- return SelectAtomic(Node, ARM64::ATOMIC_SWAP_I8, ARM64::ATOMIC_SWAP_I16,
- ARM64::ATOMIC_SWAP_I32, ARM64::ATOMIC_SWAP_I64);
- case ISD::ATOMIC_CMP_SWAP:
- return SelectAtomic(Node, ARM64::ATOMIC_CMP_SWAP_I8,
- ARM64::ATOMIC_CMP_SWAP_I16, ARM64::ATOMIC_CMP_SWAP_I32,
- ARM64::ATOMIC_CMP_SWAP_I64);
-
- case ISD::LOAD: {
- // Try to select as an indexed load. Fall through to normal processing
- // if we can't.
- bool Done = false;
- SDNode *I = SelectIndexedLoad(Node, Done);
- if (Done)
- return I;
- break;
- }
-
- case ISD::SRL:
- case ISD::AND:
- case ISD::SRA:
- if (SDNode *I = SelectBitfieldExtractOp(Node))
- return I;
- break;
-
- case ISD::OR:
- if (SDNode *I = SelectBitfieldInsertOp(Node))
- return I;
- break;
-
- case ISD::EXTRACT_VECTOR_ELT: {
- // Extracting lane zero is a special case where we can just use a plain
- // EXTRACT_SUBREG instruction, which will become FMOV. This is easier for
- // the rest of the compiler, especially the register allocator and copy
- // propagation, to reason about, so is preferred when it's possible to
- // use it.
- ConstantSDNode *LaneNode = cast<ConstantSDNode>(Node->getOperand(1));
- // Bail and use the default Select() for non-zero lanes.
- if (LaneNode->getZExtValue() != 0)
- break;
- // If the element type is not the same as the result type, likewise
- // bail and use the default Select(), as there's more to do than just
- // a cross-class COPY. This catches extracts of i8 and i16 elements
- // since they will need an explicit zext.
- if (VT != Node->getOperand(0).getValueType().getVectorElementType())
- break;
- unsigned SubReg;
- switch (Node->getOperand(0)
- .getValueType()
- .getVectorElementType()
- .getSizeInBits()) {
- default:
- assert(0 && "Unexpected vector element type!");
- case 64:
- SubReg = ARM64::dsub;
- break;
- case 32:
- SubReg = ARM64::ssub;
- break;
- case 16: // FALLTHROUGH
- case 8:
- llvm_unreachable("unexpected zext-requiring extract element!");
- }
- SDValue Extract = CurDAG->getTargetExtractSubreg(SubReg, SDLoc(Node), VT,
- Node->getOperand(0));
- DEBUG(dbgs() << "ISEL: Custom selection!\n=> ");
- DEBUG(Extract->dumpr(CurDAG));
- DEBUG(dbgs() << "\n");
- return Extract.getNode();
- }
- case ISD::Constant: {
- // Materialize zero constants as copies from WZR/XZR. This allows
- // the coalescer to propagate these into other instructions.
- ConstantSDNode *ConstNode = cast<ConstantSDNode>(Node);
- if (ConstNode->isNullValue()) {
- if (VT == MVT::i32)
- return CurDAG->getCopyFromReg(CurDAG->getEntryNode(), SDLoc(Node),
- ARM64::WZR, MVT::i32).getNode();
- else if (VT == MVT::i64)
- return CurDAG->getCopyFromReg(CurDAG->getEntryNode(), SDLoc(Node),
- ARM64::XZR, MVT::i64).getNode();
- }
- break;
- }
-
- case ISD::FrameIndex: {
- // Selects to ADDXri FI, 0 which in turn will become ADDXri SP, imm.
- int FI = cast<FrameIndexSDNode>(Node)->getIndex();
- unsigned Shifter = ARM64_AM::getShifterImm(ARM64_AM::LSL, 0);
- const TargetLowering *TLI = getTargetLowering();
- SDValue TFI = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
- SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, MVT::i32),
- CurDAG->getTargetConstant(Shifter, MVT::i32) };
- return CurDAG->SelectNodeTo(Node, ARM64::ADDXri, MVT::i64, Ops, 3);
- }
- case ISD::INTRINSIC_W_CHAIN: {
- unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
- switch (IntNo) {
- default:
- break;
- case Intrinsic::arm64_ldxp: {
- SDValue MemAddr = Node->getOperand(2);
- SDLoc DL(Node);
- SDValue Chain = Node->getOperand(0);
-
- SDNode *Ld = CurDAG->getMachineNode(ARM64::LDXPX, DL, MVT::i64, MVT::i64,
- MVT::Other, MemAddr, Chain);
-
- // Transfer memoperands.
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = cast<MemIntrinsicSDNode>(Node)->getMemOperand();
- cast<MachineSDNode>(Ld)->setMemRefs(MemOp, MemOp + 1);
- return Ld;
- }
- case Intrinsic::arm64_stxp: {
- SDLoc DL(Node);
- SDValue Chain = Node->getOperand(0);
- SDValue ValLo = Node->getOperand(2);
- SDValue ValHi = Node->getOperand(3);
- SDValue MemAddr = Node->getOperand(4);
-
- // Place arguments in the right order.
- SmallVector<SDValue, 7> Ops;
- Ops.push_back(ValLo);
- Ops.push_back(ValHi);
- Ops.push_back(MemAddr);
- Ops.push_back(Chain);
-
- SDNode *St =
- CurDAG->getMachineNode(ARM64::STXPX, DL, MVT::i32, MVT::Other, Ops);
- // Transfer memoperands.
- MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
- MemOp[0] = cast<MemIntrinsicSDNode>(Node)->getMemOperand();
- cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1);
-
- return St;
- }
- case Intrinsic::arm64_neon_ld1x2:
- if (VT == MVT::v8i8)
- return SelectLoad(Node, 2, ARM64::LD1Twov8b, ARM64::dsub0);
- else if (VT == MVT::v16i8)
- return SelectLoad(Node, 2, ARM64::LD1Twov16b, ARM64::qsub0);
- else if (VT == MVT::v4i16)
- return SelectLoad(Node, 2, ARM64::LD1Twov4h, ARM64::dsub0);
- else if (VT == MVT::v8i16)
- return SelectLoad(Node, 2, ARM64::LD1Twov8h, ARM64::qsub0);
- else if (VT == MVT::v2i32 || VT == MVT::v2f32)
- return SelectLoad(Node, 2, ARM64::LD1Twov2s, ARM64::dsub0);
- else if (VT == MVT::v4i32 || VT == MVT::v4f32)
- return SelectLoad(Node, 2, ARM64::LD1Twov4s, ARM64::qsub0);
- else if (VT == MVT::v1i64 || VT == MVT::v1f64)
- return SelectLoad(Node, 2, ARM64::LD1Twov1d, ARM64::dsub0);
- else if (VT == MVT::v2i64 || VT == MVT::v2f64)
- return SelectLoad(Node, 2, ARM64::LD1Twov2d, ARM64::qsub0);
- break;
- case Intrinsic::arm64_neon_ld1x3:
- if (VT == MVT::v8i8)
- return SelectLoad(Node, 3, ARM64::LD1Threev8b, ARM64::dsub0);
- else if (VT == MVT::v16i8)
- return SelectLoad(Node, 3, ARM64::LD1Threev16b, ARM64::qsub0);
- else if (VT == MVT::v4i16)
- return SelectLoad(Node, 3, ARM64::LD1Threev4h, ARM64::dsub0);
- else if (VT == MVT::v8i16)
- return SelectLoad(Node, 3, ARM64::LD1Threev8h, ARM64::qsub0);
- else if (VT == MVT::v2i32 || VT == MVT::v2f32)
- return SelectLoad(Node, 3, ARM64::LD1Threev2s, ARM64::dsub0);
- else if (VT == MVT::v4i32 || VT == MVT::v4f32)
- return SelectLoad(Node, 3, ARM64::LD1Threev4s, ARM64::qsub0);
- else if (VT == MVT::v1i64 || VT == MVT::v1f64)
- return SelectLoad(Node, 3, ARM64::LD1Threev1d, ARM64::dsub0);
- else if (VT == MVT::v2i64 || VT == MVT::v2f64)
- return SelectLoad(Node, 3, ARM64::LD1Threev2d, ARM64::qsub0);
- break;
- case Intrinsic::arm64_neon_ld1x4:
- if (VT == MVT::v8i8)
- return SelectLoad(Node, 4, ARM64::LD1Fourv8b, ARM64::dsub0);
- else if (VT == MVT::v16i8)
- return SelectLoad(Node, 4, ARM64::LD1Fourv16b, ARM64::qsub0);
- else if (VT == MVT::v4i16)
- return SelectLoad(Node, 4, ARM64::LD1Fourv4h, ARM64::dsub0);
- else if (VT == MVT::v8i16)
- return SelectLoad(Node, 4, ARM64::LD1Fourv8h, ARM64::qsub0);
- else if (VT == MVT::v2i32 || VT == MVT::v2f32)
- return SelectLoad(Node, 4, ARM64::LD1Fourv2s, ARM64::dsub0);
- else if (VT == MVT::v4i32 || VT == MVT::v4f32)
- return SelectLoad(Node, 4, ARM64::LD1Fourv4s, ARM64::qsub0);
- else if (VT == MVT::v1i64 || VT == MVT::v1f64)
- return SelectLoad(Node, 4, ARM64::LD1Fourv1d, ARM64::dsub0);
- else if (VT == MVT::v2i64 || VT == MVT::v2f64)
- return SelectLoad(Node, 4, ARM64::LD1Fourv2d, ARM64::qsub0);
- break;
- case Intrinsic::arm64_neon_ld2:
- if (VT == MVT::v8i8)
- return SelectLoad(Node, 2, ARM64::LD2Twov8b, ARM64::dsub0);
- else if (VT == MVT::v16i8)
- return SelectLoad(Node, 2, ARM64::LD2Twov16b, ARM64::qsub0);
- else if (VT == MVT::v4i16)
- return SelectLoad(Node, 2, ARM64::LD2Twov4h, ARM64::dsub0);
- else if (VT == MVT::v8i16)
- return SelectLoad(Node, 2, ARM64::LD2Twov8h, ARM64::qsub0);
- else if (VT == MVT::v2i32 || VT == MVT::v2f32)
- return SelectLoad(Node, 2, ARM64::LD2Twov2s, ARM64::dsub0);
- else if (VT == MVT::v4i32 || VT == MVT::v4f32)
- return SelectLoad(Node, 2, ARM64::LD2Twov4s, ARM64::qsub0);
- else if (VT == MVT::v1i64 || VT == MVT::v1f64)
- return SelectLoad(Node, 2, ARM64::LD1Twov1d, ARM64::dsub0);
- else if (VT == MVT::v2i64 || VT == MVT::v2f64)
- return SelectLoad(Node, 2, ARM64::LD2Twov2d, ARM64::qsub0);
- break;
- case Intrinsic::arm64_neon_ld3:
- if (VT == MVT::v8i8)
- return SelectLoad(Node, 3, ARM64::LD3Threev8b, ARM64::dsub0);
- else if (VT == MVT::v16i8)
- return SelectLoad(Node, 3, ARM64::LD3Threev16b, ARM64::qsub0);
- else if (VT == MVT::v4i16)
- return SelectLoad(Node, 3, ARM64::LD3Threev4h, ARM64::dsub0);
- else if (VT == MVT::v8i16)
- return SelectLoad(Node, 3, ARM64::LD3Threev8h, ARM64::qsub0);
- else if (VT == MVT::v2i32 || VT == MVT::v2f32)
- return SelectLoad(Node, 3, ARM64::LD3Threev2s, ARM64::dsub0);
- else if (VT == MVT::v4i32 || VT == MVT::v4f32)
- return SelectLoad(Node, 3, ARM64::LD3Threev4s, ARM64::qsub0);
- else if (VT == MVT::v1i64 || VT == MVT::v1f64)
- return SelectLoad(Node, 3, ARM64::LD1Threev1d, ARM64::dsub0);
- else if (VT == MVT::v2i64 || VT == MVT::v2f64)
- return SelectLoad(Node, 3, ARM64::LD3Threev2d, ARM64::qsub0);
- break;
- case Intrinsic::arm64_neon_ld4:
- if (VT == MVT::v8i8)
- return SelectLoad(Node, 4, ARM64::LD4Fourv8b, ARM64::dsub0);
- else if (VT == MVT::v16i8)
- return SelectLoad(Node, 4, ARM64::LD4Fourv16b, ARM64::qsub0);
- else if (VT == MVT::v4i16)
- return SelectLoad(Node, 4, ARM64::LD4Fourv4h, ARM64::dsub0);
- else if (VT == MVT::v8i16)
- return SelectLoad(Node, 4, ARM64::LD4Fourv8h, ARM64::qsub0);
- else if (VT == MVT::v2i32 || VT == MVT::v2f32)
- return SelectLoad(Node, 4, ARM64::LD4Fourv2s, ARM64::dsub0);
- else if (VT == MVT::v4i32 || VT == MVT::v4f32)
- return SelectLoad(Node, 4, ARM64::LD4Fourv4s, ARM64::qsub0);
- else if (VT == MVT::v1i64 || VT == MVT::v1f64)
- return SelectLoad(Node, 4, ARM64::LD1Fourv1d, ARM64::dsub0);
- else if (VT == MVT::v2i64 || VT == MVT::v2f64)
- return SelectLoad(Node, 4, ARM64::LD4Fourv2d, ARM64::qsub0);
- break;
- case Intrinsic::arm64_neon_ld2r:
- if (VT == MVT::v8i8)
- return SelectLoad(Node, 2, ARM64::LD2Rv8b, ARM64::dsub0);
- else if (VT == MVT::v16i8)
- return SelectLoad(Node, 2, ARM64::LD2Rv16b, ARM64::qsub0);
- else if (VT == MVT::v4i16)
- return SelectLoad(Node, 2, ARM64::LD2Rv4h, ARM64::dsub0);
- else if (VT == MVT::v8i16)
- return SelectLoad(Node, 2, ARM64::LD2Rv8h, ARM64::qsub0);
- else if (VT == MVT::v2i32 || VT == MVT::v2f32)
- return SelectLoad(Node, 2, ARM64::LD2Rv2s, ARM64::dsub0);
- else if (VT == MVT::v4i32 || VT == MVT::v4f32)
- return SelectLoad(Node, 2, ARM64::LD2Rv4s, ARM64::qsub0);
- else if (VT == MVT::v1i64 || VT == MVT::v1f64)
- return SelectLoad(Node, 2, ARM64::LD2Rv1d, ARM64::dsub0);
- else if (VT == MVT::v2i64 || VT == MVT::v2f64)
- return SelectLoad(Node, 2, ARM64::LD2Rv2d, ARM64::qsub0);
- break;
- case Intrinsic::arm64_neon_ld3r:
- if (VT == MVT::v8i8)
- return SelectLoad(Node, 3, ARM64::LD3Rv8b, ARM64::dsub0);
- else if (VT == MVT::v16i8)
- return SelectLoad(Node, 3, ARM64::LD3Rv16b, ARM64::qsub0);
- else if (VT == MVT::v4i16)
- return SelectLoad(Node, 3, ARM64::LD3Rv4h, ARM64::dsub0);
- else if (VT == MVT::v8i16)
- return SelectLoad(Node, 3, ARM64::LD3Rv8h, ARM64::qsub0);
- else if (VT == MVT::v2i32 || VT == MVT::v2f32)
- return SelectLoad(Node, 3, ARM64::LD3Rv2s, ARM64::dsub0);
- else if (VT == MVT::v4i32 || VT == MVT::v4f32)
- return SelectLoad(Node, 3, ARM64::LD3Rv4s, ARM64::qsub0);
- else if (VT == MVT::v1i64 || VT == MVT::v1f64)
- return SelectLoad(Node, 3, ARM64::LD3Rv1d, ARM64::dsub0);
- else if (VT == MVT::v2i64 || VT == MVT::v2f64)
- return SelectLoad(Node, 3, ARM64::LD3Rv2d, ARM64::qsub0);
- break;
- case Intrinsic::arm64_neon_ld4r:
- if (VT == MVT::v8i8)
- return SelectLoad(Node, 4, ARM64::LD4Rv8b, ARM64::dsub0);
- else if (VT == MVT::v16i8)
- return SelectLoad(Node, 4, ARM64::LD4Rv16b, ARM64::qsub0);
- else if (VT == MVT::v4i16)
- return SelectLoad(Node, 4, ARM64::LD4Rv4h, ARM64::dsub0);
- else if (VT == MVT::v8i16)
- return SelectLoad(Node, 4, ARM64::LD4Rv8h, ARM64::qsub0);
- else if (VT == MVT::v2i32 || VT == MVT::v2f32)
- return SelectLoad(Node, 4, ARM64::LD4Rv2s, ARM64::dsub0);
- else if (VT == MVT::v4i32 || VT == MVT::v4f32)
- return SelectLoad(Node, 4, ARM64::LD4Rv4s, ARM64::qsub0);
- else if (VT == MVT::v1i64 || VT == MVT::v1f64)
- return SelectLoad(Node, 4, ARM64::LD4Rv1d, ARM64::dsub0);
- else if (VT == MVT::v2i64 || VT == MVT::v2f64)
- return SelectLoad(Node, 4, ARM64::LD4Rv2d, ARM64::qsub0);
- break;
- case Intrinsic::arm64_neon_ld2lane:
- if (VT == MVT::v16i8 || VT == MVT::v8i8)
- return SelectLoadLane(Node, 2, ARM64::LD2i8);
- else if (VT == MVT::v8i16 || VT == MVT::v4i16)
- return SelectLoadLane(Node, 2, ARM64::LD2i16);
- else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
- VT == MVT::v2f32)
- return SelectLoadLane(Node, 2, ARM64::LD2i32);
- else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
- VT == MVT::v1f64)
- return SelectLoadLane(Node, 2, ARM64::LD2i64);
- break;
- case Intrinsic::arm64_neon_ld3lane:
- if (VT == MVT::v16i8 || VT == MVT::v8i8)
- return SelectLoadLane(Node, 3, ARM64::LD3i8);
- else if (VT == MVT::v8i16 || VT == MVT::v4i16)
- return SelectLoadLane(Node, 3, ARM64::LD3i16);
- else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
- VT == MVT::v2f32)
- return SelectLoadLane(Node, 3, ARM64::LD3i32);
- else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
- VT == MVT::v1f64)
- return SelectLoadLane(Node, 3, ARM64::LD3i64);
- break;
- case Intrinsic::arm64_neon_ld4lane:
- if (VT == MVT::v16i8 || VT == MVT::v8i8)
- return SelectLoadLane(Node, 4, ARM64::LD4i8);
- else if (VT == MVT::v8i16 || VT == MVT::v4i16)
- return SelectLoadLane(Node, 4, ARM64::LD4i16);
- else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
- VT == MVT::v2f32)
- return SelectLoadLane(Node, 4, ARM64::LD4i32);
- else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
- VT == MVT::v1f64)
- return SelectLoadLane(Node, 4, ARM64::LD4i64);
- break;
- }
- }
- break;
- case ISD::INTRINSIC_WO_CHAIN: {
- unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
- switch (IntNo) {
- default:
- break;
- case Intrinsic::arm64_neon_tbl2:
- return SelectTable(Node, 2, VT == MVT::v8i8 ? ARM64::TBLv8i8Two
- : ARM64::TBLv16i8Two,
- false);
- case Intrinsic::arm64_neon_tbl3:
- return SelectTable(Node, 3, VT == MVT::v8i8 ? ARM64::TBLv8i8Three
- : ARM64::TBLv16i8Three,
- false);
- case Intrinsic::arm64_neon_tbl4:
- return SelectTable(Node, 4, VT == MVT::v8i8 ? ARM64::TBLv8i8Four
- : ARM64::TBLv16i8Four,
- false);
- case Intrinsic::arm64_neon_tbx2:
- return SelectTable(Node, 2, VT == MVT::v8i8 ? ARM64::TBXv8i8Two
- : ARM64::TBXv16i8Two,
- true);
- case Intrinsic::arm64_neon_tbx3:
- return SelectTable(Node, 3, VT == MVT::v8i8 ? ARM64::TBXv8i8Three
- : ARM64::TBXv16i8Three,
- true);
- case Intrinsic::arm64_neon_tbx4:
- return SelectTable(Node, 4, VT == MVT::v8i8 ? ARM64::TBXv8i8Four
- : ARM64::TBXv16i8Four,
- true);
- case Intrinsic::arm64_neon_smull:
- case Intrinsic::arm64_neon_umull:
- if (SDNode *N = SelectMULLV64LaneV128(IntNo, Node))
- return N;
- break;
- }
- break;
- }
- case ISD::INTRINSIC_VOID: {
- unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
- if (Node->getNumOperands() >= 3)
- VT = Node->getOperand(2)->getValueType(0);
- switch (IntNo) {
- default:
- break;
- case Intrinsic::arm64_neon_st1x2: {
- if (VT == MVT::v8i8)
- return SelectStore(Node, 2, ARM64::ST1Twov8b);
- else if (VT == MVT::v16i8)
- return SelectStore(Node, 2, ARM64::ST1Twov16b);
- else if (VT == MVT::v4i16)
- return SelectStore(Node, 2, ARM64::ST1Twov4h);
- else if (VT == MVT::v8i16)
- return SelectStore(Node, 2, ARM64::ST1Twov8h);
- else if (VT == MVT::v2i32 || VT == MVT::v2f32)
- return SelectStore(Node, 2, ARM64::ST1Twov2s);
- else if (VT == MVT::v4i32 || VT == MVT::v4f32)
- return SelectStore(Node, 2, ARM64::ST1Twov4s);
- else if (VT == MVT::v2i64 || VT == MVT::v2f64)
- return SelectStore(Node, 2, ARM64::ST1Twov2d);
- else if (VT == MVT::v1i64 || VT == MVT::v1f64)
- return SelectStore(Node, 2, ARM64::ST1Twov1d);
- break;
- }
- case Intrinsic::arm64_neon_st1x3: {
- if (VT == MVT::v8i8)
- return SelectStore(Node, 3, ARM64::ST1Threev8b);
- else if (VT == MVT::v16i8)
- return SelectStore(Node, 3, ARM64::ST1Threev16b);
- else if (VT == MVT::v4i16)
- return SelectStore(Node, 3, ARM64::ST1Threev4h);
- else if (VT == MVT::v8i16)
- return SelectStore(Node, 3, ARM64::ST1Threev8h);
- else if (VT == MVT::v2i32 || VT == MVT::v2f32)
- return SelectStore(Node, 3, ARM64::ST1Threev2s);
- else if (VT == MVT::v4i32 || VT == MVT::v4f32)
- return SelectStore(Node, 3, ARM64::ST1Threev4s);
- else if (VT == MVT::v2i64 || VT == MVT::v2f64)
- return SelectStore(Node, 3, ARM64::ST1Threev2d);
- else if (VT == MVT::v1i64 || VT == MVT::v1f64)
- return SelectStore(Node, 3, ARM64::ST1Threev1d);
- break;
- }
- case Intrinsic::arm64_neon_st1x4: {
- if (VT == MVT::v8i8)
- return SelectStore(Node, 4, ARM64::ST1Fourv8b);
- else if (VT == MVT::v16i8)
- return SelectStore(Node, 4, ARM64::ST1Fourv16b);
- else if (VT == MVT::v4i16)
- return SelectStore(Node, 4, ARM64::ST1Fourv4h);
- else if (VT == MVT::v8i16)
- return SelectStore(Node, 4, ARM64::ST1Fourv8h);
- else if (VT == MVT::v2i32 || VT == MVT::v2f32)
- return SelectStore(Node, 4, ARM64::ST1Fourv2s);
- else if (VT == MVT::v4i32 || VT == MVT::v4f32)
- return SelectStore(Node, 4, ARM64::ST1Fourv4s);
- else if (VT == MVT::v2i64 || VT == MVT::v2f64)
- return SelectStore(Node, 4, ARM64::ST1Fourv2d);
- else if (VT == MVT::v1i64 || VT == MVT::v1f64)
- return SelectStore(Node, 4, ARM64::ST1Fourv1d);
- break;
- }
- case Intrinsic::arm64_neon_st2: {
- if (VT == MVT::v8i8)
- return SelectStore(Node, 2, ARM64::ST2Twov8b);
- else if (VT == MVT::v16i8)
- return SelectStore(Node, 2, ARM64::ST2Twov16b);
- else if (VT == MVT::v4i16)
- return SelectStore(Node, 2, ARM64::ST2Twov4h);
- else if (VT == MVT::v8i16)
- return SelectStore(Node, 2, ARM64::ST2Twov8h);
- else if (VT == MVT::v2i32 || VT == MVT::v2f32)
- return SelectStore(Node, 2, ARM64::ST2Twov2s);
- else if (VT == MVT::v4i32 || VT == MVT::v4f32)
- return SelectStore(Node, 2, ARM64::ST2Twov4s);
- else if (VT == MVT::v2i64 || VT == MVT::v2f64)
- return SelectStore(Node, 2, ARM64::ST2Twov2d);
- else if (VT == MVT::v1i64 || VT == MVT::v1f64)
- return SelectStore(Node, 2, ARM64::ST1Twov1d);
- break;
- }
- case Intrinsic::arm64_neon_st3: {
- if (VT == MVT::v8i8)
- return SelectStore(Node, 3, ARM64::ST3Threev8b);
- else if (VT == MVT::v16i8)
- return SelectStore(Node, 3, ARM64::ST3Threev16b);
- else if (VT == MVT::v4i16)
- return SelectStore(Node, 3, ARM64::ST3Threev4h);
- else if (VT == MVT::v8i16)
- return SelectStore(Node, 3, ARM64::ST3Threev8h);
- else if (VT == MVT::v2i32 || VT == MVT::v2f32)
- return SelectStore(Node, 3, ARM64::ST3Threev2s);
- else if (VT == MVT::v4i32 || VT == MVT::v4f32)
- return SelectStore(Node, 3, ARM64::ST3Threev4s);
- else if (VT == MVT::v2i64 || VT == MVT::v2f64)
- return SelectStore(Node, 3, ARM64::ST3Threev2d);
- else if (VT == MVT::v1i64 || VT == MVT::v1f64)
- return SelectStore(Node, 3, ARM64::ST1Threev1d);
- break;
- }
- case Intrinsic::arm64_neon_st4: {
- if (VT == MVT::v8i8)
- return SelectStore(Node, 4, ARM64::ST4Fourv8b);
- else if (VT == MVT::v16i8)
- return SelectStore(Node, 4, ARM64::ST4Fourv16b);
- else if (VT == MVT::v4i16)
- return SelectStore(Node, 4, ARM64::ST4Fourv4h);
- else if (VT == MVT::v8i16)
- return SelectStore(Node, 4, ARM64::ST4Fourv8h);
- else if (VT == MVT::v2i32 || VT == MVT::v2f32)
- return SelectStore(Node, 4, ARM64::ST4Fourv2s);
- else if (VT == MVT::v4i32 || VT == MVT::v4f32)
- return SelectStore(Node, 4, ARM64::ST4Fourv4s);
- else if (VT == MVT::v2i64 || VT == MVT::v2f64)
- return SelectStore(Node, 4, ARM64::ST4Fourv2d);
- else if (VT == MVT::v1i64 || VT == MVT::v1f64)
- return SelectStore(Node, 4, ARM64::ST1Fourv1d);
- break;
- }
- case Intrinsic::arm64_neon_st2lane: {
- if (VT == MVT::v16i8 || VT == MVT::v8i8)
- return SelectStoreLane(Node, 2, ARM64::ST2i8);
- else if (VT == MVT::v8i16 || VT == MVT::v4i16)
- return SelectStoreLane(Node, 2, ARM64::ST2i16);
- else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
- VT == MVT::v2f32)
- return SelectStoreLane(Node, 2, ARM64::ST2i32);
- else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
- VT == MVT::v1f64)
- return SelectStoreLane(Node, 2, ARM64::ST2i64);
- break;
- }
- case Intrinsic::arm64_neon_st3lane: {
- if (VT == MVT::v16i8 || VT == MVT::v8i8)
- return SelectStoreLane(Node, 3, ARM64::ST3i8);
- else if (VT == MVT::v8i16 || VT == MVT::v4i16)
- return SelectStoreLane(Node, 3, ARM64::ST3i16);
- else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
- VT == MVT::v2f32)
- return SelectStoreLane(Node, 3, ARM64::ST3i32);
- else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
- VT == MVT::v1f64)
- return SelectStoreLane(Node, 3, ARM64::ST3i64);
- break;
- }
- case Intrinsic::arm64_neon_st4lane: {
- if (VT == MVT::v16i8 || VT == MVT::v8i8)
- return SelectStoreLane(Node, 4, ARM64::ST4i8);
- else if (VT == MVT::v8i16 || VT == MVT::v4i16)
- return SelectStoreLane(Node, 4, ARM64::ST4i16);
- else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
- VT == MVT::v2f32)
- return SelectStoreLane(Node, 4, ARM64::ST4i32);
- else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
- VT == MVT::v1f64)
- return SelectStoreLane(Node, 4, ARM64::ST4i64);
- break;
- }
- }
- break;
- }
-
- case ISD::FCEIL:
- case ISD::FFLOOR:
- case ISD::FTRUNC:
- case ISD::FROUND:
- if (SDNode *I = SelectLIBM(Node))
- return I;
- break;
- }
-
- // Select the default instruction
- ResNode = SelectCode(Node);
-
- DEBUG(errs() << "=> ");
- if (ResNode == NULL || ResNode == Node)
- DEBUG(Node->dump(CurDAG));
- else
- DEBUG(ResNode->dump(CurDAG));
- DEBUG(errs() << "\n");
-
- return ResNode;
-}
-
-/// createARM64ISelDag - This pass converts a legalized DAG into a
-/// ARM64-specific DAG, ready for instruction scheduling.
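-/// A typical use (a sketch; assumes the target's pass config exposes a
-/// getARM64TargetMachine() accessor, per the usual LLVM pattern) is from
-/// the addInstSelector hook:
-///   addPass(createARM64ISelDag(getARM64TargetMachine(), getOptLevel()));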
-FunctionPass *llvm::createARM64ISelDag(ARM64TargetMachine &TM,
- CodeGenOpt::Level OptLevel) {
- return new ARM64DAGToDAGISel(TM, OptLevel);
-}
diff --git a/lib/Target/ARM64/ARM64ISelLowering.cpp b/lib/Target/ARM64/ARM64ISelLowering.cpp
deleted file mode 100644
index 641f591..0000000
--- a/lib/Target/ARM64/ARM64ISelLowering.cpp
+++ /dev/null
@@ -1,7551 +0,0 @@
-//===-- ARM64ISelLowering.cpp - ARM64 DAG Lowering Implementation --------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the ARM64TargetLowering class.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "arm64-lower"
-
-#include "ARM64ISelLowering.h"
-#include "ARM64PerfectShuffle.h"
-#include "ARM64Subtarget.h"
-#include "ARM64CallingConv.h"
-#include "ARM64MachineFunctionInfo.h"
-#include "ARM64TargetMachine.h"
-#include "ARM64TargetObjectFile.h"
-#include "MCTargetDesc/ARM64AddressingModes.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/CodeGen/CallingConvLower.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/Intrinsics.h"
-#include "llvm/IR/Type.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetOptions.h"
-using namespace llvm;
-
-STATISTIC(NumTailCalls, "Number of tail calls");
-STATISTIC(NumShiftInserts, "Number of vector shift inserts");
-
-// This option should go away when tail calls fully work.
-static cl::opt<bool>
-EnableARM64TailCalls("arm64-tail-calls", cl::Hidden,
- cl::desc("Generate ARM64 tail calls (TEMPORARY OPTION)."),
- cl::init(true));
-
-static cl::opt<bool>
-StrictAlign("arm64-strict-align", cl::Hidden,
- cl::desc("Disallow all unaligned memory accesses"));
-
- // Placeholder until extr generation is tested fully.
-static cl::opt<bool>
-EnableARM64ExtrGeneration("arm64-extr-generation", cl::Hidden,
- cl::desc("Allow ARM64 (or (shift)(shift))->extract"),
- cl::init(true));
-
-static cl::opt<bool>
-EnableARM64SlrGeneration("arm64-shift-insert-generation", cl::Hidden,
- cl::desc("Allow ARM64 SLI/SRI formation"),
- cl::init(false));
-
-//===----------------------------------------------------------------------===//
-// ARM64 Lowering public interface.
-//===----------------------------------------------------------------------===//
-static TargetLoweringObjectFile *createTLOF(TargetMachine &TM) {
- if (TM.getSubtarget<ARM64Subtarget>().isTargetDarwin())
- return new ARM64_MachoTargetObjectFile();
-
- return new ARM64_ELFTargetObjectFile();
-}
-
-ARM64TargetLowering::ARM64TargetLowering(ARM64TargetMachine &TM)
- : TargetLowering(TM, createTLOF(TM)) {
- Subtarget = &TM.getSubtarget<ARM64Subtarget>();
-
- // ARM64 doesn't have comparisons which set GPRs or setcc instructions, so
- // we have to make something up. Arbitrarily, choose ZeroOrOne.
- setBooleanContents(ZeroOrOneBooleanContent);
- // When comparing vectors the result sets the different elements in the
- // vector to all-one or all-zero.
- setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
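- // Concretely (a sketch): a scalar setcc becomes a compare plus
- //   cset w0, eq               ; result is 0 or 1
- // while a vector compare produces a per-lane mask:
- //   cmeq v0.4s, v0.4s, v1.4s  ; each lane becomes 0 or all-ones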
-
- // Set up the register classes.
- addRegisterClass(MVT::i32, &ARM64::GPR32allRegClass);
- addRegisterClass(MVT::i64, &ARM64::GPR64allRegClass);
- addRegisterClass(MVT::f32, &ARM64::FPR32RegClass);
- addRegisterClass(MVT::f64, &ARM64::FPR64RegClass);
- addRegisterClass(MVT::f128, &ARM64::FPR128RegClass);
- addRegisterClass(MVT::v16i8, &ARM64::FPR8RegClass);
- addRegisterClass(MVT::v8i16, &ARM64::FPR16RegClass);
-
- // Someone set us up the NEON.
- addDRTypeForNEON(MVT::v2f32);
- addDRTypeForNEON(MVT::v8i8);
- addDRTypeForNEON(MVT::v4i16);
- addDRTypeForNEON(MVT::v2i32);
- addDRTypeForNEON(MVT::v1i64);
- addDRTypeForNEON(MVT::v1f64);
-
- addQRTypeForNEON(MVT::v4f32);
- addQRTypeForNEON(MVT::v2f64);
- addQRTypeForNEON(MVT::v16i8);
- addQRTypeForNEON(MVT::v8i16);
- addQRTypeForNEON(MVT::v4i32);
- addQRTypeForNEON(MVT::v2i64);
-
- // Compute derived properties from the register classes
- computeRegisterProperties();
-
- // Provide all sorts of operation actions
- setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
- setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
- setOperationAction(ISD::SETCC, MVT::i32, Custom);
- setOperationAction(ISD::SETCC, MVT::i64, Custom);
- setOperationAction(ISD::SETCC, MVT::f32, Custom);
- setOperationAction(ISD::SETCC, MVT::f64, Custom);
- setOperationAction(ISD::BRCOND, MVT::Other, Expand);
- setOperationAction(ISD::BR_CC, MVT::i32, Custom);
- setOperationAction(ISD::BR_CC, MVT::i64, Custom);
- setOperationAction(ISD::BR_CC, MVT::f32, Custom);
- setOperationAction(ISD::BR_CC, MVT::f64, Custom);
- setOperationAction(ISD::SELECT, MVT::i32, Custom);
- setOperationAction(ISD::SELECT, MVT::i64, Custom);
- setOperationAction(ISD::SELECT, MVT::f32, Custom);
- setOperationAction(ISD::SELECT, MVT::f64, Custom);
- setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
- setOperationAction(ISD::SELECT_CC, MVT::i64, Custom);
- setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
- setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
- setOperationAction(ISD::BR_JT, MVT::Other, Expand);
- setOperationAction(ISD::JumpTable, MVT::i64, Custom);
-
- setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
- setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
- setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
-
- setOperationAction(ISD::FREM, MVT::f32, Expand);
- setOperationAction(ISD::FREM, MVT::f64, Expand);
- setOperationAction(ISD::FREM, MVT::f80, Expand);
-
- // FIXME: v1f64 shouldn't be legal if we can avoid it, because it leads to
- // silliness like this:
- setOperationAction(ISD::FABS, MVT::v1f64, Expand);
- setOperationAction(ISD::FADD, MVT::v1f64, Expand);
- setOperationAction(ISD::FCEIL, MVT::v1f64, Expand);
- setOperationAction(ISD::FCOPYSIGN, MVT::v1f64, Expand);
- setOperationAction(ISD::FCOS, MVT::v1f64, Expand);
- setOperationAction(ISD::FDIV, MVT::v1f64, Expand);
- setOperationAction(ISD::FFLOOR, MVT::v1f64, Expand);
- setOperationAction(ISD::FMA, MVT::v1f64, Expand);
- setOperationAction(ISD::FMUL, MVT::v1f64, Expand);
- setOperationAction(ISD::FNEARBYINT, MVT::v1f64, Expand);
- setOperationAction(ISD::FNEG, MVT::v1f64, Expand);
- setOperationAction(ISD::FPOW, MVT::v1f64, Expand);
- setOperationAction(ISD::FREM, MVT::v1f64, Expand);
- setOperationAction(ISD::FROUND, MVT::v1f64, Expand);
- setOperationAction(ISD::FRINT, MVT::v1f64, Expand);
- setOperationAction(ISD::FSIN, MVT::v1f64, Expand);
- setOperationAction(ISD::FSINCOS, MVT::v1f64, Expand);
- setOperationAction(ISD::FSQRT, MVT::v1f64, Expand);
- setOperationAction(ISD::FSUB, MVT::v1f64, Expand);
- setOperationAction(ISD::FTRUNC, MVT::v1f64, Expand);
- setOperationAction(ISD::SETCC, MVT::v1f64, Expand);
- setOperationAction(ISD::BR_CC, MVT::v1f64, Expand);
- setOperationAction(ISD::SELECT, MVT::v1f64, Expand);
- setOperationAction(ISD::SELECT_CC, MVT::v1f64, Expand);
- setOperationAction(ISD::FP_EXTEND, MVT::v1f64, Expand);
-
- setOperationAction(ISD::FP_TO_SINT, MVT::v1i64, Expand);
- setOperationAction(ISD::FP_TO_UINT, MVT::v1i64, Expand);
- setOperationAction(ISD::SINT_TO_FP, MVT::v1i64, Expand);
- setOperationAction(ISD::UINT_TO_FP, MVT::v1i64, Expand);
- setOperationAction(ISD::FP_ROUND, MVT::v1f64, Expand);
-
- // Custom lowering hooks are needed for XOR
- // to fold it into CSINC/CSINV.
- setOperationAction(ISD::XOR, MVT::i32, Custom);
- setOperationAction(ISD::XOR, MVT::i64, Custom);
-
- // Virtually no operation on f128 is legal, but LLVM can't expand them when
- // there's a valid register class, so we need custom operations in most cases.
- setOperationAction(ISD::FABS, MVT::f128, Expand);
- setOperationAction(ISD::FADD, MVT::f128, Custom);
- setOperationAction(ISD::FCOPYSIGN, MVT::f128, Expand);
- setOperationAction(ISD::FCOS, MVT::f128, Expand);
- setOperationAction(ISD::FDIV, MVT::f128, Custom);
- setOperationAction(ISD::FMA, MVT::f128, Expand);
- setOperationAction(ISD::FMUL, MVT::f128, Custom);
- setOperationAction(ISD::FNEG, MVT::f128, Expand);
- setOperationAction(ISD::FPOW, MVT::f128, Expand);
- setOperationAction(ISD::FREM, MVT::f128, Expand);
- setOperationAction(ISD::FRINT, MVT::f128, Expand);
- setOperationAction(ISD::FSIN, MVT::f128, Expand);
- setOperationAction(ISD::FSINCOS, MVT::f128, Expand);
- setOperationAction(ISD::FSQRT, MVT::f128, Expand);
- setOperationAction(ISD::FSUB, MVT::f128, Custom);
- setOperationAction(ISD::FTRUNC, MVT::f128, Expand);
- setOperationAction(ISD::SETCC, MVT::f128, Custom);
- setOperationAction(ISD::BR_CC, MVT::f128, Custom);
- setOperationAction(ISD::SELECT, MVT::f128, Custom);
- setOperationAction(ISD::SELECT_CC, MVT::f128, Custom);
- setOperationAction(ISD::FP_EXTEND, MVT::f128, Custom);
-
- // Lowering for many of the conversions is actually specified by the non-f128
- // type. The LowerXXX function will be trivial when f128 isn't involved.
- setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
- setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
- setOperationAction(ISD::FP_TO_SINT, MVT::i128, Custom);
- setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
- setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
- setOperationAction(ISD::FP_TO_UINT, MVT::i128, Custom);
- setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
- setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
- setOperationAction(ISD::SINT_TO_FP, MVT::i128, Custom);
- setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
- setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
- setOperationAction(ISD::UINT_TO_FP, MVT::i128, Custom);
- setOperationAction(ISD::FP_ROUND, MVT::f32, Custom);
- setOperationAction(ISD::FP_ROUND, MVT::f64, Custom);
-
- // 128-bit atomics
- setOperationAction(ISD::ATOMIC_SWAP, MVT::i128, Custom);
- setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i128, Custom);
- setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i128, Custom);
- setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i128, Custom);
- setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i128, Custom);
- setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i128, Custom);
- setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i128, Custom);
- setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i128, Custom);
- setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i128, Custom);
- setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i128, Custom);
- setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i128, Custom);
- setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i128, Custom);
- // These are surprisingly difficult. The only single-copy atomic 128-bit
- // instruction on AArch64 is stxp (when it succeeds). So a store can safely
- // become a simple swap, but a load can only be determined to have been atomic
- // if storing the same value back succeeds.
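- // A 128-bit atomic load therefore lowers to a read-back loop (sketch):
- //   loop:
- //     ldxp x0, x1, [x2]
- //     stxp w3, x0, x1, [x2]  ; store back the value just read
- //     cbnz w3, loop          ; retry until the exclusive pair succeeds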
- setOperationAction(ISD::ATOMIC_LOAD, MVT::i128, Custom);
- setOperationAction(ISD::ATOMIC_STORE, MVT::i128, Expand);
-
- // Variable arguments.
- setOperationAction(ISD::VASTART, MVT::Other, Custom);
- setOperationAction(ISD::VAARG, MVT::Other, Custom);
- setOperationAction(ISD::VACOPY, MVT::Other, Custom);
- setOperationAction(ISD::VAEND, MVT::Other, Expand);
-
- // Variable-sized objects.
- setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
- setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
- setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
-
- // Exception handling.
- // FIXME: These are guesses. Has this been defined yet?
- setExceptionPointerRegister(ARM64::X0);
- setExceptionSelectorRegister(ARM64::X1);
-
- // Constant pool entries
- setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
-
- // BlockAddress
- setOperationAction(ISD::BlockAddress, MVT::i64, Custom);
-
- // Add/Sub overflow ops with MVT::Glue are lowered to NZCV dependences.
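- // E.g. a plain 128-bit integer add is built from this chain (sketch):
- //   adds x0, x0, x2   ; low halves, carry out to NZCV
- //   adc  x1, x1, x3   ; high halves, carry in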
- setOperationAction(ISD::ADDC, MVT::i32, Custom);
- setOperationAction(ISD::ADDE, MVT::i32, Custom);
- setOperationAction(ISD::SUBC, MVT::i32, Custom);
- setOperationAction(ISD::SUBE, MVT::i32, Custom);
- setOperationAction(ISD::ADDC, MVT::i64, Custom);
- setOperationAction(ISD::ADDE, MVT::i64, Custom);
- setOperationAction(ISD::SUBC, MVT::i64, Custom);
- setOperationAction(ISD::SUBE, MVT::i64, Custom);
-
- // ARM64 lacks both left-rotate and popcount instructions.
- setOperationAction(ISD::ROTL, MVT::i32, Expand);
- setOperationAction(ISD::ROTL, MVT::i64, Expand);
-
- // ARM64 doesn't have direct vector->f32 conversion instructions for
- // elements smaller than i32, so promote the input to i32 first.
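- // E.g. uint_to_fp on v4i16 is widened first (sketch):
- //   ushll v0.4s, v0.4h, #0   ; zero-extend the lanes to 32 bits
- //   ucvtf v0.4s, v0.4s       ; then convert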
- setOperationAction(ISD::UINT_TO_FP, MVT::v4i8, Promote);
- setOperationAction(ISD::SINT_TO_FP, MVT::v4i8, Promote);
- setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Promote);
- setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Promote);
- // Similarly, there is no direct i32 -> f64 vector conversion instruction.
- setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom);
- setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Custom);
- setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Custom);
- setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Custom);
-
- // ARM64 doesn't have {U|S}MUL_LOHI.
- setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
- setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
-
- // ARM64 doesn't have MUL.2d:
- setOperationAction(ISD::MUL, MVT::v2i64, Expand);
-
- // Expand the undefined-at-zero variants of cttz/ctlz to their
- // defined-at-zero counterparts, which ARM64 supports directly.
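- // (There is no dedicated count-trailing-zeros instruction either; cttz is
- // in turn selected as a bit-reverse followed by clz, i.e. rbit + clz.)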
- setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
- setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);
- setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand);
- setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand);
-
- setOperationAction(ISD::CTPOP, MVT::i32, Custom);
- setOperationAction(ISD::CTPOP, MVT::i64, Custom);
-
- setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
- setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
- setOperationAction(ISD::SREM, MVT::i32, Expand);
- setOperationAction(ISD::SREM, MVT::i64, Expand);
- setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
- setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
- setOperationAction(ISD::UREM, MVT::i32, Expand);
- setOperationAction(ISD::UREM, MVT::i64, Expand);
-
- // Custom lower Add/Sub/Mul with overflow.
- setOperationAction(ISD::SADDO, MVT::i32, Custom);
- setOperationAction(ISD::SADDO, MVT::i64, Custom);
- setOperationAction(ISD::UADDO, MVT::i32, Custom);
- setOperationAction(ISD::UADDO, MVT::i64, Custom);
- setOperationAction(ISD::SSUBO, MVT::i32, Custom);
- setOperationAction(ISD::SSUBO, MVT::i64, Custom);
- setOperationAction(ISD::USUBO, MVT::i32, Custom);
- setOperationAction(ISD::USUBO, MVT::i64, Custom);
- setOperationAction(ISD::SMULO, MVT::i32, Custom);
- setOperationAction(ISD::SMULO, MVT::i64, Custom);
- setOperationAction(ISD::UMULO, MVT::i32, Custom);
- setOperationAction(ISD::UMULO, MVT::i64, Custom);
-
- setOperationAction(ISD::FSIN, MVT::f32, Expand);
- setOperationAction(ISD::FSIN, MVT::f64, Expand);
- setOperationAction(ISD::FCOS, MVT::f32, Expand);
- setOperationAction(ISD::FCOS, MVT::f64, Expand);
- setOperationAction(ISD::FPOW, MVT::f32, Expand);
- setOperationAction(ISD::FPOW, MVT::f64, Expand);
- setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
- setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
-
- // ARM64 has implementations of a lot of rounding-like FP operations.
- static MVT RoundingTypes[] = { MVT::f32, MVT::f64, MVT::v2f32,
- MVT::v4f32, MVT::v2f64 };
- for (unsigned I = 0; I < array_lengthof(RoundingTypes); ++I) {
- MVT Ty = RoundingTypes[I];
- setOperationAction(ISD::FFLOOR, Ty, Legal);
- setOperationAction(ISD::FNEARBYINT, Ty, Legal);
- setOperationAction(ISD::FCEIL, Ty, Legal);
- setOperationAction(ISD::FRINT, Ty, Legal);
- setOperationAction(ISD::FTRUNC, Ty, Legal);
- setOperationAction(ISD::FROUND, Ty, Legal);
- }
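- // These map directly onto the FRINT family (sketch): ffloor -> frintm,
- // fceil -> frintp, ftrunc -> frintz, fround -> frinta, frint -> frintx,
- // fnearbyint -> frinti.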
-
- setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
-
- if (Subtarget->isTargetMachO()) {
- // For iOS, we don't want the normal expansion of a libcall to
- // sincos. We want to issue a libcall to __sincos_stret to avoid memory
- // traffic.
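- // __sincos_stret computes both results in one call and returns them in
- // registers; its signature is roughly "{ double, double } f(double)"
- // (a sketch of the Darwin convention).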
- setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
- setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
- } else {
- setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
- setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
- }
-
- // ARM64 does not have floating-point extending loads, an i1 sign-extending
- // load, floating-point truncating stores, or a v2i32->v2i16 truncating store.
- setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
- setLoadExtAction(ISD::EXTLOAD, MVT::f64, Expand);
- setLoadExtAction(ISD::EXTLOAD, MVT::f80, Expand);
- setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Expand);
- setTruncStoreAction(MVT::f64, MVT::f32, Expand);
- setTruncStoreAction(MVT::f128, MVT::f80, Expand);
- setTruncStoreAction(MVT::f128, MVT::f64, Expand);
- setTruncStoreAction(MVT::f128, MVT::f32, Expand);
- setTruncStoreAction(MVT::v2i32, MVT::v2i16, Expand);
- // Indexed loads and stores are supported.
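- // I.e. both writeback forms are legal (sketch):
- //   ldr x0, [x1, #8]!   ; pre-indexed: x1 += 8, then load
- //   ldr x0, [x1], #8    ; post-indexed: load, then x1 += 8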
- for (unsigned im = (unsigned)ISD::PRE_INC;
- im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
- setIndexedLoadAction(im, MVT::i8, Legal);
- setIndexedLoadAction(im, MVT::i16, Legal);
- setIndexedLoadAction(im, MVT::i32, Legal);
- setIndexedLoadAction(im, MVT::i64, Legal);
- setIndexedLoadAction(im, MVT::f64, Legal);
- setIndexedLoadAction(im, MVT::f32, Legal);
- setIndexedStoreAction(im, MVT::i8, Legal);
- setIndexedStoreAction(im, MVT::i16, Legal);
- setIndexedStoreAction(im, MVT::i32, Legal);
- setIndexedStoreAction(im, MVT::i64, Legal);
- setIndexedStoreAction(im, MVT::f64, Legal);
- setIndexedStoreAction(im, MVT::f32, Legal);
- }
-
- // Likewise, narrowing and extending vector loads/stores aren't handled
- // directly.
- for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
- VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
-
- setOperationAction(ISD::SIGN_EXTEND_INREG, (MVT::SimpleValueType)VT,
- Expand);
-
- for (unsigned InnerVT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
- InnerVT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++InnerVT)
- setTruncStoreAction((MVT::SimpleValueType)VT,
- (MVT::SimpleValueType)InnerVT, Expand);
- setLoadExtAction(ISD::SEXTLOAD, (MVT::SimpleValueType)VT, Expand);
- setLoadExtAction(ISD::ZEXTLOAD, (MVT::SimpleValueType)VT, Expand);
- setLoadExtAction(ISD::EXTLOAD, (MVT::SimpleValueType)VT, Expand);
- }
-
- // Trap.
- setOperationAction(ISD::TRAP, MVT::Other, Legal);
-
- setOperationAction(ISD::ANY_EXTEND, MVT::v4i32, Legal);
-
- // We combine OR nodes for bitfield operations.
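- // E.g. (or (and x, 0xffff00ff), (and (shl y, 8), 0xff00)) can become a
- // single "bfi w0, w1, #8, #8" (a sketch; the combine covers many shapes).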
- setTargetDAGCombine(ISD::OR);
-
- // Vector add and sub nodes may conceal a high-half opportunity.
- // Also, try to fold ADD into CSINC/CSINV.
- setTargetDAGCombine(ISD::ADD);
- setTargetDAGCombine(ISD::SUB);
-
- setTargetDAGCombine(ISD::XOR);
- setTargetDAGCombine(ISD::SINT_TO_FP);
- setTargetDAGCombine(ISD::UINT_TO_FP);
-
- setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
-
- setTargetDAGCombine(ISD::ANY_EXTEND);
- setTargetDAGCombine(ISD::ZERO_EXTEND);
- setTargetDAGCombine(ISD::SIGN_EXTEND);
- setTargetDAGCombine(ISD::BITCAST);
- setTargetDAGCombine(ISD::CONCAT_VECTORS);
- setTargetDAGCombine(ISD::STORE);
-
- setTargetDAGCombine(ISD::MUL);
-
- MaxStoresPerMemset = MaxStoresPerMemsetOptSize = 8;
- MaxStoresPerMemcpy = MaxStoresPerMemcpyOptSize = 4;
- MaxStoresPerMemmove = MaxStoresPerMemmoveOptSize = 4;
-
- setStackPointerRegisterToSaveRestore(ARM64::SP);
-
- setSchedulingPreference(Sched::Hybrid);
-
- // Enable TBZ/TBNZ
- MaskAndBranchFoldingIsLegal = true;
-
- setMinFunctionAlignment(2);
-
- RequireStrictAlign = StrictAlign;
-}
-
-void ARM64TargetLowering::addTypeForNEON(EVT VT, EVT PromotedBitwiseVT) {
- if (VT == MVT::v2f32) {
- setOperationAction(ISD::LOAD, VT.getSimpleVT(), Promote);
- AddPromotedToType(ISD::LOAD, VT.getSimpleVT(), MVT::v2i32);
-
- setOperationAction(ISD::STORE, VT.getSimpleVT(), Promote);
- AddPromotedToType(ISD::STORE, VT.getSimpleVT(), MVT::v2i32);
- } else if (VT == MVT::v2f64 || VT == MVT::v4f32) {
- setOperationAction(ISD::LOAD, VT.getSimpleVT(), Promote);
- AddPromotedToType(ISD::LOAD, VT.getSimpleVT(), MVT::v2i64);
-
- setOperationAction(ISD::STORE, VT.getSimpleVT(), Promote);
- AddPromotedToType(ISD::STORE, VT.getSimpleVT(), MVT::v2i64);
- }
-
- // Mark vector float intrinsics as expand.
- if (VT == MVT::v2f32 || VT == MVT::v4f32 || VT == MVT::v2f64) {
- setOperationAction(ISD::FSIN, VT.getSimpleVT(), Expand);
- setOperationAction(ISD::FCOS, VT.getSimpleVT(), Expand);
- setOperationAction(ISD::FPOWI, VT.getSimpleVT(), Expand);
- setOperationAction(ISD::FPOW, VT.getSimpleVT(), Expand);
- setOperationAction(ISD::FLOG, VT.getSimpleVT(), Expand);
- setOperationAction(ISD::FLOG2, VT.getSimpleVT(), Expand);
- setOperationAction(ISD::FLOG10, VT.getSimpleVT(), Expand);
- setOperationAction(ISD::FEXP, VT.getSimpleVT(), Expand);
- setOperationAction(ISD::FEXP2, VT.getSimpleVT(), Expand);
- }
-
- setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT.getSimpleVT(), Custom);
- setOperationAction(ISD::INSERT_VECTOR_ELT, VT.getSimpleVT(), Custom);
- setOperationAction(ISD::SCALAR_TO_VECTOR, VT.getSimpleVT(), Custom);
- setOperationAction(ISD::BUILD_VECTOR, VT.getSimpleVT(), Custom);
- setOperationAction(ISD::VECTOR_SHUFFLE, VT.getSimpleVT(), Custom);
- setOperationAction(ISD::EXTRACT_SUBVECTOR, VT.getSimpleVT(), Custom);
- setOperationAction(ISD::SRA, VT.getSimpleVT(), Custom);
- setOperationAction(ISD::SRL, VT.getSimpleVT(), Custom);
- setOperationAction(ISD::SHL, VT.getSimpleVT(), Custom);
- setOperationAction(ISD::AND, VT.getSimpleVT(), Custom);
- setOperationAction(ISD::OR, VT.getSimpleVT(), Custom);
- setOperationAction(ISD::SETCC, VT.getSimpleVT(), Custom);
- setOperationAction(ISD::CONCAT_VECTORS, VT.getSimpleVT(), Legal);
-
- setOperationAction(ISD::SELECT, VT.getSimpleVT(), Expand);
- setOperationAction(ISD::SELECT_CC, VT.getSimpleVT(), Expand);
- setOperationAction(ISD::VSELECT, VT.getSimpleVT(), Expand);
- setLoadExtAction(ISD::EXTLOAD, VT.getSimpleVT(), Expand);
-
- setOperationAction(ISD::UDIV, VT.getSimpleVT(), Expand);
- setOperationAction(ISD::SDIV, VT.getSimpleVT(), Expand);
- setOperationAction(ISD::UREM, VT.getSimpleVT(), Expand);
- setOperationAction(ISD::SREM, VT.getSimpleVT(), Expand);
- setOperationAction(ISD::FREM, VT.getSimpleVT(), Expand);
-
- setOperationAction(ISD::FP_TO_SINT, VT.getSimpleVT(), Custom);
- setOperationAction(ISD::FP_TO_UINT, VT.getSimpleVT(), Custom);
-}
-
-void ARM64TargetLowering::addDRTypeForNEON(MVT VT) {
- addRegisterClass(VT, &ARM64::FPR64RegClass);
- addTypeForNEON(VT, MVT::v2i32);
-}
-
-void ARM64TargetLowering::addQRTypeForNEON(MVT VT) {
- addRegisterClass(VT, &ARM64::FPR128RegClass);
- addTypeForNEON(VT, MVT::v4i32);
-}
-
-EVT ARM64TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
- if (!VT.isVector())
- return MVT::i32;
- return VT.changeVectorElementTypeToInteger();
-}
-
-/// computeMaskedBitsForTargetNode - Determine which of the bits specified in
-/// Mask are known to be either zero or one and return them in the
-/// KnownZero/KnownOne bitsets.
-void ARM64TargetLowering::computeMaskedBitsForTargetNode(
- const SDValue Op, APInt &KnownZero, APInt &KnownOne,
- const SelectionDAG &DAG, unsigned Depth) const {
- switch (Op.getOpcode()) {
- default:
- break;
- case ARM64ISD::CSEL: {
- APInt KnownZero2, KnownOne2;
- DAG.ComputeMaskedBits(Op->getOperand(0), KnownZero, KnownOne, Depth + 1);
- DAG.ComputeMaskedBits(Op->getOperand(1), KnownZero2, KnownOne2, Depth + 1);
- KnownZero &= KnownZero2;
- KnownOne &= KnownOne2;
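- // Worked example: if operand 0 has its top 16 bits known zero and
- // operand 1 its top 24, only the common top 16 remain known zero, since
- // either input may be selected at run time.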
- break;
- }
- case ISD::INTRINSIC_W_CHAIN:
- break;
- case ISD::INTRINSIC_WO_CHAIN:
- case ISD::INTRINSIC_VOID: {
- unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
- switch (IntNo) {
- default:
- break;
- case Intrinsic::arm64_neon_umaxv:
- case Intrinsic::arm64_neon_uminv: {
- // Figure out the datatype of the vector operand. The UMINV instruction
- // will zero-extend the result, so we can mark as known zero all the
- // bits larger than the element datatype. 32-bit or larger doesn't need
- // this, as those are legal types and will be handled by isel directly.
- MVT VT = Op.getOperand(1).getValueType().getSimpleVT();
- unsigned BitWidth = KnownZero.getBitWidth();
- if (VT == MVT::v8i8 || VT == MVT::v16i8) {
- assert(BitWidth >= 8 && "Unexpected width!");
- APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth - 8);
- KnownZero |= Mask;
- } else if (VT == MVT::v4i16 || VT == MVT::v8i16) {
- assert(BitWidth >= 16 && "Unexpected width!");
- APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth - 16);
- KnownZero |= Mask;
- }
- break;
- }
- }
- }
- }
-}
-
-MVT ARM64TargetLowering::getScalarShiftAmountTy(EVT LHSTy) const {
- return MVT::i64;
-}
-
-unsigned ARM64TargetLowering::getMaximalGlobalOffset() const {
- // FIXME: On ARM64, this depends on the type.
- // Basically, the addressable offsets are 0 to 4095 * Ty.getSizeInBytes(),
- // and the offset has to be a multiple of the access size in bytes.
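- // E.g. an 8-byte access has a 12-bit unsigned, size-scaled immediate, so
- // it reaches at most 4095 * 8 = 32760 bytes.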
- return 4095;
-}
-
-FastISel *
-ARM64TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
- const TargetLibraryInfo *libInfo) const {
- return ARM64::createFastISel(funcInfo, libInfo);
-}
-
-const char *ARM64TargetLowering::getTargetNodeName(unsigned Opcode) const {
- switch (Opcode) {
- default:
- return 0;
- case ARM64ISD::CALL: return "ARM64ISD::CALL";
- case ARM64ISD::ADRP: return "ARM64ISD::ADRP";
- case ARM64ISD::ADDlow: return "ARM64ISD::ADDlow";
- case ARM64ISD::LOADgot: return "ARM64ISD::LOADgot";
- case ARM64ISD::RET_FLAG: return "ARM64ISD::RET_FLAG";
- case ARM64ISD::BRCOND: return "ARM64ISD::BRCOND";
- case ARM64ISD::CSEL: return "ARM64ISD::CSEL";
- case ARM64ISD::FCSEL: return "ARM64ISD::FCSEL";
- case ARM64ISD::CSINV: return "ARM64ISD::CSINV";
- case ARM64ISD::CSNEG: return "ARM64ISD::CSNEG";
- case ARM64ISD::CSINC: return "ARM64ISD::CSINC";
- case ARM64ISD::THREAD_POINTER: return "ARM64ISD::THREAD_POINTER";
- case ARM64ISD::TLSDESC_CALL: return "ARM64ISD::TLSDESC_CALL";
- case ARM64ISD::ADC: return "ARM64ISD::ADC";
- case ARM64ISD::SBC: return "ARM64ISD::SBC";
- case ARM64ISD::ADDS: return "ARM64ISD::ADDS";
- case ARM64ISD::SUBS: return "ARM64ISD::SUBS";
- case ARM64ISD::ADCS: return "ARM64ISD::ADCS";
- case ARM64ISD::SBCS: return "ARM64ISD::SBCS";
- case ARM64ISD::ANDS: return "ARM64ISD::ANDS";
- case ARM64ISD::FCMP: return "ARM64ISD::FCMP";
- case ARM64ISD::FMIN: return "ARM64ISD::FMIN";
- case ARM64ISD::FMAX: return "ARM64ISD::FMAX";
- case ARM64ISD::DUP: return "ARM64ISD::DUP";
- case ARM64ISD::DUPLANE8: return "ARM64ISD::DUPLANE8";
- case ARM64ISD::DUPLANE16: return "ARM64ISD::DUPLANE16";
- case ARM64ISD::DUPLANE32: return "ARM64ISD::DUPLANE32";
- case ARM64ISD::DUPLANE64: return "ARM64ISD::DUPLANE64";
- case ARM64ISD::MOVI: return "ARM64ISD::MOVI";
- case ARM64ISD::MOVIshift: return "ARM64ISD::MOVIshift";
- case ARM64ISD::MOVIedit: return "ARM64ISD::MOVIedit";
- case ARM64ISD::MOVImsl: return "ARM64ISD::MOVImsl";
- case ARM64ISD::FMOV: return "ARM64ISD::FMOV";
- case ARM64ISD::MVNIshift: return "ARM64ISD::MVNIshift";
- case ARM64ISD::MVNImsl: return "ARM64ISD::MVNImsl";
- case ARM64ISD::BICi: return "ARM64ISD::BICi";
- case ARM64ISD::ORRi: return "ARM64ISD::ORRi";
- case ARM64ISD::NEG: return "ARM64ISD::NEG";
- case ARM64ISD::EXTR: return "ARM64ISD::EXTR";
- case ARM64ISD::ZIP1: return "ARM64ISD::ZIP1";
- case ARM64ISD::ZIP2: return "ARM64ISD::ZIP2";
- case ARM64ISD::UZP1: return "ARM64ISD::UZP1";
- case ARM64ISD::UZP2: return "ARM64ISD::UZP2";
- case ARM64ISD::TRN1: return "ARM64ISD::TRN1";
- case ARM64ISD::TRN2: return "ARM64ISD::TRN2";
- case ARM64ISD::REV16: return "ARM64ISD::REV16";
- case ARM64ISD::REV32: return "ARM64ISD::REV32";
- case ARM64ISD::REV64: return "ARM64ISD::REV64";
- case ARM64ISD::EXT: return "ARM64ISD::EXT";
- case ARM64ISD::VSHL: return "ARM64ISD::VSHL";
- case ARM64ISD::VLSHR: return "ARM64ISD::VLSHR";
- case ARM64ISD::VASHR: return "ARM64ISD::VASHR";
- case ARM64ISD::CMEQ: return "ARM64ISD::CMEQ";
- case ARM64ISD::CMGE: return "ARM64ISD::CMGE";
- case ARM64ISD::CMGT: return "ARM64ISD::CMGT";
- case ARM64ISD::CMHI: return "ARM64ISD::CMHI";
- case ARM64ISD::CMHS: return "ARM64ISD::CMHS";
- case ARM64ISD::FCMEQ: return "ARM64ISD::FCMEQ";
- case ARM64ISD::FCMGE: return "ARM64ISD::FCMGE";
- case ARM64ISD::FCMGT: return "ARM64ISD::FCMGT";
- case ARM64ISD::CMEQz: return "ARM64ISD::CMEQz";
- case ARM64ISD::CMGEz: return "ARM64ISD::CMGEz";
- case ARM64ISD::CMGTz: return "ARM64ISD::CMGTz";
- case ARM64ISD::CMLEz: return "ARM64ISD::CMLEz";
- case ARM64ISD::CMLTz: return "ARM64ISD::CMLTz";
- case ARM64ISD::FCMEQz: return "ARM64ISD::FCMEQz";
- case ARM64ISD::FCMGEz: return "ARM64ISD::FCMGEz";
- case ARM64ISD::FCMGTz: return "ARM64ISD::FCMGTz";
- case ARM64ISD::FCMLEz: return "ARM64ISD::FCMLEz";
- case ARM64ISD::FCMLTz: return "ARM64ISD::FCMLTz";
- case ARM64ISD::NOT: return "ARM64ISD::NOT";
- case ARM64ISD::BIT: return "ARM64ISD::BIT";
- case ARM64ISD::CBZ: return "ARM64ISD::CBZ";
- case ARM64ISD::CBNZ: return "ARM64ISD::CBNZ";
- case ARM64ISD::TBZ: return "ARM64ISD::TBZ";
- case ARM64ISD::TBNZ: return "ARM64ISD::TBNZ";
- case ARM64ISD::TC_RETURN: return "ARM64ISD::TC_RETURN";
- case ARM64ISD::SITOF: return "ARM64ISD::SITOF";
- case ARM64ISD::UITOF: return "ARM64ISD::UITOF";
- case ARM64ISD::SQSHL_I: return "ARM64ISD::SQSHL_I";
- case ARM64ISD::UQSHL_I: return "ARM64ISD::UQSHL_I";
- case ARM64ISD::SRSHR_I: return "ARM64ISD::SRSHR_I";
- case ARM64ISD::URSHR_I: return "ARM64ISD::URSHR_I";
- case ARM64ISD::SQSHLU_I: return "ARM64ISD::SQSHLU_I";
- case ARM64ISD::WrapperLarge: return "ARM64ISD::WrapperLarge";
- }
-}
-
-static void getExclusiveOperation(unsigned Size, AtomicOrdering Ord,
- unsigned &LdrOpc, unsigned &StrOpc) {
- static unsigned LoadBares[] = { ARM64::LDXRB, ARM64::LDXRH, ARM64::LDXRW,
- ARM64::LDXRX, ARM64::LDXPX };
- static unsigned LoadAcqs[] = { ARM64::LDAXRB, ARM64::LDAXRH, ARM64::LDAXRW,
- ARM64::LDAXRX, ARM64::LDAXPX };
- static unsigned StoreBares[] = { ARM64::STXRB, ARM64::STXRH, ARM64::STXRW,
- ARM64::STXRX, ARM64::STXPX };
- static unsigned StoreRels[] = { ARM64::STLXRB, ARM64::STLXRH, ARM64::STLXRW,
- ARM64::STLXRX, ARM64::STLXPX };
-
- unsigned *LoadOps, *StoreOps;
- if (Ord == Acquire || Ord == AcquireRelease || Ord == SequentiallyConsistent)
- LoadOps = LoadAcqs;
- else
- LoadOps = LoadBares;
-
- if (Ord == Release || Ord == AcquireRelease || Ord == SequentiallyConsistent)
- StoreOps = StoreRels;
- else
- StoreOps = StoreBares;
-
- assert(isPowerOf2_32(Size) && Size <= 16 &&
- "unsupported size for atomic binary op!");
-
- LdrOpc = LoadOps[Log2_32(Size)];
- StrOpc = StoreOps[Log2_32(Size)];
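- // E.g. a 4-byte acquire-release operation gets LDAXRW/STLXRW, while a
- // monotonic one gets the bare LDXRW/STXRW pair (Log2_32(4) == 2 selects
- // the W-register entries).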
-}
-
-MachineBasicBlock *ARM64TargetLowering::EmitAtomicCmpSwap(MachineInstr *MI,
- MachineBasicBlock *BB,
- unsigned Size) const {
- unsigned dest = MI->getOperand(0).getReg();
- unsigned ptr = MI->getOperand(1).getReg();
- unsigned oldval = MI->getOperand(2).getReg();
- unsigned newval = MI->getOperand(3).getReg();
- AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(4).getImm());
- unsigned scratch = BB->getParent()->getRegInfo().createVirtualRegister(
- &ARM64::GPR32RegClass);
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
- DebugLoc dl = MI->getDebugLoc();
-
- // FIXME: We currently always generate a seq_cst operation; we should
- // be able to relax this in some cases.
- unsigned ldrOpc, strOpc;
- getExclusiveOperation(Size, Ord, ldrOpc, strOpc);
-
- MachineFunction *MF = BB->getParent();
- const BasicBlock *LLVM_BB = BB->getBasicBlock();
- MachineFunction::iterator It = BB;
- ++It; // insert the new blocks after the current block
-
- MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB);
- MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB);
- MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
- MF->insert(It, loop1MBB);
- MF->insert(It, loop2MBB);
- MF->insert(It, exitMBB);
-
- // Transfer the remainder of BB and its successor edges to exitMBB.
- exitMBB->splice(exitMBB->begin(), BB,
- std::next(MachineBasicBlock::iterator(MI)), BB->end());
- exitMBB->transferSuccessorsAndUpdatePHIs(BB);
-
- // thisMBB:
- // ...
- // fallthrough --> loop1MBB
- BB->addSuccessor(loop1MBB);
-
- // loop1MBB:
- // ldxr dest, [ptr]
- // cmp dest, oldval
- // b.ne exitMBB
- BB = loop1MBB;
- BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr);
- BuildMI(BB, dl, TII->get(Size == 8 ? ARM64::SUBSXrr : ARM64::SUBSWrr))
- .addReg(Size == 8 ? ARM64::XZR : ARM64::WZR, RegState::Define)
- .addReg(dest)
- .addReg(oldval);
- BuildMI(BB, dl, TII->get(ARM64::Bcc)).addImm(ARM64CC::NE).addMBB(exitMBB);
- BB->addSuccessor(loop2MBB);
- BB->addSuccessor(exitMBB);
-
- // loop2MBB:
- // stxr scratch, newval, [ptr]
- // cbnz scratch, loop1MBB
- BB = loop2MBB;
- BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(newval).addReg(ptr);
- BuildMI(BB, dl, TII->get(ARM64::CBNZW)).addReg(scratch).addMBB(loop1MBB);
- BB->addSuccessor(loop1MBB);
- BB->addSuccessor(exitMBB);
-
- // exitMBB:
- // ...
- BB = exitMBB;
-
- MI->eraseFromParent(); // The instruction is gone now.
-
- return BB;
-}
-
-MachineBasicBlock *
-ARM64TargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
- unsigned Size, unsigned BinOpcode) const {
- // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
-
- const BasicBlock *LLVM_BB = BB->getBasicBlock();
- MachineFunction *MF = BB->getParent();
- MachineFunction::iterator It = BB;
- ++It;
-
- unsigned dest = MI->getOperand(0).getReg();
- unsigned ptr = MI->getOperand(1).getReg();
- unsigned incr = MI->getOperand(2).getReg();
- AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(3).getImm());
- DebugLoc dl = MI->getDebugLoc();
-
- unsigned ldrOpc, strOpc;
- getExclusiveOperation(Size, Ord, ldrOpc, strOpc);
-
- MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
- MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
- MF->insert(It, loopMBB);
- MF->insert(It, exitMBB);
-
- // Transfer the remainder of BB and its successor edges to exitMBB.
- exitMBB->splice(exitMBB->begin(), BB,
- std::next(MachineBasicBlock::iterator(MI)), BB->end());
- exitMBB->transferSuccessorsAndUpdatePHIs(BB);
-
- MachineRegisterInfo &RegInfo = MF->getRegInfo();
- unsigned scratch = RegInfo.createVirtualRegister(&ARM64::GPR32RegClass);
- unsigned scratch2 =
- (!BinOpcode)
- ? incr
- : RegInfo.createVirtualRegister(Size == 8 ? &ARM64::GPR64RegClass
- : &ARM64::GPR32RegClass);
-
- // thisMBB:
- // ...
- // fallthrough --> loopMBB
- BB->addSuccessor(loopMBB);
-
- // loopMBB:
- // ldxr dest, ptr
- // <binop> scratch2, dest, incr
- // stxr scratch, scratch2, ptr
- // cbnz scratch, loopMBB
- // fallthrough --> exitMBB
- BB = loopMBB;
- BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr);
- if (BinOpcode) {
- // operand order needs to go the other way for NAND
- if (BinOpcode == ARM64::BICWrr || BinOpcode == ARM64::BICXrr)
- BuildMI(BB, dl, TII->get(BinOpcode), scratch2).addReg(incr).addReg(dest);
- else
- BuildMI(BB, dl, TII->get(BinOpcode), scratch2).addReg(dest).addReg(incr);
- }
-
- BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(scratch2).addReg(ptr);
- BuildMI(BB, dl, TII->get(ARM64::CBNZW)).addReg(scratch).addMBB(loopMBB);
-
- BB->addSuccessor(loopMBB);
- BB->addSuccessor(exitMBB);
-
- // exitMBB:
- // ...
- BB = exitMBB;
-
- MI->eraseFromParent(); // The instruction is gone now.
-
- return BB;
-}
-
-MachineBasicBlock *ARM64TargetLowering::EmitAtomicBinary128(
- MachineInstr *MI, MachineBasicBlock *BB, unsigned BinOpcodeLo,
- unsigned BinOpcodeHi) const {
- // This also handles ATOMIC_SWAP, indicated by BinOpcodeLo==0.
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
-
- const BasicBlock *LLVM_BB = BB->getBasicBlock();
- MachineFunction *MF = BB->getParent();
- MachineFunction::iterator It = BB;
- ++It;
-
- unsigned DestLo = MI->getOperand(0).getReg();
- unsigned DestHi = MI->getOperand(1).getReg();
- unsigned Ptr = MI->getOperand(2).getReg();
- unsigned IncrLo = MI->getOperand(3).getReg();
- unsigned IncrHi = MI->getOperand(4).getReg();
- AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(5).getImm());
- DebugLoc DL = MI->getDebugLoc();
-
- unsigned LdrOpc, StrOpc;
- getExclusiveOperation(16, Ord, LdrOpc, StrOpc);
-
- MachineBasicBlock *LoopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
- MachineBasicBlock *ExitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
- MF->insert(It, LoopMBB);
- MF->insert(It, ExitMBB);
-
- // Transfer the remainder of BB and its successor edges to exitMBB.
- ExitMBB->splice(ExitMBB->begin(), BB,
- std::next(MachineBasicBlock::iterator(MI)), BB->end());
- ExitMBB->transferSuccessorsAndUpdatePHIs(BB);
-
- MachineRegisterInfo &RegInfo = MF->getRegInfo();
- unsigned ScratchRes = RegInfo.createVirtualRegister(&ARM64::GPR32RegClass);
- unsigned ScratchLo = IncrLo, ScratchHi = IncrHi;
- if (BinOpcodeLo) {
- assert(BinOpcodeHi && "Expect neither or both opcodes to be defined");
- ScratchLo = RegInfo.createVirtualRegister(&ARM64::GPR64RegClass);
- ScratchHi = RegInfo.createVirtualRegister(&ARM64::GPR64RegClass);
- }
-
- // ThisMBB:
- // ...
- // fallthrough --> LoopMBB
- BB->addSuccessor(LoopMBB);
-
- // LoopMBB:
- // ldxp DestLo, DestHi, Ptr
- // <binoplo> ScratchLo, DestLo, IncrLo
- // <binophi> ScratchHi, DestHi, IncrHi
- // stxp ScratchRes, ScratchLo, ScratchHi, ptr
- // cbnz ScratchRes, LoopMBB
- // fallthrough --> ExitMBB
- BB = LoopMBB;
- BuildMI(BB, DL, TII->get(LdrOpc), DestLo)
- .addReg(DestHi, RegState::Define)
- .addReg(Ptr);
- if (BinOpcodeLo) {
- // operand order needs to go the other way for NAND
- if (BinOpcodeLo == ARM64::BICXrr) {
- std::swap(IncrLo, DestLo);
- std::swap(IncrHi, DestHi);
- }
-
- BuildMI(BB, DL, TII->get(BinOpcodeLo), ScratchLo).addReg(DestLo).addReg(
- IncrLo);
- BuildMI(BB, DL, TII->get(BinOpcodeHi), ScratchHi).addReg(DestHi).addReg(
- IncrHi);
- }
-
- BuildMI(BB, DL, TII->get(StrOpc), ScratchRes)
- .addReg(ScratchLo)
- .addReg(ScratchHi)
- .addReg(Ptr);
- BuildMI(BB, DL, TII->get(ARM64::CBNZW)).addReg(ScratchRes).addMBB(LoopMBB);
-
- BB->addSuccessor(LoopMBB);
- BB->addSuccessor(ExitMBB);
-
- // ExitMBB:
- // ...
- BB = ExitMBB;
-
- MI->eraseFromParent(); // The instruction is gone now.
-
- return BB;
-}
-
-MachineBasicBlock *
-ARM64TargetLowering::EmitAtomicCmpSwap128(MachineInstr *MI,
- MachineBasicBlock *BB) const {
- unsigned DestLo = MI->getOperand(0).getReg();
- unsigned DestHi = MI->getOperand(1).getReg();
- unsigned Ptr = MI->getOperand(2).getReg();
- unsigned OldValLo = MI->getOperand(3).getReg();
- unsigned OldValHi = MI->getOperand(4).getReg();
- unsigned NewValLo = MI->getOperand(5).getReg();
- unsigned NewValHi = MI->getOperand(6).getReg();
- AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(7).getImm());
- unsigned ScratchRes = BB->getParent()->getRegInfo().createVirtualRegister(
- &ARM64::GPR32RegClass);
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
- DebugLoc DL = MI->getDebugLoc();
-
- unsigned LdrOpc, StrOpc;
- getExclusiveOperation(16, Ord, LdrOpc, StrOpc);
-
- MachineFunction *MF = BB->getParent();
- const BasicBlock *LLVM_BB = BB->getBasicBlock();
- MachineFunction::iterator It = BB;
- ++It; // insert the new blocks after the current block
-
- MachineBasicBlock *Loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB);
- MachineBasicBlock *Loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB);
- MachineBasicBlock *ExitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
- MF->insert(It, Loop1MBB);
- MF->insert(It, Loop2MBB);
- MF->insert(It, ExitMBB);
-
- // Transfer the remainder of BB and its successor edges to exitMBB.
- ExitMBB->splice(ExitMBB->begin(), BB,
- std::next(MachineBasicBlock::iterator(MI)), BB->end());
- ExitMBB->transferSuccessorsAndUpdatePHIs(BB);
-
- // ThisMBB:
- // ...
- // fallthrough --> Loop1MBB
- BB->addSuccessor(Loop1MBB);
-
- // Loop1MBB:
- // ldxp DestLo, DestHi, [Ptr]
- // cmp DestLo, OldValLo
- // sbc xzr, DestHi, OldValHi
- // b.ne ExitMBB
- BB = Loop1MBB;
- BuildMI(BB, DL, TII->get(LdrOpc), DestLo)
- .addReg(DestHi, RegState::Define)
- .addReg(Ptr);
- BuildMI(BB, DL, TII->get(ARM64::SUBSXrr), ARM64::XZR).addReg(DestLo).addReg(
- OldValLo);
- BuildMI(BB, DL, TII->get(ARM64::SBCXr), ARM64::XZR).addReg(DestHi).addReg(
- OldValHi);
-
- BuildMI(BB, DL, TII->get(ARM64::Bcc)).addImm(ARM64CC::NE).addMBB(ExitMBB);
- BB->addSuccessor(Loop2MBB);
- BB->addSuccessor(ExitMBB);
-
- // Loop2MBB:
- // stxp ScratchRes, NewValLo, NewValHi, [Ptr]
- // cbnz ScratchRes, Loop1MBB
- BB = Loop2MBB;
- BuildMI(BB, DL, TII->get(StrOpc), ScratchRes)
- .addReg(NewValLo)
- .addReg(NewValHi)
- .addReg(Ptr);
- BuildMI(BB, DL, TII->get(ARM64::CBNZW)).addReg(ScratchRes).addMBB(Loop1MBB);
- BB->addSuccessor(Loop1MBB);
- BB->addSuccessor(ExitMBB);
-
- // ExitMBB:
- // ...
- BB = ExitMBB;
-
- MI->eraseFromParent(); // The instruction is gone now.
-
- return BB;
-}
-
-MachineBasicBlock *ARM64TargetLowering::EmitAtomicMinMax128(
- MachineInstr *MI, MachineBasicBlock *BB, unsigned CondCode) const {
-  // CondCode selects the comparison used for the 128-bit min/max operation.
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
-
- const BasicBlock *LLVM_BB = BB->getBasicBlock();
- MachineFunction *MF = BB->getParent();
- MachineFunction::iterator It = BB;
- ++It;
-
- unsigned DestLo = MI->getOperand(0).getReg();
- unsigned DestHi = MI->getOperand(1).getReg();
- unsigned Ptr = MI->getOperand(2).getReg();
- unsigned IncrLo = MI->getOperand(3).getReg();
- unsigned IncrHi = MI->getOperand(4).getReg();
- AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(5).getImm());
- DebugLoc DL = MI->getDebugLoc();
-
- unsigned LdrOpc, StrOpc;
- getExclusiveOperation(16, Ord, LdrOpc, StrOpc);
-
- MachineBasicBlock *LoopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
- MachineBasicBlock *ExitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
- MF->insert(It, LoopMBB);
- MF->insert(It, ExitMBB);
-
-  // Transfer the remainder of BB and its successor edges to ExitMBB.
- ExitMBB->splice(ExitMBB->begin(), BB,
- std::next(MachineBasicBlock::iterator(MI)), BB->end());
- ExitMBB->transferSuccessorsAndUpdatePHIs(BB);
-
- MachineRegisterInfo &RegInfo = MF->getRegInfo();
- unsigned ScratchRes = RegInfo.createVirtualRegister(&ARM64::GPR32RegClass);
- unsigned ScratchLo = RegInfo.createVirtualRegister(&ARM64::GPR64RegClass);
- unsigned ScratchHi = RegInfo.createVirtualRegister(&ARM64::GPR64RegClass);
-
- // ThisMBB:
- // ...
- // fallthrough --> LoopMBB
- BB->addSuccessor(LoopMBB);
-
- // LoopMBB:
- // ldxp DestLo, DestHi, Ptr
-  // cmp DestLo, IncrLo
-  // sbc xzr, DestHi, IncrHi
- // csel ScratchLo, DestLo, IncrLo, <cmp-op>
- // csel ScratchHi, DestHi, IncrHi, <cmp-op>
-  // stxp ScratchRes, ScratchLo, ScratchHi, Ptr
- // cbnz ScratchRes, LoopMBB
- // fallthrough --> ExitMBB
- BB = LoopMBB;
- BuildMI(BB, DL, TII->get(LdrOpc), DestLo)
- .addReg(DestHi, RegState::Define)
- .addReg(Ptr);
-
- BuildMI(BB, DL, TII->get(ARM64::SUBSXrr), ARM64::XZR).addReg(DestLo).addReg(
- IncrLo);
- BuildMI(BB, DL, TII->get(ARM64::SBCXr), ARM64::XZR).addReg(DestHi).addReg(
- IncrHi);
-
- BuildMI(BB, DL, TII->get(ARM64::CSELXr), ScratchLo)
- .addReg(DestLo)
- .addReg(IncrLo)
- .addImm(CondCode);
- BuildMI(BB, DL, TII->get(ARM64::CSELXr), ScratchHi)
- .addReg(DestHi)
- .addReg(IncrHi)
- .addImm(CondCode);
-
- BuildMI(BB, DL, TII->get(StrOpc), ScratchRes)
- .addReg(ScratchLo)
- .addReg(ScratchHi)
- .addReg(Ptr);
- BuildMI(BB, DL, TII->get(ARM64::CBNZW)).addReg(ScratchRes).addMBB(LoopMBB);
-
- BB->addSuccessor(LoopMBB);
- BB->addSuccessor(ExitMBB);
-
- // ExitMBB:
- // ...
- BB = ExitMBB;
-
- MI->eraseFromParent(); // The instruction is gone now.
-
- return BB;
-}
-
-MachineBasicBlock *
-ARM64TargetLowering::EmitF128CSEL(MachineInstr *MI,
- MachineBasicBlock *MBB) const {
- // We materialise the F128CSEL pseudo-instruction as some control flow and a
- // phi node:
-
- // OrigBB:
- // [... previous instrs leading to comparison ...]
- // b.ne TrueBB
- // b EndBB
- // TrueBB:
- // ; Fallthrough
- // EndBB:
- // Dest = PHI [IfTrue, TrueBB], [IfFalse, OrigBB]
-
- const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
- MachineFunction *MF = MBB->getParent();
- const BasicBlock *LLVM_BB = MBB->getBasicBlock();
- DebugLoc DL = MI->getDebugLoc();
- MachineFunction::iterator It = MBB;
- ++It;
-
- unsigned DestReg = MI->getOperand(0).getReg();
- unsigned IfTrueReg = MI->getOperand(1).getReg();
- unsigned IfFalseReg = MI->getOperand(2).getReg();
- unsigned CondCode = MI->getOperand(3).getImm();
- bool CPSRKilled = MI->getOperand(4).isKill();
-
- MachineBasicBlock *TrueBB = MF->CreateMachineBasicBlock(LLVM_BB);
- MachineBasicBlock *EndBB = MF->CreateMachineBasicBlock(LLVM_BB);
- MF->insert(It, TrueBB);
- MF->insert(It, EndBB);
-
- // Transfer rest of current basic-block to EndBB
- EndBB->splice(EndBB->begin(), MBB, std::next(MachineBasicBlock::iterator(MI)),
- MBB->end());
- EndBB->transferSuccessorsAndUpdatePHIs(MBB);
-
- BuildMI(MBB, DL, TII->get(ARM64::Bcc)).addImm(CondCode).addMBB(TrueBB);
- BuildMI(MBB, DL, TII->get(ARM64::B)).addMBB(EndBB);
- MBB->addSuccessor(TrueBB);
- MBB->addSuccessor(EndBB);
-
- // TrueBB falls through to the end.
- TrueBB->addSuccessor(EndBB);
-
- if (!CPSRKilled) {
- TrueBB->addLiveIn(ARM64::CPSR);
- EndBB->addLiveIn(ARM64::CPSR);
- }
-
- BuildMI(*EndBB, EndBB->begin(), DL, TII->get(ARM64::PHI), DestReg)
- .addReg(IfTrueReg)
- .addMBB(TrueBB)
- .addReg(IfFalseReg)
- .addMBB(MBB);
-
- MI->eraseFromParent();
- return EndBB;
-}
-
-MachineBasicBlock *
-ARM64TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
- MachineBasicBlock *BB) const {
- switch (MI->getOpcode()) {
- default:
-#ifndef NDEBUG
- MI->dump();
-#endif
-    llvm_unreachable("Unexpected instruction for custom inserter!");
-
- case ARM64::ATOMIC_LOAD_ADD_I8:
- return EmitAtomicBinary(MI, BB, 1, ARM64::ADDWrr);
- case ARM64::ATOMIC_LOAD_ADD_I16:
- return EmitAtomicBinary(MI, BB, 2, ARM64::ADDWrr);
- case ARM64::ATOMIC_LOAD_ADD_I32:
- return EmitAtomicBinary(MI, BB, 4, ARM64::ADDWrr);
- case ARM64::ATOMIC_LOAD_ADD_I64:
- return EmitAtomicBinary(MI, BB, 8, ARM64::ADDXrr);
- case ARM64::ATOMIC_LOAD_ADD_I128:
- return EmitAtomicBinary128(MI, BB, ARM64::ADDSXrr, ARM64::ADCXr);
-
- case ARM64::ATOMIC_LOAD_AND_I8:
- return EmitAtomicBinary(MI, BB, 1, ARM64::ANDWrr);
- case ARM64::ATOMIC_LOAD_AND_I16:
- return EmitAtomicBinary(MI, BB, 2, ARM64::ANDWrr);
- case ARM64::ATOMIC_LOAD_AND_I32:
- return EmitAtomicBinary(MI, BB, 4, ARM64::ANDWrr);
- case ARM64::ATOMIC_LOAD_AND_I64:
- return EmitAtomicBinary(MI, BB, 8, ARM64::ANDXrr);
- case ARM64::ATOMIC_LOAD_AND_I128:
- return EmitAtomicBinary128(MI, BB, ARM64::ANDXrr, ARM64::ANDXrr);
-
- case ARM64::ATOMIC_LOAD_OR_I8:
- return EmitAtomicBinary(MI, BB, 1, ARM64::ORRWrr);
- case ARM64::ATOMIC_LOAD_OR_I16:
- return EmitAtomicBinary(MI, BB, 2, ARM64::ORRWrr);
- case ARM64::ATOMIC_LOAD_OR_I32:
- return EmitAtomicBinary(MI, BB, 4, ARM64::ORRWrr);
- case ARM64::ATOMIC_LOAD_OR_I64:
- return EmitAtomicBinary(MI, BB, 8, ARM64::ORRXrr);
- case ARM64::ATOMIC_LOAD_OR_I128:
- return EmitAtomicBinary128(MI, BB, ARM64::ORRXrr, ARM64::ORRXrr);
-
- case ARM64::ATOMIC_LOAD_XOR_I8:
- return EmitAtomicBinary(MI, BB, 1, ARM64::EORWrr);
- case ARM64::ATOMIC_LOAD_XOR_I16:
- return EmitAtomicBinary(MI, BB, 2, ARM64::EORWrr);
- case ARM64::ATOMIC_LOAD_XOR_I32:
- return EmitAtomicBinary(MI, BB, 4, ARM64::EORWrr);
- case ARM64::ATOMIC_LOAD_XOR_I64:
- return EmitAtomicBinary(MI, BB, 8, ARM64::EORXrr);
- case ARM64::ATOMIC_LOAD_XOR_I128:
- return EmitAtomicBinary128(MI, BB, ARM64::EORXrr, ARM64::EORXrr);
-
- case ARM64::ATOMIC_LOAD_NAND_I8:
- return EmitAtomicBinary(MI, BB, 1, ARM64::BICWrr);
- case ARM64::ATOMIC_LOAD_NAND_I16:
- return EmitAtomicBinary(MI, BB, 2, ARM64::BICWrr);
- case ARM64::ATOMIC_LOAD_NAND_I32:
- return EmitAtomicBinary(MI, BB, 4, ARM64::BICWrr);
- case ARM64::ATOMIC_LOAD_NAND_I64:
- return EmitAtomicBinary(MI, BB, 8, ARM64::BICXrr);
- case ARM64::ATOMIC_LOAD_NAND_I128:
- return EmitAtomicBinary128(MI, BB, ARM64::BICXrr, ARM64::BICXrr);
-
- case ARM64::ATOMIC_LOAD_SUB_I8:
- return EmitAtomicBinary(MI, BB, 1, ARM64::SUBWrr);
- case ARM64::ATOMIC_LOAD_SUB_I16:
- return EmitAtomicBinary(MI, BB, 2, ARM64::SUBWrr);
- case ARM64::ATOMIC_LOAD_SUB_I32:
- return EmitAtomicBinary(MI, BB, 4, ARM64::SUBWrr);
- case ARM64::ATOMIC_LOAD_SUB_I64:
- return EmitAtomicBinary(MI, BB, 8, ARM64::SUBXrr);
- case ARM64::ATOMIC_LOAD_SUB_I128:
- return EmitAtomicBinary128(MI, BB, ARM64::SUBSXrr, ARM64::SBCXr);
-
- case ARM64::ATOMIC_LOAD_MIN_I128:
- return EmitAtomicMinMax128(MI, BB, ARM64CC::LT);
-
- case ARM64::ATOMIC_LOAD_MAX_I128:
- return EmitAtomicMinMax128(MI, BB, ARM64CC::GT);
-
- case ARM64::ATOMIC_LOAD_UMIN_I128:
- return EmitAtomicMinMax128(MI, BB, ARM64CC::CC);
-
- case ARM64::ATOMIC_LOAD_UMAX_I128:
- return EmitAtomicMinMax128(MI, BB, ARM64CC::HI);
-
- case ARM64::ATOMIC_SWAP_I8:
- return EmitAtomicBinary(MI, BB, 1, 0);
- case ARM64::ATOMIC_SWAP_I16:
- return EmitAtomicBinary(MI, BB, 2, 0);
- case ARM64::ATOMIC_SWAP_I32:
- return EmitAtomicBinary(MI, BB, 4, 0);
- case ARM64::ATOMIC_SWAP_I64:
- return EmitAtomicBinary(MI, BB, 8, 0);
- case ARM64::ATOMIC_SWAP_I128:
- return EmitAtomicBinary128(MI, BB, 0, 0);
-
- case ARM64::ATOMIC_CMP_SWAP_I8:
- return EmitAtomicCmpSwap(MI, BB, 1);
- case ARM64::ATOMIC_CMP_SWAP_I16:
- return EmitAtomicCmpSwap(MI, BB, 2);
- case ARM64::ATOMIC_CMP_SWAP_I32:
- return EmitAtomicCmpSwap(MI, BB, 4);
- case ARM64::ATOMIC_CMP_SWAP_I64:
- return EmitAtomicCmpSwap(MI, BB, 8);
- case ARM64::ATOMIC_CMP_SWAP_I128:
- return EmitAtomicCmpSwap128(MI, BB);
-
- case ARM64::F128CSEL:
- return EmitF128CSEL(MI, BB);
-
- case TargetOpcode::STACKMAP:
- case TargetOpcode::PATCHPOINT:
- return emitPatchPoint(MI, BB);
- }
- llvm_unreachable("Unexpected instruction for custom inserter!");
-}
-
-//===----------------------------------------------------------------------===//
-// ARM64 Lowering private implementation.
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// Lowering Code
-//===----------------------------------------------------------------------===//
-
-/// changeIntCCToARM64CC - Convert a DAG integer condition code to an ARM64 CC.
-static ARM64CC::CondCode changeIntCCToARM64CC(ISD::CondCode CC) {
- switch (CC) {
- default:
- llvm_unreachable("Unknown condition code!");
- case ISD::SETNE:
- return ARM64CC::NE;
- case ISD::SETEQ:
- return ARM64CC::EQ;
- case ISD::SETGT:
- return ARM64CC::GT;
- case ISD::SETGE:
- return ARM64CC::GE;
- case ISD::SETLT:
- return ARM64CC::LT;
- case ISD::SETLE:
- return ARM64CC::LE;
- case ISD::SETUGT:
- return ARM64CC::HI;
- case ISD::SETUGE:
- return ARM64CC::CS;
- case ISD::SETULT:
- return ARM64CC::CC;
- case ISD::SETULE:
- return ARM64CC::LS;
- }
-}
-
-/// changeFPCCToARM64CC - Convert a DAG fp condition code to an ARM64 CC.
-static void changeFPCCToARM64CC(ISD::CondCode CC, ARM64CC::CondCode &CondCode,
- ARM64CC::CondCode &CondCode2) {
- CondCode2 = ARM64CC::AL;
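-  // When a single ARM64 condition cannot express the FP predicate, CondCode2
-  // is set to a second condition and the caller must accept either one; e.g.
-  // SETONE (ordered and not equal) below becomes MI || GT.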
- switch (CC) {
- default:
- llvm_unreachable("Unknown FP condition!");
- case ISD::SETEQ:
- case ISD::SETOEQ:
- CondCode = ARM64CC::EQ;
- break;
- case ISD::SETGT:
- case ISD::SETOGT:
- CondCode = ARM64CC::GT;
- break;
- case ISD::SETGE:
- case ISD::SETOGE:
- CondCode = ARM64CC::GE;
- break;
- case ISD::SETOLT:
- CondCode = ARM64CC::MI;
- break;
- case ISD::SETOLE:
- CondCode = ARM64CC::LS;
- break;
- case ISD::SETONE:
- CondCode = ARM64CC::MI;
- CondCode2 = ARM64CC::GT;
- break;
- case ISD::SETO:
- CondCode = ARM64CC::VC;
- break;
- case ISD::SETUO:
- CondCode = ARM64CC::VS;
- break;
- case ISD::SETUEQ:
- CondCode = ARM64CC::EQ;
- CondCode2 = ARM64CC::VS;
- break;
- case ISD::SETUGT:
- CondCode = ARM64CC::HI;
- break;
- case ISD::SETUGE:
- CondCode = ARM64CC::PL;
- break;
- case ISD::SETLT:
- case ISD::SETULT:
- CondCode = ARM64CC::LT;
- break;
- case ISD::SETLE:
- case ISD::SETULE:
- CondCode = ARM64CC::LE;
- break;
- case ISD::SETNE:
- case ISD::SETUNE:
- CondCode = ARM64CC::NE;
- break;
- }
-}
-
-static bool isLegalArithImmed(uint64_t C) {
- // Matches ARM64DAGToDAGISel::SelectArithImmed().
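-  // That is, an unsigned 12-bit immediate (0..0xFFF), or such an immediate
-  // shifted left by 12 bits (0x1000..0xFFF000 in 0x1000 steps). For example
-  // 0xFFF and 0xFFF000 are encodable, but 0x1001 is not.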
- return (C >> 12 == 0) || ((C & 0xFFFULL) == 0 && C >> 24 == 0);
-}
-
-static SDValue emitComparison(SDValue LHS, SDValue RHS, SDLoc dl,
- SelectionDAG &DAG) {
- EVT VT = LHS.getValueType();
-
- if (VT.isFloatingPoint())
- return DAG.getNode(ARM64ISD::FCMP, dl, VT, LHS, RHS);
-
- // The CMP instruction is just an alias for SUBS, and representing it as
- // SUBS means that it's possible to get CSE with subtract operations.
- // A later phase can perform the optimization of setting the destination
- // register to WZR/XZR if it ends up being unused.
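-  // For example, "cmp x0, x1" is encoded as "subs xzr, x0, x1".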
- return DAG.getNode(ARM64ISD::SUBS, dl, DAG.getVTList(VT, MVT::i32), LHS, RHS)
- .getValue(1);
-}
-
-static SDValue getARM64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
- SDValue &ARM64cc, SelectionDAG &DAG, SDLoc dl) {
- if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
- EVT VT = RHS.getValueType();
- uint64_t C = RHSC->getZExtValue();
- if (!isLegalArithImmed(C)) {
-      // The constant doesn't fit the immediate field of the compare; try
-      // adjusting the comparison by one so an equivalent constant does.
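-      // For example "x < 0x1001" (0x1001 is not encodable) can be rewritten
-      // as "x <= 0x1000", and 0x1000 is a legal shifted immediate.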
- switch (CC) {
- default:
- break;
- case ISD::SETLT:
- case ISD::SETGE:
- if ((VT == MVT::i32 && C != 0x80000000 &&
- isLegalArithImmed((uint32_t)(C - 1))) ||
- (VT == MVT::i64 && C != 0x80000000ULL &&
- isLegalArithImmed(C - 1ULL))) {
- CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
- C = (VT == MVT::i32) ? (uint32_t)(C - 1) : C - 1;
- RHS = DAG.getConstant(C, VT);
- }
- break;
- case ISD::SETULT:
- case ISD::SETUGE:
- if ((VT == MVT::i32 && C != 0 &&
- isLegalArithImmed((uint32_t)(C - 1))) ||
- (VT == MVT::i64 && C != 0ULL && isLegalArithImmed(C - 1ULL))) {
- CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
- C = (VT == MVT::i32) ? (uint32_t)(C - 1) : C - 1;
- RHS = DAG.getConstant(C, VT);
- }
- break;
- case ISD::SETLE:
- case ISD::SETGT:
- if ((VT == MVT::i32 && C != 0x7fffffff &&
- isLegalArithImmed((uint32_t)(C + 1))) ||
-          (VT == MVT::i64 && C != 0x7fffffffffffffffULL &&
- isLegalArithImmed(C + 1ULL))) {
- CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
- C = (VT == MVT::i32) ? (uint32_t)(C + 1) : C + 1;
- RHS = DAG.getConstant(C, VT);
- }
- break;
- case ISD::SETULE:
- case ISD::SETUGT:
- if ((VT == MVT::i32 && C != 0xffffffff &&
- isLegalArithImmed((uint32_t)(C + 1))) ||
-          (VT == MVT::i64 && C != 0xffffffffffffffffULL &&
- isLegalArithImmed(C + 1ULL))) {
- CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
- C = (VT == MVT::i32) ? (uint32_t)(C + 1) : C + 1;
- RHS = DAG.getConstant(C, VT);
- }
- break;
- }
- }
- }
-
- SDValue Cmp = emitComparison(LHS, RHS, dl, DAG);
- ARM64CC::CondCode ARM64CC = changeIntCCToARM64CC(CC);
- ARM64cc = DAG.getConstant(ARM64CC, MVT::i32);
- return Cmp;
-}
-
-static std::pair<SDValue, SDValue>
-getARM64XALUOOp(ARM64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG) {
- assert((Op.getValueType() == MVT::i32 || Op.getValueType() == MVT::i64) &&
- "Unsupported value type");
- SDValue Value, Overflow;
- SDLoc DL(Op);
- SDValue LHS = Op.getOperand(0);
- SDValue RHS = Op.getOperand(1);
- unsigned Opc = 0;
- switch (Op.getOpcode()) {
- default:
- llvm_unreachable("Unknown overflow instruction!");
- case ISD::SADDO:
- Opc = ARM64ISD::ADDS;
- CC = ARM64CC::VS;
- break;
- case ISD::UADDO:
- Opc = ARM64ISD::ADDS;
- CC = ARM64CC::CS;
- break;
- case ISD::SSUBO:
- Opc = ARM64ISD::SUBS;
- CC = ARM64CC::VS;
- break;
- case ISD::USUBO:
- Opc = ARM64ISD::SUBS;
- CC = ARM64CC::CC;
- break;
- // Multiply needs a little bit extra work.
- case ISD::SMULO:
- case ISD::UMULO: {
- CC = ARM64CC::NE;
-    bool IsSigned = Op.getOpcode() == ISD::SMULO;
- if (Op.getValueType() == MVT::i32) {
- unsigned ExtendOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
- // For a 32 bit multiply with overflow check we want the instruction
- // selector to generate a widening multiply (SMADDL/UMADDL). For that we
- // need to generate the following pattern:
-      // (i64 add 0, (i64 mul (i64 sext|zext i32 %a), (i64 sext|zext i32 %b)))
- LHS = DAG.getNode(ExtendOpc, DL, MVT::i64, LHS);
- RHS = DAG.getNode(ExtendOpc, DL, MVT::i64, RHS);
- SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, LHS, RHS);
- SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Mul,
- DAG.getConstant(0, MVT::i64));
- // On ARM64 the upper 32 bits are always zero extended for a 32 bit
- // operation. We need to clear out the upper 32 bits, because we used a
- // widening multiply that wrote all 64 bits. In the end this should be a
- // noop.
- Value = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Add);
- if (IsSigned) {
-        // The signed overflow check requires more than just a simple check
-        // for any bit set in the upper 32 bits of the result; those bits
-        // could be merely the sign extension of a negative 32-bit result. To
-        // perform the check we arithmetically shift the lower 32 bits of the
-        // result right by 31 bits, which replicates the sign bit, and then
-        // compare that against the upper 32 bits.
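-        // For example, -1 * 1 = 0xFFFFFFFFFFFFFFFF: the upper word is
-        // 0xFFFFFFFF and the lower word shifted right by 31 is also
-        // 0xFFFFFFFF, so no overflow. But 0x7FFFFFFF * 2 =
-        // 0x00000000FFFFFFFE: the upper word is 0 while the shifted lower
-        // word is 0xFFFFFFFF, so the multiply overflowed.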
- SDValue UpperBits = DAG.getNode(ISD::SRL, DL, MVT::i64, Add,
- DAG.getConstant(32, MVT::i64));
- UpperBits = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, UpperBits);
- SDValue LowerBits = DAG.getNode(ISD::SRA, DL, MVT::i32, Value,
- DAG.getConstant(31, MVT::i64));
- // It is important that LowerBits is last, otherwise the arithmetic
- // shift will not be folded into the compare (SUBS).
- SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32);
- Overflow = DAG.getNode(ARM64ISD::SUBS, DL, VTs, UpperBits, LowerBits)
- .getValue(1);
- } else {
- // The overflow check for unsigned multiply is easy. We only need to
- // check if any of the upper 32 bits are set. This can be done with a
- // CMP (shifted register). For that we need to generate the following
- // pattern:
-      // (i64 ARM64ISD::SUBS i64 0, (i64 srl i64 %Mul, i64 32))
- SDValue UpperBits = DAG.getNode(ISD::SRL, DL, MVT::i64, Mul,
- DAG.getConstant(32, MVT::i64));
- SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32);
- Overflow =
- DAG.getNode(ARM64ISD::SUBS, DL, VTs, DAG.getConstant(0, MVT::i64),
- UpperBits).getValue(1);
- }
- break;
- }
- assert(Op.getValueType() == MVT::i64 && "Expected an i64 value type");
- // For the 64 bit multiply
- Value = DAG.getNode(ISD::MUL, DL, MVT::i64, LHS, RHS);
- if (IsSigned) {
- SDValue UpperBits = DAG.getNode(ISD::MULHS, DL, MVT::i64, LHS, RHS);
- SDValue LowerBits = DAG.getNode(ISD::SRA, DL, MVT::i64, Value,
- DAG.getConstant(63, MVT::i64));
- // It is important that LowerBits is last, otherwise the arithmetic
- // shift will not be folded into the compare (SUBS).
- SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32);
- Overflow = DAG.getNode(ARM64ISD::SUBS, DL, VTs, UpperBits, LowerBits)
- .getValue(1);
- } else {
- SDValue UpperBits = DAG.getNode(ISD::MULHU, DL, MVT::i64, LHS, RHS);
- SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32);
- Overflow =
- DAG.getNode(ARM64ISD::SUBS, DL, VTs, DAG.getConstant(0, MVT::i64),
- UpperBits).getValue(1);
- }
- break;
- }
- } // switch (...)
-
- if (Opc) {
- SDVTList VTs = DAG.getVTList(Op->getValueType(0), MVT::i32);
-
- // Emit the ARM64 operation with overflow check.
- Value = DAG.getNode(Opc, DL, VTs, LHS, RHS);
- Overflow = Value.getValue(1);
- }
- return std::make_pair(Value, Overflow);
-}
-
-SDValue ARM64TargetLowering::LowerF128Call(SDValue Op, SelectionDAG &DAG,
- RTLIB::Libcall Call) const {
- SmallVector<SDValue, 2> Ops;
- for (unsigned i = 0, e = Op->getNumOperands(); i != e; ++i)
- Ops.push_back(Op.getOperand(i));
-
- return makeLibCall(DAG, Call, MVT::f128, &Ops[0], Ops.size(), false,
- SDLoc(Op)).first;
-}
-
-static SDValue LowerXOR(SDValue Op, SelectionDAG &DAG) {
- SDValue Sel = Op.getOperand(0);
- SDValue Other = Op.getOperand(1);
-
- // If neither operand is a SELECT_CC, give up.
- if (Sel.getOpcode() != ISD::SELECT_CC)
- std::swap(Sel, Other);
- if (Sel.getOpcode() != ISD::SELECT_CC)
- return Op;
-
- // The folding we want to perform is:
- // (xor x, (select_cc a, b, cc, 0, -1) )
- // -->
- // (csel x, (xor x, -1), cc ...)
- //
- // The latter will get matched to a CSINV instruction.
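-  // (CSINV Wd, Wn, Wm, cond gives Wd = Wn if cond holds and Wd = ~Wm
-  // otherwise, so the "xor x, -1" folds into the select itself.)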
-
- ISD::CondCode CC = cast<CondCodeSDNode>(Sel.getOperand(4))->get();
- SDValue LHS = Sel.getOperand(0);
- SDValue RHS = Sel.getOperand(1);
- SDValue TVal = Sel.getOperand(2);
- SDValue FVal = Sel.getOperand(3);
- SDLoc dl(Sel);
-
- // FIXME: This could be generalized to non-integer comparisons.
- if (LHS.getValueType() != MVT::i32 && LHS.getValueType() != MVT::i64)
- return Op;
-
- ConstantSDNode *CFVal = dyn_cast<ConstantSDNode>(FVal);
- ConstantSDNode *CTVal = dyn_cast<ConstantSDNode>(TVal);
-
-  // If the values aren't constants, this isn't the pattern we're looking for.
- if (!CFVal || !CTVal)
- return Op;
-
- // We can commute the SELECT_CC by inverting the condition. This
- // might be needed to make this fit into a CSINV pattern.
- if (CTVal->isAllOnesValue() && CFVal->isNullValue()) {
- std::swap(TVal, FVal);
- std::swap(CTVal, CFVal);
- CC = ISD::getSetCCInverse(CC, true);
- }
-
- // If the constants line up, perform the transform!
- if (CTVal->isNullValue() && CFVal->isAllOnesValue()) {
- SDValue CCVal;
- SDValue Cmp = getARM64Cmp(LHS, RHS, CC, CCVal, DAG, dl);
-
- FVal = Other;
- TVal = DAG.getNode(ISD::XOR, dl, Other.getValueType(), Other,
- DAG.getConstant(-1ULL, Other.getValueType()));
-
- return DAG.getNode(ARM64ISD::CSEL, dl, Sel.getValueType(), FVal, TVal,
- CCVal, Cmp);
- }
-
- return Op;
-}
-
-static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) {
- EVT VT = Op.getValueType();
-
- // Let legalize expand this if it isn't a legal type yet.
- if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
- return SDValue();
-
- SDVTList VTs = DAG.getVTList(VT, MVT::i32);
-
- unsigned Opc;
- bool ExtraOp = false;
- switch (Op.getOpcode()) {
- default:
-    llvm_unreachable("Invalid code");
- case ISD::ADDC:
- Opc = ARM64ISD::ADDS;
- break;
- case ISD::SUBC:
- Opc = ARM64ISD::SUBS;
- break;
- case ISD::ADDE:
- Opc = ARM64ISD::ADCS;
- ExtraOp = true;
- break;
- case ISD::SUBE:
- Opc = ARM64ISD::SBCS;
- ExtraOp = true;
- break;
- }
-
- if (!ExtraOp)
- return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0), Op.getOperand(1));
- return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0), Op.getOperand(1),
- Op.getOperand(2));
-}
-
-static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) {
- // Let legalize expand this if it isn't a legal type yet.
- if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
- return SDValue();
-
- ARM64CC::CondCode CC;
- // The actual operation that sets the overflow or carry flag.
- SDValue Value, Overflow;
- std::tie(Value, Overflow) = getARM64XALUOOp(CC, Op, DAG);
-
- // We use 0 and 1 as false and true values.
- SDValue TVal = DAG.getConstant(1, MVT::i32);
- SDValue FVal = DAG.getConstant(0, MVT::i32);
-
- // We use an inverted condition, because the conditional select is inverted
- // too. This will allow it to be selected to a single instruction:
- // CSINC Wd, WZR, WZR, invert(cond).
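-  // (CSINC Wd, Wn, Wm, cond gives Wd = Wn if cond holds and Wd = Wm + 1
-  // otherwise; with both sources WZR and the inverted condition this
-  // materializes exactly overflow ? 1 : 0.)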
- SDValue CCVal = DAG.getConstant(getInvertedCondCode(CC), MVT::i32);
- Overflow = DAG.getNode(ARM64ISD::CSEL, SDLoc(Op), MVT::i32, FVal, TVal, CCVal,
- Overflow);
-
- SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
- return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op), VTs, Value, Overflow);
-}
-
-// Prefetch operands are:
-// 1: Address to prefetch
-// 2: bool isWrite
-// 3: int locality (0 = no locality ... 3 = extreme locality)
-// 4: bool isDataCache
-static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG) {
- SDLoc DL(Op);
- unsigned IsWrite = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
- unsigned Locality = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
-  // The isData operand (operand 4) is not currently used.
- // unsigned isData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
-
- bool IsStream = !Locality;
-  // When a locality level is given, map it onto a target cache level.
- if (Locality) {
- // The front-end should have filtered out the out-of-range values
- assert(Locality <= 3 && "Prefetch locality out-of-range");
-    // The IR locality degree (higher = keep closer to the core) is the
-    // inverse of the target cache level, whose encoding starts at 0 for L1,
-    // so flip the range: locality 3 -> L1, locality 1 -> L3.
- Locality = 3 - Locality;
- }
-
-  // Build the PRFM operand value encoding the expected behavior.
- unsigned PrfOp = (IsWrite << 4) | // Load/Store bit
- (Locality << 1) | // Cache level bits
- (unsigned)IsStream; // Stream bit
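-  // For example, a read prefetch with locality 3 encodes as PrfOp = 0b00000
-  // (PLDL1KEEP), and a write prefetch with locality 0 as PrfOp = 0b10001
-  // (PSTL1STRM).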
- return DAG.getNode(ARM64ISD::PREFETCH, DL, MVT::Other, Op.getOperand(0),
- DAG.getConstant(PrfOp, MVT::i32), Op.getOperand(1));
-}
-
-SDValue ARM64TargetLowering::LowerFP_EXTEND(SDValue Op,
- SelectionDAG &DAG) const {
- assert(Op.getValueType() == MVT::f128 && "Unexpected lowering");
-
- RTLIB::Libcall LC;
- LC = RTLIB::getFPEXT(Op.getOperand(0).getValueType(), Op.getValueType());
-
- return LowerF128Call(Op, DAG, LC);
-}
-
-SDValue ARM64TargetLowering::LowerFP_ROUND(SDValue Op,
- SelectionDAG &DAG) const {
- if (Op.getOperand(0).getValueType() != MVT::f128) {
- // It's legal except when f128 is involved
- return Op;
- }
-
- RTLIB::Libcall LC;
- LC = RTLIB::getFPROUND(Op.getOperand(0).getValueType(), Op.getValueType());
-
- // FP_ROUND node has a second operand indicating whether it is known to be
- // precise. That doesn't take part in the LibCall so we can't directly use
- // LowerF128Call.
- SDValue SrcVal = Op.getOperand(0);
- return makeLibCall(DAG, LC, Op.getValueType(), &SrcVal, 1,
- /*isSigned*/ false, SDLoc(Op)).first;
-}
-
-static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
- // Warning: We maintain cost tables in ARM64TargetTransformInfo.cpp.
- // Any additional optimization in this function should be recorded
- // in the cost tables.
- EVT InVT = Op.getOperand(0).getValueType();
- EVT VT = Op.getValueType();
-
-  // FP_TO_XINT conversions to a type of the same size are legal.
- if (VT.getSizeInBits() == InVT.getSizeInBits())
- return Op;
-
- if (InVT == MVT::v2f64) {
- SDLoc dl(Op);
- SDValue Cv = DAG.getNode(Op.getOpcode(), dl, MVT::v2i64, Op.getOperand(0));
- return DAG.getNode(ISD::TRUNCATE, dl, VT, Cv);
- }
-
- // Type changing conversions are illegal.
- return SDValue();
-}
-
-SDValue ARM64TargetLowering::LowerFP_TO_INT(SDValue Op,
- SelectionDAG &DAG) const {
- if (Op.getOperand(0).getValueType().isVector())
- return LowerVectorFP_TO_INT(Op, DAG);
-
- if (Op.getOperand(0).getValueType() != MVT::f128) {
- // It's legal except when f128 is involved
- return Op;
- }
-
- RTLIB::Libcall LC;
- if (Op.getOpcode() == ISD::FP_TO_SINT)
- LC = RTLIB::getFPTOSINT(Op.getOperand(0).getValueType(), Op.getValueType());
- else
- LC = RTLIB::getFPTOUINT(Op.getOperand(0).getValueType(), Op.getValueType());
-
- SmallVector<SDValue, 2> Ops;
- for (unsigned i = 0, e = Op->getNumOperands(); i != e; ++i)
- Ops.push_back(Op.getOperand(i));
-
- return makeLibCall(DAG, LC, Op.getValueType(), &Ops[0], Ops.size(), false,
- SDLoc(Op)).first;
-}
-
-static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
- // Warning: We maintain cost tables in ARM64TargetTransformInfo.cpp.
- // Any additional optimization in this function should be recorded
- // in the cost tables.
- EVT VT = Op.getValueType();
- SDLoc dl(Op);
- SDValue In = Op.getOperand(0);
- EVT InVT = In.getValueType();
-
- // v2i32 to v2f32 is legal.
- if (VT == MVT::v2f32 && InVT == MVT::v2i32)
- return Op;
-
-  // Handle v2f64 results by extending the input to v2i64 first.
- if (VT == MVT::v2f64) {
- // Extend the input argument to a v2i64 that we can feed into the
- // floating point conversion. Zero or sign extend based on whether
- // we're doing a signed or unsigned float conversion.
- unsigned Opc =
- Op.getOpcode() == ISD::UINT_TO_FP ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
- assert(Op.getNumOperands() == 1 && "FP conversions take one argument");
- SDValue Promoted = DAG.getNode(Opc, dl, MVT::v2i64, Op.getOperand(0));
- return DAG.getNode(Op.getOpcode(), dl, Op.getValueType(), Promoted);
- }
-
- // Scalarize v2i64 to v2f32 conversions.
- std::vector<SDValue> BuildVectorOps;
- for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) {
- SDValue Sclr = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, In,
- DAG.getConstant(i, MVT::i64));
- Sclr = DAG.getNode(Op->getOpcode(), dl, MVT::f32, Sclr);
- BuildVectorOps.push_back(Sclr);
- }
-
- return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &BuildVectorOps[0],
- BuildVectorOps.size());
-}
-
-SDValue ARM64TargetLowering::LowerINT_TO_FP(SDValue Op,
- SelectionDAG &DAG) const {
- if (Op.getValueType().isVector())
- return LowerVectorINT_TO_FP(Op, DAG);
-
- // i128 conversions are libcalls.
- if (Op.getOperand(0).getValueType() == MVT::i128)
- return SDValue();
-
- // Other conversions are legal, unless it's to the completely software-based
- // fp128.
- if (Op.getValueType() != MVT::f128)
- return Op;
-
- RTLIB::Libcall LC;
- if (Op.getOpcode() == ISD::SINT_TO_FP)
- LC = RTLIB::getSINTTOFP(Op.getOperand(0).getValueType(), Op.getValueType());
- else
- LC = RTLIB::getUINTTOFP(Op.getOperand(0).getValueType(), Op.getValueType());
-
- return LowerF128Call(Op, DAG, LC);
-}
-
-SDValue ARM64TargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {
- // For iOS, we want to call an alternative entry point: __sincos_stret,
- // which returns the values in two S / D registers.
- SDLoc dl(Op);
- SDValue Arg = Op.getOperand(0);
- EVT ArgVT = Arg.getValueType();
- Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
-
- ArgListTy Args;
- ArgListEntry Entry;
-
- Entry.Node = Arg;
- Entry.Ty = ArgTy;
- Entry.isSExt = false;
- Entry.isZExt = false;
- Args.push_back(Entry);
-
- const char *LibcallName =
- (ArgVT == MVT::f64) ? "__sincos_stret" : "__sincosf_stret";
- SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy());
-
- StructType *RetTy = StructType::get(ArgTy, ArgTy, NULL);
- TargetLowering::CallLoweringInfo CLI(
- DAG.getEntryNode(), RetTy, false, false, false, false, 0,
-      CallingConv::Fast, /*isTailCall=*/false,
- /*doesNotRet=*/false, /*isReturnValueUsed*/ true, Callee, Args, DAG, dl);
- std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
- return CallResult.first;
-}
-
-SDValue ARM64TargetLowering::LowerOperation(SDValue Op,
- SelectionDAG &DAG) const {
- switch (Op.getOpcode()) {
- default:
- llvm_unreachable("unimplemented operand");
- return SDValue();
- case ISD::GlobalAddress:
- return LowerGlobalAddress(Op, DAG);
- case ISD::GlobalTLSAddress:
- return LowerGlobalTLSAddress(Op, DAG);
- case ISD::SETCC:
- return LowerSETCC(Op, DAG);
- case ISD::BR_CC:
- return LowerBR_CC(Op, DAG);
- case ISD::SELECT:
- return LowerSELECT(Op, DAG);
- case ISD::SELECT_CC:
- return LowerSELECT_CC(Op, DAG);
- case ISD::JumpTable:
- return LowerJumpTable(Op, DAG);
- case ISD::ConstantPool:
- return LowerConstantPool(Op, DAG);
- case ISD::BlockAddress:
- return LowerBlockAddress(Op, DAG);
- case ISD::VASTART:
- return LowerVASTART(Op, DAG);
- case ISD::VACOPY:
- return LowerVACOPY(Op, DAG);
- case ISD::VAARG:
- return LowerVAARG(Op, DAG);
- case ISD::ADDC:
- case ISD::ADDE:
- case ISD::SUBC:
- case ISD::SUBE:
- return LowerADDC_ADDE_SUBC_SUBE(Op, DAG);
- case ISD::SADDO:
- case ISD::UADDO:
- case ISD::SSUBO:
- case ISD::USUBO:
- case ISD::SMULO:
- case ISD::UMULO:
- return LowerXALUO(Op, DAG);
- case ISD::FADD:
- return LowerF128Call(Op, DAG, RTLIB::ADD_F128);
- case ISD::FSUB:
- return LowerF128Call(Op, DAG, RTLIB::SUB_F128);
- case ISD::FMUL:
- return LowerF128Call(Op, DAG, RTLIB::MUL_F128);
- case ISD::FDIV:
- return LowerF128Call(Op, DAG, RTLIB::DIV_F128);
- case ISD::FP_ROUND:
- return LowerFP_ROUND(Op, DAG);
- case ISD::FP_EXTEND:
- return LowerFP_EXTEND(Op, DAG);
- case ISD::FRAMEADDR:
- return LowerFRAMEADDR(Op, DAG);
- case ISD::RETURNADDR:
- return LowerRETURNADDR(Op, DAG);
- case ISD::INSERT_VECTOR_ELT:
- return LowerINSERT_VECTOR_ELT(Op, DAG);
- case ISD::EXTRACT_VECTOR_ELT:
- return LowerEXTRACT_VECTOR_ELT(Op, DAG);
- case ISD::SCALAR_TO_VECTOR:
- return LowerSCALAR_TO_VECTOR(Op, DAG);
- case ISD::BUILD_VECTOR:
- return LowerBUILD_VECTOR(Op, DAG);
- case ISD::VECTOR_SHUFFLE:
- return LowerVECTOR_SHUFFLE(Op, DAG);
- case ISD::EXTRACT_SUBVECTOR:
- return LowerEXTRACT_SUBVECTOR(Op, DAG);
- case ISD::SRA:
- case ISD::SRL:
- case ISD::SHL:
- return LowerVectorSRA_SRL_SHL(Op, DAG);
- case ISD::SHL_PARTS:
- return LowerShiftLeftParts(Op, DAG);
- case ISD::SRL_PARTS:
- case ISD::SRA_PARTS:
- return LowerShiftRightParts(Op, DAG);
- case ISD::CTPOP:
- return LowerCTPOP(Op, DAG);
- case ISD::FCOPYSIGN:
- return LowerFCOPYSIGN(Op, DAG);
- case ISD::AND:
- return LowerVectorAND(Op, DAG);
- case ISD::OR:
- return LowerVectorOR(Op, DAG);
- case ISD::XOR:
- return LowerXOR(Op, DAG);
- case ISD::PREFETCH:
- return LowerPREFETCH(Op, DAG);
- case ISD::SINT_TO_FP:
- case ISD::UINT_TO_FP:
- return LowerINT_TO_FP(Op, DAG);
- case ISD::FP_TO_SINT:
- case ISD::FP_TO_UINT:
- return LowerFP_TO_INT(Op, DAG);
- case ISD::FSINCOS:
- return LowerFSINCOS(Op, DAG);
- }
-}
-
-/// getFunctionAlignment - Return the Log2 alignment of this function.
-unsigned ARM64TargetLowering::getFunctionAlignment(const Function *F) const {
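-  // A Log2 alignment of 2 means 4-byte alignment, the size of one A64
-  // instruction.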
- return 2;
-}
-
-//===----------------------------------------------------------------------===//
-// Calling Convention Implementation
-//===----------------------------------------------------------------------===//
-
-#include "ARM64GenCallingConv.inc"
-
-/// Selects the correct CCAssignFn for the given CallingConvention value.
-CCAssignFn *ARM64TargetLowering::CCAssignFnForCall(CallingConv::ID CC,
- bool IsVarArg) const {
- switch (CC) {
- default:
- llvm_unreachable("Unsupported calling convention.");
- case CallingConv::WebKit_JS:
- return CC_ARM64_WebKit_JS;
- case CallingConv::C:
- case CallingConv::Fast:
- if (!Subtarget->isTargetDarwin())
- return CC_ARM64_AAPCS;
- return IsVarArg ? CC_ARM64_DarwinPCS_VarArg : CC_ARM64_DarwinPCS;
- }
-}
-
-SDValue ARM64TargetLowering::LowerFormalArguments(
- SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
- const SmallVectorImpl<ISD::InputArg> &Ins, SDLoc DL, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const {
- MachineFunction &MF = DAG.getMachineFunction();
- MachineFrameInfo *MFI = MF.getFrameInfo();
-
- // Assign locations to all of the incoming arguments.
- SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), ArgLocs, *DAG.getContext());
-
- // At this point, Ins[].VT may already be promoted to i32. To correctly
- // handle passing i8 as i8 instead of i32 on stack, we pass in both i32 and
- // i8 to CC_ARM64_AAPCS with i32 being ValVT and i8 being LocVT.
- // Since AnalyzeFormalArguments uses Ins[].VT for both ValVT and LocVT, here
- // we use a special version of AnalyzeFormalArguments to pass in ValVT and
- // LocVT.
- unsigned NumArgs = Ins.size();
- Function::const_arg_iterator CurOrigArg = MF.getFunction()->arg_begin();
- unsigned CurArgIdx = 0;
- for (unsigned i = 0; i != NumArgs; ++i) {
- MVT ValVT = Ins[i].VT;
- std::advance(CurOrigArg, Ins[i].OrigArgIndex - CurArgIdx);
- CurArgIdx = Ins[i].OrigArgIndex;
-
- // Get type of the original argument.
- EVT ActualVT = getValueType(CurOrigArg->getType(), /*AllowUnknown*/ true);
- MVT ActualMVT = ActualVT.isSimple() ? ActualVT.getSimpleVT() : MVT::Other;
- // If ActualMVT is i1/i8/i16, we should set LocVT to i8/i8/i16.
- MVT LocVT = ValVT;
- if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8)
- LocVT = MVT::i8;
- else if (ActualMVT == MVT::i16)
- LocVT = MVT::i16;
-
- CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, /*IsVarArg=*/false);
- bool Res =
- AssignFn(i, ValVT, LocVT, CCValAssign::Full, Ins[i].Flags, CCInfo);
- assert(!Res && "Call operand has unhandled type");
- (void)Res;
- }
-
- SmallVector<SDValue, 16> ArgValues;
- for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
- CCValAssign &VA = ArgLocs[i];
-
- // Arguments stored in registers.
- if (VA.isRegLoc()) {
- EVT RegVT = VA.getLocVT();
-
- SDValue ArgValue;
- const TargetRegisterClass *RC;
-
- if (RegVT == MVT::i32)
- RC = &ARM64::GPR32RegClass;
- else if (RegVT == MVT::i64)
- RC = &ARM64::GPR64RegClass;
- else if (RegVT == MVT::f32)
- RC = &ARM64::FPR32RegClass;
- else if (RegVT == MVT::f64 || RegVT == MVT::v1i64 ||
- RegVT == MVT::v1f64 || RegVT == MVT::v2i32 ||
- RegVT == MVT::v4i16 || RegVT == MVT::v8i8)
- RC = &ARM64::FPR64RegClass;
- else if (RegVT == MVT::v2i64 || RegVT == MVT::v4i32 ||
- RegVT == MVT::v8i16 || RegVT == MVT::v16i8)
- RC = &ARM64::FPR128RegClass;
- else
- llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
-
- // Transform the arguments in physical registers into virtual ones.
- unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
- ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, RegVT);
-
- // If this is an 8, 16 or 32-bit value, it is really passed promoted
- // to 64 bits. Insert an assert[sz]ext to capture this, then
- // truncate to the right size.
- switch (VA.getLocInfo()) {
- default:
- llvm_unreachable("Unknown loc info!");
- case CCValAssign::Full:
- break;
- case CCValAssign::BCvt:
- ArgValue = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), ArgValue);
- break;
- case CCValAssign::SExt:
- ArgValue = DAG.getNode(ISD::AssertSext, DL, RegVT, ArgValue,
- DAG.getValueType(VA.getValVT()));
- ArgValue = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), ArgValue);
- break;
- case CCValAssign::ZExt:
- ArgValue = DAG.getNode(ISD::AssertZext, DL, RegVT, ArgValue,
- DAG.getValueType(VA.getValVT()));
- ArgValue = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), ArgValue);
- break;
- }
-
- InVals.push_back(ArgValue);
-
- } else { // VA.isRegLoc()
- assert(VA.isMemLoc() && "CCValAssign is neither reg nor mem");
- unsigned ArgOffset = VA.getLocMemOffset();
- unsigned ArgSize = VA.getLocVT().getSizeInBits() / 8;
- int FI = MFI->CreateFixedObject(ArgSize, ArgOffset, true);
-
- // Create load nodes to retrieve arguments from the stack.
- SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
- InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, FIN,
- MachinePointerInfo::getFixedStack(FI), false,
- false, false, 0));
- }
- }
-
- // varargs
- if (isVarArg) {
- if (!Subtarget->isTargetDarwin()) {
-      // The AAPCS variadic function ABI is identical to the non-variadic
-      // one, so unnamed arguments may still arrive in registers; save the
-      // remaining argument registers so that va_arg can find them later.
- saveVarArgRegisters(CCInfo, DAG, DL, Chain);
- }
-
- ARM64FunctionInfo *AFI = MF.getInfo<ARM64FunctionInfo>();
- // This will point to the next argument passed via stack.
- unsigned StackOffset = CCInfo.getNextStackOffset();
- // We currently pass all varargs at 8-byte alignment.
- StackOffset = ((StackOffset + 7) & ~7);
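-    // (E.g. a next-stack-offset of 12 rounds up to 16.)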
- AFI->setVarArgsStackIndex(MFI->CreateFixedObject(4, StackOffset, true));
- }
-
- return Chain;
-}
-
-void ARM64TargetLowering::saveVarArgRegisters(CCState &CCInfo,
- SelectionDAG &DAG, SDLoc DL,
- SDValue &Chain) const {
- MachineFunction &MF = DAG.getMachineFunction();
- MachineFrameInfo *MFI = MF.getFrameInfo();
- ARM64FunctionInfo *FuncInfo = MF.getInfo<ARM64FunctionInfo>();
-
- SmallVector<SDValue, 8> MemOps;
-
- static const uint16_t GPRArgRegs[] = { ARM64::X0, ARM64::X1, ARM64::X2,
- ARM64::X3, ARM64::X4, ARM64::X5,
- ARM64::X6, ARM64::X7 };
- static const unsigned NumGPRArgRegs = array_lengthof(GPRArgRegs);
- unsigned FirstVariadicGPR =
- CCInfo.getFirstUnallocated(GPRArgRegs, NumGPRArgRegs);
-
- static const uint16_t FPRArgRegs[] = { ARM64::Q0, ARM64::Q1, ARM64::Q2,
- ARM64::Q3, ARM64::Q4, ARM64::Q5,
- ARM64::Q6, ARM64::Q7 };
- static const unsigned NumFPRArgRegs = array_lengthof(FPRArgRegs);
- unsigned FirstVariadicFPR =
- CCInfo.getFirstUnallocated(FPRArgRegs, NumFPRArgRegs);
-
- unsigned GPRSaveSize = 8 * (NumGPRArgRegs - FirstVariadicGPR);
- int GPRIdx = 0;
- if (GPRSaveSize != 0) {
- GPRIdx = MFI->CreateStackObject(GPRSaveSize, 8, false);
-
- SDValue FIN = DAG.getFrameIndex(GPRIdx, getPointerTy());
-
- for (unsigned i = FirstVariadicGPR; i < NumGPRArgRegs; ++i) {
- unsigned VReg = MF.addLiveIn(GPRArgRegs[i], &ARM64::GPR64RegClass);
- SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::i64);
- SDValue Store =
- DAG.getStore(Val.getValue(1), DL, Val, FIN,
- MachinePointerInfo::getStack(i * 8), false, false, 0);
- MemOps.push_back(Store);
- FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(), FIN,
- DAG.getConstant(8, getPointerTy()));
- }
- }
-
- unsigned FPRSaveSize = 16 * (NumFPRArgRegs - FirstVariadicFPR);
- int FPRIdx = 0;
- if (FPRSaveSize != 0) {
- FPRIdx = MFI->CreateStackObject(FPRSaveSize, 16, false);
-
- SDValue FIN = DAG.getFrameIndex(FPRIdx, getPointerTy());
-
- for (unsigned i = FirstVariadicFPR; i < NumFPRArgRegs; ++i) {
- unsigned VReg = MF.addLiveIn(FPRArgRegs[i], &ARM64::FPR128RegClass);
- SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::v2i64);
- SDValue Store =
- DAG.getStore(Val.getValue(1), DL, Val, FIN,
- MachinePointerInfo::getStack(i * 16), false, false, 0);
- MemOps.push_back(Store);
- FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(), FIN,
- DAG.getConstant(16, getPointerTy()));
- }
- }
-
- FuncInfo->setVarArgsGPRIndex(GPRIdx);
- FuncInfo->setVarArgsGPRSize(GPRSaveSize);
- FuncInfo->setVarArgsFPRIndex(FPRIdx);
- FuncInfo->setVarArgsFPRSize(FPRSaveSize);
-
- if (!MemOps.empty()) {
- Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, &MemOps[0],
- MemOps.size());
- }
-}
-
-/// LowerCallResult - Lower the result values of a call into the
-/// appropriate copies out of appropriate physical registers.
-SDValue ARM64TargetLowering::LowerCallResult(
- SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
- const SmallVectorImpl<ISD::InputArg> &Ins, SDLoc DL, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
- SDValue ThisVal) const {
- CCAssignFn *RetCC = CallConv == CallingConv::WebKit_JS ? RetCC_ARM64_WebKit_JS
- : RetCC_ARM64_AAPCS;
- // Assign locations to each value returned by this call.
- SmallVector<CCValAssign, 16> RVLocs;
- CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), RVLocs, *DAG.getContext());
- CCInfo.AnalyzeCallResult(Ins, RetCC);
-
- // Copy all of the result registers out of their specified physreg.
- for (unsigned i = 0; i != RVLocs.size(); ++i) {
- CCValAssign VA = RVLocs[i];
-
- // Pass 'this' value directly from the argument to return value, to avoid
- // reg unit interference
- if (i == 0 && isThisReturn) {
- assert(!VA.needsCustom() && VA.getLocVT() == MVT::i64 &&
- "unexpected return calling convention register assignment");
- InVals.push_back(ThisVal);
- continue;
- }
-
- SDValue Val =
- DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), InFlag);
- Chain = Val.getValue(1);
- InFlag = Val.getValue(2);
-
- switch (VA.getLocInfo()) {
- default:
- llvm_unreachable("Unknown loc info!");
- case CCValAssign::Full:
- break;
- case CCValAssign::BCvt:
- Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
- break;
- }
-
- InVals.push_back(Val);
- }
-
- return Chain;
-}
-
-bool ARM64TargetLowering::isEligibleForTailCallOptimization(
- SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
- bool isCalleeStructRet, bool isCallerStructRet,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const {
- // Look for obvious safe cases to perform tail call optimization that do not
-  // require ABI changes. This is what GCC calls a sibcall.
-
- // Do not sibcall optimize vararg calls unless the call site is not passing
- // any arguments.
- if (isVarArg && !Outs.empty())
- return false;
-
- // Also avoid sibcall optimization if either caller or callee uses struct
- // return semantics.
- if (isCalleeStructRet || isCallerStructRet)
- return false;
-
- // Note that currently ARM64 "C" calling convention and "Fast" calling
- // convention are compatible. If/when that ever changes, we'll need to
- // add checks here to make sure any interactions are OK.
-
- // If the callee takes no arguments then go on to check the results of the
- // call.
- if (!Outs.empty()) {
- // Check if stack adjustment is needed. For now, do not do this if any
- // argument is passed on the stack.
- SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), ArgLocs, *DAG.getContext());
- CCAssignFn *AssignFn = CCAssignFnForCall(CalleeCC, /*IsVarArg=*/false);
- CCInfo.AnalyzeCallOperands(Outs, AssignFn);
- if (CCInfo.getNextStackOffset()) {
- // Check if the arguments are already laid out in the right way as
- // the caller's fixed stack objects.
- for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size(); i != e;
- ++i, ++realArgIdx) {
- CCValAssign &VA = ArgLocs[i];
- if (VA.getLocInfo() == CCValAssign::Indirect)
- return false;
- if (VA.needsCustom()) {
- // Just don't handle anything that needs custom adjustments for now.
- // If need be, we can revisit later, but we shouldn't ever end up
- // here.
- return false;
- } else if (!VA.isRegLoc()) {
- // Likewise, don't try to handle stack based arguments for the
- // time being.
- return false;
- }
- }
- }
- }
-
- return true;
-}
-
-/// LowerCall - Lower a call to a callseq_start + CALL + callseq_end chain,
-/// and add input and output parameter nodes.
-SDValue ARM64TargetLowering::LowerCall(CallLoweringInfo &CLI,
- SmallVectorImpl<SDValue> &InVals) const {
- SelectionDAG &DAG = CLI.DAG;
- SDLoc &DL = CLI.DL;
- SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
- SmallVector<SDValue, 32> &OutVals = CLI.OutVals;
- SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins;
- SDValue Chain = CLI.Chain;
- SDValue Callee = CLI.Callee;
- bool &IsTailCall = CLI.IsTailCall;
- CallingConv::ID CallConv = CLI.CallConv;
- bool IsVarArg = CLI.IsVarArg;
-
- MachineFunction &MF = DAG.getMachineFunction();
-  bool IsStructRet = !Outs.empty() && Outs[0].Flags.isSRet();
- bool IsThisReturn = false;
-
- // If tail calls are explicitly disabled, make sure not to use them.
- if (!EnableARM64TailCalls)
- IsTailCall = false;
-
- if (IsTailCall) {
- // Check if it's really possible to do a tail call.
- IsTailCall = isEligibleForTailCallOptimization(
- Callee, CallConv, IsVarArg, IsStructRet,
- MF.getFunction()->hasStructRetAttr(), Outs, OutVals, Ins, DAG);
- // We don't support GuaranteedTailCallOpt, only automatically
- // detected sibcalls.
- // FIXME: Re-evaluate. Is this true? Should it be true?
- if (IsTailCall)
- ++NumTailCalls;
- }
-
- // Analyze operands of the call, assigning locations to each operand.
- SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(),
- getTargetMachine(), ArgLocs, *DAG.getContext());
-
- if (IsVarArg) {
- // Handle fixed and variable vector arguments differently.
- // Variable vector arguments always go into memory.
- unsigned NumArgs = Outs.size();
-
- for (unsigned i = 0; i != NumArgs; ++i) {
- MVT ArgVT = Outs[i].VT;
- ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
- CCAssignFn *AssignFn = CCAssignFnForCall(CallConv,
- /*IsVarArg=*/ !Outs[i].IsFixed);
- bool Res = AssignFn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo);
- assert(!Res && "Call operand has unhandled type");
- (void)Res;
- }
- } else {
- // At this point, Outs[].VT may already be promoted to i32. To correctly
- // handle passing i8 as i8 instead of i32 on stack, we pass in both i32 and
- // i8 to CC_ARM64_AAPCS with i32 being ValVT and i8 being LocVT.
- // Since AnalyzeCallOperands uses Ins[].VT for both ValVT and LocVT, here
- // we use a special version of AnalyzeCallOperands to pass in ValVT and
- // LocVT.
- unsigned NumArgs = Outs.size();
- for (unsigned i = 0; i != NumArgs; ++i) {
- MVT ValVT = Outs[i].VT;
- // Get type of the original argument.
- EVT ActualVT = getValueType(CLI.Args[Outs[i].OrigArgIndex].Ty,
- /*AllowUnknown*/ true);
- MVT ActualMVT = ActualVT.isSimple() ? ActualVT.getSimpleVT() : ValVT;
- ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
- // If ActualMVT is i1/i8/i16, we should set LocVT to i8/i8/i16.
- MVT LocVT = ValVT;
- if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8)
- LocVT = MVT::i8;
- else if (ActualMVT == MVT::i16)
- LocVT = MVT::i16;
-
- CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, /*IsVarArg=*/false);
- bool Res = AssignFn(i, ValVT, LocVT, CCValAssign::Full, ArgFlags, CCInfo);
- assert(!Res && "Call operand has unhandled type");
- (void)Res;
- }
- }
-
- // Get a count of how many bytes are to be pushed on the stack.
- unsigned NumBytes = CCInfo.getNextStackOffset();
-
- // Adjust the stack pointer for the new arguments...
- // These operations are automatically eliminated by the prolog/epilog pass
- if (!IsTailCall)
- Chain =
- DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true), DL);
-
- SDValue StackPtr = DAG.getCopyFromReg(Chain, DL, ARM64::SP, getPointerTy());
-
- SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
- SmallVector<SDValue, 8> MemOpChains;
-
- // Walk the register/memloc assignments, inserting copies/loads.
- for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size(); i != e;
- ++i, ++realArgIdx) {
- CCValAssign &VA = ArgLocs[i];
- SDValue Arg = OutVals[realArgIdx];
- ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
-
- // Promote the value if needed.
- switch (VA.getLocInfo()) {
- default:
- llvm_unreachable("Unknown loc info!");
- case CCValAssign::Full:
- break;
- case CCValAssign::SExt:
- Arg = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Arg);
- break;
- case CCValAssign::ZExt:
- Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg);
- break;
- case CCValAssign::AExt:
- Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg);
- break;
- case CCValAssign::BCvt:
- Arg = DAG.getNode(ISD::BITCAST, DL, VA.getLocVT(), Arg);
- break;
- case CCValAssign::FPExt:
- Arg = DAG.getNode(ISD::FP_EXTEND, DL, VA.getLocVT(), Arg);
- break;
- }
-
- if (VA.isRegLoc()) {
- if (realArgIdx == 0 && Flags.isReturned() && Outs[0].VT == MVT::i64) {
- assert(VA.getLocVT() == MVT::i64 &&
- "unexpected calling convention register assignment");
- assert(!Ins.empty() && Ins[0].VT == MVT::i64 &&
- "unexpected use of 'returned'");
- IsThisReturn = true;
- }
- RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
- } else {
- assert(VA.isMemLoc());
- // There's no reason we can't support stack args w/ tailcall, but
- // we currently don't, so assert if we see one.
- assert(!IsTailCall && "stack argument with tail call!?");
- unsigned LocMemOffset = VA.getLocMemOffset();
- SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
- PtrOff = DAG.getNode(ISD::ADD, DL, getPointerTy(), StackPtr, PtrOff);
-
- // Since we pass i1/i8/i16 as i1/i8/i16 on stack and Arg is already
- // promoted to a legal register type i32, we should truncate Arg back to
- // i1/i8/i16.
- if (Arg.getValueType().isSimple() &&
- Arg.getValueType().getSimpleVT() == MVT::i32 &&
- (VA.getLocVT() == MVT::i1 || VA.getLocVT() == MVT::i8 ||
- VA.getLocVT() == MVT::i16))
- Arg = DAG.getNode(ISD::TRUNCATE, DL, VA.getLocVT(), Arg);
-
- SDValue Store = DAG.getStore(Chain, DL, Arg, PtrOff,
- MachinePointerInfo::getStack(LocMemOffset),
- false, false, 0);
- MemOpChains.push_back(Store);
- }
- }
-
- if (!MemOpChains.empty())
- Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, &MemOpChains[0],
- MemOpChains.size());
-
- // Build a sequence of copy-to-reg nodes chained together with token chain
- // and flag operands which copy the outgoing args into the appropriate regs.
- SDValue InFlag;
- for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
- Chain = DAG.getCopyToReg(Chain, DL, RegsToPass[i].first,
- RegsToPass[i].second, InFlag);
- InFlag = Chain.getValue(1);
- }
-
- // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
-  // direct call is), turn it into a TargetGlobalAddress/TargetExternalSymbol
- // node so that legalize doesn't hack it.
- if (getTargetMachine().getCodeModel() == CodeModel::Large &&
- Subtarget->isTargetMachO()) {
- if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
- const GlobalValue *GV = G->getGlobal();
- bool InternalLinkage = GV->hasInternalLinkage();
- if (InternalLinkage)
- Callee = DAG.getTargetGlobalAddress(GV, DL, getPointerTy(), 0, 0);
- else {
- Callee = DAG.getTargetGlobalAddress(GV, DL, getPointerTy(), 0,
- ARM64II::MO_GOT);
- Callee = DAG.getNode(ARM64ISD::LOADgot, DL, getPointerTy(), Callee);
- }
- } else if (ExternalSymbolSDNode *S =
- dyn_cast<ExternalSymbolSDNode>(Callee)) {
- const char *Sym = S->getSymbol();
- Callee =
- DAG.getTargetExternalSymbol(Sym, getPointerTy(), ARM64II::MO_GOT);
- Callee = DAG.getNode(ARM64ISD::LOADgot, DL, getPointerTy(), Callee);
- }
- } else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
- const GlobalValue *GV = G->getGlobal();
- Callee = DAG.getTargetGlobalAddress(GV, DL, getPointerTy(), 0, 0);
- } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
- const char *Sym = S->getSymbol();
- Callee = DAG.getTargetExternalSymbol(Sym, getPointerTy(), 0);
- }
-
- std::vector<SDValue> Ops;
- Ops.push_back(Chain);
- Ops.push_back(Callee);
-
- // Add argument registers to the end of the list so that they are known live
- // into the call.
- for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
- Ops.push_back(DAG.getRegister(RegsToPass[i].first,
- RegsToPass[i].second.getValueType()));
-
- // Add a register mask operand representing the call-preserved registers.
- const uint32_t *Mask;
- const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
- const ARM64RegisterInfo *ARI = static_cast<const ARM64RegisterInfo *>(TRI);
- if (IsThisReturn) {
- // For 'this' returns, use the X0-preserving mask if applicable
- Mask = ARI->getThisReturnPreservedMask(CallConv);
- if (!Mask) {
- IsThisReturn = false;
- Mask = ARI->getCallPreservedMask(CallConv);
- }
- } else
- Mask = ARI->getCallPreservedMask(CallConv);
-
- assert(Mask && "Missing call preserved mask for calling convention");
- Ops.push_back(DAG.getRegisterMask(Mask));
-
- if (InFlag.getNode())
- Ops.push_back(InFlag);
-
- SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
-
-  // If we're doing a tail call, use a TC_RETURN here rather than an
- // actual call instruction.
- if (IsTailCall)
- return DAG.getNode(ARM64ISD::TC_RETURN, DL, NodeTys, &Ops[0], Ops.size());
-
- // Returns a chain and a flag for retval copy to use.
- Chain = DAG.getNode(ARM64ISD::CALL, DL, NodeTys, &Ops[0], Ops.size());
- InFlag = Chain.getValue(1);
-
- Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
- DAG.getIntPtrConstant(0, true), InFlag, DL);
- if (!Ins.empty())
- InFlag = Chain.getValue(1);
-
- // Handle result values, copying them out of physregs into vregs that we
- // return.
- return LowerCallResult(Chain, InFlag, CallConv, IsVarArg, Ins, DL, DAG,
- InVals, IsThisReturn,
- IsThisReturn ? OutVals[0] : SDValue());
-}
-
-bool ARM64TargetLowering::CanLowerReturn(
- CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,
- const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
- CCAssignFn *RetCC = CallConv == CallingConv::WebKit_JS ? RetCC_ARM64_WebKit_JS
- : RetCC_ARM64_AAPCS;
- SmallVector<CCValAssign, 16> RVLocs;
- CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(), RVLocs, Context);
- return CCInfo.CheckReturn(Outs, RetCC);
-}
-
-SDValue
-ARM64TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
- bool isVarArg,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- SDLoc DL, SelectionDAG &DAG) const {
- CCAssignFn *RetCC = CallConv == CallingConv::WebKit_JS ? RetCC_ARM64_WebKit_JS
- : RetCC_ARM64_AAPCS;
- SmallVector<CCValAssign, 16> RVLocs;
- CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
- getTargetMachine(), RVLocs, *DAG.getContext());
- CCInfo.AnalyzeReturn(Outs, RetCC);
-
- // Copy the result values into the output registers.
- SDValue Flag;
- SmallVector<SDValue, 4> RetOps(1, Chain);
- for (unsigned i = 0, realRVLocIdx = 0; i != RVLocs.size();
- ++i, ++realRVLocIdx) {
- CCValAssign &VA = RVLocs[i];
- assert(VA.isRegLoc() && "Can only return in registers!");
- SDValue Arg = OutVals[realRVLocIdx];
-
- switch (VA.getLocInfo()) {
- default:
- llvm_unreachable("Unknown loc info!");
- case CCValAssign::Full:
- break;
- case CCValAssign::BCvt:
- Arg = DAG.getNode(ISD::BITCAST, DL, VA.getLocVT(), Arg);
- break;
- }
-
- Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Arg, Flag);
- Flag = Chain.getValue(1);
- RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
- }
-
- RetOps[0] = Chain; // Update chain.
-
- // Add the flag if we have it.
- if (Flag.getNode())
- RetOps.push_back(Flag);
-
- return DAG.getNode(ARM64ISD::RET_FLAG, DL, MVT::Other, &RetOps[0],
- RetOps.size());
-}
-
-//===----------------------------------------------------------------------===//
-// Other Lowering Code
-//===----------------------------------------------------------------------===//
-
-SDValue ARM64TargetLowering::LowerGlobalAddress(SDValue Op,
- SelectionDAG &DAG) const {
- EVT PtrVT = getPointerTy();
- SDLoc DL(Op);
- const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
- unsigned char OpFlags =
- Subtarget->ClassifyGlobalReference(GV, getTargetMachine());
-
- assert(cast<GlobalAddressSDNode>(Op)->getOffset() == 0 &&
- "unexpected offset in global node");
-
- // This also catches the large code model case for Darwin.
- if ((OpFlags & ARM64II::MO_GOT) != 0) {
- SDValue GotAddr = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, OpFlags);
- // FIXME: Once remat is capable of dealing with instructions with register
- // operands, expand this into two nodes instead of using a wrapper node.
- return DAG.getNode(ARM64ISD::LOADgot, DL, PtrVT, GotAddr);
- }
-
- if (getTargetMachine().getCodeModel() == CodeModel::Large) {
- const unsigned char MO_NC = ARM64II::MO_NC;
- return DAG.getNode(
- ARM64ISD::WrapperLarge, DL, PtrVT,
- DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, ARM64II::MO_G3),
- DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, ARM64II::MO_G2 | MO_NC),
- DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, ARM64II::MO_G1 | MO_NC),
- DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, ARM64II::MO_G0 | MO_NC));
- } else {
- // Use ADRP/ADD or ADRP/LDR for everything else: the small model on ELF and
- // the only correct model on Darwin.
- SDValue Hi = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
- OpFlags | ARM64II::MO_PAGE);
- unsigned char LoFlags = OpFlags | ARM64II::MO_PAGEOFF | ARM64II::MO_NC;
- SDValue Lo = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, LoFlags);
-
- SDValue ADRP = DAG.getNode(ARM64ISD::ADRP, DL, PtrVT, Hi);
- return DAG.getNode(ARM64ISD::ADDlow, DL, PtrVT, ADRP, Lo);
- }
-}
-
-/// \brief Convert a TLS address reference into the correct sequence of loads
-/// and calls to compute the variable's address (for Darwin, currently) and
-/// return an SDValue containing the final node.
- ///
-/// Darwin only has one TLS scheme which must be capable of dealing with the
-/// fully general situation, in the worst case. This means:
-/// + "extern __thread" declaration.
-/// + Defined in a possibly unknown dynamic library.
-///
-/// The general system is that each __thread variable has a [3 x i64] descriptor
-/// which contains information used by the runtime to calculate the address. The
-/// only part of this the compiler needs to know about is the first xword, which
-/// contains a function pointer that must be called with the address of the
-/// entire descriptor in "x0".
-///
-/// Since this descriptor may be in a different unit, in general even the
-/// descriptor must be accessed via an indirect load. The "ideal" code sequence
-/// is:
-/// adrp x0, _var@TLVPPAGE
-/// ldr x0, [x0, _var@TLVPPAGEOFF] ; x0 now contains address of descriptor
-/// ldr x1, [x0] ; x1 contains 1st entry of descriptor,
-/// ; the function pointer
-/// blr x1 ; Uses descriptor address in x0
-/// ; Address of _var is now in x0.
-///
-/// If the address of _var's descriptor *is* known to the linker, then it can
-/// change the first "ldr" instruction to an appropriate "add x0, x0, #imm" for
-/// a slight efficiency gain.
-SDValue
-ARM64TargetLowering::LowerDarwinGlobalTLSAddress(SDValue Op,
- SelectionDAG &DAG) const {
- assert(Subtarget->isTargetDarwin() && "TLS only supported on Darwin");
-
- SDLoc DL(Op);
- MVT PtrVT = getPointerTy();
- const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
-
- SDValue TLVPAddr =
- DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, ARM64II::MO_TLS);
- SDValue DescAddr = DAG.getNode(ARM64ISD::LOADgot, DL, PtrVT, TLVPAddr);
-
- // The first entry in the descriptor is a function pointer that we must call
- // to obtain the address of the variable.
- SDValue Chain = DAG.getEntryNode();
- SDValue FuncTLVGet =
- DAG.getLoad(MVT::i64, DL, Chain, DescAddr, MachinePointerInfo::getGOT(),
- false, true, true, 8);
- Chain = FuncTLVGet.getValue(1);
-
- MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
- MFI->setAdjustsStack(true);
-
- // TLS calls preserve all registers except those that absolutely must be
- // trashed: X0 (it takes an argument), LR (it's a call) and CPSR (let's not be
- // silly).
- const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
- const ARM64RegisterInfo *ARI = static_cast<const ARM64RegisterInfo *>(TRI);
- const uint32_t *Mask = ARI->getTLSCallPreservedMask();
-
- // Finally, we can make the call. This is just a degenerate version of a
- // normal ARM64 call node: x0 takes the address of the descriptor, and the
- // call returns the address of the variable in this thread.
- Chain = DAG.getCopyToReg(Chain, DL, ARM64::X0, DescAddr, SDValue());
- Chain = DAG.getNode(ARM64ISD::CALL, DL, DAG.getVTList(MVT::Other, MVT::Glue),
- Chain, FuncTLVGet, DAG.getRegister(ARM64::X0, MVT::i64),
- DAG.getRegisterMask(Mask), Chain.getValue(1));
- return DAG.getCopyFromReg(Chain, DL, ARM64::X0, PtrVT, Chain.getValue(1));
-}
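For reference, the [3 x i64] descriptor described in the comment above can be sketched as a C-style struct. This is illustrative only: the field names are our assumption, not taken from the Darwin runtime headers, and only the first entry is relevant to the compiler.

struct TLVDescriptor {
  void *(*Thunk)(struct TLVDescriptor *); // 1st xword: resolver, called with
                                          // the descriptor's address in x0
  unsigned long long Key;                 // runtime-internal (assumed name)
  unsigned long long Offset;              // runtime-internal (assumed name)
};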
-
-/// When accessing thread-local variables under either the general-dynamic or
-/// local-dynamic system, we make a "TLS-descriptor" call. The variable will
-/// have a descriptor, accessible via a PC-relative ADRP, and whose first entry
-/// is a function pointer to carry out the resolution. This function takes the
-/// address of the descriptor in X0 and returns the TPIDR_EL0 offset in X0. All
-/// other registers (except LR, CPSR) are preserved.
-///
-/// Thus, the ideal call sequence on AArch64 is:
-///
-/// adrp x0, :tlsdesc:thread_var
-/// ldr x8, [x0, :tlsdesc_lo12:thread_var]
-/// add x0, x0, :tlsdesc_lo12:thread_var
-/// .tlsdesccall thread_var
-/// blr x8
-/// (TPIDR_EL0 offset now in x0).
-///
-/// The ".tlsdesccall" directive instructs the assembler to insert a particular
-/// relocation to help the linker relax this sequence if it turns out to be too
-/// conservative.
-///
-/// FIXME: we currently produce an extra, duplicated, ADRP instruction, but this
-/// is harmless.
-SDValue ARM64TargetLowering::LowerELFTLSDescCall(SDValue SymAddr,
- SDValue DescAddr, SDLoc DL,
- SelectionDAG &DAG) const {
- EVT PtrVT = getPointerTy();
-
- // The function we need to call is simply the first entry in the GOT for this
- // descriptor, load it in preparation.
- SDValue Func = DAG.getNode(ARM64ISD::LOADgot, DL, PtrVT, SymAddr);
-
- // TLS calls preserve all registers except those that absolutely must be
- // trashed: X0 (it takes an argument), LR (it's a call) and CPSR (let's not be
- // silly).
- const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
- const ARM64RegisterInfo *ARI = static_cast<const ARM64RegisterInfo *>(TRI);
- const uint32_t *Mask = ARI->getTLSCallPreservedMask();
-
- // The function takes only one argument: the address of the descriptor itself
- // in X0.
- SDValue Glue, Chain;
- Chain = DAG.getCopyToReg(DAG.getEntryNode(), DL, ARM64::X0, DescAddr, Glue);
- Glue = Chain.getValue(1);
-
- // We're now ready to populate the argument list, as with a normal call:
- SmallVector<SDValue, 6> Ops;
- Ops.push_back(Chain);
- Ops.push_back(Func);
- Ops.push_back(SymAddr);
- Ops.push_back(DAG.getRegister(ARM64::X0, PtrVT));
- Ops.push_back(DAG.getRegisterMask(Mask));
- Ops.push_back(Glue);
-
- SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
- Chain = DAG.getNode(ARM64ISD::TLSDESC_CALL, DL, NodeTys, &Ops[0], Ops.size());
- Glue = Chain.getValue(1);
-
- return DAG.getCopyFromReg(Chain, DL, ARM64::X0, PtrVT, Glue);
-}
-
-SDValue ARM64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op,
- SelectionDAG &DAG) const {
- assert(Subtarget->isTargetELF() && "This function expects an ELF target");
- assert(getTargetMachine().getCodeModel() == CodeModel::Small &&
- "ELF TLS only supported in small memory model");
- const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
-
- TLSModel::Model Model = getTargetMachine().getTLSModel(GA->getGlobal());
-
- SDValue TPOff;
- EVT PtrVT = getPointerTy();
- SDLoc DL(Op);
- const GlobalValue *GV = GA->getGlobal();
-
- SDValue ThreadBase = DAG.getNode(ARM64ISD::THREAD_POINTER, DL, PtrVT);
-
- if (Model == TLSModel::LocalExec) {
- SDValue HiVar = DAG.getTargetGlobalAddress(
- GV, DL, PtrVT, 0, ARM64II::MO_TLS | ARM64II::MO_G1);
- SDValue LoVar = DAG.getTargetGlobalAddress(
- GV, DL, PtrVT, 0, ARM64II::MO_TLS | ARM64II::MO_G0 | ARM64II::MO_NC);
-
- TPOff = SDValue(DAG.getMachineNode(ARM64::MOVZXi, DL, PtrVT, HiVar,
- DAG.getTargetConstant(16, MVT::i32)),
- 0);
- TPOff = SDValue(DAG.getMachineNode(ARM64::MOVKXi, DL, PtrVT, TPOff, LoVar,
- DAG.getTargetConstant(0, MVT::i32)),
- 0);
- } else if (Model == TLSModel::InitialExec) {
- TPOff = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, ARM64II::MO_TLS);
- TPOff = DAG.getNode(ARM64ISD::LOADgot, DL, PtrVT, TPOff);
- } else if (Model == TLSModel::LocalDynamic) {
- // Local-dynamic accesses proceed in two phases. A general-dynamic TLS
- // descriptor call against the special symbol _TLS_MODULE_BASE_ to calculate
- // the beginning of the module's TLS region, followed by a DTPREL offset
- // calculation.
-
- // These accesses will need deduplicating if there's more than one.
- ARM64FunctionInfo *MFI =
- DAG.getMachineFunction().getInfo<ARM64FunctionInfo>();
- MFI->incNumLocalDynamicTLSAccesses();
-
- // Accesses used in this sequence go via the TLS descriptor which lives in
- // the GOT. Prepare an address we can use to handle this.
- SDValue HiDesc = DAG.getTargetExternalSymbol(
- "_TLS_MODULE_BASE_", PtrVT, ARM64II::MO_TLS | ARM64II::MO_PAGE);
- SDValue LoDesc = DAG.getTargetExternalSymbol(
- "_TLS_MODULE_BASE_", PtrVT,
- ARM64II::MO_TLS | ARM64II::MO_PAGEOFF | ARM64II::MO_NC);
-
- // First argument to the descriptor call is the address of the descriptor
- // itself.
- SDValue DescAddr = DAG.getNode(ARM64ISD::ADRP, DL, PtrVT, HiDesc);
- DescAddr = DAG.getNode(ARM64ISD::ADDlow, DL, PtrVT, DescAddr, LoDesc);
-
- // The call needs a relocation too for linker relaxation. It doesn't make
- // sense to call it MO_PAGE or MO_PAGEOFF though so we need another copy of
- // the address.
- SDValue SymAddr = DAG.getTargetExternalSymbol("_TLS_MODULE_BASE_", PtrVT,
- ARM64II::MO_TLS);
-
- // Now we can calculate the offset from TPIDR_EL0 to this module's
- // thread-local area.
- TPOff = LowerELFTLSDescCall(SymAddr, DescAddr, DL, DAG);
-
- // Now use :dtprel_whatever: operations to calculate this variable's offset
- // in its thread-storage area.
- SDValue HiVar = DAG.getTargetGlobalAddress(
- GV, DL, MVT::i64, 0, ARM64II::MO_TLS | ARM64II::MO_G1);
- SDValue LoVar = DAG.getTargetGlobalAddress(
- GV, DL, MVT::i64, 0, ARM64II::MO_TLS | ARM64II::MO_G0 | ARM64II::MO_NC);
-
- SDValue DTPOff =
- SDValue(DAG.getMachineNode(ARM64::MOVZXi, DL, PtrVT, HiVar,
- DAG.getTargetConstant(16, MVT::i32)),
- 0);
- DTPOff = SDValue(DAG.getMachineNode(ARM64::MOVKXi, DL, PtrVT, DTPOff, LoVar,
- DAG.getTargetConstant(0, MVT::i32)),
- 0);
-
- TPOff = DAG.getNode(ISD::ADD, DL, PtrVT, TPOff, DTPOff);
- } else if (Model == TLSModel::GeneralDynamic) {
- // Accesses used in this sequence go via the TLS descriptor which lives in
- // the GOT. Prepare an address we can use to handle this.
- SDValue HiDesc = DAG.getTargetGlobalAddress(
- GV, DL, PtrVT, 0, ARM64II::MO_TLS | ARM64II::MO_PAGE);
- SDValue LoDesc = DAG.getTargetGlobalAddress(
- GV, DL, PtrVT, 0,
- ARM64II::MO_TLS | ARM64II::MO_PAGEOFF | ARM64II::MO_NC);
-
- // First argument to the descriptor call is the address of the descriptor
- // itself.
- SDValue DescAddr = DAG.getNode(ARM64ISD::ADRP, DL, PtrVT, HiDesc);
- DescAddr = DAG.getNode(ARM64ISD::ADDlow, DL, PtrVT, DescAddr, LoDesc);
-
- // The call needs a relocation too for linker relaxation. It doesn't make
- // sense to call it MO_PAGE or MO_PAGEOFF though so we need another copy of
- // the address.
- SDValue SymAddr =
- DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, ARM64II::MO_TLS);
-
- // Finally we can make a call to calculate the offset from tpidr_el0.
- TPOff = LowerELFTLSDescCall(SymAddr, DescAddr, DL, DAG);
- } else
- llvm_unreachable("Unsupported ELF TLS access model");
-
- return DAG.getNode(ISD::ADD, DL, PtrVT, ThreadBase, TPOff);
-}
-
-SDValue ARM64TargetLowering::LowerGlobalTLSAddress(SDValue Op,
- SelectionDAG &DAG) const {
- if (Subtarget->isTargetDarwin())
- return LowerDarwinGlobalTLSAddress(Op, DAG);
- else if (Subtarget->isTargetELF())
- return LowerELFGlobalTLSAddress(Op, DAG);
-
- llvm_unreachable("Unexpected platform trying to use TLS");
-}
-
-SDValue ARM64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
- SDValue Chain = Op.getOperand(0);
- ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
- SDValue LHS = Op.getOperand(2);
- SDValue RHS = Op.getOperand(3);
- SDValue Dest = Op.getOperand(4);
- SDLoc dl(Op);
-
- // Handle f128 first, since lowering it will result in comparing the return
- // value of a libcall against zero, which is just what the rest of LowerBR_CC
- // is expecting to deal with.
- if (LHS.getValueType() == MVT::f128) {
- softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl);
-
- // If softenSetCCOperands returned a scalar, we need to compare the result
- // against zero to select between true and false values.
- if (RHS.getNode() == 0) {
- RHS = DAG.getConstant(0, LHS.getValueType());
- CC = ISD::SETNE;
- }
- }
-
- // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a branch
- // instruction.
- unsigned Opc = LHS.getOpcode();
- if (LHS.getResNo() == 1 && isa<ConstantSDNode>(RHS) &&
- cast<ConstantSDNode>(RHS)->isOne() &&
- (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
- Opc == ISD::USUBO || Opc == ISD::SMULO || Opc == ISD::UMULO)) {
- assert((CC == ISD::SETEQ || CC == ISD::SETNE) &&
- "Unexpected condition code.");
- // Only lower legal XALUO ops.
- if (!DAG.getTargetLoweringInfo().isTypeLegal(LHS->getValueType(0)))
- return SDValue();
-
- // The actual operation with overflow check.
- ARM64CC::CondCode OFCC;
- SDValue Value, Overflow;
- std::tie(Value, Overflow) = getARM64XALUOOp(OFCC, LHS.getValue(0), DAG);
-
- if (CC == ISD::SETNE)
- OFCC = getInvertedCondCode(OFCC);
- SDValue CCVal = DAG.getConstant(OFCC, MVT::i32);
-
- return DAG.getNode(ARM64ISD::BRCOND, SDLoc(LHS), MVT::Other, Chain, Dest,
- CCVal, Overflow);
- }
-
- if (LHS.getValueType().isInteger()) {
- assert((LHS.getValueType() == RHS.getValueType()) &&
- (LHS.getValueType() == MVT::i32 || LHS.getValueType() == MVT::i64));
-
- // If the RHS of the comparison is zero, we can potentially fold this
- // to a specialized branch.
- const ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS);
- if (RHSC && RHSC->getZExtValue() == 0) {
- if (CC == ISD::SETEQ) {
- // See if we can use a TBZ to fold in an AND as well.
- // TBZ has a smaller branch displacement than CBZ. If the offset is
- // out of bounds, a late MI-layer pass rewrites branches.
- // 403.gcc is an example that hits this case.
- if (LHS.getOpcode() == ISD::AND &&
- isa<ConstantSDNode>(LHS.getOperand(1)) &&
- isPowerOf2_64(LHS.getConstantOperandVal(1))) {
- SDValue Test = LHS.getOperand(0);
- uint64_t Mask = LHS.getConstantOperandVal(1);
-
- // TBZ only operates on i64's, but the ext should be free.
- if (Test.getValueType() == MVT::i32)
- Test = DAG.getAnyExtOrTrunc(Test, dl, MVT::i64);
-
- return DAG.getNode(ARM64ISD::TBZ, dl, MVT::Other, Chain, Test,
- DAG.getConstant(Log2_64(Mask), MVT::i64), Dest);
- }
-
- return DAG.getNode(ARM64ISD::CBZ, dl, MVT::Other, Chain, LHS, Dest);
- } else if (CC == ISD::SETNE) {
- // See if we can use a TBZ to fold in an AND as well.
- // TBZ has a smaller branch displacement than CBZ. If the offset is
- // out of bounds, a late MI-layer pass rewrites branches.
- // 403.gcc is an example that hits this case.
- if (LHS.getOpcode() == ISD::AND &&
- isa<ConstantSDNode>(LHS.getOperand(1)) &&
- isPowerOf2_64(LHS.getConstantOperandVal(1))) {
- SDValue Test = LHS.getOperand(0);
- uint64_t Mask = LHS.getConstantOperandVal(1);
-
- // TBNZ only operates on i64's, but the ext should be free.
- if (Test.getValueType() == MVT::i32)
- Test = DAG.getAnyExtOrTrunc(Test, dl, MVT::i64);
-
- return DAG.getNode(ARM64ISD::TBNZ, dl, MVT::Other, Chain, Test,
- DAG.getConstant(Log2_64(Mask), MVT::i64), Dest);
- }
-
- return DAG.getNode(ARM64ISD::CBNZ, dl, MVT::Other, Chain, LHS, Dest);
- }
- }
-
- SDValue CCVal;
- SDValue Cmp = getARM64Cmp(LHS, RHS, CC, CCVal, DAG, dl);
- return DAG.getNode(ARM64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal,
- Cmp);
- }
-
- assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64);
-
- // Unfortunately, the mapping of LLVM FP CC's onto ARM64 CC's isn't totally
- // clean. Some of them require two branches to implement.
- SDValue Cmp = emitComparison(LHS, RHS, dl, DAG);
- ARM64CC::CondCode CC1, CC2;
- changeFPCCToARM64CC(CC, CC1, CC2);
- SDValue CC1Val = DAG.getConstant(CC1, MVT::i32);
- SDValue BR1 =
- DAG.getNode(ARM64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CC1Val, Cmp);
- if (CC2 != ARM64CC::AL) {
- SDValue CC2Val = DAG.getConstant(CC2, MVT::i32);
- return DAG.getNode(ARM64ISD::BRCOND, dl, MVT::Other, BR1, Dest, CC2Val,
- Cmp);
- }
-
- return BR1;
-}
-
-SDValue ARM64TargetLowering::LowerFCOPYSIGN(SDValue Op,
- SelectionDAG &DAG) const {
- EVT VT = Op.getValueType();
- SDLoc DL(Op);
-
- SDValue In1 = Op.getOperand(0);
- SDValue In2 = Op.getOperand(1);
- EVT SrcVT = In2.getValueType();
- if (SrcVT != VT) {
- if (SrcVT == MVT::f32 && VT == MVT::f64)
- In2 = DAG.getNode(ISD::FP_EXTEND, DL, VT, In2);
- else if (SrcVT == MVT::f64 && VT == MVT::f32)
- In2 = DAG.getNode(ISD::FP_ROUND, DL, VT, In2, DAG.getIntPtrConstant(0));
- else
- // FIXME: Src type is different, bail out for now. Can VT really be a
- // vector type?
- return SDValue();
- }
-
- EVT VecVT;
- EVT EltVT;
- SDValue EltMask, VecVal1, VecVal2;
- if (VT == MVT::f32 || VT == MVT::v2f32 || VT == MVT::v4f32) {
- EltVT = MVT::i32;
- VecVT = MVT::v4i32;
- EltMask = DAG.getConstant(0x80000000ULL, EltVT);
-
- if (!VT.isVector()) {
- VecVal1 = DAG.getTargetInsertSubreg(ARM64::ssub, DL, VecVT,
- DAG.getUNDEF(VecVT), In1);
- VecVal2 = DAG.getTargetInsertSubreg(ARM64::ssub, DL, VecVT,
- DAG.getUNDEF(VecVT), In2);
- } else {
- VecVal1 = DAG.getNode(ISD::BITCAST, DL, VecVT, In1);
- VecVal2 = DAG.getNode(ISD::BITCAST, DL, VecVT, In2);
- }
- } else if (VT == MVT::f64 || VT == MVT::v2f64) {
- EltVT = MVT::i64;
- VecVT = MVT::v2i64;
-
- // We want to materialize a mask with the high bit set, but the AdvSIMD
- // immediate moves cannot materialize that in a single instruction for
- // 64-bit elements. Instead, materialize zero and then negate it.
- EltMask = DAG.getConstant(0, EltVT);
-
- if (!VT.isVector()) {
- VecVal1 = DAG.getTargetInsertSubreg(ARM64::dsub, DL, VecVT,
- DAG.getUNDEF(VecVT), In1);
- VecVal2 = DAG.getTargetInsertSubreg(ARM64::dsub, DL, VecVT,
- DAG.getUNDEF(VecVT), In2);
- } else {
- VecVal1 = DAG.getNode(ISD::BITCAST, DL, VecVT, In1);
- VecVal2 = DAG.getNode(ISD::BITCAST, DL, VecVT, In2);
- }
- } else {
- llvm_unreachable("Invalid type for copysign!");
- }
-
- std::vector<SDValue> BuildVectorOps;
- for (unsigned i = 0; i < VecVT.getVectorNumElements(); ++i)
- BuildVectorOps.push_back(EltMask);
-
- SDValue BuildVec = DAG.getNode(ISD::BUILD_VECTOR, DL, VecVT,
- &BuildVectorOps[0], BuildVectorOps.size());
-
- // If we couldn't materialize the mask above, then the mask vector will be
- // the zero vector, and we need to negate it here.
- if (VT == MVT::f64 || VT == MVT::v2f64) {
- BuildVec = DAG.getNode(ISD::BITCAST, DL, MVT::v2f64, BuildVec);
- BuildVec = DAG.getNode(ISD::FNEG, DL, MVT::v2f64, BuildVec);
- BuildVec = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, BuildVec);
- }
-
- SDValue Sel =
- DAG.getNode(ARM64ISD::BIT, DL, VecVT, VecVal1, VecVal2, BuildVec);
-
- if (VT == MVT::f32)
- return DAG.getTargetExtractSubreg(ARM64::ssub, DL, VT, Sel);
- else if (VT == MVT::f64)
- return DAG.getTargetExtractSubreg(ARM64::dsub, DL, VT, Sel);
- else
- return DAG.getNode(ISD::BITCAST, DL, VT, Sel);
-}
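A scalar reference model of the BIT-based selection above, for the f64 case (a sketch of the semantics, not the emitted code): the mask keeps the sign bit from In2 and every other bit from In1.

#include <string.h>

static double copysign_ref(double Mag, double Sgn) {
  unsigned long long M, S, Mask = 0x8000000000000000ULL; // the "EltMask" bit
  memcpy(&M, &Mag, sizeof M);
  memcpy(&S, &Sgn, sizeof S);
  M = (M & ~Mask) | (S & Mask); // BIT: insert the sign bit of Sgn into Mag
  memcpy(&Mag, &M, sizeof M);
  return Mag;
}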
-
-SDValue ARM64TargetLowering::LowerCTPOP(SDValue Op, SelectionDAG &DAG) const {
- if (DAG.getMachineFunction().getFunction()->getAttributes().hasAttribute(
- AttributeSet::FunctionIndex, Attribute::NoImplicitFloat))
- return SDValue();
-
- // While there is no integer popcount instruction, it can
- // be more efficiently lowered to the following sequence that uses
- // AdvSIMD registers/instructions as long as the copies to/from
- // the AdvSIMD registers are cheap.
- // FMOV D0, X0 // copy 64-bit int to vector, high bits zero'd
- // CNT V0.8B, V0.8B // 8xbyte pop-counts
- // ADDV B0, V0.8B // sum 8xbyte pop-counts
- // UMOV X0, V0.B[0] // copy byte result back to integer reg
- SDValue Val = Op.getOperand(0);
- SDLoc DL(Op);
- EVT VT = Op.getValueType();
- SDValue ZeroVec = DAG.getUNDEF(MVT::v8i8);
-
- SDValue VecVal;
- if (VT == MVT::i32) {
- VecVal = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val);
- VecVal =
- DAG.getTargetInsertSubreg(ARM64::ssub, DL, MVT::v8i8, ZeroVec, VecVal);
- } else {
- VecVal = DAG.getNode(ISD::BITCAST, DL, MVT::v8i8, Val);
- }
-
- SDValue CtPop = DAG.getNode(ISD::CTPOP, DL, MVT::v8i8, VecVal);
- SDValue UaddLV = DAG.getNode(
- ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
- DAG.getConstant(Intrinsic::arm64_neon_uaddlv, MVT::i32), CtPop);
-
- if (VT == MVT::i64)
- UaddLV = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, UaddLV);
- return UaddLV;
-}
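A scalar reference model of the CNT/ADDV sequence above (a sketch of the semantics only): count the set bits within each of the eight bytes, then sum the per-byte counts.

static unsigned popcount64_ref(unsigned long long X) {
  unsigned Sum = 0;
  for (int I = 0; I < 8; ++I) {
    unsigned char B = (unsigned char)(X >> (8 * I)); // one lane of V0.8B
    for (; B; B >>= 1)
      Sum += B & 1;                                  // CNT within the lane
  }
  return Sum;                                        // ADDV across the lanes
}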
-
-SDValue ARM64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
- if (Op.getValueType().isVector())
- return LowerVSETCC(Op, DAG);
-
- SDValue LHS = Op.getOperand(0);
- SDValue RHS = Op.getOperand(1);
- ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
- SDLoc dl(Op);
-
- // We chose ZeroOrOneBooleanContents, so use zero and one.
- EVT VT = Op.getValueType();
- SDValue TVal = DAG.getConstant(1, VT);
- SDValue FVal = DAG.getConstant(0, VT);
-
- // Handle f128 first, since one possible outcome is a normal integer
- // comparison which gets picked up by the next if statement.
- if (LHS.getValueType() == MVT::f128) {
- softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl);
-
- // If softenSetCCOperands returned a scalar, use it.
- if (RHS.getNode() == 0) {
- assert(LHS.getValueType() == Op.getValueType() &&
- "Unexpected setcc expansion!");
- return LHS;
- }
- }
-
- if (LHS.getValueType().isInteger()) {
- SDValue CCVal;
- SDValue Cmp =
- getARM64Cmp(LHS, RHS, ISD::getSetCCInverse(CC, true), CCVal, DAG, dl);
-
- // Note that we inverted the condition above, so we reverse the order of
- // the true and false operands here. This will allow the setcc to be
- // matched to a single CSINC instruction.
- return DAG.getNode(ARM64ISD::CSEL, dl, VT, FVal, TVal, CCVal, Cmp);
- }
-
- // Now we know we're dealing with FP values.
- assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64);
-
- // If that fails, we'll need to perform an FCMP + CSEL sequence. Go ahead
- // and do the comparison.
- SDValue Cmp = emitComparison(LHS, RHS, dl, DAG);
-
- ARM64CC::CondCode CC1, CC2;
- changeFPCCToARM64CC(CC, CC1, CC2);
- if (CC2 == ARM64CC::AL) {
- changeFPCCToARM64CC(ISD::getSetCCInverse(CC, false), CC1, CC2);
- SDValue CC1Val = DAG.getConstant(CC1, MVT::i32);
-
- // Note that we inverted the condition above, so we reverse the order of
- // the true and false operands here. This will allow the setcc to be
- // matched to a single CSINC instruction.
- return DAG.getNode(ARM64ISD::CSEL, dl, VT, FVal, TVal, CC1Val, Cmp);
- } else {
- // Unfortunately, the mapping of LLVM FP CC's onto ARM64 CC's isn't totally
- // clean. Some of them require two CSELs to implement. As is in this case,
- // we emit the first CSEL and then emit a second using the output of the
- // first as the RHS. We're effectively OR'ing the two CC's together.
-
- // FIXME: It would be nice if we could match the two CSELs to two CSINCs.
- SDValue CC1Val = DAG.getConstant(CC1, MVT::i32);
- SDValue CS1 = DAG.getNode(ARM64ISD::CSEL, dl, VT, TVal, FVal, CC1Val, Cmp);
-
- SDValue CC2Val = DAG.getConstant(CC2, MVT::i32);
- return DAG.getNode(ARM64ISD::CSEL, dl, VT, TVal, CS1, CC2Val, Cmp);
- }
-}
-
-/// A SELECT_CC operation is really some kind of max or min if both values being
-/// compared are, in some sense, equal to the results in either case. However,
-/// it is permissible to compare f32 values and produce directly extended f64
-/// values.
-///
-/// Extending the comparison operands would also be allowed, but is less likely
-/// to happen in practice since their use is right here. Note that truncate
-/// operations would *not* be semantically equivalent.
-static bool selectCCOpsAreFMaxCompatible(SDValue Cmp, SDValue Result) {
- if (Cmp == Result)
- return true;
-
- ConstantFPSDNode *CCmp = dyn_cast<ConstantFPSDNode>(Cmp);
- ConstantFPSDNode *CResult = dyn_cast<ConstantFPSDNode>(Result);
- if (CCmp && CResult && Cmp.getValueType() == MVT::f32 &&
- Result.getValueType() == MVT::f64) {
- bool Lossy;
- APFloat CmpVal = CCmp->getValueAPF();
- CmpVal.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &Lossy);
- return CResult->getValueAPF().bitwiseIsEqual(CmpVal);
- }
-
- return Result->getOpcode() == ISD::FP_EXTEND && Result->getOperand(0) == Cmp;
-}
-
-SDValue ARM64TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
- SDValue CC = Op->getOperand(0);
- SDValue TVal = Op->getOperand(1);
- SDValue FVal = Op->getOperand(2);
- SDLoc DL(Op);
-
- unsigned Opc = CC.getOpcode();
- // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a select
- // instruction.
- if (CC.getResNo() == 1 &&
- (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
- Opc == ISD::USUBO || Opc == ISD::SMULO || Opc == ISD::UMULO)) {
- // Only lower legal XALUO ops.
- if (!DAG.getTargetLoweringInfo().isTypeLegal(CC->getValueType(0)))
- return SDValue();
-
- ARM64CC::CondCode OFCC;
- SDValue Value, Overflow;
- std::tie(Value, Overflow) = getARM64XALUOOp(OFCC, CC.getValue(0), DAG);
- SDValue CCVal = DAG.getConstant(OFCC, MVT::i32);
-
- return DAG.getNode(ARM64ISD::CSEL, DL, Op.getValueType(), TVal, FVal, CCVal,
- Overflow);
- }
-
- if (CC.getOpcode() == ISD::SETCC)
- return DAG.getSelectCC(DL, CC.getOperand(0), CC.getOperand(1), TVal, FVal,
- cast<CondCodeSDNode>(CC.getOperand(2))->get());
- else
- return DAG.getSelectCC(DL, CC, DAG.getConstant(0, CC.getValueType()), TVal,
- FVal, ISD::SETNE);
-}
-
-SDValue ARM64TargetLowering::LowerSELECT_CC(SDValue Op,
- SelectionDAG &DAG) const {
- ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
- SDValue LHS = Op.getOperand(0);
- SDValue RHS = Op.getOperand(1);
- SDValue TVal = Op.getOperand(2);
- SDValue FVal = Op.getOperand(3);
- SDLoc dl(Op);
-
- // Handle f128 first, because it will result in a comparison of some RTLIB
- // call result against zero.
- if (LHS.getValueType() == MVT::f128) {
- softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl);
-
- // If softenSetCCOperands returned a scalar, we need to compare the result
- // against zero to select between true and false values.
- if (RHS.getNode() == 0) {
- RHS = DAG.getConstant(0, LHS.getValueType());
- CC = ISD::SETNE;
- }
- }
-
- // Handle integers first.
- if (LHS.getValueType().isInteger()) {
- assert((LHS.getValueType() == RHS.getValueType()) &&
- (LHS.getValueType() == MVT::i32 || LHS.getValueType() == MVT::i64));
-
- unsigned Opcode = ARM64ISD::CSEL;
-
- // If both the TVal and the FVal are constants, see if we can swap them in
- // order to form a CSINV or CSINC out of them.
- ConstantSDNode *CFVal = dyn_cast<ConstantSDNode>(FVal);
- ConstantSDNode *CTVal = dyn_cast<ConstantSDNode>(TVal);
-
- if (CTVal && CFVal && CTVal->isAllOnesValue() && CFVal->isNullValue()) {
- std::swap(TVal, FVal);
- std::swap(CTVal, CFVal);
- CC = ISD::getSetCCInverse(CC, true);
- } else if (CTVal && CFVal && CTVal->isOne() && CFVal->isNullValue()) {
- std::swap(TVal, FVal);
- std::swap(CTVal, CFVal);
- CC = ISD::getSetCCInverse(CC, true);
- } else if (TVal.getOpcode() == ISD::XOR) {
- // If TVal is a NOT we want to swap TVal and FVal so that we can match
- // with a CSINV rather than a CSEL.
- ConstantSDNode *CVal = dyn_cast<ConstantSDNode>(TVal.getOperand(1));
-
- if (CVal && CVal->isAllOnesValue()) {
- std::swap(TVal, FVal);
- std::swap(CTVal, CFVal);
- CC = ISD::getSetCCInverse(CC, true);
- }
- } else if (TVal.getOpcode() == ISD::SUB) {
- // If TVal is a negation (SUB from 0) we want to swap TVal and FVal so
- // that we can match with a CSNEG rather than a CSEL.
- ConstantSDNode *CVal = dyn_cast<ConstantSDNode>(TVal.getOperand(0));
-
- if (CVal && CVal->isNullValue()) {
- std::swap(TVal, FVal);
- std::swap(CTVal, CFVal);
- CC = ISD::getSetCCInverse(CC, true);
- }
- } else if (CTVal && CFVal) {
- const int64_t TrueVal = CTVal->getSExtValue();
- const int64_t FalseVal = CFVal->getSExtValue();
- bool Swap = false;
-
- // If both TVal and FVal are constants, see if FVal is the
- // inverse/negation/increment of TVal and generate a CSINV/CSNEG/CSINC
- // instead of a CSEL in that case.
- if (TrueVal == ~FalseVal) {
- Opcode = ARM64ISD::CSINV;
- } else if (TrueVal == -FalseVal) {
- Opcode = ARM64ISD::CSNEG;
- } else if (TVal.getValueType() == MVT::i32) {
- // If our operands are only 32-bit wide, make sure we use 32-bit
- // arithmetic for the check whether we can use CSINC. This ensures that
- // the addition in the check will wrap around properly in case there is
- // an overflow (which would not be the case if we do the check with
- // 64-bit arithmetic).
- const uint32_t TrueVal32 = CTVal->getZExtValue();
- const uint32_t FalseVal32 = CFVal->getZExtValue();
-
- if ((TrueVal32 == FalseVal32 + 1) || (TrueVal32 + 1 == FalseVal32)) {
- Opcode = ARM64ISD::CSINC;
-
- if (TrueVal32 > FalseVal32) {
- Swap = true;
- }
- }
- // 64-bit check whether we can use CSINC.
- } else if ((TrueVal == FalseVal + 1) || (TrueVal + 1 == FalseVal)) {
- Opcode = ARM64ISD::CSINC;
-
- if (TrueVal > FalseVal) {
- Swap = true;
- }
- }
-
- // Swap TVal and FVal if necessary.
- if (Swap) {
- std::swap(TVal, FVal);
- std::swap(CTVal, CFVal);
- CC = ISD::getSetCCInverse(CC, true);
- }
-
- if (Opcode != ARM64ISD::CSEL) {
- // Drop FVal since we can get its value by simply inverting/negating
- // TVal.
- FVal = TVal;
- }
- }
-
- SDValue CCVal;
- SDValue Cmp = getARM64Cmp(LHS, RHS, CC, CCVal, DAG, dl);
-
- EVT VT = Op.getValueType();
- return DAG.getNode(Opcode, dl, VT, TVal, FVal, CCVal, Cmp);
- }
-
- // Now we know we're dealing with FP values.
- assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64);
- assert(LHS.getValueType() == RHS.getValueType());
- EVT VT = Op.getValueType();
-
- // Try to match this select into a max/min operation, which have dedicated
- // opcode in the instruction set.
- // NOTE: This is not correct in the presence of NaNs, so we only enable this
- // in no-NaNs mode.
- if (getTargetMachine().Options.NoNaNsFPMath) {
- if (selectCCOpsAreFMaxCompatible(LHS, FVal) &&
- selectCCOpsAreFMaxCompatible(RHS, TVal)) {
- CC = ISD::getSetCCSwappedOperands(CC);
- std::swap(TVal, FVal);
- }
-
- if (selectCCOpsAreFMaxCompatible(LHS, TVal) &&
- selectCCOpsAreFMaxCompatible(RHS, FVal)) {
- switch (CC) {
- default:
- break;
- case ISD::SETGT:
- case ISD::SETGE:
- case ISD::SETUGT:
- case ISD::SETUGE:
- case ISD::SETOGT:
- case ISD::SETOGE:
- return DAG.getNode(ARM64ISD::FMAX, dl, VT, TVal, FVal);
- break;
- case ISD::SETLT:
- case ISD::SETLE:
- case ISD::SETULT:
- case ISD::SETULE:
- case ISD::SETOLT:
- case ISD::SETOLE:
- return DAG.getNode(ARM64ISD::FMIN, dl, VT, TVal, FVal);
- break;
- }
- }
- }
-
- // If that fails, we'll need to perform an FCMP + CSEL sequence. Go ahead
- // and do the comparison.
- SDValue Cmp = emitComparison(LHS, RHS, dl, DAG);
-
- // Unfortunately, the mapping of LLVM FP CC's onto ARM64 CC's isn't totally
- // clean. Some of them require two CSELs to implement.
- ARM64CC::CondCode CC1, CC2;
- changeFPCCToARM64CC(CC, CC1, CC2);
- SDValue CC1Val = DAG.getConstant(CC1, MVT::i32);
- SDValue CS1 = DAG.getNode(ARM64ISD::CSEL, dl, VT, TVal, FVal, CC1Val, Cmp);
-
- // If we need a second CSEL, emit it, using the output of the first as the
- // RHS. We're effectively OR'ing the two CC's together.
- if (CC2 != ARM64CC::AL) {
- SDValue CC2Val = DAG.getConstant(CC2, MVT::i32);
- return DAG.getNode(ARM64ISD::CSEL, dl, VT, TVal, CS1, CC2Val, Cmp);
- }
-
- // Otherwise, return the output of the first CSEL.
- return CS1;
-}
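A reference model of the CSINC case above (a sketch): once the constants differ by one (after the possible swap and condition inversion), the false value can be recomputed from the true value, so only one constant needs to be materialized.

static long select_csinc(int Cond, long T) {
  // Precondition (assumed): the original false value was T + 1.
  return Cond ? T : T + 1; // lowers to: csinc xD, xT, xT, <cond>
}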
-
-SDValue ARM64TargetLowering::LowerJumpTable(SDValue Op,
- SelectionDAG &DAG) const {
- // Jump table entries are PC-relative offsets. No additional tweaking
- // is necessary here. Just get the address of the jump table.
- JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
- EVT PtrVT = getPointerTy();
- SDLoc DL(Op);
-
- SDValue Hi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, ARM64II::MO_PAGE);
- SDValue Lo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
- ARM64II::MO_PAGEOFF | ARM64II::MO_NC);
- SDValue ADRP = DAG.getNode(ARM64ISD::ADRP, DL, PtrVT, Hi);
- return DAG.getNode(ARM64ISD::ADDlow, DL, PtrVT, ADRP, Lo);
-}
-
-SDValue ARM64TargetLowering::LowerConstantPool(SDValue Op,
- SelectionDAG &DAG) const {
- ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
- EVT PtrVT = getPointerTy();
- SDLoc DL(Op);
-
- if (getTargetMachine().getCodeModel() == CodeModel::Large) {
- // Use the GOT for the large code model on iOS.
- if (Subtarget->isTargetMachO()) {
- SDValue GotAddr = DAG.getTargetConstantPool(
- CP->getConstVal(), PtrVT, CP->getAlignment(), CP->getOffset(),
- ARM64II::MO_GOT);
- return DAG.getNode(ARM64ISD::LOADgot, DL, PtrVT, GotAddr);
- }
-
- const unsigned char MO_NC = ARM64II::MO_NC;
- return DAG.getNode(
- ARM64ISD::WrapperLarge, DL, PtrVT,
- DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlignment(),
- CP->getOffset(), ARM64II::MO_G3),
- DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlignment(),
- CP->getOffset(), ARM64II::MO_G2 | MO_NC),
- DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlignment(),
- CP->getOffset(), ARM64II::MO_G1 | MO_NC),
- DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlignment(),
- CP->getOffset(), ARM64II::MO_G0 | MO_NC));
- } else {
- // Use ADRP/ADD or ADRP/LDR for everything else: the small memory model on
- // ELF, and the only valid one on Darwin.
- SDValue Hi =
- DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlignment(),
- CP->getOffset(), ARM64II::MO_PAGE);
- SDValue Lo = DAG.getTargetConstantPool(
- CP->getConstVal(), PtrVT, CP->getAlignment(), CP->getOffset(),
- ARM64II::MO_PAGEOFF | ARM64II::MO_NC);
-
- SDValue ADRP = DAG.getNode(ARM64ISD::ADRP, DL, PtrVT, Hi);
- return DAG.getNode(ARM64ISD::ADDlow, DL, PtrVT, ADRP, Lo);
- }
-}
-
-SDValue ARM64TargetLowering::LowerBlockAddress(SDValue Op,
- SelectionDAG &DAG) const {
- const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
- EVT PtrVT = getPointerTy();
- SDLoc DL(Op);
- if (getTargetMachine().getCodeModel() == CodeModel::Large &&
- !Subtarget->isTargetMachO()) {
- const unsigned char MO_NC = ARM64II::MO_NC;
- return DAG.getNode(
- ARM64ISD::WrapperLarge, DL, PtrVT,
- DAG.getTargetBlockAddress(BA, PtrVT, 0, ARM64II::MO_G3),
- DAG.getTargetBlockAddress(BA, PtrVT, 0, ARM64II::MO_G2 | MO_NC),
- DAG.getTargetBlockAddress(BA, PtrVT, 0, ARM64II::MO_G1 | MO_NC),
- DAG.getTargetBlockAddress(BA, PtrVT, 0, ARM64II::MO_G0 | MO_NC));
- } else {
- SDValue Hi = DAG.getTargetBlockAddress(BA, PtrVT, 0, ARM64II::MO_PAGE);
- SDValue Lo = DAG.getTargetBlockAddress(BA, PtrVT, 0, ARM64II::MO_PAGEOFF |
- ARM64II::MO_NC);
- SDValue ADRP = DAG.getNode(ARM64ISD::ADRP, DL, PtrVT, Hi);
- return DAG.getNode(ARM64ISD::ADDlow, DL, PtrVT, ADRP, Lo);
- }
-}
-
-SDValue ARM64TargetLowering::LowerDarwin_VASTART(SDValue Op,
- SelectionDAG &DAG) const {
- ARM64FunctionInfo *FuncInfo =
- DAG.getMachineFunction().getInfo<ARM64FunctionInfo>();
-
- SDLoc DL(Op);
- SDValue FR =
- DAG.getFrameIndex(FuncInfo->getVarArgsStackIndex(), getPointerTy());
- const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
- return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1),
- MachinePointerInfo(SV), false, false, 0);
-}
-
-SDValue ARM64TargetLowering::LowerAAPCS_VASTART(SDValue Op,
- SelectionDAG &DAG) const {
- // The layout of the va_list struct is specified in the AArch64 Procedure Call
- // Standard, section B.3.
- MachineFunction &MF = DAG.getMachineFunction();
- ARM64FunctionInfo *FuncInfo = MF.getInfo<ARM64FunctionInfo>();
- SDLoc DL(Op);
-
- SDValue Chain = Op.getOperand(0);
- SDValue VAList = Op.getOperand(1);
- const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
- SmallVector<SDValue, 4> MemOps;
-
- // void *__stack at offset 0
- SDValue Stack =
- DAG.getFrameIndex(FuncInfo->getVarArgsStackIndex(), getPointerTy());
- MemOps.push_back(DAG.getStore(Chain, DL, Stack, VAList,
- MachinePointerInfo(SV), false, false, 8));
-
- // void *__gr_top at offset 8
- int GPRSize = FuncInfo->getVarArgsGPRSize();
- if (GPRSize > 0) {
- SDValue GRTop, GRTopAddr;
-
- GRTopAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList,
- DAG.getConstant(8, getPointerTy()));
-
- GRTop = DAG.getFrameIndex(FuncInfo->getVarArgsGPRIndex(), getPointerTy());
- GRTop = DAG.getNode(ISD::ADD, DL, getPointerTy(), GRTop,
- DAG.getConstant(GPRSize, getPointerTy()));
-
- MemOps.push_back(DAG.getStore(Chain, DL, GRTop, GRTopAddr,
- MachinePointerInfo(SV, 8), false, false, 8));
- }
-
- // void *__vr_top at offset 16
- int FPRSize = FuncInfo->getVarArgsFPRSize();
- if (FPRSize > 0) {
- SDValue VRTop, VRTopAddr;
- VRTopAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList,
- DAG.getConstant(16, getPointerTy()));
-
- VRTop = DAG.getFrameIndex(FuncInfo->getVarArgsFPRIndex(), getPointerTy());
- VRTop = DAG.getNode(ISD::ADD, DL, getPointerTy(), VRTop,
- DAG.getConstant(FPRSize, getPointerTy()));
-
- MemOps.push_back(DAG.getStore(Chain, DL, VRTop, VRTopAddr,
- MachinePointerInfo(SV, 16), false, false, 8));
- }
-
- // int __gr_offs at offset 24
- SDValue GROffsAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList,
- DAG.getConstant(24, getPointerTy()));
- MemOps.push_back(DAG.getStore(Chain, DL, DAG.getConstant(-GPRSize, MVT::i32),
- GROffsAddr, MachinePointerInfo(SV, 24), false,
- false, 4));
-
- // int __vr_offs at offset 28
- SDValue VROffsAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList,
- DAG.getConstant(28, getPointerTy()));
- MemOps.push_back(DAG.getStore(Chain, DL, DAG.getConstant(-FPRSize, MVT::i32),
- VROffsAddr, MachinePointerInfo(SV, 28), false,
- false, 4));
-
- return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, &MemOps[0],
- MemOps.size());
-}
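The stores above populate the following structure (field names and offsets per AAPCS64 section B.3; the typedef name here is ours):

typedef struct {
  void *__stack;  // offset 0:  next stacked argument
  void *__gr_top; // offset 8:  end of the GP register save area
  void *__vr_top; // offset 16: end of the FP/SIMD register save area
  int __gr_offs;  // offset 24: negative offset from __gr_top (-GPRSize)
  int __vr_offs;  // offset 28: negative offset from __vr_top (-FPRSize)
} aapcs64_va_list;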
-
-SDValue ARM64TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
- return Subtarget->isTargetDarwin() ? LowerDarwin_VASTART(Op, DAG)
- : LowerAAPCS_VASTART(Op, DAG);
-}
-
-SDValue ARM64TargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
- // AAPCS has three pointers and two ints (= 32 bytes), Darwin has a single
- // pointer.
- unsigned VaListSize = Subtarget->isTargetDarwin() ? 8 : 32;
- const Value *DestSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
- const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
-
- return DAG.getMemcpy(Op.getOperand(0), SDLoc(Op), Op.getOperand(1),
- Op.getOperand(2), DAG.getConstant(VaListSize, MVT::i32),
- 8, false, false, MachinePointerInfo(DestSV),
- MachinePointerInfo(SrcSV));
-}
-
-SDValue ARM64TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
- assert(Subtarget->isTargetDarwin() &&
- "automatic va_arg instruction only works on Darwin");
-
- const Value *V = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
- EVT VT = Op.getValueType();
- SDLoc DL(Op);
- SDValue Chain = Op.getOperand(0);
- SDValue Addr = Op.getOperand(1);
- unsigned Align = Op.getConstantOperandVal(3);
-
- SDValue VAList = DAG.getLoad(getPointerTy(), DL, Chain, Addr,
- MachinePointerInfo(V), false, false, false, 0);
- Chain = VAList.getValue(1);
-
- if (Align > 8) {
- assert(((Align & (Align - 1)) == 0) && "Expected Align to be a power of 2");
- VAList = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList,
- DAG.getConstant(Align - 1, getPointerTy()));
- VAList = DAG.getNode(ISD::AND, DL, getPointerTy(), VAList,
- DAG.getConstant(-(int64_t)Align, getPointerTy()));
- }
-
- Type *ArgTy = VT.getTypeForEVT(*DAG.getContext());
- uint64_t ArgSize = getDataLayout()->getTypeAllocSize(ArgTy);
-
- // Scalar integer and FP values smaller than 64 bits are implicitly extended
- // up to 64 bits. At the very least, we have to increase the striding of the
- // vaargs list to match this, and for FP values we need to introduce
- // FP_ROUND nodes as well.
- if (VT.isInteger() && !VT.isVector())
- ArgSize = 8;
- bool NeedFPTrunc = false;
- if (VT.isFloatingPoint() && !VT.isVector() && VT != MVT::f64) {
- ArgSize = 8;
- NeedFPTrunc = true;
- }
-
- // Increment the pointer, VAList, to the next vaarg
- SDValue VANext = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList,
- DAG.getConstant(ArgSize, getPointerTy()));
- // Store the incremented VAList to the legalized pointer
- SDValue APStore = DAG.getStore(Chain, DL, VANext, Addr, MachinePointerInfo(V),
- false, false, 0);
-
- // Load the actual argument out of the pointer VAList
- if (NeedFPTrunc) {
- // Load the value as an f64.
- SDValue WideFP = DAG.getLoad(MVT::f64, DL, APStore, VAList,
- MachinePointerInfo(), false, false, false, 0);
- // Round the value down to an f32.
- SDValue NarrowFP = DAG.getNode(ISD::FP_ROUND, DL, VT, WideFP.getValue(0),
- DAG.getIntPtrConstant(1));
- SDValue Ops[] = { NarrowFP, WideFP.getValue(1) };
- // Merge the rounded value with the chain output of the load.
- return DAG.getMergeValues(Ops, 2, DL);
- }
-
- return DAG.getLoad(VT, DL, APStore, VAList, MachinePointerInfo(), false,
- false, false, 0);
-}
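The over-alignment handling above uses the standard align-up idiom; as a sketch (valid for any power-of-two Align):

static unsigned long long align_up(unsigned long long P, unsigned long long A) {
  return (P + A - 1) & ~(A - 1); // same bits as (P + A - 1) & -(long long)A
}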
-
-SDValue ARM64TargetLowering::LowerFRAMEADDR(SDValue Op,
- SelectionDAG &DAG) const {
- MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
- MFI->setFrameAddressIsTaken(true);
-
- EVT VT = Op.getValueType();
- SDLoc DL(Op);
- unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
- SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, ARM64::FP, VT);
- while (Depth--)
- FrameAddr = DAG.getLoad(VT, DL, DAG.getEntryNode(), FrameAddr,
- MachinePointerInfo(), false, false, false, 0);
- return FrameAddr;
-}
-
-SDValue ARM64TargetLowering::LowerRETURNADDR(SDValue Op,
- SelectionDAG &DAG) const {
- MachineFunction &MF = DAG.getMachineFunction();
- MachineFrameInfo *MFI = MF.getFrameInfo();
- MFI->setReturnAddressIsTaken(true);
-
- EVT VT = Op.getValueType();
- SDLoc DL(Op);
- unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
- if (Depth) {
- SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
- SDValue Offset = DAG.getConstant(8, getPointerTy());
- return DAG.getLoad(VT, DL, DAG.getEntryNode(),
- DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
- MachinePointerInfo(), false, false, false, 0);
- }
-
- // Return LR, which contains the return address. Mark it an implicit live-in.
- unsigned Reg = MF.addLiveIn(ARM64::LR, &ARM64::GPR64RegClass);
- return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, VT);
-}
-
- /// LowerShiftRightParts - Lower SRA_PARTS and SRL_PARTS, which return two
- /// i64 values and take a 2 x i64 value to shift plus a shift amount.
-SDValue ARM64TargetLowering::LowerShiftRightParts(SDValue Op,
- SelectionDAG &DAG) const {
- assert(Op.getNumOperands() == 3 && "Not a double-shift!");
- EVT VT = Op.getValueType();
- unsigned VTBits = VT.getSizeInBits();
- SDLoc dl(Op);
- SDValue ShOpLo = Op.getOperand(0);
- SDValue ShOpHi = Op.getOperand(1);
- SDValue ShAmt = Op.getOperand(2);
- SDValue ARMcc;
- unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL;
-
- assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS);
-
- SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64,
- DAG.getConstant(VTBits, MVT::i64), ShAmt);
- SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt);
- SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64, ShAmt,
- DAG.getConstant(VTBits, MVT::i64));
- SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt);
-
- SDValue Cmp =
- emitComparison(ExtraShAmt, DAG.getConstant(0, MVT::i64), dl, DAG);
- SDValue CCVal = DAG.getConstant(ARM64CC::GE, MVT::i32);
-
- SDValue FalseValLo = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
- SDValue TrueValLo = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);
- SDValue Lo =
- DAG.getNode(ARM64ISD::CSEL, dl, VT, TrueValLo, FalseValLo, CCVal, Cmp);
-
- // ARM64 shifts larger than the register width are wrapped rather than
- // clamped, so we can't just emit "hi >> x".
- SDValue FalseValHi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
- SDValue TrueValHi = Opc == ISD::SRA
- ? DAG.getNode(Opc, dl, VT, ShOpHi,
- DAG.getConstant(VTBits - 1, MVT::i64))
- : DAG.getConstant(0, VT);
- SDValue Hi =
- DAG.getNode(ARM64ISD::CSEL, dl, VT, TrueValHi, FalseValHi, CCVal, Cmp);
-
- SDValue Ops[2] = { Lo, Hi };
- return DAG.getMergeValues(Ops, 2, dl);
-}
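A reference model of the low-half computation above for SRL_PARTS (a sketch, assuming 0 <= Amt < 128; SRA_PARTS differs only in using arithmetic shifts of the high word):

static unsigned long long srl128_lo(unsigned long long Lo,
                                    unsigned long long Hi, unsigned Amt) {
  if (Amt == 0)
    return Lo;                             // avoid the UB of Hi << 64 in C
  if (Amt >= 64)
    return Hi >> (Amt - 64);               // ExtraShAmt >= 0: bits from Hi
  return (Lo >> Amt) | (Hi << (64 - Amt)); // the OR of Tmp1 and Tmp2 above
}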
-
-/// LowerShiftLeftParts - Lower SHL_PARTS, which returns two
- /// i64 values and takes a 2 x i64 value to shift plus a shift amount.
-SDValue ARM64TargetLowering::LowerShiftLeftParts(SDValue Op,
- SelectionDAG &DAG) const {
- assert(Op.getNumOperands() == 3 && "Not a double-shift!");
- EVT VT = Op.getValueType();
- unsigned VTBits = VT.getSizeInBits();
- SDLoc dl(Op);
- SDValue ShOpLo = Op.getOperand(0);
- SDValue ShOpHi = Op.getOperand(1);
- SDValue ShAmt = Op.getOperand(2);
- SDValue ARMcc;
-
- assert(Op.getOpcode() == ISD::SHL_PARTS);
- SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64,
- DAG.getConstant(VTBits, MVT::i64), ShAmt);
- SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt);
- SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64, ShAmt,
- DAG.getConstant(VTBits, MVT::i64));
- SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);
- SDValue Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);
-
- SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
-
- SDValue Cmp =
- emitComparison(ExtraShAmt, DAG.getConstant(0, MVT::i64), dl, DAG);
- SDValue CCVal = DAG.getConstant(ARM64CC::GE, MVT::i32);
- SDValue Hi = DAG.getNode(ARM64ISD::CSEL, dl, VT, Tmp3, FalseVal, CCVal, Cmp);
-
- // ARM64 shifts larger than the register width are wrapped rather than
- // clamped, so we can't just emit "lo << a" if a is too big.
- SDValue TrueValLo = DAG.getConstant(0, VT);
- SDValue FalseValLo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
- SDValue Lo =
- DAG.getNode(ARM64ISD::CSEL, dl, VT, TrueValLo, FalseValLo, CCVal, Cmp);
-
- SDValue Ops[2] = { Lo, Hi };
- return DAG.getMergeValues(Ops, 2, dl);
-}
-
-bool
-ARM64TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
- // The ARM64 target doesn't support folding offsets into global addresses.
- return false;
-}
-
-bool ARM64TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
- // We can materialize #0.0 as fmov $Rd, XZR for 64-bit and 32-bit cases.
- // FIXME: We should be able to handle f128 as well with a clever lowering.
- if (Imm.isPosZero() && (VT == MVT::f64 || VT == MVT::f32))
- return true;
-
- if (VT == MVT::f64)
- return ARM64_AM::getFP64Imm(Imm) != -1;
- else if (VT == MVT::f32)
- return ARM64_AM::getFP32Imm(Imm) != -1;
- return false;
-}
-
-//===----------------------------------------------------------------------===//
-// ARM64 Optimization Hooks
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// ARM64 Inline Assembly Support
-//===----------------------------------------------------------------------===//
-
-// Table of Constraints
-// TODO: This is the current set of constraints supported by ARM for the
-// compiler, not all of them may make sense, e.g. S may be difficult to support.
-//
-// r - A general register
-// w - An FP/SIMD register of some size in the range v0-v31
-// x - An FP/SIMD register of some size in the range v0-v15
-// I - Constant that can be used with an ADD instruction
-// J - Constant that can be used with a SUB instruction
-// K - Constant that can be used with a 32-bit logical instruction
-// L - Constant that can be used with a 64-bit logical instruction
-// M - Constant that can be used as a 32-bit MOV immediate
-// N - Constant that can be used as a 64-bit MOV immediate
-// Q - A memory reference with base register and no offset
-// S - A symbolic address
-// Y - Floating point constant zero
-// Z - Integer constant zero
-//
-// Note that general register operands will be output using their 64-bit x
-// register name, whatever the size of the variable, unless the asm operand
-// is prefixed by the %w modifier. Floating-point and SIMD register operands
-// will be output with the v prefix unless prefixed by the %b, %h, %s, %d or
-// %q modifier.
-
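A hypothetical use of two of these constraints from C ('r' and 'I'); the snippet is illustrative only and not taken from the LLVM sources:

static long add_imm(long X) {
  long R;
  // 'I' accepts 0..4095 (optionally shifted by 12), matching ADD immediates.
  asm("add %0, %1, %2" : "=r"(R) : "r"(X), "I"(4095L));
  return R;
}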
-/// getConstraintType - Given a constraint letter, return the type of
-/// constraint it is for this target.
-ARM64TargetLowering::ConstraintType
-ARM64TargetLowering::getConstraintType(const std::string &Constraint) const {
- if (Constraint.size() == 1) {
- switch (Constraint[0]) {
- default:
- break;
- case 'z':
- return C_Other;
- case 'x':
- case 'w':
- return C_RegisterClass;
- // An address with a single base register. Due to the way we
- // currently handle addresses it is the same as 'r'.
- case 'Q':
- return C_Memory;
- }
- }
- return TargetLowering::getConstraintType(Constraint);
-}
-
-/// Examine constraint type and operand type and determine a weight value.
-/// This object must already have been set up with the operand type
-/// and the current alternative constraint selected.
-TargetLowering::ConstraintWeight
-ARM64TargetLowering::getSingleConstraintMatchWeight(
- AsmOperandInfo &info, const char *constraint) const {
- ConstraintWeight weight = CW_Invalid;
- Value *CallOperandVal = info.CallOperandVal;
- // If we don't have a value, we can't do a match,
- // but allow it at the lowest weight.
- if (CallOperandVal == NULL)
- return CW_Default;
- Type *type = CallOperandVal->getType();
- // Look at the constraint type.
- switch (*constraint) {
- default:
- weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
- break;
- case 'x':
- case 'w':
- if (type->isFloatingPointTy() || type->isVectorTy())
- weight = CW_Register;
- break;
- case 'z':
- weight = CW_Constant;
- break;
- }
- return weight;
-}
-
-std::pair<unsigned, const TargetRegisterClass *>
-ARM64TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
- MVT VT) const {
- if (Constraint.size() == 1) {
- switch (Constraint[0]) {
- case 'r':
- if (VT.getSizeInBits() == 64)
- return std::make_pair(0U, &ARM64::GPR64commonRegClass);
- return std::make_pair(0U, &ARM64::GPR32commonRegClass);
- case 'w':
- if (VT == MVT::f32)
- return std::make_pair(0U, &ARM64::FPR32RegClass);
- if (VT.getSizeInBits() == 64)
- return std::make_pair(0U, &ARM64::FPR64RegClass);
- if (VT.getSizeInBits() == 128)
- return std::make_pair(0U, &ARM64::FPR128RegClass);
- break;
- // The instructions that this constraint is designed for can
- // only take 128-bit registers so just use that regclass.
- case 'x':
- if (VT.getSizeInBits() == 128)
- return std::make_pair(0U, &ARM64::FPR128_loRegClass);
- break;
- }
- }
- if (StringRef("{cc}").equals_lower(Constraint))
- return std::make_pair(unsigned(ARM64::CPSR), &ARM64::CCRRegClass);
-
- // Use the default implementation in TargetLowering to convert the register
- // constraint into a member of a register class.
- std::pair<unsigned, const TargetRegisterClass *> Res;
- Res = TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
-
- // Not found as a standard register?
- if (Res.second == 0) {
- unsigned Size = Constraint.size();
- if ((Size == 4 || Size == 5) && Constraint[0] == '{' &&
- tolower(Constraint[1]) == 'v' && Constraint[Size - 1] == '}') {
- const std::string Reg =
- std::string(&Constraint[2], &Constraint[Size - 1]);
- int RegNo = atoi(Reg.c_str());
- if (RegNo >= 0 && RegNo <= 31) {
- // v0 - v31 are aliases of q0 - q31.
- // By default we'll emit v0-v31 for this unless there's a modifier where
- // we'll emit the correct register as well.
- Res.first = ARM64::FPR128RegClass.getRegister(RegNo);
- Res.second = &ARM64::FPR128RegClass;
- }
- }
- }
-
- return Res;
-}
-
-/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
-/// vector. If it is invalid, don't add anything to Ops.
-void ARM64TargetLowering::LowerAsmOperandForConstraint(
- SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops,
- SelectionDAG &DAG) const {
- SDValue Result(0, 0);
-
- // Currently only support length 1 constraints.
- if (Constraint.length() != 1)
- return;
-
- char ConstraintLetter = Constraint[0];
- switch (ConstraintLetter) {
- default:
- break;
-
- // This set of constraints deals with valid constants for various instructions.
- // Validate and return a target constant for them if we can.
- case 'z': {
- // 'z' maps to xzr or wzr so it needs an input of 0.
- ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
- if (!C || C->getZExtValue() != 0)
- return;
-
- if (Op.getValueType() == MVT::i64)
- Result = DAG.getRegister(ARM64::XZR, MVT::i64);
- else
- Result = DAG.getRegister(ARM64::WZR, MVT::i32);
- break;
- }
-
- case 'I':
- case 'J':
- case 'K':
- case 'L':
- case 'M':
- case 'N':
- ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
- if (!C)
- return;
-
- // Grab the value and do some validation.
- uint64_t CVal = C->getZExtValue();
- switch (ConstraintLetter) {
- // The I constraint applies only to simple ADD or SUB immediate operands:
- // i.e. 0 to 4095 with optional shift by 12
- // The J constraint applies only to ADD or SUB immediates that would be
- // valid when negated, i.e. if [an add pattern] were to be output as a SUB
- // instruction [or vice versa], in other words -1 to -4095 with optional
- // left shift by 12.
- case 'I':
- if (isUInt<12>(CVal) || isShiftedUInt<12, 12>(CVal))
- break;
- return;
- case 'J': {
- uint64_t NVal = -C->getSExtValue();
- if (isUInt<12>(NVal) || isShiftedUInt<12, 12>(NVal))
- break;
- return;
- }
- // The K and L constraints apply *only* to logical immediates, including
- // what used to be the MOVI alias for ORR (though the MOVI alias has now
- // been removed and MOV should be used). So these constraints have to
- // distinguish between bit patterns that are valid 32-bit or 64-bit
- // "bitmask immediates": for example 0xaaaaaaaa is a valid bimm32 (K), but
- // not a valid bimm64 (L) where 0xaaaaaaaaaaaaaaaa would be valid, and vice
- // versa.
- case 'K':
- if (ARM64_AM::isLogicalImmediate(CVal, 32))
- break;
- return;
- case 'L':
- if (ARM64_AM::isLogicalImmediate(CVal, 64))
- break;
- return;
- // The M and N constraints are a superset of K and L respectively, for use
- // with the MOV (immediate) alias. As well as the logical immediates they
- // also match 32 or 64-bit immediates that can be loaded either using a
- // *single* MOVZ or MOVN, such as 32-bit 0x12340000, 0x00001234, 0xffffedca
- // (M) or 64-bit 0x1234000000000000 (N) etc.
- // As a note some of this code is liberally stolen from the asm parser.
- case 'M': {
- if (!isUInt<32>(CVal))
- return;
- if (ARM64_AM::isLogicalImmediate(CVal, 32))
- break;
- if ((CVal & 0xFFFF) == CVal)
- break;
- if ((CVal & 0xFFFF0000ULL) == CVal)
- break;
- uint64_t NCVal = ~(uint32_t)CVal;
- if ((NCVal & 0xFFFFULL) == NCVal)
- break;
- if ((NCVal & 0xFFFF0000ULL) == NCVal)
- break;
- return;
- }
- case 'N': {
- if (ARM64_AM::isLogicalImmediate(CVal, 64))
- break;
- if ((CVal & 0xFFFFULL) == CVal)
- break;
- if ((CVal & 0xFFFF0000ULL) == CVal)
- break;
- if ((CVal & 0xFFFF00000000ULL) == CVal)
- break;
- if ((CVal & 0xFFFF000000000000ULL) == CVal)
- break;
- uint64_t NCVal = ~CVal;
- if ((NCVal & 0xFFFFULL) == NCVal)
- break;
- if ((NCVal & 0xFFFF0000ULL) == NCVal)
- break;
- if ((NCVal & 0xFFFF00000000ULL) == NCVal)
- break;
- if ((NCVal & 0xFFFF000000000000ULL) == NCVal)
- break;
- return;
- }
- default:
- return;
- }
-
- // All assembler immediates are 64-bit integers.
- Result = DAG.getTargetConstant(CVal, MVT::i64);
- break;
- }
-
- if (Result.getNode()) {
- Ops.push_back(Result);
- return;
- }
-
- return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
-}
-
-//===----------------------------------------------------------------------===//
-// ARM64 Advanced SIMD Support
-//===----------------------------------------------------------------------===//
-
-/// WidenVector - Given a value in the V64 register class, produce the
-/// equivalent value in the V128 register class.
-static SDValue WidenVector(SDValue V64Reg, SelectionDAG &DAG) {
- EVT VT = V64Reg.getValueType();
- unsigned NarrowSize = VT.getVectorNumElements();
- MVT EltTy = VT.getVectorElementType().getSimpleVT();
- MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize);
- SDLoc DL(V64Reg);
-
- return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, WideTy, DAG.getUNDEF(WideTy),
- V64Reg, DAG.getConstant(0, MVT::i32));
-}
-
-/// getExtFactor - Determine the adjustment factor for the position when
-/// generating an "extract from vector registers" instruction.
-static unsigned getExtFactor(SDValue &V) {
- EVT EltType = V.getValueType().getVectorElementType();
- return EltType.getSizeInBits() / 8;
-}
-
-/// NarrowVector - Given a value in the V128 register class, produce the
-/// equivalent value in the V64 register class.
-static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) {
- EVT VT = V128Reg.getValueType();
- unsigned WideSize = VT.getVectorNumElements();
- MVT EltTy = VT.getVectorElementType().getSimpleVT();
- MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2);
- SDLoc DL(V128Reg);
-
- return DAG.getTargetExtractSubreg(ARM64::dsub, DL, NarrowTy, V128Reg);
-}
-
-// Gather data to see if the operation can be modelled as a
-// shuffle in combination with VEXTs.
-SDValue ARM64TargetLowering::ReconstructShuffle(SDValue Op,
- SelectionDAG &DAG) const {
- SDLoc dl(Op);
- EVT VT = Op.getValueType();
- unsigned NumElts = VT.getVectorNumElements();
-
- SmallVector<SDValue, 2> SourceVecs;
- SmallVector<unsigned, 2> MinElts;
- SmallVector<unsigned, 2> MaxElts;
-
- for (unsigned i = 0; i < NumElts; ++i) {
- SDValue V = Op.getOperand(i);
- if (V.getOpcode() == ISD::UNDEF)
- continue;
- else if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT) {
- // A shuffle can only come from building a vector from various
- // elements of other vectors.
- return SDValue();
- }
-
- // Record this extraction against the appropriate vector if possible...
- SDValue SourceVec = V.getOperand(0);
- unsigned EltNo = cast<ConstantSDNode>(V.getOperand(1))->getZExtValue();
- bool FoundSource = false;
- for (unsigned j = 0; j < SourceVecs.size(); ++j) {
- if (SourceVecs[j] == SourceVec) {
- if (MinElts[j] > EltNo)
- MinElts[j] = EltNo;
- if (MaxElts[j] < EltNo)
- MaxElts[j] = EltNo;
- FoundSource = true;
- break;
- }
- }
-
- // Or record a new source if not...
- if (!FoundSource) {
- SourceVecs.push_back(SourceVec);
- MinElts.push_back(EltNo);
- MaxElts.push_back(EltNo);
- }
- }
-
- // Currently we only do something sane when at most two source vectors
- // are involved.
- if (SourceVecs.size() > 2)
- return SDValue();
-
- SDValue ShuffleSrcs[2] = { DAG.getUNDEF(VT), DAG.getUNDEF(VT) };
- int VEXTOffsets[2] = { 0, 0 };
-
- // This loop extracts the usage patterns of the source vectors
- // and prepares appropriate SDValues for a shuffle if possible.
- for (unsigned i = 0; i < SourceVecs.size(); ++i) {
- if (SourceVecs[i].getValueType() == VT) {
- // No VEXT necessary
- ShuffleSrcs[i] = SourceVecs[i];
- VEXTOffsets[i] = 0;
- continue;
- } else if (SourceVecs[i].getValueType().getVectorNumElements() < NumElts) {
- // It probably isn't worth padding out a smaller vector just to
- // break it down again in a shuffle.
- return SDValue();
- }
-
- // Don't attempt to extract subvectors from BUILD_VECTOR sources
- // that expand or trunc the original value.
- // TODO: We can try to bitcast and ANY_EXTEND the result but
- // we need to consider the cost of vector ANY_EXTEND, and the
- // legality of all the types.
- if (SourceVecs[i].getValueType().getVectorElementType() !=
- VT.getVectorElementType())
- return SDValue();
-
- // Since only 64-bit and 128-bit vectors are legal on ARM64 and
- // we've eliminated the other cases...
- assert(SourceVecs[i].getValueType().getVectorNumElements() == 2 * NumElts &&
- "unexpected vector sizes in ReconstructShuffle");
-
- if (MaxElts[i] - MinElts[i] >= NumElts) {
- // Span too large for a VEXT to cope
- return SDValue();
- }
-
- if (MinElts[i] >= NumElts) {
- // The extraction can just take the second half
- VEXTOffsets[i] = NumElts;
- ShuffleSrcs[i] =
- DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, SourceVecs[i],
- DAG.getIntPtrConstant(NumElts));
- } else if (MaxElts[i] < NumElts) {
- // The extraction can just take the first half
- VEXTOffsets[i] = 0;
- ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
- SourceVecs[i], DAG.getIntPtrConstant(0));
- } else {
- // An actual VEXT is needed
- VEXTOffsets[i] = MinElts[i];
- SDValue VEXTSrc1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
- SourceVecs[i], DAG.getIntPtrConstant(0));
- SDValue VEXTSrc2 =
- DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, SourceVecs[i],
- DAG.getIntPtrConstant(NumElts));
- unsigned Imm = VEXTOffsets[i] * getExtFactor(VEXTSrc1);
- ShuffleSrcs[i] = DAG.getNode(ARM64ISD::EXT, dl, VT, VEXTSrc1, VEXTSrc2,
- DAG.getConstant(Imm, MVT::i32));
- }
- }
-
- SmallVector<int, 8> Mask;
-
- for (unsigned i = 0; i < NumElts; ++i) {
- SDValue Entry = Op.getOperand(i);
- if (Entry.getOpcode() == ISD::UNDEF) {
- Mask.push_back(-1);
- continue;
- }
-
- SDValue ExtractVec = Entry.getOperand(0);
- int ExtractElt =
- cast<ConstantSDNode>(Op.getOperand(i).getOperand(1))->getSExtValue();
- if (ExtractVec == SourceVecs[0]) {
- Mask.push_back(ExtractElt - VEXTOffsets[0]);
- } else {
- Mask.push_back(ExtractElt + NumElts - VEXTOffsets[1]);
- }
- }
-
- // Final check before we try to produce nonsense...
- if (isShuffleMaskLegal(Mask, VT))
- return DAG.getVectorShuffle(VT, dl, ShuffleSrcs[0], ShuffleSrcs[1],
- &Mask[0]);
-
- return SDValue();
-}
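
The mask construction at the end of ReconstructShuffle is worth seeing in isolation. A hypothetical sketch with plain (source, element) pairs standing in for the EXTRACT_VECTOR_ELT operands; buildMask and its types are inventions for illustration:

#include <cassert>
#include <utility>
#include <vector>

// Sketch: each BUILD_VECTOR lane is (SourceId, EltNo); SourceId -1 is undef.
// Lanes from source 0 map to EltNo - VEXTOffsets[0]; lanes from source 1 map
// to EltNo + NumElts - VEXTOffsets[1], mirroring the loop above.
static std::vector<int>
buildMask(const std::vector<std::pair<int, int>> &Lanes, int NumElts,
          const int VEXTOffsets[2]) {
  std::vector<int> Mask;
  for (const auto &L : Lanes) {
    if (L.first < 0) {
      Mask.push_back(-1);  // undef lane
      continue;
    }
    Mask.push_back(L.first == 0 ? L.second - VEXTOffsets[0]
                                : L.second + NumElts - VEXTOffsets[1]);
  }
  return Mask;
}

int main() {
  // v4 result: lanes 0-2 from elements 1-3 of source 0, lane 3 from element
  // 0 of source 1; neither source needed a VEXT adjustment.
  const int Off[2] = {0, 0};
  auto M = buildMask({{0, 1}, {0, 2}, {0, 3}, {1, 0}}, 4, Off);
  assert((M == std::vector<int>{1, 2, 3, 4}));
  return 0;
}
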
-
-// Check if an EXT instruction can handle the shuffle mask when the
-// vector sources of the shuffle are the same.
-static bool isSingletonEXTMask(ArrayRef<int> M, EVT VT, unsigned &Imm) {
- unsigned NumElts = VT.getVectorNumElements();
-
- // Assume that the first shuffle index is not UNDEF. Fail if it is.
- if (M[0] < 0)
- return false;
-
- Imm = M[0];
-
- // If this is a VEXT shuffle, the immediate value is the index of the first
- // element. The other shuffle indices must be the successive elements after
- // the first one.
- unsigned ExpectedElt = Imm;
- for (unsigned i = 1; i < NumElts; ++i) {
- // Increment the expected index. If it wraps around, just follow it
- // back to index zero and keep going.
- ++ExpectedElt;
- if (ExpectedElt == NumElts)
- ExpectedElt = 0;
-
- if (M[i] < 0)
- continue; // ignore UNDEF indices
- if (ExpectedElt != static_cast<unsigned>(M[i]))
- return false;
- }
-
- return true;
-}
-
-// Check if an EXT instruction can handle the shuffle mask when the
-// vector sources of the shuffle are different.
-static bool isEXTMask(ArrayRef<int> M, EVT VT, bool &ReverseEXT,
- unsigned &Imm) {
- unsigned NumElts = VT.getVectorNumElements();
- ReverseEXT = false;
-
- // Assume that the first shuffle index is not UNDEF. Fail if it is.
- if (M[0] < 0)
- return false;
-
- Imm = M[0];
-
- // If this is a VEXT shuffle, the immediate value is the index of the first
- // element. The other shuffle indices must be the successive elements after
- // the first one.
- unsigned ExpectedElt = Imm;
- for (unsigned i = 1; i < NumElts; ++i) {
- // Increment the expected index. If it wraps around, it may still be
- // a VEXT but the source vectors must be swapped.
- ExpectedElt += 1;
- if (ExpectedElt == NumElts * 2) {
- ExpectedElt = 0;
- ReverseEXT = true;
- }
-
- if (M[i] < 0)
- continue; // ignore UNDEF indices
- if (ExpectedElt != static_cast<unsigned>(M[i]))
- return false;
- }
-
- // Adjust the index value if the source operands will be swapped.
- if (ReverseEXT)
- Imm -= NumElts;
-
- return true;
-}
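
Concretely, for a v4 shuffle the mask <2,3,4,5> walks forward from element 2 across both sources, so isEXTMask matches with Imm = 2; <6,7,0,1> wraps past the end, so it matches with ReverseEXT = true and Imm adjusted to 6 - 4 = 2 for the swapped operands. A standalone restatement of the logic (extMask is a local stand-in, not the function above):

#include <cassert>
#include <vector>

// Standalone re-statement of the isEXTMask logic above for experimentation.
static bool extMask(const std::vector<int> &M, unsigned NumElts,
                    bool &ReverseEXT, unsigned &Imm) {
  ReverseEXT = false;
  if (M[0] < 0)
    return false;  // first index must not be undef
  Imm = M[0];
  unsigned Expected = Imm;
  for (unsigned i = 1; i < NumElts; ++i) {
    if (++Expected == NumElts * 2) {  // wrapped: sources must be swapped
      Expected = 0;
      ReverseEXT = true;
    }
    if (M[i] >= 0 && (unsigned)M[i] != Expected)
      return false;
  }
  if (ReverseEXT)
    Imm -= NumElts;  // re-index into the swapped first operand
  return true;
}

int main() {
  bool Rev;
  unsigned Imm;
  assert(extMask({2, 3, 4, 5}, 4, Rev, Imm) && !Rev && Imm == 2);
  assert(extMask({6, 7, 0, 1}, 4, Rev, Imm) && Rev && Imm == 2);
  return 0;
}
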
-
-/// isREVMask - Check if a vector shuffle corresponds to a REV
-/// instruction with the specified blocksize. (The order of the elements
-/// within each block of the vector is reversed.)
-static bool isREVMask(ArrayRef<int> M, EVT VT, unsigned BlockSize) {
- assert((BlockSize == 16 || BlockSize == 32 || BlockSize == 64) &&
- "Only possible block sizes for REV are: 16, 32, 64");
-
- unsigned EltSz = VT.getVectorElementType().getSizeInBits();
- if (EltSz == 64)
- return false;
-
- unsigned NumElts = VT.getVectorNumElements();
- unsigned BlockElts = M[0] + 1;
- // If the first shuffle index is UNDEF, be optimistic.
- if (M[0] < 0)
- BlockElts = BlockSize / EltSz;
-
- if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz)
- return false;
-
- for (unsigned i = 0; i < NumElts; ++i) {
- if (M[i] < 0)
- continue; // ignore UNDEF indices
- if ((unsigned)M[i] != (i - i % BlockElts) + (BlockElts - 1 - i % BlockElts))
- return false;
- }
-
- return true;
-}
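
For example, with 16-bit elements and BlockSize 32, the only fully-defined mask isREVMask accepts is <1,0,3,2,5,4,7,6>. A simplified standalone check; revMask fixes BlockElts from BlockSize/EltSz instead of inferring it from M[0] as the real code does:

#include <cassert>
#include <vector>

// Sketch of the isREVMask element formula above: element i must come from
// (i - i % BlockElts) + (BlockElts - 1 - i % BlockElts).
static bool revMask(const std::vector<int> &M, unsigned EltSz,
                    unsigned BlockSize) {
  if (EltSz == 64 || BlockSize <= EltSz)
    return false;
  unsigned BlockElts = BlockSize / EltSz;
  for (unsigned i = 0; i < M.size(); ++i) {
    if (M[i] < 0)
      continue;  // undef lanes are fine
    if ((unsigned)M[i] != (i - i % BlockElts) + (BlockElts - 1 - i % BlockElts))
      return false;
  }
  return true;
}

int main() {
  // REV32 on v8i16: swap the two i16 halves of every 32-bit block.
  assert(revMask({1, 0, 3, 2, 5, 4, 7, 6}, 16, 32));
  // REV64 on v4i32: swap the two i32 halves of every 64-bit block.
  assert(revMask({1, 0, 3, 2}, 32, 64));
  assert(!revMask({0, 1, 2, 3}, 16, 32));  // identity is not a REV
  return 0;
}
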
-
-static bool isZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
- unsigned NumElts = VT.getVectorNumElements();
- WhichResult = (M[0] == 0 ? 0 : 1);
- unsigned Idx = WhichResult * NumElts / 2;
- for (unsigned i = 0; i != NumElts; i += 2) {
- if ((M[i] >= 0 && (unsigned)M[i] != Idx) ||
- (M[i + 1] >= 0 && (unsigned)M[i + 1] != Idx + NumElts))
- return false;
- Idx += 1;
- }
-
- return true;
-}
-
-static bool isUZPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
- unsigned NumElts = VT.getVectorNumElements();
- WhichResult = (M[0] == 0 ? 0 : 1);
- for (unsigned i = 0; i != NumElts; ++i) {
- if (M[i] < 0)
- continue; // ignore UNDEF indices
- if ((unsigned)M[i] != 2 * i + WhichResult)
- return false;
- }
-
- return true;
-}
-
-static bool isTRNMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
- unsigned NumElts = VT.getVectorNumElements();
- WhichResult = (M[0] == 0 ? 0 : 1);
- for (unsigned i = 0; i < NumElts; i += 2) {
- if ((M[i] >= 0 && (unsigned)M[i] != i + WhichResult) ||
- (M[i + 1] >= 0 && (unsigned)M[i + 1] != i + NumElts + WhichResult))
- return false;
- }
- return true;
-}
-
-/// isZIP_v_undef_Mask - Special case of isZIPMask for canonical form of
-/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
-/// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>.
-static bool isZIP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
- unsigned NumElts = VT.getVectorNumElements();
- WhichResult = (M[0] == 0 ? 0 : 1);
- unsigned Idx = WhichResult * NumElts / 2;
- for (unsigned i = 0; i != NumElts; i += 2) {
- if ((M[i] >= 0 && (unsigned)M[i] != Idx) ||
- (M[i + 1] >= 0 && (unsigned)M[i + 1] != Idx))
- return false;
- Idx += 1;
- }
-
- return true;
-}
-
-/// isUZP_v_undef_Mask - Special case of isUZPMask for canonical form of
-/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
-/// Mask is e.g., <0, 2, 0, 2> instead of <0, 2, 4, 6>.
-static bool isUZP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
- unsigned Half = VT.getVectorNumElements() / 2;
- WhichResult = (M[0] == 0 ? 0 : 1);
- for (unsigned j = 0; j != 2; ++j) {
- unsigned Idx = WhichResult;
- for (unsigned i = 0; i != Half; ++i) {
- int MIdx = M[i + j * Half];
- if (MIdx >= 0 && (unsigned)MIdx != Idx)
- return false;
- Idx += 2;
- }
- }
-
- return true;
-}
-
-/// isTRN_v_undef_Mask - Special case of isTRNMask for canonical form of
-/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
-/// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>.
-static bool isTRN_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
- unsigned NumElts = VT.getVectorNumElements();
- WhichResult = (M[0] == 0 ? 0 : 1);
- for (unsigned i = 0; i < NumElts; i += 2) {
- if ((M[i] >= 0 && (unsigned)M[i] != i + WhichResult) ||
- (M[i + 1] >= 0 && (unsigned)M[i + 1] != i + WhichResult))
- return false;
- }
- return true;
-}
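
For a v4 shuffle (second source's elements numbered 4-7), the canonical masks the six predicates above accept are ZIP1 <0,4,1,5>, ZIP2 <2,6,3,7>, UZP1 <0,2,4,6>, UZP2 <1,3,5,7>, TRN1 <0,4,2,6> and TRN2 <1,5,3,7>; the _v_undef variants take the same shapes with both halves drawn from the first source. A small generator for the two-source forms, written purely as an illustration:

#include <cassert>
#include <vector>

// Sketch: generate the canonical two-source masks that isZIPMask, isUZPMask
// and isTRNMask accept for a given element count.
enum Kind { ZIP, UZP, TRN };

static std::vector<int> mask(Kind K, unsigned NumElts, unsigned WhichResult) {
  std::vector<int> M;
  for (unsigned i = 0; i < NumElts; ++i) {
    switch (K) {
    case ZIP:
      M.push_back(i / 2 + WhichResult * NumElts / 2 + (i % 2) * NumElts);
      break;
    case UZP:
      M.push_back(2 * i + WhichResult);
      break;
    case TRN:
      M.push_back((i & ~1u) + WhichResult + (i % 2) * NumElts);
      break;
    }
  }
  return M;
}

int main() {
  assert((mask(ZIP, 4, 0) == std::vector<int>{0, 4, 1, 5}));  // ZIP1
  assert((mask(ZIP, 4, 1) == std::vector<int>{2, 6, 3, 7}));  // ZIP2
  assert((mask(UZP, 4, 0) == std::vector<int>{0, 2, 4, 6}));  // UZP1
  assert((mask(UZP, 4, 1) == std::vector<int>{1, 3, 5, 7}));  // UZP2
  assert((mask(TRN, 4, 0) == std::vector<int>{0, 4, 2, 6}));  // TRN1
  assert((mask(TRN, 4, 1) == std::vector<int>{1, 5, 3, 7}));  // TRN2
  return 0;
}
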
-
-/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
-/// the specified operations to build the shuffle.
-static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
- SDValue RHS, SelectionDAG &DAG,
- SDLoc dl) {
- unsigned OpNum = (PFEntry >> 26) & 0x0F;
- unsigned LHSID = (PFEntry >> 13) & ((1 << 13) - 1);
- unsigned RHSID = (PFEntry >> 0) & ((1 << 13) - 1);
-
- enum {
- OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
- OP_VREV,
- OP_VDUP0,
- OP_VDUP1,
- OP_VDUP2,
- OP_VDUP3,
- OP_VEXT1,
- OP_VEXT2,
- OP_VEXT3,
- OP_VUZPL, // VUZP, left result
- OP_VUZPR, // VUZP, right result
- OP_VZIPL, // VZIP, left result
- OP_VZIPR, // VZIP, right result
- OP_VTRNL, // VTRN, left result
- OP_VTRNR // VTRN, right result
- };
-
- if (OpNum == OP_COPY) {
- if (LHSID == (1 * 9 + 2) * 9 + 3)
- return LHS;
- assert(LHSID == ((4 * 9 + 5) * 9 + 6) * 9 + 7 && "Illegal OP_COPY!");
- return RHS;
- }
-
- SDValue OpLHS, OpRHS;
- OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
- OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
- EVT VT = OpLHS.getValueType();
-
- switch (OpNum) {
- default:
- llvm_unreachable("Unknown shuffle opcode!");
- case OP_VREV:
- // VREV divides the vector in half and swaps within the half.
- if (VT.getVectorElementType() == MVT::i32 ||
- VT.getVectorElementType() == MVT::f32)
- return DAG.getNode(ARM64ISD::REV64, dl, VT, OpLHS);
- // vrev <4 x i16> -> REV32
- if (VT.getVectorElementType() == MVT::i16)
- return DAG.getNode(ARM64ISD::REV32, dl, VT, OpLHS);
- // vrev <4 x i8> -> REV16
- assert(VT.getVectorElementType() == MVT::i8);
- return DAG.getNode(ARM64ISD::REV16, dl, VT, OpLHS);
- case OP_VDUP0:
- case OP_VDUP1:
- case OP_VDUP2:
- case OP_VDUP3: {
- EVT EltTy = VT.getVectorElementType();
- unsigned Opcode;
- if (EltTy == MVT::i8)
- Opcode = ARM64ISD::DUPLANE8;
- else if (EltTy == MVT::i16)
- Opcode = ARM64ISD::DUPLANE16;
- else if (EltTy == MVT::i32 || EltTy == MVT::f32)
- Opcode = ARM64ISD::DUPLANE32;
- else if (EltTy == MVT::i64 || EltTy == MVT::f64)
- Opcode = ARM64ISD::DUPLANE64;
- else
- llvm_unreachable("Invalid vector element type?");
-
- if (VT.getSizeInBits() == 64)
- OpLHS = WidenVector(OpLHS, DAG);
- SDValue Lane = DAG.getConstant(OpNum - OP_VDUP0, MVT::i64);
- return DAG.getNode(Opcode, dl, VT, OpLHS, Lane);
- }
- case OP_VEXT1:
- case OP_VEXT2:
- case OP_VEXT3: {
- unsigned Imm = (OpNum - OP_VEXT1 + 1) * getExtFactor(OpLHS);
- return DAG.getNode(ARM64ISD::EXT, dl, VT, OpLHS, OpRHS,
- DAG.getConstant(Imm, MVT::i32));
- }
- case OP_VUZPL:
- return DAG.getNode(ARM64ISD::UZP1, dl, DAG.getVTList(VT, VT), OpLHS, OpRHS);
- case OP_VUZPR:
- return DAG.getNode(ARM64ISD::UZP2, dl, DAG.getVTList(VT, VT), OpLHS, OpRHS);
- case OP_VZIPL:
- return DAG.getNode(ARM64ISD::ZIP1, dl, DAG.getVTList(VT, VT), OpLHS, OpRHS);
- case OP_VZIPR:
- return DAG.getNode(ARM64ISD::ZIP2, dl, DAG.getVTList(VT, VT), OpLHS, OpRHS);
- case OP_VTRNL:
- return DAG.getNode(ARM64ISD::TRN1, dl, DAG.getVTList(VT, VT), OpLHS, OpRHS);
- case OP_VTRNR:
- return DAG.getNode(ARM64ISD::TRN2, dl, DAG.getVTList(VT, VT), OpLHS, OpRHS);
- }
-}
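
Each PerfectShuffleTable entry is a packed word: bits 31-30 hold the cost, 29-26 the operation, 25-13 the LHS operand's table index and 12-0 the RHS's, where an index is the four mask values (0-7, or 8 for undef) packed base-9. A decoding sketch under that layout:

#include <cassert>
#include <cstdint>

// Sketch of the PFEntry layout used above: [31:30] cost, [29:26] opcode,
// [25:13] LHS table index, [12:0] RHS table index.
struct PFDecoded { unsigned Cost, OpNum, LHSID, RHSID; };

static PFDecoded decode(uint32_t PFEntry) {
  return { PFEntry >> 30, (PFEntry >> 26) & 0x0F,
           (PFEntry >> 13) & ((1u << 13) - 1), PFEntry & ((1u << 13) - 1) };
}

static unsigned packMask(int M0, int M1, int M2, int M3) {
  return ((M0 * 9 + M1) * 9 + M2) * 9 + M3;  // base-9, 8 == undef lane
}

int main() {
  // The OP_COPY identities checked above: <0,1,2,3> selects LHS as-is and
  // <4,5,6,7> selects RHS as-is.
  assert(packMask(0, 1, 2, 3) == (1 * 9 + 2) * 9 + 3);
  assert(packMask(4, 5, 6, 7) == ((4 * 9 + 5) * 9 + 6) * 9 + 7);
  PFDecoded D = decode(0u << 30 | 0u << 26 | 102u << 13 | 3382u);
  assert(D.OpNum == 0 && D.LHSID == 102 && D.RHSID == 3382);
  return 0;
}
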
-
-static SDValue GenerateTBL(SDValue Op, ArrayRef<int> ShuffleMask,
- SelectionDAG &DAG) {
- // Check to see if we can use the TBL instruction.
- SDValue V1 = Op.getOperand(0);
- SDValue V2 = Op.getOperand(1);
- SDLoc DL(Op);
-
- EVT EltVT = Op.getValueType().getVectorElementType();
- unsigned BytesPerElt = EltVT.getSizeInBits() / 8;
-
- SmallVector<SDValue, 8> TBLMask;
- for (int Val : ShuffleMask) {
- for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
- unsigned Offset = Byte + Val * BytesPerElt;
- TBLMask.push_back(DAG.getConstant(Offset, MVT::i32));
- }
- }
-
- MVT IndexVT = MVT::v8i8;
- unsigned IndexLen = 8;
- if (Op.getValueType().getSizeInBits() == 128) {
- IndexVT = MVT::v16i8;
- IndexLen = 16;
- }
-
- SDValue V1Cst = DAG.getNode(ISD::BITCAST, DL, IndexVT, V1);
- SDValue V2Cst = DAG.getNode(ISD::BITCAST, DL, IndexVT, V2);
-
- SDValue Shuffle;
- if (V2.getNode()->getOpcode() == ISD::UNDEF) {
- if (IndexLen == 8)
- V1Cst = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, V1Cst, V1Cst);
- Shuffle = DAG.getNode(
- ISD::INTRINSIC_WO_CHAIN, DL, IndexVT,
- DAG.getConstant(Intrinsic::arm64_neon_tbl1, MVT::i32), V1Cst,
- DAG.getNode(ISD::BUILD_VECTOR, DL, IndexVT, &TBLMask[0], IndexLen));
- } else {
- if (IndexLen == 8) {
- V1Cst = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, V1Cst, V2Cst);
- Shuffle = DAG.getNode(
- ISD::INTRINSIC_WO_CHAIN, DL, IndexVT,
- DAG.getConstant(Intrinsic::arm64_neon_tbl1, MVT::i32), V1Cst,
- DAG.getNode(ISD::BUILD_VECTOR, DL, IndexVT, &TBLMask[0], IndexLen));
- } else {
- // FIXME: We cannot, for the moment, emit a TBL2 instruction because we
- // cannot currently represent the register constraints on the input
- // table registers.
- // Shuffle = DAG.getNode(ARM64ISD::TBL2, DL, IndexVT, V1Cst, V2Cst,
- // DAG.getNode(ISD::BUILD_VECTOR, DL, IndexVT,
- // &TBLMask[0], IndexLen));
- Shuffle = DAG.getNode(
- ISD::INTRINSIC_WO_CHAIN, DL, IndexVT,
- DAG.getConstant(Intrinsic::arm64_neon_tbl2, MVT::i32), V1Cst, V2Cst,
- DAG.getNode(ISD::BUILD_VECTOR, DL, IndexVT, &TBLMask[0], IndexLen));
- }
- }
- return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Shuffle);
-}
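
The mask expansion at the top of GenerateTBL is purely mechanical: each element index Val becomes the BytesPerElt byte indices Val*BytesPerElt through Val*BytesPerElt + BytesPerElt - 1. A sketch, where tblBytes is a stand-in that skips the undef handling:

#include <cassert>
#include <vector>

// Sketch: expand an element-level shuffle mask into the byte indices a TBL
// lookup consumes, as the loop at the top of GenerateTBL does. Assumes all
// mask entries are non-negative.
static std::vector<unsigned> tblBytes(const std::vector<int> &Mask,
                                      unsigned BytesPerElt) {
  std::vector<unsigned> Bytes;
  for (int Val : Mask)
    for (unsigned B = 0; B < BytesPerElt; ++B)
      Bytes.push_back(B + Val * BytesPerElt);
  return Bytes;
}

int main() {
  // A v4i16 mask <2,3,0,1> becomes byte indices <4,5,6,7,0,1,2,3>.
  assert((tblBytes({2, 3, 0, 1}, 2) ==
          std::vector<unsigned>{4, 5, 6, 7, 0, 1, 2, 3}));
  return 0;
}
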
-
-static unsigned getDUPLANEOp(EVT EltType) {
- if (EltType == MVT::i8)
- return ARM64ISD::DUPLANE8;
- if (EltType == MVT::i16)
- return ARM64ISD::DUPLANE16;
- if (EltType == MVT::i32 || EltType == MVT::f32)
- return ARM64ISD::DUPLANE32;
- if (EltType == MVT::i64 || EltType == MVT::f64)
- return ARM64ISD::DUPLANE64;
-
- llvm_unreachable("Invalid vector element type?");
-}
-
-SDValue ARM64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
- SelectionDAG &DAG) const {
- SDLoc dl(Op);
- EVT VT = Op.getValueType();
-
- ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
-
- // Convert shuffles that are directly supported on NEON to target-specific
- // DAG nodes, instead of keeping them as shuffles and matching them again
- // during code selection. This is more efficient and avoids the possibility
- // of inconsistencies between legalization and selection.
- ArrayRef<int> ShuffleMask = SVN->getMask();
-
- SDValue V1 = Op.getOperand(0);
- SDValue V2 = Op.getOperand(1);
-
- if (ShuffleVectorSDNode::isSplatMask(&ShuffleMask[0],
- V1.getValueType().getSimpleVT())) {
- int Lane = SVN->getSplatIndex();
- // If this is undef splat, generate it via "just" vdup, if possible.
- if (Lane == -1)
- Lane = 0;
-
- if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR)
- return DAG.getNode(ARM64ISD::DUP, dl, V1.getValueType(),
- V1.getOperand(0));
- // Test if V1 is a BUILD_VECTOR and the lane being referenced is a non-
- // constant. If so, we can just reference the lane's definition directly.
- if (V1.getOpcode() == ISD::BUILD_VECTOR &&
- !isa<ConstantSDNode>(V1.getOperand(Lane)))
- return DAG.getNode(ARM64ISD::DUP, dl, VT, V1.getOperand(Lane));
-
- // Otherwise, duplicate from the lane of the input vector.
- unsigned Opcode = getDUPLANEOp(V1.getValueType().getVectorElementType());
-
- // SelectionDAGBuilder may have "helpfully" already extracted or concatenated
- // to make a vector of the same size as this SHUFFLE. We can ignore the
- // extract entirely, and canonicalise the concat using WidenVector.
- if (V1.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
- Lane += cast<ConstantSDNode>(V1.getOperand(1))->getZExtValue();
- V1 = V1.getOperand(0);
- } else if (V1.getOpcode() == ISD::CONCAT_VECTORS) {
- unsigned Idx = Lane >= (int)VT.getVectorNumElements() / 2;
- Lane -= Idx * VT.getVectorNumElements() / 2;
- V1 = WidenVector(V1.getOperand(Idx), DAG);
- } else if (VT.getSizeInBits() == 64)
- V1 = WidenVector(V1, DAG);
-
- return DAG.getNode(Opcode, dl, VT, V1, DAG.getConstant(Lane, MVT::i64));
- }
-
- if (isREVMask(ShuffleMask, VT, 64))
- return DAG.getNode(ARM64ISD::REV64, dl, V1.getValueType(), V1, V2);
- if (isREVMask(ShuffleMask, VT, 32))
- return DAG.getNode(ARM64ISD::REV32, dl, V1.getValueType(), V1, V2);
- if (isREVMask(ShuffleMask, VT, 16))
- return DAG.getNode(ARM64ISD::REV16, dl, V1.getValueType(), V1, V2);
-
- bool ReverseEXT = false;
- unsigned Imm;
- if (isEXTMask(ShuffleMask, VT, ReverseEXT, Imm)) {
- if (ReverseEXT)
- std::swap(V1, V2);
- Imm *= getExtFactor(V1);
- return DAG.getNode(ARM64ISD::EXT, dl, V1.getValueType(), V1, V2,
- DAG.getConstant(Imm, MVT::i32));
- } else if (V2->getOpcode() == ISD::UNDEF &&
- isSingletonEXTMask(ShuffleMask, VT, Imm)) {
- Imm *= getExtFactor(V1);
- return DAG.getNode(ARM64ISD::EXT, dl, V1.getValueType(), V1, V1,
- DAG.getConstant(Imm, MVT::i32));
- }
-
- unsigned WhichResult;
- if (isZIPMask(ShuffleMask, VT, WhichResult)) {
- unsigned Opc = (WhichResult == 0) ? ARM64ISD::ZIP1 : ARM64ISD::ZIP2;
- return DAG.getNode(Opc, dl, V1.getValueType(), V1, V2);
- }
- if (isUZPMask(ShuffleMask, VT, WhichResult)) {
- unsigned Opc = (WhichResult == 0) ? ARM64ISD::UZP1 : ARM64ISD::UZP2;
- return DAG.getNode(Opc, dl, V1.getValueType(), V1, V2);
- }
- if (isTRNMask(ShuffleMask, VT, WhichResult)) {
- unsigned Opc = (WhichResult == 0) ? ARM64ISD::TRN1 : ARM64ISD::TRN2;
- return DAG.getNode(Opc, dl, V1.getValueType(), V1, V2);
- }
-
- if (isZIP_v_undef_Mask(ShuffleMask, VT, WhichResult)) {
- unsigned Opc = (WhichResult == 0) ? ARM64ISD::ZIP1 : ARM64ISD::ZIP2;
- return DAG.getNode(Opc, dl, V1.getValueType(), V1, V1);
- }
- if (isUZP_v_undef_Mask(ShuffleMask, VT, WhichResult)) {
- unsigned Opc = (WhichResult == 0) ? ARM64ISD::UZP1 : ARM64ISD::UZP2;
- return DAG.getNode(Opc, dl, V1.getValueType(), V1, V1);
- }
- if (isTRN_v_undef_Mask(ShuffleMask, VT, WhichResult)) {
- unsigned Opc = (WhichResult == 0) ? ARM64ISD::TRN1 : ARM64ISD::TRN2;
- return DAG.getNode(Opc, dl, V1.getValueType(), V1, V1);
- }
-
- // If the shuffle is not directly supported and it has 4 elements, use
- // the PerfectShuffle-generated table to synthesize it from other shuffles.
- unsigned NumElts = VT.getVectorNumElements();
- if (NumElts == 4) {
- unsigned PFIndexes[4];
- for (unsigned i = 0; i != 4; ++i) {
- if (ShuffleMask[i] < 0)
- PFIndexes[i] = 8;
- else
- PFIndexes[i] = ShuffleMask[i];
- }
-
- // Compute the index in the perfect shuffle table.
- unsigned PFTableIndex = PFIndexes[0] * 9 * 9 * 9 + PFIndexes[1] * 9 * 9 +
- PFIndexes[2] * 9 + PFIndexes[3];
- unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
- unsigned Cost = (PFEntry >> 30);
-
- if (Cost <= 4)
- return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
- }
-
- return GenerateTBL(Op, ShuffleMask, DAG);
-}
-
-static bool resolveBuildVector(BuildVectorSDNode *BVN, APInt &CnstBits,
- APInt &UndefBits) {
- EVT VT = BVN->getValueType(0);
- APInt SplatBits, SplatUndef;
- unsigned SplatBitSize;
- bool HasAnyUndefs;
- if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
- unsigned NumSplats = VT.getSizeInBits() / SplatBitSize;
-
- for (unsigned i = 0; i < NumSplats; ++i) {
- CnstBits <<= SplatBitSize;
- UndefBits <<= SplatBitSize;
- CnstBits |= SplatBits.zextOrTrunc(VT.getSizeInBits());
- UndefBits |= (SplatBits ^ SplatUndef).zextOrTrunc(VT.getSizeInBits());
- }
-
- return true;
- }
-
- return false;
-}
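
resolveBuildVector replicates the splat across the full vector width, so for instance a v2i32 splat of 0xDEADBEEF produces CnstBits 0xDEADBEEFDEADBEEF. A sketch with a plain uint64_t in place of APInt, so it only covers vectors of at most 64 bits:

#include <cassert>
#include <cstdint>

// Sketch of the splat replication in resolveBuildVector, with uint64_t in
// place of APInt; shifting by the full width would be undefined, so 64-bit
// splats are returned directly.
static uint64_t replicateSplat(uint64_t SplatBits, unsigned SplatBitSize,
                               unsigned VTBits) {
  if (SplatBitSize == 64)
    return SplatBits;
  uint64_t Cnst = 0;
  for (unsigned i = 0; i < VTBits / SplatBitSize; ++i) {
    Cnst <<= SplatBitSize;
    Cnst |= SplatBits;
  }
  return Cnst;
}

int main() {
  assert(replicateSplat(0xDEADBEEFULL, 32, 64) == 0xDEADBEEFDEADBEEFULL);
  assert(replicateSplat(0xABULL, 8, 64) == 0xABABABABABABABABULL);
  return 0;
}
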
-
-SDValue ARM64TargetLowering::LowerVectorAND(SDValue Op,
- SelectionDAG &DAG) const {
- BuildVectorSDNode *BVN =
- dyn_cast<BuildVectorSDNode>(Op.getOperand(1).getNode());
- SDValue LHS = Op.getOperand(0);
- SDLoc dl(Op);
- EVT VT = Op.getValueType();
-
- if (!BVN)
- return Op;
-
- APInt CnstBits(VT.getSizeInBits(), 0);
- APInt UndefBits(VT.getSizeInBits(), 0);
- if (resolveBuildVector(BVN, CnstBits, UndefBits)) {
- // We only have BIC vector immediate instruction, which is and-not.
- CnstBits = ~CnstBits;
-
- // We make use of a little bit of goto ickiness in order to avoid having to
- // duplicate the immediate matching logic for the undef toggled case.
- bool SecondTry = false;
- AttemptModImm:
-
- if (CnstBits.getHiBits(64) == CnstBits.getLoBits(64)) {
- CnstBits = CnstBits.zextOrTrunc(64);
- uint64_t CnstVal = CnstBits.getZExtValue();
-
- if (ARM64_AM::isAdvSIMDModImmType1(CnstVal)) {
- CnstVal = ARM64_AM::encodeAdvSIMDModImmType1(CnstVal);
- MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
- SDValue Mov = DAG.getNode(ARM64ISD::BICi, dl, MovTy, LHS,
- DAG.getConstant(CnstVal, MVT::i32),
- DAG.getConstant(0, MVT::i32));
- return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
- }
-
- if (ARM64_AM::isAdvSIMDModImmType2(CnstVal)) {
- CnstVal = ARM64_AM::encodeAdvSIMDModImmType2(CnstVal);
- MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
- SDValue Mov = DAG.getNode(ARM64ISD::BICi, dl, MovTy, LHS,
- DAG.getConstant(CnstVal, MVT::i32),
- DAG.getConstant(8, MVT::i32));
- return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
- }
-
- if (ARM64_AM::isAdvSIMDModImmType3(CnstVal)) {
- CnstVal = ARM64_AM::encodeAdvSIMDModImmType3(CnstVal);
- MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
- SDValue Mov = DAG.getNode(ARM64ISD::BICi, dl, MovTy, LHS,
- DAG.getConstant(CnstVal, MVT::i32),
- DAG.getConstant(16, MVT::i32));
- return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
- }
-
- if (ARM64_AM::isAdvSIMDModImmType4(CnstVal)) {
- CnstVal = ARM64_AM::encodeAdvSIMDModImmType4(CnstVal);
- MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
- SDValue Mov = DAG.getNode(ARM64ISD::BICi, dl, MovTy, LHS,
- DAG.getConstant(CnstVal, MVT::i32),
- DAG.getConstant(24, MVT::i32));
- return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
- }
-
- if (ARM64_AM::isAdvSIMDModImmType5(CnstVal)) {
- CnstVal = ARM64_AM::encodeAdvSIMDModImmType5(CnstVal);
- MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16;
- SDValue Mov = DAG.getNode(ARM64ISD::BICi, dl, MovTy, LHS,
- DAG.getConstant(CnstVal, MVT::i32),
- DAG.getConstant(0, MVT::i32));
- return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
- }
-
- if (ARM64_AM::isAdvSIMDModImmType6(CnstVal)) {
- CnstVal = ARM64_AM::encodeAdvSIMDModImmType6(CnstVal);
- MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16;
- SDValue Mov = DAG.getNode(ARM64ISD::BICi, dl, MovTy, LHS,
- DAG.getConstant(CnstVal, MVT::i32),
- DAG.getConstant(8, MVT::i32));
- return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
- }
- }
-
- if (SecondTry)
- goto FailedModImm;
- SecondTry = true;
- CnstBits = ~UndefBits;
- goto AttemptModImm;
- }
-
-// We can always fall back to a non-immediate AND.
-FailedModImm:
- return Op;
-}
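
The SecondTry/goto structure above simply runs the immediate matcher twice: once on the constant bits as written, and once with the undef lanes toggled to the alternative pattern (for the AND case, after complementing for BIC). The same shape without goto is a two-iteration loop; a sketch, where matchImm stands in for the isAdvSIMDModImmTypeN chain and only recognizes a byte splat to stay small:

#include <cstdint>
#include <optional>

// Stand-in for the isAdvSIMDModImmTypeN checks: recognize only a 64-bit
// byte splat (roughly ModImm type 9) and return the encoded byte.
static std::optional<uint8_t> matchImm(uint64_t Bits) {
  uint8_t B = Bits & 0xFF;
  if (Bits == 0x0101010101010101ULL * B)
    return B;
  return std::nullopt;
}

// Two tries: the constant bits as written, then the variant with the undef
// lanes toggled, i.e. the loop equivalent of the goto AttemptModImm retry.
static std::optional<uint8_t> tryBothPatterns(uint64_t CnstBits,
                                              uint64_t UndefToggled) {
  for (int Try = 0; Try < 2; ++Try) {
    if (auto Enc = matchImm(Try == 0 ? CnstBits : UndefToggled))
      return Enc;
  }
  return std::nullopt;  // caller falls back to the non-immediate form
}

int main() {
  // Low 32 bits are a 0xAB byte splat, high 32 bits came from undef lanes:
  // the first try fails, the undef-toggled second try succeeds.
  return tryBothPatterns(0x00000000ABABABABULL,
                         0xABABABABABABABABULL) ? 0 : 1;
}
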
-
-// Specialized code to quickly find if PotentialBVec is a BuildVector that
-// consists of only the same constant int value, returned in the reference
-// arg ConstVal.
-static bool isAllConstantBuildVector(const SDValue &PotentialBVec,
- uint64_t &ConstVal) {
- BuildVectorSDNode *Bvec = dyn_cast<BuildVectorSDNode>(PotentialBVec);
- if (!Bvec)
- return false;
- ConstantSDNode *FirstElt = dyn_cast<ConstantSDNode>(Bvec->getOperand(0));
- if (!FirstElt)
- return false;
- EVT VT = Bvec->getValueType(0);
- unsigned NumElts = VT.getVectorNumElements();
- for (unsigned i = 1; i < NumElts; ++i)
- if (dyn_cast<ConstantSDNode>(Bvec->getOperand(i)) != FirstElt)
- return false;
- ConstVal = FirstElt->getZExtValue();
- return true;
-}
-
-static unsigned getIntrinsicID(const SDNode *N) {
- unsigned Opcode = N->getOpcode();
- switch (Opcode) {
- default:
- return Intrinsic::not_intrinsic;
- case ISD::INTRINSIC_WO_CHAIN: {
- unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
- if (IID < Intrinsic::num_intrinsics)
- return IID;
- return Intrinsic::not_intrinsic;
- }
- }
-}
-
-// Attempt to form a vector S[LR]I from (or (and X, BvecC1), (lsl Y, C2)),
-// to (SLI X, Y, C2), where X and Y have matching vector types, BvecC1 is a
-// BUILD_VECTORs with constant element C1, C2 is a constant, and C1 == ~C2.
-// Also, logical shift right -> sri, with the same structure.
-static SDValue tryLowerToSLI(SDNode *N, SelectionDAG &DAG) {
- EVT VT = N->getValueType(0);
-
- if (!VT.isVector())
- return SDValue();
-
- SDLoc DL(N);
-
- // Is the first op an AND?
- const SDValue And = N->getOperand(0);
- if (And.getOpcode() != ISD::AND)
- return SDValue();
-
- // Is the second op a shl or lshr?
- SDValue Shift = N->getOperand(1);
- // This will have been turned into: ARM64ISD::VSHL vector, #shift
- // or ARM64ISD::VLSHR vector, #shift
- unsigned ShiftOpc = Shift.getOpcode();
- if ((ShiftOpc != ARM64ISD::VSHL && ShiftOpc != ARM64ISD::VLSHR))
- return SDValue();
- bool IsShiftRight = ShiftOpc == ARM64ISD::VLSHR;
-
- // Is the shift amount constant?
- ConstantSDNode *C2node = dyn_cast<ConstantSDNode>(Shift.getOperand(1));
- if (!C2node)
- return SDValue();
-
- // Is the and mask vector all constant?
- uint64_t C1;
- if (!isAllConstantBuildVector(And.getOperand(1), C1))
- return SDValue();
-
- // Is C1 == ~C2, taking into account how much one can shift elements of a
- // particular size?
- uint64_t C2 = C2node->getZExtValue();
- unsigned ElemSizeInBits = VT.getVectorElementType().getSizeInBits();
- if (C2 > ElemSizeInBits)
- return SDValue();
- // Note: (1 << ElemSizeInBits) would be undefined behaviour for 32- and
- // 64-bit elements, so widen the mask computation to 64 bits.
- uint64_t ElemMask =
- (ElemSizeInBits == 64) ? ~0ULL : ((1ULL << ElemSizeInBits) - 1);
- if ((C1 & ElemMask) != (~C2 & ElemMask))
- return SDValue();
-
- SDValue X = And.getOperand(0);
- SDValue Y = Shift.getOperand(0);
-
- unsigned Intrin =
- IsShiftRight ? Intrinsic::arm64_neon_vsri : Intrinsic::arm64_neon_vsli;
- SDValue ResultSLI =
- DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
- DAG.getConstant(Intrin, MVT::i32), X, Y, Shift.getOperand(1));
-
- DEBUG(dbgs() << "arm64-lower: transformed: \n");
- DEBUG(N->dump(&DAG));
- DEBUG(dbgs() << "into: \n");
- DEBUG(ResultSLI->dump(&DAG));
-
- ++NumShiftInserts;
- return ResultSLI;
-}
-
-SDValue ARM64TargetLowering::LowerVectorOR(SDValue Op,
- SelectionDAG &DAG) const {
- // Attempt to form a vector S[LR]I from (or (and X, C1), (lsl Y, C2))
- if (EnableARM64SlrGeneration) {
- SDValue Res = tryLowerToSLI(Op.getNode(), DAG);
- if (Res.getNode())
- return Res;
- }
-
- BuildVectorSDNode *BVN =
- dyn_cast<BuildVectorSDNode>(Op.getOperand(0).getNode());
- SDValue LHS = Op.getOperand(1);
- SDLoc dl(Op);
- EVT VT = Op.getValueType();
-
- // OR commutes, so try swapping the operands.
- if (!BVN) {
- LHS = Op.getOperand(0);
- BVN = dyn_cast<BuildVectorSDNode>(Op.getOperand(1).getNode());
- }
- if (!BVN)
- return Op;
-
- APInt CnstBits(VT.getSizeInBits(), 0);
- APInt UndefBits(VT.getSizeInBits(), 0);
- if (resolveBuildVector(BVN, CnstBits, UndefBits)) {
- // We make use of a little bit of goto ickiness in order to avoid having to
- // duplicate the immediate matching logic for the undef toggled case.
- bool SecondTry = false;
- AttemptModImm:
-
- if (CnstBits.getHiBits(64) == CnstBits.getLoBits(64)) {
- CnstBits = CnstBits.zextOrTrunc(64);
- uint64_t CnstVal = CnstBits.getZExtValue();
-
- if (ARM64_AM::isAdvSIMDModImmType1(CnstVal)) {
- CnstVal = ARM64_AM::encodeAdvSIMDModImmType1(CnstVal);
- MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
- SDValue Mov = DAG.getNode(ARM64ISD::ORRi, dl, MovTy, LHS,
- DAG.getConstant(CnstVal, MVT::i32),
- DAG.getConstant(0, MVT::i32));
- return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
- }
-
- if (ARM64_AM::isAdvSIMDModImmType2(CnstVal)) {
- CnstVal = ARM64_AM::encodeAdvSIMDModImmType2(CnstVal);
- MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
- SDValue Mov = DAG.getNode(ARM64ISD::ORRi, dl, MovTy, LHS,
- DAG.getConstant(CnstVal, MVT::i32),
- DAG.getConstant(8, MVT::i32));
- return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
- }
-
- if (ARM64_AM::isAdvSIMDModImmType3(CnstVal)) {
- CnstVal = ARM64_AM::encodeAdvSIMDModImmType3(CnstVal);
- MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
- SDValue Mov = DAG.getNode(ARM64ISD::ORRi, dl, MovTy, LHS,
- DAG.getConstant(CnstVal, MVT::i32),
- DAG.getConstant(16, MVT::i32));
- return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
- }
-
- if (ARM64_AM::isAdvSIMDModImmType4(CnstVal)) {
- CnstVal = ARM64_AM::encodeAdvSIMDModImmType4(CnstVal);
- MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
- SDValue Mov = DAG.getNode(ARM64ISD::ORRi, dl, MovTy, LHS,
- DAG.getConstant(CnstVal, MVT::i32),
- DAG.getConstant(24, MVT::i32));
- return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
- }
-
- if (ARM64_AM::isAdvSIMDModImmType5(CnstVal)) {
- CnstVal = ARM64_AM::encodeAdvSIMDModImmType5(CnstVal);
- MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16;
- SDValue Mov = DAG.getNode(ARM64ISD::ORRi, dl, MovTy, LHS,
- DAG.getConstant(CnstVal, MVT::i32),
- DAG.getConstant(0, MVT::i32));
- return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
- }
-
- if (ARM64_AM::isAdvSIMDModImmType6(CnstVal)) {
- CnstVal = ARM64_AM::encodeAdvSIMDModImmType6(CnstVal);
- MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16;
- SDValue Mov = DAG.getNode(ARM64ISD::ORRi, dl, MovTy, LHS,
- DAG.getConstant(CnstVal, MVT::i32),
- DAG.getConstant(8, MVT::i32));
- return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
- }
- }
-
- if (SecondTry)
- goto FailedModImm;
- SecondTry = true;
- CnstBits = UndefBits;
- goto AttemptModImm;
- }
-
-// We can always fall back to a non-immediate OR.
-FailedModImm:
- return Op;
-}
-
-SDValue ARM64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
- SelectionDAG &DAG) const {
- BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
- SDLoc dl(Op);
- EVT VT = Op.getValueType();
-
- APInt CnstBits(VT.getSizeInBits(), 0);
- APInt UndefBits(VT.getSizeInBits(), 0);
- if (resolveBuildVector(BVN, CnstBits, UndefBits)) {
- // We make use of a little bit of goto ickiness in order to avoid having to
- // duplicate the immediate matching logic for the undef toggled case.
- bool SecondTry = false;
- AttemptModImm:
-
- if (CnstBits.getHiBits(64) == CnstBits.getLoBits(64)) {
- CnstBits = CnstBits.zextOrTrunc(64);
- uint64_t CnstVal = CnstBits.getZExtValue();
-
- // Certain magic vector constants (used to express things like NOT
- // and NEG) are passed through unmodified. This allows codegen patterns
- // for these operations to match. Special-purpose patterns will lower
- // these immediates to MOVIs if it proves necessary.
- if (VT.isInteger() && (CnstVal == 0 || CnstVal == ~0ULL))
- return Op;
-
- // The many faces of MOVI...
- if (ARM64_AM::isAdvSIMDModImmType10(CnstVal)) {
- CnstVal = ARM64_AM::encodeAdvSIMDModImmType10(CnstVal);
- if (VT.getSizeInBits() == 128) {
- SDValue Mov = DAG.getNode(ARM64ISD::MOVIedit, dl, MVT::v2i64,
- DAG.getConstant(CnstVal, MVT::i32));
- return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
- }
-
- // Support the V64 version via subregister insertion.
- SDValue Mov = DAG.getNode(ARM64ISD::MOVIedit, dl, MVT::f64,
- DAG.getConstant(CnstVal, MVT::i32));
- return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
- }
-
- if (ARM64_AM::isAdvSIMDModImmType1(CnstVal)) {
- CnstVal = ARM64_AM::encodeAdvSIMDModImmType1(CnstVal);
- MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
- SDValue Mov = DAG.getNode(ARM64ISD::MOVIshift, dl, MovTy,
- DAG.getConstant(CnstVal, MVT::i32),
- DAG.getConstant(0, MVT::i32));
- return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
- }
-
- if (ARM64_AM::isAdvSIMDModImmType2(CnstVal)) {
- CnstVal = ARM64_AM::encodeAdvSIMDModImmType2(CnstVal);
- MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
- SDValue Mov = DAG.getNode(ARM64ISD::MOVIshift, dl, MovTy,
- DAG.getConstant(CnstVal, MVT::i32),
- DAG.getConstant(8, MVT::i32));
- return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
- }
-
- if (ARM64_AM::isAdvSIMDModImmType3(CnstVal)) {
- CnstVal = ARM64_AM::encodeAdvSIMDModImmType3(CnstVal);
- MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
- SDValue Mov = DAG.getNode(ARM64ISD::MOVIshift, dl, MovTy,
- DAG.getConstant(CnstVal, MVT::i32),
- DAG.getConstant(16, MVT::i32));
- return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
- }
-
- if (ARM64_AM::isAdvSIMDModImmType4(CnstVal)) {
- CnstVal = ARM64_AM::encodeAdvSIMDModImmType4(CnstVal);
- MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
- SDValue Mov = DAG.getNode(ARM64ISD::MOVIshift, dl, MovTy,
- DAG.getConstant(CnstVal, MVT::i32),
- DAG.getConstant(24, MVT::i32));
- return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
- }
-
- if (ARM64_AM::isAdvSIMDModImmType5(CnstVal)) {
- CnstVal = ARM64_AM::encodeAdvSIMDModImmType5(CnstVal);
- MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16;
- SDValue Mov = DAG.getNode(ARM64ISD::MOVIshift, dl, MovTy,
- DAG.getConstant(CnstVal, MVT::i32),
- DAG.getConstant(0, MVT::i32));
- return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
- }
-
- if (ARM64_AM::isAdvSIMDModImmType6(CnstVal)) {
- CnstVal = ARM64_AM::encodeAdvSIMDModImmType6(CnstVal);
- MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16;
- SDValue Mov = DAG.getNode(ARM64ISD::MOVIshift, dl, MovTy,
- DAG.getConstant(CnstVal, MVT::i32),
- DAG.getConstant(8, MVT::i32));
- return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
- }
-
- if (ARM64_AM::isAdvSIMDModImmType7(CnstVal)) {
- CnstVal = ARM64_AM::encodeAdvSIMDModImmType7(CnstVal);
- MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
- SDValue Mov = DAG.getNode(ARM64ISD::MOVImsl, dl, MovTy,
- DAG.getConstant(CnstVal, MVT::i32),
- DAG.getConstant(264, MVT::i32));
- return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
- }
-
- if (ARM64_AM::isAdvSIMDModImmType8(CnstVal)) {
- CnstVal = ARM64_AM::encodeAdvSIMDModImmType8(CnstVal);
- MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
- SDValue Mov = DAG.getNode(ARM64ISD::MOVImsl, dl, MovTy,
- DAG.getConstant(CnstVal, MVT::i32),
- DAG.getConstant(272, MVT::i32));
- return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
- }
-
- if (ARM64_AM::isAdvSIMDModImmType9(CnstVal)) {
- CnstVal = ARM64_AM::encodeAdvSIMDModImmType9(CnstVal);
- MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v16i8 : MVT::v8i8;
- SDValue Mov = DAG.getNode(ARM64ISD::MOVI, dl, MovTy,
- DAG.getConstant(CnstVal, MVT::i32));
- return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
- }
-
- // The few faces of FMOV...
- if (ARM64_AM::isAdvSIMDModImmType11(CnstVal)) {
- CnstVal = ARM64_AM::encodeAdvSIMDModImmType11(CnstVal);
- MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4f32 : MVT::v2f32;
- SDValue Mov = DAG.getNode(ARM64ISD::FMOV, dl, MovTy,
- DAG.getConstant(CnstVal, MVT::i32));
- return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
- }
-
- if (ARM64_AM::isAdvSIMDModImmType12(CnstVal) &&
- VT.getSizeInBits() == 128) {
- CnstVal = ARM64_AM::encodeAdvSIMDModImmType12(CnstVal);
- SDValue Mov = DAG.getNode(ARM64ISD::FMOV, dl, MVT::v2f64,
- DAG.getConstant(CnstVal, MVT::i32));
- return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
- }
-
- // The many faces of MVNI...
- CnstVal = ~CnstVal;
- if (ARM64_AM::isAdvSIMDModImmType1(CnstVal)) {
- CnstVal = ARM64_AM::encodeAdvSIMDModImmType1(CnstVal);
- MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
- SDValue Mov = DAG.getNode(ARM64ISD::MVNIshift, dl, MovTy,
- DAG.getConstant(CnstVal, MVT::i32),
- DAG.getConstant(0, MVT::i32));
- return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
- }
-
- if (ARM64_AM::isAdvSIMDModImmType2(CnstVal)) {
- CnstVal = ARM64_AM::encodeAdvSIMDModImmType2(CnstVal);
- MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
- SDValue Mov = DAG.getNode(ARM64ISD::MVNIshift, dl, MovTy,
- DAG.getConstant(CnstVal, MVT::i32),
- DAG.getConstant(8, MVT::i32));
- return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
- }
-
- if (ARM64_AM::isAdvSIMDModImmType3(CnstVal)) {
- CnstVal = ARM64_AM::encodeAdvSIMDModImmType3(CnstVal);
- MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
- SDValue Mov = DAG.getNode(ARM64ISD::MVNIshift, dl, MovTy,
- DAG.getConstant(CnstVal, MVT::i32),
- DAG.getConstant(16, MVT::i32));
- return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
- }
-
- if (ARM64_AM::isAdvSIMDModImmType4(CnstVal)) {
- CnstVal = ARM64_AM::encodeAdvSIMDModImmType4(CnstVal);
- MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
- SDValue Mov = DAG.getNode(ARM64ISD::MVNIshift, dl, MovTy,
- DAG.getConstant(CnstVal, MVT::i32),
- DAG.getConstant(24, MVT::i32));
- return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
- }
-
- if (ARM64_AM::isAdvSIMDModImmType5(CnstVal)) {
- CnstVal = ARM64_AM::encodeAdvSIMDModImmType5(CnstVal);
- MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16;
- SDValue Mov = DAG.getNode(ARM64ISD::MVNIshift, dl, MovTy,
- DAG.getConstant(CnstVal, MVT::i32),
- DAG.getConstant(0, MVT::i32));
- return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
- }
-
- if (ARM64_AM::isAdvSIMDModImmType6(CnstVal)) {
- CnstVal = ARM64_AM::encodeAdvSIMDModImmType6(CnstVal);
- MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16;
- SDValue Mov = DAG.getNode(ARM64ISD::MVNIshift, dl, MovTy,
- DAG.getConstant(CnstVal, MVT::i32),
- DAG.getConstant(8, MVT::i32));
- return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
- }
-
- if (ARM64_AM::isAdvSIMDModImmType7(CnstVal)) {
- CnstVal = ARM64_AM::encodeAdvSIMDModImmType7(CnstVal);
- MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
- SDValue Mov = DAG.getNode(ARM64ISD::MVNImsl, dl, MovTy,
- DAG.getConstant(CnstVal, MVT::i32),
- DAG.getConstant(264, MVT::i32));
- return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
- }
-
- if (ARM64_AM::isAdvSIMDModImmType8(CnstVal)) {
- CnstVal = ARM64_AM::encodeAdvSIMDModImmType8(CnstVal);
- MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
- SDValue Mov = DAG.getNode(ARM64ISD::MVNImsl, dl, MovTy,
- DAG.getConstant(CnstVal, MVT::i32),
- DAG.getConstant(272, MVT::i32));
- return DAG.getNode(ISD::BITCAST, dl, VT, Mov);
- }
- }
-
- if (SecondTry)
- goto FailedModImm;
- SecondTry = true;
- CnstBits = UndefBits;
- goto AttemptModImm;
- }
-FailedModImm:
-
- // Scan through the operands to find some interesting properties we can
- // exploit:
- // 1) If only one value is used, we can use a DUP, or
- // 2) if only the low element is not undef, we can just insert that, or
- // 3) if only one constant value is used (w/ some non-constant lanes),
- // we can splat the constant value into the whole vector then fill
- // in the non-constant lanes.
- // 4) FIXME: If different constant values are used, but we can intelligently
- // select the values we'll be overwriting for the non-constant
- // lanes such that we can directly materialize the vector
- // some other way (MOVI, e.g.), we can be sneaky.
- unsigned NumElts = VT.getVectorNumElements();
- bool isOnlyLowElement = true;
- bool usesOnlyOneValue = true;
- bool usesOnlyOneConstantValue = true;
- bool isConstant = true;
- unsigned NumConstantLanes = 0;
- SDValue Value;
- SDValue ConstantValue;
- for (unsigned i = 0; i < NumElts; ++i) {
- SDValue V = Op.getOperand(i);
- if (V.getOpcode() == ISD::UNDEF)
- continue;
- if (i > 0)
- isOnlyLowElement = false;
- if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V))
- isConstant = false;
-
- if (isa<ConstantSDNode>(V)) {
- ++NumConstantLanes;
- if (!ConstantValue.getNode())
- ConstantValue = V;
- else if (ConstantValue != V)
- usesOnlyOneConstantValue = false;
- }
-
- if (!Value.getNode())
- Value = V;
- else if (V != Value)
- usesOnlyOneValue = false;
- }
-
- if (!Value.getNode())
- return DAG.getUNDEF(VT);
-
- if (isOnlyLowElement)
- return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value);
-
- // Use DUP for non-constant splats. For f32 constant splats, reduce to
- // i32 and try again.
- if (usesOnlyOneValue) {
- if (!isConstant) {
- if (Value.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
- Value.getValueType() != VT)
- return DAG.getNode(ARM64ISD::DUP, dl, VT, Value);
-
- // This is actually a DUPLANExx operation, which keeps everything vectory.
-
- // DUPLANE works on 128-bit vectors, widen it if necessary.
- SDValue Lane = Value.getOperand(1);
- Value = Value.getOperand(0);
- if (Value.getValueType().getSizeInBits() == 64)
- Value = WidenVector(Value, DAG);
-
- unsigned Opcode = getDUPLANEOp(VT.getVectorElementType());
- return DAG.getNode(Opcode, dl, VT, Value, Lane);
- }
-
- if (VT.getVectorElementType().isFloatingPoint()) {
- SmallVector<SDValue, 8> Ops;
- MVT NewType =
- (VT.getVectorElementType() == MVT::f32) ? MVT::i32 : MVT::i64;
- for (unsigned i = 0; i < NumElts; ++i)
- Ops.push_back(DAG.getNode(ISD::BITCAST, dl, NewType, Op.getOperand(i)));
- EVT VecVT = EVT::getVectorVT(*DAG.getContext(), NewType, NumElts);
- SDValue Val = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, &Ops[0], NumElts);
- Val = LowerBUILD_VECTOR(Val, DAG);
- if (Val.getNode())
- return DAG.getNode(ISD::BITCAST, dl, VT, Val);
- }
- }
-
- // If only one constant value was used, and for more than one lane,
- // start by splatting that value, then replace the non-constant lanes. This
- // is better than the default, which will perform a separate initialization
- // for each lane.
- if (NumConstantLanes > 0 && usesOnlyOneConstantValue) {
- SDValue Val = DAG.getNode(ARM64ISD::DUP, dl, VT, ConstantValue);
- // Now insert the non-constant lanes.
- for (unsigned i = 0; i < NumElts; ++i) {
- SDValue V = Op.getOperand(i);
- SDValue LaneIdx = DAG.getConstant(i, MVT::i64);
- if (!isa<ConstantSDNode>(V)) {
- // Note that type legalization likely mucked about with the VT of the
- // source operand, so we may have to convert it here before inserting.
- Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Val, V, LaneIdx);
- }
- }
- return Val;
- }
-
- // If all elements are constants and the case above didn't get hit, fall back
- // to the default expansion, which will generate a load from the constant
- // pool.
- if (isConstant)
- return SDValue();
-
- // Empirical tests suggest this is rarely worth it for vectors of length <= 2.
- if (NumElts >= 4) {
- SDValue shuffle = ReconstructShuffle(Op, DAG);
- if (shuffle != SDValue())
- return shuffle;
- }
-
- // If all else fails, just use a sequence of INSERT_VECTOR_ELT when we
- // know the default expansion would otherwise fall back on something even
- // worse. For a vector with one or two non-undef values, the default is a
- // scalar_to_vector for the elements followed by a shuffle (provided the
- // shuffle is valid for the target); for everything else, it is
- // materialization element by element on the stack followed by a load.
- if (!isConstant && !usesOnlyOneValue) {
- SDValue Vec = DAG.getUNDEF(VT);
- SDValue Op0 = Op.getOperand(0);
- unsigned ElemSize = VT.getVectorElementType().getSizeInBits();
- unsigned i = 0;
- // For 32 and 64 bit types, use INSERT_SUBREG for lane zero to
- // a) Avoid a RMW dependency on the full vector register, and
- // b) Allow the register coalescer to fold away the copy if the
- // value is already in an S or D register.
- if (Op0.getOpcode() != ISD::UNDEF && (ElemSize == 32 || ElemSize == 64)) {
- unsigned SubIdx = ElemSize == 32 ? ARM64::ssub : ARM64::dsub;
- MachineSDNode *N =
- DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, dl, VT, Vec, Op0,
- DAG.getTargetConstant(SubIdx, MVT::i32));
- Vec = SDValue(N, 0);
- ++i;
- }
- for (; i < NumElts; ++i) {
- SDValue V = Op.getOperand(i);
- if (V.getOpcode() == ISD::UNDEF)
- continue;
- SDValue LaneIdx = DAG.getConstant(i, MVT::i64);
- Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Vec, V, LaneIdx);
- }
- return Vec;
- }
-
- // Just use the default expansion. We failed to find a better alternative.
- return SDValue();
-}
-
-SDValue ARM64TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
- SelectionDAG &DAG) const {
- assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT && "Unknown opcode!");
-
- // Check for non-constant lane.
- if (!isa<ConstantSDNode>(Op.getOperand(2)))
- return SDValue();
-
- EVT VT = Op.getOperand(0).getValueType();
-
- // Insertion/extraction are legal for V128 types.
- if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
- VT == MVT::v2i64 || VT == MVT::v4f32 || VT == MVT::v2f64)
- return Op;
-
- if (VT != MVT::v8i8 && VT != MVT::v4i16 && VT != MVT::v2i32 &&
- VT != MVT::v1i64 && VT != MVT::v2f32)
- return SDValue();
-
- // For V64 types, we perform insertion by expanding the value
- // to a V128 type and perform the insertion on that.
- SDLoc DL(Op);
- SDValue WideVec = WidenVector(Op.getOperand(0), DAG);
- EVT WideTy = WideVec.getValueType();
-
- SDValue Node = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, WideTy, WideVec,
- Op.getOperand(1), Op.getOperand(2));
- // Re-narrow the resultant vector.
- return NarrowVector(Node, DAG);
-}
-
-SDValue ARM64TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
- SelectionDAG &DAG) const {
- assert(Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT && "Unknown opcode!");
-
- // Check for non-constant lane.
- if (!isa<ConstantSDNode>(Op.getOperand(1)))
- return SDValue();
-
- EVT VT = Op.getOperand(0).getValueType();
-
- // Insertion/extraction are legal for V128 types.
- if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
- VT == MVT::v2i64 || VT == MVT::v4f32 || VT == MVT::v2f64)
- return Op;
-
- if (VT != MVT::v8i8 && VT != MVT::v4i16 && VT != MVT::v2i32 &&
- VT != MVT::v1i64 && VT != MVT::v2f32)
- return SDValue();
-
- // For V64 types, we perform extraction by expanding the value
- // to a V128 type and perform the extraction on that.
- SDLoc DL(Op);
- SDValue WideVec = WidenVector(Op.getOperand(0), DAG);
- EVT WideTy = WideVec.getValueType();
-
- EVT ExtrTy = WideTy.getVectorElementType();
- if (ExtrTy == MVT::i16 || ExtrTy == MVT::i8)
- ExtrTy = MVT::i32;
-
- // For extractions, we just return the result directly.
- return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ExtrTy, WideVec,
- Op.getOperand(1));
-}
-
-SDValue ARM64TargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
- SelectionDAG &DAG) const {
- assert(Op.getOpcode() == ISD::SCALAR_TO_VECTOR && "Unknown opcode!");
- // Some AdvSIMD intrinsics leave their results in the scalar B/H/S/D
- // registers. The default lowering will copy those to a GPR then back
- // to a vector register. Instead, just recognize those cases and reference
- // the vector register they're already a subreg of.
- SDValue Op0 = Op->getOperand(0);
- if (Op0->getOpcode() != ISD::INTRINSIC_WO_CHAIN)
- return Op;
- unsigned IID = getIntrinsicID(Op0.getNode());
- // The below list of intrinsics isn't exhaustive. Add cases as needed.
- // FIXME: Even better would be if there were an attribute on the node
- // that we could query and set in the intrinsics definition or something.
- unsigned SubIdx;
- switch (IID) {
- default:
- // Early exit if this isn't one of the intrinsics we handle.
- return Op;
- case Intrinsic::arm64_neon_uaddv:
- case Intrinsic::arm64_neon_saddv:
- case Intrinsic::arm64_neon_uaddlv:
- case Intrinsic::arm64_neon_saddlv:
- switch (Op0.getValueType().getSizeInBits()) {
- default:
- llvm_unreachable("Illegal result size from ARM64 vector intrinsic!");
- case 8:
- SubIdx = ARM64::bsub;
- break;
- case 16:
- SubIdx = ARM64::hsub;
- break;
- case 32:
- SubIdx = ARM64::ssub;
- break;
- case 64:
- SubIdx = ARM64::dsub;
- break;
- }
- }
- MachineSDNode *N =
- DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, SDLoc(Op),
- Op.getValueType(), DAG.getUNDEF(Op.getValueType()),
- Op0, DAG.getTargetConstant(SubIdx, MVT::i32));
- return SDValue(N, 0);
-}
-
-SDValue ARM64TargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op,
- SelectionDAG &DAG) const {
- EVT VT = Op.getOperand(0).getValueType();
- SDLoc dl(Op);
- // Just in case...
- if (!VT.isVector())
- return SDValue();
-
- ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op.getOperand(1));
- if (!Cst)
- return SDValue();
- unsigned Val = Cst->getZExtValue();
-
- unsigned Size = Op.getValueType().getSizeInBits();
- if (Val == 0) {
- switch (Size) {
- case 8:
- return DAG.getTargetExtractSubreg(ARM64::bsub, dl, Op.getValueType(),
- Op.getOperand(0));
- case 16:
- return DAG.getTargetExtractSubreg(ARM64::hsub, dl, Op.getValueType(),
- Op.getOperand(0));
- case 32:
- return DAG.getTargetExtractSubreg(ARM64::ssub, dl, Op.getValueType(),
- Op.getOperand(0));
- case 64:
- return DAG.getTargetExtractSubreg(ARM64::dsub, dl, Op.getValueType(),
- Op.getOperand(0));
- default:
- llvm_unreachable("Unexpected vector type in extract_subvector!");
- }
- }
- // If this is extracting the upper 64 bits of a 128-bit vector, we match
- // that directly.
- if (Size == 64 && Val * VT.getVectorElementType().getSizeInBits() == 64)
- return Op;
-
- return SDValue();
-}
-
-bool ARM64TargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
- EVT VT) const {
- if (VT.getVectorNumElements() == 4 &&
- (VT.is128BitVector() || VT.is64BitVector())) {
- unsigned PFIndexes[4];
- for (unsigned i = 0; i != 4; ++i) {
- if (M[i] < 0)
- PFIndexes[i] = 8;
- else
- PFIndexes[i] = M[i];
- }
-
- // Compute the index in the perfect shuffle table.
- unsigned PFTableIndex = PFIndexes[0] * 9 * 9 * 9 + PFIndexes[1] * 9 * 9 +
- PFIndexes[2] * 9 + PFIndexes[3];
- unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
- unsigned Cost = (PFEntry >> 30);
-
- if (Cost <= 4)
- return true;
- }
-
- bool ReverseVEXT;
- unsigned Imm, WhichResult;
-
- return (ShuffleVectorSDNode::isSplatMask(&M[0], VT) || isREVMask(M, VT, 64) ||
- isREVMask(M, VT, 32) || isREVMask(M, VT, 16) ||
- isEXTMask(M, VT, ReverseVEXT, Imm) ||
- // isTBLMask(M, VT) || // FIXME: Port TBL support from ARM.
- isTRNMask(M, VT, WhichResult) || isUZPMask(M, VT, WhichResult) ||
- isZIPMask(M, VT, WhichResult) ||
- isTRN_v_undef_Mask(M, VT, WhichResult) ||
- isUZP_v_undef_Mask(M, VT, WhichResult) ||
- isZIP_v_undef_Mask(M, VT, WhichResult));
-}
-
-/// getVShiftImm - Check if this is a valid build_vector for the immediate
-/// operand of a vector shift operation, where all the elements of the
-/// build_vector must have the same constant integer value.
-static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) {
- // Ignore bit_converts.
- while (Op.getOpcode() == ISD::BITCAST)
- Op = Op.getOperand(0);
- BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
- APInt SplatBits, SplatUndef;
- unsigned SplatBitSize;
- bool HasAnyUndefs;
- if (!BVN || !BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize,
- HasAnyUndefs, ElementBits) ||
- SplatBitSize > ElementBits)
- return false;
- Cnt = SplatBits.getSExtValue();
- return true;
-}
-
-/// isVShiftLImm - Check if this is a valid build_vector for the immediate
-/// operand of a vector shift left operation. That value must be in the range:
-/// 0 <= Value < ElementBits for a left shift; or
-/// 0 <= Value <= ElementBits for a long left shift.
-static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) {
- assert(VT.isVector() && "vector shift count is not a vector type");
- unsigned ElementBits = VT.getVectorElementType().getSizeInBits();
- if (!getVShiftImm(Op, ElementBits, Cnt))
- return false;
- return (Cnt >= 0 && (isLong ? Cnt - 1 : Cnt) < ElementBits);
-}
-
-/// isVShiftRImm - Check if this is a valid build_vector for the immediate
-/// operand of a vector shift right operation. For a shift opcode, the value
-/// is positive, but for an intrinsic the count must be negative. The
-/// absolute value must be in the range:
-/// 1 <= |Value| <= ElementBits for a right shift; or
-/// 1 <= |Value| <= ElementBits/2 for a narrow right shift.
-static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, bool isIntrinsic,
- int64_t &Cnt) {
- assert(VT.isVector() && "vector shift count is not a vector type");
- unsigned ElementBits = VT.getVectorElementType().getSizeInBits();
- if (!getVShiftImm(Op, ElementBits, Cnt))
- return false;
- if (isIntrinsic)
- Cnt = -Cnt;
- return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits / 2 : ElementBits));
-}
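
In summary, for element size N the accepted immediates are: left shift 0..N-1 (0..N for a long shift), right shift 1..N (1..N/2 for a narrowing shift). A standalone restatement of just the range arithmetic, with the splat extraction replaced by a plain immediate:

#include <cassert>

// Sketch of the range checks in isVShiftLImm / isVShiftRImm.
static bool shiftLImmOK(long long Cnt, unsigned EltBits, bool IsLong) {
  return Cnt >= 0 && (IsLong ? Cnt - 1 : Cnt) < (long long)EltBits;
}
static bool shiftRImmOK(long long Cnt, unsigned EltBits, bool IsNarrow) {
  return Cnt >= 1 && Cnt <= (long long)(IsNarrow ? EltBits / 2 : EltBits);
}

int main() {
  assert(shiftLImmOK(31, 32, false) && !shiftLImmOK(32, 32, false));
  assert(shiftLImmOK(32, 32, true));  // long shifts allow == EltBits
  assert(shiftRImmOK(32, 32, false) && !shiftRImmOK(0, 32, false));
  assert(shiftRImmOK(16, 32, true) && !shiftRImmOK(17, 32, true));
  return 0;
}
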
-
-SDValue ARM64TargetLowering::LowerVectorSRA_SRL_SHL(SDValue Op,
- SelectionDAG &DAG) const {
- EVT VT = Op.getValueType();
- SDLoc DL(Op);
- int64_t Cnt;
-
- if (!Op.getOperand(1).getValueType().isVector())
- return Op;
- unsigned EltSize = VT.getVectorElementType().getSizeInBits();
-
- switch (Op.getOpcode()) {
- default:
- llvm_unreachable("unexpected shift opcode");
-
- case ISD::SHL:
- if (isVShiftLImm(Op.getOperand(1), VT, false, Cnt) && Cnt < EltSize)
- return DAG.getNode(ARM64ISD::VSHL, SDLoc(Op), VT, Op.getOperand(0),
- DAG.getConstant(Cnt, MVT::i32));
- return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
- DAG.getConstant(Intrinsic::arm64_neon_ushl, MVT::i32),
- Op.getOperand(0), Op.getOperand(1));
- case ISD::SRA:
- case ISD::SRL:
- // Right shift immediate
- if (isVShiftRImm(Op.getOperand(1), VT, false, false, Cnt) &&
- Cnt < EltSize) {
- unsigned Opc =
- (Op.getOpcode() == ISD::SRA) ? ARM64ISD::VASHR : ARM64ISD::VLSHR;
- return DAG.getNode(Opc, SDLoc(Op), VT, Op.getOperand(0),
- DAG.getConstant(Cnt, MVT::i32));
- }
-
- // Right shift register. Note, there is not a shift right register
- // instruction, but the shift left register instruction takes a signed
- // value, where negative numbers specify a right shift.
- unsigned Opc = (Op.getOpcode() == ISD::SRA) ? Intrinsic::arm64_neon_sshl
- : Intrinsic::arm64_neon_ushl;
- // Negate the shift amount.
- SDValue NegShift = DAG.getNode(ARM64ISD::NEG, DL, VT, Op.getOperand(1));
- SDValue NegShiftLeft =
- DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
- DAG.getConstant(Opc, MVT::i32), Op.getOperand(0), NegShift);
- return NegShiftLeft;
- }
-
- return SDValue();
-}
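
The right-shift-by-register trick above relies on NEON's USHL/SSHL semantics: a positive count shifts left and a negative count shifts right. A scalar model of the unsigned case, ignoring the per-lane width details of the real instruction:

#include <cassert>
#include <cstdint>

// Scalar model of NEON USHL semantics: positive count shifts left, negative
// count shifts right. This is why the lowering above negates the amount
// instead of needing a separate right-shift-by-register instruction.
static uint64_t ushl(uint64_t X, int8_t Count) {
  return Count >= 0 ? X << Count : X >> -Count;
}

int main() {
  assert(ushl(0x10, 4) == 0x100);  // shift left by 4
  assert(ushl(0x10, -4) == 0x1);   // right shift expressed as ushl by -4
  return 0;
}
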
-
-static SDValue EmitVectorComparison(SDValue LHS, SDValue RHS,
- ARM64CC::CondCode CC, bool NoNans, EVT VT,
- SDLoc dl, SelectionDAG &DAG) {
- EVT SrcVT = LHS.getValueType();
-
- BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(RHS.getNode());
- APInt CnstBits(VT.getSizeInBits(), 0);
- APInt UndefBits(VT.getSizeInBits(), 0);
- bool IsCnst = BVN && resolveBuildVector(BVN, CnstBits, UndefBits);
- bool IsZero = IsCnst && (CnstBits == 0);
-
- if (SrcVT.getVectorElementType().isFloatingPoint()) {
- switch (CC) {
- default:
- return SDValue();
- case ARM64CC::NE: {
- SDValue Fcmeq;
- if (IsZero)
- Fcmeq = DAG.getNode(ARM64ISD::FCMEQz, dl, VT, LHS);
- else
- Fcmeq = DAG.getNode(ARM64ISD::FCMEQ, dl, VT, LHS, RHS);
- return DAG.getNode(ARM64ISD::NOT, dl, VT, Fcmeq);
- }
- case ARM64CC::EQ:
- if (IsZero)
- return DAG.getNode(ARM64ISD::FCMEQz, dl, VT, LHS);
- return DAG.getNode(ARM64ISD::FCMEQ, dl, VT, LHS, RHS);
- case ARM64CC::GE:
- if (IsZero)
- return DAG.getNode(ARM64ISD::FCMGEz, dl, VT, LHS);
- return DAG.getNode(ARM64ISD::FCMGE, dl, VT, LHS, RHS);
- case ARM64CC::GT:
- if (IsZero)
- return DAG.getNode(ARM64ISD::FCMGTz, dl, VT, LHS);
- return DAG.getNode(ARM64ISD::FCMGT, dl, VT, LHS, RHS);
- case ARM64CC::LS:
- if (IsZero)
- return DAG.getNode(ARM64ISD::FCMLEz, dl, VT, LHS);
- return DAG.getNode(ARM64ISD::FCMGE, dl, VT, RHS, LHS);
- case ARM64CC::LT:
- if (!NoNans)
- return SDValue();
- // If we ignore NaNs then we can use the MI implementation.
- // Fallthrough.
- case ARM64CC::MI:
- if (IsZero)
- return DAG.getNode(ARM64ISD::FCMLTz, dl, VT, LHS);
- return DAG.getNode(ARM64ISD::FCMGT, dl, VT, RHS, LHS);
- }
- }
-
- switch (CC) {
- default:
- return SDValue();
- case ARM64CC::NE: {
- SDValue Cmeq;
- if (IsZero)
- Cmeq = DAG.getNode(ARM64ISD::CMEQz, dl, VT, LHS);
- else
- Cmeq = DAG.getNode(ARM64ISD::CMEQ, dl, VT, LHS, RHS);
- return DAG.getNode(ARM64ISD::NOT, dl, VT, Cmeq);
- }
- case ARM64CC::EQ:
- if (IsZero)
- return DAG.getNode(ARM64ISD::CMEQz, dl, VT, LHS);
- return DAG.getNode(ARM64ISD::CMEQ, dl, VT, LHS, RHS);
- case ARM64CC::GE:
- if (IsZero)
- return DAG.getNode(ARM64ISD::CMGEz, dl, VT, LHS);
- return DAG.getNode(ARM64ISD::CMGE, dl, VT, LHS, RHS);
- case ARM64CC::GT:
- if (IsZero)
- return DAG.getNode(ARM64ISD::CMGTz, dl, VT, LHS);
- return DAG.getNode(ARM64ISD::CMGT, dl, VT, LHS, RHS);
- case ARM64CC::LE:
- if (IsZero)
- return DAG.getNode(ARM64ISD::CMLEz, dl, VT, LHS);
- return DAG.getNode(ARM64ISD::CMGE, dl, VT, RHS, LHS);
- case ARM64CC::LS:
- return DAG.getNode(ARM64ISD::CMHS, dl, VT, RHS, LHS);
- case ARM64CC::CC:
- return DAG.getNode(ARM64ISD::CMHI, dl, VT, RHS, LHS);
- case ARM64CC::LT:
- if (IsZero)
- return DAG.getNode(ARM64ISD::CMLTz, dl, VT, LHS);
- return DAG.getNode(ARM64ISD::CMGT, dl, VT, RHS, LHS);
- case ARM64CC::HI:
- return DAG.getNode(ARM64ISD::CMHI, dl, VT, LHS, RHS);
- case ARM64CC::CS:
- return DAG.getNode(ARM64ISD::CMHS, dl, VT, LHS, RHS);
- }
-}
-
-SDValue ARM64TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const {
- ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
- SDValue LHS = Op.getOperand(0);
- SDValue RHS = Op.getOperand(1);
- SDLoc dl(Op);
-
- if (LHS.getValueType().getVectorElementType().isInteger()) {
- assert(LHS.getValueType() == RHS.getValueType());
- ARM64CC::CondCode ARM64CC = changeIntCCToARM64CC(CC);
- return EmitVectorComparison(LHS, RHS, ARM64CC, false, Op.getValueType(), dl,
- DAG);
- }
-
- assert(LHS.getValueType().getVectorElementType() == MVT::f32 ||
- LHS.getValueType().getVectorElementType() == MVT::f64);
-
- // Unfortunately, the mapping of LLVM FP CC's onto ARM64 CC's isn't totally
- // clean. Some of them require two comparisons to implement.
- ARM64CC::CondCode CC1, CC2;
- changeFPCCToARM64CC(CC, CC1, CC2);
-
- bool NoNaNs = getTargetMachine().Options.NoNaNsFPMath;
- SDValue Cmp1 =
- EmitVectorComparison(LHS, RHS, CC1, NoNaNs, Op.getValueType(), dl, DAG);
- if (!Cmp1.getNode())
- return SDValue();
-
- if (CC2 != ARM64CC::AL) {
- SDValue Cmp2 =
- EmitVectorComparison(LHS, RHS, CC2, NoNaNs, Op.getValueType(), dl, DAG);
- if (!Cmp2.getNode())
- return SDValue();
-
- return DAG.getNode(ISD::OR, dl, Cmp1.getValueType(), Cmp1, Cmp2);
- }
-
- return Cmp1;
-}
-
-/// getTgtMemIntrinsic - Represent NEON load and store intrinsics as
-/// MemIntrinsicNodes. The associated MachineMemOperands record the alignment
-/// specified in the intrinsic calls.
-bool ARM64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
- const CallInst &I,
- unsigned Intrinsic) const {
- switch (Intrinsic) {
- case Intrinsic::arm64_neon_ld2:
- case Intrinsic::arm64_neon_ld3:
- case Intrinsic::arm64_neon_ld4:
- case Intrinsic::arm64_neon_ld2lane:
- case Intrinsic::arm64_neon_ld3lane:
- case Intrinsic::arm64_neon_ld4lane:
- case Intrinsic::arm64_neon_ld2r:
- case Intrinsic::arm64_neon_ld3r:
- case Intrinsic::arm64_neon_ld4r: {
- Info.opc = ISD::INTRINSIC_W_CHAIN;
- // Conservatively set memVT to the entire set of vectors loaded.
- uint64_t NumElts = getDataLayout()->getTypeAllocSize(I.getType()) / 8;
- Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
- Info.ptrVal = I.getArgOperand(I.getNumArgOperands() - 1);
- Info.offset = 0;
- Info.align = 0;
- Info.vol = false; // volatile loads with NEON intrinsics not supported
- Info.readMem = true;
- Info.writeMem = false;
- return true;
- }
- case Intrinsic::arm64_neon_st2:
- case Intrinsic::arm64_neon_st3:
- case Intrinsic::arm64_neon_st4:
- case Intrinsic::arm64_neon_st2lane:
- case Intrinsic::arm64_neon_st3lane:
- case Intrinsic::arm64_neon_st4lane: {
- Info.opc = ISD::INTRINSIC_VOID;
- // Conservatively set memVT to the entire set of vectors stored.
- unsigned NumElts = 0;
- for (unsigned ArgI = 1, ArgE = I.getNumArgOperands(); ArgI < ArgE; ++ArgI) {
- Type *ArgTy = I.getArgOperand(ArgI)->getType();
- if (!ArgTy->isVectorTy())
- break;
- NumElts += getDataLayout()->getTypeAllocSize(ArgTy) / 8;
- }
- Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
- Info.ptrVal = I.getArgOperand(I.getNumArgOperands() - 1);
- Info.offset = 0;
- Info.align = 0;
- Info.vol = false; // volatile stores with NEON intrinsics not supported
- Info.readMem = false;
- Info.writeMem = true;
- return true;
- }
- case Intrinsic::arm64_ldxr: {
- PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType());
- Info.opc = ISD::INTRINSIC_W_CHAIN;
- Info.memVT = MVT::getVT(PtrTy->getElementType());
- Info.ptrVal = I.getArgOperand(0);
- Info.offset = 0;
- Info.align = getDataLayout()->getABITypeAlignment(PtrTy->getElementType());
- Info.vol = true;
- Info.readMem = true;
- Info.writeMem = false;
- return true;
- }
- case Intrinsic::arm64_stxr: {
- PointerType *PtrTy = cast<PointerType>(I.getArgOperand(1)->getType());
- Info.opc = ISD::INTRINSIC_W_CHAIN;
- Info.memVT = MVT::getVT(PtrTy->getElementType());
- Info.ptrVal = I.getArgOperand(1);
- Info.offset = 0;
- Info.align = getDataLayout()->getABITypeAlignment(PtrTy->getElementType());
- Info.vol = true;
- Info.readMem = false;
- Info.writeMem = true;
- return true;
- }
- case Intrinsic::arm64_ldxp: {
- Info.opc = ISD::INTRINSIC_W_CHAIN;
- Info.memVT = MVT::i128;
- Info.ptrVal = I.getArgOperand(0);
- Info.offset = 0;
- Info.align = 16;
- Info.vol = true;
- Info.readMem = true;
- Info.writeMem = false;
- return true;
- }
- case Intrinsic::arm64_stxp: {
- Info.opc = ISD::INTRINSIC_W_CHAIN;
- Info.memVT = MVT::i128;
- Info.ptrVal = I.getArgOperand(2);
- Info.offset = 0;
- Info.align = 16;
- Info.vol = true;
- Info.readMem = false;
- Info.writeMem = true;
- return true;
- }
- default:
- break;
- }
-
- return false;
-}
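-
-// E.g. (illustrative): an arm64_neon_ld3 returning three v4i32 values covers
-// 48 bytes, so the conservative memVT computed above is v6i64.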
-
-// Truncations from a 64-bit GPR to a 32-bit GPR are free.
-bool ARM64TargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
- if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
- return false;
- unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
- unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
- if (NumBits1 <= NumBits2)
- return false;
- return true;
-}
-bool ARM64TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
- if (!VT1.isInteger() || !VT2.isInteger())
- return false;
- unsigned NumBits1 = VT1.getSizeInBits();
- unsigned NumBits2 = VT2.getSizeInBits();
- if (NumBits1 <= NumBits2)
- return false;
- return true;
-}
-
-// All 32-bit GPR operations implicitly zero the high-half of the corresponding
-// 64-bit GPR.
-bool ARM64TargetLowering::isZExtFree(Type *Ty1, Type *Ty2) const {
- if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
- return false;
- unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
- unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
- if (NumBits1 == 32 && NumBits2 == 64)
- return true;
- return false;
-}
-bool ARM64TargetLowering::isZExtFree(EVT VT1, EVT VT2) const {
- if (!VT1.isInteger() || !VT2.isInteger())
- return false;
- unsigned NumBits1 = VT1.getSizeInBits();
- unsigned NumBits2 = VT2.getSizeInBits();
- if (NumBits1 == 32 && NumBits2 == 64)
- return true;
- return false;
-}
-
-bool ARM64TargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
- EVT VT1 = Val.getValueType();
- if (isZExtFree(VT1, VT2)) {
- return true;
- }
-
- if (Val.getOpcode() != ISD::LOAD)
- return false;
-
- // 8-, 16-, and 32-bit integer loads all implicitly zero-extend.
- return (VT1.isSimple() && VT1.isInteger() && VT2.isSimple() &&
- VT2.isInteger() && VT1.getSizeInBits() <= 32);
-}
-
-bool ARM64TargetLowering::hasPairedLoad(Type *LoadedType,
- unsigned &RequiredAlignment) const {
- if (!LoadedType->isIntegerTy() && !LoadedType->isFloatTy())
- return false;
- // Cyclone supports unaligned accesses.
- RequiredAlignment = 0;
- unsigned NumBits = LoadedType->getPrimitiveSizeInBits();
- return NumBits == 32 || NumBits == 64;
-}
-
-bool ARM64TargetLowering::hasPairedLoad(EVT LoadedType,
- unsigned &RequiredAlignment) const {
- if (!LoadedType.isSimple() ||
- (!LoadedType.isInteger() && !LoadedType.isFloatingPoint()))
- return false;
- // Cyclone supports unaligned accesses.
- RequiredAlignment = 0;
- unsigned NumBits = LoadedType.getSizeInBits();
- return NumBits == 32 || NumBits == 64;
-}
-
-static bool memOpAlign(unsigned DstAlign, unsigned SrcAlign,
- unsigned AlignCheck) {
- return ((SrcAlign == 0 || SrcAlign % AlignCheck == 0) &&
- (DstAlign == 0 || DstAlign % AlignCheck == 0));
-}
-
-EVT ARM64TargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign,
- unsigned SrcAlign, bool IsMemset,
- bool ZeroMemset, bool MemcpyStrSrc,
- MachineFunction &MF) const {
- // Don't use AdvSIMD to implement a 16-byte memset. It would have taken one
- // instruction to materialize the v2i64 zero and one store (with a
- // restrictive addressing mode). Just do two i64 stores of the zero register.
- bool Fast;
- const Function *F = MF.getFunction();
- if (!IsMemset && Size >= 16 &&
- !F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
- Attribute::NoImplicitFloat) &&
- (memOpAlign(SrcAlign, DstAlign, 16) ||
- (allowsUnalignedMemoryAccesses(MVT::v2i64, 0, &Fast) && Fast)))
- return MVT::v2i64;
-
- return Size >= 8 ? MVT::i64 : MVT::i32;
-}
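-
-// For example (illustrative): a 32-byte memcpy with 16-byte-aligned operands
-// can be emitted as two v2i64 (q-register) copies, while a 12-byte copy falls
-// back to an i64 followed by an i32.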
-
-// 12-bit optionally shifted immediates are legal for adds.
-bool ARM64TargetLowering::isLegalAddImmediate(int64_t Immed) const {
- if ((Immed >> 12) == 0 || ((Immed & 0xfff) == 0 && Immed >> 24 == 0))
- return true;
- return false;
-}
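-
-// E.g. (illustrative): "add x0, x1, #4095" and "add x0, x1, #4095, lsl #12"
-// are both accepted by the rule above, while an immediate of 4097 is not
-// encodable in a single ADD.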
-
-// Integer comparisons are implemented with ADDS/SUBS, so the range of valid
-// immediates is the same as for an add or a sub.
-bool ARM64TargetLowering::isLegalICmpImmediate(int64_t Immed) const {
- if (Immed < 0)
- Immed *= -1;
- return isLegalAddImmediate(Immed);
-}
-
-/// isLegalAddressingMode - Return true if the addressing mode represented
-/// by AM is legal for this target, for a load/store of the specified type.
-bool ARM64TargetLowering::isLegalAddressingMode(const AddrMode &AM,
- Type *Ty) const {
- // ARM64 has five basic addressing modes:
- // reg
- // reg + 9-bit signed offset
- // reg + SIZE_IN_BYTES * 12-bit unsigned offset
- // reg1 + reg2
- // reg + SIZE_IN_BYTES * reg
-
- // No global is ever allowed as a base.
- if (AM.BaseGV)
- return false;
-
- // No reg+reg+imm addressing.
- if (AM.HasBaseReg && AM.BaseOffs && AM.Scale)
- return false;
-
- // check reg + imm case:
- // i.e., reg + 0, reg + imm9, reg + SIZE_IN_BYTES * uimm12
- uint64_t NumBytes = 0;
- if (Ty->isSized()) {
- uint64_t NumBits = getDataLayout()->getTypeSizeInBits(Ty);
- NumBytes = NumBits / 8;
- if (!isPowerOf2_64(NumBits))
- NumBytes = 0;
- }
-
- if (!AM.Scale) {
- int64_t Offset = AM.BaseOffs;
-
- // 9-bit signed offset
- if (Offset >= -(1LL << 9) && Offset <= (1LL << 9) - 1)
- return true;
-
- // 12-bit unsigned offset
- unsigned shift = Log2_64(NumBytes);
- if (NumBytes && Offset > 0 && (Offset / NumBytes) <= (1LL << 12) - 1 &&
- // Must be a multiple of NumBytes (NumBytes is a power of 2)
- (Offset >> shift) << shift == Offset)
- return true;
- return false;
- }
-
- // Check reg1 + SIZE_IN_BYTES * reg2 and reg1 + reg2
-
- if (!AM.Scale || AM.Scale == 1 ||
- (AM.Scale > 0 && (uint64_t)AM.Scale == NumBytes))
- return true;
- return false;
-}
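-
-// Worked examples for an i64 access (NumBytes == 8) under the checks above:
-// [x0, #-256] is legal (within the signed-offset window),
-// [x0, #32760] is legal (4095 * 8, the scaled 12-bit unsigned case), and
-// [x0, #262144] is not (32768 * 8 overflows the unsigned 12-bit field).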
-
-int ARM64TargetLowering::getScalingFactorCost(const AddrMode &AM,
- Type *Ty) const {
- // Scaling factors are not free at all.
- // Operands | Rt Latency
- // -------------------------------------------
- // Rt, [Xn, Xm] | 4
- // -------------------------------------------
- // Rt, [Xn, Xm, lsl #imm] | Rn: 4 Rm: 5
- // Rt, [Xn, Wm, <extend> #imm] |
- if (isLegalAddressingMode(AM, Ty))
- // Scale represents reg2 * scale, thus account for 1 if
- // it is not equal to 0 or 1.
- return AM.Scale != 0 && AM.Scale != 1;
- return -1;
-}
-
-bool ARM64TargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
- VT = VT.getScalarType();
-
- if (!VT.isSimple())
- return false;
-
- switch (VT.getSimpleVT().SimpleTy) {
- case MVT::f32:
- case MVT::f64:
- return true;
- default:
- break;
- }
-
- return false;
-}
-
-const uint16_t *
-ARM64TargetLowering::getScratchRegisters(CallingConv::ID) const {
- // LR is a callee-save register, but we must treat it as clobbered by any call
- // site. Hence we include LR in the scratch registers, which are in turn added
- // as implicit-defs for stackmaps and patchpoints.
- static const uint16_t ScratchRegs[] = {
- ARM64::X16, ARM64::X17, ARM64::LR, 0
- };
- return ScratchRegs;
-}
-
-bool ARM64TargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
- Type *Ty) const {
- assert(Ty->isIntegerTy());
-
- unsigned BitSize = Ty->getPrimitiveSizeInBits();
- if (BitSize == 0)
- return false;
-
- int64_t Val = Imm.getSExtValue();
- if (Val == 0 || ARM64_AM::isLogicalImmediate(Val, BitSize))
- return true;
-
- if ((int64_t)Val < 0)
- Val = ~Val;
- if (BitSize == 32)
- Val &= (1LL << 32) - 1;
-
- unsigned LZ = countLeadingZeros((uint64_t)Val);
- unsigned Shift = (63 - LZ) / 16;
- // MOVZ is free, so return true when at most two MOVKs are needed.
- return Shift < 3;
-}
-
-// Generate SUBS and CSEL for integer abs.
-static SDValue performIntegerAbsCombine(SDNode *N, SelectionDAG &DAG) {
- EVT VT = N->getValueType(0);
-
- SDValue N0 = N->getOperand(0);
- SDValue N1 = N->getOperand(1);
- SDLoc DL(N);
-
- // Check pattern of XOR(ADD(X,Y), Y) where Y is SRA(X, size(X)-1)
- // and change it to SUB and CSEL.
- if (VT.isInteger() && N->getOpcode() == ISD::XOR &&
- N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1 &&
- N1.getOpcode() == ISD::SRA && N1.getOperand(0) == N0.getOperand(0))
- if (ConstantSDNode *Y1C = dyn_cast<ConstantSDNode>(N1.getOperand(1)))
- if (Y1C->getAPIntValue() == VT.getSizeInBits() - 1) {
- SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT),
- N0.getOperand(0));
- // Generate SUBS & CSEL.
- SDValue Cmp =
- DAG.getNode(ARM64ISD::SUBS, DL, DAG.getVTList(VT, MVT::i32),
- N0.getOperand(0), DAG.getConstant(0, VT));
- return DAG.getNode(ARM64ISD::CSEL, DL, VT, N0.getOperand(0), Neg,
- DAG.getConstant(ARM64CC::PL, MVT::i32),
- SDValue(Cmp.getNode(), 1));
- }
- return SDValue();
-}
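-
-// The pattern matched above is the classic branchless abs. A scalar model of
-// it (a sketch for illustration only, assuming 32-bit two's complement and an
-// arithmetic right shift):
-static int32_t absSketch(int32_t X) {
- int32_t Y = X >> 31; // all-ones if X is negative, zero otherwise
- return (X + Y) ^ Y; // X when Y == 0; ~(X - 1) == -X when Y == -1
-}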
-
-// performXorCombine - Attempts to handle integer ABS.
-static SDValue performXorCombine(SDNode *N, SelectionDAG &DAG,
- TargetLowering::DAGCombinerInfo &DCI,
- const ARM64Subtarget *Subtarget) {
- if (DCI.isBeforeLegalizeOps())
- return SDValue();
-
- return performIntegerAbsCombine(N, DAG);
-}
-
-static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG,
- TargetLowering::DAGCombinerInfo &DCI,
- const ARM64Subtarget *Subtarget) {
- if (DCI.isBeforeLegalizeOps())
- return SDValue();
-
- // Multiplication by a power of two plus/minus one can be done more
- // cheaply as a shift+add/sub. For now, this is true unilaterally. If
- // future CPUs have a cheaper MADD instruction, this may need to be
- // gated on a subtarget feature. For Cyclone, 32-bit MADD is 4 cycles and
- // 64-bit is 5 cycles, so this is always a win.
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
- APInt Value = C->getAPIntValue();
- EVT VT = N->getValueType(0);
- APInt VP1 = Value + 1;
- if (VP1.isPowerOf2()) {
- // Multiplying by one less than a power of two, replace with a shift
- // and a subtract.
- SDValue ShiftedVal =
- DAG.getNode(ISD::SHL, SDLoc(N), VT, N->getOperand(0),
- DAG.getConstant(VP1.logBase2(), MVT::i64));
- return DAG.getNode(ISD::SUB, SDLoc(N), VT, ShiftedVal, N->getOperand(0));
- }
- APInt VM1 = Value - 1;
- if (VM1.isPowerOf2()) {
- // Multiplying by one more than a power of two, replace with a shift
- // and an add.
- SDValue ShiftedVal =
- DAG.getNode(ISD::SHL, SDLoc(N), VT, N->getOperand(0),
- DAG.getConstant(VM1.logBase2(), MVT::i64));
- return DAG.getNode(ISD::ADD, SDLoc(N), VT, ShiftedVal, N->getOperand(0));
- }
- }
- return SDValue();
-}
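-
-// Worked examples of the combine above (illustrative):
-// x * 7 ==> (x << 3) - x, since 7 + 1 == 8 is a power of two, and
-// x * 9 ==> (x << 3) + x, since 9 - 1 == 8 is a power of two.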
-
-static SDValue performIntToFpCombine(SDNode *N, SelectionDAG &DAG) {
- EVT VT = N->getValueType(0);
- if (VT != MVT::f32 && VT != MVT::f64)
- return SDValue();
- // Only optimize when the source and destination types have the same width.
- if (VT.getSizeInBits() != N->getOperand(0).getValueType().getSizeInBits())
- return SDValue();
-
- // If the result of an integer load is only used by an integer-to-float
- // conversion, use an FP load and an AdvSIMD scalar {S|U}CVTF instead.
- // This eliminates an integer-to-vector-move UOP and improves throughput.
- SDValue N0 = N->getOperand(0);
- if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
- // Do not change the width of a volatile load.
- !cast<LoadSDNode>(N0)->isVolatile()) {
- LoadSDNode *LN0 = cast<LoadSDNode>(N0);
- SDValue Load = DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
- LN0->getPointerInfo(), LN0->isVolatile(),
- LN0->isNonTemporal(), LN0->isInvariant(),
- LN0->getAlignment());
-
- // Make sure successors of the original load stay after it by updating them
- // to use the new Chain.
- DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), Load.getValue(1));
-
- unsigned Opcode =
- (N->getOpcode() == ISD::SINT_TO_FP) ? ARM64ISD::SITOF : ARM64ISD::UITOF;
- return DAG.getNode(Opcode, SDLoc(N), VT, Load);
- }
-
- return SDValue();
-}
-
-/// An EXTR instruction is made up of two shifts, ORed together. This helper
-/// searches for and classifies those shifts.
-static bool findEXTRHalf(SDValue N, SDValue &Src, uint32_t &ShiftAmount,
- bool &FromHi) {
- if (N.getOpcode() == ISD::SHL)
- FromHi = false;
- else if (N.getOpcode() == ISD::SRL)
- FromHi = true;
- else
- return false;
-
- if (!isa<ConstantSDNode>(N.getOperand(1)))
- return false;
-
- ShiftAmount = N->getConstantOperandVal(1);
- Src = N->getOperand(0);
- return true;
-}
-
-/// EXTR instruction extracts a contiguous chunk of bits from two existing
-/// registers viewed as a high/low pair. This function looks for the pattern:
-/// (or (shl VAL1, #N), (srl VAL2, #RegWidth-N)) and replaces it with an
-/// EXTR. Can't quite be done in TableGen because the two immediates aren't
-/// independent.
-static SDValue tryCombineToEXTR(SDNode *N,
- TargetLowering::DAGCombinerInfo &DCI) {
- SelectionDAG &DAG = DCI.DAG;
- SDLoc DL(N);
- EVT VT = N->getValueType(0);
-
- assert(N->getOpcode() == ISD::OR && "Unexpected root");
-
- if (VT != MVT::i32 && VT != MVT::i64)
- return SDValue();
-
- SDValue LHS;
- uint32_t ShiftLHS = 0;
- bool LHSFromHi = false;
- if (!findEXTRHalf(N->getOperand(0), LHS, ShiftLHS, LHSFromHi))
- return SDValue();
-
- SDValue RHS;
- uint32_t ShiftRHS = 0;
- bool RHSFromHi = false;
- if (!findEXTRHalf(N->getOperand(1), RHS, ShiftRHS, RHSFromHi))
- return SDValue();
-
- // If they're both trying to come from the high part of the register, they're
- // not really an EXTR.
- if (LHSFromHi == RHSFromHi)
- return SDValue();
-
- if (ShiftLHS + ShiftRHS != VT.getSizeInBits())
- return SDValue();
-
- if (LHSFromHi) {
- std::swap(LHS, RHS);
- std::swap(ShiftLHS, ShiftRHS);
- }
-
- return DAG.getNode(ARM64ISD::EXTR, DL, VT, LHS, RHS,
- DAG.getConstant(ShiftRHS, MVT::i64));
-}
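-
-// A scalar model of the i32 EXTR formed above (a minimal sketch, assuming
-// 0 < N < 32 so that neither shift is undefined):
-static uint32_t extrSketch(uint32_t Val1, uint32_t Val2, unsigned N) {
- // Matches (or (shl Val1, N), (srl Val2, 32 - N)), with ShiftRHS == 32 - N.
- return (Val1 << N) | (Val2 >> (32 - N));
-}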
-
-static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
- const ARM64Subtarget *Subtarget) {
- // Attempt to form an EXTR from (or (shl VAL1, #N), (srl VAL2, #RegWidth-N))
- if (!EnableARM64ExtrGeneration)
- return SDValue();
- SelectionDAG &DAG = DCI.DAG;
- EVT VT = N->getValueType(0);
-
- if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
- return SDValue();
-
- SDValue Res = tryCombineToEXTR(N, DCI);
- if (Res.getNode())
- return Res;
-
- return SDValue();
-}
-
-static SDValue performBitcastCombine(SDNode *N,
- TargetLowering::DAGCombinerInfo &DCI,
- SelectionDAG &DAG) {
- // Wait 'til after everything is legalized to try this. That way we have
- // legal vector types and such.
- if (DCI.isBeforeLegalizeOps())
- return SDValue();
-
- // Remove extraneous bitcasts around an extract_subvector.
- // For example,
- // (v4i16 (bitconvert
- // (extract_subvector (v2i64 (bitconvert (v8i16 ...)), (i64 1)))))
- // becomes
- // (extract_subvector ((v8i16 ...), (i64 4)))
-
- // Only interested in 64-bit vectors as the ultimate result.
- EVT VT = N->getValueType(0);
- if (!VT.isVector())
- return SDValue();
- if (VT.getSimpleVT().getSizeInBits() != 64)
- return SDValue();
- // Is the operand an extract_subvector starting at the beginning or halfway
- // point of the vector? A low half may also come through as an
- // EXTRACT_SUBREG, so look for that, too.
- SDValue Op0 = N->getOperand(0);
- if (Op0->getOpcode() != ISD::EXTRACT_SUBVECTOR &&
- !(Op0->isMachineOpcode() &&
- Op0->getMachineOpcode() == ARM64::EXTRACT_SUBREG))
- return SDValue();
- uint64_t idx = cast<ConstantSDNode>(Op0->getOperand(1))->getZExtValue();
- if (Op0->getOpcode() == ISD::EXTRACT_SUBVECTOR) {
- if (Op0->getValueType(0).getVectorNumElements() != idx && idx != 0)
- return SDValue();
- } else if (Op0->getMachineOpcode() == ARM64::EXTRACT_SUBREG) {
- if (idx != ARM64::dsub)
- return SDValue();
- // The dsub reference is equivalent to a lane zero subvector reference.
- idx = 0;
- }
- // Look through the bitcast of the input to the extract.
- if (Op0->getOperand(0)->getOpcode() != ISD::BITCAST)
- return SDValue();
- SDValue Source = Op0->getOperand(0)->getOperand(0);
- // If the source type has twice the number of elements as our destination
- // type, we know this is an extract of the high or low half of the vector.
- EVT SVT = Source->getValueType(0);
- if (SVT.getVectorNumElements() != VT.getVectorNumElements() * 2)
- return SDValue();
-
- DEBUG(dbgs() << "arm64-lower: bitcast extract_subvector simplification\n");
-
- // Create the simplified form to just extract the low or high half of the
- // vector directly rather than bothering with the bitcasts.
- SDLoc dl(N);
- unsigned NumElements = VT.getVectorNumElements();
- if (idx) {
- SDValue HalfIdx = DAG.getConstant(NumElements, MVT::i64);
- return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, Source, HalfIdx);
- } else {
- SDValue SubReg = DAG.getTargetConstant(ARM64::dsub, MVT::i32);
- return SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, VT,
- Source, SubReg),
- 0);
- }
-}
-
-static SDValue performConcatVectorsCombine(SDNode *N,
- TargetLowering::DAGCombinerInfo &DCI,
- SelectionDAG &DAG) {
- // Wait 'til after everything is legalized to try this. That way we have
- // legal vector types and such.
- if (DCI.isBeforeLegalizeOps())
- return SDValue();
-
- SDLoc dl(N);
- EVT VT = N->getValueType(0);
-
- // If we see a (concat_vectors (v1x64 A), (v1x64 A)) it's really a vector
- // splat. The indexed instructions are going to be expecting a DUPLANE64, so
- // canonicalise to that.
- if (N->getOperand(0) == N->getOperand(1) && VT.getVectorNumElements() == 2) {
- assert(VT.getVectorElementType().getSizeInBits() == 64);
- return DAG.getNode(ARM64ISD::DUPLANE64, dl, VT,
- WidenVector(N->getOperand(0), DAG),
- DAG.getConstant(0, MVT::i64));
- }
-
- // Canonicalise concat_vectors so that the right-hand vector has as few
- // bit-casts as possible before its real operation. The primary matching
- // destination for these operations will be the narrowing "2" instructions,
- // which depend on the operation being performed on this right-hand vector.
- // For example,
- // (concat_vectors LHS, (v1i64 (bitconvert (v4i16 RHS))))
- // becomes
- // (bitconvert (concat_vectors (v4i16 (bitconvert LHS)), RHS))
-
- SDValue Op1 = N->getOperand(1);
- if (Op1->getOpcode() != ISD::BITCAST)
- return SDValue();
- SDValue RHS = Op1->getOperand(0);
- MVT RHSTy = RHS.getValueType().getSimpleVT();
- // If the RHS is not a vector, this is not the pattern we're looking for.
- if (!RHSTy.isVector())
- return SDValue();
-
- DEBUG(dbgs() << "arm64-lower: concat_vectors bitcast simplification\n");
-
- MVT ConcatTy = MVT::getVectorVT(RHSTy.getVectorElementType(),
- RHSTy.getVectorNumElements() * 2);
- return DAG.getNode(
- ISD::BITCAST, dl, VT,
- DAG.getNode(ISD::CONCAT_VECTORS, dl, ConcatTy,
- DAG.getNode(ISD::BITCAST, dl, RHSTy, N->getOperand(0)), RHS));
-}
-
-static SDValue tryCombineFixedPointConvert(SDNode *N,
- TargetLowering::DAGCombinerInfo &DCI,
- SelectionDAG &DAG) {
- // Wait 'til after everything is legalized to try this. That way we have
- // legal vector types and such.
- if (DCI.isBeforeLegalizeOps())
- return SDValue();
- // Transform a scalar conversion of a value from a lane extract into a
- // lane extract of a vector conversion. E.g., from foo1 to foo2:
- // double foo1(int64x2_t a) { return vcvtd_n_f64_s64(a[1], 9); }
- // double foo2(int64x2_t a) { return vcvtq_n_f64_s64(a, 9)[1]; }
- //
- // The second form interacts better with instruction selection and the
- // register allocator to avoid cross-class register copies that aren't
- // coalescable due to a lane reference.
-
- // Check the operand and see if it originates from a lane extract.
- SDValue Op1 = N->getOperand(1);
- if (Op1.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
- // Yep, no additional predication needed. Perform the transform.
- SDValue IID = N->getOperand(0);
- SDValue Shift = N->getOperand(2);
- SDValue Vec = Op1.getOperand(0);
- SDValue Lane = Op1.getOperand(1);
- EVT ResTy = N->getValueType(0);
- EVT VecResTy;
- SDLoc DL(N);
-
- // The vector width should be 128 bits by the time we get here, even
- // if it started as 64 bits (the extract_vector handling will have
- // done so).
- assert(Vec.getValueType().getSizeInBits() == 128 &&
- "unexpected vector size on extract_vector_elt!");
- if (Vec.getValueType() == MVT::v4i32)
- VecResTy = MVT::v4f32;
- else if (Vec.getValueType() == MVT::v2i64)
- VecResTy = MVT::v2f64;
- else
- llvm_unreachable("unexpected vector type!");
-
- SDValue Convert =
- DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VecResTy, IID, Vec, Shift);
- return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResTy, Convert, Lane);
- }
- return SDValue();
-}
-
-// AArch64 high-vector "long" operations are formed by performing the non-high
-// version on an extract_subvector of each operand which gets the high half:
-//
-// (longop2 LHS, RHS) == (longop (extract_high LHS), (extract_high RHS))
-//
-// However, there are cases which don't have an extract_high explicitly, but
-// have another operation that can be made compatible with one for free. For
-// example:
-//
-// (dupv64 scalar) --> (extract_high (dup128 scalar))
-//
-// This routine does the actual conversion of such DUPs, once outer routines
-// have determined that everything else is in order.
-static SDValue tryExtendDUPToExtractHigh(SDValue N, SelectionDAG &DAG) {
- // We can handle most types of duplicate, but the lane ones have an extra
- // operand saying *which* lane, so we need to know.
- bool IsDUPLANE;
- switch (N.getOpcode()) {
- case ARM64ISD::DUP:
- IsDUPLANE = false;
- break;
- case ARM64ISD::DUPLANE8:
- case ARM64ISD::DUPLANE16:
- case ARM64ISD::DUPLANE32:
- case ARM64ISD::DUPLANE64:
- IsDUPLANE = true;
- break;
- default:
- return SDValue();
- }
-
- MVT NarrowTy = N.getSimpleValueType();
- if (!NarrowTy.is64BitVector())
- return SDValue();
-
- MVT ElementTy = NarrowTy.getVectorElementType();
- unsigned NumElems = NarrowTy.getVectorNumElements();
- MVT NewDUPVT = MVT::getVectorVT(ElementTy, NumElems * 2);
-
- SDValue NewDUP;
- if (IsDUPLANE)
- NewDUP = DAG.getNode(N.getOpcode(), SDLoc(N), NewDUPVT, N.getOperand(0),
- N.getOperand(1));
- else
- NewDUP = DAG.getNode(ARM64ISD::DUP, SDLoc(N), NewDUPVT, N.getOperand(0));
-
- return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N.getNode()), NarrowTy,
- NewDUP, DAG.getConstant(NumElems, MVT::i64));
-}
-
-static bool isEssentiallyExtractSubvector(SDValue N) {
- if (N.getOpcode() == ISD::EXTRACT_SUBVECTOR)
- return true;
-
- return N.getOpcode() == ISD::BITCAST &&
- N.getOperand(0).getOpcode() == ISD::EXTRACT_SUBVECTOR;
-}
-
-/// \brief Helper structure to keep track of ISD::SET_CC operands.
-struct GenericSetCCInfo {
- const SDValue *Opnd0;
- const SDValue *Opnd1;
- ISD::CondCode CC;
-};
-
-/// \brief Helper structure to keep track of a SET_CC lowered into ARM64 code.
-struct ARM64SetCCInfo {
- const SDValue *Cmp;
- ARM64CC::CondCode CC;
-};
-
-/// \brief Helper structure to keep track of SetCC information.
-union SetCCInfo {
- GenericSetCCInfo Generic;
- ARM64SetCCInfo ARM64;
-};
-
-/// \brief Helper structure to be able to read SetCC information.
-/// If the IsARM64 field is set to true, Info is an ARM64SetCCInfo; otherwise
-/// Info is a GenericSetCCInfo.
-struct SetCCInfoAndKind {
- SetCCInfo Info;
- bool IsARM64;
-};
-
-/// \brief Check whether or not \p Op is a SET_CC operation, either a generic
-/// or an ARM64-lowered one.
-/// \p SetCCInfo is filled accordingly.
-/// \post SetCCInfo is meaningful only when this function returns true.
-/// \return True when Op is a kind of SET_CC operation.
-static bool isSetCC(SDValue Op, SetCCInfoAndKind &SetCCInfo) {
- // If this is a setcc, this is straightforward.
- if (Op.getOpcode() == ISD::SETCC) {
- SetCCInfo.Info.Generic.Opnd0 = &Op.getOperand(0);
- SetCCInfo.Info.Generic.Opnd1 = &Op.getOperand(1);
- SetCCInfo.Info.Generic.CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
- SetCCInfo.IsARM64 = false;
- return true;
- }
- // Otherwise, check if this is a matching csel instruction.
- // In other words:
- // - csel 1, 0, cc
- // - csel 0, 1, !cc
- if (Op.getOpcode() != ARM64ISD::CSEL)
- return false;
- // Set the information about the operands.
- // TODO: we want the operands of the Cmp not the csel
- SetCCInfo.Info.ARM64.Cmp = &Op.getOperand(3);
- SetCCInfo.IsARM64 = true;
- SetCCInfo.Info.ARM64.CC = static_cast<ARM64CC::CondCode>(
- cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue());
-
- // Check that the operands match the constraints:
- // (1) Both operands must be constants.
- // (2) One must be 1 and the other must be 0.
- ConstantSDNode *TValue = dyn_cast<ConstantSDNode>(Op.getOperand(0));
- ConstantSDNode *FValue = dyn_cast<ConstantSDNode>(Op.getOperand(1));
-
- // Check (1).
- if (!TValue || !FValue)
- return false;
-
- // Check (2).
- if (!TValue->isOne()) {
- // Update the comparison when we are interested in !cc.
- std::swap(TValue, FValue);
- SetCCInfo.Info.ARM64.CC =
- ARM64CC::getInvertedCondCode(SetCCInfo.Info.ARM64.CC);
- }
- return TValue->isOne() && FValue->isNullValue();
-}
-
-// The folding we want to perform is:
-// (add x, (setcc cc ...) )
-// -->
-// (csel x, (add x, 1), !cc ...)
-//
-// The latter will get matched to a CSINC instruction.
-static SDValue performSetccAddFolding(SDNode *Op, SelectionDAG &DAG) {
- assert(Op && Op->getOpcode() == ISD::ADD && "Unexpected operation!");
- SDValue LHS = Op->getOperand(0);
- SDValue RHS = Op->getOperand(1);
- SetCCInfoAndKind InfoAndKind;
-
- // If neither operand is a SET_CC, give up.
- if (!isSetCC(LHS, InfoAndKind)) {
- std::swap(LHS, RHS);
- if (!isSetCC(LHS, InfoAndKind))
- return SDValue();
- }
-
- // FIXME: This could be generalized to work for FP comparisons.
- EVT CmpVT = InfoAndKind.IsARM64
- ? InfoAndKind.Info.ARM64.Cmp->getOperand(0).getValueType()
- : InfoAndKind.Info.Generic.Opnd0->getValueType();
- if (CmpVT != MVT::i32 && CmpVT != MVT::i64)
- return SDValue();
-
- SDValue CCVal;
- SDValue Cmp;
- SDLoc dl(Op);
- if (InfoAndKind.IsARM64) {
- CCVal = DAG.getConstant(
- ARM64CC::getInvertedCondCode(InfoAndKind.Info.ARM64.CC), MVT::i32);
- Cmp = *InfoAndKind.Info.ARM64.Cmp;
- } else
- Cmp = getARM64Cmp(*InfoAndKind.Info.Generic.Opnd0,
- *InfoAndKind.Info.Generic.Opnd1,
- ISD::getSetCCInverse(InfoAndKind.Info.Generic.CC, true),
- CCVal, DAG, dl);
-
- EVT VT = Op->getValueType(0);
- LHS = DAG.getNode(ISD::ADD, dl, VT, RHS, DAG.getConstant(1, VT));
- return DAG.getNode(ARM64ISD::CSEL, dl, VT, RHS, LHS, CCVal, Cmp);
-}
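-
-// In C terms (illustrative): "x + (a == b)" becomes "(a == b) ? x + 1 : x",
-// which matches to a single cmp + csinc instead of cset + add.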
-
-// The basic add/sub long vector instructions have variants with "2" on the end
-// which act on the high-half of their inputs. They are normally matched by
-// patterns like:
-//
-// (add (zeroext (extract_high LHS)),
-// (zeroext (extract_high RHS)))
-// -> uaddl2 vD, vN, vM
-//
-// However, if one of the extracts is something like a duplicate, this
-// instruction can still be used profitably. This function puts the DAG into a
-// more appropriate form for those patterns to trigger.
-static SDValue performAddSubLongCombine(SDNode *N,
- TargetLowering::DAGCombinerInfo &DCI,
- SelectionDAG &DAG) {
- if (DCI.isBeforeLegalizeOps())
- return SDValue();
-
- MVT VT = N->getSimpleValueType(0);
- if (!VT.is128BitVector()) {
- if (N->getOpcode() == ISD::ADD)
- return performSetccAddFolding(N, DAG);
- return SDValue();
- }
-
- // Make sure both branches are extended in the same way.
- SDValue LHS = N->getOperand(0);
- SDValue RHS = N->getOperand(1);
- if ((LHS.getOpcode() != ISD::ZERO_EXTEND &&
- LHS.getOpcode() != ISD::SIGN_EXTEND) ||
- LHS.getOpcode() != RHS.getOpcode())
- return SDValue();
-
- unsigned ExtType = LHS.getOpcode();
-
- // It's only worth doing this if at least one of the inputs is already an
- // extract, but we don't know which it'll be, so we have to try both.
- if (isEssentiallyExtractSubvector(LHS.getOperand(0))) {
- RHS = tryExtendDUPToExtractHigh(RHS.getOperand(0), DAG);
- if (!RHS.getNode())
- return SDValue();
-
- RHS = DAG.getNode(ExtType, SDLoc(N), VT, RHS);
- } else if (isEssentiallyExtractSubvector(RHS.getOperand(0))) {
- LHS = tryExtendDUPToExtractHigh(LHS.getOperand(0), DAG);
- if (!LHS.getNode())
- return SDValue();
-
- LHS = DAG.getNode(ExtType, SDLoc(N), VT, LHS);
- }
-
- return DAG.getNode(N->getOpcode(), SDLoc(N), VT, LHS, RHS);
-}
-
-// Massage DAGs which we can use the high-half "long" operations on into
-// something isel will recognize better. E.g.
-//
-// (arm64_neon_umull (extract_high vec) (dupv64 scalar)) -->
-// (arm64_neon_umull (extract_high (v2i64 vec))
-// (extract_high (v2i64 (dup128 scalar))))
-//
-static SDValue tryCombineLongOpWithDup(unsigned IID, SDNode *N,
- TargetLowering::DAGCombinerInfo &DCI,
- SelectionDAG &DAG) {
- if (DCI.isBeforeLegalizeOps())
- return SDValue();
-
- SDValue LHS = N->getOperand(1);
- SDValue RHS = N->getOperand(2);
- assert(LHS.getValueType().is64BitVector() &&
- RHS.getValueType().is64BitVector() &&
- "unexpected shape for long operation");
-
- // Either node could be a DUP, but it's not worth doing both of them (you'd
- // just as well use the non-high version) so look for a corresponding extract
- // operation on the other "wing".
- if (isEssentiallyExtractSubvector(LHS)) {
- RHS = tryExtendDUPToExtractHigh(RHS, DAG);
- if (!RHS.getNode())
- return SDValue();
- } else if (isEssentiallyExtractSubvector(RHS)) {
- LHS = tryExtendDUPToExtractHigh(LHS, DAG);
- if (!LHS.getNode())
- return SDValue();
- }
-
- return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N), N->getValueType(0),
- N->getOperand(0), LHS, RHS);
-}
-
-static SDValue tryCombineShiftImm(unsigned IID, SDNode *N, SelectionDAG &DAG) {
- MVT ElemTy = N->getSimpleValueType(0).getScalarType();
- unsigned ElemBits = ElemTy.getSizeInBits();
-
- int64_t ShiftAmount;
- if (BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(2))) {
- APInt SplatValue, SplatUndef;
- unsigned SplatBitSize;
- bool HasAnyUndefs;
- if (!BVN->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
- HasAnyUndefs, ElemBits) ||
- SplatBitSize != ElemBits)
- return SDValue();
-
- ShiftAmount = SplatValue.getSExtValue();
- } else if (ConstantSDNode *CVN = dyn_cast<ConstantSDNode>(N->getOperand(2))) {
- ShiftAmount = CVN->getSExtValue();
- } else
- return SDValue();
-
- unsigned Opcode;
- bool IsRightShift;
- switch (IID) {
- default:
- llvm_unreachable("Unknown shift intrinsic");
- case Intrinsic::arm64_neon_sqshl:
- Opcode = ARM64ISD::SQSHL_I;
- IsRightShift = false;
- break;
- case Intrinsic::arm64_neon_uqshl:
- Opcode = ARM64ISD::UQSHL_I;
- IsRightShift = false;
- break;
- case Intrinsic::arm64_neon_srshl:
- Opcode = ARM64ISD::SRSHR_I;
- IsRightShift = true;
- break;
- case Intrinsic::arm64_neon_urshl:
- Opcode = ARM64ISD::URSHR_I;
- IsRightShift = true;
- break;
- case Intrinsic::arm64_neon_sqshlu:
- Opcode = ARM64ISD::SQSHLU_I;
- IsRightShift = false;
- break;
- }
-
- if (IsRightShift && ShiftAmount <= -1 && ShiftAmount >= -(int)ElemBits)
- return DAG.getNode(Opcode, SDLoc(N), N->getValueType(0), N->getOperand(1),
- DAG.getConstant(-ShiftAmount, MVT::i32));
- else if (!IsRightShift && ShiftAmount >= 0 && ShiftAmount <= ElemBits)
- return DAG.getNode(Opcode, SDLoc(N), N->getValueType(0), N->getOperand(1),
- DAG.getConstant(ShiftAmount, MVT::i32));
-
- return SDValue();
-}
-
-// The CRC32[BH] instructions ignore the high bits of their data operand. Since
-// the intrinsics must be legal and take an i32, this means there's almost
-// certainly going to be a zext in the DAG which we can eliminate.
-static SDValue tryCombineCRC32(unsigned Mask, SDNode *N, SelectionDAG &DAG) {
- SDValue AndN = N->getOperand(2);
- if (AndN.getOpcode() != ISD::AND)
- return SDValue();
-
- ConstantSDNode *CMask = dyn_cast<ConstantSDNode>(AndN.getOperand(1));
- if (!CMask || CMask->getZExtValue() != Mask)
- return SDValue();
-
- return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N), MVT::i32,
- N->getOperand(0), N->getOperand(1), AndN.getOperand(0));
-}
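-
-// E.g. (illustrative): crc32b only reads the low byte of its data operand, so
-// a call equivalent to arm64_crc32b(acc, x & 0xff) has the masking AND
-// stripped and x fed to the node directly.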
-
-static SDValue performIntrinsicCombine(SDNode *N,
- TargetLowering::DAGCombinerInfo &DCI,
- const ARM64Subtarget *Subtarget) {
- SelectionDAG &DAG = DCI.DAG;
- unsigned IID = getIntrinsicID(N);
- switch (IID) {
- default:
- break;
- case Intrinsic::arm64_neon_vcvtfxs2fp:
- case Intrinsic::arm64_neon_vcvtfxu2fp:
- return tryCombineFixedPointConvert(N, DCI, DAG);
- case Intrinsic::arm64_neon_fmax:
- return DAG.getNode(ARM64ISD::FMAX, SDLoc(N), N->getValueType(0),
- N->getOperand(1), N->getOperand(2));
- case Intrinsic::arm64_neon_fmin:
- return DAG.getNode(ARM64ISD::FMIN, SDLoc(N), N->getValueType(0),
- N->getOperand(1), N->getOperand(2));
- case Intrinsic::arm64_neon_smull:
- case Intrinsic::arm64_neon_umull:
- case Intrinsic::arm64_neon_pmull:
- case Intrinsic::arm64_neon_sqdmull:
- return tryCombineLongOpWithDup(IID, N, DCI, DAG);
- case Intrinsic::arm64_neon_sqshl:
- case Intrinsic::arm64_neon_uqshl:
- case Intrinsic::arm64_neon_sqshlu:
- case Intrinsic::arm64_neon_srshl:
- case Intrinsic::arm64_neon_urshl:
- return tryCombineShiftImm(IID, N, DAG);
- case Intrinsic::arm64_crc32b:
- case Intrinsic::arm64_crc32cb:
- return tryCombineCRC32(0xff, N, DAG);
- case Intrinsic::arm64_crc32h:
- case Intrinsic::arm64_crc32ch:
- return tryCombineCRC32(0xffff, N, DAG);
- }
- return SDValue();
-}
-
-static SDValue performExtendCombine(SDNode *N,
- TargetLowering::DAGCombinerInfo &DCI,
- SelectionDAG &DAG) {
- // If we see something like (zext (sabd (extract_high ...), (DUP ...))) then
- // we can convert that DUP into another extract_high (of a bigger DUP), which
- // helps the backend to decide that an sabdl2 would be useful, saving a real
- // extract_high operation.
- if (!DCI.isBeforeLegalizeOps() && N->getOpcode() == ISD::ZERO_EXTEND &&
- N->getOperand(0).getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
- SDNode *ABDNode = N->getOperand(0).getNode();
- unsigned IID = getIntrinsicID(ABDNode);
- if (IID == Intrinsic::arm64_neon_sabd ||
- IID == Intrinsic::arm64_neon_uabd) {
- SDValue NewABD = tryCombineLongOpWithDup(IID, ABDNode, DCI, DAG);
- if (!NewABD.getNode())
- return SDValue();
-
- return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0),
- NewABD);
- }
- }
-
- // This is effectively a custom type legalization for ARM64.
- //
- // Type legalization will split an extend of a small, legal, type to a larger
- // illegal type by first splitting the destination type, often creating
- // illegal source types, which then get legalized in isel-confusing ways,
- // leading to really terrible codegen. E.g.,
- // %result = v8i32 sext v8i8 %value
- // becomes
- // %losrc = extract_subreg %value, ...
- // %hisrc = extract_subreg %value, ...
- // %lo = v4i32 sext v4i8 %losrc
- // %hi = v4i32 sext v4i8 %hisrc
- // Things go rapidly downhill from there.
- //
- // For ARM64, the [sz]ext vector instructions can only go up one element
- // size, so we can, e.g., extend from i8 to i16, but to go from i8 to i32
- // take two instructions.
- //
- // This implies that the most efficient way to do the extend from v8i8
- // to two v4i32 values is to first extend the v8i8 to v8i16, then do
- // the normal splitting to happen for the v8i16->v8i32.
-
- // This is pre-legalization to catch some cases where the default
- // type legalization will create ill-tempered code.
- if (!DCI.isBeforeLegalizeOps())
- return SDValue();
-
- // We're only interested in cleaning things up for non-legal vector types
- // here. If both the source and destination are legal, things will just
- // work naturally without any fiddling.
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- EVT ResVT = N->getValueType(0);
- if (!ResVT.isVector() || TLI.isTypeLegal(ResVT))
- return SDValue();
- // If the vector type isn't a simple VT, it's beyond the scope of what
- // we're worried about here. Let legalization do its thing and hope for
- // the best.
- if (!ResVT.isSimple())
- return SDValue();
-
- SDValue Src = N->getOperand(0);
- MVT SrcVT = Src->getValueType(0).getSimpleVT();
- // If the source VT is a 64-bit vector, we can play games and get the
- // better results we want.
- if (SrcVT.getSizeInBits() != 64)
- return SDValue();
-
- unsigned SrcEltSize = SrcVT.getVectorElementType().getSizeInBits();
- unsigned ElementCount = SrcVT.getVectorNumElements();
- SrcVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize * 2), ElementCount);
- SDLoc DL(N);
- Src = DAG.getNode(N->getOpcode(), DL, SrcVT, Src);
-
- // Now split the rest of the operation into two halves, each with a 64
- // bit source.
- EVT LoVT, HiVT;
- SDValue Lo, Hi;
- unsigned NumElements = ResVT.getVectorNumElements();
- assert(!(NumElements & 1) && "Splitting vector, but not in half!");
- LoVT = HiVT = EVT::getVectorVT(*DAG.getContext(),
- ResVT.getVectorElementType(), NumElements / 2);
-
- EVT InNVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getVectorElementType(),
- LoVT.getVectorNumElements());
- Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, Src,
- DAG.getIntPtrConstant(0));
- Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, Src,
- DAG.getIntPtrConstant(InNVT.getVectorNumElements()));
- Lo = DAG.getNode(N->getOpcode(), DL, LoVT, Lo);
- Hi = DAG.getNode(N->getOpcode(), DL, HiVT, Hi);
-
- // Now combine the parts back together so we still have a single result
- // like the combiner expects.
- return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Lo, Hi);
-}
-
-/// Replace a vector store of a splatted scalar by scalar stores of the scalar
-/// value. The load/store optimizer pass will merge them into store-pair stores.
-/// This has better performance than a splat of the scalar followed by a split
-/// vector store. Even if the stores are not merged it is four stores vs a dup,
-/// followed by an ext.b and two stores.
-static SDValue replaceSplatVectorStore(SelectionDAG &DAG, StoreSDNode *St) {
- SDValue StVal = St->getValue();
- EVT VT = StVal.getValueType();
-
- // Don't replace floating-point stores; they might not be transformed to
- // stp because of the store-pair-suppress pass.
- if (VT.isFloatingPoint())
- return SDValue();
-
- // Check for insert vector elements.
- if (StVal.getOpcode() != ISD::INSERT_VECTOR_ELT)
- return SDValue();
-
- // We can express a splat as store pair(s) for 2 or 4 elements.
- unsigned NumVecElts = VT.getVectorNumElements();
- if (NumVecElts != 4 && NumVecElts != 2)
- return SDValue();
- SDValue SplatVal = StVal.getOperand(1);
- unsigned RemainInsertElts = NumVecElts - 1;
-
- // Check that this is a splat.
- while (--RemainInsertElts) {
- SDValue NextInsertElt = StVal.getOperand(0);
- if (NextInsertElt.getOpcode() != ISD::INSERT_VECTOR_ELT)
- return SDValue();
- if (NextInsertElt.getOperand(1) != SplatVal)
- return SDValue();
- StVal = NextInsertElt;
- }
- unsigned OrigAlignment = St->getAlignment();
- unsigned EltOffset = NumVecElts == 4 ? 4 : 8;
- unsigned Alignment = std::min(OrigAlignment, EltOffset);
-
- // Create scalar stores. This is at least as good as the code sequence for a
- // split unaligned store, which is a dup.s, ext.b, and two stores.
- // Most of the time the three stores should be replaced by store pair
- // instructions (stp).
- SDLoc DL(St);
- SDValue BasePtr = St->getBasePtr();
- SDValue NewST1 =
- DAG.getStore(St->getChain(), DL, SplatVal, BasePtr, St->getPointerInfo(),
- St->isVolatile(), St->isNonTemporal(), St->getAlignment());
-
- unsigned Offset = EltOffset;
- while (--NumVecElts) {
- SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i64, BasePtr,
- DAG.getConstant(Offset, MVT::i64));
- NewST1 = DAG.getStore(NewST1.getValue(0), DL, SplatVal, OffsetPtr,
- St->getPointerInfo(), St->isVolatile(),
- St->isNonTemporal(), Alignment);
- Offset += EltOffset;
- }
- return NewST1;
-}
-
-static SDValue performSTORECombine(SDNode *N,
- TargetLowering::DAGCombinerInfo &DCI,
- SelectionDAG &DAG,
- const ARM64Subtarget *Subtarget) {
- if (!DCI.isBeforeLegalize())
- return SDValue();
-
- StoreSDNode *S = cast<StoreSDNode>(N);
- if (S->isVolatile())
- return SDValue();
-
- // Cyclone has bad performance on unaligned 16B stores when crossing line and
- // page boundaries. We want to split such stores.
- if (!Subtarget->isCyclone())
- return SDValue();
-
- // Don't split at Oz.
- MachineFunction &MF = DAG.getMachineFunction();
- bool IsMinSize = MF.getFunction()->getAttributes().hasAttribute(
- AttributeSet::FunctionIndex, Attribute::MinSize);
- if (IsMinSize)
- return SDValue();
-
- SDValue StVal = S->getValue();
- EVT VT = StVal.getValueType();
-
- // Don't split v2i64 vectors. Memcpy lowering produces them, and splitting
- // them up regresses performance on micro-benchmarks and olden/bh.
- if (!VT.isVector() || VT.getVectorNumElements() < 2 || VT == MVT::v2i64)
- return SDValue();
-
- // Split unaligned 16B stores. They are terrible for performance.
- // Don't split stores with alignment of 1 or 2. Code that uses clang vector
- // extensions can use this to mark that it does not want splitting to happen
- // (by underspecifying alignment to be 1 or 2). Furthermore, the chance of
- // eliminating alignment hazards is only 1 in 8 for alignment of 2.
- if (VT.getSizeInBits() != 128 || S->getAlignment() >= 16 ||
- S->getAlignment() <= 2)
- return SDValue();
-
- // If we get a splat of a scalar convert this vector store to a store of
- // scalars. They will be merged into store pairs thereby removing two
- // instructions.
- SDValue ReplacedSplat = replaceSplatVectorStore(DAG, S);
- if (ReplacedSplat != SDValue())
- return ReplacedSplat;
-
- SDLoc DL(S);
- unsigned NumElts = VT.getVectorNumElements() / 2;
- // Split VT into two.
- EVT HalfVT =
- EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(), NumElts);
- SDValue SubVector0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, StVal,
- DAG.getIntPtrConstant(0));
- SDValue SubVector1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, StVal,
- DAG.getIntPtrConstant(NumElts));
- SDValue BasePtr = S->getBasePtr();
- SDValue NewST1 =
- DAG.getStore(S->getChain(), DL, SubVector0, BasePtr, S->getPointerInfo(),
- S->isVolatile(), S->isNonTemporal(), S->getAlignment());
- SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i64, BasePtr,
- DAG.getConstant(8, MVT::i64));
- return DAG.getStore(NewST1.getValue(0), DL, SubVector1, OffsetPtr,
- S->getPointerInfo(), S->isVolatile(), S->isNonTemporal(),
- S->getAlignment());
-}
-
-// Optimize compare with zero and branch.
-static SDValue performBRCONDCombine(SDNode *N,
- TargetLowering::DAGCombinerInfo &DCI,
- SelectionDAG &DAG) {
- SDValue Chain = N->getOperand(0);
- SDValue Dest = N->getOperand(1);
- SDValue CCVal = N->getOperand(2);
- SDValue Cmp = N->getOperand(3);
-
- assert(isa<ConstantSDNode>(CCVal) && "Expected a ConstantSDNode here!");
- unsigned CC = cast<ConstantSDNode>(CCVal)->getZExtValue();
- if (CC != ARM64CC::EQ && CC != ARM64CC::NE)
- return SDValue();
-
- unsigned CmpOpc = Cmp.getOpcode();
- if (CmpOpc != ARM64ISD::ADDS && CmpOpc != ARM64ISD::SUBS)
- return SDValue();
-
- // Only attempt folding if there is only one use of the flag and no use of the
- // value.
- if (!Cmp->hasNUsesOfValue(0, 0) || !Cmp->hasNUsesOfValue(1, 1))
- return SDValue();
-
- SDValue LHS = Cmp.getOperand(0);
- SDValue RHS = Cmp.getOperand(1);
-
- assert(LHS.getValueType() == RHS.getValueType() &&
- "Expected the value type to be the same for both operands!");
- if (LHS.getValueType() != MVT::i32 && LHS.getValueType() != MVT::i64)
- return SDValue();
-
- if (isa<ConstantSDNode>(LHS) && cast<ConstantSDNode>(LHS)->isNullValue())
- std::swap(LHS, RHS);
-
- if (!isa<ConstantSDNode>(RHS) || !cast<ConstantSDNode>(RHS)->isNullValue())
- return SDValue();
-
- if (LHS.getOpcode() == ISD::SHL || LHS.getOpcode() == ISD::SRA ||
- LHS.getOpcode() == ISD::SRL)
- return SDValue();
-
- // Fold the compare into the branch instruction.
- SDValue BR;
- if (CC == ARM64CC::EQ)
- BR = DAG.getNode(ARM64ISD::CBZ, SDLoc(N), MVT::Other, Chain, LHS, Dest);
- else
- BR = DAG.getNode(ARM64ISD::CBNZ, SDLoc(N), MVT::Other, Chain, LHS, Dest);
-
- // Do not add new nodes to DAG combiner worklist.
- DCI.CombineTo(N, BR, false);
-
- return SDValue();
-}
-
-SDValue ARM64TargetLowering::PerformDAGCombine(SDNode *N,
- DAGCombinerInfo &DCI) const {
- SelectionDAG &DAG = DCI.DAG;
- switch (N->getOpcode()) {
- default:
- break;
- case ISD::ADD:
- case ISD::SUB:
- return performAddSubLongCombine(N, DCI, DAG);
- case ISD::XOR:
- return performXorCombine(N, DAG, DCI, Subtarget);
- case ISD::MUL:
- return performMulCombine(N, DAG, DCI, Subtarget);
- case ISD::SINT_TO_FP:
- case ISD::UINT_TO_FP:
- return performIntToFpCombine(N, DAG);
- case ISD::OR:
- return performORCombine(N, DCI, Subtarget);
- case ISD::INTRINSIC_WO_CHAIN:
- return performIntrinsicCombine(N, DCI, Subtarget);
- case ISD::ANY_EXTEND:
- case ISD::ZERO_EXTEND:
- case ISD::SIGN_EXTEND:
- return performExtendCombine(N, DCI, DAG);
- case ISD::BITCAST:
- return performBitcastCombine(N, DCI, DAG);
- case ISD::CONCAT_VECTORS:
- return performConcatVectorsCombine(N, DCI, DAG);
- case ISD::STORE:
- return performSTORECombine(N, DCI, DAG, Subtarget);
- case ARM64ISD::BRCOND:
- return performBRCONDCombine(N, DCI, DAG);
- }
- return SDValue();
-}
-
-// Check that the return value is used only as a return value, as otherwise
-// we can't perform a tail-call. In particular, we need to check for
-// target ISD nodes that are returns and any other "odd" constructs
-// that the generic analysis code won't necessarily catch.
-bool ARM64TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
- if (N->getNumValues() != 1)
- return false;
- if (!N->hasNUsesOfValue(1, 0))
- return false;
-
- SDValue TCChain = Chain;
- SDNode *Copy = *N->use_begin();
- if (Copy->getOpcode() == ISD::CopyToReg) {
- // If the copy has a glue operand, we conservatively assume it isn't safe to
- // perform a tail call.
- if (Copy->getOperand(Copy->getNumOperands() - 1).getValueType() ==
- MVT::Glue)
- return false;
- TCChain = Copy->getOperand(0);
- } else if (Copy->getOpcode() != ISD::FP_EXTEND)
- return false;
-
- bool HasRet = false;
- for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
- UI != UE; ++UI) {
- if (UI->getOpcode() != ARM64ISD::RET_FLAG)
- return false;
- HasRet = true;
- }
-
- if (!HasRet)
- return false;
-
- Chain = TCChain;
- return true;
-}
-
-// Return whether an instruction can potentially be optimized to a tail
-// call. This will cause the optimizers to attempt to move, or duplicate,
-// return instructions to help enable tail call optimizations for this
-// instruction.
-bool ARM64TargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
- if (!EnableARM64TailCalls)
- return false;
-
- if (!CI->isTailCall())
- return false;
-
- return true;
-}
-
-bool ARM64TargetLowering::getIndexedAddressParts(SDNode *Op, SDValue &Base,
- SDValue &Offset,
- ISD::MemIndexedMode &AM,
- bool &IsInc,
- SelectionDAG &DAG) const {
- if (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB)
- return false;
-
- Base = Op->getOperand(0);
- // All of the indexed addressing mode instructions take a signed
- // 9-bit immediate offset.
- if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1))) {
- int64_t RHSC = (int64_t)RHS->getZExtValue();
- if (RHSC >= 256 || RHSC <= -256)
- return false;
- IsInc = (Op->getOpcode() == ISD::ADD);
- Offset = Op->getOperand(1);
- return true;
- }
- return false;
-}
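-
-// E.g. (illustrative): "ldr x0, [x1, #16]!" pre-indexes with +16, which passes
-// the signed-offset window checked above, while an offset of 256 is rejected
-// and no indexed form is generated.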
-
-bool ARM64TargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
- SDValue &Offset,
- ISD::MemIndexedMode &AM,
- SelectionDAG &DAG) const {
- EVT VT;
- SDValue Ptr;
- if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
- VT = LD->getMemoryVT();
- Ptr = LD->getBasePtr();
- } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
- VT = ST->getMemoryVT();
- Ptr = ST->getBasePtr();
- } else
- return false;
-
- bool IsInc;
- if (!getIndexedAddressParts(Ptr.getNode(), Base, Offset, AM, IsInc, DAG))
- return false;
- AM = IsInc ? ISD::PRE_INC : ISD::PRE_DEC;
- return true;
-}
-
-bool ARM64TargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
- SDValue &Base,
- SDValue &Offset,
- ISD::MemIndexedMode &AM,
- SelectionDAG &DAG) const {
- EVT VT;
- SDValue Ptr;
- if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
- VT = LD->getMemoryVT();
- Ptr = LD->getBasePtr();
- } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
- VT = ST->getMemoryVT();
- Ptr = ST->getBasePtr();
- } else
- return false;
-
- bool IsInc;
- if (!getIndexedAddressParts(Op, Base, Offset, AM, IsInc, DAG))
- return false;
- // Post-indexing updates the base, so it's not a valid transform
- // if that's not the same as the load's pointer.
- if (Ptr != Base)
- return false;
- AM = IsInc ? ISD::POST_INC : ISD::POST_DEC;
- return true;
-}
-
-/// The only 128-bit atomic operation is an stxp that succeeds. In particular
-/// neither ldp nor ldxp is atomic. So the canonical sequence for an atomic
-/// load is:
-/// loop:
-/// ldxp x0, x1, [x8]
-/// stxp w2, x0, x1, [x8]
-/// cbnz w2, loop
-/// If the stxp succeeds then the ldxp managed to get both halves without an
-/// intervening stxp from a different thread and the read was atomic.
-static void ReplaceATOMIC_LOAD_128(SDNode *N, SmallVectorImpl<SDValue> &Results,
- SelectionDAG &DAG) {
- SDLoc DL(N);
- AtomicSDNode *AN = cast<AtomicSDNode>(N);
- EVT VT = AN->getMemoryVT();
- SDValue Zero = DAG.getConstant(0, VT);
-
- // FIXME: Really want ATOMIC_LOAD_NOP but that doesn't fit into the existing
- // scheme very well. Given the complexity of what we're already generating, an
- // extra couple of ORRs probably won't make much difference.
- SDValue Result = DAG.getAtomic(ISD::ATOMIC_LOAD_OR, DL, AN->getMemoryVT(),
- N->getOperand(0), N->getOperand(1), Zero,
- AN->getMemOperand(), AN->getOrdering(),
- AN->getSynchScope());
-
- Results.push_back(Result.getValue(0)); // Value
- Results.push_back(Result.getValue(1)); // Chain
-}
-
-static void ReplaceATOMIC_OP_128(SDNode *N, SmallVectorImpl<SDValue> &Results,
- SelectionDAG &DAG, unsigned NewOp) {
- SDLoc DL(N);
- AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getOrdering();
- assert(N->getValueType(0) == MVT::i128 &&
- "Only know how to expand i128 atomics");
-
- SmallVector<SDValue, 6> Ops;
- Ops.push_back(N->getOperand(1)); // Ptr
- // Low part of Val1
- Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64,
- N->getOperand(2), DAG.getIntPtrConstant(0)));
- // High part of Val1
- Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64,
- N->getOperand(2), DAG.getIntPtrConstant(1)));
- if (NewOp == ARM64::ATOMIC_CMP_SWAP_I128) {
- // Low part of Val2
- Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64,
- N->getOperand(3), DAG.getIntPtrConstant(0)));
- // High part of Val2
- Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64,
- N->getOperand(3), DAG.getIntPtrConstant(1)));
- }
-
- Ops.push_back(DAG.getTargetConstant(Ordering, MVT::i32));
- Ops.push_back(N->getOperand(0)); // Chain
-
- SDVTList Tys = DAG.getVTList(MVT::i64, MVT::i64, MVT::Other);
- SDNode *Result = DAG.getMachineNode(NewOp, DL, Tys, Ops);
- SDValue OpsF[] = { SDValue(Result, 0), SDValue(Result, 1) };
- Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128, OpsF, 2));
- Results.push_back(SDValue(Result, 2));
-}
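-
-// Editor's note (sketch, not in the original source): i128 is not a legal
-// register type, so the helper above splits each i128 operand into 64-bit
-// halves with EXTRACT_ELEMENT (index 0 = low, 1 = high), emits the pseudo,
-// and glues the two 64-bit results back together:
-//   (lo, hi, chain) = ATOMIC_..._I128 ptr, vallo, valhi [, newlo, newhi], ordering
-//   result          = BUILD_PAIR(lo, hi) : i128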
-
-void ARM64TargetLowering::ReplaceNodeResults(SDNode *N,
- SmallVectorImpl<SDValue> &Results,
- SelectionDAG &DAG) const {
- switch (N->getOpcode()) {
- default:
- llvm_unreachable("Don't know how to custom expand this");
- case ISD::ATOMIC_LOAD:
- ReplaceATOMIC_LOAD_128(N, Results, DAG);
- return;
- case ISD::ATOMIC_LOAD_ADD:
- ReplaceATOMIC_OP_128(N, Results, DAG, ARM64::ATOMIC_LOAD_ADD_I128);
- return;
- case ISD::ATOMIC_LOAD_SUB:
- ReplaceATOMIC_OP_128(N, Results, DAG, ARM64::ATOMIC_LOAD_SUB_I128);
- return;
- case ISD::ATOMIC_LOAD_AND:
- ReplaceATOMIC_OP_128(N, Results, DAG, ARM64::ATOMIC_LOAD_AND_I128);
- return;
- case ISD::ATOMIC_LOAD_OR:
- ReplaceATOMIC_OP_128(N, Results, DAG, ARM64::ATOMIC_LOAD_OR_I128);
- return;
- case ISD::ATOMIC_LOAD_XOR:
- ReplaceATOMIC_OP_128(N, Results, DAG, ARM64::ATOMIC_LOAD_XOR_I128);
- return;
- case ISD::ATOMIC_LOAD_NAND:
- ReplaceATOMIC_OP_128(N, Results, DAG, ARM64::ATOMIC_LOAD_NAND_I128);
- return;
- case ISD::ATOMIC_SWAP:
- ReplaceATOMIC_OP_128(N, Results, DAG, ARM64::ATOMIC_SWAP_I128);
- return;
- case ISD::ATOMIC_LOAD_MIN:
- ReplaceATOMIC_OP_128(N, Results, DAG, ARM64::ATOMIC_LOAD_MIN_I128);
- return;
- case ISD::ATOMIC_LOAD_MAX:
- ReplaceATOMIC_OP_128(N, Results, DAG, ARM64::ATOMIC_LOAD_MAX_I128);
- return;
- case ISD::ATOMIC_LOAD_UMIN:
- ReplaceATOMIC_OP_128(N, Results, DAG, ARM64::ATOMIC_LOAD_UMIN_I128);
- return;
- case ISD::ATOMIC_LOAD_UMAX:
- ReplaceATOMIC_OP_128(N, Results, DAG, ARM64::ATOMIC_LOAD_UMAX_I128);
- return;
- case ISD::ATOMIC_CMP_SWAP:
- ReplaceATOMIC_OP_128(N, Results, DAG, ARM64::ATOMIC_CMP_SWAP_I128);
- return;
- case ISD::FP_TO_UINT:
- case ISD::FP_TO_SINT:
- assert(N->getValueType(0) == MVT::i128 && "unexpected illegal conversion");
- // Let normal code take care of it by not adding anything to Results.
- return;
- }
-}
diff --git a/lib/Target/ARM64/ARM64ISelLowering.h b/lib/Target/ARM64/ARM64ISelLowering.h
deleted file mode 100644
index a4664ac..0000000
--- a/lib/Target/ARM64/ARM64ISelLowering.h
+++ /dev/null
@@ -1,422 +0,0 @@
-//==-- ARM64ISelLowering.h - ARM64 DAG Lowering Interface --------*- C++ -*-==//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the interfaces that ARM64 uses to lower LLVM code into a
-// selection DAG.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_TARGET_ARM64_ISELLOWERING_H
-#define LLVM_TARGET_ARM64_ISELLOWERING_H
-
-#include "llvm/CodeGen/CallingConvLower.h"
-#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/IR/CallingConv.h"
-#include "llvm/Target/TargetLowering.h"
-
-namespace llvm {
-
-namespace ARM64ISD {
-
-enum {
- FIRST_NUMBER = ISD::BUILTIN_OP_END,
- WrapperLarge, // 4-instruction MOVZ/MOVK sequence for 64-bit addresses.
- CALL, // Function call.
-
- // Almost the same as a normal call node, except that a TLSDesc relocation is
- // needed so the linker can relax it correctly if possible.
- TLSDESC_CALL,
- ADRP, // Page address of a TargetGlobalAddress operand.
- ADDlow, // Add the low 12 bits of a TargetGlobalAddress operand.
- LOADgot, // Load from automatically generated descriptor (e.g. Global
- // Offset Table, TLS record).
- RET_FLAG, // Return with a flag operand. Operand 0 is the chain operand.
- BRCOND, // Conditional branch instruction; "b.cond".
- CSEL,
- FCSEL, // Conditional move instruction.
- CSINV, // Conditional select invert.
- CSNEG, // Conditional select negate.
- CSINC, // Conditional select increment.
-
- // Pointer to the thread's local storage area. Materialised from TPIDR_EL0 on
- // ELF.
- THREAD_POINTER,
- ADC,
- SBC, // adc, sbc instructions
-
- // Arithmetic instructions which write flags.
- ADDS,
- SUBS,
- ADCS,
- SBCS,
- ANDS,
-
- // Floating point comparison
- FCMP,
-
- // Floating point max and min instructions.
- FMAX,
- FMIN,
-
- // Scalar extract
- EXTR,
-
- // Scalar-to-vector duplication
- DUP,
- DUPLANE8,
- DUPLANE16,
- DUPLANE32,
- DUPLANE64,
-
- // Vector immediate moves
- MOVI,
- MOVIshift,
- MOVIedit,
- MOVImsl,
- FMOV,
- MVNIshift,
- MVNImsl,
-
- // Vector immediate ops
- BICi,
- ORRi,
-
- // Vector arithmetic negation
- NEG,
-
- // Vector shuffles
- ZIP1,
- ZIP2,
- UZP1,
- UZP2,
- TRN1,
- TRN2,
- REV16,
- REV32,
- REV64,
- EXT,
-
- // Vector shift by scalar
- VSHL,
- VLSHR,
- VASHR,
-
- // Vector shift by scalar (again)
- SQSHL_I,
- UQSHL_I,
- SQSHLU_I,
- SRSHR_I,
- URSHR_I,
-
- // Vector comparisons
- CMEQ,
- CMGE,
- CMGT,
- CMHI,
- CMHS,
- FCMEQ,
- FCMGE,
- FCMGT,
-
- // Vector zero comparisons
- CMEQz,
- CMGEz,
- CMGTz,
- CMLEz,
- CMLTz,
- FCMEQz,
- FCMGEz,
- FCMGTz,
- FCMLEz,
- FCMLTz,
-
- // Vector bitwise negation
- NOT,
-
- // Vector bitwise selection
- BIT,
-
- // Compare-and-branch
- CBZ,
- CBNZ,
- TBZ,
- TBNZ,
-
- // Tail calls
- TC_RETURN,
-
- // Custom prefetch handling
- PREFETCH,
-
- // {s|u}int to FP within a FP register.
- SITOF,
- UITOF
-};
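-
-// Editor's note (illustrative, not in the original source): the CSEL family
-// above maps to the AArch64 conditional-select instructions, e.g.
-//   csel  x0, x1, x2, eq   ; x0 = eq ? x1 : x2
-//   csinc x0, x1, x2, eq   ; x0 = eq ? x1 : x2 + 1
-//   csinv x0, x1, x2, eq   ; x0 = eq ? x1 : ~x2
-//   csneg x0, x1, x2, eq   ; x0 = eq ? x1 : -x2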
-
-} // end namespace ARM64ISD
-
-class ARM64Subtarget;
-class ARM64TargetMachine;
-
-class ARM64TargetLowering : public TargetLowering {
- bool RequireStrictAlign;
-
-public:
- explicit ARM64TargetLowering(ARM64TargetMachine &TM);
-
- /// Selects the correct CCAssignFn for the given CallingConvention
- /// value.
- CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const;
-
- /// computeMaskedBitsForTargetNode - Determine which of the bits specified in
- /// Mask are known to be either zero or one and return them in the
- /// KnownZero/KnownOne bitsets.
- void computeMaskedBitsForTargetNode(const SDValue Op, APInt &KnownZero,
- APInt &KnownOne, const SelectionDAG &DAG,
- unsigned Depth = 0) const;
-
- MVT getScalarShiftAmountTy(EVT LHSTy) const override;
-
- /// allowsUnalignedMemoryAccesses - Returns true if the target allows
- /// unaligned memory accesses of the specified type.
- bool allowsUnalignedMemoryAccesses(EVT VT, unsigned AddrSpace = 0,
- bool *Fast = 0) const override {
- if (RequireStrictAlign)
- return false;
- // FIXME: True for Cyclone, but not necessarily for others.
- if (Fast)
- *Fast = true;
- return true;
- }
-
- /// LowerOperation - Provide custom lowering hooks for some operations.
- SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
-
- const char *getTargetNodeName(unsigned Opcode) const override;
-
- SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
-
- /// getFunctionAlignment - Return the Log2 alignment of this function.
- unsigned getFunctionAlignment(const Function *F) const;
-
- /// getMaximalGlobalOffset - Returns the maximal possible offset which can
- /// be used for loads / stores from the global.
- unsigned getMaximalGlobalOffset() const override;
-
- /// Returns true if a cast between SrcAS and DestAS is a noop.
- bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override {
- // Addrspacecasts are always noops.
- return true;
- }
-
- /// createFastISel - This method returns a target specific FastISel object,
- /// or null if the target does not support "fast" ISel.
- FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
- const TargetLibraryInfo *libInfo) const override;
-
- bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
-
- bool isFPImmLegal(const APFloat &Imm, EVT VT) const override;
-
- /// isShuffleMaskLegal - Return true if the given shuffle mask can be
- /// codegen'd directly, or if it should be stack expanded.
- bool isShuffleMaskLegal(const SmallVectorImpl<int> &M, EVT VT) const override;
-
- /// getSetCCResultType - Return the ISD::SETCC ValueType
- EVT getSetCCResultType(LLVMContext &Context, EVT VT) const override;
-
- SDValue ReconstructShuffle(SDValue Op, SelectionDAG &DAG) const;
-
- MachineBasicBlock *EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
- unsigned Size, unsigned BinOpcode) const;
- MachineBasicBlock *EmitAtomicCmpSwap(MachineInstr *MI, MachineBasicBlock *BB,
- unsigned Size) const;
- MachineBasicBlock *EmitAtomicBinary128(MachineInstr *MI,
- MachineBasicBlock *BB,
- unsigned BinOpcodeLo,
- unsigned BinOpcodeHi) const;
- MachineBasicBlock *EmitAtomicCmpSwap128(MachineInstr *MI,
- MachineBasicBlock *BB) const;
- MachineBasicBlock *EmitAtomicMinMax128(MachineInstr *MI,
- MachineBasicBlock *BB,
- unsigned CondCode) const;
- MachineBasicBlock *EmitF128CSEL(MachineInstr *MI,
- MachineBasicBlock *BB) const;
-
- MachineBasicBlock *
- EmitInstrWithCustomInserter(MachineInstr *MI,
- MachineBasicBlock *MBB) const override;
-
- bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
- unsigned Intrinsic) const override;
-
- bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
- bool isTruncateFree(EVT VT1, EVT VT2) const override;
-
- bool isZExtFree(Type *Ty1, Type *Ty2) const override;
- bool isZExtFree(EVT VT1, EVT VT2) const override;
- bool isZExtFree(SDValue Val, EVT VT2) const override;
-
- bool hasPairedLoad(Type *LoadedType,
- unsigned &RequiredAligment) const override;
- bool hasPairedLoad(EVT LoadedType, unsigned &RequiredAligment) const override;
-
- bool isLegalAddImmediate(int64_t) const override;
- bool isLegalICmpImmediate(int64_t) const override;
-
- EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign,
- bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
- MachineFunction &MF) const override;
-
- /// isLegalAddressingMode - Return true if the addressing mode represented
- /// by AM is legal for this target, for a load/store of the specified type.
- bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const override;
-
- /// \brief Return the cost of the scaling factor used in the addressing
- /// mode represented by AM for this target, for a load/store
- /// of the specified type.
- /// If the AM is supported, the return value must be >= 0.
- /// If the AM is not supported, it returns a negative value.
- int getScalingFactorCost(const AddrMode &AM, Type *Ty) const override;
-
- /// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster
- /// than a pair of fmul and fadd instructions. fmuladd intrinsics will be
- /// expanded to FMAs when this method returns true, otherwise fmuladd is
- /// expanded to fmul + fadd.
- bool isFMAFasterThanFMulAndFAdd(EVT VT) const override;
-
- const uint16_t *getScratchRegisters(CallingConv::ID CC) const override;
-
- bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
- Type *Ty) const override;
-
-private:
- /// Subtarget - Keep a pointer to the ARM64Subtarget around so that we can
- /// make the right decision when generating code for different targets.
- const ARM64Subtarget *Subtarget;
-
- void addTypeForNEON(EVT VT, EVT PromotedBitwiseVT);
- void addDRTypeForNEON(MVT VT);
- void addQRTypeForNEON(MVT VT);
-
- SDValue
- LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
- const SmallVectorImpl<ISD::InputArg> &Ins, SDLoc DL,
- SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const override;
-
- SDValue LowerCall(CallLoweringInfo & /*CLI*/,
- SmallVectorImpl<SDValue> &InVals) const override;
-
- SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
- CallingConv::ID CallConv, bool isVarArg,
- const SmallVectorImpl<ISD::InputArg> &Ins, SDLoc DL,
- SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
- bool isThisReturn, SDValue ThisVal) const;
-
- bool isEligibleForTailCallOptimization(
- SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
- bool isCalleeStructRet, bool isCallerStructRet,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const;
-
- void saveVarArgRegisters(CCState &CCInfo, SelectionDAG &DAG, SDLoc DL,
- SDValue &Chain) const;
-
- bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
- bool isVarArg,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- LLVMContext &Context) const override;
-
- SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals, SDLoc DL,
- SelectionDAG &DAG) const override;
-
- SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerDarwinGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerELFGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerELFTLSDescCall(SDValue SymAddr, SDValue DescAddr, SDLoc DL,
- SelectionDAG &DAG) const;
- SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerAAPCS_VASTART(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerDarwin_VASTART(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerVectorSRA_SRL_SHL(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerF128Call(SDValue Op, SelectionDAG &DAG,
- RTLIB::Libcall Call) const;
- SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerVectorAND(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerVectorOR(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const;
-
- ConstraintType getConstraintType(const std::string &Constraint) const;
-
- /// Examine constraint string and operand type and determine a weight value.
- /// The operand object must already have been set up with the operand type.
- ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info,
- const char *constraint) const;
-
- std::pair<unsigned, const TargetRegisterClass *>
- getRegForInlineAsmConstraint(const std::string &Constraint, MVT VT) const;
- void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
- std::vector<SDValue> &Ops,
- SelectionDAG &DAG) const;
-
- bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const;
- bool mayBeEmittedAsTailCall(CallInst *CI) const;
- bool getIndexedAddressParts(SDNode *Op, SDValue &Base, SDValue &Offset,
- ISD::MemIndexedMode &AM, bool &IsInc,
- SelectionDAG &DAG) const;
- bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset,
- ISD::MemIndexedMode &AM,
- SelectionDAG &DAG) const;
- bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base,
- SDValue &Offset, ISD::MemIndexedMode &AM,
- SelectionDAG &DAG) const;
-
- void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
- SelectionDAG &DAG) const;
-};
-
-namespace ARM64 {
-FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
- const TargetLibraryInfo *libInfo);
-} // end namespace ARM64
-
-} // end namespace llvm
-
-#endif // LLVM_TARGET_ARM64_ISELLOWERING_H
diff --git a/lib/Target/ARM64/ARM64InstrAtomics.td b/lib/Target/ARM64/ARM64InstrAtomics.td
deleted file mode 100644
index 0d36e06..0000000
--- a/lib/Target/ARM64/ARM64InstrAtomics.td
+++ /dev/null
@@ -1,293 +0,0 @@
-//===- ARM64InstrAtomics.td - ARM64 Atomic codegen support -*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// ARM64 Atomic operand code-gen constructs.
-//
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------
-// Atomic fences
-//===----------------------------------
-def : Pat<(atomic_fence (i64 4), (imm)), (DMB (i32 0x9))>;
-def : Pat<(atomic_fence (imm), (imm)), (DMB (i32 0xb))>;
-
-//===----------------------------------
-// Atomic loads
-//===----------------------------------
-
-// When the loads are genuinely atomic, only one addressing mode (GPR64sp) is
-// supported. When they are relaxed, however, any of the standard addressing
-// modes is valid and may give efficiency gains.
-
-// An atomic load operation that actually needs acquire semantics.
-class acquiring_load<PatFrag base>
- : PatFrag<(ops node:$ptr), (base node:$ptr), [{
- AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getOrdering();
- assert(Ordering != AcquireRelease && "unexpected load ordering");
- return Ordering == Acquire || Ordering == SequentiallyConsistent;
-}]>;
-
-// An atomic load operation that does not need either acquire or release
-// semantics.
-class relaxed_load<PatFrag base>
- : PatFrag<(ops node:$ptr), (base node:$ptr), [{
- AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getOrdering();
- return Ordering == Monotonic || Ordering == Unordered;
-}]>;
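-
-// Editor's note (illustrative, not in the original source): the two fragments
-// above partition the orderings an atomic load can carry:
-//   seq_cst, acquire     -> acquiring_load -> LDAR* forms
-//   monotonic, unordered -> relaxed_load   -> ordinary LDR/LDUR forms
-// acquire_release is asserted impossible, and "release" loads are rejected by
-// the IR verifier, so the two classes are exhaustive.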
-
-// 8-bit loads
-def : Pat<(acquiring_load<atomic_load_8> GPR64sp:$ptr), (LDARB GPR64sp:$ptr)>;
-def : Pat<(relaxed_load<atomic_load_8> ro_indexed8:$addr),
- (LDRBBro ro_indexed8:$addr)>;
-def : Pat<(relaxed_load<atomic_load_8> am_indexed8:$addr),
- (LDRBBui am_indexed8:$addr)>;
-def : Pat<(relaxed_load<atomic_load_8> am_unscaled8:$addr),
- (LDURBBi am_unscaled8:$addr)>;
-
-// 16-bit loads
-def : Pat<(acquiring_load<atomic_load_16> GPR64sp:$ptr), (LDARH GPR64sp:$ptr)>;
-def : Pat<(relaxed_load<atomic_load_16> ro_indexed16:$addr),
- (LDRHHro ro_indexed16:$addr)>;
-def : Pat<(relaxed_load<atomic_load_16> am_indexed16:$addr),
- (LDRHHui am_indexed16:$addr)>;
-def : Pat<(relaxed_load<atomic_load_16> am_unscaled16:$addr),
- (LDURHHi am_unscaled16:$addr)>;
-
-// 32-bit loads
-def : Pat<(acquiring_load<atomic_load_32> GPR64sp:$ptr), (LDARW GPR64sp:$ptr)>;
-def : Pat<(relaxed_load<atomic_load_32> ro_indexed32:$addr),
- (LDRWro ro_indexed32:$addr)>;
-def : Pat<(relaxed_load<atomic_load_32> am_indexed32:$addr),
- (LDRWui am_indexed32:$addr)>;
-def : Pat<(relaxed_load<atomic_load_32> am_unscaled32:$addr),
- (LDURWi am_unscaled32:$addr)>;
-
-// 64-bit loads
-def : Pat<(acquiring_load<atomic_load_64> GPR64sp:$ptr), (LDARX GPR64sp:$ptr)>;
-def : Pat<(relaxed_load<atomic_load_64> ro_indexed64:$addr),
- (LDRXro ro_indexed64:$addr)>;
-def : Pat<(relaxed_load<atomic_load_64> am_indexed64:$addr),
- (LDRXui am_indexed64:$addr)>;
-def : Pat<(relaxed_load<atomic_load_64> am_unscaled64:$addr),
- (LDURXi am_unscaled64:$addr)>;
-
-//===----------------------------------
-// Atomic stores
-//===----------------------------------
-
-// When the stores are genuinely atomic, only one addressing mode (GPR64sp) is
-// supported. When they are relaxed, however, any of the standard addressing
-// modes is valid and may give efficiency gains.
-
-// A store operation that actually needs release semantics.
-class releasing_store<PatFrag base>
- : PatFrag<(ops node:$ptr, node:$val), (base node:$ptr, node:$val), [{
- AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getOrdering();
- assert(Ordering != AcquireRelease && "unexpected store ordering");
- return Ordering == Release || Ordering == SequentiallyConsistent;
-}]>;
-
-// An atomic store operation that does not need release semantics; on ARM64 a
-// plain store instruction suffices.
-class relaxed_store<PatFrag base>
- : PatFrag<(ops node:$ptr, node:$val), (base node:$ptr, node:$val), [{
- AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getOrdering();
- return Ordering == Monotonic || Ordering == Unordered;
-}]>;
-
-// 8-bit stores
-def : Pat<(releasing_store<atomic_store_8> GPR64sp:$ptr, GPR32:$val),
- (STLRB GPR32:$val, GPR64sp:$ptr)>;
-def : Pat<(relaxed_store<atomic_store_8> ro_indexed8:$ptr, GPR32:$val),
- (STRBBro GPR32:$val, ro_indexed8:$ptr)>;
-def : Pat<(relaxed_store<atomic_store_8> am_indexed8:$ptr, GPR32:$val),
- (STRBBui GPR32:$val, am_indexed8:$ptr)>;
-def : Pat<(relaxed_store<atomic_store_8> am_unscaled8:$ptr, GPR32:$val),
- (STURBBi GPR32:$val, am_unscaled8:$ptr)>;
-
-// 16-bit stores
-def : Pat<(releasing_store<atomic_store_16> GPR64sp:$ptr, GPR32:$val),
- (STLRH GPR32:$val, GPR64sp:$ptr)>;
-def : Pat<(relaxed_store<atomic_store_16> ro_indexed16:$ptr, GPR32:$val),
- (STRHHro GPR32:$val, ro_indexed16:$ptr)>;
-def : Pat<(relaxed_store<atomic_store_16> am_indexed16:$ptr, GPR32:$val),
- (STRHHui GPR32:$val, am_indexed16:$ptr)>;
-def : Pat<(relaxed_store<atomic_store_16> am_unscaled16:$ptr, GPR32:$val),
- (STURHHi GPR32:$val, am_unscaled16:$ptr)>;
-
-// 32-bit stores
-def : Pat<(releasing_store<atomic_store_32> GPR64sp:$ptr, GPR32:$val),
- (STLRW GPR32:$val, GPR64sp:$ptr)>;
-def : Pat<(relaxed_store<atomic_store_32> ro_indexed32:$ptr, GPR32:$val),
- (STRWro GPR32:$val, ro_indexed32:$ptr)>;
-def : Pat<(relaxed_store<atomic_store_32> am_indexed32:$ptr, GPR32:$val),
- (STRWui GPR32:$val, am_indexed32:$ptr)>;
-def : Pat<(relaxed_store<atomic_store_32> am_unscaled32:$ptr, GPR32:$val),
- (STURWi GPR32:$val, am_unscaled32:$ptr)>;
-
-// 64-bit stores
-def : Pat<(releasing_store<atomic_store_64> GPR64sp:$ptr, GPR64:$val),
- (STLRX GPR64:$val, GPR64sp:$ptr)>;
-def : Pat<(relaxed_store<atomic_store_64> ro_indexed64:$ptr, GPR64:$val),
- (STRXro GPR64:$val, ro_indexed64:$ptr)>;
-def : Pat<(relaxed_store<atomic_store_64> am_indexed64:$ptr, GPR64:$val),
- (STRXui GPR64:$val, am_indexed64:$ptr)>;
-def : Pat<(relaxed_store<atomic_store_64> am_unscaled64:$ptr, GPR64:$val),
- (STURXi GPR64:$val, am_unscaled64:$ptr)>;
-
-//===----------------------------------
-// Atomic read-modify-write operations
-//===----------------------------------
-
-// More complicated operations need lots of C++ support, so we just create
-// skeletons here for the C++ code to refer to.
-
-let usesCustomInserter = 1, hasCtrlDep = 1, mayLoad = 1, mayStore = 1 in {
-multiclass AtomicSizes {
- def _I8 : Pseudo<(outs GPR32:$dst),
- (ins GPR64sp:$ptr, GPR32:$incr, i32imm:$ordering), []>;
- def _I16 : Pseudo<(outs GPR32:$dst),
- (ins GPR64sp:$ptr, GPR32:$incr, i32imm:$ordering), []>;
- def _I32 : Pseudo<(outs GPR32:$dst),
- (ins GPR64sp:$ptr, GPR32:$incr, i32imm:$ordering), []>;
- def _I64 : Pseudo<(outs GPR64:$dst),
- (ins GPR64sp:$ptr, GPR64:$incr, i32imm:$ordering), []>;
- def _I128 : Pseudo<(outs GPR64:$dstlo, GPR64:$dsthi),
- (ins GPR64sp:$ptr, GPR64:$incrlo, GPR64:$incrhi,
- i32imm:$ordering), []>;
-}
-}
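-
-// Editor's note (not in the original source): each `defm ... : AtomicSizes`
-// below stamps out one pseudo per width, e.g.
-//   defm ATOMIC_LOAD_ADD : AtomicSizes;
-// defines ATOMIC_LOAD_ADD_I8/_I16/_I32/_I64/_I128 -- the same names that
-// ReplaceNodeResults and EmitAtomicBinary select on from the C++ code.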
-
-defm ATOMIC_LOAD_ADD : AtomicSizes;
-defm ATOMIC_LOAD_SUB : AtomicSizes;
-defm ATOMIC_LOAD_AND : AtomicSizes;
-defm ATOMIC_LOAD_OR : AtomicSizes;
-defm ATOMIC_LOAD_XOR : AtomicSizes;
-defm ATOMIC_LOAD_NAND : AtomicSizes;
-defm ATOMIC_SWAP : AtomicSizes;
-let Defs = [CPSR] in {
- // These operations need a CMP to calculate the correct value
- defm ATOMIC_LOAD_MIN : AtomicSizes;
- defm ATOMIC_LOAD_MAX : AtomicSizes;
- defm ATOMIC_LOAD_UMIN : AtomicSizes;
- defm ATOMIC_LOAD_UMAX : AtomicSizes;
-}
-
-class AtomicCmpSwap<RegisterClass GPRData>
- : Pseudo<(outs GPRData:$dst),
- (ins GPR64sp:$ptr, GPRData:$old, GPRData:$new,
- i32imm:$ordering), []> {
- let usesCustomInserter = 1;
- let hasCtrlDep = 1;
- let mayLoad = 1;
- let mayStore = 1;
- let Defs = [CPSR];
-}
-
-def ATOMIC_CMP_SWAP_I8 : AtomicCmpSwap<GPR32>;
-def ATOMIC_CMP_SWAP_I16 : AtomicCmpSwap<GPR32>;
-def ATOMIC_CMP_SWAP_I32 : AtomicCmpSwap<GPR32>;
-def ATOMIC_CMP_SWAP_I64 : AtomicCmpSwap<GPR64>;
-
-def ATOMIC_CMP_SWAP_I128
- : Pseudo<(outs GPR64:$dstlo, GPR64:$dsthi),
- (ins GPR64sp:$ptr, GPR64:$oldlo, GPR64:$oldhi,
- GPR64:$newlo, GPR64:$newhi, i32imm:$ordering), []> {
- let usesCustomInserter = 1;
- let hasCtrlDep = 1;
- let mayLoad = 1;
- let mayStore = 1;
- let Defs = [CPSR];
-}
-
-//===----------------------------------
-// Low-level exclusive operations
-//===----------------------------------
-
-// Load-exclusives.
-
-def ldxr_1 : PatFrag<(ops node:$ptr), (int_arm64_ldxr node:$ptr), [{
- return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i8;
-}]>;
-
-def ldxr_2 : PatFrag<(ops node:$ptr), (int_arm64_ldxr node:$ptr), [{
- return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i16;
-}]>;
-
-def ldxr_4 : PatFrag<(ops node:$ptr), (int_arm64_ldxr node:$ptr), [{
- return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i32;
-}]>;
-
-def ldxr_8 : PatFrag<(ops node:$ptr), (int_arm64_ldxr node:$ptr), [{
- return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i64;
-}]>;
-
-def : Pat<(ldxr_1 am_noindex:$addr),
- (SUBREG_TO_REG (i64 0), (LDXRB am_noindex:$addr), sub_32)>;
-def : Pat<(ldxr_2 am_noindex:$addr),
- (SUBREG_TO_REG (i64 0), (LDXRH am_noindex:$addr), sub_32)>;
-def : Pat<(ldxr_4 am_noindex:$addr),
- (SUBREG_TO_REG (i64 0), (LDXRW am_noindex:$addr), sub_32)>;
-def : Pat<(ldxr_8 am_noindex:$addr), (LDXRX am_noindex:$addr)>;
-
-def : Pat<(and (ldxr_1 am_noindex:$addr), 0xff),
- (SUBREG_TO_REG (i64 0), (LDXRB am_noindex:$addr), sub_32)>;
-def : Pat<(and (ldxr_2 am_noindex:$addr), 0xffff),
- (SUBREG_TO_REG (i64 0), (LDXRH am_noindex:$addr), sub_32)>;
-def : Pat<(and (ldxr_4 am_noindex:$addr), 0xffffffff),
- (SUBREG_TO_REG (i64 0), (LDXRW am_noindex:$addr), sub_32)>;
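-
-// Editor's note (sketch, not in the original source): these patterns map the
-// @llvm.arm64.ldxr intrinsic onto the width-specific load-exclusives; the
-// sub-64-bit results are implicitly zero-extended, so, approximately,
-//   %v = call i64 @llvm.arm64.ldxr.p0i8(i8* %p)
-// selects to `ldxrb` wrapped in SUBREG_TO_REG, and a following mask such as
-// `and %v, 255` is folded away by the and-patterns above.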
-
-// Store-exclusives.
-
-def stxr_1 : PatFrag<(ops node:$val, node:$ptr),
- (int_arm64_stxr node:$val, node:$ptr), [{
- return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i8;
-}]>;
-
-def stxr_2 : PatFrag<(ops node:$val, node:$ptr),
- (int_arm64_stxr node:$val, node:$ptr), [{
- return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i16;
-}]>;
-
-def stxr_4 : PatFrag<(ops node:$val, node:$ptr),
- (int_arm64_stxr node:$val, node:$ptr), [{
- return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i32;
-}]>;
-
-def stxr_8 : PatFrag<(ops node:$val, node:$ptr),
- (int_arm64_stxr node:$val, node:$ptr), [{
- return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i64;
-}]>;
-
-def : Pat<(stxr_1 GPR64:$val, am_noindex:$addr),
- (STXRB (EXTRACT_SUBREG GPR64:$val, sub_32), am_noindex:$addr)>;
-def : Pat<(stxr_2 GPR64:$val, am_noindex:$addr),
- (STXRH (EXTRACT_SUBREG GPR64:$val, sub_32), am_noindex:$addr)>;
-def : Pat<(stxr_4 GPR64:$val, am_noindex:$addr),
- (STXRW (EXTRACT_SUBREG GPR64:$val, sub_32), am_noindex:$addr)>;
-def : Pat<(stxr_8 GPR64:$val, am_noindex:$addr),
- (STXRX GPR64:$val, am_noindex:$addr)>;
-
-def : Pat<(stxr_1 (zext (and GPR32:$val, 0xff)), am_noindex:$addr),
- (STXRB GPR32:$val, am_noindex:$addr)>;
-def : Pat<(stxr_2 (zext (and GPR32:$val, 0xffff)), am_noindex:$addr),
- (STXRH GPR32:$val, am_noindex:$addr)>;
-def : Pat<(stxr_4 (zext GPR32:$val), am_noindex:$addr),
- (STXRW GPR32:$val, am_noindex:$addr)>;
-
-def : Pat<(stxr_1 (and GPR64:$val, 0xff), am_noindex:$addr),
- (STXRB (EXTRACT_SUBREG GPR64:$val, sub_32), am_noindex:$addr)>;
-def : Pat<(stxr_2 (and GPR64:$val, 0xffff), am_noindex:$addr),
- (STXRH (EXTRACT_SUBREG GPR64:$val, sub_32), am_noindex:$addr)>;
-def : Pat<(stxr_4 (and GPR64:$val, 0xffffffff), am_noindex:$addr),
- (STXRW (EXTRACT_SUBREG GPR64:$val, sub_32), am_noindex:$addr)>;
-
-
-// And clear exclusive.
-
-def : Pat<(int_arm64_clrex), (CLREX 0xf)>;
diff --git a/lib/Target/ARM64/ARM64InstrFormats.td b/lib/Target/ARM64/ARM64InstrFormats.td
deleted file mode 100644
index 38406f8..0000000
--- a/lib/Target/ARM64/ARM64InstrFormats.td
+++ /dev/null
@@ -1,8193 +0,0 @@
-//===- ARM64InstrFormats.td - ARM64 Instruction Formats ------*- tblgen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// Describe the ARM64 instruction formats here
-//
-
-// Format specifies the encoding used by the instruction. This is part of the
-// ad-hoc solution used to emit machine instruction encodings by our machine
-// code emitter.
-class Format<bits<2> val> {
- bits<2> Value = val;
-}
-
-def PseudoFrm : Format<0>;
-def NormalFrm : Format<1>; // Do we need any others?
-
-// ARM64 Instruction Format
-class ARM64Inst<Format f, string cstr> : Instruction {
- field bits<32> Inst; // Instruction encoding.
- // Mask of bits that cause an encoding to be UNPREDICTABLE.
- // If a bit is set, then if the corresponding bit in the
- // target encoding differs from its value in the "Inst" field,
- // the instruction is UNPREDICTABLE (SoftFail in abstract parlance).
- field bits<32> Unpredictable = 0;
- // SoftFail is the generic name for this field, but we alias it so
- // as to make it more obvious what it means in ARM-land.
- field bits<32> SoftFail = Unpredictable;
- let Namespace = "ARM64";
- Format F = f;
- bits<2> Form = F.Value;
- let Pattern = [];
- let Constraints = cstr;
-}
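-
-// Editor's note (illustrative, not in the original source): a subclass that
-// requires, say, Inst{4-0} == 0b11111 can set the matching Unpredictable bits
-// so the disassembler reports other values as SoftFail rather than silently
-// accepting them:
-//   let Unpredictable{4-0} = 0b11111;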
-
-// Pseudo instructions (don't have encoding information)
-class Pseudo<dag oops, dag iops, list<dag> pattern, string cstr = "">
- : ARM64Inst<PseudoFrm, cstr> {
- dag OutOperandList = oops;
- dag InOperandList = iops;
- let Pattern = pattern;
- let isCodeGenOnly = 1;
-}
-
-// Real instructions (have encoding information)
-class EncodedI<string cstr, list<dag> pattern> : ARM64Inst<NormalFrm, cstr> {
- let Pattern = pattern;
- let Size = 4;
-}
-
-// Normal instructions
-class I<dag oops, dag iops, string asm, string operands, string cstr,
- list<dag> pattern>
- : EncodedI<cstr, pattern> {
- dag OutOperandList = oops;
- dag InOperandList = iops;
- let AsmString = !strconcat(asm, operands);
-}
-
-class TriOpFrag<dag res> : PatFrag<(ops node:$LHS, node:$MHS, node:$RHS), res>;
-class BinOpFrag<dag res> : PatFrag<(ops node:$LHS, node:$RHS), res>;
-class UnOpFrag<dag res> : PatFrag<(ops node:$LHS), res>;
-
-// Helper fragment for an extract of the high portion of a 128-bit vector.
-def extract_high_v16i8 :
- UnOpFrag<(extract_subvector (v16i8 node:$LHS), (i64 8))>;
-def extract_high_v8i16 :
- UnOpFrag<(extract_subvector (v8i16 node:$LHS), (i64 4))>;
-def extract_high_v4i32 :
- UnOpFrag<(extract_subvector (v4i32 node:$LHS), (i64 2))>;
-def extract_high_v2i64 :
- UnOpFrag<(extract_subvector (v2i64 node:$LHS), (i64 1))>;
-
-//===----------------------------------------------------------------------===//
-// Asm Operand Classes.
-//
-
-// Shifter operand for arithmetic shifted encodings.
-def ShifterOperand : AsmOperandClass {
- let Name = "Shifter";
-}
-
-// Shifter operand for mov immediate encodings.
-def MovImm32ShifterOperand : AsmOperandClass {
- let SuperClasses = [ShifterOperand];
- let Name = "MovImm32Shifter";
-}
-def MovImm64ShifterOperand : AsmOperandClass {
- let SuperClasses = [ShifterOperand];
- let Name = "MovImm64Shifter";
-}
-
-// Shifter operand for arithmetic register shifted encodings.
-def ArithmeticShifterOperand : AsmOperandClass {
- let SuperClasses = [ShifterOperand];
- let Name = "ArithmeticShifter";
-}
-
-// Shifter operand for arithmetic shifted encodings for ADD/SUB instructions.
-def AddSubShifterOperand : AsmOperandClass {
- let SuperClasses = [ArithmeticShifterOperand];
- let Name = "AddSubShifter";
-}
-
-// Shifter operand for logical vector 128/64-bit shifted encodings.
-def LogicalVecShifterOperand : AsmOperandClass {
- let SuperClasses = [ShifterOperand];
- let Name = "LogicalVecShifter";
-}
-def LogicalVecHalfWordShifterOperand : AsmOperandClass {
- let SuperClasses = [LogicalVecShifterOperand];
- let Name = "LogicalVecHalfWordShifter";
-}
-
-// The "MSL" shifter on the vector MOVI instruction.
-def MoveVecShifterOperand : AsmOperandClass {
- let SuperClasses = [ShifterOperand];
- let Name = "MoveVecShifter";
-}
-
-// Extend operand for arithmetic encodings.
-def ExtendOperand : AsmOperandClass { let Name = "Extend"; }
-def ExtendOperand64 : AsmOperandClass {
- let SuperClasses = [ExtendOperand];
- let Name = "Extend64";
-}
-// An 'extend' that is an lsl of a 64-bit register.
-def ExtendOperandLSL64 : AsmOperandClass {
- let SuperClasses = [ExtendOperand];
- let Name = "ExtendLSL64";
-}
-
-// 8-bit floating-point immediate encodings.
-def FPImmOperand : AsmOperandClass {
- let Name = "FPImm";
- let ParserMethod = "tryParseFPImm";
-}
-
-// 8-bit immediate for AdvSIMD where 64-bit values of the form:
-// aaaaaaaa bbbbbbbb cccccccc dddddddd eeeeeeee ffffffff gggggggg hhhhhhhh
-// are encoded as the eight-bit value 'abcdefgh'.
-def SIMDImmType10Operand : AsmOperandClass { let Name = "SIMDImmType10"; }
-
-
-//===----------------------------------------------------------------------===//
-// Operand Definitions.
-//
-
-// ADR[P] instruction labels.
-def AdrpOperand : AsmOperandClass {
- let Name = "AdrpLabel";
- let ParserMethod = "tryParseAdrpLabel";
-}
-def adrplabel : Operand<i64> {
- let EncoderMethod = "getAdrLabelOpValue";
- let PrintMethod = "printAdrpLabel";
- let ParserMatchClass = AdrpOperand;
-}
-
-def AdrOperand : AsmOperandClass {
- let Name = "AdrLabel";
- let ParserMethod = "tryParseAdrLabel";
-}
-def adrlabel : Operand<i64> {
- let EncoderMethod = "getAdrLabelOpValue";
- let ParserMatchClass = AdrOperand;
-}
-
-// simm9 predicate - True if the immediate is in the range [-256, 255].
-def SImm9Operand : AsmOperandClass {
- let Name = "SImm9";
- let DiagnosticType = "InvalidMemoryIndexedSImm9";
-}
-def simm9 : Operand<i64>, ImmLeaf<i64, [{ return Imm >= -256 && Imm < 256; }]> {
- let ParserMatchClass = SImm9Operand;
-}
-
-// simm7s4 predicate - True if the immediate is a multiple of 4 in the range
-// [-256, 252].
-def SImm7s4Operand : AsmOperandClass {
- let Name = "SImm7s4";
- let DiagnosticType = "InvalidMemoryIndexed32SImm7";
-}
-def simm7s4 : Operand<i32> {
- let ParserMatchClass = SImm7s4Operand;
- let PrintMethod = "printImmScale4";
-}
-
-// simm7s8 predicate - True if the immediate is a multiple of 8 in the range
-// [-512, 504].
-def SImm7s8Operand : AsmOperandClass {
- let Name = "SImm7s8";
- let DiagnosticType = "InvalidMemoryIndexed64SImm7";
-}
-def simm7s8 : Operand<i32> {
- let ParserMatchClass = SImm7s8Operand;
- let PrintMethod = "printImmScale8";
-}
-
-// simm7s16 predicate - True if the immediate is a multiple of 16 in the range
-// [-1024, 1008].
-def SImm7s16Operand : AsmOperandClass {
- let Name = "SImm7s16";
- let DiagnosticType = "InvalidMemoryIndexed64SImm7";
-}
-def simm7s16 : Operand<i32> {
- let ParserMatchClass = SImm7s16Operand;
- let PrintMethod = "printImmScale16";
-}
-
-// imm0_65535 predicate - True if the immediate is in the range [0,65535].
-def Imm0_65535Operand : AsmOperandClass { let Name = "Imm0_65535"; }
-def imm0_65535 : Operand<i32>, ImmLeaf<i32, [{
- return ((uint32_t)Imm) < 65536;
-}]> {
- let ParserMatchClass = Imm0_65535Operand;
-}
-
-def Imm1_8Operand : AsmOperandClass {
- let Name = "Imm1_8";
- let DiagnosticType = "InvalidImm1_8";
-}
-def Imm1_16Operand : AsmOperandClass {
- let Name = "Imm1_16";
- let DiagnosticType = "InvalidImm1_16";
-}
-def Imm1_32Operand : AsmOperandClass {
- let Name = "Imm1_32";
- let DiagnosticType = "InvalidImm1_32";
-}
-def Imm1_64Operand : AsmOperandClass {
- let Name = "Imm1_64";
- let DiagnosticType = "InvalidImm1_64";
-}
-
-def MovZSymbolG3AsmOperand : AsmOperandClass {
- let Name = "MovZSymbolG3";
- let RenderMethod = "addImmOperands";
-}
-
-def movz_symbol_g3 : Operand<i32> {
- let ParserMatchClass = MovZSymbolG3AsmOperand;
-}
-
-def MovZSymbolG2AsmOperand : AsmOperandClass {
- let Name = "MovZSymbolG2";
- let RenderMethod = "addImmOperands";
-}
-
-def movz_symbol_g2 : Operand<i32> {
- let ParserMatchClass = MovZSymbolG2AsmOperand;
-}
-
-def MovZSymbolG1AsmOperand : AsmOperandClass {
- let Name = "MovZSymbolG1";
- let RenderMethod = "addImmOperands";
-}
-
-def movz_symbol_g1 : Operand<i32> {
- let ParserMatchClass = MovZSymbolG1AsmOperand;
-}
-
-def MovZSymbolG0AsmOperand : AsmOperandClass {
- let Name = "MovZSymbolG0";
- let RenderMethod = "addImmOperands";
-}
-
-def movz_symbol_g0 : Operand<i32> {
- let ParserMatchClass = MovZSymbolG0AsmOperand;
-}
-
-def MovKSymbolG2AsmOperand : AsmOperandClass {
- let Name = "MovKSymbolG2";
- let RenderMethod = "addImmOperands";
-}
-
-def movk_symbol_g2 : Operand<i32> {
- let ParserMatchClass = MovKSymbolG2AsmOperand;
-}
-
-def MovKSymbolG1AsmOperand : AsmOperandClass {
- let Name = "MovKSymbolG1";
- let RenderMethod = "addImmOperands";
-}
-
-def movk_symbol_g1 : Operand<i32> {
- let ParserMatchClass = MovKSymbolG1AsmOperand;
-}
-
-def MovKSymbolG0AsmOperand : AsmOperandClass {
- let Name = "MovKSymbolG0";
- let RenderMethod = "addImmOperands";
-}
-
-def movk_symbol_g0 : Operand<i32> {
- let ParserMatchClass = MovKSymbolG0AsmOperand;
-}
-
-def fixedpoint32 : Operand<i32> {
- let EncoderMethod = "getFixedPointScaleOpValue";
- let DecoderMethod = "DecodeFixedPointScaleImm";
- let ParserMatchClass = Imm1_32Operand;
-}
-def fixedpoint64 : Operand<i64> {
- let EncoderMethod = "getFixedPointScaleOpValue";
- let DecoderMethod = "DecodeFixedPointScaleImm";
- let ParserMatchClass = Imm1_64Operand;
-}
-
-def vecshiftR8 : Operand<i32>, ImmLeaf<i32, [{
- return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 9);
-}]> {
- let EncoderMethod = "getVecShiftR8OpValue";
- let DecoderMethod = "DecodeVecShiftR8Imm";
- let ParserMatchClass = Imm1_8Operand;
-}
-def vecshiftR16 : Operand<i32>, ImmLeaf<i32, [{
- return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 17);
-}]> {
- let EncoderMethod = "getVecShiftR16OpValue";
- let DecoderMethod = "DecodeVecShiftR16Imm";
- let ParserMatchClass = Imm1_16Operand;
-}
-def vecshiftR16Narrow : Operand<i32>, ImmLeaf<i32, [{
- return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 9);
-}]> {
- let EncoderMethod = "getVecShiftR16OpValue";
- let DecoderMethod = "DecodeVecShiftR16ImmNarrow";
- let ParserMatchClass = Imm1_8Operand;
-}
-def vecshiftR32 : Operand<i32>, ImmLeaf<i32, [{
- return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 33);
-}]> {
- let EncoderMethod = "getVecShiftR32OpValue";
- let DecoderMethod = "DecodeVecShiftR32Imm";
- let ParserMatchClass = Imm1_32Operand;
-}
-def vecshiftR32Narrow : Operand<i32>, ImmLeaf<i32, [{
- return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 17);
-}]> {
- let EncoderMethod = "getVecShiftR32OpValue";
- let DecoderMethod = "DecodeVecShiftR32ImmNarrow";
- let ParserMatchClass = Imm1_16Operand;
-}
-def vecshiftR64 : Operand<i32>, ImmLeaf<i32, [{
- return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 65);
-}]> {
- let EncoderMethod = "getVecShiftR64OpValue";
- let DecoderMethod = "DecodeVecShiftR64Imm";
- let ParserMatchClass = Imm1_64Operand;
-}
-def vecshiftR64Narrow : Operand<i32>, ImmLeaf<i32, [{
- return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 33);
-}]> {
- let EncoderMethod = "getVecShiftR64OpValue";
- let DecoderMethod = "DecodeVecShiftR64ImmNarrow";
- let ParserMatchClass = Imm1_32Operand;
-}
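-
-// Editor's note (not in the original source): for right shifts the valid
-// immediate range is 1 to the element size (a shift by the full element
-// width is meaningful for sshr/ushr), hence the Imm1_N matcher classes, e.g.
-//   sshr v0.4s, v1.4s, #32   ; legal: vecshiftR32 accepts 1..32
-// The *Narrow variants halve the range for shift-and-narrow instructions.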
-
-def Imm0_7Operand : AsmOperandClass { let Name = "Imm0_7"; }
-def Imm0_15Operand : AsmOperandClass { let Name = "Imm0_15"; }
-def Imm0_31Operand : AsmOperandClass { let Name = "Imm0_31"; }
-def Imm0_63Operand : AsmOperandClass { let Name = "Imm0_63"; }
-
-def vecshiftL8 : Operand<i32>, ImmLeaf<i32, [{
- return (((uint32_t)Imm) < 8);
-}]> {
- let EncoderMethod = "getVecShiftL8OpValue";
- let DecoderMethod = "DecodeVecShiftL8Imm";
- let ParserMatchClass = Imm0_7Operand;
-}
-def vecshiftL16 : Operand<i32>, ImmLeaf<i32, [{
- return (((uint32_t)Imm) < 16);
-}]> {
- let EncoderMethod = "getVecShiftL16OpValue";
- let DecoderMethod = "DecodeVecShiftL16Imm";
- let ParserMatchClass = Imm0_15Operand;
-}
-def vecshiftL32 : Operand<i32>, ImmLeaf<i32, [{
- return (((uint32_t)Imm) < 32);
-}]> {
- let EncoderMethod = "getVecShiftL32OpValue";
- let DecoderMethod = "DecodeVecShiftL32Imm";
- let ParserMatchClass = Imm0_31Operand;
-}
-def vecshiftL64 : Operand<i32>, ImmLeaf<i32, [{
- return (((uint32_t)Imm) < 64);
-}]> {
- let EncoderMethod = "getVecShiftL64OpValue";
- let DecoderMethod = "DecodeVecShiftL64Imm";
- let ParserMatchClass = Imm0_63Operand;
-}
-
-
-// Crazy immediate formats used by 32-bit and 64-bit logical immediate
-// instructions for splatting repeating bit patterns across the immediate.
-def logical_imm32_XFORM : SDNodeXForm<imm, [{
- uint64_t enc = ARM64_AM::encodeLogicalImmediate(N->getZExtValue(), 32);
- return CurDAG->getTargetConstant(enc, MVT::i32);
-}]>;
-def logical_imm64_XFORM : SDNodeXForm<imm, [{
- uint64_t enc = ARM64_AM::encodeLogicalImmediate(N->getZExtValue(), 64);
- return CurDAG->getTargetConstant(enc, MVT::i32);
-}]>;
-
-def LogicalImm32Operand : AsmOperandClass { let Name = "LogicalImm32"; }
-def LogicalImm64Operand : AsmOperandClass { let Name = "LogicalImm64"; }
-def logical_imm32 : Operand<i32>, PatLeaf<(imm), [{
- return ARM64_AM::isLogicalImmediate(N->getZExtValue(), 32);
-}], logical_imm32_XFORM> {
- let PrintMethod = "printLogicalImm32";
- let ParserMatchClass = LogicalImm32Operand;
-}
-def logical_imm64 : Operand<i64>, PatLeaf<(imm), [{
- return ARM64_AM::isLogicalImmediate(N->getZExtValue(), 64);
-}], logical_imm64_XFORM> {
- let PrintMethod = "printLogicalImm64";
- let ParserMatchClass = LogicalImm64Operand;
-}
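-
-// Editor's note (illustrative, not in the original source): a logical
-// immediate must be a rotated run of contiguous ones, replicated across the
-// register in 2/4/8/16/32/64-bit elements; 0 and all-ones are not encodable.
-// For example:
-//   and x0, x1, #0x00ff00ff00ff00ff   ; encodable (8 ones per 16-bit element)
-//   and x0, x1, #0x1234               ; not encodable, needs a register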
-
-// imm0_255 predicate - True if the immediate is in the range [0,255].
-def Imm0_255Operand : AsmOperandClass { let Name = "Imm0_255"; }
-def imm0_255 : Operand<i32>, ImmLeaf<i32, [{
- return ((uint32_t)Imm) < 256;
-}]> {
- let ParserMatchClass = Imm0_255Operand;
-}
-
-// imm0_127 predicate - True if the immediate is in the range [0,127]
-def Imm0_127Operand : AsmOperandClass { let Name = "Imm0_127"; }
-def imm0_127 : Operand<i32>, ImmLeaf<i32, [{
- return ((uint32_t)Imm) < 128;
-}]> {
- let ParserMatchClass = Imm0_127Operand;
-}
-
-// NOTE: These imm0_N operands have to be of type i64 because i64 is the size
-// for all shift-amounts.
-
-// imm0_63 predicate - True if the immediate is in the range [0,63]
-def imm0_63 : Operand<i64>, ImmLeaf<i64, [{
- return ((uint64_t)Imm) < 64;
-}]> {
- let ParserMatchClass = Imm0_63Operand;
-}
-
-// imm0_31 predicate - True if the immediate is in the range [0,31]
-def imm0_31 : Operand<i64>, ImmLeaf<i64, [{
- return ((uint64_t)Imm) < 32;
-}]> {
- let ParserMatchClass = Imm0_31Operand;
-}
-
-// imm0_15 predicate - True if the immediate is in the range [0,15]
-def imm0_15 : Operand<i64>, ImmLeaf<i64, [{
- return ((uint64_t)Imm) < 16;
-}]> {
- let ParserMatchClass = Imm0_15Operand;
-}
-
-// imm0_7 predicate - True if the immediate is in the range [0,7]
-def imm0_7 : Operand<i64>, ImmLeaf<i64, [{
- return ((uint64_t)Imm) < 8;
-}]> {
- let ParserMatchClass = Imm0_7Operand;
-}
-
-// An arithmetic shifter operand:
-// {7-6} - shift type: 00 = lsl, 01 = lsr, 10 = asr
-// {5-0} - imm6
-def arith_shift : Operand<i32> {
- let PrintMethod = "printShifter";
- let ParserMatchClass = ArithmeticShifterOperand;
-}
-
-class arith_shifted_reg<ValueType Ty, RegisterClass regclass>
- : Operand<Ty>,
- ComplexPattern<Ty, 2, "SelectArithShiftedRegister", []> {
- let PrintMethod = "printShiftedRegister";
- let MIOperandInfo = (ops regclass, arith_shift);
-}
-
-def arith_shifted_reg32 : arith_shifted_reg<i32, GPR32>;
-def arith_shifted_reg64 : arith_shifted_reg<i64, GPR64>;
-
-// A logical shifter operand:
-// {7-6} - shift type: 00 = lsl, 01 = lsr, 10 = asr, 11 = ror
-// {5-0} - imm6
-def logical_shift : Operand<i32> {
- let PrintMethod = "printShifter";
- let ParserMatchClass = ShifterOperand;
-}
-
-class logical_shifted_reg<ValueType Ty, RegisterClass regclass>
- : Operand<Ty>,
- ComplexPattern<Ty, 2, "SelectLogicalShiftedRegister", []> {
- let PrintMethod = "printShiftedRegister";
- let MIOperandInfo = (ops regclass, logical_shift);
-}
-
-def logical_shifted_reg32 : logical_shifted_reg<i32, GPR32>;
-def logical_shifted_reg64 : logical_shifted_reg<i64, GPR64>;
-
-// A logical vector shifter operand:
-// {7-6} - shift type: 00 = lsl
-// {5-0} - imm6: #0, #8, #16, or #24
-def logical_vec_shift : Operand<i32> {
- let PrintMethod = "printShifter";
- let EncoderMethod = "getVecShifterOpValue";
- let ParserMatchClass = LogicalVecShifterOperand;
-}
-
-// A logical vector half-word shifter operand:
-// {7-6} - shift type: 00 = lsl
-// {5-0} - imm6: #0 or #8
-def logical_vec_hw_shift : Operand<i32> {
- let PrintMethod = "printShifter";
- let EncoderMethod = "getVecShifterOpValue";
- let ParserMatchClass = LogicalVecHalfWordShifterOperand;
-}
-
-// A vector move shifter operand:
-// {0} - imm1: #8 or #16
-def move_vec_shift : Operand<i32> {
- let PrintMethod = "printShifter";
- let EncoderMethod = "getMoveVecShifterOpValue";
- let ParserMatchClass = MoveVecShifterOperand;
-}
-
-// An ADD/SUB immediate shifter operand:
-// {7-6} - shift type: 00 = lsl
-// {5-0} - imm6: #0 or #12
-def addsub_shift : Operand<i32> {
- let ParserMatchClass = AddSubShifterOperand;
-}
-
-class addsub_shifted_imm<ValueType Ty>
- : Operand<Ty>, ComplexPattern<Ty, 2, "SelectArithImmed", [imm]> {
- let PrintMethod = "printAddSubImm";
- let EncoderMethod = "getAddSubImmOpValue";
- let MIOperandInfo = (ops i32imm, addsub_shift);
-}
-
-def addsub_shifted_imm32 : addsub_shifted_imm<i32>;
-def addsub_shifted_imm64 : addsub_shifted_imm<i64>;
-
-class neg_addsub_shifted_imm<ValueType Ty>
- : Operand<Ty>, ComplexPattern<Ty, 2, "SelectNegArithImmed", [imm]> {
- let PrintMethod = "printAddSubImm";
- let EncoderMethod = "getAddSubImmOpValue";
- let MIOperandInfo = (ops i32imm, addsub_shift);
-}
-
-def neg_addsub_shifted_imm32 : neg_addsub_shifted_imm<i32>;
-def neg_addsub_shifted_imm64 : neg_addsub_shifted_imm<i64>;
-
-// An extend operand:
-// {5-3} - extend type
-// {2-0} - imm3
-def arith_extend : Operand<i32> {
- let PrintMethod = "printExtend";
- let ParserMatchClass = ExtendOperand;
-}
-def arith_extend64 : Operand<i32> {
- let PrintMethod = "printExtend";
- let ParserMatchClass = ExtendOperand64;
-}
-
-// An 'extend' that is an lsl of a 64-bit register.
-def arith_extendlsl64 : Operand<i32> {
- let PrintMethod = "printExtend";
- let ParserMatchClass = ExtendOperandLSL64;
-}
-
-class arith_extended_reg32<ValueType Ty> : Operand<Ty>,
- ComplexPattern<Ty, 2, "SelectArithExtendedRegister", []> {
- let PrintMethod = "printExtendedRegister";
- let MIOperandInfo = (ops GPR32, arith_extend);
-}
-
-class arith_extended_reg32to64<ValueType Ty> : Operand<Ty>,
- ComplexPattern<Ty, 2, "SelectArithExtendedRegister", []> {
- let PrintMethod = "printExtendedRegister";
- let MIOperandInfo = (ops GPR32, arith_extend64);
-}
-
-// Floating-point immediate.
-def fpimm32 : Operand<f32>,
- PatLeaf<(f32 fpimm), [{
- return ARM64_AM::getFP32Imm(N->getValueAPF()) != -1;
- }], SDNodeXForm<fpimm, [{
- APFloat InVal = N->getValueAPF();
- uint32_t enc = ARM64_AM::getFP32Imm(InVal);
- return CurDAG->getTargetConstant(enc, MVT::i32);
- }]>> {
- let ParserMatchClass = FPImmOperand;
- let PrintMethod = "printFPImmOperand";
-}
-def fpimm64 : Operand<f64>,
- PatLeaf<(f64 fpimm), [{
- return ARM64_AM::getFP64Imm(N->getValueAPF()) != -1;
- }], SDNodeXForm<fpimm, [{
- APFloat InVal = N->getValueAPF();
- uint32_t enc = ARM64_AM::getFP64Imm(InVal);
- return CurDAG->getTargetConstant(enc, MVT::i32);
- }]>> {
- let ParserMatchClass = FPImmOperand;
- let PrintMethod = "printFPImmOperand";
-}
-
-def fpimm8 : Operand<i32> {
- let ParserMatchClass = FPImmOperand;
- let PrintMethod = "printFPImmOperand";
-}
-
-def fpimm0 : PatLeaf<(fpimm), [{
- return N->isExactlyValue(+0.0);
-}]>;
-
-// 8-bit immediate for AdvSIMD where 64-bit values of the form:
-// aaaaaaaa bbbbbbbb cccccccc dddddddd eeeeeeee ffffffff gggggggg hhhhhhhh
-// are encoded as the eight-bit value 'abcdefgh'.
-def simdimmtype10 : Operand<i32>,
- PatLeaf<(f64 fpimm), [{
- return ARM64_AM::isAdvSIMDModImmType10(N->getValueAPF()
- .bitcastToAPInt()
- .getZExtValue());
- }], SDNodeXForm<fpimm, [{
- APFloat InVal = N->getValueAPF();
- uint32_t enc = ARM64_AM::encodeAdvSIMDModImmType10(N->getValueAPF()
- .bitcastToAPInt()
- .getZExtValue());
- return CurDAG->getTargetConstant(enc, MVT::i32);
- }]>> {
- let ParserMatchClass = SIMDImmType10Operand;
- let PrintMethod = "printSIMDType10Operand";
-}
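-
-// Editor's note (not in the original source): in the Type-10 encoding each of
-// the eight immediate bits is replicated into a full byte, so
-//   imm8 = 0b10000001  ->  0xFF000000000000FF
-// which is what `movi d0, #0xff000000000000ff` encodes.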
-
-
-//---
-// System management
-//---
-
-// Base encoding for system instruction operands.
-let mayLoad = 0, mayStore = 0, hasSideEffects = 1 in
-class BaseSystemI<bit L, dag oops, dag iops, string asm, string operands>
- : I<oops, iops, asm, operands, "", []> {
- let Inst{31-22} = 0b1101010100;
- let Inst{21} = L;
-}
-
-// System instructions which do not have an Rt register.
-class SimpleSystemI<bit L, dag iops, string asm, string operands>
- : BaseSystemI<L, (outs), iops, asm, operands> {
- let Inst{4-0} = 0b11111;
-}
-
-// System instructions which have an Rt register.
-class RtSystemI<bit L, dag oops, dag iops, string asm, string operands>
- : BaseSystemI<L, oops, iops, asm, operands>,
- Sched<[WriteSys]> {
- bits<5> Rt;
- let Inst{4-0} = Rt;
-}
-
-// Hint instructions that take both a CRm and a 3-bit immediate.
-class HintI<string mnemonic>
- : SimpleSystemI<0, (ins imm0_127:$imm), mnemonic#" $imm", "">,
- Sched<[WriteHint]> {
- bits <7> imm;
- let Inst{20-12} = 0b000110010;
- let Inst{11-5} = imm;
-}
-
-// System instructions taking a single literal operand which encodes into
-// CRm. op2 differentiates the opcodes.
-def BarrierAsmOperand : AsmOperandClass {
- let Name = "Barrier";
- let ParserMethod = "tryParseBarrierOperand";
-}
-def barrier_op : Operand<i32> {
- let PrintMethod = "printBarrierOption";
- let ParserMatchClass = BarrierAsmOperand;
-}
-class CRmSystemI<Operand crmtype, bits<3> opc, string asm>
- : SimpleSystemI<0, (ins crmtype:$CRm), asm, "\t$CRm">,
- Sched<[WriteBarrier]> {
- bits<4> CRm;
- let Inst{20-12} = 0b000110011;
- let Inst{11-8} = CRm;
- let Inst{7-5} = opc;
-}
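-
-// Editor's note (illustrative, not in the original source): DMB/DSB are
-// CRmSystemI instances whose CRm operand selects the barrier domain/type,
-// matching the fence patterns earlier in this patch:
-//   dmb ishld   ; CRm = 0x9, load barrier (acquire fences)
-//   dmb ish     ; CRm = 0xb, full inner-shareable barrier (all other fences)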
-
-// MRS/MSR system instructions.
-def SystemRegisterOperand : AsmOperandClass {
- let Name = "SystemRegister";
- let ParserMethod = "tryParseSystemRegister";
-}
-// concatenation of 1, op0, op1, CRn, CRm, op2. 16-bit immediate.
-def sysreg_op : Operand<i32> {
- let ParserMatchClass = SystemRegisterOperand;
- let DecoderMethod = "DecodeSystemRegister";
- let PrintMethod = "printSystemRegister";
-}
-
-class MRSI : RtSystemI<1, (outs GPR64:$Rt), (ins sysreg_op:$systemreg),
- "mrs", "\t$Rt, $systemreg"> {
- bits<15> systemreg;
- let Inst{20} = 1;
- let Inst{19-5} = systemreg;
-}
-
-// FIXME: Some of these def CPSR, others don't. Best way to model that?
-// Explicitly modeling each of the system register as a register class
-// would do it, but feels like overkill at this point.
-class MSRI : RtSystemI<0, (outs), (ins sysreg_op:$systemreg, GPR64:$Rt),
- "msr", "\t$systemreg, $Rt"> {
- bits<15> systemreg;
- let Inst{20} = 1;
- let Inst{19-5} = systemreg;
-}
-
-def SystemCPSRFieldOperand : AsmOperandClass {
- let Name = "SystemCPSRField";
- let ParserMethod = "tryParseCPSRField";
-}
-def cpsrfield_op : Operand<i32> {
- let ParserMatchClass = SystemCPSRFieldOperand;
- let PrintMethod = "printSystemCPSRField";
-}
-
-let Defs = [CPSR] in
-class MSRcpsrI : SimpleSystemI<0, (ins cpsrfield_op:$cpsr_field, imm0_15:$imm),
- "msr", "\t$cpsr_field, $imm">,
- Sched<[WriteSys]> {
- bits<6> cpsrfield;
- bits<4> imm;
- let Inst{20-19} = 0b00;
- let Inst{18-16} = cpsrfield{5-3};
- let Inst{15-12} = 0b0100;
- let Inst{11-8} = imm;
- let Inst{7-5} = cpsrfield{2-0};
-
- let DecoderMethod = "DecodeSystemCPSRInstruction";
-}
-
-// SYS and SYSL generic system instructions.
-def SysCRAsmOperand : AsmOperandClass {
- let Name = "SysCR";
- let ParserMethod = "tryParseSysCROperand";
-}
-
-def sys_cr_op : Operand<i32> {
- let PrintMethod = "printSysCROperand";
- let ParserMatchClass = SysCRAsmOperand;
-}
-
-class SystemI<bit L, string asm>
- : SimpleSystemI<L,
- (ins imm0_7:$op1, sys_cr_op:$Cn, sys_cr_op:$Cm, imm0_7:$op2),
- asm, "\t$op1, $Cn, $Cm, $op2">,
- Sched<[WriteSys]> {
- bits<3> op1;
- bits<4> Cn;
- bits<4> Cm;
- bits<3> op2;
- let Inst{20-19} = 0b01;
- let Inst{18-16} = op1;
- let Inst{15-12} = Cn;
- let Inst{11-8} = Cm;
- let Inst{7-5} = op2;
-}
-
-class SystemXtI<bit L, string asm>
- : RtSystemI<L, (outs),
- (ins imm0_7:$op1, sys_cr_op:$Cn, sys_cr_op:$Cm, imm0_7:$op2, GPR64:$Rt),
- asm, "\t$op1, $Cn, $Cm, $op2, $Rt"> {
- bits<3> op1;
- bits<4> Cn;
- bits<4> Cm;
- bits<3> op2;
- let Inst{20-19} = 0b01;
- let Inst{18-16} = op1;
- let Inst{15-12} = Cn;
- let Inst{11-8} = Cm;
- let Inst{7-5} = op2;
-}
-
-class SystemLXtI<bit L, string asm>
- : RtSystemI<L, (outs),
- (ins GPR64:$Rt, imm0_7:$op1, sys_cr_op:$Cn, sys_cr_op:$Cm, imm0_7:$op2),
- asm, "\t$Rt, $op1, $Cn, $Cm, $op2"> {
- bits<3> op1;
- bits<4> Cn;
- bits<4> Cm;
- bits<3> op2;
- let Inst{20-19} = 0b01;
- let Inst{18-16} = op1;
- let Inst{15-12} = Cn;
- let Inst{11-8} = Cm;
- let Inst{7-5} = op2;
-}
-
-
-// Branch (register) instructions:
-//
-// case opc of
-// 0001 blr
-// 0000 br
-// 0101 dret
-// 0100 eret
-// 0010 ret
-// otherwise UNDEFINED
-class BaseBranchReg<bits<4> opc, dag oops, dag iops, string asm,
- string operands, list<dag> pattern>
- : I<oops, iops, asm, operands, "", pattern>, Sched<[WriteBrReg]> {
- let Inst{31-25} = 0b1101011;
- let Inst{24-21} = opc;
- let Inst{20-16} = 0b11111;
- let Inst{15-10} = 0b000000;
- let Inst{4-0} = 0b00000;
-}
-
-class BranchReg<bits<4> opc, string asm, list<dag> pattern>
- : BaseBranchReg<opc, (outs), (ins GPR64:$Rn), asm, "\t$Rn", pattern> {
- bits<5> Rn;
- let Inst{9-5} = Rn;
-}
-
-let mayLoad = 0, mayStore = 0, hasSideEffects = 1, isReturn = 1 in
-class SpecialReturn<bits<4> opc, string asm>
- : BaseBranchReg<opc, (outs), (ins), asm, "", []> {
- let Inst{9-5} = 0b11111;
-}
-
-//---
-// Conditional branch instruction.
-//---
-// Branch condition code.
-// 4-bit immediate. Pretty-printed as .<cc>
-def dotCcode : Operand<i32> {
- let PrintMethod = "printDotCondCode";
-}
-
-// Conditional branch target. 19-bit immediate. The low two bits of the target
-// offset are implied zero and so are not part of the immediate.
-def BranchTarget19Operand : AsmOperandClass {
- let Name = "BranchTarget19";
-}
-def am_brcond : Operand<OtherVT> {
- let EncoderMethod = "getCondBranchTargetOpValue";
- let DecoderMethod = "DecodeCondBranchTarget";
- let PrintMethod = "printAlignedBranchTarget";
- let ParserMatchClass = BranchTarget19Operand;
-}
-
-class BranchCond : I<(outs), (ins dotCcode:$cond, am_brcond:$target),
- "b", "$cond\t$target", "",
- [(ARM64brcond bb:$target, imm:$cond, CPSR)]>,
- Sched<[WriteBr]> {
- let isBranch = 1;
- let isTerminator = 1;
- let Uses = [CPSR];
-
- bits<4> cond;
- bits<19> target;
- let Inst{31-24} = 0b01010100;
- let Inst{23-5} = target;
- let Inst{4} = 0;
- let Inst{3-0} = cond;
-}
-
-//---
-// Compare-and-branch instructions.
-//---
-class BaseCmpBranch<RegisterClass regtype, bit op, string asm, SDNode node>
- : I<(outs), (ins regtype:$Rt, am_brcond:$target),
- asm, "\t$Rt, $target", "",
- [(node regtype:$Rt, bb:$target)]>,
- Sched<[WriteBr]> {
- let isBranch = 1;
- let isTerminator = 1;
-
- bits<5> Rt;
- bits<19> target;
- let Inst{30-25} = 0b011010;
- let Inst{24} = op;
- let Inst{23-5} = target;
- let Inst{4-0} = Rt;
-}
-
-multiclass CmpBranch<bit op, string asm, SDNode node> {
- def W : BaseCmpBranch<GPR32, op, asm, node> {
- let Inst{31} = 0;
- }
- def X : BaseCmpBranch<GPR64, op, asm, node> {
- let Inst{31} = 1;
- }
-}
-
-//---
-// Test-bit-and-branch instructions.
-//---
-// Test-and-branch target. 14-bit sign-extended immediate. The low two bits of
-// the target offset are implied zero and so are not part of the immediate.
-def BranchTarget14Operand : AsmOperandClass {
- let Name = "BranchTarget14";
-}
-def am_tbrcond : Operand<OtherVT> {
- let EncoderMethod = "getTestBranchTargetOpValue";
- let PrintMethod = "printAlignedBranchTarget";
- let ParserMatchClass = BranchTarget14Operand;
-}
-
-class TestBranch<bit op, string asm, SDNode node>
- : I<(outs), (ins GPR64:$Rt, imm0_63:$bit_off, am_tbrcond:$target),
- asm, "\t$Rt, $bit_off, $target", "",
- [(node GPR64:$Rt, imm0_63:$bit_off, bb:$target)]>,
- Sched<[WriteBr]> {
- let isBranch = 1;
- let isTerminator = 1;
-
- bits<5> Rt;
- bits<6> bit_off;
- bits<14> target;
-
- let Inst{31} = bit_off{5};
- let Inst{30-25} = 0b011011;
- let Inst{24} = op;
- let Inst{23-19} = bit_off{4-0};
- let Inst{18-5} = target;
- let Inst{4-0} = Rt;
-
- let DecoderMethod = "DecodeTestAndBranch";
-}
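-
-// Encoding sketch (illustrative): "tbz x0, #37, target" has
-// bit_off = 37 = 0b100101, so Inst{31} = 1 (bit_off{5}) and
-// Inst{23-19} = 0b00101; testing a bit above 31 only makes sense on a
-// 64-bit $Rt.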
-
-//---
-// Unconditional branch (immediate) instructions.
-//---
-def BranchTarget26Operand : AsmOperandClass {
- let Name = "BranchTarget26";
-}
-def am_b_target : Operand<OtherVT> {
- let EncoderMethod = "getBranchTargetOpValue";
- let PrintMethod = "printAlignedBranchTarget";
- let ParserMatchClass = BranchTarget26Operand;
-}
-def am_bl_target : Operand<i64> {
- let EncoderMethod = "getBranchTargetOpValue";
- let PrintMethod = "printAlignedBranchTarget";
- let ParserMatchClass = BranchTarget26Operand;
-}
-
-class BImm<bit op, dag iops, string asm, list<dag> pattern>
- : I<(outs), iops, asm, "\t$addr", "", pattern>, Sched<[WriteBr]> {
- bits<26> addr;
- let Inst{31} = op;
- let Inst{30-26} = 0b00101;
- let Inst{25-0} = addr;
-
- let DecoderMethod = "DecodeUnconditionalBranch";
-}
-
-class BranchImm<bit op, string asm, list<dag> pattern>
- : BImm<op, (ins am_b_target:$addr), asm, pattern>;
-class CallImm<bit op, string asm, list<dag> pattern>
- : BImm<op, (ins am_bl_target:$addr), asm, pattern>;
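-
-// Illustrative instantiations (hypothetical here):
-//   def B  : BranchImm<0, "b", [(br bb:$addr)]>;
-//   def BL : CallImm<1, "bl", []>;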
-
-//---
-// Basic one-operand data processing instructions.
-//---
-
-let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
-class BaseOneOperandData<bits<3> opc, RegisterClass regtype, string asm,
- SDPatternOperator node>
- : I<(outs regtype:$Rd), (ins regtype:$Rn), asm, "\t$Rd, $Rn", "",
- [(set regtype:$Rd, (node regtype:$Rn))]>,
- Sched<[WriteI]> {
- bits<5> Rd;
- bits<5> Rn;
-
- let Inst{30-13} = 0b101101011000000000;
- let Inst{12-10} = opc;
- let Inst{9-5} = Rn;
- let Inst{4-0} = Rd;
-}
-
-let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
-multiclass OneOperandData<bits<3> opc, string asm,
- SDPatternOperator node = null_frag> {
- def Wr : BaseOneOperandData<opc, GPR32, asm, node> {
- let Inst{31} = 0;
- }
-
- def Xr : BaseOneOperandData<opc, GPR64, asm, node> {
- let Inst{31} = 1;
- }
-}
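-
-// Typical use (illustrative): defm CLS : OneOperandData<0b101, "cls">;
-// producing CLSWr/CLSXr (the opc value is for illustration only).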
-
-class OneWRegData<bits<3> opc, string asm, SDPatternOperator node>
- : BaseOneOperandData<opc, GPR32, asm, node> {
- let Inst{31} = 0;
-}
-
-class OneXRegData<bits<3> opc, string asm, SDPatternOperator node>
- : BaseOneOperandData<opc, GPR64, asm, node> {
- let Inst{31} = 1;
-}
-
-//---
-// Basic two-operand data processing instructions.
-//---
-class BaseBaseAddSubCarry<bit isSub, RegisterClass regtype, string asm,
- list<dag> pattern>
- : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm),
- asm, "\t$Rd, $Rn, $Rm", "", pattern>,
- Sched<[WriteI]> {
- let Uses = [CPSR];
- bits<5> Rd;
- bits<5> Rn;
- bits<5> Rm;
- let Inst{30} = isSub;
- let Inst{28-21} = 0b11010000;
- let Inst{20-16} = Rm;
- let Inst{15-10} = 0;
- let Inst{9-5} = Rn;
- let Inst{4-0} = Rd;
-}
-
-class BaseAddSubCarry<bit isSub, RegisterClass regtype, string asm,
- SDNode OpNode>
- : BaseBaseAddSubCarry<isSub, regtype, asm,
- [(set regtype:$Rd, (OpNode regtype:$Rn, regtype:$Rm, CPSR))]>;
-
-class BaseAddSubCarrySetFlags<bit isSub, RegisterClass regtype, string asm,
- SDNode OpNode>
- : BaseBaseAddSubCarry<isSub, regtype, asm,
- [(set regtype:$Rd, (OpNode regtype:$Rn, regtype:$Rm, CPSR)),
- (implicit CPSR)]> {
- let Defs = [CPSR];
-}
-
-multiclass AddSubCarry<bit isSub, string asm, string asm_setflags,
- SDNode OpNode, SDNode OpNode_setflags> {
- def Wr : BaseAddSubCarry<isSub, GPR32, asm, OpNode> {
- let Inst{31} = 0;
- let Inst{29} = 0;
- }
- def Xr : BaseAddSubCarry<isSub, GPR64, asm, OpNode> {
- let Inst{31} = 1;
- let Inst{29} = 0;
- }
-
- // Sets flags.
- def SWr : BaseAddSubCarrySetFlags<isSub, GPR32, asm_setflags,
- OpNode_setflags> {
- let Inst{31} = 0;
- let Inst{29} = 1;
- }
- def SXr : BaseAddSubCarrySetFlags<isSub, GPR64, asm_setflags,
- OpNode_setflags> {
- let Inst{31} = 1;
- let Inst{29} = 1;
- }
-}
-
-class BaseTwoOperand<bits<4> opc, RegisterClass regtype, string asm,
- SDPatternOperator OpNode>
- : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm),
- asm, "\t$Rd, $Rn, $Rm", "",
- [(set regtype:$Rd, (OpNode regtype:$Rn, regtype:$Rm))]> {
- bits<5> Rd;
- bits<5> Rn;
- bits<5> Rm;
- let Inst{30-21} = 0b0011010110;
- let Inst{20-16} = Rm;
- let Inst{15-14} = 0b00;
- let Inst{13-10} = opc;
- let Inst{9-5} = Rn;
- let Inst{4-0} = Rd;
-}
-
-class BaseDiv<bit isSigned, RegisterClass regtype, string asm,
- SDPatternOperator OpNode>
- : BaseTwoOperand<{0,0,1,?}, regtype, asm, OpNode> {
- let Inst{10} = isSigned;
-}
-
-multiclass Div<bit isSigned, string asm, SDPatternOperator OpNode> {
- def Wr : BaseDiv<isSigned, GPR32, asm, OpNode>,
- Sched<[WriteID32]> {
- let Inst{31} = 0;
- }
- def Xr : BaseDiv<isSigned, GPR64, asm, OpNode>,
- Sched<[WriteID64]> {
- let Inst{31} = 1;
- }
-}
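-
-// Typical use (illustrative): defm SDIV : Div<1, "sdiv", sdiv>; giving
-// SDIVWr/SDIVXr, with the W form costed as WriteID32 and the X form as
-// WriteID64.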
-
-class BaseShift<bits<2> shift_type, RegisterClass regtype, string asm,
- SDPatternOperator OpNode = null_frag>
- : BaseTwoOperand<{1,0,?,?}, regtype, asm, OpNode>,
- Sched<[WriteIS]> {
- let Inst{11-10} = shift_type;
-}
-
-multiclass Shift<bits<2> shift_type, string asm, SDNode OpNode> {
- def Wr : BaseShift<shift_type, GPR32, asm> {
- let Inst{31} = 0;
- }
-
- def Xr : BaseShift<shift_type, GPR64, asm, OpNode> {
- let Inst{31} = 1;
- }
-
- def : Pat<(i32 (OpNode GPR32:$Rn, i64:$Rm)),
- (!cast<Instruction>(NAME # "Wr") GPR32:$Rn,
- (EXTRACT_SUBREG i64:$Rm, sub_32))>;
-
- def : Pat<(i32 (OpNode GPR32:$Rn, (i64 (zext GPR32:$Rm)))),
- (!cast<Instruction>(NAME # "Wr") GPR32:$Rn, GPR32:$Rm)>;
-
- def : Pat<(i32 (OpNode GPR32:$Rn, (i64 (anyext GPR32:$Rm)))),
- (!cast<Instruction>(NAME # "Wr") GPR32:$Rn, GPR32:$Rm)>;
-
- def : Pat<(i32 (OpNode GPR32:$Rn, (i64 (sext GPR32:$Rm)))),
- (!cast<Instruction>(NAME # "Wr") GPR32:$Rn, GPR32:$Rm)>;
-}
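-
-// The extra patterns above let the 32-bit shift consume an i64 shift amount
-// (including one produced by zext/anyext/sext of a W register) without an
-// explicit truncation. Typical use (illustrative):
-//   defm LSLV : Shift<0b00, "lsl", shl>;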
-
-class ShiftAlias<string asm, Instruction inst, RegisterClass regtype>
- : InstAlias<asm#" $dst, $src1, $src2",
- (inst regtype:$dst, regtype:$src1, regtype:$src2)>;
-
-class BaseMulAccum<bit isSub, bits<3> opc, RegisterClass multype,
- RegisterClass addtype, string asm,
- list<dag> pattern>
- : I<(outs addtype:$Rd), (ins multype:$Rn, multype:$Rm, addtype:$Ra),
- asm, "\t$Rd, $Rn, $Rm, $Ra", "", pattern> {
- bits<5> Rd;
- bits<5> Rn;
- bits<5> Rm;
- bits<5> Ra;
- let Inst{30-24} = 0b0011011;
- let Inst{23-21} = opc;
- let Inst{20-16} = Rm;
- let Inst{15} = isSub;
- let Inst{14-10} = Ra;
- let Inst{9-5} = Rn;
- let Inst{4-0} = Rd;
-}
-
-multiclass MulAccum<bit isSub, string asm, SDNode AccNode> {
- def Wrrr : BaseMulAccum<isSub, 0b000, GPR32, GPR32, asm,
- [(set GPR32:$Rd, (AccNode GPR32:$Ra, (mul GPR32:$Rn, GPR32:$Rm)))]>,
- Sched<[WriteIM32]> {
- let Inst{31} = 0;
- }
-
- def Xrrr : BaseMulAccum<isSub, 0b000, GPR64, GPR64, asm,
- [(set GPR64:$Rd, (AccNode GPR64:$Ra, (mul GPR64:$Rn, GPR64:$Rm)))]>,
- Sched<[WriteIM64]> {
- let Inst{31} = 1;
- }
-}
-
-class WideMulAccum<bit isSub, bits<3> opc, string asm,
- SDNode AccNode, SDNode ExtNode>
- : BaseMulAccum<isSub, opc, GPR32, GPR64, asm,
- [(set GPR64:$Rd, (AccNode GPR64:$Ra,
- (mul (ExtNode GPR32:$Rn), (ExtNode GPR32:$Rm))))]>,
- Sched<[WriteIM32]> {
- let Inst{31} = 1;
-}
-
-class MulHi<bits<3> opc, string asm, SDNode OpNode>
- : I<(outs GPR64:$Rd), (ins GPR64:$Rn, GPR64:$Rm),
- asm, "\t$Rd, $Rn, $Rm", "",
- [(set GPR64:$Rd, (OpNode GPR64:$Rn, GPR64:$Rm))]>,
- Sched<[WriteIM64]> {
- bits<5> Rd;
- bits<5> Rn;
- bits<5> Rm;
- let Inst{31-24} = 0b10011011;
- let Inst{23-21} = opc;
- let Inst{20-16} = Rm;
- let Inst{15-10} = 0b011111;
- let Inst{9-5} = Rn;
- let Inst{4-0} = Rd;
-}
-
-class MulAccumWAlias<string asm, Instruction inst>
- : InstAlias<asm#" $dst, $src1, $src2",
- (inst GPR32:$dst, GPR32:$src1, GPR32:$src2, WZR)>;
-class MulAccumXAlias<string asm, Instruction inst>
- : InstAlias<asm#" $dst, $src1, $src2",
- (inst GPR64:$dst, GPR64:$src1, GPR64:$src2, XZR)>;
-class WideMulAccumAlias<string asm, Instruction inst>
- : InstAlias<asm#" $dst, $src1, $src2",
- (inst GPR64:$dst, GPR32:$src1, GPR32:$src2, XZR)>;
-
-class BaseCRC32<bit sf, bits<2> sz, bit C, RegisterClass StreamReg,
- SDPatternOperator OpNode, string asm>
- : I<(outs GPR32:$Rd), (ins GPR32:$Rn, StreamReg:$Rm),
- asm, "\t$Rd, $Rn, $Rm", "",
- [(set GPR32:$Rd, (OpNode GPR32:$Rn, StreamReg:$Rm))]>,
- Sched<[WriteISReg]> {
- bits<5> Rd;
- bits<5> Rn;
- bits<5> Rm;
-
- let Inst{31} = sf;
- let Inst{30-21} = 0b0011010110;
- let Inst{20-16} = Rm;
- let Inst{15-13} = 0b010;
- let Inst{12} = C;
- let Inst{11-10} = sz;
- let Inst{9-5} = Rn;
- let Inst{4-0} = Rd;
-}
-
-//---
-// Address generation.
-//---
-
-class ADRI<bit page, string asm, Operand adr, list<dag> pattern>
- : I<(outs GPR64:$Xd), (ins adr:$label), asm, "\t$Xd, $label", "",
- pattern>,
- Sched<[WriteI]> {
- bits<5> Xd;
- bits<21> label;
- let Inst{31} = page;
- let Inst{30-29} = label{1-0};
- let Inst{28-24} = 0b10000;
- let Inst{23-5} = label{20-2};
- let Inst{4-0} = Xd;
-
- let DecoderMethod = "DecodeAdrInstruction";
-}
-
-//---
-// Move immediate.
-//---
-
-def movimm32_imm : Operand<i32> {
- let ParserMatchClass = Imm0_65535Operand;
- let EncoderMethod = "getMoveWideImmOpValue";
-}
-def movimm32_shift : Operand<i32> {
- let PrintMethod = "printShifter";
- let ParserMatchClass = MovImm32ShifterOperand;
-}
-def movimm64_shift : Operand<i32> {
- let PrintMethod = "printShifter";
- let ParserMatchClass = MovImm64ShifterOperand;
-}
-let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
-class BaseMoveImmediate<bits<2> opc, RegisterClass regtype, Operand shifter,
- string asm>
- : I<(outs regtype:$Rd), (ins movimm32_imm:$imm, shifter:$shift),
- asm, "\t$Rd, $imm$shift", "", []>,
- Sched<[WriteImm]> {
- bits<5> Rd;
- bits<16> imm;
- bits<6> shift;
- let Inst{30-29} = opc;
- let Inst{28-23} = 0b100101;
- let Inst{22-21} = shift{5-4};
- let Inst{20-5} = imm;
- let Inst{4-0} = Rd;
-
- let DecoderMethod = "DecodeMoveImmInstruction";
-}
-
-multiclass MoveImmediate<bits<2> opc, string asm> {
- def Wi : BaseMoveImmediate<opc, GPR32, movimm32_shift, asm> {
- let Inst{31} = 0;
- }
-
- def Xi : BaseMoveImmediate<opc, GPR64, movimm64_shift, asm> {
- let Inst{31} = 1;
- }
-}
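-
-// Encoding sketch (illustrative): "movz w0, #0x1234, lsl #16" has
-// imm = 0x1234 and shift = 16, so Inst{22-21} = shift{5-4} = 0b01; that
-// two-bit field selects which 16-bit chunk of the register is written.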
-
-let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
-class BaseInsertImmediate<bits<2> opc, RegisterClass regtype, Operand shifter,
- string asm>
- : I<(outs regtype:$Rd),
- (ins regtype:$src, movimm32_imm:$imm, shifter:$shift),
- asm, "\t$Rd, $imm$shift", "$src = $Rd", []>,
- Sched<[WriteI]> {
- bits<5> Rd;
- bits<16> imm;
- bits<6> shift;
- let Inst{30-29} = opc;
- let Inst{28-23} = 0b100101;
- let Inst{22-21} = shift{5-4};
- let Inst{20-5} = imm;
- let Inst{4-0} = Rd;
-
- let DecoderMethod = "DecodeMoveImmInstruction";
-}
-
-multiclass InsertImmediate<bits<2> opc, string asm> {
- def Wi : BaseInsertImmediate<opc, GPR32, movimm32_shift, asm> {
- let Inst{31} = 0;
- }
-
- def Xi : BaseInsertImmediate<opc, GPR64, movimm64_shift, asm> {
- let Inst{31} = 1;
- }
-}
-
-//---
-// Add/Subtract
-//---
-
-class BaseAddSubImm<bit isSub, bit setFlags, RegisterClass dstRegtype,
- RegisterClass srcRegtype, addsub_shifted_imm immtype,
- string asm, SDPatternOperator OpNode>
- : I<(outs dstRegtype:$Rd), (ins srcRegtype:$Rn, immtype:$imm),
- asm, "\t$Rd, $Rn, $imm", "",
- [(set dstRegtype:$Rd, (OpNode srcRegtype:$Rn, immtype:$imm))]>,
- Sched<[WriteI]> {
- bits<5> Rd;
- bits<5> Rn;
- bits<14> imm;
- let Inst{30} = isSub;
- let Inst{29} = setFlags;
- let Inst{28-24} = 0b10001;
- let Inst{23-22} = imm{13-12}; // '00' => lsl #0, '01' => lsl #12
- let Inst{21-10} = imm{11-0};
- let Inst{9-5} = Rn;
- let Inst{4-0} = Rd;
- let DecoderMethod = "DecodeBaseAddSubImm";
-}
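-
-// Encoding sketch (illustrative): "add x0, x1, #20, lsl #12" encodes
-// imm{13-12} = 0b01 and imm{11-0} = 20, while plain "add x0, x1, #20"
-// encodes imm{13-12} = 0b00.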
-
-class BaseAddSubRegPseudo<RegisterClass regtype,
- SDPatternOperator OpNode>
- : Pseudo<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm),
- [(set regtype:$Rd, (OpNode regtype:$Rn, regtype:$Rm))]>,
- Sched<[WriteI]>;
-
-class BaseAddSubSReg<bit isSub, bit setFlags, RegisterClass regtype,
- arith_shifted_reg shifted_regtype, string asm,
- SDPatternOperator OpNode>
- : I<(outs regtype:$Rd), (ins regtype:$Rn, shifted_regtype:$Rm),
- asm, "\t$Rd, $Rn, $Rm", "",
- [(set regtype:$Rd, (OpNode regtype:$Rn, shifted_regtype:$Rm))]>,
- Sched<[WriteISReg]> {
- // The MI operands appear in encoding order, so the default in-order
- // operand-to-field mapping works and no encoder method or by-name
- // matching is needed. Since matching is by order, make sure the field
- // names do not collide with the operand names.
- bits<5> dst;
- bits<5> src1;
- bits<5> src2;
- bits<8> shift;
- let Inst{30} = isSub;
- let Inst{29} = setFlags;
- let Inst{28-24} = 0b01011;
- let Inst{23-22} = shift{7-6};
- let Inst{21} = 0;
- let Inst{20-16} = src2;
- let Inst{15-10} = shift{5-0};
- let Inst{9-5} = src1;
- let Inst{4-0} = dst;
-
- let DecoderMethod = "DecodeThreeAddrSRegInstruction";
-}
-
-class BaseAddSubEReg<bit isSub, bit setFlags, RegisterClass dstRegtype,
- RegisterClass src1Regtype, Operand src2Regtype,
- string asm, SDPatternOperator OpNode>
- : I<(outs dstRegtype:$R1),
- (ins src1Regtype:$R2, src2Regtype:$R3),
- asm, "\t$R1, $R2, $R3", "",
- [(set dstRegtype:$R1, (OpNode src1Regtype:$R2, src2Regtype:$R3))]>,
- Sched<[WriteIEReg]> {
- bits<5> Rd;
- bits<5> Rn;
- bits<5> Rm;
- bits<6> ext;
- let Inst{30} = isSub;
- let Inst{29} = setFlags;
- let Inst{28-24} = 0b01011;
- let Inst{23-21} = 0b001;
- let Inst{20-16} = Rm;
- let Inst{15-13} = ext{5-3};
- let Inst{12-10} = ext{2-0};
- let Inst{9-5} = Rn;
- let Inst{4-0} = Rd;
-
- let DecoderMethod = "DecodeAddSubERegInstruction";
-}
-
-let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
-class BaseAddSubEReg64<bit isSub, bit setFlags, RegisterClass dstRegtype,
- RegisterClass src1Regtype, RegisterClass src2Regtype,
- Operand ext_op, string asm>
- : I<(outs dstRegtype:$Rd),
- (ins src1Regtype:$Rn, src2Regtype:$Rm, ext_op:$ext),
- asm, "\t$Rd, $Rn, $Rm$ext", "", []>,
- Sched<[WriteIEReg]> {
- bits<5> Rd;
- bits<5> Rn;
- bits<5> Rm;
- bits<6> ext;
- let Inst{30} = isSub;
- let Inst{29} = setFlags;
- let Inst{28-24} = 0b01011;
- let Inst{23-21} = 0b001;
- let Inst{20-16} = Rm;
- let Inst{15} = ext{5};
- let Inst{12-10} = ext{2-0};
- let Inst{9-5} = Rn;
- let Inst{4-0} = Rd;
-
- let DecoderMethod = "DecodeAddSubERegInstruction";
-}
-
-// Aliases for register+register add/subtract.
-class AddSubRegAlias<string asm, Instruction inst, RegisterClass dstRegtype,
- RegisterClass src1Regtype, RegisterClass src2Regtype,
- int shiftExt>
- : InstAlias<asm#" $dst, $src1, $src2",
- (inst dstRegtype:$dst, src1Regtype:$src1, src2Regtype:$src2,
- shiftExt)>;
-
-multiclass AddSub<bit isSub, string mnemonic,
- SDPatternOperator OpNode = null_frag> {
- let hasSideEffects = 0 in {
- // Add/Subtract immediate
- def Wri : BaseAddSubImm<isSub, 0, GPR32sp, GPR32sp, addsub_shifted_imm32,
- mnemonic, OpNode> {
- let Inst{31} = 0;
- }
- def Xri : BaseAddSubImm<isSub, 0, GPR64sp, GPR64sp, addsub_shifted_imm64,
- mnemonic, OpNode> {
- let Inst{31} = 1;
- }
-
- // Add/Subtract register - Only used for CodeGen
- def Wrr : BaseAddSubRegPseudo<GPR32, OpNode>;
- def Xrr : BaseAddSubRegPseudo<GPR64, OpNode>;
-
- // Add/Subtract shifted register
- def Wrs : BaseAddSubSReg<isSub, 0, GPR32, arith_shifted_reg32, mnemonic,
- OpNode> {
- let Inst{31} = 0;
- }
- def Xrs : BaseAddSubSReg<isSub, 0, GPR64, arith_shifted_reg64, mnemonic,
- OpNode> {
- let Inst{31} = 1;
- }
- }
-
- // Add/Subtract extended register
- let AddedComplexity = 1, hasSideEffects = 0 in {
- def Wrx : BaseAddSubEReg<isSub, 0, GPR32sp, GPR32sp,
- arith_extended_reg32<i32>, mnemonic, OpNode> {
- let Inst{31} = 0;
- }
- def Xrx : BaseAddSubEReg<isSub, 0, GPR64sp, GPR64sp,
- arith_extended_reg32to64<i64>, mnemonic, OpNode> {
- let Inst{31} = 1;
- }
- }
-
- def Xrx64 : BaseAddSubEReg64<isSub, 0, GPR64sp, GPR64sp, GPR64,
- arith_extendlsl64, mnemonic> {
- // UXTX and SXTX only.
- let Inst{14-13} = 0b11;
- let Inst{31} = 1;
- }
-
- // Register/register aliases with no shift when SP is not used.
- def : AddSubRegAlias<mnemonic, !cast<Instruction>(NAME#"Wrs"),
- GPR32, GPR32, GPR32, 0>;
- def : AddSubRegAlias<mnemonic, !cast<Instruction>(NAME#"Xrs"),
- GPR64, GPR64, GPR64, 0>;
-
- // Register/register aliases with no shift when either the destination or
- // first source register is SP. This relies on the shifted register aliases
- // above matching first in the case when SP is not used.
- def : AddSubRegAlias<mnemonic, !cast<Instruction>(NAME#"Wrx"),
- GPR32sp, GPR32sp, GPR32, 16>; // UXTW #0
- def : AddSubRegAlias<mnemonic,
- !cast<Instruction>(NAME#"Xrx64"),
- GPR64sp, GPR64sp, GPR64, 24>; // UXTX #0
-}
-
-multiclass AddSubS<bit isSub, string mnemonic, SDNode OpNode> {
- let isCompare = 1, Defs = [CPSR] in {
- // Add/Subtract immediate
- def Wri : BaseAddSubImm<isSub, 1, GPR32, GPR32sp, addsub_shifted_imm32,
- mnemonic, OpNode> {
- let Inst{31} = 0;
- }
- def Xri : BaseAddSubImm<isSub, 1, GPR64, GPR64sp, addsub_shifted_imm64,
- mnemonic, OpNode> {
- let Inst{31} = 1;
- }
-
- // Add/Subtract register
- def Wrr : BaseAddSubRegPseudo<GPR32, OpNode>;
- def Xrr : BaseAddSubRegPseudo<GPR64, OpNode>;
-
- // Add/Subtract shifted register
- def Wrs : BaseAddSubSReg<isSub, 1, GPR32, arith_shifted_reg32, mnemonic,
- OpNode> {
- let Inst{31} = 0;
- }
- def Xrs : BaseAddSubSReg<isSub, 1, GPR64, arith_shifted_reg64, mnemonic,
- OpNode> {
- let Inst{31} = 1;
- }
-
- // Add/Subtract extended register
- let AddedComplexity = 1 in {
- def Wrx : BaseAddSubEReg<isSub, 1, GPR32, GPR32sp,
- arith_extended_reg32<i32>, mnemonic, OpNode> {
- let Inst{31} = 0;
- }
- def Xrx : BaseAddSubEReg<isSub, 1, GPR64, GPR64sp,
- arith_extended_reg32<i64>, mnemonic, OpNode> {
- let Inst{31} = 1;
- }
- }
-
- def Xrx64 : BaseAddSubEReg64<isSub, 1, GPR64, GPR64sp, GPR64,
- arith_extendlsl64, mnemonic> {
- // UXTX and SXTX only.
- let Inst{14-13} = 0b11;
- let Inst{31} = 1;
- }
- } // Defs = [CPSR]
-
- // Register/register aliases with no shift when SP is not used.
- def : AddSubRegAlias<mnemonic, !cast<Instruction>(NAME#"Wrs"),
- GPR32, GPR32, GPR32, 0>;
- def : AddSubRegAlias<mnemonic, !cast<Instruction>(NAME#"Xrs"),
- GPR64, GPR64, GPR64, 0>;
-
- // Register/register aliases with no shift when the first source register
- // is SP. This relies on the shifted register aliases above matching first
- // in the case when SP is not used.
- def : AddSubRegAlias<mnemonic, !cast<Instruction>(NAME#"Wrx"),
- GPR32, GPR32sp, GPR32, 16>; // UXTW #0
- def : AddSubRegAlias<mnemonic,
- !cast<Instruction>(NAME#"Xrx64"),
- GPR64, GPR64sp, GPR64, 24>; // UXTX #0
-}
-
-//---
-// Extract
-//---
-def SDTA64EXTR : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>,
- SDTCisPtrTy<3>]>;
-def ARM64Extr : SDNode<"ARM64ISD::EXTR", SDTA64EXTR>;
-
-class BaseExtractImm<RegisterClass regtype, Operand imm_type, string asm,
- list<dag> patterns>
- : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm, imm_type:$imm),
- asm, "\t$Rd, $Rn, $Rm, $imm", "", patterns>,
- Sched<[WriteExtr, ReadExtrHi]> {
- bits<5> Rd;
- bits<5> Rn;
- bits<5> Rm;
- bits<6> imm;
-
- let Inst{30-23} = 0b00100111;
- let Inst{21} = 0;
- let Inst{20-16} = Rm;
- let Inst{15-10} = imm;
- let Inst{9-5} = Rn;
- let Inst{4-0} = Rd;
-}
-
-multiclass ExtractImm<string asm> {
- def Wrri : BaseExtractImm<GPR32, imm0_31, asm,
- [(set GPR32:$Rd,
- (ARM64Extr GPR32:$Rn, GPR32:$Rm, imm0_31:$imm))]> {
- let Inst{31} = 0;
- let Inst{22} = 0;
- }
- def Xrri : BaseExtractImm<GPR64, imm0_63, asm,
- [(set GPR64:$Rd,
- (ARM64Extr GPR64:$Rn, GPR64:$Rm, imm0_63:$imm))]> {
- let Inst{31} = 1;
- let Inst{22} = 1;
- }
-}
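-
-// Typical use (illustrative): defm EXTR : ExtractImm<"extr">; a
-// rotate-right-by-immediate is then just EXTR with $Rn == $Rm.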
-
-//---
-// Bitfield
-//---
-
-let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
-class BaseBitfieldImm<bits<2> opc,
- RegisterClass regtype, Operand imm_type, string asm>
- : I<(outs regtype:$Rd), (ins regtype:$Rn, imm_type:$immr, imm_type:$imms),
- asm, "\t$Rd, $Rn, $immr, $imms", "", []>,
- Sched<[WriteIS]> {
- bits<5> Rd;
- bits<5> Rn;
- bits<6> immr;
- bits<6> imms;
-
- let Inst{30-29} = opc;
- let Inst{28-23} = 0b100110;
- let Inst{21-16} = immr;
- let Inst{15-10} = imms;
- let Inst{9-5} = Rn;
- let Inst{4-0} = Rd;
-}
-
-multiclass BitfieldImm<bits<2> opc, string asm> {
- def Wri : BaseBitfieldImm<opc, GPR32, imm0_31, asm> {
- let Inst{31} = 0;
- let Inst{22} = 0;
- }
- def Xri : BaseBitfieldImm<opc, GPR64, imm0_63, asm> {
- let Inst{31} = 1;
- let Inst{22} = 1;
- }
-}
-
-let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
-class BaseBitfieldImmWith2RegArgs<bits<2> opc,
- RegisterClass regtype, Operand imm_type, string asm>
- : I<(outs regtype:$Rd), (ins regtype:$src, regtype:$Rn, imm_type:$immr,
- imm_type:$imms),
- asm, "\t$Rd, $Rn, $immr, $imms", "$src = $Rd", []>,
- Sched<[WriteIS]> {
- bits<5> Rd;
- bits<5> Rn;
- bits<6> immr;
- bits<6> imms;
-
- let Inst{30-29} = opc;
- let Inst{28-23} = 0b100110;
- let Inst{21-16} = immr;
- let Inst{15-10} = imms;
- let Inst{9-5} = Rn;
- let Inst{4-0} = Rd;
-}
-
-multiclass BitfieldImmWith2RegArgs<bits<2> opc, string asm> {
- def Wri : BaseBitfieldImmWith2RegArgs<opc, GPR32, imm0_31, asm> {
- let Inst{31} = 0;
- let Inst{22} = 0;
- }
- def Xri : BaseBitfieldImmWith2RegArgs<opc, GPR64, imm0_63, asm> {
- let Inst{31} = 1;
- let Inst{22} = 1;
- }
-}
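-
-// Typical use (illustrative): defm BFM : BitfieldImmWith2RegArgs<0b01, "bfm">;
-// the "$src = $Rd" tie models bfm's read-modify-write of the destination,
-// which SBFM/UBFM (via BitfieldImm above) do not need.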
-
-//---
-// Logical
-//---
-
-// Logical (immediate)
-class BaseLogicalImm<bits<2> opc, RegisterClass dregtype,
- RegisterClass sregtype, Operand imm_type, string asm,
- list<dag> pattern>
- : I<(outs dregtype:$Rd), (ins sregtype:$Rn, imm_type:$imm),
- asm, "\t$Rd, $Rn, $imm", "", pattern>,
- Sched<[WriteI]> {
- bits<5> Rd;
- bits<5> Rn;
- bits<13> imm;
- let Inst{30-29} = opc;
- let Inst{28-23} = 0b100100;
- let Inst{22} = imm{12};
- let Inst{21-16} = imm{11-6};
- let Inst{15-10} = imm{5-0};
- let Inst{9-5} = Rn;
- let Inst{4-0} = Rd;
-
- let DecoderMethod = "DecodeLogicalImmInstruction";
-}
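-
-// Encoding sketch (illustrative): the 32-bit bitmask 0x000000ff (a run of
-// eight ones, unrotated) encodes as imm{12} = N = 0, imm{11-6} = immr =
-// 0b000000, and imm{5-0} = imms = 0b000111 (run length minus one).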
-
-// Logical (shifted register)
-class BaseLogicalSReg<bits<2> opc, bit N, RegisterClass regtype,
- logical_shifted_reg shifted_regtype, string asm,
- list<dag> pattern>
- : I<(outs regtype:$Rd), (ins regtype:$Rn, shifted_regtype:$Rm),
- asm, "\t$Rd, $Rn, $Rm", "", pattern>,
- Sched<[WriteISReg]> {
- // The MI operands appear in encoding order, so the default in-order
- // operand-to-field mapping works and no encoder method or by-name
- // matching is needed. Since matching is by order, make sure the field
- // names do not collide with the operand names.
- bits<5> dst;
- bits<5> src1;
- bits<5> src2;
- bits<8> shift;
- let Inst{30-29} = opc;
- let Inst{28-24} = 0b01010;
- let Inst{23-22} = shift{7-6};
- let Inst{21} = N;
- let Inst{20-16} = src2;
- let Inst{15-10} = shift{5-0};
- let Inst{9-5} = src1;
- let Inst{4-0} = dst;
-
- let DecoderMethod = "DecodeThreeAddrSRegInstruction";
-}
-
-// Aliases for register+register logical instructions.
-class LogicalRegAlias<string asm, Instruction inst, RegisterClass regtype>
- : InstAlias<asm#" $dst, $src1, $src2",
- (inst regtype:$dst, regtype:$src1, regtype:$src2, 0)>;
-
-let AddedComplexity = 6 in
-multiclass LogicalImm<bits<2> opc, string mnemonic, SDNode OpNode> {
- def Wri : BaseLogicalImm<opc, GPR32sp, GPR32, logical_imm32, mnemonic,
- [(set GPR32sp:$Rd, (OpNode GPR32:$Rn,
- logical_imm32:$imm))]> {
- let Inst{31} = 0;
- let Inst{22} = 0; // 64-bit version has an additional bit of immediate.
- }
- def Xri : BaseLogicalImm<opc, GPR64sp, GPR64, logical_imm64, mnemonic,
- [(set GPR64sp:$Rd, (OpNode GPR64:$Rn,
- logical_imm64:$imm))]> {
- let Inst{31} = 1;
- }
-}
-
-multiclass LogicalImmS<bits<2> opc, string mnemonic, SDNode OpNode> {
- let isCompare = 1, Defs = [CPSR] in {
- def Wri : BaseLogicalImm<opc, GPR32, GPR32, logical_imm32, mnemonic,
- [(set GPR32:$Rd, (OpNode GPR32:$Rn, logical_imm32:$imm))]> {
- let Inst{31} = 0;
- let Inst{22} = 0; // 64-bit version has an additional bit of immediate.
- }
- def Xri : BaseLogicalImm<opc, GPR64, GPR64, logical_imm64, mnemonic,
- [(set GPR64:$Rd, (OpNode GPR64:$Rn, logical_imm64:$imm))]> {
- let Inst{31} = 1;
- }
- } // end Defs = [CPSR]
-}
-
-class BaseLogicalRegPseudo<RegisterClass regtype, SDPatternOperator OpNode>
- : Pseudo<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm),
- [(set regtype:$Rd, (OpNode regtype:$Rn, regtype:$Rm))]>,
- Sched<[WriteI]>;
-
-// Split from LogicalImm, since not all logical instructions have both an
-// immediate and a register form.
-multiclass LogicalReg<bits<2> opc, bit N, string mnemonic,
- SDPatternOperator OpNode> {
- def Wrr : BaseLogicalRegPseudo<GPR32, OpNode>;
- def Xrr : BaseLogicalRegPseudo<GPR64, OpNode>;
-
- def Wrs : BaseLogicalSReg<opc, N, GPR32, logical_shifted_reg32, mnemonic,
- [(set GPR32:$Rd, (OpNode GPR32:$Rn,
- logical_shifted_reg32:$Rm))]> {
- let Inst{31} = 0;
- }
- def Xrs : BaseLogicalSReg<opc, N, GPR64, logical_shifted_reg64, mnemonic,
- [(set GPR64:$Rd, (OpNode GPR64:$Rn,
- logical_shifted_reg64:$Rm))]> {
- let Inst{31} = 1;
- }
-
- def : LogicalRegAlias<mnemonic,
- !cast<Instruction>(NAME#"Wrs"), GPR32>;
- def : LogicalRegAlias<mnemonic,
- !cast<Instruction>(NAME#"Xrs"), GPR64>;
-}
-
-// Split from LogicalReg to allow setting CPSR Defs
-multiclass LogicalRegS<bits<2> opc, bit N, string mnemonic> {
- let Defs = [CPSR], mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
- def Wrs : BaseLogicalSReg<opc, N, GPR32, logical_shifted_reg32, mnemonic, []>{
- let Inst{31} = 0;
- }
- def Xrs : BaseLogicalSReg<opc, N, GPR64, logical_shifted_reg64, mnemonic, []>{
- let Inst{31} = 1;
- }
- } // Defs = [CPSR]
-
- def : LogicalRegAlias<mnemonic,
- !cast<Instruction>(NAME#"Wrs"), GPR32>;
- def : LogicalRegAlias<mnemonic,
- !cast<Instruction>(NAME#"Xrs"), GPR64>;
-}
-
-//---
-// Conditionally set flags
-//---
-
-// Condition code.
-// 4-bit immediate. Pretty-printed as <cc>
-def ccode : Operand<i32> {
- let PrintMethod = "printCondCode";
-}
-
-let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
-class BaseCondSetFlagsImm<bit op, RegisterClass regtype, string asm>
- : I<(outs), (ins regtype:$Rn, imm0_31:$imm, imm0_15:$nzcv, ccode:$cond),
- asm, "\t$Rn, $imm, $nzcv, $cond", "", []>,
- Sched<[WriteI]> {
- let Uses = [CPSR];
- let Defs = [CPSR];
-
- bits<5> Rn;
- bits<5> imm;
- bits<4> nzcv;
- bits<4> cond;
-
- let Inst{30} = op;
- let Inst{29-21} = 0b111010010;
- let Inst{20-16} = imm;
- let Inst{15-12} = cond;
- let Inst{11-10} = 0b10;
- let Inst{9-5} = Rn;
- let Inst{4} = 0b0;
- let Inst{3-0} = nzcv;
-}
-
-multiclass CondSetFlagsImm<bit op, string asm> {
- def Wi : BaseCondSetFlagsImm<op, GPR32, asm> {
- let Inst{31} = 0;
- }
- def Xi : BaseCondSetFlagsImm<op, GPR64, asm> {
- let Inst{31} = 1;
- }
-}
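-
-// Typical use (illustrative): defm CCMP : CondSetFlagsImm<1, "ccmp">;
-// with op selecting between the ccmn (0) and ccmp (1) encodings.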
-
-let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
-class BaseCondSetFlagsReg<bit op, RegisterClass regtype, string asm>
- : I<(outs), (ins regtype:$Rn, regtype:$Rm, imm0_15:$nzcv, ccode:$cond),
- asm, "\t$Rn, $Rm, $nzcv, $cond", "", []>,
- Sched<[WriteI]> {
- let Uses = [CPSR];
- let Defs = [CPSR];
-
- bits<5> Rn;
- bits<5> Rm;
- bits<4> nzcv;
- bits<4> cond;
-
- let Inst{30} = op;
- let Inst{29-21} = 0b111010010;
- let Inst{20-16} = Rm;
- let Inst{15-12} = cond;
- let Inst{11-10} = 0b00;
- let Inst{9-5} = Rn;
- let Inst{4} = 0b0;
- let Inst{3-0} = nzcv;
-}
-
-multiclass CondSetFlagsReg<bit op, string asm> {
- def Wr : BaseCondSetFlagsReg<op, GPR32, asm> {
- let Inst{31} = 0;
- }
- def Xr : BaseCondSetFlagsReg<op, GPR64, asm> {
- let Inst{31} = 1;
- }
-}
-
-//---
-// Conditional select
-//---
-
-class BaseCondSelect<bit op, bits<2> op2, RegisterClass regtype, string asm>
- : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm, ccode:$cond),
- asm, "\t$Rd, $Rn, $Rm, $cond", "",
- [(set regtype:$Rd,
- (ARM64csel regtype:$Rn, regtype:$Rm, (i32 imm:$cond), CPSR))]>,
- Sched<[WriteI]> {
- let Uses = [CPSR];
-
- bits<5> Rd;
- bits<5> Rn;
- bits<5> Rm;
- bits<4> cond;
-
- let Inst{30} = op;
- let Inst{29-21} = 0b011010100;
- let Inst{20-16} = Rm;
- let Inst{15-12} = cond;
- let Inst{11-10} = op2;
- let Inst{9-5} = Rn;
- let Inst{4-0} = Rd;
-}
-
-multiclass CondSelect<bit op, bits<2> op2, string asm> {
- def Wr : BaseCondSelect<op, op2, GPR32, asm> {
- let Inst{31} = 0;
- }
- def Xr : BaseCondSelect<op, op2, GPR64, asm> {
- let Inst{31} = 1;
- }
-}
-
-class BaseCondSelectOp<bit op, bits<2> op2, RegisterClass regtype, string asm,
- PatFrag frag>
- : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm, ccode:$cond),
- asm, "\t$Rd, $Rn, $Rm, $cond", "",
- [(set regtype:$Rd,
- (ARM64csel regtype:$Rn, (frag regtype:$Rm),
- (i32 imm:$cond), CPSR))]>,
- Sched<[WriteI]> {
- let Uses = [CPSR];
-
- bits<5> Rd;
- bits<5> Rn;
- bits<5> Rm;
- bits<4> cond;
-
- let Inst{30} = op;
- let Inst{29-21} = 0b011010100;
- let Inst{20-16} = Rm;
- let Inst{15-12} = cond;
- let Inst{11-10} = op2;
- let Inst{9-5} = Rn;
- let Inst{4-0} = Rd;
-}
-
-multiclass CondSelectOp<bit op, bits<2> op2, string asm, PatFrag frag> {
- def Wr : BaseCondSelectOp<op, op2, GPR32, asm, frag> {
- let Inst{31} = 0;
- }
- def Xr : BaseCondSelectOp<op, op2, GPR64, asm, frag> {
- let Inst{31} = 1;
- }
-}
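-
-// Typical use (illustrative; "inc" standing for an add-one PatFrag):
-//   defm CSINC : CondSelectOp<0, 0b01, "csinc", inc>;
-// aliases such as cset and cinc are then built on top of these forms.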
-
-//---
-// Special Mask Value
-//---
-def maski8_or_more : Operand<i32>,
- ImmLeaf<i32, [{ return (Imm & 0xff) == 0xff; }]> {
-}
-def maski16_or_more : Operand<i32>,
- ImmLeaf<i32, [{ return (Imm & 0xffff) == 0xffff; }]> {
-}
-
-
-//---
-// Load/store
-//---
-
-// (unsigned immediate)
-// Indexed for 8-bit registers. offset is in range [0,4095].
-def MemoryIndexed8Operand : AsmOperandClass {
- let Name = "MemoryIndexed8";
- let DiagnosticType = "InvalidMemoryIndexed8";
-}
-def am_indexed8 : Operand<i64>,
- ComplexPattern<i64, 2, "SelectAddrModeIndexed8", []> {
- let PrintMethod = "printAMIndexed8";
- let EncoderMethod
- = "getAMIndexed8OpValue<ARM64::fixup_arm64_ldst_imm12_scale1>";
- let ParserMatchClass = MemoryIndexed8Operand;
- let MIOperandInfo = (ops GPR64sp:$base, i64imm:$offset);
-}
-
-// Indexed for 16-bit registers. offset is multiple of 2 in range [0,8190],
-// stored as immval/2 (the 12-bit literal that encodes directly into the insn).
-def MemoryIndexed16Operand : AsmOperandClass {
- let Name = "MemoryIndexed16";
- let DiagnosticType = "InvalidMemoryIndexed16";
-}
-def am_indexed16 : Operand<i64>,
- ComplexPattern<i64, 2, "SelectAddrModeIndexed16", []> {
- let PrintMethod = "printAMIndexed16";
- let EncoderMethod
- = "getAMIndexed8OpValue<ARM64::fixup_arm64_ldst_imm12_scale2>";
- let ParserMatchClass = MemoryIndexed16Operand;
- let MIOperandInfo = (ops GPR64sp:$base, i64imm:$offset);
-}
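-
-// Encoding sketch (illustrative): "ldrh w0, [x1, #4094]" stores
-// offset = 4094/2 = 2047 in the 12-bit field; an offset that is not a
-// multiple of 2 is not representable here and must use the unscaled form.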
-
-// Indexed for 32-bit registers. offset is multiple of 4 in range [0,16380],
-// stored as immval/4 (the 12-bit literal that encodes directly into the insn).
-def MemoryIndexed32Operand : AsmOperandClass {
- let Name = "MemoryIndexed32";
- let DiagnosticType = "InvalidMemoryIndexed32";
-}
-def am_indexed32 : Operand<i64>,
- ComplexPattern<i64, 2, "SelectAddrModeIndexed32", []> {
- let PrintMethod = "printAMIndexed32";
- let EncoderMethod
- = "getAMIndexed8OpValue<ARM64::fixup_arm64_ldst_imm12_scale4>";
- let ParserMatchClass = MemoryIndexed32Operand;
- let MIOperandInfo = (ops GPR64sp:$base, i64imm:$offset);
-}
-
-// Indexed for 64-bit registers. offset is multiple of 8 in range [0,32760],
-// stored as immval/8 (the 12-bit literal that encodes directly into the insn).
-def MemoryIndexed64Operand : AsmOperandClass {
- let Name = "MemoryIndexed64";
- let DiagnosticType = "InvalidMemoryIndexed64";
-}
-def am_indexed64 : Operand<i64>,
- ComplexPattern<i64, 2, "SelectAddrModeIndexed64", []> {
- let PrintMethod = "printAMIndexed64";
- let EncoderMethod
- = "getAMIndexed8OpValue<ARM64::fixup_arm64_ldst_imm12_scale8>";
- let ParserMatchClass = MemoryIndexed64Operand;
- let MIOperandInfo = (ops GPR64sp:$base, i64imm:$offset);
-}
-
-// Indexed for 128-bit registers. offset is multiple of 16 in range [0,65520],
-// stored as immval/16 (the 12-bit literal that encodes directly into the insn).
-def MemoryIndexed128Operand : AsmOperandClass {
- let Name = "MemoryIndexed128";
- let DiagnosticType = "InvalidMemoryIndexed128";
-}
-def am_indexed128 : Operand<i64>,
- ComplexPattern<i64, 2, "SelectAddrModeIndexed128", []> {
- let PrintMethod = "printAMIndexed128";
- let EncoderMethod
- = "getAMIndexed8OpValue<ARM64::fixup_arm64_ldst_imm12_scale16>";
- let ParserMatchClass = MemoryIndexed128Operand;
- let MIOperandInfo = (ops GPR64sp:$base, i64imm:$offset);
-}
-
-// No offset.
-def MemoryNoIndexOperand : AsmOperandClass { let Name = "MemoryNoIndex"; }
-def am_noindex : Operand<i64>,
- ComplexPattern<i64, 1, "SelectAddrModeNoIndex", []> {
- let PrintMethod = "printAMNoIndex";
- let ParserMatchClass = MemoryNoIndexOperand;
- let MIOperandInfo = (ops GPR64sp:$base);
-}
-
-class BaseLoadStoreUI<bits<2> sz, bit V, bits<2> opc, dag oops, dag iops,
- string asm, list<dag> pattern>
- : I<oops, iops, asm, "\t$Rt, $addr", "", pattern> {
- bits<5> dst;
-
- bits<17> addr;
- bits<5> base = addr{4-0};
- bits<12> offset = addr{16-5};
-
- let Inst{31-30} = sz;
- let Inst{29-27} = 0b111;
- let Inst{26} = V;
- let Inst{25-24} = 0b01;
- let Inst{23-22} = opc;
- let Inst{21-10} = offset;
- let Inst{9-5} = base;
- let Inst{4-0} = dst;
-
- let DecoderMethod = "DecodeUnsignedLdStInstruction";
-}
-
-let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in
-class LoadUI<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype,
- Operand indextype, string asm, list<dag> pattern>
- : BaseLoadStoreUI<sz, V, opc,
- (outs regtype:$Rt), (ins indextype:$addr), asm, pattern>,
- Sched<[WriteLD]>;
-
-let mayLoad = 0, mayStore = 1, hasSideEffects = 0 in
-class StoreUI<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype,
- Operand indextype, string asm, list<dag> pattern>
- : BaseLoadStoreUI<sz, V, opc,
- (outs), (ins regtype:$Rt, indextype:$addr), asm, pattern>,
- Sched<[WriteST]>;
-
-def PrefetchOperand : AsmOperandClass {
- let Name = "Prefetch";
- let ParserMethod = "tryParsePrefetch";
-}
-def prfop : Operand<i32> {
- let PrintMethod = "printPrefetchOp";
- let ParserMatchClass = PrefetchOperand;
-}
-
-let mayLoad = 0, mayStore = 0, hasSideEffects = 1 in
-class PrefetchUI<bits<2> sz, bit V, bits<2> opc, string asm, list<dag> pat>
- : BaseLoadStoreUI<sz, V, opc,
- (outs), (ins prfop:$Rt, am_indexed64:$addr), asm, pat>,
- Sched<[WriteLD]>;
-
-//---
-// Load literal
-//---
-
-let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in
-class LoadLiteral<bits<2> opc, bit V, RegisterClass regtype, string asm>
- : I<(outs regtype:$Rt), (ins am_brcond:$label),
- asm, "\t$Rt, $label", "", []>,
- Sched<[WriteLD]> {
- bits<5> Rt;
- bits<19> label;
- let Inst{31-30} = opc;
- let Inst{29-27} = 0b011;
- let Inst{26} = V;
- let Inst{25-24} = 0b00;
- let Inst{23-5} = label;
- let Inst{4-0} = Rt;
-}
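-
-// Note: the literal forms reuse the 19-bit PC-relative am_brcond operand,
-// so "ldr x0, lbl" (illustrative) can reach +/-1MiB in 4-byte steps.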
-
-let mayLoad = 0, mayStore = 0, hasSideEffects = 1 in
-class PrefetchLiteral<bits<2> opc, bit V, string asm, list<dag> pat>
- : I<(outs), (ins prfop:$Rt, am_brcond:$label),
- asm, "\t$Rt, $label", "", pat>,
- Sched<[WriteLD]> {
- bits<5> Rt;
- bits<19> label;
- let Inst{31-30} = opc;
- let Inst{29-27} = 0b011;
- let Inst{26} = V;
- let Inst{25-24} = 0b00;
- let Inst{23-5} = label;
- let Inst{4-0} = Rt;
-}
-
-//---
-// Load/store register offset
-//---
-
-class MemROAsmOperand<int sz> : AsmOperandClass {
- let Name = "MemoryRegisterOffset"#sz;
-}
-
-def MemROAsmOperand8 : MemROAsmOperand<8>;
-def MemROAsmOperand16 : MemROAsmOperand<16>;
-def MemROAsmOperand32 : MemROAsmOperand<32>;
-def MemROAsmOperand64 : MemROAsmOperand<64>;
-def MemROAsmOperand128 : MemROAsmOperand<128>;
-
-class ro_indexed<int sz> : Operand<i64> { // ComplexPattern<...>
- let PrintMethod = "printMemoryRegOffset"#sz;
- let MIOperandInfo = (ops GPR64sp:$base, GPR64:$offset, i32imm:$extend);
-}
-
-def ro_indexed8 : ro_indexed<8>, ComplexPattern<i64, 3, "SelectAddrModeRO8", []> {
- let ParserMatchClass = MemROAsmOperand8;
-}
-
-def ro_indexed16 : ro_indexed<16>, ComplexPattern<i64, 3, "SelectAddrModeRO16", []> {
- let ParserMatchClass = MemROAsmOperand16;
-}
-
-def ro_indexed32 : ro_indexed<32>, ComplexPattern<i64, 3, "SelectAddrModeRO32", []> {
- let ParserMatchClass = MemROAsmOperand32;
-}
-
-def ro_indexed64 : ro_indexed<64>, ComplexPattern<i64, 3, "SelectAddrModeRO64", []> {
- let ParserMatchClass = MemROAsmOperand64;
-}
-
-def ro_indexed128 : ro_indexed<128>, ComplexPattern<i64, 3, "SelectAddrModeRO128", []> {
- let ParserMatchClass = MemROAsmOperand128;
-}
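-
-// Encoding sketch (illustrative): for "ldr w0, [x1, x2, lsl #2]" the 4-bit
-// extend field packs the option bits (uxtw/lsl/sxtw/sxtx) in extend{3-1}
-// and the shift-present S bit in extend{0}.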
-
-class LoadStore8RO<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype,
- string asm, dag oops, dag iops, list<dag> pat>
- : I<oops, iops, asm, "\t$Rt, $addr", "", pat> {
- // The operands are in order to match the 'addr' MI operands, so we
- // don't need an encoder method and by-name matching. Just use the default
- // in-order handling. Since we're using by-order, make sure the names
- // do not match.
- bits<5> dst;
- bits<5> base;
- bits<5> offset;
- bits<4> extend;
- let Inst{31-30} = sz;
- let Inst{29-27} = 0b111;
- let Inst{26} = V;
- let Inst{25-24} = 0b00;
- let Inst{23-22} = opc;
- let Inst{21} = 1;
- let Inst{20-16} = offset;
- let Inst{15-13} = extend{3-1};
-
- let Inst{12} = extend{0};
- let Inst{11-10} = 0b10;
- let Inst{9-5} = base;
- let Inst{4-0} = dst;
-
- let DecoderMethod = "DecodeRegOffsetLdStInstruction";
-}
-
-class Load8RO<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype,
- string asm, list<dag> pat>
- : LoadStore8RO<sz, V, opc, regtype, asm,
- (outs regtype:$Rt), (ins ro_indexed8:$addr), pat>,
- Sched<[WriteLDIdx, ReadAdrBase]>;
-
-class Store8RO<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype,
- string asm, list<dag> pat>
- : LoadStore8RO<sz, V, opc, regtype, asm,
- (outs), (ins regtype:$Rt, ro_indexed8:$addr), pat>,
- Sched<[WriteSTIdx, ReadAdrBase]>;
-
-class LoadStore16RO<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype,
- string asm, dag oops, dag iops, list<dag> pat>
- : I<oops, iops, asm, "\t$Rt, $addr", "", pat> {
- // The operands are in order to match the 'addr' MI operands, so we
- // don't need an encoder method and by-name matching. Just use the default
- // in-order handling. Since we're using by-order, make sure the names
- // do not match.
- bits<5> dst;
- bits<5> base;
- bits<5> offset;
- bits<4> extend;
- let Inst{31-30} = sz;
- let Inst{29-27} = 0b111;
- let Inst{26} = V;
- let Inst{25-24} = 0b00;
- let Inst{23-22} = opc;
- let Inst{21} = 1;
- let Inst{20-16} = offset;
- let Inst{15-13} = extend{3-1};
-
- let Inst{12} = extend{0};
- let Inst{11-10} = 0b10;
- let Inst{9-5} = base;
- let Inst{4-0} = dst;
-
- let DecoderMethod = "DecodeRegOffsetLdStInstruction";
-}
-
-class Load16RO<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype,
- string asm, list<dag> pat>
- : LoadStore16RO<sz, V, opc, regtype, asm,
- (outs regtype:$Rt), (ins ro_indexed16:$addr), pat>,
- Sched<[WriteLDIdx, ReadAdrBase]>;
-
-class Store16RO<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype,
- string asm, list<dag> pat>
- : LoadStore16RO<sz, V, opc, regtype, asm,
- (outs), (ins regtype:$Rt, ro_indexed16:$addr), pat>,
- Sched<[WriteSTIdx, ReadAdrBase]>;
-
-class LoadStore32RO<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype,
- string asm, dag oops, dag iops, list<dag> pat>
- : I<oops, iops, asm, "\t$Rt, $addr", "", pat> {
- // The operands are in order to match the 'addr' MI operands, so we
- // don't need an encoder method and by-name matching. Just use the default
- // in-order handling. Since we're using by-order, make sure the names
- // do not match.
- bits<5> dst;
- bits<5> base;
- bits<5> offset;
- bits<4> extend;
- let Inst{31-30} = sz;
- let Inst{29-27} = 0b111;
- let Inst{26} = V;
- let Inst{25-24} = 0b00;
- let Inst{23-22} = opc;
- let Inst{21} = 1;
- let Inst{20-16} = offset;
- let Inst{15-13} = extend{3-1};
-
- let Inst{12} = extend{0};
- let Inst{11-10} = 0b10;
- let Inst{9-5} = base;
- let Inst{4-0} = dst;
-
- let DecoderMethod = "DecodeRegOffsetLdStInstruction";
-}
-
-class Load32RO<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype,
- string asm, list<dag> pat>
- : LoadStore32RO<sz, V, opc, regtype, asm,
- (outs regtype:$Rt), (ins ro_indexed32:$addr), pat>,
- Sched<[WriteLDIdx, ReadAdrBase]>;
-
-class Store32RO<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype,
- string asm, list<dag> pat>
- : LoadStore32RO<sz, V, opc, regtype, asm,
- (outs), (ins regtype:$Rt, ro_indexed32:$addr), pat>,
- Sched<[WriteSTIdx, ReadAdrBase]>;
-
-class LoadStore64RO<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype,
- string asm, dag oops, dag iops, list<dag> pat>
- : I<oops, iops, asm, "\t$Rt, $addr", "", pat> {
- // The operands are in order to match the 'addr' MI operands, so we
- // don't need an encoder method and by-name matching. Just use the default
- // in-order handling. Since we're using by-order, make sure the names
- // do not match.
- bits<5> dst;
- bits<5> base;
- bits<5> offset;
- bits<4> extend;
- let Inst{31-30} = sz;
- let Inst{29-27} = 0b111;
- let Inst{26} = V;
- let Inst{25-24} = 0b00;
- let Inst{23-22} = opc;
- let Inst{21} = 1;
- let Inst{20-16} = offset;
- let Inst{15-13} = extend{3-1};
-
- let Inst{12} = extend{0};
- let Inst{11-10} = 0b10;
- let Inst{9-5} = base;
- let Inst{4-0} = dst;
-
- let DecoderMethod = "DecodeRegOffsetLdStInstruction";
-}
-
-let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in
-class Load64RO<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype,
- string asm, list<dag> pat>
- : LoadStore64RO<sz, V, opc, regtype, asm,
- (outs regtype:$Rt), (ins ro_indexed64:$addr), pat>,
- Sched<[WriteLDIdx, ReadAdrBase]>;
-
-let mayLoad = 0, mayStore = 1, hasSideEffects = 0 in
-class Store64RO<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype,
- string asm, list<dag> pat>
- : LoadStore64RO<sz, V, opc, regtype, asm,
- (outs), (ins regtype:$Rt, ro_indexed64:$addr), pat>,
- Sched<[WriteSTIdx, ReadAdrBase]>;
-
-
-class LoadStore128RO<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype,
- string asm, dag oops, dag iops, list<dag> pat>
- : I<oops, iops, asm, "\t$Rt, $addr", "", pat> {
- // The operands are in order to match the 'addr' MI operands, so we
- // don't need an encoder method and by-name matching. Just use the default
- // in-order handling. Since we're using by-order, make sure the names
- // do not match.
- bits<5> dst;
- bits<5> base;
- bits<5> offset;
- bits<4> extend;
- let Inst{31-30} = sz;
- let Inst{29-27} = 0b111;
- let Inst{26} = V;
- let Inst{25-24} = 0b00;
- let Inst{23-22} = opc;
- let Inst{21} = 1;
- let Inst{20-16} = offset;
- let Inst{15-13} = extend{3-1};
-
- let Inst{12} = extend{0};
- let Inst{11-10} = 0b10;
- let Inst{9-5} = base;
- let Inst{4-0} = dst;
-
- let DecoderMethod = "DecodeRegOffsetLdStInstruction";
-}
-
-let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in
-class Load128RO<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype,
- string asm, list<dag> pat>
- : LoadStore128RO<sz, V, opc, regtype, asm,
- (outs regtype:$Rt), (ins ro_indexed128:$addr), pat>,
- Sched<[WriteLDIdx, ReadAdrBase]>;
-
-let mayLoad = 0, mayStore = 1, hasSideEffects = 0 in
-class Store128RO<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype,
- string asm, list<dag> pat>
- : LoadStore128RO<sz, V, opc, regtype, asm,
- (outs), (ins regtype:$Rt, ro_indexed128:$addr), pat>,
- Sched<[WriteSTIdx, ReadAdrBase]>;
-
-let mayLoad = 0, mayStore = 0, hasSideEffects = 1 in
-class PrefetchRO<bits<2> sz, bit V, bits<2> opc, string asm, list<dag> pat>
- : I<(outs), (ins prfop:$Rt, ro_indexed64:$addr), asm,
- "\t$Rt, $addr", "", pat>,
- Sched<[WriteLD]> {
- // The operands are in order to match the 'addr' MI operands, so we
- // don't need an encoder method and by-name matching. Just use the default
- // in-order handling. Since we're using by-order, make sure the names
- // do not match.
- bits<5> dst;
- bits<5> base;
- bits<5> offset;
- bits<4> extend;
- let Inst{31-30} = sz;
- let Inst{29-27} = 0b111;
- let Inst{26} = V;
- let Inst{25-24} = 0b00;
- let Inst{23-22} = opc;
- let Inst{21} = 1;
- let Inst{20-16} = offset;
- let Inst{15-13} = extend{3-1};
-
- let Inst{12} = extend{0};
- let Inst{11-10} = 0b10;
- let Inst{9-5} = base;
- let Inst{4-0} = dst;
-
- let DecoderMethod = "DecodeRegOffsetLdStInstruction";
-}
-
-//---
-// Load/store unscaled immediate
-//---
-
-def MemoryUnscaledOperand : AsmOperandClass {
- let Name = "MemoryUnscaled";
- let DiagnosticType = "InvalidMemoryIndexedSImm9";
-}
-class am_unscaled_operand : Operand<i64> {
- let PrintMethod = "printAMUnscaled";
- let ParserMatchClass = MemoryUnscaledOperand;
- let MIOperandInfo = (ops GPR64sp:$base, i64imm:$offset);
-}
-def am_unscaled : am_unscaled_operand;
-def am_unscaled8 : am_unscaled_operand,
- ComplexPattern<i64, 2, "SelectAddrModeUnscaled8", []>;
-def am_unscaled16 : am_unscaled_operand,
- ComplexPattern<i64, 2, "SelectAddrModeUnscaled16", []>;
-def am_unscaled32 : am_unscaled_operand,
- ComplexPattern<i64, 2, "SelectAddrModeUnscaled32", []>;
-def am_unscaled64 : am_unscaled_operand,
- ComplexPattern<i64, 2, "SelectAddrModeUnscaled64", []>;
-def am_unscaled128 : am_unscaled_operand,
- ComplexPattern<i64, 2, "SelectAddrModeUnscaled128", []>;
-
-class BaseLoadStoreUnscale<bits<2> sz, bit V, bits<2> opc, dag oops, dag iops,
- string asm, list<dag> pattern>
- : I<oops, iops, asm, "\t$Rt, $addr", "", pattern> {
- // The operands are in order to match the 'addr' MI operands, so we
- // don't need an encoder method and by-name matching. Just use the default
- // in-order handling. Since we're using by-order, make sure the names
- // do not match.
- bits<5> dst;
- bits<5> base;
- bits<9> offset;
- let Inst{31-30} = sz;
- let Inst{29-27} = 0b111;
- let Inst{26} = V;
- let Inst{25-24} = 0b00;
- let Inst{23-22} = opc;
- let Inst{21} = 0;
- let Inst{20-12} = offset;
- let Inst{11-10} = 0b00;
- let Inst{9-5} = base;
- let Inst{4-0} = dst;
-
- let DecoderMethod = "DecodeSignedLdStInstruction";
-}
-
-let AddedComplexity = 1 in // try this before LoadUI
-class LoadUnscaled<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype,
- Operand amtype, string asm, list<dag> pattern>
- : BaseLoadStoreUnscale<sz, V, opc, (outs regtype:$Rt),
- (ins amtype:$addr), asm, pattern>,
- Sched<[WriteLD]>;
-
-let AddedComplexity = 1 in // try this before StoreUI
-class StoreUnscaled<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype,
- Operand amtype, string asm, list<dag> pattern>
- : BaseLoadStoreUnscale<sz, V, opc, (outs),
- (ins regtype:$Rt, amtype:$addr), asm, pattern>,
- Sched<[WriteST]>;
-
-let mayLoad = 0, mayStore = 0, hasSideEffects = 1 in
-class PrefetchUnscaled<bits<2> sz, bit V, bits<2> opc, string asm, list<dag> pat>
- : BaseLoadStoreUnscale<sz, V, opc, (outs),
- (ins prfop:$Rt, am_unscaled:$addr), asm, pat>,
- Sched<[WriteLD]>;
-
-//---
-// Load/store unscaled immediate, unprivileged
-//---
-
-class BaseLoadStoreUnprivileged<bits<2> sz, bit V, bits<2> opc,
- dag oops, dag iops, string asm>
- : I<oops, iops, asm, "\t$Rt, $addr", "", []> {
- // The operands are in order to match the 'addr' MI operands, so we
- // don't need an encoder method and by-name matching. Just use the default
- // in-order handling. Since we're using by-order, make sure the names
- // do not match.
- bits<5> dst;
- bits<5> base;
- bits<9> offset;
- let Inst{31-30} = sz;
- let Inst{29-27} = 0b111;
- let Inst{26} = V;
- let Inst{25-24} = 0b00;
- let Inst{23-22} = opc;
- let Inst{21} = 0;
- let Inst{20-12} = offset;
- let Inst{11-10} = 0b10;
- let Inst{9-5} = base;
- let Inst{4-0} = dst;
-
- let DecoderMethod = "DecodeSignedLdStInstruction";
-}
-
-let mayStore = 0, mayLoad = 1, hasSideEffects = 0 in {
-class LoadUnprivileged<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype,
- string asm>
- : BaseLoadStoreUnprivileged<sz, V, opc,
- (outs regtype:$Rt), (ins am_unscaled:$addr), asm>,
- Sched<[WriteLD]>;
-}
-
-let mayStore = 1, mayLoad = 0, hasSideEffects = 0 in {
-class StoreUnprivileged<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype,
- string asm>
- : BaseLoadStoreUnprivileged<sz, V, opc,
- (outs), (ins regtype:$Rt, am_unscaled:$addr), asm>,
- Sched<[WriteST]>;
-}
-
-//---
-// Load/store pre-indexed
-//---
-
-class BaseLoadStorePreIdx<bits<2> sz, bit V, bits<2> opc, dag oops, dag iops,
- string asm, string cstr>
- : I<oops, iops, asm, "\t$Rt, $addr!", cstr, []> {
- // The operands are in order to match the 'addr' MI operands, so we
- // don't need an encoder method and by-name matching. Just use the default
- // in-order handling.
- bits<5> dst;
- bits<5> base;
- bits<9> offset;
- let Inst{31-30} = sz;
- let Inst{29-27} = 0b111;
- let Inst{26} = V;
- let Inst{25-24} = 0;
- let Inst{23-22} = opc;
- let Inst{21} = 0;
- let Inst{20-12} = offset;
- let Inst{11-10} = 0b11;
- let Inst{9-5} = base;
- let Inst{4-0} = dst;
-
- let DecoderMethod = "DecodeSignedLdStInstruction";
-}
-
-let hasSideEffects = 0 in {
-let mayStore = 0, mayLoad = 1 in
-// FIXME: Modeling the write-back of these instructions for isel is tricky:
-// we need the complex addressing mode for the memory reference, but we
-// also need the write-back specified as a tied operand to the base
-// register. That combination does not play nicely with the asm matcher
-// and friends.
-class LoadPreIdx<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype,
- string asm>
- : BaseLoadStorePreIdx<sz, V, opc,
- (outs regtype:$Rt/*, GPR64sp:$wback*/),
- (ins am_unscaled:$addr), asm, ""/*"$addr.base = $wback"*/>,
- Sched<[WriteLD, WriteAdr]>;
-
-let mayStore = 1, mayLoad = 0 in
-class StorePreIdx<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype,
- string asm>
- : BaseLoadStorePreIdx<sz, V, opc,
- (outs/* GPR64sp:$wback*/),
- (ins regtype:$Rt, am_unscaled:$addr),
- asm, ""/*"$addr.base = $wback"*/>,
- Sched<[WriteAdr, WriteST]>;
-} // hasSideEffects = 0
-
-// ISel pseudo-instructions which have the tied operands. When the MC lowering
-// logic finally gets smart enough to strip off tied operands that are just
-// for isel convenience, we can get rid of these pseudos and just reference
-// the real instructions directly.
-//
-// Ironically, also because of the writeback operands, we can't put the
-// matcher pattern directly on the instruction, but need to define it
-// separately.
-//
-// Loads aren't matched with patterns here at all, but rather in C++
-// custom lowering.
-let mayStore = 0, mayLoad = 1, hasSideEffects = 0 in {
-class LoadPreIdxPseudo<RegisterClass regtype>
- : Pseudo<(outs regtype:$Rt, GPR64sp:$wback),
- (ins am_noindex:$addr, simm9:$offset), [],
- "$addr.base = $wback,@earlyclobber $wback">,
- Sched<[WriteLD, WriteAdr]>;
-class LoadPostIdxPseudo<RegisterClass regtype>
- : Pseudo<(outs regtype:$Rt, GPR64sp:$wback),
- (ins am_noindex:$addr, simm9:$offset), [],
- "$addr.base = $wback,@earlyclobber $wback">,
- Sched<[WriteLD, WriteI]>;
-}
-multiclass StorePreIdxPseudo<RegisterClass regtype, ValueType Ty,
- SDPatternOperator OpNode> {
- let mayStore = 1, mayLoad = 0, hasSideEffects = 0 in
- def _isel: Pseudo<(outs GPR64sp:$wback),
- (ins regtype:$Rt, am_noindex:$addr, simm9:$offset), [],
- "$addr.base = $wback,@earlyclobber $wback">,
- Sched<[WriteAdr, WriteST]>;
-
- def : Pat<(OpNode (Ty regtype:$Rt), am_noindex:$addr, simm9:$offset),
- (!cast<Instruction>(NAME#_isel) regtype:$Rt, am_noindex:$addr,
- simm9:$offset)>;
-}
-
-//---
-// Load/store post-indexed
-//---
-
-// (post-indexed) load/stores.
-class BaseLoadStorePostIdx<bits<2> sz, bit V, bits<2> opc, dag oops, dag iops,
- string asm, string cstr>
- : I<oops, iops, asm, "\t$Rt, $addr, $idx", cstr, []> {
- // The operands are in order to match the 'addr' MI operands, so we
- // don't need an encoder method and by-name matching. Just use the default
- // in-order handling.
- bits<5> dst;
- bits<5> base;
- bits<9> offset;
- let Inst{31-30} = sz;
- let Inst{29-27} = 0b111;
- let Inst{26} = V;
- let Inst{25-24} = 0b00;
- let Inst{23-22} = opc;
- let Inst{21} = 0b0;
- let Inst{20-12} = offset;
- let Inst{11-10} = 0b01;
- let Inst{9-5} = base;
- let Inst{4-0} = dst;
-
- let DecoderMethod = "DecodeSignedLdStInstruction";
-}
-
-let hasSideEffects = 0 in {
-let mayStore = 0, mayLoad = 1 in
-// FIXME: Modeling the write-back of these instructions for isel is tricky:
-// we need the complex addressing mode for the memory reference, but we
-// also need the write-back specified as a tied operand to the base
-// register. That combination does not play nicely with the asm matcher
-// and friends.
-class LoadPostIdx<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype,
- string asm>
- : BaseLoadStorePostIdx<sz, V, opc,
- (outs regtype:$Rt/*, GPR64sp:$wback*/),
- (ins am_noindex:$addr, simm9:$idx),
- asm, ""/*"$addr.base = $wback"*/>,
- Sched<[WriteLD, WriteI]>;
-
-let mayStore = 1, mayLoad = 0 in
-class StorePostIdx<bits<2> sz, bit V, bits<2> opc, RegisterClass regtype,
- string asm>
- : BaseLoadStorePostIdx<sz, V, opc,
- (outs/* GPR64sp:$wback*/),
- (ins regtype:$Rt, am_noindex:$addr, simm9:$idx),
- asm, ""/*"$addr.base = $wback"*/>,
- Sched<[WriteAdr, WriteST, ReadAdrBase]>;
-} // hasSideEffects = 0
-
-// ISel pseudo-instructions which have the tied operands. When the MC lowering
-// logic finally gets smart enough to strip off tied operands that are just
-// for isel convenience, we can get rid of these pseudos and just reference
-// the real instructions directly.
-//
-// Ironically, also because of the writeback operands, we can't put the
-// matcher pattern directly on the instruction, but need to define it
-// separately.
-multiclass StorePostIdxPseudo<RegisterClass regtype, ValueType Ty,
- SDPatternOperator OpNode, Instruction Insn> {
- let mayStore = 1, mayLoad = 0, hasSideEffects = 0 in
- def _isel: Pseudo<(outs GPR64sp:$wback),
- (ins regtype:$Rt, am_noindex:$addr, simm9:$idx), [],
- "$addr.base = $wback,@earlyclobber $wback">,
- PseudoInstExpansion<(Insn regtype:$Rt, am_noindex:$addr, simm9:$idx)>,
- Sched<[WriteAdr, WriteST, ReadAdrBase]>;
-
- def : Pat<(OpNode (Ty regtype:$Rt), am_noindex:$addr, simm9:$idx),
- (!cast<Instruction>(NAME#_isel) regtype:$Rt, am_noindex:$addr,
- simm9:$idx)>;
-}
-
-//---
-// Load/store pair
-//---
-
-// (indexed, offset)
-
-class BaseLoadStorePairOffset<bits<2> opc, bit V, bit L, dag oops, dag iops,
- string asm>
- : I<oops, iops, asm, "\t$Rt, $Rt2, $addr", "", []> {
- // The operands are in order to match the 'addr' MI operands, so we
- // don't need an encoder method and by-name matching. Just use the default
- // in-order handling. Since we're using by-order, make sure the names
- // do not match.
- bits<5> dst;
- bits<5> dst2;
- bits<5> base;
- bits<7> offset;
- let Inst{31-30} = opc;
- let Inst{29-27} = 0b101;
- let Inst{26} = V;
- let Inst{25-23} = 0b010;
- let Inst{22} = L;
- let Inst{21-15} = offset;
- let Inst{14-10} = dst2;
- let Inst{9-5} = base;
- let Inst{4-0} = dst;
-
- let DecoderMethod = "DecodePairLdStInstruction";
-}
-
-let hasSideEffects = 0 in {
-let mayStore = 0, mayLoad = 1 in
-class LoadPairOffset<bits<2> opc, bit V, RegisterClass regtype,
- Operand indextype, string asm>
- : BaseLoadStorePairOffset<opc, V, 1,
- (outs regtype:$Rt, regtype:$Rt2),
- (ins indextype:$addr), asm>,
- Sched<[WriteLD, WriteLDHi]>;
-
-let mayLoad = 0, mayStore = 1 in
-class StorePairOffset<bits<2> opc, bit V, RegisterClass regtype,
- Operand indextype, string asm>
- : BaseLoadStorePairOffset<opc, V, 0, (outs),
- (ins regtype:$Rt, regtype:$Rt2, indextype:$addr),
- asm>,
- Sched<[WriteSTP]>;
-} // hasSideEffects = 0
-
-// (pre-indexed)
-
-def MemoryIndexed32SImm7 : AsmOperandClass {
- let Name = "MemoryIndexed32SImm7";
- let DiagnosticType = "InvalidMemoryIndexed32SImm7";
-}
-def am_indexed32simm7 : Operand<i32> { // ComplexPattern<...>
- let PrintMethod = "printAMIndexed32";
- let ParserMatchClass = MemoryIndexed32SImm7;
- let MIOperandInfo = (ops GPR64sp:$base, i32imm:$offset);
-}
-
-def MemoryIndexed64SImm7 : AsmOperandClass {
- let Name = "MemoryIndexed64SImm7";
- let DiagnosticType = "InvalidMemoryIndexed64SImm7";
-}
-def am_indexed64simm7 : Operand<i32> { // ComplexPattern<...>
- let PrintMethod = "printAMIndexed64";
- let ParserMatchClass = MemoryIndexed64SImm7;
- let MIOperandInfo = (ops GPR64sp:$base, i32imm:$offset);
-}
-
-def MemoryIndexed128SImm7 : AsmOperandClass {
- let Name = "MemoryIndexed128SImm7";
- let DiagnosticType = "InvalidMemoryIndexed128SImm7";
-}
-def am_indexed128simm7 : Operand<i32> { // ComplexPattern<...>
- let PrintMethod = "printAMIndexed128";
- let ParserMatchClass = MemoryIndexed128SImm7;
- let MIOperandInfo = (ops GPR64sp:$base, i32imm:$offset);
-}
-
-class BaseLoadStorePairPreIdx<bits<2> opc, bit V, bit L, dag oops, dag iops,
- string asm>
- : I<oops, iops, asm, "\t$Rt, $Rt2, $addr!", "", []> {
- // The operands are in order to match the 'addr' MI operands, so we
- // don't need an encoder method and by-name matching. Just use the default
- // in-order handling. Since we're using by-order, make sure the names
- // do not match.
- bits<5> dst;
- bits<5> dst2;
- bits<5> base;
- bits<7> offset;
- let Inst{31-30} = opc;
- let Inst{29-27} = 0b101;
- let Inst{26} = V;
- let Inst{25-23} = 0b011;
- let Inst{22} = L;
- let Inst{21-15} = offset;
- let Inst{14-10} = dst2;
- let Inst{9-5} = base;
- let Inst{4-0} = dst;
-
- let DecoderMethod = "DecodePairLdStInstruction";
-}
-
-let hasSideEffects = 0 in {
-let mayStore = 0, mayLoad = 1 in
-class LoadPairPreIdx<bits<2> opc, bit V, RegisterClass regtype,
- Operand addrmode, string asm>
- : BaseLoadStorePairPreIdx<opc, V, 1,
- (outs regtype:$Rt, regtype:$Rt2),
- (ins addrmode:$addr), asm>,
- Sched<[WriteLD, WriteLDHi, WriteAdr]>;
-
-let mayStore = 1, mayLoad = 0 in
-class StorePairPreIdx<bits<2> opc, bit V, RegisterClass regtype,
- Operand addrmode, string asm>
- : BaseLoadStorePairPreIdx<opc, V, 0, (outs),
- (ins regtype:$Rt, regtype:$Rt2, addrmode:$addr),
- asm>,
- Sched<[WriteAdr, WriteSTP]>;
-} // hasSideEffects = 0
-
-// (post-indexed)
-
-class BaseLoadStorePairPostIdx<bits<2> opc, bit V, bit L, dag oops, dag iops,
- string asm>
- : I<oops, iops, asm, "\t$Rt, $Rt2, $addr, $idx", "", []> {
-  // The operands are listed in the same order as the 'addr' MI operands, so
-  // no encoder method or by-name matching is needed; the default in-order
-  // handling works. Since matching is by order, make sure the operand names
-  // here do not match the MI operand names.
- bits<5> dst;
- bits<5> dst2;
- bits<5> base;
- bits<7> offset;
- let Inst{31-30} = opc;
- let Inst{29-27} = 0b101;
- let Inst{26} = V;
- let Inst{25-23} = 0b001;
- let Inst{22} = L;
- let Inst{21-15} = offset;
- let Inst{14-10} = dst2;
- let Inst{9-5} = base;
- let Inst{4-0} = dst;
-
- let DecoderMethod = "DecodePairLdStInstruction";
-}
-
-let hasSideEffects = 0 in {
-let mayStore = 0, mayLoad = 1 in
-class LoadPairPostIdx<bits<2> opc, bit V, RegisterClass regtype,
- Operand idxtype, string asm>
- : BaseLoadStorePairPostIdx<opc, V, 1,
- (outs regtype:$Rt, regtype:$Rt2),
- (ins am_noindex:$addr, idxtype:$idx), asm>,
- Sched<[WriteLD, WriteLDHi, WriteAdr]>;
-
-let mayStore = 1, mayLoad = 0 in
-class StorePairPostIdx<bits<2> opc, bit V, RegisterClass regtype,
- Operand idxtype, string asm>
- : BaseLoadStorePairPostIdx<opc, V, 0, (outs),
- (ins regtype:$Rt, regtype:$Rt2,
- am_noindex:$addr, idxtype:$idx),
- asm>,
- Sched<[WriteAdr, WriteSTP]>;
-} // hasSideEffects = 0
-
-// (no-allocate)
-
-class BaseLoadStorePairNoAlloc<bits<2> opc, bit V, bit L, dag oops, dag iops,
- string asm>
- : I<oops, iops, asm, "\t$Rt, $Rt2, $addr", "", []> {
-  // The operands are listed in the same order as the 'addr' MI operands, so
-  // no encoder method or by-name matching is needed; the default in-order
-  // handling works. Since matching is by order, make sure the operand names
-  // here do not match the MI operand names.
- bits<5> dst;
- bits<5> dst2;
- bits<5> base;
- bits<7> offset;
- let Inst{31-30} = opc;
- let Inst{29-27} = 0b101;
- let Inst{26} = V;
- let Inst{25-23} = 0b000;
- let Inst{22} = L;
- let Inst{21-15} = offset;
- let Inst{14-10} = dst2;
- let Inst{9-5} = base;
- let Inst{4-0} = dst;
-
- let DecoderMethod = "DecodePairLdStInstruction";
-}
-
-let hasSideEffects = 0 in {
-let mayStore = 0, mayLoad = 1 in
-class LoadPairNoAlloc<bits<2> opc, bit V, RegisterClass regtype,
- Operand indextype, string asm>
- : BaseLoadStorePairNoAlloc<opc, V, 1,
- (outs regtype:$Rt, regtype:$Rt2),
- (ins indextype:$addr), asm>,
- Sched<[WriteLD, WriteLDHi]>;
-
-let mayStore = 1, mayLoad = 0 in
-class StorePairNoAlloc<bits<2> opc, bit V, RegisterClass regtype,
- Operand indextype, string asm>
- : BaseLoadStorePairNoAlloc<opc, V, 0, (outs),
- (ins regtype:$Rt, regtype:$Rt2, indextype:$addr),
- asm>,
- Sched<[WriteSTP]>;
-} // hasSideEffects = 0
-
-//---
-// Load/store exclusive
-//---
-
-// True exclusive operations write to and/or read from the system's exclusive
-// monitors, which, as far as the compiler is concerned, can be modelled as a
-// random shared memory address. Hence even LoadExclusive is marked mayStore.
-let hasSideEffects = 1, mayLoad = 1, mayStore = 1 in
-class BaseLoadStoreExclusive<bits<2> sz, bit o2, bit L, bit o1, bit o0,
- dag oops, dag iops, string asm, string operands>
- : I<oops, iops, asm, operands, "", []> {
- let Inst{31-30} = sz;
- let Inst{29-24} = 0b001000;
- let Inst{23} = o2;
- let Inst{22} = L;
- let Inst{21} = o1;
- let Inst{15} = o0;
-
- let DecoderMethod = "DecodeExclusiveLdStInstruction";
-}
-
-// Neither Rs nor Rt2 operands.
-class LoadStoreExclusiveSimple<bits<2> sz, bit o2, bit L, bit o1, bit o0,
- dag oops, dag iops, string asm, string operands>
- : BaseLoadStoreExclusive<sz, o2, L, o1, o0, oops, iops, asm, operands> {
- bits<5> reg;
- bits<5> base;
- let Inst{20-16} = 0b11111;
- let Inst{14-10} = 0b11111;
- let Inst{9-5} = base;
- let Inst{4-0} = reg;
-}
-
-// Simple load-acquire operations do not set the exclusive monitor.
-let mayLoad = 1, mayStore = 0 in
-class LoadAcquire<bits<2> sz, bit o2, bit L, bit o1, bit o0,
- RegisterClass regtype, string asm>
- : LoadStoreExclusiveSimple<sz, o2, L, o1, o0, (outs regtype:$Rt),
- (ins am_noindex:$addr), asm, "\t$Rt, $addr">,
- Sched<[WriteLD]>;
-
-class LoadExclusive<bits<2> sz, bit o2, bit L, bit o1, bit o0,
- RegisterClass regtype, string asm>
- : LoadStoreExclusiveSimple<sz, o2, L, o1, o0, (outs regtype:$Rt),
- (ins am_noindex:$addr), asm, "\t$Rt, $addr">,
- Sched<[WriteLD]>;
-
-class LoadExclusivePair<bits<2> sz, bit o2, bit L, bit o1, bit o0,
- RegisterClass regtype, string asm>
- : BaseLoadStoreExclusive<sz, o2, L, o1, o0,
- (outs regtype:$Rt, regtype:$Rt2),
- (ins am_noindex:$addr), asm,
- "\t$Rt, $Rt2, $addr">,
- Sched<[WriteLD, WriteLDHi]> {
- bits<5> dst1;
- bits<5> dst2;
- bits<5> base;
- let Inst{20-16} = 0b11111;
- let Inst{14-10} = dst2;
- let Inst{9-5} = base;
- let Inst{4-0} = dst1;
-}
-
-// Simple store release operations do not check the exclusive monitor.
-let mayLoad = 0, mayStore = 1 in
-class StoreRelease<bits<2> sz, bit o2, bit L, bit o1, bit o0,
- RegisterClass regtype, string asm>
- : LoadStoreExclusiveSimple<sz, o2, L, o1, o0, (outs),
- (ins regtype:$Rt, am_noindex:$addr),
- asm, "\t$Rt, $addr">,
- Sched<[WriteST]>;
-
-let mayLoad = 1, mayStore = 1 in
-class StoreExclusive<bits<2> sz, bit o2, bit L, bit o1, bit o0,
- RegisterClass regtype, string asm>
- : BaseLoadStoreExclusive<sz, o2, L, o1, o0, (outs GPR32:$Ws),
- (ins regtype:$Rt, am_noindex:$addr),
- asm, "\t$Ws, $Rt, $addr">,
- Sched<[WriteSTX]> {
- bits<5> status;
- bits<5> reg;
- bits<5> base;
- let Inst{20-16} = status;
- let Inst{14-10} = 0b11111;
- let Inst{9-5} = base;
- let Inst{4-0} = reg;
-
- let Constraints = "@earlyclobber $Ws";
-}
-
-class StoreExclusivePair<bits<2> sz, bit o2, bit L, bit o1, bit o0,
- RegisterClass regtype, string asm>
- : BaseLoadStoreExclusive<sz, o2, L, o1, o0,
- (outs GPR32:$Ws),
- (ins regtype:$Rt, regtype:$Rt2, am_noindex:$addr),
- asm, "\t$Ws, $Rt, $Rt2, $addr">,
- Sched<[WriteSTX]> {
- bits<5> status;
- bits<5> dst1;
- bits<5> dst2;
- bits<5> base;
- let Inst{20-16} = status;
- let Inst{14-10} = dst2;
- let Inst{9-5} = base;
- let Inst{4-0} = dst1;
-
- let Constraints = "@earlyclobber $Ws";
-}
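-
-// Example uses (schematic; the size/opcode bits are illustrative and the real
-// definitions live in ARM64InstrInfo.td), e.g. a 32-bit exclusive pair:
-//   def LDXRW : LoadExclusive<0b10, 0, 1, 0, 0, GPR32, "ldxr">;
-//   def STXRW : StoreExclusive<0b10, 0, 0, 0, 0, GPR32, "stxr">;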
-
-//---
-// Exception generation
-//---
-
-let mayLoad = 0, mayStore = 0, hasSideEffects = 1 in
-class ExceptionGeneration<bits<3> op1, bits<2> ll, string asm>
- : I<(outs), (ins imm0_65535:$imm), asm, "\t$imm", "", []>,
- Sched<[WriteSys]> {
- bits<16> imm;
- let Inst{31-24} = 0b11010100;
- let Inst{23-21} = op1;
- let Inst{20-5} = imm;
- let Inst{4-2} = 0b000;
- let Inst{1-0} = ll;
-}
-
-//---
-// Floating point to integer conversion
-//---
-
-class BaseFPToIntegerUnscaled<bits<2> type, bits<2> rmode, bits<3> opcode,
- RegisterClass srcType, RegisterClass dstType,
- string asm, list<dag> pattern>
- : I<(outs dstType:$Rd), (ins srcType:$Rn),
- asm, "\t$Rd, $Rn", "", pattern>,
- Sched<[WriteFCvt]> {
- bits<5> Rd;
- bits<5> Rn;
- let Inst{30} = 0;
- let Inst{28-24} = 0b11110;
- let Inst{23-22} = type;
- let Inst{21} = 1;
- let Inst{20-19} = rmode;
- let Inst{18-16} = opcode;
- let Inst{15-10} = 0;
- let Inst{9-5} = Rn;
- let Inst{4-0} = Rd;
-}
-
-let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
-class BaseFPToInteger<bits<2> type, bits<2> rmode, bits<3> opcode,
- RegisterClass srcType, RegisterClass dstType,
- Operand immType, string asm>
- : I<(outs dstType:$Rd), (ins srcType:$Rn, immType:$scale),
- asm, "\t$Rd, $Rn, $scale", "", []>,
- Sched<[WriteFCvt]> {
- bits<5> Rd;
- bits<5> Rn;
- bits<6> scale;
- let Inst{30} = 0;
- let Inst{28-24} = 0b11110;
- let Inst{23-22} = type;
- let Inst{21} = 0;
- let Inst{20-19} = rmode;
- let Inst{18-16} = opcode;
- let Inst{15-10} = scale;
- let Inst{9-5} = Rn;
- let Inst{4-0} = Rd;
-}
-
-multiclass FPToInteger<bits<2> rmode, bits<3> opcode, string asm,
-                       SDPatternOperator OpN> {
- // Unscaled single-precision to 32-bit
- def UWSr : BaseFPToIntegerUnscaled<0b00, rmode, opcode, FPR32, GPR32, asm,
- [(set GPR32:$Rd, (OpN FPR32:$Rn))]> {
- let Inst{31} = 0; // 32-bit GPR flag
- }
-
- // Unscaled single-precision to 64-bit
- def UXSr : BaseFPToIntegerUnscaled<0b00, rmode, opcode, FPR32, GPR64, asm,
- [(set GPR64:$Rd, (OpN FPR32:$Rn))]> {
- let Inst{31} = 1; // 64-bit GPR flag
- }
-
- // Unscaled double-precision to 32-bit
- def UWDr : BaseFPToIntegerUnscaled<0b01, rmode, opcode, FPR64, GPR32, asm,
- [(set GPR32:$Rd, (OpN (f64 FPR64:$Rn)))]> {
- let Inst{31} = 0; // 32-bit GPR flag
- }
-
- // Unscaled double-precision to 64-bit
- def UXDr : BaseFPToIntegerUnscaled<0b01, rmode, opcode, FPR64, GPR64, asm,
- [(set GPR64:$Rd, (OpN (f64 FPR64:$Rn)))]> {
- let Inst{31} = 1; // 64-bit GPR flag
- }
-
- // Scaled single-precision to 32-bit
- def SWSri : BaseFPToInteger<0b00, rmode, opcode, FPR32, GPR32,
- fixedpoint32, asm> {
- let Inst{31} = 0; // 32-bit GPR flag
- }
-
- // Scaled single-precision to 64-bit
- def SXSri : BaseFPToInteger<0b00, rmode, opcode, FPR32, GPR64,
- fixedpoint64, asm> {
- let Inst{31} = 1; // 64-bit GPR flag
- }
-
- // Scaled double-precision to 32-bit
- def SWDri : BaseFPToInteger<0b01, rmode, opcode, FPR64, GPR32,
- fixedpoint32, asm> {
- let Inst{31} = 0; // 32-bit GPR flag
- }
-
- // Scaled double-precision to 64-bit
- def SXDri : BaseFPToInteger<0b01, rmode, opcode, FPR64, GPR64,
- fixedpoint64, asm> {
- let Inst{31} = 1; // 64-bit GPR flag
- }
-}
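-
-// Schematically, one line in ARM64InstrInfo.td then produces the whole
-// scaled/unscaled family, e.g. (bits illustrative):
-//   defm FCVTZS : FPToInteger<0b11, 0b000, "fcvtzs", fp_to_sint>;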
-
-//---
-// Integer to floating point conversion
-//---
-
-let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in
-class BaseIntegerToFP<bit isUnsigned,
- RegisterClass srcType, RegisterClass dstType,
- Operand immType, string asm>
- : I<(outs dstType:$Rd), (ins srcType:$Rn, immType:$scale),
- asm, "\t$Rd, $Rn, $scale", "", []>,
- Sched<[WriteFCvt]> {
- bits<5> Rd;
- bits<5> Rn;
- bits<6> scale;
- let Inst{30-23} = 0b00111100;
- let Inst{21-17} = 0b00001;
- let Inst{16} = isUnsigned;
- let Inst{15-10} = scale;
- let Inst{9-5} = Rn;
- let Inst{4-0} = Rd;
-}
-
-class BaseIntegerToFPUnscaled<bit isUnsigned,
- RegisterClass srcType, RegisterClass dstType,
- ValueType dvt, string asm, SDNode node>
- : I<(outs dstType:$Rd), (ins srcType:$Rn),
- asm, "\t$Rd, $Rn", "", [(set (dvt dstType:$Rd), (node srcType:$Rn))]>,
- Sched<[WriteFCvt]> {
- bits<5> Rd;
- bits<5> Rn;
- bits<6> scale;
- let Inst{30-23} = 0b00111100;
- let Inst{21-17} = 0b10001;
- let Inst{16} = isUnsigned;
- let Inst{15-10} = 0b000000;
- let Inst{9-5} = Rn;
- let Inst{4-0} = Rd;
-}
-
-multiclass IntegerToFP<bit isUnsigned, string asm, SDNode node> {
- // Unscaled
- def UWSri: BaseIntegerToFPUnscaled<isUnsigned, GPR32, FPR32, f32, asm, node> {
- let Inst{31} = 0; // 32-bit GPR flag
- let Inst{22} = 0; // 32-bit FPR flag
- }
-
- def UWDri: BaseIntegerToFPUnscaled<isUnsigned, GPR32, FPR64, f64, asm, node> {
- let Inst{31} = 0; // 32-bit GPR flag
- let Inst{22} = 1; // 64-bit FPR flag
- }
-
- def UXSri: BaseIntegerToFPUnscaled<isUnsigned, GPR64, FPR32, f32, asm, node> {
- let Inst{31} = 1; // 64-bit GPR flag
- let Inst{22} = 0; // 32-bit FPR flag
- }
-
- def UXDri: BaseIntegerToFPUnscaled<isUnsigned, GPR64, FPR64, f64, asm, node> {
- let Inst{31} = 1; // 64-bit GPR flag
- let Inst{22} = 1; // 64-bit FPR flag
- }
-
- // Scaled
- def SWSri: BaseIntegerToFP<isUnsigned, GPR32, FPR32, fixedpoint32, asm> {
- let Inst{31} = 0; // 32-bit GPR flag
- let Inst{22} = 0; // 32-bit FPR flag
- }
-
- def SWDri: BaseIntegerToFP<isUnsigned, GPR32, FPR64, fixedpoint32, asm> {
- let Inst{31} = 0; // 32-bit GPR flag
- let Inst{22} = 1; // 64-bit FPR flag
- }
-
- def SXSri: BaseIntegerToFP<isUnsigned, GPR64, FPR32, fixedpoint64, asm> {
- let Inst{31} = 1; // 64-bit GPR flag
- let Inst{22} = 0; // 32-bit FPR flag
- }
-
- def SXDri: BaseIntegerToFP<isUnsigned, GPR64, FPR64, fixedpoint64, asm> {
- let Inst{31} = 1; // 64-bit GPR flag
- let Inst{22} = 1; // 64-bit FPR flag
- }
-}
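-
-// Illustrative instantiations (see ARM64InstrInfo.td for the actual defs);
-// the isUnsigned bit selects between the two conversions:
-//   defm SCVTF : IntegerToFP<0, "scvtf", sint_to_fp>;
-//   defm UCVTF : IntegerToFP<1, "ucvtf", uint_to_fp>;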
-
-//---
-// Unscaled integer <-> floating point conversion (i.e. FMOV)
-//---
-
-let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
-class BaseUnscaledConversion<bits<2> rmode, bits<3> opcode,
- RegisterClass srcType, RegisterClass dstType,
- string asm>
- : I<(outs dstType:$Rd), (ins srcType:$Rn), asm, "\t$Rd, $Rn", "",
- // We use COPY_TO_REGCLASS for these bitconvert operations.
- // copyPhysReg() expands the resultant COPY instructions after
- // regalloc is done. This gives greater freedom for the allocator
-      // and related passes (coalescing, copy propagation, et al.) to
- // be more effective.
- [/*(set (dvt dstType:$Rd), (bitconvert (svt srcType:$Rn)))*/]>,
- Sched<[WriteFCopy]> {
- bits<5> Rd;
- bits<5> Rn;
- let Inst{30-23} = 0b00111100;
- let Inst{21} = 1;
- let Inst{20-19} = rmode;
- let Inst{18-16} = opcode;
- let Inst{15-10} = 0b000000;
- let Inst{9-5} = Rn;
- let Inst{4-0} = Rd;
-}
-
-let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
-class BaseUnscaledConversionToHigh<bits<2> rmode, bits<3> opcode,
- RegisterClass srcType, RegisterOperand dstType, string asm>
- : I<(outs dstType:$Rd), (ins srcType:$Rn), asm, "\t$Rd[1], $Rn", "", []>,
- Sched<[WriteFCopy]> {
- bits<5> Rd;
- bits<5> Rn;
- let Inst{30-23} = 0b00111101;
- let Inst{21} = 1;
- let Inst{20-19} = rmode;
- let Inst{18-16} = opcode;
- let Inst{15-10} = 0b000000;
- let Inst{9-5} = Rn;
- let Inst{4-0} = Rd;
-}
-
-let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
-class BaseUnscaledConversionFromHigh<bits<2> rmode, bits<3> opcode,
- RegisterOperand srcType, RegisterClass dstType, string asm>
- : I<(outs dstType:$Rd), (ins srcType:$Rn), asm, "\t$Rd, $Rn[1]", "", []>,
- Sched<[WriteFCopy]> {
- bits<5> Rd;
- bits<5> Rn;
- let Inst{30-23} = 0b00111101;
- let Inst{21} = 1;
- let Inst{20-19} = rmode;
- let Inst{18-16} = opcode;
- let Inst{15-10} = 0b000000;
- let Inst{9-5} = Rn;
- let Inst{4-0} = Rd;
-}
-
-multiclass UnscaledConversion<string asm> {
- def WSr : BaseUnscaledConversion<0b00, 0b111, GPR32, FPR32, asm> {
- let Inst{31} = 0; // 32-bit GPR flag
- let Inst{22} = 0; // 32-bit FPR flag
- }
-
- def XDr : BaseUnscaledConversion<0b00, 0b111, GPR64, FPR64, asm> {
- let Inst{31} = 1; // 64-bit GPR flag
- let Inst{22} = 1; // 64-bit FPR flag
- }
-
- def SWr : BaseUnscaledConversion<0b00, 0b110, FPR32, GPR32, asm> {
- let Inst{31} = 0; // 32-bit GPR flag
- let Inst{22} = 0; // 32-bit FPR flag
- }
-
- def DXr : BaseUnscaledConversion<0b00, 0b110, FPR64, GPR64, asm> {
- let Inst{31} = 1; // 64-bit GPR flag
- let Inst{22} = 1; // 64-bit FPR flag
- }
-
- def XDHighr : BaseUnscaledConversionToHigh<0b01, 0b111, GPR64, V128,
- asm#".d"> {
- let Inst{31} = 1;
- let Inst{22} = 0;
- }
-
- def DXHighr : BaseUnscaledConversionFromHigh<0b01, 0b110, V128, GPR64,
- asm#".d"> {
- let Inst{31} = 1;
- let Inst{22} = 0;
- }
-
- def : InstAlias<asm#"$Vd.d[1], $Rn",
- (!cast<Instruction>(NAME#XDHighr) V128:$Vd, GPR64:$Rn), 0>;
- def : InstAlias<asm#"$Rd, $Vn.d[1]",
- (!cast<Instruction>(NAME#DXHighr) GPR64:$Rd, V128:$Vn), 0>;
-}
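-
-// Typical use (schematic):
-//   defm FMOV : UnscaledConversion<"fmov">;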
-
-//---
-// Floating point conversion
-//---
-
-class BaseFPConversion<bits<2> type, bits<2> opcode, RegisterClass dstType,
- RegisterClass srcType, string asm, list<dag> pattern>
- : I<(outs dstType:$Rd), (ins srcType:$Rn), asm, "\t$Rd, $Rn", "", pattern>,
- Sched<[WriteFCvt]> {
- bits<5> Rd;
- bits<5> Rn;
- let Inst{31-24} = 0b00011110;
- let Inst{23-22} = type;
- let Inst{21-17} = 0b10001;
- let Inst{16-15} = opcode;
- let Inst{14-10} = 0b10000;
- let Inst{9-5} = Rn;
- let Inst{4-0} = Rd;
-}
-
-multiclass FPConversion<string asm> {
- // Double-precision to Half-precision
- let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
- def HDr : BaseFPConversion<0b01, 0b11, FPR16, FPR64, asm, []>;
-
- // Double-precision to Single-precision
- def SDr : BaseFPConversion<0b01, 0b00, FPR32, FPR64, asm,
- [(set FPR32:$Rd, (fround FPR64:$Rn))]>;
-
- // Half-precision to Double-precision
- let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
- def DHr : BaseFPConversion<0b11, 0b01, FPR64, FPR16, asm, []>;
-
- // Half-precision to Single-precision
- let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
- def SHr : BaseFPConversion<0b11, 0b00, FPR32, FPR16, asm, []>;
-
- // Single-precision to Double-precision
- def DSr : BaseFPConversion<0b00, 0b01, FPR64, FPR32, asm,
- [(set FPR64:$Rd, (fextend FPR32:$Rn))]>;
-
- // Single-precision to Half-precision
- let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
- def HSr : BaseFPConversion<0b00, 0b11, FPR16, FPR32, asm, []>;
-}
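-
-// Typical use (schematic):
-//   defm FCVT : FPConversion<"fcvt">;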
-
-//---
-// Single operand floating point data processing
-//---
-
-let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
-class BaseSingleOperandFPData<bits<4> opcode, RegisterClass regtype,
- ValueType vt, string asm, SDPatternOperator node>
- : I<(outs regtype:$Rd), (ins regtype:$Rn), asm, "\t$Rd, $Rn", "",
- [(set (vt regtype:$Rd), (node (vt regtype:$Rn)))]>,
- Sched<[WriteF]> {
- bits<5> Rd;
- bits<5> Rn;
- let Inst{31-23} = 0b000111100;
- let Inst{21-19} = 0b100;
- let Inst{18-15} = opcode;
- let Inst{14-10} = 0b10000;
- let Inst{9-5} = Rn;
- let Inst{4-0} = Rd;
-}
-
-multiclass SingleOperandFPData<bits<4> opcode, string asm,
- SDPatternOperator node = null_frag> {
- def Sr : BaseSingleOperandFPData<opcode, FPR32, f32, asm, node> {
- let Inst{22} = 0; // 32-bit size flag
- }
-
- def Dr : BaseSingleOperandFPData<opcode, FPR64, f64, asm, node> {
- let Inst{22} = 1; // 64-bit size flag
- }
-}
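-
-// For example (opcode bits illustrative; actual defs in ARM64InstrInfo.td):
-//   defm FABS : SingleOperandFPData<0b0001, "fabs", fabs>;
-//   defm FNEG : SingleOperandFPData<0b0010, "fneg", fneg>;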
-
-//---
-// Two operand floating point data processing
-//---
-
-let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
-class BaseTwoOperandFPData<bits<4> opcode, RegisterClass regtype,
- string asm, list<dag> pat>
- : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm),
- asm, "\t$Rd, $Rn, $Rm", "", pat>,
- Sched<[WriteF]> {
- bits<5> Rd;
- bits<5> Rn;
- bits<5> Rm;
- let Inst{31-23} = 0b000111100;
- let Inst{21} = 1;
- let Inst{20-16} = Rm;
- let Inst{15-12} = opcode;
- let Inst{11-10} = 0b10;
- let Inst{9-5} = Rn;
- let Inst{4-0} = Rd;
-}
-
-multiclass TwoOperandFPData<bits<4> opcode, string asm,
- SDPatternOperator node = null_frag> {
- def Srr : BaseTwoOperandFPData<opcode, FPR32, asm,
- [(set (f32 FPR32:$Rd),
- (node (f32 FPR32:$Rn), (f32 FPR32:$Rm)))]> {
- let Inst{22} = 0; // 32-bit size flag
- }
-
- def Drr : BaseTwoOperandFPData<opcode, FPR64, asm,
- [(set (f64 FPR64:$Rd),
- (node (f64 FPR64:$Rn), (f64 FPR64:$Rm)))]> {
- let Inst{22} = 1; // 64-bit size flag
- }
-}
-
-multiclass TwoOperandFPDataNeg<bits<4> opcode, string asm, SDNode node> {
- def Srr : BaseTwoOperandFPData<opcode, FPR32, asm,
- [(set FPR32:$Rd, (fneg (node FPR32:$Rn, (f32 FPR32:$Rm))))]> {
- let Inst{22} = 0; // 32-bit size flag
- }
-
- def Drr : BaseTwoOperandFPData<opcode, FPR64, asm,
- [(set FPR64:$Rd, (fneg (node FPR64:$Rn, (f64 FPR64:$Rm))))]> {
- let Inst{22} = 1; // 64-bit size flag
- }
-}
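-
-// Representative uses of the two multiclasses above (bits illustrative):
-//   defm FADD  : TwoOperandFPData<0b0010, "fadd", fadd>;
-//   defm FNMUL : TwoOperandFPDataNeg<0b1000, "fnmul", fmul>;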
-
-
-//---
-// Three operand floating point data processing
-//---
-
-class BaseThreeOperandFPData<bit isNegated, bit isSub,
- RegisterClass regtype, string asm, list<dag> pat>
-  : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm, regtype:$Ra),
- asm, "\t$Rd, $Rn, $Rm, $Ra", "", pat>,
- Sched<[WriteFMul]> {
- bits<5> Rd;
- bits<5> Rn;
- bits<5> Rm;
- bits<5> Ra;
- let Inst{31-23} = 0b000111110;
- let Inst{21} = isNegated;
- let Inst{20-16} = Rm;
- let Inst{15} = isSub;
- let Inst{14-10} = Ra;
- let Inst{9-5} = Rn;
- let Inst{4-0} = Rd;
-}
-
-multiclass ThreeOperandFPData<bit isNegated, bit isSub, string asm,
- SDPatternOperator node> {
- def Srrr : BaseThreeOperandFPData<isNegated, isSub, FPR32, asm,
- [(set FPR32:$Rd,
- (node (f32 FPR32:$Rn), (f32 FPR32:$Rm), (f32 FPR32:$Ra)))]> {
- let Inst{22} = 0; // 32-bit size flag
- }
-
- def Drrr : BaseThreeOperandFPData<isNegated, isSub, FPR64, asm,
- [(set FPR64:$Rd,
- (node (f64 FPR64:$Rn), (f64 FPR64:$Rm), (f64 FPR64:$Ra)))]> {
- let Inst{22} = 1; // 64-bit size flag
- }
-}
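-
-// e.g. fused multiply-add, with isNegated = 0 and isSub = 0 (schematic):
-//   defm FMADD : ThreeOperandFPData<0, 0, "fmadd", fma>;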
-
-//---
-// Floating point data comparisons
-//---
-
-let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
-class BaseOneOperandFPComparison<bit signalAllNans,
- RegisterClass regtype, string asm,
- list<dag> pat>
- : I<(outs), (ins regtype:$Rn), asm, "\t$Rn, #0.0", "", pat>,
- Sched<[WriteFCmp]> {
- bits<5> Rn;
- let Inst{31-23} = 0b000111100;
- let Inst{21} = 1;
-
- let Inst{20-16} = 0b00000;
- let Inst{15-10} = 0b001000;
- let Inst{9-5} = Rn;
- let Inst{4} = signalAllNans;
- let Inst{3-0} = 0b1000;
-}
-
-let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
-class BaseTwoOperandFPComparison<bit signalAllNans, RegisterClass regtype,
- string asm, list<dag> pat>
- : I<(outs), (ins regtype:$Rn, regtype:$Rm), asm, "\t$Rn, $Rm", "", pat>,
- Sched<[WriteFCmp]> {
- bits<5> Rm;
- bits<5> Rn;
- let Inst{31-23} = 0b000111100;
- let Inst{21} = 1;
- let Inst{20-16} = Rm;
- let Inst{15-10} = 0b001000;
- let Inst{9-5} = Rn;
- let Inst{4} = signalAllNans;
- let Inst{3-0} = 0b0000;
-}
-
-multiclass FPComparison<bit signalAllNans, string asm,
- SDPatternOperator OpNode = null_frag> {
- let Defs = [CPSR] in {
- def Srr : BaseTwoOperandFPComparison<signalAllNans, FPR32, asm,
- [(OpNode FPR32:$Rn, (f32 FPR32:$Rm)), (implicit CPSR)]> {
- let Inst{22} = 0;
- }
-
- def Sri : BaseOneOperandFPComparison<signalAllNans, FPR32, asm,
- [(OpNode (f32 FPR32:$Rn), fpimm0), (implicit CPSR)]> {
- let Inst{22} = 0;
- }
-
- def Drr : BaseTwoOperandFPComparison<signalAllNans, FPR64, asm,
- [(OpNode FPR64:$Rn, (f64 FPR64:$Rm)), (implicit CPSR)]> {
- let Inst{22} = 1;
- }
-
- def Dri : BaseOneOperandFPComparison<signalAllNans, FPR64, asm,
- [(OpNode (f64 FPR64:$Rn), fpimm0), (implicit CPSR)]> {
- let Inst{22} = 1;
- }
- } // Defs = [CPSR]
-}
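-
-// Illustrative uses; ARM64fcmp here stands for the target's flag-setting
-// compare node, which is defined elsewhere in the backend:
-//   defm FCMP  : FPComparison<0, "fcmp", ARM64fcmp>;
-//   defm FCMPE : FPComparison<1, "fcmpe">;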
-
-//---
-// Floating point conditional comparisons
-//---
-
-let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
-class BaseFPCondComparison<bit signalAllNans,
- RegisterClass regtype, string asm>
- : I<(outs), (ins regtype:$Rn, regtype:$Rm, imm0_15:$nzcv, ccode:$cond),
- asm, "\t$Rn, $Rm, $nzcv, $cond", "", []>,
- Sched<[WriteFCmp]> {
- bits<5> Rn;
- bits<5> Rm;
- bits<4> nzcv;
- bits<4> cond;
-
- let Inst{31-23} = 0b000111100;
- let Inst{21} = 1;
- let Inst{20-16} = Rm;
- let Inst{15-12} = cond;
- let Inst{11-10} = 0b01;
- let Inst{9-5} = Rn;
- let Inst{4} = signalAllNans;
- let Inst{3-0} = nzcv;
-}
-
-multiclass FPCondComparison<bit signalAllNans, string asm> {
- let Defs = [CPSR], Uses = [CPSR] in {
- def Srr : BaseFPCondComparison<signalAllNans, FPR32, asm> {
- let Inst{22} = 0;
- }
-
- def Drr : BaseFPCondComparison<signalAllNans, FPR64, asm> {
- let Inst{22} = 1;
- }
- } // Defs = [CPSR], Uses = [CPSR]
-}
-
-//---
-// Floating point conditional select
-//---
-
-class BaseFPCondSelect<RegisterClass regtype, ValueType vt, string asm>
- : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm, ccode:$cond),
- asm, "\t$Rd, $Rn, $Rm, $cond", "",
- [(set regtype:$Rd,
- (ARM64csel (vt regtype:$Rn), regtype:$Rm,
- (i32 imm:$cond), CPSR))]>,
- Sched<[WriteF]> {
- bits<5> Rd;
- bits<5> Rn;
- bits<5> Rm;
- bits<4> cond;
-
- let Inst{31-23} = 0b000111100;
- let Inst{21} = 1;
- let Inst{20-16} = Rm;
- let Inst{15-12} = cond;
- let Inst{11-10} = 0b11;
- let Inst{9-5} = Rn;
- let Inst{4-0} = Rd;
-}
-
-multiclass FPCondSelect<string asm> {
- let Uses = [CPSR] in {
- def Srrr : BaseFPCondSelect<FPR32, f32, asm> {
- let Inst{22} = 0;
- }
-
- def Drrr : BaseFPCondSelect<FPR64, f64, asm> {
- let Inst{22} = 1;
- }
- } // Uses = [CPSR]
-}
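-
-// Typical use (schematic):
-//   defm FCSEL : FPCondSelect<"fcsel">;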
-
-//---
-// Floating move immediate
-//---
-
-class BaseFPMoveImmediate<RegisterClass regtype, Operand fpimmtype, string asm>
- : I<(outs regtype:$Rd), (ins fpimmtype:$imm), asm, "\t$Rd, $imm", "",
- [(set regtype:$Rd, fpimmtype:$imm)]>,
- Sched<[WriteFImm]> {
- bits<5> Rd;
- bits<8> imm;
- let Inst{31-23} = 0b000111100;
- let Inst{21} = 1;
- let Inst{20-13} = imm;
- let Inst{12-5} = 0b10000000;
- let Inst{4-0} = Rd;
-}
-
-multiclass FPMoveImmediate<string asm> {
- def Si : BaseFPMoveImmediate<FPR32, fpimm32, asm> {
- let Inst{22} = 0;
- }
-
- def Di : BaseFPMoveImmediate<FPR64, fpimm64, asm> {
- let Inst{22} = 1;
- }
-}
-
-//----------------------------------------------------------------------------
-// AdvSIMD
-//----------------------------------------------------------------------------
-
-def VectorIndexBOperand : AsmOperandClass { let Name = "VectorIndexB"; }
-def VectorIndexHOperand : AsmOperandClass { let Name = "VectorIndexH"; }
-def VectorIndexSOperand : AsmOperandClass { let Name = "VectorIndexS"; }
-def VectorIndexDOperand : AsmOperandClass { let Name = "VectorIndexD"; }
-def VectorIndexB : Operand<i64>, ImmLeaf<i64, [{
- return ((uint64_t)Imm) < 16;
-}]> {
- let ParserMatchClass = VectorIndexBOperand;
- let PrintMethod = "printVectorIndex";
- let MIOperandInfo = (ops i64imm);
-}
-def VectorIndexH : Operand<i64>, ImmLeaf<i64, [{
- return ((uint64_t)Imm) < 8;
-}]> {
- let ParserMatchClass = VectorIndexHOperand;
- let PrintMethod = "printVectorIndex";
- let MIOperandInfo = (ops i64imm);
-}
-def VectorIndexS : Operand<i64>, ImmLeaf<i64, [{
- return ((uint64_t)Imm) < 4;
-}]> {
- let ParserMatchClass = VectorIndexSOperand;
- let PrintMethod = "printVectorIndex";
- let MIOperandInfo = (ops i64imm);
-}
-def VectorIndexD : Operand<i64>, ImmLeaf<i64, [{
- return ((uint64_t)Imm) < 2;
-}]> {
- let ParserMatchClass = VectorIndexDOperand;
- let PrintMethod = "printVectorIndex";
- let MIOperandInfo = (ops i64imm);
-}
-
-//----------------------------------------------------------------------------
-// AdvSIMD three register vector instructions.
-//----------------------------------------------------------------------------
-
-let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
-class BaseSIMDThreeSameVector<bit Q, bit U, bits<2> size, bits<5> opcode,
- RegisterOperand regtype, string asm, string kind,
- list<dag> pattern>
- : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm), asm,
- "{\t$Rd" # kind # ", $Rn" # kind # ", $Rm" # kind #
- "|" # kind # "\t$Rd, $Rn, $Rm|}", "", pattern>,
- Sched<[WriteV]> {
- bits<5> Rd;
- bits<5> Rn;
- bits<5> Rm;
- let Inst{31} = 0;
- let Inst{30} = Q;
- let Inst{29} = U;
- let Inst{28-24} = 0b01110;
- let Inst{23-22} = size;
- let Inst{21} = 1;
- let Inst{20-16} = Rm;
- let Inst{15-11} = opcode;
- let Inst{10} = 1;
- let Inst{9-5} = Rn;
- let Inst{4-0} = Rd;
-}
-
-let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
-class BaseSIMDThreeSameVectorTied<bit Q, bit U, bits<2> size, bits<5> opcode,
- RegisterOperand regtype, string asm, string kind,
- list<dag> pattern>
- : I<(outs regtype:$dst), (ins regtype:$Rd, regtype:$Rn, regtype:$Rm), asm,
- "{\t$Rd" # kind # ", $Rn" # kind # ", $Rm" # kind #
- "|" # kind # "\t$Rd, $Rn, $Rm}", "$Rd = $dst", pattern>,
- Sched<[WriteV]> {
- bits<5> Rd;
- bits<5> Rn;
- bits<5> Rm;
- let Inst{31} = 0;
- let Inst{30} = Q;
- let Inst{29} = U;
- let Inst{28-24} = 0b01110;
- let Inst{23-22} = size;
- let Inst{21} = 1;
- let Inst{20-16} = Rm;
- let Inst{15-11} = opcode;
- let Inst{10} = 1;
- let Inst{9-5} = Rn;
- let Inst{4-0} = Rd;
-}
-
-// All operand sizes are distinguished in the encoding.
-multiclass SIMDThreeSameVector<bit U, bits<5> opc, string asm,
- SDPatternOperator OpNode> {
- def v8i8 : BaseSIMDThreeSameVector<0, U, 0b00, opc, V64,
- asm, ".8b",
- [(set (v8i8 V64:$Rd), (OpNode (v8i8 V64:$Rn), (v8i8 V64:$Rm)))]>;
- def v16i8 : BaseSIMDThreeSameVector<1, U, 0b00, opc, V128,
- asm, ".16b",
- [(set (v16i8 V128:$Rd), (OpNode (v16i8 V128:$Rn), (v16i8 V128:$Rm)))]>;
- def v4i16 : BaseSIMDThreeSameVector<0, U, 0b01, opc, V64,
- asm, ".4h",
- [(set (v4i16 V64:$Rd), (OpNode (v4i16 V64:$Rn), (v4i16 V64:$Rm)))]>;
- def v8i16 : BaseSIMDThreeSameVector<1, U, 0b01, opc, V128,
- asm, ".8h",
- [(set (v8i16 V128:$Rd), (OpNode (v8i16 V128:$Rn), (v8i16 V128:$Rm)))]>;
- def v2i32 : BaseSIMDThreeSameVector<0, U, 0b10, opc, V64,
- asm, ".2s",
- [(set (v2i32 V64:$Rd), (OpNode (v2i32 V64:$Rn), (v2i32 V64:$Rm)))]>;
- def v4i32 : BaseSIMDThreeSameVector<1, U, 0b10, opc, V128,
- asm, ".4s",
- [(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn), (v4i32 V128:$Rm)))]>;
- def v2i64 : BaseSIMDThreeSameVector<1, U, 0b11, opc, V128,
- asm, ".2d",
- [(set (v2i64 V128:$Rd), (OpNode (v2i64 V128:$Rn), (v2i64 V128:$Rm)))]>;
-}
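-
-// For example, the integer add/sub families come from single lines like
-// (opcode bits illustrative; real defs in ARM64InstrInfo.td):
-//   defm ADD : SIMDThreeSameVector<0, 0b10000, "add", add>;
-//   defm SUB : SIMDThreeSameVector<1, 0b10000, "sub", sub>;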
-
-// As above, but D sized elements unsupported.
-multiclass SIMDThreeSameVectorBHS<bit U, bits<5> opc, string asm,
- SDPatternOperator OpNode> {
- def v8i8 : BaseSIMDThreeSameVector<0, U, 0b00, opc, V64,
- asm, ".8b",
- [(set V64:$Rd, (v8i8 (OpNode (v8i8 V64:$Rn), (v8i8 V64:$Rm))))]>;
- def v16i8 : BaseSIMDThreeSameVector<1, U, 0b00, opc, V128,
- asm, ".16b",
- [(set V128:$Rd, (v16i8 (OpNode (v16i8 V128:$Rn), (v16i8 V128:$Rm))))]>;
- def v4i16 : BaseSIMDThreeSameVector<0, U, 0b01, opc, V64,
- asm, ".4h",
- [(set V64:$Rd, (v4i16 (OpNode (v4i16 V64:$Rn), (v4i16 V64:$Rm))))]>;
- def v8i16 : BaseSIMDThreeSameVector<1, U, 0b01, opc, V128,
- asm, ".8h",
- [(set V128:$Rd, (v8i16 (OpNode (v8i16 V128:$Rn), (v8i16 V128:$Rm))))]>;
- def v2i32 : BaseSIMDThreeSameVector<0, U, 0b10, opc, V64,
- asm, ".2s",
- [(set V64:$Rd, (v2i32 (OpNode (v2i32 V64:$Rn), (v2i32 V64:$Rm))))]>;
- def v4i32 : BaseSIMDThreeSameVector<1, U, 0b10, opc, V128,
- asm, ".4s",
- [(set V128:$Rd, (v4i32 (OpNode (v4i32 V128:$Rn), (v4i32 V128:$Rm))))]>;
-}
-
-multiclass SIMDThreeSameVectorBHSTied<bit U, bits<5> opc, string asm,
- SDPatternOperator OpNode> {
- def v8i8 : BaseSIMDThreeSameVectorTied<0, U, 0b00, opc, V64,
- asm, ".8b",
- [(set (v8i8 V64:$dst),
- (OpNode (v8i8 V64:$Rd), (v8i8 V64:$Rn), (v8i8 V64:$Rm)))]>;
- def v16i8 : BaseSIMDThreeSameVectorTied<1, U, 0b00, opc, V128,
- asm, ".16b",
- [(set (v16i8 V128:$dst),
- (OpNode (v16i8 V128:$Rd), (v16i8 V128:$Rn), (v16i8 V128:$Rm)))]>;
- def v4i16 : BaseSIMDThreeSameVectorTied<0, U, 0b01, opc, V64,
- asm, ".4h",
- [(set (v4i16 V64:$dst),
- (OpNode (v4i16 V64:$Rd), (v4i16 V64:$Rn), (v4i16 V64:$Rm)))]>;
- def v8i16 : BaseSIMDThreeSameVectorTied<1, U, 0b01, opc, V128,
- asm, ".8h",
- [(set (v8i16 V128:$dst),
- (OpNode (v8i16 V128:$Rd), (v8i16 V128:$Rn), (v8i16 V128:$Rm)))]>;
- def v2i32 : BaseSIMDThreeSameVectorTied<0, U, 0b10, opc, V64,
- asm, ".2s",
- [(set (v2i32 V64:$dst),
- (OpNode (v2i32 V64:$Rd), (v2i32 V64:$Rn), (v2i32 V64:$Rm)))]>;
- def v4i32 : BaseSIMDThreeSameVectorTied<1, U, 0b10, opc, V128,
- asm, ".4s",
- [(set (v4i32 V128:$dst),
- (OpNode (v4i32 V128:$Rd), (v4i32 V128:$Rn), (v4i32 V128:$Rm)))]>;
-}
-
-// As above, but only B sized elements supported.
-multiclass SIMDThreeSameVectorB<bit U, bits<5> opc, string asm,
- SDPatternOperator OpNode> {
- def v8i8 : BaseSIMDThreeSameVector<0, U, 0b00, opc, V64,
- asm, ".8b",
- [(set (v8i8 V64:$Rd), (OpNode (v8i8 V64:$Rn), (v8i8 V64:$Rm)))]>;
- def v16i8 : BaseSIMDThreeSameVector<1, U, 0b00, opc, V128,
- asm, ".16b",
- [(set (v16i8 V128:$Rd),
- (OpNode (v16i8 V128:$Rn), (v16i8 V128:$Rm)))]>;
-}
-
-// As above, but only S and D sized floating point elements supported.
-multiclass SIMDThreeSameVectorFP<bit U, bit S, bits<5> opc,
- string asm, SDPatternOperator OpNode> {
- def v2f32 : BaseSIMDThreeSameVector<0, U, {S,0}, opc, V64,
- asm, ".2s",
- [(set (v2f32 V64:$Rd), (OpNode (v2f32 V64:$Rn), (v2f32 V64:$Rm)))]>;
- def v4f32 : BaseSIMDThreeSameVector<1, U, {S,0}, opc, V128,
- asm, ".4s",
- [(set (v4f32 V128:$Rd), (OpNode (v4f32 V128:$Rn), (v4f32 V128:$Rm)))]>;
- def v2f64 : BaseSIMDThreeSameVector<1, U, {S,1}, opc, V128,
- asm, ".2d",
- [(set (v2f64 V128:$Rd), (OpNode (v2f64 V128:$Rn), (v2f64 V128:$Rm)))]>;
-}
-
-multiclass SIMDThreeSameVectorFPCmp<bit U, bit S, bits<5> opc,
- string asm,
- SDPatternOperator OpNode> {
- def v2f32 : BaseSIMDThreeSameVector<0, U, {S,0}, opc, V64,
- asm, ".2s",
- [(set (v2i32 V64:$Rd), (OpNode (v2f32 V64:$Rn), (v2f32 V64:$Rm)))]>;
- def v4f32 : BaseSIMDThreeSameVector<1, U, {S,0}, opc, V128,
- asm, ".4s",
- [(set (v4i32 V128:$Rd), (OpNode (v4f32 V128:$Rn), (v4f32 V128:$Rm)))]>;
- def v2f64 : BaseSIMDThreeSameVector<1, U, {S,1}, opc, V128,
- asm, ".2d",
- [(set (v2i64 V128:$Rd), (OpNode (v2f64 V128:$Rn), (v2f64 V128:$Rm)))]>;
-}
-
-multiclass SIMDThreeSameVectorFPTied<bit U, bit S, bits<5> opc,
- string asm, SDPatternOperator OpNode> {
- def v2f32 : BaseSIMDThreeSameVectorTied<0, U, {S,0}, opc, V64,
- asm, ".2s",
- [(set (v2f32 V64:$dst),
- (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn), (v2f32 V64:$Rm)))]>;
- def v4f32 : BaseSIMDThreeSameVectorTied<1, U, {S,0}, opc, V128,
- asm, ".4s",
- [(set (v4f32 V128:$dst),
- (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn), (v4f32 V128:$Rm)))]>;
- def v2f64 : BaseSIMDThreeSameVectorTied<1, U, {S,1}, opc, V128,
- asm, ".2d",
- [(set (v2f64 V128:$dst),
- (OpNode (v2f64 V128:$Rd), (v2f64 V128:$Rn), (v2f64 V128:$Rm)))]>;
-}
-
-// As above, but D and B sized elements unsupported.
-multiclass SIMDThreeSameVectorHS<bit U, bits<5> opc, string asm,
- SDPatternOperator OpNode> {
- def v4i16 : BaseSIMDThreeSameVector<0, U, 0b01, opc, V64,
- asm, ".4h",
- [(set (v4i16 V64:$Rd), (OpNode (v4i16 V64:$Rn), (v4i16 V64:$Rm)))]>;
- def v8i16 : BaseSIMDThreeSameVector<1, U, 0b01, opc, V128,
- asm, ".8h",
- [(set (v8i16 V128:$Rd), (OpNode (v8i16 V128:$Rn), (v8i16 V128:$Rm)))]>;
- def v2i32 : BaseSIMDThreeSameVector<0, U, 0b10, opc, V64,
- asm, ".2s",
- [(set (v2i32 V64:$Rd), (OpNode (v2i32 V64:$Rn), (v2i32 V64:$Rm)))]>;
- def v4i32 : BaseSIMDThreeSameVector<1, U, 0b10, opc, V128,
- asm, ".4s",
- [(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn), (v4i32 V128:$Rm)))]>;
-}
-
-// Logical three vector ops share opcode bits, and only use B sized elements.
-multiclass SIMDLogicalThreeVector<bit U, bits<2> size, string asm,
- SDPatternOperator OpNode = null_frag> {
- def v8i8 : BaseSIMDThreeSameVector<0, U, size, 0b00011, V64,
- asm, ".8b",
- [(set (v8i8 V64:$Rd), (OpNode V64:$Rn, V64:$Rm))]>;
- def v16i8 : BaseSIMDThreeSameVector<1, U, size, 0b00011, V128,
- asm, ".16b",
- [(set (v16i8 V128:$Rd), (OpNode V128:$Rn, V128:$Rm))]>;
-
- def : Pat<(v4i16 (OpNode V64:$LHS, V64:$RHS)),
- (!cast<Instruction>(NAME#"v8i8") V64:$LHS, V64:$RHS)>;
- def : Pat<(v2i32 (OpNode V64:$LHS, V64:$RHS)),
- (!cast<Instruction>(NAME#"v8i8") V64:$LHS, V64:$RHS)>;
- def : Pat<(v1i64 (OpNode V64:$LHS, V64:$RHS)),
- (!cast<Instruction>(NAME#"v8i8") V64:$LHS, V64:$RHS)>;
-
- def : Pat<(v8i16 (OpNode V128:$LHS, V128:$RHS)),
- (!cast<Instruction>(NAME#"v16i8") V128:$LHS, V128:$RHS)>;
- def : Pat<(v4i32 (OpNode V128:$LHS, V128:$RHS)),
- (!cast<Instruction>(NAME#"v16i8") V128:$LHS, V128:$RHS)>;
- def : Pat<(v2i64 (OpNode V128:$LHS, V128:$RHS)),
- (!cast<Instruction>(NAME#"v16i8") V128:$LHS, V128:$RHS)>;
-}
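-
-// Illustrative uses; since the opcode bits are shared, the size field is what
-// selects the particular logical operation (bits shown are representative):
-//   defm AND : SIMDLogicalThreeVector<0, 0b00, "and", and>;
-//   defm ORR : SIMDLogicalThreeVector<0, 0b10, "orr", or>;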
-
-multiclass SIMDLogicalThreeVectorTied<bit U, bits<2> size,
- string asm, SDPatternOperator OpNode> {
- def v8i8 : BaseSIMDThreeSameVectorTied<0, U, size, 0b00011, V64,
- asm, ".8b",
- [(set (v8i8 V64:$dst),
- (OpNode (v8i8 V64:$Rd), (v8i8 V64:$Rn), (v8i8 V64:$Rm)))]>;
- def v16i8 : BaseSIMDThreeSameVectorTied<1, U, size, 0b00011, V128,
- asm, ".16b",
- [(set (v16i8 V128:$dst),
- (OpNode (v16i8 V128:$Rd), (v16i8 V128:$Rn),
- (v16i8 V128:$Rm)))]>;
-
- def : Pat<(v4i16 (OpNode (v4i16 V64:$LHS), (v4i16 V64:$MHS),
- (v4i16 V64:$RHS))),
- (!cast<Instruction>(NAME#"v8i8")
- V64:$LHS, V64:$MHS, V64:$RHS)>;
- def : Pat<(v2i32 (OpNode (v2i32 V64:$LHS), (v2i32 V64:$MHS),
- (v2i32 V64:$RHS))),
- (!cast<Instruction>(NAME#"v8i8")
- V64:$LHS, V64:$MHS, V64:$RHS)>;
- def : Pat<(v1i64 (OpNode (v1i64 V64:$LHS), (v1i64 V64:$MHS),
- (v1i64 V64:$RHS))),
- (!cast<Instruction>(NAME#"v8i8")
- V64:$LHS, V64:$MHS, V64:$RHS)>;
-
- def : Pat<(v8i16 (OpNode (v8i16 V128:$LHS), (v8i16 V128:$MHS),
- (v8i16 V128:$RHS))),
- (!cast<Instruction>(NAME#"v16i8")
- V128:$LHS, V128:$MHS, V128:$RHS)>;
- def : Pat<(v4i32 (OpNode (v4i32 V128:$LHS), (v4i32 V128:$MHS),
- (v4i32 V128:$RHS))),
- (!cast<Instruction>(NAME#"v16i8")
- V128:$LHS, V128:$MHS, V128:$RHS)>;
- def : Pat<(v2i64 (OpNode (v2i64 V128:$LHS), (v2i64 V128:$MHS),
- (v2i64 V128:$RHS))),
- (!cast<Instruction>(NAME#"v16i8")
- V128:$LHS, V128:$MHS, V128:$RHS)>;
-}
-
-
-//----------------------------------------------------------------------------
-// AdvSIMD two register vector instructions.
-//----------------------------------------------------------------------------
-
-let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
-class BaseSIMDTwoSameVector<bit Q, bit U, bits<2> size, bits<5> opcode,
- RegisterOperand regtype, string asm, string dstkind,
- string srckind, list<dag> pattern>
- : I<(outs regtype:$Rd), (ins regtype:$Rn), asm,
- "{\t$Rd" # dstkind # ", $Rn" # srckind #
- "|" # dstkind # "\t$Rd, $Rn}", "", pattern>,
- Sched<[WriteV]> {
- bits<5> Rd;
- bits<5> Rn;
- let Inst{31} = 0;
- let Inst{30} = Q;
- let Inst{29} = U;
- let Inst{28-24} = 0b01110;
- let Inst{23-22} = size;
- let Inst{21-17} = 0b10000;
- let Inst{16-12} = opcode;
- let Inst{11-10} = 0b10;
- let Inst{9-5} = Rn;
- let Inst{4-0} = Rd;
-}
-
-let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
-class BaseSIMDTwoSameVectorTied<bit Q, bit U, bits<2> size, bits<5> opcode,
- RegisterOperand regtype, string asm, string dstkind,
- string srckind, list<dag> pattern>
- : I<(outs regtype:$dst), (ins regtype:$Rd, regtype:$Rn), asm,
- "{\t$Rd" # dstkind # ", $Rn" # srckind #
- "|" # dstkind # "\t$Rd, $Rn}", "$Rd = $dst", pattern>,
- Sched<[WriteV]> {
- bits<5> Rd;
- bits<5> Rn;
- let Inst{31} = 0;
- let Inst{30} = Q;
- let Inst{29} = U;
- let Inst{28-24} = 0b01110;
- let Inst{23-22} = size;
- let Inst{21-17} = 0b10000;
- let Inst{16-12} = opcode;
- let Inst{11-10} = 0b10;
- let Inst{9-5} = Rn;
- let Inst{4-0} = Rd;
-}
-
-// Supports B, H, and S element sizes.
-multiclass SIMDTwoVectorBHS<bit U, bits<5> opc, string asm,
- SDPatternOperator OpNode> {
- def v8i8 : BaseSIMDTwoSameVector<0, U, 0b00, opc, V64,
- asm, ".8b", ".8b",
- [(set (v8i8 V64:$Rd), (OpNode (v8i8 V64:$Rn)))]>;
- def v16i8 : BaseSIMDTwoSameVector<1, U, 0b00, opc, V128,
- asm, ".16b", ".16b",
- [(set (v16i8 V128:$Rd), (OpNode (v16i8 V128:$Rn)))]>;
- def v4i16 : BaseSIMDTwoSameVector<0, U, 0b01, opc, V64,
- asm, ".4h", ".4h",
- [(set (v4i16 V64:$Rd), (OpNode (v4i16 V64:$Rn)))]>;
- def v8i16 : BaseSIMDTwoSameVector<1, U, 0b01, opc, V128,
- asm, ".8h", ".8h",
- [(set (v8i16 V128:$Rd), (OpNode (v8i16 V128:$Rn)))]>;
- def v2i32 : BaseSIMDTwoSameVector<0, U, 0b10, opc, V64,
- asm, ".2s", ".2s",
- [(set (v2i32 V64:$Rd), (OpNode (v2i32 V64:$Rn)))]>;
- def v4i32 : BaseSIMDTwoSameVector<1, U, 0b10, opc, V128,
- asm, ".4s", ".4s",
- [(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn)))]>;
-}
-
-class BaseSIMDVectorLShiftLongBySize<bit Q, bits<2> size,
- RegisterOperand regtype, string asm, string dstkind,
- string srckind, string amount>
- : I<(outs V128:$Rd), (ins regtype:$Rn), asm,
- "{\t$Rd" # dstkind # ", $Rn" # srckind # ", #" # amount #
- "|" # dstkind # "\t$Rd, $Rn, #" # amount # "}", "", []>,
- Sched<[WriteV]> {
- bits<5> Rd;
- bits<5> Rn;
- let Inst{31} = 0;
- let Inst{30} = Q;
- let Inst{29-24} = 0b101110;
- let Inst{23-22} = size;
- let Inst{21-10} = 0b100001001110;
- let Inst{9-5} = Rn;
- let Inst{4-0} = Rd;
-}
-
-multiclass SIMDVectorLShiftLongBySizeBHS {
- let neverHasSideEffects = 1 in {
- def v8i8 : BaseSIMDVectorLShiftLongBySize<0, 0b00, V64,
- "shll", ".8h", ".8b", "8">;
- def v16i8 : BaseSIMDVectorLShiftLongBySize<1, 0b00, V128,
- "shll2", ".8h", ".16b", "8">;
- def v4i16 : BaseSIMDVectorLShiftLongBySize<0, 0b01, V64,
- "shll", ".4s", ".4h", "16">;
- def v8i16 : BaseSIMDVectorLShiftLongBySize<1, 0b01, V128,
- "shll2", ".4s", ".8h", "16">;
- def v2i32 : BaseSIMDVectorLShiftLongBySize<0, 0b10, V64,
- "shll", ".2d", ".2s", "32">;
- def v4i32 : BaseSIMDVectorLShiftLongBySize<1, 0b10, V128,
- "shll2", ".2d", ".4s", "32">;
- }
-}
-
-// Supports all element sizes.
-multiclass SIMDLongTwoVector<bit U, bits<5> opc, string asm,
- SDPatternOperator OpNode> {
- def v8i8_v4i16 : BaseSIMDTwoSameVector<0, U, 0b00, opc, V64,
- asm, ".4h", ".8b",
- [(set (v4i16 V64:$Rd), (OpNode (v8i8 V64:$Rn)))]>;
- def v16i8_v8i16 : BaseSIMDTwoSameVector<1, U, 0b00, opc, V128,
- asm, ".8h", ".16b",
- [(set (v8i16 V128:$Rd), (OpNode (v16i8 V128:$Rn)))]>;
- def v4i16_v2i32 : BaseSIMDTwoSameVector<0, U, 0b01, opc, V64,
- asm, ".2s", ".4h",
- [(set (v2i32 V64:$Rd), (OpNode (v4i16 V64:$Rn)))]>;
- def v8i16_v4i32 : BaseSIMDTwoSameVector<1, U, 0b01, opc, V128,
- asm, ".4s", ".8h",
- [(set (v4i32 V128:$Rd), (OpNode (v8i16 V128:$Rn)))]>;
- def v2i32_v1i64 : BaseSIMDTwoSameVector<0, U, 0b10, opc, V64,
- asm, ".1d", ".2s",
- [(set (v1i64 V64:$Rd), (OpNode (v2i32 V64:$Rn)))]>;
- def v4i32_v2i64 : BaseSIMDTwoSameVector<1, U, 0b10, opc, V128,
- asm, ".2d", ".4s",
- [(set (v2i64 V128:$Rd), (OpNode (v4i32 V128:$Rn)))]>;
-}
-
-multiclass SIMDLongTwoVectorTied<bit U, bits<5> opc, string asm,
- SDPatternOperator OpNode> {
- def v8i8_v4i16 : BaseSIMDTwoSameVectorTied<0, U, 0b00, opc, V64,
- asm, ".4h", ".8b",
- [(set (v4i16 V64:$dst), (OpNode (v4i16 V64:$Rd),
- (v8i8 V64:$Rn)))]>;
- def v16i8_v8i16 : BaseSIMDTwoSameVectorTied<1, U, 0b00, opc, V128,
- asm, ".8h", ".16b",
- [(set (v8i16 V128:$dst), (OpNode (v8i16 V128:$Rd),
- (v16i8 V128:$Rn)))]>;
- def v4i16_v2i32 : BaseSIMDTwoSameVectorTied<0, U, 0b01, opc, V64,
- asm, ".2s", ".4h",
- [(set (v2i32 V64:$dst), (OpNode (v2i32 V64:$Rd),
- (v4i16 V64:$Rn)))]>;
- def v8i16_v4i32 : BaseSIMDTwoSameVectorTied<1, U, 0b01, opc, V128,
- asm, ".4s", ".8h",
- [(set (v4i32 V128:$dst), (OpNode (v4i32 V128:$Rd),
- (v8i16 V128:$Rn)))]>;
- def v2i32_v1i64 : BaseSIMDTwoSameVectorTied<0, U, 0b10, opc, V64,
- asm, ".1d", ".2s",
- [(set (v1i64 V64:$dst), (OpNode (v1i64 V64:$Rd),
- (v2i32 V64:$Rn)))]>;
- def v4i32_v2i64 : BaseSIMDTwoSameVectorTied<1, U, 0b10, opc, V128,
- asm, ".2d", ".4s",
- [(set (v2i64 V128:$dst), (OpNode (v2i64 V128:$Rd),
- (v4i32 V128:$Rn)))]>;
-}
-
-// Supports all element sizes, except 1xD.
-multiclass SIMDTwoVectorBHSDTied<bit U, bits<5> opc, string asm,
- SDPatternOperator OpNode> {
- def v8i8 : BaseSIMDTwoSameVectorTied<0, U, 0b00, opc, V64,
- asm, ".8b", ".8b",
- [(set (v8i8 V64:$dst), (OpNode (v8i8 V64:$Rd), (v8i8 V64:$Rn)))]>;
- def v16i8 : BaseSIMDTwoSameVectorTied<1, U, 0b00, opc, V128,
- asm, ".16b", ".16b",
- [(set (v16i8 V128:$dst), (OpNode (v16i8 V128:$Rd), (v16i8 V128:$Rn)))]>;
- def v4i16 : BaseSIMDTwoSameVectorTied<0, U, 0b01, opc, V64,
- asm, ".4h", ".4h",
- [(set (v4i16 V64:$dst), (OpNode (v4i16 V64:$Rd), (v4i16 V64:$Rn)))]>;
- def v8i16 : BaseSIMDTwoSameVectorTied<1, U, 0b01, opc, V128,
- asm, ".8h", ".8h",
- [(set (v8i16 V128:$dst), (OpNode (v8i16 V128:$Rd), (v8i16 V128:$Rn)))]>;
- def v2i32 : BaseSIMDTwoSameVectorTied<0, U, 0b10, opc, V64,
- asm, ".2s", ".2s",
- [(set (v2i32 V64:$dst), (OpNode (v2i32 V64:$Rd), (v2i32 V64:$Rn)))]>;
- def v4i32 : BaseSIMDTwoSameVectorTied<1, U, 0b10, opc, V128,
- asm, ".4s", ".4s",
- [(set (v4i32 V128:$dst), (OpNode (v4i32 V128:$Rd), (v4i32 V128:$Rn)))]>;
- def v2i64 : BaseSIMDTwoSameVectorTied<1, U, 0b11, opc, V128,
- asm, ".2d", ".2d",
- [(set (v2i64 V128:$dst), (OpNode (v2i64 V128:$Rd), (v2i64 V128:$Rn)))]>;
-}
-
-multiclass SIMDTwoVectorBHSD<bit U, bits<5> opc, string asm,
- SDPatternOperator OpNode = null_frag> {
- def v8i8 : BaseSIMDTwoSameVector<0, U, 0b00, opc, V64,
- asm, ".8b", ".8b",
- [(set (v8i8 V64:$Rd), (OpNode (v8i8 V64:$Rn)))]>;
- def v16i8 : BaseSIMDTwoSameVector<1, U, 0b00, opc, V128,
- asm, ".16b", ".16b",
- [(set (v16i8 V128:$Rd), (OpNode (v16i8 V128:$Rn)))]>;
- def v4i16 : BaseSIMDTwoSameVector<0, U, 0b01, opc, V64,
- asm, ".4h", ".4h",
- [(set (v4i16 V64:$Rd), (OpNode (v4i16 V64:$Rn)))]>;
- def v8i16 : BaseSIMDTwoSameVector<1, U, 0b01, opc, V128,
- asm, ".8h", ".8h",
- [(set (v8i16 V128:$Rd), (OpNode (v8i16 V128:$Rn)))]>;
- def v2i32 : BaseSIMDTwoSameVector<0, U, 0b10, opc, V64,
- asm, ".2s", ".2s",
- [(set (v2i32 V64:$Rd), (OpNode (v2i32 V64:$Rn)))]>;
- def v4i32 : BaseSIMDTwoSameVector<1, U, 0b10, opc, V128,
- asm, ".4s", ".4s",
- [(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn)))]>;
- def v2i64 : BaseSIMDTwoSameVector<1, U, 0b11, opc, V128,
- asm, ".2d", ".2d",
- [(set (v2i64 V128:$Rd), (OpNode (v2i64 V128:$Rn)))]>;
-}
-
-
-// Supports only B element sizes.
-multiclass SIMDTwoVectorB<bit U, bits<2> size, bits<5> opc, string asm,
- SDPatternOperator OpNode> {
- def v8i8 : BaseSIMDTwoSameVector<0, U, size, opc, V64,
- asm, ".8b", ".8b",
- [(set (v8i8 V64:$Rd), (OpNode (v8i8 V64:$Rn)))]>;
- def v16i8 : BaseSIMDTwoSameVector<1, U, size, opc, V128,
- asm, ".16b", ".16b",
- [(set (v16i8 V128:$Rd), (OpNode (v16i8 V128:$Rn)))]>;
-}
-
-// Supports only B and H element sizes.
-multiclass SIMDTwoVectorBH<bit U, bits<5> opc, string asm,
- SDPatternOperator OpNode> {
- def v8i8 : BaseSIMDTwoSameVector<0, U, 0b00, opc, V64,
- asm, ".8b", ".8b",
- [(set (v8i8 V64:$Rd), (OpNode V64:$Rn))]>;
- def v16i8 : BaseSIMDTwoSameVector<1, U, 0b00, opc, V128,
- asm, ".16b", ".16b",
- [(set (v16i8 V128:$Rd), (OpNode V128:$Rn))]>;
- def v4i16 : BaseSIMDTwoSameVector<0, U, 0b01, opc, V64,
- asm, ".4h", ".4h",
- [(set (v4i16 V64:$Rd), (OpNode V64:$Rn))]>;
- def v8i16 : BaseSIMDTwoSameVector<1, U, 0b01, opc, V128,
- asm, ".8h", ".8h",
- [(set (v8i16 V128:$Rd), (OpNode V128:$Rn))]>;
-}
-
-// Supports only S and D element sizes; the high bit of the size field is
-// used as an extra opcode bit.
-multiclass SIMDTwoVectorFP<bit U, bit S, bits<5> opc, string asm,
- SDPatternOperator OpNode> {
- def v2f32 : BaseSIMDTwoSameVector<0, U, {S,0}, opc, V64,
- asm, ".2s", ".2s",
- [(set (v2f32 V64:$Rd), (OpNode (v2f32 V64:$Rn)))]>;
- def v4f32 : BaseSIMDTwoSameVector<1, U, {S,0}, opc, V128,
- asm, ".4s", ".4s",
- [(set (v4f32 V128:$Rd), (OpNode (v4f32 V128:$Rn)))]>;
- def v2f64 : BaseSIMDTwoSameVector<1, U, {S,1}, opc, V128,
- asm, ".2d", ".2d",
- [(set (v2f64 V128:$Rd), (OpNode (v2f64 V128:$Rn)))]>;
-}
-
-// Supports only S element size.
-multiclass SIMDTwoVectorS<bit U, bit S, bits<5> opc, string asm,
- SDPatternOperator OpNode> {
- def v2i32 : BaseSIMDTwoSameVector<0, U, {S,0}, opc, V64,
- asm, ".2s", ".2s",
- [(set (v2i32 V64:$Rd), (OpNode (v2i32 V64:$Rn)))]>;
- def v4i32 : BaseSIMDTwoSameVector<1, U, {S,0}, opc, V128,
- asm, ".4s", ".4s",
- [(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn)))]>;
-}
-
-
-multiclass SIMDTwoVectorFPToInt<bit U, bit S, bits<5> opc, string asm,
- SDPatternOperator OpNode> {
- def v2f32 : BaseSIMDTwoSameVector<0, U, {S,0}, opc, V64,
- asm, ".2s", ".2s",
- [(set (v2i32 V64:$Rd), (OpNode (v2f32 V64:$Rn)))]>;
- def v4f32 : BaseSIMDTwoSameVector<1, U, {S,0}, opc, V128,
- asm, ".4s", ".4s",
- [(set (v4i32 V128:$Rd), (OpNode (v4f32 V128:$Rn)))]>;
- def v2f64 : BaseSIMDTwoSameVector<1, U, {S,1}, opc, V128,
- asm, ".2d", ".2d",
- [(set (v2i64 V128:$Rd), (OpNode (v2f64 V128:$Rn)))]>;
-}
-
-multiclass SIMDTwoVectorIntToFP<bit U, bit S, bits<5> opc, string asm,
- SDPatternOperator OpNode> {
- def v2f32 : BaseSIMDTwoSameVector<0, U, {S,0}, opc, V64,
- asm, ".2s", ".2s",
- [(set (v2f32 V64:$Rd), (OpNode (v2i32 V64:$Rn)))]>;
- def v4f32 : BaseSIMDTwoSameVector<1, U, {S,0}, opc, V128,
- asm, ".4s", ".4s",
- [(set (v4f32 V128:$Rd), (OpNode (v4i32 V128:$Rn)))]>;
- def v2f64 : BaseSIMDTwoSameVector<1, U, {S,1}, opc, V128,
- asm, ".2d", ".2d",
- [(set (v2f64 V128:$Rd), (OpNode (v2i64 V128:$Rn)))]>;
-}
-
-
-class BaseSIMDMixedTwoVector<bit Q, bit U, bits<2> size, bits<5> opcode,
- RegisterOperand inreg, RegisterOperand outreg,
- string asm, string outkind, string inkind,
- list<dag> pattern>
- : I<(outs outreg:$Rd), (ins inreg:$Rn), asm,
- "{\t$Rd" # outkind # ", $Rn" # inkind #
- "|" # outkind # "\t$Rd, $Rn}", "", pattern>,
- Sched<[WriteV]> {
- bits<5> Rd;
- bits<5> Rn;
- let Inst{31} = 0;
- let Inst{30} = Q;
- let Inst{29} = U;
- let Inst{28-24} = 0b01110;
- let Inst{23-22} = size;
- let Inst{21-17} = 0b10000;
- let Inst{16-12} = opcode;
- let Inst{11-10} = 0b10;
- let Inst{9-5} = Rn;
- let Inst{4-0} = Rd;
-}
-
-class BaseSIMDMixedTwoVectorTied<bit Q, bit U, bits<2> size, bits<5> opcode,
- RegisterOperand inreg, RegisterOperand outreg,
- string asm, string outkind, string inkind,
- list<dag> pattern>
- : I<(outs outreg:$dst), (ins outreg:$Rd, inreg:$Rn), asm,
- "{\t$Rd" # outkind # ", $Rn" # inkind #
- "|" # outkind # "\t$Rd, $Rn}", "$Rd = $dst", pattern>,
- Sched<[WriteV]> {
- bits<5> Rd;
- bits<5> Rn;
- let Inst{31} = 0;
- let Inst{30} = Q;
- let Inst{29} = U;
- let Inst{28-24} = 0b01110;
- let Inst{23-22} = size;
- let Inst{21-17} = 0b10000;
- let Inst{16-12} = opcode;
- let Inst{11-10} = 0b10;
- let Inst{9-5} = Rn;
- let Inst{4-0} = Rd;
-}
-
-multiclass SIMDMixedTwoVector<bit U, bits<5> opc, string asm,
- SDPatternOperator OpNode> {
- def v8i8 : BaseSIMDMixedTwoVector<0, U, 0b00, opc, V128, V64,
- asm, ".8b", ".8h",
- [(set (v8i8 V64:$Rd), (OpNode (v8i16 V128:$Rn)))]>;
- def v16i8 : BaseSIMDMixedTwoVectorTied<1, U, 0b00, opc, V128, V128,
- asm#"2", ".16b", ".8h", []>;
- def v4i16 : BaseSIMDMixedTwoVector<0, U, 0b01, opc, V128, V64,
- asm, ".4h", ".4s",
- [(set (v4i16 V64:$Rd), (OpNode (v4i32 V128:$Rn)))]>;
- def v8i16 : BaseSIMDMixedTwoVectorTied<1, U, 0b01, opc, V128, V128,
- asm#"2", ".8h", ".4s", []>;
- def v2i32 : BaseSIMDMixedTwoVector<0, U, 0b10, opc, V128, V64,
- asm, ".2s", ".2d",
- [(set (v2i32 V64:$Rd), (OpNode (v2i64 V128:$Rn)))]>;
- def v4i32 : BaseSIMDMixedTwoVectorTied<1, U, 0b10, opc, V128, V128,
- asm#"2", ".4s", ".2d", []>;
-
- def : Pat<(concat_vectors (v8i8 V64:$Rd), (OpNode (v8i16 V128:$Rn))),
- (!cast<Instruction>(NAME # "v16i8")
- (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>;
- def : Pat<(concat_vectors (v4i16 V64:$Rd), (OpNode (v4i32 V128:$Rn))),
- (!cast<Instruction>(NAME # "v8i16")
- (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>;
- def : Pat<(concat_vectors (v2i32 V64:$Rd), (OpNode (v2i64 V128:$Rn))),
- (!cast<Instruction>(NAME # "v4i32")
- (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>;
-}
-
-class BaseSIMDCmpTwoVector<bit Q, bit U, bits<2> size, bits<5> opcode,
- RegisterOperand regtype, string asm, string kind,
- ValueType dty, ValueType sty, SDNode OpNode>
- : I<(outs regtype:$Rd), (ins regtype:$Rn), asm,
- "{\t$Rd" # kind # ", $Rn" # kind # ", #0" #
- "|" # kind # "\t$Rd, $Rn, #0}", "",
- [(set (dty regtype:$Rd), (OpNode (sty regtype:$Rn)))]>,
- Sched<[WriteV]> {
- bits<5> Rd;
- bits<5> Rn;
- let Inst{31} = 0;
- let Inst{30} = Q;
- let Inst{29} = U;
- let Inst{28-24} = 0b01110;
- let Inst{23-22} = size;
- let Inst{21-17} = 0b10000;
- let Inst{16-12} = opcode;
- let Inst{11-10} = 0b10;
- let Inst{9-5} = Rn;
- let Inst{4-0} = Rd;
-}
-
-// Comparisons support all element sizes, except 1xD.
-multiclass SIMDCmpTwoVector<bit U, bits<5> opc, string asm,
- SDNode OpNode> {
- def v8i8rz : BaseSIMDCmpTwoVector<0, U, 0b00, opc, V64,
- asm, ".8b",
- v8i8, v8i8, OpNode>;
- def v16i8rz : BaseSIMDCmpTwoVector<1, U, 0b00, opc, V128,
- asm, ".16b",
- v16i8, v16i8, OpNode>;
- def v4i16rz : BaseSIMDCmpTwoVector<0, U, 0b01, opc, V64,
- asm, ".4h",
- v4i16, v4i16, OpNode>;
- def v8i16rz : BaseSIMDCmpTwoVector<1, U, 0b01, opc, V128,
- asm, ".8h",
- v8i16, v8i16, OpNode>;
- def v2i32rz : BaseSIMDCmpTwoVector<0, U, 0b10, opc, V64,
- asm, ".2s",
- v2i32, v2i32, OpNode>;
- def v4i32rz : BaseSIMDCmpTwoVector<1, U, 0b10, opc, V128,
- asm, ".4s",
- v4i32, v4i32, OpNode>;
- def v2i64rz : BaseSIMDCmpTwoVector<1, U, 0b11, opc, V128,
- asm, ".2d",
- v2i64, v2i64, OpNode>;
-}
-
-// FP Comparisons support only S and D element sizes.
-multiclass SIMDFPCmpTwoVector<bit U, bit S, bits<5> opc,
- string asm, SDNode OpNode> {
- def v2i32rz : BaseSIMDCmpTwoVector<0, U, {S,0}, opc, V64,
- asm, ".2s",
- v2i32, v2f32, OpNode>;
- def v4i32rz : BaseSIMDCmpTwoVector<1, U, {S,0}, opc, V128,
- asm, ".4s",
- v4i32, v4f32, OpNode>;
- def v2i64rz : BaseSIMDCmpTwoVector<1, U, {S,1}, opc, V128,
- asm, ".2d",
- v2i64, v2f64, OpNode>;
-}
-
-let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
-class BaseSIMDFPCvtTwoVector<bit Q, bit U, bits<2> size, bits<5> opcode,
- RegisterOperand outtype, RegisterOperand intype,
- string asm, string VdTy, string VnTy,
- list<dag> pattern>
- : I<(outs outtype:$Rd), (ins intype:$Rn), asm,
- !strconcat("\t$Rd", VdTy, ", $Rn", VnTy), "", pattern>,
- Sched<[WriteV]> {
- bits<5> Rd;
- bits<5> Rn;
- let Inst{31} = 0;
- let Inst{30} = Q;
- let Inst{29} = U;
- let Inst{28-24} = 0b01110;
- let Inst{23-22} = size;
- let Inst{21-17} = 0b10000;
- let Inst{16-12} = opcode;
- let Inst{11-10} = 0b10;
- let Inst{9-5} = Rn;
- let Inst{4-0} = Rd;
-}
-
-class BaseSIMDFPCvtTwoVectorTied<bit Q, bit U, bits<2> size, bits<5> opcode,
- RegisterOperand outtype, RegisterOperand intype,
- string asm, string VdTy, string VnTy,
- list<dag> pattern>
- : I<(outs outtype:$dst), (ins outtype:$Rd, intype:$Rn), asm,
- !strconcat("\t$Rd", VdTy, ", $Rn", VnTy), "$Rd = $dst", pattern>,
- Sched<[WriteV]> {
- bits<5> Rd;
- bits<5> Rn;
- let Inst{31} = 0;
- let Inst{30} = Q;
- let Inst{29} = U;
- let Inst{28-24} = 0b01110;
- let Inst{23-22} = size;
- let Inst{21-17} = 0b10000;
- let Inst{16-12} = opcode;
- let Inst{11-10} = 0b10;
- let Inst{9-5} = Rn;
- let Inst{4-0} = Rd;
-}
-
-multiclass SIMDFPWidenTwoVector<bit U, bit S, bits<5> opc, string asm> {
- def v4i16 : BaseSIMDFPCvtTwoVector<0, U, {S,0}, opc, V128, V64,
- asm, ".4s", ".4h", []>;
- def v8i16 : BaseSIMDFPCvtTwoVector<1, U, {S,0}, opc, V128, V128,
- asm#"2", ".4s", ".8h", []>;
- def v2i32 : BaseSIMDFPCvtTwoVector<0, U, {S,1}, opc, V128, V64,
- asm, ".2d", ".2s", []>;
- def v4i32 : BaseSIMDFPCvtTwoVector<1, U, {S,1}, opc, V128, V128,
- asm#"2", ".2d", ".4s", []>;
-}
-
-multiclass SIMDFPNarrowTwoVector<bit U, bit S, bits<5> opc, string asm> {
- def v4i16 : BaseSIMDFPCvtTwoVector<0, U, {S,0}, opc, V64, V128,
- asm, ".4h", ".4s", []>;
- def v8i16 : BaseSIMDFPCvtTwoVectorTied<1, U, {S,0}, opc, V128, V128,
- asm#"2", ".8h", ".4s", []>;
- def v2i32 : BaseSIMDFPCvtTwoVector<0, U, {S,1}, opc, V64, V128,
- asm, ".2s", ".2d", []>;
- def v4i32 : BaseSIMDFPCvtTwoVectorTied<1, U, {S,1}, opc, V128, V128,
- asm#"2", ".4s", ".2d", []>;
-}
-
-multiclass SIMDFPInexactCvtTwoVector<bit U, bit S, bits<5> opc, string asm,
- Intrinsic OpNode> {
- def v2f32 : BaseSIMDFPCvtTwoVector<0, U, {S,1}, opc, V64, V128,
- asm, ".2s", ".2d",
- [(set (v2f32 V64:$Rd), (OpNode (v2f64 V128:$Rn)))]>;
- def v4f32 : BaseSIMDFPCvtTwoVectorTied<1, U, {S,1}, opc, V128, V128,
- asm#"2", ".4s", ".2d", []>;
-
- def : Pat<(concat_vectors (v2f32 V64:$Rd), (OpNode (v2f64 V128:$Rn))),
- (!cast<Instruction>(NAME # "v4f32")
- (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>;
-}
-
-//----------------------------------------------------------------------------
-// AdvSIMD three register different-size vector instructions.
-//----------------------------------------------------------------------------
-
-let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
-class BaseSIMDDifferentThreeVector<bit U, bits<3> size, bits<4> opcode,
- RegisterOperand outtype, RegisterOperand intype1,
- RegisterOperand intype2, string asm,
- string outkind, string inkind1, string inkind2,
- list<dag> pattern>
- : I<(outs outtype:$Rd), (ins intype1:$Rn, intype2:$Rm), asm,
- "{\t$Rd" # outkind # ", $Rn" # inkind1 # ", $Rm" # inkind2 #
- "|" # outkind # "\t$Rd, $Rn, $Rm}", "", pattern>,
- Sched<[WriteV]> {
- bits<5> Rd;
- bits<5> Rn;
- bits<5> Rm;
- let Inst{31} = 0;
- let Inst{30} = size{0};
- let Inst{29} = U;
- let Inst{28-24} = 0b01110;
- let Inst{23-22} = size{2-1};
- let Inst{21} = 1;
- let Inst{20-16} = Rm;
- let Inst{15-12} = opcode;
- let Inst{11-10} = 0b00;
- let Inst{9-5} = Rn;
- let Inst{4-0} = Rd;
-}
-
-let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
-class BaseSIMDDifferentThreeVectorTied<bit U, bits<3> size, bits<4> opcode,
- RegisterOperand outtype, RegisterOperand intype1,
- RegisterOperand intype2, string asm,
- string outkind, string inkind1, string inkind2,
- list<dag> pattern>
- : I<(outs outtype:$dst), (ins outtype:$Rd, intype1:$Rn, intype2:$Rm), asm,
- "{\t$Rd" # outkind # ", $Rn" # inkind1 # ", $Rm" # inkind2 #
- "|" # outkind # "\t$Rd, $Rn, $Rm}", "$Rd = $dst", pattern>,
- Sched<[WriteV]> {
- bits<5> Rd;
- bits<5> Rn;
- bits<5> Rm;
- let Inst{31} = 0;
- let Inst{30} = size{0};
- let Inst{29} = U;
- let Inst{28-24} = 0b01110;
- let Inst{23-22} = size{2-1};
- let Inst{21} = 1;
- let Inst{20-16} = Rm;
- let Inst{15-12} = opcode;
- let Inst{11-10} = 0b00;
- let Inst{9-5} = Rn;
- let Inst{4-0} = Rd;
-}
-
-// FIXME: TableGen doesn't know how to deal with expanded types that also
-// change the element count (in this case, placing the results in
-// the high elements of the result register rather than the low
-// elements). Until that's fixed, we can't code-gen those.
-multiclass SIMDNarrowThreeVectorBHS<bit U, bits<4> opc, string asm,
- Intrinsic IntOp> {
- def v8i16_v8i8 : BaseSIMDDifferentThreeVector<U, 0b000, opc,
- V64, V128, V128,
- asm, ".8b", ".8h", ".8h",
- [(set (v8i8 V64:$Rd), (IntOp (v8i16 V128:$Rn), (v8i16 V128:$Rm)))]>;
- def v8i16_v16i8 : BaseSIMDDifferentThreeVectorTied<U, 0b001, opc,
- V128, V128, V128,
- asm#"2", ".16b", ".8h", ".8h",
- []>;
- def v4i32_v4i16 : BaseSIMDDifferentThreeVector<U, 0b010, opc,
- V64, V128, V128,
- asm, ".4h", ".4s", ".4s",
- [(set (v4i16 V64:$Rd), (IntOp (v4i32 V128:$Rn), (v4i32 V128:$Rm)))]>;
- def v4i32_v8i16 : BaseSIMDDifferentThreeVectorTied<U, 0b011, opc,
- V128, V128, V128,
- asm#"2", ".8h", ".4s", ".4s",
- []>;
- def v2i64_v2i32 : BaseSIMDDifferentThreeVector<U, 0b100, opc,
- V64, V128, V128,
- asm, ".2s", ".2d", ".2d",
- [(set (v2i32 V64:$Rd), (IntOp (v2i64 V128:$Rn), (v2i64 V128:$Rm)))]>;
- def v2i64_v4i32 : BaseSIMDDifferentThreeVectorTied<U, 0b101, opc,
- V128, V128, V128,
- asm#"2", ".4s", ".2d", ".2d",
- []>;
-
- // Patterns for the '2' variants involve INSERT_SUBREG, which can't appear
- // in a pattern attached directly to an instruction definition.
- def : Pat<(concat_vectors (v8i8 V64:$Rd), (IntOp (v8i16 V128:$Rn),
- (v8i16 V128:$Rm))),
- (!cast<Instruction>(NAME # "v8i16_v16i8")
- (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub),
- V128:$Rn, V128:$Rm)>;
- def : Pat<(concat_vectors (v4i16 V64:$Rd), (IntOp (v4i32 V128:$Rn),
- (v4i32 V128:$Rm))),
- (!cast<Instruction>(NAME # "v4i32_v8i16")
- (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub),
- V128:$Rn, V128:$Rm)>;
- def : Pat<(concat_vectors (v2i32 V64:$Rd), (IntOp (v2i64 V128:$Rn),
- (v2i64 V128:$Rm))),
- (!cast<Instruction>(NAME # "v2i64_v4i32")
- (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub),
- V128:$Rn, V128:$Rm)>;
-}
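-
-// Example instantiation (a sketch; opcode bits and intrinsic name assumed):
-//   defm ADDHN : SIMDNarrowThreeVectorBHS<0, 0b0100, "addhn",
-//                                         int_arm64_neon_addhn>;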
-
-multiclass SIMDDifferentThreeVectorBD<bit U, bits<4> opc, string asm,
- Intrinsic IntOp> {
- def v8i8 : BaseSIMDDifferentThreeVector<U, 0b000, opc,
- V128, V64, V64,
- asm, ".8h", ".8b", ".8b",
- [(set (v8i16 V128:$Rd), (IntOp (v8i8 V64:$Rn), (v8i8 V64:$Rm)))]>;
- def v16i8 : BaseSIMDDifferentThreeVector<U, 0b001, opc,
- V128, V128, V128,
- asm#"2", ".8h", ".16b", ".16b", []>;
- def v1i64 : BaseSIMDDifferentThreeVector<U, 0b110, opc,
- V128, V64, V64,
- asm, ".1q", ".1d", ".1d", []>;
- def v2i64 : BaseSIMDDifferentThreeVector<U, 0b111, opc,
- V128, V128, V128,
- asm#"2", ".1q", ".2d", ".2d", []>;
-
- def : Pat<(v8i16 (IntOp (v8i8 (extract_high_v16i8 V128:$Rn)),
- (v8i8 (extract_high_v16i8 V128:$Rm)))),
- (!cast<Instruction>(NAME#"v16i8") V128:$Rn, V128:$Rm)>;
-}
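-
-// The polynomial multiply-long family fits this shape, e.g. (illustrative;
-// opcode bits assumed):
-//   defm PMULL : SIMDDifferentThreeVectorBD<0, 0b1110, "pmull",
-//                                           int_arm64_neon_pmull>;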
-
-multiclass SIMDLongThreeVectorHS<bit U, bits<4> opc, string asm,
- SDPatternOperator OpNode> {
- def v4i16_v4i32 : BaseSIMDDifferentThreeVector<U, 0b010, opc,
- V128, V64, V64,
- asm, ".4s", ".4h", ".4h",
- [(set (v4i32 V128:$Rd), (OpNode (v4i16 V64:$Rn), (v4i16 V64:$Rm)))]>;
- def v8i16_v4i32 : BaseSIMDDifferentThreeVector<U, 0b011, opc,
- V128, V128, V128,
- asm#"2", ".4s", ".8h", ".8h",
- [(set (v4i32 V128:$Rd), (OpNode (extract_high_v8i16 V128:$Rn),
- (extract_high_v8i16 V128:$Rm)))]>;
- def v2i32_v2i64 : BaseSIMDDifferentThreeVector<U, 0b100, opc,
- V128, V64, V64,
- asm, ".2d", ".2s", ".2s",
- [(set (v2i64 V128:$Rd), (OpNode (v2i32 V64:$Rn), (v2i32 V64:$Rm)))]>;
- def v4i32_v2i64 : BaseSIMDDifferentThreeVector<U, 0b101, opc,
- V128, V128, V128,
- asm#"2", ".2d", ".4s", ".4s",
- [(set (v2i64 V128:$Rd), (OpNode (extract_high_v4i32 V128:$Rn),
- (extract_high_v4i32 V128:$Rm)))]>;
-}
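-
-// Plausible instantiation (sketch; opcode bits assumed):
-//   defm SQDMULL : SIMDLongThreeVectorHS<0, 0b1101, "sqdmull",
-//                                        int_arm64_neon_sqdmull>;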
-
-multiclass SIMDLongThreeVectorBHSabdl<bit U, bits<4> opc, string asm,
- SDPatternOperator OpNode = null_frag> {
- def v8i8_v8i16 : BaseSIMDDifferentThreeVector<U, 0b000, opc,
- V128, V64, V64,
- asm, ".8h", ".8b", ".8b",
- [(set (v8i16 V128:$Rd),
- (zext (v8i8 (OpNode (v8i8 V64:$Rn), (v8i8 V64:$Rm)))))]>;
- def v16i8_v8i16 : BaseSIMDDifferentThreeVector<U, 0b001, opc,
- V128, V128, V128,
- asm#"2", ".8h", ".16b", ".16b",
- [(set (v8i16 V128:$Rd),
- (zext (v8i8 (OpNode (extract_high_v16i8 V128:$Rn),
- (extract_high_v16i8 V128:$Rm)))))]>;
- def v4i16_v4i32 : BaseSIMDDifferentThreeVector<U, 0b010, opc,
- V128, V64, V64,
- asm, ".4s", ".4h", ".4h",
- [(set (v4i32 V128:$Rd),
- (zext (v4i16 (OpNode (v4i16 V64:$Rn), (v4i16 V64:$Rm)))))]>;
- def v8i16_v4i32 : BaseSIMDDifferentThreeVector<U, 0b011, opc,
- V128, V128, V128,
- asm#"2", ".4s", ".8h", ".8h",
- [(set (v4i32 V128:$Rd),
- (zext (v4i16 (OpNode (extract_high_v8i16 V128:$Rn),
- (extract_high_v8i16 V128:$Rm)))))]>;
- def v2i32_v2i64 : BaseSIMDDifferentThreeVector<U, 0b100, opc,
- V128, V64, V64,
- asm, ".2d", ".2s", ".2s",
- [(set (v2i64 V128:$Rd),
- (zext (v2i32 (OpNode (v2i32 V64:$Rn), (v2i32 V64:$Rm)))))]>;
- def v4i32_v2i64 : BaseSIMDDifferentThreeVector<U, 0b101, opc,
- V128, V128, V128,
- asm#"2", ".2d", ".4s", ".4s",
- [(set (v2i64 V128:$Rd),
- (zext (v2i32 (OpNode (extract_high_v4i32 V128:$Rn),
- (extract_high_v4i32 V128:$Rm)))))]>;
-}
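-
-// The absolute-difference-long instructions are the intended users, e.g.
-// (illustrative; opcode bits and intrinsic name assumed):
-//   defm SABDL : SIMDLongThreeVectorBHSabdl<0, 0b0111, "sabdl",
-//                                           int_arm64_neon_sabd>;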
-
-multiclass SIMDLongThreeVectorTiedBHSabal<bit U, bits<4> opc,
- string asm,
- SDPatternOperator OpNode> {
- def v8i8_v8i16 : BaseSIMDDifferentThreeVectorTied<U, 0b000, opc,
- V128, V64, V64,
- asm, ".8h", ".8b", ".8b",
- [(set (v8i16 V128:$dst),
- (add (v8i16 V128:$Rd),
- (zext (v8i8 (OpNode (v8i8 V64:$Rn), (v8i8 V64:$Rm))))))]>;
- def v16i8_v8i16 : BaseSIMDDifferentThreeVectorTied<U, 0b001, opc,
- V128, V128, V128,
- asm#"2", ".8h", ".16b", ".16b",
- [(set (v8i16 V128:$dst),
- (add (v8i16 V128:$Rd),
- (zext (v8i8 (OpNode (extract_high_v16i8 V128:$Rn),
- (extract_high_v16i8 V128:$Rm))))))]>;
- def v4i16_v4i32 : BaseSIMDDifferentThreeVectorTied<U, 0b010, opc,
- V128, V64, V64,
- asm, ".4s", ".4h", ".4h",
- [(set (v4i32 V128:$dst),
- (add (v4i32 V128:$Rd),
- (zext (v4i16 (OpNode (v4i16 V64:$Rn), (v4i16 V64:$Rm))))))]>;
- def v8i16_v4i32 : BaseSIMDDifferentThreeVectorTied<U, 0b011, opc,
- V128, V128, V128,
- asm#"2", ".4s", ".8h", ".8h",
- [(set (v4i32 V128:$dst),
- (add (v4i32 V128:$Rd),
- (zext (v4i16 (OpNode (extract_high_v8i16 V128:$Rn),
- (extract_high_v8i16 V128:$Rm))))))]>;
- def v2i32_v2i64 : BaseSIMDDifferentThreeVectorTied<U, 0b100, opc,
- V128, V64, V64,
- asm, ".2d", ".2s", ".2s",
- [(set (v2i64 V128:$dst),
- (add (v2i64 V128:$Rd),
- (zext (v2i32 (OpNode (v2i32 V64:$Rn), (v2i32 V64:$Rm))))))]>;
- def v4i32_v2i64 : BaseSIMDDifferentThreeVectorTied<U, 0b101, opc,
- V128, V128, V128,
- asm#"2", ".2d", ".4s", ".4s",
- [(set (v2i64 V128:$dst),
- (add (v2i64 V128:$Rd),
- (zext (v2i32 (OpNode (extract_high_v4i32 V128:$Rn),
- (extract_high_v4i32 V128:$Rm))))))]>;
-}
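-
-// Matching accumulate form (sketch; opcode bits assumed):
-//   defm SABAL : SIMDLongThreeVectorTiedBHSabal<0, 0b0101, "sabal",
-//                                               int_arm64_neon_sabd>;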
-
-multiclass SIMDLongThreeVectorBHS<bit U, bits<4> opc, string asm,
- SDPatternOperator OpNode = null_frag> {
- def v8i8_v8i16 : BaseSIMDDifferentThreeVector<U, 0b000, opc,
- V128, V64, V64,
- asm, ".8h", ".8b", ".8b",
- [(set (v8i16 V128:$Rd), (OpNode (v8i8 V64:$Rn), (v8i8 V64:$Rm)))]>;
- def v16i8_v8i16 : BaseSIMDDifferentThreeVector<U, 0b001, opc,
- V128, V128, V128,
- asm#"2", ".8h", ".16b", ".16b",
- [(set (v8i16 V128:$Rd), (OpNode (extract_high_v16i8 V128:$Rn),
- (extract_high_v16i8 V128:$Rm)))]>;
- def v4i16_v4i32 : BaseSIMDDifferentThreeVector<U, 0b010, opc,
- V128, V64, V64,
- asm, ".4s", ".4h", ".4h",
- [(set (v4i32 V128:$Rd), (OpNode (v4i16 V64:$Rn), (v4i16 V64:$Rm)))]>;
- def v8i16_v4i32 : BaseSIMDDifferentThreeVector<U, 0b011, opc,
- V128, V128, V128,
- asm#"2", ".4s", ".8h", ".8h",
- [(set (v4i32 V128:$Rd), (OpNode (extract_high_v8i16 V128:$Rn),
- (extract_high_v8i16 V128:$Rm)))]>;
- def v2i32_v2i64 : BaseSIMDDifferentThreeVector<U, 0b100, opc,
- V128, V64, V64,
- asm, ".2d", ".2s", ".2s",
- [(set (v2i64 V128:$Rd), (OpNode (v2i32 V64:$Rn), (v2i32 V64:$Rm)))]>;
- def v4i32_v2i64 : BaseSIMDDifferentThreeVector<U, 0b101, opc,
- V128, V128, V128,
- asm#"2", ".2d", ".4s", ".4s",
- [(set (v2i64 V128:$Rd), (OpNode (extract_high_v4i32 V128:$Rn),
- (extract_high_v4i32 V128:$Rm)))]>;
-}
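-
-// Example use (illustrative; opcode bits assumed):
-//   defm SMULL : SIMDLongThreeVectorBHS<0, 0b1100, "smull",
-//                                       int_arm64_neon_smull>;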
-
-multiclass SIMDLongThreeVectorTiedBHS<bit U, bits<4> opc,
- string asm,
- SDPatternOperator OpNode> {
- def v8i8_v8i16 : BaseSIMDDifferentThreeVectorTied<U, 0b000, opc,
- V128, V64, V64,
- asm, ".8h", ".8b", ".8b",
- [(set (v8i16 V128:$dst),
- (OpNode (v8i16 V128:$Rd), (v8i8 V64:$Rn), (v8i8 V64:$Rm)))]>;
- def v16i8_v8i16 : BaseSIMDDifferentThreeVectorTied<U, 0b001, opc,
- V128, V128, V128,
- asm#"2", ".8h", ".16b", ".16b",
- [(set (v8i16 V128:$dst),
- (OpNode (v8i16 V128:$Rd),
- (extract_high_v16i8 V128:$Rn),
- (extract_high_v16i8 V128:$Rm)))]>;
- def v4i16_v4i32 : BaseSIMDDifferentThreeVectorTied<U, 0b010, opc,
- V128, V64, V64,
- asm, ".4s", ".4h", ".4h",
- [(set (v4i32 V128:$dst),
- (OpNode (v4i32 V128:$Rd), (v4i16 V64:$Rn), (v4i16 V64:$Rm)))]>;
- def v8i16_v4i32 : BaseSIMDDifferentThreeVectorTied<U, 0b011, opc,
- V128, V128, V128,
- asm#"2", ".4s", ".8h", ".8h",
- [(set (v4i32 V128:$dst),
- (OpNode (v4i32 V128:$Rd),
- (extract_high_v8i16 V128:$Rn),
- (extract_high_v8i16 V128:$Rm)))]>;
- def v2i32_v2i64 : BaseSIMDDifferentThreeVectorTied<U, 0b100, opc,
- V128, V64, V64,
- asm, ".2d", ".2s", ".2s",
- [(set (v2i64 V128:$dst),
- (OpNode (v2i64 V128:$Rd), (v2i32 V64:$Rn), (v2i32 V64:$Rm)))]>;
- def v4i32_v2i64 : BaseSIMDDifferentThreeVectorTied<U, 0b101, opc,
- V128, V128, V128,
- asm#"2", ".2d", ".4s", ".4s",
- [(set (v2i64 V128:$dst),
- (OpNode (v2i64 V128:$Rd),
- (extract_high_v4i32 V128:$Rn),
- (extract_high_v4i32 V128:$Rm)))]>;
-}
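-
-// Typical accumulating user, written with a TriOpFrag so the add folds the
-// widened multiply into $Rd (sketch; the bits and fragment are assumed):
-//   defm SMLAL : SIMDLongThreeVectorTiedBHS<0, 0b1000, "smlal",
-//       TriOpFrag<(add node:$LHS,
-//                      (int_arm64_neon_smull node:$MHS, node:$RHS))>>;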
-
-multiclass SIMDLongThreeVectorSQDMLXTiedHS<bit U, bits<4> opc, string asm,
- SDPatternOperator Accum> {
- def v4i16_v4i32 : BaseSIMDDifferentThreeVectorTied<U, 0b010, opc,
- V128, V64, V64,
- asm, ".4s", ".4h", ".4h",
- [(set (v4i32 V128:$dst),
- (Accum (v4i32 V128:$Rd),
- (v4i32 (int_arm64_neon_sqdmull (v4i16 V64:$Rn),
- (v4i16 V64:$Rm)))))]>;
- def v8i16_v4i32 : BaseSIMDDifferentThreeVectorTied<U, 0b011, opc,
- V128, V128, V128,
- asm#"2", ".4s", ".8h", ".8h",
- [(set (v4i32 V128:$dst),
- (Accum (v4i32 V128:$Rd),
- (v4i32 (int_arm64_neon_sqdmull (extract_high_v8i16 V128:$Rn),
- (extract_high_v8i16 V128:$Rm)))))]>;
- def v2i32_v2i64 : BaseSIMDDifferentThreeVectorTied<U, 0b100, opc,
- V128, V64, V64,
- asm, ".2d", ".2s", ".2s",
- [(set (v2i64 V128:$dst),
- (Accum (v2i64 V128:$Rd),
- (v2i64 (int_arm64_neon_sqdmull (v2i32 V64:$Rn),
- (v2i32 V64:$Rm)))))]>;
- def v4i32_v2i64 : BaseSIMDDifferentThreeVectorTied<U, 0b101, opc,
- V128, V128, V128,
- asm#"2", ".2d", ".4s", ".4s",
- [(set (v2i64 V128:$dst),
- (Accum (v2i64 V128:$Rd),
- (v2i64 (int_arm64_neon_sqdmull (extract_high_v4i32 V128:$Rn),
- (extract_high_v4i32 V128:$Rm)))))]>;
-}
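-
-// The Accum operator is what distinguishes sqdmlal from sqdmlsl, e.g.
-// (sketch; opcode bits assumed):
-//   defm SQDMLAL : SIMDLongThreeVectorSQDMLXTiedHS<0, 0b1001, "sqdmlal",
-//                                                  int_arm64_neon_sqadd>;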
-
-multiclass SIMDWideThreeVectorBHS<bit U, bits<4> opc, string asm,
- SDPatternOperator OpNode> {
- def v8i8_v8i16 : BaseSIMDDifferentThreeVector<U, 0b000, opc,
- V128, V128, V64,
- asm, ".8h", ".8h", ".8b",
- [(set (v8i16 V128:$Rd), (OpNode (v8i16 V128:$Rn), (v8i8 V64:$Rm)))]>;
- def v16i8_v8i16 : BaseSIMDDifferentThreeVector<U, 0b001, opc,
- V128, V128, V128,
- asm#"2", ".8h", ".8h", ".16b",
- [(set (v8i16 V128:$Rd), (OpNode (v8i16 V128:$Rn),
- (extract_high_v16i8 V128:$Rm)))]>;
- def v4i16_v4i32 : BaseSIMDDifferentThreeVector<U, 0b010, opc,
- V128, V128, V64,
- asm, ".4s", ".4s", ".4h",
- [(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn), (v4i16 V64:$Rm)))]>;
- def v8i16_v4i32 : BaseSIMDDifferentThreeVector<U, 0b011, opc,
- V128, V128, V128,
- asm#"2", ".4s", ".4s", ".8h",
- [(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn),
- (extract_high_v8i16 V128:$Rm)))]>;
- def v2i32_v2i64 : BaseSIMDDifferentThreeVector<U, 0b100, opc,
- V128, V128, V64,
- asm, ".2d", ".2d", ".2s",
- [(set (v2i64 V128:$Rd), (OpNode (v2i64 V128:$Rn), (v2i32 V64:$Rm)))]>;
- def v4i32_v2i64 : BaseSIMDDifferentThreeVector<U, 0b101, opc,
- V128, V128, V128,
- asm#"2", ".2d", ".2d", ".4s",
- [(set (v2i64 V128:$Rd), (OpNode (v2i64 V128:$Rn),
- (extract_high_v4i32 V128:$Rm)))]>;
-}
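-
-// Wide adds pass a fragment that extends only the second operand
-// (illustrative; the bits and fragment are assumed):
-//   defm SADDW : SIMDWideThreeVectorBHS<0, 0b0001, "saddw",
-//       BinOpFrag<(add node:$LHS, (sext node:$RHS))>>;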
-
-//----------------------------------------------------------------------------
-// AdvSIMD bitwise extract from vector
-//----------------------------------------------------------------------------
-
-class BaseSIMDBitwiseExtract<bit size, RegisterOperand regtype, ValueType vty,
- string asm, string kind>
- : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm, i32imm:$imm), asm,
- "{\t$Rd" # kind # ", $Rn" # kind # ", $Rm" # kind # ", $imm" #
- "|" # kind # "\t$Rd, $Rn, $Rm, $imm}", "",
- [(set (vty regtype:$Rd),
- (ARM64ext regtype:$Rn, regtype:$Rm, (i32 imm:$imm)))]>,
- Sched<[WriteV]> {
- bits<5> Rd;
- bits<5> Rn;
- bits<5> Rm;
- bits<4> imm;
- let Inst{31} = 0;
- let Inst{30} = size;
- let Inst{29-21} = 0b101110000;
- let Inst{20-16} = Rm;
- let Inst{15} = 0;
- let Inst{14-11} = imm;
- let Inst{10} = 0;
- let Inst{9-5} = Rn;
- let Inst{4-0} = Rd;
-}
-
-multiclass SIMDBitwiseExtract<string asm> {
- def v8i8 : BaseSIMDBitwiseExtract<0, V64, v8i8, asm, ".8b">;
- def v16i8 : BaseSIMDBitwiseExtract<1, V128, v16i8, asm, ".16b">;
-}
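-
-// EXT is the natural user of this pair (sketch):
-//   defm EXT : SIMDBitwiseExtract<"ext">;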
-
-//----------------------------------------------------------------------------
-// AdvSIMD zip vector
-//----------------------------------------------------------------------------
-
-class BaseSIMDZipVector<bits<3> size, bits<3> opc, RegisterOperand regtype,
- string asm, string kind, SDNode OpNode, ValueType valty>
- : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm), asm,
- "{\t$Rd" # kind # ", $Rn" # kind # ", $Rm" # kind #
- "|" # kind # "\t$Rd, $Rn, $Rm}", "",
- [(set (valty regtype:$Rd), (OpNode regtype:$Rn, regtype:$Rm))]>,
- Sched<[WriteV]> {
- bits<5> Rd;
- bits<5> Rn;
- bits<5> Rm;
- let Inst{31} = 0;
- let Inst{30} = size{0};
- let Inst{29-24} = 0b001110;
- let Inst{23-22} = size{2-1};
- let Inst{21} = 0;
- let Inst{20-16} = Rm;
- let Inst{15} = 0;
- let Inst{14-12} = opc;
- let Inst{11-10} = 0b10;
- let Inst{9-5} = Rn;
- let Inst{4-0} = Rd;
-}
-
-multiclass SIMDZipVector<bits<3>opc, string asm,
- SDNode OpNode> {
- def v8i8 : BaseSIMDZipVector<0b000, opc, V64,
- asm, ".8b", OpNode, v8i8>;
- def v16i8 : BaseSIMDZipVector<0b001, opc, V128,
- asm, ".16b", OpNode, v16i8>;
- def v4i16 : BaseSIMDZipVector<0b010, opc, V64,
- asm, ".4h", OpNode, v4i16>;
- def v8i16 : BaseSIMDZipVector<0b011, opc, V128,
- asm, ".8h", OpNode, v8i16>;
- def v2i32 : BaseSIMDZipVector<0b100, opc, V64,
- asm, ".2s", OpNode, v2i32>;
- def v4i32 : BaseSIMDZipVector<0b101, opc, V128,
- asm, ".4s", OpNode, v4i32>;
- def v2i64 : BaseSIMDZipVector<0b111, opc, V128,
- asm, ".2d", OpNode, v2i64>;
-
- def : Pat<(v2f32 (OpNode V64:$Rn, V64:$Rm)),
- (!cast<Instruction>(NAME#"v2i32") V64:$Rn, V64:$Rm)>;
- def : Pat<(v4f32 (OpNode V128:$Rn, V128:$Rm)),
- (!cast<Instruction>(NAME#"v4i32") V128:$Rn, V128:$Rm)>;
- def : Pat<(v2f64 (OpNode V128:$Rn, V128:$Rm)),
- (!cast<Instruction>(NAME#"v2i64") V128:$Rn, V128:$Rm)>;
-}
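-
-// The permute instructions share this multiclass; only opc differs
-// (illustrative; opc values assumed):
-//   defm ZIP1 : SIMDZipVector<0b011, "zip1", ARM64zip1>;
-//   defm ZIP2 : SIMDZipVector<0b111, "zip2", ARM64zip2>;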
-
-//----------------------------------------------------------------------------
-// AdvSIMD three register scalar instructions
-//----------------------------------------------------------------------------
-
-let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in
-class BaseSIMDThreeScalar<bit U, bits<2> size, bits<5> opcode,
- RegisterClass regtype, string asm,
- list<dag> pattern>
- : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm), asm,
- "\t$Rd, $Rn, $Rm", "", pattern>,
- Sched<[WriteV]> {
- bits<5> Rd;
- bits<5> Rn;
- bits<5> Rm;
- let Inst{31-30} = 0b01;
- let Inst{29} = U;
- let Inst{28-24} = 0b11110;
- let Inst{23-22} = size;
- let Inst{21} = 1;
- let Inst{20-16} = Rm;
- let Inst{15-11} = opcode;
- let Inst{10} = 1;
- let Inst{9-5} = Rn;
- let Inst{4-0} = Rd;
-}
-
-multiclass SIMDThreeScalarD<bit U, bits<5> opc, string asm,
- SDPatternOperator OpNode> {
- def v1i64 : BaseSIMDThreeScalar<U, 0b11, opc, FPR64, asm,
- [(set (v1i64 FPR64:$Rd), (OpNode (v1i64 FPR64:$Rn), (v1i64 FPR64:$Rm)))]>;
-}
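-
-// Example use (sketch; opcode bits assumed):
-//   defm ADD : SIMDThreeScalarD<0, 0b10000, "add", add>;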
-
-multiclass SIMDThreeScalarBHSD<bit U, bits<5> opc, string asm,
- SDPatternOperator OpNode> {
- def v1i64 : BaseSIMDThreeScalar<U, 0b11, opc, FPR64, asm,
- [(set (v1i64 FPR64:$Rd), (OpNode (v1i64 FPR64:$Rn), (v1i64 FPR64:$Rm)))]>;
- def v1i32 : BaseSIMDThreeScalar<U, 0b10, opc, FPR32, asm, []>;
- def v1i16 : BaseSIMDThreeScalar<U, 0b01, opc, FPR16, asm, []>;
- def v1i8 : BaseSIMDThreeScalar<U, 0b00, opc, FPR8 , asm, []>;
-
- def : Pat<(i64 (OpNode (i64 FPR64:$Rn), (i64 FPR64:$Rm))),
- (!cast<Instruction>(NAME#"v1i64") FPR64:$Rn, FPR64:$Rm)>;
- def : Pat<(i32 (OpNode (i32 FPR32:$Rn), (i32 FPR32:$Rm))),
- (!cast<Instruction>(NAME#"v1i32") FPR32:$Rn, FPR32:$Rm)>;
-}
-
-multiclass SIMDThreeScalarHS<bit U, bits<5> opc, string asm,
- SDPatternOperator OpNode> {
- def v1i32 : BaseSIMDThreeScalar<U, 0b10, opc, FPR32, asm,
- [(set FPR32:$Rd, (OpNode FPR32:$Rn, FPR32:$Rm))]>;
- def v1i16 : BaseSIMDThreeScalar<U, 0b01, opc, FPR16, asm, []>;
-}
-
-multiclass SIMDThreeScalarSD<bit U, bit S, bits<5> opc, string asm,
- SDPatternOperator OpNode = null_frag> {
- let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
- def #NAME#64 : BaseSIMDThreeScalar<U, {S,1}, opc, FPR64, asm,
- [(set (f64 FPR64:$Rd), (OpNode (f64 FPR64:$Rn), (f64 FPR64:$Rm)))]>;
- def #NAME#32 : BaseSIMDThreeScalar<U, {S,0}, opc, FPR32, asm,
- [(set FPR32:$Rd, (OpNode FPR32:$Rn, FPR32:$Rm))]>;
- }
-
- def : Pat<(v1f64 (OpNode (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
- (!cast<Instruction>(NAME # "64") FPR64:$Rn, FPR64:$Rm)>;
-}
-
-multiclass SIMDThreeScalarFPCmp<bit U, bit S, bits<5> opc, string asm,
- SDPatternOperator OpNode = null_frag> {
- let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
- def #NAME#64 : BaseSIMDThreeScalar<U, {S,1}, opc, FPR64, asm,
- [(set (i64 FPR64:$Rd), (OpNode (f64 FPR64:$Rn), (f64 FPR64:$Rm)))]>;
- def #NAME#32 : BaseSIMDThreeScalar<U, {S,0}, opc, FPR32, asm,
- [(set (i32 FPR32:$Rd), (OpNode (f32 FPR32:$Rn), (f32 FPR32:$Rm)))]>;
- }
-
- def : Pat<(v1i64 (OpNode (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
- (!cast<Instruction>(NAME # "64") FPR64:$Rn, FPR64:$Rm)>;
-}
-
-class BaseSIMDThreeScalarMixed<bit U, bits<2> size, bits<5> opcode,
- dag oops, dag iops, string asm, string cstr, list<dag> pat>
- : I<oops, iops, asm,
- "\t$Rd, $Rn, $Rm", cstr, pat>,
- Sched<[WriteV]> {
- bits<5> Rd;
- bits<5> Rn;
- bits<5> Rm;
- let Inst{31-30} = 0b01;
- let Inst{29} = U;
- let Inst{28-24} = 0b11110;
- let Inst{23-22} = size;
- let Inst{21} = 1;
- let Inst{20-16} = Rm;
- let Inst{15-11} = opcode;
- let Inst{10} = 0;
- let Inst{9-5} = Rn;
- let Inst{4-0} = Rd;
-}
-
-let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
-multiclass SIMDThreeScalarMixedHS<bit U, bits<5> opc, string asm,
- SDPatternOperator OpNode = null_frag> {
- def i16 : BaseSIMDThreeScalarMixed<U, 0b01, opc,
- (outs FPR32:$Rd),
- (ins FPR16:$Rn, FPR16:$Rm), asm, "", []>;
- def i32 : BaseSIMDThreeScalarMixed<U, 0b10, opc,
- (outs FPR64:$Rd),
- (ins FPR32:$Rn, FPR32:$Rm), asm, "",
- [(set (i64 FPR64:$Rd), (OpNode (i32 FPR32:$Rn), (i32 FPR32:$Rm)))]>;
-}
-
-let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
-multiclass SIMDThreeScalarMixedTiedHS<bit U, bits<5> opc, string asm,
- SDPatternOperator OpNode = null_frag> {
- def i16 : BaseSIMDThreeScalarMixed<U, 0b01, opc,
- (outs FPR32:$dst),
- (ins FPR32:$Rd, FPR16:$Rn, FPR16:$Rm),
- asm, "$Rd = $dst", []>;
- def i32 : BaseSIMDThreeScalarMixed<U, 0b10, opc,
- (outs FPR64:$dst),
- (ins FPR64:$Rd, FPR32:$Rn, FPR32:$Rm),
- asm, "$Rd = $dst",
- [(set (i64 FPR64:$dst),
- (OpNode (i64 FPR64:$Rd), (i32 FPR32:$Rn), (i32 FPR32:$Rm)))]>;
-}
-
-//----------------------------------------------------------------------------
-// AdvSIMD two register scalar instructions
-//----------------------------------------------------------------------------
-
-let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
-class BaseSIMDTwoScalar<bit U, bits<2> size, bits<5> opcode,
- RegisterClass regtype, RegisterClass regtype2,
- string asm, list<dag> pat>
- : I<(outs regtype:$Rd), (ins regtype2:$Rn), asm,
- "\t$Rd, $Rn", "", pat>,
- Sched<[WriteV]> {
- bits<5> Rd;
- bits<5> Rn;
- let Inst{31-30} = 0b01;
- let Inst{29} = U;
- let Inst{28-24} = 0b11110;
- let Inst{23-22} = size;
- let Inst{21-17} = 0b10000;
- let Inst{16-12} = opcode;
- let Inst{11-10} = 0b10;
- let Inst{9-5} = Rn;
- let Inst{4-0} = Rd;
-}
-
-let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
-class BaseSIMDTwoScalarTied<bit U, bits<2> size, bits<5> opcode,
- RegisterClass regtype, RegisterClass regtype2,
- string asm, list<dag> pat>
- : I<(outs regtype:$dst), (ins regtype:$Rd, regtype2:$Rn), asm,
- "\t$Rd, $Rn", "$Rd = $dst", pat>,
- Sched<[WriteV]> {
- bits<5> Rd;
- bits<5> Rn;
- let Inst{31-30} = 0b01;
- let Inst{29} = U;
- let Inst{28-24} = 0b11110;
- let Inst{23-22} = size;
- let Inst{21-17} = 0b10000;
- let Inst{16-12} = opcode;
- let Inst{11-10} = 0b10;
- let Inst{9-5} = Rn;
- let Inst{4-0} = Rd;
-}
-
-let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
-class BaseSIMDCmpTwoScalar<bit U, bits<2> size, bits<5> opcode,
- RegisterClass regtype, string asm>
- : I<(outs regtype:$Rd), (ins regtype:$Rn), asm,
- "\t$Rd, $Rn, #0", "", []>,
- Sched<[WriteV]> {
- bits<5> Rd;
- bits<5> Rn;
- let Inst{31-30} = 0b01;
- let Inst{29} = U;
- let Inst{28-24} = 0b11110;
- let Inst{23-22} = size;
- let Inst{21-17} = 0b10000;
- let Inst{16-12} = opcode;
- let Inst{11-10} = 0b10;
- let Inst{9-5} = Rn;
- let Inst{4-0} = Rd;
-}
-
-class SIMDInexactCvtTwoScalar<bits<5> opcode, string asm>
- : I<(outs FPR32:$Rd), (ins FPR64:$Rn), asm, "\t$Rd, $Rn", "",
- [(set (f32 FPR32:$Rd), (int_arm64_sisd_fcvtxn (f64 FPR64:$Rn)))]>,
- Sched<[WriteV]> {
- bits<5> Rd;
- bits<5> Rn;
- let Inst{31-17} = 0b011111100110000;
- let Inst{16-12} = opcode;
- let Inst{11-10} = 0b10;
- let Inst{9-5} = Rn;
- let Inst{4-0} = Rd;
-}
-
-multiclass SIMDCmpTwoScalarD<bit U, bits<5> opc, string asm,
- SDPatternOperator OpNode> {
- def v1i64rz : BaseSIMDCmpTwoScalar<U, 0b11, opc, FPR64, asm>;
-
- def : Pat<(v1i64 (OpNode FPR64:$Rn)),
- (!cast<Instruction>(NAME # "v1i64rz") FPR64:$Rn)>;
-}
-
-multiclass SIMDCmpTwoScalarSD<bit U, bit S, bits<5> opc, string asm,
- SDPatternOperator OpNode> {
- def v1i64rz : BaseSIMDCmpTwoScalar<U, {S,1}, opc, FPR64, asm>;
- def v1i32rz : BaseSIMDCmpTwoScalar<U, {S,0}, opc, FPR32, asm>;
-
- def : Pat<(v1i64 (OpNode (v1f64 FPR64:$Rn))),
- (!cast<Instruction>(NAME # "v1i64rz") FPR64:$Rn)>;
-}
-
-multiclass SIMDTwoScalarD<bit U, bits<5> opc, string asm,
- SDPatternOperator OpNode = null_frag> {
- def v1i64 : BaseSIMDTwoScalar<U, 0b11, opc, FPR64, FPR64, asm,
- [(set (v1i64 FPR64:$Rd), (OpNode (v1i64 FPR64:$Rn)))]>;
-
- def : Pat<(i64 (OpNode (i64 FPR64:$Rn))),
- (!cast<Instruction>(NAME # "v1i64") FPR64:$Rn)>;
-}
-
-multiclass SIMDTwoScalarSD<bit U, bit S, bits<5> opc, string asm> {
- def v1i64 : BaseSIMDTwoScalar<U, {S,1}, opc, FPR64, FPR64, asm,[]>;
- def v1i32 : BaseSIMDTwoScalar<U, {S,0}, opc, FPR32, FPR32, asm,[]>;
-}
-
-multiclass SIMDTwoScalarCVTSD<bit U, bit S, bits<5> opc, string asm,
- SDPatternOperator OpNode> {
- def v1i64 : BaseSIMDTwoScalar<U, {S,1}, opc, FPR64, FPR64, asm,
- [(set FPR64:$Rd, (OpNode (f64 FPR64:$Rn)))]>;
- def v1i32 : BaseSIMDTwoScalar<U, {S,0}, opc, FPR32, FPR32, asm,
- [(set FPR32:$Rd, (OpNode (f32 FPR32:$Rn)))]>;
-}
-
-multiclass SIMDTwoScalarBHSD<bit U, bits<5> opc, string asm,
- SDPatternOperator OpNode = null_frag> {
- let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
- def v1i64 : BaseSIMDTwoScalar<U, 0b11, opc, FPR64, FPR64, asm,
- [(set (i64 FPR64:$Rd), (OpNode (i64 FPR64:$Rn)))]>;
- def v1i32 : BaseSIMDTwoScalar<U, 0b10, opc, FPR32, FPR32, asm,
- [(set (i32 FPR32:$Rd), (OpNode (i32 FPR32:$Rn)))]>;
- def v1i16 : BaseSIMDTwoScalar<U, 0b01, opc, FPR16, FPR16, asm, []>;
- def v1i8 : BaseSIMDTwoScalar<U, 0b00, opc, FPR8 , FPR8 , asm, []>;
- }
-
- def : Pat<(v1i64 (OpNode (v1i64 FPR64:$Rn))),
- (!cast<Instruction>(NAME # "v1i64") FPR64:$Rn)>;
-}
-
-multiclass SIMDTwoScalarBHSDTied<bit U, bits<5> opc, string asm,
- Intrinsic OpNode> {
- let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
- def v1i64 : BaseSIMDTwoScalarTied<U, 0b11, opc, FPR64, FPR64, asm,
- [(set (i64 FPR64:$dst), (OpNode (i64 FPR64:$Rd), (i64 FPR64:$Rn)))]>;
- def v1i32 : BaseSIMDTwoScalarTied<U, 0b10, opc, FPR32, FPR32, asm,
- [(set (i32 FPR32:$dst), (OpNode (i32 FPR32:$Rd), (i32 FPR32:$Rn)))]>;
- def v1i16 : BaseSIMDTwoScalarTied<U, 0b01, opc, FPR16, FPR16, asm, []>;
- def v1i8 : BaseSIMDTwoScalarTied<U, 0b00, opc, FPR8 , FPR8 , asm, []>;
- }
-
- def : Pat<(v1i64 (OpNode (v1i64 FPR64:$Rd), (v1i64 FPR64:$Rn))),
- (!cast<Instruction>(NAME # "v1i64") FPR64:$Rd, FPR64:$Rn)>;
-}
-
-let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
-multiclass SIMDTwoScalarMixedBHS<bit U, bits<5> opc, string asm,
- SDPatternOperator OpNode = null_frag> {
- def v1i32 : BaseSIMDTwoScalar<U, 0b10, opc, FPR32, FPR64, asm,
- [(set (i32 FPR32:$Rd), (OpNode (i64 FPR64:$Rn)))]>;
- def v1i16 : BaseSIMDTwoScalar<U, 0b01, opc, FPR16, FPR32, asm, []>;
- def v1i8 : BaseSIMDTwoScalar<U, 0b00, opc, FPR8 , FPR16, asm, []>;
-}
-
-//----------------------------------------------------------------------------
-// AdvSIMD scalar pairwise instructions
-//----------------------------------------------------------------------------
-
-let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
-class BaseSIMDPairwiseScalar<bit U, bits<2> size, bits<5> opcode,
- RegisterOperand regtype, RegisterOperand vectype,
- string asm, string kind>
- : I<(outs regtype:$Rd), (ins vectype:$Rn), asm,
- "{\t$Rd, $Rn" # kind # "|" # kind # "\t$Rd, $Rn}", "", []>,
- Sched<[WriteV]> {
- bits<5> Rd;
- bits<5> Rn;
- let Inst{31-30} = 0b01;
- let Inst{29} = U;
- let Inst{28-24} = 0b11110;
- let Inst{23-22} = size;
- let Inst{21-17} = 0b11000;
- let Inst{16-12} = opcode;
- let Inst{11-10} = 0b10;
- let Inst{9-5} = Rn;
- let Inst{4-0} = Rd;
-}
-
-multiclass SIMDPairwiseScalarD<bit U, bits<5> opc, string asm> {
- def v2i64p : BaseSIMDPairwiseScalar<U, 0b11, opc, FPR64Op, V128,
- asm, ".2d">;
-}
-
-multiclass SIMDPairwiseScalarSD<bit U, bit S, bits<5> opc, string asm> {
- def v2i32p : BaseSIMDPairwiseScalar<U, {S,0}, opc, FPR32Op, V64,
- asm, ".2s">;
- def v2i64p : BaseSIMDPairwiseScalar<U, {S,1}, opc, FPR64Op, V128,
- asm, ".2d">;
-}
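-
-// Illustrative instantiations (opcode bits assumed):
-//   defm ADDP  : SIMDPairwiseScalarD<0, 0b11011, "addp">;
-//   defm FADDP : SIMDPairwiseScalarSD<1, 0, 0b01101, "faddp">;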
-
-//----------------------------------------------------------------------------
-// AdvSIMD across lanes instructions
-//----------------------------------------------------------------------------
-
-let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
-class BaseSIMDAcrossLanes<bit Q, bit U, bits<2> size, bits<5> opcode,
- RegisterClass regtype, RegisterOperand vectype,
- string asm, string kind, list<dag> pattern>
- : I<(outs regtype:$Rd), (ins vectype:$Rn), asm,
- "{\t$Rd, $Rn" # kind # "|" # kind # "\t$Rd, $Rn}", "", pattern>,
- Sched<[WriteV]> {
- bits<5> Rd;
- bits<5> Rn;
- let Inst{31} = 0;
- let Inst{30} = Q;
- let Inst{29} = U;
- let Inst{28-24} = 0b01110;
- let Inst{23-22} = size;
- let Inst{21-17} = 0b11000;
- let Inst{16-12} = opcode;
- let Inst{11-10} = 0b10;
- let Inst{9-5} = Rn;
- let Inst{4-0} = Rd;
-}
-
-multiclass SIMDAcrossLanesBHS<bit U, bits<5> opcode,
- string asm> {
- def v8i8v : BaseSIMDAcrossLanes<0, U, 0b00, opcode, FPR8, V64,
- asm, ".8b", []>;
- def v16i8v : BaseSIMDAcrossLanes<1, U, 0b00, opcode, FPR8, V128,
- asm, ".16b", []>;
- def v4i16v : BaseSIMDAcrossLanes<0, U, 0b01, opcode, FPR16, V64,
- asm, ".4h", []>;
- def v8i16v : BaseSIMDAcrossLanes<1, U, 0b01, opcode, FPR16, V128,
- asm, ".8h", []>;
- def v4i32v : BaseSIMDAcrossLanes<1, U, 0b10, opcode, FPR32, V128,
- asm, ".4s", []>;
-}
-
-multiclass SIMDAcrossLanesHSD<bit U, bits<5> opcode, string asm> {
- def v8i8v : BaseSIMDAcrossLanes<0, U, 0b00, opcode, FPR16, V64,
- asm, ".8b", []>;
- def v16i8v : BaseSIMDAcrossLanes<1, U, 0b00, opcode, FPR16, V128,
- asm, ".16b", []>;
- def v4i16v : BaseSIMDAcrossLanes<0, U, 0b01, opcode, FPR32, V64,
- asm, ".4h", []>;
- def v8i16v : BaseSIMDAcrossLanes<1, U, 0b01, opcode, FPR32, V128,
- asm, ".8h", []>;
- def v4i32v : BaseSIMDAcrossLanes<1, U, 0b10, opcode, FPR64, V128,
- asm, ".4s", []>;
-}
-
-multiclass SIMDAcrossLanesS<bits<5> opcode, bit sz1, string asm,
- Intrinsic intOp> {
- def v4i32v : BaseSIMDAcrossLanes<1, 1, {sz1, 0}, opcode, FPR32, V128,
- asm, ".4s",
- [(set FPR32:$Rd, (intOp (v4f32 V128:$Rn)))]>;
-}
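-
-// Example use (sketch; opcode/sz1 values assumed):
-//   defm FMAXNMV : SIMDAcrossLanesS<0b01100, 0, "fmaxnmv",
-//                                   int_arm64_neon_fmaxnmv>;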
-
-//----------------------------------------------------------------------------
-// AdvSIMD INS/DUP instructions
-//----------------------------------------------------------------------------
-
-// FIXME: There has got to be a better way to factor these. ugh.
-
-class BaseSIMDInsDup<bit Q, bit op, dag outs, dag ins, string asm,
- string operands, string constraints, list<dag> pattern>
- : I<outs, ins, asm, operands, constraints, pattern>,
- Sched<[WriteV]> {
- bits<5> Rd;
- bits<5> Rn;
- let Inst{31} = 0;
- let Inst{30} = Q;
- let Inst{29} = op;
- let Inst{28-21} = 0b01110000;
- let Inst{15} = 0;
- let Inst{10} = 1;
- let Inst{9-5} = Rn;
- let Inst{4-0} = Rd;
-}
-
-class SIMDDupFromMain<bit Q, bits<5> imm5, string size, ValueType vectype,
- RegisterOperand vecreg, RegisterClass regtype>
- : BaseSIMDInsDup<Q, 0, (outs vecreg:$Rd), (ins regtype:$Rn), "dup",
- "{\t$Rd" # size # ", $Rn" #
- "|" # size # "\t$Rd, $Rn}", "",
- [(set (vectype vecreg:$Rd), (ARM64dup regtype:$Rn))]> {
- let Inst{20-16} = imm5;
- let Inst{14-11} = 0b0001;
-}
-
-class SIMDDupFromElement<bit Q, string dstkind, string srckind,
- ValueType vectype, ValueType insreg,
- RegisterOperand vecreg, Operand idxtype,
- ValueType elttype, SDNode OpNode>
- : BaseSIMDInsDup<Q, 0, (outs vecreg:$Rd), (ins V128:$Rn, idxtype:$idx), "dup",
- "{\t$Rd" # dstkind # ", $Rn" # srckind # "$idx" #
- "|" # dstkind # "\t$Rd, $Rn$idx}", "",
- [(set (vectype vecreg:$Rd),
- (OpNode (insreg V128:$Rn), idxtype:$idx))]> {
- let Inst{14-11} = 0b0000;
-}
-
-class SIMDDup64FromElement
- : SIMDDupFromElement<1, ".2d", ".d", v2i64, v2i64, V128,
- VectorIndexD, i64, ARM64duplane64> {
- bits<1> idx;
- let Inst{20} = idx;
- let Inst{19-16} = 0b1000;
-}
-
-class SIMDDup32FromElement<bit Q, string size, ValueType vectype,
- RegisterOperand vecreg>
- : SIMDDupFromElement<Q, size, ".s", vectype, v4i32, vecreg,
- VectorIndexS, i64, ARM64duplane32> {
- bits<2> idx;
- let Inst{20-19} = idx;
- let Inst{18-16} = 0b100;
-}
-
-class SIMDDup16FromElement<bit Q, string size, ValueType vectype,
- RegisterOperand vecreg>
- : SIMDDupFromElement<Q, size, ".h", vectype, v8i16, vecreg,
- VectorIndexH, i64, ARM64duplane16> {
- bits<3> idx;
- let Inst{20-18} = idx;
- let Inst{17-16} = 0b10;
-}
-
-class SIMDDup8FromElement<bit Q, string size, ValueType vectype,
- RegisterOperand vecreg>
- : SIMDDupFromElement<Q, size, ".b", vectype, v16i8, vecreg,
- VectorIndexB, i64, ARM64duplane8> {
- bits<4> idx;
- let Inst{20-17} = idx;
- let Inst{16} = 1;
-}
-
-class BaseSIMDMov<bit Q, string size, bits<4> imm4, RegisterClass regtype,
- Operand idxtype, string asm, list<dag> pattern>
- : BaseSIMDInsDup<Q, 0, (outs regtype:$Rd), (ins V128:$Rn, idxtype:$idx), asm,
- "{\t$Rd, $Rn" # size # "$idx" #
- "|" # size # "\t$Rd, $Rn$idx}", "", pattern> {
- let Inst{14-11} = imm4;
-}
-
-class SIMDSMov<bit Q, string size, RegisterClass regtype,
- Operand idxtype>
- : BaseSIMDMov<Q, size, 0b0101, regtype, idxtype, "smov", []>;
-class SIMDUMov<bit Q, string size, ValueType vectype, RegisterClass regtype,
- Operand idxtype>
- : BaseSIMDMov<Q, size, 0b0111, regtype, idxtype, "umov",
- [(set regtype:$Rd, (vector_extract (vectype V128:$Rn), idxtype:$idx))]>;
-
-class SIMDMovAlias<string asm, string size, Instruction inst,
- RegisterClass regtype, Operand idxtype>
- : InstAlias<asm#"{\t$dst, $src"#size#"$idx" #
- "|" # size # "\t$dst, $src$idx}",
- (inst regtype:$dst, V128:$src, idxtype:$idx)>;
-
-multiclass SMov {
- def vi8to32 : SIMDSMov<0, ".b", GPR32, VectorIndexB> {
- bits<4> idx;
- let Inst{20-17} = idx;
- let Inst{16} = 1;
- }
- def vi8to64 : SIMDSMov<1, ".b", GPR64, VectorIndexB> {
- bits<4> idx;
- let Inst{20-17} = idx;
- let Inst{16} = 1;
- }
- def vi16to32 : SIMDSMov<0, ".h", GPR32, VectorIndexH> {
- bits<3> idx;
- let Inst{20-18} = idx;
- let Inst{17-16} = 0b10;
- }
- def vi16to64 : SIMDSMov<1, ".h", GPR64, VectorIndexH> {
- bits<3> idx;
- let Inst{20-18} = idx;
- let Inst{17-16} = 0b10;
- }
- def vi32to64 : SIMDSMov<1, ".s", GPR64, VectorIndexS> {
- bits<2> idx;
- let Inst{20-19} = idx;
- let Inst{18-16} = 0b100;
- }
-}
-
-multiclass UMov {
- def vi8 : SIMDUMov<0, ".b", v16i8, GPR32, VectorIndexB> {
- bits<4> idx;
- let Inst{20-17} = idx;
- let Inst{16} = 1;
- }
- def vi16 : SIMDUMov<0, ".h", v8i16, GPR32, VectorIndexH> {
- bits<3> idx;
- let Inst{20-18} = idx;
- let Inst{17-16} = 0b10;
- }
- def vi32 : SIMDUMov<0, ".s", v4i32, GPR32, VectorIndexS> {
- bits<2> idx;
- let Inst{20-19} = idx;
- let Inst{18-16} = 0b100;
- }
- def vi64 : SIMDUMov<1, ".d", v2i64, GPR64, VectorIndexD> {
- bits<1> idx;
- let Inst{20} = idx;
- let Inst{19-16} = 0b1000;
- }
- def : SIMDMovAlias<"mov", ".s",
- !cast<Instruction>(NAME#"vi32"),
- GPR32, VectorIndexS>;
- def : SIMDMovAlias<"mov", ".d",
- !cast<Instruction>(NAME#"vi64"),
- GPR64, VectorIndexD>;
-}
-
-class SIMDInsFromMain<string size, ValueType vectype,
- RegisterClass regtype, Operand idxtype>
- : BaseSIMDInsDup<1, 0, (outs V128:$dst),
- (ins V128:$Rd, idxtype:$idx, regtype:$Rn), "ins",
- "{\t$Rd" # size # "$idx, $Rn" #
- "|" # size # "\t$Rd$idx, $Rn}",
- "$Rd = $dst",
- [(set V128:$dst,
- (vector_insert (vectype V128:$Rd), regtype:$Rn, idxtype:$idx))]> {
- let Inst{14-11} = 0b0011;
-}
-
-class SIMDInsFromElement<string size, ValueType vectype,
- ValueType elttype, Operand idxtype>
- : BaseSIMDInsDup<1, 1, (outs V128:$dst),
- (ins V128:$Rd, idxtype:$idx, V128:$Rn, idxtype:$idx2), "ins",
- "{\t$Rd" # size # "$idx, $Rn" # size # "$idx2" #
- "|" # size # "\t$Rd$idx, $Rn$idx2}",
- "$Rd = $dst",
- [(set V128:$dst,
- (vector_insert
- (vectype V128:$Rd),
- (elttype (vector_extract (vectype V128:$Rn), idxtype:$idx2)),
- idxtype:$idx))]>;
-
-class SIMDInsMainMovAlias<string size, Instruction inst,
- RegisterClass regtype, Operand idxtype>
- : InstAlias<"mov" # "{\t$dst" # size # "$idx, $src" #
- "|" # size #"\t$dst$idx, $src}",
- (inst V128:$dst, idxtype:$idx, regtype:$src)>;
-class SIMDInsElementMovAlias<string size, Instruction inst,
- Operand idxtype>
- : InstAlias<"mov" # "{\t$dst" # size # "$idx, $src" # size # "$idx2" #
- # "|" # size #" $dst$idx, $src$idx2}",
- (inst V128:$dst, idxtype:$idx, V128:$src, idxtype:$idx2)>;
-
-
-multiclass SIMDIns {
- def vi8gpr : SIMDInsFromMain<".b", v16i8, GPR32, VectorIndexB> {
- bits<4> idx;
- let Inst{20-17} = idx;
- let Inst{16} = 1;
- }
- def vi16gpr : SIMDInsFromMain<".h", v8i16, GPR32, VectorIndexH> {
- bits<3> idx;
- let Inst{20-18} = idx;
- let Inst{17-16} = 0b10;
- }
- def vi32gpr : SIMDInsFromMain<".s", v4i32, GPR32, VectorIndexS> {
- bits<2> idx;
- let Inst{20-19} = idx;
- let Inst{18-16} = 0b100;
- }
- def vi64gpr : SIMDInsFromMain<".d", v2i64, GPR64, VectorIndexD> {
- bits<1> idx;
- let Inst{20} = idx;
- let Inst{19-16} = 0b1000;
- }
-
- def vi8lane : SIMDInsFromElement<".b", v16i8, i32, VectorIndexB> {
- bits<4> idx;
- bits<4> idx2;
- let Inst{20-17} = idx;
- let Inst{16} = 1;
- let Inst{14-11} = idx2;
- }
- def vi16lane : SIMDInsFromElement<".h", v8i16, i32, VectorIndexH> {
- bits<3> idx;
- bits<3> idx2;
- let Inst{20-18} = idx;
- let Inst{17-16} = 0b10;
- let Inst{14-12} = idx2;
- let Inst{11} = 0;
- }
- def vi32lane : SIMDInsFromElement<".s", v4i32, i32, VectorIndexS> {
- bits<2> idx;
- bits<2> idx2;
- let Inst{20-19} = idx;
- let Inst{18-16} = 0b100;
- let Inst{14-13} = idx2;
- let Inst{12-11} = 0;
- }
- def vi64lane : SIMDInsFromElement<".d", v2i64, i64, VectorIndexD> {
- bits<1> idx;
- bits<1> idx2;
- let Inst{20} = idx;
- let Inst{19-16} = 0b1000;
- let Inst{14} = idx2;
- let Inst{13-11} = 0;
- }
-
- // For all forms of the INS instruction, the "mov" mnemonic is the
- // preferred alias. Why they didn't just call the instruction "mov" in
- // the first place is a very good question indeed...
- def : SIMDInsMainMovAlias<".b", !cast<Instruction>(NAME#"vi8gpr"),
- GPR32, VectorIndexB>;
- def : SIMDInsMainMovAlias<".h", !cast<Instruction>(NAME#"vi16gpr"),
- GPR32, VectorIndexH>;
- def : SIMDInsMainMovAlias<".s", !cast<Instruction>(NAME#"vi32gpr"),
- GPR32, VectorIndexS>;
- def : SIMDInsMainMovAlias<".d", !cast<Instruction>(NAME#"vi64gpr"),
- GPR64, VectorIndexD>;
-
- def : SIMDInsElementMovAlias<".b", !cast<Instruction>(NAME#"vi8lane"),
- VectorIndexB>;
- def : SIMDInsElementMovAlias<".h", !cast<Instruction>(NAME#"vi16lane"),
- VectorIndexH>;
- def : SIMDInsElementMovAlias<".s", !cast<Instruction>(NAME#"vi32lane"),
- VectorIndexS>;
- def : SIMDInsElementMovAlias<".d", !cast<Instruction>(NAME#"vi64lane"),
- VectorIndexD>;
-}
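-
-// These multiclasses take no parameters; a single defm each materializes
-// the whole family (sketch):
-//   defm SMOV : SMov;
-//   defm UMOV : UMov;
-//   defm INS  : SIMDIns;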
-
-//----------------------------------------------------------------------------
-// AdvSIMD TBL/TBX
-//----------------------------------------------------------------------------
-
-let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in
-class BaseSIMDTableLookup<bit Q, bits<2> len, bit op, RegisterOperand vectype,
- RegisterOperand listtype, string asm, string kind>
- : I<(outs vectype:$Vd), (ins listtype:$Vn, vectype:$Vm), asm,
- "\t$Vd" # kind # ", $Vn, $Vm" # kind, "", []>,
- Sched<[WriteV]> {
- bits<5> Vd;
- bits<5> Vn;
- bits<5> Vm;
- let Inst{31} = 0;
- let Inst{30} = Q;
- let Inst{29-21} = 0b001110000;
- let Inst{20-16} = Vm;
- let Inst{15} = 0;
- let Inst{14-13} = len;
- let Inst{12} = op;
- let Inst{11-10} = 0b00;
- let Inst{9-5} = Vn;
- let Inst{4-0} = Vd;
-}
-
-let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in
-class BaseSIMDTableLookupTied<bit Q, bits<2> len, bit op, RegisterOperand vectype,
- RegisterOperand listtype, string asm, string kind>
- : I<(outs vectype:$dst), (ins vectype:$Vd, listtype:$Vn, vectype:$Vm), asm,
- "\t$Vd" # kind # ", $Vn, $Vm" # kind, "$Vd = $dst", []>,
- Sched<[WriteV]> {
- bits<5> Vd;
- bits<5> Vn;
- bits<5> Vm;
- let Inst{31} = 0;
- let Inst{30} = Q;
- let Inst{29-21} = 0b001110000;
- let Inst{20-16} = Vm;
- let Inst{15} = 0;
- let Inst{14-13} = len;
- let Inst{12} = op;
- let Inst{11-10} = 0b00;
- let Inst{9-5} = Vn;
- let Inst{4-0} = Vd;
-}
-
-class SIMDTableLookupAlias<string asm, Instruction inst,
- RegisterOperand vectype, RegisterOperand listtype>
- : InstAlias<!strconcat(asm, "\t$dst, $lst, $index"),
- (inst vectype:$dst, listtype:$lst, vectype:$index), 0>;
-
-multiclass SIMDTableLookup<bit op, string asm> {
- def v8i8One : BaseSIMDTableLookup<0, 0b00, op, V64, VecListOne16b,
- asm, ".8b">;
- def v8i8Two : BaseSIMDTableLookup<0, 0b01, op, V64, VecListTwo16b,
- asm, ".8b">;
- def v8i8Three : BaseSIMDTableLookup<0, 0b10, op, V64, VecListThree16b,
- asm, ".8b">;
- def v8i8Four : BaseSIMDTableLookup<0, 0b11, op, V64, VecListFour16b,
- asm, ".8b">;
- def v16i8One : BaseSIMDTableLookup<1, 0b00, op, V128, VecListOne16b,
- asm, ".16b">;
- def v16i8Two : BaseSIMDTableLookup<1, 0b01, op, V128, VecListTwo16b,
- asm, ".16b">;
- def v16i8Three: BaseSIMDTableLookup<1, 0b10, op, V128, VecListThree16b,
- asm, ".16b">;
- def v16i8Four : BaseSIMDTableLookup<1, 0b11, op, V128, VecListFour16b,
- asm, ".16b">;
-
- def : SIMDTableLookupAlias<asm # ".8b",
- !cast<Instruction>(NAME#"v8i8One"),
- V64, VecListOne128>;
- def : SIMDTableLookupAlias<asm # ".8b",
- !cast<Instruction>(NAME#"v8i8Two"),
- V64, VecListTwo128>;
- def : SIMDTableLookupAlias<asm # ".8b",
- !cast<Instruction>(NAME#"v8i8Three"),
- V64, VecListThree128>;
- def : SIMDTableLookupAlias<asm # ".8b",
- !cast<Instruction>(NAME#"v8i8Four"),
- V64, VecListFour128>;
- def : SIMDTableLookupAlias<asm # ".16b",
- !cast<Instruction>(NAME#"v16i8One"),
- V128, VecListOne128>;
- def : SIMDTableLookupAlias<asm # ".16b",
- !cast<Instruction>(NAME#"v16i8Two"),
- V128, VecListTwo128>;
- def : SIMDTableLookupAlias<asm # ".16b",
- !cast<Instruction>(NAME#"v16i8Three"),
- V128, VecListThree128>;
- def : SIMDTableLookupAlias<asm # ".16b",
- !cast<Instruction>(NAME#"v16i8Four"),
- V128, VecListFour128>;
-}
-
-multiclass SIMDTableLookupTied<bit op, string asm> {
- def v8i8One : BaseSIMDTableLookupTied<0, 0b00, op, V64, VecListOne16b,
- asm, ".8b">;
- def v8i8Two : BaseSIMDTableLookupTied<0, 0b01, op, V64, VecListTwo16b,
- asm, ".8b">;
- def v8i8Three : BaseSIMDTableLookupTied<0, 0b10, op, V64, VecListThree16b,
- asm, ".8b">;
- def v8i8Four : BaseSIMDTableLookupTied<0, 0b11, op, V64, VecListFour16b,
- asm, ".8b">;
- def v16i8One : BaseSIMDTableLookupTied<1, 0b00, op, V128, VecListOne16b,
- asm, ".16b">;
- def v16i8Two : BaseSIMDTableLookupTied<1, 0b01, op, V128, VecListTwo16b,
- asm, ".16b">;
- def v16i8Three: BaseSIMDTableLookupTied<1, 0b10, op, V128, VecListThree16b,
- asm, ".16b">;
- def v16i8Four : BaseSIMDTableLookupTied<1, 0b11, op, V128, VecListFour16b,
- asm, ".16b">;
-
- def : SIMDTableLookupAlias<asm # ".8b",
- !cast<Instruction>(NAME#"v8i8One"),
- V64, VecListOne128>;
- def : SIMDTableLookupAlias<asm # ".8b",
- !cast<Instruction>(NAME#"v8i8Two"),
- V64, VecListTwo128>;
- def : SIMDTableLookupAlias<asm # ".8b",
- !cast<Instruction>(NAME#"v8i8Three"),
- V64, VecListThree128>;
- def : SIMDTableLookupAlias<asm # ".8b",
- !cast<Instruction>(NAME#"v8i8Four"),
- V64, VecListFour128>;
- def : SIMDTableLookupAlias<asm # ".16b",
- !cast<Instruction>(NAME#"v16i8One"),
- V128, VecListOne128>;
- def : SIMDTableLookupAlias<asm # ".16b",
- !cast<Instruction>(NAME#"v16i8Two"),
- V128, VecListTwo128>;
- def : SIMDTableLookupAlias<asm # ".16b",
- !cast<Instruction>(NAME#"v16i8Three"),
- V128, VecListThree128>;
- def : SIMDTableLookupAlias<asm # ".16b",
- !cast<Instruction>(NAME#"v16i8Four"),
- V128, VecListFour128>;
-}
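-
-// The op bit selects between the plain and the tied form (sketch):
-//   defm TBL : SIMDTableLookup    <0, "tbl">;
-//   defm TBX : SIMDTableLookupTied<1, "tbx">;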
-
-//----------------------------------------------------------------------------
-// AdvSIMD scalar CPY
-//----------------------------------------------------------------------------
-let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
-class BaseSIMDScalarCPY<RegisterClass regtype, RegisterOperand vectype,
- string kind, Operand idxtype>
- : I<(outs regtype:$dst), (ins vectype:$src, idxtype:$idx), "mov",
- "{\t$dst, $src" # kind # "$idx" #
- "|\t$dst, $src$idx}", "", []>,
- Sched<[WriteV]> {
- bits<5> dst;
- bits<5> src;
- let Inst{31-21} = 0b01011110000;
- let Inst{15-10} = 0b000001;
- let Inst{9-5} = src;
- let Inst{4-0} = dst;
-}
-
-class SIMDScalarCPYAlias<string asm, string size, Instruction inst,
- RegisterClass regtype, RegisterOperand vectype, Operand idxtype>
- : InstAlias<asm # "{\t$dst, $src" # size # "$index" #
- # "|\t$dst, $src$index}",
- (inst regtype:$dst, vectype:$src, idxtype:$index)>;
-
-
-multiclass SIMDScalarCPY<string asm> {
- def i8 : BaseSIMDScalarCPY<FPR8, V128, ".b", VectorIndexB> {
- bits<4> idx;
- let Inst{20-17} = idx;
- let Inst{16} = 1;
- }
- def i16 : BaseSIMDScalarCPY<FPR16, V128, ".h", VectorIndexH> {
- bits<3> idx;
- let Inst{20-18} = idx;
- let Inst{17-16} = 0b10;
- }
- def i32 : BaseSIMDScalarCPY<FPR32, V128, ".s", VectorIndexS> {
- bits<2> idx;
- let Inst{20-19} = idx;
- let Inst{18-16} = 0b100;
- }
- def i64 : BaseSIMDScalarCPY<FPR64, V128, ".d", VectorIndexD> {
- bits<1> idx;
- let Inst{20} = idx;
- let Inst{19-16} = 0b1000;
- }
-
- // 'DUP' mnemonic aliases.
- def : SIMDScalarCPYAlias<"dup", ".b",
- !cast<Instruction>(NAME#"i8"),
- FPR8, V128, VectorIndexB>;
- def : SIMDScalarCPYAlias<"dup", ".h",
- !cast<Instruction>(NAME#"i16"),
- FPR16, V128, VectorIndexH>;
- def : SIMDScalarCPYAlias<"dup", ".s",
- !cast<Instruction>(NAME#"i32"),
- FPR32, V128, VectorIndexS>;
- def : SIMDScalarCPYAlias<"dup", ".d",
- !cast<Instruction>(NAME#"i64"),
- FPR64, V128, VectorIndexD>;
-}
-
-//----------------------------------------------------------------------------
-// AdvSIMD modified immediate instructions
-//----------------------------------------------------------------------------
-
-class BaseSIMDModifiedImm<bit Q, bit op, dag oops, dag iops,
- string asm, string op_string,
- string cstr, list<dag> pattern>
- : I<oops, iops, asm, op_string, cstr, pattern>,
- Sched<[WriteV]> {
- bits<5> Rd;
- bits<8> imm8;
- let Inst{31} = 0;
- let Inst{30} = Q;
- let Inst{29} = op;
- let Inst{28-19} = 0b0111100000;
- let Inst{18-16} = imm8{7-5};
- let Inst{11-10} = 0b01;
- let Inst{9-5} = imm8{4-0};
- let Inst{4-0} = Rd;
-}
-
-class BaseSIMDModifiedImmVector<bit Q, bit op, RegisterOperand vectype,
- Operand immtype, dag opt_shift_iop,
- string opt_shift, string asm, string kind,
- list<dag> pattern>
- : BaseSIMDModifiedImm<Q, op, (outs vectype:$Rd),
- !con((ins immtype:$imm8), opt_shift_iop), asm,
- "{\t$Rd" # kind # ", $imm8" # opt_shift #
- "|" # kind # "\t$Rd, $imm8" # opt_shift # "}",
- "", pattern> {
- let DecoderMethod = "DecodeModImmInstruction";
-}
-
-class BaseSIMDModifiedImmVectorTied<bit Q, bit op, RegisterOperand vectype,
- Operand immtype, dag opt_shift_iop,
- string opt_shift, string asm, string kind,
- list<dag> pattern>
- : BaseSIMDModifiedImm<Q, op, (outs vectype:$dst),
- !con((ins vectype:$Rd, immtype:$imm8), opt_shift_iop),
- asm, "{\t$Rd" # kind # ", $imm8" # opt_shift #
- "|" # kind # "\t$Rd, $imm8" # opt_shift # "}",
- "$Rd = $dst", pattern> {
- let DecoderMethod = "DecodeModImmTiedInstruction";
-}
-
-class BaseSIMDModifiedImmVectorShift<bit Q, bit op, bits<2> b15_b12,
- RegisterOperand vectype, string asm,
- string kind, list<dag> pattern>
- : BaseSIMDModifiedImmVector<Q, op, vectype, imm0_255,
- (ins logical_vec_shift:$shift),
- "$shift", asm, kind, pattern> {
- bits<2> shift;
- let Inst{15} = b15_b12{1};
- let Inst{14-13} = shift;
- let Inst{12} = b15_b12{0};
-}
-
-class BaseSIMDModifiedImmVectorShiftTied<bit Q, bit op, bits<2> b15_b12,
- RegisterOperand vectype, string asm,
- string kind, list<dag> pattern>
- : BaseSIMDModifiedImmVectorTied<Q, op, vectype, imm0_255,
- (ins logical_vec_shift:$shift),
- "$shift", asm, kind, pattern> {
- bits<2> shift;
- let Inst{15} = b15_b12{1};
- let Inst{14-13} = shift;
- let Inst{12} = b15_b12{0};
-}
-
-
-class BaseSIMDModifiedImmVectorShiftHalf<bit Q, bit op, bits<2> b15_b12,
- RegisterOperand vectype, string asm,
- string kind, list<dag> pattern>
- : BaseSIMDModifiedImmVector<Q, op, vectype, imm0_255,
- (ins logical_vec_hw_shift:$shift),
- "$shift", asm, kind, pattern> {
- bits<2> shift;
- let Inst{15} = b15_b12{1};
- let Inst{14} = 0;
- let Inst{13} = shift{0};
- let Inst{12} = b15_b12{0};
-}
-
-class BaseSIMDModifiedImmVectorShiftHalfTied<bit Q, bit op, bits<2> b15_b12,
- RegisterOperand vectype, string asm,
- string kind, list<dag> pattern>
- : BaseSIMDModifiedImmVectorTied<Q, op, vectype, imm0_255,
- (ins logical_vec_hw_shift:$shift),
- "$shift", asm, kind, pattern> {
- bits<2> shift;
- let Inst{15} = b15_b12{1};
- let Inst{14} = 0;
- let Inst{13} = shift{0};
- let Inst{12} = b15_b12{0};
-}
-
-multiclass SIMDModifiedImmVectorShift<bit op, bits<2> hw_cmode, bits<2> w_cmode,
- string asm> {
- def v4i16 : BaseSIMDModifiedImmVectorShiftHalf<0, op, hw_cmode, V64,
- asm, ".4h", []>;
- def v8i16 : BaseSIMDModifiedImmVectorShiftHalf<1, op, hw_cmode, V128,
- asm, ".8h", []>;
-
- def v2i32 : BaseSIMDModifiedImmVectorShift<0, op, w_cmode, V64,
- asm, ".2s", []>;
- def v4i32 : BaseSIMDModifiedImmVectorShift<1, op, w_cmode, V128,
- asm, ".4s", []>;
-}
-
-multiclass SIMDModifiedImmVectorShiftTied<bit op, bits<2> hw_cmode,
- bits<2> w_cmode, string asm,
- SDNode OpNode> {
- def v4i16 : BaseSIMDModifiedImmVectorShiftHalfTied<0, op, hw_cmode, V64,
- asm, ".4h",
- [(set (v4i16 V64:$dst), (OpNode V64:$Rd,
- imm0_255:$imm8,
- (i32 imm:$shift)))]>;
- def v8i16 : BaseSIMDModifiedImmVectorShiftHalfTied<1, op, hw_cmode, V128,
- asm, ".8h",
- [(set (v8i16 V128:$dst), (OpNode V128:$Rd,
- imm0_255:$imm8,
- (i32 imm:$shift)))]>;
-
- def v2i32 : BaseSIMDModifiedImmVectorShiftTied<0, op, w_cmode, V64,
- asm, ".2s",
- [(set (v2i32 V64:$dst), (OpNode V64:$Rd,
- imm0_255:$imm8,
- (i32 imm:$shift)))]>;
- def v4i32 : BaseSIMDModifiedImmVectorShiftTied<1, op, w_cmode, V128,
- asm, ".4s",
- [(set (v4i32 V128:$dst), (OpNode V128:$Rd,
- imm0_255:$imm8,
- (i32 imm:$shift)))]>;
-}
-
-class SIMDModifiedImmMoveMSL<bit Q, bit op, bits<4> cmode,
- RegisterOperand vectype, string asm,
- string kind, list<dag> pattern>
- : BaseSIMDModifiedImmVector<Q, op, vectype, imm0_255,
- (ins move_vec_shift:$shift),
- "$shift", asm, kind, pattern> {
- bits<1> shift;
- let Inst{15-13} = cmode{3-1};
- let Inst{12} = shift;
-}
-
-class SIMDModifiedImmVectorNoShift<bit Q, bit op, bits<4> cmode,
- RegisterOperand vectype,
- Operand imm_type, string asm,
- string kind, list<dag> pattern>
- : BaseSIMDModifiedImmVector<Q, op, vectype, imm_type, (ins), "",
- asm, kind, pattern> {
- let Inst{15-12} = cmode;
-}
-
-class SIMDModifiedImmScalarNoShift<bit Q, bit op, bits<4> cmode, string asm,
- list<dag> pattern>
- : BaseSIMDModifiedImm<Q, op, (outs FPR64:$Rd), (ins simdimmtype10:$imm8), asm,
- "\t$Rd, $imm8", "", pattern> {
- let Inst{15-12} = cmode;
- let DecoderMethod = "DecodeModImmInstruction";
-}
-
-//----------------------------------------------------------------------------
-// AdvSIMD indexed element
-//----------------------------------------------------------------------------
-
-let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
-class BaseSIMDIndexed<bit Q, bit U, bit Scalar, bits<2> size, bits<4> opc,
- RegisterOperand dst_reg, RegisterOperand lhs_reg,
- RegisterOperand rhs_reg, Operand vec_idx, string asm,
- string apple_kind, string dst_kind, string lhs_kind,
- string rhs_kind, list<dag> pattern>
- : I<(outs dst_reg:$Rd), (ins lhs_reg:$Rn, rhs_reg:$Rm, vec_idx:$idx),
- asm,
- "{\t$Rd" # dst_kind # ", $Rn" # lhs_kind # ", $Rm" # rhs_kind # "$idx" #
- "|" # apple_kind # "\t$Rd, $Rn, $Rm$idx}", "", pattern>,
- Sched<[WriteV]> {
- bits<5> Rd;
- bits<5> Rn;
- bits<5> Rm;
-
- let Inst{31} = 0;
- let Inst{30} = Q;
- let Inst{29} = U;
- let Inst{28} = Scalar;
- let Inst{27-24} = 0b1111;
- let Inst{23-22} = size;
- // Bit 21 must be set by the derived class.
- let Inst{20-16} = Rm;
- let Inst{15-12} = opc;
- // Bit 11 must be set by the derived class.
- let Inst{10} = 0;
- let Inst{9-5} = Rn;
- let Inst{4-0} = Rd;
-}
-
-let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
-class BaseSIMDIndexedTied<bit Q, bit U, bit Scalar, bits<2> size, bits<4> opc,
- RegisterOperand dst_reg, RegisterOperand lhs_reg,
- RegisterOperand rhs_reg, Operand vec_idx, string asm,
- string apple_kind, string dst_kind, string lhs_kind,
- string rhs_kind, list<dag> pattern>
- : I<(outs dst_reg:$dst),
- (ins dst_reg:$Rd, lhs_reg:$Rn, rhs_reg:$Rm, vec_idx:$idx), asm,
- "{\t$Rd" # dst_kind # ", $Rn" # lhs_kind # ", $Rm" # rhs_kind # "$idx" #
- "|" # apple_kind # "\t$Rd, $Rn, $Rm$idx}", "$Rd = $dst", pattern>,
- Sched<[WriteV]> {
- bits<5> Rd;
- bits<5> Rn;
- bits<5> Rm;
-
- let Inst{31} = 0;
- let Inst{30} = Q;
- let Inst{29} = U;
- let Inst{28} = Scalar;
- let Inst{27-24} = 0b1111;
- let Inst{23-22} = size;
- // Bit 21 must be set by the derived class.
- let Inst{20-16} = Rm;
- let Inst{15-12} = opc;
- // Bit 11 must be set by the derived class.
- let Inst{10} = 0;
- let Inst{9-5} = Rn;
- let Inst{4-0} = Rd;
-}
-
-multiclass SIMDFPIndexedSD<bit U, bits<4> opc, string asm,
- SDPatternOperator OpNode> {
- def v2i32_indexed : BaseSIMDIndexed<0, U, 0, 0b10, opc,
- V64, V64,
- V128, VectorIndexS,
- asm, ".2s", ".2s", ".2s", ".s",
- [(set (v2f32 V64:$Rd),
- (OpNode (v2f32 V64:$Rn),
- (v2f32 (ARM64duplane32 (v4f32 V128:$Rm), VectorIndexS:$idx))))]> {
- bits<2> idx;
- let Inst{11} = idx{1};
- let Inst{21} = idx{0};
- }
-
- def v4i32_indexed : BaseSIMDIndexed<1, U, 0, 0b10, opc,
- V128, V128,
- V128, VectorIndexS,
- asm, ".4s", ".4s", ".4s", ".s",
- [(set (v4f32 V128:$Rd),
- (OpNode (v4f32 V128:$Rn),
- (v4f32 (ARM64duplane32 (v4f32 V128:$Rm), VectorIndexS:$idx))))]> {
- bits<2> idx;
- let Inst{11} = idx{1};
- let Inst{21} = idx{0};
- }
-
- def v2i64_indexed : BaseSIMDIndexed<1, U, 0, 0b11, opc,
- V128, V128,
- V128, VectorIndexD,
- asm, ".2d", ".2d", ".2d", ".d",
- [(set (v2f64 V128:$Rd),
- (OpNode (v2f64 V128:$Rn),
- (v2f64 (ARM64duplane64 (v2f64 V128:$Rm), VectorIndexD:$idx))))]> {
- bits<1> idx;
- let Inst{11} = idx{0};
- let Inst{21} = 0;
- }
-
- def v1i32_indexed : BaseSIMDIndexed<1, U, 1, 0b10, opc,
- FPR32Op, FPR32Op, V128, VectorIndexS,
- asm, ".s", "", "", ".s",
- [(set (f32 FPR32Op:$Rd),
- (OpNode (f32 FPR32Op:$Rn),
- (f32 (vector_extract (v4f32 V128:$Rm),
- VectorIndexS:$idx))))]> {
- bits<2> idx;
- let Inst{11} = idx{1};
- let Inst{21} = idx{0};
- }
-
- def v1i64_indexed : BaseSIMDIndexed<1, U, 1, 0b11, opc,
- FPR64Op, FPR64Op, V128, VectorIndexD,
- asm, ".d", "", "", ".d",
- [(set (f64 FPR64Op:$Rd),
- (OpNode (f64 FPR64Op:$Rn),
- (f64 (vector_extract (v2f64 V128:$Rm),
- VectorIndexD:$idx))))]> {
- bits<1> idx;
- let Inst{11} = idx{0};
- let Inst{21} = 0;
- }
-}
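-
-// Example use (illustrative; the U/opcode values are assumed):
-//   defm FMUL : SIMDFPIndexedSD<0, 0b1001, "fmul", fmul>;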
-
-multiclass SIMDFPIndexedSDTiedPatterns<string INST, SDPatternOperator OpNode> {
- // 2 variants for the .2s version: DUPLANE from 128-bit and DUP scalar.
- def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn),
- (ARM64duplane32 (v4f32 V128:$Rm),
- VectorIndexS:$idx))),
- (!cast<Instruction>(INST # "v2i32_indexed")
- V64:$Rd, V64:$Rn, V128:$Rm, VectorIndexS:$idx)>;
- def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn),
- (ARM64dup (f32 FPR32Op:$Rm)))),
- (!cast<Instruction>(INST # "v2i32_indexed") V64:$Rd, V64:$Rn,
- (SUBREG_TO_REG (i32 0), FPR32Op:$Rm, ssub), (i64 0))>;
-
- // 2 variants for the .4s version: DUPLANE from 128-bit and DUP scalar.
- def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn),
- (ARM64duplane32 (v4f32 V128:$Rm),
- VectorIndexS:$idx))),
- (!cast<Instruction>(INST # "v4i32_indexed")
- V128:$Rd, V128:$Rn, V128:$Rm, VectorIndexS:$idx)>;
- def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn),
- (ARM64dup (f32 FPR32Op:$Rm)))),
- (!cast<Instruction>(INST # "v4i32_indexed") V128:$Rd, V128:$Rn,
- (SUBREG_TO_REG (i32 0), FPR32Op:$Rm, ssub), (i64 0))>;
-
- // 2 variants for the .2d version: DUPLANE from 128-bit and DUP scalar.
- def : Pat<(v2f64 (OpNode (v2f64 V128:$Rd), (v2f64 V128:$Rn),
- (ARM64duplane64 (v2f64 V128:$Rm),
- VectorIndexD:$idx))),
- (!cast<Instruction>(INST # "v2i64_indexed")
- V128:$Rd, V128:$Rn, V128:$Rm, VectorIndexD:$idx)>;
- def : Pat<(v2f64 (OpNode (v2f64 V128:$Rd), (v2f64 V128:$Rn),
- (ARM64dup (f64 FPR64Op:$Rm)))),
- (!cast<Instruction>(INST # "v2i64_indexed") V128:$Rd, V128:$Rn,
- (SUBREG_TO_REG (i32 0), FPR64Op:$Rm, dsub), (i64 0))>;
-
- // 2 variants for 32-bit scalar version: extract from .2s or from .4s
- def : Pat<(f32 (OpNode (f32 FPR32:$Rd), (f32 FPR32:$Rn),
- (vector_extract (v4f32 V128:$Rm), VectorIndexS:$idx))),
- (!cast<Instruction>(INST # "v1i32_indexed") FPR32:$Rd, FPR32:$Rn,
- V128:$Rm, VectorIndexS:$idx)>;
- def : Pat<(f32 (OpNode (f32 FPR32:$Rd), (f32 FPR32:$Rn),
- (vector_extract (v2f32 V64:$Rm), VectorIndexS:$idx))),
- (!cast<Instruction>(INST # "v1i32_indexed") FPR32:$Rd, FPR32:$Rn,
- (SUBREG_TO_REG (i32 0), V64:$Rm, dsub), VectorIndexS:$idx)>;
-
- // 1 variant for 64-bit scalar version: extract from .1d or from .2d
- def : Pat<(f64 (OpNode (f64 FPR64:$Rd), (f64 FPR64:$Rn),
- (vector_extract (v2f64 V128:$Rm), VectorIndexD:$idx))),
- (!cast<Instruction>(INST # "v1i64_indexed") FPR64:$Rd, FPR64:$Rn,
- V128:$Rm, VectorIndexD:$idx)>;
-}
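-
-// An anonymous defm wires these patterns to an already-defined instruction
-// family, e.g. (sketch; the fma operand order shown is assumed):
-//   defm : SIMDFPIndexedSDTiedPatterns<"FMLA",
-//              TriOpFrag<(fma node:$RHS, node:$MHS, node:$LHS)>>;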
-
-multiclass SIMDFPIndexedSDTied<bit U, bits<4> opc, string asm> {
- def v2i32_indexed : BaseSIMDIndexedTied<0, U, 0, 0b10, opc, V64, V64,
- V128, VectorIndexS,
- asm, ".2s", ".2s", ".2s", ".s", []> {
- bits<2> idx;
- let Inst{11} = idx{1};
- let Inst{21} = idx{0};
- }
-
- def v4i32_indexed : BaseSIMDIndexedTied<1, U, 0, 0b10, opc,
- V128, V128,
- V128, VectorIndexS,
- asm, ".4s", ".4s", ".4s", ".s", []> {
- bits<2> idx;
- let Inst{11} = idx{1};
- let Inst{21} = idx{0};
- }
-
- def v2i64_indexed : BaseSIMDIndexedTied<1, U, 0, 0b11, opc,
- V128, V128,
- V128, VectorIndexD,
- asm, ".2d", ".2d", ".2d", ".d", []> {
- bits<1> idx;
- let Inst{11} = idx{0};
- let Inst{21} = 0;
- }
-
- def v1i32_indexed : BaseSIMDIndexedTied<1, U, 1, 0b10, opc,
- FPR32Op, FPR32Op, V128, VectorIndexS,
- asm, ".s", "", "", ".s", []> {
- bits<2> idx;
- let Inst{11} = idx{1};
- let Inst{21} = idx{0};
- }
-
- def v1i64_indexed : BaseSIMDIndexedTied<1, U, 1, 0b11, opc,
- FPR64Op, FPR64Op, V128, VectorIndexD,
- asm, ".d", "", "", ".d", []> {
- bits<1> idx;
- let Inst{11} = idx{0};
- let Inst{21} = 0;
- }
-}
-
-multiclass SIMDIndexedHS<bit U, bits<4> opc, string asm,
- SDPatternOperator OpNode> {
- def v4i16_indexed : BaseSIMDIndexed<0, U, 0, 0b01, opc, V64, V64,
- V128_lo, VectorIndexH,
- asm, ".4h", ".4h", ".4h", ".h",
- [(set (v4i16 V64:$Rd),
- (OpNode (v4i16 V64:$Rn),
- (v4i16 (ARM64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> {
- bits<3> idx;
- let Inst{11} = idx{2};
- let Inst{21} = idx{1};
- let Inst{20} = idx{0};
- }
-
- def v8i16_indexed : BaseSIMDIndexed<1, U, 0, 0b01, opc,
- V128, V128,
- V128_lo, VectorIndexH,
- asm, ".8h", ".8h", ".8h", ".h",
- [(set (v8i16 V128:$Rd),
- (OpNode (v8i16 V128:$Rn),
- (v8i16 (ARM64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> {
- bits<3> idx;
- let Inst{11} = idx{2};
- let Inst{21} = idx{1};
- let Inst{20} = idx{0};
- }
-
- def v2i32_indexed : BaseSIMDIndexed<0, U, 0, 0b10, opc,
- V64, V64,
- V128, VectorIndexS,
- asm, ".2s", ".2s", ".2s", ".s",
- [(set (v2i32 V64:$Rd),
- (OpNode (v2i32 V64:$Rn),
- (v2i32 (ARM64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> {
- bits<2> idx;
- let Inst{11} = idx{1};
- let Inst{21} = idx{0};
- }
-
- def v4i32_indexed : BaseSIMDIndexed<1, U, 0, 0b10, opc,
- V128, V128,
- V128, VectorIndexS,
- asm, ".4s", ".4s", ".4s", ".s",
- [(set (v4i32 V128:$Rd),
- (OpNode (v4i32 V128:$Rn),
- (v4i32 (ARM64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> {
- bits<2> idx;
- let Inst{11} = idx{1};
- let Inst{21} = idx{0};
- }
-
- def v1i16_indexed : BaseSIMDIndexed<1, U, 1, 0b01, opc,
- FPR16Op, FPR16Op, V128_lo, VectorIndexH,
- asm, ".h", "", "", ".h", []> {
- bits<3> idx;
- let Inst{11} = idx{2};
- let Inst{21} = idx{1};
- let Inst{20} = idx{0};
- }
-
- def v1i32_indexed : BaseSIMDIndexed<1, U, 1, 0b10, opc,
- FPR32Op, FPR32Op, V128, VectorIndexS,
- asm, ".s", "", "", ".s",
- [(set (i32 FPR32Op:$Rd),
- (OpNode FPR32Op:$Rn,
- (i32 (vector_extract (v4i32 V128:$Rm),
- VectorIndexS:$idx))))]> {
- bits<2> idx;
- let Inst{11} = idx{1};
- let Inst{21} = idx{0};
- }
-}
-
-multiclass SIMDVectorIndexedHS<bit U, bits<4> opc, string asm,
- SDPatternOperator OpNode> {
- def v4i16_indexed : BaseSIMDIndexed<0, U, 0, 0b01, opc,
- V64, V64,
- V128_lo, VectorIndexH,
- asm, ".4h", ".4h", ".4h", ".h",
- [(set (v4i16 V64:$Rd),
- (OpNode (v4i16 V64:$Rn),
- (v4i16 (ARM64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> {
- bits<3> idx;
- let Inst{11} = idx{2};
- let Inst{21} = idx{1};
- let Inst{20} = idx{0};
- }
-
- def v8i16_indexed : BaseSIMDIndexed<1, U, 0, 0b01, opc,
- V128, V128,
- V128_lo, VectorIndexH,
- asm, ".8h", ".8h", ".8h", ".h",
- [(set (v8i16 V128:$Rd),
- (OpNode (v8i16 V128:$Rn),
- (v8i16 (ARM64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> {
- bits<3> idx;
- let Inst{11} = idx{2};
- let Inst{21} = idx{1};
- let Inst{20} = idx{0};
- }
-
- def v2i32_indexed : BaseSIMDIndexed<0, U, 0, 0b10, opc,
- V64, V64,
- V128, VectorIndexS,
- asm, ".2s", ".2s", ".2s", ".s",
- [(set (v2i32 V64:$Rd),
- (OpNode (v2i32 V64:$Rn),
- (v2i32 (ARM64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> {
- bits<2> idx;
- let Inst{11} = idx{1};
- let Inst{21} = idx{0};
- }
-
- def v4i32_indexed : BaseSIMDIndexed<1, U, 0, 0b10, opc,
- V128, V128,
- V128, VectorIndexS,
- asm, ".4s", ".4s", ".4s", ".s",
- [(set (v4i32 V128:$Rd),
- (OpNode (v4i32 V128:$Rn),
- (v4i32 (ARM64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> {
- bits<2> idx;
- let Inst{11} = idx{1};
- let Inst{21} = idx{0};
- }
-}
-
-multiclass SIMDVectorIndexedHSTied<bit U, bits<4> opc, string asm,
- SDPatternOperator OpNode> {
- def v4i16_indexed : BaseSIMDIndexedTied<0, U, 0, 0b01, opc, V64, V64,
- V128_lo, VectorIndexH,
- asm, ".4h", ".4h", ".4h", ".h",
- [(set (v4i16 V64:$dst),
- (OpNode (v4i16 V64:$Rd),(v4i16 V64:$Rn),
- (v4i16 (ARM64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> {
- bits<3> idx;
- let Inst{11} = idx{2};
- let Inst{21} = idx{1};
- let Inst{20} = idx{0};
- }
-
- def v8i16_indexed : BaseSIMDIndexedTied<1, U, 0, 0b01, opc,
- V128, V128,
- V128_lo, VectorIndexH,
- asm, ".8h", ".8h", ".8h", ".h",
- [(set (v8i16 V128:$dst),
- (OpNode (v8i16 V128:$Rd), (v8i16 V128:$Rn),
- (v8i16 (ARM64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> {
- bits<3> idx;
- let Inst{11} = idx{2};
- let Inst{21} = idx{1};
- let Inst{20} = idx{0};
- }
-
- def v2i32_indexed : BaseSIMDIndexedTied<0, U, 0, 0b10, opc,
- V64, V64,
- V128, VectorIndexS,
- asm, ".2s", ".2s", ".2s", ".s",
- [(set (v2i32 V64:$dst),
- (OpNode (v2i32 V64:$Rd), (v2i32 V64:$Rn),
- (v2i32 (ARM64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> {
- bits<2> idx;
- let Inst{11} = idx{1};
- let Inst{21} = idx{0};
- }
-
- def v4i32_indexed : BaseSIMDIndexedTied<1, U, 0, 0b10, opc,
- V128, V128,
- V128, VectorIndexS,
- asm, ".4s", ".4s", ".4s", ".s",
- [(set (v4i32 V128:$dst),
- (OpNode (v4i32 V128:$Rd), (v4i32 V128:$Rn),
- (v4i32 (ARM64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> {
- bits<2> idx;
- let Inst{11} = idx{1};
- let Inst{21} = idx{0};
- }
-}
-
-multiclass SIMDIndexedLongSD<bit U, bits<4> opc, string asm,
- SDPatternOperator OpNode> {
- def v4i16_indexed : BaseSIMDIndexed<0, U, 0, 0b01, opc,
- V128, V64,
- V128_lo, VectorIndexH,
- asm, ".4s", ".4s", ".4h", ".h",
- [(set (v4i32 V128:$Rd),
- (OpNode (v4i16 V64:$Rn),
- (v4i16 (ARM64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> {
- bits<3> idx;
- let Inst{11} = idx{2};
- let Inst{21} = idx{1};
- let Inst{20} = idx{0};
- }
-
- def v8i16_indexed : BaseSIMDIndexed<1, U, 0, 0b01, opc,
- V128, V128,
- V128_lo, VectorIndexH,
- asm#"2", ".4s", ".4s", ".8h", ".h",
- [(set (v4i32 V128:$Rd),
- (OpNode (extract_high_v8i16 V128:$Rn),
- (extract_high_v8i16 (ARM64duplane16 (v8i16 V128_lo:$Rm),
- VectorIndexH:$idx))))]> {
- bits<3> idx;
- let Inst{11} = idx{2};
- let Inst{21} = idx{1};
- let Inst{20} = idx{0};
- }
-
- def v2i32_indexed : BaseSIMDIndexed<0, U, 0, 0b10, opc,
- V128, V64,
- V128, VectorIndexS,
- asm, ".2d", ".2d", ".2s", ".s",
- [(set (v2i64 V128:$Rd),
- (OpNode (v2i32 V64:$Rn),
- (v2i32 (ARM64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> {
- bits<2> idx;
- let Inst{11} = idx{1};
- let Inst{21} = idx{0};
- }
-
- def v4i32_indexed : BaseSIMDIndexed<1, U, 0, 0b10, opc,
- V128, V128,
- V128, VectorIndexS,
- asm#"2", ".2d", ".2d", ".4s", ".s",
- [(set (v2i64 V128:$Rd),
- (OpNode (extract_high_v4i32 V128:$Rn),
- (extract_high_v4i32 (ARM64duplane32 (v4i32 V128:$Rm),
- VectorIndexS:$idx))))]> {
- bits<2> idx;
- let Inst{11} = idx{1};
- let Inst{21} = idx{0};
- }
-
- def v1i32_indexed : BaseSIMDIndexed<1, U, 1, 0b01, opc,
- FPR32Op, FPR16Op, V128_lo, VectorIndexH,
- asm, ".h", "", "", ".h", []> {
- bits<3> idx;
- let Inst{11} = idx{2};
- let Inst{21} = idx{1};
- let Inst{20} = idx{0};
- }
-
- def v1i64_indexed : BaseSIMDIndexed<1, U, 1, 0b10, opc,
- FPR64Op, FPR32Op, V128, VectorIndexS,
- asm, ".s", "", "", ".s", []> {
- bits<2> idx;
- let Inst{11} = idx{1};
- let Inst{21} = idx{0};
- }
-}
-
-multiclass SIMDIndexedLongSQDMLXSDTied<bit U, bits<4> opc, string asm,
- SDPatternOperator Accum> {
- def v4i16_indexed : BaseSIMDIndexedTied<0, U, 0, 0b01, opc,
- V128, V64,
- V128_lo, VectorIndexH,
- asm, ".4s", ".4s", ".4h", ".h",
- [(set (v4i32 V128:$dst),
- (Accum (v4i32 V128:$Rd),
- (v4i32 (int_arm64_neon_sqdmull
- (v4i16 V64:$Rn),
- (v4i16 (ARM64duplane16 (v8i16 V128_lo:$Rm),
- VectorIndexH:$idx))))))]> {
- bits<3> idx;
- let Inst{11} = idx{2};
- let Inst{21} = idx{1};
- let Inst{20} = idx{0};
- }
-
- // FIXME: it would be nice to use the scalar (v1i32) instruction here, but an
- // intermediate EXTRACT_SUBREG would be untyped.
- def : Pat<(i32 (Accum (i32 FPR32Op:$Rd),
- (i32 (vector_extract (v4i32
- (int_arm64_neon_sqdmull (v4i16 V64:$Rn),
- (v4i16 (ARM64duplane16 (v8i16 V128_lo:$Rm),
- VectorIndexH:$idx)))),
- (i64 0))))),
- (EXTRACT_SUBREG
- (!cast<Instruction>(NAME # v4i16_indexed)
- (SUBREG_TO_REG (i32 0), FPR32Op:$Rd, ssub), V64:$Rn,
- V128_lo:$Rm, VectorIndexH:$idx),
- ssub)>;
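-
-  // In effect, the workaround widens the i32 accumulator into lane 0 of a
-  // 128-bit register with SUBREG_TO_REG, runs the full v4i16_indexed
-  // instruction, and pulls the scalar result back out via ssub, which
-  // sidesteps the untyped intermediate described in the FIXME.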
-
- def v8i16_indexed : BaseSIMDIndexedTied<1, U, 0, 0b01, opc,
- V128, V128,
- V128_lo, VectorIndexH,
- asm#"2", ".4s", ".4s", ".8h", ".h",
- [(set (v4i32 V128:$dst),
- (Accum (v4i32 V128:$Rd),
- (v4i32 (int_arm64_neon_sqdmull
- (extract_high_v8i16 V128:$Rn),
- (extract_high_v8i16
- (ARM64duplane16 (v8i16 V128_lo:$Rm),
- VectorIndexH:$idx))))))]> {
- bits<3> idx;
- let Inst{11} = idx{2};
- let Inst{21} = idx{1};
- let Inst{20} = idx{0};
- }
-
- def v2i32_indexed : BaseSIMDIndexedTied<0, U, 0, 0b10, opc,
- V128, V64,
- V128, VectorIndexS,
- asm, ".2d", ".2d", ".2s", ".s",
- [(set (v2i64 V128:$dst),
- (Accum (v2i64 V128:$Rd),
- (v2i64 (int_arm64_neon_sqdmull
- (v2i32 V64:$Rn),
- (v2i32 (ARM64duplane32 (v4i32 V128:$Rm),
- VectorIndexS:$idx))))))]> {
- bits<2> idx;
- let Inst{11} = idx{1};
- let Inst{21} = idx{0};
- }
-
- def v4i32_indexed : BaseSIMDIndexedTied<1, U, 0, 0b10, opc,
- V128, V128,
- V128, VectorIndexS,
- asm#"2", ".2d", ".2d", ".4s", ".s",
- [(set (v2i64 V128:$dst),
- (Accum (v2i64 V128:$Rd),
- (v2i64 (int_arm64_neon_sqdmull
- (extract_high_v4i32 V128:$Rn),
- (extract_high_v4i32
- (ARM64duplane32 (v4i32 V128:$Rm),
- VectorIndexS:$idx))))))]> {
- bits<2> idx;
- let Inst{11} = idx{1};
- let Inst{21} = idx{0};
- }
-
- def v1i32_indexed : BaseSIMDIndexedTied<1, U, 1, 0b01, opc,
- FPR32Op, FPR16Op, V128_lo, VectorIndexH,
- asm, ".h", "", "", ".h", []> {
- bits<3> idx;
- let Inst{11} = idx{2};
- let Inst{21} = idx{1};
- let Inst{20} = idx{0};
- }
-
- def v1i64_indexed : BaseSIMDIndexedTied<1, U, 1, 0b10, opc,
- FPR64Op, FPR32Op, V128, VectorIndexS,
- asm, ".s", "", "", ".s",
- [(set (i64 FPR64Op:$dst),
- (Accum (i64 FPR64Op:$Rd),
- (i64 (int_arm64_neon_sqdmulls_scalar
- (i32 FPR32Op:$Rn),
- (i32 (vector_extract (v4i32 V128:$Rm),
- VectorIndexS:$idx))))))]> {
- bits<2> idx;
- let Inst{11} = idx{1};
- let Inst{21} = idx{0};
- }
-}
-
-multiclass SIMDVectorIndexedLongSD<bit U, bits<4> opc, string asm,
- SDPatternOperator OpNode> {
- let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
- def v4i16_indexed : BaseSIMDIndexed<0, U, 0, 0b01, opc,
- V128, V64,
- V128_lo, VectorIndexH,
- asm, ".4s", ".4s", ".4h", ".h",
- [(set (v4i32 V128:$Rd),
- (OpNode (v4i16 V64:$Rn),
- (v4i16 (ARM64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> {
- bits<3> idx;
- let Inst{11} = idx{2};
- let Inst{21} = idx{1};
- let Inst{20} = idx{0};
- }
-
- def v8i16_indexed : BaseSIMDIndexed<1, U, 0, 0b01, opc,
- V128, V128,
- V128_lo, VectorIndexH,
- asm#"2", ".4s", ".4s", ".8h", ".h",
- [(set (v4i32 V128:$Rd),
- (OpNode (extract_high_v8i16 V128:$Rn),
- (extract_high_v8i16 (ARM64duplane16 (v8i16 V128_lo:$Rm),
- VectorIndexH:$idx))))]> {
- bits<3> idx;
- let Inst{11} = idx{2};
- let Inst{21} = idx{1};
- let Inst{20} = idx{0};
- }
-
- def v2i32_indexed : BaseSIMDIndexed<0, U, 0, 0b10, opc,
- V128, V64,
- V128, VectorIndexS,
- asm, ".2d", ".2d", ".2s", ".s",
- [(set (v2i64 V128:$Rd),
- (OpNode (v2i32 V64:$Rn),
- (v2i32 (ARM64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> {
- bits<2> idx;
- let Inst{11} = idx{1};
- let Inst{21} = idx{0};
- }
-
- def v4i32_indexed : BaseSIMDIndexed<1, U, 0, 0b10, opc,
- V128, V128,
- V128, VectorIndexS,
- asm#"2", ".2d", ".2d", ".4s", ".s",
- [(set (v2i64 V128:$Rd),
- (OpNode (extract_high_v4i32 V128:$Rn),
- (extract_high_v4i32 (ARM64duplane32 (v4i32 V128:$Rm),
- VectorIndexS:$idx))))]> {
- bits<2> idx;
- let Inst{11} = idx{1};
- let Inst{21} = idx{0};
- }
- }
-}
-
-multiclass SIMDVectorIndexedLongSDTied<bit U, bits<4> opc, string asm,
- SDPatternOperator OpNode> {
- let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
- def v4i16_indexed : BaseSIMDIndexedTied<0, U, 0, 0b01, opc,
- V128, V64,
- V128_lo, VectorIndexH,
- asm, ".4s", ".4s", ".4h", ".h",
- [(set (v4i32 V128:$dst),
- (OpNode (v4i32 V128:$Rd), (v4i16 V64:$Rn),
- (v4i16 (ARM64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> {
- bits<3> idx;
- let Inst{11} = idx{2};
- let Inst{21} = idx{1};
- let Inst{20} = idx{0};
- }
-
- def v8i16_indexed : BaseSIMDIndexedTied<1, U, 0, 0b01, opc,
- V128, V128,
- V128_lo, VectorIndexH,
- asm#"2", ".4s", ".4s", ".8h", ".h",
- [(set (v4i32 V128:$dst),
- (OpNode (v4i32 V128:$Rd),
- (extract_high_v8i16 V128:$Rn),
- (extract_high_v8i16 (ARM64duplane16 (v8i16 V128_lo:$Rm),
- VectorIndexH:$idx))))]> {
- bits<3> idx;
- let Inst{11} = idx{2};
- let Inst{21} = idx{1};
- let Inst{20} = idx{0};
- }
-
- def v2i32_indexed : BaseSIMDIndexedTied<0, U, 0, 0b10, opc,
- V128, V64,
- V128, VectorIndexS,
- asm, ".2d", ".2d", ".2s", ".s",
- [(set (v2i64 V128:$dst),
- (OpNode (v2i64 V128:$Rd), (v2i32 V64:$Rn),
- (v2i32 (ARM64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> {
- bits<2> idx;
- let Inst{11} = idx{1};
- let Inst{21} = idx{0};
- }
-
- def v4i32_indexed : BaseSIMDIndexedTied<1, U, 0, 0b10, opc,
- V128, V128,
- V128, VectorIndexS,
- asm#"2", ".2d", ".2d", ".4s", ".s",
- [(set (v2i64 V128:$dst),
- (OpNode (v2i64 V128:$Rd),
- (extract_high_v4i32 V128:$Rn),
- (extract_high_v4i32 (ARM64duplane32 (v4i32 V128:$Rm),
- VectorIndexS:$idx))))]> {
- bits<2> idx;
- let Inst{11} = idx{1};
- let Inst{21} = idx{0};
- }
- }
-}
-
-//----------------------------------------------------------------------------
-// AdvSIMD scalar shift by immediate
-//----------------------------------------------------------------------------
-
-let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in
-class BaseSIMDScalarShift<bit U, bits<5> opc, bits<7> fixed_imm,
- RegisterClass regtype1, RegisterClass regtype2,
- Operand immtype, string asm, list<dag> pattern>
- : I<(outs regtype1:$Rd), (ins regtype2:$Rn, immtype:$imm),
- asm, "\t$Rd, $Rn, $imm", "", pattern>,
- Sched<[WriteV]> {
- bits<5> Rd;
- bits<5> Rn;
- bits<7> imm;
- let Inst{31-30} = 0b01;
- let Inst{29} = U;
- let Inst{28-23} = 0b111110;
- let Inst{22-16} = fixed_imm;
- let Inst{15-11} = opc;
- let Inst{10} = 1;
- let Inst{9-5} = Rn;
- let Inst{4-0} = Rd;
-}
-
-let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in
-class BaseSIMDScalarShiftTied<bit U, bits<5> opc, bits<7> fixed_imm,
- RegisterClass regtype1, RegisterClass regtype2,
- Operand immtype, string asm, list<dag> pattern>
- : I<(outs regtype1:$dst), (ins regtype1:$Rd, regtype2:$Rn, immtype:$imm),
- asm, "\t$Rd, $Rn, $imm", "$Rd = $dst", pattern>,
- Sched<[WriteV]> {
- bits<5> Rd;
- bits<5> Rn;
- bits<7> imm;
- let Inst{31-30} = 0b01;
- let Inst{29} = U;
- let Inst{28-23} = 0b111110;
- let Inst{22-16} = fixed_imm;
- let Inst{15-11} = opc;
- let Inst{10} = 1;
- let Inst{9-5} = Rn;
- let Inst{4-0} = Rd;
-}
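-
-// A worked example of how the "?" bits in fixed_imm get filled: for a
-// 64-bit right shift by 3, the AdvSIMD shift encoding stores
-// immh:immb = 2 * 64 - 3 = 125 = 0b1111101 in Inst{22-16}. The "d" variants
-// pin Inst{22} = 1 through fixed_imm and supply the rest with
-//   let Inst{21-16} = imm{5-0};
-// so the encoded operand value is 0b111101 (61).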
-
-multiclass SIMDScalarRShiftSD<bit U, bits<5> opc, string asm> {
- def s : BaseSIMDScalarShift<U, opc, {0,1,?,?,?,?,?},
- FPR32, FPR32, vecshiftR32, asm, []> {
- let Inst{20-16} = imm{4-0};
- }
-
- def d : BaseSIMDScalarShift<U, opc, {1,?,?,?,?,?,?},
- FPR64, FPR64, vecshiftR64, asm, []> {
- let Inst{21-16} = imm{5-0};
- }
-}
-
-multiclass SIMDScalarRShiftD<bit U, bits<5> opc, string asm,
- SDPatternOperator OpNode> {
- def d : BaseSIMDScalarShift<U, opc, {1,?,?,?,?,?,?},
- FPR64, FPR64, vecshiftR64, asm,
- [(set (i64 FPR64:$Rd),
- (OpNode (i64 FPR64:$Rn), (i32 vecshiftR64:$imm)))]> {
- let Inst{21-16} = imm{5-0};
- }
-
- def : Pat<(v1i64 (OpNode (v1i64 FPR64:$Rn), (i32 vecshiftR64:$imm))),
- (!cast<Instruction>(NAME # "d") FPR64:$Rn, vecshiftR64:$imm)>;
-}
-
-multiclass SIMDScalarRShiftDTied<bit U, bits<5> opc, string asm,
- SDPatternOperator OpNode = null_frag> {
- def d : BaseSIMDScalarShiftTied<U, opc, {1,?,?,?,?,?,?},
- FPR64, FPR64, vecshiftR64, asm,
- [(set (i64 FPR64:$dst), (OpNode (i64 FPR64:$Rd), (i64 FPR64:$Rn),
- (i32 vecshiftR64:$imm)))]> {
- let Inst{21-16} = imm{5-0};
- }
-
- def : Pat<(v1i64 (OpNode (v1i64 FPR64:$Rd), (v1i64 FPR64:$Rn),
- (i32 vecshiftR64:$imm))),
- (!cast<Instruction>(NAME # "d") FPR64:$Rd, FPR64:$Rn,
- vecshiftR64:$imm)>;
-}
-
-multiclass SIMDScalarLShiftD<bit U, bits<5> opc, string asm,
- SDPatternOperator OpNode> {
- def d : BaseSIMDScalarShift<U, opc, {1,?,?,?,?,?,?},
- FPR64, FPR64, vecshiftL64, asm,
- [(set (v1i64 FPR64:$Rd),
- (OpNode (v1i64 FPR64:$Rn), (i32 vecshiftL64:$imm)))]> {
- let Inst{21-16} = imm{5-0};
- }
-}
-
-let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in
-multiclass SIMDScalarLShiftDTied<bit U, bits<5> opc, string asm> {
- def d : BaseSIMDScalarShiftTied<U, opc, {1,?,?,?,?,?,?},
- FPR64, FPR64, vecshiftL64, asm, []> {
- let Inst{21-16} = imm{5-0};
- }
-}
-
-let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in
-multiclass SIMDScalarRShiftBHS<bit U, bits<5> opc, string asm,
- SDPatternOperator OpNode = null_frag> {
- def b : BaseSIMDScalarShift<U, opc, {0,0,0,1,?,?,?},
- FPR8, FPR16, vecshiftR8, asm, []> {
- let Inst{18-16} = imm{2-0};
- }
-
- def h : BaseSIMDScalarShift<U, opc, {0,0,1,?,?,?,?},
- FPR16, FPR32, vecshiftR16, asm, []> {
- let Inst{19-16} = imm{3-0};
- }
-
- def s : BaseSIMDScalarShift<U, opc, {0,1,?,?,?,?,?},
- FPR32, FPR64, vecshiftR32, asm,
- [(set (i32 FPR32:$Rd), (OpNode (i64 FPR64:$Rn), vecshiftR32:$imm))]> {
- let Inst{20-16} = imm{4-0};
- }
-}
-
-multiclass SIMDScalarLShiftBHSD<bit U, bits<5> opc, string asm,
- SDPatternOperator OpNode> {
- def b : BaseSIMDScalarShift<U, opc, {0,0,0,1,?,?,?},
- FPR8, FPR8, vecshiftL8, asm, []> {
- let Inst{18-16} = imm{2-0};
- }
-
- def h : BaseSIMDScalarShift<U, opc, {0,0,1,?,?,?,?},
- FPR16, FPR16, vecshiftL16, asm, []> {
- let Inst{19-16} = imm{3-0};
- }
-
- def s : BaseSIMDScalarShift<U, opc, {0,1,?,?,?,?,?},
- FPR32, FPR32, vecshiftL32, asm,
- [(set (i32 FPR32:$Rd), (OpNode (i32 FPR32:$Rn), (i32 vecshiftL32:$imm)))]> {
- let Inst{20-16} = imm{4-0};
- }
-
- def d : BaseSIMDScalarShift<U, opc, {1,?,?,?,?,?,?},
- FPR64, FPR64, vecshiftL64, asm,
- [(set (v1i64 FPR64:$Rd), (OpNode (v1i64 FPR64:$Rn),
- (i32 vecshiftL64:$imm)))]> {
- let Inst{21-16} = imm{5-0};
- }
-}
-
-multiclass SIMDScalarRShiftBHSD<bit U, bits<5> opc, string asm> {
- def b : BaseSIMDScalarShift<U, opc, {0,0,0,1,?,?,?},
- FPR8, FPR8, vecshiftR8, asm, []> {
- let Inst{18-16} = imm{2-0};
- }
-
- def h : BaseSIMDScalarShift<U, opc, {0,0,1,?,?,?,?},
- FPR16, FPR16, vecshiftR16, asm, []> {
- let Inst{19-16} = imm{3-0};
- }
-
- def s : BaseSIMDScalarShift<U, opc, {0,1,?,?,?,?,?},
- FPR32, FPR32, vecshiftR32, asm, []> {
- let Inst{20-16} = imm{4-0};
- }
-
- def d : BaseSIMDScalarShift<U, opc, {1,?,?,?,?,?,?},
- FPR64, FPR64, vecshiftR64, asm, []> {
- let Inst{21-16} = imm{5-0};
- }
-}
-
-//----------------------------------------------------------------------------
-// AdvSIMD vector shift by immediate
-//----------------------------------------------------------------------------
-
-let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in
-class BaseSIMDVectorShift<bit Q, bit U, bits<5> opc, bits<7> fixed_imm,
- RegisterOperand dst_reg, RegisterOperand src_reg,
- Operand immtype,
- string asm, string dst_kind, string src_kind,
- list<dag> pattern>
- : I<(outs dst_reg:$Rd), (ins src_reg:$Rn, immtype:$imm),
- asm, "{\t$Rd" # dst_kind # ", $Rn" # src_kind # ", $imm" #
- "|" # dst_kind # "\t$Rd, $Rn, $imm}", "", pattern>,
- Sched<[WriteV]> {
- bits<5> Rd;
- bits<5> Rn;
- let Inst{31} = 0;
- let Inst{30} = Q;
- let Inst{29} = U;
- let Inst{28-23} = 0b011110;
- let Inst{22-16} = fixed_imm;
- let Inst{15-11} = opc;
- let Inst{10} = 1;
- let Inst{9-5} = Rn;
- let Inst{4-0} = Rd;
-}
-
-let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in
-class BaseSIMDVectorShiftTied<bit Q, bit U, bits<5> opc, bits<7> fixed_imm,
- RegisterOperand vectype1, RegisterOperand vectype2,
- Operand immtype,
- string asm, string dst_kind, string src_kind,
- list<dag> pattern>
- : I<(outs vectype1:$dst), (ins vectype1:$Rd, vectype2:$Rn, immtype:$imm),
- asm, "{\t$Rd" # dst_kind # ", $Rn" # src_kind # ", $imm" #
- "|" # dst_kind # "\t$Rd, $Rn, $imm}", "$Rd = $dst", pattern>,
- Sched<[WriteV]> {
- bits<5> Rd;
- bits<5> Rn;
- let Inst{31} = 0;
- let Inst{30} = Q;
- let Inst{29} = U;
- let Inst{28-23} = 0b011110;
- let Inst{22-16} = fixed_imm;
- let Inst{15-11} = opc;
- let Inst{10} = 1;
- let Inst{9-5} = Rn;
- let Inst{4-0} = Rd;
-}
-
-multiclass SIMDVectorRShiftSD<bit U, bits<5> opc, string asm,
- Intrinsic OpNode> {
- def v2i32_shift : BaseSIMDVectorShift<0, U, opc, {0,1,?,?,?,?,?},
- V64, V64, vecshiftR32,
- asm, ".2s", ".2s",
- [(set (v2i32 V64:$Rd), (OpNode (v2f32 V64:$Rn), (i32 imm:$imm)))]> {
- bits<5> imm;
- let Inst{20-16} = imm;
- }
-
- def v4i32_shift : BaseSIMDVectorShift<1, U, opc, {0,1,?,?,?,?,?},
- V128, V128, vecshiftR32,
- asm, ".4s", ".4s",
- [(set (v4i32 V128:$Rd), (OpNode (v4f32 V128:$Rn), (i32 imm:$imm)))]> {
- bits<5> imm;
- let Inst{20-16} = imm;
- }
-
- def v2i64_shift : BaseSIMDVectorShift<1, U, opc, {1,?,?,?,?,?,?},
- V128, V128, vecshiftR64,
- asm, ".2d", ".2d",
- [(set (v2i64 V128:$Rd), (OpNode (v2f64 V128:$Rn), (i32 imm:$imm)))]> {
- bits<6> imm;
- let Inst{21-16} = imm;
- }
-}
-
-multiclass SIMDVectorRShiftSDToFP<bit U, bits<5> opc, string asm,
- Intrinsic OpNode> {
- def v2i32_shift : BaseSIMDVectorShift<0, U, opc, {0,1,?,?,?,?,?},
- V64, V64, vecshiftR32,
- asm, ".2s", ".2s",
- [(set (v2f32 V64:$Rd), (OpNode (v2i32 V64:$Rn), (i32 imm:$imm)))]> {
- bits<5> imm;
- let Inst{20-16} = imm;
- }
-
- def v4i32_shift : BaseSIMDVectorShift<1, U, opc, {0,1,?,?,?,?,?},
- V128, V128, vecshiftR32,
- asm, ".4s", ".4s",
- [(set (v4f32 V128:$Rd), (OpNode (v4i32 V128:$Rn), (i32 imm:$imm)))]> {
- bits<5> imm;
- let Inst{20-16} = imm;
- }
-
- def v2i64_shift : BaseSIMDVectorShift<1, U, opc, {1,?,?,?,?,?,?},
- V128, V128, vecshiftR64,
- asm, ".2d", ".2d",
- [(set (v2f64 V128:$Rd), (OpNode (v2i64 V128:$Rn), (i32 imm:$imm)))]> {
- bits<6> imm;
- let Inst{21-16} = imm;
- }
-}
-
-multiclass SIMDVectorRShiftNarrowBHS<bit U, bits<5> opc, string asm,
- SDPatternOperator OpNode> {
- def v8i8_shift : BaseSIMDVectorShift<0, U, opc, {0,0,0,1,?,?,?},
- V64, V128, vecshiftR16Narrow,
- asm, ".8b", ".8h",
- [(set (v8i8 V64:$Rd), (OpNode (v8i16 V128:$Rn), vecshiftR16Narrow:$imm))]> {
- bits<3> imm;
- let Inst{18-16} = imm;
- }
-
- def v16i8_shift : BaseSIMDVectorShiftTied<1, U, opc, {0,0,0,1,?,?,?},
- V128, V128, vecshiftR16Narrow,
- asm#"2", ".16b", ".8h", []> {
- bits<3> imm;
- let Inst{18-16} = imm;
- let hasSideEffects = 0;
- }
-
- def v4i16_shift : BaseSIMDVectorShift<0, U, opc, {0,0,1,?,?,?,?},
- V64, V128, vecshiftR32Narrow,
- asm, ".4h", ".4s",
- [(set (v4i16 V64:$Rd), (OpNode (v4i32 V128:$Rn), vecshiftR32Narrow:$imm))]> {
- bits<4> imm;
- let Inst{19-16} = imm;
- }
-
- def v8i16_shift : BaseSIMDVectorShiftTied<1, U, opc, {0,0,1,?,?,?,?},
- V128, V128, vecshiftR32Narrow,
- asm#"2", ".8h", ".4s", []> {
- bits<4> imm;
- let Inst{19-16} = imm;
- let hasSideEffects = 0;
- }
-
- def v2i32_shift : BaseSIMDVectorShift<0, U, opc, {0,1,?,?,?,?,?},
- V64, V128, vecshiftR64Narrow,
- asm, ".2s", ".2d",
- [(set (v2i32 V64:$Rd), (OpNode (v2i64 V128:$Rn), vecshiftR64Narrow:$imm))]> {
- bits<5> imm;
- let Inst{20-16} = imm;
- }
-
- def v4i32_shift : BaseSIMDVectorShiftTied<1, U, opc, {0,1,?,?,?,?,?},
- V128, V128, vecshiftR64Narrow,
- asm#"2", ".4s", ".2d", []> {
- bits<5> imm;
- let Inst{20-16} = imm;
- let hasSideEffects = 0;
- }
-
- // TableGen doesn't like patterns w/ INSERT_SUBREG on the instructions
- // themselves, so put them here instead.
-
- // Patterns involving what's effectively an insert high and a normal
- // intrinsic, represented by CONCAT_VECTORS.
- def : Pat<(concat_vectors (v8i8 V64:$Rd),(OpNode (v8i16 V128:$Rn),
- vecshiftR16Narrow:$imm)),
- (!cast<Instruction>(NAME # "v16i8_shift")
- (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub),
- V128:$Rn, vecshiftR16Narrow:$imm)>;
- def : Pat<(concat_vectors (v4i16 V64:$Rd), (OpNode (v4i32 V128:$Rn),
- vecshiftR32Narrow:$imm)),
- (!cast<Instruction>(NAME # "v8i16_shift")
- (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub),
- V128:$Rn, vecshiftR32Narrow:$imm)>;
- def : Pat<(concat_vectors (v2i32 V64:$Rd), (OpNode (v2i64 V128:$Rn),
- vecshiftR64Narrow:$imm)),
- (!cast<Instruction>(NAME # "v4i32_shift")
- (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub),
- V128:$Rn, vecshiftR64Narrow:$imm)>;
-}
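-
-// For illustration (assuming an shrn instantiation, not part of this file):
-// the concat_vectors patterns map an insert-high idiom such as
-//   concat(v0.2s, shrn(v1.2d, #16))
-// onto the tied "2" form
-//   shrn2 v0.4s, v1.2d, #16
-// where INSERT_SUBREG keeps the original low half of the destination live.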
-
-multiclass SIMDVectorLShiftBHSD<bit U, bits<5> opc, string asm,
- SDPatternOperator OpNode> {
- def v8i8_shift : BaseSIMDVectorShift<0, U, opc, {0,0,0,1,?,?,?},
- V64, V64, vecshiftL8,
- asm, ".8b", ".8b",
- [(set (v8i8 V64:$Rd), (OpNode (v8i8 V64:$Rn),
- (i32 vecshiftL8:$imm)))]> {
- bits<3> imm;
- let Inst{18-16} = imm;
- }
-
- def v16i8_shift : BaseSIMDVectorShift<1, U, opc, {0,0,0,1,?,?,?},
- V128, V128, vecshiftL8,
- asm, ".16b", ".16b",
- [(set (v16i8 V128:$Rd), (OpNode (v16i8 V128:$Rn),
- (i32 vecshiftL8:$imm)))]> {
- bits<3> imm;
- let Inst{18-16} = imm;
- }
-
- def v4i16_shift : BaseSIMDVectorShift<0, U, opc, {0,0,1,?,?,?,?},
- V64, V64, vecshiftL16,
- asm, ".4h", ".4h",
- [(set (v4i16 V64:$Rd), (OpNode (v4i16 V64:$Rn),
- (i32 vecshiftL16:$imm)))]> {
- bits<4> imm;
- let Inst{19-16} = imm;
- }
-
- def v8i16_shift : BaseSIMDVectorShift<1, U, opc, {0,0,1,?,?,?,?},
- V128, V128, vecshiftL16,
- asm, ".8h", ".8h",
- [(set (v8i16 V128:$Rd), (OpNode (v8i16 V128:$Rn),
- (i32 vecshiftL16:$imm)))]> {
- bits<4> imm;
- let Inst{19-16} = imm;
- }
-
- def v2i32_shift : BaseSIMDVectorShift<0, U, opc, {0,1,?,?,?,?,?},
- V64, V64, vecshiftL32,
- asm, ".2s", ".2s",
- [(set (v2i32 V64:$Rd), (OpNode (v2i32 V64:$Rn),
- (i32 vecshiftL32:$imm)))]> {
- bits<5> imm;
- let Inst{20-16} = imm;
- }
-
- def v4i32_shift : BaseSIMDVectorShift<1, U, opc, {0,1,?,?,?,?,?},
- V128, V128, vecshiftL32,
- asm, ".4s", ".4s",
- [(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn),
- (i32 vecshiftL32:$imm)))]> {
- bits<5> imm;
- let Inst{20-16} = imm;
- }
-
- def v2i64_shift : BaseSIMDVectorShift<1, U, opc, {1,?,?,?,?,?,?},
- V128, V128, vecshiftL64,
- asm, ".2d", ".2d",
- [(set (v2i64 V128:$Rd), (OpNode (v2i64 V128:$Rn),
- (i32 vecshiftL64:$imm)))]> {
- bits<6> imm;
- let Inst{21-16} = imm;
- }
-}
-
-multiclass SIMDVectorRShiftBHSD<bit U, bits<5> opc, string asm,
- SDPatternOperator OpNode> {
- def v8i8_shift : BaseSIMDVectorShift<0, U, opc, {0,0,0,1,?,?,?},
- V64, V64, vecshiftR8,
- asm, ".8b", ".8b",
- [(set (v8i8 V64:$Rd), (OpNode (v8i8 V64:$Rn),
- (i32 vecshiftR8:$imm)))]> {
- bits<3> imm;
- let Inst{18-16} = imm;
- }
-
- def v16i8_shift : BaseSIMDVectorShift<1, U, opc, {0,0,0,1,?,?,?},
- V128, V128, vecshiftR8,
- asm, ".16b", ".16b",
- [(set (v16i8 V128:$Rd), (OpNode (v16i8 V128:$Rn),
- (i32 vecshiftR8:$imm)))]> {
- bits<3> imm;
- let Inst{18-16} = imm;
- }
-
- def v4i16_shift : BaseSIMDVectorShift<0, U, opc, {0,0,1,?,?,?,?},
- V64, V64, vecshiftR16,
- asm, ".4h", ".4h",
- [(set (v4i16 V64:$Rd), (OpNode (v4i16 V64:$Rn),
- (i32 vecshiftR16:$imm)))]> {
- bits<4> imm;
- let Inst{19-16} = imm;
- }
-
- def v8i16_shift : BaseSIMDVectorShift<1, U, opc, {0,0,1,?,?,?,?},
- V128, V128, vecshiftR16,
- asm, ".8h", ".8h",
- [(set (v8i16 V128:$Rd), (OpNode (v8i16 V128:$Rn),
- (i32 vecshiftR16:$imm)))]> {
- bits<4> imm;
- let Inst{19-16} = imm;
- }
-
- def v2i32_shift : BaseSIMDVectorShift<0, U, opc, {0,1,?,?,?,?,?},
- V64, V64, vecshiftR32,
- asm, ".2s", ".2s",
- [(set (v2i32 V64:$Rd), (OpNode (v2i32 V64:$Rn),
- (i32 vecshiftR32:$imm)))]> {
- bits<5> imm;
- let Inst{20-16} = imm;
- }
-
- def v4i32_shift : BaseSIMDVectorShift<1, U, opc, {0,1,?,?,?,?,?},
- V128, V128, vecshiftR32,
- asm, ".4s", ".4s",
- [(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn),
- (i32 vecshiftR32:$imm)))]> {
- bits<5> imm;
- let Inst{20-16} = imm;
- }
-
- def v2i64_shift : BaseSIMDVectorShift<1, U, opc, {1,?,?,?,?,?,?},
- V128, V128, vecshiftR64,
- asm, ".2d", ".2d",
- [(set (v2i64 V128:$Rd), (OpNode (v2i64 V128:$Rn),
- (i32 vecshiftR64:$imm)))]> {
- bits<6> imm;
- let Inst{21-16} = imm;
- }
-}
-
-let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
-multiclass SIMDVectorRShiftBHSDTied<bit U, bits<5> opc, string asm,
- SDPatternOperator OpNode = null_frag> {
- def v8i8_shift : BaseSIMDVectorShiftTied<0, U, opc, {0,0,0,1,?,?,?},
- V64, V64, vecshiftR8, asm, ".8b", ".8b",
- [(set (v8i8 V64:$dst),
- (OpNode (v8i8 V64:$Rd), (v8i8 V64:$Rn),
- (i32 vecshiftR8:$imm)))]> {
- bits<3> imm;
- let Inst{18-16} = imm;
- }
-
- def v16i8_shift : BaseSIMDVectorShiftTied<1, U, opc, {0,0,0,1,?,?,?},
- V128, V128, vecshiftR8, asm, ".16b", ".16b",
- [(set (v16i8 V128:$dst),
- (OpNode (v16i8 V128:$Rd), (v16i8 V128:$Rn),
- (i32 vecshiftR8:$imm)))]> {
- bits<3> imm;
- let Inst{18-16} = imm;
- }
-
- def v4i16_shift : BaseSIMDVectorShiftTied<0, U, opc, {0,0,1,?,?,?,?},
- V64, V64, vecshiftR16, asm, ".4h", ".4h",
- [(set (v4i16 V64:$dst),
- (OpNode (v4i16 V64:$Rd), (v4i16 V64:$Rn),
- (i32 vecshiftR16:$imm)))]> {
- bits<4> imm;
- let Inst{19-16} = imm;
- }
-
- def v8i16_shift : BaseSIMDVectorShiftTied<1, U, opc, {0,0,1,?,?,?,?},
- V128, V128, vecshiftR16, asm, ".8h", ".8h",
- [(set (v8i16 V128:$dst),
- (OpNode (v8i16 V128:$Rd), (v8i16 V128:$Rn),
- (i32 vecshiftR16:$imm)))]> {
- bits<4> imm;
- let Inst{19-16} = imm;
- }
-
- def v2i32_shift : BaseSIMDVectorShiftTied<0, U, opc, {0,1,?,?,?,?,?},
- V64, V64, vecshiftR32, asm, ".2s", ".2s",
- [(set (v2i32 V64:$dst),
- (OpNode (v2i32 V64:$Rd), (v2i32 V64:$Rn),
- (i32 vecshiftR32:$imm)))]> {
- bits<5> imm;
- let Inst{20-16} = imm;
- }
-
- def v4i32_shift : BaseSIMDVectorShiftTied<1, U, opc, {0,1,?,?,?,?,?},
- V128, V128, vecshiftR32, asm, ".4s", ".4s",
- [(set (v4i32 V128:$dst),
- (OpNode (v4i32 V128:$Rd), (v4i32 V128:$Rn),
- (i32 vecshiftR32:$imm)))]> {
- bits<5> imm;
- let Inst{20-16} = imm;
- }
-
- def v2i64_shift : BaseSIMDVectorShiftTied<1, U, opc, {1,?,?,?,?,?,?},
- V128, V128, vecshiftR64,
- asm, ".2d", ".2d", [(set (v2i64 V128:$dst),
- (OpNode (v2i64 V128:$Rd), (v2i64 V128:$Rn),
- (i32 vecshiftR64:$imm)))]> {
- bits<6> imm;
- let Inst{21-16} = imm;
- }
-}
-
-multiclass SIMDVectorLShiftBHSDTied<bit U, bits<5> opc, string asm,
- SDPatternOperator OpNode = null_frag> {
- def v8i8_shift : BaseSIMDVectorShiftTied<0, U, opc, {0,0,0,1,?,?,?},
- V64, V64, vecshiftL8,
- asm, ".8b", ".8b",
- [(set (v8i8 V64:$dst),
- (OpNode (v8i8 V64:$Rd), (v8i8 V64:$Rn),
- (i32 vecshiftL8:$imm)))]> {
- bits<3> imm;
- let Inst{18-16} = imm;
- }
-
- def v16i8_shift : BaseSIMDVectorShiftTied<1, U, opc, {0,0,0,1,?,?,?},
- V128, V128, vecshiftL8,
- asm, ".16b", ".16b",
- [(set (v16i8 V128:$dst),
- (OpNode (v16i8 V128:$Rd), (v16i8 V128:$Rn),
- (i32 vecshiftL8:$imm)))]> {
- bits<3> imm;
- let Inst{18-16} = imm;
- }
-
- def v4i16_shift : BaseSIMDVectorShiftTied<0, U, opc, {0,0,1,?,?,?,?},
- V64, V64, vecshiftL16,
- asm, ".4h", ".4h",
- [(set (v4i16 V64:$dst),
- (OpNode (v4i16 V64:$Rd), (v4i16 V64:$Rn),
- (i32 vecshiftL16:$imm)))]> {
- bits<4> imm;
- let Inst{19-16} = imm;
- }
-
- def v8i16_shift : BaseSIMDVectorShiftTied<1, U, opc, {0,0,1,?,?,?,?},
- V128, V128, vecshiftL16,
- asm, ".8h", ".8h",
- [(set (v8i16 V128:$dst),
- (OpNode (v8i16 V128:$Rd), (v8i16 V128:$Rn),
- (i32 vecshiftL16:$imm)))]> {
- bits<4> imm;
- let Inst{19-16} = imm;
- }
-
- def v2i32_shift : BaseSIMDVectorShiftTied<0, U, opc, {0,1,?,?,?,?,?},
- V64, V64, vecshiftL32,
- asm, ".2s", ".2s",
- [(set (v2i32 V64:$dst),
- (OpNode (v2i32 V64:$Rd), (v2i32 V64:$Rn),
- (i32 vecshiftL32:$imm)))]> {
- bits<5> imm;
- let Inst{20-16} = imm;
- }
-
- def v4i32_shift : BaseSIMDVectorShiftTied<1, U, opc, {0,1,?,?,?,?,?},
- V128, V128, vecshiftL32,
- asm, ".4s", ".4s",
- [(set (v4i32 V128:$dst),
- (OpNode (v4i32 V128:$Rd), (v4i32 V128:$Rn),
- (i32 vecshiftL32:$imm)))]> {
- bits<5> imm;
- let Inst{20-16} = imm;
- }
-
- def v2i64_shift : BaseSIMDVectorShiftTied<1, U, opc, {1,?,?,?,?,?,?},
- V128, V128, vecshiftL64,
- asm, ".2d", ".2d",
- [(set (v2i64 V128:$dst),
- (OpNode (v2i64 V128:$Rd), (v2i64 V128:$Rn),
- (i32 vecshiftL64:$imm)))]> {
- bits<6> imm;
- let Inst{21-16} = imm;
- }
-}
-
-multiclass SIMDVectorLShiftLongBHSD<bit U, bits<5> opc, string asm,
- SDPatternOperator OpNode> {
- def v8i8_shift : BaseSIMDVectorShift<0, U, opc, {0,0,0,1,?,?,?},
- V128, V64, vecshiftL8, asm, ".8h", ".8b",
- [(set (v8i16 V128:$Rd), (OpNode (v8i8 V64:$Rn), vecshiftL8:$imm))]> {
- bits<3> imm;
- let Inst{18-16} = imm;
- }
-
- def v16i8_shift : BaseSIMDVectorShift<1, U, opc, {0,0,0,1,?,?,?},
- V128, V128, vecshiftL8,
- asm#"2", ".8h", ".16b",
- [(set (v8i16 V128:$Rd),
- (OpNode (extract_high_v16i8 V128:$Rn), vecshiftL8:$imm))]> {
- bits<3> imm;
- let Inst{18-16} = imm;
- }
-
- def v4i16_shift : BaseSIMDVectorShift<0, U, opc, {0,0,1,?,?,?,?},
- V128, V64, vecshiftL16, asm, ".4s", ".4h",
- [(set (v4i32 V128:$Rd), (OpNode (v4i16 V64:$Rn), vecshiftL16:$imm))]> {
- bits<4> imm;
- let Inst{19-16} = imm;
- }
-
- def v8i16_shift : BaseSIMDVectorShift<1, U, opc, {0,0,1,?,?,?,?},
- V128, V128, vecshiftL16,
- asm#"2", ".4s", ".8h",
- [(set (v4i32 V128:$Rd),
- (OpNode (extract_high_v8i16 V128:$Rn), vecshiftL16:$imm))]> {
- bits<4> imm;
- let Inst{19-16} = imm;
- }
-
- def v2i32_shift : BaseSIMDVectorShift<0, U, opc, {0,1,?,?,?,?,?},
- V128, V64, vecshiftL32, asm, ".2d", ".2s",
- [(set (v2i64 V128:$Rd), (OpNode (v2i32 V64:$Rn), vecshiftL32:$imm))]> {
- bits<5> imm;
- let Inst{20-16} = imm;
- }
-
- def v4i32_shift : BaseSIMDVectorShift<1, U, opc, {0,1,?,?,?,?,?},
- V128, V128, vecshiftL32,
- asm#"2", ".2d", ".4s",
- [(set (v2i64 V128:$Rd),
- (OpNode (extract_high_v4i32 V128:$Rn), vecshiftL32:$imm))]> {
- bits<5> imm;
- let Inst{20-16} = imm;
- }
-}
-
-//---
-// Vector load/store
-//---
-// SIMD ldX/stX no-index memory references don't allow the optional
-// ", #0" constant and handle post-indexing explicitly, so we use
-// a more specialized parse method for them. Otherwise, it's the same as
-// the general am_noindex handling.
-def MemorySIMDNoIndexOperand : AsmOperandClass {
- let Name = "MemorySIMDNoIndex";
- let ParserMethod = "tryParseNoIndexMemory";
-}
-def am_simdnoindex : Operand<i64>,
- ComplexPattern<i64, 1, "SelectAddrModeNoIndex", []> {
- let PrintMethod = "printAMNoIndex";
- let ParserMatchClass = MemorySIMDNoIndexOperand;
- let MIOperandInfo = (ops GPR64sp:$base);
- let DecoderMethod = "DecodeGPR64spRegisterClass";
-}
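-
-// For example, with this operand the parser accepts
-//   ld1 { v0.16b }, [x0]           // no-index form; ", #0" is rejected
-//   ld1 { v0.16b }, [x0], #16      // post-indexing handled explicitly
-// whereas the generic am_noindex handling would also admit "[x0, #0]".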
-
-class BaseSIMDLdSt<bit Q, bit L, bits<4> opcode, bits<2> size,
- string asm, dag oops, dag iops, list<dag> pattern>
- : I<oops, iops, asm, "\t$Vt, $vaddr", "", pattern> {
- bits<5> Vt;
- bits<5> vaddr;
- let Inst{31} = 0;
- let Inst{30} = Q;
- let Inst{29-23} = 0b0011000;
- let Inst{22} = L;
- let Inst{21-16} = 0b000000;
- let Inst{15-12} = opcode;
- let Inst{11-10} = size;
- let Inst{9-5} = vaddr;
- let Inst{4-0} = Vt;
-}
-
-class BaseSIMDLdStPost<bit Q, bit L, bits<4> opcode, bits<2> size,
- string asm, dag oops, dag iops>
- : I<oops, iops, asm, "\t$Vt, $vaddr, $Xm", "", []> {
- bits<5> Vt;
- bits<5> vaddr;
- bits<5> Xm;
- let Inst{31} = 0;
- let Inst{30} = Q;
- let Inst{29-23} = 0b0011001;
- let Inst{22} = L;
- let Inst{21} = 0;
- let Inst{20-16} = Xm;
- let Inst{15-12} = opcode;
- let Inst{11-10} = size;
- let Inst{9-5} = vaddr;
- let Inst{4-0} = Vt;
- let DecoderMethod = "DecodeSIMDLdStPost";
-}
-
-// The immediate form of AdvSIMD post-indexed addressing is encoded with
-// register post-index addressing from the zero register.
-multiclass SIMDLdStAliases<string asm, string layout, string Count,
- int Offset, int Size> {
- // E.g. "ld1 { v0.8b, v1.8b }, [x1], #16"
- // "ld1\t$Vt, $vaddr, #16"
- // may get mapped to
- // (LD1Twov8b_POST VecListTwo8b:$Vt, am_simdnoindex:$vaddr, XZR)
- def : InstAlias<asm # "\t$Vt, $vaddr, #" # Offset,
- (!cast<Instruction>(NAME # Count # "v" # layout # "_POST")
- !cast<RegisterOperand>("VecList" # Count # layout):$Vt,
- am_simdnoindex:$vaddr, XZR), 1>;
-
- // E.g. "ld1.8b { v0, v1 }, [x1], #16"
- // "ld1.8b\t$Vt, $vaddr, #16"
- // may get mapped to
- // (LD1Twov8b_POST VecListTwo64:$Vt, am_simdnoindex:$vaddr, XZR)
- def : InstAlias<asm # "." # layout # "\t$Vt, $vaddr, #" # Offset,
- (!cast<Instruction>(NAME # Count # "v" # layout # "_POST")
- !cast<RegisterOperand>("VecList" # Count # Size):$Vt,
- am_simdnoindex:$vaddr, XZR), 0>;
-
- // E.g. "ld1.8b { v0, v1 }, [x1]"
- // "ld1\t$Vt, $vaddr"
- // may get mapped to
- // (LD1Twov8b VecListTwo64:$Vt, am_simdnoindex:$vaddr)
- def : InstAlias<asm # "." # layout # "\t$Vt, $vaddr",
- (!cast<Instruction>(NAME # Count # "v" # layout)
- !cast<RegisterOperand>("VecList" # Count # Size):$Vt,
- am_simdnoindex:$vaddr), 0>;
-
- // E.g. "ld1.8b { v0, v1 }, [x1], x2"
- // "ld1\t$Vt, $vaddr, $Xm"
- // may get mapped to
- // (LD1Twov8b_POST VecListTwo64:$Vt, am_simdnoindex:$vaddr, GPR64pi8:$Xm)
- def : InstAlias<asm # "." # layout # "\t$Vt, $vaddr, $Xm",
- (!cast<Instruction>(NAME # Count # "v" # layout # "_POST")
- !cast<RegisterOperand>("VecList" # Count # Size):$Vt,
- am_simdnoindex:$vaddr,
- !cast<RegisterOperand>("GPR64pi" # Offset):$Xm), 0>;
-}
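-
-// Why XZR works here: in the AdvSIMD post-indexed encoding, an Rm field of
-// 0b11111 selects the immediate form, which advances the base by the
-// transfer size, so passing the XZR operand encodes exactly the "#Offset"
-// aliases above.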
-
-multiclass BaseSIMDLdN<string Count, string asm, string veclist, int Offset128,
- int Offset64, bits<4> opcode> {
- let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in {
- def v16b: BaseSIMDLdSt<1, 1, opcode, 0b00, asm,
- (outs !cast<RegisterOperand>(veclist # "16b"):$Vt),
- (ins am_simdnoindex:$vaddr), []>;
- def v8h : BaseSIMDLdSt<1, 1, opcode, 0b01, asm,
- (outs !cast<RegisterOperand>(veclist # "8h"):$Vt),
- (ins am_simdnoindex:$vaddr), []>;
- def v4s : BaseSIMDLdSt<1, 1, opcode, 0b10, asm,
- (outs !cast<RegisterOperand>(veclist # "4s"):$Vt),
- (ins am_simdnoindex:$vaddr), []>;
- def v2d : BaseSIMDLdSt<1, 1, opcode, 0b11, asm,
- (outs !cast<RegisterOperand>(veclist # "2d"):$Vt),
- (ins am_simdnoindex:$vaddr), []>;
- def v8b : BaseSIMDLdSt<0, 1, opcode, 0b00, asm,
- (outs !cast<RegisterOperand>(veclist # "8b"):$Vt),
- (ins am_simdnoindex:$vaddr), []>;
- def v4h : BaseSIMDLdSt<0, 1, opcode, 0b01, asm,
- (outs !cast<RegisterOperand>(veclist # "4h"):$Vt),
- (ins am_simdnoindex:$vaddr), []>;
- def v2s : BaseSIMDLdSt<0, 1, opcode, 0b10, asm,
- (outs !cast<RegisterOperand>(veclist # "2s"):$Vt),
- (ins am_simdnoindex:$vaddr), []>;
-
- def v16b_POST: BaseSIMDLdStPost<1, 1, opcode, 0b00, asm,
- (outs !cast<RegisterOperand>(veclist # "16b"):$Vt),
- (ins am_simdnoindex:$vaddr,
- !cast<RegisterOperand>("GPR64pi" # Offset128):$Xm)>;
- def v8h_POST : BaseSIMDLdStPost<1, 1, opcode, 0b01, asm,
- (outs !cast<RegisterOperand>(veclist # "8h"):$Vt),
- (ins am_simdnoindex:$vaddr,
- !cast<RegisterOperand>("GPR64pi" # Offset128):$Xm)>;
- def v4s_POST : BaseSIMDLdStPost<1, 1, opcode, 0b10, asm,
- (outs !cast<RegisterOperand>(veclist # "4s"):$Vt),
- (ins am_simdnoindex:$vaddr,
- !cast<RegisterOperand>("GPR64pi" # Offset128):$Xm)>;
- def v2d_POST : BaseSIMDLdStPost<1, 1, opcode, 0b11, asm,
- (outs !cast<RegisterOperand>(veclist # "2d"):$Vt),
- (ins am_simdnoindex:$vaddr,
- !cast<RegisterOperand>("GPR64pi" # Offset128):$Xm)>;
- def v8b_POST : BaseSIMDLdStPost<0, 1, opcode, 0b00, asm,
- (outs !cast<RegisterOperand>(veclist # "8b"):$Vt),
- (ins am_simdnoindex:$vaddr,
- !cast<RegisterOperand>("GPR64pi" # Offset64):$Xm)>;
- def v4h_POST : BaseSIMDLdStPost<0, 1, opcode, 0b01, asm,
- (outs !cast<RegisterOperand>(veclist # "4h"):$Vt),
- (ins am_simdnoindex:$vaddr,
- !cast<RegisterOperand>("GPR64pi" # Offset64):$Xm)>;
- def v2s_POST : BaseSIMDLdStPost<0, 1, opcode, 0b10, asm,
- (outs !cast<RegisterOperand>(veclist # "2s"):$Vt),
- (ins am_simdnoindex:$vaddr,
- !cast<RegisterOperand>("GPR64pi" # Offset64):$Xm)>;
- }
-
- defm : SIMDLdStAliases<asm, "16b", Count, Offset128, 128>;
- defm : SIMDLdStAliases<asm, "8h", Count, Offset128, 128>;
- defm : SIMDLdStAliases<asm, "4s", Count, Offset128, 128>;
- defm : SIMDLdStAliases<asm, "2d", Count, Offset128, 128>;
- defm : SIMDLdStAliases<asm, "8b", Count, Offset64, 64>;
- defm : SIMDLdStAliases<asm, "4h", Count, Offset64, 64>;
- defm : SIMDLdStAliases<asm, "2s", Count, Offset64, 64>;
-}
-
-// Only ld1/st1 has a v1d version.
-multiclass BaseSIMDStN<string Count, string asm, string veclist, int Offset128,
- int Offset64, bits<4> opcode> {
- let hasSideEffects = 0, mayStore = 1, mayLoad = 0 in {
- def v16b : BaseSIMDLdSt<1, 0, opcode, 0b00, asm, (outs),
- (ins !cast<RegisterOperand>(veclist # "16b"):$Vt,
- am_simdnoindex:$vaddr), []>;
- def v8h : BaseSIMDLdSt<1, 0, opcode, 0b01, asm, (outs),
- (ins !cast<RegisterOperand>(veclist # "8h"):$Vt,
- am_simdnoindex:$vaddr), []>;
- def v4s : BaseSIMDLdSt<1, 0, opcode, 0b10, asm, (outs),
- (ins !cast<RegisterOperand>(veclist # "4s"):$Vt,
- am_simdnoindex:$vaddr), []>;
- def v2d : BaseSIMDLdSt<1, 0, opcode, 0b11, asm, (outs),
- (ins !cast<RegisterOperand>(veclist # "2d"):$Vt,
- am_simdnoindex:$vaddr), []>;
- def v8b : BaseSIMDLdSt<0, 0, opcode, 0b00, asm, (outs),
- (ins !cast<RegisterOperand>(veclist # "8b"):$Vt,
- am_simdnoindex:$vaddr), []>;
- def v4h : BaseSIMDLdSt<0, 0, opcode, 0b01, asm, (outs),
- (ins !cast<RegisterOperand>(veclist # "4h"):$Vt,
- am_simdnoindex:$vaddr), []>;
- def v2s : BaseSIMDLdSt<0, 0, opcode, 0b10, asm, (outs),
- (ins !cast<RegisterOperand>(veclist # "2s"):$Vt,
- am_simdnoindex:$vaddr), []>;
-
- def v16b_POST : BaseSIMDLdStPost<1, 0, opcode, 0b00, asm, (outs),
- (ins !cast<RegisterOperand>(veclist # "16b"):$Vt,
- am_simdnoindex:$vaddr,
- !cast<RegisterOperand>("GPR64pi" # Offset128):$Xm)>;
- def v8h_POST : BaseSIMDLdStPost<1, 0, opcode, 0b01, asm, (outs),
- (ins !cast<RegisterOperand>(veclist # "8h"):$Vt,
- am_simdnoindex:$vaddr,
- !cast<RegisterOperand>("GPR64pi" # Offset128):$Xm)>;
- def v4s_POST : BaseSIMDLdStPost<1, 0, opcode, 0b10, asm, (outs),
- (ins !cast<RegisterOperand>(veclist # "4s"):$Vt,
- am_simdnoindex:$vaddr,
- !cast<RegisterOperand>("GPR64pi" # Offset128):$Xm)>;
- def v2d_POST : BaseSIMDLdStPost<1, 0, opcode, 0b11, asm, (outs),
- (ins !cast<RegisterOperand>(veclist # "2d"):$Vt,
- am_simdnoindex:$vaddr,
- !cast<RegisterOperand>("GPR64pi" # Offset128):$Xm)>;
- def v8b_POST : BaseSIMDLdStPost<0, 0, opcode, 0b00, asm, (outs),
- (ins !cast<RegisterOperand>(veclist # "8b"):$Vt,
- am_simdnoindex:$vaddr,
- !cast<RegisterOperand>("GPR64pi" # Offset64):$Xm)>;
- def v4h_POST : BaseSIMDLdStPost<0, 0, opcode, 0b01, asm, (outs),
- (ins !cast<RegisterOperand>(veclist # "4h"):$Vt,
- am_simdnoindex:$vaddr,
- !cast<RegisterOperand>("GPR64pi" # Offset64):$Xm)>;
- def v2s_POST : BaseSIMDLdStPost<0, 0, opcode, 0b10, asm, (outs),
- (ins !cast<RegisterOperand>(veclist # "2s"):$Vt,
- am_simdnoindex:$vaddr,
- !cast<RegisterOperand>("GPR64pi" # Offset64):$Xm)>;
- }
-
- defm : SIMDLdStAliases<asm, "16b", Count, Offset128, 128>;
- defm : SIMDLdStAliases<asm, "8h", Count, Offset128, 128>;
- defm : SIMDLdStAliases<asm, "4s", Count, Offset128, 128>;
- defm : SIMDLdStAliases<asm, "2d", Count, Offset128, 128>;
- defm : SIMDLdStAliases<asm, "8b", Count, Offset64, 64>;
- defm : SIMDLdStAliases<asm, "4h", Count, Offset64, 64>;
- defm : SIMDLdStAliases<asm, "2s", Count, Offset64, 64>;
-}
-
-multiclass BaseSIMDLd1<string Count, string asm, string veclist,
- int Offset128, int Offset64, bits<4> opcode>
- : BaseSIMDLdN<Count, asm, veclist, Offset128, Offset64, opcode> {
-
- // LD1 instructions have extra "1d" variants.
- let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in {
- def v1d : BaseSIMDLdSt<0, 1, opcode, 0b11, asm,
- (outs !cast<RegisterOperand>(veclist # "1d"):$Vt),
- (ins am_simdnoindex:$vaddr), []>;
-
- def v1d_POST : BaseSIMDLdStPost<0, 1, opcode, 0b11, asm,
- (outs !cast<RegisterOperand>(veclist # "1d"):$Vt),
- (ins am_simdnoindex:$vaddr,
- !cast<RegisterOperand>("GPR64pi" # Offset64):$Xm)>;
- }
-
- defm : SIMDLdStAliases<asm, "1d", Count, Offset64, 64>;
-}
-
-multiclass BaseSIMDSt1<string Count, string asm, string veclist,
- int Offset128, int Offset64, bits<4> opcode>
- : BaseSIMDStN<Count, asm, veclist, Offset128, Offset64, opcode> {
-
- // ST1 instructions have extra "1d" variants.
- let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in {
- def v1d : BaseSIMDLdSt<0, 0, opcode, 0b11, asm, (outs),
- (ins !cast<RegisterOperand>(veclist # "1d"):$Vt,
- am_simdnoindex:$vaddr), []>;
-
- def v1d_POST : BaseSIMDLdStPost<0, 0, opcode, 0b11, asm, (outs),
- (ins !cast<RegisterOperand>(veclist # "1d"):$Vt,
- am_simdnoindex:$vaddr,
- !cast<RegisterOperand>("GPR64pi" # Offset64):$Xm)>;
- }
-
- defm : SIMDLdStAliases<asm, "1d", Count, Offset64, 64>;
-}
-
-multiclass SIMDLd1Multiple<string asm> {
- defm One : BaseSIMDLd1<"One", asm, "VecListOne", 16, 8, 0b0111>;
- defm Two : BaseSIMDLd1<"Two", asm, "VecListTwo", 32, 16, 0b1010>;
- defm Three : BaseSIMDLd1<"Three", asm, "VecListThree", 48, 24, 0b0110>;
- defm Four : BaseSIMDLd1<"Four", asm, "VecListFour", 64, 32, 0b0010>;
-}
-
-multiclass SIMDSt1Multiple<string asm> {
- defm One : BaseSIMDSt1<"One", asm, "VecListOne", 16, 8, 0b0111>;
- defm Two : BaseSIMDSt1<"Two", asm, "VecListTwo", 32, 16, 0b1010>;
- defm Three : BaseSIMDSt1<"Three", asm, "VecListThree", 48, 24, 0b0110>;
- defm Four : BaseSIMDSt1<"Four", asm, "VecListFour", 64, 32, 0b0010>;
-}
-
-multiclass SIMDLd2Multiple<string asm> {
- defm Two : BaseSIMDLdN<"Two", asm, "VecListTwo", 32, 16, 0b1000>;
-}
-
-multiclass SIMDSt2Multiple<string asm> {
- defm Two : BaseSIMDStN<"Two", asm, "VecListTwo", 32, 16, 0b1000>;
-}
-
-multiclass SIMDLd3Multiple<string asm> {
- defm Three : BaseSIMDLdN<"Three", asm, "VecListThree", 48, 24, 0b0100>;
-}
-
-multiclass SIMDSt3Multiple<string asm> {
- defm Three : BaseSIMDStN<"Three", asm, "VecListThree", 48, 24, 0b0100>;
-}
-
-multiclass SIMDLd4Multiple<string asm> {
- defm Four : BaseSIMDLdN<"Four", asm, "VecListFour", 64, 32, 0b0000>;
-}
-
-multiclass SIMDSt4Multiple<string asm> {
- defm Four : BaseSIMDStN<"Four", asm, "VecListFour", 64, 32, 0b0000>;
-}
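-
-// Sketch of the expected instantiations (illustrative, not taken from this
-// file):
-//   defm LD2 : SIMDLd2Multiple<"ld2">;
-//   defm ST2 : SIMDSt2Multiple<"st2">;
-// Each mnemonic then picks up every layout plus the post-indexed and alias
-// forms defined above.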
-
-//---
-// AdvSIMD Load/store single-element
-//---
-
-class BaseSIMDLdStSingle<bit L, bit R, bits<3> opcode,
- string asm, string operands, dag oops, dag iops,
- list<dag> pattern>
- : I<oops, iops, asm, operands, "", pattern> {
- bits<5> Vt;
- bits<5> vaddr;
- let Inst{31} = 0;
- let Inst{29-24} = 0b001101;
- let Inst{22} = L;
- let Inst{21} = R;
- let Inst{15-13} = opcode;
- let Inst{9-5} = vaddr;
- let Inst{4-0} = Vt;
- let DecoderMethod = "DecodeSIMDLdStSingle";
-}
-
-class BaseSIMDLdStSingleTied<bit L, bit R, bits<3> opcode,
- string asm, string operands, dag oops, dag iops,
- list<dag> pattern>
- : I<oops, iops, asm, operands, "$Vt = $dst", pattern> {
- bits<5> Vt;
- bits<5> vaddr;
- let Inst{31} = 0;
- let Inst{29-24} = 0b001101;
- let Inst{22} = L;
- let Inst{21} = R;
- let Inst{15-13} = opcode;
- let Inst{9-5} = vaddr;
- let Inst{4-0} = Vt;
- let DecoderMethod = "DecodeSIMDLdStSingleTied";
-}
-
-let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in
-class BaseSIMDLdR<bit Q, bit R, bits<3> opcode, bit S, bits<2> size, string asm,
- Operand listtype>
- : BaseSIMDLdStSingle<1, R, opcode, asm, "\t$Vt, $vaddr",
- (outs listtype:$Vt), (ins am_simdnoindex:$vaddr), []> {
- let Inst{30} = Q;
- let Inst{23} = 0;
- let Inst{20-16} = 0b00000;
- let Inst{12} = S;
- let Inst{11-10} = size;
-}
-let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in
-class BaseSIMDLdRPost<bit Q, bit R, bits<3> opcode, bit S, bits<2> size,
- string asm, Operand listtype, Operand GPR64pi>
- : BaseSIMDLdStSingle<1, R, opcode, asm, "\t$Vt, $vaddr, $Xm",
- (outs listtype:$Vt),
- (ins am_simdnoindex:$vaddr, GPR64pi:$Xm), []> {
- bits<5> Xm;
- let Inst{30} = Q;
- let Inst{23} = 1;
- let Inst{20-16} = Xm;
- let Inst{12} = S;
- let Inst{11-10} = size;
-}
-
-multiclass SIMDLdrAliases<string asm, string layout, string Count,
- int Offset, int Size> {
- // E.g. "ld1r { v0.8b }, [x1], #1"
- // "ld1r.8b\t$Vt, $vaddr, #1"
- // may get mapped to
- // (LD1Rv8b_POST VecListOne8b:$Vt, am_simdnoindex:$vaddr, XZR)
- def : InstAlias<asm # "\t$Vt, $vaddr, #" # Offset,
- (!cast<Instruction>(NAME # "v" # layout # "_POST")
- !cast<RegisterOperand>("VecList" # Count # layout):$Vt,
- am_simdnoindex:$vaddr, XZR), 1>;
-
- // E.g. "ld1r.8b { v0 }, [x1], #1"
- // "ld1r.8b\t$Vt, $vaddr, #1"
- // may get mapped to
- // (LD1Rv8b_POST VecListOne64:$Vt, am_simdnoindex:$vaddr, XZR)
- def : InstAlias<asm # "." # layout # "\t$Vt, $vaddr, #" # Offset,
- (!cast<Instruction>(NAME # "v" # layout # "_POST")
- !cast<RegisterOperand>("VecList" # Count # Size):$Vt,
- am_simdnoindex:$vaddr, XZR), 0>;
-
- // E.g. "ld1r.8b { v0 }, [x1]"
- // "ld1r.8b\t$Vt, $vaddr"
- // may get mapped to
- // (LD1Rv8b VecListOne64:$Vt, am_simdnoindex:$vaddr)
- def : InstAlias<asm # "." # layout # "\t$Vt, $vaddr",
- (!cast<Instruction>(NAME # "v" # layout)
- !cast<RegisterOperand>("VecList" # Count # Size):$Vt,
- am_simdnoindex:$vaddr), 0>;
-
- // E.g. "ld1r.8b { v0 }, [x1], x2"
- // "ld1r.8b\t$Vt, $vaddr, $Xm"
- // may get mapped to
- // (LD1Rv8b_POST VecListOne64:$Vt, am_simdnoindex:$vaddr, GPR64pi1:$Xm)
- def : InstAlias<asm # "." # layout # "\t$Vt, $vaddr, $Xm",
- (!cast<Instruction>(NAME # "v" # layout # "_POST")
- !cast<RegisterOperand>("VecList" # Count # Size):$Vt,
- am_simdnoindex:$vaddr,
- !cast<RegisterOperand>("GPR64pi" # Offset):$Xm), 0>;
-}
-
-multiclass SIMDLdR<bit R, bits<3> opcode, bit S, string asm, string Count,
- int Offset1, int Offset2, int Offset4, int Offset8> {
- def v8b : BaseSIMDLdR<0, R, opcode, S, 0b00, asm,
- !cast<Operand>("VecList" # Count # "8b")>;
- def v16b: BaseSIMDLdR<1, R, opcode, S, 0b00, asm,
- !cast<Operand>("VecList" # Count #"16b")>;
- def v4h : BaseSIMDLdR<0, R, opcode, S, 0b01, asm,
- !cast<Operand>("VecList" # Count #"4h")>;
- def v8h : BaseSIMDLdR<1, R, opcode, S, 0b01, asm,
- !cast<Operand>("VecList" # Count #"8h")>;
- def v2s : BaseSIMDLdR<0, R, opcode, S, 0b10, asm,
- !cast<Operand>("VecList" # Count #"2s")>;
- def v4s : BaseSIMDLdR<1, R, opcode, S, 0b10, asm,
- !cast<Operand>("VecList" # Count #"4s")>;
- def v1d : BaseSIMDLdR<0, R, opcode, S, 0b11, asm,
- !cast<Operand>("VecList" # Count #"1d")>;
- def v2d : BaseSIMDLdR<1, R, opcode, S, 0b11, asm,
- !cast<Operand>("VecList" # Count #"2d")>;
-
- def v8b_POST : BaseSIMDLdRPost<0, R, opcode, S, 0b00, asm,
- !cast<Operand>("VecList" # Count # "8b"),
- !cast<Operand>("GPR64pi" # Offset1)>;
- def v16b_POST: BaseSIMDLdRPost<1, R, opcode, S, 0b00, asm,
- !cast<Operand>("VecList" # Count # "16b"),
- !cast<Operand>("GPR64pi" # Offset1)>;
- def v4h_POST : BaseSIMDLdRPost<0, R, opcode, S, 0b01, asm,
- !cast<Operand>("VecList" # Count # "4h"),
- !cast<Operand>("GPR64pi" # Offset2)>;
- def v8h_POST : BaseSIMDLdRPost<1, R, opcode, S, 0b01, asm,
- !cast<Operand>("VecList" # Count # "8h"),
- !cast<Operand>("GPR64pi" # Offset2)>;
- def v2s_POST : BaseSIMDLdRPost<0, R, opcode, S, 0b10, asm,
- !cast<Operand>("VecList" # Count # "2s"),
- !cast<Operand>("GPR64pi" # Offset4)>;
- def v4s_POST : BaseSIMDLdRPost<1, R, opcode, S, 0b10, asm,
- !cast<Operand>("VecList" # Count # "4s"),
- !cast<Operand>("GPR64pi" # Offset4)>;
- def v1d_POST : BaseSIMDLdRPost<0, R, opcode, S, 0b11, asm,
- !cast<Operand>("VecList" # Count # "1d"),
- !cast<Operand>("GPR64pi" # Offset8)>;
- def v2d_POST : BaseSIMDLdRPost<1, R, opcode, S, 0b11, asm,
- !cast<Operand>("VecList" # Count # "2d"),
- !cast<Operand>("GPR64pi" # Offset8)>;
-
- defm : SIMDLdrAliases<asm, "8b", Count, Offset1, 64>;
- defm : SIMDLdrAliases<asm, "16b", Count, Offset1, 128>;
- defm : SIMDLdrAliases<asm, "4h", Count, Offset2, 64>;
- defm : SIMDLdrAliases<asm, "8h", Count, Offset2, 128>;
- defm : SIMDLdrAliases<asm, "2s", Count, Offset4, 64>;
- defm : SIMDLdrAliases<asm, "4s", Count, Offset4, 128>;
- defm : SIMDLdrAliases<asm, "1d", Count, Offset8, 64>;
- defm : SIMDLdrAliases<asm, "2d", Count, Offset8, 128>;
-}
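-
-// Illustrative instantiation, with the opcode and S bits assumed rather
-// than taken from this file:
-//   defm LD1R : SIMDLdR<0, 0b110, 0, "ld1r", "One", 1, 2, 4, 8>;
-// This yields replicating loads such as "ld1r { v0.8b }, [x0], #1", with
-// the GPR64pi1/2/4/8 post-increment operands matching the element width.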
-
-class SIMDLdStSingleB<bit L, bit R, bits<3> opcode, string asm,
- dag oops, dag iops, list<dag> pattern>
- : BaseSIMDLdStSingle<L, R, opcode, asm, "\t$Vt$idx, $vaddr", oops, iops,
- pattern> {
- // idx encoded in Q:S:size fields.
- bits<4> idx;
- let Inst{30} = idx{3};
- let Inst{23} = 0;
- let Inst{20-16} = 0b00000;
- let Inst{12} = idx{2};
- let Inst{11-10} = idx{1-0};
-}
-class SIMDLdStSingleBTied<bit L, bit R, bits<3> opcode, string asm,
- dag oops, dag iops, list<dag> pattern>
- : BaseSIMDLdStSingleTied<L, R, opcode, asm, "\t$Vt$idx, $vaddr", oops, iops,
- pattern> {
- // idx encoded in Q:S:size fields.
- bits<4> idx;
- let Inst{30} = idx{3};
- let Inst{23} = 0;
- let Inst{20-16} = 0b00000;
- let Inst{12} = idx{2};
- let Inst{11-10} = idx{1-0};
-}
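-
-// Worked example of the Q:S:size packing above: "st1 { v5.b }[13]" has
-// idx = 13 = 0b1101, so Inst{30} (Q) = 1, Inst{12} (S) = 1, and
-// Inst{11-10} (size) = 0b01; the byte lane number is simply split across
-// the three fields.
-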
-class SIMDLdStSingleBPost<bit L, bit R, bits<3> opcode, string asm,
- dag oops, dag iops>
- : BaseSIMDLdStSingle<L, R, opcode, asm, "\t$Vt$idx, $vaddr, $Xm",
- oops, iops, []> {
- // idx encoded in Q:S:size fields.
- bits<4> idx;
- bits<5> Xm;
- let Inst{30} = idx{3};
- let Inst{23} = 1;
- let Inst{20-16} = Xm;
- let Inst{12} = idx{2};
- let Inst{11-10} = idx{1-0};
-}
-class SIMDLdStSingleBTiedPost<bit L, bit R, bits<3> opcode, string asm,
- dag oops, dag iops>
- : BaseSIMDLdStSingleTied<L, R, opcode, asm, "\t$Vt$idx, $vaddr, $Xm",
- oops, iops, []> {
- // idx encoded in Q:S:size fields.
- bits<4> idx;
- bits<5> Xm;
- let Inst{30} = idx{3};
- let Inst{23} = 1;
- let Inst{20-16} = Xm;
- let Inst{12} = idx{2};
- let Inst{11-10} = idx{1-0};
-}
-
-class SIMDLdStSingleH<bit L, bit R, bits<3> opcode, bit size, string asm,
- dag oops, dag iops, list<dag> pattern>
- : BaseSIMDLdStSingle<L, R, opcode, asm, "\t$Vt$idx, $vaddr", oops, iops,
- pattern> {
- // idx encoded in Q:S:size<1> fields.
- bits<3> idx;
- let Inst{30} = idx{2};
- let Inst{23} = 0;
- let Inst{20-16} = 0b00000;
- let Inst{12} = idx{1};
- let Inst{11} = idx{0};
- let Inst{10} = size;
-}
-class SIMDLdStSingleHTied<bit L, bit R, bits<3> opcode, bit size, string asm,
- dag oops, dag iops, list<dag> pattern>
- : BaseSIMDLdStSingleTied<L, R, opcode, asm, "\t$Vt$idx, $vaddr", oops, iops,
- pattern> {
- // idx encoded in Q:S:size<1> fields.
- bits<3> idx;
- let Inst{30} = idx{2};
- let Inst{23} = 0;
- let Inst{20-16} = 0b00000;
- let Inst{12} = idx{1};
- let Inst{11} = idx{0};
- let Inst{10} = size;
-}
-
-class SIMDLdStSingleHPost<bit L, bit R, bits<3> opcode, bit size, string asm,
- dag oops, dag iops>
- : BaseSIMDLdStSingle<L, R, opcode, asm, "\t$Vt$idx, $vaddr, $Xm",
- oops, iops, []> {
- // idx encoded in Q:S:size<1> fields.
- bits<3> idx;
- bits<5> Xm;
- let Inst{30} = idx{2};
- let Inst{23} = 1;
- let Inst{20-16} = Xm;
- let Inst{12} = idx{1};
- let Inst{11} = idx{0};
- let Inst{10} = size;
-}
-class SIMDLdStSingleHTiedPost<bit L, bit R, bits<3> opcode, bit size, string asm,
- dag oops, dag iops>
- : BaseSIMDLdStSingleTied<L, R, opcode, asm, "\t$Vt$idx, $vaddr, $Xm",
- oops, iops, []> {
- // idx encoded in Q:S:size<1> fields.
- bits<3> idx;
- bits<5> Xm;
- let Inst{30} = idx{2};
- let Inst{23} = 1;
- let Inst{20-16} = Xm;
- let Inst{12} = idx{1};
- let Inst{11} = idx{0};
- let Inst{10} = size;
-}
-class SIMDLdStSingleS<bit L, bit R, bits<3> opcode, bits<2> size, string asm,
- dag oops, dag iops, list<dag> pattern>
- : BaseSIMDLdStSingle<L, R, opcode, asm, "\t$Vt$idx, $vaddr", oops, iops,
- pattern> {
- // idx encoded in Q:S fields.
- bits<2> idx;
- let Inst{30} = idx{1};
- let Inst{23} = 0;
- let Inst{20-16} = 0b00000;
- let Inst{12} = idx{0};
- let Inst{11-10} = size;
-}
-class SIMDLdStSingleSTied<bit L, bit R, bits<3> opcode, bits<2> size, string asm,
- dag oops, dag iops, list<dag> pattern>
- : BaseSIMDLdStSingleTied<L, R, opcode, asm, "\t$Vt$idx, $vaddr", oops, iops,
- pattern> {
- // idx encoded in Q:S fields.
- bits<2> idx;
- let Inst{30} = idx{1};
- let Inst{23} = 0;
- let Inst{20-16} = 0b00000;
- let Inst{12} = idx{0};
- let Inst{11-10} = size;
-}
-class SIMDLdStSingleSPost<bit L, bit R, bits<3> opcode, bits<2> size,
- string asm, dag oops, dag iops>
- : BaseSIMDLdStSingle<L, R, opcode, asm, "\t$Vt$idx, $vaddr, $Xm",
- oops, iops, []> {
- // idx encoded in Q:S fields.
- bits<2> idx;
- bits<5> Xm;
- let Inst{30} = idx{1};
- let Inst{23} = 1;
- let Inst{20-16} = Xm;
- let Inst{12} = idx{0};
- let Inst{11-10} = size;
-}
-class SIMDLdStSingleSTiedPost<bit L, bit R, bits<3> opcode, bits<2> size,
- string asm, dag oops, dag iops>
- : BaseSIMDLdStSingleTied<L, R, opcode, asm, "\t$Vt$idx, $vaddr, $Xm",
- oops, iops, []> {
- // idx encoded in Q:S fields.
- bits<2> idx;
- bits<5> Xm;
- let Inst{30} = idx{1};
- let Inst{23} = 1;
- let Inst{20-16} = Xm;
- let Inst{12} = idx{0};
- let Inst{11-10} = size;
-}
-class SIMDLdStSingleD<bit L, bit R, bits<3> opcode, bits<2> size, string asm,
- dag oops, dag iops, list<dag> pattern>
- : BaseSIMDLdStSingle<L, R, opcode, asm, "\t$Vt$idx, $vaddr", oops, iops,
- pattern> {
- // idx encoded in Q field.
- bits<1> idx;
- let Inst{30} = idx;
- let Inst{23} = 0;
- let Inst{20-16} = 0b00000;
- let Inst{12} = 0;
- let Inst{11-10} = size;
-}
-class SIMDLdStSingleDTied<bit L, bit R, bits<3> opcode, bits<2> size, string asm,
- dag oops, dag iops, list<dag> pattern>
- : BaseSIMDLdStSingleTied<L, R, opcode, asm, "\t$Vt$idx, $vaddr", oops, iops,
- pattern> {
- // idx encoded in Q field.
- bits<1> idx;
- let Inst{30} = idx;
- let Inst{23} = 0;
- let Inst{20-16} = 0b00000;
- let Inst{12} = 0;
- let Inst{11-10} = size;
-}
-class SIMDLdStSingleDPost<bit L, bit R, bits<3> opcode, bits<2> size,
- string asm, dag oops, dag iops>
- : BaseSIMDLdStSingle<L, R, opcode, asm, "\t$Vt$idx, $vaddr, $Xm",
- oops, iops, []> {
- // idx encoded in Q field.
- bits<1> idx;
- bits<5> Xm;
- let Inst{30} = idx;
- let Inst{23} = 1;
- let Inst{20-16} = Xm;
- let Inst{12} = 0;
- let Inst{11-10} = size;
-}
-class SIMDLdStSingleDTiedPost<bit L, bit R, bits<3> opcode, bits<2> size,
- string asm, dag oops, dag iops>
- : BaseSIMDLdStSingleTied<L, R, opcode, asm, "\t$Vt$idx, $vaddr, $Xm",
- oops, iops, []> {
- // idx encoded in Q field.
- bits<1> idx;
- bits<5> Xm;
- let Inst{30} = idx;
- let Inst{23} = 1;
- let Inst{20-16} = Xm;
- let Inst{12} = 0;
- let Inst{11-10} = size;
-}
-
-let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in
-multiclass SIMDLdSingleBTied<bit R, bits<3> opcode, string asm,
- RegisterOperand listtype,
- RegisterOperand GPR64pi> {
- def i8 : SIMDLdStSingleBTied<1, R, opcode, asm,
- (outs listtype:$dst),
- (ins listtype:$Vt, VectorIndexB:$idx,
- am_simdnoindex:$vaddr), []>;
-
- def i8_POST : SIMDLdStSingleBTiedPost<1, R, opcode, asm,
- (outs listtype:$dst),
- (ins listtype:$Vt, VectorIndexB:$idx,
- am_simdnoindex:$vaddr, GPR64pi:$Xm)>;
-}
-let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in
-multiclass SIMDLdSingleHTied<bit R, bits<3> opcode, bit size, string asm,
- RegisterOperand listtype,
- RegisterOperand GPR64pi> {
- def i16 : SIMDLdStSingleHTied<1, R, opcode, size, asm,
- (outs listtype:$dst),
- (ins listtype:$Vt, VectorIndexH:$idx,
- am_simdnoindex:$vaddr), []>;
-
- def i16_POST : SIMDLdStSingleHTiedPost<1, R, opcode, size, asm,
- (outs listtype:$dst),
- (ins listtype:$Vt, VectorIndexH:$idx,
- am_simdnoindex:$vaddr, GPR64pi:$Xm)>;
-}
-let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in
-multiclass SIMDLdSingleSTied<bit R, bits<3> opcode, bits<2> size, string asm,
- RegisterOperand listtype,
- RegisterOperand GPR64pi> {
- def i32 : SIMDLdStSingleSTied<1, R, opcode, size, asm,
- (outs listtype:$dst),
- (ins listtype:$Vt, VectorIndexS:$idx,
- am_simdnoindex:$vaddr), []>;
-
- def i32_POST : SIMDLdStSingleSTiedPost<1, R, opcode, size, asm,
- (outs listtype:$dst),
- (ins listtype:$Vt, VectorIndexS:$idx,
- am_simdnoindex:$vaddr, GPR64pi:$Xm)>;
-}
-let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in
-multiclass SIMDLdSingleDTied<bit R, bits<3> opcode, bits<2> size, string asm,
- RegisterOperand listtype, RegisterOperand GPR64pi> {
- def i64 : SIMDLdStSingleDTied<1, R, opcode, size, asm,
- (outs listtype:$dst),
- (ins listtype:$Vt, VectorIndexD:$idx,
- am_simdnoindex:$vaddr), []>;
-
- def i64_POST : SIMDLdStSingleDTiedPost<1, R, opcode, size, asm,
- (outs listtype:$dst),
- (ins listtype:$Vt, VectorIndexD:$idx,
- am_simdnoindex:$vaddr, GPR64pi:$Xm)>;
-}
-let mayLoad = 0, mayStore = 1, hasSideEffects = 0 in
-multiclass SIMDStSingleB<bit R, bits<3> opcode, string asm,
- RegisterOperand listtype, RegisterOperand GPR64pi> {
- def i8 : SIMDLdStSingleB<0, R, opcode, asm,
- (outs), (ins listtype:$Vt, VectorIndexB:$idx,
- am_simdnoindex:$vaddr), []>;
-
- def i8_POST : SIMDLdStSingleBPost<0, R, opcode, asm,
- (outs), (ins listtype:$Vt, VectorIndexB:$idx,
- am_simdnoindex:$vaddr, GPR64pi:$Xm)>;
-}
-let mayLoad = 0, mayStore = 1, hasSideEffects = 0 in
-multiclass SIMDStSingleH<bit R, bits<3> opcode, bit size, string asm,
- RegisterOperand listtype, RegisterOperand GPR64pi> {
- def i16 : SIMDLdStSingleH<0, R, opcode, size, asm,
- (outs), (ins listtype:$Vt, VectorIndexH:$idx,
- am_simdnoindex:$vaddr), []>;
-
- def i16_POST : SIMDLdStSingleHPost<0, R, opcode, size, asm,
- (outs), (ins listtype:$Vt, VectorIndexH:$idx,
- am_simdnoindex:$vaddr, GPR64pi:$Xm)>;
-}
-let mayLoad = 0, mayStore = 1, hasSideEffects = 0 in
-multiclass SIMDStSingleS<bit R, bits<3> opcode, bits<2> size, string asm,
- RegisterOperand listtype, RegisterOperand GPR64pi> {
- def i32 : SIMDLdStSingleS<0, R, opcode, size, asm,
- (outs), (ins listtype:$Vt, VectorIndexS:$idx,
- am_simdnoindex:$vaddr), []>;
-
- def i32_POST : SIMDLdStSingleSPost<0, R, opcode, size, asm,
- (outs), (ins listtype:$Vt, VectorIndexS:$idx,
- am_simdnoindex:$vaddr, GPR64pi:$Xm)>;
-}
-let mayLoad = 0, mayStore = 1, hasSideEffects = 0 in
-multiclass SIMDStSingleD<bit R, bits<3> opcode, bits<2> size, string asm,
- RegisterOperand listtype, RegisterOperand GPR64pi> {
- def i64 : SIMDLdStSingleD<0, R, opcode, size, asm,
- (outs), (ins listtype:$Vt, VectorIndexD:$idx,
- am_simdnoindex:$vaddr), []>;
-
- def i64_POST : SIMDLdStSingleDPost<0, R, opcode, size, asm,
- (outs), (ins listtype:$Vt, VectorIndexD:$idx,
- am_simdnoindex:$vaddr, GPR64pi:$Xm)>;
-}
-
-multiclass SIMDLdStSingleAliases<string asm, string layout, string Type,
- string Count, int Offset, Operand idxtype> {
- // E.g. "ld1 { v0.8b }[0], [x1], #1"
- // "ld1\t$Vt, $vaddr, #1"
- // may get mapped to
- // (LD1Rv8b_POST VecListOne8b:$Vt, am_simdnoindex:$vaddr, XZR)
- def : InstAlias<asm # "\t$Vt$idx, $vaddr, #" # Offset,
- (!cast<Instruction>(NAME # Type # "_POST")
- !cast<RegisterOperand>("VecList" # Count # layout):$Vt,
- idxtype:$idx, am_simdnoindex:$vaddr, XZR), 1>;
-
- // E.g. "ld1.8b { v0 }[0], [x1], #1"
- // "ld1.8b\t$Vt, $vaddr, #1"
- // may get mapped to
- // (LD1Rv8b_POST VecListOne64:$Vt, am_simdnoindex:$vaddr, XZR)
- def : InstAlias<asm # "." # layout # "\t$Vt$idx, $vaddr, #" # Offset,
- (!cast<Instruction>(NAME # Type # "_POST")
- !cast<RegisterOperand>("VecList" # Count # "128"):$Vt,
- idxtype:$idx, am_simdnoindex:$vaddr, XZR), 0>;
-
- // E.g. "ld1.8b { v0 }[0], [x1]"
- // "ld1.8b\t$Vt, $vaddr"
- // may get mapped to
- // (LD1Rv8b VecListOne64:$Vt, am_simdnoindex:$vaddr)
- def : InstAlias<asm # "." # layout # "\t$Vt$idx, $vaddr",
- (!cast<Instruction>(NAME # Type)
- !cast<RegisterOperand>("VecList" # Count # "128"):$Vt,
- idxtype:$idx, am_simdnoindex:$vaddr), 0>;
-
- // E.g. "ld1.8b { v0 }[0], [x1], x2"
- // "ld1.8b\t$Vt, $vaddr, $Xm"
- // may get mapped to
- // (LD1Rv8b_POST VecListOne64:$Vt, am_simdnoindex:$vaddr, GPR64pi1:$Xm)
- def : InstAlias<asm # "." # layout # "\t$Vt$idx, $vaddr, $Xm",
- (!cast<Instruction>(NAME # Type # "_POST")
- !cast<RegisterOperand>("VecList" # Count # "128"):$Vt,
- idxtype:$idx, am_simdnoindex:$vaddr,
- !cast<RegisterOperand>("GPR64pi" # Offset):$Xm), 0>;
-}
-
-multiclass SIMDLdSt1SingleAliases<string asm> {
- defm : SIMDLdStSingleAliases<asm, "b", "i8", "One", 1, VectorIndexB>;
- defm : SIMDLdStSingleAliases<asm, "h", "i16", "One", 2, VectorIndexH>;
- defm : SIMDLdStSingleAliases<asm, "s", "i32", "One", 4, VectorIndexS>;
- defm : SIMDLdStSingleAliases<asm, "d", "i64", "One", 8, VectorIndexD>;
-}
-
-multiclass SIMDLdSt2SingleAliases<string asm> {
- defm : SIMDLdStSingleAliases<asm, "b", "i8", "Two", 2, VectorIndexB>;
- defm : SIMDLdStSingleAliases<asm, "h", "i16", "Two", 4, VectorIndexH>;
- defm : SIMDLdStSingleAliases<asm, "s", "i32", "Two", 8, VectorIndexS>;
- defm : SIMDLdStSingleAliases<asm, "d", "i64", "Two", 16, VectorIndexD>;
-}
-
-multiclass SIMDLdSt3SingleAliases<string asm> {
- defm : SIMDLdStSingleAliases<asm, "b", "i8", "Three", 3, VectorIndexB>;
- defm : SIMDLdStSingleAliases<asm, "h", "i16", "Three", 6, VectorIndexH>;
- defm : SIMDLdStSingleAliases<asm, "s", "i32", "Three", 12, VectorIndexS>;
- defm : SIMDLdStSingleAliases<asm, "d", "i64", "Three", 24, VectorIndexD>;
-}
-
-multiclass SIMDLdSt4SingleAliases<string asm> {
- defm : SIMDLdStSingleAliases<asm, "b", "i8", "Four", 4, VectorIndexB>;
- defm : SIMDLdStSingleAliases<asm, "h", "i16", "Four", 8, VectorIndexH>;
- defm : SIMDLdStSingleAliases<asm, "s", "i32", "Four", 16, VectorIndexS>;
- defm : SIMDLdStSingleAliases<asm, "d", "i64", "Four", 32, VectorIndexD>;
-}
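-
-// Note on the alias tables above: the post-increment immediate is always the
-// total transfer size, i.e. element size in bytes times the number of
-// registers in the list (illustrative summary, derived from the defm lines):
-//   b: 1 byte  x {1,2,3,4} regs -> #1, #2, #3, #4
-//   h: 2 bytes x {1,2,3,4} regs -> #2, #4, #6, #8
-//   s: 4 bytes x {1,2,3,4} regs -> #4, #8, #12, #16
-//   d: 8 bytes x {1,2,3,4} regs -> #8, #16, #24, #32
-// e.g. "ld3 { v0.h, v1.h, v2.h }[2], [x0], #6" advances x0 by 3 * 2 = 6.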
-
-//----------------------------------------------------------------------------
-// Crypto extensions
-//----------------------------------------------------------------------------
-
-let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
-class AESBase<bits<4> opc, string asm, dag outs, dag ins, string cstr,
- list<dag> pat>
- : I<outs, ins, asm, "{\t$Rd.16b, $Rn.16b|.16b\t$Rd, $Rn}", cstr, pat>,
-    Sched<[WriteV]> {
- bits<5> Rd;
- bits<5> Rn;
- let Inst{31-16} = 0b0100111000101000;
- let Inst{15-12} = opc;
- let Inst{11-10} = 0b10;
- let Inst{9-5} = Rn;
- let Inst{4-0} = Rd;
-}
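-
-// The fields above concatenate into the full 32-bit encoding. A worked
-// example (assuming AESE uses opc = 0b0100; its def is not in this file):
-// "aese v0.16b, v1.16b" has Rn = 1, Rd = 0, so the word assembles as
-//   0100111000101000 : 0100 : 10 : 00001 : 00000  =  0x4E284820.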
-
-class AESInst<bits<4> opc, string asm, Intrinsic OpNode>
- : AESBase<opc, asm, (outs V128:$Rd), (ins V128:$Rn), "",
- [(set (v16i8 V128:$Rd), (OpNode (v16i8 V128:$Rn)))]>;
-
-class AESTiedInst<bits<4> opc, string asm, Intrinsic OpNode>
- : AESBase<opc, asm, (outs V128:$dst), (ins V128:$Rd, V128:$Rn),
- "$Rd = $dst",
- [(set (v16i8 V128:$dst),
- (OpNode (v16i8 V128:$Rd), (v16i8 V128:$Rn)))]>;
-
-let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
-class SHA3OpTiedInst<bits<3> opc, string asm, string dst_lhs_kind,
- dag oops, dag iops, list<dag> pat>
- : I<oops, iops, asm,
- "{\t$Rd" # dst_lhs_kind # ", $Rn" # dst_lhs_kind # ", $Rm.4s" #
- "|.4s\t$Rd, $Rn, $Rm}", "$Rd = $dst", pat>,
-    Sched<[WriteV]> {
- bits<5> Rd;
- bits<5> Rn;
- bits<5> Rm;
- let Inst{31-21} = 0b01011110000;
- let Inst{20-16} = Rm;
- let Inst{15} = 0;
- let Inst{14-12} = opc;
- let Inst{11-10} = 0b00;
- let Inst{9-5} = Rn;
- let Inst{4-0} = Rd;
-}
-
-class SHATiedInstQSV<bits<3> opc, string asm, Intrinsic OpNode>
- : SHA3OpTiedInst<opc, asm, "", (outs FPR128:$dst),
- (ins FPR128:$Rd, FPR32:$Rn, V128:$Rm),
- [(set (v4i32 FPR128:$dst),
- (OpNode (v4i32 FPR128:$Rd), (i32 FPR32:$Rn),
- (v4i32 V128:$Rm)))]>;
-
-class SHATiedInstVVV<bits<3> opc, string asm, Intrinsic OpNode>
- : SHA3OpTiedInst<opc, asm, ".4s", (outs V128:$dst),
- (ins V128:$Rd, V128:$Rn, V128:$Rm),
- [(set (v4i32 V128:$dst),
- (OpNode (v4i32 V128:$Rd), (v4i32 V128:$Rn),
- (v4i32 V128:$Rm)))]>;
-
-class SHATiedInstQQV<bits<3> opc, string asm, Intrinsic OpNode>
- : SHA3OpTiedInst<opc, asm, "", (outs FPR128:$dst),
- (ins FPR128:$Rd, FPR128:$Rn, V128:$Rm),
- [(set (v4i32 FPR128:$dst),
- (OpNode (v4i32 FPR128:$Rd), (v4i32 FPR128:$Rn),
- (v4i32 V128:$Rm)))]>;
-
-let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
-class SHA2OpInst<bits<4> opc, string asm, string kind,
- string cstr, dag oops, dag iops,
- list<dag> pat>
- : I<oops, iops, asm, "{\t$Rd" # kind # ", $Rn" # kind #
- "|" # kind # "\t$Rd, $Rn}", cstr, pat>,
-    Sched<[WriteV]> {
- bits<5> Rd;
- bits<5> Rn;
- let Inst{31-16} = 0b0101111000101000;
- let Inst{15-12} = opc;
- let Inst{11-10} = 0b10;
- let Inst{9-5} = Rn;
- let Inst{4-0} = Rd;
-}
-
-class SHATiedInstVV<bits<4> opc, string asm, Intrinsic OpNode>
- : SHA2OpInst<opc, asm, ".4s", "$Rd = $dst", (outs V128:$dst),
- (ins V128:$Rd, V128:$Rn),
- [(set (v4i32 V128:$dst),
- (OpNode (v4i32 V128:$Rd), (v4i32 V128:$Rn)))]>;
-
-class SHAInstSS<bits<4> opc, string asm, Intrinsic OpNode>
- : SHA2OpInst<opc, asm, "", "", (outs FPR32:$Rd), (ins FPR32:$Rn),
- [(set (i32 FPR32:$Rd), (OpNode (i32 FPR32:$Rn)))]>;
-
-// Allow the size specifier tokens to be upper case, not just lower.
-def : TokenAlias<".8B", ".8b">;
-def : TokenAlias<".4H", ".4h">;
-def : TokenAlias<".2S", ".2s">;
-def : TokenAlias<".1D", ".1d">;
-def : TokenAlias<".16B", ".16b">;
-def : TokenAlias<".8H", ".8h">;
-def : TokenAlias<".4S", ".4s">;
-def : TokenAlias<".2D", ".2d">;
-def : TokenAlias<".B", ".b">;
-def : TokenAlias<".H", ".h">;
-def : TokenAlias<".S", ".s">;
-def : TokenAlias<".D", ".d">;
diff --git a/lib/Target/ARM64/ARM64InstrInfo.cpp b/lib/Target/ARM64/ARM64InstrInfo.cpp
deleted file mode 100644
index 8f11757..0000000
--- a/lib/Target/ARM64/ARM64InstrInfo.cpp
+++ /dev/null
@@ -1,1864 +0,0 @@
-//===- ARM64InstrInfo.cpp - ARM64 Instruction Information -----------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the ARM64 implementation of the TargetInstrInfo class.
-//
-//===----------------------------------------------------------------------===//
-
-#include "ARM64InstrInfo.h"
-#include "ARM64Subtarget.h"
-#include "MCTargetDesc/ARM64AddressingModes.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineMemOperand.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
-#include "llvm/MC/MCInst.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/TargetRegistry.h"
-
-#define GET_INSTRINFO_CTOR_DTOR
-#include "ARM64GenInstrInfo.inc"
-
-using namespace llvm;
-
-ARM64InstrInfo::ARM64InstrInfo(const ARM64Subtarget &STI)
- : ARM64GenInstrInfo(ARM64::ADJCALLSTACKDOWN, ARM64::ADJCALLSTACKUP),
- RI(this, &STI), Subtarget(STI) {}
-
-/// GetInstSize - Return the number of bytes of code the specified
-/// instruction may be. This returns the maximum number of bytes.
-unsigned ARM64InstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
- const MCInstrDesc &Desc = MI->getDesc();
-
- switch (Desc.getOpcode()) {
- default:
-    // Anything not explicitly designated otherwise is a normal 4-byte insn.
- return 4;
- case TargetOpcode::DBG_VALUE:
- case TargetOpcode::EH_LABEL:
- case TargetOpcode::IMPLICIT_DEF:
- case TargetOpcode::KILL:
- return 0;
- }
-
-  llvm_unreachable("GetInstSizeInBytes() - Unable to determine insn size");
-}
-
-static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target,
- SmallVectorImpl<MachineOperand> &Cond) {
- // Block ends with fall-through condbranch.
- switch (LastInst->getOpcode()) {
- default:
- llvm_unreachable("Unknown branch instruction?");
- case ARM64::Bcc:
- Target = LastInst->getOperand(1).getMBB();
- Cond.push_back(LastInst->getOperand(0));
- break;
- case ARM64::CBZW:
- case ARM64::CBZX:
- case ARM64::CBNZW:
- case ARM64::CBNZX:
- Target = LastInst->getOperand(1).getMBB();
- Cond.push_back(MachineOperand::CreateImm(-1));
- Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
- Cond.push_back(LastInst->getOperand(0));
- break;
- case ARM64::TBZ:
- case ARM64::TBNZ:
- Target = LastInst->getOperand(2).getMBB();
- Cond.push_back(MachineOperand::CreateImm(-1));
- Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
- Cond.push_back(LastInst->getOperand(0));
- Cond.push_back(LastInst->getOperand(1));
- }
-}
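-
-// The Cond vector filled in above uses a small ad-hoc encoding shared with
-// ReverseBranchCondition and insertSelect below:
-//   size 1: [cc]                   - a regular Bcc condition code
-//   size 3: [-1, opcode, reg]      - a folded cbz/cbnz
-//   size 4: [-1, opcode, reg, bit] - a folded tbz/tbnz
-// The leading -1 is a sentinel distinguishing the folded forms from a Bcc.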
-
-// Branch analysis.
-bool ARM64InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
- MachineBasicBlock *&TBB,
- MachineBasicBlock *&FBB,
- SmallVectorImpl<MachineOperand> &Cond,
- bool AllowModify) const {
- // If the block has no terminators, it just falls into the block after it.
- MachineBasicBlock::iterator I = MBB.end();
- if (I == MBB.begin())
- return false;
- --I;
- while (I->isDebugValue()) {
- if (I == MBB.begin())
- return false;
- --I;
- }
- if (!isUnpredicatedTerminator(I))
- return false;
-
- // Get the last instruction in the block.
- MachineInstr *LastInst = I;
-
- // If there is only one terminator instruction, process it.
- unsigned LastOpc = LastInst->getOpcode();
- if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
- if (isUncondBranchOpcode(LastOpc)) {
- TBB = LastInst->getOperand(0).getMBB();
- return false;
- }
- if (isCondBranchOpcode(LastOpc)) {
- // Block ends with fall-through condbranch.
- parseCondBranch(LastInst, TBB, Cond);
- return false;
- }
- return true; // Can't handle indirect branch.
- }
-
- // Get the instruction before it if it is a terminator.
- MachineInstr *SecondLastInst = I;
- unsigned SecondLastOpc = SecondLastInst->getOpcode();
-
- // If AllowModify is true and the block ends with two or more unconditional
- // branches, delete all but the first unconditional branch.
- if (AllowModify && isUncondBranchOpcode(LastOpc)) {
- while (isUncondBranchOpcode(SecondLastOpc)) {
- LastInst->eraseFromParent();
- LastInst = SecondLastInst;
- LastOpc = LastInst->getOpcode();
- if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
-        // Return now that the only terminator is an unconditional branch.
- TBB = LastInst->getOperand(0).getMBB();
- return false;
- } else {
- SecondLastInst = I;
- SecondLastOpc = SecondLastInst->getOpcode();
- }
- }
- }
-
- // If there are three terminators, we don't know what sort of block this is.
- if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I))
- return true;
-
- // If the block ends with a B and a Bcc, handle it.
- if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
- parseCondBranch(SecondLastInst, TBB, Cond);
- FBB = LastInst->getOperand(0).getMBB();
- return false;
- }
-
- // If the block ends with two unconditional branches, handle it. The second
- // one is not executed, so remove it.
- if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
- TBB = SecondLastInst->getOperand(0).getMBB();
- I = LastInst;
- if (AllowModify)
- I->eraseFromParent();
- return false;
- }
-
- // ...likewise if it ends with an indirect branch followed by an unconditional
- // branch.
- if (isIndirectBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
- I = LastInst;
- if (AllowModify)
- I->eraseFromParent();
- return true;
- }
-
- // Otherwise, can't handle this.
- return true;
-}
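-
-// Illustration of the contract implemented above: for a block ending in
-// "b.eq LBB1; b LBB2" the function returns false with TBB = LBB1,
-// FBB = LBB2 and Cond = [EQ]; returning true means the terminators could
-// not be analyzed (e.g. an indirect branch).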
-
-bool ARM64InstrInfo::ReverseBranchCondition(
- SmallVectorImpl<MachineOperand> &Cond) const {
- if (Cond[0].getImm() != -1) {
- // Regular Bcc
- ARM64CC::CondCode CC = (ARM64CC::CondCode)(int)Cond[0].getImm();
- Cond[0].setImm(ARM64CC::getInvertedCondCode(CC));
- } else {
- // Folded compare-and-branch
- switch (Cond[1].getImm()) {
- default:
- llvm_unreachable("Unknown conditional branch!");
- case ARM64::CBZW:
- Cond[1].setImm(ARM64::CBNZW);
- break;
- case ARM64::CBNZW:
- Cond[1].setImm(ARM64::CBZW);
- break;
- case ARM64::CBZX:
- Cond[1].setImm(ARM64::CBNZX);
- break;
- case ARM64::CBNZX:
- Cond[1].setImm(ARM64::CBZX);
- break;
- case ARM64::TBZ:
- Cond[1].setImm(ARM64::TBNZ);
- break;
- case ARM64::TBNZ:
- Cond[1].setImm(ARM64::TBZ);
- break;
- }
- }
-
- return false;
-}
-
-unsigned ARM64InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
- MachineBasicBlock::iterator I = MBB.end();
- if (I == MBB.begin())
- return 0;
- --I;
- while (I->isDebugValue()) {
- if (I == MBB.begin())
- return 0;
- --I;
- }
- if (!isUncondBranchOpcode(I->getOpcode()) &&
- !isCondBranchOpcode(I->getOpcode()))
- return 0;
-
- // Remove the branch.
- I->eraseFromParent();
-
- I = MBB.end();
-
- if (I == MBB.begin())
- return 1;
- --I;
- if (!isCondBranchOpcode(I->getOpcode()))
- return 1;
-
- // Remove the branch.
- I->eraseFromParent();
- return 2;
-}
-
-void ARM64InstrInfo::instantiateCondBranch(
- MachineBasicBlock &MBB, DebugLoc DL, MachineBasicBlock *TBB,
- const SmallVectorImpl<MachineOperand> &Cond) const {
- if (Cond[0].getImm() != -1) {
- // Regular Bcc
- BuildMI(&MBB, DL, get(ARM64::Bcc)).addImm(Cond[0].getImm()).addMBB(TBB);
- } else {
- // Folded compare-and-branch
- const MachineInstrBuilder MIB =
- BuildMI(&MBB, DL, get(Cond[1].getImm())).addReg(Cond[2].getReg());
- if (Cond.size() > 3)
- MIB.addImm(Cond[3].getImm());
- MIB.addMBB(TBB);
- }
-}
-
-unsigned ARM64InstrInfo::InsertBranch(
- MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond, DebugLoc DL) const {
- // Shouldn't be a fall through.
- assert(TBB && "InsertBranch must not be told to insert a fallthrough");
-
- if (FBB == 0) {
- if (Cond.empty()) // Unconditional branch?
- BuildMI(&MBB, DL, get(ARM64::B)).addMBB(TBB);
- else
- instantiateCondBranch(MBB, DL, TBB, Cond);
- return 1;
- }
-
- // Two-way conditional branch.
- instantiateCondBranch(MBB, DL, TBB, Cond);
- BuildMI(&MBB, DL, get(ARM64::B)).addMBB(FBB);
- return 2;
-}
-
-// Find the original register that VReg is copied from.
-static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg) {
- while (TargetRegisterInfo::isVirtualRegister(VReg)) {
- const MachineInstr *DefMI = MRI.getVRegDef(VReg);
- if (!DefMI->isFullCopy())
- return VReg;
- VReg = DefMI->getOperand(1).getReg();
- }
- return VReg;
-}
-
-// Determine if VReg is defined by an instruction that can be folded into a
-// csel instruction. If so, return the folded opcode, and the replacement
-// register.
-static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
- unsigned *NewVReg = 0) {
- VReg = removeCopies(MRI, VReg);
- if (!TargetRegisterInfo::isVirtualRegister(VReg))
- return 0;
-
- bool Is64Bit = ARM64::GPR64allRegClass.hasSubClassEq(MRI.getRegClass(VReg));
- const MachineInstr *DefMI = MRI.getVRegDef(VReg);
- unsigned Opc = 0;
- unsigned SrcOpNum = 0;
- switch (DefMI->getOpcode()) {
- case ARM64::ADDSXri:
- case ARM64::ADDSWri:
- // if CPSR is used, do not fold.
- if (DefMI->findRegisterDefOperandIdx(ARM64::CPSR, true) == -1)
- return 0;
- // fall-through to ADDXri and ADDWri.
- case ARM64::ADDXri:
- case ARM64::ADDWri:
- // add x, 1 -> csinc.
- if (!DefMI->getOperand(2).isImm() || DefMI->getOperand(2).getImm() != 1 ||
- DefMI->getOperand(3).getImm() != 0)
- return 0;
- SrcOpNum = 1;
- Opc = Is64Bit ? ARM64::CSINCXr : ARM64::CSINCWr;
- break;
-
- case ARM64::ORNXrr:
- case ARM64::ORNWrr: {
- // not x -> csinv, represented as orn dst, xzr, src.
- unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
- if (ZReg != ARM64::XZR && ZReg != ARM64::WZR)
- return 0;
- SrcOpNum = 2;
- Opc = Is64Bit ? ARM64::CSINVXr : ARM64::CSINVWr;
- break;
- }
-
- case ARM64::SUBSXrr:
- case ARM64::SUBSWrr:
- // if CPSR is used, do not fold.
- if (DefMI->findRegisterDefOperandIdx(ARM64::CPSR, true) == -1)
- return 0;
- // fall-through to SUBXrr and SUBWrr.
- case ARM64::SUBXrr:
- case ARM64::SUBWrr: {
- // neg x -> csneg, represented as sub dst, xzr, src.
- unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
- if (ZReg != ARM64::XZR && ZReg != ARM64::WZR)
- return 0;
- SrcOpNum = 2;
- Opc = Is64Bit ? ARM64::CSNEGXr : ARM64::CSNEGWr;
- break;
- }
- default:
- return 0;
- }
- assert(Opc && SrcOpNum && "Missing parameters");
-
- if (NewVReg)
- *NewVReg = DefMI->getOperand(SrcOpNum).getReg();
- return Opc;
-}
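-
-// Worked example (illustrative virtual register numbers): given
-//   %1 = ADDWri %0, 1, 0     ; %1 = %0 + 1
-//   %2 = CSELWr %a, %1, cc
-// the ADDWri qualifies above with SrcOpNum = 1, so insertSelect can emit
-//   %2 = CSINCWr %a, %0, cc
-// and the now-dead add is left for DCE.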
-
-bool ARM64InstrInfo::canInsertSelect(
- const MachineBasicBlock &MBB, const SmallVectorImpl<MachineOperand> &Cond,
- unsigned TrueReg, unsigned FalseReg, int &CondCycles, int &TrueCycles,
- int &FalseCycles) const {
- // Check register classes.
- const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
- const TargetRegisterClass *RC =
- RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
- if (!RC)
- return false;
-
- // Expanding cbz/tbz requires an extra cycle of latency on the condition.
- unsigned ExtraCondLat = Cond.size() != 1;
-
- // GPRs are handled by csel.
- // FIXME: Fold in x+1, -x, and ~x when applicable.
- if (ARM64::GPR64allRegClass.hasSubClassEq(RC) ||
- ARM64::GPR32allRegClass.hasSubClassEq(RC)) {
- // Single-cycle csel, csinc, csinv, and csneg.
- CondCycles = 1 + ExtraCondLat;
- TrueCycles = FalseCycles = 1;
- if (canFoldIntoCSel(MRI, TrueReg))
- TrueCycles = 0;
- else if (canFoldIntoCSel(MRI, FalseReg))
- FalseCycles = 0;
- return true;
- }
-
- // Scalar floating point is handled by fcsel.
- // FIXME: Form fabs, fmin, and fmax when applicable.
- if (ARM64::FPR64RegClass.hasSubClassEq(RC) ||
- ARM64::FPR32RegClass.hasSubClassEq(RC)) {
- CondCycles = 5 + ExtraCondLat;
- TrueCycles = FalseCycles = 2;
- return true;
- }
-
- // Can't do vectors.
- return false;
-}
-
-void ARM64InstrInfo::insertSelect(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I, DebugLoc DL,
- unsigned DstReg,
- const SmallVectorImpl<MachineOperand> &Cond,
- unsigned TrueReg, unsigned FalseReg) const {
- MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
-
- // Parse the condition code, see parseCondBranch() above.
- ARM64CC::CondCode CC;
- switch (Cond.size()) {
- default:
- llvm_unreachable("Unknown condition opcode in Cond");
- case 1: // b.cc
- CC = ARM64CC::CondCode(Cond[0].getImm());
- break;
- case 3: { // cbz/cbnz
- // We must insert a compare against 0.
- bool Is64Bit;
- switch (Cond[1].getImm()) {
- default:
- llvm_unreachable("Unknown branch opcode in Cond");
- case ARM64::CBZW:
- Is64Bit = 0;
- CC = ARM64CC::EQ;
- break;
- case ARM64::CBZX:
- Is64Bit = 1;
- CC = ARM64CC::EQ;
- break;
- case ARM64::CBNZW:
- Is64Bit = 0;
- CC = ARM64CC::NE;
- break;
- case ARM64::CBNZX:
- Is64Bit = 1;
- CC = ARM64CC::NE;
- break;
- }
- unsigned SrcReg = Cond[2].getReg();
- if (Is64Bit) {
- // cmp reg, #0 is actually subs xzr, reg, #0.
- MRI.constrainRegClass(SrcReg, &ARM64::GPR64spRegClass);
- BuildMI(MBB, I, DL, get(ARM64::SUBSXri), ARM64::XZR)
- .addReg(SrcReg)
- .addImm(0)
- .addImm(0);
- } else {
- MRI.constrainRegClass(SrcReg, &ARM64::GPR32spRegClass);
- BuildMI(MBB, I, DL, get(ARM64::SUBSWri), ARM64::WZR)
- .addReg(SrcReg)
- .addImm(0)
- .addImm(0);
- }
- break;
- }
- case 4: { // tbz/tbnz
- // We must insert a tst instruction.
- switch (Cond[1].getImm()) {
- default:
- llvm_unreachable("Unknown branch opcode in Cond");
- case ARM64::TBZ:
- CC = ARM64CC::EQ;
- break;
- case ARM64::TBNZ:
- CC = ARM64CC::NE;
- break;
- }
-    // tst reg, #(1 << foo) is actually ands xzr, reg, #(1 << foo).
- BuildMI(MBB, I, DL, get(ARM64::ANDSXri), ARM64::XZR)
- .addReg(Cond[2].getReg())
- .addImm(ARM64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 64));
- break;
- }
- }
-
- unsigned Opc = 0;
- const TargetRegisterClass *RC = 0;
- bool TryFold = false;
- if (MRI.constrainRegClass(DstReg, &ARM64::GPR64RegClass)) {
- RC = &ARM64::GPR64RegClass;
- Opc = ARM64::CSELXr;
- TryFold = true;
- } else if (MRI.constrainRegClass(DstReg, &ARM64::GPR32RegClass)) {
- RC = &ARM64::GPR32RegClass;
- Opc = ARM64::CSELWr;
- TryFold = true;
- } else if (MRI.constrainRegClass(DstReg, &ARM64::FPR64RegClass)) {
- RC = &ARM64::FPR64RegClass;
- Opc = ARM64::FCSELDrrr;
- } else if (MRI.constrainRegClass(DstReg, &ARM64::FPR32RegClass)) {
- RC = &ARM64::FPR32RegClass;
- Opc = ARM64::FCSELSrrr;
- }
- assert(RC && "Unsupported regclass");
-
- // Try folding simple instructions into the csel.
- if (TryFold) {
- unsigned NewVReg = 0;
- unsigned FoldedOpc = canFoldIntoCSel(MRI, TrueReg, &NewVReg);
- if (FoldedOpc) {
-      // The folded opcodes csinc, csinv and csneg apply the operation to
- // FalseReg, so we need to invert the condition.
- CC = ARM64CC::getInvertedCondCode(CC);
- TrueReg = FalseReg;
- } else
- FoldedOpc = canFoldIntoCSel(MRI, FalseReg, &NewVReg);
-
- // Fold the operation. Leave any dead instructions for DCE to clean up.
- if (FoldedOpc) {
- FalseReg = NewVReg;
- Opc = FoldedOpc;
-      // This extends the live range of NewVReg.
- MRI.clearKillFlags(NewVReg);
- }
- }
-
-  // Pull all virtual registers into the appropriate class.
- MRI.constrainRegClass(TrueReg, RC);
- MRI.constrainRegClass(FalseReg, RC);
-
- // Insert the csel.
- BuildMI(MBB, I, DL, get(Opc), DstReg).addReg(TrueReg).addReg(FalseReg).addImm(
- CC);
-}
-
-bool ARM64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
- unsigned &SrcReg, unsigned &DstReg,
- unsigned &SubIdx) const {
- switch (MI.getOpcode()) {
- default:
- return false;
- case ARM64::SBFMXri: // aka sxtw
- case ARM64::UBFMXri: // aka uxtw
-    // Check for the 32 -> 64 bit extension case; these instructions can do
- // much more.
- if (MI.getOperand(2).getImm() != 0 || MI.getOperand(3).getImm() != 31)
- return false;
- // This is a signed or unsigned 32 -> 64 bit extension.
- SrcReg = MI.getOperand(1).getReg();
- DstReg = MI.getOperand(0).getReg();
- SubIdx = ARM64::sub_32;
- return true;
- }
-}
-
-/// analyzeCompare - For a comparison instruction, return the source registers
-/// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
-/// Return true if the comparison instruction can be analyzed.
-bool ARM64InstrInfo::analyzeCompare(const MachineInstr *MI, unsigned &SrcReg,
- unsigned &SrcReg2, int &CmpMask,
- int &CmpValue) const {
- switch (MI->getOpcode()) {
- default:
- break;
- case ARM64::SUBSWrr:
- case ARM64::SUBSWrs:
- case ARM64::SUBSWrx:
- case ARM64::SUBSXrr:
- case ARM64::SUBSXrs:
- case ARM64::SUBSXrx:
- case ARM64::ADDSWrr:
- case ARM64::ADDSWrs:
- case ARM64::ADDSWrx:
- case ARM64::ADDSXrr:
- case ARM64::ADDSXrs:
- case ARM64::ADDSXrx:
- // Replace SUBSWrr with SUBWrr if CPSR is not used.
- SrcReg = MI->getOperand(1).getReg();
- SrcReg2 = MI->getOperand(2).getReg();
- CmpMask = ~0;
- CmpValue = 0;
- return true;
- case ARM64::SUBSWri:
- case ARM64::ADDSWri:
- case ARM64::ANDSWri:
- case ARM64::SUBSXri:
- case ARM64::ADDSXri:
- case ARM64::ANDSXri:
- SrcReg = MI->getOperand(1).getReg();
- SrcReg2 = 0;
- CmpMask = ~0;
- CmpValue = MI->getOperand(2).getImm();
- return true;
- }
-
- return false;
-}
-
-static bool UpdateOperandRegClass(MachineInstr *Instr) {
- MachineBasicBlock *MBB = Instr->getParent();
- assert(MBB && "Can't get MachineBasicBlock here");
- MachineFunction *MF = MBB->getParent();
- assert(MF && "Can't get MachineFunction here");
- const TargetMachine *TM = &MF->getTarget();
- const TargetInstrInfo *TII = TM->getInstrInfo();
- const TargetRegisterInfo *TRI = TM->getRegisterInfo();
- MachineRegisterInfo *MRI = &MF->getRegInfo();
-
- for (unsigned OpIdx = 0, EndIdx = Instr->getNumOperands(); OpIdx < EndIdx;
- ++OpIdx) {
- MachineOperand &MO = Instr->getOperand(OpIdx);
- const TargetRegisterClass *OpRegCstraints =
- Instr->getRegClassConstraint(OpIdx, TII, TRI);
-
- // If there's no constraint, there's nothing to do.
- if (!OpRegCstraints)
- continue;
- // If the operand is a frame index, there's nothing to do here.
- // A frame index operand will resolve correctly during PEI.
- if (MO.isFI())
- continue;
-
- assert(MO.isReg() &&
- "Operand has register constraints without being a register!");
-
- unsigned Reg = MO.getReg();
- if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
- if (!OpRegCstraints->contains(Reg))
- return false;
- } else if (!OpRegCstraints->hasSubClassEq(MRI->getRegClass(Reg)) &&
- !MRI->constrainRegClass(Reg, OpRegCstraints))
- return false;
- }
-
- return true;
-}
-
-/// optimizeCompareInstr - Convert the instruction supplying the argument to the
-/// comparison into one that sets the zero bit in the flags register.
-bool ARM64InstrInfo::optimizeCompareInstr(
- MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2, int CmpMask,
- int CmpValue, const MachineRegisterInfo *MRI) const {
-
- // Replace SUBSWrr with SUBWrr if CPSR is not used.
- int Cmp_CPSR = CmpInstr->findRegisterDefOperandIdx(ARM64::CPSR, true);
- if (Cmp_CPSR != -1) {
- unsigned NewOpc;
- switch (CmpInstr->getOpcode()) {
- default:
- return false;
- case ARM64::ADDSWrr: NewOpc = ARM64::ADDWrr; break;
- case ARM64::ADDSWri: NewOpc = ARM64::ADDWri; break;
- case ARM64::ADDSWrs: NewOpc = ARM64::ADDWrs; break;
- case ARM64::ADDSWrx: NewOpc = ARM64::ADDWrx; break;
- case ARM64::ADDSXrr: NewOpc = ARM64::ADDXrr; break;
- case ARM64::ADDSXri: NewOpc = ARM64::ADDXri; break;
- case ARM64::ADDSXrs: NewOpc = ARM64::ADDXrs; break;
- case ARM64::ADDSXrx: NewOpc = ARM64::ADDXrx; break;
- case ARM64::SUBSWrr: NewOpc = ARM64::SUBWrr; break;
- case ARM64::SUBSWri: NewOpc = ARM64::SUBWri; break;
- case ARM64::SUBSWrs: NewOpc = ARM64::SUBWrs; break;
- case ARM64::SUBSWrx: NewOpc = ARM64::SUBWrx; break;
- case ARM64::SUBSXrr: NewOpc = ARM64::SUBXrr; break;
- case ARM64::SUBSXri: NewOpc = ARM64::SUBXri; break;
- case ARM64::SUBSXrs: NewOpc = ARM64::SUBXrs; break;
- case ARM64::SUBSXrx: NewOpc = ARM64::SUBXrx; break;
- }
-
- const MCInstrDesc &MCID = get(NewOpc);
- CmpInstr->setDesc(MCID);
- CmpInstr->RemoveOperand(Cmp_CPSR);
- bool succeeded = UpdateOperandRegClass(CmpInstr);
- (void)succeeded;
-  assert(succeeded && "Some operands' register classes are incompatible!");
- return true;
- }
-
- // Continue only if we have a "ri" where immediate is zero.
- if (CmpValue != 0 || SrcReg2 != 0)
- return false;
-
-  // CmpInstr is a compare only if its destination register is not used.
- if (!MRI->use_nodbg_empty(CmpInstr->getOperand(0).getReg()))
- return false;
-
- // Get the unique definition of SrcReg.
- MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
- if (!MI)
- return false;
-
-  // We iterate backward, starting from the instruction before CmpInstr, and
-  // stop when we reach the definition of the source register or the beginning
-  // of the basic block, to check whether CPSR is used or modified in between.
- MachineBasicBlock::iterator I = CmpInstr, E = MI,
- B = CmpInstr->getParent()->begin();
-
- // Early exit if CmpInstr is at the beginning of the BB.
- if (I == B)
- return false;
-
- // Check whether the definition of SrcReg is in the same basic block as
- // Compare. If not, we can't optimize away the Compare.
- if (MI->getParent() != CmpInstr->getParent())
- return false;
-
-  // Check that CPSR isn't set or read between the comparison instruction and
-  // the one we want to change.
- const TargetRegisterInfo *TRI = &getRegisterInfo();
- for (--I; I != E; --I) {
- const MachineInstr &Instr = *I;
-
- if (Instr.modifiesRegister(ARM64::CPSR, TRI) ||
- Instr.readsRegister(ARM64::CPSR, TRI))
- // This instruction modifies or uses CPSR after the one we want to
- // change. We can't do this transformation.
- return false;
- if (I == B)
- // The 'and' is below the comparison instruction.
- return false;
- }
-
- unsigned NewOpc = MI->getOpcode();
- switch (MI->getOpcode()) {
- default:
- return false;
- case ARM64::ADDSWrr:
- case ARM64::ADDSWri:
- case ARM64::ADDSXrr:
- case ARM64::ADDSXri:
- case ARM64::SUBSWrr:
- case ARM64::SUBSWri:
- case ARM64::SUBSXrr:
- case ARM64::SUBSXri:
- break;
- case ARM64::ADDWrr: NewOpc = ARM64::ADDSWrr; break;
- case ARM64::ADDWri: NewOpc = ARM64::ADDSWri; break;
- case ARM64::ADDXrr: NewOpc = ARM64::ADDSXrr; break;
- case ARM64::ADDXri: NewOpc = ARM64::ADDSXri; break;
- case ARM64::ADCWr: NewOpc = ARM64::ADCSWr; break;
- case ARM64::ADCXr: NewOpc = ARM64::ADCSXr; break;
- case ARM64::SUBWrr: NewOpc = ARM64::SUBSWrr; break;
- case ARM64::SUBWri: NewOpc = ARM64::SUBSWri; break;
- case ARM64::SUBXrr: NewOpc = ARM64::SUBSXrr; break;
- case ARM64::SUBXri: NewOpc = ARM64::SUBSXri; break;
- case ARM64::SBCWr: NewOpc = ARM64::SBCSWr; break;
- case ARM64::SBCXr: NewOpc = ARM64::SBCSXr; break;
- case ARM64::ANDWri: NewOpc = ARM64::ANDSWri; break;
- case ARM64::ANDXri: NewOpc = ARM64::ANDSXri; break;
- }
-
- // Scan forward for the use of CPSR.
-  // When checking against MI: if the condition code requires checking of the
-  // V bit, then this is not safe to do.
- // It is safe to remove CmpInstr if CPSR is redefined or killed.
- // If we are done with the basic block, we need to check whether CPSR is
- // live-out.
- bool IsSafe = false;
- for (MachineBasicBlock::iterator I = CmpInstr,
- E = CmpInstr->getParent()->end();
- !IsSafe && ++I != E;) {
- const MachineInstr &Instr = *I;
- for (unsigned IO = 0, EO = Instr.getNumOperands(); !IsSafe && IO != EO;
- ++IO) {
- const MachineOperand &MO = Instr.getOperand(IO);
- if (MO.isRegMask() && MO.clobbersPhysReg(ARM64::CPSR)) {
- IsSafe = true;
- break;
- }
- if (!MO.isReg() || MO.getReg() != ARM64::CPSR)
- continue;
- if (MO.isDef()) {
- IsSafe = true;
- break;
- }
-
- // Decode the condition code.
- unsigned Opc = Instr.getOpcode();
- ARM64CC::CondCode CC;
- switch (Opc) {
- default:
- return false;
- case ARM64::Bcc:
- CC = (ARM64CC::CondCode)Instr.getOperand(IO - 2).getImm();
- break;
- case ARM64::CSINVWr:
- case ARM64::CSINVXr:
- case ARM64::CSINCWr:
- case ARM64::CSINCXr:
- case ARM64::CSELWr:
- case ARM64::CSELXr:
- case ARM64::CSNEGWr:
- case ARM64::CSNEGXr:
- CC = (ARM64CC::CondCode)Instr.getOperand(IO - 1).getImm();
- break;
- }
-
- // It is not safe to remove Compare instruction if Overflow(V) is used.
- switch (CC) {
- default:
-        // CPSR can be used multiple times, so we should continue.
- break;
- case ARM64CC::VS:
- case ARM64CC::VC:
- case ARM64CC::GE:
- case ARM64CC::LT:
- case ARM64CC::GT:
- case ARM64CC::LE:
- return false;
- }
- }
- }
-
- // If CPSR is not killed nor re-defined, we should check whether it is
- // live-out. If it is live-out, do not optimize.
- if (!IsSafe) {
- MachineBasicBlock *MBB = CmpInstr->getParent();
- for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
- SE = MBB->succ_end();
- SI != SE; ++SI)
- if ((*SI)->isLiveIn(ARM64::CPSR))
- return false;
- }
-
- // Update the instruction to set CPSR.
- MI->setDesc(get(NewOpc));
- CmpInstr->eraseFromParent();
- bool succeeded = UpdateOperandRegClass(MI);
- (void)succeeded;
-  assert(succeeded && "Some operands' register classes are incompatible!");
- MI->addRegisterDefined(ARM64::CPSR, TRI);
- return true;
-}
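-
-// Worked example (illustrative): for
-//   %1 = SUBWrr %a, %b
-//   %dead = SUBSWri %1, 0, 0    ; cmp %1, #0, result register unused
-//   b.eq ...
-// with no CPSR def or use in between, the SUBWrr is rewritten to SUBSWrr so
-// it sets CPSR itself and the explicit compare is erased. The transform
-// bails out whenever a flag reader depends on the V bit (VS/VC/GE/LT/GT/LE),
-// which the replacement instruction may compute differently.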
-
-// Return true if this instruction simply sets its single destination register
-// to zero. This is equivalent to a register rename of the zero-register.
-bool ARM64InstrInfo::isGPRZero(const MachineInstr *MI) const {
- switch (MI->getOpcode()) {
- default:
- break;
- case ARM64::MOVZWi:
- case ARM64::MOVZXi: // movz Rd, #0 (LSL #0)
- if (MI->getOperand(1).isImm() && MI->getOperand(1).getImm() == 0) {
- assert(MI->getDesc().getNumOperands() == 3 &&
- MI->getOperand(2).getImm() == 0 && "invalid MOVZi operands");
- return true;
- }
- break;
- case ARM64::ANDWri: // and Rd, Rzr, #imm
- return MI->getOperand(1).getReg() == ARM64::WZR;
- case ARM64::ANDXri:
- return MI->getOperand(1).getReg() == ARM64::XZR;
- case TargetOpcode::COPY:
- return MI->getOperand(1).getReg() == ARM64::WZR;
- }
- return false;
-}
-
-// Return true if this instruction simply renames a general register without
-// modifying bits.
-bool ARM64InstrInfo::isGPRCopy(const MachineInstr *MI) const {
- switch (MI->getOpcode()) {
- default:
- break;
- case TargetOpcode::COPY: {
-    // GPR32 copies will be lowered to ORRXrs
- unsigned DstReg = MI->getOperand(0).getReg();
- return (ARM64::GPR32RegClass.contains(DstReg) ||
- ARM64::GPR64RegClass.contains(DstReg));
- }
-  case ARM64::ORRXrs: // orr Xd, Xzr, Xm (LSL #0)
-    if (MI->getOperand(1).getReg() == ARM64::XZR) {
-      assert(MI->getDesc().getNumOperands() == 4 &&
-             MI->getOperand(3).getImm() == 0 && "invalid ORRrs operands");
-      return true;
-    }
-    break;
- case ARM64::ADDXri: // add Xd, Xn, #0 (LSL #0)
- if (MI->getOperand(2).getImm() == 0) {
- assert(MI->getDesc().getNumOperands() == 4 &&
- MI->getOperand(3).getImm() == 0 && "invalid ADDXri operands");
- return true;
- }
- }
- return false;
-}
-
-// Return true if this instruction simply renames a general register without
-// modifying bits.
-bool ARM64InstrInfo::isFPRCopy(const MachineInstr *MI) const {
- switch (MI->getOpcode()) {
- default:
- break;
- case TargetOpcode::COPY: {
-    // FPR64 copies will be lowered to ORR.16b
- unsigned DstReg = MI->getOperand(0).getReg();
- return (ARM64::FPR64RegClass.contains(DstReg) ||
- ARM64::FPR128RegClass.contains(DstReg));
- }
- case ARM64::ORRv16i8:
- if (MI->getOperand(1).getReg() == MI->getOperand(2).getReg()) {
- assert(MI->getDesc().getNumOperands() == 3 && MI->getOperand(0).isReg() &&
- "invalid ORRv16i8 operands");
- return true;
- }
- }
- return false;
-}
-
-unsigned ARM64InstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
- int &FrameIndex) const {
- switch (MI->getOpcode()) {
- default:
- break;
- case ARM64::LDRWui:
- case ARM64::LDRXui:
- case ARM64::LDRBui:
- case ARM64::LDRHui:
- case ARM64::LDRSui:
- case ARM64::LDRDui:
- case ARM64::LDRQui:
- if (MI->getOperand(0).getSubReg() == 0 && MI->getOperand(1).isFI() &&
- MI->getOperand(2).isImm() && MI->getOperand(2).getImm() == 0) {
- FrameIndex = MI->getOperand(1).getIndex();
- return MI->getOperand(0).getReg();
- }
- break;
- }
-
- return 0;
-}
-
-unsigned ARM64InstrInfo::isStoreToStackSlot(const MachineInstr *MI,
- int &FrameIndex) const {
- switch (MI->getOpcode()) {
- default:
- break;
- case ARM64::STRWui:
- case ARM64::STRXui:
- case ARM64::STRBui:
- case ARM64::STRHui:
- case ARM64::STRSui:
- case ARM64::STRDui:
- case ARM64::STRQui:
- if (MI->getOperand(0).getSubReg() == 0 && MI->getOperand(1).isFI() &&
- MI->getOperand(2).isImm() && MI->getOperand(2).getImm() == 0) {
- FrameIndex = MI->getOperand(1).getIndex();
- return MI->getOperand(0).getReg();
- }
- break;
- }
- return 0;
-}
-
-/// Return true if this load/store scales or extends its register offset.
-/// This refers to scaling a dynamic index as opposed to scaled immediates.
-/// MI should be a memory op that allows scaled addressing.
-bool ARM64InstrInfo::isScaledAddr(const MachineInstr *MI) const {
- switch (MI->getOpcode()) {
- default:
- break;
- case ARM64::LDRBBro:
- case ARM64::LDRBro:
- case ARM64::LDRDro:
- case ARM64::LDRHHro:
- case ARM64::LDRHro:
- case ARM64::LDRQro:
- case ARM64::LDRSBWro:
- case ARM64::LDRSBXro:
- case ARM64::LDRSHWro:
- case ARM64::LDRSHXro:
- case ARM64::LDRSWro:
- case ARM64::LDRSro:
- case ARM64::LDRWro:
- case ARM64::LDRXro:
- case ARM64::STRBBro:
- case ARM64::STRBro:
- case ARM64::STRDro:
- case ARM64::STRHHro:
- case ARM64::STRHro:
- case ARM64::STRQro:
- case ARM64::STRSro:
- case ARM64::STRWro:
- case ARM64::STRXro:
- unsigned Val = MI->getOperand(3).getImm();
- ARM64_AM::ExtendType ExtType = ARM64_AM::getMemExtendType(Val);
- return (ExtType != ARM64_AM::UXTX) || ARM64_AM::getMemDoShift(Val);
- }
- return false;
-}
-
-/// Check all MachineMemOperands for a hint to suppress pairing.
-bool ARM64InstrInfo::isLdStPairSuppressed(const MachineInstr *MI) const {
- assert(MOSuppressPair < (1 << MachineMemOperand::MOTargetNumBits) &&
- "Too many target MO flags");
- for (MachineInstr::mmo_iterator MM = MI->memoperands_begin(),
- E = MI->memoperands_end();
- MM != E; ++MM) {
-
- if ((*MM)->getFlags() &
- (MOSuppressPair << MachineMemOperand::MOTargetStartBit)) {
- return true;
- }
- }
- return false;
-}
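-
-// Sketch of the bit arithmetic above (values illustrative; the real
-// constants live in MachineMemOperand and ARM64InstrInfo.h): with
-// MOSuppressPair = 1 and MOTargetStartBit = 5, the test is simply
-//   Flags & (1 << 5)
-// so the pairing hint rides along on the memoperand with no side table.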
-
-/// Set a flag on the first MachineMemOperand to suppress pairing.
-void ARM64InstrInfo::suppressLdStPair(MachineInstr *MI) const {
- if (MI->memoperands_empty())
- return;
-
- assert(MOSuppressPair < (1 << MachineMemOperand::MOTargetNumBits) &&
- "Too many target MO flags");
- (*MI->memoperands_begin())
- ->setFlags(MOSuppressPair << MachineMemOperand::MOTargetStartBit);
-}
-
-bool ARM64InstrInfo::getLdStBaseRegImmOfs(MachineInstr *LdSt, unsigned &BaseReg,
- unsigned &Offset,
- const TargetRegisterInfo *TRI) const {
- switch (LdSt->getOpcode()) {
- default:
- return false;
- case ARM64::STRSui:
- case ARM64::STRDui:
- case ARM64::STRQui:
- case ARM64::STRXui:
- case ARM64::STRWui:
- case ARM64::LDRSui:
- case ARM64::LDRDui:
- case ARM64::LDRQui:
- case ARM64::LDRXui:
- case ARM64::LDRWui:
- if (!LdSt->getOperand(1).isReg() || !LdSt->getOperand(2).isImm())
- return false;
- BaseReg = LdSt->getOperand(1).getReg();
- MachineFunction &MF = *LdSt->getParent()->getParent();
- unsigned Width = getRegClass(LdSt->getDesc(), 0, TRI, MF)->getSize();
- Offset = LdSt->getOperand(2).getImm() * Width;
- return true;
-  }
-}
-
-/// Detect opportunities for ldp/stp formation.
-///
-/// Only called for LdSt for which getLdStBaseRegImmOfs returns true.
-bool ARM64InstrInfo::shouldClusterLoads(MachineInstr *FirstLdSt,
- MachineInstr *SecondLdSt,
- unsigned NumLoads) const {
- // Only cluster up to a single pair.
- if (NumLoads > 1)
- return false;
- if (FirstLdSt->getOpcode() != SecondLdSt->getOpcode())
- return false;
-  // getLdStBaseRegImmOfs guarantees that operand 2 is an immediate.
- unsigned Ofs1 = FirstLdSt->getOperand(2).getImm();
- // Allow 6 bits of positive range.
- if (Ofs1 > 64)
- return false;
- // The caller should already have ordered First/SecondLdSt by offset.
- unsigned Ofs2 = SecondLdSt->getOperand(2).getImm();
- return Ofs1 + 1 == Ofs2;
-}
-
-bool ARM64InstrInfo::shouldScheduleAdjacent(MachineInstr *First,
- MachineInstr *Second) const {
- // Cyclone can fuse CMN, CMP followed by Bcc.
-
- // FIXME: B0 can also fuse:
- // AND, BIC, ORN, ORR, or EOR (optional S) followed by Bcc or CBZ or CBNZ.
- if (Second->getOpcode() != ARM64::Bcc)
- return false;
- switch (First->getOpcode()) {
- default:
- return false;
- case ARM64::SUBSWri:
- case ARM64::ADDSWri:
- case ARM64::ANDSWri:
- case ARM64::SUBSXri:
- case ARM64::ADDSXri:
- case ARM64::ANDSXri:
- return true;
- }
-}
-
-MachineInstr *ARM64InstrInfo::emitFrameIndexDebugValue(MachineFunction &MF,
- int FrameIx,
- uint64_t Offset,
- const MDNode *MDPtr,
- DebugLoc DL) const {
- MachineInstrBuilder MIB = BuildMI(MF, DL, get(ARM64::DBG_VALUE))
- .addFrameIndex(FrameIx)
- .addImm(0)
- .addImm(Offset)
- .addMetadata(MDPtr);
- return &*MIB;
-}
-
-static const MachineInstrBuilder &AddSubReg(const MachineInstrBuilder &MIB,
- unsigned Reg, unsigned SubIdx,
- unsigned State,
- const TargetRegisterInfo *TRI) {
- if (!SubIdx)
- return MIB.addReg(Reg, State);
-
- if (TargetRegisterInfo::isPhysicalRegister(Reg))
- return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
- return MIB.addReg(Reg, State, SubIdx);
-}
-
-static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg,
- unsigned NumRegs) {
- // We really want the positive remainder mod 32 here, that happens to be
- // easily obtainable with a mask.
- return ((DestReg - SrcReg) & 0x1f) < NumRegs;
-}
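-
-// Worked example: copying { d1, d2 } from { d0, d1 } gives
-// (1 - 0) & 0x1f = 1 < 2, so a forward sub-register copy would clobber d1
-// before it is read; copyPhysRegTuple below then walks the indices in
-// reverse. The & 0x1f takes the remainder mod 32 (the register bank size),
-// so wrap-around cases like { d31, d0 } -> { d0, d1 } are caught as well.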
-
-void ARM64InstrInfo::copyPhysRegTuple(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I,
- DebugLoc DL, unsigned DestReg,
- unsigned SrcReg, bool KillSrc,
- unsigned Opcode,
- llvm::ArrayRef<unsigned> Indices) const {
- const TargetRegisterInfo *TRI = &getRegisterInfo();
- uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
- uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
- unsigned NumRegs = Indices.size();
-
- int SubReg = 0, End = NumRegs, Incr = 1;
- if (forwardCopyWillClobberTuple(DestEncoding, SrcEncoding, NumRegs)) {
- SubReg = NumRegs - 1;
- End = -1;
- Incr = -1;
- }
-
- for (; SubReg != End; SubReg += Incr) {
- const MachineInstrBuilder &MIB = BuildMI(MBB, I, DL, get(Opcode));
- AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
- AddSubReg(MIB, SrcReg, Indices[SubReg], 0, TRI);
- AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
- }
-}
-
-void ARM64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I, DebugLoc DL,
- unsigned DestReg, unsigned SrcReg,
- bool KillSrc) const {
- if (ARM64::GPR32spRegClass.contains(DestReg) &&
- (ARM64::GPR32spRegClass.contains(SrcReg) || SrcReg == ARM64::WZR)) {
- const TargetRegisterInfo *TRI = &getRegisterInfo();
-
- if (DestReg == ARM64::WSP || SrcReg == ARM64::WSP) {
- // If either operand is WSP, expand to ADD #0.
- if (Subtarget.hasZeroCycleRegMove()) {
- // Cyclone recognizes "ADD Xd, Xn, #0" as a zero-cycle register move.
- unsigned DestRegX = TRI->getMatchingSuperReg(DestReg, ARM64::sub_32,
- &ARM64::GPR64spRegClass);
- unsigned SrcRegX = TRI->getMatchingSuperReg(SrcReg, ARM64::sub_32,
- &ARM64::GPR64spRegClass);
- // This instruction is reading and writing X registers. This may upset
- // the register scavenger and machine verifier, so we need to indicate
- // that we are reading an undefined value from SrcRegX, but a proper
- // value from SrcReg.
- BuildMI(MBB, I, DL, get(ARM64::ADDXri), DestRegX)
- .addReg(SrcRegX, RegState::Undef)
- .addImm(0)
- .addImm(ARM64_AM::getShifterImm(ARM64_AM::LSL, 0))
- .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
- } else {
- BuildMI(MBB, I, DL, get(ARM64::ADDWri), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc))
- .addImm(0)
- .addImm(ARM64_AM::getShifterImm(ARM64_AM::LSL, 0));
- }
- } else if (SrcReg == ARM64::WZR && Subtarget.hasZeroCycleZeroing()) {
- BuildMI(MBB, I, DL, get(ARM64::MOVZWi), DestReg).addImm(0).addImm(
- ARM64_AM::getShifterImm(ARM64_AM::LSL, 0));
- } else {
- if (Subtarget.hasZeroCycleRegMove()) {
- // Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move.
- unsigned DestRegX = TRI->getMatchingSuperReg(DestReg, ARM64::sub_32,
- &ARM64::GPR64spRegClass);
- unsigned SrcRegX = TRI->getMatchingSuperReg(SrcReg, ARM64::sub_32,
- &ARM64::GPR64spRegClass);
- // This instruction is reading and writing X registers. This may upset
- // the register scavenger and machine verifier, so we need to indicate
- // that we are reading an undefined value from SrcRegX, but a proper
- // value from SrcReg.
- BuildMI(MBB, I, DL, get(ARM64::ORRXrr), DestRegX)
- .addReg(ARM64::XZR)
- .addReg(SrcRegX, RegState::Undef)
- .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
- } else {
- // Otherwise, expand to ORR WZR.
- BuildMI(MBB, I, DL, get(ARM64::ORRWrr), DestReg)
- .addReg(ARM64::WZR)
- .addReg(SrcReg, getKillRegState(KillSrc));
- }
- }
- return;
- }
-
- if (ARM64::GPR64spRegClass.contains(DestReg) &&
- (ARM64::GPR64spRegClass.contains(SrcReg) || SrcReg == ARM64::XZR)) {
- if (DestReg == ARM64::SP || SrcReg == ARM64::SP) {
- // If either operand is SP, expand to ADD #0.
- BuildMI(MBB, I, DL, get(ARM64::ADDXri), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc))
- .addImm(0)
- .addImm(ARM64_AM::getShifterImm(ARM64_AM::LSL, 0));
- } else if (SrcReg == ARM64::XZR && Subtarget.hasZeroCycleZeroing()) {
- BuildMI(MBB, I, DL, get(ARM64::MOVZXi), DestReg).addImm(0).addImm(
- ARM64_AM::getShifterImm(ARM64_AM::LSL, 0));
- } else {
- // Otherwise, expand to ORR XZR.
- BuildMI(MBB, I, DL, get(ARM64::ORRXrr), DestReg)
- .addReg(ARM64::XZR)
- .addReg(SrcReg, getKillRegState(KillSrc));
- }
- return;
- }
-
- // Copy a DDDD register quad by copying the individual sub-registers.
- if (ARM64::DDDDRegClass.contains(DestReg) &&
- ARM64::DDDDRegClass.contains(SrcReg)) {
- static const unsigned Indices[] = { ARM64::dsub0, ARM64::dsub1,
- ARM64::dsub2, ARM64::dsub3 };
- copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, ARM64::ORRv8i8,
- Indices);
- return;
- }
-
- // Copy a DDD register triple by copying the individual sub-registers.
- if (ARM64::DDDRegClass.contains(DestReg) &&
- ARM64::DDDRegClass.contains(SrcReg)) {
- static const unsigned Indices[] = { ARM64::dsub0, ARM64::dsub1,
- ARM64::dsub2 };
- copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, ARM64::ORRv8i8,
- Indices);
- return;
- }
-
- // Copy a DD register pair by copying the individual sub-registers.
- if (ARM64::DDRegClass.contains(DestReg) &&
- ARM64::DDRegClass.contains(SrcReg)) {
- static const unsigned Indices[] = { ARM64::dsub0, ARM64::dsub1 };
- copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, ARM64::ORRv8i8,
- Indices);
- return;
- }
-
- // Copy a QQQQ register quad by copying the individual sub-registers.
- if (ARM64::QQQQRegClass.contains(DestReg) &&
- ARM64::QQQQRegClass.contains(SrcReg)) {
- static const unsigned Indices[] = { ARM64::qsub0, ARM64::qsub1,
- ARM64::qsub2, ARM64::qsub3 };
- copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, ARM64::ORRv16i8,
- Indices);
- return;
- }
-
- // Copy a QQQ register triple by copying the individual sub-registers.
- if (ARM64::QQQRegClass.contains(DestReg) &&
- ARM64::QQQRegClass.contains(SrcReg)) {
- static const unsigned Indices[] = { ARM64::qsub0, ARM64::qsub1,
- ARM64::qsub2 };
- copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, ARM64::ORRv16i8,
- Indices);
- return;
- }
-
- // Copy a QQ register pair by copying the individual sub-registers.
- if (ARM64::QQRegClass.contains(DestReg) &&
- ARM64::QQRegClass.contains(SrcReg)) {
- static const unsigned Indices[] = { ARM64::qsub0, ARM64::qsub1 };
- copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, ARM64::ORRv16i8,
- Indices);
- return;
- }
-
- if (ARM64::FPR128RegClass.contains(DestReg) &&
- ARM64::FPR128RegClass.contains(SrcReg)) {
- BuildMI(MBB, I, DL, get(ARM64::ORRv16i8), DestReg).addReg(SrcReg).addReg(
- SrcReg, getKillRegState(KillSrc));
- return;
- }
-
- if (ARM64::FPR64RegClass.contains(DestReg) &&
- ARM64::FPR64RegClass.contains(SrcReg)) {
- DestReg =
- RI.getMatchingSuperReg(DestReg, ARM64::dsub, &ARM64::FPR128RegClass);
- SrcReg =
- RI.getMatchingSuperReg(SrcReg, ARM64::dsub, &ARM64::FPR128RegClass);
- BuildMI(MBB, I, DL, get(ARM64::ORRv16i8), DestReg).addReg(SrcReg).addReg(
- SrcReg, getKillRegState(KillSrc));
- return;
- }
-
- if (ARM64::FPR32RegClass.contains(DestReg) &&
- ARM64::FPR32RegClass.contains(SrcReg)) {
- DestReg =
- RI.getMatchingSuperReg(DestReg, ARM64::ssub, &ARM64::FPR128RegClass);
- SrcReg =
- RI.getMatchingSuperReg(SrcReg, ARM64::ssub, &ARM64::FPR128RegClass);
- BuildMI(MBB, I, DL, get(ARM64::ORRv16i8), DestReg).addReg(SrcReg).addReg(
- SrcReg, getKillRegState(KillSrc));
- return;
- }
-
- if (ARM64::FPR16RegClass.contains(DestReg) &&
- ARM64::FPR16RegClass.contains(SrcReg)) {
- DestReg =
- RI.getMatchingSuperReg(DestReg, ARM64::hsub, &ARM64::FPR128RegClass);
- SrcReg =
- RI.getMatchingSuperReg(SrcReg, ARM64::hsub, &ARM64::FPR128RegClass);
- BuildMI(MBB, I, DL, get(ARM64::ORRv16i8), DestReg).addReg(SrcReg).addReg(
- SrcReg, getKillRegState(KillSrc));
- return;
- }
-
- if (ARM64::FPR8RegClass.contains(DestReg) &&
- ARM64::FPR8RegClass.contains(SrcReg)) {
- DestReg =
- RI.getMatchingSuperReg(DestReg, ARM64::bsub, &ARM64::FPR128RegClass);
- SrcReg =
- RI.getMatchingSuperReg(SrcReg, ARM64::bsub, &ARM64::FPR128RegClass);
- BuildMI(MBB, I, DL, get(ARM64::ORRv16i8), DestReg).addReg(SrcReg).addReg(
- SrcReg, getKillRegState(KillSrc));
- return;
- }
-
- // Copies between GPR64 and FPR64.
- if (ARM64::FPR64RegClass.contains(DestReg) &&
- ARM64::GPR64RegClass.contains(SrcReg)) {
- BuildMI(MBB, I, DL, get(ARM64::FMOVXDr), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
- return;
- }
- if (ARM64::GPR64RegClass.contains(DestReg) &&
- ARM64::FPR64RegClass.contains(SrcReg)) {
- BuildMI(MBB, I, DL, get(ARM64::FMOVDXr), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
- return;
- }
- // Copies between GPR32 and FPR32.
- if (ARM64::FPR32RegClass.contains(DestReg) &&
- ARM64::GPR32RegClass.contains(SrcReg)) {
- BuildMI(MBB, I, DL, get(ARM64::FMOVWSr), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
- return;
- }
- if (ARM64::GPR32RegClass.contains(DestReg) &&
- ARM64::FPR32RegClass.contains(SrcReg)) {
- BuildMI(MBB, I, DL, get(ARM64::FMOVSWr), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
- return;
- }
-
- assert(0 && "unimplemented reg-to-reg copy");
-}
-
-void ARM64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- unsigned SrcReg, bool isKill, int FI,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const {
- DebugLoc DL;
- if (MBBI != MBB.end())
- DL = MBBI->getDebugLoc();
- MachineFunction &MF = *MBB.getParent();
- MachineFrameInfo &MFI = *MF.getFrameInfo();
- unsigned Align = MFI.getObjectAlignment(FI);
-
- MachinePointerInfo PtrInfo(PseudoSourceValue::getFixedStack(FI));
- MachineMemOperand *MMO = MF.getMachineMemOperand(
- PtrInfo, MachineMemOperand::MOStore, MFI.getObjectSize(FI), Align);
- unsigned Opc = 0;
- bool Offset = true;
- switch (RC->getSize()) {
- case 1:
- if (ARM64::FPR8RegClass.hasSubClassEq(RC))
- Opc = ARM64::STRBui;
- break;
- case 2:
- if (ARM64::FPR16RegClass.hasSubClassEq(RC))
- Opc = ARM64::STRHui;
- break;
- case 4:
- if (ARM64::GPR32allRegClass.hasSubClassEq(RC)) {
- Opc = ARM64::STRWui;
- if (TargetRegisterInfo::isVirtualRegister(SrcReg))
- MF.getRegInfo().constrainRegClass(SrcReg, &ARM64::GPR32RegClass);
- else
- assert(SrcReg != ARM64::WSP);
- } else if (ARM64::FPR32RegClass.hasSubClassEq(RC))
- Opc = ARM64::STRSui;
- break;
- case 8:
- if (ARM64::GPR64allRegClass.hasSubClassEq(RC)) {
- Opc = ARM64::STRXui;
- if (TargetRegisterInfo::isVirtualRegister(SrcReg))
- MF.getRegInfo().constrainRegClass(SrcReg, &ARM64::GPR64RegClass);
- else
- assert(SrcReg != ARM64::SP);
- } else if (ARM64::FPR64RegClass.hasSubClassEq(RC))
- Opc = ARM64::STRDui;
- break;
- case 16:
- if (ARM64::FPR128RegClass.hasSubClassEq(RC))
- Opc = ARM64::STRQui;
- else if (ARM64::DDRegClass.hasSubClassEq(RC))
- Opc = ARM64::ST1Twov1d, Offset = false;
- break;
- case 24:
- if (ARM64::DDDRegClass.hasSubClassEq(RC))
- Opc = ARM64::ST1Threev1d, Offset = false;
- break;
- case 32:
- if (ARM64::DDDDRegClass.hasSubClassEq(RC))
- Opc = ARM64::ST1Fourv1d, Offset = false;
- else if (ARM64::QQRegClass.hasSubClassEq(RC))
- Opc = ARM64::ST1Twov2d, Offset = false;
- break;
- case 48:
- if (ARM64::QQQRegClass.hasSubClassEq(RC))
- Opc = ARM64::ST1Threev2d, Offset = false;
- break;
- case 64:
- if (ARM64::QQQQRegClass.hasSubClassEq(RC))
- Opc = ARM64::ST1Fourv2d, Offset = false;
- break;
- }
- assert(Opc && "Unknown register class");
-
- const MachineInstrBuilder &MI = BuildMI(MBB, MBBI, DL, get(Opc))
- .addReg(SrcReg, getKillRegState(isKill))
- .addFrameIndex(FI);
-
- if (Offset)
- MI.addImm(0);
- MI.addMemOperand(MMO);
-}
-
-void ARM64InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- unsigned DestReg, int FI,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const {
- DebugLoc DL;
- if (MBBI != MBB.end())
- DL = MBBI->getDebugLoc();
- MachineFunction &MF = *MBB.getParent();
- MachineFrameInfo &MFI = *MF.getFrameInfo();
- unsigned Align = MFI.getObjectAlignment(FI);
- MachinePointerInfo PtrInfo(PseudoSourceValue::getFixedStack(FI));
- MachineMemOperand *MMO = MF.getMachineMemOperand(
- PtrInfo, MachineMemOperand::MOLoad, MFI.getObjectSize(FI), Align);
-
- unsigned Opc = 0;
- bool Offset = true;
- switch (RC->getSize()) {
- case 1:
- if (ARM64::FPR8RegClass.hasSubClassEq(RC))
- Opc = ARM64::LDRBui;
- break;
- case 2:
- if (ARM64::FPR16RegClass.hasSubClassEq(RC))
- Opc = ARM64::LDRHui;
- break;
- case 4:
- if (ARM64::GPR32allRegClass.hasSubClassEq(RC)) {
- Opc = ARM64::LDRWui;
- if (TargetRegisterInfo::isVirtualRegister(DestReg))
- MF.getRegInfo().constrainRegClass(DestReg, &ARM64::GPR32RegClass);
- else
- assert(DestReg != ARM64::WSP);
- } else if (ARM64::FPR32RegClass.hasSubClassEq(RC))
- Opc = ARM64::LDRSui;
- break;
- case 8:
- if (ARM64::GPR64allRegClass.hasSubClassEq(RC)) {
- Opc = ARM64::LDRXui;
- if (TargetRegisterInfo::isVirtualRegister(DestReg))
- MF.getRegInfo().constrainRegClass(DestReg, &ARM64::GPR64RegClass);
- else
- assert(DestReg != ARM64::SP);
- } else if (ARM64::FPR64RegClass.hasSubClassEq(RC))
- Opc = ARM64::LDRDui;
- break;
- case 16:
- if (ARM64::FPR128RegClass.hasSubClassEq(RC))
- Opc = ARM64::LDRQui;
- else if (ARM64::DDRegClass.hasSubClassEq(RC))
- Opc = ARM64::LD1Twov1d, Offset = false;
- break;
- case 24:
- if (ARM64::DDDRegClass.hasSubClassEq(RC))
- Opc = ARM64::LD1Threev1d, Offset = false;
- break;
- case 32:
- if (ARM64::DDDDRegClass.hasSubClassEq(RC))
- Opc = ARM64::LD1Fourv1d, Offset = false;
- else if (ARM64::QQRegClass.hasSubClassEq(RC))
- Opc = ARM64::LD1Twov2d, Offset = false;
- break;
- case 48:
- if (ARM64::QQQRegClass.hasSubClassEq(RC))
- Opc = ARM64::LD1Threev2d, Offset = false;
- break;
- case 64:
- if (ARM64::QQQQRegClass.hasSubClassEq(RC))
- Opc = ARM64::LD1Fourv2d, Offset = false;
- break;
- }
- assert(Opc && "Unknown register class");
-
- const MachineInstrBuilder &MI = BuildMI(MBB, MBBI, DL, get(Opc))
- .addReg(DestReg, getDefRegState(true))
- .addFrameIndex(FI);
- if (Offset)
- MI.addImm(0);
- MI.addMemOperand(MMO);
-}
-
-void llvm::emitFrameOffset(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI, DebugLoc DL,
- unsigned DestReg, unsigned SrcReg, int Offset,
- const ARM64InstrInfo *TII, MachineInstr::MIFlag Flag,
- bool SetCPSR) {
- if (DestReg == SrcReg && Offset == 0)
- return;
-
- bool isSub = Offset < 0;
- if (isSub)
- Offset = -Offset;
-
- // FIXME: If the offset won't fit in 24 bits, compute the offset into a
- // scratch register. If DestReg is a virtual register, use it as the
- // scratch register; otherwise, create a new virtual register (to be
- // replaced by the scavenger at the end of PEI). That case can be optimized
- // slightly if DestReg is SP which is always 16-byte aligned, so the scratch
- // register can be loaded with offset%8 and the add/sub can use an extending
- // instruction with LSL#3.
- // Currently the function handles any offset but generates a poor code
- // sequence.
- // assert(Offset < (1 << 24) && "unimplemented reg plus immediate");
-
- unsigned Opc;
- if (SetCPSR)
- Opc = isSub ? ARM64::SUBSXri : ARM64::ADDSXri;
- else
- Opc = isSub ? ARM64::SUBXri : ARM64::ADDXri;
- const unsigned MaxEncoding = 0xfff;
- const unsigned ShiftSize = 12;
- const unsigned MaxEncodableValue = MaxEncoding << ShiftSize;
- while (((unsigned)Offset) >= (1 << ShiftSize)) {
- unsigned ThisVal;
- if (((unsigned)Offset) > MaxEncodableValue) {
- ThisVal = MaxEncodableValue;
- } else {
- ThisVal = Offset & MaxEncodableValue;
- }
- assert((ThisVal >> ShiftSize) <= MaxEncoding &&
- "Encoding cannot handle value that big");
- BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg)
- .addReg(SrcReg)
- .addImm(ThisVal >> ShiftSize)
- .addImm(ARM64_AM::getShifterImm(ARM64_AM::LSL, ShiftSize))
- .setMIFlag(Flag);
-
- SrcReg = DestReg;
- Offset -= ThisVal;
- if (Offset == 0)
- return;
- }
- BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg)
- .addReg(SrcReg)
- .addImm(Offset)
- .addImm(ARM64_AM::getShifterImm(ARM64_AM::LSL, 0))
- .setMIFlag(Flag);
-}
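
The loop above splits an arbitrary offset into ADD/SUB-encodable pieces: each instruction carries a 12-bit immediate, optionally shifted left by 12. A rough standalone C++ sketch of that decomposition (illustrative only; names such as Chunks are not from this patch):

#include <cstdio>
#include <utility>
#include <vector>

int main() {
  const unsigned MaxEncoding = 0xfff;  // 12-bit immediate field
  const unsigned ShiftSize = 12;       // optional LSL #12
  const unsigned MaxEncodableValue = MaxEncoding << ShiftSize;

  unsigned Offset = 0x1234567;         // too big for one add/sub
  std::vector<std::pair<unsigned, unsigned>> Chunks; // (imm12, shift)

  // Peel off shifted 12-bit pieces, largest first, as the loop above does.
  while (Offset >= (1u << ShiftSize)) {
    unsigned ThisVal = Offset > MaxEncodableValue
                           ? MaxEncodableValue
                           : (Offset & MaxEncodableValue);
    Chunks.push_back({ThisVal >> ShiftSize, ShiftSize});
    Offset -= ThisVal;
  }
  if (Offset != 0)
    Chunks.push_back({Offset, 0});     // final unshifted piece

  for (const auto &C : Chunks)
    std::printf("add dst, src, #0x%x, lsl #%u\n", C.first, C.second);
  // 0x1234567 -> #0xfff lsl 12, #0x235 lsl 12, #0x567 lsl 0
}
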
-
-MachineInstr *
-ARM64InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
- const SmallVectorImpl<unsigned> &Ops,
- int FrameIndex) const {
- // This is a bit of a hack. Consider this instruction:
- //
- // %vreg0<def> = COPY %SP; GPR64all:%vreg0
- //
- // We explicitly chose GPR64all for the virtual register so such a copy might
- // be eliminated by RegisterCoalescer. However, that may not be possible, and
- // %vreg0 may even spill. We can't spill %SP, and since it is in the GPR64all
- // register class, TargetInstrInfo::foldMemoryOperand() is going to try.
- //
- // To prevent that, we are going to constrain the %vreg0 register class here.
- //
- // <rdar://problem/11522048>
- //
- if (MI->isCopy()) {
- unsigned DstReg = MI->getOperand(0).getReg();
- unsigned SrcReg = MI->getOperand(1).getReg();
- if (SrcReg == ARM64::SP && TargetRegisterInfo::isVirtualRegister(DstReg)) {
- MF.getRegInfo().constrainRegClass(DstReg, &ARM64::GPR64RegClass);
- return 0;
- }
- if (DstReg == ARM64::SP && TargetRegisterInfo::isVirtualRegister(SrcReg)) {
- MF.getRegInfo().constrainRegClass(SrcReg, &ARM64::GPR64RegClass);
- return 0;
- }
- }
-
- // Cannot fold.
- return 0;
-}
-
-int llvm::isARM64FrameOffsetLegal(const MachineInstr &MI, int &Offset,
- bool *OutUseUnscaledOp,
- unsigned *OutUnscaledOp,
- int *EmittableOffset) {
- int Scale = 1;
- bool IsSigned = false;
- // The ImmIdx should be changed case by case if it is not 2.
- unsigned ImmIdx = 2;
- unsigned UnscaledOp = 0;
- // Set output values in case of early exit.
- if (EmittableOffset)
- *EmittableOffset = 0;
- if (OutUseUnscaledOp)
- *OutUseUnscaledOp = false;
- if (OutUnscaledOp)
- *OutUnscaledOp = 0;
- switch (MI.getOpcode()) {
- default:
- assert(0 && "unhandled opcode in isARM64FrameOffsetLegal");
- // Vector spills/fills can't take an immediate offset.
- case ARM64::LD1Twov2d:
- case ARM64::LD1Threev2d:
- case ARM64::LD1Fourv2d:
- case ARM64::LD1Twov1d:
- case ARM64::LD1Threev1d:
- case ARM64::LD1Fourv1d:
- case ARM64::ST1Twov2d:
- case ARM64::ST1Threev2d:
- case ARM64::ST1Fourv2d:
- case ARM64::ST1Twov1d:
- case ARM64::ST1Threev1d:
- case ARM64::ST1Fourv1d:
- return ARM64FrameOffsetCannotUpdate;
- case ARM64::PRFMui:
- Scale = 8;
- UnscaledOp = ARM64::PRFUMi;
- break;
- case ARM64::LDRXui:
- Scale = 8;
- UnscaledOp = ARM64::LDURXi;
- break;
- case ARM64::LDRWui:
- Scale = 4;
- UnscaledOp = ARM64::LDURWi;
- break;
- case ARM64::LDRBui:
- Scale = 1;
- UnscaledOp = ARM64::LDURBi;
- break;
- case ARM64::LDRHui:
- Scale = 2;
- UnscaledOp = ARM64::LDURHi;
- break;
- case ARM64::LDRSui:
- Scale = 4;
- UnscaledOp = ARM64::LDURSi;
- break;
- case ARM64::LDRDui:
- Scale = 8;
- UnscaledOp = ARM64::LDURDi;
- break;
- case ARM64::LDRQui:
- Scale = 16;
- UnscaledOp = ARM64::LDURQi;
- break;
- case ARM64::LDRBBui:
- Scale = 1;
- UnscaledOp = ARM64::LDURBBi;
- break;
- case ARM64::LDRHHui:
- Scale = 2;
- UnscaledOp = ARM64::LDURHHi;
- break;
- case ARM64::LDRSBXui:
- Scale = 1;
- UnscaledOp = ARM64::LDURSBXi;
- break;
- case ARM64::LDRSBWui:
- Scale = 1;
- UnscaledOp = ARM64::LDURSBWi;
- break;
- case ARM64::LDRSHXui:
- Scale = 2;
- UnscaledOp = ARM64::LDURSHXi;
- break;
- case ARM64::LDRSHWui:
- Scale = 2;
- UnscaledOp = ARM64::LDURSHWi;
- break;
- case ARM64::LDRSWui:
- Scale = 4;
- UnscaledOp = ARM64::LDURSWi;
- break;
-
- case ARM64::STRXui:
- Scale = 8;
- UnscaledOp = ARM64::STURXi;
- break;
- case ARM64::STRWui:
- Scale = 4;
- UnscaledOp = ARM64::STURWi;
- break;
- case ARM64::STRBui:
- Scale = 1;
- UnscaledOp = ARM64::STURBi;
- break;
- case ARM64::STRHui:
- Scale = 2;
- UnscaledOp = ARM64::STURHi;
- break;
- case ARM64::STRSui:
- Scale = 4;
- UnscaledOp = ARM64::STURSi;
- break;
- case ARM64::STRDui:
- Scale = 8;
- UnscaledOp = ARM64::STURDi;
- break;
- case ARM64::STRQui:
- Scale = 16;
- UnscaledOp = ARM64::STURQi;
- break;
- case ARM64::STRBBui:
- Scale = 1;
- UnscaledOp = ARM64::STURBBi;
- break;
- case ARM64::STRHHui:
- Scale = 2;
- UnscaledOp = ARM64::STURHHi;
- break;
-
- case ARM64::LDPXi:
- case ARM64::LDPDi:
- case ARM64::STPXi:
- case ARM64::STPDi:
- IsSigned = true;
- Scale = 8;
- break;
- case ARM64::LDPQi:
- case ARM64::STPQi:
- IsSigned = true;
- Scale = 16;
- break;
- case ARM64::LDPWi:
- case ARM64::LDPSi:
- case ARM64::STPWi:
- case ARM64::STPSi:
- IsSigned = true;
- Scale = 4;
- break;
-
- case ARM64::LDURXi:
- case ARM64::LDURWi:
- case ARM64::LDURBi:
- case ARM64::LDURHi:
- case ARM64::LDURSi:
- case ARM64::LDURDi:
- case ARM64::LDURQi:
- case ARM64::LDURHHi:
- case ARM64::LDURBBi:
- case ARM64::LDURSBXi:
- case ARM64::LDURSBWi:
- case ARM64::LDURSHXi:
- case ARM64::LDURSHWi:
- case ARM64::LDURSWi:
- case ARM64::STURXi:
- case ARM64::STURWi:
- case ARM64::STURBi:
- case ARM64::STURHi:
- case ARM64::STURSi:
- case ARM64::STURDi:
- case ARM64::STURQi:
- case ARM64::STURBBi:
- case ARM64::STURHHi:
- Scale = 1;
- break;
- }
-
- Offset += MI.getOperand(ImmIdx).getImm() * Scale;
-
- bool useUnscaledOp = false;
- // If the offset doesn't match the scale, we rewrite the instruction to
- // use the unscaled instruction instead; likewise if the offset is negative
- // (and an unscaled op is available).
- if ((Offset & (Scale - 1)) != 0 || (Offset < 0 && UnscaledOp != 0))
- useUnscaledOp = true;
-
- // Use an unscaled addressing mode if the instruction has a negative offset
- // (or if the instruction is already using an unscaled addressing mode).
- unsigned MaskBits;
- if (IsSigned) {
- // ldp/stp instructions.
- MaskBits = 7;
- Offset /= Scale;
- } else if (UnscaledOp == 0 || useUnscaledOp) {
- MaskBits = 9;
- IsSigned = true;
- Scale = 1;
- } else {
- MaskBits = 12;
- IsSigned = false;
- Offset /= Scale;
- }
-
- // Attempt to fold address computation.
- int MaxOff = (1 << (MaskBits - IsSigned)) - 1;
- int MinOff = (IsSigned ? (-MaxOff - 1) : 0);
- if (Offset >= MinOff && Offset <= MaxOff) {
- if (EmittableOffset)
- *EmittableOffset = Offset;
- Offset = 0;
- } else {
- int NewOff = Offset < 0 ? MinOff : MaxOff;
- if (EmittableOffset)
- *EmittableOffset = NewOff;
- Offset = (Offset - NewOff) * Scale;
- }
- if (OutUseUnscaledOp)
- *OutUseUnscaledOp = useUnscaledOp;
- if (OutUnscaledOp)
- *OutUnscaledOp = UnscaledOp;
- return ARM64FrameOffsetCanUpdate |
- (Offset == 0 ? ARM64FrameOffsetIsLegal : 0);
-}
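
For reference, the three MaskBits cases above correspond to the following byte-offset ranges. A minimal C++ sketch, using only the constants visible in the function, that prints them:

#include <cstdio>

// Mirrors the MaskBits/IsSigned arithmetic in isARM64FrameOffsetLegal.
static void printRange(const char *Kind, unsigned MaskBits, bool IsSigned,
                       int Scale) {
  int MaxOff = (1 << (MaskBits - IsSigned)) - 1;
  int MinOff = IsSigned ? (-MaxOff - 1) : 0;
  std::printf("%s: units [%d, %d], scale %d bytes\n", Kind, MinOff, MaxOff,
              Scale);
}

int main() {
  printRange("scaled   (e.g. LDRXui)", 12, false, 8); // [0, 4095] * 8
  printRange("unscaled (e.g. LDURXi)", 9, true, 1);   // [-256, 255] * 1
  printRange("paired   (e.g. LDPXi) ", 7, true, 8);   // [-64, 63] * 8
}
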
-
-bool llvm::rewriteARM64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
- unsigned FrameReg, int &Offset,
- const ARM64InstrInfo *TII) {
- unsigned Opcode = MI.getOpcode();
- unsigned ImmIdx = FrameRegIdx + 1;
-
- if (Opcode == ARM64::ADDSXri || Opcode == ARM64::ADDXri) {
- Offset += MI.getOperand(ImmIdx).getImm();
- emitFrameOffset(*MI.getParent(), MI, MI.getDebugLoc(),
- MI.getOperand(0).getReg(), FrameReg, Offset, TII,
- MachineInstr::NoFlags, (Opcode == ARM64::ADDSXri));
- MI.eraseFromParent();
- Offset = 0;
- return true;
- }
-
- int NewOffset;
- unsigned UnscaledOp;
- bool UseUnscaledOp;
- int Status = isARM64FrameOffsetLegal(MI, Offset, &UseUnscaledOp, &UnscaledOp,
- &NewOffset);
- if (Status & ARM64FrameOffsetCanUpdate) {
- if (Status & ARM64FrameOffsetIsLegal)
- // Replace the FrameIndex with FrameReg.
- MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
- if (UseUnscaledOp)
- MI.setDesc(TII->get(UnscaledOp));
-
- MI.getOperand(ImmIdx).ChangeToImmediate(NewOffset);
- return Offset == 0;
- }
-
- return false;
-}
-
-void ARM64InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
- NopInst.setOpcode(ARM64::HINT);
- NopInst.addOperand(MCOperand::CreateImm(0));
-}
diff --git a/lib/Target/ARM64/ARM64InstrInfo.h b/lib/Target/ARM64/ARM64InstrInfo.h
deleted file mode 100644
index 2591ca0..0000000
--- a/lib/Target/ARM64/ARM64InstrInfo.h
+++ /dev/null
@@ -1,219 +0,0 @@
-//===- ARM64InstrInfo.h - ARM64 Instruction Information ---------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the ARM64 implementation of the TargetInstrInfo class.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_TARGET_ARM64INSTRINFO_H
-#define LLVM_TARGET_ARM64INSTRINFO_H
-
-#include "ARM64.h"
-#include "ARM64RegisterInfo.h"
-#include "llvm/Target/TargetInstrInfo.h"
-
-#define GET_INSTRINFO_HEADER
-#include "ARM64GenInstrInfo.inc"
-
-namespace llvm {
-
-class ARM64Subtarget;
-class ARM64TargetMachine;
-
-class ARM64InstrInfo : public ARM64GenInstrInfo {
- // Reserve bits in the MachineMemOperand target hint flags, starting at 1.
- // They will be shifted into MOTargetHintStart when accessed.
- enum TargetMemOperandFlags {
- MOSuppressPair = 1
- };
-
- const ARM64RegisterInfo RI;
- const ARM64Subtarget &Subtarget;
-
-public:
- explicit ARM64InstrInfo(const ARM64Subtarget &STI);
-
- /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
- /// such, whenever a client has an instance of instruction info, it should
- /// always be able to get register info as well (through this method).
- const ARM64RegisterInfo &getRegisterInfo() const { return RI; }
-
- unsigned GetInstSizeInBytes(const MachineInstr *MI) const;
-
- bool isCoalescableExtInstr(const MachineInstr &MI, unsigned &SrcReg,
- unsigned &DstReg, unsigned &SubIdx) const override;
-
- unsigned isLoadFromStackSlot(const MachineInstr *MI,
- int &FrameIndex) const override;
- unsigned isStoreToStackSlot(const MachineInstr *MI,
- int &FrameIndex) const override;
-
- /// \brief Does this instruction set its full destination register to zero?
- bool isGPRZero(const MachineInstr *MI) const;
-
- /// \brief Does this instruction rename a GPR without modifying bits?
- bool isGPRCopy(const MachineInstr *MI) const;
-
- /// \brief Does this instruction rename an FPR without modifying bits?
- bool isFPRCopy(const MachineInstr *MI) const;
-
- /// Return true if this load/store scales or extends its register offset.
- /// This refers to scaling a dynamic index as opposed to scaled immediates.
- /// MI should be a memory op that allows scaled addressing.
- bool isScaledAddr(const MachineInstr *MI) const;
-
- /// Return true if pairing the given load or store is hinted to be
- /// unprofitable.
- bool isLdStPairSuppressed(const MachineInstr *MI) const;
-
- /// Hint that pairing the given load or store is unprofitable.
- void suppressLdStPair(MachineInstr *MI) const;
-
- bool getLdStBaseRegImmOfs(MachineInstr *LdSt, unsigned &BaseReg,
- unsigned &Offset,
- const TargetRegisterInfo *TRI) const override;
-
- bool enableClusterLoads() const override { return true; }
-
- bool shouldClusterLoads(MachineInstr *FirstLdSt, MachineInstr *SecondLdSt,
- unsigned NumLoads) const override;
-
- bool shouldScheduleAdjacent(MachineInstr *First,
- MachineInstr *Second) const override;
-
- MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF, int FrameIx,
- uint64_t Offset, const MDNode *MDPtr,
- DebugLoc DL) const;
- void copyPhysRegTuple(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
- DebugLoc DL, unsigned DestReg, unsigned SrcReg,
- bool KillSrc, unsigned Opcode,
- llvm::ArrayRef<unsigned> Indices) const;
- void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
- DebugLoc DL, unsigned DestReg, unsigned SrcReg,
- bool KillSrc) const override;
-
- void storeRegToStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI, unsigned SrcReg,
- bool isKill, int FrameIndex,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const override;
-
- void loadRegFromStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI, unsigned DestReg,
- int FrameIndex, const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const override;
-
- MachineInstr *
- foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
- const SmallVectorImpl<unsigned> &Ops,
- int FrameIndex) const override;
-
- bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
- MachineBasicBlock *&FBB,
- SmallVectorImpl<MachineOperand> &Cond,
- bool AllowModify = false) const override;
- unsigned RemoveBranch(MachineBasicBlock &MBB) const override;
- unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
- MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond,
- DebugLoc DL) const override;
- bool
- ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override;
- bool canInsertSelect(const MachineBasicBlock &,
- const SmallVectorImpl<MachineOperand> &Cond, unsigned,
- unsigned, int &, int &, int &) const override;
- void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
- DebugLoc DL, unsigned DstReg,
- const SmallVectorImpl<MachineOperand> &Cond,
- unsigned TrueReg, unsigned FalseReg) const override;
- void getNoopForMachoTarget(MCInst &NopInst) const override;
-
- /// analyzeCompare - For a comparison instruction, return the source registers
- /// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
- /// Return true if the comparison instruction can be analyzed.
- bool analyzeCompare(const MachineInstr *MI, unsigned &SrcReg,
- unsigned &SrcReg2, int &CmpMask,
- int &CmpValue) const override;
- /// optimizeCompareInstr - Convert the instruction supplying the argument to
- /// the comparison into one that sets the zero bit in the flags register.
- bool optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg,
- unsigned SrcReg2, int CmpMask, int CmpValue,
- const MachineRegisterInfo *MRI) const override;
-
-private:
- void instantiateCondBranch(MachineBasicBlock &MBB, DebugLoc DL,
- MachineBasicBlock *TBB,
- const SmallVectorImpl<MachineOperand> &Cond) const;
-};
-
-/// emitFrameOffset - Emit instructions as needed to set DestReg to SrcReg
-/// plus Offset. This is intended to be used from within the prolog/epilog
-/// insertion (PEI) pass, where a virtual scratch register may be allocated
-/// if necessary, to be replaced by the scavenger at the end of PEI.
-void emitFrameOffset(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
- DebugLoc DL, unsigned DestReg, unsigned SrcReg, int Offset,
- const ARM64InstrInfo *TII,
- MachineInstr::MIFlag = MachineInstr::NoFlags,
- bool SetCPSR = false);
-
-/// rewriteARM64FrameIndex - Rewrite MI to access 'Offset' bytes from the
-/// FP. Return false if the offset could not be handled directly in MI, and
-/// return the left-over portion by reference.
-bool rewriteARM64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
- unsigned FrameReg, int &Offset,
- const ARM64InstrInfo *TII);
-
-/// \brief Used to report the frame offset status in isARM64FrameOffsetLegal.
-enum ARM64FrameOffsetStatus {
- ARM64FrameOffsetCannotUpdate = 0x0, ///< Offset cannot apply.
- ARM64FrameOffsetIsLegal = 0x1, ///< Offset is legal.
- ARM64FrameOffsetCanUpdate = 0x2 ///< Offset can apply, at least partly.
-};
-
-/// \brief Check if the @p Offset is a valid frame offset for @p MI.
-/// The returned value reports the validity of the frame offset for @p MI.
-/// It uses the values defined by ARM64FrameOffsetStatus for that.
-/// If result == ARM64FrameOffsetCannotUpdate, @p MI cannot be updated to
-/// use an offset.
-/// If result & ARM64FrameOffsetIsLegal, @p Offset can be completely
-/// rewritten in @p MI.
-/// If result & ARM64FrameOffsetCanUpdate, @p Offset contains the
-/// amount that is left over beyond the legal offset.
-/// If set, @p OutUseUnscaledOp will contain whether @p MI should be
-/// turned into an unscaled operator, whose opcode is in @p OutUnscaledOp.
-/// If set, @p EmittableOffset contains the amount that can be set in @p MI
-/// (possibly with @p OutUnscaledOp if OutUseUnscaledOp is true) and that
-/// is a legal offset.
-int isARM64FrameOffsetLegal(const MachineInstr &MI, int &Offset,
- bool *OutUseUnscaledOp = NULL,
- unsigned *OutUnscaledOp = NULL,
- int *EmittableOffset = NULL);
-
-static inline bool isUncondBranchOpcode(int Opc) { return Opc == ARM64::B; }
-
-static inline bool isCondBranchOpcode(int Opc) {
- switch (Opc) {
- case ARM64::Bcc:
- case ARM64::CBZW:
- case ARM64::CBZX:
- case ARM64::CBNZW:
- case ARM64::CBNZX:
- case ARM64::TBZ:
- case ARM64::TBNZ:
- return true;
- default:
- return false;
- }
-}
-
-static inline bool isIndirectBranchOpcode(int Opc) { return Opc == ARM64::BR; }
-
-} // end namespace llvm
-
-#endif
diff --git a/lib/Target/ARM64/ARM64InstrInfo.td b/lib/Target/ARM64/ARM64InstrInfo.td
deleted file mode 100644
index 2fe1720..0000000
--- a/lib/Target/ARM64/ARM64InstrInfo.td
+++ /dev/null
@@ -1,4458 +0,0 @@
-//===- ARM64InstrInfo.td - Describe the ARM64 Instructions -*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// ARM64 Instruction definitions.
-//
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// ARM64-specific DAG Nodes.
-//
-
-// SDTBinaryArithWithFlagsOut - RES1, FLAGS = op LHS, RHS
-def SDTBinaryArithWithFlagsOut : SDTypeProfile<2, 2,
- [SDTCisSameAs<0, 2>,
- SDTCisSameAs<0, 3>,
- SDTCisInt<0>, SDTCisVT<1, i32>]>;
-
-// SDTBinaryArithWithFlagsIn - RES1, FLAGS = op LHS, RHS, FLAGS
-def SDTBinaryArithWithFlagsIn : SDTypeProfile<1, 3,
- [SDTCisSameAs<0, 1>,
- SDTCisSameAs<0, 2>,
- SDTCisInt<0>,
- SDTCisVT<3, i32>]>;
-
-// SDTBinaryArithWithFlagsInOut - RES1, FLAGS = op LHS, RHS, FLAGS
-def SDTBinaryArithWithFlagsInOut : SDTypeProfile<2, 3,
- [SDTCisSameAs<0, 2>,
- SDTCisSameAs<0, 3>,
- SDTCisInt<0>,
- SDTCisVT<1, i32>,
- SDTCisVT<4, i32>]>;
-
-def SDT_ARM64Brcond : SDTypeProfile<0, 3,
- [SDTCisVT<0, OtherVT>, SDTCisVT<1, i32>,
- SDTCisVT<2, i32>]>;
-def SDT_ARM64cbz : SDTypeProfile<0, 2, [SDTCisInt<0>, SDTCisVT<1, OtherVT>]>;
-def SDT_ARM64tbz : SDTypeProfile<0, 3, [SDTCisVT<0, i64>, SDTCisVT<1, i64>,
- SDTCisVT<2, OtherVT>]>;
-
-
-def SDT_ARM64CSel : SDTypeProfile<1, 4,
- [SDTCisSameAs<0, 1>,
- SDTCisSameAs<0, 2>,
- SDTCisInt<3>,
- SDTCisVT<4, i32>]>;
-def SDT_ARM64FCmp : SDTypeProfile<0, 2,
- [SDTCisFP<0>,
- SDTCisSameAs<0, 1>]>;
-def SDT_ARM64Dup : SDTypeProfile<1, 1, [SDTCisVec<0>]>;
-def SDT_ARM64DupLane : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisInt<2>]>;
-def SDT_ARM64Zip : SDTypeProfile<1, 2, [SDTCisVec<0>,
- SDTCisSameAs<0, 1>,
- SDTCisSameAs<0, 2>]>;
-def SDT_ARM64MOVIedit : SDTypeProfile<1, 1, [SDTCisInt<1>]>;
-def SDT_ARM64MOVIshift : SDTypeProfile<1, 2, [SDTCisInt<1>, SDTCisInt<2>]>;
-def SDT_ARM64vecimm : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
- SDTCisInt<2>, SDTCisInt<3>]>;
-def SDT_ARM64UnaryVec: SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>;
-def SDT_ARM64ExtVec: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
- SDTCisSameAs<0,2>, SDTCisInt<3>]>;
-def SDT_ARM64vshift : SDTypeProfile<1, 2, [SDTCisSameAs<0,1>, SDTCisInt<2>]>;
-
-def SDT_ARM64unvec : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>;
-def SDT_ARM64fcmpz : SDTypeProfile<1, 1, []>;
-def SDT_ARM64fcmp : SDTypeProfile<1, 2, [SDTCisSameAs<1,2>]>;
-def SDT_ARM64binvec : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
- SDTCisSameAs<0,2>]>;
-def SDT_ARM64trivec : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
- SDTCisSameAs<0,2>,
- SDTCisSameAs<0,3>]>;
-def SDT_ARM64TCRET : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
-def SDT_ARM64PREFETCH : SDTypeProfile<0, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<1>]>;
-
-def SDT_ARM64ITOF : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisSameAs<0,1>]>;
-
-def SDT_ARM64TLSDescCall : SDTypeProfile<0, -2, [SDTCisPtrTy<0>,
- SDTCisPtrTy<1>]>;
-def SDT_ARM64WrapperLarge : SDTypeProfile<1, 4,
- [SDTCisVT<0, i64>, SDTCisVT<1, i32>,
- SDTCisSameAs<1, 2>, SDTCisSameAs<1, 3>,
- SDTCisSameAs<1, 4>]>;
-
-
-// Node definitions.
-def ARM64adrp : SDNode<"ARM64ISD::ADRP", SDTIntUnaryOp, []>;
-def ARM64addlow : SDNode<"ARM64ISD::ADDlow", SDTIntBinOp, []>;
-def ARM64LOADgot : SDNode<"ARM64ISD::LOADgot", SDTIntUnaryOp>;
-def ARM64callseq_start : SDNode<"ISD::CALLSEQ_START",
- SDCallSeqStart<[ SDTCisVT<0, i32> ]>,
- [SDNPHasChain, SDNPOutGlue]>;
-def ARM64callseq_end : SDNode<"ISD::CALLSEQ_END",
- SDCallSeqEnd<[ SDTCisVT<0, i32>,
- SDTCisVT<1, i32> ]>,
- [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
-def ARM64call : SDNode<"ARM64ISD::CALL",
- SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>,
- [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
- SDNPVariadic]>;
-def ARM64brcond : SDNode<"ARM64ISD::BRCOND", SDT_ARM64Brcond,
- [SDNPHasChain]>;
-def ARM64cbz : SDNode<"ARM64ISD::CBZ", SDT_ARM64cbz,
- [SDNPHasChain]>;
-def ARM64cbnz : SDNode<"ARM64ISD::CBNZ", SDT_ARM64cbz,
- [SDNPHasChain]>;
-def ARM64tbz : SDNode<"ARM64ISD::TBZ", SDT_ARM64tbz,
- [SDNPHasChain]>;
-def ARM64tbnz : SDNode<"ARM64ISD::TBNZ", SDT_ARM64tbz,
- [SDNPHasChain]>;
-
-
-def ARM64csel : SDNode<"ARM64ISD::CSEL", SDT_ARM64CSel>;
-def ARM64csinv : SDNode<"ARM64ISD::CSINV", SDT_ARM64CSel>;
-def ARM64csneg : SDNode<"ARM64ISD::CSNEG", SDT_ARM64CSel>;
-def ARM64csinc : SDNode<"ARM64ISD::CSINC", SDT_ARM64CSel>;
-def ARM64retflag : SDNode<"ARM64ISD::RET_FLAG", SDTNone,
- [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
-def ARM64adc : SDNode<"ARM64ISD::ADC", SDTBinaryArithWithFlagsIn >;
-def ARM64sbc : SDNode<"ARM64ISD::SBC", SDTBinaryArithWithFlagsIn>;
-def ARM64add_flag : SDNode<"ARM64ISD::ADDS", SDTBinaryArithWithFlagsOut,
- [SDNPCommutative]>;
-def ARM64sub_flag : SDNode<"ARM64ISD::SUBS", SDTBinaryArithWithFlagsOut>;
-def ARM64and_flag : SDNode<"ARM64ISD::ANDS", SDTBinaryArithWithFlagsOut>;
-def ARM64adc_flag : SDNode<"ARM64ISD::ADCS", SDTBinaryArithWithFlagsInOut>;
-def ARM64sbc_flag : SDNode<"ARM64ISD::SBCS", SDTBinaryArithWithFlagsInOut>;
-
-def ARM64threadpointer : SDNode<"ARM64ISD::THREAD_POINTER", SDTPtrLeaf>;
-
-def ARM64fcmp : SDNode<"ARM64ISD::FCMP", SDT_ARM64FCmp>;
-
-def ARM64fmax : SDNode<"ARM64ISD::FMAX", SDTFPBinOp>;
-def ARM64fmin : SDNode<"ARM64ISD::FMIN", SDTFPBinOp>;
-
-def ARM64dup : SDNode<"ARM64ISD::DUP", SDT_ARM64Dup>;
-def ARM64duplane8 : SDNode<"ARM64ISD::DUPLANE8", SDT_ARM64DupLane>;
-def ARM64duplane16 : SDNode<"ARM64ISD::DUPLANE16", SDT_ARM64DupLane>;
-def ARM64duplane32 : SDNode<"ARM64ISD::DUPLANE32", SDT_ARM64DupLane>;
-def ARM64duplane64 : SDNode<"ARM64ISD::DUPLANE64", SDT_ARM64DupLane>;
-
-def ARM64zip1 : SDNode<"ARM64ISD::ZIP1", SDT_ARM64Zip>;
-def ARM64zip2 : SDNode<"ARM64ISD::ZIP2", SDT_ARM64Zip>;
-def ARM64uzp1 : SDNode<"ARM64ISD::UZP1", SDT_ARM64Zip>;
-def ARM64uzp2 : SDNode<"ARM64ISD::UZP2", SDT_ARM64Zip>;
-def ARM64trn1 : SDNode<"ARM64ISD::TRN1", SDT_ARM64Zip>;
-def ARM64trn2 : SDNode<"ARM64ISD::TRN2", SDT_ARM64Zip>;
-
-def ARM64movi_edit : SDNode<"ARM64ISD::MOVIedit", SDT_ARM64MOVIedit>;
-def ARM64movi_shift : SDNode<"ARM64ISD::MOVIshift", SDT_ARM64MOVIshift>;
-def ARM64movi_msl : SDNode<"ARM64ISD::MOVImsl", SDT_ARM64MOVIshift>;
-def ARM64mvni_shift : SDNode<"ARM64ISD::MVNIshift", SDT_ARM64MOVIshift>;
-def ARM64mvni_msl : SDNode<"ARM64ISD::MVNImsl", SDT_ARM64MOVIshift>;
-def ARM64movi : SDNode<"ARM64ISD::MOVI", SDT_ARM64MOVIedit>;
-def ARM64fmov : SDNode<"ARM64ISD::FMOV", SDT_ARM64MOVIedit>;
-
-def ARM64rev16 : SDNode<"ARM64ISD::REV16", SDT_ARM64UnaryVec>;
-def ARM64rev32 : SDNode<"ARM64ISD::REV32", SDT_ARM64UnaryVec>;
-def ARM64rev64 : SDNode<"ARM64ISD::REV64", SDT_ARM64UnaryVec>;
-def ARM64ext : SDNode<"ARM64ISD::EXT", SDT_ARM64ExtVec>;
-
-def ARM64vashr : SDNode<"ARM64ISD::VASHR", SDT_ARM64vshift>;
-def ARM64vlshr : SDNode<"ARM64ISD::VLSHR", SDT_ARM64vshift>;
-def ARM64vshl : SDNode<"ARM64ISD::VSHL", SDT_ARM64vshift>;
-def ARM64sqshli : SDNode<"ARM64ISD::SQSHL_I", SDT_ARM64vshift>;
-def ARM64uqshli : SDNode<"ARM64ISD::UQSHL_I", SDT_ARM64vshift>;
-def ARM64sqshlui : SDNode<"ARM64ISD::SQSHLU_I", SDT_ARM64vshift>;
-def ARM64srshri : SDNode<"ARM64ISD::SRSHR_I", SDT_ARM64vshift>;
-def ARM64urshri : SDNode<"ARM64ISD::URSHR_I", SDT_ARM64vshift>;
-
-def ARM64not: SDNode<"ARM64ISD::NOT", SDT_ARM64unvec>;
-def ARM64bit: SDNode<"ARM64ISD::BIT", SDT_ARM64trivec>;
-
-def ARM64cmeq: SDNode<"ARM64ISD::CMEQ", SDT_ARM64binvec>;
-def ARM64cmge: SDNode<"ARM64ISD::CMGE", SDT_ARM64binvec>;
-def ARM64cmgt: SDNode<"ARM64ISD::CMGT", SDT_ARM64binvec>;
-def ARM64cmhi: SDNode<"ARM64ISD::CMHI", SDT_ARM64binvec>;
-def ARM64cmhs: SDNode<"ARM64ISD::CMHS", SDT_ARM64binvec>;
-
-def ARM64fcmeq: SDNode<"ARM64ISD::FCMEQ", SDT_ARM64fcmp>;
-def ARM64fcmge: SDNode<"ARM64ISD::FCMGE", SDT_ARM64fcmp>;
-def ARM64fcmgt: SDNode<"ARM64ISD::FCMGT", SDT_ARM64fcmp>;
-
-def ARM64cmeqz: SDNode<"ARM64ISD::CMEQz", SDT_ARM64unvec>;
-def ARM64cmgez: SDNode<"ARM64ISD::CMGEz", SDT_ARM64unvec>;
-def ARM64cmgtz: SDNode<"ARM64ISD::CMGTz", SDT_ARM64unvec>;
-def ARM64cmlez: SDNode<"ARM64ISD::CMLEz", SDT_ARM64unvec>;
-def ARM64cmltz: SDNode<"ARM64ISD::CMLTz", SDT_ARM64unvec>;
-def ARM64cmtst : PatFrag<(ops node:$LHS, node:$RHS),
- (ARM64not (ARM64cmeqz (and node:$LHS, node:$RHS)))>;
-
-def ARM64fcmeqz: SDNode<"ARM64ISD::FCMEQz", SDT_ARM64fcmpz>;
-def ARM64fcmgez: SDNode<"ARM64ISD::FCMGEz", SDT_ARM64fcmpz>;
-def ARM64fcmgtz: SDNode<"ARM64ISD::FCMGTz", SDT_ARM64fcmpz>;
-def ARM64fcmlez: SDNode<"ARM64ISD::FCMLEz", SDT_ARM64fcmpz>;
-def ARM64fcmltz: SDNode<"ARM64ISD::FCMLTz", SDT_ARM64fcmpz>;
-
-def ARM64bici: SDNode<"ARM64ISD::BICi", SDT_ARM64vecimm>;
-def ARM64orri: SDNode<"ARM64ISD::ORRi", SDT_ARM64vecimm>;
-
-def ARM64neg : SDNode<"ARM64ISD::NEG", SDT_ARM64unvec>;
-
-def ARM64tcret: SDNode<"ARM64ISD::TC_RETURN", SDT_ARM64TCRET,
- [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
-
-def ARM64Prefetch : SDNode<"ARM64ISD::PREFETCH", SDT_ARM64PREFETCH,
- [SDNPHasChain, SDNPSideEffect]>;
-
-def ARM64sitof: SDNode<"ARM64ISD::SITOF", SDT_ARM64ITOF>;
-def ARM64uitof: SDNode<"ARM64ISD::UITOF", SDT_ARM64ITOF>;
-
-def ARM64tlsdesc_call : SDNode<"ARM64ISD::TLSDESC_CALL", SDT_ARM64TLSDescCall,
- [SDNPInGlue, SDNPOutGlue, SDNPHasChain,
- SDNPVariadic]>;
-
-def ARM64WrapperLarge : SDNode<"ARM64ISD::WrapperLarge", SDT_ARM64WrapperLarge>;
-
-
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-
-// ARM64 Instruction Predicate Definitions.
-//
-def HasZCZ : Predicate<"Subtarget->hasZeroCycleZeroing()">;
-def NoZCZ : Predicate<"!Subtarget->hasZeroCycleZeroing()">;
-def IsDarwin : Predicate<"Subtarget->isTargetDarwin()">;
-def IsNotDarwin: Predicate<"!Subtarget->isTargetDarwin()">;
-def ForCodeSize : Predicate<"ForCodeSize">;
-def NotForCodeSize : Predicate<"!ForCodeSize">;
-
-include "ARM64InstrFormats.td"
-
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-// Miscellaneous instructions.
-//===----------------------------------------------------------------------===//
-
-let Defs = [SP], Uses = [SP], hasSideEffects = 1, isCodeGenOnly = 1 in {
-def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i32imm:$amt),
- [(ARM64callseq_start timm:$amt)]>;
-def ADJCALLSTACKUP : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2),
- [(ARM64callseq_end timm:$amt1, timm:$amt2)]>;
-} // Defs = [SP], Uses = [SP], hasSideEffects = 1, isCodeGenOnly = 1
-
-let isReMaterializable = 1, isCodeGenOnly = 1 in {
-// FIXME: The following pseudo instructions are only needed because remat
-// cannot handle multiple instructions. When that changes, they can be
-// removed, along with the ARM64Wrapper node.
-
-let AddedComplexity = 10 in
-def LOADgot : Pseudo<(outs GPR64:$dst), (ins i64imm:$addr),
- [(set GPR64:$dst, (ARM64LOADgot tglobaladdr:$addr))]>,
- Sched<[WriteLDAdr]>;
-
-// The MOVaddr instruction should match only when the add is not folded
-// into a load or store address.
-def MOVaddr
- : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low),
- [(set GPR64:$dst, (ARM64addlow (ARM64adrp tglobaladdr:$hi),
- tglobaladdr:$low))]>,
- Sched<[WriteAdrAdr]>;
-def MOVaddrJT
- : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low),
- [(set GPR64:$dst, (ARM64addlow (ARM64adrp tjumptable:$hi),
- tjumptable:$low))]>,
- Sched<[WriteAdrAdr]>;
-def MOVaddrCP
- : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low),
- [(set GPR64:$dst, (ARM64addlow (ARM64adrp tconstpool:$hi),
- tconstpool:$low))]>,
- Sched<[WriteAdrAdr]>;
-def MOVaddrBA
- : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low),
- [(set GPR64:$dst, (ARM64addlow (ARM64adrp tblockaddress:$hi),
- tblockaddress:$low))]>,
- Sched<[WriteAdrAdr]>;
-def MOVaddrTLS
- : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low),
- [(set GPR64:$dst, (ARM64addlow (ARM64adrp tglobaltlsaddr:$hi),
- tglobaltlsaddr:$low))]>,
- Sched<[WriteAdrAdr]>;
-def MOVaddrEXT
- : Pseudo<(outs GPR64:$dst), (ins i64imm:$hi, i64imm:$low),
- [(set GPR64:$dst, (ARM64addlow (ARM64adrp texternalsym:$hi),
- texternalsym:$low))]>,
- Sched<[WriteAdrAdr]>;
-
-} // isReMaterializable, isCodeGenOnly
-
-def : Pat<(ARM64LOADgot tglobaltlsaddr:$addr),
- (LOADgot tglobaltlsaddr:$addr)>;
-
-def : Pat<(ARM64LOADgot texternalsym:$addr),
- (LOADgot texternalsym:$addr)>;
-
-def : Pat<(ARM64LOADgot tconstpool:$addr),
- (LOADgot tconstpool:$addr)>;
-
-//===----------------------------------------------------------------------===//
-// System instructions.
-//===----------------------------------------------------------------------===//
-
-def HINT : HintI<"hint">;
-def : InstAlias<"nop", (HINT 0b000)>;
-def : InstAlias<"yield",(HINT 0b001)>;
-def : InstAlias<"wfe", (HINT 0b010)>;
-def : InstAlias<"wfi", (HINT 0b011)>;
-def : InstAlias<"sev", (HINT 0b100)>;
-def : InstAlias<"sevl", (HINT 0b101)>;
-
-// As far as LLVM is concerned this writes to the system's exclusive monitors.
-let mayLoad = 1, mayStore = 1 in
-def CLREX : CRmSystemI<imm0_15, 0b010, "clrex">;
-
-def DMB : CRmSystemI<barrier_op, 0b101, "dmb">;
-def DSB : CRmSystemI<barrier_op, 0b100, "dsb">;
-def ISB : CRmSystemI<barrier_op, 0b110, "isb">;
-def : InstAlias<"clrex", (CLREX 0xf)>;
-def : InstAlias<"isb", (ISB 0xf)>;
-
-def MRS : MRSI;
-def MSR : MSRI;
-def MSRcpsr: MSRcpsrI;
-
-// The thread pointer (on Linux, at least, where this has been implemented) is
-// TPIDR_EL0.
-def : Pat<(ARM64threadpointer), (MRS 0xde82)>;
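
The magic number 0xde82 evidently encodes TPIDR_EL0 (S3_3_C13_C0_2) as the concatenation op0:op1:CRn:CRm:op2 used by the MRS immediate. A quick standalone sketch checking that arithmetic (the field layout here is an assumption about this pattern's encoding, verified only by the value it produces):

#include <cstdio>

int main() {
  // TPIDR_EL0 = S3_3_C13_C0_2 in op0/op1/CRn/CRm/op2 form.
  unsigned op0 = 3, op1 = 3, CRn = 13, CRm = 0, op2 = 2;
  unsigned enc = (op0 << 14) | (op1 << 11) | (CRn << 7) | (CRm << 3) | op2;
  std::printf("0x%x\n", enc); // prints 0xde82
}
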
-
-// Generic system instructions
-def SYS : SystemI<0, "sys">;
-def SYSxt : SystemXtI<0, "sys">;
-def SYSLxt : SystemLXtI<1, "sysl">;
-
-//===----------------------------------------------------------------------===//
-// Move immediate instructions.
-//===----------------------------------------------------------------------===//
-
-defm MOVK : InsertImmediate<0b11, "movk">;
-defm MOVN : MoveImmediate<0b00, "movn">;
-
-let PostEncoderMethod = "fixMOVZ" in
-defm MOVZ : MoveImmediate<0b10, "movz">;
-
-def : InstAlias<"movk $dst, $imm", (MOVKWi GPR32:$dst, imm0_65535:$imm, 0)>;
-def : InstAlias<"movk $dst, $imm", (MOVKXi GPR64:$dst, imm0_65535:$imm, 0)>;
-def : InstAlias<"movn $dst, $imm", (MOVNWi GPR32:$dst, imm0_65535:$imm, 0)>;
-def : InstAlias<"movn $dst, $imm", (MOVNXi GPR64:$dst, imm0_65535:$imm, 0)>;
-def : InstAlias<"movz $dst, $imm", (MOVZWi GPR32:$dst, imm0_65535:$imm, 0)>;
-def : InstAlias<"movz $dst, $imm", (MOVZXi GPR64:$dst, imm0_65535:$imm, 0)>;
-
-def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movz_symbol_g3:$sym, 48)>;
-def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movz_symbol_g2:$sym, 32)>;
-def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movz_symbol_g1:$sym, 16)>;
-def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movz_symbol_g0:$sym, 0)>;
-
-def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movz_symbol_g3:$sym, 48)>;
-def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movz_symbol_g2:$sym, 32)>;
-def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movz_symbol_g1:$sym, 16)>;
-def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movz_symbol_g0:$sym, 0)>;
-
-def : InstAlias<"movz $Rd, $sym", (MOVZWi GPR32:$Rd, movz_symbol_g3:$sym, 48)>;
-def : InstAlias<"movz $Rd, $sym", (MOVZWi GPR32:$Rd, movz_symbol_g2:$sym, 32)>;
-def : InstAlias<"movz $Rd, $sym", (MOVZWi GPR32:$Rd, movz_symbol_g1:$sym, 16)>;
-def : InstAlias<"movz $Rd, $sym", (MOVZWi GPR32:$Rd, movz_symbol_g0:$sym, 0)>;
-
-def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movk_symbol_g2:$sym, 32)>;
-def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movk_symbol_g1:$sym, 16)>;
-def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movk_symbol_g0:$sym, 0)>;
-
-def : InstAlias<"movk $Rd, $sym", (MOVKWi GPR32:$Rd, movk_symbol_g2:$sym, 32)>;
-def : InstAlias<"movk $Rd, $sym", (MOVKWi GPR32:$Rd, movk_symbol_g1:$sym, 16)>;
-def : InstAlias<"movk $Rd, $sym", (MOVKWi GPR32:$Rd, movk_symbol_g0:$sym, 0)>;
-
-let isReMaterializable = 1, isCodeGenOnly = 1, isMoveImm = 1,
- isAsCheapAsAMove = 1 in {
-// FIXME: The following pseudo instructions are only needed because remat
-// cannot handle multiple instructions. When that changes, we can select
-// directly to the real instructions and get rid of these pseudos.
-
-def MOVi32imm
- : Pseudo<(outs GPR32:$dst), (ins i32imm:$src),
- [(set GPR32:$dst, imm:$src)]>,
- Sched<[WriteImm]>;
-def MOVi64imm
- : Pseudo<(outs GPR64:$dst), (ins i64imm:$src),
- [(set GPR64:$dst, imm:$src)]>,
- Sched<[WriteImm]>;
-} // isReMaterializable, isCodeGenOnly
-
-def : Pat<(ARM64WrapperLarge tglobaladdr:$g3, tglobaladdr:$g2,
- tglobaladdr:$g1, tglobaladdr:$g0),
- (MOVKXi (MOVKXi (MOVKXi (MOVZXi tglobaladdr:$g3, 48),
- tglobaladdr:$g2, 32),
- tglobaladdr:$g1, 16),
- tglobaladdr:$g0, 0)>;
-
-def : Pat<(ARM64WrapperLarge tblockaddress:$g3, tblockaddress:$g2,
- tblockaddress:$g1, tblockaddress:$g0),
- (MOVKXi (MOVKXi (MOVKXi (MOVZXi tblockaddress:$g3, 48),
- tblockaddress:$g2, 32),
- tblockaddress:$g1, 16),
- tblockaddress:$g0, 0)>;
-
-def : Pat<(ARM64WrapperLarge tconstpool:$g3, tconstpool:$g2,
- tconstpool:$g1, tconstpool:$g0),
- (MOVKXi (MOVKXi (MOVKXi (MOVZXi tconstpool:$g3, 48),
- tconstpool:$g2, 32),
- tconstpool:$g1, 16),
- tconstpool:$g0, 0)>;
-
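
The WrapperLarge patterns above materialize a 64-bit address as one MOVZ plus three MOVKs, one 16-bit chunk per instruction. A small sketch of the sequence they expand to (the value and register are illustrative):

#include <cstdint>
#include <cstdio>

int main() {
  uint64_t Addr = 0x1122334455667788ULL;
  for (int Shift = 48; Shift >= 0; Shift -= 16) {
    unsigned Chunk = (unsigned)((Addr >> Shift) & 0xffff);
    // MOVZ seeds the top chunk; MOVK keeps the other bits and inserts.
    std::printf("%s x0, #0x%04x, lsl #%d\n",
                Shift == 48 ? "movz" : "movk", Chunk, Shift);
  }
}
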
-
-//===----------------------------------------------------------------------===//
-// Arithmetic instructions.
-//===----------------------------------------------------------------------===//
-
-// Add/subtract with carry.
-defm ADC : AddSubCarry<0, "adc", "adcs", ARM64adc, ARM64adc_flag>;
-defm SBC : AddSubCarry<1, "sbc", "sbcs", ARM64sbc, ARM64sbc_flag>;
-
-def : InstAlias<"ngc $dst, $src", (SBCWr GPR32:$dst, WZR, GPR32:$src)>;
-def : InstAlias<"ngc $dst, $src", (SBCXr GPR64:$dst, XZR, GPR64:$src)>;
-def : InstAlias<"ngcs $dst, $src", (SBCSWr GPR32:$dst, WZR, GPR32:$src)>;
-def : InstAlias<"ngcs $dst, $src", (SBCSXr GPR64:$dst, XZR, GPR64:$src)>;
-
-// Add/subtract
-defm ADD : AddSub<0, "add", add>;
-defm SUB : AddSub<1, "sub">;
-
-defm ADDS : AddSubS<0, "adds", ARM64add_flag>;
-defm SUBS : AddSubS<1, "subs", ARM64sub_flag>;
-
-// Use SUBS instead of SUB to enable CSE between SUBS and SUB.
-def : Pat<(sub GPR32sp:$Rn, addsub_shifted_imm32:$imm),
- (SUBSWri GPR32sp:$Rn, addsub_shifted_imm32:$imm)>;
-def : Pat<(sub GPR64sp:$Rn, addsub_shifted_imm64:$imm),
- (SUBSXri GPR64sp:$Rn, addsub_shifted_imm64:$imm)>;
-def : Pat<(sub GPR32:$Rn, GPR32:$Rm),
- (SUBSWrr GPR32:$Rn, GPR32:$Rm)>;
-def : Pat<(sub GPR64:$Rn, GPR64:$Rm),
- (SUBSXrr GPR64:$Rn, GPR64:$Rm)>;
-def : Pat<(sub GPR32:$Rn, arith_shifted_reg32:$Rm),
- (SUBSWrs GPR32:$Rn, arith_shifted_reg32:$Rm)>;
-def : Pat<(sub GPR64:$Rn, arith_shifted_reg64:$Rm),
- (SUBSXrs GPR64:$Rn, arith_shifted_reg64:$Rm)>;
-def : Pat<(sub GPR32sp:$R2, arith_extended_reg32<i32>:$R3),
- (SUBSWrx GPR32sp:$R2, arith_extended_reg32<i32>:$R3)>;
-def : Pat<(sub GPR64sp:$R2, arith_extended_reg32to64<i64>:$R3),
- (SUBSXrx GPR64sp:$R2, arith_extended_reg32to64<i64>:$R3)>;
-
-// Because of the immediate format for add/sub-imm instructions, the
-// expression (add x, -1) must be transformed to (SUB{W,X}ri x, 1).
-// These patterns capture that transformation.
-let AddedComplexity = 1 in {
-def : Pat<(add GPR32:$Rn, neg_addsub_shifted_imm32:$imm),
- (SUBSWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>;
-def : Pat<(add GPR64:$Rn, neg_addsub_shifted_imm64:$imm),
- (SUBSXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>;
-def : Pat<(sub GPR32:$Rn, neg_addsub_shifted_imm32:$imm),
- (ADDWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>;
-def : Pat<(sub GPR64:$Rn, neg_addsub_shifted_imm64:$imm),
- (ADDXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>;
-}
-
-def : InstAlias<"neg $dst, $src", (SUBWrs GPR32:$dst, WZR, GPR32:$src, 0)>;
-def : InstAlias<"neg $dst, $src", (SUBXrs GPR64:$dst, XZR, GPR64:$src, 0)>;
-def : InstAlias<"neg $dst, $src, $shift",
- (SUBWrs GPR32:$dst, WZR, GPR32:$src, arith_shift:$shift)>;
-def : InstAlias<"neg $dst, $src, $shift",
- (SUBXrs GPR64:$dst, XZR, GPR64:$src, arith_shift:$shift)>;
-
-// Because of the immediate format for add/sub-imm instructions, the
-// expression (add x, -1) must be transformed to (SUB{W,X}ri x, 1).
-// These patterns capture that transformation.
-let AddedComplexity = 1 in {
-def : Pat<(ARM64add_flag GPR32:$Rn, neg_addsub_shifted_imm32:$imm),
- (SUBSWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>;
-def : Pat<(ARM64add_flag GPR64:$Rn, neg_addsub_shifted_imm64:$imm),
- (SUBSXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>;
-def : Pat<(ARM64sub_flag GPR32:$Rn, neg_addsub_shifted_imm32:$imm),
- (ADDSWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>;
-def : Pat<(ARM64sub_flag GPR64:$Rn, neg_addsub_shifted_imm64:$imm),
- (ADDSXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>;
-}
-
-def : InstAlias<"negs $dst, $src", (SUBSWrs GPR32:$dst, WZR, GPR32:$src, 0)>;
-def : InstAlias<"negs $dst, $src", (SUBSXrs GPR64:$dst, XZR, GPR64:$src, 0)>;
-def : InstAlias<"negs $dst, $src, $shift",
- (SUBSWrs GPR32:$dst, WZR, GPR32:$src, arith_shift:$shift)>;
-def : InstAlias<"negs $dst, $src, $shift",
- (SUBSXrs GPR64:$dst, XZR, GPR64:$src, arith_shift:$shift)>;
-
-// Unsigned/Signed divide
-defm UDIV : Div<0, "udiv", udiv>;
-defm SDIV : Div<1, "sdiv", sdiv>;
-let isCodeGenOnly = 1 in {
-defm UDIV_Int : Div<0, "udiv", int_arm64_udiv>;
-defm SDIV_Int : Div<1, "sdiv", int_arm64_sdiv>;
-}
-
-// Variable shift
-defm ASRV : Shift<0b10, "asrv", sra>;
-defm LSLV : Shift<0b00, "lslv", shl>;
-defm LSRV : Shift<0b01, "lsrv", srl>;
-defm RORV : Shift<0b11, "rorv", rotr>;
-
-def : ShiftAlias<"asr", ASRVWr, GPR32>;
-def : ShiftAlias<"asr", ASRVXr, GPR64>;
-def : ShiftAlias<"lsl", LSLVWr, GPR32>;
-def : ShiftAlias<"lsl", LSLVXr, GPR64>;
-def : ShiftAlias<"lsr", LSRVWr, GPR32>;
-def : ShiftAlias<"lsr", LSRVXr, GPR64>;
-def : ShiftAlias<"ror", RORVWr, GPR32>;
-def : ShiftAlias<"ror", RORVXr, GPR64>;
-
-// Multiply-add
-let AddedComplexity = 7 in {
-defm MADD : MulAccum<0, "madd", add>;
-defm MSUB : MulAccum<1, "msub", sub>;
-
-def : Pat<(i32 (mul GPR32:$Rn, GPR32:$Rm)),
- (MADDWrrr GPR32:$Rn, GPR32:$Rm, WZR)>;
-def : Pat<(i64 (mul GPR64:$Rn, GPR64:$Rm)),
- (MADDXrrr GPR64:$Rn, GPR64:$Rm, XZR)>;
-
-def : Pat<(i32 (ineg (mul GPR32:$Rn, GPR32:$Rm))),
- (MSUBWrrr GPR32:$Rn, GPR32:$Rm, WZR)>;
-def : Pat<(i64 (ineg (mul GPR64:$Rn, GPR64:$Rm))),
- (MSUBXrrr GPR64:$Rn, GPR64:$Rm, XZR)>;
-} // AddedComplexity = 7
-
-let AddedComplexity = 5 in {
-def SMADDLrrr : WideMulAccum<0, 0b001, "smaddl", add, sext>;
-def SMSUBLrrr : WideMulAccum<1, 0b001, "smsubl", sub, sext>;
-def UMADDLrrr : WideMulAccum<0, 0b101, "umaddl", add, zext>;
-def UMSUBLrrr : WideMulAccum<1, 0b101, "umsubl", sub, zext>;
-
-def : Pat<(i64 (mul (sext GPR32:$Rn), (sext GPR32:$Rm))),
- (SMADDLrrr GPR32:$Rn, GPR32:$Rm, XZR)>;
-def : Pat<(i64 (mul (zext GPR32:$Rn), (zext GPR32:$Rm))),
- (UMADDLrrr GPR32:$Rn, GPR32:$Rm, XZR)>;
-
-def : Pat<(i64 (ineg (mul (sext GPR32:$Rn), (sext GPR32:$Rm)))),
- (SMSUBLrrr GPR32:$Rn, GPR32:$Rm, XZR)>;
-def : Pat<(i64 (ineg (mul (zext GPR32:$Rn), (zext GPR32:$Rm)))),
- (UMSUBLrrr GPR32:$Rn, GPR32:$Rm, XZR)>;
-} // AddedComplexity = 5
-
-def : MulAccumWAlias<"mul", MADDWrrr>;
-def : MulAccumXAlias<"mul", MADDXrrr>;
-def : MulAccumWAlias<"mneg", MSUBWrrr>;
-def : MulAccumXAlias<"mneg", MSUBXrrr>;
-def : WideMulAccumAlias<"smull", SMADDLrrr>;
-def : WideMulAccumAlias<"smnegl", SMSUBLrrr>;
-def : WideMulAccumAlias<"umull", UMADDLrrr>;
-def : WideMulAccumAlias<"umnegl", UMSUBLrrr>;
-
-// Multiply-high
-def SMULHrr : MulHi<0b010, "smulh", mulhs>;
-def UMULHrr : MulHi<0b110, "umulh", mulhu>;
-
-// CRC32
-def CRC32Brr : BaseCRC32<0, 0b00, 0, GPR32, int_arm64_crc32b, "crc32b">;
-def CRC32Hrr : BaseCRC32<0, 0b01, 0, GPR32, int_arm64_crc32h, "crc32h">;
-def CRC32Wrr : BaseCRC32<0, 0b10, 0, GPR32, int_arm64_crc32w, "crc32w">;
-def CRC32Xrr : BaseCRC32<1, 0b11, 0, GPR64, int_arm64_crc32x, "crc32x">;
-
-def CRC32CBrr : BaseCRC32<0, 0b00, 1, GPR32, int_arm64_crc32cb, "crc32cb">;
-def CRC32CHrr : BaseCRC32<0, 0b01, 1, GPR32, int_arm64_crc32ch, "crc32ch">;
-def CRC32CWrr : BaseCRC32<0, 0b10, 1, GPR32, int_arm64_crc32cw, "crc32cw">;
-def CRC32CXrr : BaseCRC32<1, 0b11, 1, GPR64, int_arm64_crc32cx, "crc32cx">;
-
-
-//===----------------------------------------------------------------------===//
-// Logical instructions.
-//===----------------------------------------------------------------------===//
-
-// (immediate)
-defm ANDS : LogicalImmS<0b11, "ands", ARM64and_flag>;
-defm AND : LogicalImm<0b00, "and", and>;
-defm EOR : LogicalImm<0b10, "eor", xor>;
-defm ORR : LogicalImm<0b01, "orr", or>;
-
-def : InstAlias<"mov $dst, $imm", (ORRWri GPR32sp:$dst, WZR,
- logical_imm32:$imm)>;
-def : InstAlias<"mov $dst, $imm", (ORRXri GPR64sp:$dst, XZR,
- logical_imm64:$imm)>;
-
-
-// (register)
-defm ANDS : LogicalRegS<0b11, 0, "ands">;
-defm BICS : LogicalRegS<0b11, 1, "bics">;
-defm AND : LogicalReg<0b00, 0, "and", and>;
-defm BIC : LogicalReg<0b00, 1, "bic",
- BinOpFrag<(and node:$LHS, (not node:$RHS))>>;
-defm EON : LogicalReg<0b10, 1, "eon",
- BinOpFrag<(xor node:$LHS, (not node:$RHS))>>;
-defm EOR : LogicalReg<0b10, 0, "eor", xor>;
-defm ORN : LogicalReg<0b01, 1, "orn",
- BinOpFrag<(or node:$LHS, (not node:$RHS))>>;
-defm ORR : LogicalReg<0b01, 0, "orr", or>;
-
-def : InstAlias<"mov $dst, $src", (ORRWrs GPR32:$dst, WZR, GPR32:$src, 0)>;
-def : InstAlias<"mov $dst, $src",
- (ADDWri GPR32sp:$dst, GPR32sp:$src, 0, 0)>;
-def : InstAlias<"mov $dst, $src", (ORRXrs GPR64:$dst, XZR, GPR64:$src, 0)>;
-def : InstAlias<"mov $dst, $src",
- (ADDXri GPR64sp:$dst, GPR64sp:$src, 0, 0)>;
-
-def : InstAlias<"tst $src1, $src2",
- (ANDSWri WZR, GPR32:$src1, logical_imm32:$src2)>;
-def : InstAlias<"tst $src1, $src2",
- (ANDSXri XZR, GPR64:$src1, logical_imm64:$src2)>;
-
-def : InstAlias<"tst $src1, $src2",
- (ANDSWrs WZR, GPR32:$src1, GPR32:$src2, 0)>;
-def : InstAlias<"tst $src1, $src2",
- (ANDSXrs XZR, GPR64:$src1, GPR64:$src2, 0)>;
-
-def : InstAlias<"tst $src1, $src2, $sh",
- (ANDSWrs WZR, GPR32:$src1, GPR32:$src2, logical_shift:$sh)>;
-def : InstAlias<"tst $src1, $src2, $sh",
- (ANDSXrs XZR, GPR64:$src1, GPR64:$src2, logical_shift:$sh)>;
-
-def : InstAlias<"mvn $Wd, $Wm",
- (ORNWrs GPR32:$Wd, WZR, GPR32:$Wm, 0)>;
-def : InstAlias<"mvn $Xd, $Xm",
- (ORNXrs GPR64:$Xd, XZR, GPR64:$Xm, 0)>;
-
-def : Pat<(not GPR32:$Wm), (ORNWrr WZR, GPR32:$Wm)>;
-def : Pat<(not GPR64:$Xm), (ORNXrr XZR, GPR64:$Xm)>;
-
-
-//===----------------------------------------------------------------------===//
-// One operand data processing instructions.
-//===----------------------------------------------------------------------===//
-
-defm CLS : OneOperandData<0b101, "cls">;
-defm CLZ : OneOperandData<0b100, "clz", ctlz>;
-defm RBIT : OneOperandData<0b000, "rbit">;
-def REV16Wr : OneWRegData<0b001, "rev16",
- UnOpFrag<(rotr (bswap node:$LHS), (i64 16))>>;
-def REV16Xr : OneXRegData<0b001, "rev16",
- UnOpFrag<(rotr (bswap node:$LHS), (i64 16))>>;
-
-def : Pat<(cttz GPR32:$Rn),
- (CLZWr (RBITWr GPR32:$Rn))>;
-def : Pat<(cttz GPR64:$Rn),
- (CLZXr (RBITXr GPR64:$Rn))>;
-
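
The cttz patterns above rest on the identity cttz(x) == clz(bit_reverse(x)), which is exactly what RBIT followed by CLZ computes. A minimal check of that identity (a sketch using GCC/Clang builtins, so compiler-specific):

#include <cassert>
#include <cstdint>

static uint32_t rbit32(uint32_t x) {
  uint32_t r = 0;
  for (int i = 0; i < 32; ++i)
    r |= ((x >> i) & 1u) << (31 - i);
  return r;
}

int main() {
  // Nonzero inputs only: ctz/clz of 0 is undefined for these builtins.
  for (uint32_t x : {0x1000u, 0x80000000u, 0x6u})
    assert(__builtin_ctz(x) == __builtin_clz(rbit32(x)));
}
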
-// Unlike the other one operand instructions, the instructions with the "rev"
-// mnemonic do *not* just differ in the size bit, but actually use different
-// opcode bits for the different sizes.
-def REVWr : OneWRegData<0b010, "rev", bswap>;
-def REVXr : OneXRegData<0b011, "rev", bswap>;
-def REV32Xr : OneXRegData<0b010, "rev32",
- UnOpFrag<(rotr (bswap node:$LHS), (i64 32))>>;
-
-//===----------------------------------------------------------------------===//
-// Bitfield immediate extraction instruction.
-//===----------------------------------------------------------------------===//
-let neverHasSideEffects = 1 in
-defm EXTR : ExtractImm<"extr">;
-def : InstAlias<"ror $dst, $src, $shift",
- (EXTRWrri GPR32:$dst, GPR32:$src, GPR32:$src, imm0_31:$shift)>;
-def : InstAlias<"ror $dst, $src, $shift",
- (EXTRXrri GPR64:$dst, GPR64:$src, GPR64:$src, imm0_63:$shift)>;
-
-def : Pat<(rotr GPR32:$Rn, (i64 imm0_31:$imm)),
- (EXTRWrri GPR32:$Rn, GPR32:$Rn, imm0_31:$imm)>;
-def : Pat<(rotr GPR64:$Rn, (i64 imm0_63:$imm)),
- (EXTRXrri GPR64:$Rn, GPR64:$Rn, imm0_63:$imm)>;
-
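
The ror alias and patterns work because EXTR extracts a register-width field from the concatenation Rn:Rm starting at bit lsb, so passing the same register twice yields a rotate right. A small sketch of the 32-bit case (the helper name is illustrative):

#include <cassert>
#include <cstdint>

// EXTR Wd, Wn, Wm, #lsb == bits [lsb+31 : lsb] of the 64-bit value Wn:Wm.
static uint32_t extr32(uint32_t hi, uint32_t lo, unsigned lsb) {
  uint64_t cat = ((uint64_t)hi << 32) | lo;
  return (uint32_t)(cat >> lsb);
}

int main() {
  uint32_t x = 0xAABBCCDD;
  for (unsigned s : {4u, 8u, 24u}) // rotate amounts in 1..31
    assert(extr32(x, x, s) == ((x >> s) | (x << (32 - s))));
}
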
-//===----------------------------------------------------------------------===//
-// Other bitfield immediate instructions.
-//===----------------------------------------------------------------------===//
-let neverHasSideEffects = 1 in {
-defm BFM : BitfieldImmWith2RegArgs<0b01, "bfm">;
-defm SBFM : BitfieldImm<0b00, "sbfm">;
-defm UBFM : BitfieldImm<0b10, "ubfm">;
-}
-
-def i32shift_a : Operand<i64>, SDNodeXForm<imm, [{
- uint64_t enc = (32 - N->getZExtValue()) & 0x1f;
- return CurDAG->getTargetConstant(enc, MVT::i64);
-}]>;
-
-def i32shift_b : Operand<i64>, SDNodeXForm<imm, [{
- uint64_t enc = 31 - N->getZExtValue();
- return CurDAG->getTargetConstant(enc, MVT::i64);
-}]>;
-
-// min(7, 31 - shift_amt)
-def i32shift_sext_i8 : Operand<i64>, SDNodeXForm<imm, [{
- uint64_t enc = 31 - N->getZExtValue();
- enc = enc > 7 ? 7 : enc;
- return CurDAG->getTargetConstant(enc, MVT::i64);
-}]>;
-
-// min(15, 31 - shift_amt)
-def i32shift_sext_i16 : Operand<i64>, SDNodeXForm<imm, [{
- uint64_t enc = 31 - N->getZExtValue();
- enc = enc > 15 ? 15 : enc;
- return CurDAG->getTargetConstant(enc, MVT::i64);
-}]>;
-
-def i64shift_a : Operand<i64>, SDNodeXForm<imm, [{
- uint64_t enc = (64 - N->getZExtValue()) & 0x3f;
- return CurDAG->getTargetConstant(enc, MVT::i64);
-}]>;
-
-def i64shift_b : Operand<i64>, SDNodeXForm<imm, [{
- uint64_t enc = 63 - N->getZExtValue();
- return CurDAG->getTargetConstant(enc, MVT::i64);
-}]>;
-
-// min(7, 63 - shift_amt)
-def i64shift_sext_i8 : Operand<i64>, SDNodeXForm<imm, [{
- uint64_t enc = 63 - N->getZExtValue();
- enc = enc > 7 ? 7 : enc;
- return CurDAG->getTargetConstant(enc, MVT::i64);
-}]>;
-
-// min(15, 63 - shift_amt)
-def i64shift_sext_i16 : Operand<i64>, SDNodeXForm<imm, [{
- uint64_t enc = 63 - N->getZExtValue();
- enc = enc > 15 ? 15 : enc;
- return CurDAG->getTargetConstant(enc, MVT::i64);
-}]>;
-
-// min(31, 63 - shift_amt)
-def i64shift_sext_i32 : Operand<i64>, SDNodeXForm<imm, [{
- uint64_t enc = 63 - N->getZExtValue();
- enc = enc > 31 ? 31 : enc;
- return CurDAG->getTargetConstant(enc, MVT::i64);
-}]>;
-
-def : Pat<(shl GPR32:$Rn, (i64 imm0_31:$imm)),
- (UBFMWri GPR32:$Rn, (i64 (i32shift_a imm0_31:$imm)),
- (i64 (i32shift_b imm0_31:$imm)))>;
-def : Pat<(shl GPR64:$Rn, (i64 imm0_63:$imm)),
- (UBFMXri GPR64:$Rn, (i64 (i64shift_a imm0_63:$imm)),
- (i64 (i64shift_b imm0_63:$imm)))>;
-
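
The i32shift_a/i32shift_b transforms used above encode a left shift as UBFM: shl by N becomes UBFM with immr = (32 - N) & 31 and imms = 31 - N. A tiny sketch printing a few instances of that mapping:

#include <cstdio>

int main() {
  for (unsigned N : {1u, 5u, 31u}) {
    unsigned immr = (32 - N) & 0x1f; // i32shift_a
    unsigned imms = 31 - N;          // i32shift_b
    std::printf("lsl wD, wS, #%u == ubfm wD, wS, #%u, #%u\n", N, immr, imms);
  }
}
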
-let AddedComplexity = 10 in {
-def : Pat<(sra GPR32:$Rn, (i64 imm0_31:$imm)),
- (SBFMWri GPR32:$Rn, imm0_31:$imm, 31)>;
-def : Pat<(sra GPR64:$Rn, (i64 imm0_63:$imm)),
- (SBFMXri GPR64:$Rn, imm0_63:$imm, 63)>;
-}
-
-def : InstAlias<"asr $dst, $src, $shift",
- (SBFMWri GPR32:$dst, GPR32:$src, imm0_31:$shift, 31)>;
-def : InstAlias<"asr $dst, $src, $shift",
- (SBFMXri GPR64:$dst, GPR64:$src, imm0_63:$shift, 63)>;
-def : InstAlias<"sxtb $dst, $src", (SBFMWri GPR32:$dst, GPR32:$src, 0, 7)>;
-def : InstAlias<"sxtb $dst, $src", (SBFMXri GPR64:$dst, GPR64:$src, 0, 7)>;
-def : InstAlias<"sxth $dst, $src", (SBFMWri GPR32:$dst, GPR32:$src, 0, 15)>;
-def : InstAlias<"sxth $dst, $src", (SBFMXri GPR64:$dst, GPR64:$src, 0, 15)>;
-def : InstAlias<"sxtw $dst, $src", (SBFMXri GPR64:$dst, GPR64:$src, 0, 31)>;
-
-def : Pat<(srl GPR32:$Rn, (i64 imm0_31:$imm)),
- (UBFMWri GPR32:$Rn, imm0_31:$imm, 31)>;
-def : Pat<(srl GPR64:$Rn, (i64 imm0_63:$imm)),
- (UBFMXri GPR64:$Rn, imm0_63:$imm, 63)>;
-
-def : InstAlias<"lsr $dst, $src, $shift",
- (UBFMWri GPR32:$dst, GPR32:$src, imm0_31:$shift, 31)>;
-def : InstAlias<"lsr $dst, $src, $shift",
- (UBFMXri GPR64:$dst, GPR64:$src, imm0_63:$shift, 63)>;
-def : InstAlias<"uxtb $dst, $src", (UBFMWri GPR32:$dst, GPR32:$src, 0, 7)>;
-def : InstAlias<"uxtb $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 7)>;
-def : InstAlias<"uxth $dst, $src", (UBFMWri GPR32:$dst, GPR32:$src, 0, 15)>;
-def : InstAlias<"uxth $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 15)>;
-def : InstAlias<"uxtw $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 31)>;
-
-//===----------------------------------------------------------------------===//
-// Conditionally set flags instructions.
-//===----------------------------------------------------------------------===//
-defm CCMN : CondSetFlagsImm<0, "ccmn">;
-defm CCMP : CondSetFlagsImm<1, "ccmp">;
-
-defm CCMN : CondSetFlagsReg<0, "ccmn">;
-defm CCMP : CondSetFlagsReg<1, "ccmp">;
-
-//===----------------------------------------------------------------------===//
-// Conditional select instructions.
-//===----------------------------------------------------------------------===//
-defm CSEL : CondSelect<0, 0b00, "csel">;
-
-def inc : PatFrag<(ops node:$in), (add node:$in, 1)>;
-defm CSINC : CondSelectOp<0, 0b01, "csinc", inc>;
-defm CSINV : CondSelectOp<1, 0b00, "csinv", not>;
-defm CSNEG : CondSelectOp<1, 0b01, "csneg", ineg>;
-
-def : Pat<(ARM64csinv GPR32:$tval, GPR32:$fval, (i32 imm:$cc), CPSR),
- (CSINVWr GPR32:$tval, GPR32:$fval, (i32 imm:$cc))>;
-def : Pat<(ARM64csinv GPR64:$tval, GPR64:$fval, (i32 imm:$cc), CPSR),
- (CSINVXr GPR64:$tval, GPR64:$fval, (i32 imm:$cc))>;
-def : Pat<(ARM64csneg GPR32:$tval, GPR32:$fval, (i32 imm:$cc), CPSR),
- (CSNEGWr GPR32:$tval, GPR32:$fval, (i32 imm:$cc))>;
-def : Pat<(ARM64csneg GPR64:$tval, GPR64:$fval, (i32 imm:$cc), CPSR),
- (CSNEGXr GPR64:$tval, GPR64:$fval, (i32 imm:$cc))>;
-def : Pat<(ARM64csinc GPR32:$tval, GPR32:$fval, (i32 imm:$cc), CPSR),
- (CSINCWr GPR32:$tval, GPR32:$fval, (i32 imm:$cc))>;
-def : Pat<(ARM64csinc GPR64:$tval, GPR64:$fval, (i32 imm:$cc), CPSR),
- (CSINCXr GPR64:$tval, GPR64:$fval, (i32 imm:$cc))>;
-
-def : Pat<(ARM64csel (i32 0), (i32 1), (i32 imm:$cc), CPSR),
- (CSINCWr WZR, WZR, (i32 imm:$cc))>;
-def : Pat<(ARM64csel (i64 0), (i64 1), (i32 imm:$cc), CPSR),
- (CSINCXr XZR, XZR, (i32 imm:$cc))>;
-def : Pat<(ARM64csel (i32 0), (i32 -1), (i32 imm:$cc), CPSR),
- (CSINVWr WZR, WZR, (i32 imm:$cc))>;
-def : Pat<(ARM64csel (i64 0), (i64 -1), (i32 imm:$cc), CPSR),
- (CSINVXr XZR, XZR, (i32 imm:$cc))>;
-
-// The inverse of the condition code from the alias instruction is what is used
-// in the aliased instruction. The parser already inverts the condition code
-// for these aliases.
-// FIXME: Is this the correct way to handle these aliases?
-def : InstAlias<"cset $dst, $cc", (CSINCWr GPR32:$dst, WZR, WZR, ccode:$cc)>;
-def : InstAlias<"cset $dst, $cc", (CSINCXr GPR64:$dst, XZR, XZR, ccode:$cc)>;
-
-def : InstAlias<"csetm $dst, $cc", (CSINVWr GPR32:$dst, WZR, WZR, ccode:$cc)>;
-def : InstAlias<"csetm $dst, $cc", (CSINVXr GPR64:$dst, XZR, XZR, ccode:$cc)>;
-
-def : InstAlias<"cinc $dst, $src, $cc",
- (CSINCWr GPR32:$dst, GPR32:$src, GPR32:$src, ccode:$cc)>;
-def : InstAlias<"cinc $dst, $src, $cc",
- (CSINCXr GPR64:$dst, GPR64:$src, GPR64:$src, ccode:$cc)>;
-
-def : InstAlias<"cinv $dst, $src, $cc",
- (CSINVWr GPR32:$dst, GPR32:$src, GPR32:$src, ccode:$cc)>;
-def : InstAlias<"cinv $dst, $src, $cc",
- (CSINVXr GPR64:$dst, GPR64:$src, GPR64:$src, ccode:$cc)>;
-
-def : InstAlias<"cneg $dst, $src, $cc",
- (CSNEGWr GPR32:$dst, GPR32:$src, GPR32:$src, ccode:$cc)>;
-def : InstAlias<"cneg $dst, $src, $cc",
- (CSNEGXr GPR64:$dst, GPR64:$src, GPR64:$src, ccode:$cc)>;
-
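
To make the inversion above concrete: cset Wd, eq assembles as csinc Wd, wzr, wzr, ne, since CSINC produces Wm + 1 (here 1) exactly when the condition fails. A toy sketch of the flipped-condition emission (the table is a hypothetical excerpt, not the parser's actual data):

#include <cstdio>

int main() {
  // A few of the sixteen condition codes and their inverses.
  const char *Pairs[][2] = {{"eq", "ne"}, {"lt", "ge"}, {"hi", "ls"}};
  for (const auto &P : Pairs)
    std::printf("cset wD, %s  ->  csinc wD, wzr, wzr, %s\n", P[0], P[1]);
}
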
-//===----------------------------------------------------------------------===//
-// PC-relative instructions.
-//===----------------------------------------------------------------------===//
-let isReMaterializable = 1 in {
-let neverHasSideEffects = 1, mayStore = 0, mayLoad = 0 in {
-def ADR : ADRI<0, "adr", adrlabel, []>;
-} // neverHasSideEffects = 1
-
-def ADRP : ADRI<1, "adrp", adrplabel,
- [(set GPR64:$Xd, (ARM64adrp tglobaladdr:$label))]>;
-} // isReMaterializable = 1
-
-// page address of a constant pool entry, block address
-def : Pat<(ARM64adrp tconstpool:$cp), (ADRP tconstpool:$cp)>;
-def : Pat<(ARM64adrp tblockaddress:$cp), (ADRP tblockaddress:$cp)>;
-
-//===----------------------------------------------------------------------===//
-// Unconditional branch (register) instructions.
-//===----------------------------------------------------------------------===//
-
-let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
-def RET : BranchReg<0b0010, "ret", []>;
-def DRPS : SpecialReturn<0b0101, "drps">;
-def ERET : SpecialReturn<0b0100, "eret">;
-} // isReturn = 1, isTerminator = 1, isBarrier = 1
-
-// Default to the LR register.
-def : InstAlias<"ret", (RET LR)>;
-
-let isCall = 1, Defs = [LR], Uses = [SP] in {
-def BLR : BranchReg<0b0001, "blr", [(ARM64call GPR64:$Rn)]>;
-} // isCall
-
-let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
-def BR : BranchReg<0b0000, "br", [(brind GPR64:$Rn)]>;
-} // isBranch, isTerminator, isBarrier, isIndirectBranch
-
-// Create a separate pseudo-instruction for codegen to use so that we don't
-// flag lr as used in every function. It'll be restored before the RET by the
-// epilogue if it's legitimately used.
-def RET_ReallyLR : Pseudo<(outs), (ins), [(ARM64retflag)]> {
- let isTerminator = 1;
- let isBarrier = 1;
- let isReturn = 1;
-}
-
-// This is a directive-like pseudo-instruction. The purpose is to insert an
-// R_AARCH64_TLSDESC_CALL relocation at the offset of the following instruction
-// (which in the usual case is a BLR).
-let hasSideEffects = 1 in
-def TLSDESCCALL : Pseudo<(outs), (ins i64imm:$sym), []> {
- let AsmString = ".tlsdesccall $sym";
-}
-
-// Pseudo-instruction representing a BLR with attached TLSDESC relocation. It
-// gets expanded to two MCInsts during lowering.
-let isCall = 1, Defs = [LR] in
-def TLSDESC_BLR
- : Pseudo<(outs), (ins GPR64:$dest, i64imm:$sym),
- [(ARM64tlsdesc_call GPR64:$dest, tglobaltlsaddr:$sym)]>;
-
-def : Pat<(ARM64tlsdesc_call GPR64:$dest, texternalsym:$sym),
- (TLSDESC_BLR GPR64:$dest, texternalsym:$sym)>;
-//===----------------------------------------------------------------------===//
-// Conditional branch (immediate) instruction.
-//===----------------------------------------------------------------------===//
-def Bcc : BranchCond;
-
-//===----------------------------------------------------------------------===//
-// Compare-and-branch instructions.
-//===----------------------------------------------------------------------===//
-defm CBZ : CmpBranch<0, "cbz", ARM64cbz>;
-defm CBNZ : CmpBranch<1, "cbnz", ARM64cbnz>;
-
-//===----------------------------------------------------------------------===//
-// Test-bit-and-branch instructions.
-//===----------------------------------------------------------------------===//
-def TBZ : TestBranch<0, "tbz", ARM64tbz>;
-def TBNZ : TestBranch<1, "tbnz", ARM64tbnz>;
-
-//===----------------------------------------------------------------------===//
-// Unconditional branch (immediate) instructions.
-//===----------------------------------------------------------------------===//
-let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
-def B : BranchImm<0, "b", [(br bb:$addr)]>;
-} // isBranch, isTerminator, isBarrier
-
-let isCall = 1, Defs = [LR], Uses = [SP] in {
-def BL : CallImm<1, "bl", [(ARM64call tglobaladdr:$addr)]>;
-} // isCall
-def : Pat<(ARM64call texternalsym:$func), (BL texternalsym:$func)>;
-
-//===----------------------------------------------------------------------===//
-// Exception generation instructions.
-//===----------------------------------------------------------------------===//
-def BRK : ExceptionGeneration<0b001, 0b00, "brk">;
-def DCPS1 : ExceptionGeneration<0b101, 0b01, "dcps1">;
-def DCPS2 : ExceptionGeneration<0b101, 0b10, "dcps2">;
-def DCPS3 : ExceptionGeneration<0b101, 0b11, "dcps3">;
-def HLT : ExceptionGeneration<0b010, 0b00, "hlt">;
-def HVC : ExceptionGeneration<0b000, 0b10, "hvc">;
-def SMC : ExceptionGeneration<0b000, 0b11, "smc">;
-def SVC : ExceptionGeneration<0b000, 0b01, "svc">;
-
-// DCPSn defaults to an immediate operand of zero if unspecified.
-def : InstAlias<"dcps1", (DCPS1 0)>;
-def : InstAlias<"dcps2", (DCPS2 0)>;
-def : InstAlias<"dcps3", (DCPS3 0)>;
-
-//===----------------------------------------------------------------------===//
-// Load instructions.
-//===----------------------------------------------------------------------===//
-
-// Pair (indexed, offset)
-def LDPWi : LoadPairOffset<0b00, 0, GPR32, am_indexed32simm7, "ldp">;
-def LDPXi : LoadPairOffset<0b10, 0, GPR64, am_indexed64simm7, "ldp">;
-def LDPSi : LoadPairOffset<0b00, 1, FPR32, am_indexed32simm7, "ldp">;
-def LDPDi : LoadPairOffset<0b01, 1, FPR64, am_indexed64simm7, "ldp">;
-def LDPQi : LoadPairOffset<0b10, 1, FPR128, am_indexed128simm7, "ldp">;
-
-def LDPSWi : LoadPairOffset<0b01, 0, GPR64, am_indexed32simm7, "ldpsw">;
-
-// Pair (pre-indexed)
-def LDPWpre : LoadPairPreIdx<0b00, 0, GPR32, am_indexed32simm7, "ldp">;
-def LDPXpre : LoadPairPreIdx<0b10, 0, GPR64, am_indexed64simm7, "ldp">;
-def LDPSpre : LoadPairPreIdx<0b00, 1, FPR32, am_indexed32simm7, "ldp">;
-def LDPDpre : LoadPairPreIdx<0b01, 1, FPR64, am_indexed64simm7, "ldp">;
-def LDPQpre : LoadPairPreIdx<0b10, 1, FPR128, am_indexed128simm7, "ldp">;
-
-def LDPSWpre : LoadPairPreIdx<0b01, 0, GPR64, am_indexed32simm7, "ldpsw">;
-
-// Pair (post-indexed)
-def LDPWpost : LoadPairPostIdx<0b00, 0, GPR32, simm7s4, "ldp">;
-def LDPXpost : LoadPairPostIdx<0b10, 0, GPR64, simm7s8, "ldp">;
-def LDPSpost : LoadPairPostIdx<0b00, 1, FPR32, simm7s4, "ldp">;
-def LDPDpost : LoadPairPostIdx<0b01, 1, FPR64, simm7s8, "ldp">;
-def LDPQpost : LoadPairPostIdx<0b10, 1, FPR128, simm7s16, "ldp">;
-
-def LDPSWpost : LoadPairPostIdx<0b01, 0, GPR64, simm7s4, "ldpsw">;
-
-
-// Pair (no allocate)
-def LDNPWi : LoadPairNoAlloc<0b00, 0, GPR32, am_indexed32simm7, "ldnp">;
-def LDNPXi : LoadPairNoAlloc<0b10, 0, GPR64, am_indexed64simm7, "ldnp">;
-def LDNPSi : LoadPairNoAlloc<0b00, 1, FPR32, am_indexed32simm7, "ldnp">;
-def LDNPDi : LoadPairNoAlloc<0b01, 1, FPR64, am_indexed64simm7, "ldnp">;
-def LDNPQi : LoadPairNoAlloc<0b10, 1, FPR128, am_indexed128simm7, "ldnp">;
-
-//---
-// (register offset)
-//---
-
-let AddedComplexity = 10 in {
-// Integer
-def LDRBBro : Load8RO<0b00, 0, 0b01, GPR32, "ldrb",
- [(set GPR32:$Rt, (zextloadi8 ro_indexed8:$addr))]>;
-def LDRHHro : Load16RO<0b01, 0, 0b01, GPR32, "ldrh",
- [(set GPR32:$Rt, (zextloadi16 ro_indexed16:$addr))]>;
-def LDRWro : Load32RO<0b10, 0, 0b01, GPR32, "ldr",
- [(set GPR32:$Rt, (load ro_indexed32:$addr))]>;
-def LDRXro : Load64RO<0b11, 0, 0b01, GPR64, "ldr",
- [(set GPR64:$Rt, (load ro_indexed64:$addr))]>;
-
-// Floating-point
-def LDRBro : Load8RO<0b00, 1, 0b01, FPR8, "ldr",
- [(set FPR8:$Rt, (load ro_indexed8:$addr))]>;
-def LDRHro : Load16RO<0b01, 1, 0b01, FPR16, "ldr",
- [(set FPR16:$Rt, (load ro_indexed16:$addr))]>;
-def LDRSro : Load32RO<0b10, 1, 0b01, FPR32, "ldr",
- [(set (f32 FPR32:$Rt), (load ro_indexed32:$addr))]>;
-def LDRDro : Load64RO<0b11, 1, 0b01, FPR64, "ldr",
- [(set (f64 FPR64:$Rt), (load ro_indexed64:$addr))]>;
-def LDRQro : Load128RO<0b00, 1, 0b11, FPR128, "ldr", []> {
- let mayLoad = 1;
-}
-
-// Regular loads have no alignment requirement, so it is safe to map the
-// vector loads with interesting addressing modes directly onto them.
-// FIXME: We could do the same for bitconvert to floating point vectors.
-def : Pat <(v8i8 (scalar_to_vector (i32 (extloadi8 ro_indexed8:$addr)))),
- (INSERT_SUBREG (v8i8 (IMPLICIT_DEF)),
- (LDRBro ro_indexed8:$addr), bsub)>;
-def : Pat <(v16i8 (scalar_to_vector (i32 (extloadi8 ro_indexed8:$addr)))),
- (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
- (LDRBro ro_indexed8:$addr), bsub)>;
-def : Pat <(v4i16 (scalar_to_vector (i32 (extloadi16 ro_indexed16:$addr)))),
- (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)),
- (LDRHro ro_indexed16:$addr), hsub)>;
-def : Pat <(v8i16 (scalar_to_vector (i32 (extloadi16 ro_indexed16:$addr)))),
- (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
- (LDRHro ro_indexed16:$addr), hsub)>;
-def : Pat <(v2i32 (scalar_to_vector (i32 (load ro_indexed32:$addr)))),
- (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)),
- (LDRSro ro_indexed32:$addr), ssub)>;
-def : Pat <(v4i32 (scalar_to_vector (i32 (load ro_indexed32:$addr)))),
- (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
- (LDRSro ro_indexed32:$addr), ssub)>;
-def : Pat <(v1i64 (scalar_to_vector (i64 (load ro_indexed64:$addr)))),
- (LDRDro ro_indexed64:$addr)>;
-def : Pat <(v2i64 (scalar_to_vector (i64 (load ro_indexed64:$addr)))),
- (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)),
- (LDRDro ro_indexed64:$addr), dsub)>;
-
-// Match all 64-bit-wide loads whose type is compatible with FPR64
-def : Pat<(v2f32 (load ro_indexed64:$addr)), (LDRDro ro_indexed64:$addr)>;
-def : Pat<(v1f64 (load ro_indexed64:$addr)), (LDRDro ro_indexed64:$addr)>;
-def : Pat<(v8i8 (load ro_indexed64:$addr)), (LDRDro ro_indexed64:$addr)>;
-def : Pat<(v4i16 (load ro_indexed64:$addr)), (LDRDro ro_indexed64:$addr)>;
-def : Pat<(v2i32 (load ro_indexed64:$addr)), (LDRDro ro_indexed64:$addr)>;
-def : Pat<(v1i64 (load ro_indexed64:$addr)), (LDRDro ro_indexed64:$addr)>;
-
-// Match all 128-bit-wide loads whose type is compatible with FPR128
-def : Pat<(v4f32 (load ro_indexed128:$addr)), (LDRQro ro_indexed128:$addr)>;
-def : Pat<(v2f64 (load ro_indexed128:$addr)), (LDRQro ro_indexed128:$addr)>;
-def : Pat<(v16i8 (load ro_indexed128:$addr)), (LDRQro ro_indexed128:$addr)>;
-def : Pat<(v8i16 (load ro_indexed128:$addr)), (LDRQro ro_indexed128:$addr)>;
-def : Pat<(v4i32 (load ro_indexed128:$addr)), (LDRQro ro_indexed128:$addr)>;
-def : Pat<(v2i64 (load ro_indexed128:$addr)), (LDRQro ro_indexed128:$addr)>;
-def : Pat<(f128 (load ro_indexed128:$addr)), (LDRQro ro_indexed128:$addr)>;
-
-// Load sign-extended half-word
-def LDRSHWro : Load16RO<0b01, 0, 0b11, GPR32, "ldrsh",
- [(set GPR32:$Rt, (sextloadi16 ro_indexed16:$addr))]>;
-def LDRSHXro : Load16RO<0b01, 0, 0b10, GPR64, "ldrsh",
- [(set GPR64:$Rt, (sextloadi16 ro_indexed16:$addr))]>;
-
-// Load sign-extended byte
-def LDRSBWro : Load8RO<0b00, 0, 0b11, GPR32, "ldrsb",
- [(set GPR32:$Rt, (sextloadi8 ro_indexed8:$addr))]>;
-def LDRSBXro : Load8RO<0b00, 0, 0b10, GPR64, "ldrsb",
- [(set GPR64:$Rt, (sextloadi8 ro_indexed8:$addr))]>;
-
-// Load sign-extended word
-def LDRSWro : Load32RO<0b10, 0, 0b10, GPR64, "ldrsw",
- [(set GPR64:$Rt, (sextloadi32 ro_indexed32:$addr))]>;
-
-// Pre-fetch.
-def PRFMro : PrefetchRO<0b11, 0, 0b10, "prfm",
- [(ARM64Prefetch imm:$Rt, ro_indexed64:$addr)]>;
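-// Note that $Rt here is the prefetch-operation immediate (type, target cache
-// level, and policy) rather than a data register, e.g.
-// "prfm pldl1keep, [x0, x1]".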
-
-// zextload -> i64
-def : Pat<(i64 (zextloadi8 ro_indexed8:$addr)),
- (SUBREG_TO_REG (i64 0), (LDRBBro ro_indexed8:$addr), sub_32)>;
-def : Pat<(i64 (zextloadi16 ro_indexed16:$addr)),
- (SUBREG_TO_REG (i64 0), (LDRHHro ro_indexed16:$addr), sub_32)>;
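-// These patterns rely on the architectural guarantee that a write to a W
-// register zeroes bits [63:32], so the 32-bit zero-extending load already
-// yields the zero-extended i64 value; SUBREG_TO_REG merely retypes it.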
-
-// zextloadi1 -> zextloadi8
-def : Pat<(i32 (zextloadi1 ro_indexed8:$addr)), (LDRBBro ro_indexed8:$addr)>;
-def : Pat<(i64 (zextloadi1 ro_indexed8:$addr)),
- (SUBREG_TO_REG (i64 0), (LDRBBro ro_indexed8:$addr), sub_32)>;
-
-// extload -> zextload
-def : Pat<(i32 (extloadi16 ro_indexed16:$addr)), (LDRHHro ro_indexed16:$addr)>;
-def : Pat<(i32 (extloadi8 ro_indexed8:$addr)), (LDRBBro ro_indexed8:$addr)>;
-def : Pat<(i32 (extloadi1 ro_indexed8:$addr)), (LDRBBro ro_indexed8:$addr)>;
-def : Pat<(i64 (extloadi32 ro_indexed32:$addr)),
- (SUBREG_TO_REG (i64 0), (LDRWro ro_indexed32:$addr), sub_32)>;
-def : Pat<(i64 (extloadi16 ro_indexed16:$addr)),
- (SUBREG_TO_REG (i64 0), (LDRHHro ro_indexed16:$addr), sub_32)>;
-def : Pat<(i64 (extloadi8 ro_indexed8:$addr)),
- (SUBREG_TO_REG (i64 0), (LDRBBro ro_indexed8:$addr), sub_32)>;
-def : Pat<(i64 (extloadi1 ro_indexed8:$addr)),
- (SUBREG_TO_REG (i64 0), (LDRBBro ro_indexed8:$addr), sub_32)>;
-
-} // AddedComplexity = 10
-
-//---
-// (unsigned immediate)
-//---
-def LDRXui : LoadUI<0b11, 0, 0b01, GPR64, am_indexed64, "ldr",
- [(set GPR64:$Rt, (load am_indexed64:$addr))]>;
-def LDRWui : LoadUI<0b10, 0, 0b01, GPR32, am_indexed32, "ldr",
- [(set GPR32:$Rt, (load am_indexed32:$addr))]>;
-def LDRBui : LoadUI<0b00, 1, 0b01, FPR8, am_indexed8, "ldr",
- [(set FPR8:$Rt, (load am_indexed8:$addr))]>;
-def LDRHui : LoadUI<0b01, 1, 0b01, FPR16, am_indexed16, "ldr",
- [(set FPR16:$Rt, (load am_indexed16:$addr))]>;
-def LDRSui : LoadUI<0b10, 1, 0b01, FPR32, am_indexed32, "ldr",
- [(set (f32 FPR32:$Rt), (load am_indexed32:$addr))]>;
-def LDRDui : LoadUI<0b11, 1, 0b01, FPR64, am_indexed64, "ldr",
- [(set (f64 FPR64:$Rt), (load am_indexed64:$addr))]>;
-def LDRQui : LoadUI<0b00, 1, 0b11, FPR128, am_indexed128, "ldr",
- [(set (f128 FPR128:$Rt), (load am_indexed128:$addr))]>;
-
-// Regular loads have no alignment requirement, so it is safe to map the
-// vector loads with interesting addressing modes directly onto them.
-// FIXME: We could do the same for bitconvert to floating point vectors.
-def : Pat <(v8i8 (scalar_to_vector (i32 (extloadi8 am_indexed8:$addr)))),
- (INSERT_SUBREG (v8i8 (IMPLICIT_DEF)),
- (LDRBui am_indexed8:$addr), bsub)>;
-def : Pat <(v16i8 (scalar_to_vector (i32 (extloadi8 am_indexed8:$addr)))),
- (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
- (LDRBui am_indexed8:$addr), bsub)>;
-def : Pat <(v4i16 (scalar_to_vector (i32 (extloadi16 am_indexed16:$addr)))),
- (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)),
- (LDRHui am_indexed16:$addr), hsub)>;
-def : Pat <(v8i16 (scalar_to_vector (i32 (extloadi16 am_indexed16:$addr)))),
- (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
- (LDRHui am_indexed16:$addr), hsub)>;
-def : Pat <(v2i32 (scalar_to_vector (i32 (load am_indexed32:$addr)))),
- (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)),
- (LDRSui am_indexed32:$addr), ssub)>;
-def : Pat <(v4i32 (scalar_to_vector (i32 (load am_indexed32:$addr)))),
- (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
- (LDRSui am_indexed32:$addr), ssub)>;
-def : Pat <(v1i64 (scalar_to_vector (i64 (load am_indexed64:$addr)))),
- (LDRDui am_indexed64:$addr)>;
-def : Pat <(v2i64 (scalar_to_vector (i64 (load am_indexed64:$addr)))),
- (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)),
- (LDRDui am_indexed64:$addr), dsub)>;
-
-// Match all 64-bit-wide loads whose type is compatible with FPR64
-def : Pat<(v2f32 (load am_indexed64:$addr)), (LDRDui am_indexed64:$addr)>;
-def : Pat<(v1f64 (load am_indexed64:$addr)), (LDRDui am_indexed64:$addr)>;
-def : Pat<(v8i8 (load am_indexed64:$addr)), (LDRDui am_indexed64:$addr)>;
-def : Pat<(v4i16 (load am_indexed64:$addr)), (LDRDui am_indexed64:$addr)>;
-def : Pat<(v2i32 (load am_indexed64:$addr)), (LDRDui am_indexed64:$addr)>;
-def : Pat<(v1i64 (load am_indexed64:$addr)), (LDRDui am_indexed64:$addr)>;
-
-// Match all 128-bit-wide loads whose type is compatible with FPR128
-def : Pat<(v4f32 (load am_indexed128:$addr)), (LDRQui am_indexed128:$addr)>;
-def : Pat<(v2f64 (load am_indexed128:$addr)), (LDRQui am_indexed128:$addr)>;
-def : Pat<(v16i8 (load am_indexed128:$addr)), (LDRQui am_indexed128:$addr)>;
-def : Pat<(v8i16 (load am_indexed128:$addr)), (LDRQui am_indexed128:$addr)>;
-def : Pat<(v4i32 (load am_indexed128:$addr)), (LDRQui am_indexed128:$addr)>;
-def : Pat<(v2i64 (load am_indexed128:$addr)), (LDRQui am_indexed128:$addr)>;
-def : Pat<(f128 (load am_indexed128:$addr)), (LDRQui am_indexed128:$addr)>;
-
-def LDRHHui : LoadUI<0b01, 0, 0b01, GPR32, am_indexed16, "ldrh",
- [(set GPR32:$Rt, (zextloadi16 am_indexed16:$addr))]>;
-def LDRBBui : LoadUI<0b00, 0, 0b01, GPR32, am_indexed8, "ldrb",
- [(set GPR32:$Rt, (zextloadi8 am_indexed8:$addr))]>;
-// zextload -> i64
-def : Pat<(i64 (zextloadi8 am_indexed8:$addr)),
- (SUBREG_TO_REG (i64 0), (LDRBBui am_indexed8:$addr), sub_32)>;
-def : Pat<(i64 (zextloadi16 am_indexed16:$addr)),
- (SUBREG_TO_REG (i64 0), (LDRHHui am_indexed16:$addr), sub_32)>;
-
-// zextloadi1 -> zextloadi8
-def : Pat<(i32 (zextloadi1 am_indexed8:$addr)), (LDRBBui am_indexed8:$addr)>;
-def : Pat<(i64 (zextloadi1 am_indexed8:$addr)),
- (SUBREG_TO_REG (i64 0), (LDRBBui am_indexed8:$addr), sub_32)>;
-
-// extload -> zextload
-def : Pat<(i32 (extloadi16 am_indexed16:$addr)), (LDRHHui am_indexed16:$addr)>;
-def : Pat<(i32 (extloadi8 am_indexed8:$addr)), (LDRBBui am_indexed8:$addr)>;
-def : Pat<(i32 (extloadi1 am_indexed8:$addr)), (LDRBBui am_indexed8:$addr)>;
-def : Pat<(i64 (extloadi32 am_indexed32:$addr)),
- (SUBREG_TO_REG (i64 0), (LDRWui am_indexed32:$addr), sub_32)>;
-def : Pat<(i64 (extloadi16 am_indexed16:$addr)),
- (SUBREG_TO_REG (i64 0), (LDRHHui am_indexed16:$addr), sub_32)>;
-def : Pat<(i64 (extloadi8 am_indexed8:$addr)),
- (SUBREG_TO_REG (i64 0), (LDRBBui am_indexed8:$addr), sub_32)>;
-def : Pat<(i64 (extloadi1 am_indexed8:$addr)),
- (SUBREG_TO_REG (i64 0), (LDRBBui am_indexed8:$addr), sub_32)>;
-
-// load sign-extended half-word
-def LDRSHWui : LoadUI<0b01, 0, 0b11, GPR32, am_indexed16, "ldrsh",
- [(set GPR32:$Rt, (sextloadi16 am_indexed16:$addr))]>;
-def LDRSHXui : LoadUI<0b01, 0, 0b10, GPR64, am_indexed16, "ldrsh",
- [(set GPR64:$Rt, (sextloadi16 am_indexed16:$addr))]>;
-
-// load sign-extended byte
-def LDRSBWui : LoadUI<0b00, 0, 0b11, GPR32, am_indexed8, "ldrsb",
- [(set GPR32:$Rt, (sextloadi8 am_indexed8:$addr))]>;
-def LDRSBXui : LoadUI<0b00, 0, 0b10, GPR64, am_indexed8, "ldrsb",
- [(set GPR64:$Rt, (sextloadi8 am_indexed8:$addr))]>;
-
-// load sign-extended word
-def LDRSWui : LoadUI<0b10, 0, 0b10, GPR64, am_indexed32, "ldrsw",
- [(set GPR64:$Rt, (sextloadi32 am_indexed32:$addr))]>;
-
-// load zero-extended word
-def : Pat<(i64 (zextloadi32 am_indexed32:$addr)),
- (SUBREG_TO_REG (i64 0), (LDRWui am_indexed32:$addr), sub_32)>;
-
-// Pre-fetch.
-def PRFMui : PrefetchUI<0b11, 0, 0b10, "prfm",
- [(ARM64Prefetch imm:$Rt, am_indexed64:$addr)]>;
-
-//---
-// (literal)
-def LDRWl : LoadLiteral<0b00, 0, GPR32, "ldr">;
-def LDRXl : LoadLiteral<0b01, 0, GPR64, "ldr">;
-def LDRSl : LoadLiteral<0b00, 1, FPR32, "ldr">;
-def LDRDl : LoadLiteral<0b01, 1, FPR64, "ldr">;
-def LDRQl : LoadLiteral<0b10, 1, FPR128, "ldr">;
-
-// load sign-extended word
-def LDRSWl : LoadLiteral<0b10, 0, GPR64, "ldrsw">;
-
-// prefetch
-def PRFMl : PrefetchLiteral<0b11, 0, "prfm", []>;
-// [(ARM64Prefetch imm:$Rt, tglobaladdr:$label)]>;
-
-//---
-// (unscaled immediate)
-def LDURXi : LoadUnscaled<0b11, 0, 0b01, GPR64, am_unscaled64, "ldur",
- [(set GPR64:$Rt, (load am_unscaled64:$addr))]>;
-def LDURWi : LoadUnscaled<0b10, 0, 0b01, GPR32, am_unscaled32, "ldur",
- [(set GPR32:$Rt, (load am_unscaled32:$addr))]>;
-def LDURBi : LoadUnscaled<0b00, 1, 0b01, FPR8, am_unscaled8, "ldur",
- [(set FPR8:$Rt, (load am_unscaled8:$addr))]>;
-def LDURHi : LoadUnscaled<0b01, 1, 0b01, FPR16, am_unscaled16, "ldur",
- [(set FPR16:$Rt, (load am_unscaled16:$addr))]>;
-def LDURSi : LoadUnscaled<0b10, 1, 0b01, FPR32, am_unscaled32, "ldur",
- [(set (f32 FPR32:$Rt), (load am_unscaled32:$addr))]>;
-def LDURDi : LoadUnscaled<0b11, 1, 0b01, FPR64, am_unscaled64, "ldur",
- [(set (f64 FPR64:$Rt), (load am_unscaled64:$addr))]>;
-def LDURQi : LoadUnscaled<0b00, 1, 0b11, FPR128, am_unscaled128, "ldur",
- [(set (v2f64 FPR128:$Rt), (load am_unscaled128:$addr))]>;
-
-def LDURHHi
- : LoadUnscaled<0b01, 0, 0b01, GPR32, am_unscaled16, "ldurh",
- [(set GPR32:$Rt, (zextloadi16 am_unscaled16:$addr))]>;
-def LDURBBi
- : LoadUnscaled<0b00, 0, 0b01, GPR32, am_unscaled8, "ldurb",
- [(set GPR32:$Rt, (zextloadi8 am_unscaled8:$addr))]>;
-
-// Match all 64-bit-wide loads whose type is compatible with FPR64
-def : Pat<(v2f32 (load am_unscaled64:$addr)), (LDURDi am_unscaled64:$addr)>;
-def : Pat<(v1f64 (load am_unscaled64:$addr)), (LDURDi am_unscaled64:$addr)>;
-def : Pat<(v8i8 (load am_unscaled64:$addr)), (LDURDi am_unscaled64:$addr)>;
-def : Pat<(v4i16 (load am_unscaled64:$addr)), (LDURDi am_unscaled64:$addr)>;
-def : Pat<(v2i32 (load am_unscaled64:$addr)), (LDURDi am_unscaled64:$addr)>;
-def : Pat<(v1i64 (load am_unscaled64:$addr)), (LDURDi am_unscaled64:$addr)>;
-
-// Match all 128-bit-wide loads whose type is compatible with FPR128
-def : Pat<(v4f32 (load am_unscaled128:$addr)), (LDURQi am_unscaled128:$addr)>;
-def : Pat<(v2f64 (load am_unscaled128:$addr)), (LDURQi am_unscaled128:$addr)>;
-def : Pat<(v16i8 (load am_unscaled128:$addr)), (LDURQi am_unscaled128:$addr)>;
-def : Pat<(v8i16 (load am_unscaled128:$addr)), (LDURQi am_unscaled128:$addr)>;
-def : Pat<(v4i32 (load am_unscaled128:$addr)), (LDURQi am_unscaled128:$addr)>;
-def : Pat<(v2i64 (load am_unscaled128:$addr)), (LDURQi am_unscaled128:$addr)>;
-def : Pat<(f128 (load am_unscaled128:$addr)), (LDURQi am_unscaled128:$addr)>;
-
-// extload -> zextload
-def : Pat<(i32 (extloadi16 am_unscaled16:$addr)), (LDURHHi am_unscaled16:$addr)>;
-def : Pat<(i32 (extloadi8 am_unscaled8:$addr)), (LDURBBi am_unscaled8:$addr)>;
-def : Pat<(i32 (extloadi1 am_unscaled8:$addr)), (LDURBBi am_unscaled8:$addr)>;
-def : Pat<(i64 (extloadi32 am_unscaled32:$addr)),
- (SUBREG_TO_REG (i64 0), (LDURWi am_unscaled32:$addr), sub_32)>;
-def : Pat<(i64 (extloadi16 am_unscaled16:$addr)),
- (SUBREG_TO_REG (i64 0), (LDURHHi am_unscaled16:$addr), sub_32)>;
-def : Pat<(i64 (extloadi8 am_unscaled8:$addr)),
- (SUBREG_TO_REG (i64 0), (LDURBBi am_unscaled8:$addr), sub_32)>;
-def : Pat<(i64 (extloadi1 am_unscaled8:$addr)),
- (SUBREG_TO_REG (i64 0), (LDURBBi am_unscaled8:$addr), sub_32)>;
-// unscaled zext
-def : Pat<(i32 (zextloadi16 am_unscaled16:$addr)),
- (LDURHHi am_unscaled16:$addr)>;
-def : Pat<(i32 (zextloadi8 am_unscaled8:$addr)),
- (LDURBBi am_unscaled8:$addr)>;
-def : Pat<(i32 (zextloadi1 am_unscaled8:$addr)),
- (LDURBBi am_unscaled8:$addr)>;
-def : Pat<(i64 (zextloadi32 am_unscaled32:$addr)),
- (SUBREG_TO_REG (i64 0), (LDURWi am_unscaled32:$addr), sub_32)>;
-def : Pat<(i64 (zextloadi16 am_unscaled16:$addr)),
- (SUBREG_TO_REG (i64 0), (LDURHHi am_unscaled16:$addr), sub_32)>;
-def : Pat<(i64 (zextloadi8 am_unscaled8:$addr)),
- (SUBREG_TO_REG (i64 0), (LDURBBi am_unscaled8:$addr), sub_32)>;
-def : Pat<(i64 (zextloadi1 am_unscaled8:$addr)),
- (SUBREG_TO_REG (i64 0), (LDURBBi am_unscaled8:$addr), sub_32)>;
-
-
-//---
-// LDR mnemonics fall back to LDUR for negative or unaligned offsets.
-
-// Define new assembler match classes since we only want to match these when
-// they don't otherwise match the scaled addressing mode for LDR/STR. Don't
-// associate a DiagnosticType either, as we want the diagnostic for the
-// canonical form (the scaled operand) to take precedence.
-def MemoryUnscaledFB8Operand : AsmOperandClass {
- let Name = "MemoryUnscaledFB8";
- let RenderMethod = "addMemoryUnscaledOperands";
-}
-def MemoryUnscaledFB16Operand : AsmOperandClass {
- let Name = "MemoryUnscaledFB16";
- let RenderMethod = "addMemoryUnscaledOperands";
-}
-def MemoryUnscaledFB32Operand : AsmOperandClass {
- let Name = "MemoryUnscaledFB32";
- let RenderMethod = "addMemoryUnscaledOperands";
-}
-def MemoryUnscaledFB64Operand : AsmOperandClass {
- let Name = "MemoryUnscaledFB64";
- let RenderMethod = "addMemoryUnscaledOperands";
-}
-def MemoryUnscaledFB128Operand : AsmOperandClass {
- let Name = "MemoryUnscaledFB128";
- let RenderMethod = "addMemoryUnscaledOperands";
-}
-def am_unscaled_fb8 : Operand<i64> {
- let ParserMatchClass = MemoryUnscaledFB8Operand;
- let MIOperandInfo = (ops GPR64sp:$base, i64imm:$offset);
-}
-def am_unscaled_fb16 : Operand<i64> {
- let ParserMatchClass = MemoryUnscaledFB16Operand;
- let MIOperandInfo = (ops GPR64sp:$base, i64imm:$offset);
-}
-def am_unscaled_fb32 : Operand<i64> {
- let ParserMatchClass = MemoryUnscaledFB32Operand;
- let MIOperandInfo = (ops GPR64sp:$base, i64imm:$offset);
-}
-def am_unscaled_fb64 : Operand<i64> {
- let ParserMatchClass = MemoryUnscaledFB64Operand;
- let MIOperandInfo = (ops GPR64sp:$base, i64imm:$offset);
-}
-def am_unscaled_fb128 : Operand<i64> {
- let ParserMatchClass = MemoryUnscaledFB128Operand;
- let MIOperandInfo = (ops GPR64sp:$base, i64imm:$offset);
-}
-def : InstAlias<"ldr $Rt, $addr", (LDURXi GPR64:$Rt, am_unscaled_fb64:$addr)>;
-def : InstAlias<"ldr $Rt, $addr", (LDURWi GPR32:$Rt, am_unscaled_fb32:$addr)>;
-def : InstAlias<"ldr $Rt, $addr", (LDURBi FPR8:$Rt, am_unscaled_fb8:$addr)>;
-def : InstAlias<"ldr $Rt, $addr", (LDURHi FPR16:$Rt, am_unscaled_fb16:$addr)>;
-def : InstAlias<"ldr $Rt, $addr", (LDURSi FPR32:$Rt, am_unscaled_fb32:$addr)>;
-def : InstAlias<"ldr $Rt, $addr", (LDURDi FPR64:$Rt, am_unscaled_fb64:$addr)>;
-def : InstAlias<"ldr $Rt, $addr", (LDURQi FPR128:$Rt, am_unscaled_fb128:$addr)>;
-
-// zextload -> i64
-def : Pat<(i64 (zextloadi8 am_unscaled8:$addr)),
- (SUBREG_TO_REG (i64 0), (LDURBBi am_unscaled8:$addr), sub_32)>;
-def : Pat<(i64 (zextloadi16 am_unscaled16:$addr)),
- (SUBREG_TO_REG (i64 0), (LDURHHi am_unscaled16:$addr), sub_32)>;
-
-// load sign-extended half-word
-def LDURSHWi
- : LoadUnscaled<0b01, 0, 0b11, GPR32, am_unscaled16, "ldursh",
- [(set GPR32:$Rt, (sextloadi16 am_unscaled16:$addr))]>;
-def LDURSHXi
- : LoadUnscaled<0b01, 0, 0b10, GPR64, am_unscaled16, "ldursh",
- [(set GPR64:$Rt, (sextloadi16 am_unscaled16:$addr))]>;
-
-// load sign-extended byte
-def LDURSBWi
- : LoadUnscaled<0b00, 0, 0b11, GPR32, am_unscaled8, "ldursb",
- [(set GPR32:$Rt, (sextloadi8 am_unscaled8:$addr))]>;
-def LDURSBXi
- : LoadUnscaled<0b00, 0, 0b10, GPR64, am_unscaled8, "ldursb",
- [(set GPR64:$Rt, (sextloadi8 am_unscaled8:$addr))]>;
-
-// load sign-extended word
-def LDURSWi
- : LoadUnscaled<0b10, 0, 0b10, GPR64, am_unscaled32, "ldursw",
- [(set GPR64:$Rt, (sextloadi32 am_unscaled32:$addr))]>;
-
-// Zero- and sign-extending aliases from the generic LDR* mnemonics to LDUR*.
-def : InstAlias<"ldrb $Rt, $addr", (LDURBBi GPR32:$Rt, am_unscaled_fb8:$addr)>;
-def : InstAlias<"ldrh $Rt, $addr", (LDURHHi GPR32:$Rt, am_unscaled_fb16:$addr)>;
-def : InstAlias<"ldrsb $Rt, $addr", (LDURSBWi GPR32:$Rt, am_unscaled_fb8:$addr)>;
-def : InstAlias<"ldrsb $Rt, $addr", (LDURSBXi GPR64:$Rt, am_unscaled_fb8:$addr)>;
-def : InstAlias<"ldrsh $Rt, $addr", (LDURSHWi GPR32:$Rt, am_unscaled_fb16:$addr)>;
-def : InstAlias<"ldrsh $Rt, $addr", (LDURSHXi GPR64:$Rt, am_unscaled_fb16:$addr)>;
-def : InstAlias<"ldrsw $Rt, $addr", (LDURSWi GPR64:$Rt, am_unscaled_fb32:$addr)>;
-
-// Pre-fetch.
-def PRFUMi : PrefetchUnscaled<0b11, 0, 0b10, "prfum",
- [(ARM64Prefetch imm:$Rt, am_unscaled64:$addr)]>;
-
-//---
-// (unscaled immediate, unprivileged)
-def LDTRXi : LoadUnprivileged<0b11, 0, 0b01, GPR64, "ldtr">;
-def LDTRWi : LoadUnprivileged<0b10, 0, 0b01, GPR32, "ldtr">;
-
-def LDTRHi : LoadUnprivileged<0b01, 0, 0b01, GPR32, "ldtrh">;
-def LDTRBi : LoadUnprivileged<0b00, 0, 0b01, GPR32, "ldtrb">;
-
-// load sign-extended half-word
-def LDTRSHWi : LoadUnprivileged<0b01, 0, 0b11, GPR32, "ldtrsh">;
-def LDTRSHXi : LoadUnprivileged<0b01, 0, 0b10, GPR64, "ldtrsh">;
-
-// load sign-extended byte
-def LDTRSBWi : LoadUnprivileged<0b00, 0, 0b11, GPR32, "ldtrsb">;
-def LDTRSBXi : LoadUnprivileged<0b00, 0, 0b10, GPR64, "ldtrsb">;
-
-// load sign-extended word
-def LDTRSWi : LoadUnprivileged<0b10, 0, 0b10, GPR64, "ldtrsw">;
-
-//---
-// (immediate pre-indexed)
-def LDRWpre : LoadPreIdx<0b10, 0, 0b01, GPR32, "ldr">;
-def LDRXpre : LoadPreIdx<0b11, 0, 0b01, GPR64, "ldr">;
-def LDRBpre : LoadPreIdx<0b00, 1, 0b01, FPR8, "ldr">;
-def LDRHpre : LoadPreIdx<0b01, 1, 0b01, FPR16, "ldr">;
-def LDRSpre : LoadPreIdx<0b10, 1, 0b01, FPR32, "ldr">;
-def LDRDpre : LoadPreIdx<0b11, 1, 0b01, FPR64, "ldr">;
-def LDRQpre : LoadPreIdx<0b00, 1, 0b11, FPR128, "ldr">;
-
-// load sign-extended half-word
-def LDRSHWpre : LoadPreIdx<0b01, 0, 0b11, GPR32, "ldrsh">;
-def LDRSHXpre : LoadPreIdx<0b01, 0, 0b10, GPR64, "ldrsh">;
-
-// load sign-extended byte
-def LDRSBWpre : LoadPreIdx<0b00, 0, 0b11, GPR32, "ldrsb">;
-def LDRSBXpre : LoadPreIdx<0b00, 0, 0b10, GPR64, "ldrsb">;
-
-// load zero-extended byte
-def LDRBBpre : LoadPreIdx<0b00, 0, 0b01, GPR32, "ldrb">;
-def LDRHHpre : LoadPreIdx<0b01, 0, 0b01, GPR32, "ldrh">;
-
-// load sign-extended word
-def LDRSWpre : LoadPreIdx<0b10, 0, 0b10, GPR64, "ldrsw">;
-
-// ISel pseudos and patterns. See expanded comment on LoadPreIdxPseudo.
-def LDRDpre_isel : LoadPreIdxPseudo<FPR64>;
-def LDRSpre_isel : LoadPreIdxPseudo<FPR32>;
-def LDRXpre_isel : LoadPreIdxPseudo<GPR64>;
-def LDRWpre_isel : LoadPreIdxPseudo<GPR32>;
-def LDRHHpre_isel : LoadPreIdxPseudo<GPR32>;
-def LDRBBpre_isel : LoadPreIdxPseudo<GPR32>;
-
-def LDRSWpre_isel : LoadPreIdxPseudo<GPR64>;
-def LDRSHWpre_isel : LoadPreIdxPseudo<GPR32>;
-def LDRSHXpre_isel : LoadPreIdxPseudo<GPR64>;
-def LDRSBWpre_isel : LoadPreIdxPseudo<GPR32>;
-def LDRSBXpre_isel : LoadPreIdxPseudo<GPR64>;
-
-//---
-// (immediate post-indexed)
-def LDRWpost : LoadPostIdx<0b10, 0, 0b01, GPR32, "ldr">;
-def LDRXpost : LoadPostIdx<0b11, 0, 0b01, GPR64, "ldr">;
-def LDRBpost : LoadPostIdx<0b00, 1, 0b01, FPR8, "ldr">;
-def LDRHpost : LoadPostIdx<0b01, 1, 0b01, FPR16, "ldr">;
-def LDRSpost : LoadPostIdx<0b10, 1, 0b01, FPR32, "ldr">;
-def LDRDpost : LoadPostIdx<0b11, 1, 0b01, FPR64, "ldr">;
-def LDRQpost : LoadPostIdx<0b00, 1, 0b11, FPR128, "ldr">;
-
-// load sign-extended half-word
-def LDRSHWpost : LoadPostIdx<0b01, 0, 0b11, GPR32, "ldrsh">;
-def LDRSHXpost : LoadPostIdx<0b01, 0, 0b10, GPR64, "ldrsh">;
-
-// load sign-extended byte
-def LDRSBWpost : LoadPostIdx<0b00, 0, 0b11, GPR32, "ldrsb">;
-def LDRSBXpost : LoadPostIdx<0b00, 0, 0b10, GPR64, "ldrsb">;
-
-// load zero-extended byte
-def LDRBBpost : LoadPostIdx<0b00, 0, 0b01, GPR32, "ldrb">;
-def LDRHHpost : LoadPostIdx<0b01, 0, 0b01, GPR32, "ldrh">;
-
-// load sign-extended word
-def LDRSWpost : LoadPostIdx<0b10, 0, 0b10, GPR64, "ldrsw">;
-
-// ISel pseudos and patterns. See expanded comment on LoadPostIdxPseudo.
-def LDRDpost_isel : LoadPostIdxPseudo<FPR64>;
-def LDRSpost_isel : LoadPostIdxPseudo<FPR32>;
-def LDRXpost_isel : LoadPostIdxPseudo<GPR64>;
-def LDRWpost_isel : LoadPostIdxPseudo<GPR32>;
-def LDRHHpost_isel : LoadPostIdxPseudo<GPR32>;
-def LDRBBpost_isel : LoadPostIdxPseudo<GPR32>;
-
-def LDRSWpost_isel : LoadPostIdxPseudo<GPR64>;
-def LDRSHWpost_isel : LoadPostIdxPseudo<GPR32>;
-def LDRSHXpost_isel : LoadPostIdxPseudo<GPR64>;
-def LDRSBWpost_isel : LoadPostIdxPseudo<GPR32>;
-def LDRSBXpost_isel : LoadPostIdxPseudo<GPR64>;
-
-//===----------------------------------------------------------------------===//
-// Store instructions.
-//===----------------------------------------------------------------------===//
-
-// Pair (indexed, offset)
-// FIXME: Use dedicated range-checked addressing mode operand here.
-def STPWi : StorePairOffset<0b00, 0, GPR32, am_indexed32simm7, "stp">;
-def STPXi : StorePairOffset<0b10, 0, GPR64, am_indexed64simm7, "stp">;
-def STPSi : StorePairOffset<0b00, 1, FPR32, am_indexed32simm7, "stp">;
-def STPDi : StorePairOffset<0b01, 1, FPR64, am_indexed64simm7, "stp">;
-def STPQi : StorePairOffset<0b10, 1, FPR128, am_indexed128simm7, "stp">;
-
-// Pair (pre-indexed)
-def STPWpre : StorePairPreIdx<0b00, 0, GPR32, am_indexed32simm7, "stp">;
-def STPXpre : StorePairPreIdx<0b10, 0, GPR64, am_indexed64simm7, "stp">;
-def STPSpre : StorePairPreIdx<0b00, 1, FPR32, am_indexed32simm7, "stp">;
-def STPDpre : StorePairPreIdx<0b01, 1, FPR64, am_indexed64simm7, "stp">;
-def STPQpre : StorePairPreIdx<0b10, 1, FPR128, am_indexed128simm7, "stp">;
-
-// Pair (post-indexed)
-def STPWpost : StorePairPostIdx<0b00, 0, GPR32, simm7s4, "stp">;
-def STPXpost : StorePairPostIdx<0b10, 0, GPR64, simm7s8, "stp">;
-def STPSpost : StorePairPostIdx<0b00, 1, FPR32, simm7s4, "stp">;
-def STPDpost : StorePairPostIdx<0b01, 1, FPR64, simm7s8, "stp">;
-def STPQpost : StorePairPostIdx<0b10, 1, FPR128, simm7s16, "stp">;
-
-// Pair (no allocate)
-def STNPWi : StorePairNoAlloc<0b00, 0, GPR32, am_indexed32simm7, "stnp">;
-def STNPXi : StorePairNoAlloc<0b10, 0, GPR64, am_indexed64simm7, "stnp">;
-def STNPSi : StorePairNoAlloc<0b00, 1, FPR32, am_indexed32simm7, "stnp">;
-def STNPDi : StorePairNoAlloc<0b01, 1, FPR64, am_indexed64simm7, "stnp">;
-def STNPQi : StorePairNoAlloc<0b10, 1, FPR128, am_indexed128simm7, "stnp">;
-
-//---
-// (register offset)
-
-let AddedComplexity = 10 in {
-
-// Integer
-def STRHHro : Store16RO<0b01, 0, 0b00, GPR32, "strh",
- [(truncstorei16 GPR32:$Rt, ro_indexed16:$addr)]>;
-def STRBBro : Store8RO<0b00, 0, 0b00, GPR32, "strb",
- [(truncstorei8 GPR32:$Rt, ro_indexed8:$addr)]>;
-def STRWro : Store32RO<0b10, 0, 0b00, GPR32, "str",
- [(store GPR32:$Rt, ro_indexed32:$addr)]>;
-def STRXro : Store64RO<0b11, 0, 0b00, GPR64, "str",
- [(store GPR64:$Rt, ro_indexed64:$addr)]>;
-
-// truncstore i64
-def : Pat<(truncstorei8 GPR64:$Rt, ro_indexed8:$addr),
- (STRBBro (EXTRACT_SUBREG GPR64:$Rt, sub_32), ro_indexed8:$addr)>;
-def : Pat<(truncstorei16 GPR64:$Rt, ro_indexed16:$addr),
- (STRHHro (EXTRACT_SUBREG GPR64:$Rt, sub_32), ro_indexed16:$addr)>;
-def : Pat<(truncstorei32 GPR64:$Rt, ro_indexed32:$addr),
- (STRWro (EXTRACT_SUBREG GPR64:$Rt, sub_32), ro_indexed32:$addr)>;
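-// A truncating store of an i64 only writes the low bits, so the patterns
-// extract the sub_32 W subregister and reuse the 32-bit store instructions.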
-
-
-// Floating-point
-def STRBro : Store8RO<0b00, 1, 0b00, FPR8, "str",
- [(store FPR8:$Rt, ro_indexed8:$addr)]>;
-def STRHro : Store16RO<0b01, 1, 0b00, FPR16, "str",
- [(store FPR16:$Rt, ro_indexed16:$addr)]>;
-def STRSro : Store32RO<0b10, 1, 0b00, FPR32, "str",
- [(store (f32 FPR32:$Rt), ro_indexed32:$addr)]>;
-def STRDro : Store64RO<0b11, 1, 0b00, FPR64, "str",
- [(store (f64 FPR64:$Rt), ro_indexed64:$addr)]>;
-def STRQro : Store128RO<0b00, 1, 0b10, FPR128, "str", []> {
- let mayStore = 1;
-}
-
-// Match all 64-bit-wide stores whose type is compatible with FPR64
-def : Pat<(store (v2f32 FPR64:$Rn), ro_indexed64:$addr),
- (STRDro FPR64:$Rn, ro_indexed64:$addr)>;
-def : Pat<(store (v1f64 FPR64:$Rn), ro_indexed64:$addr),
- (STRDro FPR64:$Rn, ro_indexed64:$addr)>;
-def : Pat<(store (v8i8 FPR64:$Rn), ro_indexed64:$addr),
- (STRDro FPR64:$Rn, ro_indexed64:$addr)>;
-def : Pat<(store (v4i16 FPR64:$Rn), ro_indexed64:$addr),
- (STRDro FPR64:$Rn, ro_indexed64:$addr)>;
-def : Pat<(store (v2i32 FPR64:$Rn), ro_indexed64:$addr),
- (STRDro FPR64:$Rn, ro_indexed64:$addr)>;
-def : Pat<(store (v1i64 FPR64:$Rn), ro_indexed64:$addr),
- (STRDro FPR64:$Rn, ro_indexed64:$addr)>;
-
-// Match all 128-bit-wide stores whose type is compatible with FPR128
-def : Pat<(store (v4f32 FPR128:$Rn), ro_indexed128:$addr),
- (STRQro FPR128:$Rn, ro_indexed128:$addr)>;
-def : Pat<(store (v2f64 FPR128:$Rn), ro_indexed128:$addr),
- (STRQro FPR128:$Rn, ro_indexed128:$addr)>;
-def : Pat<(store (v16i8 FPR128:$Rn), ro_indexed128:$addr),
- (STRQro FPR128:$Rn, ro_indexed128:$addr)>;
-def : Pat<(store (v8i16 FPR128:$Rn), ro_indexed128:$addr),
- (STRQro FPR128:$Rn, ro_indexed128:$addr)>;
-def : Pat<(store (v4i32 FPR128:$Rn), ro_indexed128:$addr),
- (STRQro FPR128:$Rn, ro_indexed128:$addr)>;
-def : Pat<(store (v2i64 FPR128:$Rn), ro_indexed128:$addr),
- (STRQro FPR128:$Rn, ro_indexed128:$addr)>;
-def : Pat<(store (f128 FPR128:$Rn), ro_indexed128:$addr),
- (STRQro FPR128:$Rn, ro_indexed128:$addr)>;
-
-//---
-// (unsigned immediate)
-def STRXui : StoreUI<0b11, 0, 0b00, GPR64, am_indexed64, "str",
- [(store GPR64:$Rt, am_indexed64:$addr)]>;
-def STRWui : StoreUI<0b10, 0, 0b00, GPR32, am_indexed32, "str",
- [(store GPR32:$Rt, am_indexed32:$addr)]>;
-def STRBui : StoreUI<0b00, 1, 0b00, FPR8, am_indexed8, "str",
- [(store FPR8:$Rt, am_indexed8:$addr)]>;
-def STRHui : StoreUI<0b01, 1, 0b00, FPR16, am_indexed16, "str",
- [(store FPR16:$Rt, am_indexed16:$addr)]>;
-def STRSui : StoreUI<0b10, 1, 0b00, FPR32, am_indexed32, "str",
- [(store (f32 FPR32:$Rt), am_indexed32:$addr)]>;
-def STRDui : StoreUI<0b11, 1, 0b00, FPR64, am_indexed64, "str",
- [(store (f64 FPR64:$Rt), am_indexed64:$addr)]>;
-def STRQui : StoreUI<0b00, 1, 0b10, FPR128, am_indexed128, "str", []> {
- let mayStore = 1;
-}
-
-// Match all 64-bit-wide stores whose type is compatible with FPR64
-def : Pat<(store (v2f32 FPR64:$Rn), am_indexed64:$addr),
- (STRDui FPR64:$Rn, am_indexed64:$addr)>;
-def : Pat<(store (v1f64 FPR64:$Rn), am_indexed64:$addr),
- (STRDui FPR64:$Rn, am_indexed64:$addr)>;
-def : Pat<(store (v8i8 FPR64:$Rn), am_indexed64:$addr),
- (STRDui FPR64:$Rn, am_indexed64:$addr)>;
-def : Pat<(store (v4i16 FPR64:$Rn), am_indexed64:$addr),
- (STRDui FPR64:$Rn, am_indexed64:$addr)>;
-def : Pat<(store (v2i32 FPR64:$Rn), am_indexed64:$addr),
- (STRDui FPR64:$Rn, am_indexed64:$addr)>;
-def : Pat<(store (v1i64 FPR64:$Rn), am_indexed64:$addr),
- (STRDui FPR64:$Rn, am_indexed64:$addr)>;
-
-// Match all 128-bit-wide stores whose type is compatible with FPR128
-def : Pat<(store (v4f32 FPR128:$Rn), am_indexed128:$addr),
- (STRQui FPR128:$Rn, am_indexed128:$addr)>;
-def : Pat<(store (v2f64 FPR128:$Rn), am_indexed128:$addr),
- (STRQui FPR128:$Rn, am_indexed128:$addr)>;
-def : Pat<(store (v16i8 FPR128:$Rn), am_indexed128:$addr),
- (STRQui FPR128:$Rn, am_indexed128:$addr)>;
-def : Pat<(store (v8i16 FPR128:$Rn), am_indexed128:$addr),
- (STRQui FPR128:$Rn, am_indexed128:$addr)>;
-def : Pat<(store (v4i32 FPR128:$Rn), am_indexed128:$addr),
- (STRQui FPR128:$Rn, am_indexed128:$addr)>;
-def : Pat<(store (v2i64 FPR128:$Rn), am_indexed128:$addr),
- (STRQui FPR128:$Rn, am_indexed128:$addr)>;
-def : Pat<(store (f128 FPR128:$Rn), am_indexed128:$addr),
- (STRQui FPR128:$Rn, am_indexed128:$addr)>;
-
-def STRHHui : StoreUI<0b01, 0, 0b00, GPR32, am_indexed16, "strh",
- [(truncstorei16 GPR32:$Rt, am_indexed16:$addr)]>;
-def STRBBui : StoreUI<0b00, 0, 0b00, GPR32, am_indexed8, "strb",
- [(truncstorei8 GPR32:$Rt, am_indexed8:$addr)]>;
-
-// truncstore i64
-def : Pat<(truncstorei32 GPR64:$Rt, am_indexed32:$addr),
- (STRWui (EXTRACT_SUBREG GPR64:$Rt, sub_32), am_indexed32:$addr)>;
-def : Pat<(truncstorei16 GPR64:$Rt, am_indexed16:$addr),
- (STRHHui (EXTRACT_SUBREG GPR64:$Rt, sub_32), am_indexed16:$addr)>;
-def : Pat<(truncstorei8 GPR64:$Rt, am_indexed8:$addr),
- (STRBBui (EXTRACT_SUBREG GPR64:$Rt, sub_32), am_indexed8:$addr)>;
-
-} // AddedComplexity = 10
-
-//---
-// (unscaled immediate)
-def STURXi : StoreUnscaled<0b11, 0, 0b00, GPR64, am_unscaled64, "stur",
- [(store GPR64:$Rt, am_unscaled64:$addr)]>;
-def STURWi : StoreUnscaled<0b10, 0, 0b00, GPR32, am_unscaled32, "stur",
- [(store GPR32:$Rt, am_unscaled32:$addr)]>;
-def STURBi : StoreUnscaled<0b00, 1, 0b00, FPR8, am_unscaled8, "stur",
- [(store FPR8:$Rt, am_unscaled8:$addr)]>;
-def STURHi : StoreUnscaled<0b01, 1, 0b00, FPR16, am_unscaled16, "stur",
- [(store FPR16:$Rt, am_unscaled16:$addr)]>;
-def STURSi : StoreUnscaled<0b10, 1, 0b00, FPR32, am_unscaled32, "stur",
- [(store (f32 FPR32:$Rt), am_unscaled32:$addr)]>;
-def STURDi : StoreUnscaled<0b11, 1, 0b00, FPR64, am_unscaled64, "stur",
- [(store (f64 FPR64:$Rt), am_unscaled64:$addr)]>;
-def STURQi : StoreUnscaled<0b00, 1, 0b10, FPR128, am_unscaled128, "stur",
- [(store (v2f64 FPR128:$Rt), am_unscaled128:$addr)]>;
-def STURHHi : StoreUnscaled<0b01, 0, 0b00, GPR32, am_unscaled16, "sturh",
- [(truncstorei16 GPR32:$Rt, am_unscaled16:$addr)]>;
-def STURBBi : StoreUnscaled<0b00, 0, 0b00, GPR32, am_unscaled8, "sturb",
- [(truncstorei8 GPR32:$Rt, am_unscaled8:$addr)]>;
-
-// Match all 64-bit-wide stores whose type is compatible with FPR64
-def : Pat<(store (v2f32 FPR64:$Rn), am_unscaled64:$addr),
- (STURDi FPR64:$Rn, am_unscaled64:$addr)>;
-def : Pat<(store (v1f64 FPR64:$Rn), am_unscaled64:$addr),
- (STURDi FPR64:$Rn, am_unscaled64:$addr)>;
-def : Pat<(store (v8i8 FPR64:$Rn), am_unscaled64:$addr),
- (STURDi FPR64:$Rn, am_unscaled64:$addr)>;
-def : Pat<(store (v4i16 FPR64:$Rn), am_unscaled64:$addr),
- (STURDi FPR64:$Rn, am_unscaled64:$addr)>;
-def : Pat<(store (v2i32 FPR64:$Rn), am_unscaled64:$addr),
- (STURDi FPR64:$Rn, am_unscaled64:$addr)>;
-def : Pat<(store (v1i64 FPR64:$Rn), am_unscaled64:$addr),
- (STURDi FPR64:$Rn, am_unscaled64:$addr)>;
-
-// Match all 128-bit-wide stores whose type is compatible with FPR128
-def : Pat<(store (v4f32 FPR128:$Rn), am_unscaled128:$addr),
- (STURQi FPR128:$Rn, am_unscaled128:$addr)>;
-def : Pat<(store (v2f64 FPR128:$Rn), am_unscaled128:$addr),
- (STURQi FPR128:$Rn, am_unscaled128:$addr)>;
-def : Pat<(store (v16i8 FPR128:$Rn), am_unscaled128:$addr),
- (STURQi FPR128:$Rn, am_unscaled128:$addr)>;
-def : Pat<(store (v8i16 FPR128:$Rn), am_unscaled128:$addr),
- (STURQi FPR128:$Rn, am_unscaled128:$addr)>;
-def : Pat<(store (v4i32 FPR128:$Rn), am_unscaled128:$addr),
- (STURQi FPR128:$Rn, am_unscaled128:$addr)>;
-def : Pat<(store (v2i64 FPR128:$Rn), am_unscaled128:$addr),
- (STURQi FPR128:$Rn, am_unscaled128:$addr)>;
-def : Pat<(store (f128 FPR128:$Rn), am_unscaled128:$addr),
- (STURQi FPR128:$Rn, am_unscaled128:$addr)>;
-
-// unscaled i64 truncating stores
-def : Pat<(truncstorei32 GPR64:$Rt, am_unscaled32:$addr),
- (STURWi (EXTRACT_SUBREG GPR64:$Rt, sub_32), am_unscaled32:$addr)>;
-def : Pat<(truncstorei16 GPR64:$Rt, am_unscaled16:$addr),
- (STURHHi (EXTRACT_SUBREG GPR64:$Rt, sub_32), am_unscaled16:$addr)>;
-def : Pat<(truncstorei8 GPR64:$Rt, am_unscaled8:$addr),
- (STURBBi (EXTRACT_SUBREG GPR64:$Rt, sub_32), am_unscaled8:$addr)>;
-
-//---
-// STR mnemonics fall back to STUR for negative or unaligned offsets.
-def : InstAlias<"str $Rt, $addr", (STURXi GPR64:$Rt, am_unscaled_fb64:$addr)>;
-def : InstAlias<"str $Rt, $addr", (STURWi GPR32:$Rt, am_unscaled_fb32:$addr)>;
-def : InstAlias<"str $Rt, $addr", (STURBi FPR8:$Rt, am_unscaled_fb8:$addr)>;
-def : InstAlias<"str $Rt, $addr", (STURHi FPR16:$Rt, am_unscaled_fb16:$addr)>;
-def : InstAlias<"str $Rt, $addr", (STURSi FPR32:$Rt, am_unscaled_fb32:$addr)>;
-def : InstAlias<"str $Rt, $addr", (STURDi FPR64:$Rt, am_unscaled_fb64:$addr)>;
-def : InstAlias<"str $Rt, $addr", (STURQi FPR128:$Rt, am_unscaled_fb128:$addr)>;
-
-def : InstAlias<"strb $Rt, $addr", (STURBBi GPR32:$Rt, am_unscaled_fb8:$addr)>;
-def : InstAlias<"strh $Rt, $addr", (STURHHi GPR32:$Rt, am_unscaled_fb16:$addr)>;
-
-//---
-// (unscaled immediate, unprivileged)
-def STTRWi : StoreUnprivileged<0b10, 0, 0b00, GPR32, "sttr">;
-def STTRXi : StoreUnprivileged<0b11, 0, 0b00, GPR64, "sttr">;
-
-def STTRHi : StoreUnprivileged<0b01, 0, 0b00, GPR32, "sttrh">;
-def STTRBi : StoreUnprivileged<0b00, 0, 0b00, GPR32, "sttrb">;
-
-//---
-// (immediate pre-indexed)
-def STRWpre : StorePreIdx<0b10, 0, 0b00, GPR32, "str">;
-def STRXpre : StorePreIdx<0b11, 0, 0b00, GPR64, "str">;
-def STRBpre : StorePreIdx<0b00, 1, 0b00, FPR8, "str">;
-def STRHpre : StorePreIdx<0b01, 1, 0b00, FPR16, "str">;
-def STRSpre : StorePreIdx<0b10, 1, 0b00, FPR32, "str">;
-def STRDpre : StorePreIdx<0b11, 1, 0b00, FPR64, "str">;
-def STRQpre : StorePreIdx<0b00, 1, 0b10, FPR128, "str">;
-
-def STRBBpre : StorePreIdx<0b00, 0, 0b00, GPR32, "strb">;
-def STRHHpre : StorePreIdx<0b01, 0, 0b00, GPR32, "strh">;
-
-// ISel pseudos and patterns. See expanded comment on StorePreIdxPseudo.
-defm STRDpre : StorePreIdxPseudo<FPR64, f64, pre_store>;
-defm STRSpre : StorePreIdxPseudo<FPR32, f32, pre_store>;
-defm STRXpre : StorePreIdxPseudo<GPR64, i64, pre_store>;
-defm STRWpre : StorePreIdxPseudo<GPR32, i32, pre_store>;
-defm STRHHpre : StorePreIdxPseudo<GPR32, i32, pre_truncsti16>;
-defm STRBBpre : StorePreIdxPseudo<GPR32, i32, pre_truncsti8>;
-// truncstore i64
-def : Pat<(pre_truncsti32 GPR64:$Rt, am_noindex:$addr, simm9:$off),
- (STRWpre_isel (EXTRACT_SUBREG GPR64:$Rt, sub_32), am_noindex:$addr,
- simm9:$off)>;
-def : Pat<(pre_truncsti16 GPR64:$Rt, am_noindex:$addr, simm9:$off),
- (STRHHpre_isel (EXTRACT_SUBREG GPR64:$Rt, sub_32), am_noindex:$addr,
- simm9:$off)>;
-def : Pat<(pre_truncsti8 GPR64:$Rt, am_noindex:$addr, simm9:$off),
- (STRBBpre_isel (EXTRACT_SUBREG GPR64:$Rt, sub_32), am_noindex:$addr,
- simm9:$off)>;
-
-//---
-// (immediate post-indexed)
-def STRWpost : StorePostIdx<0b10, 0, 0b00, GPR32, "str">;
-def STRXpost : StorePostIdx<0b11, 0, 0b00, GPR64, "str">;
-def STRBpost : StorePostIdx<0b00, 1, 0b00, FPR8, "str">;
-def STRHpost : StorePostIdx<0b01, 1, 0b00, FPR16, "str">;
-def STRSpost : StorePostIdx<0b10, 1, 0b00, FPR32, "str">;
-def STRDpost : StorePostIdx<0b11, 1, 0b00, FPR64, "str">;
-def STRQpost : StorePostIdx<0b00, 1, 0b10, FPR128, "str">;
-
-def STRBBpost : StorePostIdx<0b00, 0, 0b00, GPR32, "strb">;
-def STRHHpost : StorePostIdx<0b01, 0, 0b00, GPR32, "strh">;
-
-// ISel pseudos and patterns. See expanded comment on StorePostIdxPseudo.
-defm STRDpost : StorePostIdxPseudo<FPR64, f64, post_store, STRDpost>;
-defm STRSpost : StorePostIdxPseudo<FPR32, f32, post_store, STRSpost>;
-defm STRXpost : StorePostIdxPseudo<GPR64, i64, post_store, STRXpost>;
-defm STRWpost : StorePostIdxPseudo<GPR32, i32, post_store, STRWpost>;
-defm STRHHpost : StorePostIdxPseudo<GPR32, i32, post_truncsti16, STRHHpost>;
-defm STRBBpost : StorePostIdxPseudo<GPR32, i32, post_truncsti8, STRBBpost>;
-// truncstore i64
-def : Pat<(post_truncsti32 GPR64:$Rt, am_noindex:$addr, simm9:$off),
- (STRWpost_isel (EXTRACT_SUBREG GPR64:$Rt, sub_32), am_noindex:$addr,
- simm9:$off)>;
-def : Pat<(post_truncsti16 GPR64:$Rt, am_noindex:$addr, simm9:$off),
- (STRHHpost_isel (EXTRACT_SUBREG GPR64:$Rt, sub_32), am_noindex:$addr,
- simm9:$off)>;
-def : Pat<(post_truncsti8 GPR64:$Rt, am_noindex:$addr, simm9:$off),
- (STRBBpost_isel (EXTRACT_SUBREG GPR64:$Rt, sub_32), am_noindex:$addr,
- simm9:$off)>;
-
-
-//===----------------------------------------------------------------------===//
-// Load/store exclusive instructions.
-//===----------------------------------------------------------------------===//
-
-def LDARW : LoadAcquire <0b10, 1, 1, 0, 1, GPR32, "ldar">;
-def LDARX : LoadAcquire <0b11, 1, 1, 0, 1, GPR64, "ldar">;
-def LDARB : LoadAcquire <0b00, 1, 1, 0, 1, GPR32, "ldarb">;
-def LDARH : LoadAcquire <0b01, 1, 1, 0, 1, GPR32, "ldarh">;
-
-def LDAXRW : LoadExclusive <0b10, 0, 1, 0, 1, GPR32, "ldaxr">;
-def LDAXRX : LoadExclusive <0b11, 0, 1, 0, 1, GPR64, "ldaxr">;
-def LDAXRB : LoadExclusive <0b00, 0, 1, 0, 1, GPR32, "ldaxrb">;
-def LDAXRH : LoadExclusive <0b01, 0, 1, 0, 1, GPR32, "ldaxrh">;
-
-def LDXRW : LoadExclusive <0b10, 0, 1, 0, 0, GPR32, "ldxr">;
-def LDXRX : LoadExclusive <0b11, 0, 1, 0, 0, GPR64, "ldxr">;
-def LDXRB : LoadExclusive <0b00, 0, 1, 0, 0, GPR32, "ldxrb">;
-def LDXRH : LoadExclusive <0b01, 0, 1, 0, 0, GPR32, "ldxrh">;
-
-def STLRW : StoreRelease <0b10, 1, 0, 0, 1, GPR32, "stlr">;
-def STLRX : StoreRelease <0b11, 1, 0, 0, 1, GPR64, "stlr">;
-def STLRB : StoreRelease <0b00, 1, 0, 0, 1, GPR32, "stlrb">;
-def STLRH : StoreRelease <0b01, 1, 0, 0, 1, GPR32, "stlrh">;
-
-def STLXRW : StoreExclusive<0b10, 0, 0, 0, 1, GPR32, "stlxr">;
-def STLXRX : StoreExclusive<0b11, 0, 0, 0, 1, GPR64, "stlxr">;
-def STLXRB : StoreExclusive<0b00, 0, 0, 0, 1, GPR32, "stlxrb">;
-def STLXRH : StoreExclusive<0b01, 0, 0, 0, 1, GPR32, "stlxrh">;
-
-def STXRW : StoreExclusive<0b10, 0, 0, 0, 0, GPR32, "stxr">;
-def STXRX : StoreExclusive<0b11, 0, 0, 0, 0, GPR64, "stxr">;
-def STXRB : StoreExclusive<0b00, 0, 0, 0, 0, GPR32, "stxrb">;
-def STXRH : StoreExclusive<0b01, 0, 0, 0, 0, GPR32, "stxrh">;
-
-def LDAXPW : LoadExclusivePair<0b10, 0, 1, 1, 1, GPR32, "ldaxp">;
-def LDAXPX : LoadExclusivePair<0b11, 0, 1, 1, 1, GPR64, "ldaxp">;
-
-def LDXPW : LoadExclusivePair<0b10, 0, 1, 1, 0, GPR32, "ldxp">;
-def LDXPX : LoadExclusivePair<0b11, 0, 1, 1, 0, GPR64, "ldxp">;
-
-def STLXPW : StoreExclusivePair<0b10, 0, 0, 1, 1, GPR32, "stlxp">;
-def STLXPX : StoreExclusivePair<0b11, 0, 0, 1, 1, GPR64, "stlxp">;
-
-def STXPW : StoreExclusivePair<0b10, 0, 0, 1, 0, GPR32, "stxp">;
-def STXPX : StoreExclusivePair<0b11, 0, 0, 1, 0, GPR64, "stxp">;
-
-//===----------------------------------------------------------------------===//
-// Scaled floating point to integer conversion instructions.
-//===----------------------------------------------------------------------===//
-
-defm FCVTAS : FPToInteger<0b00, 0b100, "fcvtas", int_arm64_neon_fcvtas>;
-defm FCVTAU : FPToInteger<0b00, 0b101, "fcvtau", int_arm64_neon_fcvtau>;
-defm FCVTMS : FPToInteger<0b10, 0b000, "fcvtms", int_arm64_neon_fcvtms>;
-defm FCVTMU : FPToInteger<0b10, 0b001, "fcvtmu", int_arm64_neon_fcvtmu>;
-defm FCVTNS : FPToInteger<0b00, 0b000, "fcvtns", int_arm64_neon_fcvtns>;
-defm FCVTNU : FPToInteger<0b00, 0b001, "fcvtnu", int_arm64_neon_fcvtnu>;
-defm FCVTPS : FPToInteger<0b01, 0b000, "fcvtps", int_arm64_neon_fcvtps>;
-defm FCVTPU : FPToInteger<0b01, 0b001, "fcvtpu", int_arm64_neon_fcvtpu>;
-defm FCVTZS : FPToInteger<0b11, 0b000, "fcvtzs", fp_to_sint>;
-defm FCVTZU : FPToInteger<0b11, 0b001, "fcvtzu", fp_to_uint>;
-let isCodeGenOnly = 1 in {
-defm FCVTZS_Int : FPToInteger<0b11, 0b000, "fcvtzs", int_arm64_neon_fcvtzs>;
-defm FCVTZU_Int : FPToInteger<0b11, 0b001, "fcvtzu", int_arm64_neon_fcvtzu>;
-}
-
-//===----------------------------------------------------------------------===//
-// Scaled integer to floating point conversion instructions.
-//===----------------------------------------------------------------------===//
-
-defm SCVTF : IntegerToFP<0, "scvtf", sint_to_fp>;
-defm UCVTF : IntegerToFP<1, "ucvtf", uint_to_fp>;
-
-//===----------------------------------------------------------------------===//
-// Unscaled integer to floating point conversion instruction.
-//===----------------------------------------------------------------------===//
-
-defm FMOV : UnscaledConversion<"fmov">;
-
-def : Pat<(f32 (fpimm0)), (FMOVWSr WZR)>, Requires<[NoZCZ]>;
-def : Pat<(f64 (fpimm0)), (FMOVXDr XZR)>, Requires<[NoZCZ]>;
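-// The NoZCZ predicate restricts these to subtargets without zero-cycle FP
-// zeroing; there, +0.0 is materialized with an FMOV from the integer zero
-// register, e.g. "fmov s0, wzr".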
-
-def : Pat<(v8i8 (bitconvert GPR64:$Xn)), (FMOVXDr GPR64:$Xn)>;
-def : Pat<(v4i16 (bitconvert GPR64:$Xn)), (FMOVXDr GPR64:$Xn)>;
-def : Pat<(v2i32 (bitconvert GPR64:$Xn)), (FMOVXDr GPR64:$Xn)>;
-def : Pat<(v1i64 (bitconvert GPR64:$Xn)), (FMOVXDr GPR64:$Xn)>;
-def : Pat<(v2f32 (bitconvert GPR64:$Xn)), (FMOVXDr GPR64:$Xn)>;
-def : Pat<(v1f64 (bitconvert GPR64:$Xn)), (FMOVXDr GPR64:$Xn)>;
-def : Pat<(v1i64 (scalar_to_vector GPR64:$Xn)), (FMOVXDr GPR64:$Xn)>;
-def : Pat<(v1f64 (scalar_to_vector GPR64:$Xn)), (FMOVXDr GPR64:$Xn)>;
-def : Pat<(v1f64 (scalar_to_vector (f64 FPR64:$Xn))), (v1f64 FPR64:$Xn)>;
-
-def : Pat<(i64 (bitconvert (v8i8 V64:$Vn))), (FMOVDXr V64:$Vn)>;
-def : Pat<(i64 (bitconvert (v4i16 V64:$Vn))), (FMOVDXr V64:$Vn)>;
-def : Pat<(i64 (bitconvert (v2i32 V64:$Vn))), (FMOVDXr V64:$Vn)>;
-def : Pat<(i64 (bitconvert (v1i64 V64:$Vn))), (FMOVDXr V64:$Vn)>;
-def : Pat<(i64 (bitconvert (v2f32 V64:$Vn))), (FMOVDXr V64:$Vn)>;
-def : Pat<(i64 (bitconvert (v1f64 V64:$Vn))), (FMOVDXr V64:$Vn)>;
-
-def : Pat<(f32 (bitconvert (i32 GPR32:$Xn))), (COPY_TO_REGCLASS GPR32:$Xn,
- FPR32)>;
-def : Pat<(i32 (bitconvert (f32 FPR32:$Xn))), (COPY_TO_REGCLASS FPR32:$Xn,
- GPR32)>;
-def : Pat<(f64 (bitconvert (i64 GPR64:$Xn))), (COPY_TO_REGCLASS GPR64:$Xn,
- FPR64)>;
-def : Pat<(i64 (bitconvert (f64 FPR64:$Xn))), (COPY_TO_REGCLASS FPR64:$Xn,
- GPR64)>;
-
-//===----------------------------------------------------------------------===//
-// Floating point conversion instruction.
-//===----------------------------------------------------------------------===//
-
-defm FCVT : FPConversion<"fcvt">;
-
-def : Pat<(f32_to_f16 FPR32:$Rn),
- (i32 (COPY_TO_REGCLASS
- (f32 (SUBREG_TO_REG (i32 0), (FCVTHSr FPR32:$Rn), hsub)),
- GPR32))>;
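-// f32_to_f16 produces an i32 bit pattern: FCVTHSr leaves the half-precision
-// result in the h subregister, which is widened back to an f32-typed FPR and
-// then copied across to a GPR32.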
-
-def FCVTSHpseudo : Pseudo<(outs FPR32:$Rd), (ins FPR32:$Rn),
- [(set (f32 FPR32:$Rd), (f16_to_f32 i32:$Rn))]>;
-
-//===----------------------------------------------------------------------===//
-// Floating point single operand instructions.
-//===----------------------------------------------------------------------===//
-
-defm FABS : SingleOperandFPData<0b0001, "fabs", fabs>;
-defm FMOV : SingleOperandFPData<0b0000, "fmov">;
-defm FNEG : SingleOperandFPData<0b0010, "fneg", fneg>;
-defm FRINTA : SingleOperandFPData<0b1100, "frinta", frnd>;
-defm FRINTI : SingleOperandFPData<0b1111, "frinti", fnearbyint>;
-defm FRINTM : SingleOperandFPData<0b1010, "frintm", ffloor>;
-defm FRINTN : SingleOperandFPData<0b1000, "frintn", int_arm64_neon_frintn>;
-defm FRINTP : SingleOperandFPData<0b1001, "frintp", fceil>;
-
-def : Pat<(v1f64 (int_arm64_neon_frintn (v1f64 FPR64:$Rn))),
- (FRINTNDr FPR64:$Rn)>;
-
-// FRINTX is inserted to set the flags as required by FENV_ACCESS ON behavior
-// in the C spec. Setting hasSideEffects ensures it is not DCE'd.
-// <rdar://problem/13715968>
-// TODO: We should really model the FPSR flags correctly. This is really ugly.
-let hasSideEffects = 1 in {
-defm FRINTX : SingleOperandFPData<0b1110, "frintx", frint>;
-}
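-// For illustration: "frint" corresponds to C99 rint(), which must raise the
-// Inexact exception whenever the result differs from the source; FRINTX is
-// the "exact" rounding variant that does so, hence the conservative
-// hasSideEffects above.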
-
-defm FRINTZ : SingleOperandFPData<0b1011, "frintz", ftrunc>;
-
-let SchedRW = [WriteFDiv] in {
-defm FSQRT : SingleOperandFPData<0b0011, "fsqrt", fsqrt>;
-}
-
-//===----------------------------------------------------------------------===//
-// Floating point two operand instructions.
-//===----------------------------------------------------------------------===//
-
-defm FADD : TwoOperandFPData<0b0010, "fadd", fadd>;
-let SchedRW = [WriteFDiv] in {
-defm FDIV : TwoOperandFPData<0b0001, "fdiv", fdiv>;
-}
-defm FMAXNM : TwoOperandFPData<0b0110, "fmaxnm", int_arm64_neon_fmaxnm>;
-defm FMAX : TwoOperandFPData<0b0100, "fmax", ARM64fmax>;
-defm FMINNM : TwoOperandFPData<0b0111, "fminnm", int_arm64_neon_fminnm>;
-defm FMIN : TwoOperandFPData<0b0101, "fmin", ARM64fmin>;
-let SchedRW = [WriteFMul] in {
-defm FMUL : TwoOperandFPData<0b0000, "fmul", fmul>;
-defm FNMUL : TwoOperandFPDataNeg<0b1000, "fnmul", fmul>;
-}
-defm FSUB : TwoOperandFPData<0b0011, "fsub", fsub>;
-
-def : Pat<(v1f64 (ARM64fmax (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
- (FMAXDrr FPR64:$Rn, FPR64:$Rm)>;
-def : Pat<(v1f64 (ARM64fmin (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
- (FMINDrr FPR64:$Rn, FPR64:$Rm)>;
-def : Pat<(v1f64 (int_arm64_neon_fmaxnm (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
- (FMAXNMDrr FPR64:$Rn, FPR64:$Rm)>;
-def : Pat<(v1f64 (int_arm64_neon_fminnm (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
- (FMINNMDrr FPR64:$Rn, FPR64:$Rm)>;
-
-//===----------------------------------------------------------------------===//
-// Floating point three operand instructions.
-//===----------------------------------------------------------------------===//
-
-defm FMADD : ThreeOperandFPData<0, 0, "fmadd", fma>;
-defm FMSUB : ThreeOperandFPData<0, 1, "fmsub",
- TriOpFrag<(fma node:$LHS, (fneg node:$MHS), node:$RHS)> >;
-defm FNMADD : ThreeOperandFPData<1, 0, "fnmadd",
- TriOpFrag<(fneg (fma node:$LHS, node:$MHS, node:$RHS))> >;
-defm FNMSUB : ThreeOperandFPData<1, 1, "fnmsub",
- TriOpFrag<(fma node:$LHS, node:$MHS, (fneg node:$RHS))> >;
-
-// The following def pats catch the case where the LHS of an FMA is negated.
-// The TriOpFrag above catches the case where the middle operand is negated.
-def : Pat<(f32 (fma (fneg FPR32:$Rn), FPR32:$Rm, FPR32:$Rd)),
- (FMSUBSrrr FPR32:$Rd, FPR32:$Rn, FPR32:$Rm)>;
-
-def : Pat<(f64 (fma (fneg FPR64:$Rn), FPR64:$Rm, FPR64:$Rd)),
- (FMSUBDrrr FPR64:$Rd, FPR64:$Rn, FPR64:$Rm)>;
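-// This works because fma(-a, b, c) == c - (a * b), and (since fmul commutes)
-// a negation on either multiplicand reduces to exactly FMSUB's Ra - Rn*Rm
-// semantics.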
-
-//===----------------------------------------------------------------------===//
-// Floating point comparison instructions.
-//===----------------------------------------------------------------------===//
-
-defm FCMPE : FPComparison<1, "fcmpe">;
-defm FCMP : FPComparison<0, "fcmp", ARM64fcmp>;
-
-//===----------------------------------------------------------------------===//
-// Floating point conditional comparison instructions.
-//===----------------------------------------------------------------------===//
-
-defm FCCMPE : FPCondComparison<1, "fccmpe">;
-defm FCCMP : FPCondComparison<0, "fccmp">;
-
-//===----------------------------------------------------------------------===//
-// Floating point conditional select instruction.
-//===----------------------------------------------------------------------===//
-
-defm FCSEL : FPCondSelect<"fcsel">;
-
-// CSEL instructions producing f128 values need to be handled by a
-// pseudo-instruction, since the eventual code will need to introduce basic
-// blocks and control flow.
-def F128CSEL : Pseudo<(outs FPR128:$Rd),
- (ins FPR128:$Rn, FPR128:$Rm, ccode:$cond),
- [(set (f128 FPR128:$Rd),
- (ARM64csel FPR128:$Rn, FPR128:$Rm,
- (i32 imm:$cond), CPSR))]> {
- let Uses = [CPSR];
- let usesCustomInserter = 1;
-}
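-// A rough sketch of the expansion the custom inserter performs (illustrative
-// only): a compare-and-branch diamond, since no register-to-register f128
-// conditional select exists:
-//     b.<cond> TrueBB
-//   FalseBB: result = Rm; b MergeBB
-//   TrueBB:  result = Rn
-//   MergeBB: Rd = phi(result)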
-
-
-//===----------------------------------------------------------------------===//
-// Floating point immediate move.
-//===----------------------------------------------------------------------===//
-
-let isReMaterializable = 1 in {
-defm FMOV : FPMoveImmediate<"fmov">;
-}
-
-//===----------------------------------------------------------------------===//
-// Advanced SIMD two vector instructions.
-//===----------------------------------------------------------------------===//
-
-defm ABS : SIMDTwoVectorBHSD<0, 0b01011, "abs", int_arm64_neon_abs>;
-defm CLS : SIMDTwoVectorBHS<0, 0b00100, "cls", int_arm64_neon_cls>;
-defm CLZ : SIMDTwoVectorBHS<1, 0b00100, "clz", ctlz>;
-defm CMEQ : SIMDCmpTwoVector<0, 0b01001, "cmeq", ARM64cmeqz>;
-defm CMGE : SIMDCmpTwoVector<1, 0b01000, "cmge", ARM64cmgez>;
-defm CMGT : SIMDCmpTwoVector<0, 0b01000, "cmgt", ARM64cmgtz>;
-defm CMLE : SIMDCmpTwoVector<1, 0b01001, "cmle", ARM64cmlez>;
-defm CMLT : SIMDCmpTwoVector<0, 0b01010, "cmlt", ARM64cmltz>;
-defm CNT : SIMDTwoVectorB<0, 0b00, 0b00101, "cnt", ctpop>;
-defm FABS : SIMDTwoVectorFP<0, 1, 0b01111, "fabs", fabs>;
-
-defm FCMEQ : SIMDFPCmpTwoVector<0, 1, 0b01101, "fcmeq", ARM64fcmeqz>;
-defm FCMGE : SIMDFPCmpTwoVector<1, 1, 0b01100, "fcmge", ARM64fcmgez>;
-defm FCMGT : SIMDFPCmpTwoVector<0, 1, 0b01100, "fcmgt", ARM64fcmgtz>;
-defm FCMLE : SIMDFPCmpTwoVector<1, 1, 0b01101, "fcmle", ARM64fcmlez>;
-defm FCMLT : SIMDFPCmpTwoVector<0, 1, 0b01110, "fcmlt", ARM64fcmltz>;
-defm FCVTAS : SIMDTwoVectorFPToInt<0,0,0b11100, "fcvtas",int_arm64_neon_fcvtas>;
-defm FCVTAU : SIMDTwoVectorFPToInt<1,0,0b11100, "fcvtau",int_arm64_neon_fcvtau>;
-defm FCVTL : SIMDFPWidenTwoVector<0, 0, 0b10111, "fcvtl">;
-def : Pat<(v4f32 (int_arm64_neon_vcvthf2fp (v4i16 V64:$Rn))),
- (FCVTLv4i16 V64:$Rn)>;
-def : Pat<(v4f32 (int_arm64_neon_vcvthf2fp (extract_subvector (v8i16 V128:$Rn),
- (i64 4)))),
- (FCVTLv8i16 V128:$Rn)>;
-def : Pat<(v2f64 (fextend (v2f32 V64:$Rn))), (FCVTLv2i32 V64:$Rn)>;
-def : Pat<(v2f64 (fextend (v2f32 (extract_subvector (v4f32 V128:$Rn),
- (i64 2))))),
- (FCVTLv4i32 V128:$Rn)>;
-
-defm FCVTMS : SIMDTwoVectorFPToInt<0,0,0b11011, "fcvtms",int_arm64_neon_fcvtms>;
-defm FCVTMU : SIMDTwoVectorFPToInt<1,0,0b11011, "fcvtmu",int_arm64_neon_fcvtmu>;
-defm FCVTNS : SIMDTwoVectorFPToInt<0,0,0b11010, "fcvtns",int_arm64_neon_fcvtns>;
-defm FCVTNU : SIMDTwoVectorFPToInt<1,0,0b11010, "fcvtnu",int_arm64_neon_fcvtnu>;
-defm FCVTN : SIMDFPNarrowTwoVector<0, 0, 0b10110, "fcvtn">;
-def : Pat<(v4i16 (int_arm64_neon_vcvtfp2hf (v4f32 V128:$Rn))),
- (FCVTNv4i16 V128:$Rn)>;
-def : Pat<(concat_vectors V64:$Rd,
- (v4i16 (int_arm64_neon_vcvtfp2hf (v4f32 V128:$Rn)))),
- (FCVTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>;
-def : Pat<(v2f32 (fround (v2f64 V128:$Rn))), (FCVTNv2i32 V128:$Rn)>;
-def : Pat<(concat_vectors V64:$Rd, (v2f32 (fround (v2f64 V128:$Rn)))),
- (FCVTNv4i32 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>;
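-// The concat_vectors patterns select the second-half "fcvtn2" forms (e.g.
-// "fcvtn2 v0.8h, v1.4s"), which narrow into the high half of the destination
-// while the INSERT_SUBREG preserves the existing low half in $Rd.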
-defm FCVTPS : SIMDTwoVectorFPToInt<0,1,0b11010, "fcvtps",int_arm64_neon_fcvtps>;
-defm FCVTPU : SIMDTwoVectorFPToInt<1,1,0b11010, "fcvtpu",int_arm64_neon_fcvtpu>;
-defm FCVTXN : SIMDFPInexactCvtTwoVector<1, 0, 0b10110, "fcvtxn",
- int_arm64_neon_fcvtxn>;
-defm FCVTZS : SIMDTwoVectorFPToInt<0, 1, 0b11011, "fcvtzs", fp_to_sint>;
-defm FCVTZU : SIMDTwoVectorFPToInt<1, 1, 0b11011, "fcvtzu", fp_to_uint>;
-let isCodeGenOnly = 1 in {
-defm FCVTZS_Int : SIMDTwoVectorFPToInt<0, 1, 0b11011, "fcvtzs",
- int_arm64_neon_fcvtzs>;
-defm FCVTZU_Int : SIMDTwoVectorFPToInt<1, 1, 0b11011, "fcvtzu",
- int_arm64_neon_fcvtzu>;
-}
-defm FNEG : SIMDTwoVectorFP<1, 1, 0b01111, "fneg", fneg>;
-defm FRECPE : SIMDTwoVectorFP<0, 1, 0b11101, "frecpe", int_arm64_neon_frecpe>;
-defm FRINTA : SIMDTwoVectorFP<1, 0, 0b11000, "frinta", frnd>;
-defm FRINTI : SIMDTwoVectorFP<1, 1, 0b11001, "frinti", fnearbyint>;
-defm FRINTM : SIMDTwoVectorFP<0, 0, 0b11001, "frintm", ffloor>;
-defm FRINTN : SIMDTwoVectorFP<0, 0, 0b11000, "frintn", int_arm64_neon_frintn>;
-defm FRINTP : SIMDTwoVectorFP<0, 1, 0b11000, "frintp", fceil>;
-defm FRINTX : SIMDTwoVectorFP<1, 0, 0b11001, "frintx", frint>;
-defm FRINTZ : SIMDTwoVectorFP<0, 1, 0b11001, "frintz", ftrunc>;
-defm FRSQRTE: SIMDTwoVectorFP<1, 1, 0b11101, "frsqrte", int_arm64_neon_frsqrte>;
-defm FSQRT : SIMDTwoVectorFP<1, 1, 0b11111, "fsqrt", fsqrt>;
-defm NEG : SIMDTwoVectorBHSD<1, 0b01011, "neg",
- UnOpFrag<(sub immAllZerosV, node:$LHS)> >;
-defm NOT : SIMDTwoVectorB<1, 0b00, 0b00101, "not", vnot>;
-// Aliases for MVN -> NOT.
-def : InstAlias<"mvn.8b $Vd, $Vn", (NOTv8i8 V64:$Vd, V64:$Vn)>;
-def : InstAlias<"mvn.16b $Vd, $Vn", (NOTv16i8 V128:$Vd, V128:$Vn)>;
-def : InstAlias<"mvn $Vd.8b, $Vn.8b", (NOTv8i8 V64:$Vd, V64:$Vn)>;
-def : InstAlias<"mvn $Vd.16b, $Vn.16b", (NOTv16i8 V128:$Vd, V128:$Vn)>;
-
-def : Pat<(ARM64neg (v8i8 V64:$Rn)), (NEGv8i8 V64:$Rn)>;
-def : Pat<(ARM64neg (v16i8 V128:$Rn)), (NEGv16i8 V128:$Rn)>;
-def : Pat<(ARM64neg (v4i16 V64:$Rn)), (NEGv4i16 V64:$Rn)>;
-def : Pat<(ARM64neg (v8i16 V128:$Rn)), (NEGv8i16 V128:$Rn)>;
-def : Pat<(ARM64neg (v2i32 V64:$Rn)), (NEGv2i32 V64:$Rn)>;
-def : Pat<(ARM64neg (v4i32 V128:$Rn)), (NEGv4i32 V128:$Rn)>;
-def : Pat<(ARM64neg (v2i64 V128:$Rn)), (NEGv2i64 V128:$Rn)>;
-
-def : Pat<(ARM64not (v8i8 V64:$Rn)), (NOTv8i8 V64:$Rn)>;
-def : Pat<(ARM64not (v16i8 V128:$Rn)), (NOTv16i8 V128:$Rn)>;
-def : Pat<(ARM64not (v4i16 V64:$Rn)), (NOTv8i8 V64:$Rn)>;
-def : Pat<(ARM64not (v8i16 V128:$Rn)), (NOTv16i8 V128:$Rn)>;
-def : Pat<(ARM64not (v2i32 V64:$Rn)), (NOTv8i8 V64:$Rn)>;
-def : Pat<(ARM64not (v4i32 V128:$Rn)), (NOTv16i8 V128:$Rn)>;
-def : Pat<(ARM64not (v2i64 V128:$Rn)), (NOTv16i8 V128:$Rn)>;
-
-def : Pat<(vnot (v4i16 V64:$Rn)), (NOTv8i8 V64:$Rn)>;
-def : Pat<(vnot (v8i16 V128:$Rn)), (NOTv16i8 V128:$Rn)>;
-def : Pat<(vnot (v2i32 V64:$Rn)), (NOTv8i8 V64:$Rn)>;
-def : Pat<(vnot (v4i32 V128:$Rn)), (NOTv16i8 V128:$Rn)>;
-def : Pat<(vnot (v2i64 V128:$Rn)), (NOTv16i8 V128:$Rn)>;
-
-defm RBIT : SIMDTwoVectorB<1, 0b01, 0b00101, "rbit", int_arm64_neon_rbit>;
-defm REV16 : SIMDTwoVectorB<0, 0b00, 0b00001, "rev16", ARM64rev16>;
-defm REV32 : SIMDTwoVectorBH<1, 0b00000, "rev32", ARM64rev32>;
-defm REV64 : SIMDTwoVectorBHS<0, 0b00000, "rev64", ARM64rev64>;
-defm SADALP : SIMDLongTwoVectorTied<0, 0b00110, "sadalp",
- BinOpFrag<(add node:$LHS, (int_arm64_neon_saddlp node:$RHS))> >;
-defm SADDLP : SIMDLongTwoVector<0, 0b00010, "saddlp", int_arm64_neon_saddlp>;
-defm SCVTF : SIMDTwoVectorIntToFP<0, 0, 0b11101, "scvtf", sint_to_fp>;
-defm SHLL : SIMDVectorLShiftLongBySizeBHS;
-defm SQABS : SIMDTwoVectorBHSD<0, 0b00111, "sqabs", int_arm64_neon_sqabs>;
-defm SQNEG : SIMDTwoVectorBHSD<1, 0b00111, "sqneg", int_arm64_neon_sqneg>;
-defm SQXTN : SIMDMixedTwoVector<0, 0b10100, "sqxtn", int_arm64_neon_sqxtn>;
-defm SQXTUN : SIMDMixedTwoVector<1, 0b10010, "sqxtun", int_arm64_neon_sqxtun>;
-defm SUQADD : SIMDTwoVectorBHSDTied<0, 0b00011, "suqadd",int_arm64_neon_suqadd>;
-defm UADALP : SIMDLongTwoVectorTied<1, 0b00110, "uadalp",
- BinOpFrag<(add node:$LHS, (int_arm64_neon_uaddlp node:$RHS))> >;
-defm UADDLP : SIMDLongTwoVector<1, 0b00010, "uaddlp",
- int_arm64_neon_uaddlp>;
-defm UCVTF : SIMDTwoVectorIntToFP<1, 0, 0b11101, "ucvtf", uint_to_fp>;
-defm UQXTN : SIMDMixedTwoVector<1, 0b10100, "uqxtn", int_arm64_neon_uqxtn>;
-defm URECPE : SIMDTwoVectorS<0, 1, 0b11100, "urecpe", int_arm64_neon_urecpe>;
-defm URSQRTE: SIMDTwoVectorS<1, 1, 0b11100, "ursqrte", int_arm64_neon_ursqrte>;
-defm USQADD : SIMDTwoVectorBHSDTied<1, 0b00011, "usqadd",int_arm64_neon_usqadd>;
-defm XTN : SIMDMixedTwoVector<0, 0b10010, "xtn", trunc>;
-
-def : Pat<(v2f32 (ARM64rev64 V64:$Rn)), (REV64v2i32 V64:$Rn)>;
-def : Pat<(v4f32 (ARM64rev64 V128:$Rn)), (REV64v4i32 V128:$Rn)>;
-
-// Patterns for vector long shift (by element width). These need to match all
-// three of zext, sext and anyext so it's easier to pull the patterns out of the
-// definition.
-multiclass SIMDVectorLShiftLongBySizeBHSPats<SDPatternOperator ext> {
- def : Pat<(ARM64vshl (v8i16 (ext (v8i8 V64:$Rn))), (i32 8)),
- (SHLLv8i8 V64:$Rn)>;
- def : Pat<(ARM64vshl (v8i16 (ext (extract_high_v16i8 V128:$Rn))), (i32 8)),
- (SHLLv16i8 V128:$Rn)>;
- def : Pat<(ARM64vshl (v4i32 (ext (v4i16 V64:$Rn))), (i32 16)),
- (SHLLv4i16 V64:$Rn)>;
- def : Pat<(ARM64vshl (v4i32 (ext (extract_high_v8i16 V128:$Rn))), (i32 16)),
- (SHLLv8i16 V128:$Rn)>;
- def : Pat<(ARM64vshl (v2i64 (ext (v2i32 V64:$Rn))), (i32 32)),
- (SHLLv2i32 V64:$Rn)>;
- def : Pat<(ARM64vshl (v2i64 (ext (extract_high_v4i32 V128:$Rn))), (i32 32)),
- (SHLLv4i32 V128:$Rn)>;
-}
-
-defm : SIMDVectorLShiftLongBySizeBHSPats<anyext>;
-defm : SIMDVectorLShiftLongBySizeBHSPats<zext>;
-defm : SIMDVectorLShiftLongBySizeBHSPats<sext>;
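
As an illustration of the point above, the IR shape these patterns match is a long shift by the element width behind an extension; only the extension node differs between the three variants. A minimal sketch (function name illustrative):

define <8 x i16> @shll_zext(<8 x i8> %v) {
  ; zext + shl-by-8 selects to SHLLv8i8; the sext and anyext forms
  ; are identical apart from the extension node
  %ext = zext <8 x i8> %v to <8 x i16>
  %shl = shl <8 x i16> %ext, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
  ret <8 x i16> %shl
}
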
-
-//===----------------------------------------------------------------------===//
-// Advanced SIMD three vector instructions.
-//===----------------------------------------------------------------------===//
-
-defm ADD : SIMDThreeSameVector<0, 0b10000, "add", add>;
-defm ADDP : SIMDThreeSameVector<0, 0b10111, "addp", int_arm64_neon_addp>;
-defm CMEQ : SIMDThreeSameVector<1, 0b10001, "cmeq", ARM64cmeq>;
-defm CMGE : SIMDThreeSameVector<0, 0b00111, "cmge", ARM64cmge>;
-defm CMGT : SIMDThreeSameVector<0, 0b00110, "cmgt", ARM64cmgt>;
-defm CMHI : SIMDThreeSameVector<1, 0b00110, "cmhi", ARM64cmhi>;
-defm CMHS : SIMDThreeSameVector<1, 0b00111, "cmhs", ARM64cmhs>;
-defm CMTST : SIMDThreeSameVector<0, 0b10001, "cmtst", ARM64cmtst>;
-defm FABD : SIMDThreeSameVectorFP<1,1,0b11010,"fabd", int_arm64_neon_fabd>;
-defm FACGE : SIMDThreeSameVectorFPCmp<1,0,0b11101,"facge",int_arm64_neon_facge>;
-defm FACGT : SIMDThreeSameVectorFPCmp<1,1,0b11101,"facgt",int_arm64_neon_facgt>;
-defm FADDP : SIMDThreeSameVectorFP<1,0,0b11010,"faddp",int_arm64_neon_addp>;
-defm FADD : SIMDThreeSameVectorFP<0,0,0b11010,"fadd", fadd>;
-defm FCMEQ : SIMDThreeSameVectorFPCmp<0, 0, 0b11100, "fcmeq", ARM64fcmeq>;
-defm FCMGE : SIMDThreeSameVectorFPCmp<1, 0, 0b11100, "fcmge", ARM64fcmge>;
-defm FCMGT : SIMDThreeSameVectorFPCmp<1, 1, 0b11100, "fcmgt", ARM64fcmgt>;
-defm FDIV : SIMDThreeSameVectorFP<1,0,0b11111,"fdiv", fdiv>;
-defm FMAXNMP : SIMDThreeSameVectorFP<1,0,0b11000,"fmaxnmp", int_arm64_neon_fmaxnmp>;
-defm FMAXNM : SIMDThreeSameVectorFP<0,0,0b11000,"fmaxnm", int_arm64_neon_fmaxnm>;
-defm FMAXP : SIMDThreeSameVectorFP<1,0,0b11110,"fmaxp", int_arm64_neon_fmaxp>;
-defm FMAX : SIMDThreeSameVectorFP<0,0,0b11110,"fmax", ARM64fmax>;
-defm FMINNMP : SIMDThreeSameVectorFP<1,1,0b11000,"fminnmp", int_arm64_neon_fminnmp>;
-defm FMINNM : SIMDThreeSameVectorFP<0,1,0b11000,"fminnm", int_arm64_neon_fminnm>;
-defm FMINP : SIMDThreeSameVectorFP<1,1,0b11110,"fminp", int_arm64_neon_fminp>;
-defm FMIN : SIMDThreeSameVectorFP<0,1,0b11110,"fmin", ARM64fmin>;
-
-// NOTE: The operands of the PatFrag are reordered on FMLA/FMLS because the
-// instruction expects the addend first, while the fma intrinsic puts it last.
-defm FMLA : SIMDThreeSameVectorFPTied<0, 0, 0b11001, "fmla",
- TriOpFrag<(fma node:$RHS, node:$MHS, node:$LHS)> >;
-defm FMLS : SIMDThreeSameVectorFPTied<0, 1, 0b11001, "fmls",
- TriOpFrag<(fma node:$MHS, (fneg node:$RHS), node:$LHS)> >;
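
The mismatch is visible in plain IR: llvm.fma takes the two multiplicands first and the addend last, whereas FMLA's tied destination operand is the addend. A minimal sketch (function name illustrative):

declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>)

define <2 x float> @fmla_shape(<2 x float> %acc, <2 x float> %a, <2 x float> %b) {
  ; the addend %acc is the last fma operand but becomes the tied $Rd of FMLA
  %r = call <2 x float> @llvm.fma.v2f32(<2 x float> %a, <2 x float> %b,
                                        <2 x float> %acc)
  ret <2 x float> %r
}
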
-
-// The following def pats catch the case where the LHS of an FMA is negated.
-// The TriOpFrag above catches the case where the middle operand is negated.
-def : Pat<(v2f32 (fma (fneg V64:$Rn), V64:$Rm, V64:$Rd)),
- (FMLSv2f32 V64:$Rd, V64:$Rn, V64:$Rm)>;
-
-def : Pat<(v4f32 (fma (fneg V128:$Rn), V128:$Rm, V128:$Rd)),
- (FMLSv4f32 V128:$Rd, V128:$Rn, V128:$Rm)>;
-
-def : Pat<(v2f64 (fma (fneg V128:$Rn), V128:$Rm, V128:$Rd)),
- (FMLSv2f64 V128:$Rd, V128:$Rn, V128:$Rm)>;
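
For the negated-LHS case these patterns catch, a sketch in 3.5-era IR (which has no fneg instruction, so the negation is written as an fsub from -0.0; function name illustrative):

declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>)

define <2 x float> @fmls_shape(<2 x float> %acc, <2 x float> %a, <2 x float> %b) {
  %neg = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %a
  ; fma(-%a, %b, %acc) selects to FMLSv2f32 $acc, $a, $b
  %r = call <2 x float> @llvm.fma.v2f32(<2 x float> %neg, <2 x float> %b,
                                        <2 x float> %acc)
  ret <2 x float> %r
}
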
-
-defm FMULX : SIMDThreeSameVectorFP<0,0,0b11011,"fmulx", int_arm64_neon_fmulx>;
-defm FMUL : SIMDThreeSameVectorFP<1,0,0b11011,"fmul", fmul>;
-defm FRECPS : SIMDThreeSameVectorFP<0,0,0b11111,"frecps", int_arm64_neon_frecps>;
-defm FRSQRTS : SIMDThreeSameVectorFP<0,1,0b11111,"frsqrts", int_arm64_neon_frsqrts>;
-defm FSUB : SIMDThreeSameVectorFP<0,1,0b11010,"fsub", fsub>;
-defm MLA : SIMDThreeSameVectorBHSTied<0, 0b10010, "mla",
- TriOpFrag<(add node:$LHS, (mul node:$MHS, node:$RHS))> >;
-defm MLS : SIMDThreeSameVectorBHSTied<1, 0b10010, "mls",
- TriOpFrag<(sub node:$LHS, (mul node:$MHS, node:$RHS))> >;
-defm MUL : SIMDThreeSameVectorBHS<0, 0b10011, "mul", mul>;
-defm PMUL : SIMDThreeSameVectorB<1, 0b10011, "pmul", int_arm64_neon_pmul>;
-defm SABA : SIMDThreeSameVectorBHSTied<0, 0b01111, "saba",
- TriOpFrag<(add node:$LHS, (int_arm64_neon_sabd node:$MHS, node:$RHS))> >;
-defm SABD : SIMDThreeSameVectorBHS<0,0b01110,"sabd", int_arm64_neon_sabd>;
-defm SHADD : SIMDThreeSameVectorBHS<0,0b00000,"shadd", int_arm64_neon_shadd>;
-defm SHSUB : SIMDThreeSameVectorBHS<0,0b00100,"shsub", int_arm64_neon_shsub>;
-defm SMAXP : SIMDThreeSameVectorBHS<0,0b10100,"smaxp", int_arm64_neon_smaxp>;
-defm SMAX : SIMDThreeSameVectorBHS<0,0b01100,"smax", int_arm64_neon_smax>;
-defm SMINP : SIMDThreeSameVectorBHS<0,0b10101,"sminp", int_arm64_neon_sminp>;
-defm SMIN : SIMDThreeSameVectorBHS<0,0b01101,"smin", int_arm64_neon_smin>;
-defm SQADD : SIMDThreeSameVector<0,0b00001,"sqadd", int_arm64_neon_sqadd>;
-defm SQDMULH : SIMDThreeSameVectorHS<0,0b10110,"sqdmulh",int_arm64_neon_sqdmulh>;
-defm SQRDMULH : SIMDThreeSameVectorHS<1,0b10110,"sqrdmulh",int_arm64_neon_sqrdmulh>;
-defm SQRSHL : SIMDThreeSameVector<0,0b01011,"sqrshl", int_arm64_neon_sqrshl>;
-defm SQSHL : SIMDThreeSameVector<0,0b01001,"sqshl", int_arm64_neon_sqshl>;
-defm SQSUB : SIMDThreeSameVector<0,0b00101,"sqsub", int_arm64_neon_sqsub>;
-defm SRHADD : SIMDThreeSameVectorBHS<0,0b00010,"srhadd",int_arm64_neon_srhadd>;
-defm SRSHL : SIMDThreeSameVector<0,0b01010,"srshl", int_arm64_neon_srshl>;
-defm SSHL : SIMDThreeSameVector<0,0b01000,"sshl", int_arm64_neon_sshl>;
-defm SUB : SIMDThreeSameVector<1,0b10000,"sub", sub>;
-defm UABA : SIMDThreeSameVectorBHSTied<1, 0b01111, "uaba",
- TriOpFrag<(add node:$LHS, (int_arm64_neon_uabd node:$MHS, node:$RHS))> >;
-defm UABD : SIMDThreeSameVectorBHS<1,0b01110,"uabd", int_arm64_neon_uabd>;
-defm UHADD : SIMDThreeSameVectorBHS<1,0b00000,"uhadd", int_arm64_neon_uhadd>;
-defm UHSUB : SIMDThreeSameVectorBHS<1,0b00100,"uhsub", int_arm64_neon_uhsub>;
-defm UMAXP : SIMDThreeSameVectorBHS<1,0b10100,"umaxp", int_arm64_neon_umaxp>;
-defm UMAX : SIMDThreeSameVectorBHS<1,0b01100,"umax", int_arm64_neon_umax>;
-defm UMINP : SIMDThreeSameVectorBHS<1,0b10101,"uminp", int_arm64_neon_uminp>;
-defm UMIN : SIMDThreeSameVectorBHS<1,0b01101,"umin", int_arm64_neon_umin>;
-defm UQADD : SIMDThreeSameVector<1,0b00001,"uqadd", int_arm64_neon_uqadd>;
-defm UQRSHL : SIMDThreeSameVector<1,0b01011,"uqrshl", int_arm64_neon_uqrshl>;
-defm UQSHL : SIMDThreeSameVector<1,0b01001,"uqshl", int_arm64_neon_uqshl>;
-defm UQSUB : SIMDThreeSameVector<1,0b00101,"uqsub", int_arm64_neon_uqsub>;
-defm URHADD : SIMDThreeSameVectorBHS<1,0b00010,"urhadd", int_arm64_neon_urhadd>;
-defm URSHL : SIMDThreeSameVector<1,0b01010,"urshl", int_arm64_neon_urshl>;
-defm USHL : SIMDThreeSameVector<1,0b01000,"ushl", int_arm64_neon_ushl>;
-
-defm AND : SIMDLogicalThreeVector<0, 0b00, "and", and>;
-defm BIC : SIMDLogicalThreeVector<0, 0b01, "bic",
- BinOpFrag<(and node:$LHS, (vnot node:$RHS))> >;
-defm BIF : SIMDLogicalThreeVector<1, 0b11, "bif">;
-defm BIT : SIMDLogicalThreeVectorTied<1, 0b10, "bit", ARM64bit>;
-defm BSL : SIMDLogicalThreeVectorTied<1, 0b01, "bsl",
- TriOpFrag<(or (and node:$LHS, node:$MHS), (and (vnot node:$LHS), node:$RHS))>>;
-defm EOR : SIMDLogicalThreeVector<1, 0b00, "eor", xor>;
-defm ORN : SIMDLogicalThreeVector<0, 0b11, "orn",
- BinOpFrag<(or node:$LHS, (vnot node:$RHS))> >;
-defm ORR : SIMDLogicalThreeVector<0, 0b10, "orr", or>;
-
-// FIXME: the .16b and .8b variants should be emitted by the
-// AsmWriter. TableGen's AsmWriter-generator doesn't deal with variant syntaxes
-// in aliases yet, though.
-def : InstAlias<"mov{\t$dst.16b, $src.16b|.16b\t$dst, $src}",
- (ORRv16i8 V128:$dst, V128:$src, V128:$src), 0>;
-def : InstAlias<"{mov\t$dst.8h, $src.8h|mov.8h\t$dst, $src}",
- (ORRv16i8 V128:$dst, V128:$src, V128:$src), 0>;
-def : InstAlias<"{mov\t$dst.4s, $src.4s|mov.4s\t$dst, $src}",
- (ORRv16i8 V128:$dst, V128:$src, V128:$src), 0>;
-def : InstAlias<"{mov\t$dst.2d, $src.2d|mov.2d\t$dst, $src}",
- (ORRv16i8 V128:$dst, V128:$src, V128:$src), 0>;
-
-def : InstAlias<"{mov\t$dst.8b, $src.8b|mov.8b\t$dst, $src}",
- (ORRv8i8 V64:$dst, V64:$src, V64:$src), 0>;
-def : InstAlias<"{mov\t$dst.4h, $src.4h|mov.4h\t$dst, $src}",
- (ORRv8i8 V64:$dst, V64:$src, V64:$src), 0>;
-def : InstAlias<"{mov\t$dst.2s, $src.2s|mov.2s\t$dst, $src}",
- (ORRv8i8 V64:$dst, V64:$src, V64:$src), 0>;
-def : InstAlias<"{mov\t$dst.1d, $src.1d|mov.1d\t$dst, $src}",
- (ORRv8i8 V64:$dst, V64:$src, V64:$src), 0>;
-
-def : InstAlias<"{cmls\t$dst.8b, $src1.8b, $src2.8b" #
- "|cmls.8b\t$dst, $src1, $src2}",
- (CMHSv8i8 V64:$dst, V64:$src2, V64:$src1), 0>;
-def : InstAlias<"{cmls\t$dst.16b, $src1.16b, $src2.16b" #
- "|cmls.16b\t$dst, $src1, $src2}",
- (CMHSv16i8 V128:$dst, V128:$src2, V128:$src1), 0>;
-def : InstAlias<"{cmls\t$dst.4h, $src1.4h, $src2.4h" #
- "|cmls.4h\t$dst, $src1, $src2}",
- (CMHSv4i16 V64:$dst, V64:$src2, V64:$src1), 0>;
-def : InstAlias<"{cmls\t$dst.8h, $src1.8h, $src2.8h" #
- "|cmls.8h\t$dst, $src1, $src2}",
- (CMHSv8i16 V128:$dst, V128:$src2, V128:$src1), 0>;
-def : InstAlias<"{cmls\t$dst.2s, $src1.2s, $src2.2s" #
- "|cmls.2s\t$dst, $src1, $src2}",
- (CMHSv2i32 V64:$dst, V64:$src2, V64:$src1), 0>;
-def : InstAlias<"{cmls\t$dst.4s, $src1.4s, $src2.4s" #
- "|cmls.4s\t$dst, $src1, $src2}",
- (CMHSv4i32 V128:$dst, V128:$src2, V128:$src1), 0>;
-def : InstAlias<"{cmls\t$dst.2d, $src1.2d, $src2.2d" #
- "|cmls.2d\t$dst, $src1, $src2}",
- (CMHSv2i64 V128:$dst, V128:$src2, V128:$src1), 0>;
-
-def : InstAlias<"{cmlo\t$dst.8b, $src1.8b, $src2.8b" #
- "|cmlo.8b\t$dst, $src1, $src2}",
- (CMHIv8i8 V64:$dst, V64:$src2, V64:$src1), 0>;
-def : InstAlias<"{cmlo\t$dst.16b, $src1.16b, $src2.16b" #
- "|cmlo.16b\t$dst, $src1, $src2}",
- (CMHIv16i8 V128:$dst, V128:$src2, V128:$src1), 0>;
-def : InstAlias<"{cmlo\t$dst.4h, $src1.4h, $src2.4h" #
- "|cmlo.4h\t$dst, $src1, $src2}",
- (CMHIv4i16 V64:$dst, V64:$src2, V64:$src1), 0>;
-def : InstAlias<"{cmlo\t$dst.8h, $src1.8h, $src2.8h" #
- "|cmlo.8h\t$dst, $src1, $src2}",
- (CMHIv8i16 V128:$dst, V128:$src2, V128:$src1), 0>;
-def : InstAlias<"{cmlo\t$dst.2s, $src1.2s, $src2.2s" #
- "|cmlo.2s\t$dst, $src1, $src2}",
- (CMHIv2i32 V64:$dst, V64:$src2, V64:$src1), 0>;
-def : InstAlias<"{cmlo\t$dst.4s, $src1.4s, $src2.4s" #
- "|cmlo.4s\t$dst, $src1, $src2}",
- (CMHIv4i32 V128:$dst, V128:$src2, V128:$src1), 0>;
-def : InstAlias<"{cmlo\t$dst.2d, $src1.2d, $src2.2d" #
- "|cmlo.2d\t$dst, $src1, $src2}",
- (CMHIv2i64 V128:$dst, V128:$src2, V128:$src1), 0>;
-
-def : InstAlias<"{cmle\t$dst.8b, $src1.8b, $src2.8b" #
- "|cmle.8b\t$dst, $src1, $src2}",
- (CMGEv8i8 V64:$dst, V64:$src2, V64:$src1), 0>;
-def : InstAlias<"{cmle\t$dst.16b, $src1.16b, $src2.16b" #
- "|cmle.16b\t$dst, $src1, $src2}",
- (CMGEv16i8 V128:$dst, V128:$src2, V128:$src1), 0>;
-def : InstAlias<"{cmle\t$dst.4h, $src1.4h, $src2.4h" #
- "|cmle.4h\t$dst, $src1, $src2}",
- (CMGEv4i16 V64:$dst, V64:$src2, V64:$src1), 0>;
-def : InstAlias<"{cmle\t$dst.8h, $src1.8h, $src2.8h" #
- "|cmle.8h\t$dst, $src1, $src2}",
- (CMGEv8i16 V128:$dst, V128:$src2, V128:$src1), 0>;
-def : InstAlias<"{cmle\t$dst.2s, $src1.2s, $src2.2s" #
- "|cmle.2s\t$dst, $src1, $src2}",
- (CMGEv2i32 V64:$dst, V64:$src2, V64:$src1), 0>;
-def : InstAlias<"{cmle\t$dst.4s, $src1.4s, $src2.4s" #
- "|cmle.4s\t$dst, $src1, $src2}",
- (CMGEv4i32 V128:$dst, V128:$src2, V128:$src1), 0>;
-def : InstAlias<"{cmle\t$dst.2d, $src1.2d, $src2.2d" #
- "|cmle.2d\t$dst, $src1, $src2}",
- (CMGEv2i64 V128:$dst, V128:$src2, V128:$src1), 0>;
-
-def : InstAlias<"{cmlt\t$dst.8b, $src1.8b, $src2.8b" #
- "|cmlt.8b\t$dst, $src1, $src2}",
- (CMGTv8i8 V64:$dst, V64:$src2, V64:$src1), 0>;
-def : InstAlias<"{cmlt\t$dst.16b, $src1.16b, $src2.16b" #
- "|cmlt.16b\t$dst, $src1, $src2}",
- (CMGTv16i8 V128:$dst, V128:$src2, V128:$src1), 0>;
-def : InstAlias<"{cmlt\t$dst.4h, $src1.4h, $src2.4h" #
- "|cmlt.4h\t$dst, $src1, $src2}",
- (CMGTv4i16 V64:$dst, V64:$src2, V64:$src1), 0>;
-def : InstAlias<"{cmlt\t$dst.8h, $src1.8h, $src2.8h" #
- "|cmlt.8h\t$dst, $src1, $src2}",
- (CMGTv8i16 V128:$dst, V128:$src2, V128:$src1), 0>;
-def : InstAlias<"{cmlt\t$dst.2s, $src1.2s, $src2.2s" #
- "|cmlt.2s\t$dst, $src1, $src2}",
- (CMGTv2i32 V64:$dst, V64:$src2, V64:$src1), 0>;
-def : InstAlias<"{cmlt\t$dst.4s, $src1.4s, $src2.4s" #
- "|cmlt.4s\t$dst, $src1, $src2}",
- (CMGTv4i32 V128:$dst, V128:$src2, V128:$src1), 0>;
-def : InstAlias<"{cmlt\t$dst.2d, $src1.2d, $src2.2d" #
- "|cmlt.2d\t$dst, $src1, $src2}",
- (CMGTv2i64 V128:$dst, V128:$src2, V128:$src1), 0>;
-
-def : InstAlias<"{fcmle\t$dst.2s, $src1.2s, $src2.2s" #
- "|fcmle.2s\t$dst, $src1, $src2}",
- (FCMGEv2f32 V64:$dst, V64:$src2, V64:$src1), 0>;
-def : InstAlias<"{fcmle\t$dst.4s, $src1.4s, $src2.4s" #
- "|fcmle.4s\t$dst, $src1, $src2}",
- (FCMGEv4f32 V128:$dst, V128:$src2, V128:$src1), 0>;
-def : InstAlias<"{fcmle\t$dst.2d, $src1.2d, $src2.2d" #
- "|fcmle.2d\t$dst, $src1, $src2}",
- (FCMGEv2f64 V128:$dst, V128:$src2, V128:$src1), 0>;
-
-def : InstAlias<"{fcmlt\t$dst.2s, $src1.2s, $src2.2s" #
- "|fcmlt.2s\t$dst, $src1, $src2}",
- (FCMGTv2f32 V64:$dst, V64:$src2, V64:$src1), 0>;
-def : InstAlias<"{fcmlt\t$dst.4s, $src1.4s, $src2.4s" #
- "|fcmlt.4s\t$dst, $src1, $src2}",
- (FCMGTv4f32 V128:$dst, V128:$src2, V128:$src1), 0>;
-def : InstAlias<"{fcmlt\t$dst.2d, $src1.2d, $src2.2d" #
- "|fcmlt.2d\t$dst, $src1, $src2}",
- (FCMGTv2f64 V128:$dst, V128:$src2, V128:$src1), 0>;
-
-def : InstAlias<"{facle\t$dst.2s, $src1.2s, $src2.2s" #
- "|facle.2s\t$dst, $src1, $src2}",
- (FACGEv2f32 V64:$dst, V64:$src2, V64:$src1), 0>;
-def : InstAlias<"{facle\t$dst.4s, $src1.4s, $src2.4s" #
- "|facle.4s\t$dst, $src1, $src2}",
- (FACGEv4f32 V128:$dst, V128:$src2, V128:$src1), 0>;
-def : InstAlias<"{facle\t$dst.2d, $src1.2d, $src2.2d" #
- "|facle.2d\t$dst, $src1, $src2}",
- (FACGEv2f64 V128:$dst, V128:$src2, V128:$src1), 0>;
-
-def : InstAlias<"{faclt\t$dst.2s, $src1.2s, $src2.2s" #
- "|faclt.2s\t$dst, $src1, $src2}",
- (FACGTv2f32 V64:$dst, V64:$src2, V64:$src1), 0>;
-def : InstAlias<"{faclt\t$dst.4s, $src1.4s, $src2.4s" #
- "|faclt.4s\t$dst, $src1, $src2}",
- (FACGTv4f32 V128:$dst, V128:$src2, V128:$src1), 0>;
-def : InstAlias<"{faclt\t$dst.2d, $src1.2d, $src2.2d" #
- "|faclt.2d\t$dst, $src1, $src2}",
- (FACGTv2f64 V128:$dst, V128:$src2, V128:$src1), 0>;
-
-//===----------------------------------------------------------------------===//
-// Advanced SIMD three scalar instructions.
-//===----------------------------------------------------------------------===//
-
-defm ADD : SIMDThreeScalarD<0, 0b10000, "add", add>;
-defm CMEQ : SIMDThreeScalarD<1, 0b10001, "cmeq", ARM64cmeq>;
-defm CMGE : SIMDThreeScalarD<0, 0b00111, "cmge", ARM64cmge>;
-defm CMGT : SIMDThreeScalarD<0, 0b00110, "cmgt", ARM64cmgt>;
-defm CMHI : SIMDThreeScalarD<1, 0b00110, "cmhi", ARM64cmhi>;
-defm CMHS : SIMDThreeScalarD<1, 0b00111, "cmhs", ARM64cmhs>;
-defm CMTST : SIMDThreeScalarD<0, 0b10001, "cmtst", ARM64cmtst>;
-defm FABD : SIMDThreeScalarSD<1, 1, 0b11010, "fabd", int_arm64_sisd_fabd>;
-def : Pat<(v1f64 (int_arm64_neon_fabd (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
- (FABD64 FPR64:$Rn, FPR64:$Rm)>;
-defm FACGE : SIMDThreeScalarFPCmp<1, 0, 0b11101, "facge",
- int_arm64_neon_facge>;
-defm FACGT : SIMDThreeScalarFPCmp<1, 1, 0b11101, "facgt",
- int_arm64_neon_facgt>;
-defm FCMEQ : SIMDThreeScalarFPCmp<0, 0, 0b11100, "fcmeq", ARM64fcmeq>;
-defm FCMGE : SIMDThreeScalarFPCmp<1, 0, 0b11100, "fcmge", ARM64fcmge>;
-defm FCMGT : SIMDThreeScalarFPCmp<1, 1, 0b11100, "fcmgt", ARM64fcmgt>;
-defm FMULX : SIMDThreeScalarSD<0, 0, 0b11011, "fmulx", int_arm64_neon_fmulx>;
-defm FRECPS : SIMDThreeScalarSD<0, 0, 0b11111, "frecps", int_arm64_neon_frecps>;
-defm FRSQRTS : SIMDThreeScalarSD<0, 1, 0b11111, "frsqrts", int_arm64_neon_frsqrts>;
-defm SQADD : SIMDThreeScalarBHSD<0, 0b00001, "sqadd", int_arm64_neon_sqadd>;
-defm SQDMULH : SIMDThreeScalarHS< 0, 0b10110, "sqdmulh", int_arm64_neon_sqdmulh>;
-defm SQRDMULH : SIMDThreeScalarHS< 1, 0b10110, "sqrdmulh", int_arm64_neon_sqrdmulh>;
-defm SQRSHL : SIMDThreeScalarBHSD<0, 0b01011, "sqrshl",int_arm64_neon_sqrshl>;
-defm SQSHL : SIMDThreeScalarBHSD<0, 0b01001, "sqshl", int_arm64_neon_sqshl>;
-defm SQSUB : SIMDThreeScalarBHSD<0, 0b00101, "sqsub", int_arm64_neon_sqsub>;
-defm SRSHL : SIMDThreeScalarD< 0, 0b01010, "srshl", int_arm64_neon_srshl>;
-defm SSHL : SIMDThreeScalarD< 0, 0b01000, "sshl", int_arm64_neon_sshl>;
-defm SUB : SIMDThreeScalarD< 1, 0b10000, "sub", sub>;
-defm UQADD : SIMDThreeScalarBHSD<1, 0b00001, "uqadd", int_arm64_neon_uqadd>;
-defm UQRSHL : SIMDThreeScalarBHSD<1, 0b01011, "uqrshl",int_arm64_neon_uqrshl>;
-defm UQSHL : SIMDThreeScalarBHSD<1, 0b01001, "uqshl", int_arm64_neon_uqshl>;
-defm UQSUB : SIMDThreeScalarBHSD<1, 0b00101, "uqsub", int_arm64_neon_uqsub>;
-defm URSHL : SIMDThreeScalarD< 1, 0b01010, "urshl", int_arm64_neon_urshl>;
-defm USHL : SIMDThreeScalarD< 1, 0b01000, "ushl", int_arm64_neon_ushl>;
-
-def : InstAlias<"cmls $dst, $src1, $src2",
- (CMHSv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1)>;
-def : InstAlias<"cmle $dst, $src1, $src2",
- (CMGEv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1)>;
-def : InstAlias<"cmlo $dst, $src1, $src2",
- (CMHIv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1)>;
-def : InstAlias<"cmlt $dst, $src1, $src2",
- (CMGTv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1)>;
-def : InstAlias<"fcmle $dst, $src1, $src2",
- (FCMGE32 FPR32:$dst, FPR32:$src2, FPR32:$src1)>;
-def : InstAlias<"fcmle $dst, $src1, $src2",
- (FCMGE64 FPR64:$dst, FPR64:$src2, FPR64:$src1)>;
-def : InstAlias<"fcmlt $dst, $src1, $src2",
- (FCMGT32 FPR32:$dst, FPR32:$src2, FPR32:$src1)>;
-def : InstAlias<"fcmlt $dst, $src1, $src2",
- (FCMGT64 FPR64:$dst, FPR64:$src2, FPR64:$src1)>;
-def : InstAlias<"facle $dst, $src1, $src2",
- (FACGE32 FPR32:$dst, FPR32:$src2, FPR32:$src1)>;
-def : InstAlias<"facle $dst, $src1, $src2",
- (FACGE64 FPR64:$dst, FPR64:$src2, FPR64:$src1)>;
-def : InstAlias<"faclt $dst, $src1, $src2",
- (FACGT32 FPR32:$dst, FPR32:$src2, FPR32:$src1)>;
-def : InstAlias<"faclt $dst, $src1, $src2",
- (FACGT64 FPR64:$dst, FPR64:$src2, FPR64:$src1)>;
-
-//===----------------------------------------------------------------------===//
-// Advanced SIMD three scalar instructions (mixed operands).
-//===----------------------------------------------------------------------===//
-defm SQDMULL : SIMDThreeScalarMixedHS<0, 0b11010, "sqdmull",
- int_arm64_neon_sqdmulls_scalar>;
-defm SQDMLAL : SIMDThreeScalarMixedTiedHS<0, 0b10010, "sqdmlal">;
-defm SQDMLSL : SIMDThreeScalarMixedTiedHS<0, 0b10110, "sqdmlsl">;
-
-def : Pat<(i64 (int_arm64_neon_sqadd (i64 FPR64:$Rd),
- (i64 (int_arm64_neon_sqdmulls_scalar (i32 FPR32:$Rn),
- (i32 FPR32:$Rm))))),
- (SQDMLALi32 FPR64:$Rd, FPR32:$Rn, FPR32:$Rm)>;
-def : Pat<(i64 (int_arm64_neon_sqsub (i64 FPR64:$Rd),
- (i64 (int_arm64_neon_sqdmulls_scalar (i32 FPR32:$Rn),
- (i32 FPR32:$Rm))))),
- (SQDMLSLi32 FPR64:$Rd, FPR32:$Rn, FPR32:$Rm)>;
-
-//===----------------------------------------------------------------------===//
-// Advanced SIMD two scalar instructions.
-//===----------------------------------------------------------------------===//
-
-defm ABS : SIMDTwoScalarD< 0, 0b01011, "abs", int_arm64_neon_abs>;
-defm CMEQ : SIMDCmpTwoScalarD< 0, 0b01001, "cmeq", ARM64cmeqz>;
-defm CMGE : SIMDCmpTwoScalarD< 1, 0b01000, "cmge", ARM64cmgez>;
-defm CMGT : SIMDCmpTwoScalarD< 0, 0b01000, "cmgt", ARM64cmgtz>;
-defm CMLE : SIMDCmpTwoScalarD< 1, 0b01001, "cmle", ARM64cmlez>;
-defm CMLT : SIMDCmpTwoScalarD< 0, 0b01010, "cmlt", ARM64cmltz>;
-defm FCMEQ : SIMDCmpTwoScalarSD<0, 1, 0b01101, "fcmeq", ARM64fcmeqz>;
-defm FCMGE : SIMDCmpTwoScalarSD<1, 1, 0b01100, "fcmge", ARM64fcmgez>;
-defm FCMGT : SIMDCmpTwoScalarSD<0, 1, 0b01100, "fcmgt", ARM64fcmgtz>;
-defm FCMLE : SIMDCmpTwoScalarSD<1, 1, 0b01101, "fcmle", ARM64fcmlez>;
-defm FCMLT : SIMDCmpTwoScalarSD<0, 1, 0b01110, "fcmlt", ARM64fcmltz>;
-defm FCVTAS : SIMDTwoScalarSD< 0, 0, 0b11100, "fcvtas">;
-defm FCVTAU : SIMDTwoScalarSD< 1, 0, 0b11100, "fcvtau">;
-defm FCVTMS : SIMDTwoScalarSD< 0, 0, 0b11011, "fcvtms">;
-defm FCVTMU : SIMDTwoScalarSD< 1, 0, 0b11011, "fcvtmu">;
-defm FCVTNS : SIMDTwoScalarSD< 0, 0, 0b11010, "fcvtns">;
-defm FCVTNU : SIMDTwoScalarSD< 1, 0, 0b11010, "fcvtnu">;
-defm FCVTPS : SIMDTwoScalarSD< 0, 1, 0b11010, "fcvtps">;
-defm FCVTPU : SIMDTwoScalarSD< 1, 1, 0b11010, "fcvtpu">;
-def FCVTXNv1i64 : SIMDInexactCvtTwoScalar<0b10110, "fcvtxn">;
-defm FCVTZS : SIMDTwoScalarSD< 0, 1, 0b11011, "fcvtzs">;
-defm FCVTZU : SIMDTwoScalarSD< 1, 1, 0b11011, "fcvtzu">;
-defm FRECPE : SIMDTwoScalarSD< 0, 1, 0b11101, "frecpe">;
-defm FRECPX : SIMDTwoScalarSD< 0, 1, 0b11111, "frecpx">;
-defm FRSQRTE : SIMDTwoScalarSD< 1, 1, 0b11101, "frsqrte">;
-defm NEG : SIMDTwoScalarD< 1, 0b01011, "neg",
- UnOpFrag<(sub immAllZerosV, node:$LHS)> >;
-defm SCVTF : SIMDTwoScalarCVTSD< 0, 0, 0b11101, "scvtf", ARM64sitof>;
-defm SQABS : SIMDTwoScalarBHSD< 0, 0b00111, "sqabs", int_arm64_neon_sqabs>;
-defm SQNEG : SIMDTwoScalarBHSD< 1, 0b00111, "sqneg", int_arm64_neon_sqneg>;
-defm SQXTN : SIMDTwoScalarMixedBHS< 0, 0b10100, "sqxtn", int_arm64_neon_scalar_sqxtn>;
-defm SQXTUN : SIMDTwoScalarMixedBHS< 1, 0b10010, "sqxtun", int_arm64_neon_scalar_sqxtun>;
-defm SUQADD : SIMDTwoScalarBHSDTied< 0, 0b00011, "suqadd",
- int_arm64_neon_suqadd>;
-defm UCVTF : SIMDTwoScalarCVTSD< 1, 0, 0b11101, "ucvtf", ARM64uitof>;
-defm UQXTN : SIMDTwoScalarMixedBHS<1, 0b10100, "uqxtn", int_arm64_neon_scalar_uqxtn>;
-defm USQADD : SIMDTwoScalarBHSDTied< 1, 0b00011, "usqadd",
- int_arm64_neon_usqadd>;
-
-def : Pat<(v1i64 (int_arm64_neon_fcvtas (v1f64 FPR64:$Rn))),
- (FCVTASv1i64 FPR64:$Rn)>;
-def : Pat<(v1i64 (int_arm64_neon_fcvtau (v1f64 FPR64:$Rn))),
- (FCVTAUv1i64 FPR64:$Rn)>;
-def : Pat<(v1i64 (int_arm64_neon_fcvtms (v1f64 FPR64:$Rn))),
- (FCVTMSv1i64 FPR64:$Rn)>;
-def : Pat<(v1i64 (int_arm64_neon_fcvtmu (v1f64 FPR64:$Rn))),
- (FCVTMUv1i64 FPR64:$Rn)>;
-def : Pat<(v1i64 (int_arm64_neon_fcvtns (v1f64 FPR64:$Rn))),
- (FCVTNSv1i64 FPR64:$Rn)>;
-def : Pat<(v1i64 (int_arm64_neon_fcvtnu (v1f64 FPR64:$Rn))),
- (FCVTNUv1i64 FPR64:$Rn)>;
-def : Pat<(v1i64 (int_arm64_neon_fcvtps (v1f64 FPR64:$Rn))),
- (FCVTPSv1i64 FPR64:$Rn)>;
-def : Pat<(v1i64 (int_arm64_neon_fcvtpu (v1f64 FPR64:$Rn))),
- (FCVTPUv1i64 FPR64:$Rn)>;
-
-def : Pat<(f32 (int_arm64_neon_frecpe (f32 FPR32:$Rn))),
- (FRECPEv1i32 FPR32:$Rn)>;
-def : Pat<(f64 (int_arm64_neon_frecpe (f64 FPR64:$Rn))),
- (FRECPEv1i64 FPR64:$Rn)>;
-def : Pat<(v1f64 (int_arm64_neon_frecpe (v1f64 FPR64:$Rn))),
- (FRECPEv1i64 FPR64:$Rn)>;
-
-def : Pat<(f32 (int_arm64_neon_frecpx (f32 FPR32:$Rn))),
- (FRECPXv1i32 FPR32:$Rn)>;
-def : Pat<(f64 (int_arm64_neon_frecpx (f64 FPR64:$Rn))),
- (FRECPXv1i64 FPR64:$Rn)>;
-
-def : Pat<(f32 (int_arm64_neon_frsqrte (f32 FPR32:$Rn))),
- (FRSQRTEv1i32 FPR32:$Rn)>;
-def : Pat<(f64 (int_arm64_neon_frsqrte (f64 FPR64:$Rn))),
- (FRSQRTEv1i64 FPR64:$Rn)>;
-def : Pat<(v1f64 (int_arm64_neon_frsqrte (v1f64 FPR64:$Rn))),
- (FRSQRTEv1i64 FPR64:$Rn)>;
-
-// If an integer is about to be converted to a floating point value,
-// just load it on the floating point unit.
-// Here are the patterns for 8- and 16-bit integers to float.
-// 8-bit -> float.
-def : Pat <(f32 (uint_to_fp (i32 (zextloadi8 ro_indexed8:$addr)))),
- (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)),
- (LDRBro ro_indexed8:$addr), bsub))>;
-def : Pat <(f32 (uint_to_fp (i32 (zextloadi8 am_indexed8:$addr)))),
- (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)),
- (LDRBui am_indexed8:$addr), bsub))>;
-def : Pat <(f32 (uint_to_fp (i32 (zextloadi8 am_unscaled8:$addr)))),
- (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)),
- (LDURBi am_unscaled8:$addr), bsub))>;
-// 16-bit -> float.
-def : Pat <(f32 (uint_to_fp (i32 (zextloadi16 ro_indexed16:$addr)))),
- (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)),
- (LDRHro ro_indexed16:$addr), hsub))>;
-def : Pat <(f32 (uint_to_fp (i32 (zextloadi16 am_indexed16:$addr)))),
- (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)),
- (LDRHui am_indexed16:$addr), hsub))>;
-def : Pat <(f32 (uint_to_fp (i32 (zextloadi16 am_unscaled16:$addr)))),
- (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)),
- (LDURHi am_unscaled16:$addr), hsub))>;
-// 32-bit is handled in the target-specific DAG combine
-// performIntToFpCombine.
-// Converting a 64-bit integer to a 32-bit float is not possible with
-// UCVTF on floating point registers (both source and destination
-// must have the same size).
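
A sketch of the IR these patterns target, in 3.5-era untyped-pointer syntax (function name illustrative); the byte load goes straight into an FP register, avoiding a GPR load plus a cross-register-file copy:

define float @u8_to_f32(i8* %p) {
  ; selects to an LDRB into a b-subregister followed by UCVTFv1i32
  %b = load i8* %p
  %z = zext i8 %b to i32
  %f = uitofp i32 %z to float
  ret float %f
}
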
-
-// Here are the patterns for 8-, 16-, 32-, and 64-bit integers to double.
-// 8-bit -> double.
-def : Pat <(f64 (uint_to_fp (i32 (zextloadi8 ro_indexed8:$addr)))),
- (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
- (LDRBro ro_indexed8:$addr), bsub))>;
-def : Pat <(f64 (uint_to_fp (i32 (zextloadi8 am_indexed8:$addr)))),
- (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
- (LDRBui am_indexed8:$addr), bsub))>;
-def : Pat <(f64 (uint_to_fp (i32 (zextloadi8 am_unscaled8:$addr)))),
- (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
- (LDURBi am_unscaled8:$addr), bsub))>;
-// 16-bit -> double.
-def : Pat <(f64 (uint_to_fp (i32 (zextloadi16 ro_indexed16:$addr)))),
- (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
- (LDRHro ro_indexed16:$addr), hsub))>;
-def : Pat <(f64 (uint_to_fp (i32 (zextloadi16 am_indexed16:$addr)))),
- (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
- (LDRHui am_indexed16:$addr), hsub))>;
-def : Pat <(f64 (uint_to_fp (i32 (zextloadi16 am_unscaled16:$addr)))),
- (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
- (LDURHi am_unscaled16:$addr), hsub))>;
-// 32-bit -> double.
-def : Pat <(f64 (uint_to_fp (i32 (load ro_indexed32:$addr)))),
- (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
- (LDRSro ro_indexed32:$addr), ssub))>;
-def : Pat <(f64 (uint_to_fp (i32 (load am_indexed32:$addr)))),
- (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
- (LDRSui am_indexed32:$addr), ssub))>;
-def : Pat <(f64 (uint_to_fp (i32 (load am_unscaled32:$addr)))),
- (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
- (LDURSi am_unscaled32:$addr), ssub))>;
-// 64-bit -> double is handled in the target-specific DAG combine
-// performIntToFpCombine.
-
-//===----------------------------------------------------------------------===//
-// Advanced SIMD three different-sized vector instructions.
-//===----------------------------------------------------------------------===//
-
-defm ADDHN : SIMDNarrowThreeVectorBHS<0,0b0100,"addhn", int_arm64_neon_addhn>;
-defm SUBHN : SIMDNarrowThreeVectorBHS<0,0b0110,"subhn", int_arm64_neon_subhn>;
-defm RADDHN : SIMDNarrowThreeVectorBHS<1,0b0100,"raddhn",int_arm64_neon_raddhn>;
-defm RSUBHN : SIMDNarrowThreeVectorBHS<1,0b0110,"rsubhn",int_arm64_neon_rsubhn>;
-defm PMULL : SIMDDifferentThreeVectorBD<0,0b1110,"pmull",int_arm64_neon_pmull>;
-defm SABAL : SIMDLongThreeVectorTiedBHSabal<0,0b0101,"sabal",
- int_arm64_neon_sabd>;
-defm SABDL : SIMDLongThreeVectorBHSabdl<0, 0b0111, "sabdl",
- int_arm64_neon_sabd>;
-defm SADDL : SIMDLongThreeVectorBHS< 0, 0b0000, "saddl",
- BinOpFrag<(add (sext node:$LHS), (sext node:$RHS))>>;
-defm SADDW : SIMDWideThreeVectorBHS< 0, 0b0001, "saddw",
- BinOpFrag<(add node:$LHS, (sext node:$RHS))>>;
-defm SMLAL : SIMDLongThreeVectorTiedBHS<0, 0b1000, "smlal",
- TriOpFrag<(add node:$LHS, (int_arm64_neon_smull node:$MHS, node:$RHS))>>;
-defm SMLSL : SIMDLongThreeVectorTiedBHS<0, 0b1010, "smlsl",
- TriOpFrag<(sub node:$LHS, (int_arm64_neon_smull node:$MHS, node:$RHS))>>;
-defm SMULL : SIMDLongThreeVectorBHS<0, 0b1100, "smull", int_arm64_neon_smull>;
-defm SQDMLAL : SIMDLongThreeVectorSQDMLXTiedHS<0, 0b1001, "sqdmlal",
- int_arm64_neon_sqadd>;
-defm SQDMLSL : SIMDLongThreeVectorSQDMLXTiedHS<0, 0b1011, "sqdmlsl",
- int_arm64_neon_sqsub>;
-defm SQDMULL : SIMDLongThreeVectorHS<0, 0b1101, "sqdmull",
- int_arm64_neon_sqdmull>;
-defm SSUBL : SIMDLongThreeVectorBHS<0, 0b0010, "ssubl",
- BinOpFrag<(sub (sext node:$LHS), (sext node:$RHS))>>;
-defm SSUBW : SIMDWideThreeVectorBHS<0, 0b0011, "ssubw",
- BinOpFrag<(sub node:$LHS, (sext node:$RHS))>>;
-defm UABAL : SIMDLongThreeVectorTiedBHSabal<1, 0b0101, "uabal",
- int_arm64_neon_uabd>;
-defm UABDL : SIMDLongThreeVectorBHSabdl<1, 0b0111, "uabdl",
- int_arm64_neon_uabd>;
-defm UADDL : SIMDLongThreeVectorBHS<1, 0b0000, "uaddl",
- BinOpFrag<(add (zext node:$LHS), (zext node:$RHS))>>;
-defm UADDW : SIMDWideThreeVectorBHS<1, 0b0001, "uaddw",
- BinOpFrag<(add node:$LHS, (zext node:$RHS))>>;
-defm UMLAL : SIMDLongThreeVectorTiedBHS<1, 0b1000, "umlal",
- TriOpFrag<(add node:$LHS, (int_arm64_neon_umull node:$MHS, node:$RHS))>>;
-defm UMLSL : SIMDLongThreeVectorTiedBHS<1, 0b1010, "umlsl",
- TriOpFrag<(sub node:$LHS, (int_arm64_neon_umull node:$MHS, node:$RHS))>>;
-defm UMULL : SIMDLongThreeVectorBHS<1, 0b1100, "umull", int_arm64_neon_umull>;
-defm USUBL : SIMDLongThreeVectorBHS<1, 0b0010, "usubl",
- BinOpFrag<(sub (zext node:$LHS), (zext node:$RHS))>>;
-defm USUBW : SIMDWideThreeVectorBHS< 1, 0b0011, "usubw",
- BinOpFrag<(sub node:$LHS, (zext node:$RHS))>>;
-
-// Patterns for 64-bit pmull
-def : Pat<(int_arm64_neon_pmull64 V64:$Rn, V64:$Rm),
- (PMULLv1i64 V64:$Rn, V64:$Rm)>;
-def : Pat<(int_arm64_neon_pmull64 (vector_extract (v2i64 V128:$Rn), (i64 1)),
- (vector_extract (v2i64 V128:$Rm), (i64 1))),
- (PMULLv2i64 V128:$Rn, V128:$Rm)>;
-
-// CodeGen patterns for addhn and subhn instructions, which can actually be
-// written in LLVM IR without too much difficulty.
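
For instance, the first ADDHN pattern below corresponds to IR of roughly this shape (a sketch; function name illustrative):

define <8 x i8> @addhn8(<8 x i16> %a, <8 x i16> %b) {
  %sum  = add <8 x i16> %a, %b
  %high = lshr <8 x i16> %sum, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
  %res  = trunc <8 x i16> %high to <8 x i8>
  ret <8 x i8> %res            ; selects to ADDHNv8i16_v8i8
}
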
-
-// ADDHN
-def : Pat<(v8i8 (trunc (v8i16 (ARM64vlshr (add V128:$Rn, V128:$Rm), (i32 8))))),
- (ADDHNv8i16_v8i8 V128:$Rn, V128:$Rm)>;
-def : Pat<(v4i16 (trunc (v4i32 (ARM64vlshr (add V128:$Rn, V128:$Rm),
- (i32 16))))),
- (ADDHNv4i32_v4i16 V128:$Rn, V128:$Rm)>;
-def : Pat<(v2i32 (trunc (v2i64 (ARM64vlshr (add V128:$Rn, V128:$Rm),
- (i32 32))))),
- (ADDHNv2i64_v2i32 V128:$Rn, V128:$Rm)>;
-def : Pat<(concat_vectors (v8i8 V64:$Rd),
- (trunc (v8i16 (ARM64vlshr (add V128:$Rn, V128:$Rm),
- (i32 8))))),
- (ADDHNv8i16_v16i8 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
- V128:$Rn, V128:$Rm)>;
-def : Pat<(concat_vectors (v4i16 V64:$Rd),
- (trunc (v4i32 (ARM64vlshr (add V128:$Rn, V128:$Rm),
- (i32 16))))),
- (ADDHNv4i32_v8i16 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
- V128:$Rn, V128:$Rm)>;
-def : Pat<(concat_vectors (v2i32 V64:$Rd),
- (trunc (v2i64 (ARM64vlshr (add V128:$Rn, V128:$Rm),
- (i32 32))))),
- (ADDHNv2i64_v4i32 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
- V128:$Rn, V128:$Rm)>;
-
-// SUBHN
-def : Pat<(v8i8 (trunc (v8i16 (ARM64vlshr (sub V128:$Rn, V128:$Rm), (i32 8))))),
- (SUBHNv8i16_v8i8 V128:$Rn, V128:$Rm)>;
-def : Pat<(v4i16 (trunc (v4i32 (ARM64vlshr (sub V128:$Rn, V128:$Rm),
- (i32 16))))),
- (SUBHNv4i32_v4i16 V128:$Rn, V128:$Rm)>;
-def : Pat<(v2i32 (trunc (v2i64 (ARM64vlshr (sub V128:$Rn, V128:$Rm),
- (i32 32))))),
- (SUBHNv2i64_v2i32 V128:$Rn, V128:$Rm)>;
-def : Pat<(concat_vectors (v8i8 V64:$Rd),
- (trunc (v8i16 (ARM64vlshr (sub V128:$Rn, V128:$Rm),
- (i32 8))))),
- (SUBHNv8i16_v16i8 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
- V128:$Rn, V128:$Rm)>;
-def : Pat<(concat_vectors (v4i16 V64:$Rd),
- (trunc (v4i32 (ARM64vlshr (sub V128:$Rn, V128:$Rm),
- (i32 16))))),
- (SUBHNv4i32_v8i16 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
- V128:$Rn, V128:$Rm)>;
-def : Pat<(concat_vectors (v2i32 V64:$Rd),
- (trunc (v2i64 (ARM64vlshr (sub V128:$Rn, V128:$Rm),
- (i32 32))))),
- (SUBHNv2i64_v4i32 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
- V128:$Rn, V128:$Rm)>;
-
-//----------------------------------------------------------------------------
-// AdvSIMD bitwise extract from vector instruction.
-//----------------------------------------------------------------------------
-
-defm EXT : SIMDBitwiseExtract<"ext">;
-
-def : Pat<(v4i16 (ARM64ext V64:$Rn, V64:$Rm, (i32 imm:$imm))),
- (EXTv8i8 V64:$Rn, V64:$Rm, imm:$imm)>;
-def : Pat<(v8i16 (ARM64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))),
- (EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>;
-def : Pat<(v2i32 (ARM64ext V64:$Rn, V64:$Rm, (i32 imm:$imm))),
- (EXTv8i8 V64:$Rn, V64:$Rm, imm:$imm)>;
-def : Pat<(v2f32 (ARM64ext V64:$Rn, V64:$Rm, (i32 imm:$imm))),
- (EXTv8i8 V64:$Rn, V64:$Rm, imm:$imm)>;
-def : Pat<(v4i32 (ARM64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))),
- (EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>;
-def : Pat<(v4f32 (ARM64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))),
- (EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>;
-def : Pat<(v2i64 (ARM64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))),
- (EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>;
-def : Pat<(v2f64 (ARM64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))),
- (EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>;
-
-// We use EXT to handle extract_subvector to copy the upper 64 bits of a
-// 128-bit vector.
-def : Pat<(v8i8 (extract_subvector V128:$Rn, (i64 8))),
- (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>;
-def : Pat<(v4i16 (extract_subvector V128:$Rn, (i64 4))),
- (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>;
-def : Pat<(v2i32 (extract_subvector V128:$Rn, (i64 2))),
- (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>;
-def : Pat<(v1i64 (extract_subvector V128:$Rn, (i64 1))),
- (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>;
-def : Pat<(v2f32 (extract_subvector V128:$Rn, (i64 2))),
- (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>;
-def : Pat<(v1f64 (extract_subvector V128:$Rn, (i64 1))),
- (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>;
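
In generic IR the high-half copy is a shufflevector that selects the upper lanes; a sketch (function name illustrative):

define <8 x i8> @high_half(<16 x i8> %v) {
  ; extract_subvector at index 8 selects to EXT Vd.16b, Vn.16b, Vn.16b, #8
  %h = shufflevector <16 x i8> %v, <16 x i8> undef,
         <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <8 x i8> %h
}
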
-
-
-//----------------------------------------------------------------------------
-// AdvSIMD zip vector
-//----------------------------------------------------------------------------
-
-defm TRN1 : SIMDZipVector<0b010, "trn1", ARM64trn1>;
-defm TRN2 : SIMDZipVector<0b110, "trn2", ARM64trn2>;
-defm UZP1 : SIMDZipVector<0b001, "uzp1", ARM64uzp1>;
-defm UZP2 : SIMDZipVector<0b101, "uzp2", ARM64uzp2>;
-defm ZIP1 : SIMDZipVector<0b011, "zip1", ARM64zip1>;
-defm ZIP2 : SIMDZipVector<0b111, "zip2", ARM64zip2>;
-
-//----------------------------------------------------------------------------
-// AdvSIMD TBL/TBX instructions
-//----------------------------------------------------------------------------
-
-defm TBL : SIMDTableLookup< 0, "tbl">;
-defm TBX : SIMDTableLookupTied<1, "tbx">;
-
-def : Pat<(v8i8 (int_arm64_neon_tbl1 (v16i8 VecListOne128:$Rn), (v8i8 V64:$Ri))),
- (TBLv8i8One VecListOne128:$Rn, V64:$Ri)>;
-def : Pat<(v16i8 (int_arm64_neon_tbl1 (v16i8 V128:$Ri), (v16i8 V128:$Rn))),
- (TBLv16i8One V128:$Ri, V128:$Rn)>;
-
-def : Pat<(v8i8 (int_arm64_neon_tbx1 (v8i8 V64:$Rd),
- (v16i8 VecListOne128:$Rn), (v8i8 V64:$Ri))),
- (TBXv8i8One V64:$Rd, VecListOne128:$Rn, V64:$Ri)>;
-def : Pat<(v16i8 (int_arm64_neon_tbx1 (v16i8 V128:$Rd),
- (v16i8 V128:$Ri), (v16i8 V128:$Rn))),
- (TBXv16i8One V128:$Rd, V128:$Ri, V128:$Rn)>;
-
-
-//----------------------------------------------------------------------------
-// AdvSIMD scalar CPY instruction
-//----------------------------------------------------------------------------
-
-defm CPY : SIMDScalarCPY<"cpy">;
-
-//----------------------------------------------------------------------------
-// AdvSIMD scalar pairwise instructions
-//----------------------------------------------------------------------------
-
-defm ADDP : SIMDPairwiseScalarD<0, 0b11011, "addp">;
-defm FADDP : SIMDPairwiseScalarSD<1, 0, 0b01101, "faddp">;
-defm FMAXNMP : SIMDPairwiseScalarSD<1, 0, 0b01100, "fmaxnmp">;
-defm FMAXP : SIMDPairwiseScalarSD<1, 0, 0b01111, "fmaxp">;
-defm FMINNMP : SIMDPairwiseScalarSD<1, 1, 0b01100, "fminnmp">;
-defm FMINP : SIMDPairwiseScalarSD<1, 1, 0b01111, "fminp">;
-def : Pat<(i64 (int_arm64_neon_saddv (v2i64 V128:$Rn))),
- (ADDPv2i64p V128:$Rn)>;
-def : Pat<(i64 (int_arm64_neon_uaddv (v2i64 V128:$Rn))),
- (ADDPv2i64p V128:$Rn)>;
-def : Pat<(f32 (int_arm64_neon_faddv (v2f32 V64:$Rn))),
- (FADDPv2i32p V64:$Rn)>;
-def : Pat<(f32 (int_arm64_neon_faddv (v4f32 V128:$Rn))),
- (FADDPv2i32p (EXTRACT_SUBREG (FADDPv4f32 V128:$Rn, V128:$Rn), dsub))>;
-def : Pat<(f64 (int_arm64_neon_faddv (v2f64 V128:$Rn))),
- (FADDPv2i64p V128:$Rn)>;
-def : Pat<(f32 (int_arm64_neon_fmaxnmv (v2f32 V64:$Rn))),
- (FMAXNMPv2i32p V64:$Rn)>;
-def : Pat<(f64 (int_arm64_neon_fmaxnmv (v2f64 V128:$Rn))),
- (FMAXNMPv2i64p V128:$Rn)>;
-def : Pat<(f32 (int_arm64_neon_fmaxv (v2f32 V64:$Rn))),
- (FMAXPv2i32p V64:$Rn)>;
-def : Pat<(f64 (int_arm64_neon_fmaxv (v2f64 V128:$Rn))),
- (FMAXPv2i64p V128:$Rn)>;
-def : Pat<(f32 (int_arm64_neon_fminnmv (v2f32 V64:$Rn))),
- (FMINNMPv2i32p V64:$Rn)>;
-def : Pat<(f64 (int_arm64_neon_fminnmv (v2f64 V128:$Rn))),
- (FMINNMPv2i64p V128:$Rn)>;
-def : Pat<(f32 (int_arm64_neon_fminv (v2f32 V64:$Rn))),
- (FMINPv2i32p V64:$Rn)>;
-def : Pat<(f64 (int_arm64_neon_fminv (v2f64 V128:$Rn))),
- (FMINPv2i64p V128:$Rn)>;
-
-//----------------------------------------------------------------------------
-// AdvSIMD INS/DUP instructions
-//----------------------------------------------------------------------------
-
-def DUPv8i8gpr : SIMDDupFromMain<0, 0b00001, ".8b", v8i8, V64, GPR32>;
-def DUPv16i8gpr : SIMDDupFromMain<1, 0b00001, ".16b", v16i8, V128, GPR32>;
-def DUPv4i16gpr : SIMDDupFromMain<0, 0b00010, ".4h", v4i16, V64, GPR32>;
-def DUPv8i16gpr : SIMDDupFromMain<1, 0b00010, ".8h", v8i16, V128, GPR32>;
-def DUPv2i32gpr : SIMDDupFromMain<0, 0b00100, ".2s", v2i32, V64, GPR32>;
-def DUPv4i32gpr : SIMDDupFromMain<1, 0b00100, ".4s", v4i32, V128, GPR32>;
-def DUPv2i64gpr : SIMDDupFromMain<1, 0b01000, ".2d", v2i64, V128, GPR64>;
-
-def DUPv2i64lane : SIMDDup64FromElement;
-def DUPv2i32lane : SIMDDup32FromElement<0, ".2s", v2i32, V64>;
-def DUPv4i32lane : SIMDDup32FromElement<1, ".4s", v4i32, V128>;
-def DUPv4i16lane : SIMDDup16FromElement<0, ".4h", v4i16, V64>;
-def DUPv8i16lane : SIMDDup16FromElement<1, ".8h", v8i16, V128>;
-def DUPv8i8lane : SIMDDup8FromElement <0, ".8b", v8i8, V64>;
-def DUPv16i8lane : SIMDDup8FromElement <1, ".16b", v16i8, V128>;
-
-def : Pat<(v2f32 (ARM64dup (f32 FPR32:$Rn))),
- (v2f32 (DUPv2i32lane
- (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rn, ssub),
- (i64 0)))>;
-def : Pat<(v4f32 (ARM64dup (f32 FPR32:$Rn))),
- (v4f32 (DUPv4i32lane
- (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rn, ssub),
- (i64 0)))>;
-def : Pat<(v2f64 (ARM64dup (f64 FPR64:$Rn))),
- (v2f64 (DUPv2i64lane
- (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR64:$Rn, dsub),
- (i64 0)))>;
-
-def : Pat<(v2f32 (ARM64duplane32 (v4f32 V128:$Rn), VectorIndexS:$imm)),
- (DUPv2i32lane V128:$Rn, VectorIndexS:$imm)>;
-def : Pat<(v4f32 (ARM64duplane32 (v4f32 V128:$Rn), VectorIndexS:$imm)),
- (DUPv4i32lane V128:$Rn, VectorIndexS:$imm)>;
-def : Pat<(v2f64 (ARM64duplane64 (v2f64 V128:$Rn), VectorIndexD:$imm)),
- (DUPv2i64lane V128:$Rn, VectorIndexD:$imm)>;
-
-defm SMOV : SMov;
-defm UMOV : UMov;
-
-def : Pat<(sext_inreg (vector_extract (v16i8 V128:$Rn), VectorIndexB:$idx), i8),
- (i32 (SMOVvi8to32 V128:$Rn, VectorIndexB:$idx))>;
-def : Pat<(sext_inreg (vector_extract (v16i8 V128:$Rn), VectorIndexB:$idx), i8),
- (i64 (SMOVvi8to64 V128:$Rn, VectorIndexB:$idx))>;
-def : Pat<(sext_inreg (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx),i16),
- (i32 (SMOVvi16to32 V128:$Rn, VectorIndexH:$idx))>;
-def : Pat<(sext_inreg (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx),i16),
- (i64 (SMOVvi16to64 V128:$Rn, VectorIndexH:$idx))>;
-def : Pat<(sext_inreg (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx),i16),
- (i32 (SMOVvi16to32 V128:$Rn, VectorIndexH:$idx))>;
-def : Pat<(sext (i32 (vector_extract (v4i32 V128:$Rn), VectorIndexS:$idx))),
- (i64 (SMOVvi32to64 V128:$Rn, VectorIndexS:$idx))>;
-
-// Extracting i8 or i16 elements will have the zero-extend transformed to
-// an 'and' mask by type legalization since neither i8 nor i16 is a legal type
-// for ARM64. Match these patterns here since UMOV already zeroes out the high
-// bits of the destination register.
-def : Pat<(and (vector_extract (v16i8 V128:$Rn), VectorIndexB:$idx),
- (i32 0xff)),
- (i32 (UMOVvi8 V128:$Rn, VectorIndexB:$idx))>;
-def : Pat<(and (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx),
- (i32 0xffff)),
- (i32 (UMOVvi16 V128:$Rn, VectorIndexH:$idx))>;
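
A sketch of the IR that ends up with such an 'and' mask after legalization (function name illustrative):

define i32 @extract_u8(<16 x i8> %v) {
  ; i8 is not legal, so the zext legalizes to (and (vector_extract ...), 0xff),
  ; which the pattern above folds into a single UMOV
  %e = extractelement <16 x i8> %v, i32 3
  %z = zext i8 %e to i32
  ret i32 %z
}
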
-
-defm INS : SIMDIns;
-
-def : Pat<(v16i8 (scalar_to_vector GPR32:$Rn)),
- (INSvi8gpr (v16i8 (IMPLICIT_DEF)), (i64 0), GPR32:$Rn)>;
-def : Pat<(v8i8 (scalar_to_vector GPR32:$Rn)),
- (EXTRACT_SUBREG
- (INSvi8gpr (v16i8 (IMPLICIT_DEF)), (i64 0), GPR32:$Rn), dsub)>;
-
-def : Pat<(v8i16 (scalar_to_vector GPR32:$Rn)),
- (INSvi16gpr (v8i16 (IMPLICIT_DEF)), (i64 0), GPR32:$Rn)>;
-def : Pat<(v4i16 (scalar_to_vector GPR32:$Rn)),
- (EXTRACT_SUBREG
- (INSvi16gpr (v8i16 (IMPLICIT_DEF)), (i64 0), GPR32:$Rn), dsub)>;
-
-def : Pat<(v2i32 (scalar_to_vector (i32 FPR32:$Rn))),
- (v2i32 (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)),
- (i32 FPR32:$Rn), ssub))>;
-def : Pat<(v4i32 (scalar_to_vector (i32 FPR32:$Rn))),
- (v4i32 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
- (i32 FPR32:$Rn), ssub))>;
-def : Pat<(v2i64 (scalar_to_vector (i64 FPR64:$Rn))),
- (v2i64 (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)),
- (i64 FPR64:$Rn), dsub))>;
-
-def : Pat<(v4f32 (scalar_to_vector (f32 FPR32:$Rn))),
- (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR32:$Rn, ssub)>;
-def : Pat<(v2f32 (scalar_to_vector (f32 FPR32:$Rn))),
- (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), FPR32:$Rn, ssub)>;
-def : Pat<(v2f64 (scalar_to_vector (f64 FPR64:$Rn))),
- (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FPR64:$Rn, dsub)>;
-
-def : Pat<(v2f32 (vector_insert (v2f32 V64:$Rn),
- (f32 FPR32:$Rm), (i64 VectorIndexS:$imm))),
- (EXTRACT_SUBREG
- (INSvi32lane
- (v4f32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), V64:$Rn, dsub)),
- VectorIndexS:$imm,
- (v4f32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR32:$Rm, ssub)),
- (i64 0)),
- dsub)>;
-def : Pat<(v4f32 (vector_insert (v4f32 V128:$Rn),
- (f32 FPR32:$Rm), (i64 VectorIndexS:$imm))),
- (INSvi32lane
- V128:$Rn, VectorIndexS:$imm,
- (v4f32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR32:$Rm, ssub)),
- (i64 0))>;
-def : Pat<(v2f64 (vector_insert (v2f64 V128:$Rn),
- (f64 FPR64:$Rm), (i64 VectorIndexD:$imm))),
- (INSvi64lane
- V128:$Rn, VectorIndexD:$imm,
- (v2f64 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FPR64:$Rm, dsub)),
- (i64 0))>;
-
-// Copy an element at a constant index in one vector into a constant-indexed
-// element of another.
-// FIXME: refactor to a shared class/def parameterized on vector type, vector
-// index type and INS extension.
-def : Pat<(v16i8 (int_arm64_neon_vcopy_lane
- (v16i8 V128:$Vd), VectorIndexB:$idx, (v16i8 V128:$Vs),
- VectorIndexB:$idx2)),
- (v16i8 (INSvi8lane
- V128:$Vd, VectorIndexB:$idx, V128:$Vs, VectorIndexB:$idx2)
- )>;
-def : Pat<(v8i16 (int_arm64_neon_vcopy_lane
- (v8i16 V128:$Vd), VectorIndexH:$idx, (v8i16 V128:$Vs),
- VectorIndexH:$idx2)),
- (v8i16 (INSvi16lane
- V128:$Vd, VectorIndexH:$idx, V128:$Vs, VectorIndexH:$idx2)
- )>;
-def : Pat<(v4i32 (int_arm64_neon_vcopy_lane
- (v4i32 V128:$Vd), VectorIndexS:$idx, (v4i32 V128:$Vs),
- VectorIndexS:$idx2)),
- (v4i32 (INSvi32lane
- V128:$Vd, VectorIndexS:$idx, V128:$Vs, VectorIndexS:$idx2)
- )>;
-def : Pat<(v2i64 (int_arm64_neon_vcopy_lane
- (v2i64 V128:$Vd), VectorIndexD:$idx, (v2i64 V128:$Vs),
- VectorIndexD:$idx2)),
- (v2i64 (INSvi64lane
- V128:$Vd, VectorIndexD:$idx, V128:$Vs, VectorIndexD:$idx2)
- )>;
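
The same lane-to-lane copy written in generic IR is an extractelement feeding an insertelement, which is likewise a single INS (element); a sketch (function name illustrative):

define <4 x i32> @copy_lane(<4 x i32> %dst, <4 x i32> %src) {
  ; copy src lane 1 into dst lane 3
  %e = extractelement <4 x i32> %src, i32 1
  %r = insertelement <4 x i32> %dst, i32 %e, i32 3
  ret <4 x i32> %r
}
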
-
-// Floating point vector extractions are codegen'd as a subregister extraction,
-// possibly fed by an INS if the lane number is anything other than zero.
-def : Pat<(vector_extract (v2f64 V128:$Rn), 0),
- (f64 (EXTRACT_SUBREG V128:$Rn, dsub))>;
-def : Pat<(vector_extract (v4f32 V128:$Rn), 0),
- (f32 (EXTRACT_SUBREG V128:$Rn, ssub))>;
-def : Pat<(vector_extract (v2f64 V128:$Rn), VectorIndexD:$idx),
- (f64 (EXTRACT_SUBREG
- (INSvi64lane (v2f64 (IMPLICIT_DEF)), 0,
- V128:$Rn, VectorIndexD:$idx),
- dsub))>;
-def : Pat<(vector_extract (v4f32 V128:$Rn), VectorIndexS:$idx),
- (f32 (EXTRACT_SUBREG
- (INSvi32lane (v4f32 (IMPLICIT_DEF)), 0,
- V128:$Rn, VectorIndexS:$idx),
- ssub))>;
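
A sketch showing both cases (function name illustrative): lane 0 is a plain subregister copy, while a nonzero lane needs the INS first:

define float @extract_lanes(<4 x float> %v) {
  %lane0 = extractelement <4 x float> %v, i32 0   ; EXTRACT_SUBREG ssub only
  %lane2 = extractelement <4 x float> %v, i32 2   ; INSvi32lane, then ssub copy
  %sum   = fadd float %lane0, %lane2
  ret float %sum
}
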
-
-// All concat_vectors operations are canonicalised to act on i64 vectors for
-// ARM64. In the general case we need an instruction, which might as well be
-// INS.
-class ConcatPat<ValueType DstTy, ValueType SrcTy>
- : Pat<(DstTy (concat_vectors (SrcTy V64:$Rd), V64:$Rn)),
- (INSvi64lane (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), 1,
- (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rn, dsub), 0)>;
-
-def : ConcatPat<v2i64, v1i64>;
-def : ConcatPat<v2f64, v1f64>;
-def : ConcatPat<v4i32, v2i32>;
-def : ConcatPat<v4f32, v2f32>;
-def : ConcatPat<v8i16, v4i16>;
-def : ConcatPat<v16i8, v8i8>;
-
-// If the high lanes are undef, though, we can just ignore them:
-class ConcatUndefPat<ValueType DstTy, ValueType SrcTy>
- : Pat<(DstTy (concat_vectors (SrcTy V64:$Rn), undef)),
- (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rn, dsub)>;
-
-def : ConcatUndefPat<v2i64, v1i64>;
-def : ConcatUndefPat<v2f64, v1f64>;
-def : ConcatUndefPat<v4i32, v2i32>;
-def : ConcatUndefPat<v4f32, v2f32>;
-def : ConcatUndefPat<v8i16, v4i16>;
-def : ConcatUndefPat<v16i8, v8i8>;
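
In IR terms, concatenation is a shufflevector over both halves, and the undef-high form drops the INS entirely; a sketch (function names illustrative):

define <4 x i32> @concat(<2 x i32> %lo, <2 x i32> %hi) {
  ; ConcatPat: one INS into lane 1 of the widened register
  %r = shufflevector <2 x i32> %lo, <2 x i32> %hi,
         <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i32> %r
}

define <4 x i32> @concat_undef(<2 x i32> %lo) {
  ; ConcatUndefPat: just an INSERT_SUBREG, no INS needed
  %r = shufflevector <2 x i32> %lo, <2 x i32> undef,
         <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  ret <4 x i32> %r
}
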
-
-//----------------------------------------------------------------------------
-// AdvSIMD across lanes instructions
-//----------------------------------------------------------------------------
-
-defm ADDV : SIMDAcrossLanesBHS<0, 0b11011, "addv">;
-defm SMAXV : SIMDAcrossLanesBHS<0, 0b01010, "smaxv">;
-defm SMINV : SIMDAcrossLanesBHS<0, 0b11010, "sminv">;
-defm UMAXV : SIMDAcrossLanesBHS<1, 0b01010, "umaxv">;
-defm UMINV : SIMDAcrossLanesBHS<1, 0b11010, "uminv">;
-defm SADDLV : SIMDAcrossLanesHSD<0, 0b00011, "saddlv">;
-defm UADDLV : SIMDAcrossLanesHSD<1, 0b00011, "uaddlv">;
-defm FMAXNMV : SIMDAcrossLanesS<0b01100, 0, "fmaxnmv", int_arm64_neon_fmaxnmv>;
-defm FMAXV : SIMDAcrossLanesS<0b01111, 0, "fmaxv", int_arm64_neon_fmaxv>;
-defm FMINNMV : SIMDAcrossLanesS<0b01100, 1, "fminnmv", int_arm64_neon_fminnmv>;
-defm FMINV : SIMDAcrossLanesS<0b01111, 1, "fminv", int_arm64_neon_fminv>;
-
-multiclass SIMDAcrossLanesSignedIntrinsic<string baseOpc, Intrinsic intOp> {
-// If there is a sign extension after this intrinsic, consume it, as SMOV
-// already performed it (see the IR sketch after this multiclass).
- def : Pat<(i32 (sext_inreg (i32 (intOp (v8i8 V64:$Rn))), i8)),
- (i32 (SMOVvi8to32
- (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
- (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub),
- (i64 0)))>;
- def : Pat<(i32 (intOp (v8i8 V64:$Rn))),
- (i32 (SMOVvi8to32
- (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
- (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub),
- (i64 0)))>;
-// If there is a sign extension after this intrinsic, consume it, as SMOV
-// already performed it.
-def : Pat<(i32 (sext_inreg (i32 (intOp (v16i8 V128:$Rn))), i8)),
- (i32 (SMOVvi8to32
- (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
- (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub),
- (i64 0)))>;
-def : Pat<(i32 (intOp (v16i8 V128:$Rn))),
- (i32 (SMOVvi8to32
- (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
- (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub),
- (i64 0)))>;
-// If there is a sign extension after this intrinsic, consume it, as SMOV
-// already performed it.
-def : Pat<(i32 (sext_inreg (i32 (intOp (v4i16 V64:$Rn))), i16)),
- (i32 (SMOVvi16to32
- (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
- (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub),
- (i64 0)))>;
-def : Pat<(i32 (intOp (v4i16 V64:$Rn))),
- (i32 (SMOVvi16to32
- (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
- (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub),
- (i64 0)))>;
-// If there is a sign extension after this intrinsic, consume it, as SMOV
-// already performed it.
-def : Pat<(i32 (sext_inreg (i32 (intOp (v8i16 V128:$Rn))), i16)),
- (i32 (SMOVvi16to32
- (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
- (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub),
- (i64 0)))>;
-def : Pat<(i32 (intOp (v8i16 V128:$Rn))),
- (i32 (SMOVvi16to32
- (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
- (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub),
- (i64 0)))>;
-
-def : Pat<(i32 (intOp (v4i32 V128:$Rn))),
- (i32 (EXTRACT_SUBREG
- (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
- (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), ssub),
- ssub))>;
-}
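
A sketch of the sext consumption (the IR-level intrinsic name and type mangling are assumptions here, modeled on the usual llvm.arm64.neon.* scheme):

declare i32 @llvm.arm64.neon.smaxv.i32.v8i8(<8 x i8>)

define i32 @smaxv_sext(<8 x i8> %v) {
  %m = call i32 @llvm.arm64.neon.smaxv.i32.v8i8(<8 x i8> %v)
  ; trunc+sext becomes sext_inreg and is folded away: SMOV sign-extends
  %t = trunc i32 %m to i8
  %s = sext i8 %t to i32
  ret i32 %s
}
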
-
-multiclass SIMDAcrossLanesUnsignedIntrinsic<string baseOpc, Intrinsic intOp> {
-// If there is a masking operation keeping only what has actually been
-// generated, consume it (see the IR sketch after this multiclass).
- def : Pat<(i32 (and (i32 (intOp (v8i8 V64:$Rn))), maski8_or_more)),
- (i32 (EXTRACT_SUBREG
- (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
- (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub),
- ssub))>;
- def : Pat<(i32 (intOp (v8i8 V64:$Rn))),
- (i32 (EXTRACT_SUBREG
- (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
- (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub),
- ssub))>;
-// If there is a masking operation keeping only what has actually been
-// generated, consume it.
-def : Pat<(i32 (and (i32 (intOp (v16i8 V128:$Rn))), maski8_or_more)),
- (i32 (EXTRACT_SUBREG
- (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
- (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub),
- ssub))>;
-def : Pat<(i32 (intOp (v16i8 V128:$Rn))),
- (i32 (EXTRACT_SUBREG
- (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
- (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub),
- ssub))>;
-
-// If there is a masking operation keeping only what has actually been
-// generated, consume it.
-def : Pat<(i32 (and (i32 (intOp (v4i16 V64:$Rn))), maski16_or_more)),
- (i32 (EXTRACT_SUBREG
- (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
- (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub),
- ssub))>;
-def : Pat<(i32 (intOp (v4i16 V64:$Rn))),
- (i32 (EXTRACT_SUBREG
- (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
- (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub),
- ssub))>;
-// If there is a masking operation keeping only what has actually been
-// generated, consume it.
-def : Pat<(i32 (and (i32 (intOp (v8i16 V128:$Rn))), maski16_or_more)),
- (i32 (EXTRACT_SUBREG
- (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
- (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub),
- ssub))>;
-def : Pat<(i32 (intOp (v8i16 V128:$Rn))),
- (i32 (EXTRACT_SUBREG
- (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
- (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub),
- ssub))>;
-
-def : Pat<(i32 (intOp (v4i32 V128:$Rn))),
- (i32 (EXTRACT_SUBREG
- (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
- (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), ssub),
- ssub))>;
-
-}
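
And the unsigned counterpart (intrinsic mangling again assumed):

declare i32 @llvm.arm64.neon.umaxv.i32.v8i8(<8 x i8>)

define i32 @umaxv_mask(<8 x i8> %v) {
  %m = call i32 @llvm.arm64.neon.umaxv.i32.v8i8(<8 x i8> %v)
  ; the mask is consumed: UMAXV already left the high bits zero
  %z = and i32 %m, 255
  ret i32 %z
}
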
-
-multiclass SIMDAcrossLanesSignedLongIntrinsic<string baseOpc, Intrinsic intOp> {
- def : Pat<(i32 (intOp (v8i8 V64:$Rn))),
- (i32 (SMOVvi16to32
- (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
- (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), hsub),
- (i64 0)))>;
-def : Pat<(i32 (intOp (v16i8 V128:$Rn))),
- (i32 (SMOVvi16to32
- (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
- (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), hsub),
- (i64 0)))>;
-
-def : Pat<(i32 (intOp (v4i16 V64:$Rn))),
- (i32 (EXTRACT_SUBREG
- (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
- (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), ssub),
- ssub))>;
-def : Pat<(i32 (intOp (v8i16 V128:$Rn))),
- (i32 (EXTRACT_SUBREG
- (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
- (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), ssub),
- ssub))>;
-
-def : Pat<(i64 (intOp (v4i32 V128:$Rn))),
- (i64 (EXTRACT_SUBREG
- (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
- (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), dsub),
- dsub))>;
-}
-
-multiclass SIMDAcrossLanesUnsignedLongIntrinsic<string baseOpc,
- Intrinsic intOp> {
- def : Pat<(i32 (intOp (v8i8 V64:$Rn))),
- (i32 (EXTRACT_SUBREG
- (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
- (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), hsub),
- ssub))>;
-def : Pat<(i32 (intOp (v16i8 V128:$Rn))),
- (i32 (EXTRACT_SUBREG
- (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
- (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), hsub),
- ssub))>;
-
-def : Pat<(i32 (intOp (v4i16 V64:$Rn))),
- (i32 (EXTRACT_SUBREG
- (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
- (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), ssub),
- ssub))>;
-def : Pat<(i32 (intOp (v8i16 V128:$Rn))),
- (i32 (EXTRACT_SUBREG
- (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
- (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), ssub),
- ssub))>;
-
-def : Pat<(i64 (intOp (v4i32 V128:$Rn))),
- (i64 (EXTRACT_SUBREG
- (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
- (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), dsub),
- dsub))>;
-}
-
-defm : SIMDAcrossLanesSignedIntrinsic<"ADDV", int_arm64_neon_saddv>;
-// vaddv_[su]32 is special: it lowers to ADDP Vd.2S, Vn.2S, Vm.2S with
-// Vn == Vm, and the result is Vd.s[0].
-def : Pat<(i32 (int_arm64_neon_saddv (v2i32 V64:$Rn))),
- (EXTRACT_SUBREG (ADDPv2i32 V64:$Rn, V64:$Rn), ssub)>;
-
-defm : SIMDAcrossLanesUnsignedIntrinsic<"ADDV", int_arm64_neon_uaddv>;
-// vaddv_[su]32 is special: it maps to ADDP Vd.2S, Vn.2S, Vm.2S with Vn == Vm;
-// the result is returned from Vd.s[0].
-def : Pat<(i32 (int_arm64_neon_uaddv (v2i32 V64:$Rn))),
- (EXTRACT_SUBREG (ADDPv2i32 V64:$Rn, V64:$Rn), ssub)>;
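-// For illustration (a sketch, not part of the selection tables): for an
-// int32x2_t value in v0, vaddv_u32 selects to
-//   addp v0.2s, v0.2s, v0.2s   // pairwise add of the two lanes, Vn == Vm
-// and the 32-bit sum is then read out of lane 0 via the ssub subregister.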
-
-defm : SIMDAcrossLanesSignedIntrinsic<"SMAXV", int_arm64_neon_smaxv>;
-def : Pat<(i32 (int_arm64_neon_smaxv (v2i32 V64:$Rn))),
- (EXTRACT_SUBREG (SMAXPv2i32 V64:$Rn, V64:$Rn), ssub)>;
-
-defm : SIMDAcrossLanesSignedIntrinsic<"SMINV", int_arm64_neon_sminv>;
-def : Pat<(i32 (int_arm64_neon_sminv (v2i32 V64:$Rn))),
- (EXTRACT_SUBREG (SMINPv2i32 V64:$Rn, V64:$Rn), ssub)>;
-
-defm : SIMDAcrossLanesUnsignedIntrinsic<"UMAXV", int_arm64_neon_umaxv>;
-def : Pat<(i32 (int_arm64_neon_umaxv (v2i32 V64:$Rn))),
- (EXTRACT_SUBREG (UMAXPv2i32 V64:$Rn, V64:$Rn), ssub)>;
-
-defm : SIMDAcrossLanesUnsignedIntrinsic<"UMINV", int_arm64_neon_uminv>;
-def : Pat<(i32 (int_arm64_neon_uminv (v2i32 V64:$Rn))),
- (EXTRACT_SUBREG (UMINPv2i32 V64:$Rn, V64:$Rn), ssub)>;
-
-defm : SIMDAcrossLanesSignedLongIntrinsic<"SADDLV", int_arm64_neon_saddlv>;
-defm : SIMDAcrossLanesUnsignedLongIntrinsic<"UADDLV", int_arm64_neon_uaddlv>;
-
-// The vaddlv_s32 intrinsic gets mapped to SADDLP.
-def : Pat<(i64 (int_arm64_neon_saddlv (v2i32 V64:$Rn))),
- (i64 (EXTRACT_SUBREG
- (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
- (SADDLPv2i32_v1i64 V64:$Rn), dsub),
- dsub))>;
-// The vaddlv_u32 intrinsic gets mapped to UADDLP.
-def : Pat<(i64 (int_arm64_neon_uaddlv (v2i32 V64:$Rn))),
- (i64 (EXTRACT_SUBREG
- (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
- (UADDLPv2i32_v1i64 V64:$Rn), dsub),
- dsub))>;
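-// For illustration: vaddlv_u32 on a vector {a, b} in v0 thus becomes
-//   uaddlp v0.1d, v0.2s   // widen and pairwise add: zext(a) + zext(b)
-// and the i64 result is read via the dsub subregister.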
-
-//----------------------------------------------------------------------------
-// AdvSIMD modified immediate instructions
-//----------------------------------------------------------------------------
-
-// AdvSIMD BIC
-defm BIC : SIMDModifiedImmVectorShiftTied<1, 0b11, 0b01, "bic", ARM64bici>;
-// AdvSIMD ORR
-defm ORR : SIMDModifiedImmVectorShiftTied<0, 0b11, 0b01, "orr", ARM64orri>;
-
-
-// AdvSIMD FMOV
-def FMOVv2f64_ns : SIMDModifiedImmVectorNoShift<1, 1, 0b1111, V128, fpimm8,
- "fmov", ".2d",
- [(set (v2f64 V128:$Rd), (ARM64fmov imm0_255:$imm8))]>;
-def FMOVv2f32_ns : SIMDModifiedImmVectorNoShift<0, 0, 0b1111, V64, fpimm8,
- "fmov", ".2s",
- [(set (v2f32 V64:$Rd), (ARM64fmov imm0_255:$imm8))]>;
-def FMOVv4f32_ns : SIMDModifiedImmVectorNoShift<1, 0, 0b1111, V128, fpimm8,
- "fmov", ".4s",
- [(set (v4f32 V128:$Rd), (ARM64fmov imm0_255:$imm8))]>;
-
-// AdvSIMD MOVI
-
-// EDIT byte mask: scalar
-let isReMaterializable = 1, isAsCheapAsAMove = 1 in
-def MOVID : SIMDModifiedImmScalarNoShift<0, 1, 0b1110, "movi",
- [(set FPR64:$Rd, simdimmtype10:$imm8)]>;
-// The movi_edit node has the immediate value already encoded, so we use
-// a plain imm0_255 here.
-def : Pat<(f64 (ARM64movi_edit imm0_255:$shift)),
- (MOVID imm0_255:$shift)>;
-
-def : Pat<(v1i64 immAllZerosV), (MOVID (i32 0))>;
-def : Pat<(v2i32 immAllZerosV), (MOVID (i32 0))>;
-def : Pat<(v4i16 immAllZerosV), (MOVID (i32 0))>;
-def : Pat<(v8i8 immAllZerosV), (MOVID (i32 0))>;
-
-def : Pat<(v1i64 immAllOnesV), (MOVID (i32 255))>;
-def : Pat<(v2i32 immAllOnesV), (MOVID (i32 255))>;
-def : Pat<(v4i16 immAllOnesV), (MOVID (i32 255))>;
-def : Pat<(v8i8 immAllOnesV), (MOVID (i32 255))>;
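-// For illustration: simdimmtype10 expands each bit i of imm8 into byte i of
-// the result (0 -> 0x00, 1 -> 0xff), so the imm8 = 0 / imm8 = 255 patterns
-// above materialize all-zeros and all-ones in a single instruction, e.g.
-//   movi d0, #0xffffffffffffffff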
-
-// EDIT byte mask: 2d
-
-// The movi_edit node has the immediate value already encoded, so we use
-// a plain imm0_255 in the pattern
-let isReMaterializable = 1, isAsCheapAsAMove = 1 in
-def MOVIv2d_ns : SIMDModifiedImmVectorNoShift<1, 1, 0b1110, V128,
- simdimmtype10,
- "movi", ".2d",
- [(set (v2i64 V128:$Rd), (ARM64movi_edit imm0_255:$imm8))]>;
-
-
-// Use movi.2d to materialize 0.0 if the HW does zero-cycle zeroing.
-// Complexity is added to break a tie with a plain MOVI.
-let AddedComplexity = 1 in {
-def : Pat<(f32 fpimm0),
- (f32 (EXTRACT_SUBREG (v2i64 (MOVIv2d_ns (i32 0))), ssub))>,
- Requires<[HasZCZ]>;
-def : Pat<(f64 fpimm0),
- (f64 (EXTRACT_SUBREG (v2i64 (MOVIv2d_ns (i32 0))), dsub))>,
- Requires<[HasZCZ]>;
-}
-
-def : Pat<(v2i64 immAllZerosV), (MOVIv2d_ns (i32 0))>;
-def : Pat<(v4i32 immAllZerosV), (MOVIv2d_ns (i32 0))>;
-def : Pat<(v8i16 immAllZerosV), (MOVIv2d_ns (i32 0))>;
-def : Pat<(v16i8 immAllZerosV), (MOVIv2d_ns (i32 0))>;
-
-def : Pat<(v2i64 immAllOnesV), (MOVIv2d_ns (i32 255))>;
-def : Pat<(v4i32 immAllOnesV), (MOVIv2d_ns (i32 255))>;
-def : Pat<(v8i16 immAllOnesV), (MOVIv2d_ns (i32 255))>;
-def : Pat<(v16i8 immAllOnesV), (MOVIv2d_ns (i32 255))>;
-
-// EDIT per word & halfword: 2s, 4h, 4s, & 8h
-defm MOVI : SIMDModifiedImmVectorShift<0, 0b10, 0b00, "movi">;
-def : Pat<(v2i32 (ARM64movi_shift imm0_255:$imm8, (i32 imm:$shift))),
- (MOVIv2i32 imm0_255:$imm8, imm:$shift)>;
-def : Pat<(v4i32 (ARM64movi_shift imm0_255:$imm8, (i32 imm:$shift))),
- (MOVIv4i32 imm0_255:$imm8, imm:$shift)>;
-def : Pat<(v4i16 (ARM64movi_shift imm0_255:$imm8, (i32 imm:$shift))),
- (MOVIv4i16 imm0_255:$imm8, imm:$shift)>;
-def : Pat<(v8i16 (ARM64movi_shift imm0_255:$imm8, (i32 imm:$shift))),
- (MOVIv8i16 imm0_255:$imm8, imm:$shift)>;
-
-// EDIT per word: 2s & 4s with MSL shifter
-def MOVIv2s_msl : SIMDModifiedImmMoveMSL<0, 0, {1,1,0,?}, V64, "movi", ".2s",
- [(set (v2i32 V64:$Rd),
- (ARM64movi_msl imm0_255:$imm8, (i32 imm:$shift)))]>;
-def MOVIv4s_msl : SIMDModifiedImmMoveMSL<1, 0, {1,1,0,?}, V128, "movi", ".4s",
- [(set (v4i32 V128:$Rd),
- (ARM64movi_msl imm0_255:$imm8, (i32 imm:$shift)))]>;
-
-// Per byte: 8b & 16b
-def MOVIv8b_ns : SIMDModifiedImmVectorNoShift<0, 0, 0b1110, V64, imm0_255,
- "movi", ".8b",
- [(set (v8i8 V64:$Rd), (ARM64movi imm0_255:$imm8))]>;
-def MOVIv16b_ns : SIMDModifiedImmVectorNoShift<1, 0, 0b1110, V128, imm0_255,
- "movi", ".16b",
- [(set (v16i8 V128:$Rd), (ARM64movi imm0_255:$imm8))]>;
-
-// AdvSIMD MVNI
-
-// EDIT per word & halfword: 2s, 4h, 4s, & 8h
-defm MVNI : SIMDModifiedImmVectorShift<1, 0b10, 0b00, "mvni">;
-def : Pat<(v2i32 (ARM64mvni_shift imm0_255:$imm8, (i32 imm:$shift))),
- (MVNIv2i32 imm0_255:$imm8, imm:$shift)>;
-def : Pat<(v4i32 (ARM64mvni_shift imm0_255:$imm8, (i32 imm:$shift))),
- (MVNIv4i32 imm0_255:$imm8, imm:$shift)>;
-def : Pat<(v4i16 (ARM64mvni_shift imm0_255:$imm8, (i32 imm:$shift))),
- (MVNIv4i16 imm0_255:$imm8, imm:$shift)>;
-def : Pat<(v8i16 (ARM64mvni_shift imm0_255:$imm8, (i32 imm:$shift))),
- (MVNIv8i16 imm0_255:$imm8, imm:$shift)>;
-
-// EDIT per word: 2s & 4s with MSL shifter
-def MVNIv2s_msl : SIMDModifiedImmMoveMSL<0, 1, {1,1,0,?}, V64, "mvni", ".2s",
- [(set (v2i32 V64:$Rd),
- (ARM64mvni_msl imm0_255:$imm8, (i32 imm:$shift)))]>;
-def MVNIv4s_msl : SIMDModifiedImmMoveMSL<1, 1, {1,1,0,?}, V128, "mvni", ".4s",
- [(set (v4i32 V128:$Rd),
- (ARM64mvni_msl imm0_255:$imm8, (i32 imm:$shift)))]>;
-
-//----------------------------------------------------------------------------
-// AdvSIMD indexed element
-//----------------------------------------------------------------------------
-
-let neverHasSideEffects = 1 in {
- defm FMLA : SIMDFPIndexedSDTied<0, 0b0001, "fmla">;
- defm FMLS : SIMDFPIndexedSDTied<0, 0b0101, "fmls">;
-}
-
-// NOTE: Operands are reordered in the FMLA/FMLS PatFrags because the
-// instruction expects the addend first, while the intrinsic expects it last.
-
-// On the other hand, there are quite a few valid combinatorial options due to
-// the commutativity of multiplication and the fact that (-x) * y = x * (-y).
-defm : SIMDFPIndexedSDTiedPatterns<"FMLA",
- TriOpFrag<(fma node:$RHS, node:$MHS, node:$LHS)>>;
-defm : SIMDFPIndexedSDTiedPatterns<"FMLA",
- TriOpFrag<(fma node:$MHS, node:$RHS, node:$LHS)>>;
-
-defm : SIMDFPIndexedSDTiedPatterns<"FMLS",
- TriOpFrag<(fma node:$MHS, (fneg node:$RHS), node:$LHS)> >;
-defm : SIMDFPIndexedSDTiedPatterns<"FMLS",
- TriOpFrag<(fma node:$RHS, (fneg node:$MHS), node:$LHS)> >;
-defm : SIMDFPIndexedSDTiedPatterns<"FMLS",
- TriOpFrag<(fma (fneg node:$RHS), node:$MHS, node:$LHS)> >;
-defm : SIMDFPIndexedSDTiedPatterns<"FMLS",
- TriOpFrag<(fma (fneg node:$MHS), node:$RHS, node:$LHS)> >;
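-// For illustration: the ISD node for "a * b + c" is (fma a, b, c) with the
-// addend last, while FMLA's tied first operand is the accumulator, so
-// TriOpFrag<(fma node:$RHS, node:$MHS, node:$LHS)> together with its
-// commuted twin above covers both orders of the multiplication operands.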
-
-multiclass FMLSIndexedAfterNegPatterns<SDPatternOperator OpNode> {
- // 3 variants for the .2s version: DUPLANE from 128-bit, DUPLANE from 64-bit
- // and DUP scalar.
- def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn),
- (ARM64duplane32 (v4f32 (fneg V128:$Rm)),
- VectorIndexS:$idx))),
- (FMLSv2i32_indexed V64:$Rd, V64:$Rn, V128:$Rm, VectorIndexS:$idx)>;
- def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn),
- (v2f32 (ARM64duplane32
- (v4f32 (insert_subvector undef,
- (v2f32 (fneg V64:$Rm)),
- (i32 0))),
- VectorIndexS:$idx)))),
- (FMLSv2i32_indexed V64:$Rd, V64:$Rn,
- (SUBREG_TO_REG (i32 0), V64:$Rm, dsub),
- VectorIndexS:$idx)>;
- def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn),
- (ARM64dup (f32 (fneg FPR32Op:$Rm))))),
- (FMLSv2i32_indexed V64:$Rd, V64:$Rn,
- (SUBREG_TO_REG (i32 0), FPR32Op:$Rm, ssub), (i64 0))>;
-
- // 3 variants for the .4s version: DUPLANE from 128-bit, DUPLANE from 64-bit
- // and DUP scalar.
- def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn),
- (ARM64duplane32 (v4f32 (fneg V128:$Rm)),
- VectorIndexS:$idx))),
- (FMLSv4i32_indexed V128:$Rd, V128:$Rn, V128:$Rm,
- VectorIndexS:$idx)>;
- def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn),
- (v4f32 (ARM64duplane32
- (v4f32 (insert_subvector undef,
- (v2f32 (fneg V64:$Rm)),
- (i32 0))),
- VectorIndexS:$idx)))),
- (FMLSv4i32_indexed V128:$Rd, V128:$Rn,
- (SUBREG_TO_REG (i32 0), V64:$Rm, dsub),
- VectorIndexS:$idx)>;
- def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn),
- (ARM64dup (f32 (fneg FPR32Op:$Rm))))),
- (FMLSv4i32_indexed V128:$Rd, V128:$Rn,
- (SUBREG_TO_REG (i32 0), FPR32Op:$Rm, ssub), (i64 0))>;
-
- // 2 variants for the .2d version: DUPLANE from 128-bit, and DUP scalar
- // (DUPLANE from 64-bit would be trivial).
- def : Pat<(v2f64 (OpNode (v2f64 V128:$Rd), (v2f64 V128:$Rn),
- (ARM64duplane64 (v2f64 (fneg V128:$Rm)),
- VectorIndexD:$idx))),
- (FMLSv2i64_indexed
- V128:$Rd, V128:$Rn, V128:$Rm, VectorIndexD:$idx)>;
- def : Pat<(v2f64 (OpNode (v2f64 V128:$Rd), (v2f64 V128:$Rn),
- (ARM64dup (f64 (fneg FPR64Op:$Rm))))),
- (FMLSv2i64_indexed V128:$Rd, V128:$Rn,
- (SUBREG_TO_REG (i32 0), FPR64Op:$Rm, dsub), (i64 0))>;
-
- // 2 variants for 32-bit scalar version: extract from .2s or from .4s
- def : Pat<(f32 (OpNode (f32 FPR32:$Rd), (f32 FPR32:$Rn),
- (vector_extract (v4f32 (fneg V128:$Rm)),
- VectorIndexS:$idx))),
- (FMLSv1i32_indexed FPR32:$Rd, FPR32:$Rn,
- V128:$Rm, VectorIndexS:$idx)>;
- def : Pat<(f32 (OpNode (f32 FPR32:$Rd), (f32 FPR32:$Rn),
- (vector_extract (v2f32 (fneg V64:$Rm)),
- VectorIndexS:$idx))),
- (FMLSv1i32_indexed FPR32:$Rd, FPR32:$Rn,
- (SUBREG_TO_REG (i32 0), V64:$Rm, dsub), VectorIndexS:$idx)>;
-
- // 1 variant for 64-bit scalar version: extract from .1d or from .2d
- def : Pat<(f64 (OpNode (f64 FPR64:$Rd), (f64 FPR64:$Rn),
- (vector_extract (v2f64 (fneg V128:$Rm)),
- VectorIndexD:$idx))),
- (FMLSv1i64_indexed FPR64:$Rd, FPR64:$Rn,
- V128:$Rm, VectorIndexD:$idx)>;
-}
-
-defm : FMLSIndexedAfterNegPatterns<
- TriOpFrag<(fma node:$RHS, node:$MHS, node:$LHS)> >;
-defm : FMLSIndexedAfterNegPatterns<
- TriOpFrag<(fma node:$MHS, node:$RHS, node:$LHS)> >;
-
-defm FMULX : SIMDFPIndexedSD<1, 0b1001, "fmulx", int_arm64_neon_fmulx>;
-defm FMUL : SIMDFPIndexedSD<0, 0b1001, "fmul", fmul>;
-
-def : Pat<(v2f32 (fmul V64:$Rn, (ARM64dup (f32 FPR32:$Rm)))),
- (FMULv2i32_indexed V64:$Rn,
- (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rm, ssub),
- (i64 0))>;
-def : Pat<(v4f32 (fmul V128:$Rn, (ARM64dup (f32 FPR32:$Rm)))),
- (FMULv4i32_indexed V128:$Rn,
- (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rm, ssub),
- (i64 0))>;
-def : Pat<(v2f64 (fmul V128:$Rn, (ARM64dup (f64 FPR64:$Rm)))),
- (FMULv2i64_indexed V128:$Rn,
- (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR64:$Rm, dsub),
- (i64 0))>;
-
-defm SQDMULH : SIMDIndexedHS<0, 0b1100, "sqdmulh", int_arm64_neon_sqdmulh>;
-defm SQRDMULH : SIMDIndexedHS<0, 0b1101, "sqrdmulh", int_arm64_neon_sqrdmulh>;
-defm MLA : SIMDVectorIndexedHSTied<1, 0b0000, "mla",
- TriOpFrag<(add node:$LHS, (mul node:$MHS, node:$RHS))>>;
-defm MLS : SIMDVectorIndexedHSTied<1, 0b0100, "mls",
- TriOpFrag<(sub node:$LHS, (mul node:$MHS, node:$RHS))>>;
-defm MUL : SIMDVectorIndexedHS<0, 0b1000, "mul", mul>;
-defm SMLAL : SIMDVectorIndexedLongSDTied<0, 0b0010, "smlal",
- TriOpFrag<(add node:$LHS, (int_arm64_neon_smull node:$MHS, node:$RHS))>>;
-defm SMLSL : SIMDVectorIndexedLongSDTied<0, 0b0110, "smlsl",
- TriOpFrag<(sub node:$LHS, (int_arm64_neon_smull node:$MHS, node:$RHS))>>;
-defm SMULL : SIMDVectorIndexedLongSD<0, 0b1010, "smull",
- int_arm64_neon_smull>;
-defm SQDMLAL : SIMDIndexedLongSQDMLXSDTied<0, 0b0011, "sqdmlal",
- int_arm64_neon_sqadd>;
-defm SQDMLSL : SIMDIndexedLongSQDMLXSDTied<0, 0b0111, "sqdmlsl",
- int_arm64_neon_sqsub>;
-defm SQDMULL : SIMDIndexedLongSD<0, 0b1011, "sqdmull", int_arm64_neon_sqdmull>;
-defm UMLAL : SIMDVectorIndexedLongSDTied<1, 0b0010, "umlal",
- TriOpFrag<(add node:$LHS, (int_arm64_neon_umull node:$MHS, node:$RHS))>>;
-defm UMLSL : SIMDVectorIndexedLongSDTied<1, 0b0110, "umlsl",
- TriOpFrag<(sub node:$LHS, (int_arm64_neon_umull node:$MHS, node:$RHS))>>;
-defm UMULL : SIMDVectorIndexedLongSD<1, 0b1010, "umull",
- int_arm64_neon_umull>;
-
-// A scalar sqdmull with the second operand being a vector lane can be
-// handled directly with the indexed instruction encoding.
-def : Pat<(int_arm64_neon_sqdmulls_scalar (i32 FPR32:$Rn),
- (vector_extract (v4i32 V128:$Vm),
- VectorIndexS:$idx)),
- (SQDMULLv1i64_indexed FPR32:$Rn, V128:$Vm, VectorIndexS:$idx)>;
-
-//----------------------------------------------------------------------------
-// AdvSIMD scalar shift instructions
-//----------------------------------------------------------------------------
-defm FCVTZS : SIMDScalarRShiftSD<0, 0b11111, "fcvtzs">;
-defm FCVTZU : SIMDScalarRShiftSD<1, 0b11111, "fcvtzu">;
-defm SCVTF : SIMDScalarRShiftSD<0, 0b11100, "scvtf">;
-defm UCVTF : SIMDScalarRShiftSD<1, 0b11100, "ucvtf">;
-// Codegen patterns for the above. We don't put these directly on the
-// instructions because TableGen's type inference can't handle the truth.
-// Having the same base pattern for fp <--> int totally freaks it out.
-def : Pat<(int_arm64_neon_vcvtfp2fxs FPR32:$Rn, vecshiftR32:$imm),
- (FCVTZSs FPR32:$Rn, vecshiftR32:$imm)>;
-def : Pat<(int_arm64_neon_vcvtfp2fxu FPR32:$Rn, vecshiftR32:$imm),
- (FCVTZUs FPR32:$Rn, vecshiftR32:$imm)>;
-def : Pat<(i64 (int_arm64_neon_vcvtfp2fxs (f64 FPR64:$Rn), vecshiftR64:$imm)),
- (FCVTZSd FPR64:$Rn, vecshiftR64:$imm)>;
-def : Pat<(i64 (int_arm64_neon_vcvtfp2fxu (f64 FPR64:$Rn), vecshiftR64:$imm)),
- (FCVTZUd FPR64:$Rn, vecshiftR64:$imm)>;
-def : Pat<(v1i64 (int_arm64_neon_vcvtfp2fxs (v1f64 FPR64:$Rn),
- vecshiftR64:$imm)),
- (FCVTZSd FPR64:$Rn, vecshiftR64:$imm)>;
-def : Pat<(v1i64 (int_arm64_neon_vcvtfp2fxu (v1f64 FPR64:$Rn),
- vecshiftR64:$imm)),
- (FCVTZUd FPR64:$Rn, vecshiftR64:$imm)>;
-def : Pat<(int_arm64_neon_vcvtfxs2fp FPR32:$Rn, vecshiftR32:$imm),
- (SCVTFs FPR32:$Rn, vecshiftR32:$imm)>;
-def : Pat<(int_arm64_neon_vcvtfxu2fp FPR32:$Rn, vecshiftR32:$imm),
- (UCVTFs FPR32:$Rn, vecshiftR32:$imm)>;
-def : Pat<(f64 (int_arm64_neon_vcvtfxs2fp (i64 FPR64:$Rn), vecshiftR64:$imm)),
- (SCVTFd FPR64:$Rn, vecshiftR64:$imm)>;
-def : Pat<(f64 (int_arm64_neon_vcvtfxu2fp (i64 FPR64:$Rn), vecshiftR64:$imm)),
- (UCVTFd FPR64:$Rn, vecshiftR64:$imm)>;
-def : Pat<(v1f64 (int_arm64_neon_vcvtfxs2fp (v1i64 FPR64:$Rn),
- vecshiftR64:$imm)),
- (SCVTFd FPR64:$Rn, vecshiftR64:$imm)>;
-def : Pat<(v1f64 (int_arm64_neon_vcvtfxu2fp (v1i64 FPR64:$Rn),
- vecshiftR64:$imm)),
- (UCVTFd FPR64:$Rn, vecshiftR64:$imm)>;
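-// For illustration: the fixed-point forms scale by 2^imm, e.g.
-//   fcvtzs s0, s0, #8   // i32 result = trunc(x * 256.0)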
-
-defm SHL : SIMDScalarLShiftD< 0, 0b01010, "shl", ARM64vshl>;
-defm SLI : SIMDScalarLShiftDTied<1, 0b01010, "sli">;
-defm SQRSHRN : SIMDScalarRShiftBHS< 0, 0b10011, "sqrshrn",
- int_arm64_neon_sqrshrn>;
-defm SQRSHRUN : SIMDScalarRShiftBHS< 1, 0b10001, "sqrshrun",
- int_arm64_neon_sqrshrun>;
-defm SQSHLU : SIMDScalarLShiftBHSD<1, 0b01100, "sqshlu", ARM64sqshlui>;
-defm SQSHL : SIMDScalarLShiftBHSD<0, 0b01110, "sqshl", ARM64sqshli>;
-defm SQSHRN : SIMDScalarRShiftBHS< 0, 0b10010, "sqshrn",
- int_arm64_neon_sqshrn>;
-defm SQSHRUN : SIMDScalarRShiftBHS< 1, 0b10000, "sqshrun",
- int_arm64_neon_sqshrun>;
-defm SRI : SIMDScalarRShiftDTied< 1, 0b01000, "sri">;
-defm SRSHR : SIMDScalarRShiftD< 0, 0b00100, "srshr", ARM64srshri>;
-defm SRSRA : SIMDScalarRShiftDTied< 0, 0b00110, "srsra",
- TriOpFrag<(add node:$LHS,
- (ARM64srshri node:$MHS, node:$RHS))>>;
-defm SSHR : SIMDScalarRShiftD< 0, 0b00000, "sshr", ARM64vashr>;
-defm SSRA : SIMDScalarRShiftDTied< 0, 0b00010, "ssra",
- TriOpFrag<(add node:$LHS,
- (ARM64vashr node:$MHS, node:$RHS))>>;
-defm UQRSHRN : SIMDScalarRShiftBHS< 1, 0b10011, "uqrshrn",
- int_arm64_neon_uqrshrn>;
-defm UQSHL : SIMDScalarLShiftBHSD<1, 0b01110, "uqshl", ARM64uqshli>;
-defm UQSHRN : SIMDScalarRShiftBHS< 1, 0b10010, "uqshrn",
- int_arm64_neon_uqshrn>;
-defm URSHR : SIMDScalarRShiftD< 1, 0b00100, "urshr", ARM64urshri>;
-defm URSRA : SIMDScalarRShiftDTied< 1, 0b00110, "ursra",
- TriOpFrag<(add node:$LHS,
- (ARM64urshri node:$MHS, node:$RHS))>>;
-defm USHR : SIMDScalarRShiftD< 1, 0b00000, "ushr", ARM64vlshr>;
-defm USRA : SIMDScalarRShiftDTied< 1, 0b00010, "usra",
- TriOpFrag<(add node:$LHS,
- (ARM64vlshr node:$MHS, node:$RHS))>>;
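-// For illustration: the accumulating TriOpFrags above fold the shift into
-// the add, e.g.
-//   usra d0, d1, #3   // d0 += (d1 >>u 3)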
-
-//----------------------------------------------------------------------------
-// AdvSIMD vector shift instructions
-//----------------------------------------------------------------------------
-defm FCVTZS:SIMDVectorRShiftSD<0, 0b11111, "fcvtzs", int_arm64_neon_vcvtfp2fxs>;
-defm FCVTZU:SIMDVectorRShiftSD<1, 0b11111, "fcvtzu", int_arm64_neon_vcvtfp2fxu>;
-defm SCVTF: SIMDVectorRShiftSDToFP<0, 0b11100, "scvtf",
- int_arm64_neon_vcvtfxs2fp>;
-defm RSHRN : SIMDVectorRShiftNarrowBHS<0, 0b10001, "rshrn",
- int_arm64_neon_rshrn>;
-defm SHL : SIMDVectorLShiftBHSD<0, 0b01010, "shl", ARM64vshl>;
-defm SHRN : SIMDVectorRShiftNarrowBHS<0, 0b10000, "shrn",
- BinOpFrag<(trunc (ARM64vashr node:$LHS, node:$RHS))>>;
-defm SLI : SIMDVectorLShiftBHSDTied<1, 0b01010, "sli", int_arm64_neon_vsli>;
-def : Pat<(v1i64 (int_arm64_neon_vsli (v1i64 FPR64:$Rd), (v1i64 FPR64:$Rn),
- (i32 vecshiftL64:$imm))),
- (SLId FPR64:$Rd, FPR64:$Rn, vecshiftL64:$imm)>;
-defm SQRSHRN : SIMDVectorRShiftNarrowBHS<0, 0b10011, "sqrshrn",
- int_arm64_neon_sqrshrn>;
-defm SQRSHRUN: SIMDVectorRShiftNarrowBHS<1, 0b10001, "sqrshrun",
- int_arm64_neon_sqrshrun>;
-defm SQSHLU : SIMDVectorLShiftBHSD<1, 0b01100, "sqshlu", ARM64sqshlui>;
-defm SQSHL : SIMDVectorLShiftBHSD<0, 0b01110, "sqshl", ARM64sqshli>;
-defm SQSHRN : SIMDVectorRShiftNarrowBHS<0, 0b10010, "sqshrn",
- int_arm64_neon_sqshrn>;
-defm SQSHRUN : SIMDVectorRShiftNarrowBHS<1, 0b10000, "sqshrun",
- int_arm64_neon_sqshrun>;
-defm SRI : SIMDVectorRShiftBHSDTied<1, 0b01000, "sri", int_arm64_neon_vsri>;
-def : Pat<(v1i64 (int_arm64_neon_vsri (v1i64 FPR64:$Rd), (v1i64 FPR64:$Rn),
- (i32 vecshiftR64:$imm))),
- (SRId FPR64:$Rd, FPR64:$Rn, vecshiftR64:$imm)>;
-defm SRSHR : SIMDVectorRShiftBHSD<0, 0b00100, "srshr", ARM64srshri>;
-defm SRSRA : SIMDVectorRShiftBHSDTied<0, 0b00110, "srsra",
- TriOpFrag<(add node:$LHS,
- (ARM64srshri node:$MHS, node:$RHS))> >;
-defm SSHLL : SIMDVectorLShiftLongBHSD<0, 0b10100, "sshll",
- BinOpFrag<(ARM64vshl (sext node:$LHS), node:$RHS)>>;
-
-defm SSHR : SIMDVectorRShiftBHSD<0, 0b00000, "sshr", ARM64vashr>;
-defm SSRA : SIMDVectorRShiftBHSDTied<0, 0b00010, "ssra",
- TriOpFrag<(add node:$LHS, (ARM64vashr node:$MHS, node:$RHS))>>;
-defm UCVTF : SIMDVectorRShiftSDToFP<1, 0b11100, "ucvtf",
- int_arm64_neon_vcvtfxu2fp>;
-defm UQRSHRN : SIMDVectorRShiftNarrowBHS<1, 0b10011, "uqrshrn",
- int_arm64_neon_uqrshrn>;
-defm UQSHL : SIMDVectorLShiftBHSD<1, 0b01110, "uqshl", ARM64uqshli>;
-defm UQSHRN : SIMDVectorRShiftNarrowBHS<1, 0b10010, "uqshrn",
- int_arm64_neon_uqshrn>;
-defm URSHR : SIMDVectorRShiftBHSD<1, 0b00100, "urshr", ARM64urshri>;
-defm URSRA : SIMDVectorRShiftBHSDTied<1, 0b00110, "ursra",
- TriOpFrag<(add node:$LHS,
- (ARM64urshri node:$MHS, node:$RHS))> >;
-defm USHLL : SIMDVectorLShiftLongBHSD<1, 0b10100, "ushll",
- BinOpFrag<(ARM64vshl (zext node:$LHS), node:$RHS)>>;
-defm USHR : SIMDVectorRShiftBHSD<1, 0b00000, "ushr", ARM64vlshr>;
-defm USRA : SIMDVectorRShiftBHSDTied<1, 0b00010, "usra",
- TriOpFrag<(add node:$LHS, (ARM64vlshr node:$MHS, node:$RHS))> >;
-
-// SHRN patterns for when a logical right shift was used instead of an
-// arithmetic one (the shift amount guarantees that no sign bits end up in
-// the result, so it doesn't matter which was used).
-def : Pat<(v8i8 (trunc (ARM64vlshr (v8i16 V128:$Rn), vecshiftR16Narrow:$imm))),
- (SHRNv8i8_shift V128:$Rn, vecshiftR16Narrow:$imm)>;
-def : Pat<(v4i16 (trunc (ARM64vlshr (v4i32 V128:$Rn), vecshiftR32Narrow:$imm))),
- (SHRNv4i16_shift V128:$Rn, vecshiftR32Narrow:$imm)>;
-def : Pat<(v2i32 (trunc (ARM64vlshr (v2i64 V128:$Rn), vecshiftR64Narrow:$imm))),
- (SHRNv2i32_shift V128:$Rn, vecshiftR64Narrow:$imm)>;
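-// For illustration: (v8i8 (trunc (ARM64vlshr v8i16:X, 8))) selects to
-//   shrn v0.8b, v0.8h, #8
-// For any shift in the allowed 1-8 range, the byte that survives the
-// truncation is the same whether the shift was logical or arithmetic.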
-
-def : Pat<(v16i8 (concat_vectors (v8i8 V64:$Rd),
- (trunc (ARM64vlshr (v8i16 V128:$Rn),
- vecshiftR16Narrow:$imm)))),
- (SHRNv16i8_shift (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub),
- V128:$Rn, vecshiftR16Narrow:$imm)>;
-def : Pat<(v8i16 (concat_vectors (v4i16 V64:$Rd),
- (trunc (ARM64vlshr (v4i32 V128:$Rn),
- vecshiftR32Narrow:$imm)))),
- (SHRNv8i16_shift (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub),
- V128:$Rn, vecshiftR32Narrow:$imm)>;
-def : Pat<(v4i32 (concat_vectors (v2i32 V64:$Rd),
- (trunc (ARM64vlshr (v2i64 V128:$Rn),
- vecshiftR64Narrow:$imm)))),
- (SHRNv4i32_shift (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub),
- V128:$Rn, vecshiftR64Narrow:$imm)>;
-
-// Vector sign and zero extensions are implemented with SSHLL and USHLL.
-// Anyexts are implemented as zexts.
-def : Pat<(v8i16 (sext (v8i8 V64:$Rn))), (SSHLLv8i8_shift V64:$Rn, (i32 0))>;
-def : Pat<(v8i16 (zext (v8i8 V64:$Rn))), (USHLLv8i8_shift V64:$Rn, (i32 0))>;
-def : Pat<(v8i16 (anyext (v8i8 V64:$Rn))), (USHLLv8i8_shift V64:$Rn, (i32 0))>;
-def : Pat<(v4i32 (sext (v4i16 V64:$Rn))), (SSHLLv4i16_shift V64:$Rn, (i32 0))>;
-def : Pat<(v4i32 (zext (v4i16 V64:$Rn))), (USHLLv4i16_shift V64:$Rn, (i32 0))>;
-def : Pat<(v4i32 (anyext (v4i16 V64:$Rn))), (USHLLv4i16_shift V64:$Rn, (i32 0))>;
-def : Pat<(v2i64 (sext (v2i32 V64:$Rn))), (SSHLLv2i32_shift V64:$Rn, (i32 0))>;
-def : Pat<(v2i64 (zext (v2i32 V64:$Rn))), (USHLLv2i32_shift V64:$Rn, (i32 0))>;
-def : Pat<(v2i64 (anyext (v2i32 V64:$Rn))), (USHLLv2i32_shift V64:$Rn, (i32 0))>;
-// Also match an extend from the upper half of a 128-bit source register.
-def : Pat<(v8i16 (anyext (v8i8 (extract_subvector V128:$Rn, (i64 8)) ))),
- (USHLLv16i8_shift V128:$Rn, (i32 0))>;
-def : Pat<(v8i16 (zext (v8i8 (extract_subvector V128:$Rn, (i64 8)) ))),
- (USHLLv16i8_shift V128:$Rn, (i32 0))>;
-def : Pat<(v8i16 (sext (v8i8 (extract_subvector V128:$Rn, (i64 8)) ))),
- (SSHLLv16i8_shift V128:$Rn, (i32 0))>;
-def : Pat<(v4i32 (anyext (v4i16 (extract_subvector V128:$Rn, (i64 4)) ))),
- (USHLLv8i16_shift V128:$Rn, (i32 0))>;
-def : Pat<(v4i32 (zext (v4i16 (extract_subvector V128:$Rn, (i64 4)) ))),
- (USHLLv8i16_shift V128:$Rn, (i32 0))>;
-def : Pat<(v4i32 (sext (v4i16 (extract_subvector V128:$Rn, (i64 4)) ))),
- (SSHLLv8i16_shift V128:$Rn, (i32 0))>;
-def : Pat<(v2i64 (anyext (v2i32 (extract_subvector V128:$Rn, (i64 2)) ))),
- (USHLLv4i32_shift V128:$Rn, (i32 0))>;
-def : Pat<(v2i64 (zext (v2i32 (extract_subvector V128:$Rn, (i64 2)) ))),
- (USHLLv4i32_shift V128:$Rn, (i32 0))>;
-def : Pat<(v2i64 (sext (v2i32 (extract_subvector V128:$Rn, (i64 2)) ))),
- (SSHLLv4i32_shift V128:$Rn, (i32 0))>;
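-// For illustration: extracting the high half and widening fuse into the "2"
-// form of the instruction, e.g. (v8i16 (zext (extract_subvector v16i8:X, 8)))
-// selects to
-//   ushll2 v0.8h, v0.16b, #0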
-
-// Vector shift sxtl aliases
-def : InstAlias<"sxtl.8h $dst, $src1",
- (SSHLLv8i8_shift V128:$dst, V64:$src1, 0)>;
-def : InstAlias<"sxtl $dst.8h, $src1.8b",
- (SSHLLv8i8_shift V128:$dst, V64:$src1, 0)>;
-def : InstAlias<"sxtl.4s $dst, $src1",
- (SSHLLv4i16_shift V128:$dst, V64:$src1, 0)>;
-def : InstAlias<"sxtl $dst.4s, $src1.4h",
- (SSHLLv4i16_shift V128:$dst, V64:$src1, 0)>;
-def : InstAlias<"sxtl.2d $dst, $src1",
- (SSHLLv2i32_shift V128:$dst, V64:$src1, 0)>;
-def : InstAlias<"sxtl $dst.2d, $src1.2s",
- (SSHLLv2i32_shift V128:$dst, V64:$src1, 0)>;
-
-// Vector shift sxtl2 aliases
-def : InstAlias<"sxtl2.8h $dst, $src1",
- (SSHLLv16i8_shift V128:$dst, V128:$src1, 0)>;
-def : InstAlias<"sxtl2 $dst.8h, $src1.16b",
- (SSHLLv16i8_shift V128:$dst, V128:$src1, 0)>;
-def : InstAlias<"sxtl2.4s $dst, $src1",
- (SSHLLv8i16_shift V128:$dst, V128:$src1, 0)>;
-def : InstAlias<"sxtl2 $dst.4s, $src1.8h",
- (SSHLLv8i16_shift V128:$dst, V128:$src1, 0)>;
-def : InstAlias<"sxtl2.2d $dst, $src1",
- (SSHLLv4i32_shift V128:$dst, V128:$src1, 0)>;
-def : InstAlias<"sxtl2 $dst.2d, $src1.4s",
- (SSHLLv4i32_shift V128:$dst, V128:$src1, 0)>;
-
-// Vector shift uxtl aliases
-def : InstAlias<"uxtl.8h $dst, $src1",
- (USHLLv8i8_shift V128:$dst, V64:$src1, 0)>;
-def : InstAlias<"uxtl $dst.8h, $src1.8b",
- (USHLLv8i8_shift V128:$dst, V64:$src1, 0)>;
-def : InstAlias<"uxtl.4s $dst, $src1",
- (USHLLv4i16_shift V128:$dst, V64:$src1, 0)>;
-def : InstAlias<"uxtl $dst.4s, $src1.4h",
- (USHLLv4i16_shift V128:$dst, V64:$src1, 0)>;
-def : InstAlias<"uxtl.2d $dst, $src1",
- (USHLLv2i32_shift V128:$dst, V64:$src1, 0)>;
-def : InstAlias<"uxtl $dst.2d, $src1.2s",
- (USHLLv2i32_shift V128:$dst, V64:$src1, 0)>;
-
-// Vector shift uxtl2 aliases
-def : InstAlias<"uxtl2.8h $dst, $src1",
- (USHLLv16i8_shift V128:$dst, V128:$src1, 0)>;
-def : InstAlias<"uxtl2 $dst.8h, $src1.16b",
- (USHLLv16i8_shift V128:$dst, V128:$src1, 0)>;
-def : InstAlias<"uxtl2.4s $dst, $src1",
- (USHLLv8i16_shift V128:$dst, V128:$src1, 0)>;
-def : InstAlias<"uxtl2 $dst.4s, $src1.8h",
- (USHLLv8i16_shift V128:$dst, V128:$src1, 0)>;
-def : InstAlias<"uxtl2.2d $dst, $src1",
- (USHLLv4i32_shift V128:$dst, V128:$src1, 0)>;
-def : InstAlias<"uxtl2 $dst.2d, $src1.4s",
- (USHLLv4i32_shift V128:$dst, V128:$src1, 0)>;
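-// For illustration: with these aliases the assembler accepts
-//   uxtl v0.8h, v1.8b
-// as a synonym for
-//   ushll v0.8h, v1.8b, #0
-// and likewise for the sxtl, sxtl2, and uxtl2 spellings above.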
-
-// If an integer is about to be converted to a floating point value,
-// just load it on the floating point unit.
-// These patterns are more complex because floating point loads do not
-// support sign extension.
-// The sign extension has to be explicitly added and is only supported for
-// one step: byte-to-half, half-to-word, word-to-doubleword.
-// SCVTF GPR -> FPR is 9 cycles.
-// SCVTF FPR -> FPR is 4 cycles.
-// SXTL (sign extension with lengthening) FPR -> FPR is 2 cycles.
-// Therefore, we can do 2 sign extensions and one SCVTF FPR -> FPR
-// and still be faster.
-// However, this is not good for code size.
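-// (Arithmetic for the worst case below: 2 x SXTL + 1 x SCVTF FPR -> FPR
-// = 2 + 2 + 4 = 8 cycles, versus 9 cycles for a single SCVTF GPR -> FPR.)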
-// 8-bits -> float. 2 size step-ups.
-def : Pat <(f32 (sint_to_fp (i32 (sextloadi8 ro_indexed8:$addr)))),
- (SCVTFv1i32 (f32 (EXTRACT_SUBREG
- (SSHLLv4i16_shift
- (f64
- (EXTRACT_SUBREG
- (SSHLLv8i8_shift
- (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
- (LDRBro ro_indexed8:$addr),
- bsub),
- 0),
- dsub)),
- 0),
- ssub)))>, Requires<[NotForCodeSize]>;
-def : Pat <(f32 (sint_to_fp (i32 (sextloadi8 am_indexed8:$addr)))),
- (SCVTFv1i32 (f32 (EXTRACT_SUBREG
- (SSHLLv4i16_shift
- (f64
- (EXTRACT_SUBREG
- (SSHLLv8i8_shift
- (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
- (LDRBui am_indexed8:$addr),
- bsub),
- 0),
- dsub)),
- 0),
- ssub)))>, Requires<[NotForCodeSize]>;
-def : Pat <(f32 (sint_to_fp (i32 (sextloadi8 am_unscaled8:$addr)))),
- (SCVTFv1i32 (f32 (EXTRACT_SUBREG
- (SSHLLv4i16_shift
- (f64
- (EXTRACT_SUBREG
- (SSHLLv8i8_shift
- (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
- (LDURBi am_unscaled8:$addr),
- bsub),
- 0),
- dsub)),
- 0),
- ssub)))>, Requires<[NotForCodeSize]>;
-// 16-bits -> float. 1 size step-up.
-def : Pat <(f32 (sint_to_fp (i32 (sextloadi16 ro_indexed16:$addr)))),
- (SCVTFv1i32 (f32 (EXTRACT_SUBREG
- (SSHLLv4i16_shift
- (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
- (LDRHro ro_indexed16:$addr),
- hsub),
- 0),
- ssub)))>, Requires<[NotForCodeSize]>;
-def : Pat <(f32 (sint_to_fp (i32 (sextloadi16 am_indexed16:$addr)))),
- (SCVTFv1i32 (f32 (EXTRACT_SUBREG
- (SSHLLv4i16_shift
- (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
- (LDRHui am_indexed16:$addr),
- hsub),
- 0),
- ssub)))>, Requires<[NotForCodeSize]>;
-def : Pat <(f32 (sint_to_fp (i32 (sextloadi16 am_unscaled16:$addr)))),
- (SCVTFv1i32 (f32 (EXTRACT_SUBREG
- (SSHLLv4i16_shift
- (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
- (LDURHi am_unscaled16:$addr),
- hsub),
- 0),
- ssub)))>, Requires<[NotForCodeSize]>;
-// 32-bits to 32-bits is handled in the target-specific dag combine
-// performIntToFpCombine.
-// 64-bits integer to 32-bits floating point is not possible with
-// SCVTF on floating point registers (both source and destination
-// must have the same size).
-
-// Here are the patterns for 8, 16, 32, and 64-bits to double.
-// 8-bits -> double. 3 size step-ups: give up.
-// 16-bits -> double. 2 size step-ups.
-def : Pat <(f64 (sint_to_fp (i32 (sextloadi16 ro_indexed16:$addr)))),
- (SCVTFv1i64 (f64 (EXTRACT_SUBREG
- (SSHLLv2i32_shift
- (f64
- (EXTRACT_SUBREG
- (SSHLLv4i16_shift
- (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
- (LDRHro ro_indexed16:$addr),
- hsub),
- 0),
- dsub)),
- 0),
- dsub)))>, Requires<[NotForCodeSize]>;
-def : Pat <(f64 (sint_to_fp (i32 (sextloadi16 am_indexed16:$addr)))),
- (SCVTFv1i64 (f64 (EXTRACT_SUBREG
- (SSHLLv2i32_shift
- (f64
- (EXTRACT_SUBREG
- (SSHLLv4i16_shift
- (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
- (LDRHui am_indexed16:$addr),
- hsub),
- 0),
- dsub)),
- 0),
- dsub)))>, Requires<[NotForCodeSize]>;
-def : Pat <(f64 (sint_to_fp (i32 (sextloadi16 am_unscaled16:$addr)))),
- (SCVTFv1i64 (f64 (EXTRACT_SUBREG
- (SSHLLv2i32_shift
- (f64
- (EXTRACT_SUBREG
- (SSHLLv4i16_shift
- (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
- (LDURHi am_unscaled16:$addr),
- hsub),
- 0),
- dsub)),
- 0),
- dsub)))>, Requires<[NotForCodeSize]>;
-// 32-bits -> double. 1 size step-up.
-def : Pat <(f64 (sint_to_fp (i32 (load ro_indexed32:$addr)))),
- (SCVTFv1i64 (f64 (EXTRACT_SUBREG
- (SSHLLv2i32_shift
- (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
- (LDRSro ro_indexed32:$addr),
- ssub),
- 0),
- dsub)))>, Requires<[NotForCodeSize]>;
-def : Pat <(f64 (sint_to_fp (i32 (load am_indexed32:$addr)))),
- (SCVTFv1i64 (f64 (EXTRACT_SUBREG
- (SSHLLv2i32_shift
- (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
- (LDRSui am_indexed32:$addr),
- ssub),
- 0),
- dsub)))>, Requires<[NotForCodeSize]>;
-def : Pat <(f64 (sint_to_fp (i32 (load am_unscaled32:$addr)))),
- (SCVTFv1i64 (f64 (EXTRACT_SUBREG
- (SSHLLv2i32_shift
- (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
- (LDURSi am_unscaled32:$addr),
- ssub),
- 0),
- dsub)))>, Requires<[NotForCodeSize]>;
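-// For illustration, the 32-bit -> double patterns above emit, roughly:
-//   ldr   s0, [x0]           // load the i32 straight onto the FP unit
-//   sshll v0.2d, v0.2s, #0   // one-step sign extension to 64 bits
-//   scvtf d0, d0             // FPR -> FPR conversion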
-// 64-bits -> double is handled in the target-specific dag combine
-// performIntToFpCombine.
-
-
-//----------------------------------------------------------------------------
-// AdvSIMD Load-Store Structure
-//----------------------------------------------------------------------------
-defm LD1 : SIMDLd1Multiple<"ld1">;
-defm LD2 : SIMDLd2Multiple<"ld2">;
-defm LD3 : SIMDLd3Multiple<"ld3">;
-defm LD4 : SIMDLd4Multiple<"ld4">;
-
-defm ST1 : SIMDSt1Multiple<"st1">;
-defm ST2 : SIMDSt2Multiple<"st2">;
-defm ST3 : SIMDSt3Multiple<"st3">;
-defm ST4 : SIMDSt4Multiple<"st4">;
-
-class Ld1Pat<ValueType ty, Instruction INST>
- : Pat<(ty (load am_simdnoindex:$vaddr)), (INST am_simdnoindex:$vaddr)>;
-
-def : Ld1Pat<v16i8, LD1Onev16b>;
-def : Ld1Pat<v8i16, LD1Onev8h>;
-def : Ld1Pat<v4i32, LD1Onev4s>;
-def : Ld1Pat<v2i64, LD1Onev2d>;
-def : Ld1Pat<v8i8, LD1Onev8b>;
-def : Ld1Pat<v4i16, LD1Onev4h>;
-def : Ld1Pat<v2i32, LD1Onev2s>;
-def : Ld1Pat<v1i64, LD1Onev1d>;
-
-class St1Pat<ValueType ty, Instruction INST>
- : Pat<(store ty:$Vt, am_simdnoindex:$vaddr),
- (INST ty:$Vt, am_simdnoindex:$vaddr)>;
-
-def : St1Pat<v16i8, ST1Onev16b>;
-def : St1Pat<v8i16, ST1Onev8h>;
-def : St1Pat<v4i32, ST1Onev4s>;
-def : St1Pat<v2i64, ST1Onev2d>;
-def : St1Pat<v8i8, ST1Onev8b>;
-def : St1Pat<v4i16, ST1Onev4h>;
-def : St1Pat<v2i32, ST1Onev2s>;
-def : St1Pat<v1i64, ST1Onev1d>;
-
-//---
-// Single-element
-//---
-
-defm LD1R : SIMDLdR<0, 0b110, 0, "ld1r", "One", 1, 2, 4, 8>;
-defm LD2R : SIMDLdR<1, 0b110, 0, "ld2r", "Two", 2, 4, 8, 16>;
-defm LD3R : SIMDLdR<0, 0b111, 0, "ld3r", "Three", 3, 6, 12, 24>;
-defm LD4R : SIMDLdR<1, 0b111, 0, "ld4r", "Four", 4, 8, 16, 32>;
-let mayLoad = 1, neverHasSideEffects = 1 in {
-defm LD1 : SIMDLdSingleBTied<0, 0b000, "ld1", VecListOneb, GPR64pi1>;
-defm LD1 : SIMDLdSingleHTied<0, 0b010, 0, "ld1", VecListOneh, GPR64pi2>;
-defm LD1 : SIMDLdSingleSTied<0, 0b100, 0b00, "ld1", VecListOnes, GPR64pi4>;
-defm LD1 : SIMDLdSingleDTied<0, 0b100, 0b01, "ld1", VecListOned, GPR64pi8>;
-defm LD2 : SIMDLdSingleBTied<1, 0b000, "ld2", VecListTwob, GPR64pi2>;
-defm LD2 : SIMDLdSingleHTied<1, 0b010, 0, "ld2", VecListTwoh, GPR64pi4>;
-defm LD2 : SIMDLdSingleSTied<1, 0b100, 0b00, "ld2", VecListTwos, GPR64pi8>;
-defm LD2 : SIMDLdSingleDTied<1, 0b100, 0b01, "ld2", VecListTwod, GPR64pi16>;
-defm LD3 : SIMDLdSingleBTied<0, 0b001, "ld3", VecListThreeb, GPR64pi3>;
-defm LD3 : SIMDLdSingleHTied<0, 0b011, 0, "ld3", VecListThreeh, GPR64pi6>;
-defm LD3 : SIMDLdSingleSTied<0, 0b101, 0b00, "ld3", VecListThrees, GPR64pi12>;
-defm LD3 : SIMDLdSingleDTied<0, 0b101, 0b01, "ld3", VecListThreed, GPR64pi24>;
-defm LD4 : SIMDLdSingleBTied<1, 0b001, "ld4", VecListFourb, GPR64pi4>;
-defm LD4 : SIMDLdSingleHTied<1, 0b011, 0, "ld4", VecListFourh, GPR64pi8>;
-defm LD4 : SIMDLdSingleSTied<1, 0b101, 0b00, "ld4", VecListFours, GPR64pi16>;
-defm LD4 : SIMDLdSingleDTied<1, 0b101, 0b01, "ld4", VecListFourd, GPR64pi32>;
-}
-
-def : Pat<(v8i8 (ARM64dup (i32 (extloadi8 am_simdnoindex:$vaddr)))),
- (LD1Rv8b am_simdnoindex:$vaddr)>;
-def : Pat<(v16i8 (ARM64dup (i32 (extloadi8 am_simdnoindex:$vaddr)))),
- (LD1Rv16b am_simdnoindex:$vaddr)>;
-def : Pat<(v4i16 (ARM64dup (i32 (extloadi16 am_simdnoindex:$vaddr)))),
- (LD1Rv4h am_simdnoindex:$vaddr)>;
-def : Pat<(v8i16 (ARM64dup (i32 (extloadi16 am_simdnoindex:$vaddr)))),
- (LD1Rv8h am_simdnoindex:$vaddr)>;
-def : Pat<(v2i32 (ARM64dup (i32 (load am_simdnoindex:$vaddr)))),
- (LD1Rv2s am_simdnoindex:$vaddr)>;
-def : Pat<(v4i32 (ARM64dup (i32 (load am_simdnoindex:$vaddr)))),
- (LD1Rv4s am_simdnoindex:$vaddr)>;
-def : Pat<(v2i64 (ARM64dup (i64 (load am_simdnoindex:$vaddr)))),
- (LD1Rv2d am_simdnoindex:$vaddr)>;
-def : Pat<(v1i64 (ARM64dup (i64 (load am_simdnoindex:$vaddr)))),
- (LD1Rv1d am_simdnoindex:$vaddr)>;
-// Grab the floating point version too
-def : Pat<(v2f32 (ARM64dup (f32 (load am_simdnoindex:$vaddr)))),
- (LD1Rv2s am_simdnoindex:$vaddr)>;
-def : Pat<(v4f32 (ARM64dup (f32 (load am_simdnoindex:$vaddr)))),
- (LD1Rv4s am_simdnoindex:$vaddr)>;
-def : Pat<(v2f64 (ARM64dup (f64 (load am_simdnoindex:$vaddr)))),
- (LD1Rv2d am_simdnoindex:$vaddr)>;
-def : Pat<(v1f64 (ARM64dup (f64 (load am_simdnoindex:$vaddr)))),
- (LD1Rv1d am_simdnoindex:$vaddr)>;
-
-class Ld1Lane128Pat<SDPatternOperator scalar_load, Operand VecIndex,
- ValueType VTy, ValueType STy, Instruction LD1>
- : Pat<(vector_insert (VTy VecListOne128:$Rd),
- (STy (scalar_load am_simdnoindex:$vaddr)), VecIndex:$idx),
- (LD1 VecListOne128:$Rd, VecIndex:$idx, am_simdnoindex:$vaddr)>;
-
-def : Ld1Lane128Pat<extloadi8, VectorIndexB, v16i8, i32, LD1i8>;
-def : Ld1Lane128Pat<extloadi16, VectorIndexH, v8i16, i32, LD1i16>;
-def : Ld1Lane128Pat<load, VectorIndexS, v4i32, i32, LD1i32>;
-def : Ld1Lane128Pat<load, VectorIndexS, v4f32, f32, LD1i32>;
-def : Ld1Lane128Pat<load, VectorIndexD, v2i64, i64, LD1i64>;
-def : Ld1Lane128Pat<load, VectorIndexD, v2f64, f64, LD1i64>;
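-// For illustration: (vector_insert v4i32:$Rd, (load x0), 2) selects to the
-// lane-indexed load
-//   ld1 { v0.s }[2], [x0]
-// which replaces lane 2 and leaves the other lanes of $Rd intact.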
-
-class Ld1Lane64Pat<SDPatternOperator scalar_load, Operand VecIndex,
- ValueType VTy, ValueType STy, Instruction LD1>
- : Pat<(vector_insert (VTy VecListOne64:$Rd),
- (STy (scalar_load am_simdnoindex:$vaddr)), VecIndex:$idx),
- (EXTRACT_SUBREG
- (LD1 (SUBREG_TO_REG (i32 0), VecListOne64:$Rd, dsub),
- VecIndex:$idx, am_simdnoindex:$vaddr),
- dsub)>;
-
-def : Ld1Lane64Pat<extloadi8, VectorIndexB, v8i8, i32, LD1i8>;
-def : Ld1Lane64Pat<extloadi16, VectorIndexH, v4i16, i32, LD1i16>;
-def : Ld1Lane64Pat<load, VectorIndexS, v2i32, i32, LD1i32>;
-def : Ld1Lane64Pat<load, VectorIndexS, v2f32, f32, LD1i32>;
-
-
-defm LD1 : SIMDLdSt1SingleAliases<"ld1">;
-defm LD2 : SIMDLdSt2SingleAliases<"ld2">;
-defm LD3 : SIMDLdSt3SingleAliases<"ld3">;
-defm LD4 : SIMDLdSt4SingleAliases<"ld4">;
-
-// Stores
-defm ST1 : SIMDStSingleB<0, 0b000, "st1", VecListOneb, GPR64pi1>;
-defm ST1 : SIMDStSingleH<0, 0b010, 0, "st1", VecListOneh, GPR64pi2>;
-defm ST1 : SIMDStSingleS<0, 0b100, 0b00, "st1", VecListOnes, GPR64pi4>;
-defm ST1 : SIMDStSingleD<0, 0b100, 0b01, "st1", VecListOned, GPR64pi8>;
-
-let AddedComplexity = 8 in
-class St1Lane128Pat<SDPatternOperator scalar_store, Operand VecIndex,
- ValueType VTy, ValueType STy, Instruction ST1>
- : Pat<(scalar_store
- (STy (vector_extract (VTy VecListOne128:$Vt), VecIndex:$idx)),
- am_simdnoindex:$vaddr),
- (ST1 VecListOne128:$Vt, VecIndex:$idx, am_simdnoindex:$vaddr)>;
-
-def : St1Lane128Pat<truncstorei8, VectorIndexB, v16i8, i32, ST1i8>;
-def : St1Lane128Pat<truncstorei16, VectorIndexH, v8i16, i32, ST1i16>;
-def : St1Lane128Pat<store, VectorIndexS, v4i32, i32, ST1i32>;
-def : St1Lane128Pat<store, VectorIndexS, v4f32, f32, ST1i32>;
-def : St1Lane128Pat<store, VectorIndexD, v2i64, i64, ST1i64>;
-def : St1Lane128Pat<store, VectorIndexD, v2f64, f64, ST1i64>;
-
-let AddedComplexity = 8 in
-class St1Lane64Pat<SDPatternOperator scalar_store, Operand VecIndex,
- ValueType VTy, ValueType STy, Instruction ST1>
- : Pat<(scalar_store
- (STy (vector_extract (VTy VecListOne64:$Vt), VecIndex:$idx)),
- am_simdnoindex:$vaddr),
- (ST1 (SUBREG_TO_REG (i32 0), VecListOne64:$Vt, dsub),
- VecIndex:$idx, am_simdnoindex:$vaddr)>;
-
-def : St1Lane64Pat<truncstorei8, VectorIndexB, v8i8, i32, ST1i8>;
-def : St1Lane64Pat<truncstorei16, VectorIndexH, v4i16, i32, ST1i16>;
-def : St1Lane64Pat<store, VectorIndexS, v2i32, i32, ST1i32>;
-def : St1Lane64Pat<store, VectorIndexS, v2f32, f32, ST1i32>;
-
-let mayStore = 1, neverHasSideEffects = 1 in {
-defm ST2 : SIMDStSingleB<1, 0b000, "st2", VecListTwob, GPR64pi2>;
-defm ST2 : SIMDStSingleH<1, 0b010, 0, "st2", VecListTwoh, GPR64pi4>;
-defm ST2 : SIMDStSingleS<1, 0b100, 0b00, "st2", VecListTwos, GPR64pi8>;
-defm ST2 : SIMDStSingleD<1, 0b100, 0b01, "st2", VecListTwod, GPR64pi16>;
-defm ST3 : SIMDStSingleB<0, 0b001, "st3", VecListThreeb, GPR64pi3>;
-defm ST3 : SIMDStSingleH<0, 0b011, 0, "st3", VecListThreeh, GPR64pi6>;
-defm ST3 : SIMDStSingleS<0, 0b101, 0b00, "st3", VecListThrees, GPR64pi12>;
-defm ST3 : SIMDStSingleD<0, 0b101, 0b01, "st3", VecListThreed, GPR64pi24>;
-defm ST4 : SIMDStSingleB<1, 0b001, "st4", VecListFourb, GPR64pi4>;
-defm ST4 : SIMDStSingleH<1, 0b011, 0, "st4", VecListFourh, GPR64pi8>;
-defm ST4 : SIMDStSingleS<1, 0b101, 0b00, "st4", VecListFours, GPR64pi16>;
-defm ST4 : SIMDStSingleD<1, 0b101, 0b01, "st4", VecListFourd, GPR64pi32>;
-}
-
-defm ST1 : SIMDLdSt1SingleAliases<"st1">;
-defm ST2 : SIMDLdSt2SingleAliases<"st2">;
-defm ST3 : SIMDLdSt3SingleAliases<"st3">;
-defm ST4 : SIMDLdSt4SingleAliases<"st4">;
-
-//----------------------------------------------------------------------------
-// Crypto extensions
-//----------------------------------------------------------------------------
-
-def AESErr : AESTiedInst<0b0100, "aese", int_arm64_crypto_aese>;
-def AESDrr : AESTiedInst<0b0101, "aesd", int_arm64_crypto_aesd>;
-def AESMCrr : AESInst< 0b0110, "aesmc", int_arm64_crypto_aesmc>;
-def AESIMCrr : AESInst< 0b0111, "aesimc", int_arm64_crypto_aesimc>;
-
-def SHA1Crrr : SHATiedInstQSV<0b000, "sha1c", int_arm64_crypto_sha1c>;
-def SHA1Prrr : SHATiedInstQSV<0b001, "sha1p", int_arm64_crypto_sha1p>;
-def SHA1Mrrr : SHATiedInstQSV<0b010, "sha1m", int_arm64_crypto_sha1m>;
-def SHA1SU0rrr : SHATiedInstVVV<0b011, "sha1su0", int_arm64_crypto_sha1su0>;
-def SHA256Hrrr : SHATiedInstQQV<0b100, "sha256h", int_arm64_crypto_sha256h>;
-def SHA256H2rrr : SHATiedInstQQV<0b101, "sha256h2",int_arm64_crypto_sha256h2>;
-def SHA256SU1rrr :SHATiedInstVVV<0b110, "sha256su1",int_arm64_crypto_sha256su1>;
-
-def SHA1Hrr : SHAInstSS< 0b0000, "sha1h", int_arm64_crypto_sha1h>;
-def SHA1SU1rr : SHATiedInstVV<0b0001, "sha1su1", int_arm64_crypto_sha1su1>;
-def SHA256SU0rr : SHATiedInstVV<0b0010, "sha256su0",int_arm64_crypto_sha256su0>;
-
-//----------------------------------------------------------------------------
-// Compiler-pseudos
-//----------------------------------------------------------------------------
-// FIXME: Like for X86, these should go in their own separate .td file.
-
-// Any instruction that defines a 32-bit result zeroes the high half of the
-// register. Truncate can be lowered to EXTRACT_SUBREG, and CopyFromReg may
-// be copying from a truncate, so those two are excluded below. Any other
-// 32-bit operation will zero-extend up to 64 bits.
-// FIXME: X86 also checks for CMOV here. Do we need something similar?
-def def32 : PatLeaf<(i32 GPR32:$src), [{
- return N->getOpcode() != ISD::TRUNCATE &&
- N->getOpcode() != TargetOpcode::EXTRACT_SUBREG &&
- N->getOpcode() != ISD::CopyFromReg;
-}]>;
-
-// In the case of a 32-bit def that is known to implicitly zero-extend,
-// we can use a SUBREG_TO_REG.
-def : Pat<(i64 (zext def32:$src)), (SUBREG_TO_REG (i64 0), GPR32:$src, sub_32)>;
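-// For illustration: in (i64 (zext (i32 (add GPR32:$a, GPR32:$b)))), the ADD
-// already zeroed bits [63:32], so this selects to just
-//   add w0, w1, w2
-// with SUBREG_TO_REG providing only register-class glue, no extra code.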
-
-// For an anyext, we don't care what the high bits are, so we can perform an
-// INSERT_SUBREG into an IMPLICIT_DEF.
-def : Pat<(i64 (anyext GPR32:$src)),
- (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$src, sub_32)>;
-
-// When we need to explicitly zero-extend, we use an unsigned bitfield move
-// instruction (UBFM) on the enclosing super-reg.
-def : Pat<(i64 (zext GPR32:$src)),
- (UBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$src, sub_32), 0, 31)>;
-
-// To sign extend, we use a signed bitfield move instruction (SBFM) on the
-// containing super-reg.
-def : Pat<(i64 (sext GPR32:$src)),
- (SBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$src, sub_32), 0, 31)>;
-def : Pat<(i64 (sext_inreg GPR64:$src, i32)), (SBFMXri GPR64:$src, 0, 31)>;
-def : Pat<(i64 (sext_inreg GPR64:$src, i16)), (SBFMXri GPR64:$src, 0, 15)>;
-def : Pat<(i64 (sext_inreg GPR64:$src, i8)), (SBFMXri GPR64:$src, 0, 7)>;
-def : Pat<(i64 (sext_inreg GPR64:$src, i1)), (SBFMXri GPR64:$src, 0, 0)>;
-def : Pat<(i32 (sext_inreg GPR32:$src, i16)), (SBFMWri GPR32:$src, 0, 15)>;
-def : Pat<(i32 (sext_inreg GPR32:$src, i8)), (SBFMWri GPR32:$src, 0, 7)>;
-def : Pat<(i32 (sext_inreg GPR32:$src, i1)), (SBFMWri GPR32:$src, 0, 0)>;
-
-def : Pat<(shl (sext_inreg GPR32:$Rn, i8), (i64 imm0_31:$imm)),
- (SBFMWri GPR32:$Rn, (i64 (i32shift_a imm0_31:$imm)),
- (i64 (i32shift_sext_i8 imm0_31:$imm)))>;
-def : Pat<(shl (sext_inreg GPR64:$Rn, i8), (i64 imm0_63:$imm)),
- (SBFMXri GPR64:$Rn, (i64 (i64shift_a imm0_63:$imm)),
- (i64 (i64shift_sext_i8 imm0_63:$imm)))>;
-
-def : Pat<(shl (sext_inreg GPR32:$Rn, i16), (i64 imm0_31:$imm)),
- (SBFMWri GPR32:$Rn, (i64 (i32shift_a imm0_31:$imm)),
- (i64 (i32shift_sext_i16 imm0_31:$imm)))>;
-def : Pat<(shl (sext_inreg GPR64:$Rn, i16), (i64 imm0_63:$imm)),
- (SBFMXri GPR64:$Rn, (i64 (i64shift_a imm0_63:$imm)),
- (i64 (i64shift_sext_i16 imm0_63:$imm)))>;
-
-def : Pat<(shl (i64 (sext GPR32:$Rn)), (i64 imm0_63:$imm)),
- (SBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$Rn, sub_32),
- (i64 (i64shift_a imm0_63:$imm)),
- (i64 (i64shift_sext_i32 imm0_63:$imm)))>;
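-// For illustration: (shl (sext GPR32:$Rn), 3) becomes a single bitfield
-// insert,
-//   sbfiz x0, x0, #3, #32
-// i.e. SBFMXri with immr = (64 - 3) mod 64 = 61 and imms = 31.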
-
-// sra patterns have an AddedComplexity of 10, so make sure we have a higher
-// AddedComplexity for the following patterns since we want to match sext + sra
-// patterns before we attempt to match a single sra node.
-let AddedComplexity = 20 in {
-// We support all sext + sra combinations that preserve at least one bit of
-// the original value being sign extended, i.e. shifts of up to bitwidth-1
-// bits.
-def : Pat<(sra (sext_inreg GPR32:$Rn, i8), (i64 imm0_7:$imm)),
- (SBFMWri GPR32:$Rn, (i64 imm0_7:$imm), 7)>;
-def : Pat<(sra (sext_inreg GPR64:$Rn, i8), (i64 imm0_7:$imm)),
- (SBFMXri GPR64:$Rn, (i64 imm0_7:$imm), 7)>;
-
-def : Pat<(sra (sext_inreg GPR32:$Rn, i16), (i64 imm0_15:$imm)),
- (SBFMWri GPR32:$Rn, (i64 imm0_15:$imm), 15)>;
-def : Pat<(sra (sext_inreg GPR64:$Rn, i16), (i64 imm0_15:$imm)),
- (SBFMXri GPR64:$Rn, (i64 imm0_15:$imm), 15)>;
-
-def : Pat<(sra (i64 (sext GPR32:$Rn)), (i64 imm0_31:$imm)),
- (SBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$Rn, sub_32),
- (i64 imm0_31:$imm), 31)>;
-} // AddedComplexity = 20
-
-// To truncate, we can simply extract from a subregister.
-def : Pat<(i32 (trunc GPR64sp:$src)),
- (i32 (EXTRACT_SUBREG GPR64sp:$src, sub_32))>;
-
-// __builtin_trap() uses the BRK instruction on ARM64.
-def : Pat<(trap), (BRK 1)>;
-
-// Conversions within AdvSIMD types in the same register size are free.
-
-def : Pat<(v1i64 (bitconvert (v2i32 FPR64:$src))), (v1i64 FPR64:$src)>;
-def : Pat<(v1i64 (bitconvert (v4i16 FPR64:$src))), (v1i64 FPR64:$src)>;
-def : Pat<(v1i64 (bitconvert (v8i8 FPR64:$src))), (v1i64 FPR64:$src)>;
-def : Pat<(v1i64 (bitconvert (f64 FPR64:$src))), (v1i64 FPR64:$src)>;
-def : Pat<(v1i64 (bitconvert (v2f32 FPR64:$src))), (v1i64 FPR64:$src)>;
-def : Pat<(v1i64 (bitconvert (v1f64 FPR64:$src))), (v1i64 FPR64:$src)>;
-
-def : Pat<(v2i32 (bitconvert (v1i64 FPR64:$src))), (v2i32 FPR64:$src)>;
-def : Pat<(v2i32 (bitconvert (v4i16 FPR64:$src))), (v2i32 FPR64:$src)>;
-def : Pat<(v2i32 (bitconvert (v8i8 FPR64:$src))), (v2i32 FPR64:$src)>;
-def : Pat<(v2i32 (bitconvert (f64 FPR64:$src))), (v2i32 FPR64:$src)>;
-def : Pat<(v2i32 (bitconvert (v2f32 FPR64:$src))), (v2i32 FPR64:$src)>;
-def : Pat<(v2i32 (bitconvert (v1f64 FPR64:$src))), (v2i32 FPR64:$src)>;
-
-def : Pat<(v4i16 (bitconvert (v1i64 FPR64:$src))), (v4i16 FPR64:$src)>;
-def : Pat<(v4i16 (bitconvert (v2i32 FPR64:$src))), (v4i16 FPR64:$src)>;
-def : Pat<(v4i16 (bitconvert (v8i8 FPR64:$src))), (v4i16 FPR64:$src)>;
-def : Pat<(v4i16 (bitconvert (f64 FPR64:$src))), (v4i16 FPR64:$src)>;
-def : Pat<(v4i16 (bitconvert (v2f32 FPR64:$src))), (v4i16 FPR64:$src)>;
-def : Pat<(v4i16 (bitconvert (v1f64 FPR64:$src))), (v4i16 FPR64:$src)>;
-
-def : Pat<(v8i8 (bitconvert (v1i64 FPR64:$src))), (v8i8 FPR64:$src)>;
-def : Pat<(v8i8 (bitconvert (v2i32 FPR64:$src))), (v8i8 FPR64:$src)>;
-def : Pat<(v8i8 (bitconvert (v4i16 FPR64:$src))), (v8i8 FPR64:$src)>;
-def : Pat<(v8i8 (bitconvert (f64 FPR64:$src))), (v8i8 FPR64:$src)>;
-def : Pat<(v8i8 (bitconvert (v2f32 FPR64:$src))), (v8i8 FPR64:$src)>;
-def : Pat<(v8i8 (bitconvert (v1f64 FPR64:$src))), (v8i8 FPR64:$src)>;
-
-def : Pat<(f64 (bitconvert (v1i64 FPR64:$src))), (f64 FPR64:$src)>;
-def : Pat<(f64 (bitconvert (v2i32 FPR64:$src))), (f64 FPR64:$src)>;
-def : Pat<(f64 (bitconvert (v4i16 FPR64:$src))), (f64 FPR64:$src)>;
-def : Pat<(f64 (bitconvert (v8i8 FPR64:$src))), (f64 FPR64:$src)>;
-def : Pat<(f64 (bitconvert (v2f32 FPR64:$src))), (f64 FPR64:$src)>;
-def : Pat<(f64 (bitconvert (v1f64 FPR64:$src))), (f64 FPR64:$src)>;
-
-def : Pat<(v1f64 (bitconvert (v1i64 FPR64:$src))), (v1f64 FPR64:$src)>;
-def : Pat<(v1f64 (bitconvert (v2i32 FPR64:$src))), (v1f64 FPR64:$src)>;
-def : Pat<(v1f64 (bitconvert (v4i16 FPR64:$src))), (v1f64 FPR64:$src)>;
-def : Pat<(v1f64 (bitconvert (v8i8 FPR64:$src))), (v1f64 FPR64:$src)>;
-def : Pat<(v1f64 (bitconvert (f64 FPR64:$src))), (v1f64 FPR64:$src)>;
-def : Pat<(v1f64 (bitconvert (v2f32 FPR64:$src))), (v1f64 FPR64:$src)>;
-
-def : Pat<(v2f32 (bitconvert (f64 FPR64:$src))), (v2f32 FPR64:$src)>;
-def : Pat<(v2f32 (bitconvert (v1i64 FPR64:$src))), (v2f32 FPR64:$src)>;
-def : Pat<(v2f32 (bitconvert (v2i32 FPR64:$src))), (v2f32 FPR64:$src)>;
-def : Pat<(v2f32 (bitconvert (v4i16 FPR64:$src))), (v2f32 FPR64:$src)>;
-def : Pat<(v2f32 (bitconvert (v8i8 FPR64:$src))), (v2f32 FPR64:$src)>;
-def : Pat<(v2f32 (bitconvert (v1f64 FPR64:$src))), (v2f32 FPR64:$src)>;
-
-
-def : Pat<(f128 (bitconvert (v2i64 FPR128:$src))), (f128 FPR128:$src)>;
-def : Pat<(f128 (bitconvert (v4i32 FPR128:$src))), (f128 FPR128:$src)>;
-def : Pat<(f128 (bitconvert (v8i16 FPR128:$src))), (f128 FPR128:$src)>;
-def : Pat<(f128 (bitconvert (v2f64 FPR128:$src))), (f128 FPR128:$src)>;
-def : Pat<(f128 (bitconvert (v4f32 FPR128:$src))), (f128 FPR128:$src)>;
-
-def : Pat<(v2f64 (bitconvert (f128 FPR128:$src))), (v2f64 FPR128:$src)>;
-def : Pat<(v2f64 (bitconvert (v4i32 FPR128:$src))), (v2f64 FPR128:$src)>;
-def : Pat<(v2f64 (bitconvert (v8i16 FPR128:$src))), (v2f64 FPR128:$src)>;
-def : Pat<(v2f64 (bitconvert (v16i8 FPR128:$src))), (v2f64 FPR128:$src)>;
-def : Pat<(v2f64 (bitconvert (v2i64 FPR128:$src))), (v2f64 FPR128:$src)>;
-def : Pat<(v2f64 (bitconvert (v4f32 FPR128:$src))), (v2f64 FPR128:$src)>;
-
-def : Pat<(v4f32 (bitconvert (f128 FPR128:$src))), (v4f32 FPR128:$src)>;
-def : Pat<(v4f32 (bitconvert (v4i32 FPR128:$src))), (v4f32 FPR128:$src)>;
-def : Pat<(v4f32 (bitconvert (v8i16 FPR128:$src))), (v4f32 FPR128:$src)>;
-def : Pat<(v4f32 (bitconvert (v16i8 FPR128:$src))), (v4f32 FPR128:$src)>;
-def : Pat<(v4f32 (bitconvert (v2i64 FPR128:$src))), (v4f32 FPR128:$src)>;
-def : Pat<(v4f32 (bitconvert (v2f64 FPR128:$src))), (v4f32 FPR128:$src)>;
-
-def : Pat<(v2i64 (bitconvert (f128 FPR128:$src))), (v2i64 FPR128:$src)>;
-def : Pat<(v2i64 (bitconvert (v4i32 FPR128:$src))), (v2i64 FPR128:$src)>;
-def : Pat<(v2i64 (bitconvert (v8i16 FPR128:$src))), (v2i64 FPR128:$src)>;
-def : Pat<(v2i64 (bitconvert (v16i8 FPR128:$src))), (v2i64 FPR128:$src)>;
-def : Pat<(v2i64 (bitconvert (v2f64 FPR128:$src))), (v2i64 FPR128:$src)>;
-def : Pat<(v2i64 (bitconvert (v4f32 FPR128:$src))), (v2i64 FPR128:$src)>;
-
-def : Pat<(v4i32 (bitconvert (f128 FPR128:$src))), (v4i32 FPR128:$src)>;
-def : Pat<(v4i32 (bitconvert (v2i64 FPR128:$src))), (v4i32 FPR128:$src)>;
-def : Pat<(v4i32 (bitconvert (v8i16 FPR128:$src))), (v4i32 FPR128:$src)>;
-def : Pat<(v4i32 (bitconvert (v16i8 FPR128:$src))), (v4i32 FPR128:$src)>;
-def : Pat<(v4i32 (bitconvert (v2f64 FPR128:$src))), (v4i32 FPR128:$src)>;
-def : Pat<(v4i32 (bitconvert (v4f32 FPR128:$src))), (v4i32 FPR128:$src)>;
-
-def : Pat<(v8i16 (bitconvert (f128 FPR128:$src))), (v8i16 FPR128:$src)>;
-def : Pat<(v8i16 (bitconvert (v2i64 FPR128:$src))), (v8i16 FPR128:$src)>;
-def : Pat<(v8i16 (bitconvert (v4i32 FPR128:$src))), (v8i16 FPR128:$src)>;
-def : Pat<(v8i16 (bitconvert (v16i8 FPR128:$src))), (v8i16 FPR128:$src)>;
-def : Pat<(v8i16 (bitconvert (v2f64 FPR128:$src))), (v8i16 FPR128:$src)>;
-def : Pat<(v8i16 (bitconvert (v4f32 FPR128:$src))), (v8i16 FPR128:$src)>;
-
-def : Pat<(v16i8 (bitconvert (f128 FPR128:$src))), (v16i8 FPR128:$src)>;
-def : Pat<(v16i8 (bitconvert (v2i64 FPR128:$src))), (v16i8 FPR128:$src)>;
-def : Pat<(v16i8 (bitconvert (v4i32 FPR128:$src))), (v16i8 FPR128:$src)>;
-def : Pat<(v16i8 (bitconvert (v8i16 FPR128:$src))), (v16i8 FPR128:$src)>;
-def : Pat<(v16i8 (bitconvert (v2f64 FPR128:$src))), (v16i8 FPR128:$src)>;
-def : Pat<(v16i8 (bitconvert (v4f32 FPR128:$src))), (v16i8 FPR128:$src)>;
-
-def : Pat<(v8i8 (extract_subvector (v16i8 FPR128:$Rn), (i64 1))),
- (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>;
-def : Pat<(v4i16 (extract_subvector (v8i16 FPR128:$Rn), (i64 1))),
- (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>;
-def : Pat<(v2i32 (extract_subvector (v4i32 FPR128:$Rn), (i64 1))),
- (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>;
-def : Pat<(v1i64 (extract_subvector (v2i64 FPR128:$Rn), (i64 1))),
- (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>;
-
-// A 64-bit subvector insert to the first 128-bit vector position
-// is a subregister copy that needs no instruction.
-def : Pat<(insert_subvector undef, (v1i64 FPR64:$src), (i32 0)),
- (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
-def : Pat<(insert_subvector undef, (v1f64 FPR64:$src), (i32 0)),
- (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
-def : Pat<(insert_subvector undef, (v2i32 FPR64:$src), (i32 0)),
- (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
-def : Pat<(insert_subvector undef, (v2f32 FPR64:$src), (i32 0)),
- (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
-def : Pat<(insert_subvector undef, (v4i16 FPR64:$src), (i32 0)),
- (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
-def : Pat<(insert_subvector undef, (v8i8 FPR64:$src), (i32 0)),
- (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
-
-// Use pair-wise add instructions when summing up the lanes for v2f64, v2i64
-// or v2f32.
-def : Pat<(i64 (add (vector_extract (v2i64 FPR128:$Rn), (i64 0)),
- (vector_extract (v2i64 FPR128:$Rn), (i64 1)))),
- (i64 (ADDPv2i64p (v2i64 FPR128:$Rn)))>;
-def : Pat<(f64 (fadd (vector_extract (v2f64 FPR128:$Rn), (i64 0)),
- (vector_extract (v2f64 FPR128:$Rn), (i64 1)))),
- (f64 (FADDPv2i64p (v2f64 FPR128:$Rn)))>;
- // vector_extract on 64-bit vectors gets promoted to a 128-bit vector,
- // so we match on v4f32 here, not v2f32. This will also catch adding
- // the low two lanes of a true v4f32 vector.
-def : Pat<(fadd (vector_extract (v4f32 FPR128:$Rn), (i64 0)),
- (vector_extract (v4f32 FPR128:$Rn), (i64 1))),
- (f32 (FADDPv2i32p (EXTRACT_SUBREG FPR128:$Rn, dsub)))>;
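-// For illustration: summing the two d lanes selects to one pairwise add,
-//   addp d0, v0.2d    // d0 = v0.d[0] + v0.d[1]
-// and the f32 case above similarly becomes faddp s0, v0.2s.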
-
-// Scalar 64-bit shifts in FPR64 registers.
-def : Pat<(i64 (int_arm64_neon_sshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))),
- (SSHLv1i64 FPR64:$Rn, FPR64:$Rm)>;
-def : Pat<(i64 (int_arm64_neon_ushl (i64 FPR64:$Rn), (i64 FPR64:$Rm))),
- (USHLv1i64 FPR64:$Rn, FPR64:$Rm)>;
-def : Pat<(i64 (int_arm64_neon_srshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))),
- (SRSHLv1i64 FPR64:$Rn, FPR64:$Rm)>;
-def : Pat<(i64 (int_arm64_neon_urshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))),
- (URSHLv1i64 FPR64:$Rn, FPR64:$Rm)>;
-
-// Tail call return handling. These are all compiler pseudo-instructions,
-// so they carry no encoding information.
-let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [SP] in {
- def TCRETURNdi : Pseudo<(outs), (ins i64imm:$dst), []>;
- def TCRETURNri : Pseudo<(outs), (ins tcGPR64:$dst), []>;
-}
-
-def : Pat<(ARM64tcret tcGPR64:$dst), (TCRETURNri tcGPR64:$dst)>;
-def : Pat<(ARM64tcret (i64 tglobaladdr:$dst)), (TCRETURNdi texternalsym:$dst)>;
-def : Pat<(ARM64tcret (i64 texternalsym:$dst)), (TCRETURNdi texternalsym:$dst)>;
-
-include "ARM64InstrAtomics.td"
diff --git a/lib/Target/ARM64/ARM64MCInstLower.cpp b/lib/Target/ARM64/ARM64MCInstLower.cpp
deleted file mode 100644
index 01dc229..0000000
--- a/lib/Target/ARM64/ARM64MCInstLower.cpp
+++ /dev/null
@@ -1,201 +0,0 @@
-//===-- ARM64MCInstLower.cpp - Convert ARM64 MachineInstr to an MCInst ---===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains code to lower ARM64 MachineInstrs to their corresponding
-// MCInst records.
-//
-//===----------------------------------------------------------------------===//
-
-#include "ARM64MCInstLower.h"
-#include "MCTargetDesc/ARM64BaseInfo.h"
-#include "MCTargetDesc/ARM64MCExpr.h"
-#include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/IR/Mangler.h"
-#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCInst.h"
-#include "llvm/Support/CodeGen.h"
-#include "llvm/Target/TargetMachine.h"
-using namespace llvm;
-
-ARM64MCInstLower::ARM64MCInstLower(MCContext &ctx, Mangler &mang,
- AsmPrinter &printer)
- : Ctx(ctx), Printer(printer), TargetTriple(printer.getTargetTriple()) {}
-
-MCSymbol *
-ARM64MCInstLower::GetGlobalAddressSymbol(const MachineOperand &MO) const {
- return Printer.getSymbol(MO.getGlobal());
-}
-
-MCSymbol *
-ARM64MCInstLower::GetExternalSymbolSymbol(const MachineOperand &MO) const {
- return Printer.GetExternalSymbolSymbol(MO.getSymbolName());
-}
-
-MCOperand ARM64MCInstLower::lowerSymbolOperandDarwin(const MachineOperand &MO,
- MCSymbol *Sym) const {
- // FIXME: We would like an efficient form for this, so we don't have to do a
- // lot of extra uniquing.
- MCSymbolRefExpr::VariantKind RefKind = MCSymbolRefExpr::VK_None;
- if ((MO.getTargetFlags() & ARM64II::MO_GOT) != 0) {
- if ((MO.getTargetFlags() & ARM64II::MO_FRAGMENT) == ARM64II::MO_PAGE)
- RefKind = MCSymbolRefExpr::VK_GOTPAGE;
- else if ((MO.getTargetFlags() & ARM64II::MO_FRAGMENT) ==
- ARM64II::MO_PAGEOFF)
- RefKind = MCSymbolRefExpr::VK_GOTPAGEOFF;
- else
- assert(0 && "Unexpected target flags with MO_GOT on GV operand");
- } else if ((MO.getTargetFlags() & ARM64II::MO_TLS) != 0) {
- if ((MO.getTargetFlags() & ARM64II::MO_FRAGMENT) == ARM64II::MO_PAGE)
- RefKind = MCSymbolRefExpr::VK_TLVPPAGE;
- else if ((MO.getTargetFlags() & ARM64II::MO_FRAGMENT) ==
- ARM64II::MO_PAGEOFF)
- RefKind = MCSymbolRefExpr::VK_TLVPPAGEOFF;
- else
- llvm_unreachable("Unexpected target flags with MO_TLS on GV operand");
- } else {
- if ((MO.getTargetFlags() & ARM64II::MO_FRAGMENT) == ARM64II::MO_PAGE)
- RefKind = MCSymbolRefExpr::VK_PAGE;
- else if ((MO.getTargetFlags() & ARM64II::MO_FRAGMENT) ==
- ARM64II::MO_PAGEOFF)
- RefKind = MCSymbolRefExpr::VK_PAGEOFF;
- }
- const MCExpr *Expr = MCSymbolRefExpr::Create(Sym, RefKind, Ctx);
- if (!MO.isJTI() && MO.getOffset())
- Expr = MCBinaryExpr::CreateAdd(
- Expr, MCConstantExpr::Create(MO.getOffset(), Ctx), Ctx);
- return MCOperand::CreateExpr(Expr);
-}
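-// For illustration (a sketch of the MCExpr built above): a global-address
-// operand carrying MO_GOT | MO_PAGE lowers to the expression _g@GOTPAGE
-// (plus any addend), which later pairs with an ADRP relocation.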
-
-MCOperand ARM64MCInstLower::lowerSymbolOperandELF(const MachineOperand &MO,
- MCSymbol *Sym) const {
- uint32_t RefFlags = 0;
-
- if (MO.getTargetFlags() & ARM64II::MO_GOT)
- RefFlags |= ARM64MCExpr::VK_GOT;
- else if (MO.getTargetFlags() & ARM64II::MO_TLS) {
- TLSModel::Model Model;
- if (MO.isGlobal()) {
- const GlobalValue *GV = MO.getGlobal();
- Model = Printer.TM.getTLSModel(GV);
- } else {
- assert(MO.isSymbol() &&
- StringRef(MO.getSymbolName()) == "_TLS_MODULE_BASE_" &&
- "unexpected external TLS symbol");
- Model = TLSModel::GeneralDynamic;
- }
- switch (Model) {
- case TLSModel::InitialExec:
- RefFlags |= ARM64MCExpr::VK_GOTTPREL;
- break;
- case TLSModel::LocalExec:
- RefFlags |= ARM64MCExpr::VK_TPREL;
- break;
- case TLSModel::LocalDynamic:
- RefFlags |= ARM64MCExpr::VK_DTPREL;
- break;
- case TLSModel::GeneralDynamic:
- RefFlags |= ARM64MCExpr::VK_TLSDESC;
- break;
- }
- } else {
- // No modifier means this is a generic reference, classified as absolute for
- // the cases where it matters (:abs_g0: etc).
- RefFlags |= ARM64MCExpr::VK_ABS;
- }
-
- if ((MO.getTargetFlags() & ARM64II::MO_FRAGMENT) == ARM64II::MO_PAGE)
- RefFlags |= ARM64MCExpr::VK_PAGE;
- else if ((MO.getTargetFlags() & ARM64II::MO_FRAGMENT) == ARM64II::MO_PAGEOFF)
- RefFlags |= ARM64MCExpr::VK_PAGEOFF;
- else if ((MO.getTargetFlags() & ARM64II::MO_FRAGMENT) == ARM64II::MO_G3)
- RefFlags |= ARM64MCExpr::VK_G3;
- else if ((MO.getTargetFlags() & ARM64II::MO_FRAGMENT) == ARM64II::MO_G2)
- RefFlags |= ARM64MCExpr::VK_G2;
- else if ((MO.getTargetFlags() & ARM64II::MO_FRAGMENT) == ARM64II::MO_G1)
- RefFlags |= ARM64MCExpr::VK_G1;
- else if ((MO.getTargetFlags() & ARM64II::MO_FRAGMENT) == ARM64II::MO_G0)
- RefFlags |= ARM64MCExpr::VK_G0;
-
- if (MO.getTargetFlags() & ARM64II::MO_NC)
- RefFlags |= ARM64MCExpr::VK_NC;
-
- const MCExpr *Expr =
- MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_None, Ctx);
- if (!MO.isJTI() && MO.getOffset())
- Expr = MCBinaryExpr::CreateAdd(
- Expr, MCConstantExpr::Create(MO.getOffset(), Ctx), Ctx);
-
- ARM64MCExpr::VariantKind RefKind;
- RefKind = static_cast<ARM64MCExpr::VariantKind>(RefFlags);
- Expr = ARM64MCExpr::Create(Expr, RefKind, Ctx);
-
- return MCOperand::CreateExpr(Expr);
-}
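The ELF path composes its variant as a bitmask: a symbol-class part
(ABS/GOT/one of the TLS kinds) OR'd with an address-fragment part and an
optional no-overflow-check bit. The TLS switch is the interesting piece; a
simplified, self-contained restatement (enum values are illustrative only):

    #include <cassert>

    enum TLSModel { GeneralDynamic, LocalDynamic, InitialExec, LocalExec };
    enum Variant { VK_TLSDESC, VK_DTPREL, VK_GOTTPREL, VK_TPREL };

    // Mirrors the switch above: each TLS model maps to the relocation
    // family the linker and dynamic loader expect for that model.
    Variant tlsVariant(TLSModel M) {
      switch (M) {
      case InitialExec:    return VK_GOTTPREL; // TP offset loaded via GOT
      case LocalExec:      return VK_TPREL;    // link-time constant offset
      case LocalDynamic:   return VK_DTPREL;   // offset from module base
      case GeneralDynamic: return VK_TLSDESC;  // full TLS descriptor call
      }
      return VK_TLSDESC;
    }

    int main() {
      assert(tlsVariant(LocalExec) == VK_TPREL);
      assert(tlsVariant(GeneralDynamic) == VK_TLSDESC);
    }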
-
-MCOperand ARM64MCInstLower::LowerSymbolOperand(const MachineOperand &MO,
- MCSymbol *Sym) const {
- if (TargetTriple.isOSDarwin())
- return lowerSymbolOperandDarwin(MO, Sym);
-
- assert(TargetTriple.isOSBinFormatELF() && "Expect Darwin or ELF target");
- return lowerSymbolOperandELF(MO, Sym);
-}
-
-bool ARM64MCInstLower::lowerOperand(const MachineOperand &MO,
- MCOperand &MCOp) const {
- switch (MO.getType()) {
- default:
-    llvm_unreachable("unknown operand type");
- case MachineOperand::MO_Register:
- // Ignore all implicit register operands.
- if (MO.isImplicit())
- return false;
- MCOp = MCOperand::CreateReg(MO.getReg());
- break;
- case MachineOperand::MO_RegisterMask:
- // Regmasks are like implicit defs.
- return false;
- case MachineOperand::MO_Immediate:
- MCOp = MCOperand::CreateImm(MO.getImm());
- break;
- case MachineOperand::MO_MachineBasicBlock:
- MCOp = MCOperand::CreateExpr(
- MCSymbolRefExpr::Create(MO.getMBB()->getSymbol(), Ctx));
- break;
- case MachineOperand::MO_GlobalAddress:
- MCOp = LowerSymbolOperand(MO, GetGlobalAddressSymbol(MO));
- break;
- case MachineOperand::MO_ExternalSymbol:
- MCOp = LowerSymbolOperand(MO, GetExternalSymbolSymbol(MO));
- break;
- case MachineOperand::MO_JumpTableIndex:
- MCOp = LowerSymbolOperand(MO, Printer.GetJTISymbol(MO.getIndex()));
- break;
- case MachineOperand::MO_ConstantPoolIndex:
- MCOp = LowerSymbolOperand(MO, Printer.GetCPISymbol(MO.getIndex()));
- break;
- case MachineOperand::MO_BlockAddress:
- MCOp = LowerSymbolOperand(
- MO, Printer.GetBlockAddressSymbol(MO.getBlockAddress()));
- break;
- }
- return true;
-}
-
-void ARM64MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
- OutMI.setOpcode(MI->getOpcode());
-
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- MCOperand MCOp;
- if (lowerOperand(MI->getOperand(i), MCOp))
- OutMI.addOperand(MCOp);
- }
-}
diff --git a/lib/Target/ARM64/ARM64MachineFunctionInfo.h b/lib/Target/ARM64/ARM64MachineFunctionInfo.h
deleted file mode 100644
index 02bf7cf..0000000
--- a/lib/Target/ARM64/ARM64MachineFunctionInfo.h
+++ /dev/null
@@ -1,139 +0,0 @@
-//===- ARM64MachineFunctionInfo.h - ARM64 machine function info -*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file declares ARM64-specific per-machine-function information.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef ARM64MACHINEFUNCTIONINFO_H
-#define ARM64MACHINEFUNCTIONINFO_H
-
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/MC/MCLinkerOptimizationHint.h"
-
-namespace llvm {
-
-/// ARM64FunctionInfo - This class is derived from MachineFunctionInfo and
-/// contains private ARM64-specific information for each MachineFunction.
-class ARM64FunctionInfo : public MachineFunctionInfo {
-
- /// HasStackFrame - True if this function has a stack frame. Set by
- /// processFunctionBeforeCalleeSavedScan().
- bool HasStackFrame;
-
-  /// \brief Size of the stack frame, not including callee-saved registers.
- unsigned LocalStackSize;
-
- /// \brief Number of TLS accesses using the special (combinable)
- /// _TLS_MODULE_BASE_ symbol.
- unsigned NumLocalDynamicTLSAccesses;
-
- /// \brief FrameIndex for start of varargs area for arguments passed on the
- /// stack.
- int VarArgsStackIndex;
-
- /// \brief FrameIndex for start of varargs area for arguments passed in
- /// general purpose registers.
- int VarArgsGPRIndex;
-
- /// \brief Size of the varargs area for arguments passed in general purpose
- /// registers.
- unsigned VarArgsGPRSize;
-
- /// \brief FrameIndex for start of varargs area for arguments passed in
- /// floating-point registers.
- int VarArgsFPRIndex;
-
- /// \brief Size of the varargs area for arguments passed in floating-point
- /// registers.
- unsigned VarArgsFPRSize;
-
-public:
-  ARM64FunctionInfo()
-      : HasStackFrame(false), LocalStackSize(0), NumLocalDynamicTLSAccesses(0),
-        VarArgsStackIndex(0), VarArgsGPRIndex(0), VarArgsGPRSize(0),
-        VarArgsFPRIndex(0), VarArgsFPRSize(0) {}
-
-  explicit ARM64FunctionInfo(MachineFunction &MF)
-      : HasStackFrame(false), LocalStackSize(0), NumLocalDynamicTLSAccesses(0),
-        VarArgsStackIndex(0), VarArgsGPRIndex(0), VarArgsGPRSize(0),
-        VarArgsFPRIndex(0), VarArgsFPRSize(0) {
- (void)MF;
- }
-
- bool hasStackFrame() const { return HasStackFrame; }
- void setHasStackFrame(bool s) { HasStackFrame = s; }
-
- void setLocalStackSize(unsigned Size) { LocalStackSize = Size; }
- unsigned getLocalStackSize() const { return LocalStackSize; }
-
- void incNumLocalDynamicTLSAccesses() { ++NumLocalDynamicTLSAccesses; }
- unsigned getNumLocalDynamicTLSAccesses() const {
- return NumLocalDynamicTLSAccesses;
- }
-
- int getVarArgsStackIndex() const { return VarArgsStackIndex; }
- void setVarArgsStackIndex(int Index) { VarArgsStackIndex = Index; }
-
- int getVarArgsGPRIndex() const { return VarArgsGPRIndex; }
- void setVarArgsGPRIndex(int Index) { VarArgsGPRIndex = Index; }
-
- unsigned getVarArgsGPRSize() const { return VarArgsGPRSize; }
- void setVarArgsGPRSize(unsigned Size) { VarArgsGPRSize = Size; }
-
- int getVarArgsFPRIndex() const { return VarArgsFPRIndex; }
- void setVarArgsFPRIndex(int Index) { VarArgsFPRIndex = Index; }
-
- unsigned getVarArgsFPRSize() const { return VarArgsFPRSize; }
- void setVarArgsFPRSize(unsigned Size) { VarArgsFPRSize = Size; }
-
- typedef SmallPtrSet<const MachineInstr *, 16> SetOfInstructions;
-
- const SetOfInstructions &getLOHRelated() const { return LOHRelated; }
-
- // Shortcuts for LOH related types.
- class MILOHDirective {
- MCLOHType Kind;
-
- /// Arguments of this directive. Order matters.
- SmallVector<const MachineInstr *, 3> Args;
-
- public:
- typedef SmallVectorImpl<const MachineInstr *> LOHArgs;
-
- MILOHDirective(MCLOHType Kind, const LOHArgs &Args)
- : Kind(Kind), Args(Args.begin(), Args.end()) {
- assert(isValidMCLOHType(Kind) && "Invalid LOH directive type!");
- }
-
- MCLOHType getKind() const { return Kind; }
- const LOHArgs &getArgs() const { return Args; }
- };
-
- typedef MILOHDirective::LOHArgs MILOHArgs;
- typedef SmallVector<MILOHDirective, 32> MILOHContainer;
-
- const MILOHContainer &getLOHContainer() const { return LOHContainerSet; }
-
-  /// Add a LOH directive of kind @p Kind with arguments @p Args.
- void addLOHDirective(MCLOHType Kind, const MILOHArgs &Args) {
- LOHContainerSet.push_back(MILOHDirective(Kind, Args));
- LOHRelated.insert(Args.begin(), Args.end());
- }
-
-private:
- // Hold the lists of LOHs.
- MILOHContainer LOHContainerSet;
- SetOfInstructions LOHRelated;
-};
-} // End llvm namespace
-
-#endif // ARM64MACHINEFUNCTIONINFO_H
diff --git a/lib/Target/ARM64/ARM64RegisterInfo.cpp b/lib/Target/ARM64/ARM64RegisterInfo.cpp
deleted file mode 100644
index 4c7fc8a..0000000
--- a/lib/Target/ARM64/ARM64RegisterInfo.cpp
+++ /dev/null
@@ -1,400 +0,0 @@
-//===- ARM64RegisterInfo.cpp - ARM64 Register Information -----------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the ARM64 implementation of the TargetRegisterInfo class.
-//
-//===----------------------------------------------------------------------===//
-
-#include "ARM64RegisterInfo.h"
-#include "ARM64FrameLowering.h"
-#include "ARM64InstrInfo.h"
-#include "ARM64Subtarget.h"
-#include "MCTargetDesc/ARM64AddressingModes.h"
-#include "llvm/ADT/BitVector.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/RegisterScavenging.h"
-#include "llvm/IR/Function.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetOptions.h"
-
-#define GET_REGINFO_TARGET_DESC
-#include "ARM64GenRegisterInfo.inc"
-
-using namespace llvm;
-
-ARM64RegisterInfo::ARM64RegisterInfo(const ARM64InstrInfo *tii,
- const ARM64Subtarget *sti)
- : ARM64GenRegisterInfo(ARM64::LR), TII(tii), STI(sti) {}
-
-const uint16_t *
-ARM64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
- assert(MF && "Invalid MachineFunction pointer.");
- if (MF->getFunction()->getCallingConv() == CallingConv::AnyReg)
- return CSR_ARM64_AllRegs_SaveList;
- else
- return CSR_ARM64_AAPCS_SaveList;
-}
-
-const uint32_t *
-ARM64RegisterInfo::getCallPreservedMask(CallingConv::ID CC) const {
- if (CC == CallingConv::AnyReg)
- return CSR_ARM64_AllRegs_RegMask;
- else
- return CSR_ARM64_AAPCS_RegMask;
-}
-
-const uint32_t *ARM64RegisterInfo::getTLSCallPreservedMask() const {
- if (STI->isTargetDarwin())
- return CSR_ARM64_TLS_Darwin_RegMask;
-
- assert(STI->isTargetELF() && "only expect Darwin or ELF TLS");
- return CSR_ARM64_TLS_ELF_RegMask;
-}
-
-const uint32_t *
-ARM64RegisterInfo::getThisReturnPreservedMask(CallingConv::ID) const {
- // This should return a register mask that is the same as that returned by
- // getCallPreservedMask but that additionally preserves the register used for
- // the first i64 argument (which must also be the register used to return a
- // single i64 return value)
- //
-  // If the calling convention does not use the same register for both, the
-  // function should return NULL (this does not currently apply).
- return CSR_ARM64_AAPCS_ThisReturn_RegMask;
-}
-
-BitVector ARM64RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
-
-  // FIXME: avoid re-calculating this every time.
- BitVector Reserved(getNumRegs());
- Reserved.set(ARM64::SP);
- Reserved.set(ARM64::XZR);
- Reserved.set(ARM64::WSP);
- Reserved.set(ARM64::WZR);
-
- if (TFI->hasFP(MF) || STI->isTargetDarwin()) {
- Reserved.set(ARM64::FP);
- Reserved.set(ARM64::W29);
- }
-
- if (STI->isTargetDarwin()) {
- Reserved.set(ARM64::X18); // Platform register
- Reserved.set(ARM64::W18);
- }
-
- if (hasBasePointer(MF)) {
- Reserved.set(ARM64::X19);
- Reserved.set(ARM64::W19);
- }
-
- return Reserved;
-}
-
-bool ARM64RegisterInfo::isReservedReg(const MachineFunction &MF,
- unsigned Reg) const {
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
-
- switch (Reg) {
- default:
- break;
- case ARM64::SP:
- case ARM64::XZR:
- case ARM64::WSP:
- case ARM64::WZR:
- return true;
- case ARM64::X18:
- case ARM64::W18:
- return STI->isTargetDarwin();
- case ARM64::FP:
- case ARM64::W29:
- return TFI->hasFP(MF) || STI->isTargetDarwin();
- case ARM64::W19:
- case ARM64::X19:
- return hasBasePointer(MF);
- }
-
- return false;
-}
-
-const TargetRegisterClass *
-ARM64RegisterInfo::getPointerRegClass(const MachineFunction &MF,
- unsigned Kind) const {
- return &ARM64::GPR64RegClass;
-}
-
-const TargetRegisterClass *
-ARM64RegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const {
- if (RC == &ARM64::CCRRegClass)
- return NULL; // Can't copy CPSR.
- return RC;
-}
-
-unsigned ARM64RegisterInfo::getBaseRegister() const { return ARM64::X19; }
-
-bool ARM64RegisterInfo::hasBasePointer(const MachineFunction &MF) const {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
-
- // In the presence of variable sized objects, if the fixed stack size is
- // large enough that referencing from the FP won't result in things being
- // in range relatively often, we can use a base pointer to allow access
- // from the other direction like the SP normally works.
- if (MFI->hasVarSizedObjects()) {
- // Conservatively estimate whether the negative offset from the frame
- // pointer will be sufficient to reach. If a function has a smallish
- // frame, it's less likely to have lots of spills and callee saved
- // space, so it's all more likely to be within range of the frame pointer.
- // If it's wrong, we'll materialize the constant and still get to the
- // object; it's just suboptimal. Negative offsets use the unscaled
- // load/store instructions, which have a 9-bit signed immediate.
- if (MFI->getLocalFrameSize() < 256)
- return false;
- return true;
- }
-
- return false;
-}
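The 256-byte threshold comes from the 9-bit signed immediate of the
unscaled load/store forms (range -256..255) mentioned in the comment. A
standalone restatement of the heuristic, with the two inputs made explicit
(names are illustrative):

    #include <cassert>

    bool wantsBasePointer(bool HasVarSizedObjects, unsigned LocalFrameSize) {
      if (!HasVarSizedObjects)
        return false;                 // fixed frame: FP/SP reach everything
      return LocalFrameSize >= 256;   // smallish frames stay in FP range
    }

    int main() {
      assert(!wantsBasePointer(true, 128));       // FP suffices
      assert(wantsBasePointer(true, 4096));       // use the base pointer
      assert(!wantsBasePointer(false, 1u << 20)); // no VLAs, no need
    }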
-
-unsigned ARM64RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
-
- return TFI->hasFP(MF) ? ARM64::FP : ARM64::SP;
-}
-
-bool
-ARM64RegisterInfo::requiresRegisterScavenging(const MachineFunction &MF) const {
- return true;
-}
-
-bool ARM64RegisterInfo::requiresVirtualBaseRegisters(const MachineFunction &MF)
- const {
- return true;
-}
-
-bool
-ARM64RegisterInfo::useFPForScavengingIndex(const MachineFunction &MF) const {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
- // ARM64FrameLowering::resolveFrameIndexReference() can always fall back
- // to the stack pointer, so only put the emergency spill slot next to the
- // FP when there's no better way to access it (SP or base pointer).
- return MFI->hasVarSizedObjects() && !hasBasePointer(MF);
-}
-
-bool ARM64RegisterInfo::requiresFrameIndexScavenging(const MachineFunction &MF)
- const {
- return true;
-}
-
-bool ARM64RegisterInfo::cannotEliminateFrame(const MachineFunction &MF) const {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
- // Only consider eliminating leaf frames.
- if (MFI->hasCalls() || (MF.getTarget().Options.DisableFramePointerElim(MF) &&
- MFI->adjustsStack()))
- return true;
- return MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken();
-}
-
-/// needsFrameBaseReg - Returns true if the instruction's frame index
-/// reference would be better served by a base register other than FP
-/// or SP. Used by LocalStackFrameAllocation to determine which frame index
-/// references it should create new base registers for.
-bool ARM64RegisterInfo::needsFrameBaseReg(MachineInstr *MI,
- int64_t Offset) const {
- for (unsigned i = 0; !MI->getOperand(i).isFI(); ++i)
- assert(i < MI->getNumOperands() &&
- "Instr doesn't have FrameIndex operand!");
-
- // It's the load/store FI references that cause issues, as it can be difficult
- // to materialize the offset if it won't fit in the literal field. Estimate
- // based on the size of the local frame and some conservative assumptions
- // about the rest of the stack frame (note, this is pre-regalloc, so
- // we don't know everything for certain yet) whether this offset is likely
- // to be out of range of the immediate. Return true if so.
-
- // We only generate virtual base registers for loads and stores, so
- // return false for everything else.
- if (!MI->mayLoad() && !MI->mayStore())
- return false;
-
- // Without a virtual base register, if the function has variable sized
-  // objects, all fixed-size local references will be via the frame pointer.
- // Approximate the offset and see if it's legal for the instruction.
- // Note that the incoming offset is based on the SP value at function entry,
- // so it'll be negative.
- MachineFunction &MF = *MI->getParent()->getParent();
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
- MachineFrameInfo *MFI = MF.getFrameInfo();
-
- // Estimate an offset from the frame pointer.
- // Conservatively assume all GPR callee-saved registers get pushed.
- // FP, LR, X19-X28, D8-D15. 64-bits each.
- int64_t FPOffset = Offset - 16 * 20;
- // Estimate an offset from the stack pointer.
-  // The incoming offset is relative to the SP at the start of the function,
- // but when we access the local it'll be relative to the SP after local
- // allocation, so adjust our SP-relative offset by that allocation size.
- Offset += MFI->getLocalFrameSize();
- // Assume that we'll have at least some spill slots allocated.
- // FIXME: This is a total SWAG number. We should run some statistics
- // and pick a real one.
- Offset += 128; // 128 bytes of spill slots
-
- // If there is a frame pointer, try using it.
- // The FP is only available if there is no dynamic realignment. We
- // don't know for sure yet whether we'll need that, so we guess based
- // on whether there are any local variables that would trigger it.
- if (TFI->hasFP(MF) && isFrameOffsetLegal(MI, FPOffset))
- return false;
-
- // If we can reference via the stack pointer or base pointer, try that.
- // FIXME: This (and the code that resolves the references) can be improved
- // to only disallow SP relative references in the live range of
- // the VLA(s). In practice, it's unclear how much difference that
- // would make, but it may be worth doing.
- if (isFrameOffsetLegal(MI, Offset))
- return false;
-
- // The offset likely isn't legal; we want to allocate a virtual base register.
- return true;
-}
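To make the two estimates concrete: with an incoming SP-relative offset of
-48 and a 512-byte local frame, the code above guesses FPOffset = -48 - 320
= -368 (the 16 * 20 = 320 bytes model the FP, LR, X19-X28, D8-D15
callee-save area) and an SP-relative offset of -48 + 512 + 128 = 592. A
small sketch with these illustrative numbers:

    #include <cstdio>

    int main() {
      long Offset = -48;         // SP-relative offset at function entry
      long LocalFrameSize = 512; // local allocation size

      long FPOffset = Offset - 16 * 20;              // -368 from the FP
      long SPOffset = Offset + LocalFrameSize + 128; // 592 from the SP
      std::printf("FP estimate %ld, SP estimate %ld\n", FPOffset, SPOffset);
      return 0;
    }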
-
-bool ARM64RegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
- int64_t Offset) const {
- assert(Offset <= INT_MAX && "Offset too big to fit in int.");
- assert(MI && "Unable to get the legal offset for nil instruction.");
- int SaveOffset = Offset;
- return isARM64FrameOffsetLegal(*MI, SaveOffset) & ARM64FrameOffsetIsLegal;
-}
-
-/// Insert defining instruction(s) for BaseReg to be a pointer to FrameIdx
-/// at the beginning of the basic block.
-void ARM64RegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
- unsigned BaseReg,
- int FrameIdx,
- int64_t Offset) const {
- MachineBasicBlock::iterator Ins = MBB->begin();
- DebugLoc DL; // Defaults to "unknown"
- if (Ins != MBB->end())
- DL = Ins->getDebugLoc();
-
- const MCInstrDesc &MCID = TII->get(ARM64::ADDXri);
- MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
- const MachineFunction &MF = *MBB->getParent();
- MRI.constrainRegClass(BaseReg, TII->getRegClass(MCID, 0, this, MF));
- unsigned Shifter = ARM64_AM::getShifterImm(ARM64_AM::LSL, 0);
-
- BuildMI(*MBB, Ins, DL, MCID, BaseReg)
- .addFrameIndex(FrameIdx)
- .addImm(Offset)
- .addImm(Shifter);
-}
-
-void ARM64RegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
- int64_t Offset) const {
- int Off = Offset; // ARM doesn't need the general 64-bit offsets
- unsigned i = 0;
-
- while (!MI.getOperand(i).isFI()) {
- ++i;
- assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
- }
- bool Done = rewriteARM64FrameIndex(MI, i, BaseReg, Off, TII);
- assert(Done && "Unable to resolve frame index!");
- (void)Done;
-}
-
-void ARM64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
- int SPAdj, unsigned FIOperandNum,
- RegScavenger *RS) const {
- assert(SPAdj == 0 && "Unexpected");
-
- MachineInstr &MI = *II;
- MachineBasicBlock &MBB = *MI.getParent();
- MachineFunction &MF = *MBB.getParent();
- const ARM64FrameLowering *TFI = static_cast<const ARM64FrameLowering *>(
- MF.getTarget().getFrameLowering());
-
- int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
- unsigned FrameReg;
- int Offset;
-
- // Special handling of dbg_value, stackmap and patchpoint instructions.
- if (MI.isDebugValue() || MI.getOpcode() == TargetOpcode::STACKMAP ||
- MI.getOpcode() == TargetOpcode::PATCHPOINT) {
- Offset = TFI->resolveFrameIndexReference(MF, FrameIndex, FrameReg,
- /*PreferFP=*/true);
- Offset += MI.getOperand(FIOperandNum + 1).getImm();
- MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, false /*isDef*/);
- MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
- return;
- }
-
- // Modify MI as necessary to handle as much of 'Offset' as possible
- Offset = TFI->resolveFrameIndexReference(MF, FrameIndex, FrameReg);
- if (rewriteARM64FrameIndex(MI, FIOperandNum, FrameReg, Offset, TII))
- return;
-
- assert((!RS || !RS->isScavengingFrameIndex(FrameIndex)) &&
- "Emergency spill slot is out of reach");
-
- // If we get here, the immediate doesn't fit into the instruction. We folded
- // as much as possible above. Handle the rest, providing a register that is
- // SP+LargeImm.
- unsigned ScratchReg =
- MF.getRegInfo().createVirtualRegister(&ARM64::GPR64RegClass);
- emitFrameOffset(MBB, II, MI.getDebugLoc(), ScratchReg, FrameReg, Offset, TII);
- MI.getOperand(FIOperandNum).ChangeToRegister(ScratchReg, false, false, true);
-}
-
-namespace llvm {
-
-unsigned ARM64RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
- MachineFunction &MF) const {
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
-
- switch (RC->getID()) {
- default:
- return 0;
- case ARM64::GPR32RegClassID:
- case ARM64::GPR32spRegClassID:
- case ARM64::GPR32allRegClassID:
- case ARM64::GPR64spRegClassID:
- case ARM64::GPR64allRegClassID:
- case ARM64::GPR64RegClassID:
- case ARM64::GPR32commonRegClassID:
- case ARM64::GPR64commonRegClassID:
- return 32 - 1 // XZR/SP
- - (TFI->hasFP(MF) || STI->isTargetDarwin()) // FP
- - STI->isTargetDarwin() // X18 reserved as platform register
- - hasBasePointer(MF); // X19
- case ARM64::FPR8RegClassID:
- case ARM64::FPR16RegClassID:
- case ARM64::FPR32RegClassID:
- case ARM64::FPR64RegClassID:
- case ARM64::FPR128RegClassID:
- return 32;
-
- case ARM64::DDRegClassID:
- case ARM64::DDDRegClassID:
- case ARM64::DDDDRegClassID:
- case ARM64::QQRegClassID:
- case ARM64::QQQRegClassID:
- case ARM64::QQQQRegClassID:
- return 32;
-
- case ARM64::FPR128_loRegClassID:
- return 16;
- }
-}
-
-} // namespace llvm
diff --git a/lib/Target/ARM64/ARM64RegisterInfo.h b/lib/Target/ARM64/ARM64RegisterInfo.h
deleted file mode 100644
index 31d9242..0000000
--- a/lib/Target/ARM64/ARM64RegisterInfo.h
+++ /dev/null
@@ -1,101 +0,0 @@
-//===- ARM64RegisterInfo.h - ARM64 Register Information Impl ----*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the ARM64 implementation of the TargetRegisterInfo class.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_TARGET_ARM64REGISTERINFO_H
-#define LLVM_TARGET_ARM64REGISTERINFO_H
-
-#define GET_REGINFO_HEADER
-#include "ARM64GenRegisterInfo.inc"
-
-namespace llvm {
-
-class ARM64InstrInfo;
-class ARM64Subtarget;
-class MachineFunction;
-class RegScavenger;
-class TargetRegisterClass;
-
-struct ARM64RegisterInfo : public ARM64GenRegisterInfo {
-private:
- const ARM64InstrInfo *TII;
- const ARM64Subtarget *STI;
-
-public:
- ARM64RegisterInfo(const ARM64InstrInfo *tii, const ARM64Subtarget *sti);
-
- bool isReservedReg(const MachineFunction &MF, unsigned Reg) const;
-
- /// Code Generation virtual methods...
- const uint16_t *
- getCalleeSavedRegs(const MachineFunction *MF = 0) const override;
- const uint32_t *getCallPreservedMask(CallingConv::ID) const override;
-
- unsigned getCSRFirstUseCost() const {
- // The cost will be compared against BlockFrequency where entry has the
- // value of 1 << 14. A value of 5 will choose to spill or split really
- // cold path instead of using a callee-saved register.
- return 5;
- }
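Assuming the comparison works the way the comment suggests, a first-use
cost of 5 against an entry frequency of 1 << 14 = 16384 means a block must
run at well under 5/16384 (about 0.03%) of the entry frequency before
spilling or splitting beats touching a callee-saved register:

    #include <cstdio>

    int main() {
      const unsigned EntryFreq = 1u << 14; // entry block normalized weight
      const unsigned CSRCost = 5;          // getCSRFirstUseCost() above
      std::printf("ratio = %.4f%%\n", 100.0 * CSRCost / EntryFreq);
      return 0;
    }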
-
- // Calls involved in thread-local variable lookup save more registers than
- // normal calls, so they need a different mask to represent this.
- const uint32_t *getTLSCallPreservedMask() const;
-
- /// getThisReturnPreservedMask - Returns a call preserved mask specific to the
- /// case that 'returned' is on an i64 first argument if the calling convention
- /// is one that can (partially) model this attribute with a preserved mask
- /// (i.e. it is a calling convention that uses the same register for the first
- /// i64 argument and an i64 return value)
- ///
- /// Should return NULL in the case that the calling convention does not have
- /// this property
- const uint32_t *getThisReturnPreservedMask(CallingConv::ID) const;
-
- BitVector getReservedRegs(const MachineFunction &MF) const override;
- const TargetRegisterClass *
- getPointerRegClass(const MachineFunction &MF,
- unsigned Kind = 0) const override;
- const TargetRegisterClass *
- getCrossCopyRegClass(const TargetRegisterClass *RC) const override;
-
- bool requiresRegisterScavenging(const MachineFunction &MF) const override;
- bool useFPForScavengingIndex(const MachineFunction &MF) const override;
- bool requiresFrameIndexScavenging(const MachineFunction &MF) const override;
-
- bool needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const override;
- bool isFrameOffsetLegal(const MachineInstr *MI,
- int64_t Offset) const override;
- void materializeFrameBaseRegister(MachineBasicBlock *MBB, unsigned BaseReg,
- int FrameIdx,
- int64_t Offset) const override;
- void resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
- int64_t Offset) const override;
- void eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
- unsigned FIOperandNum,
- RegScavenger *RS = NULL) const override;
- bool cannotEliminateFrame(const MachineFunction &MF) const;
-
- bool requiresVirtualBaseRegisters(const MachineFunction &MF) const override;
- bool hasBasePointer(const MachineFunction &MF) const;
- unsigned getBaseRegister() const;
-
- // Debug information queries.
- unsigned getFrameRegister(const MachineFunction &MF) const override;
-
- unsigned getRegPressureLimit(const TargetRegisterClass *RC,
- MachineFunction &MF) const override;
-};
-
-} // end namespace llvm
-
-#endif // LLVM_TARGET_ARM64REGISTERINFO_H
diff --git a/lib/Target/ARM64/ARM64RegisterInfo.td b/lib/Target/ARM64/ARM64RegisterInfo.td
deleted file mode 100644
index 96001c5..0000000
--- a/lib/Target/ARM64/ARM64RegisterInfo.td
+++ /dev/null
@@ -1,561 +0,0 @@
-//===- ARM64RegisterInfo.td - Describe the ARM64 Registers -*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-//
-//===----------------------------------------------------------------------===//
-
-
-class ARM64Reg<bits<16> enc, string n, list<Register> subregs = [],
- list<string> altNames = []>
- : Register<n, altNames> {
- let HWEncoding = enc;
- let Namespace = "ARM64";
- let SubRegs = subregs;
-}
-
-let Namespace = "ARM64" in {
- def sub_32 : SubRegIndex<32>;
-
- def bsub : SubRegIndex<8>;
- def hsub : SubRegIndex<16>;
- def ssub : SubRegIndex<32>;
- def dsub : SubRegIndex<32>;
- def qhisub : SubRegIndex<64>;
- def qsub : SubRegIndex<64>;
- // Note: Code depends on these having consecutive numbers
- def dsub0 : SubRegIndex<64>;
- def dsub1 : SubRegIndex<64>;
- def dsub2 : SubRegIndex<64>;
- def dsub3 : SubRegIndex<64>;
- // Note: Code depends on these having consecutive numbers
- def qsub0 : SubRegIndex<128>;
- def qsub1 : SubRegIndex<128>;
- def qsub2 : SubRegIndex<128>;
- def qsub3 : SubRegIndex<128>;
-}
-
-let Namespace = "ARM64" in {
- def vreg : RegAltNameIndex;
- def vlist1 : RegAltNameIndex;
-}
-
-//===----------------------------------------------------------------------===//
-// Registers
-//===----------------------------------------------------------------------===//
-def W0 : ARM64Reg<0, "w0" >, DwarfRegNum<[0]>;
-def W1 : ARM64Reg<1, "w1" >, DwarfRegNum<[1]>;
-def W2 : ARM64Reg<2, "w2" >, DwarfRegNum<[2]>;
-def W3 : ARM64Reg<3, "w3" >, DwarfRegNum<[3]>;
-def W4 : ARM64Reg<4, "w4" >, DwarfRegNum<[4]>;
-def W5 : ARM64Reg<5, "w5" >, DwarfRegNum<[5]>;
-def W6 : ARM64Reg<6, "w6" >, DwarfRegNum<[6]>;
-def W7 : ARM64Reg<7, "w7" >, DwarfRegNum<[7]>;
-def W8 : ARM64Reg<8, "w8" >, DwarfRegNum<[8]>;
-def W9 : ARM64Reg<9, "w9" >, DwarfRegNum<[9]>;
-def W10 : ARM64Reg<10, "w10">, DwarfRegNum<[10]>;
-def W11 : ARM64Reg<11, "w11">, DwarfRegNum<[11]>;
-def W12 : ARM64Reg<12, "w12">, DwarfRegNum<[12]>;
-def W13 : ARM64Reg<13, "w13">, DwarfRegNum<[13]>;
-def W14 : ARM64Reg<14, "w14">, DwarfRegNum<[14]>;
-def W15 : ARM64Reg<15, "w15">, DwarfRegNum<[15]>;
-def W16 : ARM64Reg<16, "w16">, DwarfRegNum<[16]>;
-def W17 : ARM64Reg<17, "w17">, DwarfRegNum<[17]>;
-def W18 : ARM64Reg<18, "w18">, DwarfRegNum<[18]>;
-def W19 : ARM64Reg<19, "w19">, DwarfRegNum<[19]>;
-def W20 : ARM64Reg<20, "w20">, DwarfRegNum<[20]>;
-def W21 : ARM64Reg<21, "w21">, DwarfRegNum<[21]>;
-def W22 : ARM64Reg<22, "w22">, DwarfRegNum<[22]>;
-def W23 : ARM64Reg<23, "w23">, DwarfRegNum<[23]>;
-def W24 : ARM64Reg<24, "w24">, DwarfRegNum<[24]>;
-def W25 : ARM64Reg<25, "w25">, DwarfRegNum<[25]>;
-def W26 : ARM64Reg<26, "w26">, DwarfRegNum<[26]>;
-def W27 : ARM64Reg<27, "w27">, DwarfRegNum<[27]>;
-def W28 : ARM64Reg<28, "w28">, DwarfRegNum<[28]>;
-def W29 : ARM64Reg<29, "w29">, DwarfRegNum<[29]>;
-def W30 : ARM64Reg<30, "w30">, DwarfRegNum<[30]>;
-def WSP : ARM64Reg<31, "wsp">, DwarfRegNum<[31]>;
-def WZR : ARM64Reg<31, "wzr">, DwarfRegAlias<WSP>;
-
-let SubRegIndices = [sub_32] in {
-def X0 : ARM64Reg<0, "x0", [W0]>, DwarfRegAlias<W0>;
-def X1 : ARM64Reg<1, "x1", [W1]>, DwarfRegAlias<W1>;
-def X2 : ARM64Reg<2, "x2", [W2]>, DwarfRegAlias<W2>;
-def X3 : ARM64Reg<3, "x3", [W3]>, DwarfRegAlias<W3>;
-def X4 : ARM64Reg<4, "x4", [W4]>, DwarfRegAlias<W4>;
-def X5 : ARM64Reg<5, "x5", [W5]>, DwarfRegAlias<W5>;
-def X6 : ARM64Reg<6, "x6", [W6]>, DwarfRegAlias<W6>;
-def X7 : ARM64Reg<7, "x7", [W7]>, DwarfRegAlias<W7>;
-def X8 : ARM64Reg<8, "x8", [W8]>, DwarfRegAlias<W8>;
-def X9 : ARM64Reg<9, "x9", [W9]>, DwarfRegAlias<W9>;
-def X10 : ARM64Reg<10, "x10", [W10]>, DwarfRegAlias<W10>;
-def X11 : ARM64Reg<11, "x11", [W11]>, DwarfRegAlias<W11>;
-def X12 : ARM64Reg<12, "x12", [W12]>, DwarfRegAlias<W12>;
-def X13 : ARM64Reg<13, "x13", [W13]>, DwarfRegAlias<W13>;
-def X14 : ARM64Reg<14, "x14", [W14]>, DwarfRegAlias<W14>;
-def X15 : ARM64Reg<15, "x15", [W15]>, DwarfRegAlias<W15>;
-def X16 : ARM64Reg<16, "x16", [W16]>, DwarfRegAlias<W16>;
-def X17 : ARM64Reg<17, "x17", [W17]>, DwarfRegAlias<W17>;
-def X18 : ARM64Reg<18, "x18", [W18]>, DwarfRegAlias<W18>;
-def X19 : ARM64Reg<19, "x19", [W19]>, DwarfRegAlias<W19>;
-def X20 : ARM64Reg<20, "x20", [W20]>, DwarfRegAlias<W20>;
-def X21 : ARM64Reg<21, "x21", [W21]>, DwarfRegAlias<W21>;
-def X22 : ARM64Reg<22, "x22", [W22]>, DwarfRegAlias<W22>;
-def X23 : ARM64Reg<23, "x23", [W23]>, DwarfRegAlias<W23>;
-def X24 : ARM64Reg<24, "x24", [W24]>, DwarfRegAlias<W24>;
-def X25 : ARM64Reg<25, "x25", [W25]>, DwarfRegAlias<W25>;
-def X26 : ARM64Reg<26, "x26", [W26]>, DwarfRegAlias<W26>;
-def X27 : ARM64Reg<27, "x27", [W27]>, DwarfRegAlias<W27>;
-def X28 : ARM64Reg<28, "x28", [W28]>, DwarfRegAlias<W28>;
-def FP : ARM64Reg<29, "fp", [W29]>, DwarfRegAlias<W29>;
-def LR : ARM64Reg<30, "lr", [W30]>, DwarfRegAlias<W30>;
-def SP : ARM64Reg<31, "sp", [WSP]>, DwarfRegAlias<WSP>;
-def XZR : ARM64Reg<31, "xzr", [WZR]>, DwarfRegAlias<WSP>;
-}
-
-// Condition code register.
-def CPSR : ARM64Reg<0, "cpsr">;
-
-// GPR register classes containing the intersections of GPR32/GPR32sp and
-// GPR64/GPR64sp, for use by the coalescer.
-def GPR32common : RegisterClass<"ARM64", [i32], 32, (sequence "W%u", 0, 30)> {
- let AltOrders = [(rotl GPR32common, 8)];
- let AltOrderSelect = [{ return 1; }];
-}
-def GPR64common : RegisterClass<"ARM64", [i64], 64,
- (add (sequence "X%u", 0, 28), FP, LR)> {
- let AltOrders = [(rotl GPR64common, 8)];
- let AltOrderSelect = [{ return 1; }];
-}
-// GPR register classes which exclude SP/WSP.
-def GPR32 : RegisterClass<"ARM64", [i32], 32, (add GPR32common, WZR)> {
- let AltOrders = [(rotl GPR32, 8)];
- let AltOrderSelect = [{ return 1; }];
-}
-def GPR64 : RegisterClass<"ARM64", [i64], 64, (add GPR64common, XZR)> {
- let AltOrders = [(rotl GPR64, 8)];
- let AltOrderSelect = [{ return 1; }];
-}
-
-// GPR register classes which include SP/WSP.
-def GPR32sp : RegisterClass<"ARM64", [i32], 32, (add GPR32common, WSP)> {
- let AltOrders = [(rotl GPR32sp, 8)];
- let AltOrderSelect = [{ return 1; }];
-}
-def GPR64sp : RegisterClass<"ARM64", [i64], 64, (add GPR64common, SP)> {
- let AltOrders = [(rotl GPR64sp, 8)];
- let AltOrderSelect = [{ return 1; }];
-}
-
-// GPR register classes which include WZR/XZR AND SP/WSP. This is not a
-// constraint used by any instructions; it is used as a common super-class.
-def GPR32all : RegisterClass<"ARM64", [i32], 32, (add GPR32common, WZR, WSP)>;
-def GPR64all : RegisterClass<"ARM64", [i64], 64, (add GPR64common, XZR, SP)>;
-
-// For tail calls, we can't use callee-saved registers, as they are restored
-// to the saved value before the tail call, which would clobber a call address.
-// This class is used by indirect tail calls to hold the destination address.
-def tcGPR64 : RegisterClass<"ARM64", [i64], 64, (sub GPR64common, X19, X20, X21,
- X22, X23, X24, X25, X26,
- X27, X28)>;
-
-// GPR register operands for the post-increment amount of vector load/store
-// instructions. These have alternate printing when Rm=31, which prints a
-// constant immediate value equal to the total number of bytes transferred.
-def GPR64pi1 : RegisterOperand<GPR64, "printPostIncOperand1">;
-def GPR64pi2 : RegisterOperand<GPR64, "printPostIncOperand2">;
-def GPR64pi3 : RegisterOperand<GPR64, "printPostIncOperand3">;
-def GPR64pi4 : RegisterOperand<GPR64, "printPostIncOperand4">;
-def GPR64pi6 : RegisterOperand<GPR64, "printPostIncOperand6">;
-def GPR64pi8 : RegisterOperand<GPR64, "printPostIncOperand8">;
-def GPR64pi12 : RegisterOperand<GPR64, "printPostIncOperand12">;
-def GPR64pi16 : RegisterOperand<GPR64, "printPostIncOperand16">;
-def GPR64pi24 : RegisterOperand<GPR64, "printPostIncOperand24">;
-def GPR64pi32 : RegisterOperand<GPR64, "printPostIncOperand32">;
-def GPR64pi48 : RegisterOperand<GPR64, "printPostIncOperand48">;
-def GPR64pi64 : RegisterOperand<GPR64, "printPostIncOperand64">;
-
-// Condition code regclass.
-def CCR : RegisterClass<"ARM64", [i32], 32, (add CPSR)> {
- let CopyCost = -1; // Don't allow copying of status registers.
-
- // CCR is not allocatable.
- let isAllocatable = 0;
-}
-
-//===----------------------------------------------------------------------===//
-// Floating Point Scalar Registers
-//===----------------------------------------------------------------------===//
-
-def B0 : ARM64Reg<0, "b0">, DwarfRegNum<[64]>;
-def B1 : ARM64Reg<1, "b1">, DwarfRegNum<[65]>;
-def B2 : ARM64Reg<2, "b2">, DwarfRegNum<[66]>;
-def B3 : ARM64Reg<3, "b3">, DwarfRegNum<[67]>;
-def B4 : ARM64Reg<4, "b4">, DwarfRegNum<[68]>;
-def B5 : ARM64Reg<5, "b5">, DwarfRegNum<[69]>;
-def B6 : ARM64Reg<6, "b6">, DwarfRegNum<[70]>;
-def B7 : ARM64Reg<7, "b7">, DwarfRegNum<[71]>;
-def B8 : ARM64Reg<8, "b8">, DwarfRegNum<[72]>;
-def B9 : ARM64Reg<9, "b9">, DwarfRegNum<[73]>;
-def B10 : ARM64Reg<10, "b10">, DwarfRegNum<[74]>;
-def B11 : ARM64Reg<11, "b11">, DwarfRegNum<[75]>;
-def B12 : ARM64Reg<12, "b12">, DwarfRegNum<[76]>;
-def B13 : ARM64Reg<13, "b13">, DwarfRegNum<[77]>;
-def B14 : ARM64Reg<14, "b14">, DwarfRegNum<[78]>;
-def B15 : ARM64Reg<15, "b15">, DwarfRegNum<[79]>;
-def B16 : ARM64Reg<16, "b16">, DwarfRegNum<[80]>;
-def B17 : ARM64Reg<17, "b17">, DwarfRegNum<[81]>;
-def B18 : ARM64Reg<18, "b18">, DwarfRegNum<[82]>;
-def B19 : ARM64Reg<19, "b19">, DwarfRegNum<[83]>;
-def B20 : ARM64Reg<20, "b20">, DwarfRegNum<[84]>;
-def B21 : ARM64Reg<21, "b21">, DwarfRegNum<[85]>;
-def B22 : ARM64Reg<22, "b22">, DwarfRegNum<[86]>;
-def B23 : ARM64Reg<23, "b23">, DwarfRegNum<[87]>;
-def B24 : ARM64Reg<24, "b24">, DwarfRegNum<[88]>;
-def B25 : ARM64Reg<25, "b25">, DwarfRegNum<[89]>;
-def B26 : ARM64Reg<26, "b26">, DwarfRegNum<[90]>;
-def B27 : ARM64Reg<27, "b27">, DwarfRegNum<[91]>;
-def B28 : ARM64Reg<28, "b28">, DwarfRegNum<[92]>;
-def B29 : ARM64Reg<29, "b29">, DwarfRegNum<[93]>;
-def B30 : ARM64Reg<30, "b30">, DwarfRegNum<[94]>;
-def B31 : ARM64Reg<31, "b31">, DwarfRegNum<[95]>;
-
-let SubRegIndices = [bsub] in {
-def H0 : ARM64Reg<0, "h0", [B0]>, DwarfRegAlias<B0>;
-def H1 : ARM64Reg<1, "h1", [B1]>, DwarfRegAlias<B1>;
-def H2 : ARM64Reg<2, "h2", [B2]>, DwarfRegAlias<B2>;
-def H3 : ARM64Reg<3, "h3", [B3]>, DwarfRegAlias<B3>;
-def H4 : ARM64Reg<4, "h4", [B4]>, DwarfRegAlias<B4>;
-def H5 : ARM64Reg<5, "h5", [B5]>, DwarfRegAlias<B5>;
-def H6 : ARM64Reg<6, "h6", [B6]>, DwarfRegAlias<B6>;
-def H7 : ARM64Reg<7, "h7", [B7]>, DwarfRegAlias<B7>;
-def H8 : ARM64Reg<8, "h8", [B8]>, DwarfRegAlias<B8>;
-def H9 : ARM64Reg<9, "h9", [B9]>, DwarfRegAlias<B9>;
-def H10 : ARM64Reg<10, "h10", [B10]>, DwarfRegAlias<B10>;
-def H11 : ARM64Reg<11, "h11", [B11]>, DwarfRegAlias<B11>;
-def H12 : ARM64Reg<12, "h12", [B12]>, DwarfRegAlias<B12>;
-def H13 : ARM64Reg<13, "h13", [B13]>, DwarfRegAlias<B13>;
-def H14 : ARM64Reg<14, "h14", [B14]>, DwarfRegAlias<B14>;
-def H15 : ARM64Reg<15, "h15", [B15]>, DwarfRegAlias<B15>;
-def H16 : ARM64Reg<16, "h16", [B16]>, DwarfRegAlias<B16>;
-def H17 : ARM64Reg<17, "h17", [B17]>, DwarfRegAlias<B17>;
-def H18 : ARM64Reg<18, "h18", [B18]>, DwarfRegAlias<B18>;
-def H19 : ARM64Reg<19, "h19", [B19]>, DwarfRegAlias<B19>;
-def H20 : ARM64Reg<20, "h20", [B20]>, DwarfRegAlias<B20>;
-def H21 : ARM64Reg<21, "h21", [B21]>, DwarfRegAlias<B21>;
-def H22 : ARM64Reg<22, "h22", [B22]>, DwarfRegAlias<B22>;
-def H23 : ARM64Reg<23, "h23", [B23]>, DwarfRegAlias<B23>;
-def H24 : ARM64Reg<24, "h24", [B24]>, DwarfRegAlias<B24>;
-def H25 : ARM64Reg<25, "h25", [B25]>, DwarfRegAlias<B25>;
-def H26 : ARM64Reg<26, "h26", [B26]>, DwarfRegAlias<B26>;
-def H27 : ARM64Reg<27, "h27", [B27]>, DwarfRegAlias<B27>;
-def H28 : ARM64Reg<28, "h28", [B28]>, DwarfRegAlias<B28>;
-def H29 : ARM64Reg<29, "h29", [B29]>, DwarfRegAlias<B29>;
-def H30 : ARM64Reg<30, "h30", [B30]>, DwarfRegAlias<B30>;
-def H31 : ARM64Reg<31, "h31", [B31]>, DwarfRegAlias<B31>;
-}
-
-let SubRegIndices = [hsub] in {
-def S0 : ARM64Reg<0, "s0", [H0]>, DwarfRegAlias<B0>;
-def S1 : ARM64Reg<1, "s1", [H1]>, DwarfRegAlias<B1>;
-def S2 : ARM64Reg<2, "s2", [H2]>, DwarfRegAlias<B2>;
-def S3 : ARM64Reg<3, "s3", [H3]>, DwarfRegAlias<B3>;
-def S4 : ARM64Reg<4, "s4", [H4]>, DwarfRegAlias<B4>;
-def S5 : ARM64Reg<5, "s5", [H5]>, DwarfRegAlias<B5>;
-def S6 : ARM64Reg<6, "s6", [H6]>, DwarfRegAlias<B6>;
-def S7 : ARM64Reg<7, "s7", [H7]>, DwarfRegAlias<B7>;
-def S8 : ARM64Reg<8, "s8", [H8]>, DwarfRegAlias<B8>;
-def S9 : ARM64Reg<9, "s9", [H9]>, DwarfRegAlias<B9>;
-def S10 : ARM64Reg<10, "s10", [H10]>, DwarfRegAlias<B10>;
-def S11 : ARM64Reg<11, "s11", [H11]>, DwarfRegAlias<B11>;
-def S12 : ARM64Reg<12, "s12", [H12]>, DwarfRegAlias<B12>;
-def S13 : ARM64Reg<13, "s13", [H13]>, DwarfRegAlias<B13>;
-def S14 : ARM64Reg<14, "s14", [H14]>, DwarfRegAlias<B14>;
-def S15 : ARM64Reg<15, "s15", [H15]>, DwarfRegAlias<B15>;
-def S16 : ARM64Reg<16, "s16", [H16]>, DwarfRegAlias<B16>;
-def S17 : ARM64Reg<17, "s17", [H17]>, DwarfRegAlias<B17>;
-def S18 : ARM64Reg<18, "s18", [H18]>, DwarfRegAlias<B18>;
-def S19 : ARM64Reg<19, "s19", [H19]>, DwarfRegAlias<B19>;
-def S20 : ARM64Reg<20, "s20", [H20]>, DwarfRegAlias<B20>;
-def S21 : ARM64Reg<21, "s21", [H21]>, DwarfRegAlias<B21>;
-def S22 : ARM64Reg<22, "s22", [H22]>, DwarfRegAlias<B22>;
-def S23 : ARM64Reg<23, "s23", [H23]>, DwarfRegAlias<B23>;
-def S24 : ARM64Reg<24, "s24", [H24]>, DwarfRegAlias<B24>;
-def S25 : ARM64Reg<25, "s25", [H25]>, DwarfRegAlias<B25>;
-def S26 : ARM64Reg<26, "s26", [H26]>, DwarfRegAlias<B26>;
-def S27 : ARM64Reg<27, "s27", [H27]>, DwarfRegAlias<B27>;
-def S28 : ARM64Reg<28, "s28", [H28]>, DwarfRegAlias<B28>;
-def S29 : ARM64Reg<29, "s29", [H29]>, DwarfRegAlias<B29>;
-def S30 : ARM64Reg<30, "s30", [H30]>, DwarfRegAlias<B30>;
-def S31 : ARM64Reg<31, "s31", [H31]>, DwarfRegAlias<B31>;
-}
-
-let SubRegIndices = [ssub], RegAltNameIndices = [vreg, vlist1] in {
-def D0 : ARM64Reg<0, "d0", [S0], ["v0", ""]>, DwarfRegAlias<B0>;
-def D1 : ARM64Reg<1, "d1", [S1], ["v1", ""]>, DwarfRegAlias<B1>;
-def D2 : ARM64Reg<2, "d2", [S2], ["v2", ""]>, DwarfRegAlias<B2>;
-def D3 : ARM64Reg<3, "d3", [S3], ["v3", ""]>, DwarfRegAlias<B3>;
-def D4 : ARM64Reg<4, "d4", [S4], ["v4", ""]>, DwarfRegAlias<B4>;
-def D5 : ARM64Reg<5, "d5", [S5], ["v5", ""]>, DwarfRegAlias<B5>;
-def D6 : ARM64Reg<6, "d6", [S6], ["v6", ""]>, DwarfRegAlias<B6>;
-def D7 : ARM64Reg<7, "d7", [S7], ["v7", ""]>, DwarfRegAlias<B7>;
-def D8 : ARM64Reg<8, "d8", [S8], ["v8", ""]>, DwarfRegAlias<B8>;
-def D9 : ARM64Reg<9, "d9", [S9], ["v9", ""]>, DwarfRegAlias<B9>;
-def D10 : ARM64Reg<10, "d10", [S10], ["v10", ""]>, DwarfRegAlias<B10>;
-def D11 : ARM64Reg<11, "d11", [S11], ["v11", ""]>, DwarfRegAlias<B11>;
-def D12 : ARM64Reg<12, "d12", [S12], ["v12", ""]>, DwarfRegAlias<B12>;
-def D13 : ARM64Reg<13, "d13", [S13], ["v13", ""]>, DwarfRegAlias<B13>;
-def D14 : ARM64Reg<14, "d14", [S14], ["v14", ""]>, DwarfRegAlias<B14>;
-def D15 : ARM64Reg<15, "d15", [S15], ["v15", ""]>, DwarfRegAlias<B15>;
-def D16 : ARM64Reg<16, "d16", [S16], ["v16", ""]>, DwarfRegAlias<B16>;
-def D17 : ARM64Reg<17, "d17", [S17], ["v17", ""]>, DwarfRegAlias<B17>;
-def D18 : ARM64Reg<18, "d18", [S18], ["v18", ""]>, DwarfRegAlias<B18>;
-def D19 : ARM64Reg<19, "d19", [S19], ["v19", ""]>, DwarfRegAlias<B19>;
-def D20 : ARM64Reg<20, "d20", [S20], ["v20", ""]>, DwarfRegAlias<B20>;
-def D21 : ARM64Reg<21, "d21", [S21], ["v21", ""]>, DwarfRegAlias<B21>;
-def D22 : ARM64Reg<22, "d22", [S22], ["v22", ""]>, DwarfRegAlias<B22>;
-def D23 : ARM64Reg<23, "d23", [S23], ["v23", ""]>, DwarfRegAlias<B23>;
-def D24 : ARM64Reg<24, "d24", [S24], ["v24", ""]>, DwarfRegAlias<B24>;
-def D25 : ARM64Reg<25, "d25", [S25], ["v25", ""]>, DwarfRegAlias<B25>;
-def D26 : ARM64Reg<26, "d26", [S26], ["v26", ""]>, DwarfRegAlias<B26>;
-def D27 : ARM64Reg<27, "d27", [S27], ["v27", ""]>, DwarfRegAlias<B27>;
-def D28 : ARM64Reg<28, "d28", [S28], ["v28", ""]>, DwarfRegAlias<B28>;
-def D29 : ARM64Reg<29, "d29", [S29], ["v29", ""]>, DwarfRegAlias<B29>;
-def D30 : ARM64Reg<30, "d30", [S30], ["v30", ""]>, DwarfRegAlias<B30>;
-def D31 : ARM64Reg<31, "d31", [S31], ["v31", ""]>, DwarfRegAlias<B31>;
-}
-
-let SubRegIndices = [dsub], RegAltNameIndices = [vreg, vlist1] in {
-def Q0 : ARM64Reg<0, "q0", [D0], ["v0", ""]>, DwarfRegAlias<B0>;
-def Q1 : ARM64Reg<1, "q1", [D1], ["v1", ""]>, DwarfRegAlias<B1>;
-def Q2 : ARM64Reg<2, "q2", [D2], ["v2", ""]>, DwarfRegAlias<B2>;
-def Q3 : ARM64Reg<3, "q3", [D3], ["v3", ""]>, DwarfRegAlias<B3>;
-def Q4 : ARM64Reg<4, "q4", [D4], ["v4", ""]>, DwarfRegAlias<B4>;
-def Q5 : ARM64Reg<5, "q5", [D5], ["v5", ""]>, DwarfRegAlias<B5>;
-def Q6 : ARM64Reg<6, "q6", [D6], ["v6", ""]>, DwarfRegAlias<B6>;
-def Q7 : ARM64Reg<7, "q7", [D7], ["v7", ""]>, DwarfRegAlias<B7>;
-def Q8 : ARM64Reg<8, "q8", [D8], ["v8", ""]>, DwarfRegAlias<B8>;
-def Q9 : ARM64Reg<9, "q9", [D9], ["v9", ""]>, DwarfRegAlias<B9>;
-def Q10 : ARM64Reg<10, "q10", [D10], ["v10", ""]>, DwarfRegAlias<B10>;
-def Q11 : ARM64Reg<11, "q11", [D11], ["v11", ""]>, DwarfRegAlias<B11>;
-def Q12 : ARM64Reg<12, "q12", [D12], ["v12", ""]>, DwarfRegAlias<B12>;
-def Q13 : ARM64Reg<13, "q13", [D13], ["v13", ""]>, DwarfRegAlias<B13>;
-def Q14 : ARM64Reg<14, "q14", [D14], ["v14", ""]>, DwarfRegAlias<B14>;
-def Q15 : ARM64Reg<15, "q15", [D15], ["v15", ""]>, DwarfRegAlias<B15>;
-def Q16 : ARM64Reg<16, "q16", [D16], ["v16", ""]>, DwarfRegAlias<B16>;
-def Q17 : ARM64Reg<17, "q17", [D17], ["v17", ""]>, DwarfRegAlias<B17>;
-def Q18 : ARM64Reg<18, "q18", [D18], ["v18", ""]>, DwarfRegAlias<B18>;
-def Q19 : ARM64Reg<19, "q19", [D19], ["v19", ""]>, DwarfRegAlias<B19>;
-def Q20 : ARM64Reg<20, "q20", [D20], ["v20", ""]>, DwarfRegAlias<B20>;
-def Q21 : ARM64Reg<21, "q21", [D21], ["v21", ""]>, DwarfRegAlias<B21>;
-def Q22 : ARM64Reg<22, "q22", [D22], ["v22", ""]>, DwarfRegAlias<B22>;
-def Q23 : ARM64Reg<23, "q23", [D23], ["v23", ""]>, DwarfRegAlias<B23>;
-def Q24 : ARM64Reg<24, "q24", [D24], ["v24", ""]>, DwarfRegAlias<B24>;
-def Q25 : ARM64Reg<25, "q25", [D25], ["v25", ""]>, DwarfRegAlias<B25>;
-def Q26 : ARM64Reg<26, "q26", [D26], ["v26", ""]>, DwarfRegAlias<B26>;
-def Q27 : ARM64Reg<27, "q27", [D27], ["v27", ""]>, DwarfRegAlias<B27>;
-def Q28 : ARM64Reg<28, "q28", [D28], ["v28", ""]>, DwarfRegAlias<B28>;
-def Q29 : ARM64Reg<29, "q29", [D29], ["v29", ""]>, DwarfRegAlias<B29>;
-def Q30 : ARM64Reg<30, "q30", [D30], ["v30", ""]>, DwarfRegAlias<B30>;
-def Q31 : ARM64Reg<31, "q31", [D31], ["v31", ""]>, DwarfRegAlias<B31>;
-}
-
-def FPR8 : RegisterClass<"ARM64", [untyped], 8, (sequence "B%u", 0, 31)> {
- let Size = 8;
-}
-def FPR16 : RegisterClass<"ARM64", [untyped], 16, (sequence "H%u", 0, 31)> {
- let Size = 16;
-}
-def FPR32 : RegisterClass<"ARM64", [f32, i32], 32,(sequence "S%u", 0, 31)>;
-def FPR64 : RegisterClass<"ARM64", [f64, i64, v2f32, v1f64, v8i8, v4i16, v2i32,
- v1i64],
- 64, (sequence "D%u", 0, 31)>;
-// We don't (yet) have an f128 legal type, so don't use that here. We
-// normalize 128-bit vectors to v2f64 for arg passing and such, so use
-// that here.
-def FPR128 : RegisterClass<"ARM64",
- [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64, f128],
- 128, (sequence "Q%u", 0, 31)>;
-
-// The lower 16 vector registers. Some instructions can only take registers
-// in this range.
-def FPR128_lo : RegisterClass<"ARM64",
- [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
- 128, (trunc FPR128, 16)>;
-
-// Pairs, triples, and quads of 64-bit vector registers.
-def DSeqPairs : RegisterTuples<[dsub0, dsub1], [(rotl FPR64, 0), (rotl FPR64, 1)]>;
-def DSeqTriples : RegisterTuples<[dsub0, dsub1, dsub2],
- [(rotl FPR64, 0), (rotl FPR64, 1),
- (rotl FPR64, 2)]>;
-def DSeqQuads : RegisterTuples<[dsub0, dsub1, dsub2, dsub3],
- [(rotl FPR64, 0), (rotl FPR64, 1),
- (rotl FPR64, 2), (rotl FPR64, 3)]>;
-def DD : RegisterClass<"ARM64", [untyped], 64, (add DSeqPairs)> {
- let Size = 128;
-}
-def DDD : RegisterClass<"ARM64", [untyped], 64, (add DSeqTriples)> {
-  let Size = 192;
-}
-def DDDD : RegisterClass<"ARM64", [untyped], 64, (add DSeqQuads)> {
- let Size = 256;
-}
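The Size of each tuple class is the register count times the 64-bit element
width (128/192/256 bits for the D tuples, and count times 128 for the Q
tuples below). A trivial check of that arithmetic:

    #include <cassert>

    int main() {
      auto dTupleBits = [](int Count) { return Count * 64; };
      assert(dTupleBits(2) == 128); // DD
      assert(dTupleBits(3) == 192); // DDD
      assert(dTupleBits(4) == 256); // DDDD
      return 0;
    }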
-
-// Pairs, triples, and quads of 128-bit vector registers.
-def QSeqPairs : RegisterTuples<[qsub0, qsub1], [(rotl FPR128, 0), (rotl FPR128, 1)]>;
-def QSeqTriples : RegisterTuples<[qsub0, qsub1, qsub2],
- [(rotl FPR128, 0), (rotl FPR128, 1),
- (rotl FPR128, 2)]>;
-def QSeqQuads : RegisterTuples<[qsub0, qsub1, qsub2, qsub3],
- [(rotl FPR128, 0), (rotl FPR128, 1),
- (rotl FPR128, 2), (rotl FPR128, 3)]>;
-def QQ : RegisterClass<"ARM64", [untyped], 128, (add QSeqPairs)> {
- let Size = 256;
-}
-def QQQ : RegisterClass<"ARM64", [untyped], 128, (add QSeqTriples)> {
- let Size = 384;
-}
-def QQQQ : RegisterClass<"ARM64", [untyped], 128, (add QSeqQuads)> {
- let Size = 512;
-}
-
-
-// Vector operand versions of the FP registers. Alternate name printing and
-// assembler matching.
-def VectorRegAsmOperand : AsmOperandClass { let Name = "VectorReg"; }
-let ParserMatchClass = VectorRegAsmOperand in {
-def V64 : RegisterOperand<FPR64, "printVRegOperand">;
-def V128 : RegisterOperand<FPR128, "printVRegOperand">;
-def V128_lo : RegisterOperand<FPR128_lo, "printVRegOperand">;
-}
-
-class TypedVecListAsmOperand<int count, int regsize, int lanes, string kind>
- : AsmOperandClass {
- let Name = "TypedVectorList" # count # "_" # lanes # kind;
-
- let PredicateMethod
- = "isTypedVectorList<" # count # ", " # lanes # ", '" # kind # "'>";
- let RenderMethod = "addVectorList" # regsize # "Operands<" # count # ">";
-}
-
-class TypedVecListRegOperand<RegisterClass Reg, int lanes, string kind>
- : RegisterOperand<Reg, "printTypedVectorList<" # lanes # ", '"
- # kind # "'>">;
-
-multiclass VectorList<int count, RegisterClass Reg64, RegisterClass Reg128> {
-  // With implicit types (the type is probably specified on the instruction
-  // instead). E.g. { v0, v1 }
- def _64AsmOperand : AsmOperandClass {
- let Name = NAME # "64";
- let PredicateMethod = "isImplicitlyTypedVectorList<" # count # ">";
- let RenderMethod = "addVectorList64Operands<" # count # ">";
- }
-
- def "64" : RegisterOperand<Reg64, "printImplicitlyTypedVectorList"> {
- let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_64AsmOperand");
- }
-
- def _128AsmOperand : AsmOperandClass {
- let Name = NAME # "128";
- let PredicateMethod = "isImplicitlyTypedVectorList<" # count # ">";
- let RenderMethod = "addVectorList128Operands<" # count # ">";
- }
-
- def "128" : RegisterOperand<Reg128, "printImplicitlyTypedVectorList"> {
- let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_128AsmOperand");
- }
-
- // 64-bit register lists with explicit type.
-
- // { v0.8b, v1.8b }
- def _8bAsmOperand : TypedVecListAsmOperand<count, 64, 8, "b">;
- def "8b" : TypedVecListRegOperand<Reg64, 8, "b"> {
- let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_8bAsmOperand");
- }
-
- // { v0.4h, v1.4h }
- def _4hAsmOperand : TypedVecListAsmOperand<count, 64, 4, "h">;
- def "4h" : TypedVecListRegOperand<Reg64, 4, "h"> {
- let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_4hAsmOperand");
- }
-
- // { v0.2s, v1.2s }
- def _2sAsmOperand : TypedVecListAsmOperand<count, 64, 2, "s">;
- def "2s" : TypedVecListRegOperand<Reg64, 2, "s"> {
- let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_2sAsmOperand");
- }
-
- // { v0.1d, v1.1d }
- def _1dAsmOperand : TypedVecListAsmOperand<count, 64, 1, "d">;
- def "1d" : TypedVecListRegOperand<Reg64, 1, "d"> {
- let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_1dAsmOperand");
- }
-
- // 128-bit register lists with explicit type
-
- // { v0.16b, v1.16b }
- def _16bAsmOperand : TypedVecListAsmOperand<count, 128, 16, "b">;
- def "16b" : TypedVecListRegOperand<Reg128, 16, "b"> {
- let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_16bAsmOperand");
- }
-
- // { v0.8h, v1.8h }
- def _8hAsmOperand : TypedVecListAsmOperand<count, 128, 8, "h">;
- def "8h" : TypedVecListRegOperand<Reg128, 8, "h"> {
- let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_8hAsmOperand");
- }
-
- // { v0.4s, v1.4s }
- def _4sAsmOperand : TypedVecListAsmOperand<count, 128, 4, "s">;
- def "4s" : TypedVecListRegOperand<Reg128, 4, "s"> {
- let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_4sAsmOperand");
- }
-
- // { v0.2d, v1.2d }
- def _2dAsmOperand : TypedVecListAsmOperand<count, 128, 2, "d">;
- def "2d" : TypedVecListRegOperand<Reg128, 2, "d"> {
- let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_2dAsmOperand");
- }
-
- // { v0.b, v1.b }
- def _bAsmOperand : TypedVecListAsmOperand<count, 128, 0, "b">;
- def "b" : TypedVecListRegOperand<Reg128, 0, "b"> {
- let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_bAsmOperand");
- }
-
- // { v0.h, v1.h }
- def _hAsmOperand : TypedVecListAsmOperand<count, 128, 0, "h">;
- def "h" : TypedVecListRegOperand<Reg128, 0, "h"> {
- let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_hAsmOperand");
- }
-
- // { v0.s, v1.s }
- def _sAsmOperand : TypedVecListAsmOperand<count, 128, 0, "s">;
- def "s" : TypedVecListRegOperand<Reg128, 0, "s"> {
- let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_sAsmOperand");
- }
-
- // { v0.d, v1.d }
- def _dAsmOperand : TypedVecListAsmOperand<count, 128, 0, "d">;
- def "d" : TypedVecListRegOperand<Reg128, 0, "d"> {
- let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_dAsmOperand");
- }
-
-
-}
-
-defm VecListOne : VectorList<1, FPR64, FPR128>;
-defm VecListTwo : VectorList<2, DD, QQ>;
-defm VecListThree : VectorList<3, DDD, QQQ>;
-defm VecListFour : VectorList<4, DDDD, QQQQ>;
-
-
-// Register operand versions of the scalar FP registers.
-def FPR16Op : RegisterOperand<FPR16, "printOperand">;
-def FPR32Op : RegisterOperand<FPR32, "printOperand">;
-def FPR64Op : RegisterOperand<FPR64, "printOperand">;
-def FPR128Op : RegisterOperand<FPR128, "printOperand">;
diff --git a/lib/Target/ARM64/ARM64Schedule.td b/lib/Target/ARM64/ARM64Schedule.td
deleted file mode 100644
index 52f9262..0000000
--- a/lib/Target/ARM64/ARM64Schedule.td
+++ /dev/null
@@ -1,92 +0,0 @@
-//===-- ARM64Schedule.td - ARM64 Scheduling Definitions ----*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-// Define TII for use in SchedVariant Predicates.
-// const MachineInstr *MI and const TargetSchedModel *SchedModel
-// are defined by default.
-def : PredicateProlog<[{
- const ARM64InstrInfo *TII =
- static_cast<const ARM64InstrInfo*>(SchedModel->getInstrInfo());
- (void)TII;
-}]>;
-
-// ARM64 Scheduler Definitions
-
-def WriteImm : SchedWrite; // MOVN, MOVZ
-// TODO: Provide variants for MOV32/64imm Pseudos that dynamically
-// select the correct sequence of WriteImms.
-
-def WriteI : SchedWrite; // ALU
-def WriteISReg : SchedWrite; // ALU of Shifted-Reg
-def WriteIEReg : SchedWrite; // ALU of Extended-Reg
-def WriteExtr : SchedWrite; // EXTR shifts a reg pair
-def ReadExtrHi : SchedRead; // Read the high reg of the EXTR pair
-def WriteIS : SchedWrite; // Shift/Scale
-def WriteID32 : SchedWrite; // 32-bit Divide
-def WriteID64 : SchedWrite; // 64-bit Divide
-def WriteIM32 : SchedWrite; // 32-bit Multiply
-def WriteIM64 : SchedWrite; // 64-bit Multiply
-def WriteBr : SchedWrite; // Branch
-def WriteBrReg : SchedWrite; // Indirect Branch
-
-def WriteLD : SchedWrite; // Load from base addr plus immediate offset
-def WriteST : SchedWrite; // Store to base addr plus immediate offset
-def WriteSTP : SchedWrite; // Store a register pair.
-def WriteAdr : SchedWrite; // Address pre/post increment.
-
-def WriteLDIdx : SchedWrite; // Load from a register index (maybe scaled).
-def WriteSTIdx : SchedWrite; // Store to a register index (maybe scaled).
-def ReadAdrBase : SchedRead; // Read the base register of a reg-offset LD/ST.
-
-// ScaledIdxPred is true if a WriteLDIdx operand will be
-// scaled. Subtargets can use this to dynamically select resources and
-// latency for WriteLDIdx and ReadAdrBase.
-def ScaledIdxPred : SchedPredicate<[{TII->isScaledAddr(MI)}]>;
-
-// Serialized two-level address load.
-// EXAMPLE: LOADGot
-def WriteLDAdr : WriteSequence<[WriteAdr, WriteLD]>;
-
-// Serialized two-level address lookup.
-// EXAMPLE: MOVaddr...
-def WriteAdrAdr : WriteSequence<[WriteAdr, WriteAdr]>;
-
-// The second register of a load-pair.
-// LDP,LDPSW,LDNP,LDXP,LDAXP
-def WriteLDHi : SchedWrite;
-
-// Store-exclusive is a store followed by a dependent load.
-def WriteSTX : WriteSequence<[WriteST, WriteLD]>;
-
-def WriteSys : SchedWrite; // Long, variable latency system ops.
-def WriteBarrier : SchedWrite; // Memory barrier.
-def WriteHint : SchedWrite; // Hint instruction.
-
-def WriteF : SchedWrite; // General floating-point ops.
-def WriteFCmp : SchedWrite; // Floating-point compare.
-def WriteFCvt : SchedWrite; // Float conversion.
-def WriteFCopy : SchedWrite; // Float-int register copy.
-def WriteFImm : SchedWrite; // Floating-point immediate.
-def WriteFMul : SchedWrite; // Floating-point multiply.
-def WriteFDiv : SchedWrite; // Floating-point division.
-
-def WriteV : SchedWrite; // Vector ops.
-def WriteVLD : SchedWrite; // Vector loads.
-def WriteVST : SchedWrite; // Vector stores.
-
-// Read the unwritten lanes of the VLD's destination registers.
-def ReadVLD : SchedRead;
-
-// Sequential vector load and shuffle.
-def WriteVLDShuffle : WriteSequence<[WriteVLD, WriteV]>;
-def WriteVLDPairShuffle : WriteSequence<[WriteVLD, WriteV, WriteV]>;
-
-// Store a shuffled vector.
-def WriteVSTShuffle : WriteSequence<[WriteV, WriteVST]>;
-def WriteVSTPairShuffle : WriteSequence<[WriteV, WriteV, WriteVST]>;
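
The PredicateProlog above is what makes TII visible inside generated scheduling predicates: tblgen pastes the prolog ahead of each SchedPredicate body when it emits the subtarget's resolver code. A rough sketch of the resulting shape for ScaledIdxPred, assuming the usual tblgen output layout (the function name is illustrative; the real code is auto-generated into ARM64GenSubtargetInfo.inc):

    // Illustrative only: approximates what tblgen emits for ScaledIdxPred.
    #include "ARM64InstrInfo.h"
    #include "llvm/CodeGen/TargetSchedule.h"
    using namespace llvm;

    static bool checkScaledIdxPred(const MachineInstr *MI,
                                   const TargetSchedModel *SchedModel) {
      // PredicateProlog, pasted verbatim by tblgen:
      const ARM64InstrInfo *TII =
          static_cast<const ARM64InstrInfo *>(SchedModel->getInstrInfo());
      (void)TII;
      // SchedPredicate body:
      return TII->isScaledAddr(MI);
    }
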
diff --git a/lib/Target/ARM64/ARM64SelectionDAGInfo.cpp b/lib/Target/ARM64/ARM64SelectionDAGInfo.cpp
deleted file mode 100644
index 79d507f..0000000
--- a/lib/Target/ARM64/ARM64SelectionDAGInfo.cpp
+++ /dev/null
@@ -1,57 +0,0 @@
-//===-- ARM64SelectionDAGInfo.cpp - ARM64 SelectionDAG Info ---------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the ARM64SelectionDAGInfo class.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "arm64-selectiondag-info"
-#include "ARM64TargetMachine.h"
-using namespace llvm;
-
-ARM64SelectionDAGInfo::ARM64SelectionDAGInfo(const TargetMachine &TM)
- : TargetSelectionDAGInfo(TM),
- Subtarget(&TM.getSubtarget<ARM64Subtarget>()) {}
-
-ARM64SelectionDAGInfo::~ARM64SelectionDAGInfo() {}
-
-SDValue ARM64SelectionDAGInfo::EmitTargetCodeForMemset(
- SelectionDAG &DAG, SDLoc dl, SDValue Chain, SDValue Dst, SDValue Src,
- SDValue Size, unsigned Align, bool isVolatile,
- MachinePointerInfo DstPtrInfo) const {
- // Check to see if there is a specialized entry-point for memory zeroing.
- ConstantSDNode *V = dyn_cast<ConstantSDNode>(Src);
- ConstantSDNode *SizeValue = dyn_cast<ConstantSDNode>(Size);
- const char *bzeroEntry =
- (V && V->isNullValue()) ? Subtarget->getBZeroEntry() : 0;
- // For small sizes (< 256), it is not beneficial to use bzero
- // instead of memset.
- if (bzeroEntry && (!SizeValue || SizeValue->getZExtValue() > 256)) {
- const ARM64TargetLowering &TLI = *static_cast<const ARM64TargetLowering *>(
- DAG.getTarget().getTargetLowering());
-
- EVT IntPtr = TLI.getPointerTy();
- Type *IntPtrTy = getDataLayout()->getIntPtrType(*DAG.getContext());
- TargetLowering::ArgListTy Args;
- TargetLowering::ArgListEntry Entry;
- Entry.Node = Dst;
- Entry.Ty = IntPtrTy;
- Args.push_back(Entry);
- Entry.Node = Size;
- Args.push_back(Entry);
- TargetLowering::CallLoweringInfo CLI(
- Chain, Type::getVoidTy(*DAG.getContext()), false, false, false, false,
- 0, CallingConv::C, /*isTailCall=*/false,
- /*doesNotRet=*/false, /*isReturnValueUsed=*/false,
- DAG.getExternalSymbol(bzeroEntry, IntPtr), Args, DAG, dl);
- std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
- return CallResult.second;
- }
- return SDValue();
-}
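
The bzero cutover in EmitTargetCodeForMemset above boils down to a simple size policy: zero-fill via bzero only when the length is unknown at compile time or exceeds 256 bytes. A minimal standalone sketch of that policy (the function name is illustrative; bzero is the non-standard libc entry point the subtarget reports):

    #include <cstring>
    #include <strings.h> // bzero (POSIX, non-standard C)

    // Illustrative policy only -- mirrors the SelectionDAG lowering above.
    static void zeroFill(void *Dst, size_t Size, bool SizeIsKnown) {
      if (!SizeIsKnown || Size > 256)
        bzero(Dst, Size);          // large or unknown size: prefer bzero
      else
        std::memset(Dst, 0, Size); // small (<= 256): memset is at least as good
    }
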
diff --git a/lib/Target/ARM64/ARM64SelectionDAGInfo.h b/lib/Target/ARM64/ARM64SelectionDAGInfo.h
deleted file mode 100644
index 770775f..0000000
--- a/lib/Target/ARM64/ARM64SelectionDAGInfo.h
+++ /dev/null
@@ -1,37 +0,0 @@
-//===-- ARM64SelectionDAGInfo.h - ARM64 SelectionDAG Info -------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the ARM64 subclass for TargetSelectionDAGInfo.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef ARM64SELECTIONDAGINFO_H
-#define ARM64SELECTIONDAGINFO_H
-
-#include "llvm/Target/TargetSelectionDAGInfo.h"
-
-namespace llvm {
-
-class ARM64SelectionDAGInfo : public TargetSelectionDAGInfo {
- /// Subtarget - Keep a pointer to the ARM64Subtarget around so that we can
- /// make the right decision when generating code for different targets.
- const ARM64Subtarget *Subtarget;
-
-public:
- explicit ARM64SelectionDAGInfo(const TargetMachine &TM);
- ~ARM64SelectionDAGInfo();
-
- SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl, SDValue Chain,
- SDValue Dst, SDValue Src, SDValue Size,
- unsigned Align, bool isVolatile,
- MachinePointerInfo DstPtrInfo) const override;
-};
-}
-
-#endif
diff --git a/lib/Target/ARM64/ARM64Subtarget.cpp b/lib/Target/ARM64/ARM64Subtarget.cpp
deleted file mode 100644
index 14b5444..0000000
--- a/lib/Target/ARM64/ARM64Subtarget.cpp
+++ /dev/null
@@ -1,100 +0,0 @@
-//===-- ARM64Subtarget.cpp - ARM64 Subtarget Information --------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the ARM64 specific subclass of TargetSubtarget.
-//
-//===----------------------------------------------------------------------===//
-
-#include "ARM64InstrInfo.h"
-#include "ARM64Subtarget.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/CodeGen/MachineScheduler.h"
-#include "llvm/IR/GlobalValue.h"
-#include "llvm/Support/TargetRegistry.h"
-
-#define GET_SUBTARGETINFO_CTOR
-#define GET_SUBTARGETINFO_TARGET_DESC
-#include "ARM64GenSubtargetInfo.inc"
-
-using namespace llvm;
-
-ARM64Subtarget::ARM64Subtarget(const std::string &TT, const std::string &CPU,
- const std::string &FS)
- : ARM64GenSubtargetInfo(TT, CPU, FS), HasZeroCycleRegMove(false),
- HasZeroCycleZeroing(false), CPUString(CPU), TargetTriple(TT) {
- // Determine default and user-specified characteristics
-
- if (CPUString.empty())
- // We default to Cyclone for now.
- CPUString = "cyclone";
-
- ParseSubtargetFeatures(CPUString, FS);
-}
-
-/// ClassifyGlobalReference - Find the target operand flags that describe
-/// how a global value should be referenced for the current subtarget.
-unsigned char
-ARM64Subtarget::ClassifyGlobalReference(const GlobalValue *GV,
- const TargetMachine &TM) const {
-
- // Determine whether this is a reference to a definition or a declaration.
- // Materializable GVs (in JIT lazy compilation mode) do not require an extra
- // load from stub.
- bool isDecl = GV->hasAvailableExternallyLinkage();
- if (GV->isDeclaration() && !GV->isMaterializable())
- isDecl = true;
-
- // MachO large model always goes via a GOT, simply to get a single 8-byte
- // absolute relocation on all global addresses.
- if (TM.getCodeModel() == CodeModel::Large && isTargetMachO())
- return ARM64II::MO_GOT;
-
- // The small code model's direct accesses use ADRP, which cannot necessarily
- // produce the value 0 (if the code is above 4GB). Therefore they must use the
- // GOT.
- if (TM.getCodeModel() == CodeModel::Small && GV->isWeakForLinker() && isDecl)
- return ARM64II::MO_GOT;
-
- // If symbol visibility is hidden, the extra load is not needed if
- // the symbol is definitely defined in the current translation unit.
-
- // The handling of non-hidden symbols in PIC mode is rather target-dependent:
- // + On MachO, if the symbol is defined in this module the GOT can be
- // skipped.
- // + On ELF, the R_AARCH64_COPY relocation means that even symbols actually
- // defined could end up in unexpected places. Use a GOT.
- if (TM.getRelocationModel() != Reloc::Static && GV->hasDefaultVisibility()) {
- if (isTargetMachO())
- return (isDecl || GV->isWeakForLinker()) ? ARM64II::MO_GOT
- : ARM64II::MO_NO_FLAG;
- else
- return ARM64II::MO_GOT;
- }
-
- return ARM64II::MO_NO_FLAG;
-}
-
-/// This function returns the name of a function which has an interface
-/// like the non-standard bzero function, if such a function exists on
- /// the current subtarget and it is considered preferable over
-/// memset with zero passed as the second argument. Otherwise it
-/// returns null.
-const char *ARM64Subtarget::getBZeroEntry() const {
- // At the moment, always prefer bzero.
- return "bzero";
-}
-
-void ARM64Subtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
- MachineInstr *begin, MachineInstr *end,
- unsigned NumRegionInstrs) const {
- // An LNT run (at least on Cyclone) showed reasonably significant gains for
- // bi-directional scheduling, e.g. on 253.perlbmk.
- Policy.OnlyTopDown = false;
- Policy.OnlyBottomUp = false;
-}
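
ClassifyGlobalReference answers one question for the code generator: does this global have to be materialized through the GOT, or can it be addressed directly? An illustrative caller-side sketch (the surrounding selection code is elided; ARM64II::MO_GOT and ARM64II::MO_NO_FLAG are the two results produced above):

    // Illustrative consumption of the flags returned above.
    unsigned char OpFlags = Subtarget->ClassifyGlobalReference(GV, TM);
    if (OpFlags == ARM64II::MO_GOT) {
      // Indirect: load the address from the GOT (e.g. ADRP + LDR of the slot).
    } else {
      // Direct: the symbol can be addressed without the extra load.
    }
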
diff --git a/lib/Target/ARM64/ARM64Subtarget.h b/lib/Target/ARM64/ARM64Subtarget.h
deleted file mode 100644
index 1cbd79e..0000000
--- a/lib/Target/ARM64/ARM64Subtarget.h
+++ /dev/null
@@ -1,87 +0,0 @@
-//=====---- ARM64Subtarget.h - Define Subtarget for the ARM64 -*- C++ -*--====//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file declares the ARM64 specific subclass of TargetSubtarget.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef ARM64SUBTARGET_H
-#define ARM64SUBTARGET_H
-
-#include "llvm/Target/TargetSubtargetInfo.h"
-#include "ARM64RegisterInfo.h"
-#include <string>
-
-#define GET_SUBTARGETINFO_HEADER
-#include "ARM64GenSubtargetInfo.inc"
-
-namespace llvm {
-class GlobalValue;
-class StringRef;
-
-class ARM64Subtarget : public ARM64GenSubtargetInfo {
-protected:
- // HasZeroCycleRegMove - Has zero-cycle register mov instructions.
- bool HasZeroCycleRegMove;
-
- // HasZeroCycleZeroing - Has zero-cycle zeroing instructions.
- bool HasZeroCycleZeroing;
-
- /// CPUString - Name of the CPU in use.
- std::string CPUString;
-
- /// TargetTriple - What processor and OS we're targeting.
- Triple TargetTriple;
-
-public:
- /// This constructor initializes the data members to match that
- /// of the specified triple.
- ARM64Subtarget(const std::string &TT, const std::string &CPU,
- const std::string &FS);
-
- bool enableMachineScheduler() const override { return true; }
-
- bool hasZeroCycleRegMove() const { return HasZeroCycleRegMove; }
-
- bool hasZeroCycleZeroing() const { return HasZeroCycleZeroing; }
-
- bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); }
-
- bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); }
-
- bool isTargetMachO() const { return TargetTriple.isOSBinFormatMachO(); }
-
- bool isCyclone() const { return CPUString == "cyclone"; }
-
- /// getMaxInlineSizeThreshold - Returns the maximum memset / memcpy size
- /// that still makes it profitable to inline the call.
- unsigned getMaxInlineSizeThreshold() const { return 64; }
-
- /// ParseSubtargetFeatures - Parses the features string, setting the
- /// specified subtarget options. The definition is auto-generated by tblgen.
- void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
-
- /// ClassifyGlobalReference - Find the target operand flags that describe
- /// how a global value should be referenced for the current subtarget.
- unsigned char ClassifyGlobalReference(const GlobalValue *GV,
- const TargetMachine &TM) const;
-
- /// This function returns the name of a function which has an interface
- /// like the non-standard bzero function, if such a function exists on
- /// the current subtarget and it is considered preferable over
- /// memset with zero passed as the second argument. Otherwise it
- /// returns null.
- const char *getBZeroEntry() const;
-
- void overrideSchedPolicy(MachineSchedPolicy &Policy, MachineInstr *begin,
- MachineInstr *end, unsigned NumRegionInstrs) const;
-};
-} // End llvm namespace
-
-#endif // ARM64SUBTARGET_H
diff --git a/lib/Target/ARM64/ARM64TargetMachine.cpp b/lib/Target/ARM64/ARM64TargetMachine.cpp
deleted file mode 100644
index 101dc25..0000000
--- a/lib/Target/ARM64/ARM64TargetMachine.cpp
+++ /dev/null
@@ -1,157 +0,0 @@
-//===-- ARM64TargetMachine.cpp - Define TargetMachine for ARM64 -----------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-// This file implements the ARM64 specific subclass of TargetMachine.
-//
-//===----------------------------------------------------------------------===//
-
-#include "ARM64.h"
-#include "ARM64TargetMachine.h"
-#include "llvm/PassManager.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/TargetRegistry.h"
-#include "llvm/Target/TargetOptions.h"
-#include "llvm/Transforms/Scalar.h"
-using namespace llvm;
-
-static cl::opt<bool> EnableCCMP("arm64-ccmp",
- cl::desc("Enable the CCMP formation pass"),
- cl::init(true));
-
-static cl::opt<bool> EnableStPairSuppress("arm64-stp-suppress", cl::Hidden,
- cl::desc("Suppress STP for ARM64"),
- cl::init(true));
-
-static cl::opt<bool>
-EnablePromoteConstant("arm64-promote-const", cl::Hidden,
- cl::desc("Enable the promote constant pass"),
- cl::init(true));
-
-static cl::opt<bool>
-EnableCollectLOH("arm64-collect-loh", cl::Hidden,
- cl::desc("Enable the pass that emits the linker"
- " optimization hints (LOH)"),
- cl::init(true));
-
-extern "C" void LLVMInitializeARM64Target() {
- // Register the target.
- RegisterTargetMachine<ARM64TargetMachine> X(TheARM64Target);
-}
-
-/// TargetMachine ctor - Create an ARM64 architecture model.
-///
-ARM64TargetMachine::ARM64TargetMachine(const Target &T, StringRef TT,
- StringRef CPU, StringRef FS,
- const TargetOptions &Options,
- Reloc::Model RM, CodeModel::Model CM,
- CodeGenOpt::Level OL)
- : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
- Subtarget(TT, CPU, FS),
- DL(Subtarget.isTargetMachO() ? "e-m:o-i64:64-i128:128-n32:64-S128"
- : "e-m:e-i64:64-i128:128-n32:64-S128"),
- InstrInfo(Subtarget), TLInfo(*this), FrameLowering(*this, Subtarget),
- TSInfo(*this) {
- initAsmInfo();
-}
-
-namespace {
-/// ARM64 Code Generator Pass Configuration Options.
-class ARM64PassConfig : public TargetPassConfig {
-public:
- ARM64PassConfig(ARM64TargetMachine *TM, PassManagerBase &PM)
- : TargetPassConfig(TM, PM) {}
-
- ARM64TargetMachine &getARM64TargetMachine() const {
- return getTM<ARM64TargetMachine>();
- }
-
- virtual bool addPreISel();
- virtual bool addInstSelector();
- virtual bool addILPOpts();
- virtual bool addPreRegAlloc();
- virtual bool addPostRegAlloc();
- virtual bool addPreSched2();
- virtual bool addPreEmitPass();
-};
-} // namespace
-
-void ARM64TargetMachine::addAnalysisPasses(PassManagerBase &PM) {
- // Add first the target-independent BasicTTI pass, then our ARM64 pass. This
- // allows the ARM64 pass to delegate to the target independent layer when
- // appropriate.
- PM.add(createBasicTargetTransformInfoPass(this));
- PM.add(createARM64TargetTransformInfoPass(this));
-}
-
-TargetPassConfig *ARM64TargetMachine::createPassConfig(PassManagerBase &PM) {
- return new ARM64PassConfig(this, PM);
-}
-
-// Pass Pipeline Configuration
-bool ARM64PassConfig::addPreISel() {
- // Run promote constant before global merge, so that the promoted constants
- // get a chance to be merged.
- if (TM->getOptLevel() != CodeGenOpt::None && EnablePromoteConstant)
- addPass(createARM64PromoteConstantPass());
- if (TM->getOptLevel() != CodeGenOpt::None)
- addPass(createGlobalMergePass(TM));
- if (TM->getOptLevel() != CodeGenOpt::None)
- addPass(createARM64AddressTypePromotionPass());
- return false;
-}
-
-bool ARM64PassConfig::addInstSelector() {
- addPass(createARM64ISelDag(getARM64TargetMachine(), getOptLevel()));
-
- // For ELF, clean up any local-dynamic TLS accesses (i.e. combine as many
- // references to _TLS_MODULE_BASE_ as possible).
- if (TM->getSubtarget<ARM64Subtarget>().isTargetELF() &&
- getOptLevel() != CodeGenOpt::None)
- addPass(createARM64CleanupLocalDynamicTLSPass());
-
- return false;
-}
-
-bool ARM64PassConfig::addILPOpts() {
- if (EnableCCMP)
- addPass(createARM64ConditionalCompares());
- addPass(&EarlyIfConverterID);
- if (EnableStPairSuppress)
- addPass(createARM64StorePairSuppressPass());
- return true;
-}
-
-bool ARM64PassConfig::addPreRegAlloc() {
- // Use AdvSIMD scalar instructions whenever profitable.
- addPass(createARM64AdvSIMDScalar());
- return true;
-}
-
-bool ARM64PassConfig::addPostRegAlloc() {
- // Change dead register definitions to refer to the zero register.
- addPass(createARM64DeadRegisterDefinitions());
- return true;
-}
-
-bool ARM64PassConfig::addPreSched2() {
- // Expand some pseudo instructions to allow proper scheduling.
- addPass(createARM64ExpandPseudoPass());
- // Use load/store pair instructions when possible.
- addPass(createARM64LoadStoreOptimizationPass());
- return true;
-}
-
-bool ARM64PassConfig::addPreEmitPass() {
- // Relax conditional branch instructions if they're otherwise out of
- // range of their destination.
- addPass(createARM64BranchRelaxation());
- if (TM->getOptLevel() != CodeGenOpt::None && EnableCollectLOH)
- addPass(createARM64CollectLOHPass());
- return true;
-}
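
Each optional pass above follows the same gating pattern: a hidden cl::opt flag combined with an opt-level check, which makes it easy to bisect codegen problems from the llc command line (e.g. by passing -arm64-ccmp=false). A generic sketch of the pattern, with hypothetical names throughout:

    // Hypothetical pass hook, shown only to isolate the gating pattern.
    static cl::opt<bool> EnableMyOpt("arm64-my-opt", cl::Hidden,
                                     cl::desc("Enable the (hypothetical) pass"),
                                     cl::init(true));

    bool ARM64PassConfig::addMyOpt() {
      if (TM->getOptLevel() != CodeGenOpt::None && EnableMyOpt)
        addPass(createMyOptPass()); // hypothetical factory function
      return true;
    }
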
diff --git a/lib/Target/ARM64/ARM64TargetMachine.h b/lib/Target/ARM64/ARM64TargetMachine.h
deleted file mode 100644
index 8274550..0000000
--- a/lib/Target/ARM64/ARM64TargetMachine.h
+++ /dev/null
@@ -1,69 +0,0 @@
-//===-- ARM64TargetMachine.h - Define TargetMachine for ARM64 ---*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file declares the ARM64 specific subclass of TargetMachine.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef ARM64TARGETMACHINE_H
-#define ARM64TARGETMACHINE_H
-
-#include "ARM64InstrInfo.h"
-#include "ARM64ISelLowering.h"
-#include "ARM64Subtarget.h"
-#include "ARM64FrameLowering.h"
-#include "ARM64SelectionDAGInfo.h"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/MC/MCStreamer.h"
-
-namespace llvm {
-
-class ARM64TargetMachine : public LLVMTargetMachine {
-protected:
- ARM64Subtarget Subtarget;
-
-private:
- const DataLayout DL;
- ARM64InstrInfo InstrInfo;
- ARM64TargetLowering TLInfo;
- ARM64FrameLowering FrameLowering;
- ARM64SelectionDAGInfo TSInfo;
-
-public:
- ARM64TargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS,
- const TargetOptions &Options, Reloc::Model RM,
- CodeModel::Model CM, CodeGenOpt::Level OL);
-
- const ARM64Subtarget *getSubtargetImpl() const override { return &Subtarget; }
- const ARM64TargetLowering *getTargetLowering() const override {
- return &TLInfo;
- }
- const DataLayout *getDataLayout() const override { return &DL; }
- const ARM64FrameLowering *getFrameLowering() const override {
- return &FrameLowering;
- }
- const ARM64InstrInfo *getInstrInfo() const override { return &InstrInfo; }
- const ARM64RegisterInfo *getRegisterInfo() const override {
- return &InstrInfo.getRegisterInfo();
- }
- const ARM64SelectionDAGInfo *getSelectionDAGInfo() const override {
- return &TSInfo;
- }
-
- // Pass Pipeline Configuration
- TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
-
- /// \brief Register ARM64 analysis passes with a pass manager.
- void addAnalysisPasses(PassManagerBase &PM) override;
-};
-
-} // end namespace llvm
-
-#endif
diff --git a/lib/Target/ARM64/ARM64TargetObjectFile.cpp b/lib/Target/ARM64/ARM64TargetObjectFile.cpp
deleted file mode 100644
index cde01e5..0000000
--- a/lib/Target/ARM64/ARM64TargetObjectFile.cpp
+++ /dev/null
@@ -1,52 +0,0 @@
-//===-- ARM64TargetObjectFile.cpp - ARM64 Object Info ---------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "ARM64TargetObjectFile.h"
-#include "ARM64TargetMachine.h"
-#include "llvm/IR/Mangler.h"
-#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCStreamer.h"
-#include "llvm/Support/Dwarf.h"
-using namespace llvm;
-using namespace dwarf;
-
-void ARM64_ELFTargetObjectFile::Initialize(MCContext &Ctx,
- const TargetMachine &TM) {
- TargetLoweringObjectFileELF::Initialize(Ctx, TM);
- InitializeELF(TM.Options.UseInitArray);
-}
-
-const MCExpr *ARM64_MachoTargetObjectFile::getTTypeGlobalReference(
- const GlobalValue *GV, unsigned Encoding, Mangler &Mang,
- const TargetMachine &TM, MachineModuleInfo *MMI,
- MCStreamer &Streamer) const {
- // On Darwin, we can reference dwarf symbols with foo@GOT-., which
- // is an indirect pc-relative reference. The default implementation
- // won't reference using the GOT, so we need this target-specific
- // version.
- if (Encoding & (DW_EH_PE_indirect | DW_EH_PE_pcrel)) {
- const MCSymbol *Sym = TM.getSymbol(GV, Mang);
- const MCExpr *Res =
- MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_GOT, getContext());
- MCSymbol *PCSym = getContext().CreateTempSymbol();
- Streamer.EmitLabel(PCSym);
- const MCExpr *PC = MCSymbolRefExpr::Create(PCSym, getContext());
- return MCBinaryExpr::CreateSub(Res, PC, getContext());
- }
-
- return TargetLoweringObjectFileMachO::getTTypeGlobalReference(
- GV, Encoding, Mang, TM, MMI, Streamer);
-}
-
-MCSymbol *ARM64_MachoTargetObjectFile::getCFIPersonalitySymbol(
- const GlobalValue *GV, Mangler &Mang, const TargetMachine &TM,
- MachineModuleInfo *MMI) const {
- return TM.getSymbol(GV, Mang);
-}
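
The Darwin TType path above builds the expression sym@GOT minus a fresh temporary label, i.e. a GOT-indirect, pc-relative reference. Condensed into a free-standing helper (the helper name is illustrative; the MC calls and includes are the ones already used in the file above):

    // Illustrative helper: build "Sym@GOT - ." at the current stream position.
    static const MCExpr *gotPCRel(const MCSymbol *Sym, MCStreamer &Streamer,
                                  MCContext &Ctx) {
      const MCExpr *GotRef =
          MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_GOT, Ctx);
      MCSymbol *PCSym = Ctx.CreateTempSymbol();
      Streamer.EmitLabel(PCSym); // defines '.', the subtrahend
      const MCExpr *PC = MCSymbolRefExpr::Create(PCSym, Ctx);
      return MCBinaryExpr::CreateSub(GotRef, PC, Ctx);
    }
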
diff --git a/lib/Target/ARM64/ARM64TargetObjectFile.h b/lib/Target/ARM64/ARM64TargetObjectFile.h
deleted file mode 100644
index 62446f9..0000000
--- a/lib/Target/ARM64/ARM64TargetObjectFile.h
+++ /dev/null
@@ -1,40 +0,0 @@
-//===-- ARM64TargetObjectFile.h - ARM64 Object Info -------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_TARGET_ARM64_TARGETOBJECTFILE_H
-#define LLVM_TARGET_ARM64_TARGETOBJECTFILE_H
-
-#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
-
-namespace llvm {
-class ARM64TargetMachine;
-
-/// This implementation is used for AArch64 ELF targets (Linux in particular).
-class ARM64_ELFTargetObjectFile : public TargetLoweringObjectFileELF {
- void Initialize(MCContext &Ctx, const TargetMachine &TM) override;
-};
-
-/// ARM64_MachoTargetObjectFile - This TLOF implementation is used for Darwin.
-class ARM64_MachoTargetObjectFile : public TargetLoweringObjectFileMachO {
-public:
- const MCExpr *getTTypeGlobalReference(const GlobalValue *GV,
- unsigned Encoding, Mangler &Mang,
- const TargetMachine &TM,
- MachineModuleInfo *MMI,
- MCStreamer &Streamer) const override;
-
- MCSymbol *getCFIPersonalitySymbol(const GlobalValue *GV, Mangler &Mang,
- const TargetMachine &TM,
- MachineModuleInfo *MMI) const override;
-};
-
-} // end namespace llvm
-
-#endif
diff --git a/lib/Target/ARM64/ARM64TargetTransformInfo.cpp b/lib/Target/ARM64/ARM64TargetTransformInfo.cpp
deleted file mode 100644
index 9b598d7..0000000
--- a/lib/Target/ARM64/ARM64TargetTransformInfo.cpp
+++ /dev/null
@@ -1,326 +0,0 @@
-//===-- ARM64TargetTransformInfo.cpp - ARM64 specific TTI pass ------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-/// \file
-/// This file implements a TargetTransformInfo analysis pass specific to the
-/// ARM64 target machine. It uses the target's detailed information to provide
-/// more precise answers to certain TTI queries, while letting the target
-/// independent and default TTI implementations handle the rest.
-///
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "arm64tti"
-#include "ARM64.h"
-#include "ARM64TargetMachine.h"
-#include "MCTargetDesc/ARM64AddressingModes.h"
-#include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Target/CostTable.h"
-#include "llvm/Target/TargetLowering.h"
-using namespace llvm;
-
-// Declare the pass initialization routine locally as target-specific passes
- // don't have a target-wide initialization entry point, and so we rely on the
-// pass constructor initialization.
-namespace llvm {
-void initializeARM64TTIPass(PassRegistry &);
-}
-
-namespace {
-
-class ARM64TTI final : public ImmutablePass, public TargetTransformInfo {
- const ARM64TargetMachine *TM;
- const ARM64Subtarget *ST;
- const ARM64TargetLowering *TLI;
-
- /// Estimate the overhead of scalarizing an instruction. Insert and Extract
- /// are set if the result needs to be inserted and/or extracted from vectors.
- unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const;
-
-public:
- ARM64TTI() : ImmutablePass(ID), TM(0), ST(0), TLI(0) {
- llvm_unreachable("This pass cannot be directly constructed");
- }
-
- ARM64TTI(const ARM64TargetMachine *TM)
- : ImmutablePass(ID), TM(TM), ST(TM->getSubtargetImpl()),
- TLI(TM->getTargetLowering()) {
- initializeARM64TTIPass(*PassRegistry::getPassRegistry());
- }
-
- void initializePass() override { pushTTIStack(this); }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- TargetTransformInfo::getAnalysisUsage(AU);
- }
-
- /// Pass identification.
- static char ID;
-
- /// Provide necessary pointer adjustments for the two base classes.
- void *getAdjustedAnalysisPointer(const void *ID) override {
- if (ID == &TargetTransformInfo::ID)
- return (TargetTransformInfo *)this;
- return this;
- }
-
- /// \name Scalar TTI Implementations
- /// @{
-
- unsigned getIntImmCost(const APInt &Imm, Type *Ty) const override;
- PopcntSupportKind getPopcntSupport(unsigned TyWidth) const override;
-
- /// @}
-
- /// \name Vector TTI Implementations
- /// @{
-
- unsigned getNumberOfRegisters(bool Vector) const override {
- if (Vector)
- return 32;
-
- return 31;
- }
-
- unsigned getRegisterBitWidth(bool Vector) const override {
- if (Vector)
- return 128;
-
- return 64;
- }
-
- unsigned getMaximumUnrollFactor() const override { return 2; }
-
- unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const
- override;
-
- unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) const
- override;
-
- unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty,
- OperandValueKind Opd1Info = OK_AnyValue,
- OperandValueKind Opd2Info = OK_AnyValue) const
- override;
-
- unsigned getAddressComputationCost(Type *Ty, bool IsComplex) const override;
-
- unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) const
- override;
-
- unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
- unsigned AddressSpace) const override;
- /// @}
-};
-
-} // end anonymous namespace
-
-INITIALIZE_AG_PASS(ARM64TTI, TargetTransformInfo, "arm64tti",
- "ARM64 Target Transform Info", true, true, false)
-char ARM64TTI::ID = 0;
-
-ImmutablePass *
-llvm::createARM64TargetTransformInfoPass(const ARM64TargetMachine *TM) {
- return new ARM64TTI(TM);
-}
-
-unsigned ARM64TTI::getIntImmCost(const APInt &Imm, Type *Ty) const {
- assert(Ty->isIntegerTy());
-
- unsigned BitSize = Ty->getPrimitiveSizeInBits();
- if (BitSize == 0)
- return ~0U;
-
- int64_t Val = Imm.getSExtValue();
- if (Val == 0 || ARM64_AM::isLogicalImmediate(Val, BitSize))
- return 1;
-
- if ((int64_t)Val < 0)
- Val = ~Val;
- if (BitSize == 32)
- Val &= (1LL << 32) - 1;
-
- unsigned LZ = countLeadingZeros((uint64_t)Val);
- unsigned Shift = (63 - LZ) / 16;
- // MOVZ is free, so return a cost of 1 for one or fewer MOVKs.
- return (Shift == 0) ? 1 : Shift;
-}
-
-ARM64TTI::PopcntSupportKind ARM64TTI::getPopcntSupport(unsigned TyWidth) const {
- assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
- if (TyWidth == 32 || TyWidth == 64)
- return PSK_FastHardware;
- // TODO: ARM64TargetLowering::LowerCTPOP() supports 128-bit popcount.
- return PSK_Software;
-}
-
-unsigned ARM64TTI::getCastInstrCost(unsigned Opcode, Type *Dst,
- Type *Src) const {
- int ISD = TLI->InstructionOpcodeToISD(Opcode);
- assert(ISD && "Invalid opcode");
-
- EVT SrcTy = TLI->getValueType(Src);
- EVT DstTy = TLI->getValueType(Dst);
-
- if (!SrcTy.isSimple() || !DstTy.isSimple())
- return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src);
-
- static const TypeConversionCostTblEntry<MVT> ConversionTbl[] = {
- // LowerVectorINT_TO_FP:
- { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 },
- { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i8, 1 },
- { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i16, 1 },
- { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 1 },
- { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i64, 1 },
- { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 },
- { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i8, 1 },
- { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i16, 1 },
- { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 1 },
- { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 1 },
- // LowerVectorFP_TO_INT
- { ISD::FP_TO_SINT, MVT::v4i32, MVT::v4f32, 1 },
- { ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f64, 1 },
- { ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f32, 1 },
- { ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f64, 1 },
- { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f64, 1 },
- { ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 1 },
- { ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f64, 4 },
- { ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f64, 4 },
- };
-
- int Idx = ConvertCostTableLookup<MVT>(
- ConversionTbl, array_lengthof(ConversionTbl), ISD, DstTy.getSimpleVT(),
- SrcTy.getSimpleVT());
- if (Idx != -1)
- return ConversionTbl[Idx].Cost;
-
- return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src);
-}
-
-unsigned ARM64TTI::getVectorInstrCost(unsigned Opcode, Type *Val,
- unsigned Index) const {
- assert(Val->isVectorTy() && "This must be a vector type");
-
- if (Index != -1U) {
- // Legalize the type.
- std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Val);
-
- // This type is legalized to a scalar type.
- if (!LT.second.isVector())
- return 0;
-
- // The type may be split. Normalize the index to the new type.
- unsigned Width = LT.second.getVectorNumElements();
- Index = Index % Width;
-
- // The element at index zero is already inside the vector.
- if (Index == 0)
- return 0;
- }
-
- // All other insert/extracts cost this much.
- return 2;
-}
-
-unsigned ARM64TTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
- OperandValueKind Opd1Info,
- OperandValueKind Opd2Info) const {
- // Legalize the type.
- std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Ty);
-
- int ISD = TLI->InstructionOpcodeToISD(Opcode);
-
- switch (ISD) {
- default:
- return TargetTransformInfo::getArithmeticInstrCost(Opcode, Ty, Opd1Info,
- Opd2Info);
- case ISD::ADD:
- case ISD::MUL:
- case ISD::XOR:
- case ISD::OR:
- case ISD::AND:
- // These nodes are marked as 'custom' for combining purposes only.
- // We know that they are legal. See LowerAdd in ISelLowering.
- return 1 * LT.first;
- }
-}
-
-unsigned ARM64TTI::getAddressComputationCost(Type *Ty, bool IsComplex) const {
- // Address computations in vectorized code with non-consecutive addresses will
- // likely result in more instructions compared to scalar code where the
- // computation can more often be merged into the index mode. The resulting
- // extra micro-ops can significantly decrease throughput.
- unsigned NumVectorInstToHideOverhead = 10;
-
- if (Ty->isVectorTy() && IsComplex)
- return NumVectorInstToHideOverhead;
-
- // In many cases the address computation is not merged into the instruction
- // addressing mode.
- return 1;
-}
-
-unsigned ARM64TTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
- Type *CondTy) const {
-
- int ISD = TLI->InstructionOpcodeToISD(Opcode);
- // We don't lower vector selects wider than the register width well.
- if (ValTy->isVectorTy() && ISD == ISD::SELECT) {
- // We would need this many instructions to hide the scalarization happening.
- unsigned AmortizationCost = 20;
- static const TypeConversionCostTblEntry<MVT::SimpleValueType>
- VectorSelectTbl[] = {
- { ISD::SELECT, MVT::v16i1, MVT::v16i16, 16 * AmortizationCost },
- { ISD::SELECT, MVT::v8i1, MVT::v8i32, 8 * AmortizationCost },
- { ISD::SELECT, MVT::v16i1, MVT::v16i32, 16 * AmortizationCost },
- { ISD::SELECT, MVT::v4i1, MVT::v4i64, 4 * AmortizationCost },
- { ISD::SELECT, MVT::v8i1, MVT::v8i64, 8 * AmortizationCost },
- { ISD::SELECT, MVT::v16i1, MVT::v16i64, 16 * AmortizationCost }
- };
-
- EVT SelCondTy = TLI->getValueType(CondTy);
- EVT SelValTy = TLI->getValueType(ValTy);
- if (SelCondTy.isSimple() && SelValTy.isSimple()) {
- int Idx =
- ConvertCostTableLookup(VectorSelectTbl, ISD, SelCondTy.getSimpleVT(),
- SelValTy.getSimpleVT());
- if (Idx != -1)
- return VectorSelectTbl[Idx].Cost;
- }
- }
- return TargetTransformInfo::getCmpSelInstrCost(Opcode, ValTy, CondTy);
-}
-
-unsigned ARM64TTI::getMemoryOpCost(unsigned Opcode, Type *Src,
- unsigned Alignment,
- unsigned AddressSpace) const {
- std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src);
-
- if (Opcode == Instruction::Store && Src->isVectorTy() && Alignment != 16 &&
- Src->getVectorElementType()->isIntegerTy(64)) {
- // Unaligned stores are extremely inefficient. We don't split
- // unaligned v2i64 stores because of the negative impact splitting them has
- // shown in practice on inlined memcpy code.
- // We make v2i64 stores expensive so that we will only vectorize if there
- // are 6 other instructions getting vectorized.
- unsigned AmortizationCost = 6;
-
- return LT.first * 2 * AmortizationCost;
- }
-
- if (Src->isVectorTy() && Src->getVectorElementType()->isIntegerTy(8) &&
- Src->getVectorNumElements() < 8) {
- // We scalarize the loads/stores because there is no v.4b register and we
- // have to promote the elements to v.4h.
- unsigned NumVecElts = Src->getVectorNumElements();
- unsigned NumVectorizableInstsToAmortize = NumVecElts * 2;
- // We generate 2 instructions per vector element.
- return NumVectorizableInstsToAmortize * NumVecElts * 2;
- }
-
- return LT.first;
-}
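
As a worked example of the immediate-cost model in getIntImmCost above: 0x10000 has 47 leading zeros, so Shift = (63 - 47) / 16 = 1 and the cost is 1 (a single MOVZ, treated as free up to one MOVK), while 0x123456789ABCDEF0 has 3 leading zeros, so Shift = (63 - 3) / 16 = 3 and the cost is 3 (MOVZ plus three MOVKs). A simplified standalone version of the same arithmetic, which skips the logical-immediate early-out and assumes a GCC/Clang builtin for counting leading zeros:

    #include <cstdint>
    #include <cstdio>

    // Simplified sketch of the MOVZ/MOVK chunk count above; illustration only.
    static unsigned movImmCost(int64_t Imm, unsigned BitSize) {
      uint64_t Val = Imm < 0 ? ~(uint64_t)Imm : (uint64_t)Imm;
      if (BitSize == 32)
        Val &= (1ULL << 32) - 1;
      if (Val == 0)                       // 0 and -1 are single instructions
        return 1;
      unsigned LZ = __builtin_clzll(Val); // Val != 0 here, so this is defined
      unsigned Shift = (63 - LZ) / 16;    // index of the highest 16-bit chunk
      return Shift == 0 ? 1 : Shift;      // MOVZ is free; count the MOVKs
    }

    int main() {
      printf("%u\n", movImmCost(0x10000, 64));            // prints 1
      printf("%u\n", movImmCost(0x123456789ABCDEF0, 64)); // prints 3
    }
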
diff --git a/lib/Target/ARM64/AsmParser/ARM64AsmParser.cpp b/lib/Target/ARM64/AsmParser/ARM64AsmParser.cpp
deleted file mode 100644
index 38a61d8..0000000
--- a/lib/Target/ARM64/AsmParser/ARM64AsmParser.cpp
+++ /dev/null
@@ -1,4832 +0,0 @@
-//===-- ARM64AsmParser.cpp - Parse ARM64 assembly to MCInst instructions --===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "MCTargetDesc/ARM64AddressingModes.h"
-#include "MCTargetDesc/ARM64BaseInfo.h"
-#include "MCTargetDesc/ARM64MCExpr.h"
-#include "llvm/MC/MCParser/MCAsmLexer.h"
-#include "llvm/MC/MCParser/MCAsmParser.h"
-#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
-#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCInst.h"
-#include "llvm/MC/MCRegisterInfo.h"
-#include "llvm/MC/MCStreamer.h"
-#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/MC/MCSymbol.h"
-#include "llvm/MC/MCTargetAsmParser.h"
-#include "llvm/Support/SourceMgr.h"
-#include "llvm/Support/TargetRegistry.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/StringSwitch.h"
-#include "llvm/ADT/Twine.h"
-#include <cstdio>
-using namespace llvm;
-
-namespace {
-
-class ARM64Operand;
-
-class ARM64AsmParser : public MCTargetAsmParser {
-public:
- typedef SmallVectorImpl<MCParsedAsmOperand *> OperandVector;
-
-private:
- StringRef Mnemonic; ///< Instruction mnemonic.
- MCSubtargetInfo &STI;
- MCAsmParser &Parser;
-
- MCAsmParser &getParser() const { return Parser; }
- MCAsmLexer &getLexer() const { return Parser.getLexer(); }
-
- SMLoc getLoc() const { return Parser.getTok().getLoc(); }
-
- bool parseSysAlias(StringRef Name, SMLoc NameLoc, OperandVector &Operands);
- unsigned parseCondCodeString(StringRef Cond);
- bool parseCondCode(OperandVector &Operands, bool invertCondCode);
- int tryParseRegister();
- int tryMatchVectorRegister(StringRef &Kind);
- bool parseOptionalShift(OperandVector &Operands);
- bool parseOptionalExtend(OperandVector &Operands);
- bool parseRegister(OperandVector &Operands);
- bool parseMemory(OperandVector &Operands);
- bool parseSymbolicImmVal(const MCExpr *&ImmVal);
- bool parseVectorList(OperandVector &Operands);
- bool parseOperand(OperandVector &Operands, bool isCondCode,
- bool invertCondCode);
-
- void Warning(SMLoc L, const Twine &Msg) { Parser.Warning(L, Msg); }
- bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); }
- bool showMatchError(SMLoc Loc, unsigned ErrCode);
-
- bool parseDirectiveWord(unsigned Size, SMLoc L);
- bool parseDirectiveTLSDescCall(SMLoc L);
-
- bool parseDirectiveLOH(StringRef LOH, SMLoc L);
-
- bool validateInstruction(MCInst &Inst, SmallVectorImpl<SMLoc> &Loc);
- bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
- OperandVector &Operands, MCStreamer &Out,
- unsigned &ErrorInfo, bool MatchingInlineAsm);
-/// @name Auto-generated Match Functions
-/// {
-
-#define GET_ASSEMBLER_HEADER
-#include "ARM64GenAsmMatcher.inc"
-
- /// }
-
- OperandMatchResultTy tryParseNoIndexMemory(OperandVector &Operands);
- OperandMatchResultTy tryParseBarrierOperand(OperandVector &Operands);
- OperandMatchResultTy tryParseSystemRegister(OperandVector &Operands);
- OperandMatchResultTy tryParseCPSRField(OperandVector &Operands);
- OperandMatchResultTy tryParseSysCROperand(OperandVector &Operands);
- OperandMatchResultTy tryParsePrefetch(OperandVector &Operands);
- OperandMatchResultTy tryParseAdrpLabel(OperandVector &Operands);
- OperandMatchResultTy tryParseAdrLabel(OperandVector &Operands);
- OperandMatchResultTy tryParseFPImm(OperandVector &Operands);
- bool tryParseVectorRegister(OperandVector &Operands);
-
-public:
- enum ARM64MatchResultTy {
- Match_InvalidSuffix = FIRST_TARGET_MATCH_RESULT_TY,
-#define GET_OPERAND_DIAGNOSTIC_TYPES
-#include "ARM64GenAsmMatcher.inc"
- };
- ARM64AsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser,
- const MCInstrInfo &MII)
- : MCTargetAsmParser(), STI(_STI), Parser(_Parser) {
- MCAsmParserExtension::Initialize(_Parser);
- }
-
- virtual bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
- SMLoc NameLoc, OperandVector &Operands);
- virtual bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc);
- virtual bool ParseDirective(AsmToken DirectiveID);
- unsigned validateTargetOperandClass(MCParsedAsmOperand *Op, unsigned Kind);
-
- static bool classifySymbolRef(const MCExpr *Expr,
- ARM64MCExpr::VariantKind &ELFRefKind,
- MCSymbolRefExpr::VariantKind &DarwinRefKind,
- const MCConstantExpr *&Addend);
-};
-} // end anonymous namespace
-
-namespace {
-
-/// ARM64Operand - Instances of this class represent a parsed ARM64 machine
-/// instruction.
-class ARM64Operand : public MCParsedAsmOperand {
-public:
- enum MemIdxKindTy {
- ImmediateOffset, // pre-indexed, no writeback
- RegisterOffset // register offset, with optional extend
- };
-
-private:
- enum KindTy {
- k_Immediate,
- k_Memory,
- k_Register,
- k_VectorList,
- k_VectorIndex,
- k_Token,
- k_SysCR,
- k_Prefetch,
- k_Shifter,
- k_Extend,
- k_FPImm,
- k_Barrier,
- k_SystemRegister,
- k_CPSRField
- } Kind;
-
- SMLoc StartLoc, EndLoc, OffsetLoc;
-
- struct TokOp {
- const char *Data;
- unsigned Length;
- bool IsSuffix; // Is the operand actually a suffix on the mnemonic.
- };
-
- struct RegOp {
- unsigned RegNum;
- bool isVector;
- };
-
- struct VectorListOp {
- unsigned RegNum;
- unsigned Count;
- unsigned NumElements;
- unsigned ElementKind;
- };
-
- struct VectorIndexOp {
- unsigned Val;
- };
-
- struct ImmOp {
- const MCExpr *Val;
- };
-
- struct FPImmOp {
- unsigned Val; // Encoded 8-bit representation.
- };
-
- struct BarrierOp {
- unsigned Val; // Not the enum since not all values have names.
- };
-
- struct SystemRegisterOp {
- // 16-bit immediate, usually from the ARM64SYS::SystemRegister enum,
- // but not limited to those values.
- uint16_t Val;
- };
-
- struct CPSRFieldOp {
- ARM64SYS::CPSRField Field;
- };
-
- struct SysCRImmOp {
- unsigned Val;
- };
-
- struct PrefetchOp {
- unsigned Val;
- };
-
- struct ShifterOp {
- unsigned Val;
- };
-
- struct ExtendOp {
- unsigned Val;
- };
-
- // This is for all forms of ARM64 address expressions.
- struct MemOp {
- unsigned BaseRegNum, OffsetRegNum;
- ARM64_AM::ExtendType ExtType;
- unsigned ShiftVal;
- bool ExplicitShift;
- const MCExpr *OffsetImm;
- MemIdxKindTy Mode;
- };
-
- union {
- struct TokOp Tok;
- struct RegOp Reg;
- struct VectorListOp VectorList;
- struct VectorIndexOp VectorIndex;
- struct ImmOp Imm;
- struct FPImmOp FPImm;
- struct BarrierOp Barrier;
- struct SystemRegisterOp SystemRegister;
- struct CPSRFieldOp CPSRField;
- struct SysCRImmOp SysCRImm;
- struct PrefetchOp Prefetch;
- struct ShifterOp Shifter;
- struct ExtendOp Extend;
- struct MemOp Mem;
- };
-
- // Keep the MCContext around as the MCExprs may need to be manipulated during
- // the add<>Operands() calls.
- MCContext &Ctx;
-
- ARM64Operand(KindTy K, MCContext &_Ctx)
- : MCParsedAsmOperand(), Kind(K), Ctx(_Ctx) {}
-
-public:
- ARM64Operand(const ARM64Operand &o) : MCParsedAsmOperand(), Ctx(o.Ctx) {
- Kind = o.Kind;
- StartLoc = o.StartLoc;
- EndLoc = o.EndLoc;
- switch (Kind) {
- case k_Token:
- Tok = o.Tok;
- break;
- case k_Immediate:
- Imm = o.Imm;
- break;
- case k_FPImm:
- FPImm = o.FPImm;
- break;
- case k_Barrier:
- Barrier = o.Barrier;
- break;
- case k_SystemRegister:
- SystemRegister = o.SystemRegister;
- break;
- case k_CPSRField:
- CPSRField = o.CPSRField;
- break;
- case k_Register:
- Reg = o.Reg;
- break;
- case k_VectorList:
- VectorList = o.VectorList;
- break;
- case k_VectorIndex:
- VectorIndex = o.VectorIndex;
- break;
- case k_SysCR:
- SysCRImm = o.SysCRImm;
- break;
- case k_Prefetch:
- Prefetch = o.Prefetch;
- break;
- case k_Memory:
- Mem = o.Mem;
- break;
- case k_Shifter:
- Shifter = o.Shifter;
- break;
- case k_Extend:
- Extend = o.Extend;
- break;
- }
- }
-
- /// getStartLoc - Get the location of the first token of this operand.
- SMLoc getStartLoc() const { return StartLoc; }
- /// getEndLoc - Get the location of the last token of this operand.
- SMLoc getEndLoc() const { return EndLoc; }
- /// getOffsetLoc - Get the location of the offset of this memory operand.
- SMLoc getOffsetLoc() const { return OffsetLoc; }
-
- StringRef getToken() const {
- assert(Kind == k_Token && "Invalid access!");
- return StringRef(Tok.Data, Tok.Length);
- }
-
- bool isTokenSuffix() const {
- assert(Kind == k_Token && "Invalid access!");
- return Tok.IsSuffix;
- }
-
- const MCExpr *getImm() const {
- assert(Kind == k_Immediate && "Invalid access!");
- return Imm.Val;
- }
-
- unsigned getFPImm() const {
- assert(Kind == k_FPImm && "Invalid access!");
- return FPImm.Val;
- }
-
- unsigned getBarrier() const {
- assert(Kind == k_Barrier && "Invalid access!");
- return Barrier.Val;
- }
-
- uint16_t getSystemRegister() const {
- assert(Kind == k_SystemRegister && "Invalid access!");
- return SystemRegister.Val;
- }
-
- ARM64SYS::CPSRField getCPSRField() const {
- assert(Kind == k_CPSRField && "Invalid access!");
- return CPSRField.Field;
- }
-
- unsigned getReg() const {
- assert(Kind == k_Register && "Invalid access!");
- return Reg.RegNum;
- }
-
- unsigned getVectorListStart() const {
- assert(Kind == k_VectorList && "Invalid access!");
- return VectorList.RegNum;
- }
-
- unsigned getVectorListCount() const {
- assert(Kind == k_VectorList && "Invalid access!");
- return VectorList.Count;
- }
-
- unsigned getVectorIndex() const {
- assert(Kind == k_VectorIndex && "Invalid access!");
- return VectorIndex.Val;
- }
-
- unsigned getSysCR() const {
- assert(Kind == k_SysCR && "Invalid access!");
- return SysCRImm.Val;
- }
-
- unsigned getPrefetch() const {
- assert(Kind == k_Prefetch && "Invalid access!");
- return Prefetch.Val;
- }
-
- unsigned getShifter() const {
- assert(Kind == k_Shifter && "Invalid access!");
- return Shifter.Val;
- }
-
- unsigned getExtend() const {
- assert(Kind == k_Extend && "Invalid access!");
- return Extend.Val;
- }
-
- bool isImm() const { return Kind == k_Immediate; }
- bool isSImm9() const {
- if (!isImm())
- return false;
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
- if (!MCE)
- return false;
- int64_t Val = MCE->getValue();
- return (Val >= -256 && Val < 256);
- }
- bool isSImm7s4() const {
- if (!isImm())
- return false;
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
- if (!MCE)
- return false;
- int64_t Val = MCE->getValue();
- return (Val >= -256 && Val <= 252 && (Val & 3) == 0);
- }
- bool isSImm7s8() const {
- if (!isImm())
- return false;
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
- if (!MCE)
- return false;
- int64_t Val = MCE->getValue();
- return (Val >= -512 && Val <= 504 && (Val & 7) == 0);
- }
- bool isSImm7s16() const {
- if (!isImm())
- return false;
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
- if (!MCE)
- return false;
- int64_t Val = MCE->getValue();
- return (Val >= -1024 && Val <= 1008 && (Val & 15) == 0);
- }
- bool isImm0_7() const {
- if (!isImm())
- return false;
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
- if (!MCE)
- return false;
- int64_t Val = MCE->getValue();
- return (Val >= 0 && Val < 8);
- }
- bool isImm1_8() const {
- if (!isImm())
- return false;
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
- if (!MCE)
- return false;
- int64_t Val = MCE->getValue();
- return (Val > 0 && Val < 9);
- }
- bool isImm0_15() const {
- if (!isImm())
- return false;
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
- if (!MCE)
- return false;
- int64_t Val = MCE->getValue();
- return (Val >= 0 && Val < 16);
- }
- bool isImm1_16() const {
- if (!isImm())
- return false;
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
- if (!MCE)
- return false;
- int64_t Val = MCE->getValue();
- return (Val > 0 && Val < 17);
- }
- bool isImm0_31() const {
- if (!isImm())
- return false;
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
- if (!MCE)
- return false;
- int64_t Val = MCE->getValue();
- return (Val >= 0 && Val < 32);
- }
- bool isImm1_31() const {
- if (!isImm())
- return false;
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
- if (!MCE)
- return false;
- int64_t Val = MCE->getValue();
- return (Val >= 1 && Val < 32);
- }
- bool isImm1_32() const {
- if (!isImm())
- return false;
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
- if (!MCE)
- return false;
- int64_t Val = MCE->getValue();
- return (Val >= 1 && Val < 33);
- }
- bool isImm0_63() const {
- if (!isImm())
- return false;
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
- if (!MCE)
- return false;
- int64_t Val = MCE->getValue();
- return (Val >= 0 && Val < 64);
- }
- bool isImm1_63() const {
- if (!isImm())
- return false;
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
- if (!MCE)
- return false;
- int64_t Val = MCE->getValue();
- return (Val >= 1 && Val < 64);
- }
- bool isImm1_64() const {
- if (!isImm())
- return false;
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
- if (!MCE)
- return false;
- int64_t Val = MCE->getValue();
- return (Val >= 1 && Val < 65);
- }
- bool isImm0_127() const {
- if (!isImm())
- return false;
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
- if (!MCE)
- return false;
- int64_t Val = MCE->getValue();
- return (Val >= 0 && Val < 128);
- }
- bool isImm0_255() const {
- if (!isImm())
- return false;
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
- if (!MCE)
- return false;
- int64_t Val = MCE->getValue();
- return (Val >= 0 && Val < 256);
- }
- bool isImm0_65535() const {
- if (!isImm())
- return false;
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
- if (!MCE)
- return false;
- int64_t Val = MCE->getValue();
- return (Val >= 0 && Val < 65536);
- }
- bool isLogicalImm32() const {
- if (!isImm())
- return false;
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
- if (!MCE)
- return false;
- return ARM64_AM::isLogicalImmediate(MCE->getValue(), 32);
- }
- bool isLogicalImm64() const {
- if (!isImm())
- return false;
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
- if (!MCE)
- return false;
- return ARM64_AM::isLogicalImmediate(MCE->getValue(), 64);
- }
- bool isSIMDImmType10() const {
- if (!isImm())
- return false;
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
- if (!MCE)
- return false;
- return ARM64_AM::isAdvSIMDModImmType10(MCE->getValue());
- }
- bool isBranchTarget26() const {
- if (!isImm())
- return false;
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
- if (!MCE)
- return true;
- int64_t Val = MCE->getValue();
- if (Val & 0x3)
- return false;
- return (Val >= -(0x2000000 << 2) && Val <= (0x1ffffff << 2));
- }
- bool isBranchTarget19() const {
- if (!isImm())
- return false;
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
- if (!MCE)
- return true;
- int64_t Val = MCE->getValue();
- if (Val & 0x3)
- return false;
- return (Val >= -(0x40000 << 2) && Val <= (0x3ffff << 2));
- }
- bool isBranchTarget14() const {
- if (!isImm())
- return false;
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
- if (!MCE)
- return true;
- int64_t Val = MCE->getValue();
- if (Val & 0x3)
- return false;
- return (Val >= -(0x2000 << 2) && Val <= (0x1fff << 2));
- }
-
- bool isMovWSymbol(ArrayRef<ARM64MCExpr::VariantKind> AllowedModifiers) const {
- if (!isImm())
- return false;
-
- ARM64MCExpr::VariantKind ELFRefKind;
- MCSymbolRefExpr::VariantKind DarwinRefKind;
- const MCConstantExpr *Addend;
- if (!ARM64AsmParser::classifySymbolRef(getImm(), ELFRefKind, DarwinRefKind,
- Addend)) {
- return false;
- }
- if (DarwinRefKind != MCSymbolRefExpr::VK_None)
- return false;
-
- for (unsigned i = 0; i != AllowedModifiers.size(); ++i) {
- if (ELFRefKind == AllowedModifiers[i])
- return Addend == 0;
- }
-
- return false;
- }
-
- bool isMovZSymbolG3() const {
- static ARM64MCExpr::VariantKind Variants[] = { ARM64MCExpr::VK_ABS_G3 };
- return isMovWSymbol(Variants);
- }
-
- bool isMovZSymbolG2() const {
- static ARM64MCExpr::VariantKind Variants[] = { ARM64MCExpr::VK_ABS_G2,
- ARM64MCExpr::VK_TPREL_G2,
- ARM64MCExpr::VK_DTPREL_G2 };
- return isMovWSymbol(Variants);
- }
-
- bool isMovZSymbolG1() const {
- static ARM64MCExpr::VariantKind Variants[] = { ARM64MCExpr::VK_ABS_G1,
- ARM64MCExpr::VK_GOTTPREL_G1,
- ARM64MCExpr::VK_TPREL_G1,
- ARM64MCExpr::VK_DTPREL_G1, };
- return isMovWSymbol(Variants);
- }
-
- bool isMovZSymbolG0() const {
- static ARM64MCExpr::VariantKind Variants[] = { ARM64MCExpr::VK_ABS_G0,
- ARM64MCExpr::VK_TPREL_G0,
- ARM64MCExpr::VK_DTPREL_G0 };
- return isMovWSymbol(Variants);
- }
-
- bool isMovKSymbolG2() const {
- static ARM64MCExpr::VariantKind Variants[] = { ARM64MCExpr::VK_ABS_G2_NC };
- return isMovWSymbol(Variants);
- }
-
- bool isMovKSymbolG1() const {
- static ARM64MCExpr::VariantKind Variants[] = {
- ARM64MCExpr::VK_ABS_G1_NC, ARM64MCExpr::VK_TPREL_G1_NC,
- ARM64MCExpr::VK_DTPREL_G1_NC
- };
- return isMovWSymbol(Variants);
- }
-
- bool isMovKSymbolG0() const {
- static ARM64MCExpr::VariantKind Variants[] = {
- ARM64MCExpr::VK_ABS_G0_NC, ARM64MCExpr::VK_GOTTPREL_G0_NC,
- ARM64MCExpr::VK_TPREL_G0_NC, ARM64MCExpr::VK_DTPREL_G0_NC
- };
- return isMovWSymbol(Variants);
- }
-
- bool isFPImm() const { return Kind == k_FPImm; }
- bool isBarrier() const { return Kind == k_Barrier; }
- bool isSystemRegister() const {
- if (Kind == k_SystemRegister)
- return true;
- // SPSel is legal for both the system register and the CPSR-field
- // variants of MSR, so special-case that. Fugly.
- return (Kind == k_CPSRField && getCPSRField() == ARM64SYS::cpsr_SPSel);
- }
- bool isSystemCPSRField() const { return Kind == k_CPSRField; }
- bool isReg() const { return Kind == k_Register && !Reg.isVector; }
- bool isVectorReg() const { return Kind == k_Register && Reg.isVector; }
-
- /// Is this a vector list with the type implicit (presumably attached to the
- /// instruction itself)?
- template <unsigned NumRegs> bool isImplicitlyTypedVectorList() const {
- return Kind == k_VectorList && VectorList.Count == NumRegs &&
- !VectorList.ElementKind;
- }
-
- template <unsigned NumRegs, unsigned NumElements, char ElementKind>
- bool isTypedVectorList() const {
- if (Kind != k_VectorList)
- return false;
- if (VectorList.Count != NumRegs)
- return false;
- if (VectorList.ElementKind != ElementKind)
- return false;
- return VectorList.NumElements == NumElements;
- }
-
- bool isVectorIndexB() const {
- return Kind == k_VectorIndex && VectorIndex.Val < 16;
- }
- bool isVectorIndexH() const {
- return Kind == k_VectorIndex && VectorIndex.Val < 8;
- }
- bool isVectorIndexS() const {
- return Kind == k_VectorIndex && VectorIndex.Val < 4;
- }
- bool isVectorIndexD() const {
- return Kind == k_VectorIndex && VectorIndex.Val < 2;
- }
- bool isToken() const { return Kind == k_Token; }
- bool isTokenEqual(StringRef Str) const {
- return Kind == k_Token && getToken() == Str;
- }
- bool isMem() const { return Kind == k_Memory; }
- bool isSysCR() const { return Kind == k_SysCR; }
- bool isPrefetch() const { return Kind == k_Prefetch; }
- bool isShifter() const { return Kind == k_Shifter; }
- bool isExtend() const {
- // lsl is an alias for UXTX but will be parsed as a k_Shifter operand.
- if (isShifter()) {
- ARM64_AM::ShiftType ST = ARM64_AM::getShiftType(Shifter.Val);
- return ST == ARM64_AM::LSL;
- }
- return Kind == k_Extend;
- }
- bool isExtend64() const {
- if (Kind != k_Extend)
- return false;
- // UXTX and SXTX require a 64-bit source register (the ExtendLSL64 class).
- ARM64_AM::ExtendType ET = ARM64_AM::getArithExtendType(Extend.Val);
- return ET != ARM64_AM::UXTX && ET != ARM64_AM::SXTX;
- }
- bool isExtendLSL64() const {
- // lsl is an alias for UXTX but will be parsed as a k_Shifter operand.
- if (isShifter()) {
- ARM64_AM::ShiftType ST = ARM64_AM::getShiftType(Shifter.Val);
- return ST == ARM64_AM::LSL;
- }
- if (Kind != k_Extend)
- return false;
- ARM64_AM::ExtendType ET = ARM64_AM::getArithExtendType(Extend.Val);
- return ET == ARM64_AM::UXTX || ET == ARM64_AM::SXTX;
- }
-
- bool isArithmeticShifter() const {
- if (!isShifter())
- return false;
-
- // An arithmetic shifter is LSL, LSR, or ASR.
- ARM64_AM::ShiftType ST = ARM64_AM::getShiftType(Shifter.Val);
- return ST == ARM64_AM::LSL || ST == ARM64_AM::LSR || ST == ARM64_AM::ASR;
- }
-
- bool isMovImm32Shifter() const {
- if (!isShifter())
- return false;
-
- // A 32-bit MOVi shifter is LSL of 0 or 16.
- ARM64_AM::ShiftType ST = ARM64_AM::getShiftType(Shifter.Val);
- if (ST != ARM64_AM::LSL)
- return false;
- uint64_t Val = ARM64_AM::getShiftValue(Shifter.Val);
- return (Val == 0 || Val == 16);
- }
-
- bool isMovImm64Shifter() const {
- if (!isShifter())
- return false;
-
- // A 64-bit MOVi shifter is LSL of 0, 16, 32, or 48.
- ARM64_AM::ShiftType ST = ARM64_AM::getShiftType(Shifter.Val);
- if (ST != ARM64_AM::LSL)
- return false;
- uint64_t Val = ARM64_AM::getShiftValue(Shifter.Val);
- return (Val == 0 || Val == 16 || Val == 32 || Val == 48);
- }
-
- bool isAddSubShifter() const {
- if (!isShifter())
- return false;
-
- // An ADD/SUB shifter is either 'lsl #0' or 'lsl #12'.
- unsigned Val = Shifter.Val;
- return ARM64_AM::getShiftType(Val) == ARM64_AM::LSL &&
- (ARM64_AM::getShiftValue(Val) == 0 ||
- ARM64_AM::getShiftValue(Val) == 12);
- }
-
- bool isLogicalVecShifter() const {
- if (!isShifter())
- return false;
-
- // A logical vector shifter is a left shift by 0, 8, 16, or 24.
- unsigned Val = Shifter.Val;
- unsigned Shift = ARM64_AM::getShiftValue(Val);
- return ARM64_AM::getShiftType(Val) == ARM64_AM::LSL &&
- (Shift == 0 || Shift == 8 || Shift == 16 || Shift == 24);
- }
-
- bool isLogicalVecHalfWordShifter() const {
- if (!isLogicalVecShifter())
- return false;
-
- // A logical vector half-word shifter is a left shift by 0 or 8.
- unsigned Val = Shifter.Val;
- unsigned Shift = ARM64_AM::getShiftValue(Val);
- return ARM64_AM::getShiftType(Val) == ARM64_AM::LSL &&
- (Shift == 0 || Shift == 8);
- }
-
- bool isMoveVecShifter() const {
- if (!isShifter())
- return false;
-
- // A move vector shifter is an MSL shift of 8 or 16.
- unsigned Val = Shifter.Val;
- unsigned Shift = ARM64_AM::getShiftValue(Val);
- return ARM64_AM::getShiftType(Val) == ARM64_AM::MSL &&
- (Shift == 8 || Shift == 16);
- }
-
- bool isMemoryRegisterOffset8() const {
- return isMem() && Mem.Mode == RegisterOffset && Mem.ShiftVal == 0;
- }
-
- bool isMemoryRegisterOffset16() const {
- return isMem() && Mem.Mode == RegisterOffset &&
- (Mem.ShiftVal == 0 || Mem.ShiftVal == 1);
- }
-
- bool isMemoryRegisterOffset32() const {
- return isMem() && Mem.Mode == RegisterOffset &&
- (Mem.ShiftVal == 0 || Mem.ShiftVal == 2);
- }
-
- bool isMemoryRegisterOffset64() const {
- return isMem() && Mem.Mode == RegisterOffset &&
- (Mem.ShiftVal == 0 || Mem.ShiftVal == 3);
- }
-
- bool isMemoryRegisterOffset128() const {
- return isMem() && Mem.Mode == RegisterOffset &&
- (Mem.ShiftVal == 0 || Mem.ShiftVal == 4);
- }
-
- bool isMemoryUnscaled() const {
- if (!isMem())
- return false;
- if (Mem.Mode != ImmediateOffset)
- return false;
- if (!Mem.OffsetImm)
- return true;
- // Make sure the immediate value is valid.
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Mem.OffsetImm);
- if (!CE)
- return false;
- // The offset must fit in a signed 9-bit unscaled immediate.
- int64_t Value = CE->getValue();
- return (Value >= -256 && Value < 256);
- }
- // Fallback unscaled operands are for aliases of LDR/STR that fall back
- // to LDUR/STUR when the offset is not legal for the former but is for
- // the latter. As such, in addition to checking for being a legal unscaled
- // address, also check that it is not a legal scaled address. This avoids
- // ambiguity in the matcher.
- bool isMemoryUnscaledFB8() const {
- return isMemoryUnscaled() && !isMemoryIndexed8();
- }
- bool isMemoryUnscaledFB16() const {
- return isMemoryUnscaled() && !isMemoryIndexed16();
- }
- bool isMemoryUnscaledFB32() const {
- return isMemoryUnscaled() && !isMemoryIndexed32();
- }
- bool isMemoryUnscaledFB64() const {
- return isMemoryUnscaled() && !isMemoryIndexed64();
- }
- bool isMemoryUnscaledFB128() const {
- return isMemoryUnscaled() && !isMemoryIndexed128();
- }
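To make the fallback rule concrete, here is a minimal standalone sketch of the three predicates involved (hypothetical helper names, not the parser's API), showing how any given offset matches at most one form:

#include <cassert>
#include <cstdint>

// Signed 9-bit unscaled offset (LDUR/STUR).
static bool isUnscaled(int64_t Off) { return Off >= -256 && Off < 256; }

// Unsigned, scaled 12-bit offset (LDR/STR).
static bool isScaled(int64_t Off, int64_t Scale) {
  return Off >= 0 && Off % Scale == 0 && Off <= 4095 * Scale;
}

// Fallback: legal as unscaled but NOT as scaled, avoiding matcher ambiguity.
static bool isUnscaledFB(int64_t Off, int64_t Scale) {
  return isUnscaled(Off) && !isScaled(Off, Scale);
}

int main() {
  assert(isUnscaledFB(-8, 8)); // negative: only LDUR can encode it
  assert(isUnscaledFB(3, 8));  // unaligned: only LDUR can encode it
  assert(!isUnscaledFB(8, 8)); // aligned, non-negative: scaled LDR wins
  return 0;
}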
- bool isMemoryIndexed(unsigned Scale) const {
- if (!isMem())
- return false;
- if (Mem.Mode != ImmediateOffset)
- return false;
- if (!Mem.OffsetImm)
- return true;
- // Make sure the immediate value is valid.
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Mem.OffsetImm);
-
- if (CE) {
-      // The offset must be a non-negative multiple of the scale and within
-      // the range encodable by the unsigned 12-bit immediate.
- int64_t Value = CE->getValue();
- return (Value >= 0 && (Value % Scale) == 0 && Value <= (4095 * Scale));
- }
-
- // If it's not a constant, check for some expressions we know.
- const MCExpr *Expr = Mem.OffsetImm;
- ARM64MCExpr::VariantKind ELFRefKind;
- MCSymbolRefExpr::VariantKind DarwinRefKind;
- const MCConstantExpr *Addend;
- if (!ARM64AsmParser::classifySymbolRef(Expr, ELFRefKind, DarwinRefKind,
- Addend)) {
- // If we don't understand the expression, assume the best and
- // let the fixup and relocation code deal with it.
- return true;
- }
-
- if (DarwinRefKind == MCSymbolRefExpr::VK_PAGEOFF ||
- ELFRefKind == ARM64MCExpr::VK_LO12 ||
- ELFRefKind == ARM64MCExpr::VK_GOT_LO12 ||
- ELFRefKind == ARM64MCExpr::VK_DTPREL_LO12 ||
- ELFRefKind == ARM64MCExpr::VK_DTPREL_LO12_NC ||
- ELFRefKind == ARM64MCExpr::VK_TPREL_LO12 ||
- ELFRefKind == ARM64MCExpr::VK_TPREL_LO12_NC ||
- ELFRefKind == ARM64MCExpr::VK_GOTTPREL_LO12_NC ||
- ELFRefKind == ARM64MCExpr::VK_TLSDESC_LO12) {
- // Note that we don't range-check the addend. It's adjusted modulo page
- // size when converted, so there is no "out of range" condition when using
- // @pageoff.
- int64_t Value = Addend ? Addend->getValue() : 0;
- return Value >= 0 && (Value % Scale) == 0;
- } else if (DarwinRefKind == MCSymbolRefExpr::VK_GOTPAGEOFF ||
- DarwinRefKind == MCSymbolRefExpr::VK_TLVPPAGEOFF) {
- // @gotpageoff/@tlvppageoff can only be used directly, not with an addend.
- return Addend == 0;
- }
-
- return false;
- }
- bool isMemoryIndexed128() const { return isMemoryIndexed(16); }
- bool isMemoryIndexed64() const { return isMemoryIndexed(8); }
- bool isMemoryIndexed32() const { return isMemoryIndexed(4); }
- bool isMemoryIndexed16() const { return isMemoryIndexed(2); }
- bool isMemoryIndexed8() const { return isMemoryIndexed(1); }
- bool isMemoryNoIndex() const {
- if (!isMem())
- return false;
- if (Mem.Mode != ImmediateOffset)
- return false;
- if (!Mem.OffsetImm)
- return true;
-
- // Make sure the immediate value is valid. Only zero is allowed.
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Mem.OffsetImm);
- if (!CE || CE->getValue() != 0)
- return false;
- return true;
- }
- bool isMemorySIMDNoIndex() const {
- if (!isMem())
- return false;
- if (Mem.Mode != ImmediateOffset)
- return false;
- return Mem.OffsetImm == 0;
- }
- bool isMemoryIndexedSImm9() const {
- if (!isMem() || Mem.Mode != ImmediateOffset)
- return false;
- if (!Mem.OffsetImm)
- return true;
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Mem.OffsetImm);
- assert(CE && "Non-constant pre-indexed offset!");
- int64_t Value = CE->getValue();
- return Value >= -256 && Value <= 255;
- }
- bool isMemoryIndexed32SImm7() const {
- if (!isMem() || Mem.Mode != ImmediateOffset)
- return false;
- if (!Mem.OffsetImm)
- return true;
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Mem.OffsetImm);
- assert(CE && "Non-constant pre-indexed offset!");
- int64_t Value = CE->getValue();
- return ((Value % 4) == 0) && Value >= -256 && Value <= 252;
- }
- bool isMemoryIndexed64SImm7() const {
- if (!isMem() || Mem.Mode != ImmediateOffset)
- return false;
- if (!Mem.OffsetImm)
- return true;
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Mem.OffsetImm);
- assert(CE && "Non-constant pre-indexed offset!");
- int64_t Value = CE->getValue();
- return ((Value % 8) == 0) && Value >= -512 && Value <= 504;
- }
- bool isMemoryIndexed128SImm7() const {
- if (!isMem() || Mem.Mode != ImmediateOffset)
- return false;
- if (!Mem.OffsetImm)
- return true;
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Mem.OffsetImm);
- assert(CE && "Non-constant pre-indexed offset!");
- int64_t Value = CE->getValue();
- return ((Value % 16) == 0) && Value >= -1024 && Value <= 1008;
- }
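As a cross-check on the three SImm7 ranges above: each is exactly a signed 7-bit field (-64..63) multiplied by the access size. A tiny standalone verification:

#include <cassert>

int main() {
  // Signed 7-bit paired-access offsets, in units of the access size.
  assert(-64 * 4 == -256 && 63 * 4 == 252);     // 32-bit pairs
  assert(-64 * 8 == -512 && 63 * 8 == 504);     // 64-bit pairs
  assert(-64 * 16 == -1024 && 63 * 16 == 1008); // 128-bit pairs
  return 0;
}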
-
- bool isAdrpLabel() const {
- // Validation was handled during parsing, so we just sanity check that
- // something didn't go haywire.
- return isImm();
- }
-
- bool isAdrLabel() const {
- // Validation was handled during parsing, so we just sanity check that
- // something didn't go haywire.
- return isImm();
- }
-
- void addExpr(MCInst &Inst, const MCExpr *Expr) const {
- // Add as immediates when possible. Null MCExpr = 0.
- if (Expr == 0)
- Inst.addOperand(MCOperand::CreateImm(0));
- else if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr))
- Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
- else
- Inst.addOperand(MCOperand::CreateExpr(Expr));
- }
-
- void addRegOperands(MCInst &Inst, unsigned N) const {
- assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateReg(getReg()));
- }
-
- void addVectorRegOperands(MCInst &Inst, unsigned N) const {
- assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateReg(getReg()));
- }
-
- template <unsigned NumRegs>
- void addVectorList64Operands(MCInst &Inst, unsigned N) const {
- assert(N == 1 && "Invalid number of operands!");
- static unsigned FirstRegs[] = { ARM64::D0, ARM64::D0_D1,
- ARM64::D0_D1_D2, ARM64::D0_D1_D2_D3 };
- unsigned FirstReg = FirstRegs[NumRegs - 1];
-
- Inst.addOperand(
- MCOperand::CreateReg(FirstReg + getVectorListStart() - ARM64::Q0));
- }
-
- template <unsigned NumRegs>
- void addVectorList128Operands(MCInst &Inst, unsigned N) const {
- assert(N == 1 && "Invalid number of operands!");
- static unsigned FirstRegs[] = { ARM64::Q0, ARM64::Q0_Q1,
- ARM64::Q0_Q1_Q2, ARM64::Q0_Q1_Q2_Q3 };
- unsigned FirstReg = FirstRegs[NumRegs - 1];
-
- Inst.addOperand(
- MCOperand::CreateReg(FirstReg + getVectorListStart() - ARM64::Q0));
- }
-
- void addVectorIndexBOperands(MCInst &Inst, unsigned N) const {
- assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateImm(getVectorIndex()));
- }
-
- void addVectorIndexHOperands(MCInst &Inst, unsigned N) const {
- assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateImm(getVectorIndex()));
- }
-
- void addVectorIndexSOperands(MCInst &Inst, unsigned N) const {
- assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateImm(getVectorIndex()));
- }
-
- void addVectorIndexDOperands(MCInst &Inst, unsigned N) const {
- assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateImm(getVectorIndex()));
- }
-
- void addImmOperands(MCInst &Inst, unsigned N) const {
- assert(N == 1 && "Invalid number of operands!");
- // If this is a pageoff symrefexpr with an addend, adjust the addend
- // to be only the page-offset portion. Otherwise, just add the expr
- // as-is.
- addExpr(Inst, getImm());
- }
-
- void addAdrpLabelOperands(MCInst &Inst, unsigned N) const {
- addImmOperands(Inst, N);
- }
-
- void addAdrLabelOperands(MCInst &Inst, unsigned N) const {
- addImmOperands(Inst, N);
- }
-
- void addSImm9Operands(MCInst &Inst, unsigned N) const {
- assert(N == 1 && "Invalid number of operands!");
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
- assert(MCE && "Invalid constant immediate operand!");
- Inst.addOperand(MCOperand::CreateImm(MCE->getValue()));
- }
-
- void addSImm7s4Operands(MCInst &Inst, unsigned N) const {
- assert(N == 1 && "Invalid number of operands!");
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
- assert(MCE && "Invalid constant immediate operand!");
- Inst.addOperand(MCOperand::CreateImm(MCE->getValue() / 4));
- }
-
- void addSImm7s8Operands(MCInst &Inst, unsigned N) const {
- assert(N == 1 && "Invalid number of operands!");
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
- assert(MCE && "Invalid constant immediate operand!");
- Inst.addOperand(MCOperand::CreateImm(MCE->getValue() / 8));
- }
-
- void addSImm7s16Operands(MCInst &Inst, unsigned N) const {
- assert(N == 1 && "Invalid number of operands!");
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
- assert(MCE && "Invalid constant immediate operand!");
- Inst.addOperand(MCOperand::CreateImm(MCE->getValue() / 16));
- }
-
- void addImm0_7Operands(MCInst &Inst, unsigned N) const {
- assert(N == 1 && "Invalid number of operands!");
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
- assert(MCE && "Invalid constant immediate operand!");
- Inst.addOperand(MCOperand::CreateImm(MCE->getValue()));
- }
-
- void addImm1_8Operands(MCInst &Inst, unsigned N) const {
- assert(N == 1 && "Invalid number of operands!");
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
- assert(MCE && "Invalid constant immediate operand!");
- Inst.addOperand(MCOperand::CreateImm(MCE->getValue()));
- }
-
- void addImm0_15Operands(MCInst &Inst, unsigned N) const {
- assert(N == 1 && "Invalid number of operands!");
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
- assert(MCE && "Invalid constant immediate operand!");
- Inst.addOperand(MCOperand::CreateImm(MCE->getValue()));
- }
-
- void addImm1_16Operands(MCInst &Inst, unsigned N) const {
- assert(N == 1 && "Invalid number of operands!");
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
- assert(MCE && "Invalid constant immediate operand!");
- Inst.addOperand(MCOperand::CreateImm(MCE->getValue()));
- }
-
- void addImm0_31Operands(MCInst &Inst, unsigned N) const {
- assert(N == 1 && "Invalid number of operands!");
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
- assert(MCE && "Invalid constant immediate operand!");
- Inst.addOperand(MCOperand::CreateImm(MCE->getValue()));
- }
-
- void addImm1_31Operands(MCInst &Inst, unsigned N) const {
- assert(N == 1 && "Invalid number of operands!");
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
- assert(MCE && "Invalid constant immediate operand!");
- Inst.addOperand(MCOperand::CreateImm(MCE->getValue()));
- }
-
- void addImm1_32Operands(MCInst &Inst, unsigned N) const {
- assert(N == 1 && "Invalid number of operands!");
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
- assert(MCE && "Invalid constant immediate operand!");
- Inst.addOperand(MCOperand::CreateImm(MCE->getValue()));
- }
-
- void addImm0_63Operands(MCInst &Inst, unsigned N) const {
- assert(N == 1 && "Invalid number of operands!");
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
- assert(MCE && "Invalid constant immediate operand!");
- Inst.addOperand(MCOperand::CreateImm(MCE->getValue()));
- }
-
- void addImm1_63Operands(MCInst &Inst, unsigned N) const {
- assert(N == 1 && "Invalid number of operands!");
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
- assert(MCE && "Invalid constant immediate operand!");
- Inst.addOperand(MCOperand::CreateImm(MCE->getValue()));
- }
-
- void addImm1_64Operands(MCInst &Inst, unsigned N) const {
- assert(N == 1 && "Invalid number of operands!");
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
- assert(MCE && "Invalid constant immediate operand!");
- Inst.addOperand(MCOperand::CreateImm(MCE->getValue()));
- }
-
- void addImm0_127Operands(MCInst &Inst, unsigned N) const {
- assert(N == 1 && "Invalid number of operands!");
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
- assert(MCE && "Invalid constant immediate operand!");
- Inst.addOperand(MCOperand::CreateImm(MCE->getValue()));
- }
-
- void addImm0_255Operands(MCInst &Inst, unsigned N) const {
- assert(N == 1 && "Invalid number of operands!");
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
- assert(MCE && "Invalid constant immediate operand!");
- Inst.addOperand(MCOperand::CreateImm(MCE->getValue()));
- }
-
- void addImm0_65535Operands(MCInst &Inst, unsigned N) const {
- assert(N == 1 && "Invalid number of operands!");
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
- assert(MCE && "Invalid constant immediate operand!");
- Inst.addOperand(MCOperand::CreateImm(MCE->getValue()));
- }
-
- void addLogicalImm32Operands(MCInst &Inst, unsigned N) const {
- assert(N == 1 && "Invalid number of operands!");
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
- assert(MCE && "Invalid logical immediate operand!");
- uint64_t encoding = ARM64_AM::encodeLogicalImmediate(MCE->getValue(), 32);
- Inst.addOperand(MCOperand::CreateImm(encoding));
- }
-
- void addLogicalImm64Operands(MCInst &Inst, unsigned N) const {
- assert(N == 1 && "Invalid number of operands!");
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
- assert(MCE && "Invalid logical immediate operand!");
- uint64_t encoding = ARM64_AM::encodeLogicalImmediate(MCE->getValue(), 64);
- Inst.addOperand(MCOperand::CreateImm(encoding));
- }
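For background on what encodeLogicalImmediate accepts: an A64 logical immediate is a run of ones, rotated, then replicated at some power-of-two element size; all-zeros and all-ones have no encoding. A standalone validity check under that definition (a sketch, not the actual encoder):

#include <cassert>
#include <cstdint>

// Rotate the low Size bits of V right by one.
static uint64_t rotr1(uint64_t V, unsigned Size) {
  uint64_t Mask = (Size == 64) ? ~0ULL : ((1ULL << Size) - 1);
  return ((V >> 1) | (V << (Size - 1))) & Mask;
}

static bool isLogicalImm64(uint64_t V) {
  if (V == 0 || V == ~0ULL)
    return false; // never encodable as a logical immediate
  for (unsigned Size = 2; Size <= 64; Size *= 2) {
    uint64_t Mask = (Size == 64) ? ~0ULL : ((1ULL << Size) - 1);
    uint64_t Elt = V & Mask;
    // The element must replicate across all 64 bits...
    bool Replicates = true;
    for (unsigned I = Size; I < 64; I += Size)
      if (((V >> I) & Mask) != Elt)
        Replicates = false;
    if (!Replicates)
      continue;
    // ...and contain a single cyclic run of ones (one 1->0 boundary).
    if (__builtin_popcountll(Elt & ~rotr1(Elt, Size)) == 1)
      return true;
  }
  return false;
}

int main() {
  assert(isLogicalImm64(0x00ff00ff00ff00ffULL)); // run of 8, element size 16
  assert(isLogicalImm64(0x5555555555555555ULL)); // run of 1, element size 2
  assert(!isLogicalImm64(0x1234));               // multiple runs
  return 0;
}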
-
- void addSIMDImmType10Operands(MCInst &Inst, unsigned N) const {
- assert(N == 1 && "Invalid number of operands!");
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
- assert(MCE && "Invalid immediate operand!");
- uint64_t encoding = ARM64_AM::encodeAdvSIMDModImmType10(MCE->getValue());
- Inst.addOperand(MCOperand::CreateImm(encoding));
- }
-
- void addBranchTarget26Operands(MCInst &Inst, unsigned N) const {
-    // Branch operands don't encode the low bits, so shift them off
-    // here. If it's a label, however, just add it directly, as there's
-    // not enough information yet to do anything else with it.
- assert(N == 1 && "Invalid number of operands!");
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
- if (!MCE) {
- addExpr(Inst, getImm());
- return;
- }
- assert(MCE && "Invalid constant immediate operand!");
- Inst.addOperand(MCOperand::CreateImm(MCE->getValue() >> 2));
- }
-
- void addBranchTarget19Operands(MCInst &Inst, unsigned N) const {
-    // Branch operands don't encode the low bits, so shift them off
-    // here. If it's a label, however, just add it directly, as there's
-    // not enough information yet to do anything else with it.
- assert(N == 1 && "Invalid number of operands!");
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
- if (!MCE) {
- addExpr(Inst, getImm());
- return;
- }
- assert(MCE && "Invalid constant immediate operand!");
- Inst.addOperand(MCOperand::CreateImm(MCE->getValue() >> 2));
- }
-
- void addBranchTarget14Operands(MCInst &Inst, unsigned N) const {
-    // Branch operands don't encode the low bits, so shift them off
-    // here. If it's a label, however, just add it directly, as there's
-    // not enough information yet to do anything else with it.
- assert(N == 1 && "Invalid number of operands!");
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm());
- if (!MCE) {
- addExpr(Inst, getImm());
- return;
- }
- assert(MCE && "Invalid constant immediate operand!");
- Inst.addOperand(MCOperand::CreateImm(MCE->getValue() >> 2));
- }
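The >> 2 in the three methods above reflects that A64 instructions are 4-byte aligned, so branch immediates count words, not bytes:

#include <cassert>

int main() {
  long ByteOffset = 1024;         // branch target is +1 KiB away
  long Encoded = ByteOffset >> 2; // the field value emitted above
  assert(Encoded == 256 && (Encoded << 2) == ByteOffset);
  return 0;
}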
-
- void addFPImmOperands(MCInst &Inst, unsigned N) const {
- assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateImm(getFPImm()));
- }
-
- void addBarrierOperands(MCInst &Inst, unsigned N) const {
- assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateImm(getBarrier()));
- }
-
- void addSystemRegisterOperands(MCInst &Inst, unsigned N) const {
- assert(N == 1 && "Invalid number of operands!");
- if (Kind == k_SystemRegister)
- Inst.addOperand(MCOperand::CreateImm(getSystemRegister()));
- else {
- assert(Kind == k_CPSRField && getCPSRField() == ARM64SYS::cpsr_SPSel);
- Inst.addOperand(MCOperand::CreateImm(ARM64SYS::SPSel));
- }
- }
-
- void addSystemCPSRFieldOperands(MCInst &Inst, unsigned N) const {
- assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateImm(getCPSRField()));
- }
-
- void addSysCROperands(MCInst &Inst, unsigned N) const {
- assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateImm(getSysCR()));
- }
-
- void addPrefetchOperands(MCInst &Inst, unsigned N) const {
- assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateImm(getPrefetch()));
- }
-
- void addShifterOperands(MCInst &Inst, unsigned N) const {
- assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateImm(getShifter()));
- }
-
- void addArithmeticShifterOperands(MCInst &Inst, unsigned N) const {
- assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateImm(getShifter()));
- }
-
- void addMovImm32ShifterOperands(MCInst &Inst, unsigned N) const {
- assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateImm(getShifter()));
- }
-
- void addMovImm64ShifterOperands(MCInst &Inst, unsigned N) const {
- assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateImm(getShifter()));
- }
-
- void addAddSubShifterOperands(MCInst &Inst, unsigned N) const {
- assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateImm(getShifter()));
- }
-
- void addLogicalVecShifterOperands(MCInst &Inst, unsigned N) const {
- assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateImm(getShifter()));
- }
-
- void addLogicalVecHalfWordShifterOperands(MCInst &Inst, unsigned N) const {
- assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateImm(getShifter()));
- }
-
- void addMoveVecShifterOperands(MCInst &Inst, unsigned N) const {
- assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateImm(getShifter()));
- }
-
- void addExtendOperands(MCInst &Inst, unsigned N) const {
- assert(N == 1 && "Invalid number of operands!");
-    // lsl is an alias for UXTX but will be parsed as a k_Shifter operand.
- if (isShifter()) {
- assert(ARM64_AM::getShiftType(getShifter()) == ARM64_AM::LSL);
- unsigned imm = getArithExtendImm(ARM64_AM::UXTX,
- ARM64_AM::getShiftValue(getShifter()));
- Inst.addOperand(MCOperand::CreateImm(imm));
- } else
- Inst.addOperand(MCOperand::CreateImm(getExtend()));
- }
-
- void addExtend64Operands(MCInst &Inst, unsigned N) const {
- assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateImm(getExtend()));
- }
-
- void addExtendLSL64Operands(MCInst &Inst, unsigned N) const {
- assert(N == 1 && "Invalid number of operands!");
-    // lsl is an alias for UXTX but will be parsed as a k_Shifter operand.
- if (isShifter()) {
- assert(ARM64_AM::getShiftType(getShifter()) == ARM64_AM::LSL);
- unsigned imm = getArithExtendImm(ARM64_AM::UXTX,
- ARM64_AM::getShiftValue(getShifter()));
- Inst.addOperand(MCOperand::CreateImm(imm));
- } else
- Inst.addOperand(MCOperand::CreateImm(getExtend()));
- }
-
- void addMemoryRegisterOffsetOperands(MCInst &Inst, unsigned N, bool DoShift) {
- assert(N == 3 && "Invalid number of operands!");
-
- Inst.addOperand(MCOperand::CreateReg(Mem.BaseRegNum));
- Inst.addOperand(MCOperand::CreateReg(Mem.OffsetRegNum));
- unsigned ExtendImm = ARM64_AM::getMemExtendImm(Mem.ExtType, DoShift);
- Inst.addOperand(MCOperand::CreateImm(ExtendImm));
- }
-
- void addMemoryRegisterOffset8Operands(MCInst &Inst, unsigned N) {
- addMemoryRegisterOffsetOperands(Inst, N, Mem.ExplicitShift);
- }
-
- void addMemoryRegisterOffset16Operands(MCInst &Inst, unsigned N) {
- addMemoryRegisterOffsetOperands(Inst, N, Mem.ShiftVal == 1);
- }
-
- void addMemoryRegisterOffset32Operands(MCInst &Inst, unsigned N) {
- addMemoryRegisterOffsetOperands(Inst, N, Mem.ShiftVal == 2);
- }
-
- void addMemoryRegisterOffset64Operands(MCInst &Inst, unsigned N) {
- addMemoryRegisterOffsetOperands(Inst, N, Mem.ShiftVal == 3);
- }
-
- void addMemoryRegisterOffset128Operands(MCInst &Inst, unsigned N) {
- addMemoryRegisterOffsetOperands(Inst, N, Mem.ShiftVal == 4);
- }
-
- void addMemoryIndexedOperands(MCInst &Inst, unsigned N,
- unsigned Scale) const {
- // Add the base register operand.
- Inst.addOperand(MCOperand::CreateReg(Mem.BaseRegNum));
-
- if (!Mem.OffsetImm) {
- // There isn't an offset.
- Inst.addOperand(MCOperand::CreateImm(0));
- return;
- }
-
- // Add the offset operand.
- if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Mem.OffsetImm)) {
- assert(CE->getValue() % Scale == 0 &&
- "Offset operand must be multiple of the scale!");
-
- // The MCInst offset operand doesn't include the low bits (like the
- // instruction encoding).
- Inst.addOperand(MCOperand::CreateImm(CE->getValue() / Scale));
- }
-
- // If this is a pageoff symrefexpr with an addend, the linker will
- // do the scaling of the addend.
- //
- // Otherwise we don't know what this is, so just add the scaling divide to
- // the expression and let the MC fixup evaluation code deal with it.
- const MCExpr *Expr = Mem.OffsetImm;
- ARM64MCExpr::VariantKind ELFRefKind;
- MCSymbolRefExpr::VariantKind DarwinRefKind;
- const MCConstantExpr *Addend;
- if (Scale > 1 &&
- (!ARM64AsmParser::classifySymbolRef(Expr, ELFRefKind, DarwinRefKind,
- Addend) ||
- (Addend != 0 && DarwinRefKind != MCSymbolRefExpr::VK_PAGEOFF))) {
- Expr = MCBinaryExpr::CreateDiv(Expr, MCConstantExpr::Create(Scale, Ctx),
- Ctx);
- }
-
- Inst.addOperand(MCOperand::CreateExpr(Expr));
- }
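A concrete instance of the scaling above, assuming an 8-byte access such as ldr x0, [x1, #16]:

#include <cassert>

int main() {
  long Offset = 16, Scale = 8; // byte offset, access size
  assert(Offset % Scale == 0); // the precondition asserted above
  assert(Offset / Scale == 2); // the value stored in the MCInst
  return 0;
}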
-
- void addMemoryUnscaledOperands(MCInst &Inst, unsigned N) const {
- assert(N == 2 && isMemoryUnscaled() && "Invalid number of operands!");
- // Add the base register operand.
- Inst.addOperand(MCOperand::CreateReg(Mem.BaseRegNum));
-
- // Add the offset operand.
- if (!Mem.OffsetImm)
- Inst.addOperand(MCOperand::CreateImm(0));
- else {
- // Only constant offsets supported.
- const MCConstantExpr *CE = cast<MCConstantExpr>(Mem.OffsetImm);
- Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
- }
- }
-
- void addMemoryIndexed128Operands(MCInst &Inst, unsigned N) const {
- assert(N == 2 && isMemoryIndexed128() && "Invalid number of operands!");
- addMemoryIndexedOperands(Inst, N, 16);
- }
-
- void addMemoryIndexed64Operands(MCInst &Inst, unsigned N) const {
- assert(N == 2 && isMemoryIndexed64() && "Invalid number of operands!");
- addMemoryIndexedOperands(Inst, N, 8);
- }
-
- void addMemoryIndexed32Operands(MCInst &Inst, unsigned N) const {
- assert(N == 2 && isMemoryIndexed32() && "Invalid number of operands!");
- addMemoryIndexedOperands(Inst, N, 4);
- }
-
- void addMemoryIndexed16Operands(MCInst &Inst, unsigned N) const {
- assert(N == 2 && isMemoryIndexed16() && "Invalid number of operands!");
- addMemoryIndexedOperands(Inst, N, 2);
- }
-
- void addMemoryIndexed8Operands(MCInst &Inst, unsigned N) const {
- assert(N == 2 && isMemoryIndexed8() && "Invalid number of operands!");
- addMemoryIndexedOperands(Inst, N, 1);
- }
-
- void addMemoryNoIndexOperands(MCInst &Inst, unsigned N) const {
- assert(N == 1 && isMemoryNoIndex() && "Invalid number of operands!");
- // Add the base register operand (the offset is always zero, so ignore it).
- Inst.addOperand(MCOperand::CreateReg(Mem.BaseRegNum));
- }
-
- void addMemorySIMDNoIndexOperands(MCInst &Inst, unsigned N) const {
- assert(N == 1 && isMemorySIMDNoIndex() && "Invalid number of operands!");
- // Add the base register operand (the offset is always zero, so ignore it).
- Inst.addOperand(MCOperand::CreateReg(Mem.BaseRegNum));
- }
-
- void addMemoryWritebackIndexedOperands(MCInst &Inst, unsigned N,
- unsigned Scale) const {
- assert(N == 2 && "Invalid number of operands!");
-
- // Add the base register operand.
- Inst.addOperand(MCOperand::CreateReg(Mem.BaseRegNum));
-
- // Add the offset operand.
- int64_t Offset = 0;
- if (Mem.OffsetImm) {
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Mem.OffsetImm);
- assert(CE && "Non-constant indexed offset operand!");
- Offset = CE->getValue();
- }
-
- if (Scale != 1) {
- assert(Offset % Scale == 0 &&
- "Offset operand must be a multiple of the scale!");
- Offset /= Scale;
- }
-
- Inst.addOperand(MCOperand::CreateImm(Offset));
- }
-
- void addMemoryIndexedSImm9Operands(MCInst &Inst, unsigned N) const {
- addMemoryWritebackIndexedOperands(Inst, N, 1);
- }
-
- void addMemoryIndexed32SImm7Operands(MCInst &Inst, unsigned N) const {
- addMemoryWritebackIndexedOperands(Inst, N, 4);
- }
-
- void addMemoryIndexed64SImm7Operands(MCInst &Inst, unsigned N) const {
- addMemoryWritebackIndexedOperands(Inst, N, 8);
- }
-
- void addMemoryIndexed128SImm7Operands(MCInst &Inst, unsigned N) const {
- addMemoryWritebackIndexedOperands(Inst, N, 16);
- }
-
- virtual void print(raw_ostream &OS) const;
-
- static ARM64Operand *CreateToken(StringRef Str, bool IsSuffix, SMLoc S,
- MCContext &Ctx) {
- ARM64Operand *Op = new ARM64Operand(k_Token, Ctx);
- Op->Tok.Data = Str.data();
- Op->Tok.Length = Str.size();
- Op->Tok.IsSuffix = IsSuffix;
- Op->StartLoc = S;
- Op->EndLoc = S;
- return Op;
- }
-
- static ARM64Operand *CreateReg(unsigned RegNum, bool isVector, SMLoc S,
- SMLoc E, MCContext &Ctx) {
- ARM64Operand *Op = new ARM64Operand(k_Register, Ctx);
- Op->Reg.RegNum = RegNum;
- Op->Reg.isVector = isVector;
- Op->StartLoc = S;
- Op->EndLoc = E;
- return Op;
- }
-
- static ARM64Operand *CreateVectorList(unsigned RegNum, unsigned Count,
- unsigned NumElements, char ElementKind,
- SMLoc S, SMLoc E, MCContext &Ctx) {
- ARM64Operand *Op = new ARM64Operand(k_VectorList, Ctx);
- Op->VectorList.RegNum = RegNum;
- Op->VectorList.Count = Count;
- Op->VectorList.NumElements = NumElements;
- Op->VectorList.ElementKind = ElementKind;
- Op->StartLoc = S;
- Op->EndLoc = E;
- return Op;
- }
-
- static ARM64Operand *CreateVectorIndex(unsigned Idx, SMLoc S, SMLoc E,
- MCContext &Ctx) {
- ARM64Operand *Op = new ARM64Operand(k_VectorIndex, Ctx);
- Op->VectorIndex.Val = Idx;
- Op->StartLoc = S;
- Op->EndLoc = E;
- return Op;
- }
-
- static ARM64Operand *CreateImm(const MCExpr *Val, SMLoc S, SMLoc E,
- MCContext &Ctx) {
- ARM64Operand *Op = new ARM64Operand(k_Immediate, Ctx);
- Op->Imm.Val = Val;
- Op->StartLoc = S;
- Op->EndLoc = E;
- return Op;
- }
-
- static ARM64Operand *CreateFPImm(unsigned Val, SMLoc S, MCContext &Ctx) {
- ARM64Operand *Op = new ARM64Operand(k_FPImm, Ctx);
- Op->FPImm.Val = Val;
- Op->StartLoc = S;
- Op->EndLoc = S;
- return Op;
- }
-
- static ARM64Operand *CreateBarrier(unsigned Val, SMLoc S, MCContext &Ctx) {
- ARM64Operand *Op = new ARM64Operand(k_Barrier, Ctx);
- Op->Barrier.Val = Val;
- Op->StartLoc = S;
- Op->EndLoc = S;
- return Op;
- }
-
- static ARM64Operand *CreateSystemRegister(uint16_t Val, SMLoc S,
- MCContext &Ctx) {
- ARM64Operand *Op = new ARM64Operand(k_SystemRegister, Ctx);
- Op->SystemRegister.Val = Val;
- Op->StartLoc = S;
- Op->EndLoc = S;
- return Op;
- }
-
- static ARM64Operand *CreateCPSRField(ARM64SYS::CPSRField Field, SMLoc S,
- MCContext &Ctx) {
- ARM64Operand *Op = new ARM64Operand(k_CPSRField, Ctx);
- Op->CPSRField.Field = Field;
- Op->StartLoc = S;
- Op->EndLoc = S;
- return Op;
- }
-
- static ARM64Operand *CreateMem(unsigned BaseRegNum, const MCExpr *Off,
- SMLoc S, SMLoc E, SMLoc OffsetLoc,
- MCContext &Ctx) {
- ARM64Operand *Op = new ARM64Operand(k_Memory, Ctx);
- Op->Mem.BaseRegNum = BaseRegNum;
- Op->Mem.OffsetRegNum = 0;
- Op->Mem.OffsetImm = Off;
- Op->Mem.ExtType = ARM64_AM::UXTX;
- Op->Mem.ShiftVal = 0;
- Op->Mem.ExplicitShift = false;
- Op->Mem.Mode = ImmediateOffset;
- Op->OffsetLoc = OffsetLoc;
- Op->StartLoc = S;
- Op->EndLoc = E;
- return Op;
- }
-
- static ARM64Operand *CreateRegOffsetMem(unsigned BaseReg, unsigned OffsetReg,
- ARM64_AM::ExtendType ExtType,
- unsigned ShiftVal, bool ExplicitShift,
- SMLoc S, SMLoc E, MCContext &Ctx) {
- ARM64Operand *Op = new ARM64Operand(k_Memory, Ctx);
- Op->Mem.BaseRegNum = BaseReg;
- Op->Mem.OffsetRegNum = OffsetReg;
- Op->Mem.OffsetImm = 0;
- Op->Mem.ExtType = ExtType;
- Op->Mem.ShiftVal = ShiftVal;
- Op->Mem.ExplicitShift = ExplicitShift;
- Op->Mem.Mode = RegisterOffset;
- Op->StartLoc = S;
- Op->EndLoc = E;
- return Op;
- }
-
- static ARM64Operand *CreateSysCR(unsigned Val, SMLoc S, SMLoc E,
- MCContext &Ctx) {
- ARM64Operand *Op = new ARM64Operand(k_SysCR, Ctx);
- Op->SysCRImm.Val = Val;
- Op->StartLoc = S;
- Op->EndLoc = E;
- return Op;
- }
-
- static ARM64Operand *CreatePrefetch(unsigned Val, SMLoc S, MCContext &Ctx) {
- ARM64Operand *Op = new ARM64Operand(k_Prefetch, Ctx);
- Op->Prefetch.Val = Val;
- Op->StartLoc = S;
- Op->EndLoc = S;
- return Op;
- }
-
- static ARM64Operand *CreateShifter(ARM64_AM::ShiftType ShOp, unsigned Val,
- SMLoc S, SMLoc E, MCContext &Ctx) {
- ARM64Operand *Op = new ARM64Operand(k_Shifter, Ctx);
- Op->Shifter.Val = ARM64_AM::getShifterImm(ShOp, Val);
- Op->StartLoc = S;
- Op->EndLoc = E;
- return Op;
- }
-
- static ARM64Operand *CreateExtend(ARM64_AM::ExtendType ExtOp, unsigned Val,
- SMLoc S, SMLoc E, MCContext &Ctx) {
- ARM64Operand *Op = new ARM64Operand(k_Extend, Ctx);
- Op->Extend.Val = ARM64_AM::getArithExtendImm(ExtOp, Val);
- Op->StartLoc = S;
- Op->EndLoc = E;
- return Op;
- }
-};
-
-} // end anonymous namespace.
-
-void ARM64Operand::print(raw_ostream &OS) const {
- switch (Kind) {
- case k_FPImm:
- OS << "<fpimm " << getFPImm() << "(" << ARM64_AM::getFPImmFloat(getFPImm())
- << ") >";
- break;
- case k_Barrier: {
- const char *Name =
- ARM64SYS::getBarrierOptName((ARM64SYS::BarrierOption)getBarrier());
- OS << "<barrier ";
- if (Name)
- OS << Name;
- else
- OS << getBarrier();
- OS << ">";
- break;
- }
- case k_SystemRegister: {
- const char *Name = ARM64SYS::getSystemRegisterName(
- (ARM64SYS::SystemRegister)getSystemRegister());
- OS << "<systemreg ";
- if (Name)
- OS << Name;
- else
- OS << "#" << getSystemRegister();
- OS << ">";
- break;
- }
- case k_CPSRField: {
- const char *Name = ARM64SYS::getCPSRFieldName(getCPSRField());
- OS << "<cpsrfield " << Name << ">";
- break;
- }
- case k_Immediate:
- getImm()->print(OS);
- break;
- case k_Memory:
- OS << "<memory>";
- break;
- case k_Register:
- OS << "<register " << getReg() << ">";
- break;
- case k_VectorList: {
- OS << "<vectorlist ";
- unsigned Reg = getVectorListStart();
- for (unsigned i = 0, e = getVectorListCount(); i != e; ++i)
- OS << Reg + i << " ";
- OS << ">";
- break;
- }
- case k_VectorIndex:
- OS << "<vectorindex " << getVectorIndex() << ">";
- break;
- case k_Token:
- OS << "'" << getToken() << "'";
- break;
- case k_SysCR:
- OS << "c" << getSysCR();
- break;
- case k_Prefetch:
- OS << "<prfop ";
- if (ARM64_AM::isNamedPrefetchOp(getPrefetch()))
- OS << ARM64_AM::getPrefetchOpName((ARM64_AM::PrefetchOp)getPrefetch());
- else
- OS << "#" << getPrefetch();
- OS << ">";
- break;
- case k_Shifter: {
- unsigned Val = getShifter();
- OS << "<" << ARM64_AM::getShiftName(ARM64_AM::getShiftType(Val)) << " #"
- << ARM64_AM::getShiftValue(Val) << ">";
- break;
- }
- case k_Extend: {
- unsigned Val = getExtend();
- OS << "<" << ARM64_AM::getExtendName(ARM64_AM::getArithExtendType(Val))
- << " #" << ARM64_AM::getArithShiftValue(Val) << ">";
- break;
- }
- }
-}
-
-/// @name Auto-generated Match Functions
-/// {
-
-static unsigned MatchRegisterName(StringRef Name);
-
-/// }
-
-static unsigned matchVectorRegName(StringRef Name) {
- return StringSwitch<unsigned>(Name)
- .Case("v0", ARM64::Q0)
- .Case("v1", ARM64::Q1)
- .Case("v2", ARM64::Q2)
- .Case("v3", ARM64::Q3)
- .Case("v4", ARM64::Q4)
- .Case("v5", ARM64::Q5)
- .Case("v6", ARM64::Q6)
- .Case("v7", ARM64::Q7)
- .Case("v8", ARM64::Q8)
- .Case("v9", ARM64::Q9)
- .Case("v10", ARM64::Q10)
- .Case("v11", ARM64::Q11)
- .Case("v12", ARM64::Q12)
- .Case("v13", ARM64::Q13)
- .Case("v14", ARM64::Q14)
- .Case("v15", ARM64::Q15)
- .Case("v16", ARM64::Q16)
- .Case("v17", ARM64::Q17)
- .Case("v18", ARM64::Q18)
- .Case("v19", ARM64::Q19)
- .Case("v20", ARM64::Q20)
- .Case("v21", ARM64::Q21)
- .Case("v22", ARM64::Q22)
- .Case("v23", ARM64::Q23)
- .Case("v24", ARM64::Q24)
- .Case("v25", ARM64::Q25)
- .Case("v26", ARM64::Q26)
- .Case("v27", ARM64::Q27)
- .Case("v28", ARM64::Q28)
- .Case("v29", ARM64::Q29)
- .Case("v30", ARM64::Q30)
- .Case("v31", ARM64::Q31)
- .Default(0);
-}
-
-static bool isValidVectorKind(StringRef Name) {
- return StringSwitch<bool>(Name.lower())
- .Case(".8b", true)
- .Case(".16b", true)
- .Case(".4h", true)
- .Case(".8h", true)
- .Case(".2s", true)
- .Case(".4s", true)
- .Case(".1d", true)
- .Case(".2d", true)
- .Case(".1q", true)
-      // Accept the width-neutral ones, too, for verbose syntax. If those
-      // aren't used in the right places, the token operand won't match, so
-      // all will work out.
- .Case(".b", true)
- .Case(".h", true)
- .Case(".s", true)
- .Case(".d", true)
- .Default(false);
-}
-
-static void parseValidVectorKind(StringRef Name, unsigned &NumElements,
- char &ElementKind) {
- assert(isValidVectorKind(Name));
-
- ElementKind = Name.lower()[Name.size() - 1];
- NumElements = 0;
-
- if (Name.size() == 2)
- return;
-
- // Parse the lane count
- Name = Name.drop_front();
- while (isdigit(Name.front())) {
- NumElements = 10 * NumElements + (Name.front() - '0');
- Name = Name.drop_front();
- }
-}
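A standalone sketch of the same suffix split with hypothetical names (the real routine above asserts isValidVectorKind first):

#include <cassert>
#include <cctype>
#include <string>

static void splitVectorKind(const std::string &Name, unsigned &NumElements,
                            char &ElementKind) {
  ElementKind = Name.back(); // trailing kind letter, e.g. 's'
  NumElements = 0;           // stays zero for width-neutral forms like ".s"
  for (size_t I = 1; I + 1 < Name.size(); ++I)
    if (isdigit((unsigned char)Name[I]))
      NumElements = 10 * NumElements + (Name[I] - '0');
}

int main() {
  unsigned N; char K;
  splitVectorKind(".4s", N, K);
  assert(N == 4 && K == 's');
  splitVectorKind(".16b", N, K);
  assert(N == 16 && K == 'b');
  splitVectorKind(".d", N, K);
  assert(N == 0 && K == 'd');
  return 0;
}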
-
-bool ARM64AsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
- SMLoc &EndLoc) {
- StartLoc = getLoc();
- RegNo = tryParseRegister();
- EndLoc = SMLoc::getFromPointer(getLoc().getPointer() - 1);
- return (RegNo == (unsigned)-1);
-}
-
-/// tryParseRegister - Try to parse a register name. The token must be an
-/// Identifier when called; if it is a register name, the token is eaten and
-/// the register number is returned.
-int ARM64AsmParser::tryParseRegister() {
- const AsmToken &Tok = Parser.getTok();
- assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier");
-
- std::string lowerCase = Tok.getString().lower();
- unsigned RegNum = MatchRegisterName(lowerCase);
- // Also handle a few aliases of registers.
- if (RegNum == 0)
- RegNum = StringSwitch<unsigned>(lowerCase)
- .Case("x29", ARM64::FP)
- .Case("x30", ARM64::LR)
- .Case("x31", ARM64::XZR)
- .Case("w31", ARM64::WZR)
- .Default(0);
-
- if (RegNum == 0)
- return -1;
-
- Parser.Lex(); // Eat identifier token.
- return RegNum;
-}
-
-/// tryMatchVectorRegister - Try to parse a vector register name with optional
-/// kind specifier. If it is a register specifier, eat the token and return it.
-int ARM64AsmParser::tryMatchVectorRegister(StringRef &Kind) {
- if (Parser.getTok().isNot(AsmToken::Identifier)) {
- TokError("vector register expected");
- return -1;
- }
-
- StringRef Name = Parser.getTok().getString();
- // If there is a kind specifier, it's separated from the register name by
- // a '.'.
- size_t Start = 0, Next = Name.find('.');
- StringRef Head = Name.slice(Start, Next);
- unsigned RegNum = matchVectorRegName(Head);
- if (RegNum) {
- if (Next != StringRef::npos) {
- Kind = Name.slice(Next, StringRef::npos);
- if (!isValidVectorKind(Kind)) {
- TokError("invalid vector kind qualifier");
- return -1;
- }
- }
- Parser.Lex(); // Eat the register token.
- return RegNum;
- }
- return -1;
-}
-
-static int MatchSysCRName(StringRef Name) {
- // Use the same layout as the tablegen'erated register name matcher. Ugly,
- // but efficient.
- switch (Name.size()) {
- default:
- break;
- case 2:
- if (Name[0] != 'c' && Name[0] != 'C')
- return -1;
- switch (Name[1]) {
- default:
- return -1;
- case '0':
- return 0;
- case '1':
- return 1;
- case '2':
- return 2;
- case '3':
- return 3;
- case '4':
- return 4;
- case '5':
- return 5;
- case '6':
- return 6;
- case '7':
- return 7;
- case '8':
- return 8;
- case '9':
- return 9;
- }
- break;
- case 3:
- if ((Name[0] != 'c' && Name[0] != 'C') || Name[1] != '1')
- return -1;
- switch (Name[2]) {
- default:
- return -1;
- case '0':
- return 10;
- case '1':
- return 11;
- case '2':
- return 12;
- case '3':
- return 13;
- case '4':
- return 14;
- case '5':
- return 15;
- }
- break;
- }
-
- llvm_unreachable("Unhandled SysCR operand string!");
- return -1;
-}
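Functionally, the unrolled matcher above accepts c0..c15 (with either case of 'c') and returns the number. A compact sketch of the same mapping, returning -1 instead of asserting on unhandled input:

#include <cassert>
#include <cctype>
#include <string>

static int matchSysCRCompact(const std::string &Name) {
  if (Name.size() < 2 || Name.size() > 3 ||
      (Name[0] != 'c' && Name[0] != 'C'))
    return -1;
  if (Name.size() == 2)
    return isdigit((unsigned char)Name[1]) ? Name[1] - '0' : -1; // c0..c9
  if (Name[1] == '1' && Name[2] >= '0' && Name[2] <= '5')
    return 10 + (Name[2] - '0');                                 // c10..c15
  return -1;
}

int main() {
  assert(matchSysCRCompact("c7") == 7);
  assert(matchSysCRCompact("C15") == 15);
  assert(matchSysCRCompact("c16") == -1);
  return 0;
}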
-
-/// tryParseSysCROperand - Try to parse a system instruction CR operand name.
-ARM64AsmParser::OperandMatchResultTy
-ARM64AsmParser::tryParseSysCROperand(OperandVector &Operands) {
- SMLoc S = getLoc();
- const AsmToken &Tok = Parser.getTok();
- if (Tok.isNot(AsmToken::Identifier))
- return MatchOperand_NoMatch;
-
- int Num = MatchSysCRName(Tok.getString());
- if (Num == -1)
- return MatchOperand_NoMatch;
-
- Parser.Lex(); // Eat identifier token.
- Operands.push_back(ARM64Operand::CreateSysCR(Num, S, getLoc(), getContext()));
- return MatchOperand_Success;
-}
-
-/// tryParsePrefetch - Try to parse a prefetch operand.
-ARM64AsmParser::OperandMatchResultTy
-ARM64AsmParser::tryParsePrefetch(OperandVector &Operands) {
- SMLoc S = getLoc();
- const AsmToken &Tok = Parser.getTok();
- // Either an identifier for named values or a 5-bit immediate.
- if (Tok.is(AsmToken::Hash)) {
- Parser.Lex(); // Eat hash token.
- const MCExpr *ImmVal;
- if (getParser().parseExpression(ImmVal))
- return MatchOperand_ParseFail;
-
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(ImmVal);
- if (!MCE) {
- TokError("immediate value expected for prefetch operand");
- return MatchOperand_ParseFail;
- }
- unsigned prfop = MCE->getValue();
- if (prfop > 31) {
- TokError("prefetch operand out of range, [0,31] expected");
- return MatchOperand_ParseFail;
- }
-
- Operands.push_back(ARM64Operand::CreatePrefetch(prfop, S, getContext()));
- return MatchOperand_Success;
- }
-
- if (Tok.isNot(AsmToken::Identifier)) {
- TokError("pre-fetch hint expected");
- return MatchOperand_ParseFail;
- }
-
- unsigned prfop = StringSwitch<unsigned>(Tok.getString())
- .Case("pldl1keep", ARM64_AM::PLDL1KEEP)
- .Case("pldl1strm", ARM64_AM::PLDL1STRM)
- .Case("pldl2keep", ARM64_AM::PLDL2KEEP)
- .Case("pldl2strm", ARM64_AM::PLDL2STRM)
- .Case("pldl3keep", ARM64_AM::PLDL3KEEP)
- .Case("pldl3strm", ARM64_AM::PLDL3STRM)
- .Case("pstl1keep", ARM64_AM::PSTL1KEEP)
- .Case("pstl1strm", ARM64_AM::PSTL1STRM)
- .Case("pstl2keep", ARM64_AM::PSTL2KEEP)
- .Case("pstl2strm", ARM64_AM::PSTL2STRM)
- .Case("pstl3keep", ARM64_AM::PSTL3KEEP)
- .Case("pstl3strm", ARM64_AM::PSTL3STRM)
- .Default(0xff);
- if (prfop == 0xff) {
- TokError("pre-fetch hint expected");
- return MatchOperand_ParseFail;
- }
-
- Parser.Lex(); // Eat identifier token.
- Operands.push_back(ARM64Operand::CreatePrefetch(prfop, S, getContext()));
- return MatchOperand_Success;
-}
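For orientation, the named prefetch ops above follow the architectural 5-bit prfop layout: type in bits [4:3] (PLD=0, PLI=1, PST=2), cache level in bits [2:1] (L1=0, L2=1, L3=2), policy in bit [0] (KEEP=0, STRM=1). A sketch of that packing:

#include <cassert>

static unsigned prfop(unsigned Type, unsigned Level, unsigned Policy) {
  return (Type << 3) | (Level << 1) | Policy; // 5-bit prfop field
}

int main() {
  assert(prfop(0, 0, 0) == 0);  // pldl1keep
  assert(prfop(0, 0, 1) == 1);  // pldl1strm
  assert(prfop(2, 2, 1) == 21); // pstl3strm
  return 0;
}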
-
-/// tryParseAdrpLabel - Parse and validate a source label for the ADRP
-/// instruction.
-ARM64AsmParser::OperandMatchResultTy
-ARM64AsmParser::tryParseAdrpLabel(OperandVector &Operands) {
- SMLoc S = getLoc();
- const MCExpr *Expr;
- if (parseSymbolicImmVal(Expr))
- return MatchOperand_ParseFail;
-
- ARM64MCExpr::VariantKind ELFRefKind;
- MCSymbolRefExpr::VariantKind DarwinRefKind;
- const MCConstantExpr *Addend;
- if (!classifySymbolRef(Expr, ELFRefKind, DarwinRefKind, Addend)) {
- Error(S, "modified label reference + constant expected");
- return MatchOperand_ParseFail;
- }
-
- if (DarwinRefKind == MCSymbolRefExpr::VK_None &&
- ELFRefKind == ARM64MCExpr::VK_INVALID) {
- // No modifier was specified at all; this is the syntax for an ELF basic
- // ADRP relocation (unfortunately).
- Expr = ARM64MCExpr::Create(Expr, ARM64MCExpr::VK_ABS_PAGE, getContext());
- } else if ((DarwinRefKind == MCSymbolRefExpr::VK_GOTPAGE ||
- DarwinRefKind == MCSymbolRefExpr::VK_TLVPPAGE) &&
- Addend != 0) {
- Error(S, "gotpage label reference not allowed an addend");
- return MatchOperand_ParseFail;
- } else if (DarwinRefKind != MCSymbolRefExpr::VK_PAGE &&
- DarwinRefKind != MCSymbolRefExpr::VK_GOTPAGE &&
- DarwinRefKind != MCSymbolRefExpr::VK_TLVPPAGE &&
- ELFRefKind != ARM64MCExpr::VK_GOT_PAGE &&
- ELFRefKind != ARM64MCExpr::VK_GOTTPREL_PAGE &&
- ELFRefKind != ARM64MCExpr::VK_TLSDESC_PAGE) {
- // The operand must be an @page or @gotpage qualified symbolref.
- Error(S, "page or gotpage label reference expected");
- return MatchOperand_ParseFail;
- }
-
- // We have a label reference possibly with addend. The addend is a raw value
- // here. The linker will adjust it to only reference the page.
- SMLoc E = SMLoc::getFromPointer(getLoc().getPointer() - 1);
- Operands.push_back(ARM64Operand::CreateImm(Expr, S, E, getContext()));
-
- return MatchOperand_Success;
-}
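The page/pageoff split referred to above, in raw arithmetic (4 KiB pages; the addresses are made up):

#include <cassert>
#include <cstdint>

int main() {
  uint64_t PC  = 0x400123; // address of the adrp
  uint64_t Sym = 0x40a456; // target symbol address
  uint64_t PageDelta = (Sym & ~0xfffULL) - (PC & ~0xfffULL); // adrp imm
  uint64_t PageOff   = Sym & 0xfffULL;                       // @pageoff
  assert((PC & ~0xfffULL) + PageDelta + PageOff == Sym);
  return 0;
}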
-
-/// tryParseAdrLabel - Parse and validate a source label for the ADR
-/// instruction.
-ARM64AsmParser::OperandMatchResultTy
-ARM64AsmParser::tryParseAdrLabel(OperandVector &Operands) {
- SMLoc S = getLoc();
- const MCExpr *Expr;
- if (getParser().parseExpression(Expr))
- return MatchOperand_ParseFail;
-
- // The operand must be an un-qualified assembler local symbolref.
- // FIXME: wrong for ELF.
- if (const MCSymbolRefExpr *SRE = dyn_cast<const MCSymbolRefExpr>(Expr)) {
- // FIXME: Should reference the MachineAsmInfo to get the private prefix.
- bool isTemporary = SRE->getSymbol().getName().startswith("L");
- if (!isTemporary || SRE->getKind() != MCSymbolRefExpr::VK_None) {
- Error(S, "unqualified, assembler-local label name expected");
- return MatchOperand_ParseFail;
- }
- }
-
- SMLoc E = SMLoc::getFromPointer(getLoc().getPointer() - 1);
- Operands.push_back(ARM64Operand::CreateImm(Expr, S, E, getContext()));
-
- return MatchOperand_Success;
-}
-
-/// tryParseFPImm - A floating point immediate expression operand.
-ARM64AsmParser::OperandMatchResultTy
-ARM64AsmParser::tryParseFPImm(OperandVector &Operands) {
- SMLoc S = getLoc();
-
- if (Parser.getTok().isNot(AsmToken::Hash))
- return MatchOperand_NoMatch;
- Parser.Lex(); // Eat the '#'.
-
- // Handle negation, as that still comes through as a separate token.
- bool isNegative = false;
- if (Parser.getTok().is(AsmToken::Minus)) {
- isNegative = true;
- Parser.Lex();
- }
- const AsmToken &Tok = Parser.getTok();
- if (Tok.is(AsmToken::Real)) {
- APFloat RealVal(APFloat::IEEEdouble, Tok.getString());
- uint64_t IntVal = RealVal.bitcastToAPInt().getZExtValue();
- // If we had a '-' in front, toggle the sign bit.
- IntVal ^= (uint64_t)isNegative << 63;
- int Val = ARM64_AM::getFP64Imm(APInt(64, IntVal));
- Parser.Lex(); // Eat the token.
-    // Check for out-of-range values. As an exception, we let zero through,
-    // as we handle that special case in post-processing before matching in
-    // order to use the zero register for it.
- if (Val == -1 && !RealVal.isZero()) {
- TokError("floating point value out of range");
- return MatchOperand_ParseFail;
- }
- Operands.push_back(ARM64Operand::CreateFPImm(Val, S, getContext()));
- return MatchOperand_Success;
- }
- if (Tok.is(AsmToken::Integer)) {
- int64_t Val;
- if (!isNegative && Tok.getString().startswith("0x")) {
- Val = Tok.getIntVal();
- if (Val > 255 || Val < 0) {
- TokError("encoded floating point value out of range");
- return MatchOperand_ParseFail;
- }
- } else {
- APFloat RealVal(APFloat::IEEEdouble, Tok.getString());
- uint64_t IntVal = RealVal.bitcastToAPInt().getZExtValue();
- // If we had a '-' in front, toggle the sign bit.
- IntVal ^= (uint64_t)isNegative << 63;
- Val = ARM64_AM::getFP64Imm(APInt(64, IntVal));
- }
- Parser.Lex(); // Eat the token.
- Operands.push_back(ARM64Operand::CreateFPImm(Val, S, getContext()));
- return MatchOperand_Success;
- }
-
- TokError("invalid floating point immediate");
- return MatchOperand_ParseFail;
-}
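The sign-bit XOR used twice above, standalone: negation reaches the parser as a separate '-' token, so it is applied by flipping bit 63 of the IEEE-754 encoding:

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  double D = 2.0;
  uint64_t Bits;
  std::memcpy(&Bits, &D, sizeof Bits);
  Bits ^= (uint64_t)1 << 63; // toggle the IEEE-754 sign bit
  double Neg;
  std::memcpy(&Neg, &Bits, sizeof Neg);
  assert(Neg == -2.0);
  return 0;
}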
-
-/// parseCondCodeString - Parse a Condition Code string.
-unsigned ARM64AsmParser::parseCondCodeString(StringRef Cond) {
- unsigned CC = StringSwitch<unsigned>(Cond)
- .Case("eq", ARM64CC::EQ)
- .Case("ne", ARM64CC::NE)
- .Case("cs", ARM64CC::CS)
- .Case("hs", ARM64CC::CS)
- .Case("cc", ARM64CC::CC)
- .Case("lo", ARM64CC::CC)
- .Case("mi", ARM64CC::MI)
- .Case("pl", ARM64CC::PL)
- .Case("vs", ARM64CC::VS)
- .Case("vc", ARM64CC::VC)
- .Case("hi", ARM64CC::HI)
- .Case("ls", ARM64CC::LS)
- .Case("ge", ARM64CC::GE)
- .Case("lt", ARM64CC::LT)
- .Case("gt", ARM64CC::GT)
- .Case("le", ARM64CC::LE)
- .Case("al", ARM64CC::AL)
- // Upper case works too. Not mixed case, though.
- .Case("EQ", ARM64CC::EQ)
- .Case("NE", ARM64CC::NE)
- .Case("CS", ARM64CC::CS)
- .Case("HS", ARM64CC::CS)
- .Case("CC", ARM64CC::CC)
- .Case("LO", ARM64CC::CC)
- .Case("MI", ARM64CC::MI)
- .Case("PL", ARM64CC::PL)
- .Case("VS", ARM64CC::VS)
- .Case("VC", ARM64CC::VC)
- .Case("HI", ARM64CC::HI)
- .Case("LS", ARM64CC::LS)
- .Case("GE", ARM64CC::GE)
- .Case("LT", ARM64CC::LT)
- .Case("GT", ARM64CC::GT)
- .Case("LE", ARM64CC::LE)
- .Case("AL", ARM64CC::AL)
- .Default(~0U);
- return CC;
-}
-
-/// parseCondCode - Parse a Condition Code operand.
-bool ARM64AsmParser::parseCondCode(OperandVector &Operands,
- bool invertCondCode) {
- SMLoc S = getLoc();
- const AsmToken &Tok = Parser.getTok();
- assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier");
-
- StringRef Cond = Tok.getString();
- unsigned CC = parseCondCodeString(Cond);
- if (CC == ~0U)
- return TokError("invalid condition code");
- Parser.Lex(); // Eat identifier token.
-
- if (invertCondCode)
- CC = ARM64CC::getInvertedCondCode(ARM64CC::CondCode(CC));
-
- const MCExpr *CCExpr = MCConstantExpr::Create(CC, getContext());
- Operands.push_back(
- ARM64Operand::CreateImm(CCExpr, S, getLoc(), getContext()));
- return false;
-}
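The inversion above works because A64 condition codes come in adjacent true/false pairs (EQ/NE, CS/CC, ...), so inverting flips the low bit; AL has no inverse. A sketch assuming the standard encoding:

#include <cassert>

enum Cond { EQ = 0, NE = 1, CS = 2, CC = 3, AL = 14 }; // standard A64 order

static int invert(int C) { return C ^ 1; } // valid for codes 0..13

int main() {
  assert(invert(EQ) == NE);
  assert(invert(CS) == CC);
  return 0;
}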
-
-/// parseOptionalShift - Some operands take an optional shift argument. Parse
-/// them if present.
-bool ARM64AsmParser::parseOptionalShift(OperandVector &Operands) {
- const AsmToken &Tok = Parser.getTok();
- ARM64_AM::ShiftType ShOp = StringSwitch<ARM64_AM::ShiftType>(Tok.getString())
- .Case("lsl", ARM64_AM::LSL)
- .Case("lsr", ARM64_AM::LSR)
- .Case("asr", ARM64_AM::ASR)
- .Case("ror", ARM64_AM::ROR)
- .Case("msl", ARM64_AM::MSL)
- .Case("LSL", ARM64_AM::LSL)
- .Case("LSR", ARM64_AM::LSR)
- .Case("ASR", ARM64_AM::ASR)
- .Case("ROR", ARM64_AM::ROR)
- .Case("MSL", ARM64_AM::MSL)
- .Default(ARM64_AM::InvalidShift);
- if (ShOp == ARM64_AM::InvalidShift)
- return true;
-
- SMLoc S = Tok.getLoc();
- Parser.Lex();
-
- // We expect a number here.
- if (getLexer().isNot(AsmToken::Hash))
- return TokError("immediate value expected for shifter operand");
- Parser.Lex(); // Eat the '#'.
-
- SMLoc ExprLoc = getLoc();
- const MCExpr *ImmVal;
- if (getParser().parseExpression(ImmVal))
- return true;
-
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(ImmVal);
- if (!MCE)
- return TokError("immediate value expected for shifter operand");
-
- if ((MCE->getValue() & 0x3f) != MCE->getValue())
- return Error(ExprLoc, "immediate value too large for shifter operand");
-
- SMLoc E = SMLoc::getFromPointer(getLoc().getPointer() - 1);
- Operands.push_back(
- ARM64Operand::CreateShifter(ShOp, MCE->getValue(), S, E, getContext()));
- return false;
-}
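The mask test above, (value & 0x3f) != value, is a compact range check: it accepts exactly 0..63, rejecting both larger and negative shift amounts:

#include <cassert>
#include <cstdint>

static bool fitsShifter(int64_t V) { return (V & 0x3f) == V; }

int main() {
  assert(fitsShifter(0) && fitsShifter(63)); // in range
  assert(!fitsShifter(64));                  // too large
  assert(!fitsShifter(-1));                  // negative rejected too
  return 0;
}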
-
-/// parseOptionalExtend - Some operands take an optional extend argument. Parse
-/// them if present.
-bool ARM64AsmParser::parseOptionalExtend(OperandVector &Operands) {
- const AsmToken &Tok = Parser.getTok();
- ARM64_AM::ExtendType ExtOp =
- StringSwitch<ARM64_AM::ExtendType>(Tok.getString())
- .Case("uxtb", ARM64_AM::UXTB)
- .Case("uxth", ARM64_AM::UXTH)
- .Case("uxtw", ARM64_AM::UXTW)
- .Case("uxtx", ARM64_AM::UXTX)
- .Case("lsl", ARM64_AM::UXTX) // Alias for UXTX
- .Case("sxtb", ARM64_AM::SXTB)
- .Case("sxth", ARM64_AM::SXTH)
- .Case("sxtw", ARM64_AM::SXTW)
- .Case("sxtx", ARM64_AM::SXTX)
- .Case("UXTB", ARM64_AM::UXTB)
- .Case("UXTH", ARM64_AM::UXTH)
- .Case("UXTW", ARM64_AM::UXTW)
- .Case("UXTX", ARM64_AM::UXTX)
- .Case("LSL", ARM64_AM::UXTX) // Alias for UXTX
- .Case("SXTB", ARM64_AM::SXTB)
- .Case("SXTH", ARM64_AM::SXTH)
- .Case("SXTW", ARM64_AM::SXTW)
- .Case("SXTX", ARM64_AM::SXTX)
- .Default(ARM64_AM::InvalidExtend);
- if (ExtOp == ARM64_AM::InvalidExtend)
- return true;
-
- SMLoc S = Tok.getLoc();
- Parser.Lex();
-
- if (getLexer().is(AsmToken::EndOfStatement) ||
- getLexer().is(AsmToken::Comma)) {
- SMLoc E = SMLoc::getFromPointer(getLoc().getPointer() - 1);
- Operands.push_back(
- ARM64Operand::CreateExtend(ExtOp, 0, S, E, getContext()));
- return false;
- }
-
- if (getLexer().isNot(AsmToken::Hash)) {
- SMLoc E = SMLoc::getFromPointer(getLoc().getPointer() - 1);
- Operands.push_back(
- ARM64Operand::CreateExtend(ExtOp, 0, S, E, getContext()));
- return false;
- }
-
- Parser.Lex(); // Eat the '#'.
-
- const MCExpr *ImmVal;
- if (getParser().parseExpression(ImmVal))
- return true;
-
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(ImmVal);
- if (!MCE)
- return TokError("immediate value expected for extend operand");
-
- SMLoc E = SMLoc::getFromPointer(getLoc().getPointer() - 1);
- Operands.push_back(
- ARM64Operand::CreateExtend(ExtOp, MCE->getValue(), S, E, getContext()));
- return false;
-}
-
-/// parseSysAlias - The IC, DC, AT, and TLBI instructions are simple aliases for
-/// the SYS instruction. Parse them specially so that we create a SYS MCInst.
-bool ARM64AsmParser::parseSysAlias(StringRef Name, SMLoc NameLoc,
- OperandVector &Operands) {
- if (Name.find('.') != StringRef::npos)
- return TokError("invalid operand");
-
- Mnemonic = Name;
- Operands.push_back(
- ARM64Operand::CreateToken("sys", false, NameLoc, getContext()));
-
- const AsmToken &Tok = Parser.getTok();
- StringRef Op = Tok.getString();
- SMLoc S = Tok.getLoc();
-
- const MCExpr *Expr = 0;
-
-#define SYS_ALIAS(op1, Cn, Cm, op2) \
- do { \
- Expr = MCConstantExpr::Create(op1, getContext()); \
- Operands.push_back( \
- ARM64Operand::CreateImm(Expr, S, getLoc(), getContext())); \
- Operands.push_back( \
- ARM64Operand::CreateSysCR(Cn, S, getLoc(), getContext())); \
- Operands.push_back( \
- ARM64Operand::CreateSysCR(Cm, S, getLoc(), getContext())); \
- Expr = MCConstantExpr::Create(op2, getContext()); \
- Operands.push_back( \
- ARM64Operand::CreateImm(Expr, S, getLoc(), getContext())); \
- } while (0)
-
- if (Mnemonic == "ic") {
- if (!Op.compare_lower("ialluis")) {
- // SYS #0, C7, C1, #0
- SYS_ALIAS(0, 7, 1, 0);
- } else if (!Op.compare_lower("iallu")) {
- // SYS #0, C7, C5, #0
- SYS_ALIAS(0, 7, 5, 0);
- } else if (!Op.compare_lower("ivau")) {
- // SYS #3, C7, C5, #1
- SYS_ALIAS(3, 7, 5, 1);
- } else {
- return TokError("invalid operand for IC instruction");
- }
- } else if (Mnemonic == "dc") {
- if (!Op.compare_lower("zva")) {
- // SYS #3, C7, C4, #1
- SYS_ALIAS(3, 7, 4, 1);
- } else if (!Op.compare_lower("ivac")) {
-      // SYS #0, C7, C6, #1
- SYS_ALIAS(0, 7, 6, 1);
- } else if (!Op.compare_lower("isw")) {
- // SYS #0, C7, C6, #2
- SYS_ALIAS(0, 7, 6, 2);
- } else if (!Op.compare_lower("cvac")) {
- // SYS #3, C7, C10, #1
- SYS_ALIAS(3, 7, 10, 1);
- } else if (!Op.compare_lower("csw")) {
- // SYS #0, C7, C10, #2
- SYS_ALIAS(0, 7, 10, 2);
- } else if (!Op.compare_lower("cvau")) {
- // SYS #3, C7, C11, #1
- SYS_ALIAS(3, 7, 11, 1);
- } else if (!Op.compare_lower("civac")) {
- // SYS #3, C7, C14, #1
- SYS_ALIAS(3, 7, 14, 1);
- } else if (!Op.compare_lower("cisw")) {
- // SYS #0, C7, C14, #2
- SYS_ALIAS(0, 7, 14, 2);
- } else {
- return TokError("invalid operand for DC instruction");
- }
- } else if (Mnemonic == "at") {
- if (!Op.compare_lower("s1e1r")) {
- // SYS #0, C7, C8, #0
- SYS_ALIAS(0, 7, 8, 0);
- } else if (!Op.compare_lower("s1e2r")) {
- // SYS #4, C7, C8, #0
- SYS_ALIAS(4, 7, 8, 0);
- } else if (!Op.compare_lower("s1e3r")) {
- // SYS #6, C7, C8, #0
- SYS_ALIAS(6, 7, 8, 0);
- } else if (!Op.compare_lower("s1e1w")) {
- // SYS #0, C7, C8, #1
- SYS_ALIAS(0, 7, 8, 1);
- } else if (!Op.compare_lower("s1e2w")) {
- // SYS #4, C7, C8, #1
- SYS_ALIAS(4, 7, 8, 1);
- } else if (!Op.compare_lower("s1e3w")) {
- // SYS #6, C7, C8, #1
- SYS_ALIAS(6, 7, 8, 1);
- } else if (!Op.compare_lower("s1e0r")) {
-      // SYS #0, C7, C8, #2
- SYS_ALIAS(0, 7, 8, 2);
- } else if (!Op.compare_lower("s1e0w")) {
- // SYS #0, C7, C8, #3
- SYS_ALIAS(0, 7, 8, 3);
- } else if (!Op.compare_lower("s12e1r")) {
- // SYS #4, C7, C8, #4
- SYS_ALIAS(4, 7, 8, 4);
- } else if (!Op.compare_lower("s12e1w")) {
- // SYS #4, C7, C8, #5
- SYS_ALIAS(4, 7, 8, 5);
- } else if (!Op.compare_lower("s12e0r")) {
- // SYS #4, C7, C8, #6
- SYS_ALIAS(4, 7, 8, 6);
- } else if (!Op.compare_lower("s12e0w")) {
- // SYS #4, C7, C8, #7
- SYS_ALIAS(4, 7, 8, 7);
- } else {
- return TokError("invalid operand for AT instruction");
- }
- } else if (Mnemonic == "tlbi") {
- if (!Op.compare_lower("vmalle1is")) {
- // SYS #0, C8, C3, #0
- SYS_ALIAS(0, 8, 3, 0);
- } else if (!Op.compare_lower("alle2is")) {
- // SYS #4, C8, C3, #0
- SYS_ALIAS(4, 8, 3, 0);
- } else if (!Op.compare_lower("alle3is")) {
- // SYS #6, C8, C3, #0
- SYS_ALIAS(6, 8, 3, 0);
- } else if (!Op.compare_lower("vae1is")) {
- // SYS #0, C8, C3, #1
- SYS_ALIAS(0, 8, 3, 1);
- } else if (!Op.compare_lower("vae2is")) {
- // SYS #4, C8, C3, #1
- SYS_ALIAS(4, 8, 3, 1);
- } else if (!Op.compare_lower("vae3is")) {
- // SYS #6, C8, C3, #1
- SYS_ALIAS(6, 8, 3, 1);
- } else if (!Op.compare_lower("aside1is")) {
- // SYS #0, C8, C3, #2
- SYS_ALIAS(0, 8, 3, 2);
- } else if (!Op.compare_lower("vaae1is")) {
- // SYS #0, C8, C3, #3
- SYS_ALIAS(0, 8, 3, 3);
- } else if (!Op.compare_lower("alle1is")) {
- // SYS #4, C8, C3, #4
- SYS_ALIAS(4, 8, 3, 4);
- } else if (!Op.compare_lower("vale1is")) {
- // SYS #0, C8, C3, #5
- SYS_ALIAS(0, 8, 3, 5);
- } else if (!Op.compare_lower("vaale1is")) {
- // SYS #0, C8, C3, #7
- SYS_ALIAS(0, 8, 3, 7);
- } else if (!Op.compare_lower("vmalle1")) {
- // SYS #0, C8, C7, #0
- SYS_ALIAS(0, 8, 7, 0);
- } else if (!Op.compare_lower("alle2")) {
- // SYS #4, C8, C7, #0
- SYS_ALIAS(4, 8, 7, 0);
- } else if (!Op.compare_lower("vale2is")) {
- // SYS #4, C8, C3, #5
- SYS_ALIAS(4, 8, 3, 5);
- } else if (!Op.compare_lower("vale3is")) {
- // SYS #6, C8, C3, #5
- SYS_ALIAS(6, 8, 3, 5);
- } else if (!Op.compare_lower("alle3")) {
- // SYS #6, C8, C7, #0
- SYS_ALIAS(6, 8, 7, 0);
- } else if (!Op.compare_lower("vae1")) {
- // SYS #0, C8, C7, #1
- SYS_ALIAS(0, 8, 7, 1);
- } else if (!Op.compare_lower("vae2")) {
- // SYS #4, C8, C7, #1
- SYS_ALIAS(4, 8, 7, 1);
- } else if (!Op.compare_lower("vae3")) {
- // SYS #6, C8, C7, #1
- SYS_ALIAS(6, 8, 7, 1);
- } else if (!Op.compare_lower("aside1")) {
- // SYS #0, C8, C7, #2
- SYS_ALIAS(0, 8, 7, 2);
- } else if (!Op.compare_lower("vaae1")) {
- // SYS #0, C8, C7, #3
- SYS_ALIAS(0, 8, 7, 3);
- } else if (!Op.compare_lower("alle1")) {
- // SYS #4, C8, C7, #4
- SYS_ALIAS(4, 8, 7, 4);
- } else if (!Op.compare_lower("vale1")) {
- // SYS #0, C8, C7, #5
- SYS_ALIAS(0, 8, 7, 5);
- } else if (!Op.compare_lower("vale2")) {
- // SYS #4, C8, C7, #5
- SYS_ALIAS(4, 8, 7, 5);
- } else if (!Op.compare_lower("vale3")) {
- // SYS #6, C8, C7, #5
- SYS_ALIAS(6, 8, 7, 5);
- } else if (!Op.compare_lower("vaale1")) {
- // SYS #0, C8, C7, #7
- SYS_ALIAS(0, 8, 7, 7);
- } else if (!Op.compare_lower("ipas2e1")) {
- // SYS #4, C8, C4, #1
- SYS_ALIAS(4, 8, 4, 1);
- } else if (!Op.compare_lower("ipas2le1")) {
- // SYS #4, C8, C4, #5
- SYS_ALIAS(4, 8, 4, 5);
- } else if (!Op.compare_lower("vmalls12e1")) {
- // SYS #4, C8, C7, #6
- SYS_ALIAS(4, 8, 7, 6);
- } else if (!Op.compare_lower("vmalls12e1is")) {
- // SYS #4, C8, C3, #6
- SYS_ALIAS(4, 8, 3, 6);
- } else {
- return TokError("invalid operand for TLBI instruction");
- }
- }
-
-#undef SYS_ALIAS
-
- Parser.Lex(); // Eat operand.
-
- // Check for the optional register operand.
- if (getLexer().is(AsmToken::Comma)) {
- Parser.Lex(); // Eat comma.
-
- if (Tok.isNot(AsmToken::Identifier) || parseRegister(Operands))
- return TokError("expected register operand");
- }
-
- if (getLexer().isNot(AsmToken::EndOfStatement)) {
- Parser.eatToEndOfStatement();
- return TokError("unexpected token in argument list");
- }
-
- Parser.Lex(); // Consume the EndOfStatement
- return false;
-}
-
-ARM64AsmParser::OperandMatchResultTy
-ARM64AsmParser::tryParseBarrierOperand(OperandVector &Operands) {
- const AsmToken &Tok = Parser.getTok();
-
- // Can be either a #imm style literal or an option name
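- // e.g. "dmb ish" and "dmb #11" should be equivalent (ISH encodes as 0b1011).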
- if (Tok.is(AsmToken::Hash)) {
- // Immediate operand.
- Parser.Lex(); // Eat the '#'
- const MCExpr *ImmVal;
- SMLoc ExprLoc = getLoc();
- if (getParser().parseExpression(ImmVal))
- return MatchOperand_ParseFail;
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(ImmVal);
- if (!MCE) {
- Error(ExprLoc, "immediate value expected for barrier operand");
- return MatchOperand_ParseFail;
- }
- if (MCE->getValue() < 0 || MCE->getValue() > 15) {
- Error(ExprLoc, "barrier operand out of range");
- return MatchOperand_ParseFail;
- }
- Operands.push_back(
- ARM64Operand::CreateBarrier(MCE->getValue(), ExprLoc, getContext()));
- return MatchOperand_Success;
- }
-
- if (Tok.isNot(AsmToken::Identifier)) {
- TokError("invalid operand for instruction");
- return MatchOperand_ParseFail;
- }
-
- unsigned Opt = StringSwitch<unsigned>(Tok.getString())
- .Case("oshld", ARM64SYS::OSHLD)
- .Case("oshst", ARM64SYS::OSHST)
- .Case("osh", ARM64SYS::OSH)
- .Case("nshld", ARM64SYS::NSHLD)
- .Case("nshst", ARM64SYS::NSHST)
- .Case("nsh", ARM64SYS::NSH)
- .Case("ishld", ARM64SYS::ISHLD)
- .Case("ishst", ARM64SYS::ISHST)
- .Case("ish", ARM64SYS::ISH)
- .Case("ld", ARM64SYS::LD)
- .Case("st", ARM64SYS::ST)
- .Case("sy", ARM64SYS::SY)
- .Default(ARM64SYS::InvalidBarrier);
- if (Opt == ARM64SYS::InvalidBarrier) {
- TokError("invalid barrier option name");
- return MatchOperand_ParseFail;
- }
-
- // The only valid named option for ISB is 'sy'
- if (Mnemonic == "isb" && Opt != ARM64SYS::SY) {
- TokError("'sy' or #imm operand expected");
- return MatchOperand_ParseFail;
- }
-
- Operands.push_back(ARM64Operand::CreateBarrier(Opt, getLoc(), getContext()));
- Parser.Lex(); // Consume the option
-
- return MatchOperand_Success;
-}
-
-ARM64AsmParser::OperandMatchResultTy
-ARM64AsmParser::tryParseSystemRegister(OperandVector &Operands) {
- const AsmToken &Tok = Parser.getTok();
-
- // It can be specified as a symbolic name.
- if (Tok.isNot(AsmToken::Identifier))
- return MatchOperand_NoMatch;
-
- auto ID = Tok.getString().lower();
- ARM64SYS::SystemRegister Reg =
- StringSwitch<ARM64SYS::SystemRegister>(ID)
- .Case("spsr_el1", ARM64SYS::SPSR_svc)
- .Case("spsr_svc", ARM64SYS::SPSR_svc)
- .Case("elr_el1", ARM64SYS::ELR_EL1)
- .Case("sp_el0", ARM64SYS::SP_EL0)
- .Case("spsel", ARM64SYS::SPSel)
- .Case("daif", ARM64SYS::DAIF)
- .Case("currentel", ARM64SYS::CurrentEL)
- .Case("nzcv", ARM64SYS::NZCV)
- .Case("fpcr", ARM64SYS::FPCR)
- .Case("fpsr", ARM64SYS::FPSR)
- .Case("dspsr", ARM64SYS::DSPSR)
- .Case("dlr", ARM64SYS::DLR)
- .Case("spsr_el2", ARM64SYS::SPSR_hyp)
- .Case("spsr_hyp", ARM64SYS::SPSR_hyp)
- .Case("elr_el2", ARM64SYS::ELR_EL2)
- .Case("sp_el1", ARM64SYS::SP_EL1)
- .Case("spsr_irq", ARM64SYS::SPSR_irq)
- .Case("spsr_abt", ARM64SYS::SPSR_abt)
- .Case("spsr_und", ARM64SYS::SPSR_und)
- .Case("spsr_fiq", ARM64SYS::SPSR_fiq)
- .Case("spsr_el3", ARM64SYS::SPSR_EL3)
- .Case("elr_el3", ARM64SYS::ELR_EL3)
- .Case("sp_el2", ARM64SYS::SP_EL2)
- .Case("midr_el1", ARM64SYS::MIDR_EL1)
- .Case("ctr_el0", ARM64SYS::CTR_EL0)
- .Case("mpidr_el1", ARM64SYS::MPIDR_EL1)
- .Case("ecoidr_el1", ARM64SYS::ECOIDR_EL1)
- .Case("dczid_el0", ARM64SYS::DCZID_EL0)
- .Case("mvfr0_el1", ARM64SYS::MVFR0_EL1)
- .Case("mvfr1_el1", ARM64SYS::MVFR1_EL1)
- .Case("id_aa64pfr0_el1", ARM64SYS::ID_AA64PFR0_EL1)
- .Case("id_aa64pfr1_el1", ARM64SYS::ID_AA64PFR1_EL1)
- .Case("id_aa64dfr0_el1", ARM64SYS::ID_AA64DFR0_EL1)
- .Case("id_aa64dfr1_el1", ARM64SYS::ID_AA64DFR1_EL1)
- .Case("id_aa64isar0_el1", ARM64SYS::ID_AA64ISAR0_EL1)
- .Case("id_aa64isar1_el1", ARM64SYS::ID_AA64ISAR1_EL1)
- .Case("id_aa64mmfr0_el1", ARM64SYS::ID_AA64MMFR0_EL1)
- .Case("id_aa64mmfr1_el1", ARM64SYS::ID_AA64MMFR1_EL1)
- .Case("ccsidr_el1", ARM64SYS::CCSIDR_EL1)
- .Case("clidr_el1", ARM64SYS::CLIDR_EL1)
- .Case("aidr_el1", ARM64SYS::AIDR_EL1)
- .Case("csselr_el1", ARM64SYS::CSSELR_EL1)
- .Case("vpidr_el2", ARM64SYS::VPIDR_EL2)
- .Case("vmpidr_el2", ARM64SYS::VMPIDR_EL2)
- .Case("sctlr_el1", ARM64SYS::SCTLR_EL1)
- .Case("sctlr_el2", ARM64SYS::SCTLR_EL2)
- .Case("sctlr_el3", ARM64SYS::SCTLR_EL3)
- .Case("actlr_el1", ARM64SYS::ACTLR_EL1)
- .Case("actlr_el2", ARM64SYS::ACTLR_EL2)
- .Case("actlr_el3", ARM64SYS::ACTLR_EL3)
- .Case("cpacr_el1", ARM64SYS::CPACR_EL1)
- .Case("cptr_el2", ARM64SYS::CPTR_EL2)
- .Case("cptr_el3", ARM64SYS::CPTR_EL3)
- .Case("scr_el3", ARM64SYS::SCR_EL3)
- .Case("hcr_el2", ARM64SYS::HCR_EL2)
- .Case("mdcr_el2", ARM64SYS::MDCR_EL2)
- .Case("mdcr_el3", ARM64SYS::MDCR_EL3)
- .Case("hstr_el2", ARM64SYS::HSTR_EL2)
- .Case("hacr_el2", ARM64SYS::HACR_EL2)
- .Case("ttbr0_el1", ARM64SYS::TTBR0_EL1)
- .Case("ttbr1_el1", ARM64SYS::TTBR1_EL1)
- .Case("ttbr0_el2", ARM64SYS::TTBR0_EL2)
- .Case("ttbr0_el3", ARM64SYS::TTBR0_EL3)
- .Case("vttbr_el2", ARM64SYS::VTTBR_EL2)
- .Case("tcr_el1", ARM64SYS::TCR_EL1)
- .Case("tcr_el2", ARM64SYS::TCR_EL2)
- .Case("tcr_el3", ARM64SYS::TCR_EL3)
- .Case("vtcr_el2", ARM64SYS::VTCR_EL2)
- .Case("adfsr_el1", ARM64SYS::ADFSR_EL1)
- .Case("aifsr_el1", ARM64SYS::AIFSR_EL1)
- .Case("adfsr_el2", ARM64SYS::ADFSR_EL2)
- .Case("aifsr_el2", ARM64SYS::AIFSR_EL2)
- .Case("adfsr_el3", ARM64SYS::ADFSR_EL3)
- .Case("aifsr_el3", ARM64SYS::AIFSR_EL3)
- .Case("esr_el1", ARM64SYS::ESR_EL1)
- .Case("esr_el2", ARM64SYS::ESR_EL2)
- .Case("esr_el3", ARM64SYS::ESR_EL3)
- .Case("far_el1", ARM64SYS::FAR_EL1)
- .Case("far_el2", ARM64SYS::FAR_EL2)
- .Case("far_el3", ARM64SYS::FAR_EL3)
- .Case("hpfar_el2", ARM64SYS::HPFAR_EL2)
- .Case("par_el1", ARM64SYS::PAR_EL1)
- .Case("mair_el1", ARM64SYS::MAIR_EL1)
- .Case("mair_el2", ARM64SYS::MAIR_EL2)
- .Case("mair_el3", ARM64SYS::MAIR_EL3)
- .Case("amair_el1", ARM64SYS::AMAIR_EL1)
- .Case("amair_el2", ARM64SYS::AMAIR_EL2)
- .Case("amair_el3", ARM64SYS::AMAIR_EL3)
- .Case("vbar_el1", ARM64SYS::VBAR_EL1)
- .Case("vbar_el2", ARM64SYS::VBAR_EL2)
- .Case("vbar_el3", ARM64SYS::VBAR_EL3)
- .Case("rvbar_el1", ARM64SYS::RVBAR_EL1)
- .Case("rvbar_el2", ARM64SYS::RVBAR_EL2)
- .Case("rvbar_el3", ARM64SYS::RVBAR_EL3)
- .Case("isr_el1", ARM64SYS::ISR_EL1)
- .Case("contextidr_el1", ARM64SYS::CONTEXTIDR_EL1)
- .Case("tpidr_el0", ARM64SYS::TPIDR_EL0)
- .Case("tpidrro_el0", ARM64SYS::TPIDRRO_EL0)
- .Case("tpidr_el1", ARM64SYS::TPIDR_EL1)
- .Case("tpidr_el2", ARM64SYS::TPIDR_EL2)
- .Case("tpidr_el3", ARM64SYS::TPIDR_EL3)
- .Case("teecr32_el1", ARM64SYS::TEECR32_EL1)
- .Case("cntfrq_el0", ARM64SYS::CNTFRQ_EL0)
- .Case("cntpct_el0", ARM64SYS::CNTPCT_EL0)
- .Case("cntvct_el0", ARM64SYS::CNTVCT_EL0)
- .Case("cntvoff_el2", ARM64SYS::CNTVOFF_EL2)
- .Case("cntkctl_el1", ARM64SYS::CNTKCTL_EL1)
- .Case("cnthctl_el2", ARM64SYS::CNTHCTL_EL2)
- .Case("cntp_tval_el0", ARM64SYS::CNTP_TVAL_EL0)
- .Case("cntp_ctl_el0", ARM64SYS::CNTP_CTL_EL0)
- .Case("cntp_cval_el0", ARM64SYS::CNTP_CVAL_EL0)
- .Case("cntv_tval_el0", ARM64SYS::CNTV_TVAL_EL0)
- .Case("cntv_ctl_el0", ARM64SYS::CNTV_CTL_EL0)
- .Case("cntv_cval_el0", ARM64SYS::CNTV_CVAL_EL0)
- .Case("cnthp_tval_el2", ARM64SYS::CNTHP_TVAL_EL2)
- .Case("cnthp_ctl_el2", ARM64SYS::CNTHP_CTL_EL2)
- .Case("cnthp_cval_el2", ARM64SYS::CNTHP_CVAL_EL2)
- .Case("cntps_tval_el1", ARM64SYS::CNTPS_TVAL_EL1)
- .Case("cntps_ctl_el1", ARM64SYS::CNTPS_CTL_EL1)
- .Case("cntps_cval_el1", ARM64SYS::CNTPS_CVAL_EL1)
- .Case("dacr32_el2", ARM64SYS::DACR32_EL2)
- .Case("ifsr32_el2", ARM64SYS::IFSR32_EL2)
- .Case("teehbr32_el1", ARM64SYS::TEEHBR32_EL1)
- .Case("sder32_el3", ARM64SYS::SDER32_EL3)
- .Case("fpexc32_el2", ARM64SYS::FPEXC32_EL2)
- .Case("current_el", ARM64SYS::CurrentEL)
- .Case("pmevcntr0_el0", ARM64SYS::PMEVCNTR0_EL0)
- .Case("pmevcntr1_el0", ARM64SYS::PMEVCNTR1_EL0)
- .Case("pmevcntr2_el0", ARM64SYS::PMEVCNTR2_EL0)
- .Case("pmevcntr3_el0", ARM64SYS::PMEVCNTR3_EL0)
- .Case("pmevcntr4_el0", ARM64SYS::PMEVCNTR4_EL0)
- .Case("pmevcntr5_el0", ARM64SYS::PMEVCNTR5_EL0)
- .Case("pmevcntr6_el0", ARM64SYS::PMEVCNTR6_EL0)
- .Case("pmevcntr7_el0", ARM64SYS::PMEVCNTR7_EL0)
- .Case("pmevcntr8_el0", ARM64SYS::PMEVCNTR8_EL0)
- .Case("pmevcntr9_el0", ARM64SYS::PMEVCNTR9_EL0)
- .Case("pmevcntr10_el0", ARM64SYS::PMEVCNTR10_EL0)
- .Case("pmevcntr11_el0", ARM64SYS::PMEVCNTR11_EL0)
- .Case("pmevcntr12_el0", ARM64SYS::PMEVCNTR12_EL0)
- .Case("pmevcntr13_el0", ARM64SYS::PMEVCNTR13_EL0)
- .Case("pmevcntr14_el0", ARM64SYS::PMEVCNTR14_EL0)
- .Case("pmevcntr15_el0", ARM64SYS::PMEVCNTR15_EL0)
- .Case("pmevcntr16_el0", ARM64SYS::PMEVCNTR16_EL0)
- .Case("pmevcntr17_el0", ARM64SYS::PMEVCNTR17_EL0)
- .Case("pmevcntr18_el0", ARM64SYS::PMEVCNTR18_EL0)
- .Case("pmevcntr19_el0", ARM64SYS::PMEVCNTR19_EL0)
- .Case("pmevcntr20_el0", ARM64SYS::PMEVCNTR20_EL0)
- .Case("pmevcntr21_el0", ARM64SYS::PMEVCNTR21_EL0)
- .Case("pmevcntr22_el0", ARM64SYS::PMEVCNTR22_EL0)
- .Case("pmevcntr23_el0", ARM64SYS::PMEVCNTR23_EL0)
- .Case("pmevcntr24_el0", ARM64SYS::PMEVCNTR24_EL0)
- .Case("pmevcntr25_el0", ARM64SYS::PMEVCNTR25_EL0)
- .Case("pmevcntr26_el0", ARM64SYS::PMEVCNTR26_EL0)
- .Case("pmevcntr27_el0", ARM64SYS::PMEVCNTR27_EL0)
- .Case("pmevcntr28_el0", ARM64SYS::PMEVCNTR28_EL0)
- .Case("pmevcntr29_el0", ARM64SYS::PMEVCNTR29_EL0)
- .Case("pmevcntr30_el0", ARM64SYS::PMEVCNTR30_EL0)
- .Case("pmevtyper0_el0", ARM64SYS::PMEVTYPER0_EL0)
- .Case("pmevtyper1_el0", ARM64SYS::PMEVTYPER1_EL0)
- .Case("pmevtyper2_el0", ARM64SYS::PMEVTYPER2_EL0)
- .Case("pmevtyper3_el0", ARM64SYS::PMEVTYPER3_EL0)
- .Case("pmevtyper4_el0", ARM64SYS::PMEVTYPER4_EL0)
- .Case("pmevtyper5_el0", ARM64SYS::PMEVTYPER5_EL0)
- .Case("pmevtyper6_el0", ARM64SYS::PMEVTYPER6_EL0)
- .Case("pmevtyper7_el0", ARM64SYS::PMEVTYPER7_EL0)
- .Case("pmevtyper8_el0", ARM64SYS::PMEVTYPER8_EL0)
- .Case("pmevtyper9_el0", ARM64SYS::PMEVTYPER9_EL0)
- .Case("pmevtyper10_el0", ARM64SYS::PMEVTYPER10_EL0)
- .Case("pmevtyper11_el0", ARM64SYS::PMEVTYPER11_EL0)
- .Case("pmevtyper12_el0", ARM64SYS::PMEVTYPER12_EL0)
- .Case("pmevtyper13_el0", ARM64SYS::PMEVTYPER13_EL0)
- .Case("pmevtyper14_el0", ARM64SYS::PMEVTYPER14_EL0)
- .Case("pmevtyper15_el0", ARM64SYS::PMEVTYPER15_EL0)
- .Case("pmevtyper16_el0", ARM64SYS::PMEVTYPER16_EL0)
- .Case("pmevtyper17_el0", ARM64SYS::PMEVTYPER17_EL0)
- .Case("pmevtyper18_el0", ARM64SYS::PMEVTYPER18_EL0)
- .Case("pmevtyper19_el0", ARM64SYS::PMEVTYPER19_EL0)
- .Case("pmevtyper20_el0", ARM64SYS::PMEVTYPER20_EL0)
- .Case("pmevtyper21_el0", ARM64SYS::PMEVTYPER21_EL0)
- .Case("pmevtyper22_el0", ARM64SYS::PMEVTYPER22_EL0)
- .Case("pmevtyper23_el0", ARM64SYS::PMEVTYPER23_EL0)
- .Case("pmevtyper24_el0", ARM64SYS::PMEVTYPER24_EL0)
- .Case("pmevtyper25_el0", ARM64SYS::PMEVTYPER25_EL0)
- .Case("pmevtyper26_el0", ARM64SYS::PMEVTYPER26_EL0)
- .Case("pmevtyper27_el0", ARM64SYS::PMEVTYPER27_EL0)
- .Case("pmevtyper28_el0", ARM64SYS::PMEVTYPER28_EL0)
- .Case("pmevtyper29_el0", ARM64SYS::PMEVTYPER29_EL0)
- .Case("pmevtyper30_el0", ARM64SYS::PMEVTYPER30_EL0)
- .Case("pmccfiltr_el0", ARM64SYS::PMCCFILTR_EL0)
- .Case("rmr_el3", ARM64SYS::RMR_EL3)
- .Case("rmr_el2", ARM64SYS::RMR_EL2)
- .Case("rmr_el1", ARM64SYS::RMR_EL1)
- .Case("cpm_ioacc_ctl_el3", ARM64SYS::CPM_IOACC_CTL_EL3)
- .Case("mdccsr_el0", ARM64SYS::MDCCSR_EL0)
- .Case("mdccint_el1", ARM64SYS::MDCCINT_EL1)
- .Case("dbgdtr_el0", ARM64SYS::DBGDTR_EL0)
- .Case("dbgdtrrx_el0", ARM64SYS::DBGDTRRX_EL0)
- .Case("dbgdtrtx_el0", ARM64SYS::DBGDTRTX_EL0)
- .Case("dbgvcr32_el2", ARM64SYS::DBGVCR32_EL2)
- .Case("osdtrrx_el1", ARM64SYS::OSDTRRX_EL1)
- .Case("mdscr_el1", ARM64SYS::MDSCR_EL1)
- .Case("osdtrtx_el1", ARM64SYS::OSDTRTX_EL1)
- .Case("oseccr_el11", ARM64SYS::OSECCR_EL11)
- .Case("dbgbvr0_el1", ARM64SYS::DBGBVR0_EL1)
- .Case("dbgbvr1_el1", ARM64SYS::DBGBVR1_EL1)
- .Case("dbgbvr2_el1", ARM64SYS::DBGBVR2_EL1)
- .Case("dbgbvr3_el1", ARM64SYS::DBGBVR3_EL1)
- .Case("dbgbvr4_el1", ARM64SYS::DBGBVR4_EL1)
- .Case("dbgbvr5_el1", ARM64SYS::DBGBVR5_EL1)
- .Case("dbgbvr6_el1", ARM64SYS::DBGBVR6_EL1)
- .Case("dbgbvr7_el1", ARM64SYS::DBGBVR7_EL1)
- .Case("dbgbvr8_el1", ARM64SYS::DBGBVR8_EL1)
- .Case("dbgbvr9_el1", ARM64SYS::DBGBVR9_EL1)
- .Case("dbgbvr10_el1", ARM64SYS::DBGBVR10_EL1)
- .Case("dbgbvr11_el1", ARM64SYS::DBGBVR11_EL1)
- .Case("dbgbvr12_el1", ARM64SYS::DBGBVR12_EL1)
- .Case("dbgbvr13_el1", ARM64SYS::DBGBVR13_EL1)
- .Case("dbgbvr14_el1", ARM64SYS::DBGBVR14_EL1)
- .Case("dbgbvr15_el1", ARM64SYS::DBGBVR15_EL1)
- .Case("dbgbcr0_el1", ARM64SYS::DBGBCR0_EL1)
- .Case("dbgbcr1_el1", ARM64SYS::DBGBCR1_EL1)
- .Case("dbgbcr2_el1", ARM64SYS::DBGBCR2_EL1)
- .Case("dbgbcr3_el1", ARM64SYS::DBGBCR3_EL1)
- .Case("dbgbcr4_el1", ARM64SYS::DBGBCR4_EL1)
- .Case("dbgbcr5_el1", ARM64SYS::DBGBCR5_EL1)
- .Case("dbgbcr6_el1", ARM64SYS::DBGBCR6_EL1)
- .Case("dbgbcr7_el1", ARM64SYS::DBGBCR7_EL1)
- .Case("dbgbcr8_el1", ARM64SYS::DBGBCR8_EL1)
- .Case("dbgbcr9_el1", ARM64SYS::DBGBCR9_EL1)
- .Case("dbgbcr10_el1", ARM64SYS::DBGBCR10_EL1)
- .Case("dbgbcr11_el1", ARM64SYS::DBGBCR11_EL1)
- .Case("dbgbcr12_el1", ARM64SYS::DBGBCR12_EL1)
- .Case("dbgbcr13_el1", ARM64SYS::DBGBCR13_EL1)
- .Case("dbgbcr14_el1", ARM64SYS::DBGBCR14_EL1)
- .Case("dbgbcr15_el1", ARM64SYS::DBGBCR15_EL1)
- .Case("dbgwvr0_el1", ARM64SYS::DBGWVR0_EL1)
- .Case("dbgwvr1_el1", ARM64SYS::DBGWVR1_EL1)
- .Case("dbgwvr2_el1", ARM64SYS::DBGWVR2_EL1)
- .Case("dbgwvr3_el1", ARM64SYS::DBGWVR3_EL1)
- .Case("dbgwvr4_el1", ARM64SYS::DBGWVR4_EL1)
- .Case("dbgwvr5_el1", ARM64SYS::DBGWVR5_EL1)
- .Case("dbgwvr6_el1", ARM64SYS::DBGWVR6_EL1)
- .Case("dbgwvr7_el1", ARM64SYS::DBGWVR7_EL1)
- .Case("dbgwvr8_el1", ARM64SYS::DBGWVR8_EL1)
- .Case("dbgwvr9_el1", ARM64SYS::DBGWVR9_EL1)
- .Case("dbgwvr10_el1", ARM64SYS::DBGWVR10_EL1)
- .Case("dbgwvr11_el1", ARM64SYS::DBGWVR11_EL1)
- .Case("dbgwvr12_el1", ARM64SYS::DBGWVR12_EL1)
- .Case("dbgwvr13_el1", ARM64SYS::DBGWVR13_EL1)
- .Case("dbgwvr14_el1", ARM64SYS::DBGWVR14_EL1)
- .Case("dbgwvr15_el1", ARM64SYS::DBGWVR15_EL1)
- .Case("dbgwcr0_el1", ARM64SYS::DBGWCR0_EL1)
- .Case("dbgwcr1_el1", ARM64SYS::DBGWCR1_EL1)
- .Case("dbgwcr2_el1", ARM64SYS::DBGWCR2_EL1)
- .Case("dbgwcr3_el1", ARM64SYS::DBGWCR3_EL1)
- .Case("dbgwcr4_el1", ARM64SYS::DBGWCR4_EL1)
- .Case("dbgwcr5_el1", ARM64SYS::DBGWCR5_EL1)
- .Case("dbgwcr6_el1", ARM64SYS::DBGWCR6_EL1)
- .Case("dbgwcr7_el1", ARM64SYS::DBGWCR7_EL1)
- .Case("dbgwcr8_el1", ARM64SYS::DBGWCR8_EL1)
- .Case("dbgwcr9_el1", ARM64SYS::DBGWCR9_EL1)
- .Case("dbgwcr10_el1", ARM64SYS::DBGWCR10_EL1)
- .Case("dbgwcr11_el1", ARM64SYS::DBGWCR11_EL1)
- .Case("dbgwcr12_el1", ARM64SYS::DBGWCR12_EL1)
- .Case("dbgwcr13_el1", ARM64SYS::DBGWCR13_EL1)
- .Case("dbgwcr14_el1", ARM64SYS::DBGWCR14_EL1)
- .Case("dbgwcr15_el1", ARM64SYS::DBGWCR15_EL1)
- .Case("mdrar_el1", ARM64SYS::MDRAR_EL1)
- .Case("oslar_el1", ARM64SYS::OSLAR_EL1)
- .Case("oslsr_el1", ARM64SYS::OSLSR_EL1)
- .Case("osdlr_el1", ARM64SYS::OSDLR_EL1)
- .Case("dbgprcr_el1", ARM64SYS::DBGPRCR_EL1)
- .Case("dbgclaimset_el1", ARM64SYS::DBGCLAIMSET_EL1)
- .Case("dbgclaimclr_el1", ARM64SYS::DBGCLAIMCLR_EL1)
- .Case("dbgauthstatus_el1", ARM64SYS::DBGAUTHSTATUS_EL1)
- .Case("dbgdevid2", ARM64SYS::DBGDEVID2)
- .Case("dbgdevid1", ARM64SYS::DBGDEVID1)
- .Case("dbgdevid0", ARM64SYS::DBGDEVID0)
- .Case("id_pfr0_el1", ARM64SYS::ID_PFR0_EL1)
- .Case("id_pfr1_el1", ARM64SYS::ID_PFR1_EL1)
- .Case("id_dfr0_el1", ARM64SYS::ID_DFR0_EL1)
- .Case("id_afr0_el1", ARM64SYS::ID_AFR0_EL1)
- .Case("id_isar0_el1", ARM64SYS::ID_ISAR0_EL1)
- .Case("id_isar1_el1", ARM64SYS::ID_ISAR1_EL1)
- .Case("id_isar2_el1", ARM64SYS::ID_ISAR2_EL1)
- .Case("id_isar3_el1", ARM64SYS::ID_ISAR3_EL1)
- .Case("id_isar4_el1", ARM64SYS::ID_ISAR4_EL1)
- .Case("id_isar5_el1", ARM64SYS::ID_ISAR5_EL1)
- .Case("afsr1_el1", ARM64SYS::AFSR1_EL1)
- .Case("afsr0_el1", ARM64SYS::AFSR0_EL1)
- .Case("revidr_el1", ARM64SYS::REVIDR_EL1)
- .Default(ARM64SYS::InvalidSystemReg);
- if (Reg != ARM64SYS::InvalidSystemReg) {
- // We matched a reg name, so create the operand.
- Operands.push_back(
- ARM64Operand::CreateSystemRegister(Reg, getLoc(), getContext()));
- Parser.Lex(); // Consume the register name.
- return MatchOperand_Success;
- }
-
- // Or we may have an identifier that encodes the sub-operands.
- // For example, s3_2_c15_c0_0.
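- // That encodes as op0=3, op1=2, CRn=15, CRm=0, op2=0, so below
- // Val = 3<<14 | 2<<11 | 15<<7 | 0<<3 | 0 = 0xD780.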
- unsigned op0, op1, CRn, CRm, op2;
- std::string Desc = ID;
- if (std::sscanf(Desc.c_str(), "s%u_%u_c%u_c%u_%u", &op0, &op1, &CRn, &CRm,
- &op2) != 5)
- return MatchOperand_NoMatch;
- if ((op0 != 2 && op0 != 3) || op1 > 7 || CRn > 15 || CRm > 15 || op2 > 7)
- return MatchOperand_NoMatch;
-
- unsigned Val = op0 << 14 | op1 << 11 | CRn << 7 | CRm << 3 | op2;
- Operands.push_back(
- ARM64Operand::CreateSystemRegister(Val, getLoc(), getContext()));
- Parser.Lex(); // Consume the register name.
-
- return MatchOperand_Success;
-}
-
-ARM64AsmParser::OperandMatchResultTy
-ARM64AsmParser::tryParseCPSRField(OperandVector &Operands) {
- const AsmToken &Tok = Parser.getTok();
-
- if (Tok.isNot(AsmToken::Identifier))
- return MatchOperand_NoMatch;
-
- ARM64SYS::CPSRField Field =
- StringSwitch<ARM64SYS::CPSRField>(Tok.getString().lower())
- .Case("spsel", ARM64SYS::cpsr_SPSel)
- .Case("daifset", ARM64SYS::cpsr_DAIFSet)
- .Case("daifclr", ARM64SYS::cpsr_DAIFClr)
- .Default(ARM64SYS::InvalidCPSRField);
- if (Field == ARM64SYS::InvalidCPSRField)
- return MatchOperand_NoMatch;
- Operands.push_back(
- ARM64Operand::CreateCPSRField(Field, getLoc(), getContext()));
- Parser.Lex(); // Consume the field name.
-
- return MatchOperand_Success;
-}
-
-/// tryParseVectorRegister - Parse a vector register operand.
-bool ARM64AsmParser::tryParseVectorRegister(OperandVector &Operands) {
- if (Parser.getTok().isNot(AsmToken::Identifier))
- return true;
-
- SMLoc S = getLoc();
- // Check for a vector register specifier first.
- StringRef Kind;
- int64_t Reg = tryMatchVectorRegister(Kind);
- if (Reg == -1)
- return true;
- Operands.push_back(
- ARM64Operand::CreateReg(Reg, true, S, getLoc(), getContext()));
- // If there was an explicit qualifier, that goes on as a literal text
- // operand.
- if (!Kind.empty())
- Operands.push_back(ARM64Operand::CreateToken(Kind, false, S, getContext()));
-
- // If there is an index specifier following the register, parse that too.
- if (Parser.getTok().is(AsmToken::LBrac)) {
- SMLoc SIdx = getLoc();
- Parser.Lex(); // Eat left bracket token.
-
- const MCExpr *ImmVal;
- if (getParser().parseExpression(ImmVal))
- return false;
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(ImmVal);
- if (!MCE) {
- TokError("immediate value expected for vector index");
- return false;
- }
-
- SMLoc E = getLoc();
- if (Parser.getTok().isNot(AsmToken::RBrac)) {
- Error(E, "']' expected");
- return false;
- }
-
- Parser.Lex(); // Eat right bracket token.
-
- Operands.push_back(ARM64Operand::CreateVectorIndex(MCE->getValue(), SIdx, E,
- getContext()));
- }
-
- return false;
-}
-
-/// parseRegister - Parse a non-vector register operand.
-bool ARM64AsmParser::parseRegister(OperandVector &Operands) {
- SMLoc S = getLoc();
- // Try for a vector register.
- if (!tryParseVectorRegister(Operands))
- return false;
-
- // Try for a scalar register.
- int64_t Reg = tryParseRegister();
- if (Reg == -1)
- return true;
- Operands.push_back(
- ARM64Operand::CreateReg(Reg, false, S, getLoc(), getContext()));
-
- // A small number of instructions (FMOVXDhighr, for example) have "[1]"
- // as a string token in the instruction itself.
- if (getLexer().getKind() == AsmToken::LBrac) {
- SMLoc LBracS = getLoc();
- Parser.Lex();
- const AsmToken &Tok = Parser.getTok();
- if (Tok.is(AsmToken::Integer)) {
- SMLoc IntS = getLoc();
- int64_t Val = Tok.getIntVal();
- if (Val == 1) {
- Parser.Lex();
- if (getLexer().getKind() == AsmToken::RBrac) {
- SMLoc RBracS = getLoc();
- Parser.Lex();
- Operands.push_back(
- ARM64Operand::CreateToken("[", false, LBracS, getContext()));
- Operands.push_back(
- ARM64Operand::CreateToken("1", false, IntS, getContext()));
- Operands.push_back(
- ARM64Operand::CreateToken("]", false, RBracS, getContext()));
- return false;
- }
- }
- }
- }
-
- return false;
-}
-
-/// tryParseNoIndexMemory - Custom parser method for memory operands that
-/// do not allow base register writeback modes, or those that handle
-/// writeback separately from the memory operand (like the AdvSIMD
-/// ldX/stX instructions).
-ARM64AsmParser::OperandMatchResultTy
-ARM64AsmParser::tryParseNoIndexMemory(OperandVector &Operands) {
- if (Parser.getTok().isNot(AsmToken::LBrac))
- return MatchOperand_NoMatch;
- SMLoc S = getLoc();
- Parser.Lex(); // Eat left bracket token.
-
- const AsmToken &BaseRegTok = Parser.getTok();
- if (BaseRegTok.isNot(AsmToken::Identifier)) {
- Error(BaseRegTok.getLoc(), "register expected");
- return MatchOperand_ParseFail;
- }
-
- int64_t Reg = tryParseRegister();
- if (Reg == -1) {
- Error(BaseRegTok.getLoc(), "register expected");
- return MatchOperand_ParseFail;
- }
-
- SMLoc E = getLoc();
- if (Parser.getTok().isNot(AsmToken::RBrac)) {
- Error(E, "']' expected");
- return MatchOperand_ParseFail;
- }
-
- Parser.Lex(); // Eat right bracket token.
-
- Operands.push_back(ARM64Operand::CreateMem(Reg, 0, S, E, E, getContext()));
- return MatchOperand_Success;
-}
-
-/// parseMemory - Parse a memory operand for a basic load/store instruction.
-bool ARM64AsmParser::parseMemory(OperandVector &Operands) {
- assert(Parser.getTok().is(AsmToken::LBrac) && "Token is not a Left Bracket");
- SMLoc S = getLoc();
- Parser.Lex(); // Eat left bracket token.
-
- const AsmToken &BaseRegTok = Parser.getTok();
- if (BaseRegTok.isNot(AsmToken::Identifier))
- return Error(BaseRegTok.getLoc(), "register expected");
-
- int64_t Reg = tryParseRegister();
- if (Reg == -1)
- return Error(BaseRegTok.getLoc(), "register expected");
-
- // If there is an offset expression, parse it.
- const MCExpr *OffsetExpr = 0;
- SMLoc OffsetLoc;
- if (Parser.getTok().is(AsmToken::Comma)) {
- Parser.Lex(); // Eat the comma.
- OffsetLoc = getLoc();
-
- // Register offset
- const AsmToken &OffsetRegTok = Parser.getTok();
- int Reg2 = OffsetRegTok.is(AsmToken::Identifier) ? tryParseRegister() : -1;
- if (Reg2 != -1) {
- // The default is LSL with an omitted shift amount. We use the third bit of
- // the extend value to indicate presence/omission of the immediate offset.
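- // e.g. "[x0, x1]" keeps the default UXTX/LSL with no amount, while
- // "[x0, w1, sxtw #2]" sets an explicit extend and a shift of 2.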
- ARM64_AM::ExtendType ExtOp = ARM64_AM::UXTX;
- int64_t ShiftVal = 0;
- bool ExplicitShift = false;
-
- if (Parser.getTok().is(AsmToken::Comma)) {
- // Embedded extend operand.
- Parser.Lex(); // Eat the comma
-
- SMLoc ExtLoc = getLoc();
- const AsmToken &Tok = Parser.getTok();
- ExtOp = StringSwitch<ARM64_AM::ExtendType>(Tok.getString())
- .Case("uxtw", ARM64_AM::UXTW)
- .Case("lsl", ARM64_AM::UXTX) // Alias for UXTX
- .Case("sxtw", ARM64_AM::SXTW)
- .Case("sxtx", ARM64_AM::SXTX)
- .Case("UXTW", ARM64_AM::UXTW)
- .Case("LSL", ARM64_AM::UXTX) // Alias for UXTX
- .Case("SXTW", ARM64_AM::SXTW)
- .Case("SXTX", ARM64_AM::SXTX)
- .Default(ARM64_AM::InvalidExtend);
- if (ExtOp == ARM64_AM::InvalidExtend)
- return Error(ExtLoc, "expected valid extend operation");
-
- Parser.Lex(); // Eat the extend op.
-
- if (getLexer().is(AsmToken::RBrac)) {
- // No immediate operand.
- if (ExtOp == ARM64_AM::UXTX)
- return Error(ExtLoc, "LSL extend requires immediate operand");
- } else if (getLexer().is(AsmToken::Hash)) {
- // Immediate operand.
- Parser.Lex(); // Eat the '#'
- const MCExpr *ImmVal;
- SMLoc ExprLoc = getLoc();
- if (getParser().parseExpression(ImmVal))
- return true;
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(ImmVal);
- if (!MCE)
- return TokError("immediate value expected for extend operand");
-
- ExplicitShift = true;
- ShiftVal = MCE->getValue();
- if (ShiftVal < 0 || ShiftVal > 4)
- return Error(ExprLoc, "immediate operand out of range");
- } else
- return Error(getLoc(), "expected immediate operand");
- }
-
- if (Parser.getTok().isNot(AsmToken::RBrac))
- return Error(getLoc(), "']' expected");
-
- Parser.Lex(); // Eat right bracket token.
-
- SMLoc E = getLoc();
- Operands.push_back(ARM64Operand::CreateRegOffsetMem(
- Reg, Reg2, ExtOp, ShiftVal, ExplicitShift, S, E, getContext()));
- return false;
-
- // Immediate expressions.
- } else if (Parser.getTok().is(AsmToken::Hash)) {
- Parser.Lex(); // Eat hash token.
-
- if (parseSymbolicImmVal(OffsetExpr))
- return true;
- } else {
- // FIXME: We really should make sure that we're dealing with an LDR/STR
- // instruction that can legally have a symbolic expression here.
- // Symbol reference.
- if (Parser.getTok().isNot(AsmToken::Identifier) &&
- Parser.getTok().isNot(AsmToken::String))
- return Error(getLoc(), "identifier or immediate expression expected");
- if (getParser().parseExpression(OffsetExpr))
- return true;
- // If this is a plain ref, make sure a legal variant kind was specified.
- // Otherwise, it's a more complicated expression and we have to just
- // assume it's OK and let the relocation stuff puke if it's not.
- ARM64MCExpr::VariantKind ELFRefKind;
- MCSymbolRefExpr::VariantKind DarwinRefKind;
- const MCConstantExpr *Addend;
- if (classifySymbolRef(OffsetExpr, ELFRefKind, DarwinRefKind, Addend) &&
- Addend == 0) {
- assert(ELFRefKind == ARM64MCExpr::VK_INVALID &&
- "ELF symbol modifiers not supported here yet");
-
- switch (DarwinRefKind) {
- default:
- return Error(getLoc(), "expected @pageoff or @gotpageoff modifier");
- case MCSymbolRefExpr::VK_GOTPAGEOFF:
- case MCSymbolRefExpr::VK_PAGEOFF:
- case MCSymbolRefExpr::VK_TLVPPAGEOFF:
- // These are what we're expecting.
- break;
- }
- }
- }
- }
-
- SMLoc E = getLoc();
- if (Parser.getTok().isNot(AsmToken::RBrac))
- return Error(E, "']' expected");
-
- Parser.Lex(); // Eat right bracket token.
-
- // Create the memory operand.
- Operands.push_back(
- ARM64Operand::CreateMem(Reg, OffsetExpr, S, E, OffsetLoc, getContext()));
-
- // Check for a '!', indicating pre-indexed addressing with writeback.
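- // e.g. "ldr x0, [x1, #8]!" loads from x1+8 and then writes x1+8 back to x1.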
- if (Parser.getTok().is(AsmToken::Exclaim)) {
- // There must have been an immediate offset, or writeback doesn't make sense.
- if (!OffsetExpr)
- return Error(E, "missing offset for pre-indexed addressing");
- // Pre-indexed with writeback must have a constant expression for the
- // offset. FIXME: Theoretically, we'd like to allow fixups so long
- // as they don't require a relocation.
- if (!isa<MCConstantExpr>(OffsetExpr))
- return Error(OffsetLoc, "constant immediate expression expected");
-
- // Create the Token operand for the '!'.
- Operands.push_back(ARM64Operand::CreateToken(
- "!", false, Parser.getTok().getLoc(), getContext()));
- Parser.Lex(); // Eat the '!' token.
- }
-
- return false;
-}
-
-bool ARM64AsmParser::parseSymbolicImmVal(const MCExpr *&ImmVal) {
- bool HasELFModifier = false;
- ARM64MCExpr::VariantKind RefKind;
-
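- // An ELF relocation specifier has the form ":specifier:expression", e.g.
- // "add x0, x0, :lo12:sym" applies the low-12-bit modifier to sym.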
- if (Parser.getTok().is(AsmToken::Colon)) {
- Parser.Lex(); // Eat ':'
- HasELFModifier = true;
-
- if (Parser.getTok().isNot(AsmToken::Identifier)) {
- Error(Parser.getTok().getLoc(),
- "expect relocation specifier in operand after ':'");
- return true;
- }
-
- std::string LowerCase = Parser.getTok().getIdentifier().lower();
- RefKind = StringSwitch<ARM64MCExpr::VariantKind>(LowerCase)
- .Case("lo12", ARM64MCExpr::VK_LO12)
- .Case("abs_g3", ARM64MCExpr::VK_ABS_G3)
- .Case("abs_g2", ARM64MCExpr::VK_ABS_G2)
- .Case("abs_g2_nc", ARM64MCExpr::VK_ABS_G2_NC)
- .Case("abs_g1", ARM64MCExpr::VK_ABS_G1)
- .Case("abs_g1_nc", ARM64MCExpr::VK_ABS_G1_NC)
- .Case("abs_g0", ARM64MCExpr::VK_ABS_G0)
- .Case("abs_g0_nc", ARM64MCExpr::VK_ABS_G0_NC)
- .Case("dtprel_g2", ARM64MCExpr::VK_DTPREL_G2)
- .Case("dtprel_g1", ARM64MCExpr::VK_DTPREL_G1)
- .Case("dtprel_g1_nc", ARM64MCExpr::VK_DTPREL_G1_NC)
- .Case("dtprel_g0", ARM64MCExpr::VK_DTPREL_G0)
- .Case("dtprel_g0_nc", ARM64MCExpr::VK_DTPREL_G0_NC)
- .Case("dtprel_lo12", ARM64MCExpr::VK_DTPREL_LO12)
- .Case("dtprel_lo12_nc", ARM64MCExpr::VK_DTPREL_LO12_NC)
- .Case("tprel_g2", ARM64MCExpr::VK_TPREL_G2)
- .Case("tprel_g1", ARM64MCExpr::VK_TPREL_G1)
- .Case("tprel_g1_nc", ARM64MCExpr::VK_TPREL_G1_NC)
- .Case("tprel_g0", ARM64MCExpr::VK_TPREL_G0)
- .Case("tprel_g0_nc", ARM64MCExpr::VK_TPREL_G0_NC)
- .Case("tprel_lo12", ARM64MCExpr::VK_TPREL_LO12)
- .Case("tprel_lo12_nc", ARM64MCExpr::VK_TPREL_LO12_NC)
- .Case("tlsdesc_lo12", ARM64MCExpr::VK_TLSDESC_LO12)
- .Case("got", ARM64MCExpr::VK_GOT_PAGE)
- .Case("got_lo12", ARM64MCExpr::VK_GOT_LO12)
- .Case("gottprel", ARM64MCExpr::VK_GOTTPREL_PAGE)
- .Case("gottprel_lo12", ARM64MCExpr::VK_GOTTPREL_LO12_NC)
- .Case("gottprel_g1", ARM64MCExpr::VK_GOTTPREL_G1)
- .Case("gottprel_g0_nc", ARM64MCExpr::VK_GOTTPREL_G0_NC)
- .Case("tlsdesc", ARM64MCExpr::VK_TLSDESC_PAGE)
- .Default(ARM64MCExpr::VK_INVALID);
-
- if (RefKind == ARM64MCExpr::VK_INVALID) {
- Error(Parser.getTok().getLoc(),
- "expect relocation specifier in operand after ':'");
- return true;
- }
-
- Parser.Lex(); // Eat identifier
-
- if (Parser.getTok().isNot(AsmToken::Colon)) {
- Error(Parser.getTok().getLoc(), "expected ':' after relocation specifier");
- return true;
- }
- Parser.Lex(); // Eat ':'
- }
-
- if (getParser().parseExpression(ImmVal))
- return true;
-
- if (HasELFModifier)
- ImmVal = ARM64MCExpr::Create(ImmVal, RefKind, getContext());
-
- return false;
-}
-
-/// parseVectorList - Parse a vector list operand for AdvSIMD instructions.
-bool ARM64AsmParser::parseVectorList(OperandVector &Operands) {
- assert(Parser.getTok().is(AsmToken::LCurly) && "Token is not a Left Brace");
- SMLoc S = getLoc();
- Parser.Lex(); // Eat left bracket token.
- StringRef Kind;
- int64_t FirstReg = tryMatchVectorRegister(Kind);
- if (FirstReg == -1)
- return Error(getLoc(), "vector register expected");
- int64_t PrevReg = FirstReg;
- unsigned Count = 1;
- while (Parser.getTok().isNot(AsmToken::RCurly)) {
- if (Parser.getTok().is(AsmToken::EndOfStatement))
- Error(getLoc(), "'}' expected");
-
- if (Parser.getTok().isNot(AsmToken::Comma))
- return Error(getLoc(), "',' expected");
- Parser.Lex(); // Eat the comma token.
-
- SMLoc Loc = getLoc();
- StringRef NextKind;
- int64_t Reg = tryMatchVectorRegister(NextKind);
- if (Reg == -1)
- return Error(Loc, "vector register expected");
- // Any kind suffixes must match on all registers in the list.
- if (Kind != NextKind)
- return Error(Loc, "mismatched register size suffix");
-
- // Registers must be sequential (with wraparound at 31).
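- // e.g. "{ v31.4s, v0.4s }" is accepted, since encodings wrap modulo 32.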
- if (getContext().getRegisterInfo()->getEncodingValue(Reg) !=
- (getContext().getRegisterInfo()->getEncodingValue(PrevReg) + 1) % 32)
- return Error(Loc, "registers must be sequential");
-
- PrevReg = Reg;
- ++Count;
- }
- Parser.Lex(); // Eat the '}' token.
-
- unsigned NumElements = 0;
- char ElementKind = 0;
- if (!Kind.empty())
- parseValidVectorKind(Kind, NumElements, ElementKind);
-
- Operands.push_back(ARM64Operand::CreateVectorList(
- FirstReg, Count, NumElements, ElementKind, S, getLoc(), getContext()));
-
- // If there is an index specifier following the list, parse that too.
- if (Parser.getTok().is(AsmToken::LBrac)) {
- SMLoc SIdx = getLoc();
- Parser.Lex(); // Eat left bracket token.
-
- const MCExpr *ImmVal;
- if (getParser().parseExpression(ImmVal))
- return false;
- const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(ImmVal);
- if (!MCE) {
- TokError("immediate value expected for vector index");
- return false;
- }
-
- SMLoc E = getLoc();
- if (Parser.getTok().isNot(AsmToken::RBrac)) {
- Error(E, "']' expected");
- return false;
- }
-
- Parser.Lex(); // Eat right bracket token.
-
- Operands.push_back(ARM64Operand::CreateVectorIndex(MCE->getValue(), SIdx, E,
- getContext()));
- }
- return false;
-}
-
-/// parseOperand - Parse an ARM64 instruction operand. For now this parses the
-/// operand regardless of the mnemonic.
-bool ARM64AsmParser::parseOperand(OperandVector &Operands, bool isCondCode,
- bool invertCondCode) {
- // Check if the current operand has a custom associated parser, if so, try to
- // custom parse the operand, or fallback to the general approach.
- OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
- if (ResTy == MatchOperand_Success)
- return false;
- // If there wasn't a custom match, try the generic matcher below. Otherwise,
- // there was a match, but an error occurred, in which case, just return that
- // the operand parsing failed.
- if (ResTy == MatchOperand_ParseFail)
- return true;
-
- // Nothing custom, so do general case parsing.
- SMLoc S, E;
- switch (getLexer().getKind()) {
- default: {
- SMLoc S = getLoc();
- const MCExpr *Expr;
- if (parseSymbolicImmVal(Expr))
- return Error(S, "invalid operand");
-
- SMLoc E = SMLoc::getFromPointer(getLoc().getPointer() - 1);
- Operands.push_back(ARM64Operand::CreateImm(Expr, S, E, getContext()));
- return false;
- }
- case AsmToken::LBrac:
- return parseMemory(Operands);
- case AsmToken::LCurly:
- return parseVectorList(Operands);
- case AsmToken::Identifier: {
- // If we're expecting a Condition Code operand, then just parse that.
- if (isCondCode)
- return parseCondCode(Operands, invertCondCode);
-
- // If it's a register name, parse it.
- if (!parseRegister(Operands))
- return false;
-
- // This could be an optional "shift" operand.
- if (!parseOptionalShift(Operands))
- return false;
-
- // Or maybe it could be an optional "extend" operand.
- if (!parseOptionalExtend(Operands))
- return false;
-
- // This was not a register so parse other operands that start with an
- // identifier (like labels) as expressions and create them as immediates.
- const MCExpr *IdVal;
- S = getLoc();
- if (getParser().parseExpression(IdVal))
- return true;
-
- E = SMLoc::getFromPointer(getLoc().getPointer() - 1);
- Operands.push_back(ARM64Operand::CreateImm(IdVal, S, E, getContext()));
- return false;
- }
- case AsmToken::Hash: {
- // #42 -> immediate.
- S = getLoc();
- Parser.Lex();
-
- // The only Real that should come through here is a literal #0.0 for
- // the fcmp[e] r, #0.0 instructions. They expect raw token operands,
- // so convert the value.
- const AsmToken &Tok = Parser.getTok();
- if (Tok.is(AsmToken::Real)) {
- APFloat RealVal(APFloat::IEEEdouble, Tok.getString());
- uint64_t IntVal = RealVal.bitcastToAPInt().getZExtValue();
- if (IntVal != 0 || (Mnemonic != "fcmp" && Mnemonic != "fcmpe"))
- return TokError("unexpected floating point literal");
- Parser.Lex(); // Eat the token.
-
- Operands.push_back(
- ARM64Operand::CreateToken("#0", false, S, getContext()));
- Operands.push_back(
- ARM64Operand::CreateToken(".0", false, S, getContext()));
- return false;
- }
-
- const MCExpr *ImmVal;
- if (parseSymbolicImmVal(ImmVal))
- return true;
-
- E = SMLoc::getFromPointer(getLoc().getPointer() - 1);
- Operands.push_back(ARM64Operand::CreateImm(ImmVal, S, E, getContext()));
- return false;
- }
- }
-}
-
-/// ParseInstruction - Parse an ARM64 instruction mnemonic followed by its
-/// operands.
-bool ARM64AsmParser::ParseInstruction(ParseInstructionInfo &Info,
- StringRef Name, SMLoc NameLoc,
- OperandVector &Operands) {
- // Create the leading tokens for the mnemonic, split by '.' characters.
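- // e.g. "fadd.2s" becomes the token "fadd" plus the suffix token ".2s".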
- size_t Start = 0, Next = Name.find('.');
- StringRef Head = Name.slice(Start, Next);
-
- // IC, DC, AT, and TLBI instructions are aliases for the SYS instruction.
- if (Head == "ic" || Head == "dc" || Head == "at" || Head == "tlbi")
- return parseSysAlias(Head, NameLoc, Operands);
-
- Operands.push_back(
- ARM64Operand::CreateToken(Head, false, NameLoc, getContext()));
- Mnemonic = Head;
-
- // Handle condition codes for a branch mnemonic
- if (Head == "b" && Next != StringRef::npos) {
- Start = Next;
- Next = Name.find('.', Start + 1);
- Head = Name.slice(Start + 1, Next);
-
- SMLoc SuffixLoc = SMLoc::getFromPointer(NameLoc.getPointer() +
- (Head.data() - Name.data()));
- unsigned CC = parseCondCodeString(Head);
- if (CC == ~0U)
- return Error(SuffixLoc, "invalid condition code");
- const MCExpr *CCExpr = MCConstantExpr::Create(CC, getContext());
- Operands.push_back(
- ARM64Operand::CreateImm(CCExpr, NameLoc, NameLoc, getContext()));
- }
-
- // Add the remaining tokens in the mnemonic.
- while (Next != StringRef::npos) {
- Start = Next;
- Next = Name.find('.', Start + 1);
- Head = Name.slice(Start, Next);
- SMLoc SuffixLoc = SMLoc::getFromPointer(NameLoc.getPointer() +
- (Head.data() - Name.data()) + 1);
- Operands.push_back(
- ARM64Operand::CreateToken(Head, true, SuffixLoc, getContext()));
- }
-
- // Conditional compare instructions have a Condition Code operand, which
- // needs to be parsed and turned into an immediate operand.
- bool condCodeFourthOperand =
- (Head == "ccmp" || Head == "ccmn" || Head == "fccmp" ||
- Head == "fccmpe" || Head == "fcsel" || Head == "csel" ||
- Head == "csinc" || Head == "csinv" || Head == "csneg");
-
- // These instructions are aliases to some of the conditional select
- // instructions. However, the condition code is inverted in the aliased
- // instruction.
- //
- // FIXME: Is this the correct way to handle these? Or should the parser
- // generate the aliased instructions directly?
- bool condCodeSecondOperand = (Head == "cset" || Head == "csetm");
- bool condCodeThirdOperand =
- (Head == "cinc" || Head == "cinv" || Head == "cneg");
-
- // Read the remaining operands.
- if (getLexer().isNot(AsmToken::EndOfStatement)) {
- // Read the first operand.
- if (parseOperand(Operands, false, false)) {
- Parser.eatToEndOfStatement();
- return true;
- }
-
- unsigned N = 2;
- while (getLexer().is(AsmToken::Comma)) {
- Parser.Lex(); // Eat the comma.
-
- // Parse and remember the operand.
- if (parseOperand(Operands, (N == 4 && condCodeFourthOperand) ||
- (N == 3 && condCodeThirdOperand) ||
- (N == 2 && condCodeSecondOperand),
- condCodeSecondOperand || condCodeThirdOperand)) {
- Parser.eatToEndOfStatement();
- return true;
- }
-
- ++N;
- }
- }
-
- if (getLexer().isNot(AsmToken::EndOfStatement)) {
- SMLoc Loc = Parser.getTok().getLoc();
- Parser.eatToEndOfStatement();
- return Error(Loc, "unexpected token in argument list");
- }
-
- Parser.Lex(); // Consume the EndOfStatement
- return false;
-}
-
-/// isFPR32Register - Check if a register is in the FPR32 register class.
-/// (The parser does not have the target register info to check the register
-/// class directly.)
-static bool isFPR32Register(unsigned Reg) {
- using namespace ARM64;
- switch (Reg) {
- default:
- break;
- case S0: case S1: case S2: case S3: case S4: case S5: case S6:
- case S7: case S8: case S9: case S10: case S11: case S12: case S13:
- case S14: case S15: case S16: case S17: case S18: case S19: case S20:
- case S21: case S22: case S23: case S24: case S25: case S26: case S27:
- case S28: case S29: case S30: case S31:
- return true;
- }
- return false;
-}
-
-/// isGPR32Register - Check if a register is in the GPR32sp register class.
-/// (The parser does not have the target register info to check the register
-/// class directly.)
-static bool isGPR32Register(unsigned Reg) {
- using namespace ARM64;
- switch (Reg) {
- default:
- break;
- case W0: case W1: case W2: case W3: case W4: case W5: case W6:
- case W7: case W8: case W9: case W10: case W11: case W12: case W13:
- case W14: case W15: case W16: case W17: case W18: case W19: case W20:
- case W21: case W22: case W23: case W24: case W25: case W26: case W27:
- case W28: case W29: case W30: case WSP:
- return true;
- }
- return false;
-}
-
-static bool isGPR64Reg(unsigned Reg) {
- using namespace ARM64;
- switch (Reg) {
- case X0: case X1: case X2: case X3: case X4: case X5: case X6:
- case X7: case X8: case X9: case X10: case X11: case X12: case X13:
- case X14: case X15: case X16: case X17: case X18: case X19: case X20:
- case X21: case X22: case X23: case X24: case X25: case X26: case X27:
- case X28: case FP: case LR: case SP: case XZR:
- return true;
- default:
- return false;
- }
-}
-
-
-// FIXME: This entire function is a giant hack to provide us with decent
-// operand range validation/diagnostics until TableGen/MC can be extended
-// to support autogeneration of this kind of validation.
-bool ARM64AsmParser::validateInstruction(MCInst &Inst,
- SmallVectorImpl<SMLoc> &Loc) {
- const MCRegisterInfo *RI = getContext().getRegisterInfo();
- // Check for indexed addressing modes where the base register is the
- // same as a destination/source register, or for a pair load where
- // Rt == Rt2. All of those are undefined behaviour.
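- // e.g. "ldp x0, x1, [x0], #16" and "ldp x0, x0, [x1]" are both diagnosed.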
- switch (Inst.getOpcode()) {
- case ARM64::LDPSWpre:
- case ARM64::LDPWpost:
- case ARM64::LDPWpre:
- case ARM64::LDPXpost:
- case ARM64::LDPXpre: {
- unsigned Rt = Inst.getOperand(0).getReg();
- unsigned Rt2 = Inst.getOperand(1).getReg();
- unsigned Rn = Inst.getOperand(2).getReg();
- if (RI->isSubRegisterEq(Rn, Rt))
- return Error(Loc[0], "unpredictable LDP instruction, writeback base "
- "is also a destination");
- if (RI->isSubRegisterEq(Rn, Rt2))
- return Error(Loc[1], "unpredictable LDP instruction, writeback base "
- "is also a destination");
- // FALLTHROUGH
- }
- case ARM64::LDPDpost:
- case ARM64::LDPDpre:
- case ARM64::LDPQpost:
- case ARM64::LDPQpre:
- case ARM64::LDPSpost:
- case ARM64::LDPSpre:
- case ARM64::LDPSWpost:
- case ARM64::LDPDi:
- case ARM64::LDPQi:
- case ARM64::LDPSi:
- case ARM64::LDPSWi:
- case ARM64::LDPWi:
- case ARM64::LDPXi: {
- unsigned Rt = Inst.getOperand(0).getReg();
- unsigned Rt2 = Inst.getOperand(1).getReg();
- if (Rt == Rt2)
- return Error(Loc[1], "unpredictable LDP instruction, Rt2==Rt");
- break;
- }
- case ARM64::STPDpost:
- case ARM64::STPDpre:
- case ARM64::STPQpost:
- case ARM64::STPQpre:
- case ARM64::STPSpost:
- case ARM64::STPSpre:
- case ARM64::STPWpost:
- case ARM64::STPWpre:
- case ARM64::STPXpost:
- case ARM64::STPXpre: {
- unsigned Rt = Inst.getOperand(0).getReg();
- unsigned Rt2 = Inst.getOperand(1).getReg();
- unsigned Rn = Inst.getOperand(2).getReg();
- if (RI->isSubRegisterEq(Rn, Rt))
- return Error(Loc[0], "unpredictable STP instruction, writeback base "
- "is also a source");
- if (RI->isSubRegisterEq(Rn, Rt2))
- return Error(Loc[1], "unpredictable STP instruction, writeback base "
- "is also a source");
- break;
- }
- case ARM64::LDRBBpre:
- case ARM64::LDRBpre:
- case ARM64::LDRHHpre:
- case ARM64::LDRHpre:
- case ARM64::LDRSBWpre:
- case ARM64::LDRSBXpre:
- case ARM64::LDRSHWpre:
- case ARM64::LDRSHXpre:
- case ARM64::LDRSWpre:
- case ARM64::LDRWpre:
- case ARM64::LDRXpre:
- case ARM64::LDRBBpost:
- case ARM64::LDRBpost:
- case ARM64::LDRHHpost:
- case ARM64::LDRHpost:
- case ARM64::LDRSBWpost:
- case ARM64::LDRSBXpost:
- case ARM64::LDRSHWpost:
- case ARM64::LDRSHXpost:
- case ARM64::LDRSWpost:
- case ARM64::LDRWpost:
- case ARM64::LDRXpost: {
- unsigned Rt = Inst.getOperand(0).getReg();
- unsigned Rn = Inst.getOperand(1).getReg();
- if (RI->isSubRegisterEq(Rn, Rt))
- return Error(Loc[0], "unpredictable LDR instruction, writeback base "
- "is also a source");
- break;
- }
- case ARM64::STRBBpost:
- case ARM64::STRBpost:
- case ARM64::STRHHpost:
- case ARM64::STRHpost:
- case ARM64::STRWpost:
- case ARM64::STRXpost:
- case ARM64::STRBBpre:
- case ARM64::STRBpre:
- case ARM64::STRHHpre:
- case ARM64::STRHpre:
- case ARM64::STRWpre:
- case ARM64::STRXpre: {
- unsigned Rt = Inst.getOperand(0).getReg();
- unsigned Rn = Inst.getOperand(1).getReg();
- if (RI->isSubRegisterEq(Rn, Rt))
- return Error(Loc[0], "unpredictable STR instruction, writeback base "
- "is also a source");
- break;
- }
- }
-
- // Now check immediate ranges. Separate from the above as there is overlap
- // in the instructions being checked and this keeps the nested conditionals
- // to a minimum.
- switch (Inst.getOpcode()) {
- case ARM64::ANDWrs:
- case ARM64::ANDSWrs:
- case ARM64::EORWrs:
- case ARM64::ORRWrs: {
- if (!Inst.getOperand(3).isImm())
- return Error(Loc[3], "immediate value expected");
- int64_t shifter = Inst.getOperand(3).getImm();
- ARM64_AM::ShiftType ST = ARM64_AM::getShiftType(shifter);
- if (ST == ARM64_AM::LSL && shifter > 31)
- return Error(Loc[3], "shift value out of range");
- return false;
- }
- case ARM64::ADDSWri:
- case ARM64::ADDSXri:
- case ARM64::ADDWri:
- case ARM64::ADDXri:
- case ARM64::SUBSWri:
- case ARM64::SUBSXri:
- case ARM64::SUBWri:
- case ARM64::SUBXri: {
- if (!Inst.getOperand(3).isImm())
- return Error(Loc[3], "immediate value expected");
- int64_t shifter = Inst.getOperand(3).getImm();
- if (shifter != 0 && shifter != 12)
- return Error(Loc[3], "shift value out of range");
- // The imm12 operand can be an expression. Validate that it's legit.
- // FIXME: We really, really want to allow arbitrary expressions here
- // and resolve the value and validate the result at fixup time, but
- // that's hard as we have long since lost any source information we
- // need to generate good diagnostics by that point.
- if (Inst.getOpcode() == ARM64::ADDXri && Inst.getOperand(2).isExpr()) {
- const MCExpr *Expr = Inst.getOperand(2).getExpr();
- ARM64MCExpr::VariantKind ELFRefKind;
- MCSymbolRefExpr::VariantKind DarwinRefKind;
- const MCConstantExpr *Addend;
- if (!classifySymbolRef(Expr, ELFRefKind, DarwinRefKind, Addend)) {
- return Error(Loc[2], "invalid immediate expression");
- }
-
- if (DarwinRefKind == MCSymbolRefExpr::VK_PAGEOFF ||
- DarwinRefKind == MCSymbolRefExpr::VK_TLVPPAGEOFF ||
- ELFRefKind == ARM64MCExpr::VK_LO12 ||
- ELFRefKind == ARM64MCExpr::VK_DTPREL_LO12 ||
- ELFRefKind == ARM64MCExpr::VK_DTPREL_LO12_NC ||
- ELFRefKind == ARM64MCExpr::VK_TPREL_LO12 ||
- ELFRefKind == ARM64MCExpr::VK_TPREL_LO12_NC ||
- ELFRefKind == ARM64MCExpr::VK_TLSDESC_LO12) {
- // Note that we don't range-check the addend. It's adjusted
- // modulo page size when converted, so there is no "out of range"
- // condition when using @pageoff. Any validity checking for the value
- // was done in the is*() predicate function.
- return false;
- } else if (DarwinRefKind == MCSymbolRefExpr::VK_GOTPAGEOFF) {
- // @gotpageoff can only be used directly, not with an addend.
- return Addend != 0;
- }
-
- // Otherwise, we're not sure, so don't allow it for now.
- return Error(Loc[2], "invalid immediate expression");
- }
-
- // If it's anything but an immediate, it's not legit.
- if (!Inst.getOperand(2).isImm())
- return Error(Loc[2], "invalid immediate expression");
- int64_t imm = Inst.getOperand(2).getImm();
- if (imm > 4095 || imm < 0)
- return Error(Loc[2], "immediate value out of range");
- return false;
- }
- case ARM64::LDRBpre:
- case ARM64::LDRHpre:
- case ARM64::LDRSBWpre:
- case ARM64::LDRSBXpre:
- case ARM64::LDRSHWpre:
- case ARM64::LDRSHXpre:
- case ARM64::LDRWpre:
- case ARM64::LDRXpre:
- case ARM64::LDRSpre:
- case ARM64::LDRDpre:
- case ARM64::LDRQpre:
- case ARM64::STRBpre:
- case ARM64::STRHpre:
- case ARM64::STRWpre:
- case ARM64::STRXpre:
- case ARM64::STRSpre:
- case ARM64::STRDpre:
- case ARM64::STRQpre:
- case ARM64::LDRBpost:
- case ARM64::LDRHpost:
- case ARM64::LDRSBWpost:
- case ARM64::LDRSBXpost:
- case ARM64::LDRSHWpost:
- case ARM64::LDRSHXpost:
- case ARM64::LDRWpost:
- case ARM64::LDRXpost:
- case ARM64::LDRSpost:
- case ARM64::LDRDpost:
- case ARM64::LDRQpost:
- case ARM64::STRBpost:
- case ARM64::STRHpost:
- case ARM64::STRWpost:
- case ARM64::STRXpost:
- case ARM64::STRSpost:
- case ARM64::STRDpost:
- case ARM64::STRQpost:
- case ARM64::LDTRXi:
- case ARM64::LDTRWi:
- case ARM64::LDTRHi:
- case ARM64::LDTRBi:
- case ARM64::LDTRSHWi:
- case ARM64::LDTRSHXi:
- case ARM64::LDTRSBWi:
- case ARM64::LDTRSBXi:
- case ARM64::LDTRSWi:
- case ARM64::STTRWi:
- case ARM64::STTRXi:
- case ARM64::STTRHi:
- case ARM64::STTRBi:
- case ARM64::LDURWi:
- case ARM64::LDURXi:
- case ARM64::LDURSi:
- case ARM64::LDURDi:
- case ARM64::LDURQi:
- case ARM64::LDURHi:
- case ARM64::LDURBi:
- case ARM64::LDURSHWi:
- case ARM64::LDURSHXi:
- case ARM64::LDURSBWi:
- case ARM64::LDURSBXi:
- case ARM64::LDURSWi:
- case ARM64::PRFUMi:
- case ARM64::STURWi:
- case ARM64::STURXi:
- case ARM64::STURSi:
- case ARM64::STURDi:
- case ARM64::STURQi:
- case ARM64::STURHi:
- case ARM64::STURBi: {
- // FIXME: Should accept expressions and error in fixup evaluation
- // if out of range.
- if (!Inst.getOperand(2).isImm())
- return Error(Loc[1], "immediate value expected");
- int64_t offset = Inst.getOperand(2).getImm();
- if (offset > 255 || offset < -256)
- return Error(Loc[1], "offset value out of range");
- return false;
- }
- case ARM64::LDRSro:
- case ARM64::LDRWro:
- case ARM64::LDRSWro:
- case ARM64::STRWro:
- case ARM64::STRSro: {
- // FIXME: Should accept expressions and error in fixup evaluation
- // if out of range.
- if (!Inst.getOperand(3).isImm())
- return Error(Loc[1], "immediate value expected");
- int64_t shift = Inst.getOperand(3).getImm();
- ARM64_AM::ExtendType type = ARM64_AM::getMemExtendType(shift);
- if (type != ARM64_AM::UXTW && type != ARM64_AM::UXTX &&
- type != ARM64_AM::SXTW && type != ARM64_AM::SXTX)
- return Error(Loc[1], "shift type invalid");
- return false;
- }
- case ARM64::LDRDro:
- case ARM64::LDRQro:
- case ARM64::LDRXro:
- case ARM64::PRFMro:
- case ARM64::STRXro:
- case ARM64::STRDro:
- case ARM64::STRQro: {
- // FIXME: Should accept expressions and error in fixup evaluation
- // if out of range.
- if (!Inst.getOperand(3).isImm())
- return Error(Loc[1], "immediate value expected");
- int64_t shift = Inst.getOperand(3).getImm();
- ARM64_AM::ExtendType type = ARM64_AM::getMemExtendType(shift);
- if (type != ARM64_AM::UXTW && type != ARM64_AM::UXTX &&
- type != ARM64_AM::SXTW && type != ARM64_AM::SXTX)
- return Error(Loc[1], "shift type invalid");
- return false;
- }
- case ARM64::LDRHro:
- case ARM64::LDRHHro:
- case ARM64::LDRSHWro:
- case ARM64::LDRSHXro:
- case ARM64::STRHro:
- case ARM64::STRHHro: {
- // FIXME: Should accept expressions and error in fixup evaluation
- // if out of range.
- if (!Inst.getOperand(3).isImm())
- return Error(Loc[1], "immediate value expected");
- int64_t shift = Inst.getOperand(3).getImm();
- ARM64_AM::ExtendType type = ARM64_AM::getMemExtendType(shift);
- if (type != ARM64_AM::UXTW && type != ARM64_AM::UXTX &&
- type != ARM64_AM::SXTW && type != ARM64_AM::SXTX)
- return Error(Loc[1], "shift type invalid");
- return false;
- }
- case ARM64::LDRBro:
- case ARM64::LDRBBro:
- case ARM64::LDRSBWro:
- case ARM64::LDRSBXro:
- case ARM64::STRBro:
- case ARM64::STRBBro: {
- // FIXME: Should accept expressions and error in fixup evaluation
- // if out of range.
- if (!Inst.getOperand(3).isImm())
- return Error(Loc[1], "immediate value expected");
- int64_t shift = Inst.getOperand(3).getImm();
- ARM64_AM::ExtendType type = ARM64_AM::getMemExtendType(shift);
- if (type != ARM64_AM::UXTW && type != ARM64_AM::UXTX &&
- type != ARM64_AM::SXTW && type != ARM64_AM::SXTX)
- return Error(Loc[1], "shift type invalid");
- return false;
- }
- case ARM64::LDPWi:
- case ARM64::LDPXi:
- case ARM64::LDPSi:
- case ARM64::LDPDi:
- case ARM64::LDPQi:
- case ARM64::LDPSWi:
- case ARM64::STPWi:
- case ARM64::STPXi:
- case ARM64::STPSi:
- case ARM64::STPDi:
- case ARM64::STPQi:
- case ARM64::LDPWpre:
- case ARM64::LDPXpre:
- case ARM64::LDPSpre:
- case ARM64::LDPDpre:
- case ARM64::LDPQpre:
- case ARM64::LDPSWpre:
- case ARM64::STPWpre:
- case ARM64::STPXpre:
- case ARM64::STPSpre:
- case ARM64::STPDpre:
- case ARM64::STPQpre:
- case ARM64::LDPWpost:
- case ARM64::LDPXpost:
- case ARM64::LDPSpost:
- case ARM64::LDPDpost:
- case ARM64::LDPQpost:
- case ARM64::LDPSWpost:
- case ARM64::STPWpost:
- case ARM64::STPXpost:
- case ARM64::STPSpost:
- case ARM64::STPDpost:
- case ARM64::STPQpost:
- case ARM64::LDNPWi:
- case ARM64::LDNPXi:
- case ARM64::LDNPSi:
- case ARM64::LDNPDi:
- case ARM64::LDNPQi:
- case ARM64::STNPWi:
- case ARM64::STNPXi:
- case ARM64::STNPSi:
- case ARM64::STNPDi:
- case ARM64::STNPQi: {
- // FIXME: Should accept expressions and error in fixup evaluation
- // if out of range.
- if (!Inst.getOperand(3).isImm())
- return Error(Loc[2], "immediate value expected");
- int64_t offset = Inst.getOperand(3).getImm();
- if (offset > 63 || offset < -64)
- return Error(Loc[2], "offset value out of range");
- return false;
- }
- default:
- return false;
- }
-}
-
-static void rewriteMOV(ARM64AsmParser::OperandVector &Operands,
- StringRef mnemonic, uint64_t imm, unsigned shift,
- MCContext &Context) {
- ARM64Operand *Op = static_cast<ARM64Operand *>(Operands[0]);
- ARM64Operand *Op2 = static_cast<ARM64Operand *>(Operands[2]);
- Operands[0] =
- ARM64Operand::CreateToken(mnemonic, false, Op->getStartLoc(), Context);
-
- const MCExpr *NewImm = MCConstantExpr::Create(imm >> shift, Context);
- Operands[2] = ARM64Operand::CreateImm(NewImm, Op2->getStartLoc(),
- Op2->getEndLoc(), Context);
-
- Operands.push_back(ARM64Operand::CreateShifter(
- ARM64_AM::LSL, shift, Op2->getStartLoc(), Op2->getEndLoc(), Context));
- delete Op2;
- delete Op;
-}
-
-bool ARM64AsmParser::showMatchError(SMLoc Loc, unsigned ErrCode) {
- switch (ErrCode) {
- case Match_MissingFeature:
- return Error(Loc,
- "instruction requires a CPU feature not currently enabled");
- case Match_InvalidOperand:
- return Error(Loc, "invalid operand for instruction");
- case Match_InvalidSuffix:
- return Error(Loc, "invalid type suffix for instruction");
- case Match_InvalidMemoryIndexedSImm9:
- return Error(Loc, "index must be an integer in range [-256,255].");
- case Match_InvalidMemoryIndexed32SImm7:
- return Error(Loc, "index must be a multiple of 4 in range [-256,252].");
- case Match_InvalidMemoryIndexed64SImm7:
- return Error(Loc, "index must be a multiple of 8 in range [-512,504].");
- case Match_InvalidMemoryIndexed128SImm7:
- return Error(Loc, "index must be a multiple of 16 in range [-1024,1008].");
- case Match_InvalidMemoryIndexed8:
- return Error(Loc, "index must be an integer in range [0,4095].");
- case Match_InvalidMemoryIndexed16:
- return Error(Loc, "index must be a multiple of 2 in range [0,8190].");
- case Match_InvalidMemoryIndexed32:
- return Error(Loc, "index must be a multiple of 4 in range [0,16380].");
- case Match_InvalidMemoryIndexed64:
- return Error(Loc, "index must be a multiple of 8 in range [0,32760].");
- case Match_InvalidMemoryIndexed128:
- return Error(Loc, "index must be a multiple of 16 in range [0,65520].");
- case Match_InvalidImm1_8:
- return Error(Loc, "immediate must be an integer in range [1,8].");
- case Match_InvalidImm1_16:
- return Error(Loc, "immediate must be an integer in range [1,16].");
- case Match_InvalidImm1_32:
- return Error(Loc, "immediate must be an integer in range [1,32].");
- case Match_InvalidImm1_64:
- return Error(Loc, "immediate must be an integer in range [1,64].");
- case Match_MnemonicFail:
- return Error(Loc, "unrecognized instruction mnemonic");
- default:
- assert(0 && "unexpected error code!");
- return Error(Loc, "invalid instruction format");
- }
-}
-
-bool ARM64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
- OperandVector &Operands,
- MCStreamer &Out,
- unsigned &ErrorInfo,
- bool MatchingInlineAsm) {
- assert(!Operands.empty() && "Unexpected empty operand list!");
- ARM64Operand *Op = static_cast<ARM64Operand *>(Operands[0]);
- assert(Op->isToken() && "Leading operand should always be a mnemonic!");
-
- StringRef Tok = Op->getToken();
- // Translate CMN/CMP pseudos to ADDS/SUBS with zero register destination.
- // This needs to be done before the special handling of ADD/SUB immediates.
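- // e.g. "cmp w1, #7" is rewritten here to "subs wzr, w1, #7".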
- if (Tok == "cmp" || Tok == "cmn") {
- // Replace the opcode with either ADDS or SUBS.
- const char *Repl = StringSwitch<const char *>(Tok)
- .Case("cmp", "subs")
- .Case("cmn", "adds")
- .Default(0);
- assert(Repl && "Unknown compare instruction");
- delete Operands[0];
- Operands[0] = ARM64Operand::CreateToken(Repl, false, IDLoc, getContext());
-
- // Insert WZR or XZR as destination operand.
- ARM64Operand *RegOp = static_cast<ARM64Operand *>(Operands[1]);
- unsigned ZeroReg;
- if (RegOp->isReg() &&
- (isGPR32Register(RegOp->getReg()) || RegOp->getReg() == ARM64::WZR))
- ZeroReg = ARM64::WZR;
- else
- ZeroReg = ARM64::XZR;
- Operands.insert(
- Operands.begin() + 1,
- ARM64Operand::CreateReg(ZeroReg, false, IDLoc, IDLoc, getContext()));
- // Update since we modified it above.
- ARM64Operand *Op = static_cast<ARM64Operand *>(Operands[0]);
- Tok = Op->getToken();
- }
-
- unsigned NumOperands = Operands.size();
-
- if (Tok == "mov" && NumOperands == 3) {
- // The MOV mnemonic is aliased to movn/movz, depending on the value of
- // the immediate being instantiated.
- // FIXME: Catching this here is a total hack, and we should use tblgen
- // support to implement this instead as soon as it is available.
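- // e.g. "mov x0, #0x20000" is rewritten to "movz x0, #2, lsl #16", and
- // "mov x0, #-1" to "movn x0, #0".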
-
- ARM64Operand *Op2 = static_cast<ARM64Operand *>(Operands[2]);
- if (Op2->isImm()) {
- if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Op2->getImm())) {
- uint64_t Val = CE->getValue();
- uint64_t NVal = ~Val;
-
- // If this is a 32-bit register and the value has none of the upper
- // set, clear the complemented upper 32-bits so the logic below works
- // for 32-bit registers too.
- ARM64Operand *Op1 = static_cast<ARM64Operand *>(Operands[1]);
- if (Op1->isReg() && isGPR32Register(Op1->getReg()) &&
- (Val & 0xFFFFFFFFULL) == Val)
- NVal &= 0x00000000FFFFFFFFULL;
-
-      // MOVZ Rd, imm << 0
- if ((Val & 0xFFFF) == Val)
- rewriteMOV(Operands, "movz", Val, 0, getContext());
-
-      // MOVZ Rd, imm << 16
- else if ((Val & 0xFFFF0000ULL) == Val)
- rewriteMOV(Operands, "movz", Val, 16, getContext());
-
-      // MOVZ Rd, imm << 32
- else if ((Val & 0xFFFF00000000ULL) == Val)
- rewriteMOV(Operands, "movz", Val, 32, getContext());
-
-      // MOVZ Rd, imm << 48
- else if ((Val & 0xFFFF000000000000ULL) == Val)
- rewriteMOV(Operands, "movz", Val, 48, getContext());
-
-      // MOVN Rd, ~(imm << 0)
- else if ((NVal & 0xFFFFULL) == NVal)
- rewriteMOV(Operands, "movn", NVal, 0, getContext());
-
- // MOVN Rd, ~(imm << 16)
- else if ((NVal & 0xFFFF0000ULL) == NVal)
- rewriteMOV(Operands, "movn", NVal, 16, getContext());
-
- // MOVN Rd, ~(imm << 32)
- else if ((NVal & 0xFFFF00000000ULL) == NVal)
- rewriteMOV(Operands, "movn", NVal, 32, getContext());
-
- // MOVN Rd, ~(imm << 48)
- else if ((NVal & 0xFFFF000000000000ULL) == NVal)
- rewriteMOV(Operands, "movn", NVal, 48, getContext());
- }
- }
- } else if (NumOperands == 4) {
- if (Tok == "add" || Tok == "adds" || Tok == "sub" || Tok == "subs") {
- // Handle the uimm24 immediate form, where the shift is not specified.
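-      // For example, "add x0, x1, #5" gets an implicit "lsl #0" appended,
-      // while "add x0, x1, #0x12000" is rewritten as
-      // "add x0, x1, #0x12, lsl #12". Anything that fits neither form is
-      // rejected as too large.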
- ARM64Operand *Op3 = static_cast<ARM64Operand *>(Operands[3]);
- if (Op3->isImm()) {
- if (const MCConstantExpr *CE =
- dyn_cast<MCConstantExpr>(Op3->getImm())) {
- uint64_t Val = CE->getValue();
- if (Val >= (1 << 24)) {
- Error(IDLoc, "immediate value is too large");
- return true;
- }
- if (Val < (1 << 12)) {
- Operands.push_back(ARM64Operand::CreateShifter(
- ARM64_AM::LSL, 0, IDLoc, IDLoc, getContext()));
- } else if ((Val & 0xfff) == 0) {
- delete Operands[3];
- CE = MCConstantExpr::Create(Val >> 12, getContext());
- Operands[3] =
- ARM64Operand::CreateImm(CE, IDLoc, IDLoc, getContext());
- Operands.push_back(ARM64Operand::CreateShifter(
- ARM64_AM::LSL, 12, IDLoc, IDLoc, getContext()));
- } else {
- Error(IDLoc, "immediate value is too large");
- return true;
- }
- } else {
- Operands.push_back(ARM64Operand::CreateShifter(
- ARM64_AM::LSL, 0, IDLoc, IDLoc, getContext()));
- }
- }
-
-    // FIXME: Horrible hack to handle the LSL -> UBFM alias.
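-    // For example, "lsl x0, x1, #4" is rewritten as "ubfm x0, x1, #60, #59"
-    // and "lsl w0, w1, #4" as "ubfm w0, w1, #28, #27".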
- } else if (NumOperands == 4 && Tok == "lsl") {
- ARM64Operand *Op2 = static_cast<ARM64Operand *>(Operands[2]);
- ARM64Operand *Op3 = static_cast<ARM64Operand *>(Operands[3]);
- if (Op2->isReg() && Op3->isImm()) {
- const MCConstantExpr *Op3CE = dyn_cast<MCConstantExpr>(Op3->getImm());
- if (Op3CE) {
- uint64_t Op3Val = Op3CE->getValue();
- uint64_t NewOp3Val = 0;
- uint64_t NewOp4Val = 0;
- if (isGPR32Register(Op2->getReg()) || Op2->getReg() == ARM64::WZR) {
- NewOp3Val = (32 - Op3Val) & 0x1f;
- NewOp4Val = 31 - Op3Val;
- } else {
- NewOp3Val = (64 - Op3Val) & 0x3f;
- NewOp4Val = 63 - Op3Val;
- }
-
- const MCExpr *NewOp3 =
- MCConstantExpr::Create(NewOp3Val, getContext());
- const MCExpr *NewOp4 =
- MCConstantExpr::Create(NewOp4Val, getContext());
-
- Operands[0] = ARM64Operand::CreateToken(
- "ubfm", false, Op->getStartLoc(), getContext());
- Operands[3] = ARM64Operand::CreateImm(NewOp3, Op3->getStartLoc(),
- Op3->getEndLoc(), getContext());
- Operands.push_back(ARM64Operand::CreateImm(
- NewOp4, Op3->getStartLoc(), Op3->getEndLoc(), getContext()));
- delete Op3;
- delete Op;
- }
- }
-
- // FIXME: Horrible hack to handle the optional LSL shift for vector
- // instructions.
- } else if (NumOperands == 4 && (Tok == "bic" || Tok == "orr")) {
- ARM64Operand *Op1 = static_cast<ARM64Operand *>(Operands[1]);
- ARM64Operand *Op2 = static_cast<ARM64Operand *>(Operands[2]);
- ARM64Operand *Op3 = static_cast<ARM64Operand *>(Operands[3]);
- if ((Op1->isToken() && Op2->isVectorReg() && Op3->isImm()) ||
- (Op1->isVectorReg() && Op2->isToken() && Op3->isImm()))
- Operands.push_back(ARM64Operand::CreateShifter(ARM64_AM::LSL, 0, IDLoc,
- IDLoc, getContext()));
- } else if (NumOperands == 4 && (Tok == "movi" || Tok == "mvni")) {
- ARM64Operand *Op1 = static_cast<ARM64Operand *>(Operands[1]);
- ARM64Operand *Op2 = static_cast<ARM64Operand *>(Operands[2]);
- ARM64Operand *Op3 = static_cast<ARM64Operand *>(Operands[3]);
- if ((Op1->isToken() && Op2->isVectorReg() && Op3->isImm()) ||
- (Op1->isVectorReg() && Op2->isToken() && Op3->isImm())) {
- StringRef Suffix = Op1->isToken() ? Op1->getToken() : Op2->getToken();
- // Canonicalize on lower-case for ease of comparison.
- std::string CanonicalSuffix = Suffix.lower();
- if (Tok != "movi" ||
- (CanonicalSuffix != ".1d" && CanonicalSuffix != ".2d" &&
- CanonicalSuffix != ".8b" && CanonicalSuffix != ".16b"))
- Operands.push_back(ARM64Operand::CreateShifter(
- ARM64_AM::LSL, 0, IDLoc, IDLoc, getContext()));
- }
- }
- } else if (NumOperands == 5) {
- // FIXME: Horrible hack to handle the BFI -> BFM, SBFIZ->SBFM, and
- // UBFIZ -> UBFM aliases.
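-    // For example, "bfi x0, x1, #8, #16" is rewritten as
-    // "bfm x0, x1, #56, #15": the LSB is negated modulo the register width
-    // and the width becomes width - 1.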
- if (Tok == "bfi" || Tok == "sbfiz" || Tok == "ubfiz") {
- ARM64Operand *Op1 = static_cast<ARM64Operand *>(Operands[1]);
- ARM64Operand *Op3 = static_cast<ARM64Operand *>(Operands[3]);
- ARM64Operand *Op4 = static_cast<ARM64Operand *>(Operands[4]);
-
- if (Op1->isReg() && Op3->isImm() && Op4->isImm()) {
- const MCConstantExpr *Op3CE = dyn_cast<MCConstantExpr>(Op3->getImm());
- const MCConstantExpr *Op4CE = dyn_cast<MCConstantExpr>(Op4->getImm());
-
- if (Op3CE && Op4CE) {
- uint64_t Op3Val = Op3CE->getValue();
- uint64_t Op4Val = Op4CE->getValue();
-
- uint64_t NewOp3Val = 0;
- if (isGPR32Register(Op1->getReg()))
- NewOp3Val = (32 - Op3Val) & 0x1f;
- else
- NewOp3Val = (64 - Op3Val) & 0x3f;
-
- uint64_t NewOp4Val = Op4Val - 1;
-
- const MCExpr *NewOp3 =
- MCConstantExpr::Create(NewOp3Val, getContext());
- const MCExpr *NewOp4 =
- MCConstantExpr::Create(NewOp4Val, getContext());
- Operands[3] = ARM64Operand::CreateImm(NewOp3, Op3->getStartLoc(),
- Op3->getEndLoc(), getContext());
- Operands[4] = ARM64Operand::CreateImm(NewOp4, Op4->getStartLoc(),
- Op4->getEndLoc(), getContext());
- if (Tok == "bfi")
- Operands[0] = ARM64Operand::CreateToken(
- "bfm", false, Op->getStartLoc(), getContext());
- else if (Tok == "sbfiz")
- Operands[0] = ARM64Operand::CreateToken(
- "sbfm", false, Op->getStartLoc(), getContext());
- else if (Tok == "ubfiz")
- Operands[0] = ARM64Operand::CreateToken(
- "ubfm", false, Op->getStartLoc(), getContext());
- else
- llvm_unreachable("No valid mnemonic for alias?");
-
- delete Op;
- delete Op3;
- delete Op4;
- }
- }
-
- // FIXME: Horrible hack to handle the BFXIL->BFM, SBFX->SBFM, and
- // UBFX -> UBFM aliases.
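-    // For example, "ubfx x0, x1, #8, #16" is rewritten as
-    // "ubfm x0, x1, #8, #23", i.e. the last operand becomes lsb + width - 1.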
- } else if (NumOperands == 5 &&
- (Tok == "bfxil" || Tok == "sbfx" || Tok == "ubfx")) {
- ARM64Operand *Op1 = static_cast<ARM64Operand *>(Operands[1]);
- ARM64Operand *Op3 = static_cast<ARM64Operand *>(Operands[3]);
- ARM64Operand *Op4 = static_cast<ARM64Operand *>(Operands[4]);
-
- if (Op1->isReg() && Op3->isImm() && Op4->isImm()) {
- const MCConstantExpr *Op3CE = dyn_cast<MCConstantExpr>(Op3->getImm());
- const MCConstantExpr *Op4CE = dyn_cast<MCConstantExpr>(Op4->getImm());
-
- if (Op3CE && Op4CE) {
- uint64_t Op3Val = Op3CE->getValue();
- uint64_t Op4Val = Op4CE->getValue();
- uint64_t NewOp4Val = Op3Val + Op4Val - 1;
-
- if (NewOp4Val >= Op3Val) {
- const MCExpr *NewOp4 =
- MCConstantExpr::Create(NewOp4Val, getContext());
- Operands[4] = ARM64Operand::CreateImm(
- NewOp4, Op4->getStartLoc(), Op4->getEndLoc(), getContext());
- if (Tok == "bfxil")
- Operands[0] = ARM64Operand::CreateToken(
- "bfm", false, Op->getStartLoc(), getContext());
- else if (Tok == "sbfx")
- Operands[0] = ARM64Operand::CreateToken(
- "sbfm", false, Op->getStartLoc(), getContext());
- else if (Tok == "ubfx")
- Operands[0] = ARM64Operand::CreateToken(
- "ubfm", false, Op->getStartLoc(), getContext());
- else
- llvm_unreachable("No valid mnemonic for alias?");
-
- delete Op;
- delete Op4;
- }
- }
- }
- }
- }
- // FIXME: Horrible hack for tbz and tbnz with Wn register operand.
- // InstAlias can't quite handle this since the reg classes aren't
- // subclasses.
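-  // For example, "tbz w2, #3, target" is rewritten so the matcher sees
-  // "tbz x2, #3, target"; testing a bit below 32 behaves identically on
-  // the X register.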
- if (NumOperands == 4 && (Tok == "tbz" || Tok == "tbnz")) {
- ARM64Operand *Op = static_cast<ARM64Operand *>(Operands[2]);
- if (Op->isImm()) {
- if (const MCConstantExpr *OpCE = dyn_cast<MCConstantExpr>(Op->getImm())) {
- if (OpCE->getValue() < 32) {
- // The source register can be Wn here, but the matcher expects a
- // GPR64. Twiddle it here if necessary.
- ARM64Operand *Op = static_cast<ARM64Operand *>(Operands[1]);
- if (Op->isReg()) {
- unsigned Reg = getXRegFromWReg(Op->getReg());
- Operands[1] = ARM64Operand::CreateReg(
- Reg, false, Op->getStartLoc(), Op->getEndLoc(), getContext());
- delete Op;
- }
- }
- }
- }
- }
- // FIXME: Horrible hack for sxtw and uxtw with Wn src and Xd dst operands.
- // InstAlias can't quite handle this since the reg classes aren't
- // subclasses.
- if (NumOperands == 3 && (Tok == "sxtw" || Tok == "uxtw")) {
- // The source register can be Wn here, but the matcher expects a
- // GPR64. Twiddle it here if necessary.
- ARM64Operand *Op = static_cast<ARM64Operand *>(Operands[2]);
- if (Op->isReg()) {
- unsigned Reg = getXRegFromWReg(Op->getReg());
- Operands[2] = ARM64Operand::CreateReg(Reg, false, Op->getStartLoc(),
- Op->getEndLoc(), getContext());
- delete Op;
- }
- }
-  // FIXME: Likewise for [su]xt[bh] with an Xd dst operand
- else if (NumOperands == 3 &&
- (Tok == "sxtb" || Tok == "uxtb" || Tok == "sxth" || Tok == "uxth")) {
- ARM64Operand *Op = static_cast<ARM64Operand *>(Operands[1]);
- if (Op->isReg() && isGPR64Reg(Op->getReg())) {
- // The source register can be Wn here, but the matcher expects a
- // GPR64. Twiddle it here if necessary.
- ARM64Operand *Op = static_cast<ARM64Operand *>(Operands[2]);
- if (Op->isReg()) {
- unsigned Reg = getXRegFromWReg(Op->getReg());
- Operands[2] = ARM64Operand::CreateReg(Reg, false, Op->getStartLoc(),
- Op->getEndLoc(), getContext());
- delete Op;
- }
- }
- }
-
- // Yet another horrible hack to handle FMOV Rd, #0.0 using [WX]ZR.
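-  // For example, "fmov s0, #0.0" becomes "fmov s0, wzr" and
-  // "fmov d0, #0.0" becomes "fmov d0, xzr".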
- if (NumOperands == 3 && Tok == "fmov") {
- ARM64Operand *RegOp = static_cast<ARM64Operand *>(Operands[1]);
- ARM64Operand *ImmOp = static_cast<ARM64Operand *>(Operands[2]);
- if (RegOp->isReg() && ImmOp->isFPImm() &&
- ImmOp->getFPImm() == (unsigned)-1) {
- unsigned zreg =
- isFPR32Register(RegOp->getReg()) ? ARM64::WZR : ARM64::XZR;
- Operands[2] = ARM64Operand::CreateReg(zreg, false, Op->getStartLoc(),
- Op->getEndLoc(), getContext());
- delete ImmOp;
- }
- }
-
- // FIXME: Horrible hack to handle the literal .d[1] vector index on
- // FMOV instructions. The index isn't an actual instruction operand
- // but rather syntactic sugar. It really should be part of the mnemonic,
- // not the operand, but whatever.
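-  // For example, in "fmov v0.d[1], x1" and "fmov x1, v0.d[1]" the ".d[1]"
-  // is rewritten into the "[" "1" "]" token sequence the matcher expects.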
- if ((NumOperands == 5) && Tok == "fmov") {
- // If the last operand is a vectorindex of '1', then replace it with
- // a '[' '1' ']' token sequence, which is what the matcher
- // (annoyingly) expects for a literal vector index operand.
- ARM64Operand *Op = static_cast<ARM64Operand *>(Operands[NumOperands - 1]);
- if (Op->isVectorIndexD() && Op->getVectorIndex() == 1) {
- SMLoc Loc = Op->getStartLoc();
- Operands.pop_back();
- Operands.push_back(
- ARM64Operand::CreateToken("[", false, Loc, getContext()));
- Operands.push_back(
- ARM64Operand::CreateToken("1", false, Loc, getContext()));
- Operands.push_back(
- ARM64Operand::CreateToken("]", false, Loc, getContext()));
- } else if (Op->isReg()) {
- // Similarly, check the destination operand for the GPR->High-lane
- // variant.
- unsigned OpNo = NumOperands - 2;
- ARM64Operand *Op = static_cast<ARM64Operand *>(Operands[OpNo]);
- if (Op->isVectorIndexD() && Op->getVectorIndex() == 1) {
- SMLoc Loc = Op->getStartLoc();
- Operands[OpNo] =
- ARM64Operand::CreateToken("[", false, Loc, getContext());
- Operands.insert(
- Operands.begin() + OpNo + 1,
- ARM64Operand::CreateToken("1", false, Loc, getContext()));
- Operands.insert(
- Operands.begin() + OpNo + 2,
- ARM64Operand::CreateToken("]", false, Loc, getContext()));
- }
- }
- }
-
- MCInst Inst;
- // First try to match against the secondary set of tables containing the
- // short-form NEON instructions (e.g. "fadd.2s v0, v1, v2").
- unsigned MatchResult =
- MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm, 1);
-
- // If that fails, try against the alternate table containing long-form NEON:
- // "fadd v0.2s, v1.2s, v2.2s"
- if (MatchResult != Match_Success)
- MatchResult =
- MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm, 0);
-
- switch (MatchResult) {
- case Match_Success: {
- // Perform range checking and other semantic validations
- SmallVector<SMLoc, 8> OperandLocs;
- NumOperands = Operands.size();
- for (unsigned i = 1; i < NumOperands; ++i)
- OperandLocs.push_back(Operands[i]->getStartLoc());
- if (validateInstruction(Inst, OperandLocs))
- return true;
-
- Inst.setLoc(IDLoc);
- Out.EmitInstruction(Inst, STI);
- return false;
- }
- case Match_MissingFeature:
- case Match_MnemonicFail:
- return showMatchError(IDLoc, MatchResult);
- case Match_InvalidOperand: {
- SMLoc ErrorLoc = IDLoc;
- if (ErrorInfo != ~0U) {
- if (ErrorInfo >= Operands.size())
- return Error(IDLoc, "too few operands for instruction");
-
- ErrorLoc = ((ARM64Operand *)Operands[ErrorInfo])->getStartLoc();
- if (ErrorLoc == SMLoc())
- ErrorLoc = IDLoc;
- }
-    // If the match failed on a suffix token operand, tweak the diagnostic
-    // accordingly. Guard against ErrorInfo being ~0U (no specific operand),
-    // which would otherwise index out of bounds here.
-    if (ErrorInfo != ~0U &&
-        ((ARM64Operand *)Operands[ErrorInfo])->isToken() &&
-        ((ARM64Operand *)Operands[ErrorInfo])->isTokenSuffix())
-      MatchResult = Match_InvalidSuffix;
-
- return showMatchError(ErrorLoc, MatchResult);
- }
- case Match_InvalidMemoryIndexedSImm9: {
- // If there is not a '!' after the memory operand that failed, we really
- // want the diagnostic for the non-pre-indexed instruction variant instead.
- // Be careful to check for the post-indexed variant as well, which also
- // uses this match diagnostic. Also exclude the explicitly unscaled
- // mnemonics, as they want the unscaled diagnostic as well.
- if (Operands.size() == ErrorInfo + 1 &&
- !((ARM64Operand *)Operands[ErrorInfo])->isImm() &&
- !Tok.startswith("stur") && !Tok.startswith("ldur")) {
-      // Whether we want an Indexed64 or Indexed32 diagnostic depends on
-      // the register class of the previous operand. Default to 64 in case
-      // we see something unexpected.
- MatchResult = Match_InvalidMemoryIndexed64;
- if (ErrorInfo) {
- ARM64Operand *PrevOp = (ARM64Operand *)Operands[ErrorInfo - 1];
- if (PrevOp->isReg() && ARM64MCRegisterClasses[ARM64::GPR32RegClassID]
- .contains(PrevOp->getReg()))
- MatchResult = Match_InvalidMemoryIndexed32;
- }
- }
- SMLoc ErrorLoc = ((ARM64Operand *)Operands[ErrorInfo])->getStartLoc();
- if (ErrorLoc == SMLoc())
- ErrorLoc = IDLoc;
- return showMatchError(ErrorLoc, MatchResult);
- }
- case Match_InvalidMemoryIndexed32:
- case Match_InvalidMemoryIndexed64:
- case Match_InvalidMemoryIndexed128:
- // If there is a '!' after the memory operand that failed, we really
- // want the diagnostic for the pre-indexed instruction variant instead.
- if (Operands.size() > ErrorInfo + 1 &&
- ((ARM64Operand *)Operands[ErrorInfo + 1])->isTokenEqual("!"))
- MatchResult = Match_InvalidMemoryIndexedSImm9;
- // FALL THROUGH
- case Match_InvalidMemoryIndexed8:
- case Match_InvalidMemoryIndexed16:
- case Match_InvalidMemoryIndexed32SImm7:
- case Match_InvalidMemoryIndexed64SImm7:
- case Match_InvalidMemoryIndexed128SImm7:
- case Match_InvalidImm1_8:
- case Match_InvalidImm1_16:
- case Match_InvalidImm1_32:
- case Match_InvalidImm1_64: {
- // Any time we get here, there's nothing fancy to do. Just get the
- // operand SMLoc and display the diagnostic.
- SMLoc ErrorLoc = ((ARM64Operand *)Operands[ErrorInfo])->getStartLoc();
- // If it's a memory operand, the error is with the offset immediate,
- // so get that location instead.
- if (((ARM64Operand *)Operands[ErrorInfo])->isMem())
- ErrorLoc = ((ARM64Operand *)Operands[ErrorInfo])->getOffsetLoc();
- if (ErrorLoc == SMLoc())
- ErrorLoc = IDLoc;
- return showMatchError(ErrorLoc, MatchResult);
- }
- }
-
- llvm_unreachable("Implement any new match types added!");
- return true;
-}
-
-/// ParseDirective parses the ARM64-specific directives
-bool ARM64AsmParser::ParseDirective(AsmToken DirectiveID) {
- StringRef IDVal = DirectiveID.getIdentifier();
- SMLoc Loc = DirectiveID.getLoc();
- if (IDVal == ".hword")
- return parseDirectiveWord(2, Loc);
- if (IDVal == ".word")
- return parseDirectiveWord(4, Loc);
- if (IDVal == ".xword")
- return parseDirectiveWord(8, Loc);
- if (IDVal == ".tlsdesccall")
- return parseDirectiveTLSDescCall(Loc);
-
- return parseDirectiveLOH(IDVal, Loc);
-}
-
-/// parseDirectiveWord
-/// ::= .word [ expression (, expression)* ]
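-/// e.g. ".word 0x12345678, foo" emits two 4-byte values.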
-bool ARM64AsmParser::parseDirectiveWord(unsigned Size, SMLoc L) {
- if (getLexer().isNot(AsmToken::EndOfStatement)) {
- for (;;) {
- const MCExpr *Value;
- if (getParser().parseExpression(Value))
- return true;
-
- getParser().getStreamer().EmitValue(Value, Size);
-
- if (getLexer().is(AsmToken::EndOfStatement))
- break;
-
- // FIXME: Improve diagnostic.
- if (getLexer().isNot(AsmToken::Comma))
- return Error(L, "unexpected token in directive");
- Parser.Lex();
- }
- }
-
- Parser.Lex();
- return false;
-}
-
-// parseDirectiveTLSDescCall:
-// ::= .tlsdesccall symbol
-bool ARM64AsmParser::parseDirectiveTLSDescCall(SMLoc L) {
- StringRef Name;
- if (getParser().parseIdentifier(Name))
- return Error(L, "expected symbol after directive");
-
- MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);
- const MCExpr *Expr = MCSymbolRefExpr::Create(Sym, getContext());
- Expr = ARM64MCExpr::Create(Expr, ARM64MCExpr::VK_TLSDESC, getContext());
-
- MCInst Inst;
- Inst.setOpcode(ARM64::TLSDESCCALL);
- Inst.addOperand(MCOperand::CreateExpr(Expr));
-
- getParser().getStreamer().EmitInstruction(Inst, STI);
- return false;
-}
-
-/// ::= .loh <lohName | lohId> label1, ..., labelN
-/// The number of arguments depends on the loh identifier.
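-/// e.g. ".loh AdrpAdd Lpage, Ladd" (assuming "AdrpAdd" is one of the
-/// recognized LOH kinds, which takes two label arguments).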
-bool ARM64AsmParser::parseDirectiveLOH(StringRef IDVal, SMLoc Loc) {
- if (IDVal != MCLOHDirectiveName())
- return true;
- MCLOHType Kind;
- if (getParser().getTok().isNot(AsmToken::Identifier)) {
- if (getParser().getTok().isNot(AsmToken::Integer))
- return TokError("expected an identifier or a number in directive");
-    // We successfully parsed a numeric value for the identifier.
-    // Check if it is valid.
- int64_t Id = getParser().getTok().getIntVal();
- Kind = (MCLOHType)Id;
- // Check that Id does not overflow MCLOHType.
- if (!isValidMCLOHType(Kind) || Id != Kind)
- return TokError("invalid numeric identifier in directive");
- } else {
- StringRef Name = getTok().getIdentifier();
-    // We successfully parsed an identifier.
- // Check if it is a recognized one.
- int Id = MCLOHNameToId(Name);
-
- if (Id == -1)
- return TokError("invalid identifier in directive");
- Kind = (MCLOHType)Id;
- }
- // Consume the identifier.
- Lex();
- // Get the number of arguments of this LOH.
- int NbArgs = MCLOHIdToNbArgs(Kind);
-
- assert(NbArgs != -1 && "Invalid number of arguments");
-
- SmallVector<MCSymbol *, 3> Args;
- for (int Idx = 0; Idx < NbArgs; ++Idx) {
- StringRef Name;
- if (getParser().parseIdentifier(Name))
- return TokError("expected identifier in directive");
- Args.push_back(getContext().GetOrCreateSymbol(Name));
-
- if (Idx + 1 == NbArgs)
- break;
- if (getLexer().isNot(AsmToken::Comma))
- return TokError("unexpected token in '" + Twine(IDVal) + "' directive");
- Lex();
- }
- if (getLexer().isNot(AsmToken::EndOfStatement))
- return TokError("unexpected token in '" + Twine(IDVal) + "' directive");
-
- getStreamer().EmitLOHDirective((MCLOHType)Kind, Args);
- return false;
-}
-
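-/// classifySymbolRef - Split a symbol reference expression, e.g. the ELF
-/// form ":lo12:sym + 4" or the Darwin form "sym@PAGEOFF", into its
-/// reference kind and optional constant addend. Returns false if the
-/// expression is too complex to classify, or if it mixes Darwin and ELF
-/// syntax.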
-bool
-ARM64AsmParser::classifySymbolRef(const MCExpr *Expr,
- ARM64MCExpr::VariantKind &ELFRefKind,
- MCSymbolRefExpr::VariantKind &DarwinRefKind,
- const MCConstantExpr *&Addend) {
- ELFRefKind = ARM64MCExpr::VK_INVALID;
- DarwinRefKind = MCSymbolRefExpr::VK_None;
-
- if (const ARM64MCExpr *AE = dyn_cast<ARM64MCExpr>(Expr)) {
- ELFRefKind = AE->getKind();
- Expr = AE->getSubExpr();
- }
-
- const MCSymbolRefExpr *SE = dyn_cast<MCSymbolRefExpr>(Expr);
- if (SE) {
- // It's a simple symbol reference with no addend.
- DarwinRefKind = SE->getKind();
- Addend = 0;
- return true;
- }
-
- const MCBinaryExpr *BE = dyn_cast<MCBinaryExpr>(Expr);
- if (!BE)
- return false;
-
- SE = dyn_cast<MCSymbolRefExpr>(BE->getLHS());
- if (!SE)
- return false;
- DarwinRefKind = SE->getKind();
-
- if (BE->getOpcode() != MCBinaryExpr::Add)
- return false;
-
-  // See if the addend is a constant; otherwise there's more going
-  // on here than we can deal with.
- Addend = dyn_cast<MCConstantExpr>(BE->getRHS());
- if (!Addend)
- return false;
-
-  // It's some symbol reference + a constant addend, but it really
-  // shouldn't use both Darwin and ELF syntax.
- return ELFRefKind == ARM64MCExpr::VK_INVALID ||
- DarwinRefKind == MCSymbolRefExpr::VK_None;
-}
-
-/// Force static initialization.
-extern "C" void LLVMInitializeARM64AsmParser() {
- RegisterMCAsmParser<ARM64AsmParser> X(TheARM64Target);
-}
-
-#define GET_REGISTER_MATCHER
-#define GET_MATCHER_IMPLEMENTATION
-#include "ARM64GenAsmMatcher.inc"
-
-// Define this matcher function after the auto-generated include so we
-// have the match class enum definitions.
-unsigned ARM64AsmParser::validateTargetOperandClass(MCParsedAsmOperand *AsmOp,
- unsigned Kind) {
- ARM64Operand *Op = static_cast<ARM64Operand *>(AsmOp);
- // If the kind is a token for a literal immediate, check if our asm
- // operand matches. This is for InstAliases which have a fixed-value
- // immediate in the syntax.
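-  // (TableGen mangles non-alphanumeric characters by ASCII code, so
-  // MCK__35_8 is the match class for the literal token "#8"; '#' is
-  // ASCII 35.)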
- int64_t ExpectedVal;
- switch (Kind) {
- default:
- return Match_InvalidOperand;
- case MCK__35_0:
- ExpectedVal = 0;
- break;
- case MCK__35_1:
- ExpectedVal = 1;
- break;
- case MCK__35_12:
- ExpectedVal = 12;
- break;
- case MCK__35_16:
- ExpectedVal = 16;
- break;
- case MCK__35_2:
- ExpectedVal = 2;
- break;
- case MCK__35_24:
- ExpectedVal = 24;
- break;
- case MCK__35_3:
- ExpectedVal = 3;
- break;
- case MCK__35_32:
- ExpectedVal = 32;
- break;
- case MCK__35_4:
- ExpectedVal = 4;
- break;
- case MCK__35_48:
- ExpectedVal = 48;
- break;
- case MCK__35_6:
- ExpectedVal = 6;
- break;
- case MCK__35_64:
- ExpectedVal = 64;
- break;
- case MCK__35_8:
- ExpectedVal = 8;
- break;
- }
- if (!Op->isImm())
- return Match_InvalidOperand;
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Op->getImm());
- if (!CE)
- return Match_InvalidOperand;
- if (CE->getValue() == ExpectedVal)
- return Match_Success;
- return Match_InvalidOperand;
-}
diff --git a/lib/Target/ARM64/AsmParser/CMakeLists.txt b/lib/Target/ARM64/AsmParser/CMakeLists.txt
deleted file mode 100644
index 826158b..0000000
--- a/lib/Target/ARM64/AsmParser/CMakeLists.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
-
-add_llvm_library(LLVMARM64AsmParser
- ARM64AsmParser.cpp
- )
-
diff --git a/lib/Target/ARM64/AsmParser/LLVMBuild.txt b/lib/Target/ARM64/AsmParser/LLVMBuild.txt
deleted file mode 100644
index 2c8fafe..0000000
--- a/lib/Target/ARM64/AsmParser/LLVMBuild.txt
+++ /dev/null
@@ -1,24 +0,0 @@
-;===- ./lib/Target/ARM64/AsmParser/LLVMBuild.txt ---------------*- Conf -*--===;
-;
-; The LLVM Compiler Infrastructure
-;
-; This file is distributed under the University of Illinois Open Source
-; License. See LICENSE.TXT for details.
-;
-;===------------------------------------------------------------------------===;
-;
-; This is an LLVMBuild description file for the components in this subdirectory.
-;
-; For more information on the LLVMBuild system, please see:
-;
-; http://llvm.org/docs/LLVMBuild.html
-;
-;===------------------------------------------------------------------------===;
-
-[component_0]
-type = Library
-name = ARM64AsmParser
-parent = ARM64
-required_libraries = ARM64Desc ARM64Info MC MCParser Support
-add_to_library_groups = ARM64
-
diff --git a/lib/Target/ARM64/AsmParser/Makefile b/lib/Target/ARM64/AsmParser/Makefile
deleted file mode 100644
index d25c47f..0000000
--- a/lib/Target/ARM64/AsmParser/Makefile
+++ /dev/null
@@ -1,15 +0,0 @@
-##===- lib/Target/ARM/AsmParser/Makefile -------------------*- Makefile -*-===##
-#
-# The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-##===----------------------------------------------------------------------===##
-LEVEL = ../../../..
-LIBRARYNAME = LLVMARM64AsmParser
-
-# Hack: we need to include 'main' ARM target directory to grab private headers
-CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
-
-include $(LEVEL)/Makefile.common
diff --git a/lib/Target/ARM64/CMakeLists.txt b/lib/Target/ARM64/CMakeLists.txt
deleted file mode 100644
index 6de861c..0000000
--- a/lib/Target/ARM64/CMakeLists.txt
+++ /dev/null
@@ -1,50 +0,0 @@
-set(LLVM_TARGET_DEFINITIONS ARM64.td)
-
-tablegen(LLVM ARM64GenRegisterInfo.inc -gen-register-info)
-tablegen(LLVM ARM64GenInstrInfo.inc -gen-instr-info)
-tablegen(LLVM ARM64GenMCCodeEmitter.inc -gen-emitter -mc-emitter)
-tablegen(LLVM ARM64GenMCPseudoLowering.inc -gen-pseudo-lowering)
-tablegen(LLVM ARM64GenAsmWriter.inc -gen-asm-writer)
-tablegen(LLVM ARM64GenAsmWriter1.inc -gen-asm-writer -asmwriternum=1)
-tablegen(LLVM ARM64GenAsmMatcher.inc -gen-asm-matcher)
-tablegen(LLVM ARM64GenDAGISel.inc -gen-dag-isel)
-tablegen(LLVM ARM64GenFastISel.inc -gen-fast-isel)
-tablegen(LLVM ARM64GenCallingConv.inc -gen-callingconv)
-tablegen(LLVM ARM64GenSubtargetInfo.inc -gen-subtarget)
-tablegen(LLVM ARM64GenDisassemblerTables.inc -gen-disassembler)
-add_public_tablegen_target(ARM64CommonTableGen)
-
-add_llvm_target(ARM64CodeGen
- ARM64AddressTypePromotion.cpp
- ARM64AdvSIMDScalarPass.cpp
- ARM64AsmPrinter.cpp
- ARM64BranchRelaxation.cpp
- ARM64CleanupLocalDynamicTLSPass.cpp
- ARM64CollectLOH.cpp
- ARM64ConditionalCompares.cpp
- ARM64DeadRegisterDefinitionsPass.cpp
- ARM64ExpandPseudoInsts.cpp
- ARM64FastISel.cpp
- ARM64FrameLowering.cpp
- ARM64ISelDAGToDAG.cpp
- ARM64ISelLowering.cpp
- ARM64InstrInfo.cpp
- ARM64LoadStoreOptimizer.cpp
- ARM64MCInstLower.cpp
- ARM64PromoteConstant.cpp
- ARM64RegisterInfo.cpp
- ARM64SelectionDAGInfo.cpp
- ARM64StorePairSuppress.cpp
- ARM64Subtarget.cpp
- ARM64TargetMachine.cpp
- ARM64TargetObjectFile.cpp
- ARM64TargetTransformInfo.cpp
-)
-
-add_dependencies(LLVMARM64CodeGen intrinsics_gen)
-
-add_subdirectory(TargetInfo)
-add_subdirectory(AsmParser)
-add_subdirectory(Disassembler)
-add_subdirectory(InstPrinter)
-add_subdirectory(MCTargetDesc)
diff --git a/lib/Target/ARM64/Disassembler/ARM64Disassembler.cpp b/lib/Target/ARM64/Disassembler/ARM64Disassembler.cpp
deleted file mode 100644
index 44c501f..0000000
--- a/lib/Target/ARM64/Disassembler/ARM64Disassembler.cpp
+++ /dev/null
@@ -1,2142 +0,0 @@
-//===- ARM64Disassembler.cpp - Disassembler for ARM64 -----------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the disassembler for the ARM64 target.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "arm64-disassembler"
-
-#include "ARM64Disassembler.h"
-#include "ARM64Subtarget.h"
-#include "MCTargetDesc/ARM64BaseInfo.h"
-#include "MCTargetDesc/ARM64AddressingModes.h"
-#include "llvm/MC/MCInst.h"
-#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCFixedLenDisassembler.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/MemoryObject.h"
-#include "llvm/Support/TargetRegistry.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/Format.h"
-#include "llvm/Support/raw_ostream.h"
-
-// Pull DecodeStatus and its enum values into the global namespace.
-typedef llvm::MCDisassembler::DecodeStatus DecodeStatus;
-
-// Forward declare these because the autogenerated code will reference them.
-// Definitions are further down.
-static DecodeStatus DecodeFPR128RegisterClass(llvm::MCInst &Inst,
- unsigned RegNo, uint64_t Address,
- const void *Decoder);
-static DecodeStatus DecodeFPR128_loRegisterClass(llvm::MCInst &Inst,
- unsigned RegNo,
- uint64_t Address,
- const void *Decoder);
-static DecodeStatus DecodeFPR64RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
- uint64_t Address,
- const void *Decoder);
-static DecodeStatus DecodeFPR32RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
- uint64_t Address,
- const void *Decoder);
-static DecodeStatus DecodeFPR16RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
- uint64_t Address,
- const void *Decoder);
-static DecodeStatus DecodeFPR8RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
- uint64_t Address,
- const void *Decoder);
-static DecodeStatus DecodeGPR64RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
- uint64_t Address,
- const void *Decoder);
-static DecodeStatus DecodeGPR64spRegisterClass(llvm::MCInst &Inst,
- unsigned RegNo, uint64_t Address,
- const void *Decoder);
-static DecodeStatus DecodeGPR32RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
- uint64_t Address,
- const void *Decoder);
-static DecodeStatus DecodeGPR32spRegisterClass(llvm::MCInst &Inst,
- unsigned RegNo, uint64_t Address,
- const void *Decoder);
-static DecodeStatus DecodeQQRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
- uint64_t Address,
- const void *Decoder);
-static DecodeStatus DecodeQQQRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
- uint64_t Address,
- const void *Decoder);
-static DecodeStatus DecodeQQQQRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
- uint64_t Address,
- const void *Decoder);
-static DecodeStatus DecodeDDRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
- uint64_t Address,
- const void *Decoder);
-static DecodeStatus DecodeDDDRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
- uint64_t Address,
- const void *Decoder);
-static DecodeStatus DecodeDDDDRegisterClass(llvm::MCInst &Inst, unsigned RegNo,
- uint64_t Address,
- const void *Decoder);
-
-static DecodeStatus DecodeFixedPointScaleImm(llvm::MCInst &Inst, unsigned Imm,
- uint64_t Address,
- const void *Decoder);
-static DecodeStatus DecodeCondBranchTarget(llvm::MCInst &Inst, unsigned Imm,
- uint64_t Address,
- const void *Decoder);
-static DecodeStatus DecodeSystemRegister(llvm::MCInst &Inst, unsigned Imm,
- uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeThreeAddrSRegInstruction(llvm::MCInst &Inst,
- uint32_t insn,
- uint64_t Address,
- const void *Decoder);
-static DecodeStatus DecodeMoveImmInstruction(llvm::MCInst &Inst, uint32_t insn,
- uint64_t Address,
- const void *Decoder);
-static DecodeStatus DecodeUnsignedLdStInstruction(llvm::MCInst &Inst,
- uint32_t insn,
- uint64_t Address,
- const void *Decoder);
-static DecodeStatus DecodeSignedLdStInstruction(llvm::MCInst &Inst,
- uint32_t insn, uint64_t Address,
- const void *Decoder);
-static DecodeStatus DecodeExclusiveLdStInstruction(llvm::MCInst &Inst,
- uint32_t insn,
- uint64_t Address,
- const void *Decoder);
-static DecodeStatus DecodePairLdStInstruction(llvm::MCInst &Inst, uint32_t insn,
- uint64_t Address,
- const void *Decoder);
-static DecodeStatus DecodeRegOffsetLdStInstruction(llvm::MCInst &Inst,
- uint32_t insn,
- uint64_t Address,
- const void *Decoder);
-static DecodeStatus DecodeAddSubERegInstruction(llvm::MCInst &Inst,
- uint32_t insn, uint64_t Address,
- const void *Decoder);
-static DecodeStatus DecodeLogicalImmInstruction(llvm::MCInst &Inst,
- uint32_t insn, uint64_t Address,
- const void *Decoder);
-static DecodeStatus DecodeModImmInstruction(llvm::MCInst &Inst, uint32_t insn,
- uint64_t Address,
- const void *Decoder);
-static DecodeStatus DecodeModImmTiedInstruction(llvm::MCInst &Inst,
- uint32_t insn, uint64_t Address,
- const void *Decoder);
-static DecodeStatus DecodeAdrInstruction(llvm::MCInst &Inst, uint32_t insn,
- uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeBaseAddSubImm(llvm::MCInst &Inst, uint32_t insn,
- uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeUnconditionalBranch(llvm::MCInst &Inst, uint32_t insn,
- uint64_t Address,
- const void *Decoder);
-static DecodeStatus DecodeSystemCPSRInstruction(llvm::MCInst &Inst,
- uint32_t insn, uint64_t Address,
- const void *Decoder);
-static DecodeStatus DecodeTestAndBranch(llvm::MCInst &Inst, uint32_t insn,
- uint64_t Address, const void *Decoder);
-static DecodeStatus DecodeSIMDLdStPost(llvm::MCInst &Inst, uint32_t insn,
- uint64_t Addr, const void *Decoder);
-static DecodeStatus DecodeSIMDLdStSingle(llvm::MCInst &Inst, uint32_t insn,
- uint64_t Addr, const void *Decoder);
-static DecodeStatus DecodeSIMDLdStSingleTied(llvm::MCInst &Inst, uint32_t insn,
- uint64_t Addr,
- const void *Decoder);
-
-static DecodeStatus DecodeVecShiftR64Imm(llvm::MCInst &Inst, unsigned Imm,
- uint64_t Addr, const void *Decoder);
-static DecodeStatus DecodeVecShiftR64ImmNarrow(llvm::MCInst &Inst, unsigned Imm,
- uint64_t Addr,
- const void *Decoder);
-static DecodeStatus DecodeVecShiftR32Imm(llvm::MCInst &Inst, unsigned Imm,
- uint64_t Addr, const void *Decoder);
-static DecodeStatus DecodeVecShiftR32ImmNarrow(llvm::MCInst &Inst, unsigned Imm,
- uint64_t Addr,
- const void *Decoder);
-static DecodeStatus DecodeVecShiftR16Imm(llvm::MCInst &Inst, unsigned Imm,
- uint64_t Addr, const void *Decoder);
-static DecodeStatus DecodeVecShiftR16ImmNarrow(llvm::MCInst &Inst, unsigned Imm,
- uint64_t Addr,
- const void *Decoder);
-static DecodeStatus DecodeVecShiftR8Imm(llvm::MCInst &Inst, unsigned Imm,
- uint64_t Addr, const void *Decoder);
-static DecodeStatus DecodeVecShiftL64Imm(llvm::MCInst &Inst, unsigned Imm,
- uint64_t Addr, const void *Decoder);
-static DecodeStatus DecodeVecShiftL32Imm(llvm::MCInst &Inst, unsigned Imm,
- uint64_t Addr, const void *Decoder);
-static DecodeStatus DecodeVecShiftL16Imm(llvm::MCInst &Inst, unsigned Imm,
- uint64_t Addr, const void *Decoder);
-static DecodeStatus DecodeVecShiftL8Imm(llvm::MCInst &Inst, unsigned Imm,
- uint64_t Addr, const void *Decoder);
-
-#include "ARM64GenDisassemblerTables.inc"
-#include "ARM64GenInstrInfo.inc"
-
-using namespace llvm;
-
-#define Success llvm::MCDisassembler::Success
-#define Fail llvm::MCDisassembler::Fail
-
-static MCDisassembler *createARM64Disassembler(const Target &T,
- const MCSubtargetInfo &STI) {
- return new ARM64Disassembler(STI);
-}
-
-DecodeStatus ARM64Disassembler::getInstruction(MCInst &MI, uint64_t &Size,
- const MemoryObject &Region,
- uint64_t Address,
- raw_ostream &os,
- raw_ostream &cs) const {
- CommentStream = &cs;
-
- uint8_t bytes[4];
-
- Size = 0;
- // We want to read exactly 4 bytes of data.
- if (Region.readBytes(Address, 4, (uint8_t *)bytes) == -1)
- return Fail;
- Size = 4;
-
-  // Encoded as a little-endian 32-bit word in the stream.
- uint32_t insn =
- (bytes[3] << 24) | (bytes[2] << 16) | (bytes[1] << 8) | (bytes[0] << 0);
-
-  // Call the auto-generated decoder function.
- DecodeStatus result =
- decodeInstruction(DecoderTable32, MI, insn, Address, this, STI);
- if (!result)
- return Fail;
-
- return Success;
-}
-
-static MCSymbolRefExpr::VariantKind
-getVariant(uint64_t LLVMDisassembler_VariantKind) {
- switch (LLVMDisassembler_VariantKind) {
- case LLVMDisassembler_VariantKind_None:
- return MCSymbolRefExpr::VK_None;
- case LLVMDisassembler_VariantKind_ARM64_PAGE:
- return MCSymbolRefExpr::VK_PAGE;
- case LLVMDisassembler_VariantKind_ARM64_PAGEOFF:
- return MCSymbolRefExpr::VK_PAGEOFF;
- case LLVMDisassembler_VariantKind_ARM64_GOTPAGE:
- return MCSymbolRefExpr::VK_GOTPAGE;
- case LLVMDisassembler_VariantKind_ARM64_GOTPAGEOFF:
- return MCSymbolRefExpr::VK_GOTPAGEOFF;
- case LLVMDisassembler_VariantKind_ARM64_TLVP:
- case LLVMDisassembler_VariantKind_ARM64_TLVOFF:
- default:
- assert(0 && "bad LLVMDisassembler_VariantKind");
- return MCSymbolRefExpr::VK_None;
- }
-}
-
-/// tryAddingSymbolicOperand - tryAddingSymbolicOperand tries to add a symbolic
-/// operand in place of the immediate Value in the MCInst. The immediate
-/// Value has not had any PC adjustment made by the caller. If the instruction
-/// is a branch that adds the PC to the immediate Value, then isBranch is
-/// true, else false. If the getOpInfo() function was set as part of the
-/// setupForSymbolicDisassembly() call, then that function is called to get any
-/// symbolic information at the Address for this instruction. If that returns
-/// non-zero, then the symbolic information it returns is used to create an
-/// MCExpr and that is added as an operand to the MCInst. If getOpInfo()
-/// returns zero and isBranch is true, then a symbol lookup for
-/// Address + Value is done and, if a symbol is found, an MCExpr is created
-/// with that; otherwise an MCExpr with Address + Value is created. If
-/// getOpInfo() returns zero and isBranch is false, then the Opcode of the
-/// MCInst is tested, and for ADRP and other instructions that help to load
-/// pointers, a symbol lookup is done to see if it returns a specific
-/// reference type to add to the comment stream. This function returns true
-/// if it adds an operand to the MCInst and false otherwise.
-bool ARM64Disassembler::tryAddingSymbolicOperand(uint64_t Address, int Value,
- bool isBranch,
- uint64_t InstSize, MCInst &MI,
- uint32_t insn) const {
- LLVMOpInfoCallback getOpInfo = getLLVMOpInfoCallback();
-
- struct LLVMOpInfo1 SymbolicOp;
- memset(&SymbolicOp, '\0', sizeof(struct LLVMOpInfo1));
- SymbolicOp.Value = Value;
- void *DisInfo = getDisInfoBlock();
- uint64_t ReferenceType;
- const char *ReferenceName;
- const char *Name;
- LLVMSymbolLookupCallback SymbolLookUp = getLLVMSymbolLookupCallback();
- if (!getOpInfo ||
- !getOpInfo(DisInfo, Address, 0 /* Offset */, InstSize, 1, &SymbolicOp)) {
- if (isBranch) {
- if (SymbolLookUp) {
- ReferenceType = LLVMDisassembler_ReferenceType_In_Branch;
- Name = SymbolLookUp(DisInfo, Address + Value, &ReferenceType, Address,
- &ReferenceName);
- if (Name) {
- SymbolicOp.AddSymbol.Name = Name;
- SymbolicOp.AddSymbol.Present = Success;
- SymbolicOp.Value = 0;
- } else {
- SymbolicOp.Value = Address + Value;
- }
- if (ReferenceType == LLVMDisassembler_ReferenceType_Out_SymbolStub)
- (*CommentStream) << "symbol stub for: " << ReferenceName;
- else if (ReferenceType ==
- LLVMDisassembler_ReferenceType_Out_Objc_Message)
- (*CommentStream) << "Objc message: " << ReferenceName;
- } else {
- return false;
- }
- } else if (MI.getOpcode() == ARM64::ADRP) {
- if (SymbolLookUp) {
- ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_ADRP;
- Name = SymbolLookUp(DisInfo, insn, &ReferenceType, Address,
- &ReferenceName);
- (*CommentStream) << format("0x%llx",
- 0xfffffffffffff000LL & (Address + Value));
- } else {
- return false;
- }
- } else if (MI.getOpcode() == ARM64::ADDXri ||
- MI.getOpcode() == ARM64::LDRXui ||
- MI.getOpcode() == ARM64::LDRXl || MI.getOpcode() == ARM64::ADR) {
- if (SymbolLookUp) {
- if (MI.getOpcode() == ARM64::ADDXri)
- ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_ADDXri;
- else if (MI.getOpcode() == ARM64::LDRXui)
- ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_LDRXui;
- if (MI.getOpcode() == ARM64::LDRXl) {
- ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_LDRXl;
- Name = SymbolLookUp(DisInfo, Address + Value, &ReferenceType, Address,
- &ReferenceName);
- } else if (MI.getOpcode() == ARM64::ADR) {
- ReferenceType = LLVMDisassembler_ReferenceType_In_ARM64_ADR;
- Name = SymbolLookUp(DisInfo, Address + Value, &ReferenceType, Address,
- &ReferenceName);
- } else {
- Name = SymbolLookUp(DisInfo, insn, &ReferenceType, Address,
- &ReferenceName);
- }
- if (ReferenceType == LLVMDisassembler_ReferenceType_Out_LitPool_SymAddr)
- (*CommentStream) << "literal pool symbol address: " << ReferenceName;
- else if (ReferenceType ==
- LLVMDisassembler_ReferenceType_Out_LitPool_CstrAddr)
- (*CommentStream) << "literal pool for: \"" << ReferenceName << "\"";
- else if (ReferenceType ==
- LLVMDisassembler_ReferenceType_Out_Objc_CFString_Ref)
- (*CommentStream) << "Objc cfstring ref: @\"" << ReferenceName << "\"";
- else if (ReferenceType ==
- LLVMDisassembler_ReferenceType_Out_Objc_Message)
- (*CommentStream) << "Objc message: " << ReferenceName;
- else if (ReferenceType ==
- LLVMDisassembler_ReferenceType_Out_Objc_Message_Ref)
- (*CommentStream) << "Objc message ref: " << ReferenceName;
- else if (ReferenceType ==
- LLVMDisassembler_ReferenceType_Out_Objc_Selector_Ref)
- (*CommentStream) << "Objc selector ref: " << ReferenceName;
- else if (ReferenceType ==
- LLVMDisassembler_ReferenceType_Out_Objc_Class_Ref)
- (*CommentStream) << "Objc class ref: " << ReferenceName;
- // For these instructions, the SymbolLookUp() above is just to get the
- // ReferenceType and ReferenceName. We want to make sure not to
- // fall through so we don't build an MCExpr to leave the disassembly
- // of the immediate values of these instructions to the InstPrinter.
- return false;
- } else {
- return false;
- }
- } else {
- return false;
- }
- }
-
- MCContext *Ctx = getMCContext();
- const MCExpr *Add = NULL;
- if (SymbolicOp.AddSymbol.Present) {
- if (SymbolicOp.AddSymbol.Name) {
- StringRef Name(SymbolicOp.AddSymbol.Name);
- MCSymbol *Sym = Ctx->GetOrCreateSymbol(Name);
- MCSymbolRefExpr::VariantKind Variant = getVariant(SymbolicOp.VariantKind);
- if (Variant != MCSymbolRefExpr::VK_None)
- Add = MCSymbolRefExpr::Create(Sym, Variant, *Ctx);
- else
- Add = MCSymbolRefExpr::Create(Sym, *Ctx);
- } else {
- Add = MCConstantExpr::Create(SymbolicOp.AddSymbol.Value, *Ctx);
- }
- }
-
- const MCExpr *Sub = NULL;
- if (SymbolicOp.SubtractSymbol.Present) {
- if (SymbolicOp.SubtractSymbol.Name) {
- StringRef Name(SymbolicOp.SubtractSymbol.Name);
- MCSymbol *Sym = Ctx->GetOrCreateSymbol(Name);
- Sub = MCSymbolRefExpr::Create(Sym, *Ctx);
- } else {
- Sub = MCConstantExpr::Create(SymbolicOp.SubtractSymbol.Value, *Ctx);
- }
- }
-
- const MCExpr *Off = NULL;
- if (SymbolicOp.Value != 0)
- Off = MCConstantExpr::Create(SymbolicOp.Value, *Ctx);
-
- const MCExpr *Expr;
- if (Sub) {
- const MCExpr *LHS;
- if (Add)
- LHS = MCBinaryExpr::CreateSub(Add, Sub, *Ctx);
- else
- LHS = MCUnaryExpr::CreateMinus(Sub, *Ctx);
- if (Off != 0)
- Expr = MCBinaryExpr::CreateAdd(LHS, Off, *Ctx);
- else
- Expr = LHS;
- } else if (Add) {
- if (Off != 0)
- Expr = MCBinaryExpr::CreateAdd(Add, Off, *Ctx);
- else
- Expr = Add;
- } else {
- if (Off != 0)
- Expr = Off;
- else
- Expr = MCConstantExpr::Create(0, *Ctx);
- }
-
- MI.addOperand(MCOperand::CreateExpr(Expr));
-
- return true;
-}
-
-extern "C" void LLVMInitializeARM64Disassembler() {
- TargetRegistry::RegisterMCDisassembler(TheARM64Target,
- createARM64Disassembler);
-}
-
-static const unsigned FPR128DecoderTable[] = {
- ARM64::Q0, ARM64::Q1, ARM64::Q2, ARM64::Q3, ARM64::Q4, ARM64::Q5,
- ARM64::Q6, ARM64::Q7, ARM64::Q8, ARM64::Q9, ARM64::Q10, ARM64::Q11,
- ARM64::Q12, ARM64::Q13, ARM64::Q14, ARM64::Q15, ARM64::Q16, ARM64::Q17,
- ARM64::Q18, ARM64::Q19, ARM64::Q20, ARM64::Q21, ARM64::Q22, ARM64::Q23,
- ARM64::Q24, ARM64::Q25, ARM64::Q26, ARM64::Q27, ARM64::Q28, ARM64::Q29,
- ARM64::Q30, ARM64::Q31
-};
-
-static DecodeStatus DecodeFPR128RegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Addr,
- const void *Decoder) {
- if (RegNo > 31)
- return Fail;
-
- unsigned Register = FPR128DecoderTable[RegNo];
- Inst.addOperand(MCOperand::CreateReg(Register));
- return Success;
-}
-
-static DecodeStatus DecodeFPR128_loRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Addr,
- const void *Decoder) {
- if (RegNo > 15)
- return Fail;
- return DecodeFPR128RegisterClass(Inst, RegNo, Addr, Decoder);
-}
-
-static const unsigned FPR64DecoderTable[] = {
- ARM64::D0, ARM64::D1, ARM64::D2, ARM64::D3, ARM64::D4, ARM64::D5,
- ARM64::D6, ARM64::D7, ARM64::D8, ARM64::D9, ARM64::D10, ARM64::D11,
- ARM64::D12, ARM64::D13, ARM64::D14, ARM64::D15, ARM64::D16, ARM64::D17,
- ARM64::D18, ARM64::D19, ARM64::D20, ARM64::D21, ARM64::D22, ARM64::D23,
- ARM64::D24, ARM64::D25, ARM64::D26, ARM64::D27, ARM64::D28, ARM64::D29,
- ARM64::D30, ARM64::D31
-};
-
-static DecodeStatus DecodeFPR64RegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Addr,
- const void *Decoder) {
- if (RegNo > 31)
- return Fail;
-
- unsigned Register = FPR64DecoderTable[RegNo];
- Inst.addOperand(MCOperand::CreateReg(Register));
- return Success;
-}
-
-static const unsigned FPR32DecoderTable[] = {
- ARM64::S0, ARM64::S1, ARM64::S2, ARM64::S3, ARM64::S4, ARM64::S5,
- ARM64::S6, ARM64::S7, ARM64::S8, ARM64::S9, ARM64::S10, ARM64::S11,
- ARM64::S12, ARM64::S13, ARM64::S14, ARM64::S15, ARM64::S16, ARM64::S17,
- ARM64::S18, ARM64::S19, ARM64::S20, ARM64::S21, ARM64::S22, ARM64::S23,
- ARM64::S24, ARM64::S25, ARM64::S26, ARM64::S27, ARM64::S28, ARM64::S29,
- ARM64::S30, ARM64::S31
-};
-
-static DecodeStatus DecodeFPR32RegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Addr,
- const void *Decoder) {
- if (RegNo > 31)
- return Fail;
-
- unsigned Register = FPR32DecoderTable[RegNo];
- Inst.addOperand(MCOperand::CreateReg(Register));
- return Success;
-}
-
-static const unsigned FPR16DecoderTable[] = {
- ARM64::H0, ARM64::H1, ARM64::H2, ARM64::H3, ARM64::H4, ARM64::H5,
- ARM64::H6, ARM64::H7, ARM64::H8, ARM64::H9, ARM64::H10, ARM64::H11,
- ARM64::H12, ARM64::H13, ARM64::H14, ARM64::H15, ARM64::H16, ARM64::H17,
- ARM64::H18, ARM64::H19, ARM64::H20, ARM64::H21, ARM64::H22, ARM64::H23,
- ARM64::H24, ARM64::H25, ARM64::H26, ARM64::H27, ARM64::H28, ARM64::H29,
- ARM64::H30, ARM64::H31
-};
-
-static DecodeStatus DecodeFPR16RegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Addr,
- const void *Decoder) {
- if (RegNo > 31)
- return Fail;
-
- unsigned Register = FPR16DecoderTable[RegNo];
- Inst.addOperand(MCOperand::CreateReg(Register));
- return Success;
-}
-
-static const unsigned FPR8DecoderTable[] = {
- ARM64::B0, ARM64::B1, ARM64::B2, ARM64::B3, ARM64::B4, ARM64::B5,
- ARM64::B6, ARM64::B7, ARM64::B8, ARM64::B9, ARM64::B10, ARM64::B11,
- ARM64::B12, ARM64::B13, ARM64::B14, ARM64::B15, ARM64::B16, ARM64::B17,
- ARM64::B18, ARM64::B19, ARM64::B20, ARM64::B21, ARM64::B22, ARM64::B23,
- ARM64::B24, ARM64::B25, ARM64::B26, ARM64::B27, ARM64::B28, ARM64::B29,
- ARM64::B30, ARM64::B31
-};
-
-static DecodeStatus DecodeFPR8RegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Addr,
- const void *Decoder) {
- if (RegNo > 31)
- return Fail;
-
- unsigned Register = FPR8DecoderTable[RegNo];
- Inst.addOperand(MCOperand::CreateReg(Register));
- return Success;
-}
-
-static const unsigned GPR64DecoderTable[] = {
- ARM64::X0, ARM64::X1, ARM64::X2, ARM64::X3, ARM64::X4, ARM64::X5,
- ARM64::X6, ARM64::X7, ARM64::X8, ARM64::X9, ARM64::X10, ARM64::X11,
- ARM64::X12, ARM64::X13, ARM64::X14, ARM64::X15, ARM64::X16, ARM64::X17,
- ARM64::X18, ARM64::X19, ARM64::X20, ARM64::X21, ARM64::X22, ARM64::X23,
- ARM64::X24, ARM64::X25, ARM64::X26, ARM64::X27, ARM64::X28, ARM64::FP,
- ARM64::LR, ARM64::XZR
-};
-
-static DecodeStatus DecodeGPR64RegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Addr,
- const void *Decoder) {
- if (RegNo > 31)
- return Fail;
-
- unsigned Register = GPR64DecoderTable[RegNo];
- Inst.addOperand(MCOperand::CreateReg(Register));
- return Success;
-}
-
-static DecodeStatus DecodeGPR64spRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Addr,
- const void *Decoder) {
- if (RegNo > 31)
- return Fail;
- unsigned Register = GPR64DecoderTable[RegNo];
- if (Register == ARM64::XZR)
- Register = ARM64::SP;
- Inst.addOperand(MCOperand::CreateReg(Register));
- return Success;
-}
-
-static const unsigned GPR32DecoderTable[] = {
- ARM64::W0, ARM64::W1, ARM64::W2, ARM64::W3, ARM64::W4, ARM64::W5,
- ARM64::W6, ARM64::W7, ARM64::W8, ARM64::W9, ARM64::W10, ARM64::W11,
- ARM64::W12, ARM64::W13, ARM64::W14, ARM64::W15, ARM64::W16, ARM64::W17,
- ARM64::W18, ARM64::W19, ARM64::W20, ARM64::W21, ARM64::W22, ARM64::W23,
- ARM64::W24, ARM64::W25, ARM64::W26, ARM64::W27, ARM64::W28, ARM64::W29,
- ARM64::W30, ARM64::WZR
-};
-
-static DecodeStatus DecodeGPR32RegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Addr,
- const void *Decoder) {
- if (RegNo > 31)
- return Fail;
-
- unsigned Register = GPR32DecoderTable[RegNo];
- Inst.addOperand(MCOperand::CreateReg(Register));
- return Success;
-}
-
-static DecodeStatus DecodeGPR32spRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Addr,
- const void *Decoder) {
- if (RegNo > 31)
- return Fail;
-
- unsigned Register = GPR32DecoderTable[RegNo];
- if (Register == ARM64::WZR)
- Register = ARM64::WSP;
- Inst.addOperand(MCOperand::CreateReg(Register));
- return Success;
-}
-
-static const unsigned VectorDecoderTable[] = {
- ARM64::Q0, ARM64::Q1, ARM64::Q2, ARM64::Q3, ARM64::Q4, ARM64::Q5,
- ARM64::Q6, ARM64::Q7, ARM64::Q8, ARM64::Q9, ARM64::Q10, ARM64::Q11,
- ARM64::Q12, ARM64::Q13, ARM64::Q14, ARM64::Q15, ARM64::Q16, ARM64::Q17,
- ARM64::Q18, ARM64::Q19, ARM64::Q20, ARM64::Q21, ARM64::Q22, ARM64::Q23,
- ARM64::Q24, ARM64::Q25, ARM64::Q26, ARM64::Q27, ARM64::Q28, ARM64::Q29,
- ARM64::Q30, ARM64::Q31
-};
-
-static DecodeStatus DecodeVectorRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Addr,
- const void *Decoder) {
- if (RegNo > 31)
- return Fail;
-
- unsigned Register = VectorDecoderTable[RegNo];
- Inst.addOperand(MCOperand::CreateReg(Register));
- return Success;
-}
-
-static const unsigned QQDecoderTable[] = {
- ARM64::Q0_Q1, ARM64::Q1_Q2, ARM64::Q2_Q3, ARM64::Q3_Q4,
- ARM64::Q4_Q5, ARM64::Q5_Q6, ARM64::Q6_Q7, ARM64::Q7_Q8,
- ARM64::Q8_Q9, ARM64::Q9_Q10, ARM64::Q10_Q11, ARM64::Q11_Q12,
- ARM64::Q12_Q13, ARM64::Q13_Q14, ARM64::Q14_Q15, ARM64::Q15_Q16,
- ARM64::Q16_Q17, ARM64::Q17_Q18, ARM64::Q18_Q19, ARM64::Q19_Q20,
- ARM64::Q20_Q21, ARM64::Q21_Q22, ARM64::Q22_Q23, ARM64::Q23_Q24,
- ARM64::Q24_Q25, ARM64::Q25_Q26, ARM64::Q26_Q27, ARM64::Q27_Q28,
- ARM64::Q28_Q29, ARM64::Q29_Q30, ARM64::Q30_Q31, ARM64::Q31_Q0
-};
-
-static DecodeStatus DecodeQQRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Addr, const void *Decoder) {
- if (RegNo > 31)
- return Fail;
- unsigned Register = QQDecoderTable[RegNo];
- Inst.addOperand(MCOperand::CreateReg(Register));
- return Success;
-}
-
-static const unsigned QQQDecoderTable[] = {
- ARM64::Q0_Q1_Q2, ARM64::Q1_Q2_Q3, ARM64::Q2_Q3_Q4,
- ARM64::Q3_Q4_Q5, ARM64::Q4_Q5_Q6, ARM64::Q5_Q6_Q7,
- ARM64::Q6_Q7_Q8, ARM64::Q7_Q8_Q9, ARM64::Q8_Q9_Q10,
- ARM64::Q9_Q10_Q11, ARM64::Q10_Q11_Q12, ARM64::Q11_Q12_Q13,
- ARM64::Q12_Q13_Q14, ARM64::Q13_Q14_Q15, ARM64::Q14_Q15_Q16,
- ARM64::Q15_Q16_Q17, ARM64::Q16_Q17_Q18, ARM64::Q17_Q18_Q19,
- ARM64::Q18_Q19_Q20, ARM64::Q19_Q20_Q21, ARM64::Q20_Q21_Q22,
- ARM64::Q21_Q22_Q23, ARM64::Q22_Q23_Q24, ARM64::Q23_Q24_Q25,
- ARM64::Q24_Q25_Q26, ARM64::Q25_Q26_Q27, ARM64::Q26_Q27_Q28,
- ARM64::Q27_Q28_Q29, ARM64::Q28_Q29_Q30, ARM64::Q29_Q30_Q31,
- ARM64::Q30_Q31_Q0, ARM64::Q31_Q0_Q1
-};
-
-static DecodeStatus DecodeQQQRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Addr, const void *Decoder) {
- if (RegNo > 31)
- return Fail;
- unsigned Register = QQQDecoderTable[RegNo];
- Inst.addOperand(MCOperand::CreateReg(Register));
- return Success;
-}
-
-static const unsigned QQQQDecoderTable[] = {
- ARM64::Q0_Q1_Q2_Q3, ARM64::Q1_Q2_Q3_Q4, ARM64::Q2_Q3_Q4_Q5,
- ARM64::Q3_Q4_Q5_Q6, ARM64::Q4_Q5_Q6_Q7, ARM64::Q5_Q6_Q7_Q8,
- ARM64::Q6_Q7_Q8_Q9, ARM64::Q7_Q8_Q9_Q10, ARM64::Q8_Q9_Q10_Q11,
- ARM64::Q9_Q10_Q11_Q12, ARM64::Q10_Q11_Q12_Q13, ARM64::Q11_Q12_Q13_Q14,
- ARM64::Q12_Q13_Q14_Q15, ARM64::Q13_Q14_Q15_Q16, ARM64::Q14_Q15_Q16_Q17,
- ARM64::Q15_Q16_Q17_Q18, ARM64::Q16_Q17_Q18_Q19, ARM64::Q17_Q18_Q19_Q20,
- ARM64::Q18_Q19_Q20_Q21, ARM64::Q19_Q20_Q21_Q22, ARM64::Q20_Q21_Q22_Q23,
- ARM64::Q21_Q22_Q23_Q24, ARM64::Q22_Q23_Q24_Q25, ARM64::Q23_Q24_Q25_Q26,
- ARM64::Q24_Q25_Q26_Q27, ARM64::Q25_Q26_Q27_Q28, ARM64::Q26_Q27_Q28_Q29,
- ARM64::Q27_Q28_Q29_Q30, ARM64::Q28_Q29_Q30_Q31, ARM64::Q29_Q30_Q31_Q0,
- ARM64::Q30_Q31_Q0_Q1, ARM64::Q31_Q0_Q1_Q2
-};
-
-static DecodeStatus DecodeQQQQRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Addr,
- const void *Decoder) {
- if (RegNo > 31)
- return Fail;
- unsigned Register = QQQQDecoderTable[RegNo];
- Inst.addOperand(MCOperand::CreateReg(Register));
- return Success;
-}
-
-static const unsigned DDDecoderTable[] = {
- ARM64::D0_D1, ARM64::D1_D2, ARM64::D2_D3, ARM64::D3_D4,
- ARM64::D4_D5, ARM64::D5_D6, ARM64::D6_D7, ARM64::D7_D8,
- ARM64::D8_D9, ARM64::D9_D10, ARM64::D10_D11, ARM64::D11_D12,
- ARM64::D12_D13, ARM64::D13_D14, ARM64::D14_D15, ARM64::D15_D16,
- ARM64::D16_D17, ARM64::D17_D18, ARM64::D18_D19, ARM64::D19_D20,
- ARM64::D20_D21, ARM64::D21_D22, ARM64::D22_D23, ARM64::D23_D24,
- ARM64::D24_D25, ARM64::D25_D26, ARM64::D26_D27, ARM64::D27_D28,
- ARM64::D28_D29, ARM64::D29_D30, ARM64::D30_D31, ARM64::D31_D0
-};
-
-static DecodeStatus DecodeDDRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Addr, const void *Decoder) {
- if (RegNo > 31)
- return Fail;
- unsigned Register = DDDecoderTable[RegNo];
- Inst.addOperand(MCOperand::CreateReg(Register));
- return Success;
-}
-
-static const unsigned DDDDecoderTable[] = {
- ARM64::D0_D1_D2, ARM64::D1_D2_D3, ARM64::D2_D3_D4,
- ARM64::D3_D4_D5, ARM64::D4_D5_D6, ARM64::D5_D6_D7,
- ARM64::D6_D7_D8, ARM64::D7_D8_D9, ARM64::D8_D9_D10,
- ARM64::D9_D10_D11, ARM64::D10_D11_D12, ARM64::D11_D12_D13,
- ARM64::D12_D13_D14, ARM64::D13_D14_D15, ARM64::D14_D15_D16,
- ARM64::D15_D16_D17, ARM64::D16_D17_D18, ARM64::D17_D18_D19,
- ARM64::D18_D19_D20, ARM64::D19_D20_D21, ARM64::D20_D21_D22,
- ARM64::D21_D22_D23, ARM64::D22_D23_D24, ARM64::D23_D24_D25,
- ARM64::D24_D25_D26, ARM64::D25_D26_D27, ARM64::D26_D27_D28,
- ARM64::D27_D28_D29, ARM64::D28_D29_D30, ARM64::D29_D30_D31,
- ARM64::D30_D31_D0, ARM64::D31_D0_D1
-};
-
-static DecodeStatus DecodeDDDRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Addr, const void *Decoder) {
- if (RegNo > 31)
- return Fail;
- unsigned Register = DDDDecoderTable[RegNo];
- Inst.addOperand(MCOperand::CreateReg(Register));
- return Success;
-}
-
-static const unsigned DDDDDecoderTable[] = {
- ARM64::D0_D1_D2_D3, ARM64::D1_D2_D3_D4, ARM64::D2_D3_D4_D5,
- ARM64::D3_D4_D5_D6, ARM64::D4_D5_D6_D7, ARM64::D5_D6_D7_D8,
- ARM64::D6_D7_D8_D9, ARM64::D7_D8_D9_D10, ARM64::D8_D9_D10_D11,
- ARM64::D9_D10_D11_D12, ARM64::D10_D11_D12_D13, ARM64::D11_D12_D13_D14,
- ARM64::D12_D13_D14_D15, ARM64::D13_D14_D15_D16, ARM64::D14_D15_D16_D17,
- ARM64::D15_D16_D17_D18, ARM64::D16_D17_D18_D19, ARM64::D17_D18_D19_D20,
- ARM64::D18_D19_D20_D21, ARM64::D19_D20_D21_D22, ARM64::D20_D21_D22_D23,
- ARM64::D21_D22_D23_D24, ARM64::D22_D23_D24_D25, ARM64::D23_D24_D25_D26,
- ARM64::D24_D25_D26_D27, ARM64::D25_D26_D27_D28, ARM64::D26_D27_D28_D29,
- ARM64::D27_D28_D29_D30, ARM64::D28_D29_D30_D31, ARM64::D29_D30_D31_D0,
- ARM64::D30_D31_D0_D1, ARM64::D31_D0_D1_D2
-};
-
-static DecodeStatus DecodeDDDDRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Addr,
- const void *Decoder) {
- if (RegNo > 31)
- return Fail;
- unsigned Register = DDDDDecoderTable[RegNo];
- Inst.addOperand(MCOperand::CreateReg(Register));
- return Success;
-}
-
-static DecodeStatus DecodeFixedPointScaleImm(llvm::MCInst &Inst, unsigned Imm,
- uint64_t Addr,
- const void *Decoder) {
- Inst.addOperand(MCOperand::CreateImm(64 - Imm));
- return Success;
-}
-
-static DecodeStatus DecodeCondBranchTarget(llvm::MCInst &Inst, unsigned Imm,
- uint64_t Addr, const void *Decoder) {
- int64_t ImmVal = Imm;
- const ARM64Disassembler *Dis =
- static_cast<const ARM64Disassembler *>(Decoder);
-
- // Sign-extend 19-bit immediate.
- if (ImmVal & (1 << (19 - 1)))
- ImmVal |= ~((1LL << 19) - 1);
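-  // e.g. Imm = 0x40000 (sign bit set) yields ImmVal = -262144; the
-  // symbolic-operand path below scales it by 4 to form the byte offset of
-  // the branch target.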
-
- if (!Dis->tryAddingSymbolicOperand(Addr, ImmVal << 2,
- Inst.getOpcode() != ARM64::LDRXl, 4, Inst))
- Inst.addOperand(MCOperand::CreateImm(ImmVal));
- return Success;
-}
-
-static DecodeStatus DecodeSystemRegister(llvm::MCInst &Inst, unsigned Imm,
- uint64_t Address,
- const void *Decoder) {
- Inst.addOperand(MCOperand::CreateImm(Imm | 0x8000));
- return Success;
-}
-
-static DecodeStatus DecodeVecShiftRImm(llvm::MCInst &Inst, unsigned Imm,
- unsigned Add) {
- Inst.addOperand(MCOperand::CreateImm(Add - Imm));
- return Success;
-}
-
-static DecodeStatus DecodeVecShiftLImm(llvm::MCInst &Inst, unsigned Imm,
- unsigned Add) {
- Inst.addOperand(MCOperand::CreateImm((Imm + Add) & (Add - 1)));
- return Success;
-}
-
-static DecodeStatus DecodeVecShiftR64Imm(llvm::MCInst &Inst, unsigned Imm,
- uint64_t Addr, const void *Decoder) {
- return DecodeVecShiftRImm(Inst, Imm, 64);
-}
-
-static DecodeStatus DecodeVecShiftR64ImmNarrow(llvm::MCInst &Inst, unsigned Imm,
- uint64_t Addr,
- const void *Decoder) {
- return DecodeVecShiftRImm(Inst, Imm | 0x20, 64);
-}
-
-static DecodeStatus DecodeVecShiftR32Imm(llvm::MCInst &Inst, unsigned Imm,
- uint64_t Addr, const void *Decoder) {
- return DecodeVecShiftRImm(Inst, Imm, 32);
-}
-
-static DecodeStatus DecodeVecShiftR32ImmNarrow(llvm::MCInst &Inst, unsigned Imm,
- uint64_t Addr,
- const void *Decoder) {
- return DecodeVecShiftRImm(Inst, Imm | 0x10, 32);
-}
-
-static DecodeStatus DecodeVecShiftR16Imm(llvm::MCInst &Inst, unsigned Imm,
- uint64_t Addr, const void *Decoder) {
- return DecodeVecShiftRImm(Inst, Imm, 16);
-}
-
-static DecodeStatus DecodeVecShiftR16ImmNarrow(llvm::MCInst &Inst, unsigned Imm,
- uint64_t Addr,
- const void *Decoder) {
- return DecodeVecShiftRImm(Inst, Imm | 0x8, 16);
-}
-
-static DecodeStatus DecodeVecShiftR8Imm(llvm::MCInst &Inst, unsigned Imm,
- uint64_t Addr, const void *Decoder) {
- return DecodeVecShiftRImm(Inst, Imm, 8);
-}
-
-static DecodeStatus DecodeVecShiftL64Imm(llvm::MCInst &Inst, unsigned Imm,
- uint64_t Addr, const void *Decoder) {
- return DecodeVecShiftLImm(Inst, Imm, 64);
-}
-
-static DecodeStatus DecodeVecShiftL32Imm(llvm::MCInst &Inst, unsigned Imm,
- uint64_t Addr, const void *Decoder) {
- return DecodeVecShiftLImm(Inst, Imm, 32);
-}
-
-static DecodeStatus DecodeVecShiftL16Imm(llvm::MCInst &Inst, unsigned Imm,
- uint64_t Addr, const void *Decoder) {
- return DecodeVecShiftLImm(Inst, Imm, 16);
-}
-
-static DecodeStatus DecodeVecShiftL8Imm(llvm::MCInst &Inst, unsigned Imm,
- uint64_t Addr, const void *Decoder) {
- return DecodeVecShiftLImm(Inst, Imm, 8);
-}
-
-static DecodeStatus DecodeThreeAddrSRegInstruction(llvm::MCInst &Inst,
- uint32_t insn, uint64_t Addr,
- const void *Decoder) {
- unsigned Rd = fieldFromInstruction(insn, 0, 5);
- unsigned Rn = fieldFromInstruction(insn, 5, 5);
- unsigned Rm = fieldFromInstruction(insn, 16, 5);
- unsigned shiftHi = fieldFromInstruction(insn, 22, 2);
- unsigned shiftLo = fieldFromInstruction(insn, 10, 6);
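-  // Pack the shift type (bits 23:22) and amount (bits 15:10) into the
-  // backend's shifter-operand form: type << 6 | amount.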
- unsigned shift = (shiftHi << 6) | shiftLo;
- switch (Inst.getOpcode()) {
- default:
- return Fail;
- case ARM64::ANDWrs:
- case ARM64::ANDSWrs:
- case ARM64::BICWrs:
- case ARM64::BICSWrs:
- case ARM64::ORRWrs:
- case ARM64::ORNWrs:
- case ARM64::EORWrs:
- case ARM64::EONWrs:
- case ARM64::ADDWrs:
- case ARM64::ADDSWrs:
- case ARM64::SUBWrs:
- case ARM64::SUBSWrs: {
- DecodeGPR32RegisterClass(Inst, Rd, Addr, Decoder);
- DecodeGPR32RegisterClass(Inst, Rn, Addr, Decoder);
- DecodeGPR32RegisterClass(Inst, Rm, Addr, Decoder);
- break;
- }
- case ARM64::ANDXrs:
- case ARM64::ANDSXrs:
- case ARM64::BICXrs:
- case ARM64::BICSXrs:
- case ARM64::ORRXrs:
- case ARM64::ORNXrs:
- case ARM64::EORXrs:
- case ARM64::EONXrs:
- case ARM64::ADDXrs:
- case ARM64::ADDSXrs:
- case ARM64::SUBXrs:
- case ARM64::SUBSXrs:
- DecodeGPR64RegisterClass(Inst, Rd, Addr, Decoder);
- DecodeGPR64RegisterClass(Inst, Rn, Addr, Decoder);
- DecodeGPR64RegisterClass(Inst, Rm, Addr, Decoder);
- break;
- }
-
- Inst.addOperand(MCOperand::CreateImm(shift));
- return Success;
-}
-
-static DecodeStatus DecodeMoveImmInstruction(llvm::MCInst &Inst, uint32_t insn,
- uint64_t Addr,
- const void *Decoder) {
- unsigned Rd = fieldFromInstruction(insn, 0, 5);
- unsigned imm = fieldFromInstruction(insn, 5, 16);
- unsigned shift = fieldFromInstruction(insn, 21, 2);
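-  // The hw field selects a 16-bit chunk: the shift amount is hw * 16.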
- shift <<= 4;
- switch (Inst.getOpcode()) {
- default:
- return Fail;
- case ARM64::MOVZWi:
- case ARM64::MOVNWi:
- case ARM64::MOVKWi:
- DecodeGPR32RegisterClass(Inst, Rd, Addr, Decoder);
- break;
- case ARM64::MOVZXi:
- case ARM64::MOVNXi:
- case ARM64::MOVKXi:
- DecodeGPR64RegisterClass(Inst, Rd, Addr, Decoder);
- break;
- }
-
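-  // MOVK reads and writes Rd, so add the destination again as the tied
-  // source operand.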
- if (Inst.getOpcode() == ARM64::MOVKWi || Inst.getOpcode() == ARM64::MOVKXi)
- Inst.addOperand(Inst.getOperand(0));
-
- Inst.addOperand(MCOperand::CreateImm(imm));
- Inst.addOperand(MCOperand::CreateImm(shift));
- return Success;
-}
-
-static DecodeStatus DecodeUnsignedLdStInstruction(llvm::MCInst &Inst,
- uint32_t insn, uint64_t Addr,
- const void *Decoder) {
- unsigned Rt = fieldFromInstruction(insn, 0, 5);
- unsigned Rn = fieldFromInstruction(insn, 5, 5);
- unsigned offset = fieldFromInstruction(insn, 10, 12);
- const ARM64Disassembler *Dis =
- static_cast<const ARM64Disassembler *>(Decoder);
-
- switch (Inst.getOpcode()) {
- default:
- return Fail;
- case ARM64::PRFMui:
- // Rt is an immediate in prefetch.
- Inst.addOperand(MCOperand::CreateImm(Rt));
- break;
- case ARM64::STRBBui:
- case ARM64::LDRBBui:
- case ARM64::LDRSBWui:
- case ARM64::STRHHui:
- case ARM64::LDRHHui:
- case ARM64::LDRSHWui:
- case ARM64::STRWui:
- case ARM64::LDRWui:
- DecodeGPR32RegisterClass(Inst, Rt, Addr, Decoder);
- break;
- case ARM64::LDRSBXui:
- case ARM64::LDRSHXui:
- case ARM64::LDRSWui:
- case ARM64::STRXui:
- case ARM64::LDRXui:
- DecodeGPR64RegisterClass(Inst, Rt, Addr, Decoder);
- break;
- case ARM64::LDRQui:
- case ARM64::STRQui:
- DecodeFPR128RegisterClass(Inst, Rt, Addr, Decoder);
- break;
- case ARM64::LDRDui:
- case ARM64::STRDui:
- DecodeFPR64RegisterClass(Inst, Rt, Addr, Decoder);
- break;
- case ARM64::LDRSui:
- case ARM64::STRSui:
- DecodeFPR32RegisterClass(Inst, Rt, Addr, Decoder);
- break;
- case ARM64::LDRHui:
- case ARM64::STRHui:
- DecodeFPR16RegisterClass(Inst, Rt, Addr, Decoder);
- break;
- case ARM64::LDRBui:
- case ARM64::STRBui:
- DecodeFPR8RegisterClass(Inst, Rt, Addr, Decoder);
- break;
- }
-
- DecodeGPR64spRegisterClass(Inst, Rn, Addr, Decoder);
- if (!Dis->tryAddingSymbolicOperand(Addr, offset, Fail, 4, Inst, insn))
- Inst.addOperand(MCOperand::CreateImm(offset));
- return Success;
-}
-
-static DecodeStatus DecodeSignedLdStInstruction(llvm::MCInst &Inst,
- uint32_t insn, uint64_t Addr,
- const void *Decoder) {
- unsigned Rt = fieldFromInstruction(insn, 0, 5);
- unsigned Rn = fieldFromInstruction(insn, 5, 5);
- int64_t offset = fieldFromInstruction(insn, 12, 9);
-
-  // offset is a 9-bit signed immediate, so sign-extend it into the full
-  // 64-bit value.
- if (offset & (1 << (9 - 1)))
- offset |= ~((1LL << 9) - 1);
-
- switch (Inst.getOpcode()) {
- default:
- return Fail;
- case ARM64::PRFUMi:
- // Rt is an immediate in prefetch.
- Inst.addOperand(MCOperand::CreateImm(Rt));
- break;
- case ARM64::STURBBi:
- case ARM64::LDURBBi:
- case ARM64::LDURSBWi:
- case ARM64::STURHHi:
- case ARM64::LDURHHi:
- case ARM64::LDURSHWi:
- case ARM64::STURWi:
- case ARM64::LDURWi:
- case ARM64::LDTRSBWi:
- case ARM64::LDTRSHWi:
- case ARM64::STTRWi:
- case ARM64::LDTRWi:
- case ARM64::STTRHi:
- case ARM64::LDTRHi:
- case ARM64::LDTRBi:
- case ARM64::STTRBi:
- case ARM64::LDRSBWpre:
- case ARM64::LDRSHWpre:
- case ARM64::STRBBpre:
- case ARM64::LDRBBpre:
- case ARM64::STRHHpre:
- case ARM64::LDRHHpre:
- case ARM64::STRWpre:
- case ARM64::LDRWpre:
- case ARM64::LDRSBWpost:
- case ARM64::LDRSHWpost:
- case ARM64::STRBBpost:
- case ARM64::LDRBBpost:
- case ARM64::STRHHpost:
- case ARM64::LDRHHpost:
- case ARM64::STRWpost:
- case ARM64::LDRWpost:
- DecodeGPR32RegisterClass(Inst, Rt, Addr, Decoder);
- break;
- case ARM64::LDURSBXi:
- case ARM64::LDURSHXi:
- case ARM64::LDURSWi:
- case ARM64::STURXi:
- case ARM64::LDURXi:
- case ARM64::LDTRSBXi:
- case ARM64::LDTRSHXi:
- case ARM64::LDTRSWi:
- case ARM64::STTRXi:
- case ARM64::LDTRXi:
- case ARM64::LDRSBXpre:
- case ARM64::LDRSHXpre:
- case ARM64::STRXpre:
- case ARM64::LDRSWpre:
- case ARM64::LDRXpre:
- case ARM64::LDRSBXpost:
- case ARM64::LDRSHXpost:
- case ARM64::STRXpost:
- case ARM64::LDRSWpost:
- case ARM64::LDRXpost:
- DecodeGPR64RegisterClass(Inst, Rt, Addr, Decoder);
- break;
- case ARM64::LDURQi:
- case ARM64::STURQi:
- case ARM64::LDRQpre:
- case ARM64::STRQpre:
- case ARM64::LDRQpost:
- case ARM64::STRQpost:
- DecodeFPR128RegisterClass(Inst, Rt, Addr, Decoder);
- break;
- case ARM64::LDURDi:
- case ARM64::STURDi:
- case ARM64::LDRDpre:
- case ARM64::STRDpre:
- case ARM64::LDRDpost:
- case ARM64::STRDpost:
- DecodeFPR64RegisterClass(Inst, Rt, Addr, Decoder);
- break;
- case ARM64::LDURSi:
- case ARM64::STURSi:
- case ARM64::LDRSpre:
- case ARM64::STRSpre:
- case ARM64::LDRSpost:
- case ARM64::STRSpost:
- DecodeFPR32RegisterClass(Inst, Rt, Addr, Decoder);
- break;
- case ARM64::LDURHi:
- case ARM64::STURHi:
- case ARM64::LDRHpre:
- case ARM64::STRHpre:
- case ARM64::LDRHpost:
- case ARM64::STRHpost:
- DecodeFPR16RegisterClass(Inst, Rt, Addr, Decoder);
- break;
- case ARM64::LDURBi:
- case ARM64::STURBi:
- case ARM64::LDRBpre:
- case ARM64::STRBpre:
- case ARM64::LDRBpost:
- case ARM64::STRBpost:
- DecodeFPR8RegisterClass(Inst, Rt, Addr, Decoder);
- break;
- }
-
- DecodeGPR64spRegisterClass(Inst, Rn, Addr, Decoder);
- Inst.addOperand(MCOperand::CreateImm(offset));
- return Success;
-}
-
-static DecodeStatus DecodeExclusiveLdStInstruction(llvm::MCInst &Inst,
- uint32_t insn, uint64_t Addr,
- const void *Decoder) {
- unsigned Rt = fieldFromInstruction(insn, 0, 5);
- unsigned Rn = fieldFromInstruction(insn, 5, 5);
- unsigned Rt2 = fieldFromInstruction(insn, 10, 5);
- unsigned Rs = fieldFromInstruction(insn, 16, 5);
-
- switch (Inst.getOpcode()) {
- default:
- return Fail;
- case ARM64::STLXRW:
- case ARM64::STLXRB:
- case ARM64::STLXRH:
- case ARM64::STXRW:
- case ARM64::STXRB:
- case ARM64::STXRH:
- DecodeGPR32RegisterClass(Inst, Rs, Addr, Decoder);
- // FALLTHROUGH
- case ARM64::LDARW:
- case ARM64::LDARB:
- case ARM64::LDARH:
- case ARM64::LDAXRW:
- case ARM64::LDAXRB:
- case ARM64::LDAXRH:
- case ARM64::LDXRW:
- case ARM64::LDXRB:
- case ARM64::LDXRH:
- case ARM64::STLRW:
- case ARM64::STLRB:
- case ARM64::STLRH:
- DecodeGPR32RegisterClass(Inst, Rt, Addr, Decoder);
- break;
- case ARM64::STLXRX:
- case ARM64::STXRX:
- DecodeGPR32RegisterClass(Inst, Rs, Addr, Decoder);
- // FALLTHROUGH
- case ARM64::LDARX:
- case ARM64::LDAXRX:
- case ARM64::LDXRX:
- case ARM64::STLRX:
- DecodeGPR64RegisterClass(Inst, Rt, Addr, Decoder);
- break;
- case ARM64::STLXPW:
- case ARM64::STXPW:
- DecodeGPR32RegisterClass(Inst, Rs, Addr, Decoder);
- // FALLTHROUGH
- case ARM64::LDAXPW:
- case ARM64::LDXPW:
- DecodeGPR32RegisterClass(Inst, Rt, Addr, Decoder);
- DecodeGPR32RegisterClass(Inst, Rt2, Addr, Decoder);
- break;
- case ARM64::STLXPX:
- case ARM64::STXPX:
- DecodeGPR32RegisterClass(Inst, Rs, Addr, Decoder);
- // FALLTHROUGH
- case ARM64::LDAXPX:
- case ARM64::LDXPX:
- DecodeGPR64RegisterClass(Inst, Rt, Addr, Decoder);
- DecodeGPR64RegisterClass(Inst, Rt2, Addr, Decoder);
- break;
- }
-
- DecodeGPR64spRegisterClass(Inst, Rn, Addr, Decoder);
- return Success;
-}
-
-static DecodeStatus DecodePairLdStInstruction(llvm::MCInst &Inst, uint32_t insn,
- uint64_t Addr,
- const void *Decoder) {
- unsigned Rt = fieldFromInstruction(insn, 0, 5);
- unsigned Rn = fieldFromInstruction(insn, 5, 5);
- unsigned Rt2 = fieldFromInstruction(insn, 10, 5);
- int64_t offset = fieldFromInstruction(insn, 15, 7);
-
-  // offset is a 7-bit signed immediate, so sign-extend it into the full
-  // 64-bit value.
- if (offset & (1 << (7 - 1)))
- offset |= ~((1LL << 7) - 1);
-
- switch (Inst.getOpcode()) {
- default:
- return Fail;
- case ARM64::LDNPXi:
- case ARM64::STNPXi:
- case ARM64::LDPXpost:
- case ARM64::STPXpost:
- case ARM64::LDPSWpost:
- case ARM64::LDPXi:
- case ARM64::STPXi:
- case ARM64::LDPSWi:
- case ARM64::LDPXpre:
- case ARM64::STPXpre:
- case ARM64::LDPSWpre:
- DecodeGPR64RegisterClass(Inst, Rt, Addr, Decoder);
- DecodeGPR64RegisterClass(Inst, Rt2, Addr, Decoder);
- break;
- case ARM64::LDNPWi:
- case ARM64::STNPWi:
- case ARM64::LDPWpost:
- case ARM64::STPWpost:
- case ARM64::LDPWi:
- case ARM64::STPWi:
- case ARM64::LDPWpre:
- case ARM64::STPWpre:
- DecodeGPR32RegisterClass(Inst, Rt, Addr, Decoder);
- DecodeGPR32RegisterClass(Inst, Rt2, Addr, Decoder);
- break;
- case ARM64::LDNPQi:
- case ARM64::STNPQi:
- case ARM64::LDPQpost:
- case ARM64::STPQpost:
- case ARM64::LDPQi:
- case ARM64::STPQi:
- case ARM64::LDPQpre:
- case ARM64::STPQpre:
- DecodeFPR128RegisterClass(Inst, Rt, Addr, Decoder);
- DecodeFPR128RegisterClass(Inst, Rt2, Addr, Decoder);
- break;
- case ARM64::LDNPDi:
- case ARM64::STNPDi:
- case ARM64::LDPDpost:
- case ARM64::STPDpost:
- case ARM64::LDPDi:
- case ARM64::STPDi:
- case ARM64::LDPDpre:
- case ARM64::STPDpre:
- DecodeFPR64RegisterClass(Inst, Rt, Addr, Decoder);
- DecodeFPR64RegisterClass(Inst, Rt2, Addr, Decoder);
- break;
- case ARM64::LDNPSi:
- case ARM64::STNPSi:
- case ARM64::LDPSpost:
- case ARM64::STPSpost:
- case ARM64::LDPSi:
- case ARM64::STPSi:
- case ARM64::LDPSpre:
- case ARM64::STPSpre:
- DecodeFPR32RegisterClass(Inst, Rt, Addr, Decoder);
- DecodeFPR32RegisterClass(Inst, Rt2, Addr, Decoder);
- break;
- }
-
- DecodeGPR64spRegisterClass(Inst, Rn, Addr, Decoder);
- Inst.addOperand(MCOperand::CreateImm(offset));
- return Success;
-}
-
-static DecodeStatus DecodeRegOffsetLdStInstruction(llvm::MCInst &Inst,
- uint32_t insn, uint64_t Addr,
- const void *Decoder) {
- unsigned Rt = fieldFromInstruction(insn, 0, 5);
- unsigned Rn = fieldFromInstruction(insn, 5, 5);
- unsigned Rm = fieldFromInstruction(insn, 16, 5);
- unsigned extendHi = fieldFromInstruction(insn, 13, 3);
- unsigned extendLo = fieldFromInstruction(insn, 12, 1);
-  unsigned extend = (extendHi << 1) | extendLo;
-
-  switch (Inst.getOpcode()) {
-  default:
-    return Fail;
-  case ARM64::LDRSWro:
-  case ARM64::LDRXro:
-  case ARM64::STRXro:
-  case ARM64::LDRSHXro:
-  case ARM64::LDRSBXro:
-    DecodeGPR64RegisterClass(Inst, Rt, Addr, Decoder);
-    break;
-  case ARM64::LDRWro:
-  case ARM64::STRWro:
-  case ARM64::LDRBBro:
-  case ARM64::STRBBro:
-  case ARM64::LDRSBWro:
-  case ARM64::LDRHHro:
-  case ARM64::STRHHro:
-  case ARM64::LDRSHWro:
-    DecodeGPR32RegisterClass(Inst, Rt, Addr, Decoder);
-    break;
-  case ARM64::LDRQro:
-  case ARM64::STRQro:
-    DecodeFPR128RegisterClass(Inst, Rt, Addr, Decoder);
-    break;
-  case ARM64::LDRDro:
-  case ARM64::STRDro:
-    DecodeFPR64RegisterClass(Inst, Rt, Addr, Decoder);
-    break;
-  case ARM64::LDRSro:
-  case ARM64::STRSro:
-    DecodeFPR32RegisterClass(Inst, Rt, Addr, Decoder);
-    break;
-  case ARM64::LDRHro:
-    DecodeFPR16RegisterClass(Inst, Rt, Addr, Decoder);
-    break;
-  case ARM64::LDRBro:
-    DecodeFPR8RegisterClass(Inst, Rt, Addr, Decoder);
-    break;
-  case ARM64::PRFMro:
-    // Rt is an immediate in prefetch.
-    Inst.addOperand(MCOperand::CreateImm(Rt));
-    break;
-  }
-
-  DecodeGPR64spRegisterClass(Inst, Rn, Addr, Decoder);
-  // Rm decodes to the same register class for every extend encoding.
-  DecodeGPR64RegisterClass(Inst, Rm, Addr, Decoder);
-
- Inst.addOperand(MCOperand::CreateImm(extend));
- return Success;
-}
-
-static DecodeStatus DecodeAddSubERegInstruction(llvm::MCInst &Inst,
- uint32_t insn, uint64_t Addr,
- const void *Decoder) {
- unsigned Rd = fieldFromInstruction(insn, 0, 5);
- unsigned Rn = fieldFromInstruction(insn, 5, 5);
- unsigned Rm = fieldFromInstruction(insn, 16, 5);
- unsigned extend = fieldFromInstruction(insn, 10, 6);
-
- switch (Inst.getOpcode()) {
- default:
- return Fail;
- case ARM64::ADDWrx:
- case ARM64::SUBWrx:
- DecodeGPR32spRegisterClass(Inst, Rd, Addr, Decoder);
- DecodeGPR32spRegisterClass(Inst, Rn, Addr, Decoder);
- DecodeGPR32RegisterClass(Inst, Rm, Addr, Decoder);
- break;
- case ARM64::ADDSWrx:
- case ARM64::SUBSWrx:
- DecodeGPR32RegisterClass(Inst, Rd, Addr, Decoder);
- DecodeGPR32spRegisterClass(Inst, Rn, Addr, Decoder);
- DecodeGPR32RegisterClass(Inst, Rm, Addr, Decoder);
- break;
- case ARM64::ADDXrx:
- case ARM64::SUBXrx:
- DecodeGPR64spRegisterClass(Inst, Rd, Addr, Decoder);
- DecodeGPR64spRegisterClass(Inst, Rn, Addr, Decoder);
- DecodeGPR32RegisterClass(Inst, Rm, Addr, Decoder);
- break;
- case ARM64::ADDSXrx:
- case ARM64::SUBSXrx:
- DecodeGPR64RegisterClass(Inst, Rd, Addr, Decoder);
- DecodeGPR64spRegisterClass(Inst, Rn, Addr, Decoder);
- DecodeGPR32RegisterClass(Inst, Rm, Addr, Decoder);
- break;
- case ARM64::ADDXrx64:
- case ARM64::ADDSXrx64:
- case ARM64::SUBXrx64:
- case ARM64::SUBSXrx64:
- DecodeGPR64spRegisterClass(Inst, Rd, Addr, Decoder);
- DecodeGPR64spRegisterClass(Inst, Rn, Addr, Decoder);
- DecodeGPR64RegisterClass(Inst, Rm, Addr, Decoder);
- break;
- }
-
- Inst.addOperand(MCOperand::CreateImm(extend));
- return Success;
-}
-
-static DecodeStatus DecodeLogicalImmInstruction(llvm::MCInst &Inst,
- uint32_t insn, uint64_t Addr,
- const void *Decoder) {
- unsigned Rd = fieldFromInstruction(insn, 0, 5);
- unsigned Rn = fieldFromInstruction(insn, 5, 5);
- unsigned Datasize = fieldFromInstruction(insn, 31, 1);
- unsigned imm;
-
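-  // The logical immediate is N:immr:imms; only the 64-bit form encodes the
-  // N bit.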
- if (Datasize) {
- DecodeGPR64spRegisterClass(Inst, Rd, Addr, Decoder);
- DecodeGPR64RegisterClass(Inst, Rn, Addr, Decoder);
- imm = fieldFromInstruction(insn, 10, 13);
- if (!ARM64_AM::isValidDecodeLogicalImmediate(imm, 64))
- return Fail;
- } else {
- DecodeGPR32RegisterClass(Inst, Rd, Addr, Decoder);
- DecodeGPR32RegisterClass(Inst, Rn, Addr, Decoder);
- imm = fieldFromInstruction(insn, 10, 12);
- if (!ARM64_AM::isValidDecodeLogicalImmediate(imm, 32))
- return Fail;
- }
- Inst.addOperand(MCOperand::CreateImm(imm));
- return Success;
-}
-
-static DecodeStatus DecodeModImmInstruction(llvm::MCInst &Inst, uint32_t insn,
- uint64_t Addr,
- const void *Decoder) {
- unsigned Rd = fieldFromInstruction(insn, 0, 5);
- unsigned cmode = fieldFromInstruction(insn, 12, 4);
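-  // The 8-bit immediate abcdefgh is split across bits 18:16 and 9:5.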
- unsigned imm = fieldFromInstruction(insn, 16, 3) << 5;
- imm |= fieldFromInstruction(insn, 5, 5);
-
- if (Inst.getOpcode() == ARM64::MOVID)
- DecodeFPR64RegisterClass(Inst, Rd, Addr, Decoder);
- else
- DecodeVectorRegisterClass(Inst, Rd, Addr, Decoder);
-
- Inst.addOperand(MCOperand::CreateImm(imm));
-
- switch (Inst.getOpcode()) {
- default:
- break;
- case ARM64::MOVIv4i16:
- case ARM64::MOVIv8i16:
- case ARM64::MVNIv4i16:
- case ARM64::MVNIv8i16:
- case ARM64::MOVIv2i32:
- case ARM64::MOVIv4i32:
- case ARM64::MVNIv2i32:
- case ARM64::MVNIv4i32:
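-    // cmode<2:1> selects the LSL amount in multiples of 8 (0, 8, 16 or 24).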
- Inst.addOperand(MCOperand::CreateImm((cmode & 6) << 2));
- break;
- case ARM64::MOVIv2s_msl:
- case ARM64::MOVIv4s_msl:
- case ARM64::MVNIv2s_msl:
- case ARM64::MVNIv4s_msl:
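-    // cmode<0> selects MSL #16 vs MSL #8 (shifter encodings 0x110 / 0x108).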
- Inst.addOperand(MCOperand::CreateImm(cmode & 1 ? 0x110 : 0x108));
- break;
- }
-
- return Success;
-}
-
-static DecodeStatus DecodeModImmTiedInstruction(llvm::MCInst &Inst,
- uint32_t insn, uint64_t Addr,
- const void *Decoder) {
- unsigned Rd = fieldFromInstruction(insn, 0, 5);
- unsigned cmode = fieldFromInstruction(insn, 12, 4);
- unsigned imm = fieldFromInstruction(insn, 16, 3) << 5;
- imm |= fieldFromInstruction(insn, 5, 5);
-
-  // Rd is tied: add the vector register twice, as destination and source.
- DecodeVectorRegisterClass(Inst, Rd, Addr, Decoder);
- DecodeVectorRegisterClass(Inst, Rd, Addr, Decoder);
-
- Inst.addOperand(MCOperand::CreateImm(imm));
- Inst.addOperand(MCOperand::CreateImm((cmode & 6) << 2));
-
- return Success;
-}
-
-static DecodeStatus DecodeAdrInstruction(llvm::MCInst &Inst, uint32_t insn,
- uint64_t Addr, const void *Decoder) {
- unsigned Rd = fieldFromInstruction(insn, 0, 5);
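-  // The 21-bit immediate is immhi:immlo (bits 23:5 and 30:29).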
- int64_t imm = fieldFromInstruction(insn, 5, 19) << 2;
- imm |= fieldFromInstruction(insn, 29, 2);
- const ARM64Disassembler *Dis =
- static_cast<const ARM64Disassembler *>(Decoder);
-
- // Sign-extend the 21-bit immediate.
- if (imm & (1 << (21 - 1)))
- imm |= ~((1LL << 21) - 1);
-
- DecodeGPR64RegisterClass(Inst, Rd, Addr, Decoder);
- if (!Dis->tryAddingSymbolicOperand(Addr, imm, Fail, 4, Inst, insn))
- Inst.addOperand(MCOperand::CreateImm(imm));
-
- return Success;
-}
-
-static DecodeStatus DecodeBaseAddSubImm(llvm::MCInst &Inst, uint32_t insn,
- uint64_t Addr, const void *Decoder) {
- unsigned Rd = fieldFromInstruction(insn, 0, 5);
- unsigned Rn = fieldFromInstruction(insn, 5, 5);
- unsigned Imm = fieldFromInstruction(insn, 10, 14);
- unsigned S = fieldFromInstruction(insn, 29, 1);
- unsigned Datasize = fieldFromInstruction(insn, 31, 1);
-
- unsigned ShifterVal = (Imm >> 12) & 3;
- unsigned ImmVal = Imm & 0xFFF;
- const ARM64Disassembler *Dis =
- static_cast<const ARM64Disassembler *>(Decoder);
-
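-  // Only LSL #0 (ShifterVal 0) and LSL #12 (ShifterVal 1) are valid for
-  // add/sub immediate.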
- if (ShifterVal != 0 && ShifterVal != 1)
- return Fail;
-
- if (Datasize) {
- if (Rd == 31 && !S)
- DecodeGPR64spRegisterClass(Inst, Rd, Addr, Decoder);
- else
- DecodeGPR64RegisterClass(Inst, Rd, Addr, Decoder);
- DecodeGPR64spRegisterClass(Inst, Rn, Addr, Decoder);
- } else {
- if (Rd == 31 && !S)
- DecodeGPR32spRegisterClass(Inst, Rd, Addr, Decoder);
- else
- DecodeGPR32RegisterClass(Inst, Rd, Addr, Decoder);
- DecodeGPR32spRegisterClass(Inst, Rn, Addr, Decoder);
- }
-
- if (!Dis->tryAddingSymbolicOperand(Addr, ImmVal, Fail, 4, Inst, insn))
- Inst.addOperand(MCOperand::CreateImm(ImmVal));
- Inst.addOperand(MCOperand::CreateImm(12 * ShifterVal));
- return Success;
-}
-
-static DecodeStatus DecodeUnconditionalBranch(llvm::MCInst &Inst, uint32_t insn,
- uint64_t Addr,
- const void *Decoder) {
- int64_t imm = fieldFromInstruction(insn, 0, 26);
- const ARM64Disassembler *Dis =
- static_cast<const ARM64Disassembler *>(Decoder);
-
- // Sign-extend the 26-bit immediate.
- if (imm & (1 << (26 - 1)))
- imm |= ~((1LL << 26) - 1);
-
- if (!Dis->tryAddingSymbolicOperand(Addr, imm << 2, true, 4, Inst))
- Inst.addOperand(MCOperand::CreateImm(imm));
-
- return Success;
-}
-
-static DecodeStatus DecodeSystemCPSRInstruction(llvm::MCInst &Inst,
- uint32_t insn, uint64_t Addr,
- const void *Decoder) {
- uint64_t op1 = fieldFromInstruction(insn, 16, 3);
- uint64_t op2 = fieldFromInstruction(insn, 5, 3);
- uint64_t crm = fieldFromInstruction(insn, 8, 4);
-
- Inst.addOperand(MCOperand::CreateImm((op1 << 3) | op2));
- Inst.addOperand(MCOperand::CreateImm(crm));
-
- return Success;
-}
-
-static DecodeStatus DecodeTestAndBranch(llvm::MCInst &Inst, uint32_t insn,
- uint64_t Addr, const void *Decoder) {
- uint64_t Rt = fieldFromInstruction(insn, 0, 5);
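-  // The bit number to test is b5:b40 (6 bits, 0-63).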
- uint64_t bit = fieldFromInstruction(insn, 31, 1) << 5;
- bit |= fieldFromInstruction(insn, 19, 5);
- int64_t dst = fieldFromInstruction(insn, 5, 14);
- const ARM64Disassembler *Dis =
- static_cast<const ARM64Disassembler *>(Decoder);
-
- // Sign-extend 14-bit immediate.
- if (dst & (1 << (14 - 1)))
- dst |= ~((1LL << 14) - 1);
-
- DecodeGPR64RegisterClass(Inst, Rt, Addr, Decoder);
- Inst.addOperand(MCOperand::CreateImm(bit));
- if (!Dis->tryAddingSymbolicOperand(Addr, dst << 2, true, 4, Inst))
- Inst.addOperand(MCOperand::CreateImm(dst));
-
- return Success;
-}
-
-static DecodeStatus DecodeSIMDLdStPost(llvm::MCInst &Inst, uint32_t insn,
- uint64_t Addr, const void *Decoder) {
- uint64_t Rd = fieldFromInstruction(insn, 0, 5);
- uint64_t Rn = fieldFromInstruction(insn, 5, 5);
- uint64_t Rm = fieldFromInstruction(insn, 16, 5);
-
- switch (Inst.getOpcode()) {
- default:
- return Fail;
- case ARM64::ST1Onev8b_POST:
- case ARM64::ST1Onev4h_POST:
- case ARM64::ST1Onev2s_POST:
- case ARM64::ST1Onev1d_POST:
- case ARM64::LD1Onev8b_POST:
- case ARM64::LD1Onev4h_POST:
- case ARM64::LD1Onev2s_POST:
- case ARM64::LD1Onev1d_POST:
- DecodeFPR64RegisterClass(Inst, Rd, Addr, Decoder);
- break;
- case ARM64::ST1Onev16b_POST:
- case ARM64::ST1Onev8h_POST:
- case ARM64::ST1Onev4s_POST:
- case ARM64::ST1Onev2d_POST:
- case ARM64::LD1Onev16b_POST:
- case ARM64::LD1Onev8h_POST:
- case ARM64::LD1Onev4s_POST:
- case ARM64::LD1Onev2d_POST:
- DecodeFPR128RegisterClass(Inst, Rd, Addr, Decoder);
- break;
- case ARM64::ST1Twov8b_POST:
- case ARM64::ST1Twov4h_POST:
- case ARM64::ST1Twov2s_POST:
- case ARM64::ST1Twov1d_POST:
- case ARM64::ST2Twov8b_POST:
- case ARM64::ST2Twov4h_POST:
- case ARM64::ST2Twov2s_POST:
- case ARM64::LD1Twov8b_POST:
- case ARM64::LD1Twov4h_POST:
- case ARM64::LD1Twov2s_POST:
- case ARM64::LD1Twov1d_POST:
- case ARM64::LD2Twov8b_POST:
- case ARM64::LD2Twov4h_POST:
- case ARM64::LD2Twov2s_POST:
- DecodeDDRegisterClass(Inst, Rd, Addr, Decoder);
- break;
- case ARM64::ST1Threev8b_POST:
- case ARM64::ST1Threev4h_POST:
- case ARM64::ST1Threev2s_POST:
- case ARM64::ST1Threev1d_POST:
- case ARM64::ST3Threev8b_POST:
- case ARM64::ST3Threev4h_POST:
- case ARM64::ST3Threev2s_POST:
- case ARM64::LD1Threev8b_POST:
- case ARM64::LD1Threev4h_POST:
- case ARM64::LD1Threev2s_POST:
- case ARM64::LD1Threev1d_POST:
- case ARM64::LD3Threev8b_POST:
- case ARM64::LD3Threev4h_POST:
- case ARM64::LD3Threev2s_POST:
- DecodeDDDRegisterClass(Inst, Rd, Addr, Decoder);
- break;
- case ARM64::ST1Fourv8b_POST:
- case ARM64::ST1Fourv4h_POST:
- case ARM64::ST1Fourv2s_POST:
- case ARM64::ST1Fourv1d_POST:
- case ARM64::ST4Fourv8b_POST:
- case ARM64::ST4Fourv4h_POST:
- case ARM64::ST4Fourv2s_POST:
- case ARM64::LD1Fourv8b_POST:
- case ARM64::LD1Fourv4h_POST:
- case ARM64::LD1Fourv2s_POST:
- case ARM64::LD1Fourv1d_POST:
- case ARM64::LD4Fourv8b_POST:
- case ARM64::LD4Fourv4h_POST:
- case ARM64::LD4Fourv2s_POST:
- DecodeDDDDRegisterClass(Inst, Rd, Addr, Decoder);
- break;
- case ARM64::ST1Twov16b_POST:
- case ARM64::ST1Twov8h_POST:
- case ARM64::ST1Twov4s_POST:
- case ARM64::ST1Twov2d_POST:
- case ARM64::ST2Twov16b_POST:
- case ARM64::ST2Twov8h_POST:
- case ARM64::ST2Twov4s_POST:
- case ARM64::ST2Twov2d_POST:
- case ARM64::LD1Twov16b_POST:
- case ARM64::LD1Twov8h_POST:
- case ARM64::LD1Twov4s_POST:
- case ARM64::LD1Twov2d_POST:
- case ARM64::LD2Twov16b_POST:
- case ARM64::LD2Twov8h_POST:
- case ARM64::LD2Twov4s_POST:
- case ARM64::LD2Twov2d_POST:
- DecodeQQRegisterClass(Inst, Rd, Addr, Decoder);
- break;
- case ARM64::ST1Threev16b_POST:
- case ARM64::ST1Threev8h_POST:
- case ARM64::ST1Threev4s_POST:
- case ARM64::ST1Threev2d_POST:
- case ARM64::ST3Threev16b_POST:
- case ARM64::ST3Threev8h_POST:
- case ARM64::ST3Threev4s_POST:
- case ARM64::ST3Threev2d_POST:
- case ARM64::LD1Threev16b_POST:
- case ARM64::LD1Threev8h_POST:
- case ARM64::LD1Threev4s_POST:
- case ARM64::LD1Threev2d_POST:
- case ARM64::LD3Threev16b_POST:
- case ARM64::LD3Threev8h_POST:
- case ARM64::LD3Threev4s_POST:
- case ARM64::LD3Threev2d_POST:
- DecodeQQQRegisterClass(Inst, Rd, Addr, Decoder);
- break;
- case ARM64::ST1Fourv16b_POST:
- case ARM64::ST1Fourv8h_POST:
- case ARM64::ST1Fourv4s_POST:
- case ARM64::ST1Fourv2d_POST:
- case ARM64::ST4Fourv16b_POST:
- case ARM64::ST4Fourv8h_POST:
- case ARM64::ST4Fourv4s_POST:
- case ARM64::ST4Fourv2d_POST:
- case ARM64::LD1Fourv16b_POST:
- case ARM64::LD1Fourv8h_POST:
- case ARM64::LD1Fourv4s_POST:
- case ARM64::LD1Fourv2d_POST:
- case ARM64::LD4Fourv16b_POST:
- case ARM64::LD4Fourv8h_POST:
- case ARM64::LD4Fourv4s_POST:
- case ARM64::LD4Fourv2d_POST:
- DecodeQQQQRegisterClass(Inst, Rd, Addr, Decoder);
- break;
- }
-
- DecodeGPR64spRegisterClass(Inst, Rn, Addr, Decoder);
- DecodeGPR64RegisterClass(Inst, Rm, Addr, Decoder);
- return Success;
-}
-
-static DecodeStatus DecodeSIMDLdStSingle(llvm::MCInst &Inst, uint32_t insn,
- uint64_t Addr, const void *Decoder) {
- uint64_t Rt = fieldFromInstruction(insn, 0, 5);
- uint64_t Rn = fieldFromInstruction(insn, 5, 5);
- uint64_t Rm = fieldFromInstruction(insn, 16, 5);
- uint64_t size = fieldFromInstruction(insn, 10, 2);
- uint64_t S = fieldFromInstruction(insn, 12, 1);
- uint64_t Q = fieldFromInstruction(insn, 30, 1);
- uint64_t index = 0;
-
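-  // The lane index is packed into the Q:S:size bits; wider elements use
-  // fewer of them, so each element width extracts a different slice.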
- switch (Inst.getOpcode()) {
- case ARM64::ST1i8:
- case ARM64::ST1i8_POST:
- case ARM64::ST2i8:
- case ARM64::ST2i8_POST:
- case ARM64::ST3i8_POST:
- case ARM64::ST3i8:
- case ARM64::ST4i8_POST:
- case ARM64::ST4i8:
- index = (Q << 3) | (S << 2) | size;
- break;
- case ARM64::ST1i16:
- case ARM64::ST1i16_POST:
- case ARM64::ST2i16:
- case ARM64::ST2i16_POST:
- case ARM64::ST3i16_POST:
- case ARM64::ST3i16:
- case ARM64::ST4i16_POST:
- case ARM64::ST4i16:
- index = (Q << 2) | (S << 1) | (size >> 1);
- break;
- case ARM64::ST1i32:
- case ARM64::ST1i32_POST:
- case ARM64::ST2i32:
- case ARM64::ST2i32_POST:
- case ARM64::ST3i32_POST:
- case ARM64::ST3i32:
- case ARM64::ST4i32_POST:
- case ARM64::ST4i32:
- index = (Q << 1) | S;
- break;
- case ARM64::ST1i64:
- case ARM64::ST1i64_POST:
- case ARM64::ST2i64:
- case ARM64::ST2i64_POST:
- case ARM64::ST3i64_POST:
- case ARM64::ST3i64:
- case ARM64::ST4i64_POST:
- case ARM64::ST4i64:
- index = Q;
- break;
- }
-
- switch (Inst.getOpcode()) {
- default:
- return Fail;
- case ARM64::LD1Rv8b:
- case ARM64::LD1Rv8b_POST:
- case ARM64::LD1Rv4h:
- case ARM64::LD1Rv4h_POST:
- case ARM64::LD1Rv2s:
- case ARM64::LD1Rv2s_POST:
- case ARM64::LD1Rv1d:
- case ARM64::LD1Rv1d_POST:
- DecodeFPR64RegisterClass(Inst, Rt, Addr, Decoder);
- break;
- case ARM64::LD1Rv16b:
- case ARM64::LD1Rv16b_POST:
- case ARM64::LD1Rv8h:
- case ARM64::LD1Rv8h_POST:
- case ARM64::LD1Rv4s:
- case ARM64::LD1Rv4s_POST:
- case ARM64::LD1Rv2d:
- case ARM64::LD1Rv2d_POST:
- case ARM64::ST1i8:
- case ARM64::ST1i8_POST:
- case ARM64::ST1i16:
- case ARM64::ST1i16_POST:
- case ARM64::ST1i32:
- case ARM64::ST1i32_POST:
- case ARM64::ST1i64:
- case ARM64::ST1i64_POST:
- DecodeFPR128RegisterClass(Inst, Rt, Addr, Decoder);
- break;
- case ARM64::LD2Rv16b:
- case ARM64::LD2Rv16b_POST:
- case ARM64::LD2Rv8h:
- case ARM64::LD2Rv8h_POST:
- case ARM64::LD2Rv4s:
- case ARM64::LD2Rv4s_POST:
- case ARM64::LD2Rv2d:
- case ARM64::LD2Rv2d_POST:
- case ARM64::ST2i8:
- case ARM64::ST2i8_POST:
- case ARM64::ST2i16:
- case ARM64::ST2i16_POST:
- case ARM64::ST2i32:
- case ARM64::ST2i32_POST:
- case ARM64::ST2i64:
- case ARM64::ST2i64_POST:
- DecodeQQRegisterClass(Inst, Rt, Addr, Decoder);
- break;
- case ARM64::LD2Rv8b:
- case ARM64::LD2Rv8b_POST:
- case ARM64::LD2Rv4h:
- case ARM64::LD2Rv4h_POST:
- case ARM64::LD2Rv2s:
- case ARM64::LD2Rv2s_POST:
- case ARM64::LD2Rv1d:
- case ARM64::LD2Rv1d_POST:
- DecodeDDRegisterClass(Inst, Rt, Addr, Decoder);
- break;
- case ARM64::LD3Rv8b:
- case ARM64::LD3Rv8b_POST:
- case ARM64::LD3Rv4h:
- case ARM64::LD3Rv4h_POST:
- case ARM64::LD3Rv2s:
- case ARM64::LD3Rv2s_POST:
- case ARM64::LD3Rv1d:
- case ARM64::LD3Rv1d_POST:
- DecodeDDDRegisterClass(Inst, Rt, Addr, Decoder);
- break;
- case ARM64::LD3Rv16b:
- case ARM64::LD3Rv16b_POST:
- case ARM64::LD3Rv8h:
- case ARM64::LD3Rv8h_POST:
- case ARM64::LD3Rv4s:
- case ARM64::LD3Rv4s_POST:
- case ARM64::LD3Rv2d:
- case ARM64::LD3Rv2d_POST:
- case ARM64::ST3i8:
- case ARM64::ST3i8_POST:
- case ARM64::ST3i16:
- case ARM64::ST3i16_POST:
- case ARM64::ST3i32:
- case ARM64::ST3i32_POST:
- case ARM64::ST3i64:
- case ARM64::ST3i64_POST:
- DecodeQQQRegisterClass(Inst, Rt, Addr, Decoder);
- break;
- case ARM64::LD4Rv8b:
- case ARM64::LD4Rv8b_POST:
- case ARM64::LD4Rv4h:
- case ARM64::LD4Rv4h_POST:
- case ARM64::LD4Rv2s:
- case ARM64::LD4Rv2s_POST:
- case ARM64::LD4Rv1d:
- case ARM64::LD4Rv1d_POST:
- DecodeDDDDRegisterClass(Inst, Rt, Addr, Decoder);
- break;
- case ARM64::LD4Rv16b:
- case ARM64::LD4Rv16b_POST:
- case ARM64::LD4Rv8h:
- case ARM64::LD4Rv8h_POST:
- case ARM64::LD4Rv4s:
- case ARM64::LD4Rv4s_POST:
- case ARM64::LD4Rv2d:
- case ARM64::LD4Rv2d_POST:
- case ARM64::ST4i8:
- case ARM64::ST4i8_POST:
- case ARM64::ST4i16:
- case ARM64::ST4i16_POST:
- case ARM64::ST4i32:
- case ARM64::ST4i32_POST:
- case ARM64::ST4i64:
- case ARM64::ST4i64_POST:
- DecodeQQQQRegisterClass(Inst, Rt, Addr, Decoder);
- break;
- }
-
- switch (Inst.getOpcode()) {
- case ARM64::LD1Rv8b:
- case ARM64::LD1Rv8b_POST:
- case ARM64::LD1Rv16b:
- case ARM64::LD1Rv16b_POST:
- case ARM64::LD1Rv4h:
- case ARM64::LD1Rv4h_POST:
- case ARM64::LD1Rv8h:
- case ARM64::LD1Rv8h_POST:
- case ARM64::LD1Rv4s:
- case ARM64::LD1Rv4s_POST:
- case ARM64::LD1Rv2s:
- case ARM64::LD1Rv2s_POST:
- case ARM64::LD1Rv1d:
- case ARM64::LD1Rv1d_POST:
- case ARM64::LD1Rv2d:
- case ARM64::LD1Rv2d_POST:
- case ARM64::LD2Rv8b:
- case ARM64::LD2Rv8b_POST:
- case ARM64::LD2Rv16b:
- case ARM64::LD2Rv16b_POST:
- case ARM64::LD2Rv4h:
- case ARM64::LD2Rv4h_POST:
- case ARM64::LD2Rv8h:
- case ARM64::LD2Rv8h_POST:
- case ARM64::LD2Rv2s:
- case ARM64::LD2Rv2s_POST:
- case ARM64::LD2Rv4s:
- case ARM64::LD2Rv4s_POST:
- case ARM64::LD2Rv2d:
- case ARM64::LD2Rv2d_POST:
- case ARM64::LD2Rv1d:
- case ARM64::LD2Rv1d_POST:
- case ARM64::LD3Rv8b:
- case ARM64::LD3Rv8b_POST:
- case ARM64::LD3Rv16b:
- case ARM64::LD3Rv16b_POST:
- case ARM64::LD3Rv4h:
- case ARM64::LD3Rv4h_POST:
- case ARM64::LD3Rv8h:
- case ARM64::LD3Rv8h_POST:
- case ARM64::LD3Rv2s:
- case ARM64::LD3Rv2s_POST:
- case ARM64::LD3Rv4s:
- case ARM64::LD3Rv4s_POST:
- case ARM64::LD3Rv2d:
- case ARM64::LD3Rv2d_POST:
- case ARM64::LD3Rv1d:
- case ARM64::LD3Rv1d_POST:
- case ARM64::LD4Rv8b:
- case ARM64::LD4Rv8b_POST:
- case ARM64::LD4Rv16b:
- case ARM64::LD4Rv16b_POST:
- case ARM64::LD4Rv4h:
- case ARM64::LD4Rv4h_POST:
- case ARM64::LD4Rv8h:
- case ARM64::LD4Rv8h_POST:
- case ARM64::LD4Rv2s:
- case ARM64::LD4Rv2s_POST:
- case ARM64::LD4Rv4s:
- case ARM64::LD4Rv4s_POST:
- case ARM64::LD4Rv2d:
- case ARM64::LD4Rv2d_POST:
- case ARM64::LD4Rv1d:
- case ARM64::LD4Rv1d_POST:
- break;
- default:
- Inst.addOperand(MCOperand::CreateImm(index));
- }
-
- DecodeGPR64RegisterClass(Inst, Rn, Addr, Decoder);
-
- switch (Inst.getOpcode()) {
- case ARM64::ST1i8_POST:
- case ARM64::ST1i16_POST:
- case ARM64::ST1i32_POST:
- case ARM64::ST1i64_POST:
- case ARM64::LD1Rv8b_POST:
- case ARM64::LD1Rv16b_POST:
- case ARM64::LD1Rv4h_POST:
- case ARM64::LD1Rv8h_POST:
- case ARM64::LD1Rv2s_POST:
- case ARM64::LD1Rv4s_POST:
- case ARM64::LD1Rv1d_POST:
- case ARM64::LD1Rv2d_POST:
- case ARM64::ST2i8_POST:
- case ARM64::ST2i16_POST:
- case ARM64::ST2i32_POST:
- case ARM64::ST2i64_POST:
- case ARM64::LD2Rv8b_POST:
- case ARM64::LD2Rv16b_POST:
- case ARM64::LD2Rv4h_POST:
- case ARM64::LD2Rv8h_POST:
- case ARM64::LD2Rv2s_POST:
- case ARM64::LD2Rv4s_POST:
- case ARM64::LD2Rv2d_POST:
- case ARM64::LD2Rv1d_POST:
- case ARM64::ST3i8_POST:
- case ARM64::ST3i16_POST:
- case ARM64::ST3i32_POST:
- case ARM64::ST3i64_POST:
- case ARM64::LD3Rv8b_POST:
- case ARM64::LD3Rv16b_POST:
- case ARM64::LD3Rv4h_POST:
- case ARM64::LD3Rv8h_POST:
- case ARM64::LD3Rv2s_POST:
- case ARM64::LD3Rv4s_POST:
- case ARM64::LD3Rv2d_POST:
- case ARM64::LD3Rv1d_POST:
- case ARM64::ST4i8_POST:
- case ARM64::ST4i16_POST:
- case ARM64::ST4i32_POST:
- case ARM64::ST4i64_POST:
- case ARM64::LD4Rv8b_POST:
- case ARM64::LD4Rv16b_POST:
- case ARM64::LD4Rv4h_POST:
- case ARM64::LD4Rv8h_POST:
- case ARM64::LD4Rv2s_POST:
- case ARM64::LD4Rv4s_POST:
- case ARM64::LD4Rv2d_POST:
- case ARM64::LD4Rv1d_POST:
- DecodeGPR64RegisterClass(Inst, Rm, Addr, Decoder);
- break;
- }
- return Success;
-}
-
-static DecodeStatus DecodeSIMDLdStSingleTied(llvm::MCInst &Inst, uint32_t insn,
- uint64_t Addr,
- const void *Decoder) {
- uint64_t Rt = fieldFromInstruction(insn, 0, 5);
- uint64_t Rn = fieldFromInstruction(insn, 5, 5);
- uint64_t Rm = fieldFromInstruction(insn, 16, 5);
- uint64_t size = fieldFromInstruction(insn, 10, 2);
- uint64_t S = fieldFromInstruction(insn, 12, 1);
- uint64_t Q = fieldFromInstruction(insn, 30, 1);
- uint64_t index = 0;
-
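-  // The lane index is packed into the Q:S:size bits; wider elements use
-  // fewer of them, so each element width extracts a different slice.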
- switch (Inst.getOpcode()) {
- case ARM64::LD1i8:
- case ARM64::LD1i8_POST:
- case ARM64::LD2i8:
- case ARM64::LD2i8_POST:
- case ARM64::LD3i8_POST:
- case ARM64::LD3i8:
- case ARM64::LD4i8_POST:
- case ARM64::LD4i8:
- index = (Q << 3) | (S << 2) | size;
- break;
- case ARM64::LD1i16:
- case ARM64::LD1i16_POST:
- case ARM64::LD2i16:
- case ARM64::LD2i16_POST:
- case ARM64::LD3i16_POST:
- case ARM64::LD3i16:
- case ARM64::LD4i16_POST:
- case ARM64::LD4i16:
- index = (Q << 2) | (S << 1) | (size >> 1);
- break;
- case ARM64::LD1i32:
- case ARM64::LD1i32_POST:
- case ARM64::LD2i32:
- case ARM64::LD2i32_POST:
- case ARM64::LD3i32_POST:
- case ARM64::LD3i32:
- case ARM64::LD4i32_POST:
- case ARM64::LD4i32:
- index = (Q << 1) | S;
- break;
- case ARM64::LD1i64:
- case ARM64::LD1i64_POST:
- case ARM64::LD2i64:
- case ARM64::LD2i64_POST:
- case ARM64::LD3i64_POST:
- case ARM64::LD3i64:
- case ARM64::LD4i64_POST:
- case ARM64::LD4i64:
- index = Q;
- break;
- }
-
- switch (Inst.getOpcode()) {
- default:
- return Fail;
- case ARM64::LD1i8:
- case ARM64::LD1i8_POST:
- case ARM64::LD1i16:
- case ARM64::LD1i16_POST:
- case ARM64::LD1i32:
- case ARM64::LD1i32_POST:
- case ARM64::LD1i64:
- case ARM64::LD1i64_POST:
- DecodeFPR128RegisterClass(Inst, Rt, Addr, Decoder);
- DecodeFPR128RegisterClass(Inst, Rt, Addr, Decoder);
- break;
- case ARM64::LD2i8:
- case ARM64::LD2i8_POST:
- case ARM64::LD2i16:
- case ARM64::LD2i16_POST:
- case ARM64::LD2i32:
- case ARM64::LD2i32_POST:
- case ARM64::LD2i64:
- case ARM64::LD2i64_POST:
- DecodeQQRegisterClass(Inst, Rt, Addr, Decoder);
- DecodeQQRegisterClass(Inst, Rt, Addr, Decoder);
- break;
- case ARM64::LD3i8:
- case ARM64::LD3i8_POST:
- case ARM64::LD3i16:
- case ARM64::LD3i16_POST:
- case ARM64::LD3i32:
- case ARM64::LD3i32_POST:
- case ARM64::LD3i64:
- case ARM64::LD3i64_POST:
- DecodeQQQRegisterClass(Inst, Rt, Addr, Decoder);
- DecodeQQQRegisterClass(Inst, Rt, Addr, Decoder);
- break;
- case ARM64::LD4i8:
- case ARM64::LD4i8_POST:
- case ARM64::LD4i16:
- case ARM64::LD4i16_POST:
- case ARM64::LD4i32:
- case ARM64::LD4i32_POST:
- case ARM64::LD4i64:
- case ARM64::LD4i64_POST:
- DecodeQQQQRegisterClass(Inst, Rt, Addr, Decoder);
- DecodeQQQQRegisterClass(Inst, Rt, Addr, Decoder);
- break;
- }
-
- Inst.addOperand(MCOperand::CreateImm(index));
- DecodeGPR64RegisterClass(Inst, Rn, Addr, Decoder);
-
- switch (Inst.getOpcode()) {
- case ARM64::LD1i8_POST:
- case ARM64::LD1i16_POST:
- case ARM64::LD1i32_POST:
- case ARM64::LD1i64_POST:
- case ARM64::LD2i8_POST:
- case ARM64::LD2i16_POST:
- case ARM64::LD2i32_POST:
- case ARM64::LD2i64_POST:
- case ARM64::LD3i8_POST:
- case ARM64::LD3i16_POST:
- case ARM64::LD3i32_POST:
- case ARM64::LD3i64_POST:
- case ARM64::LD4i8_POST:
- case ARM64::LD4i16_POST:
- case ARM64::LD4i32_POST:
- case ARM64::LD4i64_POST:
- DecodeGPR64RegisterClass(Inst, Rm, Addr, Decoder);
- break;
- }
- return Success;
-}
diff --git a/lib/Target/ARM64/Disassembler/ARM64Disassembler.h b/lib/Target/ARM64/Disassembler/ARM64Disassembler.h
deleted file mode 100644
index 35efc8d..0000000
--- a/lib/Target/ARM64/Disassembler/ARM64Disassembler.h
+++ /dev/null
@@ -1,54 +0,0 @@
-//===- ARM64Disassembler.h - Disassembler for ARM64 -------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file declares the ARM64 disassembler.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef ARM64DISASSEMBLER_H
-#define ARM64DISASSEMBLER_H
-
-#include "llvm/MC/MCDisassembler.h"
-
-namespace llvm {
-
-class MCInst;
-class MemoryObject;
-class raw_ostream;
-
-class ARM64Disassembler : public MCDisassembler {
-public:
- ARM64Disassembler(const MCSubtargetInfo &STI) : MCDisassembler(STI) {}
-
- ~ARM64Disassembler() {}
-
- /// getInstruction - See MCDisassembler.
- MCDisassembler::DecodeStatus getInstruction(MCInst &instr, uint64_t &size,
- const MemoryObject &region,
- uint64_t address,
- raw_ostream &vStream,
- raw_ostream &cStream) const;
-
-  /// tryAddingSymbolicOperand - Tries to add a symbolic operand in place of
-  /// the immediate Value in the MCInst. The immediate Value has not had any
-  /// PC adjustment made by the caller. If the instruction adds the PC to the
-  /// immediate Value then InstsAddsAddressToValue is true, else false. If the
-  /// getOpInfo() function was set as part of the setupForSymbolicDisassembly()
-  /// call then that function is called to get any symbolic information at the
-  /// Address for this instruction. If that returns non-zero then the symbolic
-  /// information it returns is used to create an MCExpr and that is added as
-  /// an operand to the MCInst. This function returns true if it adds an
-  /// operand to the MCInst and false otherwise.
- bool tryAddingSymbolicOperand(uint64_t Address, int Value,
- bool InstsAddsAddressToValue, uint64_t InstSize,
- MCInst &MI, uint32_t insn = 0) const;
-};
-
-} // namespace llvm
-
-#endif
diff --git a/lib/Target/ARM64/Disassembler/CMakeLists.txt b/lib/Target/ARM64/Disassembler/CMakeLists.txt
deleted file mode 100644
index ad998c2..0000000
--- a/lib/Target/ARM64/Disassembler/CMakeLists.txt
+++ /dev/null
@@ -1,13 +0,0 @@
-include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
-
-add_llvm_library(LLVMARM64Disassembler
- ARM64Disassembler.cpp
- )
-# workaround for hanging compilation on MSVC8, 9 and 10
-#if( MSVC_VERSION EQUAL 1400 OR MSVC_VERSION EQUAL 1500 OR MSVC_VERSION EQUAL 1600 )
-#set_property(
-# SOURCE ARM64Disassembler.cpp
-# PROPERTY COMPILE_FLAGS "/Od"
-# )
-#endif()
-add_dependencies(LLVMARM64Disassembler ARM64CommonTableGen)
diff --git a/lib/Target/ARM64/Disassembler/LLVMBuild.txt b/lib/Target/ARM64/Disassembler/LLVMBuild.txt
deleted file mode 100644
index 5935ee6..0000000
--- a/lib/Target/ARM64/Disassembler/LLVMBuild.txt
+++ /dev/null
@@ -1,24 +0,0 @@
-;===- ./lib/Target/ARM64/Disassembler/LLVMBuild.txt ------------*- Conf -*--===;
-;
-; The LLVM Compiler Infrastructure
-;
-; This file is distributed under the University of Illinois Open Source
-; License. See LICENSE.TXT for details.
-;
-;===------------------------------------------------------------------------===;
-;
-; This is an LLVMBuild description file for the components in this subdirectory.
-;
-; For more information on the LLVMBuild system, please see:
-;
-; http://llvm.org/docs/LLVMBuild.html
-;
-;===------------------------------------------------------------------------===;
-
-[component_0]
-type = Library
-name = ARM64Disassembler
-parent = ARM64
-required_libraries = ARM64Desc ARM64Info MC Support
-add_to_library_groups = ARM64
-
diff --git a/lib/Target/ARM64/Disassembler/Makefile b/lib/Target/ARM64/Disassembler/Makefile
deleted file mode 100644
index 479d00c..0000000
--- a/lib/Target/ARM64/Disassembler/Makefile
+++ /dev/null
@@ -1,16 +0,0 @@
-##===- lib/Target/ARM64/Disassembler/Makefile --------------*- Makefile -*-===##
-#
-# The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-##===----------------------------------------------------------------------===##
-
-LEVEL = ../../../..
-LIBRARYNAME = LLVMARM64Disassembler
-
-# Hack: we need to include the main ARM64 target directory to grab private headers
-CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
-
-include $(LEVEL)/Makefile.common
diff --git a/lib/Target/ARM64/InstPrinter/ARM64InstPrinter.cpp b/lib/Target/ARM64/InstPrinter/ARM64InstPrinter.cpp
deleted file mode 100644
index bb90707..0000000
--- a/lib/Target/ARM64/InstPrinter/ARM64InstPrinter.cpp
+++ /dev/null
@@ -1,1428 +0,0 @@
-//===-- ARM64InstPrinter.cpp - Convert ARM64 MCInst to assembly syntax ----===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This class prints an ARM64 MCInst to a .s file.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "asm-printer"
-#include "ARM64InstPrinter.h"
-#include "MCTargetDesc/ARM64AddressingModes.h"
-#include "MCTargetDesc/ARM64BaseInfo.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/MC/MCInst.h"
-#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCRegisterInfo.h"
-#include "llvm/Support/Format.h"
-#include "llvm/Support/raw_ostream.h"
-using namespace llvm;
-
-#define GET_INSTRUCTION_NAME
-#define PRINT_ALIAS_INSTR
-#include "ARM64GenAsmWriter.inc"
-#define GET_INSTRUCTION_NAME
-#define PRINT_ALIAS_INSTR
-#include "ARM64GenAsmWriter1.inc"
-
-ARM64InstPrinter::ARM64InstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
- const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI)
- : MCInstPrinter(MAI, MII, MRI) {
- // Initialize the set of available features.
- setAvailableFeatures(STI.getFeatureBits());
-}
-
-ARM64AppleInstPrinter::ARM64AppleInstPrinter(const MCAsmInfo &MAI,
- const MCInstrInfo &MII,
- const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI)
- : ARM64InstPrinter(MAI, MII, MRI, STI) {}
-
-void ARM64InstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
- // This is for .cfi directives.
- OS << getRegisterName(RegNo);
-}
-
-void ARM64InstPrinter::printInst(const MCInst *MI, raw_ostream &O,
- StringRef Annot) {
-  // Check for special encodings and print the canonical alias instead.
-
- unsigned Opcode = MI->getOpcode();
-
- if (Opcode == ARM64::SYS || Opcode == ARM64::SYSxt)
- if (printSysAlias(MI, O)) {
- printAnnotation(O, Annot);
- return;
- }
-
- // TBZ/TBNZ should print the register operand as a Wreg if the bit
- // number is < 32.
- if ((Opcode == ARM64::TBNZ || Opcode == ARM64::TBZ) &&
- MI->getOperand(1).getImm() < 32) {
- MCInst newMI = *MI;
- unsigned Reg = MI->getOperand(0).getReg();
- newMI.getOperand(0).setReg(getWRegFromXReg(Reg));
- printInstruction(&newMI, O);
- printAnnotation(O, Annot);
- return;
- }
-
- // SBFM/UBFM should print to a nicer aliased form if possible.
- if (Opcode == ARM64::SBFMXri || Opcode == ARM64::SBFMWri ||
- Opcode == ARM64::UBFMXri || Opcode == ARM64::UBFMWri) {
- const MCOperand &Op0 = MI->getOperand(0);
- const MCOperand &Op1 = MI->getOperand(1);
- const MCOperand &Op2 = MI->getOperand(2);
- const MCOperand &Op3 = MI->getOperand(3);
-
- if (Op2.isImm() && Op2.getImm() == 0 && Op3.isImm()) {
- bool IsSigned = (Opcode == ARM64::SBFMXri || Opcode == ARM64::SBFMWri);
- const char *AsmMnemonic = 0;
-
- switch (Op3.getImm()) {
- default:
- break;
- case 7:
- AsmMnemonic = IsSigned ? "sxtb" : "uxtb";
- break;
- case 15:
- AsmMnemonic = IsSigned ? "sxth" : "uxth";
- break;
- case 31:
- AsmMnemonic = IsSigned ? "sxtw" : "uxtw";
- break;
- }
-
- if (AsmMnemonic) {
- O << '\t' << AsmMnemonic << '\t' << getRegisterName(Op0.getReg())
- << ", " << getRegisterName(Op1.getReg());
- printAnnotation(O, Annot);
- return;
- }
- }
-
-    // All immediate shifts are aliases, implemented using the Bitfield
-    // instruction. In all cases the immediate shift amount must be in the
-    // range 0 to (reg.size - 1).
- if (Op2.isImm() && Op3.isImm()) {
- const char *AsmMnemonic = 0;
- int shift = 0;
- int64_t immr = Op2.getImm();
- int64_t imms = Op3.getImm();
- if (Opcode == ARM64::UBFMWri && imms != 0x1F && ((imms + 1) == immr)) {
- AsmMnemonic = "lsl";
- shift = 31 - imms;
- } else if (Opcode == ARM64::UBFMXri && imms != 0x3f &&
- ((imms + 1 == immr))) {
- AsmMnemonic = "lsl";
- shift = 63 - imms;
- } else if (Opcode == ARM64::UBFMWri && imms == 0x1f) {
- AsmMnemonic = "lsr";
- shift = immr;
- } else if (Opcode == ARM64::UBFMXri && imms == 0x3f) {
- AsmMnemonic = "lsr";
- shift = immr;
- } else if (Opcode == ARM64::SBFMWri && imms == 0x1f) {
- AsmMnemonic = "asr";
- shift = immr;
- } else if (Opcode == ARM64::SBFMXri && imms == 0x3f) {
- AsmMnemonic = "asr";
- shift = immr;
- }
- if (AsmMnemonic) {
- O << '\t' << AsmMnemonic << '\t' << getRegisterName(Op0.getReg())
- << ", " << getRegisterName(Op1.getReg()) << ", #" << shift;
- printAnnotation(O, Annot);
- return;
- }
- }
- }
-
- // Symbolic operands for MOVZ, MOVN and MOVK already imply a shift
- // (e.g. :gottprel_g1: is always going to be "lsl #16") so it should not be
- // printed.
- if ((Opcode == ARM64::MOVZXi || Opcode == ARM64::MOVZWi ||
- Opcode == ARM64::MOVNXi || Opcode == ARM64::MOVNWi) &&
- MI->getOperand(1).isExpr()) {
- if (Opcode == ARM64::MOVZXi || Opcode == ARM64::MOVZWi)
- O << "\tmovz\t";
- else
- O << "\tmovn\t";
-
- O << getRegisterName(MI->getOperand(0).getReg()) << ", #"
- << *MI->getOperand(1).getExpr();
- return;
- }
-
- if ((Opcode == ARM64::MOVKXi || Opcode == ARM64::MOVKWi) &&
- MI->getOperand(2).isExpr()) {
- O << "\tmovk\t" << getRegisterName(MI->getOperand(0).getReg()) << ", #"
- << *MI->getOperand(2).getExpr();
- return;
- }
-
- // ANDS WZR, Wn, #imm ==> TST Wn, #imm
- // ANDS XZR, Xn, #imm ==> TST Xn, #imm
- if (Opcode == ARM64::ANDSWri && MI->getOperand(0).getReg() == ARM64::WZR) {
- O << "\ttst\t" << getRegisterName(MI->getOperand(1).getReg()) << ", ";
- printLogicalImm32(MI, 2, O);
- return;
- }
- if (Opcode == ARM64::ANDSXri && MI->getOperand(0).getReg() == ARM64::XZR) {
- O << "\ttst\t" << getRegisterName(MI->getOperand(1).getReg()) << ", ";
- printLogicalImm64(MI, 2, O);
- return;
- }
- // ANDS WZR, Wn, Wm{, lshift #imm} ==> TST Wn{, lshift #imm}
- // ANDS XZR, Xn, Xm{, lshift #imm} ==> TST Xn{, lshift #imm}
- if ((Opcode == ARM64::ANDSWrs && MI->getOperand(0).getReg() == ARM64::WZR) ||
- (Opcode == ARM64::ANDSXrs && MI->getOperand(0).getReg() == ARM64::XZR)) {
- O << "\ttst\t" << getRegisterName(MI->getOperand(1).getReg()) << ", ";
- printShiftedRegister(MI, 2, O);
- return;
- }
-
- // SUBS WZR, Wn, #imm ==> CMP Wn, #imm
- // SUBS XZR, Xn, #imm ==> CMP Xn, #imm
- if ((Opcode == ARM64::SUBSWri && MI->getOperand(0).getReg() == ARM64::WZR) ||
- (Opcode == ARM64::SUBSXri && MI->getOperand(0).getReg() == ARM64::XZR)) {
- O << "\tcmp\t" << getRegisterName(MI->getOperand(1).getReg()) << ", ";
- printAddSubImm(MI, 2, O);
- return;
- }
- // SUBS WZR, Wn, Wm{, lshift #imm} ==> CMP Wn, Wm{, lshift #imm}
- // SUBS XZR, Xn, Xm{, lshift #imm} ==> CMP Xn, Xm{, lshift #imm}
- if ((Opcode == ARM64::SUBSWrs && MI->getOperand(0).getReg() == ARM64::WZR) ||
- (Opcode == ARM64::SUBSXrs && MI->getOperand(0).getReg() == ARM64::XZR)) {
- O << "\tcmp\t" << getRegisterName(MI->getOperand(1).getReg()) << ", ";
- printShiftedRegister(MI, 2, O);
- return;
- }
- // SUBS XZR, Xn, Wm, uxtb #imm ==> CMP Xn, uxtb #imm
- // SUBS WZR, Wn, Xm, uxtb #imm ==> CMP Wn, uxtb #imm
- if ((Opcode == ARM64::SUBSXrx && MI->getOperand(0).getReg() == ARM64::XZR) ||
- (Opcode == ARM64::SUBSWrx && MI->getOperand(0).getReg() == ARM64::WZR)) {
- O << "\tcmp\t" << getRegisterName(MI->getOperand(1).getReg()) << ", ";
- printExtendedRegister(MI, 2, O);
- return;
- }
- // SUBS XZR, Xn, Xm, uxtx #imm ==> CMP Xn, uxtb #imm
- if (Opcode == ARM64::SUBSXrx64 && MI->getOperand(0).getReg() == ARM64::XZR) {
- O << "\tcmp\t" << getRegisterName(MI->getOperand(1).getReg()) << ", "
- << getRegisterName(MI->getOperand(2).getReg());
- printExtend(MI, 3, O);
- return;
- }
-
- // ADDS WZR, Wn, #imm ==> CMN Wn, #imm
- // ADDS XZR, Xn, #imm ==> CMN Xn, #imm
- if ((Opcode == ARM64::ADDSWri && MI->getOperand(0).getReg() == ARM64::WZR) ||
- (Opcode == ARM64::ADDSXri && MI->getOperand(0).getReg() == ARM64::XZR)) {
- O << "\tcmn\t" << getRegisterName(MI->getOperand(1).getReg()) << ", ";
- printAddSubImm(MI, 2, O);
- return;
- }
- // ADDS WZR, Wn, Wm{, lshift #imm} ==> CMN Wn, Wm{, lshift #imm}
- // ADDS XZR, Xn, Xm{, lshift #imm} ==> CMN Xn, Xm{, lshift #imm}
- if ((Opcode == ARM64::ADDSWrs && MI->getOperand(0).getReg() == ARM64::WZR) ||
- (Opcode == ARM64::ADDSXrs && MI->getOperand(0).getReg() == ARM64::XZR)) {
- O << "\tcmn\t" << getRegisterName(MI->getOperand(1).getReg()) << ", ";
- printShiftedRegister(MI, 2, O);
- return;
- }
- // ADDS XZR, Xn, Wm, uxtb #imm ==> CMN Xn, uxtb #imm
- if (Opcode == ARM64::ADDSXrx && MI->getOperand(0).getReg() == ARM64::XZR) {
- O << "\tcmn\t" << getRegisterName(MI->getOperand(1).getReg()) << ", ";
- printExtendedRegister(MI, 2, O);
- return;
- }
- // ADDS XZR, Xn, Xm, uxtx #imm ==> CMN Xn, uxtb #imm
- if (Opcode == ARM64::ADDSXrx64 && MI->getOperand(0).getReg() == ARM64::XZR) {
- O << "\tcmn\t" << getRegisterName(MI->getOperand(1).getReg()) << ", "
- << getRegisterName(MI->getOperand(2).getReg());
- printExtend(MI, 3, O);
- return;
- }
-
- if (!printAliasInstr(MI, O))
- printInstruction(MI, O);
-
- printAnnotation(O, Annot);
-}
-
-static bool isTblTbxInstruction(unsigned Opcode, StringRef &Layout,
- bool &IsTbx) {
- switch (Opcode) {
- case ARM64::TBXv8i8One:
- case ARM64::TBXv8i8Two:
- case ARM64::TBXv8i8Three:
- case ARM64::TBXv8i8Four:
- IsTbx = true;
- Layout = ".8b";
- return true;
- case ARM64::TBLv8i8One:
- case ARM64::TBLv8i8Two:
- case ARM64::TBLv8i8Three:
- case ARM64::TBLv8i8Four:
- IsTbx = false;
- Layout = ".8b";
- return true;
- case ARM64::TBXv16i8One:
- case ARM64::TBXv16i8Two:
- case ARM64::TBXv16i8Three:
- case ARM64::TBXv16i8Four:
- IsTbx = true;
- Layout = ".16b";
- return true;
- case ARM64::TBLv16i8One:
- case ARM64::TBLv16i8Two:
- case ARM64::TBLv16i8Three:
- case ARM64::TBLv16i8Four:
- IsTbx = false;
- Layout = ".16b";
- return true;
- default:
- return false;
- }
-}
-
-struct LdStNInstrDesc {
- unsigned Opcode;
- const char *Mnemonic;
- const char *Layout;
- int LaneOperand;
- int NaturalOffset;
-};
-
-static LdStNInstrDesc LdStNInstInfo[] = {
- { ARM64::LD1i8, "ld1", ".b", 2, 0 },
- { ARM64::LD1i16, "ld1", ".h", 2, 0 },
- { ARM64::LD1i32, "ld1", ".s", 2, 0 },
- { ARM64::LD1i64, "ld1", ".d", 2, 0 },
- { ARM64::LD1i8_POST, "ld1", ".b", 2, 1 },
- { ARM64::LD1i16_POST, "ld1", ".h", 2, 2 },
- { ARM64::LD1i32_POST, "ld1", ".s", 2, 4 },
- { ARM64::LD1i64_POST, "ld1", ".d", 2, 8 },
- { ARM64::LD1Rv16b, "ld1r", ".16b", 0, 0 },
- { ARM64::LD1Rv8h, "ld1r", ".8h", 0, 0 },
- { ARM64::LD1Rv4s, "ld1r", ".4s", 0, 0 },
- { ARM64::LD1Rv2d, "ld1r", ".2d", 0, 0 },
- { ARM64::LD1Rv8b, "ld1r", ".8b", 0, 0 },
- { ARM64::LD1Rv4h, "ld1r", ".4h", 0, 0 },
- { ARM64::LD1Rv2s, "ld1r", ".2s", 0, 0 },
- { ARM64::LD1Rv1d, "ld1r", ".1d", 0, 0 },
- { ARM64::LD1Rv16b_POST, "ld1r", ".16b", 0, 1 },
- { ARM64::LD1Rv8h_POST, "ld1r", ".8h", 0, 2 },
- { ARM64::LD1Rv4s_POST, "ld1r", ".4s", 0, 4 },
- { ARM64::LD1Rv2d_POST, "ld1r", ".2d", 0, 8 },
- { ARM64::LD1Rv8b_POST, "ld1r", ".8b", 0, 1 },
- { ARM64::LD1Rv4h_POST, "ld1r", ".4h", 0, 2 },
- { ARM64::LD1Rv2s_POST, "ld1r", ".2s", 0, 4 },
- { ARM64::LD1Rv1d_POST, "ld1r", ".1d", 0, 8 },
- { ARM64::LD1Onev16b, "ld1", ".16b", 0, 0 },
- { ARM64::LD1Onev8h, "ld1", ".8h", 0, 0 },
- { ARM64::LD1Onev4s, "ld1", ".4s", 0, 0 },
- { ARM64::LD1Onev2d, "ld1", ".2d", 0, 0 },
- { ARM64::LD1Onev8b, "ld1", ".8b", 0, 0 },
- { ARM64::LD1Onev4h, "ld1", ".4h", 0, 0 },
- { ARM64::LD1Onev2s, "ld1", ".2s", 0, 0 },
- { ARM64::LD1Onev1d, "ld1", ".1d", 0, 0 },
- { ARM64::LD1Onev16b_POST, "ld1", ".16b", 0, 16 },
- { ARM64::LD1Onev8h_POST, "ld1", ".8h", 0, 16 },
- { ARM64::LD1Onev4s_POST, "ld1", ".4s", 0, 16 },
- { ARM64::LD1Onev2d_POST, "ld1", ".2d", 0, 16 },
- { ARM64::LD1Onev8b_POST, "ld1", ".8b", 0, 8 },
- { ARM64::LD1Onev4h_POST, "ld1", ".4h", 0, 8 },
- { ARM64::LD1Onev2s_POST, "ld1", ".2s", 0, 8 },
- { ARM64::LD1Onev1d_POST, "ld1", ".1d", 0, 8 },
- { ARM64::LD1Twov16b, "ld1", ".16b", 0, 0 },
- { ARM64::LD1Twov8h, "ld1", ".8h", 0, 0 },
- { ARM64::LD1Twov4s, "ld1", ".4s", 0, 0 },
- { ARM64::LD1Twov2d, "ld1", ".2d", 0, 0 },
- { ARM64::LD1Twov8b, "ld1", ".8b", 0, 0 },
- { ARM64::LD1Twov4h, "ld1", ".4h", 0, 0 },
- { ARM64::LD1Twov2s, "ld1", ".2s", 0, 0 },
- { ARM64::LD1Twov1d, "ld1", ".1d", 0, 0 },
- { ARM64::LD1Twov16b_POST, "ld1", ".16b", 0, 32 },
- { ARM64::LD1Twov8h_POST, "ld1", ".8h", 0, 32 },
- { ARM64::LD1Twov4s_POST, "ld1", ".4s", 0, 32 },
- { ARM64::LD1Twov2d_POST, "ld1", ".2d", 0, 32 },
- { ARM64::LD1Twov8b_POST, "ld1", ".8b", 0, 16 },
- { ARM64::LD1Twov4h_POST, "ld1", ".4h", 0, 16 },
- { ARM64::LD1Twov2s_POST, "ld1", ".2s", 0, 16 },
- { ARM64::LD1Twov1d_POST, "ld1", ".1d", 0, 16 },
- { ARM64::LD1Threev16b, "ld1", ".16b", 0, 0 },
- { ARM64::LD1Threev8h, "ld1", ".8h", 0, 0 },
- { ARM64::LD1Threev4s, "ld1", ".4s", 0, 0 },
- { ARM64::LD1Threev2d, "ld1", ".2d", 0, 0 },
- { ARM64::LD1Threev8b, "ld1", ".8b", 0, 0 },
- { ARM64::LD1Threev4h, "ld1", ".4h", 0, 0 },
- { ARM64::LD1Threev2s, "ld1", ".2s", 0, 0 },
- { ARM64::LD1Threev1d, "ld1", ".1d", 0, 0 },
- { ARM64::LD1Threev16b_POST, "ld1", ".16b", 0, 48 },
- { ARM64::LD1Threev8h_POST, "ld1", ".8h", 0, 48 },
- { ARM64::LD1Threev4s_POST, "ld1", ".4s", 0, 48 },
- { ARM64::LD1Threev2d_POST, "ld1", ".2d", 0, 48 },
- { ARM64::LD1Threev8b_POST, "ld1", ".8b", 0, 24 },
- { ARM64::LD1Threev4h_POST, "ld1", ".4h", 0, 24 },
- { ARM64::LD1Threev2s_POST, "ld1", ".2s", 0, 24 },
- { ARM64::LD1Threev1d_POST, "ld1", ".1d", 0, 24 },
- { ARM64::LD1Fourv16b, "ld1", ".16b", 0, 0 },
- { ARM64::LD1Fourv8h, "ld1", ".8h", 0, 0 },
- { ARM64::LD1Fourv4s, "ld1", ".4s", 0, 0 },
- { ARM64::LD1Fourv2d, "ld1", ".2d", 0, 0 },
- { ARM64::LD1Fourv8b, "ld1", ".8b", 0, 0 },
- { ARM64::LD1Fourv4h, "ld1", ".4h", 0, 0 },
- { ARM64::LD1Fourv2s, "ld1", ".2s", 0, 0 },
- { ARM64::LD1Fourv1d, "ld1", ".1d", 0, 0 },
- { ARM64::LD1Fourv16b_POST, "ld1", ".16b", 0, 64 },
- { ARM64::LD1Fourv8h_POST, "ld1", ".8h", 0, 64 },
- { ARM64::LD1Fourv4s_POST, "ld1", ".4s", 0, 64 },
- { ARM64::LD1Fourv2d_POST, "ld1", ".2d", 0, 64 },
- { ARM64::LD1Fourv8b_POST, "ld1", ".8b", 0, 32 },
- { ARM64::LD1Fourv4h_POST, "ld1", ".4h", 0, 32 },
- { ARM64::LD1Fourv2s_POST, "ld1", ".2s", 0, 32 },
- { ARM64::LD1Fourv1d_POST, "ld1", ".1d", 0, 32 },
- { ARM64::LD2i8, "ld2", ".b", 2, 0 },
- { ARM64::LD2i16, "ld2", ".h", 2, 0 },
- { ARM64::LD2i32, "ld2", ".s", 2, 0 },
- { ARM64::LD2i64, "ld2", ".d", 2, 0 },
- { ARM64::LD2i8_POST, "ld2", ".b", 2, 2 },
- { ARM64::LD2i16_POST, "ld2", ".h", 2, 4 },
- { ARM64::LD2i32_POST, "ld2", ".s", 2, 8 },
- { ARM64::LD2i64_POST, "ld2", ".d", 2, 16 },
- { ARM64::LD2Rv16b, "ld2r", ".16b", 0, 0 },
- { ARM64::LD2Rv8h, "ld2r", ".8h", 0, 0 },
- { ARM64::LD2Rv4s, "ld2r", ".4s", 0, 0 },
- { ARM64::LD2Rv2d, "ld2r", ".2d", 0, 0 },
- { ARM64::LD2Rv8b, "ld2r", ".8b", 0, 0 },
- { ARM64::LD2Rv4h, "ld2r", ".4h", 0, 0 },
- { ARM64::LD2Rv2s, "ld2r", ".2s", 0, 0 },
- { ARM64::LD2Rv1d, "ld2r", ".1d", 0, 0 },
- { ARM64::LD2Rv16b_POST, "ld2r", ".16b", 0, 2 },
- { ARM64::LD2Rv8h_POST, "ld2r", ".8h", 0, 4 },
- { ARM64::LD2Rv4s_POST, "ld2r", ".4s", 0, 8 },
- { ARM64::LD2Rv2d_POST, "ld2r", ".2d", 0, 16 },
- { ARM64::LD2Rv8b_POST, "ld2r", ".8b", 0, 2 },
- { ARM64::LD2Rv4h_POST, "ld2r", ".4h", 0, 4 },
- { ARM64::LD2Rv2s_POST, "ld2r", ".2s", 0, 8 },
- { ARM64::LD2Rv1d_POST, "ld2r", ".1d", 0, 16 },
- { ARM64::LD2Twov16b, "ld2", ".16b", 0, 0 },
- { ARM64::LD2Twov8h, "ld2", ".8h", 0, 0 },
- { ARM64::LD2Twov4s, "ld2", ".4s", 0, 0 },
- { ARM64::LD2Twov2d, "ld2", ".2d", 0, 0 },
- { ARM64::LD2Twov8b, "ld2", ".8b", 0, 0 },
- { ARM64::LD2Twov4h, "ld2", ".4h", 0, 0 },
- { ARM64::LD2Twov2s, "ld2", ".2s", 0, 0 },
- { ARM64::LD2Twov16b_POST, "ld2", ".16b", 0, 32 },
- { ARM64::LD2Twov8h_POST, "ld2", ".8h", 0, 32 },
- { ARM64::LD2Twov4s_POST, "ld2", ".4s", 0, 32 },
- { ARM64::LD2Twov2d_POST, "ld2", ".2d", 0, 32 },
- { ARM64::LD2Twov8b_POST, "ld2", ".8b", 0, 16 },
- { ARM64::LD2Twov4h_POST, "ld2", ".4h", 0, 16 },
- { ARM64::LD2Twov2s_POST, "ld2", ".2s", 0, 16 },
- { ARM64::LD3i8, "ld3", ".b", 2, 0 },
- { ARM64::LD3i16, "ld3", ".h", 2, 0 },
- { ARM64::LD3i32, "ld3", ".s", 2, 0 },
- { ARM64::LD3i64, "ld3", ".d", 2, 0 },
- { ARM64::LD3i8_POST, "ld3", ".b", 2, 3 },
- { ARM64::LD3i16_POST, "ld3", ".h", 2, 6 },
- { ARM64::LD3i32_POST, "ld3", ".s", 2, 12 },
- { ARM64::LD3i64_POST, "ld3", ".d", 2, 24 },
- { ARM64::LD3Rv16b, "ld3r", ".16b", 0, 0 },
- { ARM64::LD3Rv8h, "ld3r", ".8h", 0, 0 },
- { ARM64::LD3Rv4s, "ld3r", ".4s", 0, 0 },
- { ARM64::LD3Rv2d, "ld3r", ".2d", 0, 0 },
- { ARM64::LD3Rv8b, "ld3r", ".8b", 0, 0 },
- { ARM64::LD3Rv4h, "ld3r", ".4h", 0, 0 },
- { ARM64::LD3Rv2s, "ld3r", ".2s", 0, 0 },
- { ARM64::LD3Rv1d, "ld3r", ".1d", 0, 0 },
- { ARM64::LD3Rv16b_POST, "ld3r", ".16b", 0, 3 },
- { ARM64::LD3Rv8h_POST, "ld3r", ".8h", 0, 6 },
- { ARM64::LD3Rv4s_POST, "ld3r", ".4s", 0, 12 },
- { ARM64::LD3Rv2d_POST, "ld3r", ".2d", 0, 24 },
- { ARM64::LD3Rv8b_POST, "ld3r", ".8b", 0, 3 },
- { ARM64::LD3Rv4h_POST, "ld3r", ".4h", 0, 6 },
- { ARM64::LD3Rv2s_POST, "ld3r", ".2s", 0, 12 },
- { ARM64::LD3Rv1d_POST, "ld3r", ".1d", 0, 24 },
- { ARM64::LD3Threev16b, "ld3", ".16b", 0, 0 },
- { ARM64::LD3Threev8h, "ld3", ".8h", 0, 0 },
- { ARM64::LD3Threev4s, "ld3", ".4s", 0, 0 },
- { ARM64::LD3Threev2d, "ld3", ".2d", 0, 0 },
- { ARM64::LD3Threev8b, "ld3", ".8b", 0, 0 },
- { ARM64::LD3Threev4h, "ld3", ".4h", 0, 0 },
- { ARM64::LD3Threev2s, "ld3", ".2s", 0, 0 },
- { ARM64::LD3Threev16b_POST, "ld3", ".16b", 0, 48 },
- { ARM64::LD3Threev8h_POST, "ld3", ".8h", 0, 48 },
- { ARM64::LD3Threev4s_POST, "ld3", ".4s", 0, 48 },
- { ARM64::LD3Threev2d_POST, "ld3", ".2d", 0, 48 },
- { ARM64::LD3Threev8b_POST, "ld3", ".8b", 0, 24 },
- { ARM64::LD3Threev4h_POST, "ld3", ".4h", 0, 24 },
- { ARM64::LD3Threev2s_POST, "ld3", ".2s", 0, 24 },
- { ARM64::LD4i8, "ld4", ".b", 2, 0 },
- { ARM64::LD4i16, "ld4", ".h", 2, 0 },
- { ARM64::LD4i32, "ld4", ".s", 2, 0 },
- { ARM64::LD4i64, "ld4", ".d", 2, 0 },
- { ARM64::LD4i8_POST, "ld4", ".b", 2, 4 },
- { ARM64::LD4i16_POST, "ld4", ".h", 2, 8 },
- { ARM64::LD4i32_POST, "ld4", ".s", 2, 16 },
- { ARM64::LD4i64_POST, "ld4", ".d", 2, 32 },
- { ARM64::LD4Rv16b, "ld4r", ".16b", 0, 0 },
- { ARM64::LD4Rv8h, "ld4r", ".8h", 0, 0 },
- { ARM64::LD4Rv4s, "ld4r", ".4s", 0, 0 },
- { ARM64::LD4Rv2d, "ld4r", ".2d", 0, 0 },
- { ARM64::LD4Rv8b, "ld4r", ".8b", 0, 0 },
- { ARM64::LD4Rv4h, "ld4r", ".4h", 0, 0 },
- { ARM64::LD4Rv2s, "ld4r", ".2s", 0, 0 },
- { ARM64::LD4Rv1d, "ld4r", ".1d", 0, 0 },
- { ARM64::LD4Rv16b_POST, "ld4r", ".16b", 0, 4 },
- { ARM64::LD4Rv8h_POST, "ld4r", ".8h", 0, 8 },
- { ARM64::LD4Rv4s_POST, "ld4r", ".4s", 0, 16 },
- { ARM64::LD4Rv2d_POST, "ld4r", ".2d", 0, 32 },
- { ARM64::LD4Rv8b_POST, "ld4r", ".8b", 0, 4 },
- { ARM64::LD4Rv4h_POST, "ld4r", ".4h", 0, 8 },
- { ARM64::LD4Rv2s_POST, "ld4r", ".2s", 0, 16 },
- { ARM64::LD4Rv1d_POST, "ld4r", ".1d", 0, 32 },
- { ARM64::LD4Fourv16b, "ld4", ".16b", 0, 0 },
- { ARM64::LD4Fourv8h, "ld4", ".8h", 0, 0 },
- { ARM64::LD4Fourv4s, "ld4", ".4s", 0, 0 },
- { ARM64::LD4Fourv2d, "ld4", ".2d", 0, 0 },
- { ARM64::LD4Fourv8b, "ld4", ".8b", 0, 0 },
- { ARM64::LD4Fourv4h, "ld4", ".4h", 0, 0 },
- { ARM64::LD4Fourv2s, "ld4", ".2s", 0, 0 },
- { ARM64::LD4Fourv16b_POST, "ld4", ".16b", 0, 64 },
- { ARM64::LD4Fourv8h_POST, "ld4", ".8h", 0, 64 },
- { ARM64::LD4Fourv4s_POST, "ld4", ".4s", 0, 64 },
- { ARM64::LD4Fourv2d_POST, "ld4", ".2d", 0, 64 },
- { ARM64::LD4Fourv8b_POST, "ld4", ".8b", 0, 32 },
- { ARM64::LD4Fourv4h_POST, "ld4", ".4h", 0, 32 },
- { ARM64::LD4Fourv2s_POST, "ld4", ".2s", 0, 32 },
- { ARM64::ST1i8, "st1", ".b", 1, 0 },
- { ARM64::ST1i16, "st1", ".h", 1, 0 },
- { ARM64::ST1i32, "st1", ".s", 1, 0 },
- { ARM64::ST1i64, "st1", ".d", 1, 0 },
- { ARM64::ST1i8_POST, "st1", ".b", 1, 1 },
- { ARM64::ST1i16_POST, "st1", ".h", 1, 2 },
- { ARM64::ST1i32_POST, "st1", ".s", 1, 4 },
- { ARM64::ST1i64_POST, "st1", ".d", 1, 8 },
- { ARM64::ST1Onev16b, "st1", ".16b", 0, 0 },
- { ARM64::ST1Onev8h, "st1", ".8h", 0, 0 },
- { ARM64::ST1Onev4s, "st1", ".4s", 0, 0 },
- { ARM64::ST1Onev2d, "st1", ".2d", 0, 0 },
- { ARM64::ST1Onev8b, "st1", ".8b", 0, 0 },
- { ARM64::ST1Onev4h, "st1", ".4h", 0, 0 },
- { ARM64::ST1Onev2s, "st1", ".2s", 0, 0 },
- { ARM64::ST1Onev1d, "st1", ".1d", 0, 0 },
- { ARM64::ST1Onev16b_POST, "st1", ".16b", 0, 16 },
- { ARM64::ST1Onev8h_POST, "st1", ".8h", 0, 16 },
- { ARM64::ST1Onev4s_POST, "st1", ".4s", 0, 16 },
- { ARM64::ST1Onev2d_POST, "st1", ".2d", 0, 16 },
- { ARM64::ST1Onev8b_POST, "st1", ".8b", 0, 8 },
- { ARM64::ST1Onev4h_POST, "st1", ".4h", 0, 8 },
- { ARM64::ST1Onev2s_POST, "st1", ".2s", 0, 8 },
- { ARM64::ST1Onev1d_POST, "st1", ".1d", 0, 8 },
- { ARM64::ST1Twov16b, "st1", ".16b", 0, 0 },
- { ARM64::ST1Twov8h, "st1", ".8h", 0, 0 },
- { ARM64::ST1Twov4s, "st1", ".4s", 0, 0 },
- { ARM64::ST1Twov2d, "st1", ".2d", 0, 0 },
- { ARM64::ST1Twov8b, "st1", ".8b", 0, 0 },
- { ARM64::ST1Twov4h, "st1", ".4h", 0, 0 },
- { ARM64::ST1Twov2s, "st1", ".2s", 0, 0 },
- { ARM64::ST1Twov1d, "st1", ".1d", 0, 0 },
- { ARM64::ST1Twov16b_POST, "st1", ".16b", 0, 32 },
- { ARM64::ST1Twov8h_POST, "st1", ".8h", 0, 32 },
- { ARM64::ST1Twov4s_POST, "st1", ".4s", 0, 32 },
- { ARM64::ST1Twov2d_POST, "st1", ".2d", 0, 32 },
- { ARM64::ST1Twov8b_POST, "st1", ".8b", 0, 16 },
- { ARM64::ST1Twov4h_POST, "st1", ".4h", 0, 16 },
- { ARM64::ST1Twov2s_POST, "st1", ".2s", 0, 16 },
- { ARM64::ST1Twov1d_POST, "st1", ".1d", 0, 16 },
- { ARM64::ST1Threev16b, "st1", ".16b", 0, 0 },
- { ARM64::ST1Threev8h, "st1", ".8h", 0, 0 },
- { ARM64::ST1Threev4s, "st1", ".4s", 0, 0 },
- { ARM64::ST1Threev2d, "st1", ".2d", 0, 0 },
- { ARM64::ST1Threev8b, "st1", ".8b", 0, 0 },
- { ARM64::ST1Threev4h, "st1", ".4h", 0, 0 },
- { ARM64::ST1Threev2s, "st1", ".2s", 0, 0 },
- { ARM64::ST1Threev1d, "st1", ".1d", 0, 0 },
- { ARM64::ST1Threev16b_POST, "st1", ".16b", 0, 48 },
- { ARM64::ST1Threev8h_POST, "st1", ".8h", 0, 48 },
- { ARM64::ST1Threev4s_POST, "st1", ".4s", 0, 48 },
- { ARM64::ST1Threev2d_POST, "st1", ".2d", 0, 48 },
- { ARM64::ST1Threev8b_POST, "st1", ".8b", 0, 24 },
- { ARM64::ST1Threev4h_POST, "st1", ".4h", 0, 24 },
- { ARM64::ST1Threev2s_POST, "st1", ".2s", 0, 24 },
- { ARM64::ST1Threev1d_POST, "st1", ".1d", 0, 24 },
- { ARM64::ST1Fourv16b, "st1", ".16b", 0, 0 },
- { ARM64::ST1Fourv8h, "st1", ".8h", 0, 0 },
- { ARM64::ST1Fourv4s, "st1", ".4s", 0, 0 },
- { ARM64::ST1Fourv2d, "st1", ".2d", 0, 0 },
- { ARM64::ST1Fourv8b, "st1", ".8b", 0, 0 },
- { ARM64::ST1Fourv4h, "st1", ".4h", 0, 0 },
- { ARM64::ST1Fourv2s, "st1", ".2s", 0, 0 },
- { ARM64::ST1Fourv1d, "st1", ".1d", 0, 0 },
- { ARM64::ST1Fourv16b_POST, "st1", ".16b", 0, 64 },
- { ARM64::ST1Fourv8h_POST, "st1", ".8h", 0, 64 },
- { ARM64::ST1Fourv4s_POST, "st1", ".4s", 0, 64 },
- { ARM64::ST1Fourv2d_POST, "st1", ".2d", 0, 64 },
- { ARM64::ST1Fourv8b_POST, "st1", ".8b", 0, 32 },
- { ARM64::ST1Fourv4h_POST, "st1", ".4h", 0, 32 },
- { ARM64::ST1Fourv2s_POST, "st1", ".2s", 0, 32 },
- { ARM64::ST1Fourv1d_POST, "st1", ".1d", 0, 32 },
- { ARM64::ST2i8, "st2", ".b", 1, 0 },
- { ARM64::ST2i16, "st2", ".h", 1, 0 },
- { ARM64::ST2i32, "st2", ".s", 1, 0 },
- { ARM64::ST2i64, "st2", ".d", 1, 0 },
- { ARM64::ST2i8_POST, "st2", ".b", 1, 2 },
- { ARM64::ST2i16_POST, "st2", ".h", 1, 4 },
- { ARM64::ST2i32_POST, "st2", ".s", 1, 8 },
- { ARM64::ST2i64_POST, "st2", ".d", 1, 16 },
- { ARM64::ST2Twov16b, "st2", ".16b", 0, 0 },
- { ARM64::ST2Twov8h, "st2", ".8h", 0, 0 },
- { ARM64::ST2Twov4s, "st2", ".4s", 0, 0 },
- { ARM64::ST2Twov2d, "st2", ".2d", 0, 0 },
- { ARM64::ST2Twov8b, "st2", ".8b", 0, 0 },
- { ARM64::ST2Twov4h, "st2", ".4h", 0, 0 },
- { ARM64::ST2Twov2s, "st2", ".2s", 0, 0 },
- { ARM64::ST2Twov16b_POST, "st2", ".16b", 0, 32 },
- { ARM64::ST2Twov8h_POST, "st2", ".8h", 0, 32 },
- { ARM64::ST2Twov4s_POST, "st2", ".4s", 0, 32 },
- { ARM64::ST2Twov2d_POST, "st2", ".2d", 0, 32 },
- { ARM64::ST2Twov8b_POST, "st2", ".8b", 0, 16 },
- { ARM64::ST2Twov4h_POST, "st2", ".4h", 0, 16 },
- { ARM64::ST2Twov2s_POST, "st2", ".2s", 0, 16 },
- { ARM64::ST3i8, "st3", ".b", 1, 0 },
- { ARM64::ST3i16, "st3", ".h", 1, 0 },
- { ARM64::ST3i32, "st3", ".s", 1, 0 },
- { ARM64::ST3i64, "st3", ".d", 1, 0 },
- { ARM64::ST3i8_POST, "st3", ".b", 1, 3 },
- { ARM64::ST3i16_POST, "st3", ".h", 1, 6 },
- { ARM64::ST3i32_POST, "st3", ".s", 1, 12 },
- { ARM64::ST3i64_POST, "st3", ".d", 1, 24 },
- { ARM64::ST3Threev16b, "st3", ".16b", 0, 0 },
- { ARM64::ST3Threev8h, "st3", ".8h", 0, 0 },
- { ARM64::ST3Threev4s, "st3", ".4s", 0, 0 },
- { ARM64::ST3Threev2d, "st3", ".2d", 0, 0 },
- { ARM64::ST3Threev8b, "st3", ".8b", 0, 0 },
- { ARM64::ST3Threev4h, "st3", ".4h", 0, 0 },
- { ARM64::ST3Threev2s, "st3", ".2s", 0, 0 },
- { ARM64::ST3Threev16b_POST, "st3", ".16b", 0, 48 },
- { ARM64::ST3Threev8h_POST, "st3", ".8h", 0, 48 },
- { ARM64::ST3Threev4s_POST, "st3", ".4s", 0, 48 },
- { ARM64::ST3Threev2d_POST, "st3", ".2d", 0, 48 },
- { ARM64::ST3Threev8b_POST, "st3", ".8b", 0, 24 },
- { ARM64::ST3Threev4h_POST, "st3", ".4h", 0, 24 },
- { ARM64::ST3Threev2s_POST, "st3", ".2s", 0, 24 },
- { ARM64::ST4i8, "st4", ".b", 1, 0 },
- { ARM64::ST4i16, "st4", ".h", 1, 0 },
- { ARM64::ST4i32, "st4", ".s", 1, 0 },
- { ARM64::ST4i64, "st4", ".d", 1, 0 },
- { ARM64::ST4i8_POST, "st4", ".b", 1, 4 },
- { ARM64::ST4i16_POST, "st4", ".h", 1, 8 },
- { ARM64::ST4i32_POST, "st4", ".s", 1, 16 },
- { ARM64::ST4i64_POST, "st4", ".d", 1, 32 },
- { ARM64::ST4Fourv16b, "st4", ".16b", 0, 0 },
- { ARM64::ST4Fourv8h, "st4", ".8h", 0, 0 },
- { ARM64::ST4Fourv4s, "st4", ".4s", 0, 0 },
- { ARM64::ST4Fourv2d, "st4", ".2d", 0, 0 },
- { ARM64::ST4Fourv8b, "st4", ".8b", 0, 0 },
- { ARM64::ST4Fourv4h, "st4", ".4h", 0, 0 },
- { ARM64::ST4Fourv2s, "st4", ".2s", 0, 0 },
- { ARM64::ST4Fourv16b_POST, "st4", ".16b", 0, 64 },
- { ARM64::ST4Fourv8h_POST, "st4", ".8h", 0, 64 },
- { ARM64::ST4Fourv4s_POST, "st4", ".4s", 0, 64 },
- { ARM64::ST4Fourv2d_POST, "st4", ".2d", 0, 64 },
- { ARM64::ST4Fourv8b_POST, "st4", ".8b", 0, 32 },
- { ARM64::ST4Fourv4h_POST, "st4", ".4h", 0, 32 },
- { ARM64::ST4Fourv2s_POST, "st4", ".2s", 0, 32 },
-};
-
-static LdStNInstrDesc *getLdStNInstrDesc(unsigned Opcode) {
- unsigned Idx;
- for (Idx = 0; Idx != array_lengthof(LdStNInstInfo); ++Idx)
- if (LdStNInstInfo[Idx].Opcode == Opcode)
- return &LdStNInstInfo[Idx];
-
- return 0;
-}
-
-void ARM64AppleInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
- StringRef Annot) {
- unsigned Opcode = MI->getOpcode();
- StringRef Layout, Mnemonic;
-
- bool IsTbx;
- if (isTblTbxInstruction(MI->getOpcode(), Layout, IsTbx)) {
- O << "\t" << (IsTbx ? "tbx" : "tbl") << Layout << '\t'
- << getRegisterName(MI->getOperand(0).getReg(), ARM64::vreg) << ", ";
-
- unsigned ListOpNum = IsTbx ? 2 : 1;
- printVectorList(MI, ListOpNum, O, "");
-
- O << ", "
- << getRegisterName(MI->getOperand(ListOpNum + 1).getReg(), ARM64::vreg);
- printAnnotation(O, Annot);
- return;
- }
-
- if (LdStNInstrDesc *LdStDesc = getLdStNInstrDesc(Opcode)) {
- O << "\t" << LdStDesc->Mnemonic << LdStDesc->Layout << '\t';
-
-    // Now onto the operands: first a vector list with a possible lane
-    // specifier, e.g. { v0 }[2].
- printVectorList(MI, 0, O, "");
-
- if (LdStDesc->LaneOperand != 0)
- O << '[' << MI->getOperand(LdStDesc->LaneOperand).getImm() << ']';
-
- // Next the address: [xN]
- unsigned AddrOpNum = LdStDesc->LaneOperand + 1;
- unsigned AddrReg = MI->getOperand(AddrOpNum).getReg();
- O << ", [" << getRegisterName(AddrReg) << ']';
-
- // Finally, there might be a post-indexed offset.
- if (LdStDesc->NaturalOffset != 0) {
- unsigned Reg = MI->getOperand(AddrOpNum + 1).getReg();
- if (Reg != ARM64::XZR)
- O << ", " << getRegisterName(Reg);
- else {
- assert(LdStDesc->NaturalOffset && "no offset on post-inc instruction?");
- O << ", #" << LdStDesc->NaturalOffset;
- }
- }
-
- printAnnotation(O, Annot);
- return;
- }
-
- ARM64InstPrinter::printInst(MI, O, Annot);
-}
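
The printing path above assembles the Apple-syntax form in four pieces: the mnemonic fused with its layout suffix, a brace-wrapped vector list with an optional lane index, the base register, and an optional post-index amount. A minimal standalone C++ sketch of that formatting, with a hypothetical Desc struct standing in for the LdStNInstrDesc entries (none of this is LLVM API):

    #include <cstdio>

    struct Desc {
      const char *Mnemonic;   // e.g. "ld2"
      const char *Layout;     // e.g. ".4s"
      int Lane;               // -1 when the instruction has no lane specifier
      unsigned NaturalOffset; // immediate form of the post-index writeback
    };

    static void printLdStN(const Desc &D, unsigned NumRegs, unsigned BaseXReg,
                           bool PostIndexed) {
      // Mnemonic and layout print fused; the list registers print bare,
      // matching the empty LayoutSuffix passed to printVectorList above.
      std::printf("\t%s%s\t{ ", D.Mnemonic, D.Layout);
      for (unsigned i = 0; i != NumRegs; ++i)
        std::printf("v%u%s", i, i + 1 != NumRegs ? ", " : "");
      std::printf(" }");
      if (D.Lane >= 0)
        std::printf("[%d]", D.Lane);
      std::printf(", [x%u]", BaseXReg);
      if (PostIndexed)
        std::printf(", #%u", D.NaturalOffset); // immediate post-increment
      std::printf("\n");
    }

    int main() {
      Desc D = {"ld2", ".4s", -1, 32};
      printLdStN(D, 2, 0, true); // prints: ld2.4s { v0, v1 }, [x0], #32
    }
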
-
-bool ARM64InstPrinter::printSysAlias(const MCInst *MI, raw_ostream &O) {
-#ifndef NDEBUG
- unsigned Opcode = MI->getOpcode();
- assert((Opcode == ARM64::SYS || Opcode == ARM64::SYSxt) &&
- "Invalid opcode for SYS alias!");
-#endif
-
- const char *Asm = 0;
- const MCOperand &Op1 = MI->getOperand(0);
- const MCOperand &Cn = MI->getOperand(1);
- const MCOperand &Cm = MI->getOperand(2);
- const MCOperand &Op2 = MI->getOperand(3);
-
- unsigned Op1Val = Op1.getImm();
- unsigned CnVal = Cn.getImm();
- unsigned CmVal = Cm.getImm();
- unsigned Op2Val = Op2.getImm();
-
- if (CnVal == 7) {
- switch (CmVal) {
- default:
- break;
-
- // IC aliases
- case 1:
- if (Op1Val == 0 && Op2Val == 0)
- Asm = "ic\tialluis";
- break;
- case 5:
- if (Op1Val == 0 && Op2Val == 0)
- Asm = "ic\tiallu";
- else if (Op1Val == 3 && Op2Val == 1)
- Asm = "ic\tivau";
- break;
-
- // DC aliases
- case 4:
- if (Op1Val == 3 && Op2Val == 1)
- Asm = "dc\tzva";
- break;
- case 6:
- if (Op1Val == 0 && Op2Val == 1)
- Asm = "dc\tivac";
- if (Op1Val == 0 && Op2Val == 2)
- Asm = "dc\tisw";
- break;
- case 10:
- if (Op1Val == 3 && Op2Val == 1)
- Asm = "dc\tcvac";
- else if (Op1Val == 0 && Op2Val == 2)
- Asm = "dc\tcsw";
- break;
- case 11:
- if (Op1Val == 3 && Op2Val == 1)
- Asm = "dc\tcvau";
- break;
- case 14:
- if (Op1Val == 3 && Op2Val == 1)
- Asm = "dc\tcivac";
- else if (Op1Val == 0 && Op2Val == 2)
- Asm = "dc\tcisw";
- break;
-
- // AT aliases
- case 8:
- switch (Op1Val) {
- default:
- break;
- case 0:
- switch (Op2Val) {
- default:
- break;
- case 0: Asm = "at\ts1e1r"; break;
- case 1: Asm = "at\ts1e1w"; break;
- case 2: Asm = "at\ts1e0r"; break;
- case 3: Asm = "at\ts1e0w"; break;
- }
- break;
- case 4:
- switch (Op2Val) {
- default:
- break;
- case 0: Asm = "at\ts1e2r"; break;
- case 1: Asm = "at\ts1e2w"; break;
- case 4: Asm = "at\ts12e1r"; break;
- case 5: Asm = "at\ts12e1w"; break;
- case 6: Asm = "at\ts12e0r"; break;
- case 7: Asm = "at\ts12e0w"; break;
- }
- break;
- case 6:
- switch (Op2Val) {
- default:
- break;
- case 0: Asm = "at\ts1e3r"; break;
- case 1: Asm = "at\ts1e3w"; break;
- }
- break;
- }
- break;
- }
- } else if (CnVal == 8) {
- // TLBI aliases
- switch (CmVal) {
- default:
- break;
- case 3:
- switch (Op1Val) {
- default:
- break;
- case 0:
- switch (Op2Val) {
- default:
- break;
- case 0: Asm = "tlbi\tvmalle1is"; break;
- case 1: Asm = "tlbi\tvae1is"; break;
- case 2: Asm = "tlbi\taside1is"; break;
- case 3: Asm = "tlbi\tvaae1is"; break;
- case 5: Asm = "tlbi\tvale1is"; break;
- case 7: Asm = "tlbi\tvaale1is"; break;
- }
- break;
- case 4:
- switch (Op2Val) {
- default:
- break;
- case 0: Asm = "tlbi\talle2is"; break;
- case 1: Asm = "tlbi\tvae2is"; break;
- case 4: Asm = "tlbi\talle1is"; break;
- case 5: Asm = "tlbi\tvale2is"; break;
- case 6: Asm = "tlbi\tvmalls12e1is"; break;
- }
- break;
- case 6:
- switch (Op2Val) {
- default:
- break;
- case 0: Asm = "tlbi\talle3is"; break;
- case 1: Asm = "tlbi\tvae3is"; break;
- case 5: Asm = "tlbi\tvale3is"; break;
- }
- break;
- }
- break;
- case 4:
- switch (Op1Val) {
- default:
- break;
- case 4:
- switch (Op2Val) {
- default:
- break;
- case 1: Asm = "tlbi\tipas2e1"; break;
- case 5: Asm = "tlbi\tipas2le1"; break;
- }
- break;
- }
- break;
- case 7:
- switch (Op1Val) {
- default:
- break;
- case 0:
- switch (Op2Val) {
- default:
- break;
- case 0: Asm = "tlbi\tvmalle1"; break;
- case 1: Asm = "tlbi\tvae1"; break;
- case 2: Asm = "tlbi\taside1"; break;
- case 3: Asm = "tlbi\tvaae1"; break;
- case 5: Asm = "tlbi\tvale1"; break;
- case 7: Asm = "tlbi\tvaale1"; break;
- }
- break;
- case 4:
- switch (Op2Val) {
- default:
- break;
- case 0: Asm = "tlbi\talle2"; break;
- case 1: Asm = "tlbi\tvae2"; break;
- case 4: Asm = "tlbi\talle1"; break;
- case 5: Asm = "tlbi\tvale2"; break;
- case 6: Asm = "tlbi\tvmalls12e1"; break;
- }
- break;
- case 6:
- switch (Op2Val) {
- default:
- break;
- case 0: Asm = "tlbi\talle3"; break;
- case 1: Asm = "tlbi\tvae3"; break;
- case 5: Asm = "tlbi\tvale3"; break;
- }
- break;
- }
- break;
- }
- }
-
- if (Asm) {
- O << '\t' << Asm;
- if (MI->getNumOperands() == 5)
- O << ", " << getRegisterName(MI->getOperand(4).getReg());
- }
-
- return Asm != 0;
-}
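
The nested switches above are effectively a lookup from the (op1, CRn, CRm, op2) operand tuple to an alias mnemonic. A table-driven sketch of a few of those mappings, taken directly from the cases above (standalone C++, not the LLVM implementation):

    #include <cstdio>

    struct SysAlias { unsigned Op1, CRn, CRm, Op2; const char *Name; };

    static const SysAlias Aliases[] = {
        {0, 7, 1, 0, "ic\tialluis"},
        {0, 7, 5, 0, "ic\tiallu"},
        {3, 7, 4, 1, "dc\tzva"},
        {0, 8, 7, 0, "tlbi\tvmalle1"},
    };

    static const char *lookupSysAlias(unsigned Op1, unsigned CRn, unsigned CRm,
                                      unsigned Op2) {
      for (const SysAlias &A : Aliases)
        if (A.Op1 == Op1 && A.CRn == CRn && A.CRm == CRm && A.Op2 == Op2)
          return A.Name;
      return nullptr; // fall back to the generic "sys" form
    }

    int main() {
      if (const char *Asm = lookupSysAlias(3, 7, 4, 1))
        std::printf("%s\n", Asm); // prints: dc  zva
    }
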
-
-void ARM64InstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
- const MCOperand &Op = MI->getOperand(OpNo);
- if (Op.isReg()) {
- unsigned Reg = Op.getReg();
- O << getRegisterName(Reg);
- } else if (Op.isImm()) {
- O << '#' << Op.getImm();
- } else {
- assert(Op.isExpr() && "unknown operand kind in printOperand");
- O << *Op.getExpr();
- }
-}
-
-void ARM64InstPrinter::printPostIncOperand(const MCInst *MI, unsigned OpNo,
- unsigned Imm, raw_ostream &O) {
- const MCOperand &Op = MI->getOperand(OpNo);
- if (Op.isReg()) {
- unsigned Reg = Op.getReg();
- if (Reg == ARM64::XZR)
- O << "#" << Imm;
- else
- O << getRegisterName(Reg);
- } else
- assert(0 && "unknown operand kind in printPostIncOperand64");
-}
-
-void ARM64InstPrinter::printPostIncOperand1(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
- printPostIncOperand(MI, OpNo, 1, O);
-}
-
-void ARM64InstPrinter::printPostIncOperand2(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
- printPostIncOperand(MI, OpNo, 2, O);
-}
-
-void ARM64InstPrinter::printPostIncOperand3(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
- printPostIncOperand(MI, OpNo, 3, O);
-}
-
-void ARM64InstPrinter::printPostIncOperand4(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
- printPostIncOperand(MI, OpNo, 4, O);
-}
-
-void ARM64InstPrinter::printPostIncOperand6(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
- printPostIncOperand(MI, OpNo, 6, O);
-}
-
-void ARM64InstPrinter::printPostIncOperand8(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
- printPostIncOperand(MI, OpNo, 8, O);
-}
-
-void ARM64InstPrinter::printPostIncOperand12(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
- printPostIncOperand(MI, OpNo, 12, O);
-}
-
-void ARM64InstPrinter::printPostIncOperand16(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
- printPostIncOperand(MI, OpNo, 16, O);
-}
-
-void ARM64InstPrinter::printPostIncOperand24(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
- printPostIncOperand(MI, OpNo, 24, O);
-}
-
-void ARM64InstPrinter::printPostIncOperand32(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
- printPostIncOperand(MI, OpNo, 32, O);
-}
-
-void ARM64InstPrinter::printPostIncOperand48(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
- printPostIncOperand(MI, OpNo, 48, O);
-}
-
-void ARM64InstPrinter::printPostIncOperand64(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
- printPostIncOperand(MI, OpNo, 64, O);
-}
-
-void ARM64InstPrinter::printVRegOperand(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
- const MCOperand &Op = MI->getOperand(OpNo);
- assert(Op.isReg() && "Non-register vreg operand!");
- unsigned Reg = Op.getReg();
- O << getRegisterName(Reg, ARM64::vreg);
-}
-
-void ARM64InstPrinter::printSysCROperand(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
- const MCOperand &Op = MI->getOperand(OpNo);
- assert(Op.isImm() && "System instruction C[nm] operands must be immediates!");
- O << "c" << Op.getImm();
-}
-
-void ARM64InstPrinter::printAddSubImm(const MCInst *MI, unsigned OpNum,
- raw_ostream &O) {
- const MCOperand &MO = MI->getOperand(OpNum);
- if (MO.isImm()) {
- unsigned Val = (MO.getImm() & 0xfff);
- assert(Val == MO.getImm() && "Add/sub immediate out of range!");
- unsigned Shift =
- ARM64_AM::getShiftValue(MI->getOperand(OpNum + 1).getImm());
- O << '#' << (Val << Shift);
- // Distinguish "0, lsl #12" from "0, lsl #0".
- if (Val == 0 && Shift != 0)
- printShifter(MI, OpNum + 1, O);
- } else {
- assert(MO.isExpr() && "Unexpected operand type!");
- O << *MO.getExpr();
- printShifter(MI, OpNum + 1, O);
- }
-}
-
-void ARM64InstPrinter::printLogicalImm32(const MCInst *MI, unsigned OpNum,
- raw_ostream &O) {
- uint64_t Val = MI->getOperand(OpNum).getImm();
- O << "#0x";
- O.write_hex(ARM64_AM::decodeLogicalImmediate(Val, 32));
-}
-
-void ARM64InstPrinter::printLogicalImm64(const MCInst *MI, unsigned OpNum,
- raw_ostream &O) {
- uint64_t Val = MI->getOperand(OpNum).getImm();
- O << "#0x";
- O.write_hex(ARM64_AM::decodeLogicalImmediate(Val, 64));
-}
-
-void ARM64InstPrinter::printShifter(const MCInst *MI, unsigned OpNum,
- raw_ostream &O) {
- unsigned Val = MI->getOperand(OpNum).getImm();
- // LSL #0 should not be printed.
- if (ARM64_AM::getShiftType(Val) == ARM64_AM::LSL &&
- ARM64_AM::getShiftValue(Val) == 0)
- return;
- O << ", " << ARM64_AM::getShiftName(ARM64_AM::getShiftType(Val)) << " #"
- << ARM64_AM::getShiftValue(Val);
-}
-
-void ARM64InstPrinter::printShiftedRegister(const MCInst *MI, unsigned OpNum,
- raw_ostream &O) {
- O << getRegisterName(MI->getOperand(OpNum).getReg());
- printShifter(MI, OpNum + 1, O);
-}
-
-void ARM64InstPrinter::printExtendedRegister(const MCInst *MI, unsigned OpNum,
- raw_ostream &O) {
- O << getRegisterName(MI->getOperand(OpNum).getReg());
- printExtend(MI, OpNum + 1, O);
-}
-
-void ARM64InstPrinter::printExtend(const MCInst *MI, unsigned OpNum,
- raw_ostream &O) {
- unsigned Val = MI->getOperand(OpNum).getImm();
- ARM64_AM::ExtendType ExtType = ARM64_AM::getArithExtendType(Val);
- unsigned ShiftVal = ARM64_AM::getArithShiftValue(Val);
-
- // If the destination or first source register operand is [W]SP, print
- // UXTW/UXTX as LSL, and if the shift amount is also zero, print nothing at
- // all.
- if (ExtType == ARM64_AM::UXTW || ExtType == ARM64_AM::UXTX) {
- unsigned Dest = MI->getOperand(0).getReg();
- unsigned Src1 = MI->getOperand(1).getReg();
- if (Dest == ARM64::SP || Dest == ARM64::WSP || Src1 == ARM64::SP ||
- Src1 == ARM64::WSP) {
- if (ShiftVal != 0)
- O << ", lsl #" << ShiftVal;
- return;
- }
- }
- O << ", " << ARM64_AM::getExtendName(ExtType);
- if (ShiftVal != 0)
- O << " #" << ShiftVal;
-}
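
The rule encoded above is worth spelling out: UXTW/UXTX degrade to LSL when [W]SP is the destination or first source register, and vanish entirely when the shift amount is also zero; every other extend prints by name. A small standalone sketch of that suffix selection (hypothetical helper, not LLVM API):

    #include <cassert>
    #include <string>

    static std::string extendSuffix(const std::string &ExtName, unsigned Shift,
                                    bool InvolvesSP) {
      // SP-relative UXTW/UXTX print as plain LSL, or nothing at all.
      if (InvolvesSP && (ExtName == "uxtw" || ExtName == "uxtx"))
        return Shift ? ", lsl #" + std::to_string(Shift) : "";
      std::string S = ", " + ExtName;
      if (Shift)
        S += " #" + std::to_string(Shift);
      return S;
    }

    int main() {
      assert(extendSuffix("uxtx", 0, true).empty());    // add x0, sp, x1
      assert(extendSuffix("uxtx", 2, true) == ", lsl #2");
      assert(extendSuffix("sxtw", 1, false) == ", sxtw #1");
    }
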
-
-void ARM64InstPrinter::printDotCondCode(const MCInst *MI, unsigned OpNum,
- raw_ostream &O) {
- ARM64CC::CondCode CC = (ARM64CC::CondCode)MI->getOperand(OpNum).getImm();
- if (CC != ARM64CC::AL)
- O << '.' << ARM64CC::getCondCodeName(CC);
-}
-
-void ARM64InstPrinter::printCondCode(const MCInst *MI, unsigned OpNum,
- raw_ostream &O) {
- ARM64CC::CondCode CC = (ARM64CC::CondCode)MI->getOperand(OpNum).getImm();
- O << ARM64CC::getCondCodeName(CC);
-}
-
-void ARM64InstPrinter::printAMNoIndex(const MCInst *MI, unsigned OpNum,
- raw_ostream &O) {
- O << '[' << getRegisterName(MI->getOperand(OpNum).getReg()) << ']';
-}
-
-void ARM64InstPrinter::printImmScale4(const MCInst *MI, unsigned OpNum,
- raw_ostream &O) {
- O << '#' << 4 * MI->getOperand(OpNum).getImm();
-}
-
-void ARM64InstPrinter::printImmScale8(const MCInst *MI, unsigned OpNum,
- raw_ostream &O) {
- O << '#' << 8 * MI->getOperand(OpNum).getImm();
-}
-
-void ARM64InstPrinter::printImmScale16(const MCInst *MI, unsigned OpNum,
- raw_ostream &O) {
- O << '#' << 16 * MI->getOperand(OpNum).getImm();
-}
-
-void ARM64InstPrinter::printAMIndexed(const MCInst *MI, unsigned OpNum,
- unsigned Scale, raw_ostream &O) {
- const MCOperand MO1 = MI->getOperand(OpNum + 1);
- O << '[' << getRegisterName(MI->getOperand(OpNum).getReg());
- if (MO1.isImm()) {
- if (MO1.getImm() != 0)
- O << ", #" << (MO1.getImm() * Scale);
- } else {
- assert(MO1.isExpr() && "Unexpected operand type!");
- O << ", " << *MO1.getExpr();
- }
- O << ']';
-}
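
The immediate stored in the instruction is in units of the access size, so printing multiplies it back out. A one-line illustration, assuming scale 8 for a 64-bit access:

    #include <cassert>
    // Stored imm12 of 2 with scale 8 renders as "[x1, #16]".
    int main() {
      unsigned Scale = 8, StoredImm = 2;
      assert(StoredImm * Scale == 16);
    }
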
-
-void ARM64InstPrinter::printPrefetchOp(const MCInst *MI, unsigned OpNum,
- raw_ostream &O) {
- unsigned prfop = MI->getOperand(OpNum).getImm();
- if (ARM64_AM::isNamedPrefetchOp(prfop))
- O << ARM64_AM::getPrefetchOpName((ARM64_AM::PrefetchOp)prfop);
- else
- O << '#' << prfop;
-}
-
-void ARM64InstPrinter::printMemoryPostIndexed32(const MCInst *MI,
- unsigned OpNum,
- raw_ostream &O) {
- O << '[' << getRegisterName(MI->getOperand(OpNum).getReg()) << ']' << ", #"
- << 4 * MI->getOperand(OpNum + 1).getImm();
-}
-
-void ARM64InstPrinter::printMemoryPostIndexed64(const MCInst *MI,
- unsigned OpNum,
- raw_ostream &O) {
- O << '[' << getRegisterName(MI->getOperand(OpNum).getReg()) << ']' << ", #"
- << 8 * MI->getOperand(OpNum + 1).getImm();
-}
-
-void ARM64InstPrinter::printMemoryPostIndexed128(const MCInst *MI,
- unsigned OpNum,
- raw_ostream &O) {
- O << '[' << getRegisterName(MI->getOperand(OpNum).getReg()) << ']' << ", #"
- << 16 * MI->getOperand(OpNum + 1).getImm();
-}
-
-void ARM64InstPrinter::printMemoryPostIndexed(const MCInst *MI, unsigned OpNum,
- raw_ostream &O) {
- O << '[' << getRegisterName(MI->getOperand(OpNum).getReg()) << ']' << ", #"
- << MI->getOperand(OpNum + 1).getImm();
-}
-
-void ARM64InstPrinter::printMemoryRegOffset(const MCInst *MI, unsigned OpNum,
- raw_ostream &O, int LegalShiftAmt) {
- O << '[' << getRegisterName(MI->getOperand(OpNum).getReg()) << ", "
- << getRegisterName(MI->getOperand(OpNum + 1).getReg());
-
- unsigned Val = MI->getOperand(OpNum + 2).getImm();
- ARM64_AM::ExtendType ExtType = ARM64_AM::getMemExtendType(Val);
- bool DoShift = ARM64_AM::getMemDoShift(Val);
-
- if (ExtType == ARM64_AM::UXTX) {
- if (DoShift)
- O << ", lsl";
- } else
- O << ", " << ARM64_AM::getExtendName(ExtType);
-
- if (DoShift)
- O << " #" << LegalShiftAmt;
-
- O << "]";
-}
-
-void ARM64InstPrinter::printMemoryRegOffset8(const MCInst *MI, unsigned OpNum,
- raw_ostream &O) {
- printMemoryRegOffset(MI, OpNum, O, 0);
-}
-
-void ARM64InstPrinter::printMemoryRegOffset16(const MCInst *MI, unsigned OpNum,
- raw_ostream &O) {
- printMemoryRegOffset(MI, OpNum, O, 1);
-}
-
-void ARM64InstPrinter::printMemoryRegOffset32(const MCInst *MI, unsigned OpNum,
- raw_ostream &O) {
- printMemoryRegOffset(MI, OpNum, O, 2);
-}
-
-void ARM64InstPrinter::printMemoryRegOffset64(const MCInst *MI, unsigned OpNum,
- raw_ostream &O) {
- printMemoryRegOffset(MI, OpNum, O, 3);
-}
-
-void ARM64InstPrinter::printMemoryRegOffset128(const MCInst *MI, unsigned OpNum,
- raw_ostream &O) {
- printMemoryRegOffset(MI, OpNum, O, 4);
-}
-
-void ARM64InstPrinter::printFPImmOperand(const MCInst *MI, unsigned OpNum,
- raw_ostream &O) {
- const MCOperand &MO = MI->getOperand(OpNum);
- O << '#';
- if (MO.isFPImm())
- // FIXME: Should this ever happen?
- O << MO.getFPImm();
- else
- O << ARM64_AM::getFPImmFloat(MO.getImm());
-}
-
-static unsigned getNextVectorRegister(unsigned Reg, unsigned Stride = 1) {
- while (Stride--) {
- switch (Reg) {
- default:
- assert(0 && "Vector register expected!");
- case ARM64::Q0: Reg = ARM64::Q1; break;
- case ARM64::Q1: Reg = ARM64::Q2; break;
- case ARM64::Q2: Reg = ARM64::Q3; break;
- case ARM64::Q3: Reg = ARM64::Q4; break;
- case ARM64::Q4: Reg = ARM64::Q5; break;
- case ARM64::Q5: Reg = ARM64::Q6; break;
- case ARM64::Q6: Reg = ARM64::Q7; break;
- case ARM64::Q7: Reg = ARM64::Q8; break;
- case ARM64::Q8: Reg = ARM64::Q9; break;
- case ARM64::Q9: Reg = ARM64::Q10; break;
- case ARM64::Q10: Reg = ARM64::Q11; break;
- case ARM64::Q11: Reg = ARM64::Q12; break;
- case ARM64::Q12: Reg = ARM64::Q13; break;
- case ARM64::Q13: Reg = ARM64::Q14; break;
- case ARM64::Q14: Reg = ARM64::Q15; break;
- case ARM64::Q15: Reg = ARM64::Q16; break;
- case ARM64::Q16: Reg = ARM64::Q17; break;
- case ARM64::Q17: Reg = ARM64::Q18; break;
- case ARM64::Q18: Reg = ARM64::Q19; break;
- case ARM64::Q19: Reg = ARM64::Q20; break;
- case ARM64::Q20: Reg = ARM64::Q21; break;
- case ARM64::Q21: Reg = ARM64::Q22; break;
- case ARM64::Q22: Reg = ARM64::Q23; break;
- case ARM64::Q23: Reg = ARM64::Q24; break;
- case ARM64::Q24: Reg = ARM64::Q25; break;
- case ARM64::Q25: Reg = ARM64::Q26; break;
- case ARM64::Q26: Reg = ARM64::Q27; break;
- case ARM64::Q27: Reg = ARM64::Q28; break;
- case ARM64::Q28: Reg = ARM64::Q29; break;
- case ARM64::Q29: Reg = ARM64::Q30; break;
- case ARM64::Q30: Reg = ARM64::Q31; break;
- // Vector lists can wrap around.
- case ARM64::Q31:
- Reg = ARM64::Q0;
- break;
- }
- }
- return Reg;
-}
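
The switch above is a modular increment over Q0..Q31. Assuming the thirty-two Q registers were numbered contiguously (an assumption the enum-based switch deliberately avoids relying on), the same walk is simple arithmetic:

    #include <cassert>
    // Advancing by Stride wraps from Q31 back to Q0, as vector lists may.
    static unsigned nextVectorReg(unsigned QNum, unsigned Stride = 1) {
      return (QNum + Stride) % 32;
    }
    int main() {
      assert(nextVectorReg(31) == 0);    // Q31 -> Q0
      assert(nextVectorReg(30, 3) == 1); // Q30 -> Q1
    }
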
-
-void ARM64InstPrinter::printVectorList(const MCInst *MI, unsigned OpNum,
- raw_ostream &O, StringRef LayoutSuffix) {
- unsigned Reg = MI->getOperand(OpNum).getReg();
-
- O << "{ ";
-
- // Work out how many registers there are in the list (if there is an actual
- // list).
- unsigned NumRegs = 1;
- if (MRI.getRegClass(ARM64::DDRegClassID).contains(Reg) ||
- MRI.getRegClass(ARM64::QQRegClassID).contains(Reg))
- NumRegs = 2;
- else if (MRI.getRegClass(ARM64::DDDRegClassID).contains(Reg) ||
- MRI.getRegClass(ARM64::QQQRegClassID).contains(Reg))
- NumRegs = 3;
- else if (MRI.getRegClass(ARM64::DDDDRegClassID).contains(Reg) ||
- MRI.getRegClass(ARM64::QQQQRegClassID).contains(Reg))
- NumRegs = 4;
-
- // Now forget about the list and find out what the first register is.
- if (unsigned FirstReg = MRI.getSubReg(Reg, ARM64::dsub0))
- Reg = FirstReg;
- else if (unsigned FirstReg = MRI.getSubReg(Reg, ARM64::qsub0))
- Reg = FirstReg;
-
- // If it's a D-reg, we need to promote it to the equivalent Q-reg before
- // printing (otherwise getRegisterName fails).
- if (MRI.getRegClass(ARM64::FPR64RegClassID).contains(Reg)) {
- const MCRegisterClass &FPR128RC = MRI.getRegClass(ARM64::FPR128RegClassID);
- Reg = MRI.getMatchingSuperReg(Reg, ARM64::dsub, &FPR128RC);
- }
-
- for (unsigned i = 0; i < NumRegs; ++i, Reg = getNextVectorRegister(Reg)) {
- O << getRegisterName(Reg, ARM64::vreg) << LayoutSuffix;
- if (i + 1 != NumRegs)
- O << ", ";
- }
-
- O << " }";
-}
-
-void ARM64InstPrinter::printImplicitlyTypedVectorList(const MCInst *MI,
- unsigned OpNum,
- raw_ostream &O) {
- printVectorList(MI, OpNum, O, "");
-}
-
-template <unsigned NumLanes, char LaneKind>
-void ARM64InstPrinter::printTypedVectorList(const MCInst *MI, unsigned OpNum,
- raw_ostream &O) {
- std::string Suffix(".");
- if (NumLanes)
- Suffix += itostr(NumLanes) + LaneKind;
- else
- Suffix += LaneKind;
-
- printVectorList(MI, OpNum, O, Suffix);
-}
-
-void ARM64InstPrinter::printVectorIndex(const MCInst *MI, unsigned OpNum,
- raw_ostream &O) {
- O << "[" << MI->getOperand(OpNum).getImm() << "]";
-}
-
-void ARM64InstPrinter::printAlignedBranchTarget(const MCInst *MI,
- unsigned OpNum,
- raw_ostream &O) {
- const MCOperand &Op = MI->getOperand(OpNum);
-
- // If the label has already been resolved to an immediate offset (say, when
- // we're running the disassembler), just print the immediate.
- if (Op.isImm()) {
- O << "#" << (Op.getImm() << 2);
- return;
- }
-
- // If the branch target is simply an address then print it in hex.
- const MCConstantExpr *BranchTarget =
- dyn_cast<MCConstantExpr>(MI->getOperand(OpNum).getExpr());
- int64_t Address;
- if (BranchTarget && BranchTarget->EvaluateAsAbsolute(Address)) {
- O << "0x";
- O.write_hex(Address);
- } else {
- // Otherwise, just print the expression.
- O << *MI->getOperand(OpNum).getExpr();
- }
-}
-
-void ARM64InstPrinter::printAdrpLabel(const MCInst *MI, unsigned OpNum,
- raw_ostream &O) {
- const MCOperand &Op = MI->getOperand(OpNum);
-
- // If the label has already been resolved to an immediate offset (say, when
- // we're running the disassembler), just print the immediate.
- if (Op.isImm()) {
- O << "#" << (Op.getImm() << 12);
- return;
- }
-
- // Otherwise, just print the expression.
- O << *MI->getOperand(OpNum).getExpr();
-}
-
-void ARM64InstPrinter::printBarrierOption(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
- unsigned Val = MI->getOperand(OpNo).getImm();
- const char *Name = ARM64SYS::getBarrierOptName((ARM64SYS::BarrierOption)Val);
- if (Name)
- O << Name;
- else
- O << "#" << Val;
-}
-
-void ARM64InstPrinter::printSystemRegister(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
- unsigned Val = MI->getOperand(OpNo).getImm();
- const char *Name =
- ARM64SYS::getSystemRegisterName((ARM64SYS::SystemRegister)Val);
- if (Name) {
- O << Name;
- return;
- }
-
- unsigned Op0 = 2 | ((Val >> 14) & 1);
- unsigned Op1 = (Val >> 11) & 7;
- unsigned CRn = (Val >> 7) & 0xf;
- unsigned CRm = (Val >> 3) & 0xf;
- unsigned Op2 = Val & 7;
-
- O << 'S' << Op0 << '_' << Op1 << "_C" << CRn << "_C" << CRm << '_' << Op2;
-}
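
When a system register has no entry in the name table, the printer falls back to the architectural S<op0>_<op1>_C<n>_C<m>_<op2> spelling, unpacking the fields from the packed immediate; only op0's low bit is stored, with the leading 2 implied (op0 is always 2 or 3 on ARMv8). A worked decode under that bit layout:

    #include <cstdio>
    int main() {
      // Pack op0[14] op1[13:11] CRn[10:7] CRm[6:3] op2[2:0].
      unsigned Val = (1u << 14) | (3u << 11) | (4u << 7) | (2u << 3) | 5u;
      unsigned Op0 = 2 | ((Val >> 14) & 1); // -> 3
      unsigned Op1 = (Val >> 11) & 7;       // -> 3
      unsigned CRn = (Val >> 7) & 0xf;      // -> 4
      unsigned CRm = (Val >> 3) & 0xf;      // -> 2
      unsigned Op2 = Val & 7;               // -> 5
      std::printf("S%u_%u_C%u_C%u_%u\n", Op0, Op1, CRn, CRm, Op2); // S3_3_C4_C2_5
    }
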
-
-void ARM64InstPrinter::printSystemCPSRField(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
- unsigned Val = MI->getOperand(OpNo).getImm();
- const char *Name = ARM64SYS::getCPSRFieldName((ARM64SYS::CPSRField)Val);
- O << Name;
-}
-
-void ARM64InstPrinter::printSIMDType10Operand(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
- unsigned RawVal = MI->getOperand(OpNo).getImm();
- uint64_t Val = ARM64_AM::decodeAdvSIMDModImmType10(RawVal);
- O << format("#%#016llx", Val);
-}
diff --git a/lib/Target/ARM64/InstPrinter/ARM64InstPrinter.h b/lib/Target/ARM64/InstPrinter/ARM64InstPrinter.h
deleted file mode 100644
index ff66ff0..0000000
--- a/lib/Target/ARM64/InstPrinter/ARM64InstPrinter.h
+++ /dev/null
@@ -1,157 +0,0 @@
-//===-- ARM64InstPrinter.h - Convert ARM64 MCInst to assembly syntax ------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This class prints an ARM64 MCInst to a .s file.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef ARM64INSTPRINTER_H
-#define ARM64INSTPRINTER_H
-
-#include "MCTargetDesc/ARM64MCTargetDesc.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/MC/MCInstPrinter.h"
-#include "llvm/MC/MCSubtargetInfo.h"
-
-namespace llvm {
-
-class MCOperand;
-
-class ARM64InstPrinter : public MCInstPrinter {
-public:
- ARM64InstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
- const MCRegisterInfo &MRI, const MCSubtargetInfo &STI);
-
- virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot);
- virtual void printRegName(raw_ostream &OS, unsigned RegNo) const;
-
- // Autogenerated by tblgen.
- virtual void printInstruction(const MCInst *MI, raw_ostream &O);
- virtual bool printAliasInstr(const MCInst *MI, raw_ostream &O);
- virtual StringRef getRegName(unsigned RegNo) const {
- return getRegisterName(RegNo);
- }
- static const char *getRegisterName(unsigned RegNo,
- unsigned AltIdx = ARM64::NoRegAltName);
-
-protected:
- bool printSysAlias(const MCInst *MI, raw_ostream &O);
- // Operand printers
- void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printPostIncOperand(const MCInst *MI, unsigned OpNo, unsigned Imm,
- raw_ostream &O);
- void printPostIncOperand1(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printPostIncOperand2(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printPostIncOperand3(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printPostIncOperand4(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printPostIncOperand6(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printPostIncOperand8(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printPostIncOperand12(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printPostIncOperand16(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printPostIncOperand24(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printPostIncOperand32(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printPostIncOperand48(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printPostIncOperand64(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printVRegOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printSysCROperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printAddSubImm(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printLogicalImm32(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printLogicalImm64(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printShifter(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printShiftedRegister(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printExtendedRegister(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printExtend(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printCondCode(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printDotCondCode(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printAlignedBranchTarget(const MCInst *MI, unsigned OpNum,
- raw_ostream &O);
- void printAMIndexed(const MCInst *MI, unsigned OpNum, unsigned Scale,
- raw_ostream &O);
- void printAMIndexed128(const MCInst *MI, unsigned OpNum, raw_ostream &O) {
- printAMIndexed(MI, OpNum, 16, O);
- }
-
- void printAMIndexed64(const MCInst *MI, unsigned OpNum, raw_ostream &O) {
- printAMIndexed(MI, OpNum, 8, O);
- }
-
- void printAMIndexed32(const MCInst *MI, unsigned OpNum, raw_ostream &O) {
- printAMIndexed(MI, OpNum, 4, O);
- }
-
- void printAMIndexed16(const MCInst *MI, unsigned OpNum, raw_ostream &O) {
- printAMIndexed(MI, OpNum, 2, O);
- }
-
- void printAMIndexed8(const MCInst *MI, unsigned OpNum, raw_ostream &O) {
- printAMIndexed(MI, OpNum, 1, O);
- }
- void printAMUnscaled(const MCInst *MI, unsigned OpNum, raw_ostream &O) {
- printAMIndexed(MI, OpNum, 1, O);
- }
- void printAMNoIndex(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printImmScale4(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printImmScale8(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printImmScale16(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printPrefetchOp(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printMemoryPostIndexed(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printMemoryPostIndexed32(const MCInst *MI, unsigned OpNum,
- raw_ostream &O);
- void printMemoryPostIndexed64(const MCInst *MI, unsigned OpNum,
- raw_ostream &O);
- void printMemoryPostIndexed128(const MCInst *MI, unsigned OpNum,
- raw_ostream &O);
- void printMemoryRegOffset(const MCInst *MI, unsigned OpNum, raw_ostream &O,
- int LegalShiftAmt);
- void printMemoryRegOffset8(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printMemoryRegOffset16(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printMemoryRegOffset32(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printMemoryRegOffset64(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printMemoryRegOffset128(const MCInst *MI, unsigned OpNum,
- raw_ostream &O);
-
- void printFPImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
-
- void printVectorList(const MCInst *MI, unsigned OpNum, raw_ostream &O,
- StringRef LayoutSuffix);
-
- /// Print a list of vector registers where the type suffix is implicit
- /// (i.e. attached to the instruction rather than the registers).
- void printImplicitlyTypedVectorList(const MCInst *MI, unsigned OpNum,
- raw_ostream &O);
-
- template <unsigned NumLanes, char LaneKind>
- void printTypedVectorList(const MCInst *MI, unsigned OpNum, raw_ostream &O);
-
- void printVectorIndex(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printAdrpLabel(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printBarrierOption(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printSystemRegister(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printSystemCPSRField(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printSIMDType10Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
-};
-
-class ARM64AppleInstPrinter : public ARM64InstPrinter {
-public:
- ARM64AppleInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
- const MCRegisterInfo &MRI, const MCSubtargetInfo &STI);
-
- virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot);
-
- virtual void printInstruction(const MCInst *MI, raw_ostream &O);
- virtual bool printAliasInstr(const MCInst *MI, raw_ostream &O);
- virtual StringRef getRegName(unsigned RegNo) const {
- return getRegisterName(RegNo);
- }
- static const char *getRegisterName(unsigned RegNo,
- unsigned AltIdx = ARM64::NoRegAltName);
-};
-}
-
-#endif
diff --git a/lib/Target/ARM64/InstPrinter/CMakeLists.txt b/lib/Target/ARM64/InstPrinter/CMakeLists.txt
deleted file mode 100644
index b8ee12c..0000000
--- a/lib/Target/ARM64/InstPrinter/CMakeLists.txt
+++ /dev/null
@@ -1,7 +0,0 @@
-include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
-
-add_llvm_library(LLVMARM64AsmPrinter
- ARM64InstPrinter.cpp
- )
-
-add_dependencies(LLVMARM64AsmPrinter ARM64CommonTableGen)
diff --git a/lib/Target/ARM64/InstPrinter/LLVMBuild.txt b/lib/Target/ARM64/InstPrinter/LLVMBuild.txt
deleted file mode 100644
index 2ec83d2..0000000
--- a/lib/Target/ARM64/InstPrinter/LLVMBuild.txt
+++ /dev/null
@@ -1,24 +0,0 @@
-;===- ./lib/Target/ARM64/InstPrinter/LLVMBuild.txt -------------*- Conf -*--===;
-;
-; The LLVM Compiler Infrastructure
-;
-; This file is distributed under the University of Illinois Open Source
-; License. See LICENSE.TXT for details.
-;
-;===------------------------------------------------------------------------===;
-;
-; This is an LLVMBuild description file for the components in this subdirectory.
-;
-; For more information on the LLVMBuild system, please see:
-;
-; http://llvm.org/docs/LLVMBuild.html
-;
-;===------------------------------------------------------------------------===;
-
-[component_0]
-type = Library
-name = ARM64AsmPrinter
-parent = ARM64
-required_libraries = MC Support
-add_to_library_groups = ARM64
-
diff --git a/lib/Target/ARM64/InstPrinter/Makefile b/lib/Target/ARM64/InstPrinter/Makefile
deleted file mode 100644
index a59efb0..0000000
--- a/lib/Target/ARM64/InstPrinter/Makefile
+++ /dev/null
@@ -1,15 +0,0 @@
-##===- lib/Target/ARM64/AsmPrinter/Makefile ----------------*- Makefile -*-===##
-#
-# The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-##===----------------------------------------------------------------------===##
-LEVEL = ../../../..
-LIBRARYNAME = LLVMARM64AsmPrinter
-
-# Hack: we need to include 'main' arm target directory to grab private headers
-CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
-
-include $(LEVEL)/Makefile.common
diff --git a/lib/Target/ARM64/LLVMBuild.txt b/lib/Target/ARM64/LLVMBuild.txt
deleted file mode 100644
index 45b0628..0000000
--- a/lib/Target/ARM64/LLVMBuild.txt
+++ /dev/null
@@ -1,36 +0,0 @@
-;===- ./lib/Target/ARM64/LLVMBuild.txt -------------------------*- Conf -*--===;
-;
-; The LLVM Compiler Infrastructure
-;
-; This file is distributed under the University of Illinois Open Source
-; License. See LICENSE.TXT for details.
-;
-;===------------------------------------------------------------------------===;
-;
-; This is an LLVMBuild description file for the components in this subdirectory.
-;
-; For more information on the LLVMBuild system, please see:
-;
-; http://llvm.org/docs/LLVMBuild.html
-;
-;===------------------------------------------------------------------------===;
-
-[common]
-subdirectories = AsmParser Disassembler InstPrinter MCTargetDesc TargetInfo
-
-[component_0]
-type = TargetGroup
-name = ARM64
-parent = Target
-has_asmparser = 1
-has_asmprinter = 1
-has_disassembler = 1
-has_jit = 1
-
-[component_1]
-type = Library
-name = ARM64CodeGen
-parent = ARM64
-required_libraries = ARM64AsmPrinter ARM64Desc ARM64Info Analysis AsmPrinter CodeGen Core MC SelectionDAG Support Target
-add_to_library_groups = ARM64
-
diff --git a/lib/Target/ARM64/MCTargetDesc/ARM64AsmBackend.cpp b/lib/Target/ARM64/MCTargetDesc/ARM64AsmBackend.cpp
deleted file mode 100644
index 26813e2..0000000
--- a/lib/Target/ARM64/MCTargetDesc/ARM64AsmBackend.cpp
+++ /dev/null
@@ -1,533 +0,0 @@
-//===-- ARM64AsmBackend.cpp - ARM64 Assembler Backend ---------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "ARM64.h"
-#include "ARM64RegisterInfo.h"
-#include "MCTargetDesc/ARM64FixupKinds.h"
-#include "llvm/ADT/Triple.h"
-#include "llvm/MC/MCAsmBackend.h"
-#include "llvm/MC/MCDirectives.h"
-#include "llvm/MC/MCFixupKindInfo.h"
-#include "llvm/MC/MCObjectWriter.h"
-#include "llvm/MC/MCSectionMachO.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MachO.h"
-using namespace llvm;
-
-namespace {
-
-class ARM64AsmBackend : public MCAsmBackend {
- static const unsigned PCRelFlagVal =
- MCFixupKindInfo::FKF_IsAlignedDownTo32Bits | MCFixupKindInfo::FKF_IsPCRel;
-
-public:
- ARM64AsmBackend(const Target &T) : MCAsmBackend() {}
-
- unsigned getNumFixupKinds() const { return ARM64::NumTargetFixupKinds; }
-
- const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const {
- const static MCFixupKindInfo Infos[ARM64::NumTargetFixupKinds] = {
- // This table *must* be in the order that the fixup_* kinds are defined in
- // ARM64FixupKinds.h.
- //
- // Name Offset (bits) Size (bits) Flags
- { "fixup_arm64_pcrel_adr_imm21", 0, 32, PCRelFlagVal },
- { "fixup_arm64_pcrel_adrp_imm21", 0, 32, PCRelFlagVal },
- { "fixup_arm64_add_imm12", 10, 12, 0 },
- { "fixup_arm64_ldst_imm12_scale1", 10, 12, 0 },
- { "fixup_arm64_ldst_imm12_scale2", 10, 12, 0 },
- { "fixup_arm64_ldst_imm12_scale4", 10, 12, 0 },
- { "fixup_arm64_ldst_imm12_scale8", 10, 12, 0 },
- { "fixup_arm64_ldst_imm12_scale16", 10, 12, 0 },
- { "fixup_arm64_movw", 5, 16, 0 },
- { "fixup_arm64_pcrel_branch14", 5, 14, PCRelFlagVal },
- { "fixup_arm64_pcrel_imm19", 5, 19, PCRelFlagVal },
- { "fixup_arm64_pcrel_branch26", 0, 26, PCRelFlagVal },
- { "fixup_arm64_pcrel_call26", 0, 26, PCRelFlagVal },
- { "fixup_arm64_tlsdesc_call", 0, 0, 0 }
- };
-
- if (Kind < FirstTargetFixupKind)
- return MCAsmBackend::getFixupKindInfo(Kind);
-
- assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
- "Invalid kind!");
- return Infos[Kind - FirstTargetFixupKind];
- }
-
- void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
- uint64_t Value, bool IsPCRel) const;
-
- bool mayNeedRelaxation(const MCInst &Inst) const;
- bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
- const MCRelaxableFragment *DF,
- const MCAsmLayout &Layout) const;
- void relaxInstruction(const MCInst &Inst, MCInst &Res) const;
- bool writeNopData(uint64_t Count, MCObjectWriter *OW) const;
-
- void HandleAssemblerFlag(MCAssemblerFlag Flag) {}
-
- unsigned getPointerSize() const { return 8; }
-};
-
-} // end anonymous namespace
-
-/// \brief The number of bytes the fixup may change.
-static unsigned getFixupKindNumBytes(unsigned Kind) {
- switch (Kind) {
- default:
- assert(0 && "Unknown fixup kind!");
-
- case ARM64::fixup_arm64_tlsdesc_call:
- return 0;
-
- case FK_Data_1:
- return 1;
-
- case FK_Data_2:
- case ARM64::fixup_arm64_movw:
- return 2;
-
- case ARM64::fixup_arm64_pcrel_branch14:
- case ARM64::fixup_arm64_add_imm12:
- case ARM64::fixup_arm64_ldst_imm12_scale1:
- case ARM64::fixup_arm64_ldst_imm12_scale2:
- case ARM64::fixup_arm64_ldst_imm12_scale4:
- case ARM64::fixup_arm64_ldst_imm12_scale8:
- case ARM64::fixup_arm64_ldst_imm12_scale16:
- case ARM64::fixup_arm64_pcrel_imm19:
- return 3;
-
- case ARM64::fixup_arm64_pcrel_adr_imm21:
- case ARM64::fixup_arm64_pcrel_adrp_imm21:
- case ARM64::fixup_arm64_pcrel_branch26:
- case ARM64::fixup_arm64_pcrel_call26:
- case FK_Data_4:
- return 4;
-
- case FK_Data_8:
- return 8;
- }
-}
-
-static unsigned AdrImmBits(unsigned Value) {
- unsigned lo2 = Value & 0x3;
- unsigned hi19 = (Value & 0x1ffffc) >> 2;
- return (hi19 << 5) | (lo2 << 29);
-}
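
AdrImmBits splits the 21-bit ADR immediate into immlo (instruction bits 30:29) and immhi (bits 23:5). A worked check of that packing:

    #include <cassert>
    static unsigned adrImmBits(unsigned Value) {
      unsigned lo2 = Value & 0x3;
      unsigned hi19 = (Value & 0x1ffffc) >> 2;
      return (hi19 << 5) | (lo2 << 29);
    }
    int main() {
      // Value 0x12345: lo2 = 0b01, hi19 = 0x48d1.
      assert(adrImmBits(0x12345) == ((0x48d1u << 5) | (0x1u << 29)));
    }
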
-
-static uint64_t adjustFixupValue(unsigned Kind, uint64_t Value) {
- int64_t SignedValue = static_cast<int64_t>(Value);
- switch (Kind) {
- default:
- assert(false && "Unknown fixup kind!");
- case ARM64::fixup_arm64_pcrel_adr_imm21:
- if (SignedValue > 2097151 || SignedValue < -2097152)
- report_fatal_error("fixup value out of range");
- return AdrImmBits(Value & 0x1fffffULL);
- case ARM64::fixup_arm64_pcrel_adrp_imm21:
- return AdrImmBits((Value & 0x1fffff000ULL) >> 12);
- case ARM64::fixup_arm64_pcrel_imm19:
- // Signed 21-bit immediate
- if (SignedValue > 2097151 || SignedValue < -2097152)
- report_fatal_error("fixup value out of range");
- // Low two bits are not encoded.
- return (Value >> 2) & 0x7ffff;
- case ARM64::fixup_arm64_add_imm12:
- case ARM64::fixup_arm64_ldst_imm12_scale1:
- // Unsigned 12-bit immediate
- if (Value >= 0x1000)
- report_fatal_error("invalid imm12 fixup value");
- return Value;
- case ARM64::fixup_arm64_ldst_imm12_scale2:
- // Unsigned 12-bit immediate which gets multiplied by 2
- if (Value & 1 || Value >= 0x2000)
- report_fatal_error("invalid imm12 fixup value");
- return Value >> 1;
- case ARM64::fixup_arm64_ldst_imm12_scale4:
- // Unsigned 12-bit immediate which gets multiplied by 4
- if (Value & 3 || Value >= 0x4000)
- report_fatal_error("invalid imm12 fixup value");
- return Value >> 2;
- case ARM64::fixup_arm64_ldst_imm12_scale8:
- // Unsigned 12-bit immediate which gets multiplied by 8
- if (Value & 7 || Value >= 0x8000)
- report_fatal_error("invalid imm12 fixup value");
- return Value >> 3;
- case ARM64::fixup_arm64_ldst_imm12_scale16:
- // Unsigned 12-bit immediate which gets multiplied by 16
- if (Value & 15 || Value >= 0x10000)
- report_fatal_error("invalid imm12 fixup value");
- return Value >> 4;
- case ARM64::fixup_arm64_movw:
- report_fatal_error("no resolvable MOVZ/MOVK fixups supported yet");
- return Value;
- case ARM64::fixup_arm64_pcrel_branch14:
- // Signed 16-bit immediate
- if (SignedValue > 32767 || SignedValue < -32768)
- report_fatal_error("fixup value out of range");
- // Low two bits are not encoded (4-byte alignment assumed).
- if (Value & 0x3)
- report_fatal_error("fixup not sufficiently aligned");
- return (Value >> 2) & 0x3fff;
- case ARM64::fixup_arm64_pcrel_branch26:
- case ARM64::fixup_arm64_pcrel_call26:
- // Signed 28-bit immediate
- if (SignedValue > 134217727 || SignedValue < -134217728)
- report_fatal_error("fixup value out of range");
- // Low two bits are not encoded (4-byte alignment assumed).
- if (Value & 0x3)
- report_fatal_error("fixup not sufficiently aligned");
- return (Value >> 2) & 0x3ffffff;
- case FK_Data_1:
- case FK_Data_2:
- case FK_Data_4:
- case FK_Data_8:
- return Value;
- }
-}
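
Each scaled ldst_imm12 case follows the same pattern: reject byte offsets that are misaligned for the access size or out of range after scaling, then encode the offset divided by the scale. A sketch of the scale-8 case:

    #include <cassert>
    #include <stdexcept>
    static unsigned encodeImm12Scale8(unsigned long long Value) {
      // Must be 8-byte aligned and fit in 12 bits once divided by 8.
      if ((Value & 7) || Value >= 0x8000)
        throw std::runtime_error("invalid imm12 fixup value");
      return (unsigned)(Value >> 3);
    }
    int main() {
      assert(encodeImm12Scale8(0x48) == 9); // byte offset 72 -> field 9
    }
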
-
-void ARM64AsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
- unsigned DataSize, uint64_t Value,
- bool IsPCRel) const {
- unsigned NumBytes = getFixupKindNumBytes(Fixup.getKind());
- if (!Value)
- return; // Doesn't change encoding.
- MCFixupKindInfo Info = getFixupKindInfo(Fixup.getKind());
- // Apply any target-specific value adjustments.
- Value = adjustFixupValue(Fixup.getKind(), Value);
-
- // Shift the value into position.
- Value <<= Info.TargetOffset;
-
- unsigned Offset = Fixup.getOffset();
- assert(Offset + NumBytes <= DataSize && "Invalid fixup offset!");
-
- // For each byte of the fragment that the fixup touches, mask in the
- // bits from the fixup value.
- for (unsigned i = 0; i != NumBytes; ++i)
- Data[Offset + i] |= uint8_t((Value >> (i * 8)) & 0xff);
-}
-
-bool ARM64AsmBackend::mayNeedRelaxation(const MCInst &Inst) const {
- return false;
-}
-
-bool ARM64AsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
- const MCRelaxableFragment *DF,
- const MCAsmLayout &Layout) const {
- // FIXME: This isn't correct for ARM64. Just moving the "generic" logic
- // into the targets for now.
- //
- // Relax if the value is too big for a (signed) i8.
- return int64_t(Value) != int64_t(int8_t(Value));
-}
-
-void ARM64AsmBackend::relaxInstruction(const MCInst &Inst, MCInst &Res) const {
- assert(false && "ARM64AsmBackend::relaxInstruction() unimplemented");
-}
-
-bool ARM64AsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
- // If the count is not 4-byte aligned, we must be writing data into the text
- // section (otherwise we have unaligned instructions, and thus have far
- // bigger problems), so just write zeros instead.
- if ((Count & 3) != 0) {
- for (uint64_t i = 0, e = (Count & 3); i != e; ++i)
- OW->Write8(0);
- }
-
- // We are properly aligned, so write NOPs as requested.
- Count /= 4;
- for (uint64_t i = 0; i != Count; ++i)
- OW->Write32(0xd503201f);
- return true;
-}
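
The AArch64 NOP encodes as 0xd503201f, so the padding scheme above emits any sub-word remainder as zero bytes and fills the rest with whole 32-bit NOPs. A standalone sketch of the same scheme (little-endian byte order assumed):

    #include <cstdint>
    #include <vector>
    static std::vector<uint8_t> nopPad(uint64_t Count) {
      std::vector<uint8_t> Out;
      for (uint64_t i = 0, e = Count & 3; i != e; ++i)
        Out.push_back(0); // unaligned tail: data, not instructions
      for (uint64_t i = 0, e = Count / 4; i != e; ++i) {
        const uint32_t Nop = 0xd503201f; // AArch64 "nop"
        for (int b = 0; b != 4; ++b)
          Out.push_back(uint8_t(Nop >> (8 * b)));
      }
      return Out;
    }
    int main() { return nopPad(7).size() == 7 ? 0 : 1; } // 3 zeros + 1 NOP
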
-
-namespace {
-
-namespace CU {
-
-/// \brief Compact unwind encoding values.
-enum CompactUnwindEncodings {
- /// \brief A "frameless" leaf function, where no non-volatile registers are
-  /// saved. The return address remains in LR throughout the function.
- UNWIND_ARM64_MODE_FRAMELESS = 0x02000000,
-
-  /// \brief No compact unwind encoding available. Instead the low 23 bits of
-  /// the compact unwind encoding are the offset of the DWARF FDE in the
- /// __eh_frame section. This mode is never used in object files. It is only
- /// generated by the linker in final linked images, which have only DWARF info
- /// for a function.
- UNWIND_ARM64_MODE_DWARF = 0x03000000,
-
- /// \brief This is a standard arm64 prologue where FP/LR are immediately
- /// pushed on the stack, then SP is copied to FP. If there are any
-  /// non-volatile registers saved, they are copied into the stack frame in pairs
-  /// in a contiguous range right below the saved FP/LR pair. Any subset of the
- /// five X pairs and four D pairs can be saved, but the memory layout must be
- /// in register number order.
- UNWIND_ARM64_MODE_FRAME = 0x04000000,
-
- /// \brief Frame register pair encodings.
- UNWIND_ARM64_FRAME_X19_X20_PAIR = 0x00000001,
- UNWIND_ARM64_FRAME_X21_X22_PAIR = 0x00000002,
- UNWIND_ARM64_FRAME_X23_X24_PAIR = 0x00000004,
- UNWIND_ARM64_FRAME_X25_X26_PAIR = 0x00000008,
- UNWIND_ARM64_FRAME_X27_X28_PAIR = 0x00000010,
- UNWIND_ARM64_FRAME_D8_D9_PAIR = 0x00000100,
- UNWIND_ARM64_FRAME_D10_D11_PAIR = 0x00000200,
- UNWIND_ARM64_FRAME_D12_D13_PAIR = 0x00000400,
- UNWIND_ARM64_FRAME_D14_D15_PAIR = 0x00000800
-};
-
-} // end CU namespace
-
-// FIXME: This should be in a separate file.
-class DarwinARM64AsmBackend : public ARM64AsmBackend {
- const MCRegisterInfo &MRI;
-
- /// \brief Encode compact unwind stack adjustment for frameless functions.
- /// See UNWIND_ARM64_FRAMELESS_STACK_SIZE_MASK in compact_unwind_encoding.h.
-  /// The stack size always needs to be 16-byte aligned.
- uint32_t encodeStackAdjustment(uint32_t StackSize) const {
- return (StackSize / 16) << 12;
- }
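
A worked example of the encoding above: a 96-byte frameless stack is 96 / 16 = 6 units, shifted to start at bit 12 of the compact unwind word:

    #include <cassert>
    int main() { assert(((96u / 16) << 12) == 0x6000); }
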
-
-public:
- DarwinARM64AsmBackend(const Target &T, const MCRegisterInfo &MRI)
- : ARM64AsmBackend(T), MRI(MRI) {}
-
- MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
- return createARM64MachObjectWriter(OS, MachO::CPU_TYPE_ARM64,
- MachO::CPU_SUBTYPE_ARM64_ALL);
- }
-
- virtual bool doesSectionRequireSymbols(const MCSection &Section) const {
- // Any section for which the linker breaks things into atoms needs to
- // preserve symbols, including assembler local symbols, to identify
- // those atoms. These sections are:
- // Sections of type:
- //
- // S_CSTRING_LITERALS (e.g. __cstring)
- // S_LITERAL_POINTERS (e.g. objc selector pointers)
- // S_16BYTE_LITERALS, S_8BYTE_LITERALS, S_4BYTE_LITERALS
- //
- // Sections named:
- //
- // __TEXT,__eh_frame
- // __TEXT,__ustring
- // __DATA,__cfstring
- // __DATA,__objc_classrefs
- // __DATA,__objc_catlist
- //
- // FIXME: It would be better if the compiler used actual linker local
- // symbols for each of these sections rather than preserving what
- // are ostensibly assembler local symbols.
- const MCSectionMachO &SMO = static_cast<const MCSectionMachO &>(Section);
- return (SMO.getType() == MachO::S_CSTRING_LITERALS ||
- SMO.getType() == MachO::S_4BYTE_LITERALS ||
- SMO.getType() == MachO::S_8BYTE_LITERALS ||
- SMO.getType() == MachO::S_16BYTE_LITERALS ||
- SMO.getType() == MachO::S_LITERAL_POINTERS ||
- (SMO.getSegmentName() == "__TEXT" &&
- (SMO.getSectionName() == "__eh_frame" ||
- SMO.getSectionName() == "__ustring")) ||
- (SMO.getSegmentName() == "__DATA" &&
- (SMO.getSectionName() == "__cfstring" ||
- SMO.getSectionName() == "__objc_classrefs" ||
- SMO.getSectionName() == "__objc_catlist")));
- }
-
- /// \brief Generate the compact unwind encoding from the CFI directives.
- virtual uint32_t
- generateCompactUnwindEncoding(ArrayRef<MCCFIInstruction> Instrs) const
- override {
- if (Instrs.empty())
- return CU::UNWIND_ARM64_MODE_FRAMELESS;
-
- bool HasFP = false;
- unsigned StackSize = 0;
-
- uint32_t CompactUnwindEncoding = 0;
- for (size_t i = 0, e = Instrs.size(); i != e; ++i) {
- const MCCFIInstruction &Inst = Instrs[i];
-
- switch (Inst.getOperation()) {
- default:
- // Cannot handle this directive: bail out.
- return CU::UNWIND_ARM64_MODE_DWARF;
- case MCCFIInstruction::OpDefCfa: {
- // Defines a frame pointer.
- assert(getXRegFromWReg(MRI.getLLVMRegNum(Inst.getRegister(), true)) ==
- ARM64::FP &&
- "Invalid frame pointer!");
- assert(i + 2 < e && "Insufficient CFI instructions to define a frame!");
-
- const MCCFIInstruction &LRPush = Instrs[++i];
- assert(LRPush.getOperation() == MCCFIInstruction::OpOffset &&
- "Link register not pushed!");
- const MCCFIInstruction &FPPush = Instrs[++i];
- assert(FPPush.getOperation() == MCCFIInstruction::OpOffset &&
- "Frame pointer not pushed!");
-
- unsigned LRReg = MRI.getLLVMRegNum(LRPush.getRegister(), true);
- unsigned FPReg = MRI.getLLVMRegNum(FPPush.getRegister(), true);
-
- LRReg = getXRegFromWReg(LRReg);
- FPReg = getXRegFromWReg(FPReg);
-
- assert(LRReg == ARM64::LR && FPReg == ARM64::FP &&
- "Pushing invalid registers for frame!");
-
- // Indicate that the function has a frame.
- CompactUnwindEncoding |= CU::UNWIND_ARM64_MODE_FRAME;
- HasFP = true;
- break;
- }
- case MCCFIInstruction::OpDefCfaOffset: {
- assert(StackSize == 0 && "We already have the CFA offset!");
- StackSize = std::abs(Inst.getOffset());
- break;
- }
- case MCCFIInstruction::OpOffset: {
- // Registers are saved in pairs. We expect there to be two consecutive
- // `.cfi_offset' instructions with the appropriate registers specified.
- unsigned Reg1 = MRI.getLLVMRegNum(Inst.getRegister(), true);
- if (i + 1 == e)
- return CU::UNWIND_ARM64_MODE_DWARF;
-
- const MCCFIInstruction &Inst2 = Instrs[++i];
- if (Inst2.getOperation() != MCCFIInstruction::OpOffset)
- return CU::UNWIND_ARM64_MODE_DWARF;
- unsigned Reg2 = MRI.getLLVMRegNum(Inst2.getRegister(), true);
-
- // N.B. The encodings must be in register number order, and the X
- // registers before the D registers.
-
- // X19/X20 pair = 0x00000001,
- // X21/X22 pair = 0x00000002,
- // X23/X24 pair = 0x00000004,
- // X25/X26 pair = 0x00000008,
- // X27/X28 pair = 0x00000010
- Reg1 = getXRegFromWReg(Reg1);
- Reg2 = getXRegFromWReg(Reg2);
-
- if (Reg1 == ARM64::X19 && Reg2 == ARM64::X20 &&
- (CompactUnwindEncoding & 0xF1E) == 0)
- CompactUnwindEncoding |= CU::UNWIND_ARM64_FRAME_X19_X20_PAIR;
- else if (Reg1 == ARM64::X21 && Reg2 == ARM64::X22 &&
- (CompactUnwindEncoding & 0xF1C) == 0)
- CompactUnwindEncoding |= CU::UNWIND_ARM64_FRAME_X21_X22_PAIR;
- else if (Reg1 == ARM64::X23 && Reg2 == ARM64::X24 &&
- (CompactUnwindEncoding & 0xF18) == 0)
- CompactUnwindEncoding |= CU::UNWIND_ARM64_FRAME_X23_X24_PAIR;
- else if (Reg1 == ARM64::X25 && Reg2 == ARM64::X26 &&
- (CompactUnwindEncoding & 0xF10) == 0)
- CompactUnwindEncoding |= CU::UNWIND_ARM64_FRAME_X25_X26_PAIR;
- else if (Reg1 == ARM64::X27 && Reg2 == ARM64::X28 &&
- (CompactUnwindEncoding & 0xF00) == 0)
- CompactUnwindEncoding |= CU::UNWIND_ARM64_FRAME_X27_X28_PAIR;
- else {
- Reg1 = getDRegFromBReg(Reg1);
- Reg2 = getDRegFromBReg(Reg2);
-
- // D8/D9 pair = 0x00000100,
- // D10/D11 pair = 0x00000200,
- // D12/D13 pair = 0x00000400,
- // D14/D15 pair = 0x00000800
- if (Reg1 == ARM64::D8 && Reg2 == ARM64::D9 &&
- (CompactUnwindEncoding & 0xE00) == 0)
- CompactUnwindEncoding |= CU::UNWIND_ARM64_FRAME_D8_D9_PAIR;
- else if (Reg1 == ARM64::D10 && Reg2 == ARM64::D11 &&
- (CompactUnwindEncoding & 0xC00) == 0)
- CompactUnwindEncoding |= CU::UNWIND_ARM64_FRAME_D10_D11_PAIR;
- else if (Reg1 == ARM64::D12 && Reg2 == ARM64::D13 &&
- (CompactUnwindEncoding & 0x800) == 0)
- CompactUnwindEncoding |= CU::UNWIND_ARM64_FRAME_D12_D13_PAIR;
- else if (Reg1 == ARM64::D14 && Reg2 == ARM64::D15)
- CompactUnwindEncoding |= CU::UNWIND_ARM64_FRAME_D14_D15_PAIR;
- else
- // A pair was pushed which we cannot handle.
- return CU::UNWIND_ARM64_MODE_DWARF;
- }
-
- break;
- }
- }
- }
-
- if (!HasFP) {
- // With compact unwind info we can only represent stack adjustments of up
- // to 65520 bytes.
- if (StackSize > 65520)
- return CU::UNWIND_ARM64_MODE_DWARF;
-
- CompactUnwindEncoding |= CU::UNWIND_ARM64_MODE_FRAMELESS;
- CompactUnwindEncoding |= encodeStackAdjustment(StackSize);
- }
-
- return CompactUnwindEncoding;
- }
-};
-
-} // end anonymous namespace
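The frameless path above reduces to a small amount of bit arithmetic. Here is a minimal standalone sketch (not LLVM code; the mode constant mirrors CU::UNWIND_ARM64_MODE_FRAMELESS and the shift mirrors encodeStackAdjustment()):

    #include <cassert>
    #include <cstdint>
    #include <cstdio>

    // Sketch only: the 16-byte-aligned stack size, divided by 16, occupies
    // bits [12,23] of the compact unwind word alongside the mode bits.
    static const uint32_t MODE_FRAMELESS = 0x02000000;

    static uint32_t encodeFrameless(uint32_t StackSize) {
      assert(StackSize % 16 == 0 && StackSize <= 65520 && "see limits above");
      return MODE_FRAMELESS | ((StackSize / 16) << 12);
    }

    int main() {
      // A 496-byte frame: 496 / 16 = 0x1f, so the word is
      // 0x02000000 | (0x1f << 12) = 0x0201f000.
      std::printf("0x%08x\n", encodeFrameless(496));
      return 0;
    }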
-
-namespace {
-
-class ELFARM64AsmBackend : public ARM64AsmBackend {
-public:
- uint8_t OSABI;
-
- ELFARM64AsmBackend(const Target &T, uint8_t OSABI)
- : ARM64AsmBackend(T), OSABI(OSABI) {}
-
- MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
- return createARM64ELFObjectWriter(OS, OSABI);
- }
-
- void processFixupValue(const MCAssembler &Asm, const MCAsmLayout &Layout,
- const MCFixup &Fixup, const MCFragment *DF,
- const MCValue &Target, uint64_t &Value,
- bool &IsResolved) override;
-};
-
-void ELFARM64AsmBackend::processFixupValue(const MCAssembler &Asm,
- const MCAsmLayout &Layout,
- const MCFixup &Fixup,
- const MCFragment *DF,
- const MCValue &Target,
- uint64_t &Value, bool &IsResolved) {
- // The ADRP instruction adds some multiple of 0x1000 to the current PC &
- // ~0xfff. This means that the required offset to reach a symbol can vary by
- // up to one step depending on where the ADRP is in memory. For example:
- //
- // ADRP x0, there
- // there:
- //
- // If the ADRP occurs at address 0xffc then "there" will be at 0x1000 and
- // we'll need that as an offset. At any other address "there" will be in the
- // same page as the ADRP and the instruction should encode 0x0. Assuming the
- // section isn't 0x1000-aligned, we therefore need to delegate this decision
- // to the linker -- a relocation!
- if ((uint32_t)Fixup.getKind() == ARM64::fixup_arm64_pcrel_adrp_imm21)
- IsResolved = false;
-}
-}
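The page arithmetic in the comment above can be checked with a short standalone sketch (illustrative only; adrpImm is not an LLVM API):

    #include <cstdint>
    #include <cstdio>

    // ADRP materializes the 4KiB page of the target relative to the page
    // of the instruction, so the immediate is the difference of the two
    // page bases in units of 0x1000.
    static int64_t adrpImm(uint64_t PC, uint64_t Target) {
      return ((int64_t)(Target & ~0xfffULL) - (int64_t)(PC & ~0xfffULL)) >> 12;
    }

    int main() {
      // The comment's example: an ADRP at 0xffc reaching a label at 0x1000
      // needs a one-page offset...
      std::printf("%lld\n", (long long)adrpImm(0xffc, 0x1000)); // prints 1
      // ...while at most other addresses the label shares the ADRP's page.
      std::printf("%lld\n", (long long)adrpImm(0x2000, 0x2004)); // prints 0
      return 0;
    }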
-
-MCAsmBackend *llvm::createARM64AsmBackend(const Target &T,
- const MCRegisterInfo &MRI,
- StringRef TT, StringRef CPU) {
- Triple TheTriple(TT);
-
- if (TheTriple.isOSDarwin())
- return new DarwinARM64AsmBackend(T, MRI);
-
- assert(TheTriple.isOSBinFormatELF() && "Expect either MachO or ELF target");
- return new ELFARM64AsmBackend(T, TheTriple.getOS());
-}
diff --git a/lib/Target/ARM64/MCTargetDesc/ARM64BaseInfo.h b/lib/Target/ARM64/MCTargetDesc/ARM64BaseInfo.h
deleted file mode 100644
index d3c2cf7..0000000
--- a/lib/Target/ARM64/MCTargetDesc/ARM64BaseInfo.h
+++ /dev/null
@@ -1,998 +0,0 @@
-//===-- ARM64BaseInfo.h - Top level definitions for ARM64 -------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains small standalone helper functions and enum definitions for
-// the ARM64 target useful for the compiler back-end and the MC libraries.
-// As such, it deliberately does not include references to LLVM core
-// code gen types, passes, etc..
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef ARM64BASEINFO_H
-#define ARM64BASEINFO_H
-
-#include "ARM64MCTargetDesc.h"
-#include "llvm/Support/ErrorHandling.h"
-
-namespace llvm {
-
-inline static unsigned getWRegFromXReg(unsigned Reg) {
- switch (Reg) {
- case ARM64::X0: return ARM64::W0;
- case ARM64::X1: return ARM64::W1;
- case ARM64::X2: return ARM64::W2;
- case ARM64::X3: return ARM64::W3;
- case ARM64::X4: return ARM64::W4;
- case ARM64::X5: return ARM64::W5;
- case ARM64::X6: return ARM64::W6;
- case ARM64::X7: return ARM64::W7;
- case ARM64::X8: return ARM64::W8;
- case ARM64::X9: return ARM64::W9;
- case ARM64::X10: return ARM64::W10;
- case ARM64::X11: return ARM64::W11;
- case ARM64::X12: return ARM64::W12;
- case ARM64::X13: return ARM64::W13;
- case ARM64::X14: return ARM64::W14;
- case ARM64::X15: return ARM64::W15;
- case ARM64::X16: return ARM64::W16;
- case ARM64::X17: return ARM64::W17;
- case ARM64::X18: return ARM64::W18;
- case ARM64::X19: return ARM64::W19;
- case ARM64::X20: return ARM64::W20;
- case ARM64::X21: return ARM64::W21;
- case ARM64::X22: return ARM64::W22;
- case ARM64::X23: return ARM64::W23;
- case ARM64::X24: return ARM64::W24;
- case ARM64::X25: return ARM64::W25;
- case ARM64::X26: return ARM64::W26;
- case ARM64::X27: return ARM64::W27;
- case ARM64::X28: return ARM64::W28;
- case ARM64::FP: return ARM64::W29;
- case ARM64::LR: return ARM64::W30;
- case ARM64::SP: return ARM64::WSP;
- case ARM64::XZR: return ARM64::WZR;
- }
- // For anything else, return it unchanged.
- return Reg;
-}
-
-inline static unsigned getXRegFromWReg(unsigned Reg) {
- switch (Reg) {
- case ARM64::W0: return ARM64::X0;
- case ARM64::W1: return ARM64::X1;
- case ARM64::W2: return ARM64::X2;
- case ARM64::W3: return ARM64::X3;
- case ARM64::W4: return ARM64::X4;
- case ARM64::W5: return ARM64::X5;
- case ARM64::W6: return ARM64::X6;
- case ARM64::W7: return ARM64::X7;
- case ARM64::W8: return ARM64::X8;
- case ARM64::W9: return ARM64::X9;
- case ARM64::W10: return ARM64::X10;
- case ARM64::W11: return ARM64::X11;
- case ARM64::W12: return ARM64::X12;
- case ARM64::W13: return ARM64::X13;
- case ARM64::W14: return ARM64::X14;
- case ARM64::W15: return ARM64::X15;
- case ARM64::W16: return ARM64::X16;
- case ARM64::W17: return ARM64::X17;
- case ARM64::W18: return ARM64::X18;
- case ARM64::W19: return ARM64::X19;
- case ARM64::W20: return ARM64::X20;
- case ARM64::W21: return ARM64::X21;
- case ARM64::W22: return ARM64::X22;
- case ARM64::W23: return ARM64::X23;
- case ARM64::W24: return ARM64::X24;
- case ARM64::W25: return ARM64::X25;
- case ARM64::W26: return ARM64::X26;
- case ARM64::W27: return ARM64::X27;
- case ARM64::W28: return ARM64::X28;
- case ARM64::W29: return ARM64::FP;
- case ARM64::W30: return ARM64::LR;
- case ARM64::WSP: return ARM64::SP;
- case ARM64::WZR: return ARM64::XZR;
- }
- // For anything else, return it unchanged.
- return Reg;
-}
-
-static inline unsigned getBRegFromDReg(unsigned Reg) {
- switch (Reg) {
- case ARM64::D0: return ARM64::B0;
- case ARM64::D1: return ARM64::B1;
- case ARM64::D2: return ARM64::B2;
- case ARM64::D3: return ARM64::B3;
- case ARM64::D4: return ARM64::B4;
- case ARM64::D5: return ARM64::B5;
- case ARM64::D6: return ARM64::B6;
- case ARM64::D7: return ARM64::B7;
- case ARM64::D8: return ARM64::B8;
- case ARM64::D9: return ARM64::B9;
- case ARM64::D10: return ARM64::B10;
- case ARM64::D11: return ARM64::B11;
- case ARM64::D12: return ARM64::B12;
- case ARM64::D13: return ARM64::B13;
- case ARM64::D14: return ARM64::B14;
- case ARM64::D15: return ARM64::B15;
- case ARM64::D16: return ARM64::B16;
- case ARM64::D17: return ARM64::B17;
- case ARM64::D18: return ARM64::B18;
- case ARM64::D19: return ARM64::B19;
- case ARM64::D20: return ARM64::B20;
- case ARM64::D21: return ARM64::B21;
- case ARM64::D22: return ARM64::B22;
- case ARM64::D23: return ARM64::B23;
- case ARM64::D24: return ARM64::B24;
- case ARM64::D25: return ARM64::B25;
- case ARM64::D26: return ARM64::B26;
- case ARM64::D27: return ARM64::B27;
- case ARM64::D28: return ARM64::B28;
- case ARM64::D29: return ARM64::B29;
- case ARM64::D30: return ARM64::B30;
- case ARM64::D31: return ARM64::B31;
- }
- // For anything else, return it unchanged.
- return Reg;
-}
-
-
-static inline unsigned getDRegFromBReg(unsigned Reg) {
- switch (Reg) {
- case ARM64::B0: return ARM64::D0;
- case ARM64::B1: return ARM64::D1;
- case ARM64::B2: return ARM64::D2;
- case ARM64::B3: return ARM64::D3;
- case ARM64::B4: return ARM64::D4;
- case ARM64::B5: return ARM64::D5;
- case ARM64::B6: return ARM64::D6;
- case ARM64::B7: return ARM64::D7;
- case ARM64::B8: return ARM64::D8;
- case ARM64::B9: return ARM64::D9;
- case ARM64::B10: return ARM64::D10;
- case ARM64::B11: return ARM64::D11;
- case ARM64::B12: return ARM64::D12;
- case ARM64::B13: return ARM64::D13;
- case ARM64::B14: return ARM64::D14;
- case ARM64::B15: return ARM64::D15;
- case ARM64::B16: return ARM64::D16;
- case ARM64::B17: return ARM64::D17;
- case ARM64::B18: return ARM64::D18;
- case ARM64::B19: return ARM64::D19;
- case ARM64::B20: return ARM64::D20;
- case ARM64::B21: return ARM64::D21;
- case ARM64::B22: return ARM64::D22;
- case ARM64::B23: return ARM64::D23;
- case ARM64::B24: return ARM64::D24;
- case ARM64::B25: return ARM64::D25;
- case ARM64::B26: return ARM64::D26;
- case ARM64::B27: return ARM64::D27;
- case ARM64::B28: return ARM64::D28;
- case ARM64::B29: return ARM64::D29;
- case ARM64::B30: return ARM64::D30;
- case ARM64::B31: return ARM64::D31;
- }
- // For anything else, return it unchanged.
- return Reg;
-}
-
-namespace ARM64CC {
-
-// The CondCodes constants map directly to the 4-bit encoding of the condition
-// field for predicated instructions.
-enum CondCode { // Meaning (integer)           Meaning (floating-point)
-  EQ = 0x0,     // Equal                       Equal
-  NE = 0x1,     // Not equal                   Not equal, or unordered
-  CS = 0x2,     // Carry set                   >, ==, or unordered
-  CC = 0x3,     // Carry clear                 Less than
-  MI = 0x4,     // Minus, negative             Less than
-  PL = 0x5,     // Plus, positive or zero      >, ==, or unordered
-  VS = 0x6,     // Overflow                    Unordered
-  VC = 0x7,     // No overflow                 Not unordered
-  HI = 0x8,     // Unsigned higher             Greater than, or unordered
-  LS = 0x9,     // Unsigned lower or same      Less than or equal
-  GE = 0xa,     // Greater than or equal       Greater than or equal
-  LT = 0xb,     // Less than                   Less than, or unordered
-  GT = 0xc,     // Greater than                Greater than
-  LE = 0xd,     // Less than or equal          <, ==, or unordered
-  AL = 0xe      // Always (unconditional)      Always (unconditional)
-};
-
-inline static const char *getCondCodeName(CondCode Code) {
- // cond<0> is ignored when cond<3:1> = 111, where 1110 is 0xe (aka AL).
- if ((Code & AL) == AL)
- Code = AL;
- switch (Code) {
- case EQ: return "eq";
- case NE: return "ne";
- case CS: return "cs";
- case CC: return "cc";
- case MI: return "mi";
- case PL: return "pl";
- case VS: return "vs";
- case VC: return "vc";
- case HI: return "hi";
- case LS: return "ls";
- case GE: return "ge";
- case LT: return "lt";
- case GT: return "gt";
- case LE: return "le";
- case AL: return "al";
- }
- llvm_unreachable("Unknown condition code");
-}
-
-inline static CondCode getInvertedCondCode(CondCode Code) {
- switch (Code) {
- default: llvm_unreachable("Unknown condition code");
- case EQ: return NE;
- case NE: return EQ;
- case CS: return CC;
- case CC: return CS;
- case MI: return PL;
- case PL: return MI;
- case VS: return VC;
- case VC: return VS;
- case HI: return LS;
- case LS: return HI;
- case GE: return LT;
- case LT: return GE;
- case GT: return LE;
- case LE: return GT;
- }
-}
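Worth noting, and easy to verify against the switch above: every code and its inverse differ only in bit 0 of the 4-bit encoding, so the whole table collapses to one XOR. A sketch, assuming the encodings listed above (AL excluded, since it has no inverse):

    #include <cassert>

    enum CondCode { EQ = 0x0, NE = 0x1, CS = 0x2, CC = 0x3, MI = 0x4,
                    PL = 0x5, VS = 0x6, VC = 0x7, HI = 0x8, LS = 0x9,
                    GE = 0xa, LT = 0xb, GT = 0xc, LE = 0xd, AL = 0xe };

    // Equivalent to the switch above for EQ..LE: flipping the low bit maps
    // EQ<->NE, CS<->CC, MI<->PL, VS<->VC, HI<->LS, GE<->LT, GT<->LE.
    inline CondCode getInvertedCondCodeFast(CondCode Code) {
      assert(Code != AL && "AL has no inverse");
      return static_cast<CondCode>(Code ^ 0x1);
    }

    int main() {
      assert(getInvertedCondCodeFast(EQ) == NE);
      assert(getInvertedCondCodeFast(GT) == LE);
      return 0;
    }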
-
-/// Given a condition code, return NZCV flags that would satisfy that condition.
-/// The flag bits are in the format expected by the ccmp instructions.
- /// Note that many different flag settings can satisfy a given condition code;
-/// this function just returns one of them.
-inline static unsigned getNZCVToSatisfyCondCode(CondCode Code) {
- // NZCV flags encoded as expected by ccmp instructions, ARMv8 ISA 5.5.7.
- enum { N = 8, Z = 4, C = 2, V = 1 };
- switch (Code) {
- default: llvm_unreachable("Unknown condition code");
- case EQ: return Z; // Z == 1
- case NE: return 0; // Z == 0
- case CS: return C; // C == 1
- case CC: return 0; // C == 0
- case MI: return N; // N == 1
- case PL: return 0; // N == 0
- case VS: return V; // V == 1
- case VC: return 0; // V == 0
- case HI: return C; // C == 1 && Z == 0
- case LS: return 0; // C == 0 || Z == 1
- case GE: return 0; // N == V
- case LT: return N; // N != V
- case GT: return 0; // Z == 0 && N == V
- case LE: return Z; // Z == 1 || N != V
- }
-}
-} // end namespace ARM64CC
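For instance, the HI row above returns just C: the flag state N=0, Z=0, C=1, V=0 (value 0b0010) satisfies "C == 1 && Z == 0", even though other states would too. A quick standalone check of that row (a sketch, not LLVM code):

    #include <cassert>

    int main() {
      enum { N = 8, Z = 4, C = 2, V = 1 };
      unsigned Flags = C; // what the HI case above returns
      // HI requires C == 1 && Z == 0; states such as N | C also qualify --
      // the function just returns one witness per condition code.
      assert((Flags & C) != 0 && (Flags & Z) == 0);
      return 0;
    }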
-
-namespace ARM64SYS {
-enum BarrierOption {
- InvalidBarrier = 0xff,
- OSHLD = 0x1,
- OSHST = 0x2,
- OSH = 0x3,
- NSHLD = 0x5,
- NSHST = 0x6,
- NSH = 0x7,
- ISHLD = 0x9,
- ISHST = 0xa,
- ISH = 0xb,
- LD = 0xd,
- ST = 0xe,
- SY = 0xf
-};
-
-inline static const char *getBarrierOptName(BarrierOption Opt) {
- switch (Opt) {
- default: return NULL;
- case 0x1: return "oshld";
- case 0x2: return "oshst";
- case 0x3: return "osh";
- case 0x5: return "nshld";
- case 0x6: return "nshst";
- case 0x7: return "nsh";
- case 0x9: return "ishld";
- case 0xa: return "ishst";
- case 0xb: return "ish";
- case 0xd: return "ld";
- case 0xe: return "st";
- case 0xf: return "sy";
- }
-}
-
-#define A64_SYSREG_ENC(op0,CRn,op2,CRm,op1) ((op0) << 14 | (op1) << 11 | \
- (CRn) << 7 | (CRm) << 3 | (op2))
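As a worked example of the packing (a sketch; the result can be checked against the Debug Architecture entries further down), MDCCSR_EL0 is encoded as A64_SYSREG_ENC(2, 0, 0, 1, 3):

    // op0 occupies bits [14,15], op1 [11,13], CRn [7,10], CRm [3,6],
    // op2 [0,2]:
    //   A64_SYSREG_ENC(2, 0, 0, 1, 3)
    //     = (2 << 14) | (3 << 11) | (0 << 7) | (1 << 3) | 0
    //     = 0x8000    | 0x1800    | 0x0      | 0x8
    //     = 0x9808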
-enum SystemRegister {
- InvalidSystemReg = 0,
- // Table in section 3.10.3
- SPSR_EL1 = 0xc200,
- SPSR_svc = SPSR_EL1,
- ELR_EL1 = 0xc201,
- SP_EL0 = 0xc208,
- SPSel = 0xc210,
- CurrentEL = 0xc212,
- DAIF = 0xda11,
- NZCV = 0xda10,
- FPCR = 0xda20,
- FPSR = 0xda21,
- DSPSR = 0xda28,
- DLR = 0xda29,
- SPSR_EL2 = 0xe200,
- SPSR_hyp = SPSR_EL2,
- ELR_EL2 = 0xe201,
- SP_EL1 = 0xe208,
- SPSR_irq = 0xe218,
- SPSR_abt = 0xe219,
- SPSR_und = 0xe21a,
- SPSR_fiq = 0xe21b,
- SPSR_EL3 = 0xf200,
- ELR_EL3 = 0xf201,
- SP_EL2 = 0xf208,
-
-
- // Table in section 3.10.8
- MIDR_EL1 = 0xc000,
- CTR_EL0 = 0xd801,
- MPIDR_EL1 = 0xc005,
- ECOIDR_EL1 = 0xc006,
- DCZID_EL0 = 0xd807,
- MVFR0_EL1 = 0xc018,
- MVFR1_EL1 = 0xc019,
- ID_AA64PFR0_EL1 = 0xc020,
- ID_AA64PFR1_EL1 = 0xc021,
- ID_AA64DFR0_EL1 = 0xc028,
- ID_AA64DFR1_EL1 = 0xc029,
- ID_AA64ISAR0_EL1 = 0xc030,
- ID_AA64ISAR1_EL1 = 0xc031,
- ID_AA64MMFR0_EL1 = 0xc038,
- ID_AA64MMFR1_EL1 = 0xc039,
- CCSIDR_EL1 = 0xc800,
- CLIDR_EL1 = 0xc801,
- AIDR_EL1 = 0xc807,
- CSSELR_EL1 = 0xd000,
- VPIDR_EL2 = 0xe000,
- VMPIDR_EL2 = 0xe005,
- SCTLR_EL1 = 0xc080,
- SCTLR_EL2 = 0xe080,
- SCTLR_EL3 = 0xf080,
- ACTLR_EL1 = 0xc081,
- ACTLR_EL2 = 0xe081,
- ACTLR_EL3 = 0xf081,
- CPACR_EL1 = 0xc082,
- CPTR_EL2 = 0xe08a,
- CPTR_EL3 = 0xf08a,
- SCR_EL3 = 0xf088,
- HCR_EL2 = 0xe088,
- MDCR_EL2 = 0xe089,
- MDCR_EL3 = 0xf099,
- HSTR_EL2 = 0xe08b,
- HACR_EL2 = 0xe08f,
- TTBR0_EL1 = 0xc100,
- TTBR1_EL1 = 0xc101,
- TTBR0_EL2 = 0xe100,
- TTBR0_EL3 = 0xf100,
- VTTBR_EL2 = 0xe108,
- TCR_EL1 = 0xc102,
- TCR_EL2 = 0xe102,
- TCR_EL3 = 0xf102,
- VTCR_EL2 = 0xe10a,
- ADFSR_EL1 = 0xc288,
- AIFSR_EL1 = 0xc289,
- ADFSR_EL2 = 0xe288,
- AIFSR_EL2 = 0xe289,
- ADFSR_EL3 = 0xf288,
- AIFSR_EL3 = 0xf289,
- ESR_EL1 = 0xc290,
- ESR_EL2 = 0xe290,
- ESR_EL3 = 0xf290,
- FAR_EL1 = 0xc300,
- FAR_EL2 = 0xe300,
- FAR_EL3 = 0xf300,
- HPFAR_EL2 = 0xe304,
- PAR_EL1 = 0xc3a0,
- MAIR_EL1 = 0xc510,
- MAIR_EL2 = 0xe510,
- MAIR_EL3 = 0xf510,
- AMAIR_EL1 = 0xc518,
- AMAIR_EL2 = 0xe518,
- AMAIR_EL3 = 0xf518,
- VBAR_EL1 = 0xc600,
- VBAR_EL2 = 0xe600,
- VBAR_EL3 = 0xf600,
- RVBAR_EL1 = 0xc601,
- RVBAR_EL2 = 0xe601,
- RVBAR_EL3 = 0xf601,
- ISR_EL1 = 0xc608,
- CONTEXTIDR_EL1 = 0xc681,
- TPIDR_EL0 = 0xde82,
- TPIDRRO_EL0 = 0xde83,
- TPIDR_EL1 = 0xc684,
- TPIDR_EL2 = 0xe682,
- TPIDR_EL3 = 0xf682,
- TEECR32_EL1 = 0x9000,
- CNTFRQ_EL0 = 0xdf00,
- CNTPCT_EL0 = 0xdf01,
- CNTVCT_EL0 = 0xdf02,
- CNTVOFF_EL2 = 0xe703,
- CNTKCTL_EL1 = 0xc708,
- CNTHCTL_EL2 = 0xe708,
- CNTP_TVAL_EL0 = 0xdf10,
- CNTP_CTL_EL0 = 0xdf11,
- CNTP_CVAL_EL0 = 0xdf12,
- CNTV_TVAL_EL0 = 0xdf18,
- CNTV_CTL_EL0 = 0xdf19,
- CNTV_CVAL_EL0 = 0xdf1a,
- CNTHP_TVAL_EL2 = 0xe710,
- CNTHP_CTL_EL2 = 0xe711,
- CNTHP_CVAL_EL2 = 0xe712,
- CNTPS_TVAL_EL1 = 0xff10,
- CNTPS_CTL_EL1 = 0xff11,
- CNTPS_CVAL_EL1= 0xff12,
-
- PMEVCNTR0_EL0 = 0xdf40,
- PMEVCNTR1_EL0 = 0xdf41,
- PMEVCNTR2_EL0 = 0xdf42,
- PMEVCNTR3_EL0 = 0xdf43,
- PMEVCNTR4_EL0 = 0xdf44,
- PMEVCNTR5_EL0 = 0xdf45,
- PMEVCNTR6_EL0 = 0xdf46,
- PMEVCNTR7_EL0 = 0xdf47,
- PMEVCNTR8_EL0 = 0xdf48,
- PMEVCNTR9_EL0 = 0xdf49,
- PMEVCNTR10_EL0 = 0xdf4a,
- PMEVCNTR11_EL0 = 0xdf4b,
- PMEVCNTR12_EL0 = 0xdf4c,
- PMEVCNTR13_EL0 = 0xdf4d,
- PMEVCNTR14_EL0 = 0xdf4e,
- PMEVCNTR15_EL0 = 0xdf4f,
- PMEVCNTR16_EL0 = 0xdf50,
- PMEVCNTR17_EL0 = 0xdf51,
- PMEVCNTR18_EL0 = 0xdf52,
- PMEVCNTR19_EL0 = 0xdf53,
- PMEVCNTR20_EL0 = 0xdf54,
- PMEVCNTR21_EL0 = 0xdf55,
- PMEVCNTR22_EL0 = 0xdf56,
- PMEVCNTR23_EL0 = 0xdf57,
- PMEVCNTR24_EL0 = 0xdf58,
- PMEVCNTR25_EL0 = 0xdf59,
- PMEVCNTR26_EL0 = 0xdf5a,
- PMEVCNTR27_EL0 = 0xdf5b,
- PMEVCNTR28_EL0 = 0xdf5c,
- PMEVCNTR29_EL0 = 0xdf5d,
- PMEVCNTR30_EL0 = 0xdf5e,
-
- PMEVTYPER0_EL0 = 0xdf60,
- PMEVTYPER1_EL0 = 0xdf61,
- PMEVTYPER2_EL0 = 0xdf62,
- PMEVTYPER3_EL0 = 0xdf63,
- PMEVTYPER4_EL0 = 0xdf64,
- PMEVTYPER5_EL0 = 0xdf65,
- PMEVTYPER6_EL0 = 0xdf66,
- PMEVTYPER7_EL0 = 0xdf67,
- PMEVTYPER8_EL0 = 0xdf68,
- PMEVTYPER9_EL0 = 0xdf69,
- PMEVTYPER10_EL0 = 0xdf6a,
- PMEVTYPER11_EL0 = 0xdf6b,
- PMEVTYPER12_EL0 = 0xdf6c,
- PMEVTYPER13_EL0 = 0xdf6d,
- PMEVTYPER14_EL0 = 0xdf6e,
- PMEVTYPER15_EL0 = 0xdf6f,
- PMEVTYPER16_EL0 = 0xdf70,
- PMEVTYPER17_EL0 = 0xdf71,
- PMEVTYPER18_EL0 = 0xdf72,
- PMEVTYPER19_EL0 = 0xdf73,
- PMEVTYPER20_EL0 = 0xdf74,
- PMEVTYPER21_EL0 = 0xdf75,
- PMEVTYPER22_EL0 = 0xdf76,
- PMEVTYPER23_EL0 = 0xdf77,
- PMEVTYPER24_EL0 = 0xdf78,
- PMEVTYPER25_EL0 = 0xdf79,
- PMEVTYPER26_EL0 = 0xdf7a,
- PMEVTYPER27_EL0 = 0xdf7b,
- PMEVTYPER28_EL0 = 0xdf7c,
- PMEVTYPER29_EL0 = 0xdf7d,
- PMEVTYPER30_EL0 = 0xdf7e,
-
- PMCCFILTR_EL0 = 0xdf7f,
-
- RMR_EL3 = 0xf602,
- RMR_EL2 = 0xd602,
- RMR_EL1 = 0xce02,
-
- // Debug Architecture 5.3, Table 17.
- MDCCSR_EL0 = A64_SYSREG_ENC(2, 0, 0, 1, 3),
- MDCCINT_EL1 = A64_SYSREG_ENC(2, 0, 0, 2, 0),
- DBGDTR_EL0 = A64_SYSREG_ENC(2, 0, 0, 4, 3),
- DBGDTRRX_EL0 = A64_SYSREG_ENC(2, 0, 0, 5, 3),
- DBGDTRTX_EL0 = DBGDTRRX_EL0,
- DBGVCR32_EL2 = A64_SYSREG_ENC(2, 0, 0, 7, 4),
- OSDTRRX_EL1 = A64_SYSREG_ENC(2, 0, 2, 0, 0),
- MDSCR_EL1 = A64_SYSREG_ENC(2, 0, 2, 2, 0),
- OSDTRTX_EL1 = A64_SYSREG_ENC(2, 0, 2, 3, 0),
- OSECCR_EL11 = A64_SYSREG_ENC(2, 0, 2, 6, 0),
-
- DBGBVR0_EL1 = A64_SYSREG_ENC(2, 0, 4, 0, 0),
- DBGBVR1_EL1 = A64_SYSREG_ENC(2, 0, 4, 1, 0),
- DBGBVR2_EL1 = A64_SYSREG_ENC(2, 0, 4, 2, 0),
- DBGBVR3_EL1 = A64_SYSREG_ENC(2, 0, 4, 3, 0),
- DBGBVR4_EL1 = A64_SYSREG_ENC(2, 0, 4, 4, 0),
- DBGBVR5_EL1 = A64_SYSREG_ENC(2, 0, 4, 5, 0),
- DBGBVR6_EL1 = A64_SYSREG_ENC(2, 0, 4, 6, 0),
- DBGBVR7_EL1 = A64_SYSREG_ENC(2, 0, 4, 7, 0),
- DBGBVR8_EL1 = A64_SYSREG_ENC(2, 0, 4, 8, 0),
- DBGBVR9_EL1 = A64_SYSREG_ENC(2, 0, 4, 9, 0),
- DBGBVR10_EL1 = A64_SYSREG_ENC(2, 0, 4, 10, 0),
- DBGBVR11_EL1 = A64_SYSREG_ENC(2, 0, 4, 11, 0),
- DBGBVR12_EL1 = A64_SYSREG_ENC(2, 0, 4, 12, 0),
- DBGBVR13_EL1 = A64_SYSREG_ENC(2, 0, 4, 13, 0),
- DBGBVR14_EL1 = A64_SYSREG_ENC(2, 0, 4, 14, 0),
- DBGBVR15_EL1 = A64_SYSREG_ENC(2, 0, 4, 15, 0),
-
- DBGBCR0_EL1 = A64_SYSREG_ENC(2, 0, 5, 0, 0),
- DBGBCR1_EL1 = A64_SYSREG_ENC(2, 0, 5, 1, 0),
- DBGBCR2_EL1 = A64_SYSREG_ENC(2, 0, 5, 2, 0),
- DBGBCR3_EL1 = A64_SYSREG_ENC(2, 0, 5, 3, 0),
- DBGBCR4_EL1 = A64_SYSREG_ENC(2, 0, 5, 4, 0),
- DBGBCR5_EL1 = A64_SYSREG_ENC(2, 0, 5, 5, 0),
- DBGBCR6_EL1 = A64_SYSREG_ENC(2, 0, 5, 6, 0),
- DBGBCR7_EL1 = A64_SYSREG_ENC(2, 0, 5, 7, 0),
- DBGBCR8_EL1 = A64_SYSREG_ENC(2, 0, 5, 8, 0),
- DBGBCR9_EL1 = A64_SYSREG_ENC(2, 0, 5, 9, 0),
- DBGBCR10_EL1 = A64_SYSREG_ENC(2, 0, 5, 10, 0),
- DBGBCR11_EL1 = A64_SYSREG_ENC(2, 0, 5, 11, 0),
- DBGBCR12_EL1 = A64_SYSREG_ENC(2, 0, 5, 12, 0),
- DBGBCR13_EL1 = A64_SYSREG_ENC(2, 0, 5, 13, 0),
- DBGBCR14_EL1 = A64_SYSREG_ENC(2, 0, 5, 14, 0),
- DBGBCR15_EL1 = A64_SYSREG_ENC(2, 0, 5, 15, 0),
-
- DBGWVR0_EL1 = A64_SYSREG_ENC(2, 0, 6, 0, 0),
- DBGWVR1_EL1 = A64_SYSREG_ENC(2, 0, 6, 1, 0),
- DBGWVR2_EL1 = A64_SYSREG_ENC(2, 0, 6, 2, 0),
- DBGWVR3_EL1 = A64_SYSREG_ENC(2, 0, 6, 3, 0),
- DBGWVR4_EL1 = A64_SYSREG_ENC(2, 0, 6, 4, 0),
- DBGWVR5_EL1 = A64_SYSREG_ENC(2, 0, 6, 5, 0),
- DBGWVR6_EL1 = A64_SYSREG_ENC(2, 0, 6, 6, 0),
- DBGWVR7_EL1 = A64_SYSREG_ENC(2, 0, 6, 7, 0),
- DBGWVR8_EL1 = A64_SYSREG_ENC(2, 0, 6, 8, 0),
- DBGWVR9_EL1 = A64_SYSREG_ENC(2, 0, 6, 9, 0),
- DBGWVR10_EL1 = A64_SYSREG_ENC(2, 0, 6, 10, 0),
- DBGWVR11_EL1 = A64_SYSREG_ENC(2, 0, 6, 11, 0),
- DBGWVR12_EL1 = A64_SYSREG_ENC(2, 0, 6, 12, 0),
- DBGWVR13_EL1 = A64_SYSREG_ENC(2, 0, 6, 13, 0),
- DBGWVR14_EL1 = A64_SYSREG_ENC(2, 0, 6, 14, 0),
- DBGWVR15_EL1 = A64_SYSREG_ENC(2, 0, 6, 15, 0),
-
- DBGWCR0_EL1 = A64_SYSREG_ENC(2, 0, 7, 0, 0),
- DBGWCR1_EL1 = A64_SYSREG_ENC(2, 0, 7, 1, 0),
- DBGWCR2_EL1 = A64_SYSREG_ENC(2, 0, 7, 2, 0),
- DBGWCR3_EL1 = A64_SYSREG_ENC(2, 0, 7, 3, 0),
- DBGWCR4_EL1 = A64_SYSREG_ENC(2, 0, 7, 4, 0),
- DBGWCR5_EL1 = A64_SYSREG_ENC(2, 0, 7, 5, 0),
- DBGWCR6_EL1 = A64_SYSREG_ENC(2, 0, 7, 6, 0),
- DBGWCR7_EL1 = A64_SYSREG_ENC(2, 0, 7, 7, 0),
- DBGWCR8_EL1 = A64_SYSREG_ENC(2, 0, 7, 8, 0),
- DBGWCR9_EL1 = A64_SYSREG_ENC(2, 0, 7, 9, 0),
- DBGWCR10_EL1 = A64_SYSREG_ENC(2, 0, 7, 10, 0),
- DBGWCR11_EL1 = A64_SYSREG_ENC(2, 0, 7, 11, 0),
- DBGWCR12_EL1 = A64_SYSREG_ENC(2, 0, 7, 12, 0),
- DBGWCR13_EL1 = A64_SYSREG_ENC(2, 0, 7, 13, 0),
- DBGWCR14_EL1 = A64_SYSREG_ENC(2, 0, 7, 14, 0),
- DBGWCR15_EL1 = A64_SYSREG_ENC(2, 0, 7, 15, 0),
-
- MDRAR_EL1 = A64_SYSREG_ENC(2, 1, 0, 0, 0),
- OSLAR_EL1 = A64_SYSREG_ENC(2, 1, 4, 0, 0),
- OSLSR_EL1 = A64_SYSREG_ENC(2, 1, 4, 1, 0),
- OSDLR_EL1 = A64_SYSREG_ENC(2, 1, 4, 3, 0),
- DBGPRCR_EL1 = A64_SYSREG_ENC(2, 1, 4, 4, 0),
-
- DBGCLAIMSET_EL1 = A64_SYSREG_ENC(2, 7, 6, 8, 0),
- DBGCLAIMCLR_EL1 = A64_SYSREG_ENC(2, 7, 6, 9, 0),
- DBGAUTHSTATUS_EL1 = A64_SYSREG_ENC(2, 7, 6, 14, 0),
-
- DBGDEVID2 = A64_SYSREG_ENC(2, 7, 7, 0, 0),
- DBGDEVID1 = A64_SYSREG_ENC(2, 7, 7, 1, 0),
- DBGDEVID0 = A64_SYSREG_ENC(2, 7, 7, 2, 0),
-
- // The following registers are defined to allow access from AArch64 to
- // registers which are only used in the AArch32 architecture.
- DACR32_EL2 = 0xe180,
- IFSR32_EL2 = 0xe281,
- TEEHBR32_EL1 = 0x9080,
- SDER32_EL3 = 0xf089,
- FPEXC32_EL2 = 0xe298,
-
- // Cyclone specific system registers
- CPM_IOACC_CTL_EL3 = 0xff90,
-
- // Architectural system registers
- ID_PFR0_EL1 = 0xc008,
- ID_PFR1_EL1 = 0xc009,
- ID_DFR0_EL1 = 0xc00a,
- ID_AFR0_EL1 = 0xc00b,
- ID_ISAR0_EL1 = 0xc010,
- ID_ISAR1_EL1 = 0xc011,
- ID_ISAR2_EL1 = 0xc012,
- ID_ISAR3_EL1 = 0xc013,
- ID_ISAR4_EL1 = 0xc014,
- ID_ISAR5_EL1 = 0xc015,
- AFSR1_EL1 = 0xc289, // note same as old AIFSR_EL1
- AFSR0_EL1 = 0xc288, // note same as old ADFSR_EL1
- REVIDR_EL1 = 0xc006 // note same as old ECOIDR_EL1
-
-};
-#undef A64_SYSREG_ENC
-
-static inline const char *getSystemRegisterName(SystemRegister Reg) {
- switch(Reg) {
- default: return NULL; // Caller is responsible for handling invalid value.
- case SPSR_EL1: return "SPSR_EL1";
- case ELR_EL1: return "ELR_EL1";
- case SP_EL0: return "SP_EL0";
- case SPSel: return "SPSel";
- case DAIF: return "DAIF";
- case CurrentEL: return "CurrentEL";
- case NZCV: return "NZCV";
- case FPCR: return "FPCR";
- case FPSR: return "FPSR";
- case DSPSR: return "DSPSR";
- case DLR: return "DLR";
- case SPSR_EL2: return "SPSR_EL2";
- case ELR_EL2: return "ELR_EL2";
- case SP_EL1: return "SP_EL1";
- case SPSR_irq: return "SPSR_irq";
- case SPSR_abt: return "SPSR_abt";
- case SPSR_und: return "SPSR_und";
- case SPSR_fiq: return "SPSR_fiq";
- case SPSR_EL3: return "SPSR_EL3";
- case ELR_EL3: return "ELR_EL3";
- case SP_EL2: return "SP_EL2";
- case MIDR_EL1: return "MIDR_EL1";
- case CTR_EL0: return "CTR_EL0";
- case MPIDR_EL1: return "MPIDR_EL1";
- case DCZID_EL0: return "DCZID_EL0";
- case MVFR0_EL1: return "MVFR0_EL1";
- case MVFR1_EL1: return "MVFR1_EL1";
- case ID_AA64PFR0_EL1: return "ID_AA64PFR0_EL1";
- case ID_AA64PFR1_EL1: return "ID_AA64PFR1_EL1";
- case ID_AA64DFR0_EL1: return "ID_AA64DFR0_EL1";
- case ID_AA64DFR1_EL1: return "ID_AA64DFR1_EL1";
- case ID_AA64ISAR0_EL1: return "ID_AA64ISAR0_EL1";
- case ID_AA64ISAR1_EL1: return "ID_AA64ISAR1_EL1";
- case ID_AA64MMFR0_EL1: return "ID_AA64MMFR0_EL1";
- case ID_AA64MMFR1_EL1: return "ID_AA64MMFR1_EL1";
- case CCSIDR_EL1: return "CCSIDR_EL1";
- case CLIDR_EL1: return "CLIDR_EL1";
- case AIDR_EL1: return "AIDR_EL1";
- case CSSELR_EL1: return "CSSELR_EL1";
- case VPIDR_EL2: return "VPIDR_EL2";
- case VMPIDR_EL2: return "VMPIDR_EL2";
- case SCTLR_EL1: return "SCTLR_EL1";
- case SCTLR_EL2: return "SCTLR_EL2";
- case SCTLR_EL3: return "SCTLR_EL3";
- case ACTLR_EL1: return "ACTLR_EL1";
- case ACTLR_EL2: return "ACTLR_EL2";
- case ACTLR_EL3: return "ACTLR_EL3";
- case CPACR_EL1: return "CPACR_EL1";
- case CPTR_EL2: return "CPTR_EL2";
- case CPTR_EL3: return "CPTR_EL3";
- case SCR_EL3: return "SCR_EL3";
- case HCR_EL2: return "HCR_EL2";
- case MDCR_EL2: return "MDCR_EL2";
- case MDCR_EL3: return "MDCR_EL3";
- case HSTR_EL2: return "HSTR_EL2";
- case HACR_EL2: return "HACR_EL2";
- case TTBR0_EL1: return "TTBR0_EL1";
- case TTBR1_EL1: return "TTBR1_EL1";
- case TTBR0_EL2: return "TTBR0_EL2";
- case TTBR0_EL3: return "TTBR0_EL3";
- case VTTBR_EL2: return "VTTBR_EL2";
- case TCR_EL1: return "TCR_EL1";
- case TCR_EL2: return "TCR_EL2";
- case TCR_EL3: return "TCR_EL3";
- case VTCR_EL2: return "VTCR_EL2";
- case ADFSR_EL2: return "ADFSR_EL2";
- case AIFSR_EL2: return "AIFSR_EL2";
- case ADFSR_EL3: return "ADFSR_EL3";
- case AIFSR_EL3: return "AIFSR_EL3";
- case ESR_EL1: return "ESR_EL1";
- case ESR_EL2: return "ESR_EL2";
- case ESR_EL3: return "ESR_EL3";
- case FAR_EL1: return "FAR_EL1";
- case FAR_EL2: return "FAR_EL2";
- case FAR_EL3: return "FAR_EL3";
- case HPFAR_EL2: return "HPFAR_EL2";
- case PAR_EL1: return "PAR_EL1";
- case MAIR_EL1: return "MAIR_EL1";
- case MAIR_EL2: return "MAIR_EL2";
- case MAIR_EL3: return "MAIR_EL3";
- case AMAIR_EL1: return "AMAIR_EL1";
- case AMAIR_EL2: return "AMAIR_EL2";
- case AMAIR_EL3: return "AMAIR_EL3";
- case VBAR_EL1: return "VBAR_EL1";
- case VBAR_EL2: return "VBAR_EL2";
- case VBAR_EL3: return "VBAR_EL3";
- case RVBAR_EL1: return "RVBAR_EL1";
- case RVBAR_EL2: return "RVBAR_EL2";
- case RVBAR_EL3: return "RVBAR_EL3";
- case ISR_EL1: return "ISR_EL1";
- case CONTEXTIDR_EL1: return "CONTEXTIDR_EL1";
- case TPIDR_EL0: return "TPIDR_EL0";
- case TPIDRRO_EL0: return "TPIDRRO_EL0";
- case TPIDR_EL1: return "TPIDR_EL1";
- case TPIDR_EL2: return "TPIDR_EL2";
- case TPIDR_EL3: return "TPIDR_EL3";
- case TEECR32_EL1: return "TEECR32_EL1";
- case CNTFRQ_EL0: return "CNTFRQ_EL0";
- case CNTPCT_EL0: return "CNTPCT_EL0";
- case CNTVCT_EL0: return "CNTVCT_EL0";
- case CNTVOFF_EL2: return "CNTVOFF_EL2";
- case CNTKCTL_EL1: return "CNTKCTL_EL1";
- case CNTHCTL_EL2: return "CNTHCTL_EL2";
- case CNTP_TVAL_EL0: return "CNTP_TVAL_EL0";
- case CNTP_CTL_EL0: return "CNTP_CTL_EL0";
- case CNTP_CVAL_EL0: return "CNTP_CVAL_EL0";
- case CNTV_TVAL_EL0: return "CNTV_TVAL_EL0";
- case CNTV_CTL_EL0: return "CNTV_CTL_EL0";
- case CNTV_CVAL_EL0: return "CNTV_CVAL_EL0";
- case CNTHP_TVAL_EL2: return "CNTHP_TVAL_EL2";
- case CNTHP_CTL_EL2: return "CNTHP_CTL_EL2";
- case CNTHP_CVAL_EL2: return "CNTHP_CVAL_EL2";
- case CNTPS_TVAL_EL1: return "CNTPS_TVAL_EL1";
- case CNTPS_CTL_EL1: return "CNTPS_CTL_EL1";
- case CNTPS_CVAL_EL1: return "CNTPS_CVAL_EL1";
- case DACR32_EL2: return "DACR32_EL2";
- case IFSR32_EL2: return "IFSR32_EL2";
- case TEEHBR32_EL1: return "TEEHBR32_EL1";
- case SDER32_EL3: return "SDER32_EL3";
- case FPEXC32_EL2: return "FPEXC32_EL2";
- case PMEVCNTR0_EL0: return "PMEVCNTR0_EL0";
- case PMEVCNTR1_EL0: return "PMEVCNTR1_EL0";
- case PMEVCNTR2_EL0: return "PMEVCNTR2_EL0";
- case PMEVCNTR3_EL0: return "PMEVCNTR3_EL0";
- case PMEVCNTR4_EL0: return "PMEVCNTR4_EL0";
- case PMEVCNTR5_EL0: return "PMEVCNTR5_EL0";
- case PMEVCNTR6_EL0: return "PMEVCNTR6_EL0";
- case PMEVCNTR7_EL0: return "PMEVCNTR7_EL0";
- case PMEVCNTR8_EL0: return "PMEVCNTR8_EL0";
- case PMEVCNTR9_EL0: return "PMEVCNTR9_EL0";
- case PMEVCNTR10_EL0: return "PMEVCNTR10_EL0";
- case PMEVCNTR11_EL0: return "PMEVCNTR11_EL0";
- case PMEVCNTR12_EL0: return "PMEVCNTR12_EL0";
- case PMEVCNTR13_EL0: return "PMEVCNTR13_EL0";
- case PMEVCNTR14_EL0: return "PMEVCNTR14_EL0";
- case PMEVCNTR15_EL0: return "PMEVCNTR15_EL0";
- case PMEVCNTR16_EL0: return "PMEVCNTR16_EL0";
- case PMEVCNTR17_EL0: return "PMEVCNTR17_EL0";
- case PMEVCNTR18_EL0: return "PMEVCNTR18_EL0";
- case PMEVCNTR19_EL0: return "PMEVCNTR19_EL0";
- case PMEVCNTR20_EL0: return "PMEVCNTR20_EL0";
- case PMEVCNTR21_EL0: return "PMEVCNTR21_EL0";
- case PMEVCNTR22_EL0: return "PMEVCNTR22_EL0";
- case PMEVCNTR23_EL0: return "PMEVCNTR23_EL0";
- case PMEVCNTR24_EL0: return "PMEVCNTR24_EL0";
- case PMEVCNTR25_EL0: return "PMEVCNTR25_EL0";
- case PMEVCNTR26_EL0: return "PMEVCNTR26_EL0";
- case PMEVCNTR27_EL0: return "PMEVCNTR27_EL0";
- case PMEVCNTR28_EL0: return "PMEVCNTR28_EL0";
- case PMEVCNTR29_EL0: return "PMEVCNTR29_EL0";
- case PMEVCNTR30_EL0: return "PMEVCNTR30_EL0";
- case PMEVTYPER0_EL0: return "PMEVTYPER0_EL0";
- case PMEVTYPER1_EL0: return "PMEVTYPER1_EL0";
- case PMEVTYPER2_EL0: return "PMEVTYPER2_EL0";
- case PMEVTYPER3_EL0: return "PMEVTYPER3_EL0";
- case PMEVTYPER4_EL0: return "PMEVTYPER4_EL0";
- case PMEVTYPER5_EL0: return "PMEVTYPER5_EL0";
- case PMEVTYPER6_EL0: return "PMEVTYPER6_EL0";
- case PMEVTYPER7_EL0: return "PMEVTYPER7_EL0";
- case PMEVTYPER8_EL0: return "PMEVTYPER8_EL0";
- case PMEVTYPER9_EL0: return "PMEVTYPER9_EL0";
- case PMEVTYPER10_EL0: return "PMEVTYPER10_EL0";
- case PMEVTYPER11_EL0: return "PMEVTYPER11_EL0";
- case PMEVTYPER12_EL0: return "PMEVTYPER12_EL0";
- case PMEVTYPER13_EL0: return "PMEVTYPER13_EL0";
- case PMEVTYPER14_EL0: return "PMEVTYPER14_EL0";
- case PMEVTYPER15_EL0: return "PMEVTYPER15_EL0";
- case PMEVTYPER16_EL0: return "PMEVTYPER16_EL0";
- case PMEVTYPER17_EL0: return "PMEVTYPER17_EL0";
- case PMEVTYPER18_EL0: return "PMEVTYPER18_EL0";
- case PMEVTYPER19_EL0: return "PMEVTYPER19_EL0";
- case PMEVTYPER20_EL0: return "PMEVTYPER20_EL0";
- case PMEVTYPER21_EL0: return "PMEVTYPER21_EL0";
- case PMEVTYPER22_EL0: return "PMEVTYPER22_EL0";
- case PMEVTYPER23_EL0: return "PMEVTYPER23_EL0";
- case PMEVTYPER24_EL0: return "PMEVTYPER24_EL0";
- case PMEVTYPER25_EL0: return "PMEVTYPER25_EL0";
- case PMEVTYPER26_EL0: return "PMEVTYPER26_EL0";
- case PMEVTYPER27_EL0: return "PMEVTYPER27_EL0";
- case PMEVTYPER28_EL0: return "PMEVTYPER28_EL0";
- case PMEVTYPER29_EL0: return "PMEVTYPER29_EL0";
- case PMEVTYPER30_EL0: return "PMEVTYPER30_EL0";
- case PMCCFILTR_EL0: return "PMCCFILTR_EL0";
- case RMR_EL3: return "RMR_EL3";
- case RMR_EL2: return "RMR_EL2";
- case RMR_EL1: return "RMR_EL1";
- case CPM_IOACC_CTL_EL3: return "CPM_IOACC_CTL_EL3";
- case MDCCSR_EL0: return "MDCCSR_EL0";
- case MDCCINT_EL1: return "MDCCINT_EL1";
- case DBGDTR_EL0: return "DBGDTR_EL0";
- case DBGDTRRX_EL0: return "DBGDTRRX_EL0";
- case DBGVCR32_EL2: return "DBGVCR32_EL2";
- case OSDTRRX_EL1: return "OSDTRRX_EL1";
- case MDSCR_EL1: return "MDSCR_EL1";
- case OSDTRTX_EL1: return "OSDTRTX_EL1";
- case OSECCR_EL11: return "OSECCR_EL11";
- case DBGBVR0_EL1: return "DBGBVR0_EL1";
- case DBGBVR1_EL1: return "DBGBVR1_EL1";
- case DBGBVR2_EL1: return "DBGBVR2_EL1";
- case DBGBVR3_EL1: return "DBGBVR3_EL1";
- case DBGBVR4_EL1: return "DBGBVR4_EL1";
- case DBGBVR5_EL1: return "DBGBVR5_EL1";
- case DBGBVR6_EL1: return "DBGBVR6_EL1";
- case DBGBVR7_EL1: return "DBGBVR7_EL1";
- case DBGBVR8_EL1: return "DBGBVR8_EL1";
- case DBGBVR9_EL1: return "DBGBVR9_EL1";
- case DBGBVR10_EL1: return "DBGBVR10_EL1";
- case DBGBVR11_EL1: return "DBGBVR11_EL1";
- case DBGBVR12_EL1: return "DBGBVR12_EL1";
- case DBGBVR13_EL1: return "DBGBVR13_EL1";
- case DBGBVR14_EL1: return "DBGBVR14_EL1";
- case DBGBVR15_EL1: return "DBGBVR15_EL1";
- case DBGBCR0_EL1: return "DBGBCR0_EL1";
- case DBGBCR1_EL1: return "DBGBCR1_EL1";
- case DBGBCR2_EL1: return "DBGBCR2_EL1";
- case DBGBCR3_EL1: return "DBGBCR3_EL1";
- case DBGBCR4_EL1: return "DBGBCR4_EL1";
- case DBGBCR5_EL1: return "DBGBCR5_EL1";
- case DBGBCR6_EL1: return "DBGBCR6_EL1";
- case DBGBCR7_EL1: return "DBGBCR7_EL1";
- case DBGBCR8_EL1: return "DBGBCR8_EL1";
- case DBGBCR9_EL1: return "DBGBCR9_EL1";
- case DBGBCR10_EL1: return "DBGBCR10_EL1";
- case DBGBCR11_EL1: return "DBGBCR11_EL1";
- case DBGBCR12_EL1: return "DBGBCR12_EL1";
- case DBGBCR13_EL1: return "DBGBCR13_EL1";
- case DBGBCR14_EL1: return "DBGBCR14_EL1";
- case DBGBCR15_EL1: return "DBGBCR15_EL1";
- case DBGWVR0_EL1: return "DBGWVR0_EL1";
- case DBGWVR1_EL1: return "DBGWVR1_EL1";
- case DBGWVR2_EL1: return "DBGWVR2_EL1";
- case DBGWVR3_EL1: return "DBGWVR3_EL1";
- case DBGWVR4_EL1: return "DBGWVR4_EL1";
- case DBGWVR5_EL1: return "DBGWVR5_EL1";
- case DBGWVR6_EL1: return "DBGWVR6_EL1";
- case DBGWVR7_EL1: return "DBGWVR7_EL1";
- case DBGWVR8_EL1: return "DBGWVR8_EL1";
- case DBGWVR9_EL1: return "DBGWVR9_EL1";
- case DBGWVR10_EL1: return "DBGWVR10_EL1";
- case DBGWVR11_EL1: return "DBGWVR11_EL1";
- case DBGWVR12_EL1: return "DBGWVR12_EL1";
- case DBGWVR13_EL1: return "DBGWVR13_EL1";
- case DBGWVR14_EL1: return "DBGWVR14_EL1";
- case DBGWVR15_EL1: return "DBGWVR15_EL1";
- case DBGWCR0_EL1: return "DBGWCR0_EL1";
- case DBGWCR1_EL1: return "DBGWCR1_EL1";
- case DBGWCR2_EL1: return "DBGWCR2_EL1";
- case DBGWCR3_EL1: return "DBGWCR3_EL1";
- case DBGWCR4_EL1: return "DBGWCR4_EL1";
- case DBGWCR5_EL1: return "DBGWCR5_EL1";
- case DBGWCR6_EL1: return "DBGWCR6_EL1";
- case DBGWCR7_EL1: return "DBGWCR7_EL1";
- case DBGWCR8_EL1: return "DBGWCR8_EL1";
- case DBGWCR9_EL1: return "DBGWCR9_EL1";
- case DBGWCR10_EL1: return "DBGWCR10_EL1";
- case DBGWCR11_EL1: return "DBGWCR11_EL1";
- case DBGWCR12_EL1: return "DBGWCR12_EL1";
- case DBGWCR13_EL1: return "DBGWCR13_EL1";
- case DBGWCR14_EL1: return "DBGWCR14_EL1";
- case DBGWCR15_EL1: return "DBGWCR15_EL1";
- case MDRAR_EL1: return "MDRAR_EL1";
- case OSLAR_EL1: return "OSLAR_EL1";
- case OSLSR_EL1: return "OSLSR_EL1";
- case OSDLR_EL1: return "OSDLR_EL1";
- case DBGPRCR_EL1: return "DBGPRCR_EL1";
- case DBGCLAIMSET_EL1: return "DBGCLAIMSET_EL1";
- case DBGCLAIMCLR_EL1: return "DBGCLAIMCLR_EL1";
- case DBGAUTHSTATUS_EL1: return "DBGAUTHSTATUS_EL1";
- case DBGDEVID2: return "DBGDEVID2";
- case DBGDEVID1: return "DBGDEVID1";
- case DBGDEVID0: return "DBGDEVID0";
- case ID_PFR0_EL1: return "ID_PFR0_EL1";
- case ID_PFR1_EL1: return "ID_PFR1_EL1";
- case ID_DFR0_EL1: return "ID_DFR0_EL1";
- case ID_AFR0_EL1: return "ID_AFR0_EL1";
- case ID_ISAR0_EL1: return "ID_ISAR0_EL1";
- case ID_ISAR1_EL1: return "ID_ISAR1_EL1";
- case ID_ISAR2_EL1: return "ID_ISAR2_EL1";
- case ID_ISAR3_EL1: return "ID_ISAR3_EL1";
- case ID_ISAR4_EL1: return "ID_ISAR4_EL1";
- case ID_ISAR5_EL1: return "ID_ISAR5_EL1";
- case AFSR1_EL1: return "AFSR1_EL1";
- case AFSR0_EL1: return "AFSR0_EL1";
- case REVIDR_EL1: return "REVIDR_EL1";
- }
-}
-
-enum CPSRField {
- InvalidCPSRField = 0xff,
- cpsr_SPSel = 0x5,
- cpsr_DAIFSet = 0x1e,
- cpsr_DAIFClr = 0x1f
-};
-
-static inline const char *getCPSRFieldName(CPSRField Val) {
- switch(Val) {
- default: assert(0 && "Invalid system register value!");
- case cpsr_SPSel: return "SPSel";
- case cpsr_DAIFSet: return "DAIFSet";
- case cpsr_DAIFClr: return "DAIFClr";
- }
-}
-
-} // end namespace ARM64SYS
-
-namespace ARM64II {
- /// Target Operand Flag enum.
- enum TOF {
- //===------------------------------------------------------------------===//
- // ARM64 Specific MachineOperand flags.
-
- MO_NO_FLAG,
-
- MO_FRAGMENT = 0x7,
-
- /// MO_PAGE - A symbol operand with this flag represents the pc-relative
- /// offset of the 4K page containing the symbol. This is used with the
- /// ADRP instruction.
- MO_PAGE = 1,
-
- /// MO_PAGEOFF - A symbol operand with this flag represents the offset of
- /// that symbol within a 4K page. This offset is added to the page address
- /// to produce the complete address.
- MO_PAGEOFF = 2,
-
- /// MO_G3 - A symbol operand with this flag (granule 3) represents the high
- /// 16-bits of a 64-bit address, used in a MOVZ or MOVK instruction
- MO_G3 = 3,
-
- /// MO_G2 - A symbol operand with this flag (granule 2) represents the bits
- /// 32-47 of a 64-bit address, used in a MOVZ or MOVK instruction
- MO_G2 = 4,
-
- /// MO_G1 - A symbol operand with this flag (granule 1) represents the bits
- /// 16-31 of a 64-bit address, used in a MOVZ or MOVK instruction
- MO_G1 = 5,
-
- /// MO_G0 - A symbol operand with this flag (granule 0) represents the bits
- /// 0-15 of a 64-bit address, used in a MOVZ or MOVK instruction
- MO_G0 = 6,
-
- /// MO_GOT - This flag indicates that a symbol operand represents the
- /// address of the GOT entry for the symbol, rather than the address of
- /// the symbol itself.
- MO_GOT = 8,
-
- /// MO_NC - Indicates whether the linker is expected to check the symbol
- /// reference for overflow. For example in an ADRP/ADD pair of relocations
- /// the ADRP usually does check, but not the ADD.
- MO_NC = 0x10,
-
- /// MO_TLS - Indicates that the operand being accessed is some kind of
- /// thread-local symbol. On Darwin, only one type of thread-local access
- /// exists (pre linker-relaxation), but on ELF the TLSModel used for the
- /// referee will affect interpretation.
- MO_TLS = 0x20
- };
-} // end namespace ARM64II
-
-} // end namespace llvm
-
-#endif
diff --git a/lib/Target/ARM64/MCTargetDesc/ARM64ELFObjectWriter.cpp b/lib/Target/ARM64/MCTargetDesc/ARM64ELFObjectWriter.cpp
deleted file mode 100644
index 1a132a1..0000000
--- a/lib/Target/ARM64/MCTargetDesc/ARM64ELFObjectWriter.cpp
+++ /dev/null
@@ -1,237 +0,0 @@
-//===-- ARM64ELFObjectWriter.cpp - ARM64 ELF Writer -----------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file handles ELF-specific object emission, converting LLVM's internal
-// fixups into the appropriate relocations.
-//
-//===----------------------------------------------------------------------===//
-
-#include "MCTargetDesc/ARM64FixupKinds.h"
-#include "MCTargetDesc/ARM64MCExpr.h"
-#include "MCTargetDesc/ARM64MCTargetDesc.h"
-#include "llvm/MC/MCELFObjectWriter.h"
-#include "llvm/MC/MCValue.h"
-#include "llvm/Support/ErrorHandling.h"
-
-using namespace llvm;
-
-namespace {
-class ARM64ELFObjectWriter : public MCELFObjectTargetWriter {
-public:
- ARM64ELFObjectWriter(uint8_t OSABI);
-
- virtual ~ARM64ELFObjectWriter();
-
-protected:
- unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
- bool IsPCRel) const override;
-
-private:
-};
-}
-
-ARM64ELFObjectWriter::ARM64ELFObjectWriter(uint8_t OSABI)
- : MCELFObjectTargetWriter(/*Is64Bit*/ true, OSABI, ELF::EM_AARCH64,
- /*HasRelocationAddend*/ true) {}
-
-ARM64ELFObjectWriter::~ARM64ELFObjectWriter() {}
-
-unsigned ARM64ELFObjectWriter::GetRelocType(const MCValue &Target,
- const MCFixup &Fixup,
- bool IsPCRel) const {
- ARM64MCExpr::VariantKind RefKind =
- static_cast<ARM64MCExpr::VariantKind>(Target.getRefKind());
- ARM64MCExpr::VariantKind SymLoc = ARM64MCExpr::getSymbolLoc(RefKind);
- bool IsNC = ARM64MCExpr::isNotChecked(RefKind);
-
- assert((!Target.getSymA() ||
- Target.getSymA()->getKind() == MCSymbolRefExpr::VK_None) &&
- "Should only be expression-level modifiers here");
-
- assert((!Target.getSymB() ||
- Target.getSymB()->getKind() == MCSymbolRefExpr::VK_None) &&
- "Should only be expression-level modifiers here");
-
- if (IsPCRel) {
- switch ((unsigned)Fixup.getKind()) {
- case FK_Data_2:
- return ELF::R_AARCH64_PREL16;
- case FK_Data_4:
- return ELF::R_AARCH64_PREL32;
- case FK_Data_8:
- return ELF::R_AARCH64_PREL64;
- case ARM64::fixup_arm64_pcrel_adr_imm21:
- llvm_unreachable("No ELF relocations supported for ADR at the moment");
- case ARM64::fixup_arm64_pcrel_adrp_imm21:
- if (SymLoc == ARM64MCExpr::VK_ABS && !IsNC)
- return ELF::R_AARCH64_ADR_PREL_PG_HI21;
- if (SymLoc == ARM64MCExpr::VK_GOT && !IsNC)
- return ELF::R_AARCH64_ADR_GOT_PAGE;
- if (SymLoc == ARM64MCExpr::VK_GOTTPREL && !IsNC)
- return ELF::R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21;
- if (SymLoc == ARM64MCExpr::VK_TLSDESC && !IsNC)
- return ELF::R_AARCH64_TLSDESC_ADR_PAGE;
- llvm_unreachable("invalid symbol kind for ADRP relocation");
- case ARM64::fixup_arm64_pcrel_branch26:
- return ELF::R_AARCH64_JUMP26;
- case ARM64::fixup_arm64_pcrel_call26:
- return ELF::R_AARCH64_CALL26;
- case ARM64::fixup_arm64_pcrel_imm19:
- return ELF::R_AARCH64_TLSIE_LD_GOTTPREL_PREL19;
- default:
- llvm_unreachable("Unsupported pc-relative fixup kind");
- }
- } else {
- switch ((unsigned)Fixup.getKind()) {
- case FK_Data_2:
- return ELF::R_AARCH64_ABS16;
- case FK_Data_4:
- return ELF::R_AARCH64_ABS32;
- case FK_Data_8:
- return ELF::R_AARCH64_ABS64;
- case ARM64::fixup_arm64_add_imm12:
- if (SymLoc == ARM64MCExpr::VK_DTPREL && IsNC)
- return ELF::R_AARCH64_TLSLD_ADD_DTPREL_LO12_NC;
- if (SymLoc == ARM64MCExpr::VK_DTPREL && !IsNC)
- return ELF::R_AARCH64_TLSLD_ADD_DTPREL_LO12;
- if (SymLoc == ARM64MCExpr::VK_TPREL && IsNC)
- return ELF::R_AARCH64_TLSLE_ADD_TPREL_LO12_NC;
- if (SymLoc == ARM64MCExpr::VK_TPREL && !IsNC)
- return ELF::R_AARCH64_TLSLE_ADD_TPREL_LO12;
- if (SymLoc == ARM64MCExpr::VK_TLSDESC && IsNC)
- return ELF::R_AARCH64_TLSDESC_ADD_LO12_NC;
- if (SymLoc == ARM64MCExpr::VK_ABS && IsNC)
- return ELF::R_AARCH64_ADD_ABS_LO12_NC;
-
- report_fatal_error("invalid fixup for add (uimm12) instruction");
- return 0;
- case ARM64::fixup_arm64_ldst_imm12_scale1:
- if (SymLoc == ARM64MCExpr::VK_ABS && IsNC)
- return ELF::R_AARCH64_LDST8_ABS_LO12_NC;
- if (SymLoc == ARM64MCExpr::VK_DTPREL && !IsNC)
- return ELF::R_AARCH64_TLSLD_LDST8_DTPREL_LO12;
- if (SymLoc == ARM64MCExpr::VK_DTPREL && IsNC)
- return ELF::R_AARCH64_TLSLD_LDST8_DTPREL_LO12_NC;
- if (SymLoc == ARM64MCExpr::VK_TPREL && !IsNC)
- return ELF::R_AARCH64_TLSLE_LDST8_TPREL_LO12;
- if (SymLoc == ARM64MCExpr::VK_TPREL && IsNC)
- return ELF::R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC;
-
- report_fatal_error("invalid fixup for 8-bit load/store instruction");
- return 0;
- case ARM64::fixup_arm64_ldst_imm12_scale2:
- if (SymLoc == ARM64MCExpr::VK_ABS && IsNC)
- return ELF::R_AARCH64_LDST16_ABS_LO12_NC;
- if (SymLoc == ARM64MCExpr::VK_DTPREL && !IsNC)
- return ELF::R_AARCH64_TLSLD_LDST16_DTPREL_LO12;
- if (SymLoc == ARM64MCExpr::VK_DTPREL && IsNC)
- return ELF::R_AARCH64_TLSLD_LDST16_DTPREL_LO12_NC;
- if (SymLoc == ARM64MCExpr::VK_TPREL && !IsNC)
- return ELF::R_AARCH64_TLSLE_LDST16_TPREL_LO12;
- if (SymLoc == ARM64MCExpr::VK_TPREL && IsNC)
- return ELF::R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC;
-
- report_fatal_error("invalid fixup for 16-bit load/store instruction");
- return 0;
- case ARM64::fixup_arm64_ldst_imm12_scale4:
- if (SymLoc == ARM64MCExpr::VK_ABS && IsNC)
- return ELF::R_AARCH64_LDST32_ABS_LO12_NC;
- if (SymLoc == ARM64MCExpr::VK_DTPREL && !IsNC)
- return ELF::R_AARCH64_TLSLD_LDST32_DTPREL_LO12;
- if (SymLoc == ARM64MCExpr::VK_DTPREL && IsNC)
- return ELF::R_AARCH64_TLSLD_LDST32_DTPREL_LO12_NC;
- if (SymLoc == ARM64MCExpr::VK_TPREL && !IsNC)
- return ELF::R_AARCH64_TLSLE_LDST32_TPREL_LO12;
- if (SymLoc == ARM64MCExpr::VK_TPREL && IsNC)
- return ELF::R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC;
-
- report_fatal_error("invalid fixup for 32-bit load/store instruction");
- return 0;
- case ARM64::fixup_arm64_ldst_imm12_scale8:
- if (SymLoc == ARM64MCExpr::VK_ABS && IsNC)
- return ELF::R_AARCH64_LDST64_ABS_LO12_NC;
- if (SymLoc == ARM64MCExpr::VK_GOT && IsNC)
- return ELF::R_AARCH64_LD64_GOT_LO12_NC;
- if (SymLoc == ARM64MCExpr::VK_DTPREL && !IsNC)
- return ELF::R_AARCH64_TLSLD_LDST64_DTPREL_LO12;
- if (SymLoc == ARM64MCExpr::VK_DTPREL && IsNC)
- return ELF::R_AARCH64_TLSLD_LDST64_DTPREL_LO12_NC;
- if (SymLoc == ARM64MCExpr::VK_TPREL && !IsNC)
- return ELF::R_AARCH64_TLSLE_LDST64_TPREL_LO12;
- if (SymLoc == ARM64MCExpr::VK_TPREL && IsNC)
- return ELF::R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC;
- if (SymLoc == ARM64MCExpr::VK_GOTTPREL && IsNC)
- return ELF::R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC;
- if (SymLoc == ARM64MCExpr::VK_TLSDESC && IsNC)
- return ELF::R_AARCH64_TLSDESC_LD64_LO12_NC;
-
- report_fatal_error("invalid fixup for 64-bit load/store instruction");
- return 0;
- case ARM64::fixup_arm64_ldst_imm12_scale16:
- if (SymLoc == ARM64MCExpr::VK_ABS && IsNC)
- return ELF::R_AARCH64_LDST128_ABS_LO12_NC;
-
- report_fatal_error("invalid fixup for 128-bit load/store instruction");
- return 0;
- case ARM64::fixup_arm64_movw:
- if (RefKind == ARM64MCExpr::VK_ABS_G3)
- return ELF::R_AARCH64_MOVW_UABS_G3;
- if (RefKind == ARM64MCExpr::VK_ABS_G2)
- return ELF::R_AARCH64_MOVW_UABS_G2;
- if (RefKind == ARM64MCExpr::VK_ABS_G2_NC)
- return ELF::R_AARCH64_MOVW_UABS_G2_NC;
- if (RefKind == ARM64MCExpr::VK_ABS_G1)
- return ELF::R_AARCH64_MOVW_UABS_G1;
- if (RefKind == ARM64MCExpr::VK_ABS_G1_NC)
- return ELF::R_AARCH64_MOVW_UABS_G1_NC;
- if (RefKind == ARM64MCExpr::VK_ABS_G0)
- return ELF::R_AARCH64_MOVW_UABS_G0;
- if (RefKind == ARM64MCExpr::VK_ABS_G0_NC)
- return ELF::R_AARCH64_MOVW_UABS_G0_NC;
- if (RefKind == ARM64MCExpr::VK_DTPREL_G2)
- return ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G2;
- if (RefKind == ARM64MCExpr::VK_DTPREL_G1)
- return ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G1;
- if (RefKind == ARM64MCExpr::VK_DTPREL_G1_NC)
- return ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G1_NC;
- if (RefKind == ARM64MCExpr::VK_DTPREL_G0)
- return ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G0;
- if (RefKind == ARM64MCExpr::VK_DTPREL_G0_NC)
- return ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G0_NC;
- if (RefKind == ARM64MCExpr::VK_TPREL_G2)
- return ELF::R_AARCH64_TLSLE_MOVW_TPREL_G2;
- if (RefKind == ARM64MCExpr::VK_TPREL_G1)
- return ELF::R_AARCH64_TLSLE_MOVW_TPREL_G1;
- if (RefKind == ARM64MCExpr::VK_TPREL_G1_NC)
- return ELF::R_AARCH64_TLSLE_MOVW_TPREL_G1_NC;
- if (RefKind == ARM64MCExpr::VK_TPREL_G0)
- return ELF::R_AARCH64_TLSLE_MOVW_TPREL_G0;
- if (RefKind == ARM64MCExpr::VK_TPREL_G0_NC)
- return ELF::R_AARCH64_TLSLE_MOVW_TPREL_G0_NC;
- if (RefKind == ARM64MCExpr::VK_GOTTPREL_G1)
- return ELF::R_AARCH64_TLSIE_MOVW_GOTTPREL_G1;
- if (RefKind == ARM64MCExpr::VK_GOTTPREL_G0_NC)
- return ELF::R_AARCH64_TLSIE_MOVW_GOTTPREL_G0_NC;
- report_fatal_error("invalid fixup for movz/movk instruction");
- return 0;
- case ARM64::fixup_arm64_tlsdesc_call:
- return ELF::R_AARCH64_TLSDESC_CALL;
- default:
- llvm_unreachable("Unknown ELF relocation type");
- }
- }
-
- llvm_unreachable("Unimplemented fixup -> relocation");
-}
-
-MCObjectWriter *llvm::createARM64ELFObjectWriter(raw_ostream &OS,
- uint8_t OSABI) {
- MCELFObjectTargetWriter *MOTW = new ARM64ELFObjectWriter(OSABI);
- return createELFObjectWriter(MOTW, OS, /*IsLittleEndian=*/true);
-}
diff --git a/lib/Target/ARM64/MCTargetDesc/ARM64ELFStreamer.cpp b/lib/Target/ARM64/MCTargetDesc/ARM64ELFStreamer.cpp
deleted file mode 100644
index 97a3493..0000000
--- a/lib/Target/ARM64/MCTargetDesc/ARM64ELFStreamer.cpp
+++ /dev/null
@@ -1,158 +0,0 @@
-//===- lib/MC/ARM64ELFStreamer.cpp - ELF Object Output for ARM64 ----------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
- // This file assembles .s files and emits AArch64 ELF .o object files. It
- // differs from the generic ELF streamer by emitting mapping symbols ($x and
- // $d) to delimit regions of data and code.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/MC/MCELFStreamer.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/Twine.h"
-#include "llvm/MC/MCAsmBackend.h"
-#include "llvm/MC/MCAssembler.h"
-#include "llvm/MC/MCCodeEmitter.h"
-#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCELF.h"
-#include "llvm/MC/MCELFStreamer.h"
-#include "llvm/MC/MCELFSymbolFlags.h"
-#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCInst.h"
-#include "llvm/MC/MCObjectStreamer.h"
-#include "llvm/MC/MCSection.h"
-#include "llvm/MC/MCSectionELF.h"
-#include "llvm/MC/MCStreamer.h"
-#include "llvm/MC/MCSymbol.h"
-#include "llvm/MC/MCValue.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ELF.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
-
-using namespace llvm;
-
-namespace {
-
-/// Extend the generic ELFStreamer class so that it can emit mapping symbols at
-/// the appropriate points in the object files. These symbols are defined in the
-/// AArch64 ELF ABI:
-/// infocenter.arm.com/help/topic/com.arm.doc.ihi0056a/IHI0056A_aaelf64.pdf
-///
-/// In brief: $x or $d should be emitted at the start of each contiguous region
-/// of A64 code or data in a section. In practice, this emission does not rely
-/// on explicit assembler directives but on inherent properties of the
-/// directives doing the emission (e.g. ".byte" is data, "add x0, x0, x0" an
-/// instruction).
-///
-/// As a result this system is orthogonal to the DataRegion infrastructure used
-/// by MachO. Beware!
-class ARM64ELFStreamer : public MCELFStreamer {
-public:
- ARM64ELFStreamer(MCContext &Context, MCAsmBackend &TAB, raw_ostream &OS,
- MCCodeEmitter *Emitter)
- : MCELFStreamer(Context, TAB, OS, Emitter), MappingSymbolCounter(0),
- LastEMS(EMS_None) {}
-
- ~ARM64ELFStreamer() {}
-
- virtual void ChangeSection(const MCSection *Section,
- const MCExpr *Subsection) {
- // We have to keep track of the mapping symbol state of any sections we
- // use. Each one should start off as EMS_None, which is provided as the
- // default constructor by DenseMap::lookup.
- LastMappingSymbols[getPreviousSection().first] = LastEMS;
- LastEMS = LastMappingSymbols.lookup(Section);
-
- MCELFStreamer::ChangeSection(Section, Subsection);
- }
-
- /// This function is the one used to emit instruction data into the ELF
- /// streamer. We override it to add the appropriate mapping symbol if
- /// necessary.
- virtual void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI) {
- EmitA64MappingSymbol();
- MCELFStreamer::EmitInstruction(Inst, STI);
- }
-
- /// This is one of the functions used to emit data into an ELF section, so the
- /// ARM64 streamer overrides it to add the appropriate mapping symbol ($d)
- /// if necessary.
- virtual void EmitBytes(StringRef Data) {
- EmitDataMappingSymbol();
- MCELFStreamer::EmitBytes(Data);
- }
-
- /// This is one of the functions used to emit data into an ELF section, so the
- /// ARM64 streamer overrides it to add the appropriate mapping symbol ($d)
- /// if necessary.
- virtual void EmitValueImpl(const MCExpr *Value, unsigned Size) {
- EmitDataMappingSymbol();
- MCELFStreamer::EmitValueImpl(Value, Size);
- }
-
-private:
- enum ElfMappingSymbol {
- EMS_None,
- EMS_A64,
- EMS_Data
- };
-
- void EmitDataMappingSymbol() {
- if (LastEMS == EMS_Data)
- return;
- EmitMappingSymbol("$d");
- LastEMS = EMS_Data;
- }
-
- void EmitA64MappingSymbol() {
- if (LastEMS == EMS_A64)
- return;
- EmitMappingSymbol("$x");
- LastEMS = EMS_A64;
- }
-
- void EmitMappingSymbol(StringRef Name) {
- MCSymbol *Start = getContext().CreateTempSymbol();
- EmitLabel(Start);
-
- MCSymbol *Symbol = getContext().GetOrCreateSymbol(
- Name + "." + Twine(MappingSymbolCounter++));
-
- MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
- MCELF::SetType(SD, ELF::STT_NOTYPE);
- MCELF::SetBinding(SD, ELF::STB_LOCAL);
- SD.setExternal(false);
- Symbol->setSection(*getCurrentSection().first);
-
- const MCExpr *Value = MCSymbolRefExpr::Create(Start, getContext());
- Symbol->setVariableValue(Value);
- }
-
- int64_t MappingSymbolCounter;
-
- DenseMap<const MCSection *, ElfMappingSymbol> LastMappingSymbols;
- ElfMappingSymbol LastEMS;
-
- /// @}
-};
-}
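To make the class's behaviour concrete, here is a standalone sketch of the LastEMS state machine above (not LLVM code): one mapping symbol is emitted per code/data transition, not per directive.

    #include <cstdio>

    enum ElfMappingSymbol { EMS_None, EMS_A64, EMS_Data };
    static ElfMappingSymbol LastEMS = EMS_None;

    static void emit(ElfMappingSymbol Kind, const char *Sym) {
      if (LastEMS == Kind)
        return; // still in the same kind of region: no new symbol
      std::printf("%s\n", Sym);
      LastEMS = Kind;
    }

    int main() {
      emit(EMS_A64, "$x");  // add x0, x0, x0 -> "$x"
      emit(EMS_A64, "$x");  // add x1, x1, x1 -> nothing, still code
      emit(EMS_Data, "$d"); // .byte 0x1f     -> "$d"
      emit(EMS_A64, "$x");  // ret            -> "$x"
      return 0;
    }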
-
-namespace llvm {
-MCELFStreamer *createARM64ELFStreamer(MCContext &Context, MCAsmBackend &TAB,
- raw_ostream &OS, MCCodeEmitter *Emitter,
- bool RelaxAll, bool NoExecStack) {
- ARM64ELFStreamer *S = new ARM64ELFStreamer(Context, TAB, OS, Emitter);
- if (RelaxAll)
- S->getAssembler().setRelaxAll(true);
- if (NoExecStack)
- S->getAssembler().setNoExecStack(true);
- return S;
-}
-}
diff --git a/lib/Target/ARM64/MCTargetDesc/ARM64ELFStreamer.h b/lib/Target/ARM64/MCTargetDesc/ARM64ELFStreamer.h
deleted file mode 100644
index 72dadbc..0000000
--- a/lib/Target/ARM64/MCTargetDesc/ARM64ELFStreamer.h
+++ /dev/null
@@ -1,26 +0,0 @@
-//===-- ARM64ELFStreamer.h - ELF Streamer for ARM64 -------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements ELF streamer information for the ARM64 backend.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_AARCH64_ELF_STREAMER_H
-#define LLVM_AARCH64_ELF_STREAMER_H
-
-#include "llvm/MC/MCELFStreamer.h"
-
-namespace llvm {
-
-MCELFStreamer *createARM64ELFStreamer(MCContext &Context, MCAsmBackend &TAB,
- raw_ostream &OS, MCCodeEmitter *Emitter,
- bool RelaxAll, bool NoExecStack);
-}
-
-#endif // LLVM_AARCH64_ELF_STREAMER_H
diff --git a/lib/Target/ARM64/MCTargetDesc/ARM64FixupKinds.h b/lib/Target/ARM64/MCTargetDesc/ARM64FixupKinds.h
deleted file mode 100644
index 02eb91f..0000000
--- a/lib/Target/ARM64/MCTargetDesc/ARM64FixupKinds.h
+++ /dev/null
@@ -1,72 +0,0 @@
-//===-- ARM64FixupKinds.h - ARM64 Specific Fixup Entries --------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_ARM64FIXUPKINDS_H
-#define LLVM_ARM64FIXUPKINDS_H
-
-#include "llvm/MC/MCFixup.h"
-
-namespace llvm {
-namespace ARM64 {
-
-enum Fixups {
- // fixup_arm64_pcrel_adr_imm21 - A 21-bit pc-relative immediate inserted into
- // an ADR instruction.
- fixup_arm64_pcrel_adr_imm21 = FirstTargetFixupKind,
-
- // fixup_arm64_pcrel_adrp_imm21 - A 21-bit pc-relative immediate inserted into
- // an ADRP instruction.
- fixup_arm64_pcrel_adrp_imm21,
-
- // fixup_arm64_add_imm12 - 12-bit fixup for add/sub instructions.
- // No alignment adjustment. All value bits are encoded.
- fixup_arm64_add_imm12,
-
- // fixup_arm64_ldst_imm12_* - unsigned 12-bit fixups for load and
- // store instructions.
- fixup_arm64_ldst_imm12_scale1,
- fixup_arm64_ldst_imm12_scale2,
- fixup_arm64_ldst_imm12_scale4,
- fixup_arm64_ldst_imm12_scale8,
- fixup_arm64_ldst_imm12_scale16,
-
- // fixup_arm64_movw - fixup for the 16-bit immediate field of a MOVZ/MOVK.
- fixup_arm64_movw,
-
- // fixup_arm64_pcrel_branch14 - The high 14 bits of a 16-bit pc-relative
- // immediate (a TBZ/TBNZ branch target).
- fixup_arm64_pcrel_branch14,
-
- // fixup_arm64_pcrel_imm19 - The high 19 bits of a 21-bit pc-relative
- // immediate, used by LDR (literal), conditional branches and
- // compare-and-branch instructions. It is not used as part of a lo/hi
- // pair and thus generates relocations directly when necessary.
- fixup_arm64_pcrel_imm19,
-
- // fixup_arm64_pcrel_branch26 - The high 26 bits of a 28-bit pc-relative
- // immediate.
- fixup_arm64_pcrel_branch26,
-
- // fixup_arm64_pcrel_call26 - The high 26 bits of a 28-bit pc-relative
- // immediate. Distinguished from branch26 only on ELF.
- fixup_arm64_pcrel_call26,
-
- // fixup_arm64_tlsdesc_call - zero-space placeholder for the ELF
- // R_AARCH64_TLSDESC_CALL relocation.
- fixup_arm64_tlsdesc_call,
-
- // Marker
- LastTargetFixupKind,
- NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
-};
-
-} // end namespace ARM64
-} // end namespace llvm
-
-#endif
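Reviewer note: the fixup kinds above only name slots; the asm backend does
the bit-patching once the target address is known. As a hedged illustration
(bit positions follow the architected AArch64 encodings, not any function in
this patch), resolving a 19-bit pc-relative fixup looks roughly like:

  #include <cstdint>

  // Fold a resolved byte offset into an instruction word whose imm19 field
  // occupies bits [23:5] (conditional branches, CBZ/CBNZ, LDR literal).
  uint32_t applyPCRelImm19(uint32_t Inst, int64_t Offset) {
    uint32_t Imm = static_cast<uint32_t>(Offset >> 2) & 0x7FFFF; // words
    return Inst | (Imm << 5);
  }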
diff --git a/lib/Target/ARM64/MCTargetDesc/ARM64MCAsmInfo.cpp b/lib/Target/ARM64/MCTargetDesc/ARM64MCAsmInfo.cpp
deleted file mode 100644
index 97e0d3c..0000000
--- a/lib/Target/ARM64/MCTargetDesc/ARM64MCAsmInfo.cpp
+++ /dev/null
@@ -1,92 +0,0 @@
-//===-- ARM64MCAsmInfo.cpp - ARM64 asm properties -----------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the implementations of the ARM64MCAsmInfo properties.
-//
-//===----------------------------------------------------------------------===//
-
-#include "ARM64MCAsmInfo.h"
-#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCStreamer.h"
-#include "llvm/Support/CommandLine.h"
-using namespace llvm;
-
-enum AsmWriterVariantTy {
- Default = -1,
- Generic = 0,
- Apple = 1
-};
-
-static cl::opt<AsmWriterVariantTy> AsmWriterVariant(
- "arm64-neon-syntax", cl::init(Default),
- cl::desc("Choose style of NEON code to emit from ARM64 backend:"),
- cl::values(clEnumValN(Generic, "generic", "Emit generic NEON assembly"),
- clEnumValN(Apple, "apple", "Emit Apple-style NEON assembly"),
- clEnumValEnd));
-
-ARM64MCAsmInfoDarwin::ARM64MCAsmInfoDarwin() {
- // We prefer NEON instructions to be printed in the short form.
- AssemblerDialect = AsmWriterVariant == Default ? 1 : AsmWriterVariant;
-
- PrivateGlobalPrefix = "L";
- SeparatorString = "%%";
- CommentString = ";";
- PointerSize = CalleeSaveStackSlotSize = 8;
-
- AlignmentIsInBytes = false;
- UsesELFSectionDirectiveForBSS = true;
- SupportsDebugInformation = true;
- UseDataRegionDirectives = true;
-
- ExceptionsType = ExceptionHandling::DwarfCFI;
-}
-
-const MCExpr *ARM64MCAsmInfoDarwin::getExprForPersonalitySymbol(
- const MCSymbol *Sym, unsigned Encoding, MCStreamer &Streamer) const {
- // On Darwin, we can reference dwarf symbols with foo@GOT-., which
- // is an indirect pc-relative reference. The default implementation
- // won't reference using the GOT, so we need this target-specific
- // version.
- MCContext &Context = Streamer.getContext();
- const MCExpr *Res =
- MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_GOT, Context);
- MCSymbol *PCSym = Context.CreateTempSymbol();
- Streamer.EmitLabel(PCSym);
- const MCExpr *PC = MCSymbolRefExpr::Create(PCSym, Context);
- return MCBinaryExpr::CreateSub(Res, PC, Context);
-}
-
-ARM64MCAsmInfoELF::ARM64MCAsmInfoELF() {
- // We prefer NEON instructions to be printed in the short form.
- AssemblerDialect = AsmWriterVariant == Default ? 0 : AsmWriterVariant;
-
- PointerSize = 8;
-
- // ".comm align is in bytes but .align is pow-2."
- AlignmentIsInBytes = false;
-
- CommentString = "//";
- PrivateGlobalPrefix = ".L";
- Code32Directive = ".code\t32";
-
- Data16bitsDirective = "\t.hword\t";
- Data32bitsDirective = "\t.word\t";
- Data64bitsDirective = "\t.xword\t";
-
- UseDataRegionDirectives = false;
-
- WeakRefDirective = "\t.weak\t";
-
- HasLEB128 = true;
- SupportsDebugInformation = true;
-
- // Exceptions handling
- ExceptionsType = ExceptionHandling::DwarfCFI;
-}
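Reviewer note: both constructors above resolve the same -arm64-neon-syntax
option against different defaults. Restated as a standalone sketch (variant
0 is generic syntax, 1 is Apple syntax, per the cl::values table):

  // -1 models an unset option, matching the Default enumerator above.
  int resolveNeonDialect(bool IsDarwin, int CommandLine = -1) {
    if (CommandLine != -1)
      return CommandLine;    // an explicit flag wins on every platform
    return IsDarwin ? 1 : 0; // Darwin prefers the short (Apple) form
  }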
diff --git a/lib/Target/ARM64/MCTargetDesc/ARM64MCAsmInfo.h b/lib/Target/ARM64/MCTargetDesc/ARM64MCAsmInfo.h
deleted file mode 100644
index f2d33a7..0000000
--- a/lib/Target/ARM64/MCTargetDesc/ARM64MCAsmInfo.h
+++ /dev/null
@@ -1,36 +0,0 @@
-//=====-- ARM64MCAsmInfo.h - ARM64 asm properties -----------*- C++ -*--====//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the declaration of the ARM64MCAsmInfo class.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef ARM64TARGETASMINFO_H
-#define ARM64TARGETASMINFO_H
-
-#include "llvm/MC/MCAsmInfoDarwin.h"
-
-namespace llvm {
-class Target;
-class StringRef;
-class MCStreamer;
-struct ARM64MCAsmInfoDarwin : public MCAsmInfoDarwin {
- explicit ARM64MCAsmInfoDarwin();
- virtual const MCExpr *getExprForPersonalitySymbol(const MCSymbol *Sym,
- unsigned Encoding,
- MCStreamer &Streamer) const;
-};
-
-struct ARM64MCAsmInfoELF : public MCAsmInfo {
- explicit ARM64MCAsmInfoELF();
-};
-
-} // namespace llvm
-
-#endif
diff --git a/lib/Target/ARM64/MCTargetDesc/ARM64MCCodeEmitter.cpp b/lib/Target/ARM64/MCTargetDesc/ARM64MCCodeEmitter.cpp
deleted file mode 100644
index 19559f8..0000000
--- a/lib/Target/ARM64/MCTargetDesc/ARM64MCCodeEmitter.cpp
+++ /dev/null
@@ -1,563 +0,0 @@
-//===-- ARM64/ARM64MCCodeEmitter.cpp - Convert ARM64 code to machine code -===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the ARM64MCCodeEmitter class.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "mccodeemitter"
-#include "MCTargetDesc/ARM64AddressingModes.h"
-#include "MCTargetDesc/ARM64BaseInfo.h"
-#include "MCTargetDesc/ARM64FixupKinds.h"
-#include "MCTargetDesc/ARM64MCExpr.h"
-#include "llvm/MC/MCCodeEmitter.h"
-#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCInst.h"
-#include "llvm/MC/MCInstrInfo.h"
-#include "llvm/MC/MCRegisterInfo.h"
-#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Support/raw_ostream.h"
-using namespace llvm;
-
-STATISTIC(MCNumEmitted, "Number of MC instructions emitted.");
-STATISTIC(MCNumFixups, "Number of MC fixups created.");
-
-namespace {
-
-class ARM64MCCodeEmitter : public MCCodeEmitter {
- MCContext &Ctx;
-
- ARM64MCCodeEmitter(const ARM64MCCodeEmitter &); // DO NOT IMPLEMENT
- void operator=(const ARM64MCCodeEmitter &); // DO NOT IMPLEMENT
-public:
- ARM64MCCodeEmitter(const MCInstrInfo &mcii, const MCSubtargetInfo &sti,
- MCContext &ctx)
- : Ctx(ctx) {}
-
- ~ARM64MCCodeEmitter() {}
-
- // getBinaryCodeForInstr - TableGen'erated function for getting the
- // binary encoding for an instruction.
- uint64_t getBinaryCodeForInstr(const MCInst &MI,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
-
- /// getMachineOpValue - Return binary encoding of operand. If the machine
- /// operand requires relocation, record the relocation and return zero.
- unsigned getMachineOpValue(const MCInst &MI, const MCOperand &MO,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
-
- /// getAMIndexed8OpValue - Return encoding info for base register
- /// and 12-bit unsigned immediate attached to a load, store or prfm
- /// instruction. If operand requires a relocation, record it and
- /// return zero in that part of the encoding.
- template <uint32_t FixupKind>
- uint32_t getAMIndexed8OpValue(const MCInst &MI, unsigned OpIdx,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
-
- /// getAdrLabelOpValue - Return encoding info for 21-bit immediate ADR label
- /// target.
- uint32_t getAdrLabelOpValue(const MCInst &MI, unsigned OpIdx,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
-
- /// getAddSubImmOpValue - Return encoding for the 12-bit immediate value and
- /// the 2-bit shift field.
- uint32_t getAddSubImmOpValue(const MCInst &MI, unsigned OpIdx,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
-
- /// getCondBranchTargetOpValue - Return the encoded value for a conditional
- /// branch target.
- uint32_t getCondBranchTargetOpValue(const MCInst &MI, unsigned OpIdx,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
-
- /// getTestBranchTargetOpValue - Return the encoded value for a test-bit-and-
- /// branch target.
- uint32_t getTestBranchTargetOpValue(const MCInst &MI, unsigned OpIdx,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
-
- /// getBranchTargetOpValue - Return the encoded value for an unconditional
- /// branch target.
- uint32_t getBranchTargetOpValue(const MCInst &MI, unsigned OpIdx,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
-
- /// getMoveWideImmOpValue - Return the encoded value for the immediate operand
- /// of a MOVZ or MOVK instruction.
- uint32_t getMoveWideImmOpValue(const MCInst &MI, unsigned OpIdx,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
-
- /// getVecShifterOpValue - Return the encoded value for the vector shifter.
- uint32_t getVecShifterOpValue(const MCInst &MI, unsigned OpIdx,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
-
- /// getMoveVecShifterOpValue - Return the encoded value for the vector move
- /// shifter (MSL).
- uint32_t getMoveVecShifterOpValue(const MCInst &MI, unsigned OpIdx,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
-
- /// getFixedPointScaleOpValue - Return the encoded value for the
- /// FP-to-fixed-point scale factor.
- uint32_t getFixedPointScaleOpValue(const MCInst &MI, unsigned OpIdx,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
-
- uint32_t getVecShiftR64OpValue(const MCInst &MI, unsigned OpIdx,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
- uint32_t getVecShiftR32OpValue(const MCInst &MI, unsigned OpIdx,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
- uint32_t getVecShiftR16OpValue(const MCInst &MI, unsigned OpIdx,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
- uint32_t getVecShiftR8OpValue(const MCInst &MI, unsigned OpIdx,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
- uint32_t getVecShiftL64OpValue(const MCInst &MI, unsigned OpIdx,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
- uint32_t getVecShiftL32OpValue(const MCInst &MI, unsigned OpIdx,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
- uint32_t getVecShiftL16OpValue(const MCInst &MI, unsigned OpIdx,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
- uint32_t getVecShiftL8OpValue(const MCInst &MI, unsigned OpIdx,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
-
- /// getSIMDShift64OpValue - Return the encoded value for the
- /// shift-by-immediate AdvSIMD instructions.
- uint32_t getSIMDShift64OpValue(const MCInst &MI, unsigned OpIdx,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
-
- uint32_t getSIMDShift64_32OpValue(const MCInst &MI, unsigned OpIdx,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
-
- uint32_t getSIMDShift32OpValue(const MCInst &MI, unsigned OpIdx,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
-
- uint32_t getSIMDShift16OpValue(const MCInst &MI, unsigned OpIdx,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
-
- unsigned fixMOVZ(const MCInst &MI, unsigned EncodedValue,
- const MCSubtargetInfo &STI) const;
-
- void EmitByte(unsigned char C, raw_ostream &OS) const { OS << (char)C; }
-
- void EmitConstant(uint64_t Val, unsigned Size, raw_ostream &OS) const {
- // Output the constant in little endian byte order.
- for (unsigned i = 0; i != Size; ++i) {
- EmitByte(Val & 255, OS);
- Val >>= 8;
- }
- }
-
- void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
-};
-
-} // end anonymous namespace
-
-MCCodeEmitter *llvm::createARM64MCCodeEmitter(const MCInstrInfo &MCII,
- const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI,
- MCContext &Ctx) {
- return new ARM64MCCodeEmitter(MCII, STI, Ctx);
-}
-
-/// getMachineOpValue - Return binary encoding of operand. If the machine
-/// operand requires relocation, record the relocation and return zero.
-unsigned
-ARM64MCCodeEmitter::getMachineOpValue(const MCInst &MI, const MCOperand &MO,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
- if (MO.isReg())
- return Ctx.getRegisterInfo()->getEncodingValue(MO.getReg());
- else {
- assert(MO.isImm() && "did not expect relocated expression");
- return static_cast<unsigned>(MO.getImm());
- }
-
- assert(0 && "Unable to encode MCOperand!");
- return 0;
-}
-
-template <uint32_t FixupKind>
-uint32_t
-ARM64MCCodeEmitter::getAMIndexed8OpValue(const MCInst &MI, unsigned OpIdx,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
- unsigned BaseReg = MI.getOperand(OpIdx).getReg();
- BaseReg = Ctx.getRegisterInfo()->getEncodingValue(BaseReg);
-
- const MCOperand &MO = MI.getOperand(OpIdx + 1);
- uint32_t ImmVal = 0;
-
- if (MO.isImm())
- ImmVal = static_cast<uint32_t>(MO.getImm());
- else {
- assert(MO.isExpr() && "unable to encode load/store imm operand");
- MCFixupKind Kind = MCFixupKind(FixupKind);
- Fixups.push_back(MCFixup::Create(0, MO.getExpr(), Kind, MI.getLoc()));
- ++MCNumFixups;
- }
-
- return BaseReg | (ImmVal << 5);
-}
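// Reviewer note (sketch, not part of the patch): the value returned above
// packs both suboperands into one field -- base register in bits [4:0],
// unsigned scaled immediate in bits [16:5]. A standalone restatement:
static inline uint32_t packIndexed(uint32_t BaseRegEnc, uint32_t Imm12) {
  return BaseRegEnc | (Imm12 << 5); // e.g. packIndexed(2, 0x1F) == 0x3E2
}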
-
-/// getAdrLabelOpValue - Return encoding info for 21-bit immediate ADR label
-/// target.
-uint32_t
-ARM64MCCodeEmitter::getAdrLabelOpValue(const MCInst &MI, unsigned OpIdx,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
- const MCOperand &MO = MI.getOperand(OpIdx);
-
- // If the destination is an immediate, we have nothing to do.
- if (MO.isImm())
- return MO.getImm();
- assert(MO.isExpr() && "Unexpected ADR target type!");
- const MCExpr *Expr = MO.getExpr();
-
- MCFixupKind Kind = MI.getOpcode() == ARM64::ADR
- ? MCFixupKind(ARM64::fixup_arm64_pcrel_adr_imm21)
- : MCFixupKind(ARM64::fixup_arm64_pcrel_adrp_imm21);
- Fixups.push_back(MCFixup::Create(0, Expr, Kind, MI.getLoc()));
-
- MCNumFixups += 1;
-
- // All of the information is in the fixup.
- return 0;
-}
-
-/// getAddSubImmOpValue - Return encoding for the 12-bit immediate value and
-/// the 2-bit shift field. The immediate occupies bits 0-11 of the return
-/// value; the LSL #12 flag is placed directly above it, in bit 12.
-uint32_t
-ARM64MCCodeEmitter::getAddSubImmOpValue(const MCInst &MI, unsigned OpIdx,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
- // Suboperands are [imm, shifter].
- const MCOperand &MO = MI.getOperand(OpIdx);
- const MCOperand &MO1 = MI.getOperand(OpIdx + 1);
- assert(ARM64_AM::getShiftType(MO1.getImm()) == ARM64_AM::LSL &&
- "unexpected shift type for add/sub immediate");
- unsigned ShiftVal = ARM64_AM::getShiftValue(MO1.getImm());
- assert((ShiftVal == 0 || ShiftVal == 12) &&
- "unexpected shift value for add/sub immediate");
- if (MO.isImm())
- return MO.getImm() | (ShiftVal == 0 ? 0 : (1 << 12));
- assert(MO.isExpr() && "Unable to encode MCOperand!");
- const MCExpr *Expr = MO.getExpr();
- assert(ShiftVal == 0 && "shift not allowed on add/sub immediate with fixup");
-
- // Encode the 12 bits of the fixup.
- MCFixupKind Kind = MCFixupKind(ARM64::fixup_arm64_add_imm12);
- Fixups.push_back(MCFixup::Create(0, Expr, Kind, MI.getLoc()));
-
- ++MCNumFixups;
-
- return 0;
-}
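// Reviewer note (sketch, not part of the patch): a worked example of the
// encoding above. The 12-bit immediate fills bits [11:0]; LSL #12 sets the
// one usable bit of the shift field, bit 12.
static inline uint32_t packAddSubImm(uint32_t Imm12, unsigned ShiftVal) {
  return (Imm12 & 0xFFF) | (ShiftVal == 12 ? (1u << 12) : 0u);
}
// packAddSubImm(0x123, 12) == 0x1123; packAddSubImm(0x123, 0) == 0x123.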
-
-/// getCondBranchTargetOpValue - Return the encoded value for a conditional
-/// branch target.
-uint32_t ARM64MCCodeEmitter::getCondBranchTargetOpValue(
- const MCInst &MI, unsigned OpIdx, SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
- const MCOperand &MO = MI.getOperand(OpIdx);
-
- // If the destination is an immediate, we have nothing to do.
- if (MO.isImm())
- return MO.getImm();
- assert(MO.isExpr() && "Unexpected target type!");
-
- MCFixupKind Kind = MCFixupKind(ARM64::fixup_arm64_pcrel_imm19);
- Fixups.push_back(MCFixup::Create(0, MO.getExpr(), Kind, MI.getLoc()));
-
- ++MCNumFixups;
-
- // All of the information is in the fixup.
- return 0;
-}
-
-uint32_t
-ARM64MCCodeEmitter::getMoveWideImmOpValue(const MCInst &MI, unsigned OpIdx,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
- const MCOperand &MO = MI.getOperand(OpIdx);
-
- if (MO.isImm())
- return MO.getImm();
- assert(MO.isExpr() && "Unexpected movz/movk immediate");
-
- Fixups.push_back(MCFixup::Create(
- 0, MO.getExpr(), MCFixupKind(ARM64::fixup_arm64_movw), MI.getLoc()));
-
- ++MCNumFixups;
-
- return 0;
-}
-
-/// getTestBranchTargetOpValue - Return the encoded value for a test-bit-and-
-/// branch target.
-uint32_t ARM64MCCodeEmitter::getTestBranchTargetOpValue(
- const MCInst &MI, unsigned OpIdx, SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
- const MCOperand &MO = MI.getOperand(OpIdx);
-
- // If the destination is an immediate, we have nothing to do.
- if (MO.isImm())
- return MO.getImm();
- assert(MO.isExpr() && "Unexpected ADR target type!");
-
- MCFixupKind Kind = MCFixupKind(ARM64::fixup_arm64_pcrel_branch14);
- Fixups.push_back(MCFixup::Create(0, MO.getExpr(), Kind, MI.getLoc()));
-
- ++MCNumFixups;
-
- // All of the information is in the fixup.
- return 0;
-}
-
-/// getBranchTargetOpValue - Return the encoded value for an unconditional
-/// branch target.
-uint32_t
-ARM64MCCodeEmitter::getBranchTargetOpValue(const MCInst &MI, unsigned OpIdx,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
- const MCOperand &MO = MI.getOperand(OpIdx);
-
- // If the destination is an immediate, we have nothing to do.
- if (MO.isImm())
- return MO.getImm();
- assert(MO.isExpr() && "Unexpected ADR target type!");
-
- MCFixupKind Kind = MI.getOpcode() == ARM64::BL
- ? MCFixupKind(ARM64::fixup_arm64_pcrel_call26)
- : MCFixupKind(ARM64::fixup_arm64_pcrel_branch26);
- Fixups.push_back(MCFixup::Create(0, MO.getExpr(), Kind, MI.getLoc()));
-
- ++MCNumFixups;
-
- // All of the information is in the fixup.
- return 0;
-}
-
-/// getVecShifterOpValue - Return the encoded value for the vector shifter:
-///
-/// 00 -> 0
-/// 01 -> 8
-/// 10 -> 16
-/// 11 -> 24
-uint32_t
-ARM64MCCodeEmitter::getVecShifterOpValue(const MCInst &MI, unsigned OpIdx,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
- const MCOperand &MO = MI.getOperand(OpIdx);
- assert(MO.isImm() && "Expected an immediate value for the shift amount!");
-
- switch (MO.getImm()) {
- default:
- break;
- case 0:
- return 0;
- case 8:
- return 1;
- case 16:
- return 2;
- case 24:
- return 3;
- }
-
- assert(false && "Invalid value for vector shift amount!");
- return 0;
-}
-
-uint32_t
-ARM64MCCodeEmitter::getSIMDShift64OpValue(const MCInst &MI, unsigned OpIdx,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
- const MCOperand &MO = MI.getOperand(OpIdx);
- assert(MO.isImm() && "Expected an immediate value for the shift amount!");
- return 64 - (MO.getImm());
-}
-
-uint32_t
-ARM64MCCodeEmitter::getSIMDShift64_32OpValue(const MCInst &MI, unsigned OpIdx,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
- const MCOperand &MO = MI.getOperand(OpIdx);
- assert(MO.isImm() && "Expected an immediate value for the shift amount!");
- return 64 - (MO.getImm() | 32);
-}
-
-uint32_t
-ARM64MCCodeEmitter::getSIMDShift32OpValue(const MCInst &MI, unsigned OpIdx,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
- const MCOperand &MO = MI.getOperand(OpIdx);
- assert(MO.isImm() && "Expected an immediate value for the shift amount!");
- return 32 - (MO.getImm() | 16);
-}
-
-uint32_t
-ARM64MCCodeEmitter::getSIMDShift16OpValue(const MCInst &MI, unsigned OpIdx,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
- const MCOperand &MO = MI.getOperand(OpIdx);
- assert(MO.isImm() && "Expected an immediate value for the shift amount!");
- return 16 - (MO.getImm() | 8);
-}
-
-/// getFixedPointScaleOpValue - Return the encoded value for the
-/// FP-to-fixed-point scale factor.
-uint32_t ARM64MCCodeEmitter::getFixedPointScaleOpValue(
- const MCInst &MI, unsigned OpIdx, SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
- const MCOperand &MO = MI.getOperand(OpIdx);
- assert(MO.isImm() && "Expected an immediate value for the scale amount!");
- return 64 - MO.getImm();
-}
-
-uint32_t
-ARM64MCCodeEmitter::getVecShiftR64OpValue(const MCInst &MI, unsigned OpIdx,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
- const MCOperand &MO = MI.getOperand(OpIdx);
- assert(MO.isImm() && "Expected an immediate value for the scale amount!");
- return 64 - MO.getImm();
-}
-
-uint32_t
-ARM64MCCodeEmitter::getVecShiftR32OpValue(const MCInst &MI, unsigned OpIdx,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
- const MCOperand &MO = MI.getOperand(OpIdx);
- assert(MO.isImm() && "Expected an immediate value for the scale amount!");
- return 32 - MO.getImm();
-}
-
-uint32_t
-ARM64MCCodeEmitter::getVecShiftR16OpValue(const MCInst &MI, unsigned OpIdx,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
- const MCOperand &MO = MI.getOperand(OpIdx);
- assert(MO.isImm() && "Expected an immediate value for the scale amount!");
- return 16 - MO.getImm();
-}
-
-uint32_t
-ARM64MCCodeEmitter::getVecShiftR8OpValue(const MCInst &MI, unsigned OpIdx,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
- const MCOperand &MO = MI.getOperand(OpIdx);
- assert(MO.isImm() && "Expected an immediate value for the scale amount!");
- return 8 - MO.getImm();
-}
-
-uint32_t
-ARM64MCCodeEmitter::getVecShiftL64OpValue(const MCInst &MI, unsigned OpIdx,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
- const MCOperand &MO = MI.getOperand(OpIdx);
- assert(MO.isImm() && "Expected an immediate value for the scale amount!");
- return MO.getImm() - 64;
-}
-
-uint32_t
-ARM64MCCodeEmitter::getVecShiftL32OpValue(const MCInst &MI, unsigned OpIdx,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
- const MCOperand &MO = MI.getOperand(OpIdx);
- assert(MO.isImm() && "Expected an immediate value for the scale amount!");
- return MO.getImm() - 32;
-}
-
-uint32_t
-ARM64MCCodeEmitter::getVecShiftL16OpValue(const MCInst &MI, unsigned OpIdx,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
- const MCOperand &MO = MI.getOperand(OpIdx);
- assert(MO.isImm() && "Expected an immediate value for the scale amount!");
- return MO.getImm() - 16;
-}
-
-uint32_t
-ARM64MCCodeEmitter::getVecShiftL8OpValue(const MCInst &MI, unsigned OpIdx,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
- const MCOperand &MO = MI.getOperand(OpIdx);
- assert(MO.isImm() && "Expected an immediate value for the scale amount!");
- return MO.getImm() - 8;
-}
-
-/// getMoveVecShifterOpValue - Return the encoded value for the vector move
-/// shifter (MSL).
-uint32_t
-ARM64MCCodeEmitter::getMoveVecShifterOpValue(const MCInst &MI, unsigned OpIdx,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
- const MCOperand &MO = MI.getOperand(OpIdx);
- assert(MO.isImm() &&
- "Expected an immediate value for the move shift amount!");
- unsigned ShiftVal = ARM64_AM::getShiftValue(MO.getImm());
- assert((ShiftVal == 8 || ShiftVal == 16) && "Invalid shift amount!");
- return ShiftVal == 8 ? 0 : 1;
-}
-
-unsigned ARM64MCCodeEmitter::fixMOVZ(const MCInst &MI, unsigned EncodedValue,
- const MCSubtargetInfo &STI) const {
- // If one of the signed fixup kinds is applied to a MOVZ instruction, the
- // eventual result could be either a MOVZ or a MOVN. It's the MCCodeEmitter's
- // job to ensure that any bits possibly affected by this are 0. This means we
- // must zero out bit 30 (essentially emitting a MOVN).
- MCOperand UImm16MO = MI.getOperand(1);
-
- // Nothing to do if there's no fixup.
- if (UImm16MO.isImm())
- return EncodedValue;
-
- return EncodedValue & ~(1u << 30);
-}
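// Reviewer note (sketch, not part of the patch): MOVZ and MOVN share an
// encoding that differs only in the opc field at bits [30:29], so clearing
// bit 30 as above leaves a pattern the linker can resolve to either form:
static inline unsigned makeMovzMovnNeutral(unsigned Encoded) {
  return Encoded & ~(1u << 30);
}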
-
-void ARM64MCCodeEmitter::EncodeInstruction(const MCInst &MI, raw_ostream &OS,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
- if (MI.getOpcode() == ARM64::TLSDESCCALL) {
- // This is a directive which applies an R_AARCH64_TLSDESC_CALL to the
- // following (BLR) instruction. It doesn't emit any code itself so it
- // doesn't go through the normal TableGenerated channels.
- MCFixupKind Fixup = MCFixupKind(ARM64::fixup_arm64_tlsdesc_call);
- Fixups.push_back(MCFixup::Create(0, MI.getOperand(0).getExpr(), Fixup));
- return;
- }
-
- uint64_t Binary = getBinaryCodeForInstr(MI, Fixups, STI);
- EmitConstant(Binary, 4, OS);
- ++MCNumEmitted; // Keep track of the # of mi's emitted.
-}
-
-#include "ARM64GenMCCodeEmitter.inc"
diff --git a/lib/Target/ARM64/MCTargetDesc/ARM64MCExpr.cpp b/lib/Target/ARM64/MCTargetDesc/ARM64MCExpr.cpp
deleted file mode 100644
index d4ab140..0000000
--- a/lib/Target/ARM64/MCTargetDesc/ARM64MCExpr.cpp
+++ /dev/null
@@ -1,168 +0,0 @@
-//===-- ARM64MCExpr.cpp - ARM64 specific MC expression classes --------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the implementation of the assembly expression modifiers
-// accepted by the AArch64 architecture (e.g. ":lo12:", ":gottprel_g1:", ...).
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "aarch64symbolrefexpr"
-#include "ARM64MCExpr.h"
-#include "llvm/MC/MCAssembler.h"
-#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCELF.h"
-#include "llvm/MC/MCSymbol.h"
-#include "llvm/MC/MCValue.h"
-#include "llvm/Object/ELF.h"
-#include "llvm/Support/ErrorHandling.h"
-
-using namespace llvm;
-
-const ARM64MCExpr *ARM64MCExpr::Create(const MCExpr *Expr, VariantKind Kind,
- MCContext &Ctx) {
- return new (Ctx) ARM64MCExpr(Expr, Kind);
-}
-
-StringRef ARM64MCExpr::getVariantKindName() const {
- switch (static_cast<uint32_t>(getKind())) {
- case VK_CALL: return "";
- case VK_LO12: return ":lo12:";
- case VK_ABS_G3: return ":abs_g3:";
- case VK_ABS_G2: return ":abs_g2:";
- case VK_ABS_G2_NC: return ":abs_g2_nc:";
- case VK_ABS_G1: return ":abs_g1:";
- case VK_ABS_G1_NC: return ":abs_g1_nc:";
- case VK_ABS_G0: return ":abs_g0:";
- case VK_ABS_G0_NC: return ":abs_g0_nc:";
- case VK_DTPREL_G2: return ":dtprel_g2:";
- case VK_DTPREL_G1: return ":dtprel_g1:";
- case VK_DTPREL_G1_NC: return ":dtprel_g1_nc:";
- case VK_DTPREL_G0: return ":dtprel_g0:";
- case VK_DTPREL_G0_NC: return ":dtprel_g0_nc:";
- case VK_DTPREL_LO12: return ":dtprel_lo12:";
- case VK_DTPREL_LO12_NC: return ":dtprel_lo12_nc:";
- case VK_TPREL_G2: return ":tprel_g2:";
- case VK_TPREL_G1: return ":tprel_g1:";
- case VK_TPREL_G1_NC: return ":tprel_g1_nc:";
- case VK_TPREL_G0: return ":tprel_g0:";
- case VK_TPREL_G0_NC: return ":tprel_g0_nc:";
- case VK_TPREL_LO12: return ":tprel_lo12:";
- case VK_TPREL_LO12_NC: return ":tprel_lo12_nc:";
- case VK_TLSDESC_LO12: return ":tlsdesc_lo12:";
- case VK_ABS_PAGE: return "";
- case VK_GOT_PAGE: return ":got:";
- case VK_GOT_LO12: return ":got_lo12:";
- case VK_GOTTPREL_PAGE: return ":gottprel:";
- case VK_GOTTPREL_LO12_NC: return ":gottprel_lo12:";
- case VK_GOTTPREL_G1: return ":gottprel_g1:";
- case VK_GOTTPREL_G0_NC: return ":gottprel_g0_nc:";
- case VK_TLSDESC: return "";
- case VK_TLSDESC_PAGE: return ":tlsdesc:";
- default:
- llvm_unreachable("Invalid ELF symbol kind");
- }
-}
-
-void ARM64MCExpr::PrintImpl(raw_ostream &OS) const {
- if (getKind() != VK_NONE)
- OS << getVariantKindName();
- OS << *Expr;
-}
-
-// FIXME: This basically copies MCObjectStreamer::AddValueSymbols. Perhaps
-// that method should be made public?
-// FIXME: Really do the above, now that two backends are using it.
-static void AddValueSymbolsImpl(const MCExpr *Value, MCAssembler *Asm) {
- switch (Value->getKind()) {
- case MCExpr::Target:
- llvm_unreachable("Can't handle nested target expr!");
- break;
-
- case MCExpr::Constant:
- break;
-
- case MCExpr::Binary: {
- const MCBinaryExpr *BE = cast<MCBinaryExpr>(Value);
- AddValueSymbolsImpl(BE->getLHS(), Asm);
- AddValueSymbolsImpl(BE->getRHS(), Asm);
- break;
- }
-
- case MCExpr::SymbolRef:
- Asm->getOrCreateSymbolData(cast<MCSymbolRefExpr>(Value)->getSymbol());
- break;
-
- case MCExpr::Unary:
- AddValueSymbolsImpl(cast<MCUnaryExpr>(Value)->getSubExpr(), Asm);
- break;
- }
-}
-
-void ARM64MCExpr::AddValueSymbols(MCAssembler *Asm) const {
- AddValueSymbolsImpl(getSubExpr(), Asm);
-}
-
-const MCSection *ARM64MCExpr::FindAssociatedSection() const {
- llvm_unreachable("FIXME: what goes here?");
-}
-
-bool ARM64MCExpr::EvaluateAsRelocatableImpl(MCValue &Res,
- const MCAsmLayout *Layout) const {
- if (!getSubExpr()->EvaluateAsRelocatable(Res, Layout))
- return false;
-
- Res =
- MCValue::get(Res.getSymA(), Res.getSymB(), Res.getConstant(), getKind());
-
- return true;
-}
-
-static void fixELFSymbolsInTLSFixupsImpl(const MCExpr *Expr, MCAssembler &Asm) {
- switch (Expr->getKind()) {
- case MCExpr::Target:
- llvm_unreachable("Can't handle nested target expression");
- break;
- case MCExpr::Constant:
- break;
-
- case MCExpr::Binary: {
- const MCBinaryExpr *BE = cast<MCBinaryExpr>(Expr);
- fixELFSymbolsInTLSFixupsImpl(BE->getLHS(), Asm);
- fixELFSymbolsInTLSFixupsImpl(BE->getRHS(), Asm);
- break;
- }
-
- case MCExpr::SymbolRef: {
- // We're known to be under a TLS fixup, so any symbol should be
- // modified. There should be only one.
- const MCSymbolRefExpr &SymRef = *cast<MCSymbolRefExpr>(Expr);
- MCSymbolData &SD = Asm.getOrCreateSymbolData(SymRef.getSymbol());
- MCELF::SetType(SD, ELF::STT_TLS);
- break;
- }
-
- case MCExpr::Unary:
- fixELFSymbolsInTLSFixupsImpl(cast<MCUnaryExpr>(Expr)->getSubExpr(), Asm);
- break;
- }
-}
-
-void ARM64MCExpr::fixELFSymbolsInTLSFixups(MCAssembler &Asm) const {
- switch (getSymbolLoc(Kind)) {
- default:
- return;
- case VK_DTPREL:
- case VK_GOTTPREL:
- case VK_TPREL:
- case VK_TLSDESC:
- break;
- }
-
- fixELFSymbolsInTLSFixupsImpl(getSubExpr(), Asm);
-}
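Reviewer note: a hypothetical use of the Create factory defined above,
wrapping a plain symbol reference so it prints and relocates as ":lo12:sym"
(Ctx is an MCContext and Sym an MCSymbol* already in hand):

  const MCExpr *Inner = MCSymbolRefExpr::Create(Sym, Ctx);
  const ARM64MCExpr *Lo12 =
      ARM64MCExpr::Create(Inner, ARM64MCExpr::VK_LO12, Ctx);
  // Lo12->getVariantKindName() yields ":lo12:".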
diff --git a/lib/Target/ARM64/MCTargetDesc/ARM64MCExpr.h b/lib/Target/ARM64/MCTargetDesc/ARM64MCExpr.h
deleted file mode 100644
index a33fe43..0000000
--- a/lib/Target/ARM64/MCTargetDesc/ARM64MCExpr.h
+++ /dev/null
@@ -1,162 +0,0 @@
-//=---- ARM64MCExpr.h - ARM64 specific MC expression classes ------*- C++ -*-=//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file describes ARM64-specific MCExprs, used for modifiers like
-// ":lo12:" or ":gottprel_g1:".
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_ARM64MCEXPR_H
-#define LLVM_ARM64MCEXPR_H
-
-#include "llvm/MC/MCExpr.h"
-#include "llvm/Support/ErrorHandling.h"
-
-namespace llvm {
-
-class ARM64MCExpr : public MCTargetExpr {
-public:
- enum VariantKind {
- VK_NONE = 0x000,
-
- // Symbol locations specifying (roughly speaking) what calculation should be
- // performed to construct the final address for the relocated
- // symbol. E.g. direct, via the GOT, ...
- VK_ABS = 0x001,
- VK_SABS = 0x002,
- VK_GOT = 0x003,
- VK_DTPREL = 0x004,
- VK_GOTTPREL = 0x005,
- VK_TPREL = 0x006,
- VK_TLSDESC = 0x007,
- VK_SymLocBits = 0x00f,
-
- // Variants specifying which part of the final address calculation is
- // used. E.g. the low 12 bits for an ADD/LDR, the middle 16 bits for a
- // MOVZ/MOVK.
- VK_PAGE = 0x010,
- VK_PAGEOFF = 0x020,
- VK_G0 = 0x030,
- VK_G1 = 0x040,
- VK_G2 = 0x050,
- VK_G3 = 0x060,
- VK_AddressFragBits = 0x0f0,
-
- // Whether the final relocation is a checked one (where a linker should
- // perform a range-check on the final address) or not. Note that this field
- // is unfortunately sometimes omitted from the assembly syntax. E.g. :lo12:
- // on its own is a non-checked relocation. We side with ELF on being
- // explicit about this!
- VK_NC = 0x100,
-
- // Convenience definitions for referring to specific textual representations
- // of relocation specifiers. Note that this means the "_NC" is sometimes
- // omitted in line with assembly syntax here (VK_LO12 rather than VK_LO12_NC
- // since a user would write ":lo12:").
- VK_CALL = VK_ABS,
- VK_ABS_PAGE = VK_ABS | VK_PAGE,
- VK_ABS_G3 = VK_ABS | VK_G3,
- VK_ABS_G2 = VK_ABS | VK_G2,
- VK_ABS_G2_NC = VK_ABS | VK_G2 | VK_NC,
- VK_ABS_G1 = VK_ABS | VK_G1,
- VK_ABS_G1_NC = VK_ABS | VK_G1 | VK_NC,
- VK_ABS_G0 = VK_ABS | VK_G0,
- VK_ABS_G0_NC = VK_ABS | VK_G0 | VK_NC,
- VK_LO12 = VK_ABS | VK_PAGEOFF | VK_NC,
- VK_GOT_LO12 = VK_GOT | VK_PAGEOFF | VK_NC,
- VK_GOT_PAGE = VK_GOT | VK_PAGE,
- VK_DTPREL_G2 = VK_DTPREL | VK_G2,
- VK_DTPREL_G1 = VK_DTPREL | VK_G1,
- VK_DTPREL_G1_NC = VK_DTPREL | VK_G1 | VK_NC,
- VK_DTPREL_G0 = VK_DTPREL | VK_G0,
- VK_DTPREL_G0_NC = VK_DTPREL | VK_G0 | VK_NC,
- VK_DTPREL_LO12 = VK_DTPREL | VK_PAGEOFF,
- VK_DTPREL_LO12_NC = VK_DTPREL | VK_PAGEOFF | VK_NC,
- VK_GOTTPREL_PAGE = VK_GOTTPREL | VK_PAGE,
- VK_GOTTPREL_LO12_NC = VK_GOTTPREL | VK_PAGEOFF | VK_NC,
- VK_GOTTPREL_G1 = VK_GOTTPREL | VK_G1,
- VK_GOTTPREL_G0_NC = VK_GOTTPREL | VK_G0 | VK_NC,
- VK_TPREL_G2 = VK_TPREL | VK_G2,
- VK_TPREL_G1 = VK_TPREL | VK_G1,
- VK_TPREL_G1_NC = VK_TPREL | VK_G1 | VK_NC,
- VK_TPREL_G0 = VK_TPREL | VK_G0,
- VK_TPREL_G0_NC = VK_TPREL | VK_G0 | VK_NC,
- VK_TPREL_LO12 = VK_TPREL | VK_PAGEOFF,
- VK_TPREL_LO12_NC = VK_TPREL | VK_PAGEOFF | VK_NC,
- VK_TLSDESC_LO12 = VK_TLSDESC | VK_PAGEOFF | VK_NC,
- VK_TLSDESC_PAGE = VK_TLSDESC | VK_PAGE,
-
- VK_INVALID = 0xfff
- };
-
-private:
- const MCExpr *Expr;
- const VariantKind Kind;
-
- explicit ARM64MCExpr(const MCExpr *Expr, VariantKind Kind)
- : Expr(Expr), Kind(Kind) {}
-
-public:
- /// @name Construction
- /// @{
-
- static const ARM64MCExpr *Create(const MCExpr *Expr, VariantKind Kind,
- MCContext &Ctx);
-
- /// @}
- /// @name Accessors
- /// @{
-
- /// Get the kind of this expression.
- VariantKind getKind() const { return static_cast<VariantKind>(Kind); }
-
- /// Get the expression this modifier applies to.
- const MCExpr *getSubExpr() const { return Expr; }
-
- /// @}
- /// @name VariantKind information extractors.
- /// @{
-
- static VariantKind getSymbolLoc(VariantKind Kind) {
- return static_cast<VariantKind>(Kind & VK_SymLocBits);
- }
-
- static VariantKind getAddressFrag(VariantKind Kind) {
- return static_cast<VariantKind>(Kind & VK_AddressFragBits);
- }
-
- static bool isNotChecked(VariantKind Kind) { return Kind & VK_NC; }
-
- /// @}
-
- /// Convert the variant kind into an ELF-appropriate modifier
- /// (e.g. ":got:", ":lo12:").
- StringRef getVariantKindName() const;
-
- void PrintImpl(raw_ostream &OS) const;
-
- void AddValueSymbols(MCAssembler *) const;
-
- const MCSection *FindAssociatedSection() const;
-
- bool EvaluateAsRelocatableImpl(MCValue &Res,
- const MCAsmLayout *Layout) const;
-
- void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const;
-
- static bool classof(const MCExpr *E) {
- return E->getKind() == MCExpr::Target;
- }
-
- static bool classof(const ARM64MCExpr *) { return true; }
-
-};
-} // end namespace llvm
-
-#endif
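Reviewer note: VariantKind above is a three-field bit set rather than a flat
enum -- symbol location in bits [3:0], address fragment in bits [7:4], and
the unchecked flag in bit 8. A compact check of the composition rule, using
only the accessors declared above:

  #include <cassert>

  void checkVariantKindPacking() {
    using VK = ARM64MCExpr; // shorthand for the enum's scope
    static_assert(VK::VK_DTPREL_G1_NC ==
                      (VK::VK_DTPREL | VK::VK_G1 | VK::VK_NC),
                  "composite kinds OR together the three fields");
    assert(VK::getSymbolLoc(VK::VK_DTPREL_G1_NC) == VK::VK_DTPREL);
    assert(VK::getAddressFrag(VK::VK_DTPREL_G1_NC) == VK::VK_G1);
    assert(VK::isNotChecked(VK::VK_DTPREL_G1_NC));
  }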
diff --git a/lib/Target/ARM64/MCTargetDesc/ARM64MCTargetDesc.cpp b/lib/Target/ARM64/MCTargetDesc/ARM64MCTargetDesc.cpp
deleted file mode 100644
index 8d54412..0000000
--- a/lib/Target/ARM64/MCTargetDesc/ARM64MCTargetDesc.cpp
+++ /dev/null
@@ -1,167 +0,0 @@
-//===-- ARM64MCTargetDesc.cpp - ARM64 Target Descriptions -------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file provides ARM64 specific target descriptions.
-//
-//===----------------------------------------------------------------------===//
-
-#include "ARM64MCTargetDesc.h"
-#include "ARM64ELFStreamer.h"
-#include "ARM64MCAsmInfo.h"
-#include "InstPrinter/ARM64InstPrinter.h"
-#include "llvm/MC/MCCodeGenInfo.h"
-#include "llvm/MC/MCInstrInfo.h"
-#include "llvm/MC/MCRegisterInfo.h"
-#include "llvm/MC/MCStreamer.h"
-#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/TargetRegistry.h"
-
-#define GET_INSTRINFO_MC_DESC
-#include "ARM64GenInstrInfo.inc"
-
-#define GET_SUBTARGETINFO_MC_DESC
-#include "ARM64GenSubtargetInfo.inc"
-
-#define GET_REGINFO_MC_DESC
-#include "ARM64GenRegisterInfo.inc"
-
-using namespace llvm;
-
-static MCInstrInfo *createARM64MCInstrInfo() {
- MCInstrInfo *X = new MCInstrInfo();
- InitARM64MCInstrInfo(X);
- return X;
-}
-
-static MCSubtargetInfo *createARM64MCSubtargetInfo(StringRef TT, StringRef CPU,
- StringRef FS) {
- MCSubtargetInfo *X = new MCSubtargetInfo();
- InitARM64MCSubtargetInfo(X, TT, CPU, FS);
- return X;
-}
-
-static MCRegisterInfo *createARM64MCRegisterInfo(StringRef Triple) {
- MCRegisterInfo *X = new MCRegisterInfo();
- InitARM64MCRegisterInfo(X, ARM64::LR);
- return X;
-}
-
-static MCAsmInfo *createARM64MCAsmInfo(const MCRegisterInfo &MRI,
- StringRef TT) {
- Triple TheTriple(TT);
-
- MCAsmInfo *MAI;
- if (TheTriple.isOSDarwin())
- MAI = new ARM64MCAsmInfoDarwin();
- else {
- assert(TheTriple.isOSBinFormatELF() && "Only expect Darwin or ELF");
- MAI = new ARM64MCAsmInfoELF();
- }
-
- // Initial state of the frame pointer is SP.
- unsigned Reg = MRI.getDwarfRegNum(ARM64::SP, true);
- MCCFIInstruction Inst = MCCFIInstruction::createDefCfa(0, Reg, 0);
- MAI->addInitialFrameState(Inst);
-
- return MAI;
-}
-
-static MCCodeGenInfo *createARM64MCCodeGenInfo(StringRef TT, Reloc::Model RM,
- CodeModel::Model CM,
- CodeGenOpt::Level OL) {
- Triple TheTriple(TT);
- assert((TheTriple.isOSBinFormatELF() || TheTriple.isOSBinFormatMachO()) &&
- "Only expect Darwin and ELF targets");
-
- if (CM == CodeModel::Default)
- CM = CodeModel::Small;
- // The default MCJIT memory managers make no guarantees about where they can
- // find an executable page; JITed code needs to be able to refer to globals
- // no matter how far away they are.
- else if (CM == CodeModel::JITDefault)
- CM = CodeModel::Large;
- else if (CM != CodeModel::Small && CM != CodeModel::Large)
- report_fatal_error("Only small and large code models are allowed on ARM64");
-
- // ARM64 Darwin is always PIC.
- if (TheTriple.isOSDarwin())
- RM = Reloc::PIC_;
- // On ELF platforms the default static relocation model has a smart enough
- // linker to cope with referencing external symbols defined in a shared
- // library. Hence DynamicNoPIC doesn't need to be promoted to PIC.
- else if (RM == Reloc::Default || RM == Reloc::DynamicNoPIC)
- RM = Reloc::Static;
-
- MCCodeGenInfo *X = new MCCodeGenInfo();
- X->InitMCCodeGenInfo(RM, CM, OL);
- return X;
-}
-
-static MCInstPrinter *createARM64MCInstPrinter(const Target &T,
- unsigned SyntaxVariant,
- const MCAsmInfo &MAI,
- const MCInstrInfo &MII,
- const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI) {
- if (SyntaxVariant == 0)
- return new ARM64InstPrinter(MAI, MII, MRI, STI);
- if (SyntaxVariant == 1)
- return new ARM64AppleInstPrinter(MAI, MII, MRI, STI);
-
- return 0;
-}
-
-static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
- MCContext &Ctx, MCAsmBackend &TAB,
- raw_ostream &OS, MCCodeEmitter *Emitter,
- const MCSubtargetInfo &STI, bool RelaxAll,
- bool NoExecStack) {
- Triple TheTriple(TT);
-
- if (TheTriple.isOSDarwin())
- return createMachOStreamer(Ctx, TAB, OS, Emitter, RelaxAll,
- /*LabelSections*/ true);
-
- return createARM64ELFStreamer(Ctx, TAB, OS, Emitter, RelaxAll, NoExecStack);
-}
-
-// Force static initialization.
-extern "C" void LLVMInitializeARM64TargetMC() {
- // Register the MC asm info.
- RegisterMCAsmInfoFn X(TheARM64Target, createARM64MCAsmInfo);
-
- // Register the MC codegen info.
- TargetRegistry::RegisterMCCodeGenInfo(TheARM64Target,
- createARM64MCCodeGenInfo);
-
- // Register the MC instruction info.
- TargetRegistry::RegisterMCInstrInfo(TheARM64Target, createARM64MCInstrInfo);
-
- // Register the MC register info.
- TargetRegistry::RegisterMCRegInfo(TheARM64Target, createARM64MCRegisterInfo);
-
- // Register the MC subtarget info.
- TargetRegistry::RegisterMCSubtargetInfo(TheARM64Target,
- createARM64MCSubtargetInfo);
-
- // Register the asm backend.
- TargetRegistry::RegisterMCAsmBackend(TheARM64Target, createARM64AsmBackend);
-
- // Register the MC Code Emitter
- TargetRegistry::RegisterMCCodeEmitter(TheARM64Target,
- createARM64MCCodeEmitter);
-
- // Register the object streamer.
- TargetRegistry::RegisterMCObjectStreamer(TheARM64Target, createMCStreamer);
-
- // Register the MCInstPrinter.
- TargetRegistry::RegisterMCInstPrinter(TheARM64Target,
- createARM64MCInstPrinter);
-}
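Reviewer note: for orientation, the consumer side of these registrations is
the usual TargetRegistry lookup dance. A sketch, assuming the target itself
was registered by LLVMInitializeARM64TargetInfo() and eliding error checks:

  std::string Err;
  const Target *T = TargetRegistry::lookupTarget("arm64-apple-darwin", Err);
  MCRegisterInfo *MRI = T->createMCRegInfo("arm64-apple-darwin");
  MCAsmInfo *MAI = T->createMCAsmInfo(*MRI, "arm64-apple-darwin");
  MCInstrInfo *MII = T->createMCInstrInfo();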
diff --git a/lib/Target/ARM64/MCTargetDesc/ARM64MCTargetDesc.h b/lib/Target/ARM64/MCTargetDesc/ARM64MCTargetDesc.h
deleted file mode 100644
index 0db2b22..0000000
--- a/lib/Target/ARM64/MCTargetDesc/ARM64MCTargetDesc.h
+++ /dev/null
@@ -1,62 +0,0 @@
-//===-- ARM64MCTargetDesc.h - ARM64 Target Descriptions ---------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file provides ARM64 specific target descriptions.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef ARM64MCTARGETDESC_H
-#define ARM64MCTARGETDESC_H
-
-#include "llvm/Support/DataTypes.h"
-#include <string>
-
-namespace llvm {
-class MCAsmBackend;
-class MCCodeEmitter;
-class MCContext;
-class MCInstrInfo;
-class MCRegisterInfo;
-class MCObjectWriter;
-class MCSubtargetInfo;
-class StringRef;
-class Target;
-class raw_ostream;
-
-extern Target TheARM64Target;
-
-MCCodeEmitter *createARM64MCCodeEmitter(const MCInstrInfo &MCII,
- const MCRegisterInfo &MRI,
- const MCSubtargetInfo &STI,
- MCContext &Ctx);
-MCAsmBackend *createARM64AsmBackend(const Target &T, const MCRegisterInfo &MRI,
- StringRef TT, StringRef CPU);
-
-MCObjectWriter *createARM64ELFObjectWriter(raw_ostream &OS, uint8_t OSABI);
-
-MCObjectWriter *createARM64MachObjectWriter(raw_ostream &OS, uint32_t CPUType,
- uint32_t CPUSubtype);
-
-} // End llvm namespace
-
-// Defines symbolic names for ARM64 registers. This defines a mapping from
-// register name to register number.
-//
-#define GET_REGINFO_ENUM
-#include "ARM64GenRegisterInfo.inc"
-
-// Defines symbolic names for the ARM64 instructions.
-//
-#define GET_INSTRINFO_ENUM
-#include "ARM64GenInstrInfo.inc"
-
-#define GET_SUBTARGETINFO_ENUM
-#include "ARM64GenSubtargetInfo.inc"
-
-#endif
diff --git a/lib/Target/ARM64/MCTargetDesc/CMakeLists.txt b/lib/Target/ARM64/MCTargetDesc/CMakeLists.txt
deleted file mode 100644
index f8665bc..0000000
--- a/lib/Target/ARM64/MCTargetDesc/CMakeLists.txt
+++ /dev/null
@@ -1,14 +0,0 @@
-add_llvm_library(LLVMARM64Desc
- ARM64AsmBackend.cpp
- ARM64ELFObjectWriter.cpp
- ARM64ELFStreamer.cpp
- ARM64MCAsmInfo.cpp
- ARM64MCCodeEmitter.cpp
- ARM64MCExpr.cpp
- ARM64MCTargetDesc.cpp
- ARM64MachObjectWriter.cpp
-)
-add_dependencies(LLVMARM64Desc ARM64CommonTableGen)
-
-# Hack: we need to include 'main' target directory to grab private headers
-include_directories(${CMAKE_CURRENT_SOURCE_DIR}/.. ${CMAKE_CURRENT_BINARY_DIR}/..)
diff --git a/lib/Target/ARM64/MCTargetDesc/LLVMBuild.txt b/lib/Target/ARM64/MCTargetDesc/LLVMBuild.txt
deleted file mode 100644
index e4c74d2..0000000
--- a/lib/Target/ARM64/MCTargetDesc/LLVMBuild.txt
+++ /dev/null
@@ -1,24 +0,0 @@
-;===- ./lib/Target/ARM64/MCTargetDesc/LLVMBuild.txt ------------*- Conf -*--===;
-;
-; The LLVM Compiler Infrastructure
-;
-; This file is distributed under the University of Illinois Open Source
-; License. See LICENSE.TXT for details.
-;
-;===------------------------------------------------------------------------===;
-;
-; This is an LLVMBuild description file for the components in this subdirectory.
-;
-; For more information on the LLVMBuild system, please see:
-;
-; http://llvm.org/docs/LLVMBuild.html
-;
-;===------------------------------------------------------------------------===;
-
-[component_0]
-type = Library
-name = ARM64Desc
-parent = ARM64
-required_libraries = ARM64AsmPrinter ARM64Info MC Support
-add_to_library_groups = ARM64
-
diff --git a/lib/Target/ARM64/MCTargetDesc/Makefile b/lib/Target/ARM64/MCTargetDesc/Makefile
deleted file mode 100644
index 013cc63..0000000
--- a/lib/Target/ARM64/MCTargetDesc/Makefile
+++ /dev/null
@@ -1,16 +0,0 @@
-##===- lib/Target/ARM64/TargetDesc/Makefile ----------------*- Makefile -*-===##
-#
-# The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-##===----------------------------------------------------------------------===##
-
-LEVEL = ../../../..
-LIBRARYNAME = LLVMARM64Desc
-
-# Hack: we need to include 'main' target directory to grab private headers
-CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
-
-include $(LEVEL)/Makefile.common
diff --git a/lib/Target/ARM64/Makefile b/lib/Target/ARM64/Makefile
deleted file mode 100644
index 5f0f307..0000000
--- a/lib/Target/ARM64/Makefile
+++ /dev/null
@@ -1,25 +0,0 @@
-##===- lib/Target/ARM64/Makefile ---------------------------*- Makefile -*-===##
-#
-# The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-##===----------------------------------------------------------------------===##
-
-LEVEL = ../../..
-LIBRARYNAME = LLVMARM64CodeGen
-TARGET = ARM64
-
-# Make sure that tblgen is run, first thing.
-BUILT_SOURCES = ARM64GenRegisterInfo.inc ARM64GenInstrInfo.inc \
- ARM64GenAsmWriter.inc ARM64GenAsmWriter1.inc \
- ARM64GenDAGISel.inc \
- ARM64GenCallingConv.inc ARM64GenAsmMatcher.inc \
- ARM64GenSubtargetInfo.inc ARM64GenMCCodeEmitter.inc \
- ARM64GenFastISel.inc ARM64GenDisassemblerTables.inc \
- ARM64GenMCPseudoLowering.inc
-
-DIRS = TargetInfo InstPrinter AsmParser Disassembler MCTargetDesc
-
-include $(LEVEL)/Makefile.common
diff --git a/lib/Target/ARM64/TargetInfo/ARM64TargetInfo.cpp b/lib/Target/ARM64/TargetInfo/ARM64TargetInfo.cpp
deleted file mode 100644
index dec09ed..0000000
--- a/lib/Target/ARM64/TargetInfo/ARM64TargetInfo.cpp
+++ /dev/null
@@ -1,21 +0,0 @@
-//===-- ARM64TargetInfo.cpp - ARM64 Target Implementation -----------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/ADT/Triple.h"
-#include "llvm/Support/TargetRegistry.h"
-using namespace llvm;
-
-namespace llvm {
-Target TheARM64Target;
-} // end namespace llvm
-
-extern "C" void LLVMInitializeARM64TargetInfo() {
- RegisterTarget<Triple::arm64, /*HasJIT=*/true> X(TheARM64Target, "arm64",
- "ARM64");
-}
diff --git a/lib/Target/ARM64/TargetInfo/CMakeLists.txt b/lib/Target/ARM64/TargetInfo/CMakeLists.txt
deleted file mode 100644
index a0142c4..0000000
--- a/lib/Target/ARM64/TargetInfo/CMakeLists.txt
+++ /dev/null
@@ -1,7 +0,0 @@
-include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
-
-add_llvm_library(LLVMARM64Info
- ARM64TargetInfo.cpp
- )
-
-add_dependencies(LLVMARM64Info ARM64CommonTableGen)
diff --git a/lib/Target/ARM64/TargetInfo/LLVMBuild.txt b/lib/Target/ARM64/TargetInfo/LLVMBuild.txt
deleted file mode 100644
index 5bea694..0000000
--- a/lib/Target/ARM64/TargetInfo/LLVMBuild.txt
+++ /dev/null
@@ -1,24 +0,0 @@
-;===- ./lib/Target/ARM64/TargetInfo/LLVMBuild.txt --------------*- Conf -*--===;
-;
-; The LLVM Compiler Infrastructure
-;
-; This file is distributed under the University of Illinois Open Source
-; License. See LICENSE.TXT for details.
-;
-;===------------------------------------------------------------------------===;
-;
-; This is an LLVMBuild description file for the components in this subdirectory.
-;
-; For more information on the LLVMBuild system, please see:
-;
-; http://llvm.org/docs/LLVMBuild.html
-;
-;===------------------------------------------------------------------------===;
-
-[component_0]
-type = Library
-name = ARM64Info
-parent = ARM64
-required_libraries = MC Support
-add_to_library_groups = ARM64
-
diff --git a/lib/Target/ARM64/TargetInfo/Makefile b/lib/Target/ARM64/TargetInfo/Makefile
deleted file mode 100644
index 2d5a1a0..0000000
--- a/lib/Target/ARM64/TargetInfo/Makefile
+++ /dev/null
@@ -1,15 +0,0 @@
-##===- lib/Target/ARM64/TargetInfo/Makefile ----------------*- Makefile -*-===##
-#
-# The LLVM Compiler Infrastructure
-#
-# This file is distributed under the University of Illinois Open Source
-# License. See LICENSE.TXT for details.
-#
-##===----------------------------------------------------------------------===##
-LEVEL = ../../../..
-LIBRARYNAME = LLVMARM64Info
-
-# Hack: we need to include 'main' target directory to grab private headers
-CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
-
-include $(LEVEL)/Makefile.common
diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp
index afd1f51..15b574d 100644
--- a/lib/Target/CppBackend/CPPBackend.cpp
+++ b/lib/Target/CppBackend/CPPBackend.cpp
@@ -108,9 +108,9 @@ namespace {
explicit CppWriter(formatted_raw_ostream &o) :
ModulePass(ID), Out(o), uniqueNum(0), is_inline(false), indent_level(0){}
- virtual const char *getPassName() const { return "C++ backend"; }
+ const char *getPassName() const override { return "C++ backend"; }
- bool runOnModule(Module &M);
+ bool runOnModule(Module &M) override;
void printProgram(const std::string& fname, const std::string& modName );
void printModule(const std::string& fname, const std::string& modName );
@@ -396,7 +396,7 @@ std::string CppWriter::getCppName(Type* Ty) {
return I->second;
// Okay, let's build a new name for this type. Start with a prefix
- const char* prefix = 0;
+ const char* prefix = nullptr;
switch (Ty->getTypeID()) {
case Type::FunctionTyID: prefix = "FuncTy_"; break;
case Type::StructTyID: prefix = "StructTy_"; break;
@@ -1690,9 +1690,8 @@ void CppWriter::printFunctionUses(const Function* F) {
// Print the function declarations for any functions encountered
nl(Out) << "// Function Declarations"; nl(Out);
- for (SmallPtrSet<GlobalValue*,64>::iterator I = gvs.begin(), E = gvs.end();
- I != E; ++I) {
- if (Function* Fun = dyn_cast<Function>(*I)) {
+ for (auto *GV : gvs) {
+ if (Function *Fun = dyn_cast<Function>(GV)) {
if (!is_inline || Fun != F)
printFunctionHead(Fun);
}
@@ -1700,17 +1699,15 @@ void CppWriter::printFunctionUses(const Function* F) {
// Print the global variable declarations for any variables encountered
nl(Out) << "// Global Variable Declarations"; nl(Out);
- for (SmallPtrSet<GlobalValue*,64>::iterator I = gvs.begin(), E = gvs.end();
- I != E; ++I) {
- if (GlobalVariable* F = dyn_cast<GlobalVariable>(*I))
+ for (auto *GV : gvs) {
+ if (GlobalVariable *F = dyn_cast<GlobalVariable>(GV))
printVariableHead(F);
}
// Print the constants found
nl(Out) << "// Constant Definitions"; nl(Out);
- for (SmallPtrSet<Constant*,64>::iterator I = consts.begin(),
- E = consts.end(); I != E; ++I) {
- printConstant(*I);
+ for (const auto *C : consts) {
+ printConstant(C);
}
// Process the global variables definitions now that all the constants have
@@ -1718,10 +1715,9 @@ void CppWriter::printFunctionUses(const Function* F) {
// initializers.
if (GenerationType != GenFunction) {
nl(Out) << "// Global Variable Definitions"; nl(Out);
- for (SmallPtrSet<GlobalValue*,64>::iterator I = gvs.begin(), E = gvs.end();
- I != E; ++I) {
- if (GlobalVariable* GV = dyn_cast<GlobalVariable>(*I))
- printVariableBody(GV);
+ for (const auto &GV : gvs) {
+ if (GlobalVariable *Var = dyn_cast<GlobalVariable>(GV))
+ printVariableBody(Var);
}
}
}
diff --git a/lib/Target/CppBackend/CPPTargetMachine.h b/lib/Target/CppBackend/CPPTargetMachine.h
index 477e788..673ade7 100644
--- a/lib/Target/CppBackend/CPPTargetMachine.h
+++ b/lib/Target/CppBackend/CPPTargetMachine.h
@@ -28,14 +28,12 @@ struct CPPTargetMachine : public TargetMachine {
CodeGenOpt::Level OL)
: TargetMachine(T, TT, CPU, FS, Options) {}
- virtual bool addPassesToEmitFile(PassManagerBase &PM,
- formatted_raw_ostream &Out,
- CodeGenFileType FileType,
- bool DisableVerify,
- AnalysisID StartAfter,
- AnalysisID StopAfter);
-
- virtual const DataLayout *getDataLayout() const { return 0; }
+ bool addPassesToEmitFile(PassManagerBase &PM, formatted_raw_ostream &Out,
+ CodeGenFileType FileType, bool DisableVerify,
+ AnalysisID StartAfter,
+ AnalysisID StopAfter) override;
+
+ const DataLayout *getDataLayout() const override { return nullptr; }
};
extern Target TheCppBackendTarget;
diff --git a/lib/Target/Hexagon/Hexagon.td b/lib/Target/Hexagon/Hexagon.td
index c1b6d45..5f4a6c6 100644
--- a/lib/Target/Hexagon/Hexagon.td
+++ b/lib/Target/Hexagon/Hexagon.td
@@ -200,8 +200,6 @@ class Proc<string Name, SchedMachineModel Model,
list<SubtargetFeature> Features>
: ProcessorModel<Name, Model, Features>;
-def : Proc<"hexagonv2", HexagonModel, [ArchV2]>;
-def : Proc<"hexagonv3", HexagonModel, [ArchV2, ArchV3]>;
def : Proc<"hexagonv4", HexagonModelV4, [ArchV2, ArchV3, ArchV4]>;
def : Proc<"hexagonv5", HexagonModelV4, [ArchV2, ArchV3, ArchV4, ArchV5]>;
diff --git a/lib/Target/Hexagon/HexagonAsmPrinter.cpp b/lib/Target/Hexagon/HexagonAsmPrinter.cpp
index a588274..2e011bd 100644
--- a/lib/Target/Hexagon/HexagonAsmPrinter.cpp
+++ b/lib/Target/Hexagon/HexagonAsmPrinter.cpp
@@ -13,7 +13,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "asm-printer"
#include "Hexagon.h"
#include "HexagonAsmPrinter.h"
#include "HexagonMachineFunctionInfo.h"
@@ -56,6 +55,8 @@
using namespace llvm;
+#define DEBUG_TYPE "asm-printer"
+
static cl::opt<bool> AlignCalls(
"hexagon-align-calls", cl::Hidden, cl::init(true),
cl::desc("Insert falign after call instruction for Hexagon target"));
@@ -224,7 +225,7 @@ static MCInstPrinter *createHexagonMCInstPrinter(const Target &T,
if (SyntaxVariant == 0)
return(new HexagonInstPrinter(MAI, MII, MRI));
else
- return NULL;
+ return nullptr;
}
extern "C" void LLVMInitializeHexagonAsmPrinter() {
diff --git a/lib/Target/Hexagon/HexagonAsmPrinter.h b/lib/Target/Hexagon/HexagonAsmPrinter.h
index a186dc9..7fe8c57 100644
--- a/lib/Target/Hexagon/HexagonAsmPrinter.h
+++ b/lib/Target/Hexagon/HexagonAsmPrinter.h
@@ -30,21 +30,22 @@ namespace llvm {
Subtarget = &TM.getSubtarget<HexagonSubtarget>();
}
- virtual const char *getPassName() const {
+ const char *getPassName() const override {
return "Hexagon Assembly Printer";
}
- bool isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const;
+ bool isBlockOnlyReachableByFallthrough(
+ const MachineBasicBlock *MBB) const override;
- virtual void EmitInstruction(const MachineInstr *MI);
+ void EmitInstruction(const MachineInstr *MI) override;
void printOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O);
bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
unsigned AsmVariant, const char *ExtraCode,
- raw_ostream &OS);
+ raw_ostream &OS) override;
bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
unsigned AsmVariant, const char *ExtraCode,
- raw_ostream &OS);
+ raw_ostream &OS) override;
static const char *getRegisterName(unsigned RegNo);
};
diff --git a/lib/Target/Hexagon/HexagonCFGOptimizer.cpp b/lib/Target/Hexagon/HexagonCFGOptimizer.cpp
index 8597f11..de340e0 100644
--- a/lib/Target/Hexagon/HexagonCFGOptimizer.cpp
+++ b/lib/Target/Hexagon/HexagonCFGOptimizer.cpp
@@ -6,7 +6,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "hexagon_cfg"
#include "Hexagon.h"
#include "HexagonMachineFunctionInfo.h"
#include "HexagonSubtarget.h"
@@ -26,6 +25,8 @@
using namespace llvm;
+#define DEBUG_TYPE "hexagon_cfg"
+
namespace llvm {
void initializeHexagonCFGOptimizerPass(PassRegistry&);
}
@@ -48,10 +49,10 @@ private:
initializeHexagonCFGOptimizerPass(*PassRegistry::getPassRegistry());
}
- const char *getPassName() const {
+ const char *getPassName() const override {
return "Hexagon CFG Optimizer";
}
- bool runOnMachineFunction(MachineFunction &Fn);
+ bool runOnMachineFunction(MachineFunction &Fn) override;
};
@@ -146,8 +147,8 @@ bool HexagonCFGOptimizer::runOnMachineFunction(MachineFunction &Fn) {
MachineBasicBlock::succ_iterator SI = MBB->succ_begin();
MachineBasicBlock* FirstSucc = *SI;
MachineBasicBlock* SecondSucc = *(++SI);
- MachineBasicBlock* LayoutSucc = NULL;
- MachineBasicBlock* JumpAroundTarget = NULL;
+ MachineBasicBlock* LayoutSucc = nullptr;
+ MachineBasicBlock* JumpAroundTarget = nullptr;
if (MBB->isLayoutSuccessor(FirstSucc)) {
LayoutSucc = FirstSucc;
@@ -161,7 +162,7 @@ bool HexagonCFGOptimizer::runOnMachineFunction(MachineFunction &Fn) {
// The target of the unconditional branch must be JumpAroundTarget.
// TODO: If not, we should not invert the unconditional branch.
- MachineBasicBlock* CondBranchTarget = NULL;
+ MachineBasicBlock* CondBranchTarget = nullptr;
if ((MI->getOpcode() == Hexagon::JMP_t) ||
(MI->getOpcode() == Hexagon::JMP_f)) {
CondBranchTarget = MI->getOperand(1).getMBB();
@@ -239,7 +240,7 @@ bool HexagonCFGOptimizer::runOnMachineFunction(MachineFunction &Fn) {
static void initializePassOnce(PassRegistry &Registry) {
PassInfo *PI = new PassInfo("Hexagon CFG Optimizer", "hexagon-cfg",
- &HexagonCFGOptimizer::ID, 0, false, false);
+ &HexagonCFGOptimizer::ID, nullptr, false, false);
Registry.registerPass(*PI, true);
}
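
The registration hunk above shows the pass-registration idiom used throughout these Hexagon passes: construct a PassInfo (whose fourth argument, the default-constructor callback, is now `nullptr`) and register it once with the PassRegistry. A sketch mirroring those lines; `MyPass` is hypothetical:

    static void initializePassOnce(PassRegistry &Registry) {
      PassInfo *PI = new PassInfo("My machine pass", "my-pass",
                                  &MyPass::ID, nullptr, false, false);
      Registry.registerPass(*PI, true);
    }
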
diff --git a/lib/Target/Hexagon/HexagonCopyToCombine.cpp b/lib/Target/Hexagon/HexagonCopyToCombine.cpp
index 60c933b..aeff680 100644
--- a/lib/Target/Hexagon/HexagonCopyToCombine.cpp
+++ b/lib/Target/Hexagon/HexagonCopyToCombine.cpp
@@ -11,8 +11,6 @@
// to move them together. If we can move them next to each other we do so and
// replace them with a combine instruction.
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "hexagon-copy-combine"
-
#include "llvm/PassSupport.h"
#include "Hexagon.h"
#include "HexagonInstrInfo.h"
@@ -36,6 +34,8 @@
using namespace llvm;
+#define DEBUG_TYPE "hexagon-copy-combine"
+
static
cl::opt<bool> IsCombinesDisabled("disable-merge-into-combines",
cl::Hidden, cl::ZeroOrMore,
@@ -68,15 +68,15 @@ public:
initializeHexagonCopyToCombinePass(*PassRegistry::getPassRegistry());
}
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
MachineFunctionPass::getAnalysisUsage(AU);
}
- const char *getPassName() const {
+ const char *getPassName() const override {
return "Hexagon Copy-To-Combine Pass";
}
- virtual bool runOnMachineFunction(MachineFunction &Fn);
+ bool runOnMachineFunction(MachineFunction &Fn) override;
private:
MachineInstr *findPairable(MachineInstr *I1, bool &DoInsertAtI1);
@@ -262,7 +262,7 @@ bool HexagonCopyToCombine::isSafeToMoveTogether(MachineInstr *I1,
unsigned KilledOperand = 0;
if (I2->killsRegister(I2UseReg))
KilledOperand = I2UseReg;
- MachineInstr *KillingInstr = 0;
+ MachineInstr *KillingInstr = nullptr;
for (; I != End; ++I) {
// If the intervening instruction I:
@@ -306,7 +306,7 @@ bool HexagonCopyToCombine::isSafeToMoveTogether(MachineInstr *I1,
// Track killed operands. If we move across an instruction that kills our
// operand, we need to update the kill information on the moved I1. It kills
// the operand now.
- MachineInstr *KillingInstr = 0;
+ MachineInstr *KillingInstr = nullptr;
unsigned KilledOperand = 0;
while(++I != End) {
@@ -333,7 +333,7 @@ bool HexagonCopyToCombine::isSafeToMoveTogether(MachineInstr *I1,
// Check for an exact kill (registers match).
if (I1UseReg && I->killsRegister(I1UseReg)) {
- assert(KillingInstr == 0 && "Should only see one killing instruction");
+ assert(!KillingInstr && "Should only see one killing instruction");
KilledOperand = I1UseReg;
KillingInstr = &*I;
}
@@ -506,7 +506,7 @@ MachineInstr *HexagonCopyToCombine::findPairable(MachineInstr *I1,
// Not safe. Stop searching.
break;
}
- return 0;
+ return nullptr;
}
void HexagonCopyToCombine::combine(MachineInstr *I1, MachineInstr *I2,
diff --git a/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp b/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp
index 8a5991f..3dafe80 100644
--- a/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp
+++ b/lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp
@@ -60,10 +60,10 @@ class HexagonExpandPredSpillCode : public MachineFunctionPass {
initializeHexagonExpandPredSpillCodePass(Registry);
}
- const char *getPassName() const {
+ const char *getPassName() const override {
return "Hexagon Expand Predicate Spill Code";
}
- bool runOnMachineFunction(MachineFunction &Fn);
+ bool runOnMachineFunction(MachineFunction &Fn) override;
};
@@ -187,7 +187,7 @@ static void initializePassOnce(PassRegistry &Registry) {
const char *Name = "Hexagon Expand Predicate Spill Code";
PassInfo *PI = new PassInfo(Name, "hexagon-spill-pred",
&HexagonExpandPredSpillCode::ID,
- 0, false, false);
+ nullptr, false, false);
Registry.registerPass(*PI, true);
}
diff --git a/lib/Target/Hexagon/HexagonFixupHwLoops.cpp b/lib/Target/Hexagon/HexagonFixupHwLoops.cpp
index a79264b..d41939a 100644
--- a/lib/Target/Hexagon/HexagonFixupHwLoops.cpp
+++ b/lib/Target/Hexagon/HexagonFixupHwLoops.cpp
@@ -40,11 +40,13 @@ namespace {
initializeHexagonFixupHwLoopsPass(*PassRegistry::getPassRegistry());
}
- virtual bool runOnMachineFunction(MachineFunction &MF);
+ bool runOnMachineFunction(MachineFunction &MF) override;
- const char *getPassName() const { return "Hexagon Hardware Loop Fixup"; }
+ const char *getPassName() const override {
+ return "Hexagon Hardware Loop Fixup";
+ }
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
MachineFunctionPass::getAnalysisUsage(AU);
}
diff --git a/lib/Target/Hexagon/HexagonFrameLowering.cpp b/lib/Target/Hexagon/HexagonFrameLowering.cpp
index 0ea13d4..d551ca9 100644
--- a/lib/Target/Hexagon/HexagonFrameLowering.cpp
+++ b/lib/Target/Hexagon/HexagonFrameLowering.cpp
@@ -246,7 +246,7 @@ HexagonFrameLowering::spillCalleeSavedRegisters(
//
unsigned SuperReg = uniqueSuperReg(Reg, TRI);
bool CanUseDblStore = false;
- const TargetRegisterClass* SuperRegClass = 0;
+ const TargetRegisterClass* SuperRegClass = nullptr;
if (ContiguousRegs && (i < CSI.size()-1)) {
unsigned SuperRegNext = uniqueSuperReg(CSI[i+1].getReg(), TRI);
@@ -300,7 +300,7 @@ bool HexagonFrameLowering::restoreCalleeSavedRegisters(
// Check if we can use a double-word load.
//
unsigned SuperReg = uniqueSuperReg(Reg, TRI);
- const TargetRegisterClass* SuperRegClass = 0;
+ const TargetRegisterClass* SuperRegClass = nullptr;
bool CanUseDblLoad = false;
if (ContiguousRegs && (i < CSI.size()-1)) {
unsigned SuperRegNext = uniqueSuperReg(CSI[i+1].getReg(), TRI);
diff --git a/lib/Target/Hexagon/HexagonFrameLowering.h b/lib/Target/Hexagon/HexagonFrameLowering.h
index a62c76a..446af16 100644
--- a/lib/Target/Hexagon/HexagonFrameLowering.h
+++ b/lib/Target/Hexagon/HexagonFrameLowering.h
@@ -28,25 +28,25 @@ public:
/// emitProlog/emitEpilog - These methods insert prolog and epilog code into
/// the function.
- void emitPrologue(MachineFunction &MF) const;
- void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
- virtual bool
- spillCalleeSavedRegisters(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const;
-
- void eliminateCallFramePseudoInstr(MachineFunction &MF,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const;
-
- virtual bool
+ void emitPrologue(MachineFunction &MF) const override;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
+ bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const override;
+
+ void
+ eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const override;
+
+ bool
restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
const std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const;
- int getFrameIndexOffset(const MachineFunction &MF, int FI) const;
- bool hasFP(const MachineFunction &MF) const;
+ const TargetRegisterInfo *TRI) const override;
+ int getFrameIndexOffset(const MachineFunction &MF, int FI) const override;
+ bool hasFP(const MachineFunction &MF) const override;
bool hasTailCall(MachineBasicBlock &MBB) const;
};
diff --git a/lib/Target/Hexagon/HexagonHardwareLoops.cpp b/lib/Target/Hexagon/HexagonHardwareLoops.cpp
index 936fb11..7f76421 100644
--- a/lib/Target/Hexagon/HexagonHardwareLoops.cpp
+++ b/lib/Target/Hexagon/HexagonHardwareLoops.cpp
@@ -26,7 +26,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "hwloops"
#include "llvm/ADT/SmallSet.h"
#include "Hexagon.h"
#include "HexagonTargetMachine.h"
@@ -47,6 +46,8 @@
using namespace llvm;
+#define DEBUG_TYPE "hwloops"
+
#ifndef NDEBUG
static cl::opt<int> HWLoopLimit("max-hwloop", cl::Hidden, cl::init(-1));
#endif
@@ -77,11 +78,11 @@ namespace {
initializeHexagonHardwareLoopsPass(*PassRegistry::getPassRegistry());
}
- virtual bool runOnMachineFunction(MachineFunction &MF);
+ bool runOnMachineFunction(MachineFunction &MF) override;
- const char *getPassName() const { return "Hexagon Hardware Loops"; }
+ const char *getPassName() const override { return "Hexagon Hardware Loops"; }
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<MachineDominatorTree>();
AU.addRequired<MachineLoopInfo>();
MachineFunctionPass::getAnalysisUsage(AU);
@@ -264,8 +265,8 @@ namespace {
return Contents.ImmVal;
}
- void print(raw_ostream &OS, const TargetMachine *TM = 0) const {
- const TargetRegisterInfo *TRI = TM ? TM->getRegisterInfo() : 0;
+ void print(raw_ostream &OS, const TargetMachine *TM = nullptr) const {
+ const TargetRegisterInfo *TRI = TM ? TM->getRegisterInfo() : nullptr;
if (isReg()) { OS << PrintReg(Contents.R.Reg, TRI, Contents.R.Sub); }
if (isImm()) { OS << Contents.ImmVal; }
}
@@ -369,7 +370,7 @@ bool HexagonHardwareLoops::findInductionRegister(MachineLoop *L,
} // for (instr)
SmallVector<MachineOperand,2> Cond;
- MachineBasicBlock *TB = 0, *FB = 0;
+ MachineBasicBlock *TB = nullptr, *FB = nullptr;
bool NotAnalyzed = TII->AnalyzeBranch(*Latch, TB, FB, Cond, false);
if (NotAnalyzed)
return false;
@@ -434,37 +435,37 @@ CountValue *HexagonHardwareLoops::getLoopTripCount(MachineLoop *L,
"Loop must have more than one incoming edge!");
MachineBasicBlock *Backedge = *PI++;
if (PI == TopMBB->pred_end()) // dead loop?
- return 0;
+ return nullptr;
MachineBasicBlock *Incoming = *PI++;
if (PI != TopMBB->pred_end()) // multiple backedges?
- return 0;
+ return nullptr;
// Make sure there is one incoming and one backedge and determine which
// is which.
if (L->contains(Incoming)) {
if (L->contains(Backedge))
- return 0;
+ return nullptr;
std::swap(Incoming, Backedge);
} else if (!L->contains(Backedge))
- return 0;
+ return nullptr;
// Look for the cmp instruction to determine if we can get a useful trip
// count. The trip count can be either a register or an immediate. The
// location of the value depends upon the type (reg or imm).
MachineBasicBlock *Latch = L->getLoopLatch();
if (!Latch)
- return 0;
+ return nullptr;
unsigned IVReg = 0;
int64_t IVBump = 0;
MachineInstr *IVOp;
bool FoundIV = findInductionRegister(L, IVReg, IVBump, IVOp);
if (!FoundIV)
- return 0;
+ return nullptr;
MachineBasicBlock *Preheader = L->getLoopPreheader();
- MachineOperand *InitialValue = 0;
+ MachineOperand *InitialValue = nullptr;
MachineInstr *IV_Phi = MRI->getVRegDef(IVReg);
for (unsigned i = 1, n = IV_Phi->getNumOperands(); i < n; i += 2) {
MachineBasicBlock *MBB = IV_Phi->getOperand(i+1).getMBB();
@@ -474,13 +475,13 @@ CountValue *HexagonHardwareLoops::getLoopTripCount(MachineLoop *L,
IVReg = IV_Phi->getOperand(i).getReg(); // Want IV reg after bump.
}
if (!InitialValue)
- return 0;
+ return nullptr;
SmallVector<MachineOperand,2> Cond;
- MachineBasicBlock *TB = 0, *FB = 0;
+ MachineBasicBlock *TB = nullptr, *FB = nullptr;
bool NotAnalyzed = TII->AnalyzeBranch(*Latch, TB, FB, Cond, false);
if (NotAnalyzed)
- return 0;
+ return nullptr;
MachineBasicBlock *Header = L->getHeader();
// TB must be non-null. If FB is also non-null, one of them must be
@@ -489,7 +490,7 @@ CountValue *HexagonHardwareLoops::getLoopTripCount(MachineLoop *L,
assert (TB && "Latch block without a branch?");
assert ((!FB || TB == Header || FB == Header) && "Branches not to header?");
if (!TB || (FB && TB != Header && FB != Header))
- return 0;
+ return nullptr;
// Branches of form "if (!P) ..." cause HexagonInstrInfo::AnalyzeBranch
// to put imm(0), followed by P in the vector Cond.
@@ -505,7 +506,7 @@ CountValue *HexagonHardwareLoops::getLoopTripCount(MachineLoop *L,
bool AnalyzedCmp = TII->analyzeCompare(CondI, CmpReg1, CmpReg2,
Mask, ImmValue);
if (!AnalyzedCmp)
- return 0;
+ return nullptr;
// The comparison operator type determines how we compute the loop
// trip count.
@@ -521,7 +522,7 @@ CountValue *HexagonHardwareLoops::getLoopTripCount(MachineLoop *L,
bool isSwapped = false;
const MachineOperand &Op1 = CondI->getOperand(1);
const MachineOperand &Op2 = CondI->getOperand(2);
- const MachineOperand *EndValue = 0;
+ const MachineOperand *EndValue = nullptr;
if (Op1.isReg()) {
if (Op2.isImm() || Op1.getReg() == IVReg)
@@ -533,7 +534,7 @@ CountValue *HexagonHardwareLoops::getLoopTripCount(MachineLoop *L,
}
if (!EndValue)
- return 0;
+ return nullptr;
switch (CondOpc) {
case Hexagon::CMPEQri:
@@ -552,7 +553,7 @@ CountValue *HexagonHardwareLoops::getLoopTripCount(MachineLoop *L,
case Hexagon::CMPbEQri_V4:
case Hexagon::CMPhEQri_V4: {
if (IVBump != 1)
- return 0;
+ return nullptr;
int64_t InitV, EndV;
// Since the comparisons are "ri", the EndValue should be an
@@ -562,26 +563,26 @@ CountValue *HexagonHardwareLoops::getLoopTripCount(MachineLoop *L,
// Allow InitialValue to be a register defined with an immediate.
if (InitialValue->isReg()) {
if (!defWithImmediate(InitialValue->getReg()))
- return 0;
+ return nullptr;
InitV = getImmediate(*InitialValue);
} else {
assert(InitialValue->isImm());
InitV = InitialValue->getImm();
}
if (InitV >= EndV)
- return 0;
+ return nullptr;
if (CondOpc == Hexagon::CMPbEQri_V4) {
if (!isInt<8>(InitV) || !isInt<8>(EndV))
- return 0;
+ return nullptr;
} else { // Hexagon::CMPhEQri_V4
if (!isInt<16>(InitV) || !isInt<16>(EndV))
- return 0;
+ return nullptr;
}
Cmp = !Negated ? Comparison::EQ : Comparison::NE;
break;
}
default:
- return 0;
+ return nullptr;
}
if (isSwapped)
@@ -591,14 +592,14 @@ CountValue *HexagonHardwareLoops::getLoopTripCount(MachineLoop *L,
unsigned R = InitialValue->getReg();
MachineBasicBlock *DefBB = MRI->getVRegDef(R)->getParent();
if (!MDT->properlyDominates(DefBB, Header))
- return 0;
+ return nullptr;
OldInsts.push_back(MRI->getVRegDef(R));
}
if (EndValue->isReg()) {
unsigned R = EndValue->getReg();
MachineBasicBlock *DefBB = MRI->getVRegDef(R)->getParent();
if (!MDT->properlyDominates(DefBB, Header))
- return 0;
+ return nullptr;
}
return computeCount(L, InitialValue, EndValue, IVReg, IVBump, Cmp);
@@ -616,7 +617,7 @@ CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop,
Comparison::Kind Cmp) const {
// Cannot handle comparison EQ, i.e. while (A == B).
if (Cmp == Comparison::EQ)
- return 0;
+ return nullptr;
// Check if either the start or end values are an assignment of an immediate.
// If so, use the immediate value rather than the register.
@@ -642,11 +643,11 @@ CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop,
// If loop executes while iv is "less" with the iv value going down, then
// the iv must wrap.
if (CmpLess && IVBump < 0)
- return 0;
+ return nullptr;
// If loop executes while iv is "greater" with the iv value going up, then
// the iv must wrap.
if (CmpGreater && IVBump > 0)
- return 0;
+ return nullptr;
if (Start->isImm() && End->isImm()) {
// Both, start and end are immediates.
@@ -654,15 +655,15 @@ CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop,
int64_t EndV = End->getImm();
int64_t Dist = EndV - StartV;
if (Dist == 0)
- return 0;
+ return nullptr;
bool Exact = (Dist % IVBump) == 0;
if (Cmp == Comparison::NE) {
if (!Exact)
- return 0;
+ return nullptr;
if ((Dist < 0) ^ (IVBump < 0))
- return 0;
+ return nullptr;
}
// For comparisons that include the final value (i.e. include equality
@@ -683,7 +684,7 @@ CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop,
uint64_t Count = Dist1;
if (Count > 0xFFFFFFFFULL)
- return 0;
+ return nullptr;
return new CountValue(CountValue::CV_Immediate, Count);
}
@@ -695,7 +696,7 @@ CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop,
// If the induction variable bump is not a power of 2, quit.
// Otherwise we'd need a general integer division.
if (!isPowerOf2_64(abs64(IVBump)))
- return 0;
+ return nullptr;
MachineBasicBlock *PH = Loop->getLoopPreheader();
assert (PH && "Should have a preheader by now");
@@ -766,7 +767,7 @@ CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop,
// Hardware loops cannot handle 64-bit registers. If it's a double
// register, it has to have a subregister.
if (!SR && RC == &Hexagon::DoubleRegsRegClass)
- return 0;
+ return nullptr;
const TargetRegisterClass *IntRC = &Hexagon::IntRegsRegClass;
// Compute DistR (register with the distance between Start and End).
@@ -1013,7 +1014,7 @@ bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L) {
MachineBasicBlock *LastMBB = L->getExitingBlock();
// Don't generate hw loop if the loop has more than one exit.
- if (LastMBB == 0)
+ if (!LastMBB)
return false;
MachineBasicBlock::iterator LastI = LastMBB->getFirstTerminator();
@@ -1035,7 +1036,7 @@ bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L) {
SmallVector<MachineInstr*, 2> OldInsts;
// Are we able to determine the trip count for the loop?
CountValue *TripCount = getLoopTripCount(L, OldInsts);
- if (TripCount == 0)
+ if (!TripCount)
return false;
// Is the trip count available in the preheader?
@@ -1127,7 +1128,7 @@ bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L) {
if (LastI != LastMBB->end())
LastI = LastMBB->erase(LastI);
SmallVector<MachineOperand, 0> Cond;
- TII->InsertBranch(*LastMBB, BranchTarget, 0, Cond, LastIDL);
+ TII->InsertBranch(*LastMBB, BranchTarget, nullptr, Cond, LastIDL);
}
} else {
// Conditional branch to loop start; just delete it.
@@ -1196,7 +1197,7 @@ MachineInstr *HexagonHardwareLoops::defWithImmediate(unsigned R) {
case Hexagon::CONST64_Int_Real:
return DI;
}
- return 0;
+ return nullptr;
}
@@ -1291,7 +1292,7 @@ bool HexagonHardwareLoops::fixupInductionVariable(MachineLoop *L) {
if (IndRegs.empty())
return false;
- MachineBasicBlock *TB = 0, *FB = 0;
+ MachineBasicBlock *TB = nullptr, *FB = nullptr;
SmallVector<MachineOperand,2> Cond;
// AnalyzeBranch returns true if it fails to analyze the branch.
bool NotAnalyzed = TII->AnalyzeBranch(*Latch, TB, FB, Cond, false);
@@ -1322,7 +1323,7 @@ bool HexagonHardwareLoops::fixupInductionVariable(MachineLoop *L) {
return false;
SmallSet<unsigned,2> CmpRegs;
- MachineOperand *CmpImmOp = 0;
+ MachineOperand *CmpImmOp = nullptr;
// Go over all operands to the compare and look for immediate and register
// operands. Assume that if the compare has a single register use and a
@@ -1420,7 +1421,7 @@ MachineBasicBlock *HexagonHardwareLoops::createPreheaderForLoop(
DebugLoc DL;
if (!Latch || Header->hasAddressTaken())
- return 0;
+ return nullptr;
typedef MachineBasicBlock::instr_iterator instr_iterator;
@@ -1429,17 +1430,17 @@ MachineBasicBlock *HexagonHardwareLoops::createPreheaderForLoop(
typedef std::vector<MachineBasicBlock*> MBBVector;
MBBVector Preds(Header->pred_begin(), Header->pred_end());
SmallVector<MachineOperand,2> Tmp1;
- MachineBasicBlock *TB = 0, *FB = 0;
+ MachineBasicBlock *TB = nullptr, *FB = nullptr;
if (TII->AnalyzeBranch(*Latch, TB, FB, Tmp1, false))
- return 0;
+ return nullptr;
for (MBBVector::iterator I = Preds.begin(), E = Preds.end(); I != E; ++I) {
MachineBasicBlock *PB = *I;
if (PB != Latch) {
bool NotAnalyzed = TII->AnalyzeBranch(*PB, TB, FB, Tmp1, false);
if (NotAnalyzed)
- return 0;
+ return nullptr;
}
}
@@ -1515,7 +1516,7 @@ MachineBasicBlock *HexagonHardwareLoops::createPreheaderForLoop(
SmallVector<MachineOperand,1> Tmp2;
SmallVector<MachineOperand,1> EmptyCond;
- TB = FB = 0;
+ TB = FB = nullptr;
for (MBBVector::iterator I = Preds.begin(), E = Preds.end(); I != E; ++I) {
MachineBasicBlock *PB = *I;
@@ -1525,22 +1526,22 @@ MachineBasicBlock *HexagonHardwareLoops::createPreheaderForLoop(
(void)NotAnalyzed; // suppress compiler warning
assert (!NotAnalyzed && "Should be analyzable!");
if (TB != Header && (Tmp2.empty() || FB != Header))
- TII->InsertBranch(*PB, NewPH, 0, EmptyCond, DL);
+ TII->InsertBranch(*PB, NewPH, nullptr, EmptyCond, DL);
PB->ReplaceUsesOfBlockWith(Header, NewPH);
}
}
// It can happen that the latch block will fall through into the header.
// Insert an unconditional branch to the header.
- TB = FB = 0;
+ TB = FB = nullptr;
bool LatchNotAnalyzed = TII->AnalyzeBranch(*Latch, TB, FB, Tmp2, false);
(void)LatchNotAnalyzed; // suppress compiler warning
assert (!LatchNotAnalyzed && "Should be analyzable!");
if (!TB && !FB)
- TII->InsertBranch(*Latch, Header, 0, EmptyCond, DL);
+ TII->InsertBranch(*Latch, Header, nullptr, EmptyCond, DL);
// Finally, the branch from the preheader to the header.
- TII->InsertBranch(*NewPH, Header, 0, EmptyCond, DL);
+ TII->InsertBranch(*NewPH, Header, nullptr, EmptyCond, DL);
NewPH->addSuccessor(Header);
return NewPH;
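
Both getLoopTripCount and createPreheaderForLoop lean on the TargetInstrInfo::AnalyzeBranch contract that the `nullptr` changes make explicit: the caller zero-initializes the true/false destination blocks, the callee fills them in, and a `true` return means the terminator could not be analyzed. The recurring call-site shape, as in the hunks above:

    SmallVector<MachineOperand, 2> Cond;
    MachineBasicBlock *TB = nullptr, *FB = nullptr;      // filled in on success
    if (TII->AnalyzeBranch(*Latch, TB, FB, Cond, false)) // true => not analyzable
      return nullptr;
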
diff --git a/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
index ed8c786..dabe650 100644
--- a/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
+++ b/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
@@ -11,7 +11,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "hexagon-isel"
#include "Hexagon.h"
#include "HexagonISelLowering.h"
#include "HexagonTargetMachine.h"
@@ -23,6 +22,8 @@
#include "llvm/Support/Debug.h"
using namespace llvm;
+#define DEBUG_TYPE "hexagon-isel"
+
static
cl::opt<unsigned>
MaxNumOfUsesForConstExtenders("ga-max-num-uses-for-constant-extenders",
@@ -61,7 +62,7 @@ public:
}
bool hasNumUsesBelowThresGA(SDNode *N) const;
- SDNode *Select(SDNode *N);
+ SDNode *Select(SDNode *N) override;
// Complex Pattern Selectors.
inline bool foldGlobalAddress(SDValue &N, SDValue &R);
@@ -78,15 +79,15 @@ public:
bool SelectADDRriU6_1(SDValue& N, SDValue &R1, SDValue &R2);
bool SelectADDRriU6_2(SDValue& N, SDValue &R1, SDValue &R2);
- virtual const char *getPassName() const {
+ const char *getPassName() const override {
return "Hexagon DAG->DAG Pattern Instruction Selection";
}
/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
/// inline asm expressions.
- virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
- char ConstraintCode,
- std::vector<SDValue> &OutOps);
+ bool SelectInlineAsmMemoryOperand(const SDValue &Op,
+ char ConstraintCode,
+ std::vector<SDValue> &OutOps) override;
bool SelectAddr(SDNode *Op, SDValue Addr, SDValue &Base, SDValue &Offset);
SDNode *SelectLoad(SDNode *N);
@@ -186,7 +187,7 @@ FunctionPass *llvm::createHexagonISelDag(HexagonTargetMachine &TM,
static void initializePassOnce(PassRegistry &Registry) {
const char *Name = "Hexagon DAG->DAG Pattern Instruction Selection";
PassInfo *PI = new PassInfo(Name, "hexagon-isel",
- &SelectionDAGISel::ID, 0, false, false);
+ &SelectionDAGISel::ID, nullptr, false, false);
Registry.registerPass(*PI, true);
}
@@ -1238,7 +1239,7 @@ SDNode *HexagonDAGToDAGISel::SelectIntrinsicWOChain(SDNode *N) {
SDNode *PdRs = CurDAG->getMachineNode(Hexagon::TFR_PdRs, dl, MVT::i1,
SDValue(Arg, 0));
Ops.push_back(SDValue(PdRs,0));
- } else if (RC == NULL && (dyn_cast<ConstantSDNode>(Arg) != NULL)) {
+ } else if (!RC && (dyn_cast<ConstantSDNode>(Arg) != nullptr)) {
// This is an immediate operand. Lower it here, making sure that we DO have
// a const SDNode for the immediate value.
int32_t Val = cast<ConstantSDNode>(Arg)->getSExtValue();
@@ -1346,7 +1347,7 @@ SDNode *HexagonDAGToDAGISel::SelectAdd(SDNode *N) {
SDNode *HexagonDAGToDAGISel::Select(SDNode *N) {
if (N->isMachineOpcode()) {
N->setNodeId(-1);
- return NULL; // Already selected.
+ return nullptr; // Already selected.
}
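
The `dyn_cast<ConstantSDNode>(Arg) != nullptr` test above only asks whether the cast succeeds; in LLVM's casting API that question is normally spelled `isa<>`, with `cast<>` used once success is known. A hedged sketch of the equivalent form:

    if (!RC && isa<ConstantSDNode>(Arg)) {
      // Safe: the isa<> check above guarantees this cast<> cannot fail.
      int32_t Val = cast<ConstantSDNode>(Arg)->getSExtValue();
      (void)Val; // lowered as an immediate operand from here on
    }
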
diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp
index 92b794d..b8e5d24 100644
--- a/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -39,6 +39,8 @@
using namespace llvm;
+#define DEBUG_TYPE "hexagon-lowering"
+
static cl::opt<bool>
EmitJumpTables("hexagon-emit-jump-tables", cl::init(true), cl::Hidden,
cl::desc("Control jump table emission on Hexagon target"));
@@ -135,7 +137,7 @@ CC_Hexagon_VarArg (unsigned ValNo, MVT ValVT,
State.addLoc(CCValAssign::getMem(ValNo, ValVT, ofst, LocVT, LocInfo));
return false;
}
- llvm_unreachable(0);
+ llvm_unreachable(nullptr);
}
@@ -182,7 +184,7 @@ static bool CC_Hexagon32(unsigned ValNo, MVT ValVT,
MVT LocVT, CCValAssign::LocInfo LocInfo,
ISD::ArgFlagsTy ArgFlags, CCState &State) {
- static const uint16_t RegList[] = {
+ static const MCPhysReg RegList[] = {
Hexagon::R0, Hexagon::R1, Hexagon::R2, Hexagon::R3, Hexagon::R4,
Hexagon::R5
};
@@ -205,10 +207,10 @@ static bool CC_Hexagon64(unsigned ValNo, MVT ValVT,
return false;
}
- static const uint16_t RegList1[] = {
+ static const MCPhysReg RegList1[] = {
Hexagon::D1, Hexagon::D2
};
- static const uint16_t RegList2[] = {
+ static const MCPhysReg RegList2[] = {
Hexagon::R1, Hexagon::R3
};
if (unsigned Reg = State.AllocateReg(RegList1, RegList2, 2)) {
@@ -346,8 +348,7 @@ HexagonTargetLowering::LowerReturn(SDValue Chain,
if (Flag.getNode())
RetOps.push_back(Flag);
- return DAG.getNode(HexagonISD::RET_FLAG, dl, MVT::Other,
- &RetOps[0], RetOps.size());
+ return DAG.getNode(HexagonISD::RET_FLAG, dl, MVT::Other, RetOps);
}
@@ -410,7 +411,7 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
int NumNamedVarArgParams = -1;
if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Callee))
{
- const Function* CalleeFn = NULL;
+ const Function* CalleeFn = nullptr;
Callee = DAG.getTargetGlobalAddress(GA->getGlobal(), dl, MVT::i32);
if ((CalleeFn = dyn_cast<Function>(GA->getGlobal())))
{
@@ -520,8 +521,7 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Transform all store nodes into one single node because all store
// nodes are independent of each other.
if (!MemOpChains.empty()) {
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &MemOpChains[0],
- MemOpChains.size());
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
}
if (!isTailCall)
@@ -595,9 +595,9 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
}
if (isTailCall)
- return DAG.getNode(HexagonISD::TC_RETURN, dl, NodeTys, &Ops[0], Ops.size());
+ return DAG.getNode(HexagonISD::TC_RETURN, dl, NodeTys, Ops);
- Chain = DAG.getNode(HexagonISD::CALL, dl, NodeTys, &Ops[0], Ops.size());
+ Chain = DAG.getNode(HexagonISD::CALL, dl, NodeTys, Ops);
InFlag = Chain.getValue(1);
// Create the CALLSEQ_END node.
@@ -817,7 +817,7 @@ HexagonTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
Sub);
SDValue Ops[2] = { ArgAdjust, CopyChain };
- return DAG.getMergeValues(Ops, 2, dl);
+ return DAG.getMergeValues(Ops, dl);
}
SDValue
@@ -916,8 +916,7 @@ const {
}
if (!MemOps.empty())
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &MemOps[0],
- MemOps.size());
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
if (isVarArg) {
// This will point to the next argument passed via stack.
@@ -1480,7 +1479,7 @@ HexagonTargetLowering::HexagonTargetLowering(HexagonTargetMachine
const char*
HexagonTargetLowering::getTargetNodeName(unsigned Opcode) const {
switch (Opcode) {
- default: return 0;
+ default: return nullptr;
case HexagonISD::CONST32: return "HexagonISD::CONST32";
case HexagonISD::CONST32_GP: return "HexagonISD::CONST32_GP";
case HexagonISD::CONST32_Int_Real: return "HexagonISD::CONST32_Int_Real";
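
Several LowerReturn/LowerCall hunks drop the `&Ops[0], Ops.size()` argument pairs: in LLVM 3.5, SelectionDAG::getNode and getMergeValues take ArrayRef<SDValue>, to which a SmallVector converts implicitly. The shape of the change, taken from the hunks above:

    // Before: DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
    //                     &MemOpChains[0], MemOpChains.size());
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
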
diff --git a/lib/Target/Hexagon/HexagonISelLowering.h b/lib/Target/Hexagon/HexagonISelLowering.h
index 73da226..4f27c27 100644
--- a/lib/Target/Hexagon/HexagonISelLowering.h
+++ b/lib/Target/Hexagon/HexagonISelLowering.h
@@ -92,14 +92,14 @@ namespace llvm {
const SmallVectorImpl<ISD::InputArg> &Ins,
SelectionDAG& DAG) const;
- virtual bool isTruncateFree(Type *Ty1, Type *Ty2) const;
- virtual bool isTruncateFree(EVT VT1, EVT VT2) const;
+ bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
+ bool isTruncateFree(EVT VT1, EVT VT2) const override;
- virtual bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const;
+ bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override;
- virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
- virtual const char *getTargetNodeName(unsigned Opcode) const;
+ const char *getTargetNodeName(unsigned Opcode) const override;
SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const;
@@ -109,12 +109,12 @@ namespace llvm {
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
SDLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const;
+ SmallVectorImpl<SDValue> &InVals) const override;
SDValue LowerGLOBALADDRESS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI,
- SmallVectorImpl<SDValue> &InVals) const;
+ SmallVectorImpl<SDValue> &InVals) const override;
SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
CallingConv::ID CallConv, bool isVarArg,
@@ -133,46 +133,45 @@ namespace llvm {
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
- SDLoc dl, SelectionDAG &DAG) const;
+ SDLoc dl, SelectionDAG &DAG) const override;
- virtual MachineBasicBlock
- *EmitInstrWithCustomInserter(MachineInstr *MI,
- MachineBasicBlock *BB) const;
+ MachineBasicBlock *
+ EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *BB) const override;
SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
- virtual EVT getSetCCResultType(LLVMContext &C, EVT VT) const {
+ EVT getSetCCResultType(LLVMContext &C, EVT VT) const override {
if (!VT.isVector())
return MVT::i1;
else
return EVT::getVectorVT(C, MVT::i1, VT.getVectorNumElements());
}
- virtual bool getPostIndexedAddressParts(SDNode *N, SDNode *Op,
- SDValue &Base, SDValue &Offset,
- ISD::MemIndexedMode &AM,
- SelectionDAG &DAG) const;
+ bool getPostIndexedAddressParts(SDNode *N, SDNode *Op,
+ SDValue &Base, SDValue &Offset,
+ ISD::MemIndexedMode &AM,
+ SelectionDAG &DAG) const override;
std::pair<unsigned, const TargetRegisterClass*>
getRegForInlineAsmConstraint(const std::string &Constraint,
- MVT VT) const;
+ MVT VT) const override;
// Intrinsics
- virtual SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op,
- SelectionDAG &DAG) const;
+ SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
/// isLegalAddressingMode - Return true if the addressing mode represented
/// by AM is legal for this target, for a load/store of the specified type.
/// The type may be VoidTy, in which case only return true if the addressing
/// mode is legal for a load/store of any legal type.
/// TODO: Handle pre/postinc as well.
- virtual bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const;
- virtual bool isFPImmLegal(const APFloat &Imm, EVT VT) const;
+ bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const override;
+ bool isFPImmLegal(const APFloat &Imm, EVT VT) const override;
/// isLegalICmpImmediate - Return true if the specified immediate is legal
/// icmp immediate, that is the target has icmp instructions which can
/// compare a register against the immediate without having to materialize
/// the immediate into a register.
- virtual bool isLegalICmpImmediate(int64_t Imm) const;
+ bool isLegalICmpImmediate(int64_t Imm) const override;
};
} // end namespace llvm
diff --git a/lib/Target/Hexagon/HexagonInstrFormats.td b/lib/Target/Hexagon/HexagonInstrFormats.td
index d25bfa8..1057343 100644
--- a/lib/Target/Hexagon/HexagonInstrFormats.td
+++ b/lib/Target/Hexagon/HexagonInstrFormats.td
@@ -8,7 +8,7 @@
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
-// Hexagon Intruction Flags +
+// Hexagon Instruction Flags +
//
// *** Must match HexagonBaseInfo.h ***
//===----------------------------------------------------------------------===//
@@ -68,7 +68,7 @@ def DoubleWordAccess : MemAccessSize<4>;// Double word access instruction (memd)
//===----------------------------------------------------------------------===//
-// Intruction Class Declaration +
+// Instruction Class Declaration +
//===----------------------------------------------------------------------===//
class OpcodeHexagon {
@@ -104,54 +104,72 @@ class InstHexagon<dag outs, dag ins, string asmstr, list<dag> pattern,
// Solo instructions, i.e., those that cannot be in a packet with others.
bits<1> isSolo = 0;
let TSFlags{5} = isSolo;
+ // Packed only with A or X-type instructions.
+ bits<1> isSoloAX = 0;
+ let TSFlags{6} = isSoloAX;
+ // Only A-type instruction in first slot or nothing.
+ bits<1> isSoloAin1 = 0;
+ let TSFlags{7} = isSoloAin1;
// Predicated instructions.
bits<1> isPredicated = 0;
- let TSFlags{6} = isPredicated;
+ let TSFlags{8} = isPredicated;
bits<1> isPredicatedFalse = 0;
- let TSFlags{7} = isPredicatedFalse;
+ let TSFlags{9} = isPredicatedFalse;
bits<1> isPredicatedNew = 0;
- let TSFlags{8} = isPredicatedNew;
+ let TSFlags{10} = isPredicatedNew;
+ bits<1> isPredicateLate = 0;
+ let TSFlags{11} = isPredicateLate; // Late predicate producer insn.
// New-value insn helper fields.
bits<1> isNewValue = 0;
- let TSFlags{9} = isNewValue; // New-value consumer insn.
+ let TSFlags{12} = isNewValue; // New-value consumer insn.
bits<1> hasNewValue = 0;
- let TSFlags{10} = hasNewValue; // New-value producer insn.
+ let TSFlags{13} = hasNewValue; // New-value producer insn.
bits<3> opNewValue = 0;
- let TSFlags{13-11} = opNewValue; // New-value produced operand.
- bits<2> opNewBits = 0;
- let TSFlags{15-14} = opNewBits; // New-value opcode bits location: 0, 8, 16.
+ let TSFlags{16-14} = opNewValue; // New-value produced operand.
bits<1> isNVStorable = 0;
- let TSFlags{16} = isNVStorable; // Store that can become new-value store.
+ let TSFlags{17} = isNVStorable; // Store that can become new-value store.
bits<1> isNVStore = 0;
- let TSFlags{17} = isNVStore; // New-value store insn.
+ let TSFlags{18} = isNVStore; // New-value store insn.
+ bits<1> isCVLoadable = 0;
+ let TSFlags{19} = isCVLoadable; // Load that can become cur-value load.
+ bits<1> isCVLoad = 0;
+ let TSFlags{20} = isCVLoad; // Cur-value load insn.
// Immediate extender helper fields.
bits<1> isExtendable = 0;
- let TSFlags{18} = isExtendable; // Insn may be extended.
+ let TSFlags{21} = isExtendable; // Insn may be extended.
bits<1> isExtended = 0;
- let TSFlags{19} = isExtended; // Insn must be extended.
+ let TSFlags{22} = isExtended; // Insn must be extended.
bits<3> opExtendable = 0;
- let TSFlags{22-20} = opExtendable; // Which operand may be extended.
+ let TSFlags{25-23} = opExtendable; // Which operand may be extended.
bits<1> isExtentSigned = 0;
- let TSFlags{23} = isExtentSigned; // Signed or unsigned range.
+ let TSFlags{26} = isExtentSigned; // Signed or unsigned range.
bits<5> opExtentBits = 0;
- let TSFlags{28-24} = opExtentBits; //Number of bits of range before extending.
+ let TSFlags{31-27} = opExtentBits; //Number of bits of range before extending.
+ bits<2> opExtentAlign = 0;
+ let TSFlags{33-32} = opExtentAlign; // Alignment exponent before extending.
// If an instruction is valid on a subtarget (v2-v5), set the corresponding
// bit from validSubTargets. v2 is the least significant bit.
// By default, instruction is valid on all subtargets.
SubTarget validSubTargets = HasV2SubT;
- let TSFlags{32-29} = validSubTargets.Value;
+ let TSFlags{37-34} = validSubTargets.Value;
// Addressing mode for load/store instructions.
AddrModeType addrMode = NoAddrMode;
- let TSFlags{35-33} = addrMode.Value;
+ let TSFlags{42-40} = addrMode.Value;
// Memory access size for mem access instructions (load/store)
MemAccessSize accessSize = NoMemAccess;
- let TSFlags{38-36} = accessSize.Value;
+ let TSFlags{45-43} = accessSize.Value;
+
+ bits<1> isTaken = 0;
+ let TSFlags {47} = isTaken; // Branch prediction.
+
+ bits<1> isFP = 0;
+ let TSFlags {48} = isFP; // Floating-point.
// Fields used for relation models.
string BaseOpcode = "";
@@ -173,14 +191,14 @@ class InstHexagon<dag outs, dag ins, string asmstr, list<dag> pattern,
}
//===----------------------------------------------------------------------===//
-// Intruction Classes Definitions +
+// Instruction Classes Definitions +
//===----------------------------------------------------------------------===//
// LD Instruction Class in V2/V3/V4.
// Definition of the instruction class NOT CHANGED.
class LDInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
- string cstr = "">
- : InstHexagon<outs, ins, asmstr, pattern, cstr, LD, TypeLD>;
+ string cstr = "", InstrItinClass itin = LD_tc_ld_SLOT01>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeLD>;
let mayLoad = 1 in
class LDInst2<dag outs, dag ins, string asmstr, list<dag> pattern = [],
@@ -199,16 +217,16 @@ class LDInstPost<dag outs, dag ins, string asmstr, list<dag> pattern = [],
let mayLoad = 1 in
class LD0Inst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
- string cstr = "">
- : LDInst<outs, ins, asmstr, pattern, cstr>;
+ string cstr = "", InstrItinClass itin=LD_tc_ld_SLOT0>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeLD>;
// ST Instruction Class in V2/V3 can take SLOT0 only.
// ST Instruction Class in V4 can take SLOT0 & SLOT1.
// Definition of the instruction class CHANGED from V2/V3 to V4.
let mayStore = 1 in
class STInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
- string cstr = "">
- : InstHexagon<outs, ins, asmstr, pattern, cstr, ST, TypeST>;
+ string cstr = "", InstrItinClass itin = ST_tc_st_SLOT01>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeST>;
class STInst2<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "">
@@ -216,39 +234,39 @@ class STInst2<dag outs, dag ins, string asmstr, list<dag> pattern = [],
let mayStore = 1 in
class ST0Inst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
- string cstr = "">
- : InstHexagon<outs, ins, asmstr, pattern, cstr, ST0, TypeST>;
+ string cstr = "", InstrItinClass itin = ST_tc_ld_SLOT0>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeST>;
// ST Instruction Class in V2/V3 can take SLOT0 only.
// ST Instruction Class in V4 can take SLOT0 & SLOT1.
// Definition of the instruction class CHANGED from V2/V3 to V4.
class STInstPost<dag outs, dag ins, string asmstr, list<dag> pattern = [],
- string cstr = "">
- : STInst<outs, ins, asmstr, pattern, cstr>;
+ string cstr = "", InstrItinClass itin = ST_tc_st_SLOT01>
+ : STInst<outs, ins, asmstr, pattern, cstr, itin>;
// SYSTEM Instruction Class in V4 can take SLOT0 only
// In V2/V3 we used ST for this but in v4 ST can take SLOT0 or SLOT1.
class SYSInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
- string cstr = "">
- : InstHexagon<outs, ins, asmstr, pattern, cstr, SYS, TypeSYSTEM>;
+ string cstr = "", InstrItinClass itin = ST_tc_3stall_SLOT0>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeSYSTEM>;
// ALU32 Instruction Class in V2/V3/V4.
// Definition of the instruction class NOT CHANGED.
class ALU32Inst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
- string cstr = "">
- : InstHexagon<outs, ins, asmstr, pattern, cstr, ALU32, TypeALU32>;
+ string cstr = "", InstrItinClass itin = ALU32_2op_tc_1_SLOT0123>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeALU32>;
// ALU64 Instruction Class in V2/V3.
// XTYPE Instruction Class in V4.
// Definition of the instruction class NOT CHANGED.
// Name of the Instruction Class changed from ALU64 to XTYPE from V2/V3 to V4.
class ALU64Inst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
- string cstr = "">
- : InstHexagon<outs, ins, asmstr, pattern, cstr, ALU64, TypeXTYPE>;
+ string cstr = "", InstrItinClass itin = ALU64_tc_2_SLOT23>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeXTYPE>;
class ALU64_acc<dag outs, dag ins, string asmstr, list<dag> pattern = [],
- string cstr = "">
- : ALU64Inst<outs, ins, asmstr, pattern, cstr>;
+ string cstr = "", InstrItinClass itin = ALU64_tc_2_SLOT23>
+ : ALU64Inst<outs, ins, asmstr, pattern, cstr, itin>;
// M Instruction Class in V2/V3.
@@ -256,55 +274,55 @@ class ALU64_acc<dag outs, dag ins, string asmstr, list<dag> pattern = [],
// Definition of the instruction class NOT CHANGED.
// Name of the Instruction Class changed from M to XTYPE from V2/V3 to V4.
class MInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
- string cstr = "">
- : InstHexagon<outs, ins, asmstr, pattern, cstr, M, TypeXTYPE>;
+ string cstr = "", InstrItinClass itin = M_tc_3x_SLOT23>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeXTYPE>;
// M Instruction Class in V2/V3.
// XTYPE Instruction Class in V4.
// Definition of the instruction class NOT CHANGED.
// Name of the Instruction Class changed from M to XTYPE from V2/V3 to V4.
class MInst_acc<dag outs, dag ins, string asmstr, list<dag> pattern = [],
- string cstr = "">
- : MInst<outs, ins, asmstr, pattern, cstr>;
+ string cstr = "", InstrItinClass itin = M_tc_2_SLOT23>
+ : MInst<outs, ins, asmstr, pattern, cstr, itin>;
// S Instruction Class in V2/V3.
// XTYPE Instruction Class in V4.
// Definition of the instruction class NOT CHANGED.
// Name of the Instruction Class changed from S to XTYPE from V2/V3 to V4.
class SInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
- string cstr = "">
- : InstHexagon<outs, ins, asmstr, pattern, cstr, S, TypeXTYPE>;
+ string cstr = "", InstrItinClass itin = S_2op_tc_1_SLOT23>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeXTYPE>;
// S Instruction Class in V2/V3.
// XTYPE Instruction Class in V4.
// Definition of the instruction class NOT CHANGED.
// Name of the Instruction Class changed from S to XTYPE from V2/V3 to V4.
class SInst_acc<dag outs, dag ins, string asmstr, list<dag> pattern = [],
- string cstr = "">
- : SInst<outs, ins, asmstr, pattern, cstr>;
+ string cstr = "", InstrItinClass itin = S_3op_tc_1_SLOT23>
+ : SInst<outs, ins, asmstr, pattern, cstr, itin>;
// J Instruction Class in V2/V3/V4.
// Definition of the instruction class NOT CHANGED.
class JInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
- string cstr = "">
- : InstHexagon<outs, ins, asmstr, pattern, cstr, J, TypeJ>;
+ string cstr = "", InstrItinClass itin = J_tc_2early_SLOT23>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeJ>;
// JR Instruction Class in V2/V3/V4.
// Definition of the instruction class NOT CHANGED.
class JRInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
- string cstr = "">
- : InstHexagon<outs, ins, asmstr, pattern, cstr, JR, TypeJR>;
+ string cstr = "", InstrItinClass itin = J_tc_2early_SLOT2>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeJR>;
// CR Instruction Class in V2/V3/V4.
// Definition of the instruction class NOT CHANGED.
class CRInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
- string cstr = "">
- : InstHexagon<outs, ins, asmstr, pattern, cstr, CR, TypeCR>;
+ string cstr = "", InstrItinClass itin = CR_tc_2early_SLOT3>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeCR>;
let isCodeGenOnly = 1, isPseudo = 1 in
class Endloop<dag outs, dag ins, string asmstr, list<dag> pattern = [],
- string cstr = "">
- : InstHexagon<outs, ins, asmstr, pattern, cstr, ENDLOOP, TypeENDLOOP>;
+ string cstr = "", InstrItinClass itin = J_tc_2early_SLOT0123>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeENDLOOP>;
let isCodeGenOnly = 1, isPseudo = 1 in
class Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern = [],
@@ -317,39 +335,40 @@ class PseudoM<dag outs, dag ins, string asmstr, list<dag> pattern = [],
: InstHexagon<outs, ins, asmstr, pattern, cstr, PSEUDOM, TypePSEUDO>;
//===----------------------------------------------------------------------===//
-// Intruction Classes Definitions -
+// Instruction Classes Definitions -
//===----------------------------------------------------------------------===//
//
// ALU32 patterns
//.
-class ALU32_rr<dag outs, dag ins, string asmstr, list<dag> pattern,
- string cstr = "">
- : ALU32Inst<outs, ins, asmstr, pattern, cstr>;
+class ALU32_rr<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "", InstrItinClass itin = ALU32_2op_tc_1_SLOT0123>
+ : ALU32Inst<outs, ins, asmstr, pattern, cstr, itin>;
-class ALU32_ir<dag outs, dag ins, string asmstr, list<dag> pattern,
- string cstr = "">
- : ALU32Inst<outs, ins, asmstr, pattern, cstr>;
+class ALU32_ir<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "", InstrItinClass itin = ALU32_2op_tc_1_SLOT0123>
+ : ALU32Inst<outs, ins, asmstr, pattern, cstr, itin>;
-class ALU32_ri<dag outs, dag ins, string asmstr, list<dag> pattern,
- string cstr = "">
- : ALU32Inst<outs, ins, asmstr, pattern, cstr>;
+class ALU32_ri<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "", InstrItinClass itin = ALU32_2op_tc_1_SLOT0123>
+ : ALU32Inst<outs, ins, asmstr, pattern, cstr, itin>;
+
+class ALU32_ii<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "", InstrItinClass itin = ALU32_2op_tc_1_SLOT0123>
+ : ALU32Inst<outs, ins, asmstr, pattern, cstr, itin>;
-class ALU32_ii<dag outs, dag ins, string asmstr, list<dag> pattern,
- string cstr = "">
- : ALU32Inst<outs, ins, asmstr, pattern, cstr>;
//
// ALU64 patterns.
//
-class ALU64_rr<dag outs, dag ins, string asmstr, list<dag> pattern,
- string cstr = "">
- : ALU64Inst<outs, ins, asmstr, pattern, cstr>;
+class ALU64_rr<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "", InstrItinClass itin = ALU64_tc_1_SLOT23>
+ : ALU64Inst<outs, ins, asmstr, pattern, cstr, itin>;
-class ALU64_ri<dag outs, dag ins, string asmstr, list<dag> pattern,
- string cstr = "">
- : ALU64Inst<outs, ins, asmstr, pattern, cstr>;
+class ALU64_ri<dag outs, dag ins, string asmstr, list<dag> pattern = [],
+ string cstr = "", InstrItinClass itin = ALU64_tc_1_SLOT23>
+ : ALU64Inst<outs, ins, asmstr, pattern, cstr, itin>;
// Post increment ST Instruction.
class STInstPI<dag outs, dag ins, string asmstr, list<dag> pattern = [],
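
The renumbered TSFlags layout above is decoded on the C++ side, which is why the banner insists it must match HexagonBaseInfo.h. A hedged sketch of how such fields are typically read back; the helper names are hypothetical, and the bit positions are the ones assigned in the + lines above (isPredicated in TSFlags{8}, addrMode in TSFlags{42-40}):

    static bool isPredicated(uint64_t TSFlags) {
      return (TSFlags >> 8) & 0x1;        // TSFlags{8}
    }
    static unsigned getAddrMode(uint64_t TSFlags) {
      return (TSFlags >> 40) & 0x7;       // TSFlags{42-40}
    }
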
diff --git a/lib/Target/Hexagon/HexagonInstrFormatsV4.td b/lib/Target/Hexagon/HexagonInstrFormatsV4.td
index 9fda0da..d92f97b 100644
--- a/lib/Target/Hexagon/HexagonInstrFormatsV4.td
+++ b/lib/Target/Hexagon/HexagonInstrFormatsV4.td
@@ -12,7 +12,7 @@
//===----------------------------------------------------------------------===//
//----------------------------------------------------------------------------//
-// Hexagon Intruction Flags +
+// Hexagon Instruction Flags
//
// *** Must match BaseInfo.h ***
//----------------------------------------------------------------------------//
@@ -22,30 +22,30 @@ def TypeNV : IType<10>;
def TypePREFIX : IType<30>;
//----------------------------------------------------------------------------//
-// Intruction Classes Definitions +
+// Instruction Classes Definitions
//----------------------------------------------------------------------------//
//
// NV type instructions.
//
class NVInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
- string cstr = "">
- : InstHexagon<outs, ins, asmstr, pattern, cstr, NV_V4, TypeNV>;
+ string cstr = "", InstrItinClass itin = NCJ_tc_3or4stall_SLOT0>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeNV>;
class NVInst_V4<dag outs, dag ins, string asmstr, list<dag> pattern = [],
- string cstr = "">
- : NVInst<outs, ins, asmstr, pattern, cstr>;
+ string cstr = "", InstrItinClass itin = NCJ_tc_3or4stall_SLOT0>
+ : NVInst<outs, ins, asmstr, pattern, cstr, itin>;
// Definition of Post increment new value store.
class NVInstPost_V4<dag outs, dag ins, string asmstr, list<dag> pattern = [],
- string cstr = "">
- : NVInst<outs, ins, asmstr, pattern, cstr>;
+ string cstr = "", InstrItinClass itin = ST_tc_st_SLOT0>
+ : NVInst<outs, ins, asmstr, pattern, cstr, itin>;
// Post increment ST Instruction.
let mayStore = 1 in
class NVInstPI_V4<dag outs, dag ins, string asmstr, list<dag> pattern = [],
- string cstr = "">
- : NVInst<outs, ins, asmstr, pattern, cstr>;
+ string cstr = "", InstrItinClass itin = ST_tc_st_SLOT0>
+ : NVInst<outs, ins, asmstr, pattern, cstr, itin>;
// New-value conditional branch.
class NCJInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
@@ -54,13 +54,14 @@ class NCJInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
let mayLoad = 1, mayStore = 1 in
class MEMInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
- string cstr = "">
- : InstHexagon<outs, ins, asmstr, pattern, cstr, MEM_V4, TypeMEMOP>;
+ string cstr = "", InstrItinClass itin = V4LDST_tc_st_SLOT0>
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeMEMOP>;
class MEMInst_V4<dag outs, dag ins, string asmstr, list<dag> pattern = [],
- string cstr = "">
- : MEMInst<outs, ins, asmstr, pattern, cstr>;
+ string cstr = "", InstrItinClass itin = V4LDST_tc_st_SLOT0>
+ : MEMInst<outs, ins, asmstr, pattern, cstr, itin>;
let isCodeGenOnly = 1 in
class EXTENDERInst<dag outs, dag ins, string asmstr, list<dag> pattern = []>
- : InstHexagon<outs, ins, asmstr, pattern, "", PREFIX, TypePREFIX>;
+ : InstHexagon<outs, ins, asmstr, pattern, "", EXTENDER_tc_1_SLOT0123,
+ TypePREFIX>;
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.cpp b/lib/Target/Hexagon/HexagonInstrInfo.cpp
index 21a12de..ea6367a 100644
--- a/lib/Target/Hexagon/HexagonInstrInfo.cpp
+++ b/lib/Target/Hexagon/HexagonInstrInfo.cpp
@@ -26,13 +26,16 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "hexagon-instrinfo"
+
#define GET_INSTRINFO_CTOR_DTOR
#define GET_INSTRMAP_INFO
#include "HexagonGenInstrInfo.inc"
#include "HexagonGenDFAPacketizer.inc"
-using namespace llvm;
-
///
/// Constants for Hexagon instructions.
///
@@ -135,7 +138,7 @@ HexagonInstrInfo::InsertBranch(MachineBasicBlock &MBB,MachineBasicBlock *TBB,
regPos = 1;
}
- if (FBB == 0) {
+ if (!FBB) {
if (Cond.empty()) {
// Due to a bug in TailMerging/CFG Optimization, we need to add a
// special case handling of a predicated jump followed by an
@@ -151,7 +154,7 @@ HexagonInstrInfo::InsertBranch(MachineBasicBlock &MBB,MachineBasicBlock *TBB,
if (NewTBB == NextBB) {
ReverseBranchCondition(Cond);
RemoveBranch(MBB);
- return InsertBranch(MBB, TBB, 0, Cond, DL);
+ return InsertBranch(MBB, TBB, nullptr, Cond, DL);
}
}
BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB);
@@ -174,8 +177,8 @@ bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
MachineBasicBlock *&FBB,
SmallVectorImpl<MachineOperand> &Cond,
bool AllowModify) const {
- TBB = NULL;
- FBB = NULL;
+ TBB = nullptr;
+ FBB = nullptr;
// If the block has no terminators, it just falls into the block after it.
MachineBasicBlock::instr_iterator I = MBB.instr_end();
@@ -224,7 +227,7 @@ bool HexagonInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
// Get the last instruction in the block.
MachineInstr *LastInst = I;
- MachineInstr *SecondLastInst = NULL;
+ MachineInstr *SecondLastInst = nullptr;
// Find one more terminator if present.
do {
if (&*I != LastInst && !I->isBundle() && isUnpredicatedTerminator(I)) {
@@ -557,7 +560,7 @@ MachineInstr *HexagonInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
const SmallVectorImpl<unsigned> &Ops,
int FI) const {
// Hexagon_TODO: Implement.
- return(0);
+ return nullptr;
}
unsigned HexagonInstrInfo::createVR(MachineFunction* MF, MVT VT) const {
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.h b/lib/Target/Hexagon/HexagonInstrInfo.h
index 5da23cb..6b032c9 100644
--- a/lib/Target/Hexagon/HexagonInstrInfo.h
+++ b/lib/Target/Hexagon/HexagonInstrInfo.h
@@ -40,124 +40,121 @@ public:
/// such, whenever a client has an instance of instruction info, it should
/// always be able to get register info as well (through this method).
///
- virtual const HexagonRegisterInfo &getRegisterInfo() const { return RI; }
+ const HexagonRegisterInfo &getRegisterInfo() const { return RI; }
/// isLoadFromStackSlot - If the specified machine instruction is a direct
/// load from a stack slot, return the virtual or physical register number of
/// the destination along with the FrameIndex of the loaded stack slot. If
/// not, return 0. This predicate must return 0 if the instruction has
/// any side effects other than loading from the stack slot.
- virtual unsigned isLoadFromStackSlot(const MachineInstr *MI,
- int &FrameIndex) const;
+ unsigned isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const override;
/// isStoreToStackSlot - If the specified machine instruction is a direct
/// store to a stack slot, return the virtual or physical register number of
/// the source reg along with the FrameIndex of the loaded stack slot. If
/// not, return 0. This predicate must return 0 if the instruction has
/// any side effects other than storing to the stack slot.
- virtual unsigned isStoreToStackSlot(const MachineInstr *MI,
- int &FrameIndex) const;
-
-
- virtual bool AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
- MachineBasicBlock *&FBB,
- SmallVectorImpl<MachineOperand> &Cond,
- bool AllowModify) const;
-
- virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
-
- virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
- MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond,
- DebugLoc DL) const;
-
- virtual bool analyzeCompare(const MachineInstr *MI,
- unsigned &SrcReg, unsigned &SrcReg2,
- int &Mask, int &Value) const;
-
- virtual void copyPhysReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I, DebugLoc DL,
- unsigned DestReg, unsigned SrcReg,
- bool KillSrc) const;
-
- virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- unsigned SrcReg, bool isKill, int FrameIndex,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const;
-
- virtual void storeRegToAddr(MachineFunction &MF, unsigned SrcReg, bool isKill,
- SmallVectorImpl<MachineOperand> &Addr,
- const TargetRegisterClass *RC,
- SmallVectorImpl<MachineInstr*> &NewMIs) const;
-
- virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- unsigned DestReg, int FrameIndex,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const;
-
- virtual void loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
- SmallVectorImpl<MachineOperand> &Addr,
- const TargetRegisterClass *RC,
- SmallVectorImpl<MachineInstr*> &NewMIs) const;
-
- virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr* MI,
- const SmallVectorImpl<unsigned> &Ops,
- int FrameIndex) const;
-
- virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr* MI,
- const SmallVectorImpl<unsigned> &Ops,
- MachineInstr* LoadMI) const {
- return 0;
+ unsigned isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const override;
+
+
+  bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const override;
+
+ unsigned RemoveBranch(MachineBasicBlock &MBB) const override;
+
+ unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const override;
+
+ bool analyzeCompare(const MachineInstr *MI,
+ unsigned &SrcReg, unsigned &SrcReg2,
+ int &Mask, int &Value) const override;
+
+ void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const override;
+
+ void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const override;
+
+ void storeRegToAddr(MachineFunction &MF, unsigned SrcReg, bool isKill,
+ SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const;
+
+ void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const override;
+
+ void loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
+ SmallVectorImpl<MachineOperand> &Addr,
+ const TargetRegisterClass *RC,
+ SmallVectorImpl<MachineInstr*> &NewMIs) const;
+
+ MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr* MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ int FrameIndex) const override;
+
+ MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr* MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ MachineInstr* LoadMI) const override {
+ return nullptr;
}
unsigned createVR(MachineFunction* MF, MVT VT) const;
- virtual bool isBranch(const MachineInstr *MI) const;
- virtual bool isPredicable(MachineInstr *MI) const;
- virtual bool
- PredicateInstruction(MachineInstr *MI,
- const SmallVectorImpl<MachineOperand> &Cond) const;
-
- virtual bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCycles,
- unsigned ExtraPredCycles,
- const BranchProbability &Probability) const;
-
- virtual bool isProfitableToIfCvt(MachineBasicBlock &TMBB,
- unsigned NumTCycles, unsigned ExtraTCycles,
- MachineBasicBlock &FMBB,
- unsigned NumFCycles, unsigned ExtraFCycles,
- const BranchProbability &Probability) const;
-
- virtual bool isPredicated(const MachineInstr *MI) const;
- virtual bool isPredicated(unsigned Opcode) const;
- virtual bool isPredicatedTrue(const MachineInstr *MI) const;
- virtual bool isPredicatedTrue(unsigned Opcode) const;
- virtual bool isPredicatedNew(const MachineInstr *MI) const;
- virtual bool isPredicatedNew(unsigned Opcode) const;
- virtual bool DefinesPredicate(MachineInstr *MI,
- std::vector<MachineOperand> &Pred) const;
- virtual bool
- SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
- const SmallVectorImpl<MachineOperand> &Pred2) const;
-
- virtual bool
- ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
-
- virtual bool
- isProfitableToDupForIfCvt(MachineBasicBlock &MBB,unsigned NumCycles,
- const BranchProbability &Probability) const;
-
- virtual DFAPacketizer*
+ bool isBranch(const MachineInstr *MI) const;
+ bool isPredicable(MachineInstr *MI) const override;
+ bool PredicateInstruction(MachineInstr *MI,
+ const SmallVectorImpl<MachineOperand> &Cond) const override;
+
+ bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCycles,
+ unsigned ExtraPredCycles,
+ const BranchProbability &Probability) const override;
+
+ bool isProfitableToIfCvt(MachineBasicBlock &TMBB,
+ unsigned NumTCycles, unsigned ExtraTCycles,
+ MachineBasicBlock &FMBB,
+ unsigned NumFCycles, unsigned ExtraFCycles,
+ const BranchProbability &Probability) const override;
+
+ bool isPredicated(const MachineInstr *MI) const override;
+ bool isPredicated(unsigned Opcode) const;
+ bool isPredicatedTrue(const MachineInstr *MI) const;
+ bool isPredicatedTrue(unsigned Opcode) const;
+ bool isPredicatedNew(const MachineInstr *MI) const;
+ bool isPredicatedNew(unsigned Opcode) const;
+ bool DefinesPredicate(MachineInstr *MI,
+ std::vector<MachineOperand> &Pred) const override;
+ bool SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
+ const SmallVectorImpl<MachineOperand> &Pred2) const override;
+
+ bool
+ ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override;
+
+ bool isProfitableToDupForIfCvt(MachineBasicBlock &MBB, unsigned NumCycles,
+ const BranchProbability &Probability) const override;
+
+ DFAPacketizer*
CreateTargetScheduleState(const TargetMachine *TM,
- const ScheduleDAG *DAG) const;
+ const ScheduleDAG *DAG) const override;
- virtual bool isSchedulingBoundary(const MachineInstr *MI,
- const MachineBasicBlock *MBB,
- const MachineFunction &MF) const;
+ bool isSchedulingBoundary(const MachineInstr *MI,
+ const MachineBasicBlock *MBB,
+ const MachineFunction &MF) const override;
bool isValidOffset(const int Opcode, const int Offset) const;
bool isValidAutoIncImm(const EVT VT, const int Offset) const;
bool isMemOp(const MachineInstr *MI) const;
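
Replacing virtual with override throughout this header also hardens it against interface drift: override asks the compiler to verify that the declaration really matches a base-class virtual, so a changed base signature becomes a compile error instead of a silently unused new virtual. A reduced sketch under assumed names (Base/Derived stand in for the TargetInstrInfo hierarchy):

    struct Base {
      virtual unsigned isLoadFromStackSlot(const int *MI, int &FI) const { return 0; }
      virtual ~Base() = default;
    };

    struct Derived : Base {
      // If the base ever gains a parameter or loses const, this line stops
      // compiling rather than quietly declaring an unrelated new virtual.
      unsigned isLoadFromStackSlot(const int *MI, int &FI) const override { return 1; }
    };

    int main() {
      Derived D;
      Base &B = D;
      int FI = 0;
      return B.isLoadFromStackSlot(nullptr, FI) == 1 ? 0 : 1; // dispatches to Derived
    }
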
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.td b/lib/Target/Hexagon/HexagonInstrInfo.td
index c96aaca..4dcf101 100644
--- a/lib/Target/Hexagon/HexagonInstrInfo.td
+++ b/lib/Target/Hexagon/HexagonInstrInfo.td
@@ -768,12 +768,13 @@ class T_JMP <dag InsDag, list<dag> JumpList = []>
let InputType = "imm", isExtendable = 1, opExtendable = 1, isExtentSigned = 1,
Defs = [PC], isPredicated = 1, opExtentBits = 17 in
-class T_JMP_c <bit PredNot, bit isPredNew, bit isTaken>:
+class T_JMP_c <bit PredNot, bit isPredNew, bit isTak>:
JInst<(outs ), (ins PredRegs:$src, brtarget:$dst),
!if(PredNot, "if (!$src", "if ($src")#
!if(isPredNew, ".new) ", ") ")#"jump"#
- !if(isPredNew, !if(isTaken, ":t ", ":nt "), " ")#"$dst"> {
+ !if(isPredNew, !if(isTak, ":t ", ":nt "), " ")#"$dst"> {
+ let isTaken = isTak;
let isBrTaken = !if(isPredNew, !if(isTaken, "true", "false"), "");
let isPredicatedFalse = PredNot;
let isPredicatedNew = isPredNew;
@@ -784,7 +785,7 @@ class T_JMP_c <bit PredNot, bit isPredNew, bit isTaken>:
let Inst{27-24} = 0b1100;
let Inst{21} = PredNot;
- let Inst{12} = !if(isPredNew, isTaken, zero);
+ let Inst{12} = !if(isPredNew, isTak, zero);
let Inst{11} = isPredNew;
let Inst{9-8} = src;
let Inst{23-22} = dst{16-15};
@@ -806,12 +807,13 @@ class T_JMPr<dag InsDag = (ins IntRegs:$dst)>
}
let Defs = [PC], isPredicated = 1, InputType = "reg" in
-class T_JMPr_c <bit PredNot, bit isPredNew, bit isTaken>:
+class T_JMPr_c <bit PredNot, bit isPredNew, bit isTak>:
JRInst <(outs ), (ins PredRegs:$src, IntRegs:$dst),
!if(PredNot, "if (!$src", "if ($src")#
!if(isPredNew, ".new) ", ") ")#"jumpr"#
- !if(isPredNew, !if(isTaken, ":t ", ":nt "), " ")#"$dst"> {
+ !if(isPredNew, !if(isTak, ":t ", ":nt "), " ")#"$dst"> {
+ let isTaken = isTak;
let isBrTaken = !if(isPredNew, !if(isTaken, "true", "false"), "");
let isPredicatedFalse = PredNot;
let isPredicatedNew = isPredNew;
@@ -823,7 +825,7 @@ class T_JMPr_c <bit PredNot, bit isPredNew, bit isTaken>:
let Inst{27-22} = 0b001101;
let Inst{21} = PredNot;
let Inst{20-16} = dst;
- let Inst{12} = !if(isPredNew, isTaken, zero);
+ let Inst{12} = !if(isPredNew, isTak, zero);
let Inst{11} = isPredNew;
let Inst{9-8} = src;
let Predicates = !if(isPredNew, [HasV3T], [HasV2T]);
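
The isTaken-to-isTak renames in these TableGen classes (continued in HexagonInstrInfoV4.td below) exist so that each class can set the instruction's new isTaken flag from the incoming template bit (let isTaken = isTak;); keeping the old name would leave that let ambiguous between the field and the like-named template argument. C++ constructors have the same shadowing hazard, sketched here with illustrative names:

    struct Branch {
      bool isTaken;
      explicit Branch(bool isTak) {
        // Had the parameter also been named 'isTaken', the assignment
        // 'isTaken = isTaken;' would copy the parameter onto itself and
        // leave the member unset unless 'this->' were spelled out.
        isTaken = isTak;
      }
    };

    int main() {
      Branch B(true);
      return B.isTaken ? 0 : 1;
    }
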
diff --git a/lib/Target/Hexagon/HexagonInstrInfoV4.td b/lib/Target/Hexagon/HexagonInstrInfoV4.td
index a95fb80..db5b7ea 100644
--- a/lib/Target/Hexagon/HexagonInstrInfoV4.td
+++ b/lib/Target/Hexagon/HexagonInstrInfoV4.td
@@ -1004,13 +1004,13 @@ defm POST_STwri: ST_PostInc_nv <"memw", "STriw", IntRegs, s4_2Imm>, AddrModeRel;
let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 11 in
class NVJrr_template<string mnemonic, bits<3> majOp, bit NvOpNum,
- bit isNegCond, bit isTaken>
+ bit isNegCond, bit isTak>
: NVInst_V4<(outs),
(ins IntRegs:$src1, IntRegs:$src2, brtarget:$offset),
"if ("#!if(isNegCond, "!","")#mnemonic#
"($src1"#!if(!eq(NvOpNum, 0),".new, ",", ")#
"$src2"#!if(!eq(NvOpNum, 1),".new))","))")#" jump:"
- #!if(isTaken, "t","nt")#" $offset",
+ #!if(isTak, "t","nt")#" $offset",
[]>, Requires<[HasV4T]> {
bits<5> src1;
@@ -1019,6 +1019,7 @@ class NVJrr_template<string mnemonic, bits<3> majOp, bit NvOpNum,
bits<5> RegOp; // Non-New-Value Operand
bits<11> offset;
+ let isTaken = isTak;
let isBrTaken = !if(isTaken, "true", "false");
let isPredicatedFalse = isNegCond;
@@ -1030,7 +1031,7 @@ class NVJrr_template<string mnemonic, bits<3> majOp, bit NvOpNum,
let Inst{25-23} = majOp;
let Inst{22} = isNegCond;
let Inst{18-16} = Ns;
- let Inst{13} = isTaken;
+ let Inst{13} = isTak;
let Inst{12-8} = RegOp;
let Inst{21-20} = offset{10-9};
let Inst{7-1} = offset{8-2};
@@ -1078,13 +1079,14 @@ let isPredicated = 1, isBranch = 1, isNewValue = 1, isTerminator = 1,
let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 11 in
class NVJri_template<string mnemonic, bits<3> majOp, bit isNegCond,
- bit isTaken>
+ bit isTak>
: NVInst_V4<(outs),
(ins IntRegs:$src1, u5Imm:$src2, brtarget:$offset),
"if ("#!if(isNegCond, "!","")#mnemonic#"($src1.new, #$src2)) jump:"
- #!if(isTaken, "t","nt")#" $offset",
+ #!if(isTak, "t","nt")#" $offset",
[]>, Requires<[HasV4T]> {
+ let isTaken = isTak;
let isPredicatedFalse = isNegCond;
let isBrTaken = !if(isTaken, "true", "false");
@@ -1097,7 +1099,7 @@ class NVJri_template<string mnemonic, bits<3> majOp, bit isNegCond,
let Inst{25-23} = majOp;
let Inst{22} = isNegCond;
let Inst{18-16} = src1;
- let Inst{13} = isTaken;
+ let Inst{13} = isTak;
let Inst{12-8} = src2;
let Inst{21-20} = offset{10-9};
let Inst{7-1} = offset{8-2};
@@ -1135,14 +1137,15 @@ let isPredicated = 1, isBranch = 1, isNewValue = 1, isTerminator = 1,
let isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 11 in
class NVJ_ConstImm_template<string mnemonic, bits<3> majOp, string ImmVal,
- bit isNegCond, bit isTaken>
+ bit isNegCond, bit isTak>
: NVInst_V4<(outs),
(ins IntRegs:$src1, brtarget:$offset),
"if ("#!if(isNegCond, "!","")#mnemonic
#"($src1.new, #"#ImmVal#")) jump:"
- #!if(isTaken, "t","nt")#" $offset",
+ #!if(isTak, "t","nt")#" $offset",
[]>, Requires<[HasV4T]> {
+ let isTaken = isTak;
let isPredicatedFalse = isNegCond;
let isBrTaken = !if(isTaken, "true", "false");
@@ -1153,7 +1156,7 @@ class NVJ_ConstImm_template<string mnemonic, bits<3> majOp, string ImmVal,
let Inst{25-23} = majOp;
let Inst{22} = isNegCond;
let Inst{18-16} = src1;
- let Inst{13} = isTaken;
+ let Inst{13} = isTak;
let Inst{21-20} = offset{10-9};
let Inst{7-1} = offset{8-2};
}
@@ -2019,9 +2022,10 @@ multiclass MemOpi_bitPats <PatFrag ldOp, PatFrag stOp, PatLeaf immPred,
// mem[bhw](Rs+#0) = [clrbit|setbit](#U5)
let AddedComplexity = 225 in
- def : Pat <(stOp (OpNode (ldOp addrPred:$addr), immPred:$bitend),
- addrPred:$addr),
- (MI IntRegs:$addr, #0, (xformFunc immPred:$bitend))>;
+ def : Pat <(stOp (OpNode (ldOp (addrPred IntRegs:$addr, extPred:$offset)),
+ immPred:$bitend),
+ (addrPred (i32 IntRegs:$addr), extPred:$offset)),
+ (MI IntRegs:$addr, extPred:$offset, (xformFunc immPred:$bitend))>;
}
multiclass MemOpi_bitExtType<PatFrag ldOpByte, PatFrag ldOpHalf > {
@@ -2065,9 +2069,10 @@ multiclass MemOpr_Pats <PatFrag ldOp, PatFrag stOp, ComplexPattern addrPred,
PatLeaf extPred, InstHexagon MI, SDNode OpNode> {
let AddedComplexity = 141 in
// mem[bhw](Rs+#0) [+-&|]= Rt
- def : Pat <(stOp (OpNode (ldOp addrPred:$addr), (i32 IntRegs:$addend)),
- addrPred:$addr),
- (MI IntRegs:$addr, #0, (i32 IntRegs:$addend) )>;
+ def : Pat <(stOp (OpNode (ldOp (addrPred IntRegs:$addr, extPred:$offset)),
+ (i32 IntRegs:$addend)),
+ (addrPred (i32 IntRegs:$addr), extPred:$offset)),
+ (MI IntRegs:$addr, extPred:$offset, (i32 IntRegs:$addend) )>;
// mem[bhw](Rs+#U6:[012]) [+-&|]= Rt
let AddedComplexity = 150 in
diff --git a/lib/Target/Hexagon/HexagonMachineScheduler.cpp b/lib/Target/Hexagon/HexagonMachineScheduler.cpp
index 51318ff..7dd6e95 100644
--- a/lib/Target/Hexagon/HexagonMachineScheduler.cpp
+++ b/lib/Target/Hexagon/HexagonMachineScheduler.cpp
@@ -12,17 +12,17 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "misched"
-
#include "HexagonMachineScheduler.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/IR/Function.h"
using namespace llvm;
+#define DEBUG_TYPE "misched"
+
/// Platform specific modifications to DAG.
void VLIWMachineScheduler::postprocessDAG() {
- SUnit* LastSequentialCall = NULL;
+ SUnit* LastSequentialCall = nullptr;
  // Currently we only catch the situation when a compare gets scheduled
  // before the preceding call.
for (unsigned su = 0, e = SUnits.size(); su != e; ++su) {
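
Several files in this patch move #define DEBUG_TYPE from the top of the file to just after the #include block. Defined first, the macro leaks into every included header and can clash with a DEBUG_TYPE that a header defines or expects; defined after the includes, it names the debug category for this translation unit only. A sketch of the resulting convention (the DEBUG/dbgs machinery is LLVM's and links against its Support library; the message is illustrative):

    #include "llvm/Support/Debug.h"
    #include "llvm/Support/raw_ostream.h"

    using namespace llvm;

    // Only after all headers are in: the category seen by -debug-only=misched.
    #define DEBUG_TYPE "misched"

    void noteSchedulingDecision() {
      // Active only in builds with assertions enabled; compiled away otherwise.
      DEBUG(dbgs() << "scheduling note\n");
    }
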
@@ -398,13 +398,13 @@ SUnit *ConvergingVLIWScheduler::VLIWSchedBoundary::pickOnlyChoice() {
for (unsigned i = 0; Available.empty(); ++i) {
assert(i <= (HazardRec->getMaxLookAhead() + MaxMinLatency) &&
"permanent hazard"); (void)i;
- ResourceModel->reserveResources(0);
+ ResourceModel->reserveResources(nullptr);
bumpCycle();
releasePending();
}
if (Available.size() == 1)
return *Available.begin();
- return NULL;
+ return nullptr;
}
#ifndef NDEBUG
@@ -424,7 +424,7 @@ void ConvergingVLIWScheduler::traceCandidate(const char *Label,
/// getSingleUnscheduledPred - If there is exactly one unscheduled predecessor
/// of SU, return it; otherwise return null.
static SUnit *getSingleUnscheduledPred(SUnit *SU) {
- SUnit *OnlyAvailablePred = 0;
+ SUnit *OnlyAvailablePred = nullptr;
for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
I != E; ++I) {
SUnit &Pred = *I->getSUnit();
@@ -432,7 +432,7 @@ static SUnit *getSingleUnscheduledPred(SUnit *SU) {
// We found an available, but not scheduled, predecessor. If it's the
// only one we have found, keep track of it... otherwise give up.
if (OnlyAvailablePred && OnlyAvailablePred != &Pred)
- return 0;
+ return nullptr;
OnlyAvailablePred = &Pred;
}
}
@@ -442,7 +442,7 @@ static SUnit *getSingleUnscheduledPred(SUnit *SU) {
/// getSingleUnscheduledSucc - If there is exactly one unscheduled successor
/// of SU, return it; otherwise return null.
static SUnit *getSingleUnscheduledSucc(SUnit *SU) {
- SUnit *OnlyAvailableSucc = 0;
+ SUnit *OnlyAvailableSucc = nullptr;
for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
I != E; ++I) {
SUnit &Succ = *I->getSUnit();
@@ -450,7 +450,7 @@ static SUnit *getSingleUnscheduledSucc(SUnit *SU) {
// We found an available, but not scheduled, successor. If it's the
// only one we have found, keep track of it... otherwise give up.
if (OnlyAvailableSucc && OnlyAvailableSucc != &Succ)
- return 0;
+ return nullptr;
OnlyAvailableSucc = &Succ;
}
}
@@ -639,7 +639,7 @@ SUnit *ConvergingVLIWScheduler::pickNode(bool &IsTopNode) {
if (DAG->top() == DAG->bottom()) {
assert(Top.Available.empty() && Top.Pending.empty() &&
Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
- return NULL;
+ return nullptr;
}
SUnit *SU;
if (llvm::ForceTopDown) {
diff --git a/lib/Target/Hexagon/HexagonMachineScheduler.h b/lib/Target/Hexagon/HexagonMachineScheduler.h
index 300f1c7..99100a1 100644
--- a/lib/Target/Hexagon/HexagonMachineScheduler.h
+++ b/lib/Target/Hexagon/HexagonMachineScheduler.h
@@ -14,7 +14,6 @@
#ifndef HEXAGONASMPRINTER_H
#define HEXAGONASMPRINTER_H
-#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/PriorityQueue.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
@@ -57,7 +56,7 @@ class VLIWResourceModel {
public:
VLIWResourceModel(const TargetMachine &TM, const TargetSchedModel *SM) :
SchedModel(SM), TotalPackets(0) {
- ResourcesModel = TM.getInstrInfo()->CreateTargetScheduleState(&TM,NULL);
+ ResourcesModel = TM.getInstrInfo()->CreateTargetScheduleState(&TM, nullptr);
// This hard requirement could be relaxed,
// but for now do not let it proceed.
@@ -94,8 +93,9 @@ VLIWResourceModel(const TargetMachine &TM, const TargetSchedModel *SM) :
/// top-level schedule() driver.
class VLIWMachineScheduler : public ScheduleDAGMILive {
public:
- VLIWMachineScheduler(MachineSchedContext *C, MachineSchedStrategy *S):
- ScheduleDAGMILive(C, S) {}
+ VLIWMachineScheduler(MachineSchedContext *C,
+ std::unique_ptr<MachineSchedStrategy> S)
+ : ScheduleDAGMILive(C, std::move(S)) {}
/// Schedule - This is called back from ScheduleDAGInstrs::Run() when it's
/// time to do some work.
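
VLIWMachineScheduler's constructor now takes its strategy as std::unique_ptr and forwards it with std::move, so ownership of the MachineSchedStrategy is stated in the signature instead of being an implicit raw-pointer handoff. The pattern in isolation, with stand-in types:

    #include <memory>
    #include <utility>

    struct Strategy { virtual ~Strategy() = default; };

    struct Scheduler {
      // Passing unique_ptr by value forces the caller to hand over
      // ownership (std::move or a temporary); the member then keeps it.
      explicit Scheduler(std::unique_ptr<Strategy> S) : Strat(std::move(S)) {}
      std::unique_ptr<Strategy> Strat;
    };

    int main() {
      // std::make_unique is C++14; LLVM of this era shipped llvm::make_unique.
      Scheduler Sched(std::make_unique<Strategy>());
      return Sched.Strat ? 0 : 1;
    }
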
@@ -120,7 +120,7 @@ class ConvergingVLIWScheduler : public MachineSchedStrategy {
// Best scheduling cost.
int SCost;
- SchedCandidate(): SU(NULL), SCost(0) {}
+ SchedCandidate(): SU(nullptr), SCost(0) {}
};
/// Represent the type of SchedCandidate found within a single queue.
enum CandResult {
@@ -153,9 +153,9 @@ class ConvergingVLIWScheduler : public MachineSchedStrategy {
/// Pending queues extend the ready queues with the same ID and the
/// PendingFlag set.
VLIWSchedBoundary(unsigned ID, const Twine &Name):
- DAG(0), SchedModel(0), Available(ID, Name+".A"),
+ DAG(nullptr), SchedModel(nullptr), Available(ID, Name+".A"),
Pending(ID << ConvergingVLIWScheduler::LogMaxQID, Name+".P"),
- CheckPending(false), HazardRec(0), ResourceModel(0),
+ CheckPending(false), HazardRec(nullptr), ResourceModel(nullptr),
CurrCycle(0), IssueCount(0),
MinReadyCycle(UINT_MAX), MaxMinLatency(0) {}
@@ -203,8 +203,9 @@ public:
LogMaxQID = 2
};
- ConvergingVLIWScheduler():
- DAG(0), SchedModel(0), Top(TopQID, "TopQ"), Bot(BotQID, "BotQ") {}
+ ConvergingVLIWScheduler()
+ : DAG(nullptr), SchedModel(nullptr), Top(TopQID, "TopQ"),
+ Bot(BotQID, "BotQ") {}
virtual void initialize(ScheduleDAGMI *dag) override;
diff --git a/lib/Target/Hexagon/HexagonNewValueJump.cpp b/lib/Target/Hexagon/HexagonNewValueJump.cpp
index 3e238bf..b7c03a7 100644
--- a/lib/Target/Hexagon/HexagonNewValueJump.cpp
+++ b/lib/Target/Hexagon/HexagonNewValueJump.cpp
@@ -21,7 +21,6 @@
//
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "hexagon-nvj"
#include "llvm/PassSupport.h"
#include "Hexagon.h"
#include "HexagonInstrInfo.h"
@@ -47,6 +46,8 @@
#include <map>
using namespace llvm;
+#define DEBUG_TYPE "hexagon-nvj"
+
STATISTIC(NumNVJGenerated, "Number of New Value Jump Instructions created");
static cl::opt<int>
@@ -74,16 +75,16 @@ namespace {
initializeHexagonNewValueJumpPass(*PassRegistry::getPassRegistry());
}
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<MachineBranchProbabilityInfo>();
MachineFunctionPass::getAnalysisUsage(AU);
}
- const char *getPassName() const {
+ const char *getPassName() const override {
return "Hexagon NewValueJump";
}
- virtual bool runOnMachineFunction(MachineFunction &Fn);
+ bool runOnMachineFunction(MachineFunction &Fn) override;
private:
/// \brief A handle to the branch probability pass.
@@ -393,8 +394,8 @@ bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) {
bool MO2IsKill = false;
MachineBasicBlock::iterator jmpPos;
MachineBasicBlock::iterator cmpPos;
- MachineInstr *cmpInstr = NULL, *jmpInstr = NULL;
- MachineBasicBlock *jmpTarget = NULL;
+ MachineInstr *cmpInstr = nullptr, *jmpInstr = nullptr;
+ MachineBasicBlock *jmpTarget = nullptr;
bool afterRA = false;
bool isSecondOpReg = false;
bool isSecondOpNewified = false;
diff --git a/lib/Target/Hexagon/HexagonPeephole.cpp b/lib/Target/Hexagon/HexagonPeephole.cpp
index 5490ecd..48b6159 100644
--- a/lib/Target/Hexagon/HexagonPeephole.cpp
+++ b/lib/Target/Hexagon/HexagonPeephole.cpp
@@ -35,7 +35,6 @@
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "hexagon-peephole"
#include "Hexagon.h"
#include "HexagonTargetMachine.h"
#include "llvm/ADT/DenseMap.h"
@@ -57,6 +56,8 @@
using namespace llvm;
+#define DEBUG_TYPE "hexagon-peephole"
+
static cl::opt<bool> DisableHexagonPeephole("disable-hexagon-peephole",
cl::Hidden, cl::ZeroOrMore, cl::init(false),
cl::desc("Disable Peephole Optimization"));
@@ -89,13 +90,13 @@ namespace {
initializeHexagonPeepholePass(*PassRegistry::getPassRegistry());
}
- bool runOnMachineFunction(MachineFunction &MF);
+ bool runOnMachineFunction(MachineFunction &MF) override;
- const char *getPassName() const {
+ const char *getPassName() const override {
return "Hexagon optimize redundant zero and size extends";
}
- void getAnalysisUsage(AnalysisUsage &AU) const {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
MachineFunctionPass::getAnalysisUsage(AU);
}
diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.cpp b/lib/Target/Hexagon/HexagonRegisterInfo.cpp
index 9a20dfd..fb466d3 100644
--- a/lib/Target/Hexagon/HexagonRegisterInfo.cpp
+++ b/lib/Target/Hexagon/HexagonRegisterInfo.cpp
@@ -43,13 +43,12 @@ HexagonRegisterInfo::HexagonRegisterInfo(HexagonSubtarget &st)
Subtarget(st) {
}
-const uint16_t* HexagonRegisterInfo::getCalleeSavedRegs(const MachineFunction
- *MF)
- const {
- static const uint16_t CalleeSavedRegsV2[] = {
+const MCPhysReg *
+HexagonRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
+ static const MCPhysReg CalleeSavedRegsV2[] = {
Hexagon::R24, Hexagon::R25, Hexagon::R26, Hexagon::R27, 0
};
- static const uint16_t CalleeSavedRegsV3[] = {
+ static const MCPhysReg CalleeSavedRegsV3[] = {
Hexagon::R16, Hexagon::R17, Hexagon::R18, Hexagon::R19,
Hexagon::R20, Hexagon::R21, Hexagon::R22, Hexagon::R23,
Hexagon::R24, Hexagon::R25, Hexagon::R26, Hexagon::R27, 0
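
The return type change from const uint16_t * to const MCPhysReg * keeps the same representation (MCPhysReg is a uint16_t typedef at this point in LLVM's history) while saying what the values are, and the arrays remain 0-terminated so callers can walk them without a separate length. A self-contained sketch of the consuming loop, with made-up register numbers:

    #include <cstdint>

    typedef uint16_t MCPhysReg; // mirrors the LLVM typedef of the time

    static const MCPhysReg CalleeSavedRegs[] = { 24, 25, 26, 27, 0 }; // 0 is the sentinel

    int main() {
      int Count = 0;
      for (const MCPhysReg *R = CalleeSavedRegs; *R; ++R) // stops at the 0 terminator
        ++Count;
      return Count == 4 ? 0 : 1;
    }
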
diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.h b/lib/Target/Hexagon/HexagonRegisterInfo.h
index 89af7c3..648b4af 100644
--- a/lib/Target/Hexagon/HexagonRegisterInfo.h
+++ b/lib/Target/Hexagon/HexagonRegisterInfo.h
@@ -48,16 +48,17 @@ struct HexagonRegisterInfo : public HexagonGenRegisterInfo {
HexagonRegisterInfo(HexagonSubtarget &st);
/// Code Generation virtual methods...
- const uint16_t *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
+ const MCPhysReg *
+ getCalleeSavedRegs(const MachineFunction *MF = nullptr) const override;
- const TargetRegisterClass* const* getCalleeSavedRegClasses(
- const MachineFunction *MF = 0) const;
+ const TargetRegisterClass* const*
+ getCalleeSavedRegClasses(const MachineFunction *MF = nullptr) const;
- BitVector getReservedRegs(const MachineFunction &MF) const;
+ BitVector getReservedRegs(const MachineFunction &MF) const override;
void eliminateFrameIndex(MachineBasicBlock::iterator II,
int SPAdj, unsigned FIOperandNum,
- RegScavenger *RS = NULL) const;
+ RegScavenger *RS = nullptr) const override;
/// determineFrameLayout - Determine the size of the frame and maximum call
/// frame size.
@@ -65,17 +66,17 @@ struct HexagonRegisterInfo : public HexagonGenRegisterInfo {
/// requiresRegisterScavenging - returns true since we may need scavenging for
/// a temporary register when generating hardware loop instructions.
- bool requiresRegisterScavenging(const MachineFunction &MF) const {
+ bool requiresRegisterScavenging(const MachineFunction &MF) const override {
return true;
}
- bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const {
+ bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const override {
return true;
}
// Debug information queries.
unsigned getRARegister() const;
- unsigned getFrameRegister(const MachineFunction &MF) const;
+ unsigned getFrameRegister(const MachineFunction &MF) const override;
unsigned getFrameRegister() const;
unsigned getStackRegister() const;
};
diff --git a/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp b/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp
index cadcb32..2b459a4 100644
--- a/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp
+++ b/lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp
@@ -33,13 +33,13 @@ namespace {
HexagonRemoveExtendArgs() : FunctionPass(ID) {
initializeHexagonRemoveExtendArgsPass(*PassRegistry::getPassRegistry());
}
- virtual bool runOnFunction(Function &F);
+ bool runOnFunction(Function &F) override;
- const char *getPassName() const {
+ const char *getPassName() const override {
return "Remove sign extends";
}
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<MachineFunctionAnalysis>();
AU.addPreserved<MachineFunctionAnalysis>();
AU.addPreserved("stack-protector");
diff --git a/lib/Target/Hexagon/HexagonSchedule.td b/lib/Target/Hexagon/HexagonSchedule.td
index c2cfbb9..528cafc 100644
--- a/lib/Target/Hexagon/HexagonSchedule.td
+++ b/lib/Target/Hexagon/HexagonSchedule.td
@@ -7,57 +7,6 @@
//
//===----------------------------------------------------------------------===//
-// Functional Units
-def LSUNIT : FuncUnit; // SLOT0
-def LUNIT : FuncUnit; // SLOT1
-def MUNIT : FuncUnit; // SLOT2
-def SUNIT : FuncUnit; // SLOT3
-def LOOPUNIT : FuncUnit;
-
-// Itinerary classes
-def ALU32 : InstrItinClass;
-def ALU64 : InstrItinClass;
-def CR : InstrItinClass;
-def J : InstrItinClass;
-def JR : InstrItinClass;
-def LD : InstrItinClass;
-def LD0 : InstrItinClass;
-def M : InstrItinClass;
-def ST : InstrItinClass;
-def ST0 : InstrItinClass;
-def S : InstrItinClass;
-def SYS : InstrItinClass;
-def ENDLOOP : InstrItinClass;
-def PSEUDO : InstrItinClass;
-def PSEUDOM : InstrItinClass;
-
-def HexagonItineraries :
- ProcessorItineraries<[LSUNIT, LUNIT, MUNIT, SUNIT, LOOPUNIT], [], [
- InstrItinData<ALU32 , [InstrStage<1, [LUNIT, LSUNIT, MUNIT, SUNIT]>]>,
- InstrItinData<ALU64 , [InstrStage<1, [MUNIT, SUNIT]>]>,
- InstrItinData<CR , [InstrStage<1, [SUNIT]>]>,
- InstrItinData<J , [InstrStage<1, [SUNIT, MUNIT]>]>,
- InstrItinData<JR , [InstrStage<1, [MUNIT]>]>,
- InstrItinData<LD , [InstrStage<1, [LUNIT, LSUNIT]>]>,
- InstrItinData<LD0 , [InstrStage<1, [LSUNIT]>]>,
- InstrItinData<M , [InstrStage<1, [MUNIT, SUNIT]>]>,
- InstrItinData<ST , [InstrStage<1, [LSUNIT]>]>,
- InstrItinData<ST0 , [InstrStage<1, [LSUNIT]>]>,
- InstrItinData<S , [InstrStage<1, [SUNIT, MUNIT]>]>,
- InstrItinData<SYS , [InstrStage<1, [LSUNIT]>]>,
- InstrItinData<ENDLOOP, [InstrStage<1, [LOOPUNIT]>]>,
- InstrItinData<PSEUDO , [InstrStage<1, [LUNIT, LSUNIT, MUNIT, SUNIT]>]>,
- InstrItinData<PSEUDOM, [InstrStage<1, [MUNIT, SUNIT], 0>,
- InstrStage<1, [MUNIT, SUNIT]>]>
- ]>;
-
-def HexagonModel : SchedMachineModel {
- // Max issue per cycle == bundle width.
- let IssueWidth = 4;
- let Itineraries = HexagonItineraries;
- let LoadLatency = 1;
-}
-
//===----------------------------------------------------------------------===//
// V4 Machine Info +
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Hexagon/HexagonScheduleV4.td b/lib/Target/Hexagon/HexagonScheduleV4.td
index ef72cf4..a7d2d47 100644
--- a/lib/Target/Hexagon/HexagonScheduleV4.td
+++ b/lib/Target/Hexagon/HexagonScheduleV4.td
@@ -34,29 +34,158 @@ def SLOT3 : FuncUnit;
def SLOT_ENDLOOP: FuncUnit;
// Itinerary classes.
-def NV_V4 : InstrItinClass;
-def MEM_V4 : InstrItinClass;
+def PSEUDO : InstrItinClass;
+def PSEUDOM : InstrItinClass;
// ALU64/M/S Instruction classes of V2 are collectively known as XTYPE in V4.
+def DUPLEX : InstrItinClass;
def PREFIX : InstrItinClass;
+def COMPOUND : InstrItinClass;
+
+def ALU32_2op_tc_1_SLOT0123 : InstrItinClass;
+def ALU32_2op_tc_2early_SLOT0123 : InstrItinClass;
+def ALU32_3op_tc_2early_SLOT0123 : InstrItinClass;
+def ALU32_3op_tc_1_SLOT0123 : InstrItinClass;
+def ALU32_3op_tc_2_SLOT0123 : InstrItinClass;
+def ALU32_ADDI_tc_1_SLOT0123 : InstrItinClass;
+def ALU64_tc_1_SLOT23 : InstrItinClass;
+def ALU64_tc_1or2_SLOT23 : InstrItinClass;
+def ALU64_tc_2_SLOT23 : InstrItinClass;
+def ALU64_tc_2early_SLOT23 : InstrItinClass;
+def ALU64_tc_3x_SLOT23 : InstrItinClass;
+def CR_tc_2_SLOT3 : InstrItinClass;
+def CR_tc_2early_SLOT23 : InstrItinClass;
+def CR_tc_2early_SLOT3 : InstrItinClass;
+def CR_tc_3x_SLOT23 : InstrItinClass;
+def CR_tc_3x_SLOT3 : InstrItinClass;
+def J_tc_2early_SLOT23 : InstrItinClass;
+def J_tc_2early_SLOT2 : InstrItinClass;
+def LD_tc_ld_SLOT01 : InstrItinClass;
+def LD_tc_ld_SLOT0 : InstrItinClass;
+def LD_tc_3or4stall_SLOT0 : InstrItinClass;
+def M_tc_1_SLOT23 : InstrItinClass;
+def M_tc_1or2_SLOT23 : InstrItinClass;
+def M_tc_2_SLOT23 : InstrItinClass;
+def M_tc_3_SLOT23 : InstrItinClass;
+def M_tc_3x_SLOT23 : InstrItinClass;
+def M_tc_3or4x_SLOT23 : InstrItinClass;
+def ST_tc_st_SLOT01 : InstrItinClass;
+def ST_tc_st_SLOT0 : InstrItinClass;
+def ST_tc_ld_SLOT0 : InstrItinClass;
+def ST_tc_3stall_SLOT0 : InstrItinClass;
+def S_2op_tc_1_SLOT23 : InstrItinClass;
+def S_2op_tc_2_SLOT23 : InstrItinClass;
+def S_2op_tc_2early_SLOT23 : InstrItinClass;
+def S_2op_tc_3or4x_SLOT23 : InstrItinClass;
+def S_3op_tc_1_SLOT23 : InstrItinClass;
+def S_3op_tc_1or2_SLOT23 : InstrItinClass;
+def S_3op_tc_2_SLOT23 : InstrItinClass;
+def S_3op_tc_2early_SLOT23 : InstrItinClass;
+def S_3op_tc_3_SLOT23 : InstrItinClass;
+def S_3op_tc_3x_SLOT23 : InstrItinClass;
+def NCJ_tc_3or4stall_SLOT0 : InstrItinClass;
+def V2LDST_tc_ld_SLOT01 : InstrItinClass;
+def V2LDST_tc_st_SLOT0 : InstrItinClass;
+def V2LDST_tc_st_SLOT01 : InstrItinClass;
+def V4LDST_tc_ld_SLOT01 : InstrItinClass;
+def V4LDST_tc_st_SLOT0 : InstrItinClass;
+def V4LDST_tc_st_SLOT01 : InstrItinClass;
+def J_tc_2early_SLOT0123 : InstrItinClass;
+def EXTENDER_tc_1_SLOT0123 : InstrItinClass;
+
def HexagonItinerariesV4 :
ProcessorItineraries<[SLOT0, SLOT1, SLOT2, SLOT3, SLOT_ENDLOOP], [], [
- InstrItinData<ALU32 , [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
- InstrItinData<ALU64 , [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData<CR , [InstrStage<1, [SLOT3]>]>,
- InstrItinData<J , [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData<JR , [InstrStage<1, [SLOT2]>]>,
- InstrItinData<LD , [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData<LD0 , [InstrStage<1, [SLOT0]>]>,
- InstrItinData<M , [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData<ST , [InstrStage<1, [SLOT0, SLOT1]>]>,
- InstrItinData<ST0 , [InstrStage<1, [SLOT0]>]>,
- InstrItinData<S , [InstrStage<1, [SLOT2, SLOT3]>]>,
- InstrItinData<SYS , [InstrStage<1, [SLOT0]>]>,
- InstrItinData<NV_V4 , [InstrStage<1, [SLOT0]>]>,
- InstrItinData<MEM_V4 , [InstrStage<1, [SLOT0]>]>,
- InstrItinData<ENDLOOP, [InstrStage<1, [SLOT_ENDLOOP]>]>,
- InstrItinData<PREFIX , [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ // ALU32
+ InstrItinData<ALU32_2op_tc_1_SLOT0123 ,
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData<ALU32_2op_tc_2early_SLOT0123,
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData<ALU32_3op_tc_1_SLOT0123 ,
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData<ALU32_3op_tc_2early_SLOT0123,
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData<ALU32_3op_tc_2_SLOT0123 ,
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+ InstrItinData<ALU32_ADDI_tc_1_SLOT0123 ,
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+
+ // ALU64
+ InstrItinData<ALU64_tc_1_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData<ALU64_tc_1or2_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData<ALU64_tc_2_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData<ALU64_tc_2early_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData<ALU64_tc_3x_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>,
+
+ // CR -> System
+ InstrItinData<CR_tc_2_SLOT3 , [InstrStage<1, [SLOT3]>]>,
+ InstrItinData<CR_tc_2early_SLOT3 , [InstrStage<1, [SLOT3]>]>,
+ InstrItinData<CR_tc_3x_SLOT3 , [InstrStage<1, [SLOT3]>]>,
+
+  // Jump (conditional/unconditional/return, etc.)
+ // CR
+ InstrItinData<CR_tc_2early_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData<CR_tc_3x_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>,
+ // J
+ InstrItinData<J_tc_2early_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>,
+ // JR
+ InstrItinData<J_tc_2early_SLOT2 , [InstrStage<1, [SLOT2]>]>,
+
+  // Load
+ InstrItinData<LD_tc_ld_SLOT01 , [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData<LD_tc_ld_SLOT0 , [InstrStage<1, [SLOT0]>]>,
+ InstrItinData<LD_tc_3or4stall_SLOT0 , [InstrStage<1, [SLOT0]>]>,
+
+ // M
+ InstrItinData<M_tc_1_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData<M_tc_1or2_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData<M_tc_2_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData<M_tc_3_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData<M_tc_3x_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData<M_tc_3or4x_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>,
+
+ // Store
+ // ST
+ InstrItinData<ST_tc_st_SLOT01 , [InstrStage<1, [SLOT0, SLOT1]>]>,
+ // ST0
+ InstrItinData<ST_tc_st_SLOT0 , [InstrStage<1, [SLOT0]>]>,
+ InstrItinData<ST_tc_ld_SLOT0 , [InstrStage<1, [SLOT0]>]>,
+
+ // S
+ InstrItinData<S_2op_tc_1_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData<S_2op_tc_2_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData<S_2op_tc_2early_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData<S_2op_tc_3or4x_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData<S_3op_tc_1_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData<S_3op_tc_1or2_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData<S_3op_tc_2early_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData<S_3op_tc_2_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData<S_3op_tc_3_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>,
+ InstrItinData<S_3op_tc_3x_SLOT23 , [InstrStage<1, [SLOT2, SLOT3]>]>,
+
+ // SYS
+ InstrItinData<ST_tc_3stall_SLOT0 , [InstrStage<1, [SLOT0]>]>,
+
+ // New Value Compare Jump
+ InstrItinData<NCJ_tc_3or4stall_SLOT0 , [InstrStage<1, [SLOT0]>]>,
+
+ // Mem ops - MEM_V4
+ InstrItinData<V2LDST_tc_st_SLOT0 , [InstrStage<1, [SLOT0]>]>,
+ InstrItinData<V2LDST_tc_ld_SLOT01 , [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData<V2LDST_tc_st_SLOT01 , [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData<V4LDST_tc_st_SLOT0 , [InstrStage<1, [SLOT0]>]>,
+ InstrItinData<V4LDST_tc_ld_SLOT01 , [InstrStage<1, [SLOT0, SLOT1]>]>,
+ InstrItinData<V4LDST_tc_st_SLOT01 , [InstrStage<1, [SLOT0, SLOT1]>]>,
+
+ InstrItinData<DUPLEX , [InstrStage<1, [SLOT0]>]>,
+
+ // ENDLOOP
+ InstrItinData<J_tc_2early_SLOT0123 , [InstrStage<1, [SLOT_ENDLOOP]>]>,
+
+ // Extender/PREFIX
+ InstrItinData<EXTENDER_tc_1_SLOT0123,
+ [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
+
+ InstrItinData<COMPOUND , [InstrStage<1, [SLOT2, SLOT3]>]>,
InstrItinData<PSEUDO , [InstrStage<1, [SLOT0, SLOT1, SLOT2, SLOT3]>]>,
InstrItinData<PSEUDOM, [InstrStage<1, [SLOT2, SLOT3], 0>,
InstrStage<1, [SLOT2, SLOT3]>]>
diff --git a/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp b/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp
index c37bf9f..9e1e0fd 100644
--- a/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp
+++ b/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp
@@ -11,10 +11,11 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "hexagon-selectiondag-info"
#include "HexagonTargetMachine.h"
using namespace llvm;
+#define DEBUG_TYPE "hexagon-selectiondag-info"
+
bool llvm::flag_aligned_memcpy;
HexagonSelectionDAGInfo::HexagonSelectionDAGInfo(const HexagonTargetMachine
diff --git a/lib/Target/Hexagon/HexagonSelectionDAGInfo.h b/lib/Target/Hexagon/HexagonSelectionDAGInfo.h
index 31f278a..8ba6108 100644
--- a/lib/Target/Hexagon/HexagonSelectionDAGInfo.h
+++ b/lib/Target/Hexagon/HexagonSelectionDAGInfo.h
@@ -25,14 +25,13 @@ public:
explicit HexagonSelectionDAGInfo(const HexagonTargetMachine &TM);
~HexagonSelectionDAGInfo();
- virtual
SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl,
SDValue Chain,
SDValue Dst, SDValue Src,
SDValue Size, unsigned Align,
bool isVolatile, bool AlwaysInline,
MachinePointerInfo DstPtrInfo,
- MachinePointerInfo SrcPtrInfo) const;
+ MachinePointerInfo SrcPtrInfo) const override;
};
}
diff --git a/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp b/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp
index 5303f44..247207f 100644
--- a/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp
+++ b/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp
@@ -17,11 +17,10 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "xfer"
-
-#include "HexagonTargetMachine.h"
#include "HexagonMachineFunctionInfo.h"
#include "HexagonSubtarget.h"
+#include "HexagonTargetMachine.h"
+#include "HexagonTargetObjectFile.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LatencyPriorityQueue.h"
#include "llvm/CodeGen/MachineDominators.h"
@@ -44,21 +43,22 @@
using namespace llvm;
+#define DEBUG_TYPE "xfer"
+
namespace {
class HexagonSplitConst32AndConst64 : public MachineFunctionPass {
- const HexagonTargetMachine& QTM;
- const HexagonSubtarget &QST;
+ const HexagonTargetMachine &QTM;
public:
static char ID;
- HexagonSplitConst32AndConst64(const HexagonTargetMachine& TM)
- : MachineFunctionPass(ID), QTM(TM), QST(*TM.getSubtargetImpl()) {}
+ HexagonSplitConst32AndConst64(const HexagonTargetMachine &TM)
+ : MachineFunctionPass(ID), QTM(TM) {}
- const char *getPassName() const {
+ const char *getPassName() const override {
return "Hexagon Split Const32s and Const64s";
}
- bool runOnMachineFunction(MachineFunction &Fn);
+ bool runOnMachineFunction(MachineFunction &Fn) override;
};
@@ -67,6 +67,12 @@ char HexagonSplitConst32AndConst64::ID = 0;
bool HexagonSplitConst32AndConst64::runOnMachineFunction(MachineFunction &Fn) {
+ const HexagonTargetObjectFile &TLOF =
+ (const HexagonTargetObjectFile &)
+ QTM.getTargetLowering()->getObjFileLowering();
+ if (TLOF.IsSmallDataEnabled())
+ return true;
+
const TargetInstrInfo *TII = QTM.getInstrInfo();
// Loop over all of the basic blocks
diff --git a/lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp b/lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp
index 8608e08..9601090 100644
--- a/lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp
+++ b/lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp
@@ -26,7 +26,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "xfer"
#include "Hexagon.h"
#include "HexagonMachineFunctionInfo.h"
#include "HexagonSubtarget.h"
@@ -49,6 +48,8 @@
using namespace llvm;
+#define DEBUG_TYPE "xfer"
+
namespace llvm {
void initializeHexagonSplitTFRCondSetsPass(PassRegistry&);
}
@@ -67,10 +68,10 @@ class HexagonSplitTFRCondSets : public MachineFunctionPass {
initializeHexagonSplitTFRCondSetsPass(*PassRegistry::getPassRegistry());
}
- const char *getPassName() const {
+ const char *getPassName() const override {
return "Hexagon Split TFRCondSets";
}
- bool runOnMachineFunction(MachineFunction &Fn);
+ bool runOnMachineFunction(MachineFunction &Fn) override;
};
@@ -221,7 +222,8 @@ bool HexagonSplitTFRCondSets::runOnMachineFunction(MachineFunction &Fn) {
static void initializePassOnce(PassRegistry &Registry) {
const char *Name = "Hexagon Split TFRCondSets";
PassInfo *PI = new PassInfo(Name, "hexagon-split-tfr",
- &HexagonSplitTFRCondSets::ID, 0, false, false);
+ &HexagonSplitTFRCondSets::ID, nullptr, false,
+ false);
Registry.registerPass(*PI, true);
}
diff --git a/lib/Target/Hexagon/HexagonSubtarget.cpp b/lib/Target/Hexagon/HexagonSubtarget.cpp
index fca6707..70c87fa 100644
--- a/lib/Target/Hexagon/HexagonSubtarget.cpp
+++ b/lib/Target/Hexagon/HexagonSubtarget.cpp
@@ -18,6 +18,8 @@
#include "llvm/Support/ErrorHandling.h"
using namespace llvm;
+#define DEBUG_TYPE "hexagon-subtarget"
+
#define GET_SUBTARGETINFO_CTOR
#define GET_SUBTARGETINFO_TARGET_DESC
#include "HexagonGenSubtargetInfo.inc"
diff --git a/lib/Target/Hexagon/HexagonTargetMachine.cpp b/lib/Target/Hexagon/HexagonTargetMachine.cpp
index 9ce1fb8..b923764 100644
--- a/lib/Target/Hexagon/HexagonTargetMachine.cpp
+++ b/lib/Target/Hexagon/HexagonTargetMachine.cpp
@@ -52,7 +52,7 @@ extern "C" void LLVMInitializeHexagonTarget() {
}
static ScheduleDAGInstrs *createVLIWMachineSched(MachineSchedContext *C) {
- return new VLIWMachineScheduler(C, new ConvergingVLIWScheduler());
+ return new VLIWMachineScheduler(C, make_unique<ConvergingVLIWScheduler>());
}
static MachineSchedRegistry
@@ -79,20 +79,6 @@ HexagonTargetMachine::HexagonTargetMachine(const Target &T, StringRef TT,
initAsmInfo();
}
-// addPassesForOptimizations - Allow the backend (target) to add Target
-// Independent Optimization passes to the Pass Manager.
-bool HexagonTargetMachine::addPassesForOptimizations(PassManagerBase &PM) {
- if (getOptLevel() != CodeGenOpt::None) {
- PM.add(createConstantPropagationPass());
- PM.add(createLoopSimplifyPass());
- PM.add(createDeadCodeEliminationPass());
- PM.add(createConstantPropagationPass());
- PM.add(createLoopUnrollPass());
- PM.add(createLoopStrengthReducePass());
- }
- return true;
-}
-
namespace {
/// Hexagon Code Generator Pass Configuration Options.
class HexagonPassConfig : public TargetPassConfig {
@@ -113,16 +99,16 @@ public:
return getTM<HexagonTargetMachine>();
}
- virtual ScheduleDAGInstrs *
- createMachineScheduler(MachineSchedContext *C) const {
+ ScheduleDAGInstrs *
+ createMachineScheduler(MachineSchedContext *C) const override {
return createVLIWMachineSched(C);
}
- virtual bool addInstSelector();
- virtual bool addPreRegAlloc();
- virtual bool addPostRegAlloc();
- virtual bool addPreSched2();
- virtual bool addPreEmitPass();
+ bool addInstSelector() override;
+ bool addPreRegAlloc() override;
+ bool addPostRegAlloc() override;
+ bool addPreSched2() override;
+ bool addPreEmitPass() override;
};
} // namespace
@@ -164,16 +150,12 @@ bool HexagonPassConfig::addPostRegAlloc() {
bool HexagonPassConfig::addPreSched2() {
const HexagonTargetMachine &TM = getHexagonTargetMachine();
- const HexagonTargetObjectFile &TLOF =
- (const HexagonTargetObjectFile &)getTargetLowering()->getObjFileLowering();
addPass(createHexagonCopyToCombine());
if (getOptLevel() != CodeGenOpt::None)
addPass(&IfConverterID);
- if (!TLOF.IsSmallDataEnabled()) {
- addPass(createHexagonSplitConst32AndConst64(TM));
- printAndVerify("After hexagon split const32/64 pass");
- }
+ addPass(createHexagonSplitConst32AndConst64(TM));
+ printAndVerify("After hexagon split const32/64 pass");
return true;
}
diff --git a/lib/Target/Hexagon/HexagonTargetMachine.h b/lib/Target/Hexagon/HexagonTargetMachine.h
index cf8f9aa..70b835e 100644
--- a/lib/Target/Hexagon/HexagonTargetMachine.h
+++ b/lib/Target/Hexagon/HexagonTargetMachine.h
@@ -41,39 +41,37 @@ public:
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL);
- virtual const HexagonInstrInfo *getInstrInfo() const {
+ const HexagonInstrInfo *getInstrInfo() const override {
return &InstrInfo;
}
- virtual const HexagonSubtarget *getSubtargetImpl() const {
+ const HexagonSubtarget *getSubtargetImpl() const override {
return &Subtarget;
}
- virtual const HexagonRegisterInfo *getRegisterInfo() const {
+ const HexagonRegisterInfo *getRegisterInfo() const override {
return &InstrInfo.getRegisterInfo();
}
- virtual const InstrItineraryData* getInstrItineraryData() const {
+ const InstrItineraryData* getInstrItineraryData() const override {
return InstrItins;
}
- virtual const HexagonTargetLowering* getTargetLowering() const {
+ const HexagonTargetLowering* getTargetLowering() const override {
return &TLInfo;
}
- virtual const HexagonFrameLowering* getFrameLowering() const {
+ const HexagonFrameLowering* getFrameLowering() const override {
return &FrameLowering;
}
- virtual const HexagonSelectionDAGInfo* getSelectionDAGInfo() const {
+ const HexagonSelectionDAGInfo* getSelectionDAGInfo() const override {
return &TSInfo;
}
- virtual const DataLayout *getDataLayout() const { return &DL; }
+ const DataLayout *getDataLayout() const override { return &DL; }
static unsigned getModuleMatchQuality(const Module &M);
- // Pass Pipeline Configuration.
- virtual bool addPassesForOptimizations(PassManagerBase &PM);
- virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
+ TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
};
extern bool flag_aligned_memcpy;
diff --git a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
index 976ff2b..87ce960 100644
--- a/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
+++ b/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
@@ -16,7 +16,6 @@
// prune the dependence.
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "packets"
#include "llvm/CodeGen/DFAPacketizer.h"
#include "Hexagon.h"
#include "HexagonMachineFunctionInfo.h"
@@ -51,6 +50,8 @@
using namespace llvm;
+#define DEBUG_TYPE "packets"
+
static cl::opt<bool> PacketizeVolatiles("hexagon-packetize-volatiles",
cl::ZeroOrMore, cl::Hidden, cl::init(true),
cl::desc("Allow non-solo packetization of volatile memory references"));
@@ -69,7 +70,7 @@ namespace {
initializeHexagonPacketizerPass(*PassRegistry::getPassRegistry());
}
- void getAnalysisUsage(AnalysisUsage &AU) const {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
AU.addRequired<MachineDominatorTree>();
AU.addRequired<MachineBranchProbabilityInfo>();
@@ -79,11 +80,11 @@ namespace {
MachineFunctionPass::getAnalysisUsage(AU);
}
- const char *getPassName() const {
+ const char *getPassName() const override {
return "Hexagon Packetizer";
}
- bool runOnMachineFunction(MachineFunction &Fn);
+ bool runOnMachineFunction(MachineFunction &Fn) override;
};
char HexagonPacketizer::ID = 0;
@@ -121,24 +122,25 @@ namespace {
const MachineBranchProbabilityInfo *MBPI);
// initPacketizerState - initialize some internal flags.
- void initPacketizerState();
+ void initPacketizerState() override;
// ignorePseudoInstruction - Ignore bundling of pseudo instructions.
- bool ignorePseudoInstruction(MachineInstr *MI, MachineBasicBlock *MBB);
+ bool ignorePseudoInstruction(MachineInstr *MI,
+ MachineBasicBlock *MBB) override;
// isSoloInstruction - return true if instruction MI cannot be packetized
// with any other instruction, which means that MI itself is a packet.
- bool isSoloInstruction(MachineInstr *MI);
+ bool isSoloInstruction(MachineInstr *MI) override;
// isLegalToPacketizeTogether - Is it legal to packetize SUI and SUJ
// together.
- bool isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ);
+ bool isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) override;
// isLegalToPruneDependencies - Is it legal to prune dependence between SUI
// and SUJ.
- bool isLegalToPruneDependencies(SUnit *SUI, SUnit *SUJ);
+ bool isLegalToPruneDependencies(SUnit *SUI, SUnit *SUJ) override;
- MachineBasicBlock::iterator addToPacket(MachineInstr *MI);
+ MachineBasicBlock::iterator addToPacket(MachineInstr *MI) override;
private:
bool IsCallDependent(MachineInstr* MI, SDep::Kind DepType, unsigned DepReg);
bool PromoteToDotNew(MachineInstr* MI, SDep::Kind DepType,
@@ -390,7 +392,7 @@ static bool IsLoopN(MachineInstr *MI) {
/// callee-saved register.
static bool DoesModifyCalleeSavedReg(MachineInstr *MI,
const TargetRegisterInfo *TRI) {
- for (const uint16_t *CSR = TRI->getCalleeSavedRegs(); *CSR; ++CSR) {
+ for (const MCPhysReg *CSR = TRI->getCalleeSavedRegs(); *CSR; ++CSR) {
unsigned CalleeSavedReg = *CSR;
if (MI->modifiesRegister(CalleeSavedReg, TRI))
return true;
@@ -603,7 +605,7 @@ bool HexagonPacketizerList::CanPromoteToNewValueStore( MachineInstr *MI,
// evaluate identically
unsigned predRegNumSrc = 0;
unsigned predRegNumDst = 0;
- const TargetRegisterClass* predRegClass = NULL;
+ const TargetRegisterClass* predRegClass = nullptr;
// Get predicate register used in the source instruction
for(unsigned opNum = 0; opNum < PacketMI->getNumOperands(); opNum++) {
@@ -1172,7 +1174,7 @@ bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) {
// of that (IsCallDependent) function. Bug 6216 has been opened for this.
//
unsigned DepReg = 0;
- const TargetRegisterClass* RC = NULL;
+ const TargetRegisterClass* RC = nullptr;
if (DepType == SDep::Data) {
DepReg = SUJ->Succs[i].getReg();
RC = QRI->getMinimalPhysRegClass(DepReg);
diff --git a/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.cpp b/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.cpp
index 33667f4..9942a60 100644
--- a/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.cpp
+++ b/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.cpp
@@ -11,7 +11,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "asm-printer"
#include "HexagonAsmPrinter.h"
#include "Hexagon.h"
#include "HexagonInstPrinter.h"
@@ -24,6 +23,8 @@
using namespace llvm;
+#define DEBUG_TYPE "asm-printer"
+
#define GET_INSTRUCTION_NAME
#include "HexagonGenAsmWriter.inc"
diff --git a/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.h b/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.h
index d0cef68..09e3f88 100644
--- a/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.h
+++ b/lib/Target/Hexagon/InstPrinter/HexagonInstPrinter.h
@@ -27,7 +27,7 @@ namespace llvm {
const MCRegisterInfo &MRI)
: MCInstPrinter(MAI, MII, MRI), MII(MII) {}
- virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot);
+ void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot) override;
void printInst(const HexagonMCInst *MI, raw_ostream &O, StringRef Annot);
virtual StringRef getOpcodeName(unsigned Opcode) const;
void printInstruction(const MCInst *MI, raw_ostream &O);
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h b/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h
index 8519cf3..f8be77c 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h
@@ -87,70 +87,82 @@ namespace HexagonII {
// Solo instructions.
SoloPos = 5,
SoloMask = 0x1,
+ // Packed only with A or X-type instructions.
+ SoloAXPos = 6,
+ SoloAXMask = 0x1,
+ // Only A-type instruction in first slot or nothing.
+ SoloAin1Pos = 7,
+ SoloAin1Mask = 0x1,
// Predicated instructions.
- PredicatedPos = 6,
+ PredicatedPos = 8,
PredicatedMask = 0x1,
- PredicatedFalsePos = 7,
+ PredicatedFalsePos = 9,
PredicatedFalseMask = 0x1,
- PredicatedNewPos = 8,
+ PredicatedNewPos = 10,
PredicatedNewMask = 0x1,
+ PredicateLatePos = 11,
+ PredicateLateMask = 0x1,
// New-Value consumer instructions.
- NewValuePos = 9,
+ NewValuePos = 12,
NewValueMask = 0x1,
-
// New-Value producer instructions.
- hasNewValuePos = 10,
+ hasNewValuePos = 13,
hasNewValueMask = 0x1,
-
// Which operand consumes or produces a new value.
- NewValueOpPos = 11,
+ NewValueOpPos = 14,
NewValueOpMask = 0x7,
-
- // Which bits encode the new value.
- NewValueBitsPos = 14,
- NewValueBitsMask = 0x3,
-
// Stores that can become new-value stores.
- mayNVStorePos = 16,
+ mayNVStorePos = 17,
mayNVStoreMask = 0x1,
-
// New-value store instructions.
- NVStorePos = 17,
+ NVStorePos = 18,
NVStoreMask = 0x1,
+ // Loads that can become current-value loads.
+ mayCVLoadPos = 19,
+ mayCVLoadMask = 0x1,
+ // Current-value load instructions.
+ CVLoadPos = 20,
+ CVLoadMask = 0x1,
// Extendable insns.
- ExtendablePos = 18,
+ ExtendablePos = 21,
ExtendableMask = 0x1,
-
// Insns must be extended.
- ExtendedPos = 19,
+ ExtendedPos = 22,
ExtendedMask = 0x1,
-
// Which operand may be extended.
- ExtendableOpPos = 20,
+ ExtendableOpPos = 23,
ExtendableOpMask = 0x7,
-
// Signed or unsigned range.
- ExtentSignedPos = 23,
+ ExtentSignedPos = 26,
ExtentSignedMask = 0x1,
-
// Number of bits of range before extending operand.
- ExtentBitsPos = 24,
+ ExtentBitsPos = 27,
ExtentBitsMask = 0x1f,
+ // Alignment power-of-two before extending operand.
+ ExtentAlignPos = 32,
+ ExtentAlignMask = 0x3,
// Valid subtargets
- validSubTargetPos = 29,
+ validSubTargetPos = 34,
validSubTargetMask = 0xf,
// Addressing mode for load/store instructions.
- AddrModePos = 33,
+ AddrModePos = 40,
AddrModeMask = 0x7,
+ // Access size for load/store instructions.
+ MemAccessSizePos = 43,
+ MemAccesSizeMask = 0x7,
+
+ // Branch predicted taken.
+ TakenPos = 47,
+ TakenMask = 0x1,
- // Access size of memory access instructions (load/store).
- MemAccessSizePos = 36,
- MemAccesSizeMask = 0x7
+ // Floating-point instructions.
+ FPPos = 48,
+ FPMask = 0x1
};
// *** The code above must match HexagonInstrFormat*.td *** //
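
The renumbered Pos/Mask pairs pack one flag word (TSFlags) per instruction: each property occupies the bits starting at its Pos, with a width given by its Mask. Readers recover a field by shifting and masking, which is why inserting the new Solo/CVLoad/Taken/FP fields forces every later Pos to move. A hedged sketch of the extraction idiom (the enum values echo the header; getField is illustrative):

    #include <cstdint>

    enum : uint64_t {
      PredicatedPos = 8,  PredicatedMask = 0x1,
      AddrModePos   = 40, AddrModeMask   = 0x7,
    };

    // Shift the packed word down to the field, then trim to its width.
    static uint64_t getField(uint64_t TSFlags, unsigned Pos, uint64_t Mask) {
      return (TSFlags >> Pos) & Mask;
    }

    int main() {
      uint64_t TSFlags = (1ULL << PredicatedPos) | (5ULL << AddrModePos);
      bool OK = getField(TSFlags, PredicatedPos, PredicatedMask) == 1 &&
                getField(TSFlags, AddrModePos, AddrModeMask) == 5;
      return OK ? 0 : 1;
    }
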
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp
index f1a65c3..141e514 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp
@@ -21,7 +21,7 @@ void HexagonMCAsmInfo::anchor() {}
HexagonMCAsmInfo::HexagonMCAsmInfo(StringRef TT) {
Data16bitsDirective = "\t.half\t";
Data32bitsDirective = "\t.word\t";
- Data64bitsDirective = 0; // .xword is only supported by V9.
+ Data64bitsDirective = nullptr; // .xword is only supported by V9.
ZeroDirective = "\t.skip\t";
CommentString = "//";
HasLEB128 = true;
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h
index bd8cb76..953d804 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h
@@ -19,7 +19,7 @@
namespace llvm {
class HexagonMCAsmInfo : public MCAsmInfoELF {
- virtual void anchor();
+ void anchor() override;
public:
explicit HexagonMCAsmInfo(StringRef TT);
};
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCInst.h b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInst.h
index 3ca71f0..3c52d45 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCInst.h
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInst.h
@@ -31,7 +31,7 @@ namespace llvm {
public:
explicit HexagonMCInst():
- MCInst(), MCID(0), packetStart(0), packetEnd(0) {};
+ MCInst(), MCID(nullptr), packetStart(0), packetEnd(0) {};
HexagonMCInst(const MCInstrDesc& mcid):
MCInst(), MCID(&mcid), packetStart(0), packetEnd(0) {};
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
index 7f103d8..581674d 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
@@ -23,6 +23,8 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
+using namespace llvm;
+
#define GET_INSTRINFO_MC_DESC
#include "HexagonGenInstrInfo.inc"
@@ -32,8 +34,6 @@
#define GET_REGINFO_MC_DESC
#include "HexagonGenRegisterInfo.inc"
-using namespace llvm;
-
static MCInstrInfo *createHexagonMCInstrInfo() {
MCInstrInfo *X = new MCInstrInfo();
InitHexagonMCInstrInfo(X);
@@ -60,7 +60,7 @@ static MCAsmInfo *createHexagonMCAsmInfo(const MCRegisterInfo &MRI,
// VirtualFP = (R30 + #0).
MCCFIInstruction Inst = MCCFIInstruction::createDefCfa(
- 0, Hexagon::R30, 0);
+ nullptr, Hexagon::R30, 0);
MAI->addInitialFrameState(Inst);
return MAI;
diff --git a/lib/Target/LLVMBuild.txt b/lib/Target/LLVMBuild.txt
index 13abaf8..1b0837c 100644
--- a/lib/Target/LLVMBuild.txt
+++ b/lib/Target/LLVMBuild.txt
@@ -16,7 +16,7 @@
;===------------------------------------------------------------------------===;
[common]
-subdirectories = AArch64 ARM ARM64 CppBackend Hexagon MSP430 NVPTX Mips PowerPC R600 Sparc SystemZ X86 XCore
+subdirectories = ARM AArch64 CppBackend Hexagon MSP430 NVPTX Mips PowerPC R600 Sparc SystemZ X86 XCore
; This is a special group whose required libraries are extended (by llvm-build)
; with the best execution engine (the native JIT, if available, or the
diff --git a/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp b/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp
index 4b12aea..acf1214 100644
--- a/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp
+++ b/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp
@@ -11,7 +11,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "asm-printer"
#include "MSP430InstPrinter.h"
#include "MSP430.h"
#include "llvm/MC/MCAsmInfo.h"
@@ -21,6 +20,8 @@
#include "llvm/Support/FormattedStream.h"
using namespace llvm;
+#define DEBUG_TYPE "asm-printer"
+
// Include the auto-generated portion of the assembly writer.
#include "MSP430GenAsmWriter.inc"
@@ -44,7 +45,7 @@ void MSP430InstPrinter::printPCRelImmOperand(const MCInst *MI, unsigned OpNo,
void MSP430InstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
raw_ostream &O, const char *Modifier) {
- assert((Modifier == 0 || Modifier[0] == 0) && "No modifiers supported");
+ assert((Modifier == nullptr || Modifier[0] == 0) && "No modifiers supported");
const MCOperand &Op = MI->getOperand(OpNo);
if (Op.isReg()) {
O << getRegisterName(Op.getReg());
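The recurring DEBUG_TYPE moves (here and in the files below) follow the 3.5-era convention of defining the macro only after all #includes, so a file's DEBUG_TYPE can neither leak into nor collide with headers that manage their own. A self-contained sketch of the required ordering, with a toy DEBUG macro standing in for the one in llvm/Support/Debug.h:

  // All headers first; none of them can see or collide with this file's macro.
  #include <cstdio>
  #define DEBUG_TYPE "asm-printer"  // name taken from the hunk above
  #define DEBUG(X) do { std::fprintf(stderr, "[%s] ", DEBUG_TYPE); X; } while (0)
  int main() { DEBUG(std::puts("pass-local debug output")); }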
diff --git a/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h b/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h
index d32eb3a..5afbd20 100644
--- a/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h
+++ b/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h
@@ -25,17 +25,17 @@ namespace llvm {
const MCRegisterInfo &MRI)
: MCInstPrinter(MAI, MII, MRI) {}
- virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot);
+ void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot) override;
// Autogenerated by tblgen.
void printInstruction(const MCInst *MI, raw_ostream &O);
static const char *getRegisterName(unsigned RegNo);
void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O,
- const char *Modifier = 0);
+ const char *Modifier = nullptr);
void printPCRelImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printSrcMemOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O,
- const char *Modifier = 0);
+ const char *Modifier = nullptr);
void printCCOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
};
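The 0/NULL -> nullptr changes in these default arguments also have a correctness angle: a literal 0 can resolve to an integer overload where a pointer was intended, while nullptr cannot. A standalone illustration (not from the patch):

  #include <cstdio>
  void f(int)          { std::puts("f(int)"); }
  void f(const char *) { std::puts("f(const char *)"); }
  int main() {
    f(0);        // picks f(int), even if a null pointer was meant
    f(nullptr);  // unambiguously picks the pointer overload
  }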
diff --git a/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h b/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h
index a7e0e58..ef805bb 100644
--- a/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h
+++ b/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h
@@ -20,7 +20,7 @@ namespace llvm {
class StringRef;
class MSP430MCAsmInfo : public MCAsmInfoELF {
- virtual void anchor();
+ void anchor() override;
public:
explicit MSP430MCAsmInfo(StringRef TT);
};
diff --git a/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp b/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp
index 530e6aa..72adb45 100644
--- a/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp
+++ b/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp
@@ -20,6 +20,8 @@
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/TargetRegistry.h"
+using namespace llvm;
+
#define GET_INSTRINFO_MC_DESC
#include "MSP430GenInstrInfo.inc"
@@ -29,8 +31,6 @@
#define GET_REGINFO_MC_DESC
#include "MSP430GenRegisterInfo.inc"
-using namespace llvm;
-
static MCInstrInfo *createMSP430MCInstrInfo() {
MCInstrInfo *X = new MCInstrInfo();
InitMSP430MCInstrInfo(X);
@@ -66,7 +66,7 @@ static MCInstPrinter *createMSP430MCInstPrinter(const Target &T,
const MCSubtargetInfo &STI) {
if (SyntaxVariant == 0)
return new MSP430InstPrinter(MAI, MII, MRI);
- return 0;
+ return nullptr;
}
extern "C" void LLVMInitializeMSP430TargetMC() {
diff --git a/lib/Target/MSP430/MSP430AsmPrinter.cpp b/lib/Target/MSP430/MSP430AsmPrinter.cpp
index 91065d8..22a973e 100644
--- a/lib/Target/MSP430/MSP430AsmPrinter.cpp
+++ b/lib/Target/MSP430/MSP430AsmPrinter.cpp
@@ -12,7 +12,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "asm-printer"
#include "MSP430.h"
#include "InstPrinter/MSP430InstPrinter.h"
#include "MSP430InstrInfo.h"
@@ -35,27 +34,29 @@
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
+#define DEBUG_TYPE "asm-printer"
+
namespace {
class MSP430AsmPrinter : public AsmPrinter {
public:
MSP430AsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
: AsmPrinter(TM, Streamer) {}
- virtual const char *getPassName() const {
+ const char *getPassName() const override {
return "MSP430 Assembly Printer";
}
void printOperand(const MachineInstr *MI, int OpNum,
- raw_ostream &O, const char* Modifier = 0);
+ raw_ostream &O, const char* Modifier = nullptr);
void printSrcMemOperand(const MachineInstr *MI, int OpNum,
raw_ostream &O);
bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
unsigned AsmVariant, const char *ExtraCode,
- raw_ostream &O);
+ raw_ostream &O) override;
bool PrintAsmMemoryOperand(const MachineInstr *MI,
unsigned OpNo, unsigned AsmVariant,
- const char *ExtraCode, raw_ostream &O);
- void EmitInstruction(const MachineInstr *MI);
+ const char *ExtraCode, raw_ostream &O) override;
+ void EmitInstruction(const MachineInstr *MI) override;
};
} // end of anonymous namespace
diff --git a/lib/Target/MSP430/MSP430BranchSelector.cpp b/lib/Target/MSP430/MSP430BranchSelector.cpp
index f128427..a96930a 100644
--- a/lib/Target/MSP430/MSP430BranchSelector.cpp
+++ b/lib/Target/MSP430/MSP430BranchSelector.cpp
@@ -15,7 +15,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "msp430-branch-select"
#include "MSP430.h"
#include "MSP430InstrInfo.h"
#include "llvm/ADT/Statistic.h"
@@ -25,6 +24,8 @@
#include "llvm/Target/TargetMachine.h"
using namespace llvm;
+#define DEBUG_TYPE "msp430-branch-select"
+
STATISTIC(NumExpanded, "Number of branches expanded to long format");
namespace {
@@ -35,9 +36,9 @@ namespace {
/// BlockSizes - The sizes of the basic blocks in the function.
std::vector<unsigned> BlockSizes;
- virtual bool runOnMachineFunction(MachineFunction &Fn);
+ bool runOnMachineFunction(MachineFunction &Fn) override;
- virtual const char *getPassName() const {
+ const char *getPassName() const override {
return "MSP430 Branch Selector";
}
};
diff --git a/lib/Target/MSP430/MSP430FrameLowering.cpp b/lib/Target/MSP430/MSP430FrameLowering.cpp
index ce078a3..82c8b29 100644
--- a/lib/Target/MSP430/MSP430FrameLowering.cpp
+++ b/lib/Target/MSP430/MSP430FrameLowering.cpp
@@ -242,7 +242,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
// alignment boundary.
Amount = (Amount+StackAlign-1)/StackAlign*StackAlign;
- MachineInstr *New = 0;
+ MachineInstr *New = nullptr;
if (Old->getOpcode() == TII.getCallFrameSetupOpcode()) {
New = BuildMI(MF, Old->getDebugLoc(),
TII.get(MSP430::SUB16ri), MSP430::SPW)
diff --git a/lib/Target/MSP430/MSP430FrameLowering.h b/lib/Target/MSP430/MSP430FrameLowering.h
index 8370714..d464dd9 100644
--- a/lib/Target/MSP430/MSP430FrameLowering.h
+++ b/lib/Target/MSP430/MSP430FrameLowering.h
@@ -32,26 +32,26 @@ public:
/// emitPrologue/emitEpilogue - These methods insert prologue and epilogue code into
/// the function.
- void emitPrologue(MachineFunction &MF) const;
- void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+ void emitPrologue(MachineFunction &MF) const override;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
void eliminateCallFramePseudoInstr(MachineFunction &MF,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const;
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const override;
bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
const std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const;
+ const TargetRegisterInfo *TRI) const override;
bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const;
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const override;
- bool hasFP(const MachineFunction &MF) const;
- bool hasReservedCallFrame(const MachineFunction &MF) const;
+ bool hasFP(const MachineFunction &MF) const override;
+ bool hasReservedCallFrame(const MachineFunction &MF) const override;
void processFunctionBeforeFrameFinalized(MachineFunction &MF,
- RegScavenger *RS = NULL) const;
+ RegScavenger *RS = nullptr) const override;
};
} // End llvm namespace
diff --git a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
index 4152829..a9b9035 100644
--- a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
+++ b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
@@ -31,6 +31,8 @@
#include "llvm/Target/TargetLowering.h"
using namespace llvm;
+#define DEBUG_TYPE "msp430-isel"
+
namespace {
struct MSP430ISelAddressMode {
enum {
@@ -52,17 +54,17 @@ namespace {
unsigned Align; // CP alignment.
MSP430ISelAddressMode()
- : BaseType(RegBase), Disp(0), GV(0), CP(0), BlockAddr(0),
- ES(0), JT(-1), Align(0) {
+ : BaseType(RegBase), Disp(0), GV(nullptr), CP(nullptr),
+ BlockAddr(nullptr), ES(nullptr), JT(-1), Align(0) {
}
bool hasSymbolicDisplacement() const {
- return GV != 0 || CP != 0 || ES != 0 || JT != -1;
+ return GV != nullptr || CP != nullptr || ES != nullptr || JT != -1;
}
void dump() {
errs() << "MSP430ISelAddressMode " << this << '\n';
- if (BaseType == RegBase && Base.Reg.getNode() != 0) {
+ if (BaseType == RegBase && Base.Reg.getNode() != nullptr) {
errs() << "Base.Reg ";
Base.Reg.getNode()->dump();
} else if (BaseType == FrameIndexBase) {
@@ -99,7 +101,7 @@ namespace {
Lowering(*TM.getTargetLowering()),
Subtarget(*TM.getSubtargetImpl()) { }
- virtual const char *getPassName() const {
+ const char *getPassName() const override {
return "MSP430 DAG->DAG Pattern Instruction Selection";
}
@@ -107,15 +109,14 @@ namespace {
bool MatchWrapper(SDValue N, MSP430ISelAddressMode &AM);
bool MatchAddressBase(SDValue N, MSP430ISelAddressMode &AM);
- virtual bool
- SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
- std::vector<SDValue> &OutOps);
+ bool SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
+ std::vector<SDValue> &OutOps) override;
// Include the pieces autogenerated from the target description.
#include "MSP430GenDAGISel.inc"
private:
- SDNode *Select(SDNode *N);
+ SDNode *Select(SDNode *N) override;
SDNode *SelectIndexedLoad(SDNode *Op);
SDNode *SelectIndexedBinOp(SDNode *Op, SDValue N1, SDValue N2,
unsigned Opc8, unsigned Opc16);
@@ -199,7 +200,7 @@ bool MSP430DAGToDAGISel::MatchAddress(SDValue N, MSP430ISelAddressMode &AM) {
case ISD::FrameIndex:
if (AM.BaseType == MSP430ISelAddressMode::RegBase
- && AM.Base.Reg.getNode() == 0) {
+ && AM.Base.Reg.getNode() == nullptr) {
AM.BaseType = MSP430ISelAddressMode::FrameIndexBase;
AM.Base.FrameIndex = cast<FrameIndexSDNode>(N)->getIndex();
return false;
@@ -228,7 +229,7 @@ bool MSP430DAGToDAGISel::MatchAddress(SDValue N, MSP430ISelAddressMode &AM) {
// Start with the LHS as an addr mode.
if (!MatchAddress(N.getOperand(0), AM) &&
// Address could not have picked a GV address for the displacement.
- AM.GV == NULL &&
+ AM.GV == nullptr &&
// Check to see if the LHS & C is zero.
CurDAG->MaskedValueIsZero(N.getOperand(0), CN->getAPIntValue())) {
AM.Disp += Offset;
@@ -330,7 +331,7 @@ static bool isValidIndexedLoad(const LoadSDNode *LD) {
SDNode *MSP430DAGToDAGISel::SelectIndexedLoad(SDNode *N) {
LoadSDNode *LD = cast<LoadSDNode>(N);
if (!isValidIndexedLoad(LD))
- return NULL;
+ return nullptr;
MVT VT = LD->getMemoryVT().getSimpleVT();
@@ -343,7 +344,7 @@ SDNode *MSP430DAGToDAGISel::SelectIndexedLoad(SDNode *N) {
Opcode = MSP430::MOV16rm_POST;
break;
default:
- return NULL;
+ return nullptr;
}
return CurDAG->getMachineNode(Opcode, SDLoc(N),
@@ -359,7 +360,7 @@ SDNode *MSP430DAGToDAGISel::SelectIndexedBinOp(SDNode *Op,
IsLegalToFold(N1, Op, Op, OptLevel)) {
LoadSDNode *LD = cast<LoadSDNode>(N1);
if (!isValidIndexedLoad(LD))
- return NULL;
+ return nullptr;
MVT VT = LD->getMemoryVT().getSimpleVT();
unsigned Opc = (VT == MVT::i16 ? Opc16 : Opc8);
@@ -367,9 +368,7 @@ SDNode *MSP430DAGToDAGISel::SelectIndexedBinOp(SDNode *Op,
MemRefs0[0] = cast<MemSDNode>(N1)->getMemOperand();
SDValue Ops0[] = { N2, LD->getBasePtr(), LD->getChain() };
SDNode *ResNode =
- CurDAG->SelectNodeTo(Op, Opc,
- VT, MVT::i16, MVT::Other,
- Ops0, 3);
+ CurDAG->SelectNodeTo(Op, Opc, VT, MVT::i16, MVT::Other, Ops0);
cast<MachineSDNode>(ResNode)->setMemRefs(MemRefs0, MemRefs0 + 1);
// Transfer chain.
ReplaceUses(SDValue(N1.getNode(), 2), SDValue(ResNode, 2));
@@ -378,7 +377,7 @@ SDNode *MSP430DAGToDAGISel::SelectIndexedBinOp(SDNode *Op,
return ResNode;
}
- return NULL;
+ return nullptr;
}
@@ -396,7 +395,7 @@ SDNode *MSP430DAGToDAGISel::Select(SDNode *Node) {
Node->dump(CurDAG);
errs() << "\n");
Node->setNodeId(-1);
- return NULL;
+ return nullptr;
}
// A few custom selection cases.
@@ -484,7 +483,7 @@ SDNode *MSP430DAGToDAGISel::Select(SDNode *Node) {
SDNode *ResNode = SelectCode(Node);
DEBUG(errs() << "=> ");
- if (ResNode == NULL || ResNode == Node)
+ if (ResNode == nullptr || ResNode == Node)
DEBUG(Node->dump(CurDAG));
else
DEBUG(ResNode->dump(CurDAG));
diff --git a/lib/Target/MSP430/MSP430ISelLowering.cpp b/lib/Target/MSP430/MSP430ISelLowering.cpp
index fe163d4..c5901bc 100644
--- a/lib/Target/MSP430/MSP430ISelLowering.cpp
+++ b/lib/Target/MSP430/MSP430ISelLowering.cpp
@@ -11,8 +11,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "msp430-lower"
-
#include "MSP430ISelLowering.h"
#include "MSP430.h"
#include "MSP430MachineFunctionInfo.h"
@@ -38,6 +36,8 @@
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
+#define DEBUG_TYPE "msp430-lower"
+
typedef enum {
NoHWMult,
HWMultIntr,
@@ -284,7 +284,7 @@ template<typename ArgT>
static void AnalyzeArguments(CCState &State,
SmallVectorImpl<CCValAssign> &ArgLocs,
const SmallVectorImpl<ArgT> &Args) {
- static const uint16_t RegList[] = {
+ static const MCPhysReg RegList[] = {
MSP430::R15W, MSP430::R14W, MSP430::R13W, MSP430::R12W
};
static const unsigned NbRegs = array_lengthof(RegList);
@@ -462,7 +462,7 @@ MSP430TargetLowering::LowerCCCArguments(SDValue Chain,
errs() << "LowerFormalArguments Unhandled argument type: "
<< RegVT.getSimpleVT().SimpleTy << "\n";
#endif
- llvm_unreachable(0);
+ llvm_unreachable(nullptr);
}
case MVT::i16:
unsigned VReg = RegInfo.createVirtualRegister(&MSP430::GR16RegClass);
@@ -568,7 +568,7 @@ MSP430TargetLowering::LowerReturn(SDValue Chain,
if (Flag.getNode())
RetOps.push_back(Flag);
- return DAG.getNode(Opc, dl, MVT::Other, &RetOps[0], RetOps.size());
+ return DAG.getNode(Opc, dl, MVT::Other, RetOps);
}
/// LowerCCCCallTo - function arguments are copied from virtual regs to
@@ -629,7 +629,7 @@ MSP430TargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee,
} else {
assert(VA.isMemLoc());
- if (StackPtr.getNode() == 0)
+ if (!StackPtr.getNode())
StackPtr = DAG.getCopyFromReg(Chain, dl, MSP430::SPW, getPointerTy());
SDValue PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(),
@@ -659,8 +659,7 @@ MSP430TargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee,
// Transform all store nodes into one single node because all store nodes are
// independent of each other.
if (!MemOpChains.empty())
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- &MemOpChains[0], MemOpChains.size());
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
// Build a sequence of copy-to-reg nodes chained together with token chain and
// flag operands which copy the outgoing args into registers. The InFlag in
@@ -695,7 +694,7 @@ MSP430TargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee,
if (InFlag.getNode())
Ops.push_back(InFlag);
- Chain = DAG.getNode(MSP430ISD::CALL, dl, NodeTys, &Ops[0], Ops.size());
+ Chain = DAG.getNode(MSP430ISD::CALL, dl, NodeTys, Ops);
InFlag = Chain.getValue(1);
// Create the CALLSEQ_END node.
@@ -986,7 +985,7 @@ SDValue MSP430TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
Ops.push_back(Zero);
Ops.push_back(TargetCC);
Ops.push_back(Flag);
- return DAG.getNode(MSP430ISD::SELECT_CC, dl, VTs, &Ops[0], Ops.size());
+ return DAG.getNode(MSP430ISD::SELECT_CC, dl, VTs, Ops);
}
}
@@ -1009,7 +1008,7 @@ SDValue MSP430TargetLowering::LowerSELECT_CC(SDValue Op,
Ops.push_back(TargetCC);
Ops.push_back(Flag);
- return DAG.getNode(MSP430ISD::SELECT_CC, dl, VTs, &Ops[0], Ops.size());
+ return DAG.getNode(MSP430ISD::SELECT_CC, dl, VTs, Ops);
}
SDValue MSP430TargetLowering::LowerSIGN_EXTEND(SDValue Op,
@@ -1148,7 +1147,7 @@ bool MSP430TargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
const char *MSP430TargetLowering::getTargetNodeName(unsigned Opcode) const {
switch (Opcode) {
- default: return NULL;
+ default: return nullptr;
case MSP430ISD::RET_FLAG: return "MSP430ISD::RET_FLAG";
case MSP430ISD::RETI_FLAG: return "MSP430ISD::RETI_FLAG";
case MSP430ISD::RRA: return "MSP430ISD::RRA";
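The getNode call sites above drop the (&Ops[0], Ops.size()) form because the 3.5 API takes the operand list as an ArrayRef, to which a SmallVector converts implicitly. The same pointer-plus-length to range migration in a standalone sketch (std::vector standing in for the LLVM types):

  #include <cstddef>
  #include <vector>
  // Old-style interface: raw pointer and element count, easy to mismatch.
  int sum(const int *P, std::size_t N) {
    int S = 0;
    for (std::size_t I = 0; I < N; ++I) S += P[I];
    return S;
  }
  // Range-style interface: callers just pass the container.
  int sum(const std::vector<int> &V) {
    int S = 0;
    for (int X : V) S += X;
    return S;
  }
  int main() {
    std::vector<int> Ops{1, 2, 3};
    return sum(Ops) - sum(&Ops[0], Ops.size());  // both forms yield 6
  }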
diff --git a/lib/Target/MSP430/MSP430ISelLowering.h b/lib/Target/MSP430/MSP430ISelLowering.h
index 85a861e..3ced61d 100644
--- a/lib/Target/MSP430/MSP430ISelLowering.h
+++ b/lib/Target/MSP430/MSP430ISelLowering.h
@@ -73,14 +73,14 @@ namespace llvm {
public:
explicit MSP430TargetLowering(MSP430TargetMachine &TM);
- virtual MVT getScalarShiftAmountTy(EVT LHSTy) const { return MVT::i8; }
+ MVT getScalarShiftAmountTy(EVT LHSTy) const override { return MVT::i8; }
/// LowerOperation - Provide custom lowering hooks for some operations.
- virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
/// getTargetNodeName - This method returns the name of a target specific
/// DAG node.
- virtual const char *getTargetNodeName(unsigned Opcode) const;
+ const char *getTargetNodeName(unsigned Opcode) const override;
SDValue LowerShifts(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
@@ -97,15 +97,16 @@ namespace llvm {
SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const;
TargetLowering::ConstraintType
- getConstraintType(const std::string &Constraint) const;
+ getConstraintType(const std::string &Constraint) const override;
std::pair<unsigned, const TargetRegisterClass*>
- getRegForInlineAsmConstraint(const std::string &Constraint, MVT VT) const;
+ getRegForInlineAsmConstraint(const std::string &Constraint,
+ MVT VT) const override;
/// isTruncateFree - Return true if it's free to truncate a value of type
/// Ty1 to type Ty2. e.g., on msp430 it's free to truncate an i16 value in
/// register R15W to i8 by referencing its sub-register R15B.
- virtual bool isTruncateFree(Type *Ty1, Type *Ty2) const;
- virtual bool isTruncateFree(EVT VT1, EVT VT2) const;
+ bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
+ bool isTruncateFree(EVT VT1, EVT VT2) const override;
/// isZExtFree - Return true if any actual instruction that defines a value
/// of type Ty1 implicitly zero-extends the value to Ty2 in the result
@@ -115,12 +116,12 @@ namespace llvm {
/// necessarily apply to truncate instructions. e.g. on msp430, all
/// instructions that define 8-bit values implicitly zero-extend the result
/// out to 16 bits.
- virtual bool isZExtFree(Type *Ty1, Type *Ty2) const;
- virtual bool isZExtFree(EVT VT1, EVT VT2) const;
- virtual bool isZExtFree(SDValue Val, EVT VT2) const;
+ bool isZExtFree(Type *Ty1, Type *Ty2) const override;
+ bool isZExtFree(EVT VT1, EVT VT2) const override;
+ bool isZExtFree(SDValue Val, EVT VT2) const override;
MachineBasicBlock* EmitInstrWithCustomInserter(MachineInstr *MI,
- MachineBasicBlock *BB) const;
+ MachineBasicBlock *BB) const override;
MachineBasicBlock* EmitShiftInstr(MachineInstr *MI,
MachineBasicBlock *BB) const;
@@ -148,28 +149,27 @@ namespace llvm {
SDLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const;
- virtual SDValue
+ SDValue
LowerFormalArguments(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
SDLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const;
- virtual SDValue
+ SmallVectorImpl<SDValue> &InVals) const override;
+ SDValue
LowerCall(TargetLowering::CallLoweringInfo &CLI,
- SmallVectorImpl<SDValue> &InVals) const;
-
- virtual SDValue
- LowerReturn(SDValue Chain,
- CallingConv::ID CallConv, bool isVarArg,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- SDLoc dl, SelectionDAG &DAG) const;
-
- virtual bool getPostIndexedAddressParts(SDNode *N, SDNode *Op,
- SDValue &Base,
- SDValue &Offset,
- ISD::MemIndexedMode &AM,
- SelectionDAG &DAG) const;
+ SmallVectorImpl<SDValue> &InVals) const override;
+
+ SDValue LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ SDLoc dl, SelectionDAG &DAG) const override;
+
+ bool getPostIndexedAddressParts(SDNode *N, SDNode *Op,
+ SDValue &Base,
+ SDValue &Offset,
+ ISD::MemIndexedMode &AM,
+ SelectionDAG &DAG) const override;
const MSP430Subtarget &Subtarget;
const DataLayout *TD;
diff --git a/lib/Target/MSP430/MSP430InstrInfo.cpp b/lib/Target/MSP430/MSP430InstrInfo.cpp
index 38f73b9..0c04ddb 100644
--- a/lib/Target/MSP430/MSP430InstrInfo.cpp
+++ b/lib/Target/MSP430/MSP430InstrInfo.cpp
@@ -22,11 +22,11 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
+using namespace llvm;
+
#define GET_INSTRINFO_CTOR_DTOR
#include "MSP430GenInstrInfo.inc"
-using namespace llvm;
-
// Pin the vtable to this file.
void MSP430InstrInfo::anchor() {}
@@ -208,11 +208,11 @@ bool MSP430InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
while (std::next(I) != MBB.end())
std::next(I)->eraseFromParent();
Cond.clear();
- FBB = 0;
+ FBB = nullptr;
// Delete the JMP if it's equivalent to a fall-through.
if (MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) {
- TBB = 0;
+ TBB = nullptr;
I->eraseFromParent();
I = MBB.end();
continue;
diff --git a/lib/Target/MSP430/MSP430InstrInfo.h b/lib/Target/MSP430/MSP430InstrInfo.h
index ad2b8cc..1ffcebb 100644
--- a/lib/Target/MSP430/MSP430InstrInfo.h
+++ b/lib/Target/MSP430/MSP430InstrInfo.h
@@ -50,40 +50,41 @@ public:
/// such, whenever a client has an instance of instruction info, it should
/// always be able to get register info as well (through this method).
///
- virtual const TargetRegisterInfo &getRegisterInfo() const { return RI; }
+ const TargetRegisterInfo &getRegisterInfo() const { return RI; }
void copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I, DebugLoc DL,
unsigned DestReg, unsigned SrcReg,
- bool KillSrc) const;
-
- virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- unsigned SrcReg, bool isKill,
- int FrameIndex,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const;
- virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- unsigned DestReg, int FrameIdx,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const;
+ bool KillSrc) const override;
+
+ void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned SrcReg, bool isKill,
+ int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const override;
+ void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, int FrameIdx,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const override;
unsigned GetInstSizeInBytes(const MachineInstr *MI) const;
// Branch folding goodness
- bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
- bool isUnpredicatedTerminator(const MachineInstr *MI) const;
+ bool
+ ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override;
+ bool isUnpredicatedTerminator(const MachineInstr *MI) const override;
bool AnalyzeBranch(MachineBasicBlock &MBB,
MachineBasicBlock *&TBB, MachineBasicBlock *&FBB,
SmallVectorImpl<MachineOperand> &Cond,
- bool AllowModify) const;
+ bool AllowModify) const override;
- unsigned RemoveBranch(MachineBasicBlock &MBB) const;
+ unsigned RemoveBranch(MachineBasicBlock &MBB) const override;
unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
const SmallVectorImpl<MachineOperand> &Cond,
- DebugLoc DL) const;
+ DebugLoc DL) const override;
};
diff --git a/lib/Target/MSP430/MSP430RegisterInfo.cpp b/lib/Target/MSP430/MSP430RegisterInfo.cpp
index f64017e..341fb64 100644
--- a/lib/Target/MSP430/MSP430RegisterInfo.cpp
+++ b/lib/Target/MSP430/MSP430RegisterInfo.cpp
@@ -11,8 +11,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "msp430-reg-info"
-
#include "MSP430RegisterInfo.h"
#include "MSP430.h"
#include "MSP430MachineFunctionInfo.h"
@@ -26,38 +24,40 @@
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "msp430-reg-info"
+
#define GET_REGINFO_TARGET_DESC
#include "MSP430GenRegisterInfo.inc"
-using namespace llvm;
-
// FIXME: Provide proper call frame setup / destroy opcodes.
MSP430RegisterInfo::MSP430RegisterInfo(MSP430TargetMachine &tm)
: MSP430GenRegisterInfo(MSP430::PCW), TM(tm) {
StackAlign = TM.getFrameLowering()->getStackAlignment();
}
-const uint16_t*
+const MCPhysReg*
MSP430RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
const TargetFrameLowering *TFI = MF->getTarget().getFrameLowering();
const Function* F = MF->getFunction();
- static const uint16_t CalleeSavedRegs[] = {
+ static const MCPhysReg CalleeSavedRegs[] = {
MSP430::FPW, MSP430::R5W, MSP430::R6W, MSP430::R7W,
MSP430::R8W, MSP430::R9W, MSP430::R10W, MSP430::R11W,
0
};
- static const uint16_t CalleeSavedRegsFP[] = {
+ static const MCPhysReg CalleeSavedRegsFP[] = {
MSP430::R5W, MSP430::R6W, MSP430::R7W,
MSP430::R8W, MSP430::R9W, MSP430::R10W, MSP430::R11W,
0
};
- static const uint16_t CalleeSavedRegsIntr[] = {
+ static const MCPhysReg CalleeSavedRegsIntr[] = {
MSP430::FPW, MSP430::R5W, MSP430::R6W, MSP430::R7W,
MSP430::R8W, MSP430::R9W, MSP430::R10W, MSP430::R11W,
MSP430::R12W, MSP430::R13W, MSP430::R14W, MSP430::R15W,
0
};
- static const uint16_t CalleeSavedRegsIntrFP[] = {
+ static const MCPhysReg CalleeSavedRegsIntrFP[] = {
MSP430::R5W, MSP430::R6W, MSP430::R7W,
MSP430::R8W, MSP430::R9W, MSP430::R10W, MSP430::R11W,
MSP430::R12W, MSP430::R13W, MSP430::R14W, MSP430::R15W,
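The uint16_t -> MCPhysReg changes in this file are intent fixes rather than layout changes: MCPhysReg is LLVM's typedef for a physical register number (uint16_t at this revision), so the arrays keep their size while the element type now says what it holds. A self-contained sketch of the pattern, using placeholder register values rather than real MSP430 encodings:

  #include <cstddef>
  #include <cstdint>
  typedef uint16_t MCPhysReg;  // mirrors llvm/MC/MCRegisterInfo.h at this revision
  // Same shape as the arrays above: a 0-terminated list of register numbers.
  static const MCPhysReg CalleeSaved[] = { 5, 6, 7, 0 };
  static std::size_t countRegs(const MCPhysReg *List) {
    std::size_t N = 0;
    while (*List++) ++N;  // the terminating 0 is not a register
    return N;
  }
  int main() { return countRegs(CalleeSaved) == 3 ? 0 : 1; }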
diff --git a/lib/Target/MSP430/MSP430RegisterInfo.h b/lib/Target/MSP430/MSP430RegisterInfo.h
index 78047cc..a607528 100644
--- a/lib/Target/MSP430/MSP430RegisterInfo.h
+++ b/lib/Target/MSP430/MSP430RegisterInfo.h
@@ -35,18 +35,20 @@ public:
MSP430RegisterInfo(MSP430TargetMachine &tm);
/// Code Generation virtual methods...
- const uint16_t *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
+ const MCPhysReg *
+ getCalleeSavedRegs(const MachineFunction *MF = nullptr) const override;
- BitVector getReservedRegs(const MachineFunction &MF) const;
+ BitVector getReservedRegs(const MachineFunction &MF) const override;
const TargetRegisterClass*
- getPointerRegClass(const MachineFunction &MF, unsigned Kind = 0) const;
+ getPointerRegClass(const MachineFunction &MF,
+ unsigned Kind = 0) const override;
void eliminateFrameIndex(MachineBasicBlock::iterator II,
int SPAdj, unsigned FIOperandNum,
- RegScavenger *RS = NULL) const;
+ RegScavenger *RS = nullptr) const override;
// Debug information queries.
- unsigned getFrameRegister(const MachineFunction &MF) const;
+ unsigned getFrameRegister(const MachineFunction &MF) const override;
};
} // end namespace llvm
diff --git a/lib/Target/MSP430/MSP430SelectionDAGInfo.cpp b/lib/Target/MSP430/MSP430SelectionDAGInfo.cpp
index 24f45fa..c700383 100644
--- a/lib/Target/MSP430/MSP430SelectionDAGInfo.cpp
+++ b/lib/Target/MSP430/MSP430SelectionDAGInfo.cpp
@@ -11,10 +11,11 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "msp430-selectiondag-info"
#include "MSP430TargetMachine.h"
using namespace llvm;
+#define DEBUG_TYPE "msp430-selectiondag-info"
+
MSP430SelectionDAGInfo::MSP430SelectionDAGInfo(const MSP430TargetMachine &TM)
: TargetSelectionDAGInfo(TM) {
}
diff --git a/lib/Target/MSP430/MSP430Subtarget.cpp b/lib/Target/MSP430/MSP430Subtarget.cpp
index edeaf34..68ad091 100644
--- a/lib/Target/MSP430/MSP430Subtarget.cpp
+++ b/lib/Target/MSP430/MSP430Subtarget.cpp
@@ -15,12 +15,14 @@
#include "MSP430.h"
#include "llvm/Support/TargetRegistry.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "msp430-subtarget"
+
#define GET_SUBTARGETINFO_TARGET_DESC
#define GET_SUBTARGETINFO_CTOR
#include "MSP430GenSubtargetInfo.inc"
-using namespace llvm;
-
void MSP430Subtarget::anchor() { }
MSP430Subtarget::MSP430Subtarget(const std::string &TT,
diff --git a/lib/Target/MSP430/MSP430TargetMachine.cpp b/lib/Target/MSP430/MSP430TargetMachine.cpp
index 98a6003..50be2be 100644
--- a/lib/Target/MSP430/MSP430TargetMachine.cpp
+++ b/lib/Target/MSP430/MSP430TargetMachine.cpp
@@ -51,8 +51,8 @@ public:
return getTM<MSP430TargetMachine>();
}
- virtual bool addInstSelector();
- virtual bool addPreEmitPass();
+ bool addInstSelector() override;
+ bool addPreEmitPass() override;
};
} // namespace
diff --git a/lib/Target/MSP430/MSP430TargetMachine.h b/lib/Target/MSP430/MSP430TargetMachine.h
index be695a2..ea5d407 100644
--- a/lib/Target/MSP430/MSP430TargetMachine.h
+++ b/lib/Target/MSP430/MSP430TargetMachine.h
@@ -43,25 +43,25 @@ public:
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL);
- virtual const TargetFrameLowering *getFrameLowering() const {
+ const TargetFrameLowering *getFrameLowering() const override {
return &FrameLowering;
}
- virtual const MSP430InstrInfo *getInstrInfo() const { return &InstrInfo; }
- virtual const DataLayout *getDataLayout() const { return &DL;}
- virtual const MSP430Subtarget *getSubtargetImpl() const { return &Subtarget; }
+ const MSP430InstrInfo *getInstrInfo() const override { return &InstrInfo; }
+ const DataLayout *getDataLayout() const override { return &DL;}
+ const MSP430Subtarget *getSubtargetImpl() const override { return &Subtarget; }
- virtual const TargetRegisterInfo *getRegisterInfo() const {
+ const TargetRegisterInfo *getRegisterInfo() const override {
return &InstrInfo.getRegisterInfo();
}
- virtual const MSP430TargetLowering *getTargetLowering() const {
+ const MSP430TargetLowering *getTargetLowering() const override {
return &TLInfo;
}
- virtual const MSP430SelectionDAGInfo* getSelectionDAGInfo() const {
+ const MSP430SelectionDAGInfo* getSelectionDAGInfo() const override {
return &TSInfo;
}
- virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
+ TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
}; // MSP430TargetMachine.
} // end namespace llvm
diff --git a/lib/Target/Mips/Android.mk b/lib/Target/Mips/Android.mk
index 74b8a3b..4e8831c 100644
--- a/lib/Target/Mips/Android.mk
+++ b/lib/Target/Mips/Android.mk
@@ -24,6 +24,7 @@ mips_codegen_SRC_FILES := \
MipsCodeEmitter.cpp \
MipsConstantIslandPass.cpp \
MipsDelaySlotFiller.cpp \
+ MipsFastISel.cpp \
MipsFrameLowering.cpp \
MipsInstrInfo.cpp \
MipsISelDAGToDAG.cpp \
diff --git a/lib/Target/Mips/AsmParser/LLVMBuild.txt b/lib/Target/Mips/AsmParser/LLVMBuild.txt
index e7ca243..dd8e3cf 100644
--- a/lib/Target/Mips/AsmParser/LLVMBuild.txt
+++ b/lib/Target/Mips/AsmParser/LLVMBuild.txt
@@ -19,5 +19,5 @@
type = Library
name = MipsAsmParser
parent = Mips
-required_libraries = MC MCParser Support MipsDesc MipsInfo
+required_libraries = MC MCParser MipsDesc MipsInfo Support
add_to_library_groups = Mips
diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
index 911a119..86fd386 100644
--- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
+++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
@@ -29,6 +29,8 @@
using namespace llvm;
+#define DEBUG_TYPE "mips-asm-parser"
+
namespace llvm {
class MCInstrInfo;
}
@@ -73,10 +75,10 @@ class MipsAsmParser : public MCTargetAsmParser {
bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
SmallVectorImpl<MCParsedAsmOperand *> &Operands,
MCStreamer &Out, unsigned &ErrorInfo,
- bool MatchingInlineAsm);
+ bool MatchingInlineAsm) override;
/// Parse a register as used in CFI directives
- bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc);
+ bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
bool ParseParenSuffix(StringRef Name,
SmallVectorImpl<MCParsedAsmOperand *> &Operands);
@@ -84,11 +86,11 @@ class MipsAsmParser : public MCTargetAsmParser {
bool ParseBracketSuffix(StringRef Name,
SmallVectorImpl<MCParsedAsmOperand *> &Operands);
- bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
- SMLoc NameLoc,
- SmallVectorImpl<MCParsedAsmOperand *> &Operands);
+ bool
+ ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc,
+ SmallVectorImpl<MCParsedAsmOperand *> &Operands) override;
- bool ParseDirective(AsmToken DirectiveID);
+ bool ParseDirective(AsmToken DirectiveID) override;
MipsAsmParser::OperandMatchResultTy
parseMemOperand(SmallVectorImpl<MCParsedAsmOperand *> &Operands);
@@ -135,6 +137,7 @@ class MipsAsmParser : public MCTargetAsmParser {
SmallVectorImpl<MCInst> &Instructions, bool isLoad,
bool isImmOpnd);
bool reportParseError(StringRef ErrorMsg);
+ bool reportParseError(SMLoc Loc, StringRef ErrorMsg);
bool parseMemOffset(const MCExpr *&Res, bool isParenExpr);
bool parseRelocOperand(const MCExpr *&Res);
@@ -143,7 +146,9 @@ class MipsAsmParser : public MCTargetAsmParser {
bool isEvaluated(const MCExpr *Expr);
bool parseSetFeature(uint64_t Feature);
+ bool parseDirectiveCPLoad(SMLoc Loc);
bool parseDirectiveCPSetup();
+ bool parseDirectiveNaN();
bool parseDirectiveSet();
bool parseDirectiveOption();
@@ -212,21 +217,22 @@ class MipsAsmParser : public MCTargetAsmParser {
void setFeatureBits(unsigned Feature, StringRef FeatureString) {
if (!(STI.getFeatureBits() & Feature)) {
- setAvailableFeatures(ComputeAvailableFeatures(
- STI.ToggleFeature(FeatureString)));
+ setAvailableFeatures(
+ ComputeAvailableFeatures(STI.ToggleFeature(FeatureString)));
}
}
void clearFeatureBits(unsigned Feature, StringRef FeatureString) {
if (STI.getFeatureBits() & Feature) {
- setAvailableFeatures(ComputeAvailableFeatures(
- STI.ToggleFeature(FeatureString)));
+ setAvailableFeatures(
+ ComputeAvailableFeatures(STI.ToggleFeature(FeatureString)));
}
}
public:
MipsAsmParser(MCSubtargetInfo &sti, MCAsmParser &parser,
- const MCInstrInfo &MII)
+ const MCInstrInfo &MII,
+ const MCTargetOptions &Options)
: MCTargetAsmParser(), STI(sti), Parser(parser) {
// Initialize the set of available features.
setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
@@ -266,11 +272,12 @@ public:
/// context).
RegKind_CCR = 128, /// CCR
RegKind_HWRegs = 256, /// HWRegs
+ RegKind_COP3 = 512, /// COP3
/// Potentially any (e.g. $1)
RegKind_Numeric = RegKind_GPR | RegKind_FGR | RegKind_FCC | RegKind_MSA128 |
RegKind_MSACtrl | RegKind_COP2 | RegKind_ACC |
- RegKind_CCR | RegKind_HWRegs
+ RegKind_CCR | RegKind_HWRegs | RegKind_COP3
};
private:
@@ -422,6 +429,14 @@ private:
return RegIdx.RegInfo->getRegClass(ClassID).getRegister(RegIdx.Index);
}
+ /// Coerce the register to COP3 and return the real register for the
+ /// current target.
+ unsigned getCOP3Reg() const {
+ assert(isRegIdx() && (RegIdx.Kind & RegKind_COP3) && "Invalid access!");
+ unsigned ClassID = Mips::COP3RegClassID;
+ return RegIdx.RegInfo->getRegClass(ClassID).getRegister(RegIdx.Index);
+ }
+
/// Coerce the register to ACC64DSP and return the real register for the
/// current target.
unsigned getACC64DSPReg() const {
@@ -465,7 +480,7 @@ private:
public:
void addExpr(MCInst &Inst, const MCExpr *Expr) const {
// Add as immediate when possible. Null MCExpr = 0.
- if (Expr == 0)
+ if (!Expr)
Inst.addOperand(MCOperand::CreateImm(0));
else if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr))
Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
@@ -533,6 +548,11 @@ public:
Inst.addOperand(MCOperand::CreateReg(getCOP2Reg()));
}
+ void addCOP3AsmRegOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::CreateReg(getCOP3Reg()));
+ }
+
void addACC64DSPAsmRegOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
Inst.addOperand(MCOperand::CreateReg(getACC64DSPReg()));
@@ -573,7 +593,7 @@ public:
addExpr(Inst, Expr);
}
- bool isReg() const {
+ bool isReg() const override {
// As a special case until we sort out the definition of div/divu, pretend
// that $0/$zero are k_PhysRegister so that MCK_ZERO works correctly.
if (isGPRAsmReg() && RegIdx.Index == 0)
@@ -582,16 +602,16 @@ public:
return Kind == k_PhysRegister;
}
bool isRegIdx() const { return Kind == k_RegisterIndex; }
- bool isImm() const { return Kind == k_Immediate; }
+ bool isImm() const override { return Kind == k_Immediate; }
bool isConstantImm() const {
return isImm() && dyn_cast<MCConstantExpr>(getImm());
}
- bool isToken() const {
+ bool isToken() const override {
// Note: It's not possible to pretend that other operand kinds are tokens.
// The matcher emitter checks tokens first.
return Kind == k_Token;
}
- bool isMem() const { return Kind == k_Memory; }
+ bool isMem() const override { return Kind == k_Memory; }
bool isInvNum() const { return Kind == k_Immediate; }
bool isLSAImm() const {
if (!isConstantImm())
@@ -605,7 +625,7 @@ public:
return StringRef(Tok.Data, Tok.Length);
}
- unsigned getReg() const {
+ unsigned getReg() const override {
// As a special case until we sort out the definition of div/divu, pretend
// that $0/$zero are k_PhysRegister so that MCK_ZERO works correctly.
if (Kind == k_RegisterIndex && RegIdx.Index == 0 &&
@@ -744,6 +764,9 @@ public:
bool isCOP2AsmReg() const {
return isRegIdx() && RegIdx.Kind & RegKind_COP2 && RegIdx.Index <= 31;
}
+ bool isCOP3AsmReg() const {
+ return isRegIdx() && RegIdx.Kind & RegKind_COP3 && RegIdx.Index <= 31;
+ }
bool isMSA128AsmReg() const {
return isRegIdx() && RegIdx.Kind & RegKind_MSA128 && RegIdx.Index <= 31;
}
@@ -752,11 +775,25 @@ public:
}
/// getStartLoc - Get the location of the first token of this operand.
- SMLoc getStartLoc() const { return StartLoc; }
+ SMLoc getStartLoc() const override { return StartLoc; }
/// getEndLoc - Get the location of the last token of this operand.
- SMLoc getEndLoc() const { return EndLoc; }
+ SMLoc getEndLoc() const override { return EndLoc; }
- virtual void print(raw_ostream &OS) const {
+ virtual ~MipsOperand() {
+ switch (Kind) {
+ case k_Immediate:
+ break;
+ case k_Memory:
+ delete Mem.Base;
+ break;
+ case k_PhysRegister:
+ case k_RegisterIndex:
+ case k_Token:
+ break;
+ }
+ }
+
+ void print(raw_ostream &OS) const override {
switch (Kind) {
case k_Immediate:
OS << "Imm<";
@@ -906,10 +943,6 @@ bool MipsAsmParser::needsExpansion(MCInst &Inst) {
case Mips::LoadImm32Reg:
case Mips::LoadAddr32Imm:
case Mips::LoadAddr32Reg:
- case Mips::SUBi:
- case Mips::SUBiu:
- case Mips::DSUBi:
- case Mips::DSUBiu:
return true;
default:
return false;
@@ -925,30 +958,6 @@ void MipsAsmParser::expandInstruction(MCInst &Inst, SMLoc IDLoc,
return expandLoadAddressImm(Inst, IDLoc, Instructions);
case Mips::LoadAddr32Reg:
return expandLoadAddressReg(Inst, IDLoc, Instructions);
- case Mips::SUBi:
- Instructions.push_back(MCInstBuilder(Mips::ADDi)
- .addReg(Inst.getOperand(0).getReg())
- .addReg(Inst.getOperand(1).getReg())
- .addImm(-Inst.getOperand(2).getImm()));
- return;
- case Mips::SUBiu:
- Instructions.push_back(MCInstBuilder(Mips::ADDiu)
- .addReg(Inst.getOperand(0).getReg())
- .addReg(Inst.getOperand(1).getReg())
- .addImm(-Inst.getOperand(2).getImm()));
- return;
- case Mips::DSUBi:
- Instructions.push_back(MCInstBuilder(Mips::DADDi)
- .addReg(Inst.getOperand(0).getReg())
- .addReg(Inst.getOperand(1).getReg())
- .addImm(-Inst.getOperand(2).getImm()));
- return;
- case Mips::DSUBiu:
- Instructions.push_back(MCInstBuilder(Mips::DADDiu)
- .addReg(Inst.getOperand(0).getReg())
- .addReg(Inst.getOperand(1).getReg())
- .addImm(-Inst.getOperand(2).getImm()));
- return;
}
}
@@ -1586,6 +1595,8 @@ bool MipsAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
RegNo = isGP64() ? Operand.getGPR64Reg() : Operand.getGPR32Reg();
}
+ delete &Operand;
+
return (RegNo == (unsigned)-1);
}
@@ -1624,7 +1635,7 @@ bool MipsAsmParser::parseMemOffset(const MCExpr *&Res, bool isParenExpr) {
MipsAsmParser::OperandMatchResultTy MipsAsmParser::parseMemOperand(
SmallVectorImpl<MCParsedAsmOperand *> &Operands) {
DEBUG(dbgs() << "parseMemOperand\n");
- const MCExpr *IdVal = 0;
+ const MCExpr *IdVal = nullptr;
SMLoc S;
bool isParenExpr = false;
MipsAsmParser::OperandMatchResultTy Res = MatchOperand_NoMatch;
@@ -1654,6 +1665,7 @@ MipsAsmParser::OperandMatchResultTy MipsAsmParser::parseMemOperand(
SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
// Zero register assumed, add a memory operand with ZERO as its base.
+ // "Base" will be managed by k_Memory.
MipsOperand *Base = MipsOperand::CreateGPRReg(
0, getContext().getRegisterInfo(), S, E, *this);
Operands.push_back(MipsOperand::CreateMem(Base, IdVal, S, E, *this));
@@ -1679,12 +1691,13 @@ MipsAsmParser::OperandMatchResultTy MipsAsmParser::parseMemOperand(
Parser.Lex(); // Eat the ')' token.
- if (IdVal == 0)
+ if (!IdVal)
IdVal = MCConstantExpr::Create(0, getContext());
// Replace the register operand with the memory operand.
MipsOperand *op = static_cast<MipsOperand *>(Operands.back());
// Remove the register from the operands.
+ // "op" will be managed by k_Memory.
Operands.pop_back();
// Add the memory operand.
if (const MCBinaryExpr *BE = dyn_cast<MCBinaryExpr>(IdVal)) {
@@ -1969,9 +1982,11 @@ MCSymbolRefExpr::VariantKind MipsAsmParser::getVariantKind(StringRef Symbol) {
.Case("call_lo", MCSymbolRefExpr::VK_Mips_CALL_LO16)
.Case("higher", MCSymbolRefExpr::VK_Mips_HIGHER)
.Case("highest", MCSymbolRefExpr::VK_Mips_HIGHEST)
+ .Case("pcrel_hi", MCSymbolRefExpr::VK_Mips_PCREL_HI16)
+ .Case("pcrel_lo", MCSymbolRefExpr::VK_Mips_PCREL_LO16)
.Default(MCSymbolRefExpr::VK_None);
- assert (VK != MCSymbolRefExpr::VK_None);
+ assert(VK != MCSymbolRefExpr::VK_None);
return VK;
}
@@ -2089,6 +2104,10 @@ bool MipsAsmParser::reportParseError(StringRef ErrorMsg) {
return Error(Loc, ErrorMsg);
}
+bool MipsAsmParser::reportParseError(SMLoc Loc, StringRef ErrorMsg) {
+ return Error(Loc, ErrorMsg);
+}
+
bool MipsAsmParser::parseSetNoAtDirective() {
// Line should look like: ".set noat".
// set at reg to 0.
@@ -2248,29 +2267,30 @@ bool MipsAsmParser::parseSetFeature(uint64_t Feature) {
if (getLexer().isNot(AsmToken::EndOfStatement))
return reportParseError("unexpected token in .set directive");
- switch(Feature) {
- default: llvm_unreachable("Unimplemented feature");
- case Mips::FeatureDSP:
- setFeatureBits(Mips::FeatureDSP, "dsp");
- getTargetStreamer().emitDirectiveSetDsp();
+ switch (Feature) {
+ default:
+ llvm_unreachable("Unimplemented feature");
+ case Mips::FeatureDSP:
+ setFeatureBits(Mips::FeatureDSP, "dsp");
+ getTargetStreamer().emitDirectiveSetDsp();
break;
- case Mips::FeatureMicroMips:
- getTargetStreamer().emitDirectiveSetMicroMips();
+ case Mips::FeatureMicroMips:
+ getTargetStreamer().emitDirectiveSetMicroMips();
break;
- case Mips::FeatureMips16:
- getTargetStreamer().emitDirectiveSetMips16();
+ case Mips::FeatureMips16:
+ getTargetStreamer().emitDirectiveSetMips16();
break;
- case Mips::FeatureMips32r2:
- setFeatureBits(Mips::FeatureMips32r2, "mips32r2");
- getTargetStreamer().emitDirectiveSetMips32R2();
+ case Mips::FeatureMips32r2:
+ setFeatureBits(Mips::FeatureMips32r2, "mips32r2");
+ getTargetStreamer().emitDirectiveSetMips32R2();
break;
- case Mips::FeatureMips64:
- setFeatureBits(Mips::FeatureMips64, "mips64");
- getTargetStreamer().emitDirectiveSetMips64();
+ case Mips::FeatureMips64:
+ setFeatureBits(Mips::FeatureMips64, "mips64");
+ getTargetStreamer().emitDirectiveSetMips64();
break;
- case Mips::FeatureMips64r2:
- setFeatureBits(Mips::FeatureMips64r2, "mips64r2");
- getTargetStreamer().emitDirectiveSetMips64R2();
+ case Mips::FeatureMips64r2:
+ setFeatureBits(Mips::FeatureMips64r2, "mips64r2");
+ getTargetStreamer().emitDirectiveSetMips64R2();
break;
}
return false;
@@ -2302,10 +2322,34 @@ bool MipsAsmParser::eatComma(StringRef ErrorStr) {
return Error(Loc, ErrorStr);
}
- Parser.Lex(); // Eat the comma.
+ Parser.Lex(); // Eat the comma.
return true;
}
+bool MipsAsmParser::parseDirectiveCPLoad(SMLoc Loc) {
+ if (Options.isReorder())
+ Warning(Loc, ".cpload in reorder section");
+
+ // FIXME: Warn if cpload is used in Mips16 mode.
+
+ SmallVector<MCParsedAsmOperand *, 1> Reg;
+ OperandMatchResultTy ResTy = ParseAnyRegister(Reg);
+ if (ResTy == MatchOperand_NoMatch || ResTy == MatchOperand_ParseFail) {
+ reportParseError("expected register containing function address");
+ return false;
+ }
+
+ MipsOperand *RegOpnd = static_cast<MipsOperand *>(Reg[0]);
+ if (!RegOpnd->isGPRAsmReg()) {
+ reportParseError(RegOpnd->getStartLoc(), "invalid register");
+ return false;
+ }
+
+ getTargetStreamer().emitDirectiveCpload(RegOpnd->getGPR32Reg());
+ delete RegOpnd;
+ return false;
+}
+
bool MipsAsmParser::parseDirectiveCPSetup() {
unsigned FuncReg;
unsigned Save;
@@ -2336,60 +2380,28 @@ bool MipsAsmParser::parseDirectiveCPSetup() {
if (Parser.parseIdentifier(Name))
reportParseError("expected identifier");
MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);
- unsigned GPReg = getGPR(matchCPURegisterName("gp"));
- // FIXME: The code below this point should be in the TargetStreamers.
- // Only N32 and N64 emit anything for .cpsetup
- // FIXME: We should only emit something for PIC mode too.
- if (!isN32() && !isN64())
- return false;
+ getTargetStreamer().emitDirectiveCpsetup(FuncReg, Save, *Sym, SaveIsReg);
+ return false;
+}
- MCStreamer &TS = getStreamer();
- MCInst Inst;
- // Either store the old $gp in a register or on the stack
- if (SaveIsReg) {
- // move $save, $gpreg
- Inst.setOpcode(Mips::DADDu);
- Inst.addOperand(MCOperand::CreateReg(Save));
- Inst.addOperand(MCOperand::CreateReg(GPReg));
- Inst.addOperand(MCOperand::CreateReg(getGPR(0)));
- } else {
- // sd $gpreg, offset($sp)
- Inst.setOpcode(Mips::SD);
- Inst.addOperand(MCOperand::CreateReg(GPReg));
- Inst.addOperand(MCOperand::CreateReg(getGPR(matchCPURegisterName("sp"))));
- Inst.addOperand(MCOperand::CreateImm(Save));
- }
- TS.EmitInstruction(Inst, STI);
- Inst.clear();
-
- const MCSymbolRefExpr *HiExpr = MCSymbolRefExpr::Create(
- Sym->getName(), MCSymbolRefExpr::VK_Mips_GPOFF_HI,
- getContext());
- const MCSymbolRefExpr *LoExpr = MCSymbolRefExpr::Create(
- Sym->getName(), MCSymbolRefExpr::VK_Mips_GPOFF_LO,
- getContext());
- // lui $gp, %hi(%neg(%gp_rel(funcSym)))
- Inst.setOpcode(Mips::LUi);
- Inst.addOperand(MCOperand::CreateReg(GPReg));
- Inst.addOperand(MCOperand::CreateExpr(HiExpr));
- TS.EmitInstruction(Inst, STI);
- Inst.clear();
-
- // addiu $gp, $gp, %lo(%neg(%gp_rel(funcSym)))
- Inst.setOpcode(Mips::ADDiu);
- Inst.addOperand(MCOperand::CreateReg(GPReg));
- Inst.addOperand(MCOperand::CreateReg(GPReg));
- Inst.addOperand(MCOperand::CreateExpr(LoExpr));
- TS.EmitInstruction(Inst, STI);
- Inst.clear();
-
- // daddu $gp, $gp, $funcreg
- Inst.setOpcode(Mips::DADDu);
- Inst.addOperand(MCOperand::CreateReg(GPReg));
- Inst.addOperand(MCOperand::CreateReg(GPReg));
- Inst.addOperand(MCOperand::CreateReg(FuncReg));
- TS.EmitInstruction(Inst, STI);
+bool MipsAsmParser::parseDirectiveNaN() {
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ const AsmToken &Tok = Parser.getTok();
+
+ if (Tok.getString() == "2008") {
+ Parser.Lex();
+ getTargetStreamer().emitDirectiveNaN2008();
+ return false;
+ } else if (Tok.getString() == "legacy") {
+ Parser.Lex();
+ getTargetStreamer().emitDirectiveNaNLegacy();
+ return false;
+ }
+ }
+ // If we don't recognize the option passed to the .nan
+ // directive (e.g. no option or unknown option), emit an error.
+ reportParseError("invalid option in .nan directive");
return false;
}
@@ -2419,15 +2431,15 @@ bool MipsAsmParser::parseDirectiveSet() {
Parser.eatToEndOfStatement();
return false;
} else if (Tok.getString() == "micromips") {
- return parseSetFeature(Mips::FeatureMicroMips);
+ return parseSetFeature(Mips::FeatureMicroMips);
} else if (Tok.getString() == "mips32r2") {
- return parseSetFeature(Mips::FeatureMips32r2);
+ return parseSetFeature(Mips::FeatureMips32r2);
} else if (Tok.getString() == "mips64") {
- return parseSetFeature(Mips::FeatureMips64);
+ return parseSetFeature(Mips::FeatureMips64);
} else if (Tok.getString() == "mips64r2") {
- return parseSetFeature(Mips::FeatureMips64r2);
+ return parseSetFeature(Mips::FeatureMips64r2);
} else if (Tok.getString() == "dsp") {
- return parseSetFeature(Mips::FeatureDSP);
+ return parseSetFeature(Mips::FeatureDSP);
} else {
// It is just an identifier, look for an assignment.
parseSetAssignment();
@@ -2537,6 +2549,8 @@ bool MipsAsmParser::parseDirectiveOption() {
bool MipsAsmParser::ParseDirective(AsmToken DirectiveID) {
StringRef IDVal = DirectiveID.getString();
+ if (IDVal == ".cpload")
+ return parseDirectiveCPLoad(DirectiveID.getLoc());
if (IDVal == ".dword") {
parseDataDirective(8, DirectiveID.getLoc());
return false;
@@ -2576,6 +2590,9 @@ bool MipsAsmParser::ParseDirective(AsmToken DirectiveID) {
return false;
}
+ if (IDVal == ".nan")
+ return parseDirectiveNaN();
+
if (IDVal == ".gpword") {
parseDirectiveGpWord();
return false;
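The setFeatureBits/clearFeatureBits helpers above guard ToggleFeature behind a current-state check, so a repeated .set directive can never toggle a feature into the wrong state. The guard pattern in a standalone form (a minimal sketch, not the real subtarget API):

  #include <cstdint>
  struct State { uint64_t Bits = 0; };
  // Toggle-based APIs flip a bit; the guards make set/clear idempotent.
  void setFeature(State &S, uint64_t F)   { if (!(S.Bits & F)) S.Bits ^= F; }
  void clearFeature(State &S, uint64_t F) { if (S.Bits & F)    S.Bits ^= F; }
  int main() {
    State S;
    setFeature(S, 1); setFeature(S, 1);  // second call is a no-op
    return S.Bits == 1 ? 0 : 1;
  }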
diff --git a/lib/Target/Mips/CMakeLists.txt b/lib/Target/Mips/CMakeLists.txt
index c304ee3..bf67d71 100644
--- a/lib/Target/Mips/CMakeLists.txt
+++ b/lib/Target/Mips/CMakeLists.txt
@@ -7,6 +7,7 @@ tablegen(LLVM MipsGenCodeEmitter.inc -gen-emitter)
tablegen(LLVM MipsGenMCCodeEmitter.inc -gen-emitter -mc-emitter)
tablegen(LLVM MipsGenAsmWriter.inc -gen-asm-writer)
tablegen(LLVM MipsGenDAGISel.inc -gen-dag-isel)
+tablegen(LLVM MipsGenFastISel.inc -gen-fast-isel)
tablegen(LLVM MipsGenCallingConv.inc -gen-callingconv)
tablegen(LLVM MipsGenSubtargetInfo.inc -gen-subtarget)
tablegen(LLVM MipsGenAsmMatcher.inc -gen-asm-matcher)
@@ -26,6 +27,7 @@ add_llvm_target(MipsCodeGen
MipsCodeEmitter.cpp
MipsConstantIslandPass.cpp
MipsDelaySlotFiller.cpp
+ MipsFastISel.cpp
MipsJITInfo.cpp
MipsInstrInfo.cpp
MipsISelDAGToDAG.cpp
diff --git a/lib/Target/Mips/Disassembler/LLVMBuild.txt b/lib/Target/Mips/Disassembler/LLVMBuild.txt
index 7101c06..bb70fd3 100644
--- a/lib/Target/Mips/Disassembler/LLVMBuild.txt
+++ b/lib/Target/Mips/Disassembler/LLVMBuild.txt
@@ -19,5 +19,5 @@
type = Library
name = MipsDisassembler
parent = Mips
-required_libraries = MC Support MipsInfo
+required_libraries = MC MipsInfo Support
add_to_library_groups = Mips
diff --git a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
index fc3b922..95670aa 100644
--- a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
+++ b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
@@ -14,6 +14,7 @@
#include "Mips.h"
#include "MipsRegisterInfo.h"
#include "MipsSubtarget.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler.h"
#include "llvm/MC/MCFixedLenDisassembler.h"
#include "llvm/MC/MCInst.h"
@@ -24,6 +25,8 @@
using namespace llvm;
+#define DEBUG_TYPE "mips-disassembler"
+
typedef MCDisassembler::DecodeStatus DecodeStatus;
namespace {
@@ -33,19 +36,16 @@ class MipsDisassemblerBase : public MCDisassembler {
public:
/// Constructor - Initializes the disassembler.
///
- MipsDisassemblerBase(const MCSubtargetInfo &STI, const MCRegisterInfo *Info,
+ MipsDisassemblerBase(const MCSubtargetInfo &STI, MCContext &Ctx,
bool bigEndian) :
- MCDisassembler(STI), RegInfo(Info),
+ MCDisassembler(STI, Ctx),
IsN64(STI.getFeatureBits() & Mips::FeatureN64), isBigEndian(bigEndian) {}
virtual ~MipsDisassemblerBase() {}
- const MCRegisterInfo *getRegInfo() const { return RegInfo.get(); }
-
bool isN64() const { return IsN64; }
private:
- OwningPtr<const MCRegisterInfo> RegInfo;
bool IsN64;
protected:
bool isBigEndian;
@@ -57,19 +57,23 @@ class MipsDisassembler : public MipsDisassemblerBase {
public:
/// Constructor - Initializes the disassembler.
///
- MipsDisassembler(const MCSubtargetInfo &STI, const MCRegisterInfo *Info,
+ MipsDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx,
bool bigEndian) :
- MipsDisassemblerBase(STI, Info, bigEndian) {
+ MipsDisassemblerBase(STI, Ctx, bigEndian) {
IsMicroMips = STI.getFeatureBits() & Mips::FeatureMicroMips;
}
+ bool isMips32r6() const {
+ return STI.getFeatureBits() & Mips::FeatureMips32r6;
+ }
+
/// getInstruction - See MCDisassembler.
- virtual DecodeStatus getInstruction(MCInst &instr,
- uint64_t &size,
- const MemoryObject &region,
- uint64_t address,
- raw_ostream &vStream,
- raw_ostream &cStream) const;
+ DecodeStatus getInstruction(MCInst &instr,
+ uint64_t &size,
+ const MemoryObject &region,
+ uint64_t address,
+ raw_ostream &vStream,
+ raw_ostream &cStream) const override;
};
@@ -78,17 +82,17 @@ class Mips64Disassembler : public MipsDisassemblerBase {
public:
/// Constructor - Initializes the disassembler.
///
- Mips64Disassembler(const MCSubtargetInfo &STI, const MCRegisterInfo *Info,
+ Mips64Disassembler(const MCSubtargetInfo &STI, MCContext &Ctx,
bool bigEndian) :
- MipsDisassemblerBase(STI, Info, bigEndian) {}
+ MipsDisassemblerBase(STI, Ctx, bigEndian) {}
/// getInstruction - See MCDisassembler.
- virtual DecodeStatus getInstruction(MCInst &instr,
- uint64_t &size,
- const MemoryObject &region,
- uint64_t address,
- raw_ostream &vStream,
- raw_ostream &cStream) const;
+ DecodeStatus getInstruction(MCInst &instr,
+ uint64_t &size,
+ const MemoryObject &region,
+ uint64_t address,
+ raw_ostream &vStream,
+ raw_ostream &cStream) const override;
};
} // end anonymous namespace
@@ -195,6 +199,11 @@ static DecodeStatus DecodeMSACtrlRegisterClass(MCInst &Inst,
uint64_t Address,
const void *Decoder);
+static DecodeStatus DecodeCOP2RegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder);
+
static DecodeStatus DecodeBranchTarget(MCInst &Inst,
unsigned Offset,
uint64_t Address,
@@ -205,6 +214,16 @@ static DecodeStatus DecodeJumpTarget(MCInst &Inst,
uint64_t Address,
const void *Decoder);
+static DecodeStatus DecodeBranchTarget21(MCInst &Inst,
+ unsigned Offset,
+ uint64_t Address,
+ const void *Decoder);
+
+static DecodeStatus DecodeBranchTarget26(MCInst &Inst,
+ unsigned Offset,
+ uint64_t Address,
+ const void *Decoder);
+
// DecodeBranchTargetMM - Decode microMIPS branch offset, which is
// shifted left by 1 bit.
static DecodeStatus DecodeBranchTargetMM(MCInst &Inst,
@@ -263,11 +282,40 @@ static DecodeStatus DecodeExtSize(MCInst &Inst,
uint64_t Address,
const void *Decoder);
+static DecodeStatus DecodeSimm19Lsl2(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder);
+
/// INSVE_[BHWD] have an implicit operand that the generated decoder doesn't
/// handle.
template <typename InsnType>
static DecodeStatus DecodeINSVE_DF(MCInst &MI, InsnType insn, uint64_t Address,
const void *Decoder);
+
+template <typename InsnType>
+static DecodeStatus
+DecodeAddiGroupBranch(MCInst &MI, InsnType insn, uint64_t Address,
+ const void *Decoder);
+
+template <typename InsnType>
+static DecodeStatus
+DecodeDaddiGroupBranch(MCInst &MI, InsnType insn, uint64_t Address,
+ const void *Decoder);
+
+template <typename InsnType>
+static DecodeStatus
+DecodeBlezlGroupBranch(MCInst &MI, InsnType insn, uint64_t Address,
+ const void *Decoder);
+
+template <typename InsnType>
+static DecodeStatus
+DecodeBgtzlGroupBranch(MCInst &MI, InsnType insn, uint64_t Address,
+ const void *Decoder);
+
+template <typename InsnType>
+static DecodeStatus
+DecodeBgtzGroupBranch(MCInst &MI, InsnType insn, uint64_t Address,
+ const void *Decoder);
+
namespace llvm {
extern Target TheMipselTarget, TheMipsTarget, TheMips64Target,
TheMips64elTarget;
@@ -275,26 +323,30 @@ extern Target TheMipselTarget, TheMipsTarget, TheMips64Target,
static MCDisassembler *createMipsDisassembler(
const Target &T,
- const MCSubtargetInfo &STI) {
- return new MipsDisassembler(STI, T.createMCRegInfo(""), true);
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx) {
+ return new MipsDisassembler(STI, Ctx, true);
}
static MCDisassembler *createMipselDisassembler(
const Target &T,
- const MCSubtargetInfo &STI) {
- return new MipsDisassembler(STI, T.createMCRegInfo(""), false);
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx) {
+ return new MipsDisassembler(STI, Ctx, false);
}
static MCDisassembler *createMips64Disassembler(
const Target &T,
- const MCSubtargetInfo &STI) {
- return new Mips64Disassembler(STI, T.createMCRegInfo(""), true);
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx) {
+ return new Mips64Disassembler(STI, Ctx, true);
}
static MCDisassembler *createMips64elDisassembler(
const Target &T,
- const MCSubtargetInfo &STI) {
- return new Mips64Disassembler(STI, T.createMCRegInfo(""), false);
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx) {
+ return new Mips64Disassembler(STI, Ctx, false);
}
extern "C" void LLVMInitializeMipsDisassembler() {
@@ -311,6 +363,12 @@ extern "C" void LLVMInitializeMipsDisassembler() {
#include "MipsGenDisassemblerTables.inc"
+static unsigned getReg(const void *D, unsigned RC, unsigned RegNo) {
+ const MipsDisassemblerBase *Dis = static_cast<const MipsDisassemblerBase*>(D);
+ const MCRegisterInfo *RegInfo = Dis->getContext().getRegisterInfo();
+ return *(RegInfo->getRegClass(RC).begin() + RegNo);
+}
+
template <typename InsnType>
static DecodeStatus DecodeINSVE_DF(MCInst &MI, InsnType insn, uint64_t Address,
const void *Decoder) {
@@ -357,6 +415,202 @@ static DecodeStatus DecodeINSVE_DF(MCInst &MI, InsnType insn, uint64_t Address,
return MCDisassembler::Success;
}
+template <typename InsnType>
+static DecodeStatus DecodeAddiGroupBranch(MCInst &MI, InsnType insn,
+ uint64_t Address,
+ const void *Decoder) {
+ // If we are called then we can assume that MIPS32r6/MIPS64r6 is enabled
+ // (otherwise we would have matched the ADDI instruction from the earlier
+ // ISAs instead).
+ //
+ // We have:
+ // 0b001000 sssss ttttt iiiiiiiiiiiiiiii
+ // BOVC if rs >= rt
+ // BEQZALC if rs == 0 && rt != 0
+ // BEQC if rs < rt && rs != 0
+
+ InsnType Rs = fieldFromInstruction(insn, 21, 5);
+ InsnType Rt = fieldFromInstruction(insn, 16, 5);
+ InsnType Imm = SignExtend64(fieldFromInstruction(insn, 0, 16), 16) << 2;
+ bool HasRs = false;
+
+ if (Rs >= Rt) {
+ MI.setOpcode(Mips::BOVC);
+ HasRs = true;
+ } else if (Rs != 0 && Rs < Rt) {
+ MI.setOpcode(Mips::BEQC);
+ HasRs = true;
+ } else
+ MI.setOpcode(Mips::BEQZALC);
+
+ if (HasRs)
+ MI.addOperand(MCOperand::CreateReg(getReg(Decoder, Mips::GPR32RegClassID,
+ Rs)));
+
+ MI.addOperand(MCOperand::CreateReg(getReg(Decoder, Mips::GPR32RegClassID,
+ Rt)));
+ MI.addOperand(MCOperand::CreateImm(Imm));
+
+ return MCDisassembler::Success;
+}
+
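A rough, self-contained sketch of the disambiguation above. The helpers below are simplified stand-ins for LLVM's fieldFromInstruction and SignExtend64 (not the real API), and the opcode names are plain strings rather than the Mips::* enums:

#include <cstdint>
#include <cstdio>

// Extract a bit field [lo, lo+width) from a 32-bit instruction word.
static uint32_t field(uint32_t insn, unsigned lo, unsigned width) {
  return (insn >> lo) & ((1u << width) - 1);
}

// Sign-extend the low 'bits' bits of v.
static int64_t signExtend64(uint64_t v, unsigned bits) {
  uint64_t m = 1ull << (bits - 1);
  return (int64_t)((v ^ m) - m);
}

int main() {
  // 0b001000 sssss ttttt iiiiiiiiiiiiiiii with rs=3, rt=5, imm16=0xffff.
  uint32_t insn = (0x08u << 26) | (3u << 21) | (5u << 16) | 0xffffu;
  uint32_t rs = field(insn, 21, 5), rt = field(insn, 16, 5);
  int64_t off = signExtend64(field(insn, 0, 16), 16) << 2;
  // Same selection order as DecodeAddiGroupBranch: rs >= rt wins first.
  const char *op = rs >= rt ? "BOVC" : (rs != 0 && rs < rt) ? "BEQC" : "BEQZALC";
  printf("%s rs=%u rt=%u offset=%lld\n", op, rs, rt, (long long)off);
  // Prints: BEQC rs=3 rt=5 offset=-4
}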
+template <typename InsnType>
+static DecodeStatus DecodeDaddiGroupBranch(MCInst &MI, InsnType insn,
+ uint64_t Address,
+ const void *Decoder) {
+ // If we are called then we can assume that MIPS32r6/MIPS64r6 is enabled
+ // (otherwise we would have matched the DADDI instruction from the earlier
+ // ISAs instead).
+ //
+ // We have:
+ // 0b011000 sssss ttttt iiiiiiiiiiiiiiii
+ // BNVC if rs >= rt
+ // BNEZALC if rs == 0 && rt != 0
+ // BNEC if rs < rt && rs != 0
+
+ InsnType Rs = fieldFromInstruction(insn, 21, 5);
+ InsnType Rt = fieldFromInstruction(insn, 16, 5);
+ InsnType Imm = SignExtend64(fieldFromInstruction(insn, 0, 16), 16) << 2;
+ bool HasRs = false;
+
+ if (Rs >= Rt) {
+ MI.setOpcode(Mips::BNVC);
+ HasRs = true;
+ } else if (Rs != 0 && Rs < Rt) {
+ MI.setOpcode(Mips::BNEC);
+ HasRs = true;
+ } else
+ MI.setOpcode(Mips::BNEZALC);
+
+ if (HasRs)
+ MI.addOperand(MCOperand::CreateReg(getReg(Decoder, Mips::GPR32RegClassID,
+ Rs)));
+
+ MI.addOperand(MCOperand::CreateReg(getReg(Decoder, Mips::GPR32RegClassID,
+ Rt)));
+ MI.addOperand(MCOperand::CreateImm(Imm));
+
+ return MCDisassembler::Success;
+}
+
+template <typename InsnType>
+static DecodeStatus DecodeBlezlGroupBranch(MCInst &MI, InsnType insn,
+ uint64_t Address,
+ const void *Decoder) {
+ // If we are called then we can assume that MIPS32r6/MIPS64r6 is enabled
+ // (otherwise we would have matched the BLEZL instruction from the earlier
+ // ISAs instead).
+ //
+ // We have:
+ // 0b010110 sssss ttttt iiiiiiiiiiiiiiii
+ // Invalid if rt == 0
+ // BLEZC if rs == 0 && rt != 0
+ // BGEZC if rs == rt && rt != 0
+ // BGEC if rs != rt && rs != 0 && rt != 0
+
+ InsnType Rs = fieldFromInstruction(insn, 21, 5);
+ InsnType Rt = fieldFromInstruction(insn, 16, 5);
+ InsnType Imm = SignExtend64(fieldFromInstruction(insn, 0, 16), 16) << 2;
+
+ if (Rt == 0)
+ return MCDisassembler::Fail;
+ else if (Rs == 0)
+ MI.setOpcode(Mips::BLEZC);
+ else if (Rs == Rt)
+ MI.setOpcode(Mips::BGEZC);
+ else
+ return MCDisassembler::Fail; // FIXME: BGEC is not implemented yet.
+
+ MI.addOperand(MCOperand::CreateReg(getReg(Decoder, Mips::GPR32RegClassID,
+ Rt)));
+
+ MI.addOperand(MCOperand::CreateImm(Imm));
+
+ return MCDisassembler::Success;
+}
+
+template <typename InsnType>
+static DecodeStatus DecodeBgtzlGroupBranch(MCInst &MI, InsnType insn,
+ uint64_t Address,
+ const void *Decoder) {
+ // If we are called then we can assume that MIPS32r6/MIPS64r6 is enabled
+ // (otherwise we would have matched the BGTZL instruction from the earlier
+ // ISAs instead).
+ //
+ // We have:
+ // 0b010111 sssss ttttt iiiiiiiiiiiiiiii
+ // Invalid if rt == 0
+ // BGTZC if rs == 0 && rt != 0
+ // BLTZC if rs == rt && rt != 0
+ // BLTC if rs != rt && rs != 0 && rt != 0
+
+ InsnType Rs = fieldFromInstruction(insn, 21, 5);
+ InsnType Rt = fieldFromInstruction(insn, 16, 5);
+ InsnType Imm = SignExtend64(fieldFromInstruction(insn, 0, 16), 16) << 2;
+
+ if (Rt == 0)
+ return MCDisassembler::Fail;
+ else if (Rs == 0)
+ MI.setOpcode(Mips::BGTZC);
+ else if (Rs == Rt)
+ MI.setOpcode(Mips::BLTZC);
+ else
+ return MCDisassembler::Fail; // FIXME: BLTC is not implemented yet.
+
+ MI.addOperand(MCOperand::CreateReg(getReg(Decoder, Mips::GPR32RegClassID,
+ Rt)));
+
+ MI.addOperand(MCOperand::CreateImm(Imm));
+
+ return MCDisassembler::Success;
+}
+
+template <typename InsnType>
+static DecodeStatus DecodeBgtzGroupBranch(MCInst &MI, InsnType insn,
+ uint64_t Address,
+ const void *Decoder) {
+ // If we are called then we can assume that MIPS32r6/MIPS64r6 is enabled
+ // (otherwise we would have matched the BGTZ instruction from the earlier
+ // ISAs instead).
+ //
+ // We have:
+ // 0b000111 sssss ttttt iiiiiiiiiiiiiiii
+ // BGTZ if rt == 0
+ // BGTZALC if rs == 0 && rt != 0
+ // BLTZALC if rs != 0 && rs == rt
+ // BLTUC if rs != 0 && rs != rt
+
+ InsnType Rs = fieldFromInstruction(insn, 21, 5);
+ InsnType Rt = fieldFromInstruction(insn, 16, 5);
+ InsnType Imm = SignExtend64(fieldFromInstruction(insn, 0, 16), 16) << 2;
+ bool HasRs = false;
+ bool HasRt = false;
+
+ if (Rt == 0) {
+ MI.setOpcode(Mips::BGTZ);
+ HasRs = true;
+ } else if (Rs == 0) {
+ MI.setOpcode(Mips::BGTZALC);
+ HasRt = true;
+ } else if (Rs == Rt) {
+ MI.setOpcode(Mips::BLTZALC);
+ HasRs = true;
+ } else
+ return MCDisassembler::Fail; // BLTUC not implemented yet
+
+ if (HasRs)
+ MI.addOperand(MCOperand::CreateReg(getReg(Decoder, Mips::GPR32RegClassID,
+ Rs)));
+
+ if (HasRt)
+ MI.addOperand(MCOperand::CreateReg(getReg(Decoder, Mips::GPR32RegClassID,
+ Rt)));
+
+ MI.addOperand(MCOperand::CreateImm(Imm));
+
+ return MCDisassembler::Success;
+}
+
/// readInstruction - read four bytes from the MemoryObject
/// and return the 32-bit word ordered according to the given endianness
static DecodeStatus readInstruction32(const MemoryObject &region,
@@ -426,6 +680,15 @@ MipsDisassembler::getInstruction(MCInst &instr,
return MCDisassembler::Fail;
}
+ if (isMips32r6()) {
+ Result = decodeInstruction(DecoderTableMips32r6_64r632, instr, Insn,
+ Address, this, STI);
+ if (Result != MCDisassembler::Fail) {
+ Size = 4;
+ return Result;
+ }
+ }
+
// Calling the auto-generated decoder function.
Result = decodeInstruction(DecoderTableMips32, instr, Insn, Address,
this, STI);
@@ -469,11 +732,6 @@ Mips64Disassembler::getInstruction(MCInst &instr,
return MCDisassembler::Fail;
}
-static unsigned getReg(const void *D, unsigned RC, unsigned RegNo) {
- const MipsDisassemblerBase *Dis = static_cast<const MipsDisassemblerBase*>(D);
- return *(Dis->getRegInfo()->getRegClass(RC).begin() + RegNo);
-}
-
static DecodeStatus DecodeCPU16RegsRegisterClass(MCInst &Inst,
unsigned RegNo,
uint64_t Address,
@@ -828,12 +1086,23 @@ static DecodeStatus DecodeMSACtrlRegisterClass(MCInst &Inst,
return MCDisassembler::Success;
}
+static DecodeStatus DecodeCOP2RegisterClass(MCInst &Inst,
+ unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ if (RegNo > 31)
+ return MCDisassembler::Fail;
+
+ unsigned Reg = getReg(Decoder, Mips::COP2RegClassID, RegNo);
+ Inst.addOperand(MCOperand::CreateReg(Reg));
+ return MCDisassembler::Success;
+}
+
static DecodeStatus DecodeBranchTarget(MCInst &Inst,
unsigned Offset,
uint64_t Address,
const void *Decoder) {
- unsigned BranchOffset = Offset & 0xffff;
- BranchOffset = SignExtend32<18>(BranchOffset << 2) + 4;
+ int32_t BranchOffset = (SignExtend32<16>(Offset) << 2) + 4;
Inst.addOperand(MCOperand::CreateImm(BranchOffset));
return MCDisassembler::Success;
}
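The rewritten form is equivalent to the removed one; a throwaway check (using a local sign-extension helper purely for illustration, not LLVM's SignExtend32):

#include <cassert>
#include <cstdint>

static int32_t sext(uint32_t v, unsigned bits) {
  uint32_t m = 1u << (bits - 1);
  return (int32_t)((v ^ m) - m);
}

void checkBranchTargetForms() {
  for (uint32_t Off : {0x0000u, 0x7fffu, 0x8000u, 0xffffu}) {
    int32_t OldForm = sext((Off & 0xffff) << 2, 18) + 4; // removed form
    int32_t NewForm = (sext(Off, 16) << 2) + 4;          // new form
    assert(OldForm == NewForm);
  }
}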
@@ -848,12 +1117,31 @@ static DecodeStatus DecodeJumpTarget(MCInst &Inst,
return MCDisassembler::Success;
}
+static DecodeStatus DecodeBranchTarget21(MCInst &Inst,
+ unsigned Offset,
+ uint64_t Address,
+ const void *Decoder) {
+ int32_t BranchOffset = SignExtend32<21>(Offset) << 2;
+
+ Inst.addOperand(MCOperand::CreateImm(BranchOffset));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeBranchTarget26(MCInst &Inst,
+ unsigned Offset,
+ uint64_t Address,
+ const void *Decoder) {
+ int32_t BranchOffset = SignExtend32<26>(Offset) << 2;
+
+ Inst.addOperand(MCOperand::CreateImm(BranchOffset));
+ return MCDisassembler::Success;
+}
+
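A worked example of the arithmetic above, assuming LLVM's usual SignExtend32<N> semantics (an all-ones N-bit field is -1); the include path is the one SignExtend32 normally lives in:

#include "llvm/Support/MathExtras.h" // SignExtend32
#include <cassert>

void checkBranchTargetDecode() {
  // A 21-bit field of all ones sign-extends to -1; shifted left by 2 -> -4 bytes.
  assert((llvm::SignExtend32<21>(0x1fffff) << 2) == -4);
  // The 26-bit field 0x2000000 is the most negative value: -2^25 words = -2^27 bytes.
  assert((llvm::SignExtend32<26>(0x2000000) << 2) == -(1 << 27));
}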
static DecodeStatus DecodeBranchTargetMM(MCInst &Inst,
unsigned Offset,
uint64_t Address,
const void *Decoder) {
- unsigned BranchOffset = Offset & 0xffff;
- BranchOffset = SignExtend32<18>(BranchOffset << 1);
+ int32_t BranchOffset = SignExtend32<16>(Offset) << 1;
Inst.addOperand(MCOperand::CreateImm(BranchOffset));
return MCDisassembler::Success;
}
@@ -903,3 +1191,9 @@ static DecodeStatus DecodeExtSize(MCInst &Inst,
Inst.addOperand(MCOperand::CreateImm(SignExtend32<16>(Size)));
return MCDisassembler::Success;
}
+
+static DecodeStatus DecodeSimm19Lsl2(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ Inst.addOperand(MCOperand::CreateImm(SignExtend32<19>(Insn) << 2));
+ return MCDisassembler::Success;
+}
diff --git a/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp b/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp
index c8f08f1..8c79751 100644
--- a/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp
+++ b/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp
@@ -11,7 +11,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "asm-printer"
#include "MipsInstPrinter.h"
#include "MCTargetDesc/MipsMCExpr.h"
#include "MipsInstrInfo.h"
@@ -24,6 +23,8 @@
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
+#define DEBUG_TYPE "asm-printer"
+
#define PRINT_ALIAS_INSTR
#include "MipsGenAsmWriter.inc"
@@ -165,6 +166,8 @@ static void printExpr(const MCExpr *Expr, raw_ostream &OS) {
case MCSymbolRefExpr::VK_Mips_GOT_LO16: OS << "%got_lo("; break;
case MCSymbolRefExpr::VK_Mips_CALL_HI16: OS << "%call_hi("; break;
case MCSymbolRefExpr::VK_Mips_CALL_LO16: OS << "%call_lo("; break;
+ case MCSymbolRefExpr::VK_Mips_PCREL_HI16: OS << "%pcrel_hi("; break;
+ case MCSymbolRefExpr::VK_Mips_PCREL_LO16: OS << "%pcrel_lo("; break;
}
OS << SRE->getSymbol();
diff --git a/lib/Target/Mips/InstPrinter/MipsInstPrinter.h b/lib/Target/Mips/InstPrinter/MipsInstPrinter.h
index 2b745f0..550a0f1 100644
--- a/lib/Target/Mips/InstPrinter/MipsInstPrinter.h
+++ b/lib/Target/Mips/InstPrinter/MipsInstPrinter.h
@@ -85,10 +85,12 @@ public:
void printInstruction(const MCInst *MI, raw_ostream &O);
static const char *getRegisterName(unsigned RegNo);
- virtual void printRegName(raw_ostream &OS, unsigned RegNo) const;
- virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot);
+ void printRegName(raw_ostream &OS, unsigned RegNo) const override;
+ void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot) override;
bool printAliasInstr(const MCInst *MI, raw_ostream &OS);
+ void printCustomAliasOperand(const MCInst *MI, unsigned OpIdx,
+ unsigned PrintMethodIdx, raw_ostream &O);
private:
void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
diff --git a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
index 0f99ecc..5375a00 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
@@ -31,7 +31,7 @@ using namespace llvm;
// Prepare the value for the target space.
static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
- MCContext *Ctx = NULL) {
+ MCContext *Ctx = nullptr) {
unsigned Kind = Fixup.getKind();
@@ -56,6 +56,7 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
case Mips::fixup_MICROMIPS_GOT_PAGE:
case Mips::fixup_MICROMIPS_GOT_OFST:
case Mips::fixup_MICROMIPS_GOT_DISP:
+ case Mips::fixup_MIPS_PCLO16:
break;
case Mips::fixup_Mips_PC16:
// So far we are only using this type for branches.
@@ -80,6 +81,7 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
case Mips::fixup_Mips_GOT_HI16:
case Mips::fixup_Mips_CALL_HI16:
case Mips::fixup_MICROMIPS_HI16:
+ case Mips::fixup_MIPS_PCHI16:
// Get the second 16 bits. Also add 1 if bit 15 is 1.
Value = ((Value + 0x8000) >> 16) & 0xffff;
break;
@@ -102,6 +104,22 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
if (!isIntN(16, Value) && Ctx)
Ctx->FatalError(Fixup.getLoc(), "out of range PC16 fixup");
break;
+ case Mips::fixup_MIPS_PC21_S2:
+ Value -= 4;
+ // Forcing a signed division because Value can be negative.
+ Value = (int64_t) Value / 4;
+ // We now check if Value can be encoded as a 21-bit signed immediate.
+ if (!isIntN(21, Value) && Ctx)
+ Ctx->FatalError(Fixup.getLoc(), "out of range PC21 fixup");
+ break;
+ case Mips::fixup_MIPS_PC26_S2:
+ Value -= 4;
+ // Forcing a signed division because Value can be negative.
+ Value = (int64_t) Value / 4;
+ // We now check if Value can be encoded as a 26-bit signed immediate.
+ if (!isIntN(26, Value) && Ctx)
+ Ctx->FatalError(Fixup.getLoc(), "out of range PC26 fixup");
+ break;
}
return Value;
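For example, a backwards branch of 128 KiB stays in range for the 21-bit fixup; the sketch below uses a local stand-in for isIntN (the real one lives in llvm/Support/MathExtras.h):

#include <cassert>
#include <cstdint>

static bool fitsSigned(unsigned N, int64_t x) {
  return x >= -(int64_t(1) << (N - 1)) && x < (int64_t(1) << (N - 1));
}

void checkPC21Fixup() {
  int64_t Value = -131072; // branch 128 KiB backwards from the fixup
  Value -= 4;              // offsets are relative to the following instruction
  Value /= 4;              // to word units; must be a signed division
  assert(Value == -32769 && fitsSigned(21, Value)); // in range for PC21_S2
}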
@@ -189,7 +207,7 @@ void MipsAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
const MCFixupKindInfo &MipsAsmBackend::
getFixupKindInfo(MCFixupKind Kind) const {
- const static MCFixupKindInfo Infos[Mips::NumTargetFixupKinds] = {
+ const static MCFixupKindInfo LittleEndianInfos[Mips::NumTargetFixupKinds] = {
// This table *must* be in the same order as the fixup_* kinds in
// MipsFixupKinds.h.
//
@@ -229,6 +247,10 @@ getFixupKindInfo(MCFixupKind Kind) const {
{ "fixup_Mips_GOT_LO16", 0, 16, 0 },
{ "fixup_Mips_CALL_HI16", 0, 16, 0 },
{ "fixup_Mips_CALL_LO16", 0, 16, 0 },
+ { "fixup_MIPS_PC21_S2", 0, 21, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_MIPS_PC26_S2", 0, 26, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_MIPS_PCHI16", 0, 16, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_MIPS_PCLO16", 0, 16, MCFixupKindInfo::FKF_IsPCRel },
{ "fixup_MICROMIPS_26_S1", 0, 26, 0 },
{ "fixup_MICROMIPS_HI16", 0, 16, 0 },
{ "fixup_MICROMIPS_LO16", 0, 16, 0 },
@@ -246,12 +268,76 @@ getFixupKindInfo(MCFixupKind Kind) const {
{ "fixup_MICROMIPS_TLS_TPREL_LO16", 0, 16, 0 }
};
+ const static MCFixupKindInfo BigEndianInfos[Mips::NumTargetFixupKinds] = {
+ // This table *must* be in the same order as the fixup_* kinds in
+ // MipsFixupKinds.h.
+ //
+ // name offset bits flags
+ { "fixup_Mips_16", 16, 16, 0 },
+ { "fixup_Mips_32", 0, 32, 0 },
+ { "fixup_Mips_REL32", 0, 32, 0 },
+ { "fixup_Mips_26", 6, 26, 0 },
+ { "fixup_Mips_HI16", 16, 16, 0 },
+ { "fixup_Mips_LO16", 16, 16, 0 },
+ { "fixup_Mips_GPREL16", 16, 16, 0 },
+ { "fixup_Mips_LITERAL", 16, 16, 0 },
+ { "fixup_Mips_GOT_Global", 16, 16, 0 },
+ { "fixup_Mips_GOT_Local", 16, 16, 0 },
+ { "fixup_Mips_PC16", 16, 16, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_Mips_CALL16", 16, 16, 0 },
+ { "fixup_Mips_GPREL32", 0, 32, 0 },
+ { "fixup_Mips_SHIFT5", 21, 5, 0 },
+ { "fixup_Mips_SHIFT6", 21, 5, 0 },
+ { "fixup_Mips_64", 0, 64, 0 },
+ { "fixup_Mips_TLSGD", 16, 16, 0 },
+ { "fixup_Mips_GOTTPREL", 16, 16, 0 },
+ { "fixup_Mips_TPREL_HI", 16, 16, 0 },
+ { "fixup_Mips_TPREL_LO", 16, 16, 0 },
+ { "fixup_Mips_TLSLDM", 16, 16, 0 },
+ { "fixup_Mips_DTPREL_HI", 16, 16, 0 },
+ { "fixup_Mips_DTPREL_LO", 16, 16, 0 },
+ { "fixup_Mips_Branch_PCRel",16, 16, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_Mips_GPOFF_HI", 16, 16, 0 },
+ { "fixup_Mips_GPOFF_LO", 16, 16, 0 },
+ { "fixup_Mips_GOT_PAGE", 16, 16, 0 },
+ { "fixup_Mips_GOT_OFST", 16, 16, 0 },
+ { "fixup_Mips_GOT_DISP", 16, 16, 0 },
+ { "fixup_Mips_HIGHER", 16, 16, 0 },
+ { "fixup_Mips_HIGHEST", 16, 16, 0 },
+ { "fixup_Mips_GOT_HI16", 16, 16, 0 },
+ { "fixup_Mips_GOT_LO16", 16, 16, 0 },
+ { "fixup_Mips_CALL_HI16", 16, 16, 0 },
+ { "fixup_Mips_CALL_LO16", 16, 16, 0 },
+ { "fixup_MIPS_PC21_S2", 11, 21, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_MIPS_PC26_S2", 6, 26, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_MIPS_PCHI16", 16, 16, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_MIPS_PCLO16", 16, 16, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_MICROMIPS_26_S1", 6, 26, 0 },
+ { "fixup_MICROMIPS_HI16", 16, 16, 0 },
+ { "fixup_MICROMIPS_LO16", 16, 16, 0 },
+ { "fixup_MICROMIPS_GOT16", 16, 16, 0 },
+ { "fixup_MICROMIPS_PC16_S1",16, 16, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_MICROMIPS_CALL16", 16, 16, 0 },
+ { "fixup_MICROMIPS_GOT_DISP", 16, 16, 0 },
+ { "fixup_MICROMIPS_GOT_PAGE", 16, 16, 0 },
+ { "fixup_MICROMIPS_GOT_OFST", 16, 16, 0 },
+ { "fixup_MICROMIPS_TLS_GD", 16, 16, 0 },
+ { "fixup_MICROMIPS_TLS_LDM", 16, 16, 0 },
+ { "fixup_MICROMIPS_TLS_DTPREL_HI16", 16, 16, 0 },
+ { "fixup_MICROMIPS_TLS_DTPREL_LO16", 16, 16, 0 },
+ { "fixup_MICROMIPS_TLS_TPREL_HI16", 16, 16, 0 },
+ { "fixup_MICROMIPS_TLS_TPREL_LO16", 16, 16, 0 }
+ };
+
if (Kind < FirstTargetFixupKind)
return MCAsmBackend::getFixupKindInfo(Kind);
assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
"Invalid kind!");
- return Infos[Kind - FirstTargetFixupKind];
+
+ if (IsLittle)
+ return LittleEndianInfos[Kind - FirstTargetFixupKind];
+ return BigEndianInfos[Kind - FirstTargetFixupKind];
}
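Why two tables: the offset in MCFixupKindInfo is a bit offset into the emitted byte stream, so a field occupying the low 16 bits of a 32-bit instruction word starts at bit 0 when the word is written little-endian but at bit 16 when written big-endian. A self-contained illustration (not MC-layer code):

#include <cassert>
#include <cstdint>

void checkFixupByteOffsets() {
  uint32_t Word = 0x1234beef; // low 16 bits (0xbeef) are the fixup field
  uint8_t LE[4] = {uint8_t(Word), uint8_t(Word >> 8),
                   uint8_t(Word >> 16), uint8_t(Word >> 24)};
  uint8_t BE[4] = {uint8_t(Word >> 24), uint8_t(Word >> 16),
                   uint8_t(Word >> 8), uint8_t(Word)};
  assert(LE[0] == 0xef && LE[1] == 0xbe); // field starts at bit offset 0
  assert(BE[2] == 0xbe && BE[3] == 0xef); // field starts at bit offset 16
}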
/// WriteNopData - Write an (optimal) nop sequence of Count bytes
diff --git a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h
index cc5207a..bc695e6 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h
@@ -37,14 +37,14 @@ public:
: MCAsmBackend(), OSType(_OSType), IsLittle(_isLittle),
Is64Bit(_is64Bit) {}
- MCObjectWriter *createObjectWriter(raw_ostream &OS) const;
+ MCObjectWriter *createObjectWriter(raw_ostream &OS) const override;
void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
- uint64_t Value, bool IsPCRel) const;
+ uint64_t Value, bool IsPCRel) const override;
- const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const;
+ const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override;
- unsigned getNumFixupKinds() const {
+ unsigned getNumFixupKinds() const override {
return Mips::NumTargetFixupKinds;
}
@@ -55,7 +55,7 @@ public:
/// relaxation.
///
/// \param Inst - The instruction to test.
- bool mayNeedRelaxation(const MCInst &Inst) const {
+ bool mayNeedRelaxation(const MCInst &Inst) const override {
return false;
}
@@ -63,7 +63,7 @@ public:
/// fixup requires the associated instruction to be relaxed.
bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
const MCRelaxableFragment *DF,
- const MCAsmLayout &Layout) const {
+ const MCAsmLayout &Layout) const override {
// FIXME.
assert(0 && "RelaxInstruction() unimplemented");
return false;
@@ -75,16 +75,16 @@ public:
/// \param Inst - The instruction to relax, which may be the same
/// as the output.
/// \param [out] Res On return, the relaxed instruction.
- void relaxInstruction(const MCInst &Inst, MCInst &Res) const {}
+ void relaxInstruction(const MCInst &Inst, MCInst &Res) const override {}
/// @}
- bool writeNopData(uint64_t Count, MCObjectWriter *OW) const;
+ bool writeNopData(uint64_t Count, MCObjectWriter *OW) const override;
void processFixupValue(const MCAssembler &Asm, const MCAsmLayout &Layout,
const MCFixup &Fixup, const MCFragment *DF,
const MCValue &Target, uint64_t &Value,
- bool &IsResolved);
+ bool &IsResolved) override;
}; // class MipsAsmBackend
diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
index 794978b..74c12ff 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
@@ -193,6 +193,18 @@ unsigned MipsELFObjectWriter::GetRelocType(const MCValue &Target,
case Mips::fixup_MICROMIPS_TLS_TPREL_LO16:
Type = ELF::R_MICROMIPS_TLS_TPREL_LO16;
break;
+ case Mips::fixup_MIPS_PC21_S2:
+ Type = ELF::R_MIPS_PC21_S2;
+ break;
+ case Mips::fixup_MIPS_PC26_S2:
+ Type = ELF::R_MIPS_PC26_S2;
+ break;
+ case Mips::fixup_MIPS_PCHI16:
+ Type = ELF::R_MIPS_PCHI16;
+ break;
+ case Mips::fixup_MIPS_PCLO16:
+ Type = ELF::R_MIPS_PCLO16;
+ break;
}
return Type;
}
diff --git a/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h b/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h
index dc6192c..3079004 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h
@@ -128,6 +128,18 @@ namespace Mips {
// resulting in - R_MIPS_CALL_LO16
fixup_Mips_CALL_LO16,
+ // resulting in - R_MIPS_PC21_S2
+ fixup_MIPS_PC21_S2,
+
+ // resulting in - R_MIPS_PC26_S2
+ fixup_MIPS_PC26_S2,
+
+ // resulting in - R_MIPS_PCHI16
+ fixup_MIPS_PCHI16,
+
+ // resulting in - R_MIPS_PCLO16
+ fixup_MIPS_PCLO16,
+
// resulting in - R_MICROMIPS_26_S1
fixup_MICROMIPS_26_S1,
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.h b/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.h
index 1000113..37ba0c4 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.h
@@ -20,7 +20,7 @@ namespace llvm {
class StringRef;
class MipsMCAsmInfo : public MCAsmInfoELF {
- virtual void anchor();
+ void anchor() override;
public:
explicit MipsMCAsmInfo(StringRef TT);
};
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
index edd2146..85e0bf1 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
@@ -12,8 +12,6 @@
//===----------------------------------------------------------------------===//
//
-#define DEBUG_TYPE "mccodeemitter"
-
#include "MipsMCCodeEmitter.h"
#include "MCTargetDesc/MipsFixupKinds.h"
#include "MCTargetDesc/MipsMCExpr.h"
@@ -28,6 +26,8 @@
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/raw_ostream.h"
+#define DEBUG_TYPE "mccodeemitter"
+
#define GET_INSTRMAP_INFO
#include "MipsGenInstrInfo.inc"
#undef GET_INSTRMAP_INFO
@@ -242,6 +242,69 @@ getBranchTargetOpValueMM(const MCInst &MI, unsigned OpNo,
return 0;
}
+/// getBranchTarget21OpValue - Return binary encoding of the branch
+/// target operand. If the machine operand requires relocation,
+/// record the relocation and return zero.
+unsigned MipsMCCodeEmitter::
+getBranchTarget21OpValue(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+
+ const MCOperand &MO = MI.getOperand(OpNo);
+
+ // If the destination is an immediate, divide by 4.
+ if (MO.isImm()) return MO.getImm() >> 2;
+
+ assert(MO.isExpr() &&
+ "getBranchTarget21OpValue expects only expressions or immediates");
+
+ const MCExpr *Expr = MO.getExpr();
+ Fixups.push_back(MCFixup::Create(0, Expr,
+ MCFixupKind(Mips::fixup_MIPS_PC21_S2)));
+ return 0;
+}
+
+/// getBranchTarget26OpValue - Return binary encoding of the branch
+/// target operand. If the machine operand requires relocation,
+/// record the relocation and return zero.
+unsigned MipsMCCodeEmitter::
+getBranchTarget26OpValue(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+
+ const MCOperand &MO = MI.getOperand(OpNo);
+
+ // If the destination is an immediate, divide by 4.
+ if (MO.isImm()) return MO.getImm() >> 2;
+
+ assert(MO.isExpr() &&
+ "getBranchTarget26OpValue expects only expressions or immediates");
+
+ const MCExpr *Expr = MO.getExpr();
+ Fixups.push_back(MCFixup::Create(0, Expr,
+ MCFixupKind(Mips::fixup_MIPS_PC26_S2)));
+ return 0;
+}
+
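An illustration of the immediate path above; the masking to 26 bits is an assumption about what the TableGen'erated encoder does with the returned value, not code from this patch:

#include <cassert>
#include <cstdint>

void checkEncode26() {
  int64_t Imm = -4; // resolved branch target offset, in bytes
  uint32_t Field = uint32_t(Imm >> 2) & 0x3ffffff; // word units, 26-bit field
  assert(Field == 0x3ffffffu); // all ones == -1 word
}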
+/// getJumpOffset16OpValue - Return binary encoding of the jump
+/// target operand. If the machine operand requires relocation,
+/// record the relocation and return zero.
+unsigned MipsMCCodeEmitter::
+getJumpOffset16OpValue(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+
+ const MCOperand &MO = MI.getOperand(OpNo);
+
+ if (MO.isImm()) return MO.getImm();
+
+ assert(MO.isExpr() &&
+ "getJumpOffset16OpValue expects only expressions or an immediate");
+
+ // TODO: Push fixup.
+ return 0;
+}
+
/// getJumpTargetOpValue - Return binary encoding of the jump
/// target operand. If the machine operand requires relocation,
/// record the relocation and return zero.
@@ -417,6 +480,12 @@ getExprOpValue(const MCExpr *Expr,SmallVectorImpl<MCFixup> &Fixups,
case MCSymbolRefExpr::VK_Mips_CALL_LO16:
FixupKind = Mips::fixup_Mips_CALL_LO16;
break;
+ case MCSymbolRefExpr::VK_Mips_PCREL_HI16:
+ FixupKind = Mips::fixup_MIPS_PCHI16;
+ break;
+ case MCSymbolRefExpr::VK_Mips_PCREL_LO16:
+ FixupKind = Mips::fixup_MIPS_PCLO16;
+ break;
} // switch
Fixups.push_back(MCFixup::Create(0, Expr, MCFixupKind(FixupKind)));
@@ -548,5 +617,15 @@ MipsMCCodeEmitter::getLSAImmEncoding(const MCInst &MI, unsigned OpNo,
return getMachineOpValue(MI, MI.getOperand(OpNo), Fixups, STI) - 1;
}
-#include "MipsGenMCCodeEmitter.inc"
+unsigned
+MipsMCCodeEmitter::getSimm19Lsl2Encoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ assert(MI.getOperand(OpNo).isImm());
+ // The immediate is encoded as 'immediate << 2'.
+ unsigned Res = getMachineOpValue(MI, MI.getOperand(OpNo), Fixups, STI);
+ assert((Res & 3) == 0);
+ return Res >> 2;
+}
+#include "MipsGenMCCodeEmitter.inc"
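A round-trip sketch pairing this encoding with DecodeSimm19Lsl2 from the disassembler above (local helper, not the LLVM one):

#include <cassert>
#include <cstdint>

static int32_t sext32(uint32_t v, unsigned bits) {
  uint32_t m = 1u << (bits - 1);
  return (int32_t)((v ^ m) - m);
}

void checkSimm19RoundTrip() {
  int32_t Imm = -8;                                  // 4-byte aligned immediate
  uint32_t Encoded = (uint32_t(Imm) >> 2) & 0x7ffff; // encode: drop low 2 bits
  int32_t Decoded = sext32(Encoded, 19) << 2;        // decode: restore them
  assert(Decoded == Imm);
}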
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.h b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.h
index 49a2490..3f7daab 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.h
@@ -52,7 +52,7 @@ public:
void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
+ const MCSubtargetInfo &STI) const override;
// getBinaryCodeForInstr - TableGen'erated function for getting the
// binary encoding for an instruction.
@@ -88,6 +88,27 @@ public:
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
+ // getBranchTarget21OpValue - Return binary encoding of the branch
+ // offset operand. If the machine operand requires relocation,
+ // record the relocation and return zero.
+ unsigned getBranchTarget21OpValue(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ // getBranchTarget26OpValue - Return binary encoding of the branch
+ // offset operand. If the machine operand requires relocation,
+ // record the relocation and return zero.
+ unsigned getBranchTarget26OpValue(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ // getJumpOffset16OpValue - Return binary encoding of the jump
+ // offset operand. If the machine operand requires relocation,
+ // record the relocation and return zero.
+ unsigned getJumpOffset16OpValue(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
// getMachineOpValue - Return binary encoding of operand. If the machine
// operand requires relocation, record the relocation and return zero.
unsigned getMachineOpValue(const MCInst &MI, const MCOperand &MO,
@@ -116,6 +137,10 @@ public:
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
+ unsigned getSimm19Lsl2Encoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
unsigned getExprOpValue(const MCExpr *Expr, SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp
index c7ba12d..21ccc3c 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp
@@ -7,7 +7,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "mipsmcexpr"
#include "MipsMCExpr.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCAssembler.h"
@@ -15,6 +14,8 @@
using namespace llvm;
+#define DEBUG_TYPE "mipsmcexpr"
+
bool MipsMCExpr::isSupportedBinaryExpr(MCSymbolRefExpr::VariantKind VK,
const MCBinaryExpr *BE) {
switch (VK) {
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCExpr.h b/lib/Target/Mips/MCTargetDesc/MipsMCExpr.h
index 722bba7..8d7aacd 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCExpr.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCExpr.h
@@ -46,16 +46,16 @@ public:
/// getSubExpr - Get the child of this expression.
const MCExpr *getSubExpr() const { return Expr; }
- void PrintImpl(raw_ostream &OS) const;
+ void PrintImpl(raw_ostream &OS) const override;
bool EvaluateAsRelocatableImpl(MCValue &Res,
- const MCAsmLayout *Layout) const;
- void AddValueSymbols(MCAssembler *) const;
- const MCSection *FindAssociatedSection() const {
+ const MCAsmLayout *Layout) const override;
+ void AddValueSymbols(MCAssembler *) const override;
+ const MCSection *FindAssociatedSection() const override {
return getSubExpr()->FindAssociatedSection();
}
// There are no TLS MipsMCExprs at the moment.
- void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const {}
+ void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const override {}
static bool classof(const MCExpr *E) {
return E->getKind() == MCExpr::Target;
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h b/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h
index 6992d06..01d5363 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCNaCl.h
@@ -18,7 +18,7 @@ namespace llvm {
static const unsigned MIPS_NACL_BUNDLE_ALIGN = 4u;
bool isBasePlusOffsetMemoryAccess(unsigned Opcode, unsigned *AddrIdx,
- bool *IsStore = NULL);
+ bool *IsStore = nullptr);
bool baseRegNeedsLoadStoreMask(unsigned Reg);
// This function creates an MCELFStreamer for Mips NaCl.
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
index eecca68..660e5a7 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
@@ -30,6 +30,8 @@
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/TargetRegistry.h"
+using namespace llvm;
+
#define GET_INSTRINFO_MC_DESC
#include "MipsGenInstrInfo.inc"
@@ -39,8 +41,6 @@
#define GET_REGINFO_MC_DESC
#include "MipsGenRegisterInfo.inc"
-using namespace llvm;
-
/// Select the Mips CPU for the given triple and cpu name.
/// FIXME: Merge with the copy in MipsSubtarget.cpp
static inline StringRef selectMipsCPU(StringRef TT, StringRef CPU) {
@@ -79,7 +79,7 @@ static MCAsmInfo *createMipsMCAsmInfo(const MCRegisterInfo &MRI, StringRef TT) {
MCAsmInfo *MAI = new MipsMCAsmInfo(TT);
unsigned SP = MRI.getDwarfRegNum(Mips::SP, true);
- MCCFIInstruction Inst = MCCFIInstruction::createDefCfa(0, SP, 0);
+ MCCFIInstruction Inst = MCCFIInstruction::createDefCfa(nullptr, SP, 0);
MAI->addInitialFrameState(Inst);
return MAI;
@@ -124,12 +124,11 @@ static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
static MCStreamer *
createMCAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS,
- bool isVerboseAsm, bool useCFI, bool useDwarfDirectory,
+ bool isVerboseAsm, bool useDwarfDirectory,
MCInstPrinter *InstPrint, MCCodeEmitter *CE,
MCAsmBackend *TAB, bool ShowInst) {
- MCStreamer *S =
- llvm::createAsmStreamer(Ctx, OS, isVerboseAsm, useCFI, useDwarfDirectory,
- InstPrint, CE, TAB, ShowInst);
+ MCStreamer *S = llvm::createAsmStreamer(
+ Ctx, OS, isVerboseAsm, useDwarfDirectory, InstPrint, CE, TAB, ShowInst);
new MipsTargetAsmStreamer(*S, OS);
return S;
}
diff --git a/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp b/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp
index 639a058..cd6be73 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp
@@ -17,8 +17,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "mips-mc-nacl"
-
#include "Mips.h"
#include "MipsELFStreamer.h"
#include "MipsMCNaCl.h"
@@ -26,6 +24,8 @@
using namespace llvm;
+#define DEBUG_TYPE "mips-mc-nacl"
+
namespace {
const unsigned IndirectBranchMaskReg = Mips::T6;
@@ -120,7 +120,8 @@ private:
public:
/// This function is the one used to emit instruction data into the ELF
/// streamer. We override it to mask dangerous instructions.
- virtual void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI) {
+ void EmitInstruction(const MCInst &Inst,
+ const MCSubtargetInfo &STI) override {
// Sandbox indirect jumps.
if (isIndirectJump(Inst)) {
if (PendingCall)
diff --git a/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp b/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
index fb6aff2..a8fa272 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
@@ -85,6 +85,13 @@ void MipsTargetAsmStreamer::emitDirectiveEnt(const MCSymbol &Symbol) {
}
void MipsTargetAsmStreamer::emitDirectiveAbiCalls() { OS << "\t.abicalls\n"; }
+
+void MipsTargetAsmStreamer::emitDirectiveNaN2008() { OS << "\t.nan\t2008\n"; }
+
+void MipsTargetAsmStreamer::emitDirectiveNaNLegacy() {
+ OS << "\t.nan\tlegacy\n";
+}
+
void MipsTargetAsmStreamer::emitDirectiveOptionPic0() {
OS << "\t.option\tpic0\n";
}
@@ -137,6 +144,29 @@ void MipsTargetAsmStreamer::emitFMask(unsigned FPUBitmask,
OS << "," << FPUTopSavedRegOff << '\n';
}
+void MipsTargetAsmStreamer::emitDirectiveCpload(unsigned RegNo) {
+ OS << "\t.cpload\t$"
+ << StringRef(MipsInstPrinter::getRegisterName(RegNo)).lower() << "\n";
+}
+
+void MipsTargetAsmStreamer::emitDirectiveCpsetup(unsigned RegNo,
+ int RegOrOffset,
+ const MCSymbol &Sym,
+ bool IsReg) {
+ OS << "\t.cpsetup\t$"
+ << StringRef(MipsInstPrinter::getRegisterName(RegNo)).lower() << ", ";
+
+ if (IsReg)
+ OS << "$"
+ << StringRef(MipsInstPrinter::getRegisterName(RegOrOffset)).lower();
+ else
+ OS << RegOrOffset;
+
+ OS << ", ";
+
+ OS << Sym.getName() << "\n";
+}
+
// This part is for ELF object output.
MipsTargetELFStreamer::MipsTargetELFStreamer(MCStreamer &S,
const MCSubtargetInfo &STI)
@@ -180,6 +210,10 @@ MipsTargetELFStreamer::MipsTargetELFStreamer(MCStreamer &S,
EFlags |= ELF::EF_MIPS_ABI_O32;
}
+ // Other options.
+ if (Features & Mips::FeatureNaN2008)
+ EFlags |= ELF::EF_MIPS_NAN2008;
+
MCA.setELFHeaderEFlags(EFlags);
}
@@ -325,6 +359,21 @@ void MipsTargetELFStreamer::emitDirectiveAbiCalls() {
Flags |= ELF::EF_MIPS_CPIC | ELF::EF_MIPS_PIC;
MCA.setELFHeaderEFlags(Flags);
}
+
+void MipsTargetELFStreamer::emitDirectiveNaN2008() {
+ MCAssembler &MCA = getStreamer().getAssembler();
+ unsigned Flags = MCA.getELFHeaderEFlags();
+ Flags |= ELF::EF_MIPS_NAN2008;
+ MCA.setELFHeaderEFlags(Flags);
+}
+
+void MipsTargetELFStreamer::emitDirectiveNaNLegacy() {
+ MCAssembler &MCA = getStreamer().getAssembler();
+ unsigned Flags = MCA.getELFHeaderEFlags();
+ Flags &= ~ELF::EF_MIPS_NAN2008;
+ MCA.setELFHeaderEFlags(Flags);
+}
+
void MipsTargetELFStreamer::emitDirectiveOptionPic0() {
MCAssembler &MCA = getStreamer().getAssembler();
unsigned Flags = MCA.getELFHeaderEFlags();
@@ -376,3 +425,107 @@ void MipsTargetELFStreamer::emitDirectiveSetMips64R2() {
void MipsTargetELFStreamer::emitDirectiveSetDsp() {
// No action required for ELF output.
}
+
+void MipsTargetELFStreamer::emitDirectiveCpload(unsigned RegNo) {
+ // .cpload $reg
+ // This directive expands to:
+ // lui $gp, %hi(_gp_disp)
+ // addiu $gp, $gp, %lo(_gp_disp)
+ // addu $gp, $gp, $reg
+ // when support for position independent code is enabled.
+ if (!Pic || (isN32() || isN64()))
+ return;
+
+ // There's a GNU extension controlled by -mno-shared that allows
+ // locally-binding symbols to be accessed using absolute addresses.
+ // This is currently not supported. When supported, -mno-shared makes
+ // .cpload expand to:
+ // lui $gp, %hi(__gnu_local_gp)
+ // addiu $gp, $gp, %lo(__gnu_local_gp)
+
+ StringRef SymName("_gp_disp");
+ MCAssembler &MCA = getStreamer().getAssembler();
+ MCSymbol *GP_Disp = MCA.getContext().GetOrCreateSymbol(SymName);
+ MCA.getOrCreateSymbolData(*GP_Disp);
+
+ MCInst TmpInst;
+ TmpInst.setOpcode(Mips::LUi);
+ TmpInst.addOperand(MCOperand::CreateReg(Mips::GP));
+ const MCSymbolRefExpr *HiSym = MCSymbolRefExpr::Create(
+ "_gp_disp", MCSymbolRefExpr::VK_Mips_ABS_HI, MCA.getContext());
+ TmpInst.addOperand(MCOperand::CreateExpr(HiSym));
+ getStreamer().EmitInstruction(TmpInst, STI);
+
+ TmpInst.clear();
+
+ TmpInst.setOpcode(Mips::ADDiu);
+ TmpInst.addOperand(MCOperand::CreateReg(Mips::GP));
+ TmpInst.addOperand(MCOperand::CreateReg(Mips::GP));
+ const MCSymbolRefExpr *LoSym = MCSymbolRefExpr::Create(
+ "_gp_disp", MCSymbolRefExpr::VK_Mips_ABS_LO, MCA.getContext());
+ TmpInst.addOperand(MCOperand::CreateExpr(LoSym));
+ getStreamer().EmitInstruction(TmpInst, STI);
+
+ TmpInst.clear();
+
+ TmpInst.setOpcode(Mips::ADDu);
+ TmpInst.addOperand(MCOperand::CreateReg(Mips::GP));
+ TmpInst.addOperand(MCOperand::CreateReg(Mips::GP));
+ TmpInst.addOperand(MCOperand::CreateReg(RegNo));
+ getStreamer().EmitInstruction(TmpInst, STI);
+}
+
+void MipsTargetELFStreamer::emitDirectiveCpsetup(unsigned RegNo,
+ int RegOrOffset,
+ const MCSymbol &Sym,
+ bool IsReg) {
+ // Only N32 and N64 emit anything for .cpsetup, and only if PIC is set.
+ if (!Pic || !(isN32() || isN64()))
+ return;
+
+ MCAssembler &MCA = getStreamer().getAssembler();
+ MCInst Inst;
+
+ // Either store the old $gp in a register or on the stack
+ if (IsReg) {
+ // move $save, $gpreg
+ Inst.setOpcode(Mips::DADDu);
+ Inst.addOperand(MCOperand::CreateReg(RegOrOffset));
+ Inst.addOperand(MCOperand::CreateReg(Mips::GP));
+ Inst.addOperand(MCOperand::CreateReg(Mips::ZERO));
+ } else {
+ // sd $gpreg, offset($sp)
+ Inst.setOpcode(Mips::SD);
+ Inst.addOperand(MCOperand::CreateReg(Mips::GP));
+ Inst.addOperand(MCOperand::CreateReg(Mips::SP));
+ Inst.addOperand(MCOperand::CreateImm(RegOrOffset));
+ }
+ getStreamer().EmitInstruction(Inst, STI);
+ Inst.clear();
+
+ const MCSymbolRefExpr *HiExpr = MCSymbolRefExpr::Create(
+ Sym.getName(), MCSymbolRefExpr::VK_Mips_GPOFF_HI, MCA.getContext());
+ const MCSymbolRefExpr *LoExpr = MCSymbolRefExpr::Create(
+ Sym.getName(), MCSymbolRefExpr::VK_Mips_GPOFF_LO, MCA.getContext());
+ // lui $gp, %hi(%neg(%gp_rel(funcSym)))
+ Inst.setOpcode(Mips::LUi);
+ Inst.addOperand(MCOperand::CreateReg(Mips::GP));
+ Inst.addOperand(MCOperand::CreateExpr(HiExpr));
+ getStreamer().EmitInstruction(Inst, STI);
+ Inst.clear();
+
+ // addiu $gp, $gp, %lo(%neg(%gp_rel(funcSym)))
+ Inst.setOpcode(Mips::ADDiu);
+ Inst.addOperand(MCOperand::CreateReg(Mips::GP));
+ Inst.addOperand(MCOperand::CreateReg(Mips::GP));
+ Inst.addOperand(MCOperand::CreateExpr(LoExpr));
+ getStreamer().EmitInstruction(Inst, STI);
+ Inst.clear();
+
+ // daddu $gp, $gp, $funcreg
+ Inst.setOpcode(Mips::DADDu);
+ Inst.addOperand(MCOperand::CreateReg(Mips::GP));
+ Inst.addOperand(MCOperand::CreateReg(Mips::GP));
+ Inst.addOperand(MCOperand::CreateReg(RegNo));
+ getStreamer().EmitInstruction(Inst, STI);
+}
diff --git a/lib/Target/Mips/Makefile b/lib/Target/Mips/Makefile
index bcf951e..41efa47 100644
--- a/lib/Target/Mips/Makefile
+++ b/lib/Target/Mips/Makefile
@@ -13,7 +13,7 @@ TARGET = Mips
# Make sure that tblgen is run, first thing.
BUILT_SOURCES = MipsGenRegisterInfo.inc MipsGenInstrInfo.inc \
- MipsGenAsmWriter.inc MipsGenCodeEmitter.inc \
+ MipsGenAsmWriter.inc MipsGenFastISel.inc MipsGenCodeEmitter.inc \
MipsGenDAGISel.inc MipsGenCallingConv.inc \
MipsGenSubtargetInfo.inc MipsGenMCCodeEmitter.inc \
MipsGenDisassemblerTables.inc \
diff --git a/lib/Target/Mips/MicroMipsInstrFPU.td b/lib/Target/Mips/MicroMipsInstrFPU.td
index 91d447a..d95f9b0 100644
--- a/lib/Target/Mips/MicroMipsInstrFPU.td
+++ b/lib/Target/Mips/MicroMipsInstrFPU.td
@@ -28,9 +28,9 @@ def LWXC1_MM : MMRel, LWXC1_FT<"lwxc1", FGR32Opnd, II_LWXC1, load>,
def SWXC1_MM : MMRel, SWXC1_FT<"swxc1", FGR32Opnd, II_SWXC1, store>,
SWXC1_FM_MM<0x88>;
def LUXC1_MM : MMRel, LWXC1_FT<"luxc1", AFGR64Opnd, II_LUXC1>,
- LWXC1_FM_MM<0x148>;
+ LWXC1_FM_MM<0x148>, INSN_MIPS5_32R2;
def SUXC1_MM : MMRel, SWXC1_FT<"suxc1", AFGR64Opnd, II_SUXC1>,
- SWXC1_FM_MM<0x188>;
+ SWXC1_FM_MM<0x188>, INSN_MIPS5_32R2;
def FCMP_S32_MM : MMRel, CEQS_FT<"s", FGR32, II_C_CC_S, MipsFPCmp>,
CEQS_FM_MM<0>;
@@ -70,9 +70,9 @@ def FSQRT_MM : MMRel, ABSS_FT<"sqrt.d", AFGR64Opnd, AFGR64Opnd, II_SQRT_D,
fsqrt>, ROUND_W_FM_MM<1, 0x28>;
def CVT_L_S_MM : MMRel, ABSS_FT<"cvt.l.s", FGR64Opnd, FGR32Opnd, II_CVT>,
- ROUND_W_FM_MM<0, 0x4>;
+ ROUND_W_FM_MM<0, 0x4>, INSN_MIPS3_32R2;
def CVT_L_D64_MM : MMRel, ABSS_FT<"cvt.l.d", FGR64Opnd, FGR64Opnd, II_CVT>,
- ROUND_W_FM_MM<1, 0x4>;
+ ROUND_W_FM_MM<1, 0x4>, INSN_MIPS3_32R2;
def FABS_S_MM : MMRel, ABSS_FT<"abs.s", FGR32Opnd, FGR32Opnd, II_ABS, fabs>,
ABS_FM_MM<0, 0xd>;
@@ -95,7 +95,7 @@ def FNEG_MM : MMRel, ABSS_FT<"neg.d", AFGR64Opnd, AFGR64Opnd, II_NEG, fneg>,
ABS_FM_MM<1, 0x2d>;
def FMOV_D32_MM : MMRel, ABSS_FT<"mov.d", AFGR64Opnd, AFGR64Opnd, II_MOV_D>,
- ABS_FM_MM<1, 0x1>, Requires<[NotFP64bit, HasStdEnc]>;
+ ABS_FM_MM<1, 0x1>, AdditionalRequires<[NotFP64bit]>;
def MOVZ_I_S_MM : MMRel, CMov_I_F_FT<"movz.s", GPR32Opnd, FGR32Opnd,
II_MOVZ_S>, CMov_I_F_FM_MM<0x78, 0>;
@@ -124,9 +124,9 @@ def MFC1_MM : MMRel, MFC1_FT<"mfc1", GPR32Opnd, FGR32Opnd,
def MTC1_MM : MMRel, MTC1_FT<"mtc1", FGR32Opnd, GPR32Opnd,
II_MTC1, bitconvert>, MFC1_FM_MM<0xa0>;
def MFHC1_MM : MMRel, MFC1_FT<"mfhc1", GPR32Opnd, FGRH32Opnd, II_MFHC1>,
- MFC1_FM_MM<3>;
+ MFC1_FM_MM<3>, ISA_MIPS32R2;
def MTHC1_MM : MMRel, MTC1_FT<"mthc1", FGRH32Opnd, GPR32Opnd, II_MTHC1>,
- MFC1_FM_MM<7>;
+ MFC1_FM_MM<7>, ISA_MIPS32R2;
def MADD_S_MM : MMRel, MADDS_FT<"madd.s", FGR32Opnd, II_MADD_S, fadd>,
MADDS_FM_MM<0x1>;
diff --git a/lib/Target/Mips/MicroMipsInstrInfo.td b/lib/Target/Mips/MicroMipsInstrInfo.td
index 3f13e83..9904bc6 100644
--- a/lib/Target/Mips/MicroMipsInstrInfo.td
+++ b/lib/Target/Mips/MicroMipsInstrInfo.td
@@ -218,15 +218,20 @@ let DecoderNamespace = "MicroMips", Predicates = [InMicroMips] in {
def MSUBU_MM : MMRel, MArithR<"msubu", II_MSUBU>, MULT_FM_MM<0x3ec>;
/// Count Leading
- def CLZ_MM : MMRel, CountLeading0<"clz", GPR32Opnd>, CLO_FM_MM<0x16c>;
- def CLO_MM : MMRel, CountLeading1<"clo", GPR32Opnd>, CLO_FM_MM<0x12c>;
+ def CLZ_MM : MMRel, CountLeading0<"clz", GPR32Opnd>, CLO_FM_MM<0x16c>,
+ ISA_MIPS32;
+ def CLO_MM : MMRel, CountLeading1<"clo", GPR32Opnd>, CLO_FM_MM<0x12c>,
+ ISA_MIPS32;
/// Sign Ext In Register Instructions.
- def SEB_MM : MMRel, SignExtInReg<"seb", i8, GPR32Opnd, II_SEB>, SEB_FM_MM<0x0ac>;
- def SEH_MM : MMRel, SignExtInReg<"seh", i16, GPR32Opnd, II_SEH>, SEB_FM_MM<0x0ec>;
+ def SEB_MM : MMRel, SignExtInReg<"seb", i8, GPR32Opnd, II_SEB>,
+ SEB_FM_MM<0x0ac>, ISA_MIPS32R2;
+ def SEH_MM : MMRel, SignExtInReg<"seh", i16, GPR32Opnd, II_SEH>,
+ SEB_FM_MM<0x0ec>, ISA_MIPS32R2;
/// Word Swap Bytes Within Halfwords
- def WSBH_MM : MMRel, SubwordSwap<"wsbh", GPR32Opnd>, SEB_FM_MM<0x1ec>;
+ def WSBH_MM : MMRel, SubwordSwap<"wsbh", GPR32Opnd>, SEB_FM_MM<0x1ec>,
+ ISA_MIPS32R2;
def EXT_MM : MMRel, ExtBase<"ext", GPR32Opnd, uimm5, MipsExt>,
EXT_FM_MM<0x2c>;
@@ -268,8 +273,10 @@ let DecoderNamespace = "MicroMips", Predicates = [InMicroMips] in {
def WAIT_MM : WaitMM<"wait">, WAIT_FM_MM;
def ERET_MM : MMRel, ER_FT<"eret">, ER_FM_MM<0x3cd>;
def DERET_MM : MMRel, ER_FT<"deret">, ER_FM_MM<0x38d>;
- def EI_MM : MMRel, DEI_FT<"ei", GPR32Opnd>, EI_FM_MM<0x15d>;
- def DI_MM : MMRel, DEI_FT<"di", GPR32Opnd>, EI_FM_MM<0x11d>;
+ def EI_MM : MMRel, DEI_FT<"ei", GPR32Opnd>, EI_FM_MM<0x15d>,
+ ISA_MIPS32R2;
+ def DI_MM : MMRel, DEI_FT<"di", GPR32Opnd>, EI_FM_MM<0x11d>,
+ ISA_MIPS32R2;
/// Trap Instructions
def TEQ_MM : MMRel, TEQ_FT<"teq", GPR32Opnd>, TEQ_FM_MM<0x0>;
@@ -296,5 +303,5 @@ let DecoderNamespace = "MicroMips", Predicates = [InMicroMips] in {
//===----------------------------------------------------------------------===//
let Predicates = [InMicroMips] in {
- def : InstAlias<"wait", (WAIT_MM 0x0), 1>;
+ def : MipsInstAlias<"wait", (WAIT_MM 0x0), 1>;
}
diff --git a/lib/Target/Mips/Mips.td b/lib/Target/Mips/Mips.td
index 10a4699..ea16331 100644
--- a/lib/Target/Mips/Mips.td
+++ b/lib/Target/Mips/Mips.td
@@ -15,6 +15,33 @@
include "llvm/Target/Target.td"
+// The overall idea of the PredicateControl class is to chop the Predicates list
+// into subsets that are usually overridden independently. This allows
+// subclasses to partially override the predicates of their superclasses without
+// having to re-add all the existing predicates.
+class PredicateControl {
+ // Predicates for the encoding scheme in use such as HasStdEnc
+ list<Predicate> EncodingPredicates = [];
+ // Predicates for the GPR size such as IsGP64bit
+ list<Predicate> GPRPredicates = [];
+ // Predicates for the FGR size and layout such as IsFP64bit
+ list<Predicate> FGRPredicates = [];
+ // Predicates for the instruction group membership such as ISA's and ASE's
+ list<Predicate> InsnPredicates = [];
+ // Predicates for anything else
+ list<Predicate> AdditionalPredicates = [];
+ list<Predicate> Predicates = !listconcat(EncodingPredicates,
+ GPRPredicates,
+ FGRPredicates,
+ InsnPredicates,
+ AdditionalPredicates);
+}
+
+// Like Requires<> but for the AdditionalPredicates list
+class AdditionalRequires<list<Predicate> preds> {
+ list<Predicate> AdditionalPredicates = preds;
+}
+
//===----------------------------------------------------------------------===//
// Register File, Calling Conv, Instruction Descriptions
//===----------------------------------------------------------------------===//
@@ -34,6 +61,8 @@ def FeatureGP64Bit : SubtargetFeature<"gp64", "IsGP64bit", "true",
"General Purpose Registers are 64-bit wide.">;
def FeatureFP64Bit : SubtargetFeature<"fp64", "IsFP64bit", "true",
"Support 64-bit FP registers.">;
+def FeatureNaN2008 : SubtargetFeature<"nan2008", "IsNaN2008bit", "true",
+ "IEEE 754-2008 NaN encoding.">;
def FeatureSingleFloat : SubtargetFeature<"single-float", "IsSingleFloat",
"true", "Only supports single precision float">;
def FeatureO32 : SubtargetFeature<"o32", "MipsABI", "O32",
@@ -46,33 +75,62 @@ def FeatureEABI : SubtargetFeature<"eabi", "MipsABI", "EABI",
"Enable eabi ABI">;
def FeatureVFPU : SubtargetFeature<"vfpu", "HasVFPU",
"true", "Enable vector FPU instructions.">;
-def FeatureSEInReg : SubtargetFeature<"seinreg", "HasSEInReg", "true",
- "Enable 'signext in register' instructions.">;
-def FeatureCondMov : SubtargetFeature<"condmov", "HasCondMov", "true",
- "Enable 'conditional move' instructions.">;
-def FeatureSwap : SubtargetFeature<"swap", "HasSwap", "true",
- "Enable 'byte/half swap' instructions.">;
-def FeatureBitCount : SubtargetFeature<"bitcount", "HasBitCount", "true",
- "Enable 'count leading bits' instructions.">;
-def FeatureFPIdx : SubtargetFeature<"FPIdx", "HasFPIdx", "true",
- "Enable 'FP indexed load/store' instructions.">;
+def FeatureMips1 : SubtargetFeature<"mips1", "MipsArchVersion", "Mips1",
+ "Mips I ISA Support [highly experimental]">;
+def FeatureMips2 : SubtargetFeature<"mips2", "MipsArchVersion", "Mips2",
+ "Mips II ISA Support [highly experimental]",
+ [FeatureMips1]>;
+def FeatureMips3_32 : SubtargetFeature<"mips3_32", "HasMips3_32", "true",
+ "Subset of MIPS-III that is also in MIPS32 "
+ "[highly experimental]">;
+def FeatureMips3_32r2 : SubtargetFeature<"mips3_32r2", "HasMips3_32r2", "true",
+ "Subset of MIPS-III that is also in MIPS32r2 "
+ "[highly experimental]">;
+def FeatureMips3 : SubtargetFeature<"mips3", "MipsArchVersion", "Mips3",
+ "MIPS III ISA Support [highly experimental]",
+ [FeatureMips2, FeatureMips3_32,
+ FeatureMips3_32r2, FeatureGP64Bit,
+ FeatureFP64Bit]>;
+def FeatureMips4_32 : SubtargetFeature<"mips4_32", "HasMips4_32", "true",
+ "Subset of MIPS-IV that is also in MIPS32 "
+ "[highly experimental]">;
+def FeatureMips4_32r2 : SubtargetFeature<"mips4_32r2", "HasMips4_32r2", "true",
+ "Subset of MIPS-IV that is also in MIPS32r2 "
+ "[highly experimental]">;
+def FeatureMips4 : SubtargetFeature<"mips4", "MipsArchVersion",
+ "Mips4", "MIPS IV ISA Support",
+ [FeatureMips3, FeatureMips4_32,
+ FeatureMips4_32r2]>;
+def FeatureMips5_32r2 : SubtargetFeature<"mips5_32r2", "HasMips5_32r2", "true",
+ "Subset of MIPS-V that is also in MIPS32r2 "
+ "[highly experimental]">;
+def FeatureMips5 : SubtargetFeature<"mips5", "MipsArchVersion", "Mips5",
+ "MIPS V ISA Support [highly experimental]",
+ [FeatureMips4, FeatureMips5_32r2]>;
def FeatureMips32 : SubtargetFeature<"mips32", "MipsArchVersion", "Mips32",
"Mips32 ISA Support",
- [FeatureCondMov, FeatureBitCount]>;
+ [FeatureMips2, FeatureMips3_32,
+ FeatureMips4_32]>;
def FeatureMips32r2 : SubtargetFeature<"mips32r2", "MipsArchVersion",
"Mips32r2", "Mips32r2 ISA Support",
- [FeatureMips32, FeatureSEInReg, FeatureSwap,
- FeatureFPIdx]>;
-def FeatureMips4 : SubtargetFeature<"mips4", "MipsArchVersion",
- "Mips4", "MIPS IV ISA Support",
- [FeatureGP64Bit, FeatureFP64Bit,
- FeatureCondMov]>;
+ [FeatureMips3_32r2, FeatureMips4_32r2,
+ FeatureMips5_32r2, FeatureMips32]>;
+def FeatureMips32r6 : SubtargetFeature<"mips32r6", "MipsArchVersion",
+ "Mips32r6",
+ "Mips32r6 ISA Support [experimental]",
+ [FeatureMips32r2, FeatureFP64Bit,
+ FeatureNaN2008]>;
def FeatureMips64 : SubtargetFeature<"mips64", "MipsArchVersion",
"Mips64", "Mips64 ISA Support",
- [FeatureMips4, FeatureMips32, FeatureFPIdx]>;
+ [FeatureMips5, FeatureMips32]>;
def FeatureMips64r2 : SubtargetFeature<"mips64r2", "MipsArchVersion",
"Mips64r2", "Mips64r2 ISA Support",
[FeatureMips64, FeatureMips32r2]>;
+def FeatureMips64r6 : SubtargetFeature<"mips64r6", "MipsArchVersion",
+ "Mips64r6",
+ "Mips64r6 ISA Support [experimental]",
+ [FeatureMips32r6, FeatureMips64r2,
+ FeatureNaN2008]>;
def FeatureMips16 : SubtargetFeature<"mips16", "InMips16Mode", "true",
"Mips16 mode">;
@@ -97,11 +155,18 @@ def FeatureCnMips : SubtargetFeature<"cnmips", "HasCnMips",
class Proc<string Name, list<SubtargetFeature> Features>
: Processor<Name, MipsGenericItineraries, Features>;
+def : Proc<"mips1", [FeatureMips1, FeatureO32]>;
+def : Proc<"mips2", [FeatureMips2, FeatureO32]>;
def : Proc<"mips32", [FeatureMips32, FeatureO32]>;
def : Proc<"mips32r2", [FeatureMips32r2, FeatureO32]>;
+def : Proc<"mips32r6", [FeatureMips32r6, FeatureO32]>;
+
+def : Proc<"mips3", [FeatureMips3, FeatureN64]>;
def : Proc<"mips4", [FeatureMips4, FeatureN64]>;
+def : Proc<"mips5", [FeatureMips5, FeatureN64]>;
def : Proc<"mips64", [FeatureMips64, FeatureN64]>;
def : Proc<"mips64r2", [FeatureMips64r2, FeatureN64]>;
+def : Proc<"mips64r6", [FeatureMips64r6, FeatureN64]>;
def : Proc<"mips16", [FeatureMips16, FeatureO32]>;
def : Proc<"octeon", [FeatureMips64r2, FeatureN64, FeatureCnMips]>;
diff --git a/lib/Target/Mips/Mips16FrameLowering.cpp b/lib/Target/Mips/Mips16FrameLowering.cpp
index 028b049..c01d03a 100644
--- a/lib/Target/Mips/Mips16FrameLowering.cpp
+++ b/lib/Target/Mips/Mips16FrameLowering.cpp
@@ -71,7 +71,7 @@ void Mips16FrameLowering::emitPrologue(MachineFunction &MF) const {
}
if (hasFP(MF))
BuildMI(MBB, MBBI, dl, TII.get(Mips::MoveR3216), Mips::S0)
- .addReg(Mips::SP);
+ .addReg(Mips::SP).setMIFlag(MachineInstr::FrameSetup);
}
diff --git a/lib/Target/Mips/Mips16FrameLowering.h b/lib/Target/Mips/Mips16FrameLowering.h
index 8ce2ced..3f7829d 100644
--- a/lib/Target/Mips/Mips16FrameLowering.h
+++ b/lib/Target/Mips/Mips16FrameLowering.h
@@ -24,27 +24,27 @@ public:
/// emitProlog/emitEpilog - These methods insert prolog and epilog code into
/// the function.
- void emitPrologue(MachineFunction &MF) const;
- void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+ void emitPrologue(MachineFunction &MF) const override;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
void eliminateCallFramePseudoInstr(MachineFunction &MF,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const;
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const override;
bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
const std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const;
+ const TargetRegisterInfo *TRI) const override;
bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const;
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const override;
- bool hasReservedCallFrame(const MachineFunction &MF) const;
+ bool hasReservedCallFrame(const MachineFunction &MF) const override;
void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
- RegScavenger *RS) const;
+ RegScavenger *RS) const override;
};
} // End llvm namespace
diff --git a/lib/Target/Mips/Mips16HardFloat.cpp b/lib/Target/Mips/Mips16HardFloat.cpp
index d321e21..14055d6 100644
--- a/lib/Target/Mips/Mips16HardFloat.cpp
+++ b/lib/Target/Mips/Mips16HardFloat.cpp
@@ -11,7 +11,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "mips16-hard-float"
#include "Mips16HardFloat.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Value.h"
@@ -20,6 +19,8 @@
#include <algorithm>
#include <string>
+#define DEBUG_TYPE "mips16-hard-float"
+
static void inlineAsmOut
(LLVMContext &C, StringRef AsmString, BasicBlock *BB ) {
std::vector<llvm::Type *> AsmArgTypes;
@@ -354,9 +355,8 @@ static const char *IntrinsicInline[] =
};
static bool isIntrinsicInline(Function *F) {
- return std::binary_search(
- IntrinsicInline, array_endof(IntrinsicInline),
- F->getName());
+ return std::binary_search(std::begin(IntrinsicInline),
+ std::end(IntrinsicInline), F->getName());
}
//
// Returns of float, double and complex need to be handled with a helper
@@ -407,11 +407,11 @@ static bool fixupFPReturnAndCall
CallInst::Create(F, Params, "", &Inst );
} else if (const CallInst *CI = dyn_cast<CallInst>(I)) {
const Value* V = CI->getCalledValue();
- const Type* T = 0;
+ const Type* T = nullptr;
if (V) T = V->getType();
- const PointerType *PFT=0;
+ const PointerType *PFT=nullptr;
if (T) PFT = dyn_cast<PointerType>(T);
- const FunctionType *FT=0;
+ const FunctionType *FT=nullptr;
if (PFT) FT = dyn_cast<FunctionType>(PFT->getElementType());
Function *F_ = CI->getCalledFunction();
if (FT && needsFPReturnHelper(*FT) &&
diff --git a/lib/Target/Mips/Mips16HardFloat.h b/lib/Target/Mips/Mips16HardFloat.h
index b7f712a..826887e 100644
--- a/lib/Target/Mips/Mips16HardFloat.h
+++ b/lib/Target/Mips/Mips16HardFloat.h
@@ -34,11 +34,11 @@ public:
TM(TM_), Subtarget(TM.getSubtarget<MipsSubtarget>()) {
}
- virtual const char *getPassName() const {
+ const char *getPassName() const override {
return "MIPS16 Hard Float Pass";
}
- virtual bool runOnModule(Module &M);
+ bool runOnModule(Module &M) override;
protected:
/// Keep a pointer to the MipsSubtarget around so that we can make the right
diff --git a/lib/Target/Mips/Mips16HardFloatInfo.cpp b/lib/Target/Mips/Mips16HardFloatInfo.cpp
index d8b685e..2eb6e5d 100644
--- a/lib/Target/Mips/Mips16HardFloatInfo.cpp
+++ b/lib/Target/Mips/Mips16HardFloatInfo.cpp
@@ -30,7 +30,7 @@ const FuncNameSignature PredefinedFuncs[] = {
{ "__fixunssfsi", { FSig, NoFPRet } },
{ "__fixunssfdi", { FSig, NoFPRet } },
{ "__floatundisf", { NoSig, FRet } },
- { 0, { NoSig, NoFPRet } }
+ { nullptr, { NoSig, NoFPRet } }
};
// Just do a search for now. There are very few of these special cases.
@@ -44,7 +44,7 @@ extern FuncSignature const *findFuncSignature(const char *name) {
return &PredefinedFuncs[i].Signature;
i++;
}
- return 0;
+ return nullptr;
}
}
}
diff --git a/lib/Target/Mips/Mips16ISelDAGToDAG.cpp b/lib/Target/Mips/Mips16ISelDAGToDAG.cpp
index 9e36546..4e86a27 100644
--- a/lib/Target/Mips/Mips16ISelDAGToDAG.cpp
+++ b/lib/Target/Mips/Mips16ISelDAGToDAG.cpp
@@ -11,7 +11,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "mips-isel"
#include "Mips16ISelDAGToDAG.h"
#include "MCTargetDesc/MipsBaseInfo.h"
#include "Mips.h"
@@ -35,6 +34,8 @@
#include "llvm/Target/TargetMachine.h"
using namespace llvm;
+#define DEBUG_TYPE "mips-isel"
+
bool Mips16DAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
if (!Subtarget.inMips16Mode())
return false;
@@ -44,7 +45,7 @@ bool Mips16DAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
std::pair<SDNode*, SDNode*>
Mips16DAGToDAGISel::selectMULT(SDNode *N, unsigned Opc, SDLoc DL, EVT Ty,
bool HasLo, bool HasHi) {
- SDNode *Lo = 0, *Hi = 0;
+ SDNode *Lo = nullptr, *Hi = nullptr;
SDNode *Mul = CurDAG->getMachineNode(Opc, DL, MVT::Glue, N->getOperand(0),
N->getOperand(1));
SDValue InFlag = SDValue(Mul, 0);
@@ -224,10 +225,12 @@ bool Mips16DAGToDAGISel::selectAddr16(
// If an indexed floating point load/store can be emitted, return false.
const LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(Parent);
- if (LS &&
- (LS->getMemoryVT() == MVT::f32 || LS->getMemoryVT() == MVT::f64) &&
- Subtarget.hasFPIdx())
- return false;
+ if (LS) {
+ if (LS->getMemoryVT() == MVT::f32 && Subtarget.hasMips4_32r2())
+ return false;
+ if (LS->getMemoryVT() == MVT::f64 && Subtarget.hasMips4_32r2())
+ return false;
+ }
}
Base = Addr;
Offset = CurDAG->getTargetConstant(0, ValTy);
@@ -297,7 +300,7 @@ std::pair<bool, SDNode*> Mips16DAGToDAGISel::selectNode(SDNode *Node) {
if (!SDValue(Node, 1).use_empty())
ReplaceUses(SDValue(Node, 1), SDValue(LoHi.second, 0));
- return std::make_pair(true, (SDNode*)NULL);
+ return std::make_pair(true, nullptr);
}
case ISD::MULHS:
@@ -308,7 +311,7 @@ std::pair<bool, SDNode*> Mips16DAGToDAGISel::selectNode(SDNode *Node) {
}
}
- return std::make_pair(false, (SDNode*)NULL);
+ return std::make_pair(false, nullptr);
}
FunctionPass *llvm::createMips16ISelDag(MipsTargetMachine &TM) {
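
Like the other files in this patch, Mips16ISelDAGToDAG.cpp moves '#define DEBUG_TYPE' from before the #includes to after them. The point is macro hygiene: a macro defined first is visible inside every included header, so any header that itself uses DEBUG_TYPE would silently pick up "mips-isel". A tiny sketch of the hazard, with a hypothetical LOG macro standing in for LLVM's DEBUG machinery:

// Sketch: defining DEBUG_TYPE after the includes keeps it file-local.
#include <cstdio>
#define DEBUG_TYPE "mips-isel"  // after includes: cannot leak into headers
#define LOG(msg) std::printf("[%s] %s\n", DEBUG_TYPE, msg)  // hypothetical

int main() {
  LOG("selecting multiply");  // prints: [mips-isel] selecting multiply
  return 0;
}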
diff --git a/lib/Target/Mips/Mips16ISelDAGToDAG.h b/lib/Target/Mips/Mips16ISelDAGToDAG.h
index 49dc6e5..e653b39 100644
--- a/lib/Target/Mips/Mips16ISelDAGToDAG.h
+++ b/lib/Target/Mips/Mips16ISelDAGToDAG.h
@@ -28,16 +28,16 @@ private:
SDValue getMips16SPAliasReg();
- virtual bool runOnMachineFunction(MachineFunction &MF);
+ bool runOnMachineFunction(MachineFunction &MF) override;
void getMips16SPRefReg(SDNode *Parent, SDValue &AliasReg);
- virtual bool selectAddr16(SDNode *Parent, SDValue N, SDValue &Base,
- SDValue &Offset, SDValue &Alias);
+ bool selectAddr16(SDNode *Parent, SDValue N, SDValue &Base,
+ SDValue &Offset, SDValue &Alias) override;
- virtual std::pair<bool, SDNode*> selectNode(SDNode *Node);
+ std::pair<bool, SDNode*> selectNode(SDNode *Node) override;
- virtual void processFunctionAfterISel(MachineFunction &MF);
+ void processFunctionAfterISel(MachineFunction &MF) override;
// Insert instructions to initialize the global base register in the
// first MBB of the function.
diff --git a/lib/Target/Mips/Mips16ISelLowering.cpp b/lib/Target/Mips/Mips16ISelLowering.cpp
index 5c6f302..9102450 100644
--- a/lib/Target/Mips/Mips16ISelLowering.cpp
+++ b/lib/Target/Mips/Mips16ISelLowering.cpp
@@ -10,7 +10,6 @@
// Subclass of MipsTargetLowering specialized for mips16.
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "mips-lower"
#include "Mips16ISelLowering.h"
#include "MCTargetDesc/MipsBaseInfo.h"
#include "MipsRegisterInfo.h"
@@ -23,6 +22,8 @@
using namespace llvm;
+#define DEBUG_TYPE "mips-lower"
+
static cl::opt<bool> DontExpandCondPseudos16(
"mips16-dont-expand-cond-pseudo",
cl::init(false),
@@ -353,7 +354,7 @@ unsigned int Mips16TargetLowering::getMips16HelperFunctionStubNumber
#define T P "0" , T1
#define P P_
static char const * vMips16Helper[MAX_STUB_NUMBER+1] =
- {0, T1 };
+ {nullptr, T1 };
#undef P
#define P P_ "sf_"
static char const * sfMips16Helper[MAX_STUB_NUMBER+1] =
@@ -430,7 +431,7 @@ getOpndList(SmallVectorImpl<SDValue> &Ops,
SelectionDAG &DAG = CLI.DAG;
MachineFunction &MF = DAG.getMachineFunction();
MipsFunctionInfo *FuncInfo = MF.getInfo<MipsFunctionInfo>();
- const char* Mips16HelperFunction = 0;
+ const char* Mips16HelperFunction = nullptr;
bool NeedMips16Helper = false;
if (Subtarget->inMips16HardFloat()) {
@@ -443,8 +444,8 @@ getOpndList(SmallVectorImpl<SDValue> &Ops,
if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(CLI.Callee)) {
Mips16Libcall Find = { RTLIB::UNKNOWN_LIBCALL, S->getSymbol() };
- if (std::binary_search(HardFloatLibCalls, array_endof(HardFloatLibCalls),
- Find))
+ if (std::binary_search(std::begin(HardFloatLibCalls),
+ std::end(HardFloatLibCalls), Find))
LookupHelper = false;
else {
const char *Symbol = S->getSymbol();
@@ -471,13 +472,12 @@ getOpndList(SmallVectorImpl<SDValue> &Ops,
FuncInfo->setSaveS2();
}
// one more look at list of intrinsics
- if (std::binary_search(Mips16IntrinsicHelper,
- array_endof(Mips16IntrinsicHelper),
- IntrinsicFind)) {
- const Mips16IntrinsicHelperType *h =(std::find(Mips16IntrinsicHelper,
- array_endof(Mips16IntrinsicHelper),
- IntrinsicFind));
- Mips16HelperFunction = h->Helper;
+ const Mips16IntrinsicHelperType *Helper =
+ std::lower_bound(std::begin(Mips16IntrinsicHelper),
+ std::end(Mips16IntrinsicHelper), IntrinsicFind);
+ if (Helper != std::end(Mips16IntrinsicHelper) &&
+ *Helper == IntrinsicFind) {
+ Mips16HelperFunction = Helper->Helper;
NeedMips16Helper = true;
LookupHelper = false;
}
@@ -488,13 +488,13 @@ getOpndList(SmallVectorImpl<SDValue> &Ops,
Mips16Libcall Find = { RTLIB::UNKNOWN_LIBCALL,
G->getGlobal()->getName().data() };
- if (std::binary_search(HardFloatLibCalls, array_endof(HardFloatLibCalls),
- Find))
+ if (std::binary_search(std::begin(HardFloatLibCalls),
+ std::end(HardFloatLibCalls), Find))
LookupHelper = false;
}
- if (LookupHelper) Mips16HelperFunction =
- getMips16HelperFunction(CLI.RetTy, CLI.Args, NeedMips16Helper);
-
+ if (LookupHelper)
+ Mips16HelperFunction =
+ getMips16HelperFunction(CLI.RetTy, CLI.getArgs(), NeedMips16Helper);
}
SDValue JumpTarget = Callee;
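
The getOpndList hunk above replaces a std::binary_search followed by a separate std::find with a single std::lower_bound plus an equality check, probing the sorted helper table once instead of twice. A reduced sketch of that lookup; the table contents are invented, and the array is assumed sorted by name as the real Mips16IntrinsicHelper table is:

// Sketch: one lower_bound probe replaces binary_search + find.
#include <algorithm>
#include <cstring>
#include <iterator>

struct Helper {
  const char *Name;
  const char *Stub;
  bool operator<(const Helper &RHS) const {
    return std::strcmp(Name, RHS.Name) < 0;
  }
  bool operator==(const Helper &RHS) const {
    return std::strcmp(Name, RHS.Name) == 0;
  }
};

static const Helper Helpers[] = {   // invented entries, sorted by Name
  { "ceilf",  "__hypothetical_stub_1" },
  { "floorf", "__hypothetical_stub_2" },
};

static const char *findStub(const char *Name) {
  Helper Key = { Name, nullptr };
  const Helper *I =
      std::lower_bound(std::begin(Helpers), std::end(Helpers), Key);
  return (I != std::end(Helpers) && *I == Key) ? I->Stub : nullptr;
}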
diff --git a/lib/Target/Mips/Mips16ISelLowering.h b/lib/Target/Mips/Mips16ISelLowering.h
index 618ec90..df88333 100644
--- a/lib/Target/Mips/Mips16ISelLowering.h
+++ b/lib/Target/Mips/Mips16ISelLowering.h
@@ -21,17 +21,17 @@ namespace llvm {
public:
explicit Mips16TargetLowering(MipsTargetMachine &TM);
- virtual bool allowsUnalignedMemoryAccesses(EVT VT, unsigned AddrSpace,
- bool *Fast) const;
+ bool allowsUnalignedMemoryAccesses(EVT VT, unsigned AddrSpace,
+ bool *Fast) const override;
- virtual MachineBasicBlock *
- EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const;
+ MachineBasicBlock *
+ EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *MBB) const override;
private:
- virtual bool
- isEligibleForTailCallOptimization(const MipsCC &MipsCCInfo,
- unsigned NextStackOffset,
- const MipsFunctionInfo& FI) const;
+ bool isEligibleForTailCallOptimization(const MipsCC &MipsCCInfo,
+ unsigned NextStackOffset,
+ const MipsFunctionInfo& FI) const override;
void setMips16HardFloatLibCalls();
@@ -41,11 +41,12 @@ namespace llvm {
const char *getMips16HelperFunction
(Type* RetTy, ArgListTy &Args, bool &needHelper) const;
- virtual void
+ void
getOpndList(SmallVectorImpl<SDValue> &Ops,
std::deque< std::pair<unsigned, SDValue> > &RegsToPass,
bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage,
- CallLoweringInfo &CLI, SDValue Callee, SDValue Chain) const;
+ CallLoweringInfo &CLI, SDValue Callee,
+ SDValue Chain) const override;
MachineBasicBlock *emitSel16(unsigned Opc, MachineInstr *MI,
MachineBasicBlock *BB) const;
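
The virtual-to-override conversions running through these headers are not just style: 'override' turns a silent signature drift into a compile error. With plain 'virtual', a derived function whose signature no longer matches the base (a dropped const, a changed parameter type) quietly becomes a new, unrelated virtual. A minimal illustration:

// Sketch: 'override' catches signature drift at compile time.
struct Base {
  virtual bool runOnModule(int &M) const { return false; }
  virtual ~Base() = default;
};

struct Derived : Base {
  // bool runOnModule(int &M) { ... }        // drops const: silently hides
  bool runOnModule(int &M) const override {  // 'override' makes drift an error
    return true;
  }
};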
diff --git a/lib/Target/Mips/Mips16InstrInfo.cpp b/lib/Target/Mips/Mips16InstrInfo.cpp
index 43c2fbd..79607de 100644
--- a/lib/Target/Mips/Mips16InstrInfo.cpp
+++ b/lib/Target/Mips/Mips16InstrInfo.cpp
@@ -29,6 +29,7 @@
using namespace llvm;
+#define DEBUG_TYPE "mips16-instrinfo"
Mips16InstrInfo::Mips16InstrInfo(MipsTargetMachine &tm)
: MipsInstrInfo(tm, Mips::Bimm16),
diff --git a/lib/Target/Mips/Mips16InstrInfo.h b/lib/Target/Mips/Mips16InstrInfo.h
index e93925c..0dc0046 100644
--- a/lib/Target/Mips/Mips16InstrInfo.h
+++ b/lib/Target/Mips/Mips16InstrInfo.h
@@ -25,46 +25,46 @@ class Mips16InstrInfo : public MipsInstrInfo {
public:
explicit Mips16InstrInfo(MipsTargetMachine &TM);
- virtual const MipsRegisterInfo &getRegisterInfo() const;
+ const MipsRegisterInfo &getRegisterInfo() const override;
/// isLoadFromStackSlot - If the specified machine instruction is a direct
/// load from a stack slot, return the virtual or physical register number of
/// the destination along with the FrameIndex of the loaded stack slot. If
/// not, return 0. This predicate must return 0 if the instruction has
/// any side effects other than loading from the stack slot.
- virtual unsigned isLoadFromStackSlot(const MachineInstr *MI,
- int &FrameIndex) const;
+ unsigned isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const override;
/// isStoreToStackSlot - If the specified machine instruction is a direct
/// store to a stack slot, return the virtual or physical register number of
/// the source reg along with the FrameIndex of the loaded stack slot. If
/// not, return 0. This predicate must return 0 if the instruction has
/// any side effects other than storing to the stack slot.
- virtual unsigned isStoreToStackSlot(const MachineInstr *MI,
- int &FrameIndex) const;
+ unsigned isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const override;
- virtual void copyPhysReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI, DebugLoc DL,
- unsigned DestReg, unsigned SrcReg,
- bool KillSrc) const;
+ void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const override;
- virtual void storeRegToStack(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- unsigned SrcReg, bool isKill, int FrameIndex,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI,
- int64_t Offset) const;
+ void storeRegToStack(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI,
+ int64_t Offset) const override;
- virtual void loadRegFromStack(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- unsigned DestReg, int FrameIndex,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI,
- int64_t Offset) const;
+ void loadRegFromStack(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI,
+ int64_t Offset) const override;
- virtual bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const;
+ bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const override;
- virtual unsigned getOppositeBranchOpc(unsigned Opc) const;
+ unsigned getOppositeBranchOpc(unsigned Opc) const override;
// Adjust SP by FrameSize bytes. Save RA, S0, S1
void makeFrame(unsigned SP, int64_t FrameSize, MachineBasicBlock &MBB,
@@ -104,9 +104,9 @@ public:
(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, int64_t Imm) const;
unsigned getInlineAsmLength(const char *Str,
- const MCAsmInfo &MAI) const;
+ const MCAsmInfo &MAI) const override;
private:
- virtual unsigned getAnalyzableBrOpc(unsigned Opc) const;
+ unsigned getAnalyzableBrOpc(unsigned Opc) const override;
void ExpandRetRA16(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
unsigned Opc) const;
diff --git a/lib/Target/Mips/Mips16RegisterInfo.cpp b/lib/Target/Mips/Mips16RegisterInfo.cpp
index 3a50ed9..dbee774 100644
--- a/lib/Target/Mips/Mips16RegisterInfo.cpp
+++ b/lib/Target/Mips/Mips16RegisterInfo.cpp
@@ -39,6 +39,8 @@
using namespace llvm;
+#define DEBUG_TYPE "mips16-registerinfo"
+
Mips16RegisterInfo::Mips16RegisterInfo(const MipsSubtarget &ST)
: MipsRegisterInfo(ST) {}
diff --git a/lib/Target/Mips/Mips16RegisterInfo.h b/lib/Target/Mips/Mips16RegisterInfo.h
index 13e82a3..f59f1a7 100644
--- a/lib/Target/Mips/Mips16RegisterInfo.h
+++ b/lib/Target/Mips/Mips16RegisterInfo.h
@@ -23,24 +23,24 @@ class Mips16RegisterInfo : public MipsRegisterInfo {
public:
Mips16RegisterInfo(const MipsSubtarget &Subtarget);
- bool requiresRegisterScavenging(const MachineFunction &MF) const;
+ bool requiresRegisterScavenging(const MachineFunction &MF) const override;
- bool requiresFrameIndexScavenging(const MachineFunction &MF) const;
+ bool requiresFrameIndexScavenging(const MachineFunction &MF) const override;
- bool useFPForScavengingIndex(const MachineFunction &MF) const;
+ bool useFPForScavengingIndex(const MachineFunction &MF) const override;
bool saveScavengerRegister(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
MachineBasicBlock::iterator &UseMI,
const TargetRegisterClass *RC,
- unsigned Reg) const;
+ unsigned Reg) const override;
- virtual const TargetRegisterClass *intRegClass(unsigned Size) const;
+ const TargetRegisterClass *intRegClass(unsigned Size) const override;
private:
- virtual void eliminateFI(MachineBasicBlock::iterator II, unsigned OpNo,
- int FrameIndex, uint64_t StackSize,
- int64_t SPOffset) const;
+ void eliminateFI(MachineBasicBlock::iterator II, unsigned OpNo,
+ int FrameIndex, uint64_t StackSize,
+ int64_t SPOffset) const override;
};
} // end namespace llvm
diff --git a/lib/Target/Mips/Mips32r6InstrFormats.td b/lib/Target/Mips/Mips32r6InstrFormats.td
new file mode 100644
index 0000000..a3f9df5
--- /dev/null
+++ b/lib/Target/Mips/Mips32r6InstrFormats.td
@@ -0,0 +1,386 @@
+//=- Mips32r6InstrFormats.td - Mips32r6 Instruction Formats -*- tablegen -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes Mips32r6 instruction formats.
+//
+//===----------------------------------------------------------------------===//
+
+class MipsR6Inst : MipsInst<(outs), (ins), "", [], NoItinerary, FrmOther>,
+ PredicateControl {
+ let DecoderNamespace = "Mips32r6_64r6";
+ let EncodingPredicates = [HasStdEnc];
+}
+
+//===----------------------------------------------------------------------===//
+//
+// Field Values
+//
+//===----------------------------------------------------------------------===//
+
+class OPGROUP<bits<6> Val> {
+ bits<6> Value = Val;
+}
+def OPGROUP_COP1 : OPGROUP<0b010001>;
+def OPGROUP_COP2 : OPGROUP<0b010010>;
+def OPGROUP_ADDI : OPGROUP<0b001000>;
+def OPGROUP_AUI : OPGROUP<0b001111>;
+def OPGROUP_BLEZ : OPGROUP<0b000110>;
+def OPGROUP_BGTZ : OPGROUP<0b000111>;
+def OPGROUP_BLEZL : OPGROUP<0b010110>;
+def OPGROUP_BGTZL : OPGROUP<0b010111>;
+def OPGROUP_DADDI : OPGROUP<0b011000>;
+def OPGROUP_DAUI : OPGROUP<0b011101>;
+def OPGROUP_PCREL : OPGROUP<0b111011>;
+def OPGROUP_REGIMM : OPGROUP<0b000001>;
+def OPGROUP_SPECIAL : OPGROUP<0b000000>;
+def OPGROUP_SPECIAL3 : OPGROUP<0b011111>;
+
+class OPCODE2<bits<2> Val> {
+ bits<2> Value = Val;
+}
+def OPCODE2_ADDIUPC : OPCODE2<0b00>;
+def OPCODE2_LWPC : OPCODE2<0b01>;
+def OPCODE2_LWUPC : OPCODE2<0b10>;
+
+class OPCODE5<bits<5> Val> {
+ bits<5> Value = Val;
+}
+def OPCODE5_ALUIPC : OPCODE5<0b11111>;
+def OPCODE5_AUIPC : OPCODE5<0b11110>;
+def OPCODE5_DAHI : OPCODE5<0b00110>;
+def OPCODE5_DATI : OPCODE5<0b11110>;
+def OPCODE5_BC1EQZ : OPCODE5<0b01001>;
+def OPCODE5_BC1NEZ : OPCODE5<0b01101>;
+def OPCODE5_BC2EQZ : OPCODE5<0b01001>;
+def OPCODE5_BC2NEZ : OPCODE5<0b01101>;
+
+class OPCODE6<bits<6> Val> {
+ bits<6> Value = Val;
+}
+def OPCODE6_ALIGN : OPCODE6<0b100000>;
+def OPCODE6_DALIGN : OPCODE6<0b100100>;
+def OPCODE6_BITSWAP : OPCODE6<0b100000>;
+def OPCODE6_DBITSWAP : OPCODE6<0b100100>;
+
+class FIELD_FMT<bits<5> Val> {
+ bits<5> Value = Val;
+}
+def FIELD_FMT_S : FIELD_FMT<0b10000>;
+def FIELD_FMT_D : FIELD_FMT<0b10001>;
+
+class FIELD_CMP_COND<bits<5> Val> {
+ bits<5> Value = Val;
+}
+def FIELD_CMP_COND_F : FIELD_CMP_COND<0b00000>;
+def FIELD_CMP_COND_UN : FIELD_CMP_COND<0b00001>;
+def FIELD_CMP_COND_EQ : FIELD_CMP_COND<0b00010>;
+def FIELD_CMP_COND_UEQ : FIELD_CMP_COND<0b00011>;
+def FIELD_CMP_COND_OLT : FIELD_CMP_COND<0b00100>;
+def FIELD_CMP_COND_ULT : FIELD_CMP_COND<0b00101>;
+def FIELD_CMP_COND_OLE : FIELD_CMP_COND<0b00110>;
+def FIELD_CMP_COND_ULE : FIELD_CMP_COND<0b00111>;
+def FIELD_CMP_COND_SF : FIELD_CMP_COND<0b01000>;
+def FIELD_CMP_COND_NGLE : FIELD_CMP_COND<0b01001>;
+def FIELD_CMP_COND_SEQ : FIELD_CMP_COND<0b01010>;
+def FIELD_CMP_COND_NGL : FIELD_CMP_COND<0b01011>;
+def FIELD_CMP_COND_LT : FIELD_CMP_COND<0b01100>;
+def FIELD_CMP_COND_NGE : FIELD_CMP_COND<0b01101>;
+def FIELD_CMP_COND_LE : FIELD_CMP_COND<0b01110>;
+def FIELD_CMP_COND_NGT : FIELD_CMP_COND<0b01111>;
+
+class FIELD_CMP_FORMAT<bits<5> Val> {
+ bits<5> Value = Val;
+}
+def FIELD_CMP_FORMAT_S : FIELD_CMP_FORMAT<0b10100>;
+def FIELD_CMP_FORMAT_D : FIELD_CMP_FORMAT<0b10101>;
+
+//===----------------------------------------------------------------------===//
+//
+// Disambiguators
+//
+//===----------------------------------------------------------------------===//
+//
+// Some encodings are ambiguous except by comparing field values.
+
+class DecodeDisambiguates<string Name> {
+ string DecoderMethod = !strconcat("Decode", Name);
+}
+
+class DecodeDisambiguatedBy<string Name> : DecodeDisambiguates<Name> {
+ string DecoderNamespace = "Mips32r6_64r6_Ambiguous";
+}
+
+//===----------------------------------------------------------------------===//
+//
+// Encoding Formats
+//
+//===----------------------------------------------------------------------===//
+
+class AUI_FM : MipsR6Inst {
+ bits<5> rs;
+ bits<5> rt;
+ bits<16> imm;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = OPGROUP_AUI.Value;
+ let Inst{25-21} = rs;
+ let Inst{20-16} = rt;
+ let Inst{15-0} = imm;
+}
+
+class DAUI_FM : AUI_FM {
+ let Inst{31-26} = OPGROUP_DAUI.Value;
+}
+
+class COP1_2R_FM<bits<6> funct, FIELD_FMT Format> : MipsR6Inst {
+ bits<5> fs;
+ bits<5> fd;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = OPGROUP_COP1.Value;
+ let Inst{25-21} = Format.Value;
+ let Inst{20-16} = 0b00000;
+ let Inst{15-11} = fs;
+ let Inst{10-6} = fd;
+ let Inst{5-0} = funct;
+}
+
+class COP1_3R_FM<bits<6> funct, FIELD_FMT Format> : MipsR6Inst {
+ bits<5> ft;
+ bits<5> fs;
+ bits<5> fd;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = OPGROUP_COP1.Value;
+ let Inst{25-21} = Format.Value;
+ let Inst{20-16} = ft;
+ let Inst{15-11} = fs;
+ let Inst{10-6} = fd;
+ let Inst{5-0} = funct;
+}
+
+class COP1_BCCZ_FM<OPCODE5 Operation> : MipsR6Inst {
+ bits<5> ft;
+ bits<16> offset;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = OPGROUP_COP1.Value;
+ let Inst{25-21} = Operation.Value;
+ let Inst{20-16} = ft;
+ let Inst{15-0} = offset;
+}
+
+class COP2_BCCZ_FM<OPCODE5 Operation> : MipsR6Inst {
+ bits<5> ct;
+ bits<16> offset;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = OPGROUP_COP2.Value;
+ let Inst{25-21} = Operation.Value;
+ let Inst{20-16} = ct;
+ let Inst{15-0} = offset;
+}
+
+class PCREL16_FM<OPCODE5 Operation> : MipsR6Inst {
+ bits<5> rs;
+ bits<16> imm;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = OPGROUP_PCREL.Value;
+ let Inst{25-21} = rs;
+ let Inst{20-16} = Operation.Value;
+ let Inst{15-0} = imm;
+}
+
+class PCREL19_FM<OPCODE2 Operation> : MipsR6Inst {
+ bits<5> rs;
+ bits<19> imm;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = OPGROUP_PCREL.Value;
+ let Inst{25-21} = rs;
+ let Inst{20-19} = Operation.Value;
+ let Inst{18-0} = imm;
+}
+
+class SPECIAL3_2R_FM<OPCODE6 Operation> : MipsR6Inst {
+ bits<5> rd;
+ bits<5> rt;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = OPGROUP_SPECIAL3.Value;
+ let Inst{25-21} = 0b00000;
+ let Inst{20-16} = rt;
+ let Inst{15-11} = rd;
+ let Inst{10-6} = 0b00000;
+ let Inst{5-0} = Operation.Value;
+}
+
+class SPECIAL_3R_FM<bits<5> mulop, bits<6> funct> : MipsR6Inst {
+ bits<5> rd;
+ bits<5> rs;
+ bits<5> rt;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = OPGROUP_SPECIAL.Value;
+ let Inst{25-21} = rs;
+ let Inst{20-16} = rt;
+ let Inst{15-11} = rd;
+ let Inst{10-6} = mulop;
+ let Inst{5-0} = funct;
+}
+
+// This class is ambiguous with other branches:
+// BEQC/BNEC require that rs > rt
+class CMP_BRANCH_2R_OFF16_FM<OPGROUP funct> : MipsR6Inst {
+ bits<5> rs;
+ bits<5> rt;
+ bits<16> offset;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = funct.Value;
+ let Inst{25-21} = rs;
+ let Inst{20-16} = rt;
+ let Inst{15-0} = offset;
+}
+
+// This class is ambiguous with other branches:
+// BLEZC/BGEZC/BEQZALC/BNEZALC/BGTZALC require that rs == 0 && rt != 0
+// The '1R_RT' in the name means 1 register in the rt field.
+class CMP_BRANCH_1R_RT_OFF16_FM<OPGROUP funct> : MipsR6Inst {
+ bits<5> rt;
+ bits<16> offset;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = funct.Value;
+ let Inst{25-21} = 0b00000;
+ let Inst{20-16} = rt;
+ let Inst{15-0} = offset;
+}
+
+// This class is ambiguous with other branches:
+// BLTZC/BGTZC/BLTZALC/BGEZALC require that rs == rt && rt != 0
+// The '1R_BOTH' in the name means 1 register in both the rs and rt fields.
+class CMP_BRANCH_1R_BOTH_OFF16_FM<OPGROUP funct> : MipsR6Inst {
+ bits<5> rt;
+ bits<16> offset;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = funct.Value;
+ let Inst{25-21} = rt;
+ let Inst{20-16} = rt;
+ let Inst{15-0} = offset;
+}
+
+class CMP_BRANCH_OFF21_FM<bits<6> funct> : MipsR6Inst {
+ bits<5> rs; // rs != 0
+ bits<21> offset;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = funct;
+ let Inst{25-21} = rs;
+ let Inst{20-0} = offset;
+}
+
+class JMP_IDX_COMPACT_FM<bits<6> funct> : MipsR6Inst {
+ bits<5> rt;
+ bits<16> offset;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = funct;
+  let Inst{25-21} = 0b00000;
+ let Inst{20-16} = rt;
+ let Inst{15-0} = offset;
+}
+
+class BRANCH_OFF26_FM<bits<6> funct> : MipsR6Inst {
+ bits<32> Inst;
+ bits<26> offset;
+
+ let Inst{31-26} = funct;
+ let Inst{25-0} = offset;
+}
+
+class SPECIAL3_ALIGN_FM<OPCODE6 Operation> : MipsR6Inst {
+ bits<5> rd;
+ bits<5> rs;
+ bits<5> rt;
+ bits<2> bp;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = OPGROUP_SPECIAL3.Value;
+ let Inst{25-21} = rs;
+ let Inst{20-16} = rt;
+ let Inst{15-11} = rd;
+ let Inst{10-8} = 0b010;
+ let Inst{7-6} = bp;
+ let Inst{5-0} = Operation.Value;
+}
+
+class SPECIAL3_DALIGN_FM<OPCODE6 Operation> : MipsR6Inst {
+ bits<5> rd;
+ bits<5> rs;
+ bits<5> rt;
+ bits<3> bp;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = OPGROUP_SPECIAL3.Value;
+ let Inst{25-21} = rs;
+ let Inst{20-16} = rt;
+ let Inst{15-11} = rd;
+ let Inst{10-9} = 0b01;
+ let Inst{8-6} = bp;
+ let Inst{5-0} = Operation.Value;
+}
+
+class REGIMM_FM<OPCODE5 Operation> : MipsR6Inst {
+ bits<5> rs;
+ bits<16> imm;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = OPGROUP_REGIMM.Value;
+ let Inst{25-21} = rs;
+ let Inst{20-16} = Operation.Value;
+ let Inst{15-0} = imm;
+}
+
+class COP1_CMP_CONDN_FM<FIELD_CMP_FORMAT Format,
+ FIELD_CMP_COND Cond> : MipsR6Inst {
+ bits<5> fd;
+ bits<5> fs;
+ bits<5> ft;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = OPGROUP_COP1.Value;
+ let Inst{25-21} = Format.Value;
+ let Inst{20-16} = ft;
+ let Inst{15-11} = fs;
+ let Inst{10-6} = fd;
+ let Inst{5} = 0;
+ let Inst{4-0} = Cond.Value;
+}
+
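
Each *_FM class in the new file is a declarative picture of a 32-bit instruction word: every 'let Inst{hi-lo} = field' pins a field to a bit range. Re-expressed imperatively, AUI_FM's layout comes out as the following C++ sketch (illustrative only; bit positions taken from the class, opcode from OPGROUP_AUI):

// Sketch: AUI_FM's 'let Inst{...}' assignments as explicit shifts and masks.
#include <cstdint>
#include <cstdio>

static uint32_t encodeAUI(unsigned rs, unsigned rt, unsigned imm) {
  uint32_t Inst = 0;
  Inst |= (0x0Fu & 0x3F) << 26;  // Inst{31-26} = OPGROUP_AUI.Value (0b001111)
  Inst |= (rs & 0x1F) << 21;     // Inst{25-21} = rs
  Inst |= (rt & 0x1F) << 16;     // Inst{20-16} = rt
  Inst |= imm & 0xFFFF;          // Inst{15-0}  = imm
  return Inst;
}

int main() {
  // Per AUI_DESC in the next file, $rs (bits 25-21) is the destination.
  std::printf("0x%08x\n", encodeAUI(/*rs=*/4, /*rt=*/5, 0x1234));
  return 0;
}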
diff --git a/lib/Target/Mips/Mips32r6InstrInfo.td b/lib/Target/Mips/Mips32r6InstrInfo.td
new file mode 100644
index 0000000..ffaf965
--- /dev/null
+++ b/lib/Target/Mips/Mips32r6InstrInfo.td
@@ -0,0 +1,583 @@
+//=- Mips32r6InstrInfo.td - Mips32r6 Instruction Information -*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes Mips32r6 instructions.
+//
+//===----------------------------------------------------------------------===//
+
+include "Mips32r6InstrFormats.td"
+
+// Notes about removals/changes from MIPS32r6:
+// Unclear: ssnop
+// Reencoded: cache, pref
+// Reencoded: clo, clz
+// Reencoded: jr -> jalr
+// Reencoded: jr.hb -> jalr.hb
+// Reencoded: ldc2
+// Reencoded: ll, sc
+// Reencoded: lwc2
+// Reencoded: sdbbp
+// Reencoded: sdc2
+// Reencoded: swc2
+// Removed: bc1any2, bc1any4
+// Removed: bc2[ft]
+// Removed: bc2f, bc2t
+// Removed: bgezal
+// Removed: bltzal
+// Removed: c.cond.fmt, bc1[ft]
+// Removed: div, divu
+// Removed: jalx
+// Removed: ldxc1
+// Removed: luxc1
+// Removed: lwxc1
+// Removed: madd.[ds], msub.[ds], nmadd.[ds], nmsub.[ds]
+// Removed: mfhi, mflo, mthi, mtlo, madd, maddu, msub, msubu, mul
+// Removed: movf, movt
+// Removed: movf.fmt, movt.fmt, movn.fmt, movz.fmt
+// Removed: movn, movz
+// Removed: mult, multu
+// Removed: prefx
+// Removed: sdxc1
+// Removed: suxc1
+// Removed: swxc1
+// Reencoded: [ls][wd]c2
+
+def brtarget21 : Operand<OtherVT> {
+ let EncoderMethod = "getBranchTarget21OpValue";
+ let OperandType = "OPERAND_PCREL";
+ let DecoderMethod = "DecodeBranchTarget21";
+ let ParserMatchClass = MipsJumpTargetAsmOperand;
+}
+
+def brtarget26 : Operand<OtherVT> {
+ let EncoderMethod = "getBranchTarget26OpValue";
+ let OperandType = "OPERAND_PCREL";
+ let DecoderMethod = "DecodeBranchTarget26";
+ let ParserMatchClass = MipsJumpTargetAsmOperand;
+}
+
+def jmpoffset16 : Operand<OtherVT> {
+ let EncoderMethod = "getJumpOffset16OpValue";
+ let ParserMatchClass = MipsJumpTargetAsmOperand;
+}
+
+def calloffset16 : Operand<iPTR> {
+ let EncoderMethod = "getJumpOffset16OpValue";
+ let ParserMatchClass = MipsJumpTargetAsmOperand;
+}
+
+//===----------------------------------------------------------------------===//
+//
+// Instruction Encodings
+//
+//===----------------------------------------------------------------------===//
+
+class ADDIUPC_ENC : PCREL19_FM<OPCODE2_ADDIUPC>;
+class ALIGN_ENC : SPECIAL3_ALIGN_FM<OPCODE6_ALIGN>;
+class ALUIPC_ENC : PCREL16_FM<OPCODE5_ALUIPC>;
+class AUI_ENC : AUI_FM;
+class AUIPC_ENC : PCREL16_FM<OPCODE5_AUIPC>;
+
+class BALC_ENC : BRANCH_OFF26_FM<0b111010>;
+class BC_ENC : BRANCH_OFF26_FM<0b110010>;
+class BEQC_ENC : CMP_BRANCH_2R_OFF16_FM<OPGROUP_ADDI>,
+ DecodeDisambiguates<"AddiGroupBranch">;
+class BEQZALC_ENC : CMP_BRANCH_1R_RT_OFF16_FM<OPGROUP_ADDI>,
+ DecodeDisambiguatedBy<"DaddiGroupBranch">;
+class BNEC_ENC : CMP_BRANCH_2R_OFF16_FM<OPGROUP_DADDI>,
+ DecodeDisambiguates<"DaddiGroupBranch">;
+class BNEZALC_ENC : CMP_BRANCH_1R_RT_OFF16_FM<OPGROUP_DADDI>,
+ DecodeDisambiguatedBy<"DaddiGroupBranch">;
+
+class BLTZC_ENC : CMP_BRANCH_1R_BOTH_OFF16_FM<OPGROUP_BGTZL>,
+ DecodeDisambiguates<"BgtzlGroupBranch">;
+class BGEZC_ENC : CMP_BRANCH_1R_BOTH_OFF16_FM<OPGROUP_BLEZL>,
+ DecodeDisambiguates<"BlezlGroupBranch">;
+class BGTZALC_ENC : CMP_BRANCH_1R_RT_OFF16_FM<OPGROUP_BGTZ>,
+ DecodeDisambiguatedBy<"BgtzGroupBranch">;
+
+class BLEZC_ENC : CMP_BRANCH_1R_RT_OFF16_FM<OPGROUP_BLEZL>,
+ DecodeDisambiguatedBy<"BlezlGroupBranch">;
+class BLTZALC_ENC : CMP_BRANCH_1R_BOTH_OFF16_FM<OPGROUP_BGTZ>,
+ DecodeDisambiguates<"BgtzGroupBranch">;
+class BGTZC_ENC : CMP_BRANCH_1R_RT_OFF16_FM<OPGROUP_BGTZL>,
+ DecodeDisambiguatedBy<"BgtzlGroupBranch">;
+
+class BEQZC_ENC : CMP_BRANCH_OFF21_FM<0b110110>;
+class BGEZALC_ENC : CMP_BRANCH_1R_BOTH_OFF16_FM<OPGROUP_BLEZ>;
+class BNEZC_ENC : CMP_BRANCH_OFF21_FM<0b111110>;
+
+class BC1EQZ_ENC : COP1_BCCZ_FM<OPCODE5_BC1EQZ>;
+class BC1NEZ_ENC : COP1_BCCZ_FM<OPCODE5_BC1NEZ>;
+class BC2EQZ_ENC : COP2_BCCZ_FM<OPCODE5_BC2EQZ>;
+class BC2NEZ_ENC : COP2_BCCZ_FM<OPCODE5_BC2NEZ>;
+
+class JIALC_ENC : JMP_IDX_COMPACT_FM<0b111110>;
+class JIC_ENC : JMP_IDX_COMPACT_FM<0b110110>;
+
+class BITSWAP_ENC : SPECIAL3_2R_FM<OPCODE6_BITSWAP>;
+class BLEZALC_ENC : CMP_BRANCH_1R_RT_OFF16_FM<OPGROUP_BLEZ>;
+class BNVC_ENC : CMP_BRANCH_2R_OFF16_FM<OPGROUP_DADDI>,
+ DecodeDisambiguatedBy<"DaddiGroupBranch">;
+class BOVC_ENC : CMP_BRANCH_2R_OFF16_FM<OPGROUP_ADDI>,
+ DecodeDisambiguatedBy<"AddiGroupBranch">;
+class DIV_ENC : SPECIAL_3R_FM<0b00010, 0b011010>;
+class DIVU_ENC : SPECIAL_3R_FM<0b00010, 0b011011>;
+class MOD_ENC : SPECIAL_3R_FM<0b00011, 0b011010>;
+class MODU_ENC : SPECIAL_3R_FM<0b00011, 0b011011>;
+class MUH_ENC : SPECIAL_3R_FM<0b00011, 0b011000>;
+class MUHU_ENC : SPECIAL_3R_FM<0b00011, 0b011001>;
+class MUL_R6_ENC : SPECIAL_3R_FM<0b00010, 0b011000>;
+class MULU_ENC : SPECIAL_3R_FM<0b00010, 0b011001>;
+
+class MADDF_S_ENC : COP1_3R_FM<0b011000, FIELD_FMT_S>;
+class MADDF_D_ENC : COP1_3R_FM<0b011000, FIELD_FMT_D>;
+class MSUBF_S_ENC : COP1_3R_FM<0b011001, FIELD_FMT_S>;
+class MSUBF_D_ENC : COP1_3R_FM<0b011001, FIELD_FMT_D>;
+
+class SEL_D_ENC : COP1_3R_FM<0b010000, FIELD_FMT_D>;
+class SEL_S_ENC : COP1_3R_FM<0b010000, FIELD_FMT_S>;
+
+class SELEQZ_ENC : SPECIAL_3R_FM<0b00000, 0b110101>;
+class SELNEZ_ENC : SPECIAL_3R_FM<0b00000, 0b110111>;
+
+class LWPC_ENC : PCREL19_FM<OPCODE2_LWPC>;
+class LWUPC_ENC : PCREL19_FM<OPCODE2_LWUPC>;
+
+class MAX_S_ENC : COP1_3R_FM<0b011101, FIELD_FMT_S>;
+class MAX_D_ENC : COP1_3R_FM<0b011101, FIELD_FMT_D>;
+class MIN_S_ENC : COP1_3R_FM<0b011100, FIELD_FMT_S>;
+class MIN_D_ENC : COP1_3R_FM<0b011100, FIELD_FMT_D>;
+
+class MAXA_S_ENC : COP1_3R_FM<0b011111, FIELD_FMT_S>;
+class MAXA_D_ENC : COP1_3R_FM<0b011111, FIELD_FMT_D>;
+class MINA_S_ENC : COP1_3R_FM<0b011110, FIELD_FMT_S>;
+class MINA_D_ENC : COP1_3R_FM<0b011110, FIELD_FMT_D>;
+
+class SELEQZ_S_ENC : COP1_3R_FM<0b010100, FIELD_FMT_S>;
+class SELEQZ_D_ENC : COP1_3R_FM<0b010100, FIELD_FMT_D>;
+class SELNEZ_S_ENC : COP1_3R_FM<0b010111, FIELD_FMT_S>;
+class SELNEZ_D_ENC : COP1_3R_FM<0b010111, FIELD_FMT_D>;
+
+class RINT_S_ENC : COP1_2R_FM<0b011010, FIELD_FMT_S>;
+class RINT_D_ENC : COP1_2R_FM<0b011010, FIELD_FMT_D>;
+class CLASS_S_ENC : COP1_2R_FM<0b011011, FIELD_FMT_S>;
+class CLASS_D_ENC : COP1_2R_FM<0b011011, FIELD_FMT_D>;
+
+class CMP_CONDN_DESC_BASE<string CondStr, string Typestr, RegisterOperand FGROpnd> {
+ dag OutOperandList = (outs FGROpnd:$fd);
+ dag InOperandList = (ins FGROpnd:$fs, FGROpnd:$ft);
+ string AsmString = !strconcat("cmp.", CondStr, ".", Typestr, "\t$fd, $fs, $ft");
+ list<dag> Pattern = [];
+}
+
+//===----------------------------------------------------------------------===//
+//
+// Instruction Multiclasses
+//
+//===----------------------------------------------------------------------===//
+
+multiclass CMP_CC_M <FIELD_CMP_FORMAT Format, string Typestr,
+ RegisterOperand FGROpnd>{
+ def CMP_F_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_F>,
+ CMP_CONDN_DESC_BASE<"f", Typestr, FGROpnd>,
+ ISA_MIPS32R6;
+ def CMP_UN_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_UN>,
+ CMP_CONDN_DESC_BASE<"un", Typestr, FGROpnd>,
+ ISA_MIPS32R6;
+ def CMP_EQ_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_EQ>,
+ CMP_CONDN_DESC_BASE<"eq", Typestr, FGROpnd>,
+ ISA_MIPS32R6;
+ def CMP_UEQ_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_UEQ>,
+ CMP_CONDN_DESC_BASE<"ueq", Typestr, FGROpnd>,
+ ISA_MIPS32R6;
+ def CMP_OLT_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_OLT>,
+ CMP_CONDN_DESC_BASE<"olt", Typestr, FGROpnd>,
+ ISA_MIPS32R6;
+ def CMP_ULT_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_ULT>,
+ CMP_CONDN_DESC_BASE<"ult", Typestr, FGROpnd>,
+ ISA_MIPS32R6;
+ def CMP_OLE_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_OLE>,
+ CMP_CONDN_DESC_BASE<"ole", Typestr, FGROpnd>,
+ ISA_MIPS32R6;
+ def CMP_ULE_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_ULE>,
+ CMP_CONDN_DESC_BASE<"ule", Typestr, FGROpnd>,
+ ISA_MIPS32R6;
+ def CMP_SF_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_SF>,
+ CMP_CONDN_DESC_BASE<"sf", Typestr, FGROpnd>,
+ ISA_MIPS32R6;
+ def CMP_NGLE_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_NGLE>,
+ CMP_CONDN_DESC_BASE<"ngle", Typestr, FGROpnd>,
+ ISA_MIPS32R6;
+ def CMP_SEQ_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_SEQ>,
+ CMP_CONDN_DESC_BASE<"seq", Typestr, FGROpnd>,
+ ISA_MIPS32R6;
+ def CMP_NGL_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_NGL>,
+ CMP_CONDN_DESC_BASE<"ngl", Typestr, FGROpnd>,
+ ISA_MIPS32R6;
+ def CMP_LT_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_LT>,
+ CMP_CONDN_DESC_BASE<"lt", Typestr, FGROpnd>,
+ ISA_MIPS32R6;
+ def CMP_NGE_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_NGE>,
+ CMP_CONDN_DESC_BASE<"nge", Typestr, FGROpnd>,
+ ISA_MIPS32R6;
+ def CMP_LE_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_LE>,
+ CMP_CONDN_DESC_BASE<"le", Typestr, FGROpnd>,
+ ISA_MIPS32R6;
+ def CMP_NGT_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_NGT>,
+ CMP_CONDN_DESC_BASE<"ngt", Typestr, FGROpnd>,
+ ISA_MIPS32R6;
+}
+
+//===----------------------------------------------------------------------===//
+//
+// Instruction Descriptions
+//
+//===----------------------------------------------------------------------===//
+
+class PCREL19_DESC_BASE<string instr_asm, RegisterOperand GPROpnd> {
+ dag OutOperandList = (outs GPROpnd:$rs);
+ dag InOperandList = (ins simm19_lsl2:$imm);
+ string AsmString = !strconcat(instr_asm, "\t$rs, $imm");
+ list<dag> Pattern = [];
+}
+
+class ADDIUPC_DESC : PCREL19_DESC_BASE<"addiupc", GPR32Opnd>;
+class LWPC_DESC: PCREL19_DESC_BASE<"lwpc", GPR32Opnd>;
+class LWUPC_DESC: PCREL19_DESC_BASE<"lwupc", GPR32Opnd>;
+
+class ALIGN_DESC_BASE<string instr_asm, RegisterOperand GPROpnd,
+ Operand ImmOpnd> {
+ dag OutOperandList = (outs GPROpnd:$rd);
+ dag InOperandList = (ins GPROpnd:$rs, GPROpnd:$rt, ImmOpnd:$bp);
+ string AsmString = !strconcat(instr_asm, "\t$rd, $rs, $rt, $bp");
+ list<dag> Pattern = [];
+}
+
+class ALIGN_DESC : ALIGN_DESC_BASE<"align", GPR32Opnd, uimm2>;
+
+class ALUIPC_DESC_BASE<string instr_asm, RegisterOperand GPROpnd> {
+ dag OutOperandList = (outs GPROpnd:$rs);
+ dag InOperandList = (ins simm16:$imm);
+ string AsmString = !strconcat(instr_asm, "\t$rs, $imm");
+ list<dag> Pattern = [];
+}
+
+class ALUIPC_DESC : ALUIPC_DESC_BASE<"aluipc", GPR32Opnd>;
+class AUIPC_DESC : ALUIPC_DESC_BASE<"auipc", GPR32Opnd>;
+
+class AUI_DESC_BASE<string instr_asm, RegisterOperand GPROpnd> {
+ dag OutOperandList = (outs GPROpnd:$rs);
+ dag InOperandList = (ins GPROpnd:$rt, simm16:$imm);
+ string AsmString = !strconcat(instr_asm, "\t$rs, $rt, $imm");
+ list<dag> Pattern = [];
+}
+
+class AUI_DESC : AUI_DESC_BASE<"aui", GPR32Opnd>;
+
+class BRANCH_DESC_BASE {
+ bit isBranch = 1;
+ bit isTerminator = 1;
+ bit hasDelaySlot = 0;
+}
+
+class BC_DESC_BASE<string instr_asm, DAGOperand opnd> : BRANCH_DESC_BASE {
+ dag InOperandList = (ins opnd:$offset);
+ dag OutOperandList = (outs);
+ string AsmString = !strconcat(instr_asm, "\t$offset");
+ bit isBarrier = 1;
+}
+
+class CMP_BC_DESC_BASE<string instr_asm, DAGOperand opnd,
+ RegisterOperand GPROpnd> : BRANCH_DESC_BASE {
+ dag InOperandList = (ins GPROpnd:$rs, GPROpnd:$rt, opnd:$offset);
+ dag OutOperandList = (outs);
+ string AsmString = !strconcat(instr_asm, "\t$rs, $rt, $offset");
+ list<Register> Defs = [AT];
+}
+
+class CMP_CBR_EQNE_Z_DESC_BASE<string instr_asm, DAGOperand opnd,
+ RegisterOperand GPROpnd> : BRANCH_DESC_BASE {
+ dag InOperandList = (ins GPROpnd:$rs, opnd:$offset);
+ dag OutOperandList = (outs);
+ string AsmString = !strconcat(instr_asm, "\t$rs, $offset");
+ list<Register> Defs = [AT];
+}
+
+class CMP_CBR_RT_Z_DESC_BASE<string instr_asm, DAGOperand opnd,
+ RegisterOperand GPROpnd> : BRANCH_DESC_BASE {
+ dag InOperandList = (ins GPROpnd:$rt, opnd:$offset);
+ dag OutOperandList = (outs);
+ string AsmString = !strconcat(instr_asm, "\t$rt, $offset");
+ list<Register> Defs = [AT];
+}
+
+class BALC_DESC : BC_DESC_BASE<"balc", brtarget26> {
+ bit isCall = 1;
+ list<Register> Defs = [RA];
+}
+
+class BC_DESC : BC_DESC_BASE<"bc", brtarget26>;
+class BEQC_DESC : CMP_BC_DESC_BASE<"beqc", brtarget, GPR32Opnd>;
+class BNEC_DESC : CMP_BC_DESC_BASE<"bnec", brtarget, GPR32Opnd>;
+
+class BLTZC_DESC : CMP_CBR_RT_Z_DESC_BASE<"bltzc", brtarget, GPR32Opnd>;
+class BGEZC_DESC : CMP_CBR_RT_Z_DESC_BASE<"bgezc", brtarget, GPR32Opnd>;
+
+class BLEZC_DESC : CMP_CBR_RT_Z_DESC_BASE<"blezc", brtarget, GPR32Opnd>;
+class BGTZC_DESC : CMP_CBR_RT_Z_DESC_BASE<"bgtzc", brtarget, GPR32Opnd>;
+
+class BEQZC_DESC : CMP_CBR_EQNE_Z_DESC_BASE<"beqzc", brtarget21, GPR32Opnd>;
+class BNEZC_DESC : CMP_CBR_EQNE_Z_DESC_BASE<"bnezc", brtarget21, GPR32Opnd>;
+
+class COP1_BCCZ_DESC_BASE<string instr_asm> : BRANCH_DESC_BASE {
+ dag InOperandList = (ins FGR64Opnd:$ft, brtarget:$offset);
+ dag OutOperandList = (outs);
+ string AsmString = instr_asm;
+ bit hasDelaySlot = 1;
+}
+
+class BC1EQZ_DESC : COP1_BCCZ_DESC_BASE<"bc1eqz $ft, $offset">;
+class BC1NEZ_DESC : COP1_BCCZ_DESC_BASE<"bc1nez $ft, $offset">;
+
+class COP2_BCCZ_DESC_BASE<string instr_asm> : BRANCH_DESC_BASE {
+ dag InOperandList = (ins COP2Opnd:$ct, brtarget:$offset);
+ dag OutOperandList = (outs);
+ string AsmString = instr_asm;
+ bit hasDelaySlot = 1;
+}
+
+class BC2EQZ_DESC : COP2_BCCZ_DESC_BASE<"bc2eqz $ct, $offset">;
+class BC2NEZ_DESC : COP2_BCCZ_DESC_BASE<"bc2nez $ct, $offset">;
+
+class BOVC_DESC : CMP_BC_DESC_BASE<"bovc", brtarget, GPR32Opnd>;
+class BNVC_DESC : CMP_BC_DESC_BASE<"bnvc", brtarget, GPR32Opnd>;
+
+class JMP_IDX_COMPACT_DESC_BASE<string opstr, DAGOperand opnd,
+ RegisterOperand GPROpnd> {
+ dag InOperandList = (ins GPROpnd:$rt, opnd:$offset);
+ string AsmString = !strconcat(opstr, "\t$rt, $offset");
+ list<dag> Pattern = [];
+ bit isTerminator = 1;
+ bit hasDelaySlot = 0;
+ string DecoderMethod = "DecodeSimm16";
+}
+
+class JIALC_DESC : JMP_IDX_COMPACT_DESC_BASE<"jialc", calloffset16,
+ GPR32Opnd> {
+ bit isCall = 1;
+ list<Register> Defs = [RA];
+}
+
+class JIC_DESC : JMP_IDX_COMPACT_DESC_BASE<"jic", jmpoffset16, GPR32Opnd> {
+ bit isBarrier = 1;
+ list<Register> Defs = [AT];
+}
+
+class BITSWAP_DESC_BASE<string instr_asm, RegisterOperand GPROpnd> {
+ dag OutOperandList = (outs GPROpnd:$rd);
+ dag InOperandList = (ins GPROpnd:$rt);
+ string AsmString = !strconcat(instr_asm, "\t$rd, $rt");
+ list<dag> Pattern = [];
+}
+
+class BITSWAP_DESC : BITSWAP_DESC_BASE<"bitswap", GPR32Opnd>;
+
+class DIVMOD_DESC_BASE<string instr_asm, RegisterOperand GPROpnd> {
+ dag OutOperandList = (outs GPROpnd:$rd);
+ dag InOperandList = (ins GPROpnd:$rs, GPROpnd:$rt);
+ string AsmString = !strconcat(instr_asm, "\t$rd, $rs, $rt");
+ list<dag> Pattern = [];
+}
+
+class DIV_DESC : DIVMOD_DESC_BASE<"div", GPR32Opnd>;
+class DIVU_DESC : DIVMOD_DESC_BASE<"divu", GPR32Opnd>;
+class MOD_DESC : DIVMOD_DESC_BASE<"mod", GPR32Opnd>;
+class MODU_DESC : DIVMOD_DESC_BASE<"modu", GPR32Opnd>;
+
+class BEQZALC_DESC : CMP_CBR_RT_Z_DESC_BASE<"beqzalc", brtarget, GPR32Opnd> {
+ list<Register> Defs = [RA];
+}
+
+class BGEZALC_DESC : CMP_CBR_RT_Z_DESC_BASE<"bgezalc", brtarget, GPR32Opnd> {
+ list<Register> Defs = [RA];
+}
+
+class BGTZALC_DESC : CMP_CBR_RT_Z_DESC_BASE<"bgtzalc", brtarget, GPR32Opnd> {
+ list<Register> Defs = [RA];
+}
+
+class BLEZALC_DESC : CMP_CBR_RT_Z_DESC_BASE<"blezalc", brtarget, GPR32Opnd> {
+ list<Register> Defs = [RA];
+}
+
+class BLTZALC_DESC : CMP_CBR_RT_Z_DESC_BASE<"bltzalc", brtarget, GPR32Opnd> {
+ list<Register> Defs = [RA];
+}
+
+class BNEZALC_DESC : CMP_CBR_RT_Z_DESC_BASE<"bnezalc", brtarget, GPR32Opnd> {
+ list<Register> Defs = [RA];
+}
+class MUL_R6_DESC_BASE<string instr_asm, RegisterOperand GPROpnd> {
+ dag OutOperandList = (outs GPROpnd:$rd);
+ dag InOperandList = (ins GPROpnd:$rs, GPROpnd:$rt);
+ string AsmString = !strconcat(instr_asm, "\t$rd, $rs, $rt");
+ list<dag> Pattern = [];
+}
+
+class MUH_DESC : MUL_R6_DESC_BASE<"muh", GPR32Opnd>;
+class MUHU_DESC : MUL_R6_DESC_BASE<"muhu", GPR32Opnd>;
+class MUL_R6_DESC : MUL_R6_DESC_BASE<"mul", GPR32Opnd>;
+class MULU_DESC : MUL_R6_DESC_BASE<"mulu", GPR32Opnd>;
+
+class COP1_4R_DESC_BASE<string instr_asm, RegisterOperand FGROpnd> {
+ dag OutOperandList = (outs FGROpnd:$fd);
+ dag InOperandList = (ins FGROpnd:$fd_in, FGROpnd:$fs, FGROpnd:$ft);
+ string AsmString = !strconcat(instr_asm, "\t$fd, $fs, $ft");
+ list<dag> Pattern = [];
+ string Constraints = "$fd_in = $fd";
+}
+
+class SEL_D_DESC : COP1_4R_DESC_BASE<"sel.d", FGR64Opnd>;
+class SEL_S_DESC : COP1_4R_DESC_BASE<"sel.s", FGR32Opnd>;
+
+class SELEQNE_Z_DESC_BASE<string instr_asm, RegisterOperand GPROpnd> {
+ dag OutOperandList = (outs GPROpnd:$rd);
+ dag InOperandList = (ins GPROpnd:$rs, GPROpnd:$rt);
+ string AsmString = !strconcat(instr_asm, "\t$rd, $rs, $rt");
+ list<dag> Pattern = [];
+}
+
+class SELEQZ_DESC : SELEQNE_Z_DESC_BASE<"seleqz", GPR32Opnd>;
+class SELNEZ_DESC : SELEQNE_Z_DESC_BASE<"selnez", GPR32Opnd>;
+
+class MADDF_S_DESC : COP1_4R_DESC_BASE<"maddf.s", FGR32Opnd>;
+class MADDF_D_DESC : COP1_4R_DESC_BASE<"maddf.d", FGR64Opnd>;
+class MSUBF_S_DESC : COP1_4R_DESC_BASE<"msubf.s", FGR32Opnd>;
+class MSUBF_D_DESC : COP1_4R_DESC_BASE<"msubf.d", FGR64Opnd>;
+
+class MAX_MIN_DESC_BASE<string instr_asm, RegisterOperand FGROpnd> {
+ dag OutOperandList = (outs FGROpnd:$fd);
+ dag InOperandList = (ins FGROpnd:$fs, FGROpnd:$ft);
+ string AsmString = !strconcat(instr_asm, "\t$fd, $fs, $ft");
+ list<dag> Pattern = [];
+}
+
+class MAX_S_DESC : MAX_MIN_DESC_BASE<"max.s", FGR32Opnd>;
+class MAX_D_DESC : MAX_MIN_DESC_BASE<"max.d", FGR64Opnd>;
+class MIN_S_DESC : MAX_MIN_DESC_BASE<"min.s", FGR32Opnd>;
+class MIN_D_DESC : MAX_MIN_DESC_BASE<"min.d", FGR64Opnd>;
+
+class MAXA_S_DESC : MAX_MIN_DESC_BASE<"maxa.s", FGR32Opnd>;
+class MAXA_D_DESC : MAX_MIN_DESC_BASE<"maxa.d", FGR64Opnd>;
+class MINA_S_DESC : MAX_MIN_DESC_BASE<"mina.s", FGR32Opnd>;
+class MINA_D_DESC : MAX_MIN_DESC_BASE<"mina.d", FGR64Opnd>;
+
+class SELEQNEZ_DESC_BASE<string instr_asm, RegisterOperand FGROpnd> {
+ dag OutOperandList = (outs FGROpnd:$fd);
+ dag InOperandList = (ins FGROpnd:$fs, FGROpnd:$ft);
+ string AsmString = !strconcat(instr_asm, "\t$fd, $fs, $ft");
+ list<dag> Pattern = [];
+}
+
+class SELEQZ_S_DESC : SELEQNEZ_DESC_BASE<"seleqz.s", FGR32Opnd>;
+class SELEQZ_D_DESC : SELEQNEZ_DESC_BASE<"seleqz.d", FGR64Opnd>;
+class SELNEZ_S_DESC : SELEQNEZ_DESC_BASE<"selnez.s", FGR32Opnd>;
+class SELNEZ_D_DESC : SELEQNEZ_DESC_BASE<"selnez.d", FGR64Opnd>;
+
+class CLASS_RINT_DESC_BASE<string instr_asm, RegisterOperand FGROpnd> {
+ dag OutOperandList = (outs FGROpnd:$fd);
+ dag InOperandList = (ins FGROpnd:$fs);
+ string AsmString = !strconcat(instr_asm, "\t$fd, $fs");
+ list<dag> Pattern = [];
+}
+
+class RINT_S_DESC : CLASS_RINT_DESC_BASE<"rint.s", FGR32Opnd>;
+class RINT_D_DESC : CLASS_RINT_DESC_BASE<"rint.d", FGR64Opnd>;
+class CLASS_S_DESC : CLASS_RINT_DESC_BASE<"class.s", FGR32Opnd>;
+class CLASS_D_DESC : CLASS_RINT_DESC_BASE<"class.d", FGR64Opnd>;
+
+//===----------------------------------------------------------------------===//
+//
+// Instruction Definitions
+//
+//===----------------------------------------------------------------------===//
+
+def ADDIUPC : ADDIUPC_ENC, ADDIUPC_DESC, ISA_MIPS32R6;
+def ALIGN : ALIGN_ENC, ALIGN_DESC, ISA_MIPS32R6;
+def ALUIPC : ALUIPC_ENC, ALUIPC_DESC, ISA_MIPS32R6;
+def AUI : AUI_ENC, AUI_DESC, ISA_MIPS32R6;
+def AUIPC : AUIPC_ENC, AUIPC_DESC, ISA_MIPS32R6;
+def BALC : BALC_ENC, BALC_DESC, ISA_MIPS32R6;
+def BC1EQZ : BC1EQZ_ENC, BC1EQZ_DESC, ISA_MIPS32R6;
+def BC1NEZ : BC1NEZ_ENC, BC1NEZ_DESC, ISA_MIPS32R6;
+def BC2EQZ : BC2EQZ_ENC, BC2EQZ_DESC, ISA_MIPS32R6;
+def BC2NEZ : BC2NEZ_ENC, BC2NEZ_DESC, ISA_MIPS32R6;
+def BC : BC_ENC, BC_DESC, ISA_MIPS32R6;
+def BEQC : BEQC_ENC, BEQC_DESC, ISA_MIPS32R6;
+def BEQZALC : BEQZALC_ENC, BEQZALC_DESC, ISA_MIPS32R6;
+def BEQZC : BEQZC_ENC, BEQZC_DESC, ISA_MIPS32R6;
+def BGEC; // Also aliased to blec with operands swapped
+def BGEUC; // Also aliased to bleuc with operands swapped
+def BGEZALC : BGEZALC_ENC, BGEZALC_DESC, ISA_MIPS32R6;
+def BGEZC : BGEZC_ENC, BGEZC_DESC, ISA_MIPS32R6;
+def BGTZALC : BGTZALC_ENC, BGTZALC_DESC, ISA_MIPS32R6;
+def BGTZC : BGTZC_ENC, BGTZC_DESC, ISA_MIPS32R6;
+def BITSWAP : BITSWAP_ENC, BITSWAP_DESC, ISA_MIPS32R6;
+def BLEZALC : BLEZALC_ENC, BLEZALC_DESC, ISA_MIPS32R6;
+def BLEZC : BLEZC_ENC, BLEZC_DESC, ISA_MIPS32R6;
+def BLTC; // Also aliased to bgtc with operands swapped
+def BLTUC; // Also aliased to bgtuc with operands swapped
+def BLTZALC : BLTZALC_ENC, BLTZALC_DESC, ISA_MIPS32R6;
+def BLTZC : BLTZC_ENC, BLTZC_DESC, ISA_MIPS32R6;
+def BNEC : BNEC_ENC, BNEC_DESC, ISA_MIPS32R6;
+def BNEZALC : BNEZALC_ENC, BNEZALC_DESC, ISA_MIPS32R6;
+def BNEZC : BNEZC_ENC, BNEZC_DESC, ISA_MIPS32R6;
+def BNVC : BNVC_ENC, BNVC_DESC, ISA_MIPS32R6;
+def BOVC : BOVC_ENC, BOVC_DESC, ISA_MIPS32R6;
+def CLASS_D : CLASS_D_ENC, CLASS_D_DESC, ISA_MIPS32R6;
+def CLASS_S : CLASS_S_ENC, CLASS_S_DESC, ISA_MIPS32R6;
+defm S : CMP_CC_M<FIELD_CMP_FORMAT_S, "s", FGR32Opnd>;
+defm D : CMP_CC_M<FIELD_CMP_FORMAT_D, "d", FGR64Opnd>;
+def DIV : DIV_ENC, DIV_DESC, ISA_MIPS32R6;
+def DIVU : DIVU_ENC, DIVU_DESC, ISA_MIPS32R6;
+def JIALC : JIALC_ENC, JIALC_DESC, ISA_MIPS32R6;
+def JIC : JIC_ENC, JIC_DESC, ISA_MIPS32R6;
+// def LSA; // See MSA
+def LWPC : LWPC_ENC, LWPC_DESC, ISA_MIPS32R6;
+def LWUPC : LWUPC_ENC, LWUPC_DESC, ISA_MIPS32R6;
+def MADDF_S : MADDF_S_ENC, MADDF_S_DESC, ISA_MIPS32R6;
+def MADDF_D : MADDF_D_ENC, MADDF_D_DESC, ISA_MIPS32R6;
+def MAXA_D : MAXA_D_ENC, MAXA_D_DESC, ISA_MIPS32R6;
+def MAXA_S : MAXA_S_ENC, MAXA_S_DESC, ISA_MIPS32R6;
+def MAX_D : MAX_D_ENC, MAX_D_DESC, ISA_MIPS32R6;
+def MAX_S : MAX_S_ENC, MAX_S_DESC, ISA_MIPS32R6;
+def MINA_D : MINA_D_ENC, MINA_D_DESC, ISA_MIPS32R6;
+def MINA_S : MINA_S_ENC, MINA_S_DESC, ISA_MIPS32R6;
+def MIN_D : MIN_D_ENC, MIN_D_DESC, ISA_MIPS32R6;
+def MIN_S : MIN_S_ENC, MIN_S_DESC, ISA_MIPS32R6;
+def MOD : MOD_ENC, MOD_DESC, ISA_MIPS32R6;
+def MODU : MODU_ENC, MODU_DESC, ISA_MIPS32R6;
+def MSUBF_S : MSUBF_S_ENC, MSUBF_S_DESC, ISA_MIPS32R6;
+def MSUBF_D : MSUBF_D_ENC, MSUBF_D_DESC, ISA_MIPS32R6;
+def MUH : MUH_ENC, MUH_DESC, ISA_MIPS32R6;
+def MUHU : MUHU_ENC, MUHU_DESC, ISA_MIPS32R6;
+def MUL_R6 : MUL_R6_ENC, MUL_R6_DESC, ISA_MIPS32R6;
+def MULU : MULU_ENC, MULU_DESC, ISA_MIPS32R6;
+def NAL; // BAL with rd=0
+def RINT_D : RINT_D_ENC, RINT_D_DESC, ISA_MIPS32R6;
+def RINT_S : RINT_S_ENC, RINT_S_DESC, ISA_MIPS32R6;
+def SELEQZ : SELEQZ_ENC, SELEQZ_DESC, ISA_MIPS32R6;
+def SELEQZ_D : SELEQZ_D_ENC, SELEQZ_D_DESC, ISA_MIPS32R6;
+def SELEQZ_S : SELEQZ_S_ENC, SELEQZ_S_DESC, ISA_MIPS32R6;
+def SELNEZ : SELNEZ_ENC, SELNEZ_DESC, ISA_MIPS32R6;
+def SELNEZ_D : SELNEZ_D_ENC, SELNEZ_D_DESC, ISA_MIPS32R6;
+def SELNEZ_S : SELNEZ_S_ENC, SELNEZ_S_DESC, ISA_MIPS32R6;
+def SEL_D : SEL_D_ENC, SEL_D_DESC, ISA_MIPS32R6;
+def SEL_S : SEL_S_ENC, SEL_S_DESC, ISA_MIPS32R6;
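
BC and BALC above carry a raw 26-bit offset (BRANCH_OFF26_FM, brtarget26). In MIPS32r6 the field is shifted left two bits, sign-extended, and added to the address of the following instruction; note hasDelaySlot is 0 in BRANCH_DESC_BASE, so these are compact branches with no delay slot. A small C++ sketch of the target computation, stated from the architecture definition rather than this patch:

// Sketch: effective target of a BC/BALC-style 26-bit compact branch.
#include <cstdint>

static uint64_t compactBranchTarget(uint64_t pc, uint32_t offset26) {
  uint64_t off = (uint64_t(offset26) & 0x3FFFFFFu) << 2; // 28-bit byte offset
  if (off & (1u << 27))             // propagate the sign bit upward
    off |= ~uint64_t(0) << 28;
  return pc + 4 + off;              // relative to the next instruction; wraps
}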
diff --git a/lib/Target/Mips/Mips64InstrInfo.td b/lib/Target/Mips/Mips64InstrInfo.td
index 7115d11..924b325 100644
--- a/lib/Target/Mips/Mips64InstrInfo.td
+++ b/lib/Target/Mips/Mips64InstrInfo.td
@@ -20,6 +20,9 @@ def uimm16_64 : Operand<i64> {
let PrintMethod = "printUnsignedImm";
}
+// Signed Operand
+def simm10_64 : Operand<i64>;
+
// Transformation Function - get Imm - 32.
def Subtract32 : SDNodeXForm<imm, [{
return getImm(N, (unsigned)N->getZExtValue() - 32);
@@ -28,6 +31,11 @@ def Subtract32 : SDNodeXForm<imm, [{
// shamt must fit in 6 bits.
def immZExt6 : ImmLeaf<i32, [{return Imm == (Imm & 0x3f);}]>;
+// Node immediate fits as 10-bit sign extended on target immediate.
+// e.g. seqi, snei
+def immSExt10_64 : PatLeaf<(i64 imm),
+ [{ return isInt<10>(N->getSExtValue()); }]>;
+
//===----------------------------------------------------------------------===//
// Instructions specific format
//===----------------------------------------------------------------------===//
@@ -53,10 +61,11 @@ let isPseudo = 1, isCodeGenOnly = 1 in {
//===----------------------------------------------------------------------===//
let DecoderNamespace = "Mips64" in {
/// Arithmetic Instructions (ALU Immediate)
-def DADDi : ArithLogicI<"daddi", simm16_64, GPR64Opnd>, ADDI_FM<0x18>;
+def DADDi : ArithLogicI<"daddi", simm16_64, GPR64Opnd>, ADDI_FM<0x18>,
+ ISA_MIPS3;
def DADDiu : ArithLogicI<"daddiu", simm16_64, GPR64Opnd, II_DADDIU,
immSExt16, add>,
- ADDI_FM<0x19>, IsAsCheapAsAMove;
+ ADDI_FM<0x19>, IsAsCheapAsAMove, ISA_MIPS3;
let isCodeGenOnly = 1 in {
def SLTi64 : SetCC_I<"slti", setlt, simm16_64, immSExt16, GPR64Opnd>,
@@ -73,12 +82,14 @@ def LUi64 : LoadUpper<"lui", GPR64Opnd, uimm16_64>, LUI_FM;
}
/// Arithmetic Instructions (3-Operand, R-Type)
-def DADD : ArithLogicR<"dadd", GPR64Opnd, 1, II_DADD>, ADD_FM<0, 0x2c>;
-def DADDu : ArithLogicR<"daddu", GPR64Opnd, 1, II_DADDU, add>,
- ADD_FM<0, 0x2d>;
-def DSUBu : ArithLogicR<"dsubu", GPR64Opnd, 0, II_DSUBU, sub>,
- ADD_FM<0, 0x2f>;
-def DSUB : ArithLogicR<"dsub", GPR64Opnd, 0, II_DSUB, sub>, ADD_FM<0, 0x2e>;
+def DADD : ArithLogicR<"dadd", GPR64Opnd, 1, II_DADD>, ADD_FM<0, 0x2c>,
+ ISA_MIPS3;
+def DADDu : ArithLogicR<"daddu", GPR64Opnd, 1, II_DADDU, add>, ADD_FM<0, 0x2d>,
+ ISA_MIPS3;
+def DSUBu : ArithLogicR<"dsubu", GPR64Opnd, 0, II_DSUBU, sub>, ADD_FM<0, 0x2f>,
+ ISA_MIPS3;
+def DSUB : ArithLogicR<"dsub", GPR64Opnd, 0, II_DSUB>, ADD_FM<0, 0x2e>,
+ ISA_MIPS3;
let isCodeGenOnly = 1 in {
def SLT64 : SetCC_R<"slt", setlt, GPR64Opnd>, ADD_FM<0, 0x2a>;
@@ -91,33 +102,32 @@ def NOR64 : LogicNOR<"nor", GPR64Opnd>, ADD_FM<0, 0x27>;
/// Shift Instructions
def DSLL : shift_rotate_imm<"dsll", uimm6, GPR64Opnd, II_DSLL, shl, immZExt6>,
- SRA_FM<0x38, 0>;
+ SRA_FM<0x38, 0>, ISA_MIPS3;
def DSRL : shift_rotate_imm<"dsrl", uimm6, GPR64Opnd, II_DSRL, srl, immZExt6>,
- SRA_FM<0x3a, 0>;
+ SRA_FM<0x3a, 0>, ISA_MIPS3;
def DSRA : shift_rotate_imm<"dsra", uimm6, GPR64Opnd, II_DSRA, sra, immZExt6>,
- SRA_FM<0x3b, 0>;
+ SRA_FM<0x3b, 0>, ISA_MIPS3;
def DSLLV : shift_rotate_reg<"dsllv", GPR64Opnd, II_DSLLV, shl>,
- SRLV_FM<0x14, 0>;
+ SRLV_FM<0x14, 0>, ISA_MIPS3;
def DSRLV : shift_rotate_reg<"dsrlv", GPR64Opnd, II_DSRLV, srl>,
- SRLV_FM<0x16, 0>;
+ SRLV_FM<0x16, 0>, ISA_MIPS3;
def DSRAV : shift_rotate_reg<"dsrav", GPR64Opnd, II_DSRAV, sra>,
- SRLV_FM<0x17, 0>;
+ SRLV_FM<0x17, 0>, ISA_MIPS3;
def DSLL32 : shift_rotate_imm<"dsll32", uimm5, GPR64Opnd, II_DSLL32>,
- SRA_FM<0x3c, 0>;
+ SRA_FM<0x3c, 0>, ISA_MIPS3;
def DSRL32 : shift_rotate_imm<"dsrl32", uimm5, GPR64Opnd, II_DSRL32>,
- SRA_FM<0x3e, 0>;
+ SRA_FM<0x3e, 0>, ISA_MIPS3;
def DSRA32 : shift_rotate_imm<"dsra32", uimm5, GPR64Opnd, II_DSRA32>,
- SRA_FM<0x3f, 0>;
+ SRA_FM<0x3f, 0>, ISA_MIPS3;
// Rotate Instructions
-let Predicates = [HasMips64r2, HasStdEnc] in {
- def DROTR : shift_rotate_imm<"drotr", uimm6, GPR64Opnd, II_DROTR, rotr,
- immZExt6>, SRA_FM<0x3a, 1>;
- def DROTRV : shift_rotate_reg<"drotrv", GPR64Opnd, II_DROTRV, rotr>,
- SRLV_FM<0x16, 1>;
- def DROTR32 : shift_rotate_imm<"drotr32", uimm5, GPR64Opnd, II_DROTR32>,
- SRA_FM<0x3e, 1>;
-}
+def DROTR : shift_rotate_imm<"drotr", uimm6, GPR64Opnd, II_DROTR, rotr,
+ immZExt6>,
+ SRA_FM<0x3a, 1>, ISA_MIPS64R2;
+def DROTRV : shift_rotate_reg<"drotrv", GPR64Opnd, II_DROTRV, rotr>,
+ SRLV_FM<0x16, 1>, ISA_MIPS64R2;
+def DROTR32 : shift_rotate_imm<"drotr32", uimm5, GPR64Opnd, II_DROTR32>,
+ SRA_FM<0x3e, 1>, ISA_MIPS64R2;
/// Load and Store Instructions
/// aligned
@@ -132,9 +142,9 @@ def SH64 : Store<"sh", GPR64Opnd, truncstorei16, II_SH>, LW_FM<0x29>;
def SW64 : Store<"sw", GPR64Opnd, truncstorei32, II_SW>, LW_FM<0x2b>;
}
-def LWu : Load<"lwu", GPR64Opnd, zextloadi32, II_LWU>, LW_FM<0x27>;
-def LD : Load<"ld", GPR64Opnd, load, II_LD>, LW_FM<0x37>;
-def SD : Store<"sd", GPR64Opnd, store, II_SD>, LW_FM<0x3f>;
+def LWu : Load<"lwu", GPR64Opnd, zextloadi32, II_LWU>, LW_FM<0x27>, ISA_MIPS3;
+def LD : Load<"ld", GPR64Opnd, load, II_LD>, LW_FM<0x37>, ISA_MIPS3;
+def SD : Store<"sd", GPR64Opnd, store, II_SD>, LW_FM<0x3f>, ISA_MIPS3;
/// load/store left/right
let isCodeGenOnly = 1 in {
@@ -144,14 +154,18 @@ def SWL64 : StoreLeftRight<"swl", MipsSWL, GPR64Opnd, II_SWL>, LW_FM<0x2a>;
def SWR64 : StoreLeftRight<"swr", MipsSWR, GPR64Opnd, II_SWR>, LW_FM<0x2e>;
}
-def LDL : LoadLeftRight<"ldl", MipsLDL, GPR64Opnd, II_LDL>, LW_FM<0x1a>;
-def LDR : LoadLeftRight<"ldr", MipsLDR, GPR64Opnd, II_LDR>, LW_FM<0x1b>;
-def SDL : StoreLeftRight<"sdl", MipsSDL, GPR64Opnd, II_SDL>, LW_FM<0x2c>;
-def SDR : StoreLeftRight<"sdr", MipsSDR, GPR64Opnd, II_SDR>, LW_FM<0x2d>;
+def LDL : LoadLeftRight<"ldl", MipsLDL, GPR64Opnd, II_LDL>, LW_FM<0x1a>,
+ ISA_MIPS3_NOT_32R6_64R6;
+def LDR : LoadLeftRight<"ldr", MipsLDR, GPR64Opnd, II_LDR>, LW_FM<0x1b>,
+ ISA_MIPS3_NOT_32R6_64R6;
+def SDL : StoreLeftRight<"sdl", MipsSDL, GPR64Opnd, II_SDL>, LW_FM<0x2c>,
+ ISA_MIPS3_NOT_32R6_64R6;
+def SDR : StoreLeftRight<"sdr", MipsSDR, GPR64Opnd, II_SDR>, LW_FM<0x2d>,
+ ISA_MIPS3_NOT_32R6_64R6;
/// Load-linked, Store-conditional
-def LLD : LLBase<"lld", GPR64Opnd>, LW_FM<0x34>;
-def SCD : SCBase<"scd", GPR64Opnd>, LW_FM<0x3c>;
+def LLD : LLBase<"lld", GPR64Opnd>, LW_FM<0x34>, ISA_MIPS3;
+def SCD : SCBase<"scd", GPR64Opnd>, LW_FM<0x3c>, ISA_MIPS3;
/// Jump and Branch Instructions
let isCodeGenOnly = 1 in {
@@ -169,17 +183,17 @@ def TAILCALL64_R : TailCallReg<GPR64Opnd, JR, GPR32Opnd>;
/// Multiply and Divide Instructions.
def DMULT : Mult<"dmult", II_DMULT, GPR64Opnd, [HI0_64, LO0_64]>,
- MULT_FM<0, 0x1c>;
+ MULT_FM<0, 0x1c>, ISA_MIPS3;
def DMULTu : Mult<"dmultu", II_DMULTU, GPR64Opnd, [HI0_64, LO0_64]>,
- MULT_FM<0, 0x1d>;
+ MULT_FM<0, 0x1d>, ISA_MIPS3;
def PseudoDMULT : MultDivPseudo<DMULT, ACC128, GPR64Opnd, MipsMult,
II_DMULT>;
def PseudoDMULTu : MultDivPseudo<DMULTu, ACC128, GPR64Opnd, MipsMultu,
II_DMULTU>;
def DSDIV : Div<"ddiv", II_DDIV, GPR64Opnd, [HI0_64, LO0_64]>,
- MULT_FM<0, 0x1e>;
+ MULT_FM<0, 0x1e>, ISA_MIPS3;
def DUDIV : Div<"ddivu", II_DDIVU, GPR64Opnd, [HI0_64, LO0_64]>,
- MULT_FM<0, 0x1f>;
+ MULT_FM<0, 0x1f>, ISA_MIPS3;
def PseudoDSDIV : MultDivPseudo<DSDIV, ACC128, GPR64Opnd, MipsDivRem,
II_DDIV, 0, 1, 1>;
def PseudoDUDIV : MultDivPseudo<DUDIV, ACC128, GPR64Opnd, MipsDivRemU,
@@ -195,17 +209,19 @@ def PseudoMFLO64 : PseudoMFLOHI<GPR64, ACC128, MipsMFLO>;
def PseudoMTLOHI64 : PseudoMTLOHI<ACC128, GPR64>;
/// Sign Ext In Register Instructions.
-def SEB64 : SignExtInReg<"seb", i8, GPR64Opnd, II_SEB>, SEB_FM<0x10, 0x20>;
-def SEH64 : SignExtInReg<"seh", i16, GPR64Opnd, II_SEH>, SEB_FM<0x18, 0x20>;
+def SEB64 : SignExtInReg<"seb", i8, GPR64Opnd, II_SEB>, SEB_FM<0x10, 0x20>,
+ ISA_MIPS32R2;
+def SEH64 : SignExtInReg<"seh", i16, GPR64Opnd, II_SEH>, SEB_FM<0x18, 0x20>,
+ ISA_MIPS32R2;
}
/// Count Leading
-def DCLZ : CountLeading0<"dclz", GPR64Opnd>, CLO_FM<0x24>;
-def DCLO : CountLeading1<"dclo", GPR64Opnd>, CLO_FM<0x25>;
+def DCLZ : CountLeading0<"dclz", GPR64Opnd>, CLO_FM<0x24>, ISA_MIPS64;
+def DCLO : CountLeading1<"dclo", GPR64Opnd>, CLO_FM<0x25>, ISA_MIPS64;
/// Double Word Swap Bytes/HalfWords
-def DSBH : SubwordSwap<"dsbh", GPR64Opnd>, SEB_FM<2, 0x24>;
-def DSHD : SubwordSwap<"dshd", GPR64Opnd>, SEB_FM<5, 0x24>;
+def DSBH : SubwordSwap<"dsbh", GPR64Opnd>, SEB_FM<2, 0x24>, ISA_MIPS64R2;
+def DSHD : SubwordSwap<"dshd", GPR64Opnd>, SEB_FM<5, 0x24>, ISA_MIPS64R2;
def LEA_ADDiu64 : EffectiveAddress<"daddiu", GPR64Opnd>, LW_FM<0x19>;
@@ -229,8 +245,19 @@ let isCodeGenOnly = 1, rs = 0, shamt = 0 in {
"sll\t$rd, $rt, 0", [], II_SLL>;
}
+// We need the following pseudo instruction to avoid offset calculation for
+// long branches. See the comment in file MipsLongBranch.cpp for detailed
+// explanation.
+
+// Expands to: daddiu $dst, $src, %PART($tgt - $baltgt)
+// where %PART may be %hi or %lo, depending on the relocation kind
+// that $tgt is annotated with.
+def LONG_BRANCH_DADDiu : PseudoSE<(outs GPR64Opnd:$dst),
+ (ins GPR64Opnd:$src, brtarget:$tgt, brtarget:$baltgt), []>;
+
// Cavium Octeon cnMIPS instructions
-let Predicates = [HasCnMips] in {
+let EncodingPredicates = []<Predicate>, // FIXME: The lack of HasStdEnc is probably a bug
+ AdditionalPredicates = [HasCnMips] in {
class Count1s<string opstr, RegisterOperand RO>:
InstSE<(outs RO:$rd), (ins RO:$rs), !strconcat(opstr, "\t$rd, $rs"),
@@ -254,6 +281,14 @@ class SetCC64_R<string opstr, PatFrag cond_op> :
let TwoOperandAliasConstraint = "$rd = $rs";
}
+class SetCC64_I<string opstr, PatFrag cond_op>:
+ InstSE<(outs GPR64Opnd:$rt), (ins GPR64Opnd:$rs, simm10_64:$imm10),
+ !strconcat(opstr, "\t$rt, $rs, $imm10"),
+ [(set GPR64Opnd:$rt, (cond_op GPR64Opnd:$rs, immSExt10_64:$imm10))],
+ II_SEQI_SNEI, FrmI, opstr> {
+ let TwoOperandAliasConstraint = "$rt = $rs";
+}
+
// Unsigned Byte Add
let Pattern = [(set GPR64Opnd:$rd,
(and (add GPR64Opnd:$rs, GPR64Opnd:$rt), 255))] in
@@ -287,7 +322,25 @@ def DPOP : Count1s<"dpop", GPR64Opnd>, POP_FM<0x2d>;
// Set on equal/not equal
def SEQ : SetCC64_R<"seq", seteq>, SEQ_FM<0x2a>;
+def SEQi : SetCC64_I<"seqi", seteq>, SEQI_FM<0x2e>;
def SNE : SetCC64_R<"sne", setne>, SEQ_FM<0x2b>;
+def SNEi : SetCC64_I<"snei", setne>, SEQI_FM<0x2f>;
+
+// 192-bit x 64-bit Unsigned Multiply and Add
+let Defs = [P0, P1, P2] in
+def V3MULU: ArithLogicR<"v3mulu", GPR64Opnd, 0, II_DMUL>,
+ ADD_FM<0x1c, 0x11>;
+
+// 64-bit Unsigned Multiply and Add Move
+let Defs = [MPL0, P0, P1, P2] in
+def VMM0 : ArithLogicR<"vmm0", GPR64Opnd, 0, II_DMUL>,
+ ADD_FM<0x1c, 0x10>;
+
+// 64-bit Unsigned Multiply and Add
+let Defs = [MPL1, MPL2, P0, P1, P2] in
+def VMULU : ArithLogicR<"vmulu", GPR64Opnd, 0, II_DMUL>,
+ ADD_FM<0x1c, 0x0f>;
+
}
}
@@ -297,12 +350,10 @@ def SNE : SetCC64_R<"sne", setne>, SEQ_FM<0x2b>;
//===----------------------------------------------------------------------===//
// extended loads
-let Predicates = [HasStdEnc] in {
- def : MipsPat<(i64 (extloadi1 addr:$src)), (LB64 addr:$src)>;
- def : MipsPat<(i64 (extloadi8 addr:$src)), (LB64 addr:$src)>;
- def : MipsPat<(i64 (extloadi16 addr:$src)), (LH64 addr:$src)>;
- def : MipsPat<(i64 (extloadi32 addr:$src)), (LW64 addr:$src)>;
-}
+def : MipsPat<(i64 (extloadi1 addr:$src)), (LB64 addr:$src)>;
+def : MipsPat<(i64 (extloadi8 addr:$src)), (LB64 addr:$src)>;
+def : MipsPat<(i64 (extloadi16 addr:$src)), (LH64 addr:$src)>;
+def : MipsPat<(i64 (extloadi32 addr:$src)), (LW64 addr:$src)>;
// hi/lo relocs
def : MipsPat<(MipsHi tglobaladdr:$in), (LUi64 tglobaladdr:$in)>;
@@ -355,8 +406,7 @@ defm : SetgeImmPats<GPR64, SLTi64, SLTiu64>;
// truncate
def : MipsPat<(i32 (trunc GPR64:$src)),
- (SLL (EXTRACT_SUBREG GPR64:$src, sub_32), 0)>,
- Requires<[HasStdEnc]>;
+ (SLL (EXTRACT_SUBREG GPR64:$src, sub_32), 0)>;
// 32-to-64-bit extension
def : MipsPat<(i64 (anyext GPR32:$src)), (SLL64_32 GPR32:$src)>;
@@ -373,64 +423,59 @@ def : MipsPat<(bswap GPR64:$rt), (DSHD (DSBH GPR64:$rt))>;
//===----------------------------------------------------------------------===//
// Instruction aliases
//===----------------------------------------------------------------------===//
-def : InstAlias<"move $dst, $src",
- (DADDu GPR64Opnd:$dst, GPR64Opnd:$src, ZERO_64), 1>,
- Requires<[HasMips64]>;
-def : InstAlias<"daddu $rs, $rt, $imm",
- (DADDiu GPR64Opnd:$rs, GPR64Opnd:$rt, simm16_64:$imm),
- 0>;
-def : InstAlias<"dadd $rs, $rt, $imm",
- (DADDi GPR64Opnd:$rs, GPR64Opnd:$rt, simm16_64:$imm),
- 0>;
-def : InstAlias<"daddu $rs, $imm",
- (DADDiu GPR64Opnd:$rs, GPR64Opnd:$rs, simm16_64:$imm),
- 0>;
-def : InstAlias<"dadd $rs, $imm",
- (DADDi GPR64Opnd:$rs, GPR64Opnd:$rs, simm16_64:$imm),
- 0>;
-def : InstAlias<"add $rs, $imm",
- (ADDi GPR32Opnd:$rs, GPR32Opnd:$rs, simm16:$imm),
- 0>;
-def : InstAlias<"addu $rs, $imm",
- (ADDiu GPR32Opnd:$rs, GPR32Opnd:$rs, simm16:$imm),
- 0>;
-let isPseudo=1, usesCustomInserter=1, isCodeGenOnly=1 in {
-def SUBi : MipsInst<(outs GPR32Opnd: $rt), (ins GPR32Opnd: $rs, simm16: $imm),
- "sub\t$rt, $rs, $imm", [], II_DSUB, Pseudo>;
-def SUBiu : MipsInst<(outs GPR32Opnd: $rt), (ins GPR32Opnd: $rs, simm16: $imm),
- "subu\t$rt, $rs, $imm", [], II_DSUB, Pseudo>;
-def DSUBi : MipsInst<(outs GPR64Opnd: $rt), (ins GPR64Opnd: $rs, simm16_64: $imm),
- "ssub\t$rt, $rs, $imm", [], II_DSUB, Pseudo>;
-def DSUBiu : MipsInst<(outs GPR64Opnd: $rt), (ins GPR64Opnd: $rs, simm16_64: $imm),
- "ssubu\t$rt, $rs, $imm", [], II_DSUB, Pseudo>;
-}
-def : InstAlias<"dsubu $rt, $rs, $imm",
- (DSUBiu GPR64Opnd:$rt, GPR64Opnd:$rs, simm16_64: $imm),
- 0>;
-def : InstAlias<"sub $rs, $imm",
- (SUBi GPR32Opnd:$rs, GPR32Opnd:$rs, simm16:$imm),
- 0>;
-def : InstAlias<"subu $rs, $imm",
- (SUBiu GPR32Opnd:$rs, GPR32Opnd:$rs, simm16:$imm),
- 0>;
-def : InstAlias<"dsub $rs, $imm",
- (DSUBi GPR64Opnd:$rs, GPR64Opnd:$rs, simm16_64:$imm),
- 0>;
-def : InstAlias<"dsubu $rs, $imm",
- (DSUBiu GPR64Opnd:$rs, GPR64Opnd:$rs, simm16_64:$imm),
- 0>;
+def : MipsInstAlias<"move $dst, $src",
+ (DADDu GPR64Opnd:$dst, GPR64Opnd:$src, ZERO_64), 1>,
+ GPR_64;
+def : MipsInstAlias<"daddu $rs, $rt, $imm",
+ (DADDiu GPR64Opnd:$rs, GPR64Opnd:$rt, simm16_64:$imm),
+ 0>;
+def : MipsInstAlias<"dadd $rs, $rt, $imm",
+ (DADDi GPR64Opnd:$rs, GPR64Opnd:$rt, simm16_64:$imm),
+ 0>;
+def : MipsInstAlias<"daddu $rs, $imm",
+ (DADDiu GPR64Opnd:$rs, GPR64Opnd:$rs, simm16_64:$imm),
+ 0>;
+def : MipsInstAlias<"dadd $rs, $imm",
+ (DADDi GPR64Opnd:$rs, GPR64Opnd:$rs, simm16_64:$imm),
+ 0>;
+def : MipsInstAlias<"add $rs, $imm",
+ (ADDi GPR32Opnd:$rs, GPR32Opnd:$rs, simm16:$imm),
+ 0>;
+def : MipsInstAlias<"addu $rs, $imm",
+ (ADDiu GPR32Opnd:$rs, GPR32Opnd:$rs, simm16:$imm),
+ 0>;
+def : MipsInstAlias<"dsll $rd, $rt, $rs",
+ (DSLLV GPR64Opnd:$rd, GPR64Opnd:$rt, GPR32Opnd:$rs), 0>,
+ ISA_MIPS3;
+def : MipsInstAlias<"dsubu $rt, $rs, $imm",
+ (DADDiu GPR64Opnd:$rt, GPR64Opnd:$rs,
+ InvertedImOperand64:$imm), 0>;
+def : MipsInstAlias<"dsub $rs, $imm",
+ (DADDi GPR64Opnd:$rs, GPR64Opnd:$rs,
+ InvertedImOperand64:$imm),
+ 0>;
+def : MipsInstAlias<"dsubu $rs, $imm",
+ (DADDiu GPR64Opnd:$rs, GPR64Opnd:$rs,
+ InvertedImOperand64:$imm),
+ 0>;
+def : MipsInstAlias<"dsra $rd, $rt, $rs",
+ (DSRAV GPR64Opnd:$rd, GPR64Opnd:$rt, GPR32Opnd:$rs), 0>,
+ ISA_MIPS3;
+def : MipsInstAlias<"dsrl $rd, $rt, $rs",
+ (DSRLV GPR64Opnd:$rd, GPR64Opnd:$rt, GPR32Opnd:$rs), 0>,
+ ISA_MIPS3;
/// Move between CPU and coprocessor registers
let DecoderNamespace = "Mips64", Predicates = [HasMips64] in {
def DMFC0 : MFC3OP<"dmfc0", GPR64Opnd>, MFC3OP_FM<0x10, 1>;
-def DMTC0 : MFC3OP<"dmtc0", GPR64Opnd>, MFC3OP_FM<0x10, 5>;
-def DMFC2 : MFC3OP<"dmfc2", GPR64Opnd>, MFC3OP_FM<0x12, 1>;
-def DMTC2 : MFC3OP<"dmtc2", GPR64Opnd>, MFC3OP_FM<0x12, 5>;
+def DMTC0 : MFC3OP<"dmtc0", GPR64Opnd>, MFC3OP_FM<0x10, 5>, ISA_MIPS3;
+def DMFC2 : MFC3OP<"dmfc2", GPR64Opnd>, MFC3OP_FM<0x12, 1>, ISA_MIPS3;
+def DMTC2 : MFC3OP<"dmtc2", GPR64Opnd>, MFC3OP_FM<0x12, 5>, ISA_MIPS3;
}
// Two operand (implicit 0 selector) versions:
-def : InstAlias<"dmfc0 $rt, $rd", (DMFC0 GPR64Opnd:$rt, GPR64Opnd:$rd, 0), 0>;
-def : InstAlias<"dmtc0 $rt, $rd", (DMTC0 GPR64Opnd:$rt, GPR64Opnd:$rd, 0), 0>;
-def : InstAlias<"dmfc2 $rt, $rd", (DMFC2 GPR64Opnd:$rt, GPR64Opnd:$rd, 0), 0>;
-def : InstAlias<"dmtc2 $rt, $rd", (DMTC2 GPR64Opnd:$rt, GPR64Opnd:$rd, 0), 0>;
+def : MipsInstAlias<"dmfc0 $rt, $rd", (DMFC0 GPR64Opnd:$rt, GPR64Opnd:$rd, 0), 0>;
+def : MipsInstAlias<"dmtc0 $rt, $rd", (DMTC0 GPR64Opnd:$rt, GPR64Opnd:$rd, 0), 0>;
+def : MipsInstAlias<"dmfc2 $rt, $rd", (DMFC2 GPR64Opnd:$rt, GPR64Opnd:$rd, 0), 0>;
+def : MipsInstAlias<"dmtc2 $rt, $rd", (DMTC2 GPR64Opnd:$rt, GPR64Opnd:$rd, 0), 0>;
diff --git a/lib/Target/Mips/Mips64r6InstrInfo.td b/lib/Target/Mips/Mips64r6InstrInfo.td
new file mode 100644
index 0000000..f971218
--- /dev/null
+++ b/lib/Target/Mips/Mips64r6InstrInfo.td
@@ -0,0 +1,88 @@
+//=- Mips64r6InstrInfo.td - Mips64r6 Instruction Information -*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes Mips64r6 instructions.
+//
+//===----------------------------------------------------------------------===//
+
+// Notes about removals/changes from MIPS32r6:
+// Reencoded: dclo, dclz
+// Reencoded: lld, scd
+// Removed: daddi
+// Removed: ddiv, ddivu, dmult, dmultu
+// Removed: div, divu
+
+//===----------------------------------------------------------------------===//
+//
+// Instruction Encodings
+//
+//===----------------------------------------------------------------------===//
+
+class DALIGN_ENC : SPECIAL3_DALIGN_FM<OPCODE6_DALIGN>;
+class DAUI_ENC : DAUI_FM;
+class DAHI_ENC : REGIMM_FM<OPCODE5_DAHI>;
+class DATI_ENC : REGIMM_FM<OPCODE5_DATI>;
+class DBITSWAP_ENC : SPECIAL3_2R_FM<OPCODE6_DBITSWAP>;
+class DDIV_ENC : SPECIAL_3R_FM<0b00010, 0b011110>;
+class DDIVU_ENC : SPECIAL_3R_FM<0b00010, 0b011111>;
+class DMOD_ENC : SPECIAL_3R_FM<0b00011, 0b011110>;
+class DMODU_ENC : SPECIAL_3R_FM<0b00011, 0b011111>;
+class DMUH_ENC : SPECIAL_3R_FM<0b00011, 0b111000>;
+class DMUHU_ENC : SPECIAL_3R_FM<0b00011, 0b111001>;
+class DMUL_R6_ENC : SPECIAL_3R_FM<0b00010, 0b111000>;
+class DMULU_ENC : SPECIAL_3R_FM<0b00010, 0b111001>;
+
+//===----------------------------------------------------------------------===//
+//
+// Instruction Descriptions
+//
+//===----------------------------------------------------------------------===//
+
+class AHI_ATI_DESC_BASE<string instr_asm, RegisterOperand GPROpnd> {
+ dag OutOperandList = (outs GPROpnd:$rs);
+ dag InOperandList = (ins GPROpnd:$rt, simm16:$imm);
+ string AsmString = !strconcat(instr_asm, "\t$rt, $imm");
+ string Constraints = "$rs = $rt";
+}
+
+class DALIGN_DESC : ALIGN_DESC_BASE<"dalign", GPR64Opnd, uimm3>;
+class DAHI_DESC : AHI_ATI_DESC_BASE<"dahi", GPR64Opnd>;
+class DATI_DESC : AHI_ATI_DESC_BASE<"dati", GPR64Opnd>;
+class DAUI_DESC : AUI_DESC_BASE<"daui", GPR64Opnd>;
+class DBITSWAP_DESC : BITSWAP_DESC_BASE<"dbitswap", GPR64Opnd>;
+class DDIV_DESC : DIVMOD_DESC_BASE<"ddiv", GPR64Opnd>;
+class DDIVU_DESC : DIVMOD_DESC_BASE<"ddivu", GPR64Opnd>;
+class DMOD_DESC : DIVMOD_DESC_BASE<"dmod", GPR64Opnd>;
+class DMODU_DESC : DIVMOD_DESC_BASE<"dmodu", GPR64Opnd>;
+class DMUH_DESC : MUL_R6_DESC_BASE<"dmuh", GPR64Opnd>;
+class DMUHU_DESC : MUL_R6_DESC_BASE<"dmuhu", GPR64Opnd>;
+class DMUL_R6_DESC : MUL_R6_DESC_BASE<"dmul", GPR64Opnd>;
+class DMULU_DESC : MUL_R6_DESC_BASE<"dmulu", GPR64Opnd>;
+
+//===----------------------------------------------------------------------===//
+//
+// Instruction Definitions
+//
+//===----------------------------------------------------------------------===//
+
+def DAHI : DAHI_ENC, DAHI_DESC, ISA_MIPS64R6;
+def DALIGN : DALIGN_ENC, DALIGN_DESC, ISA_MIPS64R6;
+def DATI : DATI_ENC, DATI_DESC, ISA_MIPS64R6;
+def DAUI : DAUI_ENC, DAUI_DESC, ISA_MIPS64R6;
+def DBITSWAP : DBITSWAP_ENC, DBITSWAP_DESC, ISA_MIPS64R6;
+def DDIV : DDIV_ENC, DDIV_DESC, ISA_MIPS64R6;
+def DDIVU : DDIVU_ENC, DDIVU_DESC, ISA_MIPS64R6;
+// def DLSA; // See MSA
+def DMOD : DMOD_ENC, DMOD_DESC, ISA_MIPS64R6;
+def DMODU : DMODU_ENC, DMODU_DESC, ISA_MIPS64R6;
+def DMUH: DMUH_ENC, DMUH_DESC, ISA_MIPS64R6;
+def DMUHU: DMUHU_ENC, DMUHU_DESC, ISA_MIPS64R6;
+def DMUL_R6: DMUL_R6_ENC, DMUL_R6_DESC, ISA_MIPS64R6;
+def DMULU: DMULU_ENC, DMULU_DESC, ISA_MIPS64R6;
+def LDPC;
diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp
index d5df855..6df90aa 100644
--- a/lib/Target/Mips/MipsAsmPrinter.cpp
+++ b/lib/Target/Mips/MipsAsmPrinter.cpp
@@ -12,7 +12,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "mips-asm-printer"
#include "InstPrinter/MipsInstPrinter.h"
#include "MCTargetDesc/MipsBaseInfo.h"
#include "MCTargetDesc/MipsMCNaCl.h"
@@ -52,6 +51,8 @@
using namespace llvm;
+#define DEBUG_TYPE "mips-asm-printer"
+
MipsTargetStreamer &MipsAsmPrinter::getTargetStreamer() {
return static_cast<MipsTargetStreamer &>(*OutStreamer.getTargetStreamer());
}
@@ -147,7 +148,8 @@ void MipsAsmPrinter::EmitInstruction(const MachineInstr *MI) {
// removing another test for this situation downstream in the
// callchain.
//
- if (I->isPseudo() && !Subtarget->inMips16Mode())
+ if (I->isPseudo() && !Subtarget->inMips16Mode()
+ && !isLongBranchPseudo(I->getOpcode()))
llvm_unreachable("Pseudo opcode found in EmitInstruction()");
MCInst TmpInst0;
@@ -285,9 +287,8 @@ void MipsAsmPrinter::EmitFunctionEntryLabel() {
if (Subtarget->inMicroMipsMode())
TS.emitDirectiveSetMicroMips();
- // leave out until FSF available gas has micromips changes
- // else
- // TS.emitDirectiveSetNoMicroMips();
+ else
+ TS.emitDirectiveSetNoMicroMips();
if (Subtarget->inMips16Mode())
TS.emitDirectiveSetMips16();
@@ -621,16 +622,29 @@ printFCCOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
void MipsAsmPrinter::EmitStartOfAsmFile(Module &M) {
// TODO: Need to add -mabicalls and -mno-abicalls flags.
// Currently we assume that -mabicalls is the default.
- getTargetStreamer().emitDirectiveAbiCalls();
- Reloc::Model RM = Subtarget->getRelocationModel();
- if (RM == Reloc::Static && !Subtarget->hasMips64())
- getTargetStreamer().emitDirectiveOptionPic0();
+ bool IsABICalls = true;
+ if (IsABICalls) {
+ getTargetStreamer().emitDirectiveAbiCalls();
+ Reloc::Model RM = Subtarget->getRelocationModel();
+ // FIXME: This condition should be a lot more complicated than it is here.
+ // Ideally it should test for properties of the ABI and not the ABI
+ // itself.
+ // For the moment, I'm only correcting enough to make MIPS-IV work.
+ if (RM == Reloc::Static && !Subtarget->isABI_N64())
+ getTargetStreamer().emitDirectiveOptionPic0();
+ }
// Tell the assembler which ABI we are using
std::string SectionName = std::string(".mdebug.") + getCurrentABIString();
OutStreamer.SwitchSection(OutContext.getELFSection(
SectionName, ELF::SHT_PROGBITS, 0, SectionKind::getDataRel()));
+ // NaN: At the moment we only support:
+ // 1. .nan legacy (default)
+ // 2. .nan 2008
+ Subtarget->isNaN2008() ? getTargetStreamer().emitDirectiveNaN2008()
+ : getTargetStreamer().emitDirectiveNaNLegacy();
+
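+ // For example, a module built for 2008-style NaN encoding (e.g. via a
+ // -mnan=2008 style front-end flag; the exact flag is illustrative) gets
+ // ".nan 2008" emitted here, while legacy-NaN modules get ".nan legacy".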
// TODO: handle O64 ABI
if (Subtarget->isABI_EABI()) {
@@ -824,7 +838,7 @@ void MipsAsmPrinter::EmitFPCallStub(
const MCSectionELF *M = OutContext.getELFSection(
".mips16.call.fp." + std::string(Symbol), ELF::SHT_PROGBITS,
ELF::SHF_ALLOC | ELF::SHF_EXECINSTR, SectionKind::getText());
- OutStreamer.SwitchSection(M, 0);
+ OutStreamer.SwitchSection(M, nullptr);
//
// .align 2
//
@@ -941,6 +955,12 @@ void MipsAsmPrinter::NaClAlignIndirectJumpTargets(MachineFunction &MF) {
}
}
+bool MipsAsmPrinter::isLongBranchPseudo(int Opcode) const {
+ return (Opcode == Mips::LONG_BRANCH_LUi
+ || Opcode == Mips::LONG_BRANCH_ADDiu
+ || Opcode == Mips::LONG_BRANCH_DADDiu);
+}
+
// Force static initialization.
extern "C" void LLVMInitializeMipsAsmPrinter() {
RegisterAsmPrinter<MipsAsmPrinter> X(TheMipsTarget);
diff --git a/lib/Target/Mips/MipsAsmPrinter.h b/lib/Target/Mips/MipsAsmPrinter.h
index 3e9093e..e82b145 100644
--- a/lib/Target/Mips/MipsAsmPrinter.h
+++ b/lib/Target/Mips/MipsAsmPrinter.h
@@ -75,6 +75,8 @@ private:
void NaClAlignIndirectJumpTargets(MachineFunction &MF);
+ bool isLongBranchPseudo(int Opcode) const;
+
public:
const MipsSubtarget *Subtarget;
@@ -82,18 +84,18 @@ public:
MipsMCInstLower MCInstLowering;
explicit MipsAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
- : AsmPrinter(TM, Streamer), MCP(0), InConstantPool(false),
+ : AsmPrinter(TM, Streamer), MCP(nullptr), InConstantPool(false),
MCInstLowering(*this) {
Subtarget = &TM.getSubtarget<MipsSubtarget>();
}
- virtual const char *getPassName() const {
+ const char *getPassName() const override {
return "Mips Assembly Printer";
}
- virtual bool runOnMachineFunction(MachineFunction &MF);
+ bool runOnMachineFunction(MachineFunction &MF) override;
- virtual void EmitConstantPool() override {
+ void EmitConstantPool() override {
bool UsingConstantPools =
(Subtarget->inMips16Mode() && Subtarget->useConstantIslands());
if (!UsingConstantPools)
@@ -101,30 +103,30 @@ public:
// we emit constant pools ourselves!
}
- void EmitInstruction(const MachineInstr *MI);
+ void EmitInstruction(const MachineInstr *MI) override;
void printSavedRegsBitmask();
void emitFrameDirective();
const char *getCurrentABIString() const;
- virtual void EmitFunctionEntryLabel();
- virtual void EmitFunctionBodyStart();
- virtual void EmitFunctionBodyEnd();
- virtual bool isBlockOnlyReachableByFallthrough(const MachineBasicBlock*
- MBB) const;
+ void EmitFunctionEntryLabel() override;
+ void EmitFunctionBodyStart() override;
+ void EmitFunctionBodyEnd() override;
+ bool isBlockOnlyReachableByFallthrough(
+ const MachineBasicBlock* MBB) const override;
bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
unsigned AsmVariant, const char *ExtraCode,
- raw_ostream &O);
+ raw_ostream &O) override;
bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNum,
unsigned AsmVariant, const char *ExtraCode,
- raw_ostream &O);
+ raw_ostream &O) override;
void printOperand(const MachineInstr *MI, int opNum, raw_ostream &O);
void printUnsignedImm(const MachineInstr *MI, int opNum, raw_ostream &O);
void printUnsignedImm8(const MachineInstr *MI, int opNum, raw_ostream &O);
void printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O);
void printMemOperandEA(const MachineInstr *MI, int opNum, raw_ostream &O);
void printFCCOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
- const char *Modifier = 0);
- void EmitStartOfAsmFile(Module &M);
- void EmitEndOfAsmFile(Module &M);
+ const char *Modifier = nullptr);
+ void EmitStartOfAsmFile(Module &M) override;
+ void EmitEndOfAsmFile(Module &M) override;
void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS);
};
}
diff --git a/lib/Target/Mips/MipsCallingConv.td b/lib/Target/Mips/MipsCallingConv.td
index 615310f..c83d880 100644
--- a/lib/Target/Mips/MipsCallingConv.td
+++ b/lib/Target/Mips/MipsCallingConv.td
@@ -245,8 +245,8 @@ def CSR_O32 : CalleeSavedRegs<(add (sequence "D%u", 15, 10), RA, FP,
def CSR_O32_FP64 : CalleeSavedRegs<(add (sequence "D%u_64", 31, 20), RA, FP,
(sequence "S%u", 7, 0))>;
-def CSR_N32 : CalleeSavedRegs<(add D31_64, D29_64, D27_64, D25_64, D24_64,
- D23_64, D22_64, D21_64, RA_64, FP_64, GP_64,
+def CSR_N32 : CalleeSavedRegs<(add D20_64, D22_64, D24_64, D26_64, D28_64,
+ D30_64, RA_64, FP_64, GP_64,
(sequence "S%u_64", 7, 0))>;
def CSR_N64 : CalleeSavedRegs<(add (sequence "D%u_64", 31, 24), RA_64, FP_64,
diff --git a/lib/Target/Mips/MipsCodeEmitter.cpp b/lib/Target/Mips/MipsCodeEmitter.cpp
index ea49086..13fa546 100644
--- a/lib/Target/Mips/MipsCodeEmitter.cpp
+++ b/lib/Target/Mips/MipsCodeEmitter.cpp
@@ -12,7 +12,6 @@
//
//===---------------------------------------------------------------------===//
-#define DEBUG_TYPE "jit"
#include "Mips.h"
#include "MCTargetDesc/MipsBaseInfo.h"
#include "MipsInstrInfo.h"
@@ -41,6 +40,8 @@
using namespace llvm;
+#define DEBUG_TYPE "jit"
+
STATISTIC(NumEmitted, "Number of machine instructions emitted");
namespace {
@@ -56,7 +57,7 @@ class MipsCodeEmitter : public MachineFunctionPass {
const std::vector<MachineJumpTableEntry> *MJTEs;
bool IsPIC;
- void getAnalysisUsage(AnalysisUsage &AU) const {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<MachineModuleInfo> ();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -65,13 +66,13 @@ class MipsCodeEmitter : public MachineFunctionPass {
public:
MipsCodeEmitter(TargetMachine &tm, JITCodeEmitter &mce)
- : MachineFunctionPass(ID), JTI(0), II(0), TD(0),
- TM(tm), MCE(mce), MCPEs(0), MJTEs(0),
+ : MachineFunctionPass(ID), JTI(nullptr), II(nullptr), TD(nullptr),
+ TM(tm), MCE(mce), MCPEs(nullptr), MJTEs(nullptr),
IsPIC(TM.getRelocationModel() == Reloc::PIC_) {}
- bool runOnMachineFunction(MachineFunction &MF);
+ bool runOnMachineFunction(MachineFunction &MF) override;
- virtual const char *getPassName() const {
+ const char *getPassName() const override {
return "Mips Machine Code Emitter";
}
@@ -109,6 +110,12 @@ private:
unsigned getBranchTargetOpValueMM(const MachineInstr &MI,
unsigned OpNo) const;
+ unsigned getBranchTarget21OpValue(const MachineInstr &MI,
+ unsigned OpNo) const;
+ unsigned getBranchTarget26OpValue(const MachineInstr &MI,
+ unsigned OpNo) const;
+ unsigned getJumpOffset16OpValue(const MachineInstr &MI, unsigned OpNo) const;
+
unsigned getBranchTargetOpValue(const MachineInstr &MI, unsigned OpNo) const;
unsigned getMemEncoding(const MachineInstr &MI, unsigned OpNo) const;
unsigned getMemEncodingMMImm12(const MachineInstr &MI, unsigned OpNo) const;
@@ -116,6 +123,7 @@ private:
unsigned getSizeExtEncoding(const MachineInstr &MI, unsigned OpNo) const;
unsigned getSizeInsEncoding(const MachineInstr &MI, unsigned OpNo) const;
unsigned getLSAImmEncoding(const MachineInstr &MI, unsigned OpNo) const;
+ unsigned getSimm19Lsl2Encoding(const MachineInstr &MI, unsigned OpNo) const;
/// Expand pseudo instructions with accumulator register operands.
void expandACCInstr(MachineBasicBlock::instr_iterator MI,
@@ -138,7 +146,7 @@ bool MipsCodeEmitter::runOnMachineFunction(MachineFunction &MF) {
TD = Target.getDataLayout();
Subtarget = &TM.getSubtarget<MipsSubtarget> ();
MCPEs = &MF.getConstantPool()->getConstants();
- MJTEs = 0;
+ MJTEs = nullptr;
if (MF.getJumpTableInfo()) MJTEs = &MF.getJumpTableInfo()->getJumpTables();
JTI->Initialize(MF, IsPIC, Subtarget->isLittle());
MCE.setModuleInfo(&getAnalysis<MachineModuleInfo> ());
@@ -201,6 +209,24 @@ unsigned MipsCodeEmitter::getBranchTargetOpValueMM(const MachineInstr &MI,
return 0;
}
+unsigned MipsCodeEmitter::getBranchTarget21OpValue(const MachineInstr &MI,
+ unsigned OpNo) const {
+ llvm_unreachable("Unimplemented function.");
+ return 0;
+}
+
+unsigned MipsCodeEmitter::getBranchTarget26OpValue(const MachineInstr &MI,
+ unsigned OpNo) const {
+ llvm_unreachable("Unimplemented function.");
+ return 0;
+}
+
+unsigned MipsCodeEmitter::getJumpOffset16OpValue(const MachineInstr &MI,
+ unsigned OpNo) const {
+ llvm_unreachable("Unimplemented function.");
+ return 0;
+}
+
unsigned MipsCodeEmitter::getBranchTargetOpValue(const MachineInstr &MI,
unsigned OpNo) const {
MachineOperand MO = MI.getOperand(OpNo);
@@ -247,6 +273,12 @@ unsigned MipsCodeEmitter::getLSAImmEncoding(const MachineInstr &MI,
return 0;
}
+unsigned MipsCodeEmitter::getSimm19Lsl2Encoding(const MachineInstr &MI,
+ unsigned OpNo) const {
+ llvm_unreachable("Unimplemented function.");
+ return 0;
+}
+
/// getMachineOpValue - Return binary encoding of operand. If the machine
/// operand requires relocation, record the relocation and return zero.
unsigned MipsCodeEmitter::getMachineOpValue(const MachineInstr &MI,
diff --git a/lib/Target/Mips/MipsCondMov.td b/lib/Target/Mips/MipsCondMov.td
index 567eef9..7177f65 100644
--- a/lib/Target/Mips/MipsCondMov.td
+++ b/lib/Target/Mips/MipsCondMov.td
@@ -104,9 +104,9 @@ multiclass MovnPats<RegisterClass CRC, RegisterClass DRC, Instruction MOVNInst,
// Instantiation of instructions.
def MOVZ_I_I : MMRel, CMov_I_I_FT<"movz", GPR32Opnd, GPR32Opnd, II_MOVZ>,
- ADD_FM<0, 0xa>;
+ ADD_FM<0, 0xa>, INSN_MIPS4_32;
-let Predicates = [HasStdEnc], isCodeGenOnly = 1 in {
+let isCodeGenOnly = 1 in {
def MOVZ_I_I64 : CMov_I_I_FT<"movz", GPR32Opnd, GPR64Opnd, II_MOVZ>,
ADD_FM<0, 0xa>;
def MOVZ_I64_I : CMov_I_I_FT<"movz", GPR64Opnd, GPR32Opnd, II_MOVZ>,
@@ -116,9 +116,9 @@ let Predicates = [HasStdEnc], isCodeGenOnly = 1 in {
}
def MOVN_I_I : MMRel, CMov_I_I_FT<"movn", GPR32Opnd, GPR32Opnd, II_MOVN>,
- ADD_FM<0, 0xb>;
+ ADD_FM<0, 0xb>, INSN_MIPS4_32;
-let Predicates = [HasStdEnc], isCodeGenOnly = 1 in {
+let isCodeGenOnly = 1 in {
def MOVN_I_I64 : CMov_I_I_FT<"movn", GPR32Opnd, GPR64Opnd, II_MOVN>,
ADD_FM<0, 0xb>;
def MOVN_I64_I : CMov_I_I_FT<"movn", GPR64Opnd, GPR32Opnd, II_MOVN>,
@@ -128,118 +128,112 @@ let Predicates = [HasStdEnc], isCodeGenOnly = 1 in {
}
def MOVZ_I_S : MMRel, CMov_I_F_FT<"movz.s", GPR32Opnd, FGR32Opnd, II_MOVZ_S>,
- CMov_I_F_FM<18, 16>;
+ CMov_I_F_FM<18, 16>, INSN_MIPS4_32;
let isCodeGenOnly = 1 in
def MOVZ_I64_S : CMov_I_F_FT<"movz.s", GPR64Opnd, FGR32Opnd, II_MOVZ_S>,
- CMov_I_F_FM<18, 16>, Requires<[HasMips64, HasStdEnc]>;
+ CMov_I_F_FM<18, 16>, AdditionalRequires<[HasMips64]>;
def MOVN_I_S : MMRel, CMov_I_F_FT<"movn.s", GPR32Opnd, FGR32Opnd, II_MOVN_S>,
- CMov_I_F_FM<19, 16>;
+ CMov_I_F_FM<19, 16>, INSN_MIPS4_32;
let isCodeGenOnly = 1 in
def MOVN_I64_S : CMov_I_F_FT<"movn.s", GPR64Opnd, FGR32Opnd, II_MOVN_S>,
- CMov_I_F_FM<19, 16>, Requires<[HasMips64, HasStdEnc]>;
+ CMov_I_F_FM<19, 16>, AdditionalRequires<[IsGP64bit]>;
-let Predicates = [NotFP64bit, HasStdEnc] in {
- def MOVZ_I_D32 : MMRel, CMov_I_F_FT<"movz.d", GPR32Opnd, AFGR64Opnd,
- II_MOVZ_D>, CMov_I_F_FM<18, 17>;
- def MOVN_I_D32 : MMRel, CMov_I_F_FT<"movn.d", GPR32Opnd, AFGR64Opnd,
- II_MOVN_D>, CMov_I_F_FM<19, 17>;
-}
+def MOVZ_I_D32 : MMRel, CMov_I_F_FT<"movz.d", GPR32Opnd, AFGR64Opnd,
+ II_MOVZ_D>, CMov_I_F_FM<18, 17>,
+ INSN_MIPS4_32, FGR_32;
+def MOVN_I_D32 : MMRel, CMov_I_F_FT<"movn.d", GPR32Opnd, AFGR64Opnd,
+ II_MOVN_D>, CMov_I_F_FM<19, 17>,
+ INSN_MIPS4_32, FGR_32;
-let Predicates = [IsFP64bit, HasStdEnc], DecoderNamespace = "Mips64" in {
+let DecoderNamespace = "Mips64" in {
def MOVZ_I_D64 : CMov_I_F_FT<"movz.d", GPR32Opnd, FGR64Opnd, II_MOVZ_D>,
- CMov_I_F_FM<18, 17>;
+ CMov_I_F_FM<18, 17>, INSN_MIPS4_32, FGR_64;
def MOVN_I_D64 : CMov_I_F_FT<"movn.d", GPR32Opnd, FGR64Opnd, II_MOVN_D>,
- CMov_I_F_FM<19, 17>;
+ CMov_I_F_FM<19, 17>, INSN_MIPS4_32, FGR_64;
let isCodeGenOnly = 1 in {
def MOVZ_I64_D64 : CMov_I_F_FT<"movz.d", GPR64Opnd, FGR64Opnd,
- II_MOVZ_D>, CMov_I_F_FM<18, 17>;
+ II_MOVZ_D>, CMov_I_F_FM<18, 17>, FGR_64;
def MOVN_I64_D64 : CMov_I_F_FT<"movn.d", GPR64Opnd, FGR64Opnd,
- II_MOVN_D>, CMov_I_F_FM<19, 17>;
+ II_MOVN_D>, CMov_I_F_FM<19, 17>, FGR_64;
}
}
def MOVT_I : MMRel, CMov_F_I_FT<"movt", GPR32Opnd, II_MOVT, MipsCMovFP_T>,
- CMov_F_I_FM<1>;
+ CMov_F_I_FM<1>, INSN_MIPS4_32;
let isCodeGenOnly = 1 in
def MOVT_I64 : CMov_F_I_FT<"movt", GPR64Opnd, II_MOVT, MipsCMovFP_T>,
- CMov_F_I_FM<1>, Requires<[HasMips64, HasStdEnc]>;
+ CMov_F_I_FM<1>, AdditionalRequires<[IsGP64bit]>;
def MOVF_I : MMRel, CMov_F_I_FT<"movf", GPR32Opnd, II_MOVF, MipsCMovFP_F>,
- CMov_F_I_FM<0>;
+ CMov_F_I_FM<0>, INSN_MIPS4_32;
let isCodeGenOnly = 1 in
def MOVF_I64 : CMov_F_I_FT<"movf", GPR64Opnd, II_MOVF, MipsCMovFP_F>,
- CMov_F_I_FM<0>, Requires<[HasMips64, HasStdEnc]>;
+ CMov_F_I_FM<0>, AdditionalRequires<[IsGP64bit]>;
def MOVT_S : MMRel, CMov_F_F_FT<"movt.s", FGR32Opnd, II_MOVT_S, MipsCMovFP_T>,
- CMov_F_F_FM<16, 1>;
+ CMov_F_F_FM<16, 1>, INSN_MIPS4_32;
def MOVF_S : MMRel, CMov_F_F_FT<"movf.s", FGR32Opnd, II_MOVF_S, MipsCMovFP_F>,
- CMov_F_F_FM<16, 0>;
+ CMov_F_F_FM<16, 0>, INSN_MIPS4_32;
-let Predicates = [NotFP64bit, HasStdEnc] in {
- def MOVT_D32 : MMRel, CMov_F_F_FT<"movt.d", AFGR64Opnd, II_MOVT_D,
- MipsCMovFP_T>, CMov_F_F_FM<17, 1>;
- def MOVF_D32 : MMRel, CMov_F_F_FT<"movf.d", AFGR64Opnd, II_MOVF_D,
- MipsCMovFP_F>, CMov_F_F_FM<17, 0>;
-}
+def MOVT_D32 : MMRel, CMov_F_F_FT<"movt.d", AFGR64Opnd, II_MOVT_D,
+ MipsCMovFP_T>, CMov_F_F_FM<17, 1>,
+ INSN_MIPS4_32, FGR_32;
+def MOVF_D32 : MMRel, CMov_F_F_FT<"movf.d", AFGR64Opnd, II_MOVF_D,
+ MipsCMovFP_F>, CMov_F_F_FM<17, 0>,
+ INSN_MIPS4_32, FGR_32;
-let Predicates = [IsFP64bit, HasStdEnc], DecoderNamespace = "Mips64" in {
+let DecoderNamespace = "Mips64" in {
def MOVT_D64 : CMov_F_F_FT<"movt.d", FGR64Opnd, II_MOVT_D, MipsCMovFP_T>,
- CMov_F_F_FM<17, 1>;
+ CMov_F_F_FM<17, 1>, INSN_MIPS4_32, FGR_64;
def MOVF_D64 : CMov_F_F_FT<"movf.d", FGR64Opnd, II_MOVF_D, MipsCMovFP_F>,
- CMov_F_F_FM<17, 0>;
+ CMov_F_F_FM<17, 0>, INSN_MIPS4_32, FGR_64;
}
// Instantiation of conditional move patterns.
defm : MovzPats0<GPR32, GPR32, MOVZ_I_I, SLT, SLTu, SLTi, SLTiu>;
defm : MovzPats1<GPR32, GPR32, MOVZ_I_I, XOR>;
defm : MovzPats2<GPR32, GPR32, MOVZ_I_I, XORi>;
-let Predicates = [HasMips64, HasStdEnc] in {
- defm : MovzPats0<GPR32, GPR64, MOVZ_I_I64, SLT, SLTu, SLTi, SLTiu>;
- defm : MovzPats0<GPR64, GPR32, MOVZ_I_I, SLT64, SLTu64, SLTi64,
- SLTiu64>;
- defm : MovzPats0<GPR64, GPR64, MOVZ_I_I64, SLT64, SLTu64, SLTi64,
- SLTiu64>;
- defm : MovzPats1<GPR32, GPR64, MOVZ_I_I64, XOR>;
- defm : MovzPats1<GPR64, GPR32, MOVZ_I64_I, XOR64>;
- defm : MovzPats1<GPR64, GPR64, MOVZ_I64_I64, XOR64>;
- defm : MovzPats2<GPR32, GPR64, MOVZ_I_I64, XORi>;
- defm : MovzPats2<GPR64, GPR32, MOVZ_I64_I, XORi64>;
- defm : MovzPats2<GPR64, GPR64, MOVZ_I64_I64, XORi64>;
-}
+
+defm : MovzPats0<GPR32, GPR64, MOVZ_I_I64, SLT, SLTu, SLTi, SLTiu>, GPR_64;
+defm : MovzPats0<GPR64, GPR32, MOVZ_I_I, SLT64, SLTu64, SLTi64, SLTiu64>,
+ GPR_64;
+defm : MovzPats0<GPR64, GPR64, MOVZ_I_I64, SLT64, SLTu64, SLTi64, SLTiu64>,
+ GPR_64;
+defm : MovzPats1<GPR32, GPR64, MOVZ_I_I64, XOR>, GPR_64;
+defm : MovzPats1<GPR64, GPR32, MOVZ_I64_I, XOR64>, GPR_64;
+defm : MovzPats1<GPR64, GPR64, MOVZ_I64_I64, XOR64>, GPR_64;
+defm : MovzPats2<GPR32, GPR64, MOVZ_I_I64, XORi>, GPR_64;
+defm : MovzPats2<GPR64, GPR32, MOVZ_I64_I, XORi64>, GPR_64;
+defm : MovzPats2<GPR64, GPR64, MOVZ_I64_I64, XORi64>, GPR_64;
defm : MovnPats<GPR32, GPR32, MOVN_I_I, XOR>;
-let Predicates = [HasMips64, HasStdEnc] in {
- defm : MovnPats<GPR32, GPR64, MOVN_I_I64, XOR>;
- defm : MovnPats<GPR64, GPR32, MOVN_I64_I, XOR64>;
- defm : MovnPats<GPR64, GPR64, MOVN_I64_I64, XOR64>;
-}
+
+defm : MovnPats<GPR32, GPR64, MOVN_I_I64, XOR>, GPR_64;
+defm : MovnPats<GPR64, GPR32, MOVN_I64_I, XOR64>, GPR_64;
+defm : MovnPats<GPR64, GPR64, MOVN_I64_I64, XOR64>, GPR_64;
defm : MovzPats0<GPR32, FGR32, MOVZ_I_S, SLT, SLTu, SLTi, SLTiu>;
defm : MovzPats1<GPR32, FGR32, MOVZ_I_S, XOR>;
defm : MovnPats<GPR32, FGR32, MOVN_I_S, XOR>;
-let Predicates = [HasMips64, HasStdEnc] in {
- defm : MovzPats0<GPR64, FGR32, MOVZ_I_S, SLT64, SLTu64, SLTi64,
- SLTiu64>;
- defm : MovzPats1<GPR64, FGR32, MOVZ_I64_S, XOR64>;
- defm : MovnPats<GPR64, FGR32, MOVN_I64_S, XOR64>;
-}
-let Predicates = [NotFP64bit, HasStdEnc] in {
- defm : MovzPats0<GPR32, AFGR64, MOVZ_I_D32, SLT, SLTu, SLTi, SLTiu>;
- defm : MovzPats1<GPR32, AFGR64, MOVZ_I_D32, XOR>;
- defm : MovnPats<GPR32, AFGR64, MOVN_I_D32, XOR>;
-}
-let Predicates = [IsFP64bit, HasStdEnc] in {
- defm : MovzPats0<GPR32, FGR64, MOVZ_I_D64, SLT, SLTu, SLTi, SLTiu>;
- defm : MovzPats0<GPR64, FGR64, MOVZ_I_D64, SLT64, SLTu64, SLTi64,
- SLTiu64>;
- defm : MovzPats1<GPR32, FGR64, MOVZ_I_D64, XOR>;
- defm : MovzPats1<GPR64, FGR64, MOVZ_I64_D64, XOR64>;
- defm : MovnPats<GPR32, FGR64, MOVN_I_D64, XOR>;
- defm : MovnPats<GPR64, FGR64, MOVN_I64_D64, XOR64>;
-}
+defm : MovzPats0<GPR64, FGR32, MOVZ_I_S, SLT64, SLTu64, SLTi64, SLTiu64>,
+ GPR_64;
+defm : MovzPats1<GPR64, FGR32, MOVZ_I64_S, XOR64>, GPR_64;
+defm : MovnPats<GPR64, FGR32, MOVN_I64_S, XOR64>, GPR_64;
+
+defm : MovzPats0<GPR32, AFGR64, MOVZ_I_D32, SLT, SLTu, SLTi, SLTiu>, FGR_32;
+defm : MovzPats1<GPR32, AFGR64, MOVZ_I_D32, XOR>, FGR_32;
+defm : MovnPats<GPR32, AFGR64, MOVN_I_D32, XOR>, FGR_32;
+
+defm : MovzPats0<GPR32, FGR64, MOVZ_I_D64, SLT, SLTu, SLTi, SLTiu>, FGR_64;
+defm : MovzPats0<GPR64, FGR64, MOVZ_I_D64, SLT64, SLTu64, SLTi64, SLTiu64>,
+ FGR_64;
+defm : MovzPats1<GPR32, FGR64, MOVZ_I_D64, XOR>, FGR_64;
+defm : MovzPats1<GPR64, FGR64, MOVZ_I64_D64, XOR64>, FGR_64;
+defm : MovnPats<GPR32, FGR64, MOVN_I_D64, XOR>, FGR_64;
+defm : MovnPats<GPR64, FGR64, MOVN_I64_D64, XOR64>, FGR_64;
diff --git a/lib/Target/Mips/MipsConstantIslandPass.cpp b/lib/Target/Mips/MipsConstantIslandPass.cpp
index e5642ba..a37062f 100644
--- a/lib/Target/Mips/MipsConstantIslandPass.cpp
+++ b/lib/Target/Mips/MipsConstantIslandPass.cpp
@@ -21,8 +21,6 @@
//
//
-#define DEBUG_TYPE "mips-constant-islands"
-
#include "Mips.h"
#include "MCTargetDesc/MipsBaseInfo.h"
#include "Mips16InstrInfo.h"
@@ -47,6 +45,8 @@
using namespace llvm;
+#define DEBUG_TYPE "mips-constant-islands"
+
STATISTIC(NumCPEs, "Number of constpool entries");
STATISTIC(NumSplit, "Number of uncond branches inserted");
STATISTIC(NumCBrFixed, "Number of cond branches fixed");
@@ -368,14 +368,14 @@ namespace {
: MachineFunctionPass(ID), TM(tm),
IsPIC(TM.getRelocationModel() == Reloc::PIC_),
ABI(TM.getSubtarget<MipsSubtarget>().getTargetABI()),
- STI(&TM.getSubtarget<MipsSubtarget>()), MF(0), MCP(0),
+ STI(&TM.getSubtarget<MipsSubtarget>()), MF(nullptr), MCP(nullptr),
PrescannedForConstants(false){}
- virtual const char *getPassName() const {
+ const char *getPassName() const override {
return "Mips Constant Islands";
}
- bool runOnMachineFunction(MachineFunction &F);
+ bool runOnMachineFunction(MachineFunction &F) override;
void doInitialPlacement(std::vector<MachineInstr*> &CPEMIs);
CPEntry *findConstPoolEntry(unsigned CPI, const MachineInstr *CPEMI);
@@ -628,7 +628,7 @@ MipsConstantIslands::CPEntry
if (CPEs[i].CPEMI == CPEMI)
return &CPEs[i];
}
- return NULL;
+ return nullptr;
}
/// getCPELogAlign - Returns the required alignment of the constant pool entry
@@ -1065,7 +1065,7 @@ bool MipsConstantIslands::decrementCPEReferenceCount(unsigned CPI,
assert(CPE && "Unexpected!");
if (--CPE->RefCount == 0) {
removeDeadCPEMI(CPEMI);
- CPE->CPEMI = NULL;
+ CPE->CPEMI = nullptr;
--NumCPEs;
return true;
}
@@ -1098,7 +1098,7 @@ int MipsConstantIslands::findInRangeCPEntry(CPUser& U, unsigned UserOffset)
if (CPEs[i].CPEMI == CPEMI)
continue;
// Removing CPEs can leave empty entries; skip them.
- if (CPEs[i].CPEMI == NULL)
+ if (CPEs[i].CPEMI == nullptr)
continue;
if (isCPEntryInRange(UserMI, UserOffset, CPEs[i].CPEMI, U.getMaxDisp(),
U.NegOk)) {
@@ -1154,7 +1154,7 @@ int MipsConstantIslands::findLongFormInRangeCPEntry
if (CPEs[i].CPEMI == CPEMI)
continue;
// Removing CPEs can leave empty entries; skip them.
- if (CPEs[i].CPEMI == NULL)
+ if (CPEs[i].CPEMI == nullptr)
continue;
if (isCPEntryInRange(UserMI, UserOffset, CPEs[i].CPEMI,
U.getLongFormMaxDisp(), U.NegOk)) {
@@ -1486,7 +1486,7 @@ bool MipsConstantIslands::removeUnusedCPEntries() {
for (unsigned j = 0, ee = CPEs.size(); j != ee; ++j) {
if (CPEs[j].RefCount == 0 && CPEs[j].CPEMI) {
removeDeadCPEMI(CPEs[j].CPEMI);
- CPEs[j].CPEMI = NULL;
+ CPEs[j].CPEMI = nullptr;
MadeChange = true;
}
}
diff --git a/lib/Target/Mips/MipsDelaySlotFiller.cpp b/lib/Target/Mips/MipsDelaySlotFiller.cpp
index eef9f38..d6c7cac 100644
--- a/lib/Target/Mips/MipsDelaySlotFiller.cpp
+++ b/lib/Target/Mips/MipsDelaySlotFiller.cpp
@@ -11,8 +11,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "delay-slot-filler"
-
#include "MCTargetDesc/MipsMCNaCl.h"
#include "Mips.h"
#include "MipsInstrInfo.h"
@@ -33,6 +31,8 @@
using namespace llvm;
+#define DEBUG_TYPE "delay-slot-filler"
+
STATISTIC(FilledSlots, "Number of delay slots filled");
STATISTIC(UsefulSlots, "Number of delay slots filled with instructions that"
" are not NOP.");
@@ -124,7 +124,7 @@ namespace {
public:
NoMemInstr() : InspectMemInstr(true) {}
private:
- virtual bool hasHazard_(const MachineInstr &MI) { return true; }
+ bool hasHazard_(const MachineInstr &MI) override { return true; }
};
/// This subclass accepts loads from stacks and constant loads.
@@ -132,7 +132,7 @@ namespace {
public:
LoadFromStackOrConst() : InspectMemInstr(false) {}
private:
- virtual bool hasHazard_(const MachineInstr &MI);
+ bool hasHazard_(const MachineInstr &MI) override;
};
/// This subclass uses memory dependence information to determine whether a
@@ -142,19 +142,21 @@ namespace {
MemDefsUses(const MachineFrameInfo *MFI);
private:
- virtual bool hasHazard_(const MachineInstr &MI);
+ typedef PointerUnion<const Value *, const PseudoSourceValue *> ValueType;
+
+ bool hasHazard_(const MachineInstr &MI) override;
/// Update Defs and Uses. Return true if there exist dependences that
/// disqualify the delay slot candidate between V and values in Uses and
/// Defs.
- bool updateDefsUses(const Value *V, bool MayStore);
+ bool updateDefsUses(ValueType V, bool MayStore);
/// Get the list of underlying objects of MI's memory operand.
bool getUnderlyingObjects(const MachineInstr &MI,
- SmallVectorImpl<const Value *> &Objects) const;
+ SmallVectorImpl<ValueType> &Objects) const;
const MachineFrameInfo *MFI;
- SmallPtrSet<const Value*, 4> Uses, Defs;
+ SmallPtrSet<ValueType, 4> Uses, Defs;
/// Flags indicating whether loads or stores with no underlying objects have
/// been seen.
@@ -166,11 +168,11 @@ namespace {
Filler(TargetMachine &tm)
: MachineFunctionPass(ID), TM(tm) { }
- virtual const char *getPassName() const {
+ const char *getPassName() const override {
return "Mips Delay Slot Filler";
}
- bool runOnMachineFunction(MachineFunction &F) {
+ bool runOnMachineFunction(MachineFunction &F) override {
bool Changed = false;
for (MachineFunction::iterator FI = F.begin(), FE = F.end();
FI != FE; ++FI)
@@ -178,7 +180,7 @@ namespace {
return Changed;
}
- void getAnalysisUsage(AnalysisUsage &AU) const {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<MachineBranchProbabilityInfo>();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -399,16 +401,15 @@ bool LoadFromStackOrConst::hasHazard_(const MachineInstr &MI) {
if (MI.mayStore())
return true;
- if (!MI.hasOneMemOperand() || !(*MI.memoperands_begin())->getValue())
+ if (!MI.hasOneMemOperand() || !(*MI.memoperands_begin())->getPseudoValue())
return true;
- const Value *V = (*MI.memoperands_begin())->getValue();
-
- if (isa<FixedStackPseudoSourceValue>(V))
- return false;
-
- if (const PseudoSourceValue *PSV = dyn_cast<const PseudoSourceValue>(V))
- return !PSV->isConstant(0) && V != PseudoSourceValue::getStack();
+ if (const PseudoSourceValue *PSV =
+ (*MI.memoperands_begin())->getPseudoValue()) {
+ if (isa<FixedStackPseudoSourceValue>(PSV))
+ return false;
+ return !PSV->isConstant(nullptr) && PSV != PseudoSourceValue::getStack();
+ }
return true;
}
@@ -419,11 +420,11 @@ MemDefsUses::MemDefsUses(const MachineFrameInfo *MFI_)
bool MemDefsUses::hasHazard_(const MachineInstr &MI) {
bool HasHazard = false;
- SmallVector<const Value *, 4> Objs;
+ SmallVector<ValueType, 4> Objs;
// Check underlying object list.
if (getUnderlyingObjects(MI, Objs)) {
- for (SmallVectorImpl<const Value *>::const_iterator I = Objs.begin();
+ for (SmallVectorImpl<ValueType>::const_iterator I = Objs.begin();
I != Objs.end(); ++I)
HasHazard |= updateDefsUses(*I, MI.mayStore());
@@ -440,7 +441,7 @@ bool MemDefsUses::hasHazard_(const MachineInstr &MI) {
return HasHazard;
}
-bool MemDefsUses::updateDefsUses(const Value *V, bool MayStore) {
+bool MemDefsUses::updateDefsUses(ValueType V, bool MayStore) {
if (MayStore)
return !Defs.insert(V) || Uses.count(V) || SeenNoObjStore || SeenNoObjLoad;
@@ -450,10 +451,20 @@ bool MemDefsUses::updateDefsUses(const Value *V, bool MayStore) {
bool MemDefsUses::
getUnderlyingObjects(const MachineInstr &MI,
- SmallVectorImpl<const Value *> &Objects) const {
- if (!MI.hasOneMemOperand() || !(*MI.memoperands_begin())->getValue())
+ SmallVectorImpl<ValueType> &Objects) const {
+ if (!MI.hasOneMemOperand() ||
+ (!(*MI.memoperands_begin())->getValue() &&
+ !(*MI.memoperands_begin())->getPseudoValue()))
return false;
+ if (const PseudoSourceValue *PSV =
+ (*MI.memoperands_begin())->getPseudoValue()) {
+ if (!PSV->isAliased(MFI))
+ return false;
+ Objects.push_back(PSV);
+ return true;
+ }
+
const Value *V = (*MI.memoperands_begin())->getValue();
SmallVector<Value *, 4> Objs;
@@ -461,10 +472,7 @@ getUnderlyingObjects(const MachineInstr &MI,
for (SmallVectorImpl<Value *>::iterator I = Objs.begin(), E = Objs.end();
I != E; ++I) {
- if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(*I)) {
- if (PSV->isAliased(MFI))
- return false;
- } else if (!isIdentifiedObject(V))
+ if (!isIdentifiedObject(V))
return false;
Objects.push_back(*I);
@@ -602,7 +610,7 @@ bool Filler::searchSuccBBs(MachineBasicBlock &MBB, Iter Slot) const {
RegDefsUses RegDU(TM);
bool HasMultipleSuccs = false;
BB2BrMap BrMap;
- OwningPtr<InspectMemInstr> IM;
+ std::unique_ptr<InspectMemInstr> IM;
Iter Filler;
// Iterate over SuccBB's predecessor list.
@@ -636,7 +644,7 @@ bool Filler::searchSuccBBs(MachineBasicBlock &MBB, Iter Slot) const {
MachineBasicBlock *Filler::selectSuccBB(MachineBasicBlock &B) const {
if (B.succ_empty())
- return NULL;
+ return nullptr;
// Select the successor with the largest edge weight.
auto &Prob = getAnalysis<MachineBranchProbabilityInfo>();
@@ -645,14 +653,14 @@ MachineBasicBlock *Filler::selectSuccBB(MachineBasicBlock &B) const {
const MachineBasicBlock *Dst1) {
return Prob.getEdgeWeight(&B, Dst0) < Prob.getEdgeWeight(&B, Dst1);
});
- return S->isLandingPad() ? NULL : S;
+ return S->isLandingPad() ? nullptr : S;
}
std::pair<MipsInstrInfo::BranchType, MachineInstr *>
Filler::getBranch(MachineBasicBlock &MBB, const MachineBasicBlock &Dst) const {
const MipsInstrInfo *TII =
static_cast<const MipsInstrInfo*>(TM.getInstrInfo());
- MachineBasicBlock *TrueBB = 0, *FalseBB = 0;
+ MachineBasicBlock *TrueBB = nullptr, *FalseBB = nullptr;
SmallVector<MachineInstr*, 2> BranchInstrs;
SmallVector<MachineOperand, 2> Cond;
@@ -660,11 +668,11 @@ Filler::getBranch(MachineBasicBlock &MBB, const MachineBasicBlock &Dst) const {
TII->AnalyzeBranch(MBB, TrueBB, FalseBB, Cond, false, BranchInstrs);
if ((R == MipsInstrInfo::BT_None) || (R == MipsInstrInfo::BT_NoBranch))
- return std::make_pair(R, (MachineInstr*)NULL);
+ return std::make_pair(R, nullptr);
if (R != MipsInstrInfo::BT_CondUncond) {
if (!hasUnoccupiedSlot(BranchInstrs[0]))
- return std::make_pair(MipsInstrInfo::BT_None, (MachineInstr*)NULL);
+ return std::make_pair(MipsInstrInfo::BT_None, nullptr);
assert(((R != MipsInstrInfo::BT_Uncond) || (TrueBB == &Dst)));
@@ -681,7 +689,7 @@ Filler::getBranch(MachineBasicBlock &MBB, const MachineBasicBlock &Dst) const {
if (hasUnoccupiedSlot(BranchInstrs[1]) && (FalseBB == &Dst))
return std::make_pair(MipsInstrInfo::BT_Uncond, BranchInstrs[1]);
- return std::make_pair(MipsInstrInfo::BT_None, (MachineInstr*)NULL);
+ return std::make_pair(MipsInstrInfo::BT_None, nullptr);
}
bool Filler::examinePred(MachineBasicBlock &Pred, const MachineBasicBlock &Succ,
diff --git a/lib/Target/Mips/MipsFastISel.cpp b/lib/Target/Mips/MipsFastISel.cpp
new file mode 100644
index 0000000..268a0ed
--- /dev/null
+++ b/lib/Target/Mips/MipsFastISel.cpp
@@ -0,0 +1,283 @@
+//===-- MipsFastISel.cpp - Mips FastISel implementation ------------------===//
+
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
+#include "llvm/CodeGen/FastISel.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLibraryInfo.h"
+#include "MipsRegisterInfo.h"
+#include "MipsISelLowering.h"
+#include "MipsMachineFunction.h"
+#include "MipsSubtarget.h"
+
+using namespace llvm;
+
+namespace {
+
+// All possible address modes.
+typedef struct Address {
+ enum { RegBase, FrameIndexBase } BaseType;
+
+ union {
+ unsigned Reg;
+ int FI;
+ } Base;
+
+ int64_t Offset;
+
+ // Innocuous defaults for our address.
+ Address() : BaseType(RegBase), Offset(0) { Base.Reg = 0; }
+} Address;
+
+class MipsFastISel final : public FastISel {
+
+ /// Subtarget - Keep a pointer to the MipsSubtarget around so that we can
+ /// make the right decision when generating code for different targets.
+ const MipsSubtarget *Subtarget;
+ Module &M;
+ const TargetMachine &TM;
+ const TargetInstrInfo &TII;
+ const TargetLowering &TLI;
+ MipsFunctionInfo *MFI;
+
+ // Convenience variables to avoid some queries.
+ LLVMContext *Context;
+
+ bool TargetSupported;
+
+public:
+ explicit MipsFastISel(FunctionLoweringInfo &funcInfo,
+ const TargetLibraryInfo *libInfo)
+ : FastISel(funcInfo, libInfo),
+ M(const_cast<Module &>(*funcInfo.Fn->getParent())),
+ TM(funcInfo.MF->getTarget()), TII(*TM.getInstrInfo()),
+ TLI(*TM.getTargetLowering()) {
+ Subtarget = &TM.getSubtarget<MipsSubtarget>();
+ MFI = funcInfo.MF->getInfo<MipsFunctionInfo>();
+ Context = &funcInfo.Fn->getContext();
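+    // Fast-isel support is intentionally narrow for now: PIC relocation,
+    // MIPS32r2, and the O32 ABI only (see the condition below).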
+ TargetSupported = ((Subtarget->getRelocationModel() == Reloc::PIC_) &&
+ (Subtarget->hasMips32r2() && (Subtarget->isABI_O32())));
+ }
+
+ bool TargetSelectInstruction(const Instruction *I) override;
+ unsigned TargetMaterializeConstant(const Constant *C) override;
+
+ bool ComputeAddress(const Value *Obj, Address &Addr);
+
+private:
+ bool EmitStore(MVT VT, unsigned SrcReg, Address &Addr,
+ unsigned Alignment = 0);
+ bool SelectRet(const Instruction *I);
+ bool SelectStore(const Instruction *I);
+
+ bool isTypeLegal(Type *Ty, MVT &VT);
+ bool isLoadTypeLegal(Type *Ty, MVT &VT);
+
+ unsigned MaterializeFP(const ConstantFP *CFP, MVT VT);
+ unsigned MaterializeGV(const GlobalValue *GV, MVT VT);
+ unsigned MaterializeInt(const Constant *C, MVT VT);
+ unsigned Materialize32BitInt(int64_t Imm, const TargetRegisterClass *RC);
+};
+
+bool MipsFastISel::isTypeLegal(Type *Ty, MVT &VT) {
+ EVT evt = TLI.getValueType(Ty, true);
+ // Only handle simple types.
+ if (evt == MVT::Other || !evt.isSimple())
+ return false;
+ VT = evt.getSimpleVT();
+
+ // Handle all legal types, i.e. a register that will directly hold this
+ // value.
+ return TLI.isTypeLegal(VT);
+}
+
+bool MipsFastISel::isLoadTypeLegal(Type *Ty, MVT &VT) {
+ if (isTypeLegal(Ty, VT))
+ return true;
+ // We will extend this in a later patch:
+ // If this is a type that can be sign- or zero-extended to a basic operation,
+ // go ahead and accept it now.
+ return false;
+}
+
+bool MipsFastISel::ComputeAddress(const Value *Obj, Address &Addr) {
+ // This construct looks a bit awkward, but it is how other ports handle this,
+ // and as this function is more fully completed, the cases that currently
+ // return false will have additional code in them.
+ //
+ if (isa<Instruction>(Obj))
+ return false;
+ else if (isa<ConstantExpr>(Obj))
+ return false;
+ Addr.Base.Reg = getRegForValue(Obj);
+ return Addr.Base.Reg != 0;
+}
+
+// Materialize a constant into a register, and return the register
+// number (or zero if we failed to handle it).
+unsigned MipsFastISel::TargetMaterializeConstant(const Constant *C) {
+ EVT CEVT = TLI.getValueType(C->getType(), true);
+
+ // Only handle simple types.
+ if (!CEVT.isSimple())
+ return 0;
+ MVT VT = CEVT.getSimpleVT();
+
+ if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
+ return MaterializeFP(CFP, VT);
+ else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
+ return MaterializeGV(GV, VT);
+ else if (isa<ConstantInt>(C))
+ return MaterializeInt(C, VT);
+
+ return 0;
+}
+
+bool MipsFastISel::EmitStore(MVT VT, unsigned SrcReg, Address &Addr,
+ unsigned Alignment) {
+ //
+ // more cases will be handled here in following patches.
+ //
+ if (VT != MVT::i32)
+ return false;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Mips::SW))
+ .addReg(SrcReg)
+ .addReg(Addr.Base.Reg)
+ .addImm(Addr.Offset);
+ return true;
+}
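+// Note: EmitStore above handles only MVT::i32, so at this stage only plain
+// 32-bit "sw" stores are fast-selected; everything else falls back to the
+// normal SelectionDAG path.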
+
+bool MipsFastISel::SelectStore(const Instruction *I) {
+ Value *Op0 = I->getOperand(0);
+ unsigned SrcReg = 0;
+
+ // Atomic stores need special handling.
+ if (cast<StoreInst>(I)->isAtomic())
+ return false;
+
+ // Verify we have a legal type before going any further.
+ MVT VT;
+ if (!isLoadTypeLegal(I->getOperand(0)->getType(), VT))
+ return false;
+
+ // Get the value to be stored into a register.
+ SrcReg = getRegForValue(Op0);
+ if (SrcReg == 0)
+ return false;
+
+ // See if we can handle this address.
+ Address Addr;
+ if (!ComputeAddress(I->getOperand(1), Addr))
+ return false;
+
+ if (!EmitStore(VT, SrcReg, Addr, cast<StoreInst>(I)->getAlignment()))
+ return false;
+ return true;
+}
+
+bool MipsFastISel::SelectRet(const Instruction *I) {
+ const ReturnInst *Ret = cast<ReturnInst>(I);
+
+ if (!FuncInfo.CanLowerReturn)
+ return false;
+ if (Ret->getNumOperands() > 0) {
+ return false;
+ }
+ unsigned RetOpc = Mips::RetRA;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(RetOpc));
+ return true;
+}
+
+bool MipsFastISel::TargetSelectInstruction(const Instruction *I) {
+ if (!TargetSupported)
+ return false;
+ switch (I->getOpcode()) {
+ default:
+ break;
+ case Instruction::Store:
+ return SelectStore(I);
+ case Instruction::Ret:
+ return SelectRet(I);
+ }
+ return false;
+}
+}
+
+unsigned MipsFastISel::MaterializeFP(const ConstantFP *CFP, MVT VT) {
+ return 0;
+}
+
+unsigned MipsFastISel::MaterializeGV(const GlobalValue *GV, MVT VT) {
+ // For now 32-bit only.
+ if (VT != MVT::i32)
+ return 0;
+ const TargetRegisterClass *RC = &Mips::GPR32RegClass;
+ unsigned DestReg = createResultReg(RC);
+ const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
+ bool IsThreadLocal = GVar && GVar->isThreadLocal();
+ // TLS not supported at this time.
+ if (IsThreadLocal)
+ return 0;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Mips::LW), DestReg)
+ .addReg(MFI->getGlobalBaseReg())
+ .addGlobalAddress(GV, 0, MipsII::MO_GOT);
+ return DestReg;
+}
+unsigned MipsFastISel::MaterializeInt(const Constant *C, MVT VT) {
+ if (VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8 && VT != MVT::i1)
+ return 0;
+ const TargetRegisterClass *RC = &Mips::GPR32RegClass;
+ const ConstantInt *CI = cast<ConstantInt>(C);
+ int64_t Imm;
+ if (CI->isNegative())
+ Imm = CI->getSExtValue();
+ else
+ Imm = CI->getZExtValue();
+ return Materialize32BitInt(Imm, RC);
+}
+
+unsigned MipsFastISel::Materialize32BitInt(int64_t Imm,
+ const TargetRegisterClass *RC) {
+ unsigned ResultReg = createResultReg(RC);
+
+ if (isInt<16>(Imm)) {
+ unsigned Opc = Mips::ADDiu;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
+ .addReg(Mips::ZERO)
+ .addImm(Imm);
+ return ResultReg;
+ } else if (isUInt<16>(Imm)) {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Mips::ORi),
+ ResultReg)
+ .addReg(Mips::ZERO)
+ .addImm(Imm);
+ return ResultReg;
+ }
+ unsigned Lo = Imm & 0xFFFF;
+ unsigned Hi = (Imm >> 16) & 0xFFFF;
+ if (Lo) {
+ // Both Lo and Hi have nonzero bits.
+ unsigned TmpReg = createResultReg(RC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Mips::LUi),
+ TmpReg).addImm(Hi);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Mips::ORi),
+ ResultReg)
+ .addReg(TmpReg)
+ .addImm(Lo);
+
+ } else {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Mips::LUi),
+ ResultReg).addImm(Hi);
+ }
+ return ResultReg;
+}
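+// For example (register numbers are illustrative): 0x00123456 does not fit in
+// 16 bits, so it materializes as
+//   lui $2, 0x12
+//   ori $2, $2, 0x3456
+// while immediates that fit in 16 bits need only a single addiu/ori from $zero.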
+
+namespace llvm {
+FastISel *Mips::createFastISel(FunctionLoweringInfo &funcInfo,
+ const TargetLibraryInfo *libInfo) {
+ return new MipsFastISel(funcInfo, libInfo);
+}
+}
diff --git a/lib/Target/Mips/MipsFrameLowering.cpp b/lib/Target/Mips/MipsFrameLowering.cpp
index eb9d49f..8ba35fa 100644
--- a/lib/Target/Mips/MipsFrameLowering.cpp
+++ b/lib/Target/Mips/MipsFrameLowering.cpp
@@ -110,7 +110,7 @@ uint64_t MipsFrameLowering::estimateStackSize(const MachineFunction &MF) const {
Offset = std::max(Offset, -MFI->getObjectOffset(I));
// Conservatively assume all callee-saved registers will be saved.
- for (const uint16_t *R = TRI.getCalleeSavedRegs(&MF); *R; ++R) {
+ for (const MCPhysReg *R = TRI.getCalleeSavedRegs(&MF); *R; ++R) {
unsigned Size = TRI.getMinimalPhysRegClass(*R)->getSize();
Offset = RoundUpToAlignment(Offset + Size, Size);
}
diff --git a/lib/Target/Mips/MipsFrameLowering.h b/lib/Target/Mips/MipsFrameLowering.h
index 6a5f79d..e10a3a5 100644
--- a/lib/Target/Mips/MipsFrameLowering.h
+++ b/lib/Target/Mips/MipsFrameLowering.h
@@ -32,7 +32,7 @@ public:
static const MipsFrameLowering *create(MipsTargetMachine &TM,
const MipsSubtarget &ST);
- bool hasFP(const MachineFunction &MF) const;
+ bool hasFP(const MachineFunction &MF) const override;
protected:
uint64_t estimateStackSize(const MachineFunction &MF) const;
diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp
index 941aeac..90cff63 100644
--- a/lib/Target/Mips/MipsISelDAGToDAG.cpp
+++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp
@@ -11,7 +11,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "mips-isel"
#include "MipsISelDAGToDAG.h"
#include "MCTargetDesc/MipsBaseInfo.h"
#include "Mips.h"
@@ -36,6 +35,8 @@
#include "llvm/Target/TargetMachine.h"
using namespace llvm;
+#define DEBUG_TYPE "mips-isel"
+
//===----------------------------------------------------------------------===//
// Instruction Selector Implementation
//===----------------------------------------------------------------------===//
@@ -182,7 +183,7 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) {
if (Node->isMachineOpcode()) {
DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n");
Node->setNodeId(-1);
- return NULL;
+ return nullptr;
}
// See if subclasses can handle this node.
@@ -201,8 +202,9 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) {
#ifndef NDEBUG
case ISD::LOAD:
case ISD::STORE:
- assert(cast<MemSDNode>(Node)->getMemoryVT().getSizeInBits() / 8 <=
- cast<MemSDNode>(Node)->getAlignment() &&
+ assert((Subtarget.systemSupportsUnalignedAccess() ||
+ cast<MemSDNode>(Node)->getMemoryVT().getSizeInBits() / 8 <=
+ cast<MemSDNode>(Node)->getAlignment()) &&
"Unexpected unaligned loads/stores.");
break;
#endif
@@ -212,7 +214,7 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) {
SDNode *ResNode = SelectCode(Node);
DEBUG(errs() << "=> ");
- if (ResNode == NULL || ResNode == Node)
+ if (ResNode == nullptr || ResNode == Node)
DEBUG(Node->dump(CurDAG));
else
DEBUG(ResNode->dump(CurDAG));
diff --git a/lib/Target/Mips/MipsISelDAGToDAG.h b/lib/Target/Mips/MipsISelDAGToDAG.h
index 4546182..13becb6 100644
--- a/lib/Target/Mips/MipsISelDAGToDAG.h
+++ b/lib/Target/Mips/MipsISelDAGToDAG.h
@@ -35,11 +35,11 @@ public:
: SelectionDAGISel(TM), Subtarget(TM.getSubtarget<MipsSubtarget>()) {}
// Pass Name
- virtual const char *getPassName() const {
+ const char *getPassName() const override {
return "MIPS DAG->DAG Pattern Instruction Selection";
}
- virtual bool runOnMachineFunction(MachineFunction &MF);
+ bool runOnMachineFunction(MachineFunction &MF) override;
protected:
SDNode *getGlobalBaseReg();
@@ -110,7 +110,7 @@ private:
/// starting at bit zero.
virtual bool selectVSplatMaskR(SDValue N, SDValue &Imm) const;
- virtual SDNode *Select(SDNode *N);
+ SDNode *Select(SDNode *N) override;
virtual std::pair<bool, SDNode*> selectNode(SDNode *Node) = 0;
@@ -121,9 +121,9 @@ private:
virtual void processFunctionAfterISel(MachineFunction &MF) = 0;
- virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
- char ConstraintCode,
- std::vector<SDValue> &OutOps);
+ bool SelectInlineAsmMemoryOperand(const SDValue &Op,
+ char ConstraintCode,
+ std::vector<SDValue> &OutOps) override;
};
/// createMipsISelDag - This pass converts a legalized DAG into a
diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp
index abf36da..bfe5ea1 100644
--- a/lib/Target/Mips/MipsISelLowering.cpp
+++ b/lib/Target/Mips/MipsISelLowering.cpp
@@ -11,7 +11,6 @@
// selection DAG.
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "mips-lower"
#include "MipsISelLowering.h"
#include "InstPrinter/MipsInstPrinter.h"
#include "MCTargetDesc/MipsBaseInfo.h"
@@ -39,6 +38,8 @@
using namespace llvm;
+#define DEBUG_TYPE "mips-lower"
+
STATISTIC(NumTailCalls, "Number of tail calls");
static cl::opt<bool>
@@ -50,16 +51,21 @@ NoZeroDivCheck("mno-check-zero-division", cl::Hidden,
cl::desc("MIPS: Don't trap on integer division by zero."),
cl::init(false));
-static const uint16_t O32IntRegs[4] = {
+cl::opt<bool>
+EnableMipsFastISel("mips-fast-isel", cl::Hidden,
+ cl::desc("Allow mips-fast-isel to be used"),
+ cl::init(false));
+
+static const MCPhysReg O32IntRegs[4] = {
Mips::A0, Mips::A1, Mips::A2, Mips::A3
};
-static const uint16_t Mips64IntRegs[8] = {
+static const MCPhysReg Mips64IntRegs[8] = {
Mips::A0_64, Mips::A1_64, Mips::A2_64, Mips::A3_64,
Mips::T0_64, Mips::T1_64, Mips::T2_64, Mips::T3_64
};
-static const uint16_t Mips64DPRegs[8] = {
+static const MCPhysReg Mips64DPRegs[8] = {
Mips::D12_64, Mips::D13_64, Mips::D14_64, Mips::D15_64,
Mips::D16_64, Mips::D17_64, Mips::D18_64, Mips::D19_64
};
@@ -198,7 +204,7 @@ const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const {
case MipsISD::PCKEV: return "MipsISD::PCKEV";
case MipsISD::PCKOD: return "MipsISD::PCKOD";
case MipsISD::INSVE: return "MipsISD::INSVE";
- default: return NULL;
+ default: return nullptr;
}
}
@@ -245,12 +251,7 @@ MipsTargetLowering::MipsTargetLowering(MipsTargetMachine &TM)
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
- if (!TM.Options.NoNaNsFPMath) {
- setOperationAction(ISD::FABS, MVT::f32, Custom);
- setOperationAction(ISD::FABS, MVT::f64, Custom);
- }
-
- if (hasMips64()) {
+ if (isGP64bit()) {
setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
setOperationAction(ISD::BlockAddress, MVT::i64, Custom);
setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
@@ -262,14 +263,14 @@ MipsTargetLowering::MipsTargetLowering(MipsTargetMachine &TM)
setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
}
- if (!hasMips64()) {
+ if (!isGP64bit()) {
setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
}
setOperationAction(ISD::ADD, MVT::i32, Custom);
- if (hasMips64())
+ if (isGP64bit())
setOperationAction(ISD::ADD, MVT::i64, Custom);
setOperationAction(ISD::SDIV, MVT::i32, Expand);
@@ -334,11 +335,6 @@ MipsTargetLowering::MipsTargetLowering(MipsTargetMachine &TM)
setOperationAction(ISD::FREM, MVT::f32, Expand);
setOperationAction(ISD::FREM, MVT::f64, Expand);
- if (!TM.Options.NoNaNsFPMath) {
- setOperationAction(ISD::FNEG, MVT::f32, Expand);
- setOperationAction(ISD::FNEG, MVT::f64, Expand);
- }
-
setOperationAction(ISD::EH_RETURN, MVT::Other, Custom);
setOperationAction(ISD::VAARG, MVT::Other, Expand);
@@ -356,22 +352,23 @@ MipsTargetLowering::MipsTargetLowering(MipsTargetMachine &TM)
setInsertFencesForAtomic(true);
- if (!Subtarget->hasSEInReg()) {
+ if (!Subtarget->hasMips32r2()) {
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
}
- if (!Subtarget->hasBitCount()) {
+ // MIPS16 lacks MIPS32's clz and clo instructions.
+ if (!Subtarget->hasMips32() || Subtarget->inMips16Mode())
setOperationAction(ISD::CTLZ, MVT::i32, Expand);
+ if (!Subtarget->hasMips64())
setOperationAction(ISD::CTLZ, MVT::i64, Expand);
- }
- if (!Subtarget->hasSwap()) {
+ if (!Subtarget->hasMips32r2())
setOperationAction(ISD::BSWAP, MVT::i32, Expand);
+ if (!Subtarget->hasMips64r2())
setOperationAction(ISD::BSWAP, MVT::i64, Expand);
- }
- if (hasMips64()) {
+ if (isGP64bit()) {
setLoadExtAction(ISD::SEXTLOAD, MVT::i32, Custom);
setLoadExtAction(ISD::ZEXTLOAD, MVT::i32, Custom);
setLoadExtAction(ISD::EXTLOAD, MVT::i32, Custom);
@@ -387,7 +384,7 @@ MipsTargetLowering::MipsTargetLowering(MipsTargetMachine &TM)
setTargetDAGCombine(ISD::OR);
setTargetDAGCombine(ISD::ADD);
- setMinFunctionAlignment(hasMips64() ? 3 : 2);
+ setMinFunctionAlignment(isGP64bit() ? 3 : 2);
setStackPointerRegisterToSaveRestore(isN64() ? Mips::SP_64 : Mips::SP);
@@ -406,6 +403,15 @@ const MipsTargetLowering *MipsTargetLowering::create(MipsTargetMachine &TM) {
return llvm::createMipsSETargetLowering(TM);
}
+// Create a fast isel object.
+FastISel *
+MipsTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
+ const TargetLibraryInfo *libInfo) const {
+ if (!EnableMipsFastISel)
+ return TargetLowering::createFastISel(funcInfo, libInfo);
+ return Mips::createFastISel(funcInfo, libInfo);
+}
+
EVT MipsTargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
if (!VT.isVector())
return MVT::i32;
@@ -779,7 +785,6 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const
case ISD::SETCC: return lowerSETCC(Op, DAG);
case ISD::VASTART: return lowerVASTART(Op, DAG);
case ISD::FCOPYSIGN: return lowerFCOPYSIGN(Op, DAG);
- case ISD::FABS: return lowerFABS(Op, DAG);
case ISD::FRAMEADDR: return lowerFRAMEADDR(Op, DAG);
case ISD::RETURNADDR: return lowerRETURNADDR(Op, DAG);
case ISD::EH_RETURN: return lowerEH_RETURN(Op, DAG);
@@ -1506,7 +1511,7 @@ SDValue MipsTargetLowering::lowerGlobalAddress(SDValue Op,
SDValue GA = DAG.getTargetGlobalAddress(GV, DL, MVT::i32, 0,
MipsII::MO_GPREL);
SDValue GPRelNode = DAG.getNode(MipsISD::GPRel, DL,
- DAG.getVTList(MVT::i32), &GA, 1);
+ DAG.getVTList(MVT::i32), GA);
SDValue GPReg = DAG.getRegister(Mips::GP, MVT::i32);
return DAG.getNode(ISD::ADD, DL, MVT::i32, GPReg, GPRelNode);
}
@@ -1572,11 +1577,9 @@ lowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const
Entry.Ty = PtrTy;
Args.push_back(Entry);
- TargetLowering::CallLoweringInfo CLI(DAG.getEntryNode(), PtrTy,
- false, false, false, false, 0, CallingConv::C,
- /*IsTailCall=*/false, /*doesNotRet=*/false,
- /*isReturnValueUsed=*/true,
- TlsGetAddr, Args, DAG, DL);
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(DL).setChain(DAG.getEntryNode())
+ .setCallee(CallingConv::C, PtrTy, TlsGetAddr, &Args, 0);
std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
SDValue Ret = CallResult.first;
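For orientation: the old CallLoweringInfo constructor took a dozen positional arguments; the hunk above switches to the builder-style setters that replaced it. A minimal sketch of the new pattern, with comments added for illustration (DAG, DL, PtrTy, TlsGetAddr and Args are assumed in scope, as they are in the code above):

    TargetLowering::CallLoweringInfo CLI(DAG);
    CLI.setDebugLoc(DL)                        // where the call is attributed
       .setChain(DAG.getEntryNode())           // incoming chain
       .setCallee(CallingConv::C, PtrTy,       // convention and return type
                  TlsGetAddr, &Args, 0);       // callee symbol and arguments
    std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);

Each setter returns the CallLoweringInfo itself, so the configuration chains; flags the old constructor forced callers to spell out (not a tail call, return value used, and so on) now keep sensible defaults unless explicitly overridden.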
@@ -1765,71 +1768,12 @@ static SDValue lowerFCOPYSIGN64(SDValue Op, SelectionDAG &DAG,
SDValue
MipsTargetLowering::lowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
- if (Subtarget->hasMips64())
+ if (Subtarget->isGP64bit())
return lowerFCOPYSIGN64(Op, DAG, Subtarget->hasExtractInsert());
return lowerFCOPYSIGN32(Op, DAG, Subtarget->hasExtractInsert());
}
-static SDValue lowerFABS32(SDValue Op, SelectionDAG &DAG,
- bool HasExtractInsert) {
- SDValue Res, Const1 = DAG.getConstant(1, MVT::i32);
- SDLoc DL(Op);
-
- // If operand is of type f64, extract the upper 32-bit. Otherwise, bitcast it
- // to i32.
- SDValue X = (Op.getValueType() == MVT::f32) ?
- DAG.getNode(ISD::BITCAST, DL, MVT::i32, Op.getOperand(0)) :
- DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32, Op.getOperand(0),
- Const1);
-
- // Clear MSB.
- if (HasExtractInsert)
- Res = DAG.getNode(MipsISD::Ins, DL, MVT::i32,
- DAG.getRegister(Mips::ZERO, MVT::i32),
- DAG.getConstant(31, MVT::i32), Const1, X);
- else {
- SDValue SllX = DAG.getNode(ISD::SHL, DL, MVT::i32, X, Const1);
- Res = DAG.getNode(ISD::SRL, DL, MVT::i32, SllX, Const1);
- }
-
- if (Op.getValueType() == MVT::f32)
- return DAG.getNode(ISD::BITCAST, DL, MVT::f32, Res);
-
- SDValue LowX = DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32,
- Op.getOperand(0), DAG.getConstant(0, MVT::i32));
- return DAG.getNode(MipsISD::BuildPairF64, DL, MVT::f64, LowX, Res);
-}
-
-static SDValue lowerFABS64(SDValue Op, SelectionDAG &DAG,
- bool HasExtractInsert) {
- SDValue Res, Const1 = DAG.getConstant(1, MVT::i32);
- SDLoc DL(Op);
-
- // Bitcast to integer node.
- SDValue X = DAG.getNode(ISD::BITCAST, DL, MVT::i64, Op.getOperand(0));
-
- // Clear MSB.
- if (HasExtractInsert)
- Res = DAG.getNode(MipsISD::Ins, DL, MVT::i64,
- DAG.getRegister(Mips::ZERO_64, MVT::i64),
- DAG.getConstant(63, MVT::i32), Const1, X);
- else {
- SDValue SllX = DAG.getNode(ISD::SHL, DL, MVT::i64, X, Const1);
- Res = DAG.getNode(ISD::SRL, DL, MVT::i64, SllX, Const1);
- }
-
- return DAG.getNode(ISD::BITCAST, DL, MVT::f64, Res);
-}
-
-SDValue
-MipsTargetLowering::lowerFABS(SDValue Op, SelectionDAG &DAG) const {
- if (Subtarget->hasMips64() && (Op.getValueType() == MVT::f64))
- return lowerFABS64(Op, DAG, Subtarget->hasExtractInsert());
-
- return lowerFABS32(Op, DAG, Subtarget->hasExtractInsert());
-}
-
SDValue MipsTargetLowering::
lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
// check the depth
@@ -1931,7 +1875,7 @@ SDValue MipsTargetLowering::lowerShiftLeftParts(SDValue Op,
Hi = DAG.getNode(ISD::SELECT, DL, MVT::i32, Cond, ShiftLeftLo, Or);
SDValue Ops[2] = {Lo, Hi};
- return DAG.getMergeValues(Ops, 2, DL);
+ return DAG.getMergeValues(Ops, DL);
}
SDValue MipsTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
@@ -1972,7 +1916,7 @@ SDValue MipsTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
ShiftRightHi);
SDValue Ops[2] = {Lo, Hi};
- return DAG.getMergeValues(Ops, 2, DL);
+ return DAG.getMergeValues(Ops, DL);
}
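The getMergeValues, getNode and getMemIntrinsicNode changes throughout this file are the same mechanical migration: SelectionDAG's variadic entry points moved from a pointer-plus-count pair to ArrayRef<SDValue>, to which fixed-size arrays and SmallVectors convert implicitly. A sketch of the idea, assuming Lo, Hi and DL as in the function above:

    SDValue Ops[2] = {Lo, Hi};
    // Before: DAG.getMergeValues(Ops, 2, DL);  // pointer + explicit count
    // After: the array converts to ArrayRef<SDValue>, which carries its size.
    return DAG.getMergeValues(Ops, DL);

The same rewrite explains the later &MemOpChains[0]/MemOpChains.size() pairs collapsing to a bare container argument.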
static SDValue createLoadLR(unsigned Opc, SelectionDAG &DAG, LoadSDNode *LD,
@@ -1988,7 +1932,7 @@ static SDValue createLoadLR(unsigned Opc, SelectionDAG &DAG, LoadSDNode *LD,
DAG.getConstant(Offset, BasePtrVT));
SDValue Ops[] = { Chain, Ptr, Src };
- return DAG.getMemIntrinsicNode(Opc, DL, VTList, Ops, 3, MemVT,
+ return DAG.getMemIntrinsicNode(Opc, DL, VTList, Ops, MemVT,
LD->getMemOperand());
}
@@ -1997,6 +1941,9 @@ SDValue MipsTargetLowering::lowerLOAD(SDValue Op, SelectionDAG &DAG) const {
LoadSDNode *LD = cast<LoadSDNode>(Op);
EVT MemVT = LD->getMemoryVT();
+ if (Subtarget->systemSupportsUnalignedAccess())
+ return Op;
+
// Return if load is aligned or if MemVT is neither i32 nor i64.
if ((LD->getAlignment() >= MemVT.getSizeInBits() / 8) ||
((MemVT != MVT::i32) && (MemVT != MVT::i64)))
@@ -2051,7 +1998,7 @@ SDValue MipsTargetLowering::lowerLOAD(SDValue Op, SelectionDAG &DAG) const {
SDValue SLL = DAG.getNode(ISD::SHL, DL, MVT::i64, LWR, Const32);
SDValue SRL = DAG.getNode(ISD::SRL, DL, MVT::i64, SLL, Const32);
SDValue Ops[] = { SRL, LWR.getValue(1) };
- return DAG.getMergeValues(Ops, 2, DL);
+ return DAG.getMergeValues(Ops, DL);
}
static SDValue createStoreLR(unsigned Opc, SelectionDAG &DAG, StoreSDNode *SD,
@@ -2066,7 +2013,7 @@ static SDValue createStoreLR(unsigned Opc, SelectionDAG &DAG, StoreSDNode *SD,
DAG.getConstant(Offset, BasePtrVT));
SDValue Ops[] = { Chain, Value, Ptr };
- return DAG.getMemIntrinsicNode(Opc, DL, VTList, Ops, 3, MemVT,
+ return DAG.getMemIntrinsicNode(Opc, DL, VTList, Ops, MemVT,
SD->getMemOperand());
}
@@ -2120,7 +2067,8 @@ SDValue MipsTargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const {
EVT MemVT = SD->getMemoryVT();
// Lower unaligned integer stores.
- if ((SD->getAlignment() < MemVT.getSizeInBits() / 8) &&
+ if (!Subtarget->systemSupportsUnalignedAccess() &&
+ (SD->getAlignment() < MemVT.getSizeInBits() / 8) &&
((MemVT == MVT::i32) || (MemVT == MVT::i64)))
return lowerUnalignedIntStore(SD, DAG, Subtarget->isLittle());
@@ -2177,12 +2125,12 @@ SDValue MipsTargetLowering::lowerFP_TO_SINT(SDValue Op,
static bool CC_MipsO32(unsigned ValNo, MVT ValVT, MVT LocVT,
CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
- CCState &State, const uint16_t *F64Regs) {
+ CCState &State, const MCPhysReg *F64Regs) {
static const unsigned IntRegsSize = 4, FloatRegsSize = 2;
- static const uint16_t IntRegs[] = { Mips::A0, Mips::A1, Mips::A2, Mips::A3 };
- static const uint16_t F32Regs[] = { Mips::F12, Mips::F14 };
+ static const MCPhysReg IntRegs[] = { Mips::A0, Mips::A1, Mips::A2, Mips::A3 };
+ static const MCPhysReg F32Regs[] = { Mips::F12, Mips::F14 };
// Do not process byval args here.
if (ArgFlags.isByVal())
@@ -2254,7 +2202,7 @@ static bool CC_MipsO32(unsigned ValNo, MVT ValVT, MVT LocVT,
static bool CC_MipsO32_FP32(unsigned ValNo, MVT ValVT,
MVT LocVT, CCValAssign::LocInfo LocInfo,
ISD::ArgFlagsTy ArgFlags, CCState &State) {
- static const uint16_t F64Regs[] = { Mips::D6, Mips::D7 };
+ static const MCPhysReg F64Regs[] = { Mips::D6, Mips::D7 };
return CC_MipsO32(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State, F64Regs);
}
@@ -2262,7 +2210,7 @@ static bool CC_MipsO32_FP32(unsigned ValNo, MVT ValVT,
static bool CC_MipsO32_FP64(unsigned ValNo, MVT ValVT,
MVT LocVT, CCValAssign::LocInfo LocInfo,
ISD::ArgFlagsTy ArgFlags, CCState &State) {
- static const uint16_t F64Regs[] = { Mips::D12_64, Mips::D14_64 };
+ static const MCPhysReg F64Regs[] = { Mips::D12_64, Mips::D14_64 };
return CC_MipsO32(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State, F64Regs);
}
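The uint16_t-to-MCPhysReg renames in these calling-convention helpers are type hygiene rather than a behavioral change: MCPhysReg is LLVM's dedicated typedef for physical register numbers, at this point in the tree still a 16-bit unsigned integer, so the arrays keep their size and layout while gaining a self-documenting element type. Sketch (the typedef below paraphrases llvm/MC/MCRegisterInfo.h of this era):

    typedef uint16_t MCPhysReg; // compact physical-register number
    static const MCPhysReg F64Regs[] = { Mips::D12_64, Mips::D14_64 };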
@@ -2383,7 +2331,7 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
MipsCCInfo.analyzeCallOperands(Outs, IsVarArg,
Subtarget->mipsSEUsesSoftFloat(),
- Callee.getNode(), CLI.Args);
+ Callee.getNode(), CLI.getArgs());
// Get a count of how many bytes are to be pushed on the stack.
unsigned NextStackOffset = CCInfo.getNextStackOffset();
@@ -2394,6 +2342,10 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
isEligibleForTailCallOptimization(MipsCCInfo, NextStackOffset,
*MF.getInfo<MipsFunctionInfo>());
+ if (!IsTailCall && CLI.CS && CLI.CS->isMustTailCall())
+ report_fatal_error("failed to perform tail call elimination on a call "
+ "site marked musttail");
+
if (IsTailCall)
++NumTailCalls;
@@ -2489,8 +2441,7 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Transform all store nodes into one single node because all store
// nodes are independent of each other.
if (!MemOpChains.empty())
- Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
- &MemOpChains[0], MemOpChains.size());
+ Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
// If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
// direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
@@ -2544,9 +2495,9 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
CLI, Callee, Chain);
if (IsTailCall)
- return DAG.getNode(MipsISD::TailCall, DL, MVT::Other, &Ops[0], Ops.size());
+ return DAG.getNode(MipsISD::TailCall, DL, MVT::Other, Ops);
- Chain = DAG.getNode(MipsISD::JmpLink, DL, NodeTys, &Ops[0], Ops.size());
+ Chain = DAG.getNode(MipsISD::JmpLink, DL, NodeTys, Ops);
SDValue InFlag = Chain.getValue(1);
// Create the CALLSEQ_END node.
@@ -2713,18 +2664,21 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
}
}
- // The mips ABIs for returning structs by value requires that we copy
- // the sret argument into $v0 for the return. Save the argument into
- // a virtual register so that we can access it from the return points.
- if (DAG.getMachineFunction().getFunction()->hasStructRetAttr()) {
- unsigned Reg = MipsFI->getSRetReturnReg();
- if (!Reg) {
- Reg = MF.getRegInfo().createVirtualRegister(
- getRegClassFor(isN64() ? MVT::i64 : MVT::i32));
- MipsFI->setSRetReturnReg(Reg);
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ // The mips ABIs for returning structs by value require that we copy

+ // the sret argument into $v0 for the return. Save the argument into
+ // a virtual register so that we can access it from the return points.
+ if (Ins[i].Flags.isSRet()) {
+ unsigned Reg = MipsFI->getSRetReturnReg();
+ if (!Reg) {
+ Reg = MF.getRegInfo().createVirtualRegister(
+ getRegClassFor(isN64() ? MVT::i64 : MVT::i32));
+ MipsFI->setSRetReturnReg(Reg);
+ }
+ SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), DL, Reg, InVals[i]);
+ Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Copy, Chain);
+ break;
}
- SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), DL, Reg, InVals[0]);
- Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Copy, Chain);
}
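For readers unfamiliar with sret: when a function returns a struct too large for the return registers, the front end rewrites it to take a hidden pointer argument flagged sret, and the Mips ABIs require that pointer to come back in $v0 (or $v0_64 under N64). The rewritten loop above finds the argument actually carrying the flag rather than assuming it is argument zero. A hypothetical source-level example that exercises this path (illustrative only, not from the patch):

    // 'Big' does not fit in the return registers, so makeBig() is lowered
    // with a hidden sret pointer parameter; that same pointer must also be
    // returned in $v0.
    struct Big { int Words[8]; };
    Big makeBig() {
      Big B = {};
      return B; // stored through the sret pointer
    }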
if (IsVarArg)
@@ -2734,8 +2688,7 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
// the size of Ins and InVals. This only happens when on varg functions
if (!OutChains.empty()) {
OutChains.push_back(Chain);
- Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
- &OutChains[0], OutChains.size());
+ Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
}
return Chain;
@@ -2820,7 +2773,7 @@ MipsTargetLowering::LowerReturn(SDValue Chain,
RetOps.push_back(Flag);
// Return on Mips is always a "jr $ra"
- return DAG.getNode(MipsISD::Ret, DL, MVT::Other, &RetOps[0], RetOps.size());
+ return DAG.getNode(MipsISD::Ret, DL, MVT::Other, RetOps);
}
//===----------------------------------------------------------------------===//
@@ -2870,7 +2823,7 @@ MipsTargetLowering::getSingleConstraintMatchWeight(
Value *CallOperandVal = info.CallOperandVal;
// If we don't have a value, we can't do a match,
// but allow it at the lowest weight.
- if (CallOperandVal == NULL)
+ if (!CallOperandVal)
return CW_Default;
Type *type = CallOperandVal->getType();
// Look at the constraint type.
@@ -2948,12 +2901,12 @@ parseRegForInlineAsmConstraint(const StringRef &C, MVT VT) const {
std::pair<bool, bool> R = parsePhysicalReg(C, Prefix, Reg);
if (!R.first)
- return std::make_pair((unsigned)0, (const TargetRegisterClass*)0);
+ return std::make_pair(0U, nullptr);
if ((Prefix == "hi" || Prefix == "lo")) { // Parse hi/lo.
// No numeric characters follow "hi" or "lo".
if (R.second)
- return std::make_pair((unsigned)0, (const TargetRegisterClass*)0);
+ return std::make_pair(0U, nullptr);
RC = TRI->getRegClass(Prefix == "hi" ?
Mips::HI32RegClassID : Mips::LO32RegClassID);
@@ -2963,7 +2916,7 @@ parseRegForInlineAsmConstraint(const StringRef &C, MVT VT) const {
// No numeric characters follow the name.
if (R.second)
- return std::make_pair((unsigned)0, (const TargetRegisterClass *)0);
+ return std::make_pair(0U, nullptr);
Reg = StringSwitch<unsigned long long>(Prefix)
.Case("$msair", Mips::MSAIR)
@@ -2977,14 +2930,14 @@ parseRegForInlineAsmConstraint(const StringRef &C, MVT VT) const {
.Default(0);
if (!Reg)
- return std::make_pair((unsigned)0, (const TargetRegisterClass *)0);
+ return std::make_pair(0U, nullptr);
RC = TRI->getRegClass(Mips::MSACtrlRegClassID);
return std::make_pair(Reg, RC);
}
if (!R.second)
- return std::make_pair((unsigned)0, (const TargetRegisterClass*)0);
+ return std::make_pair(0U, nullptr);
if (Prefix == "$f") { // Parse $f0-$f31.
// If the size of FP registers is 64-bit or Reg is an even number, select
@@ -3032,7 +2985,7 @@ getRegForInlineAsmConstraint(const std::string &Constraint, MVT VT) const
if (VT == MVT::i64 && isGP64bit())
return std::make_pair(0U, &Mips::GPR64RegClass);
// This will generate an error message
- return std::make_pair(0u, static_cast<const TargetRegisterClass*>(0));
+ return std::make_pair(0U, nullptr);
case 'f': // FPU or MSA register
if (VT == MVT::v16i8)
return std::make_pair(0U, &Mips::MSA128BRegClass);
@@ -3062,7 +3015,7 @@ getRegForInlineAsmConstraint(const std::string &Constraint, MVT VT) const
case 'x': // register suitable for indirect jump
// Fixme: Not triggering the use of both hi and low
// This will generate an error message
- return std::make_pair(0u, static_cast<const TargetRegisterClass*>(0));
+ return std::make_pair(0U, nullptr);
}
}
@@ -3081,7 +3034,7 @@ void MipsTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
std::string &Constraint,
std::vector<SDValue>&Ops,
SelectionDAG &DAG) const {
- SDValue Result(0, 0);
+ SDValue Result;
// Only support length 1 constraints for now.
if (Constraint.length() > 1) return;
@@ -3265,7 +3218,7 @@ static bool originalTypeIsF128(const Type *Ty, const SDNode *CallNode) {
MipsTargetLowering::MipsCC::SpecialCallingConvType
MipsTargetLowering::getSpecialCallingConv(SDValue Callee) const {
MipsCC::SpecialCallingConvType SpecialCallingConv =
- MipsCC::NoSpecialCallingConv;;
+ MipsCC::NoSpecialCallingConv;
if (Subtarget->inMips16HardFloat()) {
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
llvm::StringRef Sym = G->getGlobal()->getName();
@@ -3321,7 +3274,7 @@ analyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Args,
dbgs() << "Call operand #" << I << " has unhandled type "
<< EVT(ArgVT).getEVTString();
#endif
- llvm_unreachable(0);
+ llvm_unreachable(nullptr);
}
}
}
@@ -3344,7 +3297,7 @@ analyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> &Args,
continue;
}
- MVT RegVT = getRegVT(ArgVT, FuncArg->getType(), 0, IsSoftFloat);
+ MVT RegVT = getRegVT(ArgVT, FuncArg->getType(), nullptr, IsSoftFloat);
if (!FixedFn(I, ArgVT, RegVT, CCValAssign::Full, ArgFlags, CCInfo))
continue;
@@ -3353,7 +3306,7 @@ analyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> &Args,
dbgs() << "Formal Arg #" << I << " has unhandled type "
<< EVT(ArgVT).getEVTString();
#endif
- llvm_unreachable(0);
+ llvm_unreachable(nullptr);
}
}
@@ -3378,7 +3331,7 @@ analyzeReturn(const SmallVectorImpl<Ty> &RetVals, bool IsSoftFloat,
dbgs() << "Call result #" << I << " has unhandled type "
<< EVT(VT).getEVTString() << '\n';
#endif
- llvm_unreachable(0);
+ llvm_unreachable(nullptr);
}
}
}
@@ -3392,7 +3345,7 @@ analyzeCallResult(const SmallVectorImpl<ISD::InputArg> &Ins, bool IsSoftFloat,
void MipsTargetLowering::MipsCC::
analyzeReturn(const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsSoftFloat,
const Type *RetTy) const {
- analyzeReturn(Outs, IsSoftFloat, 0, RetTy);
+ analyzeReturn(Outs, IsSoftFloat, nullptr, RetTy);
}
void MipsTargetLowering::MipsCC::handleByValArg(unsigned ValNo, MVT ValVT,
@@ -3426,7 +3379,7 @@ unsigned MipsTargetLowering::MipsCC::reservedArgArea() const {
return (IsO32 && (CallConv != CallingConv::Fast)) ? 16 : 0;
}
-const uint16_t *MipsTargetLowering::MipsCC::intArgRegs() const {
+const MCPhysReg *MipsTargetLowering::MipsCC::intArgRegs() const {
return IsO32 ? O32IntRegs : Mips64IntRegs;
}
@@ -3443,7 +3396,7 @@ llvm::CCAssignFn *MipsTargetLowering::MipsCC::varArgFn() const {
return IsO32 ? (IsFP64 ? CC_MipsO32_FP64 : CC_MipsO32_FP32) : CC_MipsN_VarArg;
}
-const uint16_t *MipsTargetLowering::MipsCC::shadowRegs() const {
+const MCPhysReg *MipsTargetLowering::MipsCC::shadowRegs() const {
return IsO32 ? O32IntRegs : Mips64DPRegs;
}
@@ -3451,7 +3404,7 @@ void MipsTargetLowering::MipsCC::allocateRegs(ByValArgInfo &ByVal,
unsigned ByValSize,
unsigned Align) {
unsigned RegSize = regSize(), NumIntArgRegs = numIntArgRegs();
- const uint16_t *IntArgRegs = intArgRegs(), *ShadowRegs = shadowRegs();
+ const MCPhysReg *IntArgRegs = intArgRegs(), *ShadowRegs = shadowRegs();
assert(!(ByValSize % RegSize) && !(Align % RegSize) &&
"Byval argument's size and alignment should be a multiple of"
"RegSize.");
@@ -3536,21 +3489,22 @@ passByValArg(SDValue Chain, SDLoc DL,
MachineFrameInfo *MFI, SelectionDAG &DAG, SDValue Arg,
const MipsCC &CC, const ByValArgInfo &ByVal,
const ISD::ArgFlagsTy &Flags, bool isLittle) const {
- unsigned ByValSize = Flags.getByValSize();
- unsigned Offset = 0; // Offset in # of bytes from the beginning of struct.
- unsigned RegSize = CC.regSize();
- unsigned Alignment = std::min(Flags.getByValAlign(), RegSize);
- EVT PtrTy = getPointerTy(), RegTy = MVT::getIntegerVT(RegSize * 8);
+ unsigned ByValSizeInBytes = Flags.getByValSize();
+ unsigned OffsetInBytes = 0; // From beginning of struct
+ unsigned RegSizeInBytes = CC.regSize();
+ unsigned Alignment = std::min(Flags.getByValAlign(), RegSizeInBytes);
+ EVT PtrTy = getPointerTy(), RegTy = MVT::getIntegerVT(RegSizeInBytes * 8);
if (ByVal.NumRegs) {
- const uint16_t *ArgRegs = CC.intArgRegs();
- bool LeftoverBytes = (ByVal.NumRegs * RegSize > ByValSize);
+ const MCPhysReg *ArgRegs = CC.intArgRegs();
+ bool LeftoverBytes = (ByVal.NumRegs * RegSizeInBytes > ByValSizeInBytes);
unsigned I = 0;
// Copy words to registers.
- for (; I < ByVal.NumRegs - LeftoverBytes; ++I, Offset += RegSize) {
+ for (; I < ByVal.NumRegs - LeftoverBytes;
+ ++I, OffsetInBytes += RegSizeInBytes) {
SDValue LoadPtr = DAG.getNode(ISD::ADD, DL, PtrTy, Arg,
- DAG.getConstant(Offset, PtrTy));
+ DAG.getConstant(OffsetInBytes, PtrTy));
SDValue LoadVal = DAG.getLoad(RegTy, DL, Chain, LoadPtr,
MachinePointerInfo(), false, false, false,
Alignment);
@@ -3560,38 +3514,38 @@ passByValArg(SDValue Chain, SDLoc DL,
}
// Return if the struct has been fully copied.
- if (ByValSize == Offset)
+ if (ByValSizeInBytes == OffsetInBytes)
return;
// Copy the remainder of the byval argument with sub-word loads and shifts.
if (LeftoverBytes) {
- assert((ByValSize > Offset) && (ByValSize < Offset + RegSize) &&
- "Size of the remainder should be smaller than RegSize.");
+ assert((ByValSizeInBytes > OffsetInBytes) &&
+ (ByValSizeInBytes < OffsetInBytes + RegSizeInBytes) &&
+ "Size of the remainder should be smaller than RegSizeInBytes.");
SDValue Val;
- for (unsigned LoadSize = RegSize / 2, TotalSizeLoaded = 0;
- Offset < ByValSize; LoadSize /= 2) {
- unsigned RemSize = ByValSize - Offset;
+ for (unsigned LoadSizeInBytes = RegSizeInBytes / 2, TotalBytesLoaded = 0;
+ OffsetInBytes < ByValSizeInBytes; LoadSizeInBytes /= 2) {
+ unsigned RemainingSizeInBytes = ByValSizeInBytes - OffsetInBytes;
- if (RemSize < LoadSize)
+ if (RemainingSizeInBytes < LoadSizeInBytes)
continue;
// Load subword.
SDValue LoadPtr = DAG.getNode(ISD::ADD, DL, PtrTy, Arg,
- DAG.getConstant(Offset, PtrTy));
- SDValue LoadVal =
- DAG.getExtLoad(ISD::ZEXTLOAD, DL, RegTy, Chain, LoadPtr,
- MachinePointerInfo(), MVT::getIntegerVT(LoadSize * 8),
- false, false, Alignment);
+ DAG.getConstant(OffsetInBytes, PtrTy));
+ SDValue LoadVal = DAG.getExtLoad(
+ ISD::ZEXTLOAD, DL, RegTy, Chain, LoadPtr, MachinePointerInfo(),
+ MVT::getIntegerVT(LoadSizeInBytes * 8), false, false, Alignment);
MemOpChains.push_back(LoadVal.getValue(1));
// Shift the loaded value.
unsigned Shamt;
if (isLittle)
- Shamt = TotalSizeLoaded;
+ Shamt = TotalBytesLoaded * 8;
else
- Shamt = (RegSize - (TotalSizeLoaded + LoadSize)) * 8;
+ Shamt = (RegSizeInBytes - (TotalBytesLoaded + LoadSizeInBytes)) * 8;
SDValue Shift = DAG.getNode(ISD::SHL, DL, RegTy, LoadVal,
DAG.getConstant(Shamt, MVT::i32));
@@ -3601,9 +3555,9 @@ passByValArg(SDValue Chain, SDLoc DL,
else
Val = Shift;
- Offset += LoadSize;
- TotalSizeLoaded += LoadSize;
- Alignment = std::min(Alignment, LoadSize);
+ OffsetInBytes += LoadSizeInBytes;
+ TotalBytesLoaded += LoadSizeInBytes;
+ Alignment = std::min(Alignment, LoadSizeInBytes);
}
unsigned ArgReg = ArgRegs[ByVal.FirstIdx + I];
@@ -3613,14 +3567,14 @@ passByValArg(SDValue Chain, SDLoc DL,
}
// Copy remainder of byval arg to it with memcpy.
- unsigned MemCpySize = ByValSize - Offset;
+ unsigned MemCpySize = ByValSizeInBytes - OffsetInBytes;
SDValue Src = DAG.getNode(ISD::ADD, DL, PtrTy, Arg,
- DAG.getConstant(Offset, PtrTy));
+ DAG.getConstant(OffsetInBytes, PtrTy));
SDValue Dst = DAG.getNode(ISD::ADD, DL, PtrTy, StackPtr,
DAG.getIntPtrConstant(ByVal.Address));
Chain = DAG.getMemcpy(Chain, DL, Dst, Src, DAG.getConstant(MemCpySize, PtrTy),
Alignment, /*isVolatile=*/false, /*AlwaysInline=*/false,
- MachinePointerInfo(0), MachinePointerInfo(0));
+ MachinePointerInfo(), MachinePointerInfo());
MemOpChains.push_back(Chain);
}
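Beyond the *InBytes renames, the hunk above fixes a genuine little-endian bug: the shift amount for a freshly loaded subword was previously the byte count TotalSizeLoaded where a bit count is required, so every subword after the first landed in the wrong position. A self-contained sketch of the corrected computation (sizes in bytes, result in bits):

    unsigned subwordShift(bool IsLittle, unsigned RegSizeInBytes,
                          unsigned TotalBytesLoaded, unsigned LoadSizeInBytes) {
      return IsLittle
                 ? TotalBytesLoaded * 8
                 : (RegSizeInBytes - (TotalBytesLoaded + LoadSizeInBytes)) * 8;
    }
    // For a 4-byte register, loading the second 2-byte subword:
    //   little-endian: subwordShift(true, 4, 2, 2)  == 16
    //   big-endian:    subwordShift(false, 4, 2, 2) == 0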
@@ -3628,7 +3582,7 @@ void MipsTargetLowering::writeVarArgRegs(std::vector<SDValue> &OutChains,
const MipsCC &CC, SDValue Chain,
SDLoc DL, SelectionDAG &DAG) const {
unsigned NumRegs = CC.numIntArgRegs();
- const uint16_t *ArgRegs = CC.intArgRegs();
+ const MCPhysReg *ArgRegs = CC.intArgRegs();
const CCState &CCInfo = CC.getCCInfo();
unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs, NumRegs);
unsigned RegSize = CC.regSize();
@@ -3662,7 +3616,7 @@ void MipsTargetLowering::writeVarArgRegs(std::vector<SDValue> &OutChains,
SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy());
SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
MachinePointerInfo(), false, false, 0);
- cast<StoreSDNode>(Store.getNode())->getMemOperand()->setValue(0);
+ cast<StoreSDNode>(Store.getNode())->getMemOperand()->setValue((Value*)nullptr);
OutChains.push_back(Store);
}
}
diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h
index 35dd396..4ac33bf 100644
--- a/lib/Target/Mips/MipsISelLowering.h
+++ b/lib/Target/Mips/MipsISelLowering.h
@@ -218,32 +218,38 @@ namespace llvm {
static const MipsTargetLowering *create(MipsTargetMachine &TM);
- virtual MVT getScalarShiftAmountTy(EVT LHSTy) const { return MVT::i32; }
+ /// createFastISel - This method returns a target specific FastISel object,
+ /// or null if the target does not support "fast" ISel.
+ FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
+ const TargetLibraryInfo *libInfo) const override;
- virtual void LowerOperationWrapper(SDNode *N,
- SmallVectorImpl<SDValue> &Results,
- SelectionDAG &DAG) const;
+ MVT getScalarShiftAmountTy(EVT LHSTy) const override { return MVT::i32; }
+
+ void LowerOperationWrapper(SDNode *N,
+ SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG) const override;
/// LowerOperation - Provide custom lowering hooks for some operations.
- virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
/// ReplaceNodeResults - Replace the results of node with an illegal result
/// type with new values built out of custom code.
///
- virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
- SelectionDAG &DAG) const;
+ void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
+ SelectionDAG &DAG) const override;
/// getTargetNodeName - This method returns the name of a target specific
// DAG node.
- virtual const char *getTargetNodeName(unsigned Opcode) const;
+ const char *getTargetNodeName(unsigned Opcode) const override;
/// getSetCCResultType - get the ISD::SETCC result ValueType
- EVT getSetCCResultType(LLVMContext &Context, EVT VT) const;
+ EVT getSetCCResultType(LLVMContext &Context, EVT VT) const override;
- virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
- virtual MachineBasicBlock *
- EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const;
+ MachineBasicBlock *
+ EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *MBB) const override;
struct LTStr {
bool operator()(const char *S1, const char *S2) const {
@@ -382,7 +388,7 @@ namespace llvm {
unsigned reservedArgArea() const;
/// Return pointer to array of integer argument registers.
- const uint16_t *intArgRegs() const;
+ const MCPhysReg *intArgRegs() const;
typedef SmallVectorImpl<ByValArgInfo>::const_iterator byval_iterator;
byval_iterator byval_begin() const { return ByValArgs.begin(); }
@@ -403,7 +409,7 @@ namespace llvm {
/// Return the function that analyzes variable argument list functions.
llvm::CCAssignFn *varArgFn() const;
- const uint16_t *shadowRegs() const;
+ const MCPhysReg *shadowRegs() const;
void allocateRegs(ByValArgInfo &ByVal, unsigned ByValSize,
unsigned Align);
@@ -523,41 +529,39 @@ namespace llvm {
void writeVarArgRegs(std::vector<SDValue> &OutChains, const MipsCC &CC,
SDValue Chain, SDLoc DL, SelectionDAG &DAG) const;
- virtual SDValue
+ SDValue
LowerFormalArguments(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
SDLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const;
+ SmallVectorImpl<SDValue> &InVals) const override;
SDValue passArgOnStack(SDValue StackPtr, unsigned Offset, SDValue Chain,
SDValue Arg, SDLoc DL, bool IsTailCall,
SelectionDAG &DAG) const;
- virtual SDValue
- LowerCall(TargetLowering::CallLoweringInfo &CLI,
- SmallVectorImpl<SDValue> &InVals) const;
+ SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI,
+ SmallVectorImpl<SDValue> &InVals) const override;
- virtual bool
- CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
- bool isVarArg,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- LLVMContext &Context) const;
-
- virtual SDValue
- LowerReturn(SDValue Chain,
- CallingConv::ID CallConv, bool isVarArg,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- SDLoc dl, SelectionDAG &DAG) const;
+ bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ LLVMContext &Context) const override;
+
+ SDValue LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ SDLoc dl, SelectionDAG &DAG) const override;
// Inline asm support
- ConstraintType getConstraintType(const std::string &Constraint) const;
+ ConstraintType
+ getConstraintType(const std::string &Constraint) const override;
/// Examine constraint string and operand type and determine a weight value.
/// The operand object must already have been set up with the operand type.
ConstraintWeight getSingleConstraintMatchWeight(
- AsmOperandInfo &info, const char *constraint) const;
+ AsmOperandInfo &info, const char *constraint) const override;
/// This function parses registers that appear in inline-asm constraints.
/// It returns pair (0, 0) on failure.
@@ -566,33 +570,33 @@ namespace llvm {
std::pair<unsigned, const TargetRegisterClass*>
getRegForInlineAsmConstraint(const std::string &Constraint,
- MVT VT) const;
+ MVT VT) const override;
/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
/// vector. If it is invalid, don't add anything to Ops. If hasMemory is
/// true, it means one of the asm constraints of the inline asm instruction
/// being processed is 'm'.
- virtual void LowerAsmOperandForConstraint(SDValue Op,
- std::string &Constraint,
- std::vector<SDValue> &Ops,
- SelectionDAG &DAG) const;
+ void LowerAsmOperandForConstraint(SDValue Op,
+ std::string &Constraint,
+ std::vector<SDValue> &Ops,
+ SelectionDAG &DAG) const override;
- virtual bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const;
+ bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const override;
- virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
+ bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
- virtual EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign,
- unsigned SrcAlign,
- bool IsMemset, bool ZeroMemset,
- bool MemcpyStrSrc,
- MachineFunction &MF) const;
+ EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign,
+ unsigned SrcAlign,
+ bool IsMemset, bool ZeroMemset,
+ bool MemcpyStrSrc,
+ MachineFunction &MF) const override;
/// isFPImmLegal - Returns true if the target can instruction select the
/// specified FP immediate natively. If false, the legalizer will
/// materialize the FP immediate as a load from a constant pool.
- virtual bool isFPImmLegal(const APFloat &Imm, EVT VT) const;
+ bool isFPImmLegal(const APFloat &Imm, EVT VT) const override;
- virtual unsigned getJumpTableEncoding() const;
+ unsigned getJumpTableEncoding() const override;
MachineBasicBlock *emitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
unsigned Size, unsigned BinOpcode, bool Nand = false) const;
@@ -608,6 +612,11 @@ namespace llvm {
/// Create MipsTargetLowering objects.
const MipsTargetLowering *createMips16TargetLowering(MipsTargetMachine &TM);
const MipsTargetLowering *createMipsSETargetLowering(MipsTargetMachine &TM);
+
+ namespace Mips {
+ FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
+ const TargetLibraryInfo *libInfo);
+ }
}
#endif // MipsISELLOWERING_H
diff --git a/lib/Target/Mips/MipsInstrFPU.td b/lib/Target/Mips/MipsInstrFPU.td
index 4b5a73e..32cda3b 100644
--- a/lib/Target/Mips/MipsInstrFPU.td
+++ b/lib/Target/Mips/MipsInstrFPU.td
@@ -66,6 +66,16 @@ def IsSingleFloat : Predicate<"Subtarget.isSingleFloat()">,
def IsNotSingleFloat : Predicate<"!Subtarget.isSingleFloat()">,
AssemblerPredicate<"!FeatureSingleFloat">;
+//===----------------------------------------------------------------------===//
+// Mips FGR size adjectives.
+// They are mutually exclusive.
+//===----------------------------------------------------------------------===//
+
+class FGR_32 { list<Predicate> FGRPredicates = [NotFP64bit]; }
+class FGR_64 { list<Predicate> FGRPredicates = [IsFP64bit]; }
+
+//===----------------------------------------------------------------------===//
+
// FP immediate patterns.
def fpimm0 : PatLeaf<(fpimm), [{
return N->isExactlyValue(+0.0);
@@ -100,10 +110,10 @@ class ADDS_FT<string opstr, RegisterOperand RC, InstrItinClass Itin, bit IsComm,
multiclass ADDS_M<string opstr, InstrItinClass Itin, bit IsComm,
SDPatternOperator OpNode = null_frag> {
def _D32 : MMRel, ADDS_FT<opstr, AFGR64Opnd, Itin, IsComm, OpNode>,
- Requires<[NotFP64bit, HasStdEnc]>;
+ AdditionalRequires<[NotFP64bit]>;
def _D64 : ADDS_FT<opstr, FGR64Opnd, Itin,
IsComm, OpNode>,
- Requires<[IsFP64bit, HasStdEnc]> {
+ AdditionalRequires<[IsFP64bit]> {
string DecoderNamespace = "Mips64";
}
}
@@ -117,18 +127,18 @@ class ABSS_FT<string opstr, RegisterOperand DstRC, RegisterOperand SrcRC,
multiclass ABSS_M<string opstr, InstrItinClass Itin,
SDPatternOperator OpNode= null_frag> {
def _D32 : MMRel, ABSS_FT<opstr, AFGR64Opnd, AFGR64Opnd, Itin, OpNode>,
- Requires<[NotFP64bit, HasStdEnc]>;
+ AdditionalRequires<[NotFP64bit]>;
def _D64 : ABSS_FT<opstr, FGR64Opnd, FGR64Opnd, Itin, OpNode>,
- Requires<[IsFP64bit, HasStdEnc]> {
+ AdditionalRequires<[IsFP64bit]> {
string DecoderNamespace = "Mips64";
}
}
multiclass ROUND_M<string opstr, InstrItinClass Itin> {
def _D32 : MMRel, ABSS_FT<opstr, FGR32Opnd, AFGR64Opnd, Itin>,
- Requires<[NotFP64bit, HasStdEnc]>;
+ AdditionalRequires<[NotFP64bit]>;
def _D64 : ABSS_FT<opstr, FGR32Opnd, FGR64Opnd, Itin>,
- Requires<[IsFP64bit, HasStdEnc]> {
+ AdditionalRequires<[IsFP64bit]> {
let DecoderNamespace = "Mips64";
}
}
@@ -241,77 +251,75 @@ multiclass C_COND_M<string TypeStr, RegisterOperand RC, bits<5> fmt,
defm S : C_COND_M<"s", FGR32Opnd, 16, II_C_CC_S>;
defm D32 : C_COND_M<"d", AFGR64Opnd, 17, II_C_CC_D>,
- Requires<[NotFP64bit, HasStdEnc]>;
+ AdditionalRequires<[NotFP64bit]>;
let DecoderNamespace = "Mips64" in
defm D64 : C_COND_M<"d", FGR64Opnd, 17, II_C_CC_D>,
- Requires<[IsFP64bit, HasStdEnc]>;
+ AdditionalRequires<[IsFP64bit]>;
//===----------------------------------------------------------------------===//
// Floating Point Instructions
//===----------------------------------------------------------------------===//
def ROUND_W_S : MMRel, ABSS_FT<"round.w.s", FGR32Opnd, FGR32Opnd, II_ROUND>,
- ABSS_FM<0xc, 16>;
+ ABSS_FM<0xc, 16>, ISA_MIPS2;
def TRUNC_W_S : MMRel, ABSS_FT<"trunc.w.s", FGR32Opnd, FGR32Opnd, II_TRUNC>,
- ABSS_FM<0xd, 16>;
+ ABSS_FM<0xd, 16>, ISA_MIPS2;
def CEIL_W_S : MMRel, ABSS_FT<"ceil.w.s", FGR32Opnd, FGR32Opnd, II_CEIL>,
- ABSS_FM<0xe, 16>;
+ ABSS_FM<0xe, 16>, ISA_MIPS2;
def FLOOR_W_S : MMRel, ABSS_FT<"floor.w.s", FGR32Opnd, FGR32Opnd, II_FLOOR>,
- ABSS_FM<0xf, 16>;
+ ABSS_FM<0xf, 16>, ISA_MIPS2;
def CVT_W_S : MMRel, ABSS_FT<"cvt.w.s", FGR32Opnd, FGR32Opnd, II_CVT>,
ABSS_FM<0x24, 16>;
-defm ROUND_W : ROUND_M<"round.w.d", II_ROUND>, ABSS_FM<0xc, 17>;
-defm TRUNC_W : ROUND_M<"trunc.w.d", II_TRUNC>, ABSS_FM<0xd, 17>;
-defm CEIL_W : ROUND_M<"ceil.w.d", II_CEIL>, ABSS_FM<0xe, 17>;
-defm FLOOR_W : ROUND_M<"floor.w.d", II_FLOOR>, ABSS_FM<0xf, 17>;
+defm ROUND_W : ROUND_M<"round.w.d", II_ROUND>, ABSS_FM<0xc, 17>, ISA_MIPS2;
+defm TRUNC_W : ROUND_M<"trunc.w.d", II_TRUNC>, ABSS_FM<0xd, 17>, ISA_MIPS2;
+defm CEIL_W : ROUND_M<"ceil.w.d", II_CEIL>, ABSS_FM<0xe, 17>, ISA_MIPS2;
+defm FLOOR_W : ROUND_M<"floor.w.d", II_FLOOR>, ABSS_FM<0xf, 17>, ISA_MIPS2;
defm CVT_W : ROUND_M<"cvt.w.d", II_CVT>, ABSS_FM<0x24, 17>;
-let Predicates = [IsFP64bit, HasStdEnc], DecoderNamespace = "Mips64" in {
+let DecoderNamespace = "Mips64" in {
def ROUND_L_S : ABSS_FT<"round.l.s", FGR64Opnd, FGR32Opnd, II_ROUND>,
- ABSS_FM<0x8, 16>;
+ ABSS_FM<0x8, 16>, FGR_64;
def ROUND_L_D64 : ABSS_FT<"round.l.d", FGR64Opnd, FGR64Opnd, II_ROUND>,
- ABSS_FM<0x8, 17>;
+ ABSS_FM<0x8, 17>, FGR_64;
def TRUNC_L_S : ABSS_FT<"trunc.l.s", FGR64Opnd, FGR32Opnd, II_TRUNC>,
- ABSS_FM<0x9, 16>;
+ ABSS_FM<0x9, 16>, FGR_64;
def TRUNC_L_D64 : ABSS_FT<"trunc.l.d", FGR64Opnd, FGR64Opnd, II_TRUNC>,
- ABSS_FM<0x9, 17>;
+ ABSS_FM<0x9, 17>, FGR_64;
def CEIL_L_S : ABSS_FT<"ceil.l.s", FGR64Opnd, FGR32Opnd, II_CEIL>,
- ABSS_FM<0xa, 16>;
+ ABSS_FM<0xa, 16>, FGR_64;
def CEIL_L_D64 : ABSS_FT<"ceil.l.d", FGR64Opnd, FGR64Opnd, II_CEIL>,
- ABSS_FM<0xa, 17>;
+ ABSS_FM<0xa, 17>, FGR_64;
def FLOOR_L_S : ABSS_FT<"floor.l.s", FGR64Opnd, FGR32Opnd, II_FLOOR>,
- ABSS_FM<0xb, 16>;
+ ABSS_FM<0xb, 16>, FGR_64;
def FLOOR_L_D64 : ABSS_FT<"floor.l.d", FGR64Opnd, FGR64Opnd, II_FLOOR>,
- ABSS_FM<0xb, 17>;
+ ABSS_FM<0xb, 17>, FGR_64;
}
def CVT_S_W : MMRel, ABSS_FT<"cvt.s.w", FGR32Opnd, FGR32Opnd, II_CVT>,
ABSS_FM<0x20, 20>;
def CVT_L_S : MMRel, ABSS_FT<"cvt.l.s", FGR64Opnd, FGR32Opnd, II_CVT>,
- ABSS_FM<0x25, 16>;
+ ABSS_FM<0x25, 16>, INSN_MIPS3_32R2;
def CVT_L_D64: MMRel, ABSS_FT<"cvt.l.d", FGR64Opnd, FGR64Opnd, II_CVT>,
- ABSS_FM<0x25, 17>;
-
-let Predicates = [NotFP64bit, HasStdEnc] in {
- def CVT_S_D32 : MMRel, ABSS_FT<"cvt.s.d", FGR32Opnd, AFGR64Opnd, II_CVT>,
- ABSS_FM<0x20, 17>;
- def CVT_D32_W : MMRel, ABSS_FT<"cvt.d.w", AFGR64Opnd, FGR32Opnd, II_CVT>,
- ABSS_FM<0x21, 20>;
- def CVT_D32_S : MMRel, ABSS_FT<"cvt.d.s", AFGR64Opnd, FGR32Opnd, II_CVT>,
- ABSS_FM<0x21, 16>;
-}
+ ABSS_FM<0x25, 17>, INSN_MIPS3_32R2;
+
+def CVT_S_D32 : MMRel, ABSS_FT<"cvt.s.d", FGR32Opnd, AFGR64Opnd, II_CVT>,
+ ABSS_FM<0x20, 17>, FGR_32;
+def CVT_D32_W : MMRel, ABSS_FT<"cvt.d.w", AFGR64Opnd, FGR32Opnd, II_CVT>,
+ ABSS_FM<0x21, 20>, FGR_32;
+def CVT_D32_S : MMRel, ABSS_FT<"cvt.d.s", AFGR64Opnd, FGR32Opnd, II_CVT>,
+ ABSS_FM<0x21, 16>, FGR_32;
-let Predicates = [IsFP64bit, HasStdEnc], DecoderNamespace = "Mips64" in {
+let DecoderNamespace = "Mips64" in {
def CVT_S_D64 : ABSS_FT<"cvt.s.d", FGR32Opnd, FGR64Opnd, II_CVT>,
- ABSS_FM<0x20, 17>;
+ ABSS_FM<0x20, 17>, FGR_64;
def CVT_S_L : ABSS_FT<"cvt.s.l", FGR32Opnd, FGR64Opnd, II_CVT>,
- ABSS_FM<0x20, 21>;
+ ABSS_FM<0x20, 21>, FGR_64;
def CVT_D64_W : ABSS_FT<"cvt.d.w", FGR64Opnd, FGR32Opnd, II_CVT>,
- ABSS_FM<0x21, 20>;
+ ABSS_FM<0x21, 20>, FGR_64;
def CVT_D64_S : ABSS_FT<"cvt.d.s", FGR64Opnd, FGR32Opnd, II_CVT>,
- ABSS_FM<0x21, 16>;
+ ABSS_FM<0x21, 16>, FGR_64;
def CVT_D64_L : ABSS_FT<"cvt.d.l", FGR64Opnd, FGR64Opnd, II_CVT>,
- ABSS_FM<0x21, 21>;
+ ABSS_FM<0x21, 21>, FGR_64;
}
let isPseudo = 1, isCodeGenOnly = 1 in {
@@ -322,18 +330,16 @@ let isPseudo = 1, isCodeGenOnly = 1 in {
def PseudoCVT_D64_L : ABSS_FT<"", FGR64Opnd, GPR64Opnd, II_CVT>;
}
-let Predicates = [NoNaNsFPMath, HasStdEnc] in {
- def FABS_S : MMRel, ABSS_FT<"abs.s", FGR32Opnd, FGR32Opnd, II_ABS, fabs>,
- ABSS_FM<0x5, 16>;
- def FNEG_S : MMRel, ABSS_FT<"neg.s", FGR32Opnd, FGR32Opnd, II_NEG, fneg>,
- ABSS_FM<0x7, 16>;
- defm FABS : ABSS_M<"abs.d", II_ABS, fabs>, ABSS_FM<0x5, 17>;
- defm FNEG : ABSS_M<"neg.d", II_NEG, fneg>, ABSS_FM<0x7, 17>;
-}
+def FABS_S : MMRel, ABSS_FT<"abs.s", FGR32Opnd, FGR32Opnd, II_ABS, fabs>,
+ ABSS_FM<0x5, 16>;
+def FNEG_S : MMRel, ABSS_FT<"neg.s", FGR32Opnd, FGR32Opnd, II_NEG, fneg>,
+ ABSS_FM<0x7, 16>;
+defm FABS : ABSS_M<"abs.d", II_ABS, fabs>, ABSS_FM<0x5, 17>;
+defm FNEG : ABSS_M<"neg.d", II_NEG, fneg>, ABSS_FM<0x7, 17>;
def FSQRT_S : MMRel, ABSS_FT<"sqrt.s", FGR32Opnd, FGR32Opnd, II_SQRT_S, fsqrt>,
- ABSS_FM<0x4, 16>;
-defm FSQRT : ABSS_M<"sqrt.d", II_SQRT_D, fsqrt>, ABSS_FM<0x4, 17>;
+ ABSS_FM<0x4, 16>, ISA_MIPS2;
+defm FSQRT : ABSS_M<"sqrt.d", II_SQRT_D, fsqrt>, ABSS_FM<0x4, 17>, ISA_MIPS2;
// The odd-numbered registers are only referenced when doing loads,
// stores, and moves between floating-point and integer registers.
@@ -348,76 +354,92 @@ def MFC1 : MMRel, MFC1_FT<"mfc1", GPR32Opnd, FGR32Opnd, II_MFC1,
def MTC1 : MMRel, MTC1_FT<"mtc1", FGR32Opnd, GPR32Opnd, II_MTC1,
bitconvert>, MFC1_FM<4>;
def MFHC1 : MMRel, MFC1_FT<"mfhc1", GPR32Opnd, FGRH32Opnd, II_MFHC1>,
- MFC1_FM<3>;
+ MFC1_FM<3>, ISA_MIPS32R2;
def MTHC1 : MMRel, MTC1_FT<"mthc1", FGRH32Opnd, GPR32Opnd, II_MTHC1>,
- MFC1_FM<7>;
+ MFC1_FM<7>, ISA_MIPS32R2;
def DMFC1 : MFC1_FT<"dmfc1", GPR64Opnd, FGR64Opnd, II_DMFC1,
- bitconvert>, MFC1_FM<1>;
+ bitconvert>, MFC1_FM<1>, ISA_MIPS3;
def DMTC1 : MTC1_FT<"dmtc1", FGR64Opnd, GPR64Opnd, II_DMTC1,
- bitconvert>, MFC1_FM<5>;
+ bitconvert>, MFC1_FM<5>, ISA_MIPS3;
def FMOV_S : MMRel, ABSS_FT<"mov.s", FGR32Opnd, FGR32Opnd, II_MOV_S>,
ABSS_FM<0x6, 16>;
def FMOV_D32 : MMRel, ABSS_FT<"mov.d", AFGR64Opnd, AFGR64Opnd, II_MOV_D>,
- ABSS_FM<0x6, 17>, Requires<[NotFP64bit, HasStdEnc]>;
+ ABSS_FM<0x6, 17>, AdditionalRequires<[NotFP64bit]>;
def FMOV_D64 : ABSS_FT<"mov.d", FGR64Opnd, FGR64Opnd, II_MOV_D>,
- ABSS_FM<0x6, 17>, Requires<[IsFP64bit, HasStdEnc]> {
+ ABSS_FM<0x6, 17>, AdditionalRequires<[IsFP64bit]> {
let DecoderNamespace = "Mips64";
}
/// Floating Point Memory Instructions
-let Predicates = [HasStdEnc] in {
- def LWC1 : MMRel, LW_FT<"lwc1", FGR32Opnd, II_LWC1, load>, LW_FM<0x31>;
- def SWC1 : MMRel, SW_FT<"swc1", FGR32Opnd, II_SWC1, store>, LW_FM<0x39>;
-}
-
-let Predicates = [IsFP64bit, HasStdEnc], DecoderNamespace = "Mips64" in {
- def LDC164 : LW_FT<"ldc1", FGR64Opnd, II_LDC1, load>, LW_FM<0x35>;
- def SDC164 : SW_FT<"sdc1", FGR64Opnd, II_SDC1, store>, LW_FM<0x3d>;
-}
-
-let Predicates = [NotFP64bit, HasStdEnc] in {
- def LDC1 : MMRel, LW_FT<"ldc1", AFGR64Opnd, II_LDC1, load>, LW_FM<0x35>;
- def SDC1 : MMRel, SW_FT<"sdc1", AFGR64Opnd, II_SDC1, store>, LW_FM<0x3d>;
-}
-
-/// Cop2 Memory Instructions
-let Predicates = [HasStdEnc] in {
- def LWC2 : LW_FT<"lwc2", COP2Opnd, NoItinerary, load>, LW_FM<0x32>;
- def SWC2 : SW_FT<"swc2", COP2Opnd, NoItinerary, store>, LW_FM<0x3a>;
- def LDC2 : LW_FT<"ldc2", COP2Opnd, NoItinerary, load>, LW_FM<0x36>;
- def SDC2 : SW_FT<"sdc2", COP2Opnd, NoItinerary, store>, LW_FM<0x3e>;
-}
+def LWC1 : MMRel, LW_FT<"lwc1", FGR32Opnd, II_LWC1, load>, LW_FM<0x31>;
+def SWC1 : MMRel, SW_FT<"swc1", FGR32Opnd, II_SWC1, store>, LW_FM<0x39>;
+
+let DecoderNamespace = "Mips64" in {
+ def LDC164 : LW_FT<"ldc1", FGR64Opnd, II_LDC1, load>, LW_FM<0x35>, ISA_MIPS2,
+ FGR_64;
+ def SDC164 : SW_FT<"sdc1", FGR64Opnd, II_SDC1, store>, LW_FM<0x3d>, ISA_MIPS2,
+ FGR_64;
+}
+
+def LDC1 : MMRel, LW_FT<"ldc1", AFGR64Opnd, II_LDC1, load>, LW_FM<0x35>,
+ ISA_MIPS2, FGR_32;
+def SDC1 : MMRel, SW_FT<"sdc1", AFGR64Opnd, II_SDC1, store>, LW_FM<0x3d>,
+ ISA_MIPS2, FGR_32;
+
+// Cop2 Memory Instructions
+// FIXME: These aren't really FPU instructions and as such don't belong in this
+// file
+def LWC2 : LW_FT<"lwc2", COP2Opnd, NoItinerary, load>, LW_FM<0x32>;
+def SWC2 : SW_FT<"swc2", COP2Opnd, NoItinerary, store>, LW_FM<0x3a>;
+def LDC2 : LW_FT<"ldc2", COP2Opnd, NoItinerary, load>, LW_FM<0x36>, ISA_MIPS2;
+def SDC2 : SW_FT<"sdc2", COP2Opnd, NoItinerary, store>, LW_FM<0x3e>, ISA_MIPS2;
+
+// Cop3 Memory Instructions
+// FIXME: These aren't really FPU instructions and as such don't belong in this
+// file
+def LWC3 : LW_FT<"lwc3", COP3Opnd, NoItinerary, load>, LW_FM<0x33>;
+def SWC3 : SW_FT<"swc3", COP3Opnd, NoItinerary, store>, LW_FM<0x3b>;
+def LDC3 : LW_FT<"ldc3", COP3Opnd, NoItinerary, load>, LW_FM<0x37>, ISA_MIPS2;
+def SDC3 : SW_FT<"sdc3", COP3Opnd, NoItinerary, store>, LW_FM<0x3f>, ISA_MIPS2;
// Indexed loads and stores.
// Base register + offset register addressing mode (indicated by "x" in the
// instruction mnemonic) is disallowed under NaCl.
-let Predicates = [HasFPIdx, HasStdEnc, IsNotNaCl] in {
- def LWXC1 : MMRel, LWXC1_FT<"lwxc1", FGR32Opnd, II_LWXC1, load>, LWXC1_FM<0>;
- def SWXC1 : MMRel, SWXC1_FT<"swxc1", FGR32Opnd, II_SWXC1, store>, SWXC1_FM<8>;
+let AdditionalPredicates = [IsNotNaCl] in {
+ def LWXC1 : MMRel, LWXC1_FT<"lwxc1", FGR32Opnd, II_LWXC1, load>, LWXC1_FM<0>,
+ INSN_MIPS4_32R2;
+ def SWXC1 : MMRel, SWXC1_FT<"swxc1", FGR32Opnd, II_SWXC1, store>, SWXC1_FM<8>,
+ INSN_MIPS4_32R2;
}
-let Predicates = [HasFPIdx, NotFP64bit, HasStdEnc, NotInMicroMips,
- IsNotNaCl] in {
- def LDXC1 : LWXC1_FT<"ldxc1", AFGR64Opnd, II_LDXC1, load>, LWXC1_FM<1>;
- def SDXC1 : SWXC1_FT<"sdxc1", AFGR64Opnd, II_SDXC1, store>, SWXC1_FM<9>;
+let AdditionalPredicates = [NotInMicroMips, IsNotNaCl] in {
+ def LDXC1 : LWXC1_FT<"ldxc1", AFGR64Opnd, II_LDXC1, load>, LWXC1_FM<1>,
+ INSN_MIPS4_32R2, FGR_32;
+ def SDXC1 : SWXC1_FT<"sdxc1", AFGR64Opnd, II_SDXC1, store>, SWXC1_FM<9>,
+ INSN_MIPS4_32R2, FGR_32;
}
-let Predicates = [HasFPIdx, IsFP64bit, HasStdEnc],
- DecoderNamespace="Mips64" in {
- def LDXC164 : LWXC1_FT<"ldxc1", FGR64Opnd, II_LDXC1, load>, LWXC1_FM<1>;
- def SDXC164 : SWXC1_FT<"sdxc1", FGR64Opnd, II_SDXC1, store>, SWXC1_FM<9>;
+let DecoderNamespace="Mips64" in {
+ def LDXC164 : LWXC1_FT<"ldxc1", FGR64Opnd, II_LDXC1, load>, LWXC1_FM<1>,
+ INSN_MIPS4_32R2, FGR_64;
+ def SDXC164 : SWXC1_FT<"sdxc1", FGR64Opnd, II_SDXC1, store>, SWXC1_FM<9>,
+ INSN_MIPS4_32R2, FGR_64;
}
// Load/store doubleword indexed unaligned.
-let Predicates = [NotFP64bit, HasStdEnc, IsNotNaCl] in {
- def LUXC1 : MMRel, LWXC1_FT<"luxc1", AFGR64Opnd, II_LUXC1>, LWXC1_FM<0x5>;
- def SUXC1 : MMRel, SWXC1_FT<"suxc1", AFGR64Opnd, II_SUXC1>, SWXC1_FM<0xd>;
+let AdditionalPredicates = [IsNotNaCl] in {
+ def LUXC1 : MMRel, LWXC1_FT<"luxc1", AFGR64Opnd, II_LUXC1>, LWXC1_FM<0x5>,
+ INSN_MIPS5_32R2, FGR_32;
+ def SUXC1 : MMRel, SWXC1_FT<"suxc1", AFGR64Opnd, II_SUXC1>, SWXC1_FM<0xd>,
+ INSN_MIPS5_32R2, FGR_32;
}
-let Predicates = [IsFP64bit, HasStdEnc], DecoderNamespace="Mips64" in {
- def LUXC164 : LWXC1_FT<"luxc1", FGR64Opnd, II_LUXC1>, LWXC1_FM<0x5>;
- def SUXC164 : SWXC1_FT<"suxc1", FGR64Opnd, II_SUXC1>, SWXC1_FM<0xd>;
+let DecoderNamespace="Mips64" in {
+ def LUXC164 : LWXC1_FT<"luxc1", FGR64Opnd, II_LUXC1>, LWXC1_FM<0x5>,
+ INSN_MIPS5_32R2, FGR_64;
+ def SUXC164 : SWXC1_FT<"suxc1", FGR64Opnd, II_SUXC1>, SWXC1_FM<0xd>,
+ INSN_MIPS5_32R2, FGR_64;
}
/// Floating-point Arithmetic
@@ -434,47 +456,43 @@ def FSUB_S : MMRel, ADDS_FT<"sub.s", FGR32Opnd, II_SUB_S, 0, fsub>,
ADDS_FM<0x01, 16>;
defm FSUB : ADDS_M<"sub.d", II_SUB_D, 0, fsub>, ADDS_FM<0x01, 17>;
-let Predicates = [HasMips32r2, HasStdEnc] in {
- def MADD_S : MMRel, MADDS_FT<"madd.s", FGR32Opnd, II_MADD_S, fadd>,
- MADDS_FM<4, 0>;
- def MSUB_S : MMRel, MADDS_FT<"msub.s", FGR32Opnd, II_MSUB_S, fsub>,
- MADDS_FM<5, 0>;
-}
+def MADD_S : MMRel, MADDS_FT<"madd.s", FGR32Opnd, II_MADD_S, fadd>,
+ MADDS_FM<4, 0>, ISA_MIPS32R2;
+def MSUB_S : MMRel, MADDS_FT<"msub.s", FGR32Opnd, II_MSUB_S, fsub>,
+ MADDS_FM<5, 0>, ISA_MIPS32R2;
-let Predicates = [HasMips32r2, NoNaNsFPMath, HasStdEnc] in {
+let AdditionalPredicates = [NoNaNsFPMath] in {
def NMADD_S : MMRel, NMADDS_FT<"nmadd.s", FGR32Opnd, II_NMADD_S, fadd>,
- MADDS_FM<6, 0>;
+ MADDS_FM<6, 0>, ISA_MIPS32R2;
def NMSUB_S : MMRel, NMADDS_FT<"nmsub.s", FGR32Opnd, II_NMSUB_S, fsub>,
- MADDS_FM<7, 0>;
+ MADDS_FM<7, 0>, ISA_MIPS32R2;
}
-let Predicates = [HasMips32r2, NotFP64bit, HasStdEnc] in {
- def MADD_D32 : MMRel, MADDS_FT<"madd.d", AFGR64Opnd, II_MADD_D, fadd>,
- MADDS_FM<4, 1>;
- def MSUB_D32 : MMRel, MADDS_FT<"msub.d", AFGR64Opnd, II_MSUB_D, fsub>,
- MADDS_FM<5, 1>;
-}
+def MADD_D32 : MMRel, MADDS_FT<"madd.d", AFGR64Opnd, II_MADD_D, fadd>,
+ MADDS_FM<4, 1>, ISA_MIPS32R2, FGR_32;
+def MSUB_D32 : MMRel, MADDS_FT<"msub.d", AFGR64Opnd, II_MSUB_D, fsub>,
+ MADDS_FM<5, 1>, ISA_MIPS32R2, FGR_32;
-let Predicates = [HasMips32r2, NotFP64bit, NoNaNsFPMath, HasStdEnc] in {
+let AdditionalPredicates = [NoNaNsFPMath] in {
def NMADD_D32 : MMRel, NMADDS_FT<"nmadd.d", AFGR64Opnd, II_NMADD_D, fadd>,
- MADDS_FM<6, 1>;
+ MADDS_FM<6, 1>, ISA_MIPS32R2, FGR_32;
def NMSUB_D32 : MMRel, NMADDS_FT<"nmsub.d", AFGR64Opnd, II_NMSUB_D, fsub>,
- MADDS_FM<7, 1>;
+ MADDS_FM<7, 1>, ISA_MIPS32R2, FGR_32;
}
-let Predicates = [HasMips32r2, IsFP64bit, HasStdEnc], isCodeGenOnly=1 in {
+let isCodeGenOnly=1 in {
def MADD_D64 : MADDS_FT<"madd.d", FGR64Opnd, II_MADD_D, fadd>,
- MADDS_FM<4, 1>;
+ MADDS_FM<4, 1>, ISA_MIPS32R2, FGR_64;
def MSUB_D64 : MADDS_FT<"msub.d", FGR64Opnd, II_MSUB_D, fsub>,
- MADDS_FM<5, 1>;
+ MADDS_FM<5, 1>, ISA_MIPS32R2, FGR_64;
}
-let Predicates = [HasMips32r2, IsFP64bit, NoNaNsFPMath, HasStdEnc],
+let AdditionalPredicates = [NoNaNsFPMath],
isCodeGenOnly=1 in {
def NMADD_D64 : NMADDS_FT<"nmadd.d", FGR64Opnd, II_NMADD_D, fadd>,
- MADDS_FM<6, 1>;
+ MADDS_FM<6, 1>, ISA_MIPS32R2, FGR_64;
def NMSUB_D64 : NMADDS_FT<"nmsub.d", FGR64Opnd, II_NMSUB_D, fsub>,
- MADDS_FM<7, 1>;
+ MADDS_FM<7, 1>, ISA_MIPS32R2, FGR_64;
}
//===----------------------------------------------------------------------===//
@@ -515,10 +533,10 @@ def MIPS_FCOND_NGT : PatLeaf<(i32 15)>;
/// Floating Point Compare
def FCMP_S32 : MMRel, CEQS_FT<"s", FGR32, II_C_CC_S, MipsFPCmp>, CEQS_FM<16>;
def FCMP_D32 : MMRel, CEQS_FT<"d", AFGR64, II_C_CC_D, MipsFPCmp>, CEQS_FM<17>,
- Requires<[NotFP64bit, HasStdEnc]>;
+ AdditionalRequires<[NotFP64bit]>;
let DecoderNamespace = "Mips64" in
def FCMP_D64 : CEQS_FT<"d", FGR64, II_C_CC_D, MipsFPCmp>, CEQS_FM<17>,
- Requires<[IsFP64bit, HasStdEnc]>;
+ AdditionalRequires<[IsFP64bit]>;
//===----------------------------------------------------------------------===//
// Floating Point Pseudo-Instructions
@@ -531,9 +549,9 @@ class BuildPairF64Base<RegisterOperand RO> :
[(set RO:$dst, (MipsBuildPairF64 GPR32Opnd:$lo, GPR32Opnd:$hi))]>;
def BuildPairF64 : BuildPairF64Base<AFGR64Opnd>,
- Requires<[NotFP64bit, HasStdEnc]>;
+ AdditionalRequires<[NotFP64bit]>;
def BuildPairF64_64 : BuildPairF64Base<FGR64Opnd>,
- Requires<[IsFP64bit, HasStdEnc]>;
+ AdditionalRequires<[IsFP64bit]>;
// This pseudo instr gets expanded into 2 mfc1 instrs after register
// allocation.
@@ -544,15 +562,15 @@ class ExtractElementF64Base<RegisterOperand RO> :
[(set GPR32Opnd:$dst, (MipsExtractElementF64 RO:$src, imm:$n))]>;
def ExtractElementF64 : ExtractElementF64Base<AFGR64Opnd>,
- Requires<[NotFP64bit, HasStdEnc]>;
+ AdditionalRequires<[NotFP64bit]>;
def ExtractElementF64_64 : ExtractElementF64Base<FGR64Opnd>,
- Requires<[IsFP64bit, HasStdEnc]>;
+ AdditionalRequires<[IsFP64bit]>;
//===----------------------------------------------------------------------===//
// InstAliases.
//===----------------------------------------------------------------------===//
-def : InstAlias<"bc1t $offset", (BC1T FCC0, brtarget:$offset)>;
-def : InstAlias<"bc1f $offset", (BC1F FCC0, brtarget:$offset)>;
+def : MipsInstAlias<"bc1t $offset", (BC1T FCC0, brtarget:$offset)>;
+def : MipsInstAlias<"bc1f $offset", (BC1F FCC0, brtarget:$offset)>;
//===----------------------------------------------------------------------===//
// Floating Point Patterns
@@ -565,55 +583,45 @@ def : MipsPat<(f32 (sint_to_fp GPR32Opnd:$src)),
def : MipsPat<(MipsTruncIntFP FGR32Opnd:$src),
(TRUNC_W_S FGR32Opnd:$src)>;
-let Predicates = [NotFP64bit, HasStdEnc] in {
- def : MipsPat<(f64 (sint_to_fp GPR32Opnd:$src)),
- (PseudoCVT_D32_W GPR32Opnd:$src)>;
- def : MipsPat<(MipsTruncIntFP AFGR64Opnd:$src),
- (TRUNC_W_D32 AFGR64Opnd:$src)>;
- def : MipsPat<(f32 (fround AFGR64Opnd:$src)),
- (CVT_S_D32 AFGR64Opnd:$src)>;
- def : MipsPat<(f64 (fextend FGR32Opnd:$src)),
- (CVT_D32_S FGR32Opnd:$src)>;
-}
+def : MipsPat<(f64 (sint_to_fp GPR32Opnd:$src)),
+ (PseudoCVT_D32_W GPR32Opnd:$src)>, FGR_32;
+def : MipsPat<(MipsTruncIntFP AFGR64Opnd:$src),
+ (TRUNC_W_D32 AFGR64Opnd:$src)>, FGR_32;
+def : MipsPat<(f32 (fround AFGR64Opnd:$src)),
+ (CVT_S_D32 AFGR64Opnd:$src)>, FGR_32;
+def : MipsPat<(f64 (fextend FGR32Opnd:$src)),
+ (CVT_D32_S FGR32Opnd:$src)>, FGR_32;
+
+def : MipsPat<(f64 fpimm0), (DMTC1 ZERO_64)>, FGR_64;
+def : MipsPat<(f64 fpimm0neg), (FNEG_D64 (DMTC1 ZERO_64))>, FGR_64;
+
+def : MipsPat<(f64 (sint_to_fp GPR32Opnd:$src)),
+ (PseudoCVT_D64_W GPR32Opnd:$src)>, FGR_64;
+def : MipsPat<(f32 (sint_to_fp GPR64Opnd:$src)),
+ (EXTRACT_SUBREG (PseudoCVT_S_L GPR64Opnd:$src), sub_lo)>, FGR_64;
+def : MipsPat<(f64 (sint_to_fp GPR64Opnd:$src)),
+ (PseudoCVT_D64_L GPR64Opnd:$src)>, FGR_64;
+
+def : MipsPat<(MipsTruncIntFP FGR64Opnd:$src),
+ (TRUNC_W_D64 FGR64Opnd:$src)>, FGR_64;
+def : MipsPat<(MipsTruncIntFP FGR32Opnd:$src),
+ (TRUNC_L_S FGR32Opnd:$src)>, FGR_64;
+def : MipsPat<(MipsTruncIntFP FGR64Opnd:$src),
+ (TRUNC_L_D64 FGR64Opnd:$src)>, FGR_64;
-let Predicates = [IsFP64bit, HasStdEnc] in {
- def : MipsPat<(f64 fpimm0), (DMTC1 ZERO_64)>;
- def : MipsPat<(f64 fpimm0neg), (FNEG_D64 (DMTC1 ZERO_64))>;
-
- def : MipsPat<(f64 (sint_to_fp GPR32Opnd:$src)),
- (PseudoCVT_D64_W GPR32Opnd:$src)>;
- def : MipsPat<(f32 (sint_to_fp GPR64Opnd:$src)),
- (EXTRACT_SUBREG (PseudoCVT_S_L GPR64Opnd:$src), sub_lo)>;
- def : MipsPat<(f64 (sint_to_fp GPR64Opnd:$src)),
- (PseudoCVT_D64_L GPR64Opnd:$src)>;
-
- def : MipsPat<(MipsTruncIntFP FGR64Opnd:$src),
- (TRUNC_W_D64 FGR64Opnd:$src)>;
- def : MipsPat<(MipsTruncIntFP FGR32Opnd:$src),
- (TRUNC_L_S FGR32Opnd:$src)>;
- def : MipsPat<(MipsTruncIntFP FGR64Opnd:$src),
- (TRUNC_L_D64 FGR64Opnd:$src)>;
-
- def : MipsPat<(f32 (fround FGR64Opnd:$src)),
- (CVT_S_D64 FGR64Opnd:$src)>;
- def : MipsPat<(f64 (fextend FGR32Opnd:$src)),
- (CVT_D64_S FGR32Opnd:$src)>;
-}
+def : MipsPat<(f32 (fround FGR64Opnd:$src)),
+ (CVT_S_D64 FGR64Opnd:$src)>, FGR_64;
+def : MipsPat<(f64 (fextend FGR32Opnd:$src)),
+ (CVT_D64_S FGR32Opnd:$src)>, FGR_64;
// Patterns for loads/stores with a reg+imm operand.
let AddedComplexity = 40 in {
- let Predicates = [HasStdEnc] in {
- def : LoadRegImmPat<LWC1, f32, load>;
- def : StoreRegImmPat<SWC1, f32>;
- }
+ def : LoadRegImmPat<LWC1, f32, load>;
+ def : StoreRegImmPat<SWC1, f32>;
- let Predicates = [IsFP64bit, HasStdEnc] in {
- def : LoadRegImmPat<LDC164, f64, load>;
- def : StoreRegImmPat<SDC164, f64>;
- }
+ def : LoadRegImmPat<LDC164, f64, load>, FGR_64;
+ def : StoreRegImmPat<SDC164, f64>, FGR_64;
- let Predicates = [NotFP64bit, HasStdEnc] in {
- def : LoadRegImmPat<LDC1, f64, load>;
- def : StoreRegImmPat<SDC1, f64>;
- }
+ def : LoadRegImmPat<LDC1, f64, load>, FGR_32;
+ def : StoreRegImmPat<SDC1, f64>, FGR_32;
}
diff --git a/lib/Target/Mips/MipsInstrFormats.td b/lib/Target/Mips/MipsInstrFormats.td
index e4405ab..0377eab 100644
--- a/lib/Target/Mips/MipsInstrFormats.td
+++ b/lib/Target/Mips/MipsInstrFormats.td
@@ -93,8 +93,8 @@ class MipsInst<dag outs, dag ins, string asmstr, list<dag> pattern,
// Mips32/64 Instruction Format
class InstSE<dag outs, dag ins, string asmstr, list<dag> pattern,
InstrItinClass itin, Format f, string opstr = ""> :
- MipsInst<outs, ins, asmstr, pattern, itin, f> {
- let Predicates = [HasStdEnc];
+ MipsInst<outs, ins, asmstr, pattern, itin, f>, PredicateControl {
+ let EncodingPredicates = [HasStdEnc];
string BaseOpcode = opstr;
string Arch;
}
@@ -109,9 +109,9 @@ class MipsPseudo<dag outs, dag ins, list<dag> pattern,
// Mips32/64 Pseudo Instruction Format
class PseudoSE<dag outs, dag ins, list<dag> pattern,
- InstrItinClass itin = IIPseudo>:
- MipsPseudo<outs, ins, pattern, itin> {
- let Predicates = [HasStdEnc];
+ InstrItinClass itin = IIPseudo> :
+ MipsPseudo<outs, ins, pattern, itin>, PredicateControl {
+ let EncodingPredicates = [HasStdEnc];
}
// Pseudo-instructions for alternate assembly syntax (never used by codegen).
@@ -545,6 +545,20 @@ class SEQ_FM<bits<6> funct> : StdArch {
let Inst{5-0} = funct;
}
+class SEQI_FM<bits<6> funct> : StdArch {
+ bits<5> rs;
+ bits<5> rt;
+ bits<10> imm10;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0x1c;
+ let Inst{25-21} = rs;
+ let Inst{20-16} = rt;
+ let Inst{15-6} = imm10;
+ let Inst{5-0} = funct;
+}
+
//===----------------------------------------------------------------------===//
// System calls format <op|code_|funct>
//===----------------------------------------------------------------------===//
@@ -829,3 +843,12 @@ class BARRIER_FM<bits<5> op> : StdArch {
let Inst{10-6} = op; // Operation
let Inst{5-0} = 0; // SLL
}
+
+class COP0_TLB_FM<bits<6> op> : StdArch {
+ bits<32> Inst;
+
+ let Inst{31-26} = 0x10; // COP0
+ let Inst{25} = 1; // CO
+ let Inst{24-6} = 0;
+ let Inst{5-0} = op; // Operation
+}
diff --git a/lib/Target/Mips/MipsInstrInfo.cpp b/lib/Target/Mips/MipsInstrInfo.cpp
index 0ebad05..d6da6c6 100644
--- a/lib/Target/Mips/MipsInstrInfo.cpp
+++ b/lib/Target/Mips/MipsInstrInfo.cpp
@@ -22,11 +22,11 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
+using namespace llvm;
+
#define GET_INSTRINFO_CTOR_DTOR
#include "MipsGenInstrInfo.inc"
-using namespace llvm;
-
// Pin the vtable to this file.
void MipsInstrInfo::anchor() {}
@@ -195,7 +195,7 @@ AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
if (I == REnd || !isUnpredicatedTerminator(&*I)) {
// This block ends with no branches (it just falls through to its succ).
// Leave TBB/FBB null.
- TBB = FBB = NULL;
+ TBB = FBB = nullptr;
return BT_NoBranch;
}
@@ -209,7 +209,7 @@ AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
// Get the second to last instruction in the block.
unsigned SecondLastOpc = 0;
- MachineInstr *SecondLastInst = NULL;
+ MachineInstr *SecondLastInst = nullptr;
if (++I != REnd) {
SecondLastInst = &*I;
diff --git a/lib/Target/Mips/MipsInstrInfo.h b/lib/Target/Mips/MipsInstrInfo.h
index d9ac961..742193f 100644
--- a/lib/Target/Mips/MipsInstrInfo.h
+++ b/lib/Target/Mips/MipsInstrInfo.h
@@ -9,6 +9,10 @@
//
// This file contains the Mips implementation of the TargetInstrInfo class.
//
+// FIXME: We need to override TargetInstrInfo::getInlineAsmLength method in
+// order for MipsLongBranch pass to work correctly when the code has inline
+// assembly. The returned value doesn't have to be the asm instruction's exact
+// size in bytes; MipsLongBranch only expects it to be the correct upper bound.
//===----------------------------------------------------------------------===//
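The FIXME asks only for a safe upper bound, not an exact size: MipsLongBranch needs to know whether a branch might go out of range, so over-estimating inline-asm length is harmless while under-estimating would be a miscompile. A hypothetical override in that spirit (not part of this patch; the name and the separator heuristic are illustrative):

    // Over-approximate: assume every ';' or newline begins another
    // maximum-width (4-byte) instruction.
    unsigned getInlineAsmLengthUpperBound(const char *Str) {
      unsigned NumInsts = 1;
      for (const char *P = Str; *P; ++P)
        if (*P == ';' || *P == '\n')
          ++NumInsts;
      return NumInsts * 4;
    }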
#ifndef MIPSINSTRUCTIONINFO_H
@@ -47,20 +51,20 @@ public:
static const MipsInstrInfo *create(MipsTargetMachine &TM);
/// Branch Analysis
- virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
- MachineBasicBlock *&FBB,
- SmallVectorImpl<MachineOperand> &Cond,
- bool AllowModify) const;
+ bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const override;
- virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
+ unsigned RemoveBranch(MachineBasicBlock &MBB) const override;
- virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
- MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond,
- DebugLoc DL) const;
+ unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const override;
- virtual
- bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
+ bool
+ ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override;
BranchType AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
MachineBasicBlock *&FBB,
@@ -69,8 +73,8 @@ public:
SmallVectorImpl<MachineInstr*> &BranchInstrs) const;
/// Insert nop instruction when hazard condition is found
- virtual void insertNoop(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI) const;
+ void insertNoop(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const override;
/// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
/// such, whenever a client has an instance of instruction info, it should
@@ -83,19 +87,19 @@ public:
/// Return the number of bytes of code the specified instruction may be.
unsigned GetInstSizeInBytes(const MachineInstr *MI) const;
- virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- unsigned SrcReg, bool isKill, int FrameIndex,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const {
+ void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const override {
storeRegToStack(MBB, MBBI, SrcReg, isKill, FrameIndex, RC, TRI, 0);
}
- virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- unsigned DestReg, int FrameIndex,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const {
+ void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const override {
loadRegFromStack(MBB, MBBI, DestReg, FrameIndex, RC, TRI, 0);
}
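
The FIXME above only demands a correct upper bound, not an exact byte count. A hedged sketch of one conservative strategy (helper name assumed; no such override is added by this patch): count inline-asm statements and charge each one the maximum standard-encoding instruction size.

#include <cctype>

// Over-estimates (comments and labels are counted as statements), which is
// acceptable because MipsLongBranch only needs an upper bound.
static unsigned estimateInlineAsmLength(const char *Str) {
  unsigned NumInsts = 0;
  bool AtStatementStart = true;
  for (; *Str; ++Str) {
    if (*Str == '\n' || *Str == ';') { // MIPS assembly statement separators.
      AtStatementStart = true;
    } else if (AtStatementStart && !std::isspace((unsigned char)*Str)) {
      ++NumInsts; // First visible character of a new statement.
      AtStatementStart = false;
    }
  }
  return NumInsts * 4; // 4 bytes per standard-encoding MIPS instruction.
}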
diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td
index 07c37d8..0d3cb75 100644
--- a/lib/Target/Mips/MipsInstrInfo.td
+++ b/lib/Target/Mips/MipsInstrInfo.td
@@ -146,26 +146,40 @@ def MipsSDR : SDNode<"MipsISD::SDR", SDTStore,
//===----------------------------------------------------------------------===//
// Mips Instruction Predicate Definitions.
//===----------------------------------------------------------------------===//
-def HasSEInReg : Predicate<"Subtarget.hasSEInReg()">,
- AssemblerPredicate<"FeatureSEInReg">;
-def HasBitCount : Predicate<"Subtarget.hasBitCount()">,
- AssemblerPredicate<"FeatureBitCount">;
-def HasSwap : Predicate<"Subtarget.hasSwap()">,
- AssemblerPredicate<"FeatureSwap">;
-def HasCondMov : Predicate<"Subtarget.hasCondMov()">,
- AssemblerPredicate<"FeatureCondMov">;
-def HasFPIdx : Predicate<"Subtarget.hasFPIdx()">,
- AssemblerPredicate<"FeatureFPIdx">;
+def HasMips2 : Predicate<"Subtarget.hasMips2()">,
+ AssemblerPredicate<"FeatureMips2">;
+def HasMips3_32 : Predicate<"Subtarget.hasMips3_32()">,
+ AssemblerPredicate<"FeatureMips3_32">;
+def HasMips3_32r2 : Predicate<"Subtarget.hasMips3_32r2()">,
+ AssemblerPredicate<"FeatureMips3_32r2">;
+def HasMips3 : Predicate<"Subtarget.hasMips3()">,
+ AssemblerPredicate<"FeatureMips3">;
+def HasMips4_32 : Predicate<"Subtarget.hasMips4_32()">,
+ AssemblerPredicate<"FeatureMips4_32">;
+def HasMips4_32r2 : Predicate<"Subtarget.hasMips4_32r2()">,
+ AssemblerPredicate<"FeatureMips4_32r2">;
+def HasMips5_32r2 : Predicate<"Subtarget.hasMips5_32r2()">,
+ AssemblerPredicate<"FeatureMips5_32r2">;
def HasMips32 : Predicate<"Subtarget.hasMips32()">,
AssemblerPredicate<"FeatureMips32">;
def HasMips32r2 : Predicate<"Subtarget.hasMips32r2()">,
AssemblerPredicate<"FeatureMips32r2">;
+def HasMips32r6 : Predicate<"Subtarget.hasMips32r6()">,
+ AssemblerPredicate<"FeatureMips32r6">;
+def NotMips32r6 : Predicate<"!Subtarget.hasMips32r6()">,
+ AssemblerPredicate<"!FeatureMips32r6">;
+def IsGP64bit : Predicate<"Subtarget.isGP64bit()">,
+ AssemblerPredicate<"FeatureGP64Bit">;
+def IsGP32bit : Predicate<"!Subtarget.isGP64bit()">,
+ AssemblerPredicate<"!FeatureGP64Bit">;
def HasMips64 : Predicate<"Subtarget.hasMips64()">,
AssemblerPredicate<"FeatureMips64">;
-def NotMips64 : Predicate<"!Subtarget.hasMips64()">,
- AssemblerPredicate<"!FeatureMips64">;
def HasMips64r2 : Predicate<"Subtarget.hasMips64r2()">,
AssemblerPredicate<"FeatureMips64r2">;
+def HasMips64r6 : Predicate<"Subtarget.hasMips64r6()">,
+ AssemblerPredicate<"FeatureMips64r6">;
+def NotMips64r6 : Predicate<"!Subtarget.hasMips64r6()">,
+ AssemblerPredicate<"!FeatureMips64r6">;
def IsN64 : Predicate<"Subtarget.isABI_N64()">,
AssemblerPredicate<"FeatureN64">;
def InMips16Mode : Predicate<"Subtarget.inMips16Mode()">,
@@ -176,8 +190,7 @@ def RelocStatic : Predicate<"TM.getRelocationModel() == Reloc::Static">,
AssemblerPredicate<"FeatureMips32">;
def RelocPIC : Predicate<"TM.getRelocationModel() == Reloc::PIC_">,
AssemblerPredicate<"FeatureMips32">;
-def NoNaNsFPMath : Predicate<"TM.Options.NoNaNsFPMath">,
- AssemblerPredicate<"FeatureMips32">;
+def NoNaNsFPMath : Predicate<"TM.Options.NoNaNsFPMath">;
def HasStdEnc : Predicate<"Subtarget.hasStandardEncoding()">,
AssemblerPredicate<"!FeatureMips16">;
def NotDSP : Predicate<"!Subtarget.hasDSP()">;
@@ -189,9 +202,65 @@ def IsLE : Predicate<"Subtarget.isLittle()">;
def IsBE : Predicate<"!Subtarget.isLittle()">;
def IsNotNaCl : Predicate<"!Subtarget.isTargetNaCl()">;
-class MipsPat<dag pattern, dag result> : Pat<pattern, result> {
- let Predicates = [HasStdEnc];
+//===----------------------------------------------------------------------===//
+// Mips GPR size adjectives.
+// They are mutually exclusive.
+//===----------------------------------------------------------------------===//
+
+class GPR_32 { list<Predicate> GPRPredicates = [IsGP32bit]; }
+class GPR_64 { list<Predicate> GPRPredicates = [IsGP64bit]; }
+
+//===----------------------------------------------------------------------===//
+// Mips ISA/ASE membership and instruction group membership adjectives.
+// They are mutually exclusive.
+//===----------------------------------------------------------------------===//
+
+// FIXME: I'd prefer to use additive predicates to build the instruction sets
+// but we are short on assembler feature bits at the moment. Using a
+// subtractive predicate will hopefully keep us under the 32 predicate
+// limit long enough to develop an alternative way to handle P1||P2
+// predicates.
+class ISA_MIPS1_NOT_32R6_64R6 {
+ list<Predicate> InsnPredicates = [NotMips32r6, NotMips64r6];
+}
+class ISA_MIPS2 { list<Predicate> InsnPredicates = [HasMips2]; }
+class ISA_MIPS2_NOT_32R6_64R6 {
+ list<Predicate> InsnPredicates = [HasMips2, NotMips32r6, NotMips64r6];
+}
+class ISA_MIPS3 { list<Predicate> InsnPredicates = [HasMips3]; }
+class ISA_MIPS3_NOT_32R6_64R6 {
+ list<Predicate> InsnPredicates = [HasMips3, NotMips32r6, NotMips64r6];
}
+class ISA_MIPS32 { list<Predicate> InsnPredicates = [HasMips32]; }
+class ISA_MIPS32R2 { list<Predicate> InsnPredicates = [HasMips32r2]; }
+class ISA_MIPS64 { list<Predicate> InsnPredicates = [HasMips64]; }
+class ISA_MIPS64R2 { list<Predicate> InsnPredicates = [HasMips64r2]; }
+class ISA_MIPS32R6 { list<Predicate> InsnPredicates = [HasMips32r6]; }
+class ISA_MIPS64R6 { list<Predicate> InsnPredicates = [HasMips64r6]; }
+
+// The portions of MIPS-III that were also added to MIPS32
+class INSN_MIPS3_32 { list<Predicate> InsnPredicates = [HasMips3_32]; }
+
+// The portions of MIPS-III that were also added to MIPS32R2
+class INSN_MIPS3_32R2 { list<Predicate> InsnPredicates = [HasMips3_32r2]; }
+
+// The portions of MIPS-IV that were also added to MIPS32
+class INSN_MIPS4_32 { list<Predicate> InsnPredicates = [HasMips4_32]; }
+
+// The portions of MIPS-IV that were also added to MIPS32R2
+class INSN_MIPS4_32R2 { list<Predicate> InsnPredicates = [HasMips4_32r2]; }
+
+// The portions of MIPS-V that were also added to MIPS32R2
+class INSN_MIPS5_32R2 { list<Predicate> InsnPredicates = [HasMips5_32r2]; }
+
+//===----------------------------------------------------------------------===//
+
+class MipsPat<dag pattern, dag result> : Pat<pattern, result>, PredicateControl {
+ let EncodingPredicates = [HasStdEnc];
+}
+
+class MipsInstAlias<string Asm, dag Result, bit Emit = 0b1> :
+ InstAlias<Asm, Result, Emit>, PredicateControl;
class IsCommutable {
bit isCommutable = 1;
@@ -265,6 +334,11 @@ def simm16 : Operand<i32> {
let DecoderMethod= "DecodeSimm16";
}
+def simm19_lsl2 : Operand<i32> {
+ let EncoderMethod = "getSimm19Lsl2Encoding";
+ let DecoderMethod = "DecodeSimm19Lsl2";
+}
+
def simm20 : Operand<i32> {
}
@@ -284,6 +358,14 @@ def uimmz : Operand<i32> {
}
// Unsigned Operand
+def uimm2 : Operand<i32> {
+ let PrintMethod = "printUnsignedImm";
+}
+
+def uimm3 : Operand<i32> {
+ let PrintMethod = "printUnsignedImm";
+}
+
def uimm5 : Operand<i32> {
let PrintMethod = "printUnsignedImm";
}
@@ -314,6 +396,10 @@ def InvertedImOperand : Operand<i32> {
let ParserMatchClass = MipsInvertedImmoperand;
}
+def InvertedImOperand64 : Operand<i64> {
+ let ParserMatchClass = MipsInvertedImmoperand;
+}
+
class mem_generic : Operand<iPTR> {
let PrintMethod = "printMemOperand";
let MIOperandInfo = (ops ptr_rc, simm16);
@@ -478,7 +564,9 @@ class shift_rotate_imm<string opstr, Operand ImmOpnd,
SDPatternOperator PF = null_frag> :
InstSE<(outs RO:$rd), (ins RO:$rt, ImmOpnd:$shamt),
!strconcat(opstr, "\t$rd, $rt, $shamt"),
- [(set RO:$rd, (OpNode RO:$rt, PF:$shamt))], itin, FrmR, opstr>;
+ [(set RO:$rd, (OpNode RO:$rt, PF:$shamt))], itin, FrmR, opstr> {
+ let TwoOperandAliasConstraint = "$rt = $rd";
+}
class shift_rotate_reg<string opstr, RegisterOperand RO, InstrItinClass itin,
SDPatternOperator OpNode = null_frag>:
@@ -590,7 +678,7 @@ class UncondBranch<Instruction BEQInst> :
let isTerminator = 1;
let isBarrier = 1;
let hasDelaySlot = 1;
- let Predicates = [RelocPIC, HasStdEnc];
+ let AdditionalPredicates = [RelocPIC];
let Defs = [AT];
}
@@ -779,27 +867,22 @@ class EffectiveAddress<string opstr, RegisterOperand RO> :
// Count Leading Ones/Zeros in Word
class CountLeading0<string opstr, RegisterOperand RO>:
InstSE<(outs RO:$rd), (ins RO:$rs), !strconcat(opstr, "\t$rd, $rs"),
- [(set RO:$rd, (ctlz RO:$rs))], II_CLZ, FrmR, opstr>,
- Requires<[HasBitCount, HasStdEnc]>;
+ [(set RO:$rd, (ctlz RO:$rs))], II_CLZ, FrmR, opstr>;
class CountLeading1<string opstr, RegisterOperand RO>:
InstSE<(outs RO:$rd), (ins RO:$rs), !strconcat(opstr, "\t$rd, $rs"),
- [(set RO:$rd, (ctlz (not RO:$rs)))], II_CLO, FrmR, opstr>,
- Requires<[HasBitCount, HasStdEnc]>;
+ [(set RO:$rd, (ctlz (not RO:$rs)))], II_CLO, FrmR, opstr>;
// Sign Extend in Register.
class SignExtInReg<string opstr, ValueType vt, RegisterOperand RO,
InstrItinClass itin> :
InstSE<(outs RO:$rd), (ins RO:$rt), !strconcat(opstr, "\t$rd, $rt"),
- [(set RO:$rd, (sext_inreg RO:$rt, vt))], itin, FrmR, opstr> {
- let Predicates = [HasSEInReg, HasStdEnc];
-}
+ [(set RO:$rd, (sext_inreg RO:$rt, vt))], itin, FrmR, opstr>;
// Subword Swap
class SubwordSwap<string opstr, RegisterOperand RO>:
InstSE<(outs RO:$rd), (ins RO:$rt), !strconcat(opstr, "\t$rd, $rt"), [],
NoItinerary, FrmR, opstr> {
- let Predicates = [HasSwap, HasStdEnc];
let neverHasSideEffects = 1;
}
@@ -814,17 +897,14 @@ class ExtBase<string opstr, RegisterOperand RO, Operand PosOpnd,
InstSE<(outs RO:$rt), (ins RO:$rs, PosOpnd:$pos, size_ext:$size),
!strconcat(opstr, " $rt, $rs, $pos, $size"),
[(set RO:$rt, (Op RO:$rs, imm:$pos, imm:$size))], NoItinerary,
- FrmR, opstr> {
- let Predicates = [HasMips32r2, HasStdEnc];
-}
+ FrmR, opstr>, ISA_MIPS32R2;
class InsBase<string opstr, RegisterOperand RO, Operand PosOpnd,
SDPatternOperator Op = null_frag>:
InstSE<(outs RO:$rt), (ins RO:$rs, PosOpnd:$pos, size_ins:$size, RO:$src),
!strconcat(opstr, " $rt, $rs, $pos, $size"),
[(set RO:$rt, (Op RO:$rs, imm:$pos, imm:$size, RO:$src))],
- NoItinerary, FrmR, opstr> {
- let Predicates = [HasMips32r2, HasStdEnc];
+ NoItinerary, FrmR, opstr>, ISA_MIPS32R2 {
let Constraints = "$src = $rt";
}
@@ -915,6 +995,18 @@ let isPseudo = 1, isCodeGenOnly = 1 in {
def STORE_ACC64 : Store<"", ACC64>;
}
+// We need these two pseudo instructions to avoid offset calculation for long
+// branches.  See the comment in MipsLongBranch.cpp for a detailed
+// explanation.
+
+// Expands to: lui $dst, %hi($tgt - $baltgt)
+def LONG_BRANCH_LUi : PseudoSE<(outs GPR32Opnd:$dst),
+ (ins brtarget:$tgt, brtarget:$baltgt), []>;
+
+// Expands to: addiu $dst, $src, %lo($tgt - $baltgt)
+def LONG_BRANCH_ADDiu : PseudoSE<(outs GPR32Opnd:$dst),
+ (ins GPR32Opnd:$src, brtarget:$tgt, brtarget:$baltgt), []>;
+
//===----------------------------------------------------------------------===//
// Instruction definition
//===----------------------------------------------------------------------===//
@@ -926,7 +1018,8 @@ let isPseudo = 1, isCodeGenOnly = 1 in {
def ADDiu : MMRel, ArithLogicI<"addiu", simm16, GPR32Opnd, II_ADDIU, immSExt16,
add>,
ADDI_FM<0x9>, IsAsCheapAsAMove;
-def ADDi : MMRel, ArithLogicI<"addi", simm16, GPR32Opnd>, ADDI_FM<0x8>;
+def ADDi : MMRel, ArithLogicI<"addi", simm16, GPR32Opnd>, ADDI_FM<0x8>,
+ ISA_MIPS1_NOT_32R6_64R6;
def SLTi : MMRel, SetCC_I<"slti", setlt, simm16, immSExt16, GPR32Opnd>,
SLTI_FM<0xa>;
def SLTiu : MMRel, SetCC_I<"sltiu", setult, simm16, immSExt16, GPR32Opnd>,
@@ -949,7 +1042,7 @@ def SUBu : MMRel, ArithLogicR<"subu", GPR32Opnd, 0, II_SUBU, sub>,
ADD_FM<0, 0x23>;
let Defs = [HI0, LO0] in
def MUL : MMRel, ArithLogicR<"mul", GPR32Opnd, 1, II_MUL, mul>,
- ADD_FM<0x1c, 2>;
+ ADD_FM<0x1c, 2>, ISA_MIPS32;
def ADD : MMRel, ArithLogicR<"add", GPR32Opnd>, ADD_FM<0, 0x20>;
def SUB : MMRel, ArithLogicR<"sub", GPR32Opnd>, ADD_FM<0, 0x22>;
def SLT : MMRel, SetCC_R<"slt", setlt, GPR32Opnd>, ADD_FM<0, 0x2a>;
@@ -977,12 +1070,11 @@ def SRAV : MMRel, shift_rotate_reg<"srav", GPR32Opnd, II_SRAV, sra>,
SRLV_FM<7, 0>;
// Rotate Instructions
-let Predicates = [HasMips32r2, HasStdEnc] in {
- def ROTR : MMRel, shift_rotate_imm<"rotr", uimm5, GPR32Opnd, II_ROTR, rotr,
- immZExt5>, SRA_FM<2, 1>;
- def ROTRV : MMRel, shift_rotate_reg<"rotrv", GPR32Opnd, II_ROTRV, rotr>,
- SRLV_FM<6, 1>;
-}
+def ROTR : MMRel, shift_rotate_imm<"rotr", uimm5, GPR32Opnd, II_ROTR, rotr,
+ immZExt5>,
+ SRA_FM<2, 1>, ISA_MIPS32R2;
+def ROTRV : MMRel, shift_rotate_reg<"rotrv", GPR32Opnd, II_ROTRV, rotr>,
+ SRLV_FM<6, 1>, ISA_MIPS32R2;
/// Load and Store Instructions
/// aligned
@@ -999,11 +1091,16 @@ def SH : Store<"sh", GPR32Opnd, truncstorei16, II_SH>, MMRel, LW_FM<0x29>;
def SW : Store<"sw", GPR32Opnd, store, II_SW>, MMRel, LW_FM<0x2b>;
/// load/store left/right
-let Predicates = [NotInMicroMips] in {
-def LWL : LoadLeftRight<"lwl", MipsLWL, GPR32Opnd, II_LWL>, LW_FM<0x22>;
-def LWR : LoadLeftRight<"lwr", MipsLWR, GPR32Opnd, II_LWR>, LW_FM<0x26>;
-def SWL : StoreLeftRight<"swl", MipsSWL, GPR32Opnd, II_SWL>, LW_FM<0x2a>;
-def SWR : StoreLeftRight<"swr", MipsSWR, GPR32Opnd, II_SWR>, LW_FM<0x2e>;
+let EncodingPredicates = []<Predicate>, // FIXME: Lack of HasStdEnc is probably a bug
+ AdditionalPredicates = [NotInMicroMips] in {
+def LWL : LoadLeftRight<"lwl", MipsLWL, GPR32Opnd, II_LWL>, LW_FM<0x22>,
+ ISA_MIPS1_NOT_32R6_64R6;
+def LWR : LoadLeftRight<"lwr", MipsLWR, GPR32Opnd, II_LWR>, LW_FM<0x26>,
+ ISA_MIPS1_NOT_32R6_64R6;
+def SWL : StoreLeftRight<"swl", MipsSWL, GPR32Opnd, II_SWL>, LW_FM<0x2a>,
+ ISA_MIPS1_NOT_32R6_64R6;
+def SWR : StoreLeftRight<"swr", MipsSWR, GPR32Opnd, II_SWR>, LW_FM<0x2e>,
+ ISA_MIPS1_NOT_32R6_64R6;
}
def SYNC : MMRel, SYNC_FT<"sync">, SYNC_FM;
@@ -1014,34 +1111,41 @@ def TLT : MMRel, TEQ_FT<"tlt", GPR32Opnd>, TEQ_FM<0x32>;
def TLTU : MMRel, TEQ_FT<"tltu", GPR32Opnd>, TEQ_FM<0x33>;
def TNE : MMRel, TEQ_FT<"tne", GPR32Opnd>, TEQ_FM<0x36>;
-def TEQI : MMRel, TEQI_FT<"teqi", GPR32Opnd>, TEQI_FM<0xc>;
-def TGEI : MMRel, TEQI_FT<"tgei", GPR32Opnd>, TEQI_FM<0x8>;
-def TGEIU : MMRel, TEQI_FT<"tgeiu", GPR32Opnd>, TEQI_FM<0x9>;
-def TLTI : MMRel, TEQI_FT<"tlti", GPR32Opnd>, TEQI_FM<0xa>;
-def TTLTIU : MMRel, TEQI_FT<"tltiu", GPR32Opnd>, TEQI_FM<0xb>;
-def TNEI : MMRel, TEQI_FT<"tnei", GPR32Opnd>, TEQI_FM<0xe>;
+def TEQI : MMRel, TEQI_FT<"teqi", GPR32Opnd>, TEQI_FM<0xc>,
+ ISA_MIPS2_NOT_32R6_64R6;
+def TGEI : MMRel, TEQI_FT<"tgei", GPR32Opnd>, TEQI_FM<0x8>,
+ ISA_MIPS2_NOT_32R6_64R6;
+def TGEIU : MMRel, TEQI_FT<"tgeiu", GPR32Opnd>, TEQI_FM<0x9>,
+ ISA_MIPS2_NOT_32R6_64R6;
+def TLTI : MMRel, TEQI_FT<"tlti", GPR32Opnd>, TEQI_FM<0xa>,
+ ISA_MIPS2_NOT_32R6_64R6;
+def TTLTIU : MMRel, TEQI_FT<"tltiu", GPR32Opnd>, TEQI_FM<0xb>,
+ ISA_MIPS2_NOT_32R6_64R6;
+def TNEI : MMRel, TEQI_FT<"tnei", GPR32Opnd>, TEQI_FM<0xe>,
+ ISA_MIPS2_NOT_32R6_64R6;
def BREAK : MMRel, BRK_FT<"break">, BRK_FM<0xd>;
def SYSCALL : MMRel, SYS_FT<"syscall">, SYS_FM<0xc>;
def TRAP : TrapBase<BREAK>;
-def ERET : MMRel, ER_FT<"eret">, ER_FM<0x18>;
-def DERET : MMRel, ER_FT<"deret">, ER_FM<0x1f>;
+def ERET : MMRel, ER_FT<"eret">, ER_FM<0x18>, INSN_MIPS3_32;
+def DERET : MMRel, ER_FT<"deret">, ER_FM<0x1f>, ISA_MIPS32;
-def EI : MMRel, DEI_FT<"ei", GPR32Opnd>, EI_FM<1>;
-def DI : MMRel, DEI_FT<"di", GPR32Opnd>, EI_FM<0>;
+def EI : MMRel, DEI_FT<"ei", GPR32Opnd>, EI_FM<1>, ISA_MIPS32R2;
+def DI : MMRel, DEI_FT<"di", GPR32Opnd>, EI_FM<0>, ISA_MIPS32R2;
-let Predicates = [NotInMicroMips] in {
+let EncodingPredicates = []<Predicate>, // FIXME: Lack of HasStdEnc is probably a bug
+ AdditionalPredicates = [NotInMicroMips] in {
def WAIT : WAIT_FT<"wait">, WAIT_FM;
/// Load-linked, Store-conditional
-def LL : LLBase<"ll", GPR32Opnd>, LW_FM<0x30>;
-def SC : SCBase<"sc", GPR32Opnd>, LW_FM<0x38>;
+def LL : LLBase<"ll", GPR32Opnd>, LW_FM<0x30>, ISA_MIPS2;
+def SC : SCBase<"sc", GPR32Opnd>, LW_FM<0x38>, ISA_MIPS2;
}
/// Jump and Branch Instructions
def J : MMRel, JumpFJ<jmptarget, "j", br, bb, "j">, FJ<2>,
- Requires<[RelocStatic, HasStdEnc]>, IsBranch;
+ AdditionalRequires<[RelocStatic]>, IsBranch;
def JR : MMRel, IndirectBranch<"jr", GPR32Opnd>, MTLO_FM<8>;
def BEQ : MMRel, CBranch<"beq", brtarget, seteq, GPR32Opnd>, BEQ_FM<4>;
def BNE : MMRel, CBranch<"bne", brtarget, setne, GPR32Opnd>, BEQ_FM<5>;
@@ -1056,7 +1160,7 @@ def BLTZ : MMRel, CBranchZero<"bltz", brtarget, setlt, GPR32Opnd>,
def B : UncondBranch<BEQ>;
def JAL : MMRel, JumpLink<"jal", calltarget>, FJ<3>;
-let Predicates = [NotInMicroMips, HasStdEnc] in {
+let AdditionalPredicates = [NotInMicroMips] in {
def JALR : JumpLinkReg<"jalr", GPR32Opnd>, JALR_FM;
def JALRPseudo : JumpLinkRegPseudo<GPR32Opnd, JALR, RA>;
}
@@ -1102,21 +1206,24 @@ def UDIV : MMRel, Div<"divu", II_DIVU, GPR32Opnd, [HI0, LO0]>,
def MTHI : MMRel, MoveToLOHI<"mthi", GPR32Opnd, [HI0]>, MTLO_FM<0x11>;
def MTLO : MMRel, MoveToLOHI<"mtlo", GPR32Opnd, [LO0]>, MTLO_FM<0x13>;
-let Predicates = [NotInMicroMips] in {
+let EncodingPredicates = []<Predicate>, // FIXME: Lack of HasStdEnc is probably a bug
+ AdditionalPredicates = [NotInMicroMips] in {
def MFHI : MMRel, MoveFromLOHI<"mfhi", GPR32Opnd, AC0>, MFLO_FM<0x10>;
def MFLO : MMRel, MoveFromLOHI<"mflo", GPR32Opnd, AC0>, MFLO_FM<0x12>;
}
/// Sign Ext In Register Instructions.
-def SEB : MMRel, SignExtInReg<"seb", i8, GPR32Opnd, II_SEB>, SEB_FM<0x10, 0x20>;
-def SEH : MMRel, SignExtInReg<"seh", i16, GPR32Opnd, II_SEH>, SEB_FM<0x18, 0x20>;
+def SEB : MMRel, SignExtInReg<"seb", i8, GPR32Opnd, II_SEB>,
+ SEB_FM<0x10, 0x20>, ISA_MIPS32R2;
+def SEH : MMRel, SignExtInReg<"seh", i16, GPR32Opnd, II_SEH>,
+ SEB_FM<0x18, 0x20>, ISA_MIPS32R2;
/// Count Leading
-def CLZ : MMRel, CountLeading0<"clz", GPR32Opnd>, CLO_FM<0x20>;
-def CLO : MMRel, CountLeading1<"clo", GPR32Opnd>, CLO_FM<0x21>;
+def CLZ : MMRel, CountLeading0<"clz", GPR32Opnd>, CLO_FM<0x20>, ISA_MIPS32;
+def CLO : MMRel, CountLeading1<"clo", GPR32Opnd>, CLO_FM<0x21>, ISA_MIPS32;
/// Word Swap Bytes Within Halfwords
-def WSBH : MMRel, SubwordSwap<"wsbh", GPR32Opnd>, SEB_FM<2, 0x20>;
+def WSBH : MMRel, SubwordSwap<"wsbh", GPR32Opnd>, SEB_FM<2, 0x20>, ISA_MIPS32R2;
/// No operation.
def NOP : PseudoSE<(outs), (ins), []>, PseudoInstExpansion<(SLL ZERO, ZERO, 0)>;
@@ -1128,12 +1235,12 @@ def NOP : PseudoSE<(outs), (ins), []>, PseudoInstExpansion<(SLL ZERO, ZERO, 0)>;
def LEA_ADDiu : MMRel, EffectiveAddress<"addiu", GPR32Opnd>, LW_FM<9>;
// MADD*/MSUB*
-def MADD : MMRel, MArithR<"madd", II_MADD, 1>, MULT_FM<0x1c, 0>;
-def MADDU : MMRel, MArithR<"maddu", II_MADDU, 1>, MULT_FM<0x1c, 1>;
-def MSUB : MMRel, MArithR<"msub", II_MSUB>, MULT_FM<0x1c, 4>;
-def MSUBU : MMRel, MArithR<"msubu", II_MSUBU>, MULT_FM<0x1c, 5>;
+def MADD : MMRel, MArithR<"madd", II_MADD, 1>, MULT_FM<0x1c, 0>, ISA_MIPS32;
+def MADDU : MMRel, MArithR<"maddu", II_MADDU, 1>, MULT_FM<0x1c, 1>, ISA_MIPS32;
+def MSUB : MMRel, MArithR<"msub", II_MSUB>, MULT_FM<0x1c, 4>, ISA_MIPS32;
+def MSUBU : MMRel, MArithR<"msubu", II_MSUBU>, MULT_FM<0x1c, 5>, ISA_MIPS32;
-let Predicates = [HasStdEnc, NotDSP] in {
+let AdditionalPredicates = [NotDSP] in {
def PseudoMULT : MultDivPseudo<MULT, ACC64, GPR32Opnd, MipsMult, II_MULT>;
def PseudoMULTu : MultDivPseudo<MULTu, ACC64, GPR32Opnd, MipsMultu, II_MULTU>;
def PseudoMFHI : PseudoMFLOHI<GPR32, ACC64, MipsMFHI>;
@@ -1156,8 +1263,8 @@ def EXT : MMRel, ExtBase<"ext", GPR32Opnd, uimm5, MipsExt>, EXT_FM<0>;
def INS : MMRel, InsBase<"ins", GPR32Opnd, uimm5, MipsIns>, EXT_FM<4>;
/// Move Control Registers From/To CPU Registers
-def MFC0 : MFC3OP<"mfc0", GPR32Opnd>, MFC3OP_FM<0x10, 0>;
-def MTC0 : MFC3OP<"mtc0", GPR32Opnd>, MFC3OP_FM<0x10, 4>;
+def MFC0 : MFC3OP<"mfc0", GPR32Opnd>, MFC3OP_FM<0x10, 0>, ISA_MIPS32;
+def MTC0 : MFC3OP<"mtc0", GPR32Opnd>, MFC3OP_FM<0x10, 4>, ISA_MIPS32;
def MFC2 : MFC3OP<"mfc2", GPR32Opnd>, MFC3OP_FM<0x12, 0>;
def MTC2 : MFC3OP<"mtc2", GPR32Opnd>, MFC3OP_FM<0x12, 4>;
@@ -1165,67 +1272,94 @@ class Barrier<string asmstr> : InstSE<(outs), (ins), asmstr, [], NoItinerary,
FrmOther>;
def SSNOP : Barrier<"ssnop">, BARRIER_FM<1>;
def EHB : Barrier<"ehb">, BARRIER_FM<3>;
-def PAUSE : Barrier<"pause">, BARRIER_FM<5>, Requires<[HasMips32r2]>;
+def PAUSE : Barrier<"pause">, BARRIER_FM<5>, ISA_MIPS32R2;
+
+class TLB<string asmstr> : InstSE<(outs), (ins), asmstr, [], NoItinerary,
+ FrmOther>;
+def TLBP : TLB<"tlbp">, COP0_TLB_FM<0x08>;
+def TLBR : TLB<"tlbr">, COP0_TLB_FM<0x01>;
+def TLBWI : TLB<"tlbwi">, COP0_TLB_FM<0x02>;
+def TLBWR : TLB<"tlbwr">, COP0_TLB_FM<0x06>;
//===----------------------------------------------------------------------===//
// Instruction aliases
//===----------------------------------------------------------------------===//
-def : InstAlias<"move $dst, $src",
- (ADDu GPR32Opnd:$dst, GPR32Opnd:$src,ZERO), 1>,
- Requires<[NotMips64, NotInMicroMips]>;
-def : InstAlias<"bal $offset", (BGEZAL ZERO, brtarget:$offset), 0>;
-def : InstAlias<"addu $rs, $rt, $imm",
- (ADDiu GPR32Opnd:$rs, GPR32Opnd:$rt, simm16:$imm), 0>;
-def : InstAlias<"add $rs, $rt, $imm",
- (ADDi GPR32Opnd:$rs, GPR32Opnd:$rt, simm16:$imm), 0>;
-def : InstAlias<"and $rs, $rt, $imm",
- (ANDi GPR32Opnd:$rs, GPR32Opnd:$rt, simm16:$imm), 0>;
-def : InstAlias<"j $rs", (JR GPR32Opnd:$rs), 0>;
+def : MipsInstAlias<"move $dst, $src",
+                    (ADDu GPR32Opnd:$dst, GPR32Opnd:$src, ZERO), 1>,
+ GPR_32 {
+ let AdditionalPredicates = [NotInMicroMips];
+}
+def : MipsInstAlias<"bal $offset", (BGEZAL ZERO, brtarget:$offset), 0>;
+def : MipsInstAlias<"addu $rs, $rt, $imm",
+ (ADDiu GPR32Opnd:$rs, GPR32Opnd:$rt, simm16:$imm), 0>;
+def : MipsInstAlias<"add $rs, $rt, $imm",
+ (ADDi GPR32Opnd:$rs, GPR32Opnd:$rt, simm16:$imm), 0>;
+def : MipsInstAlias<"and $rs, $rt, $imm",
+ (ANDi GPR32Opnd:$rs, GPR32Opnd:$rt, simm16:$imm), 0>;
+def : MipsInstAlias<"j $rs", (JR GPR32Opnd:$rs), 0>;
let Predicates = [NotInMicroMips] in {
-def : InstAlias<"jalr $rs", (JALR RA, GPR32Opnd:$rs), 0>;
-}
-def : InstAlias<"jal $rs", (JALR RA, GPR32Opnd:$rs), 0>;
-def : InstAlias<"jal $rd,$rs", (JALR GPR32Opnd:$rd, GPR32Opnd:$rs), 0>;
-def : InstAlias<"not $rt, $rs",
- (NOR GPR32Opnd:$rt, GPR32Opnd:$rs, ZERO), 0>;
-def : InstAlias<"neg $rt, $rs",
- (SUB GPR32Opnd:$rt, ZERO, GPR32Opnd:$rs), 1>;
-def : InstAlias<"negu $rt, $rs",
- (SUBu GPR32Opnd:$rt, ZERO, GPR32Opnd:$rs), 1>;
-def : InstAlias<"slt $rs, $rt, $imm",
- (SLTi GPR32Opnd:$rs, GPR32Opnd:$rt, simm16:$imm), 0>;
-def : InstAlias<"xor $rs, $rt, $imm",
- (XORi GPR32Opnd:$rs, GPR32Opnd:$rt, uimm16:$imm), 0>;
-def : InstAlias<"or $rs, $rt, $imm",
- (ORi GPR32Opnd:$rs, GPR32Opnd:$rt, uimm16:$imm), 0>;
-def : InstAlias<"nop", (SLL ZERO, ZERO, 0), 1>;
-def : InstAlias<"mfc0 $rt, $rd", (MFC0 GPR32Opnd:$rt, GPR32Opnd:$rd, 0), 0>;
-def : InstAlias<"mtc0 $rt, $rd", (MTC0 GPR32Opnd:$rt, GPR32Opnd:$rd, 0), 0>;
-def : InstAlias<"mfc2 $rt, $rd", (MFC2 GPR32Opnd:$rt, GPR32Opnd:$rd, 0), 0>;
-def : InstAlias<"mtc2 $rt, $rd", (MTC2 GPR32Opnd:$rt, GPR32Opnd:$rd, 0), 0>;
-def : InstAlias<"b $offset", (BEQ ZERO, ZERO, brtarget:$offset), 0>;
-def : InstAlias<"bnez $rs,$offset",
- (BNE GPR32Opnd:$rs, ZERO, brtarget:$offset), 0>;
-def : InstAlias<"beqz $rs,$offset",
- (BEQ GPR32Opnd:$rs, ZERO, brtarget:$offset), 0>;
-def : InstAlias<"syscall", (SYSCALL 0), 1>;
-
-def : InstAlias<"break $imm", (BREAK uimm10:$imm, 0), 1>;
-def : InstAlias<"break", (BREAK 0, 0), 1>;
-def : InstAlias<"ei", (EI ZERO), 1>;
-def : InstAlias<"di", (DI ZERO), 1>;
-
-def : InstAlias<"teq $rs, $rt", (TEQ GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>;
-def : InstAlias<"tge $rs, $rt", (TGE GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>;
-def : InstAlias<"tgeu $rs, $rt", (TGEU GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>;
-def : InstAlias<"tlt $rs, $rt", (TLT GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>;
-def : InstAlias<"tltu $rs, $rt", (TLTU GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>;
-def : InstAlias<"tne $rs, $rt", (TNE GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>;
-def : InstAlias<"sub, $rd, $rs, $imm",
- (ADDi GPR32Opnd:$rd, GPR32Opnd:$rs, InvertedImOperand:$imm)>;
-def : InstAlias<"subu, $rd, $rs, $imm",
- (ADDiu GPR32Opnd:$rd, GPR32Opnd:$rs, InvertedImOperand:$imm)>;
-
+def : MipsInstAlias<"jalr $rs", (JALR RA, GPR32Opnd:$rs), 0>;
+}
+def : MipsInstAlias<"jal $rs", (JALR RA, GPR32Opnd:$rs), 0>;
+def : MipsInstAlias<"jal $rd,$rs", (JALR GPR32Opnd:$rd, GPR32Opnd:$rs), 0>;
+def : MipsInstAlias<"not $rt, $rs",
+ (NOR GPR32Opnd:$rt, GPR32Opnd:$rs, ZERO), 0>;
+def : MipsInstAlias<"neg $rt, $rs",
+ (SUB GPR32Opnd:$rt, ZERO, GPR32Opnd:$rs), 1>;
+def : MipsInstAlias<"negu $rt",
+ (SUBu GPR32Opnd:$rt, ZERO, GPR32Opnd:$rt), 0>;
+def : MipsInstAlias<"negu $rt, $rs",
+ (SUBu GPR32Opnd:$rt, ZERO, GPR32Opnd:$rs), 1>;
+def : MipsInstAlias<"slt $rs, $rt, $imm",
+ (SLTi GPR32Opnd:$rs, GPR32Opnd:$rt, simm16:$imm), 0>;
+def : MipsInstAlias<"sltu $rt, $rs, $imm",
+ (SLTiu GPR32Opnd:$rt, GPR32Opnd:$rs, simm16:$imm), 0>;
+def : MipsInstAlias<"xor $rs, $rt, $imm",
+ (XORi GPR32Opnd:$rs, GPR32Opnd:$rt, uimm16:$imm), 0>;
+def : MipsInstAlias<"or $rs, $rt, $imm",
+ (ORi GPR32Opnd:$rs, GPR32Opnd:$rt, uimm16:$imm), 0>;
+def : MipsInstAlias<"nop", (SLL ZERO, ZERO, 0), 1>;
+def : MipsInstAlias<"mfc0 $rt, $rd", (MFC0 GPR32Opnd:$rt, GPR32Opnd:$rd, 0), 0>;
+def : MipsInstAlias<"mtc0 $rt, $rd", (MTC0 GPR32Opnd:$rt, GPR32Opnd:$rd, 0), 0>;
+def : MipsInstAlias<"mfc2 $rt, $rd", (MFC2 GPR32Opnd:$rt, GPR32Opnd:$rd, 0), 0>;
+def : MipsInstAlias<"mtc2 $rt, $rd", (MTC2 GPR32Opnd:$rt, GPR32Opnd:$rd, 0), 0>;
+def : MipsInstAlias<"b $offset", (BEQ ZERO, ZERO, brtarget:$offset), 0>;
+def : MipsInstAlias<"bnez $rs,$offset",
+ (BNE GPR32Opnd:$rs, ZERO, brtarget:$offset), 0>;
+def : MipsInstAlias<"beqz $rs,$offset",
+ (BEQ GPR32Opnd:$rs, ZERO, brtarget:$offset), 0>;
+def : MipsInstAlias<"syscall", (SYSCALL 0), 1>;
+
+def : MipsInstAlias<"break", (BREAK 0, 0), 1>;
+def : MipsInstAlias<"break $imm", (BREAK uimm10:$imm, 0), 1>;
+def : MipsInstAlias<"ei", (EI ZERO), 1>;
+def : MipsInstAlias<"di", (DI ZERO), 1>;
+
+def : MipsInstAlias<"teq $rs, $rt", (TEQ GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>;
+def : MipsInstAlias<"tge $rs, $rt", (TGE GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>;
+def : MipsInstAlias<"tgeu $rs, $rt", (TGEU GPR32Opnd:$rs, GPR32Opnd:$rt, 0),
+ 1>;
+def : MipsInstAlias<"tlt $rs, $rt", (TLT GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>;
+def : MipsInstAlias<"tltu $rs, $rt", (TLTU GPR32Opnd:$rs, GPR32Opnd:$rt, 0),
+ 1>;
+def : MipsInstAlias<"tne $rs, $rt", (TNE GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>;
+def : MipsInstAlias<"sll $rd, $rt, $rs",
+ (SLLV GPR32Opnd:$rd, GPR32Opnd:$rt, GPR32Opnd:$rs), 0>;
+def : MipsInstAlias<"sub, $rd, $rs, $imm",
+ (ADDi GPR32Opnd:$rd, GPR32Opnd:$rs,
+ InvertedImOperand:$imm), 0>;
+def : MipsInstAlias<"sub $rs, $imm",
+ (ADDi GPR32Opnd:$rs, GPR32Opnd:$rs, InvertedImOperand:$imm),
+ 0>;
+def : MipsInstAlias<"subu, $rd, $rs, $imm",
+ (ADDiu GPR32Opnd:$rd, GPR32Opnd:$rs,
+ InvertedImOperand:$imm), 0>;
+def : MipsInstAlias<"subu $rs, $imm", (ADDiu GPR32Opnd:$rs, GPR32Opnd:$rs,
+ InvertedImOperand:$imm), 0>;
+def : MipsInstAlias<"sra $rd, $rt, $rs",
+ (SRAV GPR32Opnd:$rd, GPR32Opnd:$rt, GPR32Opnd:$rs), 0>;
+def : MipsInstAlias<"srl $rd, $rt, $rs",
+ (SRLV GPR32Opnd:$rd, GPR32Opnd:$rt, GPR32Opnd:$rs), 0>;
//===----------------------------------------------------------------------===//
// Assembler Pseudo Instructions
//===----------------------------------------------------------------------===//
@@ -1271,7 +1405,7 @@ def : MipsPat<(i32 imm:$imm),
// Carry MipsPatterns
def : MipsPat<(subc GPR32:$lhs, GPR32:$rhs),
(SUBu GPR32:$lhs, GPR32:$rhs)>;
-let Predicates = [HasStdEnc, NotDSP] in {
+let AdditionalPredicates = [NotDSP] in {
def : MipsPat<(addc GPR32:$lhs, GPR32:$rhs),
(ADDu GPR32:$lhs, GPR32:$rhs)>;
def : MipsPat<(addc GPR32:$src, immSExt16:$imm),
@@ -1340,14 +1474,11 @@ def : MipsPat<(not GPR32:$in),
(NOR GPR32Opnd:$in, ZERO)>;
// extended loads
-let Predicates = [HasStdEnc] in {
- def : MipsPat<(i32 (extloadi1 addr:$src)), (LBu addr:$src)>;
- def : MipsPat<(i32 (extloadi8 addr:$src)), (LBu addr:$src)>;
- def : MipsPat<(i32 (extloadi16 addr:$src)), (LHu addr:$src)>;
-}
+def : MipsPat<(i32 (extloadi1 addr:$src)), (LBu addr:$src)>;
+def : MipsPat<(i32 (extloadi8 addr:$src)), (LBu addr:$src)>;
+def : MipsPat<(i32 (extloadi16 addr:$src)), (LHu addr:$src)>;
// peepholes
-let Predicates = [HasStdEnc] in
def : MipsPat<(store (i32 0), addr:$dst), (SW ZERO, addr:$dst)>;
// brcond patterns
@@ -1441,11 +1572,9 @@ def : MipsPat<(bswap GPR32:$rt), (ROTR (WSBH GPR32:$rt), 16)>;
// Load halfword/word patterns.
let AddedComplexity = 40 in {
- let Predicates = [HasStdEnc] in {
- def : LoadRegImmPat<LBu, i32, zextloadi8>;
- def : LoadRegImmPat<LH, i32, sextloadi16>;
- def : LoadRegImmPat<LW, i32, load>;
- }
+ def : LoadRegImmPat<LBu, i32, zextloadi8>;
+ def : LoadRegImmPat<LH, i32, sextloadi16>;
+ def : LoadRegImmPat<LW, i32, load>;
}
//===----------------------------------------------------------------------===//
@@ -1456,6 +1585,9 @@ include "MipsInstrFPU.td"
include "Mips64InstrInfo.td"
include "MipsCondMov.td"
+include "Mips32r6InstrInfo.td"
+include "Mips64r6InstrInfo.td"
+
//
// Mips16
diff --git a/lib/Target/Mips/MipsJITInfo.cpp b/lib/Target/Mips/MipsJITInfo.cpp
index d76cb1d..2072488 100644
--- a/lib/Target/Mips/MipsJITInfo.cpp
+++ b/lib/Target/Mips/MipsJITInfo.cpp
@@ -11,7 +11,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "jit"
#include "MipsJITInfo.h"
#include "MipsInstrInfo.h"
#include "MipsRelocations.h"
@@ -25,6 +24,8 @@
#include <cstdlib>
using namespace llvm;
+#define DEBUG_TYPE "jit"
+
void MipsJITInfo::replaceMachineCodeForFunction(void *Old, void *New) {
unsigned NewAddr = (intptr_t)New;
diff --git a/lib/Target/Mips/MipsJITInfo.h b/lib/Target/Mips/MipsJITInfo.h
index ecda310..c9dfd83 100644
--- a/lib/Target/Mips/MipsJITInfo.h
+++ b/lib/Target/Mips/MipsJITInfo.h
@@ -37,26 +37,26 @@ class MipsJITInfo : public TargetJITInfo {
/// overwriting OLD with a branch to NEW. This is used for self-modifying
/// code.
///
- virtual void replaceMachineCodeForFunction(void *Old, void *New);
+ void replaceMachineCodeForFunction(void *Old, void *New) override;
// getStubLayout - Returns the size and alignment of the largest call stub
// on Mips.
- virtual StubLayout getStubLayout();
+ StubLayout getStubLayout() override;
/// emitFunctionStub - Use the specified JITCodeEmitter object to emit a
/// small native function that simply calls the function at the specified
/// address.
- virtual void *emitFunctionStub(const Function *F, void *Fn,
- JITCodeEmitter &JCE);
+ void *emitFunctionStub(const Function *F, void *Fn,
+ JITCodeEmitter &JCE) override;
/// getLazyResolverFunction - Expose the lazy resolver to the JIT.
- virtual LazyResolverFn getLazyResolverFunction(JITCompilerFn);
+ LazyResolverFn getLazyResolverFunction(JITCompilerFn) override;
/// relocate - Before the JIT can run a block of code that has been emitted,
/// it must rewrite the code to contain the actual addresses of any
/// referenced global symbols.
- virtual void relocate(void *Function, MachineRelocation *MR,
- unsigned NumRelocs, unsigned char *GOTBase);
+ void relocate(void *Function, MachineRelocation *MR,
+ unsigned NumRelocs, unsigned char *GOTBase) override;
/// Initialize - Initialize internal stage for the function being JITted.
void Initialize(const MachineFunction &MF, bool isPIC,
diff --git a/lib/Target/Mips/MipsLongBranch.cpp b/lib/Target/Mips/MipsLongBranch.cpp
index 2b6a874..acfe76e 100644
--- a/lib/Target/Mips/MipsLongBranch.cpp
+++ b/lib/Target/Mips/MipsLongBranch.cpp
@@ -10,14 +10,9 @@
// This pass expands a branch or jump instruction into a long branch if its
// offset is too large to fit into its immediate field.
//
-// FIXME:
-// 1. Fix pc-region jump instructions which cross 256MB segment boundaries.
-// 2. If program has inline assembly statements whose size cannot be
-// determined accurately, load branch target addresses from the GOT.
+// FIXME: Fix pc-region jump instructions which cross 256MB segment boundaries.
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "mips-long-branch"
-
#include "Mips.h"
#include "MCTargetDesc/MipsBaseInfo.h"
#include "MipsTargetMachine.h"
@@ -33,6 +28,8 @@
using namespace llvm;
+#define DEBUG_TYPE "mips-long-branch"
+
STATISTIC(LongBranches, "Number of long branches.");
static cl::opt<bool> SkipLongBranch(
@@ -56,7 +53,7 @@ namespace {
bool HasLongBranch;
MachineInstr *Br;
- MBBInfo() : Size(0), HasLongBranch(false), Br(0) {}
+ MBBInfo() : Size(0), HasLongBranch(false), Br(nullptr) {}
};
class MipsLongBranch : public MachineFunctionPass {
@@ -67,13 +64,13 @@ namespace {
: MachineFunctionPass(ID), TM(tm),
IsPIC(TM.getRelocationModel() == Reloc::PIC_),
ABI(TM.getSubtarget<MipsSubtarget>().getTargetABI()),
- LongBranchSeqSize(!IsPIC ? 2 : (ABI == MipsSubtarget::N64 ? 13 : 9)) {}
+ LongBranchSeqSize(!IsPIC ? 2 : (ABI == MipsSubtarget::N64 ? 10 : 9)) {}
- virtual const char *getPassName() const {
+ const char *getPassName() const override {
return "Mips Long Branch";
}
- bool runOnMachineFunction(MachineFunction &F);
+ bool runOnMachineFunction(MachineFunction &F) override;
private:
void splitMBB(MachineBasicBlock *MBB);
@@ -111,7 +108,7 @@ static MachineBasicBlock *getTargetMBB(const MachineInstr &Br) {
}
assert(false && "This instruction does not have an MBB operand.");
- return 0;
+ return nullptr;
}
// Traverse the list of instructions backwards until a non-debug instruction is
@@ -267,20 +264,14 @@ void MipsLongBranch::expandToLongBranch(MBBInfo &I) {
LongBrMBB->addSuccessor(BalTgtMBB);
BalTgtMBB->addSuccessor(TgtMBB);
- int64_t TgtAddress = MBBInfos[TgtMBB->getNumber()].Address;
- unsigned BalTgtMBBSize = 5;
- int64_t Offset = TgtAddress - (I.Address + I.Size - BalTgtMBBSize * 4);
- int64_t Lo = SignExtend64<16>(Offset & 0xffff);
- int64_t Hi = SignExtend64<16>(((Offset + 0x8000) >> 16) & 0xffff);
-
if (ABI != MipsSubtarget::N64) {
// $longbr:
// addiu $sp, $sp, -8
// sw $ra, 0($sp)
- // bal $baltgt
// lui $at, %hi($tgt - $baltgt)
- // $baltgt:
+ // bal $baltgt
// addiu $at, $at, %lo($tgt - $baltgt)
+ // $baltgt:
// addu $at, $ra, $at
// lw $ra, 0($sp)
// jr $at
@@ -295,14 +286,31 @@ void MipsLongBranch::expandToLongBranch(MBBInfo &I) {
BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::SW)).addReg(Mips::RA)
.addReg(Mips::SP).addImm(0);
+  // The LUi and ADDiu instructions create the 32-bit offset of the target
+  // basic block from the target of the BAL instruction.  We cannot use an
+  // immediate value for this offset because it cannot be determined
+  // accurately when the program has inline assembly statements.  We therefore
+  // use the relocation expressions %hi($tgt-$baltgt) and %lo($tgt-$baltgt),
+  // which are resolved during the fixup, so the values will always be correct.
+  //
+  // Since we cannot create the %hi($tgt-$baltgt) and %lo($tgt-$baltgt)
+  // expressions at this point (it is possible only at the MC layer), we
+  // replace LUi and ADDiu with the pseudo instructions LONG_BRANCH_LUi and
+  // LONG_BRANCH_ADDiu, and add both basic blocks as operands to these
+  // instructions.  When lowering these pseudo instructions to LUi and ADDiu
+  // in the MC layer, we will create the %hi($tgt-$baltgt) and
+  // %lo($tgt-$baltgt) expressions and add them as operands to the lowered
+  // instructions.
+
+ BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::LONG_BRANCH_LUi), Mips::AT)
+ .addMBB(TgtMBB).addMBB(BalTgtMBB);
MIBundleBuilder(*LongBrMBB, Pos)
.append(BuildMI(*MF, DL, TII->get(Mips::BAL_BR)).addMBB(BalTgtMBB))
- .append(BuildMI(*MF, DL, TII->get(Mips::LUi), Mips::AT).addImm(Hi));
+ .append(BuildMI(*MF, DL, TII->get(Mips::LONG_BRANCH_ADDiu), Mips::AT)
+ .addReg(Mips::AT).addMBB(TgtMBB).addMBB(BalTgtMBB));
Pos = BalTgtMBB->begin();
- BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::ADDiu), Mips::AT)
- .addReg(Mips::AT).addImm(Lo);
BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::ADDu), Mips::AT)
.addReg(Mips::RA).addReg(Mips::AT);
BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::LW), Mips::RA)
@@ -316,14 +324,11 @@ void MipsLongBranch::expandToLongBranch(MBBInfo &I) {
// $longbr:
// daddiu $sp, $sp, -16
// sd $ra, 0($sp)
- // lui64 $at, %highest($tgt - $baltgt)
- // daddiu $at, $at, %higher($tgt - $baltgt)
+ // daddiu $at, $zero, %hi($tgt - $baltgt)
// dsll $at, $at, 16
- // daddiu $at, $at, %hi($tgt - $baltgt)
// bal $baltgt
- // dsll $at, $at, 16
- // $baltgt:
// daddiu $at, $at, %lo($tgt - $baltgt)
+ // $baltgt:
// daddu $at, $ra, $at
// ld $ra, 0($sp)
// jr64 $at
@@ -331,9 +336,20 @@ void MipsLongBranch::expandToLongBranch(MBBInfo &I) {
// $fallthrough:
//
- int64_t Higher = SignExtend64<16>(((Offset + 0x80008000) >> 32) & 0xffff);
- int64_t Highest =
- SignExtend64<16>(((Offset + 0x800080008000LL) >> 48) & 0xffff);
+ // We assume the branch is within-function, and that offset is within
+ // +/- 2GB. High 32 bits will therefore always be zero.
+
+ // Note that this will work even if the offset is negative, because
+ // of the +1 modification that's added in that case. For example, if the
+ // offset is -1MB (0xFFFFFFFFFFF00000), the computation for %higher is
+ //
+ // 0xFFFFFFFFFFF00000 + 0x80008000 = 0x000000007FF08000
+ //
+ // and the bits [47:32] are zero. For %highest
+ //
+ // 0xFFFFFFFFFFF00000 + 0x800080008000 = 0x000080007FF08000
+ //
+ // and the bits [63:48] are zero.
Pos = LongBrMBB->begin();
@@ -341,24 +357,21 @@ void MipsLongBranch::expandToLongBranch(MBBInfo &I) {
.addReg(Mips::SP_64).addImm(-16);
BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::SD)).addReg(Mips::RA_64)
.addReg(Mips::SP_64).addImm(0);
- BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::LUi64), Mips::AT_64)
- .addImm(Highest);
- BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::DADDiu), Mips::AT_64)
- .addReg(Mips::AT_64).addImm(Higher);
+ BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::LONG_BRANCH_DADDiu),
+ Mips::AT_64).addReg(Mips::ZERO_64)
+ .addMBB(TgtMBB, MipsII::MO_ABS_HI).addMBB(BalTgtMBB);
BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::DSLL), Mips::AT_64)
.addReg(Mips::AT_64).addImm(16);
- BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::DADDiu), Mips::AT_64)
- .addReg(Mips::AT_64).addImm(Hi);
MIBundleBuilder(*LongBrMBB, Pos)
.append(BuildMI(*MF, DL, TII->get(Mips::BAL_BR)).addMBB(BalTgtMBB))
- .append(BuildMI(*MF, DL, TII->get(Mips::DSLL), Mips::AT_64)
- .addReg(Mips::AT_64).addImm(16));
+ .append(BuildMI(*MF, DL, TII->get(Mips::LONG_BRANCH_DADDiu),
+ Mips::AT_64).addReg(Mips::AT_64)
+ .addMBB(TgtMBB, MipsII::MO_ABS_LO)
+ .addMBB(BalTgtMBB));
Pos = BalTgtMBB->begin();
- BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::DADDiu), Mips::AT_64)
- .addReg(Mips::AT_64).addImm(Lo);
BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::DADDu), Mips::AT_64)
.addReg(Mips::RA_64).addReg(Mips::AT_64);
BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::LD), Mips::RA_64)
@@ -370,8 +383,7 @@ void MipsLongBranch::expandToLongBranch(MBBInfo &I) {
.addReg(Mips::SP_64).addImm(16));
}
- assert(BalTgtMBBSize == BalTgtMBB->size());
- assert(LongBrMBB->size() + BalTgtMBBSize == LongBranchSeqSize);
+ assert(LongBrMBB->size() + BalTgtMBB->size() == LongBranchSeqSize);
} else {
// $longbr:
// j $tgt
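
The worked example in the comment above, and the drop of LongBranchSeqSize from 13 to 10 for N64, can be checked mechanically. This standalone sketch (not part of the patch) reuses the rounding constants from the deleted %higher/%highest computation:

#include <cassert>
#include <cstdint>

// Recompute the removed %higher/%highest fields for the -1MB example.
static int64_t signExtend16(uint64_t V) {
  return (int16_t)(V & 0xffff);
}

int main() {
  uint64_t Offset = 0xFFFFFFFFFFF00000ULL; // -1MB as a 64-bit bit pattern
  int64_t Higher  = signExtend16((Offset + 0x80008000ULL) >> 32);
  int64_t Highest = signExtend16((Offset + 0x800080008000ULL) >> 48);
  // Both upper fields vanish for any within-function +/- 2GB offset, which
  // is why the new N64 sequence needs only %hi/%lo plus a single dsll.
  assert(Higher == 0 && Highest == 0);
}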
diff --git a/lib/Target/Mips/MipsMCInstLower.cpp b/lib/Target/Mips/MipsMCInstLower.cpp
index 7c9a9ed..821392e 100644
--- a/lib/Target/Mips/MipsMCInstLower.cpp
+++ b/lib/Target/Mips/MipsMCInstLower.cpp
@@ -151,7 +151,75 @@ MCOperand MipsMCInstLower::LowerOperand(const MachineOperand &MO,
return MCOperand();
}
+MCOperand MipsMCInstLower::createSub(MachineBasicBlock *BB1,
+ MachineBasicBlock *BB2,
+ MCSymbolRefExpr::VariantKind Kind) const {
+ const MCSymbolRefExpr *Sym1 = MCSymbolRefExpr::Create(BB1->getSymbol(), *Ctx);
+ const MCSymbolRefExpr *Sym2 = MCSymbolRefExpr::Create(BB2->getSymbol(), *Ctx);
+ const MCBinaryExpr *Sub = MCBinaryExpr::CreateSub(Sym1, Sym2, *Ctx);
+
+ return MCOperand::CreateExpr(MipsMCExpr::Create(Kind, Sub, *Ctx));
+}
+
+void MipsMCInstLower::
+lowerLongBranchLUi(const MachineInstr *MI, MCInst &OutMI) const {
+ OutMI.setOpcode(Mips::LUi);
+
+ // Lower register operand.
+ OutMI.addOperand(LowerOperand(MI->getOperand(0)));
+
+ // Create %hi($tgt-$baltgt).
+ OutMI.addOperand(createSub(MI->getOperand(1).getMBB(),
+ MI->getOperand(2).getMBB(),
+ MCSymbolRefExpr::VK_Mips_ABS_HI));
+}
+
+void MipsMCInstLower::
+lowerLongBranchADDiu(const MachineInstr *MI, MCInst &OutMI, int Opcode,
+ MCSymbolRefExpr::VariantKind Kind) const {
+ OutMI.setOpcode(Opcode);
+
+ // Lower two register operands.
+ for (unsigned I = 0, E = 2; I != E; ++I) {
+ const MachineOperand &MO = MI->getOperand(I);
+ OutMI.addOperand(LowerOperand(MO));
+ }
+
+ // Create %lo($tgt-$baltgt) or %hi($tgt-$baltgt).
+ OutMI.addOperand(createSub(MI->getOperand(2).getMBB(),
+ MI->getOperand(3).getMBB(), Kind));
+}
+
+bool MipsMCInstLower::lowerLongBranch(const MachineInstr *MI,
+ MCInst &OutMI) const {
+ switch (MI->getOpcode()) {
+ default:
+ return false;
+ case Mips::LONG_BRANCH_LUi:
+ lowerLongBranchLUi(MI, OutMI);
+ return true;
+ case Mips::LONG_BRANCH_ADDiu:
+ lowerLongBranchADDiu(MI, OutMI, Mips::ADDiu,
+ MCSymbolRefExpr::VK_Mips_ABS_LO);
+ return true;
+ case Mips::LONG_BRANCH_DADDiu:
+ unsigned TargetFlags = MI->getOperand(2).getTargetFlags();
+ if (TargetFlags == MipsII::MO_ABS_HI)
+ lowerLongBranchADDiu(MI, OutMI, Mips::DADDiu,
+ MCSymbolRefExpr::VK_Mips_ABS_HI);
+ else if (TargetFlags == MipsII::MO_ABS_LO)
+ lowerLongBranchADDiu(MI, OutMI, Mips::DADDiu,
+ MCSymbolRefExpr::VK_Mips_ABS_LO);
+ else
+ report_fatal_error("Unexpected flags for LONG_BRANCH_DADDiu");
+ return true;
+ }
+}
+
void MipsMCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
+ if (lowerLongBranch(MI, OutMI))
+ return;
+
OutMI.setOpcode(MI->getOpcode());
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
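
With this change, LONG_BRANCH_LUi lowers to lui $at, %hi($tgt-$baltgt), with the label difference kept symbolic until fixup time. For illustration, the composition createSub performs, rewritten as a free function (a sketch using only the APIs visible above; not part of the patch):

#include "MCTargetDesc/MipsMCExpr.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
using namespace llvm;

static MCOperand makeHiDifference(MCSymbol *Tgt, MCSymbol *BalTgt,
                                  MCContext &Ctx) {
  const MCSymbolRefExpr *TgtExpr = MCSymbolRefExpr::Create(Tgt, Ctx);
  const MCSymbolRefExpr *BalExpr = MCSymbolRefExpr::Create(BalTgt, Ctx);
  // $tgt - $baltgt stays symbolic and is resolved during the fixup, so
  // inline assembly of unknown size between the labels cannot make it stale.
  const MCBinaryExpr *Diff = MCBinaryExpr::CreateSub(TgtExpr, BalExpr, Ctx);
  return MCOperand::CreateExpr(
      MipsMCExpr::Create(MCSymbolRefExpr::VK_Mips_ABS_HI, Diff, Ctx));
}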
diff --git a/lib/Target/Mips/MipsMCInstLower.h b/lib/Target/Mips/MipsMCInstLower.h
index 4570bd9..269190f 100644
--- a/lib/Target/Mips/MipsMCInstLower.h
+++ b/lib/Target/Mips/MipsMCInstLower.h
@@ -9,6 +9,7 @@
#ifndef MIPSMCINSTLOWER_H
#define MIPSMCINSTLOWER_H
+#include "MCTargetDesc/MipsMCExpr.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/Support/Compiler.h"
@@ -36,6 +37,13 @@ public:
private:
MCOperand LowerSymbolOperand(const MachineOperand &MO,
MachineOperandType MOTy, unsigned Offset) const;
+ MCOperand createSub(MachineBasicBlock *BB1, MachineBasicBlock *BB2,
+ MCSymbolRefExpr::VariantKind Kind) const;
+ void lowerLongBranchLUi(const MachineInstr *MI, MCInst &OutMI) const;
+ void lowerLongBranchADDiu(const MachineInstr *MI, MCInst &OutMI,
+ int Opcode,
+ MCSymbolRefExpr::VariantKind Kind) const;
+ bool lowerLongBranch(const MachineInstr *MI, MCInst &OutMI) const;
};
}
diff --git a/lib/Target/Mips/MipsMSAInstrInfo.td b/lib/Target/Mips/MipsMSAInstrInfo.td
index 5722c6c..285bb14 100644
--- a/lib/Target/Mips/MipsMSAInstrInfo.td
+++ b/lib/Target/Mips/MipsMSAInstrInfo.td
@@ -65,10 +65,6 @@ def MipsVExtractZExt : SDNode<"MipsISD::VEXTRACT_ZEXT_ELT",
// Operands
-def uimm2 : Operand<i32> {
- let PrintMethod = "printUnsignedImm";
-}
-
// The immediate of an LSA instruction needs special handling
// as the encoded value should be subtracted by one.
def uimm2LSAAsmOperand : AsmOperandClass {
@@ -84,10 +80,6 @@ def LSAImm : Operand<i32> {
let ParserMatchClass = uimm2LSAAsmOperand;
}
-def uimm3 : Operand<i32> {
- let PrintMethod = "printUnsignedImm8";
-}
-
def uimm4 : Operand<i32> {
let PrintMethod = "printUnsignedImm8";
}
@@ -1505,6 +1497,15 @@ class MSA_INSERT_PSEUDO_BASE<SDPatternOperator OpNode, ValueType Ty,
string Constraints = "$wd = $wd_in";
}
+class MSA_INSERT_VIDX_PSEUDO_BASE<SDPatternOperator OpNode, ValueType Ty,
+ RegisterOperand ROWD, RegisterOperand ROFS> :
+ MSAPseudo<(outs ROWD:$wd), (ins ROWD:$wd_in, GPR32Opnd:$n, ROFS:$fs),
+ [(set ROWD:$wd, (OpNode (Ty ROWD:$wd_in), ROFS:$fs,
+ GPR32Opnd:$n))]> {
+ bit usesCustomInserter = 1;
+ string Constraints = "$wd = $wd_in";
+}
+
class MSA_INSVE_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
RegisterOperand ROWD, RegisterOperand ROWS = ROWD,
InstrItinClass itin = NoItinerary> {
@@ -2300,11 +2301,25 @@ class INSERT_W_DESC : MSA_INSERT_DESC_BASE<"insert.w", vinsert_v4i32,
class INSERT_D_DESC : MSA_INSERT_DESC_BASE<"insert.d", vinsert_v2i64,
MSA128DOpnd, GPR64Opnd>;
+class INSERT_B_VIDX_PSEUDO_DESC :
+ MSA_INSERT_VIDX_PSEUDO_BASE<vector_insert, v16i8, MSA128BOpnd, GPR32Opnd>;
+class INSERT_H_VIDX_PSEUDO_DESC :
+ MSA_INSERT_VIDX_PSEUDO_BASE<vector_insert, v8i16, MSA128HOpnd, GPR32Opnd>;
+class INSERT_W_VIDX_PSEUDO_DESC :
+ MSA_INSERT_VIDX_PSEUDO_BASE<vector_insert, v4i32, MSA128WOpnd, GPR32Opnd>;
+class INSERT_D_VIDX_PSEUDO_DESC :
+ MSA_INSERT_VIDX_PSEUDO_BASE<vector_insert, v2i64, MSA128DOpnd, GPR64Opnd>;
+
class INSERT_FW_PSEUDO_DESC : MSA_INSERT_PSEUDO_BASE<vector_insert, v4f32,
MSA128WOpnd, FGR32Opnd>;
class INSERT_FD_PSEUDO_DESC : MSA_INSERT_PSEUDO_BASE<vector_insert, v2f64,
MSA128DOpnd, FGR64Opnd>;
+class INSERT_FW_VIDX_PSEUDO_DESC :
+ MSA_INSERT_VIDX_PSEUDO_BASE<vector_insert, v4f32, MSA128WOpnd, FGR32Opnd>;
+class INSERT_FD_VIDX_PSEUDO_DESC :
+ MSA_INSERT_VIDX_PSEUDO_BASE<vector_insert, v2f64, MSA128DOpnd, FGR64Opnd>;
+
class INSVE_B_DESC : MSA_INSVE_DESC_BASE<"insve.b", insve_v16i8,
MSA128BOpnd>;
class INSVE_H_DESC : MSA_INSVE_DESC_BASE<"insve.h", insve_v8i16,
@@ -3214,6 +3229,13 @@ let DecoderMethod = "DecodeINSVE_DF" in {
def INSERT_FW_PSEUDO : INSERT_FW_PSEUDO_DESC;
def INSERT_FD_PSEUDO : INSERT_FD_PSEUDO_DESC;
+def INSERT_B_VIDX_PSEUDO : INSERT_B_VIDX_PSEUDO_DESC;
+def INSERT_H_VIDX_PSEUDO : INSERT_H_VIDX_PSEUDO_DESC;
+def INSERT_W_VIDX_PSEUDO : INSERT_W_VIDX_PSEUDO_DESC;
+def INSERT_D_VIDX_PSEUDO : INSERT_D_VIDX_PSEUDO_DESC;
+def INSERT_FW_VIDX_PSEUDO : INSERT_FW_VIDX_PSEUDO_DESC;
+def INSERT_FD_VIDX_PSEUDO : INSERT_FD_VIDX_PSEUDO_DESC;
+
def LD_B: LD_B_ENC, LD_B_DESC;
def LD_H: LD_H_ENC, LD_H_DESC;
def LD_W: LD_W_ENC, LD_W_DESC;
@@ -3731,3 +3753,55 @@ def SZ_D_PSEUDO : MSA_CBRANCH_PSEUDO_DESC_BASE<MipsVAllZero, v2i64,
MSA128D, NoItinerary>;
def SZ_V_PSEUDO : MSA_CBRANCH_PSEUDO_DESC_BASE<MipsVAnyZero, v16i8,
MSA128B, NoItinerary>;
+
+// Vector extraction with variable index
+def : MSAPat<(i32 (vextract_sext_i8 v16i8:$ws, i32:$idx)),
+ (SRA (COPY_TO_REGCLASS (i32 (EXTRACT_SUBREG (SPLAT_B v16i8:$ws,
+ i32:$idx),
+ sub_lo)),
+ GPR32), (i32 24))>;
+def : MSAPat<(i32 (vextract_sext_i16 v8i16:$ws, i32:$idx)),
+ (SRA (COPY_TO_REGCLASS (i32 (EXTRACT_SUBREG (SPLAT_H v8i16:$ws,
+ i32:$idx),
+ sub_lo)),
+ GPR32), (i32 16))>;
+def : MSAPat<(i32 (vextract_sext_i32 v4i32:$ws, i32:$idx)),
+ (COPY_TO_REGCLASS (i32 (EXTRACT_SUBREG (SPLAT_W v4i32:$ws,
+ i32:$idx),
+ sub_lo)),
+ GPR32)>;
+def : MSAPat<(i64 (vextract_sext_i64 v2i64:$ws, i32:$idx)),
+ (COPY_TO_REGCLASS (i64 (EXTRACT_SUBREG (SPLAT_D v2i64:$ws,
+ i32:$idx),
+ sub_64)),
+ GPR64), [HasMSA, IsGP64bit]>;
+
+def : MSAPat<(i32 (vextract_zext_i8 v16i8:$ws, i32:$idx)),
+ (SRL (COPY_TO_REGCLASS (i32 (EXTRACT_SUBREG (SPLAT_B v16i8:$ws,
+ i32:$idx),
+ sub_lo)),
+ GPR32), (i32 24))>;
+def : MSAPat<(i32 (vextract_zext_i16 v8i16:$ws, i32:$idx)),
+ (SRL (COPY_TO_REGCLASS (i32 (EXTRACT_SUBREG (SPLAT_H v8i16:$ws,
+ i32:$idx),
+ sub_lo)),
+ GPR32), (i32 16))>;
+def : MSAPat<(i32 (vextract_zext_i32 v4i32:$ws, i32:$idx)),
+ (COPY_TO_REGCLASS (i32 (EXTRACT_SUBREG (SPLAT_W v4i32:$ws,
+ i32:$idx),
+ sub_lo)),
+ GPR32)>;
+def : MSAPat<(i64 (vextract_zext_i64 v2i64:$ws, i32:$idx)),
+ (COPY_TO_REGCLASS (i64 (EXTRACT_SUBREG (SPLAT_D v2i64:$ws,
+ i32:$idx),
+ sub_64)),
+ GPR64), [HasMSA, IsGP64bit]>;
+
+def : MSAPat<(f32 (vector_extract v4f32:$ws, i32:$idx)),
+ (f32 (EXTRACT_SUBREG (SPLAT_W v4f32:$ws,
+ i32:$idx),
+ sub_lo))>;
+def : MSAPat<(f64 (vector_extract v2f64:$ws, i32:$idx)),
+ (f64 (EXTRACT_SUBREG (SPLAT_D v2f64:$ws,
+ i32:$idx),
+ sub_64))>;
diff --git a/lib/Target/Mips/MipsMachineFunction.cpp b/lib/Target/Mips/MipsMachineFunction.cpp
index dedf802..e30302e 100644
--- a/lib/Target/Mips/MipsMachineFunction.cpp
+++ b/lib/Target/Mips/MipsMachineFunction.cpp
@@ -27,7 +27,7 @@ FixGlobalBaseReg("mips-fix-global-base-reg", cl::Hidden, cl::init(true),
MipsCallEntry::MipsCallEntry(const StringRef &N) {
#ifndef NDEBUG
Name = N;
- Val = 0;
+ Val = nullptr;
#endif
}
@@ -65,9 +65,8 @@ MipsFunctionInfo::~MipsFunctionInfo() {
++I)
delete I->getValue();
- for (ValueMap<const GlobalValue *, const MipsCallEntry *>::iterator
- I = GlobalCallEntries.begin(), E = GlobalCallEntries.end(); I != E; ++I)
- delete I->second;
+ for (const auto &Entry : GlobalCallEntries)
+ delete Entry.second;
}
bool MipsFunctionInfo::globalBaseRegSet() const {
diff --git a/lib/Target/Mips/MipsMachineFunction.h b/lib/Target/Mips/MipsMachineFunction.h
index 3e14c8c..e9101cc 100644
--- a/lib/Target/Mips/MipsMachineFunction.h
+++ b/lib/Target/Mips/MipsMachineFunction.h
@@ -37,12 +37,12 @@ class MipsCallEntry : public PseudoSourceValue {
public:
explicit MipsCallEntry(const StringRef &N);
explicit MipsCallEntry(const GlobalValue *V);
- virtual bool isConstant(const MachineFrameInfo *) const;
- virtual bool isAliased(const MachineFrameInfo *) const;
- virtual bool mayAlias(const MachineFrameInfo *) const;
+ bool isConstant(const MachineFrameInfo *) const override;
+ bool isAliased(const MachineFrameInfo *) const override;
+ bool mayAlias(const MachineFrameInfo *) const override;
private:
- virtual void printCustom(raw_ostream &O) const;
+ void printCustom(raw_ostream &O) const override;
#ifndef NDEBUG
std::string Name;
const GlobalValue *Val;
diff --git a/lib/Target/Mips/MipsModuleISelDAGToDAG.cpp b/lib/Target/Mips/MipsModuleISelDAGToDAG.cpp
index c6abf17..03c76ea 100644
--- a/lib/Target/Mips/MipsModuleISelDAGToDAG.cpp
+++ b/lib/Target/Mips/MipsModuleISelDAGToDAG.cpp
@@ -14,6 +14,8 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#define DEBUG_TYPE "mips-isel"
+
namespace llvm {
bool MipsModuleDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
diff --git a/lib/Target/Mips/MipsModuleISelDAGToDAG.h b/lib/Target/Mips/MipsModuleISelDAGToDAG.h
index fda35ae..a96862a 100644
--- a/lib/Target/Mips/MipsModuleISelDAGToDAG.h
+++ b/lib/Target/Mips/MipsModuleISelDAGToDAG.h
@@ -41,15 +41,11 @@ public:
TM(TM_), Subtarget(TM.getSubtarget<MipsSubtarget>()) {}
// Pass Name
- virtual const char *getPassName() const {
+ const char *getPassName() const override {
return "MIPS DAG->DAG Pattern Instruction Selection";
}
- virtual bool runOnMachineFunction(MachineFunction &MF);
-
- virtual SDNode *Select(SDNode *N) {
- llvm_unreachable("unexpected");
- }
+ bool runOnMachineFunction(MachineFunction &MF) override;
protected:
/// Keep a pointer to the MipsSubtarget around so that we can make the right
diff --git a/lib/Target/Mips/MipsOptimizePICCall.cpp b/lib/Target/Mips/MipsOptimizePICCall.cpp
index db270f3..c234049 100644
--- a/lib/Target/Mips/MipsOptimizePICCall.cpp
+++ b/lib/Target/Mips/MipsOptimizePICCall.cpp
@@ -12,8 +12,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "optimize-mips-pic-call"
-
#include "Mips.h"
#include "MCTargetDesc/MipsBaseInfo.h"
#include "MipsMachineFunction.h"
@@ -25,6 +23,8 @@
using namespace llvm;
+#define DEBUG_TYPE "optimize-mips-pic-call"
+
static cl::opt<bool> LoadTargetFromGOT("mips-load-target-from-got",
cl::init(true),
cl::desc("Load target address from GOT"),
@@ -35,11 +35,13 @@ static cl::opt<bool> EraseGPOpnd("mips-erase-gp-opnd",
cl::Hidden);
namespace {
+typedef PointerUnion<const Value *, const PseudoSourceValue *> ValueType;
+
typedef std::pair<unsigned, unsigned> CntRegP;
typedef RecyclingAllocator<BumpPtrAllocator,
- ScopedHashTableVal<const Value *, CntRegP> >
+ ScopedHashTableVal<ValueType, CntRegP> >
AllocatorTy;
-typedef ScopedHashTable<const Value *, CntRegP, DenseMapInfo<const Value *>,
+typedef ScopedHashTable<ValueType, CntRegP, DenseMapInfo<ValueType>,
AllocatorTy> ScopedHTType;
class MBBInfo {
@@ -59,11 +61,11 @@ class OptimizePICCall : public MachineFunctionPass {
public:
OptimizePICCall(TargetMachine &tm) : MachineFunctionPass(ID) {}
- virtual const char *getPassName() const { return "Mips OptimizePICCall"; }
+ const char *getPassName() const override { return "Mips OptimizePICCall"; }
- bool runOnMachineFunction(MachineFunction &F);
+ bool runOnMachineFunction(MachineFunction &F) override;
- void getAnalysisUsage(AnalysisUsage &AU) const {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<MachineDominatorTree>();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -78,18 +80,18 @@ private:
/// and the underlying object in Reg and Val respectively, if the function's
/// address can be resolved lazily.
bool isCallViaRegister(MachineInstr &MI, unsigned &Reg,
- const Value *&Val) const;
+ ValueType &Val) const;
/// \brief Return the number of instructions that dominate the current
/// instruction and load the function address from object Entry.
- unsigned getCount(const Value *Entry);
+ unsigned getCount(ValueType Entry);
/// \brief Return the destination virtual register of the last instruction
/// that loads from object Entry.
- unsigned getReg(const Value *Entry);
+ unsigned getReg(ValueType Entry);
/// \brief Update ScopedHT.
- void incCntAndSetReg(const Value *Entry, unsigned Reg);
+ void incCntAndSetReg(ValueType Entry, unsigned Reg);
ScopedHTType ScopedHT;
static char ID;
@@ -101,13 +103,13 @@ char OptimizePICCall::ID = 0;
/// Return the first MachineOperand of MI if it is a used virtual register.
static MachineOperand *getCallTargetRegOpnd(MachineInstr &MI) {
if (MI.getNumOperands() == 0)
- return 0;
+ return nullptr;
MachineOperand &MO = MI.getOperand(0);
if (!MO.isReg() || !MO.isUse() ||
!TargetRegisterInfo::isVirtualRegister(MO.getReg()))
- return 0;
+ return nullptr;
return &MO;
}
@@ -153,10 +155,10 @@ static void eraseGPOpnd(MachineInstr &MI) {
}
}
- llvm_unreachable(0);
+ llvm_unreachable(nullptr);
}
-MBBInfo::MBBInfo(MachineDomTreeNode *N) : Node(N), HTScope(0) {}
+MBBInfo::MBBInfo(MachineDomTreeNode *N) : Node(N), HTScope(nullptr) {}
const MachineDomTreeNode *MBBInfo::getNode() const { return Node; }
@@ -210,7 +212,7 @@ bool OptimizePICCall::visitNode(MBBInfo &MBBI) {
for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;
++I) {
unsigned Reg;
- const Value *Entry;
+ ValueType Entry;
// Skip instructions that are not call instructions via registers.
if (!isCallViaRegister(*I, Reg, Entry))
@@ -242,7 +244,7 @@ bool OptimizePICCall::visitNode(MBBInfo &MBBI) {
}
bool OptimizePICCall::isCallViaRegister(MachineInstr &MI, unsigned &Reg,
- const Value *&Val) const {
+ ValueType &Val) const {
if (!MI.isCall())
return false;
@@ -254,7 +256,7 @@ bool OptimizePICCall::isCallViaRegister(MachineInstr &MI, unsigned &Reg,
// Get the instruction that loads the function address from the GOT.
Reg = MO->getReg();
- Val = 0;
+ Val = (Value*)nullptr;
MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
MachineInstr *DefMI = MRI.getVRegDef(Reg);
@@ -273,20 +275,22 @@ bool OptimizePICCall::isCallViaRegister(MachineInstr &MI, unsigned &Reg,
// Return the underlying object for the GOT entry in Val.
assert(DefMI->hasOneMemOperand());
Val = (*DefMI->memoperands_begin())->getValue();
+ if (!Val)
+ Val = (*DefMI->memoperands_begin())->getPseudoValue();
return true;
}
-unsigned OptimizePICCall::getCount(const Value *Entry) {
+unsigned OptimizePICCall::getCount(ValueType Entry) {
return ScopedHT.lookup(Entry).first;
}
-unsigned OptimizePICCall::getReg(const Value *Entry) {
+unsigned OptimizePICCall::getReg(ValueType Entry) {
unsigned Reg = ScopedHT.lookup(Entry).second;
assert(Reg);
return Reg;
}
-void OptimizePICCall::incCntAndSetReg(const Value *Entry, unsigned Reg) {
+void OptimizePICCall::incCntAndSetReg(ValueType Entry, unsigned Reg) {
CntRegP P = ScopedHT.lookup(Entry);
ScopedHT.insert(Entry, std::make_pair(P.first + 1, Reg));
}
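
The const Value * -> ValueType widening above, together with the new getPseudoValue() fallback, lets the pass key its scoped table on GOT loads whose memory operand has no IR value attached. ValueType is presumably a pointer-union-style sum type over const Value * and const PseudoSourceValue *; a standalone sketch of such a two-pointer key, using std::variant and invented stand-in types:

#include <cstdio>
#include <variant>

struct IRValue {};           // stands in for llvm::Value
struct PseudoSourceValue {}; // stands in for llvm::PseudoSourceValue

// One key type that can identify a GOT entry by either kind of object.
using ValueType = std::variant<const IRValue *, const PseudoSourceValue *>;

static bool isIRValue(const ValueType &V) {
  return std::holds_alternative<const IRValue *>(V);
}

int main() {
  IRValue F;
  PseudoSourceValue PSV;
  ValueType A = &F;   // normal case: the GOT entry's underlying IR value
  ValueType B = &PSV; // fallback case: memory described only by a pseudo value
  std::printf("%d %d\n", isIRValue(A), isIRValue(B)); // prints "1 0"
  return 0;
}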
diff --git a/lib/Target/Mips/MipsOs16.cpp b/lib/Target/Mips/MipsOs16.cpp
index fe60841..7aae964 100644
--- a/lib/Target/Mips/MipsOs16.cpp
+++ b/lib/Target/Mips/MipsOs16.cpp
@@ -11,13 +11,14 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "mips-os16"
#include "MipsOs16.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#define DEBUG_TYPE "mips-os16"
+
static cl::opt<std::string> Mips32FunctionMask(
"mips32-function-mask",
diff --git a/lib/Target/Mips/MipsOs16.h b/lib/Target/Mips/MipsOs16.h
index 21beef8..55e5a81 100644
--- a/lib/Target/Mips/MipsOs16.h
+++ b/lib/Target/Mips/MipsOs16.h
@@ -34,11 +34,11 @@ public:
}
- virtual const char *getPassName() const {
+ const char *getPassName() const override {
return "MIPS Os16 Optimization";
}
- virtual bool runOnModule(Module &M);
+ bool runOnModule(Module &M) override;
};
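
The virtual-to-override conversions here and throughout the commit are behavior-preserving; override additionally makes the compiler reject a declaration that no longer matches any base-class virtual. A minimal illustration (invented types, not LLVM code):

struct Pass {
  virtual const char *getPassName() const { return "base"; }
  virtual ~Pass() = default;
};

struct MyPass : Pass {
  // OK: signature matches the base virtual exactly.
  const char *getPassName() const override { return "MyPass"; }

  // A non-const or misspelled variant marked 'override' would be a compile
  // error instead of silently introducing a brand-new function:
  //   const char *getPassName() override;   // error: missing const
};

int main() {
  Pass *P = new MyPass;
  bool OK = P->getPassName()[0] == 'M'; // dynamic dispatch still works
  delete P;
  return OK ? 0 : 1;
}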
diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp
index d7fc93b..83d25ab 100644
--- a/lib/Target/Mips/MipsRegisterInfo.cpp
+++ b/lib/Target/Mips/MipsRegisterInfo.cpp
@@ -11,8 +11,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "mips-reg-info"
-
#include "MipsRegisterInfo.h"
#include "Mips.h"
#include "MipsAnalyzeImmediate.h"
@@ -37,11 +35,13 @@
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "mips-reg-info"
+
#define GET_REGINFO_TARGET_DESC
#include "MipsGenRegisterInfo.inc"
-using namespace llvm;
-
MipsRegisterInfo::MipsRegisterInfo(const MipsSubtarget &ST)
: MipsGenRegisterInfo(Mips::RA), Subtarget(ST) {}
@@ -79,8 +79,8 @@ MipsRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
//===----------------------------------------------------------------------===//
/// Mips Callee Saved Registers
-const uint16_t* MipsRegisterInfo::
-getCalleeSavedRegs(const MachineFunction *MF) const {
+const MCPhysReg *
+MipsRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
if (Subtarget.isSingleFloat())
return CSR_SingleFloatOnly_SaveList;
@@ -119,11 +119,11 @@ const uint32_t *MipsRegisterInfo::getMips16RetHelperMask() {
BitVector MipsRegisterInfo::
getReservedRegs(const MachineFunction &MF) const {
- static const uint16_t ReservedGPR32[] = {
+ static const MCPhysReg ReservedGPR32[] = {
Mips::ZERO, Mips::K0, Mips::K1, Mips::SP
};
- static const uint16_t ReservedGPR64[] = {
+ static const MCPhysReg ReservedGPR64[] = {
Mips::ZERO_64, Mips::K0_64, Mips::K1_64, Mips::SP_64
};
diff --git a/lib/Target/Mips/MipsRegisterInfo.h b/lib/Target/Mips/MipsRegisterInfo.h
index 0450c6f..b34496f 100644
--- a/lib/Target/Mips/MipsRegisterInfo.h
+++ b/lib/Target/Mips/MipsRegisterInfo.h
@@ -43,30 +43,31 @@ public:
/// Code Generation virtual methods...
const TargetRegisterClass *getPointerRegClass(const MachineFunction &MF,
- unsigned Kind) const;
+ unsigned Kind) const override;
unsigned getRegPressureLimit(const TargetRegisterClass *RC,
- MachineFunction &MF) const;
- const uint16_t *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
- const uint32_t *getCallPreservedMask(CallingConv::ID) const;
+ MachineFunction &MF) const override;
+ const MCPhysReg *
+ getCalleeSavedRegs(const MachineFunction *MF = nullptr) const override;
+ const uint32_t *getCallPreservedMask(CallingConv::ID) const override;
static const uint32_t *getMips16RetHelperMask();
- BitVector getReservedRegs(const MachineFunction &MF) const;
+ BitVector getReservedRegs(const MachineFunction &MF) const override;
- virtual bool requiresRegisterScavenging(const MachineFunction &MF) const;
+ bool requiresRegisterScavenging(const MachineFunction &MF) const override;
- virtual bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const;
+ bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const override;
/// Stack Frame Processing Methods
void eliminateFrameIndex(MachineBasicBlock::iterator II,
int SPAdj, unsigned FIOperandNum,
- RegScavenger *RS = NULL) const;
+ RegScavenger *RS = nullptr) const override;
void processFunctionBeforeFrameFinalized(MachineFunction &MF,
- RegScavenger *RS = NULL) const;
+ RegScavenger *RS = nullptr) const;
/// Debug information queries.
- unsigned getFrameRegister(const MachineFunction &MF) const;
+ unsigned getFrameRegister(const MachineFunction &MF) const override;
/// \brief Return GPR register class.
virtual const TargetRegisterClass *intRegClass(unsigned Size) const = 0;
diff --git a/lib/Target/Mips/MipsRegisterInfo.td b/lib/Target/Mips/MipsRegisterInfo.td
index 834e6c5..875a596 100644
--- a/lib/Target/Mips/MipsRegisterInfo.td
+++ b/lib/Target/Mips/MipsRegisterInfo.td
@@ -205,6 +205,10 @@ let Namespace = "Mips" in {
foreach I = 0-31 in
def COP2#I : MipsReg<#I, ""#I>;
+ // COP3 registers.
+ foreach I = 0-31 in
+ def COP3#I : MipsReg<#I, ""#I>;
+
// PC register
def PC : Register<"pc">;
@@ -387,6 +391,10 @@ def DSPCC : RegisterClass<"Mips", [v4i8, v2i16], 32, (add DSPCCond)>;
def COP2 : RegisterClass<"Mips", [i32], 32, (sequence "COP2%u", 0, 31)>,
Unallocatable;
+// Coprocessor 3 registers.
+def COP3 : RegisterClass<"Mips", [i32], 32, (sequence "COP3%u", 0, 31)>,
+ Unallocatable;
+
// Octeon multiplier and product registers
def OCTEON_MPL : RegisterClass<"Mips", [i64], 64, (add MPL0, MPL1, MPL2)>,
Unallocatable;
@@ -484,6 +492,10 @@ def COP2AsmOperand : MipsAsmRegOperand {
let Name = "COP2AsmReg";
}
+def COP3AsmOperand : MipsAsmRegOperand {
+ let Name = "COP3AsmReg";
+}
+
def HWRegsOpnd : RegisterOperand<HWRegs> {
let ParserMatchClass = HWRegsAsmOperand;
}
@@ -524,6 +536,10 @@ def COP2Opnd : RegisterOperand<COP2> {
let ParserMatchClass = COP2AsmOperand;
}
+def COP3Opnd : RegisterOperand<COP3> {
+ let ParserMatchClass = COP3AsmOperand;
+}
+
def MSA128BOpnd : RegisterOperand<MSA128B> {
let ParserMatchClass = MSA128AsmOperand;
}
diff --git a/lib/Target/Mips/MipsSEFrameLowering.cpp b/lib/Target/Mips/MipsSEFrameLowering.cpp
index 0343a47..6ad5821 100644
--- a/lib/Target/Mips/MipsSEFrameLowering.cpp
+++ b/lib/Target/Mips/MipsSEFrameLowering.cpp
@@ -375,7 +375,8 @@ void MipsSEFrameLowering::emitPrologue(MachineFunction &MF) const {
// if framepointer enabled, set it to point to the stack pointer.
if (hasFP(MF)) {
// Insert instruction "move $fp, $sp" at this location.
- BuildMI(MBB, MBBI, dl, TII.get(ADDu), FP).addReg(SP).addReg(ZERO);
+ BuildMI(MBB, MBBI, dl, TII.get(ADDu), FP).addReg(SP).addReg(ZERO)
+ .setMIFlag(MachineInstr::FrameSetup);
// emit ".cfi_def_cfa_register $fp"
unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createDefCfaRegister(
diff --git a/lib/Target/Mips/MipsSEFrameLowering.h b/lib/Target/Mips/MipsSEFrameLowering.h
index 8fa9e46..5d2801f 100644
--- a/lib/Target/Mips/MipsSEFrameLowering.h
+++ b/lib/Target/Mips/MipsSEFrameLowering.h
@@ -25,22 +25,22 @@ public:
/// emitProlog/emitEpilog - These methods insert prolog and epilog code into
/// the function.
- void emitPrologue(MachineFunction &MF) const;
- void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+ void emitPrologue(MachineFunction &MF) const override;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
void eliminateCallFramePseudoInstr(MachineFunction &MF,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const;
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const override;
bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
const std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const;
+ const TargetRegisterInfo *TRI) const override;
- bool hasReservedCallFrame(const MachineFunction &MF) const;
+ bool hasReservedCallFrame(const MachineFunction &MF) const override;
void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
- RegScavenger *RS) const;
+ RegScavenger *RS) const override;
unsigned ehDataReg(unsigned I) const;
};
diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
index 5b20a6c..d5385be 100644
--- a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
+++ b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
@@ -11,7 +11,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "mips-isel"
#include "MipsSEISelDAGToDAG.h"
#include "MCTargetDesc/MipsBaseInfo.h"
#include "Mips.h"
@@ -35,6 +34,8 @@
#include "llvm/Target/TargetMachine.h"
using namespace llvm;
+#define DEBUG_TYPE "mips-isel"
+
bool MipsSEDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
if (Subtarget.inMips16Mode())
return false;
@@ -412,7 +413,7 @@ bool MipsSEDAGToDAGISel::selectVSplat(SDNode *N, APInt &Imm) const {
BuildVectorSDNode *Node = dyn_cast<BuildVectorSDNode>(N);
- if (Node == NULL)
+ if (!Node)
return false;
APInt SplatValue, SplatUndef;
@@ -813,16 +814,16 @@ std::pair<bool, SDNode*> MipsSEDAGToDAGISel::selectNode(SDNode *Node) {
EVT ViaVecTy;
if (!Subtarget.hasMSA() || !BVN->getValueType(0).is128BitVector())
- return std::make_pair(false, (SDNode*)NULL);
+ return std::make_pair(false, nullptr);
if (!BVN->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
HasAnyUndefs, 8,
!Subtarget.isLittle()))
- return std::make_pair(false, (SDNode*)NULL);
+ return std::make_pair(false, nullptr);
switch (SplatBitSize) {
default:
- return std::make_pair(false, (SDNode*)NULL);
+ return std::make_pair(false, nullptr);
case 8:
LdiOp = Mips::LDI_B;
ViaVecTy = MVT::v16i8;
@@ -842,7 +843,7 @@ std::pair<bool, SDNode*> MipsSEDAGToDAGISel::selectNode(SDNode *Node) {
}
if (!SplatValue.isSignedIntN(10))
- return std::make_pair(false, (SDNode*)NULL);
+ return std::make_pair(false, nullptr);
SDValue Imm = CurDAG->getTargetConstant(SplatValue,
ViaVecTy.getVectorElementType());
@@ -868,7 +869,7 @@ std::pair<bool, SDNode*> MipsSEDAGToDAGISel::selectNode(SDNode *Node) {
}
- return std::make_pair(false, (SDNode*)NULL);
+ return std::make_pair(false, nullptr);
}
FunctionPass *llvm::createMipsSEISelDag(MipsTargetMachine &TM) {
diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.h b/lib/Target/Mips/MipsSEISelDAGToDAG.h
index ba84a6d..57328d2 100644
--- a/lib/Target/Mips/MipsSEISelDAGToDAG.h
+++ b/lib/Target/Mips/MipsSEISelDAGToDAG.h
@@ -25,7 +25,7 @@ public:
private:
- virtual bool runOnMachineFunction(MachineFunction &MF);
+ bool runOnMachineFunction(MachineFunction &MF) override;
void addDSPCtrlRegOperands(bool IsDef, MachineInstr &MI,
MachineFunction &MF);
@@ -44,66 +44,66 @@ private:
bool selectAddrFrameIndexOffset(SDValue Addr, SDValue &Base, SDValue &Offset,
unsigned OffsetBits) const;
- virtual bool selectAddrRegImm(SDValue Addr, SDValue &Base,
- SDValue &Offset) const;
+ bool selectAddrRegImm(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const override;
- virtual bool selectAddrRegReg(SDValue Addr, SDValue &Base,
- SDValue &Offset) const;
+ bool selectAddrRegReg(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const override;
- virtual bool selectAddrDefault(SDValue Addr, SDValue &Base,
- SDValue &Offset) const;
+ bool selectAddrDefault(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const override;
- virtual bool selectIntAddr(SDValue Addr, SDValue &Base,
- SDValue &Offset) const;
+ bool selectIntAddr(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const override;
- virtual bool selectAddrRegImm10(SDValue Addr, SDValue &Base,
- SDValue &Offset) const;
+ bool selectAddrRegImm10(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const;
- virtual bool selectAddrRegImm12(SDValue Addr, SDValue &Base,
- SDValue &Offset) const;
+ bool selectAddrRegImm12(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const;
- virtual bool selectIntAddrMM(SDValue Addr, SDValue &Base,
- SDValue &Offset) const;
+ bool selectIntAddrMM(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const override;
- virtual bool selectIntAddrMSA(SDValue Addr, SDValue &Base,
- SDValue &Offset) const;
+ bool selectIntAddrMSA(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const override;
/// \brief Select constant vector splats.
- virtual bool selectVSplat(SDNode *N, APInt &Imm) const;
+ bool selectVSplat(SDNode *N, APInt &Imm) const override;
/// \brief Select constant vector splats whose value fits in a given integer.
- virtual bool selectVSplatCommon(SDValue N, SDValue &Imm, bool Signed,
+ bool selectVSplatCommon(SDValue N, SDValue &Imm, bool Signed,
unsigned ImmBitSize) const;
/// \brief Select constant vector splats whose value fits in a uimm1.
- virtual bool selectVSplatUimm1(SDValue N, SDValue &Imm) const;
+ bool selectVSplatUimm1(SDValue N, SDValue &Imm) const override;
/// \brief Select constant vector splats whose value fits in a uimm2.
- virtual bool selectVSplatUimm2(SDValue N, SDValue &Imm) const;
+ bool selectVSplatUimm2(SDValue N, SDValue &Imm) const override;
/// \brief Select constant vector splats whose value fits in a uimm3.
- virtual bool selectVSplatUimm3(SDValue N, SDValue &Imm) const;
+ bool selectVSplatUimm3(SDValue N, SDValue &Imm) const override;
/// \brief Select constant vector splats whose value fits in a uimm4.
- virtual bool selectVSplatUimm4(SDValue N, SDValue &Imm) const;
+ bool selectVSplatUimm4(SDValue N, SDValue &Imm) const override;
/// \brief Select constant vector splats whose value fits in a uimm5.
- virtual bool selectVSplatUimm5(SDValue N, SDValue &Imm) const;
+ bool selectVSplatUimm5(SDValue N, SDValue &Imm) const override;
/// \brief Select constant vector splats whose value fits in a uimm6.
- virtual bool selectVSplatUimm6(SDValue N, SDValue &Imm) const;
+ bool selectVSplatUimm6(SDValue N, SDValue &Imm) const override;
/// \brief Select constant vector splats whose value fits in a uimm8.
- virtual bool selectVSplatUimm8(SDValue N, SDValue &Imm) const;
+ bool selectVSplatUimm8(SDValue N, SDValue &Imm) const override;
/// \brief Select constant vector splats whose value fits in a simm5.
- virtual bool selectVSplatSimm5(SDValue N, SDValue &Imm) const;
+ bool selectVSplatSimm5(SDValue N, SDValue &Imm) const override;
/// \brief Select constant vector splats whose value is a power of 2.
- virtual bool selectVSplatUimmPow2(SDValue N, SDValue &Imm) const;
+ bool selectVSplatUimmPow2(SDValue N, SDValue &Imm) const override;
/// \brief Select constant vector splats whose value is the inverse of a
/// power of 2.
- virtual bool selectVSplatUimmInvPow2(SDValue N, SDValue &Imm) const;
+ bool selectVSplatUimmInvPow2(SDValue N, SDValue &Imm) const override;
/// \brief Select constant vector splats whose value is a run of set bits
/// ending at the most significant bit
- virtual bool selectVSplatMaskL(SDValue N, SDValue &Imm) const;
+ bool selectVSplatMaskL(SDValue N, SDValue &Imm) const override;
/// \brief Select constant vector splats whose value is a run of set bits
/// starting at bit zero.
- virtual bool selectVSplatMaskR(SDValue N, SDValue &Imm) const;
+ bool selectVSplatMaskR(SDValue N, SDValue &Imm) const override;
- virtual std::pair<bool, SDNode*> selectNode(SDNode *Node);
+ std::pair<bool, SDNode*> selectNode(SDNode *Node) override;
- virtual void processFunctionAfterISel(MachineFunction &MF);
+ void processFunctionAfterISel(MachineFunction &MF) override;
// Insert instructions to initialize the global base register in the
// first MBB of the function.
diff --git a/lib/Target/Mips/MipsSEISelLowering.cpp b/lib/Target/Mips/MipsSEISelLowering.cpp
index 0dac0b7..969d730 100644
--- a/lib/Target/Mips/MipsSEISelLowering.cpp
+++ b/lib/Target/Mips/MipsSEISelLowering.cpp
@@ -10,7 +10,6 @@
// Subclass of MipsTargetLowering specialized for mips32/64.
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "mips-isel"
#include "MipsSEISelLowering.h"
#include "MipsRegisterInfo.h"
#include "MipsTargetMachine.h"
@@ -24,6 +23,8 @@
using namespace llvm;
+#define DEBUG_TYPE "mips-isel"
+
static cl::opt<bool>
EnableMipsTailCalls("enable-mips-tail-calls", cl::Hidden,
cl::desc("MIPS: Enable tail calls."), cl::init(false));
@@ -119,10 +120,10 @@ MipsSETargetLowering::MipsSETargetLowering(MipsTargetMachine &TM)
if (Subtarget->hasCnMips())
setOperationAction(ISD::MUL, MVT::i64, Legal);
- else if (hasMips64())
+ else if (isGP64bit())
setOperationAction(ISD::MUL, MVT::i64, Custom);
- if (hasMips64()) {
+ if (isGP64bit()) {
setOperationAction(ISD::MULHS, MVT::i64, Custom);
setOperationAction(ISD::MULHU, MVT::i64, Custom);
}
@@ -253,6 +254,16 @@ MipsSETargetLowering::allowsUnalignedMemoryAccesses(EVT VT,
bool *Fast) const {
MVT::SimpleValueType SVT = VT.getSimpleVT().SimpleTy;
+ if (Subtarget->systemSupportsUnalignedAccess()) {
+ // MIPS32r6/MIPS64r6 are required to support unaligned access. It's
+ // implementation-defined whether this is handled by hardware, software, or
+ // a hybrid of the two, but it's expected that most implementations will
+ // handle the majority of cases in hardware.
+ if (Fast)
+ *Fast = true;
+ return true;
+ }
+
switch (SVT) {
case MVT::i64:
case MVT::i32:
@@ -487,7 +498,8 @@ static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
Log2 == ExtendTySize) {
SDValue Ops[] = { Op0->getOperand(0), Op0->getOperand(1), Op0Op2 };
DAG.MorphNodeTo(Op0.getNode(), MipsISD::VEXTRACT_ZEXT_ELT,
- Op0->getVTList(), Ops, Op0->getNumOperands());
+ Op0->getVTList(),
+ makeArrayRef(Ops, Op0->getNumOperands()));
return Op0;
}
}
@@ -507,7 +519,7 @@ static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
static bool isVSplat(SDValue N, APInt &Imm, bool IsLittleEndian) {
BuildVectorSDNode *Node = dyn_cast<BuildVectorSDNode>(N.getNode());
- if (Node == NULL)
+ if (!Node)
return false;
APInt SplatValue, SplatUndef;
@@ -831,7 +843,8 @@ static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG,
SDValue Ops[] = { Op0Op0->getOperand(0), Op0Op0->getOperand(1),
Op0Op0->getOperand(2) };
DAG.MorphNodeTo(Op0Op0.getNode(), MipsISD::VEXTRACT_SEXT_ELT,
- Op0Op0->getVTList(), Ops, Op0Op0->getNumOperands());
+ Op0Op0->getVTList(),
+ makeArrayRef(Ops, Op0Op0->getNumOperands()));
return Op0Op0;
}
}
@@ -1051,6 +1064,18 @@ MipsSETargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
return emitINSERT_FW(MI, BB);
case Mips::INSERT_FD_PSEUDO:
return emitINSERT_FD(MI, BB);
+ case Mips::INSERT_B_VIDX_PSEUDO:
+ return emitINSERT_DF_VIDX(MI, BB, 1, false);
+ case Mips::INSERT_H_VIDX_PSEUDO:
+ return emitINSERT_DF_VIDX(MI, BB, 2, false);
+ case Mips::INSERT_W_VIDX_PSEUDO:
+ return emitINSERT_DF_VIDX(MI, BB, 4, false);
+ case Mips::INSERT_D_VIDX_PSEUDO:
+ return emitINSERT_DF_VIDX(MI, BB, 8, false);
+ case Mips::INSERT_FW_VIDX_PSEUDO:
+ return emitINSERT_DF_VIDX(MI, BB, 4, true);
+ case Mips::INSERT_FD_VIDX_PSEUDO:
+ return emitINSERT_DF_VIDX(MI, BB, 8, true);
case Mips::FILL_FW_PSEUDO:
return emitFILL_FW(MI, BB);
case Mips::FILL_FD_PSEUDO:
@@ -1117,7 +1142,7 @@ SDValue MipsSETargetLowering::lowerLOAD(SDValue Op, SelectionDAG &DAG) const {
SDValue BP = DAG.getNode(MipsISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
SDValue Ops[2] = {BP, Hi.getValue(1)};
- return DAG.getMergeValues(Ops, 2, DL);
+ return DAG.getMergeValues(Ops, DL);
}
SDValue MipsSETargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const {
@@ -1168,7 +1193,7 @@ SDValue MipsSETargetLowering::lowerMulDiv(SDValue Op, unsigned NewOpc,
return HasLo ? Lo : Hi;
SDValue Vals[] = { Lo, Hi };
- return DAG.getMergeValues(Vals, 2, DL);
+ return DAG.getMergeValues(Vals, DL);
}
@@ -1235,7 +1260,7 @@ static SDValue lowerDSPIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc) {
ResTys.push_back((*I == MVT::i64) ? MVT::Untyped : *I);
// Create node.
- SDValue Val = DAG.getNode(Opc, DL, ResTys, &Ops[0], Ops.size());
+ SDValue Val = DAG.getNode(Opc, DL, ResTys, Ops);
SDValue Out = (ResTys[0] == MVT::Untyped) ? extractLOHI(Val, DL, DAG) : Val;
if (!HasChainIn)
@@ -1243,7 +1268,7 @@ static SDValue lowerDSPIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc) {
assert(Val->getValueType(1) == MVT::Other);
SDValue Vals[] = { Out, SDValue(Val.getNode(), 1) };
- return DAG.getMergeValues(Vals, 2, DL);
+ return DAG.getMergeValues(Vals, DL);
}
// Lower an MSA copy intrinsic into the specified SelectionDAG node
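
Several hunks in this file replace pointer-plus-count pairs such as (&Ops[0], Ops.size()) with a single ArrayRef-style argument, or makeArrayRef(Ops, N) when only a prefix of a fixed array is meant. A hand-rolled sketch of the idiom (LLVM's ArrayRef offers many more conversions than this stand-in):

#include <cstddef>
#include <cstdio>
#include <vector>

// A non-owning (pointer, length) view, like llvm::ArrayRef<int>.
struct IntRef {
  const int *Data;
  std::size_t Size;
  IntRef(const std::vector<int> &V) : Data(V.data()), Size(V.size()) {}
  IntRef(const int *D, std::size_t N) : Data(D), Size(N) {}
};

static int sum(IntRef R) {
  int S = 0;
  for (std::size_t I = 0; I != R.Size; ++I)
    S += R.Data[I];
  return S;
}

int main() {
  std::vector<int> Ops = {1, 2, 3};
  int Fixed[4] = {4, 5, 6, 7};
  std::printf("%d\n", sum(Ops));        // whole container; no (&Ops[0], size)
  std::printf("%d\n", sum({Fixed, 2})); // array prefix, like makeArrayRef(Ops, N)
  return 0;
}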
@@ -1280,8 +1305,8 @@ static SDValue lowerMSASplatZExt(SDValue Op, unsigned OpNr, SelectionDAG &DAG) {
SDValue Ops[16] = { LaneA, LaneB, LaneA, LaneB, LaneA, LaneB, LaneA, LaneB,
LaneA, LaneB, LaneA, LaneB, LaneA, LaneB, LaneA, LaneB };
- SDValue Result = DAG.getNode(ISD::BUILD_VECTOR, DL, ViaVecTy, Ops,
- ViaVecTy.getVectorNumElements());
+ SDValue Result = DAG.getNode(ISD::BUILD_VECTOR, DL, ViaVecTy,
+ makeArrayRef(Ops, ViaVecTy.getVectorNumElements()));
if (ViaVecTy != ResVecTy)
Result = DAG.getNode(ISD::BITCAST, DL, ResVecTy, Result);
@@ -1320,8 +1345,8 @@ static SDValue getBuildVectorSplat(EVT VecTy, SDValue SplatValue,
SplatValueA, SplatValueB, SplatValueA, SplatValueB,
SplatValueA, SplatValueB, SplatValueA, SplatValueB };
- SDValue Result = DAG.getNode(ISD::BUILD_VECTOR, DL, ViaVecTy, Ops,
- ViaVecTy.getVectorNumElements());
+ SDValue Result = DAG.getNode(ISD::BUILD_VECTOR, DL, ViaVecTy,
+ makeArrayRef(Ops, ViaVecTy.getVectorNumElements()));
if (VecTy != ViaVecTy)
Result = DAG.getNode(ISD::BITCAST, DL, VecTy, Result);
@@ -1355,7 +1380,7 @@ static SDValue lowerMSABinaryBitImmIntr(SDValue Op, SelectionDAG &DAG,
}
}
- if (Exp2Imm.getNode() == NULL) {
+ if (!Exp2Imm.getNode()) {
// We couldn't constant fold, so do a vector shift instead
// Extend i32 to i64 if necessary. Sign or zero extend doesn't matter since
@@ -1735,7 +1760,7 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
// If ResTy is v2i64 then the type legalizer will break this node down into
// an equivalent v4i32.
- return DAG.getNode(ISD::BUILD_VECTOR, DL, ResTy, &Ops[0], Ops.size());
+ return DAG.getNode(ISD::BUILD_VECTOR, DL, ResTy, Ops);
}
case Intrinsic::mips_fexp2_w:
case Intrinsic::mips_fexp2_d: {
@@ -2560,8 +2585,7 @@ static SDValue lowerVECTOR_SHUFFLE_VSHF(SDValue Op, EVT ResTy,
++I)
Ops.push_back(DAG.getTargetConstant(*I, MaskEltTy));
- SDValue MaskVec = DAG.getNode(ISD::BUILD_VECTOR, DL, MaskVecTy, &Ops[0],
- Ops.size());
+ SDValue MaskVec = DAG.getNode(ISD::BUILD_VECTOR, DL, MaskVecTy, Ops);
if (Using1stVec && Using2ndVec) {
Op0 = Op->getOperand(0);
@@ -2885,6 +2909,131 @@ MipsSETargetLowering::emitINSERT_FD(MachineInstr *MI,
return BB;
}
+// Emit the INSERT_([BHWD]|F[WD])_VIDX pseudo instruction.
+//
+// For integer:
+// (INSERT_[BHWD]_VIDX_PSEUDO $wd, $wd_in, $n, $rs)
+// =>
+// (SLL $lanetmp1, $lane, <log2size>)
+// (SLD_B $wdtmp1, $wd_in, $wd_in, $lanetmp1)
+// (INSERT_[BHWD] $wdtmp2, $wdtmp1, 0, $rs)
+// (NEG $lanetmp2, $lanetmp1)
+// (SLD_B $wd, $wdtmp2, $wdtmp2, $lanetmp2)
+//
+// For floating point:
+// (INSERT_F[WD]_VIDX_PSEUDO $wd, $wd_in, $n, $fs)
+// =>
+// (SUBREG_TO_REG $wt, $fs, <subreg>)
+// (SLL $lanetmp1, $lane, <log2size>)
+// (SLD_B $wdtmp1, $wd_in, $wd_in, $lanetmp1)
+// (INSVE_[WD] $wdtmp2, 0, $wdtmp1, 0)
+// (NEG $lanetmp2, $lanetmp1)
+// (SLD_B $wd, $wdtmp2, $wdtmp2, $lanetmp2)
+MachineBasicBlock *
+MipsSETargetLowering::emitINSERT_DF_VIDX(MachineInstr *MI,
+ MachineBasicBlock *BB,
+ unsigned EltSizeInBytes,
+ bool IsFP) const {
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
+ DebugLoc DL = MI->getDebugLoc();
+ unsigned Wd = MI->getOperand(0).getReg();
+ unsigned SrcVecReg = MI->getOperand(1).getReg();
+ unsigned LaneReg = MI->getOperand(2).getReg();
+ unsigned SrcValReg = MI->getOperand(3).getReg();
+
+ const TargetRegisterClass *VecRC = nullptr;
+ const TargetRegisterClass *GPRRC = isGP64bit() ? &Mips::GPR64RegClass
+ : &Mips::GPR32RegClass;
+ unsigned EltLog2Size;
+ unsigned InsertOp = 0;
+ unsigned InsveOp = 0;
+ switch (EltSizeInBytes) {
+ default:
+ llvm_unreachable("Unexpected size");
+ case 1:
+ EltLog2Size = 0;
+ InsertOp = Mips::INSERT_B;
+ InsveOp = Mips::INSVE_B;
+ VecRC = &Mips::MSA128BRegClass;
+ break;
+ case 2:
+ EltLog2Size = 1;
+ InsertOp = Mips::INSERT_H;
+ InsveOp = Mips::INSVE_H;
+ VecRC = &Mips::MSA128HRegClass;
+ break;
+ case 4:
+ EltLog2Size = 2;
+ InsertOp = Mips::INSERT_W;
+ InsveOp = Mips::INSVE_W;
+ VecRC = &Mips::MSA128WRegClass;
+ break;
+ case 8:
+ EltLog2Size = 3;
+ InsertOp = Mips::INSERT_D;
+ InsveOp = Mips::INSVE_D;
+ VecRC = &Mips::MSA128DRegClass;
+ break;
+ }
+
+ if (IsFP) {
+ unsigned Wt = RegInfo.createVirtualRegister(VecRC);
+ BuildMI(*BB, MI, DL, TII->get(Mips::SUBREG_TO_REG), Wt)
+ .addImm(0)
+ .addReg(SrcValReg)
+ .addImm(EltSizeInBytes == 8 ? Mips::sub_64 : Mips::sub_lo);
+ SrcValReg = Wt;
+ }
+
+ // Convert the lane index into a byte index
+ if (EltSizeInBytes != 1) {
+ unsigned LaneTmp1 = RegInfo.createVirtualRegister(GPRRC);
+ BuildMI(*BB, MI, DL, TII->get(Mips::SLL), LaneTmp1)
+ .addReg(LaneReg)
+ .addImm(EltLog2Size);
+ LaneReg = LaneTmp1;
+ }
+
+ // Rotate bytes around so that the desired lane is element zero
+ unsigned WdTmp1 = RegInfo.createVirtualRegister(VecRC);
+ BuildMI(*BB, MI, DL, TII->get(Mips::SLD_B), WdTmp1)
+ .addReg(SrcVecReg)
+ .addReg(SrcVecReg)
+ .addReg(LaneReg);
+
+ unsigned WdTmp2 = RegInfo.createVirtualRegister(VecRC);
+ if (IsFP) {
+ // Use insve.df to insert to element zero
+ BuildMI(*BB, MI, DL, TII->get(InsveOp), WdTmp2)
+ .addReg(WdTmp1)
+ .addImm(0)
+ .addReg(SrcValReg)
+ .addImm(0);
+ } else {
+ // Use insert.df to insert to element zero
+ BuildMI(*BB, MI, DL, TII->get(InsertOp), WdTmp2)
+ .addReg(WdTmp1)
+ .addReg(SrcValReg)
+ .addImm(0);
+ }
+
+ // Rotate elements the rest of the way for a full rotation.
+ // sld.df interprets $rt modulo the number of columns so we only need to negate
+ // the lane index to do this.
+ unsigned LaneTmp2 = RegInfo.createVirtualRegister(GPRRC);
+ BuildMI(*BB, MI, DL, TII->get(Mips::SUB), LaneTmp2)
+ .addReg(Mips::ZERO)
+ .addReg(LaneReg);
+ BuildMI(*BB, MI, DL, TII->get(Mips::SLD_B), Wd)
+ .addReg(WdTmp2)
+ .addReg(WdTmp2)
+ .addReg(LaneTmp2);
+
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+}
+
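
The expansion works because insert.df/insve.df can only target a lane given as an immediate, while sld.b rotates by a register amount interpreted modulo the row size; rotate / insert-at-zero / rotate-back is therefore equivalent to a direct variable-index insert. A scalar model of the same trick on a plain array (illustrative, assuming the modulo behavior the comment above describes):

#include <cassert>
#include <cstddef>

const std::size_t N = 4; // lanes per vector row

// Rotate V left by Amt elements (Amt taken modulo N, as sld.df does with $rt).
static void rotate(int V[], long Amt) {
  int Tmp[N];
  for (std::size_t I = 0; I != N; ++I)
    Tmp[I] = V[((Amt % (long)N) + (long)N + (long)I) % (long)N];
  for (std::size_t I = 0; I != N; ++I)
    V[I] = Tmp[I];
}

// Variable-index insert built from rotate + fixed insert at lane zero.
static void insert_vidx(int V[], long Lane, int Val) {
  rotate(V, Lane);  // SLD_B: the desired lane becomes element zero
  V[0] = Val;       // INSERT/INSVE at immediate lane 0
  rotate(V, -Lane); // SLD_B by the negated amount restores the order
}

int main() {
  int V[N] = {10, 11, 12, 13};
  insert_vidx(V, 2, 99);
  assert(V[0] == 10 && V[1] == 11 && V[2] == 99 && V[3] == 13);
  return 0;
}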
// Emit the FILL_FW pseudo instruction.
//
// fill_fw_pseudo $wd, $fs
diff --git a/lib/Target/Mips/MipsSEISelLowering.h b/lib/Target/Mips/MipsSEISelLowering.h
index 079fbf6..03a20ef 100644
--- a/lib/Target/Mips/MipsSEISelLowering.h
+++ b/lib/Target/Mips/MipsSEISelLowering.h
@@ -30,22 +30,23 @@ namespace llvm {
void addMSAFloatType(MVT::SimpleValueType Ty,
const TargetRegisterClass *RC);
- virtual bool allowsUnalignedMemoryAccesses(EVT VT, unsigned AS = 0,
- bool *Fast = 0) const override;
+ bool allowsUnalignedMemoryAccesses(EVT VT, unsigned AS = 0,
+ bool *Fast = nullptr) const override;
- virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
- virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
- virtual MachineBasicBlock *
- EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const;
+ MachineBasicBlock *
+ EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *MBB) const override;
- virtual bool isShuffleMaskLegal(const SmallVectorImpl<int> &Mask,
- EVT VT) const {
+ bool isShuffleMaskLegal(const SmallVectorImpl<int> &Mask,
+ EVT VT) const override {
return false;
}
- virtual const TargetRegisterClass *getRepRegClassFor(MVT VT) const {
+ const TargetRegisterClass *getRepRegClassFor(MVT VT) const override {
if (VT == MVT::Untyped)
return Subtarget->hasDSP() ? &Mips::ACC64DSPRegClass :
&Mips::ACC64RegClass;
@@ -54,16 +55,16 @@ namespace llvm {
}
private:
- virtual bool
- isEligibleForTailCallOptimization(const MipsCC &MipsCCInfo,
- unsigned NextStackOffset,
- const MipsFunctionInfo& FI) const;
+ bool isEligibleForTailCallOptimization(const MipsCC &MipsCCInfo,
+ unsigned NextStackOffset,
+ const MipsFunctionInfo& FI) const override;
- virtual void
+ void
getOpndList(SmallVectorImpl<SDValue> &Ops,
std::deque< std::pair<unsigned, SDValue> > &RegsToPass,
bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage,
- CallLoweringInfo &CLI, SDValue Callee, SDValue Chain) const;
+ CallLoweringInfo &CLI, SDValue Callee,
+ SDValue Chain) const override;
SDValue lowerLOAD(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerSTORE(SDValue Op, SelectionDAG &DAG) const;
@@ -97,6 +98,11 @@ namespace llvm {
/// \brief Emit the INSERT_FD pseudo instruction
MachineBasicBlock *emitINSERT_FD(MachineInstr *MI,
MachineBasicBlock *BB) const;
+ /// \brief Emit the INSERT_([BHWD]|F[WD])_VIDX pseudo instruction
+ MachineBasicBlock *emitINSERT_DF_VIDX(MachineInstr *MI,
+ MachineBasicBlock *BB,
+ unsigned EltSizeInBytes,
+ bool IsFP) const;
/// \brief Emit the FILL_FW pseudo instruction
MachineBasicBlock *emitFILL_FW(MachineInstr *MI,
MachineBasicBlock *BB) const;
diff --git a/lib/Target/Mips/MipsSEInstrInfo.cpp b/lib/Target/Mips/MipsSEInstrInfo.cpp
index 094ee29..f6f364f 100644
--- a/lib/Target/Mips/MipsSEInstrInfo.cpp
+++ b/lib/Target/Mips/MipsSEInstrInfo.cpp
@@ -368,7 +368,7 @@ void MipsSEInstrInfo::adjustStackPtr(unsigned SP, int64_t Amount,
if (isInt<16>(Amount))// addi sp, sp, amount
BuildMI(MBB, I, DL, get(ADDiu), SP).addReg(SP).addImm(Amount);
else { // Expand immediate that doesn't fit in 16-bit.
- unsigned Reg = loadImmediate(Amount, MBB, I, DL, 0);
+ unsigned Reg = loadImmediate(Amount, MBB, I, DL, nullptr);
BuildMI(MBB, I, DL, get(ADDu), SP).addReg(SP).addReg(Reg, RegState::Kill);
}
}
diff --git a/lib/Target/Mips/MipsSEInstrInfo.h b/lib/Target/Mips/MipsSEInstrInfo.h
index 6d2dd90..aa68552 100644
--- a/lib/Target/Mips/MipsSEInstrInfo.h
+++ b/lib/Target/Mips/MipsSEInstrInfo.h
@@ -26,46 +26,46 @@ class MipsSEInstrInfo : public MipsInstrInfo {
public:
explicit MipsSEInstrInfo(MipsTargetMachine &TM);
- virtual const MipsRegisterInfo &getRegisterInfo() const;
+ const MipsRegisterInfo &getRegisterInfo() const override;
/// isLoadFromStackSlot - If the specified machine instruction is a direct
/// load from a stack slot, return the virtual or physical register number of
/// the destination along with the FrameIndex of the loaded stack slot. If
/// not, return 0. This predicate must return 0 if the instruction has
/// any side effects other than loading from the stack slot.
- virtual unsigned isLoadFromStackSlot(const MachineInstr *MI,
- int &FrameIndex) const;
+ unsigned isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const override;
/// isStoreToStackSlot - If the specified machine instruction is a direct
/// store to a stack slot, return the virtual or physical register number of
/// the source reg along with the FrameIndex of the loaded stack slot. If
/// not, return 0. This predicate must return 0 if the instruction has
/// any side effects other than storing to the stack slot.
- virtual unsigned isStoreToStackSlot(const MachineInstr *MI,
- int &FrameIndex) const;
+ unsigned isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const override;
- virtual void copyPhysReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI, DebugLoc DL,
- unsigned DestReg, unsigned SrcReg,
- bool KillSrc) const;
+ void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const override;
- virtual void storeRegToStack(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- unsigned SrcReg, bool isKill, int FrameIndex,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI,
- int64_t Offset) const;
+ void storeRegToStack(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI,
+ int64_t Offset) const override;
- virtual void loadRegFromStack(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- unsigned DestReg, int FrameIndex,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI,
- int64_t Offset) const;
+ void loadRegFromStack(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI,
+ int64_t Offset) const override;
- virtual bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const;
+ bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const override;
- virtual unsigned getOppositeBranchOpc(unsigned Opc) const;
+ unsigned getOppositeBranchOpc(unsigned Opc) const override;
/// Adjust SP by Amount bytes.
void adjustStackPtr(unsigned SP, int64_t Amount, MachineBasicBlock &MBB,
@@ -79,7 +79,7 @@ public:
unsigned *NewImm) const;
private:
- virtual unsigned getAnalyzableBrOpc(unsigned Opc) const;
+ unsigned getAnalyzableBrOpc(unsigned Opc) const override;
void expandRetRA(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
unsigned Opc) const;
diff --git a/lib/Target/Mips/MipsSERegisterInfo.cpp b/lib/Target/Mips/MipsSERegisterInfo.cpp
index 2ac082f..0af1a6b 100644
--- a/lib/Target/Mips/MipsSERegisterInfo.cpp
+++ b/lib/Target/Mips/MipsSERegisterInfo.cpp
@@ -39,6 +39,8 @@
using namespace llvm;
+#define DEBUG_TYPE "mips-reg-info"
+
MipsSERegisterInfo::MipsSERegisterInfo(const MipsSubtarget &ST)
: MipsRegisterInfo(ST) {}
@@ -187,7 +189,7 @@ void MipsSERegisterInfo::eliminateFI(MachineBasicBlock::iterator II,
*static_cast<const MipsSEInstrInfo *>(
MBB.getParent()->getTarget().getInstrInfo());
unsigned Reg = TII.loadImmediate(Offset, MBB, II, DL,
- OffsetBitSize == 16 ? &NewImm : NULL);
+ OffsetBitSize == 16 ? &NewImm : nullptr);
BuildMI(MBB, II, DL, TII.get(ADDu), Reg).addReg(FrameReg)
.addReg(Reg, RegState::Kill);
diff --git a/lib/Target/Mips/MipsSERegisterInfo.h b/lib/Target/Mips/MipsSERegisterInfo.h
index 76cdd9d..f2f3a7e 100644
--- a/lib/Target/Mips/MipsSERegisterInfo.h
+++ b/lib/Target/Mips/MipsSERegisterInfo.h
@@ -24,16 +24,16 @@ class MipsSERegisterInfo : public MipsRegisterInfo {
public:
MipsSERegisterInfo(const MipsSubtarget &Subtarget);
- bool requiresRegisterScavenging(const MachineFunction &MF) const;
+ bool requiresRegisterScavenging(const MachineFunction &MF) const override;
- bool requiresFrameIndexScavenging(const MachineFunction &MF) const;
+ bool requiresFrameIndexScavenging(const MachineFunction &MF) const override;
- virtual const TargetRegisterClass *intRegClass(unsigned Size) const;
+ const TargetRegisterClass *intRegClass(unsigned Size) const override;
private:
- virtual void eliminateFI(MachineBasicBlock::iterator II, unsigned OpNo,
- int FrameIndex, uint64_t StackSize,
- int64_t SPOffset) const;
+ void eliminateFI(MachineBasicBlock::iterator II, unsigned OpNo,
+ int FrameIndex, uint64_t StackSize,
+ int64_t SPOffset) const override;
};
} // end namespace llvm
diff --git a/lib/Target/Mips/MipsSelectionDAGInfo.cpp b/lib/Target/Mips/MipsSelectionDAGInfo.cpp
index e4d70fc..0d4398e 100644
--- a/lib/Target/Mips/MipsSelectionDAGInfo.cpp
+++ b/lib/Target/Mips/MipsSelectionDAGInfo.cpp
@@ -11,10 +11,11 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "mips-selectiondag-info"
#include "MipsTargetMachine.h"
using namespace llvm;
+#define DEBUG_TYPE "mips-selectiondag-info"
+
MipsSelectionDAGInfo::MipsSelectionDAGInfo(const MipsTargetMachine &TM)
: TargetSelectionDAGInfo(TM) {
}
diff --git a/lib/Target/Mips/MipsSubtarget.cpp b/lib/Target/Mips/MipsSubtarget.cpp
index 143b945..74ec064 100644
--- a/lib/Target/Mips/MipsSubtarget.cpp
+++ b/lib/Target/Mips/MipsSubtarget.cpp
@@ -11,8 +11,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "mips-subtarget"
-
#include "MipsMachineFunction.h"
#include "Mips.h"
#include "MipsRegisterInfo.h"
@@ -25,13 +23,14 @@
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "mips-subtarget"
+
#define GET_SUBTARGETINFO_TARGET_DESC
#define GET_SUBTARGETINFO_CTOR
#include "MipsGenSubtargetInfo.inc"
-
-using namespace llvm;
-
// FIXME: Maybe this should be on by default when Mips16 is specified
//
static cl::opt<bool> Mixed16_32(
@@ -77,17 +76,16 @@ void MipsSubtarget::anchor() { }
MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &CPU,
const std::string &FS, bool little,
- Reloc::Model _RM, MipsTargetMachine *_TM) :
- MipsGenSubtargetInfo(TT, CPU, FS),
- MipsArchVersion(Mips32), MipsABI(UnknownABI), IsLittle(little),
- IsSingleFloat(false), IsFP64bit(false), IsGP64bit(false), HasVFPU(false),
- HasCnMips(false), IsLinux(true), HasSEInReg(false), HasCondMov(false),
- HasSwap(false), HasBitCount(false), HasFPIdx(false),
- InMips16Mode(false), InMips16HardFloat(Mips16HardFloat),
- InMicroMipsMode(false), HasDSP(false), HasDSPR2(false),
- AllowMixed16_32(Mixed16_32 | Mips_Os16), Os16(Mips_Os16), HasMSA(false),
- RM(_RM), OverrideMode(NoOverride), TM(_TM), TargetTriple(TT)
-{
+ Reloc::Model _RM, MipsTargetMachine *_TM)
+ : MipsGenSubtargetInfo(TT, CPU, FS), MipsArchVersion(Mips32),
+ MipsABI(UnknownABI), IsLittle(little), IsSingleFloat(false),
+ IsFP64bit(false), IsNaN2008bit(false), IsGP64bit(false), HasVFPU(false),
+ HasCnMips(false), IsLinux(true), HasMips3_32(false), HasMips3_32r2(false),
+ HasMips4_32(false), HasMips4_32r2(false), HasMips5_32r2(false),
+ InMips16Mode(false), InMips16HardFloat(Mips16HardFloat),
+ InMicroMipsMode(false), HasDSP(false), HasDSPR2(false),
+ AllowMixed16_32(Mixed16_32 | Mips_Os16), Os16(Mips_Os16), HasMSA(false),
+ RM(_RM), OverrideMode(NoOverride), TM(_TM), TargetTriple(TT) {
std::string CPUName = CPU;
CPUName = selectMipsCPU(TT, CPUName);
@@ -109,6 +107,19 @@ MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &CPU,
// Initialize scheduling itinerary for the specified CPU.
InstrItins = getInstrItineraryForCPU(CPUName);
+ // Don't even attempt to generate code for MIPS-I, MIPS-II, MIPS-III, and
+ // MIPS-V. They have not been tested and currently exist for the integrated
+ // assembler only.
+ if (MipsArchVersion == Mips1)
+ report_fatal_error("Code generation for MIPS-I is not implemented", false);
+ if (MipsArchVersion == Mips2)
+ report_fatal_error("Code generation for MIPS-II is not implemented", false);
+ if (MipsArchVersion == Mips3)
+ report_fatal_error("Code generation for MIPS-III is not implemented",
+ false);
+ if (MipsArchVersion == Mips5)
+ report_fatal_error("Code generation for MIPS-V is not implemented", false);
+
// Assert exactly one ABI was chosen.
assert(MipsABI != UnknownABI);
assert((((getFeatureBits() & Mips::FeatureO32) != 0) +
@@ -126,15 +137,23 @@ MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &CPU,
"See -mattr=+fp64.",
false);
+ if (hasMips32r6()) {
+ StringRef ISA = hasMips64r6() ? "MIPS64r6" : "MIPS32r6";
+
+ assert(isFP64bit());
+ assert(isNaN2008());
+ if (hasDSP())
+ report_fatal_error(ISA + " is not compatible with the DSP ASE", false);
+ }
+
// Is the target system Linux?
if (TT.find("linux") == std::string::npos)
IsLinux = false;
// Set UseSmallSection.
+ // TODO: Investigate the IsLinux check. I suspect it's really checking for
+ // bare-metal.
UseSmallSection = !IsLinux && (RM == Reloc::Static);
- // set some subtarget specific features
- if (inMips16Mode())
- HasBitCount=false;
}
bool
diff --git a/lib/Target/Mips/MipsSubtarget.h b/lib/Target/Mips/MipsSubtarget.h
index 2166b93..373f481 100644
--- a/lib/Target/Mips/MipsSubtarget.h
+++ b/lib/Target/Mips/MipsSubtarget.h
@@ -37,7 +37,10 @@ public:
};
protected:
- enum MipsArchEnum { Mips32, Mips32r2, Mips4, Mips64, Mips64r2 };
+ enum MipsArchEnum {
+ Mips1, Mips2, Mips32, Mips32r2, Mips32r6, Mips3, Mips4, Mips5, Mips64,
+ Mips64r2, Mips64r6
+ };
// Mips architecture version
MipsArchEnum MipsArchVersion;
@@ -56,6 +59,9 @@ protected:
// IsFP64bit - The target processor has 64-bit floating point registers.
bool IsFP64bit;
+ // IsNaN2008bit - IEEE 754-2008 NaN encoding.
+ bool IsNaN2008bit;
+
// IsGP64bit - General-purpose registers are 64 bits wide
bool IsGP64bit;
@@ -73,20 +79,20 @@ protected:
/// Features related to the presence of specific instructions.
- // HasSEInReg - SEB and SEH (signext in register) instructions.
- bool HasSEInReg;
+ // HasMips3_32 - The subset of MIPS-III instructions added to MIPS32
+ bool HasMips3_32;
- // HasCondMov - Conditional mov (MOVZ, MOVN) instructions.
- bool HasCondMov;
+ // HasMips3_32r2 - The subset of MIPS-III instructions added to MIPS32r2
+ bool HasMips3_32r2;
- // HasSwap - Byte and half swap instructions.
- bool HasSwap;
+ // HasMips4_32 - Has the subset of MIPS-IV present in MIPS32
+ bool HasMips4_32;
- // HasBitCount - Count leading '1' and '0' bits.
- bool HasBitCount;
+ // HasMips4_32r2 - Has the subset of MIPS-IV present in MIPS32r2
+ bool HasMips4_32r2;
- // HasFPIdx -- Floating point indexed load/store instructions.
- bool HasFPIdx;
+ // HasMips5_32r2 - Has the subset of MIPS-V present in MIPS32r2
+ bool HasMips5_32r2;
// InMips16 -- can process Mips16 instructions
bool InMips16Mode;
@@ -127,9 +133,9 @@ protected:
Triple TargetTriple;
public:
- virtual bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
- AntiDepBreakMode& Mode,
- RegClassVector& CriticalPathRCs) const;
+ bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
+ AntiDepBreakMode& Mode,
+ RegClassVector& CriticalPathRCs) const override;
/// Only O32 and EABI supported right now.
bool isABI_EABI() const { return MipsABI == EABI; }
@@ -148,16 +154,24 @@ public:
/// subtarget options. Definition of function is auto generated by tblgen.
void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+ bool hasMips2() const { return MipsArchVersion >= Mips2; }
+ bool hasMips3() const { return MipsArchVersion >= Mips3; }
+ bool hasMips4_32() const { return HasMips4_32; }
+ bool hasMips4_32r2() const { return HasMips4_32r2; }
bool hasMips32() const { return MipsArchVersion >= Mips32; }
bool hasMips32r2() const { return MipsArchVersion == Mips32r2 ||
MipsArchVersion == Mips64r2; }
+ bool hasMips32r6() const { return MipsArchVersion == Mips32r6 ||
+ MipsArchVersion == Mips64r6; }
bool hasMips64() const { return MipsArchVersion >= Mips64; }
bool hasMips64r2() const { return MipsArchVersion == Mips64r2; }
+ bool hasMips64r6() const { return MipsArchVersion == Mips64r6; }
bool hasCnMips() const { return HasCnMips; }
bool isLittle() const { return IsLittle; }
bool isFP64bit() const { return IsFP64bit; }
+ bool isNaN2008() const { return IsNaN2008bit; }
bool isNotFP64bit() const { return !IsFP64bit; }
bool isGP64bit() const { return IsGP64bit; }
bool isGP32bit() const { return !IsGP64bit; }
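
The ordering of the widened MipsArchEnum appears deliberate: the 32-bit line (Mips1 through Mips32r6) sorts before the 64-bit line (Mips3 through Mips64r6), so the >= comparisons in the predicates above read as "this ISA or a superset within its line"; for example, hasMips3() stays false for every 32-bit architecture. A compile-checkable sketch of the scheme (standalone, reusing the enum from the hunk above):

enum MipsArchEnum {
  Mips1, Mips2, Mips32, Mips32r2, Mips32r6, Mips3, Mips4, Mips5, Mips64,
  Mips64r2, Mips64r6
};

static bool hasMips3(MipsArchEnum V) { return V >= Mips3; }
static bool hasMips32(MipsArchEnum V) { return V >= Mips32; }

static_assert(Mips32r6 < Mips3, "64-bit line sorts after the 32-bit line");

int main() {
  // MIPS32r6 is not a 64-bit ISA, so hasMips3 stays false for it.
  return (!hasMips3(Mips32r6) && hasMips32(Mips64r2)) ? 0 : 1;
}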
@@ -197,11 +211,6 @@ public:
}
/// Features related to the presence of specific instructions.
- bool hasSEInReg() const { return HasSEInReg; }
- bool hasCondMov() const { return HasCondMov; }
- bool hasSwap() const { return HasSwap; }
- bool hasBitCount() const { return HasBitCount; }
- bool hasFPIdx() const { return HasFPIdx; }
bool hasExtractInsert() const { return !inMips16Mode() && hasMips32r2(); }
const InstrItineraryData &getInstrItineraryData() const { return InstrItins; }
@@ -213,10 +222,9 @@ public:
bool isTargetNaCl() const { return TargetTriple.isOSNaCl(); }
bool isNotTargetNaCl() const { return !TargetTriple.isOSNaCl(); }
-// for now constant islands are on for the whole compilation unit but we only
-// really use them if in addition we are in mips16 mode
-//
-static bool useConstantIslands();
+ // For now, constant islands are on for the whole compilation unit, but we
+ // only really use them if we are also in mips16 mode.
+ static bool useConstantIslands();
unsigned stackAlignment() const { return hasMips64() ? 16 : 8; }
@@ -226,7 +234,12 @@ static bool useConstantIslands();
/// \brief Reset the subtarget for the Mips target.
void resetSubtarget(MachineFunction *MF);
-
+ /// Does the system support unaligned memory access?
+ ///
+ /// MIPS32r6/MIPS64r6 require full unaligned access support but do not
+ /// specify which component of the system provides it. Hardware, software, and
+ /// hybrid implementations are all valid.
+ bool systemSupportsUnalignedAccess() const { return hasMips32r6(); }
};
} // End llvm namespace
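
The new predicate pairs with the allowsUnalignedMemoryAccesses change earlier in this commit: when the subtarget architecturally guarantees unaligned access, every type is reported legal and fast, and only otherwise does the per-type switch decide. A standalone sketch of that control flow (invented names, mirroring the hunk in MipsSEISelLowering.cpp):

#include <cstdio>

enum SimpleVT { i32, i64, f80 };

static bool systemSupportsUnalignedAccess(bool HasMips32r6) {
  // MIPS32r6/MIPS64r6 mandate unaligned support (hardware, software, or both).
  return HasMips32r6;
}

static bool allowsUnalignedAccess(SimpleVT VT, bool HasMips32r6, bool *Fast) {
  if (systemSupportsUnalignedAccess(HasMips32r6)) {
    if (Fast)
      *Fast = true; // guaranteed by the ISA, assumed fast
    return true;
  }
  switch (VT) { // pre-r6 path: only the types the backend can expand
  case i32:
  case i64:
    return true;
  default:
    return false;
  }
}

int main() {
  bool Fast = false;
  std::printf("%d %d\n", allowsUnalignedAccess(f80, true, &Fast),
              allowsUnalignedAccess(f80, false, nullptr)); // prints "1 0"
  return 0;
}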
diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp
index e9053c8..984c58e 100644
--- a/lib/Target/Mips/MipsTargetMachine.cpp
+++ b/lib/Target/Mips/MipsTargetMachine.cpp
@@ -35,7 +35,7 @@
#include "llvm/Transforms/Scalar.h"
using namespace llvm;
-
+#define DEBUG_TYPE "mips"
extern "C" void LLVMInitializeMipsTarget() {
// Register the target.
@@ -171,12 +171,12 @@ public:
return *getMipsTargetMachine().getSubtargetImpl();
}
- virtual void addIRPasses();
- virtual bool addInstSelector();
- virtual void addMachineSSAOptimization();
- virtual bool addPreEmitPass();
+ void addIRPasses() override;
+ bool addInstSelector() override;
+ void addMachineSSAOptimization() override;
+ bool addPreEmitPass() override;
- virtual bool addPreRegAlloc();
+ bool addPreRegAlloc() override;
};
} // namespace
diff --git a/lib/Target/Mips/MipsTargetMachine.h b/lib/Target/Mips/MipsTargetMachine.h
index 5a9a11d..a5aa39b 100644
--- a/lib/Target/Mips/MipsTargetMachine.h
+++ b/lib/Target/Mips/MipsTargetMachine.h
@@ -20,7 +20,6 @@
#include "MipsJITInfo.h"
#include "MipsSelectionDAGInfo.h"
#include "MipsSubtarget.h"
-#include "llvm/ADT/OwningPtr.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/IR/DataLayout.h"
@@ -34,15 +33,15 @@ class MipsRegisterInfo;
class MipsTargetMachine : public LLVMTargetMachine {
MipsSubtarget Subtarget;
const DataLayout DL; // Calculates type size & alignment
- OwningPtr<const MipsInstrInfo> InstrInfo;
- OwningPtr<const MipsFrameLowering> FrameLowering;
- OwningPtr<const MipsTargetLowering> TLInfo;
- OwningPtr<const MipsInstrInfo> InstrInfo16;
- OwningPtr<const MipsFrameLowering> FrameLowering16;
- OwningPtr<const MipsTargetLowering> TLInfo16;
- OwningPtr<const MipsInstrInfo> InstrInfoSE;
- OwningPtr<const MipsFrameLowering> FrameLoweringSE;
- OwningPtr<const MipsTargetLowering> TLInfoSE;
+ std::unique_ptr<const MipsInstrInfo> InstrInfo;
+ std::unique_ptr<const MipsFrameLowering> FrameLowering;
+ std::unique_ptr<const MipsTargetLowering> TLInfo;
+ std::unique_ptr<const MipsInstrInfo> InstrInfo16;
+ std::unique_ptr<const MipsFrameLowering> FrameLowering16;
+ std::unique_ptr<const MipsTargetLowering> TLInfo16;
+ std::unique_ptr<const MipsInstrInfo> InstrInfoSE;
+ std::unique_ptr<const MipsFrameLowering> FrameLoweringSE;
+ std::unique_ptr<const MipsTargetLowering> TLInfoSE;
MipsSelectionDAGInfo TSInfo;
const InstrItineraryData &InstrItins;
MipsJITInfo JITInfo;
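
OwningPtr was LLVM's pre-C++11 owning smart pointer; the rebase replaces it with std::unique_ptr, which keeps the scoped-deletion behavior and adds move-only ownership transfer. A minimal sketch (illustrative type):

#include <memory>
#include <utility>

struct InstrInfo { int Flavor = 0; };

int main() {
  std::unique_ptr<InstrInfo> II(new InstrInfo); // sole owner
  II->Flavor = 16;
  std::unique_ptr<InstrInfo> Other = std::move(II); // ownership transfers
  // II is now null; Other frees the object when it goes out of scope.
  return (II == nullptr && Other->Flavor == 16) ? 0 : 1;
}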
@@ -56,39 +55,38 @@ public:
virtual ~MipsTargetMachine() {}
- virtual void addAnalysisPasses(PassManagerBase &PM);
+ void addAnalysisPasses(PassManagerBase &PM) override;
- virtual const MipsInstrInfo *getInstrInfo() const
+ const MipsInstrInfo *getInstrInfo() const override
{ return InstrInfo.get(); }
- virtual const TargetFrameLowering *getFrameLowering() const
+ const TargetFrameLowering *getFrameLowering() const override
{ return FrameLowering.get(); }
- virtual const MipsSubtarget *getSubtargetImpl() const
+ const MipsSubtarget *getSubtargetImpl() const override
{ return &Subtarget; }
- virtual const DataLayout *getDataLayout() const
+ const DataLayout *getDataLayout() const override
{ return &DL;}
- virtual const InstrItineraryData *getInstrItineraryData() const {
- return Subtarget.inMips16Mode() ? 0 : &InstrItins;
+ const InstrItineraryData *getInstrItineraryData() const override {
+ return Subtarget.inMips16Mode() ? nullptr : &InstrItins;
}
- virtual MipsJITInfo *getJITInfo()
- { return &JITInfo; }
+ MipsJITInfo *getJITInfo() override { return &JITInfo; }
- virtual const MipsRegisterInfo *getRegisterInfo() const {
+ const MipsRegisterInfo *getRegisterInfo() const override {
return &InstrInfo->getRegisterInfo();
}
- virtual const MipsTargetLowering *getTargetLowering() const {
+ const MipsTargetLowering *getTargetLowering() const override {
return TLInfo.get();
}
- virtual const MipsSelectionDAGInfo* getSelectionDAGInfo() const {
+ const MipsSelectionDAGInfo* getSelectionDAGInfo() const override {
return &TSInfo;
}
// Pass Pipeline Configuration
- virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
- virtual bool addCodeEmitter(PassManagerBase &PM, JITCodeEmitter &JCE);
+ TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
+ bool addCodeEmitter(PassManagerBase &PM, JITCodeEmitter &JCE) override;
// Set helper classes
void setHelperClassesMips16();
diff --git a/lib/Target/Mips/MipsTargetStreamer.h b/lib/Target/Mips/MipsTargetStreamer.h
index 5f4b74b..4ad37ac 100644
--- a/lib/Target/Mips/MipsTargetStreamer.h
+++ b/lib/Target/Mips/MipsTargetStreamer.h
@@ -34,6 +34,8 @@ public:
virtual void emitDirectiveEnt(const MCSymbol &Symbol) = 0;
virtual void emitDirectiveAbiCalls() = 0;
+ virtual void emitDirectiveNaN2008() = 0;
+ virtual void emitDirectiveNaNLegacy() = 0;
virtual void emitDirectiveOptionPic0() = 0;
virtual void emitDirectiveOptionPic2() = 0;
virtual void emitFrame(unsigned StackReg, unsigned StackSize,
@@ -45,6 +47,11 @@ public:
virtual void emitDirectiveSetMips64() = 0;
virtual void emitDirectiveSetMips64R2() = 0;
virtual void emitDirectiveSetDsp() = 0;
+
+ // PIC support
+ virtual void emitDirectiveCpload(unsigned RegNo) = 0;
+ virtual void emitDirectiveCpsetup(unsigned RegNo, int RegOrOffset,
+ const MCSymbol &Sym, bool IsReg) = 0;
};
// This part is for ascii assembly output
@@ -53,32 +60,39 @@ class MipsTargetAsmStreamer : public MipsTargetStreamer {
public:
MipsTargetAsmStreamer(MCStreamer &S, formatted_raw_ostream &OS);
- virtual void emitDirectiveSetMicroMips();
- virtual void emitDirectiveSetNoMicroMips();
- virtual void emitDirectiveSetMips16();
- virtual void emitDirectiveSetNoMips16();
-
- virtual void emitDirectiveSetReorder();
- virtual void emitDirectiveSetNoReorder();
- virtual void emitDirectiveSetMacro();
- virtual void emitDirectiveSetNoMacro();
- virtual void emitDirectiveSetAt();
- virtual void emitDirectiveSetNoAt();
- virtual void emitDirectiveEnd(StringRef Name);
-
- virtual void emitDirectiveEnt(const MCSymbol &Symbol);
- virtual void emitDirectiveAbiCalls();
- virtual void emitDirectiveOptionPic0();
- virtual void emitDirectiveOptionPic2();
- virtual void emitFrame(unsigned StackReg, unsigned StackSize,
- unsigned ReturnReg);
- virtual void emitMask(unsigned CPUBitmask, int CPUTopSavedRegOff);
- virtual void emitFMask(unsigned FPUBitmask, int FPUTopSavedRegOff);
-
- virtual void emitDirectiveSetMips32R2();
- virtual void emitDirectiveSetMips64();
- virtual void emitDirectiveSetMips64R2();
- virtual void emitDirectiveSetDsp();
+ void emitDirectiveSetMicroMips() override;
+ void emitDirectiveSetNoMicroMips() override;
+ void emitDirectiveSetMips16() override;
+ void emitDirectiveSetNoMips16() override;
+
+ void emitDirectiveSetReorder() override;
+ void emitDirectiveSetNoReorder() override;
+ void emitDirectiveSetMacro() override;
+ void emitDirectiveSetNoMacro() override;
+ void emitDirectiveSetAt() override;
+ void emitDirectiveSetNoAt() override;
+ void emitDirectiveEnd(StringRef Name) override;
+
+ void emitDirectiveEnt(const MCSymbol &Symbol) override;
+ void emitDirectiveAbiCalls() override;
+ void emitDirectiveNaN2008() override;
+ void emitDirectiveNaNLegacy() override;
+ void emitDirectiveOptionPic0() override;
+ void emitDirectiveOptionPic2() override;
+ void emitFrame(unsigned StackReg, unsigned StackSize,
+ unsigned ReturnReg) override;
+ void emitMask(unsigned CPUBitmask, int CPUTopSavedRegOff) override;
+ void emitFMask(unsigned FPUBitmask, int FPUTopSavedRegOff) override;
+
+ void emitDirectiveSetMips32R2() override;
+ void emitDirectiveSetMips64() override;
+ void emitDirectiveSetMips64R2() override;
+ void emitDirectiveSetDsp() override;
+
+ // PIC support
+ virtual void emitDirectiveCpload(unsigned RegNo);
+ void emitDirectiveCpsetup(unsigned RegNo, int RegOrOffset,
+ const MCSymbol &Sym, bool IsReg) override;
};
// This part is for ELF object output
@@ -92,36 +106,48 @@ public:
MCELFStreamer &getStreamer();
MipsTargetELFStreamer(MCStreamer &S, const MCSubtargetInfo &STI);
- virtual void emitLabel(MCSymbol *Symbol) override;
- virtual void emitAssignment(MCSymbol *Symbol, const MCExpr *Value) override;
+ void emitLabel(MCSymbol *Symbol) override;
+ void emitAssignment(MCSymbol *Symbol, const MCExpr *Value) override;
void finish() override;
- virtual void emitDirectiveSetMicroMips();
- virtual void emitDirectiveSetNoMicroMips();
- virtual void emitDirectiveSetMips16();
- virtual void emitDirectiveSetNoMips16();
-
- virtual void emitDirectiveSetReorder();
- virtual void emitDirectiveSetNoReorder();
- virtual void emitDirectiveSetMacro();
- virtual void emitDirectiveSetNoMacro();
- virtual void emitDirectiveSetAt();
- virtual void emitDirectiveSetNoAt();
- virtual void emitDirectiveEnd(StringRef Name);
-
- virtual void emitDirectiveEnt(const MCSymbol &Symbol);
- virtual void emitDirectiveAbiCalls();
- virtual void emitDirectiveOptionPic0();
- virtual void emitDirectiveOptionPic2();
- virtual void emitFrame(unsigned StackReg, unsigned StackSize,
- unsigned ReturnReg);
- virtual void emitMask(unsigned CPUBitmask, int CPUTopSavedRegOff);
- virtual void emitFMask(unsigned FPUBitmask, int FPUTopSavedRegOff);
-
- virtual void emitDirectiveSetMips32R2();
- virtual void emitDirectiveSetMips64();
- virtual void emitDirectiveSetMips64R2();
- virtual void emitDirectiveSetDsp();
+ void emitDirectiveSetMicroMips() override;
+ void emitDirectiveSetNoMicroMips() override;
+ void emitDirectiveSetMips16() override;
+ void emitDirectiveSetNoMips16() override;
+
+ void emitDirectiveSetReorder() override;
+ void emitDirectiveSetNoReorder() override;
+ void emitDirectiveSetMacro() override;
+ void emitDirectiveSetNoMacro() override;
+ void emitDirectiveSetAt() override;
+ void emitDirectiveSetNoAt() override;
+ void emitDirectiveEnd(StringRef Name) override;
+
+ void emitDirectiveEnt(const MCSymbol &Symbol) override;
+ void emitDirectiveAbiCalls() override;
+ void emitDirectiveNaN2008() override;
+ void emitDirectiveNaNLegacy() override;
+ void emitDirectiveOptionPic0() override;
+ void emitDirectiveOptionPic2() override;
+ void emitFrame(unsigned StackReg, unsigned StackSize,
+ unsigned ReturnReg) override;
+ void emitMask(unsigned CPUBitmask, int CPUTopSavedRegOff) override;
+ void emitFMask(unsigned FPUBitmask, int FPUTopSavedRegOff) override;
+
+ void emitDirectiveSetMips32R2() override;
+ void emitDirectiveSetMips64() override;
+ void emitDirectiveSetMips64R2() override;
+ void emitDirectiveSetDsp() override;
+
+ // PIC support
+ virtual void emitDirectiveCpload(unsigned RegNo);
+ void emitDirectiveCpsetup(unsigned RegNo, int RegOrOffset,
+ const MCSymbol &Sym, bool IsReg) override;
+
+protected:
+ bool isO32() const { return STI.getFeatureBits() & Mips::FeatureO32; }
+ bool isN32() const { return STI.getFeatureBits() & Mips::FeatureN32; }
+ bool isN64() const { return STI.getFeatureBits() & Mips::FeatureN64; }
};
}
#endif
diff --git a/lib/Target/NVPTX/CMakeLists.txt b/lib/Target/NVPTX/CMakeLists.txt
index 029118a..4e35b18 100644
--- a/lib/Target/NVPTX/CMakeLists.txt
+++ b/lib/Target/NVPTX/CMakeLists.txt
@@ -9,6 +9,7 @@ tablegen(LLVM NVPTXGenSubtargetInfo.inc -gen-subtarget)
add_public_tablegen_target(NVPTXCommonTableGen)
set(NVPTXCodeGen_sources
+ NVPTXFavorNonGenericAddrSpaces.cpp
NVPTXFrameLowering.cpp
NVPTXInstrInfo.cpp
NVPTXISelDAGToDAG.cpp
@@ -26,6 +27,8 @@ set(NVPTXCodeGen_sources
NVPTXAssignValidGlobalNames.cpp
NVPTXPrologEpilogPass.cpp
NVPTXMCExpr.cpp
+ NVPTXReplaceImageHandles.cpp
+ NVPTXImageOptimizer.cpp
)
add_llvm_target(NVPTXCodeGen ${NVPTXCodeGen_sources})
diff --git a/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp b/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp
index cf165be..9618896 100644
--- a/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp
+++ b/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp
@@ -11,7 +11,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "asm-printer"
#include "InstPrinter/NVPTXInstPrinter.h"
#include "MCTargetDesc/NVPTXBaseInfo.h"
#include "NVPTX.h"
@@ -25,6 +24,8 @@
#include <cctype>
using namespace llvm;
+#define DEBUG_TYPE "asm-printer"
+
#include "NVPTXGenAsmWriter.inc"
diff --git a/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h b/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h
index 93029ae..1fb3c57 100644
--- a/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h
+++ b/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h
@@ -27,8 +27,8 @@ public:
NVPTXInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
const MCRegisterInfo &MRI, const MCSubtargetInfo &STI);
- virtual void printRegName(raw_ostream &OS, unsigned RegNo) const;
- virtual void printInst(const MCInst *MI, raw_ostream &OS, StringRef Annot);
+ void printRegName(raw_ostream &OS, unsigned RegNo) const override;
+ void printInst(const MCInst *MI, raw_ostream &OS, StringRef Annot) override;
// Autogenerated by tblgen.
void printInstruction(const MCInst *MI, raw_ostream &O);
@@ -37,15 +37,15 @@ public:
void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printCvtMode(const MCInst *MI, int OpNum, raw_ostream &O,
- const char *Modifier = 0);
+ const char *Modifier = nullptr);
void printCmpMode(const MCInst *MI, int OpNum, raw_ostream &O,
- const char *Modifier = 0);
+ const char *Modifier = nullptr);
void printLdStCode(const MCInst *MI, int OpNum,
- raw_ostream &O, const char *Modifier = 0);
+ raw_ostream &O, const char *Modifier = nullptr);
void printMemOperand(const MCInst *MI, int OpNum,
- raw_ostream &O, const char *Modifier = 0);
+ raw_ostream &O, const char *Modifier = nullptr);
void printProtoIdent(const MCInst *MI, int OpNum,
- raw_ostream &O, const char *Modifier = 0);
+ raw_ostream &O, const char *Modifier = nullptr);
};
}
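
The 0 to nullptr conversion for default pointer arguments, seen throughout this patch, is not purely cosmetic. A standalone sketch of the ambiguity a literal 0 invites:

#include <cstdio>

void print(int)          { std::puts("int overload"); }
void print(const char *) { std::puts("pointer overload"); }

int main() {
  print(0);        // a literal 0 is an int first: picks print(int)
  print(nullptr);  // unambiguously a null pointer: picks print(const char *)
}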
diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h b/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h
index edf4a80..ddb122f 100644
--- a/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h
+++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h
@@ -43,14 +43,16 @@ enum PropertyAnnotation {
PROPERTY_ISSAMPLER,
PROPERTY_ISREADONLY_IMAGE_PARAM,
PROPERTY_ISWRITEONLY_IMAGE_PARAM,
+ PROPERTY_ISREADWRITE_IMAGE_PARAM,
PROPERTY_ISKERNEL_FUNCTION,
PROPERTY_ALIGN,
+ PROPERTY_MANAGED,
// last property
PROPERTY_LAST
};
-const unsigned AnnotationNameLen = 8; // length of each annotation name
+const unsigned AnnotationNameLen = 9; // maximum length of an annotation name
const char PropertyAnnotationNames[PROPERTY_LAST + 1][AnnotationNameLen + 1] = {
"maxntidx", // PROPERTY_MAXNTID_X
"maxntidy", // PROPERTY_MAXNTID_Y
@@ -64,8 +66,10 @@ const char PropertyAnnotationNames[PROPERTY_LAST + 1][AnnotationNameLen + 1] = {
"sampler", // PROPERTY_ISSAMPLER
"rdoimage", // PROPERTY_ISREADONLY_IMAGE_PARAM
"wroimage", // PROPERTY_ISWRITEONLY_IMAGE_PARAM
+ "rdwrimage", // PROPERTY_ISREADWRITE_IMAGE_PARAM
"kernel", // PROPERTY_ISKERNEL_FUNCTION
"align", // PROPERTY_ALIGN
+ "managed", // PROPERTY_MANAGED
// last property
"proplast", // PROPERTY_LAST
diff --git a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp
index 3cf6e4b..158ca90 100644
--- a/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp
+++ b/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp
@@ -20,6 +20,8 @@
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/TargetRegistry.h"
+using namespace llvm;
+
#define GET_INSTRINFO_MC_DESC
#include "NVPTXGenInstrInfo.inc"
@@ -29,8 +31,6 @@
#define GET_REGINFO_MC_DESC
#include "NVPTXGenRegisterInfo.inc"
-using namespace llvm;
-
static MCInstrInfo *createNVPTXMCInstrInfo() {
MCInstrInfo *X = new MCInstrInfo();
InitNVPTXMCInstrInfo(X);
@@ -66,7 +66,7 @@ static MCInstPrinter *createNVPTXMCInstPrinter(const Target &T,
const MCSubtargetInfo &STI) {
if (SyntaxVariant == 0)
return new NVPTXInstPrinter(MAI, MII, MRI, STI);
- return 0;
+ return nullptr;
}
// Force static initialization.
diff --git a/lib/Target/NVPTX/NVPTX.h b/lib/Target/NVPTX/NVPTX.h
index 8cbdd47..e74c808 100644
--- a/lib/Target/NVPTX/NVPTX.h
+++ b/lib/Target/NVPTX/NVPTX.h
@@ -63,9 +63,12 @@ FunctionPass *
createNVPTXISelDag(NVPTXTargetMachine &TM, llvm::CodeGenOpt::Level OptLevel);
ModulePass *createNVPTXAssignValidGlobalNamesPass();
ModulePass *createGenericToNVVMPass();
+FunctionPass *createNVPTXFavorNonGenericAddrSpacesPass();
ModulePass *createNVVMReflectPass();
ModulePass *createNVVMReflectPass(const StringMap<int>& Mapping);
MachineFunctionPass *createNVPTXPrologEpilogPass();
+MachineFunctionPass *createNVPTXReplaceImageHandlesPass();
+FunctionPass *createNVPTXImageOptimizerPass();
bool isImageOrSamplerVal(const Value *, const Module *);
diff --git a/lib/Target/NVPTX/NVPTXAllocaHoisting.h b/lib/Target/NVPTX/NVPTXAllocaHoisting.h
index 22404b7..5b61068 100644
--- a/lib/Target/NVPTX/NVPTXAllocaHoisting.h
+++ b/lib/Target/NVPTX/NVPTXAllocaHoisting.h
@@ -30,17 +30,17 @@ public:
static char ID; // Pass ID
NVPTXAllocaHoisting() : FunctionPass(ID) {}
- void getAnalysisUsage(AnalysisUsage &AU) const {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<DataLayoutPass>();
AU.addPreserved("stack-protector");
AU.addPreserved<MachineFunctionAnalysis>();
}
- virtual const char *getPassName() const {
+ const char *getPassName() const override {
return "NVPTX specific alloca hoisting";
}
- virtual bool runOnFunction(Function &function);
+ bool runOnFunction(Function &function) override;
};
extern FunctionPass *createAllocaHoisting();
diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index 97e2cc6..4ec575f 100644
--- a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -17,6 +17,7 @@
#include "MCTargetDesc/NVPTXMCAsmInfo.h"
#include "NVPTX.h"
#include "NVPTXInstrInfo.h"
+#include "NVPTXMachineFunctionInfo.h"
#include "NVPTXMCExpr.h"
#include "NVPTXRegisterInfo.h"
#include "NVPTXTargetMachine.h"
@@ -131,7 +132,7 @@ const MCExpr *nvptx::LowerConstant(const Constant *CV, AsmPrinter &AP) {
return MCSymbolRefExpr::Create(AP.GetBlockAddressSymbol(BA), Ctx);
const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV);
- if (CE == 0)
+ if (!CE)
llvm_unreachable("Unknown constant value to lower!");
switch (CE->getOpcode()) {
@@ -149,9 +150,24 @@ const MCExpr *nvptx::LowerConstant(const Constant *CV, AsmPrinter &AP) {
raw_string_ostream OS(S);
OS << "Unsupported expression in static initializer: ";
CE->printAsOperand(OS, /*PrintType=*/ false,
- !AP.MF ? 0 : AP.MF->getFunction()->getParent());
+ !AP.MF ? nullptr : AP.MF->getFunction()->getParent());
report_fatal_error(OS.str());
}
+ case Instruction::AddrSpaceCast: {
+    // Strip any addrspace(1)->addrspace(0) addrspacecasts. These will be
+    // handled by the generic() logic in the MCExpr printer.
+ PointerType *DstTy = cast<PointerType>(CE->getType());
+ PointerType *SrcTy = cast<PointerType>(CE->getOperand(0)->getType());
+ if (SrcTy->getAddressSpace() == 1 && DstTy->getAddressSpace() == 0) {
+ return LowerConstant(cast<const Constant>(CE->getOperand(0)), AP);
+ }
+ std::string S;
+ raw_string_ostream OS(S);
+ OS << "Unsupported expression in static initializer: ";
+ CE->printAsOperand(OS, /*PrintType=*/ false,
+ !AP.MF ? nullptr : AP.MF->getFunction()->getParent());
+ report_fatal_error(OS.str());
+ }
case Instruction::GetElementPtr: {
const DataLayout &TD = *AP.TM.getDataLayout();
// Generate a symbolic expression for the byte address
@@ -310,13 +326,279 @@ void NVPTXAsmPrinter::EmitInstruction(const MachineInstr *MI) {
EmitToStreamer(OutStreamer, Inst);
}
+// Handle symbol backtracking for targets that do not support image handles.
+bool NVPTXAsmPrinter::lowerImageHandleOperand(const MachineInstr *MI,
+ unsigned OpNo, MCOperand &MCOp) {
+ const MachineOperand &MO = MI->getOperand(OpNo);
+
+ switch (MI->getOpcode()) {
+ default: return false;
+ case NVPTX::TEX_1D_F32_I32:
+ case NVPTX::TEX_1D_F32_F32:
+ case NVPTX::TEX_1D_F32_F32_LEVEL:
+ case NVPTX::TEX_1D_F32_F32_GRAD:
+ case NVPTX::TEX_1D_I32_I32:
+ case NVPTX::TEX_1D_I32_F32:
+ case NVPTX::TEX_1D_I32_F32_LEVEL:
+ case NVPTX::TEX_1D_I32_F32_GRAD:
+ case NVPTX::TEX_1D_ARRAY_F32_I32:
+ case NVPTX::TEX_1D_ARRAY_F32_F32:
+ case NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL:
+ case NVPTX::TEX_1D_ARRAY_F32_F32_GRAD:
+ case NVPTX::TEX_1D_ARRAY_I32_I32:
+ case NVPTX::TEX_1D_ARRAY_I32_F32:
+ case NVPTX::TEX_1D_ARRAY_I32_F32_LEVEL:
+ case NVPTX::TEX_1D_ARRAY_I32_F32_GRAD:
+ case NVPTX::TEX_2D_F32_I32:
+ case NVPTX::TEX_2D_F32_F32:
+ case NVPTX::TEX_2D_F32_F32_LEVEL:
+ case NVPTX::TEX_2D_F32_F32_GRAD:
+ case NVPTX::TEX_2D_I32_I32:
+ case NVPTX::TEX_2D_I32_F32:
+ case NVPTX::TEX_2D_I32_F32_LEVEL:
+ case NVPTX::TEX_2D_I32_F32_GRAD:
+ case NVPTX::TEX_2D_ARRAY_F32_I32:
+ case NVPTX::TEX_2D_ARRAY_F32_F32:
+ case NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL:
+ case NVPTX::TEX_2D_ARRAY_F32_F32_GRAD:
+ case NVPTX::TEX_2D_ARRAY_I32_I32:
+ case NVPTX::TEX_2D_ARRAY_I32_F32:
+ case NVPTX::TEX_2D_ARRAY_I32_F32_LEVEL:
+ case NVPTX::TEX_2D_ARRAY_I32_F32_GRAD:
+ case NVPTX::TEX_3D_F32_I32:
+ case NVPTX::TEX_3D_F32_F32:
+ case NVPTX::TEX_3D_F32_F32_LEVEL:
+ case NVPTX::TEX_3D_F32_F32_GRAD:
+ case NVPTX::TEX_3D_I32_I32:
+ case NVPTX::TEX_3D_I32_F32:
+ case NVPTX::TEX_3D_I32_F32_LEVEL:
+ case NVPTX::TEX_3D_I32_F32_GRAD:
+ {
+ // This is a texture fetch, so operand 4 is a texref and operand 5 is
+ // a samplerref
+    if (OpNo == 4 || OpNo == 5) {
+      lowerImageHandleSymbol(MO.getImm(), MCOp);
+      return true;
+    }
+
+ return false;
+ }
+ case NVPTX::SULD_1D_I8_TRAP:
+ case NVPTX::SULD_1D_I16_TRAP:
+ case NVPTX::SULD_1D_I32_TRAP:
+ case NVPTX::SULD_1D_ARRAY_I8_TRAP:
+ case NVPTX::SULD_1D_ARRAY_I16_TRAP:
+ case NVPTX::SULD_1D_ARRAY_I32_TRAP:
+ case NVPTX::SULD_2D_I8_TRAP:
+ case NVPTX::SULD_2D_I16_TRAP:
+ case NVPTX::SULD_2D_I32_TRAP:
+ case NVPTX::SULD_2D_ARRAY_I8_TRAP:
+ case NVPTX::SULD_2D_ARRAY_I16_TRAP:
+ case NVPTX::SULD_2D_ARRAY_I32_TRAP:
+ case NVPTX::SULD_3D_I8_TRAP:
+ case NVPTX::SULD_3D_I16_TRAP:
+ case NVPTX::SULD_3D_I32_TRAP: {
+ // This is a V1 surface load, so operand 1 is a surfref
+ if (OpNo == 1) {
+ lowerImageHandleSymbol(MO.getImm(), MCOp);
+ return true;
+ }
+
+ return false;
+ }
+ case NVPTX::SULD_1D_V2I8_TRAP:
+ case NVPTX::SULD_1D_V2I16_TRAP:
+ case NVPTX::SULD_1D_V2I32_TRAP:
+ case NVPTX::SULD_1D_ARRAY_V2I8_TRAP:
+ case NVPTX::SULD_1D_ARRAY_V2I16_TRAP:
+ case NVPTX::SULD_1D_ARRAY_V2I32_TRAP:
+ case NVPTX::SULD_2D_V2I8_TRAP:
+ case NVPTX::SULD_2D_V2I16_TRAP:
+ case NVPTX::SULD_2D_V2I32_TRAP:
+ case NVPTX::SULD_2D_ARRAY_V2I8_TRAP:
+ case NVPTX::SULD_2D_ARRAY_V2I16_TRAP:
+ case NVPTX::SULD_2D_ARRAY_V2I32_TRAP:
+ case NVPTX::SULD_3D_V2I8_TRAP:
+ case NVPTX::SULD_3D_V2I16_TRAP:
+ case NVPTX::SULD_3D_V2I32_TRAP: {
+ // This is a V2 surface load, so operand 2 is a surfref
+ if (OpNo == 2) {
+ lowerImageHandleSymbol(MO.getImm(), MCOp);
+ return true;
+ }
+
+ return false;
+ }
+ case NVPTX::SULD_1D_V4I8_TRAP:
+ case NVPTX::SULD_1D_V4I16_TRAP:
+ case NVPTX::SULD_1D_V4I32_TRAP:
+ case NVPTX::SULD_1D_ARRAY_V4I8_TRAP:
+ case NVPTX::SULD_1D_ARRAY_V4I16_TRAP:
+ case NVPTX::SULD_1D_ARRAY_V4I32_TRAP:
+ case NVPTX::SULD_2D_V4I8_TRAP:
+ case NVPTX::SULD_2D_V4I16_TRAP:
+ case NVPTX::SULD_2D_V4I32_TRAP:
+ case NVPTX::SULD_2D_ARRAY_V4I8_TRAP:
+ case NVPTX::SULD_2D_ARRAY_V4I16_TRAP:
+ case NVPTX::SULD_2D_ARRAY_V4I32_TRAP:
+ case NVPTX::SULD_3D_V4I8_TRAP:
+ case NVPTX::SULD_3D_V4I16_TRAP:
+ case NVPTX::SULD_3D_V4I32_TRAP: {
+ // This is a V4 surface load, so operand 4 is a surfref
+ if (OpNo == 4) {
+ lowerImageHandleSymbol(MO.getImm(), MCOp);
+ return true;
+ }
+
+ return false;
+ }
+ case NVPTX::SUST_B_1D_B8_TRAP:
+ case NVPTX::SUST_B_1D_B16_TRAP:
+ case NVPTX::SUST_B_1D_B32_TRAP:
+ case NVPTX::SUST_B_1D_V2B8_TRAP:
+ case NVPTX::SUST_B_1D_V2B16_TRAP:
+ case NVPTX::SUST_B_1D_V2B32_TRAP:
+ case NVPTX::SUST_B_1D_V4B8_TRAP:
+ case NVPTX::SUST_B_1D_V4B16_TRAP:
+ case NVPTX::SUST_B_1D_V4B32_TRAP:
+ case NVPTX::SUST_B_1D_ARRAY_B8_TRAP:
+ case NVPTX::SUST_B_1D_ARRAY_B16_TRAP:
+ case NVPTX::SUST_B_1D_ARRAY_B32_TRAP:
+ case NVPTX::SUST_B_1D_ARRAY_V2B8_TRAP:
+ case NVPTX::SUST_B_1D_ARRAY_V2B16_TRAP:
+ case NVPTX::SUST_B_1D_ARRAY_V2B32_TRAP:
+ case NVPTX::SUST_B_1D_ARRAY_V4B8_TRAP:
+ case NVPTX::SUST_B_1D_ARRAY_V4B16_TRAP:
+ case NVPTX::SUST_B_1D_ARRAY_V4B32_TRAP:
+ case NVPTX::SUST_B_2D_B8_TRAP:
+ case NVPTX::SUST_B_2D_B16_TRAP:
+ case NVPTX::SUST_B_2D_B32_TRAP:
+ case NVPTX::SUST_B_2D_V2B8_TRAP:
+ case NVPTX::SUST_B_2D_V2B16_TRAP:
+ case NVPTX::SUST_B_2D_V2B32_TRAP:
+ case NVPTX::SUST_B_2D_V4B8_TRAP:
+ case NVPTX::SUST_B_2D_V4B16_TRAP:
+ case NVPTX::SUST_B_2D_V4B32_TRAP:
+ case NVPTX::SUST_B_2D_ARRAY_B8_TRAP:
+ case NVPTX::SUST_B_2D_ARRAY_B16_TRAP:
+ case NVPTX::SUST_B_2D_ARRAY_B32_TRAP:
+ case NVPTX::SUST_B_2D_ARRAY_V2B8_TRAP:
+ case NVPTX::SUST_B_2D_ARRAY_V2B16_TRAP:
+ case NVPTX::SUST_B_2D_ARRAY_V2B32_TRAP:
+ case NVPTX::SUST_B_2D_ARRAY_V4B8_TRAP:
+ case NVPTX::SUST_B_2D_ARRAY_V4B16_TRAP:
+ case NVPTX::SUST_B_2D_ARRAY_V4B32_TRAP:
+ case NVPTX::SUST_B_3D_B8_TRAP:
+ case NVPTX::SUST_B_3D_B16_TRAP:
+ case NVPTX::SUST_B_3D_B32_TRAP:
+ case NVPTX::SUST_B_3D_V2B8_TRAP:
+ case NVPTX::SUST_B_3D_V2B16_TRAP:
+ case NVPTX::SUST_B_3D_V2B32_TRAP:
+ case NVPTX::SUST_B_3D_V4B8_TRAP:
+ case NVPTX::SUST_B_3D_V4B16_TRAP:
+ case NVPTX::SUST_B_3D_V4B32_TRAP:
+ case NVPTX::SUST_P_1D_B8_TRAP:
+ case NVPTX::SUST_P_1D_B16_TRAP:
+ case NVPTX::SUST_P_1D_B32_TRAP:
+ case NVPTX::SUST_P_1D_V2B8_TRAP:
+ case NVPTX::SUST_P_1D_V2B16_TRAP:
+ case NVPTX::SUST_P_1D_V2B32_TRAP:
+ case NVPTX::SUST_P_1D_V4B8_TRAP:
+ case NVPTX::SUST_P_1D_V4B16_TRAP:
+ case NVPTX::SUST_P_1D_V4B32_TRAP:
+ case NVPTX::SUST_P_1D_ARRAY_B8_TRAP:
+ case NVPTX::SUST_P_1D_ARRAY_B16_TRAP:
+ case NVPTX::SUST_P_1D_ARRAY_B32_TRAP:
+ case NVPTX::SUST_P_1D_ARRAY_V2B8_TRAP:
+ case NVPTX::SUST_P_1D_ARRAY_V2B16_TRAP:
+ case NVPTX::SUST_P_1D_ARRAY_V2B32_TRAP:
+ case NVPTX::SUST_P_1D_ARRAY_V4B8_TRAP:
+ case NVPTX::SUST_P_1D_ARRAY_V4B16_TRAP:
+ case NVPTX::SUST_P_1D_ARRAY_V4B32_TRAP:
+ case NVPTX::SUST_P_2D_B8_TRAP:
+ case NVPTX::SUST_P_2D_B16_TRAP:
+ case NVPTX::SUST_P_2D_B32_TRAP:
+ case NVPTX::SUST_P_2D_V2B8_TRAP:
+ case NVPTX::SUST_P_2D_V2B16_TRAP:
+ case NVPTX::SUST_P_2D_V2B32_TRAP:
+ case NVPTX::SUST_P_2D_V4B8_TRAP:
+ case NVPTX::SUST_P_2D_V4B16_TRAP:
+ case NVPTX::SUST_P_2D_V4B32_TRAP:
+ case NVPTX::SUST_P_2D_ARRAY_B8_TRAP:
+ case NVPTX::SUST_P_2D_ARRAY_B16_TRAP:
+ case NVPTX::SUST_P_2D_ARRAY_B32_TRAP:
+ case NVPTX::SUST_P_2D_ARRAY_V2B8_TRAP:
+ case NVPTX::SUST_P_2D_ARRAY_V2B16_TRAP:
+ case NVPTX::SUST_P_2D_ARRAY_V2B32_TRAP:
+ case NVPTX::SUST_P_2D_ARRAY_V4B8_TRAP:
+ case NVPTX::SUST_P_2D_ARRAY_V4B16_TRAP:
+ case NVPTX::SUST_P_2D_ARRAY_V4B32_TRAP:
+ case NVPTX::SUST_P_3D_B8_TRAP:
+ case NVPTX::SUST_P_3D_B16_TRAP:
+ case NVPTX::SUST_P_3D_B32_TRAP:
+ case NVPTX::SUST_P_3D_V2B8_TRAP:
+ case NVPTX::SUST_P_3D_V2B16_TRAP:
+ case NVPTX::SUST_P_3D_V2B32_TRAP:
+ case NVPTX::SUST_P_3D_V4B8_TRAP:
+ case NVPTX::SUST_P_3D_V4B16_TRAP:
+ case NVPTX::SUST_P_3D_V4B32_TRAP: {
+ // This is a surface store, so operand 0 is a surfref
+ if (OpNo == 0) {
+ lowerImageHandleSymbol(MO.getImm(), MCOp);
+ return true;
+ }
+
+ return false;
+ }
+ case NVPTX::TXQ_CHANNEL_ORDER:
+ case NVPTX::TXQ_CHANNEL_DATA_TYPE:
+ case NVPTX::TXQ_WIDTH:
+ case NVPTX::TXQ_HEIGHT:
+ case NVPTX::TXQ_DEPTH:
+ case NVPTX::TXQ_ARRAY_SIZE:
+ case NVPTX::TXQ_NUM_SAMPLES:
+ case NVPTX::TXQ_NUM_MIPMAP_LEVELS:
+ case NVPTX::SUQ_CHANNEL_ORDER:
+ case NVPTX::SUQ_CHANNEL_DATA_TYPE:
+ case NVPTX::SUQ_WIDTH:
+ case NVPTX::SUQ_HEIGHT:
+ case NVPTX::SUQ_DEPTH:
+ case NVPTX::SUQ_ARRAY_SIZE: {
+ // This is a query, so operand 1 is a surfref/texref
+ if (OpNo == 1) {
+ lowerImageHandleSymbol(MO.getImm(), MCOp);
+ return true;
+ }
+
+ return false;
+ }
+ }
+}
+
+void NVPTXAsmPrinter::lowerImageHandleSymbol(unsigned Index, MCOperand &MCOp) {
+  // FIXME: ugly const_cast; we need a mutable TargetMachine here to reach
+  // the managed string pool.
+ TargetMachine &TM = const_cast<TargetMachine&>(MF->getTarget());
+ NVPTXTargetMachine &nvTM = static_cast<NVPTXTargetMachine&>(TM);
+ const NVPTXMachineFunctionInfo *MFI = MF->getInfo<NVPTXMachineFunctionInfo>();
+ const char *Sym = MFI->getImageHandleSymbol(Index);
+ std::string *SymNamePtr =
+ nvTM.getManagedStrPool()->getManagedString(Sym);
+ MCOp = GetSymbolRef(OutContext.GetOrCreateSymbol(
+ StringRef(SymNamePtr->c_str())));
+}
+
void NVPTXAsmPrinter::lowerToMCInst(const MachineInstr *MI, MCInst &OutMI) {
OutMI.setOpcode(MI->getOpcode());
+ const NVPTXSubtarget &ST = TM.getSubtarget<NVPTXSubtarget>();
// Special: Do not mangle symbol operand of CALL_PROTOTYPE
if (MI->getOpcode() == NVPTX::CALL_PROTOTYPE) {
const MachineOperand &MO = MI->getOperand(0);
- OutMI.addOperand(GetSymbolRef(MO,
+ OutMI.addOperand(GetSymbolRef(
OutContext.GetOrCreateSymbol(Twine(MO.getSymbolName()))));
return;
}
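
The long opcode switch in lowerImageHandleOperand reduces to one rule: each instruction family keeps its texref/surfref index in a fixed operand slot, and when the target lacks native image handles that immediate is rewritten into a symbol reference. A condensed sketch, where the enum and classifier are hypothetical stand-ins for the per-opcode case labels:

// Condensed sketch; the real code enumerates every opcode explicitly.
enum ImageOpKind { TexFetch, SurfLoadV1, SurfLoadV2, SurfLoadV4,
                   SurfStore, ImageQuery, NotImageOp };

// Hypothetical classifier standing in for the big switch above.
ImageOpKind classifyImageOpcode(unsigned Opcode);

// Returns true and sets Slot when this opcode family carries an image handle.
static bool imageHandleSlot(ImageOpKind K, unsigned &Slot) {
  switch (K) {
  case TexFetch:   Slot = 4; return true; // operand 5 holds the samplerref
  case SurfLoadV1: Slot = 1; return true;
  case SurfLoadV2: Slot = 2; return true;
  case SurfLoadV4: Slot = 4; return true;
  case SurfStore:  Slot = 0; return true;
  case ImageQuery: Slot = 1; return true;
  case NotImageOp: return false;
  }
  return false;
}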
@@ -325,6 +607,13 @@ void NVPTXAsmPrinter::lowerToMCInst(const MachineInstr *MI, MCInst &OutMI) {
const MachineOperand &MO = MI->getOperand(i);
MCOperand MCOp;
+ if (!ST.hasImageHandles()) {
+ if (lowerImageHandleOperand(MI, i, MCOp)) {
+ OutMI.addOperand(MCOp);
+ continue;
+ }
+ }
+
if (lowerOperand(MO, MCOp))
OutMI.addOperand(MCOp);
}
@@ -345,10 +634,10 @@ bool NVPTXAsmPrinter::lowerOperand(const MachineOperand &MO,
MO.getMBB()->getSymbol(), OutContext));
break;
case MachineOperand::MO_ExternalSymbol:
- MCOp = GetSymbolRef(MO, GetExternalSymbolSymbol(MO.getSymbolName()));
+ MCOp = GetSymbolRef(GetExternalSymbolSymbol(MO.getSymbolName()));
break;
case MachineOperand::MO_GlobalAddress:
- MCOp = GetSymbolRef(MO, getSymbol(MO.getGlobal()));
+ MCOp = GetSymbolRef(getSymbol(MO.getGlobal()));
break;
case MachineOperand::MO_FPImmediate: {
const ConstantFP *Cnt = MO.getFPImm();
@@ -407,8 +696,7 @@ unsigned NVPTXAsmPrinter::encodeVirtualRegister(unsigned Reg) {
}
}
-MCOperand NVPTXAsmPrinter::GetSymbolRef(const MachineOperand &MO,
- const MCSymbol *Symbol) {
+MCOperand NVPTXAsmPrinter::GetSymbolRef(const MCSymbol *Symbol) {
const MCExpr *Expr;
Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None,
OutContext);
@@ -750,7 +1038,7 @@ static bool canDemoteGlobalVar(const GlobalVariable *gv, Function const *&f) {
if (Pty->getAddressSpace() != llvm::ADDRESS_SPACE_SHARED)
return false;
- const Function *oneFunc = 0;
+ const Function *oneFunc = nullptr;
bool flag = usedInOneFunc(gv, oneFunc);
if (flag == false)
@@ -1010,6 +1298,8 @@ bool NVPTXAsmPrinter::doFinalization(Module &M) {
for (i = 0; i < n; i++)
global_list.insert(global_list.end(), gv_array[i]);
+ clearAnnotationCache(&M);
+
delete[] gv_array;
return ret;
@@ -1105,10 +1395,10 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
if (llvm::isSampler(*GVar)) {
O << ".global .samplerref " << llvm::getSamplerName(*GVar);
- const Constant *Initializer = NULL;
+ const Constant *Initializer = nullptr;
if (GVar->hasInitializer())
Initializer = GVar->getInitializer();
- const ConstantInt *CI = NULL;
+ const ConstantInt *CI = nullptr;
if (Initializer)
CI = dyn_cast<ConstantInt>(Initializer);
if (CI) {
@@ -1175,7 +1465,7 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
return;
}
- const Function *demotedFunc = 0;
+ const Function *demotedFunc = nullptr;
if (!processDemoted && canDemoteGlobalVar(GVar, demotedFunc)) {
O << "// " << GVar->getName().str() << " has been demoted\n";
if (localDecls.find(demotedFunc) != localDecls.end())
@@ -1347,7 +1637,7 @@ NVPTXAsmPrinter::getPTXFundamentalTypeStr(const Type *Ty, bool useB4PTR) const {
return "u32";
}
llvm_unreachable("unexpected type");
- return NULL;
+ return nullptr;
}
void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar,
@@ -1495,19 +1785,33 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
first = false;
// Handle image/sampler parameters
- if (llvm::isSampler(*I) || llvm::isImage(*I)) {
- if (llvm::isImage(*I)) {
- std::string sname = I->getName();
- if (llvm::isImageWriteOnly(*I))
- O << "\t.param .surfref " << *getSymbol(F) << "_param_"
- << paramIndex;
- else // Default image is read_only
- O << "\t.param .texref " << *getSymbol(F) << "_param_"
- << paramIndex;
- } else // Should be llvm::isSampler(*I)
- O << "\t.param .samplerref " << *getSymbol(F) << "_param_"
- << paramIndex;
- continue;
+ if (isKernelFunction(*F)) {
+ if (isSampler(*I) || isImage(*I)) {
+ if (isImage(*I)) {
+ std::string sname = I->getName();
+ if (isImageWriteOnly(*I) || isImageReadWrite(*I)) {
+ if (nvptxSubtarget.hasImageHandles())
+ O << "\t.param .u64 .ptr .surfref ";
+ else
+ O << "\t.param .surfref ";
+ O << *CurrentFnSym << "_param_" << paramIndex;
+ }
+ else { // Default image is read_only
+ if (nvptxSubtarget.hasImageHandles())
+ O << "\t.param .u64 .ptr .texref ";
+ else
+ O << "\t.param .texref ";
+ O << *CurrentFnSym << "_param_" << paramIndex;
+ }
+ } else {
+ if (nvptxSubtarget.hasImageHandles())
+ O << "\t.param .u64 .ptr .samplerref ";
+ else
+ O << "\t.param .samplerref ";
+ O << *CurrentFnSym << "_param_" << paramIndex;
+ }
+ continue;
+ }
}
if (PAL.hasAttribute(paramIndex + 1, Attribute::ByVal) == false) {
@@ -1752,13 +2056,35 @@ void NVPTXAsmPrinter::printScalarConstant(const Constant *CPV, raw_ostream &O) {
return;
}
if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
- O << *getSymbol(GVar);
+ PointerType *PTy = dyn_cast<PointerType>(GVar->getType());
+ bool IsNonGenericPointer = false;
+ if (PTy && PTy->getAddressSpace() != 0) {
+ IsNonGenericPointer = true;
+ }
+ if (EmitGeneric && !isa<Function>(CPV) && !IsNonGenericPointer) {
+ O << "generic(";
+ O << *getSymbol(GVar);
+ O << ")";
+ } else {
+ O << *getSymbol(GVar);
+ }
return;
}
if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
const Value *v = Cexpr->stripPointerCasts();
+ PointerType *PTy = dyn_cast<PointerType>(Cexpr->getType());
+ bool IsNonGenericPointer = false;
+ if (PTy && PTy->getAddressSpace() != 0) {
+ IsNonGenericPointer = true;
+ }
if (const GlobalValue *GVar = dyn_cast<GlobalValue>(v)) {
- O << *getSymbol(GVar);
+ if (EmitGeneric && !isa<Function>(v) && !IsNonGenericPointer) {
+ O << "generic(";
+ O << *getSymbol(GVar);
+ O << ")";
+ } else {
+ O << *getSymbol(GVar);
+ }
return;
} else {
O << *LowerConstant(CPV, *this);
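
Both symbol-printing paths above apply the same predicate before wrapping a name in generic(). Distilled into a hypothetical helper, purely as a sketch:

#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"

// Sketch of the shared rule: wrap a symbol in generic() only for the CUDA
// interface, never for functions, and never for pointers that already live
// in a specific (non-zero) address space.
static bool needsGenericWrapper(bool EmitGeneric, const llvm::Value *V) {
  auto *PTy = llvm::dyn_cast<llvm::PointerType>(V->getType());
  bool IsNonGenericPointer = PTy && PTy->getAddressSpace() != 0;
  return EmitGeneric && !llvm::isa<llvm::Function>(V) && !IsNonGenericPointer;
}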
@@ -2121,7 +2447,7 @@ void NVPTXAsmPrinter::emitSrcInText(StringRef filename, unsigned line) {
}
LineReader *NVPTXAsmPrinter::getReader(std::string filename) {
- if (reader == NULL) {
+ if (!reader) {
reader = new LineReader(filename);
}
diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.h b/lib/Target/NVPTX/NVPTXAsmPrinter.h
index 7162420..a9f9bdd 100644
--- a/lib/Target/NVPTX/NVPTXAsmPrinter.h
+++ b/lib/Target/NVPTX/NVPTXAsmPrinter.h
@@ -96,6 +96,7 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter {
unsigned curpos;
raw_ostream &O;
NVPTXAsmPrinter &AP;
+ bool EmitGeneric;
public:
AggBuffer(unsigned _size, raw_ostream &_O, NVPTXAsmPrinter &_AP)
@@ -104,6 +105,7 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter {
size = _size;
curpos = 0;
numSymbols = 0;
+ EmitGeneric = AP.EmitGeneric;
}
~AggBuffer() { delete[] buffer; }
unsigned addBytes(unsigned char *Ptr, int Num, int Bytes) {
@@ -155,7 +157,18 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter {
const Value *v = Symbols[nSym];
if (const GlobalValue *GVar = dyn_cast<GlobalValue>(v)) {
MCSymbol *Name = AP.getSymbol(GVar);
- O << *Name;
+ PointerType *PTy = dyn_cast<PointerType>(GVar->getType());
+ bool IsNonGenericPointer = false;
+ if (PTy && PTy->getAddressSpace() != 0) {
+ IsNonGenericPointer = true;
+ }
+ if (EmitGeneric && !isa<Function>(v) && !IsNonGenericPointer) {
+ O << "generic(";
+ O << *Name;
+ O << ")";
+ } else {
+ O << *Name;
+ }
} else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(v)) {
O << *nvptx::LowerConstant(Cexpr, AP);
} else
@@ -176,31 +189,31 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter {
friend class AggBuffer;
- virtual void emitSrcInText(StringRef filename, unsigned line);
+ void emitSrcInText(StringRef filename, unsigned line);
private:
- virtual const char *getPassName() const { return "NVPTX Assembly Printer"; }
+ const char *getPassName() const override { return "NVPTX Assembly Printer"; }
const Function *F;
std::string CurrentFnName;
- void EmitFunctionEntryLabel();
- void EmitFunctionBodyStart();
- void EmitFunctionBodyEnd();
- void emitImplicitDef(const MachineInstr *MI) const;
+ void EmitFunctionEntryLabel() override;
+ void EmitFunctionBodyStart() override;
+ void EmitFunctionBodyEnd() override;
+ void emitImplicitDef(const MachineInstr *MI) const override;
- void EmitInstruction(const MachineInstr *);
+ void EmitInstruction(const MachineInstr *) override;
void lowerToMCInst(const MachineInstr *MI, MCInst &OutMI);
bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp);
- MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol);
+ MCOperand GetSymbolRef(const MCSymbol *Symbol);
unsigned encodeVirtualRegister(unsigned Reg);
- void EmitAlignment(unsigned NumBits, const GlobalValue *GV = 0) const {}
+ void EmitAlignment(unsigned NumBits, const GlobalValue *GV = nullptr) const {}
void printVecModifiedImmediate(const MachineOperand &MO, const char *Modifier,
raw_ostream &O);
void printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
- const char *Modifier = 0);
+ const char *Modifier = nullptr);
void printImplicitDef(const MachineInstr *MI, raw_ostream &O) const;
void printModuleLevelGV(const GlobalVariable *GVar, raw_ostream &O,
bool = false);
@@ -221,15 +234,15 @@ private:
void printReturnValStr(const MachineFunction &MF, raw_ostream &O);
bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
unsigned AsmVariant, const char *ExtraCode,
- raw_ostream &);
+ raw_ostream &) override;
void printOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
- const char *Modifier = 0);
+ const char *Modifier = nullptr);
bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
unsigned AsmVariant, const char *ExtraCode,
- raw_ostream &);
+ raw_ostream &) override;
protected:
- bool doInitialization(Module &M);
- bool doFinalization(Module &M);
+ bool doInitialization(Module &M) override;
+ bool doFinalization(Module &M) override;
private:
std::string CurrentBankselLabelInBasicBlock;
@@ -274,14 +287,33 @@ private:
static const char *getRegisterName(unsigned RegNo);
void emitDemotedVars(const Function *, raw_ostream &);
+ bool lowerImageHandleOperand(const MachineInstr *MI, unsigned OpNo,
+ MCOperand &MCOp);
+ void lowerImageHandleSymbol(unsigned Index, MCOperand &MCOp);
+
LineReader *reader;
LineReader *getReader(std::string);
+
+  // Used to control whether to emit generic() in the initializers of
+  // module-scope variables.
+  // Although PTX supports a hybrid mode like the following,
+  //    .global .u32 a;
+  //    .global .u32 b;
+  //    .global .u32 addr[] = {a, generic(b)}
+  // we have difficulty representing that distinction in NVVM IR.
+  //
+  // Since the address value should always be generic in CUDA C and always
+  // be specific in OpenCL, we use this simple control here.
+  //
+ bool EmitGeneric;
+
public:
NVPTXAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
: AsmPrinter(TM, Streamer),
nvptxSubtarget(TM.getSubtarget<NVPTXSubtarget>()) {
CurrentBankselLabelInBasicBlock = "";
- reader = NULL;
+ reader = nullptr;
+ EmitGeneric = (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA);
}
~NVPTXAsmPrinter() {
diff --git a/lib/Target/NVPTX/NVPTXAssignValidGlobalNames.cpp b/lib/Target/NVPTX/NVPTXAssignValidGlobalNames.cpp
index 158c482..962b123 100644
--- a/lib/Target/NVPTX/NVPTXAssignValidGlobalNames.cpp
+++ b/lib/Target/NVPTX/NVPTXAssignValidGlobalNames.cpp
@@ -33,7 +33,7 @@ public:
static char ID;
NVPTXAssignValidGlobalNames() : ModulePass(ID) {}
- virtual bool runOnModule(Module &M);
+ bool runOnModule(Module &M) override;
/// \brief Clean up the name to remove symbols invalid in PTX.
std::string cleanUpName(StringRef Name);
diff --git a/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp b/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp
new file mode 100644
index 0000000..f3a095d
--- /dev/null
+++ b/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp
@@ -0,0 +1,195 @@
+//===-- NVPTXFavorNonGenericAddrSpaces.cpp -----------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// When a load/store accesses the generic address space, this pass checks
+// whether the address was cast from a non-generic address space. If so, it
+// removes the addrspacecast, because accessing a non-generic address space
+// is typically faster. Besides plain addrspacecasts, the optimization also
+// traces through the base pointer of a GEP.
+//
+// For instance, the code below loads a float from an array allocated in
+// addrspace(3).
+//
+// %0 = addrspacecast [10 x float] addrspace(3)* @a to [10 x float]*
+// %1 = gep [10 x float]* %0, i64 0, i64 %i
+// %2 = load float* %1 ; emits ld.f32
+//
+// First, function hoistAddrSpaceCastFromGEP reorders the addrspacecast
+// and the GEP to expose more optimization opportunities to function
+// optimizeMemoryInst. The intermediate code looks like:
+//
+// %0 = gep [10 x float] addrspace(3)* @a, i64 0, i64 %i
+// %1 = addrspacecast float addrspace(3)* %0 to float*
+// %2 = load float* %1 ; still emits ld.f32, but will be optimized shortly
+//
+// Then, function optimizeMemoryInstruction detects a load from addrspacecast'ed
+// generic pointers, and folds the load and the addrspacecast into a load from
+// the original address space. The final code looks like:
+//
+// %0 = gep [10 x float] addrspace(3)* @a, i64 0, i64 %i
+// %2 = load float addrspace(3)* %0 ; emits ld.shared.f32
+//
+// This pass may remove an addrspacecast in a different BB. Therefore, we
+// implement it as a FunctionPass.
+//
+//===----------------------------------------------------------------------===//
+
+#include "NVPTX.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/Support/CommandLine.h"
+
+using namespace llvm;
+
+// An option to disable this optimization. It is enabled by default.
+static cl::opt<bool> DisableFavorNonGeneric(
+ "disable-nvptx-favor-non-generic",
+ cl::init(false),
+ cl::desc("Do not convert generic address space usage "
+ "to non-generic address space usage"),
+ cl::Hidden);
+
+namespace {
+/// \brief Favors specific (non-generic) address spaces in loads and stores.
+class NVPTXFavorNonGenericAddrSpaces : public FunctionPass {
+public:
+ static char ID;
+ NVPTXFavorNonGenericAddrSpaces() : FunctionPass(ID) {}
+
+ bool runOnFunction(Function &F) override;
+
+ /// Optimizes load/store instructions. Idx is the index of the pointer operand
+ /// (0 for load, and 1 for store). Returns true if it changes anything.
+ bool optimizeMemoryInstruction(Instruction *I, unsigned Idx);
+ /// Transforms "gep (addrspacecast X), indices" into "addrspacecast (gep X,
+ /// indices)". This reordering exposes to optimizeMemoryInstruction more
+ /// optimization opportunities on loads and stores. Returns true if it changes
+ /// the program.
+ bool hoistAddrSpaceCastFromGEP(GEPOperator *GEP);
+};
+}
+
+char NVPTXFavorNonGenericAddrSpaces::ID = 0;
+
+namespace llvm {
+void initializeNVPTXFavorNonGenericAddrSpacesPass(PassRegistry &);
+}
+INITIALIZE_PASS(NVPTXFavorNonGenericAddrSpaces, "nvptx-favor-non-generic",
+ "Remove unnecessary non-generic-to-generic addrspacecasts",
+ false, false)
+
+// Decides whether removing Cast is valid and beneficial. Cast can be an
+// instruction or a constant expression.
+static bool IsEliminableAddrSpaceCast(Operator *Cast) {
+ // Returns false if not even an addrspacecast.
+ if (Cast->getOpcode() != Instruction::AddrSpaceCast)
+ return false;
+
+ Value *Src = Cast->getOperand(0);
+ PointerType *SrcTy = cast<PointerType>(Src->getType());
+ PointerType *DestTy = cast<PointerType>(Cast->getType());
+  // TODO: For now, we handle only the case where the addrspacecast changes
+  // the address space but not the pointee type. If the type also changes, we
+  // could still eliminate the addrspacecast by adding an extra bitcast, but
+  // we rarely see such scenarios.
+ if (SrcTy->getElementType() != DestTy->getElementType())
+ return false;
+
+ // Checks whether the addrspacecast is from a non-generic address space to the
+ // generic address space.
+ return (SrcTy->getAddressSpace() != AddressSpace::ADDRESS_SPACE_GENERIC &&
+ DestTy->getAddressSpace() == AddressSpace::ADDRESS_SPACE_GENERIC);
+}
+
+bool NVPTXFavorNonGenericAddrSpaces::hoistAddrSpaceCastFromGEP(
+ GEPOperator *GEP) {
+ Operator *Cast = dyn_cast<Operator>(GEP->getPointerOperand());
+ if (!Cast)
+ return false;
+
+ if (!IsEliminableAddrSpaceCast(Cast))
+ return false;
+
+ SmallVector<Value *, 8> Indices(GEP->idx_begin(), GEP->idx_end());
+ if (Instruction *GEPI = dyn_cast<Instruction>(GEP)) {
+ // %1 = gep (addrspacecast X), indices
+ // =>
+ // %0 = gep X, indices
+ // %1 = addrspacecast %0
+ GetElementPtrInst *NewGEPI = GetElementPtrInst::Create(Cast->getOperand(0),
+ Indices,
+ GEP->getName(),
+ GEPI);
+ NewGEPI->setIsInBounds(GEP->isInBounds());
+ GEP->replaceAllUsesWith(
+ new AddrSpaceCastInst(NewGEPI, GEP->getType(), "", GEPI));
+ } else {
+ // GEP is a constant expression.
+ Constant *NewGEPCE = ConstantExpr::getGetElementPtr(
+ cast<Constant>(Cast->getOperand(0)),
+ Indices,
+ GEP->isInBounds());
+ GEP->replaceAllUsesWith(
+ ConstantExpr::getAddrSpaceCast(NewGEPCE, GEP->getType()));
+ }
+
+ return true;
+}
+
+bool NVPTXFavorNonGenericAddrSpaces::optimizeMemoryInstruction(Instruction *MI,
+ unsigned Idx) {
+ // If the pointer operand is a GEP, hoist the addrspacecast if any from the
+  // GEP to expose more optimization opportunities.
+ if (GEPOperator *GEP = dyn_cast<GEPOperator>(MI->getOperand(Idx))) {
+ hoistAddrSpaceCastFromGEP(GEP);
+ }
+
+ // load/store (addrspacecast X) => load/store X if shortcutting the
+ // addrspacecast is valid and can improve performance.
+ //
+ // e.g.,
+ // %1 = addrspacecast float addrspace(3)* %0 to float*
+ // %2 = load float* %1
+ // ->
+ // %2 = load float addrspace(3)* %0
+ //
+ // Note: the addrspacecast can also be a constant expression.
+ if (Operator *Cast = dyn_cast<Operator>(MI->getOperand(Idx))) {
+ if (IsEliminableAddrSpaceCast(Cast)) {
+ MI->setOperand(Idx, Cast->getOperand(0));
+ return true;
+ }
+ }
+
+ return false;
+}
+
+bool NVPTXFavorNonGenericAddrSpaces::runOnFunction(Function &F) {
+ if (DisableFavorNonGeneric)
+ return false;
+
+ bool Changed = false;
+ for (Function::iterator B = F.begin(), BE = F.end(); B != BE; ++B) {
+ for (BasicBlock::iterator I = B->begin(), IE = B->end(); I != IE; ++I) {
+ if (isa<LoadInst>(I)) {
+ // V = load P
+ Changed |= optimizeMemoryInstruction(I, 0);
+ } else if (isa<StoreInst>(I)) {
+ // store V, P
+ Changed |= optimizeMemoryInstruction(I, 1);
+ }
+ }
+ }
+ return Changed;
+}
+
+FunctionPass *llvm::createNVPTXFavorNonGenericAddrSpacesPass() {
+ return new NVPTXFavorNonGenericAddrSpaces();
+}
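
End to end, the pass turns the example from the file's header comment into a non-generic access. An illustrative IR test in 3.5-era typed-pointer syntax; the RUN line is a sketch, assuming the usual mapping from the INITIALIZE_PASS name above to an opt flag:

; RUN: opt -nvptx-favor-non-generic -S %s   (illustrative invocation)
@a = internal addrspace(3) global [10 x float] zeroinitializer

define float @get(i64 %i) {
  %p = addrspacecast [10 x float] addrspace(3)* @a to [10 x float]*
  %q = getelementptr inbounds [10 x float]* %p, i64 0, i64 %i
  %v = load float* %q        ; after the pass: load float addrspace(3)*
  ret float %v
}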
diff --git a/lib/Target/NVPTX/NVPTXFrameLowering.h b/lib/Target/NVPTX/NVPTXFrameLowering.h
index 819f1dd..2ae6d72 100644
--- a/lib/Target/NVPTX/NVPTXFrameLowering.h
+++ b/lib/Target/NVPTX/NVPTXFrameLowering.h
@@ -28,13 +28,13 @@ public:
: TargetFrameLowering(TargetFrameLowering::StackGrowsUp, 8, 0), tm(_tm),
is64bit(_is64bit) {}
- virtual bool hasFP(const MachineFunction &MF) const;
- virtual void emitPrologue(MachineFunction &MF) const;
- virtual void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+ bool hasFP(const MachineFunction &MF) const override;
+ void emitPrologue(MachineFunction &MF) const override;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
void eliminateCallFramePseudoInstr(MachineFunction &MF,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const;
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const override;
};
} // End llvm namespace
diff --git a/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp b/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp
index 45f0734..023dd5e 100644
--- a/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp
+++ b/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp
@@ -40,10 +40,9 @@ public:
GenericToNVVM() : ModulePass(ID) {}
- virtual bool runOnModule(Module &M);
+ bool runOnModule(Module &M) override;
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- }
+ void getAnalysisUsage(AnalysisUsage &AU) const override {}
private:
Value *getOrInsertCVTA(Module *M, Function *F, GlobalVariable *GV,
@@ -88,7 +87,8 @@ bool GenericToNVVM::runOnModule(Module &M) {
!GV->getName().startswith("llvm.")) {
GlobalVariable *NewGV = new GlobalVariable(
M, GV->getType()->getElementType(), GV->isConstant(),
- GV->getLinkage(), GV->hasInitializer() ? GV->getInitializer() : NULL,
+ GV->getLinkage(),
+ GV->hasInitializer() ? GV->getInitializer() : nullptr,
"", GV, GV->getThreadLocalMode(), llvm::ADDRESS_SPACE_GLOBAL);
NewGV->copyAttributesFrom(GV);
GVMap[GV] = NewGV;
@@ -162,7 +162,7 @@ Value *GenericToNVVM::getOrInsertCVTA(Module *M, Function *F,
GlobalVariable *GV,
IRBuilder<> &Builder) {
PointerType *GVType = GV->getType();
- Value *CVTA = NULL;
+ Value *CVTA = nullptr;
// See if the address space conversion requires the operand to be bitcast
// to i8 addrspace(n)* first.
diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index bd08d2d..cd30880 100644
--- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -20,11 +20,10 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetIntrinsicInfo.h"
-#undef DEBUG_TYPE
-#define DEBUG_TYPE "nvptx-isel"
-
using namespace llvm;
+#define DEBUG_TYPE "nvptx-isel"
+
static cl::opt<int>
FMAContractLevel("nvptx-fma-level", cl::ZeroOrMore, cl::Hidden,
cl::desc("NVPTX Specific: FMA contraction (0: don't do it"
@@ -120,10 +119,10 @@ SDNode *NVPTXDAGToDAGISel::Select(SDNode *N) {
if (N->isMachineOpcode()) {
N->setNodeId(-1);
- return NULL; // Already selected.
+ return nullptr; // Already selected.
}
- SDNode *ResNode = NULL;
+ SDNode *ResNode = nullptr;
switch (N->getOpcode()) {
case ISD::LOAD:
ResNode = SelectLoad(N);
@@ -162,6 +161,98 @@ SDNode *NVPTXDAGToDAGISel::Select(SDNode *N) {
case NVPTXISD::StoreParamU32:
ResNode = SelectStoreParam(N);
break;
+ case ISD::INTRINSIC_WO_CHAIN:
+ ResNode = SelectIntrinsicNoChain(N);
+ break;
+ case NVPTXISD::Tex1DFloatI32:
+ case NVPTXISD::Tex1DFloatFloat:
+ case NVPTXISD::Tex1DFloatFloatLevel:
+ case NVPTXISD::Tex1DFloatFloatGrad:
+ case NVPTXISD::Tex1DI32I32:
+ case NVPTXISD::Tex1DI32Float:
+ case NVPTXISD::Tex1DI32FloatLevel:
+ case NVPTXISD::Tex1DI32FloatGrad:
+ case NVPTXISD::Tex1DArrayFloatI32:
+ case NVPTXISD::Tex1DArrayFloatFloat:
+ case NVPTXISD::Tex1DArrayFloatFloatLevel:
+ case NVPTXISD::Tex1DArrayFloatFloatGrad:
+ case NVPTXISD::Tex1DArrayI32I32:
+ case NVPTXISD::Tex1DArrayI32Float:
+ case NVPTXISD::Tex1DArrayI32FloatLevel:
+ case NVPTXISD::Tex1DArrayI32FloatGrad:
+ case NVPTXISD::Tex2DFloatI32:
+ case NVPTXISD::Tex2DFloatFloat:
+ case NVPTXISD::Tex2DFloatFloatLevel:
+ case NVPTXISD::Tex2DFloatFloatGrad:
+ case NVPTXISD::Tex2DI32I32:
+ case NVPTXISD::Tex2DI32Float:
+ case NVPTXISD::Tex2DI32FloatLevel:
+ case NVPTXISD::Tex2DI32FloatGrad:
+ case NVPTXISD::Tex2DArrayFloatI32:
+ case NVPTXISD::Tex2DArrayFloatFloat:
+ case NVPTXISD::Tex2DArrayFloatFloatLevel:
+ case NVPTXISD::Tex2DArrayFloatFloatGrad:
+ case NVPTXISD::Tex2DArrayI32I32:
+ case NVPTXISD::Tex2DArrayI32Float:
+ case NVPTXISD::Tex2DArrayI32FloatLevel:
+ case NVPTXISD::Tex2DArrayI32FloatGrad:
+ case NVPTXISD::Tex3DFloatI32:
+ case NVPTXISD::Tex3DFloatFloat:
+ case NVPTXISD::Tex3DFloatFloatLevel:
+ case NVPTXISD::Tex3DFloatFloatGrad:
+ case NVPTXISD::Tex3DI32I32:
+ case NVPTXISD::Tex3DI32Float:
+ case NVPTXISD::Tex3DI32FloatLevel:
+ case NVPTXISD::Tex3DI32FloatGrad:
+ ResNode = SelectTextureIntrinsic(N);
+ break;
+ case NVPTXISD::Suld1DI8Trap:
+ case NVPTXISD::Suld1DI16Trap:
+ case NVPTXISD::Suld1DI32Trap:
+ case NVPTXISD::Suld1DV2I8Trap:
+ case NVPTXISD::Suld1DV2I16Trap:
+ case NVPTXISD::Suld1DV2I32Trap:
+ case NVPTXISD::Suld1DV4I8Trap:
+ case NVPTXISD::Suld1DV4I16Trap:
+ case NVPTXISD::Suld1DV4I32Trap:
+ case NVPTXISD::Suld1DArrayI8Trap:
+ case NVPTXISD::Suld1DArrayI16Trap:
+ case NVPTXISD::Suld1DArrayI32Trap:
+ case NVPTXISD::Suld1DArrayV2I8Trap:
+ case NVPTXISD::Suld1DArrayV2I16Trap:
+ case NVPTXISD::Suld1DArrayV2I32Trap:
+ case NVPTXISD::Suld1DArrayV4I8Trap:
+ case NVPTXISD::Suld1DArrayV4I16Trap:
+ case NVPTXISD::Suld1DArrayV4I32Trap:
+ case NVPTXISD::Suld2DI8Trap:
+ case NVPTXISD::Suld2DI16Trap:
+ case NVPTXISD::Suld2DI32Trap:
+ case NVPTXISD::Suld2DV2I8Trap:
+ case NVPTXISD::Suld2DV2I16Trap:
+ case NVPTXISD::Suld2DV2I32Trap:
+ case NVPTXISD::Suld2DV4I8Trap:
+ case NVPTXISD::Suld2DV4I16Trap:
+ case NVPTXISD::Suld2DV4I32Trap:
+ case NVPTXISD::Suld2DArrayI8Trap:
+ case NVPTXISD::Suld2DArrayI16Trap:
+ case NVPTXISD::Suld2DArrayI32Trap:
+ case NVPTXISD::Suld2DArrayV2I8Trap:
+ case NVPTXISD::Suld2DArrayV2I16Trap:
+ case NVPTXISD::Suld2DArrayV2I32Trap:
+ case NVPTXISD::Suld2DArrayV4I8Trap:
+ case NVPTXISD::Suld2DArrayV4I16Trap:
+ case NVPTXISD::Suld2DArrayV4I32Trap:
+ case NVPTXISD::Suld3DI8Trap:
+ case NVPTXISD::Suld3DI16Trap:
+ case NVPTXISD::Suld3DI32Trap:
+ case NVPTXISD::Suld3DV2I8Trap:
+ case NVPTXISD::Suld3DV2I16Trap:
+ case NVPTXISD::Suld3DV2I32Trap:
+ case NVPTXISD::Suld3DV4I8Trap:
+ case NVPTXISD::Suld3DV4I16Trap:
+ case NVPTXISD::Suld3DV4I32Trap:
+ ResNode = SelectSurfaceIntrinsic(N);
+ break;
case ISD::ADDRSPACECAST:
ResNode = SelectAddrSpaceCast(N);
break;
@@ -175,7 +266,7 @@ SDNode *NVPTXDAGToDAGISel::Select(SDNode *N) {
static unsigned int getCodeAddrSpace(MemSDNode *N,
const NVPTXSubtarget &Subtarget) {
- const Value *Src = N->getSrcValue();
+ const Value *Src = N->getMemOperand()->getValue();
if (!Src)
return NVPTX::PTXLdStInstCode::GENERIC;
@@ -194,6 +285,24 @@ static unsigned int getCodeAddrSpace(MemSDNode *N,
return NVPTX::PTXLdStInstCode::GENERIC;
}
+SDNode *NVPTXDAGToDAGISel::SelectIntrinsicNoChain(SDNode *N) {
+ unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
+ switch (IID) {
+ default:
+ return nullptr;
+ case Intrinsic::nvvm_texsurf_handle_internal:
+ return SelectTexSurfHandle(N);
+ }
+}
+
+SDNode *NVPTXDAGToDAGISel::SelectTexSurfHandle(SDNode *N) {
+ // Op 0 is the intrinsic ID
+ SDValue Wrapper = N->getOperand(1);
+ SDValue GlobalVal = Wrapper.getOperand(0);
+ return CurDAG->getMachineNode(NVPTX::texsurf_handles, SDLoc(N), MVT::i64,
+ GlobalVal);
+}
+
SDNode *NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
SDValue Src = N->getOperand(0);
AddrSpaceCastSDNode *CastN = cast<AddrSpaceCastSDNode>(N);
@@ -258,14 +367,14 @@ SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
SDLoc dl(N);
LoadSDNode *LD = cast<LoadSDNode>(N);
EVT LoadedVT = LD->getMemoryVT();
- SDNode *NVPTXLD = NULL;
+ SDNode *NVPTXLD = nullptr;
// do not support pre/post inc/dec
if (LD->isIndexed())
- return NULL;
+ return nullptr;
if (!LoadedVT.isSimple())
- return NULL;
+ return nullptr;
// Address Space Setting
unsigned int codeAddrSpace = getCodeAddrSpace(LD, Subtarget);
@@ -288,7 +397,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
else if (num == 4)
vecType = NVPTX::PTXLdStInstCode::V4;
else
- return NULL;
+ return nullptr;
}
// Type Setting: fromType + fromTypeWidth
@@ -337,7 +446,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
Opcode = NVPTX::LD_f64_avar;
break;
default:
- return NULL;
+ return nullptr;
}
SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
getI32Imm(vecType), getI32Imm(fromType),
@@ -366,7 +475,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
Opcode = NVPTX::LD_f64_asi;
break;
default:
- return NULL;
+ return nullptr;
}
SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
getI32Imm(vecType), getI32Imm(fromType),
@@ -396,7 +505,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
Opcode = NVPTX::LD_f64_ari_64;
break;
default:
- return NULL;
+ return nullptr;
}
} else {
switch (TargetVT) {
@@ -419,7 +528,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
Opcode = NVPTX::LD_f64_ari;
break;
default:
- return NULL;
+ return nullptr;
}
}
SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
@@ -448,7 +557,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
Opcode = NVPTX::LD_f64_areg_64;
break;
default:
- return NULL;
+ return nullptr;
}
} else {
switch (TargetVT) {
@@ -471,7 +580,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
Opcode = NVPTX::LD_f64_areg;
break;
default:
- return NULL;
+ return nullptr;
}
}
SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
@@ -480,7 +589,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
}
- if (NVPTXLD != NULL) {
+ if (NVPTXLD) {
MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
cast<MachineSDNode>(NVPTXLD)->setMemRefs(MemRefs0, MemRefs0 + 1);
@@ -501,7 +610,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {
EVT LoadedVT = MemSD->getMemoryVT();
if (!LoadedVT.isSimple())
- return NULL;
+ return nullptr;
// Address Space Setting
unsigned int CodeAddrSpace = getCodeAddrSpace(MemSD, Subtarget);
@@ -547,7 +656,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {
VecType = NVPTX::PTXLdStInstCode::V4;
break;
default:
- return NULL;
+ return nullptr;
}
EVT EltVT = N->getValueType(0);
@@ -555,11 +664,11 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {
if (SelectDirectAddr(Op1, Addr)) {
switch (N->getOpcode()) {
default:
- return NULL;
+ return nullptr;
case NVPTXISD::LoadV2:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
- return NULL;
+ return nullptr;
case MVT::i8:
Opcode = NVPTX::LDV_i8_v2_avar;
break;
@@ -583,7 +692,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {
case NVPTXISD::LoadV4:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
- return NULL;
+ return nullptr;
case MVT::i8:
Opcode = NVPTX::LDV_i8_v4_avar;
break;
@@ -609,11 +718,11 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {
: SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) {
switch (N->getOpcode()) {
default:
- return NULL;
+ return nullptr;
case NVPTXISD::LoadV2:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
- return NULL;
+ return nullptr;
case MVT::i8:
Opcode = NVPTX::LDV_i8_v2_asi;
break;
@@ -637,7 +746,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {
case NVPTXISD::LoadV4:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
- return NULL;
+ return nullptr;
case MVT::i8:
Opcode = NVPTX::LDV_i8_v4_asi;
break;
@@ -664,11 +773,11 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {
if (Subtarget.is64Bit()) {
switch (N->getOpcode()) {
default:
- return NULL;
+ return nullptr;
case NVPTXISD::LoadV2:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
- return NULL;
+ return nullptr;
case MVT::i8:
Opcode = NVPTX::LDV_i8_v2_ari_64;
break;
@@ -692,7 +801,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {
case NVPTXISD::LoadV4:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
- return NULL;
+ return nullptr;
case MVT::i8:
Opcode = NVPTX::LDV_i8_v4_ari_64;
break;
@@ -711,11 +820,11 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {
} else {
switch (N->getOpcode()) {
default:
- return NULL;
+ return nullptr;
case NVPTXISD::LoadV2:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
- return NULL;
+ return nullptr;
case MVT::i8:
Opcode = NVPTX::LDV_i8_v2_ari;
break;
@@ -739,7 +848,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {
case NVPTXISD::LoadV4:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
- return NULL;
+ return nullptr;
case MVT::i8:
Opcode = NVPTX::LDV_i8_v4_ari;
break;
@@ -766,11 +875,11 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {
if (Subtarget.is64Bit()) {
switch (N->getOpcode()) {
default:
- return NULL;
+ return nullptr;
case NVPTXISD::LoadV2:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
- return NULL;
+ return nullptr;
case MVT::i8:
Opcode = NVPTX::LDV_i8_v2_areg_64;
break;
@@ -794,7 +903,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {
case NVPTXISD::LoadV4:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
- return NULL;
+ return nullptr;
case MVT::i8:
Opcode = NVPTX::LDV_i8_v4_areg_64;
break;
@@ -813,11 +922,11 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {
} else {
switch (N->getOpcode()) {
default:
- return NULL;
+ return nullptr;
case NVPTXISD::LoadV2:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
- return NULL;
+ return nullptr;
case MVT::i8:
Opcode = NVPTX::LDV_i8_v2_areg;
break;
@@ -841,7 +950,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {
case NVPTXISD::LoadV4:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
- return NULL;
+ return nullptr;
case MVT::i8:
Opcode = NVPTX::LDV_i8_v4_areg;
break;
@@ -887,11 +996,11 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) {
if (SelectDirectAddr(Op1, Addr)) {
switch (N->getOpcode()) {
default:
- return NULL;
+ return nullptr;
case NVPTXISD::LDGV2:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
- return NULL;
+ return nullptr;
case MVT::i8:
Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_avar;
break;
@@ -915,7 +1024,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) {
case NVPTXISD::LDUV2:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
- return NULL;
+ return nullptr;
case MVT::i8:
Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_avar;
break;
@@ -939,7 +1048,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) {
case NVPTXISD::LDGV4:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
- return NULL;
+ return nullptr;
case MVT::i8:
Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_avar;
break;
@@ -957,7 +1066,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) {
case NVPTXISD::LDUV4:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
- return NULL;
+ return nullptr;
case MVT::i8:
Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_avar;
break;
@@ -975,19 +1084,18 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) {
}
SDValue Ops[] = { Addr, Chain };
- LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(),
- ArrayRef<SDValue>(Ops, 2));
+ LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
} else if (Subtarget.is64Bit()
? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
: SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
if (Subtarget.is64Bit()) {
switch (N->getOpcode()) {
default:
- return NULL;
+ return nullptr;
case NVPTXISD::LDGV2:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
- return NULL;
+ return nullptr;
case MVT::i8:
Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari64;
break;
@@ -1011,7 +1119,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) {
case NVPTXISD::LDUV2:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
- return NULL;
+ return nullptr;
case MVT::i8:
Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari64;
break;
@@ -1035,7 +1143,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) {
case NVPTXISD::LDGV4:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
- return NULL;
+ return nullptr;
case MVT::i8:
Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari64;
break;
@@ -1053,7 +1161,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) {
case NVPTXISD::LDUV4:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
- return NULL;
+ return nullptr;
case MVT::i8:
Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari64;
break;
@@ -1072,11 +1180,11 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) {
} else {
switch (N->getOpcode()) {
default:
- return NULL;
+ return nullptr;
case NVPTXISD::LDGV2:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
- return NULL;
+ return nullptr;
case MVT::i8:
Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari32;
break;
@@ -1100,7 +1208,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) {
case NVPTXISD::LDUV2:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
- return NULL;
+ return nullptr;
case MVT::i8:
Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari32;
break;
@@ -1124,7 +1232,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) {
case NVPTXISD::LDGV4:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
- return NULL;
+ return nullptr;
case MVT::i8:
Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari32;
break;
@@ -1142,7 +1250,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) {
case NVPTXISD::LDUV4:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
- return NULL;
+ return nullptr;
case MVT::i8:
Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari32;
break;
@@ -1162,17 +1270,16 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) {
SDValue Ops[] = { Base, Offset, Chain };
- LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(),
- ArrayRef<SDValue>(Ops, 3));
+ LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
} else {
if (Subtarget.is64Bit()) {
switch (N->getOpcode()) {
default:
- return NULL;
+ return nullptr;
case NVPTXISD::LDGV2:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
- return NULL;
+ return nullptr;
case MVT::i8:
Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg64;
break;
@@ -1196,7 +1303,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) {
case NVPTXISD::LDUV2:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
- return NULL;
+ return nullptr;
case MVT::i8:
Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg64;
break;
@@ -1220,7 +1327,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) {
case NVPTXISD::LDGV4:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
- return NULL;
+ return nullptr;
case MVT::i8:
Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg64;
break;
@@ -1238,7 +1345,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) {
case NVPTXISD::LDUV4:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
- return NULL;
+ return nullptr;
case MVT::i8:
Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg64;
break;
@@ -1257,11 +1364,11 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) {
} else {
switch (N->getOpcode()) {
default:
- return NULL;
+ return nullptr;
case NVPTXISD::LDGV2:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
- return NULL;
+ return nullptr;
case MVT::i8:
Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg32;
break;
@@ -1285,7 +1392,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) {
case NVPTXISD::LDUV2:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
- return NULL;
+ return nullptr;
case MVT::i8:
Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg32;
break;
@@ -1309,7 +1416,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) {
case NVPTXISD::LDGV4:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
- return NULL;
+ return nullptr;
case MVT::i8:
Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg32;
break;
@@ -1327,7 +1434,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) {
case NVPTXISD::LDUV4:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
- return NULL;
+ return nullptr;
case MVT::i8:
Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg32;
break;
@@ -1346,8 +1453,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) {
}
SDValue Ops[] = { Op1, Chain };
- LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(),
- ArrayRef<SDValue>(Ops, 2));
+ LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
}
MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
@@ -1361,14 +1467,14 @@ SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
SDLoc dl(N);
StoreSDNode *ST = cast<StoreSDNode>(N);
EVT StoreVT = ST->getMemoryVT();
- SDNode *NVPTXST = NULL;
+ SDNode *NVPTXST = nullptr;
// do not support pre/post inc/dec
if (ST->isIndexed())
- return NULL;
+ return nullptr;
if (!StoreVT.isSimple())
- return NULL;
+ return nullptr;
// Address Space Setting
unsigned int codeAddrSpace = getCodeAddrSpace(ST, Subtarget);
@@ -1391,7 +1497,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
else if (num == 4)
vecType = NVPTX::PTXLdStInstCode::V4;
else
- return NULL;
+ return nullptr;
}
// Type Setting: toType + toTypeWidth
@@ -1435,7 +1541,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
Opcode = NVPTX::ST_f64_avar;
break;
default:
- return NULL;
+ return nullptr;
}
SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
getI32Imm(vecType), getI32Imm(toType),
@@ -1464,7 +1570,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
Opcode = NVPTX::ST_f64_asi;
break;
default:
- return NULL;
+ return nullptr;
}
SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
getI32Imm(vecType), getI32Imm(toType),
@@ -1494,7 +1600,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
Opcode = NVPTX::ST_f64_ari_64;
break;
default:
- return NULL;
+ return nullptr;
}
} else {
switch (SourceVT) {
@@ -1517,7 +1623,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
Opcode = NVPTX::ST_f64_ari;
break;
default:
- return NULL;
+ return nullptr;
}
}
SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
@@ -1546,7 +1652,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
Opcode = NVPTX::ST_f64_areg_64;
break;
default:
- return NULL;
+ return nullptr;
}
} else {
switch (SourceVT) {
@@ -1569,7 +1675,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
Opcode = NVPTX::ST_f64_areg;
break;
default:
- return NULL;
+ return nullptr;
}
}
SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
@@ -1578,7 +1684,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
}
- if (NVPTXST != NULL) {
+ if (NVPTXST) {
MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
cast<MachineSDNode>(NVPTXST)->setMemRefs(MemRefs0, MemRefs0 + 1);
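This memref-transfer idiom recurs after every getMachineNode call in this file: allocate a one-element array from the MachineFunction, copy the MemSDNode's operand, and hand the [begin, end) range to the machine node. A condensed sketch using the same calls as above (the helper itself is hypothetical, not in the patch):

static void transferMemOperand(MachineFunction *MF, SDNode *From, SDNode *To) {
  MachineSDNode::mmo_iterator MemRefs = MF->allocateMemRefsArray(1);
  MemRefs[0] = cast<MemSDNode>(From)->getMemOperand();
  cast<MachineSDNode>(To)->setMemRefs(MemRefs, MemRefs + 1);
}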
@@ -1645,7 +1751,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) {
N2 = N->getOperand(5);
break;
default:
- return NULL;
+ return nullptr;
}
StOps.push_back(getI32Imm(IsVolatile));
@@ -1657,11 +1763,11 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) {
if (SelectDirectAddr(N2, Addr)) {
switch (N->getOpcode()) {
default:
- return NULL;
+ return nullptr;
case NVPTXISD::StoreV2:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
- return NULL;
+ return nullptr;
case MVT::i8:
Opcode = NVPTX::STV_i8_v2_avar;
break;
@@ -1685,7 +1791,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) {
case NVPTXISD::StoreV4:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
- return NULL;
+ return nullptr;
case MVT::i8:
Opcode = NVPTX::STV_i8_v4_avar;
break;
@@ -1707,11 +1813,11 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) {
: SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
switch (N->getOpcode()) {
default:
- return NULL;
+ return nullptr;
case NVPTXISD::StoreV2:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
- return NULL;
+ return nullptr;
case MVT::i8:
Opcode = NVPTX::STV_i8_v2_asi;
break;
@@ -1735,7 +1841,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) {
case NVPTXISD::StoreV4:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
- return NULL;
+ return nullptr;
case MVT::i8:
Opcode = NVPTX::STV_i8_v4_asi;
break;
@@ -1759,11 +1865,11 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) {
if (Subtarget.is64Bit()) {
switch (N->getOpcode()) {
default:
- return NULL;
+ return nullptr;
case NVPTXISD::StoreV2:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
- return NULL;
+ return nullptr;
case MVT::i8:
Opcode = NVPTX::STV_i8_v2_ari_64;
break;
@@ -1787,7 +1893,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) {
case NVPTXISD::StoreV4:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
- return NULL;
+ return nullptr;
case MVT::i8:
Opcode = NVPTX::STV_i8_v4_ari_64;
break;
@@ -1806,11 +1912,11 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) {
} else {
switch (N->getOpcode()) {
default:
- return NULL;
+ return nullptr;
case NVPTXISD::StoreV2:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
- return NULL;
+ return nullptr;
case MVT::i8:
Opcode = NVPTX::STV_i8_v2_ari;
break;
@@ -1834,7 +1940,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) {
case NVPTXISD::StoreV4:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
- return NULL;
+ return nullptr;
case MVT::i8:
Opcode = NVPTX::STV_i8_v4_ari;
break;
@@ -1857,11 +1963,11 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) {
if (Subtarget.is64Bit()) {
switch (N->getOpcode()) {
default:
- return NULL;
+ return nullptr;
case NVPTXISD::StoreV2:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
- return NULL;
+ return nullptr;
case MVT::i8:
Opcode = NVPTX::STV_i8_v2_areg_64;
break;
@@ -1885,7 +1991,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) {
case NVPTXISD::StoreV4:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
- return NULL;
+ return nullptr;
case MVT::i8:
Opcode = NVPTX::STV_i8_v4_areg_64;
break;
@@ -1904,11 +2010,11 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) {
} else {
switch (N->getOpcode()) {
default:
- return NULL;
+ return nullptr;
case NVPTXISD::StoreV2:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
- return NULL;
+ return nullptr;
case MVT::i8:
Opcode = NVPTX::STV_i8_v2_areg;
break;
@@ -1932,7 +2038,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) {
case NVPTXISD::StoreV4:
switch (EltVT.getSimpleVT().SimpleTy) {
default:
- return NULL;
+ return nullptr;
case MVT::i8:
Opcode = NVPTX::STV_i8_v4_areg;
break;
@@ -1973,7 +2079,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadParam(SDNode *Node) {
unsigned VecSize;
switch (Node->getOpcode()) {
default:
- return NULL;
+ return nullptr;
case NVPTXISD::LoadParam:
VecSize = 1;
break;
@@ -1992,11 +2098,11 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadParam(SDNode *Node) {
switch (VecSize) {
default:
- return NULL;
+ return nullptr;
case 1:
switch (MemVT.getSimpleVT().SimpleTy) {
default:
- return NULL;
+ return nullptr;
case MVT::i1:
Opc = NVPTX::LoadParamMemI8;
break;
@@ -2023,7 +2129,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadParam(SDNode *Node) {
case 2:
switch (MemVT.getSimpleVT().SimpleTy) {
default:
- return NULL;
+ return nullptr;
case MVT::i1:
Opc = NVPTX::LoadParamMemV2I8;
break;
@@ -2050,7 +2156,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadParam(SDNode *Node) {
case 4:
switch (MemVT.getSimpleVT().SimpleTy) {
default:
- return NULL;
+ return nullptr;
case MVT::i1:
Opc = NVPTX::LoadParamMemV4I8;
break;
@@ -2077,7 +2183,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadParam(SDNode *Node) {
VTs = CurDAG->getVTList(EltVT, EltVT, MVT::Other, MVT::Glue);
} else {
EVT EVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other, MVT::Glue };
- VTs = CurDAG->getVTList(&EVTs[0], array_lengthof(EVTs));
+ VTs = CurDAG->getVTList(EVTs);
}
unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
@@ -2103,7 +2209,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreRetval(SDNode *N) {
unsigned NumElts = 1;
switch (N->getOpcode()) {
default:
- return NULL;
+ return nullptr;
case NVPTXISD::StoreRetval:
NumElts = 1;
break;
@@ -2128,11 +2234,11 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreRetval(SDNode *N) {
unsigned Opcode = 0;
switch (NumElts) {
default:
- return NULL;
+ return nullptr;
case 1:
switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
default:
- return NULL;
+ return nullptr;
case MVT::i1:
Opcode = NVPTX::StoreRetvalI8;
break;
@@ -2159,7 +2265,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreRetval(SDNode *N) {
case 2:
switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
default:
- return NULL;
+ return nullptr;
case MVT::i1:
Opcode = NVPTX::StoreRetvalV2I8;
break;
@@ -2186,7 +2292,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreRetval(SDNode *N) {
case 4:
switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
default:
- return NULL;
+ return nullptr;
case MVT::i1:
Opcode = NVPTX::StoreRetvalV4I8;
break;
@@ -2229,7 +2335,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreParam(SDNode *N) {
unsigned NumElts = 1;
switch (N->getOpcode()) {
default:
- return NULL;
+ return nullptr;
case NVPTXISD::StoreParamU32:
case NVPTXISD::StoreParamS32:
case NVPTXISD::StoreParam:
@@ -2260,11 +2366,11 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreParam(SDNode *N) {
default:
switch (NumElts) {
default:
- return NULL;
+ return nullptr;
case 1:
switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
default:
- return NULL;
+ return nullptr;
case MVT::i1:
Opcode = NVPTX::StoreParamI8;
break;
@@ -2291,7 +2397,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreParam(SDNode *N) {
case 2:
switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
default:
- return NULL;
+ return nullptr;
case MVT::i1:
Opcode = NVPTX::StoreParamV2I8;
break;
@@ -2318,7 +2424,7 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreParam(SDNode *N) {
case 4:
switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
default:
- return NULL;
+ return nullptr;
case MVT::i1:
Opcode = NVPTX::StoreParamV4I8;
break;
@@ -2371,6 +2477,488 @@ SDNode *NVPTXDAGToDAGISel::SelectStoreParam(SDNode *N) {
return Ret;
}
+SDNode *NVPTXDAGToDAGISel::SelectTextureIntrinsic(SDNode *N) {
+ SDValue Chain = N->getOperand(0);
+ SDValue TexRef = N->getOperand(1);
+ SDValue SampRef = N->getOperand(2);
+ SDNode *Ret = nullptr;
+ unsigned Opc = 0;
+ SmallVector<SDValue, 8> Ops;
+
+ switch (N->getOpcode()) {
+ default: return nullptr;
+ case NVPTXISD::Tex1DFloatI32:
+ Opc = NVPTX::TEX_1D_F32_I32;
+ break;
+ case NVPTXISD::Tex1DFloatFloat:
+ Opc = NVPTX::TEX_1D_F32_F32;
+ break;
+ case NVPTXISD::Tex1DFloatFloatLevel:
+ Opc = NVPTX::TEX_1D_F32_F32_LEVEL;
+ break;
+ case NVPTXISD::Tex1DFloatFloatGrad:
+ Opc = NVPTX::TEX_1D_F32_F32_GRAD;
+ break;
+ case NVPTXISD::Tex1DI32I32:
+ Opc = NVPTX::TEX_1D_I32_I32;
+ break;
+ case NVPTXISD::Tex1DI32Float:
+ Opc = NVPTX::TEX_1D_I32_F32;
+ break;
+ case NVPTXISD::Tex1DI32FloatLevel:
+ Opc = NVPTX::TEX_1D_I32_F32_LEVEL;
+ break;
+ case NVPTXISD::Tex1DI32FloatGrad:
+ Opc = NVPTX::TEX_1D_I32_F32_GRAD;
+ break;
+ case NVPTXISD::Tex1DArrayFloatI32:
+ Opc = NVPTX::TEX_1D_ARRAY_F32_I32;
+ break;
+ case NVPTXISD::Tex1DArrayFloatFloat:
+ Opc = NVPTX::TEX_1D_ARRAY_F32_F32;
+ break;
+ case NVPTXISD::Tex1DArrayFloatFloatLevel:
+ Opc = NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL;
+ break;
+ case NVPTXISD::Tex1DArrayFloatFloatGrad:
+ Opc = NVPTX::TEX_1D_ARRAY_F32_F32_GRAD;
+ break;
+ case NVPTXISD::Tex1DArrayI32I32:
+ Opc = NVPTX::TEX_1D_ARRAY_I32_I32;
+ break;
+ case NVPTXISD::Tex1DArrayI32Float:
+ Opc = NVPTX::TEX_1D_ARRAY_I32_F32;
+ break;
+ case NVPTXISD::Tex1DArrayI32FloatLevel:
+ Opc = NVPTX::TEX_1D_ARRAY_I32_F32_LEVEL;
+ break;
+ case NVPTXISD::Tex1DArrayI32FloatGrad:
+ Opc = NVPTX::TEX_1D_ARRAY_I32_F32_GRAD;
+ break;
+ case NVPTXISD::Tex2DFloatI32:
+ Opc = NVPTX::TEX_2D_F32_I32;
+ break;
+ case NVPTXISD::Tex2DFloatFloat:
+ Opc = NVPTX::TEX_2D_F32_F32;
+ break;
+ case NVPTXISD::Tex2DFloatFloatLevel:
+ Opc = NVPTX::TEX_2D_F32_F32_LEVEL;
+ break;
+ case NVPTXISD::Tex2DFloatFloatGrad:
+ Opc = NVPTX::TEX_2D_F32_F32_GRAD;
+ break;
+ case NVPTXISD::Tex2DI32I32:
+ Opc = NVPTX::TEX_2D_I32_I32;
+ break;
+ case NVPTXISD::Tex2DI32Float:
+ Opc = NVPTX::TEX_2D_I32_F32;
+ break;
+ case NVPTXISD::Tex2DI32FloatLevel:
+ Opc = NVPTX::TEX_2D_I32_F32_LEVEL;
+ break;
+ case NVPTXISD::Tex2DI32FloatGrad:
+ Opc = NVPTX::TEX_2D_I32_F32_GRAD;
+ break;
+ case NVPTXISD::Tex2DArrayFloatI32:
+ Opc = NVPTX::TEX_2D_ARRAY_F32_I32;
+ break;
+ case NVPTXISD::Tex2DArrayFloatFloat:
+ Opc = NVPTX::TEX_2D_ARRAY_F32_F32;
+ break;
+ case NVPTXISD::Tex2DArrayFloatFloatLevel:
+ Opc = NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL;
+ break;
+ case NVPTXISD::Tex2DArrayFloatFloatGrad:
+ Opc = NVPTX::TEX_2D_ARRAY_F32_F32_GRAD;
+ break;
+ case NVPTXISD::Tex2DArrayI32I32:
+ Opc = NVPTX::TEX_2D_ARRAY_I32_I32;
+ break;
+ case NVPTXISD::Tex2DArrayI32Float:
+ Opc = NVPTX::TEX_2D_ARRAY_I32_F32;
+ break;
+ case NVPTXISD::Tex2DArrayI32FloatLevel:
+ Opc = NVPTX::TEX_2D_ARRAY_I32_F32_LEVEL;
+ break;
+ case NVPTXISD::Tex2DArrayI32FloatGrad:
+ Opc = NVPTX::TEX_2D_ARRAY_I32_F32_GRAD;
+ break;
+ case NVPTXISD::Tex3DFloatI32:
+ Opc = NVPTX::TEX_3D_F32_I32;
+ break;
+ case NVPTXISD::Tex3DFloatFloat:
+ Opc = NVPTX::TEX_3D_F32_F32;
+ break;
+ case NVPTXISD::Tex3DFloatFloatLevel:
+ Opc = NVPTX::TEX_3D_F32_F32_LEVEL;
+ break;
+ case NVPTXISD::Tex3DFloatFloatGrad:
+ Opc = NVPTX::TEX_3D_F32_F32_GRAD;
+ break;
+ case NVPTXISD::Tex3DI32I32:
+ Opc = NVPTX::TEX_3D_I32_I32;
+ break;
+ case NVPTXISD::Tex3DI32Float:
+ Opc = NVPTX::TEX_3D_I32_F32;
+ break;
+ case NVPTXISD::Tex3DI32FloatLevel:
+ Opc = NVPTX::TEX_3D_I32_F32_LEVEL;
+ break;
+ case NVPTXISD::Tex3DI32FloatGrad:
+ Opc = NVPTX::TEX_3D_I32_F32_GRAD;
+ break;
+ }
+
+ Ops.push_back(TexRef);
+ Ops.push_back(SampRef);
+
+ // Copy over indices
+ for (unsigned i = 3; i < N->getNumOperands(); ++i) {
+ Ops.push_back(N->getOperand(i));
+ }
+
+ Ops.push_back(Chain);
+ Ret = CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
+ return Ret;
+}
+
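The switch above is one long ISD-opcode-to-machine-opcode map. A hedged sketch of a table-driven equivalent (the struct and lookup are illustrative, not code from this patch; real rows would mirror the case/Opc pairs in the switch):

struct TexOpcEntry {
  unsigned ISDOpc;  // e.g. NVPTXISD::Tex1DFloatFloat
  unsigned MachOpc; // e.g. NVPTX::TEX_1D_F32_F32
};

static const TexOpcEntry TexTable[] = {
    {0, 0}, // placeholder; one row per texture variant from the switch above
};

static unsigned lookupTexOpcode(unsigned ISDOpc) {
  for (const TexOpcEntry &E : TexTable)
    if (E.ISDOpc == ISDOpc)
      return E.MachOpc;
  return 0; // unmapped; the caller would return nullptr, like the default case
}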
+SDNode *NVPTXDAGToDAGISel::SelectSurfaceIntrinsic(SDNode *N) {
+ SDValue Chain = N->getOperand(0);
+ SDValue TexHandle = N->getOperand(1);
+ SDNode *Ret = nullptr;
+ unsigned Opc = 0;
+ SmallVector<SDValue, 8> Ops;
+ switch (N->getOpcode()) {
+ default: return nullptr;
+ case NVPTXISD::Suld1DI8Trap:
+ Opc = NVPTX::SULD_1D_I8_TRAP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld1DI16Trap:
+ Opc = NVPTX::SULD_1D_I16_TRAP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld1DI32Trap:
+ Opc = NVPTX::SULD_1D_I32_TRAP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld1DV2I8Trap:
+ Opc = NVPTX::SULD_1D_V2I8_TRAP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld1DV2I16Trap:
+ Opc = NVPTX::SULD_1D_V2I16_TRAP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld1DV2I32Trap:
+ Opc = NVPTX::SULD_1D_V2I32_TRAP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld1DV4I8Trap:
+ Opc = NVPTX::SULD_1D_V4I8_TRAP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld1DV4I16Trap:
+ Opc = NVPTX::SULD_1D_V4I16_TRAP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld1DV4I32Trap:
+ Opc = NVPTX::SULD_1D_V4I32_TRAP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld1DArrayI8Trap:
+ Opc = NVPTX::SULD_1D_ARRAY_I8_TRAP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld1DArrayI16Trap:
+ Opc = NVPTX::SULD_1D_ARRAY_I16_TRAP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld1DArrayI32Trap:
+ Opc = NVPTX::SULD_1D_ARRAY_I32_TRAP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld1DArrayV2I8Trap:
+ Opc = NVPTX::SULD_1D_ARRAY_V2I8_TRAP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld1DArrayV2I16Trap:
+ Opc = NVPTX::SULD_1D_ARRAY_V2I16_TRAP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld1DArrayV2I32Trap:
+ Opc = NVPTX::SULD_1D_ARRAY_V2I32_TRAP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld1DArrayV4I8Trap:
+ Opc = NVPTX::SULD_1D_ARRAY_V4I8_TRAP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld1DArrayV4I16Trap:
+ Opc = NVPTX::SULD_1D_ARRAY_V4I16_TRAP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld1DArrayV4I32Trap:
+ Opc = NVPTX::SULD_1D_ARRAY_V4I32_TRAP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld2DI8Trap:
+ Opc = NVPTX::SULD_2D_I8_TRAP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld2DI16Trap:
+ Opc = NVPTX::SULD_2D_I16_TRAP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld2DI32Trap:
+ Opc = NVPTX::SULD_2D_I32_TRAP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld2DV2I8Trap:
+ Opc = NVPTX::SULD_2D_V2I8_TRAP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld2DV2I16Trap:
+ Opc = NVPTX::SULD_2D_V2I16_TRAP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld2DV2I32Trap:
+ Opc = NVPTX::SULD_2D_V2I32_TRAP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld2DV4I8Trap:
+ Opc = NVPTX::SULD_2D_V4I8_TRAP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld2DV4I16Trap:
+ Opc = NVPTX::SULD_2D_V4I16_TRAP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld2DV4I32Trap:
+ Opc = NVPTX::SULD_2D_V4I32_TRAP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld2DArrayI8Trap:
+ Opc = NVPTX::SULD_2D_ARRAY_I8_TRAP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(N->getOperand(4));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld2DArrayI16Trap:
+ Opc = NVPTX::SULD_2D_ARRAY_I16_TRAP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(N->getOperand(4));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld2DArrayI32Trap:
+ Opc = NVPTX::SULD_2D_ARRAY_I32_TRAP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(N->getOperand(4));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld2DArrayV2I8Trap:
+ Opc = NVPTX::SULD_2D_ARRAY_V2I8_TRAP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(N->getOperand(4));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld2DArrayV2I16Trap:
+ Opc = NVPTX::SULD_2D_ARRAY_V2I16_TRAP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(N->getOperand(4));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld2DArrayV2I32Trap:
+ Opc = NVPTX::SULD_2D_ARRAY_V2I32_TRAP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(N->getOperand(4));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld2DArrayV4I8Trap:
+ Opc = NVPTX::SULD_2D_ARRAY_V4I8_TRAP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(N->getOperand(4));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld2DArrayV4I16Trap:
+ Opc = NVPTX::SULD_2D_ARRAY_V4I16_TRAP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(N->getOperand(4));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld2DArrayV4I32Trap:
+ Opc = NVPTX::SULD_2D_ARRAY_V4I32_TRAP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(N->getOperand(4));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld3DI8Trap:
+ Opc = NVPTX::SULD_3D_I8_TRAP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(N->getOperand(4));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld3DI16Trap:
+ Opc = NVPTX::SULD_3D_I16_TRAP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(N->getOperand(4));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld3DI32Trap:
+ Opc = NVPTX::SULD_3D_I32_TRAP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(N->getOperand(4));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld3DV2I8Trap:
+ Opc = NVPTX::SULD_3D_V2I8_TRAP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(N->getOperand(4));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld3DV2I16Trap:
+ Opc = NVPTX::SULD_3D_V2I16_TRAP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(N->getOperand(4));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld3DV2I32Trap:
+ Opc = NVPTX::SULD_3D_V2I32_TRAP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(N->getOperand(4));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld3DV4I8Trap:
+ Opc = NVPTX::SULD_3D_V4I8_TRAP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(N->getOperand(4));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld3DV4I16Trap:
+ Opc = NVPTX::SULD_3D_V4I16_TRAP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(N->getOperand(4));
+ Ops.push_back(Chain);
+ break;
+ case NVPTXISD::Suld3DV4I32Trap:
+ Opc = NVPTX::SULD_3D_V4I32_TRAP;
+ Ops.push_back(TexHandle);
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ Ops.push_back(N->getOperand(4));
+ Ops.push_back(Chain);
+ break;
+ }
+ Ret = CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
+ return Ret;
+}
+
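Every suld case above has the same shape: push the surface handle, then the coordinate operands, then the chain; only the coordinate count varies (1 for 1D, 2 for 1D-array and 2D, 3 for 2D-array and 3D). A hypothetical helper capturing that shape (a sketch, not part of the patch, using the surrounding file's types):

static void collectSuldOps(SDNode *N, unsigned NumCoords,
                           SmallVectorImpl<SDValue> &Ops) {
  Ops.push_back(N->getOperand(1));       // surface handle
  for (unsigned i = 0; i != NumCoords; ++i)
    Ops.push_back(N->getOperand(2 + i)); // coordinates
  Ops.push_back(N->getOperand(0));       // chain goes last, as in the cases above
}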
// SelectDirectAddr - Match a direct address for DAG.
// A direct address could be a GlobalAddress or an ExternalSymbol.
bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) {
@@ -2464,14 +3052,18 @@ bool NVPTXDAGToDAGISel::SelectADDRri64(SDNode *OpNode, SDValue Addr,
bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N,
unsigned int spN) const {
- const Value *Src = NULL;
+ const Value *Src = nullptr;
// Even though MemIntrinsicSDNode is a subclass of MemSDNode,
// the classof() for MemSDNode does not include MemIntrinsicSDNode
// (see SelectionDAGNodes.h), so we need to check for both.
if (MemSDNode *mN = dyn_cast<MemSDNode>(N)) {
- Src = mN->getSrcValue();
+ if (spN == 0 && mN->getMemOperand()->getPseudoValue())
+ return true;
+ Src = mN->getMemOperand()->getValue();
} else if (MemSDNode *mN = dyn_cast<MemIntrinsicSDNode>(N)) {
- Src = mN->getSrcValue();
+ if (spN == 0 && mN->getMemOperand()->getPseudoValue())
+ return true;
+ Src = mN->getMemOperand()->getValue();
}
if (!Src)
return false;
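The two-sided dyn_cast now also gives pseudo-source values (e.g. fixed stack slots) a fast path: the generic address space (spN == 0) is assumed for them. A condensed sketch of the extraction half, grounded in the code above (the helper name is illustrative):

static const Value *getUnderlyingSrc(SDNode *N) {
  if (MemSDNode *M = dyn_cast<MemSDNode>(N))
    return M->getMemOperand()->getValue();
  if (MemIntrinsicSDNode *M = dyn_cast<MemIntrinsicSDNode>(N))
    return M->getMemOperand()->getValue();
  return nullptr;
}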
diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.h b/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
index 93ad169..11f92e7 100644
--- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
+++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
@@ -11,8 +11,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "nvptx-isel"
-
#include "NVPTX.h"
#include "NVPTXISelLowering.h"
#include "NVPTXRegisterInfo.h"
@@ -46,19 +44,22 @@ public:
CodeGenOpt::Level OptLevel);
// Pass Name
- virtual const char *getPassName() const {
+ const char *getPassName() const override {
return "NVPTX DAG->DAG Pattern Instruction Selection";
}
const NVPTXSubtarget &Subtarget;
- virtual bool SelectInlineAsmMemoryOperand(
- const SDValue &Op, char ConstraintCode, std::vector<SDValue> &OutOps);
+ bool SelectInlineAsmMemoryOperand(const SDValue &Op,
+ char ConstraintCode,
+ std::vector<SDValue> &OutOps) override;
private:
// Include the pieces autogenerated from the target description.
#include "NVPTXGenDAGISel.inc"
- SDNode *Select(SDNode *N);
+ SDNode *Select(SDNode *N) override;
+ SDNode *SelectIntrinsicNoChain(SDNode *N);
+ SDNode *SelectTexSurfHandle(SDNode *N);
SDNode *SelectLoad(SDNode *N);
SDNode *SelectLoadVector(SDNode *N);
SDNode *SelectLDGLDUVector(SDNode *N);
@@ -68,6 +69,8 @@ private:
SDNode *SelectStoreRetval(SDNode *N);
SDNode *SelectStoreParam(SDNode *N);
SDNode *SelectAddrSpaceCast(SDNode *N);
+ SDNode *SelectTextureIntrinsic(SDNode *N);
+ SDNode *SelectSurfaceIntrinsic(SDNode *N);
inline SDValue getI32Imm(unsigned Imm) {
return CurDAG->getTargetConstant(Imm, MVT::i32);
diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp
index 8e25a65..b0943be 100644
--- a/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -75,7 +75,7 @@ static bool IsPTXVectorType(MVT VT) {
/// LowerCall, and LowerReturn.
static void ComputePTXValueVTs(const TargetLowering &TLI, Type *Ty,
SmallVectorImpl<EVT> &ValueVTs,
- SmallVectorImpl<uint64_t> *Offsets = 0,
+ SmallVectorImpl<uint64_t> *Offsets = nullptr,
uint64_t StartingOffset = 0) {
SmallVector<EVT, 16> TempVTs;
SmallVector<uint64_t, 16> TempOffsets;
@@ -245,7 +245,7 @@ NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM)
const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
switch (Opcode) {
default:
- return 0;
+ return nullptr;
case NVPTXISD::CALL:
return "NVPTXISD::CALL";
case NVPTXISD::RET_FLAG:
@@ -328,6 +328,116 @@ const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
return "NVPTXISD::StoreV2";
case NVPTXISD::StoreV4:
return "NVPTXISD::StoreV4";
+ case NVPTXISD::Tex1DFloatI32: return "NVPTXISD::Tex1DFloatI32";
+ case NVPTXISD::Tex1DFloatFloat: return "NVPTXISD::Tex1DFloatFloat";
+ case NVPTXISD::Tex1DFloatFloatLevel:
+ return "NVPTXISD::Tex1DFloatFloatLevel";
+ case NVPTXISD::Tex1DFloatFloatGrad:
+ return "NVPTXISD::Tex1DFloatFloatGrad";
+ case NVPTXISD::Tex1DI32I32: return "NVPTXISD::Tex1DI32I32";
+ case NVPTXISD::Tex1DI32Float: return "NVPTXISD::Tex1DI32Float";
+ case NVPTXISD::Tex1DI32FloatLevel:
+ return "NVPTXISD::Tex1DI32FloatLevel";
+ case NVPTXISD::Tex1DI32FloatGrad:
+ return "NVPTXISD::Tex1DI32FloatGrad";
+ case NVPTXISD::Tex1DArrayFloatI32: return "NVPTXISD::Tex1DArrayFloatI32";
+ case NVPTXISD::Tex1DArrayFloatFloat: return "NVPTXISD::Tex1DArrayFloatFloat";
+ case NVPTXISD::Tex1DArrayFloatFloatLevel:
+ return "NVPTXISD::Tex1DArrayFloatFloatLevel";
+ case NVPTXISD::Tex1DArrayFloatFloatGrad:
+ return "NVPTXISD::Tex1DArrayFloatFloatGrad";
+ case NVPTXISD::Tex1DArrayI32I32: return "NVPTXISD::Tex1DArrayI32I32";
+ case NVPTXISD::Tex1DArrayI32Float: return "NVPTXISD::Tex1DArrayI32Float";
+ case NVPTXISD::Tex1DArrayI32FloatLevel:
+ return "NVPTXISD::Tex1DArrayI32FloatLevel";
+ case NVPTXISD::Tex1DArrayI32FloatGrad:
+ return "NVPTXISD::Tex1DArrayI32FloatGrad";
+ case NVPTXISD::Tex2DFloatI32: return "NVPTXISD::Tex2DFloatI32";
+ case NVPTXISD::Tex2DFloatFloat: return "NVPTXISD::Tex2DFloatFloat";
+ case NVPTXISD::Tex2DFloatFloatLevel:
+ return "NVPTXISD::Tex2DFloatFloatLevel";
+ case NVPTXISD::Tex2DFloatFloatGrad:
+ return "NVPTXISD::Tex2DFloatFloatGrad";
+ case NVPTXISD::Tex2DI32I32: return "NVPTXISD::Tex2DI32I32";
+ case NVPTXISD::Tex2DI32Float: return "NVPTXISD::Tex2DI32Float";
+ case NVPTXISD::Tex2DI32FloatLevel:
+ return "NVPTXISD::Tex2DI32FloatLevel";
+ case NVPTXISD::Tex2DI32FloatGrad:
+ return "NVPTXISD::Tex2DI32FloatGrad";
+ case NVPTXISD::Tex2DArrayFloatI32: return "NVPTXISD::Tex2DArrayFloatI32";
+ case NVPTXISD::Tex2DArrayFloatFloat: return "NVPTXISD::Tex2DArrayFloatFloat";
+ case NVPTXISD::Tex2DArrayFloatFloatLevel:
+ return "NVPTXISD::Tex2DArrayFloatFloatLevel";
+ case NVPTXISD::Tex2DArrayFloatFloatGrad:
+ return "NVPTXISD::Tex2DArrayFloatFloatGrad";
+ case NVPTXISD::Tex2DArrayI32I32: return "NVPTXISD::Tex2DArrayI32I32";
+ case NVPTXISD::Tex2DArrayI32Float: return "NVPTXISD::Tex2DArrayI32Float";
+ case NVPTXISD::Tex2DArrayI32FloatLevel:
+ return "NVPTXISD::Tex2DArrayI32FloatLevel";
+ case NVPTXISD::Tex2DArrayI32FloatGrad:
+ return "NVPTXISD::Tex2DArrayI32FloatGrad";
+ case NVPTXISD::Tex3DFloatI32: return "NVPTXISD::Tex3DFloatI32";
+ case NVPTXISD::Tex3DFloatFloat: return "NVPTXISD::Tex3DFloatFloat";
+ case NVPTXISD::Tex3DFloatFloatLevel:
+ return "NVPTXISD::Tex3DFloatFloatLevel";
+ case NVPTXISD::Tex3DFloatFloatGrad:
+ return "NVPTXISD::Tex3DFloatFloatGrad";
+ case NVPTXISD::Tex3DI32I32: return "NVPTXISD::Tex3DI32I32";
+ case NVPTXISD::Tex3DI32Float: return "NVPTXISD::Tex3DI32Float";
+ case NVPTXISD::Tex3DI32FloatLevel:
+ return "NVPTXISD::Tex3DI32FloatLevel";
+ case NVPTXISD::Tex3DI32FloatGrad:
+ return "NVPTXISD::Tex3DI32FloatGrad";
+
+ case NVPTXISD::Suld1DI8Trap: return "NVPTXISD::Suld1DI8Trap";
+ case NVPTXISD::Suld1DI16Trap: return "NVPTXISD::Suld1DI16Trap";
+ case NVPTXISD::Suld1DI32Trap: return "NVPTXISD::Suld1DI32Trap";
+ case NVPTXISD::Suld1DV2I8Trap: return "NVPTXISD::Suld1DV2I8Trap";
+ case NVPTXISD::Suld1DV2I16Trap: return "NVPTXISD::Suld1DV2I16Trap";
+ case NVPTXISD::Suld1DV2I32Trap: return "NVPTXISD::Suld1DV2I32Trap";
+ case NVPTXISD::Suld1DV4I8Trap: return "NVPTXISD::Suld1DV4I8Trap";
+ case NVPTXISD::Suld1DV4I16Trap: return "NVPTXISD::Suld1DV4I16Trap";
+ case NVPTXISD::Suld1DV4I32Trap: return "NVPTXISD::Suld1DV4I32Trap";
+
+ case NVPTXISD::Suld1DArrayI8Trap: return "NVPTXISD::Suld1DArrayI8Trap";
+ case NVPTXISD::Suld1DArrayI16Trap: return "NVPTXISD::Suld1DArrayI16Trap";
+ case NVPTXISD::Suld1DArrayI32Trap: return "NVPTXISD::Suld1DArrayI32Trap";
+ case NVPTXISD::Suld1DArrayV2I8Trap: return "NVPTXISD::Suld1DArrayV2I8Trap";
+ case NVPTXISD::Suld1DArrayV2I16Trap: return "NVPTXISD::Suld1DArrayV2I16Trap";
+ case NVPTXISD::Suld1DArrayV2I32Trap: return "NVPTXISD::Suld1DArrayV2I32Trap";
+ case NVPTXISD::Suld1DArrayV4I8Trap: return "NVPTXISD::Suld1DArrayV4I8Trap";
+ case NVPTXISD::Suld1DArrayV4I16Trap: return "NVPTXISD::Suld1DArrayV4I16Trap";
+ case NVPTXISD::Suld1DArrayV4I32Trap: return "NVPTXISD::Suld1DArrayV4I32Trap";
+
+ case NVPTXISD::Suld2DI8Trap: return "NVPTXISD::Suld2DI8Trap";
+ case NVPTXISD::Suld2DI16Trap: return "NVPTXISD::Suld2DI16Trap";
+ case NVPTXISD::Suld2DI32Trap: return "NVPTXISD::Suld2DI32Trap";
+ case NVPTXISD::Suld2DV2I8Trap: return "NVPTXISD::Suld2DV2I8Trap";
+ case NVPTXISD::Suld2DV2I16Trap: return "NVPTXISD::Suld2DV2I16Trap";
+ case NVPTXISD::Suld2DV2I32Trap: return "NVPTXISD::Suld2DV2I32Trap";
+ case NVPTXISD::Suld2DV4I8Trap: return "NVPTXISD::Suld2DV4I8Trap";
+ case NVPTXISD::Suld2DV4I16Trap: return "NVPTXISD::Suld2DV4I16Trap";
+ case NVPTXISD::Suld2DV4I32Trap: return "NVPTXISD::Suld2DV4I32Trap";
+
+ case NVPTXISD::Suld2DArrayI8Trap: return "NVPTXISD::Suld2DArrayI8Trap";
+ case NVPTXISD::Suld2DArrayI16Trap: return "NVPTXISD::Suld2DArrayI16Trap";
+ case NVPTXISD::Suld2DArrayI32Trap: return "NVPTXISD::Suld2DArrayI32Trap";
+ case NVPTXISD::Suld2DArrayV2I8Trap: return "NVPTXISD::Suld2DArrayV2I8Trap";
+ case NVPTXISD::Suld2DArrayV2I16Trap: return "NVPTXISD::Suld2DArrayV2I16Trap";
+ case NVPTXISD::Suld2DArrayV2I32Trap: return "NVPTXISD::Suld2DArrayV2I32Trap";
+ case NVPTXISD::Suld2DArrayV4I8Trap: return "NVPTXISD::Suld2DArrayV4I8Trap";
+ case NVPTXISD::Suld2DArrayV4I16Trap: return "NVPTXISD::Suld2DArrayV4I16Trap";
+ case NVPTXISD::Suld2DArrayV4I32Trap: return "NVPTXISD::Suld2DArrayV4I32Trap";
+
+ case NVPTXISD::Suld3DI8Trap: return "NVPTXISD::Suld3DI8Trap";
+ case NVPTXISD::Suld3DI16Trap: return "NVPTXISD::Suld3DI16Trap";
+ case NVPTXISD::Suld3DI32Trap: return "NVPTXISD::Suld3DI32Trap";
+ case NVPTXISD::Suld3DV2I8Trap: return "NVPTXISD::Suld3DV2I8Trap";
+ case NVPTXISD::Suld3DV2I16Trap: return "NVPTXISD::Suld3DV2I16Trap";
+ case NVPTXISD::Suld3DV2I32Trap: return "NVPTXISD::Suld3DV2I32Trap";
+ case NVPTXISD::Suld3DV4I8Trap: return "NVPTXISD::Suld3DV4I8Trap";
+ case NVPTXISD::Suld3DV4I16Trap: return "NVPTXISD::Suld3DV4I16Trap";
+ case NVPTXISD::Suld3DV4I32Trap: return "NVPTXISD::Suld3DV4I32Trap";
}
}
@@ -526,7 +636,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SDValue Chain = CLI.Chain;
SDValue Callee = CLI.Callee;
bool &isTailCall = CLI.IsTailCall;
- ArgListTy &Args = CLI.Args;
+ ArgListTy &Args = CLI.getArgs();
Type *retTy = CLI.RetTy;
ImmutableCallSite *CS = CLI.CS;
@@ -575,7 +685,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
DAG.getConstant(paramCount, MVT::i32),
DAG.getConstant(sz, MVT::i32), InFlag };
Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs,
- DeclareParamOps, 5);
+ DeclareParamOps);
InFlag = Chain.getValue(1);
unsigned curOffset = 0;
for (unsigned j = 0, je = vtparts.size(); j != je; ++j) {
@@ -599,7 +709,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
DAG.getConstant(curOffset, MVT::i32),
StVal, InFlag };
Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParam, dl,
- CopyParamVTs, &CopyParamOps[0], 5,
+ CopyParamVTs, CopyParamOps,
elemtype, MachinePointerInfo());
InFlag = Chain.getValue(1);
curOffset += sz / 8;
@@ -621,7 +731,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
DAG.getConstant(paramCount, MVT::i32),
DAG.getConstant(sz, MVT::i32), InFlag };
Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs,
- DeclareParamOps, 5);
+ DeclareParamOps);
InFlag = Chain.getValue(1);
unsigned NumElts = ObjectVT.getVectorNumElements();
EVT EltVT = ObjectVT.getVectorElementType();
@@ -644,7 +754,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
DAG.getConstant(0, MVT::i32), Elt,
InFlag };
Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParam, dl,
- CopyParamVTs, &CopyParamOps[0], 5,
+ CopyParamVTs, CopyParamOps,
MemVT, MachinePointerInfo());
InFlag = Chain.getValue(1);
} else if (NumElts == 2) {
@@ -661,7 +771,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
DAG.getConstant(0, MVT::i32), Elt0, Elt1,
InFlag };
Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParamV2, dl,
- CopyParamVTs, &CopyParamOps[0], 6,
+ CopyParamVTs, CopyParamOps,
MemVT, MachinePointerInfo());
InFlag = Chain.getValue(1);
} else {
@@ -735,9 +845,8 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
Ops.push_back(InFlag);
SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
- Chain = DAG.getMemIntrinsicNode(Opc, dl, CopyParamVTs, &Ops[0],
- Ops.size(), MemVT,
- MachinePointerInfo());
+ Chain = DAG.getMemIntrinsicNode(Opc, dl, CopyParamVTs, Ops,
+ MemVT, MachinePointerInfo());
InFlag = Chain.getValue(1);
curOffset += PerStoreOffset;
}
@@ -762,7 +871,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
DAG.getConstant(sz, MVT::i32),
DAG.getConstant(0, MVT::i32), InFlag };
Chain = DAG.getNode(NVPTXISD::DeclareScalarParam, dl, DeclareParamVTs,
- DeclareParamOps, 5);
+ DeclareParamOps);
InFlag = Chain.getValue(1);
SDValue OutV = OutVals[OIdx];
if (needExtend) {
@@ -781,7 +890,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
opcode = NVPTXISD::StoreParamU32;
else if (Outs[OIdx].Flags.isSExt())
opcode = NVPTXISD::StoreParamS32;
- Chain = DAG.getMemIntrinsicNode(opcode, dl, CopyParamVTs, CopyParamOps, 5,
+ Chain = DAG.getMemIntrinsicNode(opcode, dl, CopyParamVTs, CopyParamOps,
VT, MachinePointerInfo());
InFlag = Chain.getValue(1);
@@ -806,7 +915,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
InFlag
};
Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs,
- DeclareParamOps, 5);
+ DeclareParamOps);
InFlag = Chain.getValue(1);
unsigned curOffset = 0;
for (unsigned j = 0, je = vtparts.size(); j != je; ++j) {
@@ -834,7 +943,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
DAG.getConstant(curOffset, MVT::i32), theVal,
InFlag };
Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParam, dl, CopyParamVTs,
- CopyParamOps, 5, elemtype,
+ CopyParamOps, elemtype,
MachinePointerInfo());
InFlag = Chain.getValue(1);
@@ -865,7 +974,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
DAG.getConstant(resultsz, MVT::i32),
DAG.getConstant(0, MVT::i32), InFlag };
Chain = DAG.getNode(NVPTXISD::DeclareRet, dl, DeclareRetVTs,
- DeclareRetOps, 5);
+ DeclareRetOps);
InFlag = Chain.getValue(1);
} else {
retAlignment = getArgumentAlignment(Callee, CS, retTy, 0);
@@ -875,7 +984,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
DAG.getConstant(resultsz / 8, MVT::i32),
DAG.getConstant(0, MVT::i32), InFlag };
Chain = DAG.getNode(NVPTXISD::DeclareRetParam, dl, DeclareRetVTs,
- DeclareRetOps, 5);
+ DeclareRetOps);
InFlag = Chain.getValue(1);
}
}
@@ -895,7 +1004,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SDValue ProtoOps[] = {
Chain, DAG.getTargetExternalSymbol(ProtoStr, MVT::i32), InFlag,
};
- Chain = DAG.getNode(NVPTXISD::CallPrototype, dl, ProtoVTs, &ProtoOps[0], 3);
+ Chain = DAG.getNode(NVPTXISD::CallPrototype, dl, ProtoVTs, ProtoOps);
InFlag = Chain.getValue(1);
}
// Op to just print "call"
@@ -904,20 +1013,20 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
Chain, DAG.getConstant((Ins.size() == 0) ? 0 : 1, MVT::i32), InFlag
};
Chain = DAG.getNode(Func ? (NVPTXISD::PrintCallUni) : (NVPTXISD::PrintCall),
- dl, PrintCallVTs, PrintCallOps, 3);
+ dl, PrintCallVTs, PrintCallOps);
InFlag = Chain.getValue(1);
// Ops to print out the function name
SDVTList CallVoidVTs = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue CallVoidOps[] = { Chain, Callee, InFlag };
- Chain = DAG.getNode(NVPTXISD::CallVoid, dl, CallVoidVTs, CallVoidOps, 3);
+ Chain = DAG.getNode(NVPTXISD::CallVoid, dl, CallVoidVTs, CallVoidOps);
InFlag = Chain.getValue(1);
// Ops to print out the param list
SDVTList CallArgBeginVTs = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue CallArgBeginOps[] = { Chain, InFlag };
Chain = DAG.getNode(NVPTXISD::CallArgBegin, dl, CallArgBeginVTs,
- CallArgBeginOps, 2);
+ CallArgBeginOps);
InFlag = Chain.getValue(1);
for (unsigned i = 0, e = paramCount; i != e; ++i) {
@@ -929,21 +1038,20 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SDVTList CallArgVTs = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue CallArgOps[] = { Chain, DAG.getConstant(1, MVT::i32),
DAG.getConstant(i, MVT::i32), InFlag };
- Chain = DAG.getNode(opcode, dl, CallArgVTs, CallArgOps, 4);
+ Chain = DAG.getNode(opcode, dl, CallArgVTs, CallArgOps);
InFlag = Chain.getValue(1);
}
SDVTList CallArgEndVTs = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue CallArgEndOps[] = { Chain, DAG.getConstant(Func ? 1 : 0, MVT::i32),
InFlag };
- Chain =
- DAG.getNode(NVPTXISD::CallArgEnd, dl, CallArgEndVTs, CallArgEndOps, 3);
+ Chain = DAG.getNode(NVPTXISD::CallArgEnd, dl, CallArgEndVTs, CallArgEndOps);
InFlag = Chain.getValue(1);
if (!Func) {
SDVTList PrototypeVTs = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue PrototypeOps[] = { Chain, DAG.getConstant(uniqueCallSite, MVT::i32),
InFlag };
- Chain = DAG.getNode(NVPTXISD::Prototype, dl, PrototypeVTs, PrototypeOps, 3);
+ Chain = DAG.getNode(NVPTXISD::Prototype, dl, PrototypeVTs, PrototypeOps);
InFlag = Chain.getValue(1);
}
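For orientation (a summary, not patch content), the glued-node sequence LowerCall builds across the hunks above is, in emission order; each node consumes the previous chain/glue pair, which keeps the printed PTX call contiguous:

// DeclareParam / DeclareScalarParam      one per formal argument
// StoreParam / StoreParamV2 / ...        copies of the argument values
// DeclareRet / DeclareRetParam           if the callee returns a value
// CallPrototype                          indirect calls only
// PrintCall / PrintCallUni               prints "call"
// CallVoid -> CallArgBegin
//   -> CallArg ...                       one per argument
//   -> CallArgEnd -> Prototype
// LoadParam / LoadParamV2 / ...          return-value loads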
@@ -962,7 +1070,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
if (NumElts == 1) {
// Just a simple load
- std::vector<EVT> LoadRetVTs;
+ SmallVector<EVT, 4> LoadRetVTs;
if (needTruncate) {
// If loading i1 result, generate
// load i16
@@ -972,15 +1080,14 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
LoadRetVTs.push_back(EltVT);
LoadRetVTs.push_back(MVT::Other);
LoadRetVTs.push_back(MVT::Glue);
- std::vector<SDValue> LoadRetOps;
+ SmallVector<SDValue, 4> LoadRetOps;
LoadRetOps.push_back(Chain);
LoadRetOps.push_back(DAG.getConstant(1, MVT::i32));
LoadRetOps.push_back(DAG.getConstant(0, MVT::i32));
LoadRetOps.push_back(InFlag);
SDValue retval = DAG.getMemIntrinsicNode(
NVPTXISD::LoadParam, dl,
- DAG.getVTList(&LoadRetVTs[0], LoadRetVTs.size()), &LoadRetOps[0],
- LoadRetOps.size(), EltVT, MachinePointerInfo());
+ DAG.getVTList(LoadRetVTs), LoadRetOps, EltVT, MachinePointerInfo());
Chain = retval.getValue(1);
InFlag = retval.getValue(2);
SDValue Ret0 = retval;
@@ -989,7 +1096,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
InVals.push_back(Ret0);
} else if (NumElts == 2) {
// LoadV2
- std::vector<EVT> LoadRetVTs;
+ SmallVector<EVT, 4> LoadRetVTs;
if (needTruncate) {
// If loading i1 result, generate
// load i16
@@ -1002,15 +1109,14 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
}
LoadRetVTs.push_back(MVT::Other);
LoadRetVTs.push_back(MVT::Glue);
- std::vector<SDValue> LoadRetOps;
+ SmallVector<SDValue, 4> LoadRetOps;
LoadRetOps.push_back(Chain);
LoadRetOps.push_back(DAG.getConstant(1, MVT::i32));
LoadRetOps.push_back(DAG.getConstant(0, MVT::i32));
LoadRetOps.push_back(InFlag);
SDValue retval = DAG.getMemIntrinsicNode(
NVPTXISD::LoadParamV2, dl,
- DAG.getVTList(&LoadRetVTs[0], LoadRetVTs.size()), &LoadRetOps[0],
- LoadRetOps.size(), EltVT, MachinePointerInfo());
+ DAG.getVTList(LoadRetVTs), LoadRetOps, EltVT, MachinePointerInfo());
Chain = retval.getValue(2);
InFlag = retval.getValue(3);
SDValue Ret0 = retval.getValue(0);
@@ -1054,8 +1160,8 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
LoadRetOps.push_back(DAG.getConstant(Ofst, MVT::i32));
LoadRetOps.push_back(InFlag);
SDValue retval = DAG.getMemIntrinsicNode(
- Opc, dl, DAG.getVTList(&LoadRetVTs[0], LoadRetVTs.size()),
- &LoadRetOps[0], LoadRetOps.size(), EltVT, MachinePointerInfo());
+ Opc, dl, DAG.getVTList(LoadRetVTs),
+ LoadRetOps, EltVT, MachinePointerInfo());
if (VecSize == 2) {
Chain = retval.getValue(2);
InFlag = retval.getValue(3);
@@ -1110,8 +1216,8 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
LoadRetOps.push_back(InFlag);
SDValue retval = DAG.getMemIntrinsicNode(
NVPTXISD::LoadParam, dl,
- DAG.getVTList(&LoadRetVTs[0], LoadRetVTs.size()), &LoadRetOps[0],
- LoadRetOps.size(), TheLoadType, MachinePointerInfo());
+ DAG.getVTList(LoadRetVTs), LoadRetOps,
+ TheLoadType, MachinePointerInfo());
Chain = retval.getValue(1);
InFlag = retval.getValue(2);
SDValue Ret0 = retval.getValue(0);
@@ -1153,8 +1259,7 @@ NVPTXTargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const {
DAG.getIntPtrConstant(j)));
}
}
- return DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0), &Ops[0],
- Ops.size());
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0), Ops);
}
SDValue
@@ -1209,7 +1314,7 @@ SDValue NVPTXTargetLowering::LowerLOADi1(SDValue Op, SelectionDAG &DAG) const {
// load, so we build a MergeValues node for it. See ExpandUnalignedLoad()
// in LegalizeDAG.cpp which also uses MergeValues.
SDValue Ops[] = { result, LD->getChain() };
- return DAG.getMergeValues(Ops, 2, dl);
+ return DAG.getMergeValues(Ops, dl);
}
SDValue NVPTXTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
@@ -1297,7 +1402,7 @@ NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const {
MemSDNode *MemSD = cast<MemSDNode>(N);
SDValue NewSt = DAG.getMemIntrinsicNode(
- Opcode, DL, DAG.getVTList(MVT::Other), &Ops[0], Ops.size(),
+ Opcode, DL, DAG.getVTList(MVT::Other), Ops,
MemSD->getMemoryVT(), MemSD->getMemOperand());
//return DCI.CombineTo(N, NewSt, true);
@@ -1429,7 +1534,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
if (isImageOrSamplerVal(
theArgs[i],
(theArgs[i]->getParent() ? theArgs[i]->getParent()->getParent()
- : 0))) {
+ : nullptr))) {
assert(isKernel && "Only kernels can have image/sampler params");
InVals.push_back(DAG.getConstant(i + 1, MVT::i32));
continue;
@@ -1683,8 +1788,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
//}
if (!OutChains.empty())
- DAG.setRoot(DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &OutChains[0],
- OutChains.size()));
+ DAG.setRoot(DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains));
return Chain;
}
@@ -1726,7 +1830,7 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
StoreVal = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, StoreVal);
SDValue Ops[] = { Chain, DAG.getConstant(0, MVT::i32), StoreVal };
Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreRetval, dl,
- DAG.getVTList(MVT::Other), &Ops[0], 3,
+ DAG.getVTList(MVT::Other), Ops,
EltVT, MachinePointerInfo());
} else if (NumElts == 2) {
@@ -1742,7 +1846,7 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
SDValue Ops[] = { Chain, DAG.getConstant(0, MVT::i32), StoreVal0,
StoreVal1 };
Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreRetvalV2, dl,
- DAG.getVTList(MVT::Other), &Ops[0], 4,
+ DAG.getVTList(MVT::Other), Ops,
EltVT, MachinePointerInfo());
} else {
// V4 stores
@@ -1814,8 +1918,8 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
// Chain = DAG.getNode(Opc, dl, MVT::Other, &Ops[0], Ops.size());
Chain =
- DAG.getMemIntrinsicNode(Opc, dl, DAG.getVTList(MVT::Other), &Ops[0],
- Ops.size(), EltVT, MachinePointerInfo());
+ DAG.getMemIntrinsicNode(Opc, dl, DAG.getVTList(MVT::Other), Ops,
+ EltVT, MachinePointerInfo());
Offset += PerStoreOffset;
}
}
@@ -1852,8 +1956,8 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
SDValue Ops[] = { Chain, DAG.getConstant(SizeSoFar, MVT::i32), TmpVal };
Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreRetval, dl,
- DAG.getVTList(MVT::Other), &Ops[0],
- 3, TheStoreType,
+ DAG.getVTList(MVT::Other), Ops,
+ TheStoreType,
MachinePointerInfo());
if(TheValType.isVector())
SizeSoFar +=
@@ -1891,6 +1995,195 @@ bool NVPTXTargetLowering::isTypeSupportedInIntrinsic(MVT VT) const {
return false;
}
+static unsigned getOpcForTextureInstr(unsigned Intrinsic) {
+ switch (Intrinsic) {
+ default:
+ return 0;
+
+ case Intrinsic::nvvm_tex_1d_v4f32_i32:
+ return NVPTXISD::Tex1DFloatI32;
+ case Intrinsic::nvvm_tex_1d_v4f32_f32:
+ return NVPTXISD::Tex1DFloatFloat;
+ case Intrinsic::nvvm_tex_1d_level_v4f32_f32:
+ return NVPTXISD::Tex1DFloatFloatLevel;
+ case Intrinsic::nvvm_tex_1d_grad_v4f32_f32:
+ return NVPTXISD::Tex1DFloatFloatGrad;
+ case Intrinsic::nvvm_tex_1d_v4i32_i32:
+ return NVPTXISD::Tex1DI32I32;
+ case Intrinsic::nvvm_tex_1d_v4i32_f32:
+ return NVPTXISD::Tex1DI32Float;
+ case Intrinsic::nvvm_tex_1d_level_v4i32_f32:
+ return NVPTXISD::Tex1DI32FloatLevel;
+ case Intrinsic::nvvm_tex_1d_grad_v4i32_f32:
+ return NVPTXISD::Tex1DI32FloatGrad;
+
+ case Intrinsic::nvvm_tex_1d_array_v4f32_i32:
+ return NVPTXISD::Tex1DArrayFloatI32;
+ case Intrinsic::nvvm_tex_1d_array_v4f32_f32:
+ return NVPTXISD::Tex1DArrayFloatFloat;
+ case Intrinsic::nvvm_tex_1d_array_level_v4f32_f32:
+ return NVPTXISD::Tex1DArrayFloatFloatLevel;
+ case Intrinsic::nvvm_tex_1d_array_grad_v4f32_f32:
+ return NVPTXISD::Tex1DArrayFloatFloatGrad;
+ case Intrinsic::nvvm_tex_1d_array_v4i32_i32:
+ return NVPTXISD::Tex1DArrayI32I32;
+ case Intrinsic::nvvm_tex_1d_array_v4i32_f32:
+ return NVPTXISD::Tex1DArrayI32Float;
+ case Intrinsic::nvvm_tex_1d_array_level_v4i32_f32:
+ return NVPTXISD::Tex1DArrayI32FloatLevel;
+ case Intrinsic::nvvm_tex_1d_array_grad_v4i32_f32:
+ return NVPTXISD::Tex1DArrayI32FloatGrad;
+
+ case Intrinsic::nvvm_tex_2d_v4f32_i32:
+ return NVPTXISD::Tex2DFloatI32;
+ case Intrinsic::nvvm_tex_2d_v4f32_f32:
+ return NVPTXISD::Tex2DFloatFloat;
+ case Intrinsic::nvvm_tex_2d_level_v4f32_f32:
+ return NVPTXISD::Tex2DFloatFloatLevel;
+ case Intrinsic::nvvm_tex_2d_grad_v4f32_f32:
+ return NVPTXISD::Tex2DFloatFloatGrad;
+ case Intrinsic::nvvm_tex_2d_v4i32_i32:
+ return NVPTXISD::Tex2DI32I32;
+ case Intrinsic::nvvm_tex_2d_v4i32_f32:
+ return NVPTXISD::Tex2DI32Float;
+ case Intrinsic::nvvm_tex_2d_level_v4i32_f32:
+ return NVPTXISD::Tex2DI32FloatLevel;
+ case Intrinsic::nvvm_tex_2d_grad_v4i32_f32:
+ return NVPTXISD::Tex2DI32FloatGrad;
+
+ case Intrinsic::nvvm_tex_2d_array_v4f32_i32:
+ return NVPTXISD::Tex2DArrayFloatI32;
+ case Intrinsic::nvvm_tex_2d_array_v4f32_f32:
+ return NVPTXISD::Tex2DArrayFloatFloat;
+ case Intrinsic::nvvm_tex_2d_array_level_v4f32_f32:
+ return NVPTXISD::Tex2DArrayFloatFloatLevel;
+ case Intrinsic::nvvm_tex_2d_array_grad_v4f32_f32:
+ return NVPTXISD::Tex2DArrayFloatFloatGrad;
+ case Intrinsic::nvvm_tex_2d_array_v4i32_i32:
+ return NVPTXISD::Tex2DArrayI32I32;
+ case Intrinsic::nvvm_tex_2d_array_v4i32_f32:
+ return NVPTXISD::Tex2DArrayI32Float;
+ case Intrinsic::nvvm_tex_2d_array_level_v4i32_f32:
+ return NVPTXISD::Tex2DArrayI32FloatLevel;
+ case Intrinsic::nvvm_tex_2d_array_grad_v4i32_f32:
+ return NVPTXISD::Tex2DArrayI32FloatGrad;
+
+ case Intrinsic::nvvm_tex_3d_v4f32_i32:
+ return NVPTXISD::Tex3DFloatI32;
+ case Intrinsic::nvvm_tex_3d_v4f32_f32:
+ return NVPTXISD::Tex3DFloatFloat;
+ case Intrinsic::nvvm_tex_3d_level_v4f32_f32:
+ return NVPTXISD::Tex3DFloatFloatLevel;
+ case Intrinsic::nvvm_tex_3d_grad_v4f32_f32:
+ return NVPTXISD::Tex3DFloatFloatGrad;
+ case Intrinsic::nvvm_tex_3d_v4i32_i32:
+ return NVPTXISD::Tex3DI32I32;
+ case Intrinsic::nvvm_tex_3d_v4i32_f32:
+ return NVPTXISD::Tex3DI32Float;
+ case Intrinsic::nvvm_tex_3d_level_v4i32_f32:
+ return NVPTXISD::Tex3DI32FloatLevel;
+ case Intrinsic::nvvm_tex_3d_grad_v4i32_f32:
+ return NVPTXISD::Tex3DI32FloatGrad;
+ }
+}
+
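getOpcForTextureInstr (and getOpcForSurfaceInstr below) return 0 for unmapped intrinsics; since target-specific NVPTXISD opcodes sit above ISD::BUILTIN_OP_END, 0 cannot collide with a real opcode, so the result doubles as a "not one of ours" test. A trivial illustration (hypothetical helper name, not in the patch):

static bool isTextureIntrinsic(unsigned IntrinsicID) {
  return getOpcForTextureInstr(IntrinsicID) != 0;
}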
+static unsigned getOpcForSurfaceInstr(unsigned Intrinsic) {
+ switch (Intrinsic) {
+ default:
+ return 0;
+ case Intrinsic::nvvm_suld_1d_i8_trap:
+ return NVPTXISD::Suld1DI8Trap;
+ case Intrinsic::nvvm_suld_1d_i16_trap:
+ return NVPTXISD::Suld1DI16Trap;
+ case Intrinsic::nvvm_suld_1d_i32_trap:
+ return NVPTXISD::Suld1DI32Trap;
+ case Intrinsic::nvvm_suld_1d_v2i8_trap:
+ return NVPTXISD::Suld1DV2I8Trap;
+ case Intrinsic::nvvm_suld_1d_v2i16_trap:
+ return NVPTXISD::Suld1DV2I16Trap;
+ case Intrinsic::nvvm_suld_1d_v2i32_trap:
+ return NVPTXISD::Suld1DV2I32Trap;
+ case Intrinsic::nvvm_suld_1d_v4i8_trap:
+ return NVPTXISD::Suld1DV4I8Trap;
+ case Intrinsic::nvvm_suld_1d_v4i16_trap:
+ return NVPTXISD::Suld1DV4I16Trap;
+ case Intrinsic::nvvm_suld_1d_v4i32_trap:
+ return NVPTXISD::Suld1DV4I32Trap;
+ case Intrinsic::nvvm_suld_1d_array_i8_trap:
+ return NVPTXISD::Suld1DArrayI8Trap;
+ case Intrinsic::nvvm_suld_1d_array_i16_trap:
+ return NVPTXISD::Suld1DArrayI16Trap;
+ case Intrinsic::nvvm_suld_1d_array_i32_trap:
+ return NVPTXISD::Suld1DArrayI32Trap;
+ case Intrinsic::nvvm_suld_1d_array_v2i8_trap:
+ return NVPTXISD::Suld1DArrayV2I8Trap;
+ case Intrinsic::nvvm_suld_1d_array_v2i16_trap:
+ return NVPTXISD::Suld1DArrayV2I16Trap;
+ case Intrinsic::nvvm_suld_1d_array_v2i32_trap:
+ return NVPTXISD::Suld1DArrayV2I32Trap;
+ case Intrinsic::nvvm_suld_1d_array_v4i8_trap:
+ return NVPTXISD::Suld1DArrayV4I8Trap;
+ case Intrinsic::nvvm_suld_1d_array_v4i16_trap:
+ return NVPTXISD::Suld1DArrayV4I16Trap;
+ case Intrinsic::nvvm_suld_1d_array_v4i32_trap:
+ return NVPTXISD::Suld1DArrayV4I32Trap;
+ case Intrinsic::nvvm_suld_2d_i8_trap:
+ return NVPTXISD::Suld2DI8Trap;
+ case Intrinsic::nvvm_suld_2d_i16_trap:
+ return NVPTXISD::Suld2DI16Trap;
+ case Intrinsic::nvvm_suld_2d_i32_trap:
+ return NVPTXISD::Suld2DI32Trap;
+ case Intrinsic::nvvm_suld_2d_v2i8_trap:
+ return NVPTXISD::Suld2DV2I8Trap;
+ case Intrinsic::nvvm_suld_2d_v2i16_trap:
+ return NVPTXISD::Suld2DV2I16Trap;
+ case Intrinsic::nvvm_suld_2d_v2i32_trap:
+ return NVPTXISD::Suld2DV2I32Trap;
+ case Intrinsic::nvvm_suld_2d_v4i8_trap:
+ return NVPTXISD::Suld2DV4I8Trap;
+ case Intrinsic::nvvm_suld_2d_v4i16_trap:
+ return NVPTXISD::Suld2DV4I16Trap;
+ case Intrinsic::nvvm_suld_2d_v4i32_trap:
+ return NVPTXISD::Suld2DV4I32Trap;
+ case Intrinsic::nvvm_suld_2d_array_i8_trap:
+ return NVPTXISD::Suld2DArrayI8Trap;
+ case Intrinsic::nvvm_suld_2d_array_i16_trap:
+ return NVPTXISD::Suld2DArrayI16Trap;
+ case Intrinsic::nvvm_suld_2d_array_i32_trap:
+ return NVPTXISD::Suld2DArrayI32Trap;
+ case Intrinsic::nvvm_suld_2d_array_v2i8_trap:
+ return NVPTXISD::Suld2DArrayV2I8Trap;
+ case Intrinsic::nvvm_suld_2d_array_v2i16_trap:
+ return NVPTXISD::Suld2DArrayV2I16Trap;
+ case Intrinsic::nvvm_suld_2d_array_v2i32_trap:
+ return NVPTXISD::Suld2DArrayV2I32Trap;
+ case Intrinsic::nvvm_suld_2d_array_v4i8_trap:
+ return NVPTXISD::Suld2DArrayV4I8Trap;
+ case Intrinsic::nvvm_suld_2d_array_v4i16_trap:
+ return NVPTXISD::Suld2DArrayV4I16Trap;
+ case Intrinsic::nvvm_suld_2d_array_v4i32_trap:
+ return NVPTXISD::Suld2DArrayV4I32Trap;
+ case Intrinsic::nvvm_suld_3d_i8_trap:
+ return NVPTXISD::Suld3DI8Trap;
+ case Intrinsic::nvvm_suld_3d_i16_trap:
+ return NVPTXISD::Suld3DI16Trap;
+ case Intrinsic::nvvm_suld_3d_i32_trap:
+ return NVPTXISD::Suld3DI32Trap;
+ case Intrinsic::nvvm_suld_3d_v2i8_trap:
+ return NVPTXISD::Suld3DV2I8Trap;
+ case Intrinsic::nvvm_suld_3d_v2i16_trap:
+ return NVPTXISD::Suld3DV2I16Trap;
+ case Intrinsic::nvvm_suld_3d_v2i32_trap:
+ return NVPTXISD::Suld3DV2I32Trap;
+ case Intrinsic::nvvm_suld_3d_v4i8_trap:
+ return NVPTXISD::Suld3DV4I8Trap;
+ case Intrinsic::nvvm_suld_3d_v4i16_trap:
+ return NVPTXISD::Suld3DV4I16Trap;
+ case Intrinsic::nvvm_suld_3d_v4i32_trap:
+ return NVPTXISD::Suld3DV4I32Trap;
+ }
+}
+
// llvm.ptx.memcpy.const and llvm.ptx.memmove.const need to be modeled as
// TgtMemIntrinsic
// because we need the information that is only available in the "Value" type
@@ -1944,6 +2237,142 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic(
Info.align = 0;
return true;
+ case Intrinsic::nvvm_tex_1d_v4f32_i32:
+ case Intrinsic::nvvm_tex_1d_v4f32_f32:
+ case Intrinsic::nvvm_tex_1d_level_v4f32_f32:
+ case Intrinsic::nvvm_tex_1d_grad_v4f32_f32:
+ case Intrinsic::nvvm_tex_1d_array_v4f32_i32:
+ case Intrinsic::nvvm_tex_1d_array_v4f32_f32:
+ case Intrinsic::nvvm_tex_1d_array_level_v4f32_f32:
+ case Intrinsic::nvvm_tex_1d_array_grad_v4f32_f32:
+ case Intrinsic::nvvm_tex_2d_v4f32_i32:
+ case Intrinsic::nvvm_tex_2d_v4f32_f32:
+ case Intrinsic::nvvm_tex_2d_level_v4f32_f32:
+ case Intrinsic::nvvm_tex_2d_grad_v4f32_f32:
+ case Intrinsic::nvvm_tex_2d_array_v4f32_i32:
+ case Intrinsic::nvvm_tex_2d_array_v4f32_f32:
+ case Intrinsic::nvvm_tex_2d_array_level_v4f32_f32:
+ case Intrinsic::nvvm_tex_2d_array_grad_v4f32_f32:
+ case Intrinsic::nvvm_tex_3d_v4f32_i32:
+ case Intrinsic::nvvm_tex_3d_v4f32_f32:
+ case Intrinsic::nvvm_tex_3d_level_v4f32_f32:
+ case Intrinsic::nvvm_tex_3d_grad_v4f32_f32: {
+ Info.opc = getOpcForTextureInstr(Intrinsic);
+ Info.memVT = MVT::f32;
+ Info.ptrVal = nullptr;
+ Info.offset = 0;
+ Info.vol = 0;
+ Info.readMem = true;
+ Info.writeMem = false;
+ Info.align = 16;
+ return true;
+ }
+ case Intrinsic::nvvm_tex_1d_v4i32_i32:
+ case Intrinsic::nvvm_tex_1d_v4i32_f32:
+ case Intrinsic::nvvm_tex_1d_level_v4i32_f32:
+ case Intrinsic::nvvm_tex_1d_grad_v4i32_f32:
+ case Intrinsic::nvvm_tex_1d_array_v4i32_i32:
+ case Intrinsic::nvvm_tex_1d_array_v4i32_f32:
+ case Intrinsic::nvvm_tex_1d_array_level_v4i32_f32:
+ case Intrinsic::nvvm_tex_1d_array_grad_v4i32_f32:
+ case Intrinsic::nvvm_tex_2d_v4i32_i32:
+ case Intrinsic::nvvm_tex_2d_v4i32_f32:
+ case Intrinsic::nvvm_tex_2d_level_v4i32_f32:
+ case Intrinsic::nvvm_tex_2d_grad_v4i32_f32:
+ case Intrinsic::nvvm_tex_2d_array_v4i32_i32:
+ case Intrinsic::nvvm_tex_2d_array_v4i32_f32:
+ case Intrinsic::nvvm_tex_2d_array_level_v4i32_f32:
+ case Intrinsic::nvvm_tex_2d_array_grad_v4i32_f32:
+ case Intrinsic::nvvm_tex_3d_v4i32_i32:
+ case Intrinsic::nvvm_tex_3d_v4i32_f32:
+ case Intrinsic::nvvm_tex_3d_level_v4i32_f32:
+ case Intrinsic::nvvm_tex_3d_grad_v4i32_f32: {
+ Info.opc = getOpcForTextureInstr(Intrinsic);
+ Info.memVT = MVT::i32;
+ Info.ptrVal = nullptr;
+ Info.offset = 0;
+ Info.vol = 0;
+ Info.readMem = true;
+ Info.writeMem = false;
+ Info.align = 16;
+ return true;
+ }
+ case Intrinsic::nvvm_suld_1d_i8_trap:
+ case Intrinsic::nvvm_suld_1d_v2i8_trap:
+ case Intrinsic::nvvm_suld_1d_v4i8_trap:
+ case Intrinsic::nvvm_suld_1d_array_i8_trap:
+ case Intrinsic::nvvm_suld_1d_array_v2i8_trap:
+ case Intrinsic::nvvm_suld_1d_array_v4i8_trap:
+ case Intrinsic::nvvm_suld_2d_i8_trap:
+ case Intrinsic::nvvm_suld_2d_v2i8_trap:
+ case Intrinsic::nvvm_suld_2d_v4i8_trap:
+ case Intrinsic::nvvm_suld_2d_array_i8_trap:
+ case Intrinsic::nvvm_suld_2d_array_v2i8_trap:
+ case Intrinsic::nvvm_suld_2d_array_v4i8_trap:
+ case Intrinsic::nvvm_suld_3d_i8_trap:
+ case Intrinsic::nvvm_suld_3d_v2i8_trap:
+ case Intrinsic::nvvm_suld_3d_v4i8_trap: {
+ Info.opc = getOpcForSurfaceInstr(Intrinsic);
+ Info.memVT = MVT::i8;
+ Info.ptrVal = nullptr;
+ Info.offset = 0;
+ Info.vol = 0;
+ Info.readMem = true;
+ Info.writeMem = false;
+ Info.align = 16;
+ return true;
+ }
+ case Intrinsic::nvvm_suld_1d_i16_trap:
+ case Intrinsic::nvvm_suld_1d_v2i16_trap:
+ case Intrinsic::nvvm_suld_1d_v4i16_trap:
+ case Intrinsic::nvvm_suld_1d_array_i16_trap:
+ case Intrinsic::nvvm_suld_1d_array_v2i16_trap:
+ case Intrinsic::nvvm_suld_1d_array_v4i16_trap:
+ case Intrinsic::nvvm_suld_2d_i16_trap:
+ case Intrinsic::nvvm_suld_2d_v2i16_trap:
+ case Intrinsic::nvvm_suld_2d_v4i16_trap:
+ case Intrinsic::nvvm_suld_2d_array_i16_trap:
+ case Intrinsic::nvvm_suld_2d_array_v2i16_trap:
+ case Intrinsic::nvvm_suld_2d_array_v4i16_trap:
+ case Intrinsic::nvvm_suld_3d_i16_trap:
+ case Intrinsic::nvvm_suld_3d_v2i16_trap:
+ case Intrinsic::nvvm_suld_3d_v4i16_trap: {
+ Info.opc = getOpcForSurfaceInstr(Intrinsic);
+ Info.memVT = MVT::i16;
+ Info.ptrVal = nullptr;
+ Info.offset = 0;
+ Info.vol = 0;
+ Info.readMem = true;
+ Info.writeMem = false;
+ Info.align = 16;
+ return true;
+ }
+ case Intrinsic::nvvm_suld_1d_i32_trap:
+ case Intrinsic::nvvm_suld_1d_v2i32_trap:
+ case Intrinsic::nvvm_suld_1d_v4i32_trap:
+ case Intrinsic::nvvm_suld_1d_array_i32_trap:
+ case Intrinsic::nvvm_suld_1d_array_v2i32_trap:
+ case Intrinsic::nvvm_suld_1d_array_v4i32_trap:
+ case Intrinsic::nvvm_suld_2d_i32_trap:
+ case Intrinsic::nvvm_suld_2d_v2i32_trap:
+ case Intrinsic::nvvm_suld_2d_v4i32_trap:
+ case Intrinsic::nvvm_suld_2d_array_i32_trap:
+ case Intrinsic::nvvm_suld_2d_array_v2i32_trap:
+ case Intrinsic::nvvm_suld_2d_array_v4i32_trap:
+ case Intrinsic::nvvm_suld_3d_i32_trap:
+ case Intrinsic::nvvm_suld_3d_v2i32_trap:
+ case Intrinsic::nvvm_suld_3d_v4i32_trap: {
+ Info.opc = getOpcForSurfaceInstr(Intrinsic);
+ Info.memVT = MVT::i32;
+ Info.ptrVal = nullptr;
+ Info.offset = 0;
+ Info.vol = 0;
+ Info.readMem = true;
+ Info.writeMem = false;
+ Info.align = 16;
+ return true;
+ }
+
}
return false;
}
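The getOpcForTextureInstr and getOpcForSurfaceInstr helpers used above map each NVVM intrinsic ID to the matching NVPTXISD target node; their bodies sit outside this hunk. A minimal sketch of the shape of that mapping, using two pairs that do appear in this patch (the real helper enumerates every intrinsic in the switch):

    // Sketch only: the full helper covers all texture intrinsics.
    static unsigned getOpcForTextureInstr(unsigned Intrinsic) {
      switch (Intrinsic) {
      default:
        return 0; // not a texture intrinsic
      case Intrinsic::nvvm_tex_3d_v4f32_f32:
        return NVPTXISD::Tex3DFloatFloat;
      case Intrinsic::nvvm_tex_1d_v4i32_f32:
        return NVPTXISD::Tex1DI32Float;
      }
    }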
@@ -2094,7 +2523,7 @@ static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG,
case 4: {
Opcode = NVPTXISD::LoadV4;
EVT ListVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other };
- LdResVTs = DAG.getVTList(ListVTs, 5);
+ LdResVTs = DAG.getVTList(ListVTs);
break;
}
}
@@ -2111,8 +2540,8 @@ static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG,
// pass along the extension information
OtherOps.push_back(DAG.getIntPtrConstant(LD->getExtensionType()));
- SDValue NewLD = DAG.getMemIntrinsicNode(Opcode, DL, LdResVTs, &OtherOps[0],
- OtherOps.size(), LD->getMemoryVT(),
+ SDValue NewLD = DAG.getMemIntrinsicNode(Opcode, DL, LdResVTs, OtherOps,
+ LD->getMemoryVT(),
LD->getMemOperand());
SmallVector<SDValue, 4> ScalarRes;
@@ -2126,8 +2555,7 @@ static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG,
SDValue LoadChain = NewLD.getValue(NumElts);
- SDValue BuildVec =
- DAG.getNode(ISD::BUILD_VECTOR, DL, ResVT, &ScalarRes[0], NumElts);
+ SDValue BuildVec = DAG.getNode(ISD::BUILD_VECTOR, DL, ResVT, ScalarRes);
Results.push_back(BuildVec);
Results.push_back(LoadChain);
@@ -2207,7 +2635,7 @@ static void ReplaceINTRINSIC_W_CHAIN(SDNode *N, SelectionDAG &DAG,
break;
}
EVT ListVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other };
- LdResVTs = DAG.getVTList(ListVTs, 5);
+ LdResVTs = DAG.getVTList(ListVTs);
break;
}
}
@@ -2224,9 +2652,9 @@ static void ReplaceINTRINSIC_W_CHAIN(SDNode *N, SelectionDAG &DAG,
MemIntrinsicSDNode *MemSD = cast<MemIntrinsicSDNode>(N);
- SDValue NewLD = DAG.getMemIntrinsicNode(
- Opcode, DL, LdResVTs, &OtherOps[0], OtherOps.size(),
- MemSD->getMemoryVT(), MemSD->getMemOperand());
+ SDValue NewLD = DAG.getMemIntrinsicNode(Opcode, DL, LdResVTs, OtherOps,
+ MemSD->getMemoryVT(),
+ MemSD->getMemOperand());
SmallVector<SDValue, 4> ScalarRes;
@@ -2241,7 +2669,7 @@ static void ReplaceINTRINSIC_W_CHAIN(SDNode *N, SelectionDAG &DAG,
SDValue LoadChain = NewLD.getValue(NumElts);
SDValue BuildVec =
- DAG.getNode(ISD::BUILD_VECTOR, DL, ResVT, &ScalarRes[0], NumElts);
+ DAG.getNode(ISD::BUILD_VECTOR, DL, ResVT, ScalarRes);
Results.push_back(BuildVec);
Results.push_back(LoadChain);
@@ -2263,8 +2691,8 @@ static void ReplaceINTRINSIC_W_CHAIN(SDNode *N, SelectionDAG &DAG,
// We make sure the memory type is i8, which will be used during isel
// to select the proper instruction.
SDValue NewLD =
- DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, LdResVTs, &Ops[0],
- Ops.size(), MVT::i8, MemSD->getMemOperand());
+ DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, LdResVTs, Ops,
+ MVT::i8, MemSD->getMemOperand());
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i8,
NewLD.getValue(0)));
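The repeated mechanical change in the hunks above tracks an LLVM 3.5 SelectionDAG API cleanup: getVTList, getNode, and getMemIntrinsicNode now accept ArrayRef arguments instead of pointer-plus-count pairs, so the element count is deduced from the container and can no longer drift out of sync with the operand list. A before/after sketch (DL, ResVT, and EltVT are assumed in scope, as in the surrounding code):

    EVT ListVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other };
    // Old API: caller passes an explicit element count.
    //   SDVTList VTs = DAG.getVTList(ListVTs, 5);
    // New API: ArrayRef is deduced from the C array.
    SDVTList VTs = DAG.getVTList(ListVTs);

    SmallVector<SDValue, 8> Ops; // gathered operands
    SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, DL, ResVT, Ops);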
diff --git a/lib/Target/NVPTX/NVPTXISelLowering.h b/lib/Target/NVPTX/NVPTXISelLowering.h
index c1e8c21..7bad8a2 100644
--- a/lib/Target/NVPTX/NVPTXISelLowering.h
+++ b/lib/Target/NVPTX/NVPTXISelLowering.h
@@ -70,7 +70,100 @@ enum NodeType {
StoreParamU32, // to zext and store a <32bit value, not used currently
StoreRetval,
StoreRetvalV2,
- StoreRetvalV4
+ StoreRetvalV4,
+
+ // Texture intrinsics
+ Tex1DFloatI32,
+ Tex1DFloatFloat,
+ Tex1DFloatFloatLevel,
+ Tex1DFloatFloatGrad,
+ Tex1DI32I32,
+ Tex1DI32Float,
+ Tex1DI32FloatLevel,
+ Tex1DI32FloatGrad,
+ Tex1DArrayFloatI32,
+ Tex1DArrayFloatFloat,
+ Tex1DArrayFloatFloatLevel,
+ Tex1DArrayFloatFloatGrad,
+ Tex1DArrayI32I32,
+ Tex1DArrayI32Float,
+ Tex1DArrayI32FloatLevel,
+ Tex1DArrayI32FloatGrad,
+ Tex2DFloatI32,
+ Tex2DFloatFloat,
+ Tex2DFloatFloatLevel,
+ Tex2DFloatFloatGrad,
+ Tex2DI32I32,
+ Tex2DI32Float,
+ Tex2DI32FloatLevel,
+ Tex2DI32FloatGrad,
+ Tex2DArrayFloatI32,
+ Tex2DArrayFloatFloat,
+ Tex2DArrayFloatFloatLevel,
+ Tex2DArrayFloatFloatGrad,
+ Tex2DArrayI32I32,
+ Tex2DArrayI32Float,
+ Tex2DArrayI32FloatLevel,
+ Tex2DArrayI32FloatGrad,
+ Tex3DFloatI32,
+ Tex3DFloatFloat,
+ Tex3DFloatFloatLevel,
+ Tex3DFloatFloatGrad,
+ Tex3DI32I32,
+ Tex3DI32Float,
+ Tex3DI32FloatLevel,
+ Tex3DI32FloatGrad,
+
+ // Surface intrinsics
+ Suld1DI8Trap,
+ Suld1DI16Trap,
+ Suld1DI32Trap,
+ Suld1DV2I8Trap,
+ Suld1DV2I16Trap,
+ Suld1DV2I32Trap,
+ Suld1DV4I8Trap,
+ Suld1DV4I16Trap,
+ Suld1DV4I32Trap,
+
+ Suld1DArrayI8Trap,
+ Suld1DArrayI16Trap,
+ Suld1DArrayI32Trap,
+ Suld1DArrayV2I8Trap,
+ Suld1DArrayV2I16Trap,
+ Suld1DArrayV2I32Trap,
+ Suld1DArrayV4I8Trap,
+ Suld1DArrayV4I16Trap,
+ Suld1DArrayV4I32Trap,
+
+ Suld2DI8Trap,
+ Suld2DI16Trap,
+ Suld2DI32Trap,
+ Suld2DV2I8Trap,
+ Suld2DV2I16Trap,
+ Suld2DV2I32Trap,
+ Suld2DV4I8Trap,
+ Suld2DV4I16Trap,
+ Suld2DV4I32Trap,
+
+ Suld2DArrayI8Trap,
+ Suld2DArrayI16Trap,
+ Suld2DArrayI32Trap,
+ Suld2DArrayV2I8Trap,
+ Suld2DArrayV2I16Trap,
+ Suld2DArrayV2I32Trap,
+ Suld2DArrayV4I8Trap,
+ Suld2DArrayV4I16Trap,
+ Suld2DArrayV4I32Trap,
+
+ Suld3DI8Trap,
+ Suld3DI16Trap,
+ Suld3DI32Trap,
+ Suld3DV2I8Trap,
+ Suld3DV2I16Trap,
+ Suld3DV2I32Trap,
+ Suld3DV4I8Trap,
+ Suld3DV4I16Trap,
+ Suld3DV4I32Trap
};
}
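Each value added to this enum conventionally also gains a printable name in NVPTXTargetLowering::getTargetNodeName so that DAG dumps stay readable; those case additions are not shown in this hunk. A sketch of the convention for two of the new nodes:

    // Sketch: name lookup for DAG debug output.
    const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
      switch (Opcode) {
      // ... existing cases ...
      case NVPTXISD::Tex1DFloatFloat: return "NVPTXISD::Tex1DFloatFloat";
      case NVPTXISD::Suld1DI8Trap:    return "NVPTXISD::Suld1DI8Trap";
      default: return nullptr;
      }
    }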
@@ -80,68 +173,70 @@ enum NodeType {
class NVPTXTargetLowering : public TargetLowering {
public:
explicit NVPTXTargetLowering(NVPTXTargetMachine &TM);
- virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerGlobalAddress(const GlobalValue *GV, int64_t Offset,
SelectionDAG &DAG) const;
- virtual const char *getTargetNodeName(unsigned Opcode) const;
+ const char *getTargetNodeName(unsigned Opcode) const override;
bool isTypeSupportedInIntrinsic(MVT VT) const;
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
- unsigned Intrinsic) const;
+ unsigned Intrinsic) const override;
/// isLegalAddressingMode - Return true if the addressing mode represented
/// by AM is legal for this target, for a load/store of the specified type
/// Used to guide target specific optimizations, like loop strength
/// reduction (LoopStrengthReduce.cpp) and memory optimization for
/// address mode (CodeGenPrepare.cpp)
- virtual bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const;
+ bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const override;
/// getFunctionAlignment - Return the Log2 alignment of this function.
- virtual unsigned getFunctionAlignment(const Function *F) const;
+ unsigned getFunctionAlignment(const Function *F) const;
- virtual EVT getSetCCResultType(LLVMContext &, EVT VT) const {
+ EVT getSetCCResultType(LLVMContext &, EVT VT) const override {
if (VT.isVector())
return MVT::getVectorVT(MVT::i1, VT.getVectorNumElements());
return MVT::i1;
}
- ConstraintType getConstraintType(const std::string &Constraint) const;
+ ConstraintType
+ getConstraintType(const std::string &Constraint) const override;
std::pair<unsigned, const TargetRegisterClass *>
- getRegForInlineAsmConstraint(const std::string &Constraint, MVT VT) const;
+ getRegForInlineAsmConstraint(const std::string &Constraint,
+ MVT VT) const override;
- virtual SDValue LowerFormalArguments(
+ SDValue LowerFormalArguments(
SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins, SDLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const;
+ SmallVectorImpl<SDValue> &InVals) const override;
- virtual SDValue
- LowerCall(CallLoweringInfo &CLI, SmallVectorImpl<SDValue> &InVals) const;
+ SDValue LowerCall(CallLoweringInfo &CLI,
+ SmallVectorImpl<SDValue> &InVals) const override;
std::string getPrototype(Type *, const ArgListTy &,
const SmallVectorImpl<ISD::OutputArg> &,
unsigned retAlignment,
const ImmutableCallSite *CS) const;
- virtual SDValue
+ SDValue
LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals, SDLoc dl,
- SelectionDAG &DAG) const;
+ SelectionDAG &DAG) const override;
- virtual void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
- std::vector<SDValue> &Ops,
- SelectionDAG &DAG) const;
+ void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
+ std::vector<SDValue> &Ops,
+ SelectionDAG &DAG) const override;
NVPTXTargetMachine *nvTM;
// PTX always uses 32-bit shift amounts
- virtual MVT getScalarShiftAmountTy(EVT LHSTy) const { return MVT::i32; }
+ MVT getScalarShiftAmountTy(EVT LHSTy) const override { return MVT::i32; }
- virtual bool shouldSplitVectorType(EVT VT) const override;
+ bool shouldSplitVectorType(EVT VT) const override;
private:
const NVPTXSubtarget &nvptxSubtarget; // cache the subtarget here
@@ -160,8 +255,8 @@ private:
SDValue LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const;
- virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
- SelectionDAG &DAG) const;
+ void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG) const override;
unsigned getArgumentAlignment(SDValue Callee, const ImmutableCallSite *CS,
Type *Ty, unsigned Idx) const;
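The header changes above are part of a mechanical C++11 cleanup applied throughout this rebase: the leading virtual keyword is dropped from overriding declarations in favor of a trailing override, so a declaration that no longer matches any base-class virtual becomes a compile error instead of silently introducing a new function. A self-contained illustration, independent of the NVPTX classes:

    struct Base {
      virtual const char *name() const { return "base"; }
    };
    struct Derived : Base {
      // With 'override', dropping 'const' here (a signature mismatch)
      // would fail to compile rather than silently hide Base::name.
      const char *name() const override { return "derived"; }
    };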
diff --git a/lib/Target/NVPTX/NVPTXImageOptimizer.cpp b/lib/Target/NVPTX/NVPTXImageOptimizer.cpp
new file mode 100644
index 0000000..397f4bc
--- /dev/null
+++ b/lib/Target/NVPTX/NVPTXImageOptimizer.cpp
@@ -0,0 +1,178 @@
+//===-- NVPTXImageOptimizer.cpp - Image optimization pass -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass implements IR-level optimizations of image access code,
+// including:
+//
+// 1. Eliminate istypep intrinsics when the image access qualifier is known
+//
+//===----------------------------------------------------------------------===//
+
+#include "NVPTX.h"
+#include "NVPTXUtilities.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/ConstantFolding.h"
+
+using namespace llvm;
+
+namespace {
+class NVPTXImageOptimizer : public FunctionPass {
+private:
+ static char ID;
+ SmallVector<Instruction*, 4> InstrToDelete;
+
+public:
+ NVPTXImageOptimizer();
+
+ bool runOnFunction(Function &F) override;
+
+private:
+ bool replaceIsTypePSampler(Instruction &I);
+ bool replaceIsTypePSurface(Instruction &I);
+ bool replaceIsTypePTexture(Instruction &I);
+ Value *cleanupValue(Value *V);
+ void replaceWith(Instruction *From, ConstantInt *To);
+};
+}
+
+char NVPTXImageOptimizer::ID = 0;
+
+NVPTXImageOptimizer::NVPTXImageOptimizer()
+ : FunctionPass(ID) {}
+
+bool NVPTXImageOptimizer::runOnFunction(Function &F) {
+ bool Changed = false;
+ InstrToDelete.clear();
+
+ // Look for call instructions in the function
+ for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE;
+ ++BI) {
+ for (BasicBlock::iterator I = (*BI).begin(), E = (*BI).end();
+ I != E; ++I) {
+ Instruction &Instr = *I;
+ if (CallInst *CI = dyn_cast<CallInst>(I)) {
+ Function *CalledF = CI->getCalledFunction();
+ if (CalledF && CalledF->isIntrinsic()) {
+ // This is an intrinsic function call; check whether it is an istypep
+ switch (CalledF->getIntrinsicID()) {
+ default: break;
+ case Intrinsic::nvvm_istypep_sampler:
+ Changed |= replaceIsTypePSampler(Instr);
+ break;
+ case Intrinsic::nvvm_istypep_surface:
+ Changed |= replaceIsTypePSurface(Instr);
+ break;
+ case Intrinsic::nvvm_istypep_texture:
+ Changed |= replaceIsTypePTexture(Instr);
+ break;
+ }
+ }
+ }
+ }
+ }
+
+ // Delete any istypep instances we replaced in the IR
+ for (unsigned i = 0, e = InstrToDelete.size(); i != e; ++i)
+ InstrToDelete[i]->eraseFromParent();
+
+ return Changed;
+}
+
+bool NVPTXImageOptimizer::replaceIsTypePSampler(Instruction &I) {
+ Value *TexHandle = cleanupValue(I.getOperand(0));
+ if (isSampler(*TexHandle)) {
+ // This is an OpenCL sampler, so it must be a samplerref
+ replaceWith(&I, ConstantInt::getTrue(I.getContext()));
+ return true;
+ } else if (isImageWriteOnly(*TexHandle) ||
+ isImageReadWrite(*TexHandle) ||
+ isImageReadOnly(*TexHandle)) {
+ // This is an OpenCL image, so it cannot be a samplerref
+ replaceWith(&I, ConstantInt::getFalse(I.getContext()));
+ return true;
+ } else {
+ // The image type is unknown, so we cannot eliminate the intrinsic
+ return false;
+ }
+}
+
+bool NVPTXImageOptimizer::replaceIsTypePSurface(Instruction &I) {
+ Value *TexHandle = cleanupValue(I.getOperand(0));
+ if (isImageReadWrite(*TexHandle) ||
+ isImageWriteOnly(*TexHandle)) {
+ // This is an OpenCL read-write/write-only image, so it must be a surfref
+ replaceWith(&I, ConstantInt::getTrue(I.getContext()));
+ return true;
+ } else if (isImageReadOnly(*TexHandle) ||
+ isSampler(*TexHandle)) {
+ // This is an OpenCL read-only image or a sampler, so it cannot be
+ // a surfref
+ replaceWith(&I, ConstantInt::getFalse(I.getContext()));
+ return true;
+ } else {
+ // The image type is unknown, so we cannot eliminate the intrinsic
+ return false;
+ }
+}
+
+bool NVPTXImageOptimizer::replaceIsTypePTexture(Instruction &I) {
+ Value *TexHandle = cleanupValue(I.getOperand(0));
+ if (isImageReadOnly(*TexHandle)) {
+ // This is an OpenCL read-only image, so it must be a texref
+ replaceWith(&I, ConstantInt::getTrue(I.getContext()));
+ return true;
+ } else if (isImageWriteOnly(*TexHandle) ||
+ isImageReadWrite(*TexHandle) ||
+ isSampler(*TexHandle)) {
+ // This is an OpenCL read-write/write-only image or a sampler, so it
+ // cannot be a texref
+ replaceWith(&I, ConstantInt::getFalse(I.getContext()));
+ return true;
+ } else {
+ // The image type is unknown, so we cannot eliminate the intrinsic
+ return false;
+ }
+}
+
+void NVPTXImageOptimizer::replaceWith(Instruction *From, ConstantInt *To) {
+ // We implement "poor man's DCE" here to make sure any code that is no longer
+ // live is actually unreachable and can be trivially eliminated by the
+ // unreachable block elimination pass.
+ for (CallInst::use_iterator UI = From->use_begin(), UE = From->use_end();
+ UI != UE; ++UI) {
+ if (BranchInst *BI = dyn_cast<BranchInst>(*UI)) {
+ if (BI->isUnconditional()) continue;
+ BasicBlock *Dest;
+ if (To->isZero())
+ // Get false block
+ Dest = BI->getSuccessor(1);
+ else
+ // Get true block
+ Dest = BI->getSuccessor(0);
+ BranchInst::Create(Dest, BI);
+ InstrToDelete.push_back(BI);
+ }
+ }
+ From->replaceAllUsesWith(To);
+ InstrToDelete.push_back(From);
+}
+
+Value *NVPTXImageOptimizer::cleanupValue(Value *V) {
+ if (ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(V)) {
+ return cleanupValue(EVI->getAggregateOperand());
+ }
+ return V;
+}
+
+FunctionPass *llvm::createNVPTXImageOptimizerPass() {
+ return new NVPTXImageOptimizer();
+}
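createNVPTXImageOptimizerPass is the only symbol this new file exports; scheduling the pass in the backend pipeline happens in the target's pass configuration, outside this file. A sketch of that wiring (the exact hook used is an assumption, not shown in this patch):

    // Sketch: run the image optimizer before instruction selection so
    // dead istypep branches are gone by the time handles are lowered.
    bool NVPTXPassConfig::addInstSelector() {
      addPass(createNVPTXImageOptimizerPass());
      // ... remaining instruction selection setup ...
      return false;
    }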
diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.cpp b/lib/Target/NVPTX/NVPTXInstrInfo.cpp
index 86ddd38..cdc8088 100644
--- a/lib/Target/NVPTX/NVPTXInstrInfo.cpp
+++ b/lib/Target/NVPTX/NVPTXInstrInfo.cpp
@@ -14,8 +14,6 @@
#include "NVPTX.h"
#include "NVPTXInstrInfo.h"
#include "NVPTXTargetMachine.h"
-#define GET_INSTRINFO_CTOR_DTOR
-#include "NVPTXGenInstrInfo.inc"
#include "llvm/IR/Function.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -24,6 +22,9 @@
using namespace llvm;
+#define GET_INSTRINFO_CTOR_DTOR
+#include "NVPTXGenInstrInfo.inc"
+
// Pin the vtable to this file.
void NVPTXInstrInfo::anchor() {}
@@ -256,7 +257,7 @@ unsigned NVPTXInstrInfo::InsertBranch(
"NVPTX branch conditions have two components!");
// One-way branch.
- if (FBB == 0) {
+ if (!FBB) {
if (Cond.empty()) // Unconditional branch
BuildMI(&MBB, DL, get(NVPTX::GOTO)).addMBB(TBB);
else // Conditional branch
diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.h b/lib/Target/NVPTX/NVPTXInstrInfo.h
index 600fc5c..88a9e45 100644
--- a/lib/Target/NVPTX/NVPTXInstrInfo.h
+++ b/lib/Target/NVPTX/NVPTXInstrInfo.h
@@ -30,7 +30,7 @@ class NVPTXInstrInfo : public NVPTXGenInstrInfo {
public:
explicit NVPTXInstrInfo(NVPTXTargetMachine &TM);
- virtual const NVPTXRegisterInfo &getRegisterInfo() const { return RegInfo; }
+ const NVPTXRegisterInfo &getRegisterInfo() const { return RegInfo; }
/* The following virtual functions are used in register allocation.
* They are not implemented because the existing interface and the logic
@@ -50,9 +50,9 @@ public:
* const TargetRegisterClass *RC) const;
*/
- virtual void copyPhysReg(
+ void copyPhysReg(
MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL,
- unsigned DestReg, unsigned SrcReg, bool KillSrc) const;
+ unsigned DestReg, unsigned SrcReg, bool KillSrc) const override;
virtual bool isMoveInstr(const MachineInstr &MI, unsigned &SrcReg,
unsigned &DestReg) const;
bool isLoadInstr(const MachineInstr &MI, unsigned &AddrSpace) const;
@@ -61,13 +61,13 @@ public:
virtual bool CanTailMerge(const MachineInstr *MI) const;
// Branch analysis.
- virtual bool AnalyzeBranch(
+ bool AnalyzeBranch(
MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB,
- SmallVectorImpl<MachineOperand> &Cond, bool AllowModify) const;
- virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
- virtual unsigned InsertBranch(
+ SmallVectorImpl<MachineOperand> &Cond, bool AllowModify) const override;
+ unsigned RemoveBranch(MachineBasicBlock &MBB) const override;
+ unsigned InsertBranch(
MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond, DebugLoc DL) const;
+ const SmallVectorImpl<MachineOperand> &Cond, DebugLoc DL) const override;
unsigned getLdStCodeAddrSpace(const MachineInstr &MI) const {
return MI.getOperand(2).getImm();
}
diff --git a/lib/Target/NVPTX/NVPTXIntrinsics.td b/lib/Target/NVPTX/NVPTXIntrinsics.td
index 14049b1..5e228fc 100644
--- a/lib/Target/NVPTX/NVPTXIntrinsics.td
+++ b/lib/Target/NVPTX/NVPTXIntrinsics.td
@@ -1666,6 +1666,9 @@ def : Pat<(i32 (int_nvvm_ptr_gen_to_local (int_nvvm_ptr_local_to_gen
(MoveParam texternalsym:$src)))),
(nvvm_move_ptr32 texternalsym:$src)>;
+def texsurf_handles
+ : NVPTXInst<(outs Int64Regs:$result), (ins imem:$src),
+ "mov.u64 \t$result, $src;", []>;
//-----------------------------------
// Compiler Error Warn
@@ -1686,6 +1689,1826 @@ def INT_NVVM_COMPILER_ERROR_64 : NVPTXInst<(outs), (ins Int64Regs:$a),
[(int_nvvm_compiler_error Int64Regs:$a)]>;
+//-----------------------------------
+// Texture Intrinsics
+//-----------------------------------
+
+// NOTE: For Fermi support, any new texture/surface/sampler intrinsics must
+// also be defined in NVPTXReplaceImageHandles.cpp
+
+
+// Texture fetch instructions using handles
+def TEX_1D_F32_I32
+ : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
+ Float32Regs:$b, Float32Regs:$a),
+ (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x),
+ "tex.1d.v4.f32.s32\t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
+ []>;
+def TEX_1D_F32_F32
+ : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
+ Float32Regs:$b, Float32Regs:$a),
+ (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x),
+ "tex.1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
+ []>;
+def TEX_1D_F32_F32_LEVEL
+ : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
+ Float32Regs:$b, Float32Regs:$a),
+ (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$lod),
+ "tex.level.1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, $s, \\{$x\\}], $lod;",
+ []>;
+def TEX_1D_F32_F32_GRAD
+ : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
+ Float32Regs:$b, Float32Regs:$a),
+ (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
+ Float32Regs:$gradx, Float32Regs:$grady),
+ "tex.grad.1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
+ []>;
+def TEX_1D_I32_I32
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
+ Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x),
+ "tex.1d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
+ []>;
+def TEX_1D_I32_F32
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
+ Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x),
+ "tex.1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
+ []>;
+def TEX_1D_I32_F32_LEVEL
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
+ Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
+ Float32Regs:$lod),
+ "tex.level.1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, $s, \\{$x\\}], $lod;",
+ []>;
+def TEX_1D_I32_F32_GRAD
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
+ Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x,
+ Float32Regs:$gradx, Float32Regs:$grady),
+ "tex.grad.1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, $s, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};",
+ []>;
+
+def TEX_1D_ARRAY_F32_I32
+ : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
+ Float32Regs:$b, Float32Regs:$a),
+ (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
+ "tex.a1d.v4.f32.s32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, $s, \\{$l, $x\\}];",
+ []>;
+def TEX_1D_ARRAY_F32_F32
+ : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
+ Float32Regs:$b, Float32Regs:$a),
+ (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x),
+ "tex.a1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, $s, \\{$l, $x\\}];",
+ []>;
+def TEX_1D_ARRAY_F32_F32_LEVEL
+ : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
+ Float32Regs:$b, Float32Regs:$a),
+ (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
+ Float32Regs:$lod),
+ "tex.level.a1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, $s, \\{$l, $x\\}], $lod;",
+ []>;
+def TEX_1D_ARRAY_F32_F32_GRAD
+ : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
+ Float32Regs:$b, Float32Regs:$a),
+ (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
+ Float32Regs:$gradx, Float32Regs:$grady),
+ "tex.grad.a1d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, $s, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
+ []>;
+def TEX_1D_ARRAY_I32_I32
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
+ Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
+ "tex.a1d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, $s, \\{$l, $x\\}];",
+ []>;
+def TEX_1D_ARRAY_I32_F32
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
+ Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x),
+ "tex.a1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, $s, \\{$l, $x\\}];",
+ []>;
+def TEX_1D_ARRAY_I32_F32_LEVEL
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
+ Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
+ Float32Regs:$lod),
+ "tex.level.a1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, $s, \\{$l, $x\\}], $lod;",
+ []>;
+def TEX_1D_ARRAY_I32_F32_GRAD
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
+ Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
+ Float32Regs:$gradx, Float32Regs:$grady),
+ "tex.grad.a1d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, $s, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};",
+ []>;
+
+def TEX_2D_F32_I32
+ : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
+ Float32Regs:$b, Float32Regs:$a),
+ (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
+ "tex.2d.v4.f32.s32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, $s, \\{$x, $y\\}];",
+ []>;
+def TEX_2D_F32_F32
+ : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
+ Float32Regs:$b, Float32Regs:$a),
+ (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
+ "tex.2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, $s, \\{$x, $y\\}];",
+ []>;
+def TEX_2D_F32_F32_LEVEL
+ : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
+ Float32Regs:$b, Float32Regs:$a),
+ (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
+ Float32Regs:$lod),
+ "tex.level.2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, $s, \\{$x, $y\\}], $lod;",
+ []>;
+def TEX_2D_F32_F32_GRAD
+ : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
+ Float32Regs:$b, Float32Regs:$a),
+ (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
+ Float32Regs:$gradx0, Float32Regs:$gradx1,
+ Float32Regs:$grady0, Float32Regs:$grady1),
+ "tex.grad.2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, $s, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
+ "\\{$grady0, $grady1\\};",
+ []>;
+def TEX_2D_I32_I32
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
+ Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
+ "tex.2d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, $s, \\{$x, $y\\}];",
+ []>;
+def TEX_2D_I32_F32
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
+ Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y),
+ "tex.2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, $s, \\{$x, $y\\}];",
+ []>;
+def TEX_2D_I32_F32_LEVEL
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
+ Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
+ Float32Regs:$lod),
+ "tex.level.2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, $s, \\{$x, $y\\}], $lod;",
+ []>;
+def TEX_2D_I32_F32_GRAD
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
+ Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
+ Float32Regs:$gradx0, Float32Regs:$gradx1,
+ Float32Regs:$grady0, Float32Regs:$grady1),
+ "tex.grad.2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, $s, \\{$x, $y\\}], \\{$gradx0, $gradx1\\}, "
+ "\\{$grady0, $grady1\\};",
+ []>;
+
+def TEX_2D_ARRAY_F32_I32
+ : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
+ Float32Regs:$b, Float32Regs:$a),
+ (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ Int32Regs:$y),
+ "tex.a2d.v4.f32.s32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, $s, \\{$l, $x, $y, $y\\}];",
+ []>;
+def TEX_2D_ARRAY_F32_F32
+ : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
+ Float32Regs:$b, Float32Regs:$a),
+ (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
+ Float32Regs:$y),
+ "tex.a2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, $s, \\{$l, $x, $y, $y\\}];",
+ []>;
+def TEX_2D_ARRAY_F32_F32_LEVEL
+ : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
+ Float32Regs:$b, Float32Regs:$a),
+ (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
+ Float32Regs:$y, Float32Regs:$lod),
+ "tex.level.a2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, $s, \\{$l, $x, $y, $y\\}], $lod;",
+ []>;
+def TEX_2D_ARRAY_F32_F32_GRAD
+ : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
+ Float32Regs:$b, Float32Regs:$a),
+ (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
+ Float32Regs:$y, Float32Regs:$gradx0, Float32Regs:$gradx1,
+ Float32Regs:$grady0, Float32Regs:$grady1),
+ "tex.grad.a2d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, $s, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
+ "\\{$grady0, $grady1\\};",
+ []>;
+def TEX_2D_ARRAY_I32_I32
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
+ Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ Int32Regs:$y),
+ "tex.a2d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, $s, \\{$l, $x, $y, $y\\}];",
+ []>;
+def TEX_2D_ARRAY_I32_F32
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
+ Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
+ Float32Regs:$y),
+ "tex.a2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, $s, \\{$l, $x, $y, $y\\}];",
+ []>;
+def TEX_2D_ARRAY_I32_F32_LEVEL
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
+ Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
+ Float32Regs:$y, Float32Regs:$lod),
+ "tex.level.a2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, $s, \\{$l, $x, $y, $y\\}], $lod;",
+ []>;
+def TEX_2D_ARRAY_I32_F32_GRAD
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
+ Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$l, Float32Regs:$x,
+ Float32Regs:$y,
+ Float32Regs:$gradx0, Float32Regs:$gradx1,
+ Float32Regs:$grady0, Float32Regs:$grady1),
+ "tex.grad.a2d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, $s, \\{$l, $x, $y, $y\\}], \\{$gradx0, $gradx1\\}, "
+ "\\{$grady0, $grady1\\};",
+ []>;
+
+def TEX_3D_F32_I32
+ : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
+ Float32Regs:$b, Float32Regs:$a),
+ (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ Int32Regs:$z),
+ "tex.3d.v4.f32.s32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, $s, \\{$x, $y, $z, $z\\}];",
+ []>;
+def TEX_3D_F32_F32
+ : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
+ Float32Regs:$b, Float32Regs:$a),
+ (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
+ Float32Regs:$z),
+ "tex.3d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, $s, \\{$x, $y, $z, $z\\}];",
+ []>;
+def TEX_3D_F32_F32_LEVEL
+ : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
+ Float32Regs:$b, Float32Regs:$a),
+ (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
+ Float32Regs:$z, Float32Regs:$lod),
+ "tex.level.3d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
+ []>;
+def TEX_3D_F32_F32_GRAD
+ : NVPTXInst<(outs Float32Regs:$r, Float32Regs:$g,
+ Float32Regs:$b, Float32Regs:$a),
+ (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
+ Float32Regs:$z,
+ Float32Regs:$gradx0, Float32Regs:$gradx1,
+ Float32Regs:$gradx2, Float32Regs:$grady0,
+ Float32Regs:$grady1, Float32Regs:$grady2),
+ "tex.grad.3d.v4.f32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, $s, \\{$x, $y, $z, $z\\}], "
+ "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
+ "\\{$grady0, $grady1, $grady2, $grady2\\};",
+ []>;
+def TEX_3D_I32_I32
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
+ Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$t, Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ Int32Regs:$z),
+ "tex.3d.v4.s32.s32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, $s, \\{$x, $y, $z, $z\\}];",
+ []>;
+def TEX_3D_I32_F32
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
+ Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
+ Float32Regs:$z),
+ "tex.3d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, $s, \\{$x, $y, $z, $z\\}];",
+ []>;
+def TEX_3D_I32_F32_LEVEL
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
+ Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
+ Float32Regs:$z, Float32Regs:$lod),
+ "tex.level.3d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
+ []>;
+def TEX_3D_I32_F32_GRAD
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g,
+ Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$t, Int64Regs:$s, Float32Regs:$x, Float32Regs:$y,
+ Float32Regs:$z,
+ Float32Regs:$gradx0, Float32Regs:$gradx1,
+ Float32Regs:$gradx2, Float32Regs:$grady0,
+ Float32Regs:$grady1, Float32Regs:$grady2),
+ "tex.grad.3d.v4.s32.f32\t\\{$r, $g, $b, $a\\}, "
+ "[$t, $s, \\{$x, $y, $z, $z\\}], "
+ "\\{$gradx0, $gradx1, $gradx2, $gradx2\\}, "
+ "\\{$grady0, $grady1, $grady2, $grady2\\};",
+ []>;
+
+
+// Surface load instructions
+def SULD_1D_I8_TRAP
+ : NVPTXInst<(outs Int16Regs:$r),
+ (ins Int64Regs:$s, Int32Regs:$x),
+ "suld.b.1d.b8.trap \\{$r\\}, [$s, \\{$x\\}];",
+ []>;
+def SULD_1D_I16_TRAP
+ : NVPTXInst<(outs Int16Regs:$r),
+ (ins Int64Regs:$s, Int32Regs:$x),
+ "suld.b.1d.b16.trap \\{$r\\}, [$s, \\{$x\\}];",
+ []>;
+def SULD_1D_I32_TRAP
+ : NVPTXInst<(outs Int32Regs:$r),
+ (ins Int64Regs:$s, Int32Regs:$x),
+ "suld.b.1d.b32.trap \\{$r\\}, [$s, \\{$x\\}];",
+ []>;
+def SULD_1D_V2I8_TRAP
+ : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
+ (ins Int64Regs:$s, Int32Regs:$x),
+ "suld.b.1d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x\\}];",
+ []>;
+def SULD_1D_V2I16_TRAP
+ : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
+ (ins Int64Regs:$s, Int32Regs:$x),
+ "suld.b.1d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x\\}];",
+ []>;
+def SULD_1D_V2I32_TRAP
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
+ (ins Int64Regs:$s, Int32Regs:$x),
+ "suld.b.1d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x\\}];",
+ []>;
+def SULD_1D_V4I8_TRAP
+ : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ (ins Int64Regs:$s, Int32Regs:$x),
+ "suld.b.1d.v4.b8.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
+ []>;
+def SULD_1D_V4I16_TRAP
+ : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ (ins Int64Regs:$s, Int32Regs:$x),
+ "suld.b.1d.v4.b16.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
+ []>;
+def SULD_1D_V4I32_TRAP
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$s, Int32Regs:$x),
+ "suld.b.1d.v4.b32.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
+ []>;
+
+def SULD_1D_ARRAY_I8_TRAP
+ : NVPTXInst<(outs Int16Regs:$r),
+ (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
+ "suld.b.a1d.b8.trap \\{$r\\}, [$s, \\{$l, $x\\}];",
+ []>;
+def SULD_1D_ARRAY_I16_TRAP
+ : NVPTXInst<(outs Int16Regs:$r),
+ (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
+ "suld.b.a1d.b16.trap \\{$r\\}, [$s, \\{$l, $x\\}];",
+ []>;
+def SULD_1D_ARRAY_I32_TRAP
+ : NVPTXInst<(outs Int32Regs:$r),
+ (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
+ "suld.b.a1d.b32.trap \\{$r\\}, [$s, \\{$l, $x\\}];",
+ []>;
+def SULD_1D_ARRAY_V2I8_TRAP
+ : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
+ (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
+ "suld.b.a1d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
+ []>;
+def SULD_1D_ARRAY_V2I16_TRAP
+ : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
+ (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
+ "suld.b.a1d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
+ []>;
+def SULD_1D_ARRAY_V2I32_TRAP
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
+ (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
+ "suld.b.a1d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
+ []>;
+def SULD_1D_ARRAY_V4I8_TRAP
+ : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
+ "suld.b.a1d.v4.b8.trap \\{$r, $g, $b, $a\\}, "
+ "[$s, \\{$l, $x\\}];",
+ []>;
+def SULD_1D_ARRAY_V4I16_TRAP
+ : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
+ "suld.b.a1d.v4.b16.trap \\{$r, $g, $b, $a\\}, "
+ "[$s, \\{$l, $x\\}];",
+ []>;
+def SULD_1D_ARRAY_V4I32_TRAP
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x),
+ "suld.b.a1d.v4.b32.trap \\{$r, $g, $b, $a\\}, "
+ "[$s, \\{$l, $x\\}];",
+ []>;
+
+def SULD_2D_I8_TRAP
+ : NVPTXInst<(outs Int16Regs:$r),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
+ "suld.b.2d.b8.trap \\{$r\\}, [$s, \\{$x, $y\\}];",
+ []>;
+def SULD_2D_I16_TRAP
+ : NVPTXInst<(outs Int16Regs:$r),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
+ "suld.b.2d.b16.trap \\{$r\\}, [$s, \\{$x, $y\\}];",
+ []>;
+def SULD_2D_I32_TRAP
+ : NVPTXInst<(outs Int32Regs:$r),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
+ "suld.b.2d.b32.trap \\{$r\\}, [$s, \\{$x, $y\\}];",
+ []>;
+def SULD_2D_V2I8_TRAP
+ : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
+ "suld.b.2d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
+ []>;
+def SULD_2D_V2I16_TRAP
+ : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
+ "suld.b.2d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
+ []>;
+def SULD_2D_V2I32_TRAP
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
+ "suld.b.2d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
+ []>;
+def SULD_2D_V4I8_TRAP
+ : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
+ "suld.b.2d.v4.b8.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
+ []>;
+def SULD_2D_V4I16_TRAP
+ : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
+ "suld.b.2d.v4.b16.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
+ []>;
+def SULD_2D_V4I32_TRAP
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y),
+ "suld.b.2d.v4.b32.trap \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
+ []>;
+
+def SULD_2D_ARRAY_I8_TRAP
+ : NVPTXInst<(outs Int16Regs:$r),
+ (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
+ "suld.b.a2d.b8.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
+ []>;
+def SULD_2D_ARRAY_I16_TRAP
+ : NVPTXInst<(outs Int16Regs:$r),
+ (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
+ "suld.b.a2d.b16.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
+ []>;
+def SULD_2D_ARRAY_I32_TRAP
+ : NVPTXInst<(outs Int32Regs:$r),
+ (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
+ "suld.b.a2d.b32.trap \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
+ []>;
+def SULD_2D_ARRAY_V2I8_TRAP
+ : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
+ (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
+ "suld.b.a2d.v2.b8.trap \\{$r, $g\\}, "
+ "[$s, \\{$l, $x, $y, $y\\}];",
+ []>;
+def SULD_2D_ARRAY_V2I16_TRAP
+ : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
+ (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
+ "suld.b.a2d.v2.b16.trap \\{$r, $g\\}, "
+ "[$s, \\{$l, $x, $y, $y\\}];",
+ []>;
+def SULD_2D_ARRAY_V2I32_TRAP
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
+ (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
+ "suld.b.a2d.v2.b32.trap \\{$r, $g\\}, "
+ "[$s, \\{$l, $x, $y, $y\\}];",
+ []>;
+def SULD_2D_ARRAY_V4I8_TRAP
+ : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
+ "suld.b.a2d.v4.b8.trap \\{$r, $g, $b, $a\\}, "
+ "[$s, \\{$l, $x, $y, $y\\}];",
+ []>;
+def SULD_2D_ARRAY_V4I16_TRAP
+ : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
+ "suld.b.a2d.v4.b16.trap \\{$r, $g, $b, $a\\}, "
+ "[$s, \\{$l, $x, $y, $y\\}];",
+ []>;
+def SULD_2D_ARRAY_V4I32_TRAP
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y),
+ "suld.b.a2d.v4.b32.trap \\{$r, $g, $b, $a\\}, "
+ "[$s, \\{$l, $x, $y, $y\\}];",
+ []>;
+
+def SULD_3D_I8_TRAP
+ : NVPTXInst<(outs Int16Regs:$r),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
+ "suld.b.3d.b8.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
+ []>;
+def SULD_3D_I16_TRAP
+ : NVPTXInst<(outs Int16Regs:$r),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
+ "suld.b.3d.b16.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
+ []>;
+def SULD_3D_I32_TRAP
+ : NVPTXInst<(outs Int32Regs:$r),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
+ "suld.b.3d.b32.trap \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];",
+ []>;
+def SULD_3D_V2I8_TRAP
+ : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
+ "suld.b.3d.v2.b8.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
+ []>;
+def SULD_3D_V2I16_TRAP
+ : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
+ "suld.b.3d.v2.b16.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
+ []>;
+def SULD_3D_V2I32_TRAP
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
+ "suld.b.3d.v2.b32.trap \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
+ []>;
+def SULD_3D_V4I8_TRAP
+ : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
+ "suld.b.3d.v4.b8.trap \\{$r, $g, $b, $a\\}, "
+ "[$s, \\{$x, $y, $z, $z\\}];",
+ []>;
+def SULD_3D_V4I16_TRAP
+ : NVPTXInst<(outs Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
+ "suld.b.3d.v4.b16.trap \\{$r, $g, $b, $a\\}, "
+ "[$s, \\{$x, $y, $z, $z\\}];",
+ []>;
+def SULD_3D_V4I32_TRAP
+ : NVPTXInst<(outs Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z),
+ "suld.b.3d.v4.b32.trap \\{$r, $g, $b, $a\\}, "
+ "[$s, \\{$x, $y, $z, $z\\}];",
+ []>;
+
+
+//-----------------------------------
+// Texture Query Intrinsics
+//-----------------------------------
+def TXQ_CHANNEL_ORDER
+ : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
+ "txq.channel_order.b32 \t$d, [$a];",
+ []>;
+def TXQ_CHANNEL_DATA_TYPE
+ : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
+ "txq.channel_data_type.b32 \t$d, [$a];",
+ []>;
+def TXQ_WIDTH
+ : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
+ "txq.width.b32 \t$d, [$a];",
+ []>;
+def TXQ_HEIGHT
+ : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
+ "txq.height.b32 \t$d, [$a];",
+ []>;
+def TXQ_DEPTH
+ : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
+ "txq.depth.b32 \t$d, [$a];",
+ []>;
+def TXQ_ARRAY_SIZE
+ : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
+ "txq.array_size.b32 \t$d, [$a];",
+ []>;
+def TXQ_NUM_SAMPLES
+ : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
+ "txq.num_samples.b32 \t$d, [$a];",
+ []>;
+def TXQ_NUM_MIPMAP_LEVELS
+ : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
+ "txq.num_mipmap_levels.b32 \t$d, [$a];",
+ []>;
+
+def : Pat<(int_nvvm_txq_channel_order Int64Regs:$a),
+ (TXQ_CHANNEL_ORDER Int64Regs:$a)>;
+def : Pat<(int_nvvm_txq_channel_data_type Int64Regs:$a),
+ (TXQ_CHANNEL_DATA_TYPE Int64Regs:$a)>;
+def : Pat<(int_nvvm_txq_width Int64Regs:$a),
+ (TXQ_WIDTH Int64Regs:$a)>;
+def : Pat<(int_nvvm_txq_height Int64Regs:$a),
+ (TXQ_HEIGHT Int64Regs:$a)>;
+def : Pat<(int_nvvm_txq_depth Int64Regs:$a),
+ (TXQ_DEPTH Int64Regs:$a)>;
+def : Pat<(int_nvvm_txq_array_size Int64Regs:$a),
+ (TXQ_ARRAY_SIZE Int64Regs:$a)>;
+def : Pat<(int_nvvm_txq_num_samples Int64Regs:$a),
+ (TXQ_NUM_SAMPLES Int64Regs:$a)>;
+def : Pat<(int_nvvm_txq_num_mipmap_levels Int64Regs:$a),
+ (TXQ_NUM_MIPMAP_LEVELS Int64Regs:$a)>;
+
+
+//-----------------------------------
+// Surface Query Intrinsics
+//-----------------------------------
+def SUQ_CHANNEL_ORDER
+ : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
+ "suq.channel_order.b32 \t$d, [$a];",
+ []>;
+def SUQ_CHANNEL_DATA_TYPE
+ : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
+ "suq.channel_data_type.b32 \t$d, [$a];",
+ []>;
+def SUQ_WIDTH
+ : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
+ "suq.width.b32 \t$d, [$a];",
+ []>;
+def SUQ_HEIGHT
+ : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
+ "suq.height.b32 \t$d, [$a];",
+ []>;
+def SUQ_DEPTH
+ : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
+ "suq.depth.b32 \t$d, [$a];",
+ []>;
+def SUQ_ARRAY_SIZE
+ : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
+ "suq.array_size.b32 \t$d, [$a];",
+ []>;
+
+def : Pat<(int_nvvm_suq_channel_order Int64Regs:$a),
+ (SUQ_CHANNEL_ORDER Int64Regs:$a)>;
+def : Pat<(int_nvvm_suq_channel_data_type Int64Regs:$a),
+ (SUQ_CHANNEL_DATA_TYPE Int64Regs:$a)>;
+def : Pat<(int_nvvm_suq_width Int64Regs:$a),
+ (SUQ_WIDTH Int64Regs:$a)>;
+def : Pat<(int_nvvm_suq_height Int64Regs:$a),
+ (SUQ_HEIGHT Int64Regs:$a)>;
+def : Pat<(int_nvvm_suq_depth Int64Regs:$a),
+ (SUQ_DEPTH Int64Regs:$a)>;
+def : Pat<(int_nvvm_suq_array_size Int64Regs:$a),
+ (SUQ_ARRAY_SIZE Int64Regs:$a)>;
+
+
+//===- Handle Query -------------------------------------------------------===//
+
+// TODO: These intrinsics are not yet finalized, pending PTX ISA design work
+def ISTYPEP_SAMPLER
+ : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
+ "istypep.samplerref \t$d, $a;",
+ [(set Int1Regs:$d, (int_nvvm_istypep_sampler Int64Regs:$a))]>;
+def ISTYPEP_SURFACE
+ : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
+ "istypep.surfref \t$d, $a;",
+ [(set Int1Regs:$d, (int_nvvm_istypep_surface Int64Regs:$a))]>;
+def ISTYPEP_TEXTURE
+ : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
+ "istypep.texref \t$d, $a;",
+ [(set Int1Regs:$d, (int_nvvm_istypep_texture Int64Regs:$a))]>;
+
+//===- Surface Stores -----------------------------------------------------===//
+
+// Unformatted
+
+def SUST_B_1D_B8_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
+ "sust.b.1d.b8.trap \t[$s, \\{$x\\}], \\{$r\\};",
+ []>;
+def SUST_B_1D_B16_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
+ "sust.b.1d.b16.trap \t[$s, \\{$x\\}], \\{$r\\};",
+ []>;
+def SUST_B_1D_B32_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
+ "sust.b.1d.b32.trap \t[$s, \\{$x\\}], \\{$r\\};",
+ []>;
+def SUST_B_1D_V2B8_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
+ "sust.b.1d.v2.b8.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
+ []>;
+def SUST_B_1D_V2B16_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
+ "sust.b.1d.v2.b16.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
+ []>;
+def SUST_B_1D_V2B32_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
+ "sust.b.1d.v2.b32.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
+ []>;
+def SUST_B_1D_V4B8_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
+ Int16Regs:$b, Int16Regs:$a),
+ "sust.b.1d.v4.b8.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
+ []>;
+def SUST_B_1D_V4B16_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
+ Int16Regs:$b, Int16Regs:$a),
+ "sust.b.1d.v4.b16.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
+ []>;
+def SUST_B_1D_V4B32_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g,
+ Int32Regs:$b, Int32Regs:$a),
+ "sust.b.1d.v4.b32.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
+ []>;
+
+
+def SUST_B_1D_ARRAY_B8_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
+ "sust.b.a1d.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
+ []>;
+def SUST_B_1D_ARRAY_B16_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
+ "sust.b.a1d.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
+ []>;
+def SUST_B_1D_ARRAY_B32_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r),
+ "sust.b.a1d.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
+ []>;
+def SUST_B_1D_ARRAY_V2B8_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
+ Int16Regs:$g),
+ "sust.b.a1d.v2.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
+ []>;
+def SUST_B_1D_ARRAY_V2B16_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
+ Int16Regs:$g),
+ "sust.b.a1d.v2.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
+ []>;
+def SUST_B_1D_ARRAY_V2B32_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
+ Int32Regs:$g),
+ "sust.b.a1d.v2.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
+ []>;
+def SUST_B_1D_ARRAY_V4B8_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
+ Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ "sust.b.a1d.v4.b8.trap \t[$s, \\{$idx, $x\\}], "
+ "\\{$r, $g, $b, $a\\};",
+ []>;
+def SUST_B_1D_ARRAY_V4B16_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
+ Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ "sust.b.a1d.v4.b16.trap \t[$s, \\{$idx, $x\\}], "
+ "\\{$r, $g, $b, $a\\};",
+ []>;
+def SUST_B_1D_ARRAY_V4B32_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
+ Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
+ "sust.b.a1d.v4.b32.trap \t[$s, \\{$idx, $x\\}], "
+ "\\{$r, $g, $b, $a\\};",
+ []>;
+
+
+def SUST_B_2D_B8_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
+ "sust.b.2d.b8.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
+ []>;
+def SUST_B_2D_B16_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
+ "sust.b.2d.b16.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
+ []>;
+def SUST_B_2D_B32_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
+ "sust.b.2d.b32.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
+ []>;
+def SUST_B_2D_V2B8_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
+ Int16Regs:$g),
+ "sust.b.2d.v2.b8.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
+ []>;
+def SUST_B_2D_V2B16_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
+ Int16Regs:$g),
+ "sust.b.2d.v2.b16.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
+ []>;
+def SUST_B_2D_V2B32_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
+ Int32Regs:$g),
+ "sust.b.2d.v2.b32.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
+ []>;
+def SUST_B_2D_V4B8_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
+ Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ "sust.b.2d.v4.b8.trap \t[$s, \\{$x, $y\\}], "
+ "\\{$r, $g, $b, $a\\};",
+ []>;
+def SUST_B_2D_V4B16_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
+ Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ "sust.b.2d.v4.b16.trap \t[$s, \\{$x, $y\\}], "
+ "\\{$r, $g, $b, $a\\};",
+ []>;
+def SUST_B_2D_V4B32_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
+ Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
+ "sust.b.2d.v4.b32.trap \t[$s, \\{$x, $y\\}], "
+ "\\{$r, $g, $b, $a\\};",
+ []>;
+
+
+def SUST_B_2D_ARRAY_B8_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r),
+ "sust.b.a2d.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
+ []>;
+def SUST_B_2D_ARRAY_B16_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r),
+ "sust.b.a2d.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
+ []>;
+def SUST_B_2D_ARRAY_B32_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
+ Int32Regs:$r),
+ "sust.b.a2d.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
+ []>;
+def SUST_B_2D_ARRAY_V2B8_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r, Int16Regs:$g),
+ "sust.b.a2d.v2.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
+ "\\{$r, $g\\};",
+ []>;
+def SUST_B_2D_ARRAY_V2B16_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r, Int16Regs:$g),
+ "sust.b.a2d.v2.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
+ "\\{$r, $g\\};",
+ []>;
+def SUST_B_2D_ARRAY_V2B32_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
+ Int32Regs:$r, Int32Regs:$g),
+ "sust.b.a2d.v2.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
+ "\\{$r, $g\\};",
+ []>;
+def SUST_B_2D_ARRAY_V4B8_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ "sust.b.a2d.v4.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
+ "\\{$r, $g, $b, $a\\};",
+ []>;
+def SUST_B_2D_ARRAY_V4B16_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ "sust.b.a2d.v4.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
+ "\\{$r, $g, $b, $a\\};",
+ []>;
+def SUST_B_2D_ARRAY_V4B32_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
+ Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
+ "sust.b.a2d.v4.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
+ "\\{$r, $g, $b, $a\\};",
+ []>;
+
+
+def SUST_B_3D_B8_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int16Regs:$r),
+ "sust.b.3d.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
+ []>;
+def SUST_B_3D_B16_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int16Regs:$r),
+ "sust.b.3d.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
+ []>;
+def SUST_B_3D_B32_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int32Regs:$r),
+ "sust.b.3d.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
+ []>;
+def SUST_B_3D_V2B8_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int16Regs:$r, Int16Regs:$g),
+ "sust.b.3d.v2.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], "
+ "\\{$r, $g\\};",
+ []>;
+def SUST_B_3D_V2B16_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int16Regs:$r, Int16Regs:$g),
+ "sust.b.3d.v2.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], "
+ "\\{$r, $g\\};",
+ []>;
+def SUST_B_3D_V2B32_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int32Regs:$r, Int32Regs:$g),
+ "sust.b.3d.v2.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], "
+ "\\{$r, $g\\};",
+ []>;
+def SUST_B_3D_V4B8_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ "sust.b.3d.v4.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], "
+ "\\{$r, $g, $b, $a\\};",
+ []>;
+def SUST_B_3D_V4B16_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ "sust.b.3d.v4.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], "
+ "\\{$r, $g, $b, $a\\};",
+ []>;
+def SUST_B_3D_V4B32_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
+ "sust.b.3d.v4.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], "
+ "\\{$r, $g, $b, $a\\};",
+ []>;
+
+// Formatted
+
+def SUST_P_1D_B8_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
+ "sust.p.1d.b8.trap \t[$s, \\{$x\\}], \\{$r\\};",
+ []>;
+def SUST_P_1D_B16_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
+ "sust.p.1d.b16.trap \t[$s, \\{$x\\}], \\{$r\\};",
+ []>;
+def SUST_P_1D_B32_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
+ "sust.p.1d.b32.trap \t[$s, \\{$x\\}], \\{$r\\};",
+ []>;
+def SUST_P_1D_V2B8_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
+ "sust.p.1d.v2.b8.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
+ []>;
+def SUST_P_1D_V2B16_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
+ "sust.p.1d.v2.b16.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
+ []>;
+def SUST_P_1D_V2B32_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
+ "sust.p.1d.v2.b32.trap \t[$s, \\{$x\\}], \\{$r, $g\\};",
+ []>;
+def SUST_P_1D_V4B8_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
+ Int16Regs:$b, Int16Regs:$a),
+ "sust.p.1d.v4.b8.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
+ []>;
+def SUST_P_1D_V4B16_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g,
+ Int16Regs:$b, Int16Regs:$a),
+ "sust.p.1d.v4.b16.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
+ []>;
+def SUST_P_1D_V4B32_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g,
+ Int32Regs:$b, Int32Regs:$a),
+ "sust.p.1d.v4.b32.trap \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};",
+ []>;
+
+
+def SUST_P_1D_ARRAY_B8_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
+ "sust.p.a1d.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
+ []>;
+def SUST_P_1D_ARRAY_B16_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r),
+ "sust.p.a1d.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
+ []>;
+def SUST_P_1D_ARRAY_B32_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r),
+ "sust.p.a1d.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r\\};",
+ []>;
+def SUST_P_1D_ARRAY_V2B8_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
+ Int16Regs:$g),
+ "sust.p.a1d.v2.b8.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
+ []>;
+def SUST_P_1D_ARRAY_V2B16_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
+ Int16Regs:$g),
+ "sust.p.a1d.v2.b16.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
+ []>;
+def SUST_P_1D_ARRAY_V2B32_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
+ Int32Regs:$g),
+ "sust.p.a1d.v2.b32.trap \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};",
+ []>;
+def SUST_P_1D_ARRAY_V4B8_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
+ Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ "sust.p.a1d.v4.b8.trap \t[$s, \\{$idx, $x\\}], "
+ "\\{$r, $g, $b, $a\\};",
+ []>;
+def SUST_P_1D_ARRAY_V4B16_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int16Regs:$r,
+ Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ "sust.p.a1d.v4.b16.trap \t[$s, \\{$idx, $x\\}], "
+ "\\{$r, $g, $b, $a\\};",
+ []>;
+def SUST_P_1D_ARRAY_V4B32_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$r,
+ Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
+ "sust.p.a1d.v4.b32.trap \t[$s, \\{$idx, $x\\}], "
+ "\\{$r, $g, $b, $a\\};",
+ []>;
+
+
+def SUST_P_2D_B8_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
+ "sust.p.2d.b8.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
+ []>;
+def SUST_P_2D_B16_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
+ "sust.p.2d.b16.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
+ []>;
+def SUST_P_2D_B32_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
+ "sust.p.2d.b32.trap \t[$s, \\{$x, $y\\}], \\{$r\\};",
+ []>;
+def SUST_P_2D_V2B8_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
+ Int16Regs:$g),
+ "sust.p.2d.v2.b8.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
+ []>;
+def SUST_P_2D_V2B16_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
+ Int16Regs:$g),
+ "sust.p.2d.v2.b16.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
+ []>;
+def SUST_P_2D_V2B32_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
+ Int32Regs:$g),
+ "sust.p.2d.v2.b32.trap \t[$s, \\{$x, $y\\}], \\{$r, $g\\};",
+ []>;
+def SUST_P_2D_V4B8_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
+ Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ "sust.p.2d.v4.b8.trap \t[$s, \\{$x, $y\\}], "
+ "\\{$r, $g, $b, $a\\};",
+ []>;
+def SUST_P_2D_V4B16_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r,
+ Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ "sust.p.2d.v4.b16.trap \t[$s, \\{$x, $y\\}], "
+ "\\{$r, $g, $b, $a\\};",
+ []>;
+def SUST_P_2D_V4B32_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
+ Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
+ "sust.p.2d.v4.b32.trap \t[$s, \\{$x, $y\\}], "
+ "\\{$r, $g, $b, $a\\};",
+ []>;
+
+
+def SUST_P_2D_ARRAY_B8_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r),
+ "sust.p.a2d.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
+ []>;
+def SUST_P_2D_ARRAY_B16_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r),
+ "sust.p.a2d.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
+ []>;
+def SUST_P_2D_ARRAY_B32_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
+ Int32Regs:$r),
+ "sust.p.a2d.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};",
+ []>;
+def SUST_P_2D_ARRAY_V2B8_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r, Int16Regs:$g),
+ "sust.p.a2d.v2.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
+ "\\{$r, $g\\};",
+ []>;
+def SUST_P_2D_ARRAY_V2B16_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r, Int16Regs:$g),
+ "sust.p.a2d.v2.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
+ "\\{$r, $g\\};",
+ []>;
+def SUST_P_2D_ARRAY_V2B32_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
+ Int32Regs:$r, Int32Regs:$g),
+ "sust.p.a2d.v2.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
+ "\\{$r, $g\\};",
+ []>;
+def SUST_P_2D_ARRAY_V4B8_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ "sust.p.a2d.v4.b8.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
+ "\\{$r, $g, $b, $a\\};",
+ []>;
+def SUST_P_2D_ARRAY_V4B16_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ "sust.p.a2d.v4.b16.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
+ "\\{$r, $g, $b, $a\\};",
+ []>;
+def SUST_P_2D_ARRAY_V4B32_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
+ Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
+ "sust.p.a2d.v4.b32.trap \t[$s, \\{$idx, $x, $y, $y\\}], "
+ "\\{$r, $g, $b, $a\\};",
+ []>;
+
+
+def SUST_P_3D_B8_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int16Regs:$r),
+ "sust.p.3d.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
+ []>;
+def SUST_P_3D_B16_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int16Regs:$r),
+ "sust.p.3d.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
+ []>;
+def SUST_P_3D_B32_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int32Regs:$r),
+ "sust.p.3d.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};",
+ []>;
+def SUST_P_3D_V2B8_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int16Regs:$r, Int16Regs:$g),
+ "sust.p.3d.v2.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], "
+ "\\{$r, $g\\};",
+ []>;
+def SUST_P_3D_V2B16_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int16Regs:$r, Int16Regs:$g),
+ "sust.p.3d.v2.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], "
+ "\\{$r, $g\\};",
+ []>;
+def SUST_P_3D_V2B32_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int32Regs:$r, Int32Regs:$g),
+ "sust.p.3d.v2.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], "
+ "\\{$r, $g\\};",
+ []>;
+def SUST_P_3D_V4B8_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ "sust.p.3d.v4.b8.trap \t[$s, \\{$x, $y, $z, $z\\}], "
+ "\\{$r, $g, $b, $a\\};",
+ []>;
+def SUST_P_3D_V4B16_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ "sust.p.3d.v4.b16.trap \t[$s, \\{$x, $y, $z, $z\\}], "
+ "\\{$r, $g, $b, $a\\};",
+ []>;
+def SUST_P_3D_V4B32_TRAP
+ : NVPTXInst<(outs),
+ (ins Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
+ "sust.p.3d.v4.b32.trap \t[$s, \\{$x, $y, $z, $z\\}], "
+ "\\{$r, $g, $b, $a\\};",
+ []>;
+
+
+// Surface store instruction patterns
+// Ideally these would be part of the instruction definitions themselves, but
+// including them there makes TableGen report type errors, so they are kept as
+// separate patterns.
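+//
+// As an illustration, the first pattern below maps the intrinsic call
+//   llvm.nvvm.sust.b.1d.i8.trap(i64 %s, i32 %x, i16 %r)
+// onto the SUST_B_1D_B8_TRAP instruction defined above, which prints as
+//   sust.b.1d.b8.trap [$s, {$x}], {$r};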
+
+def : Pat<(int_nvvm_sust_b_1d_i8_trap
+ Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
+ (SUST_B_1D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
+
+def : Pat<(int_nvvm_sust_b_1d_i16_trap
+ Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
+ (SUST_B_1D_B16_TRAP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
+
+def : Pat<(int_nvvm_sust_b_1d_i32_trap
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
+ (SUST_B_1D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
+
+def : Pat<(int_nvvm_sust_b_1d_v2i8_trap
+ Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
+ (SUST_B_1D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x,
+ Int16Regs:$r, Int16Regs:$g)>;
+
+def : Pat<(int_nvvm_sust_b_1d_v2i16_trap
+ Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
+ (SUST_B_1D_V2B16_TRAP Int64Regs:$s, Int32Regs:$x,
+ Int16Regs:$r, Int16Regs:$g)>;
+
+def : Pat<(int_nvvm_sust_b_1d_v2i32_trap
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
+ (SUST_B_1D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x,
+ Int32Regs:$r, Int32Regs:$g)>;
+
+def : Pat<(int_nvvm_sust_b_1d_v4i8_trap
+ Int64Regs:$s, Int32Regs:$x,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ (SUST_B_1D_V4B8_TRAP Int64Regs:$s, Int32Regs:$x,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
+
+def : Pat<(int_nvvm_sust_b_1d_v4i16_trap
+ Int64Regs:$s, Int32Regs:$x,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ (SUST_B_1D_V4B16_TRAP Int64Regs:$s, Int32Regs:$x,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
+
+def : Pat<(int_nvvm_sust_b_1d_v4i32_trap
+ Int64Regs:$s, Int32Regs:$x,
+ Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
+ (SUST_B_1D_V4B32_TRAP Int64Regs:$s, Int32Regs:$x,
+ Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
+
+
+
+def : Pat<(int_nvvm_sust_b_1d_array_i8_trap
+ Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
+ (SUST_B_1D_ARRAY_B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ Int16Regs:$r)>;
+
+def : Pat<(int_nvvm_sust_b_1d_array_i16_trap
+ Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
+ (SUST_B_1D_ARRAY_B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ Int16Regs:$r)>;
+
+def : Pat<(int_nvvm_sust_b_1d_array_i32_trap
+ Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
+ (SUST_B_1D_ARRAY_B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ Int32Regs:$r)>;
+
+def : Pat<(int_nvvm_sust_b_1d_array_v2i8_trap
+ Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
+ (SUST_B_1D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ Int16Regs:$r, Int16Regs:$g)>;
+
+def : Pat<(int_nvvm_sust_b_1d_array_v2i16_trap
+ Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
+ (SUST_B_1D_ARRAY_V2B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ Int16Regs:$r, Int16Regs:$g)>;
+
+def : Pat<(int_nvvm_sust_b_1d_array_v2i32_trap
+ Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
+ (SUST_B_1D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ Int32Regs:$r, Int32Regs:$g)>;
+
+def : Pat<(int_nvvm_sust_b_1d_array_v4i8_trap
+ Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ (SUST_B_1D_ARRAY_V4B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
+
+def : Pat<(int_nvvm_sust_b_1d_array_v4i16_trap
+ Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ (SUST_B_1D_ARRAY_V4B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
+
+def : Pat<(int_nvvm_sust_b_1d_array_v4i32_trap
+ Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
+ (SUST_B_1D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
+
+
+
+def : Pat<(int_nvvm_sust_b_2d_i8_trap
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
+ (SUST_B_2D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r)>;
+
+def : Pat<(int_nvvm_sust_b_2d_i16_trap
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
+ (SUST_B_2D_B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r)>;
+
+def : Pat<(int_nvvm_sust_b_2d_i32_trap
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
+ (SUST_B_2D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ Int32Regs:$r)>;
+
+def : Pat<(int_nvvm_sust_b_2d_v2i8_trap
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
+ (SUST_B_2D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r, Int16Regs:$g)>;
+
+def : Pat<(int_nvvm_sust_b_2d_v2i16_trap
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
+ (SUST_B_2D_V2B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r, Int16Regs:$g)>;
+
+def : Pat<(int_nvvm_sust_b_2d_v2i32_trap
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
+ (SUST_B_2D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ Int32Regs:$r, Int32Regs:$g)>;
+
+def : Pat<(int_nvvm_sust_b_2d_v4i8_trap
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ (SUST_B_2D_V4B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
+
+def : Pat<(int_nvvm_sust_b_2d_v4i16_trap
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ (SUST_B_2D_V4B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
+
+def : Pat<(int_nvvm_sust_b_2d_v4i32_trap
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
+ (SUST_B_2D_V4B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
+
+
+
+def : Pat<(int_nvvm_sust_b_2d_array_i8_trap
+ Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
+ (SUST_B_2D_ARRAY_B8_TRAP Int64Regs:$s,
+ Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r)>;
+
+def : Pat<(int_nvvm_sust_b_2d_array_i16_trap
+ Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
+ (SUST_B_2D_ARRAY_B16_TRAP Int64Regs:$s,
+ Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r)>;
+
+def : Pat<(int_nvvm_sust_b_2d_array_i32_trap
+ Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
+ (SUST_B_2D_ARRAY_B32_TRAP Int64Regs:$s,
+ Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
+ Int32Regs:$r)>;
+
+def : Pat<(int_nvvm_sust_b_2d_array_v2i8_trap
+ Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r, Int16Regs:$g),
+ (SUST_B_2D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l,
+ Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r, Int16Regs:$g)>;
+
+def : Pat<(int_nvvm_sust_b_2d_array_v2i16_trap
+ Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r, Int16Regs:$g),
+ (SUST_B_2D_ARRAY_V2B16_TRAP Int64Regs:$s, Int32Regs:$l,
+ Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r, Int16Regs:$g)>;
+
+def : Pat<(int_nvvm_sust_b_2d_array_v2i32_trap
+ Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
+ Int32Regs:$g),
+ (SUST_B_2D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l,
+ Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;
+
+def : Pat<(int_nvvm_sust_b_2d_array_v4i8_trap
+ Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ (SUST_B_2D_ARRAY_V4B8_TRAP Int64Regs:$s,
+ Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
+
+def : Pat<(int_nvvm_sust_b_2d_array_v4i16_trap
+ Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ (SUST_B_2D_ARRAY_V4B16_TRAP Int64Regs:$s,
+ Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
+
+def : Pat<(int_nvvm_sust_b_2d_array_v4i32_trap
+ Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
+ Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
+ (SUST_B_2D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l,
+ Int32Regs:$x, Int32Regs:$y,
+ Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
+
+
+
+def : Pat<(int_nvvm_sust_b_3d_i8_trap
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int16Regs:$r),
+ (SUST_B_3D_B8_TRAP Int64Regs:$s,
+ Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int16Regs:$r)>;
+
+def : Pat<(int_nvvm_sust_b_3d_i16_trap
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int16Regs:$r),
+ (SUST_B_3D_B16_TRAP Int64Regs:$s,
+ Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int16Regs:$r)>;
+
+def : Pat<(int_nvvm_sust_b_3d_i32_trap
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int32Regs:$r),
+ (SUST_B_3D_B32_TRAP Int64Regs:$s,
+ Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int32Regs:$r)>;
+
+def : Pat<(int_nvvm_sust_b_3d_v2i8_trap
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int16Regs:$r, Int16Regs:$g),
+ (SUST_B_3D_V2B8_TRAP Int64Regs:$s,
+ Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int16Regs:$r, Int16Regs:$g)>;
+
+def : Pat<(int_nvvm_sust_b_3d_v2i16_trap
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int16Regs:$r, Int16Regs:$g),
+ (SUST_B_3D_V2B16_TRAP Int64Regs:$s,
+ Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int16Regs:$r, Int16Regs:$g)>;
+
+def : Pat<(int_nvvm_sust_b_3d_v2i32_trap
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int32Regs:$r, Int32Regs:$g),
+ (SUST_B_3D_V2B32_TRAP Int64Regs:$s,
+ Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int32Regs:$r, Int32Regs:$g)>;
+
+def : Pat<(int_nvvm_sust_b_3d_v4i8_trap
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ (SUST_B_3D_V4B8_TRAP Int64Regs:$s,
+ Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
+
+def : Pat<(int_nvvm_sust_b_3d_v4i16_trap
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ (SUST_B_3D_V4B16_TRAP Int64Regs:$s,
+ Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
+
+def : Pat<(int_nvvm_sust_b_3d_v4i32_trap
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
+ (SUST_B_3D_V4B32_TRAP Int64Regs:$s,
+ Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
+
+
+
+
+def : Pat<(int_nvvm_sust_p_1d_i8_trap
+ Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
+ (SUST_P_1D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
+
+def : Pat<(int_nvvm_sust_p_1d_i16_trap
+ Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
+ (SUST_P_1D_B16_TRAP Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;
+
+def : Pat<(int_nvvm_sust_p_1d_i32_trap
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
+ (SUST_P_1D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;
+
+def : Pat<(int_nvvm_sust_p_1d_v2i8_trap
+ Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
+ (SUST_P_1D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x,
+ Int16Regs:$r, Int16Regs:$g)>;
+
+def : Pat<(int_nvvm_sust_p_1d_v2i16_trap
+ Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
+ (SUST_P_1D_V2B16_TRAP Int64Regs:$s, Int32Regs:$x,
+ Int16Regs:$r, Int16Regs:$g)>;
+
+def : Pat<(int_nvvm_sust_p_1d_v2i32_trap
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
+ (SUST_P_1D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x,
+ Int32Regs:$r, Int32Regs:$g)>;
+
+def : Pat<(int_nvvm_sust_p_1d_v4i8_trap
+ Int64Regs:$s, Int32Regs:$x,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ (SUST_P_1D_V4B8_TRAP Int64Regs:$s, Int32Regs:$x,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
+
+def : Pat<(int_nvvm_sust_p_1d_v4i16_trap
+ Int64Regs:$s, Int32Regs:$x,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ (SUST_P_1D_V4B16_TRAP Int64Regs:$s, Int32Regs:$x,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
+
+def : Pat<(int_nvvm_sust_p_1d_v4i32_trap
+ Int64Regs:$s, Int32Regs:$x,
+ Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
+ (SUST_P_1D_V4B32_TRAP Int64Regs:$s, Int32Regs:$x,
+ Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
+
+
+
+def : Pat<(int_nvvm_sust_p_1d_array_i8_trap
+ Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
+ (SUST_P_1D_ARRAY_B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ Int16Regs:$r)>;
+
+def : Pat<(int_nvvm_sust_p_1d_array_i16_trap
+ Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
+ (SUST_P_1D_ARRAY_B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ Int16Regs:$r)>;
+
+def : Pat<(int_nvvm_sust_p_1d_array_i32_trap
+ Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
+ (SUST_P_1D_ARRAY_B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ Int32Regs:$r)>;
+
+def : Pat<(int_nvvm_sust_p_1d_array_v2i8_trap
+ Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
+ (SUST_P_1D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ Int16Regs:$r, Int16Regs:$g)>;
+
+def : Pat<(int_nvvm_sust_p_1d_array_v2i16_trap
+ Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
+ (SUST_P_1D_ARRAY_V2B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ Int16Regs:$r, Int16Regs:$g)>;
+
+def : Pat<(int_nvvm_sust_p_1d_array_v2i32_trap
+ Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
+ (SUST_P_1D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ Int32Regs:$r, Int32Regs:$g)>;
+
+def : Pat<(int_nvvm_sust_p_1d_array_v4i8_trap
+ Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ (SUST_P_1D_ARRAY_V4B8_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
+
+def : Pat<(int_nvvm_sust_p_1d_array_v4i16_trap
+ Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ (SUST_P_1D_ARRAY_V4B16_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
+
+def : Pat<(int_nvvm_sust_p_1d_array_v4i32_trap
+ Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
+ (SUST_P_1D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
+ Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
+
+
+
+def : Pat<(int_nvvm_sust_p_2d_i8_trap
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
+ (SUST_P_2D_B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r)>;
+
+def : Pat<(int_nvvm_sust_p_2d_i16_trap
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
+ (SUST_P_2D_B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r)>;
+
+def : Pat<(int_nvvm_sust_p_2d_i32_trap
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
+ (SUST_P_2D_B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ Int32Regs:$r)>;
+
+def : Pat<(int_nvvm_sust_p_2d_v2i8_trap
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
+ (SUST_P_2D_V2B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r, Int16Regs:$g)>;
+
+def : Pat<(int_nvvm_sust_p_2d_v2i16_trap
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g),
+ (SUST_P_2D_V2B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r, Int16Regs:$g)>;
+
+def : Pat<(int_nvvm_sust_p_2d_v2i32_trap
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g),
+ (SUST_P_2D_V2B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ Int32Regs:$r, Int32Regs:$g)>;
+
+def : Pat<(int_nvvm_sust_p_2d_v4i8_trap
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ (SUST_P_2D_V4B8_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
+
+def : Pat<(int_nvvm_sust_p_2d_v4i16_trap
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ (SUST_P_2D_V4B16_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
+
+def : Pat<(int_nvvm_sust_p_2d_v4i32_trap
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
+ (SUST_P_2D_V4B32_TRAP Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
+ Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
+
+
+
+def : Pat<(int_nvvm_sust_p_2d_array_i8_trap
+ Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
+ (SUST_P_2D_ARRAY_B8_TRAP Int64Regs:$s,
+ Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r)>;
+
+def : Pat<(int_nvvm_sust_p_2d_array_i16_trap
+ Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
+ (SUST_P_2D_ARRAY_B16_TRAP Int64Regs:$s,
+ Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r)>;
+
+def : Pat<(int_nvvm_sust_p_2d_array_i32_trap
+ Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
+ (SUST_P_2D_ARRAY_B32_TRAP Int64Regs:$s,
+ Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
+ Int32Regs:$r)>;
+
+def : Pat<(int_nvvm_sust_p_2d_array_v2i8_trap
+ Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r, Int16Regs:$g),
+ (SUST_P_2D_ARRAY_V2B8_TRAP Int64Regs:$s, Int32Regs:$l,
+ Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r, Int16Regs:$g)>;
+
+def : Pat<(int_nvvm_sust_p_2d_array_v2i16_trap
+ Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r, Int16Regs:$g),
+ (SUST_P_2D_ARRAY_V2B16_TRAP Int64Regs:$s, Int32Regs:$l,
+ Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r, Int16Regs:$g)>;
+
+def : Pat<(int_nvvm_sust_p_2d_array_v2i32_trap
+ Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r,
+ Int32Regs:$g),
+ (SUST_P_2D_ARRAY_V2B32_TRAP Int64Regs:$s, Int32Regs:$l,
+ Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>;
+
+def : Pat<(int_nvvm_sust_p_2d_array_v4i8_trap
+ Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ (SUST_P_2D_ARRAY_V4B8_TRAP Int64Regs:$s,
+ Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
+
+def : Pat<(int_nvvm_sust_p_2d_array_v4i16_trap
+ Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ (SUST_P_2D_ARRAY_V4B16_TRAP Int64Regs:$s,
+ Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
+
+def : Pat<(int_nvvm_sust_p_2d_array_v4i32_trap
+ Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
+ Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
+ (SUST_P_2D_ARRAY_V4B32_TRAP Int64Regs:$s, Int32Regs:$l,
+ Int32Regs:$x, Int32Regs:$y,
+ Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
+
+
+
+def : Pat<(int_nvvm_sust_p_3d_i8_trap
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int16Regs:$r),
+ (SUST_P_3D_B8_TRAP Int64Regs:$s,
+ Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int16Regs:$r)>;
+
+def : Pat<(int_nvvm_sust_p_3d_i16_trap
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int16Regs:$r),
+ (SUST_P_3D_B16_TRAP Int64Regs:$s,
+ Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int16Regs:$r)>;
+
+def : Pat<(int_nvvm_sust_p_3d_i32_trap
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int32Regs:$r),
+ (SUST_P_3D_B32_TRAP Int64Regs:$s,
+ Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int32Regs:$r)>;
+
+def : Pat<(int_nvvm_sust_p_3d_v2i8_trap
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int16Regs:$r, Int16Regs:$g),
+ (SUST_P_3D_V2B8_TRAP Int64Regs:$s,
+ Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int16Regs:$r, Int16Regs:$g)>;
+
+def : Pat<(int_nvvm_sust_p_3d_v2i16_trap
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int16Regs:$r, Int16Regs:$g),
+ (SUST_P_3D_V2B16_TRAP Int64Regs:$s,
+ Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int16Regs:$r, Int16Regs:$g)>;
+
+def : Pat<(int_nvvm_sust_p_3d_v2i32_trap
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int32Regs:$r, Int32Regs:$g),
+ (SUST_P_3D_V2B32_TRAP Int64Regs:$s,
+ Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int32Regs:$r, Int32Regs:$g)>;
+
+def : Pat<(int_nvvm_sust_p_3d_v4i8_trap
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ (SUST_P_3D_V4B8_TRAP Int64Regs:$s,
+ Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
+
+def : Pat<(int_nvvm_sust_p_3d_v4i16_trap
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
+ (SUST_P_3D_V4B16_TRAP Int64Regs:$s,
+ Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;
+
+def : Pat<(int_nvvm_sust_p_3d_v4i32_trap
+ Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
+ (SUST_P_3D_V4B32_TRAP Int64Regs:$s,
+ Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
+ Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;
+
+
//===-- Old PTX Back-end Intrinsics ---------------------------------------===//
diff --git a/lib/Target/NVPTX/NVPTXLowerAggrCopies.h b/lib/Target/NVPTX/NVPTXLowerAggrCopies.h
index c9aa87d..5ec1fc9 100644
--- a/lib/Target/NVPTX/NVPTXLowerAggrCopies.h
+++ b/lib/Target/NVPTX/NVPTXLowerAggrCopies.h
@@ -27,17 +27,17 @@ struct NVPTXLowerAggrCopies : public FunctionPass {
NVPTXLowerAggrCopies() : FunctionPass(ID) {}
- void getAnalysisUsage(AnalysisUsage &AU) const {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<DataLayoutPass>();
AU.addPreserved("stack-protector");
AU.addPreserved<MachineFunctionAnalysis>();
}
- virtual bool runOnFunction(Function &F);
+ bool runOnFunction(Function &F) override;
static const unsigned MaxAggrCopySize = 128;
- virtual const char *getPassName() const {
+ const char *getPassName() const override {
return "Lower aggregate copies/intrinsics into loops";
}
};
diff --git a/lib/Target/NVPTX/NVPTXMCExpr.cpp b/lib/Target/NVPTX/NVPTXMCExpr.cpp
index ca24764..137248b 100644
--- a/lib/Target/NVPTX/NVPTXMCExpr.cpp
+++ b/lib/Target/NVPTX/NVPTXMCExpr.cpp
@@ -7,13 +7,14 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "nvptx-mcexpr"
#include "NVPTXMCExpr.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
using namespace llvm;
+#define DEBUG_TYPE "nvptx-mcexpr"
+
const NVPTXFloatMCExpr*
NVPTXFloatMCExpr::Create(VariantKind Kind, APFloat Flt, MCContext &Ctx) {
return new (Ctx) NVPTXFloatMCExpr(Kind, Flt);
diff --git a/lib/Target/NVPTX/NVPTXMCExpr.h b/lib/Target/NVPTX/NVPTXMCExpr.h
index 0efb231..0ee018c 100644
--- a/lib/Target/NVPTX/NVPTXMCExpr.h
+++ b/lib/Target/NVPTX/NVPTXMCExpr.h
@@ -61,18 +61,18 @@ public:
/// @}
- void PrintImpl(raw_ostream &OS) const;
+ void PrintImpl(raw_ostream &OS) const override;
bool EvaluateAsRelocatableImpl(MCValue &Res,
- const MCAsmLayout *Layout) const {
+ const MCAsmLayout *Layout) const override {
return false;
}
- void AddValueSymbols(MCAssembler *) const {};
- const MCSection *FindAssociatedSection() const {
- return NULL;
+  void AddValueSymbols(MCAssembler *) const override {}
+ const MCSection *FindAssociatedSection() const override {
+ return nullptr;
}
// There are no TLS NVPTXMCExprs at the moment.
- void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const {}
+ void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const override {}
static bool classof(const MCExpr *E) {
return E->getKind() == MCExpr::Target;
diff --git a/lib/Target/NVPTX/NVPTXMachineFunctionInfo.h b/lib/Target/NVPTX/NVPTXMachineFunctionInfo.h
new file mode 100644
index 0000000..67fb390
--- /dev/null
+++ b/lib/Target/NVPTX/NVPTXMachineFunctionInfo.h
@@ -0,0 +1,46 @@
+//===-- NVPTXMachineFunctionInfo.h - NVPTX-specific Function Info --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class is attached to a MachineFunction instance and tracks target-
+// dependent information.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineFunction.h"
+
+namespace llvm {
+class NVPTXMachineFunctionInfo : public MachineFunctionInfo {
+private:
+ /// Stores a mapping from index to symbol name for removing image handles
+ /// on Fermi.
+ SmallVector<std::string, 8> ImageHandleList;
+
+public:
+ NVPTXMachineFunctionInfo(MachineFunction &MF) {}
+
+  /// Returns the index for the symbol \p Symbol. If the symbol was previously
+  /// added, the same index is returned. Otherwise, the symbol is added and the
+ /// new index is returned.
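+  /// For example (with an illustrative name): the first call with
+  /// "foo_param_0" returns 0, a repeated call with the same name returns 0
+  /// again, and a call with a new name returns 1.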
+ unsigned getImageHandleSymbolIndex(const char *Symbol) {
+ // Is the symbol already present?
+ for (unsigned i = 0, e = ImageHandleList.size(); i != e; ++i)
+ if (ImageHandleList[i] == std::string(Symbol))
+ return i;
+ // Nope, insert it
+ ImageHandleList.push_back(Symbol);
+ return ImageHandleList.size()-1;
+ }
+
+ /// Returns the symbol name at the given index.
+ const char *getImageHandleSymbol(unsigned Idx) const {
+ assert(ImageHandleList.size() > Idx && "Bad index");
+ return ImageHandleList[Idx].c_str();
+ }
+};
+}
diff --git a/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp b/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp
index d5b042a..348ab0c 100644
--- a/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp
+++ b/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp
@@ -25,13 +25,15 @@
using namespace llvm;
+#define DEBUG_TYPE "nvptx-prolog-epilog"
+
namespace {
class NVPTXPrologEpilogPass : public MachineFunctionPass {
public:
static char ID;
NVPTXPrologEpilogPass() : MachineFunctionPass(ID) {}
- virtual bool runOnMachineFunction(MachineFunction &MF);
+ bool runOnMachineFunction(MachineFunction &MF) override;
private:
void calculateFrameObjectOffsets(MachineFunction &Fn);
@@ -58,7 +60,7 @@ bool NVPTXPrologEpilogPass::runOnMachineFunction(MachineFunction &MF) {
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
if (!MI->getOperand(i).isFI())
continue;
- TRI.eliminateFrameIndex(MI, 0, i, NULL);
+ TRI.eliminateFrameIndex(MI, 0, i, nullptr);
Modified = true;
}
}
diff --git a/lib/Target/NVPTX/NVPTXRegisterInfo.cpp b/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
index 4d3a1d9..62f288b 100644
--- a/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
+++ b/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
@@ -11,8 +11,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "nvptx-reg-info"
-
#include "NVPTXRegisterInfo.h"
#include "NVPTX.h"
#include "NVPTXSubtarget.h"
@@ -25,6 +23,8 @@
using namespace llvm;
+#define DEBUG_TYPE "nvptx-reg-info"
+
namespace llvm {
std::string getNVPTXRegClassName(TargetRegisterClass const *RC) {
if (RC == &NVPTX::Float32RegsRegClass) {
@@ -78,19 +78,12 @@ NVPTXRegisterInfo::NVPTXRegisterInfo(const NVPTXSubtarget &st)
#include "NVPTXGenRegisterInfo.inc"
/// NVPTX Callee Saved Registers
-const uint16_t *
+const MCPhysReg *
NVPTXRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
- static const uint16_t CalleeSavedRegs[] = { 0 };
+ static const MCPhysReg CalleeSavedRegs[] = { 0 };
return CalleeSavedRegs;
}
-// NVPTX Callee Saved Reg Classes
-const TargetRegisterClass *const *
-NVPTXRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const {
- static const TargetRegisterClass *const CalleeSavedRegClasses[] = { 0 };
- return CalleeSavedRegClasses;
-}
-
BitVector NVPTXRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
BitVector Reserved(getNumRegs());
return Reserved;
@@ -113,12 +106,6 @@ void NVPTXRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
}
-int NVPTXRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const {
- return 0;
-}
-
unsigned NVPTXRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
return NVPTX::VRFrame;
}
-
-unsigned NVPTXRegisterInfo::getRARegister() const { return 0; }
diff --git a/lib/Target/NVPTX/NVPTXRegisterInfo.h b/lib/Target/NVPTX/NVPTXRegisterInfo.h
index 0a20f29..a7594be 100644
--- a/lib/Target/NVPTX/NVPTXRegisterInfo.h
+++ b/lib/Target/NVPTX/NVPTXRegisterInfo.h
@@ -16,11 +16,10 @@
#include "ManagedStringPool.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include <sstream>
#define GET_REGINFO_HEADER
#include "NVPTXGenRegisterInfo.inc"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include <sstream>
namespace llvm {
@@ -42,22 +41,16 @@ public:
//------------------------------------------------------
// NVPTX callee saved registers
- virtual const uint16_t *
- getCalleeSavedRegs(const MachineFunction *MF = 0) const;
-
- // NVPTX callee saved register classes
- virtual const TargetRegisterClass *const *
- getCalleeSavedRegClasses(const MachineFunction *MF) const;
+ const MCPhysReg *
+ getCalleeSavedRegs(const MachineFunction *MF = nullptr) const override;
- virtual BitVector getReservedRegs(const MachineFunction &MF) const;
+ BitVector getReservedRegs(const MachineFunction &MF) const override;
- virtual void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj,
- unsigned FIOperandNum,
- RegScavenger *RS = NULL) const;
+ void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj,
+ unsigned FIOperandNum,
+ RegScavenger *RS = nullptr) const override;
- virtual int getDwarfRegNum(unsigned RegNum, bool isEH) const;
- virtual unsigned getFrameRegister(const MachineFunction &MF) const;
- virtual unsigned getRARegister() const;
+ unsigned getFrameRegister(const MachineFunction &MF) const override;
ManagedStringPool *getStrPool() const {
return const_cast<ManagedStringPool *>(&ManagedStrPool);
diff --git a/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp b/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp
new file mode 100644
index 0000000..afd53a6
--- /dev/null
+++ b/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp
@@ -0,0 +1,357 @@
+//===-- NVPTXReplaceImageHandles.cpp - Replace image handles for Fermi ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// On Fermi, image handles are not supported. To work around this, we traverse
+// the machine code and replace image handles with concrete symbols. For this
+// to work reliably, inlining of all function calls must be performed.
+//
+//===----------------------------------------------------------------------===//
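+//
+// As an illustrative sketch: a texture fetch whose texref handle is loaded
+// from a kernel parameter, e.g.
+//   %handle = LD_i64_avar ...   ; loads the hypothetical symbol "func_param_0"
+//   TEX_2D_F32_F32 ..., %handle, ...
+// has its handle operand rewritten to an immediate index into the
+// NVPTXMachineFunctionInfo symbol list, and the defining load is deleted so
+// that a later stage can emit the concrete symbol name instead.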
+
+#include "NVPTX.h"
+#include "NVPTXMachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/DenseSet.h"
+
+using namespace llvm;
+
+namespace {
+class NVPTXReplaceImageHandles : public MachineFunctionPass {
+private:
+ static char ID;
+ DenseSet<MachineInstr *> InstrsToRemove;
+
+public:
+ NVPTXReplaceImageHandles();
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+private:
+ bool processInstr(MachineInstr &MI);
+ void replaceImageHandle(MachineOperand &Op, MachineFunction &MF);
+};
+}
+
+char NVPTXReplaceImageHandles::ID = 0;
+
+NVPTXReplaceImageHandles::NVPTXReplaceImageHandles()
+ : MachineFunctionPass(ID) {}
+
+bool NVPTXReplaceImageHandles::runOnMachineFunction(MachineFunction &MF) {
+ bool Changed = false;
+ InstrsToRemove.clear();
+
+ for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); BI != BE;
+ ++BI) {
+ for (MachineBasicBlock::iterator I = (*BI).begin(), E = (*BI).end();
+ I != E; ++I) {
+ MachineInstr &MI = *I;
+ Changed |= processInstr(MI);
+ }
+ }
+
+  // Now clean up any handle-access instructions.
+  // This is needed in debug mode, when code cleanup passes are not executed,
+  // but the handle accesses still need to be eliminated because they are not
+  // valid instructions when image handles are disabled.
+ for (DenseSet<MachineInstr *>::iterator I = InstrsToRemove.begin(),
+ E = InstrsToRemove.end(); I != E; ++I) {
+ (*I)->eraseFromParent();
+ }
+
+ return Changed;
+}
+
+bool NVPTXReplaceImageHandles::processInstr(MachineInstr &MI) {
+ MachineFunction &MF = *MI.getParent()->getParent();
+ // Check if we have a surface/texture instruction
+ switch (MI.getOpcode()) {
+ default: return false;
+ case NVPTX::TEX_1D_F32_I32:
+ case NVPTX::TEX_1D_F32_F32:
+ case NVPTX::TEX_1D_F32_F32_LEVEL:
+ case NVPTX::TEX_1D_F32_F32_GRAD:
+ case NVPTX::TEX_1D_I32_I32:
+ case NVPTX::TEX_1D_I32_F32:
+ case NVPTX::TEX_1D_I32_F32_LEVEL:
+ case NVPTX::TEX_1D_I32_F32_GRAD:
+ case NVPTX::TEX_1D_ARRAY_F32_I32:
+ case NVPTX::TEX_1D_ARRAY_F32_F32:
+ case NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL:
+ case NVPTX::TEX_1D_ARRAY_F32_F32_GRAD:
+ case NVPTX::TEX_1D_ARRAY_I32_I32:
+ case NVPTX::TEX_1D_ARRAY_I32_F32:
+ case NVPTX::TEX_1D_ARRAY_I32_F32_LEVEL:
+ case NVPTX::TEX_1D_ARRAY_I32_F32_GRAD:
+ case NVPTX::TEX_2D_F32_I32:
+ case NVPTX::TEX_2D_F32_F32:
+ case NVPTX::TEX_2D_F32_F32_LEVEL:
+ case NVPTX::TEX_2D_F32_F32_GRAD:
+ case NVPTX::TEX_2D_I32_I32:
+ case NVPTX::TEX_2D_I32_F32:
+ case NVPTX::TEX_2D_I32_F32_LEVEL:
+ case NVPTX::TEX_2D_I32_F32_GRAD:
+ case NVPTX::TEX_2D_ARRAY_F32_I32:
+ case NVPTX::TEX_2D_ARRAY_F32_F32:
+ case NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL:
+ case NVPTX::TEX_2D_ARRAY_F32_F32_GRAD:
+ case NVPTX::TEX_2D_ARRAY_I32_I32:
+ case NVPTX::TEX_2D_ARRAY_I32_F32:
+ case NVPTX::TEX_2D_ARRAY_I32_F32_LEVEL:
+ case NVPTX::TEX_2D_ARRAY_I32_F32_GRAD:
+ case NVPTX::TEX_3D_F32_I32:
+ case NVPTX::TEX_3D_F32_F32:
+ case NVPTX::TEX_3D_F32_F32_LEVEL:
+ case NVPTX::TEX_3D_F32_F32_GRAD:
+ case NVPTX::TEX_3D_I32_I32:
+ case NVPTX::TEX_3D_I32_F32:
+ case NVPTX::TEX_3D_I32_F32_LEVEL:
+ case NVPTX::TEX_3D_I32_F32_GRAD: {
+ // This is a texture fetch, so operand 4 is a texref and operand 5 is
+ // a samplerref
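+    // (operands 0-3 are the four destination registers of the fetch, as with
+    // the V4 surface loads below)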
+ MachineOperand &TexHandle = MI.getOperand(4);
+ MachineOperand &SampHandle = MI.getOperand(5);
+
+ replaceImageHandle(TexHandle, MF);
+ replaceImageHandle(SampHandle, MF);
+
+ return true;
+ }
+ case NVPTX::SULD_1D_I8_TRAP:
+ case NVPTX::SULD_1D_I16_TRAP:
+ case NVPTX::SULD_1D_I32_TRAP:
+ case NVPTX::SULD_1D_ARRAY_I8_TRAP:
+ case NVPTX::SULD_1D_ARRAY_I16_TRAP:
+ case NVPTX::SULD_1D_ARRAY_I32_TRAP:
+ case NVPTX::SULD_2D_I8_TRAP:
+ case NVPTX::SULD_2D_I16_TRAP:
+ case NVPTX::SULD_2D_I32_TRAP:
+ case NVPTX::SULD_2D_ARRAY_I8_TRAP:
+ case NVPTX::SULD_2D_ARRAY_I16_TRAP:
+ case NVPTX::SULD_2D_ARRAY_I32_TRAP:
+ case NVPTX::SULD_3D_I8_TRAP:
+ case NVPTX::SULD_3D_I16_TRAP:
+ case NVPTX::SULD_3D_I32_TRAP: {
+ // This is a V1 surface load, so operand 1 is a surfref
+ MachineOperand &SurfHandle = MI.getOperand(1);
+
+ replaceImageHandle(SurfHandle, MF);
+
+ return true;
+ }
+ case NVPTX::SULD_1D_V2I8_TRAP:
+ case NVPTX::SULD_1D_V2I16_TRAP:
+ case NVPTX::SULD_1D_V2I32_TRAP:
+ case NVPTX::SULD_1D_ARRAY_V2I8_TRAP:
+ case NVPTX::SULD_1D_ARRAY_V2I16_TRAP:
+ case NVPTX::SULD_1D_ARRAY_V2I32_TRAP:
+ case NVPTX::SULD_2D_V2I8_TRAP:
+ case NVPTX::SULD_2D_V2I16_TRAP:
+ case NVPTX::SULD_2D_V2I32_TRAP:
+ case NVPTX::SULD_2D_ARRAY_V2I8_TRAP:
+ case NVPTX::SULD_2D_ARRAY_V2I16_TRAP:
+ case NVPTX::SULD_2D_ARRAY_V2I32_TRAP:
+ case NVPTX::SULD_3D_V2I8_TRAP:
+ case NVPTX::SULD_3D_V2I16_TRAP:
+ case NVPTX::SULD_3D_V2I32_TRAP: {
+ // This is a V2 surface load, so operand 2 is a surfref
+ MachineOperand &SurfHandle = MI.getOperand(2);
+
+ replaceImageHandle(SurfHandle, MF);
+
+ return true;
+ }
+ case NVPTX::SULD_1D_V4I8_TRAP:
+ case NVPTX::SULD_1D_V4I16_TRAP:
+ case NVPTX::SULD_1D_V4I32_TRAP:
+ case NVPTX::SULD_1D_ARRAY_V4I8_TRAP:
+ case NVPTX::SULD_1D_ARRAY_V4I16_TRAP:
+ case NVPTX::SULD_1D_ARRAY_V4I32_TRAP:
+ case NVPTX::SULD_2D_V4I8_TRAP:
+ case NVPTX::SULD_2D_V4I16_TRAP:
+ case NVPTX::SULD_2D_V4I32_TRAP:
+ case NVPTX::SULD_2D_ARRAY_V4I8_TRAP:
+ case NVPTX::SULD_2D_ARRAY_V4I16_TRAP:
+ case NVPTX::SULD_2D_ARRAY_V4I32_TRAP:
+ case NVPTX::SULD_3D_V4I8_TRAP:
+ case NVPTX::SULD_3D_V4I16_TRAP:
+ case NVPTX::SULD_3D_V4I32_TRAP: {
+ // This is a V4 surface load, so operand 4 is a surfref
+ MachineOperand &SurfHandle = MI.getOperand(4);
+
+ replaceImageHandle(SurfHandle, MF);
+
+ return true;
+ }
+ case NVPTX::SUST_B_1D_B8_TRAP:
+ case NVPTX::SUST_B_1D_B16_TRAP:
+ case NVPTX::SUST_B_1D_B32_TRAP:
+ case NVPTX::SUST_B_1D_V2B8_TRAP:
+ case NVPTX::SUST_B_1D_V2B16_TRAP:
+ case NVPTX::SUST_B_1D_V2B32_TRAP:
+ case NVPTX::SUST_B_1D_V4B8_TRAP:
+ case NVPTX::SUST_B_1D_V4B16_TRAP:
+ case NVPTX::SUST_B_1D_V4B32_TRAP:
+ case NVPTX::SUST_B_1D_ARRAY_B8_TRAP:
+ case NVPTX::SUST_B_1D_ARRAY_B16_TRAP:
+ case NVPTX::SUST_B_1D_ARRAY_B32_TRAP:
+ case NVPTX::SUST_B_1D_ARRAY_V2B8_TRAP:
+ case NVPTX::SUST_B_1D_ARRAY_V2B16_TRAP:
+ case NVPTX::SUST_B_1D_ARRAY_V2B32_TRAP:
+ case NVPTX::SUST_B_1D_ARRAY_V4B8_TRAP:
+ case NVPTX::SUST_B_1D_ARRAY_V4B16_TRAP:
+ case NVPTX::SUST_B_1D_ARRAY_V4B32_TRAP:
+ case NVPTX::SUST_B_2D_B8_TRAP:
+ case NVPTX::SUST_B_2D_B16_TRAP:
+ case NVPTX::SUST_B_2D_B32_TRAP:
+ case NVPTX::SUST_B_2D_V2B8_TRAP:
+ case NVPTX::SUST_B_2D_V2B16_TRAP:
+ case NVPTX::SUST_B_2D_V2B32_TRAP:
+ case NVPTX::SUST_B_2D_V4B8_TRAP:
+ case NVPTX::SUST_B_2D_V4B16_TRAP:
+ case NVPTX::SUST_B_2D_V4B32_TRAP:
+ case NVPTX::SUST_B_2D_ARRAY_B8_TRAP:
+ case NVPTX::SUST_B_2D_ARRAY_B16_TRAP:
+ case NVPTX::SUST_B_2D_ARRAY_B32_TRAP:
+ case NVPTX::SUST_B_2D_ARRAY_V2B8_TRAP:
+ case NVPTX::SUST_B_2D_ARRAY_V2B16_TRAP:
+ case NVPTX::SUST_B_2D_ARRAY_V2B32_TRAP:
+ case NVPTX::SUST_B_2D_ARRAY_V4B8_TRAP:
+ case NVPTX::SUST_B_2D_ARRAY_V4B16_TRAP:
+ case NVPTX::SUST_B_2D_ARRAY_V4B32_TRAP:
+ case NVPTX::SUST_B_3D_B8_TRAP:
+ case NVPTX::SUST_B_3D_B16_TRAP:
+ case NVPTX::SUST_B_3D_B32_TRAP:
+ case NVPTX::SUST_B_3D_V2B8_TRAP:
+ case NVPTX::SUST_B_3D_V2B16_TRAP:
+ case NVPTX::SUST_B_3D_V2B32_TRAP:
+ case NVPTX::SUST_B_3D_V4B8_TRAP:
+ case NVPTX::SUST_B_3D_V4B16_TRAP:
+ case NVPTX::SUST_B_3D_V4B32_TRAP:
+ case NVPTX::SUST_P_1D_B8_TRAP:
+ case NVPTX::SUST_P_1D_B16_TRAP:
+ case NVPTX::SUST_P_1D_B32_TRAP:
+ case NVPTX::SUST_P_1D_V2B8_TRAP:
+ case NVPTX::SUST_P_1D_V2B16_TRAP:
+ case NVPTX::SUST_P_1D_V2B32_TRAP:
+ case NVPTX::SUST_P_1D_V4B8_TRAP:
+ case NVPTX::SUST_P_1D_V4B16_TRAP:
+ case NVPTX::SUST_P_1D_V4B32_TRAP:
+ case NVPTX::SUST_P_1D_ARRAY_B8_TRAP:
+ case NVPTX::SUST_P_1D_ARRAY_B16_TRAP:
+ case NVPTX::SUST_P_1D_ARRAY_B32_TRAP:
+ case NVPTX::SUST_P_1D_ARRAY_V2B8_TRAP:
+ case NVPTX::SUST_P_1D_ARRAY_V2B16_TRAP:
+ case NVPTX::SUST_P_1D_ARRAY_V2B32_TRAP:
+ case NVPTX::SUST_P_1D_ARRAY_V4B8_TRAP:
+ case NVPTX::SUST_P_1D_ARRAY_V4B16_TRAP:
+ case NVPTX::SUST_P_1D_ARRAY_V4B32_TRAP:
+ case NVPTX::SUST_P_2D_B8_TRAP:
+ case NVPTX::SUST_P_2D_B16_TRAP:
+ case NVPTX::SUST_P_2D_B32_TRAP:
+ case NVPTX::SUST_P_2D_V2B8_TRAP:
+ case NVPTX::SUST_P_2D_V2B16_TRAP:
+ case NVPTX::SUST_P_2D_V2B32_TRAP:
+ case NVPTX::SUST_P_2D_V4B8_TRAP:
+ case NVPTX::SUST_P_2D_V4B16_TRAP:
+ case NVPTX::SUST_P_2D_V4B32_TRAP:
+ case NVPTX::SUST_P_2D_ARRAY_B8_TRAP:
+ case NVPTX::SUST_P_2D_ARRAY_B16_TRAP:
+ case NVPTX::SUST_P_2D_ARRAY_B32_TRAP:
+ case NVPTX::SUST_P_2D_ARRAY_V2B8_TRAP:
+ case NVPTX::SUST_P_2D_ARRAY_V2B16_TRAP:
+ case NVPTX::SUST_P_2D_ARRAY_V2B32_TRAP:
+ case NVPTX::SUST_P_2D_ARRAY_V4B8_TRAP:
+ case NVPTX::SUST_P_2D_ARRAY_V4B16_TRAP:
+ case NVPTX::SUST_P_2D_ARRAY_V4B32_TRAP:
+ case NVPTX::SUST_P_3D_B8_TRAP:
+ case NVPTX::SUST_P_3D_B16_TRAP:
+ case NVPTX::SUST_P_3D_B32_TRAP:
+ case NVPTX::SUST_P_3D_V2B8_TRAP:
+ case NVPTX::SUST_P_3D_V2B16_TRAP:
+ case NVPTX::SUST_P_3D_V2B32_TRAP:
+ case NVPTX::SUST_P_3D_V4B8_TRAP:
+ case NVPTX::SUST_P_3D_V4B16_TRAP:
+ case NVPTX::SUST_P_3D_V4B32_TRAP: {
+ // This is a surface store, so operand 0 is a surfref
+ MachineOperand &SurfHandle = MI.getOperand(0);
+
+ replaceImageHandle(SurfHandle, MF);
+
+ return true;
+ }
+ case NVPTX::TXQ_CHANNEL_ORDER:
+ case NVPTX::TXQ_CHANNEL_DATA_TYPE:
+ case NVPTX::TXQ_WIDTH:
+ case NVPTX::TXQ_HEIGHT:
+ case NVPTX::TXQ_DEPTH:
+ case NVPTX::TXQ_ARRAY_SIZE:
+ case NVPTX::TXQ_NUM_SAMPLES:
+ case NVPTX::TXQ_NUM_MIPMAP_LEVELS:
+ case NVPTX::SUQ_CHANNEL_ORDER:
+ case NVPTX::SUQ_CHANNEL_DATA_TYPE:
+ case NVPTX::SUQ_WIDTH:
+ case NVPTX::SUQ_HEIGHT:
+ case NVPTX::SUQ_DEPTH:
+ case NVPTX::SUQ_ARRAY_SIZE: {
+ // This is a query, so operand 1 is a surfref/texref
+ MachineOperand &Handle = MI.getOperand(1);
+
+ replaceImageHandle(Handle, MF);
+
+ return true;
+ }
+ }
+}
+
+void NVPTXReplaceImageHandles::
+replaceImageHandle(MachineOperand &Op, MachineFunction &MF) {
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ NVPTXMachineFunctionInfo *MFI = MF.getInfo<NVPTXMachineFunctionInfo>();
+ // Which instruction defines the handle?
+ MachineInstr *MI = MRI.getVRegDef(Op.getReg());
+ assert(MI && "No def for image handle vreg?");
+ MachineInstr &TexHandleDef = *MI;
+
+ switch (TexHandleDef.getOpcode()) {
+ case NVPTX::LD_i64_avar: {
+ // The handle is a parameter value being loaded, replace with the
+ // parameter symbol
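+    // (e.g., in a function named "foo" -- an illustrative name -- a handle
+    // loaded from "foo_param_2" becomes the handle-list index registered for
+    // "foo_param_2")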
+ assert(TexHandleDef.getOperand(6).isSymbol() && "Load is not a symbol!");
+ StringRef Sym = TexHandleDef.getOperand(6).getSymbolName();
+ std::string ParamBaseName = MF.getName();
+ ParamBaseName += "_param_";
+ assert(Sym.startswith(ParamBaseName) && "Invalid symbol reference");
+ unsigned Param = atoi(Sym.data()+ParamBaseName.size());
+ std::string NewSym;
+ raw_string_ostream NewSymStr(NewSym);
+ NewSymStr << MF.getFunction()->getName() << "_param_" << Param;
+ Op.ChangeToImmediate(
+ MFI->getImageHandleSymbolIndex(NewSymStr.str().c_str()));
+ InstrsToRemove.insert(&TexHandleDef);
+ break;
+ }
+ case NVPTX::texsurf_handles: {
+ // The handle is a global variable, replace with the global variable name
+ assert(TexHandleDef.getOperand(1).isGlobal() && "Load is not a global!");
+ const GlobalValue *GV = TexHandleDef.getOperand(1).getGlobal();
+ assert(GV->hasName() && "Global sampler must be named!");
+ Op.ChangeToImmediate(MFI->getImageHandleSymbolIndex(GV->getName().data()));
+ InstrsToRemove.insert(&TexHandleDef);
+ break;
+ }
+ default:
+ llvm_unreachable("Unknown instruction operating on handle");
+ }
+}
+
+MachineFunctionPass *llvm::createNVPTXReplaceImageHandlesPass() {
+ return new NVPTXReplaceImageHandles();
+}
diff --git a/lib/Target/NVPTX/NVPTXSection.h b/lib/Target/NVPTX/NVPTXSection.h
index f8a692e..aa0436b 100644
--- a/lib/Target/NVPTX/NVPTXSection.h
+++ b/lib/Target/NVPTX/NVPTXSection.h
@@ -31,16 +31,16 @@ public:
/// Override this as NVPTX has its own way of printing switching
/// to a section.
- virtual void PrintSwitchToSection(const MCAsmInfo &MAI,
- raw_ostream &OS,
- const MCExpr *Subsection) const {}
+ void PrintSwitchToSection(const MCAsmInfo &MAI,
+ raw_ostream &OS,
+ const MCExpr *Subsection) const override {}
/// Base address of PTX sections is zero.
- virtual bool isBaseAddressKnownZero() const { return true; }
- virtual bool UseCodeAlign() const { return false; }
- virtual bool isVirtualSection() const { return false; }
- virtual std::string getLabelBeginName() const { return ""; }
- virtual std::string getLabelEndName() const { return ""; }
+ bool isBaseAddressKnownZero() const override { return true; }
+ bool UseCodeAlign() const override { return false; }
+ bool isVirtualSection() const override { return false; }
+ std::string getLabelBeginName() const override { return ""; }
+ std::string getLabelEndName() const override { return ""; }
};
} // end namespace llvm
diff --git a/lib/Target/NVPTX/NVPTXSubtarget.cpp b/lib/Target/NVPTX/NVPTXSubtarget.cpp
index 9771a17..8c7df52 100644
--- a/lib/Target/NVPTX/NVPTXSubtarget.cpp
+++ b/lib/Target/NVPTX/NVPTXSubtarget.cpp
@@ -12,14 +12,16 @@
//===----------------------------------------------------------------------===//
#include "NVPTXSubtarget.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "nvptx-subtarget"
+
#define GET_SUBTARGETINFO_ENUM
#define GET_SUBTARGETINFO_TARGET_DESC
#define GET_SUBTARGETINFO_CTOR
#include "NVPTXGenSubtargetInfo.inc"
-using namespace llvm;
-
-
// Pin the vtable to this file.
void NVPTXSubtarget::anchor() {}
diff --git a/lib/Target/NVPTX/NVPTXSubtarget.h b/lib/Target/NVPTX/NVPTXSubtarget.h
index f99bebd..581e5ed 100644
--- a/lib/Target/NVPTX/NVPTXSubtarget.h
+++ b/lib/Target/NVPTX/NVPTXSubtarget.h
@@ -16,12 +16,11 @@
#include "NVPTX.h"
#include "llvm/Target/TargetSubtargetInfo.h"
+#include <string>
#define GET_SUBTARGETINFO_HEADER
#include "NVPTXGenSubtargetInfo.inc"
-#include <string>
-
namespace llvm {
class NVPTXSubtarget : public NVPTXGenSubtargetInfo {
@@ -65,6 +64,10 @@ public:
inline bool hasROT32() const { return hasHWROT32() || hasSWROT32(); }
inline bool hasROT64() const { return SmVersion >= 20; }
+ bool hasImageHandles() const {
+ // Currently disabled
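+    // (when this returns false, NVPTXReplaceImageHandles lowers handle
+    // accesses to concrete symbols instead)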
+ return false;
+ }
bool is64Bit() const { return Is64Bit; }
unsigned int getSmVersion() const { return SmVersion; }
diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index 7d7d793..26a4f84 100644
--- a/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -16,7 +16,6 @@
#include "NVPTX.h"
#include "NVPTXAllocaHoisting.h"
#include "NVPTXLowerAggrCopies.h"
-#include "llvm/ADT/OwningPtr.h"
#include "llvm/Analysis/Passes.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineFunctionAnalysis.h"
@@ -50,6 +49,7 @@ namespace llvm {
void initializeNVVMReflectPass(PassRegistry&);
void initializeGenericToNVVMPass(PassRegistry&);
void initializeNVPTXAssignValidGlobalNamesPass(PassRegistry&);
+void initializeNVPTXFavorNonGenericAddrSpacesPass(PassRegistry &);
}
extern "C" void LLVMInitializeNVPTXTarget() {
@@ -62,6 +62,8 @@ extern "C" void LLVMInitializeNVPTXTarget() {
initializeNVVMReflectPass(*PassRegistry::getPassRegistry());
initializeGenericToNVVMPass(*PassRegistry::getPassRegistry());
initializeNVPTXAssignValidGlobalNamesPass(*PassRegistry::getPassRegistry());
+ initializeNVPTXFavorNonGenericAddrSpacesPass(
+ *PassRegistry::getPassRegistry());
}
static std::string computeDataLayout(const NVPTXSubtarget &ST) {
@@ -113,14 +115,14 @@ public:
return getTM<NVPTXTargetMachine>();
}
- virtual void addIRPasses();
- virtual bool addInstSelector();
- virtual bool addPreRegAlloc();
- virtual bool addPostRegAlloc();
+ void addIRPasses() override;
+ bool addInstSelector() override;
+ bool addPreRegAlloc() override;
+ bool addPostRegAlloc() override;
- virtual FunctionPass *createTargetRegisterAllocator(bool) override;
- virtual void addFastRegAlloc(FunctionPass *RegAllocPass);
- virtual void addOptimizedRegAlloc(FunctionPass *RegAllocPass);
+ FunctionPass *createTargetRegisterAllocator(bool) override;
+ void addFastRegAlloc(FunctionPass *RegAllocPass) override;
+ void addOptimizedRegAlloc(FunctionPass *RegAllocPass) override;
};
} // end anonymous namespace
@@ -140,15 +142,42 @@ void NVPTXPassConfig::addIRPasses() {
disablePass(&BranchFolderPassID);
disablePass(&TailDuplicateID);
+ addPass(createNVPTXImageOptimizerPass());
TargetPassConfig::addIRPasses();
addPass(createNVPTXAssignValidGlobalNamesPass());
addPass(createGenericToNVVMPass());
+ addPass(createNVPTXFavorNonGenericAddrSpacesPass());
+ addPass(createSeparateConstOffsetFromGEPPass());
+ // The SeparateConstOffsetFromGEP pass creates variadic bases that can be used
+  // by multiple GEPs. Run GVN or EarlyCSE to actually reuse them. GVN generates
+ // significantly better code than EarlyCSE for some of our benchmarks.
+ if (getOptLevel() == CodeGenOpt::Aggressive)
+ addPass(createGVNPass());
+ else
+ addPass(createEarlyCSEPass());
+ // Both FavorNonGenericAddrSpaces and SeparateConstOffsetFromGEP may leave
+ // some dead code. We could remove dead code in an ad-hoc manner, but that
+ // requires manual work and might be error-prone.
+ //
+ // The FavorNonGenericAddrSpaces pass shortcuts unnecessary addrspacecasts,
+  // leaving them unused.
+ //
+ // SeparateConstOffsetFromGEP rebuilds a new index from the old index, and the
+ // old index and some of its intermediate results may become unused.
+ addPass(createDeadCodeEliminationPass());
}
bool NVPTXPassConfig::addInstSelector() {
+ const NVPTXSubtarget &ST =
+ getTM<NVPTXTargetMachine>().getSubtarget<NVPTXSubtarget>();
+
addPass(createLowerAggrCopies());
addPass(createAllocaHoisting());
addPass(createNVPTXISelDag(getNVPTXTargetMachine(), getOptLevel()));
+
+ if (!ST.hasImageHandles())
+ addPass(createNVPTXReplaceImageHandlesPass());
+
return false;
}
@@ -159,7 +188,7 @@ bool NVPTXPassConfig::addPostRegAlloc() {
}
FunctionPass *NVPTXPassConfig::createTargetRegisterAllocator(bool) {
- return 0; // No reg alloc
+ return nullptr; // No reg alloc
}
void NVPTXPassConfig::addFastRegAlloc(FunctionPass *RegAllocPass) {
diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.h b/lib/Target/NVPTX/NVPTXTargetMachine.h
index 5fbcf73..2db7c18 100644
--- a/lib/Target/NVPTX/NVPTXTargetMachine.h
+++ b/lib/Target/NVPTX/NVPTXTargetMachine.h
@@ -51,22 +51,22 @@ public:
const TargetOptions &Options, Reloc::Model RM,
CodeModel::Model CM, CodeGenOpt::Level OP, bool is64bit);
- virtual const TargetFrameLowering *getFrameLowering() const {
+ const TargetFrameLowering *getFrameLowering() const override {
return &FrameLowering;
}
- virtual const NVPTXInstrInfo *getInstrInfo() const { return &InstrInfo; }
- virtual const DataLayout *getDataLayout() const { return &DL; }
- virtual const NVPTXSubtarget *getSubtargetImpl() const { return &Subtarget; }
+ const NVPTXInstrInfo *getInstrInfo() const override { return &InstrInfo; }
+ const DataLayout *getDataLayout() const override { return &DL; }
+ const NVPTXSubtarget *getSubtargetImpl() const override { return &Subtarget; }
- virtual const NVPTXRegisterInfo *getRegisterInfo() const {
+ const NVPTXRegisterInfo *getRegisterInfo() const override {
return &(InstrInfo.getRegisterInfo());
}
- virtual NVPTXTargetLowering *getTargetLowering() const {
+ NVPTXTargetLowering *getTargetLowering() const override {
return const_cast<NVPTXTargetLowering *>(&TLInfo);
}
- virtual const TargetSelectionDAGInfo *getSelectionDAGInfo() const {
+ const TargetSelectionDAGInfo *getSelectionDAGInfo() const override {
return &TSInfo;
}
@@ -79,17 +79,17 @@ public:
return const_cast<ManagedStringPool *>(&ManagedStrPool);
}
- virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
+ TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
// Emission of machine code through JITCodeEmitter is not supported.
- virtual bool addPassesToEmitMachineCode(PassManagerBase &, JITCodeEmitter &,
- bool = true) {
+ bool addPassesToEmitMachineCode(PassManagerBase &, JITCodeEmitter &,
+ bool = true) override {
return true;
}
// Emission of machine code through MCJIT is not supported.
- virtual bool addPassesToEmitMC(PassManagerBase &, MCContext *&, raw_ostream &,
- bool = true) {
+ bool addPassesToEmitMC(PassManagerBase &, MCContext *&, raw_ostream &,
+ bool = true) override {
return true;
}
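
A minimal sketch of what the blanket virtual-to-override conversion in this header buys (illustrative code, not from the tree): override makes the compiler reject a would-be override whose signature drifts from the base class, instead of silently introducing a new virtual function.

    struct Base {
      virtual const char *name() const;
    };
    struct Derived : Base {
      const char *name() const override;  // OK: overrides Base::name
      // const char *name() override;     // error: missing const, does not override
    };
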
diff --git a/lib/Target/NVPTX/NVPTXTargetObjectFile.h b/lib/Target/NVPTX/NVPTXTargetObjectFile.h
index 2a7281e..0b438c5 100644
--- a/lib/Target/NVPTX/NVPTXTargetObjectFile.h
+++ b/lib/Target/NVPTX/NVPTXTargetObjectFile.h
@@ -22,26 +22,26 @@ class NVPTXTargetObjectFile : public TargetLoweringObjectFile {
public:
NVPTXTargetObjectFile() {
- TextSection = 0;
- DataSection = 0;
- BSSSection = 0;
- ReadOnlySection = 0;
+ TextSection = nullptr;
+ DataSection = nullptr;
+ BSSSection = nullptr;
+ ReadOnlySection = nullptr;
- StaticCtorSection = 0;
- StaticDtorSection = 0;
- LSDASection = 0;
- EHFrameSection = 0;
- DwarfAbbrevSection = 0;
- DwarfInfoSection = 0;
- DwarfLineSection = 0;
- DwarfFrameSection = 0;
- DwarfPubTypesSection = 0;
- DwarfDebugInlineSection = 0;
- DwarfStrSection = 0;
- DwarfLocSection = 0;
- DwarfARangesSection = 0;
- DwarfRangesSection = 0;
- DwarfMacroInfoSection = 0;
+ StaticCtorSection = nullptr;
+ StaticDtorSection = nullptr;
+ LSDASection = nullptr;
+ EHFrameSection = nullptr;
+ DwarfAbbrevSection = nullptr;
+ DwarfInfoSection = nullptr;
+ DwarfLineSection = nullptr;
+ DwarfFrameSection = nullptr;
+ DwarfPubTypesSection = nullptr;
+ DwarfDebugInlineSection = nullptr;
+ DwarfStrSection = nullptr;
+ DwarfLocSection = nullptr;
+ DwarfARangesSection = nullptr;
+ DwarfRangesSection = nullptr;
+ DwarfMacroInfoSection = nullptr;
}
virtual ~NVPTXTargetObjectFile();
diff --git a/lib/Target/NVPTX/NVPTXUtilities.cpp b/lib/Target/NVPTX/NVPTXUtilities.cpp
index 60a5173..a9fd190b 100644
--- a/lib/Target/NVPTX/NVPTXUtilities.cpp
+++ b/lib/Target/NVPTX/NVPTXUtilities.cpp
@@ -22,9 +22,9 @@
#include <map>
#include <string>
#include <vector>
-//#include <iostream>
#include "llvm/Support/ManagedStatic.h"
#include "llvm/IR/InstIterator.h"
+#include "llvm/Support/MutexGuard.h"
using namespace llvm;
@@ -33,8 +33,15 @@ typedef std::map<const GlobalValue *, key_val_pair_t> global_val_annot_t;
typedef std::map<const Module *, global_val_annot_t> per_module_annot_t;
ManagedStatic<per_module_annot_t> annotationCache;
+static sys::Mutex Lock;
+
+void llvm::clearAnnotationCache(const llvm::Module *Mod) {
+ MutexGuard Guard(Lock);
+ annotationCache->erase(Mod);
+}
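
Every accessor below takes the same guard, so the annotation cache stays consistent across threads; the idiom in isolation (a standalone sketch, with an int standing in for the real cache):

    #include "llvm/Support/ManagedStatic.h"
    #include "llvm/Support/MutexGuard.h"

    static llvm::sys::Mutex CacheLock;
    static llvm::ManagedStatic<int> SharedState;

    static void touchState() {
      llvm::MutexGuard Guard(CacheLock); // acquired here, released on return
      ++*SharedState;
    }
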
static void cacheAnnotationFromMD(const MDNode *md, key_val_pair_t &retval) {
+ MutexGuard Guard(Lock);
assert(md && "Invalid mdnode for annotation");
assert((md->getNumOperands() % 2) == 1 && "Invalid number of operands");
// start index = 1, to skip the global variable key
@@ -60,6 +67,7 @@ static void cacheAnnotationFromMD(const MDNode *md, key_val_pair_t &retval) {
}
static void cacheAnnotationFromMD(const Module *m, const GlobalValue *gv) {
+ MutexGuard Guard(Lock);
NamedMDNode *NMD = m->getNamedMetadata(llvm::NamedMDForAnnotations);
if (!NMD)
return;
@@ -92,6 +100,7 @@ static void cacheAnnotationFromMD(const Module *m, const GlobalValue *gv) {
bool llvm::findOneNVVMAnnotation(const GlobalValue *gv, std::string prop,
unsigned &retval) {
+ MutexGuard Guard(Lock);
const Module *m = gv->getParent();
if ((*annotationCache).find(m) == (*annotationCache).end())
cacheAnnotationFromMD(m, gv);
@@ -105,6 +114,7 @@ bool llvm::findOneNVVMAnnotation(const GlobalValue *gv, std::string prop,
bool llvm::findAllNVVMAnnotation(const GlobalValue *gv, std::string prop,
std::vector<unsigned> &retval) {
+ MutexGuard Guard(Lock);
const Module *m = gv->getParent();
if ((*annotationCache).find(m) == (*annotationCache).end())
cacheAnnotationFromMD(m, gv);
@@ -195,8 +205,37 @@ bool llvm::isImageWriteOnly(const llvm::Value &val) {
return false;
}
+bool llvm::isImageReadWrite(const llvm::Value &val) {
+ if (const Argument *arg = dyn_cast<Argument>(&val)) {
+ const Function *func = arg->getParent();
+ std::vector<unsigned> annot;
+ if (llvm::findAllNVVMAnnotation(func,
+ llvm::PropertyAnnotationNames[
+ llvm::PROPERTY_ISREADWRITE_IMAGE_PARAM],
+ annot)) {
+ if (std::find(annot.begin(), annot.end(), arg->getArgNo()) != annot.end())
+ return true;
+ }
+ }
+ return false;
+}
+
bool llvm::isImage(const llvm::Value &val) {
- return llvm::isImageReadOnly(val) || llvm::isImageWriteOnly(val);
+ return llvm::isImageReadOnly(val) || llvm::isImageWriteOnly(val) ||
+ llvm::isImageReadWrite(val);
+}
+
+bool llvm::isManaged(const llvm::Value &val) {
+ if(const GlobalValue *gv = dyn_cast<GlobalValue>(&val)) {
+ unsigned annot;
+ if(llvm::findOneNVVMAnnotation(gv,
+ llvm::PropertyAnnotationNames[llvm::PROPERTY_MANAGED],
+ annot)) {
+ assert((annot == 1) && "Unexpected annotation on a managed symbol");
+ return true;
+ }
+ }
+ return false;
}
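
A hedged usage sketch for the two new predicates (the surrounding lowering code is assumed; only the calls themselves come from the patch):

    #include "NVPTXUtilities.h"
    #include "llvm/IR/Value.h"

    // isImage() now also reports read-write images via isImageReadWrite().
    static bool needsImageLowering(const llvm::Value &V) {
      return llvm::isImage(V);
    }

    // A managed symbol carries a "managed" annotation whose value must be 1.
    static bool needsManagedLowering(const llvm::Value &V) {
      return llvm::isManaged(V);
    }
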
std::string llvm::getTextureName(const llvm::Value &val) {
@@ -354,12 +393,12 @@ llvm::skipPointerTransfer(const Value *V, bool ignore_GEP_indices) {
const Value *
llvm::skipPointerTransfer(const Value *V, std::set<const Value *> &processed) {
if (processed.find(V) != processed.end())
- return NULL;
+ return nullptr;
processed.insert(V);
const Value *V2 = V->stripPointerCasts();
if (V2 != V && processed.find(V2) != processed.end())
- return NULL;
+ return nullptr;
processed.insert(V2);
V = V2;
@@ -375,20 +414,20 @@ llvm::skipPointerTransfer(const Value *V, std::set<const Value *> &processed) {
continue;
} else if (const PHINode *PN = dyn_cast<PHINode>(V)) {
if (V != V2 && processed.find(V) != processed.end())
- return NULL;
+ return nullptr;
processed.insert(PN);
- const Value *common = 0;
+ const Value *common = nullptr;
for (unsigned i = 0; i != PN->getNumIncomingValues(); ++i) {
const Value *pv = PN->getIncomingValue(i);
const Value *base = skipPointerTransfer(pv, processed);
if (base) {
- if (common == 0)
+ if (!common)
common = base;
else if (common != base)
return PN;
}
}
- if (common == 0)
+ if (!common)
return PN;
V = common;
}
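
Worked through on a small case (names invented for illustration): for %p = phi i8* [ %c, %bb0 ], [ %g, %bb1 ], if %c and %g both strip back to the same base %buf, the walk resumes from %buf; if they strip to different bases, or the recursion yields none at all, the PHI itself is returned as the end of the transfer chain.
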
@@ -406,7 +445,7 @@ BasicBlock *llvm::getParentBlock(Value *v) {
if (Instruction *I = dyn_cast<Instruction>(v))
return I->getParent();
- return 0;
+ return nullptr;
}
Function *llvm::getParentFunction(Value *v) {
@@ -419,13 +458,13 @@ Function *llvm::getParentFunction(Value *v) {
if (BasicBlock *B = dyn_cast<BasicBlock>(v))
return B->getParent();
- return 0;
+ return nullptr;
}
// Dump a block by name
void llvm::dumpBlock(Value *v, char *blockName) {
Function *F = getParentFunction(v);
- if (F == 0)
+ if (!F)
return;
for (Function::iterator it = F->begin(), ie = F->end(); it != ie; ++it) {
@@ -440,8 +479,8 @@ void llvm::dumpBlock(Value *v, char *blockName) {
// Find an instruction by name
Instruction *llvm::getInst(Value *base, char *instName) {
Function *F = getParentFunction(base);
- if (F == 0)
- return 0;
+ if (!F)
+ return nullptr;
for (inst_iterator it = inst_begin(F), ie = inst_end(F); it != ie; ++it) {
Instruction *I = &*it;
@@ -450,7 +489,7 @@ Instruction *llvm::getInst(Value *base, char *instName) {
}
}
- return 0;
+ return nullptr;
}
// Dump an instruction by name
diff --git a/lib/Target/NVPTX/NVPTXUtilities.h b/lib/Target/NVPTX/NVPTXUtilities.h
index a208004..446bfa1 100644
--- a/lib/Target/NVPTX/NVPTXUtilities.h
+++ b/lib/Target/NVPTX/NVPTXUtilities.h
@@ -28,6 +28,8 @@ namespace llvm {
#define NVCL_IMAGE2D_READONLY_FUNCNAME "__is_image2D_readonly"
#define NVCL_IMAGE3D_READONLY_FUNCNAME "__is_image3D_readonly"
+void clearAnnotationCache(const llvm::Module *);
+
bool findOneNVVMAnnotation(const llvm::GlobalValue *, std::string, unsigned &);
bool findAllNVVMAnnotation(const llvm::GlobalValue *, std::string,
std::vector<unsigned> &);
@@ -38,6 +40,8 @@ bool isSampler(const llvm::Value &);
bool isImage(const llvm::Value &);
bool isImageReadOnly(const llvm::Value &);
bool isImageWriteOnly(const llvm::Value &);
+bool isImageReadWrite(const llvm::Value &);
+bool isManaged(const llvm::Value &);
std::string getTextureName(const llvm::Value &);
std::string getSurfaceName(const llvm::Value &);
diff --git a/lib/Target/NVPTX/NVVMReflect.cpp b/lib/Target/NVPTX/NVVMReflect.cpp
index 8b5444a..cb8bd72 100644
--- a/lib/Target/NVPTX/NVVMReflect.cpp
+++ b/lib/Target/NVPTX/NVVMReflect.cpp
@@ -38,6 +38,8 @@
using namespace llvm;
+#define DEBUG_TYPE "nvptx-reflect"
+
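
This relocation of DEBUG_TYPE from before the includes to after them recurs throughout the patch; the resulting shape, in isolation (a sketch assuming the usual Debug.h machinery):

    #include "llvm/Support/Debug.h"  // headers are included first ...
    using namespace llvm;

    #define DEBUG_TYPE "my-pass"     // ... so they never see this macro

    static void trace() {
      DEBUG(dbgs() << "visible only under -debug-only=" DEBUG_TYPE "\n");
    }
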
namespace llvm { void initializeNVVMReflectPass(PassRegistry &); }
namespace {
@@ -49,13 +51,13 @@ private:
public:
static char ID;
- NVVMReflect() : ModulePass(ID), ReflectFunction(0) {
+ NVVMReflect() : ModulePass(ID), ReflectFunction(nullptr) {
initializeNVVMReflectPass(*PassRegistry::getPassRegistry());
VarMap.clear();
}
NVVMReflect(const StringMap<int> &Mapping)
- : ModulePass(ID), ReflectFunction(0) {
+ : ModulePass(ID), ReflectFunction(nullptr) {
initializeNVVMReflectPass(*PassRegistry::getPassRegistry());
for (StringMap<int>::const_iterator I = Mapping.begin(), E = Mapping.end();
I != E; ++I) {
@@ -63,8 +65,10 @@ public:
}
}
- void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); }
- virtual bool runOnModule(Module &);
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesAll();
+ }
+ bool runOnModule(Module &) override;
void setVarMap();
};
@@ -126,7 +130,7 @@ bool NVVMReflect::runOnModule(Module &M) {
// If the reflect function is not used, then there will be
// no entry in the module.
- if (ReflectFunction == 0)
+ if (!ReflectFunction)
return false;
// Validate _reflect function
diff --git a/lib/Target/PowerPC/AsmParser/LLVMBuild.txt b/lib/Target/PowerPC/AsmParser/LLVMBuild.txt
index 02ebf1d..801f27b 100644
--- a/lib/Target/PowerPC/AsmParser/LLVMBuild.txt
+++ b/lib/Target/PowerPC/AsmParser/LLVMBuild.txt
@@ -1,4 +1,4 @@
-;===- ./lib/Target/PowerPC/AsmParser/LLVMBuild.txt --------------*- Conf -*--===;
+;===- ./lib/Target/PowerPC/AsmParser/LLVMBuild.txt -------------*- Conf -*--===;
;
; The LLVM Compiler Infrastructure
;
@@ -19,5 +19,5 @@
type = Library
name = PowerPCAsmParser
parent = PowerPC
-required_libraries = PowerPCDesc PowerPCInfo MC MCParser Support
+required_libraries = MC MCParser PowerPCDesc PowerPCInfo Support
add_to_library_groups = PowerPC
diff --git a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
index 8bb91cf..3ac037d 100644
--- a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
+++ b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
@@ -230,7 +230,7 @@ class PPCAsmParser : public MCTargetAsmParser {
bool MatchRegisterName(const AsmToken &Tok,
unsigned &RegNo, int64_t &IntVal);
- virtual bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc);
+ bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
const MCExpr *ExtractModifierFromExpr(const MCExpr *E,
PPCMCExpr::VariantKind &Variant);
@@ -248,7 +248,7 @@ class PPCAsmParser : public MCTargetAsmParser {
bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
SmallVectorImpl<MCParsedAsmOperand*> &Operands,
MCStreamer &Out, unsigned &ErrorInfo,
- bool MatchingInlineAsm);
+ bool MatchingInlineAsm) override;
void ProcessInstruction(MCInst &Inst,
const SmallVectorImpl<MCParsedAsmOperand*> &Ops);
@@ -264,7 +264,8 @@ class PPCAsmParser : public MCTargetAsmParser {
public:
PPCAsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser,
- const MCInstrInfo &_MII)
+ const MCInstrInfo &_MII,
+ const MCTargetOptions &Options)
: MCTargetAsmParser(), STI(_STI), Parser(_Parser), MII(_MII) {
// Check for 64-bit vs. 32-bit pointer mode.
Triple TheTriple(STI.getTargetTriple());
@@ -275,17 +276,18 @@ public:
setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
}
- virtual bool ParseInstruction(ParseInstructionInfo &Info,
- StringRef Name, SMLoc NameLoc,
- SmallVectorImpl<MCParsedAsmOperand*> &Operands);
+ bool ParseInstruction(ParseInstructionInfo &Info,
+ StringRef Name, SMLoc NameLoc,
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands) override;
- virtual bool ParseDirective(AsmToken DirectiveID);
+ bool ParseDirective(AsmToken DirectiveID) override;
- unsigned validateTargetOperandClass(MCParsedAsmOperand *Op, unsigned Kind);
+ unsigned validateTargetOperandClass(MCParsedAsmOperand *Op,
+ unsigned Kind) override;
- virtual const MCExpr *applyModifierToExpr(const MCExpr *E,
- MCSymbolRefExpr::VariantKind,
- MCContext &Ctx);
+ const MCExpr *applyModifierToExpr(const MCExpr *E,
+ MCSymbolRefExpr::VariantKind,
+ MCContext &Ctx) override;
};
/// PPCOperand - Instances of this class represent a parsed PowerPC machine
@@ -350,10 +352,10 @@ public:
}
/// getStartLoc - Get the location of the first token of this operand.
- SMLoc getStartLoc() const { return StartLoc; }
+ SMLoc getStartLoc() const override { return StartLoc; }
/// getEndLoc - Get the location of the last token of this operand.
- SMLoc getEndLoc() const { return EndLoc; }
+ SMLoc getEndLoc() const override { return EndLoc; }
/// isPPC64 - True if this operand is for an instruction in 64-bit mode.
bool isPPC64() const { return IsPPC64; }
@@ -378,7 +380,7 @@ public:
return TLSReg.Sym;
}
- unsigned getReg() const {
+ unsigned getReg() const override {
assert(isRegNumber() && "Invalid access!");
return (unsigned) Imm.Val;
}
@@ -403,8 +405,8 @@ public:
return 7 - countTrailingZeros<uint64_t>(Imm.Val);
}
- bool isToken() const { return Kind == Token; }
- bool isImm() const { return Kind == Immediate || Kind == Expression; }
+ bool isToken() const override { return Kind == Token; }
+ bool isImm() const override { return Kind == Immediate || Kind == Expression; }
bool isU2Imm() const { return Kind == Immediate && isUInt<2>(getImm()); }
bool isU5Imm() const { return Kind == Immediate && isUInt<5>(getImm()); }
bool isS5Imm() const { return Kind == Immediate && isInt<5>(getImm()); }
@@ -437,8 +439,8 @@ public:
&& isUInt<5>(getImm())); }
bool isCRBitMask() const { return Kind == Immediate && isUInt<8>(getImm()) &&
isPowerOf2_32(getImm()); }
- bool isMem() const { return false; }
- bool isReg() const { return false; }
+ bool isMem() const override { return false; }
+ bool isReg() const override { return false; }
void addRegOperands(MCInst &Inst, unsigned N) const {
llvm_unreachable("addRegOperands");
@@ -544,7 +546,7 @@ public:
return StringRef(Tok.Data, Tok.Length);
}
- virtual void print(raw_ostream &OS) const;
+ void print(raw_ostream &OS) const override;
static PPCOperand *CreateToken(StringRef Str, SMLoc S, bool IsPPC64) {
PPCOperand *Op = new PPCOperand(Token);
@@ -1021,7 +1023,7 @@ ExtractModifierFromExpr(const MCExpr *E,
switch (E->getKind()) {
case MCExpr::Target:
case MCExpr::Constant:
- return 0;
+ return nullptr;
case MCExpr::SymbolRef: {
const MCSymbolRefExpr *SRE = cast<MCSymbolRefExpr>(E);
@@ -1049,7 +1051,7 @@ ExtractModifierFromExpr(const MCExpr *E,
Variant = PPCMCExpr::VK_PPC_HIGHESTA;
break;
default:
- return 0;
+ return nullptr;
}
return MCSymbolRefExpr::Create(&SRE->getSymbol(), Context);
@@ -1059,7 +1061,7 @@ ExtractModifierFromExpr(const MCExpr *E,
const MCUnaryExpr *UE = cast<MCUnaryExpr>(E);
const MCExpr *Sub = ExtractModifierFromExpr(UE->getSubExpr(), Variant);
if (!Sub)
- return 0;
+ return nullptr;
return MCUnaryExpr::Create(UE->getOpcode(), Sub, Context);
}
@@ -1070,7 +1072,7 @@ ExtractModifierFromExpr(const MCExpr *E,
const MCExpr *RHS = ExtractModifierFromExpr(BE->getRHS(), RHSVariant);
if (!LHS && !RHS)
- return 0;
+ return nullptr;
if (!LHS) LHS = BE->getLHS();
if (!RHS) RHS = BE->getRHS();
@@ -1082,7 +1084,7 @@ ExtractModifierFromExpr(const MCExpr *E,
else if (LHSVariant == RHSVariant)
Variant = LHSVariant;
else
- return 0;
+ return nullptr;
return MCBinaryExpr::Create(BE->getOpcode(), LHS, RHS, Context);
}
@@ -1593,6 +1595,6 @@ PPCAsmParser::applyModifierToExpr(const MCExpr *E,
case MCSymbolRefExpr::VK_PPC_HIGHESTA:
return PPCMCExpr::Create(PPCMCExpr::VK_PPC_HIGHESTA, E, false, Ctx);
default:
- return 0;
+ return nullptr;
}
}
diff --git a/lib/Target/PowerPC/Disassembler/LLVMBuild.txt b/lib/Target/PowerPC/Disassembler/LLVMBuild.txt
index 7f29040..c1011ff 100644
--- a/lib/Target/PowerPC/Disassembler/LLVMBuild.txt
+++ b/lib/Target/PowerPC/Disassembler/LLVMBuild.txt
@@ -19,5 +19,5 @@
type = Library
name = PowerPCDisassembler
parent = PowerPC
-required_libraries = MC Support PowerPCDesc PowerPCInfo
+required_libraries = MC PowerPCDesc PowerPCInfo Support
add_to_library_groups = PowerPC
diff --git a/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp b/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
index c4a7544..a2305a9 100644
--- a/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
+++ b/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
@@ -17,13 +17,15 @@
using namespace llvm;
+#define DEBUG_TYPE "ppc-disassembler"
+
typedef MCDisassembler::DecodeStatus DecodeStatus;
namespace {
class PPCDisassembler : public MCDisassembler {
public:
- PPCDisassembler(const MCSubtargetInfo &STI)
- : MCDisassembler(STI) {}
+ PPCDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx)
+ : MCDisassembler(STI, Ctx) {}
virtual ~PPCDisassembler() {}
// Override MCDisassembler.
@@ -37,8 +39,9 @@ public:
} // end anonymous namespace
static MCDisassembler *createPPCDisassembler(const Target &T,
- const MCSubtargetInfo &STI) {
- return new PPCDisassembler(STI);
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx) {
+ return new PPCDisassembler(STI, Ctx);
}
extern "C" void LLVMInitializePowerPCDisassembler() {
diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
index dc54b52..7279b09 100644
--- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
+++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
@@ -11,7 +11,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "asm-printer"
#include "PPCInstPrinter.h"
#include "MCTargetDesc/PPCMCTargetDesc.h"
#include "MCTargetDesc/PPCPredicates.h"
@@ -23,6 +22,8 @@
#include "llvm/Target/TargetOpcodes.h"
using namespace llvm;
+#define DEBUG_TYPE "asm-printer"
+
// FIXME: Once the integrated assembler supports full register names, tie this
// to the verbose-asm setting.
static cl::opt<bool>
diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
index 4d1df78..211a628 100644
--- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
+++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
@@ -31,8 +31,8 @@ public:
return IsDarwin;
}
- virtual void printRegName(raw_ostream &OS, unsigned RegNo) const;
- virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot);
+ void printRegName(raw_ostream &OS, unsigned RegNo) const override;
+ void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot) override;
// Autogenerated by tblgen.
void printInstruction(const MCInst *MI, raw_ostream &O);
@@ -41,7 +41,7 @@ public:
void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printPredicateOperand(const MCInst *MI, unsigned OpNo,
- raw_ostream &O, const char *Modifier = 0);
+ raw_ostream &O, const char *Modifier = nullptr);
void printU2ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printS5ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
index f7309bb..12584be 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
@@ -77,9 +77,11 @@ public:
PPCAsmBackend(const Target &T, bool isLittle) : MCAsmBackend(), TheTarget(T),
IsLittleEndian(isLittle) {}
- unsigned getNumFixupKinds() const { return PPC::NumTargetFixupKinds; }
+ unsigned getNumFixupKinds() const override {
+ return PPC::NumTargetFixupKinds;
+ }
- const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const {
+ const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override {
const static MCFixupKindInfo InfosBE[PPC::NumTargetFixupKinds] = {
// name offset bits flags
{ "fixup_ppc_br24", 6, 24, MCFixupKindInfo::FKF_IsPCRel },
@@ -110,7 +112,7 @@ public:
}
void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
- uint64_t Value, bool IsPCRel) const {
+ uint64_t Value, bool IsPCRel) const override {
Value = adjustFixupValue(Fixup.getKind(), Value);
if (!Value) return; // Doesn't change encoding.
@@ -126,7 +128,7 @@ public:
}
}
- bool mayNeedRelaxation(const MCInst &Inst) const {
+ bool mayNeedRelaxation(const MCInst &Inst) const override {
// FIXME.
return false;
}
@@ -134,18 +136,18 @@ public:
bool fixupNeedsRelaxation(const MCFixup &Fixup,
uint64_t Value,
const MCRelaxableFragment *DF,
- const MCAsmLayout &Layout) const {
+ const MCAsmLayout &Layout) const override {
// FIXME.
llvm_unreachable("relaxInstruction() unimplemented");
}
- void relaxInstruction(const MCInst &Inst, MCInst &Res) const {
+ void relaxInstruction(const MCInst &Inst, MCInst &Res) const override {
// FIXME.
llvm_unreachable("relaxInstruction() unimplemented");
}
- bool writeNopData(uint64_t Count, MCObjectWriter *OW) const {
+ bool writeNopData(uint64_t Count, MCObjectWriter *OW) const override {
uint64_t NumNops = Count / 4;
for (uint64_t i = 0; i != NumNops; ++i)
OW->Write32(0x60000000);
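
One encoding detail behind the loop above (an ISA fact rather than something the patch states): 0x60000000 is ori 0,0,0, the architected PowerPC no-op, which is why padding is emitted in 4-byte units.
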
@@ -180,7 +182,7 @@ namespace {
public:
DarwinPPCAsmBackend(const Target &T) : PPCAsmBackend(T, false) { }
- MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
+ MCObjectWriter *createObjectWriter(raw_ostream &OS) const override {
bool is64 = getPointerSize() == 8;
return createPPCMachObjectWriter(
OS,
@@ -197,7 +199,7 @@ namespace {
PPCAsmBackend(T, IsLittleEndian), OSABI(OSABI) { }
- MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
+ MCObjectWriter *createObjectWriter(raw_ostream &OS) const override {
bool is64 = getPointerSize() == 8;
return createPPCELFObjectWriter(OS, is64, isLittleEndian(), OSABI);
}
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
index d19f6a0..cd3b4f4 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
@@ -41,11 +41,12 @@ PPCELFObjectWriter::PPCELFObjectWriter(bool Is64Bit, uint8_t OSABI)
PPCELFObjectWriter::~PPCELFObjectWriter() {
}
-static MCSymbolRefExpr::VariantKind getAccessVariant(const MCFixup &Fixup) {
+static MCSymbolRefExpr::VariantKind getAccessVariant(const MCValue &Target,
+ const MCFixup &Fixup) {
const MCExpr *Expr = Fixup.getValue();
if (Expr->getKind() != MCExpr::Target)
- return Fixup.getAccessVariant();
+ return Target.getAccessVariant();
switch (cast<PPCMCExpr>(Expr)->getKind()) {
case PPCMCExpr::VK_PPC_None:
@@ -72,7 +73,7 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target,
const MCFixup &Fixup,
bool IsPCRel) const
{
- MCSymbolRefExpr::VariantKind Modifier = getAccessVariant(Fixup);
+ MCSymbolRefExpr::VariantKind Modifier = getAccessVariant(Target, Fixup);
// determine the type of the relocation
unsigned Type;
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
index 18609e1..b95a2ac 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
@@ -28,7 +28,7 @@ PPCMCAsmInfoDarwin::PPCMCAsmInfoDarwin(bool is64Bit, const Triple& T) {
ExceptionsType = ExceptionHandling::DwarfCFI;
if (!is64Bit)
- Data64bitsDirective = 0; // We can't emit a 64-bit unit in PPC32 mode.
+ Data64bitsDirective = nullptr; // We can't emit a 64-bit unit in PPC32 mode.
AssemblerDialect = 1; // New-Style mnemonics.
SupportsDebugInformation= true; // Debug information.
@@ -71,7 +71,7 @@ PPCLinuxMCAsmInfo::PPCLinuxMCAsmInfo(bool is64Bit, const Triple& T) {
ExceptionsType = ExceptionHandling::DwarfCFI;
ZeroDirective = "\t.space\t";
- Data64bitsDirective = is64Bit ? "\t.quad\t" : 0;
+ Data64bitsDirective = is64Bit ? "\t.quad\t" : nullptr;
AssemblerDialect = 1; // New-Style mnemonics.
if (T.getOS() == llvm::Triple::FreeBSD ||
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h
index cee2cb7..754330b 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h
@@ -21,13 +21,13 @@ namespace llvm {
class Triple;
class PPCMCAsmInfoDarwin : public MCAsmInfoDarwin {
- virtual void anchor();
+ void anchor() override;
public:
explicit PPCMCAsmInfoDarwin(bool is64Bit, const Triple&);
};
class PPCLinuxMCAsmInfo : public MCAsmInfoELF {
- virtual void anchor();
+ void anchor() override;
public:
explicit PPCLinuxMCAsmInfo(bool is64Bit, const Triple&);
};
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
index b259c5d..a4983ad 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
@@ -11,7 +11,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "mccodeemitter"
#include "MCTargetDesc/PPCMCTargetDesc.h"
#include "MCTargetDesc/PPCFixupKinds.h"
#include "llvm/ADT/Statistic.h"
@@ -26,6 +25,8 @@
#include "llvm/Target/TargetOpcodes.h"
using namespace llvm;
+#define DEBUG_TYPE "mccodeemitter"
+
STATISTIC(MCNumEmitted, "Number of MC instructions emitted");
namespace {
@@ -88,7 +89,7 @@ public:
const MCSubtargetInfo &STI) const;
void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
+ const MCSubtargetInfo &STI) const override {
// For fast-isel, a float COPY_TO_REGCLASS can survive this long.
// It's just a nop to keep the register classes happy, so don't
// generate anything.
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp
index c181e03..10d068d 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp
@@ -7,7 +7,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "ppcmcexpr"
#include "PPCMCExpr.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCAssembler.h"
@@ -15,6 +14,8 @@
using namespace llvm;
+#define DEBUG_TYPE "ppcmcexpr"
+
const PPCMCExpr*
PPCMCExpr::Create(VariantKind Kind, const MCExpr *Expr,
bool isDarwin, MCContext &Ctx) {
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h
index 5fc7918..3421b91 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h
@@ -76,16 +76,16 @@ public:
/// @}
- void PrintImpl(raw_ostream &OS) const;
+ void PrintImpl(raw_ostream &OS) const override;
bool EvaluateAsRelocatableImpl(MCValue &Res,
- const MCAsmLayout *Layout) const;
- void AddValueSymbols(MCAssembler *) const;
- const MCSection *FindAssociatedSection() const {
+ const MCAsmLayout *Layout) const override;
+ void AddValueSymbols(MCAssembler *) const override;
+ const MCSection *FindAssociatedSection() const override {
return getSubExpr()->FindAssociatedSection();
}
// There are no TLS PPCMCExprs at the moment.
- void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const {}
+ void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const override {}
static bool classof(const MCExpr *E) {
return E->getKind() == MCExpr::Target;
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
index 105c511..7057797 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
@@ -26,6 +26,8 @@
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/TargetRegistry.h"
+using namespace llvm;
+
#define GET_INSTRINFO_MC_DESC
#include "PPCGenInstrInfo.inc"
@@ -35,8 +37,6 @@
#define GET_REGINFO_MC_DESC
#include "PPCGenRegisterInfo.inc"
-using namespace llvm;
-
// Pin the vtable to this file.
PPCTargetStreamer::~PPCTargetStreamer() {}
PPCTargetStreamer::PPCTargetStreamer(MCStreamer &S) : MCTargetStreamer(S) {}
@@ -80,7 +80,7 @@ static MCAsmInfo *createPPCMCAsmInfo(const MCRegisterInfo &MRI, StringRef TT) {
// Initial state of the frame pointer is R1.
unsigned Reg = isPPC64 ? PPC::X1 : PPC::R1;
MCCFIInstruction Inst =
- MCCFIInstruction::createDefCfa(0, MRI.getDwarfRegNum(Reg, true), 0);
+ MCCFIInstruction::createDefCfa(nullptr, MRI.getDwarfRegNum(Reg, true), 0);
MAI->addInitialFrameState(Inst);
return MAI;
@@ -115,14 +115,14 @@ class PPCTargetAsmStreamer : public PPCTargetStreamer {
public:
PPCTargetAsmStreamer(MCStreamer &S, formatted_raw_ostream &OS)
: PPCTargetStreamer(S), OS(OS) {}
- virtual void emitTCEntry(const MCSymbol &S) {
+ void emitTCEntry(const MCSymbol &S) override {
OS << "\t.tc ";
OS << S.getName();
OS << "[TC],";
OS << S.getName();
OS << '\n';
}
- virtual void emitMachine(StringRef CPU) {
+ void emitMachine(StringRef CPU) override {
OS << "\t.machine " << CPU << '\n';
}
};
@@ -130,11 +130,11 @@ public:
class PPCTargetELFStreamer : public PPCTargetStreamer {
public:
PPCTargetELFStreamer(MCStreamer &S) : PPCTargetStreamer(S) {}
- virtual void emitTCEntry(const MCSymbol &S) {
+ void emitTCEntry(const MCSymbol &S) override {
// Creates a R_PPC64_TOC relocation
Streamer.EmitSymbolValue(&S, 8);
}
- virtual void emitMachine(StringRef CPU) {
+ void emitMachine(StringRef CPU) override {
// FIXME: Is there anything to do in here or does this directive only
// limit the parser?
}
@@ -143,10 +143,10 @@ public:
class PPCTargetMachOStreamer : public PPCTargetStreamer {
public:
PPCTargetMachOStreamer(MCStreamer &S) : PPCTargetStreamer(S) {}
- virtual void emitTCEntry(const MCSymbol &S) {
+ void emitTCEntry(const MCSymbol &S) override {
llvm_unreachable("Unknown pseudo-op: .tc");
}
- virtual void emitMachine(StringRef CPU) {
+ void emitMachine(StringRef CPU) override {
// FIXME: We should update the CPUType, CPUSubType in the Object file if
// the new values are different from the defaults.
}
@@ -175,13 +175,12 @@ static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
static MCStreamer *
createMCAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS,
- bool isVerboseAsm, bool useCFI, bool useDwarfDirectory,
+ bool isVerboseAsm, bool useDwarfDirectory,
MCInstPrinter *InstPrint, MCCodeEmitter *CE,
MCAsmBackend *TAB, bool ShowInst) {
- MCStreamer *S =
- llvm::createAsmStreamer(Ctx, OS, isVerboseAsm, useCFI, useDwarfDirectory,
- InstPrint, CE, TAB, ShowInst);
+ MCStreamer *S = llvm::createAsmStreamer(
+ Ctx, OS, isVerboseAsm, useDwarfDirectory, InstPrint, CE, TAB, ShowInst);
new PPCTargetAsmStreamer(*S, OS);
return S;
}
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp
index bbafe2e..cff27ba 100644
--- a/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp
@@ -44,7 +44,7 @@ public:
void RecordRelocation(MachObjectWriter *Writer, const MCAssembler &Asm,
const MCAsmLayout &Layout, const MCFragment *Fragment,
const MCFixup &Fixup, MCValue Target,
- uint64_t &FixedValue) {
+ uint64_t &FixedValue) override {
if (Writer->is64Bit()) {
report_fatal_error("Relocation emission for MachO/PPC64 unimplemented.");
} else
@@ -206,7 +206,7 @@ bool PPCMachObjectWriter::RecordScatteredRelocation(
// See <reloc.h>.
const MCSymbol *A = &Target.getSymA()->getSymbol();
- MCSymbolData *A_SD = &Asm.getSymbolData(*A);
+ const MCSymbolData *A_SD = &Asm.getSymbolData(*A);
if (!A_SD->getFragment())
report_fatal_error("symbol '" + A->getName() +
@@ -219,7 +219,7 @@ bool PPCMachObjectWriter::RecordScatteredRelocation(
uint32_t Value2 = 0;
if (const MCSymbolRefExpr *B = Target.getSymB()) {
- MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol());
+ const MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol());
if (!B_SD->getFragment())
report_fatal_error("symbol '" + B->getSymbol().getName() +
@@ -324,7 +324,7 @@ void PPCMachObjectWriter::RecordPPCRelocation(
// this doesn't seem right for RIT_PPC_BR24
// Get the symbol data, if any.
- MCSymbolData *SD = 0;
+ const MCSymbolData *SD = nullptr;
if (Target.getSymA())
SD = &Asm.getSymbolData(Target.getSymA()->getSymbol());
diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp
index 9ce8ea9..e89fb2d 100644
--- a/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -16,7 +16,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "asmprinter"
#include "PPC.h"
#include "InstPrinter/PPCInstPrinter.h"
#include "MCTargetDesc/PPCMCExpr.h"
@@ -59,6 +58,8 @@
#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
+#define DEBUG_TYPE "asmprinter"
+
namespace {
class PPCAsmPrinter : public AsmPrinter {
protected:
@@ -70,22 +71,22 @@ namespace {
: AsmPrinter(TM, Streamer),
Subtarget(TM.getSubtarget<PPCSubtarget>()), TOCLabelID(0) {}
- virtual const char *getPassName() const {
+ const char *getPassName() const override {
return "PowerPC Assembly Printer";
}
MCSymbol *lookUpOrCreateTOCEntry(MCSymbol *Sym);
- virtual void EmitInstruction(const MachineInstr *MI);
+ void EmitInstruction(const MachineInstr *MI) override;
void printOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O);
bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
unsigned AsmVariant, const char *ExtraCode,
- raw_ostream &O);
+ raw_ostream &O) override;
bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
unsigned AsmVariant, const char *ExtraCode,
- raw_ostream &O);
+ raw_ostream &O) override;
};
/// PPCLinuxAsmPrinter - PowerPC assembly printer, customized for Linux
@@ -94,15 +95,15 @@ namespace {
explicit PPCLinuxAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
: PPCAsmPrinter(TM, Streamer) {}
- virtual const char *getPassName() const {
+ const char *getPassName() const override {
return "Linux PPC Assembly Printer";
}
- bool doFinalization(Module &M);
+ bool doFinalization(Module &M) override;
- virtual void EmitFunctionEntryLabel();
+ void EmitFunctionEntryLabel() override;
- void EmitFunctionBodyEnd();
+ void EmitFunctionBodyEnd() override;
};
/// PPCDarwinAsmPrinter - PowerPC assembly printer, customized for Darwin/Mac
@@ -112,12 +113,12 @@ namespace {
explicit PPCDarwinAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
: PPCAsmPrinter(TM, Streamer) {}
- virtual const char *getPassName() const {
+ const char *getPassName() const override {
return "Darwin PPC Assembly Printer";
}
- bool doFinalization(Module &M);
- void EmitStartOfAsmFile(Module &M);
+ bool doFinalization(Module &M) override;
+ void EmitStartOfAsmFile(Module &M) override;
void EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs);
};
@@ -180,7 +181,7 @@ void PPCAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
MachineModuleInfoImpl::StubValueTy &StubSym =
MMI->getObjFileInfo<MachineModuleInfoMachO>()
.getGVStubEntry(SymToPrint);
- if (StubSym.getPointer() == 0)
+ if (!StubSym.getPointer())
StubSym = MachineModuleInfoImpl::
StubValueTy(getSymbol(GV), !GV->hasInternalLinkage());
} else if (GV->isDeclaration() || GV->hasCommonLinkage() ||
@@ -190,7 +191,7 @@ void PPCAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
MachineModuleInfoImpl::StubValueTy &StubSym =
MMI->getObjFileInfo<MachineModuleInfoMachO>().
getHiddenGVStubEntry(SymToPrint);
- if (StubSym.getPointer() == 0)
+ if (!StubSym.getPointer())
StubSym = MachineModuleInfoImpl::
StubValueTy(getSymbol(GV), !GV->hasInternalLinkage());
} else {
@@ -207,7 +208,7 @@ void PPCAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
}
default:
- O << "<unknown operand type: " << MO.getType() << ">";
+ O << "<unknown operand type: " << (unsigned)MO.getType() << ">";
return;
}
}
@@ -288,9 +289,9 @@ MCSymbol *PPCAsmPrinter::lookUpOrCreateTOCEntry(MCSymbol *Sym) {
MCSymbol *&TOCEntry = TOC[Sym];
// To avoid a name clash, check if the name already exists.
- while (TOCEntry == 0) {
+ while (!TOCEntry) {
if (OutContext.LookupSymbol(Twine(DL->getPrivateGlobalPrefix()) +
- "C" + Twine(TOCLabelID++)) == 0) {
+ "C" + Twine(TOCLabelID++)) == nullptr) {
TOCEntry = GetTempSymbol("C", TOCLabelID);
}
}
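
The probing loop above, reduced to its essentials (a standalone analogue; the real code consults OutContext rather than a set):

    #include <set>
    #include <string>

    // Bump the counter until the candidate label is not already taken.
    static std::string freshLabel(const std::set<std::string> &Taken,
                                  unsigned &NextID) {
      std::string Name;
      do {
        Name = "C" + std::to_string(NextID++);
      } while (Taken.count(Name));
      return Name;
    }
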
@@ -342,7 +343,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
// Map symbol -> label of TOC entry
assert(MO.isGlobal() || MO.isCPI() || MO.isJTI());
- MCSymbol *MOSymbol = 0;
+ MCSymbol *MOSymbol = nullptr;
if (MO.isGlobal())
MOSymbol = getSymbol(MO.getGlobal());
else if (MO.isCPI())
@@ -372,23 +373,19 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
const MachineOperand &MO = MI->getOperand(2);
assert((MO.isGlobal() || MO.isCPI() || MO.isJTI()) &&
"Invalid operand for ADDIStocHA!");
- MCSymbol *MOSymbol = 0;
+ MCSymbol *MOSymbol = nullptr;
bool IsExternal = false;
bool IsFunction = false;
bool IsCommon = false;
bool IsAvailExt = false;
if (MO.isGlobal()) {
- const GlobalValue *GValue = MO.getGlobal();
- const GlobalAlias *GAlias = dyn_cast<GlobalAlias>(GValue);
- const GlobalValue *RealGValue =
- GAlias ? GAlias->getAliasedGlobal() : GValue;
- MOSymbol = getSymbol(RealGValue);
- const GlobalVariable *GVar = dyn_cast<GlobalVariable>(RealGValue);
- IsExternal = GVar && !GVar->hasInitializer();
- IsCommon = GVar && RealGValue->hasCommonLinkage();
- IsFunction = !GVar;
- IsAvailExt = GVar && RealGValue->hasAvailableExternallyLinkage();
+ const GlobalValue *GV = MO.getGlobal();
+ MOSymbol = getSymbol(GV);
+ IsExternal = GV->isDeclaration();
+ IsCommon = GV->hasCommonLinkage();
+ IsFunction = GV->getType()->getElementType()->isFunctionTy();
+ IsAvailExt = GV->hasAvailableExternallyLinkage();
} else if (MO.isCPI())
MOSymbol = GetCPISymbol(MO.getIndex());
else if (MO.isJTI())
@@ -416,7 +413,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
const MachineOperand &MO = MI->getOperand(1);
assert((MO.isGlobal() || MO.isJTI() || MO.isCPI()) &&
"Invalid operand for LDtocL!");
- MCSymbol *MOSymbol = 0;
+ MCSymbol *MOSymbol = nullptr;
if (MO.isJTI())
MOSymbol = lookUpOrCreateTOCEntry(GetJTISymbol(MO.getIndex()));
@@ -427,14 +424,9 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
}
else if (MO.isGlobal()) {
const GlobalValue *GValue = MO.getGlobal();
- const GlobalAlias *GAlias = dyn_cast<GlobalAlias>(GValue);
- const GlobalValue *RealGValue =
- GAlias ? GAlias->getAliasedGlobal() : GValue;
- MOSymbol = getSymbol(RealGValue);
- const GlobalVariable *GVar = dyn_cast<GlobalVariable>(RealGValue);
-
- if (!GVar || !GVar->hasInitializer() || RealGValue->hasCommonLinkage() ||
- RealGValue->hasAvailableExternallyLinkage() ||
+ MOSymbol = getSymbol(GValue);
+ if (GValue->isDeclaration() || GValue->hasCommonLinkage() ||
+ GValue->hasAvailableExternallyLinkage() ||
TM.getCodeModel() == CodeModel::Large)
MOSymbol = lookUpOrCreateTOCEntry(MOSymbol);
}
@@ -456,19 +448,15 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
TmpInst.setOpcode(PPC::ADDI8);
const MachineOperand &MO = MI->getOperand(2);
assert((MO.isGlobal() || MO.isCPI()) && "Invalid operand for ADDItocL");
- MCSymbol *MOSymbol = 0;
+ MCSymbol *MOSymbol = nullptr;
bool IsExternal = false;
bool IsFunction = false;
if (MO.isGlobal()) {
- const GlobalValue *GValue = MO.getGlobal();
- const GlobalAlias *GAlias = dyn_cast<GlobalAlias>(GValue);
- const GlobalValue *RealGValue =
- GAlias ? GAlias->getAliasedGlobal() : GValue;
- MOSymbol = getSymbol(RealGValue);
- const GlobalVariable *GVar = dyn_cast<GlobalVariable>(RealGValue);
- IsExternal = GVar && !GVar->hasInitializer();
- IsFunction = !GVar;
+ const GlobalValue *GV = MO.getGlobal();
+ MOSymbol = getSymbol(GV);
+ IsExternal = GV->isDeclaration();
+ IsFunction = GV->getType()->getElementType()->isFunctionTy();
} else if (MO.isCPI())
MOSymbol = GetCPISymbol(MO.getIndex());
diff --git a/lib/Target/PowerPC/PPCBranchSelector.cpp b/lib/Target/PowerPC/PPCBranchSelector.cpp
index 9276211..ee90671 100644
--- a/lib/Target/PowerPC/PPCBranchSelector.cpp
+++ b/lib/Target/PowerPC/PPCBranchSelector.cpp
@@ -15,7 +15,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "ppc-branch-select"
#include "PPC.h"
#include "MCTargetDesc/PPCPredicates.h"
#include "PPCInstrBuilder.h"
@@ -26,6 +25,8 @@
#include "llvm/Target/TargetMachine.h"
using namespace llvm;
+#define DEBUG_TYPE "ppc-branch-select"
+
STATISTIC(NumExpanded, "Number of branches expanded to long format");
namespace llvm {
@@ -42,9 +43,9 @@ namespace {
/// BlockSizes - The sizes of the basic blocks in the function.
std::vector<unsigned> BlockSizes;
- virtual bool runOnMachineFunction(MachineFunction &Fn);
+ bool runOnMachineFunction(MachineFunction &Fn) override;
- virtual const char *getPassName() const {
+ const char *getPassName() const override {
return "PowerPC Branch Selector";
}
};
@@ -112,7 +113,7 @@ bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) {
unsigned MBBStartOffset = 0;
for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
I != E; ++I) {
- MachineBasicBlock *Dest = 0;
+ MachineBasicBlock *Dest = nullptr;
if (I->getOpcode() == PPC::BCC && !I->getOperand(2).isImm())
Dest = I->getOperand(2).getMBB();
else if ((I->getOpcode() == PPC::BC || I->getOpcode() == PPC::BCn) &&
diff --git a/lib/Target/PowerPC/PPCCTRLoops.cpp b/lib/Target/PowerPC/PPCCTRLoops.cpp
index 9c5db50..ec1e34d 100644
--- a/lib/Target/PowerPC/PPCCTRLoops.cpp
+++ b/lib/Target/PowerPC/PPCCTRLoops.cpp
@@ -23,8 +23,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "ctrloops"
-
#include "llvm/Transforms/Scalar.h"
#include "PPC.h"
#include "PPCTargetMachine.h"
@@ -61,6 +59,8 @@
using namespace llvm;
+#define DEBUG_TYPE "ctrloops"
+
#ifndef NDEBUG
static cl::opt<int> CTRLoopLimit("ppc-max-ctrloop", cl::Hidden, cl::init(-1));
#endif
@@ -84,16 +84,16 @@ namespace {
public:
static char ID;
- PPCCTRLoops() : FunctionPass(ID), TM(0) {
+ PPCCTRLoops() : FunctionPass(ID), TM(nullptr) {
initializePPCCTRLoopsPass(*PassRegistry::getPassRegistry());
}
PPCCTRLoops(PPCTargetMachine &TM) : FunctionPass(ID), TM(&TM) {
initializePPCCTRLoopsPass(*PassRegistry::getPassRegistry());
}
- virtual bool runOnFunction(Function &F);
+ bool runOnFunction(Function &F) override;
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<LoopInfo>();
AU.addPreserved<LoopInfo>();
AU.addRequired<DominatorTreeWrapperPass>();
@@ -128,12 +128,12 @@ namespace {
initializePPCCTRLoopsVerifyPass(*PassRegistry::getPassRegistry());
}
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<MachineDominatorTree>();
MachineFunctionPass::getAnalysisUsage(AU);
}
- virtual bool runOnMachineFunction(MachineFunction &MF);
+ bool runOnMachineFunction(MachineFunction &MF) override;
private:
MachineDominatorTree *MDT;
@@ -172,7 +172,7 @@ bool PPCCTRLoops::runOnFunction(Function &F) {
SE = &getAnalysis<ScalarEvolution>();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : 0;
+ DL = DLP ? &DLP->getDataLayout() : nullptr;
LibInfo = getAnalysisIfAvailable<TargetLibraryInfo>();
bool MadeChange = false;
@@ -370,6 +370,14 @@ bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) {
J->getOpcode() == Instruction::URem ||
J->getOpcode() == Instruction::SRem)) {
return true;
+ } else if (TT.isArch32Bit() &&
+ isLargeIntegerTy(false, J->getType()->getScalarType()) &&
+ (J->getOpcode() == Instruction::Shl ||
+ J->getOpcode() == Instruction::AShr ||
+ J->getOpcode() == Instruction::LShr)) {
+ // Only on PPC32, for 128-bit integers (specifically not 64-bit
+ // integers), these might be runtime calls.
+ return true;
} else if (isa<IndirectBrInst>(J) || isa<InvokeInst>(J)) {
// On PowerPC, indirect jumps use the counter register.
return true;
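
A concrete trigger for the new branch (hedged: whether the shift actually becomes a call depends on the target's runtime support): at the IR level this is an i128 shl, which 32-bit PowerPC typically expands to a library call such as __ashlti3, and any call clobbers CTR.

    // Assumes a frontend that provides 128-bit integers; on PPC32 the shift
    // may lower to a runtime call, which is what mightUseCTR() must catch.
    __int128 shiftLeft(__int128 X, unsigned N) {
      return X << N;
    }
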
@@ -424,9 +432,9 @@ bool PPCCTRLoops::convertToCTRLoop(Loop *L) {
SmallVector<BasicBlock*, 4> ExitingBlocks;
L->getExitingBlocks(ExitingBlocks);
- BasicBlock *CountedExitBlock = 0;
- const SCEV *ExitCount = 0;
- BranchInst *CountedExitBranch = 0;
+ BasicBlock *CountedExitBlock = nullptr;
+ const SCEV *ExitCount = nullptr;
+ BranchInst *CountedExitBranch = nullptr;
for (SmallVectorImpl<BasicBlock *>::iterator I = ExitingBlocks.begin(),
IE = ExitingBlocks.end(); I != IE; ++I) {
const SCEV *EC = SE->getExitCount(L, *I);
diff --git a/lib/Target/PowerPC/PPCCodeEmitter.cpp b/lib/Target/PowerPC/PPCCodeEmitter.cpp
index 84fc888..0875523 100644
--- a/lib/Target/PowerPC/PPCCodeEmitter.cpp
+++ b/lib/Target/PowerPC/PPCCodeEmitter.cpp
@@ -32,7 +32,7 @@ namespace {
JITCodeEmitter &MCE;
MachineModuleInfo *MMI;
- void getAnalysisUsage(AnalysisUsage &AU) const {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<MachineModuleInfo>();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -73,11 +73,13 @@ namespace {
unsigned getTLSRegEncoding(const MachineInstr &MI, unsigned OpNo) const;
unsigned getTLSCallEncoding(const MachineInstr &MI, unsigned OpNo) const;
- const char *getPassName() const { return "PowerPC Machine Code Emitter"; }
+ const char *getPassName() const override {
+ return "PowerPC Machine Code Emitter";
+ }
/// runOnMachineFunction - emits the given MachineFunction to memory
///
- bool runOnMachineFunction(MachineFunction &MF);
+ bool runOnMachineFunction(MachineFunction &MF) override;
/// emitBasicBlock - emits the given MachineBasicBlock to memory
///
@@ -102,7 +104,7 @@ bool PPCCodeEmitter::runOnMachineFunction(MachineFunction &MF) {
MMI = &getAnalysis<MachineModuleInfo>();
MCE.setModuleInfo(MMI);
do {
- MovePCtoLROffset = 0;
+ MovePCtoLROffset = nullptr;
MCE.startFunction(MF);
for (MachineFunction::iterator BB = MF.begin(), E = MF.end(); BB != E; ++BB)
emitBasicBlock(*BB);
diff --git a/lib/Target/PowerPC/PPCFastISel.cpp b/lib/Target/PowerPC/PPCFastISel.cpp
index dd45683..ed3cb4d 100644
--- a/lib/Target/PowerPC/PPCFastISel.cpp
+++ b/lib/Target/PowerPC/PPCFastISel.cpp
@@ -13,7 +13,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "ppcfastisel"
#include "PPC.h"
#include "MCTargetDesc/PPCPredicates.h"
#include "PPCISelLowering.h"
@@ -58,6 +57,8 @@
//===----------------------------------------------------------------------===//
using namespace llvm;
+#define DEBUG_TYPE "ppcfastisel"
+
namespace {
typedef struct Address {
@@ -85,7 +86,7 @@ class PPCFastISel final : public FastISel {
const TargetMachine &TM;
const TargetInstrInfo &TII;
const TargetLowering &TLI;
- const PPCSubtarget &PPCSubTarget;
+ const PPCSubtarget *PPCSubTarget;
LLVMContext *Context;
public:
@@ -95,31 +96,29 @@ class PPCFastISel final : public FastISel {
TM(FuncInfo.MF->getTarget()),
TII(*TM.getInstrInfo()),
TLI(*TM.getTargetLowering()),
- PPCSubTarget(
- *((static_cast<const PPCTargetMachine *>(&TM))->getSubtargetImpl())
- ),
+ PPCSubTarget(&TM.getSubtarget<PPCSubtarget>()),
Context(&FuncInfo.Fn->getContext()) { }
// Backend specific FastISel code.
private:
- virtual bool TargetSelectInstruction(const Instruction *I);
- virtual unsigned TargetMaterializeConstant(const Constant *C);
- virtual unsigned TargetMaterializeAlloca(const AllocaInst *AI);
- virtual bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
- const LoadInst *LI);
- virtual bool FastLowerArguments();
- virtual unsigned FastEmit_i(MVT Ty, MVT RetTy, unsigned Opc, uint64_t Imm);
- virtual unsigned FastEmitInst_ri(unsigned MachineInstOpcode,
- const TargetRegisterClass *RC,
- unsigned Op0, bool Op0IsKill,
- uint64_t Imm);
- virtual unsigned FastEmitInst_r(unsigned MachineInstOpcode,
- const TargetRegisterClass *RC,
- unsigned Op0, bool Op0IsKill);
- virtual unsigned FastEmitInst_rr(unsigned MachineInstOpcode,
- const TargetRegisterClass *RC,
- unsigned Op0, bool Op0IsKill,
- unsigned Op1, bool Op1IsKill);
+ bool TargetSelectInstruction(const Instruction *I) override;
+ unsigned TargetMaterializeConstant(const Constant *C) override;
+ unsigned TargetMaterializeAlloca(const AllocaInst *AI) override;
+ bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
+ const LoadInst *LI) override;
+ bool FastLowerArguments() override;
+ unsigned FastEmit_i(MVT Ty, MVT RetTy, unsigned Opc, uint64_t Imm) override;
+ unsigned FastEmitInst_ri(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ uint64_t Imm);
+ unsigned FastEmitInst_r(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill);
+ unsigned FastEmitInst_rr(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill);
// Instruction selection routines.
private:
@@ -282,7 +281,7 @@ bool PPCFastISel::isLoadTypeLegal(Type *Ty, MVT &VT) {
// Given a value Obj, create an Address object Addr that represents its
// address. Return false if we can't handle it.
bool PPCFastISel::PPCComputeAddress(const Value *Obj, Address &Addr) {
- const User *U = NULL;
+ const User *U = nullptr;
unsigned Opcode = Instruction::UserOp1;
if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
// Don't walk into other basic blocks unless the object is an alloca from
@@ -556,7 +555,7 @@ bool PPCFastISel::SelectLoad(const Instruction *I) {
// to constrain RA from using R0/X0 when this is not legal.
unsigned AssignedReg = FuncInfo.ValueMap[I];
const TargetRegisterClass *RC =
- AssignedReg ? MRI.getRegClass(AssignedReg) : 0;
+ AssignedReg ? MRI.getRegClass(AssignedReg) : nullptr;
unsigned ResultReg = 0;
if (!PPCEmitLoad(VT, ResultReg, Addr, RC))
@@ -739,7 +738,7 @@ bool PPCFastISel::PPCEmitCmp(const Value *SrcValue1, const Value *SrcValue2,
return false;
MVT SrcVT = SrcEVT.getSimpleVT();
- if (SrcVT == MVT::i1 && PPCSubTarget.useCRBits())
+ if (SrcVT == MVT::i1 && PPCSubTarget->useCRBits())
return false;
// See if operand 2 is an immediate encodeable in the compare.
@@ -900,7 +899,7 @@ unsigned PPCFastISel::PPCMoveToFPReg(MVT SrcVT, unsigned SrcReg,
if (!IsSigned) {
LoadOpc = PPC::LFIWZX;
Addr.Offset = 4;
- } else if (PPCSubTarget.hasLFIWAX()) {
+ } else if (PPCSubTarget->hasLFIWAX()) {
LoadOpc = PPC::LFIWAX;
Addr.Offset = 4;
}
@@ -941,7 +940,7 @@ bool PPCFastISel::SelectIToFP(const Instruction *I, bool IsSigned) {
// We can only lower an unsigned convert if we have the newer
// floating-point conversion operations.
- if (!IsSigned && !PPCSubTarget.hasFPCVT())
+ if (!IsSigned && !PPCSubTarget->hasFPCVT())
return false;
// FIXME: For now we require the newer floating-point conversion operations
@@ -949,7 +948,7 @@ bool PPCFastISel::SelectIToFP(const Instruction *I, bool IsSigned) {
// to single-precision float. Otherwise we have to generate a lot of
// fiddly code to avoid double rounding. If necessary, the fiddly code
// can be found in PPCTargetLowering::LowerINT_TO_FP().
- if (DstVT == MVT::f32 && !PPCSubTarget.hasFPCVT())
+ if (DstVT == MVT::f32 && !PPCSubTarget->hasFPCVT())
return false;
// Extend the input if necessary.
@@ -1012,7 +1011,7 @@ unsigned PPCFastISel::PPCMoveToIntReg(const Instruction *I, MVT VT,
// to determine the required register class.
unsigned AssignedReg = FuncInfo.ValueMap[I];
const TargetRegisterClass *RC =
- AssignedReg ? MRI.getRegClass(AssignedReg) : 0;
+ AssignedReg ? MRI.getRegClass(AssignedReg) : nullptr;
unsigned ResultReg = 0;
if (!PPCEmitLoad(VT, ResultReg, Addr, RC, !IsSigned))
@@ -1064,7 +1063,7 @@ bool PPCFastISel::SelectFPToI(const Instruction *I, bool IsSigned) {
if (IsSigned)
Opc = PPC::FCTIWZ;
else
- Opc = PPCSubTarget.hasFPCVT() ? PPC::FCTIWUZ : PPC::FCTIDZ;
+ Opc = PPCSubTarget->hasFPCVT() ? PPC::FCTIWUZ : PPC::FCTIDZ;
else
Opc = IsSigned ? PPC::FCTIDZ : PPC::FCTIDUZ;
@@ -1863,7 +1862,7 @@ unsigned PPCFastISel::PPCMaterializeGV(const GlobalValue *GV, MVT VT) {
if (!GVar) {
// If GV is an alias, use the aliasee for determining thread-locality.
if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
- GVar = dyn_cast_or_null<GlobalVariable>(GA->getAliasedGlobal());
+ GVar = dyn_cast_or_null<GlobalVariable>(GA->getAliasee());
}
// FIXME: We don't yet handle the complexity of TLS.
@@ -2001,7 +2000,7 @@ unsigned PPCFastISel::PPCMaterialize64BitInt(int64_t Imm,
unsigned PPCFastISel::PPCMaterializeInt(const Constant *C, MVT VT) {
// If we're using CR bit registers for i1 values, handle that as a special
// case first.
- if (VT == MVT::i1 && PPCSubTarget.useCRBits()) {
+ if (VT == MVT::i1 && PPCSubTarget->useCRBits()) {
const ConstantInt *CI = cast<ConstantInt>(C);
unsigned ImmReg = createResultReg(&PPC::CRBITRCRegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
@@ -2149,7 +2148,7 @@ bool PPCFastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
unsigned ResultReg = MI->getOperand(0).getReg();
- if (!PPCEmitLoad(VT, ResultReg, Addr, 0, IsZExt))
+ if (!PPCEmitLoad(VT, ResultReg, Addr, nullptr, IsZExt))
return false;
MI->eraseFromParent();
@@ -2175,7 +2174,7 @@ unsigned PPCFastISel::FastEmit_i(MVT Ty, MVT VT, unsigned Opc, uint64_t Imm) {
// If we're using CR bit registers for i1 values, handle that as a special
// case first.
- if (VT == MVT::i1 && PPCSubTarget.useCRBits()) {
+ if (VT == MVT::i1 && PPCSubTarget->useCRBits()) {
unsigned ImmReg = createResultReg(&PPC::CRBITRCRegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(Imm == 0 ? PPC::CRUNSET : PPC::CRSET), ImmReg);
@@ -2261,6 +2260,6 @@ namespace llvm {
if (Subtarget->isPPC64() && Subtarget->isSVR4ABI())
return new PPCFastISel(FuncInfo, LibInfo);
- return 0;
+ return nullptr;
}
}
diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp
index d8f491f..e294156 100644
--- a/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -222,7 +222,7 @@ unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF,
if (!DisableRedZone &&
(Subtarget.isPPC64() || // 32-bit SVR4, no stack-
!Subtarget.isSVR4ABI() || // allocated locals.
- FrameSize == 0) &&
+ FrameSize == 0) &&
FrameSize <= 224 && // Fits in red zone.
!MFI->hasVarSizedObjects() && // No dynamic alloca.
!MFI->adjustsStack() && // No calls.
@@ -281,8 +281,8 @@ bool PPCFrameLowering::needsFP(const MachineFunction &MF) const {
// Naked functions have no stack frame pushed, so we don't have a frame
// pointer.
- if (MF.getFunction()->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
- Attribute::Naked))
+ if (MF.getFunction()->getAttributes().hasAttribute(
+ AttributeSet::FunctionIndex, Attribute::Naked))
return false;
return MF.getTarget().Options.DisableFramePointerElim(MF) ||
@@ -426,7 +426,8 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
assert(FPIndex && "No Frame Pointer Save Slot!");
FPOffset = FFI->getObjectOffset(FPIndex);
} else {
- FPOffset = PPCFrameLowering::getFramePointerSaveOffset(isPPC64, isDarwinABI);
+ FPOffset =
+ PPCFrameLowering::getFramePointerSaveOffset(isPPC64, isDarwinABI);
}
}
@@ -562,13 +563,14 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
assert(NegFrameSize);
unsigned CFIIndex = MMI.addFrameInst(
MCCFIInstruction::createDefCfaOffset(nullptr, NegFrameSize));
- BuildMI(MBB, MBBI, dl, TII.get(PPC::CFI_INSTRUCTION)).addCFIIndex(CFIIndex);
+ BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
if (HasFP) {
unsigned Reg = MRI->getDwarfRegNum(FPReg, true);
CFIIndex = MMI.addFrameInst(
MCCFIInstruction::createOffset(nullptr, Reg, FPOffset));
- BuildMI(MBB, MBBI, dl, TII.get(PPC::CFI_INSTRUCTION))
+ BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex);
}
@@ -576,7 +578,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
unsigned Reg = MRI->getDwarfRegNum(BPReg, true);
CFIIndex = MMI.addFrameInst(
MCCFIInstruction::createOffset(nullptr, Reg, BPOffset));
- BuildMI(MBB, MBBI, dl, TII.get(PPC::CFI_INSTRUCTION))
+ BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex);
}
@@ -584,7 +586,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
unsigned Reg = MRI->getDwarfRegNum(LRReg, true);
CFIIndex = MMI.addFrameInst(
MCCFIInstruction::createOffset(nullptr, Reg, LROffset));
- BuildMI(MBB, MBBI, dl, TII.get(PPC::CFI_INSTRUCTION))
+ BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex);
}
}
@@ -601,7 +603,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
unsigned CFIIndex = MMI.addFrameInst(
MCCFIInstruction::createDefCfaRegister(nullptr, Reg));
- BuildMI(MBB, MBBI, dl, TII.get(PPC::CFI_INSTRUCTION))
+ BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex);
}
}
@@ -629,7 +631,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
if (isSVR4ABI && isPPC64 && (PPC::CR2 <= Reg && Reg <= PPC::CR4)) {
unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset(
nullptr, MRI->getDwarfRegNum(PPC::CR2, true), 8));
- BuildMI(MBB, MBBI, dl, TII.get(PPC::CFI_INSTRUCTION))
+ BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex);
continue;
}
@@ -637,7 +639,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
int Offset = MFI->getObjectOffset(CSI[I].getFrameIdx());
unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset(
nullptr, MRI->getDwarfRegNum(Reg, true), Offset));
- BuildMI(MBB, MBBI, dl, TII.get(PPC::CFI_INSTRUCTION))
+ BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex);
}
}
@@ -712,7 +714,8 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
assert(FPIndex && "No Frame Pointer Save Slot!");
FPOffset = FFI->getObjectOffset(FPIndex);
} else {
- FPOffset = PPCFrameLowering::getFramePointerSaveOffset(isPPC64, isDarwinABI);
+ FPOffset =
+ PPCFrameLowering::getFramePointerSaveOffset(isPPC64, isDarwinABI);
}
}
@@ -930,9 +933,9 @@ PPCFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
MFI->CreateFixedObject(-1 * TCSPDelta, TCSPDelta, true);
}
- // For 32-bit SVR4, allocate the nonvolatile CR spill slot iff the
+ // For 32-bit SVR4, allocate the nonvolatile CR spill slot iff the
// function uses CR 2, 3, or 4.
- if (!isPPC64 && !isDarwinABI &&
+ if (!isPPC64 && !isDarwinABI &&
(MRI.isPhysRegUsed(PPC::CR2) ||
MRI.isPhysRegUsed(PPC::CR3) ||
MRI.isPhysRegUsed(PPC::CR4))) {
@@ -1106,10 +1109,10 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
unsigned Reg = CSI[i].getReg();
if ((Subtarget.isSVR4ABI() && Reg == PPC::CR2)
- // Leave Darwin logic as-is.
- || (!Subtarget.isSVR4ABI() &&
- (PPC::CRBITRCRegClass.contains(Reg) ||
- PPC::CRRCRegClass.contains(Reg)))) {
+ // Leave Darwin logic as-is.
+ || (!Subtarget.isSVR4ABI() &&
+ (PPC::CRBITRCRegClass.contains(Reg) ||
+ PPC::CRRCRegClass.contains(Reg)))) {
int FI = CSI[i].getFrameIdx();
FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
@@ -1190,11 +1193,11 @@ PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF,
}
}
-bool
+bool
PPCFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const {
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
// Currently, this function only handles SVR4 32- and 64-bit ABIs.
// Return false otherwise to maintain pre-existing behavior.
@@ -1207,7 +1210,7 @@ PPCFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
DebugLoc DL;
bool CRSpilled = false;
MachineInstrBuilder CRMIB;
-
+
for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
unsigned Reg = CSI[i].getReg();
// Only Darwin actually uses the VRSAVE register, but it can still appear
@@ -1237,21 +1240,21 @@ PPCFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
CRSpilled = true;
FuncInfo->setSpillsCR();
- // 32-bit: FP-relative. Note that we made sure CR2-CR4 all have
- // the same frame index in PPCRegisterInfo::hasReservedSpillSlot.
- CRMIB = BuildMI(*MF, DL, TII.get(PPC::MFCR), PPC::R12)
+ // 32-bit: FP-relative. Note that we made sure CR2-CR4 all have
+ // the same frame index in PPCRegisterInfo::hasReservedSpillSlot.
+ CRMIB = BuildMI(*MF, DL, TII.get(PPC::MFCR), PPC::R12)
.addReg(Reg, RegState::ImplicitKill);
- MBB.insert(MI, CRMIB);
- MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::STW))
- .addReg(PPC::R12,
- getKillRegState(true)),
- CSI[i].getFrameIdx()));
+ MBB.insert(MI, CRMIB);
+ MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::STW))
+ .addReg(PPC::R12,
+ getKillRegState(true)),
+ CSI[i].getFrameIdx()));
}
} else {
const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
TII.storeRegToStackSlot(MBB, MI, Reg, true,
- CSI[i].getFrameIdx(), RC, TRI);
+ CSI[i].getFrameIdx(), RC, TRI);
}
}
return true;
@@ -1260,8 +1263,8 @@ PPCFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
static void
restoreCRs(bool isPPC64, bool is31,
bool CR2Spilled, bool CR3Spilled, bool CR4Spilled,
- MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI, unsigned CSIIndex) {
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI, unsigned CSIIndex) {
MachineFunction *MF = MBB.getParent();
const PPCInstrInfo &TII =
@@ -1275,12 +1278,12 @@ restoreCRs(bool isPPC64, bool is31,
else {
// 32-bit: FP-relative
MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::LWZ),
- PPC::R12),
- CSI[CSIIndex].getFrameIdx()));
+ PPC::R12),
+ CSI[CSIIndex].getFrameIdx()));
RestoreOp = PPC::MTOCRF;
MoveReg = PPC::R12;
}
-
+
if (CR2Spilled)
MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR2)
.addReg(MoveReg, getKillRegState(!CR3Spilled && !CR4Spilled)));
@@ -1335,11 +1338,11 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MBB.erase(I);
}
-bool
+bool
PPCFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const {
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
// Currently, this function only handles SVR4 32- and 64-bit ABIs.
// Return false otherwise to maintain pre-existing behavior.
@@ -1387,20 +1390,20 @@ PPCFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
// When we first encounter a non-CR register after seeing at
// least one CR register, restore all spilled CRs together.
if ((CR2Spilled || CR3Spilled || CR4Spilled)
- && !(PPC::CR2 <= Reg && Reg <= PPC::CR4)) {
+ && !(PPC::CR2 <= Reg && Reg <= PPC::CR4)) {
bool is31 = needsFP(*MF);
restoreCRs(Subtarget.isPPC64(), is31,
CR2Spilled, CR3Spilled, CR4Spilled,
- MBB, I, CSI, CSIIndex);
- CR2Spilled = CR3Spilled = CR4Spilled = false;
+ MBB, I, CSI, CSIIndex);
+ CR2Spilled = CR3Spilled = CR4Spilled = false;
}
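The control flow being re-indented here is worth spelling out: CR2-CR4 share a spill slot, so their restore is batched and triggered lazily, on the first non-CR register or at the end. A self-contained miniature of that batching (enum values and the commented-out calls are illustrative only):

    #include <vector>

    enum Reg { CR2, CR3, CR4, R30, R31 };

    void restoreAll(const std::vector<Reg> &CSI) {
      bool PendingCRs = false;
      for (Reg R : CSI) {
        bool IsCR = (CR2 <= R && R <= CR4);
        if (PendingCRs && !IsCR) {
          // restoreCRs(...): one reload serves every spilled CR field.
          PendingCRs = false;
        }
        if (IsCR)
          PendingCRs = true;  // defer until a non-CR register appears
        // else: loadRegFromStackSlot(R), the default per-register path
      }
      if (PendingCRs) {
        // Trailing batch: CRs that were never followed by a non-CR save.
      }
    }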
// Default behavior for non-CR saves.
const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
TII.loadRegFromStackSlot(MBB, I, Reg, CSI[i].getFrameIdx(),
- RC, TRI);
+ RC, TRI);
assert(I != MBB.begin() &&
- "loadRegFromStackSlot didn't insert any code!");
+ "loadRegFromStackSlot didn't insert any code!");
}
// Insert in reverse order.
@@ -1409,16 +1412,15 @@ PPCFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
else {
I = BeforeI;
++I;
- }
+ }
}
// If we haven't yet spilled the CRs, do so now.
if (CR2Spilled || CR3Spilled || CR4Spilled) {
- bool is31 = needsFP(*MF);
+ bool is31 = needsFP(*MF);
restoreCRs(Subtarget.isPPC64(), is31, CR2Spilled, CR3Spilled, CR4Spilled,
- MBB, I, CSI, CSIIndex);
+ MBB, I, CSI, CSIIndex);
}
return true;
}
-
diff --git a/lib/Target/PowerPC/PPCFrameLowering.h b/lib/Target/PowerPC/PPCFrameLowering.h
index 7aab37e..94e9b67 100644
--- a/lib/Target/PowerPC/PPCFrameLowering.h
+++ b/lib/Target/PowerPC/PPCFrameLowering.h
@@ -38,37 +38,37 @@ public:
/// emitProlog/emitEpilog - These methods insert prolog and epilog code into
/// the function.
- void emitPrologue(MachineFunction &MF) const;
- void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+ void emitPrologue(MachineFunction &MF) const override;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
- bool hasFP(const MachineFunction &MF) const;
+ bool hasFP(const MachineFunction &MF) const override;
bool needsFP(const MachineFunction &MF) const;
void replaceFPWithRealFP(MachineFunction &MF) const;
void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
- RegScavenger *RS = NULL) const;
+ RegScavenger *RS = nullptr) const override;
void processFunctionBeforeFrameFinalized(MachineFunction &MF,
- RegScavenger *RS = NULL) const;
+ RegScavenger *RS = nullptr) const override;
void addScavengingSpillSlot(MachineFunction &MF, RegScavenger *RS) const;
bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
const std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const;
+ const TargetRegisterInfo *TRI) const override;
void eliminateCallFramePseudoInstr(MachineFunction &MF,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const;
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const override;
bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const;
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const override;
/// targetHandlesStackFrameRounding - Returns true if the target is
/// responsible for rounding up the stack frame (probably at emitPrologue
/// time).
- bool targetHandlesStackFrameRounding() const { return true; }
+ bool targetHandlesStackFrameRounding() const override { return true; }
/// getReturnSaveOffset - Return the previous frame offset to save the
/// return address.
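The header changes in this hunk are the C++11 modernization applied throughout this rebase: dropping the redundant virtual keyword and marking overrides explicitly. A self-contained miniature, with made-up types, of why the annotation pays off:

    struct Base {
      virtual bool hasFP() const { return false; }
      virtual ~Base() = default;
    };
    struct Derived : Base {
      // 'override' turns any signature drift in Base into a compile error
      // instead of silently introducing a new, unrelated virtual.
      bool hasFP() const override { return true; }
    };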
@@ -141,7 +141,7 @@ public:
// With the SVR4 ABI, callee-saved registers have fixed offsets on the stack.
const SpillSlot *
- getCalleeSavedSpillSlots(unsigned &NumEntries) const {
+ getCalleeSavedSpillSlots(unsigned &NumEntries) const override {
if (Subtarget.isDarwinABI()) {
NumEntries = 1;
if (Subtarget.isPPC64()) {
@@ -156,7 +156,7 @@ public:
// Early exit if not using the SVR4 ABI.
if (!Subtarget.isSVR4ABI()) {
NumEntries = 0;
- return 0;
+ return nullptr;
}
// Note that the offsets here overlap, but this is fixed up in
diff --git a/lib/Target/PowerPC/PPCHazardRecognizers.cpp b/lib/Target/PowerPC/PPCHazardRecognizers.cpp
index 37c85b3..7ca706b 100644
--- a/lib/Target/PowerPC/PPCHazardRecognizers.cpp
+++ b/lib/Target/PowerPC/PPCHazardRecognizers.cpp
@@ -11,7 +11,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "pre-RA-sched"
#include "PPCHazardRecognizers.h"
#include "PPC.h"
#include "PPCInstrInfo.h"
@@ -22,6 +21,8 @@
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
+#define DEBUG_TYPE "pre-RA-sched"
+
bool PPCDispatchGroupSBHazardRecognizer::isLoadAfterStore(SUnit *SU) {
// FIXME: Move this.
if (isBCTRAfterSet(SU))
@@ -226,7 +227,7 @@ void PPCDispatchGroupSBHazardRecognizer::EmitNoop() {
CurGroup.clear();
CurSlots = CurBranches = 0;
} else {
- CurGroup.push_back(0);
+ CurGroup.push_back(nullptr);
++CurSlots;
}
}
diff --git a/lib/Target/PowerPC/PPCHazardRecognizers.h b/lib/Target/PowerPC/PPCHazardRecognizers.h
index 6b7fe41..cf4332c 100644
--- a/lib/Target/PowerPC/PPCHazardRecognizers.h
+++ b/lib/Target/PowerPC/PPCHazardRecognizers.h
@@ -37,14 +37,14 @@ public:
ScoreboardHazardRecognizer(ItinData, DAG_), DAG(DAG_),
CurSlots(0), CurBranches(0) {}
- virtual HazardType getHazardType(SUnit *SU, int Stalls);
- virtual bool ShouldPreferAnother(SUnit* SU);
- virtual unsigned PreEmitNoops(SUnit *SU);
- virtual void EmitInstruction(SUnit *SU);
- virtual void AdvanceCycle();
- virtual void RecedeCycle();
- virtual void Reset();
- virtual void EmitNoop();
+ HazardType getHazardType(SUnit *SU, int Stalls) override;
+ bool ShouldPreferAnother(SUnit* SU) override;
+ unsigned PreEmitNoops(SUnit *SU) override;
+ void EmitInstruction(SUnit *SU) override;
+ void AdvanceCycle() override;
+ void RecedeCycle() override;
+ void Reset() override;
+ void EmitNoop() override;
};
/// PPCHazardRecognizer970 - This class defines a finite state automata that
@@ -76,10 +76,10 @@ class PPCHazardRecognizer970 : public ScheduleHazardRecognizer {
public:
PPCHazardRecognizer970(const TargetMachine &TM);
- virtual HazardType getHazardType(SUnit *SU, int Stalls);
- virtual void EmitInstruction(SUnit *SU);
- virtual void AdvanceCycle();
- virtual void Reset();
+ virtual HazardType getHazardType(SUnit *SU, int Stalls) override;
+ virtual void EmitInstruction(SUnit *SU) override;
+ virtual void AdvanceCycle() override;
+ virtual void Reset() override;
private:
/// EndDispatchGroup - Called when we are finishing a new dispatch group.
diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 3bbc839..251e8b6 100644
--- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -12,7 +12,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "ppc-codegen"
#include "PPC.h"
#include "MCTargetDesc/PPCPredicates.h"
#include "PPCTargetMachine.h"
@@ -35,6 +34,8 @@
#include "llvm/Target/TargetOptions.h"
using namespace llvm;
+#define DEBUG_TYPE "ppc-codegen"
+
// FIXME: Remove this once the bug has been fixed!
cl::opt<bool> ANDIGlueBug("expose-ppc-andi-glue-bug",
cl::desc("expose the ANDI glue bug on PPC"), cl::Hidden);
@@ -50,29 +51,31 @@ namespace {
///
class PPCDAGToDAGISel : public SelectionDAGISel {
const PPCTargetMachine &TM;
- const PPCTargetLowering &PPCLowering;
- const PPCSubtarget &PPCSubTarget;
+ const PPCTargetLowering *PPCLowering;
+ const PPCSubtarget *PPCSubTarget;
unsigned GlobalBaseReg;
public:
explicit PPCDAGToDAGISel(PPCTargetMachine &tm)
: SelectionDAGISel(tm), TM(tm),
- PPCLowering(*TM.getTargetLowering()),
- PPCSubTarget(*TM.getSubtargetImpl()) {
+ PPCLowering(TM.getTargetLowering()),
+ PPCSubTarget(TM.getSubtargetImpl()) {
initializePPCDAGToDAGISelPass(*PassRegistry::getPassRegistry());
}
- virtual bool runOnMachineFunction(MachineFunction &MF) {
+ bool runOnMachineFunction(MachineFunction &MF) override {
// Make sure we re-emit a set of the global base reg if necessary
GlobalBaseReg = 0;
+ PPCLowering = TM.getTargetLowering();
+ PPCSubTarget = TM.getSubtargetImpl();
SelectionDAGISel::runOnMachineFunction(MF);
- if (!PPCSubTarget.isSVR4ABI())
+ if (!PPCSubTarget->isSVR4ABI())
InsertVRSaveCode(MF);
return true;
}
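The substance of this hunk is the switch from reference members bound once at construction to pointer members refreshed on every runOnMachineFunction, so stale target-lowering or subtarget state cannot leak across functions. A self-contained miniature of the pattern (the types are illustrative):

    struct Subtarget { bool HasVSX; };

    struct Sel {
      const Subtarget *ST = nullptr;   // was: const Subtarget &ST

      void runOnFunction(const Subtarget &Current) {
        ST = &Current;                 // re-acquired per run, never stale
        // ... query ST->HasVSX as needed ...
      }
    };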
- virtual void PostprocessISelDAG();
+ void PostprocessISelDAG() override;
/// getI32Imm - Return a target constant with the specified value, of type
/// i32.
@@ -88,7 +91,7 @@ namespace {
/// getSmallIPtrImm - Return a target constant of pointer type.
inline SDValue getSmallIPtrImm(unsigned Imm) {
- return CurDAG->getTargetConstant(Imm, PPCLowering.getPointerTy());
+ return CurDAG->getTargetConstant(Imm, PPCLowering->getPointerTy());
}
/// isRunOfOnes - Returns true iff Val consists of one contiguous run of 1s
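Since the doc comment above is the only definition of this predicate visible in the diff, a self-contained sketch may help. Note that the real helper also accepts runs that wrap from LSB to MSB (e.g. 0xFF0000FF, which rlwinm masks allow) and reports the run's endpoints; this version only answers yes or no:

    #include <cstdint>

    // True iff V, after dropping trailing zeros, has the form 2^k - 1.
    static bool isContiguous(uint32_t V) {
      if (V == 0) return false;
      while ((V & 1) == 0) V >>= 1;
      return (V & (V + 1)) == 0;
    }

    // A (possibly wrapped) run of ones: either the value or its complement
    // is one contiguous block.
    static bool isRunOfOnesSketch(uint32_t Val) {
      if (Val == 0) return false;
      return isContiguous(Val) || isContiguous(~Val);
    }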
@@ -109,7 +112,7 @@ namespace {
// Select - Convert the specified operand from a target-independent to a
// target-specific node if it hasn't already been changed.
- SDNode *Select(SDNode *N);
+ SDNode *Select(SDNode *N) override;
SDNode *SelectBitfieldInsert(SDNode *N);
@@ -121,7 +124,7 @@ namespace {
/// a base register plus a signed 16-bit displacement [r+imm].
bool SelectAddrImm(SDValue N, SDValue &Disp,
SDValue &Base) {
- return PPCLowering.SelectAddressRegImm(N, Disp, Base, *CurDAG, false);
+ return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, false);
}
/// SelectAddrImmOffs - Return true if the operand is valid for a preinc
@@ -141,20 +144,20 @@ namespace {
/// represented as an indexed [r+r] operation. Returns false if it can
/// be represented by [r+imm], which are preferred.
bool SelectAddrIdx(SDValue N, SDValue &Base, SDValue &Index) {
- return PPCLowering.SelectAddressRegReg(N, Base, Index, *CurDAG);
+ return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG);
}
/// SelectAddrIdxOnly - Given the specified address, force it to be
/// represented as an indexed [r+r] operation.
bool SelectAddrIdxOnly(SDValue N, SDValue &Base, SDValue &Index) {
- return PPCLowering.SelectAddressRegRegOnly(N, Base, Index, *CurDAG);
+ return PPCLowering->SelectAddressRegRegOnly(N, Base, Index, *CurDAG);
}
/// SelectAddrImmX4 - Returns true if the address N can be represented by
/// a base register plus a signed 16-bit displacement that is a multiple of 4.
/// Suitable for use by STD and friends.
bool SelectAddrImmX4(SDValue N, SDValue &Disp, SDValue &Base) {
- return PPCLowering.SelectAddressRegImm(N, Disp, Base, *CurDAG, true);
+ return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, true);
}
// Select an address into a single register.
@@ -168,16 +171,16 @@ namespace {
/// a register. The case of adding a (possibly relocatable) constant to a
/// register can be improved, but it is wrong to substitute Reg+Reg for
/// Reg in an asm, because the load or store opcode would have to change.
- virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
- char ConstraintCode,
- std::vector<SDValue> &OutOps) {
+ bool SelectInlineAsmMemoryOperand(const SDValue &Op,
+ char ConstraintCode,
+ std::vector<SDValue> &OutOps) override {
OutOps.push_back(Op);
return false;
}
void InsertVRSaveCode(MachineFunction &MF);
- virtual const char *getPassName() const {
+ const char *getPassName() const override {
return "PowerPC DAG->DAG Pattern Instruction Selection";
}
@@ -188,7 +191,7 @@ private:
SDNode *SelectSETCC(SDNode *N);
void PeepholePPC64();
- void PeepholdCROps();
+ void PeepholeCROps();
bool AllUsersSelectZero(SDNode *N);
void SwapAllSelectUsers(SDNode *N);
@@ -271,7 +274,7 @@ SDNode *PPCDAGToDAGISel::getGlobalBaseReg() {
MachineBasicBlock::iterator MBBI = FirstMBB.begin();
DebugLoc dl;
- if (PPCLowering.getPointerTy() == MVT::i32) {
+ if (PPCLowering->getPointerTy() == MVT::i32) {
GlobalBaseReg = RegInfo->createVirtualRegister(&PPC::GPRC_NOR0RegClass);
BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR));
BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
@@ -282,7 +285,7 @@ SDNode *PPCDAGToDAGISel::getGlobalBaseReg() {
}
}
return CurDAG->getRegister(GlobalBaseReg,
- PPCLowering.getPointerTy()).getNode();
+ PPCLowering->getPointerTy()).getNode();
}
/// isIntS16Immediate - This method tests to see if the node is either a 32-bit
@@ -414,8 +417,8 @@ SDNode *PPCDAGToDAGISel::SelectBitfieldInsert(SDNode *N) {
SDLoc dl(N);
APInt LKZ, LKO, RKZ, RKO;
- CurDAG->ComputeMaskedBits(Op0, LKZ, LKO);
- CurDAG->ComputeMaskedBits(Op1, RKZ, RKO);
+ CurDAG->computeKnownBits(Op0, LKZ, LKO);
+ CurDAG->computeKnownBits(Op1, RKZ, RKO);
unsigned TargetMask = LKZ.getZExtValue();
unsigned InsertMask = RKZ.getZExtValue();
@@ -458,11 +461,18 @@ SDNode *PPCDAGToDAGISel::SelectBitfieldInsert(SDNode *N) {
SH = (Op1Opc == ISD::SHL) ? Value : 32 - Value;
}
if (Op1Opc == ISD::AND) {
+ // The AND mask might not be a constant, and we need to make sure that
+ // if we're going to fold the masking with the insert, all bits not
+ // known to be zero in the mask are known to be one.
+ APInt MKZ, MKO;
+ CurDAG->computeKnownBits(Op1.getOperand(1), MKZ, MKO);
+ bool CanFoldMask = InsertMask == MKO.getZExtValue();
+
unsigned SHOpc = Op1.getOperand(0).getOpcode();
- if ((SHOpc == ISD::SHL || SHOpc == ISD::SRL) &&
+ if ((SHOpc == ISD::SHL || SHOpc == ISD::SRL) && CanFoldMask &&
isInt32Immediate(Op1.getOperand(0).getOperand(1), Value)) {
- // Note that Value must be in range here (less than 32) because
- // otherwise there would not be any bits set in InsertMask.
+ // Note that Value must be in range here (less than 32) because
+ // otherwise there would not be any bits set in InsertMask.
Op1 = Op1.getOperand(0).getOperand(0);
SH = (SHOpc == ISD::SHL) ? Value : 32 - Value;
}
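The new CanFoldMask guard is subtle enough to deserve a worked case. A runnable illustration with plain 32-bit masks standing in for APInt, where InsertMask denotes the bit range being inserted and MaskKnownOne has a set bit where the AND mask is provably one: for a constant mask every bit is known, so the fold still fires; for a partially unknown mask it is now correctly rejected.

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t InsertMask = 0x00FF;     // bit range being inserted
      // Constant mask 0x00FF: every inserted bit is known one, fold fires.
      uint32_t MaskKnownOne = 0x00FF;
      assert(InsertMask == MaskKnownOne);
      // Partially unknown mask: only the low nibble is provably one, so the
      // fold is rejected rather than treating unknown bits as ones.
      MaskKnownOne = 0x000F;
      assert(InsertMask != MaskKnownOne);
      return 0;
    }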
@@ -474,7 +484,7 @@ SDNode *PPCDAGToDAGISel::SelectBitfieldInsert(SDNode *N) {
return CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops);
}
}
- return 0;
+ return nullptr;
}
/// SelectCC - Select a comparison of the specified values with the specified
@@ -572,7 +582,7 @@ SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS,
Opc = PPC::FCMPUS;
} else {
assert(LHS.getValueType() == MVT::f64 && "Unknown vt!");
- Opc = PPCSubTarget.hasVSX() ? PPC::XSCMPUDP : PPC::FCMPUD;
+ Opc = PPCSubTarget->hasVSX() ? PPC::XSCMPUDP : PPC::FCMPUD;
}
return SDValue(CurDAG->getMachineNode(Opc, dl, MVT::i32, LHS, RHS), 0);
}
@@ -738,7 +748,7 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) {
EVT PtrVT = CurDAG->getTargetLoweringInfo().getPointerTy();
bool isPPC64 = (PtrVT == MVT::i64);
- if (!PPCSubTarget.useCRBits() &&
+ if (!PPCSubTarget->useCRBits() &&
isInt32Immediate(N->getOperand(1), Imm)) {
// We can codegen setcc op, imm very efficiently compared to a brcond.
// Check for those cases here.
@@ -750,7 +760,7 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) {
case ISD::SETEQ: {
Op = SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Op), 0);
SDValue Ops[] = { Op, getI32Imm(27), getI32Imm(5), getI32Imm(31) };
- return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4);
+ return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
}
case ISD::SETNE: {
if (isPPC64) break;
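The many "Ops, 4" to "Ops" changes in this file (and the makeArrayRef calls further down) track the SelectionDAG API migration to ArrayRef-taking overloads, where the element count is deduced from the array instead of passed by hand. A self-contained miniature of the idiom; ArrayRefLike is a stand-in, not the LLVM class:

    #include <cstddef>

    template <typename T> struct ArrayRefLike {
      const T *Data; size_t Size;
      template <size_t N>
      ArrayRefLike(const T (&A)[N]) : Data(A), Size(N) {}  // count deduced
      ArrayRefLike(const T *D, size_t S) : Data(D), Size(S) {}
    };

    size_t takeOps(ArrayRefLike<int> Ops) { return Ops.Size; }

    int main() {
      int Ops[] = {1, 2, 3, 4};
      return takeOps(Ops) == 4 ? 0 : 1;  // was: takeOps(Ops, 4)
    }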
@@ -762,14 +772,14 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) {
}
case ISD::SETLT: {
SDValue Ops[] = { Op, getI32Imm(1), getI32Imm(31), getI32Imm(31) };
- return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4);
+ return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
}
case ISD::SETGT: {
SDValue T =
SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Op), 0);
T = SDValue(CurDAG->getMachineNode(PPC::ANDC, dl, MVT::i32, T, Op), 0);
SDValue Ops[] = { T, getI32Imm(1), getI32Imm(31), getI32Imm(31) };
- return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4);
+ return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
}
}
} else if (Imm == ~0U) { // setcc op, -1
@@ -799,7 +809,7 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) {
SDValue AN = SDValue(CurDAG->getMachineNode(PPC::AND, dl, MVT::i32, AD,
Op), 0);
SDValue Ops[] = { AN, getI32Imm(1), getI32Imm(31), getI32Imm(31) };
- return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4);
+ return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
}
case ISD::SETGT: {
SDValue Ops[] = { Op, getI32Imm(1), getI32Imm(31), getI32Imm(31) };
@@ -820,7 +830,7 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) {
if (LHS.getValueType().isVector()) {
EVT VecVT = LHS.getValueType();
MVT::SimpleValueType VT = VecVT.getSimpleVT().SimpleTy;
- unsigned int VCmpInst = getVCmpInst(VT, CC, PPCSubTarget.hasVSX());
+ unsigned int VCmpInst = getVCmpInst(VT, CC, PPCSubTarget->hasVSX());
switch (CC) {
case ISD::SETEQ:
@@ -831,7 +841,7 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) {
case ISD::SETONE:
case ISD::SETUNE: {
SDValue VCmp(CurDAG->getMachineNode(VCmpInst, dl, VecVT, LHS, RHS), 0);
- return CurDAG->SelectNodeTo(N, PPCSubTarget.hasVSX() ? PPC::XXLNOR :
+ return CurDAG->SelectNodeTo(N, PPCSubTarget->hasVSX() ? PPC::XXLNOR :
PPC::VNOR,
VecVT, VCmp, VCmp);
}
@@ -853,9 +863,9 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) {
return CurDAG->SelectNodeTo(N, VCmpInst, VecVT, LHS, RHS);
} else {
SDValue VCmpGT(CurDAG->getMachineNode(VCmpInst, dl, VecVT, LHS, RHS), 0);
- unsigned int VCmpEQInst = getVCmpEQInst(VT, PPCSubTarget.hasVSX());
+ unsigned int VCmpEQInst = getVCmpEQInst(VT, PPCSubTarget->hasVSX());
SDValue VCmpEQ(CurDAG->getMachineNode(VCmpEQInst, dl, VecVT, LHS, RHS), 0);
- return CurDAG->SelectNodeTo(N, PPCSubTarget.hasVSX() ? PPC::XXLOR :
+ return CurDAG->SelectNodeTo(N, PPCSubTarget->hasVSX() ? PPC::XXLOR :
PPC::VOR,
VecVT, VCmpGT, VCmpEQ);
}
@@ -864,9 +874,9 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) {
case ISD::SETOLE:
case ISD::SETULE: {
SDValue VCmpLE(CurDAG->getMachineNode(VCmpInst, dl, VecVT, RHS, LHS), 0);
- unsigned int VCmpEQInst = getVCmpEQInst(VT, PPCSubTarget.hasVSX());
+ unsigned int VCmpEQInst = getVCmpEQInst(VT, PPCSubTarget->hasVSX());
SDValue VCmpEQ(CurDAG->getMachineNode(VCmpEQInst, dl, VecVT, LHS, RHS), 0);
- return CurDAG->SelectNodeTo(N, PPCSubTarget.hasVSX() ? PPC::XXLOR :
+ return CurDAG->SelectNodeTo(N, PPCSubTarget->hasVSX() ? PPC::XXLOR :
PPC::VOR,
VecVT, VCmpLE, VCmpEQ);
}
@@ -875,8 +885,8 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) {
}
}
- if (PPCSubTarget.useCRBits())
- return 0;
+ if (PPCSubTarget->useCRBits())
+ return nullptr;
bool Inv;
unsigned Idx = getCRIdxForSetCC(CC, Inv);
@@ -886,7 +896,7 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) {
// Force the ccreg into CR7.
SDValue CR7Reg = CurDAG->getRegister(PPC::CR7, MVT::i32);
- SDValue InFlag(0, 0); // Null incoming flag value.
+ SDValue InFlag(nullptr, 0); // Null incoming flag value.
CCReg = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, CR7Reg, CCReg,
InFlag).getValue(1);
@@ -896,7 +906,7 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) {
SDValue Ops[] = { IntCR, getI32Imm((32-(3-Idx)) & 31),
getI32Imm(31), getI32Imm(31) };
if (!Inv)
- return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4);
+ return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
// Get the specified bit.
SDValue Tmp =
@@ -911,7 +921,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
SDLoc dl(N);
if (N->isMachineOpcode()) {
N->setNodeId(-1);
- return NULL; // Already selected.
+ return nullptr; // Already selected.
}
switch (N->getOpcode()) {
@@ -1093,7 +1103,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
SDValue Base = LD->getBasePtr();
SDValue Ops[] = { Offset, Base, Chain };
return CurDAG->getMachineNode(Opcode, dl, LD->getValueType(0),
- PPCLowering.getPointerTy(),
+ PPCLowering->getPointerTy(),
MVT::Other, Ops);
} else {
unsigned Opcode;
@@ -1128,7 +1138,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
SDValue Base = LD->getBasePtr();
SDValue Ops[] = { Base, Offset, Chain };
return CurDAG->getMachineNode(Opcode, dl, LD->getValueType(0),
- PPCLowering.getPointerTy(),
+ PPCLowering->getPointerTy(),
MVT::Other, Ops);
}
}
@@ -1143,7 +1153,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
isRotateAndMask(N->getOperand(0).getNode(), Imm, false, SH, MB, ME)) {
SDValue Val = N->getOperand(0).getOperand(0);
SDValue Ops[] = { Val, getI32Imm(SH), getI32Imm(MB), getI32Imm(ME) };
- return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4);
+ return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
}
// If this is just a masked value where the input is not handled above, and
// is not a rotate-left (handled by a pattern in the .td file), emit rlwinm
@@ -1152,7 +1162,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
N->getOperand(0).getOpcode() != ISD::ROTL) {
SDValue Val = N->getOperand(0);
SDValue Ops[] = { Val, getI32Imm(0), getI32Imm(MB), getI32Imm(ME) };
- return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4);
+ return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
}
// If this is a 64-bit zero-extension mask, emit rldicl.
if (isInt64Immediate(N->getOperand(1).getNode(), Imm64) &&
@@ -1174,12 +1184,12 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
}
SDValue Ops[] = { Val, getI32Imm(SH), getI32Imm(MB) };
- return CurDAG->SelectNodeTo(N, PPC::RLDICL, MVT::i64, Ops, 3);
+ return CurDAG->SelectNodeTo(N, PPC::RLDICL, MVT::i64, Ops);
}
// AND X, 0 -> 0, not "rlwinm 32".
if (isInt32Immediate(N->getOperand(1), Imm) && (Imm == 0)) {
ReplaceUses(SDValue(N, 0), N->getOperand(1));
- return NULL;
+ return nullptr;
}
// ISD::OR doesn't get all the bitfield insertion fun.
// (and (or x, c1), c2) where isRunOfOnes(~(c1^c2)) is a bitfield insert
@@ -1212,7 +1222,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
isRotateAndMask(N, Imm, true, SH, MB, ME)) {
SDValue Ops[] = { N->getOperand(0).getOperand(0),
getI32Imm(SH), getI32Imm(MB), getI32Imm(ME) };
- return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4);
+ return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
}
// Other cases are autogenerated.
@@ -1224,7 +1234,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
isRotateAndMask(N, Imm, true, SH, MB, ME)) {
SDValue Ops[] = { N->getOperand(0).getOperand(0),
getI32Imm(SH), getI32Imm(MB), getI32Imm(ME) };
- return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4);
+ return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
}
// Other cases are autogenerated.
@@ -1259,7 +1269,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
bool isPPC64 = (PtrVT == MVT::i64);
// If this is a select of i1 operands, we'll pattern match it.
- if (PPCSubTarget.useCRBits() &&
+ if (PPCSubTarget->useCRBits() &&
N->getOperand(0).getValueType() == MVT::i1)
break;
@@ -1327,17 +1337,17 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
SDValue Ops[] = { CCReg, N->getOperand(2), N->getOperand(3),
getI32Imm(BROpc) };
- return CurDAG->SelectNodeTo(N, SelectCCOp, N->getValueType(0), Ops, 4);
+ return CurDAG->SelectNodeTo(N, SelectCCOp, N->getValueType(0), Ops);
}
case ISD::VSELECT:
- if (PPCSubTarget.hasVSX()) {
+ if (PPCSubTarget->hasVSX()) {
SDValue Ops[] = { N->getOperand(2), N->getOperand(1), N->getOperand(0) };
- return CurDAG->SelectNodeTo(N, PPC::XXSEL, N->getValueType(0), Ops, 3);
+ return CurDAG->SelectNodeTo(N, PPC::XXSEL, N->getValueType(0), Ops);
}
break;
case ISD::VECTOR_SHUFFLE:
- if (PPCSubTarget.hasVSX() && (N->getValueType(0) == MVT::v2f64 ||
+ if (PPCSubTarget->hasVSX() && (N->getValueType(0) == MVT::v2f64 ||
N->getValueType(0) == MVT::v2i64)) {
ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
@@ -1364,23 +1374,23 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
SDValue Chain = LD->getChain();
SDValue Ops[] = { Base, Offset, Chain };
return CurDAG->SelectNodeTo(N, PPC::LXVDSX,
- N->getValueType(0), Ops, 3);
+ N->getValueType(0), Ops);
}
}
SDValue Ops[] = { Op1, Op2, DMV };
- return CurDAG->SelectNodeTo(N, PPC::XXPERMDI, N->getValueType(0), Ops, 3);
+ return CurDAG->SelectNodeTo(N, PPC::XXPERMDI, N->getValueType(0), Ops);
}
break;
case PPCISD::BDNZ:
case PPCISD::BDZ: {
- bool IsPPC64 = PPCSubTarget.isPPC64();
+ bool IsPPC64 = PPCSubTarget->isPPC64();
SDValue Ops[] = { N->getOperand(1), N->getOperand(0) };
return CurDAG->SelectNodeTo(N, N->getOpcode() == PPCISD::BDNZ ?
(IsPPC64 ? PPC::BDNZ8 : PPC::BDNZ) :
(IsPPC64 ? PPC::BDZ8 : PPC::BDZ),
- MVT::Other, Ops, 2);
+ MVT::Other, Ops);
}
case PPCISD::COND_BRANCH: {
// Op #0 is the Chain.
@@ -1393,7 +1403,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
getI32Imm(cast<ConstantSDNode>(N->getOperand(1))->getZExtValue());
SDValue Ops[] = { Pred, N->getOperand(2), N->getOperand(3),
N->getOperand(0), N->getOperand(4) };
- return CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops, 5);
+ return CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops);
}
case ISD::BR_CC: {
ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
@@ -1422,7 +1432,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
SDValue CondCode = SelectCC(N->getOperand(2), N->getOperand(3), CC, dl);
SDValue Ops[] = { getI32Imm(PCC), CondCode,
N->getOperand(4), N->getOperand(0) };
- return CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops, 4);
+ return CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops);
}
case ISD::BRIND: {
// FIXME: Should custom lower this.
@@ -1435,7 +1445,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
return CurDAG->SelectNodeTo(N, Reg, MVT::Other, Chain);
}
case PPCISD::TOC_ENTRY: {
- assert (PPCSubTarget.isPPC64() && "Only supported for 64-bit ABI");
+ assert (PPCSubTarget->isPPC64() && "Only supported for 64-bit ABI");
// For medium and large code model, we generate two instructions as
// described below. Otherwise we allow SelectCodeCommon to handle this,
@@ -1462,18 +1472,8 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA)) {
const GlobalValue *GValue = G->getGlobal();
- const GlobalAlias *GAlias = dyn_cast<GlobalAlias>(GValue);
- const GlobalValue *RealGValue =
- GAlias ? GAlias->getAliasedGlobal() : GValue;
- const GlobalVariable *GVar = dyn_cast<GlobalVariable>(RealGValue);
- assert((GVar || isa<Function>(RealGValue)) &&
- "Unexpected global value subclass!");
-
- // An external variable is one without an initializer. For these,
- // for variables with common linkage, and for Functions, generate
- // the LDtocL form.
- if (!GVar || !GVar->hasInitializer() || RealGValue->hasCommonLinkage() ||
- RealGValue->hasAvailableExternallyLinkage())
+ if (GValue->isDeclaration() || GValue->hasCommonLinkage() ||
+ GValue->hasAvailableExternallyLinkage())
return CurDAG->getMachineNode(PPC::LDtocL, dl, MVT::i64, GA,
SDValue(Tmp, 0));
}
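The replacement collapses the old alias-chasing and GlobalVariable inspection into two queries on the GlobalValue itself. Read as a predicate (a sketch with illustrative names, not LLVM API): the LDtocL form is chosen whenever the definition may live outside the current module.

    bool useLDtocL(bool IsDeclaration, bool HasCommonLinkage,
                   bool HasAvailableExternallyLinkage) {
      return IsDeclaration || HasCommonLinkage ||
             HasAvailableExternallyLinkage;
    }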
@@ -1566,7 +1566,7 @@ void PPCDAGToDAGISel::PostprocessISelDAG() {
return;
PeepholePPC64();
- PeepholdCROps();
+ PeepholeCROps();
}
// Check if all users of this node will become isel where the second operand
@@ -1576,7 +1576,7 @@ void PPCDAGToDAGISel::PostprocessISelDAG() {
// containing zero.
bool PPCDAGToDAGISel::AllUsersSelectZero(SDNode *N) {
// If we're not using isel, then this does not matter.
- if (!PPCSubTarget.hasISEL())
+ if (!PPCSubTarget->hasISEL())
return false;
for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
@@ -1637,7 +1637,7 @@ void PPCDAGToDAGISel::SwapAllSelectUsers(SDNode *N) {
}
}
-void PPCDAGToDAGISel::PeepholdCROps() {
+void PPCDAGToDAGISel::PeepholeCROps() {
bool IsModified;
do {
IsModified = false;
@@ -2038,7 +2038,7 @@ void PPCDAGToDAGISel::PeepholdCROps() {
void PPCDAGToDAGISel::PeepholePPC64() {
// These optimizations are currently supported only for 64-bit SVR4.
- if (PPCSubTarget.isDarwin() || !PPCSubTarget.isPPC64())
+ if (PPCSubTarget->isDarwin() || !PPCSubTarget->isPPC64())
return;
SelectionDAG::allnodes_iterator Position(CurDAG->getRoot().getNode());
@@ -2196,8 +2196,8 @@ FunctionPass *llvm::createPPCISelDag(PPCTargetMachine &TM) {
static void initializePassOnce(PassRegistry &Registry) {
const char *Name = "PowerPC DAG->DAG Pattern Instruction Selection";
- PassInfo *PI = new PassInfo(Name, "ppc-codegen", &SelectionDAGISel::ID, 0,
- false, false);
+ PassInfo *PI = new PassInfo(Name, "ppc-codegen", &SelectionDAGISel::ID,
+ nullptr, false, false);
Registry.registerPass(*PI, true);
}
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index 32ac1dc..cf4c9e6 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -18,6 +18,7 @@
#include "PPCTargetMachine.h"
#include "PPCTargetObjectFile.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringSwitch.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -459,6 +460,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::SDIVREM, VT, Expand);
setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
setOperationAction(ISD::FPOW, VT, Expand);
+ setOperationAction(ISD::BSWAP, VT, Expand);
setOperationAction(ISD::CTPOP, VT, Expand);
setOperationAction(ISD::CTLZ, VT, Expand);
setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);
@@ -758,7 +760,7 @@ unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty) const {
const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
switch (Opcode) {
- default: return 0;
+ default: return nullptr;
case PPCISD::FSEL: return "PPCISD::FSEL";
case PPCISD::FCFID: return "PPCISD::FCFID";
case PPCISD::FCTIDZ: return "PPCISD::FCTIDZ";
@@ -929,7 +931,7 @@ bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
/// amount, otherwise return -1.
int PPC::isVSLDOIShuffleMask(SDNode *N, bool isUnary) {
if (N->getValueType(0) != MVT::v16i8)
- return false;
+ return -1;
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
@@ -1019,7 +1021,7 @@ unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize) {
/// the constant being splatted. The ByteSize field indicates the number of
/// bytes of each element [124] -> [bhw].
SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
- SDValue OpVal(0, 0);
+ SDValue OpVal(nullptr, 0);
// If ByteSize of the splat is bigger than the element size of the
// build_vector, then we have a case where we are checking for a splat where
@@ -1038,7 +1040,7 @@ SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();
- if (UniquedVals[i&(Multiple-1)].getNode() == 0)
+ if (!UniquedVals[i&(Multiple-1)].getNode())
UniquedVals[i&(Multiple-1)] = N->getOperand(i);
else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
return SDValue(); // no match.
@@ -1053,21 +1055,21 @@ SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
bool LeadingZero = true;
bool LeadingOnes = true;
for (unsigned i = 0; i != Multiple-1; ++i) {
- if (UniquedVals[i].getNode() == 0) continue; // Must have been undefs.
+ if (!UniquedVals[i].getNode()) continue; // Must have been undefs.
LeadingZero &= cast<ConstantSDNode>(UniquedVals[i])->isNullValue();
LeadingOnes &= cast<ConstantSDNode>(UniquedVals[i])->isAllOnesValue();
}
// Finally, check the least significant entry.
if (LeadingZero) {
- if (UniquedVals[Multiple-1].getNode() == 0)
+ if (!UniquedVals[Multiple-1].getNode())
return DAG.getTargetConstant(0, MVT::i32); // 0,0,0,undef
int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getZExtValue();
if (Val < 16)
return DAG.getTargetConstant(Val, MVT::i32); // 0,0,0,4 -> vspltisw(4)
}
if (LeadingOnes) {
- if (UniquedVals[Multiple-1].getNode() == 0)
+ if (!UniquedVals[Multiple-1].getNode())
return DAG.getTargetConstant(~0U, MVT::i32); // -1,-1,-1,undef
int Val =cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
if (Val >= -16) // -1,-1,-1,-2 -> vspltisw(-2)
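The bounds checked above (Val < 16 for the leading-zero case, Val >= -16 for the leading-ones case) are the two halves of one constraint: the vspltisb/vspltish/vspltisw forms take a 5-bit signed immediate. As a standalone check:

    // 5-bit signed immediate range for the Altivec splat-immediate forms.
    bool fitsVSPLTI(int Val) { return Val >= -16 && Val <= 15; }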
@@ -1080,13 +1082,13 @@ SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
// Check to see if this buildvec has a single non-undef value in its elements.
for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
- if (OpVal.getNode() == 0)
+ if (!OpVal.getNode())
OpVal = N->getOperand(i);
else if (OpVal != N->getOperand(i))
return SDValue();
}
- if (OpVal.getNode() == 0) return SDValue(); // All UNDEF: use implicit def.
+ if (!OpVal.getNode()) return SDValue(); // All UNDEF: use implicit def.
unsigned ValSizeInBytes = EltSize;
uint64_t Value = 0;
@@ -1135,7 +1137,7 @@ SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
/// sign extension from a 16-bit value. If so, this returns true and the
/// immediate.
static bool isIntS16Immediate(SDNode *N, short &Imm) {
- if (N->getOpcode() != ISD::Constant)
+ if (!isa<ConstantSDNode>(N))
return false;
Imm = (short)cast<ConstantSDNode>(N)->getZExtValue();
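The doc comment describes a sign-extension round trip from a 16-bit value; a self-contained version of that test, assuming two's complement:

    #include <cstdint>

    // V is a valid signed 16-bit immediate iff truncating to 16 bits and
    // sign-extending back is lossless.
    bool isIntS16(int64_t V) { return V == (int64_t)(int16_t)V; }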
@@ -1174,12 +1176,12 @@ bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base,
// disjoint.
APInt LHSKnownZero, LHSKnownOne;
APInt RHSKnownZero, RHSKnownOne;
- DAG.ComputeMaskedBits(N.getOperand(0),
- LHSKnownZero, LHSKnownOne);
+ DAG.computeKnownBits(N.getOperand(0),
+ LHSKnownZero, LHSKnownOne);
if (LHSKnownZero.getBoolValue()) {
- DAG.ComputeMaskedBits(N.getOperand(1),
- RHSKnownZero, RHSKnownOne);
+ DAG.computeKnownBits(N.getOperand(1),
+ RHSKnownZero, RHSKnownOne);
// If all of the bits are known zero on the LHS or RHS, the add won't
// carry.
if (~(LHSKnownZero | RHSKnownZero) == 0) {
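The comments here compress a nice bit-level fact. A runnable demonstration with plain masks standing in for the APInt known-zero sets (a set bit means "provably zero"):

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t LHSKnownZero = 0x0000FFFF; // LHS provably zero in low half
      uint32_t RHSKnownZero = 0xFFFF0000; // RHS provably zero in high half
      // Every bit is known zero on at least one side...
      assert(~(LHSKnownZero | RHSKnownZero) == 0);
      // ...so no carry can propagate and OR equals ADD for conforming values.
      uint32_t A = 0xABCD0000, B = 0x00001234;
      assert((A | B) == (A + B));
      return 0;
    }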
@@ -1279,7 +1281,7 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
// (for better address arithmetic) if the LHS and RHS of the OR are
// provably disjoint.
APInt LHSKnownZero, LHSKnownOne;
- DAG.ComputeMaskedBits(N.getOperand(0), LHSKnownZero, LHSKnownOne);
+ DAG.computeKnownBits(N.getOperand(0), LHSKnownZero, LHSKnownOne);
if ((LHSKnownZero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
// If all of the bits are known zero on the LHS or RHS, the add won't
@@ -1439,7 +1441,8 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
/// GetLabelAccessInfo - Return true if we should reference labels using a
/// PICBase, set the HiOpFlags and LoOpFlags to the target MO flags.
static bool GetLabelAccessInfo(const TargetMachine &TM, unsigned &HiOpFlags,
- unsigned &LoOpFlags, const GlobalValue *GV = 0) {
+ unsigned &LoOpFlags,
+ const GlobalValue *GV = nullptr) {
HiOpFlags = PPCII::MO_HA;
LoOpFlags = PPCII::MO_LO;
@@ -1885,17 +1888,12 @@ SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
Entry.Node = Nest; Args.push_back(Entry);
// Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
- TargetLowering::CallLoweringInfo CLI(Chain,
- Type::getVoidTy(*DAG.getContext()),
- false, false, false, false, 0,
- CallingConv::C,
- /*isTailCall=*/false,
- /*doesNotRet=*/false,
- /*isReturnValueUsed=*/true,
- DAG.getExternalSymbol("__trampoline_setup", PtrVT),
- Args, DAG, dl);
- std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(dl).setChain(Chain)
+ .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()),
+ DAG.getExternalSymbol("__trampoline_setup", PtrVT), &Args, 0);
+ std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
return CallResult.second;
}
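The rewritten call site uses the new fluent CallLoweringInfo builder in place of the long positional constructor. A self-contained miniature of the same idiom; the types and setters are illustrative, not the LLVM interface:

    #include <string>
    #include <utility>

    struct CallInfo {
      std::string Callee;
      bool Tail = false;
      CallInfo &setCallee(std::string C) { Callee = std::move(C); return *this; }
      CallInfo &setTailCall(bool T) { Tail = T; return *this; }
    };

    int main() {
      CallInfo CI;
      CI.setCallee("__trampoline_setup").setTailCall(false);
      return CI.Tail ? 1 : 0;
    }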
@@ -2016,7 +2014,7 @@ bool llvm::CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
CCValAssign::LocInfo &LocInfo,
ISD::ArgFlagsTy &ArgFlags,
CCState &State) {
- static const uint16_t ArgRegs[] = {
+ static const MCPhysReg ArgRegs[] = {
PPC::R3, PPC::R4, PPC::R5, PPC::R6,
PPC::R7, PPC::R8, PPC::R9, PPC::R10,
};
@@ -2043,7 +2041,7 @@ bool llvm::CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
CCValAssign::LocInfo &LocInfo,
ISD::ArgFlagsTy &ArgFlags,
CCState &State) {
- static const uint16_t ArgRegs[] = {
+ static const MCPhysReg ArgRegs[] = {
PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
PPC::F8
};
@@ -2067,8 +2065,8 @@ bool llvm::CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
/// GetFPR - Get the set of FP registers that should be allocated for arguments,
/// on Darwin.
-static const uint16_t *GetFPR() {
- static const uint16_t FPR[] = {
+static const MCPhysReg *GetFPR() {
+ static const MCPhysReg FPR[] = {
PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
PPC::F8, PPC::F9, PPC::F10, PPC::F11, PPC::F12, PPC::F13
};
@@ -2265,13 +2263,13 @@ PPCTargetLowering::LowerFormalArguments_32SVR4(
// If the function takes variable number of arguments, make a frame index for
// the start of the first vararg value... for expansion of llvm.va_start.
if (isVarArg) {
- static const uint16_t GPArgRegs[] = {
+ static const MCPhysReg GPArgRegs[] = {
PPC::R3, PPC::R4, PPC::R5, PPC::R6,
PPC::R7, PPC::R8, PPC::R9, PPC::R10,
};
const unsigned NumGPArgRegs = array_lengthof(GPArgRegs);
- static const uint16_t FPArgRegs[] = {
+ static const MCPhysReg FPArgRegs[] = {
PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
PPC::F8
};
@@ -2333,8 +2331,7 @@ PPCTargetLowering::LowerFormalArguments_32SVR4(
}
if (!MemOps.empty())
- Chain = DAG.getNode(ISD::TokenFactor, dl,
- MVT::Other, &MemOps[0], MemOps.size());
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
return Chain;
}
@@ -2405,18 +2402,18 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
// Area that is at least reserved in caller of this function.
unsigned MinReservedArea = ArgOffset;
- static const uint16_t GPR[] = {
+ static const MCPhysReg GPR[] = {
PPC::X3, PPC::X4, PPC::X5, PPC::X6,
PPC::X7, PPC::X8, PPC::X9, PPC::X10,
};
- static const uint16_t *FPR = GetFPR();
+ static const MCPhysReg *FPR = GetFPR();
- static const uint16_t VR[] = {
+ static const MCPhysReg VR[] = {
PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
};
- static const uint16_t VSRH[] = {
+ static const MCPhysReg VSRH[] = {
PPC::VSH2, PPC::VSH3, PPC::VSH4, PPC::VSH5, PPC::VSH6, PPC::VSH7, PPC::VSH8,
PPC::VSH9, PPC::VSH10, PPC::VSH11, PPC::VSH12, PPC::VSH13
};
@@ -2683,8 +2680,7 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
}
if (!MemOps.empty())
- Chain = DAG.getNode(ISD::TokenFactor, dl,
- MVT::Other, &MemOps[0], MemOps.size());
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
return Chain;
}
@@ -2714,18 +2710,18 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
// Area that is at least reserved in caller of this function.
unsigned MinReservedArea = ArgOffset;
- static const uint16_t GPR_32[] = { // 32-bit registers.
+ static const MCPhysReg GPR_32[] = { // 32-bit registers.
PPC::R3, PPC::R4, PPC::R5, PPC::R6,
PPC::R7, PPC::R8, PPC::R9, PPC::R10,
};
- static const uint16_t GPR_64[] = { // 64-bit registers.
+ static const MCPhysReg GPR_64[] = { // 64-bit registers.
PPC::X3, PPC::X4, PPC::X5, PPC::X6,
PPC::X7, PPC::X8, PPC::X9, PPC::X10,
};
- static const uint16_t *FPR = GetFPR();
+ static const MCPhysReg *FPR = GetFPR();
- static const uint16_t VR[] = {
+ static const MCPhysReg VR[] = {
PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
};
@@ -2736,7 +2732,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
- const uint16_t *GPR = isPPC64 ? GPR_64 : GPR_32;
+ const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32;
// In 32-bit non-varargs functions, the stack space for vectors is after the
// stack space for non-vectors. We do not use this space unless we have
@@ -3039,8 +3035,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
}
if (!MemOps.empty())
- Chain = DAG.getNode(ISD::TokenFactor, dl,
- MVT::Other, &MemOps[0], MemOps.size());
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
return Chain;
}
@@ -3174,12 +3169,12 @@ PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
/// 32-bit value is representable in the immediate field of a BxA instruction.
static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) {
ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
- if (!C) return 0;
+ if (!C) return nullptr;
int Addr = C->getZExtValue();
if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
SignExtend32<26>(Addr) != Addr)
- return 0; // Top 6 bits have to be sext of immediate.
+ return nullptr; // Top 6 bits have to be sext of immediate.
return DAG.getConstant((int)C->getZExtValue() >> 2,
DAG.getTargetLoweringInfo().getPointerTy()).getNode();
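The two rejections above encode one encoding constraint: the absolute branch target must be 4-byte aligned and representable as a sign-extended 26-bit immediate. A standalone check; the shift pair mirrors LLVM's SignExtend32<26> idiom (arithmetic right shift of a negative value is implementation-defined but universal in practice):

    #include <cstdint>

    bool isBLACompatible(int32_t Addr) {
      if (Addr & 3)
        return false;  // low 2 bits are implicitly zero in the encoding
      // Sign-extend from bit 25 and require a lossless round trip.
      int32_t Sext = (int32_t)((uint32_t)Addr << 6) >> 6;
      return Sext == Addr;
    }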
@@ -3315,8 +3310,8 @@ CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
SDLoc dl) {
SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
- false, false, MachinePointerInfo(0),
- MachinePointerInfo(0));
+ false, false, MachinePointerInfo(),
+ MachinePointerInfo());
}
/// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
@@ -3361,8 +3356,7 @@ void PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain,
StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
MemOpChains2, dl);
if (!MemOpChains2.empty())
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- &MemOpChains2[0], MemOpChains2.size());
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
// Store the return address to the appropriate stack slot.
Chain = EmitTailCallStoreFPAndRetAddr(DAG, MF, Chain, LROp, FPOp, SPDiff,
@@ -3476,8 +3470,8 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
// Load the address of the function entry point from the function
// descriptor.
SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other, MVT::Glue);
- SDValue LoadFuncPtr = DAG.getNode(PPCISD::LOAD, dl, VTs, MTCTROps,
- InFlag.getNode() ? 3 : 2);
+ SDValue LoadFuncPtr = DAG.getNode(PPCISD::LOAD, dl, VTs,
+ makeArrayRef(MTCTROps, InFlag.getNode() ? 3 : 2));
Chain = LoadFuncPtr.getValue(1);
InFlag = LoadFuncPtr.getValue(2);
@@ -3513,8 +3507,8 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
MTCTROps[2] = InFlag;
}
- Chain = DAG.getNode(PPCISD::MTCTR, dl, NodeTys, MTCTROps,
- 2 + (InFlag.getNode() != 0));
+ Chain = DAG.getNode(PPCISD::MTCTR, dl, NodeTys,
+ makeArrayRef(MTCTROps, InFlag.getNode() ? 3 : 2));
InFlag = Chain.getValue(1);
NodeTys.clear();
@@ -3522,7 +3516,7 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
NodeTys.push_back(MVT::Glue);
Ops.push_back(Chain);
CallOpc = PPCISD::BCTRL;
- Callee.setNode(0);
+ Callee.setNode(nullptr);
// Add use of X11 (holding environment pointer)
if (isSVR4ABI && isPPC64)
Ops.push_back(DAG.getRegister(PPC::X11, PtrVT));
@@ -3650,7 +3644,7 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl,
isa<ConstantSDNode>(Callee)) &&
"Expecting an global address, external symbol, absolute value or register");
- return DAG.getNode(PPCISD::TC_RETURN, dl, MVT::Other, &Ops[0], Ops.size());
+ return DAG.getNode(PPCISD::TC_RETURN, dl, MVT::Other, Ops);
}
// Add a NOP immediately after the branch instruction when using the 64-bit
@@ -3683,7 +3677,7 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl,
}
}
- Chain = DAG.getNode(CallOpc, dl, NodeTys, &Ops[0], Ops.size());
+ Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
InFlag = Chain.getValue(1);
if (needsTOCRestore) {
@@ -3720,6 +3714,10 @@ PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg,
Ins, DAG);
+ if (!isTailCall && CLI.CS && CLI.CS->isMustTailCall())
+ report_fatal_error("failed to perform tail call elimination on a call "
+ "site marked musttail");
+
if (PPCSubTarget.isSVR4ABI()) {
if (PPCSubTarget.isPPC64())
return LowerCall_64SVR4(Chain, Callee, CallConv, isVarArg,
@@ -3800,7 +3798,7 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee,
errs() << "Call operand #" << i << " has unhandled type "
<< EVT(ArgVT).getEVTString() << "\n";
#endif
- llvm_unreachable(0);
+ llvm_unreachable(nullptr);
}
}
} else {
@@ -3921,8 +3919,7 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee,
}
if (!MemOpChains.empty())
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- &MemOpChains[0], MemOpChains.size());
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
// Build a sequence of copy-to-reg nodes chained together with token chain
// and flag operands which copy the outgoing args into the appropriate regs.
@@ -3940,7 +3937,7 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee,
SDValue Ops[] = { Chain, InFlag };
Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET,
- dl, VTs, Ops, InFlag.getNode() ? 2 : 1);
+ dl, VTs, makeArrayRef(Ops, InFlag.getNode() ? 2 : 1));
InFlag = Chain.getValue(1);
}
@@ -4044,17 +4041,17 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
unsigned ArgOffset = PPCFrameLowering::getLinkageSize(true, true);
unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
- static const uint16_t GPR[] = {
+ static const MCPhysReg GPR[] = {
PPC::X3, PPC::X4, PPC::X5, PPC::X6,
PPC::X7, PPC::X8, PPC::X9, PPC::X10,
};
- static const uint16_t *FPR = GetFPR();
+ static const MCPhysReg *FPR = GetFPR();
- static const uint16_t VR[] = {
+ static const MCPhysReg VR[] = {
PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
};
- static const uint16_t VSRH[] = {
+ static const MCPhysReg VSRH[] = {
PPC::VSH2, PPC::VSH3, PPC::VSH4, PPC::VSH5, PPC::VSH6, PPC::VSH7, PPC::VSH8,
PPC::VSH9, PPC::VSH10, PPC::VSH11, PPC::VSH12, PPC::VSH13
};
@@ -4333,8 +4330,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
}
if (!MemOpChains.empty())
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- &MemOpChains[0], MemOpChains.size());
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
// Check if this is an indirect call (MTCTR/BCTRL).
// See PrepareCall() for more information about calls through function
@@ -4448,17 +4444,17 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
unsigned ArgOffset = PPCFrameLowering::getLinkageSize(isPPC64, true);
unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
- static const uint16_t GPR_32[] = { // 32-bit registers.
+ static const MCPhysReg GPR_32[] = { // 32-bit registers.
PPC::R3, PPC::R4, PPC::R5, PPC::R6,
PPC::R7, PPC::R8, PPC::R9, PPC::R10,
};
- static const uint16_t GPR_64[] = { // 64-bit registers.
+ static const MCPhysReg GPR_64[] = { // 64-bit registers.
PPC::X3, PPC::X4, PPC::X5, PPC::X6,
PPC::X7, PPC::X8, PPC::X9, PPC::X10,
};
- static const uint16_t *FPR = GetFPR();
+ static const MCPhysReg *FPR = GetFPR();
- static const uint16_t VR[] = {
+ static const MCPhysReg VR[] = {
PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
};
@@ -4466,7 +4462,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
const unsigned NumFPRs = 13;
const unsigned NumVRs = array_lengthof(VR);
- const uint16_t *GPR = isPPC64 ? GPR_64 : GPR_32;
+ const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32;
SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
@@ -4696,8 +4692,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
}
if (!MemOpChains.empty())
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- &MemOpChains[0], MemOpChains.size());
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
// On Darwin, R12 must contain the address of an indirect callee. This does
// not mean the MTCTR instruction must use R12; it's easier to model this as
@@ -4785,8 +4780,7 @@ PPCTargetLowering::LowerReturn(SDValue Chain,
if (Flag.getNode())
RetOps.push_back(Flag);
- return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other,
- &RetOps[0], RetOps.size());
+ return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, RetOps);
}
SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG,
@@ -4889,7 +4883,7 @@ SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
// Build a DYNALLOC node.
SDValue Ops[3] = { Chain, NegSize, FPSIdx };
SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other);
- return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops, 3);
+ return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops);
}
SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
@@ -4925,7 +4919,7 @@ SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewLD);
SDValue Ops[] = { Result, SDValue(NewLD.getNode(), 1) };
- return DAG.getMergeValues(Ops, 2, dl);
+ return DAG.getMergeValues(Ops, dl);
}
SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
@@ -5097,8 +5091,7 @@ SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, 4);
SDValue Ops[] = { DAG.getEntryNode(), Tmp, FIPtr };
Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
- DAG.getVTList(MVT::Other), Ops, array_lengthof(Ops),
- MVT::i32, MMO);
+ DAG.getVTList(MVT::Other), Ops, MVT::i32, MMO);
} else
Chain = DAG.getStore(DAG.getEntryNode(), dl, Tmp, FIPtr,
MPI, false, false, 0);
@@ -5225,7 +5218,7 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
Ld = DAG.getMemIntrinsicNode(Op.getOpcode() == ISD::UINT_TO_FP ?
PPCISD::LFIWZX : PPCISD::LFIWAX,
dl, DAG.getVTList(MVT::f64, MVT::Other),
- Ops, 2, MVT::i32, MMO);
+ Ops, MVT::i32, MMO);
} else {
assert(PPCSubTarget.isPPC64() &&
"i32->FP without LFIWAX supported only on PPC64");
@@ -5279,14 +5272,13 @@ SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
MachineFunction &MF = DAG.getMachineFunction();
EVT VT = Op.getValueType();
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
- SDValue MFFSreg, InFlag;
// Save FP Control Word to register
EVT NodeTys[] = {
MVT::f64, // return register
MVT::Glue // unused in this context
};
- SDValue Chain = DAG.getNode(PPCISD::MFFS, dl, NodeTys, &InFlag, 0);
+ SDValue Chain = DAG.getNode(PPCISD::MFFS, dl, NodeTys, None);
// Save FP register to stack slot
int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8, false);
@@ -5345,7 +5337,7 @@ SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt);
SDValue OutOps[] = { OutLo, OutHi };
- return DAG.getMergeValues(OutOps, 2, dl);
+ return DAG.getMergeValues(OutOps, dl);
}
SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const {
@@ -5374,7 +5366,7 @@ SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const {
SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt);
SDValue OutOps[] = { OutLo, OutHi };
- return DAG.getMergeValues(OutOps, 2, dl);
+ return DAG.getMergeValues(OutOps, dl);
}
SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
@@ -5403,7 +5395,7 @@ SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, AmtVT),
Tmp4, Tmp6, ISD::SETLE);
SDValue OutOps[] = { OutLo, OutHi };
- return DAG.getMergeValues(OutOps, 2, dl);
+ return DAG.getMergeValues(OutOps, dl);
}
//===----------------------------------------------------------------------===//
@@ -5432,8 +5424,7 @@ static SDValue BuildSplatI(int Val, unsigned SplatSize, EVT VT,
SDValue Elt = DAG.getConstant(Val, MVT::i32);
SmallVector<SDValue, 8> Ops;
Ops.assign(CanonicalVT.getVectorNumElements(), Elt);
- SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, dl, CanonicalVT,
- &Ops[0], Ops.size());
+ SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, dl, CanonicalVT, Ops);
return DAG.getNode(ISD::BITCAST, dl, ReqVT, Res);
}
@@ -5492,7 +5483,7 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
SelectionDAG &DAG) const {
SDLoc dl(Op);
BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
- assert(BVN != 0 && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
+ assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
// Check if this is a splat of a constant value.
APInt APSplatBits, APSplatUndef;
@@ -5540,10 +5531,14 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
// we convert to a pseudo that will be expanded later into one of
// the above forms.
SDValue Elt = DAG.getConstant(SextVal, MVT::i32);
- EVT VT = Op.getValueType();
- int Size = VT == MVT::v16i8 ? 1 : (VT == MVT::v8i16 ? 2 : 4);
- SDValue EltSize = DAG.getConstant(Size, MVT::i32);
- return DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize);
+ EVT VT = (SplatSize == 1 ? MVT::v16i8 :
+ (SplatSize == 2 ? MVT::v8i16 : MVT::v4i32));
+ SDValue EltSize = DAG.getConstant(SplatSize, MVT::i32);
+ SDValue RetVal = DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize);
+ if (VT == Op.getValueType())
+ return RetVal;
+ else
+ return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), RetVal);
}
// If this is 0x8000_0000 x 4, turn into vspltisw + vslw. If it is
@@ -5838,7 +5833,7 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
}
SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8,
- &ResultMask[0], ResultMask.size());
+ ResultMask);
return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(), V1, V2, VPermMask);
}
@@ -5913,7 +5908,7 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
DAG.getConstant(CompareOpc, MVT::i32)
};
EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue };
- SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops, 3);
+ SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops);
// Now that we have the comparison, emit a copy from the CR to a GPR.
// This is flagged to the above dot comparison.
@@ -7232,8 +7227,8 @@ static bool isConsecutiveLS(LSBaseSDNode *LS, LSBaseSDNode *Base,
return true;
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- const GlobalValue *GV1 = NULL;
- const GlobalValue *GV2 = NULL;
+ const GlobalValue *GV1 = nullptr;
+ const GlobalValue *GV2 = nullptr;
int64_t Offset1 = 0;
int64_t Offset2 = 0;
bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1);
@@ -7360,8 +7355,8 @@ SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
// that the high bits are equal.
APInt Op1Zero, Op1One;
APInt Op2Zero, Op2One;
- DAG.ComputeMaskedBits(N->getOperand(0), Op1Zero, Op1One);
- DAG.ComputeMaskedBits(N->getOperand(1), Op2Zero, Op2One);
+ DAG.computeKnownBits(N->getOperand(0), Op1Zero, Op1One);
+ DAG.computeKnownBits(N->getOperand(1), Op2Zero, Op2One);
// We don't really care about what is known about the first bit (if
// anything), so clear it in all masks prior to comparing them.
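Another rebase-wide rename appears here: ComputeMaskedBits becomes computeKnownBits, and the matching target hook computeMaskedBitsForTargetNode becomes computeKnownBitsForTargetNode in a later hunk. The semantics are unchanged; the call still fills in the known-zero and known-one masks:

    APInt KnownZero, KnownOne;
    DAG.computeKnownBits(N->getOperand(0), KnownZero, KnownOne);
    // formerly: DAG.ComputeMaskedBits(N->getOperand(0), KnownZero, KnownOne);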
@@ -7579,8 +7574,7 @@ SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
Ops[C+i] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ops[C+i]);
DAG.ReplaceAllUsesOfValueWith(PromOp,
- DAG.getNode(PromOp.getOpcode(), dl, MVT::i1,
- Ops.data(), Ops.size()));
+ DAG.getNode(PromOp.getOpcode(), dl, MVT::i1, Ops));
}
// Now we're left with the initial truncation itself.
@@ -7816,8 +7810,7 @@ SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
}
DAG.ReplaceAllUsesOfValueWith(PromOp,
- DAG.getNode(PromOp.getOpcode(), dl, N->getValueType(0),
- Ops.data(), Ops.size()));
+ DAG.getNode(PromOp.getOpcode(), dl, N->getValueType(0), Ops));
}
// Now we're left with the initial extension itself.
@@ -7883,7 +7876,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
if (N->getOperand(1).getOpcode() == ISD::FSQRT) {
SDValue RV =
DAGCombineFastRecipFSQRT(N->getOperand(1).getOperand(0), DCI);
- if (RV.getNode() != 0) {
+ if (RV.getNode()) {
DCI.AddToWorklist(RV.getNode());
return DAG.getNode(ISD::FMUL, dl, N->getValueType(0),
N->getOperand(0), RV);
@@ -7893,7 +7886,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
SDValue RV =
DAGCombineFastRecipFSQRT(N->getOperand(1).getOperand(0).getOperand(0),
DCI);
- if (RV.getNode() != 0) {
+ if (RV.getNode()) {
DCI.AddToWorklist(RV.getNode());
RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N->getOperand(1)),
N->getValueType(0), RV);
@@ -7906,7 +7899,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
SDValue RV =
DAGCombineFastRecipFSQRT(N->getOperand(1).getOperand(0).getOperand(0),
DCI);
- if (RV.getNode() != 0) {
+ if (RV.getNode()) {
DCI.AddToWorklist(RV.getNode());
RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N->getOperand(1)),
N->getValueType(0), RV,
@@ -7918,7 +7911,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
}
SDValue RV = DAGCombineFastRecip(N->getOperand(1), DCI);
- if (RV.getNode() != 0) {
+ if (RV.getNode()) {
DCI.AddToWorklist(RV.getNode());
return DAG.getNode(ISD::FMUL, dl, N->getValueType(0),
N->getOperand(0), RV);
@@ -7933,10 +7926,10 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
// Compute this as 1/(1/sqrt(X)), which is the reciprocal of the
// reciprocal sqrt.
SDValue RV = DAGCombineFastRecipFSQRT(N->getOperand(0), DCI);
- if (RV.getNode() != 0) {
+ if (RV.getNode()) {
DCI.AddToWorklist(RV.getNode());
RV = DAGCombineFastRecip(RV, DCI);
- if (RV.getNode() != 0) {
+ if (RV.getNode()) {
// Unfortunately, RV is now NaN if the input was exactly 0. Select out
// this case and force the answer to 0.
@@ -8014,7 +8007,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
};
Val = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
- DAG.getVTList(MVT::Other), Ops, array_lengthof(Ops),
+ DAG.getVTList(MVT::Other), Ops,
cast<StoreSDNode>(N)->getMemoryVT(),
cast<StoreSDNode>(N)->getMemOperand());
DCI.AddToWorklist(Val.getNode());
@@ -8041,8 +8034,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
};
return
DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other),
- Ops, array_lengthof(Ops),
- cast<StoreSDNode>(N)->getMemoryVT(),
+ Ops, cast<StoreSDNode>(N)->getMemoryVT(),
cast<StoreSDNode>(N)->getMemOperand());
}
break;
@@ -8167,7 +8159,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
Ops.push_back(*O);
}
- DAG.UpdateNodeOperands(User, Ops.data(), Ops.size());
+ DAG.UpdateNodeOperands(User, Ops);
}
return SDValue(N, 0);
@@ -8220,7 +8212,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
DAG.getMemIntrinsicNode(PPCISD::LBRX, dl,
DAG.getVTList(N->getValueType(0) == MVT::i64 ?
MVT::i64 : MVT::i32, MVT::Other),
- Ops, 3, LD->getMemoryVT(), LD->getMemOperand());
+ Ops, LD->getMemoryVT(), LD->getMemOperand());
// If this is an i16 load, insert the truncate.
SDValue ResVal = BSLoad;
@@ -8250,7 +8242,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
!N->getOperand(2).hasOneUse()) {
// Scan all of the users of the LHS, looking for VCMPo's that match.
- SDNode *VCMPoNode = 0;
+ SDNode *VCMPoNode = nullptr;
SDNode *LHSN = N->getOperand(0).getNode();
for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end();
@@ -8271,9 +8263,9 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
// Look at the (necessarily single) use of the flag value. If it has a
// chain, this transformation is more complex. Note that multiple things
// could use the value result, which we should ignore.
- SDNode *FlagUser = 0;
+ SDNode *FlagUser = nullptr;
for (SDNode::use_iterator UI = VCMPoNode->use_begin();
- FlagUser == 0; ++UI) {
+ FlagUser == nullptr; ++UI) {
assert(UI != VCMPoNode->use_end() && "Didn't find user!");
SDNode *User = *UI;
for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
@@ -8378,7 +8370,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
DAG.getConstant(CompareOpc, MVT::i32)
};
EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue };
- SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops, 3);
+ SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops);
// Unpack the result based on how the target uses it.
PPC::Predicate CompOpc;
@@ -8414,11 +8406,11 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
// Inline Assembly Support
//===----------------------------------------------------------------------===//
-void PPCTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
- APInt &KnownZero,
- APInt &KnownOne,
- const SelectionDAG &DAG,
- unsigned Depth) const {
+void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth) const {
KnownZero = KnownOne = APInt(KnownZero.getBitWidth(), 0);
switch (Op.getOpcode()) {
default: break;
@@ -8493,7 +8485,7 @@ PPCTargetLowering::getSingleConstraintMatchWeight(
Value *CallOperandVal = info.CallOperandVal;
// If we don't have a value, we can't do a match,
// but allow it at the lowest weight.
- if (CallOperandVal == NULL)
+ if (!CallOperandVal)
return CW_Default;
Type *type = CallOperandVal->getType();
@@ -8599,7 +8591,7 @@ void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
std::string &Constraint,
std::vector<SDValue>&Ops,
SelectionDAG &DAG) const {
- SDValue Result(0,0);
+ SDValue Result;
// Only support length 1 constraints.
if (Constraint.length() > 1) return;
@@ -8766,6 +8758,30 @@ SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
return FrameAddr;
}
+// FIXME? Maybe this could be a TableGen attribute on some registers and
+// this table could be generated automatically from RegInfo.
+unsigned PPCTargetLowering::getRegisterByName(const char* RegName,
+ EVT VT) const {
+ bool isPPC64 = PPCSubTarget.isPPC64();
+ bool isDarwinABI = PPCSubTarget.isDarwinABI();
+
+ if ((isPPC64 && VT != MVT::i64 && VT != MVT::i32) ||
+ (!isPPC64 && VT != MVT::i32))
+ report_fatal_error("Invalid register global variable type");
+
+ bool is64Bit = isPPC64 && VT == MVT::i64;
+ unsigned Reg = StringSwitch<unsigned>(RegName)
+ .Case("r1", is64Bit ? PPC::X1 : PPC::R1)
+ .Case("r2", isDarwinABI ? 0 : (is64Bit ? PPC::X2 : PPC::R2))
+ .Case("r13", (!isPPC64 && isDarwinABI) ? 0 :
+ (is64Bit ? PPC::X13 : PPC::R13))
+ .Default(0);
+
+ if (Reg)
+ return Reg;
+ report_fatal_error("Invalid register name global variable");
+}
+
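getRegisterByName is the target-side half of the named-register support added in this rebase: reads and writes of a global register variable reach the backend as llvm.read_register and llvm.write_register, and lowering asks the target to resolve the name through the table above. A hypothetical source-level trigger, assuming Clang's GNU global-register-variable extension (not part of this patch):

    register unsigned long StackPtr asm("r1");   // resolved via the table above
    unsigned long currentSP(void) {
      return StackPtr;  // becomes llvm.read_register of PPC::R1 (X1 on ppc64)
    }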
bool
PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
// The PowerPC target isn't yet aware of offsets.
@@ -8795,6 +8811,42 @@ EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size,
}
}
+/// \brief Returns true if it is beneficial to convert a load of a constant
+/// to just the constant itself.
+bool PPCTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
+ Type *Ty) const {
+ assert(Ty->isIntegerTy());
+
+ unsigned BitSize = Ty->getPrimitiveSizeInBits();
+ if (BitSize == 0 || BitSize > 64)
+ return false;
+ return true;
+}
+
+bool PPCTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
+ if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
+ return false;
+ unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
+ unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
+ return NumBits1 == 64 && NumBits2 == 32;
+}
+
+bool PPCTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
+ if (!VT1.isInteger() || !VT2.isInteger())
+ return false;
+ unsigned NumBits1 = VT1.getSizeInBits();
+ unsigned NumBits2 = VT2.getSizeInBits();
+ return NumBits1 == 64 && NumBits2 == 32;
+}
+
+bool PPCTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
+ return isInt<16>(Imm) || isUInt<16>(Imm);
+}
+
+bool PPCTargetLowering::isLegalAddImmediate(int64_t Imm) const {
+ return isInt<16>(Imm) || isUInt<16>(Imm);
+}
+
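Both hooks encode the same fact about PPC D-form immediates: the field is 16 bits, signed in instructions like addi and cmpwi, unsigned in instructions like ori and cmplwi, so either range avoids materializing the constant (the instruction pairing is our reading; the hooks themselves just test the two ranges). A self-contained check:

    #include "llvm/Support/MathExtras.h"
    // Values the two hooks above accept:
    bool A = llvm::isInt<16>(-32768);    // true: fits a signed 16-bit field
    bool B = llvm::isUInt<16>(65535);    // true: fits an unsigned 16-bit field
    bool C = llvm::isInt<16>(65536) ||
             llvm::isUInt<16>(65536);    // false: needs lis/ori to materialize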
bool PPCTargetLowering::allowsUnalignedMemoryAccesses(EVT VT,
unsigned,
bool *Fast) const {
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
index da6d4dc..080ef5d 100644
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -351,20 +351,20 @@ namespace llvm {
/// getTargetNodeName() - This method returns the name of a target specific
/// DAG node.
- virtual const char *getTargetNodeName(unsigned Opcode) const;
+ const char *getTargetNodeName(unsigned Opcode) const override;
- virtual MVT getScalarShiftAmountTy(EVT LHSTy) const { return MVT::i32; }
+ MVT getScalarShiftAmountTy(EVT LHSTy) const override { return MVT::i32; }
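The rest of this header diff applies the same C++11 modernization mechanically: overriding member functions drop the redundant virtual and gain override, so any mismatch with the base-class signature becomes a hard compile error. The pattern in miniature:

    struct Base { virtual void hook() const; };
    struct Derived : Base {
      void hook() const override;  // was: virtual void hook() const;
    };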
/// getSetCCResultType - Return the ISD::SETCC ValueType
- virtual EVT getSetCCResultType(LLVMContext &Context, EVT VT) const;
+ EVT getSetCCResultType(LLVMContext &Context, EVT VT) const override;
/// getPreIndexedAddressParts - returns true by value, base pointer and
/// offset pointer and addressing mode by reference if the node's address
/// can be legally represented as pre-indexed load / store address.
- virtual bool getPreIndexedAddressParts(SDNode *N, SDValue &Base,
- SDValue &Offset,
- ISD::MemIndexedMode &AM,
- SelectionDAG &DAG) const;
+ bool getPreIndexedAddressParts(SDNode *N, SDValue &Base,
+ SDValue &Offset,
+ ISD::MemIndexedMode &AM,
+ SelectionDAG &DAG) const override;
  /// SelectAddressRegReg - Given the specified address, check to see if it
/// can be represented as an indexed [r+r] operation. Returns false if it
@@ -384,29 +384,31 @@ namespace llvm {
bool SelectAddressRegRegOnly(SDValue N, SDValue &Base, SDValue &Index,
SelectionDAG &DAG) const;
- Sched::Preference getSchedulingPreference(SDNode *N) const;
+ Sched::Preference getSchedulingPreference(SDNode *N) const override;
/// LowerOperation - Provide custom lowering hooks for some operations.
///
- virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
/// ReplaceNodeResults - Replace the results of node with an illegal result
/// type with new values built out of custom code.
///
- virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
- SelectionDAG &DAG) const;
+ void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
+ SelectionDAG &DAG) const override;
- virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
- virtual void computeMaskedBitsForTargetNode(const SDValue Op,
- APInt &KnownZero,
- APInt &KnownOne,
- const SelectionDAG &DAG,
- unsigned Depth = 0) const;
+ unsigned getRegisterByName(const char* RegName, EVT VT) const override;
- virtual MachineBasicBlock *
+ void computeKnownBitsForTargetNode(const SDValue Op,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth = 0) const override;
+
+ MachineBasicBlock *
EmitInstrWithCustomInserter(MachineInstr *MI,
- MachineBasicBlock *MBB) const;
+ MachineBasicBlock *MBB) const override;
MachineBasicBlock *EmitAtomicBinary(MachineInstr *MI,
MachineBasicBlock *MBB, bool is64Bit,
unsigned BinOpcode) const;
@@ -420,34 +422,58 @@ namespace llvm {
MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr *MI,
MachineBasicBlock *MBB) const;
- ConstraintType getConstraintType(const std::string &Constraint) const;
+ ConstraintType
+ getConstraintType(const std::string &Constraint) const override;
/// Examine constraint string and operand type and determine a weight value.
/// The operand object must already have been set up with the operand type.
ConstraintWeight getSingleConstraintMatchWeight(
- AsmOperandInfo &info, const char *constraint) const;
+ AsmOperandInfo &info, const char *constraint) const override;
std::pair<unsigned, const TargetRegisterClass*>
getRegForInlineAsmConstraint(const std::string &Constraint,
- MVT VT) const;
+ MVT VT) const override;
/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
/// function arguments in the caller parameter area. This is the actual
/// alignment, not its logarithm.
- unsigned getByValTypeAlignment(Type *Ty) const;
+ unsigned getByValTypeAlignment(Type *Ty) const override;
/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
/// vector. If it is invalid, don't add anything to Ops.
- virtual void LowerAsmOperandForConstraint(SDValue Op,
- std::string &Constraint,
- std::vector<SDValue> &Ops,
- SelectionDAG &DAG) const;
+ void LowerAsmOperandForConstraint(SDValue Op,
+ std::string &Constraint,
+ std::vector<SDValue> &Ops,
+ SelectionDAG &DAG) const override;
/// isLegalAddressingMode - Return true if the addressing mode represented
/// by AM is legal for this target, for a load/store of the specified type.
- virtual bool isLegalAddressingMode(const AddrMode &AM, Type *Ty)const;
+ bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const override;
+
+  /// isLegalICmpImmediate - Return true if the specified immediate is a
+  /// legal icmp immediate: the target has icmp instructions which can
+  /// compare a register against the immediate without having to
+  /// materialize the immediate into a register.
+ bool isLegalICmpImmediate(int64_t Imm) const override;
+
+  /// isLegalAddImmediate - Return true if the specified immediate is a
+  /// legal add immediate: the target has add instructions which can add
+  /// a register and the immediate without having to materialize the
+  /// immediate into a register.
+ bool isLegalAddImmediate(int64_t Imm) const override;
- virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
+  /// isTruncateFree - Return true if it's free to truncate a value of
+  /// type Ty1 to type Ty2; e.g., on PPC it's free to truncate an i64
+  /// value in register X1 to i32 by referencing its sub-register R1.
+ bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
+ bool isTruncateFree(EVT VT1, EVT VT2) const override;
+
+ /// \brief Returns true if it is beneficial to convert a load of a constant
+ /// to just the constant itself.
+ bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
+ Type *Ty) const override;
+
+ bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
/// getOptimalMemOpType - Returns the target specific optimal type for load
/// and store operations as a result of memset, memcpy, and memmove
@@ -460,32 +486,32 @@ namespace llvm {
/// source is constant so it does not need to be loaded.
/// It returns EVT::Other if the type should be determined using generic
/// target-independent logic.
- virtual EVT
+ EVT
getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign,
bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
- MachineFunction &MF) const;
+ MachineFunction &MF) const override;
/// Is unaligned memory access allowed for the given type, and is it fast
/// relative to software emulation.
- virtual bool allowsUnalignedMemoryAccesses(EVT VT,
- unsigned AddrSpace,
- bool *Fast = 0) const;
+ bool allowsUnalignedMemoryAccesses(EVT VT,
+ unsigned AddrSpace,
+ bool *Fast = nullptr) const override;
/// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster
/// than a pair of fmul and fadd instructions. fmuladd intrinsics will be
/// expanded to FMAs when this method returns true, otherwise fmuladd is
/// expanded to fmul + fadd.
- virtual bool isFMAFasterThanFMulAndFAdd(EVT VT) const;
+ bool isFMAFasterThanFMulAndFAdd(EVT VT) const override;
// Should we expand the build vector with shuffles?
- virtual bool
+ bool
shouldExpandBuildVectorWithShuffles(EVT VT,
- unsigned DefinedValues) const;
+ unsigned DefinedValues) const override;
/// createFastISel - This method returns a target-specific FastISel object,
/// or null if the target does not support "fast" instruction selection.
- virtual FastISel *createFastISel(FunctionLoweringInfo &FuncInfo,
- const TargetLibraryInfo *LibInfo) const;
+ FastISel *createFastISel(FunctionLoweringInfo &FuncInfo,
+ const TargetLibraryInfo *LibInfo) const override;
private:
SDValue getFramePointerFrameIndex(SelectionDAG & DAG) const;
@@ -559,29 +585,29 @@ namespace llvm {
const SmallVectorImpl<ISD::InputArg> &Ins,
SmallVectorImpl<SDValue> &InVals) const;
- virtual SDValue
+ SDValue
LowerFormalArguments(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
SDLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const;
+ SmallVectorImpl<SDValue> &InVals) const override;
- virtual SDValue
+ SDValue
LowerCall(TargetLowering::CallLoweringInfo &CLI,
- SmallVectorImpl<SDValue> &InVals) const;
+ SmallVectorImpl<SDValue> &InVals) const override;
- virtual bool
+ bool
CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
- LLVMContext &Context) const;
+ LLVMContext &Context) const override;
- virtual SDValue
+ SDValue
LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
- SDLoc dl, SelectionDAG &DAG) const;
+ SDLoc dl, SelectionDAG &DAG) const override;
SDValue
extendArgForPPC64(ISD::ArgFlagsTy Flags, EVT ObjectVT, SelectionDAG &DAG,
diff --git a/lib/Target/PowerPC/PPCInstrAltivec.td b/lib/Target/PowerPC/PPCInstrAltivec.td
index 2fd4a3e..f3c2eab 100644
--- a/lib/Target/PowerPC/PPCInstrAltivec.td
+++ b/lib/Target/PowerPC/PPCInstrAltivec.td
@@ -223,7 +223,7 @@ class VX2_Int_Ty2<bits<11> xo, string opc, Intrinsic IntID, ValueType OutTy,
//===----------------------------------------------------------------------===//
// Instruction Definitions.
-def HasAltivec : Predicate<"PPCSubTarget.hasAltivec()">;
+def HasAltivec : Predicate<"PPCSubTarget->hasAltivec()">;
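The predicate strings here, and in PPCInstrInfo.td and PPCInstrVSX.td below, switch from '.' to '->'. These strings are pasted into the generated instruction selector, so the change presumably tracks PPCDAGToDAGISel now holding its subtarget by pointer rather than by reference; a sketch of the assumed member change:

    const PPCSubtarget *PPCSubTarget;  // was: const PPCSubtarget &PPCSubTarget;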
let Predicates = [HasAltivec] in {
let isCodeGenOnly = 1 in {
diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp
index 939bbdc..fd72384 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -35,12 +35,14 @@
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "ppc-instr-info"
+
#define GET_INSTRMAP_INFO
#define GET_INSTRINFO_CTOR_DTOR
#include "PPCGenInstrInfo.inc"
-using namespace llvm;
-
static cl::
opt<bool> DisableCTRLoopAnal("disable-ppc-ctrloop-analysis", cl::Hidden,
cl::desc("Disable analysis for CTR loops"));
@@ -230,7 +232,7 @@ PPCInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
// Cannot commute if it has a non-zero rotate count.
if (MI->getOperand(3).getImm() != 0)
- return 0;
+ return nullptr;
// If we have a zero rotate count, we have:
// M = mask(MB,ME)
@@ -539,7 +541,7 @@ PPCInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
bool isPPC64 = TM.getSubtargetImpl()->isPPC64();
// One-way branch.
- if (FBB == 0) {
+ if (!FBB) {
if (Cond.empty()) // Unconditional branch
BuildMI(&MBB, DL, get(PPC::B)).addMBB(TBB);
else if (Cond[1].getReg() == PPC::CTR || Cond[1].getReg() == PPC::CTR8)
@@ -1399,10 +1401,10 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr *CmpInstr,
// There are two possible candidates which can be changed to set CR[01].
// One is MI, the other is a SUB instruction.
// For CMPrr(r1,r2), we are looking for SUB(r1,r2) or SUB(r2,r1).
- MachineInstr *Sub = NULL;
+ MachineInstr *Sub = nullptr;
if (SrcReg2 != 0)
// MI is not a candidate for CMPrr.
- MI = NULL;
+ MI = nullptr;
// FIXME: Conservatively refuse to convert an instruction which isn't in the
// same BB as the comparison. This is to allow the check below to avoid calls
// (and other explicit clobbers); instead we should really check for these
@@ -1810,10 +1812,15 @@ protected:
}
public:
- virtual bool runOnMachineFunction(MachineFunction &MF) {
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ TM = static_cast<const PPCTargetMachine *>(&MF.getTarget());
+ // If we don't have VSX then go ahead and return without doing
+ // anything.
+ if (!TM->getSubtargetImpl()->hasVSX())
+ return false;
+
LIS = &getAnalysis<LiveIntervals>();
- TM = static_cast<const PPCTargetMachine *>(&MF.getTarget());
TII = TM->getInstrInfo();
bool Changed = false;
@@ -1830,7 +1837,7 @@ public:
return Changed;
}
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<LiveIntervals>();
AU.addPreserved<LiveIntervals>();
AU.addRequired<SlotIndexes>();
@@ -1962,8 +1969,11 @@ protected:
}
public:
- virtual bool runOnMachineFunction(MachineFunction &MF) {
+ bool runOnMachineFunction(MachineFunction &MF) override {
TM = static_cast<const PPCTargetMachine *>(&MF.getTarget());
+ // If we don't have VSX on the subtarget, don't do anything.
+ if (!TM->getSubtargetImpl()->hasVSX())
+ return false;
TII = TM->getInstrInfo();
bool Changed = false;
@@ -1977,7 +1987,7 @@ public:
return Changed;
}
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
MachineFunctionPass::getAnalysisUsage(AU);
}
};
@@ -2036,8 +2046,11 @@ protected:
}
public:
- virtual bool runOnMachineFunction(MachineFunction &MF) {
+ bool runOnMachineFunction(MachineFunction &MF) override {
TM = static_cast<const PPCTargetMachine *>(&MF.getTarget());
+ // If we don't have VSX don't bother doing anything here.
+ if (!TM->getSubtargetImpl()->hasVSX())
+ return false;
TII = TM->getInstrInfo();
bool Changed = false;
@@ -2051,7 +2064,7 @@ public:
return Changed;
}
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
MachineFunctionPass::getAnalysisUsage(AU);
}
};
@@ -2193,7 +2206,7 @@ protected:
}
public:
- virtual bool runOnMachineFunction(MachineFunction &MF) {
+ bool runOnMachineFunction(MachineFunction &MF) override {
TM = static_cast<const PPCTargetMachine *>(&MF.getTarget());
TII = TM->getInstrInfo();
@@ -2213,7 +2226,7 @@ public:
return Changed;
}
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
MachineFunctionPass::getAnalysisUsage(AU);
}
};
diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h
index 3c8117c..d9db3e1 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/lib/Target/PowerPC/PPCInstrInfo.h
@@ -86,151 +86,148 @@ public:
/// such, whenever a client has an instance of instruction info, it should
/// always be able to get register info as well (through this method).
///
- virtual const PPCRegisterInfo &getRegisterInfo() const { return RI; }
+ const PPCRegisterInfo &getRegisterInfo() const { return RI; }
ScheduleHazardRecognizer *
CreateTargetHazardRecognizer(const TargetMachine *TM,
- const ScheduleDAG *DAG) const;
+ const ScheduleDAG *DAG) const override;
ScheduleHazardRecognizer *
CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
- const ScheduleDAG *DAG) const;
+ const ScheduleDAG *DAG) const override;
- virtual
int getOperandLatency(const InstrItineraryData *ItinData,
const MachineInstr *DefMI, unsigned DefIdx,
- const MachineInstr *UseMI, unsigned UseIdx) const;
- virtual
+ const MachineInstr *UseMI,
+ unsigned UseIdx) const override;
int getOperandLatency(const InstrItineraryData *ItinData,
SDNode *DefNode, unsigned DefIdx,
- SDNode *UseNode, unsigned UseIdx) const {
+ SDNode *UseNode, unsigned UseIdx) const override {
return PPCGenInstrInfo::getOperandLatency(ItinData, DefNode, DefIdx,
UseNode, UseIdx);
}
bool isCoalescableExtInstr(const MachineInstr &MI,
unsigned &SrcReg, unsigned &DstReg,
- unsigned &SubIdx) const;
+ unsigned &SubIdx) const override;
unsigned isLoadFromStackSlot(const MachineInstr *MI,
- int &FrameIndex) const;
+ int &FrameIndex) const override;
unsigned isStoreToStackSlot(const MachineInstr *MI,
- int &FrameIndex) const;
+ int &FrameIndex) const override;
// commuteInstruction - We can commute rlwimi instructions, but only if the
// rotate amt is zero. We also have to munge the immediates a bit.
- virtual MachineInstr *commuteInstruction(MachineInstr *MI, bool NewMI) const;
+ MachineInstr *commuteInstruction(MachineInstr *MI, bool NewMI) const override;
- virtual bool findCommutedOpIndices(MachineInstr *MI, unsigned &SrcOpIdx1,
- unsigned &SrcOpIdx2) const;
+ bool findCommutedOpIndices(MachineInstr *MI, unsigned &SrcOpIdx1,
+ unsigned &SrcOpIdx2) const override;
- virtual void insertNoop(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI) const;
+ void insertNoop(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const override;
// Branch analysis.
- virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
- MachineBasicBlock *&FBB,
- SmallVectorImpl<MachineOperand> &Cond,
- bool AllowModify) const;
- virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
- virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
- MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond,
- DebugLoc DL) const;
+ bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const override;
+ unsigned RemoveBranch(MachineBasicBlock &MBB) const override;
+ unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const override;
// Select analysis.
- virtual bool canInsertSelect(const MachineBasicBlock&,
- const SmallVectorImpl<MachineOperand> &Cond,
- unsigned, unsigned, int&, int&, int&) const;
- virtual void insertSelect(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI, DebugLoc DL,
- unsigned DstReg,
- const SmallVectorImpl<MachineOperand> &Cond,
- unsigned TrueReg, unsigned FalseReg) const;
-
- virtual void copyPhysReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I, DebugLoc DL,
- unsigned DestReg, unsigned SrcReg,
- bool KillSrc) const;
-
- virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- unsigned SrcReg, bool isKill, int FrameIndex,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const;
-
- virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- unsigned DestReg, int FrameIndex,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const;
-
- virtual
- bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
-
- virtual bool FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI,
- unsigned Reg, MachineRegisterInfo *MRI) const;
+ bool canInsertSelect(const MachineBasicBlock&,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ unsigned, unsigned, int&, int&, int&) const override;
+ void insertSelect(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI, DebugLoc DL,
+ unsigned DstReg,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ unsigned TrueReg, unsigned FalseReg) const override;
+
+ void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const override;
+
+ void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const override;
+
+ void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const override;
+
+ bool
+ ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override;
+
+ bool FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI,
+ unsigned Reg, MachineRegisterInfo *MRI) const override;
// If conversion by predication (only supported by some branch instructions).
// All of the profitability checks always return true; it is always
// profitable to use the predicated branches.
- virtual bool isProfitableToIfCvt(MachineBasicBlock &MBB,
- unsigned NumCycles, unsigned ExtraPredCycles,
- const BranchProbability &Probability) const {
+ bool isProfitableToIfCvt(MachineBasicBlock &MBB,
+ unsigned NumCycles, unsigned ExtraPredCycles,
+ const BranchProbability &Probability) const override {
return true;
}
- virtual bool isProfitableToIfCvt(MachineBasicBlock &TMBB,
- unsigned NumT, unsigned ExtraT,
- MachineBasicBlock &FMBB,
- unsigned NumF, unsigned ExtraF,
- const BranchProbability &Probability) const;
+ bool isProfitableToIfCvt(MachineBasicBlock &TMBB,
+ unsigned NumT, unsigned ExtraT,
+ MachineBasicBlock &FMBB,
+ unsigned NumF, unsigned ExtraF,
+ const BranchProbability &Probability) const override;
- virtual bool isProfitableToDupForIfCvt(MachineBasicBlock &MBB,
- unsigned NumCycles,
- const BranchProbability
- &Probability) const {
+ bool isProfitableToDupForIfCvt(MachineBasicBlock &MBB,
+ unsigned NumCycles,
+ const BranchProbability
+ &Probability) const override {
return true;
}
- virtual bool isProfitableToUnpredicate(MachineBasicBlock &TMBB,
- MachineBasicBlock &FMBB) const {
+ bool isProfitableToUnpredicate(MachineBasicBlock &TMBB,
+ MachineBasicBlock &FMBB) const override {
return false;
}
// Predication support.
- bool isPredicated(const MachineInstr *MI) const;
+ bool isPredicated(const MachineInstr *MI) const override;
- virtual bool isUnpredicatedTerminator(const MachineInstr *MI) const;
+ bool isUnpredicatedTerminator(const MachineInstr *MI) const override;
- virtual
bool PredicateInstruction(MachineInstr *MI,
- const SmallVectorImpl<MachineOperand> &Pred) const;
+ const SmallVectorImpl<MachineOperand> &Pred) const override;
- virtual
bool SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
- const SmallVectorImpl<MachineOperand> &Pred2) const;
+ const SmallVectorImpl<MachineOperand> &Pred2) const override;
- virtual bool DefinesPredicate(MachineInstr *MI,
- std::vector<MachineOperand> &Pred) const;
+ bool DefinesPredicate(MachineInstr *MI,
+ std::vector<MachineOperand> &Pred) const override;
- virtual bool isPredicable(MachineInstr *MI) const;
+ bool isPredicable(MachineInstr *MI) const override;
// Comparison optimization.
- virtual bool analyzeCompare(const MachineInstr *MI,
- unsigned &SrcReg, unsigned &SrcReg2,
- int &Mask, int &Value) const;
+ bool analyzeCompare(const MachineInstr *MI,
+ unsigned &SrcReg, unsigned &SrcReg2,
+ int &Mask, int &Value) const override;
- virtual bool optimizeCompareInstr(MachineInstr *CmpInstr,
- unsigned SrcReg, unsigned SrcReg2,
- int Mask, int Value,
- const MachineRegisterInfo *MRI) const;
+ bool optimizeCompareInstr(MachineInstr *CmpInstr,
+ unsigned SrcReg, unsigned SrcReg2,
+ int Mask, int Value,
+ const MachineRegisterInfo *MRI) const override;
/// GetInstSize - Return the number of bytes of code the specified
/// instruction may be. This returns the maximum number of bytes.
///
- virtual unsigned GetInstSizeInBytes(const MachineInstr *MI) const;
+ unsigned GetInstSizeInBytes(const MachineInstr *MI) const;
};
}
diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td
index 1d984ab..e421f8e 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/lib/Target/PowerPC/PPCInstrInfo.td
@@ -610,10 +610,10 @@ def iaddroff : ComplexPattern<iPTR, 1, "SelectAddrImmOffs", [], []>;
//===----------------------------------------------------------------------===//
// PowerPC Instruction Predicate Definitions.
-def In32BitMode : Predicate<"!PPCSubTarget.isPPC64()">;
-def In64BitMode : Predicate<"PPCSubTarget.isPPC64()">;
-def IsBookE : Predicate<"PPCSubTarget.isBookE()">;
-def IsNotBookE : Predicate<"!PPCSubTarget.isBookE()">;
+def In32BitMode : Predicate<"!PPCSubTarget->isPPC64()">;
+def In64BitMode : Predicate<"PPCSubTarget->isPPC64()">;
+def IsBookE : Predicate<"PPCSubTarget->isBookE()">;
+def IsNotBookE : Predicate<"!PPCSubTarget->isBookE()">;
//===----------------------------------------------------------------------===//
// PowerPC Multiclass Definitions.
diff --git a/lib/Target/PowerPC/PPCInstrVSX.td b/lib/Target/PowerPC/PPCInstrVSX.td
index 9cc919e..49bcc48 100644
--- a/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/lib/Target/PowerPC/PPCInstrVSX.td
@@ -39,7 +39,7 @@ multiclass XX3Form_Rcr<bits<6> opcode, bits<7> xo, dag OOL, dag IOL,
}
}
-def HasVSX : Predicate<"PPCSubTarget.hasVSX()">;
+def HasVSX : Predicate<"PPCSubTarget->hasVSX()">;
let Predicates = [HasVSX] in {
let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
let neverHasSideEffects = 1 in { // VSX instructions don't have side effects.
diff --git a/lib/Target/PowerPC/PPCJITInfo.cpp b/lib/Target/PowerPC/PPCJITInfo.cpp
index 227919c..7bbc71b 100644
--- a/lib/Target/PowerPC/PPCJITInfo.cpp
+++ b/lib/Target/PowerPC/PPCJITInfo.cpp
@@ -11,7 +11,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "jit"
#include "PPCJITInfo.h"
#include "PPCRelocations.h"
#include "PPCTargetMachine.h"
@@ -22,6 +21,8 @@
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
+#define DEBUG_TYPE "jit"
+
static TargetJITInfo::JITCompilerFn JITCompilerFunction;
#define BUILD_ADDIS(RD,RS,IMM16) \
diff --git a/lib/Target/PowerPC/PPCJITInfo.h b/lib/Target/PowerPC/PPCJITInfo.h
index 46d4a08..0693e3e 100644
--- a/lib/Target/PowerPC/PPCJITInfo.h
+++ b/lib/Target/PowerPC/PPCJITInfo.h
@@ -30,19 +30,19 @@ namespace llvm {
is64Bit = tmIs64Bit;
}
- virtual StubLayout getStubLayout();
- virtual void *emitFunctionStub(const Function* F, void *Fn,
- JITCodeEmitter &JCE);
- virtual LazyResolverFn getLazyResolverFunction(JITCompilerFn);
- virtual void relocate(void *Function, MachineRelocation *MR,
- unsigned NumRelocs, unsigned char* GOTBase);
-
+ StubLayout getStubLayout() override;
+ void *emitFunctionStub(const Function* F, void *Fn,
+ JITCodeEmitter &JCE) override;
+ LazyResolverFn getLazyResolverFunction(JITCompilerFn) override;
+ void relocate(void *Function, MachineRelocation *MR,
+ unsigned NumRelocs, unsigned char* GOTBase) override;
+
/// replaceMachineCodeForFunction - Make it so that calling the function
/// whose machine code is at OLD turns into a call to NEW, perhaps by
/// overwriting OLD with a branch to NEW. This is used for self-modifying
/// code.
///
- virtual void replaceMachineCodeForFunction(void *Old, void *New);
+ void replaceMachineCodeForFunction(void *Old, void *New) override;
};
}
diff --git a/lib/Target/PowerPC/PPCMCInstLower.cpp b/lib/Target/PowerPC/PPCMCInstLower.cpp
index 029bb8a..f8e84a5 100644
--- a/lib/Target/PowerPC/PPCMCInstLower.cpp
+++ b/lib/Target/PowerPC/PPCMCInstLower.cpp
@@ -96,7 +96,7 @@ static MCSymbol *GetSymbolFromOperand(const MachineOperand &MO, AsmPrinter &AP){
(MO.getTargetFlags() & PPCII::MO_NLP_HIDDEN_FLAG) ?
MachO.getHiddenGVStubEntry(Sym) : MachO.getGVStubEntry(Sym);
- if (StubSym.getPointer() == 0) {
+ if (!StubSym.getPointer()) {
assert(MO.isGlobal() && "Extern symbol not handled yet");
StubSym = MachineModuleInfoImpl::
StubValueTy(AP.getSymbol(MO.getGlobal()),
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp
index 4ff282e..e333b51 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -12,7 +12,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "reginfo"
#include "PPCRegisterInfo.h"
#include "PPC.h"
#include "PPCFrameLowering.h"
@@ -42,11 +41,13 @@
#include "llvm/Target/TargetOptions.h"
#include <cstdlib>
+using namespace llvm;
+
+#define DEBUG_TYPE "reginfo"
+
#define GET_REGINFO_TARGET_DESC
#include "PPCGenRegisterInfo.inc"
-using namespace llvm;
-
static cl::opt<bool>
EnableBasePointer("ppc-use-base-pointer", cl::Hidden, cl::init(true),
cl::desc("Enable use of a base pointer for complex stack frames"));
@@ -96,7 +97,7 @@ PPCRegisterInfo::getPointerRegClass(const MachineFunction &MF, unsigned Kind)
return &PPC::GPRCRegClass;
}
-const uint16_t*
+const MCPhysReg*
PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
if (Subtarget.isDarwinABI())
return Subtarget.isPPC64() ? (Subtarget.hasAltivec() ?
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.h b/lib/Target/PowerPC/PPCRegisterInfo.h
index c3e54b4..13a35f6 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.h
+++ b/lib/Target/PowerPC/PPCRegisterInfo.h
@@ -34,36 +34,37 @@ public:
/// getPointerRegClass - Return the register class to use to hold pointers.
/// This is used for addressing modes.
- virtual const TargetRegisterClass *
- getPointerRegClass(const MachineFunction &MF, unsigned Kind=0) const;
+ const TargetRegisterClass *
+ getPointerRegClass(const MachineFunction &MF, unsigned Kind=0) const override;
unsigned getRegPressureLimit(const TargetRegisterClass *RC,
- MachineFunction &MF) const;
+ MachineFunction &MF) const override;
const TargetRegisterClass*
- getLargestLegalSuperClass(const TargetRegisterClass *RC) const;
+ getLargestLegalSuperClass(const TargetRegisterClass *RC) const override;
/// Code Generation virtual methods...
- const uint16_t *getCalleeSavedRegs(const MachineFunction* MF = 0) const;
- const uint32_t *getCallPreservedMask(CallingConv::ID CC) const;
+ const MCPhysReg *
+  getCalleeSavedRegs(const MachineFunction *MF = nullptr) const override;
+ const uint32_t *getCallPreservedMask(CallingConv::ID CC) const override;
const uint32_t *getNoPreservedMask() const;
- BitVector getReservedRegs(const MachineFunction &MF) const;
+ BitVector getReservedRegs(const MachineFunction &MF) const override;
/// We require the register scavenger.
- bool requiresRegisterScavenging(const MachineFunction &MF) const {
+ bool requiresRegisterScavenging(const MachineFunction &MF) const override {
return true;
}
- bool requiresFrameIndexScavenging(const MachineFunction &MF) const {
+ bool requiresFrameIndexScavenging(const MachineFunction &MF) const override {
return true;
}
- bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const {
+ bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const override {
return true;
}
- virtual bool requiresVirtualBaseRegisters(const MachineFunction &MF) const {
+ bool requiresVirtualBaseRegisters(const MachineFunction &MF) const override {
return true;
}
@@ -82,28 +83,29 @@ public:
unsigned FrameIndex) const;
bool hasReservedSpillSlot(const MachineFunction &MF, unsigned Reg,
- int &FrameIdx) const;
+ int &FrameIdx) const override;
void eliminateFrameIndex(MachineBasicBlock::iterator II,
int SPAdj, unsigned FIOperandNum,
- RegScavenger *RS = NULL) const;
+ RegScavenger *RS = nullptr) const override;
// Support for virtual base registers.
- bool needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const;
+ bool needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const override;
void materializeFrameBaseRegister(MachineBasicBlock *MBB,
unsigned BaseReg, int FrameIdx,
- int64_t Offset) const;
+ int64_t Offset) const override;
void resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
- int64_t Offset) const;
- bool isFrameOffsetLegal(const MachineInstr *MI, int64_t Offset) const;
+ int64_t Offset) const override;
+ bool isFrameOffsetLegal(const MachineInstr *MI,
+ int64_t Offset) const override;
// Debug information queries.
- unsigned getFrameRegister(const MachineFunction &MF) const;
+ unsigned getFrameRegister(const MachineFunction &MF) const override;
// Base pointer (stack realignment) support.
unsigned getBaseRegister(const MachineFunction &MF) const;
bool hasBasePointer(const MachineFunction &MF) const;
bool canRealignStack(const MachineFunction &MF) const;
- bool needsStackRealignment(const MachineFunction &MF) const;
+ bool needsStackRealignment(const MachineFunction &MF) const override;
};
} // end namespace llvm
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.td b/lib/Target/PowerPC/PPCRegisterInfo.td
index e11f7d4..b3d145b 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.td
+++ b/lib/Target/PowerPC/PPCRegisterInfo.td
@@ -188,6 +188,13 @@ def CR6 : CR<6, "cr6", [CR6LT, CR6GT, CR6EQ, CR6UN]>, DwarfRegNum<[74, 74]>;
def CR7 : CR<7, "cr7", [CR7LT, CR7GT, CR7EQ, CR7UN]>, DwarfRegNum<[75, 75]>;
}
+// The full condition-code register. This is not modeled fully, but is
+// defined here primarily for gcc compatibility, to allow the inline asm
+// "cc" clobber specification to work.
+def CC : PPCReg<"cc">, DwarfRegAlias<CR0> {
+ let Aliases = [CR0, CR1, CR2, CR3, CR4, CR5, CR6, CR7];
+}
+
// Link register
def LR : SPR<8, "lr">, DwarfRegNum<[-2, 65]>;
//let Aliases = [LR] in
@@ -300,3 +307,8 @@ def VRSAVERC : RegisterClass<"PPC", [i32], 32, (add VRSAVE)>;
def CARRYRC : RegisterClass<"PPC", [i32], 32, (add CARRY)> {
let CopyCost = -1;
}
+
+def CCRC : RegisterClass<"PPC", [i32], 32, (add CC)> {
+ let isAllocatable = 0;
+}
+
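The new CC register and the unallocatable CCRC class exist so inline asm can name the condition register as a clobber, matching what gcc accepts. A sketch of the kind of code this unblocks (illustrative; any CR0-recording instruction would do):

    int addAndRecord(int a, int b) {
      int r;
      // "cc" marks the condition register (CR0 via the CC alias) clobbered.
      asm("add. %0, %1, %2" : "=r"(r) : "r"(a), "r"(b) : "cc");
      return r;
    }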
diff --git a/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp b/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp
index d4258b4..f742f72 100644
--- a/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp
+++ b/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp
@@ -11,10 +11,11 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "powerpc-selectiondag-info"
#include "PPCTargetMachine.h"
using namespace llvm;
+#define DEBUG_TYPE "powerpc-selectiondag-info"
+
PPCSelectionDAGInfo::PPCSelectionDAGInfo(const PPCTargetMachine &TM)
: TargetSelectionDAGInfo(TM) {
}
diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp
index b07abe4..ea9daee 100644
--- a/lib/Target/PowerPC/PPCSubtarget.cpp
+++ b/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -24,31 +24,21 @@
#include "llvm/Target/TargetMachine.h"
#include <cstdlib>
+using namespace llvm;
+
+#define DEBUG_TYPE "ppc-subtarget"
+
#define GET_SUBTARGETINFO_TARGET_DESC
#define GET_SUBTARGETINFO_CTOR
#include "PPCGenSubtargetInfo.inc"
-using namespace llvm;
-
PPCSubtarget::PPCSubtarget(const std::string &TT, const std::string &CPU,
const std::string &FS, bool is64Bit,
CodeGenOpt::Level OptLevel)
- : PPCGenSubtargetInfo(TT, CPU, FS)
- , IsPPC64(is64Bit)
- , TargetTriple(TT) {
+ : PPCGenSubtargetInfo(TT, CPU, FS), IsPPC64(is64Bit), TargetTriple(TT),
+ OptLevel(OptLevel) {
initializeEnvironment();
-
- std::string FullFS = FS;
-
- // At -O2 and above, track CR bits as individual registers.
- if (OptLevel >= CodeGenOpt::Default) {
- if (!FullFS.empty())
- FullFS = "+crbits," + FullFS;
- else
- FullFS = "+crbits";
- }
-
- resetSubtargetFeatures(CPU, FullFS);
+ resetSubtargetFeatures(CPU, FS);
}
/// SetJITMode - This is called to inform the subtarget info that we are
@@ -138,6 +128,14 @@ void PPCSubtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) {
FullFS = "+64bit";
}
+ // At -O2 and above, track CR bits as individual registers.
+ if (OptLevel >= CodeGenOpt::Default) {
+ if (!FullFS.empty())
+ FullFS = "+crbits," + FullFS;
+ else
+ FullFS = "+crbits";
+ }
+
// Parse features string.
ParseSubtargetFeatures(CPUName, FullFS);
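Moving the +crbits logic out of the constructor and into resetSubtargetFeatures means the CR-bit-tracking feature is reapplied whenever the feature string is rebuilt, not just once at construction. A standalone mirror of the assembly order (names are ours, for illustration):

    #include <string>
    std::string buildFS(bool is64Bit, bool optimized, std::string FS) {
      if (is64Bit)      // mirrors resetSubtargetFeatures' 64-bit handling
        FS = FS.empty() ? "+64bit" : ("+64bit," + FS);
      if (optimized)    // CodeGenOpt::Default or higher
        FS = FS.empty() ? "+crbits" : ("+crbits," + FS);
      return FS;        // buildFS(true, true, "") == "+crbits,+64bit"
    }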
diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h
index 87e012e..ee43fd5 100644
--- a/lib/Target/PowerPC/PPCSubtarget.h
+++ b/lib/Target/PowerPC/PPCSubtarget.h
@@ -99,6 +99,9 @@ protected:
/// TargetTriple - What processor and OS we're targeting.
Triple TargetTriple;
+ /// OptLevel - What default optimization level we're emitting code for.
+ CodeGenOpt::Level OptLevel;
+
public:
/// This constructor initializes the data members to match that
/// of the specified triple.
@@ -129,7 +132,7 @@ public:
const InstrItineraryData &getInstrItineraryData() const { return InstrItins; }
/// \brief Reset the features for the PowerPC target.
- virtual void resetSubtargetFeatures(const MachineFunction *MF);
+ void resetSubtargetFeatures(const MachineFunction *MF) override;
private:
void initializeEnvironment();
void resetSubtargetFeatures(StringRef CPU, StringRef FS);
@@ -200,15 +203,17 @@ public:
/// enablePostRAScheduler - True at 'More' optimization.
bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
TargetSubtargetInfo::AntiDepBreakMode& Mode,
- RegClassVector& CriticalPathRCs) const;
+ RegClassVector& CriticalPathRCs) const override;
+
+ bool enableEarlyIfConversion() const override { return hasISEL(); }
// Scheduling customization.
- bool enableMachineScheduler() const;
+ bool enableMachineScheduler() const override;
void overrideSchedPolicy(MachineSchedPolicy &Policy,
MachineInstr *begin,
MachineInstr *end,
- unsigned NumRegionInstrs) const;
- bool useAA() const;
+ unsigned NumRegionInstrs) const override;
+ bool useAA() const override;
};
} // End llvm namespace
diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp
index e7438f3..2323add 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -127,12 +127,12 @@ public:
return *getPPCTargetMachine().getSubtargetImpl();
}
- virtual bool addPreISel();
- virtual bool addILPOpts();
- virtual bool addInstSelector();
- virtual bool addPreRegAlloc();
- virtual bool addPreSched2();
- virtual bool addPreEmitPass();
+ bool addPreISel() override;
+ bool addILPOpts() override;
+ bool addInstSelector() override;
+ bool addPreRegAlloc() override;
+ bool addPreSched2() override;
+ bool addPreEmitPass() override;
};
} // namespace
@@ -148,12 +148,8 @@ bool PPCPassConfig::addPreISel() {
}
bool PPCPassConfig::addILPOpts() {
- if (getPPCSubtarget().hasISEL()) {
- addPass(&EarlyIfConverterID);
- return true;
- }
-
- return false;
+ addPass(&EarlyIfConverterID);
+ return true;
}
bool PPCPassConfig::addInstSelector() {
@@ -165,25 +161,19 @@ bool PPCPassConfig::addInstSelector() {
addPass(createPPCCTRLoopsVerify());
#endif
- if (getPPCSubtarget().hasVSX())
- addPass(createPPCVSXCopyPass());
-
+ addPass(createPPCVSXCopyPass());
return false;
}
bool PPCPassConfig::addPreRegAlloc() {
- if (getPPCSubtarget().hasVSX()) {
- initializePPCVSXFMAMutatePass(*PassRegistry::getPassRegistry());
- insertPass(VSXFMAMutateEarly ? &RegisterCoalescerID : &MachineSchedulerID,
- &PPCVSXFMAMutateID);
- }
-
+ initializePPCVSXFMAMutatePass(*PassRegistry::getPassRegistry());
+ insertPass(VSXFMAMutateEarly ? &RegisterCoalescerID : &MachineSchedulerID,
+ &PPCVSXFMAMutateID);
return false;
}
bool PPCPassConfig::addPreSched2() {
- if (getPPCSubtarget().hasVSX())
- addPass(createPPCVSXCopyCleanupPass());
+ addPass(createPPCVSXCopyCleanupPass());
if (getOptLevel() != CodeGenOpt::None)
addPass(&IfConverterID);
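The thread running through this file and the runOnMachineFunction hunks in PPCInstrInfo.cpp above: subtarget checks move out of pass configuration and into the passes themselves (or into hooks like enableEarlyIfConversion). Each pass is now always scheduled and bails out per function, presumably in preparation for subtarget features that vary per function. The recurring shape:

    bool runOnMachineFunction(MachineFunction &MF) override {
      TM = static_cast<const PPCTargetMachine *>(&MF.getTarget());
      // Per-function bail-out replaces the config-time hasVSX() gate.
      if (!TM->getSubtargetImpl()->hasVSX())
        return false;
      bool Changed = false;
      // ... VSX-specific rewriting ...
      return Changed;
    }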
diff --git a/lib/Target/PowerPC/PPCTargetMachine.h b/lib/Target/PowerPC/PPCTargetMachine.h
index 606ccb3..9e92494 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.h
+++ b/lib/Target/PowerPC/PPCTargetMachine.h
@@ -43,34 +43,34 @@ public:
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL, bool is64Bit);
- virtual const PPCInstrInfo *getInstrInfo() const { return &InstrInfo; }
- virtual const PPCFrameLowering *getFrameLowering() const {
+ const PPCInstrInfo *getInstrInfo() const override { return &InstrInfo; }
+ const PPCFrameLowering *getFrameLowering() const override {
return &FrameLowering;
}
- virtual PPCJITInfo *getJITInfo() { return &JITInfo; }
- virtual const PPCTargetLowering *getTargetLowering() const {
+ PPCJITInfo *getJITInfo() override { return &JITInfo; }
+ const PPCTargetLowering *getTargetLowering() const override {
return &TLInfo;
}
- virtual const PPCSelectionDAGInfo* getSelectionDAGInfo() const {
+ const PPCSelectionDAGInfo* getSelectionDAGInfo() const override {
return &TSInfo;
}
- virtual const PPCRegisterInfo *getRegisterInfo() const {
+ const PPCRegisterInfo *getRegisterInfo() const override {
return &InstrInfo.getRegisterInfo();
}
- virtual const DataLayout *getDataLayout() const { return &DL; }
- virtual const PPCSubtarget *getSubtargetImpl() const { return &Subtarget; }
- virtual const InstrItineraryData *getInstrItineraryData() const {
+ const DataLayout *getDataLayout() const override { return &DL; }
+ const PPCSubtarget *getSubtargetImpl() const override { return &Subtarget; }
+ const InstrItineraryData *getInstrItineraryData() const override {
return &InstrItins;
}
// Pass Pipeline Configuration
- virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
- virtual bool addCodeEmitter(PassManagerBase &PM,
- JITCodeEmitter &JCE);
+ TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
+ bool addCodeEmitter(PassManagerBase &PM,
+ JITCodeEmitter &JCE) override;
/// \brief Register PPC analysis passes with a pass manager.
- virtual void addAnalysisPasses(PassManagerBase &PM);
+ void addAnalysisPasses(PassManagerBase &PM) override;
};
/// PPC32TargetMachine - PowerPC 32-bit target machine.
diff --git a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index 2f4d5c1..007901b 100644
--- a/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -14,17 +14,22 @@
///
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "ppctti"
#include "PPC.h"
#include "PPCTargetMachine.h"
#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/CostTable.h"
#include "llvm/Target/TargetLowering.h"
using namespace llvm;
+#define DEBUG_TYPE "ppctti"
+
+static cl::opt<bool> DisablePPCConstHoist("disable-ppc-constant-hoisting",
+cl::desc("disable constant hoisting on PPC"), cl::init(false), cl::Hidden);
+
// Declare the pass initialization routine locally as target-specific passes
-// don't havve a target-wide initialization entry point, and so we rely on the
+// don't have a target-wide initialization entry point, and so we rely on the
// pass constructor initialization.
namespace llvm {
void initializePPCTTIPass(PassRegistry &);
@@ -33,21 +38,16 @@ void initializePPCTTIPass(PassRegistry &);
namespace {
class PPCTTI final : public ImmutablePass, public TargetTransformInfo {
- const PPCTargetMachine *TM;
const PPCSubtarget *ST;
const PPCTargetLowering *TLI;
- /// Estimate the overhead of scalarizing an instruction. Insert and Extract
- /// are set if the result needs to be inserted and/or extracted from vectors.
- unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const;
-
public:
- PPCTTI() : ImmutablePass(ID), TM(0), ST(0), TLI(0) {
+ PPCTTI() : ImmutablePass(ID), ST(nullptr), TLI(nullptr) {
llvm_unreachable("This pass cannot be directly constructed");
}
PPCTTI(const PPCTargetMachine *TM)
- : ImmutablePass(ID), TM(TM), ST(TM->getSubtargetImpl()),
+ : ImmutablePass(ID), ST(TM->getSubtargetImpl()),
TLI(TM->getTargetLowering()) {
initializePPCTTIPass(*PassRegistry::getPassRegistry());
}
@@ -72,6 +72,13 @@ public:
/// \name Scalar TTI Implementations
/// @{
+ unsigned getIntImmCost(const APInt &Imm, Type *Ty) const override;
+
+ unsigned getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
+ Type *Ty) const override;
+ unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
+ Type *Ty) const override;
+
virtual PopcntSupportKind
getPopcntSupport(unsigned TyWidth) const override;
virtual void getUnrollingPreferences(
@@ -128,6 +135,142 @@ PPCTTI::PopcntSupportKind PPCTTI::getPopcntSupport(unsigned TyWidth) const {
return PSK_Software;
}
+unsigned PPCTTI::getIntImmCost(const APInt &Imm, Type *Ty) const {
+ if (DisablePPCConstHoist)
+ return TargetTransformInfo::getIntImmCost(Imm, Ty);
+
+ assert(Ty->isIntegerTy());
+
+ unsigned BitSize = Ty->getPrimitiveSizeInBits();
+ if (BitSize == 0)
+ return ~0U;
+
+ if (Imm == 0)
+ return TCC_Free;
+
+ if (Imm.getBitWidth() <= 64) {
+ if (isInt<16>(Imm.getSExtValue()))
+ return TCC_Basic;
+
+ if (isInt<32>(Imm.getSExtValue())) {
+ // A constant that can be materialized using lis.
+ if ((Imm.getZExtValue() & 0xFFFF) == 0)
+ return TCC_Basic;
+
+ return 2 * TCC_Basic;
+ }
+ }
+
+ return 4 * TCC_Basic;
+}
+
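The ladder above prices an integer constant by the shortest PPC materialization sequence: nothing for zero, one instruction for a signed 16-bit value (li) or a 32-bit value with a clear low halfword (lis), two for a general 32-bit value (lis plus ori), and four as the 64-bit worst case. A standalone mirror, for illustration (the function name is ours):

    #include <cstdint>
    // Mirrors PPCTTI::getIntImmCost's ladder; TCC_Free == 0, TCC_Basic == 1.
    unsigned ppcImmCostSketch(int64_t Imm) {
      if (Imm == 0) return 0;                                  // TCC_Free
      if (Imm >= INT16_MIN && Imm <= INT16_MAX) return 1;      // li
      if (Imm >= INT32_MIN && Imm <= INT32_MAX)
        return (static_cast<uint64_t>(Imm) & 0xFFFF) ? 2 : 1;  // lis [+ ori]
      return 4;                // full 64-bit li/lis/sldi/oris/ori-style sequence
    }
    // ppcImmCostSketch(0x7FFF) == 1, (0x12340000) == 1, (0x12345678) == 2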
+unsigned PPCTTI::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
+ const APInt &Imm, Type *Ty) const {
+ if (DisablePPCConstHoist)
+ return TargetTransformInfo::getIntImmCost(IID, Idx, Imm, Ty);
+
+ assert(Ty->isIntegerTy());
+
+ unsigned BitSize = Ty->getPrimitiveSizeInBits();
+ if (BitSize == 0)
+ return ~0U;
+
+ switch (IID) {
+ default: return TCC_Free;
+ case Intrinsic::sadd_with_overflow:
+ case Intrinsic::uadd_with_overflow:
+ case Intrinsic::ssub_with_overflow:
+ case Intrinsic::usub_with_overflow:
+ if ((Idx == 1) && Imm.getBitWidth() <= 64 && isInt<16>(Imm.getSExtValue()))
+ return TCC_Free;
+ break;
+ }
+ return PPCTTI::getIntImmCost(Imm, Ty);
+}
+
+unsigned PPCTTI::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
+ Type *Ty) const {
+ if (DisablePPCConstHoist)
+ return TargetTransformInfo::getIntImmCost(Opcode, Idx, Imm, Ty);
+
+ assert(Ty->isIntegerTy());
+
+ unsigned BitSize = Ty->getPrimitiveSizeInBits();
+ if (BitSize == 0)
+ return ~0U;
+
+ unsigned ImmIdx = ~0U;
+ bool ShiftedFree = false, RunFree = false, UnsignedFree = false,
+ ZeroFree = false;
+ switch (Opcode) {
+ default: return TCC_Free;
+ case Instruction::GetElementPtr:
+ // Always hoist the base address of a GetElementPtr. This prevents the
+ // creation of new constants for every base constant that gets constant
+ // folded with the offset.
+ if (Idx == 0)
+ return 2 * TCC_Basic;
+ return TCC_Free;
+ case Instruction::And:
+ RunFree = true; // (for the rotate-and-mask instructions)
+ // Fallthrough...
+ case Instruction::Add:
+ case Instruction::Or:
+ case Instruction::Xor:
+ ShiftedFree = true;
+ // Fallthrough...
+ case Instruction::Sub:
+ case Instruction::Mul:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ ImmIdx = 1;
+ break;
+ case Instruction::ICmp:
+ UnsignedFree = true;
+ ImmIdx = 1;
+ // Fallthrough... (zero comparisons can use record-form instructions)
+ case Instruction::Select:
+ ZeroFree = true;
+ break;
+ case Instruction::PHI:
+ case Instruction::Call:
+ case Instruction::Ret:
+ case Instruction::Load:
+ case Instruction::Store:
+ break;
+ }
+
+ if (ZeroFree && Imm == 0)
+ return TCC_Free;
+
+ if (Idx == ImmIdx && Imm.getBitWidth() <= 64) {
+ if (isInt<16>(Imm.getSExtValue()))
+ return TCC_Free;
+
+ if (RunFree) {
+ if (Imm.getBitWidth() <= 32 &&
+ (isShiftedMask_32(Imm.getZExtValue()) ||
+ isShiftedMask_32(~Imm.getZExtValue())))
+ return TCC_Free;
+
+ if (ST->isPPC64() &&
+ (isShiftedMask_64(Imm.getZExtValue()) ||
+ isShiftedMask_64(~Imm.getZExtValue())))
+ return TCC_Free;
+ }
+
+ if (UnsignedFree && isUInt<16>(Imm.getZExtValue()))
+ return TCC_Free;
+
+ if (ShiftedFree && (Imm.getZExtValue() & 0xFFFF) == 0)
+ return TCC_Free;
+ }
+
+ return PPCTTI::getIntImmCost(Imm, Ty);
+}
+
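For reference, the cost tiers used by getIntImmCost above mirror how PowerPC materializes integer immediates: zero is free, a 16-bit signed value takes one li/addi, a 32-bit value with a zero low halfword takes one lis, a general 32-bit value takes lis+ori, and anything wider needs a longer sequence. A minimal standalone restatement of that classification (a sketch with hypothetical helper names, not the LLVM API):

    #include <cstdint>
    #include <cstdio>

    // Hypothetical restatement of the rules in PPCTTI::getIntImmCost above.
    // TCC_Free/TCC_Basic stand in for the TargetTransformInfo cost constants.
    enum Cost { TCC_Free = 0, TCC_Basic = 1 };

    static bool fitsInt16(int64_t V) { return V >= INT16_MIN && V <= INT16_MAX; }
    static bool fitsInt32(int64_t V) { return V >= INT32_MIN && V <= INT32_MAX; }

    static unsigned ppcImmCost(int64_t Imm) {
      if (Imm == 0)
        return TCC_Free;              // no instruction needed
      if (fitsInt16(Imm))
        return TCC_Basic;             // single li/addi
      if (fitsInt32(Imm)) {
        if ((static_cast<uint64_t>(Imm) & 0xFFFF) == 0)
          return TCC_Basic;           // single lis
        return 2 * TCC_Basic;         // lis + ori
      }
      return 4 * TCC_Basic;           // general 64-bit sequence
    }

    int main() {
      printf("%u %u %u %u %u\n", ppcImmCost(0), ppcImmCost(42),
             ppcImmCost(0x70000), ppcImmCost(0x12345678),
             ppcImmCost(0x123456789LL));  // prints: 0 1 1 2 4
    }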
void PPCTTI::getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) const {
if (ST->getDarwinDirective() == PPC::DIR_A2) {
// The A2 is in-order with a deep pipeline, and concatenation unrolling
@@ -220,7 +363,9 @@ unsigned PPCTTI::getVectorInstrCost(unsigned Opcode, Type *Val,
// experimentally as a minimum needed to prevent unprofitable
// vectorization for the paq8p benchmark. It may need to be
// raised further if other unprofitable cases remain.
- unsigned LHSPenalty = 12;
+ unsigned LHSPenalty = 2;
+ if (ISD == ISD::INSERT_VECTOR_ELT)
+ LHSPenalty += 7;
// Vector element insert/extract with Altivec is very expensive,
// because they require store and reload with the attendant
@@ -244,14 +389,32 @@ unsigned PPCTTI::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
unsigned Cost =
TargetTransformInfo::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
- // FIXME: Update this for VSX loads/stores that support unaligned access.
+ // VSX loads/stores support unaligned access.
+ if (ST->hasVSX()) {
+ if (LT.second == MVT::v2f64 || LT.second == MVT::v2i64)
+ return Cost;
+ }
+
+ bool UnalignedAltivec =
+ Src->isVectorTy() &&
+ Src->getPrimitiveSizeInBits() >= LT.second.getSizeInBits() &&
+ LT.second.getSizeInBits() == 128 &&
+ Opcode == Instruction::Load;
// PPC in general does not support unaligned loads and stores. They'll need
// to be decomposed based on the alignment factor.
unsigned SrcBytes = LT.second.getStoreSize();
- if (SrcBytes && Alignment && Alignment < SrcBytes)
+ if (SrcBytes && Alignment && Alignment < SrcBytes && !UnalignedAltivec) {
Cost += LT.first*(SrcBytes/Alignment-1);
+ // For a vector type, there is also scalarization overhead (only for
+ // stores; loads are expanded using the vector-load + permutation
+ // sequence, which is much less expensive).
+ if (Src->isVectorTy() && Opcode == Instruction::Store)
+ for (int i = 0, e = Src->getVectorNumElements(); i < e; ++i)
+ Cost += getVectorInstrCost(Instruction::ExtractElement, Src, i);
+ }
+
return Cost;
}
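The unaligned-access costing above reduces to simple arithmetic: if an access of SrcBytes cannot be done at the given alignment, it is split into SrcBytes/Alignment pieces, so each legalized value pays SrcBytes/Alignment - 1 extra operations, and an unaligned Altivec-era vector store additionally pays one element extract per lane. A small standalone model of that formula (hypothetical names, not the LLVM API):

    #include <cstdio>

    // Hypothetical model of PPCTTI::getMemoryOpCost above. numLegalized
    // corresponds to LT.first; extractCost to getVectorInstrCost.
    static unsigned memOpCost(unsigned baseCost, unsigned numLegalized,
                              unsigned srcBytes, unsigned alignment,
                              bool isVectorStore, unsigned numElts,
                              unsigned extractCost) {
      unsigned cost = baseCost;
      if (srcBytes && alignment && alignment < srcBytes) {
        cost += numLegalized * (srcBytes / alignment - 1); // split accesses
        if (isVectorStore)
          cost += numElts * extractCost;                   // scalarization
      }
      return cost;
    }

    int main() {
      // 16-byte vector store at 4-byte alignment, 4 lanes, extract cost 2:
      // 1 + 1*(16/4 - 1) + 4*2 = 12
      printf("%u\n", memOpCost(1, 1, 16, 4, true, 4, 2));
    }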
diff --git a/lib/Target/R600/AMDGPU.h b/lib/Target/R600/AMDGPU.h
index 3e1848b..949fdfb 100644
--- a/lib/Target/R600/AMDGPU.h
+++ b/lib/Target/R600/AMDGPU.h
@@ -37,11 +37,15 @@ FunctionPass *createAMDGPUCFGStructurizerPass();
// SI Passes
FunctionPass *createSITypeRewriter();
FunctionPass *createSIAnnotateControlFlowPass();
+FunctionPass *createSILowerI1CopiesPass();
FunctionPass *createSILowerControlFlowPass(TargetMachine &tm);
FunctionPass *createSIFixSGPRCopiesPass(TargetMachine &tm);
FunctionPass *createSICodeEmitterPass(formatted_raw_ostream &OS);
FunctionPass *createSIInsertWaits(TargetMachine &tm);
+void initializeSILowerI1CopiesPass(PassRegistry &);
+extern char &SILowerI1CopiesID;
+
// Passes common to R600 and SI
Pass *createAMDGPUStructurizeCFGPass();
FunctionPass *createAMDGPUConvertToISAPass(TargetMachine &tm);
@@ -76,8 +80,8 @@ enum AddressSpaces {
GLOBAL_ADDRESS = 1, ///< Address space for global memory (RAT0, VTX0).
CONSTANT_ADDRESS = 2, ///< Address space for constant memory
LOCAL_ADDRESS = 3, ///< Address space for local memory.
- REGION_ADDRESS = 4, ///< Address space for region memory.
- ADDRESS_NONE = 5, ///< Address space for unknown memory.
+ FLAT_ADDRESS = 4, ///< Address space for flat memory.
+ REGION_ADDRESS = 5, ///< Address space for region memory.
PARAM_D_ADDRESS = 6, ///< Address space for direct addressable parameter memory (CONST0)
PARAM_I_ADDRESS = 7, ///< Address space for indirect addressable parameter memory (VTX1)
@@ -102,7 +106,8 @@ enum AddressSpaces {
CONSTANT_BUFFER_13 = 21,
CONSTANT_BUFFER_14 = 22,
CONSTANT_BUFFER_15 = 23,
- LAST_ADDRESS = 24
+ ADDRESS_NONE = 24, ///< Address space for unknown memory.
+ LAST_ADDRESS = ADDRESS_NONE
};
} // namespace AMDGPUAS
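The renumbering above inserts FLAT_ADDRESS at 4, shifts REGION_ADDRESS to 5, and moves ADDRESS_NONE to the end of the enum so it can double as the LAST_ADDRESS sentinel. A compile-time restatement of that layout and its invariant (a sketch, not code from the tree; PRIVATE_ADDRESS = 0 is implied by the checkType assertion later in this patch):

    namespace AMDGPUAS_Sketch {
    enum AddressSpaces {
      PRIVATE_ADDRESS  = 0,
      GLOBAL_ADDRESS   = 1,
      CONSTANT_ADDRESS = 2,
      LOCAL_ADDRESS    = 3,
      FLAT_ADDRESS     = 4,  // newly inserted
      REGION_ADDRESS   = 5,  // shifted from 4
      ADDRESS_NONE     = 24, // moved to the end
      LAST_ADDRESS     = ADDRESS_NONE
    };
    static_assert(LAST_ADDRESS == ADDRESS_NONE,
                  "the sentinel must track the unknown address space");
    }
    int main() { return 0; }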
diff --git a/lib/Target/R600/AMDGPU.td b/lib/Target/R600/AMDGPU.td
index d1e2cf5..2edc115 100644
--- a/lib/Target/R600/AMDGPU.td
+++ b/lib/Target/R600/AMDGPU.td
@@ -120,6 +120,17 @@ def AMDGPU : Target {
let InstructionSet = AMDGPUInstrInfo;
}
+//===----------------------------------------------------------------------===//
+// Predicate helper class
+//===----------------------------------------------------------------------===//
+
+class PredicateControl {
+ Predicate SubtargetPredicate;
+ list<Predicate> OtherPredicates = [];
+ list<Predicate> Predicates = !listconcat([SubtargetPredicate],
+ OtherPredicates);
+}
+
// Include AMDGPU TD files
include "R600Schedule.td"
include "SISchedule.td"
diff --git a/lib/Target/R600/AMDGPUAsmPrinter.cpp b/lib/Target/R600/AMDGPUAsmPrinter.cpp
index b166c45..170f479 100644
--- a/lib/Target/R600/AMDGPUAsmPrinter.cpp
+++ b/lib/Target/R600/AMDGPUAsmPrinter.cpp
@@ -64,7 +64,7 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
const AMDGPUSubtarget &STM = TM.getSubtarget<AMDGPUSubtarget>();
SIProgramInfo KernelInfo;
if (STM.getGeneration() > AMDGPUSubtarget::NORTHERN_ISLANDS) {
- findNumUsedRegistersSI(MF, KernelInfo.NumSGPR, KernelInfo.NumVGPR);
+ getSIProgramInfo(KernelInfo, MF);
EmitProgramInfoSI(MF, KernelInfo);
} else {
EmitProgramInfoR600(MF);
@@ -84,8 +84,10 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
SectionKind::getReadOnly());
OutStreamer.SwitchSection(CommentSection);
- if (STM.getGeneration() > AMDGPUSubtarget::NORTHERN_ISLANDS) {
+ if (STM.getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
OutStreamer.emitRawComment(" Kernel info:", false);
+ OutStreamer.emitRawComment(" codeLenInByte = " + Twine(KernelInfo.CodeLen),
+ false);
OutStreamer.emitRawComment(" NumSgprs: " + Twine(KernelInfo.NumSGPR),
false);
OutStreamer.emitRawComment(" NumVgprs: " + Twine(KernelInfo.NumVGPR),
@@ -184,9 +186,9 @@ void AMDGPUAsmPrinter::EmitProgramInfoR600(MachineFunction &MF) {
}
}
-void AMDGPUAsmPrinter::findNumUsedRegistersSI(MachineFunction &MF,
- unsigned &NumSGPR,
- unsigned &NumVGPR) const {
+void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
+ MachineFunction &MF) const {
+ uint64_t CodeSize = 0;
unsigned MaxSGPR = 0;
unsigned MaxVGPR = 0;
bool VCCUsed = false;
@@ -200,6 +202,9 @@ void AMDGPUAsmPrinter::findNumUsedRegistersSI(MachineFunction &MF,
I != E; ++I) {
MachineInstr &MI = *I;
+ // TODO: CodeSize should account for multiple functions.
+ CodeSize += MI.getDesc().Size;
+
unsigned numOperands = MI.getNumOperands();
for (unsigned op_idx = 0; op_idx < numOperands; op_idx++) {
MachineOperand &MO = MI.getOperand(op_idx);
@@ -274,13 +279,9 @@ void AMDGPUAsmPrinter::findNumUsedRegistersSI(MachineFunction &MF,
if (VCCUsed)
MaxSGPR += 2;
- NumSGPR = MaxSGPR;
- NumVGPR = MaxVGPR;
-}
-
-void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &Out,
- MachineFunction &MF) const {
- findNumUsedRegistersSI(MF, Out.NumSGPR, Out.NumVGPR);
+ ProgInfo.CodeLen = CodeSize;
+ ProgInfo.NumSGPR = MaxSGPR;
+ ProgInfo.NumVGPR = MaxVGPR;
}
void AMDGPUAsmPrinter::EmitProgramInfoSI(MachineFunction &MF,
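getSIProgramInfo now computes the codeLenInByte value emitted above by folding a byte count into the same walk that finds the register high-water marks: it simply sums the encoded size of every machine instruction. A standalone sketch of that accumulation (the Instr type here is a hypothetical stand-in for MachineInstr):

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    // Hypothetical stand-in for MachineInstr; only the encoded size matters.
    struct Instr { unsigned SizeInBytes; };

    static uint64_t codeLen(const std::vector<std::vector<Instr>> &blocks) {
      uint64_t Bytes = 0;
      for (const auto &BB : blocks)   // machine basic blocks
        for (const auto &MI : BB)     // MI.getDesc().Size in the real code
          Bytes += MI.SizeInBytes;
      return Bytes;
    }

    int main() {
      std::vector<std::vector<Instr>> fn = {{{4}, {8}}, {{4}}};
      printf("codeLenInByte = %llu\n", (unsigned long long)codeLen(fn)); // 16
    }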
diff --git a/lib/Target/R600/AMDGPUAsmPrinter.h b/lib/Target/R600/AMDGPUAsmPrinter.h
index a2b8337..71adc9a 100644
--- a/lib/Target/R600/AMDGPUAsmPrinter.h
+++ b/lib/Target/R600/AMDGPUAsmPrinter.h
@@ -24,7 +24,12 @@ namespace llvm {
class AMDGPUAsmPrinter : public AsmPrinter {
private:
struct SIProgramInfo {
- SIProgramInfo() : NumSGPR(0), NumVGPR(0) {}
+ SIProgramInfo() :
+ CodeLen(0),
+ NumSGPR(0),
+ NumVGPR(0) {}
+
+ uint64_t CodeLen;
unsigned NumSGPR;
unsigned NumVGPR;
};
@@ -42,14 +47,14 @@ private:
public:
explicit AMDGPUAsmPrinter(TargetMachine &TM, MCStreamer &Streamer);
- virtual bool runOnMachineFunction(MachineFunction &MF);
+ bool runOnMachineFunction(MachineFunction &MF) override;
- virtual const char *getPassName() const {
+ const char *getPassName() const override {
return "AMDGPU Assembly Printer";
}
/// Implemented in AMDGPUMCInstLower.cpp
- virtual void EmitInstruction(const MachineInstr *MI);
+ void EmitInstruction(const MachineInstr *MI) override;
protected:
bool DisasmEnabled;
diff --git a/lib/Target/R600/AMDGPUCallingConv.td b/lib/Target/R600/AMDGPUCallingConv.td
index 65cdb24..5f8ad8c 100644
--- a/lib/Target/R600/AMDGPUCallingConv.td
+++ b/lib/Target/R600/AMDGPUCallingConv.td
@@ -20,7 +20,7 @@ def CC_SI : CallingConv<[
CCIfInReg<CCIfType<[f32, i32] , CCAssignToReg<[
SGPR0, SGPR1, SGPR2, SGPR3, SGPR4, SGPR5, SGPR6, SGPR7,
SGPR8, SGPR9, SGPR10, SGPR11, SGPR12, SGPR13, SGPR14, SGPR15,
- SGPR16
+ SGPR16, SGPR17, SGPR18, SGPR19, SGPR20, SGPR21
]>>>,
CCIfInReg<CCIfType<[i64] , CCAssignToRegWithShadow<
diff --git a/lib/Target/R600/AMDGPUConvertToISA.cpp b/lib/Target/R600/AMDGPUConvertToISA.cpp
index 50297d1..91aeee2 100644
--- a/lib/Target/R600/AMDGPUConvertToISA.cpp
+++ b/lib/Target/R600/AMDGPUConvertToISA.cpp
@@ -31,9 +31,9 @@ public:
AMDGPUConvertToISAPass(TargetMachine &tm) :
MachineFunctionPass(ID), TM(tm) { }
- virtual bool runOnMachineFunction(MachineFunction &MF);
+ bool runOnMachineFunction(MachineFunction &MF) override;
- virtual const char *getPassName() const {return "AMDGPU Convert to ISA";}
+ const char *getPassName() const override {return "AMDGPU Convert to ISA";}
};
diff --git a/lib/Target/R600/AMDGPUFrameLowering.cpp b/lib/Target/R600/AMDGPUFrameLowering.cpp
index 0325a00..e7e90d3 100644
--- a/lib/Target/R600/AMDGPUFrameLowering.cpp
+++ b/lib/Target/R600/AMDGPUFrameLowering.cpp
@@ -97,7 +97,7 @@ int AMDGPUFrameLowering::getFrameIndexOffset(const MachineFunction &MF,
const TargetFrameLowering::SpillSlot *
AMDGPUFrameLowering::getCalleeSavedSpillSlots(unsigned &NumEntries) const {
NumEntries = 0;
- return 0;
+ return nullptr;
}
void
AMDGPUFrameLowering::emitPrologue(MachineFunction &MF) const {
diff --git a/lib/Target/R600/AMDGPUFrameLowering.h b/lib/Target/R600/AMDGPUFrameLowering.h
index cf5742e..d18ede5 100644
--- a/lib/Target/R600/AMDGPUFrameLowering.h
+++ b/lib/Target/R600/AMDGPUFrameLowering.h
@@ -33,12 +33,13 @@ public:
/// \returns The number of 32-bit sub-registers that are used when storing
/// values to the stack.
- virtual unsigned getStackWidth(const MachineFunction &MF) const;
- virtual int getFrameIndexOffset(const MachineFunction &MF, int FI) const;
- virtual const SpillSlot *getCalleeSavedSpillSlots(unsigned &NumEntries) const;
- virtual void emitPrologue(MachineFunction &MF) const;
- virtual void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
- virtual bool hasFP(const MachineFunction &MF) const;
+ unsigned getStackWidth(const MachineFunction &MF) const;
+ int getFrameIndexOffset(const MachineFunction &MF, int FI) const override;
+ const SpillSlot *
+ getCalleeSavedSpillSlots(unsigned &NumEntries) const override;
+ void emitPrologue(MachineFunction &MF) const override;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
+ bool hasFP(const MachineFunction &MF) const override;
};
} // namespace llvm
#endif // AMDILFRAME_LOWERING_H
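Many of the header hunks above are pure C++11 modernization: 'virtual' declarations in derived classes become 'override', which turns a silently mismatched signature (a brand-new virtual that overrides nothing) into a hard compile error. A minimal illustration of the failure mode being guarded against:

    struct Base {
      virtual int frameIndexOffset(int FI) const { return FI; }
      virtual ~Base() = default;
    };

    struct Derived : Base {
      // With 'override', a wrong signature is a compile error instead of a
      // silent new virtual:
      int frameIndexOffset(int FI) const override { return FI + 8; }
      // int frameIndexOffset(unsigned) const override;  // would not compile
    };

    int main() {
      Derived D;
      Base &B = D;
      return B.frameIndexOffset(0) == 8 ? 0 : 1;  // dispatches to Derived
    }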
diff --git a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
index e8c5f5b..f1f0bfa 100644
--- a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
+++ b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
@@ -16,15 +16,11 @@
#include "AMDGPURegisterInfo.h"
#include "R600InstrInfo.h"
#include "SIISelLowering.h"
-#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
-#include "llvm/IR/ValueMap.h"
-#include "llvm/Support/Compiler.h"
-#include <list>
-#include <queue>
+#include "llvm/IR/Function.h"
using namespace llvm;
@@ -43,11 +39,12 @@ public:
AMDGPUDAGToDAGISel(TargetMachine &TM);
virtual ~AMDGPUDAGToDAGISel();
- SDNode *Select(SDNode *N);
- virtual const char *getPassName() const;
- virtual void PostprocessISelDAG();
+ SDNode *Select(SDNode *N) override;
+ const char *getPassName() const override;
+ void PostprocessISelDAG() override;
private:
+ bool isInlineImmediate(SDNode *N) const;
inline SDValue getSmallIPtrImm(unsigned Imm);
bool FoldOperand(SDValue &Src, SDValue &Sel, SDValue &Neg, SDValue &Abs,
const R600InstrInfo *TII);
@@ -58,11 +55,9 @@ private:
bool SelectADDRParam(SDValue Addr, SDValue& R1, SDValue& R2);
bool SelectADDR(SDValue N, SDValue &R1, SDValue &R2);
bool SelectADDR64(SDValue N, SDValue &R1, SDValue &R2);
- SDValue SimplifyI24(SDValue &Op);
- bool SelectI24(SDValue Addr, SDValue &Op);
- bool SelectU24(SDValue Addr, SDValue &Op);
static bool checkType(const Value *ptr, unsigned int addrspace);
+ static bool checkPrivateAddress(const MachineMemOperand *Op);
static bool isGlobalStore(const StoreSDNode *N);
static bool isPrivateStore(const StoreSDNode *N);
@@ -77,10 +72,15 @@ private:
bool isLocalLoad(const LoadSDNode *N) const;
bool isRegionLoad(const LoadSDNode *N) const;
+ /// \returns True if the current basic block being selected is at control
+ /// flow depth 0, meaning that the current block dominates the
+ /// exit block.
+ bool isCFDepth0() const;
+
const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr);
- bool SelectGlobalValueVariableOffset(SDValue Addr,
- SDValue &BaseReg, SDValue& Offset);
+ bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg,
+ SDValue& Offset);
bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
@@ -91,8 +91,7 @@ private:
/// \brief This pass converts a legalized DAG into an AMDGPU-specific
/// DAG, ready for instruction scheduling.
-FunctionPass *llvm::createAMDGPUISelDag(TargetMachine &TM
- ) {
+FunctionPass *llvm::createAMDGPUISelDag(TargetMachine &TM) {
return new AMDGPUDAGToDAGISel(TM);
}
@@ -103,32 +102,39 @@ AMDGPUDAGToDAGISel::AMDGPUDAGToDAGISel(TargetMachine &TM)
AMDGPUDAGToDAGISel::~AMDGPUDAGToDAGISel() {
}
+bool AMDGPUDAGToDAGISel::isInlineImmediate(SDNode *N) const {
+ const SITargetLowering *TL
+ = static_cast<const SITargetLowering *>(getTargetLowering());
+ return TL->analyzeImmediate(N) == 0;
+}
+
/// \brief Determine the register class for \p OpNo
/// \returns The register class of the virtual register that will be used for
/// the given operand number \p OpNo or nullptr if the register class cannot be
/// determined.
const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
unsigned OpNo) const {
- if (!N->isMachineOpcode()) {
- return NULL;
- }
+ if (!N->isMachineOpcode())
+ return nullptr;
+
switch (N->getMachineOpcode()) {
default: {
const MCInstrDesc &Desc = TM.getInstrInfo()->get(N->getMachineOpcode());
unsigned OpIdx = Desc.getNumDefs() + OpNo;
if (OpIdx >= Desc.getNumOperands())
- return NULL;
+ return nullptr;
int RegClass = Desc.OpInfo[OpIdx].RegClass;
- if (RegClass == -1) {
- return NULL;
- }
+ if (RegClass == -1)
+ return nullptr;
+
return TM.getRegisterInfo()->getRegClass(RegClass);
}
case AMDGPU::REG_SEQUENCE: {
- const TargetRegisterClass *SuperRC = TM.getRegisterInfo()->getRegClass(
- cast<ConstantSDNode>(N->getOperand(0))->getZExtValue());
- unsigned SubRegIdx =
- dyn_cast<ConstantSDNode>(N->getOperand(OpNo + 1))->getZExtValue();
+ unsigned RCID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
+ const TargetRegisterClass *SuperRC = TM.getRegisterInfo()->getRegClass(RCID);
+
+ SDValue SubRegOp = N->getOperand(OpNo + 1);
+ unsigned SubRegIdx = cast<ConstantSDNode>(SubRegOp)->getZExtValue();
return TM.getRegisterInfo()->getSubClassWithSubReg(SuperRC, SubRegIdx);
}
}
@@ -139,7 +145,7 @@ SDValue AMDGPUDAGToDAGISel::getSmallIPtrImm(unsigned int Imm) {
}
bool AMDGPUDAGToDAGISel::SelectADDRParam(
- SDValue Addr, SDValue& R1, SDValue& R2) {
+ SDValue Addr, SDValue& R1, SDValue& R2) {
if (Addr.getOpcode() == ISD::FrameIndex) {
if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
@@ -196,15 +202,16 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
unsigned int Opc = N->getOpcode();
if (N->isMachineOpcode()) {
N->setNodeId(-1);
- return NULL; // Already selected.
+ return nullptr; // Already selected.
}
+
+ const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
switch (Opc) {
default: break;
// We are selecting i64 ADD here instead of custom lower it during
// DAG legalization, so we can fold some i64 ADDs used for address
// calculation into the LOAD and STORE instructions.
case ISD::ADD: {
- const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
if (N->getValueType(0) != MVT::i64 ||
ST.getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
break;
@@ -232,12 +239,13 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
AddLoArgs.push_back(SDValue(Lo0, 0));
AddLoArgs.push_back(SDValue(Lo1, 0));
- SDNode *AddLo = CurDAG->getMachineNode(AMDGPU::S_ADD_I32, DL,
- VTList, AddLoArgs);
+ SDNode *AddLo = CurDAG->getMachineNode(
+ isCFDepth0() ? AMDGPU::S_ADD_I32 : AMDGPU::V_ADD_I32_e32,
+ DL, VTList, AddLoArgs);
SDValue Carry = SDValue(AddLo, 1);
- SDNode *AddHi = CurDAG->getMachineNode(AMDGPU::S_ADDC_U32, DL,
- MVT::i32, SDValue(Hi0, 0),
- SDValue(Hi1, 0), Carry);
+ SDNode *AddHi = CurDAG->getMachineNode(
+ isCFDepth0() ? AMDGPU::S_ADDC_U32 : AMDGPU::V_ADDC_U32_e32,
+ DL, MVT::i32, SDValue(Hi0, 0), SDValue(Hi1, 0), Carry);
SDValue Args[5] = {
CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, MVT::i32),
@@ -246,11 +254,10 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
SDValue(AddHi,0),
Sub1,
};
- return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, MVT::i64, Args, 5);
+ return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, MVT::i64, Args);
}
case ISD::BUILD_VECTOR: {
unsigned RegClassID;
- const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
const AMDGPURegisterInfo *TRI =
static_cast<const AMDGPURegisterInfo*>(TM.getRegisterInfo());
const SIRegisterInfo *SIRI =
@@ -316,7 +323,7 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
// 16 = Max Num Vector Elements
// 2 = 2 REG_SEQUENCE operands per element (value, subreg index)
// 1 = Vector Register Class
- SDValue RegSeqArgs[16 * 2 + 1];
+ SmallVector<SDValue, 16 * 2 + 1> RegSeqArgs(N->getNumOperands() * 2 + 1);
RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, MVT::i32);
bool IsRegSeq = true;
@@ -333,11 +340,10 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
if (!IsRegSeq)
break;
return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(),
- RegSeqArgs, 2 * N->getNumOperands() + 1);
+ RegSeqArgs);
}
case ISD::BUILD_PAIR: {
SDValue RC, SubReg0, SubReg1;
- const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
break;
}
@@ -346,7 +352,7 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, MVT::i32);
SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, MVT::i32);
} else if (N->getValueType(0) == MVT::i64) {
- RC = CurDAG->getTargetConstant(AMDGPU::VSrc_64RegClassID, MVT::i32);
+ RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, MVT::i32);
SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32);
SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32);
} else {
@@ -357,8 +363,37 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE,
SDLoc(N), N->getValueType(0), Ops);
}
- case AMDGPUISD::REGISTER_LOAD: {
+
+ case ISD::Constant:
+ case ISD::ConstantFP: {
const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
+ if (ST.getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
+ N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N))
+ break;
+
+ uint64_t Imm;
+ if (ConstantFPSDNode *FP = dyn_cast<ConstantFPSDNode>(N))
+ Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue();
+ else {
+ ConstantSDNode *C = cast<ConstantSDNode>(N);
+ Imm = C->getZExtValue();
+ }
+
+ SDNode *Lo = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SDLoc(N), MVT::i32,
+ CurDAG->getConstant(Imm & 0xFFFFFFFF, MVT::i32));
+ SDNode *Hi = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SDLoc(N), MVT::i32,
+ CurDAG->getConstant(Imm >> 32, MVT::i32));
+ const SDValue Ops[] = {
+ CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, MVT::i32),
+ SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32),
+ SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32)
+ };
+
+ return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, SDLoc(N),
+ N->getValueType(0), Ops);
+ }
+
+ case AMDGPUISD::REGISTER_LOAD: {
if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS)
break;
SDValue Addr, Offset;
@@ -375,7 +410,6 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
Ops);
}
case AMDGPUISD::REGISTER_STORE: {
- const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS)
break;
SDValue Addr, Offset;
@@ -391,42 +425,95 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
CurDAG->getVTList(MVT::Other),
Ops);
}
+
+ case AMDGPUISD::BFE_I32:
+ case AMDGPUISD::BFE_U32: {
+ if (ST.getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
+ break;
+
+ // There is a scalar version available, but unlike the vector version which
+ // has a separate operand for the offset and width, the scalar version packs
+ // the width and offset into a single operand. Try to move to the scalar
+ // version if the offsets are constant, so that we can try to keep extended
+ // loads of kernel arguments in SGPRs.
+
+ // TODO: Technically we could try to pattern match scalar bitshifts of
+ // dynamic values, but it's probably not useful.
+ ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ if (!Offset)
+ break;
+
+ ConstantSDNode *Width = dyn_cast<ConstantSDNode>(N->getOperand(2));
+ if (!Width)
+ break;
+
+ bool Signed = Opc == AMDGPUISD::BFE_I32;
+
+ // Transformation function: pack the offset and width of a BFE into
+ // the format expected by S_BFE_I32 / S_BFE_U32. In the second
+ // source, bits [5:0] contain the offset and bits [22:16] the width.
+
+ uint32_t OffsetVal = Offset->getZExtValue();
+ uint32_t WidthVal = Width->getZExtValue();
+
+ uint32_t PackedVal = OffsetVal | WidthVal << 16;
+
+ SDValue PackedOffsetWidth = CurDAG->getTargetConstant(PackedVal, MVT::i32);
+ return CurDAG->getMachineNode(Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32,
+ SDLoc(N),
+ MVT::i32,
+ N->getOperand(0),
+ PackedOffsetWidth);
+
+ }
}
return SelectCode(N);
}
-bool AMDGPUDAGToDAGISel::checkType(const Value *ptr, unsigned int addrspace) {
- if (!ptr) {
+bool AMDGPUDAGToDAGISel::checkType(const Value *Ptr, unsigned AS) {
+ assert(AS != 0 && "Use checkPrivateAddress instead.");
+ if (!Ptr)
return false;
- }
- Type *ptrType = ptr->getType();
- return dyn_cast<PointerType>(ptrType)->getAddressSpace() == addrspace;
+
+ return Ptr->getType()->getPointerAddressSpace() == AS;
+}
+
+bool AMDGPUDAGToDAGISel::checkPrivateAddress(const MachineMemOperand *Op) {
+ if (Op->getPseudoValue())
+ return true;
+
+ if (PointerType *PT = dyn_cast<PointerType>(Op->getValue()->getType()))
+ return PT->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS;
+
+ return false;
}
bool AMDGPUDAGToDAGISel::isGlobalStore(const StoreSDNode *N) {
- return checkType(N->getSrcValue(), AMDGPUAS::GLOBAL_ADDRESS);
+ return checkType(N->getMemOperand()->getValue(), AMDGPUAS::GLOBAL_ADDRESS);
}
bool AMDGPUDAGToDAGISel::isPrivateStore(const StoreSDNode *N) {
- return (!checkType(N->getSrcValue(), AMDGPUAS::LOCAL_ADDRESS)
- && !checkType(N->getSrcValue(), AMDGPUAS::GLOBAL_ADDRESS)
- && !checkType(N->getSrcValue(), AMDGPUAS::REGION_ADDRESS));
+ const Value *MemVal = N->getMemOperand()->getValue();
+ return (!checkType(MemVal, AMDGPUAS::LOCAL_ADDRESS) &&
+ !checkType(MemVal, AMDGPUAS::GLOBAL_ADDRESS) &&
+ !checkType(MemVal, AMDGPUAS::REGION_ADDRESS));
}
bool AMDGPUDAGToDAGISel::isLocalStore(const StoreSDNode *N) {
- return checkType(N->getSrcValue(), AMDGPUAS::LOCAL_ADDRESS);
+ return checkType(N->getMemOperand()->getValue(), AMDGPUAS::LOCAL_ADDRESS);
}
bool AMDGPUDAGToDAGISel::isRegionStore(const StoreSDNode *N) {
- return checkType(N->getSrcValue(), AMDGPUAS::REGION_ADDRESS);
+ return checkType(N->getMemOperand()->getValue(), AMDGPUAS::REGION_ADDRESS);
}
bool AMDGPUDAGToDAGISel::isConstantLoad(const LoadSDNode *N, int CbId) const {
- if (CbId == -1) {
- return checkType(N->getSrcValue(), AMDGPUAS::CONSTANT_ADDRESS);
- }
- return checkType(N->getSrcValue(), AMDGPUAS::CONSTANT_BUFFER_0 + CbId);
+ const Value *MemVal = N->getMemOperand()->getValue();
+ if (CbId == -1)
+ return checkType(MemVal, AMDGPUAS::CONSTANT_ADDRESS);
+
+ return checkType(MemVal, AMDGPUAS::CONSTANT_BUFFER_0 + CbId);
}
bool AMDGPUDAGToDAGISel::isGlobalLoad(const LoadSDNode *N) const {
@@ -437,27 +524,26 @@ bool AMDGPUDAGToDAGISel::isGlobalLoad(const LoadSDNode *N) const {
return true;
}
}
- return checkType(N->getSrcValue(), AMDGPUAS::GLOBAL_ADDRESS);
+ return checkType(N->getMemOperand()->getValue(), AMDGPUAS::GLOBAL_ADDRESS);
}
bool AMDGPUDAGToDAGISel::isParamLoad(const LoadSDNode *N) const {
- return checkType(N->getSrcValue(), AMDGPUAS::PARAM_I_ADDRESS);
+ return checkType(N->getMemOperand()->getValue(), AMDGPUAS::PARAM_I_ADDRESS);
}
bool AMDGPUDAGToDAGISel::isLocalLoad(const LoadSDNode *N) const {
- return checkType(N->getSrcValue(), AMDGPUAS::LOCAL_ADDRESS);
+ return checkType(N->getMemOperand()->getValue(), AMDGPUAS::LOCAL_ADDRESS);
}
bool AMDGPUDAGToDAGISel::isRegionLoad(const LoadSDNode *N) const {
- return checkType(N->getSrcValue(), AMDGPUAS::REGION_ADDRESS);
+ return checkType(N->getMemOperand()->getValue(), AMDGPUAS::REGION_ADDRESS);
}
bool AMDGPUDAGToDAGISel::isCPLoad(const LoadSDNode *N) const {
MachineMemOperand *MMO = N->getMemOperand();
- if (checkType(N->getSrcValue(), AMDGPUAS::PRIVATE_ADDRESS)) {
+ if (checkPrivateAddress(N->getMemOperand())) {
if (MMO) {
- const Value *V = MMO->getValue();
- const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V);
+ const PseudoSourceValue *PSV = MMO->getPseudoValue();
if (PSV && PSV == PseudoSourceValue::getConstantPool()) {
return true;
}
@@ -467,24 +553,34 @@ bool AMDGPUDAGToDAGISel::isCPLoad(const LoadSDNode *N) const {
}
bool AMDGPUDAGToDAGISel::isPrivateLoad(const LoadSDNode *N) const {
- if (checkType(N->getSrcValue(), AMDGPUAS::PRIVATE_ADDRESS)) {
+ if (checkPrivateAddress(N->getMemOperand())) {
// Check to make sure we are not a constant pool load or a constant load
// that is marked as a private load
if (isCPLoad(N) || isConstantLoad(N, -1)) {
return false;
}
}
- if (!checkType(N->getSrcValue(), AMDGPUAS::LOCAL_ADDRESS)
- && !checkType(N->getSrcValue(), AMDGPUAS::GLOBAL_ADDRESS)
- && !checkType(N->getSrcValue(), AMDGPUAS::REGION_ADDRESS)
- && !checkType(N->getSrcValue(), AMDGPUAS::CONSTANT_ADDRESS)
- && !checkType(N->getSrcValue(), AMDGPUAS::PARAM_D_ADDRESS)
- && !checkType(N->getSrcValue(), AMDGPUAS::PARAM_I_ADDRESS)) {
+
+ const Value *MemVal = N->getMemOperand()->getValue();
+ if (!checkType(MemVal, AMDGPUAS::LOCAL_ADDRESS) &&
+ !checkType(MemVal, AMDGPUAS::GLOBAL_ADDRESS) &&
+ !checkType(MemVal, AMDGPUAS::REGION_ADDRESS) &&
+ !checkType(MemVal, AMDGPUAS::CONSTANT_ADDRESS) &&
+ !checkType(MemVal, AMDGPUAS::PARAM_D_ADDRESS) &&
+ !checkType(MemVal, AMDGPUAS::PARAM_I_ADDRESS)) {
return true;
}
return false;
}
+bool AMDGPUDAGToDAGISel::isCFDepth0() const {
+ // FIXME: Figure out a way to use DominatorTree analysis here.
+ const BasicBlock *CurBlock = FuncInfo->MBB->getBasicBlock();
+ const Function *Fn = FuncInfo->Fn;
+ return &Fn->front() == CurBlock || &Fn->back() == CurBlock;
+}
+
const char *AMDGPUDAGToDAGISel::getPassName() const {
return "AMDGPU DAG->DAG Pattern Instruction Selection";
}
@@ -499,7 +595,7 @@ const char *AMDGPUDAGToDAGISel::getPassName() const {
//===----------------------------------------------------------------------===//
bool AMDGPUDAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
- SDValue& IntPtr) {
+ SDValue& IntPtr) {
if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) {
IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, true);
return true;
@@ -509,7 +605,7 @@ bool AMDGPUDAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
bool AMDGPUDAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
SDValue& BaseReg, SDValue &Offset) {
- if (!dyn_cast<ConstantSDNode>(Addr)) {
+ if (!isa<ConstantSDNode>(Addr)) {
BaseReg = Addr;
Offset = CurDAG->getIntPtrConstant(0, true);
return true;
@@ -519,7 +615,7 @@ bool AMDGPUDAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
SDValue &Offset) {
- ConstantSDNode * IMMOffset;
+ ConstantSDNode *IMMOffset;
if (Addr.getOpcode() == ISD::ADD
&& (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
@@ -563,52 +659,9 @@ bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
return true;
}
-SDValue AMDGPUDAGToDAGISel::SimplifyI24(SDValue &Op) {
- APInt Demanded = APInt(32, 0x00FFFFFF);
- APInt KnownZero, KnownOne;
- TargetLowering::TargetLoweringOpt TLO(*CurDAG, true, true);
- const TargetLowering *TLI = getTargetLowering();
- if (TLI->SimplifyDemandedBits(Op, Demanded, KnownZero, KnownOne, TLO)) {
- CurDAG->ReplaceAllUsesWith(Op, TLO.New);
- CurDAG->RepositionNode(Op.getNode(), TLO.New.getNode());
- return SimplifyI24(TLO.New);
- } else {
- return Op;
- }
-}
-
-bool AMDGPUDAGToDAGISel::SelectI24(SDValue Op, SDValue &I24) {
-
- assert(Op.getValueType() == MVT::i32);
-
- if (CurDAG->ComputeNumSignBits(Op) == 9) {
- I24 = SimplifyI24(Op);
- return true;
- }
- return false;
-}
-
-bool AMDGPUDAGToDAGISel::SelectU24(SDValue Op, SDValue &U24) {
- APInt KnownZero;
- APInt KnownOne;
- CurDAG->ComputeMaskedBits(Op, KnownZero, KnownOne);
-
- assert (Op.getValueType() == MVT::i32);
-
- // ANY_EXTEND and EXTLOAD operations can only be done on types smaller than
- // i32. These smaller types are legal to use with the i24 instructions.
- if ((KnownZero & APInt(KnownZero.getBitWidth(), 0xFF000000)) == 0xFF000000 ||
- Op.getOpcode() == ISD::ANY_EXTEND ||
- ISD::isEXTLoad(Op.getNode())) {
- U24 = SimplifyI24(Op);
- return true;
- }
- return false;
-}
-
void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
const AMDGPUTargetLowering& Lowering =
- (*(const AMDGPUTargetLowering*)getTargetLowering());
+ *static_cast<const AMDGPUTargetLowering*>(getTargetLowering());
bool IsModified = false;
do {
IsModified = false;
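Two of the additions in this file come down to plain bit manipulation: a 64-bit constant is split into two 32-bit halves moved by a pair of S_MOV_B32s, and a BFE offset/width pair is packed into the single scalar operand format, offset in bits [5:0] and width in bits [22:16]. The same arithmetic in plain C++ (a sketch of the math, not the SelectionDAG code):

    #include <cassert>
    #include <cstdint>

    // Split a 64-bit immediate into the halves moved by two S_MOV_B32s.
    static void splitImm64(uint64_t Imm, uint32_t &Lo, uint32_t &Hi) {
      Lo = static_cast<uint32_t>(Imm & 0xFFFFFFFF);
      Hi = static_cast<uint32_t>(Imm >> 32);
    }

    // Pack a BFE offset/width pair into the S_BFE_{I,U}32 source-1 format:
    // bits [5:0] hold the offset, bits [22:16] the width.
    static uint32_t packBFE(uint32_t Offset, uint32_t Width) {
      assert(Offset < 64 && Width < 128 && "fields must fit their bit ranges");
      return Offset | (Width << 16);
    }

    int main() {
      uint32_t Lo, Hi;
      splitImm64(0x1122334455667788ULL, Lo, Hi);
      return (Lo == 0x55667788 && Hi == 0x11223344 &&
              packBFE(8, 16) == 0x00100008) ? 0 : 1;
    }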
diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp
index 183725c..6c443ea 100644
--- a/lib/Target/R600/AMDGPUISelLowering.cpp
+++ b/lib/Target/R600/AMDGPUISelLowering.cpp
@@ -28,8 +28,50 @@
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/DiagnosticPrinter.h"
using namespace llvm;
+
+namespace {
+
+/// Diagnostic information for unimplemented or unsupported feature reporting.
+class DiagnosticInfoUnsupported : public DiagnosticInfo {
+private:
+ const Twine &Description;
+ const Function &Fn;
+
+ static int KindID;
+
+ static int getKindID() {
+ if (KindID == 0)
+ KindID = llvm::getNextAvailablePluginDiagnosticKind();
+ return KindID;
+ }
+
+public:
+ DiagnosticInfoUnsupported(const Function &Fn, const Twine &Desc,
+ DiagnosticSeverity Severity = DS_Error)
+ : DiagnosticInfo(getKindID(), Severity),
+ Description(Desc),
+ Fn(Fn) { }
+
+ const Function &getFunction() const { return Fn; }
+ const Twine &getDescription() const { return Description; }
+
+ void print(DiagnosticPrinter &DP) const override {
+ DP << "unsupported " << getDescription() << " in " << Fn.getName();
+ }
+
+ static bool classof(const DiagnosticInfo *DI) {
+ return DI->getKind() == getKindID();
+ }
+};
+
+int DiagnosticInfoUnsupported::KindID = 0;
+}
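DiagnosticInfoUnsupported above follows LLVM's plugin-diagnostic pattern: a kind ID allocated lazily on first use and shared by the class, plus a classof() so isa<>/dyn_cast<>-style dispatch works on diagnostics. The shape of the pattern, stripped of the LLVM types (a sketch; nextPluginKind stands in for getNextAvailablePluginDiagnosticKind):

    #include <cstdio>

    // Stand-in for llvm::getNextAvailablePluginDiagnosticKind().
    static int nextPluginKind() { static int Next = 1000; return ++Next; }

    struct DiagBase {
      explicit DiagBase(int Kind) : Kind(Kind) {}
      int getKind() const { return Kind; }
      virtual void print() const = 0;
      virtual ~DiagBase() = default;
    private:
      int Kind;
    };

    class DiagUnsupported : public DiagBase {
      static int getKindID() {
        static int KindID = nextPluginKind(); // allocated once, on first use
        return KindID;
      }
    public:
      DiagUnsupported() : DiagBase(getKindID()) {}
      void print() const override { printf("unsupported feature\n"); }
      // Enables isa<>/dyn_cast<>-style checks against this kind.
      static bool classof(const DiagBase *D) {
        return D->getKind() == getKindID();
      }
    };

    int main() {
      DiagUnsupported D;
      const DiagBase *B = &D;
      if (DiagUnsupported::classof(B))
        B->print();
    }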
+
static bool allocateStack(unsigned ValNo, MVT ValVT, MVT LocVT,
CCValAssign::LocInfo LocInfo,
ISD::ArgFlagsTy ArgFlags, CCState &State) {
@@ -88,6 +130,9 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
setOperationAction(ISD::STORE, MVT::f64, Promote);
AddPromotedToType(ISD::STORE, MVT::f64, MVT::i64);
+ setOperationAction(ISD::STORE, MVT::v2f64, Promote);
+ AddPromotedToType(ISD::STORE, MVT::v2f64, MVT::v2i64);
+
// Custom lowering of vector stores is required for local address space
// stores.
setOperationAction(ISD::STORE, MVT::v4i32, Custom);
@@ -103,6 +148,8 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
// handle 64-bit stores.
setTruncStoreAction(MVT::v4i32, MVT::v4i16, Expand);
+ setTruncStoreAction(MVT::i64, MVT::i16, Expand);
+ setTruncStoreAction(MVT::i64, MVT::i8, Expand);
setTruncStoreAction(MVT::i64, MVT::i1, Expand);
setTruncStoreAction(MVT::v2i64, MVT::v2i1, Expand);
setTruncStoreAction(MVT::v4i64, MVT::v4i1, Expand);
@@ -126,6 +173,9 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
setOperationAction(ISD::LOAD, MVT::f64, Promote);
AddPromotedToType(ISD::LOAD, MVT::f64, MVT::i64);
+ setOperationAction(ISD::LOAD, MVT::v2f64, Promote);
+ AddPromotedToType(ISD::LOAD, MVT::v2f64, MVT::v2i64);
+
setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i32, Custom);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v4f32, Custom);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i32, Custom);
@@ -152,15 +202,19 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
setOperationAction(ISD::BR_CC, MVT::i1, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::i64, Expand);
+
setOperationAction(ISD::FNEG, MVT::v2f32, Expand);
setOperationAction(ISD::FNEG, MVT::v4f32, Expand);
setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
setOperationAction(ISD::MUL, MVT::i64, Expand);
+ setOperationAction(ISD::SUB, MVT::i64, Expand);
setOperationAction(ISD::UDIV, MVT::i32, Expand);
setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
+ setOperationAction(ISD::UDIVREM, MVT::i64, Custom);
setOperationAction(ISD::UREM, MVT::i32, Expand);
setOperationAction(ISD::VSELECT, MVT::v2f32, Expand);
setOperationAction(ISD::VSELECT, MVT::v4f32, Expand);
@@ -168,10 +222,8 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
static const MVT::SimpleValueType IntTypes[] = {
MVT::v2i32, MVT::v4i32
};
- const size_t NumIntTypes = array_lengthof(IntTypes);
- for (unsigned int x = 0; x < NumIntTypes; ++x) {
- MVT::SimpleValueType VT = IntTypes[x];
+ for (MVT VT : IntTypes) {
// Expand the following operations for the current type by default.
setOperationAction(ISD::ADD, VT, Expand);
setOperationAction(ISD::AND, VT, Expand);
@@ -195,12 +247,11 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
static const MVT::SimpleValueType FloatTypes[] = {
MVT::v2f32, MVT::v4f32
};
- const size_t NumFloatTypes = array_lengthof(FloatTypes);
- for (unsigned int x = 0; x < NumFloatTypes; ++x) {
- MVT::SimpleValueType VT = FloatTypes[x];
+ for (MVT VT : FloatTypes) {
setOperationAction(ISD::FABS, VT, Expand);
setOperationAction(ISD::FADD, VT, Expand);
+ setOperationAction(ISD::FCOS, VT, Expand);
setOperationAction(ISD::FDIV, VT, Expand);
setOperationAction(ISD::FPOW, VT, Expand);
setOperationAction(ISD::FFLOOR, VT, Expand);
@@ -208,25 +259,13 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
setOperationAction(ISD::FMUL, VT, Expand);
setOperationAction(ISD::FRINT, VT, Expand);
setOperationAction(ISD::FSQRT, VT, Expand);
+ setOperationAction(ISD::FSIN, VT, Expand);
setOperationAction(ISD::FSUB, VT, Expand);
setOperationAction(ISD::SELECT, VT, Expand);
}
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Custom);
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i1, Custom);
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i1, Custom);
-
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Custom);
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Custom);
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Custom);
-
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Custom);
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Custom);
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Custom);
-
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Custom);
-
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Custom);
+ setTargetDAGCombine(ISD::MUL);
+ setTargetDAGCombine(ISD::SELECT_CC);
}
//===----------------------------------------------------------------------===//
@@ -325,6 +364,25 @@ SDValue AMDGPUTargetLowering::LowerReturn(
// Target specific lowering
//===---------------------------------------------------------------------===//
+SDValue AMDGPUTargetLowering::LowerCall(CallLoweringInfo &CLI,
+ SmallVectorImpl<SDValue> &InVals) const {
+ SDValue Callee = CLI.Callee;
+ SelectionDAG &DAG = CLI.DAG;
+
+ const Function &Fn = *DAG.getMachineFunction().getFunction();
+
+ StringRef FuncName("<unknown>");
+
+ if (const ExternalSymbolSDNode *G = dyn_cast<ExternalSymbolSDNode>(Callee))
+ FuncName = G->getSymbol();
+ else if (const GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
+ FuncName = G->getGlobal()->getName();
+
+ DiagnosticInfoUnsupported NoCalls(Fn, "call to function " + FuncName);
+ DAG.getContext()->diagnose(NoCalls);
+ return SDValue();
+}
+
SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
const {
switch (Op.getOpcode()) {
@@ -361,12 +419,111 @@ void AMDGPUTargetLowering::ReplaceNodeResults(SDNode *N,
// ReplaceNodeResults to sext_in_reg to an illegal type, so we'll just do
// nothing here and let the illegal result integer be handled normally.
return;
+ case ISD::UDIV: {
+ SDValue Op = SDValue(N, 0);
+ SDLoc DL(Op);
+ EVT VT = Op.getValueType();
+ SDValue UDIVREM = DAG.getNode(ISD::UDIVREM, DL, DAG.getVTList(VT, VT),
+ N->getOperand(0), N->getOperand(1));
+ Results.push_back(UDIVREM);
+ break;
+ }
+ case ISD::UREM: {
+ SDValue Op = SDValue(N, 0);
+ SDLoc DL(Op);
+ EVT VT = Op.getValueType();
+ SDValue UDIVREM = DAG.getNode(ISD::UDIVREM, DL, DAG.getVTList(VT, VT),
+ N->getOperand(0), N->getOperand(1));
+ Results.push_back(UDIVREM.getValue(1));
+ break;
+ }
+ case ISD::UDIVREM: {
+ SDValue Op = SDValue(N, 0);
+ SDLoc DL(Op);
+ EVT VT = Op.getValueType();
+ EVT HalfVT = VT.getHalfSizedIntegerVT(*DAG.getContext());
+
+ SDValue one = DAG.getConstant(1, HalfVT);
+ SDValue zero = DAG.getConstant(0, HalfVT);
+
+ // Hi/Lo split
+ SDValue LHS = N->getOperand(0);
+ SDValue LHS_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, LHS, zero);
+ SDValue LHS_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, LHS, one);
+
+ SDValue RHS = N->getOperand(1);
+ SDValue RHS_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, RHS, zero);
+ SDValue RHS_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, RHS, one);
+
+ // Get Speculative values
+ SDValue DIV_Part = DAG.getNode(ISD::UDIV, DL, HalfVT, LHS_Hi, RHS_Lo);
+ SDValue REM_Part = DAG.getNode(ISD::UREM, DL, HalfVT, LHS_Hi, RHS_Lo);
+
+ SDValue REM_Hi = zero;
+ SDValue REM_Lo = DAG.getSelectCC(DL, RHS_Hi, zero, REM_Part, LHS_Hi, ISD::SETEQ);
+
+ SDValue DIV_Hi = DAG.getSelectCC(DL, RHS_Hi, zero, DIV_Part, zero, ISD::SETEQ);
+ SDValue DIV_Lo = zero;
+
+ const unsigned halfBitWidth = HalfVT.getSizeInBits();
+
+ for (unsigned i = 0; i < halfBitWidth; ++i) {
+ SDValue POS = DAG.getConstant(halfBitWidth - i - 1, HalfVT);
+ // Get Value of high bit
+ SDValue HBit;
+ if (halfBitWidth == 32 && Subtarget->hasBFE()) {
+ HBit = DAG.getNode(AMDGPUISD::BFE_U32, DL, HalfVT, LHS_Lo, POS, one);
+ } else {
+ HBit = DAG.getNode(ISD::SRL, DL, HalfVT, LHS_Lo, POS);
+ HBit = DAG.getNode(ISD::AND, DL, HalfVT, HBit, one);
+ }
+
+ SDValue Carry = DAG.getNode(ISD::SRL, DL, HalfVT, REM_Lo,
+ DAG.getConstant(halfBitWidth - 1, HalfVT));
+ REM_Hi = DAG.getNode(ISD::SHL, DL, HalfVT, REM_Hi, one);
+ REM_Hi = DAG.getNode(ISD::OR, DL, HalfVT, REM_Hi, Carry);
+
+ REM_Lo = DAG.getNode(ISD::SHL, DL, HalfVT, REM_Lo, one);
+ REM_Lo = DAG.getNode(ISD::OR, DL, HalfVT, REM_Lo, HBit);
+
+ SDValue REM = DAG.getNode(ISD::BUILD_PAIR, DL, VT, REM_Lo, REM_Hi);
+
+ SDValue BIT = DAG.getConstant(1 << (halfBitWidth - i - 1), HalfVT);
+ SDValue realBIT = DAG.getSelectCC(DL, REM, RHS, BIT, zero, ISD::SETGE);
+
+ DIV_Lo = DAG.getNode(ISD::OR, DL, HalfVT, DIV_Lo, realBIT);
+
+ // Update REM
+
+ SDValue REM_sub = DAG.getNode(ISD::SUB, DL, VT, REM, RHS);
+
+ REM = DAG.getSelectCC(DL, REM, RHS, REM_sub, REM, ISD::SETGE);
+ REM_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, REM, zero);
+ REM_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, REM, one);
+ }
+ SDValue REM = DAG.getNode(ISD::BUILD_PAIR, DL, VT, REM_Lo, REM_Hi);
+ SDValue DIV = DAG.getNode(ISD::BUILD_PAIR, DL, VT, DIV_Lo, DIV_Hi);
+ Results.push_back(DIV);
+ Results.push_back(REM);
+ break;
+ }
default:
return;
}
}
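The UDIVREM expansion above is classic restoring long division run over 32-bit halves: when the divisor fits in 32 bits, one speculative 32-bit divide handles the high half up front; the loop then shifts each remaining dividend bit into the remainder and, whenever REM >= RHS, subtracts the divisor and sets the matching quotient bit. The same algorithm with plain integers (a sketch; it assumes a nonzero divisor below 2^63 so the shifted remainder cannot overflow):

    #include <cstdint>
    #include <cstdio>

    // Restoring long division over 32-bit halves, mirroring the UDIVREM
    // expansion above (the real code builds SelectionDAG nodes instead).
    static void udivrem64(uint64_t LHS, uint64_t RHS,
                          uint64_t &Div, uint64_t &Rem) {
      uint32_t LHS_Lo = (uint32_t)LHS, LHS_Hi = (uint32_t)(LHS >> 32);
      uint32_t RHS_Lo = (uint32_t)RHS, RHS_Hi = (uint32_t)(RHS >> 32);

      // Speculative values: a 32-bit divisor lets us divide the high half
      // directly; otherwise the whole high half flows into the remainder.
      uint32_t DIV_Hi = (RHS_Hi == 0) ? LHS_Hi / RHS_Lo : 0;
      uint64_t REM    = (RHS_Hi == 0) ? LHS_Hi % RHS_Lo : LHS_Hi;
      uint32_t DIV_Lo = 0;

      for (unsigned i = 0; i < 32; ++i) {
        unsigned pos = 32 - i - 1;
        uint32_t HBit = (LHS_Lo >> pos) & 1; // next dividend bit
        REM = (REM << 1) | HBit;             // shift it into the remainder
        if (REM >= RHS) {                    // restoring step
          DIV_Lo |= 1u << pos;
          REM -= RHS;
        }
      }
      Div = ((uint64_t)DIV_Hi << 32) | DIV_Lo;
      Rem = REM;
    }

    int main() {
      uint64_t D, R;
      udivrem64(1ULL << 40, 1000, D, R);
      printf("%llu %llu\n", (unsigned long long)D, (unsigned long long)R);
      // prints: 1099511627 776
    }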
+// FIXME: This implements accesses to initialized globals in the constant
+// address space by copying them to private and accessing that. It does not
+// properly handle illegal types or vectors. The private vector loads are not
+// scalarized, and the illegal scalars hit an assertion. This technique will not
+// work well with large initializers, and this should eventually be
+// removed. Initialized globals should be placed into a data section that the
+// runtime will load into a buffer before the kernel is executed. Uses of the
+// global need to be replaced with a pointer loaded from an implicit kernel
+// argument into this buffer holding the copy of the data, which will remove the
+// need for any of this.
SDValue AMDGPUTargetLowering::LowerConstantInitializer(const Constant* Init,
const GlobalValue *GV,
const SDValue &InitPtr,
@@ -380,29 +537,60 @@ SDValue AMDGPUTargetLowering::LowerConstantInitializer(const Constant* Init,
return DAG.getStore(Chain, DL, DAG.getConstant(*CI, VT), InitPtr,
MachinePointerInfo(UndefValue::get(PtrTy)), false, false,
TD->getPrefTypeAlignment(CI->getType()));
- } else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(Init)) {
+ }
+
+ if (const ConstantFP *CFP = dyn_cast<ConstantFP>(Init)) {
EVT VT = EVT::getEVT(CFP->getType());
PointerType *PtrTy = PointerType::get(CFP->getType(), 0);
return DAG.getStore(Chain, DL, DAG.getConstantFP(*CFP, VT), InitPtr,
MachinePointerInfo(UndefValue::get(PtrTy)), false, false,
TD->getPrefTypeAlignment(CFP->getType()));
- } else if (Init->getType()->isAggregateType()) {
+ }
+
+ Type *InitTy = Init->getType();
+ if (StructType *ST = dyn_cast<StructType>(InitTy)) {
+ const StructLayout *SL = TD->getStructLayout(ST);
+
EVT PtrVT = InitPtr.getValueType();
- unsigned NumElements = Init->getType()->getArrayNumElements();
+ SmallVector<SDValue, 8> Chains;
+
+ for (unsigned I = 0, N = ST->getNumElements(); I != N; ++I) {
+ SDValue Offset = DAG.getConstant(SL->getElementOffset(I), PtrVT);
+ SDValue Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, InitPtr, Offset);
+
+ Constant *Elt = Init->getAggregateElement(I);
+ Chains.push_back(LowerConstantInitializer(Elt, GV, Ptr, Chain, DAG));
+ }
+
+ return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
+ }
+
+ if (SequentialType *SeqTy = dyn_cast<SequentialType>(InitTy)) {
+ EVT PtrVT = InitPtr.getValueType();
+
+ unsigned NumElements;
+ if (ArrayType *AT = dyn_cast<ArrayType>(SeqTy))
+ NumElements = AT->getNumElements();
+ else if (VectorType *VT = dyn_cast<VectorType>(SeqTy))
+ NumElements = VT->getNumElements();
+ else
+ llvm_unreachable("Unexpected type");
+
+ unsigned EltSize = TD->getTypeAllocSize(SeqTy->getElementType());
SmallVector<SDValue, 8> Chains;
for (unsigned i = 0; i < NumElements; ++i) {
- SDValue Offset = DAG.getConstant(i * TD->getTypeAllocSize(
- Init->getType()->getArrayElementType()), PtrVT);
+ SDValue Offset = DAG.getConstant(i * EltSize, PtrVT);
SDValue Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, InitPtr, Offset);
- Chains.push_back(LowerConstantInitializer(Init->getAggregateElement(i),
- GV, Ptr, Chain, DAG));
+
+ Constant *Elt = Init->getAggregateElement(i);
+ Chains.push_back(LowerConstantInitializer(Elt, GV, Ptr, Chain, DAG));
}
- return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, &Chains[0],
- Chains.size());
- } else {
- Init->dump();
- llvm_unreachable("Unhandled constant initializer");
+
+ return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
}
+
+ Init->dump();
+ llvm_unreachable("Unhandled constant initializer");
}
SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI,
@@ -440,7 +628,7 @@ SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI,
unsigned Size = TD->getTypeAllocSize(EltType);
unsigned Alignment = TD->getPrefTypeAlignment(EltType);
- const GlobalVariable *Var = dyn_cast<GlobalVariable>(GV);
+ const GlobalVariable *Var = cast<GlobalVariable>(GV);
const Constant *Init = Var->getInitializer();
int FI = FrameInfo->CreateStackObject(Size, Alignment, false);
SDValue InitPtr = DAG.getFrameIndex(FI,
@@ -461,7 +649,7 @@ SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI,
for (unsigned i = 1; i < (*I)->getNumOperands(); ++i) {
Ops.push_back((*I)->getOperand(i));
}
- DAG.UpdateNodeOperands(*I, &Ops[0], Ops.size());
+ DAG.UpdateNodeOperands(*I, Ops);
}
return DAG.getZExtOrTrunc(InitPtr, SDLoc(Op),
getPointerTy(AMDGPUAS::CONSTANT_ADDRESS));
@@ -469,44 +657,28 @@ SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI,
}
}
-void AMDGPUTargetLowering::ExtractVectorElements(SDValue Op, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &Args,
- unsigned Start,
- unsigned Count) const {
- EVT VT = Op.getValueType();
- for (unsigned i = Start, e = Start + Count; i != e; ++i) {
- Args.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op),
- VT.getVectorElementType(),
- Op, DAG.getConstant(i, MVT::i32)));
- }
-}
-
SDValue AMDGPUTargetLowering::LowerCONCAT_VECTORS(SDValue Op,
SelectionDAG &DAG) const {
SmallVector<SDValue, 8> Args;
SDValue A = Op.getOperand(0);
SDValue B = Op.getOperand(1);
- ExtractVectorElements(A, DAG, Args, 0,
- A.getValueType().getVectorNumElements());
- ExtractVectorElements(B, DAG, Args, 0,
- B.getValueType().getVectorNumElements());
+ DAG.ExtractVectorElements(A, Args);
+ DAG.ExtractVectorElements(B, Args);
- return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(Op), Op.getValueType(),
- &Args[0], Args.size());
+ return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(Op), Op.getValueType(), Args);
}
SDValue AMDGPUTargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op,
SelectionDAG &DAG) const {
SmallVector<SDValue, 8> Args;
- EVT VT = Op.getValueType();
unsigned Start = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
- ExtractVectorElements(Op.getOperand(0), DAG, Args, Start,
- VT.getVectorNumElements());
+ EVT VT = Op.getValueType();
+ DAG.ExtractVectorElements(Op.getOperand(0), Args, Start,
+ VT.getVectorNumElements());
- return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(Op), Op.getValueType(),
- &Args[0], Args.size());
+ return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(Op), Op.getValueType(), Args);
}
SDValue AMDGPUTargetLowering::LowerFrameIndex(SDValue Op,
@@ -560,6 +732,22 @@ SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return DAG.getNode(AMDGPUISD::UMIN, DL, VT, Op.getOperand(1),
Op.getOperand(2));
+ case AMDGPUIntrinsic::AMDGPU_umul24:
+ return DAG.getNode(AMDGPUISD::MUL_U24, DL, VT,
+ Op.getOperand(1), Op.getOperand(2));
+
+ case AMDGPUIntrinsic::AMDGPU_imul24:
+ return DAG.getNode(AMDGPUISD::MUL_I24, DL, VT,
+ Op.getOperand(1), Op.getOperand(2));
+
+ case AMDGPUIntrinsic::AMDGPU_umad24:
+ return DAG.getNode(AMDGPUISD::MAD_U24, DL, VT,
+ Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
+
+ case AMDGPUIntrinsic::AMDGPU_imad24:
+ return DAG.getNode(AMDGPUISD::MAD_I24, DL, VT,
+ Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
+
case AMDGPUIntrinsic::AMDGPU_bfe_i32:
return DAG.getNode(AMDGPUISD::BFE_I32, DL, VT,
Op.getOperand(1),
@@ -590,8 +778,7 @@ SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
///IABS(a) = SMAX(sub(0, a), a)
SDValue AMDGPUTargetLowering::LowerIntrinsicIABS(SDValue Op,
- SelectionDAG &DAG) const {
-
+ SelectionDAG &DAG) const {
SDLoc DL(Op);
EVT VT = Op.getValueType();
SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT),
@@ -603,7 +790,7 @@ SDValue AMDGPUTargetLowering::LowerIntrinsicIABS(SDValue Op,
/// Linear Interpolation
/// LRP(a, b, c) = muladd(a, b, (1 - a) * c)
SDValue AMDGPUTargetLowering::LowerIntrinsicLRP(SDValue Op,
- SelectionDAG &DAG) const {
+ SelectionDAG &DAG) const {
SDLoc DL(Op);
EVT VT = Op.getValueType();
SDValue OneSubA = DAG.getNode(ISD::FSUB, DL, VT,
@@ -617,16 +804,16 @@ SDValue AMDGPUTargetLowering::LowerIntrinsicLRP(SDValue Op,
}
/// \brief Generate Min/Max node
-SDValue AMDGPUTargetLowering::LowerMinMax(SDValue Op,
- SelectionDAG &DAG) const {
- SDLoc DL(Op);
- EVT VT = Op.getValueType();
+SDValue AMDGPUTargetLowering::CombineMinMax(SDNode *N,
+ SelectionDAG &DAG) const {
+ SDLoc DL(N);
+ EVT VT = N->getValueType(0);
- SDValue LHS = Op.getOperand(0);
- SDValue RHS = Op.getOperand(1);
- SDValue True = Op.getOperand(2);
- SDValue False = Op.getOperand(3);
- SDValue CC = Op.getOperand(4);
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ SDValue True = N->getOperand(2);
+ SDValue False = N->getOperand(3);
+ SDValue CC = N->getOperand(4);
if (VT != MVT::f32 ||
!((LHS == True && RHS == False) || (LHS == False && RHS == True))) {
@@ -654,10 +841,8 @@ SDValue AMDGPUTargetLowering::LowerMinMax(SDValue Op,
case ISD::SETOLT:
case ISD::SETLE:
case ISD::SETLT: {
- if (LHS == True)
- return DAG.getNode(AMDGPUISD::FMIN, DL, VT, LHS, RHS);
- else
- return DAG.getNode(AMDGPUISD::FMAX, DL, VT, LHS, RHS);
+ unsigned Opc = (LHS == True) ? AMDGPUISD::FMIN : AMDGPUISD::FMAX;
+ return DAG.getNode(Opc, DL, VT, LHS, RHS);
}
case ISD::SETGT:
case ISD::SETGE:
@@ -665,15 +850,13 @@ SDValue AMDGPUTargetLowering::LowerMinMax(SDValue Op,
case ISD::SETOGE:
case ISD::SETUGT:
case ISD::SETOGT: {
- if (LHS == True)
- return DAG.getNode(AMDGPUISD::FMAX, DL, VT, LHS, RHS);
- else
- return DAG.getNode(AMDGPUISD::FMIN, DL, VT, LHS, RHS);
+ unsigned Opc = (LHS == True) ? AMDGPUISD::FMAX : AMDGPUISD::FMIN;
+ return DAG.getNode(Opc, DL, VT, LHS, RHS);
}
case ISD::SETCC_INVALID:
llvm_unreachable("Invalid setcc condcode!");
}
- return Op;
+ return SDValue();
}
SDValue AMDGPUTargetLowering::SplitVectorLoad(const SDValue &Op,
@@ -695,8 +878,7 @@ SDValue AMDGPUTargetLowering::SplitVectorLoad(const SDValue &Op,
MemEltVT, Load->isVolatile(), Load->isNonTemporal(),
Load->getAlignment()));
}
- return DAG.getNode(ISD::BUILD_VECTOR, SL, Op.getValueType(),
- Loads.data(), Loads.size());
+ return DAG.getNode(ISD::BUILD_VECTOR, SL, Op.getValueType(), Loads);
}
SDValue AMDGPUTargetLowering::MergeVectorStore(const SDValue &Op,
@@ -713,32 +895,46 @@ SDValue AMDGPUTargetLowering::MergeVectorStore(const SDValue &Op,
}
SDLoc DL(Op);
- const SDValue &Value = Store->getValue();
+ SDValue Value = Store->getValue();
EVT VT = Value.getValueType();
- const SDValue &Ptr = Store->getBasePtr();
+ EVT ElemVT = VT.getVectorElementType();
+ SDValue Ptr = Store->getBasePtr();
EVT MemEltVT = MemVT.getVectorElementType();
unsigned MemEltBits = MemEltVT.getSizeInBits();
unsigned MemNumElements = MemVT.getVectorNumElements();
- EVT PackedVT = EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits());
- SDValue Mask = DAG.getConstant((1 << MemEltBits) - 1, PackedVT);
+ unsigned PackedSize = MemVT.getStoreSizeInBits();
+ SDValue Mask = DAG.getConstant((1 << MemEltBits) - 1, MVT::i32);
+
+ assert(Value.getValueType().getScalarSizeInBits() >= 32);
SDValue PackedValue;
for (unsigned i = 0; i < MemNumElements; ++i) {
- EVT ElemVT = VT.getVectorElementType();
SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT, Value,
DAG.getConstant(i, MVT::i32));
- Elt = DAG.getZExtOrTrunc(Elt, DL, PackedVT);
- Elt = DAG.getNode(ISD::AND, DL, PackedVT, Elt, Mask);
- SDValue Shift = DAG.getConstant(MemEltBits * i, PackedVT);
- Elt = DAG.getNode(ISD::SHL, DL, PackedVT, Elt, Shift);
+ Elt = DAG.getZExtOrTrunc(Elt, DL, MVT::i32);
+ Elt = DAG.getNode(ISD::AND, DL, MVT::i32, Elt, Mask); // getZeroExtendInReg
+
+ SDValue Shift = DAG.getConstant(MemEltBits * i, MVT::i32);
+ Elt = DAG.getNode(ISD::SHL, DL, MVT::i32, Elt, Shift);
+
if (i == 0) {
PackedValue = Elt;
} else {
- PackedValue = DAG.getNode(ISD::OR, DL, PackedVT, PackedValue, Elt);
+ PackedValue = DAG.getNode(ISD::OR, DL, MVT::i32, PackedValue, Elt);
}
}
+
+ if (PackedSize < 32) {
+ EVT PackedVT = EVT::getIntegerVT(*DAG.getContext(), PackedSize);
+ return DAG.getTruncStore(Store->getChain(), DL, PackedValue, Ptr,
+ Store->getMemOperand()->getPointerInfo(),
+ PackedVT,
+ Store->isNonTemporal(), Store->isVolatile(),
+ Store->getAlignment());
+ }
+
return DAG.getStore(Store->getChain(), DL, PackedValue, Ptr,
- MachinePointerInfo(Store->getMemOperand()->getValue()),
+ Store->getMemOperand()->getPointerInfo(),
Store->isVolatile(), Store->isNonTemporal(),
Store->getAlignment());
}
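MergeVectorStore above now packs every element into a single 32-bit integer rather than a variable-width PackedVT: each element is zero-extended, masked to its memory width, shifted into position, and OR'd in, and a truncating store writes only PackedSize bits when that is below 32. The packing loop with plain integers (a sketch of the bit math, not the DAG code):

    #include <cassert>
    #include <cstdint>
    #include <vector>

    // Pack elements of memEltBits each into one 32-bit value, mirroring the
    // AND/SHL/OR loop in MergeVectorStore above.
    static uint32_t packElements(const std::vector<uint32_t> &elts,
                                 unsigned memEltBits) {
      assert(memEltBits > 0 && memEltBits < 32 &&
             memEltBits * elts.size() <= 32 && "packed value must fit in i32");
      uint32_t mask = (1u << memEltBits) - 1;
      uint32_t packed = 0;
      for (size_t i = 0; i < elts.size(); ++i)
        packed |= (elts[i] & mask) << (memEltBits * i);
      return packed;
    }

    int main() {
      // A <4 x i8>-style store: four bytes packed lowest-element-first.
      return packElements({0x11, 0x22, 0x33, 0x44}, 8) == 0x44332211 ? 0 : 1;
    }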
@@ -766,7 +962,7 @@ SDValue AMDGPUTargetLowering::SplitVectorStore(SDValue Op,
MemEltVT, Store->isVolatile(), Store->isNonTemporal(),
Store->getAlignment()));
}
- return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, &Chains[0], NumElts);
+ return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, Chains);
}
SDValue AMDGPUTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
@@ -788,9 +984,24 @@ SDValue AMDGPUTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(ISD::getExtForLoadExtType(ExtType), DL, VT, ExtLoad32);
}
+ if (ExtType == ISD::NON_EXTLOAD && VT.getSizeInBits() < 32) {
+ assert(VT == MVT::i1 && "Only i1 non-extloads expected");
+ // FIXME: Copied from PPC
+ // First, load into 32 bits, then truncate to 1 bit.
+
+ SDValue Chain = Load->getChain();
+ SDValue BasePtr = Load->getBasePtr();
+ MachineMemOperand *MMO = Load->getMemOperand();
+
+ SDValue NewLD = DAG.getExtLoad(ISD::EXTLOAD, DL, MVT::i32, Chain,
+ BasePtr, MVT::i8, MMO);
+ return DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);
+ }
+
// Lower loads constant address space global variable loads
if (Load->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS &&
- isa<GlobalVariable>(GetUnderlyingObject(Load->getPointerInfo().V))) {
+ isa<GlobalVariable>(
+ GetUnderlyingObject(Load->getMemOperand()->getValue()))) {
SDValue Ptr = DAG.getZExtOrTrunc(Load->getBasePtr(), DL,
getPointerTy(AMDGPUAS::PRIVATE_ADDRESS));
@@ -887,15 +1098,13 @@ SDValue AMDGPUTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
}
SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op,
- SelectionDAG &DAG) const {
+ SelectionDAG &DAG) const {
SDLoc DL(Op);
EVT VT = Op.getValueType();
SDValue Num = Op.getOperand(0);
SDValue Den = Op.getOperand(1);
- SmallVector<SDValue, 8> Results;
-
// RCP = URECIP(Den) = 2^32 / Den + e
// e is rounding error.
SDValue RCP = DAG.getNode(AMDGPUISD::URECIP, DL, VT, Den);
@@ -985,10 +1194,11 @@ SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op,
// Rem = (Remainder_GE_Zero == 0 ? Remainder_A_Den : Rem)
Rem = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT),
Remainder_A_Den, Rem, ISD::SETEQ);
- SDValue Ops[2];
- Ops[0] = Div;
- Ops[1] = Rem;
- return DAG.getMergeValues(Ops, 2, DL);
+ SDValue Ops[2] = {
+ Div,
+ Rem
+ };
+ return DAG.getMergeValues(Ops, DL);
}
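
The URECIP comment earlier in this function is the whole trick: RCP approximates 2^32 / Den, so the high half of RCP * Num approximates the quotient, and the SETCC/SELECT chain corrects the small remaining error. A host-side sketch under the assumption Den > 1 (udiv_via_urecip is hypothetical; a truncated reciprocal of 0 would require Den == 1 to be handled up front):

  #include <cstdint>

  static uint32_t udiv_via_urecip(uint32_t Num, uint32_t Den) {
    uint32_t RCP = (uint32_t)(0x100000000ULL / Den);       // 2^32 / Den + e
    uint32_t Q = (uint32_t)(((uint64_t)RCP * Num) >> 32);  // mulhu(RCP, Num)
    uint32_t R = Num - Q * Den;
    while (R >= Den) { R -= Den; ++Q; }  // the select-based fixups; runs at most twice
    return Q;
  }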
SDValue AMDGPUTargetLowering::LowerUINT_TO_FP(SDValue Op,
@@ -1029,81 +1239,197 @@ SDValue AMDGPUTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
MVT VT = Op.getSimpleValueType();
MVT ScalarVT = VT.getScalarType();
- unsigned SrcBits = ExtraVT.getScalarType().getSizeInBits();
- unsigned DestBits = ScalarVT.getSizeInBits();
- unsigned BitsDiff = DestBits - SrcBits;
-
- if (!Subtarget->hasBFE())
- return ExpandSIGN_EXTEND_INREG(Op, BitsDiff, DAG);
+ if (!VT.isVector())
+ return SDValue();
SDValue Src = Op.getOperand(0);
- if (VT.isVector()) {
- SDLoc DL(Op);
- // Need to scalarize this, and revisit each of the scalars later.
- // TODO: Don't scalarize on Evergreen?
- unsigned NElts = VT.getVectorNumElements();
- SmallVector<SDValue, 8> Args;
- ExtractVectorElements(Src, DAG, Args, 0, NElts);
+ SDLoc DL(Op);
- SDValue VTOp = DAG.getValueType(ExtraVT.getScalarType());
- for (unsigned I = 0; I < NElts; ++I)
- Args[I] = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, ScalarVT, Args[I], VTOp);
+ // TODO: Don't scalarize on Evergreen?
+ unsigned NElts = VT.getVectorNumElements();
+ SmallVector<SDValue, 8> Args;
+ DAG.ExtractVectorElements(Src, Args, 0, NElts);
- return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Args.data(), Args.size());
- }
+ SDValue VTOp = DAG.getValueType(ExtraVT.getScalarType());
+ for (unsigned I = 0; I < NElts; ++I)
+ Args[I] = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, ScalarVT, Args[I], VTOp);
- if (SrcBits == 32) {
- SDLoc DL(Op);
+ return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Args);
+}
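
Each scalarized node built above is plain shift arithmetic once legalized: sign_extend_inreg from a width of Bits keeps the low Bits bits and replicates bit Bits-1 upward. A one-line model, assuming 0 < Bits < 32 (sextInReg32 is hypothetical):

  static int32_t sextInReg32(int32_t X, unsigned Bits) {
    return (X << (32 - Bits)) >> (32 - Bits);  // arithmetic shift sign-extends
  }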
- // If the source is 32-bits, this is really half of a 2-register pair, and
- // we need to discard the unused half of the pair.
- SDValue TruncSrc = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Src);
- return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, TruncSrc);
- }
+//===----------------------------------------------------------------------===//
+// Custom DAG optimizations
+//===----------------------------------------------------------------------===//
- unsigned NElts = VT.isVector() ? VT.getVectorNumElements() : 1;
+static bool isU24(SDValue Op, SelectionDAG &DAG) {
+ APInt KnownZero, KnownOne;
+ EVT VT = Op.getValueType();
+ DAG.computeKnownBits(Op, KnownZero, KnownOne);
- // TODO: Match 64-bit BFE. SI has a 64-bit BFE, but it's scalar only so it
- // might not be worth the effort, and will need to expand to shifts when
- // fixing SGPR copies.
- if (SrcBits < 32 && DestBits <= 32) {
- SDLoc DL(Op);
- MVT ExtVT = (NElts == 1) ? MVT::i32 : MVT::getVectorVT(MVT::i32, NElts);
-
- if (DestBits != 32)
- Src = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVT, Src);
-
- // FIXME: This should use TargetConstant, but that hits assertions for
- // Evergreen.
- SDValue Ext = DAG.getNode(AMDGPUISD::BFE_I32, DL, ExtVT,
- Op.getOperand(0), // Operand
- DAG.getConstant(0, ExtVT), // Offset
- DAG.getConstant(SrcBits, ExtVT)); // Width
-
- // Truncate to the original type if necessary.
- if (ScalarVT == MVT::i32)
- return Ext;
- return DAG.getNode(ISD::TRUNCATE, DL, VT, Ext);
- }
+ return (VT.getSizeInBits() - KnownZero.countLeadingOnes()) <= 24;
+}
- // For small types, extend to 32-bits first.
- if (SrcBits < 32) {
- SDLoc DL(Op);
- MVT ExtVT = (NElts == 1) ? MVT::i32 : MVT::getVectorVT(MVT::i32, NElts);
+static bool isI24(SDValue Op, SelectionDAG &DAG) {
+ EVT VT = Op.getValueType();
- SDValue TruncSrc = DAG.getNode(ISD::TRUNCATE, DL, ExtVT, Src);
- SDValue Ext32 = DAG.getNode(AMDGPUISD::BFE_I32,
- DL,
- ExtVT,
- TruncSrc, // Operand
- DAG.getConstant(0, ExtVT), // Offset
- DAG.getConstant(SrcBits, ExtVT)); // Width
+  // In order for this to be a signed 24-bit value, bit 23 must
+ // be a sign bit.
+ return VT.getSizeInBits() >= 24 && // Types less than 24-bit should be treated
+ // as unsigned 24-bit values.
+ (VT.getSizeInBits() - DAG.ComputeNumSignBits(Op)) < 24;
+}
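
Both predicates only ask how many value-carrying bits an operand has: isU24 wants at least 8 known-zero leading bits, and isI24 wants bit 23 to act as the sign bit, i.e. at least 9 redundant sign bits. Host-side analogues over concrete constants, assuming exact bit knowledge and arithmetic right shift (all names hypothetical):

  #include <cstdint>

  static unsigned numSignBits(int32_t V) {
    unsigned N = 1;                       // the sign bit itself
    int Sign = (V < 0) ? 1 : 0;
    while (N < 32 && (((V >> (31 - N)) & 1) == Sign))
      ++N;                                // count copies of the top bit
    return N;
  }

  static bool fitsU24(uint32_t V) { return V <= 0xFFFFFF; }
  static bool fitsI24(int32_t V) { return (32 - numSignBits(V)) < 24; }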
+
+static void simplifyI24(SDValue Op, TargetLowering::DAGCombinerInfo &DCI) {
+
+ SelectionDAG &DAG = DCI.DAG;
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ EVT VT = Op.getValueType();
+
+ APInt Demanded = APInt::getLowBitsSet(VT.getSizeInBits(), 24);
+ APInt KnownZero, KnownOne;
+ TargetLowering::TargetLoweringOpt TLO(DAG, true, true);
+ if (TLI.SimplifyDemandedBits(Op, Demanded, KnownZero, KnownOne, TLO))
+ DCI.CommitTargetLoweringOpt(TLO);
+}
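
simplifyI24 tells the combiner that only the low 24 bits of the operand are demanded, so masks and extensions that affect only the top byte can be stripped. The legality condition, in host terms (sameLow24 is hypothetical):

  static bool sameLow24(uint32_t A, uint32_t B) {
    // Two operands that agree on bits 23..0 produce the same MUL_*24 result.
    return ((A ^ B) & 0xFFFFFF) == 0;
  }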
- return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Ext32);
+template <typename IntTy>
+static SDValue constantFoldBFE(SelectionDAG &DAG, IntTy Src0,
+ uint32_t Offset, uint32_t Width) {
+ if (Width + Offset < 32) {
+ IntTy Result = (Src0 << (32 - Offset - Width)) >> (32 - Width);
+ return DAG.getConstant(Result, MVT::i32);
}
- // For everything else, use the standard bitshift expansion.
- return ExpandSIGN_EXTEND_INREG(Op, BitsDiff, DAG);
+ return DAG.getConstant(Src0 >> Offset, MVT::i32);
+}
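
The shift pair in the template does the extension for free: moving the field to the top of the word and shifting back down sign-extends when IntTy is signed and zero-extends when it is unsigned. A quick host check mirroring the same arithmetic (foldBFE is a hypothetical stand-in, not the patch's function):

  #include <cassert>
  #include <cstdint>

  template <typename IntTy>
  static IntTy foldBFE(IntTy Src0, uint32_t Offset, uint32_t Width) {
    if (Width + Offset < 32)
      return (Src0 << (32 - Offset - Width)) >> (32 - Width);
    return Src0 >> Offset;
  }

  int main() {
    assert(foldBFE<int32_t>(0xF0, 4, 4) == -1);    // 0b1111 sign-extends
    assert(foldBFE<uint32_t>(0xF0, 4, 4) == 0xFu); // 0b1111 zero-extends
  }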
+
+SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ SelectionDAG &DAG = DCI.DAG;
+ SDLoc DL(N);
+
+ switch(N->getOpcode()) {
+ default: break;
+ case ISD::MUL: {
+ EVT VT = N->getValueType(0);
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue Mul;
+
+ // FIXME: Add support for 24-bit multiply with 64-bit output on SI.
+ if (VT.isVector() || VT.getSizeInBits() > 32)
+ break;
+
+ if (Subtarget->hasMulU24() && isU24(N0, DAG) && isU24(N1, DAG)) {
+ N0 = DAG.getZExtOrTrunc(N0, DL, MVT::i32);
+ N1 = DAG.getZExtOrTrunc(N1, DL, MVT::i32);
+ Mul = DAG.getNode(AMDGPUISD::MUL_U24, DL, MVT::i32, N0, N1);
+ } else if (Subtarget->hasMulI24() && isI24(N0, DAG) && isI24(N1, DAG)) {
+ N0 = DAG.getSExtOrTrunc(N0, DL, MVT::i32);
+ N1 = DAG.getSExtOrTrunc(N1, DL, MVT::i32);
+ Mul = DAG.getNode(AMDGPUISD::MUL_I24, DL, MVT::i32, N0, N1);
+ } else {
+ break;
+ }
+
+ // We need to use sext even for MUL_U24, because MUL_U24 is used
+ // for signed multiply of 8 and 16-bit types.
+ SDValue Reg = DAG.getSExtOrTrunc(Mul, DL, VT);
+
+ return Reg;
+ }
+ case AMDGPUISD::MUL_I24:
+ case AMDGPUISD::MUL_U24: {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ simplifyI24(N0, DCI);
+ simplifyI24(N1, DCI);
+ return SDValue();
+ }
+ case ISD::SELECT_CC: {
+ return CombineMinMax(N, DAG);
+ }
+ case AMDGPUISD::BFE_I32:
+ case AMDGPUISD::BFE_U32: {
+ assert(!N->getValueType(0).isVector() &&
+ "Vector handling of BFE not implemented");
+ ConstantSDNode *Width = dyn_cast<ConstantSDNode>(N->getOperand(2));
+ if (!Width)
+ break;
+
+ uint32_t WidthVal = Width->getZExtValue() & 0x1f;
+ if (WidthVal == 0)
+ return DAG.getConstant(0, MVT::i32);
+
+ ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ if (!Offset)
+ break;
+
+ SDValue BitsFrom = N->getOperand(0);
+ uint32_t OffsetVal = Offset->getZExtValue() & 0x1f;
+
+ bool Signed = N->getOpcode() == AMDGPUISD::BFE_I32;
+
+ if (OffsetVal == 0) {
+ // This is already sign / zero extended, so try to fold away extra BFEs.
+ unsigned SignBits = Signed ? (32 - WidthVal + 1) : (32 - WidthVal);
+
+ unsigned OpSignBits = DAG.ComputeNumSignBits(BitsFrom);
+ if (OpSignBits >= SignBits)
+ return BitsFrom;
+
+ EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), WidthVal);
+ if (Signed) {
+ // This is a sign_extend_inreg. Replace it to take advantage of existing
+ // DAG Combines. If not eliminated, we will match back to BFE during
+ // selection.
+
+      // TODO: The sext_inreg of extended types ends up here, although we
+      // could handle them in a single BFE.
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32, BitsFrom,
+ DAG.getValueType(SmallVT));
+ }
+
+ return DAG.getZeroExtendInReg(BitsFrom, DL, SmallVT);
+ }
+
+ if (ConstantSDNode *Val = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
+ if (Signed) {
+ return constantFoldBFE<int32_t>(DAG,
+ Val->getSExtValue(),
+ OffsetVal,
+ WidthVal);
+ }
+
+ return constantFoldBFE<uint32_t>(DAG,
+ Val->getZExtValue(),
+ OffsetVal,
+ WidthVal);
+ }
+
+ APInt Demanded = APInt::getBitsSet(32,
+ OffsetVal,
+ OffsetVal + WidthVal);
+
+ if ((OffsetVal + WidthVal) >= 32) {
+ SDValue ShiftVal = DAG.getConstant(OffsetVal, MVT::i32);
+ return DAG.getNode(Signed ? ISD::SRA : ISD::SRL, DL, MVT::i32,
+ BitsFrom, ShiftVal);
+ }
+
+ APInt KnownZero, KnownOne;
+ TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
+ !DCI.isBeforeLegalizeOps());
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (TLO.ShrinkDemandedConstant(BitsFrom, Demanded) ||
+ TLI.SimplifyDemandedBits(BitsFrom, Demanded, KnownZero, KnownOne, TLO)) {
+ DCI.CommitTargetLoweringOpt(TLO);
+ }
+
+ break;
+ }
+ }
+ return SDValue();
}
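
The MUL case above leans on the hardware 24-bit multiply semantics: the top 8 bits of each operand are ignored and the result is a full 32-bit product, so once both operands are known to fit in 24 bits the node can be swapped in. In host terms (mul_u24 and mul_i24 are hypothetical models):

  #include <cstdint>

  static uint32_t mul_u24(uint32_t A, uint32_t B) {
    return (A & 0xFFFFFF) * (B & 0xFFFFFF);    // 32-bit result
  }

  static int32_t mul_i24(int32_t A, int32_t B) {
    return ((A << 8) >> 8) * ((B << 8) >> 8);  // operands sign-extended from bit 23
  }

This also shows why the combine ends with getSExtOrTrunc even for MUL_U24: for an original i8 or i16 multiply only the low bits of the 32-bit product matter, and truncating back to VT recovers them either way.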
//===----------------------------------------------------------------------===//
@@ -1181,7 +1507,7 @@ SDValue AMDGPUTargetLowering::CreateLiveInRegister(SelectionDAG &DAG,
const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
switch (Opcode) {
- default: return 0;
+ default: return nullptr;
// AMDIL DAG nodes
NODE_NAME_CASE(CALL);
NODE_NAME_CASE(UMUL);
@@ -1202,6 +1528,10 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(BFE_I32)
NODE_NAME_CASE(BFI)
NODE_NAME_CASE(BFM)
+ NODE_NAME_CASE(MUL_U24)
+ NODE_NAME_CASE(MUL_I24)
+ NODE_NAME_CASE(MAD_U24)
+ NODE_NAME_CASE(MAD_I24)
NODE_NAME_CASE(URECIP)
NODE_NAME_CASE(DOT4)
NODE_NAME_CASE(EXPORT)
@@ -1219,22 +1549,22 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
}
}
-static void computeMaskedBitsForMinMax(const SDValue Op0,
- const SDValue Op1,
- APInt &KnownZero,
- APInt &KnownOne,
- const SelectionDAG &DAG,
- unsigned Depth) {
+static void computeKnownBitsForMinMax(const SDValue Op0,
+ const SDValue Op1,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth) {
APInt Op0Zero, Op0One;
APInt Op1Zero, Op1One;
- DAG.ComputeMaskedBits(Op0, Op0Zero, Op0One, Depth);
- DAG.ComputeMaskedBits(Op1, Op1Zero, Op1One, Depth);
+ DAG.computeKnownBits(Op0, Op0Zero, Op0One, Depth);
+ DAG.computeKnownBits(Op1, Op1Zero, Op1One, Depth);
KnownZero = Op0Zero & Op1Zero;
KnownOne = Op0One & Op1One;
}
-void AMDGPUTargetLowering::computeMaskedBitsForTargetNode(
+void AMDGPUTargetLowering::computeKnownBitsForTargetNode(
const SDValue Op,
APInt &KnownZero,
APInt &KnownOne,
@@ -1242,8 +1572,14 @@ void AMDGPUTargetLowering::computeMaskedBitsForTargetNode(
unsigned Depth) const {
KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); // Don't know anything.
+
+ APInt KnownZero2;
+ APInt KnownOne2;
unsigned Opc = Op.getOpcode();
+
switch (Opc) {
+ default:
+ break;
case ISD::INTRINSIC_WO_CHAIN: {
// FIXME: The intrinsic should just use the node.
switch (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue()) {
@@ -1251,8 +1587,8 @@ void AMDGPUTargetLowering::computeMaskedBitsForTargetNode(
case AMDGPUIntrinsic::AMDGPU_umax:
case AMDGPUIntrinsic::AMDGPU_imin:
case AMDGPUIntrinsic::AMDGPU_umin:
- computeMaskedBitsForMinMax(Op.getOperand(1), Op.getOperand(2),
- KnownZero, KnownOne, DAG, Depth);
+ computeKnownBitsForMinMax(Op.getOperand(1), Op.getOperand(2),
+ KnownZero, KnownOne, DAG, Depth);
break;
default:
break;
@@ -1264,10 +1600,62 @@ void AMDGPUTargetLowering::computeMaskedBitsForTargetNode(
case AMDGPUISD::UMAX:
case AMDGPUISD::SMIN:
case AMDGPUISD::UMIN:
- computeMaskedBitsForMinMax(Op.getOperand(0), Op.getOperand(1),
- KnownZero, KnownOne, DAG, Depth);
+ computeKnownBitsForMinMax(Op.getOperand(0), Op.getOperand(1),
+ KnownZero, KnownOne, DAG, Depth);
break;
- default:
+
+ case AMDGPUISD::BFE_I32:
+ case AMDGPUISD::BFE_U32: {
+ ConstantSDNode *CWidth = dyn_cast<ConstantSDNode>(Op.getOperand(2));
+ if (!CWidth)
+ return;
+
+ unsigned BitWidth = 32;
+ uint32_t Width = CWidth->getZExtValue() & 0x1f;
+ if (Width == 0) {
+ KnownZero = APInt::getAllOnesValue(BitWidth);
+ KnownOne = APInt::getNullValue(BitWidth);
+ return;
+ }
+
+ // FIXME: This could do a lot more. If offset is 0, should be the same as
+ // sign_extend_inreg implementation, but that involves duplicating it.
+ if (Opc == AMDGPUISD::BFE_I32)
+ KnownOne = APInt::getHighBitsSet(BitWidth, BitWidth - Width);
+ else
+ KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - Width);
+
break;
}
+ }
+}
+
+unsigned AMDGPUTargetLowering::ComputeNumSignBitsForTargetNode(
+ SDValue Op,
+ const SelectionDAG &DAG,
+ unsigned Depth) const {
+ switch (Op.getOpcode()) {
+ case AMDGPUISD::BFE_I32: {
+ ConstantSDNode *Width = dyn_cast<ConstantSDNode>(Op.getOperand(2));
+ if (!Width)
+ return 1;
+
+ unsigned SignBits = 32 - Width->getZExtValue() + 1;
+ ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+ if (!Offset || !Offset->isNullValue())
+ return SignBits;
+
+ // TODO: Could probably figure something out with non-0 offsets.
+ unsigned Op0SignBits = DAG.ComputeNumSignBits(Op.getOperand(0), Depth + 1);
+ return std::max(SignBits, Op0SignBits);
+ }
+
+ case AMDGPUISD::BFE_U32: {
+ ConstantSDNode *Width = dyn_cast<ConstantSDNode>(Op.getOperand(2));
+ return Width ? 32 - (Width->getZExtValue() & 0x1f) : 1;
+ }
+
+ default:
+ return 1;
+ }
}
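
The std::max in the BFE_I32 case captures a subtle point: with a zero offset, the extract cannot destroy sign bits the operand already has. For example, applying BFE_I32 with width 24 to a value already sign-extended from bit 7 (25 sign bits) returns the value unchanged, so the result keeps 25 sign bits rather than the 32 - 24 + 1 = 9 that the width alone implies. A scalar model of the node for checking such cases, assuming Off + W < 32 (bfe_i32 is hypothetical):

  static int32_t bfe_i32(int32_t Src, unsigned Off, unsigned W) {
    return (Src << (32 - Off - W)) >> (32 - W);  // same fold as constantFoldBFE
  }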
diff --git a/lib/Target/R600/AMDGPUISelLowering.h b/lib/Target/R600/AMDGPUISelLowering.h
index a019616..d5d821d 100644
--- a/lib/Target/R600/AMDGPUISelLowering.h
+++ b/lib/Target/R600/AMDGPUISelLowering.h
@@ -29,9 +29,6 @@ protected:
const AMDGPUSubtarget *Subtarget;
private:
- void ExtractVectorElements(SDValue Op, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &Args,
- unsigned Start, unsigned Count) const;
SDValue LowerConstantInitializer(const Constant* Init, const GlobalValue *GV,
const SDValue &InitPtr,
SDValue Chain,
@@ -44,7 +41,7 @@ private:
/// of the same bitwidth.
SDValue MergeVectorStore(const SDValue &Op, SelectionDAG &DAG) const;
/// \brief Split a vector store into multiple scalar stores.
- /// \returns The resulting chain.
+ /// \returns The resulting chain.
SDValue LowerUDIVREM(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
@@ -83,62 +80,67 @@ protected:
public:
AMDGPUTargetLowering(TargetMachine &TM);
- virtual bool isFAbsFree(EVT VT) const override;
- virtual bool isFNegFree(EVT VT) const override;
- virtual bool isTruncateFree(EVT Src, EVT Dest) const override;
- virtual bool isTruncateFree(Type *Src, Type *Dest) const override;
-
- virtual bool isZExtFree(Type *Src, Type *Dest) const override;
- virtual bool isZExtFree(EVT Src, EVT Dest) const override;
-
- virtual bool isNarrowingProfitable(EVT VT1, EVT VT2) const override;
-
- virtual MVT getVectorIdxTy() const override;
- virtual bool isLoadBitCastBeneficial(EVT, EVT) const override;
- virtual SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv,
- bool isVarArg,
- const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<SDValue> &OutVals,
- SDLoc DL, SelectionDAG &DAG) const;
- virtual SDValue LowerCall(CallLoweringInfo &CLI,
- SmallVectorImpl<SDValue> &InVals) const {
- CLI.Callee.dump();
- llvm_unreachable("Undefined function");
- }
+ bool isFAbsFree(EVT VT) const override;
+ bool isFNegFree(EVT VT) const override;
+ bool isTruncateFree(EVT Src, EVT Dest) const override;
+ bool isTruncateFree(Type *Src, Type *Dest) const override;
+
+ bool isZExtFree(Type *Src, Type *Dest) const override;
+ bool isZExtFree(EVT Src, EVT Dest) const override;
+
+ bool isNarrowingProfitable(EVT VT1, EVT VT2) const override;
- virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
- virtual void ReplaceNodeResults(SDNode * N,
- SmallVectorImpl<SDValue> &Results,
- SelectionDAG &DAG) const override;
+ MVT getVectorIdxTy() const override;
+ bool isLoadBitCastBeneficial(EVT, EVT) const override;
+ SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ SDLoc DL, SelectionDAG &DAG) const override;
+ SDValue LowerCall(CallLoweringInfo &CLI,
+ SmallVectorImpl<SDValue> &InVals) const override;
+
+ SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
+ void ReplaceNodeResults(SDNode * N,
+ SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG) const override;
SDValue LowerIntrinsicIABS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerIntrinsicLRP(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerMinMax(SDValue Op, SelectionDAG &DAG) const;
- virtual const char* getTargetNodeName(unsigned Opcode) const;
+ SDValue CombineMinMax(SDNode *N, SelectionDAG &DAG) const;
+ const char* getTargetNodeName(unsigned Opcode) const override;
- virtual SDNode *PostISelFolding(MachineSDNode *N, SelectionDAG &DAG) const {
+ virtual SDNode *PostISelFolding(MachineSDNode *N,
+ SelectionDAG &DAG) const {
return N;
}
/// \brief Determine which of the bits specified in \p Mask are known to be
/// either zero or one and return them in the \p KnownZero and \p KnownOne
/// bitsets.
- virtual void computeMaskedBitsForTargetNode(const SDValue Op,
- APInt &KnownZero,
- APInt &KnownOne,
- const SelectionDAG &DAG,
- unsigned Depth = 0) const override;
+ void computeKnownBitsForTargetNode(const SDValue Op,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth = 0) const override;
+
+ virtual unsigned ComputeNumSignBitsForTargetNode(
+ SDValue Op,
+ const SelectionDAG &DAG,
+ unsigned Depth = 0) const override;
// Functions defined in AMDILISelLowering.cpp
public:
- virtual bool getTgtMemIntrinsic(IntrinsicInfo &Info,
- const CallInst &I, unsigned Intrinsic) const;
+ bool getTgtMemIntrinsic(IntrinsicInfo &Info,
+ const CallInst &I, unsigned Intrinsic) const override;
/// We want to mark f32/f64 floating point values as legal.
- bool isFPImmLegal(const APFloat &Imm, EVT VT) const;
+ bool isFPImmLegal(const APFloat &Imm, EVT VT) const override;
/// We don't want to shrink f64/f32 constants.
- bool ShouldShrinkFPConstant(EVT VT) const;
+ bool ShouldShrinkFPConstant(EVT VT) const override;
+
+ SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
private:
void InitAMDILLowering();
@@ -158,7 +160,6 @@ private:
SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
EVT genIntType(uint32_t size = 32, uint32_t numEle = 1) const;
SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
};
namespace AMDGPUISD {
@@ -188,6 +189,10 @@ enum {
BFE_I32, // Extract range of bits with sign extension to 32-bits.
BFI, // (src0 & src1) | (~src0 & src2)
BFM, // Insert a range of bits into a 32-bit word.
+ MUL_U24,
+ MUL_I24,
+ MAD_U24,
+ MAD_I24,
TEXTURE_FETCH,
EXPORT,
CONST_ADDRESS,
diff --git a/lib/Target/R600/AMDGPUInstrInfo.cpp b/lib/Target/R600/AMDGPUInstrInfo.cpp
index e32dd9f..1c3361a 100644
--- a/lib/Target/R600/AMDGPUInstrInfo.cpp
+++ b/lib/Target/R600/AMDGPUInstrInfo.cpp
@@ -20,14 +20,13 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+using namespace llvm;
+
#define GET_INSTRINFO_CTOR_DTOR
#define GET_INSTRINFO_NAMED_OPS
#define GET_INSTRMAP_INFO
#include "AMDGPUGenInstrInfo.inc"
-using namespace llvm;
-
-
// Pin the vtable to this file.
void AMDGPUInstrInfo::anchor() {}
@@ -85,7 +84,7 @@ AMDGPUInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
MachineBasicBlock::iterator &MBBI,
LiveVariables *LV) const {
// TODO: Implement this function
- return NULL;
+ return nullptr;
}
bool AMDGPUInstrInfo::getNextBranchInstr(MachineBasicBlock::iterator &iter,
MachineBasicBlock &MBB) const {
@@ -176,7 +175,7 @@ AMDGPUInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
const SmallVectorImpl<unsigned> &Ops,
int FrameIndex) const {
// TODO: Implement this function
- return 0;
+ return nullptr;
}
MachineInstr*
AMDGPUInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
@@ -184,7 +183,7 @@ AMDGPUInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
const SmallVectorImpl<unsigned> &Ops,
MachineInstr *LoadMI) const {
// TODO: Implement this function
- return 0;
+ return nullptr;
}
bool
AMDGPUInstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
@@ -356,3 +355,14 @@ int AMDGPUInstrInfo::getMaskedMIMGOp(uint16_t Opcode, unsigned Channels) const {
case 3: return AMDGPU::getMaskedMIMGOp(Opcode, AMDGPU::Channels_3);
}
}
+
+// Wrapper for Tablegen'd function. enum Subtarget is not defined in any
+// header file, so we need to wrap it in a function that takes unsigned
+// instead.
+namespace llvm {
+namespace AMDGPU {
+int getMCOpcode(uint16_t Opcode, unsigned Gen) {
+ return getMCOpcode(Opcode);
+}
+}
+}
diff --git a/lib/Target/R600/AMDGPUInstrInfo.h b/lib/Target/R600/AMDGPUInstrInfo.h
index 426910c..74baf6b 100644
--- a/lib/Target/R600/AMDGPUInstrInfo.h
+++ b/lib/Target/R600/AMDGPUInstrInfo.h
@@ -52,14 +52,15 @@ public:
virtual const AMDGPURegisterInfo &getRegisterInfo() const = 0;
bool isCoalescableExtInstr(const MachineInstr &MI, unsigned &SrcReg,
- unsigned &DstReg, unsigned &SubIdx) const;
+ unsigned &DstReg, unsigned &SubIdx) const override;
- unsigned isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const;
+ unsigned isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const override;
unsigned isLoadFromStackSlotPostFE(const MachineInstr *MI,
- int &FrameIndex) const;
+ int &FrameIndex) const override;
bool hasLoadFromStackSlot(const MachineInstr *MI,
const MachineMemOperand *&MMO,
- int &FrameIndex) const;
+ int &FrameIndex) const override;
unsigned isStoreFromStackSlot(const MachineInstr *MI, int &FrameIndex) const;
unsigned isStoreFromStackSlotPostFE(const MachineInstr *MI,
int &FrameIndex) const;
@@ -70,7 +71,7 @@ public:
MachineInstr *
convertToThreeAddress(MachineFunction::iterator &MFI,
MachineBasicBlock::iterator &MBBI,
- LiveVariables *LV) const;
+ LiveVariables *LV) const override;
virtual void copyPhysReg(MachineBasicBlock &MBB,
@@ -78,61 +79,62 @@ public:
unsigned DestReg, unsigned SrcReg,
bool KillSrc) const = 0;
- virtual bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const;
+ bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const override;
- virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- unsigned SrcReg, bool isKill, int FrameIndex,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const;
- virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- unsigned DestReg, int FrameIndex,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const;
+ void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const override;
+ void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const override;
protected:
MachineInstr *foldMemoryOperandImpl(MachineFunction &MF,
MachineInstr *MI,
const SmallVectorImpl<unsigned> &Ops,
- int FrameIndex) const;
+ int FrameIndex) const override;
MachineInstr *foldMemoryOperandImpl(MachineFunction &MF,
MachineInstr *MI,
const SmallVectorImpl<unsigned> &Ops,
- MachineInstr *LoadMI) const;
+ MachineInstr *LoadMI) const override;
/// \returns the smallest register index that will be accessed by an indirect
/// read or write or -1 if indirect addressing is not used by this program.
- virtual int getIndirectIndexBegin(const MachineFunction &MF) const;
+ int getIndirectIndexBegin(const MachineFunction &MF) const;
/// \returns the largest register index that will be accessed by an indirect
/// read or write or -1 if indirect addressing is not used by this program.
- virtual int getIndirectIndexEnd(const MachineFunction &MF) const;
+ int getIndirectIndexEnd(const MachineFunction &MF) const;
public:
bool canFoldMemoryOperand(const MachineInstr *MI,
- const SmallVectorImpl<unsigned> &Ops) const;
+ const SmallVectorImpl<unsigned> &Ops) const override;
bool unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
- unsigned Reg, bool UnfoldLoad, bool UnfoldStore,
- SmallVectorImpl<MachineInstr *> &NewMIs) const;
+ unsigned Reg, bool UnfoldLoad, bool UnfoldStore,
+ SmallVectorImpl<MachineInstr *> &NewMIs) const override;
bool unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
- SmallVectorImpl<SDNode *> &NewNodes) const;
+ SmallVectorImpl<SDNode *> &NewNodes) const override;
unsigned getOpcodeAfterMemoryUnfold(unsigned Opc,
- bool UnfoldLoad, bool UnfoldStore,
- unsigned *LoadRegIndex = 0) const;
+ bool UnfoldLoad, bool UnfoldStore,
+ unsigned *LoadRegIndex = nullptr) const override;
bool shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
int64_t Offset1, int64_t Offset2,
- unsigned NumLoads) const;
+ unsigned NumLoads) const override;
- bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
+ bool
+ ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override;
void insertNoop(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI) const;
- bool isPredicated(const MachineInstr *MI) const;
+ MachineBasicBlock::iterator MI) const override;
+ bool isPredicated(const MachineInstr *MI) const override;
bool SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
- const SmallVectorImpl<MachineOperand> &Pred2) const;
+ const SmallVectorImpl<MachineOperand> &Pred2) const override;
bool DefinesPredicate(MachineInstr *MI,
- std::vector<MachineOperand> &Pred) const;
- bool isPredicable(MachineInstr *MI) const;
- bool isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const;
+ std::vector<MachineOperand> &Pred) const override;
+ bool isPredicable(MachineInstr *MI) const override;
+ bool isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const override;
// Helper functions that check the opcode for status information
bool isLoadInst(llvm::MachineInstr *MI) const;
@@ -186,8 +188,7 @@ public:
/// \brief Convert the AMDIL MachineInstr to a supported ISA
/// MachineInstr
- virtual void convertToISA(MachineInstr & MI, MachineFunction &MF,
- DebugLoc DL) const;
+ void convertToISA(MachineInstr & MI, MachineFunction &MF, DebugLoc DL) const;
/// \brief Build a MOV instruction.
virtual MachineInstr *buildMovInstr(MachineBasicBlock *MBB,
diff --git a/lib/Target/R600/AMDGPUInstrInfo.td b/lib/Target/R600/AMDGPUInstrInfo.td
index 69d8059..f96dbb4 100644
--- a/lib/Target/R600/AMDGPUInstrInfo.td
+++ b/lib/Target/R600/AMDGPUInstrInfo.td
@@ -92,3 +92,18 @@ def AMDGPUbfe_i32 : SDNode<"AMDGPUISD::BFE_I32", AMDGPUDTIntTernaryOp>;
def AMDGPUbfi : SDNode<"AMDGPUISD::BFI", AMDGPUDTIntTernaryOp>;
def AMDGPUbfm : SDNode<"AMDGPUISD::BFM", SDTIntBinOp>;
+// Signed and unsigned 24-bit multiply. The highest 8 bits are ignored when
+// performing the multiply. The result is a 32-bit value.
+def AMDGPUmul_u24 : SDNode<"AMDGPUISD::MUL_U24", SDTIntBinOp,
+ [SDNPCommutative]
+>;
+def AMDGPUmul_i24 : SDNode<"AMDGPUISD::MUL_I24", SDTIntBinOp,
+ [SDNPCommutative]
+>;
+
+def AMDGPUmad_u24 : SDNode<"AMDGPUISD::MAD_U24", AMDGPUDTIntTernaryOp,
+ []
+>;
+def AMDGPUmad_i24 : SDNode<"AMDGPUISD::MAD_I24", AMDGPUDTIntTernaryOp,
+ []
+>;
diff --git a/lib/Target/R600/AMDGPUInstructions.td b/lib/Target/R600/AMDGPUInstructions.td
index 505fc81..80bdf5b 100644
--- a/lib/Target/R600/AMDGPUInstructions.td
+++ b/lib/Target/R600/AMDGPUInstructions.td
@@ -37,6 +37,18 @@ class AMDGPUShaderInst <dag outs, dag ins, string asm, list<dag> pattern>
def InstFlag : OperandWithDefaultOps <i32, (ops (i32 0))>;
def ADDRIndirect : ComplexPattern<iPTR, 2, "SelectADDRIndirect", [], []>;
+def u32imm : Operand<i32> {
+ let PrintMethod = "printU32ImmOperand";
+}
+
+def u16imm : Operand<i16> {
+ let PrintMethod = "printU16ImmOperand";
+}
+
+def u8imm : Operand<i8> {
+ let PrintMethod = "printU8ImmOperand";
+}
+
//===----------------------------------------------------------------------===//
// PatLeafs for floating-point comparisons
//===----------------------------------------------------------------------===//
@@ -253,9 +265,6 @@ def FP_ONE : PatLeaf <
[{return N->isExactlyValue(1.0);}]
>;
-def U24 : ComplexPattern<i32, 1, "SelectU24", [], []>;
-def I24 : ComplexPattern<i32, 1, "SelectI24", [], []>;
-
let isCodeGenOnly = 1, isPseudo = 1 in {
let usesCustomInserter = 1 in {
@@ -414,6 +423,40 @@ class UMUL24Pattern <Instruction UMUL24> : Pat <
>;
*/
+class IMad24Pat<Instruction Inst> : Pat <
+ (add (AMDGPUmul_i24 i32:$src0, i32:$src1), i32:$src2),
+ (Inst $src0, $src1, $src2)
+>;
+
+class UMad24Pat<Instruction Inst> : Pat <
+ (add (AMDGPUmul_u24 i32:$src0, i32:$src1), i32:$src2),
+ (Inst $src0, $src1, $src2)
+>;
+
+multiclass Expand24IBitOps<Instruction MulInst, Instruction AddInst> {
+ def _expand_imad24 : Pat <
+ (AMDGPUmad_i24 i32:$src0, i32:$src1, i32:$src2),
+ (AddInst (MulInst $src0, $src1), $src2)
+ >;
+
+ def _expand_imul24 : Pat <
+ (AMDGPUmul_i24 i32:$src0, i32:$src1),
+ (MulInst $src0, $src1)
+ >;
+}
+
+multiclass Expand24UBitOps<Instruction MulInst, Instruction AddInst> {
+ def _expand_umad24 : Pat <
+ (AMDGPUmad_u24 i32:$src0, i32:$src1, i32:$src2),
+ (AddInst (MulInst $src0, $src1), $src2)
+ >;
+
+ def _expand_umul24 : Pat <
+ (AMDGPUmul_u24 i32:$src0, i32:$src1),
+ (MulInst $src0, $src1)
+ >;
+}
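
These multiclasses lower the 24-bit nodes to an ordinary full-width multiply (plus an add for the MAD forms). That is only equivalent when the operands already fit in 24 bits, which holds for nodes created by the MUL combine above. In host terms (expanded_mad_u24 is hypothetical):

  static uint32_t expanded_mad_u24(uint32_t A, uint32_t B, uint32_t C) {
    // Assumes A and B fit in 24 bits, as the MUL combine guarantees.
    return A * B + C;  // (AddInst (MulInst $src0, $src1), $src2)
  }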
+
include "R600Instructions.td"
include "R700Instructions.td"
include "EvergreenInstructions.td"
diff --git a/lib/Target/R600/AMDGPUIntrinsics.td b/lib/Target/R600/AMDGPUIntrinsics.td
index c6521d0..9ad5e72 100644
--- a/lib/Target/R600/AMDGPUIntrinsics.td
+++ b/lib/Target/R600/AMDGPUIntrinsics.td
@@ -49,6 +49,10 @@ let TargetPrefix = "AMDGPU", isTarget = 1 in {
def int_AMDGPU_imin : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_AMDGPU_umax : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_AMDGPU_umin : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_umul24 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_imul24 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_imad24 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_AMDGPU_umad24 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_AMDGPU_cube : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
def int_AMDGPU_bfi : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_AMDGPU_bfe_i32 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
diff --git a/lib/Target/R600/AMDGPUMCInstLower.cpp b/lib/Target/R600/AMDGPUMCInstLower.cpp
index 2c9909f..b759495 100644
--- a/lib/Target/R600/AMDGPUMCInstLower.cpp
+++ b/lib/Target/R600/AMDGPUMCInstLower.cpp
@@ -17,6 +17,7 @@
#include "AMDGPUAsmPrinter.h"
#include "InstPrinter/AMDGPUInstPrinter.h"
#include "R600InstrInfo.h"
+#include "SIInstrInfo.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/IR/Constants.h"
@@ -31,16 +32,30 @@
using namespace llvm;
-AMDGPUMCInstLower::AMDGPUMCInstLower(MCContext &ctx):
- Ctx(ctx)
+AMDGPUMCInstLower::AMDGPUMCInstLower(MCContext &ctx, const AMDGPUSubtarget &st):
+ Ctx(ctx), ST(st)
{ }
+enum AMDGPUMCInstLower::SISubtarget
+AMDGPUMCInstLower::AMDGPUSubtargetToSISubtarget(unsigned) const {
+ return AMDGPUMCInstLower::SI;
+}
+
+unsigned AMDGPUMCInstLower::getMCOpcode(unsigned MIOpcode) const {
+
+ int MCOpcode = AMDGPU::getMCOpcode(MIOpcode,
+ AMDGPUSubtargetToSISubtarget(ST.getGeneration()));
+ if (MCOpcode == -1)
+ MCOpcode = MIOpcode;
+
+ return MCOpcode;
+}
+
void AMDGPUMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const {
- OutMI.setOpcode(MI->getOpcode());
- for (unsigned i = 0, e = MI->getNumExplicitOperands(); i != e; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
+ OutMI.setOpcode(getMCOpcode(MI->getOpcode()));
+ for (const MachineOperand &MO : MI->explicit_operands()) {
MCOperand MCOp;
switch (MO.getType()) {
default:
@@ -67,7 +82,8 @@ void AMDGPUMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const {
}
void AMDGPUAsmPrinter::EmitInstruction(const MachineInstr *MI) {
- AMDGPUMCInstLower MCInstLowering(OutContext);
+ AMDGPUMCInstLower MCInstLowering(OutContext,
+ MF->getTarget().getSubtarget<AMDGPUSubtarget>());
#ifdef _DEBUG
StringRef Err;
diff --git a/lib/Target/R600/AMDGPUMCInstLower.h b/lib/Target/R600/AMDGPUMCInstLower.h
index d7d538e..2b7f1e3 100644
--- a/lib/Target/R600/AMDGPUMCInstLower.h
+++ b/lib/Target/R600/AMDGPUMCInstLower.h
@@ -13,16 +13,30 @@
namespace llvm {
+class AMDGPUSubtarget;
class MCInst;
class MCContext;
class MachineInstr;
class AMDGPUMCInstLower {
+ // This must be kept in sync with the SISubtarget class in SIInstrInfo.td
+ enum SISubtarget {
+ SI = 0
+ };
+
MCContext &Ctx;
+ const AMDGPUSubtarget &ST;
+
+ /// Convert a member of the AMDGPUSubtarget::Generation enum to the
+ /// SISubtarget enum.
+ enum SISubtarget AMDGPUSubtargetToSISubtarget(unsigned Gen) const;
+
+ /// Get the MC opcode for this MachineInstr.
+ unsigned getMCOpcode(unsigned MIOpcode) const;
public:
- AMDGPUMCInstLower(MCContext &ctx);
+ AMDGPUMCInstLower(MCContext &ctx, const AMDGPUSubtarget &ST);
/// \brief Lower a MachineInstr to an MCInst
void lower(const MachineInstr *MI, MCInst &OutMI) const;
diff --git a/lib/Target/R600/AMDGPURegisterInfo.cpp b/lib/Target/R600/AMDGPURegisterInfo.cpp
index 8fbec4e..19927fa 100644
--- a/lib/Target/R600/AMDGPURegisterInfo.cpp
+++ b/lib/Target/R600/AMDGPURegisterInfo.cpp
@@ -27,10 +27,10 @@ AMDGPURegisterInfo::AMDGPURegisterInfo(TargetMachine &tm)
// they are not supported at this time.
//===----------------------------------------------------------------------===//
-const uint16_t AMDGPURegisterInfo::CalleeSavedReg = AMDGPU::NoRegister;
+const MCPhysReg AMDGPURegisterInfo::CalleeSavedReg = AMDGPU::NoRegister;
-const uint16_t* AMDGPURegisterInfo::getCalleeSavedRegs(const MachineFunction *MF)
- const {
+const MCPhysReg*
+AMDGPURegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
return &CalleeSavedReg;
}
@@ -54,7 +54,7 @@ unsigned AMDGPURegisterInfo::getSubRegFromChannel(unsigned Channel) const {
AMDGPU::sub15
};
- assert (Channel < array_lengthof(SubRegs));
+ assert(Channel < array_lengthof(SubRegs));
return SubRegs[Channel];
}
diff --git a/lib/Target/R600/AMDGPURegisterInfo.h b/lib/Target/R600/AMDGPURegisterInfo.h
index 688e1a0..a7cba0d 100644
--- a/lib/Target/R600/AMDGPURegisterInfo.h
+++ b/lib/Target/R600/AMDGPURegisterInfo.h
@@ -30,11 +30,11 @@ class TargetInstrInfo;
struct AMDGPURegisterInfo : public AMDGPUGenRegisterInfo {
TargetMachine &TM;
- static const uint16_t CalleeSavedReg;
+ static const MCPhysReg CalleeSavedReg;
AMDGPURegisterInfo(TargetMachine &tm);
- virtual BitVector getReservedRegs(const MachineFunction &MF) const {
+ BitVector getReservedRegs(const MachineFunction &MF) const override {
assert(!"Unimplemented"); return BitVector();
}
@@ -43,11 +43,11 @@ struct AMDGPURegisterInfo : public AMDGPUGenRegisterInfo {
/// \returns The ISA reg class that is equivalent to \p RC.
virtual const TargetRegisterClass * getISARegClass(
const TargetRegisterClass * RC) const {
- assert(!"Unimplemented"); return NULL;
+ assert(!"Unimplemented"); return nullptr;
}
virtual const TargetRegisterClass* getCFGStructurizerRegClass(MVT VT) const {
- assert(!"Unimplemented"); return NULL;
+ assert(!"Unimplemented"); return nullptr;
}
virtual unsigned getHWRegIndex(unsigned Reg) const {
@@ -58,11 +58,11 @@ struct AMDGPURegisterInfo : public AMDGPUGenRegisterInfo {
/// (e.g. getSubRegFromChannel(0) -> AMDGPU::sub0)
unsigned getSubRegFromChannel(unsigned Channel) const;
- const uint16_t* getCalleeSavedRegs(const MachineFunction *MF) const;
+ const MCPhysReg* getCalleeSavedRegs(const MachineFunction *MF) const override;
void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj,
unsigned FIOperandNum,
- RegScavenger *RS) const;
- unsigned getFrameRegister(const MachineFunction &MF) const;
+ RegScavenger *RS) const override;
+ unsigned getFrameRegister(const MachineFunction &MF) const override;
unsigned getIndirectSubReg(unsigned IndirectIndex) const;
diff --git a/lib/Target/R600/AMDGPUSubtarget.cpp b/lib/Target/R600/AMDGPUSubtarget.cpp
index e77ab5e..f3b9932 100644
--- a/lib/Target/R600/AMDGPUSubtarget.cpp
+++ b/lib/Target/R600/AMDGPUSubtarget.cpp
@@ -16,6 +16,8 @@
using namespace llvm;
+#define DEBUG_TYPE "amdgpu-subtarget"
+
#define GET_SUBTARGETINFO_ENUM
#define GET_SUBTARGETINFO_TARGET_DESC
#define GET_SUBTARGETINFO_CTOR
@@ -28,9 +30,6 @@ AMDGPUSubtarget::AMDGPUSubtarget(StringRef TT, StringRef CPU, StringRef FS) :
// Default card
StringRef GPU = CPU;
Is64bit = false;
- DefaultSize[0] = 64;
- DefaultSize[1] = 1;
- DefaultSize[2] = 1;
HasVertexCache = false;
TexVTXClauseSize = 0;
Gen = AMDGPUSubtarget::R600;
@@ -106,14 +105,6 @@ bool
AMDGPUSubtarget::isTargetELF() const {
return false;
}
-size_t
-AMDGPUSubtarget::getDefaultSize(uint32_t dim) const {
- if (dim > 2) {
- return 1;
- } else {
- return DefaultSize[dim];
- }
-}
std::string
AMDGPUSubtarget::getDeviceName() const {
diff --git a/lib/Target/R600/AMDGPUSubtarget.h b/lib/Target/R600/AMDGPUSubtarget.h
index 8874d14..1b041d6 100644
--- a/lib/Target/R600/AMDGPUSubtarget.h
+++ b/lib/Target/R600/AMDGPUSubtarget.h
@@ -38,7 +38,6 @@ public:
};
private:
- size_t DefaultSize[3];
std::string DevName;
bool Is64bit;
bool Is32on64bit;
@@ -60,7 +59,7 @@ public:
AMDGPUSubtarget(StringRef TT, StringRef CPU, StringRef FS);
const InstrItineraryData &getInstrItineraryData() const { return InstrItins; }
- virtual void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+ void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
bool is64bit() const;
bool hasVertexCache() const;
@@ -77,20 +76,28 @@ public:
return hasBFE();
}
+ bool hasMulU24() const {
+ return (getGeneration() >= EVERGREEN);
+ }
+
+ bool hasMulI24() const {
+ return (getGeneration() >= SOUTHERN_ISLANDS ||
+ hasCaymanISA());
+ }
+
bool IsIRStructurizerEnabled() const;
bool isIfCvtEnabled() const;
unsigned getWavefrontSize() const;
unsigned getStackEntrySize() const;
bool hasCFAluBug() const;
- virtual bool enableMachineScheduler() const {
+ bool enableMachineScheduler() const override {
return getGeneration() <= NORTHERN_ISLANDS;
}
// Helper functions to simplify if statements
bool isTargetELF() const;
std::string getDeviceName() const;
- virtual size_t getDefaultSize(uint32_t dim) const;
bool dumpCode() const { return DumpCode; }
bool r600ALUEncoding() const { return R600ALUInst; }
diff --git a/lib/Target/R600/AMDGPUTargetMachine.cpp b/lib/Target/R600/AMDGPUTargetMachine.cpp
index b11fce3..174fdca 100644
--- a/lib/Target/R600/AMDGPUTargetMachine.cpp
+++ b/lib/Target/R600/AMDGPUTargetMachine.cpp
@@ -42,7 +42,7 @@ extern "C" void LLVMInitializeR600Target() {
}
static ScheduleDAGInstrs *createR600MachineScheduler(MachineSchedContext *C) {
- return new ScheduleDAGMILive(C, new R600SchedStrategy());
+ return new ScheduleDAGMILive(C, make_unique<R600SchedStrategy>());
}
static MachineSchedRegistry
@@ -54,7 +54,7 @@ static std::string computeDataLayout(const AMDGPUSubtarget &ST) {
if (ST.is64bit()) {
// 32-bit private, local, and region pointers. 64-bit global and constant.
- Ret += "-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:64:64";
+ Ret += "-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64";
}
Ret += "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256"
@@ -103,20 +103,20 @@ public:
return getTM<AMDGPUTargetMachine>();
}
- virtual ScheduleDAGInstrs *
- createMachineScheduler(MachineSchedContext *C) const {
+ ScheduleDAGInstrs *
+ createMachineScheduler(MachineSchedContext *C) const override {
const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS)
return createR600MachineScheduler(C);
- return 0;
+ return nullptr;
}
- virtual bool addPreISel();
- virtual bool addInstSelector();
- virtual bool addPreRegAlloc();
- virtual bool addPostRegAlloc();
- virtual bool addPreSched2();
- virtual bool addPreEmitPass();
+ bool addPreISel() override;
+ bool addInstSelector() override;
+ bool addPreRegAlloc() override;
+ bool addPostRegAlloc() override;
+ bool addPreSched2() override;
+ bool addPreEmitPass() override;
};
} // End of anonymous namespace
@@ -154,6 +154,7 @@ AMDGPUPassConfig::addPreISel() {
bool AMDGPUPassConfig::addInstSelector() {
addPass(createAMDGPUISelDag(getAMDGPUTargetMachine()));
+ addPass(createSILowerI1CopiesPass());
return false;
}
diff --git a/lib/Target/R600/AMDGPUTargetMachine.h b/lib/Target/R600/AMDGPUTargetMachine.h
index f942614..1287e13 100644
--- a/lib/Target/R600/AMDGPUTargetMachine.h
+++ b/lib/Target/R600/AMDGPUTargetMachine.h
@@ -20,7 +20,6 @@
#include "AMDGPUSubtarget.h"
#include "AMDILIntrinsicInfo.h"
#include "R600ISelLowering.h"
-#include "llvm/ADT/OwningPtr.h"
#include "llvm/IR/DataLayout.h"
namespace llvm {
@@ -31,8 +30,8 @@ class AMDGPUTargetMachine : public LLVMTargetMachine {
const DataLayout Layout;
AMDGPUFrameLowering FrameLowering;
AMDGPUIntrinsicInfo IntrinsicInfo;
- OwningPtr<AMDGPUInstrInfo> InstrInfo;
- OwningPtr<AMDGPUTargetLowering> TLInfo;
+ std::unique_ptr<AMDGPUInstrInfo> InstrInfo;
+ std::unique_ptr<AMDGPUTargetLowering> TLInfo;
const InstrItineraryData *InstrItins;
public:
@@ -40,30 +39,32 @@ public:
StringRef CPU, TargetOptions Options, Reloc::Model RM,
CodeModel::Model CM, CodeGenOpt::Level OL);
~AMDGPUTargetMachine();
- virtual const AMDGPUFrameLowering *getFrameLowering() const {
+ const AMDGPUFrameLowering *getFrameLowering() const override {
return &FrameLowering;
}
- virtual const AMDGPUIntrinsicInfo *getIntrinsicInfo() const {
+ const AMDGPUIntrinsicInfo *getIntrinsicInfo() const override {
return &IntrinsicInfo;
}
- virtual const AMDGPUInstrInfo *getInstrInfo() const {
+ const AMDGPUInstrInfo *getInstrInfo() const override {
return InstrInfo.get();
}
- virtual const AMDGPUSubtarget *getSubtargetImpl() const { return &Subtarget; }
- virtual const AMDGPURegisterInfo *getRegisterInfo() const {
+ const AMDGPUSubtarget *getSubtargetImpl() const override {
+ return &Subtarget;
+ }
+ const AMDGPURegisterInfo *getRegisterInfo() const override {
return &InstrInfo->getRegisterInfo();
}
- virtual AMDGPUTargetLowering *getTargetLowering() const {
+ AMDGPUTargetLowering *getTargetLowering() const override {
return TLInfo.get();
}
- virtual const InstrItineraryData *getInstrItineraryData() const {
+ const InstrItineraryData *getInstrItineraryData() const override {
return InstrItins;
}
- virtual const DataLayout *getDataLayout() const { return &Layout; }
- virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
+ const DataLayout *getDataLayout() const override { return &Layout; }
+ TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
/// \brief Register R600 analysis passes with a pass manager.
- virtual void addAnalysisPasses(PassManagerBase &PM);
+ void addAnalysisPasses(PassManagerBase &PM) override;
};
} // End namespace llvm
diff --git a/lib/Target/R600/AMDGPUTargetTransformInfo.cpp b/lib/Target/R600/AMDGPUTargetTransformInfo.cpp
index 51225eb..ea78f43 100644
--- a/lib/Target/R600/AMDGPUTargetTransformInfo.cpp
+++ b/lib/Target/R600/AMDGPUTargetTransformInfo.cpp
@@ -15,7 +15,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "AMDGPUtti"
#include "AMDGPU.h"
#include "AMDGPUTargetMachine.h"
#include "llvm/Analysis/LoopInfo.h"
@@ -26,6 +25,8 @@
#include "llvm/Target/TargetLowering.h"
using namespace llvm;
+#define DEBUG_TYPE "AMDGPUtti"
+
// Declare the pass initialization routine locally as target-specific passes
// don't have a target-wide initialization entry point, and so we rely on the
// pass constructor initialization.
@@ -45,7 +46,7 @@ class AMDGPUTTI final : public ImmutablePass, public TargetTransformInfo {
unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const;
public:
- AMDGPUTTI() : ImmutablePass(ID), TM(0), ST(0), TLI(0) {
+ AMDGPUTTI() : ImmutablePass(ID), TM(nullptr), ST(nullptr), TLI(nullptr) {
llvm_unreachable("This pass cannot be directly constructed");
}
@@ -55,9 +56,9 @@ public:
initializeAMDGPUTTIPass(*PassRegistry::getPassRegistry());
}
- virtual void initializePass() override { pushTTIStack(this); }
+ void initializePass() override { pushTTIStack(this); }
- virtual void getAnalysisUsage(AnalysisUsage &AU) const override {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
TargetTransformInfo::getAnalysisUsage(AU);
}
@@ -65,15 +66,16 @@ public:
static char ID;
/// Provide necessary pointer adjustments for the two base classes.
- virtual void *getAdjustedAnalysisPointer(const void *ID) override {
+ void *getAdjustedAnalysisPointer(const void *ID) override {
if (ID == &TargetTransformInfo::ID)
return (TargetTransformInfo *)this;
return this;
}
- virtual bool hasBranchDivergence() const override;
+ bool hasBranchDivergence() const override;
- virtual void getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) const;
+ void getUnrollingPreferences(Loop *L,
+ UnrollingPreferences &UP) const override;
/// @}
};
@@ -109,11 +111,11 @@ void AMDGPUTTI::getUnrollingPreferences(Loop *L,
// require us to use indirect addressing, which is slow and prone to
// compiler bugs. If this loop does an address calculation on an
// alloca ptr, then we want to use a higher than normal loop unroll
- // threshold. This will give SROA a better chance to eliminate these
- // allocas.
- //
- // Don't use the maximum allowed value here as it will make some
- // programs way too big.
+ // threshold. This will give SROA a better chance to eliminate these
+ // allocas.
+ //
+ // Don't use the maximum allowed value here as it will make some
+ // programs way too big.
UP.Threshold = 500;
}
}
diff --git a/lib/Target/R600/AMDILCFGStructurizer.cpp b/lib/Target/R600/AMDILCFGStructurizer.cpp
index 21ca560..f3a0391 100644
--- a/lib/Target/R600/AMDILCFGStructurizer.cpp
+++ b/lib/Target/R600/AMDILCFGStructurizer.cpp
@@ -8,8 +8,6 @@
/// \file
//==-----------------------------------------------------------------------===//
-#define DEBUG_TYPE "structcfg"
-
#include "AMDGPU.h"
#include "AMDGPUInstrInfo.h"
#include "R600InstrInfo.h"
@@ -34,6 +32,8 @@
using namespace llvm;
+#define DEBUG_TYPE "structcfg"
+
#define DEFAULT_VEC_SLOTS 8
// TODO: move-begin.
@@ -135,15 +135,15 @@ public:
static char ID;
AMDGPUCFGStructurizer() :
- MachineFunctionPass(ID), TII(NULL), TRI(NULL) {
+ MachineFunctionPass(ID), TII(nullptr), TRI(nullptr) {
initializeAMDGPUCFGStructurizerPass(*PassRegistry::getPassRegistry());
}
- const char *getPassName() const {
+ const char *getPassName() const override {
return "AMDGPU Control Flow Graph structurizer Pass";
}
- void getAnalysisUsage(AnalysisUsage &AU) const {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addPreserved<MachineFunctionAnalysis>();
AU.addRequired<MachineFunctionAnalysis>();
AU.addRequired<MachineDominatorTree>();
@@ -159,7 +159,7 @@ public:
/// sure all loops have an exit block
bool prepare();
- bool runOnMachineFunction(MachineFunction &MF) {
+ bool runOnMachineFunction(MachineFunction &MF) override {
TII = static_cast<const R600InstrInfo *>(MF.getTarget().getInstrInfo());
TRI = &TII->getRegisterInfo();
DEBUG(MF.dump(););
@@ -168,7 +168,7 @@ public:
MLI = &getAnalysis<MachineLoopInfo>();
DEBUG(dbgs() << "LoopInfo:\n"; PrintLoopinfo(*MLI););
MDT = &getAnalysis<MachineDominatorTree>();
- DEBUG(MDT->print(dbgs(), (const llvm::Module*)0););
+ DEBUG(MDT->print(dbgs(), (const llvm::Module*)nullptr););
PDT = &getAnalysis<MachinePostDominatorTree>();
DEBUG(PDT->print(dbgs()););
prepare();
@@ -334,7 +334,7 @@ protected:
MachineBasicBlock *DstMBB, MachineBasicBlock::iterator I);
void recordSccnum(MachineBasicBlock *MBB, int SCCNum);
void retireBlock(MachineBasicBlock *MBB);
- void setLoopLandBlock(MachineLoop *LoopRep, MachineBasicBlock *MBB = NULL);
+ void setLoopLandBlock(MachineLoop *LoopRep, MachineBasicBlock *MBB = nullptr);
MachineBasicBlock *findNearestCommonPostDom(std::set<MachineBasicBlock *>&);
  /// This is a workaround for findNearestCommonDominator not being available
@@ -361,7 +361,7 @@ MachineBasicBlock *AMDGPUCFGStructurizer::getLoopLandInfo(MachineLoop *LoopRep)
const {
LoopLandInfoMap::const_iterator It = LLInfoMap.find(LoopRep);
if (It == LLInfoMap.end())
- return NULL;
+ return nullptr;
return (*It).second;
}
@@ -632,7 +632,7 @@ MachineInstr *AMDGPUCFGStructurizer::getNormalBlockBranchInstr(
MachineInstr *MI = &*It;
if (MI && (isCondBranch(MI) || isUncondBranch(MI)))
return MI;
- return NULL;
+ return nullptr;
}
MachineInstr *AMDGPUCFGStructurizer::getLoopendBlockBranchInstr(
@@ -648,7 +648,7 @@ MachineInstr *AMDGPUCFGStructurizer::getLoopendBlockBranchInstr(
break;
}
}
- return NULL;
+ return nullptr;
}
MachineInstr *AMDGPUCFGStructurizer::getReturnInstr(MachineBasicBlock *MBB) {
@@ -658,7 +658,7 @@ MachineInstr *AMDGPUCFGStructurizer::getReturnInstr(MachineBasicBlock *MBB) {
if (instr->getOpcode() == AMDGPU::RETURN)
return instr;
}
- return NULL;
+ return nullptr;
}
MachineInstr *AMDGPUCFGStructurizer::getContinueInstr(MachineBasicBlock *MBB) {
@@ -668,7 +668,7 @@ MachineInstr *AMDGPUCFGStructurizer::getContinueInstr(MachineBasicBlock *MBB) {
if (MI->getOpcode() == AMDGPU::CONTINUE)
return MI;
}
- return NULL;
+ return nullptr;
}
bool AMDGPUCFGStructurizer::isReturnBlock(MachineBasicBlock *MBB) {
@@ -819,7 +819,7 @@ bool AMDGPUCFGStructurizer::run() {
SmallVectorImpl<MachineBasicBlock *>::const_iterator SccBeginIter =
It;
- MachineBasicBlock *SccBeginMBB = NULL;
+ MachineBasicBlock *SccBeginMBB = nullptr;
int SccNumBlk = 0; // The number of active blocks, init to a
// maximum possible number.
int SccNumIter; // Number of iteration in this SCC.
@@ -874,7 +874,7 @@ bool AMDGPUCFGStructurizer::run() {
}
if (ContNextScc)
- SccBeginMBB = NULL;
+ SccBeginMBB = nullptr;
} //while, "one iteration" over the function.
MachineBasicBlock *EntryMBB =
@@ -933,7 +933,7 @@ void AMDGPUCFGStructurizer::orderBlocks(MachineFunction *MF) {
MachineBasicBlock *MBB;
for (scc_iterator<MachineFunction *> It = scc_begin(MF); !It.isAtEnd();
++It, ++SccNum) {
- std::vector<MachineBasicBlock *> &SccNext = *It;
+ const std::vector<MachineBasicBlock *> &SccNext = *It;
for (std::vector<MachineBasicBlock *>::const_iterator
blockIter = SccNext.begin(), blockEnd = SccNext.end();
blockIter != blockEnd; ++blockIter) {
@@ -1026,7 +1026,7 @@ int AMDGPUCFGStructurizer::ifPatternMatch(MachineBasicBlock *MBB) {
} else if (TrueMBB->succ_size() == 1 && *TrueMBB->succ_begin() == FalseMBB) {
// Triangle pattern, false is empty
LandBlk = FalseMBB;
- FalseMBB = NULL;
+ FalseMBB = nullptr;
} else if (FalseMBB->succ_size() == 1
&& *FalseMBB->succ_begin() == TrueMBB) {
// Triangle pattern, true is empty
@@ -1034,7 +1034,7 @@ int AMDGPUCFGStructurizer::ifPatternMatch(MachineBasicBlock *MBB) {
std::swap(TrueMBB, FalseMBB);
reversePredicateSetter(MBB->end());
LandBlk = FalseMBB;
- FalseMBB = NULL;
+ FalseMBB = nullptr;
} else if (FalseMBB->succ_size() == 1
&& isSameloopDetachedContbreak(TrueMBB, FalseMBB)) {
LandBlk = *FalseMBB->succ_begin();
@@ -1075,13 +1075,11 @@ int AMDGPUCFGStructurizer::ifPatternMatch(MachineBasicBlock *MBB) {
int AMDGPUCFGStructurizer::loopendPatternMatch() {
std::vector<MachineLoop *> NestedLoops;
- for (MachineLoopInfo::iterator It = MLI->begin(), E = MLI->end();
- It != E; ++It) {
- df_iterator<MachineLoop *> LpIt = df_begin(*It),
- LpE = df_end(*It);
- for (; LpIt != LpE; ++LpIt)
- NestedLoops.push_back(*LpIt);
- }
+ for (MachineLoopInfo::iterator It = MLI->begin(), E = MLI->end(); It != E;
+ ++It)
+ for (MachineLoop *ML : depth_first(*It))
+ NestedLoops.push_back(ML);
+
if (NestedLoops.size() == 0)
return 0;
@@ -1244,7 +1242,7 @@ int AMDGPUCFGStructurizer::handleJumpintoIfImp(MachineBasicBlock *HeadMBB,
DEBUG(
dbgs() << " not working\n";
);
- DownBlk = (DownBlk->succ_size() == 1) ? (*DownBlk->succ_begin()) : NULL;
+ DownBlk = (DownBlk->succ_size() == 1) ? (*DownBlk->succ_begin()) : nullptr;
} // walk down the postDomTree
return Num;
@@ -1723,11 +1721,11 @@ AMDGPUCFGStructurizer::normalizeInfiniteLoopExit(MachineLoop* LoopRep) {
const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
if (!LoopHeader || !LoopLatch)
- return NULL;
+ return nullptr;
MachineInstr *BranchMI = getLoopendBlockBranchInstr(LoopLatch);
// Is LoopRep an infinite loop ?
if (!BranchMI || !isUncondBranch(BranchMI))
- return NULL;
+ return nullptr;
MachineBasicBlock *DummyExitBlk = FuncRep->CreateMachineBasicBlock();
FuncRep->push_back(DummyExitBlk); //insert to function
@@ -1860,7 +1858,7 @@ AMDGPUCFGStructurizer::findNearestCommonPostDom(MachineBasicBlock *MBB1,
return findNearestCommonPostDom(MBB1, *MBB2->succ_begin());
if (!Node1 || !Node2)
- return NULL;
+ return nullptr;
Node1 = Node1->getIDom();
while (Node1) {
@@ -1869,7 +1867,7 @@ AMDGPUCFGStructurizer::findNearestCommonPostDom(MachineBasicBlock *MBB1,
Node1 = Node1->getIDom();
}
- return NULL;
+ return nullptr;
}
MachineBasicBlock *
diff --git a/lib/Target/R600/AMDILISelLowering.cpp b/lib/Target/R600/AMDILISelLowering.cpp
index 0761ff4..7cea803 100644
--- a/lib/Target/R600/AMDILISelLowering.cpp
+++ b/lib/Target/R600/AMDILISelLowering.cpp
@@ -39,61 +39,55 @@ using namespace llvm;
// TargetLowering Class Implementation Begins
//===----------------------------------------------------------------------===//
void AMDGPUTargetLowering::InitAMDILLowering() {
- static const int types[] = {
- (int)MVT::i8,
- (int)MVT::i16,
- (int)MVT::i32,
- (int)MVT::f32,
- (int)MVT::f64,
- (int)MVT::i64,
- (int)MVT::v2i8,
- (int)MVT::v4i8,
- (int)MVT::v2i16,
- (int)MVT::v4i16,
- (int)MVT::v4f32,
- (int)MVT::v4i32,
- (int)MVT::v2f32,
- (int)MVT::v2i32,
- (int)MVT::v2f64,
- (int)MVT::v2i64
+ static const MVT::SimpleValueType types[] = {
+ MVT::i8,
+ MVT::i16,
+ MVT::i32,
+ MVT::f32,
+ MVT::f64,
+ MVT::i64,
+ MVT::v2i8,
+ MVT::v4i8,
+ MVT::v2i16,
+ MVT::v4i16,
+ MVT::v4f32,
+ MVT::v4i32,
+ MVT::v2f32,
+ MVT::v2i32,
+ MVT::v2f64,
+ MVT::v2i64
};
- static const int IntTypes[] = {
- (int)MVT::i8,
- (int)MVT::i16,
- (int)MVT::i32,
- (int)MVT::i64
+ static const MVT::SimpleValueType IntTypes[] = {
+ MVT::i8,
+ MVT::i16,
+ MVT::i32,
+ MVT::i64
};
- static const int FloatTypes[] = {
- (int)MVT::f32,
- (int)MVT::f64
+ static const MVT::SimpleValueType FloatTypes[] = {
+ MVT::f32,
+ MVT::f64
};
- static const int VectorTypes[] = {
- (int)MVT::v2i8,
- (int)MVT::v4i8,
- (int)MVT::v2i16,
- (int)MVT::v4i16,
- (int)MVT::v4f32,
- (int)MVT::v4i32,
- (int)MVT::v2f32,
- (int)MVT::v2i32,
- (int)MVT::v2f64,
- (int)MVT::v2i64
+ static const MVT::SimpleValueType VectorTypes[] = {
+ MVT::v2i8,
+ MVT::v4i8,
+ MVT::v2i16,
+ MVT::v4i16,
+ MVT::v4f32,
+ MVT::v4i32,
+ MVT::v2f32,
+ MVT::v2i32,
+ MVT::v2f64,
+ MVT::v2i64
};
- const size_t NumTypes = array_lengthof(types);
- const size_t NumFloatTypes = array_lengthof(FloatTypes);
- const size_t NumIntTypes = array_lengthof(IntTypes);
- const size_t NumVectorTypes = array_lengthof(VectorTypes);
const AMDGPUSubtarget &STM = getTargetMachine().getSubtarget<AMDGPUSubtarget>();
// These are the current register classes that are
// supported
- for (unsigned int x = 0; x < NumTypes; ++x) {
- MVT::SimpleValueType VT = (MVT::SimpleValueType)types[x];
-
+ for (MVT VT : types) {
setOperationAction(ISD::SUBE, VT, Expand);
setOperationAction(ISD::SUBC, VT, Expand);
setOperationAction(ISD::ADDE, VT, Expand);
@@ -109,9 +103,7 @@ void AMDGPUTargetLowering::InitAMDILLowering() {
setOperationAction(ISD::SDIV, VT, Custom);
}
}
- for (unsigned int x = 0; x < NumFloatTypes; ++x) {
- MVT::SimpleValueType VT = (MVT::SimpleValueType)FloatTypes[x];
-
+ for (MVT VT : FloatTypes) {
// IL does not have these operations for floating point types
setOperationAction(ISD::FP_ROUND_INREG, VT, Expand);
setOperationAction(ISD::SETOLT, VT, Expand);
@@ -124,9 +116,7 @@ void AMDGPUTargetLowering::InitAMDILLowering() {
setOperationAction(ISD::SETULE, VT, Expand);
}
- for (unsigned int x = 0; x < NumIntTypes; ++x) {
- MVT::SimpleValueType VT = (MVT::SimpleValueType)IntTypes[x];
-
+ for (MVT VT : IntTypes) {
// GPU also does not have divrem function for signed or unsigned
setOperationAction(ISD::SDIVREM, VT, Expand);
@@ -142,9 +132,7 @@ void AMDGPUTargetLowering::InitAMDILLowering() {
setOperationAction(ISD::CTLZ, VT, Expand);
}
- for (unsigned int ii = 0; ii < NumVectorTypes; ++ii) {
- MVT::SimpleValueType VT = (MVT::SimpleValueType)VectorTypes[ii];
-
+ for (MVT VT : VectorTypes) {
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
setOperationAction(ISD::SDIVREM, VT, Expand);
setOperationAction(ISD::SMUL_LOHI, VT, Expand);
diff --git a/lib/Target/R600/AMDILIntrinsicInfo.cpp b/lib/Target/R600/AMDILIntrinsicInfo.cpp
index 762ee39..fab4a3b 100644
--- a/lib/Target/R600/AMDILIntrinsicInfo.cpp
+++ b/lib/Target/R600/AMDILIntrinsicInfo.cpp
@@ -38,7 +38,7 @@ AMDGPUIntrinsicInfo::getName(unsigned int IntrID, Type **Tys,
};
if (IntrID < Intrinsic::num_intrinsics) {
- return 0;
+ return nullptr;
}
assert(IntrID < AMDGPUIntrinsic::num_AMDGPU_intrinsics
&& "Invalid intrinsic ID");
diff --git a/lib/Target/R600/AMDILIntrinsicInfo.h b/lib/Target/R600/AMDILIntrinsicInfo.h
index 35559e2..924275a 100644
--- a/lib/Target/R600/AMDILIntrinsicInfo.h
+++ b/lib/Target/R600/AMDILIntrinsicInfo.h
@@ -34,13 +34,13 @@ enum ID {
class AMDGPUIntrinsicInfo : public TargetIntrinsicInfo {
public:
AMDGPUIntrinsicInfo(TargetMachine *tm);
- std::string getName(unsigned int IntrId, Type **Tys = 0,
- unsigned int numTys = 0) const;
- unsigned int lookupName(const char *Name, unsigned int Len) const;
- bool isOverloaded(unsigned int IID) const;
+ std::string getName(unsigned int IntrId, Type **Tys = nullptr,
+ unsigned int numTys = 0) const override;
+ unsigned int lookupName(const char *Name, unsigned int Len) const override;
+ bool isOverloaded(unsigned int IID) const override;
Function *getDeclaration(Module *M, unsigned int ID,
- Type **Tys = 0,
- unsigned int numTys = 0) const;
+ Type **Tys = nullptr,
+ unsigned int numTys = 0) const override;
};
} // end namespace llvm
diff --git a/lib/Target/R600/AMDILIntrinsics.td b/lib/Target/R600/AMDILIntrinsics.td
index 658deb5..4a3e02e 100644
--- a/lib/Target/R600/AMDILIntrinsics.td
+++ b/lib/Target/R600/AMDILIntrinsics.td
@@ -92,10 +92,6 @@ let TargetPrefix = "AMDIL", isTarget = 1 in {
BinaryIntInt;
def int_AMDIL_mulhi_u32 : GCCBuiltin<"__amdil_umul_high">,
BinaryIntInt;
- def int_AMDIL_mul24_i32 : GCCBuiltin<"__amdil_imul24">,
- BinaryIntInt;
- def int_AMDIL_mul24_u32 : GCCBuiltin<"__amdil_umul24">,
- BinaryIntInt;
def int_AMDIL_mulhi24_i32 : GCCBuiltin<"__amdil_imul24_high">,
BinaryIntInt;
def int_AMDIL_mulhi24_u32 : GCCBuiltin<"__amdil_umul24_high">,
diff --git a/lib/Target/R600/CMakeLists.txt b/lib/Target/R600/CMakeLists.txt
index 93a5117..3c6fa5a 100644
--- a/lib/Target/R600/CMakeLists.txt
+++ b/lib/Target/R600/CMakeLists.txt
@@ -45,6 +45,7 @@ add_llvm_target(R600CodeGen
SIInstrInfo.cpp
SIISelLowering.cpp
SILowerControlFlow.cpp
+ SILowerI1Copies.cpp
SIMachineFunctionInfo.cpp
SIRegisterInfo.cpp
SITypeRewriter.cpp
diff --git a/lib/Target/R600/CaymanInstructions.td b/lib/Target/R600/CaymanInstructions.td
index acd7bde..2630345 100644
--- a/lib/Target/R600/CaymanInstructions.td
+++ b/lib/Target/R600/CaymanInstructions.td
@@ -21,12 +21,14 @@ def isCayman : Predicate<"Subtarget.hasCaymanISA()">;
let Predicates = [isCayman] in {
def MULADD_INT24_cm : R600_3OP <0x08, "MULADD_INT24",
- [(set i32:$dst, (add (mul I24:$src0, I24:$src1), i32:$src2))], VecALU
+ [(set i32:$dst, (AMDGPUmad_i24 i32:$src0, i32:$src1, i32:$src2))], VecALU
>;
def MUL_INT24_cm : R600_2OP <0x5B, "MUL_INT24",
- [(set i32:$dst, (mul I24:$src0, I24:$src1))], VecALU
+ [(set i32:$dst, (AMDGPUmul_i24 i32:$src0, i32:$src1))], VecALU
>;
+def : IMad24Pat<MULADD_INT24_cm>;
+
let isVector = 1 in {
def RECIP_IEEE_cm : RECIP_IEEE_Common<0x86>;
@@ -47,6 +49,7 @@ def COS_cm : COS_Common<0x8E>;
def : POW_Common <LOG_IEEE_cm, EXP_IEEE_cm, MUL>;
defm DIV_cm : DIV_Common<RECIP_IEEE_cm>;
+defm : Expand24UBitOps<MULLO_UINT_cm, ADD_INT>;
// RECIP_UINT emulation for Cayman
// The multiplication scales from [0,1] to the unsigned integer range
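
The Cayman patterns above move from matching (mul I24:$a, I24:$b) fragments to dedicated AMDGPUmul_i24/AMDGPUmad_i24 nodes. For reference, a hedged sketch of the assumed mul24 semantics (inferred from the operand names, not stated in this commit): the hardware multiplies only the low 24 bits of each 32-bit operand.

    #include <cstdint>
    // Assumed reference semantics for the 24-bit multiplies.
    static int32_t mul_i24(int32_t a, int32_t b) {
      // Sign-extend the low 24 bits of each operand, then multiply.
      a = (int32_t)((uint32_t)a << 8) >> 8;
      b = (int32_t)((uint32_t)b << 8) >> 8;
      return a * b;
    }
    static uint32_t mul_u24(uint32_t a, uint32_t b) {
      // Zero-extend the low 24 bits of each operand, then multiply.
      return (a & 0xffffff) * (b & 0xffffff);
    }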
diff --git a/lib/Target/R600/EvergreenInstructions.td b/lib/Target/R600/EvergreenInstructions.td
index 6430ca6..2065441 100644
--- a/lib/Target/R600/EvergreenInstructions.td
+++ b/lib/Target/R600/EvergreenInstructions.td
@@ -75,6 +75,8 @@ def COS_eg : COS_Common<0x8E>;
def : POW_Common <LOG_IEEE_eg, EXP_IEEE_eg, MUL>;
def : Pat<(fsqrt f32:$src), (MUL $src, (RECIPSQRT_CLAMPED_eg $src))>;
+defm : Expand24IBitOps<MULLO_INT_eg, ADD_INT>;
+
//===----------------------------------------------------------------------===//
// Memory read/write instructions
//===----------------------------------------------------------------------===//
@@ -273,7 +275,7 @@ def BFE_UINT_eg : R600_3OP <0x4, "BFE_UINT",
VecALU
>;
-def BFE_INT_eg : R600_3OP <0x4, "BFE_INT",
+def BFE_INT_eg : R600_3OP <0x5, "BFE_INT",
[(set i32:$dst, (AMDGPUbfe_i32 i32:$src0, i32:$src1, i32:$src2))],
VecALU
>;
@@ -286,6 +288,13 @@ def BFI_INT_eg : R600_3OP <0x06, "BFI_INT",
VecALU
>;
+def : Pat<(i32 (sext_inreg i32:$src, i1)),
+ (BFE_INT_eg i32:$src, (i32 ZERO), (i32 ONE_INT))>;
+def : Pat<(i32 (sext_inreg i32:$src, i8)),
+ (BFE_INT_eg i32:$src, (i32 ZERO), (MOV_IMM_I32 8))>;
+def : Pat<(i32 (sext_inreg i32:$src, i16)),
+ (BFE_INT_eg i32:$src, (i32 ZERO), (MOV_IMM_I32 16))>;
+
defm : BFIPatterns <BFI_INT_eg>;
def BFM_INT_eg : R600_2OP <0xA0, "BFM_INT",
@@ -294,8 +303,11 @@ def BFM_INT_eg : R600_2OP <0xA0, "BFM_INT",
>;
def MULADD_UINT24_eg : R600_3OP <0x10, "MULADD_UINT24",
- [(set i32:$dst, (add (mul U24:$src0, U24:$src1), i32:$src2))], VecALU
+ [(set i32:$dst, (AMDGPUmad_u24 i32:$src0, i32:$src1, i32:$src2))], VecALU
>;
+
+def : UMad24Pat<MULADD_UINT24_eg>;
+
def BIT_ALIGN_INT_eg : R600_3OP <0xC, "BIT_ALIGN_INT", [], VecALU>;
def : ROTRPattern <BIT_ALIGN_INT_eg>;
def MULADD_eg : MULADD_Common<0x14>;
@@ -309,7 +321,7 @@ def CNDGE_eg : CNDGE_Common<0x1B>;
def MUL_LIT_eg : MUL_LIT_Common<0x1F>;
def LOG_CLAMPED_eg : LOG_CLAMPED_Common<0x82>;
def MUL_UINT24_eg : R600_2OP <0xB5, "MUL_UINT24",
- [(set i32:$dst, (mul U24:$src0, U24:$src1))], VecALU
+ [(set i32:$dst, (AMDGPUmul_u24 i32:$src0, i32:$src1))], VecALU
>;
def DOT4_eg : DOT4_Common<0xBE>;
defm CUBE_eg : CUBE_Common<0xC0>;
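
The new sext_inreg patterns above lower in-register sign extension to BFE_INT with offset 0 and the source width (which is also why the BFE_INT_eg opcode fix from 0x4 to 0x5 matters: it previously collided with BFE_UINT). A sketch of the equivalence, with the bfe semantics assumed from the instruction name rather than taken from this commit:

    #include <cstdint>
    // Assumed bfe_i32 semantics: extract 'width' bits at 'offset' and
    // sign-extend the result.
    static int32_t bfe_i32(int32_t src, uint32_t offset, uint32_t width) {
      if (width == 0)
        return 0;
      return (int32_t)((uint32_t)src << (32 - offset - width)) >> (32 - width);
    }
    // bfe_i32(x, 0, 8) sign-extends the low byte of x, matching the
    // (i32 (sext_inreg i32:$src, i8)) pattern above.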
diff --git a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
index 7105879..11ae091 100644
--- a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
+++ b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
@@ -12,6 +12,8 @@
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Support/MathExtras.h"
using namespace llvm;
@@ -23,6 +25,21 @@ void AMDGPUInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
printAnnotation(OS, Annot);
}
+void AMDGPUInstPrinter::printU8ImmOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ O << formatHex(MI->getOperand(OpNo).getImm() & 0xff);
+}
+
+void AMDGPUInstPrinter::printU16ImmOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ O << formatHex(MI->getOperand(OpNo).getImm() & 0xffff);
+}
+
+void AMDGPUInstPrinter::printU32ImmOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ O << formatHex(MI->getOperand(OpNo).getImm() & 0xffffffff);
+}
+
void AMDGPUInstPrinter::printRegOperand(unsigned reg, raw_ostream &O) {
switch (reg) {
case AMDGPU::VCC:
@@ -41,43 +58,78 @@ void AMDGPUInstPrinter::printRegOperand(unsigned reg, raw_ostream &O) {
break;
}
- // It seems there's no way to use SIRegisterInfo here, and dealing with the
- // giant enum of all the different shifted sets of registers is pretty
- // unmanageable, so parse the name and reformat it to be prettier.
- StringRef Name(getRegisterName(reg));
-
- std::pair<StringRef, StringRef> Split = Name.split('_');
- StringRef SubRegName = Split.first;
- StringRef Rest = Split.second;
+ char Type;
+ unsigned NumRegs;
+
+ if (MRI.getRegClass(AMDGPU::VGPR_32RegClassID).contains(reg)) {
+ Type = 'v';
+ NumRegs = 1;
+ } else if (MRI.getRegClass(AMDGPU::SGPR_32RegClassID).contains(reg)) {
+ Type = 's';
+ NumRegs = 1;
+ } else if (MRI.getRegClass(AMDGPU::VReg_64RegClassID).contains(reg)) {
+ Type = 'v';
+ NumRegs = 2;
+ } else if (MRI.getRegClass(AMDGPU::SReg_64RegClassID).contains(reg)) {
+ Type = 's';
+ NumRegs = 2;
+ } else if (MRI.getRegClass(AMDGPU::VReg_128RegClassID).contains(reg)) {
+ Type = 'v';
+ NumRegs = 4;
+ } else if (MRI.getRegClass(AMDGPU::SReg_128RegClassID).contains(reg)) {
+ Type = 's';
+ NumRegs = 4;
+ } else if (MRI.getRegClass(AMDGPU::VReg_96RegClassID).contains(reg)) {
+ Type = 'v';
+ NumRegs = 3;
+ } else if (MRI.getRegClass(AMDGPU::VReg_256RegClassID).contains(reg)) {
+ Type = 'v';
+ NumRegs = 8;
+ } else if (MRI.getRegClass(AMDGPU::SReg_256RegClassID).contains(reg)) {
+ Type = 's';
+ NumRegs = 8;
+ } else if (MRI.getRegClass(AMDGPU::VReg_512RegClassID).contains(reg)) {
+ Type = 'v';
+ NumRegs = 16;
+ } else if (MRI.getRegClass(AMDGPU::SReg_512RegClassID).contains(reg)) {
+ Type = 's';
+ NumRegs = 16;
+ } else {
+ O << getRegisterName(reg);
+ return;
+ }
- if (SubRegName.size() <= 4) { // Must at least be as long as "SGPR"/"VGPR".
- O << Name;
+ // The low 8 bits of the encoding value give the register index, for both
+ // VGPRs and SGPRs.
+ unsigned RegIdx = MRI.getEncodingValue(reg) & ((1 << 8) - 1);
+ if (NumRegs == 1) {
+ O << Type << RegIdx;
return;
}
- unsigned RegIndex;
- StringRef RegIndexStr = SubRegName.drop_front(4);
+ O << Type << '[' << RegIdx << ':' << (RegIdx + NumRegs - 1) << ']';
+}
- if (RegIndexStr.getAsInteger(10, RegIndex)) {
- O << Name;
+void AMDGPUInstPrinter::printImmediate(uint32_t Imm, raw_ostream &O) {
+ int32_t SImm = static_cast<int32_t>(Imm);
+ if (SImm >= -16 && SImm <= 64) {
+ O << SImm;
return;
}
- if (SubRegName.front() == 'V')
- O << 'v';
- else if (SubRegName.front() == 'S')
- O << 's';
- else {
- O << Name;
+ if (Imm == FloatToBits(1.0f) ||
+ Imm == FloatToBits(-1.0f) ||
+ Imm == FloatToBits(0.5f) ||
+ Imm == FloatToBits(-0.5f) ||
+ Imm == FloatToBits(2.0f) ||
+ Imm == FloatToBits(-2.0f) ||
+ Imm == FloatToBits(4.0f) ||
+ Imm == FloatToBits(-4.0f)) {
+ O << BitsToFloat(Imm);
return;
}
- if (Rest.empty()) // Only 1 32-bit register
- O << RegIndex;
- else {
- unsigned NumReg = Rest.count('_') + 2;
- O << '[' << RegIndex << ':' << (RegIndex + NumReg - 1) << ']';
- }
+ O << formatHex(static_cast<uint64_t>(Imm));
}
void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
@@ -95,7 +147,7 @@ void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
break;
}
} else if (Op.isImm()) {
- O << Op.getImm();
+ printImmediate(Op.getImm(), O);
} else if (Op.isFPImm()) {
O << Op.getFPImm();
} else if (Op.isExpr()) {
@@ -106,6 +158,18 @@ void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
}
}
+void AMDGPUInstPrinter::printOperandAndMods(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ unsigned InputModifiers = MI->getOperand(OpNo).getImm();
+ if (InputModifiers & 0x1)
+ O << "-";
+ if (InputModifiers & 0x2)
+ O << "|";
+ printOperand(MI, OpNo + 1, O);
+ if (InputModifiers & 0x2)
+ O << "|";
+}
+
void AMDGPUInstPrinter::printInterpSlot(const MCInst *MI, unsigned OpNum,
raw_ostream &O) {
unsigned Imm = MI->getOperand(OpNum).getImm();
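
Two notes on the printer changes above. printRegOperand now derives names from register classes and encoding values instead of string-parsing getRegisterName(): a single register prints as v8 or s2, a tuple as a range such as s[4:7] (base index through base plus NumRegs - 1). printOperandAndMods reads bit 0 of the modifier immediate as negate and bit 1 as absolute value, so a modifier of 0x3 on v0 would print as -|v0|. A standalone sketch of the printImmediate classification (hypothetical helper; floatToBits stands in for llvm::FloatToBits):

    #include <cstdint>
    #include <cstring>
    static uint32_t floatToBits(float F) {
      uint32_t U;
      std::memcpy(&U, &F, sizeof(U));  // bit-for-bit reinterpretation
      return U;
    }
    enum class ImmKind { InlineInt, InlineFloat, Literal };
    static ImmKind classifyImm(uint32_t Imm) {
      int32_t S = (int32_t)Imm;
      if (S >= -16 && S <= 64)
        return ImmKind::InlineInt;        // printed as a decimal integer
      static const float Inline[] = {1.0f, -1.0f, 0.5f, -0.5f,
                                     2.0f, -2.0f, 4.0f, -4.0f};
      for (float F : Inline)
        if (Imm == floatToBits(F))
          return ImmKind::InlineFloat;    // printed as a float
      return ImmKind::Literal;            // printed as a hex literal
    }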
diff --git a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h
index 1d24680..6ca7170 100644
--- a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h
+++ b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h
@@ -29,13 +29,18 @@ public:
void printInstruction(const MCInst *MI, raw_ostream &O);
static const char *getRegisterName(unsigned RegNo);
- virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot);
+ void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot) override;
private:
- static void printRegOperand(unsigned RegNo, raw_ostream &O);
- static void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printU8ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printU16ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printU32ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printRegOperand(unsigned RegNo, raw_ostream &O);
+ void printImmediate(uint32_t Imm, raw_ostream &O);
+ void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printOperandAndMods(const MCInst *MI, unsigned OpNo, raw_ostream &O);
static void printInterpSlot(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- static void printMemOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printMemOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
static void printIfSet(const MCInst *MI, unsigned OpNo, raw_ostream &O,
StringRef Asm, StringRef Default = "");
static void printAbs(const MCInst *MI, unsigned OpNo, raw_ostream &O);
diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp b/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp
index a6bb59f..489cec7 100644
--- a/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp
+++ b/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp
@@ -23,8 +23,8 @@ namespace {
class AMDGPUMCObjectWriter : public MCObjectWriter {
public:
AMDGPUMCObjectWriter(raw_ostream &OS) : MCObjectWriter(OS, true) { }
- virtual void ExecutePostLayoutBinding(MCAssembler &Asm,
- const MCAsmLayout &Layout) {
+ void ExecutePostLayoutBinding(MCAssembler &Asm,
+ const MCAsmLayout &Layout) override {
//XXX: Implement if necessary.
}
void RecordRelocation(const MCAssembler &Asm, const MCAsmLayout &Layout,
@@ -34,7 +34,7 @@ public:
assert(!"Not implemented");
}
- virtual void WriteObject(MCAssembler &Asm, const MCAsmLayout &Layout);
+ void WriteObject(MCAssembler &Asm, const MCAsmLayout &Layout) override;
};
@@ -43,19 +43,19 @@ public:
AMDGPUAsmBackend(const Target &T)
: MCAsmBackend() {}
- virtual unsigned getNumFixupKinds() const { return 0; };
- virtual void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
- uint64_t Value, bool IsPCRel) const;
- virtual bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
- const MCRelaxableFragment *DF,
- const MCAsmLayout &Layout) const {
+ unsigned getNumFixupKinds() const override { return 0; }
+ void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
+ uint64_t Value, bool IsPCRel) const override;
+ bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
+ const MCRelaxableFragment *DF,
+ const MCAsmLayout &Layout) const override {
return false;
}
- virtual void relaxInstruction(const MCInst &Inst, MCInst &Res) const {
+ void relaxInstruction(const MCInst &Inst, MCInst &Res) const override {
assert(!"Not implemented");
}
- virtual bool mayNeedRelaxation(const MCInst &Inst) const { return false; }
- virtual bool writeNopData(uint64_t Count, MCObjectWriter *OW) const {
+ bool mayNeedRelaxation(const MCInst &Inst) const override { return false; }
+ bool writeNopData(uint64_t Count, MCObjectWriter *OW) const override {
return true;
}
};
@@ -88,7 +88,7 @@ class ELFAMDGPUAsmBackend : public AMDGPUAsmBackend {
public:
ELFAMDGPUAsmBackend(const Target &T) : AMDGPUAsmBackend(T) { }
- MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
+ MCObjectWriter *createObjectWriter(raw_ostream &OS) const override {
return createAMDGPUELFObjectWriter(OS);
}
};
diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp b/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp
index aee9bd1..78bbe0a 100644
--- a/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp
+++ b/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp
@@ -35,7 +35,7 @@ AMDGPUMCAsmInfo::AMDGPUMCAsmInfo(StringRef &TT) : MCAsmInfo() {
Data16bitsDirective = ".short\t";
Data32bitsDirective = ".long\t";
Data64bitsDirective = ".quad\t";
- GPRel32Directive = 0;
+ GPRel32Directive = nullptr;
SunStyleELFSectionSwitchSyntax = true;
UsesELFSectionDirectiveForBSS = true;
@@ -58,5 +58,5 @@ AMDGPUMCAsmInfo::AMDGPUMCAsmInfo(StringRef &TT) : MCAsmInfo() {
const MCSection*
AMDGPUMCAsmInfo::getNonexecutableStackSection(MCContext &CTX) const {
- return 0;
+ return nullptr;
}
diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.h b/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.h
index 22afd63..59aebec 100644
--- a/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.h
+++ b/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.h
@@ -22,7 +22,7 @@ class StringRef;
class AMDGPUMCAsmInfo : public MCAsmInfo {
public:
explicit AMDGPUMCAsmInfo(StringRef &TT);
- const MCSection* getNonexecutableStackSection(MCContext &CTX) const;
+ const MCSection* getNonexecutableStackSection(MCContext &CTX) const override;
};
} // namespace llvm
#endif // AMDGPUMCASMINFO_H
diff --git a/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp b/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp
index 6592b0e..38a2956 100644
--- a/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp
+++ b/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp
@@ -24,6 +24,8 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
+using namespace llvm;
+
#define GET_INSTRINFO_MC_DESC
#include "AMDGPUGenInstrInfo.inc"
@@ -33,8 +35,6 @@
#define GET_REGINFO_MC_DESC
#include "AMDGPUGenRegisterInfo.inc"
-using namespace llvm;
-
static MCInstrInfo *createAMDGPUMCInstrInfo() {
MCInstrInfo *X = new MCInstrInfo();
InitAMDGPUMCInstrInfo(X);
diff --git a/lib/Target/R600/MCTargetDesc/LLVMBuild.txt b/lib/Target/R600/MCTargetDesc/LLVMBuild.txt
index b1beab0..74b8ca0 100644
--- a/lib/Target/R600/MCTargetDesc/LLVMBuild.txt
+++ b/lib/Target/R600/MCTargetDesc/LLVMBuild.txt
@@ -1,4 +1,4 @@
-;===- ./lib/Target/R600/MCTargetDesc/LLVMBuild.txt ------------*- Conf -*--===;
+;===- ./lib/Target/R600/MCTargetDesc/LLVMBuild.txt -------------*- Conf -*--===;
;
; The LLVM Compiler Infrastructure
;
@@ -19,5 +19,5 @@
type = Library
name = R600Desc
parent = R600
-required_libraries = R600AsmPrinter R600Info MC
+required_libraries = MC R600AsmPrinter R600Info Support
add_to_library_groups = R600
diff --git a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
index 286c7d1..5e7cefe 100644
--- a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
+++ b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
@@ -41,14 +41,14 @@ public:
: MCII(mcii), MRI(mri) { }
/// \brief Encode the instruction and write it to the OS.
- virtual void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+ void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
+ const MCSubtargetInfo &STI) const override;
/// \returns the encoding for an MCOperand.
- virtual uint64_t getMachineOpValue(const MCInst &MI, const MCOperand &MO,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
+ uint64_t getMachineOpValue(const MCInst &MI, const MCOperand &MO,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const override;
private:
void EmitByte(unsigned int byte, raw_ostream &OS) const;
diff --git a/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp b/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp
index f42e978..ee02111 100644
--- a/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp
+++ b/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp
@@ -54,14 +54,14 @@ public:
~SIMCCodeEmitter() { }
/// \brief Encode the instruction and write it to the OS.
- virtual void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+ void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
+ const MCSubtargetInfo &STI) const override;
/// \returns the encoding for an MCOperand.
- virtual uint64_t getMachineOpValue(const MCInst &MI, const MCOperand &MO,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
+ uint64_t getMachineOpValue(const MCInst &MI, const MCOperand &MO,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const override;
};
} // End anonymous namespace
diff --git a/lib/Target/R600/Processors.td b/lib/Target/R600/Processors.td
index fde4481..ce17d7c 100644
--- a/lib/Target/R600/Processors.td
+++ b/lib/Target/R600/Processors.td
@@ -106,3 +106,5 @@ def : Proc<"kabini", SI_Itin, [FeatureSeaIslands]>;
def : Proc<"kaveri", SI_Itin, [FeatureSeaIslands]>;
def : Proc<"hawaii", SI_Itin, [FeatureSeaIslands]>;
+
+def : Proc<"mullins", SI_Itin, [FeatureSeaIslands]>;
diff --git a/lib/Target/R600/R600ClauseMergePass.cpp b/lib/Target/R600/R600ClauseMergePass.cpp
index 3d9015c..92bf0df 100644
--- a/lib/Target/R600/R600ClauseMergePass.cpp
+++ b/lib/Target/R600/R600ClauseMergePass.cpp
@@ -13,7 +13,6 @@
/// It needs to be called after IfCvt for best results.
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "r600mergeclause"
#include "AMDGPU.h"
#include "R600Defines.h"
#include "R600InstrInfo.h"
@@ -27,6 +26,8 @@
using namespace llvm;
+#define DEBUG_TYPE "r600mergeclause"
+
namespace {
static bool isCFAlu(const MachineInstr *MI) {
@@ -62,9 +63,9 @@ private:
public:
R600ClauseMergePass(TargetMachine &tm) : MachineFunctionPass(ID) { }
- virtual bool runOnMachineFunction(MachineFunction &MF);
+ bool runOnMachineFunction(MachineFunction &MF) override;
- const char *getPassName() const;
+ const char *getPassName() const override;
};
char R600ClauseMergePass::ID = 0;
diff --git a/lib/Target/R600/R600ControlFlowFinalizer.cpp b/lib/Target/R600/R600ControlFlowFinalizer.cpp
index f74bef3..d255e96 100644
--- a/lib/Target/R600/R600ControlFlowFinalizer.cpp
+++ b/lib/Target/R600/R600ControlFlowFinalizer.cpp
@@ -12,7 +12,6 @@
/// computing their address on the fly; it also sets STACK_SIZE info.
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "r600cf"
#include "llvm/Support/Debug.h"
#include "AMDGPU.h"
#include "R600Defines.h"
@@ -26,6 +25,8 @@
using namespace llvm;
+#define DEBUG_TYPE "r600cf"
+
namespace {
struct CFStack {
@@ -468,13 +469,13 @@ private:
public:
R600ControlFlowFinalizer(TargetMachine &tm) : MachineFunctionPass(ID),
- TII (0), TRI(0),
+ TII (nullptr), TRI(nullptr),
ST(tm.getSubtarget<AMDGPUSubtarget>()) {
const AMDGPUSubtarget &ST = tm.getSubtarget<AMDGPUSubtarget>();
MaxFetchInst = ST.getTexVTXClauseSize();
}
- virtual bool runOnMachineFunction(MachineFunction &MF) {
+ bool runOnMachineFunction(MachineFunction &MF) override {
TII=static_cast<const R600InstrInfo *>(MF.getTarget().getInstrInfo());
TRI=static_cast<const R600RegisterInfo *>(MF.getTarget().getRegisterInfo());
R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
@@ -501,13 +502,13 @@ public:
DEBUG(dbgs() << CfCount << ":"; I->dump(););
FetchClauses.push_back(MakeFetchClause(MBB, I));
CfCount++;
- LastAlu.back() = 0;
+ LastAlu.back() = nullptr;
continue;
}
MachineBasicBlock::iterator MI = I;
if (MI->getOpcode() != AMDGPU::ENDIF)
- LastAlu.back() = 0;
+ LastAlu.back() = nullptr;
if (MI->getOpcode() == AMDGPU::CF_ALU)
LastAlu.back() = MI;
I++;
@@ -558,7 +559,7 @@ public:
break;
}
case AMDGPU::IF_PREDICATE_SET: {
- LastAlu.push_back(0);
+ LastAlu.push_back(nullptr);
MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
getHWInstrDesc(CF_JUMP))
.addImm(0)
@@ -665,7 +666,7 @@ public:
return false;
}
- const char *getPassName() const {
+ const char *getPassName() const override {
return "R600 Control Flow Finalizer Pass";
}
};
diff --git a/lib/Target/R600/R600EmitClauseMarkers.cpp b/lib/Target/R600/R600EmitClauseMarkers.cpp
index 5bd793a..38afebe 100644
--- a/lib/Target/R600/R600EmitClauseMarkers.cpp
+++ b/lib/Target/R600/R600EmitClauseMarkers.cpp
@@ -291,12 +291,12 @@ private:
public:
static char ID;
- R600EmitClauseMarkers() : MachineFunctionPass(ID), TII(0), Address(0) {
+ R600EmitClauseMarkers() : MachineFunctionPass(ID), TII(nullptr), Address(0) {
initializeR600EmitClauseMarkersPass(*PassRegistry::getPassRegistry());
}
- virtual bool runOnMachineFunction(MachineFunction &MF) {
+ bool runOnMachineFunction(MachineFunction &MF) override {
TII = static_cast<const R600InstrInfo *>(MF.getTarget().getInstrInfo());
for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
@@ -315,7 +315,7 @@ public:
return false;
}
- const char *getPassName() const {
+ const char *getPassName() const override {
return "R600 Emit Clause Markers Pass";
}
};
diff --git a/lib/Target/R600/R600ExpandSpecialInstrs.cpp b/lib/Target/R600/R600ExpandSpecialInstrs.cpp
index ca1189d..732b06d 100644
--- a/lib/Target/R600/R600ExpandSpecialInstrs.cpp
+++ b/lib/Target/R600/R600ExpandSpecialInstrs.cpp
@@ -38,11 +38,11 @@ private:
public:
R600ExpandSpecialInstrsPass(TargetMachine &tm) : MachineFunctionPass(ID),
- TII(0) { }
+ TII(nullptr) { }
- virtual bool runOnMachineFunction(MachineFunction &MF);
+ bool runOnMachineFunction(MachineFunction &MF) override;
- const char *getPassName() const {
+ const char *getPassName() const override {
return "R600 Expand special instructions pass";
}
};
diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp
index 6405a82..d6c6830 100644
--- a/lib/Target/R600/R600ISelLowering.cpp
+++ b/lib/Target/R600/R600ISelLowering.cpp
@@ -82,9 +82,31 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
setOperationAction(ISD::SELECT, MVT::i32, Expand);
setOperationAction(ISD::SELECT, MVT::f32, Expand);
setOperationAction(ISD::SELECT, MVT::v2i32, Expand);
- setOperationAction(ISD::SELECT, MVT::v2f32, Expand);
setOperationAction(ISD::SELECT, MVT::v4i32, Expand);
- setOperationAction(ISD::SELECT, MVT::v4f32, Expand);
+
+ // Expand sign extension of vectors
+ if (!Subtarget->hasBFE())
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
+
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i1, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i1, Expand);
+
+ if (!Subtarget->hasBFE())
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Expand);
+
+ if (!Subtarget->hasBFE())
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Expand);
+
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Expand);
+
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);
+
// Legalize loads and stores to the private address space.
setOperationAction(ISD::LOAD, MVT::i32, Custom);
@@ -117,6 +139,11 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
setTargetDAGCombine(ISD::SELECT_CC);
setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
+ // These should be replaced by UDIVREM, but it does not happen automatically
+ // during Type Legalization.
+ setOperationAction(ISD::UDIV, MVT::i64, Custom);
+ setOperationAction(ISD::UREM, MVT::i64, Custom);
+
setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
setBooleanContents(ZeroOrNegativeOneBooleanContent);
@@ -538,8 +565,7 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
DAG.getConstant(2, MVT::i32), // SWZ_Z
DAG.getConstant(3, MVT::i32) // SWZ_W
};
- return DAG.getNode(AMDGPUISD::EXPORT, SDLoc(Op), Op.getValueType(),
- Args, 8);
+ return DAG.getNode(AMDGPUISD::EXPORT, SDLoc(Op), Op.getValueType(), Args);
}
// default for switch(IntrinsicID)
@@ -689,7 +715,7 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
Op.getOperand(9),
Op.getOperand(10)
};
- return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs, 19);
+ return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs);
}
case AMDGPUIntrinsic::AMDGPU_dp4: {
SDValue Args[8] = {
@@ -710,7 +736,7 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
DAG.getConstant(3, MVT::i32))
};
- return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args, 8);
+ return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args);
}
case Intrinsic::r600_read_ngroups_x:
@@ -960,13 +986,6 @@ SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
}
-
- // Possible Min/Max pattern
- SDValue MinMax = LowerMinMax(Op, DAG);
- if (MinMax.getNode()) {
- return MinMax;
- }
-
// If we make it this far, it means we have no native instructions to handle
// this SELECT_CC, so we must lower it.
SDValue HWTrue, HWFalse;
@@ -1088,10 +1107,10 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
DAG.getConstant(0, MVT::i32),
Mask
};
- SDValue Input = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, Src, 4);
+ SDValue Input = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, Src);
SDValue Args[3] = { Chain, Input, DWordAddr };
return DAG.getMemIntrinsicNode(AMDGPUISD::STORE_MSKOR, DL,
- Op->getVTList(), Args, 3, MemVT,
+ Op->getVTList(), Args, MemVT,
StoreNode->getMemOperand());
} else if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR &&
Value.getValueType().bitsGE(MVT::i32)) {
@@ -1131,7 +1150,7 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
if (ValueVT.isVector()) {
unsigned NumElemVT = ValueVT.getVectorNumElements();
EVT ElemVT = ValueVT.getVectorElementType();
- SDValue Stores[4];
+ SmallVector<SDValue, 4> Stores(NumElemVT);
assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
"vector width in load");
@@ -1148,7 +1167,7 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
Chain, Elem, Ptr,
DAG.getTargetConstant(Channel, MVT::i32));
}
- Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores, NumElemVT);
+ Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores);
} else {
if (ValueVT == MVT::i8) {
Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value);
@@ -1212,10 +1231,11 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
SDValue Ret = AMDGPUTargetLowering::LowerLOAD(Op, DAG);
if (Ret.getNode()) {
- SDValue Ops[2];
- Ops[0] = Ret;
- Ops[1] = Chain;
- return DAG.getMergeValues(Ops, 2, DL);
+ SDValue Ops[2] = {
+ Ret,
+ Chain
+ };
+ return DAG.getMergeValues(Ops, DL);
}
@@ -1224,7 +1244,7 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
SplitVectorLoad(Op, DAG),
Chain
};
- return DAG.getMergeValues(MergedValues, 2, DL);
+ return DAG.getMergeValues(MergedValues, DL);
}
int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
@@ -1232,8 +1252,8 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
((LoadNode->getExtensionType() == ISD::NON_EXTLOAD) ||
(LoadNode->getExtensionType() == ISD::ZEXTLOAD))) {
SDValue Result;
- if (isa<ConstantExpr>(LoadNode->getSrcValue()) ||
- isa<Constant>(LoadNode->getSrcValue()) ||
+ if (isa<ConstantExpr>(LoadNode->getMemOperand()->getValue()) ||
+ isa<Constant>(LoadNode->getMemOperand()->getValue()) ||
isa<ConstantSDNode>(Ptr)) {
SDValue Slots[4];
for (unsigned i = 0; i < 4; i++) {
@@ -1252,7 +1272,8 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
NewVT = VT;
NumElements = VT.getVectorNumElements();
}
- Result = DAG.getNode(ISD::BUILD_VECTOR, DL, NewVT, Slots, NumElements);
+ Result = DAG.getNode(ISD::BUILD_VECTOR, DL, NewVT,
+ makeArrayRef(Slots, NumElements));
} else {
// A non-constant ptr can't be folded; keep it as a v4f32 load
Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
@@ -1268,10 +1289,10 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
}
SDValue MergedValues[2] = {
- Result,
- Chain
+ Result,
+ Chain
};
- return DAG.getMergeValues(MergedValues, 2, DL);
+ return DAG.getMergeValues(MergedValues, DL);
}
// For most operations returning SDValue() will result in the node being
@@ -1295,7 +1316,7 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Shl, ShiftAmount);
SDValue MergedValues[2] = { Sra, Chain };
- return DAG.getMergeValues(MergedValues, 2, DL);
+ return DAG.getMergeValues(MergedValues, DL);
}
if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
@@ -1332,7 +1353,7 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
Loads[i] = DAG.getUNDEF(ElemVT);
}
EVT TargetVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, 4);
- LoweredLoad = DAG.getNode(ISD::BUILD_VECTOR, DL, TargetVT, Loads, 4);
+ LoweredLoad = DAG.getNode(ISD::BUILD_VECTOR, DL, TargetVT, Loads);
} else {
LoweredLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, VT,
Chain, Ptr,
@@ -1340,11 +1361,12 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
Op.getOperand(2));
}
- SDValue Ops[2];
- Ops[0] = LoweredLoad;
- Ops[1] = Chain;
+ SDValue Ops[2] = {
+ LoweredLoad,
+ Chain
+ };
- return DAG.getMergeValues(Ops, 2, DL);
+ return DAG.getMergeValues(Ops, DL);
}
/// XXX Only kernel functions are supported, so we can assume for now that
@@ -1365,8 +1387,7 @@ SDValue R600TargetLowering::LowerFormalArguments(
SmallVector<ISD::InputArg, 8> LocalIns;
- getOriginalFunctionArgs(DAG, DAG.getMachineFunction().getFunction(), Ins,
- LocalIns);
+ getOriginalFunctionArgs(DAG, MF.getFunction(), Ins, LocalIns);
AnalyzeFormalArguments(CCInfo, LocalIns);
@@ -1392,32 +1413,38 @@ SDValue R600TargetLowering::LowerFormalArguments(
// The first 36 bytes of the input buffer contain information about
// thread group and global sizes.
- SDValue Arg = DAG.getExtLoad(ISD::SEXTLOAD, DL, VT, Chain,
+
+ // FIXME: This should really check the extload type, but the handling of
+ // extload vector parameters seems to be broken.
+ //ISD::LoadExtType Ext = Ins[i].Flags.isSExt() ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
+ ISD::LoadExtType Ext = ISD::SEXTLOAD;
+ SDValue Arg = DAG.getExtLoad(Ext, DL, VT, Chain,
DAG.getConstant(36 + VA.getLocMemOffset(), MVT::i32),
MachinePointerInfo(UndefValue::get(PtrTy)),
MemVT, false, false, 4);
- // 4 is the preferred alignment for
- // the CONSTANT memory space.
+
+ // 4 is the preferred alignment for the CONSTANT memory space.
InVals.push_back(Arg);
}
return Chain;
}
EVT R600TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
- if (!VT.isVector()) return MVT::i32;
+ if (!VT.isVector())
+ return MVT::i32;
return VT.changeVectorElementTypeToInteger();
}
-static SDValue
-CompactSwizzlableVector(SelectionDAG &DAG, SDValue VectorEntry,
- DenseMap<unsigned, unsigned> &RemapSwizzle) {
+static SDValue CompactSwizzlableVector(
+ SelectionDAG &DAG, SDValue VectorEntry,
+ DenseMap<unsigned, unsigned> &RemapSwizzle) {
assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
assert(RemapSwizzle.empty());
SDValue NewBldVec[4] = {
- VectorEntry.getOperand(0),
- VectorEntry.getOperand(1),
- VectorEntry.getOperand(2),
- VectorEntry.getOperand(3)
+ VectorEntry.getOperand(0),
+ VectorEntry.getOperand(1),
+ VectorEntry.getOperand(2),
+ VectorEntry.getOperand(3)
};
for (unsigned i = 0; i < 4; i++) {
@@ -1448,7 +1475,7 @@ CompactSwizzlableVector(SelectionDAG &DAG, SDValue VectorEntry,
}
return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
- VectorEntry.getValueType(), NewBldVec, 4);
+ VectorEntry.getValueType(), NewBldVec);
}
static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
@@ -1486,7 +1513,7 @@ static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
}
return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
- VectorEntry.getValueType(), NewBldVec, 4);
+ VectorEntry.getValueType(), NewBldVec);
}
@@ -1524,6 +1551,7 @@ SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
SelectionDAG &DAG = DCI.DAG;
switch (N->getOpcode()) {
+ default: return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
// (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
case ISD::FP_ROUND: {
SDValue Arg = N->getOperand(0);
@@ -1613,8 +1641,7 @@ SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
}
// Return the new vector
- return DAG.getNode(ISD::BUILD_VECTOR, dl,
- VT, &Ops[0], Ops.size());
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
}
// Extract_vec (Build_vector) generated by custom lowering
@@ -1638,6 +1665,11 @@ SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
}
case ISD::SELECT_CC: {
+ // Try common optimizations
+ SDValue Ret = AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
+ if (Ret.getNode())
+ return Ret;
+
// fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
// selectcc x, y, a, b, inv(cc)
//
@@ -1697,7 +1729,7 @@ SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
};
SDLoc DL(N);
NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG);
- return DAG.getNode(AMDGPUISD::EXPORT, DL, N->getVTList(), NewArgs, 8);
+ return DAG.getNode(AMDGPUISD::EXPORT, DL, N->getVTList(), NewArgs);
}
case AMDGPUISD::TEXTURE_FETCH: {
SDValue Arg = N->getOperand(1);
@@ -1727,10 +1759,11 @@ SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
};
NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG);
return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, SDLoc(N), N->getVTList(),
- NewArgs, 19);
+ NewArgs);
}
}
- return SDValue();
+
+ return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
}
static bool
@@ -1779,8 +1812,7 @@ FoldOperand(SDNode *ParentNode, unsigned SrcIdx, SDValue &Src, SDValue &Neg,
TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
};
std::vector<unsigned> Consts;
- for (unsigned i = 0; i < sizeof(SrcIndices) / sizeof(int); i++) {
- int OtherSrcIdx = SrcIndices[i];
+ for (int OtherSrcIdx : SrcIndices) {
int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
if (OtherSrcIdx < 0 || OtherSelIdx < 0)
continue;
@@ -1791,14 +1823,14 @@ FoldOperand(SDNode *ParentNode, unsigned SrcIdx, SDValue &Src, SDValue &Neg,
if (RegisterSDNode *Reg =
dyn_cast<RegisterSDNode>(ParentNode->getOperand(OtherSrcIdx))) {
if (Reg->getReg() == AMDGPU::ALU_CONST) {
- ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(
- ParentNode->getOperand(OtherSelIdx));
+ ConstantSDNode *Cst
+ = cast<ConstantSDNode>(ParentNode->getOperand(OtherSelIdx));
Consts.push_back(Cst->getZExtValue());
}
}
}
- ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(CstOffset);
+ ConstantSDNode *Cst = cast<ConstantSDNode>(CstOffset);
Consts.push_back(Cst->getZExtValue());
if (!TII->fitsConstReadLimitations(Consts)) {
return false;
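
Most hunks in this file are one mechanical migration: SelectionDAG entry points such as getNode, getMergeValues, and getMemIntrinsicNode now take ArrayRef<SDValue> instead of a pointer-plus-count pair, so the element count travels with the array, SmallVector, or makeArrayRef() sub-range. A minimal sketch of the idiom (the toy sum()/demo() functions are hypothetical; only ArrayRef, SmallVector, and makeArrayRef are real LLVM names):

    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/ADT/SmallVector.h"
    using namespace llvm;

    // One ArrayRef signature serves every caller; no count parameter.
    static int sum(ArrayRef<int> Vals) {
      int S = 0;
      for (int V : Vals)
        S += V;
      return S;
    }

    static void demo() {
      int A[4] = {1, 2, 3, 4};
      SmallVector<int, 4> SV(A, A + 4);
      (void)sum(A);                   // whole C array, extent deduced
      (void)sum(SV);                  // SmallVector converts implicitly
      (void)sum(makeArrayRef(A, 2));  // explicit prefix, like the Slots hunk
    }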
diff --git a/lib/Target/R600/R600ISelLowering.h b/lib/Target/R600/R600ISelLowering.h
index 22ef728..a8a464f 100644
--- a/lib/Target/R600/R600ISelLowering.h
+++ b/lib/Target/R600/R600ISelLowering.h
@@ -24,21 +24,21 @@ class R600InstrInfo;
class R600TargetLowering : public AMDGPUTargetLowering {
public:
R600TargetLowering(TargetMachine &TM);
- virtual MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr *MI,
- MachineBasicBlock * BB) const;
- virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
- virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
- virtual void ReplaceNodeResults(SDNode * N,
- SmallVectorImpl<SDValue> &Results,
- SelectionDAG &DAG) const override;
- virtual SDValue LowerFormalArguments(
- SDValue Chain,
- CallingConv::ID CallConv,
- bool isVarArg,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- SDLoc DL, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const;
- virtual EVT getSetCCResultType(LLVMContext &, EVT VT) const;
+ MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock * BB) const override;
+ SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
+ SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
+ void ReplaceNodeResults(SDNode * N,
+ SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG) const override;
+ SDValue LowerFormalArguments(
+ SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SDLoc DL, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const override;
+ EVT getSetCCResultType(LLVMContext &, EVT VT) const override;
private:
unsigned Gen;
/// Each OpenCL kernel has nine implicit parameters that are stored in the
@@ -66,7 +66,7 @@ private:
void getStackAddress(unsigned StackWidth, unsigned ElemIdx,
unsigned &Channel, unsigned &PtrIncr) const;
bool isZero(SDValue Op) const;
- virtual SDNode *PostISelFolding(MachineSDNode *N, SelectionDAG &DAG) const;
+ SDNode *PostISelFolding(MachineSDNode *N, SelectionDAG &DAG) const override;
};
} // End namespace llvm;
diff --git a/lib/Target/R600/R600InstrInfo.cpp b/lib/Target/R600/R600InstrInfo.cpp
index 0281dd0..b0d9ae3 100644
--- a/lib/Target/R600/R600InstrInfo.cpp
+++ b/lib/Target/R600/R600InstrInfo.cpp
@@ -23,11 +23,11 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+using namespace llvm;
+
#define GET_INSTRINFO_CTOR_DTOR
#include "AMDGPUGenDFAPacketizer.inc"
-using namespace llvm;
-
R600InstrInfo::R600InstrInfo(AMDGPUTargetMachine &tm)
: AMDGPUInstrInfo(tm),
RI(tm),
@@ -677,7 +677,7 @@ findFirstPredicateSetterFrom(MachineBasicBlock &MBB,
return MI;
}
- return NULL;
+ return nullptr;
}
static
@@ -797,7 +797,7 @@ R600InstrInfo::InsertBranch(MachineBasicBlock &MBB,
DebugLoc DL) const {
assert(TBB && "InsertBranch must not be told to insert a fallthrough");
- if (FBB == 0) {
+ if (!FBB) {
if (Cond.empty()) {
BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(TBB);
return 1;
diff --git a/lib/Target/R600/R600InstrInfo.h b/lib/Target/R600/R600InstrInfo.h
index d5ff4de..b5304a0 100644
--- a/lib/Target/R600/R600InstrInfo.h
+++ b/lib/Target/R600/R600InstrInfo.h
@@ -50,13 +50,13 @@ namespace llvm {
explicit R600InstrInfo(AMDGPUTargetMachine &tm);
- const R600RegisterInfo &getRegisterInfo() const;
- virtual void copyPhysReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI, DebugLoc DL,
- unsigned DestReg, unsigned SrcReg,
- bool KillSrc) const;
+ const R600RegisterInfo &getRegisterInfo() const override;
+ void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const override;
bool isLegalToSplitMBBAt(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI) const;
+ MachineBasicBlock::iterator MBBI) const override;
bool isTrig(const MachineInstr &MI) const;
bool isPlaceHolderOpcode(unsigned opcode) const;
@@ -142,79 +142,79 @@ namespace llvm {
/// instruction slots within an instruction group.
bool isVector(const MachineInstr &MI) const;
- virtual unsigned getIEQOpcode() const;
- virtual bool isMov(unsigned Opcode) const;
+ unsigned getIEQOpcode() const override;
+ bool isMov(unsigned Opcode) const override;
DFAPacketizer *CreateTargetScheduleState(const TargetMachine *TM,
- const ScheduleDAG *DAG) const;
+ const ScheduleDAG *DAG) const override;
- bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
+ bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override;
bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB,
- SmallVectorImpl<MachineOperand> &Cond, bool AllowModify) const;
+ SmallVectorImpl<MachineOperand> &Cond, bool AllowModify) const override;
- unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, const SmallVectorImpl<MachineOperand> &Cond, DebugLoc DL) const;
+ unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, const SmallVectorImpl<MachineOperand> &Cond, DebugLoc DL) const override;
- unsigned RemoveBranch(MachineBasicBlock &MBB) const;
+ unsigned RemoveBranch(MachineBasicBlock &MBB) const override;
- bool isPredicated(const MachineInstr *MI) const;
+ bool isPredicated(const MachineInstr *MI) const override;
- bool isPredicable(MachineInstr *MI) const;
+ bool isPredicable(MachineInstr *MI) const override;
bool
isProfitableToDupForIfCvt(MachineBasicBlock &MBB, unsigned NumCyles,
- const BranchProbability &Probability) const;
+ const BranchProbability &Probability) const override;
bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCyles,
unsigned ExtraPredCycles,
- const BranchProbability &Probability) const ;
+ const BranchProbability &Probability) const override;
bool
isProfitableToIfCvt(MachineBasicBlock &TMBB,
unsigned NumTCycles, unsigned ExtraTCycles,
MachineBasicBlock &FMBB,
unsigned NumFCycles, unsigned ExtraFCycles,
- const BranchProbability &Probability) const;
+ const BranchProbability &Probability) const override;
bool DefinesPredicate(MachineInstr *MI,
- std::vector<MachineOperand> &Pred) const;
+ std::vector<MachineOperand> &Pred) const override;
bool SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
- const SmallVectorImpl<MachineOperand> &Pred2) const;
+ const SmallVectorImpl<MachineOperand> &Pred2) const override;
bool isProfitableToUnpredicate(MachineBasicBlock &TMBB,
- MachineBasicBlock &FMBB) const;
+ MachineBasicBlock &FMBB) const override;
bool PredicateInstruction(MachineInstr *MI,
- const SmallVectorImpl<MachineOperand> &Pred) const;
+ const SmallVectorImpl<MachineOperand> &Pred) const override;
- unsigned int getPredicationCost(const MachineInstr *) const;
+ unsigned int getPredicationCost(const MachineInstr *) const override;
unsigned int getInstrLatency(const InstrItineraryData *ItinData,
const MachineInstr *MI,
- unsigned *PredCost = 0) const;
+ unsigned *PredCost = nullptr) const override;
- virtual int getInstrLatency(const InstrItineraryData *ItinData,
- SDNode *Node) const { return 1;}
+ int getInstrLatency(const InstrItineraryData *ItinData,
+ SDNode *Node) const override { return 1; }
/// \brief Reserve the registers that may be accessed using indirect addressing.
void reserveIndirectRegisters(BitVector &Reserved,
const MachineFunction &MF) const;
- virtual unsigned calculateIndirectAddress(unsigned RegIndex,
- unsigned Channel) const;
+ unsigned calculateIndirectAddress(unsigned RegIndex,
+ unsigned Channel) const override;
- virtual const TargetRegisterClass *getIndirectAddrRegClass() const;
+ const TargetRegisterClass *getIndirectAddrRegClass() const override;
- virtual MachineInstrBuilder buildIndirectWrite(MachineBasicBlock *MBB,
- MachineBasicBlock::iterator I,
- unsigned ValueReg, unsigned Address,
- unsigned OffsetReg) const;
+ MachineInstrBuilder buildIndirectWrite(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator I,
+ unsigned ValueReg, unsigned Address,
+ unsigned OffsetReg) const override;
- virtual MachineInstrBuilder buildIndirectRead(MachineBasicBlock *MBB,
- MachineBasicBlock::iterator I,
- unsigned ValueReg, unsigned Address,
- unsigned OffsetReg) const;
+ MachineInstrBuilder buildIndirectRead(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator I,
+ unsigned ValueReg, unsigned Address,
+ unsigned OffsetReg) const override;
unsigned getMaxAlusPerClause() const;
@@ -244,7 +244,7 @@ namespace llvm {
MachineInstr *buildMovInstr(MachineBasicBlock *MBB,
MachineBasicBlock::iterator I,
- unsigned DstReg, unsigned SrcReg) const;
+ unsigned DstReg, unsigned SrcReg) const override;
/// \brief Get the index of Op in the MachineInstr.
///
diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td
index d2075c0..590fde2 100644
--- a/lib/Target/R600/R600Instructions.td
+++ b/lib/Target/R600/R600Instructions.td
@@ -1625,6 +1625,12 @@ def : DwordAddrPat <i32, R600_Reg32>;
} // End isR600toCayman Predicate
+let Predicates = [isR600] in {
+// Intrinsic patterns
+defm : Expand24IBitOps<MULLO_INT_r600, ADD_INT>;
+defm : Expand24UBitOps<MULLO_UINT_r600, ADD_INT>;
+} // End isR600
+
def getLDSNoRetOp : InstrMapping {
let FilterClass = "R600_LDS_1A1D";
let RowFields = ["BaseOp"];
diff --git a/lib/Target/R600/R600MachineFunctionInfo.h b/lib/Target/R600/R600MachineFunctionInfo.h
index c1bec0a..b0ae22e 100644
--- a/lib/Target/R600/R600MachineFunctionInfo.h
+++ b/lib/Target/R600/R600MachineFunctionInfo.h
@@ -21,7 +21,7 @@
namespace llvm {
class R600MachineFunctionInfo : public AMDGPUMachineFunction {
- virtual void anchor();
+ void anchor() override;
public:
R600MachineFunctionInfo(const MachineFunction &MF);
SmallVector<unsigned, 4> LiveOuts;
diff --git a/lib/Target/R600/R600MachineScheduler.cpp b/lib/Target/R600/R600MachineScheduler.cpp
index d3ffb50..d1655d1 100644
--- a/lib/Target/R600/R600MachineScheduler.cpp
+++ b/lib/Target/R600/R600MachineScheduler.cpp
@@ -12,8 +12,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "misched"
-
#include "R600MachineScheduler.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -23,6 +21,8 @@
using namespace llvm;
+#define DEBUG_TYPE "misched"
+
void R600SchedStrategy::initialize(ScheduleDAGMI *dag) {
assert(dag->hasVRegLiveness() && "R600SchedStrategy needs vreg liveness");
DAG = static_cast<ScheduleDAGMILive*>(dag);
@@ -56,7 +56,7 @@ unsigned getWFCountLimitedByGPR(unsigned GPRCount) {
}
SUnit* R600SchedStrategy::pickNode(bool &IsTopNode) {
- SUnit *SU = 0;
+ SUnit *SU = nullptr;
NextInstKind = IDOther;
IsTopNode = false;
@@ -316,7 +316,7 @@ int R600SchedStrategy::getInstKind(SUnit* SU) {
SUnit *R600SchedStrategy::PopInst(std::vector<SUnit *> &Q, bool AnyALU) {
if (Q.empty())
- return NULL;
+ return nullptr;
for (std::vector<SUnit *>::reverse_iterator It = Q.rbegin(), E = Q.rend();
It != E; ++It) {
SUnit *SU = *It;
@@ -331,7 +331,7 @@ SUnit *R600SchedStrategy::PopInst(std::vector<SUnit *> &Q, bool AnyALU) {
InstructionsGroupCandidate.pop_back();
}
}
- return NULL;
+ return nullptr;
}
void R600SchedStrategy::LoadAlu() {
@@ -448,11 +448,11 @@ SUnit* R600SchedStrategy::pickAlu() {
}
PrepareNextSlot();
}
- return NULL;
+ return nullptr;
}
SUnit* R600SchedStrategy::pickOther(int QID) {
- SUnit *SU = 0;
+ SUnit *SU = nullptr;
std::vector<SUnit *> &AQ = Available[QID];
if (AQ.empty()) {
diff --git a/lib/Target/R600/R600MachineScheduler.h b/lib/Target/R600/R600MachineScheduler.h
index b909ff7..fd475af 100644
--- a/lib/Target/R600/R600MachineScheduler.h
+++ b/lib/Target/R600/R600MachineScheduler.h
@@ -68,17 +68,16 @@ class R600SchedStrategy : public MachineSchedStrategy {
public:
R600SchedStrategy() :
- DAG(0), TII(0), TRI(0), MRI(0) {
+ DAG(nullptr), TII(nullptr), TRI(nullptr), MRI(nullptr) {
}
- virtual ~R600SchedStrategy() {
- }
+ virtual ~R600SchedStrategy() {}
- virtual void initialize(ScheduleDAGMI *dag);
- virtual SUnit *pickNode(bool &IsTopNode);
- virtual void schedNode(SUnit *SU, bool IsTopNode);
- virtual void releaseTopNode(SUnit *SU);
- virtual void releaseBottomNode(SUnit *SU);
+ void initialize(ScheduleDAGMI *dag) override;
+ SUnit *pickNode(bool &IsTopNode) override;
+ void schedNode(SUnit *SU, bool IsTopNode) override;
+ void releaseTopNode(SUnit *SU) override;
+ void releaseBottomNode(SUnit *SU) override;
private:
std::vector<MachineInstr *> InstructionsGroupCandidate;
diff --git a/lib/Target/R600/R600OptimizeVectorRegisters.cpp b/lib/Target/R600/R600OptimizeVectorRegisters.cpp
index 767e5e3..2314136 100644
--- a/lib/Target/R600/R600OptimizeVectorRegisters.cpp
+++ b/lib/Target/R600/R600OptimizeVectorRegisters.cpp
@@ -27,7 +27,6 @@
/// to reduce MOV count.
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "vec-merger"
#include "llvm/Support/Debug.h"
#include "AMDGPU.h"
#include "R600InstrInfo.h"
@@ -42,6 +41,8 @@
using namespace llvm;
+#define DEBUG_TYPE "vec-merger"
+
namespace {
static bool
@@ -107,9 +108,9 @@ private:
public:
static char ID;
R600VectorRegMerger(TargetMachine &tm) : MachineFunctionPass(ID),
- TII(0) { }
+ TII(nullptr) { }
- void getAnalysisUsage(AnalysisUsage &AU) const {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
AU.addRequired<MachineDominatorTree>();
AU.addPreserved<MachineDominatorTree>();
@@ -118,11 +119,11 @@ public:
MachineFunctionPass::getAnalysisUsage(AU);
}
- const char *getPassName() const {
+ const char *getPassName() const override {
return "R600 Vector Registers Merge Pass";
}
- bool runOnMachineFunction(MachineFunction &Fn);
+ bool runOnMachineFunction(MachineFunction &Fn) override;
};
char R600VectorRegMerger::ID = 0;
diff --git a/lib/Target/R600/R600Packetizer.cpp b/lib/Target/R600/R600Packetizer.cpp
index b7b7610..c2f6c03 100644
--- a/lib/Target/R600/R600Packetizer.cpp
+++ b/lib/Target/R600/R600Packetizer.cpp
@@ -14,7 +14,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "packets"
#include "llvm/Support/Debug.h"
#include "AMDGPU.h"
#include "R600InstrInfo.h"
@@ -28,6 +27,8 @@
using namespace llvm;
+#define DEBUG_TYPE "packets"
+
namespace {
class R600Packetizer : public MachineFunctionPass {
@@ -36,7 +37,7 @@ public:
static char ID;
R600Packetizer(const TargetMachine &TM) : MachineFunctionPass(ID) {}
- void getAnalysisUsage(AnalysisUsage &AU) const {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
AU.addRequired<MachineDominatorTree>();
AU.addPreserved<MachineDominatorTree>();
@@ -45,11 +46,11 @@ public:
MachineFunctionPass::getAnalysisUsage(AU);
}
- const char *getPassName() const {
+ const char *getPassName() const override {
return "R600 Packetizer";
}
- bool runOnMachineFunction(MachineFunction &Fn);
+ bool runOnMachineFunction(MachineFunction &Fn) override;
};
char R600Packetizer::ID = 0;
@@ -155,18 +156,19 @@ public:
}
// initPacketizerState - initialize some internal flags.
- void initPacketizerState() {
+ void initPacketizerState() override {
ConsideredInstUsesAlreadyWrittenVectorElement = false;
}
// ignorePseudoInstruction - Ignore bundling of pseudo instructions.
- bool ignorePseudoInstruction(MachineInstr *MI, MachineBasicBlock *MBB) {
+ bool ignorePseudoInstruction(MachineInstr *MI,
+ MachineBasicBlock *MBB) override {
return false;
}
// isSoloInstruction - return true if instruction MI cannot be packetized
// with any other instruction, which means that MI itself is a packet.
- bool isSoloInstruction(MachineInstr *MI) {
+ bool isSoloInstruction(MachineInstr *MI) override {
if (TII->isVector(*MI))
return true;
if (!TII->isALUInstr(MI->getOpcode()))
@@ -182,7 +184,7 @@ public:
// isLegalToPacketizeTogether - Is it legal to packetize SUI and SUJ
// together.
- bool isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) {
+ bool isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) override {
MachineInstr *MII = SUI->getInstr(), *MIJ = SUJ->getInstr();
if (getSlot(MII) == getSlot(MIJ))
ConsideredInstUsesAlreadyWrittenVectorElement = true;
@@ -219,7 +221,9 @@ public:
// isLegalToPruneDependencies - Is it legal to prune dependence between SUI
// and SUJ.
- bool isLegalToPruneDependencies(SUnit *SUI, SUnit *SUJ) {return false;}
+ bool isLegalToPruneDependencies(SUnit *SUI, SUnit *SUJ) override {
+ return false;
+ }
void setIsLastBit(MachineInstr *MI, unsigned Bit) const {
unsigned LastOp = TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::last);
@@ -288,7 +292,7 @@ public:
return true;
}
- MachineBasicBlock::iterator addToPacket(MachineInstr *MI) {
+ MachineBasicBlock::iterator addToPacket(MachineInstr *MI) override {
MachineBasicBlock::iterator FirstInBundle =
CurrentPacketMIs.empty() ? MI : CurrentPacketMIs.front();
const DenseMap<unsigned, unsigned> &PV =
diff --git a/lib/Target/R600/R600RegisterInfo.h b/lib/Target/R600/R600RegisterInfo.h
index c74c49e..52e1a4b 100644
--- a/lib/Target/R600/R600RegisterInfo.h
+++ b/lib/Target/R600/R600RegisterInfo.h
@@ -28,27 +28,28 @@ struct R600RegisterInfo : public AMDGPURegisterInfo {
R600RegisterInfo(AMDGPUTargetMachine &tm);
- virtual BitVector getReservedRegs(const MachineFunction &MF) const;
+ BitVector getReservedRegs(const MachineFunction &MF) const override;
/// \param RC is an AMDIL reg class.
///
/// \returns the R600 reg class that is equivalent to \p RC.
- virtual const TargetRegisterClass *getISARegClass(
- const TargetRegisterClass *RC) const;
+ const TargetRegisterClass *getISARegClass(
+ const TargetRegisterClass *RC) const override;
/// \brief get the HW encoding for a register's channel.
unsigned getHWRegChan(unsigned reg) const;
- virtual unsigned getHWRegIndex(unsigned Reg) const;
+ unsigned getHWRegIndex(unsigned Reg) const override;
/// \brief get the register class of the specified type to use in the
/// CFGStructurizer
- virtual const TargetRegisterClass * getCFGStructurizerRegClass(MVT VT) const;
+ const TargetRegisterClass * getCFGStructurizerRegClass(MVT VT) const override;
- virtual const RegClassWeight &getRegClassWeight(const TargetRegisterClass *RC) const;
+ const RegClassWeight &
+ getRegClassWeight(const TargetRegisterClass *RC) const override;
// \returns true if \p Reg can be defined in one ALU clause and used in another.
- virtual bool isPhysRegLiveAcrossClauses(unsigned Reg) const;
+ bool isPhysRegLiveAcrossClauses(unsigned Reg) const;
};
} // End namespace llvm
diff --git a/lib/Target/R600/R600TextureIntrinsicsReplacer.cpp b/lib/Target/R600/R600TextureIntrinsicsReplacer.cpp
index 9d24404..419ec8b 100644
--- a/lib/Target/R600/R600TextureIntrinsicsReplacer.cpp
+++ b/lib/Target/R600/R600TextureIntrinsicsReplacer.cpp
@@ -209,7 +209,7 @@ public:
FunctionPass(ID) {
}
- virtual bool doInitialization(Module &M) {
+ bool doInitialization(Module &M) override {
LLVMContext &Ctx = M.getContext();
Mod = &M;
FloatType = Type::getFloatTy(Ctx);
@@ -245,16 +245,16 @@ public:
return false;
}
- virtual bool runOnFunction(Function &F) {
+ bool runOnFunction(Function &F) override {
visit(F);
return false;
}
- virtual const char *getPassName() const {
+ const char *getPassName() const override {
return "R600 Texture Intrinsics Replacer";
}
- void getAnalysisUsage(AnalysisUsage &AU) const {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
}
void visitCallInst(CallInst &I) {
diff --git a/lib/Target/R600/SIAnnotateControlFlow.cpp b/lib/Target/R600/SIAnnotateControlFlow.cpp
index f9214a8..d6e4451 100644
--- a/lib/Target/R600/SIAnnotateControlFlow.cpp
+++ b/lib/Target/R600/SIAnnotateControlFlow.cpp
@@ -12,8 +12,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "si-annotate-control-flow"
-
#include "AMDGPU.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/IR/Constants.h"
@@ -26,6 +24,8 @@
using namespace llvm;
+#define DEBUG_TYPE "si-annotate-control-flow"
+
namespace {
// Complex types used in this pass
@@ -91,15 +91,15 @@ public:
SIAnnotateControlFlow():
FunctionPass(ID) { }
- virtual bool doInitialization(Module &M);
+ bool doInitialization(Module &M) override;
- virtual bool runOnFunction(Function &F);
+ bool runOnFunction(Function &F) override;
- virtual const char *getPassName() const {
+ const char *getPassName() const override {
return "SI annotate control flow";
}
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<DominatorTreeWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
FunctionPass::getAnalysisUsage(AU);
@@ -118,7 +118,7 @@ bool SIAnnotateControlFlow::doInitialization(Module &M) {
Void = Type::getVoidTy(Context);
Boolean = Type::getInt1Ty(Context);
Int64 = Type::getInt64Ty(Context);
- ReturnStruct = StructType::get(Boolean, Int64, (Type *)0);
+ ReturnStruct = StructType::get(Boolean, Int64, (Type *)nullptr);
BoolTrue = ConstantInt::getTrue(Context);
BoolFalse = ConstantInt::getFalse(Context);
@@ -126,25 +126,25 @@ bool SIAnnotateControlFlow::doInitialization(Module &M) {
Int64Zero = ConstantInt::get(Int64, 0);
If = M.getOrInsertFunction(
- IfIntrinsic, ReturnStruct, Boolean, (Type *)0);
+ IfIntrinsic, ReturnStruct, Boolean, (Type *)nullptr);
Else = M.getOrInsertFunction(
- ElseIntrinsic, ReturnStruct, Int64, (Type *)0);
+ ElseIntrinsic, ReturnStruct, Int64, (Type *)nullptr);
Break = M.getOrInsertFunction(
- BreakIntrinsic, Int64, Int64, (Type *)0);
+ BreakIntrinsic, Int64, Int64, (Type *)nullptr);
IfBreak = M.getOrInsertFunction(
- IfBreakIntrinsic, Int64, Boolean, Int64, (Type *)0);
+ IfBreakIntrinsic, Int64, Boolean, Int64, (Type *)nullptr);
ElseBreak = M.getOrInsertFunction(
- ElseBreakIntrinsic, Int64, Int64, Int64, (Type *)0);
+ ElseBreakIntrinsic, Int64, Int64, Int64, (Type *)nullptr);
Loop = M.getOrInsertFunction(
- LoopIntrinsic, Boolean, Int64, (Type *)0);
+ LoopIntrinsic, Boolean, Int64, (Type *)nullptr);
EndCf = M.getOrInsertFunction(
- EndCfIntrinsic, Void, Int64, (Type *)0);
+ EndCfIntrinsic, Void, Int64, (Type *)nullptr);
return false;
}
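An aside on the (Type *)nullptr arguments above: they terminate getOrInsertFunction's variadic type list, and the explicit cast matters because a bare literal 0 passed through "..." is not guaranteed to arrive as a null pointer of the right width. A minimal standalone sketch of the pattern, with a hypothetical collectTypes helper standing in for the LLVM API:

    #include <cstdarg>
    #include <vector>

    struct Type {};  // stand-in for llvm::Type

    // Hypothetical helper mirroring the sentinel-terminated varargs style
    // of Module::getOrInsertFunction: reads Type* arguments until it sees
    // the null sentinel.
    static std::vector<Type *> collectTypes(Type *First, ...) {
      std::vector<Type *> Types;
      va_list Args;
      va_start(Args, First);
      for (Type *T = First; T; T = va_arg(Args, Type *))
        Types.push_back(T);
      va_end(Args);
      return Types;
    }

    // Usage: collectTypes(&A, &B, (Type *)nullptr);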
diff --git a/lib/Target/R600/SIFixSGPRCopies.cpp b/lib/Target/R600/SIFixSGPRCopies.cpp
index 402f1f4..5f71453 100644
--- a/lib/Target/R600/SIFixSGPRCopies.cpp
+++ b/lib/Target/R600/SIFixSGPRCopies.cpp
@@ -65,7 +65,6 @@
/// ultimately led to the creation of an illegal COPY.
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "sgpr-copies"
#include "AMDGPU.h"
#include "SIInstrInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -77,6 +76,8 @@
using namespace llvm;
+#define DEBUG_TYPE "sgpr-copies"
+
namespace {
class SIFixSGPRCopies : public MachineFunctionPass {
@@ -97,9 +98,9 @@ private:
public:
SIFixSGPRCopies(TargetMachine &tm) : MachineFunctionPass(ID) { }
- virtual bool runOnMachineFunction(MachineFunction &MF);
+ bool runOnMachineFunction(MachineFunction &MF) override;
- const char *getPassName() const {
+ const char *getPassName() const override {
return "SI Fix SGPR copies";
}
@@ -184,7 +185,8 @@ bool SIFixSGPRCopies::isVGPRToSGPRCopy(const MachineInstr &Copy,
const TargetRegisterClass *SrcRC;
if (!TargetRegisterInfo::isVirtualRegister(SrcReg) ||
- DstRC == &AMDGPU::M0RegRegClass)
+ DstRC == &AMDGPU::M0RegRegClass ||
+ MRI.getRegClass(SrcReg) == &AMDGPU::VReg_1RegClass)
return false;
SrcRC = TRI->getSubRegClass(MRI.getRegClass(SrcReg), SrcSubReg);
@@ -256,6 +258,19 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
TII->moveToVALU(MI);
break;
}
+ case AMDGPU::INSERT_SUBREG: {
+ const TargetRegisterClass *DstRC, *Src0RC, *Src1RC;
+ DstRC = MRI.getRegClass(MI.getOperand(0).getReg());
+ Src0RC = MRI.getRegClass(MI.getOperand(1).getReg());
+ Src1RC = MRI.getRegClass(MI.getOperand(2).getReg());
+ if (TRI->isSGPRClass(DstRC) &&
+ (TRI->hasVGPRs(Src0RC) || TRI->hasVGPRs(Src1RC))) {
+ DEBUG(dbgs() << " Fixing INSERT_SUBREG:\n");
+ DEBUG(MI.print(dbgs()));
+ TII->moveToVALU(MI);
+ }
+ break;
+ }
}
}
}
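A condensed sketch (not the pass itself) of the condition the new INSERT_SUBREG case tests: an SGPR-class destination with any VGPR-class source cannot stay on the scalar unit, so the instruction is handed to moveToVALU. The booleans stand in for the SIRegisterInfo queries isSGPRClass() and hasVGPRs():

    bool needsVALUFix(bool DstIsSGPR, bool Src0HasVGPRs, bool Src1HasVGPRs) {
      return DstIsSGPR && (Src0HasVGPRs || Src1HasVGPRs);
    }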
diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp
index 0b55411..c9e247c 100644
--- a/lib/Target/R600/SIISelLowering.cpp
+++ b/lib/Target/R600/SIISelLowering.cpp
@@ -29,22 +29,21 @@ using namespace llvm;
SITargetLowering::SITargetLowering(TargetMachine &TM) :
AMDGPUTargetLowering(TM) {
- addRegisterClass(MVT::i1, &AMDGPU::SReg_64RegClass);
- addRegisterClass(MVT::i64, &AMDGPU::VSrc_64RegClass);
+ addRegisterClass(MVT::i1, &AMDGPU::VReg_1RegClass);
+ addRegisterClass(MVT::i64, &AMDGPU::SReg_64RegClass);
addRegisterClass(MVT::v32i8, &AMDGPU::SReg_256RegClass);
addRegisterClass(MVT::v64i8, &AMDGPU::SReg_512RegClass);
- addRegisterClass(MVT::i32, &AMDGPU::VSrc_32RegClass);
- addRegisterClass(MVT::f32, &AMDGPU::VSrc_32RegClass);
+ addRegisterClass(MVT::i32, &AMDGPU::SReg_32RegClass);
+ addRegisterClass(MVT::f32, &AMDGPU::VReg_32RegClass);
- addRegisterClass(MVT::f64, &AMDGPU::VSrc_64RegClass);
- addRegisterClass(MVT::v2i32, &AMDGPU::VSrc_64RegClass);
- addRegisterClass(MVT::v2f32, &AMDGPU::VSrc_64RegClass);
+ addRegisterClass(MVT::f64, &AMDGPU::VReg_64RegClass);
+ addRegisterClass(MVT::v2i32, &AMDGPU::SReg_64RegClass);
+ addRegisterClass(MVT::v2f32, &AMDGPU::VReg_64RegClass);
- addRegisterClass(MVT::v4i32, &AMDGPU::VReg_128RegClass);
+ addRegisterClass(MVT::v4i32, &AMDGPU::SReg_128RegClass);
addRegisterClass(MVT::v4f32, &AMDGPU::VReg_128RegClass);
- addRegisterClass(MVT::i128, &AMDGPU::SReg_128RegClass);
addRegisterClass(MVT::v8i32, &AMDGPU::VReg_256RegClass);
addRegisterClass(MVT::v8f32, &AMDGPU::VReg_256RegClass);
@@ -78,8 +77,6 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
setOperationAction(ISD::ADDC, MVT::i32, Legal);
setOperationAction(ISD::ADDE, MVT::i32, Legal);
- setOperationAction(ISD::BITCAST, MVT::i128, Legal);
-
// We need to custom lower vector stores from local memory
setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
@@ -99,10 +96,11 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
setOperationAction(ISD::STORE, MVT::i1, Custom);
setOperationAction(ISD::STORE, MVT::i32, Custom);
setOperationAction(ISD::STORE, MVT::i64, Custom);
- setOperationAction(ISD::STORE, MVT::i128, Custom);
setOperationAction(ISD::STORE, MVT::v2i32, Custom);
setOperationAction(ISD::STORE, MVT::v4i32, Custom);
+ setOperationAction(ISD::SELECT, MVT::f32, Promote);
+ AddPromotedToType(ISD::SELECT, MVT::f32, MVT::i32);
setOperationAction(ISD::SELECT, MVT::i64, Custom);
setOperationAction(ISD::SELECT, MVT::f64, Promote);
AddPromotedToType(ISD::SELECT, MVT::f64, MVT::i64);
@@ -119,6 +117,22 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
setOperationAction(ISD::SIGN_EXTEND, MVT::i64, Custom);
setOperationAction(ISD::ZERO_EXTEND, MVT::i64, Custom);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Legal);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i1, Custom);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i1, Custom);
+
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Legal);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Custom);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Custom);
+
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Legal);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Custom);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Custom);
+
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Custom);
+
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Custom);
+
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::f32, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v16i8, Custom);
@@ -126,39 +140,48 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
- setLoadExtAction(ISD::SEXTLOAD, MVT::i32, Expand);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Custom);
setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Custom);
- setLoadExtAction(ISD::ZEXTLOAD, MVT::i32, Expand);
- setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Custom);
- setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Custom);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i32, Expand);
setLoadExtAction(ISD::SEXTLOAD, MVT::v8i16, Expand);
setLoadExtAction(ISD::SEXTLOAD, MVT::v16i16, Expand);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Custom);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Custom);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::i32, Expand);
+
+ setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
setLoadExtAction(ISD::EXTLOAD, MVT::i8, Custom);
setLoadExtAction(ISD::EXTLOAD, MVT::i16, Custom);
setLoadExtAction(ISD::EXTLOAD, MVT::i32, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
+
setTruncStoreAction(MVT::i32, MVT::i8, Custom);
setTruncStoreAction(MVT::i32, MVT::i16, Custom);
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
setTruncStoreAction(MVT::i64, MVT::i32, Expand);
- setTruncStoreAction(MVT::i128, MVT::i64, Expand);
setTruncStoreAction(MVT::v8i32, MVT::v8i16, Expand);
setTruncStoreAction(MVT::v16i32, MVT::v16i16, Expand);
+ setOperationAction(ISD::LOAD, MVT::i1, Custom);
+
setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
+ // These should use UDIVREM, so set them to expand
+ setOperationAction(ISD::UDIV, MVT::i64, Expand);
+ setOperationAction(ISD::UREM, MVT::i64, Expand);
+
// We only support LOAD/STORE and vector manipulation ops for vectors
// with > 4 elements.
MVT VecTypes[] = {
MVT::v8i32, MVT::v8f32, MVT::v16i32, MVT::v16f32
};
- const size_t NumVecTypes = array_lengthof(VecTypes);
- for (unsigned Type = 0; Type < NumVecTypes; ++Type) {
+ for (MVT VT : VecTypes) {
for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op) {
switch(Op) {
case ISD::LOAD:
@@ -172,7 +195,7 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
case ISD::EXTRACT_SUBVECTOR:
break;
default:
- setOperationAction(Op, VecTypes[Type], Expand);
+ setOperationAction(Op, VT, Expand);
break;
}
}
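The loop rewrite above is one instance of a pattern applied throughout this rebase: C++11 range-based for replaces manual index and array_lengthof bookkeeping. A minimal sketch of the two forms, with lengthOf as a local stand-in for llvm::array_lengthof:

    #include <cstddef>

    template <typename T, std::size_t N>
    constexpr std::size_t lengthOf(T (&)[N]) { return N; }

    void visitAll() {
      int Types[] = {1, 2, 3, 4};
      for (std::size_t I = 0, E = lengthOf(Types); I != E; ++I) { /* old: indexed */ }
      for (int T : Types) { (void)T; /* new: no count, no subscripting */ }
    }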
@@ -189,6 +212,7 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
setOperationAction(ISD::FCEIL, MVT::f64, Legal);
setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
+ setOperationAction(ISD::FRINT, MVT::f64, Legal);
}
setTargetDAGCombine(ISD::SELECT_CC);
@@ -204,10 +228,40 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
bool SITargetLowering::allowsUnalignedMemoryAccesses(EVT VT,
unsigned AddrSpace,
bool *IsFast) const {
+ if (IsFast)
+ *IsFast = false;
+
// XXX: This depends on the address space and also we may want to revisit
// the alignment values we specify in the DataLayout.
+
+ // TODO: I think v3i32 should allow unaligned accesses on CI with DS_READ_B96,
+ // which isn't a simple VT.
if (!VT.isSimple() || VT == MVT::Other)
return false;
+
+ // XXX - CI changes say "Support for unaligned memory accesses" but I don't
+ // see specifically what that refers to. The wording everywhere else seems
+ // to be the same.
+
+ // 3.6.4 - Operations using pairs of VGPRs (for example: double-floats) have
+ // no alignment restrictions.
+ if (AddrSpace == AMDGPUAS::PRIVATE_ADDRESS) {
+ // Using any pair of GPRs should be the same as any other pair.
+ if (IsFast)
+ *IsFast = true;
+ return VT.bitsGE(MVT::i64);
+ }
+
+ // XXX - The only mention I see of this in the ISA manual is for LDS direct
+ // reads, where the byte address "must be dword aligned". Is it also true for
+ // normal loads and stores?
+ if (AddrSpace == AMDGPUAS::LOCAL_ADDRESS)
+ return false;
+
+ // 8.1.6 - For Dword or larger reads or writes, the two LSBs of the
+ // byte-address are ignored, thus forcing Dword alignment.
+ if (IsFast)
+ *IsFast = true;
return VT.bitsGT(MVT::i32);
}
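A condensed sketch of the policy the rewritten hook implements, assuming address-space enumerators analogous to AMDGPUAS: private (scratch) accesses through VGPR pairs have no alignment restriction, LDS reports no unaligned support, and other spaces allow anything wider than a dword because the two LSBs of the byte address are ignored:

    enum AddrSpace { PRIVATE_ADDRESS, LOCAL_ADDRESS, GLOBAL_ADDRESS };

    bool allowsUnaligned(unsigned Bits, AddrSpace AS, bool *IsFast) {
      if (IsFast)
        *IsFast = false;
      if (AS == PRIVATE_ADDRESS) {   // VGPR pairs: no alignment restriction
        if (IsFast)
          *IsFast = true;
        return Bits >= 64;
      }
      if (AS == LOCAL_ADDRESS)       // LDS: dword alignment required
        return false;
      if (IsFast)
        *IsFast = true;              // dword or larger: 2 LSBs ignored
      return Bits > 32;
    }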
@@ -224,7 +278,7 @@ bool SITargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
SDValue SITargetLowering::LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT,
SDLoc DL, SDValue Chain,
- unsigned Offset) const {
+ unsigned Offset, bool Signed) const {
MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
AMDGPUAS::CONSTANT_ADDRESS);
@@ -232,7 +286,7 @@ SDValue SITargetLowering::LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT,
MRI.getLiveInVirtReg(AMDGPU::SGPR0_SGPR1), MVT::i64);
SDValue Ptr = DAG.getNode(ISD::ADD, DL, MVT::i64, BasePtr,
DAG.getConstant(Offset, MVT::i64));
- return DAG.getExtLoad(ISD::SEXTLOAD, DL, VT, Chain, Ptr,
+ return DAG.getExtLoad(Signed ? ISD::SEXTLOAD : ISD::ZEXTLOAD, DL, VT, Chain, Ptr,
MachinePointerInfo(UndefValue::get(PtrTy)), MemVT,
false, false, MemVT.getSizeInBits() >> 3);
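Two details of the call above are easy to miss: the new Signed flag selects a sign- or zero-extending load for sub-dword kernel arguments, and getSizeInBits() >> 3 converts the memory type's bit width into the byte alignment the load expects. A tiny sketch of both:

    enum ExtKind { SEXTLOAD, ZEXTLOAD };

    ExtKind chooseExtKind(bool Signed) { return Signed ? SEXTLOAD : ZEXTLOAD; }

    unsigned bitsToBytes(unsigned Bits) { return Bits >> 3; }  // 32 -> 4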
@@ -340,7 +394,8 @@ SDValue SITargetLowering::LowerFormalArguments(
// The first 36 bytes of the input buffer contains information about
// thread group and global sizes.
SDValue Arg = LowerParameter(DAG, VT, MemVT, DL, DAG.getRoot(),
- 36 + VA.getLocMemOffset());
+ 36 + VA.getLocMemOffset(),
+ Ins[i].Flags.isSExt());
InVals.push_back(Arg);
continue;
}
@@ -381,8 +436,7 @@ SDValue SITargetLowering::LowerFormalArguments(
for (unsigned j = 0; j != NumElements; ++j)
Regs.push_back(DAG.getUNDEF(VT));
- InVals.push_back(DAG.getNode(ISD::BUILD_VECTOR, DL, Arg.VT,
- Regs.data(), Regs.size()));
+ InVals.push_back(DAG.getNode(ISD::BUILD_VECTOR, DL, Arg.VT, Regs));
continue;
}
@@ -395,15 +449,15 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
MachineInstr * MI, MachineBasicBlock * BB) const {
MachineBasicBlock::iterator I = *MI;
+ const SIInstrInfo *TII =
+ static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo());
+ MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
switch (MI->getOpcode()) {
default:
return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
case AMDGPU::BRANCH: return BB;
case AMDGPU::SI_ADDR64_RSRC: {
- const SIInstrInfo *TII =
- static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo());
- MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
unsigned SuperReg = MI->getOperand(0).getReg();
unsigned SubRegLo = MRI.createVirtualRegister(&AMDGPU::SGPR_64RegClass);
unsigned SubRegHi = MRI.createVirtualRegister(&AMDGPU::SGPR_64RegClass);
@@ -428,9 +482,7 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
MI->eraseFromParent();
break;
}
- case AMDGPU::V_SUB_F64: {
- const SIInstrInfo *TII =
- static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo());
+ case AMDGPU::V_SUB_F64:
BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::V_ADD_F64),
MI->getOperand(0).getReg())
.addReg(MI->getOperand(1).getReg())
@@ -442,11 +494,9 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
.addImm(2); /* NEG */
MI->eraseFromParent();
break;
- }
+
case AMDGPU::SI_RegisterStorePseudo: {
MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
- const SIInstrInfo *TII =
- static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo());
unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
MachineInstrBuilder MIB =
BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::SI_RegisterStore),
@@ -455,6 +505,50 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
MIB.addOperand(MI->getOperand(i));
MI->eraseFromParent();
+ break;
+ }
+ case AMDGPU::FABS_SI: {
+ MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
+ const SIInstrInfo *TII =
+ static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo());
+ unsigned Reg = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
+ BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::V_MOV_B32_e32),
+ Reg)
+ .addImm(0x7fffffff);
+ BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::V_AND_B32_e32),
+ MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg())
+ .addReg(Reg);
+ MI->eraseFromParent();
+ break;
+ }
+ case AMDGPU::FNEG_SI: {
+ MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
+ const SIInstrInfo *TII =
+ static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo());
+ unsigned Reg = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
+ BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::V_MOV_B32_e32),
+ Reg)
+ .addImm(0x80000000);
+ BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::V_XOR_B32_e32),
+ MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(1).getReg())
+ .addReg(Reg);
+ MI->eraseFromParent();
+ break;
+ }
+ case AMDGPU::FCLAMP_SI: {
+ const SIInstrInfo *TII =
+ static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo());
+ BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::V_ADD_F32_e64),
+ MI->getOperand(0).getReg())
+ .addImm(0) // SRC0 modifiers
+ .addOperand(MI->getOperand(1))
+ .addImm(0) // SRC1 modifiers
+ .addImm(0) // SRC1
+ .addImm(1) // CLAMP
+ .addImm(0); // OMOD
+ MI->eraseFromParent();
}
}
return BB;
@@ -510,7 +604,7 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
SplitVectorLoad(Op, DAG),
Load->getChain()
};
- return DAG.getMergeValues(MergedValues, 2, SDLoc(Op));
+ return DAG.getMergeValues(MergedValues, SDLoc(Op));
} else {
return LowerLOAD(Op, DAG);
}
@@ -533,23 +627,23 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
switch (IntrinsicID) {
default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
case Intrinsic::r600_read_ngroups_x:
- return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 0);
+ return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 0, false);
case Intrinsic::r600_read_ngroups_y:
- return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 4);
+ return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 4, false);
case Intrinsic::r600_read_ngroups_z:
- return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 8);
+ return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 8, false);
case Intrinsic::r600_read_global_size_x:
- return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 12);
+ return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 12, false);
case Intrinsic::r600_read_global_size_y:
- return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 16);
+ return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 16, false);
case Intrinsic::r600_read_global_size_z:
- return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 20);
+ return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 20, false);
case Intrinsic::r600_read_local_size_x:
- return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 24);
+ return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 24, false);
case Intrinsic::r600_read_local_size_y:
- return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 28);
+ return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 28, false);
case Intrinsic::r600_read_local_size_z:
- return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 32);
+ return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 32, false);
case Intrinsic::r600_read_tgid_x:
return CreateLiveInRegister(DAG, &AMDGPU::SReg_32RegClass,
AMDGPU::SReg_32RegClass.getRegister(NumUserSGPRs + 0), VT);
@@ -570,7 +664,7 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
AMDGPU::VGPR2, VT);
case AMDGPUIntrinsic::SI_load_const: {
SDValue Ops [] = {
- ResourceDescriptorToi128(Op.getOperand(1), DAG),
+ Op.getOperand(1),
Op.getOperand(2)
};
@@ -579,7 +673,7 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant,
VT.getSizeInBits() / 8, 4);
return DAG.getMemIntrinsicNode(AMDGPUISD::LOAD_CONSTANT, DL,
- Op->getVTList(), Ops, 2, VT, MMO);
+ Op->getVTList(), Ops, VT, MMO);
}
case AMDGPUIntrinsic::SI_sample:
return LowerSampleIntrinsic(AMDGPUISD::SAMPLE, Op, DAG);
@@ -591,7 +685,7 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
return LowerSampleIntrinsic(AMDGPUISD::SAMPLEL, Op, DAG);
case AMDGPUIntrinsic::SI_vs_load_input:
return DAG.getNode(AMDGPUISD::LOAD_INPUT, DL, VT,
- ResourceDescriptorToi128(Op.getOperand(1), DAG),
+ Op.getOperand(1),
Op.getOperand(2),
Op.getOperand(3));
}
@@ -606,7 +700,7 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
SDLoc DL(Op);
SDValue Ops [] = {
Chain,
- ResourceDescriptorToi128(Op.getOperand(2), DAG),
+ Op.getOperand(2),
Op.getOperand(3),
Op.getOperand(4),
Op.getOperand(5),
@@ -627,8 +721,7 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
MachineMemOperand::MOStore,
VT.getSizeInBits() / 8, 4);
return DAG.getMemIntrinsicNode(AMDGPUISD::TBUFFER_STORE_FORMAT, DL,
- Op->getVTList(), Ops,
- sizeof(Ops)/sizeof(Ops[0]), VT, MMO);
+ Op->getVTList(), Ops, VT, MMO);
}
default:
break;
@@ -650,7 +743,7 @@ static SDNode *findUser(SDValue Value, unsigned Opcode) {
if (I->getOpcode() == Opcode)
return *I;
}
- return 0;
+ return nullptr;
}
/// This transforms the control flow intrinsics to get the branch destination as
@@ -662,7 +755,7 @@ SDValue SITargetLowering::LowerBRCOND(SDValue BRCOND,
SDNode *Intr = BRCOND.getOperand(1).getNode();
SDValue Target = BRCOND.getOperand(2);
- SDNode *BR = 0;
+ SDNode *BR = nullptr;
if (Intr->getOpcode() == ISD::SETCC) {
// As long as we negate the condition everything is fine
@@ -695,7 +788,7 @@ SDValue SITargetLowering::LowerBRCOND(SDValue BRCOND,
// build the new intrinsic call
SDNode *Result = DAG.getNode(
Res.size() > 1 ? ISD::INTRINSIC_W_CHAIN : ISD::INTRINSIC_VOID, DL,
- DAG.getVTList(Res.data(), Res.size()), Ops.data(), Ops.size()).getNode();
+ DAG.getVTList(Res), Ops).getNode();
if (BR) {
// Give the branch instruction our target
@@ -703,7 +796,7 @@ SDValue SITargetLowering::LowerBRCOND(SDValue BRCOND,
BR->getOperand(0),
BRCOND.getOperand(2)
};
- DAG.MorphNodeTo(BR, ISD::BR, BR->getVTList(), Ops, 2);
+ DAG.MorphNodeTo(BR, ISD::BR, BR->getVTList(), Ops);
}
SDValue Chain = SDValue(Result, Result->getNumValues() - 1);
@@ -739,7 +832,7 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
MergedValues[1] = Load->getChain();
if (Ret.getNode()) {
MergedValues[0] = Ret;
- return DAG.getMergeValues(MergedValues, 2, DL);
+ return DAG.getMergeValues(MergedValues, DL);
}
if (Load->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
@@ -770,30 +863,16 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
}
MergedValues[0] = Ret;
- return DAG.getMergeValues(MergedValues, 2, DL);
+ return DAG.getMergeValues(MergedValues, DL);
}
-SDValue SITargetLowering::ResourceDescriptorToi128(SDValue Op,
- SelectionDAG &DAG) const {
-
- if (Op.getValueType() == MVT::i128) {
- return Op;
- }
-
- assert(Op.getOpcode() == ISD::UNDEF);
-
- return DAG.getNode(ISD::BUILD_PAIR, SDLoc(Op), MVT::i128,
- DAG.getConstant(0, MVT::i64),
- DAG.getConstant(0, MVT::i64));
-}
-
SDValue SITargetLowering::LowerSampleIntrinsic(unsigned Opcode,
const SDValue &Op,
SelectionDAG &DAG) const {
return DAG.getNode(Opcode, SDLoc(Op), Op.getValueType(), Op.getOperand(1),
Op.getOperand(2),
- ResourceDescriptorToi128(Op.getOperand(3), DAG),
+ Op.getOperand(3),
Op.getOperand(4));
}
@@ -833,12 +912,6 @@ SDValue SITargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
SDLoc DL(Op);
- // Possible Min/Max pattern
- SDValue MinMax = LowerMinMax(Op, DAG);
- if (MinMax.getNode()) {
- return MinMax;
- }
-
SDValue Cond = DAG.getNode(ISD::SETCC, DL, MVT::i1, LHS, RHS, CC);
return DAG.getNode(ISD::SELECT, DL, VT, Cond, True, False);
}
@@ -948,8 +1021,12 @@ SDValue SITargetLowering::LowerZERO_EXTEND(SDValue Op,
return SDValue();
}
- return DAG.getNode(ISD::BUILD_PAIR, DL, VT, Op.getOperand(0),
- DAG.getConstant(0, MVT::i32));
+ SDValue Src = Op.getOperand(0);
+ if (Src.getValueType() != MVT::i32)
+ Src = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Src);
+
+ SDValue Zero = DAG.getConstant(0, MVT::i32);
+ return DAG.getNode(ISD::BUILD_PAIR, DL, VT, Src, Zero);
}
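Conceptually, the fixed lowering builds the i64 as a (low, high) pair with a zero high half, first widening a sub-i32 source so that BUILD_PAIR sees two 32-bit halves. The integer arithmetic it corresponds to:

    #include <cstdint>

    uint64_t zextToI64(uint32_t SrcLo) {
      return ((uint64_t)0 << 32) | SrcLo;  // high half zero, low half = source
    }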
//===----------------------------------------------------------------------===//
@@ -963,7 +1040,7 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
EVT VT = N->getValueType(0);
switch (N->getOpcode()) {
- default: break;
+ default: return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
case ISD::SELECT_CC: {
ConstantSDNode *True, *False;
// i1 selectcc(l, r, -1, 0, cc) -> i1 setcc(l, r, cc)
@@ -982,7 +1059,7 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
SDValue Arg0 = N->getOperand(0);
SDValue Arg1 = N->getOperand(1);
SDValue CC = N->getOperand(2);
- ConstantSDNode * C = NULL;
+ ConstantSDNode * C = nullptr;
ISD::CondCode CCOp = dyn_cast<CondCodeSDNode>(CC)->get();
// i1 setcc (sext(i1), 0, setne) -> i1 setcc(i1, 0, setne)
@@ -998,7 +1075,8 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
break;
}
}
- return SDValue();
+
+ return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
}
/// \brief Test if RegClass is one of the VSrc classes
@@ -1029,9 +1107,11 @@ int32_t SITargetLowering::analyzeImmediate(const SDNode *N) const {
return -1;
}
Imm.I = Node->getSExtValue();
- } else if (const ConstantFPSDNode *Node = dyn_cast<ConstantFPSDNode>(N))
+ } else if (const ConstantFPSDNode *Node = dyn_cast<ConstantFPSDNode>(N)) {
+ if (N->getValueType(0) != MVT::f32)
+ return -1;
Imm.F = Node->getValueAPF().convertToFloat();
- else
+ } else
return -1; // It isn't an immediate
if ((Imm.I >= -16 && Imm.I <= 64) ||
@@ -1051,7 +1131,7 @@ bool SITargetLowering::foldImm(SDValue &Operand, int32_t &Immediate,
MachineSDNode *Mov = dyn_cast<MachineSDNode>(Operand);
const SIInstrInfo *TII =
static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo());
- if (Mov == 0 || !TII->isMov(Mov->getMachineOpcode()))
+ if (!Mov || !TII->isMov(Mov->getMachineOpcode()))
return false;
const SDValue &Op = Mov->getOperand(0);
@@ -1098,7 +1178,7 @@ const TargetRegisterClass *SITargetLowering::getRegClassForNode(
}
return TRI.getPhysRegClass(Reg);
}
- default: return NULL;
+ default: return nullptr;
}
}
const MCInstrDesc &Desc = TII->get(Op->getMachineOpcode());
@@ -1202,17 +1282,17 @@ SDNode *SITargetLowering::foldOperands(MachineSDNode *Node,
// Commuted opcode if available
int OpcodeRev = Desc->isCommutable() ? TII->commuteOpcode(Opcode) : -1;
- const MCInstrDesc *DescRev = OpcodeRev == -1 ? 0 : &TII->get(OpcodeRev);
+ const MCInstrDesc *DescRev = OpcodeRev == -1 ? nullptr : &TII->get(OpcodeRev);
assert(!DescRev || DescRev->getNumDefs() == NumDefs);
assert(!DescRev || DescRev->getNumOperands() == NumOps);
// e64 version if available, -1 otherwise
int OpcodeE64 = AMDGPU::getVOPe64(Opcode);
- const MCInstrDesc *DescE64 = OpcodeE64 == -1 ? 0 : &TII->get(OpcodeE64);
+ const MCInstrDesc *DescE64 = OpcodeE64 == -1 ? nullptr : &TII->get(OpcodeE64);
+ int InputModifiers[3] = {0};
assert(!DescE64 || DescE64->getNumDefs() == NumDefs);
- assert(!DescE64 || DescE64->getNumOperands() == (NumOps + 4));
int32_t Immediate = Desc->getSize() == 4 ? 0 : -1;
bool HaveVSrc = false, HaveSSrc = false;
@@ -1279,17 +1359,18 @@ SDNode *SITargetLowering::foldOperands(MachineSDNode *Node,
fitsRegClass(DAG, Ops[1], OtherRegClass))) {
// Swap commutable operands
- SDValue Tmp = Ops[1];
- Ops[1] = Ops[0];
- Ops[0] = Tmp;
+ std::swap(Ops[0], Ops[1]);
Desc = DescRev;
- DescRev = 0;
+ DescRev = nullptr;
continue;
}
}
- if (DescE64 && !Immediate) {
+ if (Immediate)
+ continue;
+
+ if (DescE64) {
// Test if it makes sense to switch to e64 encoding
unsigned OtherRegClass = DescE64->OpInfo[Op].RegClass;
@@ -1305,14 +1386,46 @@ SDNode *SITargetLowering::foldOperands(MachineSDNode *Node,
Immediate = -1;
Promote2e64 = true;
Desc = DescE64;
- DescE64 = 0;
+ DescE64 = nullptr;
}
}
+
+ if (!DescE64 && !Promote2e64)
+ continue;
+ if (!Operand.isMachineOpcode())
+ continue;
+ if (Operand.getMachineOpcode() == AMDGPU::FNEG_SI) {
+ Ops.pop_back();
+ Ops.push_back(Operand.getOperand(0));
+ InputModifiers[i] = 1;
+ Promote2e64 = true;
+ if (!DescE64)
+ continue;
+ Desc = DescE64;
+ DescE64 = nullptr;
+ } else if (Operand.getMachineOpcode() == AMDGPU::FABS_SI) {
+ Ops.pop_back();
+ Ops.push_back(Operand.getOperand(0));
+ InputModifiers[i] = 2;
+ Promote2e64 = true;
+ if (!DescE64)
+ continue;
+ Desc = DescE64;
+ DescE64 = nullptr;
+ }
}
if (Promote2e64) {
+ std::vector<SDValue> OldOps(Ops);
+ Ops.clear();
+ for (unsigned i = 0; i < OldOps.size(); ++i) {
+ // src_modifier
+ Ops.push_back(DAG.getTargetConstant(InputModifiers[i], MVT::i32));
+ Ops.push_back(OldOps[i]);
+ }
// Add the modifier flags while promoting
- for (unsigned i = 0; i < 4; ++i)
+ for (unsigned i = 0; i < 2; ++i)
Ops.push_back(DAG.getTargetConstant(0, MVT::i32));
}
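A sketch of the operand rewrite above, with plain integers standing in for SDValues: each source gains a preceding src_modifier slot, then two trailing zeros fill the VOP3 clamp and omod fields:

    #include <cstddef>
    #include <vector>

    std::vector<int> promoteToE64(const std::vector<int> &Srcs,
                                  const std::vector<int> &Mods) {
      std::vector<int> Ops;
      for (std::size_t I = 0; I < Srcs.size(); ++I) {
        Ops.push_back(Mods[I]);  // src_modifier for operand I (0, neg, or abs)
        Ops.push_back(Srcs[I]);
      }
      Ops.push_back(0);          // clamp
      Ops.push_back(0);          // omod
      return Ops;
    }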
@@ -1390,7 +1503,7 @@ void SITargetLowering::adjustWritemask(MachineSDNode *&Node,
Ops.push_back(DAG.getTargetConstant(NewDmask, MVT::i32));
for (unsigned i = 1, e = Node->getNumOperands(); i != e; ++i)
Ops.push_back(Node->getOperand(i));
- Node = (MachineSDNode*)DAG.UpdateNodeOperands(Node, Ops.data(), Ops.size());
+ Node = (MachineSDNode*)DAG.UpdateNodeOperands(Node, Ops);
// If we only got one lane, replace it with a copy
// (if NewDmask has only one bit set...)
diff --git a/lib/Target/R600/SIISelLowering.h b/lib/Target/R600/SIISelLowering.h
index ca73f53..c6eaa81 100644
--- a/lib/Target/R600/SIISelLowering.h
+++ b/lib/Target/R600/SIISelLowering.h
@@ -22,7 +22,7 @@ namespace llvm {
class SITargetLowering : public AMDGPUTargetLowering {
SDValue LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT, SDLoc DL,
- SDValue Chain, unsigned Offset) const;
+ SDValue Chain, unsigned Offset, bool Signed) const;
SDValue LowerSampleIntrinsic(unsigned Opcode, const SDValue &Op,
SelectionDAG &DAG) const;
SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
@@ -33,7 +33,6 @@ class SITargetLowering : public AMDGPUTargetLowering {
SDValue LowerZERO_EXTEND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
- SDValue ResourceDescriptorToi128(SDValue Op, SelectionDAG &DAG) const;
bool foldImm(SDValue &Operand, int32_t &Immediate,
bool &ScalarSlotUsed) const;
const TargetRegisterClass *getRegClassForNode(SelectionDAG &DAG,
@@ -49,32 +48,33 @@ class SITargetLowering : public AMDGPUTargetLowering {
public:
SITargetLowering(TargetMachine &tm);
- bool allowsUnalignedMemoryAccesses(EVT VT, unsigned AS, bool *IsFast) const;
- virtual bool shouldSplitVectorType(EVT VT) const override;
+ bool allowsUnalignedMemoryAccesses(EVT VT, unsigned AS,
+ bool *IsFast) const override;
+ bool shouldSplitVectorType(EVT VT) const override;
- virtual bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
- Type *Ty) const override;
+ bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
+ Type *Ty) const override;
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
SDLoc DL, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const;
+ SmallVectorImpl<SDValue> &InVals) const override;
- virtual MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr * MI,
- MachineBasicBlock * BB) const;
- virtual EVT getSetCCResultType(LLVMContext &Context, EVT VT) const;
- virtual MVT getScalarShiftAmountTy(EVT VT) const;
- virtual bool isFMAFasterThanFMulAndFAdd(EVT VT) const;
- virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
- virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
- virtual SDNode *PostISelFolding(MachineSDNode *N, SelectionDAG &DAG) const;
- virtual void AdjustInstrPostInstrSelection(MachineInstr *MI,
- SDNode *Node) const;
+ MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr * MI,
+ MachineBasicBlock * BB) const override;
+ EVT getSetCCResultType(LLVMContext &Context, EVT VT) const override;
+ MVT getScalarShiftAmountTy(EVT VT) const override;
+ bool isFMAFasterThanFMulAndFAdd(EVT VT) const override;
+ SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
+ SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
+ SDNode *PostISelFolding(MachineSDNode *N, SelectionDAG &DAG) const override;
+ void AdjustInstrPostInstrSelection(MachineInstr *MI,
+ SDNode *Node) const override;
int32_t analyzeImmediate(const SDNode *N) const;
SDValue CreateLiveInRegister(SelectionDAG &DAG, const TargetRegisterClass *RC,
- unsigned Reg, EVT VT) const;
+ unsigned Reg, EVT VT) const override;
};
} // End namespace llvm
diff --git a/lib/Target/R600/SIInsertWaits.cpp b/lib/Target/R600/SIInsertWaits.cpp
index 695ec40..a17fed7 100644
--- a/lib/Target/R600/SIInsertWaits.cpp
+++ b/lib/Target/R600/SIInsertWaits.cpp
@@ -97,13 +97,13 @@ private:
public:
SIInsertWaits(TargetMachine &tm) :
MachineFunctionPass(ID),
- TII(0),
- TRI(0),
+ TII(nullptr),
+ TRI(nullptr),
ExpInstrTypesSeen(0) { }
- virtual bool runOnMachineFunction(MachineFunction &MF);
+ bool runOnMachineFunction(MachineFunction &MF) override;
- const char *getPassName() const {
+ const char *getPassName() const override {
return "SI insert wait instructions";
}
diff --git a/lib/Target/R600/SIInstrFormats.td b/lib/Target/R600/SIInstrFormats.td
index aa2c22c..168eff2 100644
--- a/lib/Target/R600/SIInstrFormats.td
+++ b/lib/Target/R600/SIInstrFormats.td
@@ -12,7 +12,7 @@
//===----------------------------------------------------------------------===//
class InstSI <dag outs, dag ins, string asm, list<dag> pattern> :
- AMDGPUInst<outs, ins, asm, pattern> {
+ AMDGPUInst<outs, ins, asm, pattern>, PredicateControl {
field bits<1> VM_CNT = 0;
field bits<1> EXP_CNT = 0;
@@ -210,16 +210,19 @@ class VOP3 <bits<9> op, dag outs, dag ins, string asm, list<dag> pattern> :
Enc64 <outs, ins, asm, pattern> {
bits<8> dst;
+ bits<2> src0_modifiers;
bits<9> src0;
+ bits<2> src1_modifiers;
bits<9> src1;
+ bits<2> src2_modifiers;
bits<9> src2;
- bits<3> abs;
bits<1> clamp;
bits<2> omod;
- bits<3> neg;
let Inst{7-0} = dst;
- let Inst{10-8} = abs;
+ let Inst{8} = src0_modifiers{1};
+ let Inst{9} = src1_modifiers{1};
+ let Inst{10} = src2_modifiers{1};
let Inst{11} = clamp;
let Inst{25-17} = op;
let Inst{31-26} = 0x34; //encoding
@@ -227,7 +230,9 @@ class VOP3 <bits<9> op, dag outs, dag ins, string asm, list<dag> pattern> :
let Inst{49-41} = src1;
let Inst{58-50} = src2;
let Inst{60-59} = omod;
- let Inst{63-61} = neg;
+ let Inst{61} = src0_modifiers{0};
+ let Inst{62} = src1_modifiers{0};
+ let Inst{63} = src2_modifiers{0};
let mayLoad = 0;
let mayStore = 0;
@@ -240,12 +245,14 @@ class VOP3b <bits<9> op, dag outs, dag ins, string asm, list<dag> pattern> :
Enc64 <outs, ins, asm, pattern> {
bits<8> dst;
+ bits<2> src0_modifiers;
bits<9> src0;
+ bits<2> src1_modifiers;
bits<9> src1;
+ bits<2> src2_modifiers;
bits<9> src2;
bits<7> sdst;
bits<2> omod;
- bits<3> neg;
let Inst{7-0} = dst;
let Inst{14-8} = sdst;
@@ -255,7 +262,9 @@ class VOP3b <bits<9> op, dag outs, dag ins, string asm, list<dag> pattern> :
let Inst{49-41} = src1;
let Inst{58-50} = src2;
let Inst{60-59} = omod;
- let Inst{63-61} = neg;
+ let Inst{61} = src0_modifiers{0};
+ let Inst{62} = src1_modifiers{0};
+ let Inst{63} = src2_modifiers{0};
let mayLoad = 0;
let mayStore = 0;
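The encoding change above replaces the single 3-bit abs (bits 10-8) and neg (bits 63-61) fields with a 2-bit modifier field per source, where bit 0 carries negate and bit 1 carries absolute value. A sketch of that packing in C++:

    #include <cstdint>

    uint8_t packSrcModifiers(bool Neg, bool Abs) {
      return (uint8_t)Neg | ((uint8_t)Abs << 1);  // 0b01 = neg, 0b10 = abs
    }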
diff --git a/lib/Target/R600/SIInstrInfo.cpp b/lib/Target/R600/SIInstrInfo.cpp
index ab2fe09..4a9e346 100644
--- a/lib/Target/R600/SIInstrInfo.cpp
+++ b/lib/Target/R600/SIInstrInfo.cpp
@@ -187,27 +187,45 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
int FrameIndex,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
- MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
SIMachineFunctionInfo *MFI = MBB.getParent()->getInfo<SIMachineFunctionInfo>();
DebugLoc DL = MBB.findDebugLoc(MI);
unsigned KillFlag = isKill ? RegState::Kill : 0;
+ MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
if (TRI->getCommonSubClass(RC, &AMDGPU::SGPR_32RegClass)) {
- unsigned Lane = MFI->SpillTracker.getNextLane(MRI);
- BuildMI(MBB, MI, DL, get(AMDGPU::V_WRITELANE_B32),
- MFI->SpillTracker.LaneVGPR)
+ unsigned Lane = MFI->SpillTracker.reserveLanes(MRI, MBB.getParent());
+
+ BuildMI(MBB, MI, DL, get(AMDGPU::V_WRITELANE_B32), MFI->SpillTracker.LaneVGPR)
.addReg(SrcReg, KillFlag)
.addImm(Lane);
+ MFI->SpillTracker.addSpilledReg(FrameIndex, MFI->SpillTracker.LaneVGPR, Lane);
+ } else if (RI.isSGPRClass(RC)) {
+ // We are only allowed to create one new instruction when spilling
+ // registers, so we need to use a pseudo instruction for vector
+ // registers.
+ //
+ // Reserve a spot in the spill tracker for each sub-register of
+ // the vector register.
+ unsigned NumSubRegs = RC->getSize() / 4;
+ unsigned FirstLane = MFI->SpillTracker.reserveLanes(MRI, MBB.getParent(),
+ NumSubRegs);
MFI->SpillTracker.addSpilledReg(FrameIndex, MFI->SpillTracker.LaneVGPR,
- Lane);
- } else {
- for (unsigned i = 0, e = RC->getSize() / 4; i != e; ++i) {
- unsigned SubReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
- BuildMI(MBB, MI, MBB.findDebugLoc(MI), get(AMDGPU::COPY), SubReg)
- .addReg(SrcReg, 0, RI.getSubRegFromChannel(i));
- storeRegToStackSlot(MBB, MI, SubReg, isKill, FrameIndex + i,
- &AMDGPU::SReg_32RegClass, TRI);
+ FirstLane);
+
+ unsigned Opcode;
+ switch (RC->getSize() * 8) {
+ case 64: Opcode = AMDGPU::SI_SPILL_S64_SAVE; break;
+ case 128: Opcode = AMDGPU::SI_SPILL_S128_SAVE; break;
+ case 256: Opcode = AMDGPU::SI_SPILL_S256_SAVE; break;
+ case 512: Opcode = AMDGPU::SI_SPILL_S512_SAVE; break;
+ default: llvm_unreachable("Cannot spill register class");
}
+
+ BuildMI(MBB, MI, DL, get(Opcode), MFI->SpillTracker.LaneVGPR)
+ .addReg(SrcReg)
+ .addImm(FrameIndex);
+ } else {
+ llvm_unreachable("VGPR spilling not supported");
}
}
@@ -216,30 +234,125 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
unsigned DestReg, int FrameIndex,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
- MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
SIMachineFunctionInfo *MFI = MBB.getParent()->getInfo<SIMachineFunctionInfo>();
DebugLoc DL = MBB.findDebugLoc(MI);
if (TRI->getCommonSubClass(RC, &AMDGPU::SReg_32RegClass)) {
- SIMachineFunctionInfo::SpilledReg Spill =
+ SIMachineFunctionInfo::SpilledReg Spill =
MFI->SpillTracker.getSpilledReg(FrameIndex);
assert(Spill.VGPR);
BuildMI(MBB, MI, DL, get(AMDGPU::V_READLANE_B32), DestReg)
.addReg(Spill.VGPR)
.addImm(Spill.Lane);
+ insertNOPs(MI, 3);
+ } else if (RI.isSGPRClass(RC)) {
+ unsigned Opcode;
+ switch (RC->getSize() * 8) {
+ case 64: Opcode = AMDGPU::SI_SPILL_S64_RESTORE; break;
+ case 128: Opcode = AMDGPU::SI_SPILL_S128_RESTORE; break;
+ case 256: Opcode = AMDGPU::SI_SPILL_S256_RESTORE; break;
+ case 512: Opcode = AMDGPU::SI_SPILL_S512_RESTORE; break;
+ default: llvm_unreachable("Cannot spill register class");
+ }
+
+ SIMachineFunctionInfo::SpilledReg Spill =
+ MFI->SpillTracker.getSpilledReg(FrameIndex);
+
+ BuildMI(MBB, MI, DL, get(Opcode), DestReg)
+ .addReg(Spill.VGPR)
+ .addImm(FrameIndex);
+ insertNOPs(MI, 3);
} else {
- for (unsigned i = 0, e = RC->getSize() / 4; i != e; ++i) {
- unsigned Flags = RegState::Define;
- if (i == 0) {
- Flags |= RegState::Undef;
- }
- unsigned SubReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
- loadRegFromStackSlot(MBB, MI, SubReg, FrameIndex + i,
- &AMDGPU::SReg_32RegClass, TRI);
- BuildMI(MBB, MI, DL, get(AMDGPU::COPY))
- .addReg(DestReg, Flags, RI.getSubRegFromChannel(i))
- .addReg(SubReg);
+ llvm_unreachable("VGPR spilling not supported");
+ }
+}
+
+static unsigned getNumSubRegsForSpillOp(unsigned Op) {
+
+ switch (Op) {
+ case AMDGPU::SI_SPILL_S512_SAVE:
+ case AMDGPU::SI_SPILL_S512_RESTORE:
+ return 16;
+ case AMDGPU::SI_SPILL_S256_SAVE:
+ case AMDGPU::SI_SPILL_S256_RESTORE:
+ return 8;
+ case AMDGPU::SI_SPILL_S128_SAVE:
+ case AMDGPU::SI_SPILL_S128_RESTORE:
+ return 4;
+ case AMDGPU::SI_SPILL_S64_SAVE:
+ case AMDGPU::SI_SPILL_S64_RESTORE:
+ return 2;
+ default: llvm_unreachable("Invalid spill opcode");
+ }
+}
+
+void SIInstrInfo::insertNOPs(MachineBasicBlock::iterator MI,
+ int Count) const {
+ while (Count > 0) {
+ int Arg;
+ if (Count >= 8)
+ Arg = 7;
+ else
+ Arg = Count - 1;
+ Count -= 8;
+ BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), get(AMDGPU::S_NOP))
+ .addImm(Arg);
+ }
+}
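The loop above relies on S_NOP's immediate encoding "imm + 1" no-ops (0 emits one, 7 emits eight), so a request for Count no-ops is issued in chunks of at most eight. The same arithmetic as a standalone sketch:

    #include <algorithm>
    #include <vector>

    std::vector<int> nopImmediates(int Count) {
      std::vector<int> Imms;
      while (Count > 0) {
        Imms.push_back(std::min(Count, 8) - 1);  // imm = nops in chunk - 1
        Count -= 8;
      }
      return Imms;  // Count = 10 -> {7, 1}: eight no-ops, then two
    }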
+
+bool SIInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
+ SIMachineFunctionInfo *MFI =
+ MI->getParent()->getParent()->getInfo<SIMachineFunctionInfo>();
+ MachineBasicBlock &MBB = *MI->getParent();
+ DebugLoc DL = MBB.findDebugLoc(MI);
+ switch (MI->getOpcode()) {
+ default: return AMDGPUInstrInfo::expandPostRAPseudo(MI);
+
+ // SGPR register spill
+ case AMDGPU::SI_SPILL_S512_SAVE:
+ case AMDGPU::SI_SPILL_S256_SAVE:
+ case AMDGPU::SI_SPILL_S128_SAVE:
+ case AMDGPU::SI_SPILL_S64_SAVE: {
+ unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
+ unsigned FrameIndex = MI->getOperand(2).getImm();
+
+ for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
+ SIMachineFunctionInfo::SpilledReg Spill;
+ unsigned SubReg = RI.getPhysRegSubReg(MI->getOperand(1).getReg(),
+ &AMDGPU::SGPR_32RegClass, i);
+ Spill = MFI->SpillTracker.getSpilledReg(FrameIndex);
+
+ BuildMI(MBB, MI, DL, get(AMDGPU::V_WRITELANE_B32),
+ MI->getOperand(0).getReg())
+ .addReg(SubReg)
+ .addImm(Spill.Lane + i);
+ }
+ MI->eraseFromParent();
+ break;
+ }
+
+ // SGPR register restore
+ case AMDGPU::SI_SPILL_S512_RESTORE:
+ case AMDGPU::SI_SPILL_S256_RESTORE:
+ case AMDGPU::SI_SPILL_S128_RESTORE:
+ case AMDGPU::SI_SPILL_S64_RESTORE: {
+ unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
+
+ for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
+ SIMachineFunctionInfo::SpilledReg Spill;
+ unsigned FrameIndex = MI->getOperand(2).getImm();
+ unsigned SubReg = RI.getPhysRegSubReg(MI->getOperand(0).getReg(),
+ &AMDGPU::SGPR_32RegClass, i);
+ Spill = MFI->SpillTracker.getSpilledReg(FrameIndex);
+
+ BuildMI(MBB, MI, DL, get(AMDGPU::V_READLANE_B32), SubReg)
+ .addReg(MI->getOperand(1).getReg())
+ .addImm(Spill.Lane + i);
}
+ MI->eraseFromParent();
+ break;
}
+ }
+ return true;
}
MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI,
@@ -247,18 +360,18 @@ MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI,
MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
if (MI->getNumOperands() < 3 || !MI->getOperand(1).isReg())
- return 0;
+ return nullptr;
// Cannot commute VOP2 if src0 is SGPR.
if (isVOP2(MI->getOpcode()) && MI->getOperand(1).isReg() &&
RI.isSGPRClass(MRI.getRegClass(MI->getOperand(1).getReg())))
- return 0;
+ return nullptr;
if (!MI->getOperand(2).isReg()) {
// XXX: Commute instructions with FPImm operands
if (NewMI || MI->getOperand(2).isFPImm() ||
(!isVOP2(MI->getOpcode()) && !isVOP3(MI->getOpcode()))) {
- return 0;
+ return nullptr;
}
// XXX: Commute VOP3 instructions with abs and neg set.
@@ -267,7 +380,7 @@ MachineInstr *SIInstrInfo::commuteInstruction(MachineInstr *MI,
AMDGPU::OpName::abs)).getImm() ||
MI->getOperand(AMDGPU::getNamedOperandIdx(MI->getOpcode(),
AMDGPU::OpName::neg)).getImm()))
- return 0;
+ return nullptr;
unsigned Reg = MI->getOperand(1).getReg();
unsigned SubReg = MI->getOperand(1).getSubReg();
@@ -516,6 +629,7 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) {
case AMDGPU::REG_SEQUENCE: return AMDGPU::REG_SEQUENCE;
case AMDGPU::COPY: return AMDGPU::COPY;
case AMDGPU::PHI: return AMDGPU::PHI;
+ case AMDGPU::INSERT_SUBREG: return AMDGPU::INSERT_SUBREG;
case AMDGPU::S_MOV_B32:
return MI.getOperand(1).isReg() ?
AMDGPU::COPY : AMDGPU::V_MOV_B32_e32;
@@ -536,6 +650,23 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) {
case AMDGPU::S_LSHL_B64: return AMDGPU::V_LSHL_B64;
case AMDGPU::S_LSHR_B32: return AMDGPU::V_LSHR_B32_e32;
case AMDGPU::S_LSHR_B64: return AMDGPU::V_LSHR_B64;
+ case AMDGPU::S_SEXT_I32_I8: return AMDGPU::V_BFE_I32;
+ case AMDGPU::S_SEXT_I32_I16: return AMDGPU::V_BFE_I32;
+ case AMDGPU::S_BFE_U32: return AMDGPU::V_BFE_U32;
+ case AMDGPU::S_BFE_I32: return AMDGPU::V_BFE_I32;
+ case AMDGPU::S_NOT_B32: return AMDGPU::V_NOT_B32_e32;
+ case AMDGPU::S_CMP_EQ_I32: return AMDGPU::V_CMP_EQ_I32_e32;
+ case AMDGPU::S_CMP_LG_I32: return AMDGPU::V_CMP_NE_I32_e32;
+ case AMDGPU::S_CMP_GT_I32: return AMDGPU::V_CMP_GT_I32_e32;
+ case AMDGPU::S_CMP_GE_I32: return AMDGPU::V_CMP_GE_I32_e32;
+ case AMDGPU::S_CMP_LT_I32: return AMDGPU::V_CMP_LT_I32_e32;
+ case AMDGPU::S_CMP_LE_I32: return AMDGPU::V_CMP_LE_I32_e32;
+ case AMDGPU::S_LOAD_DWORD_IMM:
+ case AMDGPU::S_LOAD_DWORD_SGPR: return AMDGPU::BUFFER_LOAD_DWORD_ADDR64;
+ case AMDGPU::S_LOAD_DWORDX2_IMM:
+ case AMDGPU::S_LOAD_DWORDX2_SGPR: return AMDGPU::BUFFER_LOAD_DWORDX2_ADDR64;
+ case AMDGPU::S_LOAD_DWORDX4_IMM:
+ case AMDGPU::S_LOAD_DWORDX4_SGPR: return AMDGPU::BUFFER_LOAD_DWORDX4_ADDR64;
}
}
@@ -559,6 +690,8 @@ bool SIInstrInfo::canReadVGPR(const MachineInstr &MI, unsigned OpNo) const {
switch (MI.getOpcode()) {
case AMDGPU::COPY:
case AMDGPU::REG_SEQUENCE:
+ case AMDGPU::PHI:
+ case AMDGPU::INSERT_SUBREG:
return RI.hasVGPRs(getOpRegClass(MI, 0));
default:
return RI.hasVGPRs(getOpRegClass(MI, OpNo));
@@ -737,11 +870,12 @@ void SIInstrInfo::legalizeOperands(MachineInstr *MI) const {
}
}
- // Legalize REG_SEQUENCE
+ // Legalize REG_SEQUENCE and PHI
// The register class of the operands must be the same type as the register
// class of the output.
- if (MI->getOpcode() == AMDGPU::REG_SEQUENCE) {
- const TargetRegisterClass *RC = NULL, *SRC = NULL, *VRC = NULL;
+ if (MI->getOpcode() == AMDGPU::REG_SEQUENCE ||
+ MI->getOpcode() == AMDGPU::PHI) {
+ const TargetRegisterClass *RC = nullptr, *SRC = nullptr, *VRC = nullptr;
for (unsigned i = 1, e = MI->getNumOperands(); i != e; i+=2) {
if (!MI->getOperand(i).isReg() ||
!TargetRegisterInfo::isVirtualRegister(MI->getOperand(i).getReg()))
@@ -774,13 +908,40 @@ void SIInstrInfo::legalizeOperands(MachineInstr *MI) const {
!TargetRegisterInfo::isVirtualRegister(MI->getOperand(i).getReg()))
continue;
unsigned DstReg = MRI.createVirtualRegister(RC);
- BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
+ MachineBasicBlock *InsertBB;
+ MachineBasicBlock::iterator Insert;
+ if (MI->getOpcode() == AMDGPU::REG_SEQUENCE) {
+ InsertBB = MI->getParent();
+ Insert = MI;
+ } else {
+ // MI is a PHI instruction.
+ InsertBB = MI->getOperand(i + 1).getMBB();
+ Insert = InsertBB->getFirstTerminator();
+ }
+ BuildMI(*InsertBB, Insert, MI->getDebugLoc(),
get(AMDGPU::COPY), DstReg)
.addOperand(MI->getOperand(i));
MI->getOperand(i).setReg(DstReg);
}
}
+ // Legalize INSERT_SUBREG
+ // src0 must have the same register class as dst
+ if (MI->getOpcode() == AMDGPU::INSERT_SUBREG) {
+ unsigned Dst = MI->getOperand(0).getReg();
+ unsigned Src0 = MI->getOperand(1).getReg();
+ const TargetRegisterClass *DstRC = MRI.getRegClass(Dst);
+ const TargetRegisterClass *Src0RC = MRI.getRegClass(Src0);
+ if (DstRC != Src0RC) {
+ MachineBasicBlock &MBB = *MI->getParent();
+ unsigned NewSrc0 = MRI.createVirtualRegister(DstRC);
+ BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::COPY), NewSrc0)
+ .addReg(Src0);
+ MI->getOperand(1).setReg(NewSrc0);
+ }
+ return;
+ }
+
// Legalize MUBUF* instructions
// FIXME: If we start using the non-addr64 instructions for compute, we
// may need to legalize them here.
@@ -886,6 +1047,72 @@ void SIInstrInfo::legalizeOperands(MachineInstr *MI) const {
}
}
+void SIInstrInfo::moveSMRDToVALU(MachineInstr *MI, MachineRegisterInfo &MRI) const {
+ MachineBasicBlock *MBB = MI->getParent();
+ switch (MI->getOpcode()) {
+ case AMDGPU::S_LOAD_DWORD_IMM:
+ case AMDGPU::S_LOAD_DWORD_SGPR:
+ case AMDGPU::S_LOAD_DWORDX2_IMM:
+ case AMDGPU::S_LOAD_DWORDX2_SGPR:
+ case AMDGPU::S_LOAD_DWORDX4_IMM:
+ case AMDGPU::S_LOAD_DWORDX4_SGPR:
+ unsigned NewOpcode = getVALUOp(*MI);
+ unsigned RegOffset;
+ unsigned ImmOffset;
+
+ if (MI->getOperand(2).isReg()) {
+ RegOffset = MI->getOperand(2).getReg();
+ ImmOffset = 0;
+ } else {
+ assert(MI->getOperand(2).isImm());
+ // SMRD instructions take a dword offset and MUBUF instructions
+ // take a byte offset.
+ ImmOffset = MI->getOperand(2).getImm() << 2;
+ RegOffset = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
+ if (isUInt<12>(ImmOffset)) {
+ BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32),
+ RegOffset)
+ .addImm(0);
+ } else {
+ BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32),
+ RegOffset)
+ .addImm(ImmOffset);
+ ImmOffset = 0;
+ }
+ }
+
+ unsigned SRsrc = MRI.createVirtualRegister(&AMDGPU::SReg_128RegClass);
+ unsigned DWord0 = RegOffset;
+ unsigned DWord1 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
+ unsigned DWord2 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
+ unsigned DWord3 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
+
+ BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32), DWord1)
+ .addImm(0);
+ BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32), DWord2)
+ .addImm(AMDGPU::RSRC_DATA_FORMAT & 0xFFFFFFFF);
+ BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32), DWord3)
+ .addImm(AMDGPU::RSRC_DATA_FORMAT >> 32);
+ BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE), SRsrc)
+ .addReg(DWord0)
+ .addImm(AMDGPU::sub0)
+ .addReg(DWord1)
+ .addImm(AMDGPU::sub1)
+ .addReg(DWord2)
+ .addImm(AMDGPU::sub2)
+ .addReg(DWord3)
+ .addImm(AMDGPU::sub3);
+ MI->setDesc(get(NewOpcode));
+ if (MI->getOperand(2).isReg()) {
+ MI->getOperand(2).setReg(MI->getOperand(1).getReg());
+ } else {
+ MI->getOperand(2).ChangeToRegister(MI->getOperand(1).getReg(), false);
+ }
+ MI->getOperand(1).setReg(SRsrc);
+ MI->addOperand(*MBB->getParent(), MachineOperand::CreateImm(ImmOffset));
+ }
+}
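A sketch of the offset handling above: SMRD immediates count dwords while MUBUF immediates count bytes, so the value is shifted left by two; if the byte offset fits MUBUF's unsigned 12-bit immediate field it stays an immediate (with a zero register offset), otherwise it is materialized into the register and the immediate becomes zero:

    #include <cstdint>

    struct MubufOffset {
      uint32_t Imm;       // immediate byte offset (0 if moved to register)
      uint32_t RegValue;  // value materialized into the offset register
    };

    MubufOffset convertSMRDOffset(uint32_t DwordOffset) {
      uint32_t ByteOffset = DwordOffset << 2;   // dwords -> bytes
      if (ByteOffset < (1u << 12))              // isUInt<12>(ByteOffset)
        return {ByteOffset, 0};
      return {0, ByteOffset};                   // too big: use the register
    }
    // convertSMRDOffset(5) == {20, 0}: five dwords become a 20-byte immediate.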
+
void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
SmallVector<MachineInstr *, 128> Worklist;
Worklist.push_back(&TopInst);
@@ -895,8 +1122,16 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
MachineBasicBlock *MBB = Inst->getParent();
MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
+ unsigned Opcode = Inst->getOpcode();
+ unsigned NewOpcode = getVALUOp(*Inst);
+
// Handle some special cases
- switch(Inst->getOpcode()) {
+ switch (Opcode) {
+ default:
+ if (isSMRD(Inst->getOpcode())) {
+ moveSMRDToVALU(Inst, MRI);
+ }
+ break;
case AMDGPU::S_MOV_B64: {
DebugLoc DL = Inst->getDebugLoc();
@@ -947,7 +1182,6 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
llvm_unreachable("Moving this op to VALU not implemented");
}
- unsigned NewOpcode = getVALUOp(*Inst);
if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) {
// We cannot move this instruction to the VALU, so we should try to
// legalize its operands instead.
@@ -968,27 +1202,52 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
Inst->RemoveOperand(i);
}
- // Add the implict and explicit register definitions.
- if (NewDesc.ImplicitUses) {
- for (unsigned i = 0; NewDesc.ImplicitUses[i]; ++i) {
- unsigned Reg = NewDesc.ImplicitUses[i];
- Inst->addOperand(MachineOperand::CreateReg(Reg, false, true));
- }
+ if (Opcode == AMDGPU::S_SEXT_I32_I8 || Opcode == AMDGPU::S_SEXT_I32_I16) {
+ // We are converting these to a BFE, so we need to add the missing
+ // operands for the size and offset.
+ unsigned Size = (Opcode == AMDGPU::S_SEXT_I32_I8) ? 8 : 16;
+ Inst->addOperand(Inst->getOperand(1));
+ Inst->getOperand(1).ChangeToImmediate(0);
+ Inst->addOperand(MachineOperand::CreateImm(0));
+ Inst->addOperand(MachineOperand::CreateImm(0));
+ Inst->addOperand(MachineOperand::CreateImm(0));
+ Inst->addOperand(MachineOperand::CreateImm(Size));
+
+ // XXX - Other pointless operands. There are 4, but it seems you only need
+ // 3 to not hit an assertion later in MCInstLower.
+ Inst->addOperand(MachineOperand::CreateImm(0));
+ Inst->addOperand(MachineOperand::CreateImm(0));
}
- if (NewDesc.ImplicitDefs) {
- for (unsigned i = 0; NewDesc.ImplicitDefs[i]; ++i) {
- unsigned Reg = NewDesc.ImplicitDefs[i];
- Inst->addOperand(MachineOperand::CreateReg(Reg, true, true));
- }
+ addDescImplicitUseDef(NewDesc, Inst);
+
+ if (Opcode == AMDGPU::S_BFE_I32 || Opcode == AMDGPU::S_BFE_U32) {
+ const MachineOperand &OffsetWidthOp = Inst->getOperand(2);
+ // If we need to move this to VGPRs, we need to unpack the second operand
+ // back into the 2 separate ones for bit offset and width.
+ assert(OffsetWidthOp.isImm() &&
+ "Scalar BFE is only implemented for constant width and offset");
+ uint32_t Imm = OffsetWidthOp.getImm();
+
+ uint32_t Offset = Imm & 0x3f; // Extract bits [5:0].
+ uint32_t BitWidth = (Imm & 0x7f0000) >> 16; // Extract bits [22:16].
+
+ Inst->RemoveOperand(2); // Remove old immediate.
+ Inst->addOperand(Inst->getOperand(1));
+ Inst->getOperand(1).ChangeToImmediate(0);
+ Inst->addOperand(MachineOperand::CreateImm(0));
+ Inst->addOperand(MachineOperand::CreateImm(Offset));
+ Inst->addOperand(MachineOperand::CreateImm(0));
+ Inst->addOperand(MachineOperand::CreateImm(BitWidth));
+ Inst->addOperand(MachineOperand::CreateImm(0));
+ Inst->addOperand(MachineOperand::CreateImm(0));
}
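A worked sketch of the unpacking above: the scalar BFE packs the bit offset into bits [5:0] and the field width into bits [22:16] of a single immediate, while the vector form takes them as separate operands:

    #include <cstdint>

    struct BFEArgs { uint32_t Offset, Width; };

    BFEArgs unpackScalarBFE(uint32_t Imm) {
      return {Imm & 0x3f, (Imm & 0x7f0000) >> 16};
    }
    // unpackScalarBFE(0x00080004) == {4, 8}: extract 8 bits starting at bit 4.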
- legalizeOperands(Inst);
-
// Update the destination register class.
+
const TargetRegisterClass *NewDstRC = getOpRegClass(*Inst, 0);
- switch (Inst->getOpcode()) {
+ switch (Opcode) {
// For target instructions, getOpRegClass just returns the virtual
// register class associated with the operand, so we need to find an
// equivalent VGPR register class in order to move the instruction to the
@@ -996,6 +1255,7 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
case AMDGPU::COPY:
case AMDGPU::PHI:
case AMDGPU::REG_SEQUENCE:
+ case AMDGPU::INSERT_SUBREG:
if (RI.hasVGPRs(NewDstRC))
continue;
NewDstRC = RI.getEquivalentVGPRClass(NewDstRC);
@@ -1010,6 +1270,9 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
unsigned NewDstReg = MRI.createVirtualRegister(NewDstRC);
MRI.replaceRegWith(DstReg, NewDstReg);
+ // Legalize the operands
+ legalizeOperands(Inst);
+
for (MachineRegisterInfo::use_iterator I = MRI.use_begin(NewDstReg),
E = MRI.use_end(); I != E; ++I) {
MachineInstr &UseMI = *I->getParent();
@@ -1097,6 +1360,24 @@ void SIInstrInfo::splitScalar64BitOp(SmallVectorImpl<MachineInstr *> &Worklist,
Worklist.push_back(HiHalf);
}
+void SIInstrInfo::addDescImplicitUseDef(const MCInstrDesc &NewDesc,
+ MachineInstr *Inst) const {
+ // Add the implicit and explicit register definitions.
+ if (NewDesc.ImplicitUses) {
+ for (unsigned i = 0; NewDesc.ImplicitUses[i]; ++i) {
+ unsigned Reg = NewDesc.ImplicitUses[i];
+ Inst->addOperand(MachineOperand::CreateReg(Reg, false, true));
+ }
+ }
+
+ if (NewDesc.ImplicitDefs) {
+ for (unsigned i = 0; NewDesc.ImplicitDefs[i]; ++i) {
+ unsigned Reg = NewDesc.ImplicitDefs[i];
+ Inst->addOperand(MachineOperand::CreateReg(Reg, true, true));
+ }
+ }
+}
+
MachineInstrBuilder SIInstrInfo::buildIndirectWrite(
MachineBasicBlock *MBB,
MachineBasicBlock::iterator I,
diff --git a/lib/Target/R600/SIInstrInfo.h b/lib/Target/R600/SIInstrInfo.h
index c537038..7b31a81 100644
--- a/lib/Target/R600/SIInstrInfo.h
+++ b/lib/Target/R600/SIInstrInfo.h
@@ -47,49 +47,52 @@ private:
void splitScalar64BitOp(SmallVectorImpl<MachineInstr *> & Worklist,
MachineInstr *Inst, unsigned Opcode) const;
+ void addDescImplicitUseDef(const MCInstrDesc &Desc, MachineInstr *MI) const;
public:
explicit SIInstrInfo(AMDGPUTargetMachine &tm);
- const SIRegisterInfo &getRegisterInfo() const {
+ const SIRegisterInfo &getRegisterInfo() const override {
return RI;
}
- virtual void copyPhysReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI, DebugLoc DL,
- unsigned DestReg, unsigned SrcReg,
- bool KillSrc) const;
+ void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const override;
void storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
unsigned SrcReg, bool isKill, int FrameIndex,
const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const;
+ const TargetRegisterInfo *TRI) const override;
void loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
unsigned DestReg, int FrameIndex,
const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const;
+ const TargetRegisterInfo *TRI) const override;
+
+ bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const override;
unsigned commuteOpcode(unsigned Opcode) const;
- virtual MachineInstr *commuteInstruction(MachineInstr *MI,
- bool NewMI=false) const;
+ MachineInstr *commuteInstruction(MachineInstr *MI,
+ bool NewMI=false) const override;
bool isTriviallyReMaterializable(const MachineInstr *MI,
- AliasAnalysis *AA = 0) const;
+ AliasAnalysis *AA = nullptr) const;
- virtual unsigned getIEQOpcode() const {
+ unsigned getIEQOpcode() const override {
llvm_unreachable("Unimplemented");
}
MachineInstr *buildMovInstr(MachineBasicBlock *MBB,
MachineBasicBlock::iterator I,
- unsigned DstReg, unsigned SrcReg) const;
- virtual bool isMov(unsigned Opcode) const;
+ unsigned DstReg, unsigned SrcReg) const override;
+ bool isMov(unsigned Opcode) const override;
- virtual bool isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const;
+ bool isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const override;
bool isDS(uint16_t Opcode) const;
int isMIMG(uint16_t Opcode) const;
int isSMRD(uint16_t Opcode) const;
@@ -101,8 +104,8 @@ public:
bool isInlineConstant(const MachineOperand &MO) const;
bool isLiteralConstant(const MachineOperand &MO) const;
- virtual bool verifyInstruction(const MachineInstr *MI,
- StringRef &ErrInfo) const;
+ bool verifyInstruction(const MachineInstr *MI,
+ StringRef &ErrInfo) const override;
bool isSALUInstr(const MachineInstr &MI) const;
static unsigned getVALUOp(const MachineInstr &MI);
@@ -136,32 +139,36 @@ public:
/// create new instruction and insert them before \p MI.
void legalizeOperands(MachineInstr *MI) const;
+ void moveSMRDToVALU(MachineInstr *MI, MachineRegisterInfo &MRI) const;
+
/// \brief Replace this instruction's opcode with the equivalent VALU
/// opcode. This function will also move the users of \p MI to the
/// VALU if necessary.
void moveToVALU(MachineInstr &MI) const;
- virtual unsigned calculateIndirectAddress(unsigned RegIndex,
- unsigned Channel) const;
+ unsigned calculateIndirectAddress(unsigned RegIndex,
+ unsigned Channel) const override;
- virtual const TargetRegisterClass *getIndirectAddrRegClass() const;
+ const TargetRegisterClass *getIndirectAddrRegClass() const override;
- virtual MachineInstrBuilder buildIndirectWrite(MachineBasicBlock *MBB,
- MachineBasicBlock::iterator I,
- unsigned ValueReg,
- unsigned Address,
- unsigned OffsetReg) const;
+ MachineInstrBuilder buildIndirectWrite(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator I,
+ unsigned ValueReg,
+ unsigned Address,
+ unsigned OffsetReg) const override;
- virtual MachineInstrBuilder buildIndirectRead(MachineBasicBlock *MBB,
- MachineBasicBlock::iterator I,
- unsigned ValueReg,
- unsigned Address,
- unsigned OffsetReg) const;
+ MachineInstrBuilder buildIndirectRead(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator I,
+ unsigned ValueReg,
+ unsigned Address,
+ unsigned OffsetReg) const override;
void reserveIndirectRegisters(BitVector &Reserved,
const MachineFunction &MF) const;
void LoadM0(MachineInstr *MoveRel, MachineBasicBlock::iterator I,
unsigned SavReg, unsigned IndexReg) const;
+
+ void insertNOPs(MachineBasicBlock::iterator MI, int Count) const;
};
namespace AMDGPU {
@@ -169,6 +176,7 @@ namespace AMDGPU {
int getVOPe64(uint16_t Opcode);
int getCommuteRev(uint16_t Opcode);
int getCommuteOrig(uint16_t Opcode);
+ int getMCOpcode(uint16_t Opcode, unsigned Gen);
const uint64_t RSRC_DATA_FORMAT = 0xf00000000000LL;
diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td
index e05ab65..2242e6d 100644
--- a/lib/Target/R600/SIInstrInfo.td
+++ b/lib/Target/R600/SIInstrInfo.td
@@ -7,23 +7,25 @@
//
//===----------------------------------------------------------------------===//
+// Except for the NONE field, this must be kept in sync with the SISubtarget enum
+// in AMDGPUMCInstLower.h
+def SISubtarget {
+ int NONE = -1;
+ int SI = 0;
+}
+
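The enum this comment refers to lives on the C++ side. Its expected shape (reconstructed here for reference; the header itself is not part of this diff) is simply:

    // AMDGPUMCInstLower.h (sketch): must match the TableGen SISubtarget
    // def above, except that NONE has no C++ counterpart.
    enum SISubtarget {
      SI = 0
    };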
//===----------------------------------------------------------------------===//
// SI DAG Nodes
//===----------------------------------------------------------------------===//
-// SMRD takes a 64bit memory address and can only add an 32bit offset
-def SIadd64bit32bit : SDNode<"ISD::ADD",
- SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>, SDTCisVT<0, i64>, SDTCisVT<2, i32>]>
->;
-
def SIload_constant : SDNode<"AMDGPUISD::LOAD_CONSTANT",
- SDTypeProfile<1, 2, [SDTCisVT<0, f32>, SDTCisVT<1, i128>, SDTCisVT<2, i32>]>,
+ SDTypeProfile<1, 2, [SDTCisVT<0, f32>, SDTCisVT<1, v4i32>, SDTCisVT<2, i32>]>,
[SDNPMayLoad, SDNPMemOperand]
>;
def SItbuffer_store : SDNode<"AMDGPUISD::TBUFFER_STORE_FORMAT",
SDTypeProfile<0, 13,
- [SDTCisVT<0, i128>, // rsrc(SGPR)
+ [SDTCisVT<0, v4i32>, // rsrc(SGPR)
SDTCisVT<1, iAny>, // vdata(VGPR)
SDTCisVT<2, i32>, // num_channels(imm)
SDTCisVT<3, i32>, // vaddr(VGPR)
@@ -41,13 +43,13 @@ def SItbuffer_store : SDNode<"AMDGPUISD::TBUFFER_STORE_FORMAT",
>;
def SIload_input : SDNode<"AMDGPUISD::LOAD_INPUT",
- SDTypeProfile<1, 3, [SDTCisVT<0, v4f32>, SDTCisVT<1, i128>, SDTCisVT<2, i16>,
+ SDTypeProfile<1, 3, [SDTCisVT<0, v4f32>, SDTCisVT<1, v4i32>, SDTCisVT<2, i16>,
SDTCisVT<3, i32>]>
>;
class SDSample<string opcode> : SDNode <opcode,
SDTypeProfile<1, 4, [SDTCisVT<0, v4f32>, SDTCisVT<2, v32i8>,
- SDTCisVT<3, i128>, SDTCisVT<4, i32>]>
+ SDTCisVT<3, v4i32>, SDTCisVT<4, i32>]>
>;
def SIsample : SDSample<"AMDGPUISD::SAMPLE">;
@@ -111,14 +113,17 @@ def IMM16bit : PatLeaf <(imm),
[{return isUInt<16>(N->getZExtValue());}]
>;
+def IMM32bit : PatLeaf <(imm),
+ [{return isUInt<32>(N->getZExtValue());}]
+>;
+
def mubuf_vaddr_offset : PatFrag<
(ops node:$ptr, node:$offset, node:$imm_offset),
(add (add node:$ptr, node:$offset), node:$imm_offset)
>;
class InlineImm <ValueType vt> : PatLeaf <(vt imm), [{
- return
- (*(const SITargetLowering *)getTargetLowering()).analyzeImmediate(N) == 0;
+ return isInlineImmediate(N);
}]>;
class SGPRImm <dag frag> : PatLeaf<frag, [{
@@ -138,7 +143,7 @@ class SGPRImm <dag frag> : PatLeaf<frag, [{
}]>;
def FRAMEri32 : Operand<iPTR> {
- let MIOperandInfo = (ops SReg_32:$ptr, i32imm:$index);
+ let MIOperandInfo = (ops i32:$ptr, i32imm:$index);
}
//===----------------------------------------------------------------------===//
@@ -197,15 +202,17 @@ class SOP2_SHIFT_64 <bits<7> op, string opName, list<dag> pattern> : SOP2 <
opName#" $dst, $src0, $src1", pattern
>;
-class SOPC_32 <bits<7> op, string opName, list<dag> pattern> : SOPC <
- op, (outs SCCReg:$dst), (ins SSrc_32:$src0, SSrc_32:$src1),
- opName#" $dst, $src0, $src1", pattern
->;
-class SOPC_64 <bits<7> op, string opName, list<dag> pattern> : SOPC <
- op, (outs SCCReg:$dst), (ins SSrc_64:$src0, SSrc_64:$src1),
- opName#" $dst, $src0, $src1", pattern
->;
+class SOPC_Helper <bits<7> op, RegisterClass rc, ValueType vt,
+ string opName, PatLeaf cond> : SOPC <
+ op, (outs SCCReg:$dst), (ins rc:$src0, rc:$src1),
+ opName#" $dst, $src0, $src1", []>;
+
+class SOPC_32<bits<7> op, string opName, PatLeaf cond = COND_NULL>
+ : SOPC_Helper<op, SSrc_32, i32, opName, cond>;
+
+class SOPC_64<bits<7> op, string opName, PatLeaf cond = COND_NULL>
+ : SOPC_Helper<op, SSrc_64, i64, opName, cond>;
class SOPK_32 <bits<5> op, string opName, list<dag> pattern> : SOPK <
op, (outs SReg_32:$dst), (ins i16imm:$src0),
@@ -221,7 +228,7 @@ multiclass SMRD_Helper <bits<5> op, string asm, RegisterClass baseClass,
RegisterClass dstClass> {
def _IMM : SMRD <
op, 1, (outs dstClass:$dst),
- (ins baseClass:$sbase, i32imm:$offset),
+ (ins baseClass:$sbase, u32imm:$offset),
asm#" $dst, $sbase, $offset", []
>;
@@ -245,6 +252,28 @@ class VOP2_REV <string revOp, bit isOrig> {
bit IsOrig = isOrig;
}
+class SIMCInstr <string pseudo, int subtarget> {
+ string PseudoInstr = pseudo;
+ int Subtarget = subtarget;
+}
+
+multiclass VOP3_m <bits<9> op, dag outs, dag ins, string asm, list<dag> pattern,
+ string opName> {
+
+ def "" : InstSI <outs, ins, "", pattern>, VOP <opName>,
+ SIMCInstr<OpName, SISubtarget.NONE> {
+ let isPseudo = 1;
+ }
+
+ def _si : VOP3 <op, outs, ins, asm, []>, SIMCInstr<opName, SISubtarget.SI>;
+
+}
+
+// This must always appear immediately before the operand it modifies.
+def InputMods : OperandWithDefaultOps <i32, (ops (i32 0))> {
+ let PrintMethod = "printOperandAndMods";
+}
+
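One plausible implementation of the referenced print method, shown as an assumption rather than the actual printer (which is outside this hunk): since the modifier immediate sits right before the operand it applies to, the printer can read the mods at OpNo and the value at OpNo + 1, using one bit each for neg and abs.

    // Sketch of AMDGPUInstPrinter::printOperandAndMods (assumed shape).
    void AMDGPUInstPrinter::printOperandAndMods(const MCInst *MI, unsigned OpNo,
                                                raw_ostream &O) {
      unsigned Mods = MI->getOperand(OpNo).getImm();
      if (Mods & 0x1) // neg bit
        O << '-';
      if (Mods & 0x2) // abs bit
        O << '|';
      printOperand(MI, OpNo + 1, O); // the operand being modified
      if (Mods & 0x2)
        O << '|';
    }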
multiclass VOP1_Helper <bits<8> op, RegisterClass drc, RegisterClass src,
string opName, list<dag> pattern> {
@@ -256,10 +285,8 @@ multiclass VOP1_Helper <bits<8> op, RegisterClass drc, RegisterClass src,
def _e64 : VOP3 <
{1, 1, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
(outs drc:$dst),
- (ins src:$src0,
- i32imm:$abs, i32imm:$clamp,
- i32imm:$omod, i32imm:$neg),
- opName#"_e64 $dst, $src0, $abs, $clamp, $omod, $neg", []
+ (ins InputMods:$src0_modifiers, src:$src0, i32imm:$clamp, i32imm:$omod),
+ opName#"_e64 $dst, $src0_modifiers, $clamp, $omod", []
>, VOP <opName> {
let src1 = SIOperand.ZERO;
let src2 = SIOperand.ZERO;
@@ -288,10 +315,10 @@ multiclass VOP2_Helper <bits<6> op, RegisterClass vrc, RegisterClass arc,
def _e64 : VOP3 <
{1, 0, 0, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
(outs vrc:$dst),
- (ins arc:$src0, arc:$src1,
- i32imm:$abs, i32imm:$clamp,
- i32imm:$omod, i32imm:$neg),
- opName#"_e64 $dst, $src0, $src1, $abs, $clamp, $omod, $neg", []
+ (ins InputMods:$src0_modifiers, arc:$src0,
+ InputMods:$src1_modifiers, arc:$src1,
+ i32imm:$clamp, i32imm:$omod),
+ opName#"_e64 $dst, $src0_modifiers, $src1_modifiers, $clamp, $omod", []
>, VOP <opName>, VOP2_REV<revOp#"_e64", !eq(revOp, opName)> {
let src2 = SIOperand.ZERO;
}
@@ -316,10 +343,10 @@ multiclass VOP2b_32 <bits<6> op, string opName, list<dag> pattern,
def _e64 : VOP3b <
{1, 0, 0, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
(outs VReg_32:$dst),
- (ins VSrc_32:$src0, VSrc_32:$src1,
- i32imm:$abs, i32imm:$clamp,
- i32imm:$omod, i32imm:$neg),
- opName#"_e64 $dst, $src0, $src1, $abs, $clamp, $omod, $neg", []
+ (ins InputMods: $src0_modifiers, VSrc_32:$src0,
+ InputMods:$src1_modifiers, VSrc_32:$src1,
+ i32imm:$clamp, i32imm:$omod),
+ opName#"_e64 $dst, $src0_modifiers, $src1_modifiers, $clamp, $omod", []
>, VOP <opName>, VOP2_REV<revOp#"_e64", !eq(revOp, opName)> {
let src2 = SIOperand.ZERO;
/* the VOP2 variant puts the carry out into VCC, the VOP3 variant
@@ -340,15 +367,16 @@ multiclass VOPC_Helper <bits<8> op, RegisterClass vrc, RegisterClass arc,
def _e64 : VOP3 <
{0, op{7}, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}},
(outs SReg_64:$dst),
- (ins arc:$src0, arc:$src1,
- InstFlag:$abs, InstFlag:$clamp,
- InstFlag:$omod, InstFlag:$neg),
- opName#"_e64 $dst, $src0, $src1, $abs, $clamp, $omod, $neg",
+ (ins InputMods:$src0_modifiers, arc:$src0,
+ InputMods:$src1_modifiers, arc:$src1,
+ InstFlag:$clamp, InstFlag:$omod),
+ opName#"_e64 $dst, $src0_modifiers, $src1_modifiers, $clamp, $omod",
!if(!eq(!cast<string>(cond), "COND_NULL"), []<dag>,
[(set SReg_64:$dst, (i1 (setcc (vt arc:$src0), arc:$src1, cond)))]
)
>, VOP <opName> {
let src2 = SIOperand.ZERO;
+ let src2_modifiers = 0;
}
}
@@ -360,12 +388,13 @@ multiclass VOPC_64 <bits<8> op, string opName,
ValueType vt = untyped, PatLeaf cond = COND_NULL>
: VOPC_Helper <op, VReg_64, VSrc_64, opName, vt, cond>;
-class VOP3_32 <bits<9> op, string opName, list<dag> pattern> : VOP3 <
+multiclass VOP3_32 <bits<9> op, string opName, list<dag> pattern> : VOP3_m <
op, (outs VReg_32:$dst),
- (ins VSrc_32:$src0, VSrc_32:$src1, VSrc_32:$src2,
- InstFlag:$abs, InstFlag:$clamp, InstFlag:$omod, InstFlag:$neg),
- opName#" $dst, $src0, $src1, $src2, $abs, $clamp, $omod, $neg", pattern
->, VOP <opName>;
+ (ins InputMods: $src0_modifiers, VSrc_32:$src0, InputMods:$src1_modifiers,
+ VSrc_32:$src1, InputMods:$src2_modifiers, VSrc_32:$src2,
+ InstFlag:$clamp, InstFlag:$omod),
+ opName#" $dst, $src0_modifiers, $src1, $src2, $clamp, $omod", pattern, opName
+>;
class VOP3_64_Shift <bits <9> op, string opName, list<dag> pattern> : VOP3 <
op, (outs VReg_64:$dst),
@@ -374,10 +403,9 @@ class VOP3_64_Shift <bits <9> op, string opName, list<dag> pattern> : VOP3 <
>, VOP <opName> {
let src2 = SIOperand.ZERO;
- let abs = 0;
+ let src0_modifiers = 0;
let clamp = 0;
let omod = 0;
- let neg = 0;
}
class VOP3_64 <bits<9> op, string opName, list<dag> pattern> : VOP3 <
@@ -403,7 +431,7 @@ class DS_1A <bits<8> op, dag outs, dag ins, string asm, list<dag> pat> :
class DS_Load_Helper <bits<8> op, string asm, RegisterClass regClass> : DS_1A <
op,
(outs regClass:$vdst),
- (ins i1imm:$gds, VReg_32:$addr, i16imm:$offset),
+ (ins i1imm:$gds, VReg_32:$addr, u16imm:$offset),
asm#" $vdst, $addr, $offset, [M0]",
[]> {
let data0 = 0;
@@ -415,7 +443,7 @@ class DS_Load_Helper <bits<8> op, string asm, RegisterClass regClass> : DS_1A <
class DS_Load2_Helper <bits<8> op, string asm, RegisterClass regClass> : DS <
op,
(outs regClass:$vdst),
- (ins i1imm:$gds, VReg_32:$addr, i8imm:$offset0, i8imm:$offset1),
+ (ins i1imm:$gds, VReg_32:$addr, u8imm:$offset0, u8imm:$offset1),
asm#" $gds, $vdst, $addr, $offset0, $offset1, [M0]",
[]> {
let data0 = 0;
@@ -427,7 +455,7 @@ class DS_Load2_Helper <bits<8> op, string asm, RegisterClass regClass> : DS <
class DS_Store_Helper <bits<8> op, string asm, RegisterClass regClass> : DS_1A <
op,
(outs),
- (ins i1imm:$gds, VReg_32:$addr, regClass:$data0, i16imm:$offset),
+ (ins i1imm:$gds, VReg_32:$addr, regClass:$data0, u16imm:$offset),
asm#" $addr, $data0, $offset [M0]",
[]> {
let data1 = 0;
@@ -439,7 +467,7 @@ class DS_Store_Helper <bits<8> op, string asm, RegisterClass regClass> : DS_1A <
class DS_Store2_Helper <bits<8> op, string asm, RegisterClass regClass> : DS_1A <
op,
(outs),
- (ins i1imm:$gds, VReg_32:$addr, regClass:$data0, i8imm:$offset0, i8imm:$offset1),
+ (ins i1imm:$gds, VReg_32:$addr, regClass:$data0, u8imm:$offset0, u8imm:$offset1),
asm#" $addr, $data0, $data1, $offset0, $offset1 [M0]",
[]> {
let mayStore = 1;
@@ -450,7 +478,7 @@ class DS_Store2_Helper <bits<8> op, string asm, RegisterClass regClass> : DS_1A
class DS_1A1D_RET <bits<8> op, string asm, RegisterClass rc> : DS_1A <
op,
(outs rc:$vdst),
- (ins i1imm:$gds, VReg_32:$addr, VReg_32:$data0, i16imm:$offset),
+ (ins i1imm:$gds, VReg_32:$addr, VReg_32:$data0, u16imm:$offset),
asm#" $vdst, $addr, $data0, $offset, [M0]",
[]> {
@@ -462,7 +490,7 @@ class DS_1A1D_RET <bits<8> op, string asm, RegisterClass rc> : DS_1A <
class MTBUF_Store_Helper <bits<3> op, string asm, RegisterClass regClass> : MTBUF <
op,
(outs),
- (ins regClass:$vdata, i16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc,
+ (ins regClass:$vdata, u16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc,
i1imm:$addr64, i8imm:$dfmt, i8imm:$nfmt, VReg_32:$vaddr,
SReg_128:$srsrc, i1imm:$slc, i1imm:$tfe, SSrc_32:$soffset),
asm#" $vdata, $offset, $offen, $idxen, $glc, $addr64, $dfmt,"
@@ -481,7 +509,7 @@ multiclass MUBUF_Load_Helper <bits<7> op, string asm, RegisterClass regClass> {
let offen = 0, idxen = 0 in {
def _OFFSET : MUBUF <op, (outs regClass:$vdata),
(ins SReg_128:$srsrc, VReg_32:$vaddr,
- i16imm:$offset, SSrc_32:$soffset, i1imm:$glc,
+ u16imm:$offset, SSrc_32:$soffset, i1imm:$glc,
i1imm:$slc, i1imm:$tfe),
asm#" $vdata, $srsrc + $offset + $soffset, glc=$glc, slc=$slc, tfe=$tfe", []>;
}
@@ -497,7 +525,7 @@ multiclass MUBUF_Load_Helper <bits<7> op, string asm, RegisterClass regClass> {
let offen = 0, idxen = 1 in {
def _IDXEN : MUBUF <op, (outs regClass:$vdata),
(ins SReg_128:$srsrc, VReg_32:$vaddr,
- i16imm:$offset, SSrc_32:$soffset, i1imm:$glc,
+ u16imm:$offset, SSrc_32:$soffset, i1imm:$glc,
i1imm:$slc, i1imm:$tfe),
asm#" $vdata, $srsrc[$vaddr] + $offset + $soffset, glc=$glc, slc=$slc, tfe=$tfe", []>;
}
@@ -513,7 +541,7 @@ multiclass MUBUF_Load_Helper <bits<7> op, string asm, RegisterClass regClass> {
let offen = 0, idxen = 0, addr64 = 1, glc = 0, slc = 0, tfe = 0, soffset = 128 /* ZERO */ in {
def _ADDR64 : MUBUF <op, (outs regClass:$vdata),
- (ins SReg_128:$srsrc, VReg_64:$vaddr, i16imm:$offset),
+ (ins SReg_128:$srsrc, VReg_64:$vaddr, u16imm:$offset),
asm#" $vdata, $srsrc + $vaddr + $offset", []>;
}
}
@@ -521,7 +549,7 @@ multiclass MUBUF_Load_Helper <bits<7> op, string asm, RegisterClass regClass> {
class MUBUF_Store_Helper <bits<7> op, string name, RegisterClass vdataClass> :
MUBUF <op, (outs), (ins vdataClass:$vdata, SReg_128:$srsrc, VReg_64:$vaddr,
- i16imm:$offset),
+ u16imm:$offset),
name#" $vdata, $srsrc + $vaddr + $offset",
[]> {
@@ -542,7 +570,7 @@ class MUBUF_Store_Helper <bits<7> op, string name, RegisterClass vdataClass> :
class MTBUF_Load_Helper <bits<3> op, string asm, RegisterClass regClass> : MTBUF <
op,
(outs regClass:$dst),
- (ins i16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc, i1imm:$addr64,
+ (ins u16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc, i1imm:$addr64,
i8imm:$dfmt, i8imm:$nfmt, VReg_32:$vaddr, SReg_128:$srsrc,
i1imm:$slc, i1imm:$tfe, SSrc_32:$soffset),
asm#" $dst, $offset, $offen, $idxen, $glc, $addr64, $dfmt,"
@@ -677,4 +705,12 @@ def isDS : InstrMapping {
let ValueCols = [["8"]];
}
+def getMCOpcode : InstrMapping {
+ let FilterClass = "SIMCInstr";
+ let RowFields = ["PseudoInstr"];
+ let ColFields = ["Subtarget"];
+ let KeyCol = [!cast<string>(SISubtarget.NONE)];
+ let ValueCols = [[!cast<string>(SISubtarget.SI)]];
+}
+
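The table generated from this InstrMapping is consumed through the getMCOpcode() declaration added to SIInstrInfo.h above. A hedged sketch of the consumer side (the surrounding MC lowering code is assumed, not shown in this diff):

    // Map a subtarget-independent pseudo opcode to its SI encoding;
    // -1 means there is no mapping and the opcode is already real.
    int MCOpcode = AMDGPU::getMCOpcode(MI->getOpcode(),
                                       /*Gen=*/0 /* SISubtarget.SI */);
    OutMI.setOpcode(MCOpcode == -1 ? MI->getOpcode() : unsigned(MCOpcode));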
include "SIInstructions.td"
diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
index 5232139..500fa78 100644
--- a/lib/Target/R600/SIInstructions.td
+++ b/lib/Target/R600/SIInstructions.td
@@ -32,9 +32,56 @@ def isSI : Predicate<"Subtarget.getGeneration() "
def isCI : Predicate<"Subtarget.getGeneration() "
">= AMDGPUSubtarget::SEA_ISLANDS">;
+def isCFDepth0 : Predicate<"isCFDepth0()">;
+
def WAIT_FLAG : InstFlag<"printWaitFlag">;
-let Predicates = [isSI] in {
+let SubtargetPredicate = isSI in {
+let OtherPredicates = [isCFDepth0] in {
+
+//===----------------------------------------------------------------------===//
+// SMRD Instructions
+//===----------------------------------------------------------------------===//
+
+let mayLoad = 1 in {
+
+// We are using the SGPR_32 and not the SReg_32 register class for 32-bit
+// SMRD instructions, because the SGPR_32 register class does not include M0
+// and writing to M0 from an SMRD instruction will hang the GPU.
+defm S_LOAD_DWORD : SMRD_Helper <0x00, "S_LOAD_DWORD", SReg_64, SGPR_32>;
+defm S_LOAD_DWORDX2 : SMRD_Helper <0x01, "S_LOAD_DWORDX2", SReg_64, SReg_64>;
+defm S_LOAD_DWORDX4 : SMRD_Helper <0x02, "S_LOAD_DWORDX4", SReg_64, SReg_128>;
+defm S_LOAD_DWORDX8 : SMRD_Helper <0x03, "S_LOAD_DWORDX8", SReg_64, SReg_256>;
+defm S_LOAD_DWORDX16 : SMRD_Helper <0x04, "S_LOAD_DWORDX16", SReg_64, SReg_512>;
+
+defm S_BUFFER_LOAD_DWORD : SMRD_Helper <
+ 0x08, "S_BUFFER_LOAD_DWORD", SReg_128, SGPR_32
+>;
+
+defm S_BUFFER_LOAD_DWORDX2 : SMRD_Helper <
+ 0x09, "S_BUFFER_LOAD_DWORDX2", SReg_128, SReg_64
+>;
+
+defm S_BUFFER_LOAD_DWORDX4 : SMRD_Helper <
+ 0x0a, "S_BUFFER_LOAD_DWORDX4", SReg_128, SReg_128
+>;
+
+defm S_BUFFER_LOAD_DWORDX8 : SMRD_Helper <
+ 0x0b, "S_BUFFER_LOAD_DWORDX8", SReg_128, SReg_256
+>;
+
+defm S_BUFFER_LOAD_DWORDX16 : SMRD_Helper <
+ 0x0c, "S_BUFFER_LOAD_DWORDX16", SReg_128, SReg_512
+>;
+
+} // mayLoad = 1
+
+//def S_MEMTIME : SMRD_ <0x0000001e, "S_MEMTIME", []>;
+//def S_DCACHE_INV : SMRD_ <0x0000001f, "S_DCACHE_INV", []>;
+
+//===----------------------------------------------------------------------===//
+// SOP1 Instructions
+//===----------------------------------------------------------------------===//
let neverHasSideEffects = 1 in {
@@ -45,7 +92,10 @@ def S_CMOV_B32 : SOP1_32 <0x00000005, "S_CMOV_B32", []>;
def S_CMOV_B64 : SOP1_64 <0x00000006, "S_CMOV_B64", []>;
} // End isMoveImm = 1
-def S_NOT_B32 : SOP1_32 <0x00000007, "S_NOT_B32", []>;
+def S_NOT_B32 : SOP1_32 <0x00000007, "S_NOT_B32",
+ [(set i32:$dst, (not i32:$src0))]
+>;
+
def S_NOT_B64 : SOP1_64 <0x00000008, "S_NOT_B64", []>;
def S_WQM_B32 : SOP1_32 <0x00000009, "S_WQM_B32", []>;
def S_WQM_B64 : SOP1_64 <0x0000000a, "S_WQM_B64", []>;
@@ -65,8 +115,13 @@ def S_BREV_B64 : SOP1_64 <0x0000000c, "S_BREV_B64", []>;
//def S_FLBIT_I32_B64 : SOP1_32 <0x00000016, "S_FLBIT_I32_B64", []>;
def S_FLBIT_I32 : SOP1_32 <0x00000017, "S_FLBIT_I32", []>;
//def S_FLBIT_I32_I64 : SOP1_32 <0x00000018, "S_FLBIT_I32_I64", []>;
-//def S_SEXT_I32_I8 : SOP1_32 <0x00000019, "S_SEXT_I32_I8", []>;
-//def S_SEXT_I32_I16 : SOP1_32 <0x0000001a, "S_SEXT_I32_I16", []>;
+def S_SEXT_I32_I8 : SOP1_32 <0x00000019, "S_SEXT_I32_I8",
+ [(set i32:$dst, (sext_inreg i32:$src0, i8))]
+>;
+def S_SEXT_I32_I16 : SOP1_32 <0x0000001a, "S_SEXT_I32_I16",
+ [(set i32:$dst, (sext_inreg i32:$src0, i16))]
+>;
+
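For readers unfamiliar with sext_inreg, a small standalone illustration of the operation these two patterns now select in a single scalar instruction:

    #include <cstdint>
    #include <cassert>

    // What (sext_inreg i32:$src0, i8) computes: reinterpret the low 8
    // bits of a 32-bit value as signed and extend back to 32 bits,
    // exactly what S_SEXT_I32_I8 does in one instruction.
    int32_t sext_inreg_i8(int32_t X) { return static_cast<int8_t>(X); }

    int main() {
      assert(sext_inreg_i8(0x000000FF) == -1);
      assert(sext_inreg_i8(0x0000007F) == 127);
      return 0;
    }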
////def S_BITSET0_B32 : SOP1_BITSET0 <0x0000001b, "S_BITSET0_B32", []>;
////def S_BITSET0_B64 : SOP1_BITSET0 <0x0000001c, "S_BITSET0_B64", []>;
////def S_BITSET1_B32 : SOP1_BITSET1 <0x0000001d, "S_BITSET1_B32", []>;
@@ -99,6 +154,150 @@ def S_MOVRELD_B64 : SOP1_64 <0x00000031, "S_MOVRELD_B64", []>;
def S_MOV_REGRD_B32 : SOP1_32 <0x00000033, "S_MOV_REGRD_B32", []>;
def S_ABS_I32 : SOP1_32 <0x00000034, "S_ABS_I32", []>;
def S_MOV_FED_B32 : SOP1_32 <0x00000035, "S_MOV_FED_B32", []>;
+
+//===----------------------------------------------------------------------===//
+// SOP2 Instructions
+//===----------------------------------------------------------------------===//
+
+let Defs = [SCC] in { // Carry out goes to SCC
+let isCommutable = 1 in {
+def S_ADD_U32 : SOP2_32 <0x00000000, "S_ADD_U32", []>;
+def S_ADD_I32 : SOP2_32 <0x00000002, "S_ADD_I32",
+ [(set i32:$dst, (add SSrc_32:$src0, SSrc_32:$src1))]
+>;
+} // End isCommutable = 1
+
+def S_SUB_U32 : SOP2_32 <0x00000001, "S_SUB_U32", []>;
+def S_SUB_I32 : SOP2_32 <0x00000003, "S_SUB_I32",
+ [(set i32:$dst, (sub SSrc_32:$src0, SSrc_32:$src1))]
+>;
+
+let Uses = [SCC] in { // Carry in comes from SCC
+let isCommutable = 1 in {
+def S_ADDC_U32 : SOP2_32 <0x00000004, "S_ADDC_U32",
+ [(set i32:$dst, (adde (i32 SSrc_32:$src0), (i32 SSrc_32:$src1)))]>;
+} // End isCommutable = 1
+
+def S_SUBB_U32 : SOP2_32 <0x00000005, "S_SUBB_U32",
+ [(set i32:$dst, (sube (i32 SSrc_32:$src0), (i32 SSrc_32:$src1)))]>;
+} // End Uses = [SCC]
+} // End Defs = [SCC]
+
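To see how the SCC carry chain is meant to be used, here is a worked 64-bit add expressed in plain C++ (illustration only; the instruction names in the comments refer to the defs above):

    #include <cstdint>

    // S_ADD_U32 produces the low half and sets SCC on carry-out;
    // S_ADDC_U32 then consumes SCC as carry-in for the high half.
    uint64_t add64(uint64_t A, uint64_t B) {
      uint32_t Lo = uint32_t(A) + uint32_t(B);            // S_ADD_U32
      uint32_t Carry = Lo < uint32_t(A);                  // carry-out -> SCC
      uint32_t Hi = uint32_t(A >> 32) + uint32_t(B >> 32)
                    + Carry;                              // S_ADDC_U32
      return (uint64_t(Hi) << 32) | Lo;
    }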
+def S_MIN_I32 : SOP2_32 <0x00000006, "S_MIN_I32",
+ [(set i32:$dst, (AMDGPUsmin i32:$src0, i32:$src1))]
+>;
+def S_MIN_U32 : SOP2_32 <0x00000007, "S_MIN_U32",
+ [(set i32:$dst, (AMDGPUumin i32:$src0, i32:$src1))]
+>;
+def S_MAX_I32 : SOP2_32 <0x00000008, "S_MAX_I32",
+ [(set i32:$dst, (AMDGPUsmax i32:$src0, i32:$src1))]
+>;
+def S_MAX_U32 : SOP2_32 <0x00000009, "S_MAX_U32",
+ [(set i32:$dst, (AMDGPUumax i32:$src0, i32:$src1))]
+>;
+
+def S_CSELECT_B32 : SOP2 <
+ 0x0000000a, (outs SReg_32:$dst),
+ (ins SReg_32:$src0, SReg_32:$src1, SCCReg:$scc), "S_CSELECT_B32",
+ []
+>;
+
+def S_CSELECT_B64 : SOP2_64 <0x0000000b, "S_CSELECT_B64", []>;
+
+def S_AND_B32 : SOP2_32 <0x0000000e, "S_AND_B32",
+ [(set i32:$dst, (and i32:$src0, i32:$src1))]
+>;
+
+def S_AND_B64 : SOP2_64 <0x0000000f, "S_AND_B64",
+ [(set i64:$dst, (and i64:$src0, i64:$src1))]
+>;
+
+def S_OR_B32 : SOP2_32 <0x00000010, "S_OR_B32",
+ [(set i32:$dst, (or i32:$src0, i32:$src1))]
+>;
+
+def S_OR_B64 : SOP2_64 <0x00000011, "S_OR_B64",
+ [(set i64:$dst, (or i64:$src0, i64:$src1))]
+>;
+
+def S_XOR_B32 : SOP2_32 <0x00000012, "S_XOR_B32",
+ [(set i32:$dst, (xor i32:$src0, i32:$src1))]
+>;
+
+def S_XOR_B64 : SOP2_64 <0x00000013, "S_XOR_B64",
+ [(set i64:$dst, (xor i64:$src0, i64:$src1))]
+>;
+def S_ANDN2_B32 : SOP2_32 <0x00000014, "S_ANDN2_B32", []>;
+def S_ANDN2_B64 : SOP2_64 <0x00000015, "S_ANDN2_B64", []>;
+def S_ORN2_B32 : SOP2_32 <0x00000016, "S_ORN2_B32", []>;
+def S_ORN2_B64 : SOP2_64 <0x00000017, "S_ORN2_B64", []>;
+def S_NAND_B32 : SOP2_32 <0x00000018, "S_NAND_B32", []>;
+def S_NAND_B64 : SOP2_64 <0x00000019, "S_NAND_B64", []>;
+def S_NOR_B32 : SOP2_32 <0x0000001a, "S_NOR_B32", []>;
+def S_NOR_B64 : SOP2_64 <0x0000001b, "S_NOR_B64", []>;
+def S_XNOR_B32 : SOP2_32 <0x0000001c, "S_XNOR_B32", []>;
+def S_XNOR_B64 : SOP2_64 <0x0000001d, "S_XNOR_B64", []>;
+
+// Use added complexity so these patterns are preferred to the VALU patterns.
+let AddedComplexity = 1 in {
+
+def S_LSHL_B32 : SOP2_32 <0x0000001e, "S_LSHL_B32",
+ [(set i32:$dst, (shl i32:$src0, i32:$src1))]
+>;
+def S_LSHL_B64 : SOP2_SHIFT_64 <0x0000001f, "S_LSHL_B64",
+ [(set i64:$dst, (shl i64:$src0, i32:$src1))]
+>;
+def S_LSHR_B32 : SOP2_32 <0x00000020, "S_LSHR_B32",
+ [(set i32:$dst, (srl i32:$src0, i32:$src1))]
+>;
+def S_LSHR_B64 : SOP2_SHIFT_64 <0x00000021, "S_LSHR_B64",
+ [(set i64:$dst, (srl i64:$src0, i32:$src1))]
+>;
+def S_ASHR_I32 : SOP2_32 <0x00000022, "S_ASHR_I32",
+ [(set i32:$dst, (sra i32:$src0, i32:$src1))]
+>;
+def S_ASHR_I64 : SOP2_SHIFT_64 <0x00000023, "S_ASHR_I64",
+ [(set i64:$dst, (sra i64:$src0, i32:$src1))]
+>;
+
+} // End AddedComplexity = 1
+
+def S_BFM_B32 : SOP2_32 <0x00000024, "S_BFM_B32", []>;
+def S_BFM_B64 : SOP2_64 <0x00000025, "S_BFM_B64", []>;
+def S_MUL_I32 : SOP2_32 <0x00000026, "S_MUL_I32", []>;
+def S_BFE_U32 : SOP2_32 <0x00000027, "S_BFE_U32", []>;
+def S_BFE_I32 : SOP2_32 <0x00000028, "S_BFE_I32", []>;
+def S_BFE_U64 : SOP2_64 <0x00000029, "S_BFE_U64", []>;
+def S_BFE_I64 : SOP2_64 <0x0000002a, "S_BFE_I64", []>;
+//def S_CBRANCH_G_FORK : SOP2_ <0x0000002b, "S_CBRANCH_G_FORK", []>;
+def S_ABSDIFF_I32 : SOP2_32 <0x0000002c, "S_ABSDIFF_I32", []>;
+
+//===----------------------------------------------------------------------===//
+// SOPC Instructions
+//===----------------------------------------------------------------------===//
+
+def S_CMP_EQ_I32 : SOPC_32 <0x00000000, "S_CMP_EQ_I32">;
+def S_CMP_LG_I32 : SOPC_32 <0x00000001, "S_CMP_LG_I32">;
+def S_CMP_GT_I32 : SOPC_32 <0x00000002, "S_CMP_GT_I32">;
+def S_CMP_GE_I32 : SOPC_32 <0x00000003, "S_CMP_GE_I32">;
+def S_CMP_LT_I32 : SOPC_32 <0x00000004, "S_CMP_LT_I32">;
+def S_CMP_LE_I32 : SOPC_32 <0x00000005, "S_CMP_LE_I32">;
+def S_CMP_EQ_U32 : SOPC_32 <0x00000006, "S_CMP_EQ_U32">;
+def S_CMP_LG_U32 : SOPC_32 <0x00000007, "S_CMP_LG_U32">;
+def S_CMP_GT_U32 : SOPC_32 <0x00000008, "S_CMP_GT_U32">;
+def S_CMP_GE_U32 : SOPC_32 <0x00000009, "S_CMP_GE_U32">;
+def S_CMP_LT_U32 : SOPC_32 <0x0000000a, "S_CMP_LT_U32">;
+def S_CMP_LE_U32 : SOPC_32 <0x0000000b, "S_CMP_LE_U32">;
+////def S_BITCMP0_B32 : SOPC_BITCMP0 <0x0000000c, "S_BITCMP0_B32", []>;
+////def S_BITCMP1_B32 : SOPC_BITCMP1 <0x0000000d, "S_BITCMP1_B32", []>;
+////def S_BITCMP0_B64 : SOPC_BITCMP0 <0x0000000e, "S_BITCMP0_B64", []>;
+////def S_BITCMP1_B64 : SOPC_BITCMP1 <0x0000000f, "S_BITCMP1_B64", []>;
+//def S_SETVSKIP : SOPC_ <0x00000010, "S_SETVSKIP", []>;
+
+//===----------------------------------------------------------------------===//
+// SOPK Instructions
+//===----------------------------------------------------------------------===//
+
def S_MOVK_I32 : SOPK_32 <0x00000000, "S_MOVK_I32", []>;
def S_CMOVK_I32 : SOPK_32 <0x00000002, "S_CMOVK_I32", []>;
@@ -147,6 +346,108 @@ def S_GETREG_REGRD_B32 : SOPK_32 <0x00000014, "S_GETREG_REGRD_B32", []>;
//def S_SETREG_IMM32_B32 : SOPK_32 <0x00000015, "S_SETREG_IMM32_B32", []>;
//def EXP : EXP_ <0x00000000, "EXP", []>;
+} // End let OtherPredicates = [isCFDepth0]
+
+//===----------------------------------------------------------------------===//
+// SOPP Instructions
+//===----------------------------------------------------------------------===//
+
+def S_NOP : SOPP <0x00000000, (ins i16imm:$SIMM16), "S_NOP $SIMM16", []>;
+
+let isTerminator = 1 in {
+
+def S_ENDPGM : SOPP <0x00000001, (ins), "S_ENDPGM",
+ [(IL_retflag)]> {
+ let SIMM16 = 0;
+ let isBarrier = 1;
+ let hasCtrlDep = 1;
+}
+
+let isBranch = 1 in {
+def S_BRANCH : SOPP <
+ 0x00000002, (ins brtarget:$target), "S_BRANCH $target",
+ [(br bb:$target)]> {
+ let isBarrier = 1;
+}
+
+let DisableEncoding = "$scc" in {
+def S_CBRANCH_SCC0 : SOPP <
+ 0x00000004, (ins brtarget:$target, SCCReg:$scc),
+ "S_CBRANCH_SCC0 $target", []
+>;
+def S_CBRANCH_SCC1 : SOPP <
+ 0x00000005, (ins brtarget:$target, SCCReg:$scc),
+ "S_CBRANCH_SCC1 $target",
+ []
+>;
+} // End DisableEncoding = "$scc"
+
+def S_CBRANCH_VCCZ : SOPP <
+ 0x00000006, (ins brtarget:$target, VCCReg:$vcc),
+ "S_CBRANCH_VCCZ $target",
+ []
+>;
+def S_CBRANCH_VCCNZ : SOPP <
+ 0x00000007, (ins brtarget:$target, VCCReg:$vcc),
+ "S_CBRANCH_VCCNZ $target",
+ []
+>;
+
+let DisableEncoding = "$exec" in {
+def S_CBRANCH_EXECZ : SOPP <
+ 0x00000008, (ins brtarget:$target, EXECReg:$exec),
+ "S_CBRANCH_EXECZ $target",
+ []
+>;
+def S_CBRANCH_EXECNZ : SOPP <
+ 0x00000009, (ins brtarget:$target, EXECReg:$exec),
+ "S_CBRANCH_EXECNZ $target",
+ []
+>;
+} // End DisableEncoding = "$exec"
+
+
+} // End isBranch = 1
+} // End isTerminator = 1
+
+let hasSideEffects = 1 in {
+def S_BARRIER : SOPP <0x0000000a, (ins), "S_BARRIER",
+ [(int_AMDGPU_barrier_local)]
+> {
+ let SIMM16 = 0;
+ let isBarrier = 1;
+ let hasCtrlDep = 1;
+ let mayLoad = 1;
+ let mayStore = 1;
+}
+
+def S_WAITCNT : SOPP <0x0000000c, (ins WAIT_FLAG:$simm16), "S_WAITCNT $simm16",
+ []
+>;
+//def S_SETHALT : SOPP_ <0x0000000d, "S_SETHALT", []>;
+//def S_SLEEP : SOPP_ <0x0000000e, "S_SLEEP", []>;
+//def S_SETPRIO : SOPP_ <0x0000000f, "S_SETPRIO", []>;
+
+let Uses = [EXEC] in {
+ def S_SENDMSG : SOPP <0x00000010, (ins SendMsgImm:$simm16, M0Reg:$m0), "S_SENDMSG $simm16",
+ [(int_SI_sendmsg imm:$simm16, M0Reg:$m0)]
+ > {
+ let DisableEncoding = "$m0";
+ }
+} // End Uses = [EXEC]
+
+//def S_SENDMSGHALT : SOPP_ <0x00000011, "S_SENDMSGHALT", []>;
+//def S_TRAP : SOPP_ <0x00000012, "S_TRAP", []>;
+//def S_ICACHE_INV : SOPP_ <0x00000013, "S_ICACHE_INV", []>;
+//def S_INCPERFLEVEL : SOPP_ <0x00000014, "S_INCPERFLEVEL", []>;
+//def S_DECPERFLEVEL : SOPP_ <0x00000015, "S_DECPERFLEVEL", []>;
+//def S_TTRACEDATA : SOPP_ <0x00000016, "S_TTRACEDATA", []>;
+} // End hasSideEffects
+
+//===----------------------------------------------------------------------===//
+// VOPC Instructions
+//===----------------------------------------------------------------------===//
+
let isCompare = 1 in {
defm V_CMP_F_F32 : VOPC_32 <0x00000000, "V_CMP_F_F32">;
@@ -403,6 +704,10 @@ defm V_CMPX_CLASS_F64 : VOPC_64 <0x000000b8, "V_CMPX_CLASS_F64">;
} // End isCompare = 1
+//===----------------------------------------------------------------------===//
+// DS Instructions
+//===----------------------------------------------------------------------===//
+
def DS_ADD_U32_RTN : DS_1A1D_RET <0x20, "DS_ADD_U32_RTN", VReg_32>;
def DS_SUB_U32_RTN : DS_1A1D_RET <0x21, "DS_SUB_U32_RTN", VReg_32>;
def DS_WRITE_B32 : DS_Store_Helper <0x0000000d, "DS_WRITE_B32", VReg_32>;
@@ -427,6 +732,9 @@ def DS_READ2_B64 : DS_Load2_Helper <0x00000075, "DS_READ2_B64", VReg_128>;
// TODO: DS_READ2ST64_B32, DS_READ2ST64_B64,
// DS_WRITE2ST64_B32, DS_WRITE2ST64_B64
+//===----------------------------------------------------------------------===//
+// MUBUF Instructions
+//===----------------------------------------------------------------------===//
//def BUFFER_LOAD_FORMAT_X : MUBUF_ <0x00000000, "BUFFER_LOAD_FORMAT_X", []>;
//def BUFFER_LOAD_FORMAT_XY : MUBUF_ <0x00000001, "BUFFER_LOAD_FORMAT_XY", []>;
@@ -499,6 +807,11 @@ def BUFFER_STORE_DWORDX4 : MUBUF_Store_Helper <
//def BUFFER_ATOMIC_FMAX_X2 : MUBUF_X2 <0x00000060, "BUFFER_ATOMIC_FMAX_X2", []>;
//def BUFFER_WBINVL1_SC : MUBUF_WBINVL1 <0x00000070, "BUFFER_WBINVL1_SC", []>;
//def BUFFER_WBINVL1 : MUBUF_WBINVL1 <0x00000071, "BUFFER_WBINVL1", []>;
+
+//===----------------------------------------------------------------------===//
+// MTBUF Instructions
+//===----------------------------------------------------------------------===//
+
//def TBUFFER_LOAD_FORMAT_X : MTBUF_ <0x00000000, "TBUFFER_LOAD_FORMAT_X", []>;
//def TBUFFER_LOAD_FORMAT_XY : MTBUF_ <0x00000001, "TBUFFER_LOAD_FORMAT_XY", []>;
//def TBUFFER_LOAD_FORMAT_XYZ : MTBUF_ <0x00000002, "TBUFFER_LOAD_FORMAT_XYZ", []>;
@@ -508,41 +821,10 @@ def TBUFFER_STORE_FORMAT_XY : MTBUF_Store_Helper <0x00000005, "TBUFFER_STORE_FOR
def TBUFFER_STORE_FORMAT_XYZ : MTBUF_Store_Helper <0x00000006, "TBUFFER_STORE_FORMAT_XYZ", VReg_128>;
def TBUFFER_STORE_FORMAT_XYZW : MTBUF_Store_Helper <0x00000007, "TBUFFER_STORE_FORMAT_XYZW", VReg_128>;
-let mayLoad = 1 in {
-
-// We are using the SGPR_32 and not the SReg_32 register class for 32-bit
-// SMRD instructions, because the SGPR_32 register class does not include M0
-// and writing to M0 from an SMRD instruction will hang the GPU.
-defm S_LOAD_DWORD : SMRD_Helper <0x00, "S_LOAD_DWORD", SReg_64, SGPR_32>;
-defm S_LOAD_DWORDX2 : SMRD_Helper <0x01, "S_LOAD_DWORDX2", SReg_64, SReg_64>;
-defm S_LOAD_DWORDX4 : SMRD_Helper <0x02, "S_LOAD_DWORDX4", SReg_64, SReg_128>;
-defm S_LOAD_DWORDX8 : SMRD_Helper <0x03, "S_LOAD_DWORDX8", SReg_64, SReg_256>;
-defm S_LOAD_DWORDX16 : SMRD_Helper <0x04, "S_LOAD_DWORDX16", SReg_64, SReg_512>;
-
-defm S_BUFFER_LOAD_DWORD : SMRD_Helper <
- 0x08, "S_BUFFER_LOAD_DWORD", SReg_128, SGPR_32
->;
-
-defm S_BUFFER_LOAD_DWORDX2 : SMRD_Helper <
- 0x09, "S_BUFFER_LOAD_DWORDX2", SReg_128, SReg_64
->;
-
-defm S_BUFFER_LOAD_DWORDX4 : SMRD_Helper <
- 0x0a, "S_BUFFER_LOAD_DWORDX4", SReg_128, SReg_128
->;
-
-defm S_BUFFER_LOAD_DWORDX8 : SMRD_Helper <
- 0x0b, "S_BUFFER_LOAD_DWORDX8", SReg_128, SReg_256
->;
-
-defm S_BUFFER_LOAD_DWORDX16 : SMRD_Helper <
- 0x0c, "S_BUFFER_LOAD_DWORDX16", SReg_128, SReg_512
->;
-
-} // mayLoad = 1
+//===----------------------------------------------------------------------===//
+// MIMG Instructions
+//===----------------------------------------------------------------------===//
-//def S_MEMTIME : SMRD_ <0x0000001e, "S_MEMTIME", []>;
-//def S_DCACHE_INV : SMRD_ <0x0000001f, "S_DCACHE_INV", []>;
defm IMAGE_LOAD : MIMG_NoSampler <0x00000000, "IMAGE_LOAD">;
defm IMAGE_LOAD_MIP : MIMG_NoSampler <0x00000001, "IMAGE_LOAD_MIP">;
//def IMAGE_LOAD_PCK : MIMG_NoPattern_ <"IMAGE_LOAD_PCK", 0x00000002>;
@@ -638,8 +920,12 @@ defm IMAGE_SAMPLE_C_B : MIMG_Sampler <0x0000002d, "IMAGE_SAMPLE_C_B">;
//def IMAGE_SAMPLE_C_CD_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_CD_CL_O", 0x0000006f>;
//def IMAGE_RSRC256 : MIMG_NoPattern_RSRC256 <"IMAGE_RSRC256", 0x0000007e>;
//def IMAGE_SAMPLER : MIMG_NoPattern_ <"IMAGE_SAMPLER", 0x0000007f>;
-//def V_NOP : VOP1_ <0x00000000, "V_NOP", []>;
+//===----------------------------------------------------------------------===//
+// VOP1 Instructions
+//===----------------------------------------------------------------------===//
+
+//def V_NOP : VOP1_ <0x00000000, "V_NOP", []>;
let neverHasSideEffects = 1, isMoveImm = 1 in {
defm V_MOV_B32 : VOP1_32 <0x00000001, "V_MOV_B32", []>;
@@ -691,8 +977,13 @@ defm V_CVT_F64_F32 : VOP1_64_32 <0x00000010, "V_CVT_F64_F32",
//defm V_CVT_F32_UBYTE1 : VOP1_32 <0x00000012, "V_CVT_F32_UBYTE1", []>;
//defm V_CVT_F32_UBYTE2 : VOP1_32 <0x00000013, "V_CVT_F32_UBYTE2", []>;
//defm V_CVT_F32_UBYTE3 : VOP1_32 <0x00000014, "V_CVT_F32_UBYTE3", []>;
-//defm V_CVT_U32_F64 : VOP1_32 <0x00000015, "V_CVT_U32_F64", []>;
-//defm V_CVT_F64_U32 : VOP1_64 <0x00000016, "V_CVT_F64_U32", []>;
+defm V_CVT_U32_F64 : VOP1_32_64 <0x00000015, "V_CVT_U32_F64",
+ [(set i32:$dst, (fp_to_uint f64:$src0))]
+>;
+defm V_CVT_F64_U32 : VOP1_64_32 <0x00000016, "V_CVT_F64_U32",
+ [(set f64:$dst, (uint_to_fp i32:$src0))]
+>;
+
defm V_FRACT_F32 : VOP1_32 <0x00000020, "V_FRACT_F32",
[(set f32:$dst, (AMDGPUfract f32:$src0))]
>;
@@ -756,6 +1047,11 @@ defm V_MOVRELD_B32 : VOP1_32 <0x00000042, "V_MOVRELD_B32", []>;
defm V_MOVRELS_B32 : VOP1_32 <0x00000043, "V_MOVRELS_B32", []>;
defm V_MOVRELSD_B32 : VOP1_32 <0x00000044, "V_MOVRELSD_B32", []>;
+
+//===----------------------------------------------------------------------===//
+// VINTRP Instructions
+//===----------------------------------------------------------------------===//
+
def V_INTERP_P1_F32 : VINTRP <
0x00000000,
(outs VReg_32:$dst),
@@ -786,97 +1082,9 @@ def V_INTERP_MOV_F32 : VINTRP <
let DisableEncoding = "$m0";
}
-//def S_NOP : SOPP_ <0x00000000, "S_NOP", []>;
-
-let isTerminator = 1 in {
-
-def S_ENDPGM : SOPP <0x00000001, (ins), "S_ENDPGM",
- [(IL_retflag)]> {
- let SIMM16 = 0;
- let isBarrier = 1;
- let hasCtrlDep = 1;
-}
-
-let isBranch = 1 in {
-def S_BRANCH : SOPP <
- 0x00000002, (ins brtarget:$target), "S_BRANCH $target",
- [(br bb:$target)]> {
- let isBarrier = 1;
-}
-
-let DisableEncoding = "$scc" in {
-def S_CBRANCH_SCC0 : SOPP <
- 0x00000004, (ins brtarget:$target, SCCReg:$scc),
- "S_CBRANCH_SCC0 $target", []
->;
-def S_CBRANCH_SCC1 : SOPP <
- 0x00000005, (ins brtarget:$target, SCCReg:$scc),
- "S_CBRANCH_SCC1 $target",
- []
->;
-} // End DisableEncoding = "$scc"
-
-def S_CBRANCH_VCCZ : SOPP <
- 0x00000006, (ins brtarget:$target, VCCReg:$vcc),
- "S_CBRANCH_VCCZ $target",
- []
->;
-def S_CBRANCH_VCCNZ : SOPP <
- 0x00000007, (ins brtarget:$target, VCCReg:$vcc),
- "S_CBRANCH_VCCNZ $target",
- []
->;
-
-let DisableEncoding = "$exec" in {
-def S_CBRANCH_EXECZ : SOPP <
- 0x00000008, (ins brtarget:$target, EXECReg:$exec),
- "S_CBRANCH_EXECZ $target",
- []
->;
-def S_CBRANCH_EXECNZ : SOPP <
- 0x00000009, (ins brtarget:$target, EXECReg:$exec),
- "S_CBRANCH_EXECNZ $target",
- []
->;
-} // End DisableEncoding = "$exec"
-
-
-} // End isBranch = 1
-} // End isTerminator = 1
-
-let hasSideEffects = 1 in {
-def S_BARRIER : SOPP <0x0000000a, (ins), "S_BARRIER",
- [(int_AMDGPU_barrier_local)]
-> {
- let SIMM16 = 0;
- let isBarrier = 1;
- let hasCtrlDep = 1;
- let mayLoad = 1;
- let mayStore = 1;
-}
-
-def S_WAITCNT : SOPP <0x0000000c, (ins WAIT_FLAG:$simm16), "S_WAITCNT $simm16",
- []
->;
-//def S_SETHALT : SOPP_ <0x0000000d, "S_SETHALT", []>;
-//def S_SLEEP : SOPP_ <0x0000000e, "S_SLEEP", []>;
-//def S_SETPRIO : SOPP_ <0x0000000f, "S_SETPRIO", []>;
-
-let Uses = [EXEC] in {
- def S_SENDMSG : SOPP <0x00000010, (ins SendMsgImm:$simm16, M0Reg:$m0), "S_SENDMSG $simm16",
- [(int_SI_sendmsg imm:$simm16, M0Reg:$m0)]
- > {
- let DisableEncoding = "$m0";
- }
-} // End Uses = [EXEC]
-
-//def S_SENDMSGHALT : SOPP_ <0x00000011, "S_SENDMSGHALT", []>;
-//def S_TRAP : SOPP_ <0x00000012, "S_TRAP", []>;
-//def S_ICACHE_INV : SOPP_ <0x00000013, "S_ICACHE_INV", []>;
-//def S_INCPERFLEVEL : SOPP_ <0x00000014, "S_INCPERFLEVEL", []>;
-//def S_DECPERFLEVEL : SOPP_ <0x00000015, "S_DECPERFLEVEL", []>;
-//def S_TTRACEDATA : SOPP_ <0x00000016, "S_TTRACEDATA", []>;
-} // End hasSideEffects
+//===----------------------------------------------------------------------===//
+// VOP2 Instructions
+//===----------------------------------------------------------------------===//
def V_CNDMASK_B32_e32 : VOP2 <0x00000000, (outs VReg_32:$dst),
(ins VSrc_32:$src0, VReg_32:$src1, VCCReg:$vcc),
@@ -891,18 +1099,11 @@ def V_CNDMASK_B32_e64 : VOP3 <0x00000100, (outs VReg_32:$dst),
InstFlag:$abs, InstFlag:$clamp, InstFlag:$omod, InstFlag:$neg),
"V_CNDMASK_B32_e64 $dst, $src0, $src1, $src2, $abs, $clamp, $omod, $neg",
[(set i32:$dst, (select i1:$src2, i32:$src1, i32:$src0))]
->;
-
-//f32 pattern for V_CNDMASK_B32_e64
-def : Pat <
- (f32 (select i1:$src2, f32:$src1, f32:$src0)),
- (V_CNDMASK_B32_e64 $src0, $src1, $src2)
->;
-
-def : Pat <
- (i32 (trunc i64:$val)),
- (EXTRACT_SUBREG $val, sub0)
->;
+> {
+ let src0_modifiers = 0;
+ let src1_modifiers = 0;
+ let src2_modifiers = 0;
+}
def V_READLANE_B32 : VOP2 <
0x00000001,
@@ -946,11 +1147,11 @@ defm V_MUL_F32 : VOP2_32 <0x00000008, "V_MUL_F32",
defm V_MUL_I32_I24 : VOP2_32 <0x00000009, "V_MUL_I32_I24",
- [(set i32:$dst, (mul I24:$src0, I24:$src1))]
+ [(set i32:$dst, (AMDGPUmul_i24 i32:$src0, i32:$src1))]
>;
//defm V_MUL_HI_I32_I24 : VOP2_32 <0x0000000a, "V_MUL_HI_I32_I24", []>;
defm V_MUL_U32_U24 : VOP2_32 <0x0000000b, "V_MUL_U32_U24",
- [(set i32:$dst, (mul U24:$src0, U24:$src1))]
+ [(set i32:$dst, (AMDGPUmul_u24 i32:$src0, i32:$src1))]
>;
//defm V_MUL_HI_U32_U24 : VOP2_32 <0x0000000c, "V_MUL_HI_U32_U24", []>;
@@ -965,27 +1166,43 @@ defm V_MAX_LEGACY_F32 : VOP2_32 <0x0000000e, "V_MAX_LEGACY_F32",
defm V_MIN_F32 : VOP2_32 <0x0000000f, "V_MIN_F32", []>;
defm V_MAX_F32 : VOP2_32 <0x00000010, "V_MAX_F32", []>;
-defm V_MIN_I32 : VOP2_32 <0x00000011, "V_MIN_I32", []>;
-defm V_MAX_I32 : VOP2_32 <0x00000012, "V_MAX_I32", []>;
-defm V_MIN_U32 : VOP2_32 <0x00000013, "V_MIN_U32", []>;
-defm V_MAX_U32 : VOP2_32 <0x00000014, "V_MAX_U32", []>;
+defm V_MIN_I32 : VOP2_32 <0x00000011, "V_MIN_I32",
+ [(set i32:$dst, (AMDGPUsmin i32:$src0, i32:$src1))]>;
+defm V_MAX_I32 : VOP2_32 <0x00000012, "V_MAX_I32",
+ [(set i32:$dst, (AMDGPUsmax i32:$src0, i32:$src1))]>;
+defm V_MIN_U32 : VOP2_32 <0x00000013, "V_MIN_U32",
+ [(set i32:$dst, (AMDGPUumin i32:$src0, i32:$src1))]>;
+defm V_MAX_U32 : VOP2_32 <0x00000014, "V_MAX_U32",
+ [(set i32:$dst, (AMDGPUumax i32:$src0, i32:$src1))]>;
+
+defm V_LSHR_B32 : VOP2_32 <0x00000015, "V_LSHR_B32",
+ [(set i32:$dst, (srl i32:$src0, i32:$src1))]
+>;
-defm V_LSHR_B32 : VOP2_32 <0x00000015, "V_LSHR_B32", []>;
defm V_LSHRREV_B32 : VOP2_32 <0x00000016, "V_LSHRREV_B32", [], "V_LSHR_B32">;
-defm V_ASHR_I32 : VOP2_32 <0x00000017, "V_ASHR_I32", []>;
+defm V_ASHR_I32 : VOP2_32 <0x00000017, "V_ASHR_I32",
+ [(set i32:$dst, (sra i32:$src0, i32:$src1))]
+>;
defm V_ASHRREV_I32 : VOP2_32 <0x00000018, "V_ASHRREV_I32", [], "V_ASHR_I32">;
let hasPostISelHook = 1 in {
-defm V_LSHL_B32 : VOP2_32 <0x00000019, "V_LSHL_B32", []>;
+defm V_LSHL_B32 : VOP2_32 <0x00000019, "V_LSHL_B32",
+ [(set i32:$dst, (shl i32:$src0, i32:$src1))]
+>;
}
defm V_LSHLREV_B32 : VOP2_32 <0x0000001a, "V_LSHLREV_B32", [], "V_LSHL_B32">;
-defm V_AND_B32 : VOP2_32 <0x0000001b, "V_AND_B32", []>;
-defm V_OR_B32 : VOP2_32 <0x0000001c, "V_OR_B32", []>;
-defm V_XOR_B32 : VOP2_32 <0x0000001d, "V_XOR_B32", []>;
+defm V_AND_B32 : VOP2_32 <0x0000001b, "V_AND_B32",
+ [(set i32:$dst, (and i32:$src0, i32:$src1))]>;
+defm V_OR_B32 : VOP2_32 <0x0000001c, "V_OR_B32",
+ [(set i32:$dst, (or i32:$src0, i32:$src1))]
+>;
+defm V_XOR_B32 : VOP2_32 <0x0000001d, "V_XOR_B32",
+ [(set i32:$dst, (xor i32:$src0, i32:$src1))]
+>;
} // End isCommutable = 1
@@ -1001,14 +1218,18 @@ defm V_MBCNT_HI_U32_B32 : VOP2_32 <0x00000024, "V_MBCNT_HI_U32_B32", []>;
let isCommutable = 1, Defs = [VCC] in { // Carry-out goes to VCC
// No patterns so that the scalar instructions are always selected.
// The scalar versions will be replaced with vector when needed later.
-defm V_ADD_I32 : VOP2b_32 <0x00000025, "V_ADD_I32", [], VSrc_32>;
-defm V_SUB_I32 : VOP2b_32 <0x00000026, "V_SUB_I32", [], VSrc_32>;
+defm V_ADD_I32 : VOP2b_32 <0x00000025, "V_ADD_I32",
+ [(set i32:$dst, (add i32:$src0, i32:$src1))], VSrc_32>;
+defm V_SUB_I32 : VOP2b_32 <0x00000026, "V_SUB_I32",
+ [(set i32:$dst, (sub i32:$src0, i32:$src1))], VSrc_32>;
defm V_SUBREV_I32 : VOP2b_32 <0x00000027, "V_SUBREV_I32", [], VSrc_32,
"V_SUB_I32">;
let Uses = [VCC] in { // Carry-in comes from VCC
-defm V_ADDC_U32 : VOP2b_32 <0x00000028, "V_ADDC_U32", [], VReg_32>;
-defm V_SUBB_U32 : VOP2b_32 <0x00000029, "V_SUBB_U32", [], VReg_32>;
+defm V_ADDC_U32 : VOP2b_32 <0x00000028, "V_ADDC_U32",
+ [(set i32:$dst, (adde i32:$src0, i32:$src1))], VReg_32>;
+defm V_SUBB_U32 : VOP2b_32 <0x00000029, "V_SUBB_U32",
+ [(set i32:$dst, (sube i32:$src0, i32:$src1))], VReg_32>;
defm V_SUBBREV_U32 : VOP2b_32 <0x0000002a, "V_SUBBREV_U32", [], VReg_32,
"V_SUBB_U32">;
} // End Uses = [VCC]
@@ -1023,63 +1244,51 @@ defm V_CVT_PKRTZ_F16_F32 : VOP2_32 <0x0000002f, "V_CVT_PKRTZ_F16_F32",
>;
////def V_CVT_PK_U16_U32 : VOP2_U16 <0x00000030, "V_CVT_PK_U16_U32", []>;
////def V_CVT_PK_I16_I32 : VOP2_I16 <0x00000031, "V_CVT_PK_I16_I32", []>;
-def S_CMP_EQ_I32 : SOPC_32 <0x00000000, "S_CMP_EQ_I32", []>;
-def S_CMP_LG_I32 : SOPC_32 <0x00000001, "S_CMP_LG_I32", []>;
-def S_CMP_GT_I32 : SOPC_32 <0x00000002, "S_CMP_GT_I32", []>;
-def S_CMP_GE_I32 : SOPC_32 <0x00000003, "S_CMP_GE_I32", []>;
-def S_CMP_LT_I32 : SOPC_32 <0x00000004, "S_CMP_LT_I32", []>;
-def S_CMP_LE_I32 : SOPC_32 <0x00000005, "S_CMP_LE_I32", []>;
-def S_CMP_EQ_U32 : SOPC_32 <0x00000006, "S_CMP_EQ_U32", []>;
-def S_CMP_LG_U32 : SOPC_32 <0x00000007, "S_CMP_LG_U32", []>;
-def S_CMP_GT_U32 : SOPC_32 <0x00000008, "S_CMP_GT_U32", []>;
-def S_CMP_GE_U32 : SOPC_32 <0x00000009, "S_CMP_GE_U32", []>;
-def S_CMP_LT_U32 : SOPC_32 <0x0000000a, "S_CMP_LT_U32", []>;
-def S_CMP_LE_U32 : SOPC_32 <0x0000000b, "S_CMP_LE_U32", []>;
-////def S_BITCMP0_B32 : SOPC_BITCMP0 <0x0000000c, "S_BITCMP0_B32", []>;
-////def S_BITCMP1_B32 : SOPC_BITCMP1 <0x0000000d, "S_BITCMP1_B32", []>;
-////def S_BITCMP0_B64 : SOPC_BITCMP0 <0x0000000e, "S_BITCMP0_B64", []>;
-////def S_BITCMP1_B64 : SOPC_BITCMP1 <0x0000000f, "S_BITCMP1_B64", []>;
-//def S_SETVSKIP : SOPC_ <0x00000010, "S_SETVSKIP", []>;
+
+//===----------------------------------------------------------------------===//
+// VOP3 Instructions
+//===----------------------------------------------------------------------===//
let neverHasSideEffects = 1 in {
-def V_MAD_LEGACY_F32 : VOP3_32 <0x00000140, "V_MAD_LEGACY_F32", []>;
-def V_MAD_F32 : VOP3_32 <0x00000141, "V_MAD_F32", []>;
-def V_MAD_I32_I24 : VOP3_32 <0x00000142, "V_MAD_I32_I24",
- [(set i32:$dst, (add (mul I24:$src0, I24:$src1), i32:$src2))]
+defm V_MAD_LEGACY_F32 : VOP3_32 <0x00000140, "V_MAD_LEGACY_F32", []>;
+defm V_MAD_F32 : VOP3_32 <0x00000141, "V_MAD_F32",
+ [(set f32:$dst, (fadd (fmul f32:$src0, f32:$src1), f32:$src2))]
+>;
+defm V_MAD_I32_I24 : VOP3_32 <0x00000142, "V_MAD_I32_I24",
+ [(set i32:$dst, (AMDGPUmad_i24 i32:$src0, i32:$src1, i32:$src2))]
>;
-def V_MAD_U32_U24 : VOP3_32 <0x00000143, "V_MAD_U32_U24",
- [(set i32:$dst, (add (mul U24:$src0, U24:$src1), i32:$src2))]
+defm V_MAD_U32_U24 : VOP3_32 <0x00000143, "V_MAD_U32_U24",
+ [(set i32:$dst, (AMDGPUmad_u24 i32:$src0, i32:$src1, i32:$src2))]
>;
} // End neverHasSideEffects
-def V_CUBEID_F32 : VOP3_32 <0x00000144, "V_CUBEID_F32", []>;
-def V_CUBESC_F32 : VOP3_32 <0x00000145, "V_CUBESC_F32", []>;
-def V_CUBETC_F32 : VOP3_32 <0x00000146, "V_CUBETC_F32", []>;
-def V_CUBEMA_F32 : VOP3_32 <0x00000147, "V_CUBEMA_F32", []>;
+
+defm V_CUBEID_F32 : VOP3_32 <0x00000144, "V_CUBEID_F32", []>;
+defm V_CUBESC_F32 : VOP3_32 <0x00000145, "V_CUBESC_F32", []>;
+defm V_CUBETC_F32 : VOP3_32 <0x00000146, "V_CUBETC_F32", []>;
+defm V_CUBEMA_F32 : VOP3_32 <0x00000147, "V_CUBEMA_F32", []>;
let neverHasSideEffects = 1, mayLoad = 0, mayStore = 0 in {
-def V_BFE_U32 : VOP3_32 <0x00000148, "V_BFE_U32",
+defm V_BFE_U32 : VOP3_32 <0x00000148, "V_BFE_U32",
[(set i32:$dst, (AMDGPUbfe_u32 i32:$src0, i32:$src1, i32:$src2))]>;
-def V_BFE_I32 : VOP3_32 <0x00000149, "V_BFE_I32",
+defm V_BFE_I32 : VOP3_32 <0x00000149, "V_BFE_I32",
[(set i32:$dst, (AMDGPUbfe_i32 i32:$src0, i32:$src1, i32:$src2))]>;
}
-def V_BFI_B32 : VOP3_32 <0x0000014a, "V_BFI_B32",
+defm V_BFI_B32 : VOP3_32 <0x0000014a, "V_BFI_B32",
[(set i32:$dst, (AMDGPUbfi i32:$src0, i32:$src1, i32:$src2))]>;
-defm : BFIPatterns <V_BFI_B32>;
-def V_FMA_F32 : VOP3_32 <0x0000014b, "V_FMA_F32",
+defm V_FMA_F32 : VOP3_32 <0x0000014b, "V_FMA_F32",
[(set f32:$dst, (fma f32:$src0, f32:$src1, f32:$src2))]
>;
def V_FMA_F64 : VOP3_64 <0x0000014c, "V_FMA_F64",
[(set f64:$dst, (fma f64:$src0, f64:$src1, f64:$src2))]
>;
//def V_LERP_U8 : VOP3_U8 <0x0000014d, "V_LERP_U8", []>;
-def V_ALIGNBIT_B32 : VOP3_32 <0x0000014e, "V_ALIGNBIT_B32", []>;
-def : ROTRPattern <V_ALIGNBIT_B32>;
+defm V_ALIGNBIT_B32 : VOP3_32 <0x0000014e, "V_ALIGNBIT_B32", []>;
-def V_ALIGNBYTE_B32 : VOP3_32 <0x0000014f, "V_ALIGNBYTE_B32", []>;
-def V_MULLIT_F32 : VOP3_32 <0x00000150, "V_MULLIT_F32", []>;
+defm V_ALIGNBYTE_B32 : VOP3_32 <0x0000014f, "V_ALIGNBYTE_B32", []>;
+defm V_MULLIT_F32 : VOP3_32 <0x00000150, "V_MULLIT_F32", []>;
////def V_MIN3_F32 : VOP3_MIN3 <0x00000151, "V_MIN3_F32", []>;
////def V_MIN3_I32 : VOP3_MIN3 <0x00000152, "V_MIN3_I32", []>;
////def V_MIN3_U32 : VOP3_MIN3 <0x00000153, "V_MIN3_U32", []>;
@@ -1092,9 +1301,9 @@ def V_MULLIT_F32 : VOP3_32 <0x00000150, "V_MULLIT_F32", []>;
//def V_SAD_U8 : VOP3_U8 <0x0000015a, "V_SAD_U8", []>;
//def V_SAD_HI_U8 : VOP3_U8 <0x0000015b, "V_SAD_HI_U8", []>;
//def V_SAD_U16 : VOP3_U16 <0x0000015c, "V_SAD_U16", []>;
-def V_SAD_U32 : VOP3_32 <0x0000015d, "V_SAD_U32", []>;
+defm V_SAD_U32 : VOP3_32 <0x0000015d, "V_SAD_U32", []>;
////def V_CVT_PK_U8_F32 : VOP3_U8 <0x0000015e, "V_CVT_PK_U8_F32", []>;
-def V_DIV_FIXUP_F32 : VOP3_32 <0x0000015f, "V_DIV_FIXUP_F32", []>;
+defm V_DIV_FIXUP_F32 : VOP3_32 <0x0000015f, "V_DIV_FIXUP_F32", []>;
def V_DIV_FIXUP_F64 : VOP3_64 <0x00000160, "V_DIV_FIXUP_F64", []>;
def V_LSHL_B64 : VOP3_64_Shift <0x00000161, "V_LSHL_B64",
@@ -1116,181 +1325,46 @@ def V_MAX_F64 : VOP3_64 <0x00000167, "V_MAX_F64", []>;
} // isCommutable = 1
-def : Pat <
- (fadd f64:$src0, f64:$src1),
- (V_ADD_F64 $src0, $src1, (i64 0))
->;
-
-def : Pat <
- (fmul f64:$src0, f64:$src1),
- (V_MUL_F64 $src0, $src1, (i64 0))
->;
-
def V_LDEXP_F64 : VOP3_64 <0x00000168, "V_LDEXP_F64", []>;
let isCommutable = 1 in {
-def V_MUL_LO_U32 : VOP3_32 <0x00000169, "V_MUL_LO_U32", []>;
-def V_MUL_HI_U32 : VOP3_32 <0x0000016a, "V_MUL_HI_U32", []>;
-def V_MUL_LO_I32 : VOP3_32 <0x0000016b, "V_MUL_LO_I32", []>;
-def V_MUL_HI_I32 : VOP3_32 <0x0000016c, "V_MUL_HI_I32", []>;
+defm V_MUL_LO_U32 : VOP3_32 <0x00000169, "V_MUL_LO_U32", []>;
+defm V_MUL_HI_U32 : VOP3_32 <0x0000016a, "V_MUL_HI_U32", []>;
+defm V_MUL_LO_I32 : VOP3_32 <0x0000016b, "V_MUL_LO_I32", []>;
+defm V_MUL_HI_I32 : VOP3_32 <0x0000016c, "V_MUL_HI_I32", []>;
} // isCommutable = 1
-def : Pat <
- (mul i32:$src0, i32:$src1),
- (V_MUL_LO_I32 $src0, $src1, (i32 0))
->;
-
-def : Pat <
- (mulhu i32:$src0, i32:$src1),
- (V_MUL_HI_U32 $src0, $src1, (i32 0))
->;
-
-def : Pat <
- (mulhs i32:$src0, i32:$src1),
- (V_MUL_HI_I32 $src0, $src1, (i32 0))
->;
-
-def V_DIV_SCALE_F32 : VOP3_32 <0x0000016d, "V_DIV_SCALE_F32", []>;
+defm V_DIV_SCALE_F32 : VOP3_32 <0x0000016d, "V_DIV_SCALE_F32", []>;
def V_DIV_SCALE_F64 : VOP3_64 <0x0000016e, "V_DIV_SCALE_F64", []>;
-def V_DIV_FMAS_F32 : VOP3_32 <0x0000016f, "V_DIV_FMAS_F32", []>;
+defm V_DIV_FMAS_F32 : VOP3_32 <0x0000016f, "V_DIV_FMAS_F32", []>;
def V_DIV_FMAS_F64 : VOP3_64 <0x00000170, "V_DIV_FMAS_F64", []>;
//def V_MSAD_U8 : VOP3_U8 <0x00000171, "V_MSAD_U8", []>;
//def V_QSAD_U8 : VOP3_U8 <0x00000172, "V_QSAD_U8", []>;
//def V_MQSAD_U8 : VOP3_U8 <0x00000173, "V_MQSAD_U8", []>;
def V_TRIG_PREOP_F64 : VOP3_64 <0x00000174, "V_TRIG_PREOP_F64", []>;
-let Defs = [SCC] in { // Carry out goes to SCC
-let isCommutable = 1 in {
-def S_ADD_U32 : SOP2_32 <0x00000000, "S_ADD_U32", []>;
-def S_ADD_I32 : SOP2_32 <0x00000002, "S_ADD_I32",
- [(set i32:$dst, (add SSrc_32:$src0, SSrc_32:$src1))]
->;
-} // End isCommutable = 1
-
-def S_SUB_U32 : SOP2_32 <0x00000001, "S_SUB_U32", []>;
-def S_SUB_I32 : SOP2_32 <0x00000003, "S_SUB_I32",
- [(set i32:$dst, (sub SSrc_32:$src0, SSrc_32:$src1))]
->;
-
-let Uses = [SCC] in { // Carry in comes from SCC
-let isCommutable = 1 in {
-def S_ADDC_U32 : SOP2_32 <0x00000004, "S_ADDC_U32",
- [(set i32:$dst, (adde (i32 SSrc_32:$src0), (i32 SSrc_32:$src1)))]>;
-} // End isCommutable = 1
-
-def S_SUBB_U32 : SOP2_32 <0x00000005, "S_SUBB_U32",
- [(set i32:$dst, (sube (i32 SSrc_32:$src0), (i32 SSrc_32:$src1)))]>;
-} // End Uses = [SCC]
-} // End Defs = [SCC]
-
-def S_MIN_I32 : SOP2_32 <0x00000006, "S_MIN_I32",
- [(set i32:$dst, (AMDGPUsmin i32:$src0, i32:$src1))]
->;
-def S_MIN_U32 : SOP2_32 <0x00000007, "S_MIN_U32",
- [(set i32:$dst, (AMDGPUumin i32:$src0, i32:$src1))]
->;
-def S_MAX_I32 : SOP2_32 <0x00000008, "S_MAX_I32",
- [(set i32:$dst, (AMDGPUsmax i32:$src0, i32:$src1))]
->;
-def S_MAX_U32 : SOP2_32 <0x00000009, "S_MAX_U32",
- [(set i32:$dst, (AMDGPUumax i32:$src0, i32:$src1))]
->;
-
-def S_CSELECT_B32 : SOP2 <
- 0x0000000a, (outs SReg_32:$dst),
- (ins SReg_32:$src0, SReg_32:$src1, SCCReg:$scc), "S_CSELECT_B32",
- []
->;
-
-def S_CSELECT_B64 : SOP2_64 <0x0000000b, "S_CSELECT_B64", []>;
-
-def S_AND_B32 : SOP2_32 <0x0000000e, "S_AND_B32",
- [(set i32:$dst, (and i32:$src0, i32:$src1))]
->;
-
-def S_AND_B64 : SOP2_64 <0x0000000f, "S_AND_B64",
- [(set i64:$dst, (and i64:$src0, i64:$src1))]
->;
-
-def : Pat <
- (i1 (and i1:$src0, i1:$src1)),
- (S_AND_B64 $src0, $src1)
->;
-
-def S_OR_B32 : SOP2_32 <0x00000010, "S_OR_B32",
- [(set i32:$dst, (or i32:$src0, i32:$src1))]
->;
-
-def S_OR_B64 : SOP2_64 <0x00000011, "S_OR_B64",
- [(set i64:$dst, (or i64:$src0, i64:$src1))]
->;
-
-def : Pat <
- (i1 (or i1:$src0, i1:$src1)),
- (S_OR_B64 $src0, $src1)
->;
+//===----------------------------------------------------------------------===//
+// Pseudo Instructions
+//===----------------------------------------------------------------------===//
-def S_XOR_B32 : SOP2_32 <0x00000012, "S_XOR_B32",
- [(set i32:$dst, (xor i32:$src0, i32:$src1))]
->;
+let isCodeGenOnly = 1, isPseudo = 1 in {
-def S_XOR_B64 : SOP2_64 <0x00000013, "S_XOR_B64",
- [(set i1:$dst, (xor i1:$src0, i1:$src1))]
+def V_MOV_I1 : InstSI <
+ (outs VReg_1:$dst),
+ (ins i1imm:$src),
+ "", [(set i1:$dst, (imm:$src))]
>;
-def S_ANDN2_B32 : SOP2_32 <0x00000014, "S_ANDN2_B32", []>;
-def S_ANDN2_B64 : SOP2_64 <0x00000015, "S_ANDN2_B64", []>;
-def S_ORN2_B32 : SOP2_32 <0x00000016, "S_ORN2_B32", []>;
-def S_ORN2_B64 : SOP2_64 <0x00000017, "S_ORN2_B64", []>;
-def S_NAND_B32 : SOP2_32 <0x00000018, "S_NAND_B32", []>;
-def S_NAND_B64 : SOP2_64 <0x00000019, "S_NAND_B64", []>;
-def S_NOR_B32 : SOP2_32 <0x0000001a, "S_NOR_B32", []>;
-def S_NOR_B64 : SOP2_64 <0x0000001b, "S_NOR_B64", []>;
-def S_XNOR_B32 : SOP2_32 <0x0000001c, "S_XNOR_B32", []>;
-def S_XNOR_B64 : SOP2_64 <0x0000001d, "S_XNOR_B64", []>;
-
-// Use added complexity so these patterns are preferred to the VALU patterns.
-let AddedComplexity = 1 in {
-def S_LSHL_B32 : SOP2_32 <0x0000001e, "S_LSHL_B32",
- [(set i32:$dst, (shl i32:$src0, i32:$src1))]
->;
-def S_LSHL_B64 : SOP2_SHIFT_64 <0x0000001f, "S_LSHL_B64",
- [(set i64:$dst, (shl i64:$src0, i32:$src1))]
->;
-def S_LSHR_B32 : SOP2_32 <0x00000020, "S_LSHR_B32",
- [(set i32:$dst, (srl i32:$src0, i32:$src1))]
->;
-def S_LSHR_B64 : SOP2_SHIFT_64 <0x00000021, "S_LSHR_B64",
- [(set i64:$dst, (srl i64:$src0, i32:$src1))]
->;
-def S_ASHR_I32 : SOP2_32 <0x00000022, "S_ASHR_I32",
- [(set i32:$dst, (sra i32:$src0, i32:$src1))]
->;
-def S_ASHR_I64 : SOP2_SHIFT_64 <0x00000023, "S_ASHR_I64",
- [(set i64:$dst, (sra i64:$src0, i32:$src1))]
+def V_AND_I1 : InstSI <
+ (outs VReg_1:$dst), (ins VReg_1:$src0, VReg_1:$src1), "",
+ [(set i1:$dst, (and i1:$src0, i1:$src1))]
>;
-} // End AddedComplexity = 1
-
-def S_BFM_B32 : SOP2_32 <0x00000024, "S_BFM_B32", []>;
-def S_BFM_B64 : SOP2_64 <0x00000025, "S_BFM_B64", []>;
-def S_MUL_I32 : SOP2_32 <0x00000026, "S_MUL_I32", []>;
-def S_BFE_U32 : SOP2_32 <0x00000027, "S_BFE_U32", []>;
-def S_BFE_I32 : SOP2_32 <0x00000028, "S_BFE_I32", []>;
-def S_BFE_U64 : SOP2_64 <0x00000029, "S_BFE_U64", []>;
-def S_BFE_I64 : SOP2_64 <0x0000002a, "S_BFE_I64", []>;
-//def S_CBRANCH_G_FORK : SOP2_ <0x0000002b, "S_CBRANCH_G_FORK", []>;
-def S_ABSDIFF_I32 : SOP2_32 <0x0000002c, "S_ABSDIFF_I32", []>;
-
-let isCodeGenOnly = 1, isPseudo = 1 in {
-
-def LOAD_CONST : AMDGPUShaderInst <
- (outs GPRF32:$dst),
- (ins i32imm:$src),
- "LOAD_CONST $dst, $src",
- [(set GPRF32:$dst, (int_AMDGPU_load_const imm:$src))]
+def V_OR_I1 : InstSI <
+ (outs VReg_1:$dst), (ins VReg_1:$src0, VReg_1:$src1), "",
+ [(set i1:$dst, (or i1:$src0, i1:$src1))]
>;
// SI pseudo instructions. These are used by the CFG structurizer pass
@@ -1301,19 +1375,19 @@ let mayLoad = 1, mayStore = 1, hasSideEffects = 1,
let isBranch = 1, isTerminator = 1 in {
-def SI_IF : InstSI <
+def SI_IF: InstSI <
(outs SReg_64:$dst),
(ins SReg_64:$vcc, brtarget:$target),
- "SI_IF $dst, $vcc, $target",
+ "",
[(set i64:$dst, (int_SI_if i1:$vcc, bb:$target))]
>;
def SI_ELSE : InstSI <
(outs SReg_64:$dst),
(ins SReg_64:$src, brtarget:$target),
- "SI_ELSE $dst, $src, $target",
- [(set i64:$dst, (int_SI_else i64:$src, bb:$target))]> {
-
+ "",
+ [(set i64:$dst, (int_SI_else i64:$src, bb:$target))]
+> {
let Constraints = "$src = $dst";
}
@@ -1370,7 +1444,7 @@ let Uses = [EXEC], Defs = [EXEC,VCC,M0] in {
let UseNamedOperandTable = 1 in {
-def SI_RegisterLoad : AMDGPUShaderInst <
+def SI_RegisterLoad : InstSI <
(outs VReg_32:$dst, SReg_64:$temp),
(ins FRAMEri32:$addr, i32imm:$chan),
"", []
@@ -1379,7 +1453,7 @@ def SI_RegisterLoad : AMDGPUShaderInst <
let mayLoad = 1;
}
-class SIRegStore<dag outs> : AMDGPUShaderInst <
+class SIRegStore<dag outs> : InstSI <
outs,
(ins VReg_32:$val, FRAMEri32:$addr, i32imm:$chan),
"", []
@@ -1439,8 +1513,33 @@ def V_SUB_F64 : InstSI <
} // end usesCustomInserter
+multiclass SI_SPILL_SGPR <RegisterClass sgpr_class> {
+
+ def _SAVE : InstSI <
+ (outs VReg_32:$dst),
+ (ins sgpr_class:$src, i32imm:$frame_idx),
+ "", []
+ >;
+
+ def _RESTORE : InstSI <
+ (outs sgpr_class:$dst),
+ (ins VReg_32:$src, i32imm:$frame_idx),
+ "", []
+ >;
+
+}
+
+defm SI_SPILL_S64 : SI_SPILL_SGPR <SReg_64>;
+defm SI_SPILL_S128 : SI_SPILL_SGPR <SReg_128>;
+defm SI_SPILL_S256 : SI_SPILL_SGPR <SReg_256>;
+defm SI_SPILL_S512 : SI_SPILL_SGPR <SReg_512>;
+
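These spill pseudos are presumably selected by register-class width in storeRegToStackSlot/loadRegFromStackSlot; the companion C++ change is not in this hunk, so the following opcode chooser is a sketch under that assumption:

    // Sketch: pick the SGPR spill-save pseudo matching the register
    // class width (getSize() is in bytes).
    static unsigned getSGPRSpillSaveOpcode(const TargetRegisterClass *RC) {
      switch (RC->getSize()) {
      case 8:  return AMDGPU::SI_SPILL_S64_SAVE;
      case 16: return AMDGPU::SI_SPILL_S128_SAVE;
      case 32: return AMDGPU::SI_SPILL_S256_SAVE;
      case 64: return AMDGPU::SI_SPILL_S512_SAVE;
      default: llvm_unreachable("unsupported SGPR spill size");
      }
    }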
} // end IsCodeGenOnly, isPseudo
+} // end SubtargetPredicate = SI
+
+let Predicates = [isSI] in {
+
def : Pat<
(int_AMDGPU_cndlt f32:$src0, f32:$src1, f32:$src2),
(V_CNDMASK_B32_e64 $src2, $src1, (V_CMP_GT_F32_e64 0, $src0))
@@ -1453,7 +1552,7 @@ def : Pat <
/* int_SI_vs_load_input */
def : Pat<
- (SIload_input i128:$tlst, IMM12bit:$attr_offset, i32:$buf_idx_vgpr),
+ (SIload_input v4i32:$tlst, IMM12bit:$attr_offset, i32:$buf_idx_vgpr),
(BUFFER_LOAD_FORMAT_XYZW_IDXEN $tlst, $buf_idx_vgpr, imm:$attr_offset, 0, 0, 0, 0)
>;
@@ -1470,40 +1569,116 @@ def : Pat <
(V_SUB_F64 $src0, $src1)
>;
+//===----------------------------------------------------------------------===//
+// SMRD Patterns
+//===----------------------------------------------------------------------===//
+
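+// Match loads from the constant address space to scalar memory reads. One
+// pattern is generated for each of the three addressing forms listed below.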
+multiclass SMRD_Pattern <SMRD Instr_IMM, SMRD Instr_SGPR, ValueType vt> {
+
+ // 1. Offset as an 8-bit DWORD immediate
+ def : Pat <
+ (constant_load (add i64:$sbase, (i64 IMM8bitDWORD:$offset))),
+ (vt (Instr_IMM $sbase, (as_dword_i32imm $offset)))
+ >;
+
+ // 2. Offset loaded in a 32-bit SGPR
+ def : Pat <
+ (constant_load (add i64:$sbase, (i64 IMM32bit:$offset))),
+ (vt (Instr_SGPR $sbase, (S_MOV_B32 (i32 (as_i32imm $offset)))))
+ >;
+
+ // 3. No offset at all
+ def : Pat <
+ (constant_load i64:$sbase),
+ (vt (Instr_IMM $sbase, 0))
+ >;
+}
+
+defm : SMRD_Pattern <S_LOAD_DWORD_IMM, S_LOAD_DWORD_SGPR, f32>;
+defm : SMRD_Pattern <S_LOAD_DWORD_IMM, S_LOAD_DWORD_SGPR, i32>;
+defm : SMRD_Pattern <S_LOAD_DWORDX2_IMM, S_LOAD_DWORDX2_SGPR, i64>;
+defm : SMRD_Pattern <S_LOAD_DWORDX2_IMM, S_LOAD_DWORDX2_SGPR, v2i32>;
+defm : SMRD_Pattern <S_LOAD_DWORDX4_IMM, S_LOAD_DWORDX4_SGPR, v4i32>;
+defm : SMRD_Pattern <S_LOAD_DWORDX8_IMM, S_LOAD_DWORDX8_SGPR, v32i8>;
+defm : SMRD_Pattern <S_LOAD_DWORDX8_IMM, S_LOAD_DWORDX8_SGPR, v8i32>;
+defm : SMRD_Pattern <S_LOAD_DWORDX16_IMM, S_LOAD_DWORDX16_SGPR, v16i32>;
+
+// 1. Offset as an 8-bit DWORD immediate
+def : Pat <
+ (SIload_constant v4i32:$sbase, IMM8bitDWORD:$offset),
+ (S_BUFFER_LOAD_DWORD_IMM $sbase, (as_dword_i32imm $offset))
+>;
+
+// 2. Offset loaded in a 32-bit SGPR
+def : Pat <
+ (SIload_constant v4i32:$sbase, imm:$offset),
+ (S_BUFFER_LOAD_DWORD_SGPR $sbase, (S_MOV_B32 imm:$offset))
+>;
+
+//===----------------------------------------------------------------------===//
+// SOP2 Patterns
+//===----------------------------------------------------------------------===//
+
+def : Pat <
+ (i1 (xor i1:$src0, i1:$src1)),
+ (S_XOR_B64 $src0, $src1)
+>;
+
+//===----------------------------------------------------------------------===//
+// VOP2 Patterns
+//===----------------------------------------------------------------------===//
+
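+// There is no 64-bit VALU OR, so split the operation into two 32-bit
+// V_OR_B32 ops on the low and high halves and reassemble the result.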
+def : Pat <
+ (or i64:$src0, i64:$src1),
+ (INSERT_SUBREG (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+ (V_OR_B32_e32 (EXTRACT_SUBREG i64:$src0, sub0),
+ (EXTRACT_SUBREG i64:$src1, sub0)), sub0),
+ (V_OR_B32_e32 (EXTRACT_SUBREG i64:$src0, sub1),
+ (EXTRACT_SUBREG i64:$src1, sub1)), sub1)
+>;
+
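+// Lower sign_extend_inreg by shifting the field up so its sign bit lands in
+// bit 31, then arithmetic-shifting it back down.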
+class SextInReg <ValueType vt, int ShiftAmt> : Pat <
+ (sext_inreg i32:$src0, vt),
+ (V_ASHRREV_I32_e32 ShiftAmt, (V_LSHLREV_B32_e32 ShiftAmt, $src0))
+>;
+
+def : SextInReg <i8, 24>;
+def : SextInReg <i16, 16>;
+
/********** ======================= **********/
/********** Image sampling patterns **********/
/********** ======================= **********/
/* SIsample for simple 1D texture lookup */
def : Pat <
- (SIsample i32:$addr, v32i8:$rsrc, i128:$sampler, imm),
+ (SIsample i32:$addr, v32i8:$rsrc, v4i32:$sampler, imm),
(IMAGE_SAMPLE_V4_V1 0xf, 0, 0, 0, 0, 0, 0, 0, $addr, $rsrc, $sampler)
>;
class SamplePattern<SDNode name, MIMG opcode, ValueType vt> : Pat <
- (name vt:$addr, v32i8:$rsrc, i128:$sampler, imm),
+ (name vt:$addr, v32i8:$rsrc, v4i32:$sampler, imm),
(opcode 0xf, 0, 0, 0, 0, 0, 0, 0, $addr, $rsrc, $sampler)
>;
class SampleRectPattern<SDNode name, MIMG opcode, ValueType vt> : Pat <
- (name vt:$addr, v32i8:$rsrc, i128:$sampler, TEX_RECT),
+ (name vt:$addr, v32i8:$rsrc, v4i32:$sampler, TEX_RECT),
(opcode 0xf, 1, 0, 0, 0, 0, 0, 0, $addr, $rsrc, $sampler)
>;
class SampleArrayPattern<SDNode name, MIMG opcode, ValueType vt> : Pat <
- (name vt:$addr, v32i8:$rsrc, i128:$sampler, TEX_ARRAY),
+ (name vt:$addr, v32i8:$rsrc, v4i32:$sampler, TEX_ARRAY),
(opcode 0xf, 0, 0, 1, 0, 0, 0, 0, $addr, $rsrc, $sampler)
>;
class SampleShadowPattern<SDNode name, MIMG opcode,
ValueType vt> : Pat <
- (name vt:$addr, v32i8:$rsrc, i128:$sampler, TEX_SHADOW),
+ (name vt:$addr, v32i8:$rsrc, v4i32:$sampler, TEX_SHADOW),
(opcode 0xf, 0, 0, 0, 0, 0, 0, 0, $addr, $rsrc, $sampler)
>;
class SampleShadowArrayPattern<SDNode name, MIMG opcode,
ValueType vt> : Pat <
- (name vt:$addr, v32i8:$rsrc, i128:$sampler, TEX_SHADOW_ARRAY),
+ (name vt:$addr, v32i8:$rsrc, v4i32:$sampler, TEX_SHADOW_ARRAY),
(opcode 0xf, 0, 0, 1, 0, 0, 0, 0, $addr, $rsrc, $sampler)
>;
@@ -1692,8 +1867,6 @@ def : BitConvert <i64, v2i32, VReg_64>;
def : BitConvert <v4f32, v4i32, VReg_128>;
def : BitConvert <v4i32, v4f32, VReg_128>;
-def : BitConvert <v4i32, i128, VReg_128>;
-def : BitConvert <i128, v4i32, VReg_128>;
def : BitConvert <v8f32, v8i32, SReg_256>;
def : BitConvert <v8i32, v8f32, SReg_256>;
@@ -1711,10 +1884,18 @@ def : BitConvert <v16f32, v16i32, VReg_512>;
/********** Src & Dst modifiers **********/
/********** =================== **********/
+def FCLAMP_SI : AMDGPUShaderInst <
+ (outs VReg_32:$dst),
+ (ins VSrc_32:$src0),
+ "FCLAMP_SI $dst, $src0",
+ []
+> {
+ let usesCustomInserter = 1;
+}
+
def : Pat <
(int_AMDIL_clamp f32:$src, (f32 FP_ZERO), (f32 FP_ONE)),
- (V_ADD_F32_e64 $src, (i32 0 /* SRC1 */),
- 0 /* ABS */, 1 /* CLAMP */, 0 /* OMOD */, 0 /* NEG */)
+ (FCLAMP_SI f32:$src)
>;
/********** ================================ **********/
@@ -1733,14 +1914,32 @@ def : Pat <
(V_OR_B32_e32 $src, (V_MOV_B32_e32 0x80000000)) /* Set sign bit */
>;
+def FABS_SI : AMDGPUShaderInst <
+ (outs VReg_32:$dst),
+ (ins VSrc_32:$src0),
+ "FABS_SI $dst, $src0",
+ []
+> {
+ let usesCustomInserter = 1;
+}
+
def : Pat <
(fabs f32:$src),
- (V_AND_B32_e32 $src, (V_MOV_B32_e32 0x7fffffff)) /* Clear sign bit */
+ (FABS_SI f32:$src)
>;
+def FNEG_SI : AMDGPUShaderInst <
+ (outs VReg_32:$dst),
+ (ins VSrc_32:$src0),
+ "FNEG_SI $dst, $src0",
+ []
+> {
+ let usesCustomInserter = 1;
+}
+
def : Pat <
(fneg f32:$src),
- (V_XOR_B32_e32 $src, (V_MOV_B32_e32 0x80000000)) /* Toggle sign bit */
+ (FNEG_SI f32:$src)
>;
/********** ================== **********/
@@ -1768,30 +1967,10 @@ def : Pat <
>;
def : Pat <
- (i1 imm:$imm),
- (S_MOV_B64 imm:$imm)
->;
-
-def : Pat <
(i64 InlineImm<i64>:$imm),
(S_MOV_B64 InlineImm<i64>:$imm)
>;
-// i64 immediates aren't supported in hardware, split it into two 32bit values
-def : Pat <
- (i64 imm:$imm),
- (INSERT_SUBREG (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
- (S_MOV_B32 (i32 (LO32 imm:$imm))), sub0),
- (S_MOV_B32 (i32 (HI32 imm:$imm))), sub1)
->;
-
-def : Pat <
- (f64 fpimm:$imm),
- (INSERT_SUBREG (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
- (V_MOV_B32_e32 (f32 (LO32f fpimm:$imm))), sub0),
- (V_MOV_B32_e32 (f32 (HI32f fpimm:$imm))), sub1)
->;
-
/********** ===================== **********/
/********** Interpolation Patterns **********/
/********** ===================== **********/
@@ -1875,21 +2054,9 @@ class Ext32Pat <SDNode ext> : Pat <
def : Ext32Pat <zext>;
def : Ext32Pat <anyext>;
-// 1. Offset as 8bit DWORD immediate
+// Offset in a 32-bit VGPR
def : Pat <
- (SIload_constant i128:$sbase, IMM8bitDWORD:$offset),
- (S_BUFFER_LOAD_DWORD_IMM $sbase, (as_dword_i32imm $offset))
->;
-
-// 2. Offset loaded in an 32bit SGPR
-def : Pat <
- (SIload_constant i128:$sbase, imm:$offset),
- (S_BUFFER_LOAD_DWORD_SGPR $sbase, (S_MOV_B32 imm:$offset))
->;
-
-// 3. Offset in an 32Bit VGPR
-def : Pat <
- (SIload_constant i128:$sbase, i32:$voff),
+ (SIload_constant v4i32:$sbase, i32:$voff),
(BUFFER_LOAD_DWORD_OFFEN $sbase, $voff, 0, 0, 0, 0)
>;
@@ -1904,18 +2071,44 @@ def : Pat <
def : Pat <
(int_SI_tid),
(V_MBCNT_HI_U32_B32_e32 0xffffffff,
- (V_MBCNT_LO_U32_B32_e64 0xffffffff, 0, 0, 0, 0, 0))
+ (V_MBCNT_LO_U32_B32_e64 0xffffffff, 0, 0, 0))
>;
-/********** ================== **********/
-/********** VOP3 Patterns **********/
-/********** ================== **********/
+//===----------------------------------------------------------------------===//
+// VOP3 Patterns
+//===----------------------------------------------------------------------===//
+
+def : IMad24Pat<V_MAD_I32_I24>;
+def : UMad24Pat<V_MAD_U32_U24>;
def : Pat <
- (f32 (fadd (fmul f32:$src0, f32:$src1), f32:$src2)),
- (V_MAD_F32 $src0, $src1, $src2)
+ (fadd f64:$src0, f64:$src1),
+ (V_ADD_F64 $src0, $src1, (i64 0))
+>;
+
+def : Pat <
+ (fmul f64:$src0, f64:$src1),
+ (V_MUL_F64 $src0, $src1, (i64 0))
+>;
+
+def : Pat <
+ (mul i32:$src0, i32:$src1),
+ (V_MUL_LO_I32 $src0, $src1, (i32 0))
+>;
+
+def : Pat <
+ (mulhu i32:$src0, i32:$src1),
+ (V_MUL_HI_U32 $src0, $src1, (i32 0))
+>;
+
+def : Pat <
+ (mulhs i32:$src0, i32:$src1),
+ (V_MUL_HI_I32 $src0, $src1, (i32 0))
>;
+defm : BFIPatterns <V_BFI_B32>;
+def : ROTRPattern <V_ALIGNBIT_B32>;
+
/********** ======================= **********/
/********** Load/Store Patterns **********/
/********** ======================= **********/
@@ -1962,41 +2155,6 @@ def : Pat <(atomic_load_add_local i32:$ptr, i32:$val),
def : Pat <(atomic_load_sub_local i32:$ptr, i32:$val),
(DS_SUB_U32_RTN 0, $ptr, $val, 0)>;
-/********** ================== **********/
-/********** SMRD Patterns **********/
-/********** ================== **********/
-
-multiclass SMRD_Pattern <SMRD Instr_IMM, SMRD Instr_SGPR, ValueType vt> {
-
- // 1. Offset as 8bit DWORD immediate
- def : Pat <
- (constant_load (add i64:$sbase, (i64 IMM8bitDWORD:$offset))),
- (vt (Instr_IMM $sbase, (as_dword_i32imm $offset)))
- >;
-
- // 2. Offset loaded in an 32bit SGPR
- def : Pat <
- (constant_load (SIadd64bit32bit i64:$sbase, imm:$offset)),
- (vt (Instr_SGPR $sbase, (S_MOV_B32 imm:$offset)))
- >;
-
- // 3. No offset at all
- def : Pat <
- (constant_load i64:$sbase),
- (vt (Instr_IMM $sbase, 0))
- >;
-}
-
-defm : SMRD_Pattern <S_LOAD_DWORD_IMM, S_LOAD_DWORD_SGPR, f32>;
-defm : SMRD_Pattern <S_LOAD_DWORD_IMM, S_LOAD_DWORD_SGPR, i32>;
-defm : SMRD_Pattern <S_LOAD_DWORDX2_IMM, S_LOAD_DWORDX2_SGPR, i64>;
-defm : SMRD_Pattern <S_LOAD_DWORDX2_IMM, S_LOAD_DWORDX2_SGPR, v2i32>;
-defm : SMRD_Pattern <S_LOAD_DWORDX4_IMM, S_LOAD_DWORDX4_SGPR, i128>;
-defm : SMRD_Pattern <S_LOAD_DWORDX4_IMM, S_LOAD_DWORDX4_SGPR, v4i32>;
-defm : SMRD_Pattern <S_LOAD_DWORDX8_IMM, S_LOAD_DWORDX8_SGPR, v32i8>;
-defm : SMRD_Pattern <S_LOAD_DWORDX8_IMM, S_LOAD_DWORDX8_SGPR, v8i32>;
-defm : SMRD_Pattern <S_LOAD_DWORDX16_IMM, S_LOAD_DWORDX16_SGPR, v16i32>;
-
//===----------------------------------------------------------------------===//
// MUBUF Patterns
//===----------------------------------------------------------------------===//
@@ -2083,7 +2241,7 @@ multiclass MUBUF_Load_Dword <ValueType vt, MUBUF offset, MUBUF offen, MUBUF idxe
MUBUF bothen> {
def : Pat <
- (vt (int_SI_buffer_load_dword i128:$rsrc, i32:$vaddr, i32:$soffset,
+ (vt (int_SI_buffer_load_dword v4i32:$rsrc, i32:$vaddr, i32:$soffset,
imm:$offset, 0, 0, imm:$glc, imm:$slc,
imm:$tfe)),
(offset $rsrc, $vaddr, (as_i16imm $offset), $soffset, (as_i1imm $glc),
@@ -2091,7 +2249,7 @@ multiclass MUBUF_Load_Dword <ValueType vt, MUBUF offset, MUBUF offen, MUBUF idxe
>;
def : Pat <
- (vt (int_SI_buffer_load_dword i128:$rsrc, i32:$vaddr, i32:$soffset,
+ (vt (int_SI_buffer_load_dword v4i32:$rsrc, i32:$vaddr, i32:$soffset,
imm, 1, 0, imm:$glc, imm:$slc,
imm:$tfe)),
(offen $rsrc, $vaddr, $soffset, (as_i1imm $glc), (as_i1imm $slc),
@@ -2099,7 +2257,7 @@ multiclass MUBUF_Load_Dword <ValueType vt, MUBUF offset, MUBUF offen, MUBUF idxe
>;
def : Pat <
- (vt (int_SI_buffer_load_dword i128:$rsrc, i32:$vaddr, i32:$soffset,
+ (vt (int_SI_buffer_load_dword v4i32:$rsrc, i32:$vaddr, i32:$soffset,
imm:$offset, 0, 1, imm:$glc, imm:$slc,
imm:$tfe)),
(idxen $rsrc, $vaddr, (as_i16imm $offset), $soffset, (as_i1imm $glc),
@@ -2107,7 +2265,7 @@ multiclass MUBUF_Load_Dword <ValueType vt, MUBUF offset, MUBUF offen, MUBUF idxe
>;
def : Pat <
- (vt (int_SI_buffer_load_dword i128:$rsrc, v2i32:$vaddr, i32:$soffset,
+ (vt (int_SI_buffer_load_dword v4i32:$rsrc, v2i32:$vaddr, i32:$soffset,
imm, 1, 1, imm:$glc, imm:$slc,
imm:$tfe)),
(bothen $rsrc, $vaddr, $soffset, (as_i1imm $glc), (as_i1imm $slc),
@@ -2128,7 +2286,7 @@ defm : MUBUF_Load_Dword <v4i32, BUFFER_LOAD_DWORDX4_OFFSET, BUFFER_LOAD_DWORDX4_
// TBUFFER_STORE_FORMAT_*, addr64=0
class MTBUF_StoreResource <ValueType vt, int num_channels, MTBUF opcode> : Pat<
- (SItbuffer_store i128:$rsrc, vt:$vdata, num_channels, i32:$vaddr,
+ (SItbuffer_store v4i32:$rsrc, vt:$vdata, num_channels, i32:$vaddr,
i32:$soffset, imm:$inst_offset, imm:$dfmt,
imm:$nfmt, imm:$offen, imm:$idxen,
imm:$glc, imm:$slc, imm:$tfe),
@@ -2156,12 +2314,13 @@ defm V_CEIL_F64 : VOP1_64 <0x00000018, "V_CEIL_F64",
defm V_FLOOR_F64 : VOP1_64 <0x0000001A, "V_FLOOR_F64",
[(set f64:$dst, (ffloor f64:$src0))]
>;
+defm V_RNDNE_F64 : VOP1_64 <0x00000019, "V_RNDNE_F64",
+ [(set f64:$dst, (frint f64:$src0))]
+>;
-defm V_RNDNE_F64 : VOP1_64 <0x00000019, "V_RNDNE_F64", []>;
-
-def V_QSAD_PK_U16_U8 : VOP3_32 <0x00000173, "V_QSAD_PK_U16_U8", []>;
-def V_MQSAD_U16_U8 : VOP3_32 <0x000000172, "V_MQSAD_U16_U8", []>;
-def V_MQSAD_U32_U8 : VOP3_32 <0x00000175, "V_MQSAD_U32_U8", []>;
+defm V_QSAD_PK_U16_U8 : VOP3_32 <0x00000173, "V_QSAD_PK_U16_U8", []>;
+defm V_MQSAD_U16_U8 : VOP3_32 <0x000000172, "V_MQSAD_U16_U8", []>;
+defm V_MQSAD_U32_U8 : VOP3_32 <0x00000175, "V_MQSAD_U32_U8", []>;
def V_MAD_U64_U32 : VOP3_64 <0x00000176, "V_MAD_U64_U32", []>;
// XXX - Does this set VCC?
@@ -2248,17 +2407,43 @@ def : Pat<
>;
//===----------------------------------------------------------------------===//
-// Miscellaneous Patterns
+// Conversion Patterns
//===----------------------------------------------------------------------===//
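+// S_BFE packs its field descriptor as offset | (width << 16), so the
+// constant 65536 selects a one-bit field at offset 0 and sign-extends it.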
+def : Pat<(i32 (sext_inreg i32:$src, i1)),
+ (S_BFE_I32 i32:$src, 65536)>; // 0 | 1 << 16
+
+// TODO: Match 64-bit BFE. SI has a 64-bit BFE, but it's scalar only, so it
+// might not be worth the effort, and it will need to be expanded to shifts
+// when fixing SGPR copies.
+
+// Handle sext_inreg in i64
def : Pat <
- (i64 (trunc i128:$x)),
+ (i64 (sext_inreg i64:$src, i1)),
(INSERT_SUBREG (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
- (i32 (EXTRACT_SUBREG $x, sub0)), sub0),
- (i32 (EXTRACT_SUBREG $x, sub1)), sub1)
+ (S_BFE_I32 (EXTRACT_SUBREG i64:$src, sub0), 65536), sub0), // 0 | 1 << 16
+ (S_MOV_B32 -1), sub1)
>;
def : Pat <
+ (i64 (sext_inreg i64:$src, i8)),
+ (INSERT_SUBREG (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+ (S_SEXT_I32_I8 (EXTRACT_SUBREG i64:$src, sub0)), sub0),
+ (S_MOV_B32 -1), sub1)
+>;
+
+def : Pat <
+ (i64 (sext_inreg i64:$src, i16)),
+ (INSERT_SUBREG (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+ (S_SEXT_I32_I16 (EXTRACT_SUBREG i64:$src, sub0)), sub0),
+ (S_MOV_B32 -1), sub1)
+>;
+
+//===----------------------------------------------------------------------===//
+// Miscellaneous Patterns
+//===----------------------------------------------------------------------===//
+
+def : Pat <
(i32 (trunc i64:$a)),
(EXTRACT_SUBREG $a, sub0)
>;
diff --git a/lib/Target/R600/SILowerControlFlow.cpp b/lib/Target/R600/SILowerControlFlow.cpp
index c2f8696..6601f2a 100644
--- a/lib/Target/R600/SILowerControlFlow.cpp
+++ b/lib/Target/R600/SILowerControlFlow.cpp
@@ -67,7 +67,7 @@ private:
static const unsigned SkipThreshold = 12;
static char ID;
- const TargetRegisterInfo *TRI;
+ const SIRegisterInfo *TRI;
const SIInstrInfo *TII;
bool shouldSkip(MachineBasicBlock *From, MachineBasicBlock *To);
@@ -92,11 +92,11 @@ private:
public:
SILowerControlFlowPass(TargetMachine &tm) :
- MachineFunctionPass(ID), TRI(0), TII(0) { }
+ MachineFunctionPass(ID), TRI(nullptr), TII(nullptr) { }
- virtual bool runOnMachineFunction(MachineFunction &MF);
+ bool runOnMachineFunction(MachineFunction &MF) override;
- const char *getPassName() const {
+ const char *getPassName() const override {
return "SI Lower control flow instructions";
}
@@ -427,7 +427,7 @@ void SILowerControlFlowPass::IndirectDst(MachineInstr &MI) {
bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) {
TII = static_cast<const SIInstrInfo*>(MF.getTarget().getInstrInfo());
- TRI = MF.getTarget().getRegisterInfo();
+ TRI = static_cast<const SIRegisterInfo*>(MF.getTarget().getRegisterInfo());
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
bool HaveKill = false;
diff --git a/lib/Target/R600/SILowerI1Copies.cpp b/lib/Target/R600/SILowerI1Copies.cpp
new file mode 100644
index 0000000..738c90b
--- /dev/null
+++ b/lib/Target/R600/SILowerI1Copies.cpp
@@ -0,0 +1,148 @@
+//===-- SILowerI1Copies.cpp - Lower I1 Copies -----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+/// i1 values are usually inserted by the CFG Structurizer pass and they are
+/// unique in that they can be copied from VALU to SALU registers.
+/// This is not possible for any other value type. Since there are no
+/// MOV instructions for i1, we need to use V_CMP_* and V_CNDMASK to move the i1.
+///
+//===----------------------------------------------------------------------===//
+//
+
+#define DEBUG_TYPE "si-i1-copies"
+#include "AMDGPU.h"
+#include "SIInstrInfo.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+
+namespace {
+
+class SILowerI1Copies : public MachineFunctionPass {
+public:
+ static char ID;
+
+public:
+ SILowerI1Copies() : MachineFunctionPass(ID) {
+ initializeSILowerI1CopiesPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF) override;
+
+ virtual const char *getPassName() const override {
+ return "SI Lower il Copies";
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<MachineDominatorTree>();
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+};
+
+} // End anonymous namespace.
+
+INITIALIZE_PASS_BEGIN(SILowerI1Copies, DEBUG_TYPE,
+ "SI Lower il Copies", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_END(SILowerI1Copies, DEBUG_TYPE,
+ "SI Lower il Copies", false, false)
+
+char SILowerI1Copies::ID = 0;
+
+char &llvm::SILowerI1CopiesID = SILowerI1Copies::ID;
+
+FunctionPass *llvm::createSILowerI1CopiesPass() {
+ return new SILowerI1Copies();
+}
+
+bool SILowerI1Copies::runOnMachineFunction(MachineFunction &MF) {
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(
+ MF.getTarget().getInstrInfo());
+ const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo();
+ std::vector<unsigned> I1Defs;
+
+ for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
+ BI != BE; ++BI) {
+
+ MachineBasicBlock &MBB = *BI;
+ MachineBasicBlock::iterator I, Next;
+ for (I = MBB.begin(); I != MBB.end(); I = Next) {
+ Next = std::next(I);
+ MachineInstr &MI = *I;
+
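+ // Rewrite the i1 pseudos (V_MOV_I1, V_AND_I1, V_OR_I1) to their 32-bit
+ // VALU forms, recording each def so its register class can be fixed up
+ // after the scan.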
+ if (MI.getOpcode() == AMDGPU::V_MOV_I1) {
+ I1Defs.push_back(MI.getOperand(0).getReg());
+ MI.setDesc(TII->get(AMDGPU::V_MOV_B32_e32));
+ continue;
+ }
+
+ if (MI.getOpcode() == AMDGPU::V_AND_I1) {
+ I1Defs.push_back(MI.getOperand(0).getReg());
+ MI.setDesc(TII->get(AMDGPU::V_AND_B32_e32));
+ continue;
+ }
+
+ if (MI.getOpcode() == AMDGPU::V_OR_I1) {
+ I1Defs.push_back(MI.getOperand(0).getReg());
+ MI.setDesc(TII->get(AMDGPU::V_OR_B32_e32));
+ continue;
+ }
+
+ if (MI.getOpcode() != AMDGPU::COPY ||
+ !TargetRegisterInfo::isVirtualRegister(MI.getOperand(0).getReg()) ||
+ !TargetRegisterInfo::isVirtualRegister(MI.getOperand(1).getReg()))
+ continue;
+
+
+ const TargetRegisterClass *DstRC =
+ MRI.getRegClass(MI.getOperand(0).getReg());
+ const TargetRegisterClass *SrcRC =
+ MRI.getRegClass(MI.getOperand(1).getReg());
+
+ if (DstRC == &AMDGPU::VReg_1RegClass &&
+ TRI->getCommonSubClass(SrcRC, &AMDGPU::SGPR_64RegClass)) {
+ I1Defs.push_back(MI.getOperand(0).getReg());
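+ // SALU -> VALU i1 copy: V_CNDMASK_B32 writes -1 in every lane whose bit
+ // is set in the SGPR condition and 0 in the others.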
+ BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(AMDGPU::V_CNDMASK_B32_e64))
+ .addOperand(MI.getOperand(0))
+ .addImm(0)
+ .addImm(-1)
+ .addOperand(MI.getOperand(1))
+ .addImm(0)
+ .addImm(0)
+ .addImm(0)
+ .addImm(0);
+ MI.eraseFromParent();
+ } else if (TRI->getCommonSubClass(DstRC, &AMDGPU::SGPR_64RegClass) &&
+ SrcRC == &AMDGPU::VReg_1RegClass) {
+ BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(AMDGPU::V_CMP_NE_I32_e64))
+ .addOperand(MI.getOperand(0))
+ .addImm(0)
+ .addOperand(MI.getOperand(1))
+ .addImm(0)
+ .addImm(0)
+ .addImm(0)
+ .addImm(0);
+ MI.eraseFromParent();
+ }
+ }
+ }
+
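+ // The rewritten pseudos now define 32-bit values, so retype every i1 def
+ // accordingly.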
+ for (unsigned Reg : I1Defs)
+ MRI.setRegClass(Reg, &AMDGPU::VReg_32RegClass);
+
+ return false;
+}
diff --git a/lib/Target/R600/SIMachineFunctionInfo.cpp b/lib/Target/R600/SIMachineFunctionInfo.cpp
index ea04346..af60995 100644
--- a/lib/Target/R600/SIMachineFunctionInfo.cpp
+++ b/lib/Target/R600/SIMachineFunctionInfo.cpp
@@ -10,8 +10,11 @@
#include "SIMachineFunctionInfo.h"
+#include "SIInstrInfo.h"
#include "SIRegisterInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/LLVMContext.h"
#define MAX_LANES 64
@@ -26,21 +29,57 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
PSInputAddr(0),
SpillTracker() { }
-static unsigned createLaneVGPR(MachineRegisterInfo &MRI) {
- return MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
+static unsigned createLaneVGPR(MachineRegisterInfo &MRI, MachineFunction *MF) {
+ unsigned VGPR = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
+
+ // We need to add this register as live out for the function, in order to
+ // have the live range calculated directly.
+ //
+ // When register spilling begins, we have already calculated the
+ // live intervals for all the registers. Since we are spilling SGPRs to
+ // VGPRs, we need to update the Lane VGPR's live interval every time we
+ // spill or restore a register.
+ //
+ // Unfortunately, there is no good way to update the live interval as
+ // the TargetInstrInfo callbacks for spilling and restoring don't give
+ // us access to the live interval information.
+ //
+ // We are lucky, though, because the InlineSpiller calls
+ // LiveRangeEdit::calculateRegClassAndHint() which iterates through
+ // all the new registers that have been created when restoring a register
+ // and calls LiveIntervals::getInterval(), which creates and computes
+ // the live interval for the newly created register. However, once this
+ // live interval is created, it doesn't change, and since we usually reuse
+ // the Lane VGPR multiple times, this means any uses after the first aren't
+ // added to the live interval.
+ //
+ // To work around this, we add Lane VGPRs to the function's live-out list,
+ // so that we can guarantee their live ranges will cover all of their uses.
+
+ for (MachineBasicBlock &MBB : *MF) {
+ if (MBB.back().getOpcode() == AMDGPU::S_ENDPGM) {
+ MBB.back().addOperand(*MF, MachineOperand::CreateReg(VGPR, false, true));
+ return VGPR;
+ }
+ }
+ MF->getFunction()->getContext().emitError(
+ "Could not found S_ENGPGM instrtuction.");
+ return VGPR;
}
-unsigned SIMachineFunctionInfo::RegSpillTracker::getNextLane(MachineRegisterInfo &MRI) {
+unsigned SIMachineFunctionInfo::RegSpillTracker::reserveLanes(
+ MachineRegisterInfo &MRI, MachineFunction *MF, unsigned NumRegs) {
+ unsigned StartLane = CurrentLane;
+ CurrentLane += NumRegs;
if (!LaneVGPR) {
- LaneVGPR = createLaneVGPR(MRI);
+ LaneVGPR = createLaneVGPR(MRI, MF);
} else {
- CurrentLane++;
- if (CurrentLane == MAX_LANES) {
- CurrentLane = 0;
- LaneVGPR = createLaneVGPR(MRI);
+ if (CurrentLane >= MAX_LANES) {
+ StartLane = CurrentLane = 0;
+ LaneVGPR = createLaneVGPR(MRI, MF);
}
}
- return CurrentLane;
+ return StartLane;
}
void SIMachineFunctionInfo::RegSpillTracker::addSpilledReg(unsigned FrameIndex,
diff --git a/lib/Target/R600/SIMachineFunctionInfo.h b/lib/Target/R600/SIMachineFunctionInfo.h
index 8dc82a0..96e619b 100644
--- a/lib/Target/R600/SIMachineFunctionInfo.h
+++ b/lib/Target/R600/SIMachineFunctionInfo.h
@@ -25,7 +25,7 @@ class MachineRegisterInfo;
/// This class keeps track of the SPI_SP_INPUT_ADDR config register, which
/// tells the hardware which interpolation parameters to load.
class SIMachineFunctionInfo : public AMDGPUMachineFunction {
- virtual void anchor();
+ void anchor() override;
public:
struct SpilledReg {
@@ -43,7 +43,12 @@ public:
public:
unsigned LaneVGPR;
RegSpillTracker() : CurrentLane(0), SpilledRegisters(), LaneVGPR(0) { }
- unsigned getNextLane(MachineRegisterInfo &MRI);
+ /// \p NumRegs The number of consecutive registers that need to be spilled.
+ /// This function will ensure that all registers are stored in
+ /// the same VGPR.
+ /// \returns The lane to be used for storing the first register.
+ unsigned reserveLanes(MachineRegisterInfo &MRI, MachineFunction *MF,
+ unsigned NumRegs = 1);
void addSpilledReg(unsigned FrameIndex, unsigned Reg, int Lane = -1);
const SpilledReg& getSpilledReg(unsigned FrameIndex);
bool programSpillsRegisters() { return !SpilledRegisters.empty(); }
diff --git a/lib/Target/R600/SIRegisterInfo.cpp b/lib/Target/R600/SIRegisterInfo.cpp
index 6cef195..c72d549 100644
--- a/lib/Target/R600/SIRegisterInfo.cpp
+++ b/lib/Target/R600/SIRegisterInfo.cpp
@@ -71,13 +71,12 @@ const TargetRegisterClass *SIRegisterInfo::getPhysRegClass(unsigned Reg) const {
&AMDGPU::SReg_256RegClass
};
- for (unsigned i = 0, e = sizeof(BaseClasses) /
- sizeof(const TargetRegisterClass*); i != e; ++i) {
- if (BaseClasses[i]->contains(Reg)) {
- return BaseClasses[i];
+ for (const TargetRegisterClass *BaseClass : BaseClasses) {
+ if (BaseClass->contains(Reg)) {
+ return BaseClass;
}
}
- return NULL;
+ return nullptr;
}
bool SIRegisterInfo::isSGPRClass(const TargetRegisterClass *RC) const {
@@ -113,7 +112,7 @@ const TargetRegisterClass *SIRegisterInfo::getEquivalentVGPRClass(
} else if (getCommonSubClass(SRC, &AMDGPU::SReg_512RegClass)) {
return &AMDGPU::VReg_512RegClass;
}
- return NULL;
+ return nullptr;
}
const TargetRegisterClass *SIRegisterInfo::getSubRegClass(
@@ -129,3 +128,10 @@ const TargetRegisterClass *SIRegisterInfo::getSubRegClass(
return &AMDGPU::VGPR_32RegClass;
}
}
+
+unsigned SIRegisterInfo::getPhysRegSubReg(unsigned Reg,
+ const TargetRegisterClass *SubRC,
+ unsigned Channel) const {
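+ // Tuple registers are laid out by hardware index, so the sub-register in
+ // Channel is the member of SubRC at the tuple's base index plus Channel.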
+ unsigned Index = getHWRegIndex(Reg);
+ return SubRC->getRegister(Index + Channel);
+}
diff --git a/lib/Target/R600/SIRegisterInfo.h b/lib/Target/R600/SIRegisterInfo.h
index 8148f7f..36b4fcd 100644
--- a/lib/Target/R600/SIRegisterInfo.h
+++ b/lib/Target/R600/SIRegisterInfo.h
@@ -27,22 +27,22 @@ struct SIRegisterInfo : public AMDGPURegisterInfo {
SIRegisterInfo(AMDGPUTargetMachine &tm);
- virtual BitVector getReservedRegs(const MachineFunction &MF) const;
+ BitVector getReservedRegs(const MachineFunction &MF) const override;
- virtual unsigned getRegPressureLimit(const TargetRegisterClass *RC,
- MachineFunction &MF) const;
+ unsigned getRegPressureLimit(const TargetRegisterClass *RC,
+ MachineFunction &MF) const override;
/// \param RC is an AMDIL reg class.
///
/// \returns the SI register class that is equivalent to \p RC.
- virtual const TargetRegisterClass *
- getISARegClass(const TargetRegisterClass *RC) const;
+ const TargetRegisterClass *
+ getISARegClass(const TargetRegisterClass *RC) const override;
/// \brief get the register class of the specified type to use in the
/// CFGStructurizer
- virtual const TargetRegisterClass * getCFGStructurizerRegClass(MVT VT) const;
+ const TargetRegisterClass * getCFGStructurizerRegClass(MVT VT) const override;
- virtual unsigned getHWRegIndex(unsigned Reg) const;
+ unsigned getHWRegIndex(unsigned Reg) const override;
/// \brief Return the 'base' register class for this register.
/// e.g. SGPR0 => SReg_32, VGPR => VReg_32 SGPR0_SGPR1 -> SReg_32, etc.
@@ -63,6 +63,12 @@ struct SIRegisterInfo : public AMDGPURegisterInfo {
/// be returned.
const TargetRegisterClass *getSubRegClass(const TargetRegisterClass *RC,
unsigned SubIdx) const;
+
+ /// \p Channel This is the register channel (e.g. a value from 0-16), not the
+ /// SubReg index.
+ /// \returns The sub-register of Reg that is in Channel.
+ unsigned getPhysRegSubReg(unsigned Reg, const TargetRegisterClass *SubRC,
+ unsigned Channel) const;
};
} // End namespace llvm
diff --git a/lib/Target/R600/SIRegisterInfo.td b/lib/Target/R600/SIRegisterInfo.td
index 65cf311..f1f01de 100644
--- a/lib/Target/R600/SIRegisterInfo.td
+++ b/lib/Target/R600/SIRegisterInfo.td
@@ -168,7 +168,7 @@ def SReg_64 : RegisterClass<"AMDGPU", [v2i32, i64, i1], 64,
(add SGPR_64Regs, VCCReg, EXECReg)
>;
-def SReg_128 : RegisterClass<"AMDGPU", [i128, v4i32], 128, (add SGPR_128)>;
+def SReg_128 : RegisterClass<"AMDGPU", [v4i32], 128, (add SGPR_128)>;
def SReg_256 : RegisterClass<"AMDGPU", [v32i8, v8i32, v8f32], 256, (add SGPR_256)>;
@@ -183,14 +183,16 @@ def VReg_96 : RegisterClass<"AMDGPU", [untyped], 96, (add VGPR_96)> {
let Size = 96;
}
-def VReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, i128], 128, (add VGPR_128)>;
+def VReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32], 128, (add VGPR_128)>;
def VReg_256 : RegisterClass<"AMDGPU", [v32i8, v8i32, v8f32], 256, (add VGPR_256)>;
def VReg_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 512, (add VGPR_512)>;
+def VReg_1 : RegisterClass<"AMDGPU", [i1], 32, (add VGPR_32)>;
+
//===----------------------------------------------------------------------===//
-// [SV]Src_* register classes, can have either an immediate or an register
+// [SV]Src_(32|64) register classes, which can hold either an immediate or a register
//===----------------------------------------------------------------------===//
def SSrc_32 : RegisterClass<"AMDGPU", [i32, f32], 32, (add SReg_32)>;
@@ -201,3 +203,9 @@ def VSrc_32 : RegisterClass<"AMDGPU", [i32, f32], 32, (add VReg_32, SReg_32)>;
def VSrc_64 : RegisterClass<"AMDGPU", [i64, f64], 64, (add VReg_64, SReg_64)>;
+//===----------------------------------------------------------------------===//
+// SGPR and VGPR register classes
+//===----------------------------------------------------------------------===//
+
+def VSrc_128 : RegisterClass<"AMDGPU", [v4i32, v4f32], 128,
+ (add VReg_128, SReg_128)>;
diff --git a/lib/Target/R600/SITypeRewriter.cpp b/lib/Target/R600/SITypeRewriter.cpp
index 9bf2caf..a0b6907 100644
--- a/lib/Target/R600/SITypeRewriter.cpp
+++ b/lib/Target/R600/SITypeRewriter.cpp
@@ -35,13 +35,13 @@ class SITypeRewriter : public FunctionPass,
static char ID;
Module *Mod;
Type *v16i8;
- Type *i128;
+ Type *v4i32;
public:
SITypeRewriter() : FunctionPass(ID) { }
- virtual bool doInitialization(Module &M);
- virtual bool runOnFunction(Function &F);
- virtual const char *getPassName() const {
+ bool doInitialization(Module &M) override;
+ bool runOnFunction(Function &F) override;
+ const char *getPassName() const override {
return "SI Type Rewriter";
}
void visitLoadInst(LoadInst &I);
@@ -56,7 +56,7 @@ char SITypeRewriter::ID = 0;
bool SITypeRewriter::doInitialization(Module &M) {
Mod = &M;
v16i8 = VectorType::get(Type::getInt8Ty(M.getContext()), 16);
- i128 = Type::getIntNTy(M.getContext(), 128);
+ v4i32 = VectorType::get(Type::getInt32Ty(M.getContext()), 4);
return false;
}
@@ -84,7 +84,8 @@ void SITypeRewriter::visitLoadInst(LoadInst &I) {
Type *ElemTy = PtrTy->getPointerElementType();
IRBuilder<> Builder(&I);
if (ElemTy == v16i8) {
- Value *BitCast = Builder.CreateBitCast(Ptr, Type::getIntNPtrTy(I.getContext(), 128, 2));
+ Value *BitCast = Builder.CreateBitCast(Ptr,
+ PointerType::get(v4i32, PtrTy->getPointerAddressSpace()));
LoadInst *Load = Builder.CreateLoad(BitCast);
SmallVector <std::pair<unsigned, MDNode*>, 8> MD;
I.getAllMetadataOtherThanDebugLoc(MD);
@@ -99,6 +100,7 @@ void SITypeRewriter::visitLoadInst(LoadInst &I) {
void SITypeRewriter::visitCallInst(CallInst &I) {
IRBuilder<> Builder(&I);
+
SmallVector <Value*, 8> Args;
SmallVector <Type*, 8> Types;
bool NeedToReplace = false;
@@ -107,10 +109,10 @@ void SITypeRewriter::visitCallInst(CallInst &I) {
for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) {
Value *Arg = I.getArgOperand(i);
if (Arg->getType() == v16i8) {
- Args.push_back(Builder.CreateBitCast(Arg, i128));
- Types.push_back(i128);
+ Args.push_back(Builder.CreateBitCast(Arg, v4i32));
+ Types.push_back(v4i32);
NeedToReplace = true;
- Name = Name + ".i128";
+ Name = Name + ".v4i32";
} else if (Arg->getType()->isVectorTy() &&
Arg->getType()->getVectorNumElements() == 1 &&
Arg->getType()->getVectorElementType() ==
@@ -144,12 +146,12 @@ void SITypeRewriter::visitCallInst(CallInst &I) {
void SITypeRewriter::visitBitCast(BitCastInst &I) {
IRBuilder<> Builder(&I);
- if (I.getDestTy() != i128) {
+ if (I.getDestTy() != v4i32) {
return;
}
if (BitCastInst *Op = dyn_cast<BitCastInst>(I.getOperand(0))) {
- if (Op->getSrcTy() == i128) {
+ if (Op->getSrcTy() == v4i32) {
I.replaceAllUsesWith(Op->getOperand(0));
I.eraseFromParent();
}
diff --git a/lib/Target/Sparc/AsmParser/LLVMBuild.txt b/lib/Target/Sparc/AsmParser/LLVMBuild.txt
index c3ddf5a..08fdc9d 100644
--- a/lib/Target/Sparc/AsmParser/LLVMBuild.txt
+++ b/lib/Target/Sparc/AsmParser/LLVMBuild.txt
@@ -19,5 +19,5 @@
type = Library
name = SparcAsmParser
parent = Sparc
-required_libraries = MC MCParser Support SparcDesc SparcInfo
+required_libraries = MC MCParser SparcDesc SparcInfo Support
add_to_library_groups = Sparc
diff --git a/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp b/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp
index 2ff6cdd..da88820 100644
--- a/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp
+++ b/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp
@@ -49,15 +49,15 @@ class SparcAsmParser : public MCTargetAsmParser {
bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
SmallVectorImpl<MCParsedAsmOperand*> &Operands,
MCStreamer &Out, unsigned &ErrorInfo,
- bool MatchingInlineAsm);
- bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc);
+ bool MatchingInlineAsm) override;
+ bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
SMLoc NameLoc,
- SmallVectorImpl<MCParsedAsmOperand*> &Operands);
- bool ParseDirective(AsmToken DirectiveID);
+ SmallVectorImpl<MCParsedAsmOperand*> &Operands) override;
+ bool ParseDirective(AsmToken DirectiveID) override;
- virtual unsigned validateTargetOperandClass(MCParsedAsmOperand *Op,
- unsigned Kind);
+ unsigned validateTargetOperandClass(MCParsedAsmOperand *Op,
+ unsigned Kind) override;
// Custom parse functions for Sparc specific operands.
OperandMatchResultTy
@@ -83,7 +83,8 @@ class SparcAsmParser : public MCTargetAsmParser {
bool is64Bit() const { return STI.getTargetTriple().startswith("sparcv9"); }
public:
SparcAsmParser(MCSubtargetInfo &sti, MCAsmParser &parser,
- const MCInstrInfo &MII)
+ const MCInstrInfo &MII,
+ const MCTargetOptions &Options)
: MCTargetAsmParser(), STI(sti), Parser(parser) {
// Initialize the set of available features.
setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
@@ -181,10 +182,10 @@ private:
struct MemOp Mem;
};
public:
- bool isToken() const { return Kind == k_Token; }
- bool isReg() const { return Kind == k_Register; }
- bool isImm() const { return Kind == k_Immediate; }
- bool isMem() const { return isMEMrr() || isMEMri(); }
+ bool isToken() const override { return Kind == k_Token; }
+ bool isReg() const override { return Kind == k_Register; }
+ bool isImm() const override { return Kind == k_Immediate; }
+ bool isMem() const override { return isMEMrr() || isMEMri(); }
bool isMEMrr() const { return Kind == k_MemoryReg; }
bool isMEMri() const { return Kind == k_MemoryImm; }
@@ -203,7 +204,7 @@ public:
return StringRef(Tok.Data, Tok.Length);
}
- unsigned getReg() const {
+ unsigned getReg() const override {
assert((Kind == k_Register) && "Invalid access!");
return Reg.RegNum;
}
@@ -229,22 +230,22 @@ public:
}
/// getStartLoc - Get the location of the first token of this operand.
- SMLoc getStartLoc() const {
+ SMLoc getStartLoc() const override {
return StartLoc;
}
/// getEndLoc - Get the location of the last token of this operand.
- SMLoc getEndLoc() const {
+ SMLoc getEndLoc() const override {
return EndLoc;
}
- virtual void print(raw_ostream &OS) const {
+ void print(raw_ostream &OS) const override {
switch (Kind) {
case k_Token: OS << "Token: " << getToken() << "\n"; break;
case k_Register: OS << "Reg: #" << getReg() << "\n"; break;
case k_Immediate: OS << "Imm: " << getImm() << "\n"; break;
case k_MemoryReg: OS << "Mem: " << getMemBase() << "+"
<< getMemOffsetReg() << "\n"; break;
- case k_MemoryImm: assert(getMemOff() != 0);
+ case k_MemoryImm: assert(getMemOff() != nullptr);
OS << "Mem: " << getMemBase()
<< "+" << *getMemOff()
<< "\n"; break;
@@ -264,7 +265,7 @@ public:
void addExpr(MCInst &Inst, const MCExpr *Expr) const{
// Add as immediate when possible. Null MCExpr = 0.
- if (Expr == 0)
+ if (!Expr)
Inst.addOperand(MCOperand::CreateImm(0));
else if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr))
Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
@@ -323,7 +324,7 @@ public:
assert(Op->Reg.Kind == rk_FloatReg);
unsigned regIdx = Reg - Sparc::F0;
if (regIdx % 2 || regIdx > 31)
- return 0;
+ return nullptr;
Op->Reg.RegNum = DoubleRegs[regIdx / 2];
Op->Reg.Kind = rk_DoubleReg;
return Op;
@@ -337,13 +338,13 @@ public:
case rk_FloatReg:
regIdx = Reg - Sparc::F0;
if (regIdx % 4 || regIdx > 31)
- return 0;
+ return nullptr;
Reg = QuadFPRegs[regIdx / 4];
break;
case rk_DoubleReg:
regIdx = Reg - Sparc::D0;
if (regIdx % 2 || regIdx > 31)
- return 0;
+ return nullptr;
Reg = QuadFPRegs[regIdx / 2];
break;
}
@@ -357,7 +358,7 @@ public:
Op->Kind = k_MemoryReg;
Op->Mem.Base = Base;
Op->Mem.OffsetReg = offsetReg;
- Op->Mem.Off = 0;
+ Op->Mem.Off = nullptr;
return Op;
}
@@ -564,7 +565,7 @@ parseMEMOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands)
case AsmToken::Comma:
case AsmToken::RBrac:
case AsmToken::EndOfStatement:
- Operands.push_back(SparcOperand::CreateMEMri(BaseReg, 0, S, E));
+ Operands.push_back(SparcOperand::CreateMEMri(BaseReg, nullptr, S, E));
return MatchOperand_Success;
case AsmToken::Plus:
@@ -574,7 +575,7 @@ parseMEMOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands)
break;
}
- SparcOperand *Offset = 0;
+ SparcOperand *Offset = nullptr;
OperandMatchResultTy ResTy = parseSparcAsmOperand(Offset);
if (ResTy != MatchOperand_Success || !Offset)
return MatchOperand_NoMatch;
@@ -636,7 +637,7 @@ parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
return MatchOperand_Success;
}
- SparcOperand *Op = 0;
+ SparcOperand *Op = nullptr;
ResTy = parseSparcAsmOperand(Op, (Mnemonic == "call"));
if (ResTy != MatchOperand_Success || !Op)
@@ -656,7 +657,7 @@ SparcAsmParser::parseSparcAsmOperand(SparcOperand *&Op, bool isCall)
SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
const MCExpr *EVal;
- Op = 0;
+ Op = nullptr;
switch (getLexer().getKind()) {
default: break;
diff --git a/lib/Target/Sparc/DelaySlotFiller.cpp b/lib/Target/Sparc/DelaySlotFiller.cpp
index 88fba39..f3441ff 100644
--- a/lib/Target/Sparc/DelaySlotFiller.cpp
+++ b/lib/Target/Sparc/DelaySlotFiller.cpp
@@ -12,7 +12,6 @@
// NOP is placed.
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "delay-slot-filler"
#include "Sparc.h"
#include "SparcSubtarget.h"
#include "llvm/ADT/SmallSet.h"
@@ -27,6 +26,8 @@
using namespace llvm;
+#define DEBUG_TYPE "delay-slot-filler"
+
STATISTIC(FilledSlots, "Number of delay slots filled");
static cl::opt<bool> DisableDelaySlotFiller(
@@ -49,12 +50,12 @@ namespace {
Subtarget(&TM.getSubtarget<SparcSubtarget>()) {
}
- virtual const char *getPassName() const {
+ const char *getPassName() const override {
return "SPARC Delay Slot Filler";
}
bool runOnMachineBasicBlock(MachineBasicBlock &MBB);
- bool runOnMachineFunction(MachineFunction &F) {
+ bool runOnMachineFunction(MachineFunction &F) override {
bool Changed = false;
// This pass invalidates liveness information when it reorders
diff --git a/lib/Target/Sparc/Disassembler/LLVMBuild.txt b/lib/Target/Sparc/Disassembler/LLVMBuild.txt
index e7387cd..c27398f 100644
--- a/lib/Target/Sparc/Disassembler/LLVMBuild.txt
+++ b/lib/Target/Sparc/Disassembler/LLVMBuild.txt
@@ -19,5 +19,5 @@
type = Library
name = SparcDisassembler
parent = Sparc
-required_libraries = MC Support SparcInfo
+required_libraries = MC SparcInfo Support
add_to_library_groups = Sparc
diff --git a/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp b/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp
index 5cd99d6..4df0990 100644
--- a/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp
+++ b/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp
@@ -11,8 +11,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "sparc-disassembler"
-
#include "Sparc.h"
#include "SparcRegisterInfo.h"
#include "SparcSubtarget.h"
@@ -23,6 +21,8 @@
using namespace llvm;
+#define DEBUG_TYPE "sparc-disassembler"
+
typedef MCDisassembler::DecodeStatus DecodeStatus;
namespace {
@@ -32,22 +32,18 @@ class SparcDisassembler : public MCDisassembler {
public:
/// Constructor - Initializes the disassembler.
///
- SparcDisassembler(const MCSubtargetInfo &STI, const MCRegisterInfo *Info) :
- MCDisassembler(STI), RegInfo(Info)
+ SparcDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx) :
+ MCDisassembler(STI, Ctx)
{}
virtual ~SparcDisassembler() {}
- const MCRegisterInfo *getRegInfo() const { return RegInfo.get(); }
-
/// getInstruction - See MCDisassembler.
- virtual DecodeStatus getInstruction(MCInst &instr,
- uint64_t &size,
- const MemoryObject &region,
- uint64_t address,
- raw_ostream &vStream,
- raw_ostream &cStream) const;
-private:
- OwningPtr<const MCRegisterInfo> RegInfo;
+ DecodeStatus getInstruction(MCInst &instr,
+ uint64_t &size,
+ const MemoryObject &region,
+ uint64_t address,
+ raw_ostream &vStream,
+ raw_ostream &cStream) const override;
};
}
@@ -58,8 +54,9 @@ namespace llvm {
static MCDisassembler *createSparcDisassembler(
const Target &T,
- const MCSubtargetInfo &STI) {
- return new SparcDisassembler(STI, T.createMCRegInfo(""));
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx) {
+ return new SparcDisassembler(STI, Ctx);
}
diff --git a/lib/Target/Sparc/InstPrinter/SparcInstPrinter.cpp b/lib/Target/Sparc/InstPrinter/SparcInstPrinter.cpp
index fabc125..261fb38 100644
--- a/lib/Target/Sparc/InstPrinter/SparcInstPrinter.cpp
+++ b/lib/Target/Sparc/InstPrinter/SparcInstPrinter.cpp
@@ -11,15 +11,17 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "asm-printer"
#include "SparcInstPrinter.h"
#include "Sparc.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
+#define DEBUG_TYPE "asm-printer"
+
// The generated AsmMatcher SparcGenAsmWriter uses "Sparc" as the target
// namespace. But SPARC backend uses "SP" as its namespace.
namespace llvm {
diff --git a/lib/Target/Sparc/InstPrinter/SparcInstPrinter.h b/lib/Target/Sparc/InstPrinter/SparcInstPrinter.h
index 45ee6c0..8fe4075 100644
--- a/lib/Target/Sparc/InstPrinter/SparcInstPrinter.h
+++ b/lib/Target/Sparc/InstPrinter/SparcInstPrinter.h
@@ -30,19 +30,21 @@ public:
const MCSubtargetInfo &sti)
: MCInstPrinter(MAI, MII, MRI), STI(sti) {}
- virtual void printRegName(raw_ostream &OS, unsigned RegNo) const;
- virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot);
+ void printRegName(raw_ostream &OS, unsigned RegNo) const override;
+ void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot) override;
bool printSparcAliasInstr(const MCInst *MI, raw_ostream &OS);
bool isV9() const;
// Autogenerated by tblgen.
void printInstruction(const MCInst *MI, raw_ostream &O);
bool printAliasInstr(const MCInst *MI, raw_ostream &O);
+ void printCustomAliasOperand(const MCInst *MI, unsigned OpIdx,
+ unsigned PrintMethodIdx, raw_ostream &O);
static const char *getRegisterName(unsigned RegNo);
void printOperand(const MCInst *MI, int opNum, raw_ostream &OS);
void printMemOperand(const MCInst *MI, int opNum, raw_ostream &OS,
- const char *Modifier = 0);
+ const char *Modifier = nullptr);
void printCCOperand(const MCInst *MI, int opNum, raw_ostream &OS);
bool printGetPCX(const MCInst *MI, unsigned OpNo, raw_ostream &OS);
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp b/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp
index 39c9996..7d517b6 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp
+++ b/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp
@@ -102,11 +102,11 @@ namespace {
public:
SparcAsmBackend(const Target &T) : MCAsmBackend(), TheTarget(T) {}
- unsigned getNumFixupKinds() const {
+ unsigned getNumFixupKinds() const override {
return Sparc::NumTargetFixupKinds;
}
- const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const {
+ const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override {
const static MCFixupKindInfo Infos[Sparc::NumTargetFixupKinds] = {
// name offset bits flags
{ "fixup_sparc_call30", 2, 30, MCFixupKindInfo::FKF_IsPCRel },
@@ -184,7 +184,7 @@ namespace {
}
}
- bool mayNeedRelaxation(const MCInst &Inst) const {
+ bool mayNeedRelaxation(const MCInst &Inst) const override {
// FIXME.
return false;
}
@@ -194,17 +194,17 @@ namespace {
bool fixupNeedsRelaxation(const MCFixup &Fixup,
uint64_t Value,
const MCRelaxableFragment *DF,
- const MCAsmLayout &Layout) const {
+ const MCAsmLayout &Layout) const override {
// FIXME.
assert(0 && "fixupNeedsRelaxation() unimplemented");
return false;
}
- void relaxInstruction(const MCInst &Inst, MCInst &Res) const {
+ void relaxInstruction(const MCInst &Inst, MCInst &Res) const override {
// FIXME.
assert(0 && "relaxInstruction() unimplemented");
}
- bool writeNopData(uint64_t Count, MCObjectWriter *OW) const {
+ bool writeNopData(uint64_t Count, MCObjectWriter *OW) const override {
// Cannot emit NOP with size not multiple of 32 bits.
if (Count % 4 != 0)
return false;
@@ -229,7 +229,7 @@ namespace {
SparcAsmBackend(T), OSType(OSType) { }
void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
- uint64_t Value, bool IsPCRel) const {
+ uint64_t Value, bool IsPCRel) const override {
Value = adjustFixupValue(Fixup.getKind(), Value);
if (!Value) return; // Doesn't change encoding.
@@ -244,7 +244,7 @@ namespace {
}
- MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
+ MCObjectWriter *createObjectWriter(raw_ostream &OS) const override {
uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(OSType);
return createSparcELFObjectWriter(OS, is64Bit(), OSABI);
}
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp
index ef5f8ce..6875fc6 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp
+++ b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp
@@ -32,7 +32,7 @@ SparcELFMCAsmInfo::SparcELFMCAsmInfo(StringRef TT) {
Data16bitsDirective = "\t.half\t";
Data32bitsDirective = "\t.word\t";
// .xword is only supported by V9.
- Data64bitsDirective = (isV9) ? "\t.xword\t" : 0;
+ Data64bitsDirective = (isV9) ? "\t.xword\t" : nullptr;
ZeroDirective = "\t.skip\t";
CommentString = "!";
HasLEB128 = true;
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h
index d53d09d..e126b68 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h
+++ b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h
@@ -20,15 +20,15 @@ namespace llvm {
class StringRef;
class SparcELFMCAsmInfo : public MCAsmInfoELF {
- virtual void anchor();
+ void anchor() override;
public:
explicit SparcELFMCAsmInfo(StringRef TT);
- virtual const MCExpr* getExprForPersonalitySymbol(const MCSymbol *Sym,
- unsigned Encoding,
- MCStreamer &Streamer) const;
- virtual const MCExpr* getExprForFDESymbol(const MCSymbol *Sym,
- unsigned Encoding,
- MCStreamer &Streamer) const;
+ const MCExpr*
+ getExprForPersonalitySymbol(const MCSymbol *Sym, unsigned Encoding,
+ MCStreamer &Streamer) const override;
+ const MCExpr* getExprForFDESymbol(const MCSymbol *Sym,
+ unsigned Encoding,
+ MCStreamer &Streamer) const override;
};
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp b/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp
index 310fbd9..b19ad7b 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp
+++ b/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp
@@ -11,7 +11,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "mccodeemitter"
#include "SparcMCExpr.h"
#include "MCTargetDesc/SparcFixupKinds.h"
#include "SparcMCTargetDesc.h"
@@ -26,6 +25,8 @@
using namespace llvm;
+#define DEBUG_TYPE "mccodeemitter"
+
STATISTIC(MCNumEmitted, "Number of MC instructions emitted");
namespace {
@@ -41,7 +42,7 @@ public:
void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
+ const MCSubtargetInfo &STI) const override;
// getBinaryCodeForInstr - TableGen'erated function for getting the
// binary encoding for an instruction.
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp b/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp
index e6b2aca..ae57fdc 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp
+++ b/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp
@@ -12,7 +12,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "sparcmcexpr"
#include "SparcMCExpr.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
@@ -23,6 +22,8 @@
using namespace llvm;
+#define DEBUG_TYPE "sparcmcexpr"
+
const SparcMCExpr*
SparcMCExpr::Create(VariantKind Kind, const MCExpr *Expr,
MCContext &Ctx) {
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.h b/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.h
index be6526e..78dd945 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.h
+++ b/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.h
@@ -85,15 +85,15 @@ public:
Sparc::Fixups getFixupKind() const { return getFixupKind(Kind); }
/// @}
- void PrintImpl(raw_ostream &OS) const;
+ void PrintImpl(raw_ostream &OS) const override;
bool EvaluateAsRelocatableImpl(MCValue &Res,
- const MCAsmLayout *Layout) const;
- void AddValueSymbols(MCAssembler *) const;
- const MCSection *FindAssociatedSection() const {
+ const MCAsmLayout *Layout) const override;
+ void AddValueSymbols(MCAssembler *) const override;
+ const MCSection *FindAssociatedSection() const override {
return getSubExpr()->FindAssociatedSection();
}
- void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const;
+ void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const override;
static bool classof(const MCExpr *E) {
return E->getKind() == MCExpr::Target;
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp b/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp
index c69af56..571017d 100644
--- a/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp
+++ b/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp
@@ -22,6 +22,8 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
+using namespace llvm;
+
#define GET_INSTRINFO_MC_DESC
#include "SparcGenInstrInfo.inc"
@@ -31,14 +33,11 @@
#define GET_REGINFO_MC_DESC
#include "SparcGenRegisterInfo.inc"
-using namespace llvm;
-
-
static MCAsmInfo *createSparcMCAsmInfo(const MCRegisterInfo &MRI,
StringRef TT) {
MCAsmInfo *MAI = new SparcELFMCAsmInfo(TT);
unsigned Reg = MRI.getDwarfRegNum(SP::O6, true);
- MCCFIInstruction Inst = MCCFIInstruction::createDefCfa(0, Reg, 0);
+ MCCFIInstruction Inst = MCCFIInstruction::createDefCfa(nullptr, Reg, 0);
MAI->addInitialFrameState(Inst);
return MAI;
}
@@ -47,7 +46,7 @@ static MCAsmInfo *createSparcV9MCAsmInfo(const MCRegisterInfo &MRI,
StringRef TT) {
MCAsmInfo *MAI = new SparcELFMCAsmInfo(TT);
unsigned Reg = MRI.getDwarfRegNum(SP::O6, true);
- MCCFIInstruction Inst = MCCFIInstruction::createDefCfa(0, Reg, 2047);
+ MCCFIInstruction Inst = MCCFIInstruction::createDefCfa(nullptr, Reg, 2047);
MAI->addInitialFrameState(Inst);
return MAI;
}
@@ -136,13 +135,12 @@ static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
static MCStreamer *
createMCAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS,
- bool isVerboseAsm, bool useCFI, bool useDwarfDirectory,
+ bool isVerboseAsm, bool useDwarfDirectory,
MCInstPrinter *InstPrint, MCCodeEmitter *CE,
MCAsmBackend *TAB, bool ShowInst) {
- MCStreamer *S =
- llvm::createAsmStreamer(Ctx, OS, isVerboseAsm, useCFI, useDwarfDirectory,
- InstPrint, CE, TAB, ShowInst);
+ MCStreamer *S = llvm::createAsmStreamer(
+ Ctx, OS, isVerboseAsm, useDwarfDirectory, InstPrint, CE, TAB, ShowInst);
new SparcTargetAsmStreamer(*S, OS);
return S;
}
diff --git a/lib/Target/Sparc/SparcAsmPrinter.cpp b/lib/Target/Sparc/SparcAsmPrinter.cpp
index 50506a6..1b7330e 100644
--- a/lib/Target/Sparc/SparcAsmPrinter.cpp
+++ b/lib/Target/Sparc/SparcAsmPrinter.cpp
@@ -12,7 +12,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "asm-printer"
#include "Sparc.h"
#include "InstPrinter/SparcInstPrinter.h"
#include "MCTargetDesc/SparcMCExpr.h"
@@ -35,6 +34,8 @@
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
+#define DEBUG_TYPE "asm-printer"
+
namespace {
class SparcAsmPrinter : public AsmPrinter {
SparcTargetStreamer &getTargetStreamer() {
@@ -45,18 +46,18 @@ namespace {
explicit SparcAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
: AsmPrinter(TM, Streamer) {}
- virtual const char *getPassName() const {
+ const char *getPassName() const override {
return "Sparc Assembly Printer";
}
void printOperand(const MachineInstr *MI, int opNum, raw_ostream &OS);
void printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &OS,
- const char *Modifier = 0);
+ const char *Modifier = nullptr);
void printCCOperand(const MachineInstr *MI, int opNum, raw_ostream &OS);
- virtual void EmitFunctionBodyStart();
- virtual void EmitInstruction(const MachineInstr *MI);
- virtual void EmitEndOfAsmFile(Module &M);
+ void EmitFunctionBodyStart() override;
+ void EmitInstruction(const MachineInstr *MI) override;
+ void EmitEndOfAsmFile(Module &M) override;
static const char *getRegisterName(unsigned RegNo) {
return SparcInstPrinter::getRegisterName(RegNo);
@@ -64,10 +65,10 @@ namespace {
bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
unsigned AsmVariant, const char *ExtraCode,
- raw_ostream &O);
+ raw_ostream &O) override;
bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
unsigned AsmVariant, const char *ExtraCode,
- raw_ostream &O);
+ raw_ostream &O) override;
void LowerGETPCXAndEmitMCInsts(const MachineInstr *MI,
const MCSubtargetInfo &STI);
diff --git a/lib/Target/Sparc/SparcCodeEmitter.cpp b/lib/Target/Sparc/SparcCodeEmitter.cpp
index 4f8d477..247da2a 100644
--- a/lib/Target/Sparc/SparcCodeEmitter.cpp
+++ b/lib/Target/Sparc/SparcCodeEmitter.cpp
@@ -12,7 +12,6 @@
//
//===---------------------------------------------------------------------===//
-#define DEBUG_TYPE "jit"
#include "Sparc.h"
#include "MCTargetDesc/SparcMCExpr.h"
#include "SparcRelocations.h"
@@ -25,6 +24,8 @@
using namespace llvm;
+#define DEBUG_TYPE "jit"
+
STATISTIC(NumEmitted, "Number of machine instructions emitted");
namespace {
@@ -39,7 +40,7 @@ class SparcCodeEmitter : public MachineFunctionPass {
const std::vector<MachineConstantPoolEntry> *MCPEs;
bool IsPIC;
- void getAnalysisUsage(AnalysisUsage &AU) const {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<MachineModuleInfo> ();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -48,13 +49,13 @@ class SparcCodeEmitter : public MachineFunctionPass {
public:
SparcCodeEmitter(TargetMachine &tm, JITCodeEmitter &mce)
- : MachineFunctionPass(ID), JTI(0), II(0), TD(0),
- TM(tm), MCE(mce), MCPEs(0),
+ : MachineFunctionPass(ID), JTI(nullptr), II(nullptr), TD(nullptr),
+ TM(tm), MCE(mce), MCPEs(nullptr),
IsPIC(TM.getRelocationModel() == Reloc::PIC_) {}
- bool runOnMachineFunction(MachineFunction &MF);
+ bool runOnMachineFunction(MachineFunction &MF) override;
- virtual const char *getPassName() const {
+ const char *getPassName() const override {
return "Sparc Machine Code Emitter";
}
diff --git a/lib/Target/Sparc/SparcFrameLowering.cpp b/lib/Target/Sparc/SparcFrameLowering.cpp
index d96a4c0..a37da94 100644
--- a/lib/Target/Sparc/SparcFrameLowering.cpp
+++ b/lib/Target/Sparc/SparcFrameLowering.cpp
@@ -109,18 +109,21 @@ void SparcFrameLowering::emitPrologue(MachineFunction &MF) const {
// Emit ".cfi_def_cfa_register 30".
unsigned CFIIndex =
MMI.addFrameInst(MCCFIInstruction::createDefCfaRegister(nullptr, regFP));
- BuildMI(MBB, MBBI, dl, TII.get(SP::CFI_INSTRUCTION)).addCFIIndex(CFIIndex);
+ BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
// Emit ".cfi_window_save".
CFIIndex = MMI.addFrameInst(MCCFIInstruction::createWindowSave(nullptr));
- BuildMI(MBB, MBBI, dl, TII.get(SP::CFI_INSTRUCTION)).addCFIIndex(CFIIndex);
+ BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
unsigned regInRA = MRI->getDwarfRegNum(SP::I7, true);
unsigned regOutRA = MRI->getDwarfRegNum(SP::O7, true);
// Emit ".cfi_register 15, 31".
CFIIndex = MMI.addFrameInst(
MCCFIInstruction::createRegister(nullptr, regOutRA, regInRA));
- BuildMI(MBB, MBBI, dl, TII.get(SP::CFI_INSTRUCTION)).addCFIIndex(CFIIndex);
+ BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
}
void SparcFrameLowering::
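The three rewrites above all follow one pattern: a frame-related directive is appended to MachineModuleInfo's table of CFI instructions, and the generic TargetOpcode::CFI_INSTRUCTION pseudo carries only the returned index, replacing the target-local SP::CFI_INSTRUCTION. A minimal standalone sketch of that index-based scheme (illustrative types, not LLVM's):

    #include <cstdio>
    #include <string>
    #include <vector>

    // Hypothetical stand-ins for MMI.addFrameInst(...) and .addCFIIndex(...).
    struct FrameInstTable {
      std::vector<std::string> Insts;
      unsigned add(std::string Inst) {  // returns the index, like addFrameInst
        Insts.push_back(std::move(Inst));
        return static_cast<unsigned>(Insts.size() - 1);
      }
    };

    int main() {
      FrameInstTable MMI;
      unsigned CFIIndex = MMI.add(".cfi_def_cfa_register 30");
      // A CFI_INSTRUCTION machine instr stores only CFIIndex; the printer
      // looks the directive back up when emitting assembly.
      std::printf("emit: %s (index %u)\n", MMI.Insts[CFIIndex].c_str(), CFIIndex);
    }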
diff --git a/lib/Target/Sparc/SparcFrameLowering.h b/lib/Target/Sparc/SparcFrameLowering.h
index 072fde3..bda7b7c 100644
--- a/lib/Target/Sparc/SparcFrameLowering.h
+++ b/lib/Target/Sparc/SparcFrameLowering.h
@@ -31,17 +31,18 @@ public:
/// emitProlog/emitEpilog - These methods insert prolog and epilog code into
/// the function.
- void emitPrologue(MachineFunction &MF) const;
- void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+ void emitPrologue(MachineFunction &MF) const override;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
- void eliminateCallFramePseudoInstr(MachineFunction &MF,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const;
+ void
+ eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const override;
- bool hasReservedCallFrame(const MachineFunction &MF) const;
- bool hasFP(const MachineFunction &MF) const;
+ bool hasReservedCallFrame(const MachineFunction &MF) const override;
+ bool hasFP(const MachineFunction &MF) const override;
void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
- RegScavenger *RS = NULL) const;
+ RegScavenger *RS = nullptr) const override;
private:
// Remap input registers to output registers for leaf procedure.
diff --git a/lib/Target/Sparc/SparcISelDAGToDAG.cpp b/lib/Target/Sparc/SparcISelDAGToDAG.cpp
index b012bfd..2fade27 100644
--- a/lib/Target/Sparc/SparcISelDAGToDAG.cpp
+++ b/lib/Target/Sparc/SparcISelDAGToDAG.cpp
@@ -41,7 +41,7 @@ public:
TM(tm) {
}
- SDNode *Select(SDNode *N);
+ SDNode *Select(SDNode *N) override;
// Complex Pattern Selectors.
bool SelectADDRrr(SDValue N, SDValue &R1, SDValue &R2);
@@ -49,11 +49,11 @@ public:
/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
/// inline asm expressions.
- virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
- char ConstraintCode,
- std::vector<SDValue> &OutOps);
+ bool SelectInlineAsmMemoryOperand(const SDValue &Op,
+ char ConstraintCode,
+ std::vector<SDValue> &OutOps) override;
- virtual const char *getPassName() const {
+ const char *getPassName() const override {
return "SPARC DAG->DAG Pattern Instruction Selection";
}
@@ -143,7 +143,7 @@ SDNode *SparcDAGToDAGISel::Select(SDNode *N) {
SDLoc dl(N);
if (N->isMachineOpcode()) {
N->setNodeId(-1);
- return NULL; // Already selected.
+ return nullptr; // Already selected.
}
switch (N->getOpcode()) {
diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp
index 8e720ee..ef61466 100644
--- a/lib/Target/Sparc/SparcISelLowering.cpp
+++ b/lib/Target/Sparc/SparcISelLowering.cpp
@@ -53,7 +53,7 @@ static bool CC_Sparc_Assign_f64(unsigned &ValNo, MVT &ValVT,
MVT &LocVT, CCValAssign::LocInfo &LocInfo,
ISD::ArgFlagsTy &ArgFlags, CCState &State)
{
- static const uint16_t RegList[] = {
+ static const MCPhysReg RegList[] = {
SP::I0, SP::I1, SP::I2, SP::I3, SP::I4, SP::I5
};
// Try to get first reg.
@@ -235,8 +235,7 @@ SparcTargetLowering::LowerReturn_32(SDValue Chain,
if (Flag.getNode())
RetOps.push_back(Flag);
- return DAG.getNode(SPISD::RET_FLAG, DL, MVT::Other,
- &RetOps[0], RetOps.size());
+ return DAG.getNode(SPISD::RET_FLAG, DL, MVT::Other, RetOps);
}
// Lower return values for the 64-bit ABI.
@@ -315,8 +314,7 @@ SparcTargetLowering::LowerReturn_64(SDValue Chain,
if (Flag.getNode())
RetOps.push_back(Flag);
- return DAG.getNode(SPISD::RET_FLAG, DL, MVT::Other,
- &RetOps[0], RetOps.size());
+ return DAG.getNode(SPISD::RET_FLAG, DL, MVT::Other, RetOps);
}
SDValue SparcTargetLowering::
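The `&RetOps[0], RetOps.size()` to `RetOps` rewrites recur throughout this patch: SelectionDAG entry points such as getNode now take an ArrayRef instead of a raw pointer/count pair, so containers and C arrays convert implicitly and the length can no longer drift out of sync with the data. A simplified span type shows why the call sites shrink (not LLVM's actual ArrayRef):

    #include <cstddef>
    #include <cstdio>
    #include <vector>

    // Simplified ArrayRef: implicitly constructible from vectors and C arrays,
    // so callers cannot pass a mismatched pointer/length pair.
    template <typename T> struct Span {
      const T *Data; size_t Size;
      Span(const std::vector<T> &V) : Data(V.data()), Size(V.size()) {}
      template <size_t N> Span(const T (&A)[N]) : Data(A), Size(N) {}
    };

    static int sum(Span<int> Ops) {
      int S = 0;
      for (size_t I = 0; I != Ops.Size; ++I) S += Ops.Data[I];
      return S;
    }

    int main() {
      std::vector<int> RetOps{1, 2, 3};
      int Fixed[2] = {4, 5};
      std::printf("%d %d\n", sum(RetOps), sum(Fixed)); // 6 9
    }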
@@ -357,10 +355,13 @@ LowerFormalArguments_32(SDValue Chain,
const unsigned StackOffset = 92;
- for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ unsigned InIdx = 0;
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i, ++InIdx) {
CCValAssign &VA = ArgLocs[i];
- if (i == 0 && Ins[i].Flags.isSRet()) {
+ if (Ins[InIdx].Flags.isSRet()) {
+ if (InIdx != 0)
+ report_fatal_error("sparc only supports sret on the first parameter");
// Get SRet from [%fp+64].
int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, 64, true);
SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32);
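The loop above replaces the old `i == 0 && Ins[i].Flags.isSRet()` test, which silently ignored an sret flag on any later parameter; the rewrite checks every argument and fails loudly instead. The control flow reduces to roughly this shape (hypothetical names, not the real lowering code):

    #include <cstdio>
    #include <cstdlib>
    #include <vector>

    struct ArgFlags { bool IsSRet; };

    static void checkSRet(const std::vector<ArgFlags> &Ins) {
      for (unsigned InIdx = 0; InIdx != Ins.size(); ++InIdx) {
        if (!Ins[InIdx].IsSRet)
          continue;
        if (InIdx != 0) {  // mirrors report_fatal_error in the patch
          std::fprintf(stderr,
                       "sparc only supports sret on the first parameter\n");
          std::exit(1);
        }
        // ... lower the struct-return pointer from the fixed stack slot ...
      }
    }

    int main() {
      checkSRet({{true}, {false}});  // fine: sret on the first parameter
      checkSRet({{false}, {true}});  // dies with a fatal error
    }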
@@ -493,11 +494,11 @@ LowerFormalArguments_32(SDValue Chain,
// Store remaining ArgRegs to the stack if this is a varargs function.
if (isVarArg) {
- static const uint16_t ArgRegs[] = {
+ static const MCPhysReg ArgRegs[] = {
SP::I0, SP::I1, SP::I2, SP::I3, SP::I4, SP::I5
};
unsigned NumAllocated = CCInfo.getFirstUnallocated(ArgRegs, 6);
- const uint16_t *CurArgReg = ArgRegs+NumAllocated, *ArgRegEnd = ArgRegs+6;
+ const MCPhysReg *CurArgReg = ArgRegs+NumAllocated, *ArgRegEnd = ArgRegs+6;
unsigned ArgOffset = CCInfo.getNextStackOffset();
if (NumAllocated == 6)
ArgOffset += StackOffset;
@@ -528,8 +529,7 @@ LowerFormalArguments_32(SDValue Chain,
if (!OutChains.empty()) {
OutChains.push_back(Chain);
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- &OutChains[0], OutChains.size());
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
}
}
@@ -644,8 +644,7 @@ LowerFormalArguments_64(SDValue Chain,
}
if (!OutChains.empty())
- Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
- &OutChains[0], OutChains.size());
+ Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
return Chain;
}
@@ -663,7 +662,7 @@ static bool hasReturnsTwiceAttr(SelectionDAG &DAG, SDValue Callee,
if (CS)
return CS->hasFnAttr(Attribute::ReturnsTwice);
- const Function *CalleeFn = 0;
+ const Function *CalleeFn = nullptr;
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
CalleeFn = dyn_cast<Function>(G->getGlobal());
} else if (ExternalSymbolSDNode *E =
@@ -877,8 +876,7 @@ SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI,
// Emit all stores, make sure they occur before any copies into physregs.
if (!MemOpChains.empty())
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- &MemOpChains[0], MemOpChains.size());
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
// Build a sequence of copy-to-reg nodes chained together with token
// chain and flag operands which copy the outgoing args into registers.
@@ -927,7 +925,7 @@ SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI,
if (InFlag.getNode())
Ops.push_back(InFlag);
- Chain = DAG.getNode(SPISD::CALL, dl, NodeTys, &Ops[0], Ops.size());
+ Chain = DAG.getNode(SPISD::CALL, dl, NodeTys, Ops);
InFlag = Chain.getValue(1);
Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(ArgsSize, true),
@@ -961,9 +959,9 @@ static bool isFP128ABICall(const char *CalleeName)
"_Q_sqrt", "_Q_neg",
"_Q_itoq", "_Q_stoq", "_Q_dtoq", "_Q_utoq",
"_Q_lltoq", "_Q_ulltoq",
- 0
+ nullptr
};
- for (const char * const *I = ABICalls; *I != 0; ++I)
+ for (const char * const *I = ABICalls; *I != nullptr; ++I)
if (strcmp(CalleeName, *I) == 0)
return true;
return false;
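isFP128ABICall scans a nullptr-terminated table; the change is only that the sentinel and the loop test now spell nullptr instead of 0. The idiom in isolation (trimmed table):

    #include <cstdio>
    #include <cstring>

    static bool isFP128ABICall(const char *CalleeName) {
      static const char *const ABICalls[] = {
        "_Q_add", "_Q_sub", "_Q_mul", "_Q_div",
        nullptr  // sentinel terminates the scan
      };
      for (const char *const *I = ABICalls; *I != nullptr; ++I)
        if (std::strcmp(CalleeName, *I) == 0)
          return true;
      return false;
    }

    int main() {
      std::printf("%d %d\n", isFP128ABICall("_Q_mul"), isFP128ABICall("printf"));
    }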
@@ -972,7 +970,7 @@ static bool isFP128ABICall(const char *CalleeName)
unsigned
SparcTargetLowering::getSRetArgSize(SelectionDAG &DAG, SDValue Callee) const
{
- const Function *CalleeFn = 0;
+ const Function *CalleeFn = nullptr;
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
CalleeFn = dyn_cast<Function>(G->getGlobal());
} else if (ExternalSymbolSDNode *E =
@@ -1194,8 +1192,7 @@ SparcTargetLowering::LowerCall_64(TargetLowering::CallLoweringInfo &CLI,
// Emit all stores, make sure they occur before the call.
if (!MemOpChains.empty())
- Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
- &MemOpChains[0], MemOpChains.size());
+ Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
// Build a sequence of CopyToReg nodes glued together with token chain and
// glue operands which copy the outgoing args into registers. The InGlue is
@@ -1245,7 +1242,7 @@ SparcTargetLowering::LowerCall_64(TargetLowering::CallLoweringInfo &CLI,
// Now the call itself.
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
- Chain = DAG.getNode(SPISD::CALL, DL, NodeTys, &Ops[0], Ops.size());
+ Chain = DAG.getNode(SPISD::CALL, DL, NodeTys, Ops);
InGlue = Chain.getValue(1);
// Revert the stack pointer immediately after the call.
@@ -1263,7 +1260,7 @@ SparcTargetLowering::LowerCall_64(TargetLowering::CallLoweringInfo &CLI,
// Set inreg flag manually for codegen generated library calls that
// return float.
- if (CLI.Ins.size() == 1 && CLI.Ins[0].VT == MVT::f32 && CLI.CS == 0)
+ if (CLI.Ins.size() == 1 && CLI.Ins[0].VT == MVT::f32 && CLI.CS == nullptr)
CLI.Ins[0].Flags.setInReg();
RVInfo.AnalyzeCallResult(CLI.Ins, RetCC_Sparc64);
@@ -1677,7 +1674,7 @@ SparcTargetLowering::SparcTargetLowering(TargetMachine &TM)
const char *SparcTargetLowering::getTargetNodeName(unsigned Opcode) const {
switch (Opcode) {
- default: return 0;
+ default: return nullptr;
case SPISD::CMPICC: return "SPISD::CMPICC";
case SPISD::CMPFCC: return "SPISD::CMPFCC";
case SPISD::BRICC: return "SPISD::BRICC";
@@ -1711,7 +1708,7 @@ EVT SparcTargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
/// computeKnownBitsForTargetNode - Determine which of the bits specified in
/// Mask are known to be either zero or one and return them in the
/// KnownZero/KnownOne bitsets.
-void SparcTargetLowering::computeMaskedBitsForTargetNode
+void SparcTargetLowering::computeKnownBitsForTargetNode
(const SDValue Op,
APInt &KnownZero,
APInt &KnownOne,
@@ -1725,10 +1722,8 @@ void SparcTargetLowering::computeMaskedBitsForTargetNode
case SPISD::SELECT_ICC:
case SPISD::SELECT_XCC:
case SPISD::SELECT_FCC:
- DAG.ComputeMaskedBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1);
- DAG.ComputeMaskedBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1);
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
- assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+ DAG.computeKnownBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1);
+ DAG.computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1);
// Only known if known in both the LHS and RHS.
KnownOne &= KnownOne2;
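For a select node, a bit is known only when both possible inputs agree on it, hence the intersection of KnownOne with KnownOne2 above (and likewise for the KnownZero masks). With plain 64-bit masks standing in for APInt, the rule is:

    #include <cstdint>
    #include <cstdio>

    struct Known { uint64_t Zero, One; }; // bit set => that bit is proven 0 / 1

    // Known bits of select(c, A, B): keep only facts shared by both arms.
    static Known knownOfSelect(Known A, Known B) {
      return { A.Zero & B.Zero, A.One & B.One };
    }

    int main() {
      Known A{0xFFFFFFFF00000000ULL, 0x1};  // high half zero, bit 0 one
      Known B{0xFFFF000000000000ULL, 0x3};  // high quarter zero, bits 0-1 one
      Known S = knownOfSelect(A, B);
      std::printf("zero=%#llx one=%#llx\n",
                  (unsigned long long)S.Zero, (unsigned long long)S.One);
      // zero=0xffff000000000000 one=0x1
    }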
@@ -1914,7 +1909,7 @@ SDValue SparcTargetLowering::LowerGlobalTLSAddress(SDValue Op,
assert(Mask && "Missing call preserved mask for calling convention");
Ops.push_back(DAG.getRegisterMask(Mask));
Ops.push_back(InFlag);
- Chain = DAG.getNode(SPISD::TLS_CALL, DL, NodeTys, &Ops[0], Ops.size());
+ Chain = DAG.getNode(SPISD::TLS_CALL, DL, NodeTys, Ops);
InFlag = Chain.getValue(1);
Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(1, true),
DAG.getIntPtrConstant(0, true), InFlag, DL);
@@ -2033,13 +2028,10 @@ SparcTargetLowering::LowerF128Op(SDValue Op, SelectionDAG &DAG,
for (unsigned i = 0, e = numArgs; i != e; ++i) {
Chain = LowerF128_LibCallArg(Chain, Args, Op.getOperand(i), SDLoc(Op), DAG);
}
- TargetLowering::
- CallLoweringInfo CLI(Chain,
- RetTyABI,
- false, false, false, false,
- 0, CallingConv::C,
- false, false, true,
- Callee, Args, DAG, SDLoc(Op));
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(SDLoc(Op)).setChain(Chain)
+ .setCallee(CallingConv::C, RetTyABI, Callee, &Args, 0);
+
std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI);
// The chain is in the second result.
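The thirteen-argument CallLoweringInfo constructor is replaced here by chained setters, a standard builder pattern: each setter returns a reference to the object so the calls compose left to right. A minimal sketch of the pattern (hypothetical fields, not the real CallLoweringInfo):

    #include <cstdio>
    #include <string>

    struct CallInfo {
      std::string Callee;
      int CallConv = 0;
      bool IsTailCall = false;
      // Each setter returns *this so calls can be chained fluently.
      CallInfo &setCallee(std::string C) { Callee = std::move(C); return *this; }
      CallInfo &setCallConv(int CC)      { CallConv = CC;         return *this; }
      CallInfo &setTailCall(bool T)      { IsTailCall = T;        return *this; }
    };

    int main() {
      CallInfo CLI;
      CLI.setCallee("_Q_add").setCallConv(0).setTailCall(false);
      std::printf("%s cc=%d tail=%d\n", CLI.Callee.c_str(), CLI.CallConv,
                  (int)CLI.IsTailCall);
    }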
@@ -2065,7 +2057,7 @@ SparcTargetLowering::LowerF128Compare(SDValue LHS, SDValue RHS,
SDLoc DL,
SelectionDAG &DAG) const {
- const char *LibCall = 0;
+ const char *LibCall = nullptr;
bool is64Bit = Subtarget->is64Bit();
switch(SPCC) {
default: llvm_unreachable("Unhandled conditional code!");
@@ -2092,13 +2084,9 @@ SparcTargetLowering::LowerF128Compare(SDValue LHS, SDValue RHS,
Chain = LowerF128_LibCallArg(Chain, Args, LHS, DL, DAG);
Chain = LowerF128_LibCallArg(Chain, Args, RHS, DL, DAG);
- TargetLowering::
- CallLoweringInfo CLI(Chain,
- RetTy,
- false, false, false, false,
- 0, CallingConv::C,
- false, false, true,
- Callee, Args, DAG, DL);
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(DL).setChain(Chain)
+ .setCallee(CallingConv::C, RetTy, Callee, &Args, 0);
std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI);
@@ -2174,7 +2162,7 @@ LowerF128_FPEXTEND(SDValue Op, SelectionDAG &DAG,
TLI.getLibcallName(RTLIB::FPEXT_F32_F128), 1);
llvm_unreachable("fpextend with non-float operand!");
- return SDValue(0, 0);
+ return SDValue();
}
static SDValue
@@ -2192,7 +2180,7 @@ LowerF128_FPROUND(SDValue Op, SelectionDAG &DAG,
TLI.getLibcallName(RTLIB::FPROUND_F128_F32), 1);
llvm_unreachable("fpround to non-float!");
- return SDValue(0, 0);
+ return SDValue();
}
static SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG,
@@ -2213,7 +2201,7 @@ static SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG,
// Expand if the resulting type is illegal.
if (!TLI.isTypeLegal(VT))
- return SDValue(0, 0);
+ return SDValue();
// Otherwise, convert the fp value to integer in an FP register.
if (VT == MVT::i32)
@@ -2244,7 +2232,7 @@ static SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG,
// Expand if the operand type is illegal.
if (!TLI.isTypeLegal(OpVT))
- return SDValue(0, 0);
+ return SDValue();
// Otherwise, convert the int value to FP in an FP register.
SDValue Tmp = DAG.getNode(ISD::BITCAST, dl, floatVT, Op.getOperand(0));
@@ -2262,7 +2250,7 @@ static SDValue LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG,
// quad floating point instructions and the resulting type is legal.
if (Op.getOperand(0).getValueType() != MVT::f128 ||
(hasHardQuad && TLI.isTypeLegal(VT)))
- return SDValue(0, 0);
+ return SDValue();
assert(VT == MVT::i32 || VT == MVT::i64);
@@ -2283,7 +2271,7 @@ static SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG,
// Expand if it does not involve f128 or the target has support for
// quad floating point instructions and the operand type is legal.
if (Op.getValueType() != MVT::f128 || (hasHardQuad && TLI.isTypeLegal(OpVT)))
- return SDValue(0, 0);
+ return SDValue();
return TLI.LowerF128Op(Op, DAG,
TLI.getLibcallName(OpVT == MVT::i32
@@ -2428,7 +2416,7 @@ static SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG,
SDValue NewVal = DAG.getNode(ISD::ADD, dl, VT, NewSP,
DAG.getConstant(regSpillArea, VT));
SDValue Ops[2] = { NewVal, Chain };
- return DAG.getMergeValues(Ops, 2, dl);
+ return DAG.getMergeValues(Ops, dl);
}
@@ -2597,10 +2585,9 @@ static SDValue LowerF128Load(SDValue Op, SelectionDAG &DAG)
SubRegOdd);
SDValue OutChains[2] = { SDValue(Hi64.getNode(), 1),
SDValue(Lo64.getNode(), 1) };
- SDValue OutChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- &OutChains[0], 2);
+ SDValue OutChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
SDValue Ops[2] = {SDValue(InFP128,0), OutChain};
- return DAG.getMergeValues(Ops, 2, dl);
+ return DAG.getMergeValues(Ops, dl);
}
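LowerF128Load models an f128 value as an even/odd pair of f64 registers: two 8-byte loads feed the halves into subregisters, and the two load chains are joined by the TokenFactor above. Stripped of SelectionDAG machinery, the byte-level split is just:

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    int main() {
      // 16 bytes standing in for an in-memory f128 value.
      unsigned char Mem[16];
      for (int I = 0; I != 16; ++I) Mem[I] = (unsigned char)I;

      // Two 8-byte "loads"; the hi/lo labels follow SPARC's layout and are
      // purely illustrative here.
      uint64_t Hi64, Lo64;
      std::memcpy(&Hi64, Mem + 0, 8);   // even half (offset 0)
      std::memcpy(&Lo64, Mem + 8, 8);   // odd half  (offset 8)

      std::printf("hi=%016llx lo=%016llx\n",
                  (unsigned long long)Hi64, (unsigned long long)Lo64);
    }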
// Lower a f128 store into two f64 stores.
@@ -2644,8 +2631,7 @@ static SDValue LowerF128Store(SDValue Op, SelectionDAG &DAG) {
LoPtr,
MachinePointerInfo(),
false, false, alignment);
- return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- &OutChains[0], 2);
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
}
static SDValue LowerFNEGorFABS(SDValue Op, SelectionDAG &DAG, bool isV9) {
@@ -2726,7 +2712,7 @@ static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) {
SDValue Dst = DAG.getNode(ISD::OR, dl, MVT::i64, Hi, Lo);
SDValue Ops[2] = { Dst, Carry };
- return DAG.getMergeValues(Ops, 2, dl);
+ return DAG.getMergeValues(Ops, dl);
}
// Custom lower UMULO/SMULO for SPARC. This code is similar to ExpandNode()
@@ -2773,7 +2759,7 @@ static SDValue LowerUMULO_SMULO(SDValue Op, SelectionDAG &DAG,
DAG.DeleteNode(MulResult.getNode());
SDValue Ops[2] = { BottomHalf, TopHalf };
- return DAG.getMergeValues(Ops, 2, dl);
+ return DAG.getMergeValues(Ops, dl);
}
static SDValue LowerATOMIC_LOAD_STORE(SDValue Op, SelectionDAG &DAG) {
@@ -3092,7 +3078,7 @@ getSingleConstraintMatchWeight(AsmOperandInfo &info,
Value *CallOperandVal = info.CallOperandVal;
// If we don't have a value, we can't do a match,
// but allow it at the lowest weight.
- if (CallOperandVal == NULL)
+ if (!CallOperandVal)
return CW_Default;
// Look at the constraint type.
@@ -3117,7 +3103,7 @@ LowerAsmOperandForConstraint(SDValue Op,
std::string &Constraint,
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const {
- SDValue Result(0, 0);
+ SDValue Result(nullptr, 0);
// Only support length 1 constraints for now.
if (Constraint.length() > 1)
diff --git a/lib/Target/Sparc/SparcISelLowering.h b/lib/Target/Sparc/SparcISelLowering.h
index f7b45d0..a24cc82 100644
--- a/lib/Target/Sparc/SparcISelLowering.h
+++ b/lib/Target/Sparc/SparcISelLowering.h
@@ -55,47 +55,47 @@ namespace llvm {
const SparcSubtarget *Subtarget;
public:
SparcTargetLowering(TargetMachine &TM);
- virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
- /// computeMaskedBitsForTargetNode - Determine which of the bits specified
+ /// computeKnownBitsForTargetNode - Determine which of the bits specified
/// in Mask are known to be either zero or one and return them in the
/// KnownZero/KnownOne bitsets.
- virtual void computeMaskedBitsForTargetNode(const SDValue Op,
- APInt &KnownZero,
- APInt &KnownOne,
- const SelectionDAG &DAG,
- unsigned Depth = 0) const;
+ void computeKnownBitsForTargetNode(const SDValue Op,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth = 0) const override;
- virtual MachineBasicBlock *
+ MachineBasicBlock *
EmitInstrWithCustomInserter(MachineInstr *MI,
- MachineBasicBlock *MBB) const;
+ MachineBasicBlock *MBB) const override;
- virtual const char *getTargetNodeName(unsigned Opcode) const;
+ const char *getTargetNodeName(unsigned Opcode) const override;
- ConstraintType getConstraintType(const std::string &Constraint) const;
+ ConstraintType getConstraintType(const std::string &Constraint) const override;
ConstraintWeight
getSingleConstraintMatchWeight(AsmOperandInfo &info,
- const char *constraint) const;
+ const char *constraint) const override;
void LowerAsmOperandForConstraint(SDValue Op,
std::string &Constraint,
std::vector<SDValue> &Ops,
- SelectionDAG &DAG) const;
+ SelectionDAG &DAG) const override;
std::pair<unsigned, const TargetRegisterClass*>
- getRegForInlineAsmConstraint(const std::string &Constraint, MVT VT) const;
+ getRegForInlineAsmConstraint(const std::string &Constraint, MVT VT) const override;
- virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
- virtual MVT getScalarShiftAmountTy(EVT LHSTy) const { return MVT::i32; }
+ bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
+ MVT getScalarShiftAmountTy(EVT LHSTy) const override { return MVT::i32; }
/// getSetCCResultType - Return the ISD::SETCC ValueType
- virtual EVT getSetCCResultType(LLVMContext &Context, EVT VT) const;
+ EVT getSetCCResultType(LLVMContext &Context, EVT VT) const override;
- virtual SDValue
+ SDValue
LowerFormalArguments(SDValue Chain,
CallingConv::ID CallConv,
bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
SDLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const;
+ SmallVectorImpl<SDValue> &InVals) const override;
SDValue LowerFormalArguments_32(SDValue Chain,
CallingConv::ID CallConv,
bool isVarArg,
@@ -109,20 +109,20 @@ namespace llvm {
SDLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const;
- virtual SDValue
+ SDValue
LowerCall(TargetLowering::CallLoweringInfo &CLI,
- SmallVectorImpl<SDValue> &InVals) const;
+ SmallVectorImpl<SDValue> &InVals) const override;
SDValue LowerCall_32(TargetLowering::CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const;
SDValue LowerCall_64(TargetLowering::CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const;
- virtual SDValue
+ SDValue
LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
- SDLoc dl, SelectionDAG &DAG) const;
+ SDLoc dl, SelectionDAG &DAG) const override;
SDValue LowerReturn_32(SDValue Chain,
CallingConv::ID CallConv, bool IsVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
@@ -156,15 +156,15 @@ namespace llvm {
SDLoc DL,
SelectionDAG &DAG) const;
- bool ShouldShrinkFPConstant(EVT VT) const {
+ bool ShouldShrinkFPConstant(EVT VT) const override {
// Do not shrink FP constpool if VT == MVT::f128.
// (ldd, call _Q_fdtoq) is more expensive than two ldds.
return VT != MVT::f128;
}
- virtual void ReplaceNodeResults(SDNode *N,
+ void ReplaceNodeResults(SDNode *N,
SmallVectorImpl<SDValue>& Results,
- SelectionDAG &DAG) const;
+ SelectionDAG &DAG) const override;
MachineBasicBlock *expandSelectCC(MachineInstr *MI, MachineBasicBlock *BB,
unsigned BROpcode) const;
diff --git a/lib/Target/Sparc/SparcInstr64Bit.td b/lib/Target/Sparc/SparcInstr64Bit.td
index a34ce26..54d8240 100644
--- a/lib/Target/Sparc/SparcInstr64Bit.td
+++ b/lib/Target/Sparc/SparcInstr64Bit.td
@@ -359,9 +359,9 @@ multiclass BranchOnReg<bits<3> cond, string OpcStr> {
multiclass bpr_alias<string OpcStr, Instruction NAPT, Instruction APT> {
def : InstAlias<!strconcat(OpcStr, ",pt $rs1, $imm16"),
- (NAPT I64Regs:$rs1, bprtarget16:$imm16)>;
+ (NAPT I64Regs:$rs1, bprtarget16:$imm16), 0>;
def : InstAlias<!strconcat(OpcStr, ",a,pt $rs1, $imm16"),
- (APT I64Regs:$rs1, bprtarget16:$imm16)>;
+ (APT I64Regs:$rs1, bprtarget16:$imm16), 0>;
}
defm BPZ : BranchOnReg<0b001, "brz">;
diff --git a/lib/Target/Sparc/SparcInstrAliases.td b/lib/Target/Sparc/SparcInstrAliases.td
index 33c2aa1..d36f67b 100644
--- a/lib/Target/Sparc/SparcInstrAliases.td
+++ b/lib/Target/Sparc/SparcInstrAliases.td
@@ -281,12 +281,12 @@ defm : fp_cond_alias<"o", 0b1111>;
// Instruction aliases for JMPL.
// jmp addr -> jmpl addr, %g0
-def : InstAlias<"jmp $addr", (JMPLrr G0, MEMrr:$addr)>;
-def : InstAlias<"jmp $addr", (JMPLri G0, MEMri:$addr)>;
+def : InstAlias<"jmp $addr", (JMPLrr G0, MEMrr:$addr), 0>;
+def : InstAlias<"jmp $addr", (JMPLri G0, MEMri:$addr), 0>;
// call addr -> jmpl addr, %o7
-def : InstAlias<"call $addr", (JMPLrr O7, MEMrr:$addr)>;
-def : InstAlias<"call $addr", (JMPLri O7, MEMri:$addr)>;
+def : InstAlias<"call $addr", (JMPLrr O7, MEMrr:$addr), 0>;
+def : InstAlias<"call $addr", (JMPLri O7, MEMri:$addr), 0>;
// retl -> RETL 8
def : InstAlias<"retl", (RETL 8)>;
diff --git a/lib/Target/Sparc/SparcInstrInfo.cpp b/lib/Target/Sparc/SparcInstrInfo.cpp
index abf6c17..8b2e6bc 100644
--- a/lib/Target/Sparc/SparcInstrInfo.cpp
+++ b/lib/Target/Sparc/SparcInstrInfo.cpp
@@ -24,11 +24,10 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
-#define GET_INSTRINFO_CTOR_DTOR
-#include "SparcGenInstrInfo.inc"
-
using namespace llvm;
+#define GET_INSTRINFO_CTOR_DTOR
+#include "SparcGenInstrInfo.inc"
// Pin the vtable to this file.
void SparcInstrInfo::anchor() {}
@@ -162,10 +161,10 @@ bool SparcInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
std::next(I)->eraseFromParent();
Cond.clear();
- FBB = 0;
+ FBB = nullptr;
if (MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) {
- TBB = 0;
+ TBB = nullptr;
I->eraseFromParent();
I = MBB.end();
UnCondBrIter = MBB.end();
@@ -285,7 +284,7 @@ void SparcInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
bool KillSrc) const {
unsigned numSubRegs = 0;
unsigned movOpc = 0;
- const unsigned *subRegIdx = 0;
+ const unsigned *subRegIdx = nullptr;
const unsigned DFP_FP_SubRegsIdx[] = { SP::sub_even, SP::sub_odd };
const unsigned QFP_DFP_SubRegsIdx[] = { SP::sub_even64, SP::sub_odd64 };
@@ -329,11 +328,11 @@ void SparcInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
} else
llvm_unreachable("Impossible reg-to-reg copy");
- if (numSubRegs == 0 || subRegIdx == 0 || movOpc == 0)
+ if (numSubRegs == 0 || subRegIdx == nullptr || movOpc == 0)
return;
const TargetRegisterInfo *TRI = &getRegisterInfo();
- MachineInstr *MovMI = 0;
+ MachineInstr *MovMI = nullptr;
for (unsigned i = 0; i != numSubRegs; ++i) {
unsigned Dst = TRI->getSubReg(DestReg, subRegIdx[i]);
diff --git a/lib/Target/Sparc/SparcInstrInfo.h b/lib/Target/Sparc/SparcInstrInfo.h
index a86cbcb..3a1472e 100644
--- a/lib/Target/Sparc/SparcInstrInfo.h
+++ b/lib/Target/Sparc/SparcInstrInfo.h
@@ -45,52 +45,52 @@ public:
/// such, whenever a client has an instance of instruction info, it should
/// always be able to get register info as well (through this method).
///
- virtual const SparcRegisterInfo &getRegisterInfo() const { return RI; }
+ const SparcRegisterInfo &getRegisterInfo() const { return RI; }
/// isLoadFromStackSlot - If the specified machine instruction is a direct
/// load from a stack slot, return the virtual or physical register number of
/// the destination along with the FrameIndex of the loaded stack slot. If
/// not, return 0. This predicate must return 0 if the instruction has
/// any side effects other than loading from the stack slot.
- virtual unsigned isLoadFromStackSlot(const MachineInstr *MI,
- int &FrameIndex) const;
+ unsigned isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const override;
/// isStoreToStackSlot - If the specified machine instruction is a direct
/// store to a stack slot, return the virtual or physical register number of
/// the source reg along with the FrameIndex of the loaded stack slot. If
/// not, return 0. This predicate must return 0 if the instruction has
/// any side effects other than storing to the stack slot.
- virtual unsigned isStoreToStackSlot(const MachineInstr *MI,
- int &FrameIndex) const;
-
- virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
- MachineBasicBlock *&FBB,
- SmallVectorImpl<MachineOperand> &Cond,
- bool AllowModify = false) const ;
-
- virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
-
- virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
- MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond,
- DebugLoc DL) const;
-
- virtual void copyPhysReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I, DebugLoc DL,
- unsigned DestReg, unsigned SrcReg,
- bool KillSrc) const;
-
- virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- unsigned SrcReg, bool isKill, int FrameIndex,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const;
-
- virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- unsigned DestReg, int FrameIndex,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const;
+ unsigned isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const override;
+
+ bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify = false) const override;
+
+ unsigned RemoveBranch(MachineBasicBlock &MBB) const override;
+
+ unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const override;
+
+ void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const override;
+
+ void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const override;
+
+ void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const override;
unsigned getGlobalBaseReg(MachineFunction *MF) const;
};
diff --git a/lib/Target/Sparc/SparcJITInfo.cpp b/lib/Target/Sparc/SparcJITInfo.cpp
index 959d12f..c775e9e 100644
--- a/lib/Target/Sparc/SparcJITInfo.cpp
+++ b/lib/Target/Sparc/SparcJITInfo.cpp
@@ -10,7 +10,6 @@
// This file implements the JIT interfaces for the Sparc target.
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "jit"
#include "SparcJITInfo.h"
#include "Sparc.h"
#include "SparcRelocations.h"
@@ -20,6 +19,8 @@
using namespace llvm;
+#define DEBUG_TYPE "jit"
+
/// JITCompilerFunction - This contains the address of the JIT function used to
/// compile a function lazily.
static TargetJITInfo::JITCompilerFn JITCompilerFunction;
diff --git a/lib/Target/Sparc/SparcJITInfo.h b/lib/Target/Sparc/SparcJITInfo.h
index 9c6e488..ff1b43a 100644
--- a/lib/Target/Sparc/SparcJITInfo.h
+++ b/lib/Target/Sparc/SparcJITInfo.h
@@ -34,27 +34,27 @@ class SparcJITInfo : public TargetJITInfo {
/// overwriting OLD with a branch to NEW. This is used for self-modifying
/// code.
///
- virtual void replaceMachineCodeForFunction(void *Old, void *New);
+ void replaceMachineCodeForFunction(void *Old, void *New) override;
// getStubLayout - Returns the size and alignment of the largest call stub
// on Sparc.
- virtual StubLayout getStubLayout();
+ StubLayout getStubLayout() override;
/// emitFunctionStub - Use the specified JITCodeEmitter object to emit a
/// small native function that simply calls the function at the specified
/// address.
- virtual void *emitFunctionStub(const Function *F, void *Fn,
- JITCodeEmitter &JCE);
+ void *emitFunctionStub(const Function *F, void *Fn,
+ JITCodeEmitter &JCE) override;
/// getLazyResolverFunction - Expose the lazy resolver to the JIT.
- virtual LazyResolverFn getLazyResolverFunction(JITCompilerFn);
+ LazyResolverFn getLazyResolverFunction(JITCompilerFn) override;
/// relocate - Before the JIT can run a block of code that has been emitted,
/// it must rewrite the code to contain the actual addresses of any
/// referenced global symbols.
- virtual void relocate(void *Function, MachineRelocation *MR,
- unsigned NumRelocs, unsigned char *GOTBase);
+ void relocate(void *Function, MachineRelocation *MR,
+ unsigned NumRelocs, unsigned char *GOTBase) override;
/// Initialize - Initialize internal stage for the function being JITted.
void Initialize(const MachineFunction &MF, bool isPIC) {
diff --git a/lib/Target/Sparc/SparcMCInstLower.cpp b/lib/Target/Sparc/SparcMCInstLower.cpp
index 737e378..9e94d2c 100644
--- a/lib/Target/Sparc/SparcMCInstLower.cpp
+++ b/lib/Target/Sparc/SparcMCInstLower.cpp
@@ -34,7 +34,7 @@ static MCOperand LowerSymbolOperand(const MachineInstr *MI,
SparcMCExpr::VariantKind Kind =
(SparcMCExpr::VariantKind)MO.getTargetFlags();
- const MCSymbol *Symbol = 0;
+ const MCSymbol *Symbol = nullptr;
switch(MO.getType()) {
default: llvm_unreachable("Unknown type in LowerSymbolOperand");
diff --git a/lib/Target/Sparc/SparcRegisterInfo.cpp b/lib/Target/Sparc/SparcRegisterInfo.cpp
index f222382..dc1ec7c 100644
--- a/lib/Target/Sparc/SparcRegisterInfo.cpp
+++ b/lib/Target/Sparc/SparcRegisterInfo.cpp
@@ -25,11 +25,11 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetInstrInfo.h"
+using namespace llvm;
+
#define GET_REGINFO_TARGET_DESC
#include "SparcGenRegisterInfo.inc"
-using namespace llvm;
-
static cl::opt<bool>
ReserveAppRegisters("sparc-reserve-app-registers", cl::Hidden, cl::init(false),
cl::desc("Reserve application registers (%g2-%g4)"));
@@ -38,8 +38,8 @@ SparcRegisterInfo::SparcRegisterInfo(SparcSubtarget &st)
: SparcGenRegisterInfo(SP::O7), Subtarget(st) {
}
-const uint16_t* SparcRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF)
- const {
+const MCPhysReg*
+SparcRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
return CSR_SaveList;
}
diff --git a/lib/Target/Sparc/SparcRegisterInfo.h b/lib/Target/Sparc/SparcRegisterInfo.h
index 00b5a98..77f879a 100644
--- a/lib/Target/Sparc/SparcRegisterInfo.h
+++ b/lib/Target/Sparc/SparcRegisterInfo.h
@@ -31,25 +31,26 @@ struct SparcRegisterInfo : public SparcGenRegisterInfo {
SparcRegisterInfo(SparcSubtarget &st);
/// Code Generation virtual methods...
- const uint16_t *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
- const uint32_t* getCallPreservedMask(CallingConv::ID CC) const;
+ const MCPhysReg *
+ getCalleeSavedRegs(const MachineFunction *MF = nullptr) const override;
+ const uint32_t* getCallPreservedMask(CallingConv::ID CC) const override;
const uint32_t* getRTCallPreservedMask(CallingConv::ID CC) const;
- BitVector getReservedRegs(const MachineFunction &MF) const;
+ BitVector getReservedRegs(const MachineFunction &MF) const override;
const TargetRegisterClass *getPointerRegClass(const MachineFunction &MF,
- unsigned Kind) const;
+ unsigned Kind) const override;
void eliminateFrameIndex(MachineBasicBlock::iterator II,
int SPAdj, unsigned FIOperandNum,
- RegScavenger *RS = NULL) const;
+ RegScavenger *RS = nullptr) const override;
void processFunctionBeforeFrameFinalized(MachineFunction &MF,
- RegScavenger *RS = NULL) const;
+ RegScavenger *RS = nullptr) const;
// Debug information queries.
- unsigned getFrameRegister(const MachineFunction &MF) const;
+ unsigned getFrameRegister(const MachineFunction &MF) const override;
};
} // end namespace llvm
diff --git a/lib/Target/Sparc/SparcSelectionDAGInfo.cpp b/lib/Target/Sparc/SparcSelectionDAGInfo.cpp
index 190c575..eb36d29 100644
--- a/lib/Target/Sparc/SparcSelectionDAGInfo.cpp
+++ b/lib/Target/Sparc/SparcSelectionDAGInfo.cpp
@@ -11,10 +11,11 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "sparc-selectiondag-info"
#include "SparcTargetMachine.h"
using namespace llvm;
+#define DEBUG_TYPE "sparc-selectiondag-info"
+
SparcSelectionDAGInfo::SparcSelectionDAGInfo(const SparcTargetMachine &TM)
: TargetSelectionDAGInfo(TM) {
}
diff --git a/lib/Target/Sparc/SparcSubtarget.cpp b/lib/Target/Sparc/SparcSubtarget.cpp
index 6fc9d56..e38fb02 100644
--- a/lib/Target/Sparc/SparcSubtarget.cpp
+++ b/lib/Target/Sparc/SparcSubtarget.cpp
@@ -16,12 +16,14 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/TargetRegistry.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "sparc-subtarget"
+
#define GET_SUBTARGETINFO_TARGET_DESC
#define GET_SUBTARGETINFO_CTOR
#include "SparcGenSubtargetInfo.inc"
-using namespace llvm;
-
void SparcSubtarget::anchor() { }
SparcSubtarget::SparcSubtarget(const std::string &TT, const std::string &CPU,
diff --git a/lib/Target/Sparc/SparcTargetMachine.cpp b/lib/Target/Sparc/SparcTargetMachine.cpp
index 83f3474..2469d93 100644
--- a/lib/Target/Sparc/SparcTargetMachine.cpp
+++ b/lib/Target/Sparc/SparcTargetMachine.cpp
@@ -77,8 +77,8 @@ public:
return getTM<SparcTargetMachine>();
}
- virtual bool addInstSelector();
- virtual bool addPreEmitPass();
+ bool addInstSelector() override;
+ bool addPreEmitPass() override;
};
} // namespace
diff --git a/lib/Target/Sparc/SparcTargetMachine.h b/lib/Target/Sparc/SparcTargetMachine.h
index 8c9bcd3..7d04338 100644
--- a/lib/Target/Sparc/SparcTargetMachine.h
+++ b/lib/Target/Sparc/SparcTargetMachine.h
@@ -40,28 +40,28 @@ public:
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL, bool is64bit);
- virtual const SparcInstrInfo *getInstrInfo() const { return &InstrInfo; }
- virtual const TargetFrameLowering *getFrameLowering() const {
+ const SparcInstrInfo *getInstrInfo() const override { return &InstrInfo; }
+ const TargetFrameLowering *getFrameLowering() const override {
return &FrameLowering;
}
- virtual const SparcSubtarget *getSubtargetImpl() const{ return &Subtarget; }
- virtual const SparcRegisterInfo *getRegisterInfo() const {
+ const SparcSubtarget *getSubtargetImpl() const override { return &Subtarget; }
+ const SparcRegisterInfo *getRegisterInfo() const override {
return &InstrInfo.getRegisterInfo();
}
- virtual const SparcTargetLowering* getTargetLowering() const {
+ const SparcTargetLowering* getTargetLowering() const override {
return &TLInfo;
}
- virtual const SparcSelectionDAGInfo* getSelectionDAGInfo() const {
+ const SparcSelectionDAGInfo* getSelectionDAGInfo() const override {
return &TSInfo;
}
- virtual SparcJITInfo *getJITInfo() {
+ SparcJITInfo *getJITInfo() override {
return &JITInfo;
}
- virtual const DataLayout *getDataLayout() const { return &DL; }
+ const DataLayout *getDataLayout() const override { return &DL; }
// Pass Pipeline Configuration
- virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
- virtual bool addCodeEmitter(PassManagerBase &PM, JITCodeEmitter &JCE);
+ TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
+ bool addCodeEmitter(PassManagerBase &PM, JITCodeEmitter &JCE) override;
};
/// SparcV8TargetMachine - Sparc 32-bit target machine
diff --git a/lib/Target/Sparc/SparcTargetObjectFile.cpp b/lib/Target/Sparc/SparcTargetObjectFile.cpp
index f1630e0..32b2240 100644
--- a/lib/Target/Sparc/SparcTargetObjectFile.cpp
+++ b/lib/Target/Sparc/SparcTargetObjectFile.cpp
@@ -28,7 +28,7 @@ const MCExpr *SparcELFTargetObjectFile::getTTypeGlobalReference(
// Add information about the stub reference to ELFMMI so that the stub
// gets emitted by the asmprinter.
MachineModuleInfoImpl::StubValueTy &StubSym = ELFMMI.getGVStubEntry(SSym);
- if (StubSym.getPointer() == 0) {
+ if (!StubSym.getPointer()) {
MCSymbol *Sym = TM.getSymbol(GV, Mang);
StubSym = MachineModuleInfoImpl::StubValueTy(Sym, !GV->hasLocalLinkage());
}
diff --git a/lib/Target/Sparc/SparcTargetStreamer.h b/lib/Target/Sparc/SparcTargetStreamer.h
index 503ebd9..3767d8e 100644
--- a/lib/Target/Sparc/SparcTargetStreamer.h
+++ b/lib/Target/Sparc/SparcTargetStreamer.h
@@ -31,8 +31,8 @@ class SparcTargetAsmStreamer : public SparcTargetStreamer {
public:
SparcTargetAsmStreamer(MCStreamer &S, formatted_raw_ostream &OS);
- virtual void emitSparcRegisterIgnore(unsigned reg);
- virtual void emitSparcRegisterScratch(unsigned reg);
+ void emitSparcRegisterIgnore(unsigned reg) override;
+ void emitSparcRegisterScratch(unsigned reg) override;
};
@@ -41,8 +41,8 @@ class SparcTargetELFStreamer : public SparcTargetStreamer {
public:
SparcTargetELFStreamer(MCStreamer &S);
MCELFStreamer &getStreamer();
- virtual void emitSparcRegisterIgnore(unsigned reg) {}
- virtual void emitSparcRegisterScratch(unsigned reg) {}
+ void emitSparcRegisterIgnore(unsigned reg) override {}
+ void emitSparcRegisterScratch(unsigned reg) override {}
};
} // end namespace llvm
diff --git a/lib/Target/SystemZ/AsmParser/LLVMBuild.txt b/lib/Target/SystemZ/AsmParser/LLVMBuild.txt
index 0b97e71..602898e 100644
--- a/lib/Target/SystemZ/AsmParser/LLVMBuild.txt
+++ b/lib/Target/SystemZ/AsmParser/LLVMBuild.txt
@@ -19,5 +19,5 @@
type = Library
name = SystemZAsmParser
parent = SystemZ
-required_libraries = SystemZDesc SystemZInfo MC MCParser Support
+required_libraries = MC MCParser Support SystemZDesc SystemZInfo
add_to_library_groups = SystemZ
diff --git a/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp b/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
index a3dd4b6..71de64f 100644
--- a/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
+++ b/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
@@ -110,7 +110,7 @@ private:
void addExpr(MCInst &Inst, const MCExpr *Expr) const {
// Add as immediates when possible; a null MCExpr is treated as 0.
- if (Expr == 0)
+ if (!Expr)
Inst.addOperand(MCOperand::CreateImm(0));
else if (auto *CE = dyn_cast<MCConstantExpr>(Expr))
Inst.addOperand(MCOperand::CreateImm(CE->getValue()));
@@ -208,7 +208,7 @@ public:
return (Kind == KindMem &&
Mem.RegKind == RegKind &&
(MemKind == BDXMem || !Mem.Index) &&
- (MemKind == BDLMem) == (Mem.Length != 0));
+ (MemKind == BDLMem) == (Mem.Length != nullptr));
}
bool isMemDisp12(RegisterKind RegKind, MemoryKind MemKind) const {
return isMem(RegKind, MemKind) && inRange(Mem.Disp, 0, 0xfff);
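In the isMem predicate above, `(MemKind == BDLMem) == (Mem.Length != nullptr)` is an if-and-only-if check: a base-displacement-length operand must carry a length expression, and every other kind must not. The `!= 0` to `!= nullptr` change matters because Length is a pointer, not an integer. The idiom in isolation (simplified types):

    #include <cstdio>

    enum MemoryKind { BDMem, BDXMem, BDLMem };
    struct Mem { MemoryKind Kind; const void *Length; };

    // True iff Length is present exactly when the kind requires one.
    static bool lengthConsistent(const Mem &M) {
      return (M.Kind == BDLMem) == (M.Length != nullptr);
    }

    int main() {
      int Len = 4;
      std::printf("%d %d %d\n",
                  lengthConsistent({BDLMem, &Len}),    // 1: required, present
                  lengthConsistent({BDXMem, nullptr}), // 1: no length, none given
                  lengthConsistent({BDLMem, nullptr}));// 0: missing required length
    }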
@@ -331,7 +331,8 @@ private:
public:
SystemZAsmParser(MCSubtargetInfo &sti, MCAsmParser &parser,
- const MCInstrInfo &MII)
+ const MCInstrInfo &MII,
+ const MCTargetOptions &Options)
: MCTargetAsmParser(), STI(sti), Parser(parser) {
MCAsmParserExtension::Initialize(Parser);
@@ -526,7 +527,7 @@ bool SystemZAsmParser::parseAddress(unsigned &Base, const MCExpr *&Disp,
// Parse the optional base and index.
Index = 0;
Base = 0;
- Length = 0;
+ Length = nullptr;
if (getLexer().is(AsmToken::LParen)) {
Parser.Lex();
@@ -758,7 +759,7 @@ parseAccessReg(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
return MatchOperand_NoMatch;
Register Reg;
- if (parseRegister(Reg, RegAccess, 0))
+ if (parseRegister(Reg, RegAccess, nullptr))
return MatchOperand_ParseFail;
Operands.push_back(SystemZOperand::createAccessReg(Reg.Num,
diff --git a/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp b/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp
index 59a1fe9..2350776 100644
--- a/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp
+++ b/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp
@@ -17,13 +17,15 @@
using namespace llvm;
+#define DEBUG_TYPE "systemz-disassembler"
+
typedef MCDisassembler::DecodeStatus DecodeStatus;
namespace {
class SystemZDisassembler : public MCDisassembler {
public:
- SystemZDisassembler(const MCSubtargetInfo &STI)
- : MCDisassembler(STI) {}
+ SystemZDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx)
+ : MCDisassembler(STI, Ctx) {}
virtual ~SystemZDisassembler() {}
// Override MCDisassembler.
@@ -35,8 +37,9 @@ public:
} // end anonymous namespace
static MCDisassembler *createSystemZDisassembler(const Target &T,
- const MCSubtargetInfo &STI) {
- return new SystemZDisassembler(STI);
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx) {
+ return new SystemZDisassembler(STI, Ctx);
}
extern "C" void LLVMInitializeSystemZDisassembler() {
diff --git a/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp b/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp
index e1e64d3..d2ba9b6 100644
--- a/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp
+++ b/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp
@@ -7,8 +7,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "asm-printer"
-
#include "SystemZInstPrinter.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInstrInfo.h"
@@ -16,6 +14,8 @@
using namespace llvm;
+#define DEBUG_TYPE "asm-printer"
+
#include "SystemZGenAsmWriter.inc"
void SystemZInstPrinter::printAddress(unsigned Base, int64_t Disp,
diff --git a/lib/Target/SystemZ/MCTargetDesc/LLVMBuild.txt b/lib/Target/SystemZ/MCTargetDesc/LLVMBuild.txt
index cbdb59c..dabd214 100644
--- a/lib/Target/SystemZ/MCTargetDesc/LLVMBuild.txt
+++ b/lib/Target/SystemZ/MCTargetDesc/LLVMBuild.txt
@@ -19,5 +19,5 @@
type = Library
name = SystemZDesc
parent = SystemZ
-required_libraries = MC SystemZAsmPrinter SystemZInfo Support
+required_libraries = MC Support SystemZAsmPrinter SystemZInfo
add_to_library_groups = SystemZ
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp
index df50863..27b4bd8 100644
--- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp
@@ -11,7 +11,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "mccodeemitter"
#include "MCTargetDesc/SystemZMCTargetDesc.h"
#include "MCTargetDesc/SystemZMCFixups.h"
#include "llvm/MC/MCCodeEmitter.h"
@@ -21,6 +20,8 @@
using namespace llvm;
+#define DEBUG_TYPE "mccodeemitter"
+
namespace {
class SystemZMCCodeEmitter : public MCCodeEmitter {
const MCInstrInfo &MCII;
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp
index 54c6987..c6a1816 100644
--- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp
@@ -82,7 +82,7 @@ static unsigned getPLTReloc(unsigned Kind) {
unsigned SystemZObjectWriter::GetRelocType(const MCValue &Target,
const MCFixup &Fixup,
bool IsPCRel) const {
- MCSymbolRefExpr::VariantKind Modifier = Fixup.getAccessVariant();
+ MCSymbolRefExpr::VariantKind Modifier = Target.getAccessVariant();
unsigned Kind = Fixup.getKind();
switch (Modifier) {
case MCSymbolRefExpr::VK_None:
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
index 8d1bac9..cc94869 100644
--- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
@@ -16,6 +16,8 @@
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/TargetRegistry.h"
+using namespace llvm;
+
#define GET_INSTRINFO_MC_DESC
#include "SystemZGenInstrInfo.inc"
@@ -25,8 +27,6 @@
#define GET_REGINFO_MC_DESC
#include "SystemZGenRegisterInfo.inc"
-using namespace llvm;
-
const unsigned SystemZMC::GR32Regs[16] = {
SystemZ::R0L, SystemZ::R1L, SystemZ::R2L, SystemZ::R3L,
SystemZ::R4L, SystemZ::R5L, SystemZ::R6L, SystemZ::R7L,
@@ -98,7 +98,8 @@ static MCAsmInfo *createSystemZMCAsmInfo(const MCRegisterInfo &MRI,
StringRef TT) {
MCAsmInfo *MAI = new SystemZMCAsmInfo(TT);
MCCFIInstruction Inst =
- MCCFIInstruction::createDefCfa(0, MRI.getDwarfRegNum(SystemZ::R15D, true),
+ MCCFIInstruction::createDefCfa(nullptr,
+ MRI.getDwarfRegNum(SystemZ::R15D, true),
SystemZMC::CFAOffsetFromInitialSP);
MAI->addInitialFrameState(Inst);
return MAI;
diff --git a/lib/Target/SystemZ/SystemZElimCompare.cpp b/lib/Target/SystemZ/SystemZElimCompare.cpp
index fdf80a9..dc210d6 100644
--- a/lib/Target/SystemZ/SystemZElimCompare.cpp
+++ b/lib/Target/SystemZ/SystemZElimCompare.cpp
@@ -13,8 +13,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "systemz-elim-compare"
-
#include "SystemZTargetMachine.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -28,6 +26,8 @@
using namespace llvm;
+#define DEBUG_TYPE "systemz-elim-compare"
+
STATISTIC(BranchOnCounts, "Number of branch-on-count instructions");
STATISTIC(EliminatedComparisons, "Number of eliminated comparisons");
STATISTIC(FusedComparisons, "Number of fused compare-and-branch instructions");
@@ -64,14 +64,14 @@ class SystemZElimCompare : public MachineFunctionPass {
public:
static char ID;
SystemZElimCompare(const SystemZTargetMachine &tm)
- : MachineFunctionPass(ID), TII(0), TRI(0) {}
+ : MachineFunctionPass(ID), TII(nullptr), TRI(nullptr) {}
const char *getPassName() const override {
return "SystemZ Comparison Elimination";
}
bool processBlock(MachineBasicBlock &MBB);
- bool runOnMachineFunction(MachineFunction &F);
+ bool runOnMachineFunction(MachineFunction &F) override;
private:
Reference getRegReferences(MachineInstr *MI, unsigned Reg);
diff --git a/lib/Target/SystemZ/SystemZFrameLowering.cpp b/lib/Target/SystemZ/SystemZFrameLowering.cpp
index c856955..65f3caf 100644
--- a/lib/Target/SystemZ/SystemZFrameLowering.cpp
+++ b/lib/Target/SystemZ/SystemZFrameLowering.cpp
@@ -93,7 +93,7 @@ processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
// save and restore the stack pointer at the same time, via STMG and LMG.
// This allows the deallocation to be done by the LMG, rather than needing
// a separate %r15 addition.
- const uint16_t *CSRegs = TRI->getCalleeSavedRegs(&MF);
+ const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF);
for (unsigned I = 0; CSRegs[I]; ++I) {
unsigned Reg = CSRegs[I];
if (SystemZ::GR64BitRegClass.contains(Reg) && MRI.isPhysRegUsed(Reg)) {
diff --git a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
index f46eb16..24f7584 100644
--- a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
+++ b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
@@ -19,6 +19,8 @@
using namespace llvm;
+#define DEBUG_TYPE "systemz-isel"
+
namespace {
// Used to build addressing modes.
struct SystemZAddressingMode {
@@ -72,14 +74,14 @@ struct SystemZAddressingMode {
errs() << "SystemZAddressingMode " << this << '\n';
errs() << " Base ";
- if (Base.getNode() != 0)
+ if (Base.getNode())
Base.getNode()->dump();
else
errs() << "null\n";
if (hasIndexField()) {
errs() << " Index ";
- if (Index.getNode() != 0)
+ if (Index.getNode())
Index.getNode()->dump();
else
errs() << "null\n";
@@ -663,7 +665,7 @@ bool SystemZDAGToDAGISel::detectOrAndInsertion(SDValue &Op,
uint64_t Used = allOnes(Op.getValueType().getSizeInBits());
if (Used != (AndMask | InsertMask)) {
APInt KnownZero, KnownOne;
- CurDAG->ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne);
+ CurDAG->computeKnownBits(Op.getOperand(0), KnownZero, KnownOne);
if (Used != (AndMask | InsertMask | KnownZero.getZExtValue()))
return false;
}
@@ -712,7 +714,7 @@ bool SystemZDAGToDAGISel::expandRxSBG(RxSBGOperands &RxSBG) const {
// been removed from the mask. See if adding them back in makes the
// mask suitable.
APInt KnownZero, KnownOne;
- CurDAG->ComputeMaskedBits(Input, KnownZero, KnownOne);
+ CurDAG->computeKnownBits(Input, KnownZero, KnownOne);
Mask |= KnownZero.getZExtValue();
if (!refineRxSBGMask(RxSBG, Mask))
return false;
@@ -736,7 +738,7 @@ bool SystemZDAGToDAGISel::expandRxSBG(RxSBGOperands &RxSBG) const {
// been removed from the mask. See if adding them back in makes the
// mask suitable.
APInt KnownZero, KnownOne;
- CurDAG->ComputeMaskedBits(Input, KnownZero, KnownOne);
+ CurDAG->computeKnownBits(Input, KnownZero, KnownOne);
Mask &= ~KnownOne.getZExtValue();
if (!refineRxSBGMask(RxSBG, Mask))
return false;
@@ -867,12 +869,12 @@ SDNode *SystemZDAGToDAGISel::tryRISBGZero(SDNode *N) {
if (RISBG.Input.getOpcode() != ISD::ANY_EXTEND)
Count += 1;
if (Count == 0)
- return 0;
+ return nullptr;
if (Count == 1) {
// Prefer to use normal shift instructions over RISBG, since they can handle
// all cases and are sometimes shorter.
if (N->getOpcode() != ISD::AND)
- return 0;
+ return nullptr;
// Prefer register extensions like LLC over RISBG. Also prefer to start
// out with normal ANDs if one instruction would be enough. We can convert
@@ -889,7 +891,7 @@ SDNode *SystemZDAGToDAGISel::tryRISBGZero(SDNode *N) {
N = CurDAG->UpdateNodeOperands(N, N->getOperand(0), NewMask);
return SelectCode(N);
}
- return 0;
+ return nullptr;
}
}
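tryRISBGZero, like Select itself, returns nullptr to mean "not handled here, fall back to the generic selector", which the `return 0;` edits now state explicitly. Schematically (illustrative shape, not the real ISel types):

    #include <cstdio>

    struct Node { int Opcode; };

    // Returns a replacement node, or nullptr when the caller should fall back
    // to the generic table-driven selector (SelectCode in the real ISel).
    static Node *trySpecialSelect(Node *N) {
      if (N->Opcode != 42)
        return nullptr;  // not handled here
      static Node Replacement{7};
      return &Replacement;
    }

    int main() {
      Node A{42}, B{13};
      std::printf("%d %d\n", trySpecialSelect(&A) != nullptr,
                  trySpecialSelect(&B) != nullptr); // 1 0
    }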
@@ -927,7 +929,7 @@ SDNode *SystemZDAGToDAGISel::tryRxSBG(SDNode *N, unsigned Opcode) {
// Do nothing if neither operand is suitable.
if (Count[0] == 0 && Count[1] == 0)
- return 0;
+ return nullptr;
// Pick the deepest second operand.
unsigned I = Count[0] > Count[1] ? 0 : 1;
@@ -937,7 +939,7 @@ SDNode *SystemZDAGToDAGISel::tryRxSBG(SDNode *N, unsigned Opcode) {
if (Opcode == SystemZ::ROSBG && (RxSBG[I].Mask & 0xff) == 0)
if (auto *Load = dyn_cast<LoadSDNode>(Op0.getNode()))
if (Load->getMemoryVT() == MVT::i8)
- return 0;
+ return nullptr;
// See whether we can avoid an AND in the first operand by converting
// ROSBG to RISBG.
@@ -986,8 +988,8 @@ bool SystemZDAGToDAGISel::canUseBlockOperation(StoreSDNode *Store,
return true;
// Otherwise we need to check whether there's an alias.
- const Value *V1 = Load->getSrcValue();
- const Value *V2 = Store->getSrcValue();
+ const Value *V1 = Load->getMemOperand()->getValue();
+ const Value *V2 = Store->getMemOperand()->getValue();
if (!V1 || !V2)
return false;
@@ -1037,11 +1039,11 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) {
if (Node->isMachineOpcode()) {
DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n");
Node->setNodeId(-1);
- return 0;
+ return nullptr;
}
unsigned Opcode = Node->getOpcode();
- SDNode *ResNode = 0;
+ SDNode *ResNode = nullptr;
switch (Opcode) {
case ISD::OR:
if (Node->getOperand(1).getOpcode() != ISD::Constant)
@@ -1114,7 +1116,7 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) {
ResNode = SelectCode(Node);
DEBUG(errs() << "=> ";
- if (ResNode == NULL || ResNode == Node)
+ if (ResNode == nullptr || ResNode == Node)
Node->dump(CurDAG);
else
ResNode->dump(CurDAG);
diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp
index 714b6c9..6fe1fb9 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -11,8 +11,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "systemz-lower"
-
#include "SystemZISelLowering.h"
#include "SystemZCallingConv.h"
#include "SystemZConstantPoolValue.h"
@@ -26,6 +24,8 @@
using namespace llvm;
+#define DEBUG_TYPE "systemz-lower"
+
namespace {
// Represents a sequence for extracting a 0/1 value from an IPM result:
// (((X ^ XORValue) + AddValue) >> Bit)
@@ -424,7 +424,7 @@ getSingleConstraintMatchWeight(AsmOperandInfo &info,
Value *CallOperandVal = info.CallOperandVal;
// If we don't have a value, we can't do a match,
// but allow it at the lowest weight.
- if (CallOperandVal == NULL)
+ if (!CallOperandVal)
return CW_Default;
Type *type = CallOperandVal->getType();
// Look at the constraint type.
@@ -492,7 +492,7 @@ parseRegisterNumber(const std::string &Constraint,
if (Index < 16 && Map[Index])
return std::make_pair(Map[Index], RC);
}
- return std::make_pair(0u, static_cast<TargetRegisterClass*>(0));
+ return std::make_pair(0U, nullptr);
}
std::pair<unsigned, const TargetRegisterClass *> SystemZTargetLowering::
@@ -772,8 +772,8 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
}
// Join the stores, which are independent of one another.
Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
- &MemOps[NumFixedFPRs],
- SystemZ::NumArgFPRs - NumFixedFPRs);
+ makeArrayRef(&MemOps[NumFixedFPRs],
+ SystemZ::NumArgFPRs-NumFixedFPRs));
}
}
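makeArrayRef(&MemOps[NumFixedFPRs], ...) wraps only the tail of a fixed-size array, so the TokenFactor joins just the stores that were actually emitted. A plain suffix-span equivalent (simplified helper, not LLVM's):

    #include <cstddef>
    #include <cstdio>

    struct IntSpan { const int *Data; size_t Size; };

    static IntSpan makeSpan(const int *P, size_t N) { return {P, N}; }

    int main() {
      const int MemOps[6] = {10, 20, 30, 40, 50, 60};
      size_t NumFixed = 2;
      // Suffix span over elements NumFixed .. 5, like the makeArrayRef call.
      IntSpan Tail = makeSpan(&MemOps[NumFixed], 6 - NumFixed);
      int Sum = 0;
      for (size_t I = 0; I != Tail.Size; ++I) Sum += Tail.Data[I];
      std::printf("%d\n", Sum);  // 180
    }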
@@ -875,8 +875,7 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
// Join the stores, which are independent of one another.
if (!MemOpChains.empty())
- Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
- &MemOpChains[0], MemOpChains.size());
+ Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
// Accept direct calls by converting symbolic call addresses to the
// associated Target* opcodes. Force %r1 to be used for indirect
@@ -919,8 +918,8 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
// Emit the call.
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
if (IsTailCall)
- return DAG.getNode(SystemZISD::SIBCALL, DL, NodeTys, &Ops[0], Ops.size());
- Chain = DAG.getNode(SystemZISD::CALL, DL, NodeTys, &Ops[0], Ops.size());
+ return DAG.getNode(SystemZISD::SIBCALL, DL, NodeTys, Ops);
+ Chain = DAG.getNode(SystemZISD::CALL, DL, NodeTys, Ops);
Glue = Chain.getValue(1);
// Mark the end of the call, which is glued to the call itself.
@@ -996,8 +995,7 @@ SystemZTargetLowering::LowerReturn(SDValue Chain,
if (Glue.getNode())
RetOps.push_back(Glue);
- return DAG.getNode(SystemZISD::RET_FLAG, DL, MVT::Other,
- RetOps.data(), RetOps.size());
+ return DAG.getNode(SystemZISD::RET_FLAG, DL, MVT::Other, RetOps);
}
SDValue SystemZTargetLowering::
@@ -1489,7 +1487,7 @@ static void adjustForTestUnderMask(SelectionDAG &DAG, Comparison &C) {
// Check whether the nonconstant input is an AND with a constant mask.
Comparison NewC(C);
uint64_t MaskVal;
- ConstantSDNode *Mask = 0;
+ ConstantSDNode *Mask = nullptr;
if (C.Op0.getOpcode() == ISD::AND) {
NewC.Op0 = C.Op0.getOperand(0);
NewC.Op1 = C.Op0.getOperand(1);
@@ -1779,7 +1777,7 @@ SDValue SystemZTargetLowering::lowerSELECT_CC(SDValue Op,
Ops.push_back(Glue);
SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
- return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, VTs, &Ops[0], Ops.size());
+ return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, VTs, Ops);
}
SDValue SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode *Node,
@@ -1971,7 +1969,7 @@ SDValue SystemZTargetLowering::lowerVASTART(SDValue Op,
false, false, 0);
Offset += 8;
}
- return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps, NumFields);
+ return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
}
SDValue SystemZTargetLowering::lowerVACOPY(SDValue Op,
@@ -2012,7 +2010,7 @@ lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const {
SDValue Result = DAG.getNode(ISD::ADD, DL, MVT::i64, NewSP, ArgAdjust);
SDValue Ops[2] = { Result, Chain };
- return DAG.getMergeValues(Ops, 2, DL);
+ return DAG.getMergeValues(Ops, DL);
}
SDValue SystemZTargetLowering::lowerSMUL_LOHI(SDValue Op,
@@ -2054,7 +2052,7 @@ SDValue SystemZTargetLowering::lowerSMUL_LOHI(SDValue Op,
SDValue NegSum = DAG.getNode(ISD::ADD, DL, VT, NegLLTimesRH, NegLHTimesRL);
Ops[1] = DAG.getNode(ISD::SUB, DL, VT, Ops[1], NegSum);
}
- return DAG.getMergeValues(Ops, 2, DL);
+ return DAG.getMergeValues(Ops, DL);
}
SDValue SystemZTargetLowering::lowerUMUL_LOHI(SDValue Op,
@@ -2073,7 +2071,7 @@ SDValue SystemZTargetLowering::lowerUMUL_LOHI(SDValue Op,
// low half first, so the results are in reverse order.
lowerGR128Binary(DAG, DL, VT, SystemZ::AEXT128_64, SystemZISD::UMUL_LOHI64,
Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
- return DAG.getMergeValues(Ops, 2, DL);
+ return DAG.getMergeValues(Ops, DL);
}
SDValue SystemZTargetLowering::lowerSDIVREM(SDValue Op,
@@ -2100,7 +2098,7 @@ SDValue SystemZTargetLowering::lowerSDIVREM(SDValue Op,
SDValue Ops[2];
lowerGR128Binary(DAG, DL, VT, SystemZ::AEXT128_64, Opcode,
Op0, Op1, Ops[1], Ops[0]);
- return DAG.getMergeValues(Ops, 2, DL);
+ return DAG.getMergeValues(Ops, DL);
}
SDValue SystemZTargetLowering::lowerUDIVREM(SDValue Op,
@@ -2118,7 +2116,7 @@ SDValue SystemZTargetLowering::lowerUDIVREM(SDValue Op,
else
lowerGR128Binary(DAG, DL, VT, SystemZ::ZEXT128_64, SystemZISD::UDIVREM64,
Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
- return DAG.getMergeValues(Ops, 2, DL);
+ return DAG.getMergeValues(Ops, DL);
}
SDValue SystemZTargetLowering::lowerOR(SDValue Op, SelectionDAG &DAG) const {
@@ -2127,8 +2125,8 @@ SDValue SystemZTargetLowering::lowerOR(SDValue Op, SelectionDAG &DAG) const {
// Get the known-zero masks for each operand.
SDValue Ops[] = { Op.getOperand(0), Op.getOperand(1) };
APInt KnownZero[2], KnownOne[2];
- DAG.ComputeMaskedBits(Ops[0], KnownZero[0], KnownOne[0]);
- DAG.ComputeMaskedBits(Ops[1], KnownZero[1], KnownOne[1]);
+ DAG.computeKnownBits(Ops[0], KnownZero[0], KnownOne[0]);
+ DAG.computeKnownBits(Ops[1], KnownZero[1], KnownOne[1]);
// See if the upper 32 bits of one operand and the lower 32 bits of the
// other are known zero. They are the low and high operands respectively.
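
The rename from ComputeMaskedBits to computeKnownBits is mechanical, but the query the comment above leans on is worth a sketch: the call fills two APInt masks, and bits set in KnownZero are bits proven zero in the value. A minimal illustration, assuming a 64-bit SDValue Val and a SelectionDAG &DAG in scope:

    APInt KnownZero, KnownOne;
    DAG.computeKnownBits(Val, KnownZero, KnownOne);
    // 32 leading ones in KnownZero prove the upper half of Val is zero,
    // which is essentially the condition the lowerOR code checks for.
    bool UpperHalfZero = KnownZero.countLeadingOnes() >= 32;
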
@@ -2259,7 +2257,6 @@ SDValue SystemZTargetLowering::lowerATOMIC_LOAD_OP(SDValue Op,
SDValue Ops[] = { ChainIn, AlignedAddr, Src2, BitShift, NegBitShift,
DAG.getConstant(BitSize, WideVT) };
SDValue AtomicOp = DAG.getMemIntrinsicNode(Opcode, DL, VTList, Ops,
- array_lengthof(Ops),
NarrowVT, MMO);
// Rotate the result of the final CS so that the field is in the lower
@@ -2269,7 +2266,7 @@ SDValue SystemZTargetLowering::lowerATOMIC_LOAD_OP(SDValue Op,
SDValue Result = DAG.getNode(ISD::ROTL, DL, WideVT, AtomicOp, ResultShift);
SDValue RetOps[2] = { Result, AtomicOp.getValue(1) };
- return DAG.getMergeValues(RetOps, 2, DL);
+ return DAG.getMergeValues(RetOps, DL);
}
// Op is an ATOMIC_LOAD_SUB operation. Lower 8- and 16-bit operations
@@ -2351,8 +2348,7 @@ SDValue SystemZTargetLowering::lowerATOMIC_CMP_SWAP(SDValue Op,
SDValue Ops[] = { ChainIn, AlignedAddr, CmpVal, SwapVal, BitShift,
NegBitShift, DAG.getConstant(BitSize, WideVT) };
SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAPW, DL,
- VTList, Ops, array_lengthof(Ops),
- NarrowVT, MMO);
+ VTList, Ops, NarrowVT, MMO);
return AtomicOp;
}
@@ -2388,7 +2384,7 @@ SDValue SystemZTargetLowering::lowerPREFETCH(SDValue Op,
Op.getOperand(1)
};
return DAG.getMemIntrinsicNode(SystemZISD::PREFETCH, SDLoc(Op),
- Node->getVTList(), Ops, array_lengthof(Ops),
+ Node->getVTList(), Ops,
Node->getMemoryVT(), Node->getMemOperand());
}
@@ -2517,7 +2513,7 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
OPCODE(ATOMIC_CMP_SWAPW);
OPCODE(PREFETCH);
}
- return NULL;
+ return nullptr;
#undef OPCODE
}
@@ -3116,7 +3112,7 @@ SystemZTargetLowering::emitMemMemWrapper(MachineInstr *MI,
// When generating more than one CLC, all but the last will need to
// branch to the end when a difference is found.
MachineBasicBlock *EndMBB = (Length > 256 && Opcode == SystemZ::CLC ?
- splitBlockAfter(MI, MBB) : 0);
+ splitBlockAfter(MI, MBB) : nullptr);
// Check for the loop form, in which operand 5 is the trip count.
if (MI->getNumExplicitOperands() > 5) {
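
The SystemZISelLowering.cpp hunks above are one instance of a tree-wide SelectionDAG API change in this rebase: getNode, getMergeValues and getMemIntrinsicNode now take ArrayRef<SDValue> instead of a pointer/count pair. A minimal before/after sketch, assuming SelectionDAG &DAG, SDLoc DL and SmallVector<SDValue, 8> MemOpChains in scope:

    // Old form: raw pointer plus an explicit element count.
    SDValue C1 = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
                             &MemOpChains[0], MemOpChains.size());

    // New form: ArrayRef<SDValue> converts implicitly from SmallVector and
    // from fixed-size C arrays such as "SDValue Ops[2]", so the length can
    // no longer drift out of sync with the data.
    SDValue C2 = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
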
diff --git a/lib/Target/SystemZ/SystemZInstrFormats.td b/lib/Target/SystemZ/SystemZInstrFormats.td
index 50badf8..add675a 100644
--- a/lib/Target/SystemZ/SystemZInstrFormats.td
+++ b/lib/Target/SystemZ/SystemZInstrFormats.td
@@ -516,7 +516,7 @@ class InstSS<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern>
//
// Binary:
// One register output operand and two input operands. The first
-// input operand is always a register and he second may be a register,
+// input operand is always a register and the second may be a register,
// immediate or memory.
//
// Shift:
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.cpp b/lib/Target/SystemZ/SystemZInstrInfo.cpp
index e20834c..6a18b2d 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ b/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -17,12 +17,12 @@
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+using namespace llvm;
+
#define GET_INSTRINFO_CTOR_DTOR
#define GET_INSTRMAP_INFO
#include "SystemZGenInstrInfo.inc"
-using namespace llvm;
-
// Return a mask with Count low bits set.
static uint64_t allOnes(unsigned int Count) {
return Count == 0 ? 0 : (uint64_t(1) << (Count - 1) << 1) - 1;
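
A side note on the allOnes context line above: the double shift is deliberate. Shifting a 64-bit value by its full width in one step is undefined behavior in C++, so the helper shifts by Count - 1 and then by 1, which stays defined for every Count up to 64:

    // allOnes(64): uint64_t(1) << 63 sets the top bit, the extra << 1 wraps
    // to 0 (well-defined for unsigned types), and 0 - 1 underflows to ~0ULL,
    // i.e. all 64 bits set. The naive (uint64_t(1) << Count) - 1 would be
    // undefined behavior at Count == 64.
    static uint64_t allOnes(unsigned int Count) {
      return Count == 0 ? 0 : (uint64_t(1) << (Count - 1) << 1) - 1;
    }
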
@@ -284,11 +284,11 @@ bool SystemZInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
std::next(I)->eraseFromParent();
Cond.clear();
- FBB = 0;
+ FBB = nullptr;
// Delete the JMP if it's equivalent to a fall-through.
if (MBB.isLayoutSuccessor(Branch.Target->getMBB())) {
- TBB = 0;
+ TBB = nullptr;
I->eraseFromParent();
I = MBB.end();
continue;
@@ -418,7 +418,7 @@ bool SystemZInstrInfo::analyzeCompare(const MachineInstr *MI,
static MachineInstr *getDef(unsigned Reg,
const MachineRegisterInfo *MRI) {
if (TargetRegisterInfo::isPhysicalRegister(Reg))
- return 0;
+ return nullptr;
return MRI->getUniqueVRegDef(Reg);
}
@@ -442,7 +442,7 @@ static void eraseIfDead(MachineInstr *MI, const MachineRegisterInfo *MRI) {
static bool removeIPMBasedCompare(MachineInstr *Compare, unsigned SrcReg,
const MachineRegisterInfo *MRI,
const TargetRegisterInfo *TRI) {
- MachineInstr *LGFR = 0;
+ MachineInstr *LGFR = nullptr;
MachineInstr *RLL = getDef(SrcReg, MRI);
if (RLL && RLL->getOpcode() == SystemZ::LGFR) {
LGFR = RLL;
@@ -542,7 +542,7 @@ PredicateInstruction(MachineInstr *MI,
MI->setDesc(get(CondOpcode));
MachineInstrBuilder(*MI->getParent()->getParent(), MI)
.addImm(CCValid).addImm(CCMask)
- .addReg(SystemZ::CC, RegState::Implicit);;
+ .addReg(SystemZ::CC, RegState::Implicit);
return true;
}
}
@@ -740,7 +740,7 @@ SystemZInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
return finishConvertToThreeAddress(MI, MIB, LV);
}
}
- return 0;
+ return nullptr;
}
MachineInstr *
@@ -761,12 +761,12 @@ SystemZInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
.addFrameIndex(FrameIndex).addImm(0)
.addImm(MI->getOperand(2).getImm());
}
- return 0;
+ return nullptr;
}
// All other cases require a single operand.
if (Ops.size() != 1)
- return 0;
+ return nullptr;
unsigned OpNum = Ops[0];
assert(Size == MF.getRegInfo()
@@ -858,14 +858,14 @@ SystemZInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
}
}
- return 0;
+ return nullptr;
}
MachineInstr *
SystemZInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr* MI,
const SmallVectorImpl<unsigned> &Ops,
MachineInstr* LoadMI) const {
- return 0;
+ return nullptr;
}
bool
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.h b/lib/Target/SystemZ/SystemZInstrInfo.h
index 55f80af..09aee5d 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.h
+++ b/lib/Target/SystemZ/SystemZInstrInfo.h
@@ -229,7 +229,7 @@ public:
// BRANCH exists, return the opcode for the latter, otherwise return 0.
// MI, if nonnull, is the compare instruction.
unsigned getCompareAndBranch(unsigned Opcode,
- const MachineInstr *MI = 0) const;
+ const MachineInstr *MI = nullptr) const;
// Emit code before MBBI in MI to move immediate value Value into
// physical register Reg.
diff --git a/lib/Target/SystemZ/SystemZLongBranch.cpp b/lib/Target/SystemZ/SystemZLongBranch.cpp
index 1b88d06..8081334 100644
--- a/lib/Target/SystemZ/SystemZLongBranch.cpp
+++ b/lib/Target/SystemZ/SystemZLongBranch.cpp
@@ -53,8 +53,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "systemz-long-branch"
-
#include "SystemZTargetMachine.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -68,6 +66,8 @@
using namespace llvm;
+#define DEBUG_TYPE "systemz-long-branch"
+
STATISTIC(LongBranches, "Number of long branches.");
namespace {
@@ -111,7 +111,8 @@ struct TerminatorInfo {
// otherwise it is zero.
unsigned ExtraRelaxSize;
- TerminatorInfo() : Branch(0), Size(0), TargetBlock(0), ExtraRelaxSize(0) {}
+ TerminatorInfo() : Branch(nullptr), Size(0), TargetBlock(0),
+ ExtraRelaxSize(0) {}
};
// Used to keep track of the current position while iterating over the blocks.
@@ -131,13 +132,13 @@ class SystemZLongBranch : public MachineFunctionPass {
public:
static char ID;
SystemZLongBranch(const SystemZTargetMachine &tm)
- : MachineFunctionPass(ID), TII(0) {}
+ : MachineFunctionPass(ID), TII(nullptr) {}
const char *getPassName() const override {
return "SystemZ Long Branch";
}
- bool runOnMachineFunction(MachineFunction &F);
+ bool runOnMachineFunction(MachineFunction &F) override;
private:
void skipNonTerminators(BlockPosition &Position, MBBInfo &Block);
@@ -424,7 +425,7 @@ void SystemZLongBranch::relaxBranch(TerminatorInfo &Terminator) {
Terminator.Size += Terminator.ExtraRelaxSize;
Terminator.ExtraRelaxSize = 0;
- Terminator.Branch = 0;
+ Terminator.Branch = nullptr;
++LongBranches;
}
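
SystemZLongBranch.cpp is the first of several files in this commit (SystemZSelectionDAGInfo.cpp, SystemZShortenInst.cpp and SystemZSubtarget.cpp follow the same pattern below) where #define DEBUG_TYPE moves from before the includes to after them, so that no header is ever compiled with a stray DEBUG_TYPE in effect. The preamble layout the files converge on, using this file's names:

    #include "SystemZTargetMachine.h"
    #include "llvm/ADT/Statistic.h"
    #include "llvm/CodeGen/MachineFunctionPass.h"

    using namespace llvm;

    #define DEBUG_TYPE "systemz-long-branch"  // after the last #include

    STATISTIC(LongBranches, "Number of long branches.");
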
diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/lib/Target/SystemZ/SystemZRegisterInfo.cpp
index 1ac4e32..a04d703 100644
--- a/lib/Target/SystemZ/SystemZRegisterInfo.cpp
+++ b/lib/Target/SystemZ/SystemZRegisterInfo.cpp
@@ -12,17 +12,17 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+using namespace llvm;
+
#define GET_REGINFO_TARGET_DESC
#include "SystemZGenRegisterInfo.inc"
-using namespace llvm;
-
SystemZRegisterInfo::SystemZRegisterInfo(SystemZTargetMachine &tm)
: SystemZGenRegisterInfo(SystemZ::R14D), TM(tm) {}
-const uint16_t*
+const MCPhysReg*
SystemZRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
- static const uint16_t CalleeSavedRegs[] = {
+ static const MCPhysReg CalleeSavedRegs[] = {
SystemZ::R6D, SystemZ::R7D, SystemZ::R8D, SystemZ::R9D,
SystemZ::R10D, SystemZ::R11D, SystemZ::R12D, SystemZ::R13D,
SystemZ::R14D, SystemZ::R15D,
diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.h b/lib/Target/SystemZ/SystemZRegisterInfo.h
index 4ad8048..e236f71 100644
--- a/lib/Target/SystemZ/SystemZRegisterInfo.h
+++ b/lib/Target/SystemZ/SystemZRegisterInfo.h
@@ -49,7 +49,7 @@ public:
bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const override {
return true;
}
- const uint16_t *getCalleeSavedRegs(const MachineFunction *MF = 0) const
+ const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF = nullptr) const
override;
BitVector getReservedRegs(const MachineFunction &MF) const override;
void eliminateFrameIndex(MachineBasicBlock::iterator MI,
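
MCPhysReg is the MC-layer typedef for a physical register number (a uint16_t at this point), so the uint16_t-to-MCPhysReg change is layout-compatible and purely about making intent explicit. A sketch of the resulting definition, restricted to the GPR entries the hunk above shows (LLVM callee-saved lists stay 0-terminated):

    const MCPhysReg *
    SystemZRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
      static const MCPhysReg CalleeSavedRegs[] = {
        SystemZ::R6D,  SystemZ::R7D,  SystemZ::R8D,  SystemZ::R9D,
        SystemZ::R10D, SystemZ::R11D, SystemZ::R12D, SystemZ::R13D,
        SystemZ::R14D, SystemZ::R15D,
        // ...FPR entries elided in the hunk above...
        0  // list terminator
      };
      return CalleeSavedRegs;
    }
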
diff --git a/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp b/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp
index 7635bdc..97abee3 100644
--- a/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp
+++ b/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp
@@ -11,12 +11,13 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "systemz-selectiondag-info"
#include "SystemZTargetMachine.h"
#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;
+#define DEBUG_TYPE "systemz-selectiondag-info"
+
SystemZSelectionDAGInfo::
SystemZSelectionDAGInfo(const SystemZTargetMachine &TM)
: TargetSelectionDAGInfo(TM) {
@@ -230,7 +231,7 @@ EmitTargetCodeForMemchr(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
Ops.push_back(DAG.getConstant(SystemZ::CCMASK_SRST_FOUND, MVT::i32));
Ops.push_back(Glue);
VTs = DAG.getVTList(PtrVT, MVT::Glue);
- End = DAG.getNode(SystemZISD::SELECT_CCMASK, DL, VTs, &Ops[0], Ops.size());
+ End = DAG.getNode(SystemZISD::SELECT_CCMASK, DL, VTs, Ops);
return std::make_pair(End, Chain);
}
diff --git a/lib/Target/SystemZ/SystemZShortenInst.cpp b/lib/Target/SystemZ/SystemZShortenInst.cpp
index 9350779..aad899c 100644
--- a/lib/Target/SystemZ/SystemZShortenInst.cpp
+++ b/lib/Target/SystemZ/SystemZShortenInst.cpp
@@ -13,13 +13,13 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "systemz-shorten-inst"
-
#include "SystemZTargetMachine.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
using namespace llvm;
+#define DEBUG_TYPE "systemz-shorten-inst"
+
namespace {
class SystemZShortenInst : public MachineFunctionPass {
public:
@@ -31,7 +31,7 @@ public:
}
bool processBlock(MachineBasicBlock &MBB);
- bool runOnMachineFunction(MachineFunction &F);
+ bool runOnMachineFunction(MachineFunction &F) override;
private:
bool shortenIIF(MachineInstr &MI, unsigned *GPRMap, unsigned LiveOther,
@@ -53,7 +53,7 @@ FunctionPass *llvm::createSystemZShortenInstPass(SystemZTargetMachine &TM) {
}
SystemZShortenInst::SystemZShortenInst(const SystemZTargetMachine &tm)
- : MachineFunctionPass(ID), TII(0), LowGPRs(), HighGPRs() {
+ : MachineFunctionPass(ID), TII(nullptr), LowGPRs(), HighGPRs() {
// Set up LowGPRs and HighGPRs.
for (unsigned I = 0; I < 16; ++I) {
LowGPRs[SystemZMC::GR32Regs[I]] |= 1 << I;
diff --git a/lib/Target/SystemZ/SystemZSubtarget.cpp b/lib/Target/SystemZ/SystemZSubtarget.cpp
index 33d7e06..a011157 100644
--- a/lib/Target/SystemZ/SystemZSubtarget.cpp
+++ b/lib/Target/SystemZ/SystemZSubtarget.cpp
@@ -12,12 +12,14 @@
#include "llvm/IR/GlobalValue.h"
#include "llvm/Support/Host.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "systemz-subtarget"
+
#define GET_SUBTARGETINFO_TARGET_DESC
#define GET_SUBTARGETINFO_CTOR
#include "SystemZGenSubtargetInfo.inc"
-using namespace llvm;
-
// Pin the vtable to this file.
void SystemZSubtarget::anchor() {}
diff --git a/lib/Target/Target.cpp b/lib/Target/Target.cpp
index 627786d..d277f82 100644
--- a/lib/Target/Target.cpp
+++ b/lib/Target/Target.cpp
@@ -24,14 +24,6 @@
using namespace llvm;
-inline DataLayout *unwrap(LLVMTargetDataRef P) {
- return reinterpret_cast<DataLayout*>(P);
-}
-
-inline LLVMTargetDataRef wrap(const DataLayout *P) {
- return reinterpret_cast<LLVMTargetDataRef>(const_cast<DataLayout*>(P));
-}
-
inline TargetLibraryInfo *unwrap(LLVMTargetLibraryInfoRef P) {
return reinterpret_cast<TargetLibraryInfo*>(P);
}
diff --git a/lib/Target/TargetLoweringObjectFile.cpp b/lib/Target/TargetLoweringObjectFile.cpp
index 50b1e31..39e0459 100644
--- a/lib/Target/TargetLoweringObjectFile.cpp
+++ b/lib/Target/TargetLoweringObjectFile.cpp
@@ -46,7 +46,7 @@ void TargetLoweringObjectFile::Initialize(MCContext &ctx,
InitMCObjectFileInfo(TM.getTargetTriple(),
TM.getRelocationModel(), TM.getCodeModel(), *Ctx);
}
-
+
TargetLoweringObjectFile::~TargetLoweringObjectFile() {
}
@@ -62,7 +62,7 @@ static bool isSuitableForBSS(const GlobalVariable *GV, bool NoZerosInBSS) {
return false;
// If the global has an explicit section specified, don't put it in BSS.
- if (!GV->getSection().empty())
+ if (GV->hasSection())
return false;
// If -nozero-initialized-in-bss is specified, don't ever use BSS.
@@ -138,7 +138,7 @@ SectionKind TargetLoweringObjectFile::getKindForGlobal(const GlobalValue *GV,
// Early exit - functions should be always in text sections.
const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
- if (GVar == 0)
+ if (!GVar)
return SectionKind::getText();
// Handle thread-local data first.
@@ -284,10 +284,10 @@ TargetLoweringObjectFile::SelectSectionForGlobal(const GlobalValue *GV,
if (Kind.isText())
return getTextSection();
- if (Kind.isBSS() && BSSSection != 0)
+ if (Kind.isBSS() && BSSSection != nullptr)
return BSSSection;
- if (Kind.isReadOnly() && ReadOnlySection != 0)
+ if (Kind.isReadOnly() && ReadOnlySection != nullptr)
return ReadOnlySection;
return getDataSection();
@@ -298,7 +298,7 @@ TargetLoweringObjectFile::SelectSectionForGlobal(const GlobalValue *GV,
/// should be placed in.
const MCSection *
TargetLoweringObjectFile::getSectionForConstant(SectionKind Kind) const {
- if (Kind.isReadOnly() && ReadOnlySection != 0)
+ if (Kind.isReadOnly() && ReadOnlySection != nullptr)
return ReadOnlySection;
return DataSection;
diff --git a/lib/Target/TargetMachine.cpp b/lib/Target/TargetMachine.cpp
index fe3c870..8365f64 100644
--- a/lib/Target/TargetMachine.cpp
+++ b/lib/Target/TargetMachine.cpp
@@ -21,6 +21,7 @@
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCCodeGenInfo.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCTargetOptions.h"
#include "llvm/MC/SectionKind.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetLowering.h"
@@ -28,24 +29,6 @@
using namespace llvm;
//---------------------------------------------------------------------------
-// Command-line options that tend to be useful on more than one back-end.
-//
-
-namespace llvm {
- bool HasDivModLibcall;
- bool AsmVerbosityDefault(false);
-}
-
-static cl::opt<bool>
-DataSections("fdata-sections",
- cl::desc("Emit data into separate sections"),
- cl::init(false));
-static cl::opt<bool>
-FunctionSections("ffunction-sections",
- cl::desc("Emit functions into separate sections"),
- cl::init(false));
-
-//---------------------------------------------------------------------------
// TargetMachine Class
//
@@ -53,12 +36,7 @@ TargetMachine::TargetMachine(const Target &T,
StringRef TT, StringRef CPU, StringRef FS,
const TargetOptions &Options)
: TheTarget(T), TargetTriple(TT), TargetCPU(CPU), TargetFS(FS),
- CodeGenInfo(0), AsmInfo(0),
- MCRelaxAll(false),
- MCNoExecStack(false),
- MCSaveTempLabels(false),
- MCUseCFI(true),
- MCUseDwarfDirectory(false),
+ CodeGenInfo(nullptr), AsmInfo(nullptr),
RequireStructuredCFG(false),
Options(Options) {
}
@@ -89,6 +67,8 @@ void TargetMachine::resetTargetOptions(const MachineFunction *MF) const {
RESET_OPTION(NoNaNsFPMath, "no-nans-fp-math");
RESET_OPTION(UseSoftFloat, "use-soft-float");
RESET_OPTION(DisableTailCalls, "disable-tail-calls");
+
+ TO.MCOptions.SanitizeAddress = F->hasFnAttribute(Attribute::SanitizeAddress);
}
/// getRelocationModel - Returns the code generation relocation model. The
@@ -126,19 +106,13 @@ static TLSModel::Model getSelectedTLSModel(const GlobalVariable *Var) {
}
TLSModel::Model TargetMachine::getTLSModel(const GlobalValue *GV) const {
- // If GV is an alias then use the aliasee for determining
- // thread-localness.
- if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
- GV = GA->getAliasedGlobal();
- const GlobalVariable *Var = cast<GlobalVariable>(GV);
-
- bool isLocal = Var->hasLocalLinkage();
- bool isDeclaration = Var->isDeclaration();
+ bool isLocal = GV->hasLocalLinkage();
+ bool isDeclaration = GV->isDeclaration();
bool isPIC = getRelocationModel() == Reloc::PIC_;
bool isPIE = Options.PositionIndependentExecutable;
// FIXME: what should we do for protected and internal visibility?
// For variables, is internal different from hidden?
- bool isHidden = Var->hasHiddenVisibility();
+ bool isHidden = GV->hasHiddenVisibility();
TLSModel::Model Model;
if (isPIC && !isPIE) {
@@ -153,10 +127,13 @@ TLSModel::Model TargetMachine::getTLSModel(const GlobalValue *GV) const {
Model = TLSModel::InitialExec;
}
- // If the user specified a more specific model, use that.
- TLSModel::Model SelectedModel = getSelectedTLSModel(Var);
- if (SelectedModel > Model)
- return SelectedModel;
+ const GlobalVariable *Var = dyn_cast<GlobalVariable>(GV);
+ if (Var) {
+ // If the user specified a more specific model, use that.
+ TLSModel::Model SelectedModel = getSelectedTLSModel(Var);
+ if (SelectedModel > Model)
+ return SelectedModel;
+ }
return Model;
}
@@ -174,28 +151,28 @@ void TargetMachine::setOptLevel(CodeGenOpt::Level Level) const {
CodeGenInfo->setOptLevel(Level);
}
-bool TargetMachine::getAsmVerbosityDefault() {
- return AsmVerbosityDefault;
+bool TargetMachine::getAsmVerbosityDefault() const {
+ return Options.MCOptions.AsmVerbose;
}
void TargetMachine::setAsmVerbosityDefault(bool V) {
- AsmVerbosityDefault = V;
+ Options.MCOptions.AsmVerbose = V;
}
-bool TargetMachine::getFunctionSections() {
- return FunctionSections;
+bool TargetMachine::getFunctionSections() const {
+ return Options.FunctionSections;
}
-bool TargetMachine::getDataSections() {
- return DataSections;
+bool TargetMachine::getDataSections() const {
+ return Options.DataSections;
}
void TargetMachine::setFunctionSections(bool V) {
- FunctionSections = V;
+ Options.FunctionSections = V;
}
void TargetMachine::setDataSections(bool V) {
- DataSections = V;
+ Options.DataSections = V;
}
void TargetMachine::getNameWithPrefix(SmallVectorImpl<char> &Name,
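
The net effect of the TargetMachine.cpp hunks is that process-wide state (the AsmVerbosityDefault global and the -fdata-sections/-ffunction-sections cl::opt flags) becomes per-machine state on TargetOptions and MCTargetOptions, with the getters now const. A usage sketch, assuming TM is a constructed TargetMachine:

    // Section splitting and asm verbosity now live on the machine's own
    // options, so two TargetMachines in one process no longer share them.
    if (!TM.getDataSections())
      TM.setDataSections(true);        // forwards to Options.DataSections
    TM.setFunctionSections(true);      // forwards to Options.FunctionSections
    TM.setAsmVerbosityDefault(true);   // forwards to Options.MCOptions.AsmVerbose
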
diff --git a/lib/Target/TargetMachineC.cpp b/lib/Target/TargetMachineC.cpp
index a2829d4..20923c9 100644
--- a/lib/Target/TargetMachineC.cpp
+++ b/lib/Target/TargetMachineC.cpp
@@ -18,6 +18,7 @@
#include "llvm/IR/Module.h"
#include "llvm/PassManager.h"
#include "llvm/Support/CodeGen.h"
+#include "llvm/Support/FileSystem.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/Host.h"
#include "llvm/Support/TargetRegistry.h"
@@ -29,23 +30,6 @@
using namespace llvm;
-inline DataLayout *unwrap(LLVMTargetDataRef P) {
- return reinterpret_cast<DataLayout*>(P);
-}
-
-inline LLVMTargetDataRef wrap(const DataLayout *P) {
- return reinterpret_cast<LLVMTargetDataRef>(const_cast<DataLayout*>(P));
-}
-
-inline TargetLibraryInfo *unwrap(LLVMTargetLibraryInfoRef P) {
- return reinterpret_cast<TargetLibraryInfo*>(P);
-}
-
-inline LLVMTargetLibraryInfoRef wrap(const TargetLibraryInfo *P) {
- TargetLibraryInfo *X = const_cast<TargetLibraryInfo*>(P);
- return reinterpret_cast<LLVMTargetLibraryInfoRef>(X);
-}
-
inline TargetMachine *unwrap(LLVMTargetMachineRef P) {
return reinterpret_cast<TargetMachine*>(P);
}
@@ -62,7 +46,7 @@ inline LLVMTargetRef wrap(const Target * P) {
LLVMTargetRef LLVMGetFirstTarget() {
if(TargetRegistry::begin() == TargetRegistry::end()) {
- return NULL;
+ return nullptr;
}
const Target* target = &*TargetRegistry::begin();
@@ -80,7 +64,7 @@ LLVMTargetRef LLVMGetTargetFromName(const char *Name) {
return wrap(&*IT);
}
- return NULL;
+ return nullptr;
}
LLVMBool LLVMGetTargetFromTriple(const char* TripleStr, LLVMTargetRef *T,
diff --git a/lib/Target/TargetSubtargetInfo.cpp b/lib/Target/TargetSubtargetInfo.cpp
index df8948f..3ca13da 100644
--- a/lib/Target/TargetSubtargetInfo.cpp
+++ b/lib/Target/TargetSubtargetInfo.cpp
@@ -24,11 +24,12 @@ TargetSubtargetInfo::TargetSubtargetInfo() {}
TargetSubtargetInfo::~TargetSubtargetInfo() {}
// Temporary option to compare overall performance change when moving from the
-// SD scheduler to the MachineScheduler pass pipeline. It should be removed
-// before 3.4. The normal way to enable/disable the MachineScheduling pass
-// itself is by using -enable-misched. For targets that already use MI sched
-// (via MySubTarget::enableMachineScheduler()) -misched-bench=false negates the
-// subtarget hook.
+// SD scheduler to the MachineScheduler pass pipeline. This is convenient for
+// benchmarking during the transition from SD to MI scheduling. Once armv7 makes
+// the switch, it should go away. The normal way to enable/disable the
+// MachineScheduling pass itself is by using -enable-misched. For targets that
+// already use MI sched (via MySubTarget::enableMachineScheduler())
+// -misched-bench=false negates the subtarget hook.
static cl::opt<bool> BenchMachineSched("misched-bench", cl::Hidden,
cl::desc("Migrate from the target's default SD scheduler to MI scheduler"));
diff --git a/lib/Target/X86/Android.mk b/lib/Target/X86/Android.mk
index 73031de..0d0a9ca 100644
--- a/lib/Target/X86/Android.mk
+++ b/lib/Target/X86/Android.mk
@@ -12,7 +12,6 @@ x86_codegen_TBLGEN_TABLES := \
x86_codegen_SRC_FILES := \
X86AsmPrinter.cpp \
- X86COFFMachineModuleInfo.cpp \
X86CodeEmitter.cpp \
X86FastISel.cpp \
X86FixupLEAs.cpp \
diff --git a/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp b/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp
index db29228..f3e6b3f 100644
--- a/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp
@@ -11,21 +11,25 @@
#include "X86AsmInstrumentation.h"
#include "X86Operand.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/IR/Function.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstBuilder.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCTargetOptions.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Compiler.h"
-#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
namespace llvm {
namespace {
-static cl::opt<bool> ClAsanInstrumentInlineAssembly(
- "asan-instrument-inline-assembly", cl::desc("instrument inline assembly"),
- cl::Hidden, cl::init(false));
+static cl::opt<bool> ClAsanInstrumentAssembly(
+ "asan-instrument-assembly",
+ cl::desc("instrument assembly with AddressSanitizer checks"), cl::Hidden,
+ cl::init(false));
bool IsStackReg(unsigned Reg) {
return Reg == X86::RSP || Reg == X86::ESP || Reg == X86::SP;
@@ -38,14 +42,14 @@ std::string FuncName(unsigned AccessSize, bool IsWrite) {
class X86AddressSanitizer : public X86AsmInstrumentation {
public:
- X86AddressSanitizer(MCSubtargetInfo &sti) : STI(sti) {}
+ X86AddressSanitizer(const MCSubtargetInfo &STI) : STI(STI) {}
virtual ~X86AddressSanitizer() {}
// X86AsmInstrumentation implementation:
virtual void InstrumentInstruction(
const MCInst &Inst, SmallVectorImpl<MCParsedAsmOperand *> &Operands,
- MCContext &Ctx, MCStreamer &Out) override {
- InstrumentMOV(Inst, Operands, Ctx, Out);
+ MCContext &Ctx, const MCInstrInfo &MII, MCStreamer &Out) override {
+ InstrumentMOV(Inst, Operands, Ctx, MII, Out);
}
// Should be implemented differently in x86_32 and x86_64 subclasses.
@@ -57,13 +61,13 @@ public:
bool IsWrite, MCContext &Ctx, MCStreamer &Out);
void InstrumentMOV(const MCInst &Inst,
SmallVectorImpl<MCParsedAsmOperand *> &Operands,
- MCContext &Ctx, MCStreamer &Out);
+ MCContext &Ctx, const MCInstrInfo &MII, MCStreamer &Out);
void EmitInstruction(MCStreamer &Out, const MCInst &Inst) {
Out.EmitInstruction(Inst, STI);
}
protected:
- MCSubtargetInfo &STI;
+ const MCSubtargetInfo &STI;
};
void X86AddressSanitizer::InstrumentMemOperand(
@@ -83,68 +87,53 @@ void X86AddressSanitizer::InstrumentMemOperand(
void X86AddressSanitizer::InstrumentMOV(
const MCInst &Inst, SmallVectorImpl<MCParsedAsmOperand *> &Operands,
- MCContext &Ctx, MCStreamer &Out) {
+ MCContext &Ctx, const MCInstrInfo &MII, MCStreamer &Out) {
// Access size in bytes.
unsigned AccessSize = 0;
- unsigned long OpIx = Operands.size();
+
switch (Inst.getOpcode()) {
case X86::MOV8mi:
case X86::MOV8mr:
- AccessSize = 1;
- OpIx = 2;
- break;
case X86::MOV8rm:
AccessSize = 1;
- OpIx = 1;
break;
case X86::MOV16mi:
case X86::MOV16mr:
- AccessSize = 2;
- OpIx = 2;
- break;
case X86::MOV16rm:
AccessSize = 2;
- OpIx = 1;
break;
case X86::MOV32mi:
case X86::MOV32mr:
- AccessSize = 4;
- OpIx = 2;
- break;
case X86::MOV32rm:
AccessSize = 4;
- OpIx = 1;
break;
case X86::MOV64mi32:
case X86::MOV64mr:
- AccessSize = 8;
- OpIx = 2;
- break;
case X86::MOV64rm:
AccessSize = 8;
- OpIx = 1;
break;
case X86::MOVAPDmr:
case X86::MOVAPSmr:
- AccessSize = 16;
- OpIx = 2;
- break;
case X86::MOVAPDrm:
case X86::MOVAPSrm:
AccessSize = 16;
- OpIx = 1;
break;
- }
- if (OpIx >= Operands.size())
+ default:
return;
+ }
- const bool IsWrite = (OpIx != 1);
- InstrumentMemOperand(Operands[OpIx], AccessSize, IsWrite, Ctx, Out);
+ const bool IsWrite = MII.get(Inst.getOpcode()).mayStore();
+ for (unsigned Ix = 0; Ix < Operands.size(); ++Ix) {
+ MCParsedAsmOperand *Op = Operands[Ix];
+ if (Op && Op->isMem())
+ InstrumentMemOperand(Op, AccessSize, IsWrite, Ctx, Out);
+ }
}
class X86AddressSanitizer32 : public X86AddressSanitizer {
public:
- X86AddressSanitizer32(MCSubtargetInfo &sti) : X86AddressSanitizer(sti) {}
+ X86AddressSanitizer32(const MCSubtargetInfo &STI)
+ : X86AddressSanitizer(STI) {}
virtual ~X86AddressSanitizer32() {}
virtual void InstrumentMemOperandImpl(X86Operand *Op, unsigned AccessSize,
@@ -172,14 +161,14 @@ void X86AddressSanitizer32::InstrumentMemOperandImpl(
MCSymbolRefExpr::Create(FuncSym, MCSymbolRefExpr::VK_PLT, Ctx);
EmitInstruction(Out, MCInstBuilder(X86::CALLpcrel32).addExpr(FuncExpr));
}
- EmitInstruction(Out, MCInstBuilder(X86::ADD32ri).addReg(X86::ESP)
- .addReg(X86::ESP).addImm(4));
+ EmitInstruction(Out, MCInstBuilder(X86::POP32r).addReg(X86::EAX));
EmitInstruction(Out, MCInstBuilder(X86::POP32r).addReg(X86::EAX));
}
class X86AddressSanitizer64 : public X86AddressSanitizer {
public:
- X86AddressSanitizer64(MCSubtargetInfo &sti) : X86AddressSanitizer(sti) {}
+ X86AddressSanitizer64(const MCSubtargetInfo &STI)
+ : X86AddressSanitizer(STI) {}
virtual ~X86AddressSanitizer64() {}
virtual void InstrumentMemOperandImpl(X86Operand *Op, unsigned AccessSize,
@@ -187,13 +176,26 @@ public:
MCStreamer &Out) override;
};
-void X86AddressSanitizer64::InstrumentMemOperandImpl(
- X86Operand *Op, unsigned AccessSize, bool IsWrite, MCContext &Ctx,
- MCStreamer &Out) {
+void X86AddressSanitizer64::InstrumentMemOperandImpl(X86Operand *Op,
+ unsigned AccessSize,
+ bool IsWrite,
+ MCContext &Ctx,
+ MCStreamer &Out) {
// FIXME: emit .cfi directives for correct stack unwinding.
- // Set %rsp below current red zone (128 bytes wide)
- EmitInstruction(Out, MCInstBuilder(X86::SUB64ri32).addReg(X86::RSP)
- .addReg(X86::RSP).addImm(128));
+
+ // Set %rsp below current red zone (128 bytes wide) using LEA instruction to
+ // preserve flags.
+ {
+ MCInst Inst;
+ Inst.setOpcode(X86::LEA64r);
+ Inst.addOperand(MCOperand::CreateReg(X86::RSP));
+
+ const MCExpr *Disp = MCConstantExpr::Create(-128, Ctx);
+ std::unique_ptr<X86Operand> Op(
+ X86Operand::CreateMem(0, Disp, X86::RSP, 0, 1, SMLoc(), SMLoc()));
+ Op->addMemOperands(Inst, 5);
+ EmitInstruction(Out, Inst);
+ }
EmitInstruction(Out, MCInstBuilder(X86::PUSH64r).addReg(X86::RDI));
{
MCInst Inst;
@@ -210,8 +212,19 @@ void X86AddressSanitizer64::InstrumentMemOperandImpl(
EmitInstruction(Out, MCInstBuilder(X86::CALL64pcrel32).addExpr(FuncExpr));
}
EmitInstruction(Out, MCInstBuilder(X86::POP64r).addReg(X86::RDI));
- EmitInstruction(Out, MCInstBuilder(X86::ADD64ri32).addReg(X86::RSP)
- .addReg(X86::RSP).addImm(128));
+
+ // Restore old %rsp value.
+ {
+ MCInst Inst;
+ Inst.setOpcode(X86::LEA64r);
+ Inst.addOperand(MCOperand::CreateReg(X86::RSP));
+
+ const MCExpr *Disp = MCConstantExpr::Create(128, Ctx);
+ std::unique_ptr<X86Operand> Op(
+ X86Operand::CreateMem(0, Disp, X86::RSP, 0, 1, SMLoc(), SMLoc()));
+ Op->addMemOperands(Inst, 5);
+ EmitInstruction(Out, Inst);
+ }
}
} // End anonymous namespace
@@ -221,10 +234,15 @@ X86AsmInstrumentation::~X86AsmInstrumentation() {}
void X86AsmInstrumentation::InstrumentInstruction(
const MCInst &Inst, SmallVectorImpl<MCParsedAsmOperand *> &Operands,
- MCContext &Ctx, MCStreamer &Out) {}
-
-X86AsmInstrumentation *CreateX86AsmInstrumentation(MCSubtargetInfo &STI) {
- if (ClAsanInstrumentInlineAssembly) {
+ MCContext &Ctx, const MCInstrInfo &MII, MCStreamer &Out) {}
+
+X86AsmInstrumentation *
+CreateX86AsmInstrumentation(const MCTargetOptions &MCOptions,
+ const MCContext &Ctx, const MCSubtargetInfo &STI) {
+ Triple T(STI.getTargetTriple());
+ const bool hasCompilerRTSupport = T.isOSLinux();
+ if (ClAsanInstrumentAssembly && hasCompilerRTSupport &&
+ MCOptions.SanitizeAddress) {
if ((STI.getFeatureBits() & X86::Mode32Bit) != 0)
return new X86AddressSanitizer32(STI);
if ((STI.getFeatureBits() & X86::Mode64Bit) != 0)
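
The 64-bit instrumentation switches from SUB/ADD to LEA for the %rsp adjustment because LEA computes an effective address without writing EFLAGS, so the red-zone step-over can be inserted even where the surrounding code still needs its flags. As a sketch, the same "lea -128(%rsp), %rsp" could also be built with MCInstBuilder rather than the manual X86Operand plumbing above; the five trailing operands are X86's memory form (base, scale, index, displacement, segment):

    // Adjusts the stack pointer below the red zone, leaving EFLAGS intact.
    MCInst LEA = MCInstBuilder(X86::LEA64r)
                     .addReg(X86::RSP)                      // destination
                     .addReg(X86::RSP).addImm(1).addReg(0)  // base, scale, index
                     .addImm(-128).addReg(0);               // disp, segment
    EmitInstruction(Out, LEA);
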
diff --git a/lib/Target/X86/AsmParser/X86AsmInstrumentation.h b/lib/Target/X86/AsmParser/X86AsmInstrumentation.h
index c783a78..0369b14 100644
--- a/lib/Target/X86/AsmParser/X86AsmInstrumentation.h
+++ b/lib/Target/X86/AsmParser/X86AsmInstrumentation.h
@@ -16,13 +16,17 @@ namespace llvm {
class MCContext;
class MCInst;
+class MCInstrInfo;
class MCParsedAsmOperand;
class MCStreamer;
class MCSubtargetInfo;
+class MCTargetOptions;
class X86AsmInstrumentation;
-X86AsmInstrumentation *CreateX86AsmInstrumentation(MCSubtargetInfo &STI);
+X86AsmInstrumentation *
+CreateX86AsmInstrumentation(const MCTargetOptions &MCOptions,
+ const MCContext &Ctx, const MCSubtargetInfo &STI);
class X86AsmInstrumentation {
public:
@@ -32,15 +36,18 @@ public:
// instruction is sent to Out.
virtual void InstrumentInstruction(
const MCInst &Inst, SmallVectorImpl<MCParsedAsmOperand *> &Operands,
- MCContext &Ctx, MCStreamer &Out);
+ MCContext &Ctx,
+ const MCInstrInfo &MII,
+ MCStreamer &Out);
protected:
friend X86AsmInstrumentation *
- CreateX86AsmInstrumentation(MCSubtargetInfo &STI);
+ CreateX86AsmInstrumentation(const MCTargetOptions &MCOptions,
+ const MCContext &Ctx, const MCSubtargetInfo &STI);
X86AsmInstrumentation();
};
-} // End llvm namespace
+} // End llvm namespace
-#endif // X86_ASM_INSTRUMENTATION_H
+#endif // X86_ASM_INSTRUMENTATION_H
diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp
index 9eddc74..d3e695e 100644
--- a/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -20,6 +20,7 @@
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
@@ -55,6 +56,7 @@ static const char OpPrecedence[] = {
class X86AsmParser : public MCTargetAsmParser {
MCSubtargetInfo &STI;
MCAsmParser &Parser;
+ const MCInstrInfo &MII;
ParseInstructionInfo *InstInfo;
std::unique_ptr<X86AsmInstrumentation> Instrumentation;
private:
@@ -257,7 +259,7 @@ private:
public:
IntelExprStateMachine(int64_t imm, bool stoponlbrac, bool addimmprefix) :
State(IES_PLUS), PrevState(IES_ERROR), BaseReg(0), IndexReg(0), TmpReg(0),
- Scale(1), Imm(imm), Sym(0), StopOnLBrac(stoponlbrac),
+ Scale(1), Imm(imm), Sym(nullptr), StopOnLBrac(stoponlbrac),
AddImmPrefix(addimmprefix) { Info.clear(); }
unsigned getBaseReg() { return BaseReg; }
@@ -618,7 +620,7 @@ private:
X86Operand *ErrorOperand(SMLoc Loc, StringRef Msg) {
Error(Loc, Msg);
- return 0;
+ return nullptr;
}
X86Operand *DefaultMemSIOperand(SMLoc Loc);
@@ -710,13 +712,17 @@ private:
public:
X86AsmParser(MCSubtargetInfo &sti, MCAsmParser &parser,
- const MCInstrInfo &MII)
- : MCTargetAsmParser(), STI(sti), Parser(parser), InstInfo(0) {
+ const MCInstrInfo &mii,
+ const MCTargetOptions &Options)
+ : MCTargetAsmParser(), STI(sti), Parser(parser), MII(mii),
+ InstInfo(nullptr) {
// Initialize the set of available features.
setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
- Instrumentation.reset(CreateX86AsmInstrumentation(STI));
+ Instrumentation.reset(
+ CreateX86AsmInstrumentation(Options, Parser.getContext(), STI));
}
+
bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
bool
@@ -1173,9 +1179,9 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, SMLoc Start,
// expression.
IntelExprStateMachine SM(ImmDisp, /*StopOnLBrac=*/false, /*AddImmPrefix=*/true);
if (ParseIntelExpression(SM, End))
- return 0;
+ return nullptr;
- const MCExpr *Disp = 0;
+ const MCExpr *Disp = nullptr;
if (const MCExpr *Sym = SM.getSym()) {
// A symbolic displacement.
Disp = Sym;
@@ -1199,7 +1205,7 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, SMLoc Start,
if (Tok.getString().find('.') != StringRef::npos) {
const MCExpr *NewDisp;
if (ParseIntelDotOperator(Disp, NewDisp))
- return 0;
+ return nullptr;
End = Tok.getEndLoc();
Parser.Lex(); // Eat the field.
@@ -1220,7 +1226,7 @@ X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, SMLoc Start,
StringRef ErrMsg;
if (CheckBaseRegAndIndexReg(BaseReg, IndexReg, ErrMsg)) {
Error(StartInBrac, ErrMsg);
- return 0;
+ return nullptr;
}
return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale, Start,
End, Size);
@@ -1237,7 +1243,7 @@ bool X86AsmParser::ParseIntelIdentifier(const MCExpr *&Val,
InlineAsmIdentifierInfo &Info,
bool IsUnevaluatedOperand, SMLoc &End) {
assert (isParsingInlineAsm() && "Expected to be parsing inline assembly.");
- Val = 0;
+ Val = nullptr;
StringRef LineBuf(Identifier.data());
SemaCallback->LookupInlineAsmIdentifier(LineBuf, Info, IsUnevaluatedOperand);
@@ -1309,7 +1315,7 @@ X86Operand *X86AsmParser::ParseIntelSegmentOverride(unsigned SegReg,
StringRef Identifier = Tok.getString();
if (ParseIntelIdentifier(Val, Identifier, Info,
/*Unevaluated=*/false, End))
- return 0;
+ return nullptr;
return CreateMemForInlineAsm(/*SegReg=*/0, Val, /*BaseReg=*/0,/*IndexReg=*/0,
/*Scale=*/1, Start, End, Size, Identifier, Info);
}
@@ -1337,7 +1343,7 @@ X86Operand *X86AsmParser::ParseIntelMemOperand(int64_t ImmDisp, SMLoc Start,
StringRef Identifier = Tok.getString();
if (ParseIntelIdentifier(Val, Identifier, Info,
/*Unevaluated=*/false, End))
- return 0;
+ return nullptr;
if (!getLexer().is(AsmToken::LBrac))
return CreateMemForInlineAsm(/*SegReg=*/0, Val, /*BaseReg=*/0, /*IndexReg=*/0,
@@ -1349,19 +1355,19 @@ X86Operand *X86AsmParser::ParseIntelMemOperand(int64_t ImmDisp, SMLoc Start,
IntelExprStateMachine SM(/*ImmDisp=*/0, /*StopOnLBrac=*/true,
/*AddImmPrefix=*/false);
if (ParseIntelExpression(SM, End))
- return 0;
+ return nullptr;
if (SM.getSym()) {
Error(Start, "cannot use more than one symbol in memory operand");
- return 0;
+ return nullptr;
}
if (SM.getBaseReg()) {
Error(Start, "cannot use base register with variable reference");
- return 0;
+ return nullptr;
}
if (SM.getIndexReg()) {
Error(Start, "cannot use index register with variable reference");
- return 0;
+ return nullptr;
}
const MCExpr *Disp = MCConstantExpr::Create(SM.getImm(), getContext());
@@ -1430,7 +1436,7 @@ X86Operand *X86AsmParser::ParseIntelOffsetOfOperator() {
StringRef Identifier = Tok.getString();
if (ParseIntelIdentifier(Val, Identifier, Info,
/*Unevaluated=*/false, End))
- return 0;
+ return nullptr;
// Don't emit the offset operator.
InstInfo->AsmRewrites->push_back(AsmRewrite(AOK_Skip, OffsetOfLoc, 7));
@@ -1461,13 +1467,13 @@ X86Operand *X86AsmParser::ParseIntelOperator(unsigned OpKind) {
SMLoc TypeLoc = Tok.getLoc();
Parser.Lex(); // Eat operator.
- const MCExpr *Val = 0;
+ const MCExpr *Val = nullptr;
InlineAsmIdentifierInfo Info;
SMLoc Start = Tok.getLoc(), End;
StringRef Identifier = Tok.getString();
if (ParseIntelIdentifier(Val, Identifier, Info,
/*Unevaluated=*/true, End))
- return 0;
+ return nullptr;
if (!Info.OpDecl)
return ErrorOperand(Start, "unable to lookup expression");
@@ -1522,7 +1528,7 @@ X86Operand *X86AsmParser::ParseIntelOperand() {
IntelExprStateMachine SM(/*Imm=*/0, /*StopOnLBrac=*/true,
/*AddImmPrefix=*/false);
if (ParseIntelExpression(SM, End))
- return 0;
+ return nullptr;
int64_t Imm = SM.getImm();
if (isParsingInlineAsm()) {
@@ -1580,11 +1586,11 @@ X86Operand *X86AsmParser::ParseATTOperand() {
// Read the register.
unsigned RegNo;
SMLoc Start, End;
- if (ParseRegister(RegNo, Start, End)) return 0;
+ if (ParseRegister(RegNo, Start, End)) return nullptr;
if (RegNo == X86::EIZ || RegNo == X86::RIZ) {
Error(Start, "%eiz and %riz can only be used as index registers",
SMRange(Start, End));
- return 0;
+ return nullptr;
}
// If this is a segment register followed by a ':', then this is the start
@@ -1601,7 +1607,7 @@ X86Operand *X86AsmParser::ParseATTOperand() {
Parser.Lex();
const MCExpr *Val;
if (getParser().parseExpression(Val, End))
- return 0;
+ return nullptr;
return X86Operand::CreateImm(Val, Start, End);
}
}
@@ -1630,7 +1636,7 @@ X86AsmParser::HandleAVX512Operand(SmallVectorImpl<MCParsedAsmOperand*> &Operands
StringSwitch<const char*>(getLexer().getTok().getIdentifier())
.Case("to8", "{1to8}")
.Case("to16", "{1to16}")
- .Default(0);
+ .Default(nullptr);
if (!BroadcastPrimitive)
return !ErrorAndEatStatement(getLexer().getLoc(),
"Invalid memory broadcast primitive.");
@@ -1685,7 +1691,7 @@ X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
const MCExpr *Disp = MCConstantExpr::Create(0, getParser().getContext());
if (getLexer().isNot(AsmToken::LParen)) {
SMLoc ExprEnd;
- if (getParser().parseExpression(Disp, ExprEnd)) return 0;
+ if (getParser().parseExpression(Disp, ExprEnd)) return nullptr;
// After parsing the base expression we could either have a parenthesized
// memory address or not. If not, return now. If so, eat the (.
@@ -1712,7 +1718,7 @@ X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
// It must be an parenthesized expression, parse it now.
if (getParser().parseParenExpression(Disp, ExprEnd))
- return 0;
+ return nullptr;
// After parsing the base expression we could either have a parenthesized
// memory address or not. If not, return now. If so, eat the (.
@@ -1736,11 +1742,11 @@ X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
if (getLexer().is(AsmToken::Percent)) {
SMLoc StartLoc, EndLoc;
BaseLoc = Parser.getTok().getLoc();
- if (ParseRegister(BaseReg, StartLoc, EndLoc)) return 0;
+ if (ParseRegister(BaseReg, StartLoc, EndLoc)) return nullptr;
if (BaseReg == X86::EIZ || BaseReg == X86::RIZ) {
Error(StartLoc, "eiz and riz can only be used as index registers",
SMRange(StartLoc, EndLoc));
- return 0;
+ return nullptr;
}
}
@@ -1756,7 +1762,7 @@ X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
// like "1(%eax,,1)", the assembler doesn't. Use "eiz" or "riz" for this.
if (getLexer().is(AsmToken::Percent)) {
SMLoc L;
- if (ParseRegister(IndexReg, L, L)) return 0;
+ if (ParseRegister(IndexReg, L, L)) return nullptr;
if (getLexer().isNot(AsmToken::RParen)) {
// Parse the scale amount:
@@ -1764,7 +1770,7 @@ X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
if (getLexer().isNot(AsmToken::Comma)) {
Error(Parser.getTok().getLoc(),
"expected comma in scale expression");
- return 0;
+ return nullptr;
}
Parser.Lex(); // Eat the comma.
@@ -1774,18 +1780,18 @@ X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
int64_t ScaleVal;
if (getParser().parseAbsoluteExpression(ScaleVal)){
Error(Loc, "expected scale expression");
- return 0;
+ return nullptr;
}
// Validate the scale amount.
if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
ScaleVal != 1) {
Error(Loc, "scale factor in 16-bit address must be 1");
- return 0;
+ return nullptr;
}
if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 && ScaleVal != 8){
Error(Loc, "scale factor in address must be 1, 2, 4 or 8");
- return 0;
+ return nullptr;
}
Scale = (unsigned)ScaleVal;
}
@@ -1797,7 +1803,7 @@ X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
int64_t Value;
if (getParser().parseAbsoluteExpression(Value))
- return 0;
+ return nullptr;
if (Value != 1)
Warning(Loc, "scale factor without index register is ignored");
@@ -1808,7 +1814,7 @@ X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
// Ok, we've eaten the memory operand, verify we have a ')' and eat it too.
if (getLexer().isNot(AsmToken::RParen)) {
Error(Parser.getTok().getLoc(), "unexpected token in memory operand");
- return 0;
+ return nullptr;
}
SMLoc MemEnd = Parser.getTok().getEndLoc();
Parser.Lex(); // Eat the ')'.
@@ -1821,18 +1827,18 @@ X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
BaseReg != X86::SI && BaseReg != X86::DI)) &&
BaseReg != X86::DX) {
Error(BaseLoc, "invalid 16-bit base register");
- return 0;
+ return nullptr;
}
if (BaseReg == 0 &&
X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg)) {
Error(IndexLoc, "16-bit memory operand may not include only index register");
- return 0;
+ return nullptr;
}
StringRef ErrMsg;
if (CheckBaseRegAndIndexReg(BaseReg, IndexReg, ErrMsg)) {
Error(BaseLoc, ErrMsg);
- return 0;
+ return nullptr;
}
return X86Operand::CreateMem(SegReg, Disp, BaseReg, IndexReg, Scale,
@@ -1851,7 +1857,7 @@ ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc,
PatchedName = PatchedName.substr(0, Name.size()-1);
// FIXME: Hack to recognize cmp<comparison code>{ss,sd,ps,pd}.
- const MCExpr *ExtraImmOp = 0;
+ const MCExpr *ExtraImmOp = nullptr;
if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) &&
(PatchedName.endswith("ss") || PatchedName.endswith("sd") ||
PatchedName.endswith("ps") || PatchedName.endswith("pd"))) {
@@ -2070,8 +2076,10 @@ ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc,
(Name == "smov" || Name == "smovb" || Name == "smovw" ||
Name == "smovl" || Name == "smovd" || Name == "smovq"))) {
if (Operands.size() == 1) {
- if (Name == "movsd")
+ if (Name == "movsd") {
+ delete Operands.back();
Operands.back() = X86Operand::CreateToken("movsl", NameLoc);
+ }
if (isParsingIntelSyntax()) {
Operands.push_back(DefaultMemDIOperand(NameLoc));
Operands.push_back(DefaultMemSIOperand(NameLoc));
@@ -2253,7 +2261,8 @@ static const char *getSubtargetFeatureName(unsigned Val);
void X86AsmParser::EmitInstruction(
MCInst &Inst, SmallVectorImpl<MCParsedAsmOperand *> &Operands,
MCStreamer &Out) {
- Instrumentation->InstrumentInstruction(Inst, Operands, getContext(), Out);
+ Instrumentation->InstrumentInstruction(Inst, Operands, getContext(), MII,
+ Out);
Out.EmitInstruction(Inst, STI);
}
@@ -2291,7 +2300,7 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
.Case("fstsw", "fnstsw")
.Case("fstsww", "fnstsw")
.Case("fclex", "fnclex")
- .Default(0);
+ .Default(nullptr);
assert(Repl && "Unknown wait-prefixed instruction");
delete Operands[0];
Operands[0] = X86Operand::CreateToken(Repl, IDLoc);
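
Both replacements in this parser follow the same ownership rule: Operands holds owning raw MCParsedAsmOperand pointers, so a slot may only be overwritten after the old object is freed. That is the leak the movsd-to-movsl hunk fixes, and the wait-prefix rewrite just above already obeys it:

    // Replacing an owned element: delete the old operand first, then install
    // the freshly allocated token so exactly one object owns the slot.
    delete Operands.back();
    Operands.back() = X86Operand::CreateToken("movsl", NameLoc);
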
diff --git a/lib/Target/X86/AsmParser/X86Operand.h b/lib/Target/X86/AsmParser/X86Operand.h
index 45fe2a9..de3be38 100644
--- a/lib/Target/X86/AsmParser/X86Operand.h
+++ b/lib/Target/X86/AsmParser/X86Operand.h
@@ -422,7 +422,7 @@ struct X86Operand : public MCParsedAsmOperand {
bool AddressOf = false,
SMLoc OffsetOfLoc = SMLoc(),
StringRef SymName = StringRef(),
- void *OpDecl = 0) {
+ void *OpDecl = nullptr) {
X86Operand *Res = new X86Operand(Register, StartLoc, EndLoc);
Res->Reg.RegNo = RegNo;
Res->AddressOf = AddressOf;
@@ -441,7 +441,7 @@ struct X86Operand : public MCParsedAsmOperand {
/// Create an absolute memory operand.
static X86Operand *CreateMem(const MCExpr *Disp, SMLoc StartLoc, SMLoc EndLoc,
unsigned Size = 0, StringRef SymName = StringRef(),
- void *OpDecl = 0) {
+ void *OpDecl = nullptr) {
X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc);
Res->Mem.SegReg = 0;
Res->Mem.Disp = Disp;
@@ -461,7 +461,7 @@ struct X86Operand : public MCParsedAsmOperand {
unsigned Scale, SMLoc StartLoc, SMLoc EndLoc,
unsigned Size = 0,
StringRef SymName = StringRef(),
- void *OpDecl = 0) {
+ void *OpDecl = nullptr) {
// We should never just have a displacement, that should be parsed as an
// absolute memory operand.
assert((SegReg || BaseReg || IndexReg) && "Invalid memory operand!");
diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt
index 206b651..c54fbc1 100644
--- a/lib/Target/X86/CMakeLists.txt
+++ b/lib/Target/X86/CMakeLists.txt
@@ -14,7 +14,6 @@ add_public_tablegen_target(X86CommonTableGen)
set(sources
X86AsmPrinter.cpp
- X86COFFMachineModuleInfo.cpp
X86CodeEmitter.cpp
X86FastISel.cpp
X86FloatingPoint.cpp
diff --git a/lib/Target/X86/Disassembler/Android.mk b/lib/Target/X86/Disassembler/Android.mk
index 3984266..0b3b8a5 100644
--- a/lib/Target/X86/Disassembler/Android.mk
+++ b/lib/Target/X86/Disassembler/Android.mk
@@ -8,7 +8,8 @@ x86_disassembler_TBLGEN_TABLES := \
x86_disassembler_SRC_FILES := \
X86Disassembler.cpp \
- X86DisassemblerDecoder.c
+ X86DisassemblerDecoder.cpp
+
# For the device
# =====================================================
diff --git a/lib/Target/X86/Disassembler/CMakeLists.txt b/lib/Target/X86/Disassembler/CMakeLists.txt
index deed115..4370282 100644
--- a/lib/Target/X86/Disassembler/CMakeLists.txt
+++ b/lib/Target/X86/Disassembler/CMakeLists.txt
@@ -1,4 +1,4 @@
add_llvm_library(LLVMX86Disassembler
X86Disassembler.cpp
- X86DisassemblerDecoder.c
+ X86DisassemblerDecoder.cpp
)
diff --git a/lib/Target/X86/Disassembler/Makefile b/lib/Target/X86/Disassembler/Makefile
index 8669fd8..51e7b82 100644
--- a/lib/Target/X86/Disassembler/Makefile
+++ b/lib/Target/X86/Disassembler/Makefile
@@ -10,7 +10,9 @@
LEVEL = ../../../..
LIBRARYNAME = LLVMX86Disassembler
-# Hack: we need to include 'main' x86 target directory to grab private headers
+# Hack: we need to include 'main' x86 target directory to grab private headers.
CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
include $(LEVEL)/Makefile.common
+
+.PHONY: $(PROJ_SRC_DIR)/X86DisassemblerDecoder.c
diff --git a/lib/Target/X86/Disassembler/X86Disassembler.cpp b/lib/Target/X86/Disassembler/X86Disassembler.cpp
index d5759cd..c366725 100644
--- a/lib/Target/X86/Disassembler/X86Disassembler.cpp
+++ b/lib/Target/X86/Disassembler/X86Disassembler.cpp
@@ -27,6 +27,11 @@
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+using namespace llvm::X86Disassembler;
+
+#define DEBUG_TYPE "x86-disassembler"
+
#define GET_REGINFO_ENUM
#include "X86GenRegisterInfo.inc"
#define GET_INSTRINFO_ENUM
@@ -34,21 +39,18 @@
#define GET_SUBTARGETINFO_ENUM
#include "X86GenSubtargetInfo.inc"
-using namespace llvm;
-using namespace llvm::X86Disassembler;
-
-void x86DisassemblerDebug(const char *file,
- unsigned line,
- const char *s) {
+void llvm::X86Disassembler::Debug(const char *file, unsigned line,
+ const char *s) {
dbgs() << file << ":" << line << ": " << s;
}
-const char *x86DisassemblerGetInstrName(unsigned Opcode, const void *mii) {
+const char *llvm::X86Disassembler::GetInstrName(unsigned Opcode,
+ const void *mii) {
const MCInstrInfo *MII = static_cast<const MCInstrInfo *>(mii);
return MII->getName(Opcode);
}
-#define debug(s) DEBUG(x86DisassemblerDebug(__FILE__, __LINE__, s));
+#define debug(s) DEBUG(Debug(__FILE__, __LINE__, s));
namespace llvm {
@@ -74,9 +76,11 @@ static bool translateInstruction(MCInst &target,
InternalInstruction &source,
const MCDisassembler *Dis);
-X86GenericDisassembler::X86GenericDisassembler(const MCSubtargetInfo &STI,
- const MCInstrInfo *MII)
- : MCDisassembler(STI), MII(MII) {
+X86GenericDisassembler::X86GenericDisassembler(
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx,
+ std::unique_ptr<const MCInstrInfo> MII)
+ : MCDisassembler(STI, Ctx), MII(std::move(MII)) {
switch (STI.getFeatureBits() &
(X86::Mode16Bit | X86::Mode32Bit | X86::Mode64Bit)) {
case X86::Mode16Bit:
@@ -93,10 +97,6 @@ X86GenericDisassembler::X86GenericDisassembler(const MCSubtargetInfo &STI,
}
}
-X86GenericDisassembler::~X86GenericDisassembler() {
- delete MII;
-}
-
/// regionReader - a callback function that wraps the readByte method from
/// MemoryObject.
///
@@ -140,14 +140,14 @@ X86GenericDisassembler::getInstruction(MCInst &instr,
dlog_t loggerFn = logger;
if (&vStream == &nulls())
- loggerFn = 0; // Disable logging completely if it's going to nulls().
+ loggerFn = nullptr; // Disable logging completely if it's going to nulls().
int ret = decodeInstruction(&internalInstr,
regionReader,
(const void*)&region,
loggerFn,
(void*)&vStream,
- (const void*)MII,
+ (const void*)MII.get(),
address,
fMode);
@@ -319,7 +319,7 @@ static void translateImmediate(MCInst &mcInst, uint64_t immediate,
}
// By default sign-extend all X86 immediates based on their encoding.
else if (type == TYPE_IMM8 || type == TYPE_IMM16 || type == TYPE_IMM32 ||
- type == TYPE_IMM64) {
+ type == TYPE_IMM64 || type == TYPE_IMMv) {
uint32_t Opcode = mcInst.getOpcode();
switch (operand.encoding) {
default:
@@ -787,13 +787,11 @@ static bool translateInstruction(MCInst &mcInst,
mcInst.setOpcode(X86::XACQUIRE_PREFIX);
}
- int index;
-
insn.numImmediatesTranslated = 0;
- for (index = 0; index < X86_MAX_OPERANDS; ++index) {
- if (insn.operands[index].encoding != ENCODING_NONE) {
- if (translateOperand(mcInst, insn.operands[index], insn, Dis)) {
+ for (const auto &Op : insn.operands) {
+ if (Op.encoding != ENCODING_NONE) {
+ if (translateOperand(mcInst, Op, insn, Dis)) {
return true;
}
}
@@ -803,9 +801,10 @@ static bool translateInstruction(MCInst &mcInst,
}
static MCDisassembler *createX86Disassembler(const Target &T,
- const MCSubtargetInfo &STI) {
- return new X86Disassembler::X86GenericDisassembler(STI,
- T.createMCInstrInfo());
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx) {
+ std::unique_ptr<const MCInstrInfo> MII(T.createMCInstrInfo());
+ return new X86Disassembler::X86GenericDisassembler(STI, Ctx, std::move(MII));
}
extern "C" void LLVMInitializeX86Disassembler() {
diff --git a/lib/Target/X86/Disassembler/X86Disassembler.h b/lib/Target/X86/Disassembler/X86Disassembler.h
index 4e6e297..4dc7c29 100644
--- a/lib/Target/X86/Disassembler/X86Disassembler.h
+++ b/lib/Target/X86/Disassembler/X86Disassembler.h
@@ -74,17 +74,7 @@
#ifndef X86DISASSEMBLER_H
#define X86DISASSEMBLER_H
-#define INSTRUCTION_SPECIFIER_FIELDS \
- uint16_t operands;
-
-#define INSTRUCTION_IDS \
- uint16_t instructionIDs;
-
#include "X86DisassemblerDecoderCommon.h"
-
-#undef INSTRUCTION_SPECIFIER_FIELDS
-#undef INSTRUCTION_IDS
-
#include "llvm/MC/MCDisassembler.h"
namespace llvm {
@@ -101,13 +91,12 @@ namespace X86Disassembler {
/// All each platform class should have to do is subclass the constructor, and
/// provide a different disassemblerMode value.
class X86GenericDisassembler : public MCDisassembler {
- const MCInstrInfo *MII;
+ std::unique_ptr<const MCInstrInfo> MII;
public:
/// Constructor - Initializes the disassembler.
///
- X86GenericDisassembler(const MCSubtargetInfo &STI, const MCInstrInfo *MII);
-private:
- ~X86GenericDisassembler();
+ X86GenericDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx,
+ std::unique_ptr<const MCInstrInfo> MII);
public:
/// getInstruction - See MCDisassembler.
diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp
index 0801c96..804606d 100644
--- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
+++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp
@@ -1,17 +1,17 @@
-/*===-- X86DisassemblerDecoder.c - Disassembler decoder ------------*- C -*-===*
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is distributed under the University of Illinois Open Source
- * License. See LICENSE.TXT for details.
- *
- *===----------------------------------------------------------------------===*
- *
- * This file is part of the X86 Disassembler.
- * It contains the implementation of the instruction decoder.
- * Documentation for the disassembler can be found in X86Disassembler.h.
- *
- *===----------------------------------------------------------------------===*/
+//===-- X86DisassemblerDecoder.c - Disassembler decoder -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is part of the X86 Disassembler.
+// It contains the implementation of the instruction decoder.
+// Documentation for the disassembler can be found in X86Disassembler.h.
+//
+//===----------------------------------------------------------------------===//
#include <stdarg.h> /* for va_*() */
#include <stdio.h> /* for vsnprintf() */
@@ -20,13 +20,35 @@
#include "X86DisassemblerDecoder.h"
-#include "X86GenDisassemblerTables.inc"
+using namespace llvm::X86Disassembler;
+
+/// Specifies whether a ModR/M byte is needed and (if so) which
+/// instruction each possible value of the ModR/M byte corresponds to. Once
+/// this information is known, we have narrowed down to a single instruction.
+struct ModRMDecision {
+ uint8_t modrm_type;
+ uint16_t instructionIDs;
+};
+
+/// Specifies which set of ModR/M->instruction tables to look at
+/// given a particular opcode.
+struct OpcodeDecision {
+ ModRMDecision modRMDecisions[256];
+};
+
+/// Specifies which opcode->instruction tables to look at given
+/// a particular context (set of attributes). Since there are many possible
+/// contexts, the decoder first uses CONTEXTS_SYM to determine which context
+/// applies given a specific set of attributes. Hence there are only IC_max
+/// entries in this table, rather than 2^(ATTR_max).
+struct ContextDecision {
+ OpcodeDecision opcodeDecisions[IC_max];
+};
-#define TRUE 1
-#define FALSE 0
+#include "X86GenDisassemblerTables.inc"
#ifndef NDEBUG
-#define debug(s) do { x86DisassemblerDebug(__FILE__, __LINE__, s); } while (0)
+#define debug(s) do { Debug(__FILE__, __LINE__, s); } while (0)
#else
#define debug(s) do { } while (0)
#endif
@@ -41,7 +63,7 @@
* an instruction with these attributes.
*/
static InstructionContext contextForAttrs(uint16_t attrMask) {
- return CONTEXTS_SYM[attrMask];
+ return static_cast<InstructionContext>(CONTEXTS_SYM[attrMask]);
}
/*
@@ -53,12 +75,12 @@ static InstructionContext contextForAttrs(uint16_t attrMask) {
* contextForAttrs.
* @param opcode - The last byte of the instruction's opcode, not counting
* ModR/M extensions and escapes.
- * @return - TRUE if the ModR/M byte is required, FALSE otherwise.
+ * @return - true if the ModR/M byte is required, false otherwise.
*/
static int modRMRequired(OpcodeType type,
InstructionContext insnContext,
uint16_t opcode) {
- const struct ContextDecision* decision = 0;
+ const struct ContextDecision* decision = nullptr;
switch (type) {
case ONEBYTE:
@@ -102,7 +124,7 @@ static InstrUID decode(OpcodeType type,
InstructionContext insnContext,
uint8_t opcode,
uint8_t modRM) {
- const struct ModRMDecision* dec = 0;
+ const struct ModRMDecision* dec = nullptr;
switch (type) {
case ONEBYTE:
@@ -284,15 +306,15 @@ static void setPrefixPresent(struct InternalInstruction* insn,
* @param location - The location to query.
* @return - Whether the prefix is at that location.
*/
-static BOOL isPrefixAtLocation(struct InternalInstruction* insn,
+static bool isPrefixAtLocation(struct InternalInstruction* insn,
uint8_t prefix,
uint64_t location)
{
if (insn->prefixPresent[prefix] == 1 &&
insn->prefixLocations[prefix] == location)
- return TRUE;
+ return true;
else
- return FALSE;
+ return false;
}
/*
@@ -305,14 +327,14 @@ static BOOL isPrefixAtLocation(struct InternalInstruction* insn,
* bytes, and no prefixes conflicted; nonzero otherwise.
*/
static int readPrefixes(struct InternalInstruction* insn) {
- BOOL isPrefix = TRUE;
- BOOL prefixGroups[4] = { FALSE };
+ bool isPrefix = true;
+ bool prefixGroups[4] = { false };
uint64_t prefixLocation;
uint8_t byte = 0;
uint8_t nextByte;
- BOOL hasAdSize = FALSE;
- BOOL hasOpSize = FALSE;
+ bool hasAdSize = false;
+ bool hasOpSize = false;
dbgprintf(insn, "readPrefixes()");
@@ -344,7 +366,7 @@ static int readPrefixes(struct InternalInstruction* insn) {
if ((byte == 0xf2 || byte == 0xf3) &&
((nextByte == 0xf0) |
((nextByte & 0xfe) == 0x86 || (nextByte & 0xf8) == 0x90)))
- insn->xAcquireRelease = TRUE;
+ insn->xAcquireRelease = true;
/*
* Also if the byte is 0xf3, and the following condition is met:
* - it is followed by a "mov mem, reg" (opcode 0x88/0x89) or
@@ -354,7 +376,7 @@ static int readPrefixes(struct InternalInstruction* insn) {
if (byte == 0xf3 &&
(nextByte == 0x88 || nextByte == 0x89 ||
nextByte == 0xc6 || nextByte == 0xc7))
- insn->xAcquireRelease = TRUE;
+ insn->xAcquireRelease = true;
if (insn->mode == MODE_64BIT && (nextByte & 0xf0) == 0x40) {
if (consumeByte(insn, &nextByte))
return -1;
@@ -372,7 +394,7 @@ static int readPrefixes(struct InternalInstruction* insn) {
case 0xf3: /* REP or REPE/REPZ */
if (prefixGroups[0])
dbgprintf(insn, "Redundant Group 1 prefix");
- prefixGroups[0] = TRUE;
+ prefixGroups[0] = true;
setPrefixPresent(insn, byte, prefixLocation);
break;
case 0x2e: /* CS segment override -OR- Branch not taken */
@@ -406,25 +428,25 @@ static int readPrefixes(struct InternalInstruction* insn) {
}
if (prefixGroups[1])
dbgprintf(insn, "Redundant Group 2 prefix");
- prefixGroups[1] = TRUE;
+ prefixGroups[1] = true;
setPrefixPresent(insn, byte, prefixLocation);
break;
case 0x66: /* Operand-size override */
if (prefixGroups[2])
dbgprintf(insn, "Redundant Group 3 prefix");
- prefixGroups[2] = TRUE;
- hasOpSize = TRUE;
+ prefixGroups[2] = true;
+ hasOpSize = true;
setPrefixPresent(insn, byte, prefixLocation);
break;
case 0x67: /* Address-size override */
if (prefixGroups[3])
dbgprintf(insn, "Redundant Group 4 prefix");
- prefixGroups[3] = TRUE;
- hasAdSize = TRUE;
+ prefixGroups[3] = true;
+ hasAdSize = true;
setPrefixPresent(insn, byte, prefixLocation);
break;
default: /* Not a prefix byte */
- isPrefix = FALSE;
+ isPrefix = false;
break;
}
@@ -549,7 +571,7 @@ static int readPrefixes(struct InternalInstruction* insn) {
default:
break;
case VEX_PREFIX_66:
- hasOpSize = TRUE;
+ hasOpSize = true;
break;
}
@@ -595,7 +617,7 @@ static int readPrefixes(struct InternalInstruction* insn) {
default:
break;
case VEX_PREFIX_66:
- hasOpSize = TRUE;
+ hasOpSize = true;
break;
}
@@ -790,11 +812,9 @@ static int readModRM(struct InternalInstruction* insn);
static int getIDWithAttrMask(uint16_t* instructionID,
struct InternalInstruction* insn,
uint16_t attrMask) {
- BOOL hasModRMExtension;
-
- uint16_t instructionClass;
+ bool hasModRMExtension;
- instructionClass = contextForAttrs(attrMask);
+ InstructionContext instructionClass = contextForAttrs(attrMask);
hasModRMExtension = modRMRequired(insn->opcodeType,
instructionClass,
@@ -825,14 +845,14 @@ static int getIDWithAttrMask(uint16_t* instructionID,
* @param orig - The instruction that is not 16-bit
* @param equiv - The instruction that is 16-bit
*/
-static BOOL is16BitEquivalent(const char* orig, const char* equiv) {
+static bool is16BitEquivalent(const char* orig, const char* equiv) {
off_t i;
for (i = 0;; i++) {
if (orig[i] == '\0' && equiv[i] == '\0')
- return TRUE;
+ return true;
if (orig[i] == '\0' || equiv[i] == '\0')
- return FALSE;
+ return false;
if (orig[i] != equiv[i]) {
if ((orig[i] == 'Q' || orig[i] == 'L') && equiv[i] == 'W')
continue;
@@ -840,7 +860,7 @@ static BOOL is16BitEquivalent(const char* orig, const char* equiv) {
continue;
if ((orig[i] == '4' || orig[i] == '2') && equiv[i] == '6')
continue;
- return FALSE;
+ return false;
}
}
}
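
The substitution rules above are easier to see with concrete inputs. A standalone restatement of the loop plus a few checks — the instruction names are invented for illustration, not taken from the tables:

#include <cassert>

// Same logic as is16BitEquivalent above, reproduced for a self-contained test.
static bool equiv16(const char *orig, const char *eq) {
  for (int i = 0;; ++i) {
    if (orig[i] == '\0' && eq[i] == '\0')
      return true;
    if (orig[i] == '\0' || eq[i] == '\0')
      return false;
    if (orig[i] != eq[i]) {
      if ((orig[i] == 'Q' || orig[i] == 'L') && eq[i] == 'W')
        continue;
      if ((orig[i] == '6' || orig[i] == '3') && eq[i] == '1')
        continue;
      if ((orig[i] == '4' || orig[i] == '2') && eq[i] == '6')
        continue;
      return false;
    }
  }
}

int main() {
  assert(equiv16("ADD32rr", "ADD16rr"));  // '3'->'1', '2'->'6'
  assert(equiv16("ADD64rr", "ADD16rr"));  // '6'->'1', '4'->'6'
  assert(equiv16("MOVSQ", "MOVSW"));      // 'Q'->'W'
  assert(!equiv16("ADD32rr", "ADD32rm")); // operand kinds differ
}
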
@@ -1011,9 +1031,8 @@ static int getID(struct InternalInstruction* insn, const void *miiArg) {
return 0;
}
- specName = x86DisassemblerGetInstrName(instructionID, miiArg);
- specWithOpSizeName =
- x86DisassemblerGetInstrName(instructionIDWithOpsize, miiArg);
+ specName = GetInstrName(instructionID, miiArg);
+ specWithOpSizeName = GetInstrName(instructionIDWithOpsize, miiArg);
if (is16BitEquivalent(specName, specWithOpSizeName) &&
(insn->mode == MODE_16BIT) ^ insn->prefixPresent[0x66]) {
@@ -1077,8 +1096,8 @@ static int getID(struct InternalInstruction* insn, const void *miiArg) {
* @return - 0 if the SIB byte was successfully read; nonzero otherwise.
*/
static int readSIB(struct InternalInstruction* insn) {
- SIBIndex sibIndexBase = 0;
- SIBBase sibBaseBase = 0;
+ SIBIndex sibIndexBase = SIB_INDEX_NONE;
+ SIBBase sibBaseBase = SIB_BASE_NONE;
uint8_t index, base;
dbgprintf(insn, "readSIB()");
@@ -1086,7 +1105,7 @@ static int readSIB(struct InternalInstruction* insn) {
if (insn->consumedSIB)
return 0;
- insn->consumedSIB = TRUE;
+ insn->consumedSIB = true;
switch (insn->addressSize) {
case 2:
@@ -1184,12 +1203,12 @@ static int readDisplacement(struct InternalInstruction* insn) {
if (insn->consumedDisplacement)
return 0;
- insn->consumedDisplacement = TRUE;
+ insn->consumedDisplacement = true;
insn->displacementOffset = insn->readerCursor - insn->startLocation;
switch (insn->eaDisplacement) {
case EA_DISP_NONE:
- insn->consumedDisplacement = FALSE;
+ insn->consumedDisplacement = false;
break;
case EA_DISP_8:
if (consumeInt8(insn, &d8))
@@ -1208,7 +1227,7 @@ static int readDisplacement(struct InternalInstruction* insn) {
break;
}
- insn->consumedDisplacement = TRUE;
+ insn->consumedDisplacement = true;
return 0;
}
@@ -1229,7 +1248,7 @@ static int readModRM(struct InternalInstruction* insn) {
if (consumeByte(insn, &insn->modRM))
return -1;
- insn->consumedModRM = TRUE;
+ insn->consumedModRM = true;
mod = modFromModRM(insn->modRM);
rm = rmFromModRM(insn->modRM);
@@ -1599,20 +1618,22 @@ static int readImmediate(struct InternalInstruction* insn, uint8_t size) {
static int readVVVV(struct InternalInstruction* insn) {
dbgprintf(insn, "readVVVV()");
+ int vvvv;
if (insn->vectorExtensionType == TYPE_EVEX)
- insn->vvvv = vvvvFromEVEX3of4(insn->vectorExtensionPrefix[2]);
+ vvvv = vvvvFromEVEX3of4(insn->vectorExtensionPrefix[2]);
else if (insn->vectorExtensionType == TYPE_VEX_3B)
- insn->vvvv = vvvvFromVEX3of3(insn->vectorExtensionPrefix[2]);
+ vvvv = vvvvFromVEX3of3(insn->vectorExtensionPrefix[2]);
else if (insn->vectorExtensionType == TYPE_VEX_2B)
- insn->vvvv = vvvvFromVEX2of2(insn->vectorExtensionPrefix[1]);
+ vvvv = vvvvFromVEX2of2(insn->vectorExtensionPrefix[1]);
else if (insn->vectorExtensionType == TYPE_XOP)
- insn->vvvv = vvvvFromXOP3of3(insn->vectorExtensionPrefix[2]);
+ vvvv = vvvvFromXOP3of3(insn->vectorExtensionPrefix[2]);
else
return -1;
if (insn->mode != MODE_64BIT)
- insn->vvvv &= 0x7;
+ vvvv &= 0x7;
+ insn->vvvv = static_cast<Reg>(vvvv);
return 0;
}
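
readVVVV now accumulates the field in a plain int and converts to Reg once at the end, instead of assigning through the enum-typed member repeatedly. For context, VEX stores vvvv inverted (one's complement) in bits 6:3 of its last payload byte; a sketch using a local helper rather than the header's vvvvFrom* macros:

#include <cassert>
#include <cstdint>

// vvvv lives, inverted, in bits [6:3] of the last VEX payload byte.
static uint8_t vvvvFromVexByte(uint8_t b, bool is64Bit) {
  uint8_t vvvv = (~b >> 3) & 0xf; // undo the one's-complement encoding
  if (!is64Bit)
    vvvv &= 0x7;                  // only 8 registers outside 64-bit mode
  return vvvv;
}

int main() {
  // A stored all-ones field means "no register": decodes to 0.
  assert(vvvvFromVexByte(0x78, true) == 0);
  // Stored 0b0110 decodes to register 9 in 64-bit mode, masked to 1 elsewhere.
  assert(vvvvFromVexByte(0x30, true) == 9);
  assert(vvvvFromVexByte(0x30, false) == 1);
}
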
@@ -1629,7 +1650,8 @@ static int readMaskRegister(struct InternalInstruction* insn) {
if (insn->vectorExtensionType != TYPE_EVEX)
return -1;
- insn->writemask = aaaFromEVEX4of4(insn->vectorExtensionPrefix[3]);
+ insn->writemask =
+ static_cast<Reg>(aaaFromEVEX4of4(insn->vectorExtensionPrefix[3]));
return 0;
}
@@ -1641,7 +1663,6 @@ static int readMaskRegister(struct InternalInstruction* insn) {
* @return - 0 if all operands could be read; nonzero otherwise.
*/
static int readOperands(struct InternalInstruction* insn) {
- int index;
int hasVVVV, needVVVV;
int sawRegImm = 0;
@@ -1652,8 +1673,8 @@ static int readOperands(struct InternalInstruction* insn) {
hasVVVV = !readVVVV(insn);
needVVVV = hasVVVV && (insn->vvvv != 0);
- for (index = 0; index < X86_MAX_OPERANDS; ++index) {
- switch (x86OperandSets[insn->spec->operands][index].encoding) {
+ for (const auto &Op : x86OperandSets[insn->spec->operands]) {
+ switch (Op.encoding) {
case ENCODING_NONE:
case ENCODING_SI:
case ENCODING_DI:
@@ -1662,7 +1683,7 @@ static int readOperands(struct InternalInstruction* insn) {
case ENCODING_RM:
if (readModRM(insn))
return -1;
- if (fixupReg(insn, &x86OperandSets[insn->spec->operands][index]))
+ if (fixupReg(insn, &Op))
return -1;
break;
case ENCODING_CB:
@@ -1684,14 +1705,14 @@ static int readOperands(struct InternalInstruction* insn) {
}
if (readImmediate(insn, 1))
return -1;
- if (x86OperandSets[insn->spec->operands][index].type == TYPE_IMM3 &&
+ if (Op.type == TYPE_IMM3 &&
insn->immediates[insn->numImmediatesConsumed - 1] > 7)
return -1;
- if (x86OperandSets[insn->spec->operands][index].type == TYPE_IMM5 &&
+ if (Op.type == TYPE_IMM5 &&
insn->immediates[insn->numImmediatesConsumed - 1] > 31)
return -1;
- if (x86OperandSets[insn->spec->operands][index].type == TYPE_XMM128 ||
- x86OperandSets[insn->spec->operands][index].type == TYPE_XMM256)
+ if (Op.type == TYPE_XMM128 ||
+ Op.type == TYPE_XMM256)
sawRegImm = 1;
break;
case ENCODING_IW:
@@ -1740,7 +1761,7 @@ static int readOperands(struct InternalInstruction* insn) {
needVVVV = 0; /* Mark that we have found a VVVV operand. */
if (!hasVVVV)
return -1;
- if (fixupReg(insn, &x86OperandSets[insn->spec->operands][index]))
+ if (fixupReg(insn, &Op))
return -1;
break;
case ENCODING_WRITEMASK:
@@ -1781,14 +1802,10 @@ static int readOperands(struct InternalInstruction* insn) {
* @return - 0 if the instruction's memory could be read; nonzero if
* not.
*/
-int decodeInstruction(struct InternalInstruction* insn,
- byteReader_t reader,
- const void* readerArg,
- dlog_t logger,
- void* loggerArg,
- const void* miiArg,
- uint64_t startLoc,
- DisassemblerMode mode) {
+int llvm::X86Disassembler::decodeInstruction(
+ struct InternalInstruction *insn, byteReader_t reader,
+ const void *readerArg, dlog_t logger, void *loggerArg, const void *miiArg,
+ uint64_t startLoc, DisassemblerMode mode) {
memset(insn, 0, sizeof(struct InternalInstruction));
insn->reader = reader;
@@ -1807,7 +1824,7 @@ int decodeInstruction(struct InternalInstruction* insn,
readOperands(insn))
return -1;
- insn->operands = &x86OperandSets[insn->spec->operands][0];
+ insn->operands = x86OperandSets[insn->spec->operands];
insn->length = insn->readerCursor - insn->startLocation;
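
The two edits above are representative of the whole operand path: indexed loops over x86OperandSets[insn->spec->operands][index] become range-based loops, and the operands pointer becomes an ArrayRef that carries its own length, so consumers no longer need the X86_MAX_OPERANDS bound. The shape of the pattern, with a toy specifier standing in for the generated operand sets:

#include "llvm/ADT/ArrayRef.h"
#include <cstdio>

struct ToySpec { unsigned encoding; unsigned type; };

// The length travels with the reference; no separate bound is threaded through.
static void dumpOperands(llvm::ArrayRef<ToySpec> Ops) {
  for (const auto &Op : Ops)
    std::printf("encoding=%u type=%u\n", Op.encoding, Op.type);
}

int main() {
  static const ToySpec Set[] = {{1, 7}, {2, 9}};
  dumpOperands(Set); // ArrayRef binds directly to the C array
}
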
diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
index ac3b39d..8c45402 100644
--- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
+++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
@@ -1,39 +1,28 @@
-/*===-- X86DisassemblerDecoderInternal.h - Disassembler decoder ---*- C -*-===*
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is distributed under the University of Illinois Open Source
- * License. See LICENSE.TXT for details.
- *
- *===----------------------------------------------------------------------===*
- *
- * This file is part of the X86 Disassembler.
- * It contains the public interface of the instruction decoder.
- * Documentation for the disassembler can be found in X86Disassembler.h.
- *
- *===----------------------------------------------------------------------===*/
+//===-- X86DisassemblerDecoder.h - Disassembler decoder --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is part of the X86 Disassembler.
+// It contains the public interface of the instruction decoder.
+// Documentation for the disassembler can be found in X86Disassembler.h.
+//
+//===----------------------------------------------------------------------===//
#ifndef X86DISASSEMBLERDECODER_H
#define X86DISASSEMBLERDECODER_H
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#define INSTRUCTION_SPECIFIER_FIELDS \
- uint16_t operands;
-
-#define INSTRUCTION_IDS \
- uint16_t instructionIDs;
-
#include "X86DisassemblerDecoderCommon.h"
+#include "llvm/ADT/ArrayRef.h"
-#undef INSTRUCTION_SPECIFIER_FIELDS
-#undef INSTRUCTION_IDS
+namespace llvm {
+namespace X86Disassembler {
-/*
- * Accessor functions for various fields of an Intel instruction
- */
+// Accessor functions for various fields of an Intel instruction
#define modFromModRM(modRM) (((modRM) & 0xc0) >> 6)
#define regFromModRM(modRM) (((modRM) & 0x38) >> 3)
#define rmFromModRM(modRM) ((modRM) & 0x7)
@@ -83,10 +72,7 @@ extern "C" {
#define lFromXOP3of3(xop) (((xop) & 0x4) >> 2)
#define ppFromXOP3of3(xop) ((xop) & 0x3)
-/*
- * These enums represent Intel registers for use by the decoder.
- */
-
+// These enums represent Intel registers for use by the decoder.
#define REGS_8BIT \
ENTRY(AL) \
ENTRY(CL) \
@@ -392,13 +378,11 @@ extern "C" {
REGS_CONTROL \
ENTRY(RIP)
-/*
- * EABase - All possible values of the base field for effective-address
- * computations, a.k.a. the Mod and R/M fields of the ModR/M byte. We
- * distinguish between bases (EA_BASE_*) and registers that just happen to be
- * referred to when Mod == 0b11 (EA_REG_*).
- */
-typedef enum {
+/// \brief All possible values of the base field for effective-address
+/// computations, a.k.a. the Mod and R/M fields of the ModR/M byte.
+/// We distinguish between bases (EA_BASE_*) and registers that just happen
+/// to be referred to when Mod == 0b11 (EA_REG_*).
+enum EABase {
EA_BASE_NONE,
#define ENTRY(x) EA_BASE_##x,
ALL_EA_BASES
@@ -407,15 +391,13 @@ typedef enum {
ALL_REGS
#undef ENTRY
EA_max
-} EABase;
-
-/*
- * SIBIndex - All possible values of the SIB index field.
- * Borrows entries from ALL_EA_BASES with the special case that
- * sib is synonymous with NONE.
- * Vector SIB: index can be XMM or YMM.
- */
-typedef enum {
+};
+
+/// \brief All possible values of the SIB index field.
+/// borrows entries from ALL_EA_BASES with the special case that
+/// sib is synonymous with NONE.
+/// Vector SIB: index can be XMM or YMM.
+enum SIBIndex {
SIB_INDEX_NONE,
#define ENTRY(x) SIB_INDEX_##x,
ALL_EA_BASES
@@ -424,23 +406,18 @@ typedef enum {
REGS_ZMM
#undef ENTRY
SIB_INDEX_max
-} SIBIndex;
+};
-/*
- * SIBBase - All possible values of the SIB base field.
- */
-typedef enum {
+/// \brief All possible values of the SIB base field.
+enum SIBBase {
SIB_BASE_NONE,
#define ENTRY(x) SIB_BASE_##x,
ALL_SIB_BASES
#undef ENTRY
SIB_BASE_max
-} SIBBase;
+};
-/*
- * EADisplacement - Possible displacement types for effective-address
- * computations.
- */
+/// \brief Possible displacement types for effective-address computations.
typedef enum {
EA_DISP_NONE,
EA_DISP_8,
@@ -448,20 +425,16 @@ typedef enum {
EA_DISP_32
} EADisplacement;
-/*
- * Reg - All possible values of the reg field in the ModR/M byte.
- */
-typedef enum {
+/// \brief All possible values of the reg field in the ModR/M byte.
+enum Reg {
#define ENTRY(x) MODRM_REG_##x,
ALL_REGS
#undef ENTRY
MODRM_REG_max
-} Reg;
+};
-/*
- * SegmentOverride - All possible segment overrides.
- */
-typedef enum {
+/// \brief All possible segment overrides.
+enum SegmentOverride {
SEG_OVERRIDE_NONE,
SEG_OVERRIDE_CS,
SEG_OVERRIDE_SS,
@@ -470,235 +443,220 @@ typedef enum {
SEG_OVERRIDE_FS,
SEG_OVERRIDE_GS,
SEG_OVERRIDE_max
-} SegmentOverride;
-
-/*
- * VEXLeadingOpcodeByte - Possible values for the VEX.m-mmmm field
- */
+};
-typedef enum {
+/// \brief Possible values for the VEX.m-mmmm field
+enum VEXLeadingOpcodeByte {
VEX_LOB_0F = 0x1,
VEX_LOB_0F38 = 0x2,
VEX_LOB_0F3A = 0x3
-} VEXLeadingOpcodeByte;
+};
-typedef enum {
+enum XOPMapSelect {
XOP_MAP_SELECT_8 = 0x8,
XOP_MAP_SELECT_9 = 0x9,
XOP_MAP_SELECT_A = 0xA
-} XOPMapSelect;
-
-/*
- * VEXPrefixCode - Possible values for the VEX.pp/EVEX.pp field
- */
+};
-typedef enum {
+/// \brief Possible values for the VEX.pp/EVEX.pp field
+enum VEXPrefixCode {
VEX_PREFIX_NONE = 0x0,
VEX_PREFIX_66 = 0x1,
VEX_PREFIX_F3 = 0x2,
VEX_PREFIX_F2 = 0x3
-} VEXPrefixCode;
+};
-typedef enum {
+enum VectorExtensionType {
TYPE_NO_VEX_XOP = 0x0,
TYPE_VEX_2B = 0x1,
TYPE_VEX_3B = 0x2,
TYPE_EVEX = 0x3,
TYPE_XOP = 0x4
-} VectorExtensionType;
-
-typedef uint8_t BOOL;
-
-/*
- * byteReader_t - Type for the byte reader that the consumer must provide to
- * the decoder. Reads a single byte from the instruction's address space.
- * @param arg - A baton that the consumer can associate with any internal
- * state that it needs.
- * @param byte - A pointer to a single byte in memory that should be set to
- * contain the value at address.
- * @param address - The address in the instruction's address space that should
- * be read from.
- * @return - -1 if the byte cannot be read for any reason; 0 otherwise.
- */
-typedef int (*byteReader_t)(const void* arg, uint8_t* byte, uint64_t address);
-
-/*
- * dlog_t - Type for the logging function that the consumer can provide to
- * get debugging output from the decoder.
- * @param arg - A baton that the consumer can associate with any internal
- * state that it needs.
- * @param log - A string that contains the message. Will be reused after
- * the logger returns.
- */
-typedef void (*dlog_t)(void* arg, const char *log);
-
-/*
- * The x86 internal instruction, which is produced by the decoder.
- */
+};
+
+/// \brief Type for the byte reader that the consumer must provide to
+/// the decoder. Reads a single byte from the instruction's address space.
+/// \param arg A baton that the consumer can associate with any internal
+/// state that it needs.
+/// \param byte A pointer to a single byte in memory that should be set to
+/// contain the value at address.
+/// \param address The address in the instruction's address space that should
+/// be read from.
+/// \return -1 if the byte cannot be read for any reason; 0 otherwise.
+typedef int (*byteReader_t)(const void *arg, uint8_t *byte, uint64_t address);
+
+/// \brief Type for the logging function that the consumer can provide to
+/// get debugging output from the decoder.
+/// \param arg A baton that the consumer can associate with any internal
+/// state that it needs.
+/// \param log A string that contains the message. Will be reused after
+/// the logger returns.
+typedef void (*dlog_t)(void *arg, const char *log);
+
+/// The specification for how to extract and interpret a full instruction and
+/// its operands.
+struct InstructionSpecifier {
+ uint16_t operands;
+};
+
+/// The x86 internal instruction, which is produced by the decoder.
struct InternalInstruction {
- /* Reader interface (C) */
+ // Reader interface (C)
byteReader_t reader;
- /* Opaque value passed to the reader */
+ // Opaque value passed to the reader
const void* readerArg;
- /* The address of the next byte to read via the reader */
+ // The address of the next byte to read via the reader
uint64_t readerCursor;
- /* Logger interface (C) */
+ // Logger interface (C)
dlog_t dlog;
- /* Opaque value passed to the logger */
+ // Opaque value passed to the logger
void* dlogArg;
- /* General instruction information */
+ // General instruction information
- /* The mode to disassemble for (64-bit, protected, real) */
+ // The mode to disassemble for (64-bit, protected, real)
DisassemblerMode mode;
- /* The start of the instruction, usable with the reader */
+ // The start of the instruction, usable with the reader
uint64_t startLocation;
- /* The length of the instruction, in bytes */
+ // The length of the instruction, in bytes
size_t length;
- /* Prefix state */
+ // Prefix state
- /* 1 if the prefix byte corresponding to the entry is present; 0 if not */
+ // 1 if the prefix byte corresponding to the entry is present; 0 if not
uint8_t prefixPresent[0x100];
- /* contains the location (for use with the reader) of the prefix byte */
+ // contains the location (for use with the reader) of the prefix byte
uint64_t prefixLocations[0x100];
- /* The value of the vector extension prefix(EVEX/VEX/XOP), if present */
+  // The value of the vector extension prefix (EVEX/VEX/XOP), if present
uint8_t vectorExtensionPrefix[4];
- /* The type of the vector extension prefix */
+ // The type of the vector extension prefix
VectorExtensionType vectorExtensionType;
- /* The value of the REX prefix, if present */
+ // The value of the REX prefix, if present
uint8_t rexPrefix;
- /* The location where a mandatory prefix would have to be (i.e., right before
- the opcode, or right before the REX prefix if one is present) */
+ // The location where a mandatory prefix would have to be (i.e., right before
+ // the opcode, or right before the REX prefix if one is present).
uint64_t necessaryPrefixLocation;
- /* The segment override type */
+ // The segment override type
SegmentOverride segmentOverride;
- /* 1 if the prefix byte, 0xf2 or 0xf3 is xacquire or xrelease */
- BOOL xAcquireRelease;
+  // true if the prefix byte 0xf2 or 0xf3 is xacquire or xrelease
+ bool xAcquireRelease;
- /* Sizes of various critical pieces of data, in bytes */
+ // Sizes of various critical pieces of data, in bytes
uint8_t registerSize;
uint8_t addressSize;
uint8_t displacementSize;
uint8_t immediateSize;
- /* Offsets from the start of the instruction to the pieces of data, which is
- needed to find relocation entries for adding symbolic operands */
+  // Offsets from the start of the instruction to the pieces of data, which are
+  // needed to find relocation entries for adding symbolic operands.
uint8_t displacementOffset;
uint8_t immediateOffset;
- /* opcode state */
+ // opcode state
- /* The last byte of the opcode, not counting any ModR/M extension */
+ // The last byte of the opcode, not counting any ModR/M extension
uint8_t opcode;
- /* The ModR/M byte of the instruction, if it is an opcode extension */
+ // The ModR/M byte of the instruction, if it is an opcode extension
uint8_t modRMExtension;
- /* decode state */
+ // decode state
- /* The type of opcode, used for indexing into the array of decode tables */
+ // The type of opcode, used for indexing into the array of decode tables
OpcodeType opcodeType;
- /* The instruction ID, extracted from the decode table */
+ // The instruction ID, extracted from the decode table
uint16_t instructionID;
- /* The specifier for the instruction, from the instruction info table */
- const struct InstructionSpecifier *spec;
+ // The specifier for the instruction, from the instruction info table
+ const InstructionSpecifier *spec;
- /* state for additional bytes, consumed during operand decode. Pattern:
- consumed___ indicates that the byte was already consumed and does not
- need to be consumed again */
+ // state for additional bytes, consumed during operand decode. Pattern:
+ // consumed___ indicates that the byte was already consumed and does not
+ // need to be consumed again.
- /* The VEX.vvvv field, which contains a third register operand for some AVX
- instructions */
+ // The VEX.vvvv field, which contains a third register operand for some AVX
+ // instructions.
Reg vvvv;
- /* The writemask for AVX-512 instructions which is contained in EVEX.aaa */
+ // The writemask for AVX-512 instructions which is contained in EVEX.aaa
Reg writemask;
- /* The ModR/M byte, which contains most register operands and some portion of
- all memory operands */
- BOOL consumedModRM;
+ // The ModR/M byte, which contains most register operands and some portion of
+ // all memory operands.
+ bool consumedModRM;
uint8_t modRM;
- /* The SIB byte, used for more complex 32- or 64-bit memory operands */
- BOOL consumedSIB;
+ // The SIB byte, used for more complex 32- or 64-bit memory operands
+ bool consumedSIB;
uint8_t sib;
- /* The displacement, used for memory operands */
- BOOL consumedDisplacement;
+ // The displacement, used for memory operands
+ bool consumedDisplacement;
int32_t displacement;
- /* Immediates. There can be two in some cases */
+ // Immediates. There can be two in some cases
uint8_t numImmediatesConsumed;
uint8_t numImmediatesTranslated;
uint64_t immediates[2];
- /* A register or immediate operand encoded into the opcode */
+ // A register or immediate operand encoded into the opcode
Reg opcodeRegister;
- /* Portions of the ModR/M byte */
+ // Portions of the ModR/M byte
- /* These fields determine the allowable values for the ModR/M fields, which
- depend on operand and address widths */
+ // These fields determine the allowable values for the ModR/M fields, which
+ // depend on operand and address widths.
EABase eaBaseBase;
EABase eaRegBase;
Reg regBase;
- /* The Mod and R/M fields can encode a base for an effective address, or a
- register. These are separated into two fields here */
+ // The Mod and R/M fields can encode a base for an effective address, or a
+ // register. These are separated into two fields here.
EABase eaBase;
EADisplacement eaDisplacement;
- /* The reg field always encodes a register */
+ // The reg field always encodes a register
Reg reg;
- /* SIB state */
+ // SIB state
SIBIndex sibIndex;
uint8_t sibScale;
SIBBase sibBase;
- const struct OperandSpecifier *operands;
+ ArrayRef<OperandSpecifier> operands;
};
-/* decodeInstruction - Decode one instruction and store the decoding results in
- * a buffer provided by the consumer.
- * @param insn - The buffer to store the instruction in. Allocated by the
- * consumer.
- * @param reader - The byteReader_t for the bytes to be read.
- * @param readerArg - An argument to pass to the reader for storing context
- * specific to the consumer. May be NULL.
- * @param logger - The dlog_t to be used in printing status messages from the
- * disassembler. May be NULL.
- * @param loggerArg - An argument to pass to the logger for storing context
- * specific to the logger. May be NULL.
- * @param startLoc - The address (in the reader's address space) of the first
- * byte in the instruction.
- * @param mode - The mode (16-bit, 32-bit, 64-bit) to decode in.
- * @return - Nonzero if there was an error during decode, 0 otherwise.
- */
-int decodeInstruction(struct InternalInstruction* insn,
+/// \brief Decode one instruction and store the decoding results in
+/// a buffer provided by the consumer.
+/// \param insn The buffer to store the instruction in. Allocated by the
+/// consumer.
+/// \param reader The byteReader_t for the bytes to be read.
+/// \param readerArg An argument to pass to the reader for storing context
+/// specific to the consumer. May be NULL.
+/// \param logger The dlog_t to be used in printing status messages from the
+/// disassembler. May be NULL.
+/// \param loggerArg An argument to pass to the logger for storing context
+/// specific to the logger. May be NULL.
+/// \param startLoc The address (in the reader's address space) of the first
+/// byte in the instruction.
+/// \param mode The mode (16-bit, 32-bit, 64-bit) to decode in.
+/// \return Nonzero if there was an error during decode, 0 otherwise.
+int decodeInstruction(InternalInstruction *insn,
byteReader_t reader,
- const void* readerArg,
+ const void *readerArg,
dlog_t logger,
- void* loggerArg,
- const void* miiArg,
+ void *loggerArg,
+ const void *miiArg,
uint64_t startLoc,
DisassemblerMode mode);
-/* x86DisassemblerDebug - C-accessible function for printing a message to
- * debugs()
- * @param file - The name of the file printing the debug message.
- * @param line - The line number that printed the debug message.
- * @param s - The message to print.
- */
+/// \brief Print a message to dbgs()
+/// \param file The name of the file printing the debug message.
+/// \param line The line number that printed the debug message.
+/// \param s The message to print.
+void Debug(const char *file, unsigned line, const char *s);
-void x86DisassemblerDebug(const char *file,
- unsigned line,
- const char *s);
+const char *GetInstrName(unsigned Opcode, const void *mii);
-const char *x86DisassemblerGetInstrName(unsigned Opcode, const void *mii);
-
-#ifdef __cplusplus
-}
-#endif
+} // namespace X86Disassembler
+} // namespace llvm
#endif
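
With the extern "C" bridge gone, a consumer implements the two callbacks and calls decodeInstruction inside the llvm::X86Disassembler namespace. A minimal harness along those lines — Region and regionReader are illustrative, and passing nullptr for miiArg is a sketch-only shortcut (the real disassembler hands in its MCInstrInfo, which getID needs for some instructions):

#include "X86DisassemblerDecoder.h"
#include <cstdio>

using namespace llvm::X86Disassembler;

namespace {
struct Region { const uint8_t *bytes; uint64_t size; };

// byteReader_t: fetch one byte from the region's "address space".
int regionReader(const void *arg, uint8_t *byte, uint64_t address) {
  const Region *R = static_cast<const Region *>(arg);
  if (address >= R->size)
    return -1;
  *byte = R->bytes[address];
  return 0;
}

// dlog_t: forward decoder chatter to stderr.
void logger(void *, const char *log) { std::fprintf(stderr, "%s\n", log); }
} // namespace

int main() {
  const uint8_t code[] = {0x90}; // nop
  Region R = {code, sizeof(code)};
  InternalInstruction insn;
  if (!decodeInstruction(&insn, regionReader, &R, logger, nullptr,
                         /*miiArg=*/nullptr, /*startLoc=*/0, MODE_64BIT))
    std::printf("decoded %zu byte(s)\n", insn.length);
}
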
diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h b/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h
index 523ae99..f59e0b6 100644
--- a/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h
+++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h
@@ -1,29 +1,27 @@
-/*===-- X86DisassemblerDecoderCommon.h - Disassembler decoder -----*- C -*-===*
- *
- * The LLVM Compiler Infrastructure
- *
- * This file is distributed under the University of Illinois Open Source
- * License. See LICENSE.TXT for details.
- *
- *===----------------------------------------------------------------------===*
- *
- * This file is part of the X86 Disassembler.
- * It contains common definitions used by both the disassembler and the table
- * generator.
- * Documentation for the disassembler can be found in X86Disassembler.h.
- *
- *===----------------------------------------------------------------------===*/
-
-/*
- * This header file provides those definitions that need to be shared between
- * the decoder and the table generator in a C-friendly manner.
- */
+//===-- X86DisassemblerDecoderCommon.h - Disassembler decoder ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is part of the X86 Disassembler.
+// It contains common definitions used by both the disassembler and the table
+// generator.
+// Documentation for the disassembler can be found in X86Disassembler.h.
+//
+//===----------------------------------------------------------------------===//
#ifndef X86DISASSEMBLERDECODERCOMMON_H
#define X86DISASSEMBLERDECODERCOMMON_H
#include "llvm/Support/DataTypes.h"
+namespace llvm {
+namespace X86Disassembler {
+
#define INSTRUCTIONS_SYM x86DisassemblerInstrSpecifiers
#define CONTEXTS_SYM x86DisassemblerContexts
#define ONEBYTE_SYM x86DisassemblerOneByteOpcodes
@@ -44,11 +42,9 @@
#define XOP9_MAP_STR "x86DisassemblerXOP9Opcodes"
#define XOPA_MAP_STR "x86DisassemblerXOPAOpcodes"
-/*
- * Attributes of an instruction that must be known before the opcode can be
- * processed correctly. Most of these indicate the presence of particular
- * prefixes, but ATTR_64BIT is simply an attribute of the decoding context.
- */
+// Attributes of an instruction that must be known before the opcode can be
+// processed correctly. Most of these indicate the presence of particular
+// prefixes, but ATTR_64BIT is simply an attribute of the decoding context.
#define ATTRIBUTE_BITS \
ENUM_ENTRY(ATTR_NONE, 0x00) \
ENUM_ENTRY(ATTR_64BIT, (0x1 << 0)) \
@@ -73,13 +69,11 @@ enum attributeBits {
};
#undef ENUM_ENTRY
-/*
- * Combinations of the above attributes that are relevant to instruction
- * decode. Although other combinations are possible, they can be reduced to
- * these without affecting the ultimately decoded instruction.
- */
+// Combinations of the above attributes that are relevant to instruction
+// decode. Although other combinations are possible, they can be reduced to
+// these without affecting the ultimately decoded instruction.
-/* Class name Rank Rationale for rank assignment */
+// Class name Rank Rationale for rank assignment
#define INSTRUCTION_CONTEXTS \
ENUM_ENTRY(IC, 0, "says nothing about the instruction") \
ENUM_ENTRY(IC_64BIT, 1, "says the instruction applies in " \
@@ -274,17 +268,15 @@ enum attributeBits {
ENUM_ENTRY(IC_EVEX_L2_W_OPSIZE_KZ, 4, "requires EVEX_KZ, L2, W and OpSize")
#define ENUM_ENTRY(n, r, d) n,
-typedef enum {
+enum InstructionContext {
INSTRUCTION_CONTEXTS
IC_max
-} InstructionContext;
+};
#undef ENUM_ENTRY
-/*
- * Opcode types, which determine which decode table to use, both in the Intel
- * manual and also for the decoder.
- */
-typedef enum {
+// Opcode types, which determine which decode table to use, both in the Intel
+// manual and also for the decoder.
+enum OpcodeType {
ONEBYTE = 0,
TWOBYTE = 1,
THREEBYTE_38 = 2,
@@ -292,39 +284,33 @@ typedef enum {
XOP8_MAP = 4,
XOP9_MAP = 5,
XOPA_MAP = 6
-} OpcodeType;
-
-/*
- * The following structs are used for the hierarchical decode table. After
- * determining the instruction's class (i.e., which IC_* constant applies to
- * it), the decoder reads the opcode. Some instructions require specific
- * values of the ModR/M byte, so the ModR/M byte indexes into the final table.
- *
- * If a ModR/M byte is not required, "required" is left unset, and the values
- * for each instructionID are identical.
- */
+};
+// The following structs are used for the hierarchical decode table. After
+// determining the instruction's class (i.e., which IC_* constant applies to
+// it), the decoder reads the opcode. Some instructions require specific
+// values of the ModR/M byte, so the ModR/M byte indexes into the final table.
+//
+// If a ModR/M byte is not required, "required" is left unset, and the values
+// for each instructionID are identical.
typedef uint16_t InstrUID;
-/*
- * ModRMDecisionType - describes the type of ModR/M decision, allowing the
- * consumer to determine the number of entries in it.
- *
- * MODRM_ONEENTRY - No matter what the value of the ModR/M byte is, the decoded
- * instruction is the same.
- * MODRM_SPLITRM - If the ModR/M byte is between 0x00 and 0xbf, the opcode
- * corresponds to one instruction; otherwise, it corresponds to
- * a different instruction.
- * MODRM_SPLITMISC- If the ModR/M byte is between 0x00 and 0xbf, ModR/M byte
- * divided by 8 is used to select instruction; otherwise, each
- * value of the ModR/M byte could correspond to a different
- * instruction.
- * MODRM_SPLITREG - ModR/M byte divided by 8 is used to select instruction. This
- corresponds to instructions that use reg field as opcode
- * MODRM_FULL - Potentially, each value of the ModR/M byte could correspond
- * to a different instruction.
- */
-
+// ModRMDecisionType - describes the type of ModR/M decision, allowing the
+// consumer to determine the number of entries in it.
+//
+// MODRM_ONEENTRY - No matter what the value of the ModR/M byte is, the decoded
+// instruction is the same.
+// MODRM_SPLITRM - If the ModR/M byte is between 0x00 and 0xbf, the opcode
+// corresponds to one instruction; otherwise, it corresponds to
+// a different instruction.
+// MODRM_SPLITMISC - If the ModR/M byte is between 0x00 and 0xbf, the ModR/M
+//                   byte divided by 8 selects the instruction; otherwise, each
+//                   value could correspond to a different instruction.
+// MODRM_SPLITREG  - ModR/M byte divided by 8 selects the instruction. This
+//                   corresponds to instructions that use the reg field as an
+//                   opcode extension.
+// MODRM_FULL - Potentially, each value of the ModR/M byte could correspond
+// to a different instruction.
#define MODRMTYPES \
ENUM_ENTRY(MODRM_ONEENTRY) \
ENUM_ENTRY(MODRM_SPLITRM) \
@@ -333,47 +319,13 @@ typedef uint16_t InstrUID;
ENUM_ENTRY(MODRM_FULL)
#define ENUM_ENTRY(n) n,
-typedef enum {
+enum ModRMDecisionType {
MODRMTYPES
MODRM_max
-} ModRMDecisionType;
-#undef ENUM_ENTRY
-
-/*
- * ModRMDecision - Specifies whether a ModR/M byte is needed and (if so) which
- * instruction each possible value of the ModR/M byte corresponds to. Once
- * this information is known, we have narrowed down to a single instruction.
- */
-struct ModRMDecision {
- uint8_t modrm_type;
-
- /* The macro below must be defined wherever this file is included. */
- INSTRUCTION_IDS
-};
-
-/*
- * OpcodeDecision - Specifies which set of ModR/M->instruction tables to look at
- * given a particular opcode.
- */
-struct OpcodeDecision {
- struct ModRMDecision modRMDecisions[256];
-};
-
-/*
- * ContextDecision - Specifies which opcode->instruction tables to look at given
- * a particular context (set of attributes). Since there are many possible
- * contexts, the decoder first uses CONTEXTS_SYM to determine which context
- * applies given a specific set of attributes. Hence there are only IC_max
- * entries in this table, rather than 2^(ATTR_max).
- */
-struct ContextDecision {
- struct OpcodeDecision opcodeDecisions[IC_max];
};
+#undef ENUM_ENTRY
-/*
- * Physical encodings of instruction operands.
- */
-
+// Physical encodings of instruction operands.
#define ENCODINGS \
ENUM_ENTRY(ENCODING_NONE, "") \
ENUM_ENTRY(ENCODING_REG, "Register operand in ModR/M byte.") \
@@ -408,16 +360,13 @@ struct ContextDecision {
ENUM_ENTRY(ENCODING_DI, "Destination index; encoded in prefixes")
#define ENUM_ENTRY(n, d) n,
- typedef enum {
- ENCODINGS
- ENCODING_max
- } OperandEncoding;
+enum OperandEncoding {
+ ENCODINGS
+ ENCODING_max
+};
#undef ENUM_ENTRY
-/*
- * Semantic interpretations of instruction operands.
- */
-
+// Semantic interpretations of instruction operands.
#define TYPES \
ENUM_ENTRY(TYPE_NONE, "") \
ENUM_ENTRY(TYPE_REL8, "1-byte immediate address") \
@@ -508,56 +457,42 @@ struct ContextDecision {
ENUM_ENTRY(TYPE_M512, "512-bit FPU/MMX/XMM/MXCSR state")
#define ENUM_ENTRY(n, d) n,
-typedef enum {
+enum OperandType {
TYPES
TYPE_max
-} OperandType;
+};
#undef ENUM_ENTRY
-/*
- * OperandSpecifier - The specification for how to extract and interpret one
- * operand.
- */
+/// \brief The specification for how to extract and interpret one operand.
struct OperandSpecifier {
uint8_t encoding;
uint8_t type;
};
-/*
- * Indicates where the opcode modifier (if any) is to be found. Extended
- * opcodes with AddRegFrm have the opcode modifier in the ModR/M byte.
- */
-
+// Indicates where the opcode modifier (if any) is to be found. Extended
+// opcodes with AddRegFrm have the opcode modifier in the ModR/M byte.
#define MODIFIER_TYPES \
ENUM_ENTRY(MODIFIER_NONE)
#define ENUM_ENTRY(n) n,
-typedef enum {
+enum ModifierType {
MODIFIER_TYPES
MODIFIER_max
-} ModifierType;
+};
#undef ENUM_ENTRY
-#define X86_MAX_OPERANDS 5
-
-/*
- * The specification for how to extract and interpret a full instruction and
- * its operands.
- */
-struct InstructionSpecifier {
- /* The macro below must be defined wherever this file is included. */
- INSTRUCTION_SPECIFIER_FIELDS
-};
+static const unsigned X86_MAX_OPERANDS = 5;
-/*
- * Decoding mode for the Intel disassembler. 16-bit, 32-bit, and 64-bit mode
- * are supported, and represent real mode, IA-32e, and IA-32e in 64-bit mode,
- * respectively.
- */
-typedef enum {
+/// Decoding mode for the Intel disassembler. 16-bit, 32-bit, and 64-bit mode
+/// are supported, and represent real mode, protected mode, and 64-bit mode,
+/// respectively.
+enum DisassemblerMode {
MODE_16BIT,
MODE_32BIT,
MODE_64BIT
-} DisassemblerMode;
+};
+
+} // namespace X86Disassembler
+} // namespace llvm
#endif
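
The ENUM_ENTRY discipline used throughout this header survives the conversion: each list macro is expanded more than once under different ENUM_ENTRY definitions. A self-contained illustration of the payoff — one list yields both an enum and a parallel name table that cannot drift apart:

#include <cstdio>

#define TOY_ENCODINGS  \
  ENUM_ENTRY(ENC_NONE) \
  ENUM_ENTRY(ENC_REG)  \
  ENUM_ENTRY(ENC_RM)

#define ENUM_ENTRY(n) n,
enum ToyEncoding { TOY_ENCODINGS ENC_max };
#undef ENUM_ENTRY

#define ENUM_ENTRY(n) #n,
static const char *const encodingNames[] = { TOY_ENCODINGS };
#undef ENUM_ENTRY

int main() {
  for (unsigned i = 0; i != ENC_max; ++i)
    std::printf("%u -> %s\n", i, encodingNames[i]);
}
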
diff --git a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
index eea0a76..b45b118 100644
--- a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
+++ b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
@@ -12,7 +12,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "asm-printer"
#include "X86ATTInstPrinter.h"
#include "MCTargetDesc/X86BaseInfo.h"
#include "MCTargetDesc/X86MCTargetDesc.h"
@@ -28,6 +27,8 @@
#include <map>
using namespace llvm;
+#define DEBUG_TYPE "asm-printer"
+
// Include the auto-generated portion of the assembly writer.
#define PRINT_ALIAS_INSTR
#include "X86GenAsmWriter.inc"
diff --git a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h
index f34e633..531183b 100644
--- a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h
+++ b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h
@@ -32,6 +32,8 @@ public:
// Autogenerated by tblgen, returns true if we successfully printed an
// alias.
bool printAliasInstr(const MCInst *MI, raw_ostream &OS);
+ void printCustomAliasOperand(const MCInst *MI, unsigned OpIdx,
+ unsigned PrintMethodIdx, raw_ostream &O);
// Autogenerated by tblgen.
void printInstruction(const MCInst *MI, raw_ostream &OS);
diff --git a/lib/Target/X86/InstPrinter/X86InstComments.cpp b/lib/Target/X86/InstPrinter/X86InstComments.cpp
index db61fb0..baf6507 100644
--- a/lib/Target/X86/InstPrinter/X86InstComments.cpp
+++ b/lib/Target/X86/InstPrinter/X86InstComments.cpp
@@ -32,7 +32,7 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
const char *(*getRegName)(unsigned)) {
// If this is a shuffle operation, the switch should fill in this state.
SmallVector<int, 8> ShuffleMask;
- const char *DestName = 0, *Src1Name = 0, *Src2Name = 0;
+ const char *DestName = nullptr, *Src1Name = nullptr, *Src2Name = nullptr;
switch (MI->getOpcode()) {
case X86::INSERTPSrr:
@@ -492,7 +492,7 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
// If this was a shuffle operation, print the shuffle mask.
if (!ShuffleMask.empty()) {
- if (DestName == 0) DestName = Src1Name;
+ if (!DestName) DestName = Src1Name;
OS << (DestName ? DestName : "mem") << " = ";
// If the two sources are the same, canonicalize the input elements to be
diff --git a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
index 1c95d37..1c8466b 100644
--- a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
+++ b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
@@ -12,7 +12,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "asm-printer"
#include "X86IntelInstPrinter.h"
#include "MCTargetDesc/X86BaseInfo.h"
#include "MCTargetDesc/X86MCTargetDesc.h"
@@ -25,6 +24,8 @@
#include <cctype>
using namespace llvm;
+#define DEBUG_TYPE "asm-printer"
+
#include "X86GenAsmWriter1.inc"
void X86IntelInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
diff --git a/lib/Target/X86/MCTargetDesc/Android.mk b/lib/Target/X86/MCTargetDesc/Android.mk
index ee37c27..a3c9bc8 100644
--- a/lib/Target/X86/MCTargetDesc/Android.mk
+++ b/lib/Target/X86/MCTargetDesc/Android.mk
@@ -14,7 +14,8 @@ x86_mc_desc_SRC_FILES := \
X86MCCodeEmitter.cpp \
X86MachORelocationInfo.cpp \
X86MachObjectWriter.cpp \
- X86WinCOFFObjectWriter.cpp
+ X86WinCOFFObjectWriter.cpp \
+ X86WinCOFFStreamer.cpp
# For the host
# =====================================================
diff --git a/lib/Target/X86/MCTargetDesc/CMakeLists.txt b/lib/Target/X86/MCTargetDesc/CMakeLists.txt
index 3f5a0e2..129c28d 100644
--- a/lib/Target/X86/MCTargetDesc/CMakeLists.txt
+++ b/lib/Target/X86/MCTargetDesc/CMakeLists.txt
@@ -5,6 +5,7 @@ add_llvm_library(LLVMX86Desc
X86MCCodeEmitter.cpp
X86MachObjectWriter.cpp
X86ELFObjectWriter.cpp
+ X86WinCOFFStreamer.cpp
X86WinCOFFObjectWriter.cpp
X86MachORelocationInfo.cpp
X86ELFRelocationInfo.cpp
diff --git a/lib/Target/X86/MCTargetDesc/LLVMBuild.txt b/lib/Target/X86/MCTargetDesc/LLVMBuild.txt
index 9e1d29c..146d111 100644
--- a/lib/Target/X86/MCTargetDesc/LLVMBuild.txt
+++ b/lib/Target/X86/MCTargetDesc/LLVMBuild.txt
@@ -19,5 +19,5 @@
type = Library
name = X86Desc
parent = X86
-required_libraries = MC Support X86AsmPrinter X86Info
+required_libraries = MC Object Support X86AsmPrinter X86Info
add_to_library_groups = X86
diff --git a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
index 23763f7..bf30a8e 100644
--- a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
@@ -37,23 +37,29 @@ MCDisableArithRelaxation("mc-x86-disable-arith-relaxation",
static unsigned getFixupKindLog2Size(unsigned Kind) {
switch (Kind) {
- default: llvm_unreachable("invalid fixup kind!");
+ default:
+ llvm_unreachable("invalid fixup kind!");
case FK_PCRel_1:
case FK_SecRel_1:
- case FK_Data_1: return 0;
+ case FK_Data_1:
+ return 0;
case FK_PCRel_2:
case FK_SecRel_2:
- case FK_Data_2: return 1;
+ case FK_Data_2:
+ return 1;
case FK_PCRel_4:
case X86::reloc_riprel_4byte:
case X86::reloc_riprel_4byte_movq_load:
case X86::reloc_signed_4byte:
case X86::reloc_global_offset_table:
case FK_SecRel_4:
- case FK_Data_4: return 2;
+ case FK_Data_4:
+ return 2;
case FK_PCRel_8:
case FK_SecRel_8:
- case FK_Data_8: return 3;
+ case FK_Data_8:
+ case X86::reloc_global_offset_table8:
+ return 3;
}
}
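
The new reloc_global_offset_table8 entry lands in the log2-size-3 bucket next to FK_Data_8, i.e. fixups that occupy eight bytes. The relationship the function encodes, restated as a check:

#include <cassert>

// A fixup's width in bytes is 1 << getFixupKindLog2Size(kind).
static unsigned fixupWidthBytes(unsigned log2Size) { return 1u << log2Size; }

int main() {
  assert(fixupWidthBytes(2) == 4); // FK_Data_4, reloc_global_offset_table
  assert(fixupWidthBytes(3) == 8); // FK_Data_8, reloc_global_offset_table8
}
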
diff --git a/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
index 38fab15..6aeb1f2 100644
--- a/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
+++ b/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
@@ -643,6 +643,10 @@ namespace X86II {
/// counted as one operand.
///
inline int getMemoryOperandNo(uint64_t TSFlags, unsigned Opcode) {
+ bool HasVEX_4V = (TSFlags >> X86II::VEXShift) & X86II::VEX_4V;
+ bool HasMemOp4 = (TSFlags >> X86II::VEXShift) & X86II::MemOp4;
+ bool HasEVEX_K = ((TSFlags >> X86II::VEXShift) & X86II::EVEX_K);
+
switch (TSFlags & X86II::FormMask) {
default: llvm_unreachable("Unknown FormMask value in getMemoryOperandNo!");
case X86II::Pseudo:
@@ -660,9 +664,6 @@ namespace X86II {
case X86II::MRMDestMem:
return 0;
case X86II::MRMSrcMem: {
- bool HasVEX_4V = (TSFlags >> X86II::VEXShift) & X86II::VEX_4V;
- bool HasMemOp4 = (TSFlags >> X86II::VEXShift) & X86II::MemOp4;
- bool HasEVEX_K = ((TSFlags >> X86II::VEXShift) & X86II::EVEX_K);
unsigned FirstMemOp = 1;
if (HasVEX_4V)
++FirstMemOp;// Skip the register source (which is encoded in VEX_VVVV).
@@ -690,6 +691,8 @@ namespace X86II {
unsigned FirstMemOp = 0;
if (HasVEX_4V)
++FirstMemOp;// Skip the register dest (which is encoded in VEX_VVVV).
+ if (HasEVEX_K)
+ ++FirstMemOp;// Skip the mask register
return FirstMemOp;
}
case X86II::MRM_C0: case X86II::MRM_C1: case X86II::MRM_C2:
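
Hoisting HasVEX_4V, HasMemOp4, and HasEVEX_K to the top of getMemoryOperandNo lets every case share them, and the memory-form case above gains the same writemask skip MRMSrcMem already had: operands encoded in VEX.vvvv and EVEX.aaa precede the memory reference in the operand list. The arithmetic in isolation:

#include <cassert>

// Operand layout before the memory reference: [VEX.vvvv reg] [EVEX.aaa mask].
static unsigned firstMemOperand(bool hasVEX_4V, bool hasEVEX_K) {
  unsigned FirstMemOp = 0;
  if (hasVEX_4V)
    ++FirstMemOp; // skip the register encoded in VEX.vvvv
  if (hasEVEX_K)
    ++FirstMemOp; // skip the writemask register
  return FirstMemOp;
}

int main() {
  assert(firstMemOperand(false, false) == 0);
  assert(firstMemOperand(true, false) == 1);
  assert(firstMemOperand(true, true) == 2); // e.g. a masked AVX-512 form
}
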
diff --git a/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp
index c44d88d..3fdec87 100644
--- a/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp
@@ -43,7 +43,7 @@ unsigned X86ELFObjectWriter::GetRelocType(const MCValue &Target,
bool IsPCRel) const {
// determine the type of the relocation
- MCSymbolRefExpr::VariantKind Modifier = Fixup.getAccessVariant();
+ MCSymbolRefExpr::VariantKind Modifier = Target.getAccessVariant();
unsigned Type;
if (getEMachine() == ELF::EM_X86_64) {
if (IsPCRel) {
@@ -98,6 +98,12 @@ unsigned X86ELFObjectWriter::GetRelocType(const MCValue &Target,
} else {
switch ((unsigned)Fixup.getKind()) {
default: llvm_unreachable("invalid fixup kind!");
+ case X86::reloc_global_offset_table8:
+ Type = ELF::R_X86_64_GOTPC64;
+ break;
+ case X86::reloc_global_offset_table:
+ Type = ELF::R_X86_64_GOTPC32;
+ break;
case FK_Data_8:
switch (Modifier) {
default:
diff --git a/lib/Target/X86/MCTargetDesc/X86ELFRelocationInfo.cpp b/lib/Target/X86/MCTargetDesc/X86ELFRelocationInfo.cpp
index 4fa519c..b679316 100644
--- a/lib/Target/X86/MCTargetDesc/X86ELFRelocationInfo.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86ELFRelocationInfo.cpp
@@ -39,7 +39,7 @@ public:
if (Sym->isVariable() == false)
Sym->setVariableValue(MCConstantExpr::Create(SymAddr, Ctx));
- const MCExpr *Expr = 0;
+ const MCExpr *Expr = nullptr;
// If hasAddend is true, then we need to add Addend (r_addend) to Expr.
bool hasAddend = false;
diff --git a/lib/Target/X86/MCTargetDesc/X86FixupKinds.h b/lib/Target/X86/MCTargetDesc/X86FixupKinds.h
index f2e34cb..09396b7 100644
--- a/lib/Target/X86/MCTargetDesc/X86FixupKinds.h
+++ b/lib/Target/X86/MCTargetDesc/X86FixupKinds.h
@@ -23,6 +23,7 @@ enum Fixups {
reloc_global_offset_table, // 32-bit, relative to the start
// of the instruction. Used only
// for _GLOBAL_OFFSET_TABLE_.
+ reloc_global_offset_table8, // 64-bit variant.
// Marker
LastTargetFixupKind,
NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
diff --git a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
index 6561804..39480ea 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
@@ -51,7 +51,7 @@ X86MCAsmInfoDarwin::X86MCAsmInfoDarwin(const Triple &T) {
TextAlignFillValue = 0x90;
if (!is64Bit)
- Data64bitsDirective = 0; // we can't emit a 64-bit unit
+ Data64bitsDirective = nullptr; // we can't emit a 64-bit unit
// Use ## as a comment string so that .s files generated by llvm can go
// through the GCC preprocessor without causing an error. This is needed
@@ -115,7 +115,7 @@ X86ELFMCAsmInfo::X86ELFMCAsmInfo(const Triple &T) {
// into two .words.
if ((T.getOS() == Triple::OpenBSD || T.getOS() == Triple::Bitrig) &&
T.getArch() == Triple::x86)
- Data64bitsDirective = 0;
+ Data64bitsDirective = nullptr;
// Always enable the integrated assembler by default.
// Clang also enabled it when the OS is Solaris but that is redundant here.
@@ -157,8 +157,10 @@ X86MCAsmInfoMicrosoft::X86MCAsmInfoMicrosoft(const Triple &Triple) {
void X86MCAsmInfoGNUCOFF::anchor() { }
X86MCAsmInfoGNUCOFF::X86MCAsmInfoGNUCOFF(const Triple &Triple) {
- if (Triple.getArch() == Triple::x86_64)
+ if (Triple.getArch() == Triple::x86_64) {
PrivateGlobalPrefix = ".L";
+ PointerSize = 8;
+ }
AssemblerDialect = AsmWriterFlavor;
diff --git a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
index e6fb037..2152b21 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
@@ -11,7 +11,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "mccodeemitter"
#include "MCTargetDesc/X86MCTargetDesc.h"
#include "MCTargetDesc/X86BaseInfo.h"
#include "MCTargetDesc/X86FixupKinds.h"
@@ -27,6 +26,8 @@
using namespace llvm;
+#define DEBUG_TYPE "mccodeemitter"
+
namespace {
class X86MCCodeEmitter : public MCCodeEmitter {
X86MCCodeEmitter(const X86MCCodeEmitter &) LLVM_DELETED_FUNCTION;
@@ -285,7 +286,7 @@ enum GlobalOffsetTableExprKind {
};
static GlobalOffsetTableExprKind
StartsWithGlobalOffsetTable(const MCExpr *Expr) {
- const MCExpr *RHS = 0;
+ const MCExpr *RHS = nullptr;
if (Expr->getKind() == MCExpr::Binary) {
const MCBinaryExpr *BE = static_cast<const MCBinaryExpr *>(Expr);
Expr = BE->getLHS();
@@ -316,7 +317,7 @@ void X86MCCodeEmitter::
EmitImmediate(const MCOperand &DispOp, SMLoc Loc, unsigned Size,
MCFixupKind FixupKind, unsigned &CurByte, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups, int ImmOffset) const {
- const MCExpr *Expr = NULL;
+ const MCExpr *Expr = nullptr;
if (DispOp.isImm()) {
// If this is a simple integer displacement that doesn't require a
// relocation, emit it now.
@@ -339,7 +340,13 @@ EmitImmediate(const MCOperand &DispOp, SMLoc Loc, unsigned Size,
if (Kind != GOT_None) {
assert(ImmOffset == 0);
- FixupKind = MCFixupKind(X86::reloc_global_offset_table);
+ if (Size == 8) {
+ FixupKind = MCFixupKind(X86::reloc_global_offset_table8);
+ } else {
+ assert(Size == 4);
+ FixupKind = MCFixupKind(X86::reloc_global_offset_table);
+ }
+
if (Kind == GOT_Normal)
ImmOffset = CurByte;
} else if (Expr->getKind() == MCExpr::SymbolRef) {
@@ -1421,6 +1428,8 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
case X86II::MRM6r: case X86II::MRM7r: {
if (HasVEX_4V) // Skip the register dst (which is encoded in VEX_VVVV).
++CurOp;
+ if (HasEVEX_K) // Skip writemask
+ ++CurOp;
EmitByte(BaseOpcode, CurByte, OS);
uint64_t Form = TSFlags & X86II::FormMask;
EmitRegModRMByte(MI.getOperand(CurOp++),
@@ -1436,6 +1445,8 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
case X86II::MRM6m: case X86II::MRM7m: {
if (HasVEX_4V) // Skip the register dst (which is encoded in VEX_VVVV).
++CurOp;
+ if (HasEVEX_K) // Skip writemask
+ ++CurOp;
EmitByte(BaseOpcode, CurByte, OS);
uint64_t Form = TSFlags & X86II::FormMask;
EmitMemModRMByte(MI, CurOp, (Form == X86II::MRMXm) ? 0 : Form-X86II::MRM0m,
diff --git a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
index 09fdb9c..e63036c 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
@@ -27,6 +27,12 @@
#include "llvm/Support/Host.h"
#include "llvm/Support/TargetRegistry.h"
+#if _MSC_VER
+#include <intrin.h>
+#endif
+
+using namespace llvm;
+
#define GET_REGINFO_MC_DESC
#include "X86GenRegisterInfo.inc"
@@ -36,13 +42,6 @@
#define GET_SUBTARGETINFO_MC_DESC
#include "X86GenSubtargetInfo.inc"
-#if _MSC_VER
-#include <intrin.h>
-#endif
-
-using namespace llvm;
-
-
std::string X86_MC::ParseX86Triple(StringRef TT) {
Triple TheTriple(TT);
std::string FS;
@@ -230,14 +229,8 @@ MCSubtargetInfo *X86_MC::createX86MCSubtargetInfo(StringRef TT, StringRef CPU,
}
std::string CPUName = CPU;
- if (CPUName.empty()) {
-#if defined(i386) || defined(__i386__) || defined(__x86__) || defined(_M_IX86)\
- || defined(__x86_64__) || defined(_M_AMD64) || defined (_M_X64)
- CPUName = sys::getHostCPUName();
-#else
+ if (CPUName.empty())
CPUName = "generic";
-#endif
- }
MCSubtargetInfo *X = new MCSubtargetInfo();
InitX86MCSubtargetInfo(X, TT, CPUName, ArchFS);
@@ -294,13 +287,13 @@ static MCAsmInfo *createX86MCAsmInfo(const MCRegisterInfo &MRI, StringRef TT) {
// Initial state of the frame pointer is esp+stackGrowth.
unsigned StackPtr = is64Bit ? X86::RSP : X86::ESP;
MCCFIInstruction Inst = MCCFIInstruction::createDefCfa(
- 0, MRI.getDwarfRegNum(StackPtr, true), -stackGrowth);
+ nullptr, MRI.getDwarfRegNum(StackPtr, true), -stackGrowth);
MAI->addInitialFrameState(Inst);
// Add return address to move list
unsigned InstPtr = is64Bit ? X86::RIP : X86::EIP;
MCCFIInstruction Inst2 = MCCFIInstruction::createOffset(
- 0, MRI.getDwarfRegNum(InstPtr, true), stackGrowth);
+ nullptr, MRI.getDwarfRegNum(InstPtr, true), stackGrowth);
MAI->addInitialFrameState(Inst2);
return MAI;
@@ -365,13 +358,16 @@ static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
bool NoExecStack) {
Triple TheTriple(TT);
- if (TheTriple.isOSBinFormatMachO())
+ switch (TheTriple.getObjectFormat()) {
+ default: llvm_unreachable("unsupported object format");
+ case Triple::MachO:
return createMachOStreamer(Ctx, MAB, _OS, _Emitter, RelaxAll);
-
- if (TheTriple.isOSWindows() && !TheTriple.isOSBinFormatELF())
- return createWinCOFFStreamer(Ctx, MAB, *_Emitter, _OS, RelaxAll);
-
- return createELFStreamer(Ctx, MAB, _OS, _Emitter, RelaxAll, NoExecStack);
+ case Triple::COFF:
+ assert(TheTriple.isOSWindows() && "only Windows COFF is supported");
+ return createX86WinCOFFStreamer(Ctx, MAB, _Emitter, _OS, RelaxAll);
+ case Triple::ELF:
+ return createELFStreamer(Ctx, MAB, _OS, _Emitter, RelaxAll, NoExecStack);
+ }
}
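
Streamer creation now switches on Triple::getObjectFormat rather than probing the OS, making the Mach-O/COFF/ELF choice explicit. A small usage sketch of the same dispatch — the formatName helper is mine, for printing only:

#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include <cstdio>

static const char *formatName(llvm::StringRef TT) {
  llvm::Triple T(TT);
  switch (T.getObjectFormat()) {
  case llvm::Triple::MachO: return "Mach-O";
  case llvm::Triple::COFF:  return "COFF";
  case llvm::Triple::ELF:   return "ELF";
  default:                  return "unknown";
  }
}

int main() {
  std::printf("%s\n", formatName("x86_64-pc-windows-msvc"));   // COFF
  std::printf("%s\n", formatName("x86_64-unknown-linux-gnu")); // ELF
}
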
static MCInstPrinter *createX86MCInstPrinter(const Target &T,
@@ -384,7 +380,7 @@ static MCInstPrinter *createX86MCInstPrinter(const Target &T,
return new X86ATTInstPrinter(MAI, MII, MRI);
if (SyntaxVariant == 1)
return new X86IntelInstPrinter(MAI, MII, MRI);
- return 0;
+ return nullptr;
}
static MCRelocationInfo *createX86MCRelocationInfo(StringRef TT,
diff --git a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
index 41ae435..8fe40fd 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
+++ b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
@@ -26,6 +26,7 @@ class MCObjectWriter;
class MCRegisterInfo;
class MCSubtargetInfo;
class MCRelocationInfo;
+class MCStreamer;
class Target;
class StringRef;
class raw_ostream;
@@ -84,6 +85,14 @@ MCAsmBackend *createX86_32AsmBackend(const Target &T, const MCRegisterInfo &MRI,
MCAsmBackend *createX86_64AsmBackend(const Target &T, const MCRegisterInfo &MRI,
StringRef TT, StringRef CPU);
+/// createX86WinCOFFStreamer - Construct an X86 Windows COFF machine code
+/// streamer which will generate PE/COFF format object files.
+///
+/// Takes ownership of \p AB and \p CE.
+MCStreamer *createX86WinCOFFStreamer(MCContext &C, MCAsmBackend &AB,
+ MCCodeEmitter *CE, raw_ostream &OS,
+ bool RelaxAll);
+
/// createX86MachObjectWriter - Construct an X86 Mach-O object writer.
MCObjectWriter *createX86MachObjectWriter(raw_ostream &OS,
bool Is64Bit,
diff --git a/lib/Target/X86/MCTargetDesc/X86MachORelocationInfo.cpp b/lib/Target/X86/MCTargetDesc/X86MachORelocationInfo.cpp
index f2023e3..3b81d53 100644
--- a/lib/Target/X86/MCTargetDesc/X86MachORelocationInfo.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MachORelocationInfo.cpp
@@ -40,7 +40,7 @@ public:
// FIXME: check that the value is actually the same.
if (Sym->isVariable() == false)
Sym->setVariableValue(MCConstantExpr::Create(SymAddr, Ctx));
- const MCExpr *Expr = 0;
+ const MCExpr *Expr = nullptr;
switch(RelType) {
case X86_64_RELOC_TLV:
diff --git a/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp
index 1a35ced..ead3338 100644
--- a/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp
@@ -146,13 +146,13 @@ void X86MachObjectWriter::RecordX86_64Relocation(MachObjectWriter *Writer,
const MCSymbol *A = &Target.getSymA()->getSymbol();
if (A->isTemporary())
A = &A->AliasedSymbol();
- MCSymbolData &A_SD = Asm.getSymbolData(*A);
+ const MCSymbolData &A_SD = Asm.getSymbolData(*A);
const MCSymbolData *A_Base = Asm.getAtom(&A_SD);
const MCSymbol *B = &Target.getSymB()->getSymbol();
if (B->isTemporary())
B = &B->AliasedSymbol();
- MCSymbolData &B_SD = Asm.getSymbolData(*B);
+ const MCSymbolData &B_SD = Asm.getSymbolData(*B);
const MCSymbolData *B_Base = Asm.getAtom(&B_SD);
// Neither symbol can be modified.
@@ -186,9 +186,9 @@ void X86MachObjectWriter::RecordX86_64Relocation(MachObjectWriter *Writer,
false);
Value += Writer->getSymbolAddress(&A_SD, Layout) -
- (A_Base == NULL ? 0 : Writer->getSymbolAddress(A_Base, Layout));
+ (!A_Base ? 0 : Writer->getSymbolAddress(A_Base, Layout));
Value -= Writer->getSymbolAddress(&B_SD, Layout) -
- (B_Base == NULL ? 0 : Writer->getSymbolAddress(B_Base, Layout));
+ (!B_Base ? 0 : Writer->getSymbolAddress(B_Base, Layout));
if (A_Base) {
Index = A_Base->getIndex();
@@ -220,7 +220,7 @@ void X86MachObjectWriter::RecordX86_64Relocation(MachObjectWriter *Writer,
Type = MachO::X86_64_RELOC_SUBTRACTOR;
} else {
const MCSymbol *Symbol = &Target.getSymA()->getSymbol();
- MCSymbolData &SD = Asm.getSymbolData(*Symbol);
+ const MCSymbolData &SD = Asm.getSymbolData(*Symbol);
const MCSymbolData *Base = Asm.getAtom(&SD);
// Relocations inside debug sections always use local relocations when
@@ -231,7 +231,7 @@ void X86MachObjectWriter::RecordX86_64Relocation(MachObjectWriter *Writer,
const MCSectionMachO &Section = static_cast<const MCSectionMachO&>(
Fragment->getParent()->getSection());
if (Section.hasAttribute(MachO::S_ATTR_DEBUG))
- Base = 0;
+ Base = nullptr;
}
// x86_64 almost always uses external relocations, except when there is no
@@ -369,7 +369,7 @@ bool X86MachObjectWriter::RecordScatteredRelocation(MachObjectWriter *Writer,
// See <reloc.h>.
const MCSymbol *A = &Target.getSymA()->getSymbol();
- MCSymbolData *A_SD = &Asm.getSymbolData(*A);
+ const MCSymbolData *A_SD = &Asm.getSymbolData(*A);
if (!A_SD->getFragment())
report_fatal_error("symbol '" + A->getName() +
@@ -382,7 +382,7 @@ bool X86MachObjectWriter::RecordScatteredRelocation(MachObjectWriter *Writer,
uint32_t Value2 = 0;
if (const MCSymbolRefExpr *B = Target.getSymB()) {
- MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol());
+ const MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol());
if (!B_SD->getFragment())
report_fatal_error("symbol '" + B->getSymbol().getName() +
@@ -465,7 +465,7 @@ void X86MachObjectWriter::RecordTLVPRelocation(MachObjectWriter *Writer,
unsigned IsPCRel = 0;
// Get the symbol data.
- MCSymbolData *SD_A = &Asm.getSymbolData(Target.getSymA()->getSymbol());
+ const MCSymbolData *SD_A = &Asm.getSymbolData(Target.getSymA()->getSymbol());
unsigned Index = SD_A->getIndex();
// We're only going to have a second symbol in pic mode and it'll be a
@@ -476,7 +476,8 @@ void X86MachObjectWriter::RecordTLVPRelocation(MachObjectWriter *Writer,
// If this is a subtraction then we're pcrel.
uint32_t FixupAddress =
Writer->getFragmentAddress(Fragment, Layout) + Fixup.getOffset();
- MCSymbolData *SD_B = &Asm.getSymbolData(Target.getSymB()->getSymbol());
+ const MCSymbolData *SD_B =
+ &Asm.getSymbolData(Target.getSymB()->getSymbol());
IsPCRel = 1;
FixedValue = (FixupAddress - Writer->getSymbolAddress(SD_B, Layout) +
Target.getConstant());
@@ -524,7 +525,7 @@ void X86MachObjectWriter::RecordX86Relocation(MachObjectWriter *Writer,
}
// Get the symbol data, if any.
- MCSymbolData *SD = 0;
+ const MCSymbolData *SD = nullptr;
if (Target.getSymA())
SD = &Asm.getSymbolData(Target.getSymA()->getSymbol());
diff --git a/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp
index ffc9e8d..40af822 100644
--- a/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp
@@ -23,10 +23,8 @@ namespace llvm {
namespace {
class X86WinCOFFObjectWriter : public MCWinCOFFObjectTargetWriter {
- const bool Is64Bit;
-
public:
- X86WinCOFFObjectWriter(bool Is64Bit_);
+ X86WinCOFFObjectWriter(bool Is64Bit);
virtual ~X86WinCOFFObjectWriter();
unsigned getRelocType(const MCValue &Target, const MCFixup &Fixup,
@@ -34,10 +32,9 @@ namespace {
};
}
-X86WinCOFFObjectWriter::X86WinCOFFObjectWriter(bool Is64Bit_)
- : MCWinCOFFObjectTargetWriter(Is64Bit_ ? COFF::IMAGE_FILE_MACHINE_AMD64 :
- COFF::IMAGE_FILE_MACHINE_I386),
- Is64Bit(Is64Bit_) {}
+X86WinCOFFObjectWriter::X86WinCOFFObjectWriter(bool Is64Bit)
+ : MCWinCOFFObjectTargetWriter(Is64Bit ? COFF::IMAGE_FILE_MACHINE_AMD64
+ : COFF::IMAGE_FILE_MACHINE_I386) {}
X86WinCOFFObjectWriter::~X86WinCOFFObjectWriter() {}
@@ -49,29 +46,46 @@ unsigned X86WinCOFFObjectWriter::getRelocType(const MCValue &Target,
MCSymbolRefExpr::VariantKind Modifier = Target.isAbsolute() ?
MCSymbolRefExpr::VK_None : Target.getSymA()->getKind();
- switch (FixupKind) {
- case FK_PCRel_4:
- case X86::reloc_riprel_4byte:
- case X86::reloc_riprel_4byte_movq_load:
- return Is64Bit ? COFF::IMAGE_REL_AMD64_REL32 : COFF::IMAGE_REL_I386_REL32;
- case FK_Data_4:
- case X86::reloc_signed_4byte:
- if (Modifier == MCSymbolRefExpr::VK_COFF_IMGREL32)
- return Is64Bit ? COFF::IMAGE_REL_AMD64_ADDR32NB :
- COFF::IMAGE_REL_I386_DIR32NB;
- return Is64Bit ? COFF::IMAGE_REL_AMD64_ADDR32 : COFF::IMAGE_REL_I386_DIR32;
- case FK_Data_8:
- if (Is64Bit)
+ if (getMachine() == COFF::IMAGE_FILE_MACHINE_AMD64) {
+ switch (FixupKind) {
+ case FK_PCRel_4:
+ case X86::reloc_riprel_4byte:
+ case X86::reloc_riprel_4byte_movq_load:
+ return COFF::IMAGE_REL_AMD64_REL32;
+ case FK_Data_4:
+ case X86::reloc_signed_4byte:
+ if (Modifier == MCSymbolRefExpr::VK_COFF_IMGREL32)
+ return COFF::IMAGE_REL_AMD64_ADDR32NB;
+ return COFF::IMAGE_REL_AMD64_ADDR32;
+ case FK_Data_8:
return COFF::IMAGE_REL_AMD64_ADDR64;
- llvm_unreachable("unsupported relocation type");
- case FK_SecRel_2:
- return Is64Bit ? COFF::IMAGE_REL_AMD64_SECTION
- : COFF::IMAGE_REL_I386_SECTION;
- case FK_SecRel_4:
- return Is64Bit ? COFF::IMAGE_REL_AMD64_SECREL : COFF::IMAGE_REL_I386_SECREL;
- default:
- llvm_unreachable("unsupported relocation type");
- }
+ case FK_SecRel_2:
+ return COFF::IMAGE_REL_AMD64_SECTION;
+ case FK_SecRel_4:
+ return COFF::IMAGE_REL_AMD64_SECREL;
+ default:
+ llvm_unreachable("unsupported relocation type");
+ }
+ } else if (getMachine() == COFF::IMAGE_FILE_MACHINE_I386) {
+ switch (FixupKind) {
+ case FK_PCRel_4:
+ case X86::reloc_riprel_4byte:
+ case X86::reloc_riprel_4byte_movq_load:
+ return COFF::IMAGE_REL_I386_REL32;
+ case FK_Data_4:
+ case X86::reloc_signed_4byte:
+ if (Modifier == MCSymbolRefExpr::VK_COFF_IMGREL32)
+ return COFF::IMAGE_REL_I386_DIR32NB;
+ return COFF::IMAGE_REL_I386_DIR32;
+ case FK_SecRel_2:
+ return COFF::IMAGE_REL_I386_SECTION;
+ case FK_SecRel_4:
+ return COFF::IMAGE_REL_I386_SECREL;
+ default:
+ llvm_unreachable("unsupported relocation type");
+ }
+ } else
+ llvm_unreachable("Unsupported COFF machine type.");
}
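
As an illustration of the per-machine dispatch above (assuming GAS-style input
and the @IMGREL spelling for VK_COFF_IMGREL32), the same 4-byte data fixup
yields different relocations per machine and modifier:

    .long sym          # AMD64: IMAGE_REL_AMD64_ADDR32    i386: IMAGE_REL_I386_DIR32
    .long sym@IMGREL   # AMD64: IMAGE_REL_AMD64_ADDR32NB  i386: IMAGE_REL_I386_DIR32NB
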
MCObjectWriter *llvm::createX86WinCOFFObjectWriter(raw_ostream &OS,
diff --git a/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp b/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp
new file mode 100644
index 0000000..c62fd0a
--- /dev/null
+++ b/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp
@@ -0,0 +1,51 @@
+//===-- X86WinCOFFStreamer.cpp - X86 Target WinCOFF Streamer ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86MCTargetDesc.h"
+#include "llvm/MC/MCWinCOFFStreamer.h"
+
+using namespace llvm;
+
+namespace {
+class X86WinCOFFStreamer : public MCWinCOFFStreamer {
+public:
+ X86WinCOFFStreamer(MCContext &C, MCAsmBackend &AB, MCCodeEmitter *CE,
+ raw_ostream &OS)
+ : MCWinCOFFStreamer(C, AB, *CE, OS) { }
+
+ void EmitWin64EHHandlerData() override;
+ void FinishImpl() override;
+};
+
+void X86WinCOFFStreamer::EmitWin64EHHandlerData() {
+ MCStreamer::EmitWin64EHHandlerData();
+
+ // We have to emit the unwind info now, because this directive
+ // actually switches to the .xdata section!
+ MCWin64EHUnwindEmitter::EmitUnwindInfo(*this, getCurrentW64UnwindInfo());
+}
+
+void X86WinCOFFStreamer::FinishImpl() {
+ EmitFrames(nullptr);
+ EmitW64Tables();
+
+ MCWinCOFFStreamer::FinishImpl();
+}
+}
+
+namespace llvm {
+MCStreamer *createX86WinCOFFStreamer(MCContext &C, MCAsmBackend &AB,
+ MCCodeEmitter *CE, raw_ostream &OS,
+ bool RelaxAll) {
+ X86WinCOFFStreamer *S = new X86WinCOFFStreamer(C, AB, CE, OS);
+ S->getAssembler().setRelaxAll(RelaxAll);
+ return S;
+}
+}
+
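The EmitWin64EHHandlerData override above exists purely for directive
ordering; a sketch of the SEH assembly it services (directive names as in
LLVM's Win64 EH support):

    .seh_proc foo
        ...
    .seh_handlerdata    # switches the streamer to .xdata, so the unwind info
        ...             # must be flushed here rather than at .seh_endproc
    .seh_endproc
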
diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h
index 18e6845..64e8ea8 100644
--- a/lib/Target/X86/X86.h
+++ b/lib/Target/X86/X86.h
@@ -30,9 +30,9 @@ class X86TargetMachine;
FunctionPass *createX86ISelDag(X86TargetMachine &TM,
CodeGenOpt::Level OptLevel);
-/// createGlobalBaseRegPass - This pass initializes a global base
+/// createX86GlobalBaseRegPass - This pass initializes a global base
/// register for PIC on x86-32.
-FunctionPass* createGlobalBaseRegPass();
+FunctionPass* createX86GlobalBaseRegPass();
/// createCleanupLocalDynamicTLSPass() - This pass combines multiple accesses
/// to local-dynamic TLS variables so that the TLS base address for the module
diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td
index 78edcf0..6912b57 100644
--- a/lib/Target/X86/X86.td
+++ b/lib/Target/X86/X86.td
@@ -166,6 +166,8 @@ def FeatureCallRegIndirect : SubtargetFeature<"call-reg-indirect",
"Call register indirect">;
def FeatureLEAUsesAG : SubtargetFeature<"lea-uses-ag", "LEAUsesAG", "true",
"LEA instruction needs inputs at AG stage">;
+def FeatureSlowLEA : SubtargetFeature<"slow-lea", "SlowLEA", "true",
+ "LEA instruction with certain arguments is slow">;
//===----------------------------------------------------------------------===//
// X86 processors supported.
@@ -195,8 +197,7 @@ def : Proc<"pentium3m", [FeatureSSE1, FeatureSlowBTMem]>;
def : Proc<"pentium-m", [FeatureSSE2, FeatureSlowBTMem]>;
def : Proc<"pentium4", [FeatureSSE2]>;
def : Proc<"pentium4m", [FeatureSSE2, FeatureSlowBTMem]>;
-def : Proc<"x86-64", [FeatureSSE2, Feature64Bit, FeatureSlowBTMem,
- FeatureFastUAMem]>;
+
// Intel Core Duo.
def : ProcessorModel<"yonah", SandyBridgeModel,
[FeatureSSE3, FeatureSlowBTMem]>;
@@ -227,6 +228,7 @@ def : ProcessorModel<"slm", SLMModel, [ProcIntelSLM,
FeaturePCLMUL, FeatureAES,
FeatureCallRegIndirect,
FeaturePRFCHW,
+ FeatureSlowLEA,
FeatureSlowBTMem, FeatureFastUAMem]>;
// "Arrandale" along with corei3 and corei5
def : ProcessorModel<"corei7", SandyBridgeModel,
@@ -329,6 +331,13 @@ def : Proc<"bdver3", [FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B,
FeaturePOPCNT, FeatureBMI, FeatureTBM,
FeatureFMA, FeatureFSGSBase]>;
+// Excavator
+def : Proc<"bdver4", [FeatureAVX2, FeatureXOP, FeatureFMA4,
+ FeatureCMPXCHG16B, FeatureAES, FeaturePRFCHW,
+ FeaturePCLMUL, FeatureF16C, FeatureLZCNT,
+ FeaturePOPCNT, FeatureBMI, FeatureBMI2,
+ FeatureTBM, FeatureFMA, FeatureFSGSBase]>;
+
def : Proc<"geode", [Feature3DNowA]>;
def : Proc<"winchip-c6", [FeatureMMX]>;
@@ -336,6 +345,20 @@ def : Proc<"winchip2", [Feature3DNow]>;
def : Proc<"c3", [Feature3DNow]>;
def : Proc<"c3-2", [FeatureSSE1]>;
+// We also provide a generic, 64-bit-specific x86 processor model which tries
+// to be good for modern chips without enabling instruction set encodings past
+// the basic SSE2 and 64-bit ones. It disables features that are slow on any
+// mainstream, modern 64-bit x86 chip and enables features that are generally
+// beneficial.
+//
+// We currently use the Sandy Bridge model as the default scheduling model as
+// we use it across Nehalem, Westmere, Sandy Bridge, and Ivy Bridge which
+// covers a huge swath of x86 processors. If there are specific scheduling
+// knobs which need to be tuned differently for AMD chips, we might consider
+// forming a common base for them.
+def : ProcessorModel<"x86-64", SandyBridgeModel,
+ [FeatureSSE2, Feature64Bit, FeatureSlowBTMem,
+ FeatureFastUAMem]>;
+
//===----------------------------------------------------------------------===//
// Register File Description
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/X86/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp
index fb66acc..1dca568 100644
--- a/lib/Target/X86/X86AsmPrinter.cpp
+++ b/lib/Target/X86/X86AsmPrinter.cpp
@@ -15,7 +15,6 @@
#include "X86AsmPrinter.h"
#include "InstPrinter/X86ATTInstPrinter.h"
#include "MCTargetDesc/X86BaseInfo.h"
-#include "X86COFFMachineModuleInfo.h"
#include "X86InstrInfo.h"
#include "X86MachineFunctionInfo.h"
#include "llvm/ADT/SmallString.h"
@@ -102,7 +101,7 @@ static void printSymbolOperand(X86AsmPrinter &P, const MachineOperand &MO,
MCSymbol *Sym = P.getSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
MachineModuleInfoImpl::StubValueTy &StubSym =
P.MMI->getObjFileInfo<MachineModuleInfoMachO>().getGVStubEntry(Sym);
- if (StubSym.getPointer() == 0)
+ if (!StubSym.getPointer())
StubSym = MachineModuleInfoImpl::
StubValueTy(P.getSymbol(GV), !GV->hasInternalLinkage());
} else if (MO.getTargetFlags() == X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE){
@@ -110,14 +109,14 @@ static void printSymbolOperand(X86AsmPrinter &P, const MachineOperand &MO,
MachineModuleInfoImpl::StubValueTy &StubSym =
P.MMI->getObjFileInfo<MachineModuleInfoMachO>().getHiddenGVStubEntry(
Sym);
- if (StubSym.getPointer() == 0)
+ if (!StubSym.getPointer())
StubSym = MachineModuleInfoImpl::
StubValueTy(P.getSymbol(GV), !GV->hasInternalLinkage());
} else if (MO.getTargetFlags() == X86II::MO_DARWIN_STUB) {
MCSymbol *Sym = P.getSymbolWithGlobalValueBase(GV, "$stub");
MachineModuleInfoImpl::StubValueTy &StubSym =
P.MMI->getObjFileInfo<MachineModuleInfoMachO>().getFnStubEntry(Sym);
- if (StubSym.getPointer() == 0)
+ if (!StubSym.getPointer())
StubSym = MachineModuleInfoImpl::
StubValueTy(P.getSymbol(GV), !GV->hasInternalLinkage());
}
@@ -174,7 +173,7 @@ static void printSymbolOperand(X86AsmPrinter &P, const MachineOperand &MO,
static void printOperand(X86AsmPrinter &P, const MachineInstr *MI,
unsigned OpNo, raw_ostream &O,
- const char *Modifier = 0, unsigned AsmVariant = 0);
+ const char *Modifier = nullptr, unsigned AsmVariant = 0);
/// printPCRelImm - This is used to print an immediate value that ends up
/// being encoded as a pc-relative value. These print slightly differently, for
@@ -232,7 +231,7 @@ static void printOperand(X86AsmPrinter &P, const MachineInstr *MI,
static void printLeaMemReference(X86AsmPrinter &P, const MachineInstr *MI,
unsigned Op, raw_ostream &O,
- const char *Modifier = NULL) {
+ const char *Modifier = nullptr) {
const MachineOperand &BaseReg = MI->getOperand(Op+X86::AddrBaseReg);
const MachineOperand &IndexReg = MI->getOperand(Op+X86::AddrIndexReg);
const MachineOperand &DispSpec = MI->getOperand(Op+X86::AddrDisp);
@@ -284,7 +283,7 @@ static void printLeaMemReference(X86AsmPrinter &P, const MachineInstr *MI,
static void printMemReference(X86AsmPrinter &P, const MachineInstr *MI,
unsigned Op, raw_ostream &O,
- const char *Modifier = NULL) {
+ const char *Modifier = nullptr) {
assert(isMem(MI, Op) && "Invalid memory reference!");
const MachineOperand &Segment = MI->getOperand(Op+X86::AddrSegmentReg);
if (Segment.getReg()) {
@@ -296,7 +295,7 @@ static void printMemReference(X86AsmPrinter &P, const MachineInstr *MI,
static void printIntelMemReference(X86AsmPrinter &P, const MachineInstr *MI,
unsigned Op, raw_ostream &O,
- const char *Modifier = NULL,
+ const char *Modifier = nullptr,
unsigned AsmVariant = 1) {
const MachineOperand &BaseReg = MI->getOperand(Op+X86::AddrBaseReg);
unsigned ScaleVal = MI->getOperand(Op+X86::AddrScaleAmt).getImm();
@@ -464,7 +463,7 @@ bool X86AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
}
}
- printOperand(*this, MI, OpNo, O, /*Modifier*/ 0, AsmVariant);
+ printOperand(*this, MI, OpNo, O, /*Modifier*/ nullptr, AsmVariant);
return false;
}
@@ -527,6 +526,55 @@ void X86AsmPrinter::EmitStartOfAsmFile(Module &M) {
}
}
+static void
+emitNonLazySymbolPointer(MCStreamer &OutStreamer, MCSymbol *StubLabel,
+ MachineModuleInfoImpl::StubValueTy &MCSym) {
+ // L_foo$stub:
+ OutStreamer.EmitLabel(StubLabel);
+ // .indirect_symbol _foo
+ OutStreamer.EmitSymbolAttribute(MCSym.getPointer(), MCSA_IndirectSymbol);
+
+ if (MCSym.getInt())
+ // External to current translation unit.
+ OutStreamer.EmitIntValue(0, 4/*size*/);
+ else
+ // Internal to current translation unit.
+ //
+ // When we place the LSDA into the TEXT section, the type info
+ // pointers need to be indirect and pc-rel. We accomplish this by
+ // using NLPs; however, sometimes the types are local to the file.
+ // We need to fill in the value for the NLP in those cases.
+ OutStreamer.EmitValue(
+ MCSymbolRefExpr::Create(MCSym.getPointer(), OutStreamer.getContext()),
+ 4 /*size*/);
+}
+
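For an external symbol the helper above emits, illustratively:

    L_foo$non_lazy_ptr:
        .indirect_symbol _foo
        .long 0              # external: dyld fills in the pointer at load time
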
+void X86AsmPrinter::GenerateExportDirective(const MCSymbol *Sym, bool IsData) {
+ SmallString<128> Directive;
+ raw_svector_ostream OS(Directive);
+ StringRef Name = Sym->getName();
+
+ if (Subtarget->isTargetKnownWindowsMSVC())
+ OS << " /EXPORT:";
+ else
+ OS << " -export:";
+
+ if ((Subtarget->isTargetWindowsGNU() || Subtarget->isTargetWindowsCygwin()) &&
+ (Name[0] == getDataLayout().getGlobalPrefix()))
+ Name = Name.drop_front();
+
+ OS << Name;
+
+ if (IsData) {
+ if (Subtarget->isTargetKnownWindowsMSVC())
+ OS << ",DATA";
+ else
+ OS << ",data";
+ }
+
+ OS.flush();
+ OutStreamer.EmitBytes(Directive);
+}
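
The resulting .drectve payload differs by toolchain; illustratively (note the
leading space, and that the global prefix is dropped on MinGW/Cygwin):

     /EXPORT:_foo  /EXPORT:_bar,DATA     (MSVC link.exe style)
     -export:foo   -export:bar,data      (GNU ld style)
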
void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
if (Subtarget->isTargetMacho()) {
@@ -547,11 +595,11 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
5, SectionKind::getMetadata());
OutStreamer.SwitchSection(TheSection);
- for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
+ for (const auto &Stub : Stubs) {
// L_foo$stub:
- OutStreamer.EmitLabel(Stubs[i].first);
+ OutStreamer.EmitLabel(Stub.first);
// .indirect_symbol _foo
- OutStreamer.EmitSymbolAttribute(Stubs[i].second.getPointer(),
+ OutStreamer.EmitSymbolAttribute(Stub.second.getPointer(),
MCSA_IndirectSymbol);
// hlt; hlt; hlt; hlt; hlt (hlt = 0xf4).
const char HltInsts[] = "\xf4\xf4\xf4\xf4\xf4";
@@ -571,44 +619,24 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
SectionKind::getMetadata());
OutStreamer.SwitchSection(TheSection);
- for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
- // L_foo$non_lazy_ptr:
- OutStreamer.EmitLabel(Stubs[i].first);
- // .indirect_symbol _foo
- MachineModuleInfoImpl::StubValueTy &MCSym = Stubs[i].second;
- OutStreamer.EmitSymbolAttribute(MCSym.getPointer(),
- MCSA_IndirectSymbol);
- // .long 0
- if (MCSym.getInt())
- // External to current translation unit.
- OutStreamer.EmitIntValue(0, 4/*size*/);
- else
- // Internal to current translation unit.
- //
- // When we place the LSDA into the TEXT section, the type info
- // pointers need to be indirect and pc-rel. We accomplish this by
- // using NLPs. However, sometimes the types are local to the file. So
- // we need to fill in the value for the NLP in those cases.
- OutStreamer.EmitValue(MCSymbolRefExpr::Create(MCSym.getPointer(),
- OutContext), 4/*size*/);
- }
+ for (auto &Stub : Stubs)
+ emitNonLazySymbolPointer(OutStreamer, Stub.first, Stub.second);
+
Stubs.clear();
OutStreamer.AddBlankLine();
}
Stubs = MMIMacho.GetHiddenGVStubList();
if (!Stubs.empty()) {
- OutStreamer.SwitchSection(getObjFileLowering().getDataSection());
- EmitAlignment(2);
-
- for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
- // L_foo$non_lazy_ptr:
- OutStreamer.EmitLabel(Stubs[i].first);
- // .long _foo
- OutStreamer.EmitValue(MCSymbolRefExpr::
- Create(Stubs[i].second.getPointer(),
- OutContext), 4/*size*/);
- }
+ const MCSection *TheSection =
+ OutContext.getMachOSection("__IMPORT", "__pointers",
+ MachO::S_NON_LAZY_SYMBOL_POINTERS,
+ SectionKind::getMetadata());
+ OutStreamer.SwitchSection(TheSection);
+
+ for (auto &Stub : Stubs)
+ emitNonLazySymbolPointer(OutStreamer, Stub.first, Stub.second);
+
Stubs.clear();
OutStreamer.AddBlankLine();
}
@@ -630,46 +658,25 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
}
if (Subtarget->isTargetCOFF()) {
- X86COFFMachineModuleInfo &COFFMMI =
- MMI->getObjFileInfo<X86COFFMachineModuleInfo>();
-
- // Emit type information for external functions
- typedef X86COFFMachineModuleInfo::externals_iterator externals_iterator;
- for (externals_iterator I = COFFMMI.externals_begin(),
- E = COFFMMI.externals_end();
- I != E; ++I) {
- OutStreamer.BeginCOFFSymbolDef(CurrentFnSym);
- OutStreamer.EmitCOFFSymbolStorageClass(COFF::IMAGE_SYM_CLASS_EXTERNAL);
- OutStreamer.EmitCOFFSymbolType(COFF::IMAGE_SYM_DTYPE_FUNCTION
- << COFF::SCT_COMPLEX_TYPE_SHIFT);
- OutStreamer.EndCOFFSymbolDef();
- }
-
// Necessary for dllexport support
std::vector<const MCSymbol*> DLLExportedFns, DLLExportedGlobals;
- for (Module::const_iterator I = M.begin(), E = M.end(); I != E; ++I)
- if (I->hasDLLExportStorageClass())
- DLLExportedFns.push_back(getSymbol(I));
+ for (const auto &Function : M)
+ if (Function.hasDLLExportStorageClass())
+ DLLExportedFns.push_back(getSymbol(&Function));
- for (Module::const_global_iterator I = M.global_begin(),
- E = M.global_end(); I != E; ++I)
- if (I->hasDLLExportStorageClass())
- DLLExportedGlobals.push_back(getSymbol(I));
+ for (const auto &Global : M.globals())
+ if (Global.hasDLLExportStorageClass())
+ DLLExportedGlobals.push_back(getSymbol(&Global));
- for (Module::const_alias_iterator I = M.alias_begin(), E = M.alias_end();
- I != E; ++I) {
- const GlobalValue *GV = I;
- if (!GV->hasDLLExportStorageClass())
+ for (const auto &Alias : M.aliases()) {
+ if (!Alias.hasDLLExportStorageClass())
continue;
- while (const GlobalAlias *A = dyn_cast<GlobalAlias>(GV))
- GV = A->getAliasedGlobal();
-
- if (isa<Function>(GV))
- DLLExportedFns.push_back(getSymbol(I));
- else if (isa<GlobalVariable>(GV))
- DLLExportedGlobals.push_back(getSymbol(I));
+ if (Alias.getType()->getElementType()->isFunctionTy())
+ DLLExportedFns.push_back(getSymbol(&Alias));
+ else
+ DLLExportedGlobals.push_back(getSymbol(&Alias));
}
// Output linker support code for dllexported globals on windows.
@@ -678,28 +685,11 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
static_cast<const TargetLoweringObjectFileCOFF&>(getObjFileLowering());
OutStreamer.SwitchSection(TLOFCOFF.getDrectveSection());
- SmallString<128> name;
- for (unsigned i = 0, e = DLLExportedGlobals.size(); i != e; ++i) {
- if (Subtarget->isTargetKnownWindowsMSVC())
- name = " /EXPORT:";
- else
- name = " -export:";
- name += DLLExportedGlobals[i]->getName();
- if (Subtarget->isTargetKnownWindowsMSVC())
- name += ",DATA";
- else
- name += ",data";
- OutStreamer.EmitBytes(name);
- }
- for (unsigned i = 0, e = DLLExportedFns.size(); i != e; ++i) {
- if (Subtarget->isTargetKnownWindowsMSVC())
- name = " /EXPORT:";
- else
- name = " -export:";
- name += DLLExportedFns[i]->getName();
- OutStreamer.EmitBytes(name);
- }
+ for (auto &Symbol : DLLExportedGlobals)
+ GenerateExportDirective(Symbol, /*IsData=*/true);
+ for (auto &Symbol : DLLExportedFns)
+ GenerateExportDirective(Symbol, /*IsData=*/false);
}
}
@@ -715,9 +705,9 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
OutStreamer.SwitchSection(TLOFELF.getDataRelSection());
const DataLayout *TD = TM.getDataLayout();
- for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
- OutStreamer.EmitLabel(Stubs[i].first);
- OutStreamer.EmitSymbolValue(Stubs[i].second.getPointer(),
+ for (const auto &Stub : Stubs) {
+ OutStreamer.EmitLabel(Stub.first);
+ OutStreamer.EmitSymbolValue(Stub.second.getPointer(),
TD->getPointerSize());
}
Stubs.clear();
diff --git a/lib/Target/X86/X86AsmPrinter.h b/lib/Target/X86/X86AsmPrinter.h
index 3308cc2..e4eef5d 100644
--- a/lib/Target/X86/X86AsmPrinter.h
+++ b/lib/Target/X86/X86AsmPrinter.h
@@ -16,13 +16,15 @@
#include "llvm/Target/TargetMachine.h"
namespace llvm {
-
class MCStreamer;
+class MCSymbol;
class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter {
const X86Subtarget *Subtarget;
StackMaps SM;
+ void GenerateExportDirective(const MCSymbol *Sym, bool IsData);
+
public:
explicit X86AsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
: AsmPrinter(TM, Streamer), SM(*this) {
diff --git a/lib/Target/X86/X86COFFMachineModuleInfo.cpp b/lib/Target/X86/X86COFFMachineModuleInfo.cpp
deleted file mode 100644
index 6a6125b..0000000
--- a/lib/Target/X86/X86COFFMachineModuleInfo.cpp
+++ /dev/null
@@ -1,19 +0,0 @@
-//===-- X86COFFMachineModuleInfo.cpp - X86 COFF MMI Impl ------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This is an MMI implementation for X86 COFF (windows) targets.
-//
-//===----------------------------------------------------------------------===//
-
-#include "X86COFFMachineModuleInfo.h"
-using namespace llvm;
-
-
-X86COFFMachineModuleInfo::~X86COFFMachineModuleInfo() {
-}
diff --git a/lib/Target/X86/X86COFFMachineModuleInfo.h b/lib/Target/X86/X86COFFMachineModuleInfo.h
deleted file mode 100644
index 0dfeb42..0000000
--- a/lib/Target/X86/X86COFFMachineModuleInfo.h
+++ /dev/null
@@ -1,46 +0,0 @@
-//===-- X86coffmachinemoduleinfo.h - X86 COFF MMI Impl ----------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This is an MMI implementation for X86 COFF (windows) targets.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef X86COFF_MACHINEMODULEINFO_H
-#define X86COFF_MACHINEMODULEINFO_H
-
-#include "X86MachineFunctionInfo.h"
-#include "llvm/ADT/DenseSet.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
-
-namespace llvm {
- class X86MachineFunctionInfo;
- class DataLayout;
-
-/// X86COFFMachineModuleInfo - This is a MachineModuleInfoImpl implementation
-/// for X86 COFF targets.
-class X86COFFMachineModuleInfo : public MachineModuleInfoImpl {
- DenseSet<MCSymbol const *> Externals;
-public:
- X86COFFMachineModuleInfo(const MachineModuleInfo &) {}
- virtual ~X86COFFMachineModuleInfo();
-
- void addExternalFunction(MCSymbol* Symbol) {
- Externals.insert(Symbol);
- }
-
- typedef DenseSet<MCSymbol const *>::const_iterator externals_iterator;
- externals_iterator externals_begin() const { return Externals.begin(); }
- externals_iterator externals_end() const { return Externals.end(); }
-};
-
-
-
-} // end namespace llvm
-
-#endif
diff --git a/lib/Target/X86/X86CallingConv.h b/lib/Target/X86/X86CallingConv.h
index 040da35..e76f9fd 100644
--- a/lib/Target/X86/X86CallingConv.h
+++ b/lib/Target/X86/X86CallingConv.h
@@ -29,33 +29,6 @@ inline bool CC_X86_AnyReg_Error(unsigned &, MVT &, MVT &,
return false;
}
-inline bool CC_X86_CDeclMethod_SRet(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
- CCValAssign::LocInfo &LocInfo,
- ISD::ArgFlagsTy &ArgFlags, CCState &State) {
- // Swap the order of the first two parameters if the first parameter is sret.
- if (ArgFlags.isSRet()) {
- assert(ValNo == 0);
- assert(ValVT == MVT::i32);
- State.AllocateStack(8, 4);
- State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT, 4, LocVT, LocInfo));
-
- // Indicate that we need to swap the order of the first and second
- // parameters by "allocating" register zero. There are no register
- // parameters with cdecl methods, so we can use this to communicate to the
- // next call.
- State.AllocateReg(1);
- return true;
- } else if (ValNo == 1 && State.isAllocated(1)) {
- assert(ValVT == MVT::i32 && "non-i32-sized this param unsupported");
- // Stack was already allocated while processing sret.
- State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT, 0, LocVT, LocInfo));
- return true;
- }
-
- // All other args use the C calling convention.
- return false;
-}
-
} // End llvm namespace
#endif
diff --git a/lib/Target/X86/X86CallingConv.td b/lib/Target/X86/X86CallingConv.td
index 1cfd827..0824d4e 100644
--- a/lib/Target/X86/X86CallingConv.td
+++ b/lib/Target/X86/X86CallingConv.td
@@ -485,15 +485,6 @@ def CC_X86_32_ThisCall_Win : CallingConv<[
CCDelegateTo<CC_X86_32_ThisCall_Common>
]>;
-def CC_X86_CDeclMethod : CallingConv<[
- // Promote i8/i16 arguments to i32.
- CCIfType<[i8, i16], CCPromoteToType<i32>>,
-
- CCCustom<"CC_X86_CDeclMethod_SRet">,
-
- CCDelegateTo<CC_X86_32_Common>
-]>;
-
def CC_X86_32_ThisCall : CallingConv<[
CCIfSubtarget<"isTargetCygMing()", CCDelegateTo<CC_X86_32_ThisCall_Mingw>>,
CCDelegateTo<CC_X86_32_ThisCall_Win>
@@ -583,7 +574,6 @@ def CC_Intel_OCL_BI : CallingConv<[
def CC_X86_32 : CallingConv<[
CCIfCC<"CallingConv::X86_FastCall", CCDelegateTo<CC_X86_32_FastCall>>,
CCIfCC<"CallingConv::X86_ThisCall", CCDelegateTo<CC_X86_32_ThisCall>>,
- CCIfCC<"CallingConv::X86_CDeclMethod", CCDelegateTo<CC_X86_CDeclMethod>>,
CCIfCC<"CallingConv::Fast", CCDelegateTo<CC_X86_32_FastCC>>,
CCIfCC<"CallingConv::GHC", CCDelegateTo<CC_X86_32_GHC>>,
CCIfCC<"CallingConv::HiPE", CCDelegateTo<CC_X86_32_HiPE>>,
diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp
index f6c4c2e..76718d0 100644
--- a/lib/Target/X86/X86CodeEmitter.cpp
+++ b/lib/Target/X86/X86CodeEmitter.cpp
@@ -12,7 +12,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "x86-emitter"
#include "X86.h"
#include "X86InstrInfo.h"
#include "X86JITInfo.h"
@@ -36,6 +35,8 @@
#include "llvm/Target/TargetOptions.h"
using namespace llvm;
+#define DEBUG_TYPE "x86-emitter"
+
STATISTIC(NumEmitted, "Number of machine instructions emitted");
namespace {
@@ -52,7 +53,7 @@ namespace {
public:
static char ID;
explicit Emitter(X86TargetMachine &tm, CodeEmitter &mce)
- : MachineFunctionPass(ID), II(0), TD(0), TM(tm),
+ : MachineFunctionPass(ID), II(nullptr), TD(nullptr), TM(tm),
MCE(mce), PICBaseOffset(0), Is64BitMode(false),
IsPIC(TM.getRelocationModel() == Reloc::PIC_) {}
@@ -450,7 +451,7 @@ void Emitter<CodeEmitter>::emitMemModRMByte(const MachineInstr &MI,
intptr_t PCAdj) {
const MachineOperand &Op3 = MI.getOperand(Op+3);
int DispVal = 0;
- const MachineOperand *DispForReloc = 0;
+ const MachineOperand *DispForReloc = nullptr;
// Figure out what sort of displacement we have to handle here.
if (Op3.isGlobal()) {
@@ -1475,7 +1476,7 @@ void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI,
#ifndef NDEBUG
dbgs() << "Cannot encode all operands of: " << MI << "\n";
#endif
- llvm_unreachable(0);
+ llvm_unreachable(nullptr);
}
MCE.processDebugLoc(MI.getDebugLoc(), false);
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index 1aab1ea..56bcfa3 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -183,7 +183,7 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, const X86AddressMode &AM,
unsigned &ResultReg) {
// Get opcode and regclass of the output for the given load instruction.
unsigned Opc = 0;
- const TargetRegisterClass *RC = NULL;
+ const TargetRegisterClass *RC = nullptr;
switch (VT.getSimpleVT().SimpleTy) {
default: return false;
case MVT::i1:
@@ -363,7 +363,7 @@ bool X86FastISel::handleConstantAddresses(const Value *V, X86AddressMode &AM) {
// it works...).
if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
if (const GlobalVariable *GVar =
- dyn_cast_or_null<GlobalVariable>(GA->getAliasedGlobal()))
+ dyn_cast_or_null<GlobalVariable>(GA->getAliasee()))
if (GVar->isThreadLocal())
return false;
@@ -406,7 +406,7 @@ bool X86FastISel::handleConstantAddresses(const Value *V, X86AddressMode &AM) {
} else {
// Issue load from stub.
unsigned Opc = 0;
- const TargetRegisterClass *RC = NULL;
+ const TargetRegisterClass *RC = nullptr;
X86AddressMode StubAM;
StubAM.Base.Reg = AM.Base.Reg;
StubAM.GV = GV;
@@ -441,7 +441,7 @@ bool X86FastISel::handleConstantAddresses(const Value *V, X86AddressMode &AM) {
// Now construct the final address. Note that the Disp, Scale,
// and Index values may already be set here.
AM.Base.Reg = LoadReg;
- AM.GV = 0;
+ AM.GV = nullptr;
return true;
}
}
@@ -467,7 +467,7 @@ bool X86FastISel::handleConstantAddresses(const Value *V, X86AddressMode &AM) {
bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
SmallVector<const Value *, 32> GEPs;
redo_gep:
- const User *U = NULL;
+ const User *U = nullptr;
unsigned Opcode = Instruction::UserOp1;
if (const Instruction *I = dyn_cast<Instruction>(V)) {
// Don't walk into other basic blocks; it's possible we haven't
@@ -626,7 +626,7 @@ redo_gep:
/// X86SelectCallAddress - Attempt to fill in an address from the given value.
///
bool X86FastISel::X86SelectCallAddress(const Value *V, X86AddressMode &AM) {
- const User *U = NULL;
+ const User *U = nullptr;
unsigned Opcode = Instruction::UserOp1;
const Instruction *I = dyn_cast<Instruction>(V);
// Record if the value is defined in the same basic block.
@@ -1247,7 +1247,7 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) {
bool X86FastISel::X86SelectShift(const Instruction *I) {
unsigned CReg = 0, OpReg = 0;
- const TargetRegisterClass *RC = NULL;
+ const TargetRegisterClass *RC = nullptr;
if (I->getType()->isIntegerTy(8)) {
CReg = X86::CL;
RC = &X86::GR8RegClass;
@@ -1487,7 +1487,7 @@ bool X86FastISel::X86SelectSelect(const Instruction *I) {
if (!Subtarget->hasCMov()) return false;
unsigned Opc = 0;
- const TargetRegisterClass *RC = NULL;
+ const TargetRegisterClass *RC = nullptr;
if (VT == MVT::i16) {
Opc = X86::CMOVE16rr;
RC = &X86::GR16RegClass;
@@ -1821,10 +1821,10 @@ bool X86FastISel::FastLowerArguments() {
}
}
- static const uint16_t GPR32ArgRegs[] = {
+ static const MCPhysReg GPR32ArgRegs[] = {
X86::EDI, X86::ESI, X86::EDX, X86::ECX, X86::R8D, X86::R9D
};
- static const uint16_t GPR64ArgRegs[] = {
+ static const MCPhysReg GPR64ArgRegs[] = {
X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8 , X86::R9
};
@@ -1865,7 +1865,7 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
if (cast<CallInst>(I)->isTailCall())
return false;
- return DoSelectCall(I, 0);
+ return DoSelectCall(I, nullptr);
}
static unsigned computeBytesPoppedByCallee(const X86Subtarget &Subtarget,
@@ -1936,8 +1936,8 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
if (!X86SelectCallAddress(Callee, CalleeAM))
return false;
unsigned CalleeOp = 0;
- const GlobalValue *GV = 0;
- if (CalleeAM.GV != 0) {
+ const GlobalValue *GV = nullptr;
+ if (CalleeAM.GV != nullptr) {
GV = CalleeAM.GV;
} else if (CalleeAM.Base.Reg != 0) {
CalleeOp = CalleeAM.Base.Reg;
@@ -2163,7 +2163,7 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
if (Subtarget->is64Bit() && isVarArg && !isWin64) {
// Count the number of XMM registers allocated.
- static const uint16_t XMMArgRegs[] = {
+ static const MCPhysReg XMMArgRegs[] = {
X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
};
@@ -2387,7 +2387,7 @@ unsigned X86FastISel::TargetMaterializeConstant(const Constant *C) {
// Get opcode and regclass of the output for the given load instruction.
unsigned Opc = 0;
- const TargetRegisterClass *RC = NULL;
+ const TargetRegisterClass *RC = nullptr;
switch (VT.SimpleTy) {
default: return 0;
case MVT::i8:
@@ -2437,7 +2437,7 @@ unsigned X86FastISel::TargetMaterializeConstant(const Constant *C) {
// If the expression is just a basereg, then we're done, otherwise we need
// to emit an LEA.
if (AM.BaseType == X86AddressMode::RegBase &&
- AM.IndexReg == 0 && AM.Disp == 0 && AM.GV == 0)
+ AM.IndexReg == 0 && AM.Disp == 0 && AM.GV == nullptr)
return AM.Base.Reg;
Opc = TLI.getPointerTy() == MVT::i32 ? X86::LEA32r : X86::LEA64r;
@@ -2510,7 +2510,7 @@ unsigned X86FastISel::TargetMaterializeFloatZero(const ConstantFP *CF) {
// Get opcode and regclass for the given zero.
unsigned Opc = 0;
- const TargetRegisterClass *RC = NULL;
+ const TargetRegisterClass *RC = nullptr;
switch (VT.SimpleTy) {
default: return 0;
case MVT::f32:
@@ -2558,7 +2558,7 @@ bool X86FastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
MachineInstr *Result =
XII.foldMemoryOperandImpl(*FuncInfo.MF, MI, OpNo, AddrOps, Size, Alignment);
- if (Result == 0) return false;
+ if (!Result) return false;
FuncInfo.MBB->insert(FuncInfo.InsertPt, Result);
MI->eraseFromParent();
diff --git a/lib/Target/X86/X86FixupLEAs.cpp b/lib/Target/X86/X86FixupLEAs.cpp
index c2c234b..6c5b86f 100644
--- a/lib/Target/X86/X86FixupLEAs.cpp
+++ b/lib/Target/X86/X86FixupLEAs.cpp
@@ -13,7 +13,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "x86-fixup-LEAs"
#include "X86.h"
#include "X86InstrInfo.h"
#include "X86Subtarget.h"
@@ -28,6 +27,8 @@
#include "llvm/Target/TargetInstrInfo.h"
using namespace llvm;
+#define DEBUG_TYPE "x86-fixup-LEAs"
+
STATISTIC(NumLEAs, "Number of LEA instructions created");
namespace {
@@ -56,6 +57,11 @@ namespace {
void processInstruction(MachineBasicBlock::iterator& I,
MachineFunction::iterator MFI);
+ /// \brief Given a LEA instruction which is unprofitable on Silvermont,
+ /// try to replace it with an equivalent ADD instruction.
+ void processInstructionForSLM(MachineBasicBlock::iterator& I,
+ MachineFunction::iterator MFI);
+
/// \brief Determine if an instruction references a machine register
/// and, if so, whether it reads or writes the register.
RegUsageState usesRegister(MachineOperand& p,
@@ -85,7 +91,7 @@ namespace {
private:
MachineFunction *MF;
const TargetMachine *TM;
- const TargetInstrInfo *TII; // Machine instruction info.
+ const X86InstrInfo *TII; // Machine instruction info.
};
char FixupLEAPass::ID = 0;
@@ -97,7 +103,7 @@ FixupLEAPass::postRAConvertToLEA(MachineFunction::iterator &MFI,
MachineInstr* MI = MBBI;
MachineInstr* NewMI;
switch (MI->getOpcode()) {
- case X86::MOV32rr:
+ case X86::MOV32rr:
case X86::MOV64rr: {
const MachineOperand& Src = MI->getOperand(1);
const MachineOperand& Dest = MI->getOperand(0);
@@ -123,7 +129,7 @@ FixupLEAPass::postRAConvertToLEA(MachineFunction::iterator &MFI,
if (!MI->getOperand(2).isImm()) {
// convertToThreeAddress will call getImm()
// which requires isImm() to be true
- return 0;
+ return nullptr;
}
break;
case X86::ADD16rr:
@@ -132,10 +138,10 @@ FixupLEAPass::postRAConvertToLEA(MachineFunction::iterator &MFI,
// if src1 != src2, then convertToThreeAddress will
// need to create a Virtual register, which we cannot do
// after register allocation.
- return 0;
+ return nullptr;
}
}
- return TII->convertToThreeAddress(MFI, MBBI, 0);
+ return TII->convertToThreeAddress(MFI, MBBI, nullptr);
}
FunctionPass *llvm::createX86FixupLEAs() {
@@ -143,9 +149,12 @@ FunctionPass *llvm::createX86FixupLEAs() {
}
bool FixupLEAPass::runOnMachineFunction(MachineFunction &Func) {
- MF = &Func;
- TM = &MF->getTarget();
- TII = TM->getInstrInfo();
+ TM = &Func.getTarget();
+ const X86Subtarget &ST = TM->getSubtarget<X86Subtarget>();
+ if (!ST.LEAusesAG() && !ST.slowLEA())
+ return false;
+
+ TII = static_cast<const X86InstrInfo*>(TM->getInstrInfo());
DEBUG(dbgs() << "Start X86FixupLEAs\n";);
// Process all basic blocks.
@@ -211,7 +220,7 @@ MachineBasicBlock::iterator FixupLEAPass::searchBackwards(MachineOperand& p,
InstrDistance += TII->getInstrLatency(TM->getInstrItineraryData(), CurInst);
Found = getPreviousInstr(CurInst, MFI);
}
- return 0;
+ return nullptr;
}
void FixupLEAPass::processInstruction(MachineBasicBlock::iterator& I,
@@ -242,9 +251,9 @@ void FixupLEAPass::seekLEAFixup(MachineOperand& p,
MachineInstr* NewMI = postRAConvertToLEA(MFI, MBI);
if (NewMI) {
++NumLEAs;
- DEBUG(dbgs() << "Candidate to replace:"; MBI->dump(););
+ DEBUG(dbgs() << "FixLEA: Candidate to replace:"; MBI->dump(););
// now to replace with an equivalent LEA...
- DEBUG(dbgs() << "Replaced by: "; NewMI->dump(););
+ DEBUG(dbgs() << "FixLEA: Replaced by: "; NewMI->dump(););
MFI->erase(MBI);
MachineBasicBlock::iterator J =
static_cast<MachineBasicBlock::iterator> (NewMI);
@@ -253,10 +262,80 @@ void FixupLEAPass::seekLEAFixup(MachineOperand& p,
}
}
+void FixupLEAPass::processInstructionForSLM(MachineBasicBlock::iterator &I,
+ MachineFunction::iterator MFI) {
+ MachineInstr *MI = I;
+ const int opcode = MI->getOpcode();
+ if (opcode != X86::LEA16r && opcode != X86::LEA32r && opcode != X86::LEA64r &&
+ opcode != X86::LEA64_32r)
+ return;
+ if (MI->getOperand(5).getReg() != 0 || !MI->getOperand(4).isImm() ||
+ !TII->isSafeToClobberEFLAGS(*MFI, I))
+ return;
+ const unsigned DstR = MI->getOperand(0).getReg();
+ const unsigned SrcR1 = MI->getOperand(1).getReg();
+ const unsigned SrcR2 = MI->getOperand(3).getReg();
+ if ((SrcR1 == 0 || SrcR1 != DstR) && (SrcR2 == 0 || SrcR2 != DstR))
+ return;
+ if (MI->getOperand(2).getImm() > 1)
+ return;
+ int addrr_opcode, addri_opcode;
+ switch (opcode) {
+ case X86::LEA16r:
+ addrr_opcode = X86::ADD16rr;
+ addri_opcode = X86::ADD16ri;
+ break;
+ case X86::LEA32r:
+ addrr_opcode = X86::ADD32rr;
+ addri_opcode = X86::ADD32ri;
+ break;
+ case X86::LEA64_32r:
+ case X86::LEA64r:
+ addrr_opcode = X86::ADD64rr;
+ addri_opcode = X86::ADD64ri32;
+ break;
+ default:
+ llvm_unreachable("Unexpected LEA instruction");
+ }
+ DEBUG(dbgs() << "FixLEA: Candidate to replace:"; I->dump(););
+ DEBUG(dbgs() << "FixLEA: Replaced by: ";);
+ MachineInstr *NewMI = nullptr;
+ const MachineOperand &Dst = MI->getOperand(0);
+ // Make ADD instruction for two registers writing to LEA's destination
+ if (SrcR1 != 0 && SrcR2 != 0) {
+ const MachineOperand &Src1 = MI->getOperand(SrcR1 == DstR ? 1 : 3);
+ const MachineOperand &Src2 = MI->getOperand(SrcR1 == DstR ? 3 : 1);
+ NewMI = BuildMI(*MF, MI->getDebugLoc(), TII->get(addrr_opcode))
+ .addOperand(Dst)
+ .addOperand(Src1)
+ .addOperand(Src2);
+ MFI->insert(I, NewMI);
+ DEBUG(NewMI->dump(););
+ }
+ // Make ADD instruction for immediate
+ if (MI->getOperand(4).getImm() != 0) {
+ const MachineOperand &SrcR = MI->getOperand(SrcR1 == DstR ? 1 : 3);
+ NewMI = BuildMI(*MF, MI->getDebugLoc(), TII->get(addri_opcode))
+ .addOperand(Dst)
+ .addOperand(SrcR)
+ .addImm(MI->getOperand(4).getImm());
+ MFI->insert(I, NewMI);
+ DEBUG(NewMI->dump(););
+ }
+ if (NewMI) {
+ MFI->erase(I);
+ I = static_cast<MachineBasicBlock::iterator>(NewMI);
+ }
+}
+
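For example (AT&T syntax, a sketch of the cases handled above), a destructive
LEA with scale at most 1 becomes one or two ADDs:

    leal 16(%eax), %eax        =>  addl $16, %eax
    leal (%eax,%ebx), %eax     =>  addl %ebx, %eax
    leal 8(%eax,%ebx), %eax    =>  addl %ebx, %eax
                                   addl $8, %eax
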
bool FixupLEAPass::processBasicBlock(MachineFunction &MF,
MachineFunction::iterator MFI) {
- for (MachineBasicBlock::iterator I = MFI->begin(); I != MFI->end(); ++I)
- processInstruction(I, MFI);
+ for (MachineBasicBlock::iterator I = MFI->begin(); I != MFI->end(); ++I) {
+ if (TM->getSubtarget<X86Subtarget>().isSLM())
+ processInstructionForSLM(I, MFI);
+ else
+ processInstruction(I, MFI);
+ }
return false;
}
diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp
index 7955ade..c8a3ab3 100644
--- a/lib/Target/X86/X86FloatingPoint.cpp
+++ b/lib/Target/X86/X86FloatingPoint.cpp
@@ -23,7 +23,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "x86-codegen"
#include "X86.h"
#include "X86InstrInfo.h"
#include "llvm/ADT/DepthFirstIterator.h"
@@ -45,6 +44,8 @@
#include <algorithm>
using namespace llvm;
+#define DEBUG_TYPE "x86-codegen"
+
STATISTIC(NumFXCH, "Number of fxch instructions inserted");
STATISTIC(NumFP , "Number of floating point instructions");
@@ -430,7 +431,7 @@ bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) {
if (FPInstClass == X86II::NotFP)
continue; // Efficiently ignore non-fp insts!
- MachineInstr *PrevMI = 0;
+ MachineInstr *PrevMI = nullptr;
if (I != BB.begin())
PrevMI = std::prev(I);
diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp
index f0ad4d1..4c1374f 100644
--- a/lib/Target/X86/X86FrameLowering.cpp
+++ b/lib/Target/X86/X86FrameLowering.cpp
@@ -182,7 +182,7 @@ void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
}
}
- MachineInstr *MI = NULL;
+ MachineInstr *MI = nullptr;
if (UseLEA) {
MI = addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr),
@@ -204,7 +204,7 @@ void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
/// mergeSPUpdatesUp - Merge two stack-manipulating instructions upper iterator.
static
void mergeSPUpdatesUp(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
- unsigned StackPtr, uint64_t *NumBytes = NULL) {
+ unsigned StackPtr, uint64_t *NumBytes = nullptr) {
if (MBBI == MBB.begin()) return;
MachineBasicBlock::iterator PI = std::prev(MBBI);
@@ -225,11 +225,12 @@ void mergeSPUpdatesUp(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
}
}
-/// mergeSPUpdatesDown - Merge two stack-manipulating instructions lower iterator.
+/// mergeSPUpdatesDown - Merge two stack-manipulating instructions using the
+/// lower iterator.
static
void mergeSPUpdatesDown(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI,
- unsigned StackPtr, uint64_t *NumBytes = NULL) {
+ unsigned StackPtr, uint64_t *NumBytes = nullptr) {
// FIXME: THIS ISN'T RUN!!!
return;
@@ -257,19 +258,19 @@ void mergeSPUpdatesDown(MachineBasicBlock &MBB,
}
/// mergeSPUpdates - Checks the instruction before/after the passed
-/// instruction. If it is an ADD/SUB/LEA instruction it is deleted argument and the
-/// stack adjustment is returned as a positive value for ADD/LEA and a negative for
-/// SUB.
+/// instruction. If it is an ADD/SUB/LEA instruction it is deleted, and the
+/// stack adjustment is returned as a positive value for ADD/LEA and a
+/// negative one for SUB.
static int mergeSPUpdates(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator &MBBI,
- unsigned StackPtr,
- bool doMergeWithPrevious) {
+ MachineBasicBlock::iterator &MBBI, unsigned StackPtr,
+ bool doMergeWithPrevious) {
if ((doMergeWithPrevious && MBBI == MBB.begin()) ||
(!doMergeWithPrevious && MBBI == MBB.end()))
return 0;
MachineBasicBlock::iterator PI = doMergeWithPrevious ? std::prev(MBBI) : MBBI;
- MachineBasicBlock::iterator NI = doMergeWithPrevious ? 0 : std::next(MBBI);
+ MachineBasicBlock::iterator NI = doMergeWithPrevious ? nullptr
+ : std::next(MBBI);
unsigned Opc = PI->getOpcode();
int Offset = 0;
@@ -366,8 +367,10 @@ void X86FrameLowering::emitCalleeSavedFrameMoves(
unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
unsigned CFIIndex =
- MMI.addFrameInst(MCCFIInstruction::createOffset(0, DwarfReg, Offset));
- BuildMI(MBB, MBBI, DL, TII.get(X86::CFI_INSTRUCTION)).addCFIIndex(CFIIndex);
+ MMI.addFrameInst(MCCFIInstruction::createOffset(nullptr, DwarfReg,
+ Offset));
+ BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
}
}
@@ -446,7 +449,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
!MFI->adjustsStack() && // No calls.
!IsWin64 && // Win64 has no Red Zone
!usesTheStack(MF) && // Don't push and pop.
- !MF.getTarget().Options.EnableSegmentedStacks) { // Regular stack
+ !MF.shouldSplitStack()) { // Regular stack
uint64_t MinSize = X86FI->getCalleeSavedFrameSize();
if (HasFP) MinSize += SlotSize;
StackSize = std::max(MinSize, StackSize > 128 ? StackSize - 128 : 0);
@@ -511,15 +514,16 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
// Define the current CFA rule to use the provided offset.
assert(StackSize);
unsigned CFIIndex = MMI.addFrameInst(
- MCCFIInstruction::createDefCfaOffset(0, 2 * stackGrowth));
- BuildMI(MBB, MBBI, DL, TII.get(X86::CFI_INSTRUCTION))
+ MCCFIInstruction::createDefCfaOffset(nullptr, 2 * stackGrowth));
+ BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex);
// Change the rule for the FramePtr to be an "offset" rule.
unsigned DwarfFramePtr = RegInfo->getDwarfRegNum(FramePtr, true);
CFIIndex = MMI.addFrameInst(
- MCCFIInstruction::createOffset(0, DwarfFramePtr, 2 * stackGrowth));
- BuildMI(MBB, MBBI, DL, TII.get(X86::CFI_INSTRUCTION))
+ MCCFIInstruction::createOffset(nullptr,
+ DwarfFramePtr, 2 * stackGrowth));
+ BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex);
}
@@ -534,8 +538,8 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
// Define the current CFA to use the EBP/RBP register.
unsigned DwarfFramePtr = RegInfo->getDwarfRegNum(FramePtr, true);
unsigned CFIIndex = MMI.addFrameInst(
- MCCFIInstruction::createDefCfaRegister(0, DwarfFramePtr));
- BuildMI(MBB, MBBI, DL, TII.get(X86::CFI_INSTRUCTION))
+ MCCFIInstruction::createDefCfaRegister(nullptr, DwarfFramePtr));
+ BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex);
}
@@ -564,7 +568,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
assert(StackSize);
unsigned CFIIndex = MMI.addFrameInst(
MCCFIInstruction::createDefCfaOffset(nullptr, StackOffset));
- BuildMI(MBB, MBBI, DL, TII.get(X86::CFI_INSTRUCTION))
+ BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex);
StackOffset += stackGrowth;
}
@@ -698,9 +702,10 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
// Define the current CFA rule to use the provided offset.
assert(StackSize);
unsigned CFIIndex = MMI.addFrameInst(
- MCCFIInstruction::createDefCfaOffset(0, -StackSize + stackGrowth));
+ MCCFIInstruction::createDefCfaOffset(nullptr,
+ -StackSize + stackGrowth));
- BuildMI(MBB, MBBI, DL, TII.get(X86::CFI_INSTRUCTION))
+ BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex);
}
@@ -905,7 +910,8 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
}
}
-int X86FrameLowering::getFrameIndexOffset(const MachineFunction &MF, int FI) const {
+int X86FrameLowering::getFrameIndexOffset(const MachineFunction &MF,
+ int FI) const {
const X86RegisterInfo *RegInfo =
static_cast<const X86RegisterInfo*>(MF.getTarget().getRegisterInfo());
const MachineFrameInfo *MFI = MF.getFrameInfo();
@@ -1170,6 +1176,15 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
!STI.isTargetWin32() && !STI.isTargetWin64() && !STI.isTargetFreeBSD())
report_fatal_error("Segmented stacks not supported on this platform.");
+ // Eventually StackSize will be calculated by a link-time pass, which will
+ // also decide whether checking code needs to be injected into this
+ // particular prologue.
+ StackSize = MFI->getStackSize();
+
+ // Do not generate a prologue for functions with a stack of size zero.
+ if (StackSize == 0)
+ return;
+
MachineBasicBlock *allocMBB = MF.CreateMachineBasicBlock();
MachineBasicBlock *checkMBB = MF.CreateMachineBasicBlock();
X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
@@ -1194,11 +1209,6 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
MF.push_front(allocMBB);
MF.push_front(checkMBB);
- // Eventually StackSize will be calculated by a link-time pass; which will
- // also decide whether checking code needs to be injected into this particular
- // prologue.
- StackSize = MFI->getStackSize();
-
// When the frame size is less than 256 we just compare the stack
// boundary directly to the value of the stack pointer, per gcc.
bool CompareStackPointer = StackSize < kSplitStackAvailable;
@@ -1256,22 +1266,23 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
.addReg(0).addImm(0).addReg(0).addImm(TlsOffset).addReg(TlsReg);
} else if (STI.isTargetDarwin()) {
- // TlsOffset doesn't fit into a mod r/m byte so we need an extra register
+ // TlsOffset doesn't fit into a mod r/m byte so we need an extra register.
unsigned ScratchReg2;
bool SaveScratch2;
if (CompareStackPointer) {
- // The primary scratch register is available for holding the TLS offset
+ // The primary scratch register is available for holding the TLS offset.
ScratchReg2 = GetScratchRegister(Is64Bit, MF, true);
SaveScratch2 = false;
} else {
// Need to use a second register to hold the TLS offset
ScratchReg2 = GetScratchRegister(Is64Bit, MF, false);
- // Unfortunately, with fastcc the second scratch register may hold an arg
+ // Unfortunately, with fastcc the second scratch register may hold an
+ // argument.
SaveScratch2 = MF.getRegInfo().isLiveIn(ScratchReg2);
}
- // If Scratch2 is live-in then it needs to be saved
+ // If Scratch2 is live-in then it needs to be saved.
assert((!MF.getRegInfo().isLiveIn(ScratchReg2) || SaveScratch2) &&
"Scratch register is live-in and not saved");
@@ -1348,14 +1359,14 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
/// http://publications.uu.se/uu/fulltext/nbn_se_uu_diva-2688.pdf)
///
/// CheckStack:
-/// temp0 = sp - MaxStack
-/// if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart
+/// temp0 = sp - MaxStack
+/// if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart
/// OldStart:
-/// ...
+/// ...
/// IncStack:
-/// call inc_stack # doubles the stack space
-/// temp0 = sp - MaxStack
-/// if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart
+/// call inc_stack # doubles the stack space
+/// temp0 = sp - MaxStack
+/// if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart
void X86FrameLowering::adjustForHiPEPrologue(MachineFunction &MF) const {
const X86InstrInfo &TII = *TM.getInstrInfo();
MachineFrameInfo *MFI = MF.getFrameInfo();
@@ -1514,7 +1525,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
unsigned StackAlign = TM.getFrameLowering()->getStackAlignment();
Amount = (Amount + StackAlign - 1) / StackAlign * StackAlign;
- MachineInstr *New = 0;
+ MachineInstr *New = nullptr;
if (Opcode == TII.getCallFrameSetupOpcode()) {
New = BuildMI(MF, DL, TII.get(getSUBriOpcode(IsLP64, Amount)),
StackPtr)
diff --git a/lib/Target/X86/X86FrameLowering.h b/lib/Target/X86/X86FrameLowering.h
index f0db8cb..208bb8b 100644
--- a/lib/Target/X86/X86FrameLowering.h
+++ b/lib/Target/X86/X86FrameLowering.h
@@ -47,7 +47,7 @@ public:
void adjustForHiPEPrologue(MachineFunction &MF) const override;
void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
- RegScavenger *RS = NULL) const override;
+ RegScavenger *RS = nullptr) const override;
bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index 3e45adb..74386d3 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -12,7 +12,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "x86-isel"
#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86MachineFunctionInfo.h"
@@ -36,6 +35,8 @@
#include "llvm/Target/TargetOptions.h"
using namespace llvm;
+#define DEBUG_TYPE "x86-isel"
+
STATISTIC(NumLoadMoved, "Number of loads moved below TokenFactor");
//===----------------------------------------------------------------------===//
@@ -70,17 +71,18 @@ namespace {
X86ISelAddressMode()
: BaseType(RegBase), Base_FrameIndex(0), Scale(1), IndexReg(), Disp(0),
- Segment(), GV(0), CP(0), BlockAddr(0), ES(0), JT(-1), Align(0),
- SymbolFlags(X86II::MO_NO_FLAG) {
+ Segment(), GV(nullptr), CP(nullptr), BlockAddr(nullptr), ES(nullptr),
+ JT(-1), Align(0), SymbolFlags(X86II::MO_NO_FLAG) {
}
bool hasSymbolicDisplacement() const {
- return GV != 0 || CP != 0 || ES != 0 || JT != -1 || BlockAddr != 0;
+ return GV != nullptr || CP != nullptr || ES != nullptr ||
+ JT != -1 || BlockAddr != nullptr;
}
bool hasBaseOrIndexReg() const {
return BaseType == FrameIndexBase ||
- IndexReg.getNode() != 0 || Base_Reg.getNode() != 0;
+ IndexReg.getNode() != nullptr || Base_Reg.getNode() != nullptr;
}
/// isRIPRelative - Return true if this addressing mode is already RIP
@@ -102,14 +104,14 @@ namespace {
void dump() {
dbgs() << "X86ISelAddressMode " << this << '\n';
dbgs() << "Base_Reg ";
- if (Base_Reg.getNode() != 0)
+ if (Base_Reg.getNode())
Base_Reg.getNode()->dump();
else
dbgs() << "nul";
dbgs() << " Base.FrameIndex " << Base_FrameIndex << '\n'
<< " Scale" << Scale << '\n'
<< "IndexReg ";
- if (IndexReg.getNode() != 0)
+ if (IndexReg.getNode())
IndexReg.getNode()->dump();
else
dbgs() << "nul";
@@ -160,6 +162,13 @@ namespace {
return "X86 DAG->DAG Instruction Selection";
}
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ // Reset the subtarget each time through.
+ Subtarget = &TM.getSubtarget<X86Subtarget>();
+ SelectionDAGISel::runOnMachineFunction(MF);
+ return true;
+ }
+
void EmitFunctionEntryCode() override;
bool IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const override;
@@ -374,14 +383,13 @@ static void MoveBelowOrigChain(SelectionDAG *CurDAG, SDValue Load,
else
Ops.push_back(Chain.getOperand(i));
SDValue NewChain =
- CurDAG->getNode(ISD::TokenFactor, SDLoc(Load),
- MVT::Other, &Ops[0], Ops.size());
+ CurDAG->getNode(ISD::TokenFactor, SDLoc(Load), MVT::Other, Ops);
Ops.clear();
Ops.push_back(NewChain);
}
for (unsigned i = 1, e = OrigChain.getNumOperands(); i != e; ++i)
Ops.push_back(OrigChain.getOperand(i));
- CurDAG->UpdateNodeOperands(OrigChain.getNode(), &Ops[0], Ops.size());
+ CurDAG->UpdateNodeOperands(OrigChain.getNode(), Ops);
CurDAG->UpdateNodeOperands(Load.getNode(), Call.getOperand(0),
Load.getOperand(1), Load.getOperand(2));
@@ -390,7 +398,7 @@ static void MoveBelowOrigChain(SelectionDAG *CurDAG, SDValue Load,
Ops.push_back(SDValue(Load.getNode(), 1));
for (unsigned i = 1, e = NumOps; i != e; ++i)
Ops.push_back(Call.getOperand(i));
- CurDAG->UpdateNodeOperands(Call.getNode(), &Ops[0], NumOps);
+ CurDAG->UpdateNodeOperands(Call.getNode(), Ops);
}
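Note: the getNode/UpdateNodeOperands calls above show the other recurring migration in this rebase: SelectionDAG entry points that took a (pointer, count) pair now take ArrayRef<SDValue>. A sketch assuming CurDAG and a SmallVector<SDValue, 8> Ops as in the code above:

    // Before: CurDAG->getNode(ISD::TokenFactor, DL, MVT::Other, &Ops[0], Ops.size());
    // After: the container converts implicitly to ArrayRef<SDValue>:
    SDValue NewChain = CurDAG->getNode(ISD::TokenFactor, DL, MVT::Other, Ops);
    // Sub-ranges still work via makeArrayRef(&Ops[Start], Len), as the
    // BUILD_VECTOR changes later in this patch show.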
/// isCalleeLoad - Return true if call address is a load and it can be
@@ -612,7 +620,7 @@ bool X86DAGToDAGISel::MatchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM){
// gs:0 (or fs:0 on X86-64) contains its own address.
// For more information see http://people.redhat.com/drepper/tls.pdf
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Address))
- if (C->getSExtValue() == 0 && AM.Segment.getNode() == 0 &&
+ if (C->getSExtValue() == 0 && AM.Segment.getNode() == nullptr &&
Subtarget->isTargetLinux())
switch (N->getPointerInfo().getAddrSpace()) {
case 256:
@@ -733,7 +741,7 @@ bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM) {
// a smaller encoding and avoids a scaled-index.
if (AM.Scale == 2 &&
AM.BaseType == X86ISelAddressMode::RegBase &&
- AM.Base_Reg.getNode() == 0) {
+ AM.Base_Reg.getNode() == nullptr) {
AM.Base_Reg = AM.IndexReg;
AM.Scale = 1;
}
@@ -745,8 +753,8 @@ bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM) {
Subtarget->is64Bit() &&
AM.Scale == 1 &&
AM.BaseType == X86ISelAddressMode::RegBase &&
- AM.Base_Reg.getNode() == 0 &&
- AM.IndexReg.getNode() == 0 &&
+ AM.Base_Reg.getNode() == nullptr &&
+ AM.IndexReg.getNode() == nullptr &&
AM.SymbolFlags == X86II::MO_NO_FLAG &&
AM.hasSymbolicDisplacement())
AM.Base_Reg = CurDAG->getRegister(X86::RIP, MVT::i64);
@@ -926,7 +934,7 @@ static bool FoldMaskAndShiftToScale(SelectionDAG &DAG, SDValue N,
APInt MaskedHighBits =
APInt::getHighBitsSet(X.getSimpleValueType().getSizeInBits(), MaskLZ);
APInt KnownZero, KnownOne;
- DAG.ComputeMaskedBits(X, KnownZero, KnownOne);
+ DAG.computeKnownBits(X, KnownZero, KnownOne);
if (MaskedHighBits != KnownZero) return true;
// We've identified a pattern that can be transformed into a single shift
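Note: ComputeMaskedBits was renamed computeKnownBits in this cycle; only the name changed, not the semantics. Roughly, after the call:

    APInt KnownZero, KnownOne;
    DAG.computeKnownBits(X, KnownZero, KnownOne);
    // KnownZero[i] set => bit i of X is provably 0
    // KnownOne[i]  set => bit i of X is provably 1
    // (the two masks are disjoint for a well-formed node)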
@@ -1009,7 +1017,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
case ISD::FrameIndex:
if (AM.BaseType == X86ISelAddressMode::RegBase &&
- AM.Base_Reg.getNode() == 0 &&
+ AM.Base_Reg.getNode() == nullptr &&
(!Subtarget->is64Bit() || isDispSafeForFrameIndex(AM.Disp))) {
AM.BaseType = X86ISelAddressMode::FrameIndexBase;
AM.Base_FrameIndex = cast<FrameIndexSDNode>(N)->getIndex();
@@ -1018,7 +1026,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
break;
case ISD::SHL:
- if (AM.IndexReg.getNode() != 0 || AM.Scale != 1)
+ if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1)
break;
if (ConstantSDNode
@@ -1052,7 +1060,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
case ISD::SRL: {
// Scale must not be used already.
- if (AM.IndexReg.getNode() != 0 || AM.Scale != 1) break;
+ if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1) break;
SDValue And = N.getOperand(0);
if (And.getOpcode() != ISD::AND) break;
@@ -1086,8 +1094,8 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
case X86ISD::MUL_IMM:
// X*[3,5,9] -> X+X*[2,4,8]
if (AM.BaseType == X86ISelAddressMode::RegBase &&
- AM.Base_Reg.getNode() == 0 &&
- AM.IndexReg.getNode() == 0) {
+ AM.Base_Reg.getNode() == nullptr &&
+ AM.IndexReg.getNode() == nullptr) {
if (ConstantSDNode
*CN = dyn_cast<ConstantSDNode>(N.getNode()->getOperand(1)))
if (CN->getZExtValue() == 3 || CN->getZExtValue() == 5 ||
@@ -1237,7 +1245,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
// with a constant to enable use of the scaled offset field.
// Scale must not be used already.
- if (AM.IndexReg.getNode() != 0 || AM.Scale != 1) break;
+ if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1) break;
SDValue Shift = N.getOperand(0);
if (Shift.getOpcode() != ISD::SRL && Shift.getOpcode() != ISD::SHL) break;
@@ -1276,7 +1284,7 @@ bool X86DAGToDAGISel::MatchAddressBase(SDValue N, X86ISelAddressMode &AM) {
// Is the base register already occupied?
if (AM.BaseType != X86ISelAddressMode::RegBase || AM.Base_Reg.getNode()) {
// If so, check to see if the scale index register is set.
- if (AM.IndexReg.getNode() == 0) {
+ if (!AM.IndexReg.getNode()) {
AM.IndexReg = N;
AM.Scale = 1;
return false;
@@ -1567,7 +1575,7 @@ SDNode *X86DAGToDAGISel::SelectAtomic64(SDNode *Node, unsigned Opc) {
SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
if (!SelectAddr(Node, In1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4))
- return NULL;
+ return nullptr;
MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
MemOp[0] = cast<MemSDNode>(Node)->getMemOperand();
const SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, In2L, In2H, Chain};
@@ -1756,7 +1764,7 @@ static SDValue getAtomicLoadArithTargetConstant(SelectionDAG *CurDAG,
SDNode *X86DAGToDAGISel::SelectAtomicLoadArith(SDNode *Node, MVT NVT) {
if (Node->hasAnyUseOfValue(0))
- return 0;
+ return nullptr;
SDLoc dl(Node);
@@ -1768,13 +1776,13 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadArith(SDNode *Node, MVT NVT) {
SDValue Val = Node->getOperand(2);
SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
if (!SelectAddr(Node, Ptr, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4))
- return 0;
+ return nullptr;
// Which index into the table.
enum AtomicOpc Op;
switch (Node->getOpcode()) {
default:
- return 0;
+ return nullptr;
case ISD::ATOMIC_LOAD_OR:
Op = OR;
break;
@@ -1795,7 +1803,7 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadArith(SDNode *Node, MVT NVT) {
unsigned Opc = 0;
switch (NVT.SimpleTy) {
- default: return 0;
+ default: return nullptr;
case MVT::i8:
if (isCN)
Opc = AtomicOpcTbl[Op][ConstantI8];
@@ -1847,7 +1855,7 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadArith(SDNode *Node, MVT NVT) {
}
cast<MachineSDNode>(Ret)->setMemRefs(MemOp, MemOp + 1);
SDValue RetVals[] = { Undef, Ret };
- return CurDAG->getMergeValues(RetVals, 2, dl).getNode();
+ return CurDAG->getMergeValues(RetVals, dl).getNode();
}
/// HasNoSignedComparisonUses - Test whether the given X86ISD::CMP node has
@@ -1990,7 +1998,7 @@ static bool isLoadIncOrDecStore(StoreSDNode *StoreNode, unsigned Opc,
// Make a new TokenFactor with all the other input chains except
// for the load.
InputChain = CurDAG->getNode(ISD::TokenFactor, SDLoc(Chain),
- MVT::Other, &ChainOps[0], ChainOps.size());
+ MVT::Other, ChainOps);
}
if (!ChainCheck)
return false;
@@ -2027,7 +2035,7 @@ SDNode *X86DAGToDAGISel::SelectGather(SDNode *Node, unsigned Opc) {
SDValue VMask = Node->getOperand(5);
ConstantSDNode *Scale = dyn_cast<ConstantSDNode>(Node->getOperand(6));
if (!Scale)
- return 0;
+ return nullptr;
SDVTList VTs = CurDAG->getVTList(VSrc.getValueType(), VSrc.getValueType(),
MVT::Other);
@@ -2058,7 +2066,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
if (Node->isMachineOpcode()) {
DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << '\n');
Node->setNodeId(-1);
- return NULL; // Already selected.
+ return nullptr; // Already selected.
}
switch (Opcode) {
@@ -2108,7 +2116,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
SDNode *RetVal = SelectGather(Node, Opc);
if (RetVal)
// We already called ReplaceUses inside SelectGather.
- return NULL;
+ return nullptr;
break;
}
}
@@ -2259,7 +2267,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0));
ReplaceUses(SDValue(Node, 1), SDValue(CNode, 1));
ReplaceUses(SDValue(Node, 2), SDValue(CNode, 2));
- return NULL;
+ return nullptr;
}
case ISD::SMUL_LOHI:
@@ -2386,7 +2394,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
}
// Copy the low half of the result, if it is needed.
if (!SDValue(Node, 0).use_empty()) {
- if (ResLo.getNode() == 0) {
+ if (!ResLo.getNode()) {
assert(LoReg && "Register for low half is not defined!");
ResLo = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, LoReg, NVT,
InFlag);
@@ -2397,7 +2405,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
}
// Copy the high half of the result, if it is needed.
if (!SDValue(Node, 1).use_empty()) {
- if (ResHi.getNode() == 0) {
+ if (!ResHi.getNode()) {
assert(HiReg && "Register for high half is not defined!");
ResHi = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, HiReg, NVT,
InFlag);
@@ -2407,7 +2415,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
DEBUG(dbgs() << "=> "; ResHi.getNode()->dump(CurDAG); dbgs() << '\n');
}
- return NULL;
+ return nullptr;
}
case ISD::SDIVREM:
@@ -2575,7 +2583,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
ReplaceUses(SDValue(Node, 1), Result);
DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n');
}
- return NULL;
+ return nullptr;
}
case X86ISD::CMP:
@@ -2632,7 +2640,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
// one, do not call ReplaceAllUsesWith.
ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)),
SDValue(NewNode, 0));
- return NULL;
+ return nullptr;
}
// For example, "testl %eax, $2048" to "testb %ah, $8".
@@ -2669,7 +2677,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
// one, do not call ReplaceAllUsesWith.
ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)),
SDValue(NewNode, 0));
- return NULL;
+ return nullptr;
}
// For example, "testl %eax, $32776" to "testw %ax, $32776".
@@ -2691,7 +2699,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
// one, do not call ReplaceAllUsesWith.
ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)),
SDValue(NewNode, 0));
- return NULL;
+ return nullptr;
}
// For example, "testq %rax, $268468232" to "testl %eax, $268468232".
@@ -2713,7 +2721,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
// one, do not call ReplaceAllUsesWith.
ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)),
SDValue(NewNode, 0));
- return NULL;
+ return nullptr;
}
}
break;
@@ -2740,7 +2748,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
SDValue StoredVal = StoreNode->getOperand(1);
unsigned Opc = StoredVal->getOpcode();
- LoadSDNode *LoadNode = 0;
+ LoadSDNode *LoadNode = nullptr;
SDValue InputChain;
if (!isLoadIncOrDecStore(StoreNode, Opc, StoredVal, CurDAG,
LoadNode, InputChain))
@@ -2772,7 +2780,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
SDNode *ResNode = SelectCode(Node);
DEBUG(dbgs() << "=> ";
- if (ResNode == NULL || ResNode == Node)
+ if (ResNode == nullptr || ResNode == Node)
Node->dump(CurDAG);
else
ResNode->dump(CurDAG);
@@ -2790,7 +2798,7 @@ SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
case 'v': // not offsetable ??
default: return true;
case 'm': // memory
- if (!SelectAddr(0, Op, Op0, Op1, Op2, Op3, Op4))
+ if (!SelectAddr(nullptr, Op, Op0, Op1, Op2, Op3, Op4))
return true;
break;
}
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 2a35061..cbaf44e 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -12,7 +12,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "x86-isel"
#include "X86ISelLowering.h"
#include "Utils/X86ShuffleDecode.h"
#include "X86CallingConv.h"
@@ -23,6 +22,7 @@
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/VariadicFunction.h"
#include "llvm/CodeGen/IntrinsicLowering.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -52,6 +52,8 @@
#include <cctype>
using namespace llvm;
+#define DEBUG_TYPE "x86-isel"
+
STATISTIC(NumTailCalls, "Number of tail calls");
// Forward declarations.
@@ -84,7 +86,8 @@ static SDValue ExtractSubVector(SDValue Vec, unsigned IdxVal,
// If the input is a buildvector just emit a smaller one.
if (Vec.getOpcode() == ISD::BUILD_VECTOR)
return DAG.getNode(ISD::BUILD_VECTOR, dl, ResultVT,
- Vec->op_begin()+NormalizedIdxVal, ElemsPerChunk);
+ makeArrayRef(Vec->op_begin()+NormalizedIdxVal,
+ ElemsPerChunk));
SDValue VecIdx = DAG.getIntPtrConstant(NormalizedIdxVal);
SDValue Result = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResultVT, Vec,
@@ -265,10 +268,10 @@ void X86TargetLowering::resetOperationActions() {
// The _ftol2 runtime function has an unusual calling conv, which
// is modeled by a special pseudo-instruction.
- setLibcallName(RTLIB::FPTOUINT_F64_I64, 0);
- setLibcallName(RTLIB::FPTOUINT_F32_I64, 0);
- setLibcallName(RTLIB::FPTOUINT_F64_I32, 0);
- setLibcallName(RTLIB::FPTOUINT_F32_I32, 0);
+ setLibcallName(RTLIB::FPTOUINT_F64_I64, nullptr);
+ setLibcallName(RTLIB::FPTOUINT_F32_I64, nullptr);
+ setLibcallName(RTLIB::FPTOUINT_F64_I32, nullptr);
+ setLibcallName(RTLIB::FPTOUINT_F32_I32, nullptr);
}
if (Subtarget->isTargetDarwin()) {
@@ -635,15 +638,8 @@ void X86TargetLowering::resetOperationActions() {
setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
- if (Subtarget->isOSWindows() && !Subtarget->isTargetMacho())
- setOperationAction(ISD::DYNAMIC_STACKALLOC, Subtarget->is64Bit() ?
- MVT::i64 : MVT::i32, Custom);
- else if (TM.Options.EnableSegmentedStacks)
- setOperationAction(ISD::DYNAMIC_STACKALLOC, Subtarget->is64Bit() ?
- MVT::i64 : MVT::i32, Custom);
- else
- setOperationAction(ISD::DYNAMIC_STACKALLOC, Subtarget->is64Bit() ?
- MVT::i64 : MVT::i32, Expand);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, Subtarget->is64Bit() ?
+ MVT::i64 : MVT::i32, Custom);
if (!TM.Options.UseSoftFloat && X86ScalarSSEf64) {
// f32 and f64 use SSE.
@@ -832,7 +828,9 @@ void X86TargetLowering::resetOperationActions() {
setOperationAction(ISD::FRINT, VT, Expand);
setOperationAction(ISD::FNEARBYINT, VT, Expand);
setOperationAction(ISD::SMUL_LOHI, VT, Expand);
+ setOperationAction(ISD::MULHS, VT, Expand);
setOperationAction(ISD::UMUL_LOHI, VT, Expand);
+ setOperationAction(ISD::MULHU, VT, Expand);
setOperationAction(ISD::SDIVREM, VT, Expand);
setOperationAction(ISD::UDIVREM, VT, Expand);
setOperationAction(ISD::FPOW, VT, Expand);
@@ -944,6 +942,10 @@ void X86TargetLowering::resetOperationActions() {
setOperationAction(ISD::ADD, MVT::v2i64, Legal);
setOperationAction(ISD::MUL, MVT::v4i32, Custom);
setOperationAction(ISD::MUL, MVT::v2i64, Custom);
+ setOperationAction(ISD::UMUL_LOHI, MVT::v4i32, Custom);
+ setOperationAction(ISD::SMUL_LOHI, MVT::v4i32, Custom);
+ setOperationAction(ISD::MULHU, MVT::v8i16, Legal);
+ setOperationAction(ISD::MULHS, MVT::v8i16, Legal);
setOperationAction(ISD::SUB, MVT::v16i8, Legal);
setOperationAction(ISD::SUB, MVT::v8i16, Legal);
setOperationAction(ISD::SUB, MVT::v4i32, Legal);
@@ -1036,6 +1038,10 @@ void X86TargetLowering::resetOperationActions() {
setOperationAction(ISD::FP_ROUND, MVT::v2f32, Custom);
setLoadExtAction(ISD::EXTLOAD, MVT::v2f32, Legal);
+
+ setOperationAction(ISD::BITCAST, MVT::v2i32, Custom);
+ setOperationAction(ISD::BITCAST, MVT::v4i16, Custom);
+ setOperationAction(ISD::BITCAST, MVT::v8i8, Custom);
}
if (!TM.Options.UseSoftFloat && Subtarget->hasSSE41()) {
@@ -1064,11 +1070,14 @@ void X86TargetLowering::resetOperationActions() {
// FIXME: Do we need to handle scalar-to-vector here?
setOperationAction(ISD::MUL, MVT::v4i32, Legal);
- setOperationAction(ISD::VSELECT, MVT::v2f64, Legal);
- setOperationAction(ISD::VSELECT, MVT::v2i64, Legal);
+ setOperationAction(ISD::VSELECT, MVT::v2f64, Custom);
+ setOperationAction(ISD::VSELECT, MVT::v2i64, Custom);
+ setOperationAction(ISD::VSELECT, MVT::v4i32, Custom);
+ setOperationAction(ISD::VSELECT, MVT::v4f32, Custom);
+ setOperationAction(ISD::VSELECT, MVT::v8i16, Custom);
+ // There is no BLENDI for byte vectors. We don't need to custom lower
+ // some vselects for now.
setOperationAction(ISD::VSELECT, MVT::v16i8, Legal);
- setOperationAction(ISD::VSELECT, MVT::v4i32, Legal);
- setOperationAction(ISD::VSELECT, MVT::v4f32, Legal);
// i8 and i16 vectors are custom, because the source register and
// source memory operand types are not the same width. f32 vectors are
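Note: flipping these VSELECT entries from Legal to Custom is what lets the new BUILD_VECTORtoBlendMask logic at the end of this patch turn a vselect with a constant condition into a BLENDI immediate; byte vectors stay Legal because, as the comment says, there is no BLENDI for them. Roughly the intended rewrite, shown only as a sketch:

    // (vselect (build_vector C0, C1, C2, C3), A, B)
    //   --> (X86ISD::BLENDI A, B, imm)
    // where bit i of imm encodes, per lane, which input lane i is taken
    // from, derived from whether Ci is zero.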
@@ -1111,9 +1120,6 @@ void X86TargetLowering::resetOperationActions() {
setOperationAction(ISD::SHL, MVT::v4i32, Custom);
setOperationAction(ISD::SRA, MVT::v4i32, Custom);
-
- setOperationAction(ISD::SDIV, MVT::v8i16, Custom);
- setOperationAction(ISD::SDIV, MVT::v4i32, Custom);
}
if (!TM.Options.UseSoftFloat && Subtarget->hasFp256()) {
@@ -1178,8 +1184,6 @@ void X86TargetLowering::resetOperationActions() {
setOperationAction(ISD::SRA, MVT::v16i16, Custom);
setOperationAction(ISD::SRA, MVT::v32i8, Custom);
- setOperationAction(ISD::SDIV, MVT::v16i16, Custom);
-
setOperationAction(ISD::SETCC, MVT::v32i8, Custom);
setOperationAction(ISD::SETCC, MVT::v16i16, Custom);
setOperationAction(ISD::SETCC, MVT::v8i32, Custom);
@@ -1189,10 +1193,10 @@ void X86TargetLowering::resetOperationActions() {
setOperationAction(ISD::SELECT, MVT::v4i64, Custom);
setOperationAction(ISD::SELECT, MVT::v8f32, Custom);
- setOperationAction(ISD::VSELECT, MVT::v4f64, Legal);
- setOperationAction(ISD::VSELECT, MVT::v4i64, Legal);
- setOperationAction(ISD::VSELECT, MVT::v8i32, Legal);
- setOperationAction(ISD::VSELECT, MVT::v8f32, Legal);
+ setOperationAction(ISD::VSELECT, MVT::v4f64, Custom);
+ setOperationAction(ISD::VSELECT, MVT::v4i64, Custom);
+ setOperationAction(ISD::VSELECT, MVT::v8i32, Custom);
+ setOperationAction(ISD::VSELECT, MVT::v8f32, Custom);
setOperationAction(ISD::SIGN_EXTEND, MVT::v4i64, Custom);
setOperationAction(ISD::SIGN_EXTEND, MVT::v8i32, Custom);
@@ -1232,9 +1236,13 @@ void X86TargetLowering::resetOperationActions() {
setOperationAction(ISD::MUL, MVT::v16i16, Legal);
// Don't lower v32i8 because there is no 128-bit byte mul
- setOperationAction(ISD::VSELECT, MVT::v32i8, Legal);
+ setOperationAction(ISD::UMUL_LOHI, MVT::v8i32, Custom);
+ setOperationAction(ISD::SMUL_LOHI, MVT::v8i32, Custom);
+ setOperationAction(ISD::MULHU, MVT::v16i16, Legal);
+ setOperationAction(ISD::MULHS, MVT::v16i16, Legal);
- setOperationAction(ISD::SDIV, MVT::v8i32, Custom);
+ setOperationAction(ISD::VSELECT, MVT::v16i16, Custom);
+ setOperationAction(ISD::VSELECT, MVT::v32i8, Legal);
} else {
setOperationAction(ISD::ADD, MVT::v4i64, Custom);
setOperationAction(ISD::ADD, MVT::v8i32, Custom);
@@ -1343,7 +1351,6 @@ void X86TargetLowering::resetOperationActions() {
setOperationAction(ISD::FNEG, MVT::v8f64, Custom);
setOperationAction(ISD::FMA, MVT::v8f64, Legal);
setOperationAction(ISD::FMA, MVT::v16f32, Legal);
- setOperationAction(ISD::SDIV, MVT::v16i32, Custom);
setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
@@ -1358,9 +1365,11 @@ void X86TargetLowering::resetOperationActions() {
setOperationAction(ISD::FP_TO_SINT, MVT::v16i32, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::v16i32, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, Legal);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
setOperationAction(ISD::SINT_TO_FP, MVT::v16i32, Legal);
setOperationAction(ISD::UINT_TO_FP, MVT::v16i32, Legal);
setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Legal);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
setOperationAction(ISD::FP_ROUND, MVT::v8f32, Legal);
setOperationAction(ISD::FP_EXTEND, MVT::v8f32, Legal);
@@ -1392,6 +1401,8 @@ void X86TargetLowering::resetOperationActions() {
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i1, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i1, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i1, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i1, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v8i1, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v16i1, Custom);
setOperationAction(ISD::SELECT, MVT::v8f64, Custom);
@@ -1474,6 +1485,8 @@ void X86TargetLowering::resetOperationActions() {
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
+ if (!Subtarget->is64Bit())
+ setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
// Only custom-lower 64-bit SADDO and friends on 64-bit because we don't
// handle type legalization for these operations here.
@@ -1498,9 +1511,9 @@ void X86TargetLowering::resetOperationActions() {
if (!Subtarget->is64Bit()) {
// These libcalls are not available in 32-bit.
- setLibcallName(RTLIB::SHL_I128, 0);
- setLibcallName(RTLIB::SRL_I128, 0);
- setLibcallName(RTLIB::SRA_I128, 0);
+ setLibcallName(RTLIB::SHL_I128, nullptr);
+ setLibcallName(RTLIB::SRL_I128, nullptr);
+ setLibcallName(RTLIB::SRA_I128, nullptr);
}
// Combine sin / cos into one node or libcall if possible.
@@ -1516,6 +1529,15 @@ void X86TargetLowering::resetOperationActions() {
}
}
+ if (Subtarget->isTargetWin64()) {
+ setOperationAction(ISD::SDIV, MVT::i128, Custom);
+ setOperationAction(ISD::UDIV, MVT::i128, Custom);
+ setOperationAction(ISD::SREM, MVT::i128, Custom);
+ setOperationAction(ISD::UREM, MVT::i128, Custom);
+ setOperationAction(ISD::SDIVREM, MVT::i128, Custom);
+ setOperationAction(ISD::UDIVREM, MVT::i128, Custom);
+ }
+
// We have target-specific dag combine patterns for the following nodes:
setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
@@ -1540,6 +1562,7 @@ void X86TargetLowering::resetOperationActions() {
setTargetDAGCombine(ISD::TRUNCATE);
setTargetDAGCombine(ISD::SINT_TO_FP);
setTargetDAGCombine(ISD::SETCC);
+ setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
if (Subtarget->is64Bit())
setTargetDAGCombine(ISD::MUL);
setTargetDAGCombine(ISD::XOR);
@@ -1738,7 +1761,7 @@ getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI,
// FIXME: Why this routine is here? Move to RegInfo!
std::pair<const TargetRegisterClass*, uint8_t>
X86TargetLowering::findRepresentativeClass(MVT VT) const{
- const TargetRegisterClass *RRC = 0;
+ const TargetRegisterClass *RRC = nullptr;
uint8_t Cost = 1;
switch (VT.SimpleTy) {
default:
@@ -1806,8 +1829,8 @@ X86TargetLowering::CanLowerReturn(CallingConv::ID CallConv,
return CCInfo.CheckReturn(Outs, RetCC_X86);
}
-const uint16_t *X86TargetLowering::getScratchRegisters(CallingConv::ID) const {
- static const uint16_t ScratchRegs[] = { X86::R11, 0 };
+const MCPhysReg *X86TargetLowering::getScratchRegisters(CallingConv::ID) const {
+ static const MCPhysReg ScratchRegs[] = { X86::R11, 0 };
return ScratchRegs;
}
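Note: uint16_t register tables become MCPhysReg throughout this patch; MCPhysReg is LLVM's typedef for a physical-register number (still 16 bits wide at this point), so the layout is unchanged but the intent is explicit. The trailing 0 remains the end-of-list sentinel:

    static const MCPhysReg ScratchRegs[] = { X86::R11, 0 };  // 0 terminates the list
    // In MCRegisterInfo.h this is roughly: typedef uint16_t MCPhysReg;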
@@ -1930,8 +1953,7 @@ X86TargetLowering::LowerReturn(SDValue Chain,
if (Flag.getNode())
RetOps.push_back(Flag);
- return DAG.getNode(X86ISD::RET_FLAG, dl,
- MVT::Other, &RetOps[0], RetOps.size());
+ return DAG.getNode(X86ISD::RET_FLAG, dl, MVT::Other, RetOps);
}
bool X86TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
@@ -2285,22 +2307,25 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
InVals.push_back(ArgValue);
}
- // The x86-64 ABIs require that for returning structs by value we copy
- // the sret argument into %rax/%eax (depending on ABI) for the return.
- // Win32 requires us to put the sret argument to %eax as well.
- // Save the argument into a virtual register so that we can access it
- // from the return points.
- if (MF.getFunction()->hasStructRetAttr() &&
- (Subtarget->is64Bit() || Subtarget->isTargetKnownWindowsMSVC())) {
- X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
- unsigned Reg = FuncInfo->getSRetReturnReg();
- if (!Reg) {
- MVT PtrTy = getPointerTy();
- Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy));
- FuncInfo->setSRetReturnReg(Reg);
+ if (Subtarget->is64Bit() || Subtarget->isTargetKnownWindowsMSVC()) {
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ // The x86-64 ABIs require that for returning structs by value we copy
+ // the sret argument into %rax/%eax (depending on ABI) for the return.
+ // Win32 requires us to put the sret argument to %eax as well.
+ // Save the argument into a virtual register so that we can access it
+ // from the return points.
+ if (Ins[i].Flags.isSRet()) {
+ unsigned Reg = FuncInfo->getSRetReturnReg();
+ if (!Reg) {
+ MVT PtrTy = getPointerTy();
+ Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy));
+ FuncInfo->setSRetReturnReg(Reg);
+ }
+ SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[i]);
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain);
+ break;
+ }
}
- SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[0]);
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain);
}
unsigned StackSize = CCInfo.getNextStackOffset();
@@ -2320,17 +2345,17 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
unsigned TotalNumIntRegs = 0, TotalNumXMMRegs = 0;
// FIXME: We should really autogenerate these arrays
- static const uint16_t GPR64ArgRegsWin64[] = {
+ static const MCPhysReg GPR64ArgRegsWin64[] = {
X86::RCX, X86::RDX, X86::R8, X86::R9
};
- static const uint16_t GPR64ArgRegs64Bit[] = {
+ static const MCPhysReg GPR64ArgRegs64Bit[] = {
X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
};
- static const uint16_t XMMArgRegs64Bit[] = {
+ static const MCPhysReg XMMArgRegs64Bit[] = {
X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
};
- const uint16_t *GPR64ArgRegs;
+ const MCPhysReg *GPR64ArgRegs;
unsigned NumXMMRegs = 0;
if (IsWin64) {
@@ -2424,13 +2449,11 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
SaveXMMOps.push_back(Val);
}
MemOps.push_back(DAG.getNode(X86ISD::VASTART_SAVE_XMM_REGS, dl,
- MVT::Other,
- &SaveXMMOps[0], SaveXMMOps.size()));
+ MVT::Other, SaveXMMOps));
}
if (!MemOps.empty())
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- &MemOps[0], MemOps.size());
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
}
}
@@ -2497,10 +2520,10 @@ X86TargetLowering::EmitTailCallLoadRetAddr(SelectionDAG &DAG,
/// EmitTailCallStoreRetAddr - Emit a store of the return address if tail call
/// optimization is performed and it is required (FPDiff!=0).
-static SDValue
-EmitTailCallStoreRetAddr(SelectionDAG & DAG, MachineFunction &MF,
- SDValue Chain, SDValue RetAddrFrIdx, EVT PtrVT,
- unsigned SlotSize, int FPDiff, SDLoc dl) {
+static SDValue EmitTailCallStoreRetAddr(SelectionDAG &DAG, MachineFunction &MF,
+ SDValue Chain, SDValue RetAddrFrIdx,
+ EVT PtrVT, unsigned SlotSize,
+ int FPDiff, SDLoc dl) {
// Store the return address to the appropriate stack slot.
if (!FPDiff) return Chain;
// Calculate the new stack slot for the return address.
@@ -2537,7 +2560,13 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
if (MF.getTarget().Options.DisableTailCalls)
isTailCall = false;
- if (isTailCall) {
+ bool IsMustTail = CLI.CS && CLI.CS->isMustTailCall();
+ if (IsMustTail) {
+ // Force this to be a tail call. The verifier rules are enough to ensure
+ // that we can lower this successfully without moving the return address
+ // around.
+ isTailCall = true;
+ } else if (isTailCall) {
// Check if it's really possible to do a tail call.
isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
isVarArg, SR != NotStructReturn,
@@ -2578,7 +2607,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
int FPDiff = 0;
- if (isTailCall && !IsSibcall) {
+ if (isTailCall && !IsSibcall && !IsMustTail) {
// Lower arguments at fp - stackoffset + fpdiff.
X86MachineFunctionInfo *X86Info = MF.getInfo<X86MachineFunctionInfo>();
unsigned NumBytesCallerPushed = X86Info->getBytesToPopOnReturn();
@@ -2683,7 +2712,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
}
} else if (!IsSibcall && (!isTailCall || isByVal)) {
assert(VA.isMemLoc());
- if (StackPtr.getNode() == 0)
+ if (!StackPtr.getNode())
StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
getPointerTy());
MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
@@ -2692,8 +2721,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
}
if (!MemOpChains.empty())
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- &MemOpChains[0], MemOpChains.size());
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
if (Subtarget->isPICStyleGOT()) {
// ELF / PIC requires GOT in the EBX register before function calls via PLT
@@ -2730,7 +2758,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// registers used and is in the range 0 - 8 inclusive.
// Count the number of XMM registers allocated.
- static const uint16_t XMMArgRegs[] = {
+ static const MCPhysReg XMMArgRegs[] = {
X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
};
@@ -2742,8 +2770,10 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
DAG.getConstant(NumXMMRegs, MVT::i8)));
}
- // For tail calls lower the arguments to the 'real' stack slot.
- if (isTailCall) {
+ // For tail calls lower the arguments to the 'real' stack slots. Sibcalls
+ // don't need this because the eligibility check rejects calls that require
+ // shuffling arguments passed in memory.
+ if (!IsSibcall && isTailCall) {
// Force all the incoming stack arguments to be loaded from the stack
// before any new outgoing arguments are stored to the stack, because the
// outgoing stack slots may alias the incoming argument stack slots, and
@@ -2755,45 +2785,45 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SmallVector<SDValue, 8> MemOpChains2;
SDValue FIN;
int FI = 0;
- if (getTargetMachine().Options.GuaranteedTailCallOpt) {
- for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
- CCValAssign &VA = ArgLocs[i];
- if (VA.isRegLoc())
- continue;
- assert(VA.isMemLoc());
- SDValue Arg = OutVals[i];
- ISD::ArgFlagsTy Flags = Outs[i].Flags;
- // Create frame index.
- int32_t Offset = VA.getLocMemOffset()+FPDiff;
- uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8;
- FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true);
- FIN = DAG.getFrameIndex(FI, getPointerTy());
-
- if (Flags.isByVal()) {
- // Copy relative to framepointer.
- SDValue Source = DAG.getIntPtrConstant(VA.getLocMemOffset());
- if (StackPtr.getNode() == 0)
- StackPtr = DAG.getCopyFromReg(Chain, dl,
- RegInfo->getStackRegister(),
- getPointerTy());
- Source = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, Source);
-
- MemOpChains2.push_back(CreateCopyOfByValArgument(Source, FIN,
- ArgChain,
- Flags, DAG, dl));
- } else {
- // Store relative to framepointer.
- MemOpChains2.push_back(
- DAG.getStore(ArgChain, dl, Arg, FIN,
- MachinePointerInfo::getFixedStack(FI),
- false, false, 0));
- }
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ if (VA.isRegLoc())
+ continue;
+ assert(VA.isMemLoc());
+ SDValue Arg = OutVals[i];
+ ISD::ArgFlagsTy Flags = Outs[i].Flags;
+ // Skip inalloca arguments. They don't require any work.
+ if (Flags.isInAlloca())
+ continue;
+ // Create frame index.
+ int32_t Offset = VA.getLocMemOffset()+FPDiff;
+ uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8;
+ FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true);
+ FIN = DAG.getFrameIndex(FI, getPointerTy());
+
+ if (Flags.isByVal()) {
+ // Copy relative to framepointer.
+ SDValue Source = DAG.getIntPtrConstant(VA.getLocMemOffset());
+ if (!StackPtr.getNode())
+ StackPtr = DAG.getCopyFromReg(Chain, dl,
+ RegInfo->getStackRegister(),
+ getPointerTy());
+ Source = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, Source);
+
+ MemOpChains2.push_back(CreateCopyOfByValArgument(Source, FIN,
+ ArgChain,
+ Flags, DAG, dl));
+ } else {
+ // Store relative to framepointer.
+ MemOpChains2.push_back(
+ DAG.getStore(ArgChain, dl, Arg, FIN,
+ MachinePointerInfo::getFixedStack(FI),
+ false, false, 0));
}
}
if (!MemOpChains2.empty())
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- &MemOpChains2[0], MemOpChains2.size());
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
// Store the return address to the appropriate stack slot.
Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx,
@@ -2930,10 +2960,10 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// This isn't right, although it's probably harmless on x86; liveouts
// should be computed from returns not tail calls. Consider a void
// function making a tail call to a function returning int.
- return DAG.getNode(X86ISD::TC_RETURN, dl, NodeTys, &Ops[0], Ops.size());
+ return DAG.getNode(X86ISD::TC_RETURN, dl, NodeTys, Ops);
}
- Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, &Ops[0], Ops.size());
+ Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, Ops);
InFlag = Chain.getValue(1);
// Create the CALLSEQ_END node.
@@ -3927,6 +3957,29 @@ static bool isMOVLHPSMask(ArrayRef<int> Mask, MVT VT) {
return true;
}
+/// isINSERTPSMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a shuffle of elements that is suitable for input to INSERTPS.
+/// i.e., if all but one element comes from the same vector.
+static bool isINSERTPSMask(ArrayRef<int> Mask, MVT VT) {
+ // TODO: Deal with AVX's VINSERTPS
+ if (!VT.is128BitVector() || (VT != MVT::v4f32 && VT != MVT::v4i32))
+ return false;
+
+ unsigned CorrectPosV1 = 0;
+ unsigned CorrectPosV2 = 0;
+ for (int i = 0, e = (int)VT.getVectorNumElements(); i != e; ++i)
+ if (Mask[i] == i)
+ ++CorrectPosV1;
+ else if (Mask[i] == i + 4)
+ ++CorrectPosV2;
+
+ if (CorrectPosV1 == 3 || CorrectPosV2 == 3)
+ // We have 3 elements from one vector, and one from another.
+ return true;
+
+ return false;
+}
+
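A small standalone check of the counting logic above (a hypothetical driver, not part of the patch): three elements in place from one source plus a single element from the other is exactly the INSERTPS shape.

    #include <array>
    #include <cassert>

    static bool looksLikeInsertPS(const std::array<int, 4> &Mask) {
      unsigned V1 = 0, V2 = 0;
      for (int i = 0; i != 4; ++i) {
        if (Mask[i] == i)          ++V1;  // element i already in place from V1
        else if (Mask[i] == i + 4) ++V2;  // element i already in place from V2
      }
      return V1 == 3 || V2 == 3;
    }

    int main() {
      std::array<int, 4> A = {0, 1, 2, 7};  // insert V2's lane 3 into V1: true
      std::array<int, 4> B = {4, 1, 6, 3};  // two lanes from each source: false
      assert(looksLikeInsertPS(A) && !looksLikeInsertPS(B));
    }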
//
// Some special combinations that can be optimized.
//
@@ -4146,6 +4199,29 @@ static bool isUNPCKH_v_undef_Mask(ArrayRef<int> Mask, MVT VT, bool HasInt256) {
return true;
}
+// Match for INSERTI64x4/INSERTF64x4 instructions: (src0[0], src1[0]) or
+// (src1[0], src0[1]); i.e. shuffles that rearrange the 256-bit halves of a
+// 512-bit vector.
+static bool isINSERT64x4Mask(ArrayRef<int> Mask, MVT VT, unsigned int *Imm) {
+ if (!VT.is512BitVector())
+ return false;
+
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned HalfSize = NumElts/2;
+ if (isSequentialOrUndefInRange(Mask, 0, HalfSize, 0)) {
+ if (isSequentialOrUndefInRange(Mask, HalfSize, HalfSize, NumElts)) {
+ *Imm = 1;
+ return true;
+ }
+ }
+ if (isSequentialOrUndefInRange(Mask, 0, HalfSize, NumElts)) {
+ if (isSequentialOrUndefInRange(Mask, HalfSize, HalfSize, HalfSize)) {
+ *Imm = 0;
+ return true;
+ }
+ }
+ return false;
+}
+
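Worked example for the new predicate (my own values, not from the patch), using v8i64 so that NumElts == 8 and HalfSize == 4:

    // Mask = {0,1,2,3, 8,9,10,11}  -> low half of src0, low half of src1: *Imm = 1
    // Mask = {8,9,10,11, 4,5,6,7}  -> low half of src1, high half of src0: *Imm = 0
    // Anything else, e.g. the identity {0,1,2,3, 4,5,6,7}, fails both tests
    // and is left for the other shuffle lowerings.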
/// isMOVLMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVSS,
/// MOVSD, and MOVD, i.e. setting the lowest element.
@@ -4624,11 +4700,17 @@ unsigned X86::getInsertVINSERT256Immediate(SDNode *N) {
return getInsertVINSERTImmediate(N, 256);
}
+/// isZero - Returns true if Elt is a constant integer zero
+static bool isZero(SDValue V) {
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(V);
+ return C && C->isNullValue();
+}
+
/// isZeroNode - Returns true if Elt is a constant zero or a floating point
/// constant +0.0.
bool X86::isZeroNode(SDValue Elt) {
- if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Elt))
- return CN->isNullValue();
+ if (isZero(Elt))
+ return true;
if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Elt))
return CFP->getValueAPF().isPosZero();
return false;
@@ -4677,7 +4759,7 @@ static bool ShouldXformToMOVHLPS(ArrayRef<int> Mask, MVT VT) {
/// isScalarLoadToVector - Returns true if the node is a scalar load that
/// is promoted to a vector. It also returns the LoadSDNode by reference if
/// required.
-static bool isScalarLoadToVector(SDNode *N, LoadSDNode **LD = NULL) {
+static bool isScalarLoadToVector(SDNode *N, LoadSDNode **LD = nullptr) {
if (N->getOpcode() != ISD::SCALAR_TO_VECTOR)
return false;
N = N->getOperand(0).getNode();
@@ -4803,28 +4885,24 @@ static SDValue getZeroVector(EVT VT, const X86Subtarget *Subtarget,
if (Subtarget->hasInt256()) { // AVX2
SDValue Cst = DAG.getTargetConstant(0, MVT::i32);
SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
- Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i32, Ops,
- array_lengthof(Ops));
+ Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i32, Ops);
} else {
// 256-bit logic and arithmetic instructions in AVX are all
// floating-point, no support for integer ops. Emit fp zeroed vectors.
SDValue Cst = DAG.getTargetConstantFP(+0.0, MVT::f32);
SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
- Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8f32, Ops,
- array_lengthof(Ops));
+ Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8f32, Ops);
}
} else if (VT.is512BitVector()) { // AVX-512
SDValue Cst = DAG.getTargetConstant(0, MVT::i32);
SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst,
Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
- Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i32, Ops, 16);
+ Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i32, Ops);
} else if (VT.getScalarType() == MVT::i1) {
assert(VT.getVectorNumElements() <= 16 && "Unexpected vector type");
SDValue Cst = DAG.getTargetConstant(0, MVT::i1);
- SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst,
- Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
- return DAG.getNode(ISD::BUILD_VECTOR, dl, VT,
- Ops, VT.getVectorNumElements());
+ SmallVector<SDValue, 16> Ops(VT.getVectorNumElements(), Cst);
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
} else
llvm_unreachable("Unexpected vector type");
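Note: the replacement for the hand-written 16-way splat generalizes to any element count. A sketch assuming Cst, VT and dl as above:

    SmallVector<SDValue, 16> Ops(VT.getVectorNumElements(), Cst); // N copies of Cst
    SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);   // splat build_vector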
@@ -4844,8 +4922,7 @@ static SDValue getOnesVector(MVT VT, bool HasInt256, SelectionDAG &DAG,
if (VT.is256BitVector()) {
if (HasInt256) { // AVX2
SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
- Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i32, Ops,
- array_lengthof(Ops));
+ Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i32, Ops);
} else { // AVX
Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
Vec = Concat128BitVectors(Vec, Vec, MVT::v8i32, 8, DAG, dl);
@@ -5307,7 +5384,7 @@ static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros,
return SDValue();
SDLoc dl(Op);
- SDValue V(0, 0);
+ SDValue V;
bool First = true;
for (unsigned i = 0; i < 16; ++i) {
bool ThisIsNonZero = (NonZeros & (1 << i)) != 0;
@@ -5320,7 +5397,7 @@ static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros,
}
if ((i & 1) != 0) {
- SDValue ThisElt(0, 0), LastElt(0, 0);
+ SDValue ThisElt, LastElt;
bool LastIsNonZero = (NonZeros & (1 << (i-1))) != 0;
if (LastIsNonZero) {
LastElt = DAG.getNode(ISD::ZERO_EXTEND, dl,
@@ -5355,7 +5432,7 @@ static SDValue LowerBuildVectorv8i16(SDValue Op, unsigned NonZeros,
return SDValue();
SDLoc dl(Op);
- SDValue V(0, 0);
+ SDValue V;
bool First = true;
for (unsigned i = 0; i < 8; ++i) {
bool isNonZero = (NonZeros & (1 << i)) != 0;
@@ -5376,6 +5453,79 @@ static SDValue LowerBuildVectorv8i16(SDValue Op, unsigned NonZeros,
return V;
}
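Note: `SDValue V(0, 0)` and `SDValue V;` are equivalent: the default constructor leaves the node pointer null, which is also what the boolean tests introduced by this patch rely on. Sketch:

    SDValue V;            // V.getNode() == nullptr
    if (!V.getNode()) {   // idiomatic emptiness check after this patch
      // not yet assigned
    }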
+/// LowerBuildVectorv4x32 - Custom lower build_vector of v4i32 or v4f32.
+static SDValue LowerBuildVectorv4x32(SDValue Op, unsigned NumElems,
+ unsigned NonZeros, unsigned NumNonZero,
+ unsigned NumZero, SelectionDAG &DAG,
+ const X86Subtarget *Subtarget,
+ const TargetLowering &TLI) {
+ // We know there's at least one non-zero element
+ unsigned FirstNonZeroIdx = 0;
+ SDValue FirstNonZero = Op->getOperand(FirstNonZeroIdx);
+ while (FirstNonZero.getOpcode() == ISD::UNDEF ||
+ X86::isZeroNode(FirstNonZero)) {
+ ++FirstNonZeroIdx;
+ FirstNonZero = Op->getOperand(FirstNonZeroIdx);
+ }
+
+ if (FirstNonZero.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
+ !isa<ConstantSDNode>(FirstNonZero.getOperand(1)))
+ return SDValue();
+
+ SDValue V = FirstNonZero.getOperand(0);
+ MVT VVT = V.getSimpleValueType();
+ if (!Subtarget->hasSSE41() || (VVT != MVT::v4f32 && VVT != MVT::v4i32))
+ return SDValue();
+
+ unsigned FirstNonZeroDst =
+ cast<ConstantSDNode>(FirstNonZero.getOperand(1))->getZExtValue();
+ unsigned CorrectIdx = FirstNonZeroDst == FirstNonZeroIdx;
+ unsigned IncorrectIdx = CorrectIdx ? -1U : FirstNonZeroIdx;
+ unsigned IncorrectDst = CorrectIdx ? -1U : FirstNonZeroDst;
+
+ for (unsigned Idx = FirstNonZeroIdx + 1; Idx < NumElems; ++Idx) {
+ SDValue Elem = Op.getOperand(Idx);
+ if (Elem.getOpcode() == ISD::UNDEF || X86::isZeroNode(Elem))
+ continue;
+
+ // TODO: What else can be here? Deal with it.
+ if (Elem.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
+ return SDValue();
+
+  // TODO: Some optimizations are still possible here;
+  // e.g. getting one element from one vector and the rest from another.
+ if (Elem.getOperand(0) != V)
+ return SDValue();
+
+ unsigned Dst = cast<ConstantSDNode>(Elem.getOperand(1))->getZExtValue();
+ if (Dst == Idx)
+ ++CorrectIdx;
+ else if (IncorrectIdx == -1U) {
+ IncorrectIdx = Idx;
+ IncorrectDst = Dst;
+ } else
+ // There was already one element with an incorrect index.
+ // We can't optimize this case to an insertps.
+ return SDValue();
+ }
+
+ if (NumNonZero == CorrectIdx || NumNonZero == CorrectIdx + 1) {
+ SDLoc dl(Op);
+ EVT VT = Op.getSimpleValueType();
+ unsigned ElementMoveMask = 0;
+ if (IncorrectIdx == -1U)
+ ElementMoveMask = FirstNonZeroIdx << 6 | FirstNonZeroIdx << 4;
+ else
+ ElementMoveMask = IncorrectDst << 6 | IncorrectIdx << 4;
+
+ SDValue InsertpsMask =
+ DAG.getIntPtrConstant(ElementMoveMask | (~NonZeros & 0xf));
+ return DAG.getNode(X86ISD::INSERTPS, dl, VT, V, V, InsertpsMask);
+ }
+
+ return SDValue();
+}
+
/// getVShift - Return a vector logical shift node.
///
static SDValue getVShift(bool isLeft, EVT VT, SDValue SrcOp,
@@ -5480,7 +5630,7 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl<SDValue> &Elts,
EVT EltVT = VT.getVectorElementType();
unsigned NumElems = Elts.size();
- LoadSDNode *LDBase = NULL;
+ LoadSDNode *LDBase = nullptr;
unsigned LastLoadedElt = -1U;
// For each element in the initializer, see if we've found a load or an undef.
@@ -5545,8 +5695,7 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl<SDValue> &Elts,
SDVTList Tys = DAG.getVTList(MVT::v2i64, MVT::Other);
SDValue Ops[] = { LDBase->getChain(), LDBase->getBasePtr() };
SDValue ResNode =
- DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, DL, Tys, Ops,
- array_lengthof(Ops), MVT::i64,
+ DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, DL, Tys, Ops, MVT::i64,
LDBase->getPointerInfo(),
LDBase->getAlignment(),
false/*isVolatile*/, true/*ReadMem*/,
@@ -5661,7 +5810,7 @@ static SDValue LowerVectorBroadcast(SDValue Op, const X86Subtarget* Subtarget,
unsigned ScalarSize = CVT.getSizeInBits();
if (ScalarSize == 32 || (IsGE256 && ScalarSize == 64)) {
- const Constant *C = 0;
+ const Constant *C = nullptr;
if (ConstantSDNode *CI = dyn_cast<ConstantSDNode>(Ld))
C = CI->getConstantIntValue();
else if (ConstantFPSDNode *CF = dyn_cast<ConstantFPSDNode>(Ld))
@@ -5706,6 +5855,41 @@ static SDValue LowerVectorBroadcast(SDValue Op, const X86Subtarget* Subtarget,
return SDValue();
}
+/// \brief For an EXTRACT_VECTOR_ELT with a constant index return the real
+/// underlying vector and index.
+///
+/// Modifies \p ExtractedFromVec to the real vector and returns the real
+/// index.
+static int getUnderlyingExtractedFromVec(SDValue &ExtractedFromVec,
+ SDValue ExtIdx) {
+ int Idx = cast<ConstantSDNode>(ExtIdx)->getZExtValue();
+ if (!isa<ShuffleVectorSDNode>(ExtractedFromVec))
+ return Idx;
+
+ // For 256-bit vectors, LowerEXTRACT_VECTOR_ELT_SSE4 may have already
+ // lowered this:
+ // (extract_vector_elt (v8f32 %vreg1), Constant<6>)
+ // to:
+ // (extract_vector_elt (vector_shuffle<2,u,u,u>
+ // (extract_subvector (v8f32 %vreg0), Constant<4>),
+ // undef)
+ // Constant<0>)
+ // In this case the vector is the extract_subvector expression and the index
+ // is 2, as specified by the shuffle.
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(ExtractedFromVec);
+ SDValue ShuffleVec = SVOp->getOperand(0);
+ MVT ShuffleVecVT = ShuffleVec.getSimpleValueType();
+ assert(ShuffleVecVT.getVectorElementType() ==
+ ExtractedFromVec.getSimpleValueType().getVectorElementType());
+
+ int ShuffleIdx = SVOp->getMaskElt(Idx);
+ if (isUndefOrInRange(ShuffleIdx, 0, ShuffleVecVT.getVectorNumElements())) {
+ ExtractedFromVec = ShuffleVec;
+ return ShuffleIdx;
+ }
+ return Idx;
+}
+
static SDValue buildFromShuffleMostly(SDValue Op, SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
@@ -5739,34 +5923,32 @@ static SDValue buildFromShuffleMostly(SDValue Op, SelectionDAG &DAG) {
SDValue ExtractedFromVec = Op.getOperand(i).getOperand(0);
SDValue ExtIdx = Op.getOperand(i).getOperand(1);
+ // Quit if non-constant index.
+ if (!isa<ConstantSDNode>(ExtIdx))
+ return SDValue();
+ int Idx = getUnderlyingExtractedFromVec(ExtractedFromVec, ExtIdx);
// Quit if extracted from vector of different type.
if (ExtractedFromVec.getValueType() != VT)
return SDValue();
- // Quit if non-constant index.
- if (!isa<ConstantSDNode>(ExtIdx))
- return SDValue();
-
- if (VecIn1.getNode() == 0)
+ if (!VecIn1.getNode())
VecIn1 = ExtractedFromVec;
else if (VecIn1 != ExtractedFromVec) {
- if (VecIn2.getNode() == 0)
+ if (!VecIn2.getNode())
VecIn2 = ExtractedFromVec;
else if (VecIn2 != ExtractedFromVec)
// Quit if more than 2 vectors to shuffle
return SDValue();
}
- unsigned Idx = cast<ConstantSDNode>(ExtIdx)->getZExtValue();
-
if (ExtractedFromVec == VecIn1)
Mask[i] = Idx;
else if (ExtractedFromVec == VecIn2)
Mask[i] = Idx + NumElems;
}
- if (VecIn1.getNode() == 0)
+ if (!VecIn1.getNode())
return SDValue();
VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT);
@@ -5791,24 +5973,22 @@ X86TargetLowering::LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const {
SDLoc dl(Op);
if (ISD::isBuildVectorAllZeros(Op.getNode())) {
SDValue Cst = DAG.getTargetConstant(0, MVT::i1);
- SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst,
- Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
- return DAG.getNode(ISD::BUILD_VECTOR, dl, VT,
- Ops, VT.getVectorNumElements());
+ SmallVector<SDValue, 16> Ops(VT.getVectorNumElements(), Cst);
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
}
if (ISD::isBuildVectorAllOnes(Op.getNode())) {
SDValue Cst = DAG.getTargetConstant(1, MVT::i1);
- SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst,
- Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
- return DAG.getNode(ISD::BUILD_VECTOR, dl, VT,
- Ops, VT.getVectorNumElements());
+ SmallVector<SDValue, 16> Ops(VT.getVectorNumElements(), Cst);
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
}
bool AllContants = true;
uint64_t Immediate = 0;
int NonConstIdx = -1;
bool IsSplat = true;
+ unsigned NumNonConsts = 0;
+ unsigned NumConsts = 0;
for (unsigned idx = 0, e = Op.getNumOperands(); idx < e; ++idx) {
SDValue In = Op.getOperand(idx);
if (In.getOpcode() == ISD::UNDEF)
@@ -5816,9 +5996,13 @@ X86TargetLowering::LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const {
if (!isa<ConstantSDNode>(In)) {
AllContants = false;
NonConstIdx = idx;
+ NumNonConsts++;
}
- else if (cast<ConstantSDNode>(In)->getZExtValue())
+ else {
+ NumConsts++;
+ if (cast<ConstantSDNode>(In)->getZExtValue())
Immediate |= (1ULL << idx);
+ }
if (In != Op.getOperand(0))
IsSplat = false;
}
@@ -5830,6 +6014,19 @@ X86TargetLowering::LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const {
DAG.getIntPtrConstant(0));
}
+ if (NumNonConsts == 1 && NonConstIdx != 0) {
+ SDValue DstVec;
+ if (NumConsts) {
+ SDValue VecAsImm = DAG.getConstant(Immediate,
+ MVT::getIntegerVT(VT.getSizeInBits()));
+ DstVec = DAG.getNode(ISD::BITCAST, dl, VT, VecAsImm);
+ }
+ else
+ DstVec = DAG.getUNDEF(VT);
+ return DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, DstVec,
+ Op.getOperand(NonConstIdx),
+ DAG.getIntPtrConstant(NonConstIdx));
+ }
if (!IsSplat && (NonConstIdx != 0))
llvm_unreachable("Unsupported BUILD_VECTOR operation");
MVT SelectVT = (VT == MVT::v16i1)? MVT::i16 : MVT::i8;
@@ -6043,9 +6240,10 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
EVT HVT = EVT::getVectorVT(*DAG.getContext(), ExtVT, NumElems/2);
// Build both the lower and upper subvector.
- SDValue Lower = DAG.getNode(ISD::BUILD_VECTOR, dl, HVT, &V[0], NumElems/2);
- SDValue Upper = DAG.getNode(ISD::BUILD_VECTOR, dl, HVT, &V[NumElems / 2],
- NumElems/2);
+ SDValue Lower = DAG.getNode(ISD::BUILD_VECTOR, dl, HVT,
+ makeArrayRef(&V[0], NumElems/2));
+ SDValue Upper = DAG.getNode(ISD::BUILD_VECTOR, dl, HVT,
+ makeArrayRef(&V[NumElems / 2], NumElems/2));
// Recreate the wider vector with the lower and upper part.
if (VT.is256BitVector())
@@ -6078,6 +6276,14 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
if (V.getNode()) return V;
}
+ // If element VT is == 32 bits and has 4 elems, try to generate an INSERTPS
+ if (EVTBits == 32 && NumElems == 4) {
+ SDValue V = LowerBuildVectorv4x32(Op, NumElems, NonZeros, NumNonZero,
+ NumZero, DAG, Subtarget, *this);
+ if (V.getNode())
+ return V;
+ }
+
// If element VT is == 32 bits, turn it into a number of shuffles.
SmallVector<SDValue, 8> V(NumElems);
if (NumElems == 4 && NumZero > 0) {
@@ -6332,8 +6538,7 @@ static SDValue getPSHUFB(ArrayRef<int> MaskVals, SDValue V1, SDLoc &dl,
if (ShufVT != VT)
V1 = DAG.getNode(ISD::BITCAST, dl, ShufVT, V1);
return DAG.getNode(X86ISD::PSHUFB, dl, ShufVT, V1,
- DAG.getNode(ISD::BUILD_VECTOR, dl, ShufVT,
- PshufbMask.data(), PshufbMask.size()));
+ DAG.getNode(ISD::BUILD_VECTOR, dl, ShufVT, PshufbMask));
}
// v8i16 shuffles - Prefer shuffles in the following order:
@@ -6516,7 +6721,7 @@ LowerVECTOR_SHUFFLEv8i16(SDValue Op, const X86Subtarget *Subtarget,
NewV = DAG.getVectorShuffle(MVT::v8i16, dl, NewV, DAG.getUNDEF(MVT::v8i16),
&MaskV[0]);
- if (NewV.getOpcode() == ISD::VECTOR_SHUFFLE && Subtarget->hasSSSE3()) {
+ if (NewV.getOpcode() == ISD::VECTOR_SHUFFLE && Subtarget->hasSSE2()) {
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(NewV.getNode());
NewV = getTargetShuffleNode(X86ISD::PSHUFLW, dl, MVT::v8i16,
NewV.getOperand(0),
@@ -6540,7 +6745,7 @@ LowerVECTOR_SHUFFLEv8i16(SDValue Op, const X86Subtarget *Subtarget,
NewV = DAG.getVectorShuffle(MVT::v8i16, dl, NewV, DAG.getUNDEF(MVT::v8i16),
&MaskV[0]);
- if (NewV.getOpcode() == ISD::VECTOR_SHUFFLE && Subtarget->hasSSSE3()) {
+ if (NewV.getOpcode() == ISD::VECTOR_SHUFFLE && Subtarget->hasSSE2()) {
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(NewV.getNode());
NewV = getTargetShuffleNode(X86ISD::PSHUFHW, dl, MVT::v8i16,
NewV.getOperand(0),
@@ -6635,7 +6840,7 @@ static SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp,
}
V1 = DAG.getNode(X86ISD::PSHUFB, dl, MVT::v16i8, V1,
DAG.getNode(ISD::BUILD_VECTOR, dl,
- MVT::v16i8, &pshufbMask[0], 16));
+ MVT::v16i8, pshufbMask));
// As PSHUFB will zero elements with negative indices, it's safe to ignore
// the 2nd operand if it's undefined or zero.
@@ -6653,7 +6858,7 @@ static SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp,
}
V2 = DAG.getNode(X86ISD::PSHUFB, dl, MVT::v16i8, V2,
DAG.getNode(ISD::BUILD_VECTOR, dl,
- MVT::v16i8, &pshufbMask[0], 16));
+ MVT::v16i8, pshufbMask));
return DAG.getNode(ISD::OR, dl, MVT::v16i8, V1, V2);
}
@@ -6771,6 +6976,9 @@ SDValue RewriteAsNarrowerShuffle(ShuffleVectorSDNode *SVOp,
unsigned Scale;
switch (VT.SimpleTy) {
default: llvm_unreachable("Unexpected!");
+ case MVT::v2i64:
+ case MVT::v2f64:
+ return SDValue(SVOp, 0);
case MVT::v4f32: NewVT = MVT::v2f64; Scale = 2; break;
case MVT::v4i32: NewVT = MVT::v2i64; Scale = 2; break;
case MVT::v8i16: NewVT = MVT::v4i32; Scale = 2; break;
@@ -6805,7 +7013,7 @@ static SDValue getVZextMovL(MVT VT, MVT OpVT,
SDValue SrcOp, SelectionDAG &DAG,
const X86Subtarget *Subtarget, SDLoc dl) {
if (VT == MVT::v2f64 || VT == MVT::v4f32) {
- LoadSDNode *LD = NULL;
+ LoadSDNode *LD = nullptr;
if (!isScalarLoadToVector(SrcOp.getNode(), &LD))
LD = dyn_cast<LoadSDNode>(SrcOp);
if (!LD) {
@@ -6924,8 +7132,7 @@ LowerVECTOR_SHUFFLE_256(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) {
}
// Construct the output using a BUILD_VECTOR.
- Output[l] = DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, &SVOps[0],
- SVOps.size());
+ Output[l] = DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, SVOps);
} else if (InputUsed[0] < 0) {
// No input vectors were used! The result is undefined.
Output[l] = DAG.getUNDEF(NVT);
@@ -7207,6 +7414,93 @@ SDValue getMOVLP(SDValue &Op, SDLoc &dl, SelectionDAG &DAG, bool HasSSE2) {
getShuffleSHUFImmediate(SVOp), DAG);
}
+static SDValue NarrowVectorLoadToElement(LoadSDNode *Load, unsigned Index,
+ SelectionDAG &DAG) {
+ SDLoc dl(Load);
+ MVT VT = Load->getSimpleValueType(0);
+ MVT EVT = VT.getVectorElementType();
+ SDValue Addr = Load->getOperand(1);
+ SDValue NewAddr = DAG.getNode(
+ ISD::ADD, dl, Addr.getSimpleValueType(), Addr,
+ DAG.getConstant(Index * EVT.getStoreSize(), Addr.getSimpleValueType()));
+
+ SDValue NewLoad =
+ DAG.getLoad(EVT, dl, Load->getChain(), NewAddr,
+ DAG.getMachineFunction().getMachineMemOperand(
+ Load->getMemOperand(), 0, EVT.getStoreSize()));
+ return NewLoad;
+}
+
+// It is only safe to call this function if isINSERTPSMask is true for
+// this shufflevector mask.
+static SDValue getINSERTPS(ShuffleVectorSDNode *SVOp, SDLoc &dl,
+ SelectionDAG &DAG) {
+ // Generate an insertps instruction when inserting an f32 from memory onto a
+ // v4f32 or when copying a member from one v4f32 to another.
+ // We also use it for transferring i32 from one register to another,
+ // since it simply copies the same bits.
+ // If we're transferring an i32 from memory to a specific element in a
+ // register, we output a generic DAG that will match the PINSRD
+ // instruction.
+ MVT VT = SVOp->getSimpleValueType(0);
+ MVT EVT = VT.getVectorElementType();
+ SDValue V1 = SVOp->getOperand(0);
+ SDValue V2 = SVOp->getOperand(1);
+ auto Mask = SVOp->getMask();
+ assert((VT == MVT::v4f32 || VT == MVT::v4i32) &&
+ "unsupported vector type for insertps/pinsrd");
+
+ int FromV1 = std::count_if(Mask.begin(), Mask.end(),
+ [](const int &i) { return i < 4; });
+
+ SDValue From;
+ SDValue To;
+ unsigned DestIndex;
+ if (FromV1 == 1) {
+ From = V1;
+ To = V2;
+ DestIndex = std::find_if(Mask.begin(), Mask.end(),
+ [](const int &i) { return i < 4; }) -
+ Mask.begin();
+ } else {
+ From = V2;
+ To = V1;
+ DestIndex = std::find_if(Mask.begin(), Mask.end(),
+ [](const int &i) { return i >= 4; }) -
+ Mask.begin();
+ }
+
+ if (MayFoldLoad(From)) {
+ // Trivial case, when From comes from a load and is only used by the
+ // shuffle. Make it use insertps from the vector that we need from that
+ // load.
+ SDValue NewLoad =
+ NarrowVectorLoadToElement(cast<LoadSDNode>(From), DestIndex, DAG);
+ if (!NewLoad.getNode())
+ return SDValue();
+
+ if (EVT == MVT::f32) {
+ // Create this as a scalar to vector to match the instruction pattern.
+ SDValue LoadScalarToVector =
+ DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, NewLoad);
+ SDValue InsertpsMask = DAG.getIntPtrConstant(DestIndex << 4);
+ return DAG.getNode(X86ISD::INSERTPS, dl, VT, To, LoadScalarToVector,
+ InsertpsMask);
+ } else { // EVT == MVT::i32
+ // If we're getting an i32 from memory, use an INSERT_VECTOR_ELT
+ // instruction, to match the PINSRD instruction, which loads an i32 to a
+ // certain vector element.
+ return DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, To, NewLoad,
+ DAG.getConstant(DestIndex, MVT::i32));
+ }
+ }
+
+ // Vector-element-to-vector
+ unsigned SrcIndex = Mask[DestIndex] % 4;
+ SDValue InsertpsMask = DAG.getIntPtrConstant(DestIndex << 4 | SrcIndex << 6);
+ return DAG.getNode(X86ISD::INSERTPS, dl, VT, To, From, InsertpsMask);
+}
+
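A worked pass through getINSERTPS with my own example mask: for v4f32 and Mask = {4, 5, 2, 7}, exactly one element (lane 2) comes from V1, so FromV1 == 1, From = V1, To = V2, and DestIndex = 2. In the no-load case the immediate is then built as:

    unsigned SrcIndex = 2 % 4;                  // Mask[DestIndex] % 4 == 2
    unsigned Imm = (2 << 4) | (SrcIndex << 6);  // 0xA0: src lane 2 -> dst lane 2
    // Bits [7:6] select the source element of From, bits [5:4] the destination
    // lane in To, matching the INSERTPS immediate encoding.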
// Reduce a vector shuffle to zext.
static SDValue LowerVectorIntExtend(SDValue Op, const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
@@ -7295,9 +7589,8 @@ static SDValue LowerVectorIntExtend(SDValue Op, const X86Subtarget *Subtarget,
DAG.getNode(X86ISD::VZEXT, DL, NVT, V1));
}
-static SDValue
-NormalizeVectorShuffle(SDValue Op, const X86Subtarget *Subtarget,
- SelectionDAG &DAG) {
+static SDValue NormalizeVectorShuffle(SDValue Op, const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
MVT VT = Op.getSimpleValueType();
SDLoc dl(Op);
@@ -7322,31 +7615,29 @@ NormalizeVectorShuffle(SDValue Op, const X86Subtarget *Subtarget,
// If the shuffle can be profitably rewritten as a narrower shuffle, then
// do it!
- if (VT == MVT::v8i16 || VT == MVT::v16i8 ||
- VT == MVT::v16i16 || VT == MVT::v32i8) {
+ if (VT == MVT::v8i16 || VT == MVT::v16i8 || VT == MVT::v16i16 ||
+ VT == MVT::v32i8) {
SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG);
if (NewOp.getNode())
return DAG.getNode(ISD::BITCAST, dl, VT, NewOp);
- } else if ((VT == MVT::v4i32 ||
- (VT == MVT::v4f32 && Subtarget->hasSSE2()))) {
+ } else if (VT.is128BitVector() && Subtarget->hasSSE2()) {
// FIXME: Figure out a cleaner way to do this.
- // Try to make use of movq to zero out the top part.
if (ISD::isBuildVectorAllZeros(V2.getNode())) {
SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG);
if (NewOp.getNode()) {
MVT NewVT = NewOp.getSimpleValueType();
if (isCommutedMOVLMask(cast<ShuffleVectorSDNode>(NewOp)->getMask(),
NewVT, true, false))
- return getVZextMovL(VT, NewVT, NewOp.getOperand(0),
- DAG, Subtarget, dl);
+ return getVZextMovL(VT, NewVT, NewOp.getOperand(0), DAG, Subtarget,
+ dl);
}
} else if (ISD::isBuildVectorAllZeros(V1.getNode())) {
SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG);
if (NewOp.getNode()) {
MVT NewVT = NewOp.getSimpleValueType();
if (isMOVLMask(cast<ShuffleVectorSDNode>(NewOp)->getMask(), NewVT))
- return getVZextMovL(VT, NewVT, NewOp.getOperand(1),
- DAG, Subtarget, dl);
+ return getVZextMovL(VT, NewVT, NewOp.getOperand(1), DAG, Subtarget,
+ dl);
}
}
}
@@ -7609,6 +7900,11 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
getShuffleSHUFImmediate(SVOp), DAG);
}
+ unsigned Idx;
+ if (VT.is512BitVector() && isINSERT64x4Mask(M, VT, &Idx))
+ return Insert256BitVector(V1, Extract256BitVector(V2, 0, DAG, dl),
+ Idx*(NumElems/2), DAG, dl);
+
// Handle VPERM2F128/VPERM2I128 permutations
if (isVPERM2X128Mask(M, VT, HasFp256))
return getTargetShuffleNode(X86ISD::VPERM2X128, dl, VT, V1,
@@ -7618,6 +7914,9 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
if (BlendOp.getNode())
return BlendOp;
+ if (Subtarget->hasSSE41() && isINSERTPSMask(M, VT))
+ return getINSERTPS(SVOp, dl, DAG);
+
unsigned Imm8;
if (V2IsUndef && HasInt256 && isPermImmMask(M, VT, Imm8))
return getTargetShuffleNode(X86ISD::VPERMI, dl, VT, V1, Imm8, DAG);
@@ -7631,8 +7930,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
permclMask.push_back(DAG.getConstant((M[i]>=0) ? M[i] : 0, MaskEltVT));
}
- SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVectorVT,
- &permclMask[0], NumElems);
+ SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, MaskVectorVT, permclMask);
if (V2IsUndef)
// Bitcast is for VPERMPS since mask is v8i32 but node takes v8f32
return DAG.getNode(X86ISD::VPERMV, dl, VT,
@@ -7684,6 +7982,109 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
return SDValue();
}
+// This function assumes its argument is a BUILD_VECTOR of constants or
+// undef SDNodes, i.e. ISD::isBuildVectorOfConstantSDNodes(BuildVector) is
+// true.
+static bool BUILD_VECTORtoBlendMask(BuildVectorSDNode *BuildVector,
+ unsigned &MaskValue) {
+ MaskValue = 0;
+ unsigned NumElems = BuildVector->getNumOperands();
+ // There are 2 lanes if (NumElems > 8), and 1 lane otherwise.
+ unsigned NumLanes = (NumElems - 1) / 8 + 1;
+ unsigned NumElemsInLane = NumElems / NumLanes;
+
+  // A blend for v16i16 must use the same mask in both 128-bit lanes.
+ for (unsigned i = 0; i < NumElemsInLane; ++i) {
+ SDValue EltCond = BuildVector->getOperand(i);
+ SDValue SndLaneEltCond =
+ (NumLanes == 2) ? BuildVector->getOperand(i + NumElemsInLane) : EltCond;
+
+ int Lane1Cond = -1, Lane2Cond = -1;
+ if (isa<ConstantSDNode>(EltCond))
+ Lane1Cond = !isZero(EltCond);
+ if (isa<ConstantSDNode>(SndLaneEltCond))
+ Lane2Cond = !isZero(SndLaneEltCond);
+
+ if (Lane1Cond == Lane2Cond || Lane2Cond < 0)
+      // Lane1Cond != 0 means we want the first argument.
+      // Lane1Cond == 0 means we want the second argument.
+      // The mask encodes the first argument as 0 and the second as 1,
+      // so invert the condition.
+ MaskValue |= !Lane1Cond << i;
+ else if (Lane1Cond < 0)
+ MaskValue |= !Lane2Cond << i;
+ else
+ return false;
+ }
+ return true;
+}
+
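To make the lane-symmetry rule concrete, here is an editor's sketch of the same mask construction on plain integers; it assumes the condition is given as -1 (take the first operand) or 0 (take the second) and omits the undef handling above:

// Illustrative only: build a blend mask from a per-element condition.
static bool blendMaskFromCond(const int *Cond, unsigned NumElems,
                              unsigned &MaskValue) {
  MaskValue = 0;
  unsigned NumLanes = (NumElems - 1) / 8 + 1;
  unsigned NumElemsInLane = NumElems / NumLanes;
  for (unsigned i = 0; i < NumElemsInLane; ++i) {
    int C0 = Cond[i];
    int C1 = NumLanes == 2 ? Cond[i + NumElemsInLane] : C0;
    if (C0 != C1)
      return false;                        // the two lanes must agree
    MaskValue |= (unsigned)(C0 == 0) << i; // a set bit selects the 2nd operand
  }
  return true;
}

// For v8i16 with Cond = {-1,0,-1,0,-1,0,-1,0} this yields MaskValue == 0xAA.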
+// Try to lower a vselect node into a simple blend instruction.
+static SDValue LowerVSELECTtoBlend(SDValue Op, const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ SDValue Cond = Op.getOperand(0);
+ SDValue LHS = Op.getOperand(1);
+ SDValue RHS = Op.getOperand(2);
+ SDLoc dl(Op);
+ MVT VT = Op.getSimpleValueType();
+ MVT EltVT = VT.getVectorElementType();
+ unsigned NumElems = VT.getVectorNumElements();
+
+ // There is no blend with immediate in AVX-512.
+ if (VT.is512BitVector())
+ return SDValue();
+
+ if (!Subtarget->hasSSE41() || EltVT == MVT::i8)
+ return SDValue();
+ if (!Subtarget->hasInt256() && VT == MVT::v16i16)
+ return SDValue();
+
+ if (!ISD::isBuildVectorOfConstantSDNodes(Cond.getNode()))
+ return SDValue();
+
+ // Check the mask for BLEND and build the value.
+ unsigned MaskValue = 0;
+ if (!BUILD_VECTORtoBlendMask(cast<BuildVectorSDNode>(Cond), MaskValue))
+ return SDValue();
+
+  // Convert i32 vectors to floating point if AVX2 is not available.
+  // AVX2 introduced the VPBLENDD instruction for 128- and 256-bit vectors.
+  MVT BlendVT = VT;
+  if (EltVT == MVT::i64 || (EltVT == MVT::i32 && !Subtarget->hasInt256())) {
+    BlendVT = MVT::getVectorVT(MVT::getFloatingPointVT(EltVT.getSizeInBits()),
+                               NumElems);
+    LHS = DAG.getNode(ISD::BITCAST, dl, BlendVT, LHS);
+    RHS = DAG.getNode(ISD::BITCAST, dl, BlendVT, RHS);
+ }
+
+ SDValue Ret = DAG.getNode(X86ISD::BLENDI, dl, BlendVT, LHS, RHS,
+ DAG.getConstant(MaskValue, MVT::i32));
+ return DAG.getNode(ISD::BITCAST, dl, VT, Ret);
+}
+
+SDValue X86TargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const {
+ SDValue BlendOp = LowerVSELECTtoBlend(Op, Subtarget, DAG);
+ if (BlendOp.getNode())
+ return BlendOp;
+
+  // Some types for vselect were previously set to Expand, not Legal or
+  // Custom. Return an empty SDValue so we fall through to Expand after
+  // the Custom lowering phase.
+ MVT VT = Op.getSimpleValueType();
+ switch (VT.SimpleTy) {
+ default:
+ break;
+ case MVT::v8i16:
+ case MVT::v16i16:
+ return SDValue();
+ }
+
+ // We couldn't create a "Blend with immediate" node.
+ // This node should still be legal, but we'll have to emit a blendv*
+ // instruction.
+ return Op;
+}
+
static SDValue LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
SDLoc dl(Op);
@@ -7946,10 +8347,47 @@ static SDValue LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) {
return SDValue();
}
+/// Insert one bit to mask vector, like v16i1 or v8i1.
+/// AVX-512 feature.
+SDValue
+X86TargetLowering::InsertBitToMaskVector(SDValue Op, SelectionDAG &DAG) const {
+ SDLoc dl(Op);
+ SDValue Vec = Op.getOperand(0);
+ SDValue Elt = Op.getOperand(1);
+ SDValue Idx = Op.getOperand(2);
+ MVT VecVT = Vec.getSimpleValueType();
+
+ if (!isa<ConstantSDNode>(Idx)) {
+    // Non-constant index: extend the source and destination,
+    // insert the element, and then truncate the result.
+ MVT ExtVecVT = (VecVT == MVT::v8i1 ? MVT::v8i64 : MVT::v16i32);
+ MVT ExtEltVT = (VecVT == MVT::v8i1 ? MVT::i64 : MVT::i32);
+ SDValue ExtOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, ExtVecVT,
+ DAG.getNode(ISD::ZERO_EXTEND, dl, ExtVecVT, Vec),
+ DAG.getNode(ISD::ZERO_EXTEND, dl, ExtEltVT, Elt), Idx);
+ return DAG.getNode(ISD::TRUNCATE, dl, VecVT, ExtOp);
+ }
+
+ unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+ SDValue EltInVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VecVT, Elt);
+ if (Vec.getOpcode() == ISD::UNDEF)
+ return DAG.getNode(X86ISD::VSHLI, dl, VecVT, EltInVec,
+ DAG.getConstant(IdxVal, MVT::i8));
+ const TargetRegisterClass* rc = getRegClassFor(VecVT);
+  unsigned MaxShift = rc->getSize() * 8 - 1;
+  EltInVec = DAG.getNode(X86ISD::VSHLI, dl, VecVT, EltInVec,
+                         DAG.getConstant(MaxShift, MVT::i8));
+  EltInVec = DAG.getNode(X86ISD::VSRLI, dl, VecVT, EltInVec,
+                         DAG.getConstant(MaxShift - IdxVal, MVT::i8));
+ return DAG.getNode(ISD::OR, dl, VecVT, Vec, EltInVec);
+}
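The shift pair above is easiest to see on a scalar mask word; note that the final OR can only set a bit, so the shifts are what isolate the inserted bit at the right position. An editor's sketch of the set-bit path on a 16-lane mask (illustration only, not part of the patch):

#include <cstdint>

static uint16_t insertSetBit(uint16_t Vec, unsigned Idx) {
  const unsigned MaxShift = 15;              // register width minus one
  uint16_t Elt = 1;                          // the bit being inserted
  Elt = (uint16_t)(Elt << MaxShift);         // move it to the MSB (this also
                                             // discards stray high bits)
  Elt = (uint16_t)(Elt >> (MaxShift - Idx)); // logical shift down to Idx
  return (uint16_t)(Vec | Elt);
}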
SDValue
X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const {
MVT VT = Op.getSimpleValueType();
MVT EltVT = VT.getVectorElementType();
+
+ if (EltVT == MVT::i1)
+ return InsertBitToMaskVector(Op, DAG);
SDLoc dl(Op);
SDValue N0 = Op.getOperand(0);
@@ -8294,10 +8732,10 @@ GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA,
if (InFlag) {
SDValue Ops[] = { Chain, TGA, *InFlag };
- Chain = DAG.getNode(CallType, dl, NodeTys, Ops, array_lengthof(Ops));
+ Chain = DAG.getNode(CallType, dl, NodeTys, Ops);
} else {
SDValue Ops[] = { Chain, TGA };
- Chain = DAG.getNode(CallType, dl, NodeTys, Ops, array_lengthof(Ops));
+ Chain = DAG.getNode(CallType, dl, NodeTys, Ops);
}
// TLSADDR will be codegen'ed as call. Inform MFI that function has calls.
@@ -8325,7 +8763,7 @@ LowerToTLSGeneralDynamicModel32(GlobalAddressSDNode *GA, SelectionDAG &DAG,
static SDValue
LowerToTLSGeneralDynamicModel64(GlobalAddressSDNode *GA, SelectionDAG &DAG,
const EVT PtrVT) {
- return GetTLSADDR(DAG, DAG.getEntryNode(), GA, NULL, PtrVT,
+ return GetTLSADDR(DAG, DAG.getEntryNode(), GA, nullptr, PtrVT,
X86::RAX, X86II::MO_TLSGD);
}
@@ -8342,7 +8780,7 @@ static SDValue LowerToTLSLocalDynamicModel(GlobalAddressSDNode *GA,
SDValue Base;
if (is64Bit) {
- Base = GetTLSADDR(DAG, DAG.getEntryNode(), GA, NULL, PtrVT, X86::RAX,
+ Base = GetTLSADDR(DAG, DAG.getEntryNode(), GA, nullptr, PtrVT, X86::RAX,
X86II::MO_TLSLD, /*LocalDynamic=*/true);
} else {
SDValue InFlag;
@@ -8481,7 +8919,7 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
SDValue Chain = DAG.getEntryNode();
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue Args[] = { Chain, Offset };
- Chain = DAG.getNode(X86ISD::TLSCALL, DL, NodeTys, Args, 2);
+ Chain = DAG.getNode(X86ISD::TLSCALL, DL, NodeTys, Args);
// TLSCALL will be codegen'ed as call. Inform MFI that function has calls.
MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
@@ -8507,10 +8945,6 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
// Windows 64bit: gs:0x58
// Windows 32bit: fs:__tls_array
- // If GV is an alias then use the aliasee for determining
- // thread-localness.
- if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
- GV = GA->getAliasedGlobal();
SDLoc dl(GA);
SDValue Chain = DAG.getEntryNode();
@@ -8609,15 +9043,15 @@ static SDValue LowerShiftParts(SDValue Op, SelectionDAG &DAG) {
SDValue Ops1[4] = { Tmp3, Tmp1, CC, Cond };
if (Op.getOpcode() == ISD::SHL_PARTS) {
- Hi = DAG.getNode(X86ISD::CMOV, dl, VT, Ops0, 4);
- Lo = DAG.getNode(X86ISD::CMOV, dl, VT, Ops1, 4);
+ Hi = DAG.getNode(X86ISD::CMOV, dl, VT, Ops0);
+ Lo = DAG.getNode(X86ISD::CMOV, dl, VT, Ops1);
} else {
- Lo = DAG.getNode(X86ISD::CMOV, dl, VT, Ops0, 4);
- Hi = DAG.getNode(X86ISD::CMOV, dl, VT, Ops1, 4);
+ Lo = DAG.getNode(X86ISD::CMOV, dl, VT, Ops0);
+ Hi = DAG.getNode(X86ISD::CMOV, dl, VT, Ops1);
}
SDValue Ops[2] = { Lo, Hi };
- return DAG.getMergeValues(Ops, array_lengthof(Ops), dl);
+ return DAG.getMergeValues(Ops, dl);
}
SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
@@ -8680,8 +9114,7 @@ SDValue X86TargetLowering::BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain,
SDValue Ops[] = { Chain, StackSlot, DAG.getValueType(SrcVT) };
SDValue Result = DAG.getMemIntrinsicNode(useSSE ? X86ISD::FILD_FLAG :
X86ISD::FILD, DL,
- Tys, Ops, array_lengthof(Ops),
- SrcVT, MMO);
+ Tys, Ops, SrcVT, MMO);
if (useSSE) {
Chain = Result.getValue(1);
@@ -8704,8 +9137,7 @@ SDValue X86TargetLowering::BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain,
MachineMemOperand::MOStore, SSFISize, SSFISize);
Chain = DAG.getMemIntrinsicNode(X86ISD::FST, DL, Tys,
- Ops, array_lengthof(Ops),
- Op.getValueType(), MMO);
+ Ops, Op.getValueType(), MMO);
Result = DAG.getLoad(Op.getValueType(), DL, Chain, StackSlot,
MachinePointerInfo::getFixedStack(SSFI),
false, false, false, 0);
@@ -8900,7 +9332,7 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
SDVTList Tys = DAG.getVTList(MVT::f80, MVT::Other);
SDValue Ops[] = { Store, StackSlot, DAG.getValueType(MVT::i64) };
SDValue Fild = DAG.getMemIntrinsicNode(X86ISD::FILD, dl, Tys, Ops,
- array_lengthof(Ops), MVT::i64, MMO);
+ MVT::i64, MMO);
APInt FF(32, 0x5F800000ULL);
@@ -8993,8 +9425,7 @@ X86TargetLowering:: FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
MachineMemOperand *MMO =
MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(SSFI),
MachineMemOperand::MOLoad, MemSize, MemSize);
- Value = DAG.getMemIntrinsicNode(X86ISD::FLD, DL, Tys, Ops,
- array_lengthof(Ops), DstTy, MMO);
+ Value = DAG.getMemIntrinsicNode(X86ISD::FLD, DL, Tys, Ops, DstTy, MMO);
Chain = Value.getValue(1);
SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize, false);
StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
@@ -9008,8 +9439,7 @@ X86TargetLowering:: FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
// Build the FP_TO_INT*_IN_MEM
SDValue Ops[] = { Chain, Value, StackSlot };
SDValue FIST = DAG.getMemIntrinsicNode(Opc, DL, DAG.getVTList(MVT::Other),
- Ops, array_lengthof(Ops), DstTy,
- MMO);
+ Ops, DstTy, MMO);
return std::make_pair(FIST, StackSlot);
} else {
SDValue ftol = DAG.getNode(X86ISD::WIN_FTOL, DL,
@@ -9021,8 +9451,8 @@ X86TargetLowering:: FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
MVT::i32, eax.getValue(2));
SDValue Ops[] = { eax, edx };
SDValue pair = IsReplace
- ? DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Ops, array_lengthof(Ops))
- : DAG.getMergeValues(Ops, array_lengthof(Ops), DL);
+ ? DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Ops)
+ : DAG.getMergeValues(Ops, DL);
return std::make_pair(pair, SDValue());
}
}
@@ -9217,8 +9647,7 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
for (unsigned j = 0; j < 8; ++j)
pshufbMask.push_back(DAG.getConstant(0x80, MVT::i8));
}
- SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v32i8,
- &pshufbMask[0], 32);
+ SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v32i8, pshufbMask);
In = DAG.getNode(X86ISD::PSHUFB, DL, MVT::v32i8, In, BV);
In = DAG.getNode(ISD::BITCAST, DL, MVT::v4i64, In);
@@ -9284,7 +9713,7 @@ SDValue X86TargetLowering::LowerFP_TO_SINT(SDValue Op,
/*IsSigned=*/ true, /*IsReplace=*/ false);
SDValue FIST = Vals.first, StackSlot = Vals.second;
// If FP_TO_INTHelper failed, the node is actually supposed to be Legal.
- if (FIST.getNode() == 0) return Op;
+ if (!FIST.getNode()) return Op;
if (StackSlot.getNode())
// Load the result.
@@ -9581,12 +10010,29 @@ static SDValue LowerVectorAllZeroTest(SDValue Op, const X86Subtarget *Subtarget,
VecIns.back(), VecIns.back());
}
+/// \brief Return true if \c Op has a use that doesn't just read flags.
+static bool hasNonFlagsUse(SDValue Op) {
+ for (SDNode::use_iterator UI = Op->use_begin(), UE = Op->use_end(); UI != UE;
+ ++UI) {
+ SDNode *User = *UI;
+ unsigned UOpNo = UI.getOperandNo();
+ if (User->getOpcode() == ISD::TRUNCATE && User->hasOneUse()) {
+      // Look past the truncate.
+ UOpNo = User->use_begin().getOperandNo();
+ User = *User->use_begin();
+ }
+
+ if (User->getOpcode() != ISD::BRCOND && User->getOpcode() != ISD::SETCC &&
+ !(User->getOpcode() == ISD::SELECT && UOpNo == 0))
+ return true;
+ }
+ return false;
+}
+
/// Emit nodes that will be selected as "test Op0,Op0", or something
/// equivalent.
-SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC,
+SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, SDLoc dl,
SelectionDAG &DAG) const {
- SDLoc dl(Op);
-
if (Op.getValueType() == MVT::i1)
// KORTEST instruction should be selected
return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op,
@@ -9687,31 +10133,35 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC,
Opcode = X86ISD::ADD;
NumOperands = 2;
break;
- case ISD::AND: {
- // If the primary and result isn't used, don't bother using X86ISD::AND,
- // because a TEST instruction will be better.
- bool NonFlagUse = false;
- for (SDNode::use_iterator UI = Op.getNode()->use_begin(),
- UE = Op.getNode()->use_end(); UI != UE; ++UI) {
- SDNode *User = *UI;
- unsigned UOpNo = UI.getOperandNo();
- if (User->getOpcode() == ISD::TRUNCATE && User->hasOneUse()) {
- // Look pass truncate.
- UOpNo = User->use_begin().getOperandNo();
- User = *User->use_begin();
- }
-
- if (User->getOpcode() != ISD::BRCOND &&
- User->getOpcode() != ISD::SETCC &&
- !(User->getOpcode() == ISD::SELECT && UOpNo == 0)) {
- NonFlagUse = true;
+ case ISD::SHL:
+ case ISD::SRL:
+    // If we have a constant logical shift that's only used in a comparison
+    // against zero, turn it into an equivalent AND. This allows turning it
+    // into a TEST instruction later.
+ if ((X86CC == X86::COND_E || X86CC == X86::COND_NE) &&
+ isa<ConstantSDNode>(Op->getOperand(1)) && !hasNonFlagsUse(Op)) {
+ EVT VT = Op.getValueType();
+ unsigned BitWidth = VT.getSizeInBits();
+ unsigned ShAmt = Op->getConstantOperandVal(1);
+ if (ShAmt >= BitWidth) // Avoid undefined shifts.
break;
- }
+ APInt Mask = ArithOp.getOpcode() == ISD::SRL
+ ? APInt::getHighBitsSet(BitWidth, BitWidth - ShAmt)
+ : APInt::getLowBitsSet(BitWidth, BitWidth - ShAmt);
+ if (!Mask.isSignedIntN(32)) // Avoid large immediates.
+ break;
+ SDValue New = DAG.getNode(ISD::AND, dl, VT, Op->getOperand(0),
+ DAG.getConstant(Mask, VT));
+ DAG.ReplaceAllUsesWith(Op, New);
+ Op = New;
}
+ break;
- if (!NonFlagUse)
+ case ISD::AND:
+    // If the AND's non-flag result isn't used, don't bother emitting
+    // X86ISD::AND, because a TEST instruction will be better.
+ if (!hasNonFlagsUse(Op))
break;
- }
// FALL THROUGH
case ISD::SUB:
case ISD::OR:
@@ -9794,7 +10244,7 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC,
for (unsigned i = 0; i != NumOperands; ++i)
Ops.push_back(Op.getOperand(i));
- SDValue New = DAG.getNode(Opcode, dl, VTs, &Ops[0], NumOperands);
+ SDValue New = DAG.getNode(Opcode, dl, VTs, Ops);
DAG.ReplaceAllUsesWith(Op, New);
return SDValue(New.getNode(), 1);
}
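The SHL/SRL-to-AND rewrite above uses the identity that a logical shift compared against zero just tests a contiguous group of bits, so the shift can become a mask that a TEST instruction can consume. A quick standalone check of both directions (an editor's sketch, not part of the patch):

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t Xs[] = {0, 1, 31, 32, 0x80000000u, 0xDEADBEEFu};
  const unsigned C = 5;
  for (uint32_t X : Xs) {
    // (X >> C) == 0  <=>  no bit at position >= C is set (high-bits mask).
    assert(((X >> C) == 0) == ((X & ~((1u << C) - 1)) == 0));
    // (X << C) == 0  <=>  no bit below position 32-C is set (low-bits mask).
    assert(((X << C) == 0) == ((X & (0xFFFFFFFFu >> C)) == 0));
  }
  return 0;
}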
@@ -9802,11 +10252,10 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC,
/// Emit nodes that will be selected as "cmp Op0,Op1", or something
/// equivalent.
SDValue X86TargetLowering::EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC,
- SelectionDAG &DAG) const {
- SDLoc dl(Op0);
+ SDLoc dl, SelectionDAG &DAG) const {
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op1)) {
if (C->getAPIntValue() == 0)
- return EmitTest(Op0, X86CC, DAG);
+ return EmitTest(Op0, X86CC, dl, DAG);
if (Op0.getValueType() == MVT::i1)
llvm_unreachable("Unexpected comparison operation for MVT::i1 operands");
@@ -9888,7 +10337,7 @@ SDValue X86TargetLowering::LowerToBT(SDValue And, ISD::CondCode CC,
unsigned AndBitWidth = And.getValueSizeInBits();
if (BitWidth > AndBitWidth) {
APInt Zeros, Ones;
- DAG.ComputeMaskedBits(Op0, Zeros, Ones);
+ DAG.computeKnownBits(Op0, Zeros, Ones);
if (Zeros.countLeadingOnes() < BitWidth - AndBitWidth)
return SDValue();
}
@@ -10054,7 +10503,7 @@ static SDValue LowerIntVSETCC_AVX512(SDValue Op, SelectionDAG &DAG,
/// \brief Try to turn a VSETULT into a VSETULE by modifying its second
/// operand \p Op1. If non-trivial (for example because it's not constant)
/// return an empty value.
-static SDValue ChangeVSETULTtoVSETULE(SDValue Op1, SelectionDAG &DAG)
+static SDValue ChangeVSETULTtoVSETULE(SDLoc dl, SDValue Op1, SelectionDAG &DAG)
{
BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(Op1.getNode());
if (!BV)
@@ -10078,8 +10527,7 @@ static SDValue ChangeVSETULTtoVSETULE(SDValue Op1, SelectionDAG &DAG)
ULTOp1.push_back(DAG.getConstant(Val - 1, EVT));
}
- return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(Op1), VT, ULTOp1.data(),
- ULTOp1.size());
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, ULTOp1);
}
static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget,
@@ -10204,7 +10652,7 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget *Subtarget,
// Only do this pre-AVX since vpcmp* is no longer destructive.
if (Subtarget->hasAVX())
break;
- SDValue ULEOp1 = ChangeVSETULTtoVSETULE(Op1, DAG);
+ SDValue ULEOp1 = ChangeVSETULTtoVSETULE(dl, Op1, DAG);
if (ULEOp1.getNode()) {
Op1 = ULEOp1;
Subus = true; Invert = false; Swap = false;
@@ -10383,7 +10831,7 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
if (X86CC == X86::COND_INVALID)
return SDValue();
- SDValue EFLAGS = EmitCmp(Op0, Op1, X86CC, DAG);
+ SDValue EFLAGS = EmitCmp(Op0, Op1, X86CC, dl, DAG);
EFLAGS = ConvertCmpIfNecessary(EFLAGS, DAG);
SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
DAG.getConstant(X86CC, MVT::i8), EFLAGS);
@@ -10418,11 +10866,6 @@ static bool isX86LogicalCmp(SDValue Op) {
return false;
}
-static bool isZero(SDValue V) {
- ConstantSDNode *C = dyn_cast<ConstantSDNode>(V);
- return C && C->isNullValue();
-}
-
static bool isTruncWithZeroHighBitsInput(SDValue V, SelectionDAG &DAG) {
if (V.getOpcode() != ISD::TRUNCATE)
return false;
@@ -10517,7 +10960,7 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
Res = DAG.getNOT(DL, Res, Res.getValueType());
ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(Op2);
- if (N2C == 0 || !N2C->isNullValue())
+ if (!N2C || !N2C->isNullValue())
Res = DAG.getNode(ISD::OR, DL, Res.getValueType(), Res, Y);
return Res;
}
@@ -10606,7 +11049,7 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
if (addTest) {
CC = DAG.getConstant(X86::COND_NE, MVT::i8);
- Cond = EmitTest(Cond, X86::COND_NE, DAG);
+ Cond = EmitTest(Cond, X86::COND_NE, DL, DAG);
}
// a < b ? -1 : 0 -> RES = ~setcc_carry
@@ -10646,7 +11089,7 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
// condition is true.
SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
SDValue Ops[] = { Op2, Op1, CC, Cond };
- return DAG.getNode(X86ISD::CMOV, DL, VTs, Ops, array_lengthof(Ops));
+ return DAG.getNode(X86ISD::CMOV, DL, VTs, Ops);
}
static SDValue LowerSIGN_EXTEND_AVX512(SDValue Op, SelectionDAG &DAG) {
@@ -11027,7 +11470,7 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
if (addTest) {
CC = DAG.getConstant(X86::COND_NE, MVT::i8);
- Cond = EmitTest(Cond, X86::COND_NE, DAG);
+ Cond = EmitTest(Cond, X86::COND_NE, dl, DAG);
}
Cond = ConvertCmpIfNecessary(Cond, DAG);
return DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(),
@@ -11042,13 +11485,50 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
SDValue
X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
SelectionDAG &DAG) const {
- assert((Subtarget->isOSWindows() ||
- getTargetMachine().Options.EnableSegmentedStacks) &&
- "This should be used only on Windows targets or when segmented stacks "
- "are being used");
- assert(!Subtarget->isTargetMacho() && "Not implemented");
+ MachineFunction &MF = DAG.getMachineFunction();
+ bool SplitStack = MF.shouldSplitStack();
+ bool Lower = (Subtarget->isOSWindows() && !Subtarget->isTargetMacho()) ||
+ SplitStack;
SDLoc dl(Op);
+ if (!Lower) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SDNode* Node = Op.getNode();
+
+ unsigned SPReg = TLI.getStackPointerRegisterToSaveRestore();
+ assert(SPReg && "Target cannot require DYNAMIC_STACKALLOC expansion and"
+ " not tell us which reg is the stack pointer!");
+ EVT VT = Node->getValueType(0);
+ SDValue Tmp1 = SDValue(Node, 0);
+ SDValue Tmp2 = SDValue(Node, 1);
+ SDValue Tmp3 = Node->getOperand(2);
+ SDValue Chain = Tmp1.getOperand(0);
+
+ // Chain the dynamic stack allocation so that it doesn't modify the stack
+ // pointer when other instructions are using the stack.
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(0, true),
+ SDLoc(Node));
+
+ SDValue Size = Tmp2.getOperand(1);
+ SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT);
+ Chain = SP.getValue(1);
+ unsigned Align = cast<ConstantSDNode>(Tmp3)->getZExtValue();
+ const TargetFrameLowering &TFI = *getTargetMachine().getFrameLowering();
+ unsigned StackAlign = TFI.getStackAlignment();
+ Tmp1 = DAG.getNode(ISD::SUB, dl, VT, SP, Size); // Value
+ if (Align > StackAlign)
+ Tmp1 = DAG.getNode(ISD::AND, dl, VT, Tmp1,
+ DAG.getConstant(-(uint64_t)Align, VT));
+ Chain = DAG.getCopyToReg(Chain, dl, SPReg, Tmp1); // Output chain
+
+ Tmp2 = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, true),
+ DAG.getIntPtrConstant(0, true), SDValue(),
+ SDLoc(Node));
+
+ SDValue Ops[2] = { Tmp1, Tmp2 };
+ return DAG.getMergeValues(Ops, dl);
+ }
+
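The expansion above performs the usual align-down: subtract the size, then clear the low bits with "& -Align", which for a power-of-two Align is the same mask as "~(Align - 1)". A quick arithmetic check with illustrative values (editor's sketch):

#include <cassert>
#include <cstdint>

int main() {
  const uint64_t SP = 0x7FFF1234, Size = 40, Align = 32; // power-of-two align
  uint64_t P = SP - Size;
  uint64_t Aligned = P & -Align; // rounds P down to a multiple of Align
  assert(Aligned % Align == 0 && Aligned <= P && P - Aligned < Align);
  return 0;
}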
// Get the inputs.
SDValue Chain = Op.getOperand(0);
SDValue Size = Op.getOperand(1);
@@ -11058,8 +11538,7 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
bool Is64Bit = Subtarget->is64Bit();
EVT SPTy = Is64Bit ? MVT::i64 : MVT::i32;
- if (getTargetMachine().Options.EnableSegmentedStacks) {
- MachineFunction &MF = DAG.getMachineFunction();
+ if (SplitStack) {
MachineRegisterInfo &MRI = MF.getRegInfo();
if (Is64Bit) {
@@ -11081,7 +11560,7 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
SDValue Value = DAG.getNode(X86ISD::SEG_ALLOCA, dl, SPTy, Chain,
DAG.getRegister(Vreg, SPTy));
SDValue Ops1[2] = { Value, Chain };
- return DAG.getMergeValues(Ops1, 2, dl);
+ return DAG.getMergeValues(Ops1, dl);
} else {
SDValue Flag;
unsigned Reg = (Subtarget->is64Bit() ? X86::RAX : X86::EAX);
@@ -11105,7 +11584,7 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
}
SDValue Ops1[2] = { SP, Chain };
- return DAG.getMergeValues(Ops1, 2, dl);
+ return DAG.getMergeValues(Ops1, dl);
}
}
@@ -11166,8 +11645,7 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
Store = DAG.getStore(Op.getOperand(0), DL, RSFIN, FIN,
MachinePointerInfo(SV, 16), false, false, 0);
MemOps.push_back(Store);
- return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
- &MemOps[0], MemOps.size());
+ return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
}
SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
@@ -11221,8 +11699,7 @@ SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
InstOps.push_back(DAG.getConstant(Align, MVT::i32));
SDVTList VTs = DAG.getVTList(getPointerTy(), MVT::Other);
SDValue VAARG = DAG.getMemIntrinsicNode(X86ISD::VAARG_64, dl,
- VTs, &InstOps[0], InstOps.size(),
- MVT::i64,
+ VTs, InstOps, MVT::i64,
MachinePointerInfo(SV),
/*Align=*/0,
/*Volatile=*/false,
@@ -11262,6 +11739,10 @@ static SDValue getTargetVShiftByConstNode(unsigned Opc, SDLoc dl, MVT VT,
SelectionDAG &DAG) {
MVT ElementType = VT.getVectorElementType();
+  // A packed shift by zero is a no-op; just return the first operand.
+ if (ShiftAmt == 0)
+ return SrcOp;
+
// Check for ShiftAmt >= element width
if (ShiftAmt >= ElementType.getSizeInBits()) {
if (Opc == X86ISD::VSRAI)
@@ -11282,7 +11763,7 @@ static SDValue getTargetVShiftByConstNode(unsigned Opc, SDLoc dl, MVT VT,
ConstantSDNode *ND;
switch(Opc) {
- default: llvm_unreachable(0);
+ default: llvm_unreachable(nullptr);
case X86ISD::VSHLI:
for (unsigned i=0; i!=NumElts; ++i) {
SDValue CurrentOp = SrcOp->getOperand(i);
@@ -11321,7 +11802,7 @@ static SDValue getTargetVShiftByConstNode(unsigned Opc, SDLoc dl, MVT VT,
break;
}
- return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Elts[0], NumElts);
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Elts);
}
return DAG.getNode(Opc, dl, VT, SrcOp, DAG.getConstant(ShiftAmt, MVT::i8));
@@ -11353,7 +11834,7 @@ static SDValue getTargetVShiftNode(unsigned Opc, SDLoc dl, MVT VT,
ShOps[0] = ShAmt;
ShOps[1] = DAG.getConstant(0, MVT::i32);
ShOps[2] = ShOps[3] = DAG.getUNDEF(MVT::i32);
- ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, &ShOps[0], 4);
+ ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, ShOps);
// The return type has to be a 128-bit type with the same element
// type as the input type.
@@ -11476,6 +11957,21 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
return DAG.getNode(X86ISD::PMULUDQ, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
+ case Intrinsic::x86_sse41_pmuldq:
+ case Intrinsic::x86_avx2_pmul_dq:
+ return DAG.getNode(X86ISD::PMULDQ, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+
+ case Intrinsic::x86_sse2_pmulhu_w:
+ case Intrinsic::x86_avx2_pmulhu_w:
+ return DAG.getNode(ISD::MULHU, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+
+ case Intrinsic::x86_sse2_pmulh_w:
+ case Intrinsic::x86_avx2_pmulh_w:
+ return DAG.getNode(ISD::MULHS, dl, Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(2));
+
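The MULHU/MULHS nodes these intrinsics now map to compute the high half of a widened per-lane product. Per lane, for the 16-bit case, the semantics are as follows (editor's sketch, not part of the patch):

#include <cstdint>

// One lane of PMULHUW (ISD::MULHU): high 16 bits of the unsigned product.
static uint16_t mulhu16(uint16_t A, uint16_t B) {
  return (uint16_t)(((uint32_t)A * B) >> 16);
}

// One lane of PMULHW (ISD::MULHS): high 16 bits of the signed product.
static int16_t mulhs16(int16_t A, int16_t B) {
  return (int16_t)(((int32_t)A * B) >> 16);
}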
// SSE2/AVX2 sub with unsigned saturation intrinsics
case Intrinsic::x86_sse2_psubus_b:
case Intrinsic::x86_sse2_psubus_w:
@@ -11927,7 +12423,7 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
}
SmallVector<SDValue, 5> NewOps(Op->op_begin()+1, Op->op_end());
SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
- SDValue PCMP = DAG.getNode(Opcode, dl, VTs, NewOps.data(), NewOps.size());
+ SDValue PCMP = DAG.getNode(Opcode, dl, VTs, NewOps);
SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
DAG.getConstant(X86CC, MVT::i8),
SDValue(PCMP.getNode(), 1));
@@ -11944,7 +12440,7 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
SmallVector<SDValue, 5> NewOps(Op->op_begin()+1, Op->op_end());
SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
- return DAG.getNode(Opcode, dl, VTs, NewOps.data(), NewOps.size());
+ return DAG.getNode(Opcode, dl, VTs, NewOps);
}
case Intrinsic::x86_fma_vfmadd_ps:
case Intrinsic::x86_fma_vfmadd_pd:
@@ -12042,27 +12538,6 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
}
static SDValue getGatherNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
- SDValue Base, SDValue Index,
- SDValue ScaleOp, SDValue Chain,
- const X86Subtarget * Subtarget) {
- SDLoc dl(Op);
- ConstantSDNode *C = dyn_cast<ConstantSDNode>(ScaleOp);
- assert(C && "Invalid scale type");
- SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), MVT::i8);
- SDValue Src = getZeroVector(Op.getValueType(), Subtarget, DAG, dl);
- EVT MaskVT = MVT::getVectorVT(MVT::i1,
- Index.getSimpleValueType().getVectorNumElements());
- SDValue MaskInReg = DAG.getConstant(~0, MaskVT);
- SDVTList VTs = DAG.getVTList(Op.getValueType(), MaskVT, MVT::Other);
- SDValue Disp = DAG.getTargetConstant(0, MVT::i32);
- SDValue Segment = DAG.getRegister(0, MVT::i32);
- SDValue Ops[] = {Src, MaskInReg, Base, Scale, Index, Disp, Segment, Chain};
- SDNode *Res = DAG.getMachineNode(Opc, dl, VTs, Ops);
- SDValue RetOps[] = { SDValue(Res, 0), SDValue(Res, 2) };
- return DAG.getMergeValues(RetOps, array_lengthof(RetOps), dl);
-}
-
-static SDValue getMGatherNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
SDValue Src, SDValue Mask, SDValue Base,
SDValue Index, SDValue ScaleOp, SDValue Chain,
const X86Subtarget * Subtarget) {
@@ -12072,7 +12547,12 @@ static SDValue getMGatherNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), MVT::i8);
EVT MaskVT = MVT::getVectorVT(MVT::i1,
Index.getSimpleValueType().getVectorNumElements());
- SDValue MaskInReg = DAG.getNode(ISD::BITCAST, dl, MaskVT, Mask);
+ SDValue MaskInReg;
+ ConstantSDNode *MaskC = dyn_cast<ConstantSDNode>(Mask);
+ if (MaskC)
+ MaskInReg = DAG.getTargetConstant(MaskC->getSExtValue(), MaskVT);
+ else
+ MaskInReg = DAG.getNode(ISD::BITCAST, dl, MaskVT, Mask);
SDVTList VTs = DAG.getVTList(Op.getValueType(), MaskVT, MVT::Other);
SDValue Disp = DAG.getTargetConstant(0, MVT::i32);
SDValue Segment = DAG.getRegister(0, MVT::i32);
@@ -12081,12 +12561,12 @@ static SDValue getMGatherNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
SDValue Ops[] = {Src, MaskInReg, Base, Scale, Index, Disp, Segment, Chain};
SDNode *Res = DAG.getMachineNode(Opc, dl, VTs, Ops);
SDValue RetOps[] = { SDValue(Res, 0), SDValue(Res, 2) };
- return DAG.getMergeValues(RetOps, array_lengthof(RetOps), dl);
+ return DAG.getMergeValues(RetOps, dl);
}
static SDValue getScatterNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
- SDValue Src, SDValue Base, SDValue Index,
- SDValue ScaleOp, SDValue Chain) {
+ SDValue Src, SDValue Mask, SDValue Base,
+ SDValue Index, SDValue ScaleOp, SDValue Chain) {
SDLoc dl(Op);
ConstantSDNode *C = dyn_cast<ConstantSDNode>(ScaleOp);
assert(C && "Invalid scale type");
@@ -12095,52 +12575,218 @@ static SDValue getScatterNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
SDValue Segment = DAG.getRegister(0, MVT::i32);
EVT MaskVT = MVT::getVectorVT(MVT::i1,
Index.getSimpleValueType().getVectorNumElements());
- SDValue MaskInReg = DAG.getConstant(~0, MaskVT);
+ SDValue MaskInReg;
+ ConstantSDNode *MaskC = dyn_cast<ConstantSDNode>(Mask);
+ if (MaskC)
+ MaskInReg = DAG.getTargetConstant(MaskC->getSExtValue(), MaskVT);
+ else
+ MaskInReg = DAG.getNode(ISD::BITCAST, dl, MaskVT, Mask);
SDVTList VTs = DAG.getVTList(MaskVT, MVT::Other);
SDValue Ops[] = {Base, Scale, Index, Disp, Segment, MaskInReg, Src, Chain};
SDNode *Res = DAG.getMachineNode(Opc, dl, VTs, Ops);
return SDValue(Res, 1);
}
-static SDValue getMScatterNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
- SDValue Src, SDValue Mask, SDValue Base,
- SDValue Index, SDValue ScaleOp, SDValue Chain) {
+static SDValue getPrefetchNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
+ SDValue Mask, SDValue Base, SDValue Index,
+ SDValue ScaleOp, SDValue Chain) {
SDLoc dl(Op);
ConstantSDNode *C = dyn_cast<ConstantSDNode>(ScaleOp);
assert(C && "Invalid scale type");
SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), MVT::i8);
SDValue Disp = DAG.getTargetConstant(0, MVT::i32);
SDValue Segment = DAG.getRegister(0, MVT::i32);
- EVT MaskVT = MVT::getVectorVT(MVT::i1,
- Index.getSimpleValueType().getVectorNumElements());
- SDValue MaskInReg = DAG.getNode(ISD::BITCAST, dl, MaskVT, Mask);
- SDVTList VTs = DAG.getVTList(MaskVT, MVT::Other);
- SDValue Ops[] = {Base, Scale, Index, Disp, Segment, MaskInReg, Src, Chain};
- SDNode *Res = DAG.getMachineNode(Opc, dl, VTs, Ops);
- return SDValue(Res, 1);
+ EVT MaskVT =
+ MVT::getVectorVT(MVT::i1, Index.getSimpleValueType().getVectorNumElements());
+ SDValue MaskInReg;
+ ConstantSDNode *MaskC = dyn_cast<ConstantSDNode>(Mask);
+ if (MaskC)
+ MaskInReg = DAG.getTargetConstant(MaskC->getSExtValue(), MaskVT);
+ else
+ MaskInReg = DAG.getNode(ISD::BITCAST, dl, MaskVT, Mask);
+ SDValue Ops[] = {MaskInReg, Base, Scale, Index, Disp, Segment, Chain};
+ SDNode *Res = DAG.getMachineNode(Opc, dl, MVT::Other, Ops);
+ return SDValue(Res, 0);
+}
+
+// getReadTimeStampCounter - Handles the lowering of builtin intrinsics that
+// read the time stamp counter (x86_rdtsc and x86_rdtscp). This function is
+// also used to custom lower READCYCLECOUNTER nodes.
+static void getReadTimeStampCounter(SDNode *N, SDLoc DL, unsigned Opcode,
+ SelectionDAG &DAG, const X86Subtarget *Subtarget,
+ SmallVectorImpl<SDValue> &Results) {
+ SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue rd = DAG.getNode(Opcode, DL, Tys, N->getOperand(0));
+ SDValue LO, HI;
+
+ // The processor's time-stamp counter (a 64-bit MSR) is stored into the
+ // EDX:EAX registers. EDX is loaded with the high-order 32 bits of the MSR
+ // and the EAX register is loaded with the low-order 32 bits.
+ if (Subtarget->is64Bit()) {
+ LO = DAG.getCopyFromReg(rd, DL, X86::RAX, MVT::i64, rd.getValue(1));
+ HI = DAG.getCopyFromReg(LO.getValue(1), DL, X86::RDX, MVT::i64,
+ LO.getValue(2));
+ } else {
+ LO = DAG.getCopyFromReg(rd, DL, X86::EAX, MVT::i32, rd.getValue(1));
+ HI = DAG.getCopyFromReg(LO.getValue(1), DL, X86::EDX, MVT::i32,
+ LO.getValue(2));
+ }
+ SDValue Chain = HI.getValue(1);
+
+ if (Opcode == X86ISD::RDTSCP_DAG) {
+ assert(N->getNumOperands() == 3 && "Unexpected number of operands!");
+
+    // The RDTSCP instruction loads the IA32_TSC_AUX MSR (address C000_0103H)
+    // into the ECX register. Add 'ecx' explicitly to the chain.
+ SDValue ecx = DAG.getCopyFromReg(Chain, DL, X86::ECX, MVT::i32,
+ HI.getValue(2));
+ // Explicitly store the content of ECX at the location passed in input
+ // to the 'rdtscp' intrinsic.
+ Chain = DAG.getStore(ecx.getValue(1), DL, ecx, N->getOperand(2),
+ MachinePointerInfo(), false, false, 0);
+ }
+
+ if (Subtarget->is64Bit()) {
+    // On 64-bit targets the two halves were read into RAX and RDX above;
+    // combine them into a single i64 result.
+ SDValue Tmp = DAG.getNode(ISD::SHL, DL, MVT::i64, HI,
+ DAG.getConstant(32, MVT::i8));
+ Results.push_back(DAG.getNode(ISD::OR, DL, MVT::i64, LO, Tmp));
+ Results.push_back(Chain);
+ return;
+ }
+
+ // Use a buildpair to merge the two 32-bit values into a 64-bit one.
+ SDValue Ops[] = { LO, HI };
+ SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Ops);
+ Results.push_back(Pair);
+ Results.push_back(Chain);
+}
+
+static SDValue LowerREADCYCLECOUNTER(SDValue Op, const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ SmallVector<SDValue, 2> Results;
+ SDLoc DL(Op);
+ getReadTimeStampCounter(Op.getNode(), DL, X86ISD::RDTSC_DAG, DAG, Subtarget,
+ Results);
+ return DAG.getMergeValues(Results, DL);
+}
+
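In getReadTimeStampCounter the 64-bit path merges the two counter halves with a shift and an OR. On plain integers the merge is simply this (editor's illustration; Lo/Hi stand in for EAX/EDX):

#include <cstdint>

static uint64_t combineTsc(uint32_t Lo, uint32_t Hi) {
  return ((uint64_t)Hi << 32) | Lo; // the same SHL + OR the DAG nodes build
}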
+enum IntrinsicType {
+ GATHER, SCATTER, PREFETCH, RDSEED, RDRAND, RDTSC, XTEST
+};
+
+struct IntrinsicData {
+ IntrinsicData(IntrinsicType IType, unsigned IOpc0, unsigned IOpc1)
+ :Type(IType), Opc0(IOpc0), Opc1(IOpc1) {}
+ IntrinsicType Type;
+ unsigned Opc0;
+ unsigned Opc1;
+};
+
+std::map<unsigned, IntrinsicData> IntrMap;
+static void InitIntrinsicsMap() {
+ static bool Initialized = false;
+ if (Initialized)
+ return;
+  IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_gather_qps_512,
+                                IntrinsicData(GATHER, X86::VGATHERQPSZrm, 0)));
+ IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_gather_qpd_512,
+ IntrinsicData(GATHER, X86::VGATHERQPDZrm, 0)));
+ IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_gather_dpd_512,
+ IntrinsicData(GATHER, X86::VGATHERDPDZrm, 0)));
+ IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_gather_dps_512,
+ IntrinsicData(GATHER, X86::VGATHERDPSZrm, 0)));
+ IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_gather_qpi_512,
+ IntrinsicData(GATHER, X86::VPGATHERQDZrm, 0)));
+ IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_gather_qpq_512,
+ IntrinsicData(GATHER, X86::VPGATHERQQZrm, 0)));
+ IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_gather_dpi_512,
+ IntrinsicData(GATHER, X86::VPGATHERDDZrm, 0)));
+ IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_gather_dpq_512,
+ IntrinsicData(GATHER, X86::VPGATHERDQZrm, 0)));
+
+ IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_scatter_qps_512,
+ IntrinsicData(SCATTER, X86::VSCATTERQPSZmr, 0)));
+ IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_scatter_qpd_512,
+ IntrinsicData(SCATTER, X86::VSCATTERQPDZmr, 0)));
+ IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_scatter_dpd_512,
+ IntrinsicData(SCATTER, X86::VSCATTERDPDZmr, 0)));
+ IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_scatter_dps_512,
+ IntrinsicData(SCATTER, X86::VSCATTERDPSZmr, 0)));
+ IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_scatter_qpi_512,
+ IntrinsicData(SCATTER, X86::VPSCATTERQDZmr, 0)));
+ IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_scatter_qpq_512,
+ IntrinsicData(SCATTER, X86::VPSCATTERQQZmr, 0)));
+ IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_scatter_dpi_512,
+ IntrinsicData(SCATTER, X86::VPSCATTERDDZmr, 0)));
+ IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_scatter_dpq_512,
+ IntrinsicData(SCATTER, X86::VPSCATTERDQZmr, 0)));
+
+ IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_gatherpf_qps_512,
+ IntrinsicData(PREFETCH, X86::VGATHERPF0QPSm,
+ X86::VGATHERPF1QPSm)));
+ IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_gatherpf_qpd_512,
+ IntrinsicData(PREFETCH, X86::VGATHERPF0QPDm,
+ X86::VGATHERPF1QPDm)));
+ IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_gatherpf_dpd_512,
+ IntrinsicData(PREFETCH, X86::VGATHERPF0DPDm,
+ X86::VGATHERPF1DPDm)));
+ IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_gatherpf_dps_512,
+ IntrinsicData(PREFETCH, X86::VGATHERPF0DPSm,
+ X86::VGATHERPF1DPSm)));
+ IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_scatterpf_qps_512,
+ IntrinsicData(PREFETCH, X86::VSCATTERPF0QPSm,
+ X86::VSCATTERPF1QPSm)));
+ IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_scatterpf_qpd_512,
+ IntrinsicData(PREFETCH, X86::VSCATTERPF0QPDm,
+ X86::VSCATTERPF1QPDm)));
+ IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_scatterpf_dpd_512,
+ IntrinsicData(PREFETCH, X86::VSCATTERPF0DPDm,
+ X86::VSCATTERPF1DPDm)));
+ IntrMap.insert(std::make_pair(Intrinsic::x86_avx512_scatterpf_dps_512,
+ IntrinsicData(PREFETCH, X86::VSCATTERPF0DPSm,
+ X86::VSCATTERPF1DPSm)));
+ IntrMap.insert(std::make_pair(Intrinsic::x86_rdrand_16,
+ IntrinsicData(RDRAND, X86ISD::RDRAND, 0)));
+ IntrMap.insert(std::make_pair(Intrinsic::x86_rdrand_32,
+ IntrinsicData(RDRAND, X86ISD::RDRAND, 0)));
+ IntrMap.insert(std::make_pair(Intrinsic::x86_rdrand_64,
+ IntrinsicData(RDRAND, X86ISD::RDRAND, 0)));
+ IntrMap.insert(std::make_pair(Intrinsic::x86_rdseed_16,
+ IntrinsicData(RDSEED, X86ISD::RDSEED, 0)));
+ IntrMap.insert(std::make_pair(Intrinsic::x86_rdseed_32,
+ IntrinsicData(RDSEED, X86ISD::RDSEED, 0)));
+ IntrMap.insert(std::make_pair(Intrinsic::x86_rdseed_64,
+ IntrinsicData(RDSEED, X86ISD::RDSEED, 0)));
+ IntrMap.insert(std::make_pair(Intrinsic::x86_xtest,
+ IntrinsicData(XTEST, X86ISD::XTEST, 0)));
+ IntrMap.insert(std::make_pair(Intrinsic::x86_rdtsc,
+ IntrinsicData(RDTSC, X86ISD::RDTSC_DAG, 0)));
+ IntrMap.insert(std::make_pair(Intrinsic::x86_rdtscp,
+ IntrinsicData(RDTSC, X86ISD::RDTSCP_DAG, 0)));
+ Initialized = true;
}
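A note on the initialization pattern above: the 'Initialized' flag makes the first call to InitIntrinsicsMap() racy if lowering ever runs on multiple threads. A common C++11 alternative is a function-local static, sketched here as an editor's aside (the lambda body would carry the same insertions as above; this is not the patch's approach):

static const std::map<unsigned, IntrinsicData> &getIntrinsicMap() {
  // C++11 guarantees thread-safe, one-time initialization of this local.
  static const std::map<unsigned, IntrinsicData> Map = [] {
    std::map<unsigned, IntrinsicData> M;
    M.insert(std::make_pair(Intrinsic::x86_rdtsc,
                            IntrinsicData(RDTSC, X86ISD::RDTSC_DAG, 0)));
    // ... remaining entries exactly as in InitIntrinsicsMap() ...
    return M;
  }();
  return Map;
}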
static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
- SDLoc dl(Op);
+  InitIntrinsicsMap();
unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
- switch (IntNo) {
- default: return SDValue(); // Don't custom lower most intrinsics.
+  std::map<unsigned, IntrinsicData>::const_iterator itr = IntrMap.find(IntNo);
+ if (itr == IntrMap.end())
+ return SDValue();
- // RDRAND/RDSEED intrinsics.
- case Intrinsic::x86_rdrand_16:
- case Intrinsic::x86_rdrand_32:
- case Intrinsic::x86_rdrand_64:
- case Intrinsic::x86_rdseed_16:
- case Intrinsic::x86_rdseed_32:
- case Intrinsic::x86_rdseed_64: {
- unsigned Opcode = (IntNo == Intrinsic::x86_rdseed_16 ||
- IntNo == Intrinsic::x86_rdseed_32 ||
- IntNo == Intrinsic::x86_rdseed_64) ? X86ISD::RDSEED :
- X86ISD::RDRAND;
+ SDLoc dl(Op);
+ IntrinsicData Intr = itr->second;
+ switch(Intr.Type) {
+ case RDSEED:
+ case RDRAND: {
// Emit the node with the right value type.
SDVTList VTs = DAG.getVTList(Op->getValueType(0), MVT::Glue, MVT::Other);
- SDValue Result = DAG.getNode(Opcode, dl, VTs, Op.getOperand(0));
+ SDValue Result = DAG.getNode(Intr.Opc0, dl, VTs, Op.getOperand(0));
// If the value returned by RDRAND/RDSEED was valid (CF=1), return 1.
// Otherwise return the value from Rand, which is always 0, casted to i32.
@@ -12150,152 +12796,55 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget,
SDValue(Result.getNode(), 1) };
SDValue isValid = DAG.getNode(X86ISD::CMOV, dl,
DAG.getVTList(Op->getValueType(1), MVT::Glue),
- Ops, array_lengthof(Ops));
+ Ops);
// Return { result, isValid, chain }.
return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), Result, isValid,
SDValue(Result.getNode(), 2));
}
- //int_gather(index, base, scale);
- case Intrinsic::x86_avx512_gather_qpd_512:
- case Intrinsic::x86_avx512_gather_qps_512:
- case Intrinsic::x86_avx512_gather_dpd_512:
- case Intrinsic::x86_avx512_gather_qpi_512:
- case Intrinsic::x86_avx512_gather_qpq_512:
- case Intrinsic::x86_avx512_gather_dpq_512:
- case Intrinsic::x86_avx512_gather_dps_512:
- case Intrinsic::x86_avx512_gather_dpi_512: {
- unsigned Opc;
- switch (IntNo) {
- default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
- case Intrinsic::x86_avx512_gather_qps_512: Opc = X86::VGATHERQPSZrm; break;
- case Intrinsic::x86_avx512_gather_qpd_512: Opc = X86::VGATHERQPDZrm; break;
- case Intrinsic::x86_avx512_gather_dpd_512: Opc = X86::VGATHERDPDZrm; break;
- case Intrinsic::x86_avx512_gather_dps_512: Opc = X86::VGATHERDPSZrm; break;
- case Intrinsic::x86_avx512_gather_qpi_512: Opc = X86::VPGATHERQDZrm; break;
- case Intrinsic::x86_avx512_gather_qpq_512: Opc = X86::VPGATHERQQZrm; break;
- case Intrinsic::x86_avx512_gather_dpi_512: Opc = X86::VPGATHERDDZrm; break;
- case Intrinsic::x86_avx512_gather_dpq_512: Opc = X86::VPGATHERDQZrm; break;
- }
- SDValue Chain = Op.getOperand(0);
- SDValue Index = Op.getOperand(2);
- SDValue Base = Op.getOperand(3);
- SDValue Scale = Op.getOperand(4);
- return getGatherNode(Opc, Op, DAG, Base, Index, Scale, Chain, Subtarget);
- }
- //int_gather_mask(v1, mask, index, base, scale);
- case Intrinsic::x86_avx512_gather_qps_mask_512:
- case Intrinsic::x86_avx512_gather_qpd_mask_512:
- case Intrinsic::x86_avx512_gather_dpd_mask_512:
- case Intrinsic::x86_avx512_gather_dps_mask_512:
- case Intrinsic::x86_avx512_gather_qpi_mask_512:
- case Intrinsic::x86_avx512_gather_qpq_mask_512:
- case Intrinsic::x86_avx512_gather_dpi_mask_512:
- case Intrinsic::x86_avx512_gather_dpq_mask_512: {
- unsigned Opc;
- switch (IntNo) {
- default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
- case Intrinsic::x86_avx512_gather_qps_mask_512:
- Opc = X86::VGATHERQPSZrm; break;
- case Intrinsic::x86_avx512_gather_qpd_mask_512:
- Opc = X86::VGATHERQPDZrm; break;
- case Intrinsic::x86_avx512_gather_dpd_mask_512:
- Opc = X86::VGATHERDPDZrm; break;
- case Intrinsic::x86_avx512_gather_dps_mask_512:
- Opc = X86::VGATHERDPSZrm; break;
- case Intrinsic::x86_avx512_gather_qpi_mask_512:
- Opc = X86::VPGATHERQDZrm; break;
- case Intrinsic::x86_avx512_gather_qpq_mask_512:
- Opc = X86::VPGATHERQQZrm; break;
- case Intrinsic::x86_avx512_gather_dpi_mask_512:
- Opc = X86::VPGATHERDDZrm; break;
- case Intrinsic::x86_avx512_gather_dpq_mask_512:
- Opc = X86::VPGATHERDQZrm; break;
- }
+ case GATHER: {
+    // gather(v1, base, index, mask, scale);
SDValue Chain = Op.getOperand(0);
SDValue Src = Op.getOperand(2);
- SDValue Mask = Op.getOperand(3);
+ SDValue Base = Op.getOperand(3);
SDValue Index = Op.getOperand(4);
- SDValue Base = Op.getOperand(5);
+ SDValue Mask = Op.getOperand(5);
SDValue Scale = Op.getOperand(6);
- return getMGatherNode(Opc, Op, DAG, Src, Mask, Base, Index, Scale, Chain,
+ return getGatherNode(Intr.Opc0, Op, DAG, Src, Mask, Base, Index, Scale, Chain,
Subtarget);
}
- //int_scatter(base, index, v1, scale);
- case Intrinsic::x86_avx512_scatter_qpd_512:
- case Intrinsic::x86_avx512_scatter_qps_512:
- case Intrinsic::x86_avx512_scatter_dpd_512:
- case Intrinsic::x86_avx512_scatter_qpi_512:
- case Intrinsic::x86_avx512_scatter_qpq_512:
- case Intrinsic::x86_avx512_scatter_dpq_512:
- case Intrinsic::x86_avx512_scatter_dps_512:
- case Intrinsic::x86_avx512_scatter_dpi_512: {
- unsigned Opc;
- switch (IntNo) {
- default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
- case Intrinsic::x86_avx512_scatter_qpd_512:
- Opc = X86::VSCATTERQPDZmr; break;
- case Intrinsic::x86_avx512_scatter_qps_512:
- Opc = X86::VSCATTERQPSZmr; break;
- case Intrinsic::x86_avx512_scatter_dpd_512:
- Opc = X86::VSCATTERDPDZmr; break;
- case Intrinsic::x86_avx512_scatter_dps_512:
- Opc = X86::VSCATTERDPSZmr; break;
- case Intrinsic::x86_avx512_scatter_qpi_512:
- Opc = X86::VPSCATTERQDZmr; break;
- case Intrinsic::x86_avx512_scatter_qpq_512:
- Opc = X86::VPSCATTERQQZmr; break;
- case Intrinsic::x86_avx512_scatter_dpq_512:
- Opc = X86::VPSCATTERDQZmr; break;
- case Intrinsic::x86_avx512_scatter_dpi_512:
- Opc = X86::VPSCATTERDDZmr; break;
- }
- SDValue Chain = Op.getOperand(0);
- SDValue Base = Op.getOperand(2);
- SDValue Index = Op.getOperand(3);
- SDValue Src = Op.getOperand(4);
- SDValue Scale = Op.getOperand(5);
- return getScatterNode(Opc, Op, DAG, Src, Base, Index, Scale, Chain);
- }
- //int_scatter_mask(base, mask, index, v1, scale);
- case Intrinsic::x86_avx512_scatter_qps_mask_512:
- case Intrinsic::x86_avx512_scatter_qpd_mask_512:
- case Intrinsic::x86_avx512_scatter_dpd_mask_512:
- case Intrinsic::x86_avx512_scatter_dps_mask_512:
- case Intrinsic::x86_avx512_scatter_qpi_mask_512:
- case Intrinsic::x86_avx512_scatter_qpq_mask_512:
- case Intrinsic::x86_avx512_scatter_dpi_mask_512:
- case Intrinsic::x86_avx512_scatter_dpq_mask_512: {
- unsigned Opc;
- switch (IntNo) {
- default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
- case Intrinsic::x86_avx512_scatter_qpd_mask_512:
- Opc = X86::VSCATTERQPDZmr; break;
- case Intrinsic::x86_avx512_scatter_qps_mask_512:
- Opc = X86::VSCATTERQPSZmr; break;
- case Intrinsic::x86_avx512_scatter_dpd_mask_512:
- Opc = X86::VSCATTERDPDZmr; break;
- case Intrinsic::x86_avx512_scatter_dps_mask_512:
- Opc = X86::VSCATTERDPSZmr; break;
- case Intrinsic::x86_avx512_scatter_qpi_mask_512:
- Opc = X86::VPSCATTERQDZmr; break;
- case Intrinsic::x86_avx512_scatter_qpq_mask_512:
- Opc = X86::VPSCATTERQQZmr; break;
- case Intrinsic::x86_avx512_scatter_dpq_mask_512:
- Opc = X86::VPSCATTERDQZmr; break;
- case Intrinsic::x86_avx512_scatter_dpi_mask_512:
- Opc = X86::VPSCATTERDDZmr; break;
- }
+ case SCATTER: {
+    // scatter(base, mask, index, v1, scale);
SDValue Chain = Op.getOperand(0);
SDValue Base = Op.getOperand(2);
SDValue Mask = Op.getOperand(3);
SDValue Index = Op.getOperand(4);
SDValue Src = Op.getOperand(5);
SDValue Scale = Op.getOperand(6);
- return getMScatterNode(Opc, Op, DAG, Src, Mask, Base, Index, Scale, Chain);
+    return getScatterNode(Intr.Opc0, Op, DAG, Src, Mask, Base, Index, Scale,
+                          Chain);
+ }
+ case PREFETCH: {
+ SDValue Hint = Op.getOperand(6);
+    ConstantSDNode *HintC = dyn_cast<ConstantSDNode>(Hint);
+    if (!HintC || HintC->getZExtValue() > 1)
+      llvm_unreachable("Wrong prefetch hint in intrinsic: should be 0 or 1");
+    unsigned Opcode = HintC->getZExtValue() ? Intr.Opc1 : Intr.Opc0;
+ SDValue Chain = Op.getOperand(0);
+ SDValue Mask = Op.getOperand(2);
+ SDValue Index = Op.getOperand(3);
+ SDValue Base = Op.getOperand(4);
+ SDValue Scale = Op.getOperand(5);
+ return getPrefetchNode(Opcode, Op, DAG, Mask, Base, Index, Scale, Chain);
+ }
+ // Read Time Stamp Counter (RDTSC) and Processor ID (RDTSCP).
+ case RDTSC: {
+ SmallVector<SDValue, 2> Results;
+    getReadTimeStampCounter(Op.getNode(), dl, Intr.Opc0, DAG, Subtarget,
+                            Results);
+ return DAG.getMergeValues(Results, dl);
}
// XTEST intrinsics.
- case Intrinsic::x86_xtest: {
+ case XTEST: {
SDVTList VTs = DAG.getVTList(Op->getValueType(0), MVT::Other);
SDValue InTrans = DAG.getNode(X86ISD::XTEST, dl, VTs, Op.getOperand(0));
SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
@@ -12306,6 +12855,7 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget,
Ret, SDValue(InTrans.getNode(), 1));
}
}
+ llvm_unreachable("Unknown Intrinsic Type");
}
SDValue X86TargetLowering::LowerRETURNADDR(SDValue Op,
@@ -12358,6 +12908,19 @@ SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
return FrameAddr;
}
+// FIXME? Maybe this could be a TableGen attribute on some registers and
+// this table could be generated automatically from RegInfo.
+unsigned X86TargetLowering::getRegisterByName(const char* RegName,
+ EVT VT) const {
+ unsigned Reg = StringSwitch<unsigned>(RegName)
+ .Case("esp", X86::ESP)
+ .Case("rsp", X86::RSP)
+ .Default(0);
+ if (Reg)
+ return Reg;
+ report_fatal_error("Invalid register name global variable");
+}
+
SDValue X86TargetLowering::LowerFRAME_TO_ARGS_OFFSET(SDValue Op,
SelectionDAG &DAG) const {
const X86RegisterInfo *RegInfo =
@@ -12477,7 +13040,7 @@ SDValue X86TargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
MachinePointerInfo(TrmpAddr, 22),
false, false, 0);
- return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains, 6);
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
} else {
const Function *Func =
cast<Function>(cast<SrcValueSDNode>(Op.getOperand(5))->getValue());
@@ -12557,7 +13120,7 @@ SDValue X86TargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
MachinePointerInfo(TrmpAddr, 6),
false, false, 1);
- return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains, 4);
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
}
}
@@ -12600,8 +13163,7 @@ SDValue X86TargetLowering::LowerFLT_ROUNDS_(SDValue Op,
SDValue Ops[] = { DAG.getEntryNode(), StackSlot };
SDValue Chain = DAG.getMemIntrinsicNode(X86ISD::FNSTCW16m, DL,
DAG.getVTList(MVT::Other),
- Ops, array_lengthof(Ops), MVT::i16,
- MMO);
+ Ops, MVT::i16, MMO);
// Load FP Control Word from stack slot
SDValue CWD = DAG.getLoad(MVT::i16, DL, Chain, StackSlot,
@@ -12654,7 +13216,7 @@ static SDValue LowerCTLZ(SDValue Op, SelectionDAG &DAG) {
DAG.getConstant(X86::COND_E, MVT::i8),
Op.getValue(1)
};
- Op = DAG.getNode(X86ISD::CMOV, dl, OpVT, Ops, array_lengthof(Ops));
+ Op = DAG.getNode(X86ISD::CMOV, dl, OpVT, Ops);
// Finally xor with NumBits-1.
Op = DAG.getNode(ISD::XOR, dl, OpVT, Op, DAG.getConstant(NumBits-1, OpVT));
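The closing XOR is valid because BSR returns the index of the highest set bit, and for 0 <= i < w with w a power of two, (w - 1) - i == (w - 1) ^ i: w - 1 is all ones in the low bits, so the subtraction never borrows. A one-loop check (editor's sketch):

#include <cassert>

int main() {
  const unsigned NumBits = 32;
  for (unsigned I = 0; I < NumBits; ++I)
    assert(NumBits - 1 - I == ((NumBits - 1) ^ I));
  return 0;
}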
@@ -12706,7 +13268,7 @@ static SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG) {
DAG.getConstant(X86::COND_E, MVT::i8),
Op.getValue(1)
};
- return DAG.getNode(X86ISD::CMOV, dl, VT, Ops, array_lengthof(Ops));
+ return DAG.getNode(X86ISD::CMOV, dl, VT, Ops);
}
// Lower256IntArith - Break a 256-bit integer operation into two new 128-bit
@@ -12824,59 +13386,104 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget *Subtarget,
return DAG.getNode(ISD::ADD, dl, VT, Res, AhiBlo);
}
-static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) {
- MVT VT = Op.getSimpleValueType();
- MVT EltTy = VT.getVectorElementType();
- unsigned NumElts = VT.getVectorNumElements();
- SDValue N0 = Op.getOperand(0);
+SDValue X86TargetLowering::LowerWin64_i128OP(SDValue Op,
+                                             SelectionDAG &DAG) const {
+ assert(Subtarget->isTargetWin64() && "Unexpected target");
+ EVT VT = Op.getValueType();
+ assert(VT.isInteger() && VT.getSizeInBits() == 128 &&
+ "Unexpected return type for lowering");
+
+ RTLIB::Libcall LC;
+ bool isSigned;
+ switch (Op->getOpcode()) {
+ default: llvm_unreachable("Unexpected request for libcall!");
+ case ISD::SDIV: isSigned = true; LC = RTLIB::SDIV_I128; break;
+ case ISD::UDIV: isSigned = false; LC = RTLIB::UDIV_I128; break;
+ case ISD::SREM: isSigned = true; LC = RTLIB::SREM_I128; break;
+ case ISD::UREM: isSigned = false; LC = RTLIB::UREM_I128; break;
+ case ISD::SDIVREM: isSigned = true; LC = RTLIB::SDIVREM_I128; break;
+ case ISD::UDIVREM: isSigned = false; LC = RTLIB::UDIVREM_I128; break;
+ }
+
SDLoc dl(Op);
+ SDValue InChain = DAG.getEntryNode();
- // Lower sdiv X, pow2-const.
- BuildVectorSDNode *C = dyn_cast<BuildVectorSDNode>(Op.getOperand(1));
- if (!C)
- return SDValue();
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ for (unsigned i = 0, e = Op->getNumOperands(); i != e; ++i) {
+ EVT ArgVT = Op->getOperand(i).getValueType();
+ assert(ArgVT.isInteger() && ArgVT.getSizeInBits() == 128 &&
+ "Unexpected argument type for lowering");
+ SDValue StackPtr = DAG.CreateStackTemporary(ArgVT, 16);
+ Entry.Node = StackPtr;
+    InChain = DAG.getStore(InChain, dl, Op->getOperand(i), StackPtr,
+                           MachinePointerInfo(), false, false, 16);
+ Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+    Entry.Ty = PointerType::get(ArgTy, 0);
+ Entry.isSExt = false;
+ Entry.isZExt = false;
+ Args.push_back(Entry);
+ }
+
+ SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
+ getPointerTy());
- APInt SplatValue, SplatUndef;
- unsigned SplatBitSize;
- bool HasAnyUndefs;
- if (!C->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
- HasAnyUndefs) ||
- EltTy.getSizeInBits() < SplatBitSize)
- return SDValue();
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(dl).setChain(InChain)
+ .setCallee(getLibcallCallingConv(LC),
+ static_cast<EVT>(MVT::v2i64).getTypeForEVT(*DAG.getContext()),
+ Callee, &Args, 0)
+ .setInRegister().setSExtResult(isSigned).setZExtResult(!isSigned);
+
+ std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI);
+ return DAG.getNode(ISD::BITCAST, dl, VT, CallInfo.first);
+}
+
+static SDValue LowerMUL_LOHI(SDValue Op, const X86Subtarget *Subtarget,
+ SelectionDAG &DAG) {
+ SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
+ EVT VT = Op0.getValueType();
+ SDLoc dl(Op);
- if ((SplatValue != 0) &&
- (SplatValue.isPowerOf2() || (-SplatValue).isPowerOf2())) {
- unsigned Lg2 = SplatValue.countTrailingZeros();
- // Splat the sign bit.
- SmallVector<SDValue, 16> Sz(NumElts,
- DAG.getConstant(EltTy.getSizeInBits() - 1,
- EltTy));
- SDValue SGN = DAG.getNode(ISD::SRA, dl, VT, N0,
- DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Sz[0],
- NumElts));
- // Add (N0 < 0) ? abs2 - 1 : 0;
- SmallVector<SDValue, 16> Amt(NumElts,
- DAG.getConstant(EltTy.getSizeInBits() - Lg2,
- EltTy));
- SDValue SRL = DAG.getNode(ISD::SRL, dl, VT, SGN,
- DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Amt[0],
- NumElts));
- SDValue ADD = DAG.getNode(ISD::ADD, dl, VT, N0, SRL);
- SmallVector<SDValue, 16> Lg2Amt(NumElts, DAG.getConstant(Lg2, EltTy));
- SDValue SRA = DAG.getNode(ISD::SRA, dl, VT, ADD,
- DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Lg2Amt[0],
- NumElts));
-
- // If we're dividing by a positive value, we're done. Otherwise, we must
- // negate the result.
- if (SplatValue.isNonNegative())
- return SRA;
-
- SmallVector<SDValue, 16> V(NumElts, DAG.getConstant(0, EltTy));
- SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &V[0], NumElts);
- return DAG.getNode(ISD::SUB, dl, VT, Zero, SRA);
+ assert((VT == MVT::v4i32 && Subtarget->hasSSE2()) ||
+ (VT == MVT::v8i32 && Subtarget->hasInt256()));
+
+ // Get the high parts.
+ const int Mask[] = {1, 2, 3, 4, 5, 6, 7, 8};
+ SDValue Hi0 = DAG.getVectorShuffle(VT, dl, Op0, Op0, Mask);
+ SDValue Hi1 = DAG.getVectorShuffle(VT, dl, Op1, Op1, Mask);
+
+ // Emit two multiplies, one for the even-indexed lanes and one for the
+ // odd-indexed lanes.
+ MVT MulVT = VT == MVT::v4i32 ? MVT::v2i64 : MVT::v4i64;
+ bool IsSigned = Op->getOpcode() == ISD::SMUL_LOHI;
+ unsigned Opcode =
+ (!IsSigned || !Subtarget->hasSSE41()) ? X86ISD::PMULUDQ : X86ISD::PMULDQ;
+ SDValue Mul1 = DAG.getNode(ISD::BITCAST, dl, VT,
+ DAG.getNode(Opcode, dl, MulVT, Op0, Op1));
+ SDValue Mul2 = DAG.getNode(ISD::BITCAST, dl, VT,
+ DAG.getNode(Opcode, dl, MulVT, Hi0, Hi1));
+
+ // Shuffle it back into the right order.
+ const int HighMask[] = {1, 5, 3, 7, 9, 13, 11, 15};
+ SDValue Highs = DAG.getVectorShuffle(VT, dl, Mul1, Mul2, HighMask);
+ const int LowMask[] = {0, 4, 2, 6, 8, 12, 10, 14};
+ SDValue Lows = DAG.getVectorShuffle(VT, dl, Mul1, Mul2, LowMask);
+
+ // If we have a signed multiply but no PMULDQ, fix up the high parts of an
+ // unsigned multiply.
+ if (IsSigned && !Subtarget->hasSSE41()) {
+ SDValue ShAmt =
+ DAG.getConstant(31, DAG.getTargetLoweringInfo().getShiftAmountTy(VT));
+ SDValue T1 = DAG.getNode(ISD::AND, dl, VT,
+ DAG.getNode(ISD::SRA, dl, VT, Op0, ShAmt), Op1);
+ SDValue T2 = DAG.getNode(ISD::AND, dl, VT,
+ DAG.getNode(ISD::SRA, dl, VT, Op1, ShAmt), Op0);
+
+ SDValue Fixup = DAG.getNode(ISD::ADD, dl, VT, T1, T2);
+ Highs = DAG.getNode(ISD::SUB, dl, VT, Highs, Fixup);
}
- return SDValue();
+
+ return DAG.getNode(ISD::MERGE_VALUES, dl, Op.getValueType(), Highs, Lows);
}
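The fixup above implements the identity (a *s b) >> 32 == ((a *u b) >> 32) - (a < 0 ? b : 0) - (b < 0 ? a : 0), taken modulo 2^32. A minimal scalar model of one lane (a sketch, not part of the patch; arithmetic right shift on signed values is assumed, as on x86):

    #include <cstdint>

    // Recover the signed high word from the unsigned (PMULUDQ) high word,
    // mirroring the SRA/AND/ADD/SUB sequence above.
    int32_t smulhi_from_umulhi(int32_t a, int32_t b) {
      uint32_t ua = (uint32_t)a, ub = (uint32_t)b;
      uint32_t uhi = (uint32_t)(((uint64_t)ua * ub) >> 32); // unsigned high
      uint32_t t1 = (uint32_t)(a >> 31) & ub; // T1: b where a is negative
      uint32_t t2 = (uint32_t)(b >> 31) & ua; // T2: a where b is negative
      return (int32_t)(uhi - (t1 + t2));      // Highs - Fixup
    }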
static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG,
@@ -12920,7 +13527,7 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG,
DAG.getConstant(uint8_t(-1U << ShiftAmt),
MVT::i8));
return DAG.getNode(ISD::AND, dl, VT, SHL,
- DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &V[0], 16));
+ DAG.getNode(ISD::BUILD_VECTOR, dl, VT, V));
}
if (Op.getOpcode() == ISD::SRL) {
// Make a large shift.
@@ -12933,7 +13540,7 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG,
DAG.getConstant(uint8_t(-1U) >> ShiftAmt,
MVT::i8));
return DAG.getNode(ISD::AND, dl, VT, SRL,
- DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &V[0], 16));
+ DAG.getNode(ISD::BUILD_VECTOR, dl, VT, V));
}
if (Op.getOpcode() == ISD::SRA) {
if (ShiftAmt == 7) {
@@ -12946,7 +13553,7 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG,
SDValue Res = DAG.getNode(ISD::SRL, dl, VT, R, Amt);
SmallVector<SDValue, 16> V(16, DAG.getConstant(128 >> ShiftAmt,
MVT::i8));
- SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &V[0], 16);
+ SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, V);
Res = DAG.getNode(ISD::XOR, dl, VT, Res, Mask);
Res = DAG.getNode(ISD::SUB, dl, VT, Res, Mask);
return Res;
@@ -12966,7 +13573,7 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG,
DAG.getConstant(uint8_t(-1U << ShiftAmt),
MVT::i8));
return DAG.getNode(ISD::AND, dl, VT, SHL,
- DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &V[0], 32));
+ DAG.getNode(ISD::BUILD_VECTOR, dl, VT, V));
}
if (Op.getOpcode() == ISD::SRL) {
// Make a large shift.
@@ -12979,7 +13586,7 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG,
DAG.getConstant(uint8_t(-1U) >> ShiftAmt,
MVT::i8));
return DAG.getNode(ISD::AND, dl, VT, SRL,
- DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &V[0], 32));
+ DAG.getNode(ISD::BUILD_VECTOR, dl, VT, V));
}
if (Op.getOpcode() == ISD::SRA) {
if (ShiftAmt == 7) {
@@ -12992,7 +13599,7 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG,
SDValue Res = DAG.getNode(ISD::SRL, dl, VT, R, Amt);
SmallVector<SDValue, 32> V(32, DAG.getConstant(128 >> ShiftAmt,
MVT::i8));
- SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &V[0], 32);
+ SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, V);
Res = DAG.getNode(ISD::XOR, dl, VT, Res, Mask);
Res = DAG.getNode(ISD::SUB, dl, VT, Res, Mask);
return Res;
@@ -13014,7 +13621,7 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG,
uint64_t ShiftAmt = 0;
for (unsigned i = 0; i != Ratio; ++i) {
ConstantSDNode *C = dyn_cast<ConstantSDNode>(Amt.getOperand(i));
- if (C == 0)
+ if (!C)
return SDValue();
// 6 == Log2(64)
ShiftAmt |= C->getZExtValue() << (i * (1 << (6 - RatioInLog2)));
@@ -13025,7 +13632,7 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG,
for (unsigned j = 0; j != Ratio; ++j) {
ConstantSDNode *C =
dyn_cast<ConstantSDNode>(Amt.getOperand(i + j));
- if (C == 0)
+ if (!C)
return SDValue();
// 6 == Log2(64)
ShAmt |= C->getZExtValue() << (j * (1 << (6 - RatioInLog2)));
@@ -13107,7 +13714,7 @@ static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG,
BaseShAmt = InVec.getOperand(1);
}
}
- if (BaseShAmt.getNode() == 0)
+ if (!BaseShAmt.getNode())
BaseShAmt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Amt,
DAG.getIntPtrConstant(0));
}
@@ -13260,7 +13867,7 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget,
}
Elts.push_back(DAG.getConstant(One.shl(ShAmt), SVT));
}
- SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Elts[0], NumElems);
+ SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Elts);
return DAG.getNode(ISD::MUL, dl, VT, R, BV);
}
@@ -13274,6 +13881,79 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget,
return DAG.getNode(ISD::MUL, dl, VT, Op, R);
}
+ // If possible, lower this shift as a sequence of two shifts by
+ // constant plus a MOVSS/MOVSD instead of scalarizing it.
+ // Example:
+ // (v4i32 (srl A, (build_vector <X, Y, Y, Y>)))
+ //
+ // Could be rewritten as:
+ // (v4i32 (MOVSS (srl A, <Y,Y,Y,Y>), (srl A, <X,X,X,X>)))
+ //
+ // The advantage is that the two shifts from the example would be
+ // lowered as X86ISD::VSRLI nodes. This would be cheaper than scalarizing
+ // the vector shift into four scalar shifts plus four pairs of vector
+ // insert/extract.
+ if ((VT == MVT::v8i16 || VT == MVT::v4i32) &&
+ ISD::isBuildVectorOfConstantSDNodes(Amt.getNode())) {
+ unsigned TargetOpcode = X86ISD::MOVSS;
+ bool CanBeSimplified;
+ // The splat value for the first packed shift (the 'X' from the example).
+ SDValue Amt1 = Amt->getOperand(0);
+ // The splat value for the second packed shift (the 'Y' from the example).
+ SDValue Amt2 = (VT == MVT::v4i32) ? Amt->getOperand(1) :
+ Amt->getOperand(2);
+
+ // See if it is possible to replace this node with a sequence of
+ // two shifts followed by a MOVSS/MOVSD.
+ if (VT == MVT::v4i32) {
+ // Check if it is legal to use a MOVSS.
+ CanBeSimplified = Amt2 == Amt->getOperand(2) &&
+ Amt2 == Amt->getOperand(3);
+ if (!CanBeSimplified) {
+ // Otherwise, check if we can still simplify this node using a MOVSD.
+ CanBeSimplified = Amt1 == Amt->getOperand(1) &&
+ Amt->getOperand(2) == Amt->getOperand(3);
+ TargetOpcode = X86ISD::MOVSD;
+ Amt2 = Amt->getOperand(2);
+ }
+ } else {
+ // Do similar checks for the case where the machine value type
+ // is MVT::v8i16.
+ CanBeSimplified = Amt1 == Amt->getOperand(1);
+ for (unsigned i=3; i != 8 && CanBeSimplified; ++i)
+ CanBeSimplified = Amt2 == Amt->getOperand(i);
+
+ if (!CanBeSimplified) {
+ TargetOpcode = X86ISD::MOVSD;
+ CanBeSimplified = true;
+ Amt2 = Amt->getOperand(4);
+ for (unsigned i=0; i != 4 && CanBeSimplified; ++i)
+ CanBeSimplified = Amt1 == Amt->getOperand(i);
+ for (unsigned j=4; j != 8 && CanBeSimplified; ++j)
+ CanBeSimplified = Amt2 == Amt->getOperand(j);
+ }
+ }
+
+ if (CanBeSimplified && isa<ConstantSDNode>(Amt1) &&
+ isa<ConstantSDNode>(Amt2)) {
+ // Replace this node with two shifts followed by a MOVSS/MOVSD.
+ EVT CastVT = MVT::v4i32;
+ SDValue Splat1 =
+ DAG.getConstant(cast<ConstantSDNode>(Amt1)->getAPIntValue(), VT);
+ SDValue Shift1 = DAG.getNode(Op->getOpcode(), dl, VT, R, Splat1);
+ SDValue Splat2 =
+ DAG.getConstant(cast<ConstantSDNode>(Amt2)->getAPIntValue(), VT);
+ SDValue Shift2 = DAG.getNode(Op->getOpcode(), dl, VT, R, Splat2);
+ if (TargetOpcode == X86ISD::MOVSD)
+ CastVT = MVT::v2i64;
+ SDValue BitCast1 = DAG.getNode(ISD::BITCAST, dl, CastVT, Shift1);
+ SDValue BitCast2 = DAG.getNode(ISD::BITCAST, dl, CastVT, Shift2);
+ SDValue Result = getTargetShuffleNode(TargetOpcode, dl, CastVT, BitCast2,
+ BitCast1, DAG);
+ return DAG.getNode(ISD::BITCAST, dl, VT, Result);
+ }
+ }
+
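An intrinsics-level model of the v4i32 MOVSS case described in the comment above (a hedged illustration with sample constants, not the code the compiler emits):

    #include <immintrin.h>

    // (srl A, <X, Y, Y, Y>): shift every lane by Y, shift every lane by X,
    // then MOVSS takes lane 0 from its second source and lanes 1-3 from
    // its first.
    __m128i srl_x_yyy(__m128i A) {
      const int X = 3, Y = 1;             // assumed constant shift amounts
      __m128i byY = _mm_srli_epi32(A, Y); // lowered as X86ISD::VSRLI
      __m128i byX = _mm_srli_epi32(A, X);
      return _mm_castps_si128(
          _mm_move_ss(_mm_castsi128_ps(byY), _mm_castsi128_ps(byX)));
    }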
if (VT == MVT::v16i8 && Op->getOpcode() == ISD::SHL) {
assert(Subtarget->hasSSE2() && "Need SSE2 for pslli/pcmpeq.");
@@ -13351,10 +14031,8 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget,
for (unsigned i = NumElems/2; i != NumElems; ++i)
Amt2Csts.push_back(Amt->getOperand(i));
- Amt1 = DAG.getNode(ISD::BUILD_VECTOR, dl, NewVT,
- &Amt1Csts[0], NumElems/2);
- Amt2 = DAG.getNode(ISD::BUILD_VECTOR, dl, NewVT,
- &Amt2Csts[0], NumElems/2);
+ Amt1 = DAG.getNode(ISD::BUILD_VECTOR, dl, NewVT, Amt1Csts);
+ Amt2 = DAG.getNode(ISD::BUILD_VECTOR, dl, NewVT, Amt2Csts);
} else {
// Variable shift amount
Amt1 = Extract128BitVector(Amt, 0, DAG, dl);
@@ -13585,35 +14263,47 @@ static SDValue LowerCMP_SWAP(SDValue Op, const X86Subtarget *Subtarget,
SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
MachineMemOperand *MMO = cast<AtomicSDNode>(Op)->getMemOperand();
SDValue Result = DAG.getMemIntrinsicNode(X86ISD::LCMPXCHG_DAG, DL, Tys,
- Ops, array_lengthof(Ops), T, MMO);
+ Ops, T, MMO);
SDValue cpOut =
DAG.getCopyFromReg(Result.getValue(0), DL, Reg, T, Result.getValue(1));
return cpOut;
}
-static SDValue LowerREADCYCLECOUNTER(SDValue Op, const X86Subtarget *Subtarget,
- SelectionDAG &DAG) {
- assert(Subtarget->is64Bit() && "Result not type legalized?");
- SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
- SDValue TheChain = Op.getOperand(0);
- SDLoc dl(Op);
- SDValue rd = DAG.getNode(X86ISD::RDTSC_DAG, dl, Tys, &TheChain, 1);
- SDValue rax = DAG.getCopyFromReg(rd, dl, X86::RAX, MVT::i64, rd.getValue(1));
- SDValue rdx = DAG.getCopyFromReg(rax.getValue(1), dl, X86::RDX, MVT::i64,
- rax.getValue(2));
- SDValue Tmp = DAG.getNode(ISD::SHL, dl, MVT::i64, rdx,
- DAG.getConstant(32, MVT::i8));
- SDValue Ops[] = {
- DAG.getNode(ISD::OR, dl, MVT::i64, rax, Tmp),
- rdx.getValue(1)
- };
- return DAG.getMergeValues(Ops, array_lengthof(Ops), dl);
-}
-
static SDValue LowerBITCAST(SDValue Op, const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
MVT SrcVT = Op.getOperand(0).getSimpleValueType();
MVT DstVT = Op.getSimpleValueType();
+
+ if (SrcVT == MVT::v2i32 || SrcVT == MVT::v4i16 || SrcVT == MVT::v8i8) {
+ assert(Subtarget->hasSSE2() && "Requires at least SSE2!");
+ if (DstVT != MVT::f64)
+ // This conversion needs to be expanded.
+ return SDValue();
+
+ SDValue InVec = Op->getOperand(0);
+ SDLoc dl(Op);
+ unsigned NumElts = SrcVT.getVectorNumElements();
+ EVT SVT = SrcVT.getVectorElementType();
+
+ // Widen the input vector in the case of MVT::v2i32.
+ // Example: from MVT::v2i32 to MVT::v4i32.
+ SmallVector<SDValue, 16> Elts;
+ for (unsigned i = 0, e = NumElts; i != e; ++i)
+ Elts.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SVT, InVec,
+ DAG.getIntPtrConstant(i)));
+
+ // Explicitly mark the extra elements as Undef.
+ SDValue Undef = DAG.getUNDEF(SVT);
+ for (unsigned i = NumElts, e = NumElts * 2; i != e; ++i)
+ Elts.push_back(Undef);
+
+ EVT NewVT = EVT::getVectorVT(*DAG.getContext(), SVT, NumElts * 2);
+ SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, NewVT, Elts);
+ SDValue ToV2F64 = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, BV);
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, ToV2F64,
+ DAG.getIntPtrConstant(0));
+ }
+
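A scalar model of the MVT::v2i32 -> f64 path above (a sketch, assuming little-endian lane order): the two i32 lanes become the raw 64 bits of the double, with no numeric conversion.

    #include <cstdint>
    #include <cstring>

    double bitcast_v2i32_to_f64(uint32_t lane0, uint32_t lane1) {
      uint64_t bits = ((uint64_t)lane1 << 32) | lane0; // lane0 least significant
      double d;
      std::memcpy(&d, &bits, sizeof d); // the BITCAST + extract of element 0
      return d;
    }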
assert(Subtarget->is64Bit() && !Subtarget->hasSSE2() &&
Subtarget->hasMMX() && "Unexpected custom BITCAST");
assert((DstVT == MVT::i64 ||
@@ -13641,8 +14331,7 @@ static SDValue LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG) {
cast<AtomicSDNode>(Node)->getMemoryVT(),
Node->getOperand(0),
Node->getOperand(1), negOp,
- cast<AtomicSDNode>(Node)->getSrcValue(),
- cast<AtomicSDNode>(Node)->getAlignment(),
+ cast<AtomicSDNode>(Node)->getMemOperand(),
cast<AtomicSDNode>(Node)->getOrdering(),
cast<AtomicSDNode>(Node)->getSynchScope());
}
@@ -13730,12 +14419,11 @@ static SDValue LowerFSINCOS(SDValue Op, const X86Subtarget *Subtarget,
Type *RetTy = isF64
? (Type*)StructType::get(ArgTy, ArgTy, NULL)
: (Type*)VectorType::get(ArgTy, 4);
- TargetLowering::
- CallLoweringInfo CLI(DAG.getEntryNode(), RetTy,
- false, false, false, false, 0,
- CallingConv::C, /*isTaillCall=*/false,
- /*doesNotRet=*/false, /*isReturnValueUsed*/true,
- Callee, Args, DAG, dl);
+
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(dl).setChain(DAG.getEntryNode())
+ .setCallee(CallingConv::C, RetTy, Callee, &Args, 0);
+
std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
if (isF64)
@@ -13764,6 +14452,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
+ case ISD::VSELECT: return LowerVSELECT(Op, DAG);
case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op,Subtarget,DAG);
@@ -13815,6 +14504,8 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::CTLZ_ZERO_UNDEF: return LowerCTLZ_ZERO_UNDEF(Op, DAG);
case ISD::CTTZ: return LowerCTTZ(Op, DAG);
case ISD::MUL: return LowerMUL(Op, Subtarget, DAG);
+ case ISD::UMUL_LOHI:
+ case ISD::SMUL_LOHI: return LowerMUL_LOHI(Op, Subtarget, DAG);
case ISD::SRA:
case ISD::SRL:
case ISD::SHL: return LowerShift(Op, Subtarget, DAG);
@@ -13832,7 +14523,6 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::SUBE: return LowerADDC_ADDE_SUBC_SUBE(Op, DAG);
case ISD::ADD: return LowerADD(Op, DAG);
case ISD::SUB: return LowerSUB(Op, DAG);
- case ISD::SDIV: return LowerSDIV(Op, DAG);
case ISD::FSINCOS: return LowerFSINCOS(Op, Subtarget, DAG);
}
}
@@ -13875,10 +14565,10 @@ ReplaceATOMIC_BINARY_64(SDNode *Node, SmallVectorImpl<SDValue>&Results,
SDValue Ops[] = { Chain, In1, In2L, In2H };
SDVTList Tys = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
SDValue Result =
- DAG.getMemIntrinsicNode(NewOp, dl, Tys, Ops, array_lengthof(Ops), MVT::i64,
+ DAG.getMemIntrinsicNode(NewOp, dl, Tys, Ops, MVT::i64,
cast<MemSDNode>(Node)->getMemOperand());
SDValue OpsF[] = { Result.getValue(0), Result.getValue(1)};
- Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, OpsF, 2));
+ Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, OpsF));
Results.push_back(Result.getValue(2));
}
@@ -13899,6 +14589,16 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
case ISD::SUBE:
// We don't want to expand or promote these.
return;
+ case ISD::SDIV:
+ case ISD::UDIV:
+ case ISD::SREM:
+ case ISD::UREM:
+ case ISD::SDIVREM:
+ case ISD::UDIVREM: {
+ SDValue V = LowerWin64_i128OP(SDValue(N,0), DAG);
+ Results.push_back(V);
+ return;
+ }
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT: {
bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;
@@ -13909,10 +14609,10 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
std::pair<SDValue,SDValue> Vals =
FP_TO_INTHelper(SDValue(N, 0), DAG, IsSigned, /*IsReplace=*/ true);
SDValue FIST = Vals.first, StackSlot = Vals.second;
- if (FIST.getNode() != 0) {
+ if (FIST.getNode()) {
EVT VT = N->getValueType(0);
// Return a load from the stack slot.
- if (StackSlot.getNode() != 0)
+ if (StackSlot.getNode())
Results.push_back(DAG.getLoad(VT, dl, FIST, StackSlot,
MachinePointerInfo(),
false, false, false, 0));
@@ -13945,20 +14645,22 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
Results.push_back(V);
return;
}
+ case ISD::INTRINSIC_W_CHAIN: {
+ unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+ switch (IntNo) {
+ default : llvm_unreachable("Do not know how to custom type "
+ "legalize this intrinsic operation!");
+ case Intrinsic::x86_rdtsc:
+ return getReadTimeStampCounter(N, dl, X86ISD::RDTSC_DAG, DAG, Subtarget,
+ Results);
+ case Intrinsic::x86_rdtscp:
+ return getReadTimeStampCounter(N, dl, X86ISD::RDTSCP_DAG, DAG, Subtarget,
+ Results);
+ }
+ }
case ISD::READCYCLECOUNTER: {
- SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
- SDValue TheChain = N->getOperand(0);
- SDValue rd = DAG.getNode(X86ISD::RDTSC_DAG, dl, Tys, &TheChain, 1);
- SDValue eax = DAG.getCopyFromReg(rd, dl, X86::EAX, MVT::i32,
- rd.getValue(1));
- SDValue edx = DAG.getCopyFromReg(eax.getValue(1), dl, X86::EDX, MVT::i32,
- eax.getValue(2));
- // Use a buildpair to merge the two 32-bit values into a 64-bit one.
- SDValue Ops[] = { eax, edx };
- Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Ops,
- array_lengthof(Ops)));
- Results.push_back(edx.getValue(1));
- return;
+ return getReadTimeStampCounter(N, dl, X86ISD::RDTSC_DAG, DAG, Subtarget,
+ Results);
}
case ISD::ATOMIC_CMP_SWAP: {
EVT T = N->getValueType(0);
@@ -13994,8 +14696,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
unsigned Opcode = Regs64bit ? X86ISD::LCMPXCHG16_DAG :
X86ISD::LCMPXCHG8_DAG;
- SDValue Result = DAG.getMemIntrinsicNode(Opcode, dl, Tys,
- Ops, array_lengthof(Ops), T, MMO);
+ SDValue Result = DAG.getMemIntrinsicNode(Opcode, dl, Tys, Ops, T, MMO);
SDValue cpOutL = DAG.getCopyFromReg(Result.getValue(0), dl,
Regs64bit ? X86::RAX : X86::EAX,
HalfT, Result.getValue(1));
@@ -14003,7 +14704,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
Regs64bit ? X86::RDX : X86::EDX,
HalfT, cpOutL.getValue(2));
SDValue OpsF[] = { cpOutL.getValue(0), cpOutH.getValue(0)};
- Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, T, OpsF, 2));
+ Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, T, OpsF));
Results.push_back(cpOutH.getValue(1));
return;
}
@@ -14058,14 +14759,39 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
ReplaceATOMIC_BINARY_64(N, Results, DAG, Opc);
return;
}
- case ISD::ATOMIC_LOAD:
+ case ISD::ATOMIC_LOAD: {
ReplaceATOMIC_LOAD(N, Results, DAG);
+ return;
+ }
+ case ISD::BITCAST: {
+ assert(Subtarget->hasSSE2() && "Requires at least SSE2!");
+ EVT DstVT = N->getValueType(0);
+ EVT SrcVT = N->getOperand(0)->getValueType(0);
+
+ if (SrcVT != MVT::f64 ||
+ (DstVT != MVT::v2i32 && DstVT != MVT::v4i16 && DstVT != MVT::v8i8))
+ return;
+
+ unsigned NumElts = DstVT.getVectorNumElements();
+ EVT SVT = DstVT.getVectorElementType();
+ EVT WiderVT = EVT::getVectorVT(*DAG.getContext(), SVT, NumElts * 2);
+ SDValue Expanded = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl,
+ MVT::v2f64, N->getOperand(0));
+ SDValue ToVecInt = DAG.getNode(ISD::BITCAST, dl, WiderVT, Expanded);
+
+ SmallVector<SDValue, 8> Elts;
+ for (unsigned i = 0, e = NumElts; i != e; ++i)
+ Elts.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SVT,
+ ToVecInt, DAG.getIntPtrConstant(i)));
+
+ Results.push_back(DAG.getNode(ISD::BUILD_VECTOR, dl, DstVT, Elts));
+ }
}
}
const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
switch (Opcode) {
- default: return NULL;
+ default: return nullptr;
case X86ISD::BSF: return "X86ISD::BSF";
case X86ISD::BSR: return "X86ISD::BSR";
case X86ISD::SHLD: return "X86ISD::SHLD";
@@ -14176,7 +14902,6 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::OR: return "X86ISD::OR";
case X86ISD::XOR: return "X86ISD::XOR";
case X86ISD::AND: return "X86ISD::AND";
- case X86ISD::BZHI: return "X86ISD::BZHI";
case X86ISD::BEXTR: return "X86ISD::BEXTR";
case X86ISD::MUL_IMM: return "X86ISD::MUL_IMM";
case X86ISD::PTEST: return "X86ISD::PTEST";
@@ -14203,6 +14928,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::UNPCKH: return "X86ISD::UNPCKH";
case X86ISD::VBROADCAST: return "X86ISD::VBROADCAST";
case X86ISD::VBROADCASTM: return "X86ISD::VBROADCASTM";
+ case X86ISD::VEXTRACT: return "X86ISD::VEXTRACT";
case X86ISD::VPERMILP: return "X86ISD::VPERMILP";
case X86ISD::VPERM2X128: return "X86ISD::VPERM2X128";
case X86ISD::VPERMV: return "X86ISD::VPERMV";
@@ -14210,6 +14936,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::VPERMIV3: return "X86ISD::VPERMIV3";
case X86ISD::VPERMI: return "X86ISD::VPERMI";
case X86ISD::PMULUDQ: return "X86ISD::PMULUDQ";
+ case X86ISD::PMULDQ: return "X86ISD::PMULDQ";
case X86ISD::VASTART_SAVE_XMM_REGS: return "X86ISD::VASTART_SAVE_XMM_REGS";
case X86ISD::VAARG_64: return "X86ISD::VAARG_64";
case X86ISD::WIN_ALLOCA: return "X86ISD::WIN_ALLOCA";
@@ -14240,7 +14967,7 @@ bool X86TargetLowering::isLegalAddressingMode(const AddrMode &AM,
Reloc::Model R = getTargetMachine().getRelocationModel();
// X86 allows a sign-extended 32-bit immediate field as a displacement.
- if (!X86::isOffsetSuitableForCodeModel(AM.BaseOffs, M, AM.BaseGV != NULL))
+ if (!X86::isOffsetSuitableForCodeModel(AM.BaseOffs, M, AM.BaseGV != nullptr))
return false;
if (AM.BaseGV) {
@@ -14418,7 +15145,23 @@ X86TargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
if (VT.getSizeInBits() == 64)
return false;
- // FIXME: pshufb, blends, shifts.
+ // If this is a single-input shuffle with no 128-bit lane crossings we can
+ // lower it into pshufb.
+ if ((SVT.is128BitVector() && Subtarget->hasSSSE3()) ||
+ (SVT.is256BitVector() && Subtarget->hasInt256())) {
+ bool isLegal = true;
+ for (unsigned I = 0, E = M.size(); I != E; ++I) {
+ if (M[I] >= (int)SVT.getVectorNumElements() ||
+ ShuffleCrosses128bitLane(SVT, I, M[I])) {
+ isLegal = false;
+ break;
+ }
+ }
+ if (isLegal)
+ return true;
+ }
+
+ // FIXME: blends, shifts.
return (SVT.getVectorNumElements() == 2 ||
ShuffleVectorSDNode::isSplatMask(&M[0], VT) ||
isMOVLMask(M, SVT) ||
@@ -15366,7 +16109,7 @@ X86TargetLowering::EmitVAARG64WithCustomInserter(
OffsetDestReg = 0; // unused
OverflowDestReg = DestReg;
- offsetMBB = NULL;
+ offsetMBB = nullptr;
overflowMBB = thisMBB;
endMBB = thisMBB;
} else {
@@ -15736,7 +16479,7 @@ X86TargetLowering::EmitLoweredSegAlloca(MachineInstr *MI, MachineBasicBlock *BB,
MachineFunction *MF = BB->getParent();
const BasicBlock *LLVM_BB = BB->getBasicBlock();
- assert(getTargetMachine().Options.EnableSegmentedStacks);
+ assert(MF->shouldSplitStack());
unsigned TlsReg = Is64Bit ? X86::FS : X86::GS;
unsigned TlsOffset = Is64Bit ? 0x70 : 0x30;
@@ -16509,11 +17252,11 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
// X86 Optimization Hooks
//===----------------------------------------------------------------------===//
-void X86TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
- APInt &KnownZero,
- APInt &KnownOne,
- const SelectionDAG &DAG,
- unsigned Depth) const {
+void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth) const {
unsigned BitWidth = KnownZero.getBitWidth();
unsigned Opc = Op.getOpcode();
assert((Opc >= ISD::BUILTIN_OP_END ||
@@ -16576,8 +17319,10 @@ void X86TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
}
}
-unsigned X86TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
- unsigned Depth) const {
+unsigned X86TargetLowering::ComputeNumSignBitsForTargetNode(
+ SDValue Op,
+ const SelectionDAG &,
+ unsigned Depth) const {
// SETCC_CARRY sets the dest to ~0 for true or 0 for false.
if (Op.getOpcode() == X86ISD::SETCC_CARRY)
return Op.getValueType().getScalarType().getSizeInBits();
@@ -16679,7 +17424,6 @@ static SDValue PerformShuffleCombine256(SDNode *N, SelectionDAG &DAG,
SDValue Ops[] = { Ld->getChain(), Ld->getBasePtr() };
SDValue ResNode =
DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops,
- array_lengthof(Ops),
Ld->getMemoryVT(),
Ld->getPointerInfo(),
Ld->getAlignment(),
@@ -17036,6 +17780,51 @@ matchIntegerMINMAX(SDValue Cond, EVT VT, SDValue LHS, SDValue RHS,
return std::make_pair(Opc, NeedSplit);
}
+static SDValue
+TransformVSELECTtoBlendVECTOR_SHUFFLE(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget *Subtarget) {
+ SDLoc dl(N);
+ SDValue Cond = N->getOperand(0);
+ SDValue LHS = N->getOperand(1);
+ SDValue RHS = N->getOperand(2);
+
+ if (Cond.getOpcode() == ISD::SIGN_EXTEND) {
+ SDValue CondSrc = Cond->getOperand(0);
+ if (CondSrc->getOpcode() == ISD::SIGN_EXTEND_INREG)
+ Cond = CondSrc->getOperand(0);
+ }
+
+ MVT VT = N->getSimpleValueType(0);
+ MVT EltVT = VT.getVectorElementType();
+ unsigned NumElems = VT.getVectorNumElements();
+ // There is no blend with immediate in AVX-512.
+ if (VT.is512BitVector())
+ return SDValue();
+
+ if (!Subtarget->hasSSE41() || EltVT == MVT::i8)
+ return SDValue();
+ if (!Subtarget->hasInt256() && VT == MVT::v16i16)
+ return SDValue();
+
+ if (!ISD::isBuildVectorOfConstantSDNodes(Cond.getNode()))
+ return SDValue();
+
+ unsigned MaskValue = 0;
+ if (!BUILD_VECTORtoBlendMask(cast<BuildVectorSDNode>(Cond), MaskValue))
+ return SDValue();
+
+ SmallVector<int, 8> ShuffleMask(NumElems, -1);
+ for (unsigned i = 0; i < NumElems; ++i) {
+ // Be sure we emit undef where we can.
+ if (Cond.getOperand(i)->getOpcode() == ISD::UNDEF)
+ ShuffleMask[i] = -1;
+ else
+ ShuffleMask[i] = i + NumElems * ((MaskValue >> i) & 1);
+ }
+
+ return DAG.getVectorShuffle(VT, dl, LHS, RHS, &ShuffleMask[0]);
+}
+
/// PerformSELECTCombine - Do target-specific dag combines on SELECT and VSELECT
/// nodes.
static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
@@ -17378,7 +18167,7 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
// Another special case: If C was a sign bit, the sub has been
// canonicalized into a xor.
- // FIXME: Would it be better to use ComputeMaskedBits to determine whether
+ // FIXME: Would it be better to use computeKnownBits to determine whether
// it's safe to decanonicalize the xor?
// x s< 0 ? x^C : 0 --> subus x, C
if (CC == ISD::SETLT && Other->getOpcode() == ISD::XOR &&
@@ -17544,7 +18333,13 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
// depend on the highest bit in each word. Try to use SimplifyDemandedBits
// to simplify previous instructions.
if (N->getOpcode() == ISD::VSELECT && DCI.isBeforeLegalizeOps() &&
- !DCI.isBeforeLegalize() && TLI.isOperationLegal(ISD::VSELECT, VT)) {
+ !DCI.isBeforeLegalize() &&
+ // We explicitly check against v8i16 and v16i16 because, although
+ // they're marked as Custom, they might only be legal when Cond is a
+ // build_vector of constants. This will be taken care of in a later
+ // condition.
+ (TLI.isOperationLegalOrCustom(ISD::VSELECT, VT) && VT != MVT::v16i16 &&
+ VT != MVT::v8i16)) {
unsigned BitWidth = Cond.getValueType().getScalarType().getSizeInBits();
// Don't optimize vector selects that map to mask-registers.
@@ -17571,6 +18366,23 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
DCI.CommitTargetLoweringOpt(TLO);
}
+ // We should generate an X86ISD::BLENDI from a vselect if its argument
+ // is a sign_extend_inreg of an any_extend of a BUILD_VECTOR of
+ // constants. This specific pattern gets generated when we split a
+ // selector for a 512-bit vector on a machine without AVX512 (but with
+ // 256-bit vectors), during legalization:
+ //
+ // (vselect (sign_extend (any_extend (BUILD_VECTOR)) i1) LHS RHS)
+ //
+ // Iff we find this pattern and the build_vectors are built from
+ // constants, we translate the vselect into a shuffle_vector that we
+ // know will be matched by LowerVECTOR_SHUFFLEtoBlend.
+ if (N->getOpcode() == ISD::VSELECT && !DCI.isBeforeLegalize()) {
+ SDValue Shuffle = TransformVSELECTtoBlendVECTOR_SHUFFLE(N, DAG, Subtarget);
+ if (Shuffle.getNode())
+ return Shuffle;
+ }
+
return SDValue();
}
@@ -17605,7 +18417,7 @@ static SDValue checkBoolTestSetCCCombine(SDValue Cmp, X86::CondCode &CC) {
SDValue Op2 = Cmp.getOperand(1);
SDValue SetCC;
- const ConstantSDNode* C = 0;
+ const ConstantSDNode* C = nullptr;
bool needOppositeCond = (CC == X86::COND_E);
bool checkAgainstTrue = false; // Is it a comparison against 1?
@@ -17740,8 +18552,7 @@ static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG,
(FalseOp.getValueType() != MVT::f80 || hasFPCMov(CC))) {
SDValue Ops[] = { FalseOp, TrueOp,
DAG.getConstant(CC, MVT::i8), Flags };
- return DAG.getNode(X86ISD::CMOV, DL, N->getVTList(),
- Ops, array_lengthof(Ops));
+ return DAG.getNode(X86ISD::CMOV, DL, N->getVTList(), Ops);
}
// If this is a select between two integer constants, try to do some
@@ -17856,7 +18667,7 @@ static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG,
// the DCI.xxxx conditions are provided to postpone the optimization as
// late as possible.
- ConstantSDNode *CmpAgainst = 0;
+ ConstantSDNode *CmpAgainst = nullptr;
if ((Cond.getOpcode() == X86ISD::CMP || Cond.getOpcode() == X86ISD::SUB) &&
(CmpAgainst = dyn_cast<ConstantSDNode>(Cond.getOperand(1))) &&
!isa<ConstantSDNode>(Cond.getOperand(0))) {
@@ -17871,8 +18682,7 @@ static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG,
CmpAgainst == dyn_cast<ConstantSDNode>(TrueOp)) {
SDValue Ops[] = { FalseOp, Cond.getOperand(0),
DAG.getConstant(CC, MVT::i8), Cond };
- return DAG.getNode(X86ISD::CMOV, DL, N->getVTList (), Ops,
- array_lengthof(Ops));
+ return DAG.getNode(X86ISD::CMOV, DL, N->getVTList (), Ops);
}
}
}
@@ -17880,6 +18690,106 @@ static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+static SDValue PerformINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget *Subtarget) {
+ unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
+ switch (IntNo) {
+ default: return SDValue();
+ // SSE/AVX/AVX2 blend intrinsics.
+ case Intrinsic::x86_avx2_pblendvb:
+ case Intrinsic::x86_avx2_pblendw:
+ case Intrinsic::x86_avx2_pblendd_128:
+ case Intrinsic::x86_avx2_pblendd_256:
+ // Don't try to simplify this intrinsic if we don't have AVX2.
+ if (!Subtarget->hasAVX2())
+ return SDValue();
+ // FALL-THROUGH
+ case Intrinsic::x86_avx_blend_pd_256:
+ case Intrinsic::x86_avx_blend_ps_256:
+ case Intrinsic::x86_avx_blendv_pd_256:
+ case Intrinsic::x86_avx_blendv_ps_256:
+ // Don't try to simplify this intrinsic if we don't have AVX.
+ if (!Subtarget->hasAVX())
+ return SDValue();
+ // FALL-THROUGH
+ case Intrinsic::x86_sse41_pblendw:
+ case Intrinsic::x86_sse41_blendpd:
+ case Intrinsic::x86_sse41_blendps:
+ case Intrinsic::x86_sse41_blendvps:
+ case Intrinsic::x86_sse41_blendvpd:
+ case Intrinsic::x86_sse41_pblendvb: {
+ SDValue Op0 = N->getOperand(1);
+ SDValue Op1 = N->getOperand(2);
+ SDValue Mask = N->getOperand(3);
+
+ // Don't try to simplify this intrinsic if we don't have SSE4.1.
+ if (!Subtarget->hasSSE41())
+ return SDValue();
+
+ // fold (blend A, A, Mask) -> A
+ if (Op0 == Op1)
+ return Op0;
+ // fold (blend A, B, allZeros) -> A
+ if (ISD::isBuildVectorAllZeros(Mask.getNode()))
+ return Op0;
+ // fold (blend A, B, allOnes) -> B
+ if (ISD::isBuildVectorAllOnes(Mask.getNode()))
+ return Op1;
+
+ // Simplify the case where the mask is a constant i32 value.
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Mask)) {
+ if (C->isNullValue())
+ return Op0;
+ if (C->isAllOnesValue())
+ return Op1;
+ }
+ }
+
+ // Packed SSE2/AVX2 arithmetic shift intrinsics (immediate and vector counts).
+ case Intrinsic::x86_sse2_psrai_w:
+ case Intrinsic::x86_sse2_psrai_d:
+ case Intrinsic::x86_avx2_psrai_w:
+ case Intrinsic::x86_avx2_psrai_d:
+ case Intrinsic::x86_sse2_psra_w:
+ case Intrinsic::x86_sse2_psra_d:
+ case Intrinsic::x86_avx2_psra_w:
+ case Intrinsic::x86_avx2_psra_d: {
+ SDValue Op0 = N->getOperand(1);
+ SDValue Op1 = N->getOperand(2);
+ EVT VT = Op0.getValueType();
+ assert(VT.isVector() && "Expected a vector type!");
+
+ if (isa<BuildVectorSDNode>(Op1))
+ Op1 = Op1.getOperand(0);
+
+ if (!isa<ConstantSDNode>(Op1))
+ return SDValue();
+
+ EVT SVT = VT.getVectorElementType();
+ unsigned SVTBits = SVT.getSizeInBits();
+
+ ConstantSDNode *CND = cast<ConstantSDNode>(Op1);
+ const APInt &C = APInt(SVTBits, CND->getAPIntValue().getZExtValue());
+ uint64_t ShAmt = C.getZExtValue();
+
+ // Don't try to convert this shift into an ISD::SRA if the shift
+ // count is greater than or equal to the element size.
+ if (ShAmt >= SVTBits)
+ return SDValue();
+
+ // Trivial case: if the shift count is zero, then fold this
+ // into the first operand.
+ if (ShAmt == 0)
+ return Op0;
+
+ // Replace this packed shift intrinsic with a target-independent
+ // shift DAG node.
+ SDValue Splat = DAG.getConstant(C, VT);
+ return DAG.getNode(ISD::SRA, SDLoc(N), VT, Op0, Splat);
+ }
+ }
+}
+
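A model of the constant-count fold above (an illustration, not generated code): once the count is a known constant smaller than the element width, the intrinsic is just a per-lane arithmetic shift, which is what the ISD::SRA splat expresses.

    #include <immintrin.h>

    // Both of these compute lane >> 5 with sign fill, for every lane.
    __m128i via_intrinsic(__m128i v) { return _mm_srai_epi32(v, 5); }
    int32_t scalar_model(int32_t lane) { return lane >> 5; }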
/// PerformMulCombine - Optimize a single multiply with constant into two
/// in order to implement it with two cheaper instructions, e.g.
/// LEA + SHL, LEA + LEA.
@@ -18223,7 +19133,7 @@ static SDValue WidenMaskArithmetic(SDNode *N, SelectionDAG &DAG,
N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT.getScalarType(),
N1->getOperand(0));
SmallVector<SDValue, 8> C(WideVT.getVectorNumElements(), N1);
- N1 = DAG.getNode(ISD::BUILD_VECTOR, DL, WideVT, &C[0], C.size());
+ N1 = DAG.getNode(ISD::BUILD_VECTOR, DL, WideVT, C);
} else if (RHSTrunc) {
N1 = N1->getOperand(0);
}
@@ -18260,40 +19170,13 @@ static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG,
if (R.getNode())
return R;
- // Create BEXTR and BZHI instructions
- // BZHI is X & ((1 << Y) - 1)
+ // Create BEXTR instructions
// BEXTR is ((X >> imm) & (2**size-1))
if (VT == MVT::i32 || VT == MVT::i64) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDLoc DL(N);
- if (Subtarget->hasBMI2()) {
- // Check for (and (add (shl 1, Y), -1), X)
- if (N0.getOpcode() == ISD::ADD && isAllOnes(N0.getOperand(1))) {
- SDValue N00 = N0.getOperand(0);
- if (N00.getOpcode() == ISD::SHL) {
- SDValue N001 = N00.getOperand(1);
- assert(N001.getValueType() == MVT::i8 && "unexpected type");
- ConstantSDNode *C = dyn_cast<ConstantSDNode>(N00.getOperand(0));
- if (C && C->getZExtValue() == 1)
- return DAG.getNode(X86ISD::BZHI, DL, VT, N1, N001);
- }
- }
-
- // Check for (and X, (add (shl 1, Y), -1))
- if (N1.getOpcode() == ISD::ADD && isAllOnes(N1.getOperand(1))) {
- SDValue N10 = N1.getOperand(0);
- if (N10.getOpcode() == ISD::SHL) {
- SDValue N101 = N10.getOperand(1);
- assert(N101.getValueType() == MVT::i8 && "unexpected type");
- ConstantSDNode *C = dyn_cast<ConstantSDNode>(N10.getOperand(0));
- if (C && C->getZExtValue() == 1)
- return DAG.getNode(X86ISD::BZHI, DL, VT, N0, N101);
- }
- }
- }
-
// Check for BEXTR.
if ((Subtarget->hasBMI() || Subtarget->hasTBM()) &&
(N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::SRL)) {
@@ -18533,8 +19416,7 @@ static SDValue performIntegerAbsCombine(SDNode *N, SelectionDAG &DAG) {
SDValue Ops[] = { N0.getOperand(0), Neg,
DAG.getConstant(X86::COND_GE, MVT::i8),
SDValue(Neg.getNode(), 1) };
- return DAG.getNode(X86ISD::CMOV, DL, DAG.getVTList(VT, MVT::Glue),
- Ops, array_lengthof(Ops));
+ return DAG.getNode(X86ISD::CMOV, DL, DAG.getVTList(VT, MVT::Glue), Ops);
}
return SDValue();
}
@@ -18691,8 +19573,7 @@ static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG,
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
}
- SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Chains[0],
- Chains.size());
+ SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
// Bitcast the loaded value to a vector of the original element type, in
// the size of the target vector type.
@@ -18867,8 +19748,7 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
Chains.push_back(Ch);
}
- return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Chains[0],
- Chains.size());
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
}
// Turn load->store of MMX types into GPR load/stores. This avoids clobbering
@@ -18891,7 +19771,7 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
!cast<LoadSDNode>(St->getValue())->isVolatile() &&
St->getChain().hasOneUse() && !St->isVolatile()) {
SDNode* LdVal = St->getValue().getNode();
- LoadSDNode *Ld = 0;
+ LoadSDNode *Ld = nullptr;
int TokenFactorIndex = -1;
SmallVector<SDValue, 8> Ops;
SDNode* ChainVal = St->getChain().getNode();
@@ -18934,8 +19814,7 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
SDValue NewChain = NewLd.getValue(1);
if (TokenFactorIndex != -1) {
Ops.push_back(NewChain);
- NewChain = DAG.getNode(ISD::TokenFactor, LdDL, MVT::Other, &Ops[0],
- Ops.size());
+ NewChain = DAG.getNode(ISD::TokenFactor, LdDL, MVT::Other, Ops);
}
return DAG.getStore(NewChain, StDL, NewLd, St->getBasePtr(),
St->getPointerInfo(),
@@ -18962,8 +19841,7 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
if (TokenFactorIndex != -1) {
Ops.push_back(LoLd);
Ops.push_back(HiLd);
- NewChain = DAG.getNode(ISD::TokenFactor, LdDL, MVT::Other, &Ops[0],
- Ops.size());
+ NewChain = DAG.getNode(ISD::TokenFactor, LdDL, MVT::Other, Ops);
}
LoAddr = St->getBasePtr();
@@ -19432,6 +20310,33 @@ static SDValue PerformISDSETCCCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+static SDValue PerformINSERTPSCombine(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget *Subtarget) {
+ SDLoc dl(N);
+ MVT VT = N->getOperand(1)->getSimpleValueType(0);
+ assert((VT == MVT::v4f32 || VT == MVT::v4i32) &&
+ "X86insertps is only defined for v4x32");
+
+ SDValue Ld = N->getOperand(1);
+ if (MayFoldLoad(Ld)) {
+ // Extract the countS bits from the immediate so we can get the proper
+ // address when narrowing the vector load to a specific element.
+ // When the second source op is a memory address, insertps doesn't use
+ // countS and just gets an f32 from that address.
+ unsigned DestIndex =
+ cast<ConstantSDNode>(N->getOperand(2))->getZExtValue() >> 6;
+ Ld = NarrowVectorLoadToElement(cast<LoadSDNode>(Ld), DestIndex, DAG);
+ } else
+ return SDValue();
+
+ // Create this as a scalar to vector to match the instruction pattern.
+ SDValue LoadScalarToVector = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Ld);
+ // countS bits are ignored when loading from memory on insertps, which
+ // means we don't need to explicitly set them to 0.
+ return DAG.getNode(X86ISD::INSERTPS, dl, VT, N->getOperand(0),
+ LoadScalarToVector, N->getOperand(2));
+}
+
// Helper function of PerformSETCCCombine. It is to materialize "setb reg"
// as "sbb reg,reg", since it can be extended without zext and produces
// an all-ones bit which is more useful than 0/1 in some cases.
@@ -19711,7 +20616,8 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case ISD::ANY_EXTEND:
case ISD::ZERO_EXTEND: return PerformZExtCombine(N, DAG, DCI, Subtarget);
case ISD::SIGN_EXTEND: return PerformSExtCombine(N, DAG, DCI, Subtarget);
- case ISD::SIGN_EXTEND_INREG: return PerformSIGN_EXTEND_INREGCombine(N, DAG, Subtarget);
+ case ISD::SIGN_EXTEND_INREG:
+ return PerformSIGN_EXTEND_INREGCombine(N, DAG, Subtarget);
case ISD::TRUNCATE: return PerformTruncateCombine(N, DAG,DCI,Subtarget);
case ISD::SETCC: return PerformISDSETCCCombine(N, DAG, Subtarget);
case X86ISD::SETCC: return PerformSETCCCombine(N, DAG, DCI, Subtarget);
@@ -19732,6 +20638,10 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case X86ISD::VPERM2X128:
case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, DCI,Subtarget);
case ISD::FMA: return PerformFMACombine(N, DAG, Subtarget);
+ case ISD::INTRINSIC_WO_CHAIN:
+ return PerformINTRINSIC_WO_CHAINCombine(N, DAG, Subtarget);
+ case X86ISD::INSERTPS:
+ return PerformINSERTPSCombine(N, DAG, Subtarget);
}
return SDValue();
@@ -20006,7 +20916,7 @@ TargetLowering::ConstraintWeight
Value *CallOperandVal = info.CallOperandVal;
// If we don't have a value, we can't do a match,
// but allow it at the lowest weight.
- if (CallOperandVal == NULL)
+ if (!CallOperandVal)
return CW_Default;
Type *type = CallOperandVal->getType();
// Look at the constraint type.
@@ -20124,7 +21034,7 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
std::string &Constraint,
std::vector<SDValue>&Ops,
SelectionDAG &DAG) const {
- SDValue Result(0, 0);
+ SDValue Result;
// Only support length 1 constraints for now.
if (Constraint.length() > 1) return;
@@ -20207,7 +21117,7 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
// If we are in non-pic codegen mode, we allow the address of a global (with
// an optional displacement) to be used with 'i'.
- GlobalAddressSDNode *GA = 0;
+ GlobalAddressSDNode *GA = nullptr;
int64_t Offset = 0;
// Match either (GA), (GA+C), (GA+C1+C2), etc.
@@ -20363,7 +21273,7 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
Res = TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
// Not found as a standard register?
- if (Res.second == 0) {
+ if (!Res.second) {
// Map st(0) -> st(7) -> ST0
if (Constraint.size() == 7 && Constraint[0] == '{' &&
tolower(Constraint[1]) == 's' &&
@@ -20488,3 +21398,30 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
return Res;
}
+
+int X86TargetLowering::getScalingFactorCost(const AddrMode &AM,
+ Type *Ty) const {
+ // Scaling factors are not free at all.
+ // An indexed folded instruction, i.e., inst (reg1, reg2, scale),
+ // will take 2 allocations in the out-of-order engine instead of 1
+ // for the plain addressing mode, i.e., inst (reg1).
+ // E.g.,
+ // vaddps (%rsi,%rdx), %ymm0, %ymm1
+ // Requires two allocations (one for the load, one for the computation)
+ // whereas:
+ // vaddps (%rsi), %ymm0, %ymm1
+ // Requires just 1 allocation, i.e., freeing allocations for other operations
+ // and having less micro operations to execute.
+ //
+ // For some X86 architectures, this is even worse because for instance for
+ // stores, the complex addressing mode forces the instruction to use the
+ // "load" ports instead of the dedicated "store" port.
+ // E.g., on Haswell:
+ // vmovaps %ymm1, (%r8, %rdi) can use port 2 or 3.
+ // vmovaps %ymm1, (%r8) can use port 2, 3, or 7.
+ if (isLegalAddressingMode(AM, Ty))
+ // Scale represents reg2 * scale, thus account for 1
+ // as soon as we use a second register.
+ return AM.Scale != 0;
+ return -1;
+}
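A hedged usage sketch of the new hook (hypothetical variable names; assumes an X86TargetLowering instance TLI and some load/store type Ty from the surrounding LLVM headers):

    TargetLowering::AddrMode AM;
    AM.BaseGV = nullptr;
    AM.BaseOffs = 0;
    AM.HasBaseReg = true;
    AM.Scale = 0;                              // plain [reg1]
    int C0 = TLI.getScalingFactorCost(AM, Ty); // 0: no scaled index register
    AM.Scale = 2;                              // [reg1 + 2*reg2]
    int C1 = TLI.getScalingFactorCost(AM, Ty); // 1: second register in use
    // An addressing mode that is not legal reports a negative cost.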
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 0f0d17b..9f51b53 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -83,6 +83,9 @@ namespace llvm {
/// readcyclecounter
RDTSC_DAG,
+ /// X86 Read Time-Stamp Counter and Processor ID.
+ RDTSCP_DAG,
+
/// X86 compare and logical compare instructions.
CMP, COMI, UCOMI,
@@ -291,7 +294,6 @@ namespace llvm {
ADD, SUB, ADC, SBB, SMUL,
INC, DEC, OR, XOR, AND,
- BZHI, // BZHI - Zero high bits
BEXTR, // BEXTR - Bit field extract
UMUL, // LOW, HI, FLAGS = umul LHS, RHS
@@ -345,6 +347,8 @@ namespace llvm {
// PMULUDQ - Vector multiply packed unsigned doubleword integers
PMULUDQ,
+ // PMULDQ - Vector multiply packed signed doubleword integers
+ PMULDQ,
// FMA nodes
FMADD,
@@ -614,18 +618,19 @@ namespace llvm {
/// getSetCCResultType - Return the value type to use for ISD::SETCC.
EVT getSetCCResultType(LLVMContext &Context, EVT VT) const override;
- /// computeMaskedBitsForTargetNode - Determine which of the bits specified
+ /// computeKnownBitsForTargetNode - Determine which of the bits specified
/// in Mask are known to be either zero or one and return them in the
/// KnownZero/KnownOne bitsets.
- void computeMaskedBitsForTargetNode(const SDValue Op,
- APInt &KnownZero,
- APInt &KnownOne,
- const SelectionDAG &DAG,
- unsigned Depth = 0) const override;
+ void computeKnownBitsForTargetNode(const SDValue Op,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth = 0) const override;
// ComputeNumSignBitsForTargetNode - Determine the number of bits in the
// operation that are sign bits.
unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
+ const SelectionDAG &DAG,
unsigned Depth) const override;
bool isGAPlusOffset(SDNode *N, const GlobalValue* &GA,
@@ -679,6 +684,12 @@ namespace llvm {
/// the immediate into a register.
bool isLegalAddImmediate(int64_t Imm) const override;
+ /// \brief Return the cost of the scaling factor used in the addressing
+ /// mode represented by AM for this target, for a load/store
+ /// of the specified type.
+ /// If the AM is supported, the return value must be >= 0.
+ /// If the AM is not supported, it returns a negative value.
+ int getScalingFactorCost(const AddrMode &AM, Type *Ty) const override;
bool isVectorShiftByScalarCheap(Type *Ty) const override;
@@ -771,10 +782,12 @@ namespace llvm {
Type *Ty) const override;
/// Intel processors have a unified instruction and data cache
- const char * getClearCacheBuiltinName() const {
- return 0; // nothing to do, move along.
+ const char * getClearCacheBuiltinName() const override {
+ return nullptr; // nothing to do, move along.
}
+ unsigned getRegisterByName(const char* RegName, EVT VT) const override;
+
/// createFastISel - This method returns a target specific FastISel object,
/// or null if the target does not support "fast" ISel.
FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
@@ -871,8 +884,11 @@ namespace llvm {
SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
SDValue ExtractBitFromMaskVector(SDValue Op, SelectionDAG &DAG) const;
+ SDValue InsertBitToMaskVector(SDValue Op, SelectionDAG &DAG) const;
+
SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
@@ -908,6 +924,7 @@ namespace llvm {
SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) const;
SDValue
LowerFormalArguments(SDValue Chain,
@@ -936,7 +953,7 @@ namespace llvm {
const SmallVectorImpl<ISD::OutputArg> &Outs,
LLVMContext &Context) const override;
- const uint16_t *getScratchRegisters(CallingConv::ID CC) const override;
+ const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
/// Utility function to emit atomic-load-arith operations (and, or, xor,
/// nand, max, min, umax, umin). It takes the corresponding instruction to
@@ -987,11 +1004,12 @@ namespace llvm {
/// Emit nodes that will be selected as "test Op0,Op0", or something
/// equivalent, for use with the given x86 condition code.
- SDValue EmitTest(SDValue Op0, unsigned X86CC, SelectionDAG &DAG) const;
+ SDValue EmitTest(SDValue Op0, unsigned X86CC, SDLoc dl,
+ SelectionDAG &DAG) const;
/// Emit nodes that will be selected as "cmp Op0,Op1", or something
/// equivalent, for use with the given x86 condition code.
- SDValue EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC,
+ SDValue EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC, SDLoc dl,
SelectionDAG &DAG) const;
/// Convert a comparison if required by the subtarget.
diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td
index 2c5edf6..37bcc52 100644
--- a/lib/Target/X86/X86InstrAVX512.td
+++ b/lib/Target/X86/X86InstrAVX512.td
@@ -209,12 +209,12 @@ def : Pat<(vinsert256_insert:$ins (v16i32 VR512:$src1),
def VINSERTPSzrr : AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst),
(ins VR128X:$src1, VR128X:$src2, u32u8imm:$src3),
"vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
- [(set VR128X:$dst, (X86insrtps VR128X:$src1, VR128X:$src2, imm:$src3))]>,
+ [(set VR128X:$dst, (X86insertps VR128X:$src1, VR128X:$src2, imm:$src3))]>,
EVEX_4V;
def VINSERTPSzrm: AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst),
(ins VR128X:$src1, f32mem:$src2, u32u8imm:$src3),
"vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
- [(set VR128X:$dst, (X86insrtps VR128X:$src1,
+ [(set VR128X:$dst, (X86insertps VR128X:$src1,
(v4f32 (scalar_to_vector (loadf32 addr:$src2))),
imm:$src3))]>, EVEX_4V, EVEX_CD8<32, CD8VT1>;
@@ -621,6 +621,22 @@ defm VPERMT2PS : avx512_perm_3src<0x7F, "vpermt2ps", VR512, memopv16f32, i512me
X86VPermv3, v16f32>, EVEX_V512, EVEX_CD8<32, CD8VF>;
defm VPERMT2PD : avx512_perm_3src<0x7F, "vpermt2pd", VR512, memopv8f64, i512mem,
X86VPermv3, v8f64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+
+def : Pat<(v16f32 (int_x86_avx512_mask_vpermt_ps_512 (v16i32 VR512:$idx),
+ (v16f32 VR512:$src1), (v16f32 VR512:$src2), (i16 -1))),
+ (VPERMT2PSrr VR512:$src1, VR512:$idx, VR512:$src2)>;
+
+def : Pat<(v16i32 (int_x86_avx512_mask_vpermt_d_512 (v16i32 VR512:$idx),
+ (v16i32 VR512:$src1), (v16i32 VR512:$src2), (i16 -1))),
+ (VPERMT2Drr VR512:$src1, VR512:$idx, VR512:$src2)>;
+
+def : Pat<(v8f64 (int_x86_avx512_mask_vpermt_pd_512 (v8i64 VR512:$idx),
+ (v8f64 VR512:$src1), (v8f64 VR512:$src2), (i8 -1))),
+ (VPERMT2PDrr VR512:$src1, VR512:$idx, VR512:$src2)>;
+
+def : Pat<(v8i64 (int_x86_avx512_mask_vpermt_q_512 (v8i64 VR512:$idx),
+ (v8i64 VR512:$src1), (v8i64 VR512:$src2), (i8 -1))),
+ (VPERMT2Qrr VR512:$src1, VR512:$idx, VR512:$src2)>;
//===----------------------------------------------------------------------===//
// AVX-512 - BLEND using mask
//
@@ -984,6 +1000,10 @@ let Predicates = [HasAVX512] in {
(EXTRACT_SUBREG
(AND32ri (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), (i32 1)),
sub_16bit)>;
+ def : Pat<(v16i1 (scalar_to_vector VK1:$src)),
+ (COPY_TO_REGCLASS VK1:$src, VK16)>;
+ def : Pat<(v8i1 (scalar_to_vector VK1:$src)),
+ (COPY_TO_REGCLASS VK1:$src, VK8)>;
}
// With AVX-512 only, 8-bit mask is promoted to 16-bit mask.
let Predicates = [HasAVX512] in {
@@ -1356,6 +1376,23 @@ defm VMOVDQU64: avx512_load<0x6F, VR512, VK8WM, i512mem, load,
"vmovdqu64", SSEPackedInt, v8i64>,
XS, VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>;
+def: Pat<(v16i32 (int_x86_avx512_mask_loadu_d_512 addr:$ptr,
+ (v16i32 immAllZerosV), GR16:$mask)),
+ (VMOVDQU32rmkz (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)), addr:$ptr)>;
+
+def: Pat<(v8i64 (int_x86_avx512_mask_loadu_q_512 addr:$ptr,
+ (bc_v8i64 (v16i32 immAllZerosV)), GR8:$mask)),
+ (VMOVDQU64rmkz (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)), addr:$ptr)>;
+
+def: Pat<(int_x86_avx512_mask_storeu_d_512 addr:$ptr, (v16i32 VR512:$src),
+ GR16:$mask),
+ (VMOVDQU32mrk addr:$ptr, (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)),
+ VR512:$src)>;
+def: Pat<(int_x86_avx512_mask_storeu_q_512 addr:$ptr, (v8i64 VR512:$src),
+ GR8:$mask),
+ (VMOVDQU64mrk addr:$ptr, (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)),
+ VR512:$src)>;
+
let AddedComplexity = 20 in {
def : Pat<(v8i64 (vselect VK8WM:$mask, (v8i64 VR512:$src),
(bc_v8i64 (v16i32 immAllZerosV)))),
@@ -3112,6 +3149,17 @@ def : Pat<(v8i32 (fp_to_uint (v8f32 VR256X:$src1))),
(EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr
(v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))), sub_ymm)>;
+def : Pat<(v4i32 (fp_to_uint (v4f32 VR128X:$src1))),
+ (EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr
+ (v16f32 (SUBREG_TO_REG (i32 0), VR128X:$src1, sub_xmm)))), sub_xmm)>;
+
+def : Pat<(v8f32 (uint_to_fp (v8i32 VR256X:$src1))),
+ (EXTRACT_SUBREG (v16f32 (VCVTUDQ2PSZrr
+ (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))), sub_ymm)>;
+
+def : Pat<(v4f32 (uint_to_fp (v4i32 VR128X:$src1))),
+ (EXTRACT_SUBREG (v16f32 (VCVTUDQ2PSZrr
+ (v16i32 (SUBREG_TO_REG (i32 0), VR128X:$src1, sub_xmm)))), sub_xmm)>;
def : Pat<(v16f32 (int_x86_avx512_mask_cvtdq2ps_512 (v16i32 VR512:$src),
(bc_v16f32 (v16i32 immAllZerosV)), (i16 -1), imm:$rc)),
@@ -3715,7 +3763,7 @@ defm VRNDSCALEPSZ : avx512_rndscale<0x08, "vrndscaleps", f512mem, VR512,
EVEX_CD8<32, CD8VF>;
def : Pat<(v16f32 (int_x86_avx512_mask_rndscale_ps_512 (v16f32 VR512:$src1),
- imm:$src2, (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1),
+ imm:$src2, (v16f32 VR512:$src1), (i16 -1),
FROUND_CURRENT)),
(VRNDSCALEPSZr VR512:$src1, imm:$src2)>;
@@ -3725,7 +3773,7 @@ defm VRNDSCALEPDZ : avx512_rndscale<0x09, "vrndscalepd", f512mem, VR512,
VEX_W, EVEX_CD8<64, CD8VF>;
def : Pat<(v8f64 (int_x86_avx512_mask_rndscale_pd_512 (v8f64 VR512:$src1),
- imm:$src2, (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1),
+ imm:$src2, (v8f64 VR512:$src1), (i8 -1),
FROUND_CURRENT)),
(VRNDSCALEPDZr VR512:$src1, imm:$src2)>;
@@ -3807,7 +3855,13 @@ multiclass avx512_trunc_sat<bits<8> opc, string OpcodeStr,
!strconcat(OpcodeStr," \t{$src, $dst|$dst, $src}"),
[]>, EVEX;
- def krr : AVX512XS8I<opc, MRMDestReg, (outs dstRC:$dst),
+ def rrk : AVX512XS8I<opc, MRMDestReg, (outs dstRC:$dst),
+ (ins KRC:$mask, srcRC:$src),
+ !strconcat(OpcodeStr,
+ " \t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"),
+ []>, EVEX, EVEX_K;
+
+ def rrkz : AVX512XS8I<opc, MRMDestReg, (outs dstRC:$dst),
(ins KRC:$mask, srcRC:$src),
!strconcat(OpcodeStr,
" \t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"),
@@ -3816,6 +3870,12 @@ multiclass avx512_trunc_sat<bits<8> opc, string OpcodeStr,
def mr : AVX512XS8I<opc, MRMDestMem, (outs), (ins x86memop:$dst, srcRC:$src),
!strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
[]>, EVEX;
+
+ def mrk : AVX512XS8I<opc, MRMDestMem, (outs),
+ (ins x86memop:$dst, KRC:$mask, srcRC:$src),
+ !strconcat(OpcodeStr, " \t{$src, $dst {${mask}}|${dst} {${mask}}, $src}"),
+ []>, EVEX, EVEX_K;
+
}
defm VPMOVQB : avx512_trunc_sat<0x32, "vpmovqb", VR128X, VR512, VK8WM,
i128mem>, EVEX_V512, EVEX_CD8<8, CD8VO>;
@@ -3855,60 +3915,86 @@ def : Pat<(v16i8 (X86vtrunc (v16i32 VR512:$src))), (VPMOVDBrr VR512:$src)>;
def : Pat<(v8i32 (X86vtrunc (v8i64 VR512:$src))), (VPMOVQDrr VR512:$src)>;
def : Pat<(v16i8 (X86vtruncm VK16WM:$mask, (v16i32 VR512:$src))),
- (VPMOVDBkrr VK16WM:$mask, VR512:$src)>;
+ (VPMOVDBrrkz VK16WM:$mask, VR512:$src)>;
def : Pat<(v16i16 (X86vtruncm VK16WM:$mask, (v16i32 VR512:$src))),
- (VPMOVDWkrr VK16WM:$mask, VR512:$src)>;
+ (VPMOVDWrrkz VK16WM:$mask, VR512:$src)>;
def : Pat<(v8i16 (X86vtruncm VK8WM:$mask, (v8i64 VR512:$src))),
- (VPMOVQWkrr VK8WM:$mask, VR512:$src)>;
+ (VPMOVQWrrkz VK8WM:$mask, VR512:$src)>;
def : Pat<(v8i32 (X86vtruncm VK8WM:$mask, (v8i64 VR512:$src))),
- (VPMOVQDkrr VK8WM:$mask, VR512:$src)>;
+ (VPMOVQDrrkz VK8WM:$mask, VR512:$src)>;
-multiclass avx512_extend<bits<8> opc, string OpcodeStr, RegisterClass DstRC,
- RegisterClass SrcRC, SDNode OpNode, PatFrag mem_frag,
- X86MemOperand x86memop, ValueType OpVT, ValueType InVT> {
+multiclass avx512_extend<bits<8> opc, string OpcodeStr, RegisterClass KRC,
+ RegisterClass DstRC, RegisterClass SrcRC, SDNode OpNode,
+ PatFrag mem_frag, X86MemOperand x86memop,
+ ValueType OpVT, ValueType InVT> {
def rr : AVX5128I<opc, MRMSrcReg, (outs DstRC:$dst),
(ins SrcRC:$src),
!strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"),
[(set DstRC:$dst, (OpVT (OpNode (InVT SrcRC:$src))))]>, EVEX;
- def rm : AVX5128I<opc, MRMSrcMem, (outs DstRC:$dst),
+
+ def rrk : AVX5128I<opc, MRMSrcReg, (outs DstRC:$dst),
+ (ins KRC:$mask, SrcRC:$src),
+ !strconcat(OpcodeStr, " \t{$src, $dst {${mask}} |$dst {${mask}}, $src}"),
+ []>, EVEX, EVEX_K;
+
+ def rrkz : AVX5128I<opc, MRMSrcReg, (outs DstRC:$dst),
+ (ins KRC:$mask, SrcRC:$src),
+ !strconcat(OpcodeStr, " \t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}"),
+ []>, EVEX, EVEX_KZ;
+
+ let mayLoad = 1 in {
+ def rm : AVX5128I<opc, MRMSrcMem, (outs DstRC:$dst),
(ins x86memop:$src),
!strconcat(OpcodeStr," \t{$src, $dst|$dst, $src}"),
[(set DstRC:$dst,
(OpVT (OpNode (InVT (bitconvert (mem_frag addr:$src))))))]>,
EVEX;
+
+ def rmk : AVX5128I<opc, MRMSrcMem, (outs DstRC:$dst),
+ (ins KRC:$mask, x86memop:$src),
+ !strconcat(OpcodeStr," \t{$src, $dst {${mask}} |$dst {${mask}}, $src}"),
+ []>,
+ EVEX, EVEX_K;
+
+ def rmkz : AVX5128I<opc, MRMSrcMem, (outs DstRC:$dst),
+ (ins KRC:$mask, x86memop:$src),
+ !strconcat(OpcodeStr," \t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}"),
+ []>,
+ EVEX, EVEX_KZ;
+ }
}
-defm VPMOVZXBDZ: avx512_extend<0x31, "vpmovzxbd", VR512, VR128X, X86vzext,
+defm VPMOVZXBDZ: avx512_extend<0x31, "vpmovzxbd", VK16WM, VR512, VR128X, X86vzext,
memopv2i64, i128mem, v16i32, v16i8>, EVEX_V512,
EVEX_CD8<8, CD8VQ>;
-defm VPMOVZXBQZ: avx512_extend<0x32, "vpmovzxbq", VR512, VR128X, X86vzext,
+defm VPMOVZXBQZ: avx512_extend<0x32, "vpmovzxbq", VK8WM, VR512, VR128X, X86vzext,
memopv2i64, i128mem, v8i64, v16i8>, EVEX_V512,
EVEX_CD8<8, CD8VO>;
-defm VPMOVZXWDZ: avx512_extend<0x33, "vpmovzxwd", VR512, VR256X, X86vzext,
+defm VPMOVZXWDZ: avx512_extend<0x33, "vpmovzxwd", VK16WM, VR512, VR256X, X86vzext,
memopv4i64, i256mem, v16i32, v16i16>, EVEX_V512,
EVEX_CD8<16, CD8VH>;
-defm VPMOVZXWQZ: avx512_extend<0x34, "vpmovzxwq", VR512, VR128X, X86vzext,
+defm VPMOVZXWQZ: avx512_extend<0x34, "vpmovzxwq", VK8WM, VR512, VR128X, X86vzext,
memopv2i64, i128mem, v8i64, v8i16>, EVEX_V512,
EVEX_CD8<16, CD8VQ>;
-defm VPMOVZXDQZ: avx512_extend<0x35, "vpmovzxdq", VR512, VR256X, X86vzext,
+defm VPMOVZXDQZ: avx512_extend<0x35, "vpmovzxdq", VK8WM, VR512, VR256X, X86vzext,
memopv4i64, i256mem, v8i64, v8i32>, EVEX_V512,
EVEX_CD8<32, CD8VH>;
-
-defm VPMOVSXBDZ: avx512_extend<0x21, "vpmovsxbd", VR512, VR128X, X86vsext,
+
+defm VPMOVSXBDZ: avx512_extend<0x21, "vpmovsxbd", VK16WM, VR512, VR128X, X86vsext,
memopv2i64, i128mem, v16i32, v16i8>, EVEX_V512,
EVEX_CD8<8, CD8VQ>;
-defm VPMOVSXBQZ: avx512_extend<0x22, "vpmovsxbq", VR512, VR128X, X86vsext,
+defm VPMOVSXBQZ: avx512_extend<0x22, "vpmovsxbq", VK8WM, VR512, VR128X, X86vsext,
memopv2i64, i128mem, v8i64, v16i8>, EVEX_V512,
EVEX_CD8<8, CD8VO>;
-defm VPMOVSXWDZ: avx512_extend<0x23, "vpmovsxwd", VR512, VR256X, X86vsext,
+defm VPMOVSXWDZ: avx512_extend<0x23, "vpmovsxwd", VK16WM, VR512, VR256X, X86vsext,
memopv4i64, i256mem, v16i32, v16i16>, EVEX_V512,
EVEX_CD8<16, CD8VH>;
-defm VPMOVSXWQZ: avx512_extend<0x24, "vpmovsxwq", VR512, VR128X, X86vsext,
+defm VPMOVSXWQZ: avx512_extend<0x24, "vpmovsxwq", VK8WM, VR512, VR128X, X86vsext,
memopv2i64, i128mem, v8i64, v8i16>, EVEX_V512,
EVEX_CD8<16, CD8VQ>;
-defm VPMOVSXDQZ: avx512_extend<0x25, "vpmovsxdq", VR512, VR256X, X86vsext,
+defm VPMOVSXDQZ: avx512_extend<0x25, "vpmovsxdq", VK8WM, VR512, VR256X, X86vsext,
memopv4i64, i256mem, v8i64, v8i32>, EVEX_V512,
EVEX_CD8<32, CD8VH>;
@@ -3984,6 +4070,62 @@ defm VPSCATTERQQZ : avx512_scatter<0xA1, "vpscatterqq", VK8WM, VR512, vz64mem>,
defm VPSCATTERQDZ : avx512_scatter<0xA1, "vpscatterqd", VK8WM, VR256X, vz64mem>,
EVEX_V512, EVEX_CD8<32, CD8VT1>;
+// prefetch
+multiclass avx512_gather_scatter_prefetch<bits<8> opc, Format F, string OpcodeStr,
+ RegisterClass KRC, X86MemOperand memop> {
+ let Predicates = [HasPFI], hasSideEffects = 1 in
+ def m : AVX5128I<opc, F, (outs), (ins KRC:$mask, memop:$src),
+ !strconcat(OpcodeStr, " \t{$src {${mask}}|{${mask}}, $src}"),
+ []>, EVEX, EVEX_K;
+}
+
+defm VGATHERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dps",
+ VK16WM, vz32mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
+
+defm VGATHERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qps",
+ VK8WM, vz64mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
+
+defm VGATHERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dpd",
+ VK8WM, vy32mem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
+
+defm VGATHERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qpd",
+ VK8WM, vz64mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
+
+defm VGATHERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dps",
+ VK16WM, vz32mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
+
+defm VGATHERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qps",
+ VK8WM, vz64mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
+
+defm VGATHERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dpd",
+ VK8WM, vy32mem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
+
+defm VGATHERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qpd",
+ VK8WM, vz64mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
+
+defm VSCATTERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dps",
+ VK16WM, vz32mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
+
+defm VSCATTERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qps",
+ VK8WM, vz64mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
+
+defm VSCATTERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dpd",
+ VK8WM, vy32mem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
+
+defm VSCATTERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qpd",
+ VK8WM, vz64mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
+
+defm VSCATTERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dps",
+ VK16WM, vz32mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
+
+defm VSCATTERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qps",
+ VK8WM, vz64mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
+
+defm VSCATTERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dpd",
+ VK8WM, vy32mem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
+
+defm VSCATTERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qpd",
+ VK8WM, vz64mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
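
These prefetch forms consume only a k-mask and a memory operand, which mirrors the AVX-512PF intrinsics. A minimal usage sketch, assuming the _mm512_mask_prefetch_i32gather_ps name from the GCC/Clang headers of this era (the intrinsic itself is not part of this patch):

    #include <immintrin.h>

    // Prefetch base[idx[i]] for every lane selected by `live` into L1;
    // _MM_HINT_T0 selects the vgatherpf0dps form defined above,
    // _MM_HINT_T1 the vgatherpf1dps form. Requires -mavx512pf.
    void prefetch_selected(const float *base, __m512i idx, __mmask16 live) {
      _mm512_mask_prefetch_i32gather_ps(idx, live, base, /*scale=*/4,
                                        _MM_HINT_T0);
    }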
//===----------------------------------------------------------------------===//
// VSHUFPS - VSHUFPD Operations
@@ -4200,3 +4342,19 @@ def : Pat<(int_x86_avx512_mask_conflict_q_512 VR512:$src2, VR512:$src1,
GR8:$mask),
(VPCONFLICTQrrk VR512:$src1,
(v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)), VR512:$src2)>;
+
+def : Pat<(store (i1 -1), addr:$dst), (MOV8mi addr:$dst, (i8 1))>;
+def : Pat<(store (i1 1), addr:$dst), (MOV8mi addr:$dst, (i8 1))>;
+def : Pat<(store (i1 0), addr:$dst), (MOV8mi addr:$dst, (i8 0))>;
+
+def : Pat<(store VK1:$src, addr:$dst),
+ (KMOVWmk addr:$dst, (COPY_TO_REGCLASS VK1:$src, VK16))>;
+
+def truncstorei1 : PatFrag<(ops node:$val, node:$ptr),
+ (truncstore node:$val, node:$ptr), [{
+ return cast<StoreSDNode>(N)->getMemoryVT() == MVT::i1;
+}]>;
+
+def : Pat<(truncstorei1 GR8:$src, addr:$dst),
+ (MOV8mr addr:$dst, GR8:$src)>;
+
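The patterns above pin down what an i1 store becomes at machine level: a one-byte store holding exactly 0 or 1, whether the value is a constant, a VK1 register, or a truncated GR8. A C++ sketch of the observable behaviour (illustrative, not from the patch):

    #include <cstdint>

    // A stored i1 occupies a full byte: constant true/false select
    // MOV8mi of 1/0, and a boolean already held in a byte register is
    // stored as-is via MOV8mr (the truncstorei1 case).
    void store_flag(uint8_t *p, bool v) {
      *p = v ? 1 : 0;   // one byte written, value exactly 0 or 1
    }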
diff --git a/lib/Target/X86/X86InstrBuilder.h b/lib/Target/X86/X86InstrBuilder.h
index aaef4a4..e421f8c 100644
--- a/lib/Target/X86/X86InstrBuilder.h
+++ b/lib/Target/X86/X86InstrBuilder.h
@@ -52,7 +52,8 @@ struct X86AddressMode {
unsigned GVOpFlags;
X86AddressMode()
- : BaseType(RegBase), Scale(1), IndexReg(0), Disp(0), GV(0), GVOpFlags(0) {
+ : BaseType(RegBase), Scale(1), IndexReg(0), Disp(0), GV(nullptr),
+ GVOpFlags(0) {
Base.Reg = 0;
}
diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td
index 401849f..34d8fb9 100644
--- a/lib/Target/X86/X86InstrCompiler.td
+++ b/lib/Target/X86/X86InstrCompiler.td
@@ -1187,9 +1187,9 @@ def or_is_add : PatFrag<(ops node:$lhs, node:$rhs), (or node:$lhs, node:$rhs),[{
return CurDAG->MaskedValueIsZero(N->getOperand(0), CN->getAPIntValue());
APInt KnownZero0, KnownOne0;
- CurDAG->ComputeMaskedBits(N->getOperand(0), KnownZero0, KnownOne0, 0);
+ CurDAG->computeKnownBits(N->getOperand(0), KnownZero0, KnownOne0, 0);
APInt KnownZero1, KnownOne1;
- CurDAG->ComputeMaskedBits(N->getOperand(1), KnownZero1, KnownOne1, 0);
+ CurDAG->computeKnownBits(N->getOperand(1), KnownZero1, KnownOne1, 0);
return (~KnownZero0 & ~KnownZero1) == 0;
}]>;
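
The predicate rests on a carry argument: if no bit position can be 1 in both operands, the addition can never carry, so or and add compute the same value. A standalone model of the test with illustrative names (KnownZero holds the bits proven zero, so its complement is the set of bits that may be one):

    #include <cstdint>

    // (a | b) == (a + b) whenever the maybe-one bit sets are disjoint,
    // which is exactly the (~KnownZero0 & ~KnownZero1) == 0 check above.
    bool orIsAdd(uint64_t knownZero0, uint64_t knownZero1) {
      return (~knownZero0 & ~knownZero1) == 0;
    }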
diff --git a/lib/Target/X86/X86InstrFMA.td b/lib/Target/X86/X86InstrFMA.td
index df6c9da..c0a6864 100644
--- a/lib/Target/X86/X86InstrFMA.td
+++ b/lib/Target/X86/X86InstrFMA.td
@@ -19,8 +19,9 @@ let Constraints = "$src1 = $dst" in {
multiclass fma3p_rm<bits<8> opc, string OpcodeStr,
PatFrag MemFrag128, PatFrag MemFrag256,
ValueType OpVT128, ValueType OpVT256,
+ bit IsRVariantCommutable = 0, bit IsMVariantCommutable = 0,
SDPatternOperator Op = null_frag> {
- let usesCustomInserter = 1 in
+ let usesCustomInserter = 1, isCommutable = IsRVariantCommutable in
def r : FMA3<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, VR128:$src3),
!strconcat(OpcodeStr,
@@ -28,7 +29,7 @@ multiclass fma3p_rm<bits<8> opc, string OpcodeStr,
[(set VR128:$dst, (OpVT128 (Op VR128:$src2,
VR128:$src1, VR128:$src3)))]>;
- let mayLoad = 1 in
+ let mayLoad = 1, isCommutable = IsMVariantCommutable in
def m : FMA3<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, f128mem:$src3),
!strconcat(OpcodeStr,
@@ -36,7 +37,7 @@ multiclass fma3p_rm<bits<8> opc, string OpcodeStr,
[(set VR128:$dst, (OpVT128 (Op VR128:$src2, VR128:$src1,
(MemFrag128 addr:$src3))))]>;
- let usesCustomInserter = 1 in
+ let usesCustomInserter = 1, isCommutable = IsRVariantCommutable in
def rY : FMA3<opc, MRMSrcReg, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2, VR256:$src3),
!strconcat(OpcodeStr,
@@ -44,7 +45,7 @@ multiclass fma3p_rm<bits<8> opc, string OpcodeStr,
[(set VR256:$dst, (OpVT256 (Op VR256:$src2, VR256:$src1,
VR256:$src3)))]>, VEX_L;
- let mayLoad = 1 in
+ let mayLoad = 1, isCommutable = IsMVariantCommutable in
def mY : FMA3<opc, MRMSrcMem, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2, f256mem:$src3),
!strconcat(OpcodeStr,
@@ -59,18 +60,27 @@ multiclass fma3p_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231,
string OpcodeStr, string PackTy,
PatFrag MemFrag128, PatFrag MemFrag256,
SDNode Op, ValueType OpTy128, ValueType OpTy256> {
- let isCommutable = 1 in
+  // For 213, both the register and memory variants are commutable.
+ // Indeed, the commutable operands are 1 and 2 and both live in registers
+ // for both variants.
defm r213 : fma3p_rm<opc213,
!strconcat(OpcodeStr, "213", PackTy),
- MemFrag128, MemFrag256, OpTy128, OpTy256, Op>;
+ MemFrag128, MemFrag256, OpTy128, OpTy256,
+ /* IsRVariantCommutable */ 1,
+ /* IsMVariantCommutable */ 1,
+ Op>;
let neverHasSideEffects = 1 in {
defm r132 : fma3p_rm<opc132,
!strconcat(OpcodeStr, "132", PackTy),
MemFrag128, MemFrag256, OpTy128, OpTy256>;
- let isCommutable = 1 in
+ // For 231, only the register variant is commutable.
+ // For the memory variant the folded operand must be in 3. Thus,
+ // in that case, it cannot be swapped with 2.
defm r231 : fma3p_rm<opc231,
!strconcat(OpcodeStr, "231", PackTy),
- MemFrag128, MemFrag256, OpTy128, OpTy256>;
+ MemFrag128, MemFrag256, OpTy128, OpTy256,
+ /* IsRVariantCommutable */ 1,
+ /* IsMVariantCommutable */ 0>;
} // neverHasSideEffects = 1
}
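
The commutability split follows from what the digit suffix means: it names the operands that feed the multiply, with operand 1 tied to the destination. A sketch of the three forms (standard FMA3 semantics; the intrinsic call is only for illustration and requires -mfma):

    #include <immintrin.h>

    // vfmadd213ps: dst = src2 * dst  + src3  (mul uses ops 1,2 - both stay
    //                                         in registers, so rr and rm
    //                                         variants may commute)
    // vfmadd231ps: dst = src2 * src3 + dst   (mul uses ops 2,3 - op 3 is
    //                                         the memory-fold slot, so only
    //                                         the rr variant may commute)
    // vfmadd132ps: dst = dst  * src3 + src2
    __m128 fmadd_example(__m128 a, __m128 b, __m128 c) {
      return _mm_fmadd_ps(a, b, c);  // a*b + c; the compiler picks a form
    }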
@@ -119,8 +129,9 @@ let ExeDomain = SSEPackedDouble in {
let Constraints = "$src1 = $dst" in {
multiclass fma3s_rm<bits<8> opc, string OpcodeStr, X86MemOperand x86memop,
RegisterClass RC, ValueType OpVT, PatFrag mem_frag,
+ bit IsRVariantCommutable = 0, bit IsMVariantCommutable = 0,
SDPatternOperator OpNode = null_frag> {
- let usesCustomInserter = 1 in
+ let usesCustomInserter = 1, isCommutable = IsRVariantCommutable in
def r : FMA3<opc, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2, RC:$src3),
!strconcat(OpcodeStr,
@@ -128,7 +139,7 @@ multiclass fma3s_rm<bits<8> opc, string OpcodeStr, X86MemOperand x86memop,
[(set RC:$dst,
(OpVT (OpNode RC:$src2, RC:$src1, RC:$src3)))]>;
- let mayLoad = 1 in
+ let mayLoad = 1, isCommutable = IsMVariantCommutable in
def m : FMA3<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, RC:$src2, x86memop:$src3),
!strconcat(OpcodeStr,
@@ -147,14 +158,21 @@ multiclass fma3s_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231,
let neverHasSideEffects = 1 in {
defm r132 : fma3s_rm<opc132, !strconcat(OpStr, "132", PackTy),
x86memop, RC, OpVT, mem_frag>;
- let isCommutable = 1 in
+ // See the other defm of r231 for the explanation regarding the
+ // commutable flags.
defm r231 : fma3s_rm<opc231, !strconcat(OpStr, "231", PackTy),
- x86memop, RC, OpVT, mem_frag>;
+ x86memop, RC, OpVT, mem_frag,
+ /* IsRVariantCommutable */ 1,
+ /* IsMVariantCommutable */ 0>;
}
-let isCommutable = 1 in
+// See the other defm of r213 for the explanation regarding the
+// commutable flags.
defm r213 : fma3s_rm<opc213, !strconcat(OpStr, "213", PackTy),
- x86memop, RC, OpVT, mem_frag, OpNode>;
+ x86memop, RC, OpVT, mem_frag,
+ /* IsRVariantCommutable */ 1,
+ /* IsMVariantCommutable */ 1,
+ OpNode>;
}
multiclass fma3s<bits<8> opc132, bits<8> opc213, bits<8> opc231,
diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td
index 486e5a9..1582f43 100644
--- a/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -81,7 +81,7 @@ def X86pinsrb : SDNode<"X86ISD::PINSRB",
def X86pinsrw : SDNode<"X86ISD::PINSRW",
SDTypeProfile<1, 3, [SDTCisVT<0, v8i16>, SDTCisSameAs<0,1>,
SDTCisVT<2, i32>, SDTCisPtrTy<3>]>>;
-def X86insrtps : SDNode<"X86ISD::INSERTPS",
+def X86insertps : SDNode<"X86ISD::INSERTPS",
SDTypeProfile<1, 3, [SDTCisVT<0, v4f32>, SDTCisSameAs<0,1>,
SDTCisVT<2, v4f32>, SDTCisPtrTy<3>]>>;
def X86vzmovl : SDNode<"X86ISD::VZEXT_MOVL",
@@ -175,6 +175,9 @@ def X86select : SDNode<"X86ISD::SELECT" , SDTSelect>;
def X86pmuludq : SDNode<"X86ISD::PMULUDQ",
SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
SDTCisSameAs<1,2>]>>;
+def X86pmuldq : SDNode<"X86ISD::PMULDQ",
+ SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
+ SDTCisSameAs<1,2>]>>;
// Specific shuffle nodes - At some point ISD::VECTOR_SHUFFLE will always get
// translated into one of the target nodes below during lowering.
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 6450f2a..6993577 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -36,11 +36,13 @@
#include "llvm/Target/TargetOptions.h"
#include <limits>
+using namespace llvm;
+
+#define DEBUG_TYPE "x86-instr-info"
+
#define GET_INSTRINFO_CTOR_DTOR
#include "X86GenInstrInfo.inc"
-using namespace llvm;
-
static cl::opt<bool>
NoFusing("disable-spill-fusing",
cl::desc("Disable fusing of spill code into instructions"));
@@ -1511,12 +1513,14 @@ X86InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
/// operand and follow operands form a reference to the stack frame.
bool X86InstrInfo::isFrameOperand(const MachineInstr *MI, unsigned int Op,
int &FrameIndex) const {
- if (MI->getOperand(Op).isFI() && MI->getOperand(Op+1).isImm() &&
- MI->getOperand(Op+2).isReg() && MI->getOperand(Op+3).isImm() &&
- MI->getOperand(Op+1).getImm() == 1 &&
- MI->getOperand(Op+2).getReg() == 0 &&
- MI->getOperand(Op+3).getImm() == 0) {
- FrameIndex = MI->getOperand(Op).getIndex();
+ if (MI->getOperand(Op+X86::AddrBaseReg).isFI() &&
+ MI->getOperand(Op+X86::AddrScaleAmt).isImm() &&
+ MI->getOperand(Op+X86::AddrIndexReg).isReg() &&
+ MI->getOperand(Op+X86::AddrDisp).isImm() &&
+ MI->getOperand(Op+X86::AddrScaleAmt).getImm() == 1 &&
+ MI->getOperand(Op+X86::AddrIndexReg).getReg() == 0 &&
+ MI->getOperand(Op+X86::AddrDisp).getImm() == 0) {
+ FrameIndex = MI->getOperand(Op+X86::AddrBaseReg).getIndex();
return true;
}
return false;
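
The bare operand indices 0..3 used before are replaced by the named offsets of the standard five-operand x86 memory reference. A sketch of the layout (values match X86BaseInfo.h of this era):

    // Every x86 memory reference occupies five consecutive operands
    // starting at some offset Op:
    enum : unsigned {
      AddrBaseReg     = 0,  // base register
      AddrScaleAmt    = 1,  // scale immediate (1, 2, 4 or 8)
      AddrIndexReg    = 2,  // index register
      AddrDisp        = 3,  // displacement (imm, global, frame index, ...)
      AddrSegmentReg  = 4,  // segment register
      AddrNumOperands = 5
    };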
@@ -1680,15 +1684,16 @@ X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI,
case X86::FsMOVAPSrm:
case X86::FsMOVAPDrm: {
// Loads from constant pools are trivially rematerializable.
- if (MI->getOperand(1).isReg() &&
- MI->getOperand(2).isImm() &&
- MI->getOperand(3).isReg() && MI->getOperand(3).getReg() == 0 &&
+ if (MI->getOperand(1+X86::AddrBaseReg).isReg() &&
+ MI->getOperand(1+X86::AddrScaleAmt).isImm() &&
+ MI->getOperand(1+X86::AddrIndexReg).isReg() &&
+ MI->getOperand(1+X86::AddrIndexReg).getReg() == 0 &&
MI->isInvariantLoad(AA)) {
- unsigned BaseReg = MI->getOperand(1).getReg();
+ unsigned BaseReg = MI->getOperand(1+X86::AddrBaseReg).getReg();
if (BaseReg == 0 || BaseReg == X86::RIP)
return true;
// Allow re-materialization of PIC load.
- if (!ReMatPICStubLoad && MI->getOperand(4).isGlobal())
+ if (!ReMatPICStubLoad && MI->getOperand(1+X86::AddrDisp).isGlobal())
return false;
const MachineFunction &MF = *MI->getParent()->getParent();
const MachineRegisterInfo &MRI = MF.getRegInfo();
@@ -1699,13 +1704,14 @@ X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI,
case X86::LEA32r:
case X86::LEA64r: {
- if (MI->getOperand(2).isImm() &&
- MI->getOperand(3).isReg() && MI->getOperand(3).getReg() == 0 &&
- !MI->getOperand(4).isReg()) {
+ if (MI->getOperand(1+X86::AddrScaleAmt).isImm() &&
+ MI->getOperand(1+X86::AddrIndexReg).isReg() &&
+ MI->getOperand(1+X86::AddrIndexReg).getReg() == 0 &&
+ !MI->getOperand(1+X86::AddrDisp).isReg()) {
// lea fi#, lea GV, etc. are all rematerializable.
- if (!MI->getOperand(1).isReg())
+ if (!MI->getOperand(1+X86::AddrBaseReg).isReg())
return true;
- unsigned BaseReg = MI->getOperand(1).getReg();
+ unsigned BaseReg = MI->getOperand(1+X86::AddrBaseReg).getReg();
if (BaseReg == 0)
return true;
// Allow re-materialization of lea PICBase + x.
@@ -1722,12 +1728,8 @@ X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI,
return true;
}
-/// isSafeToClobberEFLAGS - Return true if it's safe insert an instruction that
-/// would clobber the EFLAGS condition register. Note the result may be
-/// conservative. If it cannot definitely determine the safety after visiting
-/// a few instructions in each direction it assumes it's not safe.
-static bool isSafeToClobberEFLAGS(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) {
+bool X86InstrInfo::isSafeToClobberEFLAGS(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
MachineBasicBlock::iterator E = MBB.end();
// For compile time consideration, if we are not able to determine the
@@ -1998,7 +2000,7 @@ X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc,
unsigned Src2 = MI->getOperand(2).getReg();
bool isKill2 = MI->getOperand(2).isKill();
unsigned leaInReg2 = 0;
- MachineInstr *InsMI2 = 0;
+ MachineInstr *InsMI2 = nullptr;
if (Src == Src2) {
// ADD16rr %reg1028<kill>, %reg1028
// just a single insert_subreg.
@@ -2062,14 +2064,14 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
// convert them to equivalent lea if the condition code register def's
// are dead!
if (hasLiveCondCodeDef(MI))
- return 0;
+ return nullptr;
MachineFunction &MF = *MI->getParent()->getParent();
// All instructions input are two-addr instructions. Get the known operands.
const MachineOperand &Dest = MI->getOperand(0);
const MachineOperand &Src = MI->getOperand(1);
- MachineInstr *NewMI = NULL;
+ MachineInstr *NewMI = nullptr;
// FIXME: 16-bit LEA's are really slow on Athlons, but not bad on P4's. When
// we have better subtarget support, enable the 16-bit LEA generation here.
// 16-bit LEA is also slow on Core2.
@@ -2080,11 +2082,11 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
switch (MIOpc) {
case X86::SHUFPSrri: {
assert(MI->getNumOperands() == 4 && "Unknown shufps instruction!");
- if (!TM.getSubtarget<X86Subtarget>().hasSSE2()) return 0;
+ if (!TM.getSubtarget<X86Subtarget>().hasSSE2()) return nullptr;
unsigned B = MI->getOperand(1).getReg();
unsigned C = MI->getOperand(2).getReg();
- if (B != C) return 0;
+ if (B != C) return nullptr;
unsigned M = MI->getOperand(3).getImm();
NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::PSHUFDri))
.addOperand(Dest).addOperand(Src).addImm(M);
@@ -2092,11 +2094,11 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
}
case X86::SHUFPDrri: {
assert(MI->getNumOperands() == 4 && "Unknown shufpd instruction!");
- if (!TM.getSubtarget<X86Subtarget>().hasSSE2()) return 0;
+ if (!TM.getSubtarget<X86Subtarget>().hasSSE2()) return nullptr;
unsigned B = MI->getOperand(1).getReg();
unsigned C = MI->getOperand(2).getReg();
- if (B != C) return 0;
+ if (B != C) return nullptr;
unsigned M = MI->getOperand(3).getImm();
// Convert to PSHUFD mask.
@@ -2109,13 +2111,13 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
case X86::SHL64ri: {
assert(MI->getNumOperands() >= 3 && "Unknown shift instruction!");
unsigned ShAmt = getTruncatedShiftCount(MI, 2);
- if (!isTruncatedShiftCountForLEA(ShAmt)) return 0;
+ if (!isTruncatedShiftCountForLEA(ShAmt)) return nullptr;
// LEA can't handle RSP.
if (TargetRegisterInfo::isVirtualRegister(Src.getReg()) &&
!MF.getRegInfo().constrainRegClass(Src.getReg(),
&X86::GR64_NOSPRegClass))
- return 0;
+ return nullptr;
NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::LEA64r))
.addOperand(Dest)
@@ -2125,7 +2127,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
case X86::SHL32ri: {
assert(MI->getNumOperands() >= 3 && "Unknown shift instruction!");
unsigned ShAmt = getTruncatedShiftCount(MI, 2);
- if (!isTruncatedShiftCountForLEA(ShAmt)) return 0;
+ if (!isTruncatedShiftCountForLEA(ShAmt)) return nullptr;
unsigned Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r;
@@ -2135,7 +2137,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ false,
SrcReg, isKill, isUndef, ImplicitOp))
- return 0;
+ return nullptr;
MachineInstrBuilder MIB = BuildMI(MF, MI->getDebugLoc(), get(Opc))
.addOperand(Dest)
@@ -2151,10 +2153,10 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
case X86::SHL16ri: {
assert(MI->getNumOperands() >= 3 && "Unknown shift instruction!");
unsigned ShAmt = getTruncatedShiftCount(MI, 2);
- if (!isTruncatedShiftCountForLEA(ShAmt)) return 0;
+ if (!isTruncatedShiftCountForLEA(ShAmt)) return nullptr;
if (DisableLEA16)
- return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0;
+      return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV)
+                     : nullptr;
NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
.addOperand(Dest)
.addReg(0).addImm(1 << ShAmt).addOperand(Src).addImm(0).addReg(0);
@@ -2163,7 +2165,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
default: {
switch (MIOpc) {
- default: return 0;
+ default: return nullptr;
case X86::INC64r:
case X86::INC32r:
case X86::INC64_32r: {
@@ -2175,7 +2177,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ false,
SrcReg, isKill, isUndef, ImplicitOp))
- return 0;
+ return nullptr;
MachineInstrBuilder MIB = BuildMI(MF, MI->getDebugLoc(), get(Opc))
.addOperand(Dest)
@@ -2189,7 +2191,8 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
case X86::INC16r:
case X86::INC64_16r:
if (DisableLEA16)
- return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0;
+ return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV)
+ : nullptr;
assert(MI->getNumOperands() >= 2 && "Unknown inc instruction!");
NewMI = addOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
.addOperand(Dest).addOperand(Src), 1);
@@ -2206,7 +2209,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ false,
SrcReg, isKill, isUndef, ImplicitOp))
- return 0;
+ return nullptr;
MachineInstrBuilder MIB = BuildMI(MF, MI->getDebugLoc(), get(Opc))
.addOperand(Dest)
@@ -2221,7 +2224,8 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
case X86::DEC16r:
case X86::DEC64_16r:
if (DisableLEA16)
- return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0;
+ return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV)
+ : nullptr;
assert(MI->getNumOperands() >= 2 && "Unknown dec instruction!");
NewMI = addOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
.addOperand(Dest).addOperand(Src), -1);
@@ -2242,7 +2246,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ true,
SrcReg, isKill, isUndef, ImplicitOp))
- return 0;
+ return nullptr;
const MachineOperand &Src2 = MI->getOperand(2);
bool isKill2, isUndef2;
@@ -2250,7 +2254,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
MachineOperand ImplicitOp2 = MachineOperand::CreateReg(0, false);
if (!classifyLEAReg(MI, Src2, Opc, /*AllowSP=*/ false,
SrcReg2, isKill2, isUndef2, ImplicitOp2))
- return 0;
+ return nullptr;
MachineInstrBuilder MIB = BuildMI(MF, MI->getDebugLoc(), get(Opc))
.addOperand(Dest);
@@ -2272,7 +2276,8 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
case X86::ADD16rr:
case X86::ADD16rr_DB: {
if (DisableLEA16)
- return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0;
+ return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV)
+ : nullptr;
assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
unsigned Src2 = MI->getOperand(2).getReg();
bool isKill2 = MI->getOperand(2).isKill();
@@ -2311,7 +2316,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
MachineOperand ImplicitOp = MachineOperand::CreateReg(0, false);
if (!classifyLEAReg(MI, Src, Opc, /*AllowSP=*/ true,
SrcReg, isKill, isUndef, ImplicitOp))
- return 0;
+ return nullptr;
MachineInstrBuilder MIB = BuildMI(MF, MI->getDebugLoc(), get(Opc))
.addOperand(Dest)
@@ -2327,7 +2332,8 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
case X86::ADD16ri_DB:
case X86::ADD16ri8_DB:
if (DisableLEA16)
- return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0;
+ return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV)
+ : nullptr;
assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
NewMI = addOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
.addOperand(Dest).addOperand(Src),
@@ -2337,7 +2343,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
}
}
- if (!NewMI) return 0;
+ if (!NewMI) return nullptr;
if (LV) { // Update live variables
if (Src.isKill())
@@ -2789,11 +2795,11 @@ bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
std::next(I)->eraseFromParent();
Cond.clear();
- FBB = 0;
+ FBB = nullptr;
// Delete the JMP if it's equivalent to a fall-through.
if (MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) {
- TBB = 0;
+ TBB = nullptr;
I->eraseFromParent();
I = MBB.end();
UnCondBrIter = MBB.end();
@@ -3549,6 +3555,26 @@ inline static bool isDefConvertible(MachineInstr *MI) {
}
}
+/// isUseDefConvertible - Check whether the use can be converted
+/// to remove a comparison against zero.
+static X86::CondCode isUseDefConvertible(MachineInstr *MI) {
+ switch (MI->getOpcode()) {
+ default: return X86::COND_INVALID;
+ case X86::LZCNT16rr: case X86::LZCNT16rm:
+ case X86::LZCNT32rr: case X86::LZCNT32rm:
+ case X86::LZCNT64rr: case X86::LZCNT64rm:
+ return X86::COND_B;
+ case X86::POPCNT16rr:case X86::POPCNT16rm:
+ case X86::POPCNT32rr:case X86::POPCNT32rm:
+ case X86::POPCNT64rr:case X86::POPCNT64rm:
+ return X86::COND_E;
+ case X86::TZCNT16rr: case X86::TZCNT16rm:
+ case X86::TZCNT32rr: case X86::TZCNT32rm:
+ case X86::TZCNT64rr: case X86::TZCNT64rm:
+ return X86::COND_B;
+ }
+}
+
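The condition codes in the table come straight from how these instructions define EFLAGS: lzcnt and tzcnt set CF exactly when the source is zero, and popcnt sets ZF exactly when the source is zero (its result is zero iff the input is). A sketch of the code shape this lets the pass clean up (requires -mbmi; illustrative only):

    #include <immintrin.h>
    #include <cstdint>

    // The trailing x == 0 test is redundant: tzcnt already left
    // "source was zero" in CF, so
    //     tzcnt %edi, %eax ; test %edi, %edi ; je .L
    // can become
    //     tzcnt %edi, %eax ; jb .L        (COND_B, per the table above)
    bool tz_and_check(uint32_t x, uint32_t *out) {
      *out = _tzcnt_u32(x);
      return x == 0;
    }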
/// optimizeCompareInstr - Check if there exists an earlier instruction that
/// operates on the same source operands and sets flags in the same way as
/// Compare; remove Compare if possible.
@@ -3615,13 +3641,38 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2,
// If we are comparing against zero, check whether we can use MI to update
// EFLAGS. If MI is not in the same BB as CmpInstr, do not optimize.
bool IsCmpZero = (SrcReg2 == 0 && CmpValue == 0);
- if (IsCmpZero && (MI->getParent() != CmpInstr->getParent() ||
- !isDefConvertible(MI)))
+ if (IsCmpZero && MI->getParent() != CmpInstr->getParent())
return false;
+ // If we have a use of the source register between the def and our compare
+ // instruction we can eliminate the compare iff the use sets EFLAGS in the
+ // right way.
+ bool ShouldUpdateCC = false;
+ X86::CondCode NewCC = X86::COND_INVALID;
+ if (IsCmpZero && !isDefConvertible(MI)) {
+    // Scan forward from the def until we hit the use we're looking for or
+    // the compare instruction.
+ for (MachineBasicBlock::iterator J = MI;; ++J) {
+ // Do we have a convertible instruction?
+ NewCC = isUseDefConvertible(J);
+ if (NewCC != X86::COND_INVALID && J->getOperand(1).isReg() &&
+ J->getOperand(1).getReg() == SrcReg) {
+ assert(J->definesRegister(X86::EFLAGS) && "Must be an EFLAGS def!");
+ ShouldUpdateCC = true; // Update CC later on.
+ // This is not a def of SrcReg, but still a def of EFLAGS. Keep going
+ // with the new def.
+ MI = Def = J;
+ break;
+ }
+
+ if (J == I)
+ return false;
+ }
+ }
+
// We are searching for an earlier instruction that can make CmpInstr
// redundant and that instruction will be saved in Sub.
- MachineInstr *Sub = NULL;
+ MachineInstr *Sub = nullptr;
const TargetRegisterInfo *TRI = &getRegisterInfo();
// We iterate backward, starting from the instruction before CmpInstr and
@@ -3634,7 +3685,7 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2,
RE = CmpInstr->getParent() == MI->getParent() ?
MachineBasicBlock::reverse_iterator(++Def) /* points to MI */ :
CmpInstr->getParent()->rend();
- MachineInstr *Movr0Inst = 0;
+ MachineInstr *Movr0Inst = nullptr;
for (; RI != RE; ++RI) {
MachineInstr *Instr = &*RI;
// Check whether CmpInstr can be made redundant by the current instruction.
@@ -3716,13 +3767,28 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2,
// CF and OF are used, we can't perform this optimization.
return false;
}
+
+    // If we're updating the condition code, check whether we have to reverse
+    // the condition.
+ if (ShouldUpdateCC)
+ switch (OldCC) {
+ default:
+ return false;
+ case X86::COND_E:
+ break;
+ case X86::COND_NE:
+ NewCC = GetOppositeBranchCondition(NewCC);
+ break;
+ }
} else if (IsSwapped) {
// If we have SUB(r1, r2) and CMP(r2, r1), the condition code needs
// to be changed from r2 > r1 to r1 < r2, from r2 < r1 to r1 > r2, etc.
// We swap the condition code and synthesize the new opcode.
- X86::CondCode NewCC = getSwappedCondition(OldCC);
+ NewCC = getSwappedCondition(OldCC);
if (NewCC == X86::COND_INVALID) return false;
+ }
+ if ((ShouldUpdateCC || IsSwapped) && NewCC != OldCC) {
// Synthesize the new opcode.
bool HasMemoryOperand = Instr.hasOneMemOperand();
unsigned NewOpc;
@@ -3809,19 +3875,19 @@ optimizeLoadInstr(MachineInstr *MI, const MachineRegisterInfo *MRI,
unsigned &FoldAsLoadDefReg,
MachineInstr *&DefMI) const {
if (FoldAsLoadDefReg == 0)
- return 0;
+ return nullptr;
// To be conservative, if there exists another load, clear the load candidate.
if (MI->mayLoad()) {
FoldAsLoadDefReg = 0;
- return 0;
+ return nullptr;
}
// Check whether we can move DefMI here.
DefMI = MRI->getVRegDef(FoldAsLoadDefReg);
assert(DefMI);
bool SawStore = false;
- if (!DefMI->isSafeToMove(this, 0, SawStore))
- return 0;
+ if (!DefMI->isSafeToMove(this, nullptr, SawStore))
+ return nullptr;
// We try to commute MI if possible.
unsigned IdxEnd = (MI->isCommutable()) ? 2 : 1;
@@ -3838,12 +3904,12 @@ optimizeLoadInstr(MachineInstr *MI, const MachineRegisterInfo *MRI,
continue;
// Do not fold if we have a subreg use or a def or multiple uses.
if (MO.getSubReg() || MO.isDef() || FoundSrcOperand)
- return 0;
+ return nullptr;
SrcOperandId = i;
FoundSrcOperand = true;
}
- if (!FoundSrcOperand) return 0;
+ if (!FoundSrcOperand) return nullptr;
// Check whether we can fold the def into SrcOperandId.
SmallVector<unsigned, 8> Ops;
@@ -3857,22 +3923,22 @@ optimizeLoadInstr(MachineInstr *MI, const MachineRegisterInfo *MRI,
if (Idx == 1) {
// MI was changed but it didn't help, commute it back!
commuteInstruction(MI, false);
- return 0;
+ return nullptr;
}
// Check whether we can commute MI and enable folding.
if (MI->isCommutable()) {
MachineInstr *NewMI = commuteInstruction(MI, false);
// Unable to commute.
- if (!NewMI) return 0;
+ if (!NewMI) return nullptr;
if (NewMI != MI) {
// New instruction. It doesn't need to be kept.
NewMI->eraseFromParent();
- return 0;
+ return nullptr;
}
}
}
- return 0;
+ return nullptr;
}
/// Expand2AddrUndef - Expand a single-def pseudo instruction to a two-addr
@@ -4007,7 +4073,8 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
MachineInstr *MI, unsigned i,
const SmallVectorImpl<MachineOperand> &MOs,
unsigned Size, unsigned Align) const {
- const DenseMap<unsigned, std::pair<unsigned,unsigned> > *OpcodeTablePtr = 0;
+ const DenseMap<unsigned,
+ std::pair<unsigned,unsigned> > *OpcodeTablePtr = nullptr;
bool isCallRegIndirect = TM.getSubtarget<X86Subtarget>().callRegIndirect();
bool isTwoAddrFold = false;
@@ -4015,7 +4082,7 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
// when X86Subtarget is Atom.
if (isCallRegIndirect &&
(MI->getOpcode() == X86::CALL32r || MI->getOpcode() == X86::CALL64r)) {
- return NULL;
+ return nullptr;
}
unsigned NumOps = MI->getDesc().getNumOperands();
@@ -4026,9 +4093,9 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
// X86II::MO_GOT_ABSOLUTE_ADDRESS after folding.
if (MI->getOpcode() == X86::ADD32ri &&
MI->getOperand(2).getTargetFlags() == X86II::MO_GOT_ABSOLUTE_ADDRESS)
- return NULL;
+ return nullptr;
- MachineInstr *NewMI = NULL;
+ MachineInstr *NewMI = nullptr;
// Folding a memory location into the two-address part of a two-address
// instruction is different than folding it other places. It requires
// replacing the *two* registers with the memory location.
@@ -4063,7 +4130,7 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
unsigned Opcode = I->second.first;
unsigned MinAlign = (I->second.second & TB_ALIGN_MASK) >> TB_ALIGN_SHIFT;
if (Align < MinAlign)
- return NULL;
+ return nullptr;
bool NarrowToMOV32rm = false;
if (Size) {
unsigned RCSize = getRegClass(MI->getDesc(), i, &RI, MF)->getSize();
@@ -4071,12 +4138,12 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
// Check if it's safe to fold the load. If the size of the object is
// narrower than the load width, then it's not.
if (Opcode != X86::MOV64rm || RCSize != 8 || Size != 4)
- return NULL;
+ return nullptr;
// If this is a 64-bit load, but the spill slot is 32, then we can do
// a 32-bit load which is implicitly zero-extended. This likely is due
// to liveintervalanalysis remat'ing a load from stack slot.
if (MI->getOperand(0).getSubReg() || MI->getOperand(1).getSubReg())
- return NULL;
+ return nullptr;
Opcode = X86::MOV32rm;
NarrowToMOV32rm = true;
}
@@ -4105,7 +4172,7 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
// No fusion
if (PrintFailedFusing && !MI->isCopy())
dbgs() << "We failed to fuse operand " << i << " in " << *MI;
- return NULL;
+ return nullptr;
}
/// hasPartialRegUpdate - Return true for all instructions that only update
@@ -4270,14 +4337,14 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
const SmallVectorImpl<unsigned> &Ops,
int FrameIndex) const {
// Check switch flag
- if (NoFusing) return NULL;
+ if (NoFusing) return nullptr;
// Unless optimizing for size, don't fold to avoid partial
// register update stalls
if (!MF.getFunction()->getAttributes().
hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize) &&
hasPartialRegUpdate(MI->getOpcode()))
- return 0;
+ return nullptr;
const MachineFrameInfo *MFI = MF.getFrameInfo();
unsigned Size = MFI->getObjectSize(FrameIndex);
@@ -4290,7 +4357,7 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
unsigned NewOpc = 0;
unsigned RCSize = 0;
switch (MI->getOpcode()) {
- default: return NULL;
+ default: return nullptr;
case X86::TEST8rr: NewOpc = X86::CMP8ri; RCSize = 1; break;
case X86::TEST16rr: NewOpc = X86::CMP16ri8; RCSize = 2; break;
case X86::TEST32rr: NewOpc = X86::CMP32ri8; RCSize = 4; break;
@@ -4299,12 +4366,12 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
// Check if it's safe to fold the load. If the size of the object is
// narrower than the load width, then it's not.
if (Size < RCSize)
- return NULL;
+ return nullptr;
// Change to CMPXXri r, 0 first.
MI->setDesc(get(NewOpc));
MI->getOperand(1).ChangeToImmediate(0);
} else if (Ops.size() != 1)
- return NULL;
+ return nullptr;
SmallVector<MachineOperand,4> MOs;
MOs.push_back(MachineOperand::CreateFI(FrameIndex));
@@ -4322,14 +4389,14 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
return foldMemoryOperandImpl(MF, MI, Ops, FrameIndex);
// Check switch flag
- if (NoFusing) return NULL;
+ if (NoFusing) return nullptr;
// Unless optimizing for size, don't fold to avoid partial
// register update stalls
if (!MF.getFunction()->getAttributes().
hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize) &&
hasPartialRegUpdate(MI->getOpcode()))
- return 0;
+ return nullptr;
// Determine the alignment of the load.
unsigned Alignment = 0;
@@ -4352,12 +4419,12 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
Alignment = 4;
break;
default:
- return 0;
+ return nullptr;
}
if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) {
unsigned NewOpc = 0;
switch (MI->getOpcode()) {
- default: return NULL;
+ default: return nullptr;
case X86::TEST8rr: NewOpc = X86::CMP8ri; break;
case X86::TEST16rr: NewOpc = X86::CMP16ri8; break;
case X86::TEST32rr: NewOpc = X86::CMP32ri8; break;
@@ -4367,12 +4434,12 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
MI->setDesc(get(NewOpc));
MI->getOperand(1).ChangeToImmediate(0);
} else if (Ops.size() != 1)
- return NULL;
+ return nullptr;
// Make sure the subregisters match.
// Otherwise we risk changing the size of the load.
if (LoadMI->getOperand(0).getSubReg() != MI->getOperand(Ops[0]).getSubReg())
- return NULL;
+ return nullptr;
SmallVector<MachineOperand,X86::AddrNumOperands> MOs;
switch (LoadMI->getOpcode()) {
@@ -4388,7 +4455,7 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
// Medium and large mode can't fold loads this way.
if (TM.getCodeModel() != CodeModel::Small &&
TM.getCodeModel() != CodeModel::Kernel)
- return NULL;
+ return nullptr;
// x86-32 PIC requires a PIC base register for constant pools.
unsigned PICBase = 0;
@@ -4400,7 +4467,7 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
// This doesn't work for several reasons.
// 1. GlobalBaseReg may have been spilled.
// 2. It may not be live at MI.
- return NULL;
+ return nullptr;
}
// Create a constant-pool entry.
@@ -4436,14 +4503,14 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
> 4)
// These instructions only load 32 bits, we can't fold them if the
// destination register is wider than 32 bits (4 bytes).
- return NULL;
+ return nullptr;
if ((LoadMI->getOpcode() == X86::MOVSDrm ||
LoadMI->getOpcode() == X86::VMOVSDrm) &&
MF.getRegInfo().getRegClass(LoadMI->getOperand(0).getReg())->getSize()
> 8)
// These instructions only load 64 bits, we can't fold them if the
// destination register is wider than 64 bits (8 bytes).
- return NULL;
+ return nullptr;
// Folding a normal load. Just copy the load's address operands.
for (unsigned i = NumOps - X86::AddrNumOperands; i != NumOps; ++i)
@@ -4489,7 +4556,8 @@ bool X86InstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
// Folding a memory location into the two-address part of a two-address
// instruction is different than folding it other places. It requires
// replacing the *two* registers with the memory location.
- const DenseMap<unsigned, std::pair<unsigned,unsigned> > *OpcodeTablePtr = 0;
+ const DenseMap<unsigned,
+ std::pair<unsigned,unsigned> > *OpcodeTablePtr = nullptr;
if (isTwoAddr && NumOps >= 2 && OpNum < 2) {
OpcodeTablePtr = &RegOp2MemOpTable2Addr;
} else if (OpNum == 0) { // If operand 0
@@ -4671,7 +4739,7 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
AddrOps.push_back(Chain);
// Emit the load instruction.
- SDNode *Load = 0;
+ SDNode *Load = nullptr;
if (FoldedLoad) {
EVT VT = *RC->vt_begin();
std::pair<MachineInstr::mmo_iterator,
@@ -4696,7 +4764,7 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
// Emit the data processing instruction.
std::vector<EVT> VTs;
- const TargetRegisterClass *DstRC = 0;
+ const TargetRegisterClass *DstRC = nullptr;
if (MCID.getNumDefs() > 0) {
DstRC = getRegClass(MCID, 0, &RI, MF);
VTs.push_back(*DstRC->vt_begin());
@@ -5190,14 +5258,14 @@ static const uint16_t *lookup(unsigned opcode, unsigned domain) {
for (unsigned i = 0, e = array_lengthof(ReplaceableInstrs); i != e; ++i)
if (ReplaceableInstrs[i][domain-1] == opcode)
return ReplaceableInstrs[i];
- return 0;
+ return nullptr;
}
static const uint16_t *lookupAVX2(unsigned opcode, unsigned domain) {
for (unsigned i = 0, e = array_lengthof(ReplaceableInstrsAVX2); i != e; ++i)
if (ReplaceableInstrsAVX2[i][domain-1] == opcode)
return ReplaceableInstrsAVX2[i];
- return 0;
+ return nullptr;
}
std::pair<uint16_t, uint16_t>
@@ -5327,8 +5395,10 @@ namespace {
const X86TargetMachine *TM =
static_cast<const X86TargetMachine *>(&MF.getTarget());
- assert(!TM->getSubtarget<X86Subtarget>().is64Bit() &&
- "X86-64 PIC uses RIP relative addressing");
+ // Don't do anything if this is 64-bit as 64-bit PIC
+ // uses RIP relative addressing.
+ if (TM->getSubtarget<X86Subtarget>().is64Bit())
+ return false;
// Only emit a global base reg in PIC mode.
if (TM->getRelocationModel() != Reloc::PIC_)
@@ -5383,7 +5453,7 @@ namespace {
char CGBR::ID = 0;
FunctionPass*
-llvm::createGlobalBaseRegPass() { return new CGBR(); }
+llvm::createX86GlobalBaseRegPass() { return new CGBR(); }
namespace {
struct LDTLSCleanup : public MachineFunctionPass {
diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h
index 156291e..5f34915 100644
--- a/lib/Target/X86/X86InstrInfo.h
+++ b/lib/Target/X86/X86InstrInfo.h
@@ -325,7 +325,7 @@ public:
/// value.
unsigned getOpcodeAfterMemoryUnfold(unsigned Opc,
bool UnfoldLoad, bool UnfoldStore,
- unsigned *LoadRegIndex = 0) const override;
+ unsigned *LoadRegIndex = nullptr) const override;
/// areLoadsFromSameBasePtr - This is used by the pre-regalloc scheduler
/// to determine if two loads are loading from the same base address. It
@@ -359,6 +359,13 @@ public:
/// instruction that defines the specified register class.
bool isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const override;
+  /// isSafeToClobberEFLAGS - Return true if it's safe to insert an
+  /// instruction that would clobber the EFLAGS condition register. Note
+  /// the result may be conservative. If it cannot definitely determine
+  /// the safety after visiting a few instructions in each direction it
+  /// assumes it's not safe.
+ bool isSafeToClobberEFLAGS(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
static bool isX86_64ExtendedReg(const MachineOperand &MO) {
if (!MO.isReg()) return false;
return X86II::isX86_64ExtendedReg(MO.getReg());
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index 8edf873..0d97669 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -206,6 +206,8 @@ def X86rep_movs: SDNode<"X86ISD::REP_MOVS", SDTX86RepStr,
def X86rdtsc : SDNode<"X86ISD::RDTSC_DAG", SDTX86Void,
[SDNPHasChain, SDNPOutGlue, SDNPSideEffect]>;
+def X86rdtscp : SDNode<"X86ISD::RDTSCP_DAG", SDTX86Void,
+ [SDNPHasChain, SDNPOutGlue, SDNPSideEffect]>;
def X86Wrapper : SDNode<"X86ISD::Wrapper", SDTX86Wrapper>;
def X86WrapperRIP : SDNode<"X86ISD::WrapperRIP", SDTX86Wrapper>;
@@ -249,7 +251,6 @@ def X86xor_flag : SDNode<"X86ISD::XOR", SDTBinaryArithWithFlags,
def X86and_flag : SDNode<"X86ISD::AND", SDTBinaryArithWithFlags,
[SDNPCommutative]>;
-def X86bzhi : SDNode<"X86ISD::BZHI", SDTIntShiftOp>;
def X86bextr : SDNode<"X86ISD::BEXTR", SDTIntBinOp>;
def X86mul_imm : SDNode<"X86ISD::MUL_IMM", SDTIntBinOp>;
@@ -2001,6 +2002,46 @@ let Predicates = [HasLZCNT], Defs = [EFLAGS] in {
(implicit EFLAGS)]>, XS;
}
+let Predicates = [HasLZCNT] in {
+ def : Pat<(X86cmov (ctlz GR16:$src), (i16 16), (X86_COND_E),
+ (X86cmp GR16:$src, (i16 0))),
+ (LZCNT16rr GR16:$src)>;
+ def : Pat<(X86cmov (ctlz GR32:$src), (i32 32), (X86_COND_E),
+ (X86cmp GR32:$src, (i32 0))),
+ (LZCNT32rr GR32:$src)>;
+ def : Pat<(X86cmov (ctlz GR64:$src), (i64 64), (X86_COND_E),
+ (X86cmp GR64:$src, (i64 0))),
+ (LZCNT64rr GR64:$src)>;
+ def : Pat<(X86cmov (i16 16), (ctlz GR16:$src), (X86_COND_E),
+ (X86cmp GR16:$src, (i16 0))),
+ (LZCNT16rr GR16:$src)>;
+ def : Pat<(X86cmov (i32 32), (ctlz GR32:$src), (X86_COND_E),
+ (X86cmp GR32:$src, (i32 0))),
+ (LZCNT32rr GR32:$src)>;
+ def : Pat<(X86cmov (i64 64), (ctlz GR64:$src), (X86_COND_E),
+ (X86cmp GR64:$src, (i64 0))),
+ (LZCNT64rr GR64:$src)>;
+
+ def : Pat<(X86cmov (ctlz (loadi16 addr:$src)), (i16 16), (X86_COND_E),
+ (X86cmp (loadi16 addr:$src), (i16 0))),
+ (LZCNT16rm addr:$src)>;
+ def : Pat<(X86cmov (ctlz (loadi32 addr:$src)), (i32 32), (X86_COND_E),
+ (X86cmp (loadi32 addr:$src), (i32 0))),
+ (LZCNT32rm addr:$src)>;
+ def : Pat<(X86cmov (ctlz (loadi64 addr:$src)), (i64 64), (X86_COND_E),
+ (X86cmp (loadi64 addr:$src), (i64 0))),
+ (LZCNT64rm addr:$src)>;
+ def : Pat<(X86cmov (i16 16), (ctlz (loadi16 addr:$src)), (X86_COND_E),
+ (X86cmp (loadi16 addr:$src), (i16 0))),
+ (LZCNT16rm addr:$src)>;
+ def : Pat<(X86cmov (i32 32), (ctlz (loadi32 addr:$src)), (X86_COND_E),
+ (X86cmp (loadi32 addr:$src), (i32 0))),
+ (LZCNT32rm addr:$src)>;
+ def : Pat<(X86cmov (i64 64), (ctlz (loadi64 addr:$src)), (X86_COND_E),
+ (X86cmp (loadi64 addr:$src), (i64 0))),
+ (LZCNT64rm addr:$src)>;
+}
+
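These patterns fold the usual portable guard around ctlz back into one instruction: lzcnt is architecturally defined to return the operand width for a zero source, so a select against exactly that constant is a no-op. A source-level sketch of the shape being matched (the BMI block below plays the same game with cttz/tzcnt):

    #include <cstdint>

    // __builtin_clz is undefined at 0, so portable code guards it; with
    // -mlzcnt the whole select collapses to a single lzcnt.
    uint32_t clz32(uint32_t x) {
      return x == 0 ? 32 : __builtin_clz(x);
    }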
//===----------------------------------------------------------------------===//
// BMI Instructions
//
@@ -2077,6 +2118,47 @@ let Predicates = [HasBMI] in {
(BLSI64rr GR64:$src)>;
}
+let Predicates = [HasBMI] in {
+ def : Pat<(X86cmov (cttz GR16:$src), (i16 16), (X86_COND_E),
+ (X86cmp GR16:$src, (i16 0))),
+ (TZCNT16rr GR16:$src)>;
+ def : Pat<(X86cmov (cttz GR32:$src), (i32 32), (X86_COND_E),
+ (X86cmp GR32:$src, (i32 0))),
+ (TZCNT32rr GR32:$src)>;
+ def : Pat<(X86cmov (cttz GR64:$src), (i64 64), (X86_COND_E),
+ (X86cmp GR64:$src, (i64 0))),
+ (TZCNT64rr GR64:$src)>;
+ def : Pat<(X86cmov (i16 16), (cttz GR16:$src), (X86_COND_E),
+ (X86cmp GR16:$src, (i16 0))),
+ (TZCNT16rr GR16:$src)>;
+ def : Pat<(X86cmov (i32 32), (cttz GR32:$src), (X86_COND_E),
+ (X86cmp GR32:$src, (i32 0))),
+ (TZCNT32rr GR32:$src)>;
+ def : Pat<(X86cmov (i64 64), (cttz GR64:$src), (X86_COND_E),
+ (X86cmp GR64:$src, (i64 0))),
+ (TZCNT64rr GR64:$src)>;
+
+ def : Pat<(X86cmov (cttz (loadi16 addr:$src)), (i16 16), (X86_COND_E),
+ (X86cmp (loadi16 addr:$src), (i16 0))),
+ (TZCNT16rm addr:$src)>;
+ def : Pat<(X86cmov (cttz (loadi32 addr:$src)), (i32 32), (X86_COND_E),
+ (X86cmp (loadi32 addr:$src), (i32 0))),
+ (TZCNT32rm addr:$src)>;
+ def : Pat<(X86cmov (cttz (loadi64 addr:$src)), (i64 64), (X86_COND_E),
+ (X86cmp (loadi64 addr:$src), (i64 0))),
+ (TZCNT64rm addr:$src)>;
+ def : Pat<(X86cmov (i16 16), (cttz (loadi16 addr:$src)), (X86_COND_E),
+ (X86cmp (loadi16 addr:$src), (i16 0))),
+ (TZCNT16rm addr:$src)>;
+ def : Pat<(X86cmov (i32 32), (cttz (loadi32 addr:$src)), (X86_COND_E),
+ (X86cmp (loadi32 addr:$src), (i32 0))),
+ (TZCNT32rm addr:$src)>;
+ def : Pat<(X86cmov (i64 64), (cttz (loadi64 addr:$src)), (X86_COND_E),
+ (X86cmp (loadi64 addr:$src), (i64 0))),
+ (TZCNT64rm addr:$src)>;
+}
+
+
multiclass bmi_bextr_bzhi<bits<8> opc, string mnemonic, RegisterClass RC,
X86MemOperand x86memop, Intrinsic Int,
PatFrag ld_frag> {
@@ -2104,18 +2186,38 @@ let Predicates = [HasBMI2], Defs = [EFLAGS] in {
int_x86_bmi_bzhi_64, loadi64>, VEX_W;
}
-def : Pat<(X86bzhi GR32:$src1, GR8:$src2),
- (BZHI32rr GR32:$src1,
- (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
-def : Pat<(X86bzhi (loadi32 addr:$src1), GR8:$src2),
- (BZHI32rm addr:$src1,
- (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
-def : Pat<(X86bzhi GR64:$src1, GR8:$src2),
- (BZHI64rr GR64:$src1,
- (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
-def : Pat<(X86bzhi (loadi64 addr:$src1), GR8:$src2),
- (BZHI64rm addr:$src1,
- (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
+
+def CountTrailingOnes : SDNodeXForm<imm, [{
+ // Count the trailing ones in the immediate.
+ return getI8Imm(CountTrailingOnes_64(N->getZExtValue()));
+}]>;
+
+def BZHIMask : ImmLeaf<i64, [{
+ return isMask_64(Imm) && (CountTrailingOnes_64(Imm) > 32);
+}]>;
+
+let Predicates = [HasBMI2] in {
+ def : Pat<(and GR64:$src, BZHIMask:$mask),
+ (BZHI64rr GR64:$src,
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+ (MOV8ri (CountTrailingOnes imm:$mask)), sub_8bit))>;
+
+ def : Pat<(and GR32:$src, (add (shl 1, GR8:$lz), -1)),
+ (BZHI32rr GR32:$src,
+ (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$lz, sub_8bit))>;
+
+ def : Pat<(and (loadi32 addr:$src), (add (shl 1, GR8:$lz), -1)),
+ (BZHI32rm addr:$src,
+ (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$lz, sub_8bit))>;
+
+ def : Pat<(and GR64:$src, (add (shl 1, GR8:$lz), -1)),
+ (BZHI64rr GR64:$src,
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR8:$lz, sub_8bit))>;
+
+ def : Pat<(and (loadi64 addr:$src), (add (shl 1, GR8:$lz), -1)),
+ (BZHI64rm addr:$src,
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR8:$lz, sub_8bit))>;
+} // HasBMI2
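
Both new pattern groups select the same instruction from different source shapes: a variable low-bit mask computed as (1 << n) - 1, and a wide constant mask whose run of trailing ones cannot be expressed by a 32-bit immediate AND. A sketch of the C++ shapes being matched (illustrative; assumes -mbmi2):

    #include <cstdint>

    // Variable mask: matches (and x, (add (shl 1, n), -1)) -> BZHI64rr.
    uint64_t keep_low(uint64_t x, uint8_t n) {  // assumes n < 64
      return x & ((1ull << n) - 1);
    }

    // Constant mask with 40 trailing ones: too wide for an immediate AND,
    // so BZHIMask matches and the count 40 is materialized into a byte
    // register (MOV8ri) feeding BZHI64rr.
    uint64_t keep_low40(uint64_t x) {
      return x & ((1ull << 40) - 1);
    }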
let Predicates = [HasBMI] in {
def : Pat<(X86bextr GR32:$src1, GR32:$src2),
@@ -2617,21 +2719,21 @@ def : InstAlias<"fnstsw" , (FNSTSW16r)>;
// lcall and ljmp aliases. This seems to be an odd mapping in 64-bit mode, but
// this is compatible with what GAS does.
-def : InstAlias<"lcall $seg, $off", (FARCALL32i i32imm:$off, i16imm:$seg)>, Requires<[Not16BitMode]>;
-def : InstAlias<"ljmp $seg, $off", (FARJMP32i i32imm:$off, i16imm:$seg)>, Requires<[Not16BitMode]>;
-def : InstAlias<"lcall *$dst", (FARCALL32m opaque48mem:$dst)>, Requires<[Not16BitMode]>;
-def : InstAlias<"ljmp *$dst", (FARJMP32m opaque48mem:$dst)>, Requires<[Not16BitMode]>;
-def : InstAlias<"lcall $seg, $off", (FARCALL16i i16imm:$off, i16imm:$seg)>, Requires<[In16BitMode]>;
-def : InstAlias<"ljmp $seg, $off", (FARJMP16i i16imm:$off, i16imm:$seg)>, Requires<[In16BitMode]>;
-def : InstAlias<"lcall *$dst", (FARCALL16m opaque32mem:$dst)>, Requires<[In16BitMode]>;
-def : InstAlias<"ljmp *$dst", (FARJMP16m opaque32mem:$dst)>, Requires<[In16BitMode]>;
-
-def : InstAlias<"call *$dst", (CALL64m i16mem:$dst)>, Requires<[In64BitMode]>;
-def : InstAlias<"jmp *$dst", (JMP64m i16mem:$dst)>, Requires<[In64BitMode]>;
-def : InstAlias<"call *$dst", (CALL32m i16mem:$dst)>, Requires<[In32BitMode]>;
-def : InstAlias<"jmp *$dst", (JMP32m i16mem:$dst)>, Requires<[In32BitMode]>;
-def : InstAlias<"call *$dst", (CALL16m i16mem:$dst)>, Requires<[In16BitMode]>;
-def : InstAlias<"jmp *$dst", (JMP16m i16mem:$dst)>, Requires<[In16BitMode]>;
+def : InstAlias<"lcall $seg, $off", (FARCALL32i i32imm:$off, i16imm:$seg), 0>, Requires<[Not16BitMode]>;
+def : InstAlias<"ljmp $seg, $off", (FARJMP32i i32imm:$off, i16imm:$seg), 0>, Requires<[Not16BitMode]>;
+def : InstAlias<"lcall *$dst", (FARCALL32m opaque48mem:$dst), 0>, Requires<[Not16BitMode]>;
+def : InstAlias<"ljmp *$dst", (FARJMP32m opaque48mem:$dst), 0>, Requires<[Not16BitMode]>;
+def : InstAlias<"lcall $seg, $off", (FARCALL16i i16imm:$off, i16imm:$seg), 0>, Requires<[In16BitMode]>;
+def : InstAlias<"ljmp $seg, $off", (FARJMP16i i16imm:$off, i16imm:$seg), 0>, Requires<[In16BitMode]>;
+def : InstAlias<"lcall *$dst", (FARCALL16m opaque32mem:$dst), 0>, Requires<[In16BitMode]>;
+def : InstAlias<"ljmp *$dst", (FARJMP16m opaque32mem:$dst), 0>, Requires<[In16BitMode]>;
+
+def : InstAlias<"call *$dst", (CALL64m i16mem:$dst), 0>, Requires<[In64BitMode]>;
+def : InstAlias<"jmp *$dst", (JMP64m i16mem:$dst), 0>, Requires<[In64BitMode]>;
+def : InstAlias<"call *$dst", (CALL32m i16mem:$dst), 0>, Requires<[In32BitMode]>;
+def : InstAlias<"jmp *$dst", (JMP32m i16mem:$dst), 0>, Requires<[In32BitMode]>;
+def : InstAlias<"call *$dst", (CALL16m i16mem:$dst), 0>, Requires<[In16BitMode]>;
+def : InstAlias<"jmp *$dst", (JMP16m i16mem:$dst), 0>, Requires<[In16BitMode]>;
// "imul <imm>, B" is an alias for "imul <imm>, B, B".
@@ -2664,11 +2766,11 @@ def : InstAlias<"jmpl $seg, $off", (FARJMP32i i32imm:$off, i16imm:$seg)>;
// Force mov without a suffix with a segment and mem to prefer the 'l' form of
// the move. All segment/mem forms are equivalent, this has the shortest
// encoding.
-def : InstAlias<"mov $mem, $seg", (MOV32sm SEGMENT_REG:$seg, i32mem:$mem)>;
-def : InstAlias<"mov $seg, $mem", (MOV32ms i32mem:$mem, SEGMENT_REG:$seg)>;
+def : InstAlias<"mov $mem, $seg", (MOV32sm SEGMENT_REG:$seg, i32mem:$mem), 0>;
+def : InstAlias<"mov $seg, $mem", (MOV32ms i32mem:$mem, SEGMENT_REG:$seg), 0>;
// Match 'movq <largeimm>, <reg>' as an alias for movabsq.
-def : InstAlias<"movq $imm, $reg", (MOV64ri GR64:$reg, i64imm:$imm)>;
+def : InstAlias<"movq $imm, $reg", (MOV64ri GR64:$reg, i64imm:$imm), 0>;
// Match 'movq GR64, MMX' as an alias for movd.
def : InstAlias<"movq $src, $dst",
@@ -2705,7 +2807,7 @@ def : InstAlias<"outl\t$port", (OUT32ir i8imm:$port), 0>;
// 'sldt <mem>' can be encoded with either sldtw or sldtq with the same
// effect (both store to a 16-bit mem). Force to sldtw to avoid ambiguity
// errors, since its encoding is the most compact.
-def : InstAlias<"sldt $mem", (SLDT16m i16mem:$mem)>;
+def : InstAlias<"sldt $mem", (SLDT16m i16mem:$mem), 0>;
// shld/shrd op,op -> shld op, op, CL
def : InstAlias<"shld{w}\t{$r2, $r1|$r1, $r2}", (SHLD16rrCL GR16:$r1, GR16:$r2), 0>;
@@ -2751,19 +2853,29 @@ defm : ShiftRotateByOneAlias<"ror", "ROR">;
FIXME */
// test: We accept "testX <reg>, <mem>" and "testX <mem>, <reg>" as synonyms.
-def : InstAlias<"test{b}\t{$val, $mem|$mem, $val}", (TEST8rm GR8 :$val, i8mem :$mem)>;
-def : InstAlias<"test{w}\t{$val, $mem|$mem, $val}", (TEST16rm GR16:$val, i16mem:$mem)>;
-def : InstAlias<"test{l}\t{$val, $mem|$mem, $val}", (TEST32rm GR32:$val, i32mem:$mem)>;
-def : InstAlias<"test{q}\t{$val, $mem|$mem, $val}", (TEST64rm GR64:$val, i64mem:$mem)>;
+def : InstAlias<"test{b}\t{$val, $mem|$mem, $val}",
+ (TEST8rm GR8 :$val, i8mem :$mem), 0>;
+def : InstAlias<"test{w}\t{$val, $mem|$mem, $val}",
+ (TEST16rm GR16:$val, i16mem:$mem), 0>;
+def : InstAlias<"test{l}\t{$val, $mem|$mem, $val}",
+ (TEST32rm GR32:$val, i32mem:$mem), 0>;
+def : InstAlias<"test{q}\t{$val, $mem|$mem, $val}",
+ (TEST64rm GR64:$val, i64mem:$mem), 0>;
// xchg: We accept "xchgX <reg>, <mem>" and "xchgX <mem>, <reg>" as synonyms.
-def : InstAlias<"xchg{b}\t{$mem, $val|$val, $mem}", (XCHG8rm GR8 :$val, i8mem :$mem)>;
-def : InstAlias<"xchg{w}\t{$mem, $val|$val, $mem}", (XCHG16rm GR16:$val, i16mem:$mem)>;
-def : InstAlias<"xchg{l}\t{$mem, $val|$val, $mem}", (XCHG32rm GR32:$val, i32mem:$mem)>;
-def : InstAlias<"xchg{q}\t{$mem, $val|$val, $mem}", (XCHG64rm GR64:$val, i64mem:$mem)>;
+def : InstAlias<"xchg{b}\t{$mem, $val|$val, $mem}",
+ (XCHG8rm GR8 :$val, i8mem :$mem), 0>;
+def : InstAlias<"xchg{w}\t{$mem, $val|$val, $mem}",
+ (XCHG16rm GR16:$val, i16mem:$mem), 0>;
+def : InstAlias<"xchg{l}\t{$mem, $val|$val, $mem}",
+ (XCHG32rm GR32:$val, i32mem:$mem), 0>;
+def : InstAlias<"xchg{q}\t{$mem, $val|$val, $mem}",
+ (XCHG64rm GR64:$val, i64mem:$mem), 0>;
// xchg: We accept "xchgX <reg>, %eax" and "xchgX %eax, <reg>" as synonyms.
-def : InstAlias<"xchg{w}\t{%ax, $src|$src, ax}", (XCHG16ar GR16:$src)>;
-def : InstAlias<"xchg{l}\t{%eax, $src|$src, eax}", (XCHG32ar GR32:$src)>, Requires<[Not64BitMode]>;
-def : InstAlias<"xchg{l}\t{%eax, $src|$src, eax}", (XCHG32ar64 GR32_NOAX:$src)>, Requires<[In64BitMode]>;
-def : InstAlias<"xchg{q}\t{%rax, $src|$src, rax}", (XCHG64ar GR64:$src)>;
+def : InstAlias<"xchg{w}\t{%ax, $src|$src, ax}", (XCHG16ar GR16:$src), 0>;
+def : InstAlias<"xchg{l}\t{%eax, $src|$src, eax}",
+ (XCHG32ar GR32:$src), 0>, Requires<[Not64BitMode]>;
+def : InstAlias<"xchg{l}\t{%eax, $src|$src, eax}",
+ (XCHG32ar64 GR32_NOAX:$src), 0>, Requires<[In64BitMode]>;
+def : InstAlias<"xchg{q}\t{%rax, $src|$src, rax}", (XCHG64ar GR64:$src), 0>;
diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td
index 050ee39..ecf80a1 100644
--- a/lib/Target/X86/X86InstrMMX.td
+++ b/lib/Target/X86/X86InstrMMX.td
@@ -254,6 +254,11 @@ let neverHasSideEffects = 1 in
def MMX_MOVQ64rr : MMXI<0x6F, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src),
"movq\t{$src, $dst|$dst, $src}", [],
IIC_MMX_MOVQ_RR>;
+let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in {
+def MMX_MOVQ64rr_REV : MMXI<0x7F, MRMDestReg, (outs VR64:$dst), (ins VR64:$src),
+ "movq\t{$src, $dst|$dst, $src}", [],
+ IIC_MMX_MOVQ_RR>;
+}
} // SchedRW
let SchedRW = [WriteLoad] in {
@@ -262,11 +267,12 @@ def MMX_MOVQ64rm : MMXI<0x6F, MRMSrcMem, (outs VR64:$dst), (ins i64mem:$src),
"movq\t{$src, $dst|$dst, $src}",
[(set VR64:$dst, (load_mmx addr:$src))],
IIC_MMX_MOVQ_RM>;
+} // SchedRW
+let SchedRW = [WriteStore] in
def MMX_MOVQ64mr : MMXI<0x7F, MRMDestMem, (outs), (ins i64mem:$dst, VR64:$src),
"movq\t{$src, $dst|$dst, $src}",
[(store (x86mmx VR64:$src), addr:$dst)],
IIC_MMX_MOVQ_RM>;
-} // SchedRW
let SchedRW = [WriteMove] in {
def MMX_MOVDQ2Qrr : MMXSDIi8<0xD6, MRMSrcReg, (outs VR64:$dst),
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index f2f3967..1eb0485 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -1561,9 +1561,9 @@ defm VCVTSI2SD64 : sse12_vcvt_avx<0x2A, GR64, FR64, i64mem, "cvtsi2sd{q}">,
let Predicates = [UseAVX] in {
def : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
- (VCVTSI2SSrm FR64:$dst, FR64:$src1, i32mem:$src)>;
+ (VCVTSI2SSrm FR64:$dst, FR64:$src1, i32mem:$src), 0>;
def : InstAlias<"vcvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
- (VCVTSI2SDrm FR64:$dst, FR64:$src1, i32mem:$src)>;
+ (VCVTSI2SDrm FR64:$dst, FR64:$src1, i32mem:$src), 0>;
def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))),
(VCVTSI2SSrm (f32 (IMPLICIT_DEF)), addr:$src)>;
@@ -1627,9 +1627,9 @@ def : InstAlias<"cvttsd2si{q}\t{$src, $dst|$dst, $src}",
(CVTTSD2SI64rm GR64:$dst, f64mem:$src), 0>;
def : InstAlias<"cvtsi2ss\t{$src, $dst|$dst, $src}",
- (CVTSI2SSrm FR64:$dst, i32mem:$src)>;
+ (CVTSI2SSrm FR64:$dst, i32mem:$src), 0>;
def : InstAlias<"cvtsi2sd\t{$src, $dst|$dst, $src}",
- (CVTSI2SDrm FR64:$dst, i32mem:$src)>;
+ (CVTSI2SDrm FR64:$dst, i32mem:$src), 0>;
// Conversion Instructions Intrinsics - Match intrinsics which expect MM
// and/or XMM operand(s).
@@ -2005,7 +2005,7 @@ def VCVTPD2DQrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
// XMM only
def : InstAlias<"vcvtpd2dqx\t{$src, $dst|$dst, $src}",
- (VCVTPD2DQrr VR128:$dst, VR128:$src)>;
+ (VCVTPD2DQrr VR128:$dst, VR128:$src), 0>;
def VCVTPD2DQXrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"vcvtpd2dqx\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
@@ -2024,7 +2024,7 @@ def VCVTPD2DQYrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
(int_x86_avx_cvt_pd2dq_256 (loadv4f64 addr:$src)))]>,
VEX, VEX_L, Sched<[WriteCvtF2ILd]>;
def : InstAlias<"vcvtpd2dq\t{$src, $dst|$dst, $src}",
- (VCVTPD2DQYrr VR128:$dst, VR256:$src)>;
+ (VCVTPD2DQYrr VR128:$dst, VR256:$src), 0>;
}
def CVTPD2DQrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
@@ -2127,7 +2127,7 @@ def VCVTTPD2DQrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
// XMM only
def : InstAlias<"vcvttpd2dqx\t{$src, $dst|$dst, $src}",
- (VCVTTPD2DQrr VR128:$dst, VR128:$src)>;
+ (VCVTTPD2DQrr VR128:$dst, VR128:$src), 0>;
def VCVTTPD2DQXrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvttpd2dqx\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvttpd2dq
@@ -2146,7 +2146,7 @@ def VCVTTPD2DQYrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
(int_x86_avx_cvtt_pd2dq_256 (loadv4f64 addr:$src)))],
IIC_SSE_CVT_PD_RM>, VEX, VEX_L, Sched<[WriteCvtF2ILd]>;
def : InstAlias<"vcvttpd2dq\t{$src, $dst|$dst, $src}",
- (VCVTTPD2DQYrr VR128:$dst, VR256:$src)>;
+ (VCVTTPD2DQYrr VR128:$dst, VR256:$src), 0>;
let Predicates = [HasAVX] in {
def : Pat<(v4i32 (fp_to_sint (v4f64 VR256:$src))),
@@ -2252,7 +2252,7 @@ def VCVTPD2PSrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
// XMM only
def : InstAlias<"vcvtpd2psx\t{$src, $dst|$dst, $src}",
- (VCVTPD2PSrr VR128:$dst, VR128:$src)>;
+ (VCVTPD2PSrr VR128:$dst, VR128:$src), 0>;
def VCVTPD2PSXrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvtpd2psx\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
@@ -2271,7 +2271,7 @@ def VCVTPD2PSYrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
(int_x86_avx_cvt_pd2_ps_256 (loadv4f64 addr:$src)))],
IIC_SSE_CVT_PD_RM>, VEX, VEX_L, Sched<[WriteCvtF2FLd]>;
def : InstAlias<"vcvtpd2ps\t{$src, $dst|$dst, $src}",
- (VCVTPD2PSYrr VR128:$dst, VR256:$src)>;
+ (VCVTPD2PSYrr VR128:$dst, VR256:$src), 0>;
def CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtpd2ps\t{$src, $dst|$dst, $src}",
@@ -2973,6 +2973,19 @@ defm XOR : sse12_fp_packed_logical<0x57, "xor", xor>;
let isCommutable = 0 in
defm ANDN : sse12_fp_packed_logical<0x55, "andn", X86andnp>;
+// AVX1 requires type coercions in order to fold loads directly into logical
+// operations.
+let Predicates = [HasAVX1Only] in {
+ def : Pat<(bc_v8f32 (and VR256:$src1, (loadv4i64 addr:$src2))),
+ (VANDPSYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(bc_v8f32 (or VR256:$src1, (loadv4i64 addr:$src2))),
+ (VORPSYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(bc_v8f32 (xor VR256:$src1, (loadv4i64 addr:$src2))),
+ (VXORPSYrm VR256:$src1, addr:$src2)>;
+ def : Pat<(bc_v8f32 (X86andnp VR256:$src1, (loadv4i64 addr:$src2))),
+ (VANDNPSYrm VR256:$src1, addr:$src2)>;
+}
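+// Without these, the bitconvert between the v4i64 logic op and its v8f32 user
+// blocks load folding, forcing a separate 256-bit load before the
+// register-register form (e.g. VANDPSYrr).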
+
//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Arithmetic Instructions
//===----------------------------------------------------------------------===//
@@ -3144,23 +3157,23 @@ let Predicates = [UseSSE2] in {
let Predicates = [UseSSE41] in {
// If the subtarget has SSE4.1 but not AVX, the vector insert
- // instruction is lowered into a X86insrtps rather than a X86Movss.
+ // instruction is lowered into an X86insertps rather than an X86Movss.
// When selecting SSE scalar single-precision fp arithmetic instructions,
- // make sure that we correctly match the X86insrtps.
+ // make sure that we correctly match the X86insertps.
- def : Pat<(v4f32 (X86insrtps (v4f32 VR128:$dst), (v4f32 (scalar_to_vector
+ def : Pat<(v4f32 (X86insertps (v4f32 VR128:$dst), (v4f32 (scalar_to_vector
(fadd (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))),
FR32:$src))), (iPTR 0))),
(ADDSSrr_Int v4f32:$dst, (COPY_TO_REGCLASS FR32:$src, VR128))>;
- def : Pat<(v4f32 (X86insrtps (v4f32 VR128:$dst), (v4f32 (scalar_to_vector
+ def : Pat<(v4f32 (X86insertps (v4f32 VR128:$dst), (v4f32 (scalar_to_vector
(fsub (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))),
FR32:$src))), (iPTR 0))),
(SUBSSrr_Int v4f32:$dst, (COPY_TO_REGCLASS FR32:$src, VR128))>;
- def : Pat<(v4f32 (X86insrtps (v4f32 VR128:$dst), (v4f32 (scalar_to_vector
+ def : Pat<(v4f32 (X86insertps (v4f32 VR128:$dst), (v4f32 (scalar_to_vector
(fmul (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))),
FR32:$src))), (iPTR 0))),
(MULSSrr_Int v4f32:$dst, (COPY_TO_REGCLASS FR32:$src, VR128))>;
- def : Pat<(v4f32 (X86insrtps (v4f32 VR128:$dst), (v4f32 (scalar_to_vector
+ def : Pat<(v4f32 (X86insertps (v4f32 VR128:$dst), (v4f32 (scalar_to_vector
(fdiv (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))),
FR32:$src))), (iPTR 0))),
(DIVSSrr_Int v4f32:$dst, (COPY_TO_REGCLASS FR32:$src, VR128))>;
@@ -3186,19 +3199,19 @@ let Predicates = [HasAVX] in {
(f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))),
FR64:$src))))),
(VDIVSDrr_Int v2f64:$dst, (COPY_TO_REGCLASS FR64:$src, VR128))>;
- def : Pat<(v4f32 (X86insrtps (v4f32 VR128:$dst), (v4f32 (scalar_to_vector
+ def : Pat<(v4f32 (X86insertps (v4f32 VR128:$dst), (v4f32 (scalar_to_vector
(fadd (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))),
FR32:$src))), (iPTR 0))),
(VADDSSrr_Int v4f32:$dst, (COPY_TO_REGCLASS FR32:$src, VR128))>;
- def : Pat<(v4f32 (X86insrtps (v4f32 VR128:$dst), (v4f32 (scalar_to_vector
+ def : Pat<(v4f32 (X86insertps (v4f32 VR128:$dst), (v4f32 (scalar_to_vector
(fsub (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))),
FR32:$src))), (iPTR 0))),
(VSUBSSrr_Int v4f32:$dst, (COPY_TO_REGCLASS FR32:$src, VR128))>;
- def : Pat<(v4f32 (X86insrtps (v4f32 VR128:$dst), (v4f32 (scalar_to_vector
+ def : Pat<(v4f32 (X86insertps (v4f32 VR128:$dst), (v4f32 (scalar_to_vector
(fmul (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))),
FR32:$src))), (iPTR 0))),
(VMULSSrr_Int v4f32:$dst, (COPY_TO_REGCLASS FR32:$src, VR128))>;
- def : Pat<(v4f32 (X86insrtps (v4f32 VR128:$dst), (v4f32 (scalar_to_vector
+ def : Pat<(v4f32 (X86insertps (v4f32 VR128:$dst), (v4f32 (scalar_to_vector
(fdiv (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))),
FR32:$src))), (iPTR 0))),
(VDIVSSrr_Int v4f32:$dst, (COPY_TO_REGCLASS FR32:$src, VR128))>;
@@ -4068,6 +4081,10 @@ defm PADDQ : PDI_binop_all<0xD4, "paddq", add, v2i64, v4i64,
SSE_INTALUQ_ITINS_P, 1>;
defm PMULLW : PDI_binop_all<0xD5, "pmullw", mul, v8i16, v16i16,
SSE_INTMUL_ITINS_P, 1>;
+defm PMULHUW : PDI_binop_all<0xE4, "pmulhuw", mulhu, v8i16, v16i16,
+ SSE_INTMUL_ITINS_P, 1>;
+defm PMULHW : PDI_binop_all<0xE5, "pmulhw", mulhs, v8i16, v16i16,
+ SSE_INTMUL_ITINS_P, 1>;
defm PSUBB : PDI_binop_all<0xF8, "psubb", sub, v16i8, v32i8,
SSE_INTALU_ITINS_P, 0>;
defm PSUBW : PDI_binop_all<0xF9, "psubw", sub, v8i16, v16i16,
@@ -4102,10 +4119,6 @@ defm PADDUSB : PDI_binop_all_int<0xDC, "paddusb", int_x86_sse2_paddus_b,
int_x86_avx2_paddus_b, SSE_INTALU_ITINS_P, 1>;
defm PADDUSW : PDI_binop_all_int<0xDD, "paddusw", int_x86_sse2_paddus_w,
int_x86_avx2_paddus_w, SSE_INTALU_ITINS_P, 1>;
-defm PMULHUW : PDI_binop_all_int<0xE4, "pmulhuw", int_x86_sse2_pmulhu_w,
- int_x86_avx2_pmulhu_w, SSE_INTMUL_ITINS_P, 1>;
-defm PMULHW : PDI_binop_all_int<0xE5, "pmulhw" , int_x86_sse2_pmulh_w,
- int_x86_avx2_pmulh_w, SSE_INTMUL_ITINS_P, 1>;
defm PMADDWD : PDI_binop_all_int<0xF5, "pmaddwd", int_x86_sse2_pmadd_wd,
int_x86_avx2_pmadd_wd, SSE_PMADD, 1>;
defm PAVGB : PDI_binop_all_int<0xE0, "pavgb", int_x86_sse2_pavg_b,
@@ -6515,7 +6528,7 @@ multiclass SS41I_insertf32<bits<8> opc, string asm, bit Is2Addr = 1,
!strconcat(asm,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
[(set VR128:$dst,
- (X86insrtps VR128:$src1, VR128:$src2, imm:$src3))], itins.rr>,
+ (X86insertps VR128:$src1, VR128:$src2, imm:$src3))], itins.rr>,
Sched<[WriteFShuffle]>;
def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, f32mem:$src2, u32u8imm:$src3),
@@ -6524,7 +6537,7 @@ multiclass SS41I_insertf32<bits<8> opc, string asm, bit Is2Addr = 1,
!strconcat(asm,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
[(set VR128:$dst,
- (X86insrtps VR128:$src1,
+ (X86insertps VR128:$src1,
(v4f32 (scalar_to_vector (loadf32 addr:$src2))),
imm:$src3))], itins.rm>,
Sched<[WriteFShuffleLd, ReadAfterLd]>;
@@ -6537,6 +6550,29 @@ let ExeDomain = SSEPackedSingle in {
defm INSERTPS : SS41I_insertf32<0x21, "insertps", 1, SSE_INSERT_ITINS>;
}
+let Predicates = [UseSSE41] in {
+ // If we're inserting an element from a load or a null pshuf of a load,
+ // fold the load into the insertps instruction.
+ def : Pat<(v4f32 (X86insertps (v4f32 VR128:$src1), (X86PShufd (v4f32
+ (scalar_to_vector (loadf32 addr:$src2))), (i8 0)),
+ imm:$src3)),
+ (INSERTPSrm VR128:$src1, addr:$src2, imm:$src3)>;
+ def : Pat<(v4f32 (X86insertps (v4f32 VR128:$src1), (X86PShufd
+ (loadv4f32 addr:$src2), (i8 0)), imm:$src3)),
+ (INSERTPSrm VR128:$src1, addr:$src2, imm:$src3)>;
+}
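+// (After a zero pshufd, every lane holds element 0 of the load, so the scalar
+// memory form of insertps can consume the load directly.)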
+
+let Predicates = [UseAVX] in {
+ // If we're inserting an element from a vbroadcast of a load, fold the
+ // load into the X86insertps instruction.
+ def : Pat<(v4f32 (X86insertps (v4f32 VR128:$src1),
+ (X86VBroadcast (loadf32 addr:$src2)), imm:$src3)),
+ (VINSERTPSrm VR128:$src1, addr:$src2, imm:$src3)>;
+ def : Pat<(v4f32 (X86insertps (v4f32 VR128:$src1),
+ (X86VBroadcast (loadv4f32 addr:$src2)), imm:$src3)),
+ (VINSERTPSrm VR128:$src1, addr:$src2, imm:$src3)>;
+}
+
//===----------------------------------------------------------------------===//
// SSE4.1 - Round Instructions
//===----------------------------------------------------------------------===//
@@ -6990,6 +7026,31 @@ multiclass SS48I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
+/// SS48I_binop_rm2 - Simple SSE41 binary operator with different src and dst
+/// types.
+multiclass SS48I_binop_rm2<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ ValueType DstVT, ValueType SrcVT, RegisterClass RC,
+ PatFrag memop_frag, X86MemOperand x86memop,
+ OpndItins itins,
+ bit IsCommutable = 0, bit Is2Addr = 1> {
+ let isCommutable = IsCommutable in
+ def rr : SS48I<opc, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1), RC:$src2)))]>,
+ Sched<[itins.Sched]>;
+ def rm : SS48I<opc, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, x86memop:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1),
+ (bitconvert (memop_frag addr:$src2)))))]>,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
+}
+
let Predicates = [HasAVX] in {
let isCommutable = 0 in
defm VPACKUSDW : SS41I_binop_rm_int<0x2B, "vpackusdw", int_x86_sse41_packusdw,
@@ -7018,8 +7079,9 @@ let Predicates = [HasAVX] in {
defm VPMAXUW : SS48I_binop_rm<0x3E, "vpmaxuw", X86umax, v8i16, VR128,
loadv2i64, i128mem, 0, SSE_INTALU_ITINS_P>,
VEX_4V;
- defm VPMULDQ : SS41I_binop_rm_int<0x28, "vpmuldq", int_x86_sse41_pmuldq,
- 0, DEFAULT_ITINS_VECIMULSCHED>, VEX_4V;
+ defm VPMULDQ : SS48I_binop_rm2<0x28, "vpmuldq", X86pmuldq, v2i64, v4i32,
+ VR128, loadv2i64, i128mem,
+ SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V;
}
let Predicates = [HasAVX2] in {
@@ -7051,9 +7113,9 @@ let Predicates = [HasAVX2] in {
defm VPMAXUWY : SS48I_binop_rm<0x3E, "vpmaxuw", X86umax, v16i16, VR256,
loadv4i64, i256mem, 0, SSE_INTALU_ITINS_P>,
VEX_4V, VEX_L;
- defm VPMULDQ : SS41I_binop_rm_int_y<0x28, "vpmuldq",
- int_x86_avx2_pmul_dq, WriteVecIMul>,
- VEX_4V, VEX_L;
+ defm VPMULDQY : SS48I_binop_rm2<0x28, "vpmuldq", X86pmuldq, v4i64, v8i32,
+ VR256, loadv4i64, i256mem,
+ SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V, VEX_L;
}
let Constraints = "$src1 = $dst" in {
@@ -7076,8 +7138,9 @@ let Constraints = "$src1 = $dst" in {
memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>;
defm PMAXUW : SS48I_binop_rm<0x3E, "pmaxuw", X86umax, v8i16, VR128,
memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>;
- defm PMULDQ : SS41I_binop_rm_int<0x28, "pmuldq", int_x86_sse41_pmuldq,
- 1, SSE_INTMUL_ITINS_P>;
+ defm PMULDQ : SS48I_binop_rm2<0x28, "pmuldq", X86pmuldq, v2i64, v4i32,
+ VR128, memopv2i64, i128mem,
+ SSE_INTMUL_ITINS_P, 1>;
}
let Predicates = [HasAVX] in {
@@ -7394,6 +7457,7 @@ let Predicates = [UseSSE41] in {
}
+let SchedRW = [WriteLoad] in {
let Predicates = [HasAVX] in
def VMOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"vmovntdqa\t{$src, $dst|$dst, $src}",
@@ -7407,6 +7471,7 @@ def VMOVNTDQAYrm : SS48I<0x2A, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
def MOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"movntdqa\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse41_movntdqa addr:$src))]>;
+} // SchedRW
//===----------------------------------------------------------------------===//
// SSE4.2 - Compare Instructions
@@ -7831,18 +7896,20 @@ def PCLMULQDQrm : PCLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst),
multiclass pclmul_alias<string asm, int immop> {
def : InstAlias<!strconcat("pclmul", asm, "dq {$src, $dst|$dst, $src}"),
- (PCLMULQDQrr VR128:$dst, VR128:$src, immop)>;
+ (PCLMULQDQrr VR128:$dst, VR128:$src, immop), 0>;
def : InstAlias<!strconcat("pclmul", asm, "dq {$src, $dst|$dst, $src}"),
- (PCLMULQDQrm VR128:$dst, i128mem:$src, immop)>;
+ (PCLMULQDQrm VR128:$dst, i128mem:$src, immop), 0>;
def : InstAlias<!strconcat("vpclmul", asm,
"dq {$src2, $src1, $dst|$dst, $src1, $src2}"),
- (VPCLMULQDQrr VR128:$dst, VR128:$src1, VR128:$src2, immop)>;
+ (VPCLMULQDQrr VR128:$dst, VR128:$src1, VR128:$src2, immop),
+ 0>;
def : InstAlias<!strconcat("vpclmul", asm,
"dq {$src2, $src1, $dst|$dst, $src1, $src2}"),
- (VPCLMULQDQrm VR128:$dst, VR128:$src1, i128mem:$src2, immop)>;
+ (VPCLMULQDQrm VR128:$dst, VR128:$src1, i128mem:$src2, immop),
+ 0>;
}
defm : pclmul_alias<"hqhq", 0x11>;
defm : pclmul_alias<"hqlq", 0x01>;
@@ -8291,6 +8358,12 @@ let Predicates = [HasF16C] in {
defm VCVTPH2PSY : f16c_ph2ps<VR256, f128mem, int_x86_vcvtph2ps_256>, VEX_L;
defm VCVTPS2PH : f16c_ps2ph<VR128, f64mem, int_x86_vcvtps2ph_128>;
defm VCVTPS2PHY : f16c_ps2ph<VR256, f128mem, int_x86_vcvtps2ph_256>, VEX_L;
+
+ // Pattern match vcvtph2ps of a scalar i64 load.
+ def : Pat<(int_x86_vcvtph2ps_128 (vzmovl_v2i64 addr:$src)),
+ (VCVTPH2PSrm addr:$src)>;
+ def : Pat<(int_x86_vcvtph2ps_128 (vzload_v2i64 addr:$src)),
+ (VCVTPH2PSrm addr:$src)>;
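+ // (The memory form of vcvtph2ps reads only 64 bits, so these zero-extending
+ // load fragments match exactly what the instruction consumes.)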
}
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/X86/X86InstrSystem.td b/lib/Target/X86/X86InstrSystem.td
index 9d3aa1c..b5595cb 100644
--- a/lib/Target/X86/X86InstrSystem.td
+++ b/lib/Target/X86/X86InstrSystem.td
@@ -19,7 +19,7 @@ let Defs = [RAX, RDX] in
TB;
let Defs = [RAX, RCX, RDX] in
- def RDTSCP : I<0x01, MRM_F9, (outs), (ins), "rdtscp", []>, TB;
+ def RDTSCP : I<0x01, MRM_F9, (outs), (ins), "rdtscp", [(X86rdtscp)]>, TB;
// CPU flow control instructions
diff --git a/lib/Target/X86/X86JITInfo.cpp b/lib/Target/X86/X86JITInfo.cpp
index e99f2d9..e969ef2 100644
--- a/lib/Target/X86/X86JITInfo.cpp
+++ b/lib/Target/X86/X86JITInfo.cpp
@@ -11,7 +11,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "jit"
#include "X86JITInfo.h"
#include "X86Relocations.h"
#include "X86Subtarget.h"
@@ -24,6 +23,8 @@
#include <cstring>
using namespace llvm;
+#define DEBUG_TYPE "jit"
+
// Determine the platform we're running on
#if defined (__x86_64__) || defined (_M_AMD64) || defined (_M_X64)
# define X86_64_JIT
@@ -427,9 +428,14 @@ X86JITInfo::getLazyResolverFunction(JITCompilerFn F) {
TsanIgnoreWritesEnd();
#if defined (X86_32_JIT) && !defined (_MSC_VER)
+#if defined(__SSE__)
+ // When LLVM itself is built with SSE enabled, always use the SSE callback.
+ return X86CompilationCallback_SSE;
+#else
if (Subtarget->hasSSE1())
return X86CompilationCallback_SSE;
#endif
+#endif
return X86CompilationCallback;
}
@@ -437,7 +443,7 @@ X86JITInfo::getLazyResolverFunction(JITCompilerFn F) {
X86JITInfo::X86JITInfo(X86TargetMachine &tm) : TM(tm) {
Subtarget = &TM.getSubtarget<X86Subtarget>();
useGOT = 0;
- TLSOffset = 0;
+ TLSOffset = nullptr;
}
void *X86JITInfo::emitGlobalValueIndirectSym(const GlobalValue* GV, void *ptr,
diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp
index 6d7f3cb..0190080 100644
--- a/lib/Target/X86/X86MCInstLower.cpp
+++ b/lib/Target/X86/X86MCInstLower.cpp
@@ -120,7 +120,7 @@ GetSymbolFromOperand(const MachineOperand &MO) const {
case X86II::MO_DARWIN_NONLAZY_PIC_BASE: {
MachineModuleInfoImpl::StubValueTy &StubSym =
getMachOMMI().getGVStubEntry(Sym);
- if (StubSym.getPointer() == 0) {
+ if (!StubSym.getPointer()) {
assert(MO.isGlobal() && "Extern symbol not handled yet");
StubSym =
MachineModuleInfoImpl::
@@ -132,7 +132,7 @@ GetSymbolFromOperand(const MachineOperand &MO) const {
case X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE: {
MachineModuleInfoImpl::StubValueTy &StubSym =
getMachOMMI().getHiddenGVStubEntry(Sym);
- if (StubSym.getPointer() == 0) {
+ if (!StubSym.getPointer()) {
assert(MO.isGlobal() && "Extern symbol not handled yet");
StubSym =
MachineModuleInfoImpl::
@@ -168,7 +168,7 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO,
MCSymbol *Sym) const {
// FIXME: We would like an efficient form for this, so we don't have to do a
// lot of extra uniquing.
- const MCExpr *Expr = 0;
+ const MCExpr *Expr = nullptr;
MCSymbolRefExpr::VariantKind RefKind = MCSymbolRefExpr::VK_None;
switch (MO.getTargetFlags()) {
@@ -223,7 +223,7 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO,
break;
}
- if (Expr == 0)
+ if (!Expr)
Expr = MCSymbolRefExpr::Create(Sym, RefKind, Ctx);
if (!MO.isJTI() && !MO.isMBB() && MO.getOffset())
diff --git a/lib/Target/X86/X86PadShortFunction.cpp b/lib/Target/X86/X86PadShortFunction.cpp
index 746d0d6..6639875 100644
--- a/lib/Target/X86/X86PadShortFunction.cpp
+++ b/lib/Target/X86/X86PadShortFunction.cpp
@@ -15,9 +15,9 @@
#include <algorithm>
-#define DEBUG_TYPE "x86-pad-short-functions"
#include "X86.h"
#include "X86InstrInfo.h"
+#include "X86Subtarget.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -30,6 +30,8 @@
using namespace llvm;
+#define DEBUG_TYPE "x86-pad-short-functions"
+
STATISTIC(NumBBsPadded, "Number of basic blocks padded");
namespace {
@@ -49,7 +51,7 @@ namespace {
struct PadShortFunc : public MachineFunctionPass {
static char ID;
PadShortFunc() : MachineFunctionPass(ID)
- , Threshold(4), TM(0), TII(0) {}
+ , Threshold(4), TM(nullptr), TII(nullptr) {}
bool runOnMachineFunction(MachineFunction &MF) override;
@@ -100,6 +102,9 @@ bool PadShortFunc::runOnMachineFunction(MachineFunction &MF) {
}
TM = &MF.getTarget();
+ if (!TM->getSubtarget<X86Subtarget>().padShortFunctions())
+ return false;
+
TII = TM->getInstrInfo();
// Search through basic blocks and mark the ones that have early returns
diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
index 85aa9b5..a83e1e4 100644
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -38,11 +38,11 @@
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
+using namespace llvm;
+
#define GET_REGINFO_TARGET_DESC
#include "X86GenRegisterInfo.inc"
-using namespace llvm;
-
cl::opt<bool>
ForceStackAlign("force-align-stack",
cl::desc("Force align the stack to the minimum alignment"
@@ -129,7 +129,7 @@ X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
if (!Is64Bit && SubIdx == X86::sub_8bit) {
A = X86GenRegisterInfo::getSubClassWithSubReg(A, X86::sub_8bit_hi);
if (!A)
- return 0;
+ return nullptr;
}
return X86GenRegisterInfo::getMatchingSuperRegClass(A, B, SubIdx);
}
@@ -231,7 +231,7 @@ X86RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
}
}
-const uint16_t *
+const MCPhysReg *
X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
bool HasAVX = TM.getSubtarget<X86Subtarget>().hasAVX();
bool HasAVX512 = TM.getSubtarget<X86Subtarget>().hasAVX512();
diff --git a/lib/Target/X86/X86RegisterInfo.h b/lib/Target/X86/X86RegisterInfo.h
index 6a71113..2289d91 100644
--- a/lib/Target/X86/X86RegisterInfo.h
+++ b/lib/Target/X86/X86RegisterInfo.h
@@ -100,7 +100,7 @@ public:
/// getCalleeSavedRegs - Return a null-terminated list of all of the
/// callee-save registers on this target.
- const uint16_t *
+ const MCPhysReg *
getCalleeSavedRegs(const MachineFunction* MF) const override;
const uint32_t *getCallPreservedMask(CallingConv::ID) const override;
const uint32_t *getNoPreservedMask() const;
@@ -122,7 +122,7 @@ public:
void eliminateFrameIndex(MachineBasicBlock::iterator MI,
int SPAdj, unsigned FIOperandNum,
- RegScavenger *RS = NULL) const override;
+ RegScavenger *RS = nullptr) const override;
// Debug information queries.
unsigned getFrameRegister(const MachineFunction &MF) const override;
diff --git a/lib/Target/X86/X86SchedHaswell.td b/lib/Target/X86/X86SchedHaswell.td
index f5b51ee..6966d61 100644
--- a/lib/Target/X86/X86SchedHaswell.td
+++ b/lib/Target/X86/X86SchedHaswell.td
@@ -20,6 +20,9 @@ def HaswellModel : SchedMachineModel {
let LoadLatency = 4;
let MispredictPenalty = 16;
+ // Based on the LSD (loop-stream detector) queue size and benchmarking data.
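+ // (The loop unroller also uses this value as its partial-unrolling budget.)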
+ let LoopMicroOpBufferSize = 50;
+
// FIXME: SSE4 and AVX are unimplemented. This flag is set to allow
// the scheduler to assign a default model to unrecognized opcodes.
let CompleteModel = 0;
diff --git a/lib/Target/X86/X86SchedSandyBridge.td b/lib/Target/X86/X86SchedSandyBridge.td
index a58859a..83f0534 100644
--- a/lib/Target/X86/X86SchedSandyBridge.td
+++ b/lib/Target/X86/X86SchedSandyBridge.td
@@ -21,6 +21,9 @@ def SandyBridgeModel : SchedMachineModel {
let LoadLatency = 4;
let MispredictPenalty = 16;
+ // Based on the LSD (loop-stream detector) queue size.
+ let LoopMicroOpBufferSize = 28;
+
// FIXME: SSE4 and AVX are unimplemented. This flag is set to allow
// the scheduler to assign a default model to unrecognized opcodes.
let CompleteModel = 0;
diff --git a/lib/Target/X86/X86ScheduleAtom.td b/lib/Target/X86/X86ScheduleAtom.td
index ba72f29..3256ee7 100644
--- a/lib/Target/X86/X86ScheduleAtom.td
+++ b/lib/Target/X86/X86ScheduleAtom.td
@@ -535,5 +535,9 @@ def AtomModel : SchedMachineModel {
let LoadLatency = 3; // Expected cycles, may be overridden by OperandCycles.
let HighLatency = 30; // Expected, may be overridden by OperandCycles.
+ // On the Atom, the throughput for taken branches is 2 cycles. For small
+ // simple loops, expand by a small factor to hide the backedge cost.
+ let LoopMicroOpBufferSize = 10;
+
let Itineraries = AtomItineraries;
}
diff --git a/lib/Target/X86/X86ScheduleSLM.td b/lib/Target/X86/X86ScheduleSLM.td
index 6c2a304..823d101 100644
--- a/lib/Target/X86/X86ScheduleSLM.td
+++ b/lib/Target/X86/X86ScheduleSLM.td
@@ -1,4 +1,4 @@
-//===- X86ScheduleSLM.td - X86 Atom Scheduling Definitions -*- tablegen -*-==//
+//=- X86ScheduleSLM.td - X86 Silvermont Scheduling -----------*- tablegen -*-=//
//
// The LLVM Compiler Infrastructure
//
@@ -7,662 +7,225 @@
//
//===----------------------------------------------------------------------===//
//
-// This file defines the itinerary class data for the Intel Atom
-// (Silvermont) processor.
+// This file defines the machine model for Intel Silvermont to support
+// instruction scheduling and other instruction cost heuristics.
//
//===----------------------------------------------------------------------===//
-def IEC_RSV0 : FuncUnit;
-def IEC_RSV1 : FuncUnit;
-def FPC_RSV0 : FuncUnit;
-def FPC_RSV1 : FuncUnit;
-def MEC_RSV : FuncUnit;
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-def SLMItineraries : ProcessorItineraries<
- [ IEC_RSV0, IEC_RSV1, FPC_RSV0, FPC_RSV1, MEC_RSV ],
- [], [
- // [InstrStage<N, [FPC_RSV0, FPC_RSV1]>]
- // [InstrStage<N, [FPC_RSV0, FPC_RSV1], 0>, InstrStage<N, [MEC_RSV]>]
- // [InstrStage<N, [IEC_RSV0, IEC_RSV1]>]
- // [InstrStage<N, [IEC_RSV0, IEC_RSV1], 0>,InstrStage<N,[MEC_RSV]>]
- //
- // Default is 1 cycle, IEC_RSV0 or IEC_RSV1
- //InstrItinData<IIC_DEFAULT, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_ALU_MEM, [InstrStage<1, [IEC_RSV0, IEC_RSV1], 0>,
- InstrStage<1, [MEC_RSV]>] >,
- InstrItinData<IIC_ALU_NONMEM, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_LEA, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_LEA_16, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- // mul
- InstrItinData<IIC_MUL8, [InstrStage<4, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_MUL16_MEM, [InstrStage<4, [IEC_RSV0, IEC_RSV1], 0>,
- InstrStage<4, [MEC_RSV]>] >,
- InstrItinData<IIC_MUL16_REG, [InstrStage<4, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_MUL32_MEM, [InstrStage<3, [IEC_RSV0, IEC_RSV1], 0>,
- InstrStage<3, [MEC_RSV]>] >,
- InstrItinData<IIC_MUL32_REG, [InstrStage<3, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_MUL64, [InstrStage<4, [IEC_RSV0, IEC_RSV1]>] >,
- // imul by al, ax, eax, rax
- InstrItinData<IIC_IMUL8, [InstrStage<6, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_IMUL16_MEM, [InstrStage<6, [IEC_RSV0, IEC_RSV1], 0>,
- InstrStage<6, [MEC_RSV]>] >,
- InstrItinData<IIC_IMUL16_REG, [InstrStage<6, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_IMUL32_MEM, [InstrStage<6, [IEC_RSV0, IEC_RSV1], 0>,
- InstrStage<6, [MEC_RSV]>] >,
- InstrItinData<IIC_IMUL32_REG, [InstrStage<6, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_IMUL64, [InstrStage<6, [IEC_RSV0, IEC_RSV1]>] >,
- // imul reg by reg|mem
- InstrItinData<IIC_IMUL16_RM, [InstrStage<4, [IEC_RSV0, IEC_RSV1], 0>,
- InstrStage<4, [MEC_RSV]>] >,
- InstrItinData<IIC_IMUL16_RR, [InstrStage<4, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_IMUL32_RM, [InstrStage<3, [IEC_RSV0, IEC_RSV1], 0>,
- InstrStage<3, [MEC_RSV]>] >,
- InstrItinData<IIC_IMUL32_RR, [InstrStage<3, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_IMUL64_RM, [InstrStage<4, [IEC_RSV0, IEC_RSV1], 0>,
- InstrStage<4, [MEC_RSV]>] >,
- InstrItinData<IIC_IMUL64_RR, [InstrStage<4, [IEC_RSV0, IEC_RSV1]>] >,
- // imul reg = reg/mem * imm
- InstrItinData<IIC_IMUL16_RRI, [InstrStage<4, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_IMUL32_RRI, [InstrStage<3, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_IMUL64_RRI, [InstrStage<4, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_IMUL16_RMI, [InstrStage<4, [IEC_RSV0, IEC_RSV1], 0>,
- InstrStage<4, [MEC_RSV]>] >,
- InstrItinData<IIC_IMUL32_RMI, [InstrStage<3, [IEC_RSV0, IEC_RSV1], 0>,
- InstrStage<3, [MEC_RSV]>] >,
- InstrItinData<IIC_IMUL64_RMI, [InstrStage<4, [IEC_RSV0, IEC_RSV1], 0>,
- InstrStage<4, [MEC_RSV]>] >,
- // idiv - min latency
- InstrItinData<IIC_IDIV8, [InstrStage<34, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_IDIV16, [InstrStage<35, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_IDIV32, [InstrStage<35, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_IDIV64, [InstrStage<49, [IEC_RSV0, IEC_RSV1]>] >,
- // div - min latency
- InstrItinData<IIC_DIV8_REG, [InstrStage<25, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_DIV8_MEM, [InstrStage<25, [IEC_RSV0, IEC_RSV1], 0>,
- InstrStage<25, [MEC_RSV]>] >,
- InstrItinData<IIC_DIV16, [InstrStage<26, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_DIV32, [InstrStage<26, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_DIV64, [InstrStage<38, [IEC_RSV0, IEC_RSV1]>] >,
- // neg/not/inc/dec
- InstrItinData<IIC_UNARY_REG, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_UNARY_MEM, [InstrStage<1, [IEC_RSV0, IEC_RSV1], 0>,
- InstrStage<1, [MEC_RSV]>] >,
- // add/sub/and/or/xor/adc/sbc/cmp/test
- InstrItinData<IIC_BIN_NONMEM, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_BIN_MEM, [InstrStage<1, [IEC_RSV0, IEC_RSV1], 0>,
- InstrStage<1, [MEC_RSV]>] >,
- // adc/sbb
- InstrItinData<IIC_BIN_CARRY_NONMEM, [InstrStage<2, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_BIN_CARRY_MEM, [InstrStage<2, [IEC_RSV0, IEC_RSV1], 0>,
- InstrStage<2, [MEC_RSV]>] >,
- // shift/rotate
- InstrItinData<IIC_SR, [InstrStage<1, [IEC_RSV0], 0>,
- InstrStage<1, [MEC_RSV]>] >,
- // shift double
- InstrItinData<IIC_SHD16_REG_IM, [InstrStage<2, [IEC_RSV0]>] >,
- InstrItinData<IIC_SHD16_REG_CL, [InstrStage<4, [IEC_RSV0]>] >,
- InstrItinData<IIC_SHD16_MEM_IM, [InstrStage<2, [IEC_RSV0], 0>,
- InstrStage<2, [MEC_RSV]>] >,
- InstrItinData<IIC_SHD16_MEM_CL, [InstrStage<4, [IEC_RSV0], 0>,
- InstrStage<4, [MEC_RSV]>] >,
- InstrItinData<IIC_SHD32_REG_IM, [InstrStage<2, [IEC_RSV0]>] >,
- InstrItinData<IIC_SHD32_REG_CL, [InstrStage<4, [IEC_RSV0]>] >,
- InstrItinData<IIC_SHD32_MEM_IM, [InstrStage<2, [IEC_RSV0], 0>,
- InstrStage<2, [MEC_RSV]>] >,
- InstrItinData<IIC_SHD32_MEM_CL, [InstrStage<4, [IEC_RSV0], 0>,
- InstrStage<4, [MEC_RSV]>] >,
- InstrItinData<IIC_SHD64_REG_IM, [InstrStage<2, [IEC_RSV0]>] >,
- InstrItinData<IIC_SHD64_REG_CL, [InstrStage<4, [IEC_RSV0]>] >,
- InstrItinData<IIC_SHD64_MEM_IM, [InstrStage<2, [IEC_RSV0], 0>,
- InstrStage<2, [MEC_RSV]>] >,
- InstrItinData<IIC_SHD64_MEM_CL, [InstrStage<4, [IEC_RSV0], 0>,
- InstrStage<4, [MEC_RSV]>] >,
- // cmov
- InstrItinData<IIC_CMOV16_RM, [InstrStage<2, [IEC_RSV0, IEC_RSV1], 0>,
- InstrStage<2, [MEC_RSV]>] >,
- InstrItinData<IIC_CMOV16_RR, [InstrStage<2, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_CMOV32_RM, [InstrStage<2, [IEC_RSV0, IEC_RSV1], 0>,
- InstrStage<2, [MEC_RSV]>] >,
- InstrItinData<IIC_CMOV32_RR, [InstrStage<2, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_CMOV64_RM, [InstrStage<2, [IEC_RSV0, IEC_RSV1], 0>,
- InstrStage<2, [MEC_RSV]>] >,
- InstrItinData<IIC_CMOV64_RR, [InstrStage<2, [IEC_RSV0, IEC_RSV1]>] >,
- // set
- InstrItinData<IIC_SET_M, [InstrStage<1, [IEC_RSV0, IEC_RSV1], 0>,
- InstrStage<1, [MEC_RSV]>] >,
- InstrItinData<IIC_SET_R, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- // jcc
- InstrItinData<IIC_Jcc, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- // jcxz/jecxz/jrcxz
- InstrItinData<IIC_JCXZ, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- // jmp rel
- InstrItinData<IIC_JMP_REL, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- // jmp indirect
- InstrItinData<IIC_JMP_REG, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_JMP_MEM, [InstrStage<1, [IEC_RSV0, IEC_RSV1], 0>,
- InstrStage<1, [MEC_RSV]>] >,
- // jmp far
- InstrItinData<IIC_JMP_FAR_MEM, [InstrStage<1, [IEC_RSV0, IEC_RSV1], 0>,
- InstrStage<1, [MEC_RSV]>] >,
- InstrItinData<IIC_JMP_FAR_PTR, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- // loop/loope/loopne
- InstrItinData<IIC_LOOP, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_LOOPE, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_LOOPNE, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- // call - all but reg/imm
- InstrItinData<IIC_CALL_RI, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_CALL_MEM, [InstrStage<1, [IEC_RSV0, IEC_RSV1], 0>,
- InstrStage<1, [MEC_RSV]>] >,
- InstrItinData<IIC_CALL_FAR_MEM, [InstrStage<1, [IEC_RSV0, IEC_RSV1], 0>,
- InstrStage<1, [MEC_RSV]>] >,
- InstrItinData<IIC_CALL_FAR_PTR, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- //ret
- InstrItinData<IIC_RET, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_RET_IMM, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- //sign extension movs
- InstrItinData<IIC_MOVSX, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_MOVSX_R16_R8, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_MOVSX_R16_M8, [InstrStage<1, [IEC_RSV0, IEC_RSV1], 0>,
- InstrStage<1, [MEC_RSV]>] >,
- InstrItinData<IIC_MOVSX_R16_R16, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_MOVSX_R32_R32, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- //zero extension movs
- InstrItinData<IIC_MOVZX, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_MOVZX_R16_R8, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_MOVZX_R16_M8, [InstrStage<1, [IEC_RSV0, IEC_RSV1], 0>,
- InstrStage<1, [MEC_RSV]>] >,
-
- InstrItinData<IIC_REP_MOVS, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_REP_STOS, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
-
- // SSE binary operations
- // arithmetic fp scalar
- InstrItinData<IIC_SSE_ALU_F32S_RR, [InstrStage<3, [FPC_RSV0, FPC_RSV1]>] >,
- InstrItinData<IIC_SSE_ALU_F32S_RM, [InstrStage<3, [FPC_RSV0, FPC_RSV1], 0>,
- InstrStage<3, [MEC_RSV]>] >,
- InstrItinData<IIC_SSE_ALU_F64S_RR, [InstrStage<3, [FPC_RSV0, FPC_RSV1]>] >,
- InstrItinData<IIC_SSE_ALU_F64S_RM, [InstrStage<3, [FPC_RSV0, FPC_RSV1], 0>,
- InstrStage<3, [MEC_RSV]>] >,
- InstrItinData<IIC_SSE_MUL_F32S_RR, [InstrStage<1, [FPC_RSV0, FPC_RSV1]>] >,
- InstrItinData<IIC_SSE_MUL_F32S_RM, [InstrStage<1, [FPC_RSV0, FPC_RSV1], 0>,
- InstrStage<1, [MEC_RSV]>] >,
- InstrItinData<IIC_SSE_MUL_F64S_RR, [InstrStage<2, [FPC_RSV0, FPC_RSV1]>] >,
- InstrItinData<IIC_SSE_MUL_F64S_RM, [InstrStage<2, [FPC_RSV0, FPC_RSV1], 0>,
- InstrStage<2, [MEC_RSV]>] >,
- InstrItinData<IIC_SSE_DIV_F32S_RR, [InstrStage<13, [FPC_RSV0, FPC_RSV1]>] >,
- InstrItinData<IIC_SSE_DIV_F32S_RM, [InstrStage<13, [FPC_RSV0, FPC_RSV1], 0>,
- InstrStage<13, [MEC_RSV]>] >,
- InstrItinData<IIC_SSE_DIV_F64S_RR, [InstrStage<13, [FPC_RSV0, FPC_RSV1]>] >,
- InstrItinData<IIC_SSE_DIV_F64S_RM, [InstrStage<13, [FPC_RSV0, FPC_RSV1], 0>,
- InstrStage<13, [MEC_RSV]>] >,
-
- InstrItinData<IIC_SSE_COMIS_RR, [InstrStage<1, [FPC_RSV0, FPC_RSV1]>] >,
- InstrItinData<IIC_SSE_COMIS_RM, [InstrStage<1, [FPC_RSV0, FPC_RSV1]>] >,
-
- InstrItinData<IIC_SSE_HADDSUB_RR, [InstrStage<6, [FPC_RSV0, FPC_RSV1]>] >,
- InstrItinData<IIC_SSE_HADDSUB_RM, [InstrStage<6, [FPC_RSV0, FPC_RSV1], 0>,
- InstrStage<6, [MEC_RSV]>] >,
-
- // arithmetic fp parallel
- InstrItinData<IIC_SSE_ALU_F32P_RR, [InstrStage<3, [FPC_RSV0, FPC_RSV1]>] >,
- InstrItinData<IIC_SSE_ALU_F32P_RM, [InstrStage<3, [FPC_RSV0, FPC_RSV1], 0>,
- InstrStage<3, [MEC_RSV]>] >,
- InstrItinData<IIC_SSE_ALU_F64P_RR, [InstrStage<4, [FPC_RSV0, FPC_RSV1]>] >,
- InstrItinData<IIC_SSE_ALU_F64P_RM, [InstrStage<4, [FPC_RSV0, FPC_RSV1], 0>,
- InstrStage<4, [MEC_RSV]>] >,
- InstrItinData<IIC_SSE_MUL_F32P_RR, [InstrStage<2, [FPC_RSV0, FPC_RSV1]>] >,
- InstrItinData<IIC_SSE_MUL_F32P_RM, [InstrStage<2, [FPC_RSV0, FPC_RSV1], 0>,
- InstrStage<2, [MEC_RSV]>] >,
- InstrItinData<IIC_SSE_MUL_F64P_RR, [InstrStage<4, [FPC_RSV0, FPC_RSV1]>] >,
- InstrItinData<IIC_SSE_MUL_F64P_RM, [InstrStage<4, [FPC_RSV0, FPC_RSV1], 0>,
- InstrStage<4, [MEC_RSV]>] >,
- InstrItinData<IIC_SSE_DIV_F32P_RR, [InstrStage<27, [FPC_RSV0, FPC_RSV1]>] >,
- InstrItinData<IIC_SSE_DIV_F32P_RM, [InstrStage<27, [FPC_RSV0, FPC_RSV1], 0>,
- InstrStage<27, [MEC_RSV]>] >,
- InstrItinData<IIC_SSE_DIV_F64P_RR, [InstrStage<27, [FPC_RSV0, FPC_RSV1]>] >,
- InstrItinData<IIC_SSE_DIV_F64P_RM, [InstrStage<27, [FPC_RSV0, FPC_RSV1], 0>,
- InstrStage<27, [MEC_RSV]>] >,
-
- // bitwise parallel
- InstrItinData<IIC_SSE_BIT_P_RR, [InstrStage<1, [FPC_RSV0, FPC_RSV1]>] >,
- InstrItinData<IIC_SSE_BIT_P_RM, [InstrStage<1, [FPC_RSV0, FPC_RSV1], 0>,
- InstrStage<1, [MEC_RSV]>] >,
-
- // arithmetic int parallel
- InstrItinData<IIC_SSE_INTALU_P_RR, [InstrStage<1, [FPC_RSV0, FPC_RSV1]>] >,
- InstrItinData<IIC_SSE_INTALU_P_RM, [InstrStage<1, [FPC_RSV0, FPC_RSV1], 0>,
- InstrStage<1, [MEC_RSV]>] >,
- InstrItinData<IIC_SSE_INTALUQ_P_RR, [InstrStage<4, [FPC_RSV0, FPC_RSV1]>] >,
- InstrItinData<IIC_SSE_INTALUQ_P_RM, [InstrStage<4, [FPC_RSV0, FPC_RSV1], 0>,
- InstrStage<4, [MEC_RSV]>] >,
-
- // multiply int parallel
- InstrItinData<IIC_SSE_INTMUL_P_RR, [InstrStage<5, [FPC_RSV0]>] >,
- InstrItinData<IIC_SSE_INTMUL_P_RM, [InstrStage<5, [FPC_RSV0], 0>,
- InstrStage<5, [MEC_RSV]>] >,
-
- // shift parallel
- InstrItinData<IIC_SSE_INTSH_P_RR, [InstrStage<2, [FPC_RSV0]>] >,
- InstrItinData<IIC_SSE_INTSH_P_RM, [InstrStage<2, [FPC_RSV0], 0>,
- InstrStage<2, [MEC_RSV]>] >,
- InstrItinData<IIC_SSE_INTSH_P_RI, [InstrStage<1, [FPC_RSV0]>] >,
-
- InstrItinData<IIC_SSE_INTSHDQ_P_RI, [InstrStage<1, [FPC_RSV0]>] >,
-
- InstrItinData<IIC_SSE_SHUFP, [InstrStage<1, [FPC_RSV0]>] >,
- InstrItinData<IIC_SSE_PSHUF_RI, [InstrStage<1, [FPC_RSV0]>] >,
- InstrItinData<IIC_SSE_PSHUF_MI, [InstrStage<1, [FPC_RSV0], 0>,
- InstrStage<1, [MEC_RSV]>] >,
-
- InstrItinData<IIC_SSE_UNPCK, [InstrStage<1, [FPC_RSV0]>] >,
-
- InstrItinData<IIC_SSE_SQRTPS_RR, [InstrStage<26, [FPC_RSV0]>] >,
- InstrItinData<IIC_SSE_SQRTPS_RM, [InstrStage<26, [FPC_RSV0], 0>,
- InstrStage<26, [MEC_RSV]>] >,
- InstrItinData<IIC_SSE_SQRTSS_RR, [InstrStage<13, [FPC_RSV0]>] >,
- InstrItinData<IIC_SSE_SQRTSS_RM, [InstrStage<13, [FPC_RSV0], 0>,
- InstrStage<13, [MEC_RSV]>] >,
- InstrItinData<IIC_SSE_SQRTPD_RR, [InstrStage<26, [FPC_RSV0]>] >,
- InstrItinData<IIC_SSE_SQRTPD_RM, [InstrStage<26, [FPC_RSV0], 0>,
- InstrStage<26, [MEC_RSV]>] >,
- InstrItinData<IIC_SSE_SQRTSD_RR, [InstrStage<13, [FPC_RSV0]>] >,
- InstrItinData<IIC_SSE_SQRTSD_RM, [InstrStage<13, [FPC_RSV0], 0>,
- InstrStage<13, [MEC_RSV]>] >,
-
- InstrItinData<IIC_SSE_RCPP_RR, [InstrStage<9, [FPC_RSV0]>] >,
- InstrItinData<IIC_SSE_RCPP_RM, [InstrStage<9, [FPC_RSV0], 0>,
- InstrStage<9, [MEC_RSV]>] >,
- InstrItinData<IIC_SSE_RCPS_RR, [InstrStage<4, [FPC_RSV0]>] >,
- InstrItinData<IIC_SSE_RCPS_RM, [InstrStage<4, [FPC_RSV0], 0>,
- InstrStage<4, [MEC_RSV]>] >,
-
- InstrItinData<IIC_SSE_MOVMSK, [InstrStage<1, [FPC_RSV0, FPC_RSV1]>] >,
- InstrItinData<IIC_SSE_MASKMOV, [InstrStage<5, [FPC_RSV0, FPC_RSV1]>] >,
-
- InstrItinData<IIC_SSE_PEXTRW, [InstrStage<4, [FPC_RSV0, FPC_RSV1]>] >,
- InstrItinData<IIC_SSE_PINSRW, [InstrStage<1, [FPC_RSV0, FPC_RSV1]>] >,
-
- InstrItinData<IIC_SSE_PABS_RR, [InstrStage<1, [FPC_RSV0, FPC_RSV1]>] >,
- InstrItinData<IIC_SSE_PABS_RM, [InstrStage<1, [FPC_RSV0, FPC_RSV1], 0>,
- InstrStage<1, [MEC_RSV]>] >,
-
- InstrItinData<IIC_SSE_MOV_S_RR, [InstrStage<1, [FPC_RSV0, FPC_RSV1]>] >,
- InstrItinData<IIC_SSE_MOV_S_RM, [InstrStage<1, [FPC_RSV0, FPC_RSV1], 0>,
- InstrStage<1, [MEC_RSV]>] >,
- InstrItinData<IIC_SSE_MOV_S_MR, [InstrStage<1, [FPC_RSV0, FPC_RSV1], 0>,
- InstrStage<1, [MEC_RSV]>] >,
-
- InstrItinData<IIC_SSE_MOVA_P_RR, [InstrStage<1, [FPC_RSV0, FPC_RSV1]>] >,
- InstrItinData<IIC_SSE_MOVA_P_RM, [InstrStage<1, [FPC_RSV0, FPC_RSV1], 0>,
- InstrStage<1, [MEC_RSV]>] >,
- InstrItinData<IIC_SSE_MOVA_P_MR, [InstrStage<1, [FPC_RSV0, FPC_RSV1], 0>,
- InstrStage<1, [MEC_RSV]>] >,
-
- InstrItinData<IIC_SSE_MOVU_P_RR, [InstrStage<1, [FPC_RSV0, FPC_RSV1]>] >,
- InstrItinData<IIC_SSE_MOVU_P_RM, [InstrStage<1, [FPC_RSV0, FPC_RSV1], 0>,
- InstrStage<1, [MEC_RSV]>] >,
- InstrItinData<IIC_SSE_MOVU_P_MR, [InstrStage<1, [FPC_RSV0, FPC_RSV1], 0>,
- InstrStage<1, [MEC_RSV]>] >,
-
- InstrItinData<IIC_SSE_MOV_LH, [InstrStage<1, [FPC_RSV0, FPC_RSV1]>] >,
-
- InstrItinData<IIC_SSE_LDDQU, [InstrStage<1, [FPC_RSV0, FPC_RSV1]>] >,
-
- InstrItinData<IIC_SSE_MOVDQ, [InstrStage<1, [FPC_RSV0, FPC_RSV1]>] >,
- InstrItinData<IIC_SSE_MOVD_ToGP, [InstrStage<1, [IEC_RSV0, IEC_RSV1], 0>,
- InstrStage<1, [FPC_RSV0, FPC_RSV1]>] >,
- InstrItinData<IIC_SSE_MOVQ_RR, [InstrStage<1, [FPC_RSV0, FPC_RSV1]>] >,
-
- InstrItinData<IIC_SSE_MOVNT, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
-
- InstrItinData<IIC_SSE_PREFETCH, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_SSE_PAUSE, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_SSE_LFENCE, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_SSE_MFENCE, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_SSE_SFENCE, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_SSE_LDMXCSR, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_SSE_STMXCSR, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
-
- InstrItinData<IIC_SSE_PHADDSUBD_RR, [InstrStage<6, [FPC_RSV0, FPC_RSV1]>] >,
- InstrItinData<IIC_SSE_PHADDSUBD_RM, [InstrStage<6, [FPC_RSV0, FPC_RSV1], 0>,
- InstrStage<6, [MEC_RSV]>] >,
- InstrItinData<IIC_SSE_PHADDSUBSW_RR, [InstrStage<9, [FPC_RSV0, FPC_RSV1]>] >,
- InstrItinData<IIC_SSE_PHADDSUBSW_RM, [InstrStage<9, [FPC_RSV0, FPC_RSV1], 0>,
- InstrStage<9, [MEC_RSV]>] >,
- InstrItinData<IIC_SSE_PHADDSUBW_RR, [InstrStage<9, [FPC_RSV0, FPC_RSV1], 0>,
- InstrStage<9, [MEC_RSV]>] >,
- InstrItinData<IIC_SSE_PHADDSUBW_RM, [InstrStage<9, [FPC_RSV0, FPC_RSV1], 0>,
- InstrStage<9, [MEC_RSV]>] >,
- InstrItinData<IIC_SSE_PSHUFB_RR, [InstrStage<5, [FPC_RSV0, FPC_RSV1]>] >,
- InstrItinData<IIC_SSE_PSHUFB_RM, [InstrStage<5, [FPC_RSV0, FPC_RSV1], 0>,
- InstrStage<5, [MEC_RSV]>] >,
- InstrItinData<IIC_SSE_PSIGN_RR, [InstrStage<1, [FPC_RSV0, FPC_RSV1]>] >,
- InstrItinData<IIC_SSE_PSIGN_RM, [InstrStage<1, [FPC_RSV0, FPC_RSV1], 0>,
- InstrStage<1, [MEC_RSV]>] >,
-
- InstrItinData<IIC_SSE_PMADD, [InstrStage<5, [FPC_RSV0, FPC_RSV1]>] >,
- InstrItinData<IIC_SSE_PMULHRSW, [InstrStage<5, [FPC_RSV0, FPC_RSV1]>] >,
- InstrItinData<IIC_SSE_PALIGNRR, [InstrStage<1, [FPC_RSV0, FPC_RSV1]>] >,
- InstrItinData<IIC_SSE_PALIGNRM, [InstrStage<1, [FPC_RSV0, FPC_RSV1], 0>,
- InstrStage<1, [MEC_RSV]>] >,
- InstrItinData<IIC_SSE_MWAIT, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_SSE_MONITOR, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
-
- // conversions
- // to/from PD ...
- InstrItinData<IIC_SSE_CVT_PD_RR, [InstrStage<5, [FPC_RSV0, FPC_RSV1]>] >,
- InstrItinData<IIC_SSE_CVT_PD_RM, [InstrStage<5, [FPC_RSV0, FPC_RSV1], 0>,
- InstrStage<5, [MEC_RSV]>] >,
- // to/from PS except to/from PD and PS2PI
- InstrItinData<IIC_SSE_CVT_PS_RR, [InstrStage<4, [FPC_RSV0, FPC_RSV1]>] >,
- InstrItinData<IIC_SSE_CVT_PS_RM, [InstrStage<4, [FPC_RSV0, FPC_RSV1], 0>,
- InstrStage<4, [MEC_RSV]>] >,
- InstrItinData<IIC_SSE_CVT_Scalar_RR, [InstrStage<1, [FPC_RSV0, FPC_RSV1]>] >,
- InstrItinData<IIC_SSE_CVT_Scalar_RM, [InstrStage<1, [FPC_RSV0, FPC_RSV1], 0>,
- InstrStage<1, [MEC_RSV]>] >,
- InstrItinData<IIC_SSE_CVT_SS2SI32_RR, [InstrStage<4, [FPC_RSV0, FPC_RSV1]>] >,
- InstrItinData<IIC_SSE_CVT_SS2SI32_RM, [InstrStage<4, [FPC_RSV0, FPC_RSV1], 0>,
- InstrStage<4, [MEC_RSV]>] >,
- InstrItinData<IIC_SSE_CVT_SS2SI64_RR, [InstrStage<4, [FPC_RSV0, FPC_RSV1]>] >,
- InstrItinData<IIC_SSE_CVT_SS2SI64_RM, [InstrStage<4, [FPC_RSV0, FPC_RSV1], 0>,
- InstrStage<4, [MEC_RSV]>] >,
- InstrItinData<IIC_SSE_CVT_SD2SI_RR, [InstrStage<4, [FPC_RSV0, FPC_RSV1]>] >,
- InstrItinData<IIC_SSE_CVT_SD2SI_RM, [InstrStage<4, [FPC_RSV0, FPC_RSV1], 0>,
- InstrStage<4, [MEC_RSV]>] >,
-
- // MMX MOVs
- InstrItinData<IIC_MMX_MOV_MM_RM, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_MMX_MOV_REG_MM, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_MMX_MOVQ_RM, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_MMX_MOVQ_RR, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- // other MMX
- InstrItinData<IIC_MMX_ALU_RM, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_MMX_ALU_RR, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_MMX_ALUQ_RM, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_MMX_ALUQ_RR, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_MMX_PHADDSUBW_RM, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_MMX_PHADDSUBW_RR, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_MMX_PHADDSUBD_RM, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_MMX_PHADDSUBD_RR, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_MMX_PMUL, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_MMX_MISC_FUNC_MEM, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_MMX_MISC_FUNC_REG, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_MMX_PSADBW, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_MMX_SHIFT_RI, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_MMX_SHIFT_RM, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_MMX_SHIFT_RR, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_MMX_UNPCK_H_RM, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_MMX_UNPCK_H_RR, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_MMX_UNPCK_L, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_MMX_PCK_RM, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_MMX_PCK_RR, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_MMX_PSHUF, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_MMX_PEXTR, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_MMX_PINSRW, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_MMX_MASKMOV, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- // conversions
- // from/to PD
- InstrItinData<IIC_MMX_CVT_PD_RR, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_MMX_CVT_PD_RM, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- // from/to PI
- InstrItinData<IIC_MMX_CVT_PS_RR, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_MMX_CVT_PS_RM, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
-
- InstrItinData<IIC_CMPX_LOCK, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_CMPX_LOCK_8, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_CMPX_LOCK_8B, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_CMPX_LOCK_16B, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
-
- InstrItinData<IIC_XADD_LOCK_MEM, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_XADD_LOCK_MEM, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
-
- InstrItinData<IIC_FILD, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_FLD, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_FLD80, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+def SLMModel : SchedMachineModel {
+ // All x86 instructions are modeled as a single micro-op, and SLM can decode 2
+ // instructions per cycle.
+ let IssueWidth = 2;
+ let MicroOpBufferSize = 32; // Based on the reorder buffer.
+ let LoadLatency = 3;
+ let MispredictPenalty = 10;
+
+ // For small loops, expand by a small factor to hide the backedge cost.
+ let LoopMicroOpBufferSize = 10;
+
+ // FIXME: SSE4 is unimplemented. This flag is set to allow
+ // the scheduler to assign a default model to unrecognized opcodes.
+ let CompleteModel = 0;
+}
- InstrItinData<IIC_FST, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_FST80, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_FIST, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+let SchedModel = SLMModel in {
+
+// Silvermont has 5 reservation stations for micro-ops.
+
+def IEC_RSV0 : ProcResource<1>;
+def IEC_RSV1 : ProcResource<1>;
+def FPC_RSV0 : ProcResource<1> { let BufferSize = 1; }
+def FPC_RSV1 : ProcResource<1> { let BufferSize = 1; }
+def MEC_RSV : ProcResource<1>;
+
+// Many micro-ops are capable of issuing on multiple ports.
+def IEC_RSV01 : ProcResGroup<[IEC_RSV0, IEC_RSV1]>;
+def FPC_RSV01 : ProcResGroup<[FPC_RSV0, FPC_RSV1]>;
+
+def SMDivider : ProcResource<1>;
+def SMFPMultiplier : ProcResource<1>;
+def SMFPDivider : ProcResource<1>;
+
+// Loads are 3 cycles, so ReadAfterLd registers needn't be available until 3
+// cycles after the memory operand.
+def : ReadAdvance<ReadAfterLd, 3>;
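+// For example, in "addps (%rax), %xmm1" the add does not read %xmm1 until 3
+// cycles after issue, so a producer finishing within that window causes no
+// stall.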
+
+// Many SchedWrites are defined in pairs with and without a folded load.
+// Instructions with folded loads are usually micro-fused, so they only appear
+// as two micro-ops when queued in the reservation station.
+// This multiclass defines the resource usage for variants with and without
+// folded loads.
+multiclass SMWriteResPair<X86FoldableSchedWrite SchedRW,
+ ProcResourceKind ExePort,
+ int Lat> {
+ // Register variant uses a single cycle on ExePort.
+ def : WriteRes<SchedRW, [ExePort]> { let Latency = Lat; }
+
+ // Memory variant also uses a cycle on MEC_RSV and adds 3 cycles to the
+ // latency.
+ def : WriteRes<SchedRW.Folded, [MEC_RSV, ExePort]> {
+ let Latency = !add(Lat, 3);
+ }
+}
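+// For example, "defm : SMWriteResPair<WriteALU, IEC_RSV01, 1>;" below expands
+// to a 1-cycle WriteRes<WriteALU, [IEC_RSV01]> plus a 4-cycle
+// WriteRes<WriteALULd, [MEC_RSV, IEC_RSV01]> for the folded-load variant.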
- InstrItinData<IIC_FLDZ, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_FUCOM, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_FUCOMI, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_FCOMI, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_FNSTSW, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_FNSTCW, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_FLDCW, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_FNINIT, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_FFREE, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_FNCLEX, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_WAIT, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_FXAM, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_FNOP, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_FLDL, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_F2XM1, [InstrStage<88, [FPC_RSV0, FPC_RSV1]>] >,
- InstrItinData<IIC_FYL2X, [InstrStage<296, [FPC_RSV0, FPC_RSV1]>] >,
- InstrItinData<IIC_FPTAN, [InstrStage<281, [FPC_RSV0, FPC_RSV1]>] >,
- InstrItinData<IIC_FPATAN, [InstrStage<296, [FPC_RSV0, FPC_RSV1]>] >,
- InstrItinData<IIC_FXTRACT, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_FPREM1, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_FPSTP, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_FPREM, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_FYL2XP1, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_FSINCOS, [InstrStage<281, [FPC_RSV0, FPC_RSV1]>] >,
- InstrItinData<IIC_FRNDINT, [InstrStage<25, [FPC_RSV0, FPC_RSV1]>] >,
- InstrItinData<IIC_FSCALE, [InstrStage<74, [FPC_RSV0, FPC_RSV1]>] >,
- InstrItinData<IIC_FCOMPP, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_FXSAVE, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_FXRSTOR, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_FXCH, [InstrStage<1, [FPC_RSV0, FPC_RSV1]>] >,
+// A folded store needs a cycle on MEC_RSV for the store data, but it does not
+// need an extra port cycle to recompute the address.
+def : WriteRes<WriteRMW, [MEC_RSV]>;
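+// (e.g. "addl %eax, (%rbx)" uses WriteALULd for the load+add and WriteRMW for
+// the store-back of the result.)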
+
+def : WriteRes<WriteStore, [IEC_RSV01, MEC_RSV]>;
+def : WriteRes<WriteLoad, [MEC_RSV]> { let Latency = 3; }
+def : WriteRes<WriteMove, [IEC_RSV01]>;
+def : WriteRes<WriteZero, []>;
+
+defm : SMWriteResPair<WriteALU, IEC_RSV01, 1>;
+defm : SMWriteResPair<WriteIMul, IEC_RSV1, 3>;
+defm : SMWriteResPair<WriteShift, IEC_RSV0, 1>;
+defm : SMWriteResPair<WriteJump, IEC_RSV1, 1>;
+
+// This is for simple LEAs with one or two input operands.
+// The complex ones can only execute on port 1, and they require two cycles on
+// the port to read all inputs. We don't model that.
+def : WriteRes<WriteLEA, [IEC_RSV1]>;
+
+// This is quite rough; latency depends on the dividend.
+def : WriteRes<WriteIDiv, [IEC_RSV01, SMDivider]> {
+ let Latency = 25;
+ let ResourceCycles = [1, 25];
+}
+def : WriteRes<WriteIDivLd, [MEC_RSV, IEC_RSV01, SMDivider]> {
+ let Latency = 29;
+ let ResourceCycles = [1, 1, 25];
+}
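+// ResourceCycles = [1, 25] holds SMDivider for 25 consecutive cycles, so
+// back-to-back divides serialize on the unpipelined divider.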
- // System instructions
- InstrItinData<IIC_CPUID, [InstrStage<60, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_INT, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_INT3, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_INVD, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_INVLPG, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_IRET, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_HLT, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_LXS, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_LTR, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_RDTSC, [InstrStage<30, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_RSM, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_SIDT, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_SGDT, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_SLDT, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_STR, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_SWAPGS, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_SYSCALL, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_SYS_ENTER_EXIT, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+// Scalar and vector floating point.
+defm : SMWriteResPair<WriteFAdd, FPC_RSV1, 3>;
+defm : SMWriteResPair<WriteFRcp, FPC_RSV0, 5>;
+defm : SMWriteResPair<WriteFSqrt, FPC_RSV0, 15>;
+defm : SMWriteResPair<WriteCvtF2I, FPC_RSV01, 4>;
+defm : SMWriteResPair<WriteCvtI2F, FPC_RSV01, 4>;
+defm : SMWriteResPair<WriteCvtF2F, FPC_RSV01, 4>;
+defm : SMWriteResPair<WriteFShuffle, FPC_RSV0, 1>;
+defm : SMWriteResPair<WriteFBlend, FPC_RSV0, 1>;
+
+// This is quite rough; latency depends on precision.
+def : WriteRes<WriteFMul, [FPC_RSV0, SMFPMultiplier]> {
+ let Latency = 5;
+ let ResourceCycles = [1, 2];
+}
+def : WriteRes<WriteFMulLd, [MEC_RSV, FPC_RSV0, SMFPMultiplier]> {
+ let Latency = 8;
+ let ResourceCycles = [1, 1, 2];
+}
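+// ResourceCycles = [1, 2] keeps SMFPMultiplier busy for two cycles per
+// multiply, i.e. a throughput of one multiply every other cycle.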
- InstrItinData<IIC_IN_RR, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_IN_RI, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_OUT_RR, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_OUT_IR, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_INS, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+def : WriteRes<WriteFDiv, [FPC_RSV0, SMFPDivider]> {
+ let Latency = 34;
+ let ResourceCycles = [1, 34];
+}
+def : WriteRes<WriteFDivLd, [MEC_RSV, FPC_RSV0, SMFPDivider]> {
+ let Latency = 37;
+ let ResourceCycles = [1, 1, 34];
+}
- InstrItinData<IIC_MOV_REG_DR, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_MOV_DR_REG, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- // worst case for mov REG_CRx
- InstrItinData<IIC_MOV_REG_CR, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_MOV_CR_REG, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+// Vector integer operations.
+defm : SMWriteResPair<WriteVecShift, FPC_RSV0, 1>;
+defm : SMWriteResPair<WriteVecLogic, FPC_RSV01, 1>;
+defm : SMWriteResPair<WriteVecALU, FPC_RSV01, 1>;
+defm : SMWriteResPair<WriteVecIMul, FPC_RSV0, 4>;
+defm : SMWriteResPair<WriteShuffle, FPC_RSV0, 1>;
+defm : SMWriteResPair<WriteBlend, FPC_RSV0, 1>;
+defm : SMWriteResPair<WriteMPSAD, FPC_RSV0, 7>;
+
+// String instructions.
+// Packed Compare Implicit Length Strings, Return Mask
+def : WriteRes<WritePCmpIStrM, [FPC_RSV0]> {
+ let Latency = 13;
+ let ResourceCycles = [13];
+}
+def : WriteRes<WritePCmpIStrMLd, [FPC_RSV0, MEC_RSV]> {
+ let Latency = 13;
+ let ResourceCycles = [13, 1];
+}
- InstrItinData<IIC_MOV_REG_SR, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_MOV_MEM_SR, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_MOV_SR_REG, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_MOV_SR_MEM, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- // LAR
- InstrItinData<IIC_LAR_RM, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_LAR_RR, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- // LSL
- InstrItinData<IIC_LSL_RM, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_LSL_RR, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+// Packed Compare Explicit Length Strings, Return Mask
+def : WriteRes<WritePCmpEStrM, [FPC_RSV0]> {
+ let Latency = 17;
+ let ResourceCycles = [17];
+}
+def : WriteRes<WritePCmpEStrMLd, [FPC_RSV0, MEC_RSV]> {
+ let Latency = 17;
+ let ResourceCycles = [17, 1];
+}
- InstrItinData<IIC_LGDT, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_LIDT, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_LLDT_REG, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_LLDT_MEM, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- // push control register, segment registers
- InstrItinData<IIC_PUSH_CS, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_PUSH_SR, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- // pop control register, segment registers
- InstrItinData<IIC_POP_SR, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_POP_SR_SS, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- // VERR, VERW
- InstrItinData<IIC_VERR, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_VERW_REG, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_VERW_MEM, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- // WRMSR, RDMSR
- InstrItinData<IIC_WRMSR, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_RDMSR, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_RDPMC, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- // SMSW, LMSW
- InstrItinData<IIC_SMSW, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_LMSW_REG, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_LMSW_MEM, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+// Packed Compare Implicit Length Strings, Return Index
+def : WriteRes<WritePCmpIStrI, [FPC_RSV0]> {
+ let Latency = 17;
+ let ResourceCycles = [17];
+}
+def : WriteRes<WritePCmpIStrILd, [FPC_RSV0, MEC_RSV]> {
+ let Latency = 17;
+ let ResourceCycles = [17, 1];
+}
- InstrItinData<IIC_ENTER, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_LEAVE, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+// Packed Compare Explicit Length Strings, Return Index
+def : WriteRes<WritePCmpEStrI, [FPC_RSV0]> {
+ let Latency = 21;
+ let ResourceCycles = [21];
+}
+def : WriteRes<WritePCmpEStrILd, [FPC_RSV0, MEC_RSV]> {
+ let Latency = 21;
+ let ResourceCycles = [21, 1];
+}
- InstrItinData<IIC_POP_MEM, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_POP_REG16, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_POP_REG, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_POP_F, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_POP_FD, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_POP_A, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+// AES Instructions.
+def : WriteRes<WriteAESDecEnc, [FPC_RSV0]> {
+ let Latency = 8;
+ let ResourceCycles = [5];
+}
+def : WriteRes<WriteAESDecEncLd, [FPC_RSV0, MEC_RSV]> {
+ let Latency = 8;
+ let ResourceCycles = [5, 1];
+}
- InstrItinData<IIC_PUSH_IMM, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_PUSH_MEM, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_PUSH_REG, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_PUSH_F, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_PUSH_A, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
+def : WriteRes<WriteAESIMC, [FPC_RSV0]> {
+ let Latency = 8;
+ let ResourceCycles = [5];
+}
+def : WriteRes<WriteAESIMCLd, [FPC_RSV0, MEC_RSV]> {
+ let Latency = 8;
+ let ResourceCycles = [5, 1];
+}
- InstrItinData<IIC_BSWAP, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_BIT_SCAN_MEM, [InstrStage<10, [IEC_RSV0, IEC_RSV1], 0>,
- InstrStage<10, [MEC_RSV]>] >,
- InstrItinData<IIC_BIT_SCAN_REG, [InstrStage<10, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_MOVS, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_STOS, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_SCAS, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_CMPS, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_MOV, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_MOV_MEM, [InstrStage<1, [IEC_RSV0, IEC_RSV1], 0>,
- InstrStage<1, [MEC_RSV]>] >,
- InstrItinData<IIC_AHF, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_BT_MI, [InstrStage<1, [IEC_RSV0, IEC_RSV1], 0>,
- InstrStage<1, [MEC_RSV]>] >,
- InstrItinData<IIC_BT_MR, [InstrStage<1, [IEC_RSV0, IEC_RSV1], 0>,
- InstrStage<1, [MEC_RSV]>] >,
- InstrItinData<IIC_BT_RI, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_BT_RR, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_BTX_MI, [InstrStage<1, [IEC_RSV0, IEC_RSV1], 0>,
- InstrStage<1, [MEC_RSV]>] >,
- InstrItinData<IIC_BTX_MR, [InstrStage<1, [IEC_RSV0, IEC_RSV1], 0>,
- InstrStage<1, [MEC_RSV]>] >,
- InstrItinData<IIC_BTX_RI, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_BTX_RR, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_XCHG_REG, [InstrStage<5, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_XCHG_MEM, [InstrStage<5, [IEC_RSV0, IEC_RSV1], 0>,
- InstrStage<5, [MEC_RSV]>] >,
- InstrItinData<IIC_XADD_REG, [InstrStage<5, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_XADD_MEM, [InstrStage<5, [IEC_RSV0, IEC_RSV1], 0>,
- InstrStage<5, [MEC_RSV]>] >,
- InstrItinData<IIC_CMPXCHG_MEM, [InstrStage<6, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_CMPXCHG_REG, [InstrStage<6, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_CMPXCHG_MEM8, [InstrStage<6, [IEC_RSV0, IEC_RSV1], 0>,
- InstrStage<6, [MEC_RSV]>] >,
- InstrItinData<IIC_CMPXCHG_REG8, [InstrStage<6, [IEC_RSV0, IEC_RSV1], 0>,
- InstrStage<6, [MEC_RSV]>] >,
- InstrItinData<IIC_CMPXCHG_8B, [InstrStage<6, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_CMPXCHG_16B, [InstrStage<6, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_LODS, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_OUTS, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_CLC, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_CLD, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_CLI, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_CMC, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_CLTS, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_STC, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_STI, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_STD, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_XLAT, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_AAA, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_AAD, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_AAM, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_AAS, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_DAA, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_DAS, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_BOUND, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_ARPL_REG, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_ARPL_MEM, [InstrStage<1, [IEC_RSV0, IEC_RSV1], 0>,
- InstrStage<1, [MEC_RSV]>] >,
- InstrItinData<IIC_MOVBE, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_AES, [InstrStage<8, [FPC_RSV0]>] >,
- InstrItinData<IIC_BLEND_NOMEM, [InstrStage<4, [FPC_RSV0, FPC_RSV1]>] >,
- InstrItinData<IIC_BLEND_MEM, [InstrStage<4, [FPC_RSV0, FPC_RSV1], 0>,
- InstrStage<4, [MEC_RSV]>] >,
- InstrItinData<IIC_BIT_SCAN_MEM, [InstrStage<10, [IEC_RSV0, IEC_RSV1], 0>,
- InstrStage<10, [MEC_RSV]>] >,
- InstrItinData<IIC_BIT_SCAN_REG, [InstrStage<10, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_CBW, [InstrStage<4, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_CRC32_REG, [InstrStage<3, [IEC_RSV0, IEC_RSV1]>] >,
- InstrItinData<IIC_CRC32_MEM, [InstrStage<3, [IEC_RSV0, IEC_RSV1], 0>,
- InstrStage<3, [MEC_RSV]>] >,
- InstrItinData<IIC_SSE_DPPD_RR, [InstrStage<12, [FPC_RSV0, FPC_RSV1]>] >,
- InstrItinData<IIC_SSE_DPPD_RM, [InstrStage<12, [FPC_RSV0, FPC_RSV1], 0>,
- InstrStage<12, [MEC_RSV]>] >,
- InstrItinData<IIC_SSE_DPPS_RR, [InstrStage<15, [FPC_RSV0, FPC_RSV1]>] >,
- InstrItinData<IIC_SSE_DPPS_RM, [InstrStage<15, [FPC_RSV0, FPC_RSV1], 0>,
- InstrStage<15, [MEC_RSV]>] >,
- InstrItinData<IIC_MMX_EMMS, [InstrStage<10, [FPC_RSV0, FPC_RSV1]>] >,
- InstrItinData<IIC_SSE_EXTRACTPS_RR, [InstrStage<4, [FPC_RSV0, FPC_RSV1]>] >,
- InstrItinData<IIC_SSE_EXTRACTPS_RM, [InstrStage<4, [FPC_RSV0, FPC_RSV1], 0>,
- InstrStage<4, [MEC_RSV]>] >,
- InstrItinData<IIC_SSE_INSERTPS_RR, [InstrStage<1, [FPC_RSV0, FPC_RSV1]>] >,
- InstrItinData<IIC_SSE_INSERTPS_RM, [InstrStage<1, [FPC_RSV0, FPC_RSV1], 0>,
- InstrStage<1, [MEC_RSV]>] >,
- InstrItinData<IIC_SSE_MPSADBW_RR, [InstrStage<1, [FPC_RSV0, FPC_RSV1]>] >,
- InstrItinData<IIC_SSE_MPSADBW_RM, [InstrStage<1, [FPC_RSV0, FPC_RSV1], 0>,
- InstrStage<1, [MEC_RSV]>] >,
- InstrItinData<IIC_SSE_PMULLD_RR, [InstrStage<11, [FPC_RSV0, FPC_RSV1]>] >,
- InstrItinData<IIC_SSE_PMULLD_RM, [InstrStage<11, [FPC_RSV0, FPC_RSV1], 0>,
- InstrStage<11, [MEC_RSV]>] >,
- InstrItinData<IIC_SSE_ROUNDPS_REG, [InstrStage<5, [FPC_RSV0, FPC_RSV1]>] >,
- InstrItinData<IIC_SSE_ROUNDPS_MEM, [InstrStage<5, [FPC_RSV0, FPC_RSV1], 0>,
- InstrStage<5, [MEC_RSV]>] >,
- InstrItinData<IIC_SSE_ROUNDPD_REG, [InstrStage<4, [FPC_RSV0, FPC_RSV1]>] >,
- InstrItinData<IIC_SSE_ROUNDPD_MEM, [InstrStage<4, [FPC_RSV0, FPC_RSV1], 0>,
- InstrStage<4, [MEC_RSV]>] >,
- InstrItinData<IIC_SSE_POPCNT_RR, [InstrStage<4, [IEC_RSV1]>] >,
- InstrItinData<IIC_SSE_POPCNT_RM, [InstrStage<4, [IEC_RSV1], 0>,
- InstrStage<4, [MEC_RSV]>] >,
- InstrItinData<IIC_SSE_PCLMULQDQ_RR, [InstrStage<10, [IEC_RSV1]>] >,
- InstrItinData<IIC_SSE_PCLMULQDQ_RM, [InstrStage<10, [IEC_RSV1], 0>,
- InstrStage<10, [MEC_RSV]>] >,
+def : WriteRes<WriteAESKeyGen, [FPC_RSV0]> {
+ let Latency = 8;
+ let ResourceCycles = [5];
+}
+def : WriteRes<WriteAESKeyGenLd, [FPC_RSV0, MEC_RSV]> {
+ let Latency = 8;
+ let ResourceCycles = [5, 1];
+}
- InstrItinData<IIC_NOP, [InstrStage<1, [IEC_RSV0, IEC_RSV1]>] >
- ]>;
+// Carry-less multiplication instructions.
+def : WriteRes<WriteCLMul, [FPC_RSV0]> {
+ let Latency = 10;
+ let ResourceCycles = [10];
+}
+def : WriteRes<WriteCLMulLd, [FPC_RSV0, MEC_RSV]> {
+ let Latency = 10;
+ let ResourceCycles = [10, 1];
+}
-// Silvermont machine model.
-def SLMModel : SchedMachineModel {
- let IssueWidth = 2; // Allows 2 instructions per scheduling group.
- let MinLatency = 1; // InstrStage cycles override MinLatency.
- // OperandCycles may be used for expected latency.
- let LoadLatency = 3; // Expected cycles, may be overridden by OperandCycles.
- let HighLatency = 30; // Expected, may be overridden by OperandCycles.
- let Itineraries = SLMItineraries;
-}
+def : WriteRes<WriteSystem, [FPC_RSV0]> { let Latency = 100; }
+def : WriteRes<WriteMicrocoded, [FPC_RSV0]> { let Latency = 100; }
+def : WriteRes<WriteFence, [MEC_RSV]>;
+def : WriteRes<WriteNop, []>;
+
+// AVX is not supported on this architecture, but we still define the basic
+// scheduling resources anyway.
+def : WriteRes<WriteIMulH, [FPC_RSV0]>;
+defm : SMWriteResPair<WriteVarBlend, FPC_RSV0, 1>;
+defm : SMWriteResPair<WriteFVarBlend, FPC_RSV0, 1>;
+defm : SMWriteResPair<WriteFShuffle256, FPC_RSV0, 1>;
+defm : SMWriteResPair<WriteShuffle256, FPC_RSV0, 1>;
+defm : SMWriteResPair<WriteVarVecShift, FPC_RSV0, 1>;
+} // SchedModel
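
The WriteRes entries above keep result latency separate from unit occupancy: the divide entry pairs a 34-cycle latency with 34 busy cycles on SMFPDivider, so back-to-back divides are throughput-limited even when they are independent. A minimal standalone sketch of how a reciprocal-throughput bound falls out of a ResourceCycles list — plain C++ for illustration, not the LLVM scheduling API:

#include <algorithm>
#include <cstdio>
#include <vector>

// Illustrative mirror of a WriteRes entry: one latency plus per-resource
// busy cycles, in the same order as the TableGen ResourceCycles list.
struct WriteResEntry {
  const char *Name;
  int Latency;
  std::vector<int> ResourceCycles;
};

// A resource that stays busy for N cycles can accept a new operation at
// most every N cycles, so the busiest resource bounds throughput.
static int recipThroughputBound(const WriteResEntry &E) {
  return *std::max_element(E.ResourceCycles.begin(), E.ResourceCycles.end());
}

int main() {
  WriteResEntry FDiv{"WriteFDiv", 34, {1, 34}}; // FPC_RSV0, SMFPDivider
  std::printf("%s: latency %d, ~1 op per %d cycles\n",
              FDiv.Name, FDiv.Latency, recipThroughputBound(FDiv));
  return 0;
}
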
diff --git a/lib/Target/X86/X86SelectionDAGInfo.cpp b/lib/Target/X86/X86SelectionDAGInfo.cpp
index b9c620f..744890d 100644
--- a/lib/Target/X86/X86SelectionDAGInfo.cpp
+++ b/lib/Target/X86/X86SelectionDAGInfo.cpp
@@ -11,12 +11,13 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "x86-selectiondag-info"
#include "X86TargetMachine.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/IR/DerivedTypes.h"
using namespace llvm;
+#define DEBUG_TYPE "x86-selectiondag-info"
+
X86SelectionDAGInfo::X86SelectionDAGInfo(const X86TargetMachine &TM) :
TargetSelectionDAGInfo(TM),
Subtarget(&TM.getSubtarget<X86Subtarget>()),
@@ -50,7 +51,7 @@ X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl,
ConstantSDNode *V = dyn_cast<ConstantSDNode>(Src);
if (const char *bzeroEntry = V &&
- V->isNullValue() ? Subtarget->getBZeroEntry() : 0) {
+ V->isNullValue() ? Subtarget->getBZeroEntry() : nullptr) {
EVT IntPtr = TLI.getPointerTy();
Type *IntPtrTy = getDataLayout()->getIntPtrType(*DAG.getContext());
TargetLowering::ArgListTy Args;
@@ -60,15 +61,14 @@ X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl,
Args.push_back(Entry);
Entry.Node = Size;
Args.push_back(Entry);
- TargetLowering::
- CallLoweringInfo CLI(Chain, Type::getVoidTy(*DAG.getContext()),
- false, false, false, false,
- 0, CallingConv::C, /*isTailCall=*/false,
- /*doesNotRet=*/false, /*isReturnValueUsed=*/false,
- DAG.getExternalSymbol(bzeroEntry, IntPtr), Args,
- DAG, dl);
- std::pair<SDValue,SDValue> CallResult =
- TLI.LowerCallTo(CLI);
+
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(dl).setChain(Chain)
+ .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()),
+ DAG.getExternalSymbol(bzeroEntry, IntPtr), &Args, 0)
+ .setDiscardResult();
+
+ std::pair<SDValue,SDValue> CallResult = TLI.LowerCallTo(CLI);
return CallResult.second;
}
@@ -77,7 +77,7 @@ X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl,
}
uint64_t SizeVal = ConstantSize->getZExtValue();
- SDValue InFlag(0, 0);
+ SDValue InFlag;
EVT AVT;
SDValue Count;
ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Src);
@@ -139,7 +139,7 @@ X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl,
SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue Ops[] = { Chain, DAG.getValueType(AVT), InFlag };
- Chain = DAG.getNode(X86ISD::REP_STOS, dl, Tys, Ops, array_lengthof(Ops));
+ Chain = DAG.getNode(X86ISD::REP_STOS, dl, Tys, Ops);
if (TwoRepStos) {
InFlag = Chain.getValue(1);
@@ -153,7 +153,7 @@ X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl,
InFlag = Chain.getValue(1);
Tys = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue Ops[] = { Chain, DAG.getValueType(MVT::i8), InFlag };
- Chain = DAG.getNode(X86ISD::REP_STOS, dl, Tys, Ops, array_lengthof(Ops));
+ Chain = DAG.getNode(X86ISD::REP_STOS, dl, Tys, Ops);
} else if (BytesLeft) {
// Handle the last 1 - 7 bytes.
unsigned Offset = SizeVal - BytesLeft;
@@ -225,7 +225,7 @@ X86SelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl,
SDValue Count = DAG.getIntPtrConstant(CountVal);
unsigned BytesLeft = SizeVal % UBytes;
- SDValue InFlag(0, 0);
+ SDValue InFlag;
Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RCX :
X86::ECX,
Count, InFlag);
@@ -241,8 +241,7 @@ X86SelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl,
SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue Ops[] = { Chain, DAG.getValueType(AVT), InFlag };
- SDValue RepMovs = DAG.getNode(X86ISD::REP_MOVS, dl, Tys, Ops,
- array_lengthof(Ops));
+ SDValue RepMovs = DAG.getNode(X86ISD::REP_MOVS, dl, Tys, Ops);
SmallVector<SDValue, 4> Results;
Results.push_back(RepMovs);
@@ -263,6 +262,5 @@ X86SelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl,
SrcPtrInfo.getWithOffset(Offset)));
}
- return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- &Results[0], Results.size());
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Results);
}
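
The memset lowering above is also a small API migration: the old positional CallLoweringInfo constructor with a dozen arguments becomes a chain of named setters, each returning the object so calls compose left to right. A toy mock of that fluent-setter shape — the names below are invented for illustration, not the real LLVM interface:

#include <iostream>
#include <string>
#include <utility>

struct CallInfo {
  std::string Callee;
  bool DiscardResult = false;

  CallInfo &setCallee(std::string Name) {
    Callee = std::move(Name);
    return *this; // returning *this is what makes the chaining work
  }
  CallInfo &setDiscardResult() {
    DiscardResult = true;
    return *this;
  }
};

int main() {
  CallInfo CLI;
  CLI.setCallee("bzero").setDiscardResult();
  std::cout << CLI.Callee << ", discard=" << CLI.DiscardResult << "\n";
  return 0;
}
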
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
index 207d0ba..989e0d6 100644
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -11,12 +11,12 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "subtarget"
#include "X86Subtarget.h"
#include "X86InstrInfo.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Host.h"
@@ -24,15 +24,24 @@
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
+#if defined(_MSC_VER)
+#include <intrin.h>
+#endif
+
+using namespace llvm;
+
+#define DEBUG_TYPE "subtarget"
+
#define GET_SUBTARGETINFO_TARGET_DESC
#define GET_SUBTARGETINFO_CTOR
#include "X86GenSubtargetInfo.inc"
-using namespace llvm;
+// Temporary option to control early if-conversion for x86 while adding machine
+// models.
+static cl::opt<bool>
+X86EarlyIfConv("x86-early-ifcvt", cl::Hidden,
+ cl::desc("Enable early if-conversion on X86"));
-#if defined(_MSC_VER)
-#include <intrin.h>
-#endif
/// ClassifyBlockAddressReference - Classify a blockaddress reference for the
/// current subtarget according to how we should reference it in a non-pcrel
@@ -153,7 +162,7 @@ const char *X86Subtarget::getBZeroEntry() const {
!getTargetTriple().isMacOSXVersionLT(10, 6))
return "__bzero";
- return 0;
+ return nullptr;
}
bool X86Subtarget::hasSinCos() const {
@@ -173,251 +182,16 @@ bool X86Subtarget::IsLegalToCallImmediateAddr(const TargetMachine &TM) const {
return isTargetELF() || TM.getRelocationModel() == Reloc::Static;
}
-static bool OSHasAVXSupport() {
-#if defined(i386) || defined(__i386__) || defined(__x86__) || defined(_M_IX86)\
- || defined(__x86_64__) || defined(_M_AMD64) || defined (_M_X64)
-#if defined(__GNUC__)
- // Check xgetbv; this uses a .byte sequence instead of the instruction
- // directly because older assemblers do not include support for xgetbv and
- // there is no easy way to conditionally compile based on the assembler used.
- int rEAX, rEDX;
- __asm__ (".byte 0x0f, 0x01, 0xd0" : "=a" (rEAX), "=d" (rEDX) : "c" (0));
-#elif defined(_MSC_FULL_VER) && defined(_XCR_XFEATURE_ENABLED_MASK)
- unsigned long long rEAX = _xgetbv(_XCR_XFEATURE_ENABLED_MASK);
-#else
- int rEAX = 0; // Ensures we return false
-#endif
- return (rEAX & 6) == 6;
-#else
- return false;
-#endif
-}
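
The deleted check above hinges on one mask: XCR0 bit 1 (SSE state) and bit 2 (AVX state) must both be OS-enabled before AVX is safe to use, which is exactly what `(rEAX & 6) == 6` tests. A tiny assertion sketch of that arithmetic, with hand-picked values and no xgetbv involved:

#include <cassert>

int main() {
  // XCR0 bit 1 = SSE state saved, bit 2 = AVX state saved; mask 6 = both.
  unsigned sseOnly = 0x2, avxOnly = 0x4, both = 0x6;
  assert((sseOnly & 6u) != 6u); // OS saves SSE state only: AVX unusable
  assert((avxOnly & 6u) != 6u); // AVX state without SSE state: still unusable
  assert((both & 6u) == 6u);    // both saved: AVX may be enabled
  return 0;
}
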
-
-void X86Subtarget::AutoDetectSubtargetFeatures() {
- unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
- unsigned MaxLevel;
- union {
- unsigned u[3];
- char c[12];
- } text;
-
- if (X86_MC::GetCpuIDAndInfo(0, &MaxLevel, text.u+0, text.u+2, text.u+1) ||
- MaxLevel < 1)
- return;
-
- X86_MC::GetCpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX);
-
- if ((EDX >> 15) & 1) { HasCMov = true; ToggleFeature(X86::FeatureCMOV); }
- if ((EDX >> 23) & 1) { X86SSELevel = MMX; ToggleFeature(X86::FeatureMMX); }
- if ((EDX >> 25) & 1) { X86SSELevel = SSE1; ToggleFeature(X86::FeatureSSE1); }
- if ((EDX >> 26) & 1) { X86SSELevel = SSE2; ToggleFeature(X86::FeatureSSE2); }
- if (ECX & 0x1) { X86SSELevel = SSE3; ToggleFeature(X86::FeatureSSE3); }
- if ((ECX >> 9) & 1) { X86SSELevel = SSSE3; ToggleFeature(X86::FeatureSSSE3);}
- if ((ECX >> 19) & 1) { X86SSELevel = SSE41; ToggleFeature(X86::FeatureSSE41);}
- if ((ECX >> 20) & 1) { X86SSELevel = SSE42; ToggleFeature(X86::FeatureSSE42);}
- if (((ECX >> 27) & 1) && ((ECX >> 28) & 1) && OSHasAVXSupport()) {
- X86SSELevel = AVX; ToggleFeature(X86::FeatureAVX);
- }
-
- bool IsIntel = memcmp(text.c, "GenuineIntel", 12) == 0;
- bool IsAMD = !IsIntel && memcmp(text.c, "AuthenticAMD", 12) == 0;
-
- if ((ECX >> 1) & 0x1) {
- HasPCLMUL = true;
- ToggleFeature(X86::FeaturePCLMUL);
- }
- if ((ECX >> 12) & 0x1) {
- HasFMA = true;
- ToggleFeature(X86::FeatureFMA);
- }
- if (IsIntel && ((ECX >> 22) & 0x1)) {
- HasMOVBE = true;
- ToggleFeature(X86::FeatureMOVBE);
- }
- if ((ECX >> 23) & 0x1) {
- HasPOPCNT = true;
- ToggleFeature(X86::FeaturePOPCNT);
- }
- if ((ECX >> 25) & 0x1) {
- HasAES = true;
- ToggleFeature(X86::FeatureAES);
- }
- if ((ECX >> 29) & 0x1) {
- HasF16C = true;
- ToggleFeature(X86::FeatureF16C);
- }
- if (IsIntel && ((ECX >> 30) & 0x1)) {
- HasRDRAND = true;
- ToggleFeature(X86::FeatureRDRAND);
- }
-
- if ((ECX >> 13) & 0x1) {
- HasCmpxchg16b = true;
- ToggleFeature(X86::FeatureCMPXCHG16B);
- }
-
- if (IsIntel || IsAMD) {
- // Determine if bit test memory instructions are slow.
- unsigned Family = 0;
- unsigned Model = 0;
- X86_MC::DetectFamilyModel(EAX, Family, Model);
- if (IsAMD || (Family == 6 && Model >= 13)) {
- IsBTMemSlow = true;
- ToggleFeature(X86::FeatureSlowBTMem);
- }
-
- // Determine if SHLD/SHRD instructions have higher latency than the
- // equivalent series of shifts/or instructions.
- // FIXME: Add Intel's processors that have SHLD instructions with very
- // poor latency.
- if (IsAMD) {
- IsSHLDSlow = true;
- ToggleFeature(X86::FeatureSlowSHLD);
- }
-
- // If it's an Intel chip since Nehalem and not an Atom chip, unaligned
- // memory access is fast. We hard code model numbers here because they
- // aren't strictly increasing for Intel chips it seems.
- if (IsIntel &&
- ((Family == 6 && Model == 0x1E) || // Nehalem: Clarksfield, Lynnfield,
- // Jasper Forest
- (Family == 6 && Model == 0x1A) || // Nehalem: Bloomfield, Nehalem-EP
- (Family == 6 && Model == 0x2E) || // Nehalem: Nehalem-EX
- (Family == 6 && Model == 0x25) || // Westmere: Arrandale, Clarksdale
- (Family == 6 && Model == 0x2C) || // Westmere: Gulftown, Westmere-EP
- (Family == 6 && Model == 0x2F) || // Westmere: Westmere-EX
- (Family == 6 && Model == 0x2A) || // SandyBridge
- (Family == 6 && Model == 0x2D) || // SandyBridge: SandyBridge-E*
- (Family == 6 && Model == 0x3A) || // IvyBridge
- (Family == 6 && Model == 0x3E) || // IvyBridge EP
- (Family == 6 && Model == 0x3C) || // Haswell
- (Family == 6 && Model == 0x3F) || // ...
- (Family == 6 && Model == 0x45) || // ...
- (Family == 6 && Model == 0x46))) { // ...
- IsUAMemFast = true;
- ToggleFeature(X86::FeatureFastUAMem);
- }
-
- // Set processor type. Currently only Atom or Silvermont (SLM) is detected.
- if (Family == 6 &&
- (Model == 28 || Model == 38 || Model == 39 ||
- Model == 53 || Model == 54)) {
- X86ProcFamily = IntelAtom;
-
- UseLeaForSP = true;
- ToggleFeature(X86::FeatureLeaForSP);
- }
- else if (Family == 6 &&
- (Model == 55 || Model == 74 || Model == 77)) {
- X86ProcFamily = IntelSLM;
- }
-
- unsigned MaxExtLevel;
- X86_MC::GetCpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX);
-
- if (MaxExtLevel >= 0x80000001) {
- X86_MC::GetCpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
- if ((EDX >> 29) & 0x1) {
- HasX86_64 = true;
- ToggleFeature(X86::Feature64Bit);
- }
- if ((ECX >> 5) & 0x1) {
- HasLZCNT = true;
- ToggleFeature(X86::FeatureLZCNT);
- }
- if (IsIntel && ((ECX >> 8) & 0x1)) {
- HasPRFCHW = true;
- ToggleFeature(X86::FeaturePRFCHW);
- }
- if (IsAMD) {
- if ((ECX >> 6) & 0x1) {
- HasSSE4A = true;
- ToggleFeature(X86::FeatureSSE4A);
- }
- if ((ECX >> 11) & 0x1) {
- HasXOP = true;
- ToggleFeature(X86::FeatureXOP);
- }
- if ((ECX >> 16) & 0x1) {
- HasFMA4 = true;
- ToggleFeature(X86::FeatureFMA4);
- }
- }
- }
- }
-
- if (MaxLevel >= 7) {
- if (!X86_MC::GetCpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX)) {
- if (IsIntel && (EBX & 0x1)) {
- HasFSGSBase = true;
- ToggleFeature(X86::FeatureFSGSBase);
- }
- if ((EBX >> 3) & 0x1) {
- HasBMI = true;
- ToggleFeature(X86::FeatureBMI);
- }
- if ((EBX >> 4) & 0x1) {
- HasHLE = true;
- ToggleFeature(X86::FeatureHLE);
- }
- if (IsIntel && ((EBX >> 5) & 0x1)) {
- X86SSELevel = AVX2;
- ToggleFeature(X86::FeatureAVX2);
- }
- if (IsIntel && ((EBX >> 8) & 0x1)) {
- HasBMI2 = true;
- ToggleFeature(X86::FeatureBMI2);
- }
- if (IsIntel && ((EBX >> 11) & 0x1)) {
- HasRTM = true;
- ToggleFeature(X86::FeatureRTM);
- }
- if (IsIntel && ((EBX >> 16) & 0x1)) {
- X86SSELevel = AVX512F;
- ToggleFeature(X86::FeatureAVX512);
- }
- if (IsIntel && ((EBX >> 18) & 0x1)) {
- HasRDSEED = true;
- ToggleFeature(X86::FeatureRDSEED);
- }
- if (IsIntel && ((EBX >> 19) & 0x1)) {
- HasADX = true;
- ToggleFeature(X86::FeatureADX);
- }
- if (IsIntel && ((EBX >> 26) & 0x1)) {
- HasPFI = true;
- ToggleFeature(X86::FeaturePFI);
- }
- if (IsIntel && ((EBX >> 27) & 0x1)) {
- HasERI = true;
- ToggleFeature(X86::FeatureERI);
- }
- if (IsIntel && ((EBX >> 28) & 0x1)) {
- HasCDI = true;
- ToggleFeature(X86::FeatureCDI);
- }
- if (IsIntel && ((EBX >> 29) & 0x1)) {
- HasSHA = true;
- ToggleFeature(X86::FeatureSHA);
- }
- }
- if (IsAMD && ((ECX >> 21) & 0x1)) {
- HasTBM = true;
- ToggleFeature(X86::FeatureTBM);
- }
- }
-}
-
void X86Subtarget::resetSubtargetFeatures(const MachineFunction *MF) {
AttributeSet FnAttrs = MF->getFunction()->getAttributes();
- Attribute CPUAttr = FnAttrs.getAttribute(AttributeSet::FunctionIndex,
- "target-cpu");
- Attribute FSAttr = FnAttrs.getAttribute(AttributeSet::FunctionIndex,
- "target-features");
+ Attribute CPUAttr =
+ FnAttrs.getAttribute(AttributeSet::FunctionIndex, "target-cpu");
+ Attribute FSAttr =
+ FnAttrs.getAttribute(AttributeSet::FunctionIndex, "target-features");
std::string CPU =
- !CPUAttr.hasAttribute(Attribute::None) ?CPUAttr.getValueAsString() : "";
+ !CPUAttr.hasAttribute(Attribute::None) ? CPUAttr.getValueAsString() : "";
std::string FS =
- !FSAttr.hasAttribute(Attribute::None) ? FSAttr.getValueAsString() : "";
+ !FSAttr.hasAttribute(Attribute::None) ? FSAttr.getValueAsString() : "";
if (!FS.empty()) {
initializeEnvironment();
resetSubtargetFeatures(CPU, FS);
@@ -426,54 +200,23 @@ void X86Subtarget::resetSubtargetFeatures(const MachineFunction *MF) {
void X86Subtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) {
std::string CPUName = CPU;
- if (!FS.empty() || !CPU.empty()) {
- if (CPUName.empty()) {
-#if defined(i386) || defined(__i386__) || defined(__x86__) || defined(_M_IX86)\
- || defined(__x86_64__) || defined(_M_AMD64) || defined (_M_X64)
- CPUName = sys::getHostCPUName();
-#else
- CPUName = "generic";
-#endif
- }
-
- // Make sure 64-bit features are available in 64-bit mode. (But make sure
- // SSE2 can be turned off explicitly.)
- std::string FullFS = FS;
- if (In64BitMode) {
- if (!FullFS.empty())
- FullFS = "+64bit,+sse2," + FullFS;
- else
- FullFS = "+64bit,+sse2";
- }
-
- // If feature string is not empty, parse features string.
- ParseSubtargetFeatures(CPUName, FullFS);
- } else {
- if (CPUName.empty()) {
-#if defined (__x86_64__) || defined(__i386__)
- CPUName = sys::getHostCPUName();
-#else
- CPUName = "generic";
-#endif
- }
- // Otherwise, use CPUID to auto-detect feature set.
- AutoDetectSubtargetFeatures();
-
- // Make sure 64-bit features are available in 64-bit mode.
- if (In64BitMode) {
- if (!HasX86_64) { HasX86_64 = true; ToggleFeature(X86::Feature64Bit); }
- if (!HasCMov) { HasCMov = true; ToggleFeature(X86::FeatureCMOV); }
-
- if (X86SSELevel < SSE2) {
- X86SSELevel = SSE2;
- ToggleFeature(X86::FeatureSSE1);
- ToggleFeature(X86::FeatureSSE2);
- }
- }
+ if (CPUName.empty())
+ CPUName = "generic";
+
+ // Make sure 64-bit features are available in 64-bit mode. (But make sure
+ // SSE2 can be turned off explicitly.)
+ std::string FullFS = FS;
+ if (In64BitMode) {
+ if (!FullFS.empty())
+ FullFS = "+64bit,+sse2," + FullFS;
+ else
+ FullFS = "+64bit,+sse2";
}
- // CPUName may have been set by the CPU detection code. Make sure the
- // new MCSchedModel is used.
+ // If feature string is not empty, parse features string.
+ ParseSubtargetFeatures(CPUName, FullFS);
+
+ // Make sure the right MCSchedModel is used.
InitCPUSchedModel(CPUName);
if (X86ProcFamily == IntelAtom || X86ProcFamily == IntelSLM)
@@ -547,33 +290,36 @@ void X86Subtarget::initializeEnvironment() {
PadShortFunctions = false;
CallRegIndirect = false;
LEAUsesAG = false;
+ SlowLEA = false;
stackAlignment = 4;
// FIXME: this is a known good value for Yonah. How about others?
MaxInlineSizeThreshold = 128;
}
X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU,
- const std::string &FS,
- unsigned StackAlignOverride)
- : X86GenSubtargetInfo(TT, CPU, FS)
- , X86ProcFamily(Others)
- , PICStyle(PICStyles::None)
- , TargetTriple(TT)
- , StackAlignOverride(StackAlignOverride)
- , In64BitMode(TargetTriple.getArch() == Triple::x86_64)
- , In32BitMode(TargetTriple.getArch() == Triple::x86 &&
- TargetTriple.getEnvironment() != Triple::CODE16)
- , In16BitMode(TargetTriple.getArch() == Triple::x86 &&
- TargetTriple.getEnvironment() == Triple::CODE16) {
+ const std::string &FS, unsigned StackAlignOverride)
+ : X86GenSubtargetInfo(TT, CPU, FS), X86ProcFamily(Others),
+ PICStyle(PICStyles::None), TargetTriple(TT),
+ StackAlignOverride(StackAlignOverride),
+ In64BitMode(TargetTriple.getArch() == Triple::x86_64),
+ In32BitMode(TargetTriple.getArch() == Triple::x86 &&
+ TargetTriple.getEnvironment() != Triple::CODE16),
+ In16BitMode(TargetTriple.getArch() == Triple::x86 &&
+ TargetTriple.getEnvironment() == Triple::CODE16) {
initializeEnvironment();
resetSubtargetFeatures(CPU, FS);
}
-bool X86Subtarget::enablePostRAScheduler(
- CodeGenOpt::Level OptLevel,
- TargetSubtargetInfo::AntiDepBreakMode& Mode,
- RegClassVector& CriticalPathRCs) const {
+bool
+X86Subtarget::enablePostRAScheduler(CodeGenOpt::Level OptLevel,
+ TargetSubtargetInfo::AntiDepBreakMode &Mode,
+ RegClassVector &CriticalPathRCs) const {
Mode = TargetSubtargetInfo::ANTIDEP_CRITICAL;
CriticalPathRCs.clear();
return PostRAScheduler && OptLevel >= CodeGenOpt::Default;
}
+
+bool
+X86Subtarget::enableEarlyIfConversion() const {
+ return hasCMov() && X86EarlyIfConv;
+}
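
With the flag moved next to its only consumer, the hook pattern is a file-static cl::opt plus a subtarget predicate that combines it with a hardware check. A minimal sketch of that cl::opt shape — it needs LLVM's Support library to build, and the flag name here is made up:

#include "llvm/Support/CommandLine.h"
using namespace llvm;

// Hidden, defaults to false, set with -enable-foo on the tool's command line.
static cl::opt<bool>
EnableFoo("enable-foo", cl::Hidden,
          cl::desc("Enable the hypothetical foo optimization"));

int main(int argc, char **argv) {
  cl::ParseCommandLineOptions(argc, argv);
  return EnableFoo ? 0 : 1; // gate a feature the way enableEarlyIfConversion does
}
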
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index 52986b9..703559a 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -178,6 +178,9 @@ protected:
/// address generation (AG) time.
bool LEAUsesAG;
+ /// SlowLEA - True if the LEA instruction with certain arguments is slow.
+ bool SlowLEA;
+
/// Processor has AVX-512 PreFetch Instructions
bool HasPFI;
@@ -235,10 +238,6 @@ public:
/// subtarget options. Definition of function is auto generated by tblgen.
void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
- /// AutoDetectSubtargetFeatures - Auto-detect CPU features using CPUID
- /// instruction.
- void AutoDetectSubtargetFeatures();
-
/// \brief Reset the features for the X86 target.
void resetSubtargetFeatures(const MachineFunction *MF) override;
private:
@@ -319,11 +318,13 @@ public:
bool padShortFunctions() const { return PadShortFunctions; }
bool callRegIndirect() const { return CallRegIndirect; }
bool LEAusesAG() const { return LEAUsesAG; }
+ bool slowLEA() const { return SlowLEA; }
bool hasCDI() const { return HasCDI; }
bool hasPFI() const { return HasPFI; }
bool hasERI() const { return HasERI; }
bool isAtom() const { return X86ProcFamily == IntelAtom; }
+ bool isSLM() const { return X86ProcFamily == IntelSLM; }
const Triple &getTargetTriple() const { return TargetTriple; }
@@ -429,6 +430,8 @@ public:
bool postRAScheduler() const { return PostRAScheduler; }
+ bool enableEarlyIfConversion() const override;
+
/// getInstrItins - Return the instruction itineraries based on the
/// subtarget selection.
const InstrItineraryData &getInstrItineraryData() const { return InstrItins; }
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index 6f09ccf..93760ef 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -108,6 +108,13 @@ X86TargetMachine::X86TargetMachine(const Target &T, StringRef TT,
if (Options.FloatABIType == FloatABI::Default)
this->Options.FloatABIType = FloatABI::Hard;
+ // The Windows stack unwinder gets confused when execution flow "falls
+ // through" after a call to a 'noreturn' function. To prevent that, we emit
+ // a trap for 'unreachable' IR instructions (which on X86 happens to be the
+ // 'ud2' instruction).
+ if (Subtarget.isTargetWin64())
+ this->Options.TrapUnreachable = true;
+
initAsmInfo();
}
@@ -119,12 +126,6 @@ UseVZeroUpper("x86-use-vzeroupper", cl::Hidden,
cl::desc("Minimize AVX to SSE transition penalty"),
cl::init(true));
-// Temporary option to control early if-conversion for x86 while adding machine
-// models.
-static cl::opt<bool>
-X86EarlyIfConv("x86-early-ifcvt", cl::Hidden,
- cl::desc("Enable early if-conversion on X86"));
-
//===----------------------------------------------------------------------===//
// X86 Analysis Pass Setup
//===----------------------------------------------------------------------===//
@@ -177,19 +178,14 @@ bool X86PassConfig::addInstSelector() {
if (getX86Subtarget().isTargetELF() && getOptLevel() != CodeGenOpt::None)
addPass(createCleanupLocalDynamicTLSPass());
- // For 32-bit, prepend instructions to set the "global base reg" for PIC.
- if (!getX86Subtarget().is64Bit())
- addPass(createGlobalBaseRegPass());
+ addPass(createX86GlobalBaseRegPass());
return false;
}
bool X86PassConfig::addILPOpts() {
- if (X86EarlyIfConv && getX86Subtarget().hasCMov()) {
- addPass(&EarlyIfConverterID);
- return true;
- }
- return false;
+ addPass(&EarlyIfConverterID);
+ return true;
}
bool X86PassConfig::addPreRegAlloc() {
@@ -208,18 +204,13 @@ bool X86PassConfig::addPreEmitPass() {
ShouldPrint = true;
}
- if (getX86Subtarget().hasAVX() && UseVZeroUpper) {
+ if (UseVZeroUpper) {
addPass(createX86IssueVZeroUpperPass());
ShouldPrint = true;
}
- if (getOptLevel() != CodeGenOpt::None &&
- getX86Subtarget().padShortFunctions()) {
+ if (getOptLevel() != CodeGenOpt::None) {
addPass(createX86PadShortFunctions());
- ShouldPrint = true;
- }
- if (getOptLevel() != CodeGenOpt::None &&
- getX86Subtarget().LEAusesAG()){
addPass(createX86FixupLEAs());
ShouldPrint = true;
}
diff --git a/lib/Target/X86/X86TargetObjectFile.cpp b/lib/Target/X86/X86TargetObjectFile.cpp
index 0a88e98..8157085 100644
--- a/lib/Target/X86/X86TargetObjectFile.cpp
+++ b/lib/Target/X86/X86TargetObjectFile.cpp
@@ -26,7 +26,7 @@ const MCExpr *X86_64MachoTargetObjectFile::getTTypeGlobalReference(
// On Darwin/X86-64, we can reference dwarf symbols with foo@GOTPCREL+4, which
// is an indirect pc-relative reference.
- if (Encoding & (DW_EH_PE_indirect | DW_EH_PE_pcrel)) {
+ if ((Encoding & DW_EH_PE_indirect) && (Encoding & DW_EH_PE_pcrel)) {
const MCSymbol *Sym = TM.getSymbol(GV, Mang);
const MCExpr *Res =
MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_GOTPCREL, getContext());
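
The one-character-looking change above is a real semantics fix: `Encoding & (DW_EH_PE_indirect | DW_EH_PE_pcrel)` is nonzero when either bit is set, while the intent is to take this path only when both are. A compilable illustration using the DWARF EH encoding values (0x80 and 0x10):

#include <cassert>

int main() {
  const unsigned Indirect = 0x80, PCRel = 0x10; // DW_EH_PE_indirect, DW_EH_PE_pcrel
  unsigned enc = PCRel;                         // pc-relative but NOT indirect
  assert(enc & (Indirect | PCRel));             // old test: wrongly true
  assert(!((enc & Indirect) && (enc & PCRel))); // new test: correctly false
  return 0;
}
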
@@ -62,7 +62,7 @@ const MCExpr *X86WindowsTargetObjectFile::getExecutableRelativeSymbol(
// operation.
const SubOperator *Sub = dyn_cast<SubOperator>(CE);
if (!Sub)
- return 0;
+ return nullptr;
// Symbols must first be numbers before we can subtract them; we need to see a
// ptrtoint on both subtraction operands.
@@ -71,13 +71,13 @@ const MCExpr *X86WindowsTargetObjectFile::getExecutableRelativeSymbol(
const PtrToIntOperator *SubRHS =
dyn_cast<PtrToIntOperator>(Sub->getOperand(1));
if (!SubLHS || !SubRHS)
- return 0;
+ return nullptr;
// Our symbols should exist in address space zero, cowardly no-op if
// otherwise.
if (SubLHS->getPointerAddressSpace() != 0 ||
SubRHS->getPointerAddressSpace() != 0)
- return 0;
+ return nullptr;
// Both ptrtoint instructions must wrap global variables:
// - Only global variables are eligible for image relative relocations.
@@ -87,7 +87,7 @@ const MCExpr *X86WindowsTargetObjectFile::getExecutableRelativeSymbol(
const GlobalVariable *GVRHS =
dyn_cast<GlobalVariable>(SubRHS->getPointerOperand());
if (!GVLHS || !GVRHS)
- return 0;
+ return nullptr;
// We expect __ImageBase to be a global variable without a section, externally
// defined.
@@ -96,11 +96,11 @@ const MCExpr *X86WindowsTargetObjectFile::getExecutableRelativeSymbol(
if (GVRHS->isThreadLocal() || GVRHS->getName() != "__ImageBase" ||
!GVRHS->hasExternalLinkage() || GVRHS->hasInitializer() ||
GVRHS->hasSection())
- return 0;
+ return nullptr;
// An image-relative, thread-local symbol makes no sense.
if (GVLHS->isThreadLocal())
- return 0;
+ return nullptr;
return MCSymbolRefExpr::Create(TM.getSymbol(GVLHS, Mang),
MCSymbolRefExpr::VK_COFF_IMGREL32,
diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp
index c04964d..91b9d40 100644
--- a/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -14,37 +14,24 @@
///
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "x86tti"
#include "X86.h"
#include "X86TargetMachine.h"
-#include "llvm/ADT/DepthFirstIterator.h"
-#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/CostTable.h"
#include "llvm/Target/TargetLowering.h"
using namespace llvm;
+#define DEBUG_TYPE "x86tti"
+
// Declare the pass initialization routine locally as target-specific passes
-// don't havve a target-wide initialization entry point, and so we rely on the
+// don't have a target-wide initialization entry point, and so we rely on the
// pass constructor initialization.
namespace llvm {
void initializeX86TTIPass(PassRegistry &);
}
-static cl::opt<bool>
-UsePartialUnrolling("x86-use-partial-unrolling", cl::init(true),
- cl::desc("Use partial unrolling for some X86 targets"), cl::Hidden);
-static cl::opt<unsigned>
-PartialUnrollingThreshold("x86-partial-unrolling-threshold", cl::init(0),
- cl::desc("Threshold for X86 partial unrolling"), cl::Hidden);
-static cl::opt<unsigned>
-PartialUnrollingMaxBranches("x86-partial-max-branches", cl::init(2),
- cl::desc("Threshold for taken branches in X86 partial unrolling"),
- cl::Hidden);
-
namespace {
class X86TTI final : public ImmutablePass, public TargetTransformInfo {
@@ -56,7 +43,7 @@ class X86TTI final : public ImmutablePass, public TargetTransformInfo {
unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const;
public:
- X86TTI() : ImmutablePass(ID), ST(0), TLI(0) {
+ X86TTI() : ImmutablePass(ID), ST(nullptr), TLI(nullptr) {
llvm_unreachable("This pass cannot be directly constructed");
}
@@ -87,8 +74,6 @@ public:
/// \name Scalar TTI Implementations
/// @{
PopcntSupportKind getPopcntSupport(unsigned TyWidth) const override;
- void getUnrollingPreferences(Loop *L,
- UnrollingPreferences &UP) const override;
/// @}
@@ -153,93 +138,6 @@ X86TTI::PopcntSupportKind X86TTI::getPopcntSupport(unsigned TyWidth) const {
return ST->hasPOPCNT() ? PSK_FastHardware : PSK_Software;
}
-void X86TTI::getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) const {
- if (!UsePartialUnrolling)
- return;
- // According to the Intel 64 and IA-32 Architectures Optimization Reference
- // Manual, Intel Core models and later have a loop stream detector
- // (and associated uop queue) that can benefit from partial unrolling.
- // The relevant requirements are:
- // - The loop must have no more than 4 (8 for Nehalem and later) branches
- // taken, and none of them may be calls.
- // - The loop can have no more than 18 (28 for Nehalem and later) uops.
-
- // According to the Software Optimization Guide for AMD Family 15h Processors,
- // models 30h-4fh (Steamroller and later) have a loop predictor and loop
- // buffer which can benefit from partial unrolling.
- // The relevant requirements are:
- // - The loop must have fewer than 16 branches
- // - The loop must have less than 40 uops in all executed loop branches
-
- unsigned MaxBranches, MaxOps;
- if (PartialUnrollingThreshold.getNumOccurrences() > 0) {
- MaxBranches = PartialUnrollingMaxBranches;
- MaxOps = PartialUnrollingThreshold;
- } else if (ST->isAtom()) {
- // On the Atom, the throughput for taken branches is 2 cycles. For small
- // simple loops, expand by a small factor to hide the backedge cost.
- MaxBranches = 2;
- MaxOps = 10;
- } else if (ST->hasFSGSBase() && ST->hasXOP() /* Steamroller and later */) {
- MaxBranches = 16;
- MaxOps = 40;
- } else if (ST->hasFMA4() /* Any other recent AMD */) {
- return;
- } else if (ST->hasAVX() || ST->hasSSE42() /* Nehalem and later */) {
- MaxBranches = 8;
- MaxOps = 28;
- } else if (ST->hasSSSE3() /* Intel Core */) {
- MaxBranches = 4;
- MaxOps = 18;
- } else {
- return;
- }
-
- // Scan the loop: don't unroll loops with calls, and count the potential
- // number of taken branches (this is somewhat conservative because we're
- // counting all block transitions as potential branches while in reality some
- // of these will become implicit via block placement).
- unsigned MaxDepth = 0;
- for (df_iterator<BasicBlock*> DI = df_begin(L->getHeader()),
- DE = df_end(L->getHeader()); DI != DE;) {
- if (!L->contains(*DI)) {
- DI.skipChildren();
- continue;
- }
-
- MaxDepth = std::max(MaxDepth, DI.getPathLength());
- if (MaxDepth > MaxBranches)
- return;
-
- for (BasicBlock::iterator I = DI->begin(), IE = DI->end(); I != IE; ++I)
- if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
- ImmutableCallSite CS(I);
- if (const Function *F = CS.getCalledFunction()) {
- if (!isLoweredToCall(F))
- continue;
- }
-
- return;
- }
-
- ++DI;
- }
-
- // Enable runtime and partial unrolling up to the specified size.
- UP.Partial = UP.Runtime = true;
- UP.PartialThreshold = UP.PartialOptSizeThreshold = MaxOps;
-
- // Set the maximum count based on the loop depth. The maximum number of
- // branches taken in a loop (including the backedge) is equal to the maximum
- // loop depth (the DFS path length from the loop header to any block in the
- // loop). When the loop is unrolled, this depth (except for the backedge
- // itself) is multiplied by the unrolling factor. This new unrolled depth
- // must be less than the target-specific maximum branch count (which limits
- // the number of taken branches in the uop buffer).
- if (MaxDepth > 1)
- UP.MaxCount = (MaxBranches-1)/(MaxDepth-1);
-}
-
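
Before it disappears entirely, the MaxCount formula at the end of the deleted hook is worth one worked instance: with Nehalem-class limits (MaxBranches = 8) and a loop whose deepest DFS path is 3 blocks, the cap is (8 - 1) / (3 - 1) = 3 unrolled copies, keeping the unrolled branch count inside the uop buffer budget. As arithmetic:

#include <cstdio>

int main() {
  unsigned MaxBranches = 8; // Nehalem and later, per the deleted comment
  unsigned MaxDepth = 3;    // deepest DFS path from the loop header
  unsigned MaxCount = (MaxBranches - 1) / (MaxDepth - 1);
  std::printf("unroll cap: %u\n", MaxCount); // prints 3
  return 0;
}
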
unsigned X86TTI::getNumberOfRegisters(bool Vector) const {
if (Vector && !ST->hasSSE1())
return 0;
@@ -283,6 +181,21 @@ unsigned X86TTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
int ISD = TLI->InstructionOpcodeToISD(Opcode);
assert(ISD && "Invalid opcode");
+ static const CostTblEntry<MVT::SimpleValueType>
+ AVX2UniformConstCostTable[] = {
+ { ISD::SDIV, MVT::v16i16, 6 }, // vpmulhw sequence
+ { ISD::UDIV, MVT::v16i16, 6 }, // vpmulhuw sequence
+ { ISD::SDIV, MVT::v8i32, 15 }, // vpmuldq sequence
+ { ISD::UDIV, MVT::v8i32, 15 }, // vpmuludq sequence
+ };
+
+ if (Op2Info == TargetTransformInfo::OK_UniformConstantValue &&
+ ST->hasAVX2()) {
+ int Idx = CostTableLookup(AVX2UniformConstCostTable, ISD, LT.second);
+ if (Idx != -1)
+ return LT.first * AVX2UniformConstCostTable[Idx].Cost;
+ }
+
static const CostTblEntry<MVT::SimpleValueType> AVX2CostTable[] = {
// Shifts on v4i64/v8i32 on AVX2 is legal even though we declare to
// customize them to detect the cases where shift amount is a scalar one.
@@ -350,10 +263,19 @@ unsigned X86TTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
{ ISD::SRA, MVT::v16i8, 4 }, // psrlw, pand, pxor, psubb.
{ ISD::SRA, MVT::v8i16, 1 }, // psraw.
{ ISD::SRA, MVT::v4i32, 1 }, // psrad.
+
+ { ISD::SDIV, MVT::v8i16, 6 }, // pmulhw sequence
+ { ISD::UDIV, MVT::v8i16, 6 }, // pmulhuw sequence
+ { ISD::SDIV, MVT::v4i32, 19 }, // pmuludq sequence
+ { ISD::UDIV, MVT::v4i32, 15 }, // pmuludq sequence
};
if (Op2Info == TargetTransformInfo::OK_UniformConstantValue &&
ST->hasSSE2()) {
+ // pmuldq sequence.
+ if (ISD == ISD::SDIV && LT.second == MVT::v4i32 && ST->hasSSE41())
+ return LT.first * 15;
+
int Idx = CostTableLookup(SSE2UniformConstCostTable, ISD, LT.second);
if (Idx != -1)
return LT.first * SSE2UniformConstCostTable[Idx].Cost;
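
The cost returned from these table hits is the legalization factor times the per-op entry: LT.first counts how many legal-width pieces the IR type splits into. A back-of-the-envelope sketch — the split factor for v16i32 on AVX2 is an assumption for illustration:

#include <cstdio>

int main() {
  unsigned LTFirst = 2;    // assume v16i32 legalizes to 2 x v8i32 on AVX2
  unsigned TableCost = 15; // AVX2 table above: ISD::SDIV, MVT::v8i32
  std::printf("estimated SDIV cost: %u\n", LTFirst * TableCost); // prints 30
  return 0;
}
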
@@ -893,6 +815,13 @@ unsigned X86TTI::getIntImmCost(const APInt &Imm, Type *Ty) const {
if (BitSize == 0)
return ~0U;
+ // Never hoist constants larger than 128 bits, because this might lead to
+ // incorrect code generation or assertions in codegen.
+ // FIXME: Create a cost model for types larger than i128 once the codegen
+ // issues have been fixed.
+ if (BitSize > 128)
+ return TCC_Free;
+
if (Imm == 0)
return TCC_Free;
@@ -908,8 +837,10 @@ unsigned X86TTI::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
assert(Ty->isIntegerTy());
unsigned BitSize = Ty->getPrimitiveSizeInBits();
+ // There is no cost model for constants with a bit size of 0. Return TCC_Free
+ // here, so that constant hoisting will ignore this constant.
if (BitSize == 0)
- return ~0U;
+ return TCC_Free;
unsigned ImmIdx = ~0U;
switch (Opcode) {
@@ -931,15 +862,19 @@ unsigned X86TTI::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
case Instruction::SDiv:
case Instruction::URem:
case Instruction::SRem:
- case Instruction::Shl:
- case Instruction::LShr:
- case Instruction::AShr:
case Instruction::And:
case Instruction::Or:
case Instruction::Xor:
case Instruction::ICmp:
ImmIdx = 1;
break;
+ // Always return TCC_Free for the shift value of a shift instruction.
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ if (Idx == 1)
+ return TCC_Free;
+ break;
case Instruction::Trunc:
case Instruction::ZExt:
case Instruction::SExt:
@@ -966,8 +901,10 @@ unsigned X86TTI::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
assert(Ty->isIntegerTy());
unsigned BitSize = Ty->getPrimitiveSizeInBits();
+ // There is no cost model for constants with a bit size of 0. Return TCC_Free
+ // here, so that constant hoisting will ignore this constant.
if (BitSize == 0)
- return ~0U;
+ return TCC_Free;
switch (IID) {
default: return TCC_Free;
diff --git a/lib/Target/X86/X86VZeroUpper.cpp b/lib/Target/X86/X86VZeroUpper.cpp
index d4341b9..0bb5f99 100644
--- a/lib/Target/X86/X86VZeroUpper.cpp
+++ b/lib/Target/X86/X86VZeroUpper.cpp
@@ -14,7 +14,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "x86-vzeroupper"
#include "X86.h"
#include "X86InstrInfo.h"
#include "X86Subtarget.h"
@@ -28,6 +27,8 @@
#include "llvm/Target/TargetInstrInfo.h"
using namespace llvm;
+#define DEBUG_TYPE "x86-vzeroupper"
+
STATISTIC(NumVZU, "Number of vzeroupper instructions inserted");
namespace {
@@ -246,7 +247,8 @@ void VZeroUpperInserter::processBasicBlock(MachineBasicBlock &MBB) {
/// runOnMachineFunction - Loop over all of the basic blocks, inserting
/// vzero upper instructions before function calls.
bool VZeroUpperInserter::runOnMachineFunction(MachineFunction &MF) {
- if (MF.getTarget().getSubtarget<X86Subtarget>().hasAVX512())
+ const X86Subtarget &ST = MF.getTarget().getSubtarget<X86Subtarget>();
+ if (!ST.hasAVX() || ST.hasAVX512())
return false;
TII = MF.getTarget().getInstrInfo();
MachineRegisterInfo &MRI = MF.getRegInfo();
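
The reworked early exit reads as a double gate: without AVX there is no dirty upper state to zero, and with AVX-512 the pass stays off as well. The same predicate as a checked truth table, with illustrative booleans:

#include <cassert>

static bool runsVZeroUpper(bool hasAVX, bool hasAVX512) {
  return !(!hasAVX || hasAVX512); // equivalently: hasAVX && !hasAVX512
}

int main() {
  assert(!runsVZeroUpper(false, false)); // SSE-only: nothing to do
  assert( runsVZeroUpper(true,  false)); // plain AVX: insert vzeroupper
  assert(!runsVZeroUpper(true,  true));  // AVX-512: pass bails
  return 0;
}
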
diff --git a/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp b/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp
index 9c20abd..7fef796 100644
--- a/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp
+++ b/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp
@@ -14,6 +14,7 @@
#include "XCore.h"
#include "XCoreRegisterInfo.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler.h"
#include "llvm/MC/MCFixedLenDisassembler.h"
#include "llvm/MC/MCInst.h"
@@ -23,16 +24,17 @@
using namespace llvm;
+#define DEBUG_TYPE "xcore-disassembler"
+
typedef MCDisassembler::DecodeStatus DecodeStatus;
namespace {
/// \brief A disassembler class for XCore.
class XCoreDisassembler : public MCDisassembler {
- OwningPtr<const MCRegisterInfo> RegInfo;
public:
- XCoreDisassembler(const MCSubtargetInfo &STI, const MCRegisterInfo *Info) :
- MCDisassembler(STI), RegInfo(Info) {}
+ XCoreDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx) :
+ MCDisassembler(STI, Ctx) {}
/// \brief See MCDisassembler.
virtual DecodeStatus getInstruction(MCInst &instr,
@@ -40,9 +42,8 @@ public:
const MemoryObject &region,
uint64_t address,
raw_ostream &vStream,
- raw_ostream &cStream) const;
+ raw_ostream &cStream) const override;
- const MCRegisterInfo *getRegInfo() const { return RegInfo.get(); }
};
}
@@ -81,7 +82,8 @@ static bool readInstruction32(const MemoryObject &region,
static unsigned getReg(const void *D, unsigned RC, unsigned RegNo) {
const XCoreDisassembler *Dis = static_cast<const XCoreDisassembler*>(D);
- return *(Dis->getRegInfo()->getRegClass(RC).begin() + RegNo);
+ const MCRegisterInfo *RegInfo = Dis->getContext().getRegisterInfo();
+ return *(RegInfo->getRegClass(RC).begin() + RegNo);
}
static DecodeStatus DecodeGRRegsRegisterClass(MCInst &Inst,
@@ -788,8 +790,9 @@ namespace llvm {
}
static MCDisassembler *createXCoreDisassembler(const Target &T,
- const MCSubtargetInfo &STI) {
- return new XCoreDisassembler(STI, T.createMCRegInfo(""));
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx) {
+ return new XCoreDisassembler(STI, Ctx);
}
extern "C" void LLVMInitializeXCoreDisassembler() {
diff --git a/lib/Target/XCore/InstPrinter/XCoreInstPrinter.cpp b/lib/Target/XCore/InstPrinter/XCoreInstPrinter.cpp
index 9ae8c0d..215fe89 100644
--- a/lib/Target/XCore/InstPrinter/XCoreInstPrinter.cpp
+++ b/lib/Target/XCore/InstPrinter/XCoreInstPrinter.cpp
@@ -11,7 +11,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "asm-printer"
#include "XCoreInstPrinter.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/MC/MCExpr.h"
@@ -22,6 +21,8 @@
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
+#define DEBUG_TYPE "asm-printer"
+
#include "XCoreGenAsmWriter.inc"
void XCoreInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
diff --git a/lib/Target/XCore/InstPrinter/XCoreInstPrinter.h b/lib/Target/XCore/InstPrinter/XCoreInstPrinter.h
index 772c515..98e7c98 100644
--- a/lib/Target/XCore/InstPrinter/XCoreInstPrinter.h
+++ b/lib/Target/XCore/InstPrinter/XCoreInstPrinter.h
@@ -31,8 +31,8 @@ public:
void printInstruction(const MCInst *MI, raw_ostream &O);
static const char *getRegisterName(unsigned RegNo);
- virtual void printRegName(raw_ostream &OS, unsigned RegNo) const;
- virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot);
+ void printRegName(raw_ostream &OS, unsigned RegNo) const override;
+ void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot) override;
private:
void printInlineJT(const MCInst *MI, int opNum, raw_ostream &O);
void printInlineJT32(const MCInst *MI, int opNum, raw_ostream &O);
diff --git a/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.cpp b/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.cpp
index f788c59..5665911 100644
--- a/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.cpp
+++ b/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.cpp
@@ -17,7 +17,7 @@ XCoreMCAsmInfo::XCoreMCAsmInfo(StringRef TT) {
SupportsDebugInformation = true;
Data16bitsDirective = "\t.short\t";
Data32bitsDirective = "\t.long\t";
- Data64bitsDirective = 0;
+ Data64bitsDirective = nullptr;
ZeroDirective = "\t.space\t";
CommentString = "#";
diff --git a/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.h b/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.h
index e53c96b..da2689a 100644
--- a/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.h
+++ b/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.h
@@ -21,7 +21,7 @@ namespace llvm {
class Target;
class XCoreMCAsmInfo : public MCAsmInfoELF {
- virtual void anchor();
+ void anchor() override;
public:
explicit XCoreMCAsmInfo(StringRef TT);
};
diff --git a/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
index 439d0ab..d54e94f 100644
--- a/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
+++ b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
@@ -23,6 +23,8 @@
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/TargetRegistry.h"
+using namespace llvm;
+
#define GET_INSTRINFO_MC_DESC
#include "XCoreGenInstrInfo.inc"
@@ -32,8 +34,6 @@
#define GET_REGINFO_MC_DESC
#include "XCoreGenRegisterInfo.inc"
-using namespace llvm;
-
static MCInstrInfo *createXCoreMCInstrInfo() {
MCInstrInfo *X = new MCInstrInfo();
InitXCoreMCInstrInfo(X);
@@ -58,7 +58,7 @@ static MCAsmInfo *createXCoreMCAsmInfo(const MCRegisterInfo &MRI,
MCAsmInfo *MAI = new XCoreMCAsmInfo(TT);
// Initial state of the frame pointer is SP.
- MCCFIInstruction Inst = MCCFIInstruction::createDefCfa(0, XCore::SP, 0);
+ MCCFIInstruction Inst = MCCFIInstruction::createDefCfa(nullptr, XCore::SP, 0);
MAI->addInitialFrameState(Inst);
return MAI;
@@ -128,12 +128,11 @@ void XCoreTargetAsmStreamer::emitCCBottomFunction(StringRef Name) {
static MCStreamer *
createXCoreMCAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS,
- bool isVerboseAsm, bool useCFI, bool useDwarfDirectory,
+ bool isVerboseAsm, bool useDwarfDirectory,
MCInstPrinter *InstPrint, MCCodeEmitter *CE,
MCAsmBackend *TAB, bool ShowInst) {
- MCStreamer *S =
- llvm::createAsmStreamer(Ctx, OS, isVerboseAsm, useCFI, useDwarfDirectory,
- InstPrint, CE, TAB, ShowInst);
+ MCStreamer *S = llvm::createAsmStreamer(
+ Ctx, OS, isVerboseAsm, useDwarfDirectory, InstPrint, CE, TAB, ShowInst);
new XCoreTargetAsmStreamer(*S, OS);
return S;
}
diff --git a/lib/Target/XCore/XCoreAsmPrinter.cpp b/lib/Target/XCore/XCoreAsmPrinter.cpp
index 21acedf..e98d4f9 100644
--- a/lib/Target/XCore/XCoreAsmPrinter.cpp
+++ b/lib/Target/XCore/XCoreAsmPrinter.cpp
@@ -12,7 +12,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "asm-printer"
#include "XCore.h"
#include "InstPrinter/XCoreInstPrinter.h"
#include "XCoreInstrInfo.h"
@@ -47,6 +46,8 @@
#include <cctype>
using namespace llvm;
+#define DEBUG_TYPE "asm-printer"
+
namespace {
class XCoreAsmPrinter : public AsmPrinter {
const XCoreSubtarget &Subtarget;
@@ -58,7 +59,7 @@ namespace {
: AsmPrinter(TM, Streamer), Subtarget(TM.getSubtarget<XCoreSubtarget>()),
MCInstLowering(*this) {}
- virtual const char *getPassName() const {
+ const char *getPassName() const override {
return "XCore Assembly Printer";
}
@@ -70,18 +71,18 @@ namespace {
void printOperand(const MachineInstr *MI, int opNum, raw_ostream &O);
bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
unsigned AsmVariant, const char *ExtraCode,
- raw_ostream &O);
+ raw_ostream &O) override;
bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNum,
unsigned AsmVariant, const char *ExtraCode,
raw_ostream &O) override;
void emitArrayBound(MCSymbol *Sym, const GlobalVariable *GV);
- virtual void EmitGlobalVariable(const GlobalVariable *GV);
+ void EmitGlobalVariable(const GlobalVariable *GV) override;
- void EmitFunctionEntryLabel();
- void EmitInstruction(const MachineInstr *MI);
- void EmitFunctionBodyStart();
- void EmitFunctionBodyEnd();
+ void EmitFunctionEntryLabel() override;
+ void EmitInstruction(const MachineInstr *MI) override;
+ void EmitFunctionBodyStart() override;
+ void EmitFunctionBodyEnd() override;
};
} // end of anonymous namespace
diff --git a/lib/Target/XCore/XCoreFrameLowering.cpp b/lib/Target/XCore/XCoreFrameLowering.cpp
index 954fddf..5499aba 100644
--- a/lib/Target/XCore/XCoreFrameLowering.cpp
+++ b/lib/Target/XCore/XCoreFrameLowering.cpp
@@ -64,7 +64,8 @@ static void EmitDefCfaRegister(MachineBasicBlock &MBB,
MachineModuleInfo *MMI, unsigned DRegNum) {
unsigned CFIIndex = MMI->addFrameInst(
MCCFIInstruction::createDefCfaRegister(nullptr, DRegNum));
- BuildMI(MBB, MBBI, dl, TII.get(XCore::CFI_INSTRUCTION)).addCFIIndex(CFIIndex);
+ BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
}
static void EmitDefCfaOffset(MachineBasicBlock &MBB,
@@ -73,7 +74,8 @@ static void EmitDefCfaOffset(MachineBasicBlock &MBB,
MachineModuleInfo *MMI, int Offset) {
unsigned CFIIndex =
MMI->addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, -Offset));
- BuildMI(MBB, MBBI, dl, TII.get(XCore::CFI_INSTRUCTION)).addCFIIndex(CFIIndex);
+ BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
}
static void EmitCfiOffset(MachineBasicBlock &MBB,
@@ -82,7 +84,8 @@ static void EmitCfiOffset(MachineBasicBlock &MBB,
unsigned DRegNum, int Offset) {
unsigned CFIIndex = MMI->addFrameInst(
MCCFIInstruction::createOffset(nullptr, DRegNum, Offset));
- BuildMI(MBB, MBBI, dl, TII.get(XCore::CFI_INSTRUCTION)).addCFIIndex(CFIIndex);
+ BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
}
/// The SP register is moved in steps of 'MaxImmU16' towards the bottom of the
@@ -113,7 +116,8 @@ static void IfNeededExtSP(MachineBasicBlock &MBB,
/// IfNeededLDAWSP emits the necessary LDAWSP instructions to move the SP only
/// as far as to make 'OffsetFromTop' reachable using an LDAWSP_lru6.
/// \param OffsetFromTop the spill offset from the top of the frame.
-/// \param [in,out] RemainingAdj the current SP offset from the top of the frame.
+/// \param [in,out] RemainingAdj the current SP offset from the top of the
+/// frame.
static void IfNeededLDAWSP(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI, DebugLoc dl,
const TargetInstrInfo &TII, int OffsetFromTop,
@@ -346,7 +350,8 @@ void XCoreFrameLowering::emitEpilogue(MachineFunction &MF,
RemainingAdj /= 4;
if (RetOpcode == XCore::EH_RETURN) {
- // 'Restore' the exception info the unwinder has placed into the stack slots.
+ // 'Restore' the exception info the unwinder has placed into the stack
+ // slots.
SmallVector<StackSlotInfo,2> SpillList;
GetEHSpillList(SpillList, MFI, XFI, MF.getTarget().getTargetLowering());
RestoreSpillList(MBB, MBBI, dl, TII, RemainingAdj, SpillList);
@@ -495,7 +500,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
errs() << "eliminateCallFramePseudoInstr size too big: "
<< Amount << "\n";
#endif
- llvm_unreachable(0);
+ llvm_unreachable(nullptr);
}
MachineInstr *New;
@@ -514,7 +519,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MBB.insert(I, New);
}
}
-
+
MBB.erase(I);
}
diff --git a/lib/Target/XCore/XCoreFrameLowering.h b/lib/Target/XCore/XCoreFrameLowering.h
index 6cd90c9..e4f806a 100644
--- a/lib/Target/XCore/XCoreFrameLowering.h
+++ b/lib/Target/XCore/XCoreFrameLowering.h
@@ -27,29 +27,30 @@ namespace llvm {
/// emitProlog/emitEpilog - These methods insert prolog and epilog code into
/// the function.
- void emitPrologue(MachineFunction &MF) const;
- void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+ void emitPrologue(MachineFunction &MF) const override;
+ void emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const override;
bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const;
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const override;
bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const;
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const override;
void eliminateCallFramePseudoInstr(MachineFunction &MF,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const;
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const override;
- bool hasFP(const MachineFunction &MF) const;
+ bool hasFP(const MachineFunction &MF) const override;
void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
- RegScavenger *RS = NULL) const;
+ RegScavenger *RS = nullptr) const override;
void processFunctionBeforeFrameFinalized(MachineFunction &MF,
- RegScavenger *RS = NULL) const;
+ RegScavenger *RS = nullptr) const override;
//! Stack slot size (4 bytes)
static int stackSlotSize() {
diff --git a/lib/Target/XCore/XCoreFrameToArgsOffsetElim.cpp b/lib/Target/XCore/XCoreFrameToArgsOffsetElim.cpp
index c18eff9..30c7b59 100644
--- a/lib/Target/XCore/XCoreFrameToArgsOffsetElim.cpp
+++ b/lib/Target/XCore/XCoreFrameToArgsOffsetElim.cpp
@@ -26,9 +26,9 @@ namespace {
static char ID;
XCoreFTAOElim() : MachineFunctionPass(ID) {}
- virtual bool runOnMachineFunction(MachineFunction &Fn);
+ bool runOnMachineFunction(MachineFunction &Fn) override;
- virtual const char *getPassName() const {
+ const char *getPassName() const override {
return "XCore FRAME_TO_ARGS_OFFSET Elimination";
}
};
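
The virtual-to-override rewrites here and throughout this commit are mechanical but load-bearing: with override, a signature drift in the base class becomes a compile error instead of a silently added new virtual. A self-contained illustration (toy types, not LLVM's):

    struct Pass {
      virtual ~Pass() {}
      virtual bool runOnFunction(int &F) { return false; }
    };

    struct MyPass : Pass {
      // If Pass::runOnFunction is ever renamed or re-signatured, this line
      // stops compiling instead of quietly declaring a new virtual.
      bool runOnFunction(int &F) override { return F != 0; }
    };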
diff --git a/lib/Target/XCore/XCoreISelDAGToDAG.cpp b/lib/Target/XCore/XCoreISelDAGToDAG.cpp
index 5b0fcfa..86bc6f2 100644
--- a/lib/Target/XCore/XCoreISelDAGToDAG.cpp
+++ b/lib/Target/XCore/XCoreISelDAGToDAG.cpp
@@ -44,7 +44,7 @@ namespace {
: SelectionDAGISel(TM, OptLevel),
Subtarget(*TM.getSubtargetImpl()) { }
- SDNode *Select(SDNode *N);
+ SDNode *Select(SDNode *N) override;
SDNode *SelectBRIND(SDNode *N);
/// getI32Imm - Return a target constant with the specified value, of type
@@ -70,7 +70,7 @@ namespace {
bool SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
std::vector<SDValue> &OutOps) override;
- virtual const char *getPassName() const {
+ const char *getPassName() const override {
return "XCore DAG->DAG Pattern Instruction Selection";
}
@@ -89,14 +89,14 @@ FunctionPass *llvm::createXCoreISelDag(XCoreTargetMachine &TM,
bool XCoreDAGToDAGISel::SelectADDRspii(SDValue Addr, SDValue &Base,
SDValue &Offset) {
- FrameIndexSDNode *FIN = 0;
+ FrameIndexSDNode *FIN = nullptr;
if ((FIN = dyn_cast<FrameIndexSDNode>(Addr))) {
Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
Offset = CurDAG->getTargetConstant(0, MVT::i32);
return true;
}
if (Addr.getOpcode() == ISD::ADD) {
- ConstantSDNode *CN = 0;
+ ConstantSDNode *CN = nullptr;
if ((FIN = dyn_cast<FrameIndexSDNode>(Addr.getOperand(0)))
&& (CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
&& (CN->getSExtValue() % 4 == 0 && CN->getSExtValue() >= 0)) {
@@ -227,8 +227,7 @@ replaceInChain(SelectionDAG *CurDAG, SDValue Chain, SDValue Old, SDValue New)
}
if (!found)
return SDValue();
- return CurDAG->getNode(ISD::TokenFactor, SDLoc(Chain), MVT::Other,
- &Ops[0], Ops.size());
+ return CurDAG->getNode(ISD::TokenFactor, SDLoc(Chain), MVT::Other, Ops);
}
SDNode *XCoreDAGToDAGISel::SelectBRIND(SDNode *N) {
@@ -237,10 +236,10 @@ SDNode *XCoreDAGToDAGISel::SelectBRIND(SDNode *N) {
SDValue Chain = N->getOperand(0);
SDValue Addr = N->getOperand(1);
if (Addr->getOpcode() != ISD::INTRINSIC_W_CHAIN)
- return 0;
+ return nullptr;
unsigned IntNo = cast<ConstantSDNode>(Addr->getOperand(1))->getZExtValue();
if (IntNo != Intrinsic::xcore_checkevent)
- return 0;
+ return nullptr;
SDValue nextAddr = Addr->getOperand(2);
SDValue CheckEventChainOut(Addr.getNode(), 1);
if (!CheckEventChainOut.use_empty()) {
@@ -252,7 +251,7 @@ SDNode *XCoreDAGToDAGISel::SelectBRIND(SDNode *N) {
SDValue NewChain = replaceInChain(CurDAG, Chain, CheckEventChainOut,
CheckEventChainIn);
if (!NewChain.getNode())
- return 0;
+ return nullptr;
Chain = NewChain;
}
// Enable events on the thread using setsr 1 and then disable them immediately
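
The replaceInChain change above is one instance of a commit-wide API migration: SelectionDAG::getNode and getMergeValues now take an ArrayRef-style argument, so callers pass the container instead of a pointer/length pair. A runnable stand-in showing why both vectors and fixed arrays convert implicitly (ArrayRefLike is a simplified hypothetical, not llvm::ArrayRef):

    #include <cstddef>
    #include <vector>

    template <typename T> struct ArrayRefLike {
      const T *Data;
      std::size_t Size;
      ArrayRefLike(const T *D, std::size_t S) : Data(D), Size(S) {}
      ArrayRefLike(const std::vector<T> &V) : Data(V.data()), Size(V.size()) {}
      template <std::size_t N>
      ArrayRefLike(const T (&A)[N]) : Data(A), Size(N) {}
    };

    static int useOps(ArrayRefLike<int> Ops) { return (int)Ops.Size; }

    int main() {
      std::vector<int> V{1, 2, 3};
      int A[2] = {4, 5};
      return useOps(V) + useOps(A) == 5 ? 0 : 1; // both convert implicitly
    }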
diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp
index 1b74013..9d78586 100644
--- a/lib/Target/XCore/XCoreISelLowering.cpp
+++ b/lib/Target/XCore/XCoreISelLowering.cpp
@@ -11,8 +11,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "xcore-lower"
-
#include "XCoreISelLowering.h"
#include "XCore.h"
#include "XCoreMachineFunctionInfo.h"
@@ -41,6 +39,8 @@
using namespace llvm;
+#define DEBUG_TYPE "xcore-lower"
+
const char *XCoreTargetLowering::
getTargetNodeName(unsigned Opcode) const
{
@@ -64,7 +64,7 @@ getTargetNodeName(unsigned Opcode) const
case XCoreISD::FRAME_TO_ARGS_OFFSET : return "XCoreISD::FRAME_TO_ARGS_OFFSET";
case XCoreISD::EH_RETURN : return "XCoreISD::EH_RETURN";
case XCoreISD::MEMBARRIER : return "XCoreISD::MEMBARRIER";
- default : return NULL;
+ default : return nullptr;
}
}
@@ -268,21 +268,19 @@ LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
Op.getOperand(1));
}
-SDValue XCoreTargetLowering::
-getGlobalAddressWrapper(SDValue GA, const GlobalValue *GV,
- SelectionDAG &DAG) const
-{
+SDValue XCoreTargetLowering::getGlobalAddressWrapper(SDValue GA,
+ const GlobalValue *GV,
+ SelectionDAG &DAG) const {
// FIXME there is no actual debug info here
SDLoc dl(GA);
const GlobalValue *UnderlyingGV = GV;
// If GV is an alias then use the aliasee to determine the wrapper type
if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
- UnderlyingGV = GA->getAliasedGlobal();
+ UnderlyingGV = GA->getAliasee();
if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(UnderlyingGV)) {
- if ( ( GVar->isConstant() &&
- UnderlyingGV->isLocalLinkage(GV->getLinkage()) )
- || ( GVar->hasSection() &&
- StringRef(GVar->getSection()).startswith(".cp.") ) )
+ if ((GVar->isConstant() && GV->hasLocalLinkage()) ||
+ (GVar->hasSection() &&
+ StringRef(GVar->getSection()).startswith(".cp.")))
return DAG.getNode(XCoreISD::CPRelativeWrapper, dl, MVT::i32, GA);
return DAG.getNode(XCoreISD::DPRelativeWrapper, dl, MVT::i32, GA);
}
@@ -428,13 +426,13 @@ lowerLoadWordFromAlignedBasePlusOffset(SDLoc DL, SDValue Chain, SDValue Base,
Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Low.getValue(1),
High.getValue(1));
SDValue Ops[] = { Result, Chain };
- return DAG.getMergeValues(Ops, 2, DL);
+ return DAG.getMergeValues(Ops, DL);
}
static bool isWordAligned(SDValue Value, SelectionDAG &DAG)
{
APInt KnownZero, KnownOne;
- DAG.ComputeMaskedBits(Value, KnownZero, KnownOne);
+ DAG.computeKnownBits(Value, KnownZero, KnownOne);
return KnownZero.countTrailingOnes() >= 2;
}
@@ -494,7 +492,7 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Low.getValue(1),
High.getValue(1));
SDValue Ops[] = { Result, Chain };
- return DAG.getMergeValues(Ops, 2, DL);
+ return DAG.getMergeValues(Ops, DL);
}
// Lower to a call to __misaligned_load(BasePtr).
@@ -506,17 +504,15 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
Entry.Node = BasePtr;
Args.push_back(Entry);
- TargetLowering::CallLoweringInfo CLI(Chain, IntPtrTy, false, false,
- false, false, 0, CallingConv::C, /*isTailCall=*/false,
- /*doesNotRet=*/false, /*isReturnValueUsed=*/true,
- DAG.getExternalSymbol("__misaligned_load", getPointerTy()),
- Args, DAG, DL);
- std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
-
- SDValue Ops[] =
- { CallResult.first, CallResult.second };
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(DL).setChain(Chain)
+ .setCallee(CallingConv::C, IntPtrTy,
+ DAG.getExternalSymbol("__misaligned_load", getPointerTy()),
+ &Args, 0);
- return DAG.getMergeValues(Ops, 2, DL);
+ std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
+ SDValue Ops[] = { CallResult.first, CallResult.second };
+ return DAG.getMergeValues(Ops, DL);
}
SDValue XCoreTargetLowering::
@@ -568,14 +564,13 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG) const
Entry.Node = Value;
Args.push_back(Entry);
- TargetLowering::CallLoweringInfo CLI(Chain,
- Type::getVoidTy(*DAG.getContext()), false, false,
- false, false, 0, CallingConv::C, /*isTailCall=*/false,
- /*doesNotRet=*/false, /*isReturnValueUsed=*/true,
- DAG.getExternalSymbol("__misaligned_store", getPointerTy()),
- Args, DAG, dl);
- std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(dl).setChain(Chain)
+ .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()),
+ DAG.getExternalSymbol("__misaligned_store", getPointerTy()),
+ &Args, 0);
+ std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
return CallResult.second;
}
@@ -593,7 +588,7 @@ LowerSMUL_LOHI(SDValue Op, SelectionDAG &DAG) const
LHS, RHS);
SDValue Lo(Hi.getNode(), 1);
SDValue Ops[] = { Lo, Hi };
- return DAG.getMergeValues(Ops, 2, dl);
+ return DAG.getMergeValues(Ops, dl);
}
SDValue XCoreTargetLowering::
@@ -610,7 +605,7 @@ LowerUMUL_LOHI(SDValue Op, SelectionDAG &DAG) const
Zero, Zero);
SDValue Lo(Hi.getNode(), 1);
SDValue Ops[] = { Lo, Hi };
- return DAG.getMergeValues(Ops, 2, dl);
+ return DAG.getMergeValues(Ops, dl);
}
/// isADDADDMUL - Return whether Op is in a form that is equivalent to
@@ -741,7 +736,7 @@ ExpandADDSUB(SDNode *N, SelectionDAG &DAG) const
if (N->getOpcode() == ISD::ADD) {
SDValue Result = TryExpandADDWithMul(N, DAG);
- if (Result.getNode() != 0)
+ if (Result.getNode())
return Result;
}
@@ -886,7 +881,7 @@ LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const {
DAG.getCopyToReg(Chain, dl, HandlerReg, Handler)
};
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains, 2);
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
return DAG.getNode(XCoreISD::EH_RETURN, dl, MVT::Other, Chain,
DAG.getRegister(StackReg, MVT::i32),
@@ -952,7 +947,7 @@ LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const {
MachinePointerInfo(TrmpAddr, 16), false, false,
0);
- return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains, 5);
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
}
SDValue XCoreTargetLowering::
@@ -967,7 +962,7 @@ LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const {
Op.getOperand(1), Op.getOperand(2) , Op.getOperand(3));
SDValue Crc(Data.getNode(), 1);
SDValue Results[] = { Crc, Data };
- return DAG.getMergeValues(Results, 2, DL);
+ return DAG.getMergeValues(Results, DL);
}
return SDValue();
}
@@ -1111,7 +1106,7 @@ LowerCallResult(SDValue Chain, SDValue InFlag,
unsigned index = ResultMemLocs[i].second;
SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other);
SDValue Ops[] = { Chain, DAG.getConstant(offset / 4, MVT::i32) };
- SDValue load = DAG.getNode(XCoreISD::LDWSP, dl, VTs, Ops, 2);
+ SDValue load = DAG.getNode(XCoreISD::LDWSP, dl, VTs, Ops);
InVals[index] = load;
MemOpChains.push_back(load.getValue(1));
}
@@ -1119,8 +1114,7 @@ LowerCallResult(SDValue Chain, SDValue InFlag,
// Transform all load nodes into one single node because
// all load nodes are independent of each other.
if (!MemOpChains.empty())
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- &MemOpChains[0], MemOpChains.size());
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
return Chain;
}
@@ -1204,8 +1198,7 @@ XCoreTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee,
// Transform all store nodes into one single node because
// all store nodes are independent of each other.
if (!MemOpChains.empty())
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- &MemOpChains[0], MemOpChains.size());
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
// Build a sequence of copy-to-reg nodes chained together with token
// chain and flag operands which copy the outgoing args into registers.
@@ -1244,7 +1237,7 @@ XCoreTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee,
if (InFlag.getNode())
Ops.push_back(InFlag);
- Chain = DAG.getNode(XCoreISD::BL, dl, NodeTys, &Ops[0], Ops.size());
+ Chain = DAG.getNode(XCoreISD::BL, dl, NodeTys, Ops);
InFlag = Chain.getValue(1);
// Create the CALLSEQ_END node.
@@ -1347,7 +1340,7 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain,
errs() << "LowerFormalArguments Unhandled argument type: "
<< RegVT.getSimpleVT().SimpleTy << "\n";
#endif
- llvm_unreachable(0);
+ llvm_unreachable(nullptr);
}
case MVT::i32:
unsigned VReg = RegInfo.createVirtualRegister(&XCore::GRRegsRegClass);
@@ -1384,7 +1377,7 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain,
// 1b. CopyFromReg vararg registers.
if (isVarArg) {
// Argument registers
- static const uint16_t ArgRegs[] = {
+ static const MCPhysReg ArgRegs[] = {
XCore::R0, XCore::R1, XCore::R2, XCore::R3
};
XCoreFunctionInfo *XFI = MF.getInfo<XCoreFunctionInfo>();
@@ -1422,8 +1415,7 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain,
// 2. chain CopyFromReg nodes into a TokenFactor.
if (!CFRegNode.empty())
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &CFRegNode[0],
- CFRegNode.size());
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, CFRegNode);
// 3. Memcpy 'byVal' args & push final InVals.
// Aggregates passed "byVal" need to be copied by the callee.
@@ -1452,8 +1444,7 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain,
// 4. chain mem op nodes into a TokenFactor.
if (!MemOps.empty()) {
MemOps.push_back(Chain);
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &MemOps[0],
- MemOps.size());
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
}
return Chain;
@@ -1535,8 +1526,7 @@ XCoreTargetLowering::LowerReturn(SDValue Chain,
// Transform all store nodes into one single node because
// all stores are independent of each other.
if (!MemOpChains.empty())
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- &MemOpChains[0], MemOpChains.size());
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
// Now handle return values copied to registers.
for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
@@ -1558,8 +1548,7 @@ XCoreTargetLowering::LowerReturn(SDValue Chain,
if (Flag.getNode())
RetOps.push_back(Flag);
- return DAG.getNode(XCoreISD::RETSP, dl, MVT::Other,
- &RetOps[0], RetOps.size());
+ return DAG.getNode(XCoreISD::RETSP, dl, MVT::Other, RetOps);
}
//===----------------------------------------------------------------------===//
@@ -1696,7 +1685,7 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N,
SDValue Result = DAG.getNode(ISD::AND, dl, VT, N2,
DAG.getConstant(1, VT));
SDValue Ops[] = { Result, Carry };
- return DAG.getMergeValues(Ops, 2, dl);
+ return DAG.getMergeValues(Ops, dl);
}
// fold (ladd x, 0, y) -> 0, add x, y iff carry is unused and y has only the
@@ -1705,12 +1694,12 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N,
APInt KnownZero, KnownOne;
APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
VT.getSizeInBits() - 1);
- DAG.ComputeMaskedBits(N2, KnownZero, KnownOne);
+ DAG.computeKnownBits(N2, KnownZero, KnownOne);
if ((KnownZero & Mask) == Mask) {
SDValue Carry = DAG.getConstant(0, VT);
SDValue Result = DAG.getNode(ISD::ADD, dl, VT, N0, N2);
SDValue Ops[] = { Result, Carry };
- return DAG.getMergeValues(Ops, 2, dl);
+ return DAG.getMergeValues(Ops, dl);
}
}
}
@@ -1728,13 +1717,13 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N,
APInt KnownZero, KnownOne;
APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
VT.getSizeInBits() - 1);
- DAG.ComputeMaskedBits(N2, KnownZero, KnownOne);
+ DAG.computeKnownBits(N2, KnownZero, KnownOne);
if ((KnownZero & Mask) == Mask) {
SDValue Borrow = N2;
SDValue Result = DAG.getNode(ISD::SUB, dl, VT,
DAG.getConstant(0, VT), N2);
SDValue Ops[] = { Result, Borrow };
- return DAG.getMergeValues(Ops, 2, dl);
+ return DAG.getMergeValues(Ops, dl);
}
}
@@ -1744,12 +1733,12 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N,
APInt KnownZero, KnownOne;
APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
VT.getSizeInBits() - 1);
- DAG.ComputeMaskedBits(N2, KnownZero, KnownOne);
+ DAG.computeKnownBits(N2, KnownZero, KnownOne);
if ((KnownZero & Mask) == Mask) {
SDValue Borrow = DAG.getConstant(0, VT);
SDValue Result = DAG.getNode(ISD::SUB, dl, VT, N0, N2);
SDValue Ops[] = { Result, Borrow };
- return DAG.getMergeValues(Ops, 2, dl);
+ return DAG.getMergeValues(Ops, dl);
}
}
}
@@ -1775,14 +1764,14 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N,
if (N->hasNUsesOfValue(0, 0)) {
SDValue Lo = DAG.getNode(ISD::ADD, dl, VT, N2, N3);
SDValue Ops[] = { Lo, Lo };
- return DAG.getMergeValues(Ops, 2, dl);
+ return DAG.getMergeValues(Ops, dl);
}
// Otherwise fold to ladd(a, b, 0)
SDValue Result =
DAG.getNode(XCoreISD::LADD, dl, DAG.getVTList(VT, VT), N2, N3, N1);
SDValue Carry(Result.getNode(), 1);
SDValue Ops[] = { Carry, Result };
- return DAG.getMergeValues(Ops, 2, dl);
+ return DAG.getMergeValues(Ops, dl);
}
}
break;
@@ -1866,11 +1855,11 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N,
return SDValue();
}
-void XCoreTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
- APInt &KnownZero,
- APInt &KnownOne,
- const SelectionDAG &DAG,
- unsigned Depth) const {
+void XCoreTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth) const {
KnownZero = KnownOne = APInt(KnownZero.getBitWidth(), 0);
switch (Op.getOpcode()) {
default: break;
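
The ComputeMaskedBits to computeKnownBits renames above are mechanical, but the semantics are worth spelling out: the query fills masks of bits proven zero and proven one, and isWordAligned simply asks whether the two low bits are known zero. A toy standalone model of that check (illustrative names, not the LLVM API):

    #include <cassert>
    #include <cstdint>

    struct KnownBits32 {
      uint32_t Zero = 0, One = 0; // bit set => bit proven 0 (resp. 1)
    };

    // Toy analysis for "value = X << ShAmt": the low ShAmt bits are zero.
    static KnownBits32 knownBitsOfShl(unsigned ShAmt) {
      KnownBits32 K;
      K.Zero = ShAmt >= 32 ? ~0u : (1u << ShAmt) - 1;
      return K;
    }

    static unsigned countTrailingOnes(uint32_t V) {
      unsigned N = 0;
      while (V & 1) { V >>= 1; ++N; }
      return N;
    }

    int main() {
      KnownBits32 K = knownBitsOfShl(2);      // value is a multiple of 4
      assert(countTrailingOnes(K.Zero) >= 2); // i.e. word aligned
      return 0;
    }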
diff --git a/lib/Target/XCore/XCoreISelLowering.h b/lib/Target/XCore/XCoreISelLowering.h
index 65e2bad..d28715b 100644
--- a/lib/Target/XCore/XCoreISelLowering.h
+++ b/lib/Target/XCore/XCoreISelLowering.h
@@ -97,31 +97,30 @@ namespace llvm {
explicit XCoreTargetLowering(XCoreTargetMachine &TM);
using TargetLowering::isZExtFree;
- virtual bool isZExtFree(SDValue Val, EVT VT2) const;
+ bool isZExtFree(SDValue Val, EVT VT2) const override;
- virtual unsigned getJumpTableEncoding() const;
- virtual MVT getScalarShiftAmountTy(EVT LHSTy) const { return MVT::i32; }
+ unsigned getJumpTableEncoding() const override;
+ MVT getScalarShiftAmountTy(EVT LHSTy) const override { return MVT::i32; }
/// LowerOperation - Provide custom lowering hooks for some operations.
- virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
/// ReplaceNodeResults - Replace the results of node with an illegal result
/// type with new values built out of custom code.
///
- virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
- SelectionDAG &DAG) const;
+ void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
+ SelectionDAG &DAG) const override;
/// getTargetNodeName - This method returns the name of a target specific
// DAG node.
- virtual const char *getTargetNodeName(unsigned Opcode) const;
+ const char *getTargetNodeName(unsigned Opcode) const override;
- virtual MachineBasicBlock *
+ MachineBasicBlock *
EmitInstrWithCustomInserter(MachineInstr *MI,
- MachineBasicBlock *MBB) const;
+ MachineBasicBlock *MBB) const override;
- virtual bool isLegalAddressingMode(const AddrMode &AM,
- Type *Ty) const;
+ bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const override;
private:
const XCoreTargetMachine &TM;
@@ -176,44 +175,44 @@ namespace llvm {
// Inline asm support
std::pair<unsigned, const TargetRegisterClass*>
getRegForInlineAsmConstraint(const std::string &Constraint,
- MVT VT) const;
+ MVT VT) const override;
// Expand specifics
SDValue TryExpandADDWithMul(SDNode *Op, SelectionDAG &DAG) const;
SDValue ExpandADDSUB(SDNode *Op, SelectionDAG &DAG) const;
- virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
- virtual void computeMaskedBitsForTargetNode(const SDValue Op,
- APInt &KnownZero,
- APInt &KnownOne,
- const SelectionDAG &DAG,
- unsigned Depth = 0) const;
+ void computeKnownBitsForTargetNode(const SDValue Op,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth = 0) const override;
- virtual SDValue
+ SDValue
LowerFormalArguments(SDValue Chain,
CallingConv::ID CallConv,
bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
SDLoc dl, SelectionDAG &DAG,
- SmallVectorImpl<SDValue> &InVals) const;
+ SmallVectorImpl<SDValue> &InVals) const override;
- virtual SDValue
+ SDValue
LowerCall(TargetLowering::CallLoweringInfo &CLI,
- SmallVectorImpl<SDValue> &InVals) const;
+ SmallVectorImpl<SDValue> &InVals) const override;
- virtual SDValue
+ SDValue
LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
- SDLoc dl, SelectionDAG &DAG) const;
+ SDLoc dl, SelectionDAG &DAG) const override;
- virtual bool
+ bool
CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &ArgsFlags,
- LLVMContext &Context) const;
+ LLVMContext &Context) const override;
};
}
diff --git a/lib/Target/XCore/XCoreInstrInfo.cpp b/lib/Target/XCore/XCoreInstrInfo.cpp
index cea3bbf..984f0cd 100644
--- a/lib/Target/XCore/XCoreInstrInfo.cpp
+++ b/lib/Target/XCore/XCoreInstrInfo.cpp
@@ -26,6 +26,8 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
+using namespace llvm;
+
#define GET_INSTRINFO_CTOR_DTOR
#include "XCoreGenInstrInfo.inc"
@@ -41,9 +43,6 @@ namespace XCore {
}
}
-using namespace llvm;
-
-
// Pin the vtable to this file.
void XCoreInstrInfo::anchor() {}
@@ -289,7 +288,7 @@ XCoreInstrInfo::InsertBranch(MachineBasicBlock &MBB,MachineBasicBlock *TBB,
assert((Cond.size() == 2 || Cond.size() == 0) &&
"Unexpected number of components!");
- if (FBB == 0) { // One way branch.
+ if (!FBB) { // One way branch.
if (Cond.empty()) {
// Unconditional branch
BuildMI(&MBB, DL, get(XCore::BRFU_lu6)).addMBB(TBB);
@@ -428,13 +427,21 @@ static inline bool isImmU16(unsigned val) {
return val < (1 << 16);
}
+static bool isImmMskBitp(unsigned val) {
+ if (!isMask_32(val)) {
+ return false;
+ }
+ int N = Log2_32(val) + 1;
+ return (N >= 1 && N <= 8) || N == 16 || N == 24 || N == 32;
+}
+
MachineBasicBlock::iterator XCoreInstrInfo::loadImmediate(
MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
unsigned Reg, uint64_t Value) const {
DebugLoc dl;
if (MI != MBB.end()) dl = MI->getDebugLoc();
- if (isMask_32(Value)) {
+ if (isImmMskBitp(Value)) {
int N = Log2_32(Value) + 1;
return BuildMI(MBB, MI, dl, get(XCore::MKMSK_rus), Reg).addImm(N);
}
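
isImmMskBitp above tightens loadImmediate: MKMSK_rus can only encode mask widths of 1-8, 16, 24, or 32 bits, so the previous plain isMask_32 test accepted unencodable values. A standalone, runnable copy of the same logic (isMask32 and log2_32 are local stand-ins for llvm::isMask_32 and llvm::Log2_32):

    #include <cassert>
    #include <cstdint>

    static bool isMask32(uint32_t V) { return V && ((V + 1) & V) == 0; }

    static int log2_32(uint32_t V) {
      int N = -1;
      while (V) { V >>= 1; ++N; }
      return N;
    }

    static bool isImmMskBitp(uint32_t Val) {
      if (!isMask32(Val))
        return false;
      int N = log2_32(Val) + 1; // mask width in bits
      return (N >= 1 && N <= 8) || N == 16 || N == 24 || N == 32;
    }

    int main() {
      assert(isImmMskBitp(0xFF));   // 8-bit mask: encodable
      assert(isImmMskBitp(0xFFFF)); // 16-bit mask: encodable
      assert(!isImmMskBitp(0x1FF)); // 9-bit mask: not encodable
      assert(!isImmMskBitp(0xF0));  // not a mask at all
      return 0;
    }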
diff --git a/lib/Target/XCore/XCoreInstrInfo.h b/lib/Target/XCore/XCoreInstrInfo.h
index 48c9cb5..e0be96b 100644
--- a/lib/Target/XCore/XCoreInstrInfo.h
+++ b/lib/Target/XCore/XCoreInstrInfo.h
@@ -32,55 +32,55 @@ public:
/// such, whenever a client has an instance of instruction info, it should
/// always be able to get register info as well (through this method).
///
- virtual const TargetRegisterInfo &getRegisterInfo() const { return RI; }
+ const TargetRegisterInfo &getRegisterInfo() const { return RI; }
/// isLoadFromStackSlot - If the specified machine instruction is a direct
/// load from a stack slot, return the virtual or physical register number of
/// the destination along with the FrameIndex of the loaded stack slot. If
/// not, return 0. This predicate must return 0 if the instruction has
/// any side effects other than loading from the stack slot.
- virtual unsigned isLoadFromStackSlot(const MachineInstr *MI,
- int &FrameIndex) const;
-
+ unsigned isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const override;
+
/// isStoreToStackSlot - If the specified machine instruction is a direct
/// store to a stack slot, return the virtual or physical register number of
/// the source reg along with the FrameIndex of the loaded stack slot. If
/// not, return 0. This predicate must return 0 if the instruction has
/// any side effects other than storing to the stack slot.
- virtual unsigned isStoreToStackSlot(const MachineInstr *MI,
- int &FrameIndex) const;
-
- virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
- MachineBasicBlock *&FBB,
- SmallVectorImpl<MachineOperand> &Cond,
- bool AllowModify) const;
-
- virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
- MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond,
- DebugLoc DL) const;
-
- virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
-
- virtual void copyPhysReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I, DebugLoc DL,
- unsigned DestReg, unsigned SrcReg,
- bool KillSrc) const;
-
- virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- unsigned SrcReg, bool isKill, int FrameIndex,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const;
-
- virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- unsigned DestReg, int FrameIndex,
- const TargetRegisterClass *RC,
- const TargetRegisterInfo *TRI) const;
-
- virtual bool ReverseBranchCondition(
- SmallVectorImpl<MachineOperand> &Cond) const;
+ unsigned isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const override;
+
+ bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const override;
+
+ unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const override;
+
+ unsigned RemoveBranch(MachineBasicBlock &MBB) const override;
+
+ void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const override;
+
+ void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const override;
+
+ void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const override;
+
+ bool ReverseBranchCondition(
+ SmallVectorImpl<MachineOperand> &Cond) const override;
// Emit code before MBBI to load immediate value into physical register Reg.
// Returns an iterator to the new instruction.
diff --git a/lib/Target/XCore/XCoreLowerThreadLocal.cpp b/lib/Target/XCore/XCoreLowerThreadLocal.cpp
index b398c2d..ac3bae5 100644
--- a/lib/Target/XCore/XCoreLowerThreadLocal.cpp
+++ b/lib/Target/XCore/XCoreLowerThreadLocal.cpp
@@ -48,7 +48,7 @@ namespace {
bool lowerGlobal(GlobalVariable *GV);
- bool runOnModule(Module &M);
+ bool runOnModule(Module &M) override;
};
}
@@ -189,13 +189,14 @@ bool XCoreLowerThreadLocal::lowerGlobal(GlobalVariable *GV) {
// Create replacement global.
ArrayType *NewType = createLoweredType(GV->getType()->getElementType());
- Constant *NewInitializer = 0;
+ Constant *NewInitializer = nullptr;
if (GV->hasInitializer())
NewInitializer = createLoweredInitializer(NewType,
GV->getInitializer());
GlobalVariable *NewGV =
new GlobalVariable(*M, NewType, GV->isConstant(), GV->getLinkage(),
- NewInitializer, "", 0, GlobalVariable::NotThreadLocal,
+ NewInitializer, "", nullptr,
+ GlobalVariable::NotThreadLocal,
GV->getType()->getAddressSpace(),
GV->isExternallyInitialized());
diff --git a/lib/Target/XCore/XCoreRegisterInfo.cpp b/lib/Target/XCore/XCoreRegisterInfo.cpp
index d85d717..316c82c 100644
--- a/lib/Target/XCore/XCoreRegisterInfo.cpp
+++ b/lib/Target/XCore/XCoreRegisterInfo.cpp
@@ -33,11 +33,13 @@
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "xcore-reg-info"
+
#define GET_REGINFO_TARGET_DESC
#include "XCoreGenRegisterInfo.inc"
-using namespace llvm;
-
XCoreRegisterInfo::XCoreRegisterInfo()
: XCoreGenRegisterInfo(XCore::LR) {
}
@@ -205,16 +207,16 @@ bool XCoreRegisterInfo::needsFrameMoves(const MachineFunction &MF) {
MF.getFunction()->needsUnwindTableEntry();
}
-const uint16_t* XCoreRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF)
+const MCPhysReg* XCoreRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF)
const {
// The callee saved registers LR & FP are explicitly handled during
// emitPrologue & emitEpilogue and related functions.
- static const uint16_t CalleeSavedRegs[] = {
+ static const MCPhysReg CalleeSavedRegs[] = {
XCore::R4, XCore::R5, XCore::R6, XCore::R7,
XCore::R8, XCore::R9, XCore::R10,
0
};
- static const uint16_t CalleeSavedRegsFP[] = {
+ static const MCPhysReg CalleeSavedRegsFP[] = {
XCore::R4, XCore::R5, XCore::R6, XCore::R7,
XCore::R8, XCore::R9,
0
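
The uint16_t to MCPhysReg change above is about naming intent, not representation: in this era MCPhysReg is a 16-bit integer typedef, and callee-saved lists remain zero-terminated sentinel arrays. A runnable sketch (the typedef is an assumption matching MCRegisterInfo.h of the time; the register numbers are made up):

    #include <cstdint>
    #include <cstdio>

    typedef uint16_t MCPhysReg; // assumed definition from MCRegisterInfo.h

    static const MCPhysReg CalleeSaved[] = {4, 5, 6, 7, 8, 9, 10, 0};

    int main() {
      for (const MCPhysReg *R = CalleeSaved; *R; ++R) // stop at 0 sentinel
        std::printf("r%u ", (unsigned)*R);
      std::printf("\n");
      return 0;
    }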
diff --git a/lib/Target/XCore/XCoreRegisterInfo.h b/lib/Target/XCore/XCoreRegisterInfo.h
index 36ba7b4..aa617a0 100644
--- a/lib/Target/XCore/XCoreRegisterInfo.h
+++ b/lib/Target/XCore/XCoreRegisterInfo.h
@@ -29,22 +29,23 @@ public:
/// Code Generation virtual methods...
- const uint16_t *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
+ const MCPhysReg *
+ getCalleeSavedRegs(const MachineFunction *MF = nullptr) const override;
- BitVector getReservedRegs(const MachineFunction &MF) const;
+ BitVector getReservedRegs(const MachineFunction &MF) const override;
- bool requiresRegisterScavenging(const MachineFunction &MF) const;
+ bool requiresRegisterScavenging(const MachineFunction &MF) const override;
- bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const;
+ bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const override;
- bool useFPForScavengingIndex(const MachineFunction &MF) const;
+ bool useFPForScavengingIndex(const MachineFunction &MF) const override;
void eliminateFrameIndex(MachineBasicBlock::iterator II,
int SPAdj, unsigned FIOperandNum,
- RegScavenger *RS = NULL) const;
+ RegScavenger *RS = nullptr) const override;
// Debug information queries.
- unsigned getFrameRegister(const MachineFunction &MF) const;
+ unsigned getFrameRegister(const MachineFunction &MF) const override;
//! Return whether to emit frame moves
static bool needsFrameMoves(const MachineFunction &MF);
diff --git a/lib/Target/XCore/XCoreSelectionDAGInfo.cpp b/lib/Target/XCore/XCoreSelectionDAGInfo.cpp
index 68ede6a..5a6bbe7 100644
--- a/lib/Target/XCore/XCoreSelectionDAGInfo.cpp
+++ b/lib/Target/XCore/XCoreSelectionDAGInfo.cpp
@@ -11,10 +11,11 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "xcore-selectiondag-info"
#include "XCoreTargetMachine.h"
using namespace llvm;
+#define DEBUG_TYPE "xcore-selectiondag-info"
+
XCoreSelectionDAGInfo::XCoreSelectionDAGInfo(const XCoreTargetMachine &TM)
: TargetSelectionDAGInfo(TM) {
}
@@ -41,13 +42,15 @@ EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl, SDValue Chain,
Entry.Node = Src; Args.push_back(Entry);
Entry.Node = Size; Args.push_back(Entry);
- TargetLowering::CallLoweringInfo
- CLI(Chain, Type::getVoidTy(*DAG.getContext()), false, false, false, false,
- 0, TLI.getLibcallCallingConv(RTLIB::MEMCPY), /*isTailCall=*/false,
- /*doesNotRet=*/false, /*isReturnValueUsed=*/false,
- DAG.getExternalSymbol("__memcpy_4", TLI.getPointerTy()), Args, DAG, dl);
- std::pair<SDValue,SDValue> CallResult =
- TLI.LowerCallTo(CLI);
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(dl).setChain(Chain)
+ .setCallee(TLI.getLibcallCallingConv(RTLIB::MEMCPY),
+ Type::getVoidTy(*DAG.getContext()),
+ DAG.getExternalSymbol("__memcpy_4", TLI.getPointerTy()),
+ &Args, 0)
+ .setDiscardResult();
+
+ std::pair<SDValue,SDValue> CallResult = TLI.LowerCallTo(CLI);
return CallResult.second;
}
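
This memcpy lowering, like the misaligned load/store lowerings earlier, now builds CallLoweringInfo through chained setters rather than one long positional constructor. A generic, runnable sketch of the builder idiom (CallInfo is a stand-in, not the LLVM class):

    #include <iostream>
    #include <string>
    #include <utility>

    struct CallInfo {
      std::string Callee;
      int CallConv = 0;
      bool DiscardResult = false;
      // Each setter returns *this so configuration reads as one chain.
      CallInfo &setCallee(int CC, std::string Name) {
        CallConv = CC;
        Callee = std::move(Name);
        return *this;
      }
      CallInfo &setDiscardResult() {
        DiscardResult = true;
        return *this;
      }
    };

    int main() {
      CallInfo CLI;
      CLI.setCallee(0, "__memcpy_4").setDiscardResult();
      std::cout << CLI.Callee << " discard=" << CLI.DiscardResult << "\n";
      return 0;
    }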
diff --git a/lib/Target/XCore/XCoreSelectionDAGInfo.h b/lib/Target/XCore/XCoreSelectionDAGInfo.h
index 31704f3..ea6af98 100644
--- a/lib/Target/XCore/XCoreSelectionDAGInfo.h
+++ b/lib/Target/XCore/XCoreSelectionDAGInfo.h
@@ -25,14 +25,14 @@ public:
explicit XCoreSelectionDAGInfo(const XCoreTargetMachine &TM);
~XCoreSelectionDAGInfo();
- virtual SDValue
+ SDValue
EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl,
SDValue Chain,
SDValue Op1, SDValue Op2,
SDValue Op3, unsigned Align, bool isVolatile,
bool AlwaysInline,
MachinePointerInfo DstPtrInfo,
- MachinePointerInfo SrcPtrInfo) const;
+ MachinePointerInfo SrcPtrInfo) const override;
};
}
diff --git a/lib/Target/XCore/XCoreSubtarget.cpp b/lib/Target/XCore/XCoreSubtarget.cpp
index 8cfb770..89ea03a 100644
--- a/lib/Target/XCore/XCoreSubtarget.cpp
+++ b/lib/Target/XCore/XCoreSubtarget.cpp
@@ -15,12 +15,14 @@
#include "XCore.h"
#include "llvm/Support/TargetRegistry.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "xcore-subtarget"
+
#define GET_SUBTARGETINFO_TARGET_DESC
#define GET_SUBTARGETINFO_CTOR
#include "XCoreGenSubtargetInfo.inc"
-using namespace llvm;
-
void XCoreSubtarget::anchor() { }
XCoreSubtarget::XCoreSubtarget(const std::string &TT,
diff --git a/lib/Target/XCore/XCoreTargetMachine.cpp b/lib/Target/XCore/XCoreTargetMachine.cpp
index 781a87b..0fb21c5 100644
--- a/lib/Target/XCore/XCoreTargetMachine.cpp
+++ b/lib/Target/XCore/XCoreTargetMachine.cpp
@@ -46,9 +46,9 @@ public:
return getTM<XCoreTargetMachine>();
}
- virtual bool addPreISel();
- virtual bool addInstSelector();
- virtual bool addPreEmitPass();
+ bool addPreISel() override;
+ bool addInstSelector() override;
+ bool addPreEmitPass() override;
};
} // namespace
diff --git a/lib/Target/XCore/XCoreTargetMachine.h b/lib/Target/XCore/XCoreTargetMachine.h
index a19a677..a57ca55 100644
--- a/lib/Target/XCore/XCoreTargetMachine.h
+++ b/lib/Target/XCore/XCoreTargetMachine.h
@@ -37,28 +37,28 @@ public:
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL);
- virtual const XCoreInstrInfo *getInstrInfo() const { return &InstrInfo; }
- virtual const XCoreFrameLowering *getFrameLowering() const {
+ const XCoreInstrInfo *getInstrInfo() const override { return &InstrInfo; }
+ const XCoreFrameLowering *getFrameLowering() const override {
return &FrameLowering;
}
- virtual const XCoreSubtarget *getSubtargetImpl() const { return &Subtarget; }
- virtual const XCoreTargetLowering *getTargetLowering() const {
+ const XCoreSubtarget *getSubtargetImpl() const override { return &Subtarget; }
+ const XCoreTargetLowering *getTargetLowering() const override {
return &TLInfo;
}
- virtual const XCoreSelectionDAGInfo* getSelectionDAGInfo() const {
+ const XCoreSelectionDAGInfo* getSelectionDAGInfo() const override {
return &TSInfo;
}
- virtual const TargetRegisterInfo *getRegisterInfo() const {
+ const TargetRegisterInfo *getRegisterInfo() const override {
return &InstrInfo.getRegisterInfo();
}
- virtual const DataLayout *getDataLayout() const { return &DL; }
+ const DataLayout *getDataLayout() const override { return &DL; }
// Pass Pipeline Configuration
- virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
+ TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
- virtual void addAnalysisPasses(PassManagerBase &PM);
+ void addAnalysisPasses(PassManagerBase &PM) override;
};
} // end namespace llvm
diff --git a/lib/Target/XCore/XCoreTargetObjectFile.h b/lib/Target/XCore/XCoreTargetObjectFile.h
index 733e6d3..34d756e 100644
--- a/lib/Target/XCore/XCoreTargetObjectFile.h
+++ b/lib/Target/XCore/XCoreTargetObjectFile.h
@@ -22,7 +22,7 @@ static const unsigned CodeModelLargeSize = 256;
const MCSection *ReadOnlySectionLarge;
const MCSection *DataRelROSectionLarge;
public:
- void Initialize(MCContext &Ctx, const TargetMachine &TM);
+ void Initialize(MCContext &Ctx, const TargetMachine &TM) override;
const MCSection *
getExplicitSectionGlobal(const GlobalValue *GV,
diff --git a/lib/Target/XCore/XCoreTargetTransformInfo.cpp b/lib/Target/XCore/XCoreTargetTransformInfo.cpp
index 313d18d..80d193d 100644
--- a/lib/Target/XCore/XCoreTargetTransformInfo.cpp
+++ b/lib/Target/XCore/XCoreTargetTransformInfo.cpp
@@ -14,7 +14,6 @@
///
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "xcoretti"
#include "XCore.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Support/Debug.h"
@@ -22,8 +21,10 @@
#include "llvm/Target/TargetLowering.h"
using namespace llvm;
+#define DEBUG_TYPE "xcoretti"
+
// Declare the pass initialization routine locally as target-specific passes
-// don't havve a target-wide initialization entry point, and so we rely on the
+// don't have a target-wide initialization entry point, and so we rely on the
// pass constructor initialization.
namespace llvm {
void initializeXCoreTTIPass(PassRegistry &);
diff --git a/lib/Transforms/Hello/Hello.cpp b/lib/Transforms/Hello/Hello.cpp
index c514c49..29b9bb8 100644
--- a/lib/Transforms/Hello/Hello.cpp
+++ b/lib/Transforms/Hello/Hello.cpp
@@ -12,13 +12,14 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "hello"
#include "llvm/ADT/Statistic.h"
#include "llvm/IR/Function.h"
#include "llvm/Pass.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
+#define DEBUG_TYPE "hello"
+
STATISTIC(HelloCounter, "Counts number of functions greeted");
namespace {
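
Hello.cpp shows the commit-wide DEBUG_TYPE move in its simplest form: the macro is now defined after every #include, so no header is compiled under this file's value and no header-defined DEBUG_TYPE clashes with it. The resulting file shape, as a sketch (assumed LLVM 3.5 conventions, not a complete pass):

    #include "llvm/ADT/Statistic.h"
    #include "llvm/Pass.h"
    using namespace llvm;

    // Only now, with all includes done, pick the file-local debug tag that
    // DEBUG(...) and STATISTIC(...) expansions below will use.
    #define DEBUG_TYPE "hello"

    STATISTIC(HelloCounter, "Counts number of functions greeted");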
diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp
index 48d3fba..377fa15 100644
--- a/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -29,7 +29,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "argpromotion"
#include "llvm/Transforms/IPO.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/Statistic.h"
@@ -49,6 +48,8 @@
#include <set>
using namespace llvm;
+#define DEBUG_TYPE "argpromotion"
+
STATISTIC(NumArgumentsPromoted , "Number of pointer arguments promoted");
STATISTIC(NumAggregatesPromoted, "Number of aggregate arguments promoted");
STATISTIC(NumByValArgsPromoted , "Number of byval arguments promoted");
@@ -123,14 +124,14 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) {
Function *F = CGN->getFunction();
// Make sure that it is local to this module.
- if (!F || !F->hasLocalLinkage()) return 0;
+ if (!F || !F->hasLocalLinkage()) return nullptr;
// First check: see if there are any pointer arguments! If not, quick exit.
SmallVector<Argument*, 16> PointerArgs;
for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I)
if (I->getType()->isPointerTy())
PointerArgs.push_back(I);
- if (PointerArgs.empty()) return 0;
+ if (PointerArgs.empty()) return nullptr;
// Second check: make sure that all callers are direct callers. We can't
// transform functions that have indirect callers. Also see if the function
@@ -139,7 +140,7 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) {
for (Use &U : F->uses()) {
CallSite CS(U.getUser());
// Must be a direct call.
- if (CS.getInstruction() == 0 || !CS.isCallee(&U)) return 0;
+ if (CS.getInstruction() == nullptr || !CS.isCallee(&U)) return nullptr;
if (CS.getInstruction()->getParent()->getParent() == F)
isSelfRecursive = true;
@@ -207,7 +208,7 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) {
// No promotable pointer arguments.
if (ArgsToPromote.empty() && ByValArgsToTransform.empty())
- return 0;
+ return nullptr;
return DoPromotion(F, ArgsToPromote, ByValArgsToTransform);
}
@@ -660,7 +661,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
Type *AgTy = cast<PointerType>(I->getType())->getElementType();
StructType *STy = cast<StructType>(AgTy);
Value *Idxs[2] = {
- ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), 0 };
+ ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), nullptr };
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i);
Value *Idx = GetElementPtrInst::Create(*AI, Idxs,
@@ -788,10 +789,10 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
// Just add all the struct element types.
Type *AgTy = cast<PointerType>(I->getType())->getElementType();
- Value *TheAlloca = new AllocaInst(AgTy, 0, "", InsertPt);
+ Value *TheAlloca = new AllocaInst(AgTy, nullptr, "", InsertPt);
StructType *STy = cast<StructType>(AgTy);
Value *Idxs[2] = {
- ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), 0 };
+ ConstantInt::get(Type::getInt32Ty(F->getContext()), 0), nullptr };
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
Idxs[1] = ConstantInt::get(Type::getInt32Ty(F->getContext()), i);
diff --git a/lib/Transforms/IPO/ConstantMerge.cpp b/lib/Transforms/IPO/ConstantMerge.cpp
index 5c3acea..23be081 100644
--- a/lib/Transforms/IPO/ConstantMerge.cpp
+++ b/lib/Transforms/IPO/ConstantMerge.cpp
@@ -17,7 +17,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "constmerge"
#include "llvm/Transforms/IPO.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/PointerIntPair.h"
@@ -31,6 +30,8 @@
#include "llvm/Pass.h"
using namespace llvm;
+#define DEBUG_TYPE "constmerge"
+
STATISTIC(NumMerged, "Number of global constants merged");
namespace {
@@ -66,7 +67,7 @@ ModulePass *llvm::createConstantMergePass() { return new ConstantMerge(); }
/// Find values that are marked as llvm.used.
static void FindUsedValues(GlobalVariable *LLVMUsed,
SmallPtrSet<const GlobalValue*, 8> &UsedValues) {
- if (LLVMUsed == 0) return;
+ if (!LLVMUsed) return;
ConstantArray *Inits = cast<ConstantArray>(LLVMUsed->getInitializer());
for (unsigned i = 0, e = Inits->getNumOperands(); i != e; ++i) {
@@ -103,7 +104,7 @@ unsigned ConstantMerge::getAlignment(GlobalVariable *GV) const {
bool ConstantMerge::runOnModule(Module &M) {
DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : 0;
+ DL = DLP ? &DLP->getDataLayout() : nullptr;
// Find all the globals that are marked "used". These cannot be merged.
SmallPtrSet<const GlobalValue*, 8> UsedGlobals;
@@ -161,7 +162,7 @@ bool ConstantMerge::runOnModule(Module &M) {
// If this is the first constant we find or if the old one is local,
// replace with the current one. If the current is externally visible
// it cannot be replaced, but can be the canonical constant we merge with.
- if (Slot == 0 || IsBetterCanonical(*GV, *Slot))
+ if (!Slot || IsBetterCanonical(*GV, *Slot))
Slot = GV;
}
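
The 0/NULL to nullptr conversion running through these IPO passes is not purely cosmetic: nullptr has a distinct type (std::nullptr_t), so it can never be taken for an integer in overload resolution. A minimal runnable demonstration:

    #include <cstring>

    static const char *pick(int) { return "int"; }
    static const char *pick(char *) { return "pointer"; }

    int main() {
      // pick(0) would bind to the int overload; pick(nullptr) unambiguously
      // selects the pointer overload, which is the safety the rewrite buys.
      return std::strcmp(pick(nullptr), "pointer") == 0 ? 0 : 1;
    }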
diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp
index 1aba3df..284b896 100644
--- a/lib/Transforms/IPO/DeadArgumentElimination.cpp
+++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp
@@ -17,7 +17,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "deadargelim"
#include "llvm/Transforms/IPO.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
@@ -38,8 +37,11 @@
#include "llvm/Support/raw_ostream.h"
#include <map>
#include <set>
+#include <tuple>
using namespace llvm;
+#define DEBUG_TYPE "deadargelim"
+
STATISTIC(NumArgumentsEliminated, "Number of unread args removed");
STATISTIC(NumRetValsEliminated , "Number of unused return values removed");
STATISTIC(NumArgumentsReplacedWithUndef,
@@ -764,7 +766,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
// Find out the new return value.
Type *RetTy = FTy->getReturnType();
- Type *NRetTy = NULL;
+ Type *NRetTy = nullptr;
unsigned RetCount = NumRetVals(F);
// -1 means unused, other numbers are the new index
@@ -1050,7 +1052,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
Value *RetVal;
if (NFTy->getReturnType()->isVoidTy()) {
- RetVal = 0;
+ RetVal = nullptr;
} else {
assert (RetTy->isStructTy());
// The original return value was a struct, insert
diff --git a/lib/Transforms/IPO/ExtractGV.cpp b/lib/Transforms/IPO/ExtractGV.cpp
index 4211f12..40ec9fa 100644
--- a/lib/Transforms/IPO/ExtractGV.cpp
+++ b/lib/Transforms/IPO/ExtractGV.cpp
@@ -27,11 +27,10 @@ using namespace llvm;
/// the split module remain valid.
static void makeVisible(GlobalValue &GV, bool Delete) {
bool Local = GV.hasLocalLinkage();
- if (Local)
- GV.setVisibility(GlobalValue::HiddenVisibility);
-
if (Local || Delete) {
GV.setLinkage(GlobalValue::ExternalLinkage);
+ if (Local)
+ GV.setVisibility(GlobalValue::HiddenVisibility);
return;
}
@@ -95,7 +94,7 @@ namespace {
makeVisible(*I, Delete);
if (Delete)
- I->setInitializer(0);
+ I->setInitializer(nullptr);
}
// Visit the Functions.
@@ -134,7 +133,7 @@ namespace {
} else {
Declaration =
new GlobalVariable(M, Ty, false, GlobalValue::ExternalLinkage,
- 0, CurI->getName());
+ nullptr, CurI->getName());
}
CurI->replaceAllUsesWith(Declaration);
diff --git a/lib/Transforms/IPO/FunctionAttrs.cpp b/lib/Transforms/IPO/FunctionAttrs.cpp
index b716718..fed8839 100644
--- a/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -18,7 +18,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "functionattrs"
#include "llvm/Transforms/IPO.h"
#include "llvm/ADT/SCCIterator.h"
#include "llvm/ADT/SetVector.h"
@@ -35,6 +34,8 @@
#include "llvm/Target/TargetLibraryInfo.h"
using namespace llvm;
+#define DEBUG_TYPE "functionattrs"
+
STATISTIC(NumReadNone, "Number of functions marked readnone");
STATISTIC(NumReadOnly, "Number of functions marked readonly");
STATISTIC(NumNoCapture, "Number of arguments marked nocapture");
@@ -46,7 +47,7 @@ STATISTIC(NumAnnotated, "Number of attributes added to library functions");
namespace {
struct FunctionAttrs : public CallGraphSCCPass {
static char ID; // Pass identification, replacement for typeid
- FunctionAttrs() : CallGraphSCCPass(ID), AA(0) {
+ FunctionAttrs() : CallGraphSCCPass(ID), AA(nullptr) {
initializeFunctionAttrsPass(*PassRegistry::getPassRegistry());
}
@@ -160,7 +161,7 @@ bool FunctionAttrs::AddReadAttrs(const CallGraphSCC &SCC) {
for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
Function *F = (*I)->getFunction();
- if (F == 0)
+ if (!F)
// External node - may write memory. Just give up.
return false;
@@ -319,7 +320,7 @@ namespace {
ArgumentGraphNode SyntheticRoot;
public:
- ArgumentGraph() { SyntheticRoot.Definition = 0; }
+ ArgumentGraph() { SyntheticRoot.Definition = nullptr; }
typedef SmallVectorImpl<ArgumentGraphNode*>::iterator iterator;
@@ -521,7 +522,7 @@ bool FunctionAttrs::AddArgumentAttrs(const CallGraphSCC &SCC) {
for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
Function *F = (*I)->getFunction();
- if (F == 0)
+ if (!F)
// External node - only a problem for arguments that we pass to it.
continue;
@@ -600,7 +601,7 @@ bool FunctionAttrs::AddArgumentAttrs(const CallGraphSCC &SCC) {
// captures.
for (scc_iterator<ArgumentGraph*> I = scc_begin(&AG); !I.isAtEnd(); ++I) {
- std::vector<ArgumentGraphNode*> &ArgumentSCC = *I;
+ const std::vector<ArgumentGraphNode *> &ArgumentSCC = *I;
if (ArgumentSCC.size() == 1) {
if (!ArgumentSCC[0]->Definition) continue; // synthetic root node
@@ -616,8 +617,8 @@ bool FunctionAttrs::AddArgumentAttrs(const CallGraphSCC &SCC) {
}
bool SCCCaptured = false;
- for (std::vector<ArgumentGraphNode*>::iterator I = ArgumentSCC.begin(),
- E = ArgumentSCC.end(); I != E && !SCCCaptured; ++I) {
+ for (auto I = ArgumentSCC.begin(), E = ArgumentSCC.end();
+ I != E && !SCCCaptured; ++I) {
ArgumentGraphNode *Node = *I;
if (Node->Uses.empty()) {
if (!Node->Definition->hasNoCaptureAttr())
@@ -629,13 +630,12 @@ bool FunctionAttrs::AddArgumentAttrs(const CallGraphSCC &SCC) {
SmallPtrSet<Argument*, 8> ArgumentSCCNodes;
// Fill ArgumentSCCNodes with the elements of the ArgumentSCC. Used for
// quickly looking up whether a given Argument is in this ArgumentSCC.
- for (std::vector<ArgumentGraphNode*>::iterator I = ArgumentSCC.begin(),
- E = ArgumentSCC.end(); I != E; ++I) {
+ for (auto I = ArgumentSCC.begin(), E = ArgumentSCC.end(); I != E; ++I) {
ArgumentSCCNodes.insert((*I)->Definition);
}
- for (std::vector<ArgumentGraphNode*>::iterator I = ArgumentSCC.begin(),
- E = ArgumentSCC.end(); I != E && !SCCCaptured; ++I) {
+ for (auto I = ArgumentSCC.begin(), E = ArgumentSCC.end();
+ I != E && !SCCCaptured; ++I) {
ArgumentGraphNode *N = *I;
for (SmallVectorImpl<ArgumentGraphNode*>::iterator UI = N->Uses.begin(),
UE = N->Uses.end(); UI != UE; ++UI) {
@@ -775,7 +775,7 @@ bool FunctionAttrs::AddNoAliasAttrs(const CallGraphSCC &SCC) {
for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
Function *F = (*I)->getFunction();
- if (F == 0)
+ if (!F)
// External node - skip it;
return false;
@@ -1668,7 +1668,7 @@ bool FunctionAttrs::annotateLibraryCalls(const CallGraphSCC &SCC) {
for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
Function *F = (*I)->getFunction();
- if (F != 0 && F->isDeclaration())
+ if (F && F->isDeclaration())
MadeChange |= inferPrototypeAttributes(*F);
}
diff --git a/lib/Transforms/IPO/GlobalDCE.cpp b/lib/Transforms/IPO/GlobalDCE.cpp
index 0c081f1..9decddc 100644
--- a/lib/Transforms/IPO/GlobalDCE.cpp
+++ b/lib/Transforms/IPO/GlobalDCE.cpp
@@ -15,15 +15,18 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "globaldce"
#include "llvm/Transforms/IPO.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
+#include "llvm/Transforms/Utils/CtorUtils.h"
#include "llvm/Pass.h"
using namespace llvm;
+#define DEBUG_TYPE "globaldce"
+
STATISTIC(NumAliases , "Number of global aliases removed");
STATISTIC(NumFunctions, "Number of functions removed");
STATISTIC(NumVariables, "Number of global variables removed");
@@ -53,6 +56,15 @@ namespace {
};
}
+/// Returns true if F contains only a single "ret" instruction.
+static bool isEmptyFunction(Function *F) {
+ BasicBlock &Entry = F->getEntryBlock();
+ if (Entry.size() != 1 || !isa<ReturnInst>(Entry.front()))
+ return false;
+ ReturnInst &RI = cast<ReturnInst>(Entry.front());
+ return RI.getReturnValue() == nullptr;
+}
+
char GlobalDCE::ID = 0;
INITIALIZE_PASS(GlobalDCE, "globaldce",
"Dead Global Elimination", false, false)
@@ -61,7 +73,10 @@ ModulePass *llvm::createGlobalDCEPass() { return new GlobalDCE(); }
bool GlobalDCE::runOnModule(Module &M) {
bool Changed = false;
-
+
+ // Remove empty functions from the global ctors list.
+ Changed |= optimizeGlobalCtorsList(M, isEmptyFunction);
+
// Loop over the module, adding globals which are obviously necessary.
for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
Changed |= RemoveUnusedGlobalValue(*I);
@@ -99,7 +114,7 @@ bool GlobalDCE::runOnModule(Module &M) {
I != E; ++I)
if (!AliveGlobals.count(I)) {
DeadGlobalVars.push_back(I); // Keep track of dead globals
- I->setInitializer(0);
+ I->setInitializer(nullptr);
}
// The second pass drops the bodies of functions which are dead...
@@ -117,7 +132,7 @@ bool GlobalDCE::runOnModule(Module &M) {
++I)
if (!AliveGlobals.count(I)) {
DeadAliases.push_back(I);
- I->setAliasee(0);
+ I->setAliasee(nullptr);
}
if (!DeadFunctions.empty()) {
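
GlobalDCE's new preamble delegates ctor pruning to the shared optimizeGlobalCtorsList helper, passing isEmptyFunction as the removal predicate. A generic runnable model of that contract (stand-in types; the real helper operates on llvm.global_ctors via llvm/Transforms/Utils/CtorUtils.h):

    #include <algorithm>
    #include <cassert>
    #include <cstddef>
    #include <vector>

    struct Ctor { bool Empty; }; // stand-in for a global-ctor entry

    // Drop every ctor the predicate marks removable; report if we changed.
    static bool optimizeCtorsList(std::vector<Ctor> &Ctors,
                                  bool (*ShouldRemove)(const Ctor &)) {
      std::size_t Before = Ctors.size();
      Ctors.erase(std::remove_if(Ctors.begin(), Ctors.end(), ShouldRemove),
                  Ctors.end());
      return Ctors.size() != Before;
    }

    int main() {
      std::vector<Ctor> Ctors = {{true}, {false}, {true}};
      bool Changed = optimizeCtorsList(
          Ctors, [](const Ctor &C) { return C.Empty; }); // drop empty ctors
      assert(Changed && Ctors.size() == 1);
      return 0;
    }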
diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp
index 1a510cf..ae80c43 100644
--- a/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/lib/Transforms/IPO/GlobalOpt.cpp
@@ -13,7 +13,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "globalopt"
#include "llvm/Transforms/IPO.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
@@ -39,11 +38,15 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetLibraryInfo.h"
+#include "llvm/Transforms/Utils/CtorUtils.h"
#include "llvm/Transforms/Utils/GlobalStatus.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
#include <algorithm>
+#include <deque>
using namespace llvm;
+#define DEBUG_TYPE "globalopt"
+
STATISTIC(NumMarked , "Number of globals marked constant");
STATISTIC(NumUnnamed , "Number of globals marked unnamed_addr");
STATISTIC(NumSRA , "Number of aggregate globals broken into scalars");
@@ -74,11 +77,9 @@ namespace {
bool runOnModule(Module &M) override;
private:
- GlobalVariable *FindGlobalCtors(Module &M);
bool OptimizeFunctions(Module &M);
bool OptimizeGlobalVars(Module &M);
bool OptimizeGlobalAliases(Module &M);
- bool OptimizeGlobalCtorsList(GlobalVariable *&GCL);
bool ProcessGlobal(GlobalVariable *GV,Module::global_iterator &GVI);
bool ProcessInternalGlobal(GlobalVariable *GV,Module::global_iterator &GVI,
const GlobalStatus &GS);
@@ -294,7 +295,7 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init,
Changed = true;
} else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) {
if (CE->getOpcode() == Instruction::GetElementPtr) {
- Constant *SubInit = 0;
+ Constant *SubInit = nullptr;
if (Init)
SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE);
Changed |= CleanupConstantGlobalUsers(CE, SubInit, DL, TLI);
@@ -302,7 +303,7 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init,
CE->getType()->isPointerTy()) ||
CE->getOpcode() == Instruction::AddrSpaceCast) {
// Pointer cast, delete any stores and memsets to the global.
- Changed |= CleanupConstantGlobalUsers(CE, 0, DL, TLI);
+ Changed |= CleanupConstantGlobalUsers(CE, nullptr, DL, TLI);
}
if (CE->use_empty()) {
@@ -313,7 +314,7 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init,
// Do not transform "gepinst (gep constexpr (GV))" here, because forming
// "gepconstexpr (gep constexpr (GV))" will cause the two gep's to fold
// and will invalidate our notion of what Init is.
- Constant *SubInit = 0;
+ Constant *SubInit = nullptr;
if (!isa<ConstantExpr>(GEP->getOperand(0))) {
ConstantExpr *CE =
dyn_cast_or_null<ConstantExpr>(ConstantFoldInstruction(GEP, DL, TLI));
@@ -370,7 +371,7 @@ static bool isSafeSROAElementUse(Value *V) {
// Otherwise, it must be a GEP.
GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I);
- if (GEPI == 0) return false;
+ if (!GEPI) return false;
if (GEPI->getNumOperands() < 3 || !isa<Constant>(GEPI->getOperand(1)) ||
!cast<Constant>(GEPI->getOperand(1))->isNullValue())
@@ -470,7 +471,7 @@ static bool GlobalUsersSafeToSRA(GlobalValue *GV) {
static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) {
// Make sure this global only has simple uses that we can SRA.
if (!GlobalUsersSafeToSRA(GV))
- return 0;
+ return nullptr;
assert(GV->hasLocalLinkage() && !GV->isConstant());
Constant *Init = GV->getInitializer();
@@ -514,7 +515,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) {
NumElements = cast<VectorType>(STy)->getNumElements();
if (NumElements > 16 && GV->hasNUsesOrMore(16))
- return 0; // It's not worth it.
+ return nullptr; // It's not worth it.
NewGlobals.reserve(NumElements);
uint64_t EltSize = DL.getTypeAllocSize(STy->getElementType());
@@ -541,7 +542,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) {
}
if (NewGlobals.empty())
- return 0;
+ return nullptr;
DEBUG(dbgs() << "PERFORMING GLOBAL SRA ON: " << *GV);
@@ -603,7 +604,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) {
if (FirstGlobal == i) ++FirstGlobal;
}
- return FirstGlobal != NewGlobals.size() ? NewGlobals[FirstGlobal] : 0;
+ return FirstGlobal != NewGlobals.size() ? NewGlobals[FirstGlobal] : nullptr;
}
/// AllUsesOfValueWillTrapIfNull - Return true if all users of the specified
@@ -785,7 +786,7 @@ static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV,
Changed |= CleanupPointerRootUsers(GV, TLI);
} else {
Changed = true;
- CleanupConstantGlobalUsers(GV, 0, DL, TLI);
+ CleanupConstantGlobalUsers(GV, nullptr, DL, TLI);
}
if (GV->use_empty()) {
DEBUG(dbgs() << " *** GLOBAL NOW DEAD!\n");
@@ -847,7 +848,7 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,
// If there are bitcast users of the malloc (which is typical, usually we have
// a malloc + bitcast) then replace them with uses of the new global. Update
// other users to use the global as well.
- BitCastInst *TheBC = 0;
+ BitCastInst *TheBC = nullptr;
while (!CI->use_empty()) {
Instruction *User = cast<Instruction>(CI->user_back());
if (BitCastInst *BCI = dyn_cast<BitCastInst>(User)) {
@@ -858,7 +859,7 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,
BCI->setOperand(0, NewGV);
}
} else {
- if (TheBC == 0)
+ if (!TheBC)
TheBC = new BitCastInst(NewGV, CI->getType(), "newgv", CI);
User->replaceUsesOfWith(CI, TheBC);
}
@@ -1169,10 +1170,13 @@ static Value *GetHeapSROAValue(Value *V, unsigned FieldNo,
} else if (PHINode *PN = dyn_cast<PHINode>(V)) {
// PN's type is pointer to struct. Make a new PHI of pointer to struct
// field.
- StructType *ST = cast<StructType>(PN->getType()->getPointerElementType());
+ PointerType *PTy = cast<PointerType>(PN->getType());
+ StructType *ST = cast<StructType>(PTy->getElementType());
+
+ unsigned AS = PTy->getAddressSpace();
PHINode *NewPN =
- PHINode::Create(PointerType::getUnqual(ST->getElementType(FieldNo)),
+ PHINode::Create(PointerType::get(ST->getElementType(FieldNo), AS),
PN->getNumIncomingValues(),
PN->getName()+".f"+Twine(FieldNo), PN);
Result = NewPN;
@@ -1284,9 +1288,10 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
std::vector<Value*> FieldGlobals;
std::vector<Value*> FieldMallocs;
+ unsigned AS = GV->getType()->getPointerAddressSpace();
for (unsigned FieldNo = 0, e = STy->getNumElements(); FieldNo != e;++FieldNo){
Type *FieldTy = STy->getElementType(FieldNo);
- PointerType *PFieldTy = PointerType::getUnqual(FieldTy);
+ PointerType *PFieldTy = PointerType::get(FieldTy, AS);
GlobalVariable *NGV =
new GlobalVariable(*GV->getParent(),
@@ -1302,7 +1307,7 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
Type *IntPtrTy = DL->getIntPtrType(CI->getType());
Value *NMI = CallInst::CreateMalloc(CI, IntPtrTy, FieldTy,
ConstantInt::get(IntPtrTy, TypeSize),
- NElems, 0,
+ NElems, nullptr,
CI->getName() + ".f" + Twine(FieldNo));
FieldMallocs.push_back(NMI);
new StoreInst(NMI, NGV, CI);
@@ -1535,7 +1540,7 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
Value *NumElements = ConstantInt::get(IntPtrTy, AT->getNumElements());
Instruction *Malloc = CallInst::CreateMalloc(CI, IntPtrTy, AllocSTy,
AllocSize, NumElements,
- 0, CI->getName());
+ nullptr, CI->getName());
Instruction *Cast = new BitCastInst(Malloc, CI->getType(), "tmp", CI);
CI->replaceAllUsesWith(Cast);
CI->eraseFromParent();
@@ -1750,7 +1755,8 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
->getEntryBlock().begin());
Type *ElemTy = GV->getType()->getElementType();
// FIXME: Pass Global's alignment when globals have alignment
- AllocaInst *Alloca = new AllocaInst(ElemTy, NULL, GV->getName(), &FirstI);
+ AllocaInst *Alloca = new AllocaInst(ElemTy, nullptr,
+ GV->getName(), &FirstI);
if (!isa<UndefValue>(GV->getInitializer()))
new StoreInst(GV->getInitializer(), Alloca, &FirstI);
@@ -1957,116 +1963,6 @@ bool GlobalOpt::OptimizeGlobalVars(Module &M) {
return Changed;
}
-/// FindGlobalCtors - Find the llvm.global_ctors list, verifying that all
-/// initializers have an init priority of 65535.
-GlobalVariable *GlobalOpt::FindGlobalCtors(Module &M) {
- GlobalVariable *GV = M.getGlobalVariable("llvm.global_ctors");
- if (GV == 0) return 0;
-
- // Verify that the initializer is simple enough for us to handle. We are
- // only allowed to optimize the initializer if it is unique.
- if (!GV->hasUniqueInitializer()) return 0;
-
- if (isa<ConstantAggregateZero>(GV->getInitializer()))
- return GV;
- ConstantArray *CA = cast<ConstantArray>(GV->getInitializer());
-
- for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i) {
- if (isa<ConstantAggregateZero>(*i))
- continue;
- ConstantStruct *CS = cast<ConstantStruct>(*i);
- if (isa<ConstantPointerNull>(CS->getOperand(1)))
- continue;
-
- // Must have a function or null ptr.
- if (!isa<Function>(CS->getOperand(1)))
- return 0;
-
- // Init priority must be standard.
- ConstantInt *CI = cast<ConstantInt>(CS->getOperand(0));
- if (CI->getZExtValue() != 65535)
- return 0;
- }
-
- return GV;
-}
-
-/// ParseGlobalCtors - Given a llvm.global_ctors list that we can understand,
-/// return a list of the functions and null terminator as a vector.
-static std::vector<Function*> ParseGlobalCtors(GlobalVariable *GV) {
- if (GV->getInitializer()->isNullValue())
- return std::vector<Function*>();
- ConstantArray *CA = cast<ConstantArray>(GV->getInitializer());
- std::vector<Function*> Result;
- Result.reserve(CA->getNumOperands());
- for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i) {
- ConstantStruct *CS = cast<ConstantStruct>(*i);
- Result.push_back(dyn_cast<Function>(CS->getOperand(1)));
- }
- return Result;
-}
-
-/// InstallGlobalCtors - Given a specified llvm.global_ctors list, install the
-/// specified array, returning the new global to use.
-static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL,
- const std::vector<Function*> &Ctors) {
- // If we made a change, reassemble the initializer list.
- Constant *CSVals[2];
- CSVals[0] = ConstantInt::get(Type::getInt32Ty(GCL->getContext()), 65535);
- CSVals[1] = 0;
-
- StructType *StructTy =
- cast<StructType>(GCL->getType()->getElementType()->getArrayElementType());
-
- // Create the new init list.
- std::vector<Constant*> CAList;
- for (unsigned i = 0, e = Ctors.size(); i != e; ++i) {
- if (Ctors[i]) {
- CSVals[1] = Ctors[i];
- } else {
- Type *FTy = FunctionType::get(Type::getVoidTy(GCL->getContext()),
- false);
- PointerType *PFTy = PointerType::getUnqual(FTy);
- CSVals[1] = Constant::getNullValue(PFTy);
- CSVals[0] = ConstantInt::get(Type::getInt32Ty(GCL->getContext()),
- 0x7fffffff);
- }
- CAList.push_back(ConstantStruct::get(StructTy, CSVals));
- }
-
- // Create the array initializer.
- Constant *CA = ConstantArray::get(ArrayType::get(StructTy,
- CAList.size()), CAList);
-
- // If we didn't change the number of elements, don't create a new GV.
- if (CA->getType() == GCL->getInitializer()->getType()) {
- GCL->setInitializer(CA);
- return GCL;
- }
-
- // Create the new global and insert it next to the existing list.
- GlobalVariable *NGV = new GlobalVariable(CA->getType(), GCL->isConstant(),
- GCL->getLinkage(), CA, "",
- GCL->getThreadLocalMode());
- GCL->getParent()->getGlobalList().insert(GCL, NGV);
- NGV->takeName(GCL);
-
- // Nuke the old list, replacing any uses with the new one.
- if (!GCL->use_empty()) {
- Constant *V = NGV;
- if (V->getType() != GCL->getType())
- V = ConstantExpr::getBitCast(V, GCL->getType());
- GCL->replaceAllUsesWith(V);
- }
- GCL->eraseFromParent();
-
- if (Ctors.size())
- return NGV;
- else
- return 0;
-}
-
-
static inline bool
isSimpleEnoughValueToCommit(Constant *C,
SmallPtrSet<Constant*, 8> &SimpleConstants,
@@ -2271,22 +2167,16 @@ class Evaluator {
public:
Evaluator(const DataLayout *DL, const TargetLibraryInfo *TLI)
: DL(DL), TLI(TLI) {
- ValueStack.push_back(new DenseMap<Value*, Constant*>);
+ ValueStack.emplace_back();
}
~Evaluator() {
- DeleteContainerPointers(ValueStack);
- while (!AllocaTmps.empty()) {
- GlobalVariable *Tmp = AllocaTmps.back();
- AllocaTmps.pop_back();
-
+ for (auto &Tmp : AllocaTmps)
// If there are still users of the alloca, the program is doing something
// silly, e.g. storing the address of the alloca somewhere and using it
// later. Since this is undefined, we'll just make it be null.
if (!Tmp->use_empty())
Tmp->replaceAllUsesWith(Constant::getNullValue(Tmp->getType()));
- delete Tmp;
- }
}
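The rewritten constructor and destructor trade manual memory management for value semantics: stack frames live directly in a std::deque, whose emplace_back never relocates existing elements, and the temporary globals are owned by unique_ptr, so the explicit pop-and-delete loop disappears. A generic sketch of the pattern, independent of the LLVM types:

    #include <deque>
    #include <map>
    #include <memory>

    struct Frame { std::map<int, int> Slots; };

    std::deque<Frame> ValueStack;              // frames held by value
    std::deque<std::unique_ptr<Frame>> Owned;  // or owned through unique_ptr

    void enterCall() { ValueStack.emplace_back(); } // construct in place
    void leaveCall() { ValueStack.pop_back(); }     // destructor runs here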
/// EvaluateFunction - Evaluate a call to function F, returning true if
@@ -2302,13 +2192,13 @@ public:
Constant *getVal(Value *V) {
if (Constant *CV = dyn_cast<Constant>(V)) return CV;
- Constant *R = ValueStack.back()->lookup(V);
+ Constant *R = ValueStack.back().lookup(V);
assert(R && "Reference to an uncomputed value!");
return R;
}
void setVal(Value *V, Constant *C) {
- ValueStack.back()->operator[](V) = C;
+ ValueStack.back()[V] = C;
}
const DenseMap<Constant*, Constant*> &getMutatedMemory() const {
@@ -2323,9 +2213,9 @@ private:
Constant *ComputeLoadResult(Constant *P);
/// ValueStack - As we compute SSA register values, we store their contents
- /// here. The back of the vector contains the current function and the stack
+ /// here. The back of the deque contains the current function and the stack
/// contains the values in the calling frames.
- SmallVector<DenseMap<Value*, Constant*>*, 4> ValueStack;
+ std::deque<DenseMap<Value*, Constant*>> ValueStack;
/// CallStack - This is used to detect recursion. In pathological situations
/// we could hit exponential behavior, but at least there is nothing
@@ -2340,7 +2230,7 @@ private:
/// AllocaTmps - To 'execute' an alloca, we create a temporary global variable
/// to represent its body. This vector is needed so we can delete the
/// temporary globals when we are done.
- SmallVector<GlobalVariable*, 32> AllocaTmps;
+ SmallVector<std::unique_ptr<GlobalVariable>, 32> AllocaTmps;
/// Invariants - These global variables have been marked invariant by the
/// static constructor.
@@ -2369,7 +2259,7 @@ Constant *Evaluator::ComputeLoadResult(Constant *P) {
if (GlobalVariable *GV = dyn_cast<GlobalVariable>(P)) {
if (GV->hasDefinitiveInitializer())
return GV->getInitializer();
- return 0;
+ return nullptr;
}
// Handle a constantexpr getelementptr.
@@ -2381,7 +2271,7 @@ Constant *Evaluator::ComputeLoadResult(Constant *P) {
return ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE);
}
- return 0; // don't know how to evaluate.
+ return nullptr; // don't know how to evaluate.
}
/// EvaluateBlock - Evaluate all instructions in block BB, returning true if
@@ -2391,7 +2281,7 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
BasicBlock *&NextBB) {
// This is the main evaluation loop.
while (1) {
- Constant *InstResult = 0;
+ Constant *InstResult = nullptr;
DEBUG(dbgs() << "Evaluating Instruction: " << *CurInst << "\n");
@@ -2517,7 +2407,7 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
"folding: " << *Ptr << "\n");
}
InstResult = ComputeLoadResult(Ptr);
- if (InstResult == 0) {
+ if (!InstResult) {
DEBUG(dbgs() << "Failed to compute load result. Can not evaluate load."
"\n");
return false; // Could not evaluate load.
@@ -2530,11 +2420,10 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
return false; // Cannot handle array allocs.
}
Type *Ty = AI->getType()->getElementType();
- AllocaTmps.push_back(new GlobalVariable(Ty, false,
- GlobalValue::InternalLinkage,
- UndefValue::get(Ty),
- AI->getName()));
- InstResult = AllocaTmps.back();
+ AllocaTmps.push_back(
+ make_unique<GlobalVariable>(Ty, false, GlobalValue::InternalLinkage,
+ UndefValue::get(Ty), AI->getName()));
+ InstResult = AllocaTmps.back().get();
DEBUG(dbgs() << "Found an alloca. Result: " << *InstResult << "\n");
} else if (isa<CallInst>(CurInst) || isa<InvokeInst>(CurInst)) {
CallSite CS(CurInst);
@@ -2636,17 +2525,17 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
return false;
}
- Constant *RetVal = 0;
+ Constant *RetVal = nullptr;
// Execute the call, if successful, use the return value.
- ValueStack.push_back(new DenseMap<Value*, Constant*>);
+ ValueStack.emplace_back();
if (!EvaluateFunction(Callee, RetVal, Formals)) {
DEBUG(dbgs() << "Failed to evaluate function.\n");
return false;
}
- delete ValueStack.pop_back_val();
+ ValueStack.pop_back();
InstResult = RetVal;
- if (InstResult != NULL) {
+ if (InstResult) {
DEBUG(dbgs() << "Successfully evaluated function. Result: " <<
InstResult << "\n\n");
} else {
@@ -2678,7 +2567,7 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
else
return false; // Cannot determine.
} else if (isa<ReturnInst>(CurInst)) {
- NextBB = 0;
+ NextBB = nullptr;
} else {
// invoke, unwind, resume, unreachable.
DEBUG(dbgs() << "Can not handle terminator.");
@@ -2743,13 +2632,13 @@ bool Evaluator::EvaluateFunction(Function *F, Constant *&RetVal,
BasicBlock::iterator CurInst = CurBB->begin();
while (1) {
- BasicBlock *NextBB = 0; // Initialized to avoid compiler warnings.
+ BasicBlock *NextBB = nullptr; // Initialized to avoid compiler warnings.
DEBUG(dbgs() << "Trying to evaluate BB: " << *CurBB << "\n");
if (!EvaluateBlock(CurInst, NextBB))
return false;
- if (NextBB == 0) {
+ if (!NextBB) {
// Successfully running until there's no next block means that we found
// the return. Fill in the return value and pop the call stack.
ReturnInst *RI = cast<ReturnInst>(CurBB->getTerminator());
@@ -2768,7 +2657,7 @@ bool Evaluator::EvaluateFunction(Function *F, Constant *&RetVal,
// Okay, we have never been in this block before. Check to see if there
// are any PHI nodes. If so, evaluate them with information about where
// we came from.
- PHINode *PN = 0;
+ PHINode *PN = nullptr;
for (CurInst = NextBB->begin();
(PN = dyn_cast<PHINode>(CurInst)); ++CurInst)
setVal(PN, getVal(PN->getIncomingValueForBlock(CurBB)));
@@ -2789,6 +2678,8 @@ static bool EvaluateStaticConstructor(Function *F, const DataLayout *DL,
SmallVector<Constant*, 0>());
if (EvalSuccess) {
+ ++NumCtorsEvaluated;
+
// We succeeded at evaluation: commit the result.
DEBUG(dbgs() << "FULLY EVALUATED GLOBAL CTOR FUNCTION '"
<< F->getName() << "' to " << Eval.getMutatedMemory().size()
@@ -2806,46 +2697,6 @@ static bool EvaluateStaticConstructor(Function *F, const DataLayout *DL,
return EvalSuccess;
}
-/// OptimizeGlobalCtorsList - Simplify and evaluation global ctors if possible.
-/// Return true if anything changed.
-bool GlobalOpt::OptimizeGlobalCtorsList(GlobalVariable *&GCL) {
- std::vector<Function*> Ctors = ParseGlobalCtors(GCL);
- bool MadeChange = false;
- if (Ctors.empty()) return false;
-
- // Loop over global ctors, optimizing them when we can.
- for (unsigned i = 0; i != Ctors.size(); ++i) {
- Function *F = Ctors[i];
- // Found a null terminator in the middle of the list, prune off the rest of
- // the list.
- if (F == 0) {
- if (i != Ctors.size()-1) {
- Ctors.resize(i+1);
- MadeChange = true;
- }
- break;
- }
- DEBUG(dbgs() << "Optimizing Global Constructor: " << *F << "\n");
-
- // We cannot simplify external ctor functions.
- if (F->empty()) continue;
-
- // If we can evaluate the ctor at compile time, do.
- if (EvaluateStaticConstructor(F, DL, TLI)) {
- Ctors.erase(Ctors.begin()+i);
- MadeChange = true;
- --i;
- ++NumCtorsEvaluated;
- continue;
- }
- }
-
- if (!MadeChange) return false;
-
- GCL = InstallGlobalCtors(GCL, Ctors);
- return true;
-}
-
static int compareNames(Constant *const *A, Constant *const *B) {
return (*A)->getName().compare((*B)->getName());
}
@@ -3010,7 +2861,7 @@ bool GlobalOpt::OptimizeGlobalAliases(Module &M) {
if (!hasUsesToReplace(*J, Used, RenameTarget))
continue;
- J->replaceAllUsesWith(Aliasee);
+ J->replaceAllUsesWith(ConstantExpr::getBitCast(Aliasee, J->getType()));
++NumAliasesResolved;
Changed = true;
@@ -3042,12 +2893,12 @@ bool GlobalOpt::OptimizeGlobalAliases(Module &M) {
static Function *FindCXAAtExit(Module &M, TargetLibraryInfo *TLI) {
if (!TLI->has(LibFunc::cxa_atexit))
- return 0;
+ return nullptr;
Function *Fn = M.getFunction(TLI->getName(LibFunc::cxa_atexit));
if (!Fn)
- return 0;
+ return nullptr;
FunctionType *FTy = Fn->getFunctionType();
@@ -3058,7 +2909,7 @@ static Function *FindCXAAtExit(Module &M, TargetLibraryInfo *TLI) {
!FTy->getParamType(0)->isPointerTy() ||
!FTy->getParamType(1)->isPointerTy() ||
!FTy->getParamType(2)->isPointerTy())
- return 0;
+ return nullptr;
return Fn;
}
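The arity and parameter-type checks above (an integer return and three pointer parameters) match the Itanium C++ ABI prototype for __cxa_atexit, which registers a destructor to run at program exit or shared-library unload:

    // The prototype FindCXAAtExit is verifying, per the Itanium C++ ABI.
    extern "C" int __cxa_atexit(void (*destructor)(void *), void *arg,
                                void *dso_handle);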
@@ -3160,12 +3011,9 @@ bool GlobalOpt::runOnModule(Module &M) {
bool Changed = false;
DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : 0;
+ DL = DLP ? &DLP->getDataLayout() : nullptr;
TLI = &getAnalysis<TargetLibraryInfo>();
- // Try to find the llvm.globalctors list.
- GlobalVariable *GlobalCtors = FindGlobalCtors(M);
-
bool LocalChange = true;
while (LocalChange) {
LocalChange = false;
@@ -3174,8 +3022,9 @@ bool GlobalOpt::runOnModule(Module &M) {
LocalChange |= OptimizeFunctions(M);
// Optimize global_ctors list.
- if (GlobalCtors)
- LocalChange |= OptimizeGlobalCtorsList(GlobalCtors);
+ LocalChange |= optimizeGlobalCtorsList(M, [&](Function *F) {
+ return EvaluateStaticConstructor(F, DL, TLI);
+ });
// Optimize non-address-taken globals.
LocalChange |= OptimizeGlobalVars(M);
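The deleted FindGlobalCtors/ParseGlobalCtors/InstallGlobalCtors/OptimizeGlobalCtorsList machinery is replaced by this one call: list parsing and reinstallation presumably moved into a shared utility outside this diff, and GlobalOpt now supplies only the removal predicate. A hedged sketch of the assumed contract (the signature is an illustration, not shown in this patch):

    // Assumed utility: walk llvm.global_ctors in order, drop every ctor for
    // which ShouldRemove returns true, and rewrite the list if it changed.
    bool optimizeGlobalCtorsList(Module &M,
                                 std::function<bool(Function *)> ShouldRemove);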
diff --git a/lib/Transforms/IPO/IPConstantPropagation.cpp b/lib/Transforms/IPO/IPConstantPropagation.cpp
index 8684796..af541d1 100644
--- a/lib/Transforms/IPO/IPConstantPropagation.cpp
+++ b/lib/Transforms/IPO/IPConstantPropagation.cpp
@@ -15,7 +15,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "ipconstprop"
#include "llvm/Transforms/IPO.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
@@ -27,6 +26,8 @@
#include "llvm/Pass.h"
using namespace llvm;
+#define DEBUG_TYPE "ipconstprop"
+
STATISTIC(NumArgumentsProped, "Number of args turned into constants");
STATISTIC(NumReturnValProped, "Number of return values turned into constants");
@@ -112,7 +113,7 @@ bool IPCP::PropagateConstantsIntoArguments(Function &F) {
continue;
Constant *C = dyn_cast<Constant>(*AI);
- if (C && ArgumentConstants[i].first == 0) {
+ if (C && ArgumentConstants[i].first == nullptr) {
ArgumentConstants[i].first = C; // First constant seen.
} else if (C && ArgumentConstants[i].first == C) {
// Still the constant value we think it is.
@@ -139,7 +140,7 @@ bool IPCP::PropagateConstantsIntoArguments(Function &F) {
continue;
Value *V = ArgumentConstants[i].first;
- if (V == 0) V = UndefValue::get(AI->getType());
+ if (!V) V = UndefValue::get(AI->getType());
AI->replaceAllUsesWith(V);
++NumArgumentsProped;
MadeChange = true;
@@ -209,7 +210,7 @@ bool IPCP::PropagateConstantReturn(Function &F) {
}
// Different or no known return value? Don't propagate this return
// value.
- RetVals[i] = 0;
+ RetVals[i] = nullptr;
// All values non-constant? Stop looking.
if (++NumNonConstant == RetVals.size())
return false;
@@ -235,7 +236,7 @@ bool IPCP::PropagateConstantReturn(Function &F) {
MadeChange = true;
- if (STy == 0) {
+ if (!STy) {
Value* New = RetVals[0];
if (Argument *A = dyn_cast<Argument>(New))
// Was an argument returned? Then find the corresponding argument in
diff --git a/lib/Transforms/IPO/InlineAlways.cpp b/lib/Transforms/IPO/InlineAlways.cpp
index 6cf3040..624cb90 100644
--- a/lib/Transforms/IPO/InlineAlways.cpp
+++ b/lib/Transforms/IPO/InlineAlways.cpp
@@ -12,7 +12,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "inline"
#include "llvm/Transforms/IPO.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Analysis/CallGraph.h"
@@ -28,6 +27,8 @@
using namespace llvm;
+#define DEBUG_TYPE "inline"
+
namespace {
/// \brief Inliner pass which only handles "always inline" functions.
@@ -36,12 +37,13 @@ class AlwaysInliner : public Inliner {
public:
// Use extremely low threshold.
- AlwaysInliner() : Inliner(ID, -2000000000, /*InsertLifetime*/ true), ICA(0) {
+ AlwaysInliner() : Inliner(ID, -2000000000, /*InsertLifetime*/ true),
+ ICA(nullptr) {
initializeAlwaysInlinerPass(*PassRegistry::getPassRegistry());
}
AlwaysInliner(bool InsertLifetime)
- : Inliner(ID, -2000000000, InsertLifetime), ICA(0) {
+ : Inliner(ID, -2000000000, InsertLifetime), ICA(nullptr) {
initializeAlwaysInlinerPass(*PassRegistry::getPassRegistry());
}
@@ -93,8 +95,7 @@ InlineCost AlwaysInliner::getInlineCost(CallSite CS) {
// that are viable for inlining. FIXME: We shouldn't even get here for
// declarations.
if (Callee && !Callee->isDeclaration() &&
- Callee->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
- Attribute::AlwaysInline) &&
+ CS.hasFnAttr(Attribute::AlwaysInline) &&
ICA->isInlineViable(*Callee))
return InlineCost::getAlways();
diff --git a/lib/Transforms/IPO/InlineSimple.cpp b/lib/Transforms/IPO/InlineSimple.cpp
index 7141064..d189756 100644
--- a/lib/Transforms/IPO/InlineSimple.cpp
+++ b/lib/Transforms/IPO/InlineSimple.cpp
@@ -11,7 +11,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "inline"
#include "llvm/Transforms/IPO.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/InlineCost.h"
@@ -26,6 +25,8 @@
using namespace llvm;
+#define DEBUG_TYPE "inline"
+
namespace {
/// \brief Actual inliner pass implementation.
@@ -37,12 +38,12 @@ class SimpleInliner : public Inliner {
InlineCostAnalysis *ICA;
public:
- SimpleInliner() : Inliner(ID), ICA(0) {
+ SimpleInliner() : Inliner(ID), ICA(nullptr) {
initializeSimpleInlinerPass(*PassRegistry::getPassRegistry());
}
SimpleInliner(int Threshold)
- : Inliner(ID, Threshold, /*InsertLifetime*/ true), ICA(0) {
+ : Inliner(ID, Threshold, /*InsertLifetime*/ true), ICA(nullptr) {
initializeSimpleInlinerPass(*PassRegistry::getPassRegistry());
}
diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp
index e97fb83..9087ab2 100644
--- a/lib/Transforms/IPO/Inliner.cpp
+++ b/lib/Transforms/IPO/Inliner.cpp
@@ -13,7 +13,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "inline"
#include "llvm/Transforms/IPO/InlinerPass.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
@@ -21,6 +20,7 @@
#include "llvm/Analysis/InlineCost.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
@@ -32,6 +32,8 @@
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
+#define DEBUG_TYPE "inline"
+
STATISTIC(NumInlined, "Number of functions inlined");
STATISTIC(NumCallsDeleted, "Number of call sites deleted, not inlined");
STATISTIC(NumDeleted, "Number of functions deleted because all callers found");
@@ -183,7 +185,7 @@ static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI,
// canonicalized to be an allocation *of* an array), or allocations whose
// type is not itself an array (because we're afraid of pessimizing SRoA).
ArrayType *ATy = dyn_cast<ArrayType>(AI->getAllocatedType());
- if (ATy == 0 || AI->isArrayAllocation())
+ if (!ATy || AI->isArrayAllocation())
continue;
// Get the list of all available allocas for this array type.
@@ -239,7 +241,7 @@ static bool InlineCallIfPossible(CallSite CS, InlineFunctionInfo &IFI,
AI->eraseFromParent();
MergedAwayAlloca = true;
++NumMergedAllocas;
- IFI.StaticAllocas[AllocaNo] = 0;
+ IFI.StaticAllocas[AllocaNo] = nullptr;
break;
}
@@ -288,12 +290,24 @@ unsigned Inliner::getInlineThreshold(CallSite CS) const {
bool ColdCallee = Callee && !Callee->isDeclaration() &&
Callee->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
Attribute::Cold);
- if (ColdCallee && ColdThreshold < thres)
+ // Command line argument for InlineLimit will override the default
+ // ColdThreshold. If we have -inline-threshold but no -inlinecold-threshold,
+ // do not use the default cold threshold even if it is smaller.
+ if ((InlineLimit.getNumOccurrences() == 0 ||
+ ColdThreshold.getNumOccurrences() > 0) && ColdCallee &&
+ ColdThreshold < thres)
thres = ColdThreshold;
return thres;
}
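The new condition makes an explicit -inline-threshold win over the built-in cold default: the cold threshold applies only when -inline-threshold was not passed, or when -inlinecold-threshold was passed explicitly. The same rule restated as a stand-alone sketch (the helper name and parameters are illustrative):

    // Hypothetical restatement of the precedence rule above.
    int effectiveThreshold(int Thres, int ColdThres, bool ColdCallee,
                           bool UserSetInlineLimit, bool UserSetColdThres) {
      bool ColdApplies = !UserSetInlineLimit || UserSetColdThres;
      if (ColdApplies && ColdCallee && ColdThres < Thres)
        return ColdThres;
      return Thres;
    }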
+static void emitAnalysis(CallSite CS, const Twine &Msg) {
+ Function *Caller = CS.getCaller();
+ LLVMContext &Ctx = Caller->getContext();
+ DebugLoc DLoc = CS.getInstruction()->getDebugLoc();
+ emitOptimizationRemarkAnalysis(Ctx, DEBUG_TYPE, *Caller, DLoc, Msg);
+}
+
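emitAnalysis funnels the inliner's cost verdicts into the remark machinery pulled in by the new DiagnosticInfo.h include; together with the emitOptimizationRemark and emitOptimizationRemarkMissed calls added further down, this is what front ends can surface to users (for example through clang's -Rpass flag family). A minimal sketch of a direct call, mirroring the helper above:

    // Sketch: emit an analysis remark against a function at a source location.
    void reportNote(Function &Caller, DebugLoc DLoc, const Twine &Msg) {
      emitOptimizationRemarkAnalysis(Caller.getContext(), "inline",
                                     Caller, DLoc, Msg);
    }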
/// shouldInline - Return true if the inliner should attempt to inline
/// at the given CallSite.
bool Inliner::shouldInline(CallSite CS) {
@@ -302,12 +316,16 @@ bool Inliner::shouldInline(CallSite CS) {
if (IC.isAlways()) {
DEBUG(dbgs() << " Inlining: cost=always"
<< ", Call: " << *CS.getInstruction() << "\n");
+ emitAnalysis(CS, Twine(CS.getCalledFunction()->getName()) +
+ " should always be inlined (cost=always)");
return true;
}
if (IC.isNever()) {
DEBUG(dbgs() << " NOT Inlining: cost=never"
<< ", Call: " << *CS.getInstruction() << "\n");
+ emitAnalysis(CS, Twine(CS.getCalledFunction()->getName() +
+ " should never be inlined (cost=never)"));
return false;
}
@@ -316,6 +334,10 @@ bool Inliner::shouldInline(CallSite CS) {
DEBUG(dbgs() << " NOT Inlining: cost=" << IC.getCost()
<< ", thres=" << (IC.getCostDelta() + IC.getCost())
<< ", Call: " << *CS.getInstruction() << "\n");
+ emitAnalysis(CS, Twine(CS.getCalledFunction()->getName() +
+ " too costly to inline (cost=") +
+ Twine(IC.getCost()) + ", threshold=" +
+ Twine(IC.getCostDelta() + IC.getCost()) + ")");
return false;
}
@@ -383,6 +405,11 @@ bool Inliner::shouldInline(CallSite CS) {
DEBUG(dbgs() << " NOT Inlining: " << *CS.getInstruction() <<
" Cost = " << IC.getCost() <<
", outer Cost = " << TotalSecondaryCost << '\n');
+ emitAnalysis(
+ CS, Twine("Not inlining. Cost of inlining " +
+ CS.getCalledFunction()->getName() +
+ " increases the cost of inlining " +
+ CS.getCaller()->getName() + " in other contexts"));
return false;
}
}
@@ -390,6 +417,10 @@ bool Inliner::shouldInline(CallSite CS) {
DEBUG(dbgs() << " Inlining: cost=" << IC.getCost()
<< ", thres=" << (IC.getCostDelta() + IC.getCost())
<< ", Call: " << *CS.getInstruction() << '\n');
+ emitAnalysis(
+ CS, CS.getCalledFunction()->getName() + Twine(" can be inlined into ") +
+ CS.getCaller()->getName() + " with cost=" + Twine(IC.getCost()) +
+ " (threshold=" + Twine(IC.getCostDelta() + IC.getCost()) + ")");
return true;
}
@@ -410,7 +441,7 @@ static bool InlineHistoryIncludes(Function *F, int InlineHistoryID,
bool Inliner::runOnSCC(CallGraphSCC &SCC) {
CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- const DataLayout *DL = DLP ? &DLP->getDataLayout() : 0;
+ const DataLayout *DL = DLP ? &DLP->getDataLayout() : nullptr;
const TargetLibraryInfo *TLI = getAnalysisIfAvailable<TargetLibraryInfo>();
SmallPtrSet<Function*, 8> SCCFunctions;
@@ -499,7 +530,7 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) {
++NumCallsDeleted;
} else {
// We can only inline direct calls to non-declarations.
- if (Callee == 0 || Callee->isDeclaration()) continue;
+ if (!Callee || Callee->isDeclaration()) continue;
// If this call site was obtained by inlining another function, verify
// that the include path for the function did not include the callee
@@ -511,18 +542,37 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) {
InlineHistoryIncludes(Callee, InlineHistoryID, InlineHistory))
continue;
-
+ LLVMContext &CallerCtx = Caller->getContext();
+
+ // Get DebugLoc to report. CS will be invalid after Inliner.
+ DebugLoc DLoc = CS.getInstruction()->getDebugLoc();
+
// If the policy determines that we should inline this function,
// try to do so.
- if (!shouldInline(CS))
+ if (!shouldInline(CS)) {
+ emitOptimizationRemarkMissed(CallerCtx, DEBUG_TYPE, *Caller, DLoc,
+ Twine(Callee->getName() +
+ " will not be inlined into " +
+ Caller->getName()));
continue;
+ }
// Attempt to inline the function.
if (!InlineCallIfPossible(CS, InlineInfo, InlinedArrayAllocas,
- InlineHistoryID, InsertLifetime, DL))
+ InlineHistoryID, InsertLifetime, DL)) {
+ emitOptimizationRemarkMissed(CallerCtx, DEBUG_TYPE, *Caller, DLoc,
+ Twine(Callee->getName() +
+ " will not be inlined into " +
+ Caller->getName()));
continue;
+ }
++NumInlined;
-
+
+ // Report the inline decision.
+ emitOptimizationRemark(
+ CallerCtx, DEBUG_TYPE, *Caller, DLoc,
+ Twine(Callee->getName() + " inlined into " + Caller->getName()));
+
// If inlining this function gave us any new call sites, throw them
// onto our worklist to process. They are useful inline candidates.
if (!InlineInfo.InlinedCalls.empty()) {
diff --git a/lib/Transforms/IPO/Internalize.cpp b/lib/Transforms/IPO/Internalize.cpp
index c1fe01c..c970a1a 100644
--- a/lib/Transforms/IPO/Internalize.cpp
+++ b/lib/Transforms/IPO/Internalize.cpp
@@ -19,7 +19,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "internalize"
#include "llvm/Transforms/IPO.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
@@ -35,6 +34,8 @@
#include <set>
using namespace llvm;
+#define DEBUG_TYPE "internalize"
+
STATISTIC(NumAliases , "Number of aliases internalized");
STATISTIC(NumFunctions, "Number of functions internalized");
STATISTIC(NumGlobals , "Number of global vars internalized");
@@ -131,8 +132,8 @@ static bool shouldInternalize(const GlobalValue &GV,
bool InternalizePass::runOnModule(Module &M) {
CallGraphWrapperPass *CGPass = getAnalysisIfAvailable<CallGraphWrapperPass>();
- CallGraph *CG = CGPass ? &CGPass->getCallGraph() : 0;
- CallGraphNode *ExternalNode = CG ? CG->getExternalCallingNode() : 0;
+ CallGraph *CG = CGPass ? &CGPass->getCallGraph() : nullptr;
+ CallGraphNode *ExternalNode = CG ? CG->getExternalCallingNode() : nullptr;
bool Changed = false;
SmallPtrSet<GlobalValue *, 8> Used;
@@ -158,6 +159,7 @@ bool InternalizePass::runOnModule(Module &M) {
if (!shouldInternalize(*I, ExternalNames))
continue;
+ I->setVisibility(GlobalValue::DefaultVisibility);
I->setLinkage(GlobalValue::InternalLinkage);
if (ExternalNode)
@@ -194,6 +196,7 @@ bool InternalizePass::runOnModule(Module &M) {
if (!shouldInternalize(*I, ExternalNames))
continue;
+ I->setVisibility(GlobalValue::DefaultVisibility);
I->setLinkage(GlobalValue::InternalLinkage);
Changed = true;
++NumGlobals;
@@ -206,6 +209,7 @@ bool InternalizePass::runOnModule(Module &M) {
if (!shouldInternalize(*I, ExternalNames))
continue;
+ I->setVisibility(GlobalValue::DefaultVisibility);
I->setLinkage(GlobalValue::InternalLinkage);
Changed = true;
++NumAliases;
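All three internalization sites (functions, globals, aliases) now reset visibility before switching linkage, matching the IR invariant that symbols with local linkage must have default visibility: hidden or protected visibility only constrains how a symbol is seen across module boundaries, which an internal symbol never is. The paired calls as a small sketch, assuming the usual GlobalValue API:

    // Hypothetical helper capturing the pattern used at all three sites:
    // reset visibility first, since local linkage forbids hidden/protected.
    static void internalize(GlobalValue &GV) {
      GV.setVisibility(GlobalValue::DefaultVisibility);
      GV.setLinkage(GlobalValue::InternalLinkage);
    }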
diff --git a/lib/Transforms/IPO/LoopExtractor.cpp b/lib/Transforms/IPO/LoopExtractor.cpp
index 464aa99..20414aa 100644
--- a/lib/Transforms/IPO/LoopExtractor.cpp
+++ b/lib/Transforms/IPO/LoopExtractor.cpp
@@ -14,7 +14,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "loop-extract"
#include "llvm/Transforms/IPO.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/LoopPass.h"
@@ -30,6 +29,8 @@
#include <set>
using namespace llvm;
+#define DEBUG_TYPE "loop-extract"
+
STATISTIC(NumExtracted, "Number of loops extracted");
namespace {
@@ -136,7 +137,7 @@ bool LoopExtractor::runOnLoop(Loop *L, LPPassManager &LPM) {
if (NumLoops == 0) return Changed;
--NumLoops;
CodeExtractor Extractor(DT, *L);
- if (Extractor.extractCodeRegion() != 0) {
+ if (Extractor.extractCodeRegion() != nullptr) {
Changed = true;
// After extraction, the loop is replaced by a function call, so
// we shouldn't try to run any more loop passes on it.
@@ -241,7 +242,7 @@ void BlockExtractorPass::SplitLandingPadPreds(Function *F) {
if (!Split) continue;
SmallVector<BasicBlock*, 2> NewBBs;
- SplitLandingPadPredecessors(LPad, Parent, ".1", ".2", 0, NewBBs);
+ SplitLandingPadPredecessors(LPad, Parent, ".1", ".2", nullptr, NewBBs);
}
}
diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp
index 8555d2c..c3a2b12 100644
--- a/lib/Transforms/IPO/MergeFunctions.cpp
+++ b/lib/Transforms/IPO/MergeFunctions.cpp
@@ -43,7 +43,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "mergefunc"
#include "llvm/Transforms/IPO.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/FoldingSet.h"
@@ -67,6 +66,8 @@
#include <vector>
using namespace llvm;
+#define DEBUG_TYPE "mergefunc"
+
STATISTIC(NumFunctionsMerged, "Number of functions merged");
STATISTIC(NumThunksWritten, "Number of thunks generated");
STATISTIC(NumAliasesWritten, "Number of aliases generated");
@@ -120,12 +121,12 @@ public:
void release() {
assert(Func &&
"Attempted to release function twice, or release empty/tombstone!");
- Func = NULL;
+ Func = nullptr;
}
private:
explicit ComparableFunction(unsigned Hash)
- : Func(NULL), Hash(Hash), DL(NULL) {}
+ : Func(nullptr), Hash(Hash), DL(nullptr) {}
AssertingVH<Function> Func;
unsigned Hash;
@@ -175,19 +176,181 @@ private:
/// Test whether two basic blocks have equivalent behaviour.
bool compare(const BasicBlock *BB1, const BasicBlock *BB2);
+ /// Constants comparison.
+ /// It is analogous to a lexicographical comparison between hypothetical
+ /// numbers of the following format:
+ /// <bitcastability-trait><raw-bit-contents>
+ ///
+ /// 1. Bitcastability.
+ /// Check whether L's type could be losslessly bitcast to R's type.
+ /// At this stage, if a lossless bitcast is not possible, the method
+ /// returns -1 or 1, thus also defining which type is greater in the
+ /// context of bitcastability.
+ /// Stage 0: If the types are equal in terms of cmpType, then we can go
+ /// straight to the contents comparison.
+ /// If the types differ, remember the types comparison result and check
+ /// whether we can still bitcast the types.
+ /// Stage 1: Types that satisfy the isFirstClassType condition are always
+ /// greater than others.
+ /// Stage 2: A vector is greater than a non-vector.
+ /// If both types are vectors, the vector with the greater bit width is
+ /// greater.
+ /// If both types are vectors with the same bit width, then the types
+ /// are bitcastable, and we can skip the other stages and go to the
+ /// contents comparison.
+ /// Stage 3: Pointer types are greater than non-pointers. If both types are
+ /// pointers of the same address space - go to contents comparison.
+ /// Different address spaces: pointer with greater address space is
+ /// greater.
+ /// Stage 4: The types are neither vectors nor pointers, and they differ.
+ /// We don't know how to bitcast them, so we had better not do it,
+ /// and instead return the types comparison result (which then determines
+ /// the relationship among constants we don't know how to bitcast).
+ ///
+ /// Just for clarity, let's see how the set of constants could look
+ /// on a single-dimension axis:
+ ///
+ /// [NFCT], [FCT, "others"], [FCT, pointers], [FCT, vectors]
+ /// Where: NFCT - Not a FirstClassType
+ /// FCT - FirstClassType
+ ///
+ /// 2. Compare raw contents.
+ /// This stage ignores types and only compares the bits from L and R.
+ /// Returns 0 if L and R have equivalent contents, and
+ /// -1 or 1 if the values are different.
+ /// Pretty trivial:
+ /// 2.1. If the contents are numbers, compare the numbers.
+ /// Ints with a greater bit width are greater. Ints with the same bit
+ /// width are compared by their contents.
+ /// 2.2. "And so on". To avoid discrepancies with the comments, it is
+ /// perhaps better to read the implementation itself.
+ /// 3. And again about the overall picture. Let's look back at how the
+ /// ordered set of constants will look:
+ /// [NFCT], [FCT, "others"], [FCT, pointers], [FCT, vectors]
+ ///
+ /// Now look, what could be inside [FCT, "others"], for example:
+ /// [FCT, "others"] =
+ /// [
+ /// [double 0.1], [double 1.23],
+ /// [i32 1], [i32 2],
+ /// { double 1.0 }, ; StructTyID, NumElements = 1
+ /// { i32 1 }, ; StructTyID, NumElements = 1
+ /// { double 1, i32 1 }, ; StructTyID, NumElements = 2
+ /// { i32 1, double 1 } ; StructTyID, NumElements = 2
+ /// ]
+ ///
+ /// Let's explain the order. Floating-point numbers will be less than
+ /// integers, simply because of the cmpType terms: FloatTyID < IntegerTyID.
+ /// Floats (with the same fltSemantics) are sorted according to their value.
+ /// Then come the integers, which, like the floats, can easily be sorted
+ /// among each other.
+ /// Then the structures. Structures are grouped at the tail, again because
+ /// of their TypeID: StructTyID > IntegerTyID > FloatTyID.
+ /// Structures with a greater number of elements are greater. Structures
+ /// whose greater elements come first are greater.
+ /// The same logic applies to vectors, arrays and other complex types.
+ ///
+ /// Bitcastable constants.
+ /// Let's assume that some constant belongs to a group of
+ /// "so-called-equal" values with different types, and at the same time
+ /// belongs to another group of constants with equal types
+ /// and "really" equal values.
+ ///
+ /// Now, prove that this is impossible:
+ ///
+ /// If constant A with type TyA is bitcastable to B with type TyB, then:
+ /// 1. All constants with types equal to TyA are bitcastable to B. Since
+ /// those must be vectors (if TyA is a vector), pointers
+ /// (if TyA is a pointer), or otherwise (if TyA is equal to TyB), those
+ /// types must be equal to TyB.
+ /// 2. All constants whose types are non-equal to, but bitcastable to,
+ /// TyA are bitcastable to B.
+ /// Once again, just because we allow this for vectors and pointers only.
+ /// This statement can be expanded as below:
+ /// 2.1. All vectors with a bit width equal to vector A's have a bit width
+ /// equal to vector B's, and are thus bitcastable to B as well.
+ /// 2.2. All pointers of the same address space, no matter what they point
+ /// to, are bitcastable. So if C is a pointer, it can be bitcast to A and B.
+ /// So any constant equal or bitcastable to A is equal or bitcastable to B.
+ /// QED.
+ ///
+ /// In other words, for pointers and vectors, we ignore the top-level type
+ /// and look at their particular properties (bit width for vectors, and
+ /// address space for pointers).
+ /// If these properties are equal, compare their contents.
+ int cmpConstants(const Constant *L, const Constant *R);
+
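Every cmp* routine introduced by this patch composes the same way: compare the most significant field, return immediately on the first non-zero result, and fall through to the next field on a tie. The idiom in isolation, independent of the LLVM types:

    #include <cstdint>

    // Generic three-way lexicographic chaining, as used by the cmp* methods.
    int cmpNumbers(uint64_t L, uint64_t R) {
      if (L < R) return -1;
      if (L > R) return 1;
      return 0;
    }

    struct Key { uint64_t Major, Minor; };

    int cmpKey(const Key &L, const Key &R) {
      if (int Res = cmpNumbers(L.Major, R.Major)) // most significant first
        return Res;
      return cmpNumbers(L.Minor, R.Minor);        // tie-break on next field
    }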
/// Assign or look up previously assigned numbers for the two values, and
/// return whether the numbers are equal. Numbers are assigned in the order
/// visited.
- bool enumerate(const Value *V1, const Value *V2);
+ /// Comparison order:
+ /// Stage 0: A value that is the function itself is always greater than
+ /// others. If the left and right values are references to their own
+ /// functions, then they are equal.
+ /// Stage 1: Constants are greater than non-constants.
+ /// If both left and right are constants, then the result of
+ /// cmpConstants is used as the cmpValues result.
+ /// Stage 2: InlineAsm instances are greater than others. If both left and
+ /// right are InlineAsm instances, the InlineAsm* pointers are cast to
+ /// integers and compared as numbers.
+ /// Stage 3: In all other cases we compare the order in which we meet these
+ /// values in their functions. If the right value was met first during
+ /// scanning, then the left value is greater.
+ /// In other words, we compare serial numbers; for more details
+ /// see the comments for sn_mapL and sn_mapR.
+ int cmpValues(const Value *L, const Value *R);
+
+ bool enumerate(const Value *V1, const Value *V2) {
+ return cmpValues(V1, V2) == 0;
+ }
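Stage 3 of cmpValues is the serial-numbering scheme documented at sn_mapL/sn_mapR below: each side records the order in which values first appear, and two locals compare equal exactly when they were first met at the same step of the walk. A self-contained sketch of the idea (names are illustrative):

    #include <map>

    std::map<const void *, int> SnL, SnR; // per-side first-seen order

    int cmpSerial(const void *L, const void *R) {
      // insert() keeps the existing entry if the value was seen before;
      // otherwise it assigns the next serial number (the pre-insert size).
      int NumL = SnL.insert({L, (int)SnL.size()}).first->second;
      int NumR = SnR.insert({R, (int)SnR.size()}).first->second;
      if (NumL != NumR)
        return NumL < NumR ? -1 : 1;
      return 0;
    }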
/// Compare two Instructions for equivalence, similar to
/// Instruction::isSameOperationAs but with modifications to the type
/// comparison.
+ /// Stages are listed in "most significant stage first" order:
+ /// At each stage below, we compare some left and right operation
+ /// parts. If the parts are not equal, we assign the parts' comparison
+ /// result to the operation comparison result and exit from the method.
+ /// Otherwise we proceed to the next stage.
+ /// Stages:
+ /// 1. Operations opcodes. Compared as numbers.
+ /// 2. Number of operands.
+ /// 3. Operation types. Compared with cmpType method.
+ /// 4. Compare operation subclass optional data as stream of bytes:
+ /// just convert it to integers and call cmpNumbers.
+ /// 5. Compare the operand types with cmpType, in
+ /// most-significant-operand-first order.
+ /// 6. Last stage. Check operations for some specific attributes.
+ /// For example, for Load it would be:
+ /// 6.1.Load: volatile (as boolean flag)
+ /// 6.2.Load: alignment (as integer numbers)
+ /// 6.3.Load: synch-scope (as integer numbers)
+ /// At this stage it's better to read the code, since it's no more than
+ /// 10-15 lines per particular instruction and may change over time.
+ int cmpOperation(const Instruction *L, const Instruction *R) const;
+
bool isEquivalentOperation(const Instruction *I1,
- const Instruction *I2) const;
+ const Instruction *I2) const {
+ return cmpOperation(I1, I2) == 0;
+ }
/// Compare two GEPs for equivalent pointer arithmetic.
- bool isEquivalentGEP(const GEPOperator *GEP1, const GEPOperator *GEP2);
+ /// Parts to be compared for each comparison stage,
+ /// most significant stage first:
+ /// 1. Address space. As numbers.
+ /// 2. Constant offset (if the "DataLayout *DL" field is not null,
+ /// using the GEPOperator::accumulateConstantOffset method).
+ /// 3. Pointer operand type (using cmpType method).
+ /// 4. Number of operands.
+ /// 5. Compare operands, using cmpValues method.
+ int cmpGEP(const GEPOperator *GEPL, const GEPOperator *GEPR);
+ int cmpGEP(const GetElementPtrInst *GEPL, const GetElementPtrInst *GEPR) {
+ return cmpGEP(cast<GEPOperator>(GEPL), cast<GEPOperator>(GEPR));
+ }
+
+ bool isEquivalentGEP(const GEPOperator *GEP1, const GEPOperator *GEP2) {
+ return cmpGEP(GEP1, GEP2) == 0;
+ }
bool isEquivalentGEP(const GetElementPtrInst *GEP1,
const GetElementPtrInst *GEP2) {
return isEquivalentGEP(cast<GEPOperator>(GEP1), cast<GEPOperator>(GEP2));
@@ -241,13 +404,50 @@ private:
int cmpNumbers(uint64_t L, uint64_t R) const;
+ int cmpAPInt(const APInt &L, const APInt &R) const;
+ int cmpAPFloat(const APFloat &L, const APFloat &R) const;
+ int cmpStrings(StringRef L, StringRef R) const;
+ int cmpAttrs(const AttributeSet L, const AttributeSet R) const;
+
// The two functions undergoing comparison.
const Function *F1, *F2;
const DataLayout *DL;
- DenseMap<const Value *, const Value *> id_map;
- DenseSet<const Value *> seen_values;
+ /// Assign serial numbers to values from the left function, and to values
+ /// from the right function.
+ /// Explanation:
+ /// When comparing functions, we need to compare the values we meet on the
+ /// left and right sides.
+ /// It's easy to sort things out for external values: it should simply be
+ /// the same value on the left and on the right.
+ /// But for local values (those introduced inside the function body)
+ /// we have to ensure they were introduced at exactly the same place,
+ /// and play the same role.
+ /// Let's assign a serial number to each value when we meet it for the
+ /// first time. Values that were met at the same place will have the same
+ /// serial numbers. With that settled, it is worth explaining a few points
+ /// about the values assigned to BBs and other implementation choices
+ /// (see below).
+ ///
+ /// 1. Safety of BB reordering.
+ /// It's safe to change the order of BasicBlocks in a function.
+ /// The relationship with other functions and the serial numbering will
+ /// not change in this case.
+ /// As follows from FunctionComparator::compare(), we do a CFG walk: we
+ /// start from the entry and then take each terminator. So it doesn't
+ /// matter how the BBs are actually ordered in the function. And since
+ /// cmpValues is called during this walk, the numbering depends only on
+ /// how the BBs are located inside the CFG.
+ /// So the answer is: yes, we will get the same numbering.
+ ///
+ /// 2. Impossibility to use dominance properties of values.
+ /// If we compare two instruction operands, where the first is a use of
+ /// local variable AL from function FL and the second is a use of local
+ /// variable AR from FR, we could compare their origins and check whether
+ /// they are defined at the same place.
+ /// But we are still not able to compare operands of PHI nodes, since those
+ /// could be operands from BBs further along that we haven't scanned yet.
+ /// So it's impossible to use dominance properties in general.
+ DenseMap<const Value*, int> sn_mapL, sn_mapR;
};
}
@@ -258,6 +458,206 @@ int FunctionComparator::cmpNumbers(uint64_t L, uint64_t R) const {
return 0;
}
+int FunctionComparator::cmpAPInt(const APInt &L, const APInt &R) const {
+ if (int Res = cmpNumbers(L.getBitWidth(), R.getBitWidth()))
+ return Res;
+ if (L.ugt(R)) return 1;
+ if (R.ugt(L)) return -1;
+ return 0;
+}
+
+int FunctionComparator::cmpAPFloat(const APFloat &L, const APFloat &R) const {
+ if (int Res = cmpNumbers((uint64_t)&L.getSemantics(),
+ (uint64_t)&R.getSemantics()))
+ return Res;
+ return cmpAPInt(L.bitcastToAPInt(), R.bitcastToAPInt());
+}
+
+int FunctionComparator::cmpStrings(StringRef L, StringRef R) const {
+ // Prevent a heavy comparison; compare sizes first.
+ if (int Res = cmpNumbers(L.size(), R.size()))
+ return Res;
+
+ // Compare strings lexicographically only when it is necessary: only when
+ // strings are equal in size.
+ return L.compare(R);
+}
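Because sizes are compared before contents, cmpStrings yields shortlex order rather than plain lexicographic order: "b" compares less than "ab" here, whereas lexicographically "ab" < "b". Any consistent total order is presumably sufficient for the comparator; this one just avoids touching the characters whenever the lengths already differ. A tiny illustration:

    #include <cassert>
    #include <string>

    // Shortlex: order by length first, contents second.
    int cmpShortlex(const std::string &L, const std::string &R) {
      if (L.size() != R.size())
        return L.size() < R.size() ? -1 : 1;
      int C = L.compare(R);
      return (C > 0) - (C < 0);
    }

    int main() {
      assert(cmpShortlex("b", "ab") < 0);  // shorter wins, unlike operator<
      assert(cmpShortlex("ab", "ac") < 0); // equal length: lexicographic
      return 0;
    }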
+
+int FunctionComparator::cmpAttrs(const AttributeSet L,
+ const AttributeSet R) const {
+ if (int Res = cmpNumbers(L.getNumSlots(), R.getNumSlots()))
+ return Res;
+
+ for (unsigned i = 0, e = L.getNumSlots(); i != e; ++i) {
+ AttributeSet::iterator LI = L.begin(i), LE = L.end(i), RI = R.begin(i),
+ RE = R.end(i);
+ for (; LI != LE && RI != RE; ++LI, ++RI) {
+ Attribute LA = *LI;
+ Attribute RA = *RI;
+ if (LA < RA)
+ return -1;
+ if (RA < LA)
+ return 1;
+ }
+ if (LI != LE)
+ return 1;
+ if (RI != RE)
+ return -1;
+ }
+ return 0;
+}
+
+/// Constants comparison:
+/// 1. Check whether type of L constant could be losslessly bitcasted to R
+/// type.
+/// 2. Compare constant contents.
+/// For more details see declaration comments.
+int FunctionComparator::cmpConstants(const Constant *L, const Constant *R) {
+
+ Type *TyL = L->getType();
+ Type *TyR = R->getType();
+
+ // Check whether the types are bitcastable. This part is just a refactored
+ // Type::canLosslesslyBitCastTo, but instead of returning true/false,
+ // we also encode in the result which type is "less" for us.
+ int TypesRes = cmpType(TyL, TyR);
+ if (TypesRes != 0) {
+ // Types are different, but check whether we can bitcast them.
+ if (!TyL->isFirstClassType()) {
+ if (TyR->isFirstClassType())
+ return -1;
+ // Neither TyL nor TyR are values of first class type. Return the result
+ // of comparing the types
+ return TypesRes;
+ }
+ if (!TyR->isFirstClassType()) {
+ if (TyL->isFirstClassType())
+ return 1;
+ return TypesRes;
+ }
+
+ // Vector -> Vector conversions are always lossless if the two vector types
+ // have the same size, otherwise not.
+ unsigned TyLWidth = 0;
+ unsigned TyRWidth = 0;
+
+ if (const VectorType *VecTyL = dyn_cast<VectorType>(TyL))
+ TyLWidth = VecTyL->getBitWidth();
+ if (const VectorType *VecTyR = dyn_cast<VectorType>(TyR))
+ TyRWidth = VecTyR->getBitWidth();
+
+ if (TyLWidth != TyRWidth)
+ return cmpNumbers(TyLWidth, TyRWidth);
+
+ // Zero bit-width means neither TyL nor TyR are vectors.
+ if (!TyLWidth) {
+ PointerType *PTyL = dyn_cast<PointerType>(TyL);
+ PointerType *PTyR = dyn_cast<PointerType>(TyR);
+ if (PTyL && PTyR) {
+ unsigned AddrSpaceL = PTyL->getAddressSpace();
+ unsigned AddrSpaceR = PTyR->getAddressSpace();
+ if (int Res = cmpNumbers(AddrSpaceL, AddrSpaceR))
+ return Res;
+ }
+ if (PTyL)
+ return 1;
+ if (PTyR)
+ return -1;
+
+ // TyL and TyR aren't vectors, nor pointers. We don't know how to
+ // bitcast them.
+ return TypesRes;
+ }
+ }
+
+ // OK, types are bitcastable, now check constant contents.
+
+ if (L->isNullValue() && R->isNullValue())
+ return TypesRes;
+ if (L->isNullValue() && !R->isNullValue())
+ return 1;
+ if (!L->isNullValue() && R->isNullValue())
+ return -1;
+
+ if (int Res = cmpNumbers(L->getValueID(), R->getValueID()))
+ return Res;
+
+ switch (L->getValueID()) {
+ case Value::UndefValueVal: return TypesRes;
+ case Value::ConstantIntVal: {
+ const APInt &LInt = cast<ConstantInt>(L)->getValue();
+ const APInt &RInt = cast<ConstantInt>(R)->getValue();
+ return cmpAPInt(LInt, RInt);
+ }
+ case Value::ConstantFPVal: {
+ const APFloat &LAPF = cast<ConstantFP>(L)->getValueAPF();
+ const APFloat &RAPF = cast<ConstantFP>(R)->getValueAPF();
+ return cmpAPFloat(LAPF, RAPF);
+ }
+ case Value::ConstantArrayVal: {
+ const ConstantArray *LA = cast<ConstantArray>(L);
+ const ConstantArray *RA = cast<ConstantArray>(R);
+ uint64_t NumElementsL = cast<ArrayType>(TyL)->getNumElements();
+ uint64_t NumElementsR = cast<ArrayType>(TyR)->getNumElements();
+ if (int Res = cmpNumbers(NumElementsL, NumElementsR))
+ return Res;
+ for (uint64_t i = 0; i < NumElementsL; ++i) {
+ if (int Res = cmpConstants(cast<Constant>(LA->getOperand(i)),
+ cast<Constant>(RA->getOperand(i))))
+ return Res;
+ }
+ return 0;
+ }
+ case Value::ConstantStructVal: {
+ const ConstantStruct *LS = cast<ConstantStruct>(L);
+ const ConstantStruct *RS = cast<ConstantStruct>(R);
+ unsigned NumElementsL = cast<StructType>(TyL)->getNumElements();
+ unsigned NumElementsR = cast<StructType>(TyR)->getNumElements();
+ if (int Res = cmpNumbers(NumElementsL, NumElementsR))
+ return Res;
+ for (unsigned i = 0; i != NumElementsL; ++i) {
+ if (int Res = cmpConstants(cast<Constant>(LS->getOperand(i)),
+ cast<Constant>(RS->getOperand(i))))
+ return Res;
+ }
+ return 0;
+ }
+ case Value::ConstantVectorVal: {
+ const ConstantVector *LV = cast<ConstantVector>(L);
+ const ConstantVector *RV = cast<ConstantVector>(R);
+ unsigned NumElementsL = cast<VectorType>(TyL)->getNumElements();
+ unsigned NumElementsR = cast<VectorType>(TyR)->getNumElements();
+ if (int Res = cmpNumbers(NumElementsL, NumElementsR))
+ return Res;
+ for (uint64_t i = 0; i < NumElementsL; ++i) {
+ if (int Res = cmpConstants(cast<Constant>(LV->getOperand(i)),
+ cast<Constant>(RV->getOperand(i))))
+ return Res;
+ }
+ return 0;
+ }
+ case Value::ConstantExprVal: {
+ const ConstantExpr *LE = cast<ConstantExpr>(L);
+ const ConstantExpr *RE = cast<ConstantExpr>(R);
+ unsigned NumOperandsL = LE->getNumOperands();
+ unsigned NumOperandsR = RE->getNumOperands();
+ if (int Res = cmpNumbers(NumOperandsL, NumOperandsR))
+ return Res;
+ for (unsigned i = 0; i < NumOperandsL; ++i) {
+ if (int Res = cmpConstants(cast<Constant>(LE->getOperand(i)),
+ cast<Constant>(RE->getOperand(i))))
+ return Res;
+ }
+ return 0;
+ }
+ case Value::FunctionVal:
+ case Value::GlobalVariableVal:
+ case Value::GlobalAliasVal:
+ default: // Unknown constant, cast L and R pointers to numbers and compare.
+ return cmpNumbers((uint64_t)L, (uint64_t)R);
+ }
+}
+
/// cmpType - compares two types,
/// defines total ordering among the types set.
/// See method declaration comments for more details.
@@ -350,143 +750,209 @@ int FunctionComparator::cmpType(Type *TyL, Type *TyR) const {
// Determine whether the two operations are the same except that pointer-to-A
// and pointer-to-B are equivalent. This should be kept in sync with
// Instruction::isSameOperationAs.
-bool FunctionComparator::isEquivalentOperation(const Instruction *I1,
- const Instruction *I2) const {
+// Read method declaration comments for more details.
+int FunctionComparator::cmpOperation(const Instruction *L,
+ const Instruction *R) const {
// Differences from Instruction::isSameOperationAs:
// * replace type comparison with calls to isEquivalentType.
// * we test for I->hasSameSubclassOptionalData (nuw/nsw/tail) at the top
// * because of the above, we don't test for the tail bit on calls later on
- if (I1->getOpcode() != I2->getOpcode() ||
- I1->getNumOperands() != I2->getNumOperands() ||
- !isEquivalentType(I1->getType(), I2->getType()) ||
- !I1->hasSameSubclassOptionalData(I2))
- return false;
+ if (int Res = cmpNumbers(L->getOpcode(), R->getOpcode()))
+ return Res;
+
+ if (int Res = cmpNumbers(L->getNumOperands(), R->getNumOperands()))
+ return Res;
+
+ if (int Res = cmpType(L->getType(), R->getType()))
+ return Res;
+
+ if (int Res = cmpNumbers(L->getRawSubclassOptionalData(),
+ R->getRawSubclassOptionalData()))
+ return Res;
// We have two instructions of identical opcode and #operands. Check to see
// if all operands are the same type
- for (unsigned i = 0, e = I1->getNumOperands(); i != e; ++i)
- if (!isEquivalentType(I1->getOperand(i)->getType(),
- I2->getOperand(i)->getType()))
- return false;
+ for (unsigned i = 0, e = L->getNumOperands(); i != e; ++i) {
+ if (int Res =
+ cmpType(L->getOperand(i)->getType(), R->getOperand(i)->getType()))
+ return Res;
+ }
// Check special state that is a part of some instructions.
- if (const LoadInst *LI = dyn_cast<LoadInst>(I1))
- return LI->isVolatile() == cast<LoadInst>(I2)->isVolatile() &&
- LI->getAlignment() == cast<LoadInst>(I2)->getAlignment() &&
- LI->getOrdering() == cast<LoadInst>(I2)->getOrdering() &&
- LI->getSynchScope() == cast<LoadInst>(I2)->getSynchScope();
- if (const StoreInst *SI = dyn_cast<StoreInst>(I1))
- return SI->isVolatile() == cast<StoreInst>(I2)->isVolatile() &&
- SI->getAlignment() == cast<StoreInst>(I2)->getAlignment() &&
- SI->getOrdering() == cast<StoreInst>(I2)->getOrdering() &&
- SI->getSynchScope() == cast<StoreInst>(I2)->getSynchScope();
- if (const CmpInst *CI = dyn_cast<CmpInst>(I1))
- return CI->getPredicate() == cast<CmpInst>(I2)->getPredicate();
- if (const CallInst *CI = dyn_cast<CallInst>(I1))
- return CI->getCallingConv() == cast<CallInst>(I2)->getCallingConv() &&
- CI->getAttributes() == cast<CallInst>(I2)->getAttributes();
- if (const InvokeInst *CI = dyn_cast<InvokeInst>(I1))
- return CI->getCallingConv() == cast<InvokeInst>(I2)->getCallingConv() &&
- CI->getAttributes() == cast<InvokeInst>(I2)->getAttributes();
- if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(I1))
- return IVI->getIndices() == cast<InsertValueInst>(I2)->getIndices();
- if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(I1))
- return EVI->getIndices() == cast<ExtractValueInst>(I2)->getIndices();
- if (const FenceInst *FI = dyn_cast<FenceInst>(I1))
- return FI->getOrdering() == cast<FenceInst>(I2)->getOrdering() &&
- FI->getSynchScope() == cast<FenceInst>(I2)->getSynchScope();
- if (const AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(I1))
- return CXI->isVolatile() == cast<AtomicCmpXchgInst>(I2)->isVolatile() &&
- CXI->getSuccessOrdering() ==
- cast<AtomicCmpXchgInst>(I2)->getSuccessOrdering() &&
- CXI->getFailureOrdering() ==
- cast<AtomicCmpXchgInst>(I2)->getFailureOrdering() &&
- CXI->getSynchScope() == cast<AtomicCmpXchgInst>(I2)->getSynchScope();
- if (const AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(I1))
- return RMWI->getOperation() == cast<AtomicRMWInst>(I2)->getOperation() &&
- RMWI->isVolatile() == cast<AtomicRMWInst>(I2)->isVolatile() &&
- RMWI->getOrdering() == cast<AtomicRMWInst>(I2)->getOrdering() &&
- RMWI->getSynchScope() == cast<AtomicRMWInst>(I2)->getSynchScope();
+ if (const LoadInst *LI = dyn_cast<LoadInst>(L)) {
+ if (int Res = cmpNumbers(LI->isVolatile(), cast<LoadInst>(R)->isVolatile()))
+ return Res;
+ if (int Res =
+ cmpNumbers(LI->getAlignment(), cast<LoadInst>(R)->getAlignment()))
+ return Res;
+ if (int Res =
+ cmpNumbers(LI->getOrdering(), cast<LoadInst>(R)->getOrdering()))
+ return Res;
+ return cmpNumbers(LI->getSynchScope(), cast<LoadInst>(R)->getSynchScope());
+ }
+ if (const StoreInst *SI = dyn_cast<StoreInst>(L)) {
+ if (int Res =
+ cmpNumbers(SI->isVolatile(), cast<StoreInst>(R)->isVolatile()))
+ return Res;
+ if (int Res =
+ cmpNumbers(SI->getAlignment(), cast<StoreInst>(R)->getAlignment()))
+ return Res;
+ if (int Res =
+ cmpNumbers(SI->getOrdering(), cast<StoreInst>(R)->getOrdering()))
+ return Res;
+ return cmpNumbers(SI->getSynchScope(), cast<StoreInst>(R)->getSynchScope());
+ }
+ if (const CmpInst *CI = dyn_cast<CmpInst>(L))
+ return cmpNumbers(CI->getPredicate(), cast<CmpInst>(R)->getPredicate());
+ if (const CallInst *CI = dyn_cast<CallInst>(L)) {
+ if (int Res = cmpNumbers(CI->getCallingConv(),
+ cast<CallInst>(R)->getCallingConv()))
+ return Res;
+ return cmpAttrs(CI->getAttributes(), cast<CallInst>(R)->getAttributes());
+ }
+ if (const InvokeInst *CI = dyn_cast<InvokeInst>(L)) {
+ if (int Res = cmpNumbers(CI->getCallingConv(),
+ cast<InvokeInst>(R)->getCallingConv()))
+ return Res;
+ return cmpAttrs(CI->getAttributes(), cast<InvokeInst>(R)->getAttributes());
+ }
+ if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(L)) {
+ ArrayRef<unsigned> LIndices = IVI->getIndices();
+ ArrayRef<unsigned> RIndices = cast<InsertValueInst>(R)->getIndices();
+ if (int Res = cmpNumbers(LIndices.size(), RIndices.size()))
+ return Res;
+ for (size_t i = 0, e = LIndices.size(); i != e; ++i) {
+ if (int Res = cmpNumbers(LIndices[i], RIndices[i]))
+ return Res;
+ }
+ }
+ if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(L)) {
+ ArrayRef<unsigned> LIndices = EVI->getIndices();
+ ArrayRef<unsigned> RIndices = cast<ExtractValueInst>(R)->getIndices();
+ if (int Res = cmpNumbers(LIndices.size(), RIndices.size()))
+ return Res;
+ for (size_t i = 0, e = LIndices.size(); i != e; ++i) {
+ if (int Res = cmpNumbers(LIndices[i], RIndices[i]))
+ return Res;
+ }
+ }
+ if (const FenceInst *FI = dyn_cast<FenceInst>(L)) {
+ if (int Res =
+ cmpNumbers(FI->getOrdering(), cast<FenceInst>(R)->getOrdering()))
+ return Res;
+ return cmpNumbers(FI->getSynchScope(), cast<FenceInst>(R)->getSynchScope());
+ }
- return true;
+ if (const AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(L)) {
+ if (int Res = cmpNumbers(CXI->isVolatile(),
+ cast<AtomicCmpXchgInst>(R)->isVolatile()))
+ return Res;
+ if (int Res = cmpNumbers(CXI->getSuccessOrdering(),
+ cast<AtomicCmpXchgInst>(R)->getSuccessOrdering()))
+ return Res;
+ if (int Res = cmpNumbers(CXI->getFailureOrdering(),
+ cast<AtomicCmpXchgInst>(R)->getFailureOrdering()))
+ return Res;
+ return cmpNumbers(CXI->getSynchScope(),
+ cast<AtomicCmpXchgInst>(R)->getSynchScope());
+ }
+ if (const AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(L)) {
+ if (int Res = cmpNumbers(RMWI->getOperation(),
+ cast<AtomicRMWInst>(R)->getOperation()))
+ return Res;
+ if (int Res = cmpNumbers(RMWI->isVolatile(),
+ cast<AtomicRMWInst>(R)->isVolatile()))
+ return Res;
+ if (int Res = cmpNumbers(RMWI->getOrdering(),
+ cast<AtomicRMWInst>(R)->getOrdering()))
+ return Res;
+ return cmpNumbers(RMWI->getSynchScope(),
+ cast<AtomicRMWInst>(R)->getSynchScope());
+ }
+ return 0;
}
// Determine whether two GEP operations perform the same underlying arithmetic.
-bool FunctionComparator::isEquivalentGEP(const GEPOperator *GEP1,
- const GEPOperator *GEP2) {
- unsigned AS = GEP1->getPointerAddressSpace();
- if (AS != GEP2->getPointerAddressSpace())
- return false;
+// Read method declaration comments for more details.
+int FunctionComparator::cmpGEP(const GEPOperator *GEPL,
+ const GEPOperator *GEPR) {
+
+ unsigned int ASL = GEPL->getPointerAddressSpace();
+ unsigned int ASR = GEPR->getPointerAddressSpace();
+ if (int Res = cmpNumbers(ASL, ASR))
+ return Res;
+
+ // When we have target data, we can reduce the GEP down to the value in bytes
+ // added to the address.
if (DL) {
- // When we have target data, we can reduce the GEP down to the value in bytes
- // added to the address.
- unsigned BitWidth = DL ? DL->getPointerSizeInBits(AS) : 1;
- APInt Offset1(BitWidth, 0), Offset2(BitWidth, 0);
- if (GEP1->accumulateConstantOffset(*DL, Offset1) &&
- GEP2->accumulateConstantOffset(*DL, Offset2)) {
- return Offset1 == Offset2;
- }
+ unsigned BitWidth = DL->getPointerSizeInBits(ASL);
+ APInt OffsetL(BitWidth, 0), OffsetR(BitWidth, 0);
+ if (GEPL->accumulateConstantOffset(*DL, OffsetL) &&
+ GEPR->accumulateConstantOffset(*DL, OffsetR))
+ return cmpAPInt(OffsetL, OffsetR);
}
- if (GEP1->getPointerOperand()->getType() !=
- GEP2->getPointerOperand()->getType())
- return false;
+ if (int Res = cmpNumbers((uint64_t)GEPL->getPointerOperand()->getType(),
+ (uint64_t)GEPR->getPointerOperand()->getType()))
+ return Res;
- if (GEP1->getNumOperands() != GEP2->getNumOperands())
- return false;
+ if (int Res = cmpNumbers(GEPL->getNumOperands(), GEPR->getNumOperands()))
+ return Res;
- for (unsigned i = 0, e = GEP1->getNumOperands(); i != e; ++i) {
- if (!enumerate(GEP1->getOperand(i), GEP2->getOperand(i)))
- return false;
+ for (unsigned i = 0, e = GEPL->getNumOperands(); i != e; ++i) {
+ if (int Res = cmpValues(GEPL->getOperand(i), GEPR->getOperand(i)))
+ return Res;
}
- return true;
+ return 0;
}
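With a DataLayout available, cmpGEP folds each GEP down to a constant byte offset and compares the offsets, so structurally different GEPs that compute the same address compare equal via the cmpAPInt path. A hypothetical 3.5-era IR pair (typed-pointer syntax, assuming a typical 64-bit DataLayout) where both sides accumulate to offset 12:

    %t = type { i32, i32, i32, i32 }
    %a = getelementptr i8* %p, i64 12        ; +12 bytes, directly
    %b = getelementptr %t* %q, i64 0, i32 3  ; +12 bytes via struct indexing

The pointer operands %p and %q are still compared separately through cmpValues; only the offset arithmetic is canonicalized here.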
-// Compare two values used by the two functions under pair-wise comparison. If
-// this is the first time the values are seen, they're added to the mapping so
-// that we will detect mismatches on next use.
-bool FunctionComparator::enumerate(const Value *V1, const Value *V2) {
- // Check for function @f1 referring to itself and function @f2 referring to
- // itself, or referring to each other, or both referring to either of them.
- // They're all equivalent if the two functions are otherwise equivalent.
- if (V1 == F1 && V2 == F2)
- return true;
- if (V1 == F2 && V2 == F1)
- return true;
+/// Compare two values used by the two functions under pair-wise comparison. If
+/// this is the first time the values are seen, they're added to the mapping so
+/// that we will detect mismatches on next use.
+/// See comments in declaration for more details.
+int FunctionComparator::cmpValues(const Value *L, const Value *R) {
+ // Catch self-reference case.
+ if (L == F1) {
+ if (R == F2)
+ return 0;
+ return -1;
+ }
+ if (R == F2) {
+ if (L == F1)
+ return 0;
+ return 1;
+ }
- if (const Constant *C1 = dyn_cast<Constant>(V1)) {
- if (V1 == V2) return true;
- const Constant *C2 = dyn_cast<Constant>(V2);
- if (!C2) return false;
- // TODO: constant expressions with GEP or references to F1 or F2.
- if (C1->isNullValue() && C2->isNullValue() &&
- isEquivalentType(C1->getType(), C2->getType()))
- return true;
- // Try bitcasting C2 to C1's type. If the bitcast is legal and returns C1
- // then they must have equal bit patterns.
- return C1->getType()->canLosslesslyBitCastTo(C2->getType()) &&
- C1 == ConstantExpr::getBitCast(const_cast<Constant*>(C2), C1->getType());
- }
-
- if (isa<InlineAsm>(V1) || isa<InlineAsm>(V2))
- return V1 == V2;
-
- // Check that V1 maps to V2. If we find a value that V1 maps to then we simply
- // check whether it's equal to V2. When there is no mapping then we need to
- // ensure that V2 isn't already equivalent to something else. For this
- // purpose, we track the V2 values in a set.
-
- const Value *&map_elem = id_map[V1];
- if (map_elem)
- return map_elem == V2;
- if (!seen_values.insert(V2).second)
- return false;
- map_elem = V2;
- return true;
-}
+ const Constant *ConstL = dyn_cast<Constant>(L);
+ const Constant *ConstR = dyn_cast<Constant>(R);
+ if (ConstL && ConstR) {
+ if (L == R)
+ return 0;
+ return cmpConstants(ConstL, ConstR);
+ }
+
+ if (ConstL)
+ return 1;
+ if (ConstR)
+ return -1;
+
+ const InlineAsm *InlineAsmL = dyn_cast<InlineAsm>(L);
+ const InlineAsm *InlineAsmR = dyn_cast<InlineAsm>(R);
+
+ if (InlineAsmL && InlineAsmR)
+ return cmpNumbers((uint64_t)L, (uint64_t)R);
+ if (InlineAsmL)
+ return 1;
+ if (InlineAsmR)
+ return -1;
+
+ auto LeftSN = sn_mapL.insert(std::make_pair(L, sn_mapL.size())),
+ RightSN = sn_mapR.insert(std::make_pair(R, sn_mapR.size()));
+ return cmpNumbers(LeftSN.first->second, RightSN.first->second);
+}
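The new sn_mapL/sn_mapR maps are the ordering-friendly replacement for the old id_map/seen_values scheme: each side numbers values by first occurrence, and two locals compare equal iff they first appeared at the same position. A standalone sketch of the technique:

    #include <cassert>
    #include <map>

    struct SerialCmp {
      std::map<const void *, unsigned> SNL, SNR; // value -> first-use index

      int cmpValues(const void *L, const void *R) {
        auto LSN = SNL.insert({L, (unsigned)SNL.size()});
        auto RSN = SNR.insert({R, (unsigned)SNR.size()});
        unsigned A = LSN.first->second, B = RSN.first->second;
        return A < B ? -1 : A > B ? 1 : 0;
      }
    };

    int main() {
      SerialCmp C;
      int x, y, z;
      assert(C.cmpValues(&x, &y) == 0); // first use on both sides: index 0
      assert(C.cmpValues(&x, &z) != 0); // x is index 0, but z becomes index 1
    }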
// Test whether two basic blocks have equivalent behaviour.
bool FunctionComparator::compare(const BasicBlock *BB1, const BasicBlock *BB2) {
BasicBlock::const_iterator F1I = BB1->begin(), F1E = BB1->end();
@@ -535,6 +1001,9 @@ bool FunctionComparator::compare() {
// We need to recheck everything, but check the things that weren't included
// in the hash first.
+ sn_mapL.clear();
+ sn_mapR.clear();
+
if (F1->getAttributes() != F2->getAttributes())
return false;
@@ -683,7 +1152,7 @@ ModulePass *llvm::createMergeFunctionsPass() {
bool MergeFunctions::runOnModule(Module &M) {
bool Changed = false;
DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : 0;
+ DL = DLP ? &DLP->getDataLayout() : nullptr;
for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
if (!I->isDeclaration() && !I->hasAvailableExternallyLinkage())
@@ -783,8 +1252,23 @@ void MergeFunctions::writeThunkOrAlias(Function *F, Function *G) {
// Helper for writeThunk: selects the proper bitcast operation,
// but is a bit simpler than CastInst::getCastOpcode.
-static Value* createCast(IRBuilder<false> &Builder, Value *V, Type *DestTy) {
+static Value *createCast(IRBuilder<false> &Builder, Value *V, Type *DestTy) {
Type *SrcTy = V->getType();
+ if (SrcTy->isStructTy()) {
+ assert(DestTy->isStructTy());
+ assert(SrcTy->getStructNumElements() == DestTy->getStructNumElements());
+ Value *Result = UndefValue::get(DestTy);
+ for (unsigned int I = 0, E = SrcTy->getStructNumElements(); I < E; ++I) {
+ Value *Element = createCast(
+ Builder, Builder.CreateExtractValue(V, ArrayRef<unsigned int>(I)),
+ DestTy->getStructElementType(I));
+
+ Result =
+ Builder.CreateInsertValue(Result, Element, ArrayRef<unsigned int>(I));
+ }
+ return Result;
+ }
+ assert(!DestTy->isStructTy());
if (SrcTy->isIntegerTy() && DestTy->isPointerTy())
return Builder.CreateIntToPtr(V, DestTy);
else if (SrcTy->isPointerTy() && DestTy->isIntegerTy())
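createCast now handles struct values by casting them element by element, since first-class aggregates cannot be bitcast directly. For a hypothetical thunk that must convert a { i32*, i64 } return value into { i8*, i64 }, the recursion above would emit roughly (3.5-era typed-pointer IR):

    %e0 = extractvalue { i32*, i64 } %v, 0
    %c0 = bitcast i32* %e0 to i8*
    %r0 = insertvalue { i8*, i64 } undef, i8* %c0, 0
    %e1 = extractvalue { i32*, i64 } %v, 1
    %r1 = insertvalue { i8*, i64 } %r0, i64 %e1, 1 ; i64 needs no real cast;
                                                   ; CreateBitCast folds to %e1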
@@ -843,9 +1327,9 @@ void MergeFunctions::writeThunk(Function *F, Function *G) {
// Replace G with an alias to F and delete G.
void MergeFunctions::writeAlias(Function *F, Function *G) {
- Constant *BitcastF = ConstantExpr::getBitCast(F, G->getType());
- GlobalAlias *GA = new GlobalAlias(G->getType(), G->getLinkage(), "",
- BitcastF, G->getParent());
+ PointerType *PTy = G->getType();
+ auto *GA = GlobalAlias::create(PTy->getElementType(), PTy->getAddressSpace(),
+ G->getLinkage(), "", F);
F->setAlignment(std::max(F->getAlignment(), G->getAlignment()));
GA->takeName(G);
GA->setVisibility(G->getVisibility());
diff --git a/lib/Transforms/IPO/PartialInlining.cpp b/lib/Transforms/IPO/PartialInlining.cpp
index ac88aee..76d6dfa 100644
--- a/lib/Transforms/IPO/PartialInlining.cpp
+++ b/lib/Transforms/IPO/PartialInlining.cpp
@@ -12,7 +12,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "partialinlining"
#include "llvm/Transforms/IPO.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/IR/CFG.h"
@@ -24,6 +23,8 @@
#include "llvm/Transforms/Utils/CodeExtractor.h"
using namespace llvm;
+#define DEBUG_TYPE "partialinlining"
+
STATISTIC(NumPartialInlined, "Number of functions partially inlined");
namespace {
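This file and every one that follows move DEBUG_TYPE below the #include block, an LLVM 3.5 cleanup: headers included later must not be compiled under the .cpp file's DEBUG_TYPE, nor collide with one of their own. The convention, sketched with an illustrative pass name:

    #include "llvm/ADT/Statistic.h"  // headers first; they may rely on
    #include "llvm/Support/Debug.h"  // their own DEBUG_TYPE internally
    using namespace llvm;

    #define DEBUG_TYPE "my-pass"     // only after all includes

    STATISTIC(NumThings, "Number of things done"); // uses DEBUG_TYPE

    void note() {
      DEBUG(dbgs() << "only printed under -debug-only=my-pass\n");
    }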
@@ -52,10 +53,10 @@ Function* PartialInliner::unswitchFunction(Function* F) {
BasicBlock* entryBlock = F->begin();
BranchInst *BR = dyn_cast<BranchInst>(entryBlock->getTerminator());
if (!BR || BR->isUnconditional())
- return 0;
+ return nullptr;
- BasicBlock* returnBlock = 0;
- BasicBlock* nonReturnBlock = 0;
+ BasicBlock* returnBlock = nullptr;
+ BasicBlock* nonReturnBlock = nullptr;
unsigned returnCount = 0;
for (succ_iterator SI = succ_begin(entryBlock), SE = succ_end(entryBlock);
SI != SE; ++SI)
@@ -66,7 +67,7 @@ Function* PartialInliner::unswitchFunction(Function* F) {
nonReturnBlock = *SI;
if (returnCount != 1)
- return 0;
+ return nullptr;
// Clone the function, so that we can hack away on it.
ValueToValueMapTy VMap;
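Much of the remaining churn in this and the previous file is the mechanical 0 -> nullptr modernization from the 3.5 rebase. nullptr is preferred because the literal 0 is an integer first and only a null pointer by conversion; a standalone illustration:

    #include <cassert>

    static int f(int)    { return 1; }
    static int f(char *) { return 2; }

    int main() {
      assert(f(0) == 1);       // 0 participates in integer overloads
      assert(f(nullptr) == 2); // nullptr can only be a pointer
    }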
diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp
index 4a28b34..38e1b8e 100644
--- a/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -56,8 +56,9 @@ RunLoopRerolling("reroll-loops", cl::Hidden,
PassManagerBuilder::PassManagerBuilder() {
OptLevel = 2;
SizeLevel = 0;
- LibraryInfo = 0;
- Inliner = 0;
+ LibraryInfo = nullptr;
+ Inliner = nullptr;
+ DisableTailCalls = false;
DisableUnitAtATime = false;
DisableUnrollLoops = false;
BBVectorize = RunBBVectorization;
@@ -128,7 +129,7 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
if (OptLevel == 0) {
if (Inliner) {
MPM.add(Inliner);
- Inliner = 0;
+ Inliner = nullptr;
}
// FIXME: This is a HACK! The inliner pass above implicitly creates a CGSCC
@@ -156,6 +157,7 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
MPM.add(createDeadArgEliminationPass()); // Dead argument elimination
MPM.add(createInstructionCombiningPass());// Clean up after IPCP & DAE
+ addExtensionsToPM(EP_Peephole, MPM);
MPM.add(createCFGSimplificationPass()); // Clean up after IPCP & DAE
}
@@ -164,7 +166,7 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
MPM.add(createPruneEHPass()); // Remove dead EH info
if (Inliner) {
MPM.add(Inliner);
- Inliner = 0;
+ Inliner = nullptr;
}
if (!DisableUnitAtATime)
MPM.add(createFunctionAttrsPass()); // Set readonly/readnone attrs
@@ -182,8 +184,10 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
MPM.add(createCorrelatedValuePropagationPass()); // Propagate conditionals
MPM.add(createCFGSimplificationPass()); // Merge & remove BBs
MPM.add(createInstructionCombiningPass()); // Combine silly seq's
+ addExtensionsToPM(EP_Peephole, MPM);
- MPM.add(createTailCallEliminationPass()); // Eliminate tail calls
+ if (!DisableTailCalls)
+ MPM.add(createTailCallEliminationPass()); // Eliminate tail calls
MPM.add(createCFGSimplificationPass()); // Merge & remove BBs
MPM.add(createReassociatePass()); // Reassociate expressions
MPM.add(createLoopRotatePass()); // Rotate Loop
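The new DisableTailCalls flag (initialized to false in the constructor hunk above) guards createTailCallEliminationPass, letting an embedder keep tail calls intact, e.g. to preserve accurate call stacks for debugging or sampling profilers. A hypothetical 3.5-era embedder snippet:

    #include "llvm/PassManager.h"
    #include "llvm/Transforms/IPO/PassManagerBuilder.h"
    using namespace llvm;

    void buildPipeline(PassManager &MPM) {
      PassManagerBuilder Builder;
      Builder.OptLevel = 2;
      Builder.DisableTailCalls = true; // skip tail-call elimination
      Builder.populateModulePassManager(MPM);
    }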
@@ -206,6 +210,7 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
// Run instcombine after redundancy elimination to exploit opportunities
// opened up by them.
MPM.add(createInstructionCombiningPass());
+ addExtensionsToPM(EP_Peephole, MPM);
MPM.add(createJumpThreadingPass()); // Thread jumps
MPM.add(createCorrelatedValuePropagationPass());
MPM.add(createDeadStoreEliminationPass()); // Delete dead stores
@@ -220,6 +225,7 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
if (BBVectorize) {
MPM.add(createBBVectorizePass());
MPM.add(createInstructionCombiningPass());
+ addExtensionsToPM(EP_Peephole, MPM);
if (OptLevel > 1 && UseGVNAfterVectorization)
MPM.add(createGVNPass()); // Remove redundancies
else
@@ -233,6 +239,7 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
MPM.add(createAggressiveDCEPass()); // Delete dead instructions
MPM.add(createCFGSimplificationPass()); // Merge & remove BBs
MPM.add(createInstructionCombiningPass()); // Clean up after everything.
+ addExtensionsToPM(EP_Peephole, MPM);
// FIXME: This is a HACK! The inliner pass above implicitly creates a CGSCC
// pass manager that we are specifically trying to avoid. To prevent this
@@ -245,6 +252,7 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) {
// as function calls, so that we can only pass them when the vectorizer
// changed the code.
MPM.add(createInstructionCombiningPass());
+ addExtensionsToPM(EP_Peephole, MPM);
MPM.add(createCFGSimplificationPass());
if (!DisableUnrollLoops)
@@ -297,6 +305,7 @@ void PassManagerBuilder::populateLTOPassManager(PassManagerBase &PM,
// function pointers. When this happens, we often have to resolve varargs
// calls, etc, so let instcombine do this.
PM.add(createInstructionCombiningPass());
+ addExtensionsToPM(EP_Peephole, PM);
// Inline small functions
if (RunInliner)
@@ -315,6 +324,7 @@ void PassManagerBuilder::populateLTOPassManager(PassManagerBase &PM,
// The IPO passes may leave cruft around. Clean up after them.
PM.add(createInstructionCombiningPass());
+ addExtensionsToPM(EP_Peephole, PM);
PM.add(createJumpThreadingPass());
// Break up allocas
@@ -334,11 +344,17 @@ void PassManagerBuilder::populateLTOPassManager(PassManagerBase &PM,
// Nuke dead stores.
PM.add(createDeadStoreEliminationPass());
- // More loops are countable try to vectorize them.
+ // More loops are countable; try to optimize them.
+ PM.add(createIndVarSimplifyPass());
+ PM.add(createLoopDeletionPass());
PM.add(createLoopVectorizePass(true, true));
+ // More scalar chains could be vectorized due to more alias information
+ PM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains.
+
// Cleanup and simplify the code after the scalar optimizations.
PM.add(createInstructionCombiningPass());
+ addExtensionsToPM(EP_Peephole, PM);
PM.add(createJumpThreadingPass());
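Each addExtensionsToPM(EP_Peephole, ...) call added above runs user-registered passes immediately after an instcombine, so plugins get a hook at every cleanup point in both the module and LTO pipelines. A hedged sketch of registering one (createMyPeepholePass is a hypothetical factory, not an LLVM API):

    #include "llvm/Pass.h"
    #include "llvm/PassManager.h"
    #include "llvm/Transforms/IPO/PassManagerBuilder.h"
    using namespace llvm;

    Pass *createMyPeepholePass(); // hypothetical factory, defined elsewhere

    static void addMyPeephole(const PassManagerBuilder &,
                              PassManagerBase &PM) {
      PM.add(createMyPeepholePass());
    }

    // Fires at every EP_Peephole point the hunks above introduce.
    static RegisterStandardPasses
        RegPeephole(PassManagerBuilder::EP_Peephole, addMyPeephole);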
diff --git a/lib/Transforms/IPO/PruneEH.cpp b/lib/Transforms/IPO/PruneEH.cpp
index c61ec5e..b2c4a09 100644
--- a/lib/Transforms/IPO/PruneEH.cpp
+++ b/lib/Transforms/IPO/PruneEH.cpp
@@ -14,7 +14,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "prune-eh"
#include "llvm/Transforms/IPO.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
@@ -30,6 +29,8 @@
#include <algorithm>
using namespace llvm;
+#define DEBUG_TYPE "prune-eh"
+
STATISTIC(NumRemoved, "Number of invokes removed");
STATISTIC(NumUnreach, "Number of noreturn calls optimized");
@@ -85,7 +86,7 @@ bool PruneEH::runOnSCC(CallGraphSCC &SCC) {
for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end();
(!SCCMightUnwind || !SCCMightReturn) && I != E; ++I) {
Function *F = (*I)->getFunction();
- if (F == 0) {
+ if (!F) {
SCCMightUnwind = true;
SCCMightReturn = true;
} else if (F->isDeclaration() || F->mayBeOverridden()) {
diff --git a/lib/Transforms/IPO/StripDeadPrototypes.cpp b/lib/Transforms/IPO/StripDeadPrototypes.cpp
index 1c6532d..956991a 100644
--- a/lib/Transforms/IPO/StripDeadPrototypes.cpp
+++ b/lib/Transforms/IPO/StripDeadPrototypes.cpp
@@ -14,13 +14,14 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "strip-dead-prototypes"
#include "llvm/Transforms/IPO.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
using namespace llvm;
+#define DEBUG_TYPE "strip-dead-prototypes"
+
STATISTIC(NumDeadPrototypes, "Number of dead prototypes removed");
namespace {
diff --git a/lib/Transforms/IPO/StripSymbols.cpp b/lib/Transforms/IPO/StripSymbols.cpp
index 6d0be8f..1abbccc 100644
--- a/lib/Transforms/IPO/StripSymbols.cpp
+++ b/lib/Transforms/IPO/StripSymbols.cpp
@@ -192,7 +192,7 @@ static void StripTypeNames(Module &M, bool PreserveDbgInfo) {
/// Find values that are marked as llvm.used.
static void findUsedValues(GlobalVariable *LLVMUsed,
SmallPtrSet<const GlobalValue*, 8> &UsedValues) {
- if (LLVMUsed == 0) return;
+ if (!LLVMUsed) return;
UsedValues.insert(LLVMUsed);
ConstantArray *Inits = cast<ConstantArray>(LLVMUsed->getInitializer());
diff --git a/lib/Transforms/InstCombine/InstCombine.h b/lib/Transforms/InstCombine/InstCombine.h
index 822e146..e04b1be 100644
--- a/lib/Transforms/InstCombine/InstCombine.h
+++ b/lib/Transforms/InstCombine/InstCombine.h
@@ -20,34 +20,38 @@
#include "llvm/Pass.h"
#include "llvm/Transforms/Utils/SimplifyLibCalls.h"
+#define DEBUG_TYPE "instcombine"
+
namespace llvm {
- class CallSite;
- class DataLayout;
- class TargetLibraryInfo;
- class DbgDeclareInst;
- class MemIntrinsic;
- class MemSetInst;
+class CallSite;
+class DataLayout;
+class TargetLibraryInfo;
+class DbgDeclareInst;
+class MemIntrinsic;
+class MemSetInst;
/// SelectPatternFlavor - We can match a variety of different patterns for
/// select operations.
enum SelectPatternFlavor {
SPF_UNKNOWN = 0,
- SPF_SMIN, SPF_UMIN,
- SPF_SMAX, SPF_UMAX
- //SPF_ABS - TODO.
+ SPF_SMIN,
+ SPF_UMIN,
+ SPF_SMAX,
+ SPF_UMAX
+ // SPF_ABS - TODO.
};
/// getComplexity: Assign a complexity or rank value to LLVM Values...
/// 0 -> undef, 1 -> Const, 2 -> Other, 3 -> Arg, 3 -> Unary, 4 -> OtherInst
static inline unsigned getComplexity(Value *V) {
if (isa<Instruction>(V)) {
- if (BinaryOperator::isNeg(V) ||
- BinaryOperator::isFNeg(V) ||
+ if (BinaryOperator::isNeg(V) || BinaryOperator::isFNeg(V) ||
BinaryOperator::isNot(V))
return 3;
return 4;
}
- if (isa<Argument>(V)) return 3;
+ if (isa<Argument>(V))
+ return 3;
return isa<Constant>(V) ? (isa<UndefValue>(V) ? 0 : 1) : 2;
}
@@ -60,18 +64,18 @@ static inline Constant *SubOne(Constant *C) {
return ConstantExpr::getSub(C, ConstantInt::get(C->getType(), 1));
}
-
/// InstCombineIRInserter - This is an IRBuilder insertion helper that works
/// just like the normal insertion helper, but also adds any new instructions
/// to the instcombine worklist.
class LLVM_LIBRARY_VISIBILITY InstCombineIRInserter
: public IRBuilderDefaultInserter<true> {
InstCombineWorklist &Worklist;
+
public:
InstCombineIRInserter(InstCombineWorklist &WL) : Worklist(WL) {}
- void InsertHelper(Instruction *I, const Twine &Name,
- BasicBlock *BB, BasicBlock::iterator InsertPt) const {
+ void InsertHelper(Instruction *I, const Twine &Name, BasicBlock *BB,
+ BasicBlock::iterator InsertPt) const {
IRBuilderDefaultInserter<true>::InsertHelper(I, Name, BB, InsertPt);
Worklist.Add(I);
}
@@ -79,13 +83,14 @@ public:
/// InstCombiner - The -instcombine pass.
class LLVM_LIBRARY_VISIBILITY InstCombiner
- : public FunctionPass,
- public InstVisitor<InstCombiner, Instruction*> {
+ : public FunctionPass,
+ public InstVisitor<InstCombiner, Instruction *> {
const DataLayout *DL;
TargetLibraryInfo *TLI;
bool MadeIRChange;
LibCallSimplifier *Simplifier;
bool MinimizeSize;
+
public:
/// Worklist - All of the instructions that need to be simplified.
InstCombineWorklist Worklist;
@@ -96,7 +101,7 @@ public:
BuilderTy *Builder;
static char ID; // Pass identification, replacement for typeid
- InstCombiner() : FunctionPass(ID), DL(0), Builder(0) {
+ InstCombiner() : FunctionPass(ID), DL(nullptr), Builder(nullptr) {
MinimizeSize = false;
initializeInstCombinerPass(*PassRegistry::getPassRegistry());
}
@@ -144,9 +149,9 @@ public:
Instruction *visitAnd(BinaryOperator &I);
Value *FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS);
Value *FoldOrOfFCmps(FCmpInst *LHS, FCmpInst *RHS);
- Instruction *FoldOrWithConstants(BinaryOperator &I, Value *Op,
- Value *A, Value *B, Value *C);
- Instruction *visitOr (BinaryOperator &I);
+ Instruction *FoldOrWithConstants(BinaryOperator &I, Value *Op, Value *A,
+ Value *B, Value *C);
+ Instruction *visitOr(BinaryOperator &I);
Instruction *visitXor(BinaryOperator &I);
Instruction *visitShl(BinaryOperator &I);
Instruction *visitAShr(BinaryOperator &I);
@@ -156,12 +161,11 @@ public:
Constant *RHSC);
Instruction *FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP,
GlobalVariable *GV, CmpInst &ICI,
- ConstantInt *AndCst = 0);
+ ConstantInt *AndCst = nullptr);
Instruction *visitFCmpInst(FCmpInst &I);
Instruction *visitICmpInst(ICmpInst &I);
Instruction *visitICmpInstWithCastAndCast(ICmpInst &ICI);
- Instruction *visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
- Instruction *LHS,
+ Instruction *visitICmpInstWithInstAndIntCst(ICmpInst &ICI, Instruction *LHS,
ConstantInt *RHS);
Instruction *FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,
ConstantInt *DivRHS);
@@ -171,7 +175,7 @@ public:
ICmpInst::Predicate Pred);
Instruction *FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
ICmpInst::Predicate Cond, Instruction &I);
- Instruction *FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
+ Instruction *FoldShiftByConstant(Value *Op0, Constant *Op1,
BinaryOperator &I);
Instruction *commonCastTransforms(CastInst &CI);
Instruction *commonPointerCastTransforms(CastInst &CI);
@@ -188,9 +192,8 @@ public:
Instruction *visitIntToPtr(IntToPtrInst &CI);
Instruction *visitBitCast(BitCastInst &CI);
Instruction *visitAddrSpaceCast(AddrSpaceCastInst &CI);
- Instruction *FoldSelectOpOp(SelectInst &SI, Instruction *TI,
- Instruction *FI);
- Instruction *FoldSelectIntoOp(SelectInst &SI, Value*, Value*);
+ Instruction *FoldSelectOpOp(SelectInst &SI, Instruction *TI, Instruction *FI);
+ Instruction *FoldSelectIntoOp(SelectInst &SI, Value *, Value *);
Instruction *FoldSPFofSPF(Instruction *Inner, SelectPatternFlavor SPF1,
Value *A, Value *B, Instruction &Outer,
SelectPatternFlavor SPF2, Value *C);
@@ -209,6 +212,7 @@ public:
Instruction *visitStoreInst(StoreInst &SI);
Instruction *visitBranchInst(BranchInst &BI);
Instruction *visitSwitchInst(SwitchInst &SI);
+ Instruction *visitInsertValueInst(InsertValueInst &IV);
Instruction *visitInsertElementInst(InsertElementInst &IE);
Instruction *visitExtractElementInst(ExtractElementInst &EI);
Instruction *visitShuffleVectorInst(ShuffleVectorInst &SVI);
@@ -216,21 +220,21 @@ public:
Instruction *visitLandingPadInst(LandingPadInst &LI);
// visitInstruction - Specify what to return for unhandled instructions...
- Instruction *visitInstruction(Instruction &I) { return 0; }
+ Instruction *visitInstruction(Instruction &I) { return nullptr; }
private:
bool ShouldChangeType(Type *From, Type *To) const;
Value *dyn_castNegVal(Value *V) const;
- Value *dyn_castFNegVal(Value *V, bool NoSignedZero=false) const;
+ Value *dyn_castFNegVal(Value *V, bool NoSignedZero = false) const;
Type *FindElementAtOffset(Type *PtrTy, int64_t Offset,
- SmallVectorImpl<Value*> &NewIndices);
+ SmallVectorImpl<Value *> &NewIndices);
Instruction *FoldOpIntoSelect(Instruction &Op, SelectInst *SI);
/// ShouldOptimizeCast - Return true if the cast from "V to Ty" actually
/// results in any code being generated and is interesting to optimize out. If
/// the cast can be eliminated by some other simple transformation, we prefer
/// to do the simplification first.
- bool ShouldOptimizeCast(Instruction::CastOps opcode,const Value *V,
+ bool ShouldOptimizeCast(Instruction::CastOps opcode, const Value *V,
Type *Ty);
Instruction *visitCallSite(CallSite CS);
@@ -251,10 +255,10 @@ public:
// in the program. Add the new instruction to the worklist.
//
Instruction *InsertNewInstBefore(Instruction *New, Instruction &Old) {
- assert(New && New->getParent() == 0 &&
+ assert(New && !New->getParent() &&
"New instruction already inserted into a basic block!");
BasicBlock *BB = Old.getParent();
- BB->getInstList().insert(&Old, New); // Insert inst
+ BB->getInstList().insert(&Old, New); // Insert inst
Worklist.Add(New);
return New;
}
@@ -274,7 +278,7 @@ public:
// modified.
//
Instruction *ReplaceInstUsesWith(Instruction &I, Value *V) {
- Worklist.AddUsersToWorkList(I); // Add all modified instrs to worklist.
+ Worklist.AddUsersToWorkList(I); // Add all modified instrs to worklist.
// If we are replacing the instruction with itself, this must be in a
// segment of unreachable code, so just clobber the instruction.
@@ -306,12 +310,12 @@ public:
Worklist.Remove(&I);
I.eraseFromParent();
MadeIRChange = true;
- return 0; // Don't do anything with FI
+ return nullptr; // Don't do anything with FI
}
- void ComputeMaskedBits(Value *V, APInt &KnownZero,
- APInt &KnownOne, unsigned Depth = 0) const {
- return llvm::ComputeMaskedBits(V, KnownZero, KnownOne, DL, Depth);
+ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
+ unsigned Depth = 0) const {
+ return llvm::computeKnownBits(V, KnownZero, KnownOne, DL, Depth);
}
bool MaskedValueIsZero(Value *V, const APInt &Mask,
@@ -323,7 +327,6 @@ public:
}
private:
-
/// SimplifyAssociativeOrCommutative - This performs a few simplifications for
/// operators which are associative or commutative.
bool SimplifyAssociativeOrCommutative(BinaryOperator &I);
@@ -337,12 +340,10 @@ private:
/// SimplifyDemandedUseBits - Attempts to replace V with a simpler value
/// based on the demanded bits.
- Value *SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
- APInt& KnownZero, APInt& KnownOne,
- unsigned Depth);
- bool SimplifyDemandedBits(Use &U, APInt DemandedMask,
- APInt& KnownZero, APInt& KnownOne,
- unsigned Depth=0);
+ Value *SimplifyDemandedUseBits(Value *V, APInt DemandedMask, APInt &KnownZero,
+ APInt &KnownOne, unsigned Depth);
+ bool SimplifyDemandedBits(Use &U, APInt DemandedMask, APInt &KnownZero,
+ APInt &KnownOne, unsigned Depth = 0);
/// Helper routine of SimplifyDemandedUseBits. It tries to simplify demanded
/// bit for "r1 = shr x, c1; r2 = shl r1, c2" instruction sequence.
Value *SimplifyShrShlDemandedBits(Instruction *Lsr, Instruction *Sftl,
@@ -355,7 +356,9 @@ private:
bool SimplifyDemandedInstructionBits(Instruction &Inst);
Value *SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
- APInt& UndefElts, unsigned Depth = 0);
+ APInt &UndefElts, unsigned Depth = 0);
+
+ Value *SimplifyVectorOp(BinaryOperator &Inst);
// FoldOpIntoPhi - Given a binary operator, cast instruction, or select
// which has a PHI node as operand #0, see if we can fold the instruction
@@ -372,21 +375,19 @@ private:
Instruction *FoldPHIArgGEPIntoPHI(PHINode &PN);
Instruction *FoldPHIArgLoadIntoPHI(PHINode &PN);
-
Instruction *OptAndOp(Instruction *Op, ConstantInt *OpRHS,
ConstantInt *AndRHS, BinaryOperator &TheAnd);
Value *FoldLogicalPlusAnd(Value *LHS, Value *RHS, ConstantInt *Mask,
bool isSub, Instruction &I);
- Value *InsertRangeTest(Value *V, Constant *Lo, Constant *Hi,
- bool isSigned, bool Inside);
+ Value *InsertRangeTest(Value *V, Constant *Lo, Constant *Hi, bool isSigned,
+ bool Inside);
Instruction *PromoteCastOfAllocation(BitCastInst &CI, AllocaInst &AI);
Instruction *MatchBSwap(BinaryOperator &I);
bool SimplifyStoreAtEndOfBlock(StoreInst &SI);
Instruction *SimplifyMemTransfer(MemIntrinsic *MI);
Instruction *SimplifyMemSet(MemSetInst *MI);
-
Value *EvaluateInDifferentType(Value *V, Type *Ty, bool isSigned);
/// Descale - Return a value X such that Val = X * Scale, or null if none. If
@@ -394,8 +395,8 @@ private:
Value *Descale(Value *Val, APInt Scale, bool &NoSignedWrap);
};
-
-
} // end namespace llvm.
+#undef DEBUG_TYPE
+
#endif
diff --git a/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 97910c7..c37a9cf 100644
--- a/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -20,6 +20,8 @@
using namespace llvm;
using namespace PatternMatch;
+#define DEBUG_TYPE "instcombine"
+
namespace {
/// Class representing coefficient of floating-point addend.
@@ -112,12 +114,12 @@ namespace {
///
class FAddend {
public:
- FAddend() { Val = 0; }
+ FAddend() { Val = nullptr; }
Value *getSymVal (void) const { return Val; }
const FAddendCoef &getCoef(void) const { return Coeff; }
- bool isConstant() const { return Val == 0; }
+ bool isConstant() const { return Val == nullptr; }
bool isZero() const { return Coeff.isZero(); }
void set(short Coefficient, Value *V) { Coeff.set(Coefficient), Val = V; }
@@ -154,7 +156,7 @@ namespace {
///
class FAddCombine {
public:
- FAddCombine(InstCombiner::BuilderTy *B) : Builder(B), Instr(0) {}
+ FAddCombine(InstCombiner::BuilderTy *B) : Builder(B), Instr(nullptr) {}
Value *simplify(Instruction *FAdd);
private:
@@ -348,8 +350,8 @@ Value *FAddendCoef::getValue(Type *Ty) const {
//
unsigned FAddend::drillValueDownOneStep
(Value *Val, FAddend &Addend0, FAddend &Addend1) {
- Instruction *I = 0;
- if (Val == 0 || !(I = dyn_cast<Instruction>(Val)))
+ Instruction *I = nullptr;
+ if (!Val || !(I = dyn_cast<Instruction>(Val)))
return 0;
unsigned Opcode = I->getOpcode();
@@ -359,16 +361,16 @@ unsigned FAddend::drillValueDownOneStep
Value *Opnd0 = I->getOperand(0);
Value *Opnd1 = I->getOperand(1);
if ((C0 = dyn_cast<ConstantFP>(Opnd0)) && C0->isZero())
- Opnd0 = 0;
+ Opnd0 = nullptr;
if ((C1 = dyn_cast<ConstantFP>(Opnd1)) && C1->isZero())
- Opnd1 = 0;
+ Opnd1 = nullptr;
if (Opnd0) {
if (!C0)
Addend0.set(1, Opnd0);
else
- Addend0.set(C0, 0);
+ Addend0.set(C0, nullptr);
}
if (Opnd1) {
@@ -376,7 +378,7 @@ unsigned FAddend::drillValueDownOneStep
if (!C1)
Addend.set(1, Opnd1);
else
- Addend.set(C1, 0);
+ Addend.set(C1, nullptr);
if (Opcode == Instruction::FSub)
Addend.negate();
}
@@ -385,7 +387,7 @@ unsigned FAddend::drillValueDownOneStep
return Opnd0 && Opnd1 ? 2 : 1;
// Both operands are zero. Weird!
- Addend0.set(APFloat(C0->getValueAPF().getSemantics()), 0);
+ Addend0.set(APFloat(C0->getValueAPF().getSemantics()), nullptr);
return 1;
}
@@ -443,13 +445,13 @@ Value *FAddCombine::performFactorization(Instruction *I) {
Instruction *I1 = dyn_cast<Instruction>(I->getOperand(1));
if (!I0 || !I1 || I0->getOpcode() != I1->getOpcode())
- return 0;
+ return nullptr;
bool isMpy = false;
if (I0->getOpcode() == Instruction::FMul)
isMpy = true;
else if (I0->getOpcode() != Instruction::FDiv)
- return 0;
+ return nullptr;
Value *Opnd0_0 = I0->getOperand(0);
Value *Opnd0_1 = I0->getOperand(1);
@@ -461,8 +463,8 @@ Value *FAddCombine::performFactorization(Instruction *I) {
// (x*y) +/- (x*z) x y z
// (y/x) +/- (z/x) x y z
//
- Value *Factor = 0;
- Value *AddSub0 = 0, *AddSub1 = 0;
+ Value *Factor = nullptr;
+ Value *AddSub0 = nullptr, *AddSub1 = nullptr;
if (isMpy) {
if (Opnd0_0 == Opnd1_0 || Opnd0_0 == Opnd1_1)
@@ -481,7 +483,7 @@ Value *FAddCombine::performFactorization(Instruction *I) {
}
if (!Factor)
- return 0;
+ return nullptr;
FastMathFlags Flags;
Flags.setUnsafeAlgebra();
@@ -495,7 +497,7 @@ Value *FAddCombine::performFactorization(Instruction *I) {
if (ConstantFP *CFP = dyn_cast<ConstantFP>(NewAddSub)) {
const APFloat &F = CFP->getValueAPF();
if (!F.isNormal())
- return 0;
+ return nullptr;
} else if (Instruction *II = dyn_cast<Instruction>(NewAddSub))
II->setFastMathFlags(Flags);
@@ -517,7 +519,7 @@ Value *FAddCombine::simplify(Instruction *I) {
// Currently we are not able to handle vector type.
if (I->getType()->isVectorTy())
- return 0;
+ return nullptr;
assert((I->getOpcode() == Instruction::FAdd ||
I->getOpcode() == Instruction::FSub) && "Expect add/sub");
@@ -568,7 +570,7 @@ Value *FAddCombine::simplify(Instruction *I) {
// been optimized into "I = Y - X" in the previous steps.
//
const FAddendCoef &CE = Opnd0.getCoef();
- return CE.isOne() ? Opnd0.getSymVal() : 0;
+ return CE.isOne() ? Opnd0.getSymVal() : nullptr;
}
// step 4: Try to optimize Opnd0 + Opnd1_0 [+ Opnd1_1]
@@ -614,7 +616,7 @@ Value *FAddCombine::simplifyFAdd(AddendVect& Addends, unsigned InstrQuota) {
// constant close to super-expr(s) will potentially reveal some optimization
// opportunities in super-expr(s).
//
- const FAddend *ConstAdd = 0;
+ const FAddend *ConstAdd = nullptr;
// Simplified addends are placed <SimpVect>.
AddendVect SimpVect;
@@ -647,7 +649,7 @@ Value *FAddCombine::simplifyFAdd(AddendVect& Addends, unsigned InstrQuota) {
if (T && T->getSymVal() == Val) {
// Set null such that next iteration of the outer loop will not process
// this addend again.
- Addends[SameSymIdx] = 0;
+ Addends[SameSymIdx] = nullptr;
SimpVect.push_back(T);
}
}
@@ -661,7 +663,7 @@ Value *FAddCombine::simplifyFAdd(AddendVect& Addends, unsigned InstrQuota) {
// Pop all addends being folded and push the resulting folded addend.
SimpVect.resize(StartIdx);
- if (Val != 0) {
+ if (Val) {
if (!R.isZero()) {
SimpVect.push_back(&R);
}
@@ -698,7 +700,7 @@ Value *FAddCombine::createNaryFAdd
//
unsigned InstrNeeded = calcInstrNumber(Opnds);
if (InstrNeeded > InstrQuota)
- return 0;
+ return nullptr;
initCreateInstNum();
@@ -710,7 +712,7 @@ Value *FAddCombine::createNaryFAdd
// N-ary addition has at most two instructions, and we don't need to worry
// about tree-height when constructing the N-ary addition.
- Value *LastVal = 0;
+ Value *LastVal = nullptr;
bool LastValNeedNeg = false;
// Iterate the addends, creating fadd/fsub using adjacent two addends.
@@ -870,10 +872,10 @@ Value *FAddCombine::createAddendVal
//
static inline Value *dyn_castFoldableMul(Value *V, Constant *&CST) {
if (!V->hasOneUse() || !V->getType()->isIntOrIntVectorTy())
- return 0;
+ return nullptr;
Instruction *I = dyn_cast<Instruction>(V);
- if (I == 0) return 0;
+ if (!I) return nullptr;
if (I->getOpcode() == Instruction::Mul)
if ((CST = dyn_cast<Constant>(I->getOperand(1))))
@@ -884,7 +886,7 @@ static inline Value *dyn_castFoldableMul(Value *V, Constant *&CST) {
CST = ConstantExpr::getShl(ConstantInt::get(V->getType(), 1), CST);
return I->getOperand(0);
}
- return 0;
+ return nullptr;
}
@@ -918,6 +920,9 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
bool Changed = SimplifyAssociativeOrCommutative(I);
Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
+ if (Value *V = SimplifyVectorOp(I))
+ return ReplaceInstUsesWith(I, V);
+
if (Value *V = SimplifyAddInst(LHS, RHS, I.hasNoSignedWrap(),
I.hasNoUnsignedWrap(), DL))
return ReplaceInstUsesWith(I, V);
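Every binary-operator visitor in this file gains the same SimplifyVectorOp pre-check; its definition lands elsewhere in this patch. Assuming it behaves as its call sites suggest — folding a binop whose operands are shufflevectors with a common mask into a shuffle of the binop on the original inputs — the intended rewrite is roughly:

    %s0 = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
    %s1 = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
    %r  = add <2 x i32> %s0, %s1
    ; => %w = add <4 x i32> %a, %b
    ;    %r = shufflevector <4 x i32> %w, <4 x i32> undef,
    ;                       <2 x i32> <i32 0, i32 1>

The transform is sound regardless: elementwise ops commute with shuffling when both operands use the same mask.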
@@ -942,7 +947,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
if (ZI->getSrcTy()->isIntegerTy(1))
return SelectInst::Create(ZI->getOperand(0), AddOne(CI), CI);
- Value *XorLHS = 0; ConstantInt *XorRHS = 0;
+ Value *XorLHS = nullptr; ConstantInt *XorRHS = nullptr;
if (match(LHS, m_Xor(m_Value(XorLHS), m_ConstantInt(XorRHS)))) {
uint32_t TySizeBits = I.getType()->getScalarSizeInBits();
const APInt &RHSVal = CI->getValue();
@@ -974,7 +979,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
IntegerType *IT = cast<IntegerType>(I.getType());
APInt LHSKnownOne(IT->getBitWidth(), 0);
APInt LHSKnownZero(IT->getBitWidth(), 0);
- ComputeMaskedBits(XorLHS, LHSKnownZero, LHSKnownOne);
+ computeKnownBits(XorLHS, LHSKnownZero, LHSKnownOne);
if ((XorRHS->getValue() | LHSKnownZero).isAllOnesValue())
return BinaryOperator::CreateSub(ConstantExpr::getAdd(XorRHS, CI),
XorLHS);
@@ -1042,11 +1047,11 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
if (IntegerType *IT = dyn_cast<IntegerType>(I.getType())) {
APInt LHSKnownOne(IT->getBitWidth(), 0);
APInt LHSKnownZero(IT->getBitWidth(), 0);
- ComputeMaskedBits(LHS, LHSKnownZero, LHSKnownOne);
+ computeKnownBits(LHS, LHSKnownZero, LHSKnownOne);
if (LHSKnownZero != 0) {
APInt RHSKnownOne(IT->getBitWidth(), 0);
APInt RHSKnownZero(IT->getBitWidth(), 0);
- ComputeMaskedBits(RHS, RHSKnownZero, RHSKnownOne);
+ computeKnownBits(RHS, RHSKnownZero, RHSKnownOne);
// No bits in common -> bitwise or.
if ((LHSKnownZero|RHSKnownZero).isAllOnesValue())
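ComputeMaskedBits is renamed computeKnownBits throughout this rebase. The fold guarded here uses it to prove the two addends can never have a one-bit in the same position, in which case no carry can occur and add equals or. A standalone brute-force check of that arithmetic fact:

    #include <cassert>
    #include <cstdint>

    int main() {
      // (x & 0xF0) and (y & 0x0F): known-zero masks 0x0F and 0xF0 cover
      // all bits between them, so lhs + rhs == lhs | rhs for every x, y.
      for (unsigned x = 0; x < 256; ++x)
        for (unsigned y = 0; y < 256; ++y) {
          uint8_t l = x & 0xF0, r = y & 0x0F;
          assert((uint8_t)(l + r) == (l | r));
        }
    }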
@@ -1174,7 +1179,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
// Check for (x & y) + (x ^ y)
{
- Value *A = 0, *B = 0;
+ Value *A = nullptr, *B = nullptr;
if (match(RHS, m_Xor(m_Value(A), m_Value(B))) &&
(match(LHS, m_And(m_Specific(A), m_Specific(B))) ||
match(LHS, m_And(m_Specific(B), m_Specific(A)))))
@@ -1186,13 +1191,16 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
return BinaryOperator::CreateOr(A, B);
}
- return Changed ? &I : 0;
+ return Changed ? &I : nullptr;
}
Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
bool Changed = SimplifyAssociativeOrCommutative(I);
Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
+ if (Value *V = SimplifyVectorOp(I))
+ return ReplaceInstUsesWith(I, V);
+
if (Value *V = SimplifyFAddInst(LHS, RHS, I.getFastMathFlags(), DL))
return ReplaceInstUsesWith(I, V);
@@ -1266,7 +1274,7 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
if (match(LHS, m_Select(m_Value(C1), m_Value(A1), m_Value(B1))) &&
match(RHS, m_Select(m_Value(C2), m_Value(A2), m_Value(B2)))) {
if (C1 == C2) {
- Constant *Z1=0, *Z2=0;
+ Constant *Z1=nullptr, *Z2=nullptr;
Value *A, *B, *C=C1;
if (match(A1, m_AnyZero()) && match(B2, m_AnyZero())) {
Z1 = dyn_cast<Constant>(A1); A = A2;
@@ -1290,7 +1298,7 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
return ReplaceInstUsesWith(I, V);
}
- return Changed ? &I : 0;
+ return Changed ? &I : nullptr;
}
@@ -1305,7 +1313,7 @@ Value *InstCombiner::OptimizePointerDifference(Value *LHS, Value *RHS,
// If LHS is a gep based on RHS or RHS is a gep based on LHS, we can optimize
// this.
bool Swapped = false;
- GEPOperator *GEP1 = 0, *GEP2 = 0;
+ GEPOperator *GEP1 = nullptr, *GEP2 = nullptr;
// For now we require one side to be the base pointer "A" or a constant
// GEP derived from it.
@@ -1343,9 +1351,9 @@ Value *InstCombiner::OptimizePointerDifference(Value *LHS, Value *RHS,
// Avoid duplicating the arithmetic if GEP2 has non-constant indices and
// multiple users.
- if (GEP1 == 0 ||
- (GEP2 != 0 && !GEP2->hasAllConstantIndices() && !GEP2->hasOneUse()))
- return 0;
+ if (!GEP1 ||
+ (GEP2 && !GEP2->hasAllConstantIndices() && !GEP2->hasOneUse()))
+ return nullptr;
// Emit the offset of the GEP and an intptr_t.
Value *Result = EmitGEPOffset(GEP1);
@@ -1368,6 +1376,9 @@ Value *InstCombiner::OptimizePointerDifference(Value *LHS, Value *RHS,
Instruction *InstCombiner::visitSub(BinaryOperator &I) {
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+ if (Value *V = SimplifyVectorOp(I))
+ return ReplaceInstUsesWith(I, V);
+
if (Value *V = SimplifySubInst(Op0, Op1, I.hasNoSignedWrap(),
I.hasNoUnsignedWrap(), DL))
return ReplaceInstUsesWith(I, V);
@@ -1393,7 +1404,7 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
if (Constant *C = dyn_cast<Constant>(Op0)) {
// C - ~X == X + (1+C)
- Value *X = 0;
+ Value *X = nullptr;
if (match(Op1, m_Not(m_Value(X))))
return BinaryOperator::CreateAdd(X, AddOne(C));
@@ -1451,9 +1462,9 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
}
if (Op1->hasOneUse()) {
- Value *X = 0, *Y = 0, *Z = 0;
- Constant *C = 0;
- Constant *CI = 0;
+ Value *X = nullptr, *Y = nullptr, *Z = nullptr;
+ Constant *C = nullptr;
+ Constant *CI = nullptr;
// (X - (Y - Z)) --> (X + (Z - Y)).
if (match(Op1, m_Sub(m_Value(Y), m_Value(Z))))
@@ -1532,12 +1543,15 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
return ReplaceInstUsesWith(I, Res);
}
- return 0;
+ return nullptr;
}
Instruction *InstCombiner::visitFSub(BinaryOperator &I) {
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+ if (Value *V = SimplifyVectorOp(I))
+ return ReplaceInstUsesWith(I, V);
+
if (Value *V = SimplifyFSubInst(Op0, Op1, I.getFastMathFlags(), DL))
return ReplaceInstUsesWith(I, V);
@@ -1574,5 +1588,5 @@ Instruction *InstCombiner::visitFSub(BinaryOperator &I) {
return ReplaceInstUsesWith(I, V);
}
- return 0;
+ return nullptr;
}
diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 2c1bfc7..4f5d65a 100644
--- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -20,6 +20,8 @@
using namespace llvm;
using namespace PatternMatch;
+#define DEBUG_TYPE "instcombine"
+
/// isFreeToInvert - Return true if the specified value is free to invert (apply
/// ~ to). This happens in cases where the ~ can be eliminated.
static inline bool isFreeToInvert(Value *V) {
@@ -50,7 +52,7 @@ static inline Value *dyn_castNotVal(Value *V) {
// Constants can be considered to be not'ed values...
if (ConstantInt *C = dyn_cast<ConstantInt>(V))
return ConstantInt::get(C->getType(), ~C->getValue());
- return 0;
+ return nullptr;
}
/// getFCmpCode - Similar to getICmpCode but for FCmpInst. This encodes a fcmp
@@ -123,7 +125,7 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op,
ConstantInt *AndRHS,
BinaryOperator &TheAnd) {
Value *X = Op->getOperand(0);
- Constant *Together = 0;
+ Constant *Together = nullptr;
if (!Op->isShift())
Together = ConstantExpr::getAnd(AndRHS, OpRHS);
@@ -250,7 +252,7 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op,
}
break;
}
- return 0;
+ return nullptr;
}
/// Emit a computation of: (V >= Lo && V < Hi) if Inside is true, otherwise
@@ -332,12 +334,12 @@ Value *InstCombiner::FoldLogicalPlusAnd(Value *LHS, Value *RHS,
Instruction &I) {
Instruction *LHSI = dyn_cast<Instruction>(LHS);
if (!LHSI || LHSI->getNumOperands() != 2 ||
- !isa<ConstantInt>(LHSI->getOperand(1))) return 0;
+ !isa<ConstantInt>(LHSI->getOperand(1))) return nullptr;
ConstantInt *N = cast<ConstantInt>(LHSI->getOperand(1));
switch (LHSI->getOpcode()) {
- default: return 0;
+ default: return nullptr;
case Instruction::And:
if (ConstantExpr::getAnd(N, Mask) == Mask) {
// If the AndRHS is a power of two minus one (0+1+), this is simple.
@@ -357,7 +359,7 @@ Value *InstCombiner::FoldLogicalPlusAnd(Value *LHS, Value *RHS,
break;
}
}
- return 0;
+ return nullptr;
case Instruction::Or:
case Instruction::Xor:
// If the AndRHS is a power of two minus one (0+1+), and N&Mask == 0
@@ -365,7 +367,7 @@ Value *InstCombiner::FoldLogicalPlusAnd(Value *LHS, Value *RHS,
Mask->getValue().countPopulation()) == Mask->getValue().getBitWidth()
&& ConstantExpr::getAnd(N, Mask)->isNullValue())
break;
- return 0;
+ return nullptr;
}
if (isSub)
@@ -418,12 +420,12 @@ static unsigned getTypeOfMaskedICmp(Value* A, Value* B, Value* C,
ConstantInt *BCst = dyn_cast<ConstantInt>(B);
ConstantInt *CCst = dyn_cast<ConstantInt>(C);
bool icmp_eq = (SCC == ICmpInst::ICMP_EQ);
- bool icmp_abit = (ACst != 0 && !ACst->isZero() &&
+ bool icmp_abit = (ACst && !ACst->isZero() &&
ACst->getValue().isPowerOf2());
- bool icmp_bbit = (BCst != 0 && !BCst->isZero() &&
+ bool icmp_bbit = (BCst && !BCst->isZero() &&
BCst->getValue().isPowerOf2());
unsigned result = 0;
- if (CCst != 0 && CCst->isZero()) {
+ if (CCst && CCst->isZero()) {
// if C is zero, then both A and B qualify as mask
result |= (icmp_eq ? (FoldMskICmp_Mask_AllZeroes |
FoldMskICmp_Mask_AllZeroes |
@@ -455,7 +457,7 @@ static unsigned getTypeOfMaskedICmp(Value* A, Value* B, Value* C,
FoldMskICmp_AMask_NotMixed)
: (FoldMskICmp_Mask_AllZeroes |
FoldMskICmp_AMask_Mixed));
- } else if (ACst != 0 && CCst != 0 &&
+ } else if (ACst && CCst &&
ConstantExpr::getAnd(ACst, CCst) == CCst) {
result |= (icmp_eq ? FoldMskICmp_AMask_Mixed
: FoldMskICmp_AMask_NotMixed);
@@ -470,7 +472,7 @@ static unsigned getTypeOfMaskedICmp(Value* A, Value* B, Value* C,
FoldMskICmp_BMask_NotMixed)
: (FoldMskICmp_Mask_AllZeroes |
FoldMskICmp_BMask_Mixed));
- } else if (BCst != 0 && CCst != 0 &&
+ } else if (BCst && CCst &&
ConstantExpr::getAnd(BCst, CCst) == CCst) {
result |= (icmp_eq ? FoldMskICmp_BMask_Mixed
: FoldMskICmp_BMask_NotMixed);
@@ -570,12 +572,12 @@ static unsigned foldLogOpOfMaskedICmpsHelper(Value*& A,
Value *L11,*L12,*L21,*L22;
// Check whether the icmp can be decomposed into a bit test.
if (decomposeBitTestICmp(LHS, LHSCC, L11, L12, L2)) {
- L21 = L22 = L1 = 0;
+ L21 = L22 = L1 = nullptr;
} else {
// Look for ANDs in the LHS icmp.
if (!L1->getType()->isIntegerTy()) {
// You can icmp pointers, for example. They really aren't masks.
- L11 = L12 = 0;
+ L11 = L12 = nullptr;
} else if (!match(L1, m_And(m_Value(L11), m_Value(L12)))) {
// Any icmp can be viewed as being trivially masked; if it allows us to
// remove one, it's worth it.
@@ -585,7 +587,7 @@ static unsigned foldLogOpOfMaskedICmpsHelper(Value*& A,
if (!L2->getType()->isIntegerTy()) {
// You can icmp pointers, for example. They really aren't masks.
- L21 = L22 = 0;
+ L21 = L22 = nullptr;
} else if (!match(L2, m_And(m_Value(L21), m_Value(L22)))) {
L21 = L2;
L22 = Constant::getAllOnesValue(L2->getType());
@@ -608,7 +610,7 @@ static unsigned foldLogOpOfMaskedICmpsHelper(Value*& A,
} else {
return 0;
}
- E = R2; R1 = 0; ok = true;
+ E = R2; R1 = nullptr; ok = true;
} else if (R1->getType()->isIntegerTy()) {
if (!match(R1, m_And(m_Value(R11), m_Value(R12)))) {
// As before, model no mask as a trivial mask if it'll let us do an
@@ -665,11 +667,11 @@ static unsigned foldLogOpOfMaskedICmpsHelper(Value*& A,
/// into a single (icmp(A & X) ==/!= Y)
static Value* foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, bool IsAnd,
llvm::InstCombiner::BuilderTy* Builder) {
- Value *A = 0, *B = 0, *C = 0, *D = 0, *E = 0;
+ Value *A = nullptr, *B = nullptr, *C = nullptr, *D = nullptr, *E = nullptr;
ICmpInst::Predicate LHSCC = LHS->getPredicate(), RHSCC = RHS->getPredicate();
unsigned mask = foldLogOpOfMaskedICmpsHelper(A, B, C, D, E, LHS, RHS,
LHSCC, RHSCC);
- if (mask == 0) return 0;
+ if (mask == 0) return nullptr;
assert(ICmpInst::isEquality(LHSCC) && ICmpInst::isEquality(RHSCC) &&
"foldLogOpOfMaskedICmpsHelper must return an equality predicate.");
@@ -722,9 +724,9 @@ static Value* foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, bool IsAnd,
// their actual values. This isn't strictly necessary, just a "handle the
// easy cases for now" decision.
ConstantInt *BCst = dyn_cast<ConstantInt>(B);
- if (BCst == 0) return 0;
+ if (!BCst) return nullptr;
ConstantInt *DCst = dyn_cast<ConstantInt>(D);
- if (DCst == 0) return 0;
+ if (!DCst) return nullptr;
if (mask & (FoldMskICmp_Mask_NotAllZeroes | FoldMskICmp_BMask_NotAllOnes)) {
// (icmp ne (A & B), 0) & (icmp ne (A & D), 0) and
@@ -763,11 +765,11 @@ static Value* foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, bool IsAnd,
// (icmp ne (A & B), B) & (icmp eq (A & D), D)
// with B and D, having a single bit set
ConstantInt *CCst = dyn_cast<ConstantInt>(C);
- if (CCst == 0) return 0;
+ if (!CCst) return nullptr;
if (LHSCC != NEWCC)
CCst = dyn_cast<ConstantInt>( ConstantExpr::getXor(BCst, CCst) );
ConstantInt *ECst = dyn_cast<ConstantInt>(E);
- if (ECst == 0) return 0;
+ if (!ECst) return nullptr;
if (RHSCC != NEWCC)
ECst = dyn_cast<ConstantInt>( ConstantExpr::getXor(DCst, ECst) );
ConstantInt* MCst = dyn_cast<ConstantInt>(
@@ -776,13 +778,13 @@ static Value* foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, bool IsAnd,
// if there is a conflict we should actually return a false for the
// whole construct
if (!MCst->isZero())
- return 0;
+ return nullptr;
Value *newOr1 = Builder->CreateOr(B, D);
Value *newOr2 = ConstantExpr::getOr(CCst, ECst);
Value *newAnd = Builder->CreateAnd(A, newOr1);
return Builder->CreateICmp(NEWCC, newAnd, newOr2);
}
- return 0;
+ return nullptr;
}
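In its all-equalities case, foldLogOpOfMaskedICmps merges (icmp eq (A & B), C) & (icmp eq (A & D), E) into icmp eq (A & (B | D)), (C | E); the MCst check above rejects the fold when the two equations conflict on shared mask bits, i.e. unless B & D & (C ^ E) == 0. A standalone brute-force check of one conflict-free instance:

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint8_t B = 0x0F, C = 0x05, D = 0xF0, E = 0x30;
      // No conflict: (B & D & (C ^ E)) == 0, so the fold must hold.
      for (unsigned a = 0; a < 256; ++a) {
        uint8_t A = a;
        bool lhs = ((A & B) == C) && ((A & D) == E);
        bool rhs = (A & (B | D)) == (C | E);
        assert(lhs == rhs);
      }
    }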
/// FoldAndOfICmps - Fold (icmp)&(icmp) if possible.
@@ -811,7 +813,7 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
Value *Val = LHS->getOperand(0), *Val2 = RHS->getOperand(0);
ConstantInt *LHSCst = dyn_cast<ConstantInt>(LHS->getOperand(1));
ConstantInt *RHSCst = dyn_cast<ConstantInt>(RHS->getOperand(1));
- if (LHSCst == 0 || RHSCst == 0) return 0;
+ if (!LHSCst || !RHSCst) return nullptr;
if (LHSCst == RHSCst && LHSCC == RHSCC) {
// (icmp ult A, C) & (icmp ult B, C) --> (icmp ult (A|B), C)
@@ -835,7 +837,7 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
if (LHSCC == ICmpInst::ICMP_EQ && LHSCC == RHSCC &&
LHS->hasOneUse() && RHS->hasOneUse()) {
Value *V;
- ConstantInt *AndCst, *SmallCst = 0, *BigCst = 0;
+ ConstantInt *AndCst, *SmallCst = nullptr, *BigCst = nullptr;
// (trunc x) == C1 & (and x, CA) == C2
// (and x, CA) == C2 & (trunc x) == C1
@@ -866,14 +868,14 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
// From here on, we only handle:
// (icmp1 A, C1) & (icmp2 A, C2) --> something simpler.
- if (Val != Val2) return 0;
+ if (Val != Val2) return nullptr;
// ICMP_[US][GL]E X, CST is folded to ICMP_[US][GL]T elsewhere.
if (LHSCC == ICmpInst::ICMP_UGE || LHSCC == ICmpInst::ICMP_ULE ||
RHSCC == ICmpInst::ICMP_UGE || RHSCC == ICmpInst::ICMP_ULE ||
LHSCC == ICmpInst::ICMP_SGE || LHSCC == ICmpInst::ICMP_SLE ||
RHSCC == ICmpInst::ICMP_SGE || RHSCC == ICmpInst::ICMP_SLE)
- return 0;
+ return nullptr;
// Make a constant range that's the intersection of the two icmp ranges.
// If the intersection is empty, we know that the result is false.
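The range machinery referenced here is llvm::ConstantRange: each icmp-against-constant describes an interval, and an empty intersection proves the conjunction false. A hedged sketch against the 3.5-era API (the header lived under llvm/Support/ in older trees):

    #include "llvm/IR/ConstantRange.h"
    #include "llvm/IR/InstrTypes.h"
    using namespace llvm;

    bool provablyFalse() {
      // (icmp ult x, 10) & (icmp ugt x, 20): [0,10) vs [21,max] is empty.
      ConstantRange LT10 = ConstantRange::makeICmpRegion(
          CmpInst::ICMP_ULT, ConstantRange(APInt(32, 10)));
      ConstantRange GT20 = ConstantRange::makeICmpRegion(
          CmpInst::ICMP_UGT, ConstantRange(APInt(32, 20)));
      return LT10.intersectWith(GT20).isEmptySet(); // true
    }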
@@ -887,7 +889,7 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
// We can't fold (ugt x, C) & (sgt x, C2).
if (!PredicatesFoldable(LHSCC, RHSCC))
- return 0;
+ return nullptr;
// Ensure that the larger constant is on the RHS.
bool ShouldSwap;
@@ -1016,7 +1018,7 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
break;
}
- return 0;
+ return nullptr;
}
/// FoldAndOfFCmps - Optimize (fcmp)&(fcmp). NOTE: Unlike the rest of
@@ -1026,7 +1028,7 @@ Value *InstCombiner::FoldAndOfFCmps(FCmpInst *LHS, FCmpInst *RHS) {
if (LHS->getPredicate() == FCmpInst::FCMP_ORD &&
RHS->getPredicate() == FCmpInst::FCMP_ORD) {
if (LHS->getOperand(0)->getType() != RHS->getOperand(0)->getType())
- return 0;
+ return nullptr;
// (fcmp ord x, c) & (fcmp ord y, c) -> (fcmp ord x, y)
if (ConstantFP *LHSC = dyn_cast<ConstantFP>(LHS->getOperand(1)))
@@ -1043,7 +1045,7 @@ Value *InstCombiner::FoldAndOfFCmps(FCmpInst *LHS, FCmpInst *RHS) {
if (isa<ConstantAggregateZero>(LHS->getOperand(1)) &&
isa<ConstantAggregateZero>(RHS->getOperand(1)))
return Builder->CreateFCmpORD(LHS->getOperand(0), RHS->getOperand(0));
- return 0;
+ return nullptr;
}
Value *Op0LHS = LHS->getOperand(0), *Op0RHS = LHS->getOperand(1);
@@ -1096,7 +1098,7 @@ Value *InstCombiner::FoldAndOfFCmps(FCmpInst *LHS, FCmpInst *RHS) {
}
}
- return 0;
+ return nullptr;
}
@@ -1104,6 +1106,9 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
bool Changed = SimplifyAssociativeOrCommutative(I);
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+ if (Value *V = SimplifyVectorOp(I))
+ return ReplaceInstUsesWith(I, V);
+
if (Value *V = SimplifyAndInst(Op0, Op1, DL))
return ReplaceInstUsesWith(I, V);
@@ -1198,7 +1203,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
// If this is an integer truncation, and if the source is an 'and' with
// immediate, transform it. This frequently occurs for bitfield accesses.
{
- Value *X = 0; ConstantInt *YC = 0;
+ Value *X = nullptr; ConstantInt *YC = nullptr;
if (match(Op0, m_Trunc(m_And(m_Value(X), m_ConstantInt(YC))))) {
// Change: and (trunc (and X, YC) to T), C2
// into : and (trunc X to T), trunc(YC) & C2
@@ -1231,7 +1236,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
}
{
- Value *A = 0, *B = 0, *C = 0, *D = 0;
+ Value *A = nullptr, *B = nullptr, *C = nullptr, *D = nullptr;
// (A|B) & ~(A&B) -> A^B
if (match(Op0, m_Or(m_Value(A), m_Value(B))) &&
match(Op1, m_Not(m_And(m_Value(C), m_Value(D)))) &&
@@ -1339,7 +1344,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
}
{
- Value *X = 0;
+ Value *X = nullptr;
bool OpsSwapped = false;
// Canonicalize SExt or Not to the LHS
if (match(Op1, m_SExt(m_Value())) ||
@@ -1366,7 +1371,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
std::swap(Op0, Op1);
}
- return Changed ? &I : 0;
+ return Changed ? &I : nullptr;
}
/// CollectBSwapParts - Analyze the specified subexpression and see if it is
@@ -1498,7 +1503,7 @@ Instruction *InstCombiner::MatchBSwap(BinaryOperator &I) {
if (!ITy || ITy->getBitWidth() % 16 ||
// ByteMask only allows up to 32-byte values.
ITy->getBitWidth() > 32*8)
- return 0; // Can only bswap pairs of bytes. Can't do vectors.
+ return nullptr; // Can only bswap pairs of bytes. Can't do vectors.
/// ByteValues - For each byte of the result, we keep track of which value
/// defines each byte.
@@ -1508,16 +1513,16 @@ Instruction *InstCombiner::MatchBSwap(BinaryOperator &I) {
// Try to find all the pieces corresponding to the bswap.
uint32_t ByteMask = ~0U >> (32-ByteValues.size());
if (CollectBSwapParts(&I, 0, ByteMask, ByteValues))
- return 0;
+ return nullptr;
// Check to see if all of the bytes come from the same value.
Value *V = ByteValues[0];
- if (V == 0) return 0; // Didn't find a byte? Must be zero.
+ if (!V) return nullptr; // Didn't find a byte? Must be zero.
// Check to make sure that all of the bytes come from the same value.
for (unsigned i = 1, e = ByteValues.size(); i != e; ++i)
if (ByteValues[i] != V)
- return 0;
+ return nullptr;
Module *M = I.getParent()->getParent()->getParent();
Function *F = Intrinsic::getDeclaration(M, Intrinsic::bswap, ITy);
return CallInst::Create(F, V);
@@ -1529,10 +1534,10 @@ Instruction *InstCombiner::MatchBSwap(BinaryOperator &I) {
static Instruction *MatchSelectFromAndOr(Value *A, Value *B,
Value *C, Value *D) {
// If A is not a select of -1/0, this cannot match.
- Value *Cond = 0;
+ Value *Cond = nullptr;
if (!match(A, m_SExt(m_Value(Cond))) ||
!Cond->getType()->isIntegerTy(1))
- return 0;
+ return nullptr;
// ((cond?-1:0)&C) | (B&(cond?0:-1)) -> cond ? C : B.
if (match(D, m_Not(m_SExt(m_Specific(Cond)))))
@@ -1545,7 +1550,7 @@ static Instruction *MatchSelectFromAndOr(Value *A, Value *B,
return SelectInst::Create(Cond, C, D);
if (match(B, m_SExt(m_Not(m_Specific(Cond)))))
return SelectInst::Create(Cond, C, D);
- return 0;
+ return nullptr;
}
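MatchSelectFromAndOr recognizes selects built from a sign-extended i1: sext yields an all-ones or all-zero mask, so ((sext c) & C) | ((~sext c) & D) is exactly select c, C, D. A standalone check of the bit trick:

    #include <cassert>
    #include <cstdint>

    int main() {
      const int32_t C = 1234, D = -77;
      for (int cond = 0; cond <= 1; ++cond) {
        int32_t mask = -cond; // sext i1 -> i32: 0 or all-ones
        int32_t sel = (mask & C) | (~mask & D);
        assert(sel == (cond ? C : D));
      }
    }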
/// FoldOrOfICmps - Fold (icmp)|(icmp) if possible.
@@ -1566,8 +1571,8 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
LAnd->getOpcode() == Instruction::And &&
RAnd->getOpcode() == Instruction::And) {
- Value *Mask = 0;
- Value *Masked = 0;
+ Value *Mask = nullptr;
+ Value *Masked = nullptr;
if (LAnd->getOperand(0) == RAnd->getOperand(0) &&
isKnownToBeAPowerOfTwo(LAnd->getOperand(1)) &&
isKnownToBeAPowerOfTwo(RAnd->getOperand(1))) {
@@ -1608,7 +1613,7 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
if (LHS->hasOneUse() || RHS->hasOneUse()) {
// (icmp eq B, 0) | (icmp ult A, B) -> (icmp ule A, B-1)
// (icmp eq B, 0) | (icmp ugt B, A) -> (icmp ule A, B-1)
- Value *A = 0, *B = 0;
+ Value *A = nullptr, *B = nullptr;
if (LHSCC == ICmpInst::ICMP_EQ && LHSCst && LHSCst->isZero()) {
B = Val;
if (RHSCC == ICmpInst::ICMP_ULT && Val == RHS->getOperand(1))
@@ -1632,7 +1637,7 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
}
// This only handles icmp of constants: (icmp1 A, C1) | (icmp2 B, C2).
- if (LHSCst == 0 || RHSCst == 0) return 0;
+ if (!LHSCst || !RHSCst) return nullptr;
if (LHSCst == RHSCst && LHSCC == RHSCC) {
// (icmp ne A, 0) | (icmp ne B, 0) --> (icmp ne (A|B), 0)
@@ -1653,18 +1658,18 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
// From here on, we only handle:
// (icmp1 A, C1) | (icmp2 A, C2) --> something simpler.
- if (Val != Val2) return 0;
+ if (Val != Val2) return nullptr;
// ICMP_[US][GL]E X, CST is folded to ICMP_[US][GL]T elsewhere.
if (LHSCC == ICmpInst::ICMP_UGE || LHSCC == ICmpInst::ICMP_ULE ||
RHSCC == ICmpInst::ICMP_UGE || RHSCC == ICmpInst::ICMP_ULE ||
LHSCC == ICmpInst::ICMP_SGE || LHSCC == ICmpInst::ICMP_SLE ||
RHSCC == ICmpInst::ICMP_SGE || RHSCC == ICmpInst::ICMP_SLE)
- return 0;
+ return nullptr;
// We can't fold (ugt x, C) | (sgt x, C2).
if (!PredicatesFoldable(LHSCC, RHSCC))
- return 0;
+ return nullptr;
// Ensure that the larger constant is on the RHS.
bool ShouldSwap;
@@ -1809,7 +1814,7 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
}
break;
}
- return 0;
+ return nullptr;
}
/// FoldOrOfFCmps - Optimize (fcmp)|(fcmp). NOTE: Unlike the rest of
@@ -1837,7 +1842,7 @@ Value *InstCombiner::FoldOrOfFCmps(FCmpInst *LHS, FCmpInst *RHS) {
isa<ConstantAggregateZero>(RHS->getOperand(1)))
return Builder->CreateFCmpUNO(LHS->getOperand(0), RHS->getOperand(0));
- return 0;
+ return nullptr;
}
Value *Op0LHS = LHS->getOperand(0), *Op0RHS = LHS->getOperand(1);
@@ -1869,7 +1874,7 @@ Value *InstCombiner::FoldOrOfFCmps(FCmpInst *LHS, FCmpInst *RHS) {
return getFCmpValue(Op0Ordered, Op0Pred|Op1Pred, Op0LHS, Op0RHS, Builder);
}
}
- return 0;
+ return nullptr;
}
/// FoldOrWithConstants - This helper function folds:
@@ -1884,27 +1889,30 @@ Value *InstCombiner::FoldOrOfFCmps(FCmpInst *LHS, FCmpInst *RHS) {
Instruction *InstCombiner::FoldOrWithConstants(BinaryOperator &I, Value *Op,
Value *A, Value *B, Value *C) {
ConstantInt *CI1 = dyn_cast<ConstantInt>(C);
- if (!CI1) return 0;
+ if (!CI1) return nullptr;
- Value *V1 = 0;
- ConstantInt *CI2 = 0;
- if (!match(Op, m_And(m_Value(V1), m_ConstantInt(CI2)))) return 0;
+ Value *V1 = nullptr;
+ ConstantInt *CI2 = nullptr;
+ if (!match(Op, m_And(m_Value(V1), m_ConstantInt(CI2)))) return nullptr;
APInt Xor = CI1->getValue() ^ CI2->getValue();
- if (!Xor.isAllOnesValue()) return 0;
+ if (!Xor.isAllOnesValue()) return nullptr;
if (V1 == A || V1 == B) {
Value *NewOp = Builder->CreateAnd((V1 == A) ? B : A, CI1);
return BinaryOperator::CreateOr(NewOp, V1);
}
- return 0;
+ return nullptr;
}
Instruction *InstCombiner::visitOr(BinaryOperator &I) {
bool Changed = SimplifyAssociativeOrCommutative(I);
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+ if (Value *V = SimplifyVectorOp(I))
+ return ReplaceInstUsesWith(I, V);
+
if (Value *V = SimplifyOrInst(Op0, Op1, DL))
return ReplaceInstUsesWith(I, V);
@@ -1918,7 +1926,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
return &I;
if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) {
- ConstantInt *C1 = 0; Value *X = 0;
+ ConstantInt *C1 = nullptr; Value *X = nullptr;
// (X & C1) | C2 --> (X | C2) & (C1|C2)
// iff (C1 & C2) == 0.
if (match(Op0, m_And(m_Value(X), m_ConstantInt(C1))) &&
@@ -1949,8 +1957,8 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
return NV;
}
- Value *A = 0, *B = 0;
- ConstantInt *C1 = 0, *C2 = 0;
+ Value *A = nullptr, *B = nullptr;
+ ConstantInt *C1 = nullptr, *C2 = nullptr;
// (A | B) | C and A | (B | C) -> bswap if possible.
// (A >> B) | (C << D) and (A << B) | (B >> C) -> bswap if possible.
@@ -1981,10 +1989,10 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
}
// (A & C)|(B & D)
- Value *C = 0, *D = 0;
+ Value *C = nullptr, *D = nullptr;
if (match(Op0, m_And(m_Value(A), m_Value(C))) &&
match(Op1, m_And(m_Value(B), m_Value(D)))) {
- Value *V1 = 0, *V2 = 0;
+ Value *V1 = nullptr, *V2 = nullptr;
C1 = dyn_cast<ConstantInt>(C);
C2 = dyn_cast<ConstantInt>(D);
if (C1 && C2) { // (A & C1)|(B & C2)
@@ -2028,7 +2036,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
// ((V|C3)&C1) | ((V|C4)&C2) --> (V|C3|C4)&(C1|C2)
// iff (C1&C2) == 0 and (C3&~C1) == 0 and (C4&~C2) == 0.
- ConstantInt *C3 = 0, *C4 = 0;
+ ConstantInt *C3 = nullptr, *C4 = nullptr;
if (match(A, m_Or(m_Value(V1), m_ConstantInt(C3))) &&
(C3->getValue() & ~C1->getValue()) == 0 &&
match(B, m_Or(m_Specific(V1), m_ConstantInt(C4))) &&
@@ -2220,7 +2228,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
// Since this OR statement hasn't been optimized further yet, we hope
// that this transformation will allow the new ORs to be optimized.
{
- Value *X = 0, *Y = 0;
+ Value *X = nullptr, *Y = nullptr;
if (Op0->hasOneUse() && Op1->hasOneUse() &&
match(Op0, m_Select(m_Value(X), m_Value(A), m_Value(B))) &&
match(Op1, m_Select(m_Value(Y), m_Value(C), m_Value(D))) && X == Y) {
@@ -2230,13 +2238,16 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
}
}
- return Changed ? &I : 0;
+ return Changed ? &I : nullptr;
}
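
To make the or-of-selects rewrite above concrete, here is a hedged scalar sketch with invented names; the fold presumably produces select(X, A|C, B|D), which picks the same value as the original expression for either condition.

#include <cassert>
#include <cstdint>

static uint32_t orOfSelects(bool x, uint32_t a, uint32_t b,
                            uint32_t c, uint32_t d) {
  return (x ? a : b) | (x ? c : d);     // original form
}

static uint32_t selectOfOrs(bool x, uint32_t a, uint32_t b,
                            uint32_t c, uint32_t d) {
  return x ? (a | c) : (b | d);         // rewritten form with "new ORs"
}

int main() {
  for (int x = 0; x < 2; ++x)
    assert(orOfSelects(x, 1, 2, 4, 8) == selectOfOrs(x, 1, 2, 4, 8));
}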
Instruction *InstCombiner::visitXor(BinaryOperator &I) {
bool Changed = SimplifyAssociativeOrCommutative(I);
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+ if (Value *V = SimplifyVectorOp(I))
+ return ReplaceInstUsesWith(I, V);
+
if (Value *V = SimplifyXorInst(Op0, Op1, DL))
return ReplaceInstUsesWith(I, V);
@@ -2494,5 +2505,5 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
}
}
- return Changed ? &I : 0;
+ return Changed ? &I : nullptr;
}
diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 0bc3ac7..d4b583b 100644
--- a/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -22,6 +22,8 @@
using namespace llvm;
using namespace PatternMatch;
+#define DEBUG_TYPE "instcombine"
+
STATISTIC(NumSimplified, "Number of library calls simplified");
/// getPromotedType - Return the specified type promoted as it would be to pass
@@ -70,7 +72,7 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
// If MemCpyInst length is 1/2/4/8 bytes then replace memcpy with
// load/store.
ConstantInt *MemOpLength = dyn_cast<ConstantInt>(MI->getArgOperand(2));
- if (MemOpLength == 0) return 0;
+ if (!MemOpLength) return nullptr;
// Source and destination pointer types are always "i8*" for intrinsic. See
// if the size is something we can handle with a single primitive load/store.
@@ -80,7 +82,7 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
assert(Size && "0-sized memory transferring should be removed already.");
if (Size > 8 || (Size&(Size-1)))
- return 0; // If not 1/2/4/8 bytes, exit.
+ return nullptr; // If not 1/2/4/8 bytes, exit.
// Use an integer load+store unless we can find something better.
unsigned SrcAddrSp =
@@ -99,7 +101,7 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
// dest address will be promotable. See if we can find a better type than the
// integer datatype.
Value *StrippedDest = MI->getArgOperand(0)->stripPointerCasts();
- MDNode *CopyMD = 0;
+ MDNode *CopyMD = nullptr;
if (StrippedDest != MI->getArgOperand(0)) {
Type *SrcETy = cast<PointerType>(StrippedDest->getType())
->getElementType();
@@ -163,7 +165,7 @@ Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) {
ConstantInt *LenC = dyn_cast<ConstantInt>(MI->getLength());
ConstantInt *FillC = dyn_cast<ConstantInt>(MI->getValue());
if (!LenC || !FillC || !FillC->getType()->isIntegerTy(8))
- return 0;
+ return nullptr;
uint64_t Len = LenC->getLimitedValue();
Alignment = MI->getAlignment();
assert(Len && "0-sized memory setting should be removed already.");
@@ -191,7 +193,7 @@ Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) {
return MI;
}
- return 0;
+ return nullptr;
}
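
Both memory-intrinsic simplifications gate on the length being 1, 2, 4 or 8 bytes so a single primitive load/store (or store) suffices. A small standalone C++ sketch of that check, with invented names:

#include <cassert>
#include <cstdint>

// A length qualifies exactly when it is nonzero, at most 8, and a power of
// two; (size & (size - 1)) == 0 is the usual power-of-two test.
static bool fitsSinglePrimitiveAccess(uint64_t size) {
  return size != 0 && size <= 8 && (size & (size - 1)) == 0;
}

int main() {
  assert(fitsSinglePrimitiveAccess(4));
  assert(!fitsSinglePrimitiveAccess(3));  // not a power of two
  assert(!fitsSinglePrimitiveAccess(16)); // too wide for one access
}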
/// visitCallInst - CallInst simplification. This mostly only handles folding
@@ -233,7 +235,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
// No other transformations apply to volatile transfers.
if (MI->isVolatile())
- return 0;
+ return nullptr;
// If we have a memmove and the source operation is a constant global,
// then the source and dest pointers can't alias, so we can change this
@@ -276,11 +278,11 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
uint64_t Size;
if (getObjectSize(II->getArgOperand(0), Size, DL, TLI))
return ReplaceInstUsesWith(CI, ConstantInt::get(CI.getType(), Size));
- return 0;
+ return nullptr;
}
case Intrinsic::bswap: {
Value *IIOperand = II->getArgOperand(0);
- Value *X = 0;
+ Value *X = nullptr;
// bswap(bswap(x)) -> x
if (match(IIOperand, m_BSwap(m_Value(X))))
@@ -320,7 +322,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
uint32_t BitWidth = IT->getBitWidth();
APInt KnownZero(BitWidth, 0);
APInt KnownOne(BitWidth, 0);
- ComputeMaskedBits(II->getArgOperand(0), KnownZero, KnownOne);
+ computeKnownBits(II->getArgOperand(0), KnownZero, KnownOne);
unsigned TrailingZeros = KnownOne.countTrailingZeros();
APInt Mask(APInt::getLowBitsSet(BitWidth, TrailingZeros));
if ((Mask & KnownZero) == Mask)
@@ -338,7 +340,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
uint32_t BitWidth = IT->getBitWidth();
APInt KnownZero(BitWidth, 0);
APInt KnownOne(BitWidth, 0);
- ComputeMaskedBits(II->getArgOperand(0), KnownZero, KnownOne);
+ computeKnownBits(II->getArgOperand(0), KnownZero, KnownOne);
unsigned LeadingZeros = KnownOne.countLeadingZeros();
APInt Mask(APInt::getHighBitsSet(BitWidth, LeadingZeros));
if ((Mask & KnownZero) == Mask)
@@ -353,14 +355,14 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
uint32_t BitWidth = IT->getBitWidth();
APInt LHSKnownZero(BitWidth, 0);
APInt LHSKnownOne(BitWidth, 0);
- ComputeMaskedBits(LHS, LHSKnownZero, LHSKnownOne);
+ computeKnownBits(LHS, LHSKnownZero, LHSKnownOne);
bool LHSKnownNegative = LHSKnownOne[BitWidth - 1];
bool LHSKnownPositive = LHSKnownZero[BitWidth - 1];
if (LHSKnownNegative || LHSKnownPositive) {
APInt RHSKnownZero(BitWidth, 0);
APInt RHSKnownOne(BitWidth, 0);
- ComputeMaskedBits(RHS, RHSKnownZero, RHSKnownOne);
+ computeKnownBits(RHS, RHSKnownZero, RHSKnownOne);
bool RHSKnownNegative = RHSKnownOne[BitWidth - 1];
bool RHSKnownPositive = RHSKnownZero[BitWidth - 1];
if (LHSKnownNegative && RHSKnownNegative) {
@@ -447,10 +449,10 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
APInt LHSKnownZero(BitWidth, 0);
APInt LHSKnownOne(BitWidth, 0);
- ComputeMaskedBits(LHS, LHSKnownZero, LHSKnownOne);
+ computeKnownBits(LHS, LHSKnownZero, LHSKnownOne);
APInt RHSKnownZero(BitWidth, 0);
APInt RHSKnownOne(BitWidth, 0);
- ComputeMaskedBits(RHS, RHSKnownZero, RHSKnownOne);
+ computeKnownBits(RHS, RHSKnownZero, RHSKnownOne);
// Get the largest possible values for each operand.
APInt LHSMax = ~LHSKnownZero;
@@ -554,6 +556,79 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
break;
}
+ // Constant fold <A x Bi> << Ci.
+ // FIXME: We don't handle _dq because it's a shift of an i128, but is
+ // represented in the IR as <2 x i64>. A per-element shift is wrong.
+ case Intrinsic::x86_sse2_psll_d:
+ case Intrinsic::x86_sse2_psll_q:
+ case Intrinsic::x86_sse2_psll_w:
+ case Intrinsic::x86_sse2_pslli_d:
+ case Intrinsic::x86_sse2_pslli_q:
+ case Intrinsic::x86_sse2_pslli_w:
+ case Intrinsic::x86_avx2_psll_d:
+ case Intrinsic::x86_avx2_psll_q:
+ case Intrinsic::x86_avx2_psll_w:
+ case Intrinsic::x86_avx2_pslli_d:
+ case Intrinsic::x86_avx2_pslli_q:
+ case Intrinsic::x86_avx2_pslli_w:
+ case Intrinsic::x86_sse2_psrl_d:
+ case Intrinsic::x86_sse2_psrl_q:
+ case Intrinsic::x86_sse2_psrl_w:
+ case Intrinsic::x86_sse2_psrli_d:
+ case Intrinsic::x86_sse2_psrli_q:
+ case Intrinsic::x86_sse2_psrli_w:
+ case Intrinsic::x86_avx2_psrl_d:
+ case Intrinsic::x86_avx2_psrl_q:
+ case Intrinsic::x86_avx2_psrl_w:
+ case Intrinsic::x86_avx2_psrli_d:
+ case Intrinsic::x86_avx2_psrli_q:
+ case Intrinsic::x86_avx2_psrli_w: {
+ // If the shift count is constant, simplify: the result is zero when the
+ // count is >= the element bit width; otherwise lower to a plain shl/lshr.
+ auto CDV = dyn_cast<ConstantDataVector>(II->getArgOperand(1));
+ auto CInt = dyn_cast<ConstantInt>(II->getArgOperand(1));
+ if (!CDV && !CInt)
+ break;
+ ConstantInt *Count;
+ if (CDV)
+ Count = cast<ConstantInt>(CDV->getElementAsConstant(0));
+ else
+ Count = CInt;
+
+ auto Vec = II->getArgOperand(0);
+ auto VT = cast<VectorType>(Vec->getType());
+ if (Count->getZExtValue() >
+ VT->getElementType()->getPrimitiveSizeInBits() - 1)
+ return ReplaceInstUsesWith(
+ CI, ConstantAggregateZero::get(Vec->getType()));
+
+ bool isPackedShiftLeft = true;
+ switch (II->getIntrinsicID()) {
+ default : break;
+ case Intrinsic::x86_sse2_psrl_d:
+ case Intrinsic::x86_sse2_psrl_q:
+ case Intrinsic::x86_sse2_psrl_w:
+ case Intrinsic::x86_sse2_psrli_d:
+ case Intrinsic::x86_sse2_psrli_q:
+ case Intrinsic::x86_sse2_psrli_w:
+ case Intrinsic::x86_avx2_psrl_d:
+ case Intrinsic::x86_avx2_psrl_q:
+ case Intrinsic::x86_avx2_psrl_w:
+ case Intrinsic::x86_avx2_psrli_d:
+ case Intrinsic::x86_avx2_psrli_q:
+ case Intrinsic::x86_avx2_psrli_w: isPackedShiftLeft = false; break;
+ }
+
+ unsigned VWidth = VT->getNumElements();
+ // Get a constant vector of the same type as the first operand.
+ auto VTCI = ConstantInt::get(VT->getElementType(), Count->getZExtValue());
+ if (isPackedShiftLeft)
+ return BinaryOperator::CreateShl(Vec,
+ Builder->CreateVectorSplat(VWidth, VTCI));
+
+ return BinaryOperator::CreateLShr(Vec,
+ Builder->CreateVectorSplat(VWidth, VTCI));
+ }
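
A rough scalar model of the fold just added, assuming one i32 lane and invented names; the real code builds the splatted shift amount with the IRBuilder:

#include <cassert>
#include <cstdint>

// Scalar model of one lane of a <N x i32> psll: a constant count at or above
// the element width zeroes the lane, otherwise it behaves as an ordinary shl.
static uint32_t psllElement(uint32_t lane, uint64_t count) {
  if (count > 31)         // count >= bit width -> all zeros
    return 0;
  return lane << count;   // otherwise plain shl
}

int main() {
  assert(psllElement(0x80000001u, 1) == 2);
  assert(psllElement(0xFFFFFFFFu, 32) == 0);
}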
case Intrinsic::x86_sse41_pmovsxbw:
case Intrinsic::x86_sse41_pmovsxwd:
@@ -576,6 +651,153 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
break;
}
+ case Intrinsic::x86_sse4a_insertqi: {
+ // insertqi x, y, 64, 0 can just copy y's lower bits and leave the top
+ // ones undef.
+ // TODO: eventually we should lower this intrinsic to IR
+ if (auto CIWidth = dyn_cast<ConstantInt>(II->getArgOperand(2))) {
+ if (auto CIStart = dyn_cast<ConstantInt>(II->getArgOperand(3))) {
+ if (CIWidth->equalsInt(64) && CIStart->isZero()) {
+ Value *Vec = II->getArgOperand(1);
+ Value *Undef = UndefValue::get(Vec->getType());
+ const uint32_t Mask[] = { 0, 2 };
+ return ReplaceInstUsesWith(
+ CI,
+ Builder->CreateShuffleVector(
+ Vec, Undef, ConstantDataVector::get(
+ II->getContext(), ArrayRef<uint32_t>(Mask))));
+
+ } else if (auto Source =
+ dyn_cast<IntrinsicInst>(II->getArgOperand(0))) {
+ if (Source->hasOneUse() &&
+ Source->getArgOperand(1) == II->getArgOperand(1)) {
+ // If the source of the insert has only one use and it's another
+ // insert (and they're both inserting from the same vector), try to
+ // bundle both together.
+ auto CISourceWidth =
+ dyn_cast<ConstantInt>(Source->getArgOperand(2));
+ auto CISourceStart =
+ dyn_cast<ConstantInt>(Source->getArgOperand(3));
+ if (CISourceStart && CISourceWidth) {
+ unsigned Start = CIStart->getZExtValue();
+ unsigned Width = CIWidth->getZExtValue();
+ unsigned End = Start + Width;
+ unsigned SourceStart = CISourceStart->getZExtValue();
+ unsigned SourceWidth = CISourceWidth->getZExtValue();
+ unsigned SourceEnd = SourceStart + SourceWidth;
+ unsigned NewStart, NewWidth;
+ bool ShouldReplace = false;
+ if (Start <= SourceStart && SourceStart <= End) {
+ NewStart = Start;
+ NewWidth = std::max(End, SourceEnd) - NewStart;
+ ShouldReplace = true;
+ } else if (SourceStart <= Start && Start <= SourceEnd) {
+ NewStart = SourceStart;
+ NewWidth = std::max(SourceEnd, End) - NewStart;
+ ShouldReplace = true;
+ }
+
+ if (ShouldReplace) {
+ Constant *ConstantWidth = ConstantInt::get(
+ II->getArgOperand(2)->getType(), NewWidth, false);
+ Constant *ConstantStart = ConstantInt::get(
+ II->getArgOperand(3)->getType(), NewStart, false);
+ Value *Args[4] = { Source->getArgOperand(0),
+ II->getArgOperand(1), ConstantWidth,
+ ConstantStart };
+ Module *M = CI.getParent()->getParent()->getParent();
+ Value *F =
+ Intrinsic::getDeclaration(M, Intrinsic::x86_sse4a_insertqi);
+ return ReplaceInstUsesWith(CI, Builder->CreateCall(F, Args));
+ }
+ }
+ }
+ }
+ }
+ }
+ break;
+ }
+
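The Start/End comparisons above implement a small interval merge over bit ranges. A standalone C++ sketch of that logic, with invented names:

#include <algorithm>
#include <cassert>

// Merge [start, start+width) with [srcStart, srcStart+srcWidth) when the two
// ranges touch or overlap; mirrors the NewStart/NewWidth computation above.
static bool mergeBitRanges(unsigned start, unsigned width,
                           unsigned srcStart, unsigned srcWidth,
                           unsigned &newStart, unsigned &newWidth) {
  unsigned end = start + width, srcEnd = srcStart + srcWidth;
  if (start <= srcStart && srcStart <= end) {
    newStart = start;
    newWidth = std::max(end, srcEnd) - newStart;
    return true;
  }
  if (srcStart <= start && start <= srcEnd) {
    newStart = srcStart;
    newWidth = std::max(srcEnd, end) - newStart;
    return true;
  }
  return false; // disjoint ranges: keep both inserts
}

int main() {
  unsigned s, w;
  assert(mergeBitRanges(0, 16, 8, 16, s, w) && s == 0 && w == 24);
  assert(!mergeBitRanges(0, 8, 16, 8, s, w));
}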
+ case Intrinsic::x86_sse41_pblendvb:
+ case Intrinsic::x86_sse41_blendvps:
+ case Intrinsic::x86_sse41_blendvpd:
+ case Intrinsic::x86_avx_blendv_ps_256:
+ case Intrinsic::x86_avx_blendv_pd_256:
+ case Intrinsic::x86_avx2_pblendvb: {
+ // Convert blendv* to vector selects if the mask is constant.
+ // This optimization is convoluted because the intrinsic is defined to
+ // take a vector of floats or doubles for the ps and pd versions.
+ // FIXME: That should be changed.
+ Value *Mask = II->getArgOperand(2);
+ if (auto C = dyn_cast<ConstantDataVector>(Mask)) {
+ auto Tyi1 = Builder->getInt1Ty();
+ auto SelectorType = cast<VectorType>(Mask->getType());
+ auto EltTy = SelectorType->getElementType();
+ unsigned Size = SelectorType->getNumElements();
+ unsigned BitWidth =
+ EltTy->isFloatTy()
+ ? 32
+ : (EltTy->isDoubleTy() ? 64 : EltTy->getIntegerBitWidth());
+ assert((BitWidth == 64 || BitWidth == 32 || BitWidth == 8) &&
+ "Wrong arguments for variable blend intrinsic");
+ SmallVector<Constant *, 32> Selectors;
+ for (unsigned I = 0; I < Size; ++I) {
+ // The intrinsics only read the top bit of each mask element.
+ uint64_t Selector;
+ if (BitWidth == 8)
+ Selector = C->getElementAsInteger(I);
+ else
+ Selector = C->getElementAsAPFloat(I).bitcastToAPInt().getZExtValue();
+ Selectors.push_back(ConstantInt::get(Tyi1, Selector >> (BitWidth - 1)));
+ }
+ auto NewSelector = ConstantVector::get(Selectors);
+ return SelectInst::Create(NewSelector, II->getArgOperand(1),
+ II->getArgOperand(0), "blendv");
+ } else {
+ break;
+ }
+ }
+
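A scalar sketch of one blendv lane under the mask convention described above, with invented names; the byte variant is shown, and wider elements use their own top bit the same way:

#include <cassert>
#include <cstdint>

// Only the sign (top) bit of the mask element matters, which is why the code
// above shifts by BitWidth - 1 before building the i1 select condition. A
// set top bit selects the second source, matching the SelectInst operands.
static uint8_t blendLane(uint8_t a, uint8_t b, uint8_t mask) {
  return (mask >> 7) & 1 ? b : a;
}

int main() {
  assert(blendLane(10, 20, 0x80) == 20); // top bit set -> second operand
  assert(blendLane(10, 20, 0x7F) == 10); // top bit clear -> first operand
}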
+ case Intrinsic::x86_avx_vpermilvar_ps:
+ case Intrinsic::x86_avx_vpermilvar_ps_256:
+ case Intrinsic::x86_avx_vpermilvar_pd:
+ case Intrinsic::x86_avx_vpermilvar_pd_256: {
+ // Convert vpermil* to shufflevector if the mask is constant.
+ Value *V = II->getArgOperand(1);
+ unsigned Size = cast<VectorType>(V->getType())->getNumElements();
+ assert(Size == 8 || Size == 4 || Size == 2);
+ uint32_t Indexes[8];
+ if (auto C = dyn_cast<ConstantDataVector>(V)) {
+ // The intrinsics only read one or two bits; clear the rest.
+ for (unsigned I = 0; I < Size; ++I) {
+ uint32_t Index = C->getElementAsInteger(I) & 0x3;
+ if (II->getIntrinsicID() == Intrinsic::x86_avx_vpermilvar_pd ||
+ II->getIntrinsicID() == Intrinsic::x86_avx_vpermilvar_pd_256)
+ Index >>= 1;
+ Indexes[I] = Index;
+ }
+ } else if (isa<ConstantAggregateZero>(V)) {
+ for (unsigned I = 0; I < Size; ++I)
+ Indexes[I] = 0;
+ } else {
+ break;
+ }
+ // The _256 variants are a bit trickier since the mask bits always index
+ // into the corresponding 128-bit half. In order to convert to a generic
+ // shuffle, we have to make that explicit.
+ if (II->getIntrinsicID() == Intrinsic::x86_avx_vpermilvar_ps_256 ||
+ II->getIntrinsicID() == Intrinsic::x86_avx_vpermilvar_pd_256) {
+ for (unsigned I = Size / 2; I < Size; ++I)
+ Indexes[I] += Size / 2;
+ }
+ auto NewC =
+ ConstantDataVector::get(V->getContext(), makeArrayRef(Indexes, Size));
+ auto V1 = II->getArgOperand(0);
+ auto V2 = UndefValue::get(V1->getType());
+ auto Shuffle = Builder->CreateShuffleVector(V1, V2, NewC);
+ return ReplaceInstUsesWith(CI, Shuffle);
+ }
+
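A standalone sketch of the index fixup for the ps variants, with invented names; as the code notes, the pd forms additionally shift the mask element right by one first:

#include <cassert>

// vpermilvar indexes are local to each 128-bit half, so before the mask can
// feed a generic shufflevector, lanes in the upper half get Size/2 added.
static unsigned adjustPermIndex(unsigned lane, unsigned index, unsigned size) {
  index &= 0x3;            // the intrinsic reads at most two bits
  if (lane >= size / 2)
    index += size / 2;     // upper half shuffles within the upper half
  return index;
}

int main() {
  assert(adjustPermIndex(0, 2, 8) == 2); // lower half: unchanged
  assert(adjustPermIndex(5, 2, 8) == 6); // upper half: rebased by 4
}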
case Intrinsic::ppc_altivec_vperm:
// Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant.
if (Constant *Mask = dyn_cast<Constant>(II->getArgOperand(2))) {
@@ -586,8 +808,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
bool AllEltsOk = true;
for (unsigned i = 0; i != 16; ++i) {
Constant *Elt = Mask->getAggregateElement(i);
- if (Elt == 0 ||
- !(isa<ConstantInt>(Elt) || isa<UndefValue>(Elt))) {
+ if (!Elt || !(isa<ConstantInt>(Elt) || isa<UndefValue>(Elt))) {
AllEltsOk = false;
break;
}
@@ -612,7 +833,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
cast<ConstantInt>(Mask->getAggregateElement(i))->getZExtValue();
Idx &= 31; // Match the hardware behavior.
- if (ExtractedElts[Idx] == 0) {
+ if (!ExtractedElts[Idx]) {
ExtractedElts[Idx] =
Builder->CreateExtractElement(Idx < 16 ? Op0 : Op1,
Builder->getInt32(Idx&15));
@@ -655,8 +876,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
case Intrinsic::arm_neon_vmulls:
case Intrinsic::arm_neon_vmullu:
- case Intrinsic::arm64_neon_smull:
- case Intrinsic::arm64_neon_umull: {
+ case Intrinsic::aarch64_neon_smull:
+ case Intrinsic::aarch64_neon_umull: {
Value *Arg0 = II->getArgOperand(0);
Value *Arg1 = II->getArgOperand(1);
@@ -667,7 +888,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
// Check for constant LHS & RHS - in this case we just simplify.
bool Zext = (II->getIntrinsicID() == Intrinsic::arm_neon_vmullu ||
- II->getIntrinsicID() == Intrinsic::arm64_neon_umull);
+ II->getIntrinsicID() == Intrinsic::aarch64_neon_umull);
VectorType *NewVT = cast<VectorType>(II->getType());
if (Constant *CV0 = dyn_cast<Constant>(Arg0)) {
if (Constant *CV1 = dyn_cast<Constant>(Arg1)) {
@@ -776,14 +997,14 @@ static bool isSafeToEliminateVarargsCast(const CallSite CS,
// mempcpy_chk, memmove_chk, memset_chk, strcpy_chk, stpcpy_chk, strncpy_chk,
// strcat_chk and strncat_chk.
Instruction *InstCombiner::tryOptimizeCall(CallInst *CI, const DataLayout *DL) {
- if (CI->getCalledFunction() == 0) return 0;
+ if (!CI->getCalledFunction()) return nullptr;
if (Value *With = Simplifier->optimizeCall(CI)) {
++NumSimplified;
return CI->use_empty() ? CI : ReplaceInstUsesWith(*CI, With);
}
- return 0;
+ return nullptr;
}
static IntrinsicInst *FindInitTrampolineFromAlloca(Value *TrampMem) {
@@ -792,35 +1013,35 @@ static IntrinsicInst *FindInitTrampolineFromAlloca(Value *TrampMem) {
Value *Underlying = TrampMem->stripPointerCasts();
if (Underlying != TrampMem &&
(!Underlying->hasOneUse() || Underlying->user_back() != TrampMem))
- return 0;
+ return nullptr;
if (!isa<AllocaInst>(Underlying))
- return 0;
+ return nullptr;
- IntrinsicInst *InitTrampoline = 0;
+ IntrinsicInst *InitTrampoline = nullptr;
for (User *U : TrampMem->users()) {
IntrinsicInst *II = dyn_cast<IntrinsicInst>(U);
if (!II)
- return 0;
+ return nullptr;
if (II->getIntrinsicID() == Intrinsic::init_trampoline) {
if (InitTrampoline)
// More than one init_trampoline writes to this value. Give up.
- return 0;
+ return nullptr;
InitTrampoline = II;
continue;
}
if (II->getIntrinsicID() == Intrinsic::adjust_trampoline)
// Allow any number of calls to adjust.trampoline.
continue;
- return 0;
+ return nullptr;
}
// No call to init.trampoline found.
if (!InitTrampoline)
- return 0;
+ return nullptr;
// Check that the alloca is being used in the expected way.
if (InitTrampoline->getOperand(0) != TrampMem)
- return 0;
+ return nullptr;
return InitTrampoline;
}
@@ -837,9 +1058,9 @@ static IntrinsicInst *FindInitTrampolineFromBB(IntrinsicInst *AdjustTramp,
II->getOperand(0) == TrampMem)
return II;
if (Inst->mayWriteToMemory())
- return 0;
+ return nullptr;
}
- return 0;
+ return nullptr;
}
// Given a call to llvm.adjust.trampoline, find and return the corresponding
@@ -851,7 +1072,7 @@ static IntrinsicInst *FindInitTrampoline(Value *Callee) {
IntrinsicInst *AdjustTramp = dyn_cast<IntrinsicInst>(Callee);
if (!AdjustTramp ||
AdjustTramp->getIntrinsicID() != Intrinsic::adjust_trampoline)
- return 0;
+ return nullptr;
Value *TrampMem = AdjustTramp->getOperand(0);
@@ -859,7 +1080,7 @@ static IntrinsicInst *FindInitTrampoline(Value *Callee) {
return IT;
if (IntrinsicInst *IT = FindInitTrampolineFromBB(AdjustTramp, TrampMem))
return IT;
- return 0;
+ return nullptr;
}
// visitCallSite - Improvements for call and invoke instructions.
@@ -874,7 +1095,7 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) {
// arguments of the call/invoke.
Value *Callee = CS.getCalledValue();
if (!isa<Function>(Callee) && transformConstExprCastCall(CS))
- return 0;
+ return nullptr;
if (Function *CalleeF = dyn_cast<Function>(Callee))
// If the call and callee calling conventions don't match, this call must
@@ -899,7 +1120,7 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) {
// change the callee to a null pointer.
cast<InvokeInst>(OldCall)->setCalledFunction(
Constant::getNullValue(CalleeF->getType()));
- return 0;
+ return nullptr;
}
if (isa<ConstantPointerNull>(Callee) || isa<UndefValue>(Callee)) {
@@ -911,7 +1132,7 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) {
if (isa<InvokeInst>(CS.getInstruction())) {
// Can't remove an invoke because we cannot change the CFG.
- return 0;
+ return nullptr;
}
// This instruction is not reachable, just remove it. We insert a store to
@@ -959,7 +1180,7 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) {
if (I) return EraseInstFromFunction(*I);
}
- return Changed ? CS.getInstruction() : 0;
+ return Changed ? CS.getInstruction() : nullptr;
}
// transformConstExprCastCall - If the callee is a constexpr cast of a function,
@@ -968,7 +1189,7 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) {
bool InstCombiner::transformConstExprCastCall(CallSite CS) {
Function *Callee =
dyn_cast<Function>(CS.getCalledValue()->stripPointerCasts());
- if (Callee == 0)
+ if (!Callee)
return false;
Instruction *Caller = CS.getInstruction();
const AttributeSet &CallerPAL = CS.getAttributes();
@@ -1044,7 +1265,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
CallerPAL.getParamAttributes(i + 1).hasAttribute(i + 1,
Attribute::ByVal)) {
PointerType *ParamPTy = dyn_cast<PointerType>(ParamTy);
- if (ParamPTy == 0 || !ParamPTy->getElementType()->isSized() || DL == 0)
+ if (!ParamPTy || !ParamPTy->getElementType()->isSized() || !DL)
return false;
Type *CurElTy = ActTy->getPointerElementType();
@@ -1235,7 +1456,7 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS,
// If the call already has the 'nest' attribute somewhere then give up -
// otherwise 'nest' would occur twice after splicing in the chain.
if (Attrs.hasAttrSomewhere(Attribute::Nest))
- return 0;
+ return nullptr;
assert(Tramp &&
"transformCallThroughTrampoline called with incorrect CallSite.");
@@ -1247,7 +1468,7 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS,
const AttributeSet &NestAttrs = NestF->getAttributes();
if (!NestAttrs.isEmpty()) {
unsigned NestIdx = 1;
- Type *NestTy = 0;
+ Type *NestTy = nullptr;
AttributeSet NestAttr;
// Look for a parameter marked with the 'nest' attribute.
diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp
index c2b862a..356803a 100644
--- a/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -19,6 +19,8 @@
using namespace llvm;
using namespace PatternMatch;
+#define DEBUG_TYPE "instcombine"
+
/// DecomposeSimpleLinearExpr - Analyze 'Val', seeing if it is a simple linear
/// expression. If so, decompose it, returning some value X, such that Val is
/// X*Scale+Offset.
@@ -79,7 +81,7 @@ static Value *DecomposeSimpleLinearExpr(Value *Val, unsigned &Scale,
Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
AllocaInst &AI) {
// This requires DataLayout to get the alloca alignment and size information.
- if (!DL) return 0;
+ if (!DL) return nullptr;
PointerType *PTy = cast<PointerType>(CI.getType());
@@ -89,26 +91,26 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
// Get the type really allocated and the type casted to.
Type *AllocElTy = AI.getAllocatedType();
Type *CastElTy = PTy->getElementType();
- if (!AllocElTy->isSized() || !CastElTy->isSized()) return 0;
+ if (!AllocElTy->isSized() || !CastElTy->isSized()) return nullptr;
unsigned AllocElTyAlign = DL->getABITypeAlignment(AllocElTy);
unsigned CastElTyAlign = DL->getABITypeAlignment(CastElTy);
- if (CastElTyAlign < AllocElTyAlign) return 0;
+ if (CastElTyAlign < AllocElTyAlign) return nullptr;
// If the allocation has multiple uses, only promote it if we are strictly
// increasing the alignment of the resultant allocation. If we keep it the
// same, we open the door to infinite loops of various kinds.
- if (!AI.hasOneUse() && CastElTyAlign == AllocElTyAlign) return 0;
+ if (!AI.hasOneUse() && CastElTyAlign == AllocElTyAlign) return nullptr;
uint64_t AllocElTySize = DL->getTypeAllocSize(AllocElTy);
uint64_t CastElTySize = DL->getTypeAllocSize(CastElTy);
- if (CastElTySize == 0 || AllocElTySize == 0) return 0;
+ if (CastElTySize == 0 || AllocElTySize == 0) return nullptr;
// If the allocation has multiple uses, only promote it if we're not
// shrinking the amount of memory being allocated.
uint64_t AllocElTyStoreSize = DL->getTypeStoreSize(AllocElTy);
uint64_t CastElTyStoreSize = DL->getTypeStoreSize(CastElTy);
- if (!AI.hasOneUse() && CastElTyStoreSize < AllocElTyStoreSize) return 0;
+ if (!AI.hasOneUse() && CastElTyStoreSize < AllocElTyStoreSize) return nullptr;
// See if we can satisfy the modulus by pulling a scale out of the array
// size argument.
@@ -120,10 +122,10 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
// If we can now satisfy the modulus, by using a non-1 scale, we really can
// do the xform.
if ((AllocElTySize*ArraySizeScale) % CastElTySize != 0 ||
- (AllocElTySize*ArrayOffset ) % CastElTySize != 0) return 0;
+ (AllocElTySize*ArrayOffset ) % CastElTySize != 0) return nullptr;
unsigned Scale = (AllocElTySize*ArraySizeScale)/CastElTySize;
- Value *Amt = 0;
+ Value *Amt = nullptr;
if (Scale == 1) {
Amt = NumElements;
} else {
@@ -141,6 +143,7 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
AllocaInst *New = AllocaBuilder.CreateAlloca(CastElTy, Amt);
New->setAlignment(AI.getAlignment());
New->takeName(&AI);
+ New->setUsedWithInAlloca(AI.isUsedWithInAlloca());
// If the allocation has multiple real uses, insert a cast and change all
// things that used it to use the new cast. This will also hack on CI, but it
@@ -169,7 +172,7 @@ Value *InstCombiner::EvaluateInDifferentType(Value *V, Type *Ty,
// Otherwise, it must be an instruction.
Instruction *I = cast<Instruction>(V);
- Instruction *Res = 0;
+ Instruction *Res = nullptr;
unsigned Opc = I->getOpcode();
switch (Opc) {
case Instruction::Add:
@@ -245,11 +248,11 @@ isEliminableCastPair(
Instruction::CastOps firstOp = Instruction::CastOps(CI->getOpcode());
Instruction::CastOps secondOp = Instruction::CastOps(opcode);
Type *SrcIntPtrTy = DL && SrcTy->isPtrOrPtrVectorTy() ?
- DL->getIntPtrType(SrcTy) : 0;
+ DL->getIntPtrType(SrcTy) : nullptr;
Type *MidIntPtrTy = DL && MidTy->isPtrOrPtrVectorTy() ?
- DL->getIntPtrType(MidTy) : 0;
+ DL->getIntPtrType(MidTy) : nullptr;
Type *DstIntPtrTy = DL && DstTy->isPtrOrPtrVectorTy() ?
- DL->getIntPtrType(DstTy) : 0;
+ DL->getIntPtrType(DstTy) : nullptr;
unsigned Res = CastInst::isEliminableCastPair(firstOp, secondOp, SrcTy, MidTy,
DstTy, SrcIntPtrTy, MidIntPtrTy,
DstIntPtrTy);
@@ -318,7 +321,7 @@ Instruction *InstCombiner::commonCastTransforms(CastInst &CI) {
return NV;
}
- return 0;
+ return nullptr;
}
/// CanEvaluateTruncated - Return true if we can evaluate the specified
@@ -470,7 +473,7 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
}
// Transform trunc(lshr (zext A), Cst) to eliminate one type conversion.
- Value *A = 0; ConstantInt *Cst = 0;
+ Value *A = nullptr; ConstantInt *Cst = nullptr;
if (Src->hasOneUse() &&
match(Src, m_LShr(m_ZExt(m_Value(A)), m_ConstantInt(Cst)))) {
// We have three types to worry about here, the type of A, the source of
@@ -502,7 +505,7 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
ConstantExpr::getTrunc(Cst, CI.getType()));
}
- return 0;
+ return nullptr;
}
/// transformZExtICmp - Transform (zext icmp) to bitwise / integer operations
@@ -550,7 +553,7 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI,
// If Op1C some other power of two, convert:
uint32_t BitWidth = Op1C->getType()->getBitWidth();
APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
- ComputeMaskedBits(ICI->getOperand(0), KnownZero, KnownOne);
+ computeKnownBits(ICI->getOperand(0), KnownZero, KnownOne);
APInt KnownZeroMask(~KnownZero);
if (KnownZeroMask.isPowerOf2()) { // Exactly 1 possible 1?
@@ -598,8 +601,8 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI,
APInt KnownZeroLHS(BitWidth, 0), KnownOneLHS(BitWidth, 0);
APInt KnownZeroRHS(BitWidth, 0), KnownOneRHS(BitWidth, 0);
- ComputeMaskedBits(LHS, KnownZeroLHS, KnownOneLHS);
- ComputeMaskedBits(RHS, KnownZeroRHS, KnownOneRHS);
+ computeKnownBits(LHS, KnownZeroLHS, KnownOneLHS);
+ computeKnownBits(RHS, KnownZeroRHS, KnownOneRHS);
if (KnownZeroLHS == KnownZeroRHS && KnownOneLHS == KnownOneRHS) {
APInt KnownBits = KnownZeroLHS | KnownOneLHS;
@@ -627,7 +630,7 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI,
}
}
- return 0;
+ return nullptr;
}
/// CanEvaluateZExtd - Determine if the specified value can be computed in the
@@ -758,7 +761,7 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) {
// If this zero extend is only used by a truncate, let the truncate be
// eliminated before we try to optimize this zext.
if (CI.hasOneUse() && isa<TruncInst>(CI.user_back()))
- return 0;
+ return nullptr;
// If one of the common conversion will work, do it.
if (Instruction *Result = commonCastTransforms(CI))
@@ -883,7 +886,7 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) {
return BinaryOperator::CreateXor(New, ConstantInt::get(CI.getType(), 1));
}
- return 0;
+ return nullptr;
}
/// transformSExtICmp - Transform (sext icmp) to bitwise / integer operations
@@ -918,7 +921,7 @@ Instruction *InstCombiner::transformSExtICmp(ICmpInst *ICI, Instruction &CI) {
ICI->isEquality() && (Op1C->isZero() || Op1C->getValue().isPowerOf2())){
unsigned BitWidth = Op1C->getType()->getBitWidth();
APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
- ComputeMaskedBits(Op0, KnownZero, KnownOne);
+ computeKnownBits(Op0, KnownZero, KnownOne);
APInt KnownZeroMask(~KnownZero);
if (KnownZeroMask.isPowerOf2()) {
@@ -967,7 +970,7 @@ Instruction *InstCombiner::transformSExtICmp(ICmpInst *ICI, Instruction &CI) {
}
}
- return 0;
+ return nullptr;
}
/// CanEvaluateSExtd - Return true if we can take the specified value
@@ -1039,7 +1042,7 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) {
// If this sign extend is only used by a truncate, let the truncate be
// eliminated before we try to optimize this sext.
if (CI.hasOneUse() && isa<TruncInst>(CI.user_back()))
- return 0;
+ return nullptr;
if (Instruction *I = commonCastTransforms(CI))
return I;
@@ -1107,9 +1110,9 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) {
// into:
// %a = shl i32 %i, 30
// %d = ashr i32 %a, 30
- Value *A = 0;
+ Value *A = nullptr;
// TODO: Eventually this could be subsumed by EvaluateInDifferentType.
- ConstantInt *BA = 0, *CA = 0;
+ ConstantInt *BA = nullptr, *CA = nullptr;
if (match(Src, m_AShr(m_Shl(m_Trunc(m_Value(A)), m_ConstantInt(BA)),
m_ConstantInt(CA))) &&
BA == CA && A->getType() == CI.getType()) {
@@ -1121,7 +1124,7 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) {
return BinaryOperator::CreateAShr(A, ShAmtV);
}
- return 0;
+ return nullptr;
}
@@ -1133,7 +1136,7 @@ static Constant *FitsInFPType(ConstantFP *CFP, const fltSemantics &Sem) {
(void)F.convert(Sem, APFloat::rmNearestTiesToEven, &losesInfo);
if (!losesInfo)
return ConstantFP::get(CFP->getContext(), F);
- return 0;
+ return nullptr;
}
/// LookThroughFPExtensions - If this is an fp extension instruction, look
@@ -1345,7 +1348,7 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) {
}
}
- return 0;
+ return nullptr;
}
Instruction *InstCombiner::visitFPExt(CastInst &CI) {
@@ -1354,7 +1357,7 @@ Instruction *InstCombiner::visitFPExt(CastInst &CI) {
Instruction *InstCombiner::visitFPToUI(FPToUIInst &FI) {
Instruction *OpI = dyn_cast<Instruction>(FI.getOperand(0));
- if (OpI == 0)
+ if (!OpI)
return commonCastTransforms(FI);
// fptoui(uitofp(X)) --> X
@@ -1374,7 +1377,7 @@ Instruction *InstCombiner::visitFPToUI(FPToUIInst &FI) {
Instruction *InstCombiner::visitFPToSI(FPToSIInst &FI) {
Instruction *OpI = dyn_cast<Instruction>(FI.getOperand(0));
- if (OpI == 0)
+ if (!OpI)
return commonCastTransforms(FI);
// fptosi(sitofp(X)) --> X
@@ -1421,7 +1424,7 @@ Instruction *InstCombiner::visitIntToPtr(IntToPtrInst &CI) {
if (Instruction *I = commonCastTransforms(CI))
return I;
- return 0;
+ return nullptr;
}
/// @brief Implement the transforms for cast of pointer (bitcast/ptrtoint)
@@ -1520,7 +1523,7 @@ static Instruction *OptimizeVectorResize(Value *InVal, VectorType *DestTy,
// there yet.
if (SrcTy->getElementType()->getPrimitiveSizeInBits() !=
DestTy->getElementType()->getPrimitiveSizeInBits())
- return 0;
+ return nullptr;
SrcTy = VectorType::get(DestTy->getElementType(), SrcTy->getNumElements());
InVal = IC.Builder->CreateBitCast(InVal, SrcTy);
@@ -1598,7 +1601,7 @@ static bool CollectInsertionElements(Value *V, unsigned Shift,
ElementIndex = Elements.size() - ElementIndex - 1;
// Fail if multiple elements are inserted into this slot.
- if (Elements[ElementIndex] != 0)
+ if (Elements[ElementIndex])
return false;
Elements[ElementIndex] = V;
@@ -1638,7 +1641,7 @@ static bool CollectInsertionElements(Value *V, unsigned Shift,
if (!V->hasOneUse()) return false;
Instruction *I = dyn_cast<Instruction>(V);
- if (I == 0) return false;
+ if (!I) return false;
switch (I->getOpcode()) {
default: return false; // Unhandled case.
case Instruction::BitCast:
@@ -1659,7 +1662,7 @@ static bool CollectInsertionElements(Value *V, unsigned Shift,
case Instruction::Shl: {
// Must be shifting by a constant that is a multiple of the element size.
ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1));
- if (CI == 0) return false;
+ if (!CI) return false;
Shift += CI->getZExtValue();
if (!isMultipleOfTypeSize(Shift, VecEltTy)) return false;
return CollectInsertionElements(I->getOperand(0), Shift,
@@ -1687,7 +1690,7 @@ static bool CollectInsertionElements(Value *V, unsigned Shift,
static Value *OptimizeIntegerToVectorInsertions(BitCastInst &CI,
InstCombiner &IC) {
// We need to know the target byte order to perform this optimization.
- if (!IC.getDataLayout()) return 0;
+ if (!IC.getDataLayout()) return nullptr;
VectorType *DestVecTy = cast<VectorType>(CI.getType());
Value *IntInput = CI.getOperand(0);
@@ -1695,14 +1698,14 @@ static Value *OptimizeIntegerToVectorInsertions(BitCastInst &CI,
SmallVector<Value*, 8> Elements(DestVecTy->getNumElements());
if (!CollectInsertionElements(IntInput, 0, Elements,
DestVecTy->getElementType(), IC))
- return 0;
+ return nullptr;
// If we succeeded, we know that all of the element are specified by Elements
// or are zero if Elements has a null entry. Recast this as a set of
// insertions.
Value *Result = Constant::getNullValue(CI.getType());
for (unsigned i = 0, e = Elements.size(); i != e; ++i) {
- if (Elements[i] == 0) continue; // Unset element.
+ if (!Elements[i]) continue; // Unset element.
Result = IC.Builder->CreateInsertElement(Result, Elements[i],
IC.Builder->getInt32(i));
@@ -1716,14 +1719,14 @@ static Value *OptimizeIntegerToVectorInsertions(BitCastInst &CI,
/// bitcast. The various long double bitcasts can't get in here.
static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI,InstCombiner &IC){
// We need to know the target byte order to perform this optimization.
- if (!IC.getDataLayout()) return 0;
+ if (!IC.getDataLayout()) return nullptr;
Value *Src = CI.getOperand(0);
Type *DestTy = CI.getType();
// If this is a bitcast from int to float, check to see if the int is an
// extraction from a vector.
- Value *VecInput = 0;
+ Value *VecInput = nullptr;
// bitcast(trunc(bitcast(somevector)))
if (match(Src, m_Trunc(m_BitCast(m_Value(VecInput)))) &&
isa<VectorType>(VecInput->getType())) {
@@ -1747,7 +1750,7 @@ static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI,InstCombiner &IC){
}
// bitcast(trunc(lshr(bitcast(somevector), cst))
- ConstantInt *ShAmt = 0;
+ ConstantInt *ShAmt = nullptr;
if (match(Src, m_Trunc(m_LShr(m_BitCast(m_Value(VecInput)),
m_ConstantInt(ShAmt)))) &&
isa<VectorType>(VecInput->getType())) {
@@ -1769,7 +1772,7 @@ static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI,InstCombiner &IC){
return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(Elt));
}
}
- return 0;
+ return nullptr;
}
Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 8c0ad52..02e8bf1 100644
--- a/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -24,6 +24,8 @@
using namespace llvm;
using namespace PatternMatch;
+#define DEBUG_TYPE "instcombine"
+
static ConstantInt *getOne(Constant *C) {
return ConstantInt::get(cast<IntegerType>(C->getType()), 1);
}
@@ -218,15 +220,15 @@ Instruction *InstCombiner::
FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
CmpInst &ICI, ConstantInt *AndCst) {
// We need TD information to know the pointer size unless this is inbounds.
- if (!GEP->isInBounds() && DL == 0)
- return 0;
+ if (!GEP->isInBounds() && !DL)
+ return nullptr;
Constant *Init = GV->getInitializer();
if (!isa<ConstantArray>(Init) && !isa<ConstantDataArray>(Init))
- return 0;
+ return nullptr;
uint64_t ArrayElementCount = Init->getType()->getArrayNumElements();
- if (ArrayElementCount > 1024) return 0; // Don't blow up on huge arrays.
+ if (ArrayElementCount > 1024) return nullptr; // Don't blow up on huge arrays.
// There are many forms of this optimization we can handle; for now, just do
// the simple index into a single-dimensional array.
@@ -236,7 +238,7 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
!isa<ConstantInt>(GEP->getOperand(1)) ||
!cast<ConstantInt>(GEP->getOperand(1))->isZero() ||
isa<Constant>(GEP->getOperand(2)))
- return 0;
+ return nullptr;
// Check that indices after the variable are constants and in-range for the
// type they index. Collect the indices. This is typically for arrays of
@@ -246,18 +248,18 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
Type *EltTy = Init->getType()->getArrayElementType();
for (unsigned i = 3, e = GEP->getNumOperands(); i != e; ++i) {
ConstantInt *Idx = dyn_cast<ConstantInt>(GEP->getOperand(i));
- if (Idx == 0) return 0; // Variable index.
+ if (!Idx) return nullptr; // Variable index.
uint64_t IdxVal = Idx->getZExtValue();
- if ((unsigned)IdxVal != IdxVal) return 0; // Too large array index.
+ if ((unsigned)IdxVal != IdxVal) return nullptr; // Too large array index.
if (StructType *STy = dyn_cast<StructType>(EltTy))
EltTy = STy->getElementType(IdxVal);
else if (ArrayType *ATy = dyn_cast<ArrayType>(EltTy)) {
- if (IdxVal >= ATy->getNumElements()) return 0;
+ if (IdxVal >= ATy->getNumElements()) return nullptr;
EltTy = ATy->getElementType();
} else {
- return 0; // Unknown type.
+ return nullptr; // Unknown type.
}
LaterIndices.push_back(IdxVal);
@@ -296,7 +298,7 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
Constant *CompareRHS = cast<Constant>(ICI.getOperand(1));
for (unsigned i = 0, e = ArrayElementCount; i != e; ++i) {
Constant *Elt = Init->getAggregateElement(i);
- if (Elt == 0) return 0;
+ if (!Elt) return nullptr;
// If this is indexing an array of structures, get the structure element.
if (!LaterIndices.empty())
@@ -321,7 +323,7 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
// If we can't compute the result for any of the elements, we have to give
// up evaluating the entire conditional.
- if (!isa<ConstantInt>(C)) return 0;
+ if (!isa<ConstantInt>(C)) return nullptr;
// Otherwise, we know if the comparison is true or false for this element,
// update our state machines.
@@ -375,7 +377,7 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
if ((i & 8) == 0 && i >= 64 && SecondTrueElement == Overdefined &&
SecondFalseElement == Overdefined && TrueRangeEnd == Overdefined &&
FalseRangeEnd == Overdefined)
- return 0;
+ return nullptr;
}
// Now that we've scanned the entire array, emit our new comparison(s). We
@@ -467,7 +469,7 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
// of this load, replace it with computation that does:
// ((magic_cst >> i) & 1) != 0
{
- Type *Ty = 0;
+ Type *Ty = nullptr;
// Look for an appropriate type:
// - The type of Idx if the magic fits
@@ -480,7 +482,7 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
else if (ArrayElementCount <= 32)
Ty = Type::getInt32Ty(Init->getContext());
- if (Ty != 0) {
+ if (Ty) {
Value *V = Builder->CreateIntCast(Idx, Ty, false);
V = Builder->CreateLShr(ConstantInt::get(Ty, MagicBitvector), V);
V = Builder->CreateAnd(ConstantInt::get(Ty, 1), V);
@@ -488,7 +490,7 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
}
}
- return 0;
+ return nullptr;
}
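
A self-contained C++ model of the magic-bitvector rewrite sketched in the comment above, using an invented table and predicate:

#include <cassert>
#include <cstdint>

// Precompute one bit per array element recording whether the comparison
// holds, then test ((magic >> i) & 1) != 0 instead of loading the element.
int main() {
  const int table[4] = {3, 7, 1, 9};
  uint32_t magic = 0;
  for (unsigned i = 0; i != 4; ++i)
    magic |= uint32_t(table[i] > 2) << i;  // cmp result for element i
  for (unsigned i = 0; i != 4; ++i)
    assert(((magic >> i) & 1) == (table[i] > 2 ? 1u : 0u));
}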
@@ -533,7 +535,7 @@ static Value *EvaluateGEPOffsetExpression(User *GEP, InstCombiner &IC) {
// If there are no variable indices, we must have a constant offset, just
// evaluate it the general way.
- if (i == e) return 0;
+ if (i == e) return nullptr;
Value *VariableIdx = GEP->getOperand(i);
// Determine the scale factor of the variable element. For example, this is
@@ -543,7 +545,7 @@ static Value *EvaluateGEPOffsetExpression(User *GEP, InstCombiner &IC) {
// Verify that there are no other variable indices. If so, emit the hard way.
for (++i, ++GTI; i != e; ++i, ++GTI) {
ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(i));
- if (!CI) return 0;
+ if (!CI) return nullptr;
// Compute the aggregate offset of constant indices.
if (CI->isZero()) continue;
@@ -587,7 +589,7 @@ static Value *EvaluateGEPOffsetExpression(User *GEP, InstCombiner &IC) {
// multiple of the variable scale.
int64_t NewOffs = Offset / (int64_t)VariableScale;
if (Offset != NewOffs*(int64_t)VariableScale)
- return 0;
+ return nullptr;
// Okay, we can do this evaluation. Start by converting the index to intptr.
if (VariableIdx->getType() != IntPtrTy)
@@ -608,7 +610,7 @@ Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
// e.g. "&foo[0] <s &foo[1]" can't be folded to "true" because "foo" could be
// the maximum signed value for the pointer type.
if (ICmpInst::isSigned(Cond))
- return 0;
+ return nullptr;
// Look through bitcasts.
if (BitCastInst *BCI = dyn_cast<BitCastInst>(RHS))
@@ -623,7 +625,7 @@ Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
Value *Offset = EvaluateGEPOffsetExpression(GEPLHS, *this);
// If not, synthesize the offset the hard way.
- if (Offset == 0)
+ if (!Offset)
Offset = EmitGEPOffset(GEPLHS);
return new ICmpInst(ICmpInst::getSignedPredicate(Cond), Offset,
Constant::getNullValue(Offset->getType()));
@@ -661,7 +663,7 @@ Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
// Otherwise, the base pointers are different and the indices are
// different, bail out.
- return 0;
+ return nullptr;
}
// If one of the GEPs has all zero indices, recurse.
@@ -729,7 +731,7 @@ Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
return new ICmpInst(ICmpInst::getSignedPredicate(Cond), L, R);
}
}
- return 0;
+ return nullptr;
}
/// FoldICmpAddOpCst - Fold "icmp pred (X+CI), X".
@@ -812,11 +814,11 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,
// if it finds it.
bool DivIsSigned = DivI->getOpcode() == Instruction::SDiv;
if (!ICI.isEquality() && DivIsSigned != ICI.isSigned())
- return 0;
+ return nullptr;
if (DivRHS->isZero())
- return 0; // The ProdOV computation fails on divide by zero.
+ return nullptr; // The ProdOV computation fails on divide by zero.
if (DivIsSigned && DivRHS->isAllOnesValue())
- return 0; // The overflow computation also screws up here
+ return nullptr; // The overflow computation also screws up here
if (DivRHS->isOne()) {
// This eliminates some funny cases with INT_MIN.
ICI.setOperand(0, DivI->getOperand(0)); // X/1 == X.
@@ -850,7 +852,7 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,
// overflow variable is set to 0 if its corresponding bound variable is valid,
// -1 if overflowed off the bottom end, or +1 if overflowed off the top end.
int LoOverflow = 0, HiOverflow = 0;
- Constant *LoBound = 0, *HiBound = 0;
+ Constant *LoBound = nullptr, *HiBound = nullptr;
if (!DivIsSigned) { // udiv
// e.g. X/5 op 3 --> [15, 20)
@@ -890,7 +892,7 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,
HiBound = cast<ConstantInt>(ConstantExpr::getNeg(RangeSize));
if (HiBound == DivRHS) { // -INTMIN = INTMIN
HiOverflow = 1; // [INTMIN+1, overflow)
- HiBound = 0; // e.g. X/INTMIN = 0 --> X > INTMIN
+ HiBound = nullptr; // e.g. X/INTMIN = 0 --> X > INTMIN
}
} else if (CmpRHSV.isStrictlyPositive()) { // (X / neg) op pos
// e.g. X/-5 op 3 --> [-19, -14)
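The bound computations above translate a compare-against-divide into a range test. A tiny C++ check of the udiv example from the earlier comment, where X/5 == 3 becomes membership in [15, 20):

#include <cassert>
#include <cstdint>

int main() {
  // X/5 == 3 holds exactly for X in [15, 20), so the divide can be replaced
  // by compares against the precomputed LoBound/HiBound.
  for (uint32_t x = 0; x < 40; ++x)
    assert((x / 5 == 3) == (x >= 15 && x < 20));
}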
@@ -964,20 +966,20 @@ Instruction *InstCombiner::FoldICmpShrCst(ICmpInst &ICI, BinaryOperator *Shr,
uint32_t TypeBits = CmpRHSV.getBitWidth();
uint32_t ShAmtVal = (uint32_t)ShAmt->getLimitedValue(TypeBits);
if (ShAmtVal >= TypeBits || ShAmtVal == 0)
- return 0;
+ return nullptr;
if (!ICI.isEquality()) {
// If we have an unsigned comparison and an ashr, we can't simplify this.
// Similarly for signed comparisons with lshr.
if (ICI.isSigned() != (Shr->getOpcode() == Instruction::AShr))
- return 0;
+ return nullptr;
// Otherwise, all lshr and most exact ashr's are equivalent to a udiv/sdiv
// by a power of 2. Since we already have logic to simplify these,
// transform to div and then simplify the resultant comparison.
if (Shr->getOpcode() == Instruction::AShr &&
(!Shr->isExact() || ShAmtVal == TypeBits - 1))
- return 0;
+ return nullptr;
// Revisit the shift (to delete it).
Worklist.Add(Shr);
@@ -994,7 +996,7 @@ Instruction *InstCombiner::FoldICmpShrCst(ICmpInst &ICI, BinaryOperator *Shr,
// If the builder folded the binop, just return it.
BinaryOperator *TheDiv = dyn_cast<BinaryOperator>(Tmp);
- if (TheDiv == 0)
+ if (!TheDiv)
return &ICI;
// Otherwise, fold this div/compare.
@@ -1037,7 +1039,7 @@ Instruction *InstCombiner::FoldICmpShrCst(ICmpInst &ICI, BinaryOperator *Shr,
Mask, Shr->getName()+".mask");
return new ICmpInst(ICI.getPredicate(), And, ShiftedCmpRHS);
}
- return 0;
+ return nullptr;
}
@@ -1056,7 +1058,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
unsigned DstBits = LHSI->getType()->getPrimitiveSizeInBits(),
SrcBits = LHSI->getOperand(0)->getType()->getPrimitiveSizeInBits();
APInt KnownZero(SrcBits, 0), KnownOne(SrcBits, 0);
- ComputeMaskedBits(LHSI->getOperand(0), KnownZero, KnownOne);
+ computeKnownBits(LHSI->getOperand(0), KnownZero, KnownOne);
// If all the high bits are known, we can do this xform.
if ((KnownZero|KnownOne).countLeadingOnes() >= SrcBits-DstBits) {
@@ -1181,10 +1183,10 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
// access.
BinaryOperator *Shift = dyn_cast<BinaryOperator>(LHSI->getOperand(0));
if (Shift && !Shift->isShift())
- Shift = 0;
+ Shift = nullptr;
ConstantInt *ShAmt;
- ShAmt = Shift ? dyn_cast<ConstantInt>(Shift->getOperand(1)) : 0;
+ ShAmt = Shift ? dyn_cast<ConstantInt>(Shift->getOperand(1)) : nullptr;
// This seemingly simple opportunity to fold away a shift turns out to
// be rather complicated. See PR17827
@@ -1777,7 +1779,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
}
}
}
- return 0;
+ return nullptr;
}
/// visitICmpInstWithCastAndCast - Handle icmp (cast x to y), (cast/cst).
@@ -1794,7 +1796,7 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) {
// integer type is the same size as the pointer type.
if (DL && LHSCI->getOpcode() == Instruction::PtrToInt &&
DL->getPointerTypeSizeInBits(SrcTy) == DestTy->getIntegerBitWidth()) {
- Value *RHSOp = 0;
+ Value *RHSOp = nullptr;
if (Constant *RHSC = dyn_cast<Constant>(ICI.getOperand(1))) {
RHSOp = ConstantExpr::getIntToPtr(RHSC, SrcTy);
} else if (PtrToIntInst *RHSC = dyn_cast<PtrToIntInst>(ICI.getOperand(1))) {
@@ -1812,7 +1814,7 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) {
// Enforce this.
if (LHSCI->getOpcode() != Instruction::ZExt &&
LHSCI->getOpcode() != Instruction::SExt)
- return 0;
+ return nullptr;
bool isSignedExt = LHSCI->getOpcode() == Instruction::SExt;
bool isSignedCmp = ICI.isSigned();
@@ -1821,12 +1823,12 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) {
// Not an extension from the same type?
RHSCIOp = CI->getOperand(0);
if (RHSCIOp->getType() != LHSCIOp->getType())
- return 0;
+ return nullptr;
// If the signedness of the two casts doesn't agree (i.e. one is a sext
// and the other is a zext), then we can't handle this.
if (CI->getOpcode() != LHSCI->getOpcode())
- return 0;
+ return nullptr;
// Deal with equality cases early.
if (ICI.isEquality())
@@ -1844,7 +1846,7 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) {
// If we aren't dealing with a constant on the RHS, exit early
ConstantInt *CI = dyn_cast<ConstantInt>(ICI.getOperand(1));
if (!CI)
- return 0;
+ return nullptr;
// Compute the constant that would happen if we truncated to SrcTy then
// reextended to DestTy.
@@ -1873,7 +1875,7 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) {
// by SimplifyICmpInst, so only deal with the tricky case.
if (isSignedCmp || !isSignedExt)
- return 0;
+ return nullptr;
// Evaluate the comparison for LT (we invert for GT below). LE and GE cases
// should have been folded away previously and not enter in here.
@@ -1909,12 +1911,12 @@ static Instruction *ProcessUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B,
// In order to eliminate the add-with-constant, the compare can be its only
// use.
Instruction *AddWithCst = cast<Instruction>(I.getOperand(0));
- if (!AddWithCst->hasOneUse()) return 0;
+ if (!AddWithCst->hasOneUse()) return nullptr;
// If CI2 is 2^7, 2^15, 2^31, then it might be an sadd.with.overflow.
- if (!CI2->getValue().isPowerOf2()) return 0;
+ if (!CI2->getValue().isPowerOf2()) return nullptr;
unsigned NewWidth = CI2->getValue().countTrailingZeros();
- if (NewWidth != 7 && NewWidth != 15 && NewWidth != 31) return 0;
+ if (NewWidth != 7 && NewWidth != 15 && NewWidth != 31) return nullptr;
// The width of the new add formed is 1 more than the bias.
++NewWidth;
@@ -1922,7 +1924,7 @@ static Instruction *ProcessUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B,
// Check to see that CI1 is an all-ones value with NewWidth bits.
if (CI1->getBitWidth() == NewWidth ||
CI1->getValue() != APInt::getLowBitsSet(CI1->getBitWidth(), NewWidth))
- return 0;
+ return nullptr;
// This is only really a signed overflow check if the inputs have been
// sign-extended; check for that condition. For example, if CI2 is 2^31 and
@@ -1930,7 +1932,7 @@ static Instruction *ProcessUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B,
unsigned NeededSignBits = CI1->getBitWidth() - NewWidth + 1;
if (IC.ComputeNumSignBits(A) < NeededSignBits ||
IC.ComputeNumSignBits(B) < NeededSignBits)
- return 0;
+ return nullptr;
// In order to replace the original add with a narrower
// llvm.sadd.with.overflow, the only uses allowed are the add-with-constant
@@ -1946,8 +1948,8 @@ static Instruction *ProcessUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B,
// original add had another add which was then immediately truncated, we
// could still do the transformation.
TruncInst *TI = dyn_cast<TruncInst>(U);
- if (TI == 0 ||
- TI->getType()->getPrimitiveSizeInBits() > NewWidth) return 0;
+ if (!TI || TI->getType()->getPrimitiveSizeInBits() > NewWidth)
+ return nullptr;
}
// If the pattern matches, truncate the inputs to the narrower type and
@@ -1983,11 +1985,11 @@ static Instruction *ProcessUAddIdiom(Instruction &I, Value *OrigAddV,
InstCombiner &IC) {
// Don't bother doing this transformation for pointers, don't do it for
// vectors.
- if (!isa<IntegerType>(OrigAddV->getType())) return 0;
+ if (!isa<IntegerType>(OrigAddV->getType())) return nullptr;
// If the add is a constant expr, then we don't bother transforming it.
Instruction *OrigAdd = dyn_cast<Instruction>(OrigAddV);
- if (OrigAdd == 0) return 0;
+ if (!OrigAdd) return nullptr;
Value *LHS = OrigAdd->getOperand(0), *RHS = OrigAdd->getOperand(1);
@@ -2008,6 +2010,236 @@ static Instruction *ProcessUAddIdiom(Instruction &I, Value *OrigAddV,
return ExtractValueInst::Create(Call, 1, "uadd.overflow");
}
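
For reference, the unsigned-add idiom this function recognizes, as a hedged standalone sketch with invented names:

#include <cassert>
#include <cstdint>

// For unsigned a + b, the sum wraps exactly when it compares below either
// operand; that condition is what uadd.with.overflow's second result reports.
static bool uaddOverflows(uint32_t a, uint32_t b) {
  return a + b < a;
}

int main() {
  assert(uaddOverflows(UINT32_MAX, 1));
  assert(!uaddOverflows(7, 8));
}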
+/// \brief Recognize and process an idiom involving a test for multiplication
+/// overflow.
+///
+/// The caller has matched a pattern of the form:
+/// I = cmp u (mul(zext A, zext B)), V
+/// The function checks whether this is a test for overflow and, if so,
+/// replaces the multiplication with a call to the 'mul.with.overflow'
+/// intrinsic.
+///
+/// \param I Compare instruction.
+/// \param MulVal Result of the 'mul' instruction. It is one of the arguments
+/// of the compare instruction. Must be of integer type.
+/// \param OtherVal The other argument of the compare instruction.
+/// \returns Instruction that must replace the compare instruction, or null if
+/// no replacement is required.
+static Instruction *ProcessUMulZExtIdiom(ICmpInst &I, Value *MulVal,
+ Value *OtherVal, InstCombiner &IC) {
+ assert(I.getOperand(0) == MulVal || I.getOperand(1) == MulVal);
+ assert(I.getOperand(0) == OtherVal || I.getOperand(1) == OtherVal);
+ assert(isa<IntegerType>(MulVal->getType()));
+ Instruction *MulInstr = cast<Instruction>(MulVal);
+ assert(MulInstr->getOpcode() == Instruction::Mul);
+
+ Instruction *LHS = cast<Instruction>(MulInstr->getOperand(0)),
+ *RHS = cast<Instruction>(MulInstr->getOperand(1));
+ assert(LHS->getOpcode() == Instruction::ZExt);
+ assert(RHS->getOpcode() == Instruction::ZExt);
+ Value *A = LHS->getOperand(0), *B = RHS->getOperand(0);
+
+ // Calculate type and width of the result produced by mul.with.overflow.
+ Type *TyA = A->getType(), *TyB = B->getType();
+ unsigned WidthA = TyA->getPrimitiveSizeInBits(),
+ WidthB = TyB->getPrimitiveSizeInBits();
+ unsigned MulWidth;
+ Type *MulType;
+ if (WidthB > WidthA) {
+ MulWidth = WidthB;
+ MulType = TyB;
+ } else {
+ MulWidth = WidthA;
+ MulType = TyA;
+ }
+
+ // In order to replace the original mul with a narrower mul.with.overflow,
+ // all uses must ignore the upper bits of the product. The number of used
+ // low bits must not be greater than the width of mul.with.overflow.
+ if (MulVal->hasNUsesOrMore(2))
+ for (User *U : MulVal->users()) {
+ if (U == &I)
+ continue;
+ if (TruncInst *TI = dyn_cast<TruncInst>(U)) {
+ // Check if truncation ignores bits above MulWidth.
+ unsigned TruncWidth = TI->getType()->getPrimitiveSizeInBits();
+ if (TruncWidth > MulWidth)
+ return nullptr;
+ } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(U)) {
+ // Check if AND ignores bits above MulWidth.
+ if (BO->getOpcode() != Instruction::And)
+ return nullptr;
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->getOperand(1))) {
+ const APInt &CVal = CI->getValue();
+ if (CVal.getBitWidth() - CVal.countLeadingZeros() > MulWidth)
+ return nullptr;
+ }
+ } else {
+ // Other uses prohibit this transformation.
+ return nullptr;
+ }
+ }
+
+ // Recognize patterns
+ switch (I.getPredicate()) {
+ case ICmpInst::ICMP_EQ:
+ case ICmpInst::ICMP_NE:
+ // Recognize pattern:
+ // mulval = mul(zext A, zext B)
+ // cmp eq/neq mulval, zext trunc mulval
+ if (ZExtInst *Zext = dyn_cast<ZExtInst>(OtherVal))
+ if (Zext->hasOneUse()) {
+ Value *ZextArg = Zext->getOperand(0);
+ if (TruncInst *Trunc = dyn_cast<TruncInst>(ZextArg))
+ if (Trunc->getType()->getPrimitiveSizeInBits() == MulWidth)
+ break; // Recognized
+ }
+
+ // Recognize pattern:
+ // mulval = mul(zext A, zext B)
+ // cmp eq/neq mulval, and(mulval, mask), where mask selects low MulWidth bits.
+ ConstantInt *CI;
+ Value *ValToMask;
+ if (match(OtherVal, m_And(m_Value(ValToMask), m_ConstantInt(CI)))) {
+ if (ValToMask != MulVal)
+ return nullptr;
+ const APInt &CVal = CI->getValue() + 1;
+ if (CVal.isPowerOf2()) {
+ unsigned MaskWidth = CVal.logBase2();
+ if (MaskWidth == MulWidth)
+ break; // Recognized
+ }
+ }
+ return nullptr;
+
+ case ICmpInst::ICMP_UGT:
+ // Recognize pattern:
+ // mulval = mul(zext A, zext B)
+ // cmp ugt mulval, max
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(OtherVal)) {
+ APInt MaxVal = APInt::getMaxValue(MulWidth);
+ MaxVal = MaxVal.zext(CI->getBitWidth());
+ if (MaxVal.eq(CI->getValue()))
+ break; // Recognized
+ }
+ return nullptr;
+
+ case ICmpInst::ICMP_UGE:
+ // Recognize pattern:
+ // mulval = mul(zext A, zext B)
+ // cmp uge mulval, max+1
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(OtherVal)) {
+ APInt MaxVal = APInt::getOneBitSet(CI->getBitWidth(), MulWidth);
+ if (MaxVal.eq(CI->getValue()))
+ break; // Recognized
+ }
+ return nullptr;
+
+ case ICmpInst::ICMP_ULE:
+ // Recognize pattern:
+ // mulval = mul(zext A, zext B)
+ // cmp ule mulval, max
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(OtherVal)) {
+ APInt MaxVal = APInt::getMaxValue(MulWidth);
+ MaxVal = MaxVal.zext(CI->getBitWidth());
+ if (MaxVal.eq(CI->getValue()))
+ break; // Recognized
+ }
+ return nullptr;
+
+ case ICmpInst::ICMP_ULT:
+ // Recognize pattern:
+ // mulval = mul(zext A, zext B)
+ // cmp ult mulval, max+1
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(OtherVal)) {
+ APInt MaxVal = APInt::getOneBitSet(CI->getBitWidth(), MulWidth);
+ if (MaxVal.eq(CI->getValue()))
+ break; // Recognized
+ }
+ return nullptr;
+
+ default:
+ return nullptr;
+ }
+
+ InstCombiner::BuilderTy *Builder = IC.Builder;
+ Builder->SetInsertPoint(MulInstr);
+ Module *M = I.getParent()->getParent()->getParent();
+
+ // Replace: mul(zext A, zext B) --> mul.with.overflow(A, B)
+ Value *MulA = A, *MulB = B;
+ if (WidthA < MulWidth)
+ MulA = Builder->CreateZExt(A, MulType);
+ if (WidthB < MulWidth)
+ MulB = Builder->CreateZExt(B, MulType);
+ Value *F =
+ Intrinsic::getDeclaration(M, Intrinsic::umul_with_overflow, MulType);
+ CallInst *Call = Builder->CreateCall2(F, MulA, MulB, "umul");
+ IC.Worklist.Add(MulInstr);
+
+ // If there are uses of the mul result other than the comparison, we know
+ // that they are truncations or binary ANDs. Change them to use the result
+ // of mul.with.overflow and adjust the mask/size accordingly.
+ if (MulVal->hasNUsesOrMore(2)) {
+ Value *Mul = Builder->CreateExtractValue(Call, 0, "umul.value");
+ for (User *U : MulVal->users()) {
+ if (U == &I || U == OtherVal)
+ continue;
+ if (TruncInst *TI = dyn_cast<TruncInst>(U)) {
+ if (TI->getType()->getPrimitiveSizeInBits() == MulWidth)
+ IC.ReplaceInstUsesWith(*TI, Mul);
+ else
+ TI->setOperand(0, Mul);
+ } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(U)) {
+ assert(BO->getOpcode() == Instruction::And);
+ // Replace (mul & mask) --> zext (mul.with.overflow & short_mask)
+ ConstantInt *CI = cast<ConstantInt>(BO->getOperand(1));
+ APInt ShortMask = CI->getValue().trunc(MulWidth);
+ Value *ShortAnd = Builder->CreateAnd(Mul, ShortMask);
+ Instruction *Zext =
+ cast<Instruction>(Builder->CreateZExt(ShortAnd, BO->getType()));
+ IC.Worklist.Add(Zext);
+ IC.ReplaceInstUsesWith(*BO, Zext);
+ } else {
+ llvm_unreachable("Unexpected binary operation");
+ }
+ IC.Worklist.Add(cast<Instruction>(U));
+ }
+ }
+ if (isa<Instruction>(OtherVal))
+ IC.Worklist.Add(cast<Instruction>(OtherVal));
+
+ // The original icmp gets replaced with the overflow value, possibly
+ // inverted depending on the predicate.
+ bool Inverse = false;
+ switch (I.getPredicate()) {
+ case ICmpInst::ICMP_NE:
+ break;
+ case ICmpInst::ICMP_EQ:
+ Inverse = true;
+ break;
+ case ICmpInst::ICMP_UGT:
+ case ICmpInst::ICMP_UGE:
+ if (I.getOperand(0) == MulVal)
+ break;
+ Inverse = true;
+ break;
+ case ICmpInst::ICMP_ULT:
+ case ICmpInst::ICMP_ULE:
+ if (I.getOperand(1) == MulVal)
+ break;
+ Inverse = true;
+ break;
+ default:
+ llvm_unreachable("Unexpected predicate");
+ }
+ if (Inverse) {
+ Value *Res = Builder->CreateExtractValue(Call, 1);
+ return BinaryOperator::CreateNot(Res);
+ }
+
+ return ExtractValueInst::Create(Call, 1);
+}
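In isolation, the replacement built above reduces to a few builder calls. A minimal sketch, assuming the LLVM 3.5-era API this patch itself uses (CreateCall2, single-index CreateExtractValue); emitUMulOverflow is a hypothetical helper name:

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
using namespace llvm;

// Emit {MulTy, i1} @llvm.umul.with.overflow(A, B) and return the i1
// overflow bit, which is what replaces the original wide compare.
static Value *emitUMulOverflow(IRBuilder<> &Builder, Module *M,
                               Value *A, Value *B, Type *MulTy) {
  Value *F = Intrinsic::getDeclaration(M, Intrinsic::umul_with_overflow, MulTy);
  CallInst *Call = Builder.CreateCall2(F, A, B, "umul");
  return Builder.CreateExtractValue(Call, 1, "umul.ov");
}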
+
// DemandedBitsLHSMask - When performing a comparison against a constant,
// it is possible that not all the bits in the LHS are demanded. This helper
// method computes the mask that IS demanded.
@@ -2178,7 +2410,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
// See if we are doing a comparison with a constant.
if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
- Value *A = 0, *B = 0;
+ Value *A = nullptr, *B = nullptr;
// Match the following pattern, which is a common idiom when writing
// overflow-safe integer arithmetic functions. The source performs an
@@ -2293,15 +2525,15 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
APInt Op0KnownZeroInverted = ~Op0KnownZero;
if (~Op1KnownZero == 0 && Op0KnownZeroInverted.isPowerOf2()) {
// If the LHS is an AND with the same constant, look through it.
- Value *LHS = 0;
- ConstantInt *LHSC = 0;
+ Value *LHS = nullptr;
+ ConstantInt *LHSC = nullptr;
if (!match(Op0, m_And(m_Value(LHS), m_ConstantInt(LHSC))) ||
LHSC->getValue() != Op0KnownZeroInverted)
LHS = Op0;
// If the LHS is 1 << x, and we know the result is a power of 2 like 8,
// then turn "((1 << x)&8) == 0" into "x != 3".
- Value *X = 0;
+ Value *X = nullptr;
if (match(LHS, m_Shl(m_One(), m_Value(X)))) {
unsigned CmpVal = Op0KnownZeroInverted.countTrailingZeros();
return new ICmpInst(ICmpInst::ICMP_NE, X,
@@ -2330,15 +2562,15 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
APInt Op0KnownZeroInverted = ~Op0KnownZero;
if (~Op1KnownZero == 0 && Op0KnownZeroInverted.isPowerOf2()) {
// If the LHS is an AND with the same constant, look through it.
- Value *LHS = 0;
- ConstantInt *LHSC = 0;
+ Value *LHS = nullptr;
+ ConstantInt *LHSC = nullptr;
if (!match(Op0, m_And(m_Value(LHS), m_ConstantInt(LHSC))) ||
LHSC->getValue() != Op0KnownZeroInverted)
LHS = Op0;
// If the LHS is 1 << x, and we know the result is a power of 2 like 8,
// then turn "((1 << x)&8) != 0" into "x == 3".
- Value *X = 0;
+ Value *X = nullptr;
if (match(LHS, m_Shl(m_One(), m_Value(X)))) {
unsigned CmpVal = Op0KnownZeroInverted.countTrailingZeros();
return new ICmpInst(ICmpInst::ICMP_EQ, X,
@@ -2470,7 +2702,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
if (SelectInst *SI = dyn_cast<SelectInst>(*I.user_begin()))
if ((SI->getOperand(1) == Op0 && SI->getOperand(2) == Op1) ||
(SI->getOperand(2) == Op0 && SI->getOperand(1) == Op1))
- return 0;
+ return nullptr;
// See if we are doing a comparison between a constant and an instruction that
// can be folded into the comparison.
@@ -2506,7 +2738,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
// If either operand of the select is a constant, we can fold the
// comparison into the select arms, which will cause one to be
// constant folded and the select turned into a bitwise or.
- Value *Op1 = 0, *Op2 = 0;
+ Value *Op1 = nullptr, *Op2 = nullptr;
if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(1)))
Op1 = ConstantExpr::getICmp(I.getPredicate(), C, RHSC);
if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(2)))
@@ -2618,7 +2850,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
// Analyze the case when either Op0 or Op1 is an add instruction.
// Op0 = A + B (or A and B are null); Op1 = C + D (or C and D are null).
- Value *A = 0, *B = 0, *C = 0, *D = 0;
+ Value *A = nullptr, *B = nullptr, *C = nullptr, *D = nullptr;
if (BO0 && BO0->getOpcode() == Instruction::Add)
A = BO0->getOperand(0), B = BO0->getOperand(1);
if (BO1 && BO1->getOpcode() == Instruction::Add)
@@ -2713,7 +2945,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
// Analyze the case when either Op0 or Op1 is a sub instruction.
// Op0 = A - B (or A and B are null); Op1 = C - D (or C and D are null).
- A = 0; B = 0; C = 0; D = 0;
+ A = nullptr; B = nullptr; C = nullptr; D = nullptr;
if (BO0 && BO0->getOpcode() == Instruction::Sub)
A = BO0->getOperand(0), B = BO0->getOperand(1);
if (BO1 && BO1->getOpcode() == Instruction::Sub)
@@ -2739,7 +2971,17 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
BO0->hasOneUse() && BO1->hasOneUse())
return new ICmpInst(Pred, D, B);
- BinaryOperator *SRem = NULL;
+ // icmp (0-X) < cst --> X > -cst
+ if (NoOp0WrapProblem && ICmpInst::isSigned(Pred)) {
+ Value *X;
+ if (match(BO0, m_Neg(m_Value(X))))
+ if (ConstantInt *RHSC = dyn_cast<ConstantInt>(Op1))
+ if (!RHSC->isMinValue(/*isSigned=*/true))
+ return new ICmpInst(I.getSwappedPredicate(), X,
+ ConstantExpr::getNeg(RHSC));
+ }
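As a sanity check on the arithmetic, here is a scalar C++ model of this fold, shown for the strict signed case (a sketch; foldHolds is a hypothetical name, and the INT_MIN guard mirrors the isMinValue check above):

#include <cassert>
#include <climits>

// -x < c  <=>  x > -c, as long as c != INT_MIN (so -c is representable)
// and x != INT_MIN (the no-wrap requirement on the negation).
static bool foldHolds(int x, int c) {
  if (c == INT_MIN || x == INT_MIN)
    return true; // the fold does not fire in these cases
  return ((-x) < c) == (x > -c);
}

int main() {
  assert(foldHolds(5, -3) && foldHolds(-7, 2) && foldHolds(0, 1));
  return 0;
}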
+
+ BinaryOperator *SRem = nullptr;
// icmp (srem X, Y), Y
if (BO0 && BO0->getOpcode() == Instruction::SRem &&
Op1 == BO0->getOperand(1))
@@ -2877,6 +3119,16 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
(Op0 == A || Op0 == B))
if (Instruction *R = ProcessUAddIdiom(I, Op1, *this))
return R;
+
+ // (zext a) * (zext b) --> llvm.umul.with.overflow.
+ if (match(Op0, m_Mul(m_ZExt(m_Value(A)), m_ZExt(m_Value(B))))) {
+ if (Instruction *R = ProcessUMulZExtIdiom(I, Op0, Op1, *this))
+ return R;
+ }
+ if (match(Op1, m_Mul(m_ZExt(m_Value(A)), m_ZExt(m_Value(B))))) {
+ if (Instruction *R = ProcessUMulZExtIdiom(I, Op1, Op0, *this))
+ return R;
+ }
}
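For reference, the C-level shape that this pair of matchers targets looks roughly like the following (a sketch; mul8Overflows is a hypothetical name):

#include <cstdint>

// Widen both operands, multiply, and compare against the narrow maximum:
// exactly the mul(zext, zext) + icmp shape matched above, which becomes a
// single llvm.umul.with.overflow.i8 after the transform.
bool mul8Overflows(uint8_t a, uint8_t b) {
  return (unsigned)a * (unsigned)b > 255u;
}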
if (I.isEquality()) {
@@ -2918,7 +3170,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
// (X&Z) == (Y&Z) -> (X^Y) & Z == 0
if (match(Op0, m_OneUse(m_And(m_Value(A), m_Value(B)))) &&
match(Op1, m_OneUse(m_And(m_Value(C), m_Value(D))))) {
- Value *X = 0, *Y = 0, *Z = 0;
+ Value *X = nullptr, *Y = nullptr, *Z = nullptr;
if (A == C) {
X = B; Y = D; Z = A;
@@ -3009,7 +3261,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
if (match(Op1, m_Add(m_Value(X), m_ConstantInt(Cst))) && Op0 == X)
return FoldICmpAddOpCst(I, X, Cst, I.getSwappedPredicate());
}
- return Changed ? &I : 0;
+ return Changed ? &I : nullptr;
}
/// FoldFCmp_IntToFP_Cst - Fold fcmp ([us]itofp x, cst) if possible.
@@ -3017,13 +3269,13 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I,
Instruction *LHSI,
Constant *RHSC) {
- if (!isa<ConstantFP>(RHSC)) return 0;
+ if (!isa<ConstantFP>(RHSC)) return nullptr;
const APFloat &RHS = cast<ConstantFP>(RHSC)->getValueAPF();
// Get the width of the mantissa. We don't want to hack on conversions that
// might lose information from the integer, e.g. "i64 -> float"
int MantissaWidth = LHSI->getType()->getFPMantissaWidth();
- if (MantissaWidth == -1) return 0; // Unknown.
+ if (MantissaWidth == -1) return nullptr; // Unknown.
// Check to see that the input is converted from an integer type that is small
// enough that the conversion preserves all bits. TODO: check here for "known" sign bits.
@@ -3037,7 +3289,7 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I,
// If the conversion would lose info, don't hack on this.
if ((int)InputSize > MantissaWidth)
- return 0;
+ return nullptr;
// Otherwise, we can potentially simplify the comparison. We know that it
// will always come through as an integer value and we know the constant is
@@ -3383,5 +3635,5 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
return new FCmpInst(I.getPredicate(), LHSExt->getOperand(0),
RHSExt->getOperand(0));
- return Changed ? &I : 0;
+ return Changed ? &I : nullptr;
}
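The mantissa-width guard above can be checked with plain C++; the values below straddle float's 24-bit significand (illustrative only):

#include <cassert>
#include <cstdint>

int main() {
  int16_t small = 12345;                  // 16 bits always fit in a float
  assert((int16_t)(float)small == small); // i16 -> float is lossless
  int32_t big = (1 << 24) + 1;            // 25 significant bits do not fit
  assert((int32_t)(float)big != big);     // rounds to 1 << 24
  return 0;
}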
diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index dcc8b0f..66d0938 100644
--- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -20,6 +20,8 @@
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
+#define DEBUG_TYPE "instcombine"
+
STATISTIC(NumDeadStore, "Number of dead stores eliminated");
STATISTIC(NumGlobalCopies, "Number of allocas copied from constant global");
@@ -29,10 +31,13 @@ STATISTIC(NumGlobalCopies, "Number of allocas copied from constant global");
static bool pointsToConstantGlobal(Value *V) {
if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
return GV->isConstant();
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
+
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
if (CE->getOpcode() == Instruction::BitCast ||
+ CE->getOpcode() == Instruction::AddrSpaceCast ||
CE->getOpcode() == Instruction::GetElementPtr)
return pointsToConstantGlobal(CE->getOperand(0));
+ }
return false;
}
@@ -60,9 +65,9 @@ isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
continue;
}
- if (BitCastInst *BCI = dyn_cast<BitCastInst>(I)) {
+ if (isa<BitCastInst>(I) || isa<AddrSpaceCastInst>(I)) {
// If uses of the bitcast are ok, we are ok.
- if (!isOnlyCopiedFromConstantGlobal(BCI, TheCopy, ToDelete, IsOffset))
+ if (!isOnlyCopiedFromConstantGlobal(I, TheCopy, ToDelete, IsOffset))
return false;
continue;
}
@@ -112,7 +117,7 @@ isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
// If this isn't our memcpy/memmove, reject it as something we can't
// handle.
MemTransferInst *MI = dyn_cast<MemTransferInst>(I);
- if (MI == 0)
+ if (!MI)
return false;
// If the transfer is using the alloca as a source of the transfer, then
@@ -148,10 +153,10 @@ isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
static MemTransferInst *
isOnlyCopiedFromConstantGlobal(AllocaInst *AI,
SmallVectorImpl<Instruction *> &ToDelete) {
- MemTransferInst *TheCopy = 0;
+ MemTransferInst *TheCopy = nullptr;
if (isOnlyCopiedFromConstantGlobal(AI, TheCopy, ToDelete))
return TheCopy;
- return 0;
+ return nullptr;
}
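The kind of source this analysis pays off on, sketched in plain C++ (kTable and lookup are hypothetical names): the local copy of a constant table can be read straight from the global, and with this patch the walk also looks through addrspacecast.

#include <cstring>

static const int kTable[4] = {1, 2, 3, 5};   // constant global

int lookup(int i) {
  int local[4];
  std::memcpy(local, kTable, sizeof local);  // alloca copied from the global
  return local[i & 3];                       // load can be served by kTable
}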
Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
@@ -172,7 +177,7 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) {
Type *NewTy =
ArrayType::get(AI.getAllocatedType(), C->getZExtValue());
- AllocaInst *New = Builder->CreateAlloca(NewTy, 0, AI.getName());
+ AllocaInst *New = Builder->CreateAlloca(NewTy, nullptr, AI.getName());
New->setAlignment(AI.getAlignment());
// Scan to the end of the allocation instructions, to skip over a block of
@@ -295,7 +300,7 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI,
// If the address spaces don't match, don't eliminate the cast.
if (DestTy->getAddressSpace() != SrcTy->getAddressSpace())
- return 0;
+ return nullptr;
Type *SrcPTy = SrcTy->getElementType();
@@ -346,7 +351,7 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI,
}
}
}
- return 0;
+ return nullptr;
}
Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
@@ -373,7 +378,7 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
// None of the following transforms are legal for volatile/atomic loads.
// FIXME: Some of it is okay for atomic loads; needs refactoring.
- if (!LI.isSimple()) return 0;
+ if (!LI.isSimple()) return nullptr;
// Do really simple store-to-load forwarding and load CSE, to catch cases
// where there are several consecutive memory accesses to the same location,
@@ -455,7 +460,7 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
}
}
}
- return 0;
+ return nullptr;
}
/// InstCombineStoreToCast - Fold store V, (cast P) -> store (cast V), P
@@ -467,12 +472,12 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) {
Type *DestPTy = cast<PointerType>(CI->getType())->getElementType();
PointerType *SrcTy = dyn_cast<PointerType>(CastOp->getType());
- if (SrcTy == 0) return 0;
+ if (!SrcTy) return nullptr;
Type *SrcPTy = SrcTy->getElementType();
if (!DestPTy->isIntegerTy() && !DestPTy->isPointerTy())
- return 0;
+ return nullptr;
/// NewGEPIndices - If SrcPTy is an aggregate type, we can emit a "noop gep"
/// to its first element. This allows us to handle things like:
@@ -506,20 +511,20 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) {
}
if (!SrcPTy->isIntegerTy() && !SrcPTy->isPointerTy())
- return 0;
+ return nullptr;
// If the pointers point into different address spaces, don't do the
// transformation.
if (SrcTy->getAddressSpace() !=
cast<PointerType>(CI->getType())->getAddressSpace())
- return 0;
+ return nullptr;
// If the pointers point to values of different sizes, don't do the
// transformation.
if (!IC.getDataLayout() ||
IC.getDataLayout()->getTypeSizeInBits(SrcPTy) !=
IC.getDataLayout()->getTypeSizeInBits(DestPTy))
- return 0;
+ return nullptr;
// If the pointers point to pointers to different address spaces, don't do the
// transformation. It is not safe to introduce an addrspacecast instruction in
@@ -527,7 +532,7 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) {
// cast.
if (SrcPTy->isPointerTy() && DestPTy->isPointerTy() &&
SrcPTy->getPointerAddressSpace() != DestPTy->getPointerAddressSpace())
- return 0;
+ return nullptr;
// Okay, we are casting from one integer or pointer type to another of
// the same size. Instead of casting the pointer before
@@ -607,7 +612,7 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
// Don't hack volatile/atomic stores.
// FIXME: Some bits are legal for atomic stores; needs refactoring.
- if (!SI.isSimple()) return 0;
+ if (!SI.isSimple()) return nullptr;
// If the RHS is an alloca with a single use, zapify the store, making the
// alloca dead.
@@ -674,7 +679,7 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
if (Instruction *U = dyn_cast<Instruction>(Val))
Worklist.Add(U); // Dropped a use.
}
- return 0; // Do not modify these!
+ return nullptr; // Do not modify these!
}
// store undef, Ptr -> noop
@@ -703,9 +708,9 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
if (BranchInst *BI = dyn_cast<BranchInst>(BBI))
if (BI->isUnconditional())
if (SimplifyStoreAtEndOfBlock(SI))
- return 0; // xform done!
+ return nullptr; // xform done!
- return 0;
+ return nullptr;
}
/// SimplifyStoreAtEndOfBlock - Turn things like:
@@ -728,7 +733,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
// the other predecessor.
pred_iterator PI = pred_begin(DestBB);
BasicBlock *P = *PI;
- BasicBlock *OtherBB = 0;
+ BasicBlock *OtherBB = nullptr;
if (P != StoreBB)
OtherBB = P;
@@ -758,7 +763,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
// If the other block ends in an unconditional branch, check for the 'if then
// else' case. There is an instruction before the branch.
- StoreInst *OtherStore = 0;
+ StoreInst *OtherStore = nullptr;
if (OtherBr->isUnconditional()) {
--BBI;
// Skip over debugging info.
diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index 71fbb6c..9996ebc 100644
--- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -19,6 +19,8 @@
using namespace llvm;
using namespace PatternMatch;
+#define DEBUG_TYPE "instcombine"
+
/// simplifyValueKnownNonZero - The specific integer value is used in a context
/// where it is known to be non-zero. If this allows us to simplify the
@@ -27,13 +29,13 @@ static Value *simplifyValueKnownNonZero(Value *V, InstCombiner &IC) {
// If V has multiple uses, then we would have to do more analysis to determine
// if this is safe. For example, the use could be in dynamically unreached
// code.
- if (!V->hasOneUse()) return 0;
+ if (!V->hasOneUse()) return nullptr;
bool MadeChange = false;
// ((1 << A) >>u B) --> (1 << (A-B))
// Because V cannot be zero, we know that B is less than A.
- Value *A = 0, *B = 0, *PowerOf2 = 0;
+ Value *A = nullptr, *B = nullptr, *PowerOf2 = nullptr;
if (match(V, m_LShr(m_OneUse(m_Shl(m_Value(PowerOf2), m_Value(A))),
m_Value(B))) &&
// The "1" can be any value known to be a power of 2.
@@ -68,7 +70,7 @@ static Value *simplifyValueKnownNonZero(Value *V, InstCombiner &IC) {
// If V is a phi node, we can call this on each of its operands.
// "select cond, X, 0" can simplify to "X".
- return MadeChange ? V : 0;
+ return MadeChange ? V : nullptr;
}
@@ -107,7 +109,7 @@ static Constant *getLogBase2Vector(ConstantDataVector *CV) {
for (unsigned I = 0, E = CV->getNumElements(); I != E; ++I) {
Constant *Elt = CV->getElementAsConstant(I);
if (!match(Elt, m_APInt(IVal)) || !IVal->isPowerOf2())
- return 0;
+ return nullptr;
Elts.push_back(ConstantInt::get(Elt->getType(), IVal->logBase2()));
}
@@ -118,6 +120,9 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
bool Changed = SimplifyAssociativeOrCommutative(I);
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+ if (Value *V = SimplifyVectorOp(I))
+ return ReplaceInstUsesWith(I, V);
+
if (Value *V = SimplifyMulInst(Op0, Op1, DL))
return ReplaceInstUsesWith(I, V);
@@ -139,7 +144,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
return BinaryOperator::CreateMul(NewOp, ConstantExpr::getShl(C1, C2));
if (match(&I, m_Mul(m_Value(NewOp), m_Constant(C1)))) {
- Constant *NewCst = 0;
+ Constant *NewCst = nullptr;
if (match(C1, m_APInt(IVal)) && IVal->isPowerOf2())
// Replace X*(2^C) with X << C, where C is either a scalar or a splat.
NewCst = ConstantInt::get(NewOp->getType(), IVal->logBase2());
@@ -165,10 +170,10 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
const APInt & Val = CI->getValue();
const APInt &PosVal = Val.abs();
if (Val.isNegative() && PosVal.isPowerOf2()) {
- Value *X = 0, *Y = 0;
+ Value *X = nullptr, *Y = nullptr;
if (Op0->hasOneUse()) {
ConstantInt *C1;
- Value *Sub = 0;
+ Value *Sub = nullptr;
if (match(Op0, m_Sub(m_Value(Y), m_Value(X))))
Sub = Builder->CreateSub(X, Y, "suba");
else if (match(Op0, m_Add(m_Value(Y), m_ConstantInt(C1))))
@@ -268,7 +273,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
// -2 is "-1 << 1" so it is all bits set except the low one.
APInt Negative2(I.getType()->getPrimitiveSizeInBits(), (uint64_t)-2, true);
- Value *BoolCast = 0, *OtherOp = 0;
+ Value *BoolCast = nullptr, *OtherOp = nullptr;
if (MaskedValueIsZero(Op0, Negative2))
BoolCast = Op0, OtherOp = Op1;
else if (MaskedValueIsZero(Op1, Negative2))
@@ -281,7 +286,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
}
}
- return Changed ? &I : 0;
+ return Changed ? &I : nullptr;
}
//
@@ -384,7 +389,7 @@ Value *InstCombiner::foldFMulConst(Instruction *FMulOrDiv, Constant *C,
Constant *C0 = dyn_cast<Constant>(Opnd0);
Constant *C1 = dyn_cast<Constant>(Opnd1);
- BinaryOperator *R = 0;
+ BinaryOperator *R = nullptr;
// (X * C0) * C => X * (C0*C)
if (FMulOrDiv->getOpcode() == Instruction::FMul) {
@@ -426,6 +431,9 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
bool Changed = SimplifyAssociativeOrCommutative(I);
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+ if (Value *V = SimplifyVectorOp(I))
+ return ReplaceInstUsesWith(I, V);
+
if (isa<Constant>(Op0))
std::swap(Op0, Op1);
@@ -483,7 +491,7 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
Value *M1 = ConstantExpr::getFMul(C1, C);
Value *M0 = isNormalFp(cast<Constant>(M1)) ?
foldFMulConst(cast<Instruction>(Opnd0), C, &I) :
- 0;
+ nullptr;
if (M0 && M1) {
if (Swap && FAddSub->getOpcode() == Instruction::FSub)
std::swap(M0, M1);
@@ -503,8 +511,8 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
// Under unsafe algebra do:
// X * log2(0.5*Y) = X*log2(Y) - X
if (I.hasUnsafeAlgebra()) {
- Value *OpX = NULL;
- Value *OpY = NULL;
+ Value *OpX = nullptr;
+ Value *OpY = nullptr;
IntrinsicInst *Log2;
detectLog2OfHalf(Op0, OpY, Log2);
if (OpY) {
@@ -567,7 +575,7 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
Value *Opnd0_0, *Opnd0_1;
if (Opnd0->hasOneUse() &&
match(Opnd0, m_FMul(m_Value(Opnd0_0), m_Value(Opnd0_1)))) {
- Value *Y = 0;
+ Value *Y = nullptr;
if (Opnd0_0 == Opnd1 && Opnd0_1 != Opnd1)
Y = Opnd0_1;
else if (Opnd0_1 == Opnd1 && Opnd0_0 != Opnd1)
@@ -621,7 +629,7 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
break;
}
- return Changed ? &I : 0;
+ return Changed ? &I : nullptr;
}
/// SimplifyDivRemOfSelect - Try to fold a divide or remainder of a select
@@ -682,12 +690,12 @@ bool InstCombiner::SimplifyDivRemOfSelect(BinaryOperator &I) {
// If we're past the instruction, quit looking for it.
if (&*BBI == SI)
- SI = 0;
+ SI = nullptr;
if (&*BBI == SelectCond)
- SelectCond = 0;
+ SelectCond = nullptr;
// If we ran out of things to eliminate, break out of the loop.
- if (SelectCond == 0 && SI == 0)
+ if (!SelectCond && !SI)
break;
}
@@ -719,7 +727,7 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) {
if (Instruction::BinaryOps(LHS->getOpcode()) == I.getOpcode())
if (ConstantInt *LHSRHS = dyn_cast<ConstantInt>(LHS->getOperand(1))) {
if (MultiplyOverflows(RHS, LHSRHS,
- I.getOpcode()==Instruction::SDiv))
+ I.getOpcode() == Instruction::SDiv))
return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
return BinaryOperator::Create(I.getOpcode(), LHS->getOperand(0),
ConstantExpr::getMul(RHS, LHSRHS));
@@ -735,12 +743,31 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) {
}
}
+ if (ConstantInt *One = dyn_cast<ConstantInt>(Op0)) {
+ if (One->isOne() && !I.getType()->isIntegerTy(1)) {
+ bool isSigned = I.getOpcode() == Instruction::SDiv;
+ if (isSigned) {
+ // If Op1 is 0 then it's undefined behaviour; if Op1 is 1 then the
+ // result is one; if Op1 is -1 then the result is minus one; otherwise
+ // it's zero.
+ Value *Inc = Builder->CreateAdd(Op1, One);
+ Value *Cmp = Builder->CreateICmpULT(
+ Inc, ConstantInt::get(I.getType(), 3));
+ return SelectInst::Create(Cmp, Op1, ConstantInt::get(I.getType(), 0));
+ } else {
+ // If Op1 is 0 then it's undefined behaviour. If Op1 is 1 then the
+ // result is one, otherwise it's zero.
+ return new ZExtInst(Builder->CreateICmpEQ(Op1, One), I.getType());
+ }
+ }
+ }
+
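A scalar model of the new 1/X fold (a sketch; the helper names are hypothetical). The signed case uses the same branchless "(x + 1) <u 3" trick the emitted IR encodes:

#include <cassert>

static int sdivOneBy(int x) {       // models sdiv 1, x for x != 0
  unsigned inc = (unsigned)x + 1u;  // maps -1 -> 0 and 1 -> 2
  return inc < 3u ? x : 0;          // select (icmp ult (add x, 1), 3), x, 0
}

static unsigned udivOneBy(unsigned x) { // models udiv 1, x for x != 0
  return x == 1u ? 1u : 0u;             // zext (icmp eq x, 1)
}

int main() {
  assert(sdivOneBy(1) == 1 && sdivOneBy(-1) == -1 && sdivOneBy(7) == 0);
  assert(udivOneBy(1) == 1u && udivOneBy(9) == 0u);
  return 0;
}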
// See if we can fold away this div instruction.
if (SimplifyDemandedInstructionBits(I))
return &I;
// (X - (X rem Y)) / Y -> X / Y; usually originates as ((X / Y) * Y) / Y
- Value *X = 0, *Z = 0;
+ Value *X = nullptr, *Z = nullptr;
if (match(Op0, m_Sub(m_Value(X), m_Value(Z)))) { // (X - Z) / Y; Y = Op1
bool isSigned = I.getOpcode() == Instruction::SDiv;
if ((isSigned && match(Z, m_SRem(m_Specific(X), m_Specific(Op1)))) ||
@@ -748,7 +775,7 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) {
return BinaryOperator::Create(I.getOpcode(), X, Op1);
}
- return 0;
+ return nullptr;
}
/// dyn_castZExtVal - Checks if V is a zext or constant that can
@@ -761,7 +788,7 @@ static Value *dyn_castZExtVal(Value *V, Type *Ty) {
if (C->getValue().getActiveBits() <= cast<IntegerType>(Ty)->getBitWidth())
return ConstantExpr::getTrunc(C, Ty);
}
- return 0;
+ return nullptr;
}
namespace {
@@ -786,7 +813,7 @@ struct UDivFoldAction {
};
UDivFoldAction(FoldUDivOperandCb FA, Value *InputOperand)
- : FoldAction(FA), OperandToFold(InputOperand), FoldResult(0) {}
+ : FoldAction(FA), OperandToFold(InputOperand), FoldResult(nullptr) {}
UDivFoldAction(FoldUDivOperandCb FA, Value *InputOperand, size_t SLHS)
: FoldAction(FA), OperandToFold(InputOperand), SelectLHSIdx(SLHS) {}
};
@@ -865,7 +892,8 @@ static size_t visitUDivOperand(Value *Op0, Value *Op1, const BinaryOperator &I,
if (SelectInst *SI = dyn_cast<SelectInst>(Op1))
if (size_t LHSIdx = visitUDivOperand(Op0, SI->getOperand(1), I, Actions))
if (visitUDivOperand(Op0, SI->getOperand(2), I, Actions)) {
- Actions.push_back(UDivFoldAction((FoldUDivOperandCb)0, Op1, LHSIdx-1));
+ Actions.push_back(UDivFoldAction((FoldUDivOperandCb)nullptr, Op1,
+ LHSIdx-1));
return Actions.size();
}
@@ -875,6 +903,9 @@ static size_t visitUDivOperand(Value *Op0, Value *Op1, const BinaryOperator &I,
Instruction *InstCombiner::visitUDiv(BinaryOperator &I) {
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+ if (Value *V = SimplifyVectorOp(I))
+ return ReplaceInstUsesWith(I, V);
+
if (Value *V = SimplifyUDivInst(Op0, Op1, DL))
return ReplaceInstUsesWith(I, V);
@@ -928,12 +959,15 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) {
return Inst;
}
- return 0;
+ return nullptr;
}
Instruction *InstCombiner::visitSDiv(BinaryOperator &I) {
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+ if (Value *V = SimplifyVectorOp(I))
+ return ReplaceInstUsesWith(I, V);
+
if (Value *V = SimplifySDivInst(Op0, Op1, DL))
return ReplaceInstUsesWith(I, V);
@@ -983,7 +1017,7 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) {
}
}
- return 0;
+ return nullptr;
}
/// CvtFDivConstToReciprocal tries to convert X/C into X*1/C if C is not a special
@@ -997,7 +1031,7 @@ static Instruction *CvtFDivConstToReciprocal(Value *Dividend,
Constant *Divisor,
bool AllowReciprocal) {
if (!isa<ConstantFP>(Divisor)) // TODO: handle vectors.
- return 0;
+ return nullptr;
const APFloat &FpVal = cast<ConstantFP>(Divisor)->getValueAPF();
APFloat Reciprocal(FpVal.getSemantics());
@@ -1010,7 +1044,7 @@ static Instruction *CvtFDivConstToReciprocal(Value *Dividend,
}
if (!Cvt)
- return 0;
+ return nullptr;
ConstantFP *R;
R = ConstantFP::get(Dividend->getType()->getContext(), Reciprocal);
@@ -1020,6 +1054,9 @@ static Instruction *CvtFDivConstToReciprocal(Value *Dividend,
Instruction *InstCombiner::visitFDiv(BinaryOperator &I) {
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+ if (Value *V = SimplifyVectorOp(I))
+ return ReplaceInstUsesWith(I, V);
+
if (Value *V = SimplifyFDivInst(Op0, Op1, DL))
return ReplaceInstUsesWith(I, V);
@@ -1037,10 +1074,10 @@ Instruction *InstCombiner::visitFDiv(BinaryOperator &I) {
return R;
if (AllowReassociate) {
- Constant *C1 = 0;
+ Constant *C1 = nullptr;
Constant *C2 = Op1C;
Value *X;
- Instruction *Res = 0;
+ Instruction *Res = nullptr;
if (match(Op0, m_FMul(m_Value(X), m_Constant(C1)))) {
// (X*C1)/C2 => X * (C1/C2)
@@ -1071,12 +1108,12 @@ Instruction *InstCombiner::visitFDiv(BinaryOperator &I) {
return T;
}
- return 0;
+ return nullptr;
}
if (AllowReassociate && isa<Constant>(Op0)) {
Constant *C1 = cast<Constant>(Op0), *C2;
- Constant *Fold = 0;
+ Constant *Fold = nullptr;
Value *X;
bool CreateDiv = true;
@@ -1098,13 +1135,13 @@ Instruction *InstCombiner::visitFDiv(BinaryOperator &I) {
R->setFastMathFlags(I.getFastMathFlags());
return R;
}
- return 0;
+ return nullptr;
}
if (AllowReassociate) {
Value *X, *Y;
- Value *NewInst = 0;
- Instruction *SimpR = 0;
+ Value *NewInst = nullptr;
+ Instruction *SimpR = nullptr;
if (Op0->hasOneUse() && match(Op0, m_FDiv(m_Value(X), m_Value(Y)))) {
// (X/Y) / Z => X / (Y*Z)
@@ -1140,7 +1177,7 @@ Instruction *InstCombiner::visitFDiv(BinaryOperator &I) {
}
}
- return 0;
+ return nullptr;
}
/// This function implements the transforms common to both integer remainder
@@ -1176,12 +1213,15 @@ Instruction *InstCombiner::commonIRemTransforms(BinaryOperator &I) {
}
}
- return 0;
+ return nullptr;
}
Instruction *InstCombiner::visitURem(BinaryOperator &I) {
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+ if (Value *V = SimplifyVectorOp(I))
+ return ReplaceInstUsesWith(I, V);
+
if (Value *V = SimplifyURemInst(Op0, Op1, DL))
return ReplaceInstUsesWith(I, V);
@@ -1208,12 +1248,15 @@ Instruction *InstCombiner::visitURem(BinaryOperator &I) {
return ReplaceInstUsesWith(I, Ext);
}
- return 0;
+ return nullptr;
}
Instruction *InstCombiner::visitSRem(BinaryOperator &I) {
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+ if (Value *V = SimplifyVectorOp(I))
+ return ReplaceInstUsesWith(I, V);
+
if (Value *V = SimplifySRemInst(Op0, Op1, DL))
return ReplaceInstUsesWith(I, V);
@@ -1250,7 +1293,7 @@ Instruction *InstCombiner::visitSRem(BinaryOperator &I) {
bool hasMissing = false;
for (unsigned i = 0; i != VWidth; ++i) {
Constant *Elt = C->getAggregateElement(i);
- if (Elt == 0) {
+ if (!Elt) {
hasMissing = true;
break;
}
@@ -1279,12 +1322,15 @@ Instruction *InstCombiner::visitSRem(BinaryOperator &I) {
}
}
- return 0;
+ return nullptr;
}
Instruction *InstCombiner::visitFRem(BinaryOperator &I) {
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+ if (Value *V = SimplifyVectorOp(I))
+ return ReplaceInstUsesWith(I, V);
+
if (Value *V = SimplifyFRemInst(Op0, Op1, DL))
return ReplaceInstUsesWith(I, V);
@@ -1292,5 +1338,5 @@ Instruction *InstCombiner::visitFRem(BinaryOperator &I) {
if (isa<SelectInst>(Op1) && SimplifyDivRemOfSelect(I))
return &I;
- return 0;
+ return nullptr;
}
diff --git a/lib/Transforms/InstCombine/InstCombinePHI.cpp b/lib/Transforms/InstCombine/InstCombinePHI.cpp
index 0ab657a..46f7b8a 100644
--- a/lib/Transforms/InstCombine/InstCombinePHI.cpp
+++ b/lib/Transforms/InstCombine/InstCombinePHI.cpp
@@ -18,6 +18,8 @@
#include "llvm/IR/DataLayout.h"
using namespace llvm;
+#define DEBUG_TYPE "instcombine"
+
/// FoldPHIArgBinOpIntoPHI - If we have something like phi [add (a,b), add(a,c)]
/// and if a/b/c and the adds all have a single use, turn this into a phi
/// and a single binop.
@@ -48,12 +50,12 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) {
// types.
I->getOperand(0)->getType() != LHSType ||
I->getOperand(1)->getType() != RHSType)
- return 0;
+ return nullptr;
// If they are CmpInst instructions, check their predicates
if (CmpInst *CI = dyn_cast<CmpInst>(I))
if (CI->getPredicate() != cast<CmpInst>(FirstInst)->getPredicate())
- return 0;
+ return nullptr;
if (isNUW)
isNUW = cast<OverflowingBinaryOperator>(I)->hasNoUnsignedWrap();
@@ -63,8 +65,8 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) {
isExact = cast<PossiblyExactOperator>(I)->isExact();
// Keep track of which operand needs a phi node.
- if (I->getOperand(0) != LHSVal) LHSVal = 0;
- if (I->getOperand(1) != RHSVal) RHSVal = 0;
+ if (I->getOperand(0) != LHSVal) LHSVal = nullptr;
+ if (I->getOperand(1) != RHSVal) RHSVal = nullptr;
}
// If both LHS and RHS would need a PHI, don't do this transformation,
@@ -72,14 +74,14 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) {
// which leads to higher register pressure. This is especially
// bad when the PHIs are in the header of a loop.
if (!LHSVal && !RHSVal)
- return 0;
+ return nullptr;
// Otherwise, this is safe to transform!
Value *InLHS = FirstInst->getOperand(0);
Value *InRHS = FirstInst->getOperand(1);
- PHINode *NewLHS = 0, *NewRHS = 0;
- if (LHSVal == 0) {
+ PHINode *NewLHS = nullptr, *NewRHS = nullptr;
+ if (!LHSVal) {
NewLHS = PHINode::Create(LHSType, PN.getNumIncomingValues(),
FirstInst->getOperand(0)->getName() + ".pn");
NewLHS->addIncoming(InLHS, PN.getIncomingBlock(0));
@@ -87,7 +89,7 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) {
LHSVal = NewLHS;
}
- if (RHSVal == 0) {
+ if (!RHSVal) {
NewRHS = PHINode::Create(RHSType, PN.getNumIncomingValues(),
FirstInst->getOperand(1)->getName() + ".pn");
NewRHS->addIncoming(InRHS, PN.getIncomingBlock(0));
@@ -148,7 +150,7 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) {
GetElementPtrInst *GEP= dyn_cast<GetElementPtrInst>(PN.getIncomingValue(i));
if (!GEP || !GEP->hasOneUse() || GEP->getType() != FirstInst->getType() ||
GEP->getNumOperands() != FirstInst->getNumOperands())
- return 0;
+ return nullptr;
AllInBounds &= GEP->isInBounds();
@@ -170,19 +172,19 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) {
// for struct indices, which must always be constant.
if (isa<ConstantInt>(FirstInst->getOperand(op)) ||
isa<ConstantInt>(GEP->getOperand(op)))
- return 0;
+ return nullptr;
if (FirstInst->getOperand(op)->getType() !=GEP->getOperand(op)->getType())
- return 0;
+ return nullptr;
// If we already needed a PHI for an earlier operand, and another operand
// also requires a PHI, we'd be introducing more PHIs than we're
// eliminating, which increases register pressure on entry to the PHI's
// block.
if (NeededPhi)
- return 0;
+ return nullptr;
- FixedOperands[op] = 0; // Needs a PHI.
+ FixedOperands[op] = nullptr; // Needs a PHI.
NeededPhi = true;
}
}
@@ -194,7 +196,7 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) {
// load up into the predecessors so that we have a load of a gep of an alloca,
// which can usually all be folded into the load.
if (AllBasePointersAreAllocas)
- return 0;
+ return nullptr;
// Otherwise, this is safe to transform. Insert PHI nodes for each operand
// that is variable.
@@ -288,7 +290,7 @@ Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) {
// FIXME: This is overconservative; this transform is allowed in some cases
// for atomic operations.
if (FirstLI->isAtomic())
- return 0;
+ return nullptr;
// When processing loads, we need to propagate two bits of information to the
// sunk load: whether it is volatile, and what its alignment is. We currently
@@ -303,20 +305,20 @@ Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) {
// load and the PHI.
if (FirstLI->getParent() != PN.getIncomingBlock(0) ||
!isSafeAndProfitableToSinkLoad(FirstLI))
- return 0;
+ return nullptr;
// If the PHI is of volatile loads and the load block has multiple
// successors, sinking it would remove a load of the volatile value from
// the path through the other successor.
if (isVolatile &&
FirstLI->getParent()->getTerminator()->getNumSuccessors() != 1)
- return 0;
+ return nullptr;
// Check to see if all arguments are the same operation.
for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) {
LoadInst *LI = dyn_cast<LoadInst>(PN.getIncomingValue(i));
if (!LI || !LI->hasOneUse())
- return 0;
+ return nullptr;
// We can't sink the load if the loaded value could be modified between
// the load and the PHI.
@@ -324,12 +326,12 @@ Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) {
LI->getParent() != PN.getIncomingBlock(i) ||
LI->getPointerAddressSpace() != LoadAddrSpace ||
!isSafeAndProfitableToSinkLoad(LI))
- return 0;
+ return nullptr;
// If some of the loads have an alignment specified but not all of them,
// we can't do the transformation.
if ((LoadAlignment != 0) != (LI->getAlignment() != 0))
- return 0;
+ return nullptr;
LoadAlignment = std::min(LoadAlignment, LI->getAlignment());
@@ -338,7 +340,7 @@ Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) {
// the path through the other successor.
if (isVolatile &&
LI->getParent()->getTerminator()->getNumSuccessors() != 1)
- return 0;
+ return nullptr;
}
// Okay, they are all the same operation. Create a new PHI node of the
@@ -354,7 +356,7 @@ Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) {
for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) {
Value *NewInVal = cast<LoadInst>(PN.getIncomingValue(i))->getOperand(0);
if (NewInVal != InVal)
- InVal = 0;
+ InVal = nullptr;
NewPN->addIncoming(NewInVal, PN.getIncomingBlock(i));
}
@@ -398,8 +400,8 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) {
// If all input operands to the phi are the same instruction (e.g. a cast from
// the same type or "+42") we can pull the operation through the PHI, reducing
// code size and simplifying code.
- Constant *ConstantOp = 0;
- Type *CastSrcTy = 0;
+ Constant *ConstantOp = nullptr;
+ Type *CastSrcTy = nullptr;
bool isNUW = false, isNSW = false, isExact = false;
if (isa<CastInst>(FirstInst)) {
@@ -409,13 +411,13 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) {
// the code by turning an i32 into an i1293.
if (PN.getType()->isIntegerTy() && CastSrcTy->isIntegerTy()) {
if (!ShouldChangeType(PN.getType(), CastSrcTy))
- return 0;
+ return nullptr;
}
} else if (isa<BinaryOperator>(FirstInst) || isa<CmpInst>(FirstInst)) {
// Can fold binop, compare or shift here if the RHS is a constant,
// otherwise call FoldPHIArgBinOpIntoPHI.
ConstantOp = dyn_cast<Constant>(FirstInst->getOperand(1));
- if (ConstantOp == 0)
+ if (!ConstantOp)
return FoldPHIArgBinOpIntoPHI(PN);
if (OverflowingBinaryOperator *BO =
@@ -426,19 +428,19 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) {
dyn_cast<PossiblyExactOperator>(FirstInst))
isExact = PEO->isExact();
} else {
- return 0; // Cannot fold this operation.
+ return nullptr; // Cannot fold this operation.
}
// Check to see if all arguments are the same operation.
for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) {
Instruction *I = dyn_cast<Instruction>(PN.getIncomingValue(i));
- if (I == 0 || !I->hasOneUse() || !I->isSameOperationAs(FirstInst))
- return 0;
+ if (!I || !I->hasOneUse() || !I->isSameOperationAs(FirstInst))
+ return nullptr;
if (CastSrcTy) {
if (I->getOperand(0)->getType() != CastSrcTy)
- return 0; // Cast operation must match.
+ return nullptr; // Cast operation must match.
} else if (I->getOperand(1) != ConstantOp) {
- return 0;
+ return nullptr;
}
if (isNUW)
@@ -462,7 +464,7 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) {
for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i) {
Value *NewInVal = cast<Instruction>(PN.getIncomingValue(i))->getOperand(0);
if (NewInVal != InVal)
- InVal = 0;
+ InVal = nullptr;
NewPN->addIncoming(NewInVal, PN.getIncomingBlock(i));
}
@@ -587,10 +589,10 @@ namespace llvm {
template<>
struct DenseMapInfo<LoweredPHIRecord> {
static inline LoweredPHIRecord getEmptyKey() {
- return LoweredPHIRecord(0, 0);
+ return LoweredPHIRecord(nullptr, 0);
}
static inline LoweredPHIRecord getTombstoneKey() {
- return LoweredPHIRecord(0, 1);
+ return LoweredPHIRecord(nullptr, 1);
}
static unsigned getHashValue(const LoweredPHIRecord &Val) {
return DenseMapInfo<PHINode*>::getHashValue(Val.PN) ^ (Val.Shift>>3) ^
@@ -637,14 +639,14 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {
// bail out.
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
InvokeInst *II = dyn_cast<InvokeInst>(PN->getIncomingValue(i));
- if (II == 0) continue;
+ if (!II) continue;
if (II->getParent() != PN->getIncomingBlock(i))
continue;
// If we have a phi, and if it's directly in the predecessor, then we have
// a critical edge where we need to put the truncate. Since we can't
// split the edge in instcombine, we have to bail out.
- return 0;
+ return nullptr;
}
for (User *U : PN->users()) {
@@ -667,7 +669,7 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {
if (UserI->getOpcode() != Instruction::LShr ||
!UserI->hasOneUse() || !isa<TruncInst>(UserI->user_back()) ||
!isa<ConstantInt>(UserI->getOperand(1)))
- return 0;
+ return nullptr;
unsigned Shift = cast<ConstantInt>(UserI->getOperand(1))->getZExtValue();
PHIUsers.push_back(PHIUsageRecord(PHIId, Shift, UserI->user_back()));
@@ -705,7 +707,7 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {
// If we've already lowered a user like this, reuse the previously lowered
// value.
- if ((EltPHI = ExtractedVals[LoweredPHIRecord(PN, Offset, Ty)]) == 0) {
+ if ((EltPHI = ExtractedVals[LoweredPHIRecord(PN, Offset, Ty)]) == nullptr) {
// Otherwise, Create the new PHI node for this user.
EltPHI = PHINode::Create(Ty, PN->getNumIncomingValues(),
@@ -894,5 +896,5 @@ Instruction *InstCombiner::visitPHINode(PHINode &PN) {
if (Instruction *Res = SliceUpIllegalIntegerPHI(PN))
return Res;
- return 0;
+ return nullptr;
}
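As a reminder of what the folds in this file buy, FoldPHIArgBinOpIntoPHI targets shapes like the following (a sketch; f is a hypothetical name):

#include <cassert>

// Both incoming values are the same binop with a shared operand, so the
// phi can be pushed below the add: phi [a+b, a+c]  ==>  a + phi [b, c].
static int f(bool p, int a, int b, int c) {
  int r;
  if (p)
    r = a + b;
  else
    r = a + c;
  return r;
}

int main() {
  assert(f(true, 1, 2, 3) == 3 && f(false, 1, 2, 3) == 4);
  return 0;
}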
diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp
index e74d912..9a41e4b 100644
--- a/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -18,16 +18,18 @@
using namespace llvm;
using namespace PatternMatch;
+#define DEBUG_TYPE "instcombine"
+
/// MatchSelectPattern - Pattern match integer [SU]MIN, [SU]MAX, and ABS idioms,
/// returning the kind and providing the out parameter results if we
/// successfully match.
static SelectPatternFlavor
MatchSelectPattern(Value *V, Value *&LHS, Value *&RHS) {
SelectInst *SI = dyn_cast<SelectInst>(V);
- if (SI == 0) return SPF_UNKNOWN;
+ if (!SI) return SPF_UNKNOWN;
ICmpInst *ICI = dyn_cast<ICmpInst>(SI->getCondition());
- if (ICI == 0) return SPF_UNKNOWN;
+ if (!ICI) return SPF_UNKNOWN;
LHS = ICI->getOperand(0);
RHS = ICI->getOperand(1);
@@ -129,15 +131,15 @@ Instruction *InstCombiner::FoldSelectOpOp(SelectInst &SI, Instruction *TI,
if (TI->isCast()) {
Type *FIOpndTy = FI->getOperand(0)->getType();
if (TI->getOperand(0)->getType() != FIOpndTy)
- return 0;
+ return nullptr;
// The select condition may be a vector. We may only change the operand
// type if the vector width remains the same (and matches the condition).
Type *CondTy = SI.getCondition()->getType();
if (CondTy->isVectorTy() && (!FIOpndTy->isVectorTy() ||
CondTy->getVectorNumElements() != FIOpndTy->getVectorNumElements()))
- return 0;
+ return nullptr;
} else {
- return 0; // unknown unary op.
+ return nullptr; // unknown unary op.
}
// Fold this by inserting a select from the input values.
@@ -149,7 +151,7 @@ Instruction *InstCombiner::FoldSelectOpOp(SelectInst &SI, Instruction *TI,
// Only handle binary operators here.
if (!isa<BinaryOperator>(TI))
- return 0;
+ return nullptr;
// Figure out if the operations have any operands in common.
Value *MatchOp, *OtherOpT, *OtherOpF;
@@ -165,7 +167,7 @@ Instruction *InstCombiner::FoldSelectOpOp(SelectInst &SI, Instruction *TI,
OtherOpF = FI->getOperand(0);
MatchIsOpZero = false;
} else if (!TI->isCommutative()) {
- return 0;
+ return nullptr;
} else if (TI->getOperand(0) == FI->getOperand(1)) {
MatchOp = TI->getOperand(0);
OtherOpT = TI->getOperand(1);
@@ -177,7 +179,7 @@ Instruction *InstCombiner::FoldSelectOpOp(SelectInst &SI, Instruction *TI,
OtherOpF = FI->getOperand(1);
MatchIsOpZero = true;
} else {
- return 0;
+ return nullptr;
}
// If we reach here, they do have operations in common.
@@ -282,7 +284,7 @@ Instruction *InstCombiner::FoldSelectIntoOp(SelectInst &SI, Value *TrueVal,
}
}
- return 0;
+ return nullptr;
}
/// SimplifyWithOpReplaced - See if V simplifies when its operand Op is
@@ -296,7 +298,7 @@ static Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp,
Instruction *I = dyn_cast<Instruction>(V);
if (!I)
- return 0;
+ return nullptr;
// If this is a binary operator, try to simplify it with the replaced op.
if (BinaryOperator *B = dyn_cast<BinaryOperator>(I)) {
@@ -347,7 +349,7 @@ static Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp,
}
}
- return 0;
+ return nullptr;
}
/// foldSelectICmpAndOr - We want to turn:
@@ -368,18 +370,18 @@ static Value *foldSelectICmpAndOr(const SelectInst &SI, Value *TrueVal,
InstCombiner::BuilderTy *Builder) {
const ICmpInst *IC = dyn_cast<ICmpInst>(SI.getCondition());
if (!IC || !IC->isEquality() || !SI.getType()->isIntegerTy())
- return 0;
+ return nullptr;
Value *CmpLHS = IC->getOperand(0);
Value *CmpRHS = IC->getOperand(1);
if (!match(CmpRHS, m_Zero()))
- return 0;
+ return nullptr;
Value *X;
const APInt *C1;
if (!match(CmpLHS, m_And(m_Value(X), m_Power2(C1))))
- return 0;
+ return nullptr;
const APInt *C2;
bool OrOnTrueVal = false;
@@ -388,7 +390,7 @@ static Value *foldSelectICmpAndOr(const SelectInst &SI, Value *TrueVal,
OrOnTrueVal = match(TrueVal, m_Or(m_Specific(FalseVal), m_Power2(C2)));
if (!OrOnFalseVal && !OrOnTrueVal)
- return 0;
+ return nullptr;
Value *V = CmpLHS;
Value *Y = OrOnFalseVal ? TrueVal : FalseVal;
@@ -527,7 +529,7 @@ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI,
if (IntegerType *Ty = dyn_cast<IntegerType>(CmpLHS->getType())) {
if (TrueVal->getType() == Ty) {
if (ConstantInt *Cmp = dyn_cast<ConstantInt>(CmpRHS)) {
- ConstantInt *C1 = NULL, *C2 = NULL;
+ ConstantInt *C1 = nullptr, *C2 = nullptr;
if (Pred == ICmpInst::ICMP_SGT && Cmp->isAllOnesValue()) {
C1 = dyn_cast<ConstantInt>(TrueVal);
C2 = dyn_cast<ConstantInt>(FalseVal);
@@ -586,7 +588,7 @@ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI,
if (Value *V = foldSelectICmpAndOr(SI, TrueVal, FalseVal, Builder))
return ReplaceInstUsesWith(SI, V);
- return Changed ? &SI : 0;
+ return Changed ? &SI : nullptr;
}
@@ -606,7 +608,7 @@ static bool CanSelectOperandBeMappingIntoPredBlock(const Value *V,
// If the value is a non-instruction value like a constant or argument, it
// can always be mapped.
const Instruction *I = dyn_cast<Instruction>(V);
- if (I == 0) return true;
+ if (!I) return true;
// If V is a PHI node defined in the same block as the condition PHI, we can
// map the arguments.
@@ -649,11 +651,35 @@ Instruction *InstCombiner::FoldSPFofSPF(Instruction *Inner,
return ReplaceInstUsesWith(Outer, C);
}
- // TODO: MIN(MIN(A, 23), 97)
- return 0;
+ if (SPF1 == SPF2) {
+ if (ConstantInt *CB = dyn_cast<ConstantInt>(B)) {
+ if (ConstantInt *CC = dyn_cast<ConstantInt>(C)) {
+ APInt ACB = CB->getValue();
+ APInt ACC = CC->getValue();
+
+ // MIN(MIN(A, 23), 97) -> MIN(A, 23)
+ // MAX(MAX(A, 97), 23) -> MAX(A, 97)
+ if ((SPF1 == SPF_UMIN && ACB.ule(ACC)) ||
+ (SPF1 == SPF_SMIN && ACB.sle(ACC)) ||
+ (SPF1 == SPF_UMAX && ACB.uge(ACC)) ||
+ (SPF1 == SPF_SMAX && ACB.sge(ACC)))
+ return ReplaceInstUsesWith(Outer, Inner);
+
+ // MIN(MIN(A, 97), 23) -> MIN(A, 23)
+ // MAX(MAX(A, 23), 97) -> MAX(A, 97)
+ if ((SPF1 == SPF_UMIN && ACB.ugt(ACC)) ||
+ (SPF1 == SPF_SMIN && ACB.sgt(ACC)) ||
+ (SPF1 == SPF_UMAX && ACB.ult(ACC)) ||
+ (SPF1 == SPF_SMAX && ACB.slt(ACC))) {
+ Outer.replaceUsesOfWith(Inner, A);
+ return &Outer;
+ }
+ }
+ }
+ }
+ return nullptr;
}
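The two nested-min/max rules added above have a direct scalar analogue, spot-checked here with std::min/std::max (illustrative only):

#include <algorithm>
#include <cassert>

int main() {
  for (int a : {-5, 0, 23, 50, 97, 200}) {
    assert(std::min(std::min(a, 23), 97) == std::min(a, 23)); // inner bound wins
    assert(std::min(std::min(a, 97), 23) == std::min(a, 23)); // outer bound wins
    assert(std::max(std::max(a, 97), 23) == std::max(a, 97));
    assert(std::max(std::max(a, 23), 97) == std::max(a, 97));
  }
  return 0;
}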
-
/// foldSelectICmpAnd - If one of the constants is zero (we know they can't
/// both be) and we have an icmp instruction with zero, and we have an 'and'
/// with the non-constant value and a power of two, we can turn the select
@@ -663,27 +689,27 @@ static Value *foldSelectICmpAnd(const SelectInst &SI, ConstantInt *TrueVal,
InstCombiner::BuilderTy *Builder) {
const ICmpInst *IC = dyn_cast<ICmpInst>(SI.getCondition());
if (!IC || !IC->isEquality() || !SI.getType()->isIntegerTy())
- return 0;
+ return nullptr;
if (!match(IC->getOperand(1), m_Zero()))
- return 0;
+ return nullptr;
ConstantInt *AndRHS;
Value *LHS = IC->getOperand(0);
if (!match(LHS, m_And(m_Value(), m_ConstantInt(AndRHS))))
- return 0;
+ return nullptr;
// If both select arms are non-zero see if we have a select of the form
// 'x ? 2^n + C : C'. Then we can offset both arms by C, use the logic
// for 'x ? 2^n : 0' and fix the thing up at the end.
- ConstantInt *Offset = 0;
+ ConstantInt *Offset = nullptr;
if (!TrueVal->isZero() && !FalseVal->isZero()) {
if ((TrueVal->getValue() - FalseVal->getValue()).isPowerOf2())
Offset = FalseVal;
else if ((FalseVal->getValue() - TrueVal->getValue()).isPowerOf2())
Offset = TrueVal;
else
- return 0;
+ return nullptr;
// Adjust TrueVal and FalseVal to the offset.
TrueVal = ConstantInt::get(Builder->getContext(),
@@ -696,7 +722,7 @@ static Value *foldSelectICmpAnd(const SelectInst &SI, ConstantInt *TrueVal,
if (!AndRHS->getValue().isPowerOf2() ||
(!TrueVal->getValue().isPowerOf2() &&
!FalseVal->getValue().isPowerOf2()))
- return 0;
+ return nullptr;
// Determine which shift is needed to transform the result of the 'and' into the
// desired result.
@@ -708,7 +734,7 @@ static Value *foldSelectICmpAnd(const SelectInst &SI, ConstantInt *TrueVal,
// or a trunc of the 'and'. The trunc case requires that all of the truncated
// bits are zero; we can figure that out by looking at the 'and' mask.
if (AndZeros >= ValC->getBitWidth())
- return 0;
+ return nullptr;
Value *V = Builder->CreateZExtOrTrunc(LHS, SI.getType());
if (ValZeros > AndZeros)
@@ -866,7 +892,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
if (Instruction *TI = dyn_cast<Instruction>(TrueVal))
if (Instruction *FI = dyn_cast<Instruction>(FalseVal))
if (TI->hasOneUse() && FI->hasOneUse()) {
- Instruction *AddOp = 0, *SubOp = 0;
+ Instruction *AddOp = nullptr, *SubOp = nullptr;
// Turn (select C, (op X, Y), (op X, Z)) -> (op X, (select C, Y, Z))
if (TI->getOpcode() == FI->getOpcode())
@@ -888,7 +914,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
}
if (AddOp) {
- Value *OtherAddOp = 0;
+ Value *OtherAddOp = nullptr;
if (SubOp->getOperand(0) == AddOp->getOperand(0)) {
OtherAddOp = AddOp->getOperand(1);
} else if (SubOp->getOperand(0) == AddOp->getOperand(1)) {
@@ -969,7 +995,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
if (SelectInst *TrueSI = dyn_cast<SelectInst>(TrueVal)) {
if (TrueSI->getCondition() == CondVal) {
if (SI.getTrueValue() == TrueSI->getTrueValue())
- return 0;
+ return nullptr;
SI.setOperand(1, TrueSI->getTrueValue());
return &SI;
}
@@ -977,7 +1003,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
if (SelectInst *FalseSI = dyn_cast<SelectInst>(FalseVal)) {
if (FalseSI->getCondition() == CondVal) {
if (SI.getFalseValue() == FalseSI->getFalseValue())
- return 0;
+ return nullptr;
SI.setOperand(2, FalseSI->getFalseValue());
return &SI;
}
@@ -1005,5 +1031,5 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
}
}
- return 0;
+ return nullptr;
}
diff --git a/lib/Transforms/InstCombine/InstCombineShifts.cpp b/lib/Transforms/InstCombine/InstCombineShifts.cpp
index 8273dfd..cc6665c 100644
--- a/lib/Transforms/InstCombine/InstCombineShifts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineShifts.cpp
@@ -19,6 +19,8 @@
using namespace llvm;
using namespace PatternMatch;
+#define DEBUG_TYPE "instcombine"
+
Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) {
assert(I.getOperand(1)->getType() == I.getOperand(0)->getType());
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
@@ -33,7 +35,7 @@ Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) {
if (Instruction *R = FoldOpIntoSelect(I, SI))
return R;
- if (ConstantInt *CUI = dyn_cast<ConstantInt>(Op1))
+ if (Constant *CUI = dyn_cast<Constant>(Op1))
if (Instruction *Res = FoldShiftByConstant(Op0, CUI, I))
return Res;
@@ -50,7 +52,7 @@ Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) {
return &I;
}
- return 0;
+ return nullptr;
}
/// CanEvaluateShifted - See if we can compute the specified value, but shifted
@@ -78,7 +80,7 @@ static bool CanEvaluateShifted(Value *V, unsigned NumBits, bool isLeftShift,
// if the needed bits are already zero in the input. This allows us to reuse
// the value which means that we don't care if the shift has multiple uses.
// TODO: Handle opposite shift by exact value.
- ConstantInt *CI = 0;
+ ConstantInt *CI = nullptr;
if ((isLeftShift && match(I, m_LShr(m_Value(), m_ConstantInt(CI)))) ||
(!isLeftShift && match(I, m_Shl(m_Value(), m_ConstantInt(CI))))) {
if (CI->getZExtValue() == NumBits) {
@@ -115,7 +117,7 @@ static bool CanEvaluateShifted(Value *V, unsigned NumBits, bool isLeftShift,
case Instruction::Shl: {
// We can often fold the shift into shifts-by-a-constant.
CI = dyn_cast<ConstantInt>(I->getOperand(1));
- if (CI == 0) return false;
+ if (!CI) return false;
// We can always fold shl(c1)+shl(c2) -> shl(c1+c2).
if (isLeftShift) return true;
@@ -139,7 +141,7 @@ static bool CanEvaluateShifted(Value *V, unsigned NumBits, bool isLeftShift,
case Instruction::LShr: {
// We can often fold the shift into shifts-by-a-constant.
CI = dyn_cast<ConstantInt>(I->getOperand(1));
- if (CI == 0) return false;
+ if (!CI) return false;
// We can always fold lshr(c1)+lshr(c2) -> lshr(c1+c2).
if (!isLeftShift) return true;
@@ -309,37 +311,38 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift,
-Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
+Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, Constant *Op1,
BinaryOperator &I) {
bool isLeftShift = I.getOpcode() == Instruction::Shl;
+ ConstantInt *COp1 = nullptr;
+ if (ConstantDataVector *CV = dyn_cast<ConstantDataVector>(Op1))
+ COp1 = dyn_cast_or_null<ConstantInt>(CV->getSplatValue());
+ else if (ConstantVector *CV = dyn_cast<ConstantVector>(Op1))
+ COp1 = dyn_cast_or_null<ConstantInt>(CV->getSplatValue());
+ else
+ COp1 = dyn_cast<ConstantInt>(Op1);
+
+ if (!COp1)
+ return nullptr;
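The splat extraction above can be read as a standalone helper (a sketch against the LLVM 3.5 API; getSplatShiftAmt is a hypothetical name):

#include "llvm/IR/Constants.h"
using namespace llvm;

// Return the per-lane shift amount if Op1 is a scalar ConstantInt or a
// vector whose lanes are all the same ConstantInt; otherwise nullptr.
static ConstantInt *getSplatShiftAmt(Constant *Op1) {
  if (ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(Op1))
    return dyn_cast_or_null<ConstantInt>(CDV->getSplatValue());
  if (ConstantVector *CV = dyn_cast<ConstantVector>(Op1))
    return dyn_cast_or_null<ConstantInt>(CV->getSplatValue());
  return dyn_cast<ConstantInt>(Op1);  // plain scalar case
}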
// See if we can propagate this shift into the input; this covers the trivial
// cast of lshr(shl(x,c1),c2) as well as other more complex cases.
if (I.getOpcode() != Instruction::AShr &&
- CanEvaluateShifted(Op0, Op1->getZExtValue(), isLeftShift, *this)) {
+ CanEvaluateShifted(Op0, COp1->getZExtValue(), isLeftShift, *this)) {
DEBUG(dbgs() << "ICE: GetShiftedValue propagating shift through expression"
" to eliminate shift:\n IN: " << *Op0 << "\n SH: " << I <<"\n");
return ReplaceInstUsesWith(I,
- GetShiftedValue(Op0, Op1->getZExtValue(), isLeftShift, *this));
+ GetShiftedValue(Op0, COp1->getZExtValue(), isLeftShift, *this));
}
-
// See if we can simplify any instructions used by the instruction whose sole
// purpose is to compute bits we don't care about.
uint32_t TypeBits = Op0->getType()->getScalarSizeInBits();
- // shl i32 X, 32 = 0 and srl i8 Y, 9 = 0, ... just don't eliminate
- // a signed shift.
- //
- if (Op1->uge(TypeBits)) {
- if (I.getOpcode() != Instruction::AShr)
- return ReplaceInstUsesWith(I, Constant::getNullValue(Op0->getType()));
- // ashr i32 X, 32 --> ashr i32 X, 31
- I.setOperand(1, ConstantInt::get(I.getType(), TypeBits-1));
- return &I;
- }
+ assert(!COp1->uge(TypeBits) &&
+ "Shift over the type width should have been removed already");
// ((X*C1) << C2) == (X * (C1 << C2))
if (BinaryOperator *BO = dyn_cast<BinaryOperator>(Op0))
@@ -367,7 +370,7 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
if (TrOp && I.isLogicalShift() && TrOp->isShift() &&
isa<ConstantInt>(TrOp->getOperand(1))) {
// Okay, we'll do this xform. Make the shift of shift.
- Constant *ShAmt = ConstantExpr::getZExt(Op1, TrOp->getType());
+ Constant *ShAmt = ConstantExpr::getZExt(COp1, TrOp->getType());
// (shift2 (shift1 & 0x00FF), c2)
Value *NSh = Builder->CreateBinOp(I.getOpcode(), TrOp, ShAmt,I.getName());
@@ -384,10 +387,10 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
// shift. We know that it is a logical shift by a constant, so adjust the
// mask as appropriate.
if (I.getOpcode() == Instruction::Shl)
- MaskV <<= Op1->getZExtValue();
+ MaskV <<= COp1->getZExtValue();
else {
assert(I.getOpcode() == Instruction::LShr && "Unknown logical shift");
- MaskV = MaskV.lshr(Op1->getZExtValue());
+ MaskV = MaskV.lshr(COp1->getZExtValue());
}
// shift1 & 0x00FF
@@ -421,9 +424,13 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
// (X + (Y << C))
Value *X = Builder->CreateBinOp(Op0BO->getOpcode(), YS, V1,
Op0BO->getOperand(1)->getName());
- uint32_t Op1Val = Op1->getLimitedValue(TypeBits);
- return BinaryOperator::CreateAnd(X, ConstantInt::get(I.getContext(),
- APInt::getHighBitsSet(TypeBits, TypeBits-Op1Val)));
+ uint32_t Op1Val = COp1->getLimitedValue(TypeBits);
+
+ APInt Bits = APInt::getHighBitsSet(TypeBits, TypeBits - Op1Val);
+ Constant *Mask = ConstantInt::get(I.getContext(), Bits);
+ if (VectorType *VT = dyn_cast<VectorType>(X->getType()))
+ Mask = ConstantVector::getSplat(VT->getNumElements(), Mask);
+ return BinaryOperator::CreateAnd(X, Mask);
}
// Turn (Y + ((X >> C) & CC)) << C -> ((X & (CC << C)) + (Y << C))
@@ -453,9 +460,13 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
// (X + (Y << C))
Value *X = Builder->CreateBinOp(Op0BO->getOpcode(), V1, YS,
Op0BO->getOperand(0)->getName());
- uint32_t Op1Val = Op1->getLimitedValue(TypeBits);
- return BinaryOperator::CreateAnd(X, ConstantInt::get(I.getContext(),
- APInt::getHighBitsSet(TypeBits, TypeBits-Op1Val)));
+ uint32_t Op1Val = COp1->getLimitedValue(TypeBits);
+
+ APInt Bits = APInt::getHighBitsSet(TypeBits, TypeBits - Op1Val);
+ Constant *Mask = ConstantInt::get(I.getContext(), Bits);
+ if (VectorType *VT = dyn_cast<VectorType>(X->getType()))
+ Mask = ConstantVector::getSplat(VT->getNumElements(), Mask);
+ return BinaryOperator::CreateAnd(X, Mask);
}
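
With the mask now splatted, this fold applies to vectors too. A sketch in LLVM IR (illustrative values: i8 elements, C = 4, so the high-bits mask is -16; exact operand order may differ):

    %sh = lshr <2 x i8> %x, <i8 4, i8 4>
    %ad = add <2 x i8> %sh, %y
    %r  = shl <2 x i8> %ad, <i8 4, i8 4>
    ; =>
    %ys = shl <2 x i8> %y, <i8 4, i8 4>
    %s  = add <2 x i8> %x, %ys
    %r2 = and <2 x i8> %s, <i8 -16, i8 -16>
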
// Turn (((X >> C)&CC) + Y) << C -> (X + (Y << C)) & (CC << C)
@@ -523,7 +534,7 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
// Find out if this is a shift of a shift by a constant.
BinaryOperator *ShiftOp = dyn_cast<BinaryOperator>(Op0);
if (ShiftOp && !ShiftOp->isShift())
- ShiftOp = 0;
+ ShiftOp = nullptr;
if (ShiftOp && isa<ConstantInt>(ShiftOp->getOperand(1))) {
@@ -541,9 +552,9 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
ConstantInt *ShiftAmt1C = cast<ConstantInt>(ShiftOp->getOperand(1));
uint32_t ShiftAmt1 = ShiftAmt1C->getLimitedValue(TypeBits);
- uint32_t ShiftAmt2 = Op1->getLimitedValue(TypeBits);
+ uint32_t ShiftAmt2 = COp1->getLimitedValue(TypeBits);
assert(ShiftAmt2 != 0 && "Should have been simplified earlier");
- if (ShiftAmt1 == 0) return 0; // Will be simplified in the future.
+ if (ShiftAmt1 == 0) return nullptr; // Will be simplified in the future.
Value *X = ShiftOp->getOperand(0);
IntegerType *Ty = cast<IntegerType>(I.getType());
@@ -671,10 +682,13 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
}
}
}
- return 0;
+ return nullptr;
}
Instruction *InstCombiner::visitShl(BinaryOperator &I) {
+ if (Value *V = SimplifyVectorOp(I))
+ return ReplaceInstUsesWith(I, V);
+
if (Value *V = SimplifyShlInst(I.getOperand(0), I.getOperand(1),
I.hasNoSignedWrap(), I.hasNoUnsignedWrap(),
DL))
@@ -709,10 +723,13 @@ Instruction *InstCombiner::visitShl(BinaryOperator &I) {
match(I.getOperand(1), m_Constant(C2)))
return BinaryOperator::CreateShl(ConstantExpr::getShl(C1, C2), A);
- return 0;
+ return nullptr;
}
Instruction *InstCombiner::visitLShr(BinaryOperator &I) {
+ if (Value *V = SimplifyVectorOp(I))
+ return ReplaceInstUsesWith(I, V);
+
if (Value *V = SimplifyLShrInst(I.getOperand(0), I.getOperand(1),
I.isExact(), DL))
return ReplaceInstUsesWith(I, V);
@@ -749,10 +766,13 @@ Instruction *InstCombiner::visitLShr(BinaryOperator &I) {
}
}
- return 0;
+ return nullptr;
}
Instruction *InstCombiner::visitAShr(BinaryOperator &I) {
+ if (Value *V = SimplifyVectorOp(I))
+ return ReplaceInstUsesWith(I, V);
+
if (Value *V = SimplifyAShrInst(I.getOperand(0), I.getOperand(1),
I.isExact(), DL))
return ReplaceInstUsesWith(I, V);
@@ -805,6 +825,5 @@ Instruction *InstCombiner::visitAShr(BinaryOperator &I) {
if (NumSignBits == Op0->getType()->getScalarSizeInBits())
return ReplaceInstUsesWith(I, Op0);
- return 0;
+ return nullptr;
}
-
diff --git a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index a47b709..1b42d3d 100644
--- a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -12,7 +12,6 @@
//
//===----------------------------------------------------------------------===//
-
#include "InstCombine.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/IntrinsicInst.h"
@@ -21,6 +20,8 @@
using namespace llvm;
using namespace llvm::PatternMatch;
+#define DEBUG_TYPE "instcombine"
+
/// ShrinkDemandedConstant - Check to see if the specified operand of the
/// specified instruction is a constant integer. If so, check to see if there
/// are any bits set in the constant that are not demanded. If so, shrink the
@@ -57,7 +58,7 @@ bool InstCombiner::SimplifyDemandedInstructionBits(Instruction &Inst) {
Value *V = SimplifyDemandedUseBits(&Inst, DemandedMask,
KnownZero, KnownOne, 0);
- if (V == 0) return false;
+ if (!V) return false;
if (V == &Inst) return true;
ReplaceInstUsesWith(Inst, V);
return true;
@@ -71,7 +72,7 @@ bool InstCombiner::SimplifyDemandedBits(Use &U, APInt DemandedMask,
unsigned Depth) {
Value *NewVal = SimplifyDemandedUseBits(U.get(), DemandedMask,
KnownZero, KnownOne, Depth);
- if (NewVal == 0) return false;
+ if (!NewVal) return false;
U = NewVal;
return true;
}
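
For orientation, the kind of rewrite this Use-based wrapper enables, sketched in LLVM IR (illustrative; the and/or demanded-bits reasoning is spelled out further down):

    %o = or i8 %x, -16      ; can only set the high four bits
    %r = and i8 %o, 15      ; demands only the low four bits
    ; every demanded bit of %o comes from %x, so the use is rewritten:
    %r2 = and i8 %x, 15
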
@@ -101,7 +102,7 @@ bool InstCombiner::SimplifyDemandedBits(Use &U, APInt DemandedMask,
Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
APInt &KnownZero, APInt &KnownOne,
unsigned Depth) {
- assert(V != 0 && "Null pointer of Value???");
+ assert(V != nullptr && "Null pointer of Value???");
assert(Depth <= 6 && "Limit Search Depth");
uint32_t BitWidth = DemandedMask.getBitWidth();
Type *VTy = V->getType();
@@ -118,33 +119,33 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// We know all of the bits for a constant!
KnownOne = CI->getValue() & DemandedMask;
KnownZero = ~KnownOne & DemandedMask;
- return 0;
+ return nullptr;
}
if (isa<ConstantPointerNull>(V)) {
// We know all of the bits for a constant!
KnownOne.clearAllBits();
KnownZero = DemandedMask;
- return 0;
+ return nullptr;
}
KnownZero.clearAllBits();
KnownOne.clearAllBits();
if (DemandedMask == 0) { // Not demanding any bits from V.
if (isa<UndefValue>(V))
- return 0;
+ return nullptr;
return UndefValue::get(VTy);
}
if (Depth == 6) // Limit search depth.
- return 0;
+ return nullptr;
APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0);
APInt RHSKnownZero(BitWidth, 0), RHSKnownOne(BitWidth, 0);
Instruction *I = dyn_cast<Instruction>(V);
if (!I) {
- ComputeMaskedBits(V, KnownZero, KnownOne, Depth);
- return 0; // Only analyze instructions.
+ computeKnownBits(V, KnownZero, KnownOne, Depth);
+ return nullptr; // Only analyze instructions.
}
// If there are multiple uses of this value and we aren't at the root, then
@@ -157,8 +158,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// this instruction has a simpler value in that context.
if (I->getOpcode() == Instruction::And) {
// If either the LHS or the RHS are Zero, the result is zero.
- ComputeMaskedBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth+1);
- ComputeMaskedBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1);
+ computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth+1);
+ computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1);
// If all of the demanded bits are known 1 on one side, return the other.
// These bits cannot contribute to the result of the 'and' in this
@@ -179,8 +180,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// only bits from X or Y are demanded.
// If either the LHS or the RHS are One, the result is One.
- ComputeMaskedBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth+1);
- ComputeMaskedBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1);
+ computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth+1);
+ computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1);
// If all of the demanded bits are known zero on one side, return the
// other. These bits cannot contribute to the result of the 'or' in this
@@ -204,8 +205,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// We can simplify (X^Y) -> X or Y in the user's context if we know that
// only bits from X or Y are demanded.
- ComputeMaskedBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth+1);
- ComputeMaskedBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1);
+ computeKnownBits(I->getOperand(1), RHSKnownZero, RHSKnownOne, Depth+1);
+ computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1);
// If all of the demanded bits are known zero on one side, return the
// other.
@@ -216,8 +217,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
}
// Compute the KnownZero/KnownOne bits to simplify things downstream.
- ComputeMaskedBits(I, KnownZero, KnownOne, Depth);
- return 0;
+ computeKnownBits(I, KnownZero, KnownOne, Depth);
+ return nullptr;
}
// If this is the root being simplified, allow it to have multiple uses,
@@ -229,7 +230,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
switch (I->getOpcode()) {
default:
- ComputeMaskedBits(I, KnownZero, KnownOne, Depth);
+ computeKnownBits(I, KnownZero, KnownOne, Depth);
break;
case Instruction::And:
// If either the LHS or the RHS are Zero, the result is zero.
@@ -409,20 +410,20 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
}
case Instruction::BitCast:
if (!I->getOperand(0)->getType()->isIntOrIntVectorTy())
- return 0; // vector->int or fp->int?
+ return nullptr; // vector->int or fp->int?
if (VectorType *DstVTy = dyn_cast<VectorType>(I->getType())) {
if (VectorType *SrcVTy =
dyn_cast<VectorType>(I->getOperand(0)->getType())) {
if (DstVTy->getNumElements() != SrcVTy->getNumElements())
// Don't touch a bitcast between vectors of different element counts.
- return 0;
+ return nullptr;
} else
// Don't touch a scalar-to-vector bitcast.
- return 0;
+ return nullptr;
} else if (I->getOperand(0)->getType()->isVectorTy())
// Don't touch a vector-to-scalar bitcast.
- return 0;
+ return nullptr;
if (SimplifyDemandedBits(I->getOperandUse(0), DemandedMask,
KnownZero, KnownOne, Depth+1))
@@ -578,9 +579,9 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
return I;
}
- // Otherwise just hand the sub off to ComputeMaskedBits to fill in
+ // Otherwise just hand the sub off to computeKnownBits to fill in
// the known zeros and ones.
- ComputeMaskedBits(V, KnownZero, KnownOne, Depth);
+ computeKnownBits(V, KnownZero, KnownOne, Depth);
// Turn this into a xor if LHS is 2^n-1 and the remaining bits are known
// zero.
@@ -751,7 +752,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// remainder is zero.
if (DemandedMask.isNegative() && KnownZero.isNonNegative()) {
APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0);
- ComputeMaskedBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1);
+ computeKnownBits(I->getOperand(0), LHSKnownZero, LHSKnownOne, Depth+1);
// If it's known zero, our sign bit is also zero.
if (LHSKnownZero.isNegative())
KnownZero.setBit(KnownZero.getBitWidth() - 1);
@@ -810,10 +811,10 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
}
case Intrinsic::x86_sse42_crc32_64_64:
KnownZero = APInt::getHighBitsSet(64, 32);
- return 0;
+ return nullptr;
}
}
- ComputeMaskedBits(V, KnownZero, KnownOne, Depth);
+ computeKnownBits(V, KnownZero, KnownOne, Depth);
break;
}
@@ -821,7 +822,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// constant.
if ((DemandedMask & (KnownZero|KnownOne)) == DemandedMask)
return Constant::getIntegerValue(VTy, KnownOne);
- return 0;
+ return nullptr;
}
/// Helper routine of SimplifyDemandedUseBits. It tries to simplify
@@ -847,13 +848,13 @@ Value *InstCombiner::SimplifyShrShlDemandedBits(Instruction *Shr,
const APInt &ShlOp1 = cast<ConstantInt>(Shl->getOperand(1))->getValue();
const APInt &ShrOp1 = cast<ConstantInt>(Shr->getOperand(1))->getValue();
if (!ShlOp1 || !ShrOp1)
- return 0; // Noop.
+ return nullptr; // Noop.
Value *VarX = Shr->getOperand(0);
Type *Ty = VarX->getType();
unsigned BitWidth = Ty->getIntegerBitWidth();
if (ShlOp1.uge(BitWidth) || ShrOp1.uge(BitWidth))
- return 0; // Undef.
+ return nullptr; // Undef.
unsigned ShlAmt = ShlOp1.getZExtValue();
unsigned ShrAmt = ShrOp1.getZExtValue();
@@ -882,7 +883,7 @@ Value *InstCombiner::SimplifyShrShlDemandedBits(Instruction *Shr,
return VarX;
if (!Shr->hasOneUse())
- return 0;
+ return nullptr;
BinaryOperator *New;
if (ShrAmt < ShlAmt) {
@@ -902,7 +903,7 @@ Value *InstCombiner::SimplifyShrShlDemandedBits(Instruction *Shr,
return InsertNewInstWith(New, *Shl);
}
- return 0;
+ return nullptr;
}
/// SimplifyDemandedVectorElts - The specified value produces a vector with
@@ -923,7 +924,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
if (isa<UndefValue>(V)) {
// If the entire vector is undefined, just return this info.
UndefElts = EltMask;
- return 0;
+ return nullptr;
}
if (DemandedElts == 0) { // If nothing is demanded, provide undef.
@@ -938,7 +939,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
  // Check if this is the identity case. If so, return nullptr since we are not
  // simplifying anything.
if (DemandedElts.isAllOnesValue())
- return 0;
+ return nullptr;
Type *EltTy = cast<VectorType>(V->getType())->getElementType();
Constant *Undef = UndefValue::get(EltTy);
@@ -952,7 +953,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
}
Constant *Elt = C->getAggregateElement(i);
- if (Elt == 0) return 0;
+ if (!Elt) return nullptr;
if (isa<UndefValue>(Elt)) { // Already undef.
Elts.push_back(Undef);
@@ -964,12 +965,12 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
// If we changed the constant, return it.
Constant *NewCV = ConstantVector::get(Elts);
- return NewCV != C ? NewCV : 0;
+ return NewCV != C ? NewCV : nullptr;
}
// Limit search depth.
if (Depth == 10)
- return 0;
+ return nullptr;
// If multiple users are using the root value, proceed with
// simplification conservatively assuming that all elements
@@ -980,14 +981,14 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
// the main instcombine process.
if (Depth != 0)
// TODO: Just compute the UndefElts information recursively.
- return 0;
+ return nullptr;
// Conservatively assume that all elements are needed.
DemandedElts = EltMask;
}
Instruction *I = dyn_cast<Instruction>(V);
- if (!I) return 0; // Only analyze instructions.
+ if (!I) return nullptr; // Only analyze instructions.
bool MadeChange = false;
APInt UndefElts2(VWidth, 0);
@@ -999,7 +1000,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
    // If this is a variable index, we don't know which element it overwrites, so
    // demand exactly the same input as we produce.
ConstantInt *Idx = dyn_cast<ConstantInt>(I->getOperand(2));
- if (Idx == 0) {
+ if (!Idx) {
// Note that we can't propagate undef elt info, because we don't know
// which elt is getting updated.
TmpV = SimplifyDemandedVectorElts(I->getOperand(0), DemandedElts,
@@ -1281,5 +1282,5 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
break;
}
}
- return MadeChange ? I : 0;
+ return MadeChange ? I : nullptr;
}
diff --git a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index 521dc9c..8c5e202 100644
--- a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -17,6 +17,8 @@
using namespace llvm;
using namespace PatternMatch;
+#define DEBUG_TYPE "instcombine"
+
/// CheapToScalarize - Return true if the value is cheaper to scalarize than it
/// is to leave as a vector operation. isConstant indicates whether we're
/// extracting one known element. If false, we're extracting a variable index.
@@ -73,7 +75,7 @@ static Value *FindScalarElement(Value *V, unsigned EltNo) {
if (InsertElementInst *III = dyn_cast<InsertElementInst>(V)) {
// If this is an insert to a variable element, we don't know what it is.
if (!isa<ConstantInt>(III->getOperand(2)))
- return 0;
+ return nullptr;
unsigned IIElt = cast<ConstantInt>(III->getOperand(2))->getZExtValue();
// If this is an insert to the element we are looking for, return the
@@ -97,14 +99,14 @@ static Value *FindScalarElement(Value *V, unsigned EltNo) {
}
// Extract a value from a vector add operation with a constant zero.
- Value *Val = 0; Constant *Con = 0;
+ Value *Val = nullptr; Constant *Con = nullptr;
if (match(V, m_Add(m_Value(Val), m_Constant(Con)))) {
if (Con->getAggregateElement(EltNo)->isNullValue())
return FindScalarElement(Val, EltNo);
}
// Otherwise, we don't know.
- return 0;
+ return nullptr;
}
// If we have a PHI node with a vector type that has only 2 uses: feed
@@ -113,7 +115,7 @@ static Value *FindScalarElement(Value *V, unsigned EltNo) {
Instruction *InstCombiner::scalarizePHI(ExtractElementInst &EI, PHINode *PN) {
// Verify that the PHI node has exactly 2 uses. Otherwise return NULL.
if (!PN->hasNUses(2))
- return NULL;
+ return nullptr;
// If so, it's known at this point that one operand is PHI and the other is
// an extractelement node. Find the PHI user that is not the extractelement
@@ -128,7 +130,7 @@ Instruction *InstCombiner::scalarizePHI(ExtractElementInst &EI, PHINode *PN) {
// otherwise return NULL.
if (!PHIUser->hasOneUse() || !(PHIUser->user_back() == PN) ||
!(isa<BinaryOperator>(PHIUser)) || !CheapToScalarize(PHIUser, true))
- return NULL;
+ return nullptr;
// Create a scalar PHI node that will replace the vector PHI node
// just before the current PHI node.
@@ -318,7 +320,7 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
}
}
}
- return 0;
+ return nullptr;
}
/// CollectSingleShuffleElements - If V is a shuffle of values that ONLY returns
@@ -440,10 +442,10 @@ static ShuffleOps CollectShuffleElements(Value *V,
// Either the extracted from or inserted into vector must be RHSVec,
// otherwise we'd end up with a shuffle of three inputs.
- if (EI->getOperand(0) == PermittedRHS || PermittedRHS == 0) {
+ if (EI->getOperand(0) == PermittedRHS || PermittedRHS == nullptr) {
Value *RHS = EI->getOperand(0);
ShuffleOps LR = CollectShuffleElements(VecOp, Mask, RHS);
- assert(LR.second == 0 || LR.second == RHS);
+ assert(LR.second == nullptr || LR.second == RHS);
if (LR.first->getType() != RHS->getType()) {
// We tried our best, but we can't find anything compatible with RHS
@@ -488,6 +490,41 @@ static ShuffleOps CollectShuffleElements(Value *V,
return std::make_pair(V, nullptr);
}
+/// Try to find redundant insertvalue instructions, like the following ones:
+/// %0 = insertvalue { i8, i32 } undef, i8 %x, 0
+/// %1 = insertvalue { i8, i32 } %0, i8 %y, 0
+/// Here the second instruction inserts values at the same indices as the
+/// first one, making the first one redundant.
+/// It should be transformed to:
+/// %0 = insertvalue { i8, i32 } undef, i8 %y, 0
+Instruction *InstCombiner::visitInsertValueInst(InsertValueInst &I) {
+ bool IsRedundant = false;
+ ArrayRef<unsigned int> FirstIndices = I.getIndices();
+
+  // If there is a chain of insertvalue instructions (each of them except the
+  // last one has only one use, and that use is another insertvalue from the
+  // chain), check if any of the 'children' uses the same indices as the first
+  // instruction. In that case, the first one is redundant.
+ Value *V = &I;
+ unsigned Depth = 0;
+ while (V->hasOneUse() && Depth < 10) {
+ User *U = V->user_back();
+ auto UserInsInst = dyn_cast<InsertValueInst>(U);
+ if (!UserInsInst || U->getOperand(0) != V)
+ break;
+ if (UserInsInst->getIndices() == FirstIndices) {
+ IsRedundant = true;
+ break;
+ }
+ V = UserInsInst;
+ Depth++;
+ }
+
+ if (IsRedundant)
+ return ReplaceInstUsesWith(I, I.getOperand(0));
+ return nullptr;
+}
+
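
A slightly longer chain the loop above also handles (a sketch): visiting %a, the walk follows the single-use chain through %b to %c, finds the same index, and replaces %a's uses with its aggregate operand:

    %a = insertvalue { i8, i32 } undef, i32 1, 1
    %b = insertvalue { i8, i32 } %a, i8 %x, 0
    %c = insertvalue { i8, i32 } %b, i32 2, 1
    ; => %a is redundant:
    %b2 = insertvalue { i8, i32 } undef, i8 %x, 0
    %c2 = insertvalue { i8, i32 } %b2, i32 2, 1
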
Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) {
Value *VecOp = IE.getOperand(0);
Value *ScalarOp = IE.getOperand(1);
@@ -523,13 +560,14 @@ Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) {
// (and any insertelements it points to), into one big shuffle.
if (!IE.hasOneUse() || !isa<InsertElementInst>(IE.user_back())) {
SmallVector<Constant*, 16> Mask;
- ShuffleOps LR = CollectShuffleElements(&IE, Mask, 0);
+ ShuffleOps LR = CollectShuffleElements(&IE, Mask, nullptr);
// The proposed shuffle may be trivial, in which case we shouldn't
// perform the combine.
if (LR.first != &IE && LR.second != &IE) {
// We now have a shuffle of LHS, RHS, Mask.
- if (LR.second == 0) LR.second = UndefValue::get(LR.first->getType());
+ if (LR.second == nullptr)
+ LR.second = UndefValue::get(LR.first->getType());
return new ShuffleVectorInst(LR.first, LR.second,
ConstantVector::get(Mask));
}
@@ -546,7 +584,7 @@ Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) {
return &IE;
}
- return 0;
+ return nullptr;
}
/// Return true if we can evaluate the specified expression tree if the vector
@@ -801,6 +839,20 @@ InstCombiner::EvaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask) {
llvm_unreachable("failed to reorder elements of vector instruction!");
}
+static void RecognizeIdentityMask(const SmallVectorImpl<int> &Mask,
+ bool &isLHSID, bool &isRHSID) {
+ isLHSID = isRHSID = true;
+
+ for (unsigned i = 0, e = Mask.size(); i != e; ++i) {
+ if (Mask[i] < 0) continue; // Ignore undef values.
+ // Is this an identity shuffle of the LHS value?
+ isLHSID &= (Mask[i] == (int)i);
+
+ // Is this an identity shuffle of the RHS value?
+ isRHSID &= (Mask[i]-e == i);
+ }
+}
+
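
In a shufflevector mask, elements 0..e-1 select from the first operand and e..2e-1 from the second, so for a 4-wide shuffle <0,1,2,3> is an LHS identity and <4,5,6,7> an RHS identity. For example (illustrative):

    %id = shufflevector <4 x i32> %a, <4 x i32> %b,
                        <4 x i32> <i32 4, i32 5, i32 6, i32 7>
    ; RecognizeIdentityMask sets isRHSID, and uses of %id become %b.
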
Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
Value *LHS = SVI.getOperand(0);
Value *RHS = SVI.getOperand(1);
@@ -864,16 +916,8 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
if (VWidth == LHSWidth) {
    // Analyze the shuffle: is the LHS or the RHS an identity shuffle?
- bool isLHSID = true, isRHSID = true;
-
- for (unsigned i = 0, e = Mask.size(); i != e; ++i) {
- if (Mask[i] < 0) continue; // Ignore undef values.
- // Is this an identity shuffle of the LHS value?
- isLHSID &= (Mask[i] == (int)i);
-
- // Is this an identity shuffle of the RHS value?
- isRHSID &= (Mask[i]-e == i);
- }
+ bool isLHSID, isRHSID;
+ RecognizeIdentityMask(Mask, isLHSID, isRHSID);
// Eliminate identity shuffles.
if (isLHSID) return ReplaceInstUsesWith(SVI, LHS);
@@ -932,16 +976,16 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
ShuffleVectorInst* RHSShuffle = dyn_cast<ShuffleVectorInst>(RHS);
if (LHSShuffle)
if (!isa<UndefValue>(LHSShuffle->getOperand(1)) && !isa<UndefValue>(RHS))
- LHSShuffle = NULL;
+ LHSShuffle = nullptr;
if (RHSShuffle)
if (!isa<UndefValue>(RHSShuffle->getOperand(1)))
- RHSShuffle = NULL;
+ RHSShuffle = nullptr;
if (!LHSShuffle && !RHSShuffle)
- return MadeChange ? &SVI : 0;
+ return MadeChange ? &SVI : nullptr;
- Value* LHSOp0 = NULL;
- Value* LHSOp1 = NULL;
- Value* RHSOp0 = NULL;
+ Value* LHSOp0 = nullptr;
+ Value* LHSOp1 = nullptr;
+ Value* RHSOp0 = nullptr;
unsigned LHSOp0Width = 0;
unsigned RHSOp0Width = 0;
if (LHSShuffle) {
@@ -973,11 +1017,11 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
// case 4
if (LHSOp0 == RHSOp0) {
newLHS = LHSOp0;
- newRHS = NULL;
+ newRHS = nullptr;
}
if (newLHS == LHS && newRHS == RHS)
- return MadeChange ? &SVI : 0;
+ return MadeChange ? &SVI : nullptr;
SmallVector<int, 16> LHSMask;
SmallVector<int, 16> RHSMask;
@@ -1037,7 +1081,7 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
// If newRHS == newLHS, we want to remap any references from newRHS to
// newLHS so that we can properly identify splats that may occur due to
// obfuscation across the two vectors.
- if (eltMask >= 0 && newRHS != NULL && newLHS != newRHS)
+ if (eltMask >= 0 && newRHS != nullptr && newLHS != newRHS)
eltMask += newLHSWidth;
}
@@ -1063,10 +1107,17 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
Elts.push_back(ConstantInt::get(Int32Ty, newMask[i]));
}
}
- if (newRHS == NULL)
+ if (!newRHS)
newRHS = UndefValue::get(newLHS->getType());
return new ShuffleVectorInst(newLHS, newRHS, ConstantVector::get(Elts));
}
- return MadeChange ? &SVI : 0;
+  // If the result mask is an identity, replace uses of this instruction with
+  // the corresponding argument.
+ bool isLHSID, isRHSID;
+ RecognizeIdentityMask(newMask, isLHSID, isRHSID);
+ if (isLHSID && VWidth == LHSOp0Width) return ReplaceInstUsesWith(SVI, newLHS);
+ if (isRHSID && VWidth == RHSOp0Width) return ReplaceInstUsesWith(SVI, newRHS);
+
+ return MadeChange ? &SVI : nullptr;
}
diff --git a/lib/Transforms/InstCombine/InstCombineWorklist.h b/lib/Transforms/InstCombine/InstCombineWorklist.h
index 8c780b5..1ab7db3 100644
--- a/lib/Transforms/InstCombine/InstCombineWorklist.h
+++ b/lib/Transforms/InstCombine/InstCombineWorklist.h
@@ -10,7 +10,6 @@
#ifndef INSTCOMBINE_WORKLIST_H
#define INSTCOMBINE_WORKLIST_H
-#define DEBUG_TYPE "instcombine"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Instruction.h"
@@ -18,6 +17,8 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#define DEBUG_TYPE "instcombine"
+
namespace llvm {
/// InstCombineWorklist - This is the worklist management logic for
@@ -68,7 +69,7 @@ public:
if (It == WorklistMap.end()) return; // Not in worklist.
// Don't bother moving everything down, just null out the slot.
- Worklist[It->second] = 0;
+ Worklist[It->second] = nullptr;
WorklistMap.erase(It);
}
@@ -101,4 +102,6 @@ public:
} // end namespace llvm.
+#undef DEBUG_TYPE
+
#endif
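
The convention these files are converging on, sketched for the header case (names are illustrative, not actual LLVM code): DEBUG_TYPE is defined after all #includes, so a header cannot silently override its includer's setting, and headers #undef it again before the include guard closes:

    // SomeHeader.h -- a sketch of the convention.
    #ifndef SOME_HEADER_H
    #define SOME_HEADER_H

    #include "llvm/Support/Debug.h"

    #define DEBUG_TYPE "some-pass"
    // ... declarations that use DEBUG(...) ...
    #undef DEBUG_TYPE

    #endif
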
diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp
index 0cab81b..4c36887 100644
--- a/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -33,7 +33,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "instcombine"
#include "llvm/Transforms/Scalar.h"
#include "InstCombine.h"
#include "llvm-c/Initialization.h"
@@ -58,6 +57,8 @@
using namespace llvm;
using namespace llvm::PatternMatch;
+#define DEBUG_TYPE "instcombine"
+
STATISTIC(NumCombined , "Number of insts combined");
STATISTIC(NumConstProp, "Number of constant folds");
STATISTIC(NumDeadInst , "Number of dead inst eliminated");
@@ -512,7 +513,7 @@ Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) {
}
}
- return 0;
+ return nullptr;
}
// dyn_castNegVal - Given a 'sub' instruction, return the RHS of the instruction
@@ -530,7 +531,7 @@ Value *InstCombiner::dyn_castNegVal(Value *V) const {
if (C->getType()->getElementType()->isIntegerTy())
return ConstantExpr::getNeg(C);
- return 0;
+ return nullptr;
}
// dyn_castFNegVal - Given a 'fsub' instruction, return the RHS of the
@@ -549,7 +550,7 @@ Value *InstCombiner::dyn_castFNegVal(Value *V, bool IgnoreZeroSign) const {
if (C->getType()->getElementType()->isFloatingPointTy())
return ConstantExpr::getFNeg(C);
- return 0;
+ return nullptr;
}
static Value *FoldOperationIntoSelectOperand(Instruction &I, Value *SO,
@@ -595,13 +596,13 @@ static Value *FoldOperationIntoSelectOperand(Instruction &I, Value *SO,
// not have a second operand.
Instruction *InstCombiner::FoldOpIntoSelect(Instruction &Op, SelectInst *SI) {
// Don't modify shared select instructions
- if (!SI->hasOneUse()) return 0;
+ if (!SI->hasOneUse()) return nullptr;
Value *TV = SI->getOperand(1);
Value *FV = SI->getOperand(2);
if (isa<Constant>(TV) || isa<Constant>(FV)) {
// Bool selects with constant operands can be folded to logical ops.
- if (SI->getType()->isIntegerTy(1)) return 0;
+ if (SI->getType()->isIntegerTy(1)) return nullptr;
// If it's a bitcast involving vectors, make sure it has the same number of
// elements on both sides.
@@ -610,10 +611,10 @@ Instruction *InstCombiner::FoldOpIntoSelect(Instruction &Op, SelectInst *SI) {
VectorType *SrcTy = dyn_cast<VectorType>(BC->getSrcTy());
// Verify that either both or neither are vectors.
- if ((SrcTy == NULL) != (DestTy == NULL)) return 0;
+ if ((SrcTy == nullptr) != (DestTy == nullptr)) return nullptr;
// If vectors, verify that they have the same number of elements.
if (SrcTy && SrcTy->getNumElements() != DestTy->getNumElements())
- return 0;
+ return nullptr;
}
Value *SelectTrueVal = FoldOperationIntoSelectOperand(Op, TV, this);
@@ -622,7 +623,7 @@ Instruction *InstCombiner::FoldOpIntoSelect(Instruction &Op, SelectInst *SI) {
return SelectInst::Create(SI->getCondition(),
SelectTrueVal, SelectFalseVal);
}
- return 0;
+ return nullptr;
}
@@ -634,7 +635,7 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) {
PHINode *PN = cast<PHINode>(I.getOperand(0));
unsigned NumPHIValues = PN->getNumIncomingValues();
if (NumPHIValues == 0)
- return 0;
+ return nullptr;
// We normally only transform phis with a single use. However, if a PHI has
// multiple uses and they are all the same operation, we can fold *all* of the
@@ -644,7 +645,7 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) {
for (User *U : PN->users()) {
Instruction *UI = cast<Instruction>(U);
if (UI != &I && !I.isIdenticalTo(UI))
- return 0;
+ return nullptr;
}
// Otherwise, we can replace *all* users with the new PHI we form.
}
@@ -654,14 +655,14 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) {
// remember the BB it is in. If there is more than one or if *it* is a PHI,
// bail out. We don't do arbitrary constant expressions here because moving
// their computation can be expensive without a cost model.
- BasicBlock *NonConstBB = 0;
+ BasicBlock *NonConstBB = nullptr;
for (unsigned i = 0; i != NumPHIValues; ++i) {
Value *InVal = PN->getIncomingValue(i);
if (isa<Constant>(InVal) && !isa<ConstantExpr>(InVal))
continue;
- if (isa<PHINode>(InVal)) return 0; // Itself a phi.
- if (NonConstBB) return 0; // More than one non-const value.
+ if (isa<PHINode>(InVal)) return nullptr; // Itself a phi.
+ if (NonConstBB) return nullptr; // More than one non-const value.
NonConstBB = PN->getIncomingBlock(i);
@@ -669,22 +670,22 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) {
// insert a computation after it without breaking the edge.
if (InvokeInst *II = dyn_cast<InvokeInst>(InVal))
if (II->getParent() == NonConstBB)
- return 0;
+ return nullptr;
// If the incoming non-constant value is in I's block, we will remove one
// instruction, but insert another equivalent one, leading to infinite
// instcombine.
if (NonConstBB == I.getParent())
- return 0;
+ return nullptr;
}
// If there is exactly one non-constant value, we can insert a copy of the
// operation in that block. However, if this is a critical edge, we would be
  // inserting the computation on some other paths (e.g. inside a loop). Only
// do this if the pred block is unconditionally branching into the phi block.
- if (NonConstBB != 0) {
+ if (NonConstBB != nullptr) {
BranchInst *BI = dyn_cast<BranchInst>(NonConstBB->getTerminator());
- if (!BI || !BI->isUnconditional()) return 0;
+ if (!BI || !BI->isUnconditional()) return nullptr;
}
// Okay, we can do the transformation: create the new PHI node.
@@ -708,7 +709,7 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) {
BasicBlock *ThisBB = PN->getIncomingBlock(i);
Value *TrueVInPred = TrueV->DoPHITranslation(PhiTransBB, ThisBB);
Value *FalseVInPred = FalseV->DoPHITranslation(PhiTransBB, ThisBB);
- Value *InV = 0;
+ Value *InV = nullptr;
// Beware of ConstantExpr: it may eventually evaluate to getNullValue,
// even if currently isNullValue gives false.
Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i));
@@ -722,7 +723,7 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) {
} else if (CmpInst *CI = dyn_cast<CmpInst>(&I)) {
Constant *C = cast<Constant>(I.getOperand(1));
for (unsigned i = 0; i != NumPHIValues; ++i) {
- Value *InV = 0;
+ Value *InV = nullptr;
if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i)))
InV = ConstantExpr::getCompare(CI->getPredicate(), InC, C);
else if (isa<ICmpInst>(CI))
@@ -736,7 +737,7 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) {
} else if (I.getNumOperands() == 2) {
Constant *C = cast<Constant>(I.getOperand(1));
for (unsigned i = 0; i != NumPHIValues; ++i) {
- Value *InV = 0;
+ Value *InV = nullptr;
if (Constant *InC = dyn_cast<Constant>(PN->getIncomingValue(i)))
InV = ConstantExpr::get(I.getOpcode(), InC, C);
else
@@ -776,11 +777,11 @@ Type *InstCombiner::FindElementAtOffset(Type *PtrTy, int64_t Offset,
assert(PtrTy->isPtrOrPtrVectorTy());
if (!DL)
- return 0;
+ return nullptr;
Type *Ty = PtrTy->getPointerElementType();
if (!Ty->isSized())
- return 0;
+ return nullptr;
// Start with the index over the outer type. Note that the type size
// might be zero (even if the offset isn't zero) if the indexed type
@@ -806,7 +807,7 @@ Type *InstCombiner::FindElementAtOffset(Type *PtrTy, int64_t Offset,
while (Offset) {
// Indexing into tail padding between struct/array elements.
if (uint64_t(Offset*8) >= DL->getTypeSizeInBits(Ty))
- return 0;
+ return nullptr;
if (StructType *STy = dyn_cast<StructType>(Ty)) {
const StructLayout *SL = DL->getStructLayout(STy);
@@ -827,7 +828,7 @@ Type *InstCombiner::FindElementAtOffset(Type *PtrTy, int64_t Offset,
Ty = AT->getElementType();
} else {
// Otherwise, we can't index into the middle of this atomic type, bail.
- return 0;
+ return nullptr;
}
}
@@ -859,7 +860,7 @@ Value *InstCombiner::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) {
// If Scale is zero then it does not divide Val.
if (Scale.isMinValue())
- return 0;
+ return nullptr;
// Look through chains of multiplications, searching for a constant that is
// divisible by Scale. For example, descaling X*(Y*(Z*4)) by a factor of 4
@@ -902,7 +903,7 @@ Value *InstCombiner::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) {
APInt::sdivrem(CI->getValue(), Scale, Quotient, Remainder);
if (!Remainder.isMinValue())
// Not divisible by Scale.
- return 0;
+ return nullptr;
// Replace with the quotient in the parent.
Op = ConstantInt::get(CI->getType(), Quotient);
NoSignedWrap = true;
@@ -915,7 +916,7 @@ Value *InstCombiner::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) {
// Multiplication.
NoSignedWrap = BO->hasNoSignedWrap();
if (RequireNoSignedWrap && !NoSignedWrap)
- return 0;
+ return nullptr;
// There are three cases for multiplication: multiplication by exactly
// the scale, multiplication by a constant different to the scale, and
@@ -934,7 +935,7 @@ Value *InstCombiner::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) {
// Otherwise drill down into the constant.
if (!Op->hasOneUse())
- return 0;
+ return nullptr;
Parent = std::make_pair(BO, 1);
continue;
@@ -943,7 +944,7 @@ Value *InstCombiner::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) {
// Multiplication by something else. Drill down into the left-hand side
// since that's where the reassociate pass puts the good stuff.
if (!Op->hasOneUse())
- return 0;
+ return nullptr;
Parent = std::make_pair(BO, 0);
continue;
@@ -954,7 +955,7 @@ Value *InstCombiner::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) {
// Multiplication by a power of 2.
NoSignedWrap = BO->hasNoSignedWrap();
if (RequireNoSignedWrap && !NoSignedWrap)
- return 0;
+ return nullptr;
Value *LHS = BO->getOperand(0);
int32_t Amt = cast<ConstantInt>(BO->getOperand(1))->
@@ -968,7 +969,7 @@ Value *InstCombiner::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) {
break;
}
if (Amt < logScale || !Op->hasOneUse())
- return 0;
+ return nullptr;
// Multiplication by more than the scale. Reduce the multiplying amount
// by the scale in the parent.
@@ -979,7 +980,7 @@ Value *InstCombiner::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) {
}
if (!Op->hasOneUse())
- return 0;
+ return nullptr;
if (CastInst *Cast = dyn_cast<CastInst>(Op)) {
if (Cast->getOpcode() == Instruction::SExt) {
@@ -993,7 +994,7 @@ Value *InstCombiner::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) {
// Scale and the multiplication Y * SmallScale should not overflow.
if (SmallScale.sext(Scale.getBitWidth()) != Scale)
// SmallScale does not sign-extend to Scale.
- return 0;
+ return nullptr;
assert(SmallScale.exactLogBase2() == logScale);
// Require that Y * SmallScale must not overflow.
RequireNoSignedWrap = true;
@@ -1012,7 +1013,7 @@ Value *InstCombiner::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) {
// trunc (Y * sext Scale) does not, so nsw flags need to be cleared
// from this point up in the expression (see later).
if (RequireNoSignedWrap)
- return 0;
+ return nullptr;
// Drill down through the cast.
unsigned LargeSize = Cast->getSrcTy()->getPrimitiveSizeInBits();
@@ -1026,7 +1027,7 @@ Value *InstCombiner::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) {
}
// Unsupported expression, bail out.
- return 0;
+ return nullptr;
}
// We know that we can successfully descale, so from here on we can safely
@@ -1082,6 +1083,101 @@ Value *InstCombiner::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) {
} while (1);
}
+/// \brief Creates a node for the binary operation, with the same attributes as
+/// the specified one but with the given operands.
+static Value *CreateBinOpAsGiven(BinaryOperator &Inst, Value *LHS, Value *RHS,
+ InstCombiner::BuilderTy *B) {
+ Value *BORes = B->CreateBinOp(Inst.getOpcode(), LHS, RHS);
+ if (BinaryOperator *NewBO = dyn_cast<BinaryOperator>(BORes)) {
+ if (isa<OverflowingBinaryOperator>(NewBO)) {
+ NewBO->setHasNoSignedWrap(Inst.hasNoSignedWrap());
+ NewBO->setHasNoUnsignedWrap(Inst.hasNoUnsignedWrap());
+ }
+ if (isa<PossiblyExactOperator>(NewBO))
+ NewBO->setIsExact(Inst.isExact());
+ }
+ return BORes;
+}
+
+/// \brief Performs vector-specific transformations on a binary operation.
+/// \param Inst Binary operator to transform.
+/// \return Pointer to the node that must replace the original binary operator,
+///         or a null pointer if no transformation was made.
+Value *InstCombiner::SimplifyVectorOp(BinaryOperator &Inst) {
+ if (!Inst.getType()->isVectorTy()) return nullptr;
+
+ unsigned VWidth = cast<VectorType>(Inst.getType())->getNumElements();
+ Value *LHS = Inst.getOperand(0), *RHS = Inst.getOperand(1);
+ assert(cast<VectorType>(LHS->getType())->getNumElements() == VWidth);
+ assert(cast<VectorType>(RHS->getType())->getNumElements() == VWidth);
+
+  // If both arguments of the binary operation are shuffles that use the same
+  // mask and shuffle within a single vector, it is worthwhile to move the
+  // shuffle after the binary operation:
+ // Op(shuffle(v1, m), shuffle(v2, m)) -> shuffle(Op(v1, v2), m)
+ if (isa<ShuffleVectorInst>(LHS) && isa<ShuffleVectorInst>(RHS)) {
+ ShuffleVectorInst *LShuf = cast<ShuffleVectorInst>(LHS);
+ ShuffleVectorInst *RShuf = cast<ShuffleVectorInst>(RHS);
+ if (isa<UndefValue>(LShuf->getOperand(1)) &&
+ isa<UndefValue>(RShuf->getOperand(1)) &&
+ LShuf->getOperand(0)->getType() == RShuf->getOperand(0)->getType() &&
+ LShuf->getMask() == RShuf->getMask()) {
+ Value *NewBO = CreateBinOpAsGiven(Inst, LShuf->getOperand(0),
+ RShuf->getOperand(0), Builder);
+ Value *Res = Builder->CreateShuffleVector(NewBO,
+ UndefValue::get(NewBO->getType()), LShuf->getMask());
+ return Res;
+ }
+ }
+
+  // If one argument is a shuffle within one vector and the other is a
+  // constant, try moving the shuffle after the binary operation.
+ ShuffleVectorInst *Shuffle = nullptr;
+ Constant *C1 = nullptr;
+ if (isa<ShuffleVectorInst>(LHS)) Shuffle = cast<ShuffleVectorInst>(LHS);
+ if (isa<ShuffleVectorInst>(RHS)) Shuffle = cast<ShuffleVectorInst>(RHS);
+ if (isa<Constant>(LHS)) C1 = cast<Constant>(LHS);
+ if (isa<Constant>(RHS)) C1 = cast<Constant>(RHS);
+ if (Shuffle && C1 && isa<UndefValue>(Shuffle->getOperand(1)) &&
+ Shuffle->getType() == Shuffle->getOperand(0)->getType()) {
+ SmallVector<int, 16> ShMask = Shuffle->getShuffleMask();
+    // Find a constant C2 with the property:
+    //   shuffle(C2, ShMask) = C1
+    // If no such constant exists (example: ShMask=<0,0> and C1=<1,2>), the
+    // reorder is not possible.
+ SmallVector<Constant*, 16> C2M(VWidth,
+ UndefValue::get(C1->getType()->getScalarType()));
+ bool MayChange = true;
+ for (unsigned I = 0; I < VWidth; ++I) {
+ if (ShMask[I] >= 0) {
+ assert(ShMask[I] < (int)VWidth);
+ if (!isa<UndefValue>(C2M[ShMask[I]])) {
+ MayChange = false;
+ break;
+ }
+ C2M[ShMask[I]] = C1->getAggregateElement(I);
+ }
+ }
+ if (MayChange) {
+ Constant *C2 = ConstantVector::get(C2M);
+ Value *NewLHS, *NewRHS;
+ if (isa<Constant>(LHS)) {
+ NewLHS = C2;
+ NewRHS = Shuffle->getOperand(0);
+ } else {
+ NewLHS = Shuffle->getOperand(0);
+ NewRHS = C2;
+ }
+ Value *NewBO = CreateBinOpAsGiven(Inst, NewLHS, NewRHS, Builder);
+ Value *Res = Builder->CreateShuffleVector(NewBO,
+ UndefValue::get(Inst.getType()), Shuffle->getMask());
+ return Res;
+ }
+ }
+
+ return nullptr;
+}
+
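
An end-to-end sketch of the first case above, in LLVM IR (illustrative): both operands shuffle a single vector with the same mask, so the add is hoisted before the shuffle:

    %l = shufflevector <4 x i32> %v1, <4 x i32> undef,
                       <4 x i32> <i32 3, i32 2, i32 1, i32 0>
    %r = shufflevector <4 x i32> %v2, <4 x i32> undef,
                       <4 x i32> <i32 3, i32 2, i32 1, i32 0>
    %s = add <4 x i32> %l, %r
    ; =>
    %a = add <4 x i32> %v1, %v2
    %s2 = shufflevector <4 x i32> %a, <4 x i32> undef,
                        <4 x i32> <i32 3, i32 2, i32 1, i32 0>
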
Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
SmallVector<Value*, 8> Ops(GEP.op_begin(), GEP.op_end());
@@ -1130,7 +1226,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
//
if (GEPOperator *Src = dyn_cast<GEPOperator>(PtrOp)) {
if (!shouldMergeGEPs(*cast<GEPOperator>(&GEP), *Src))
- return 0;
+ return nullptr;
// Note that if our source is a gep chain itself then we wait for that
// chain to be resolved before we perform this transformation. This
@@ -1138,7 +1234,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
if (GEPOperator *SrcGEP =
dyn_cast<GEPOperator>(Src->getOperand(0)))
if (SrcGEP->getNumOperands() == 2 && shouldMergeGEPs(*Src, *SrcGEP))
- return 0; // Wait until our source is folded to completion.
+ return nullptr; // Wait until our source is folded to completion.
SmallVector<Value*, 8> Indices;
@@ -1166,7 +1262,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// intptr_t). Just avoid transforming this until the input has been
// normalized.
if (SO1->getType() != GO1->getType())
- return 0;
+ return nullptr;
Sum = Builder->CreateAdd(SO1, GO1, PtrOp->getName()+".sum");
}
@@ -1216,7 +1312,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// We do not handle pointer-vector geps here.
if (!StrippedPtrTy)
- return 0;
+ return nullptr;
if (StrippedPtr != PtrOp) {
bool HasZeroPointerIndex = false;
@@ -1241,7 +1337,15 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
GetElementPtrInst *Res =
GetElementPtrInst::Create(StrippedPtr, Idx, GEP.getName());
Res->setIsInBounds(GEP.isInBounds());
- return Res;
+ if (StrippedPtrTy->getAddressSpace() == GEP.getAddressSpace())
+ return Res;
+ // Insert Res, and create an addrspacecast.
+ // e.g.,
+ // GEP (addrspacecast i8 addrspace(1)* X to [0 x i8]*), i32 0, ...
+ // ->
+ // %0 = GEP i8 addrspace(1)* X, ...
+ // addrspacecast i8 addrspace(1)* %0 to i8*
+ return new AddrSpaceCastInst(Builder->Insert(Res), GEP.getType());
}
if (ArrayType *XATy =
@@ -1253,8 +1357,24 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// to an array of the same type as the destination pointer
// array. Because the array type is never stepped over (there
// is a leading zero) we can fold the cast into this GEP.
- GEP.setOperand(0, StrippedPtr);
- return &GEP;
+ if (StrippedPtrTy->getAddressSpace() == GEP.getAddressSpace()) {
+ GEP.setOperand(0, StrippedPtr);
+ return &GEP;
+ }
+ // Cannot replace the base pointer directly because StrippedPtr's
+ // address space is different. Instead, create a new GEP followed by
+ // an addrspacecast.
+ // e.g.,
+ // GEP (addrspacecast [10 x i8] addrspace(1)* X to [0 x i8]*),
+ // i32 0, ...
+ // ->
+ // %0 = GEP [10 x i8] addrspace(1)* X, ...
+ // addrspacecast i8 addrspace(1)* %0 to i8*
+ SmallVector<Value*, 8> Idx(GEP.idx_begin(), GEP.idx_end());
+ Value *NewGEP = GEP.isInBounds() ?
+ Builder->CreateInBoundsGEP(StrippedPtr, Idx, GEP.getName()) :
+ Builder->CreateGEP(StrippedPtr, Idx, GEP.getName());
+ return new AddrSpaceCastInst(NewGEP, GEP.getType());
}
}
}
@@ -1360,7 +1480,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
}
if (!DL)
- return 0;
+ return nullptr;
/// See if we can simplify:
/// X = bitcast A* to B*
@@ -1412,7 +1532,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
}
}
- return 0;
+ return nullptr;
}
static bool
@@ -1527,7 +1647,7 @@ Instruction *InstCombiner::visitAllocSite(Instruction &MI) {
}
return EraseInstFromFunction(MI);
}
- return 0;
+ return nullptr;
}
/// \brief Move the call to free before a NULL test.
@@ -1556,30 +1676,30 @@ tryToMoveFreeBeforeNullTest(CallInst &FI) {
// would duplicate the call to free in each predecessor and it may
// not be profitable even for code size.
if (!PredBB)
- return 0;
+ return nullptr;
// Validate constraint #2: Does this block contains only the call to
// free and an unconditional branch?
// FIXME: We could check if we can speculate everything in the
// predecessor block
if (FreeInstrBB->size() != 2)
- return 0;
+ return nullptr;
BasicBlock *SuccBB;
if (!match(FreeInstrBB->getTerminator(), m_UnconditionalBr(SuccBB)))
- return 0;
+ return nullptr;
// Validate the rest of constraint #1 by matching on the pred branch.
TerminatorInst *TI = PredBB->getTerminator();
BasicBlock *TrueBB, *FalseBB;
ICmpInst::Predicate Pred;
if (!match(TI, m_Br(m_ICmp(Pred, m_Specific(Op), m_Zero()), TrueBB, FalseBB)))
- return 0;
+ return nullptr;
if (Pred != ICmpInst::ICMP_EQ && Pred != ICmpInst::ICMP_NE)
- return 0;
+ return nullptr;
// Validate constraint #3: Ensure the null case just falls through.
if (SuccBB != (Pred == ICmpInst::ICMP_EQ ? TrueBB : FalseBB))
- return 0;
+ return nullptr;
assert(FreeInstrBB == (Pred == ICmpInst::ICMP_EQ ? FalseBB : TrueBB) &&
"Broken CFG: missing edge from predecessor to successor");
@@ -1614,14 +1734,14 @@ Instruction *InstCombiner::visitFree(CallInst &FI) {
if (Instruction *I = tryToMoveFreeBeforeNullTest(FI))
return I;
- return 0;
+ return nullptr;
}
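
The shape tryToMoveFreeBeforeNullTest matches, sketched at the source level (the matching itself is on IR; free(NULL) being a no-op is what makes the move legal):

    // Before:              After:
    //   if (p)               free(p);
    //     free(p);
    // Hoisting the call above the test leaves the null test dead, so later
    // cleanup can remove the branch entirely.
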
Instruction *InstCombiner::visitBranchInst(BranchInst &BI) {
// Change br (not X), label True, label False to: br X, label False, True
- Value *X = 0;
+ Value *X = nullptr;
BasicBlock *TrueDest;
BasicBlock *FalseDest;
if (match(&BI, m_Br(m_Not(m_Value(X)), TrueDest, FalseDest)) &&
@@ -1664,7 +1784,7 @@ Instruction *InstCombiner::visitBranchInst(BranchInst &BI) {
return &BI;
}
- return 0;
+ return nullptr;
}
Instruction *InstCombiner::visitSwitchInst(SwitchInst &SI) {
@@ -1688,7 +1808,7 @@ Instruction *InstCombiner::visitSwitchInst(SwitchInst &SI) {
return &SI;
}
}
- return 0;
+ return nullptr;
}
Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {
@@ -1705,7 +1825,7 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {
// first index
return ExtractValueInst::Create(C2, EV.getIndices().slice(1));
}
- return 0; // Can't handle other constants
+ return nullptr; // Can't handle other constants
}
if (InsertValueInst *IV = dyn_cast<InsertValueInst>(Agg)) {
@@ -1838,7 +1958,7 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {
// and if again single-use then via load (gep (gep)) to load (gep).
// However, double extracts from e.g. function arguments or return values
// aren't handled yet.
- return 0;
+ return nullptr;
}
enum Personality_Type {
@@ -2177,7 +2297,7 @@ Instruction *InstCombiner::visitLandingPadInst(LandingPadInst &LI) {
return &LI;
}
- return 0;
+ return nullptr;
}
@@ -2270,7 +2390,7 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB,
for (User::op_iterator i = Inst->op_begin(), e = Inst->op_end();
i != e; ++i) {
ConstantExpr *CE = dyn_cast<ConstantExpr>(i);
- if (CE == 0) continue;
+ if (CE == nullptr) continue;
Constant*& FoldRes = FoldedConstants[CE];
if (!FoldRes)
@@ -2374,7 +2494,7 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
while (!Worklist.isEmpty()) {
Instruction *I = Worklist.RemoveOne();
- if (I == 0) continue; // skip null values.
+ if (I == nullptr) continue; // skip null values.
// Check to see if we can DCE the instruction.
if (isInstructionTriviallyDead(I, TLI)) {
@@ -2516,7 +2636,7 @@ bool InstCombiner::runOnFunction(Function &F) {
return false;
DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : 0;
+ DL = DLP ? &DLP->getDataLayout() : nullptr;
TLI = &getAnalysis<TargetLibraryInfo>();
// Minimizing size?
MinimizeSize = F.getAttributes().hasAttribute(AttributeSet::FunctionIndex,
@@ -2543,7 +2663,7 @@ bool InstCombiner::runOnFunction(Function &F) {
while (DoOneIteration(F, Iteration++))
EverMadeChange = true;
- Builder = 0;
+ Builder = nullptr;
return EverMadeChange;
}
diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index bbfa4c5..95fca75 100644
--- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -13,8 +13,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "asan"
-
#include "llvm/Transforms/Instrumentation.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
@@ -53,8 +51,11 @@
using namespace llvm;
+#define DEBUG_TYPE "asan"
+
static const uint64_t kDefaultShadowScale = 3;
static const uint64_t kDefaultShadowOffset32 = 1ULL << 29;
+static const uint64_t kIOSShadowOffset32 = 1ULL << 30;
static const uint64_t kDefaultShadowOffset64 = 1ULL << 44;
static const uint64_t kSmallX86_64ShadowOffset = 0x7FFF8000; // < 2G.
static const uint64_t kPPC64_ShadowOffset64 = 1ULL << 41;
@@ -79,6 +80,7 @@ static const char *const kAsanUnregisterGlobalsName =
static const char *const kAsanPoisonGlobalsName = "__asan_before_dynamic_init";
static const char *const kAsanUnpoisonGlobalsName = "__asan_after_dynamic_init";
static const char *const kAsanInitName = "__asan_init_v3";
+static const char *const kAsanCovModuleInitName = "__sanitizer_cov_module_init";
static const char *const kAsanCovName = "__sanitizer_cov";
static const char *const kAsanPtrCmp = "__sanitizer_ptr_cmp";
static const char *const kAsanPtrSub = "__sanitizer_ptr_sub";
@@ -135,10 +137,12 @@ static cl::opt<bool> ClGlobals("asan-globals",
static cl::opt<int> ClCoverage("asan-coverage",
cl::desc("ASan coverage. 0: none, 1: entry block, 2: all blocks"),
cl::Hidden, cl::init(false));
+static cl::opt<int> ClCoverageBlockThreshold("asan-coverage-block-threshold",
+ cl::desc("Add coverage instrumentation only to the entry block if there "
+ "are more than this number of blocks."),
+ cl::Hidden, cl::init(1500));
static cl::opt<bool> ClInitializers("asan-initialization-order",
cl::desc("Handle C++ initializer order"), cl::Hidden, cl::init(false));
-static cl::opt<bool> ClMemIntrin("asan-memintrin",
- cl::desc("Handle memset/memcpy/memmove"), cl::Hidden, cl::init(true));
static cl::opt<bool> ClInvalidPointerPairs("asan-detect-invalid-pointer-pair",
cl::desc("Instrument <, <=, >, >=, - with pointer operands"),
cl::Hidden, cl::init(false));
@@ -148,6 +152,16 @@ static cl::opt<unsigned> ClRealignStack("asan-realign-stack",
static cl::opt<std::string> ClBlacklistFile("asan-blacklist",
cl::desc("File containing the list of objects to ignore "
"during instrumentation"), cl::Hidden);
+static cl::opt<int> ClInstrumentationWithCallsThreshold(
+ "asan-instrumentation-with-call-threshold",
+ cl::desc("If the function being instrumented contains more than "
+ "this number of memory accesses, use callbacks instead of "
+ "inline checks (-1 means never use callbacks)."),
+ cl::Hidden, cl::init(7000));
+static cl::opt<std::string> ClMemoryAccessCallbackPrefix(
+ "asan-memory-access-callback-prefix",
+ cl::desc("Prefix for memory access callbacks"), cl::Hidden,
+ cl::init("__asan_"));
// This is an experimental feature that will allow to choose between
// instrumented and non-instrumented code at link-time.
@@ -238,7 +252,7 @@ struct ShadowMapping {
static ShadowMapping getShadowMapping(const Module &M, int LongSize) {
llvm::Triple TargetTriple(M.getTargetTriple());
bool IsAndroid = TargetTriple.getEnvironment() == llvm::Triple::Android;
- // bool IsMacOSX = TargetTriple.getOS() == llvm::Triple::MacOSX;
+ bool IsIOS = TargetTriple.getOS() == llvm::Triple::IOS;
bool IsFreeBSD = TargetTriple.getOS() == llvm::Triple::FreeBSD;
bool IsLinux = TargetTriple.getOS() == llvm::Triple::Linux;
bool IsPPC64 = TargetTriple.getArch() == llvm::Triple::ppc64 ||
@@ -256,6 +270,8 @@ static ShadowMapping getShadowMapping(const Module &M, int LongSize) {
Mapping.Offset = kMIPS32_ShadowOffset32;
else if (IsFreeBSD)
Mapping.Offset = kFreeBSD_ShadowOffset32;
+ else if (IsIOS)
+ Mapping.Offset = kIOSShadowOffset32;
else
Mapping.Offset = kDefaultShadowOffset32;
} else { // LongSize == 64
@@ -303,20 +319,17 @@ struct AddressSanitizer : public FunctionPass {
const char *getPassName() const override {
return "AddressSanitizerFunctionPass";
}
- void instrumentMop(Instruction *I);
+ void instrumentMop(Instruction *I, bool UseCalls);
void instrumentPointerComparisonOrSubtraction(Instruction *I);
void instrumentAddress(Instruction *OrigIns, Instruction *InsertBefore,
Value *Addr, uint32_t TypeSize, bool IsWrite,
- Value *SizeArgument);
+ Value *SizeArgument, bool UseCalls);
Value *createSlowPathCmp(IRBuilder<> &IRB, Value *AddrLong,
Value *ShadowValue, uint32_t TypeSize);
Instruction *generateCrashCode(Instruction *InsertBefore, Value *Addr,
bool IsWrite, size_t AccessSizeIndex,
Value *SizeArgument);
- bool instrumentMemIntrinsic(MemIntrinsic *MI);
- void instrumentMemIntrinsicParam(Instruction *OrigIns, Value *Addr,
- Value *Size,
- Instruction *InsertBefore, bool IsWrite);
+ void instrumentMemIntrinsic(MemIntrinsic *MI);
Value *memToShadow(Value *Shadow, IRBuilder<> &IRB);
bool runOnFunction(Function &F) override;
bool maybeInsertAsanInitAtFunctionEntry(Function &F);
@@ -349,8 +362,11 @@ struct AddressSanitizer : public FunctionPass {
std::unique_ptr<SpecialCaseList> BL;
// This array is indexed by AccessIsWrite and log2(AccessSize).
Function *AsanErrorCallback[2][kNumberOfAccessSizes];
+ Function *AsanMemoryAccessCallback[2][kNumberOfAccessSizes];
// This array is indexed by AccessIsWrite.
- Function *AsanErrorCallbackSized[2];
+ Function *AsanErrorCallbackSized[2],
+ *AsanMemoryAccessCallbackSized[2];
+ Function *AsanMemmove, *AsanMemcpy, *AsanMemset;
InlineAsm *EmptyAsm;
SetOfDynamicallyInitializedGlobals DynamicallyInitializedGlobals;
@@ -393,6 +409,7 @@ class AddressSanitizerModule : public ModulePass {
Function *AsanUnpoisonGlobals;
Function *AsanRegisterGlobals;
Function *AsanUnregisterGlobals;
+ Function *AsanCovModuleInit;
};
// Stack poisoning does not play well with exception handling.
@@ -443,11 +460,9 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
bool runOnFunction() {
if (!ClStack) return false;
// Collect alloca, ret, lifetime instructions etc.
- for (df_iterator<BasicBlock*> DI = df_begin(&F.getEntryBlock()),
- DE = df_end(&F.getEntryBlock()); DI != DE; ++DI) {
- BasicBlock *BB = *DI;
+ for (BasicBlock *BB : depth_first(&F.getEntryBlock()))
visit(*BB);
- }
+
if (AllocaVec.empty()) return false;
initializeCallbacks(*F.getParent());
@@ -590,72 +605,54 @@ Value *AddressSanitizer::memToShadow(Value *Shadow, IRBuilder<> &IRB) {
return IRB.CreateAdd(Shadow, ConstantInt::get(IntptrTy, Mapping.Offset));
}
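
Given the constants chosen above, memToShadow computes the usual ASan mapping Shadow = (Mem >> Scale) + Offset. A C++ sketch with the 32-bit defaults from this file (kDefaultShadowScale = 3, kDefaultShadowOffset32 = 1 << 29):

    #include <cstdint>
    // Sketch of the mapping; the pass emits the same arithmetic as IR.
    static uintptr_t memToShadow32(uintptr_t Addr) {
      return (Addr >> 3) + (1UL << 29);
    }
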
-void AddressSanitizer::instrumentMemIntrinsicParam(
- Instruction *OrigIns,
- Value *Addr, Value *Size, Instruction *InsertBefore, bool IsWrite) {
- IRBuilder<> IRB(InsertBefore);
- if (Size->getType() != IntptrTy)
- Size = IRB.CreateIntCast(Size, IntptrTy, false);
- // Check the first byte.
- instrumentAddress(OrigIns, InsertBefore, Addr, 8, IsWrite, Size);
- // Check the last byte.
- IRB.SetInsertPoint(InsertBefore);
- Value *SizeMinusOne = IRB.CreateSub(Size, ConstantInt::get(IntptrTy, 1));
- Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy);
- Value *AddrLast = IRB.CreateAdd(AddrLong, SizeMinusOne);
- instrumentAddress(OrigIns, InsertBefore, AddrLast, 8, IsWrite, Size);
-}
-
// Instrument memset/memmove/memcpy
-bool AddressSanitizer::instrumentMemIntrinsic(MemIntrinsic *MI) {
- Value *Dst = MI->getDest();
- MemTransferInst *MemTran = dyn_cast<MemTransferInst>(MI);
- Value *Src = MemTran ? MemTran->getSource() : 0;
- Value *Length = MI->getLength();
-
- Constant *ConstLength = dyn_cast<Constant>(Length);
- Instruction *InsertBefore = MI;
- if (ConstLength) {
- if (ConstLength->isNullValue()) return false;
- } else {
- // The size is not a constant so it could be zero -- check at run-time.
- IRBuilder<> IRB(InsertBefore);
-
- Value *Cmp = IRB.CreateICmpNE(Length,
- Constant::getNullValue(Length->getType()));
- InsertBefore = SplitBlockAndInsertIfThen(Cmp, InsertBefore, false);
+void AddressSanitizer::instrumentMemIntrinsic(MemIntrinsic *MI) {
+ IRBuilder<> IRB(MI);
+ if (isa<MemTransferInst>(MI)) {
+ IRB.CreateCall3(
+ isa<MemMoveInst>(MI) ? AsanMemmove : AsanMemcpy,
+ IRB.CreatePointerCast(MI->getOperand(0), IRB.getInt8PtrTy()),
+ IRB.CreatePointerCast(MI->getOperand(1), IRB.getInt8PtrTy()),
+ IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false));
+ } else if (isa<MemSetInst>(MI)) {
+ IRB.CreateCall3(
+ AsanMemset,
+ IRB.CreatePointerCast(MI->getOperand(0), IRB.getInt8PtrTy()),
+ IRB.CreateIntCast(MI->getOperand(1), IRB.getInt32Ty(), false),
+ IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false));
}
-
- instrumentMemIntrinsicParam(MI, Dst, Length, InsertBefore, true);
- if (Src)
- instrumentMemIntrinsicParam(MI, Src, Length, InsertBefore, false);
- return true;
+ MI->eraseFromParent();
}
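
[editor's note: with this change the pass no longer checks only the first and last byte of a mem intrinsic itself; it rewrites the intrinsic into a call to the matching runtime interceptor, which can check the whole range. A rough runtime-side sketch of what such an interceptor does — helper names are hypothetical, the real __asan_memcpy lives in the sanitizer runtime:]

    #include <cstddef>
    #include <cstring>

    // Assumed helpers; the real runtime performs these checks internally.
    extern bool range_is_poisoned(const void *p, size_t n);
    extern void report_range_error(const void *p, size_t n, bool is_write);

    void *asan_memcpy_sketch(void *dst, const void *src, size_t n) {
      // Check the whole source and destination ranges, then do the copy.
      if (range_is_poisoned(src, n)) report_range_error(src, n, false);
      if (range_is_poisoned(dst, n)) report_range_error(dst, n, true);
      return memcpy(dst, src, n);
    }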
// If I is an interesting memory access, return the PointerOperand
-// and set IsWrite. Otherwise return NULL.
-static Value *isInterestingMemoryAccess(Instruction *I, bool *IsWrite) {
+// and set IsWrite/Alignment. Otherwise return NULL.
+static Value *isInterestingMemoryAccess(Instruction *I, bool *IsWrite,
+ unsigned *Alignment) {
if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
- if (!ClInstrumentReads) return NULL;
+ if (!ClInstrumentReads) return nullptr;
*IsWrite = false;
+ *Alignment = LI->getAlignment();
return LI->getPointerOperand();
}
if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
- if (!ClInstrumentWrites) return NULL;
+ if (!ClInstrumentWrites) return nullptr;
*IsWrite = true;
+ *Alignment = SI->getAlignment();
return SI->getPointerOperand();
}
if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) {
- if (!ClInstrumentAtomics) return NULL;
+ if (!ClInstrumentAtomics) return nullptr;
*IsWrite = true;
+ *Alignment = 0;
return RMW->getPointerOperand();
}
if (AtomicCmpXchgInst *XCHG = dyn_cast<AtomicCmpXchgInst>(I)) {
- if (!ClInstrumentAtomics) return NULL;
+ if (!ClInstrumentAtomics) return nullptr;
*IsWrite = true;
+ *Alignment = 0;
return XCHG->getPointerOperand();
}
- return NULL;
+ return nullptr;
}
static bool isPointerOperand(Value *V) {
@@ -700,9 +697,10 @@ AddressSanitizer::instrumentPointerComparisonOrSubtraction(Instruction *I) {
IRB.CreateCall2(F, Param[0], Param[1]);
}
-void AddressSanitizer::instrumentMop(Instruction *I) {
+void AddressSanitizer::instrumentMop(Instruction *I, bool UseCalls) {
bool IsWrite = false;
- Value *Addr = isInterestingMemoryAccess(I, &IsWrite);
+ unsigned Alignment = 0;
+ Value *Addr = isInterestingMemoryAccess(I, &IsWrite, &Alignment);
assert(Addr);
if (ClOpt && ClOptGlobals) {
if (GlobalVariable *G = dyn_cast<GlobalVariable>(Addr)) {
@@ -737,22 +735,29 @@ void AddressSanitizer::instrumentMop(Instruction *I) {
else
NumInstrumentedReads++;
- // Instrument a 1-, 2-, 4-, 8-, or 16- byte access with one check.
- if (TypeSize == 8 || TypeSize == 16 ||
- TypeSize == 32 || TypeSize == 64 || TypeSize == 128)
- return instrumentAddress(I, I, Addr, TypeSize, IsWrite, 0);
- // Instrument unusual size (but still multiple of 8).
+ unsigned Granularity = 1 << Mapping.Scale;
+ // Instrument a 1-, 2-, 4-, 8-, or 16- byte access with one check
+ // if the data is properly aligned.
+ if ((TypeSize == 8 || TypeSize == 16 || TypeSize == 32 || TypeSize == 64 ||
+ TypeSize == 128) &&
+ (Alignment >= Granularity || Alignment == 0 || Alignment >= TypeSize / 8))
+ return instrumentAddress(I, I, Addr, TypeSize, IsWrite, nullptr, UseCalls);
+ // Instrument unusual size or unusual alignment.
// We cannot do it with a single check, so we do a 1-byte check for the first
// and the last bytes. We call __asan_report_*_n(addr, real_size) to be able
// to report the actual access size.
IRBuilder<> IRB(I);
- Value *LastByte = IRB.CreateIntToPtr(
- IRB.CreateAdd(IRB.CreatePointerCast(Addr, IntptrTy),
- ConstantInt::get(IntptrTy, TypeSize / 8 - 1)),
- OrigPtrTy);
Value *Size = ConstantInt::get(IntptrTy, TypeSize / 8);
- instrumentAddress(I, I, Addr, 8, IsWrite, Size);
- instrumentAddress(I, I, LastByte, 8, IsWrite, Size);
+ Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy);
+ if (UseCalls) {
+ IRB.CreateCall2(AsanMemoryAccessCallbackSized[IsWrite], AddrLong, Size);
+ } else {
+ Value *LastByte = IRB.CreateIntToPtr(
+ IRB.CreateAdd(AddrLong, ConstantInt::get(IntptrTy, TypeSize / 8 - 1)),
+ OrigPtrTy);
+ instrumentAddress(I, I, Addr, 8, IsWrite, Size, false);
+ instrumentAddress(I, I, LastByte, 8, IsWrite, Size, false);
+ }
}
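
[editor's note: for the odd sizes and alignments handled above, only the first and last byte are checked inline (or one sized callback is emitted in UseCalls mode). A sketch of the address arithmetic, with check_one_byte as a hypothetical stand-in for the generated per-byte check:]

    #include <cstdint>

    // Stand-in for the generated 1-byte shadow check; the real code reports
    // with the full access size via __asan_report_*_n.
    extern void check_one_byte(uintptr_t addr, uintptr_t real_size, bool is_write);

    void check_unusual_access(uintptr_t addr, uintptr_t n, bool is_write) {
      check_one_byte(addr, n, is_write);          // first byte
      check_one_byte(addr + n - 1, n, is_write);  // last byte
      // Bytes in between are not checked; with redzone-based poisoning this
      // is a deliberate trade-off, not a guarantee of full coverage.
    }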
// Validate the result of Module::getOrInsertFunction called for an interface
@@ -800,11 +805,18 @@ Value *AddressSanitizer::createSlowPathCmp(IRBuilder<> &IRB, Value *AddrLong,
}
void AddressSanitizer::instrumentAddress(Instruction *OrigIns,
- Instruction *InsertBefore,
- Value *Addr, uint32_t TypeSize,
- bool IsWrite, Value *SizeArgument) {
+ Instruction *InsertBefore, Value *Addr,
+ uint32_t TypeSize, bool IsWrite,
+ Value *SizeArgument, bool UseCalls) {
IRBuilder<> IRB(InsertBefore);
Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy);
+ size_t AccessSizeIndex = TypeSizeToSizeIndex(TypeSize);
+
+ if (UseCalls) {
+ IRB.CreateCall(AsanMemoryAccessCallback[IsWrite][AccessSizeIndex],
+ AddrLong);
+ return;
+ }
Type *ShadowTy = IntegerType::get(
*C, std::max(8U, TypeSize >> Mapping.Scale));
@@ -815,9 +827,8 @@ void AddressSanitizer::instrumentAddress(Instruction *OrigIns,
IRB.CreateIntToPtr(ShadowPtr, ShadowPtrTy));
Value *Cmp = IRB.CreateICmpNE(ShadowValue, CmpVal);
- size_t AccessSizeIndex = TypeSizeToSizeIndex(TypeSize);
size_t Granularity = 1 << Mapping.Scale;
- TerminatorInst *CrashTerm = 0;
+ TerminatorInst *CrashTerm = nullptr;
if (ClAlwaysSlowPath || (TypeSize < 8 * Granularity)) {
TerminatorInst *CheckTerm =
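
[editor's note: when UseCalls is off, the generated code loads one shadow byte and only takes the slow path built by createSlowPathCmp when that byte is non-zero. A C-style sketch of the inline check; Scale and Offset values are illustrative defaults for x86-64, the pass reads the real ones from Mapping:]

    #include <cstdint>

    extern void report_error(uintptr_t addr);  // stands in for __asan_report_*

    constexpr uintptr_t kScale = 3;            // 8-byte shadow granularity
    constexpr uintptr_t kOffset = 0x7fff8000;  // assumed x86-64 shadow offset

    void inline_shadow_check(uintptr_t addr, unsigned access_size_bytes) {
      int8_t shadow = *reinterpret_cast<int8_t *>((addr >> kScale) + kOffset);
      if (shadow != 0) {
        // Slow path: the granule is only partially addressable. The access is
        // bad if it reaches past the addressable prefix recorded in the shadow.
        int8_t last = static_cast<int8_t>((addr & 7) + access_size_bytes - 1);
        if (last >= shadow)
          report_error(addr);
      }
    }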
@@ -842,8 +853,29 @@ void AddressSanitizer::instrumentAddress(Instruction *OrigIns,
void AddressSanitizerModule::createInitializerPoisonCalls(
Module &M, GlobalValue *ModuleName) {
- // We do all of our poisoning and unpoisoning within _GLOBAL__I_a.
- Function *GlobalInit = M.getFunction("_GLOBAL__I_a");
+ // We do all of our poisoning and unpoisoning within a global constructor.
+ // These are called _GLOBAL__(sub_)?I_.*.
+ // TODO: Consider looking through the functions in
+ // M.getGlobalVariable("llvm.global_ctors") instead of using this stringly
+ // typed approach.
+ Function *GlobalInit = nullptr;
+ for (auto &F : M.getFunctionList()) {
+ StringRef FName = F.getName();
+
+ const char kGlobalPrefix[] = "_GLOBAL__";
+ if (!FName.startswith(kGlobalPrefix))
+ continue;
+ FName = FName.substr(strlen(kGlobalPrefix));
+
+ const char kOptionalSub[] = "sub_";
+ if (FName.startswith(kOptionalSub))
+ FName = FName.substr(strlen(kOptionalSub));
+
+ if (FName.startswith("I_")) {
+ GlobalInit = &F;
+ break;
+ }
+ }
// If that function is not present, this TU contains no globals, or they have
// all been optimized away.
if (!GlobalInit)
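
[editor's note: the loop above accepts both the classic _GLOBAL__I_* and the newer _GLOBAL__sub_I_* spellings of the static-initializer function. The same predicate as a standalone sketch:]

    #include "llvm/ADT/StringRef.h"
    #include <cstring>
    using llvm::StringRef;

    // True for "_GLOBAL__I_foo" and "_GLOBAL__sub_I_foo", false otherwise.
    static bool looksLikeGlobalCtorName(StringRef Name) {
      if (!Name.startswith("_GLOBAL__"))
        return false;
      Name = Name.drop_front(strlen("_GLOBAL__"));
      if (Name.startswith("sub_"))
        Name = Name.drop_front(strlen("sub_"));
      return Name.startswith("I_");
    }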
@@ -858,7 +890,7 @@ void AddressSanitizerModule::createInitializerPoisonCalls(
// Add calls to unpoison all globals before each return instruction.
for (Function::iterator I = GlobalInit->begin(), E = GlobalInit->end();
- I != E; ++I) {
+ I != E; ++I) {
if (ReturnInst *RI = dyn_cast<ReturnInst>(I->getTerminator())) {
CallInst::Create(AsanUnpoisonGlobals, "", RI);
}
@@ -902,8 +934,8 @@ bool AddressSanitizerModule::ShouldInstrumentGlobal(GlobalVariable *G) {
// Ignore the globals from the __OBJC section. The ObjC runtime assumes
// those conform to /usr/lib/objc/runtime.h, so we can't add redzones to
// them.
- if ((Section.find("__OBJC,") == 0) ||
- (Section.find("__DATA, __objc_") == 0)) {
+ if (Section.startswith("__OBJC,") ||
+ Section.startswith("__DATA, __objc_")) {
DEBUG(dbgs() << "Ignoring ObjC runtime global: " << *G << "\n");
return false;
}
@@ -915,16 +947,26 @@ bool AddressSanitizerModule::ShouldInstrumentGlobal(GlobalVariable *G) {
// is placed into __DATA,__cfstring
// Therefore there's no point in placing redzones into __DATA,__cfstring.
// Moreover, it causes the linker to crash on OS X 10.7
- if (Section.find("__DATA,__cfstring") == 0) {
+ if (Section.startswith("__DATA,__cfstring")) {
DEBUG(dbgs() << "Ignoring CFString: " << *G << "\n");
return false;
}
// The linker merges the contents of cstring_literals and removes the
// trailing zeroes.
- if (Section.find("__TEXT,__cstring,cstring_literals") == 0) {
+ if (Section.startswith("__TEXT,__cstring,cstring_literals")) {
DEBUG(dbgs() << "Ignoring a cstring literal: " << *G << "\n");
return false;
}
+
+ // Callbacks put into the CRT initializer/terminator sections
+ // should not be instrumented.
+ // See https://code.google.com/p/address-sanitizer/issues/detail?id=305
+ // and http://msdn.microsoft.com/en-US/en-en/library/bb918180(v=vs.120).aspx
+ if (Section.startswith(".CRT")) {
+ DEBUG(dbgs() << "Ignoring a global initializer callback: " << *G << "\n");
+ return false;
+ }
+
// Globals from llvm.metadata aren't emitted, do not instrument them.
if (Section == "llvm.metadata") return false;
}
@@ -950,6 +992,10 @@ void AddressSanitizerModule::initializeCallbacks(Module &M) {
kAsanUnregisterGlobalsName,
IRB.getVoidTy(), IntptrTy, IntptrTy, NULL));
AsanUnregisterGlobals->setLinkage(Function::ExternalLinkage);
+ AsanCovModuleInit = checkInterfaceFunction(M.getOrInsertFunction(
+ kAsanCovModuleInitName,
+ IRB.getVoidTy(), IntptrTy, NULL));
+ AsanCovModuleInit->setLinkage(Function::ExternalLinkage);
}
// This function replaces all global variables with new variables that have
@@ -980,6 +1026,14 @@ bool AddressSanitizerModule::runOnModule(Module &M) {
GlobalsToChange.push_back(G);
}
+ Function *CtorFunc = M.getFunction(kAsanModuleCtorName);
+ assert(CtorFunc);
+ IRBuilder<> IRB(CtorFunc->getEntryBlock().getTerminator());
+
+ Function *CovFunc = M.getFunction(kAsanCovName);
+ int nCov = CovFunc ? CovFunc->getNumUses() : 0;
+ IRB.CreateCall(AsanCovModuleInit, ConstantInt::get(IntptrTy, nCov));
+
size_t n = GlobalsToChange.size();
if (n == 0) return false;
@@ -996,10 +1050,6 @@ bool AddressSanitizerModule::runOnModule(Module &M) {
IntptrTy, IntptrTy, NULL);
SmallVector<Constant *, 16> Initializers(n);
- Function *CtorFunc = M.getFunction(kAsanModuleCtorName);
- assert(CtorFunc);
- IRBuilder<> IRB(CtorFunc->getEntryBlock().getTerminator());
-
bool HasDynamicallyInitializedGlobals = false;
// We shouldn't merge identical module names, as this string serves as unique
@@ -1110,12 +1160,16 @@ void AddressSanitizer::initializeCallbacks(Module &M) {
for (size_t AccessSizeIndex = 0; AccessSizeIndex < kNumberOfAccessSizes;
AccessSizeIndex++) {
// IsWrite and TypeSize are encoded in the function name.
- std::string FunctionName = std::string(kAsanReportErrorTemplate) +
+ std::string Suffix =
(AccessIsWrite ? "store" : "load") + itostr(1 << AccessSizeIndex);
- // If we are merging crash callbacks, they have two parameters.
AsanErrorCallback[AccessIsWrite][AccessSizeIndex] =
- checkInterfaceFunction(M.getOrInsertFunction(
- FunctionName, IRB.getVoidTy(), IntptrTy, NULL));
+ checkInterfaceFunction(
+ M.getOrInsertFunction(kAsanReportErrorTemplate + Suffix,
+ IRB.getVoidTy(), IntptrTy, NULL));
+ AsanMemoryAccessCallback[AccessIsWrite][AccessSizeIndex] =
+ checkInterfaceFunction(
+ M.getOrInsertFunction(ClMemoryAccessCallbackPrefix + Suffix,
+ IRB.getVoidTy(), IntptrTy, NULL));
}
}
AsanErrorCallbackSized[0] = checkInterfaceFunction(M.getOrInsertFunction(
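
[editor's note: the report callbacks and the new memory-access callbacks share one suffix scheme — the access kind and size are baked into the symbol name, so a 4-byte load maps to __asan_report_load4 and, with the default callback prefix, __asan_load4. A sketch of the construction; the "__asan_" prefix is the assumed default of ClMemoryAccessCallbackPrefix:]

    #include <string>

    // AccessSizeIndex is log2 of the size in bytes: 0 -> 1, 1 -> 2, ... 4 -> 16.
    std::string accessCallbackName(bool IsWrite, unsigned AccessSizeIndex) {
      return std::string("__asan_") + (IsWrite ? "store" : "load") +
             std::to_string(1u << AccessSizeIndex);
    }
    // accessCallbackName(false, 2) == "__asan_load4"
    // accessCallbackName(true, 3)  == "__asan_store8"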
@@ -1123,8 +1177,25 @@ void AddressSanitizer::initializeCallbacks(Module &M) {
AsanErrorCallbackSized[1] = checkInterfaceFunction(M.getOrInsertFunction(
kAsanReportStoreN, IRB.getVoidTy(), IntptrTy, IntptrTy, NULL));
- AsanHandleNoReturnFunc = checkInterfaceFunction(M.getOrInsertFunction(
- kAsanHandleNoReturnName, IRB.getVoidTy(), NULL));
+ AsanMemoryAccessCallbackSized[0] = checkInterfaceFunction(
+ M.getOrInsertFunction(ClMemoryAccessCallbackPrefix + "loadN",
+ IRB.getVoidTy(), IntptrTy, IntptrTy, NULL));
+ AsanMemoryAccessCallbackSized[1] = checkInterfaceFunction(
+ M.getOrInsertFunction(ClMemoryAccessCallbackPrefix + "storeN",
+ IRB.getVoidTy(), IntptrTy, IntptrTy, NULL));
+
+ AsanMemmove = checkInterfaceFunction(M.getOrInsertFunction(
+ ClMemoryAccessCallbackPrefix + "memmove", IRB.getInt8PtrTy(),
+ IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IntptrTy, NULL));
+ AsanMemcpy = checkInterfaceFunction(M.getOrInsertFunction(
+ ClMemoryAccessCallbackPrefix + "memcpy", IRB.getInt8PtrTy(),
+ IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IntptrTy, NULL));
+ AsanMemset = checkInterfaceFunction(M.getOrInsertFunction(
+ ClMemoryAccessCallbackPrefix + "memset", IRB.getInt8PtrTy(),
+ IRB.getInt8PtrTy(), IRB.getInt32Ty(), IntptrTy, NULL));
+
+ AsanHandleNoReturnFunc = checkInterfaceFunction(
+ M.getOrInsertFunction(kAsanHandleNoReturnName, IRB.getVoidTy(), NULL));
AsanCovFunction = checkInterfaceFunction(M.getOrInsertFunction(
kAsanCovName, IRB.getVoidTy(), NULL));
AsanPtrCmpFunction = checkInterfaceFunction(M.getOrInsertFunction(
@@ -1142,7 +1213,7 @@ bool AddressSanitizer::doInitialization(Module &M) {
// Initialize the private fields. No one has accessed them before.
DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
if (!DLP)
- return false;
+ report_fatal_error("data layout missing");
DL = &DLP->getDataLayout();
BL.reset(SpecialCaseList::createOrDie(BlacklistFile));
@@ -1241,7 +1312,8 @@ bool AddressSanitizer::InjectCoverage(Function &F,
const ArrayRef<BasicBlock *> AllBlocks) {
if (!ClCoverage) return false;
- if (ClCoverage == 1) {
+ if (ClCoverage == 1 ||
+ (unsigned)ClCoverageBlockThreshold < AllBlocks.size()) {
InjectCoverageAtBlock(F, F.getEntryBlock());
} else {
for (size_t i = 0, n = AllBlocks.size(); i < n; i++)
@@ -1275,6 +1347,7 @@ bool AddressSanitizer::runOnFunction(Function &F) {
SmallVector<Instruction*, 16> PointerComparisonsOrSubtracts;
int NumAllocas = 0;
bool IsWrite;
+ unsigned Alignment;
// Fill the set of memory operations to instrument.
for (Function::iterator FI = F.begin(), FE = F.end();
@@ -1285,7 +1358,7 @@ bool AddressSanitizer::runOnFunction(Function &F) {
for (BasicBlock::iterator BI = FI->begin(), BE = FI->end();
BI != BE; ++BI) {
if (LooksLikeCodeInBug11395(BI)) return false;
- if (Value *Addr = isInterestingMemoryAccess(BI, &IsWrite)) {
+ if (Value *Addr = isInterestingMemoryAccess(BI, &IsWrite, &Alignment)) {
if (ClOpt && ClOptSameTemp) {
if (!TempsToInstrument.insert(Addr))
continue; // We've seen this temp in the current BB.
@@ -1294,7 +1367,7 @@ bool AddressSanitizer::runOnFunction(Function &F) {
isInterestingPointerComparisonOrSubtraction(BI)) {
PointerComparisonsOrSubtracts.push_back(BI);
continue;
- } else if (isa<MemIntrinsic>(BI) && ClMemIntrin) {
+ } else if (isa<MemIntrinsic>(BI)) {
// ok, take it.
} else {
if (isa<AllocaInst>(BI))
@@ -1315,7 +1388,7 @@ bool AddressSanitizer::runOnFunction(Function &F) {
}
}
- Function *UninstrumentedDuplicate = 0;
+ Function *UninstrumentedDuplicate = nullptr;
bool LikelyToInstrument =
!NoReturnCalls.empty() || !ToInstrument.empty() || (NumAllocas > 0);
if (ClKeepUninstrumented && LikelyToInstrument) {
@@ -1326,14 +1399,19 @@ bool AddressSanitizer::runOnFunction(Function &F) {
F.getParent()->getFunctionList().push_back(UninstrumentedDuplicate);
}
+ bool UseCalls = false;
+ if (ClInstrumentationWithCallsThreshold >= 0 &&
+ ToInstrument.size() > (unsigned)ClInstrumentationWithCallsThreshold)
+ UseCalls = true;
+
// Instrument.
int NumInstrumented = 0;
for (size_t i = 0, n = ToInstrument.size(); i != n; i++) {
Instruction *Inst = ToInstrument[i];
if (ClDebugMin < 0 || ClDebugMax < 0 ||
(NumInstrumented >= ClDebugMin && NumInstrumented <= ClDebugMax)) {
- if (isInterestingMemoryAccess(Inst, &IsWrite))
- instrumentMop(Inst);
+ if (isInterestingMemoryAccess(Inst, &IsWrite, &Alignment))
+ instrumentMop(Inst, UseCalls);
else
instrumentMemIntrinsic(cast<MemIntrinsic>(Inst));
}
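
[editor's note: UseCalls is a code-size valve — once a function would receive more inline checks than the threshold allows, every access is instrumented with one out-of-line call instead of the inline compare-and-branch sequence. A sketch of that decision; the threshold value itself comes from the option and is not assumed here:]

    #include <cstddef>

    // Mirrors the check above: a negative threshold disables callback mode.
    bool shouldUseCalls(int Threshold, size_t NumAccesses) {
      return Threshold >= 0 && NumAccesses > static_cast<size_t>(Threshold);
    }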
@@ -1464,12 +1542,23 @@ void FunctionStackPoisoner::SetShadowToStackAfterReturnInlined(
}
}
+static DebugLoc getFunctionEntryDebugLocation(Function &F) {
+ BasicBlock::iterator I = F.getEntryBlock().begin(),
+ E = F.getEntryBlock().end();
+ for (; I != E; ++I)
+ if (!isa<AllocaInst>(I))
+ break;
+ return I->getDebugLoc();
+}
+
void FunctionStackPoisoner::poisonStack() {
int StackMallocIdx = -1;
+ DebugLoc EntryDebugLocation = getFunctionEntryDebugLocation(F);
assert(AllocaVec.size() > 0);
Instruction *InsBefore = AllocaVec[0];
IRBuilder<> IRB(InsBefore);
+ IRB.SetCurrentDebugLocation(EntryDebugLocation);
SmallVector<ASanStackVariableDescription, 16> SVD;
SVD.reserve(AllocaVec.size());
@@ -1493,6 +1582,7 @@ void FunctionStackPoisoner::poisonStack() {
Type *ByteArrayTy = ArrayType::get(IRB.getInt8Ty(), LocalStackSize);
AllocaInst *MyAlloca =
new AllocaInst(ByteArrayTy, "MyAlloca", InsBefore);
+ MyAlloca->setDebugLoc(EntryDebugLocation);
assert((ClRealignStack & (ClRealignStack - 1)) == 0);
size_t FrameAlignment = std::max(L.FrameAlignment, (size_t)ClRealignStack);
MyAlloca->setAlignment(FrameAlignment);
@@ -1513,11 +1603,13 @@ void FunctionStackPoisoner::poisonStack() {
Instruction *Term = SplitBlockAndInsertIfThen(Cmp, InsBefore, false);
BasicBlock *CmpBlock = cast<Instruction>(Cmp)->getParent();
IRBuilder<> IRBIf(Term);
+ IRBIf.SetCurrentDebugLocation(EntryDebugLocation);
LocalStackBase = IRBIf.CreateCall2(
AsanStackMallocFunc[StackMallocIdx],
ConstantInt::get(IntptrTy, LocalStackSize), OrigStackBase);
BasicBlock *SetBlock = cast<Instruction>(LocalStackBase)->getParent();
IRB.SetInsertPoint(InsBefore);
+ IRB.SetCurrentDebugLocation(EntryDebugLocation);
PHINode *Phi = IRB.CreatePHI(IntptrTy, 2);
Phi->addIncoming(OrigStackBase, CmpBlock);
Phi->addIncoming(LocalStackBase, SetBlock);
@@ -1654,7 +1746,7 @@ void FunctionStackPoisoner::poisonAlloca(Value *V, uint64_t Size,
AllocaInst *FunctionStackPoisoner::findAllocaForValue(Value *V) {
if (AllocaInst *AI = dyn_cast<AllocaInst>(V))
// We're interested only in allocas we can handle.
- return isInterestingAlloca(*AI) ? AI : 0;
+ return isInterestingAlloca(*AI) ? AI : nullptr;
// See if we've already calculated (or started to calculate) alloca for a
// given value.
AllocaForValueMapTy::iterator I = AllocaForValue.find(V);
@@ -1662,8 +1754,8 @@ AllocaInst *FunctionStackPoisoner::findAllocaForValue(Value *V) {
return I->second;
// Store 0 while we're calculating alloca for value V to avoid
// infinite recursion if the value references itself.
- AllocaForValue[V] = 0;
- AllocaInst *Res = 0;
+ AllocaForValue[V] = nullptr;
+ AllocaInst *Res = nullptr;
if (CastInst *CI = dyn_cast<CastInst>(V))
Res = findAllocaForValue(CI->getOperand(0));
else if (PHINode *PN = dyn_cast<PHINode>(V)) {
@@ -1673,12 +1765,12 @@ AllocaInst *FunctionStackPoisoner::findAllocaForValue(Value *V) {
if (IncValue == PN) continue;
AllocaInst *IncValueAI = findAllocaForValue(IncValue);
// AI for incoming values should exist and should all be equal.
- if (IncValueAI == 0 || (Res != 0 && IncValueAI != Res))
- return 0;
+ if (IncValueAI == nullptr || (Res != nullptr && IncValueAI != Res))
+ return nullptr;
Res = IncValueAI;
}
}
- if (Res != 0)
+ if (Res)
AllocaForValue[V] = Res;
return Res;
}
diff --git a/lib/Transforms/Instrumentation/BoundsChecking.cpp b/lib/Transforms/Instrumentation/BoundsChecking.cpp
index 505fb83..9a5cea8 100644
--- a/lib/Transforms/Instrumentation/BoundsChecking.cpp
+++ b/lib/Transforms/Instrumentation/BoundsChecking.cpp
@@ -12,7 +12,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "bounds-checking"
#include "llvm/Transforms/Instrumentation.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/MemoryBuiltins.h"
@@ -28,6 +27,8 @@
#include "llvm/Target/TargetLibraryInfo.h"
using namespace llvm;
+#define DEBUG_TYPE "bounds-checking"
+
static cl::opt<bool> SingleTrapBB("bounds-checking-single-trap",
cl::desc("Use one trap block per function"));
@@ -61,7 +62,7 @@ namespace {
BasicBlock *TrapBB;
BasicBlock *getTrapBB();
- void emitBranchToTrap(Value *Cmp = 0);
+ void emitBranchToTrap(Value *Cmp = nullptr);
bool instrument(Value *Ptr, Value *Val);
};
}
@@ -103,7 +104,7 @@ void BoundsChecking::emitBranchToTrap(Value *Cmp) {
if (!C->getZExtValue())
return;
else
- Cmp = 0; // unconditional branch
+ Cmp = nullptr; // unconditional branch
}
++ChecksAdded;
@@ -167,7 +168,7 @@ bool BoundsChecking::runOnFunction(Function &F) {
DL = &getAnalysis<DataLayoutPass>().getDataLayout();
TLI = &getAnalysis<TargetLibraryInfo>();
- TrapBB = 0;
+ TrapBB = nullptr;
BuilderTy TheBuilder(F.getContext(), TargetFolder(DL));
Builder = &TheBuilder;
ObjectSizeOffsetEvaluator TheObjSizeEval(DL, TLI, F.getContext(),
diff --git a/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp b/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
index df1549d..7f468f7 100644
--- a/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
+++ b/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
@@ -211,7 +211,8 @@ class DataFlowSanitizer : public ModulePass {
public:
DataFlowSanitizer(StringRef ABIListFile = StringRef(),
- void *(*getArgTLS)() = 0, void *(*getRetValTLS)() = 0);
+ void *(*getArgTLS)() = nullptr,
+ void *(*getRetValTLS)() = nullptr);
static char ID;
bool doInitialization(Module &M) override;
bool runOnModule(Module &M) override;
@@ -233,8 +234,8 @@ struct DFSanFunction {
DFSanFunction(DataFlowSanitizer &DFS, Function *F, bool IsNativeABI)
: DFS(DFS), F(F), IA(DFS.getInstrumentedABI()),
- IsNativeABI(IsNativeABI), ArgTLSPtr(0), RetvalTLSPtr(0),
- LabelReturnAlloca(0) {}
+ IsNativeABI(IsNativeABI), ArgTLSPtr(nullptr), RetvalTLSPtr(nullptr),
+ LabelReturnAlloca(nullptr) {}
Value *getArgTLSPtr();
Value *getArgTLS(unsigned Index, Instruction *Pos);
Value *getRetvalTLS();
@@ -303,7 +304,7 @@ FunctionType *DataFlowSanitizer::getArgsFunctionType(FunctionType *T) {
ArgTypes.push_back(ShadowPtrTy);
Type *RetType = T->getReturnType();
if (!RetType->isVoidTy())
- RetType = StructType::get(RetType, ShadowTy, (Type *)0);
+ RetType = StructType::get(RetType, ShadowTy, (Type *)nullptr);
return FunctionType::get(RetType, ArgTypes, T->isVarArg());
}
@@ -345,7 +346,7 @@ FunctionType *DataFlowSanitizer::getCustomFunctionType(FunctionType *T) {
bool DataFlowSanitizer::doInitialization(Module &M) {
DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
if (!DLP)
- return false;
+ report_fatal_error("data layout missing");
DL = &DLP->getDataLayout();
Mod = &M;
@@ -373,18 +374,20 @@ bool DataFlowSanitizer::doInitialization(Module &M) {
if (GetArgTLSPtr) {
Type *ArgTLSTy = ArrayType::get(ShadowTy, 64);
- ArgTLS = 0;
+ ArgTLS = nullptr;
GetArgTLS = ConstantExpr::getIntToPtr(
ConstantInt::get(IntptrTy, uintptr_t(GetArgTLSPtr)),
PointerType::getUnqual(
- FunctionType::get(PointerType::getUnqual(ArgTLSTy), (Type *)0)));
+ FunctionType::get(PointerType::getUnqual(ArgTLSTy),
+ (Type *)nullptr)));
}
if (GetRetvalTLSPtr) {
- RetvalTLS = 0;
+ RetvalTLS = nullptr;
GetRetvalTLS = ConstantExpr::getIntToPtr(
ConstantInt::get(IntptrTy, uintptr_t(GetRetvalTLSPtr)),
PointerType::getUnqual(
- FunctionType::get(PointerType::getUnqual(ShadowTy), (Type *)0)));
+ FunctionType::get(PointerType::getUnqual(ShadowTy),
+ (Type *)nullptr)));
}
ColdCallWeights = MDBuilder(*Ctx).createBranchWeights(1, 1000);
@@ -554,7 +557,7 @@ bool DataFlowSanitizer::runOnModule(Module &M) {
++i;
// Don't stop on weak. We assume people aren't playing games with the
// instrumentedness of overridden weak aliases.
- if (Function *F = dyn_cast<Function>(GA->getAliasedGlobal())) {
+ if (Function *F = dyn_cast<Function>(GA->getAliasee())) {
bool GAInst = isInstrumented(GA), FInst = isInstrumented(F);
if (GAInst && FInst) {
addGlobalNamePrefix(GA);
@@ -629,7 +632,7 @@ bool DataFlowSanitizer::runOnModule(Module &M) {
// function... yet.
} else if (FT->isVarArg()) {
UnwrappedFnMap[&F] = &F;
- *i = 0;
+ *i = nullptr;
} else if (!IsZeroArgsVoidRet || getWrapperKind(&F) == WK_Custom) {
// Build a wrapper function for F. The wrapper simply calls F, and is
// added to FnsToInstrument so that any instrumentation according to its
@@ -680,9 +683,8 @@ bool DataFlowSanitizer::runOnModule(Module &M) {
// DFSanVisitor may create new basic blocks, which confuses df_iterator.
// Build a copy of the list before iterating over it.
- llvm::SmallVector<BasicBlock *, 4> BBList;
- std::copy(df_begin(&(*i)->getEntryBlock()), df_end(&(*i)->getEntryBlock()),
- std::back_inserter(BBList));
+ llvm::SmallVector<BasicBlock *, 4> BBList(
+ depth_first(&(*i)->getEntryBlock()));
for (llvm::SmallVector<BasicBlock *, 4>::iterator i = BBList.begin(),
e = BBList.end();
@@ -1313,7 +1315,7 @@ void DFSanVisitor::visitCallSite(CallSite CS) {
}
}
- Instruction *Next = 0;
+ Instruction *Next = nullptr;
if (!CS.getType()->isVoidTy()) {
if (InvokeInst *II = dyn_cast<InvokeInst>(CS.getInstruction())) {
if (II->getNormalDest()->getSinglePredecessor()) {
diff --git a/lib/Transforms/Instrumentation/DebugIR.cpp b/lib/Transforms/Instrumentation/DebugIR.cpp
index 069886e..18bda1a 100644
--- a/lib/Transforms/Instrumentation/DebugIR.cpp
+++ b/lib/Transforms/Instrumentation/DebugIR.cpp
@@ -16,8 +16,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "debug-ir"
-
#include "llvm/IR/ValueMap.h"
#include "DebugIR.h"
#include "llvm/IR/AssemblyAnnotationWriter.h"
@@ -42,6 +40,8 @@
using namespace llvm;
+#define DEBUG_TYPE "debug-ir"
+
namespace {
/// Builds a map of Value* to line numbers on which the Value appears in a
@@ -118,7 +118,7 @@ public:
void visitInstruction(Instruction &I) {
if (I.getMetadata(LLVMContext::MD_dbg))
- I.setMetadata(LLVMContext::MD_dbg, 0);
+ I.setMetadata(LLVMContext::MD_dbg, nullptr);
}
void run(Module *M) {
@@ -168,11 +168,11 @@ class DIUpdater : public InstVisitor<DIUpdater> {
public:
DIUpdater(Module &M, StringRef Filename = StringRef(),
- StringRef Directory = StringRef(), const Module *DisplayM = 0,
- const ValueToValueMapTy *VMap = 0)
+ StringRef Directory = StringRef(), const Module *DisplayM = nullptr,
+ const ValueToValueMapTy *VMap = nullptr)
: Builder(M), Layout(&M), LineTable(DisplayM ? DisplayM : &M), VMap(VMap),
- Finder(), Filename(Filename), Directory(Directory), FileNode(0),
- LexicalBlockFileNode(0), CUNode(0) {
+ Finder(), Filename(Filename), Directory(Directory), FileNode(nullptr),
+ LexicalBlockFileNode(nullptr), CUNode(nullptr) {
Finder.processModule(M);
visit(&M);
}
@@ -184,7 +184,7 @@ public:
report_fatal_error("DebugIR pass supports only a signle compile unit per "
"Module.");
createCompileUnit(Finder.compile_unit_count() == 1 ?
- (MDNode*)*Finder.compile_units().begin() : 0);
+ (MDNode*)*Finder.compile_units().begin() : nullptr);
}
void visitFunction(Function &F) {
@@ -232,7 +232,7 @@ public:
/// If a ValueToValueMap is provided, use it to get the real instruction as
/// the line table was generated on a clone of the module on which we are
/// operating.
- Value *RealInst = 0;
+ Value *RealInst = nullptr;
if (VMap)
RealInst = VMap->lookup(&I);
@@ -256,7 +256,7 @@ public:
NewLoc = DebugLoc::get(Line, Col, Loc.getScope(RealInst->getContext()),
Loc.getInlinedAt(RealInst->getContext()));
else if (MDNode *scope = findScope(&I))
- NewLoc = DebugLoc::get(Line, Col, scope, 0);
+ NewLoc = DebugLoc::get(Line, Col, scope, nullptr);
else {
DEBUG(dbgs() << "WARNING: no valid scope for instruction " << &I
<< ". no DebugLoc will be present."
@@ -334,7 +334,7 @@ private:
}
DEBUG(dbgs() << "unable to find DISubprogram node for function "
<< F->getName().str() << "\n");
- return 0;
+ return nullptr;
}
/// Sets Line to the line number on which V appears and returns true. If a
@@ -366,7 +366,7 @@ private:
TypeNodeIter i = TypeDescriptors.find(T);
if (i != TypeDescriptors.end())
return i->second;
- return 0;
+ return nullptr;
}
/// Returns a DebugInfo type from an LLVM type T.
@@ -375,12 +375,12 @@ private:
if (N)
return DIDerivedType(N);
else if (T->isVoidTy())
- return DIDerivedType(0);
+ return DIDerivedType(nullptr);
else if (T->isStructTy()) {
N = Builder.createStructType(
DIScope(LexicalBlockFileNode), T->getStructName(), DIFile(FileNode),
0, Layout.getTypeSizeInBits(T), Layout.getABITypeAlignment(T), 0,
- DIType(0), DIArray(0)); // filled in later
+ DIType(nullptr), DIArray(nullptr)); // filled in later
// N is added to the map (early) so that element search below can find it,
// so as to avoid infinite recursion for structs that contain pointers to
@@ -535,7 +535,7 @@ void DebugIR::writeDebugBitcode(const Module *M, int *fd) {
Out.reset(new raw_fd_ostream(*fd, true));
}
- M->print(*Out, 0);
+ M->print(*Out, nullptr);
Out->close();
}
diff --git a/lib/Transforms/Instrumentation/DebugIR.h b/lib/Transforms/Instrumentation/DebugIR.h
index 3f57da5..02831ed 100644
--- a/lib/Transforms/Instrumentation/DebugIR.h
+++ b/lib/Transforms/Instrumentation/DebugIR.h
@@ -90,7 +90,7 @@ private:
/// Write M to disk, optionally passing in an fd to an open file which is
/// closed by this function after writing. If no fd is specified, a new file
/// is opened, written, and closed.
- void writeDebugBitcode(const llvm::Module *M, int *fd = 0);
+ void writeDebugBitcode(const llvm::Module *M, int *fd = nullptr);
};
} // llvm namespace
diff --git a/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/lib/Transforms/Instrumentation/GCOVProfiling.cpp
index bd00ec8..8330a9b 100644
--- a/lib/Transforms/Instrumentation/GCOVProfiling.cpp
+++ b/lib/Transforms/Instrumentation/GCOVProfiling.cpp
@@ -14,8 +14,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "insert-gcov-profiling"
-
#include "llvm/Transforms/Instrumentation.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Hashing.h"
@@ -39,10 +37,13 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
#include <algorithm>
+#include <memory>
#include <string>
#include <utility>
using namespace llvm;
+#define DEBUG_TYPE "insert-gcov-profiling"
+
static cl::opt<std::string>
DefaultGCOVVersion("default-gcov-version", cl::init("402*"), cl::Hidden,
cl::ValueRequired);
@@ -77,9 +78,6 @@ namespace {
"GCOVProfiler asked to do nothing?");
init();
}
- ~GCOVProfiler() {
- DeleteContainerPointers(Funcs);
- }
const char *getPassName() const override {
return "GCOV Profiler";
}
@@ -141,7 +139,7 @@ namespace {
Module *M;
LLVMContext *Ctx;
- SmallVector<GCOVFunction *, 16> Funcs;
+ SmallVector<std::unique_ptr<GCOVFunction>, 16> Funcs;
};
}
@@ -449,6 +447,21 @@ bool GCOVProfiler::runOnModule(Module &M) {
return false;
}
+static bool functionHasLines(Function *F) {
+ // Check whether this function actually has any source lines. Not only
+ // do these waste space, they can also crash gcov.
+ for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
+ for (BasicBlock::iterator I = BB->begin(), IE = BB->end();
+ I != IE; ++I) {
+ const DebugLoc &Loc = I->getDebugLoc();
+ if (Loc.isUnknown()) continue;
+ if (Loc.getLine() != 0)
+ return true;
+ }
+ }
+ return false;
+}
+
void GCOVProfiler::emitProfileNotes() {
NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu");
if (!CU_Nodes) return;
@@ -474,6 +487,7 @@ void GCOVProfiler::emitProfileNotes() {
Function *F = SP.getFunction();
if (!F) continue;
+ if (!functionHasLines(F)) continue;
// gcov expects every function to start with an entry block that has a
// single successor, so split the entry block to make sure of that.
@@ -483,19 +497,19 @@ void GCOVProfiler::emitProfileNotes() {
++It;
EntryBlock.splitBasicBlock(It);
- GCOVFunction *Func =
- new GCOVFunction(SP, &out, i, Options.UseCfgChecksum);
- Funcs.push_back(Func);
+ Funcs.push_back(
+ make_unique<GCOVFunction>(SP, &out, i, Options.UseCfgChecksum));
+ GCOVFunction &Func = *Funcs.back();
for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
- GCOVBlock &Block = Func->getBlock(BB);
+ GCOVBlock &Block = Func.getBlock(BB);
TerminatorInst *TI = BB->getTerminator();
if (int successors = TI->getNumSuccessors()) {
for (int i = 0; i != successors; ++i) {
- Block.addEdge(Func->getBlock(TI->getSuccessor(i)));
+ Block.addEdge(Func.getBlock(TI->getSuccessor(i)));
}
} else if (isa<ReturnInst>(TI)) {
- Block.addEdge(Func->getReturnBlock());
+ Block.addEdge(Func.getReturnBlock());
}
uint32_t Line = 0;
@@ -511,7 +525,7 @@ void GCOVProfiler::emitProfileNotes() {
Lines.addLine(Loc.getLine());
}
}
- EdgeDestinations += Func->getEdgeDestinations();
+ EdgeDestinations += Func.getEdgeDestinations();
}
FileChecksums.push_back(hash_value(EdgeDestinations));
@@ -519,9 +533,7 @@ void GCOVProfiler::emitProfileNotes() {
out.write(ReversedVersion, 4);
out.write(reinterpret_cast<char*>(&FileChecksums.back()), 4);
- for (SmallVectorImpl<GCOVFunction *>::iterator I = Funcs.begin(),
- E = Funcs.end(); I != E; ++I) {
- GCOVFunction *Func = *I;
+ for (auto &Func : Funcs) {
Func->setCfgChecksum(FileChecksums.back());
Func->writeOut();
}
@@ -549,6 +561,7 @@ bool GCOVProfiler::emitProfileArcs() {
continue;
Function *F = SP.getFunction();
if (!F) continue;
+ if (!functionHasLines(F)) continue;
if (!Result) Result = true;
unsigned Edges = 0;
for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
diff --git a/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index ec1a195..b8e632e 100644
--- a/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -93,12 +93,11 @@
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "msan"
-
#include "llvm/Transforms/Instrumentation.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Triple.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
@@ -122,6 +121,8 @@
using namespace llvm;
+#define DEBUG_TYPE "msan"
+
static const uint64_t kShadowMask32 = 1ULL << 31;
static const uint64_t kShadowMask64 = 1ULL << 46;
static const uint64_t kOriginOffset32 = 1ULL << 30;
@@ -129,6 +130,9 @@ static const uint64_t kOriginOffset64 = 1ULL << 45;
static const unsigned kMinOriginAlignment = 4;
static const unsigned kShadowTLSAlignment = 8;
+// Access sizes are powers of two: 1, 2, 4, 8.
+static const size_t kNumberOfAccessSizes = 4;
+
/// \brief Track origins of uninitialized values.
///
/// Adds a section to MemorySanitizer report that points to the allocation
@@ -178,6 +182,14 @@ static cl::opt<std::string> ClBlacklistFile("msan-blacklist",
cl::desc("File containing the list of functions where MemorySanitizer "
"should not report bugs"), cl::Hidden);
+static cl::opt<int> ClInstrumentationWithCallThreshold(
+ "msan-instrumentation-with-call-threshold",
+ cl::desc(
+ "If the function being instrumented requires more than "
+ "this number of checks and origin stores, use callbacks instead of "
+ "inline checks (-1 means never use callbacks)."),
+ cl::Hidden, cl::init(3500));
+
// Experimental. Wraps all indirect calls in the instrumented code with
// a call to the given function. This is needed to assist the dynamic
// helper tool (MSanDR) to regain control on transition between instrumented and
@@ -203,8 +215,8 @@ class MemorySanitizer : public FunctionPass {
StringRef BlacklistFile = StringRef())
: FunctionPass(ID),
TrackOrigins(std::max(TrackOrigins, (int)ClTrackOrigins)),
- DL(0),
- WarningFn(0),
+ DL(nullptr),
+ WarningFn(nullptr),
BlacklistFile(BlacklistFile.empty() ? ClBlacklistFile : BlacklistFile),
WrapIndirectCalls(!ClWrapIndirectCalls.empty()) {}
const char *getPassName() const override { return "MemorySanitizer"; }
@@ -245,6 +257,10 @@ class MemorySanitizer : public FunctionPass {
/// \brief The run-time callback to print a warning.
Value *WarningFn;
+ // These arrays are indexed by log2(AccessSize).
+ Value *MaybeWarningFn[kNumberOfAccessSizes];
+ Value *MaybeStoreOriginFn[kNumberOfAccessSizes];
+
/// \brief Run-time helper that generates a new origin value for a stack
/// allocation.
Value *MsanSetAllocaOrigin4Fn;
@@ -321,6 +337,20 @@ void MemorySanitizer::initializeCallbacks(Module &M) {
: "__msan_warning_noreturn";
WarningFn = M.getOrInsertFunction(WarningFnName, IRB.getVoidTy(), NULL);
+ for (size_t AccessSizeIndex = 0; AccessSizeIndex < kNumberOfAccessSizes;
+ AccessSizeIndex++) {
+ unsigned AccessSize = 1 << AccessSizeIndex;
+ std::string FunctionName = "__msan_maybe_warning_" + itostr(AccessSize);
+ MaybeWarningFn[AccessSizeIndex] = M.getOrInsertFunction(
+ FunctionName, IRB.getVoidTy(), IRB.getIntNTy(AccessSize * 8),
+ IRB.getInt32Ty(), NULL);
+
+ FunctionName = "__msan_maybe_store_origin_" + itostr(AccessSize);
+ MaybeStoreOriginFn[AccessSizeIndex] = M.getOrInsertFunction(
+ FunctionName, IRB.getVoidTy(), IRB.getIntNTy(AccessSize * 8),
+ IRB.getInt8PtrTy(), IRB.getInt32Ty(), NULL);
+ }
+
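
[editor's note: these __msan_maybe_warning_N / __msan_maybe_store_origin_N entry points move the inline "shadow != 0" branch into the runtime when callback mode is on. A plausible runtime-side sketch of the 4-byte warning variant — the body is hypothetical, the real symbols are owned by the MSan runtime:]

    #include <cstdint>

    extern "C" void __msan_warning();  // existing report entry point

    // Hypothetical sketch: called with the zero-extended shadow of a 4-byte
    // value and its origin id (0 when origin tracking is disabled).
    extern "C" void msan_maybe_warning_4_sketch(uint32_t s, uint32_t origin) {
      if (s != 0) {
        (void)origin;  // a real implementation would report it with the warning
        __msan_warning();
      }
    }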
MsanSetAllocaOrigin4Fn = M.getOrInsertFunction(
"__msan_set_alloca_origin4", IRB.getVoidTy(), IRB.getInt8PtrTy(), IntptrTy,
IRB.getInt8PtrTy(), IntptrTy, NULL);
@@ -341,31 +371,32 @@ void MemorySanitizer::initializeCallbacks(Module &M) {
// Create globals.
RetvalTLS = new GlobalVariable(
M, ArrayType::get(IRB.getInt64Ty(), 8), false,
- GlobalVariable::ExternalLinkage, 0, "__msan_retval_tls", 0,
+ GlobalVariable::ExternalLinkage, nullptr, "__msan_retval_tls", nullptr,
GlobalVariable::InitialExecTLSModel);
RetvalOriginTLS = new GlobalVariable(
- M, OriginTy, false, GlobalVariable::ExternalLinkage, 0,
- "__msan_retval_origin_tls", 0, GlobalVariable::InitialExecTLSModel);
+ M, OriginTy, false, GlobalVariable::ExternalLinkage, nullptr,
+ "__msan_retval_origin_tls", nullptr, GlobalVariable::InitialExecTLSModel);
ParamTLS = new GlobalVariable(
M, ArrayType::get(IRB.getInt64Ty(), 1000), false,
- GlobalVariable::ExternalLinkage, 0, "__msan_param_tls", 0,
+ GlobalVariable::ExternalLinkage, nullptr, "__msan_param_tls", nullptr,
GlobalVariable::InitialExecTLSModel);
ParamOriginTLS = new GlobalVariable(
M, ArrayType::get(OriginTy, 1000), false, GlobalVariable::ExternalLinkage,
- 0, "__msan_param_origin_tls", 0, GlobalVariable::InitialExecTLSModel);
+ nullptr, "__msan_param_origin_tls", nullptr,
+ GlobalVariable::InitialExecTLSModel);
VAArgTLS = new GlobalVariable(
M, ArrayType::get(IRB.getInt64Ty(), 1000), false,
- GlobalVariable::ExternalLinkage, 0, "__msan_va_arg_tls", 0,
+ GlobalVariable::ExternalLinkage, nullptr, "__msan_va_arg_tls", nullptr,
GlobalVariable::InitialExecTLSModel);
VAArgOverflowSizeTLS = new GlobalVariable(
- M, IRB.getInt64Ty(), false, GlobalVariable::ExternalLinkage, 0,
- "__msan_va_arg_overflow_size_tls", 0,
+ M, IRB.getInt64Ty(), false, GlobalVariable::ExternalLinkage, nullptr,
+ "__msan_va_arg_overflow_size_tls", nullptr,
GlobalVariable::InitialExecTLSModel);
OriginTLS = new GlobalVariable(
- M, IRB.getInt32Ty(), false, GlobalVariable::ExternalLinkage, 0,
- "__msan_origin_tls", 0, GlobalVariable::InitialExecTLSModel);
+ M, IRB.getInt32Ty(), false, GlobalVariable::ExternalLinkage, nullptr,
+ "__msan_origin_tls", nullptr, GlobalVariable::InitialExecTLSModel);
// We insert an empty inline asm after __msan_report* to avoid callback merge.
EmptyAsm = InlineAsm::get(FunctionType::get(IRB.getVoidTy(), false),
@@ -379,14 +410,14 @@ void MemorySanitizer::initializeCallbacks(Module &M) {
ClWrapIndirectCalls, AnyFunctionPtrTy, AnyFunctionPtrTy, NULL);
}
- if (ClWrapIndirectCallsFast) {
+ if (WrapIndirectCalls && ClWrapIndirectCallsFast) {
MsandrModuleStart = new GlobalVariable(
M, IRB.getInt32Ty(), false, GlobalValue::ExternalLinkage,
- 0, "__executable_start");
+ nullptr, "__executable_start");
MsandrModuleStart->setVisibility(GlobalVariable::HiddenVisibility);
MsandrModuleEnd = new GlobalVariable(
M, IRB.getInt32Ty(), false, GlobalValue::ExternalLinkage,
- 0, "_end");
+ nullptr, "_end");
MsandrModuleEnd->setVisibility(GlobalVariable::HiddenVisibility);
}
}
@@ -397,7 +428,7 @@ void MemorySanitizer::initializeCallbacks(Module &M) {
bool MemorySanitizer::doInitialization(Module &M) {
DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
if (!DLP)
- return false;
+ report_fatal_error("data layout missing");
DL = &DLP->getDataLayout();
BL.reset(SpecialCaseList::createOrDie(BlacklistFile));
@@ -474,6 +505,11 @@ VarArgHelper*
CreateVarArgHelper(Function &Func, MemorySanitizer &Msan,
MemorySanitizerVisitor &Visitor);
+unsigned TypeSizeToSizeIndex(unsigned TypeSize) {
+ if (TypeSize <= 8) return 0;
+ return Log2_32_Ceil(TypeSize / 8);
+}
+
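
[editor's note: TypeSizeToSizeIndex maps a bit width onto the log2-indexed callback tables — widths up to one byte share index 0, larger widths take the ceiling of log2 of their byte size. A self-contained copy with a few sanity checks:]

    #include <cassert>

    // Local copy of the mapping above, for illustration only.
    static unsigned sizeIndex(unsigned TypeSizeInBits) {
      if (TypeSizeInBits <= 8) return 0;
      unsigned Bytes = TypeSizeInBits / 8, Idx = 0;
      while ((1u << Idx) < Bytes) ++Idx;  // Log2_32_Ceil(Bytes)
      return Idx;
    }

    void sizeIndexExamples() {
      assert(sizeIndex(1)  == 0);   // sub-byte widths clamp to index 0
      assert(sizeIndex(8)  == 0);
      assert(sizeIndex(16) == 1);
      assert(sizeIndex(48) == 3);   // non-power-of-two widths round up
      assert(sizeIndex(64) == 3);
    }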
/// This class does all the work for a given function. Store and Load
/// instructions store and load corresponding shadow and origin
/// values. Most instructions propagate shadow from arguments to their
@@ -529,9 +565,42 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
return IRB.CreateCall(MS.MsanChainOriginFn, V);
}
- void materializeStores() {
+ void storeOrigin(IRBuilder<> &IRB, Value *Addr, Value *Shadow, Value *Origin,
+ unsigned Alignment, bool AsCall) {
+ if (isa<StructType>(Shadow->getType())) {
+ IRB.CreateAlignedStore(updateOrigin(Origin, IRB), getOriginPtr(Addr, IRB),
+ Alignment);
+ } else {
+ Value *ConvertedShadow = convertToShadowTyNoVec(Shadow, IRB);
+ // TODO(eugenis): handle non-zero constant shadow by inserting an
+ // unconditional check (cannot simply fail compilation as this could
+ // be in the dead code).
+ if (isa<Constant>(ConvertedShadow)) return;
+ unsigned TypeSizeInBits =
+ MS.DL->getTypeSizeInBits(ConvertedShadow->getType());
+ unsigned SizeIndex = TypeSizeToSizeIndex(TypeSizeInBits);
+ if (AsCall && SizeIndex < kNumberOfAccessSizes) {
+ Value *Fn = MS.MaybeStoreOriginFn[SizeIndex];
+ Value *ConvertedShadow2 = IRB.CreateZExt(
+ ConvertedShadow, IRB.getIntNTy(8 * (1 << SizeIndex)));
+ IRB.CreateCall3(Fn, ConvertedShadow2,
+ IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()),
+ updateOrigin(Origin, IRB));
+ } else {
+ Value *Cmp = IRB.CreateICmpNE(
+ ConvertedShadow, getCleanShadow(ConvertedShadow), "_mscmp");
+ Instruction *CheckTerm = SplitBlockAndInsertIfThen(
+ Cmp, IRB.GetInsertPoint(), false, MS.OriginStoreWeights);
+ IRBuilder<> IRBNew(CheckTerm);
+ IRBNew.CreateAlignedStore(updateOrigin(Origin, IRBNew),
+ getOriginPtr(Addr, IRBNew), Alignment);
+ }
+ }
+ }
+
+ void materializeStores(bool InstrumentWithCalls) {
for (size_t i = 0, n = StoreList.size(); i < n; i++) {
- StoreInst& I = *dyn_cast<StoreInst>(StoreList[i]);
+ StoreInst &I = *dyn_cast<StoreInst>(StoreList[i]);
IRBuilder<> IRB(&I);
Value *Val = I.getValueOperand();
@@ -540,53 +609,41 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
Value *ShadowPtr = getShadowPtr(Addr, Shadow->getType(), IRB);
StoreInst *NewSI =
- IRB.CreateAlignedStore(Shadow, ShadowPtr, I.getAlignment());
+ IRB.CreateAlignedStore(Shadow, ShadowPtr, I.getAlignment());
DEBUG(dbgs() << " STORE: " << *NewSI << "\n");
(void)NewSI;
- if (ClCheckAccessAddress)
- insertShadowCheck(Addr, &I);
+ if (ClCheckAccessAddress) insertShadowCheck(Addr, &I);
- if (I.isAtomic())
- I.setOrdering(addReleaseOrdering(I.getOrdering()));
+ if (I.isAtomic()) I.setOrdering(addReleaseOrdering(I.getOrdering()));
if (MS.TrackOrigins) {
unsigned Alignment = std::max(kMinOriginAlignment, I.getAlignment());
- if (isa<StructType>(Shadow->getType())) {
- IRB.CreateAlignedStore(updateOrigin(getOrigin(Val), IRB),
- getOriginPtr(Addr, IRB), Alignment);
- } else {
- Value *ConvertedShadow = convertToShadowTyNoVec(Shadow, IRB);
-
- // TODO(eugenis): handle non-zero constant shadow by inserting an
- // unconditional check (can not simply fail compilation as this could
- // be in the dead code).
- if (isa<Constant>(ConvertedShadow))
- continue;
-
- Value *Cmp = IRB.CreateICmpNE(ConvertedShadow,
- getCleanShadow(ConvertedShadow), "_mscmp");
- Instruction *CheckTerm =
- SplitBlockAndInsertIfThen(Cmp, &I, false, MS.OriginStoreWeights);
- IRBuilder<> IRBNew(CheckTerm);
- IRBNew.CreateAlignedStore(updateOrigin(getOrigin(Val), IRBNew),
- getOriginPtr(Addr, IRBNew), Alignment);
- }
+ storeOrigin(IRB, Addr, Shadow, getOrigin(Val), Alignment,
+ InstrumentWithCalls);
}
}
}
- void materializeChecks() {
- for (size_t i = 0, n = InstrumentationList.size(); i < n; i++) {
- Value *Shadow = InstrumentationList[i].Shadow;
- Instruction *OrigIns = InstrumentationList[i].OrigIns;
- IRBuilder<> IRB(OrigIns);
- DEBUG(dbgs() << " SHAD0 : " << *Shadow << "\n");
- Value *ConvertedShadow = convertToShadowTyNoVec(Shadow, IRB);
- DEBUG(dbgs() << " SHAD1 : " << *ConvertedShadow << "\n");
- // See the comment in materializeStores().
- if (isa<Constant>(ConvertedShadow))
- continue;
+ void materializeOneCheck(Instruction *OrigIns, Value *Shadow, Value *Origin,
+ bool AsCall) {
+ IRBuilder<> IRB(OrigIns);
+ DEBUG(dbgs() << " SHAD0 : " << *Shadow << "\n");
+ Value *ConvertedShadow = convertToShadowTyNoVec(Shadow, IRB);
+ DEBUG(dbgs() << " SHAD1 : " << *ConvertedShadow << "\n");
+ // See the comment in materializeStores().
+ if (isa<Constant>(ConvertedShadow)) return;
+ unsigned TypeSizeInBits =
+ MS.DL->getTypeSizeInBits(ConvertedShadow->getType());
+ unsigned SizeIndex = TypeSizeToSizeIndex(TypeSizeInBits);
+ if (AsCall && SizeIndex < kNumberOfAccessSizes) {
+ Value *Fn = MS.MaybeWarningFn[SizeIndex];
+ Value *ConvertedShadow2 =
+ IRB.CreateZExt(ConvertedShadow, IRB.getIntNTy(8 * (1 << SizeIndex)));
+ IRB.CreateCall2(Fn, ConvertedShadow2, MS.TrackOrigins && Origin
+ ? Origin
+ : (Value *)IRB.getInt32(0));
+ } else {
Value *Cmp = IRB.CreateICmpNE(ConvertedShadow,
getCleanShadow(ConvertedShadow), "_mscmp");
Instruction *CheckTerm = SplitBlockAndInsertIfThen(
@@ -595,14 +652,22 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
IRB.SetInsertPoint(CheckTerm);
if (MS.TrackOrigins) {
- Value *Origin = InstrumentationList[i].Origin;
- IRB.CreateStore(Origin ? (Value*)Origin : (Value*)IRB.getInt32(0),
+ IRB.CreateStore(Origin ? (Value *)Origin : (Value *)IRB.getInt32(0),
MS.OriginTLS);
}
IRB.CreateCall(MS.WarningFn);
IRB.CreateCall(MS.EmptyAsm);
DEBUG(dbgs() << " CHECK: " << *Cmp << "\n");
}
+ }
+
+ void materializeChecks(bool InstrumentWithCalls) {
+ for (size_t i = 0, n = InstrumentationList.size(); i < n; i++) {
+ Instruction *OrigIns = InstrumentationList[i].OrigIns;
+ Value *Shadow = InstrumentationList[i].Shadow;
+ Value *Origin = InstrumentationList[i].Origin;
+ materializeOneCheck(OrigIns, Shadow, Origin, InstrumentWithCalls);
+ }
DEBUG(dbgs() << "DONE:\n" << F);
}
@@ -662,17 +727,15 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
// Iterate all BBs in depth-first order and create shadow instructions
// for all instructions (where applicable).
// For PHI nodes we create dummy shadow PHIs which will be finalized later.
- for (df_iterator<BasicBlock*> DI = df_begin(&F.getEntryBlock()),
- DE = df_end(&F.getEntryBlock()); DI != DE; ++DI) {
- BasicBlock *BB = *DI;
+ for (BasicBlock *BB : depth_first(&F.getEntryBlock()))
visit(*BB);
- }
+
// Finalize PHI nodes.
for (size_t i = 0, n = ShadowPHINodes.size(); i < n; i++) {
PHINode *PN = ShadowPHINodes[i];
PHINode *PNS = cast<PHINode>(getShadow(PN));
- PHINode *PNO = MS.TrackOrigins ? cast<PHINode>(getOrigin(PN)) : 0;
+ PHINode *PNO = MS.TrackOrigins ? cast<PHINode>(getOrigin(PN)) : nullptr;
size_t NumValues = PN->getNumIncomingValues();
for (size_t v = 0; v < NumValues; v++) {
PNS->addIncoming(getShadow(PN, v), PN->getIncomingBlock(v));
@@ -683,12 +746,16 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
VAHelper->finalizeInstrumentation();
+ bool InstrumentWithCalls = ClInstrumentationWithCallThreshold >= 0 &&
+ InstrumentationList.size() + StoreList.size() >
+ (unsigned)ClInstrumentationWithCallThreshold;
+
// Delayed instrumentation of StoreInst.
// This may add new checks to be inserted later.
- materializeStores();
+ materializeStores(InstrumentWithCalls);
// Insert shadow value checks.
- materializeChecks();
+ materializeChecks(InstrumentWithCalls);
// Wrap indirect calls.
materializeIndirectCalls();
@@ -704,7 +771,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
/// \brief Compute the shadow type that corresponds to a given Type.
Type *getShadowTy(Type *OrigTy) {
if (!OrigTy->isSized()) {
- return 0;
+ return nullptr;
}
// For integer type, shadow is the same as the original type.
// This may return weird-sized types like i1.
@@ -784,7 +851,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
/// \brief Compute the origin address for a given function argument.
Value *getOriginPtrForArgument(Value *A, IRBuilder<> &IRB,
int ArgOffset) {
- if (!MS.TrackOrigins) return 0;
+ if (!MS.TrackOrigins) return nullptr;
Value *Base = IRB.CreatePointerCast(MS.ParamOriginTLS, MS.IntptrTy);
Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
return IRB.CreateIntToPtr(Base, PointerType::get(MS.OriginTy, 0),
@@ -825,7 +892,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
Constant *getCleanShadow(Value *V) {
Type *ShadowTy = getShadowTy(V);
if (!ShadowTy)
- return 0;
+ return nullptr;
return Constant::getNullValue(ShadowTy);
}
@@ -845,7 +912,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
Constant *getPoisonedShadow(Value *V) {
Type *ShadowTy = getShadowTy(V);
if (!ShadowTy)
- return 0;
+ return nullptr;
return getPoisonedShadow(ShadowTy);
}
@@ -936,7 +1003,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
/// \brief Get the origin for a value.
Value *getOrigin(Value *V) {
- if (!MS.TrackOrigins) return 0;
+ if (!MS.TrackOrigins) return nullptr;
if (isa<Instruction>(V) || isa<Argument>(V)) {
Value *Origin = OriginMap[V];
if (!Origin) {
@@ -1234,7 +1301,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
public:
Combiner(MemorySanitizerVisitor *MSV, IRBuilder<> &IRB) :
- Shadow(0), Origin(0), IRB(IRB), MSV(MSV) {}
+ Shadow(nullptr), Origin(nullptr), IRB(IRB), MSV(MSV) {}
/// \brief Add a pair of shadow and origin values to the mix.
Combiner &Add(Value *OpShadow, Value *OpOrigin) {
@@ -1265,7 +1332,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
/// \brief Add an application value to the mix.
Combiner &Add(Value *V) {
Value *OpShadow = MSV->getShadow(V);
- Value *OpOrigin = MSV->MS.TrackOrigins ? MSV->getOrigin(V) : 0;
+ Value *OpOrigin = MSV->MS.TrackOrigins ? MSV->getOrigin(V) : nullptr;
return Add(OpShadow, OpOrigin);
}
@@ -1480,7 +1547,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
void handleSignedRelationalComparison(ICmpInst &I) {
Constant *constOp0 = dyn_cast<Constant>(I.getOperand(0));
Constant *constOp1 = dyn_cast<Constant>(I.getOperand(1));
- Value* op = NULL;
+ Value* op = nullptr;
CmpInst::Predicate pre = I.getPredicate();
if (constOp0 && constOp0->isNullValue() &&
(pre == CmpInst::ICMP_SGT || pre == CmpInst::ICMP_SLE)) {
@@ -1789,7 +1856,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
break;
case 1:
ConvertOp = I.getArgOperand(0);
- CopyOp = NULL;
+ CopyOp = nullptr;
break;
default:
llvm_unreachable("Cvt intrinsic with unsupported number of arguments.");
@@ -1803,7 +1870,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
// FIXME: consider propagating shadow of ConvertOp, at least in the case of
// int->any conversion.
Value *ConvertShadow = getShadow(ConvertOp);
- Value *AggShadow = 0;
+ Value *AggShadow = nullptr;
if (ConvertOp->getType()->isVectorTy()) {
AggShadow = IRB.CreateExtractElement(
ConvertShadow, ConstantInt::get(IRB.getInt32Ty(), 0));
@@ -2055,7 +2122,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
continue;
}
unsigned Size = 0;
- Value *Store = 0;
+ Value *Store = nullptr;
// Compute the Shadow for arg even if it is ByVal, because
// in that case getShadow() will copy the actual arg shadow to
// __msan_param_tls.
@@ -2080,7 +2147,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
IRB.CreateStore(getOrigin(A),
getOriginPtrForArgument(A, IRB, ArgOffset));
(void)Store;
- assert(Size != 0 && Store != 0);
+ assert(Size != 0 && Store != nullptr);
DEBUG(dbgs() << " Param:" << *Store << "\n");
ArgOffset += DataLayout::RoundUpAlignment(Size, 8);
}
@@ -2098,7 +2165,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
// Until we have full dynamic coverage, make sure the retval shadow is 0.
Value *Base = getShadowPtrForRetval(&I, IRBBefore);
IRBBefore.CreateAlignedStore(getCleanShadow(&I), Base, kShadowTLSAlignment);
- Instruction *NextInsn = 0;
+ Instruction *NextInsn = nullptr;
if (CS.isCall()) {
NextInsn = I.getNextNode();
} else {
@@ -2318,7 +2385,8 @@ struct VarArgAMD64Helper : public VarArgHelper {
VarArgAMD64Helper(Function &F, MemorySanitizer &MS,
MemorySanitizerVisitor &MSV)
- : F(F), MS(MS), MSV(MSV), VAArgTLSCopy(0), VAArgOverflowSize(0) { }
+ : F(F), MS(MS), MSV(MSV), VAArgTLSCopy(nullptr),
+ VAArgOverflowSize(nullptr) {}
enum ArgKind { AK_GeneralPurpose, AK_FloatingPoint, AK_Memory };
diff --git a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
index 5ffb17c..8fe9bca 100644
--- a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
+++ b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
@@ -19,8 +19,6 @@
// The rest is handled by the run-time library.
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "tsan"
-
#include "llvm/Transforms/Instrumentation.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallString.h"
@@ -46,6 +44,8 @@
using namespace llvm;
+#define DEBUG_TYPE "tsan"
+
static cl::opt<std::string> ClBlacklistFile("tsan-blacklist",
cl::desc("Blacklist file"), cl::Hidden);
static cl::opt<bool> ClInstrumentMemoryAccesses(
@@ -78,7 +78,7 @@ namespace {
struct ThreadSanitizer : public FunctionPass {
ThreadSanitizer(StringRef BlacklistFile = StringRef())
: FunctionPass(ID),
- DL(0),
+ DL(nullptr),
BlacklistFile(BlacklistFile.empty() ? ClBlacklistFile
: BlacklistFile) { }
const char *getPassName() const override;
@@ -174,8 +174,8 @@ void ThreadSanitizer::initializeCallbacks(Module &M) {
for (int op = AtomicRMWInst::FIRST_BINOP;
op <= AtomicRMWInst::LAST_BINOP; ++op) {
- TsanAtomicRMW[op][i] = NULL;
- const char *NamePart = NULL;
+ TsanAtomicRMW[op][i] = nullptr;
+ const char *NamePart = nullptr;
if (op == AtomicRMWInst::Xchg)
NamePart = "_exchange";
else if (op == AtomicRMWInst::Add)
@@ -226,7 +226,7 @@ void ThreadSanitizer::initializeCallbacks(Module &M) {
bool ThreadSanitizer::doInitialization(Module &M) {
DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
if (!DLP)
- return false;
+ report_fatal_error("data layout missing");
DL = &DLP->getDataLayout();
BL.reset(SpecialCaseList::createOrDie(BlacklistFile));
@@ -518,7 +518,7 @@ bool ThreadSanitizer::instrumentAtomic(Instruction *I) {
if (Idx < 0)
return false;
Function *F = TsanAtomicRMW[RMWI->getOperation()][Idx];
- if (F == NULL)
+ if (!F)
return false;
const size_t ByteSize = 1 << Idx;
const size_t BitSize = ByteSize * 8;
diff --git a/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h b/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h
index 4eac39d..4098428 100644
--- a/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h
+++ b/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h
@@ -43,34 +43,34 @@ public:
EPT_RetainAutoreleaseRV
};
- ARCRuntimeEntryPoints() : TheModule(0),
- AutoreleaseRV(0),
- Release(0),
- Retain(0),
- RetainBlock(0),
- Autorelease(0),
- StoreStrong(0),
- RetainRV(0),
- RetainAutorelease(0),
- RetainAutoreleaseRV(0) { }
+ ARCRuntimeEntryPoints() : TheModule(nullptr),
+ AutoreleaseRV(nullptr),
+ Release(nullptr),
+ Retain(nullptr),
+ RetainBlock(nullptr),
+ Autorelease(nullptr),
+ StoreStrong(nullptr),
+ RetainRV(nullptr),
+ RetainAutorelease(nullptr),
+ RetainAutoreleaseRV(nullptr) { }
~ARCRuntimeEntryPoints() { }
void Initialize(Module *M) {
TheModule = M;
- AutoreleaseRV = 0;
- Release = 0;
- Retain = 0;
- RetainBlock = 0;
- Autorelease = 0;
- StoreStrong = 0;
- RetainRV = 0;
- RetainAutorelease = 0;
- RetainAutoreleaseRV = 0;
+ AutoreleaseRV = nullptr;
+ Release = nullptr;
+ Retain = nullptr;
+ RetainBlock = nullptr;
+ Autorelease = nullptr;
+ StoreStrong = nullptr;
+ RetainRV = nullptr;
+ RetainAutorelease = nullptr;
+ RetainAutoreleaseRV = nullptr;
}
Constant *get(const EntryPointType entry) {
- assert(TheModule != 0 && "Not initialized.");
+ assert(TheModule != nullptr && "Not initialized.");
switch (entry) {
case EPT_AutoreleaseRV:
diff --git a/lib/Transforms/ObjCARC/DependencyAnalysis.cpp b/lib/Transforms/ObjCARC/DependencyAnalysis.cpp
index 8780359..08c8842 100644
--- a/lib/Transforms/ObjCARC/DependencyAnalysis.cpp
+++ b/lib/Transforms/ObjCARC/DependencyAnalysis.cpp
@@ -20,7 +20,6 @@
///
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "objc-arc-dependency"
#include "ObjCARC.h"
#include "DependencyAnalysis.h"
#include "ProvenanceAnalysis.h"
@@ -29,6 +28,8 @@
using namespace llvm;
using namespace llvm::objcarc;
+#define DEBUG_TYPE "objc-arc-dependency"
+
/// Test whether the given instruction can result in a reference count
/// modification (positive or negative) for the pointer's object.
bool
@@ -223,7 +224,7 @@ llvm::objcarc::FindDependencies(DependenceKind Flavor,
pred_iterator PI(LocalStartBB), PE(LocalStartBB, false);
if (PI == PE)
// If we've reached the function entry, produce a null dependence.
- DependingInsts.insert(0);
+ DependingInsts.insert(nullptr);
else
// Add the predecessors to the worklist.
do {
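The insert(nullptr) above is a sentinel: a null element in the result set means the backwards search ran off the start of the function. A sketch of how a caller can test for it (container type assumed from the surrounding code):

    #include "llvm/ADT/SmallPtrSet.h"
    #include "llvm/IR/Instruction.h"
    using namespace llvm;

    static bool searchReachedEntry(
        const SmallPtrSet<Instruction *, 4> &DependingInsts) {
      // nullptr was inserted in place of a real dependence.
      return DependingInsts.count(nullptr);
    }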
diff --git a/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp b/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp
index cb7e4da..1a25391 100644
--- a/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp
+++ b/lib/Transforms/ObjCARC/ObjCARCAPElim.cpp
@@ -24,7 +24,6 @@
///
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "objc-arc-ap-elim"
#include "ObjCARC.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/Constants.h"
@@ -34,6 +33,8 @@
using namespace llvm;
using namespace llvm::objcarc;
+#define DEBUG_TYPE "objc-arc-ap-elim"
+
namespace {
/// \brief Autorelease pool elimination.
class ObjCARCAPElim : public ModulePass {
@@ -93,7 +94,7 @@ bool ObjCARCAPElim::MayAutorelease(ImmutableCallSite CS, unsigned Depth) {
bool ObjCARCAPElim::OptimizeBB(BasicBlock *BB) {
bool Changed = false;
- Instruction *Push = 0;
+ Instruction *Push = nullptr;
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) {
Instruction *Inst = I++;
switch (GetBasicInstructionClass(Inst)) {
@@ -112,11 +113,11 @@ bool ObjCARCAPElim::OptimizeBB(BasicBlock *BB) {
Inst->eraseFromParent();
Push->eraseFromParent();
}
- Push = 0;
+ Push = nullptr;
break;
case IC_CallOrUser:
if (MayAutorelease(ImmutableCallSite(Inst)))
- Push = 0;
+ Push = nullptr;
break;
default:
break;
@@ -154,8 +155,8 @@ bool ObjCARCAPElim::runOnModule(Module &M) {
for (User::op_iterator OI = Init->op_begin(), OE = Init->op_end();
OI != OE; ++OI) {
Value *Op = *OI;
- // llvm.global_ctors is an array of pairs where the second members
- // are constructor functions.
+ // llvm.global_ctors is an array of three-field structs where the second
+ // members are constructor functions.
Function *F = dyn_cast<Function>(cast<ConstantStruct>(Op)->getOperand(1));
// If the user used a constructor function with the wrong signature and
// it got bitcasted or whatever, look the other way.
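The corrected comment is the substantive part of this hunk: llvm.global_ctors entries are three-field {priority, constructor, associated data} structs, and field 1 holds the constructor. A sketch of walking the array under that layout (helper name hypothetical):

    #include "llvm/IR/Constants.h"
    #include "llvm/IR/Module.h"
    using namespace llvm;

    static void forEachGlobalCtor(Module &M, void (*Visit)(Function *)) {
      GlobalVariable *GV = M.getGlobalVariable("llvm.global_ctors");
      if (!GV || !GV->hasInitializer())
        return;
      ConstantArray *Init = dyn_cast<ConstantArray>(GV->getInitializer());
      if (!Init)
        return;
      for (unsigned i = 0, e = Init->getNumOperands(); i != e; ++i) {
        ConstantStruct *CS = dyn_cast<ConstantStruct>(Init->getOperand(i));
        if (!CS)
          continue;
        // Field 1 is the constructor; look through a possible bitcast,
        // as the pass above does.
        Value *Op = CS->getOperand(1)->stripPointerCasts();
        if (Function *F = dyn_cast<Function>(Op))
          Visit(F);
      }
    }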
diff --git a/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.cpp b/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.cpp
index d18667b..2c09e70 100644
--- a/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.cpp
+++ b/lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.cpp
@@ -20,7 +20,6 @@
///
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "objc-arc-aa"
#include "ObjCARC.h"
#include "ObjCARCAliasAnalysis.h"
#include "llvm/IR/Instruction.h"
@@ -28,6 +27,8 @@
#include "llvm/PassAnalysisSupport.h"
#include "llvm/PassSupport.h"
+#define DEBUG_TYPE "objc-arc-aa"
+
namespace llvm {
class Function;
class Value;
diff --git a/lib/Transforms/ObjCARC/ObjCARCContract.cpp b/lib/Transforms/ObjCARC/ObjCARCContract.cpp
index 3da5a0e..f48d53d 100644
--- a/lib/Transforms/ObjCARC/ObjCARCContract.cpp
+++ b/lib/Transforms/ObjCARC/ObjCARCContract.cpp
@@ -26,7 +26,6 @@
// TODO: ObjCARCContract could insert PHI nodes when uses aren't
// dominated by single calls.
-#define DEBUG_TYPE "objc-arc-contract"
#include "ObjCARC.h"
#include "ARCRuntimeEntryPoints.h"
#include "DependencyAnalysis.h"
@@ -40,6 +39,8 @@
using namespace llvm;
using namespace llvm::objcarc;
+#define DEBUG_TYPE "objc-arc-contract"
+
STATISTIC(NumPeeps, "Number of calls peephole-optimized");
STATISTIC(NumStoreStrongs, "Number objc_storeStrong calls formed");
@@ -157,7 +158,7 @@ ObjCARCContract::ContractAutorelease(Function &F, Instruction *Autorelease,
// Check that there are no instructions between the retain and the autorelease
// (such as an autorelease_pop) which may change the count.
- CallInst *Retain = 0;
+ CallInst *Retain = nullptr;
if (Class == IC_AutoreleaseRV)
FindDependencies(RetainAutoreleaseRVDep, Arg,
Autorelease->getParent(), Autorelease,
@@ -218,7 +219,7 @@ void ObjCARCContract::ContractRelease(Instruction *Release,
BasicBlock::iterator I = Load, End = BB->end();
++I;
AliasAnalysis::Location Loc = AA->getLocation(Load);
- StoreInst *Store = 0;
+ StoreInst *Store = nullptr;
bool SawRelease = false;
for (; !Store || !SawRelease; ++I) {
if (I == End)
@@ -300,7 +301,7 @@ bool ObjCARCContract::doInitialization(Module &M) {
EP.Initialize(&M);
// Initialize RetainRVMarker.
- RetainRVMarker = 0;
+ RetainRVMarker = nullptr;
if (NamedMDNode *NMD =
M.getNamedMetadata("clang.arc.retainAutoreleasedReturnValueMarker"))
if (NMD->getNumOperands() == 1) {
diff --git a/lib/Transforms/ObjCARC/ObjCARCExpand.cpp b/lib/Transforms/ObjCARC/ObjCARCExpand.cpp
index 8bec699..bf9fcbb 100644
--- a/lib/Transforms/ObjCARC/ObjCARCExpand.cpp
+++ b/lib/Transforms/ObjCARC/ObjCARCExpand.cpp
@@ -23,8 +23,6 @@
///
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "objc-arc-expand"
-
#include "ObjCARC.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/IR/Function.h"
@@ -40,6 +38,8 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#define DEBUG_TYPE "objc-arc-expand"
+
namespace llvm {
class Module;
}
diff --git a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
index eed3cb2..dd4dd50 100644
--- a/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
+++ b/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
@@ -24,7 +24,6 @@
///
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "objc-arc-opts"
#include "ObjCARC.h"
#include "ARCRuntimeEntryPoints.h"
#include "DependencyAnalysis.h"
@@ -44,6 +43,8 @@
using namespace llvm;
using namespace llvm::objcarc;
+#define DEBUG_TYPE "objc-arc-opts"
+
/// \defgroup MiscUtils Miscellaneous utilities that are not ARC specific.
/// @{
@@ -156,7 +157,7 @@ static const Value *FindSingleUseIdentifiedObject(const Value *Arg) {
return FindSingleUseIdentifiedObject(
cast<CallInst>(Arg)->getArgOperand(0));
if (!IsObjCIdentifiedObject(Arg))
- return 0;
+ return nullptr;
return Arg;
}
@@ -165,12 +166,12 @@ static const Value *FindSingleUseIdentifiedObject(const Value *Arg) {
if (IsObjCIdentifiedObject(Arg)) {
for (const User *U : Arg->users())
if (!U->use_empty() || StripPointerCastsAndObjCCalls(U) != Arg)
- return 0;
+ return nullptr;
return Arg;
}
- return 0;
+ return nullptr;
}
/// This is a wrapper around getUnderlyingObjCPtr along the lines of
@@ -373,7 +374,7 @@ namespace {
bool CFGHazardAfflicted;
RRInfo() :
- KnownSafe(false), IsTailCallRelease(false), ReleaseMetadata(0),
+ KnownSafe(false), IsTailCallRelease(false), ReleaseMetadata(nullptr),
CFGHazardAfflicted(false) {}
void clear();
@@ -388,7 +389,7 @@ namespace {
void RRInfo::clear() {
KnownSafe = false;
IsTailCallRelease = false;
- ReleaseMetadata = 0;
+ ReleaseMetadata = nullptr;
Calls.clear();
ReverseInsertPts.clear();
CFGHazardAfflicted = false;
@@ -397,7 +398,7 @@ void RRInfo::clear() {
bool RRInfo::Merge(const RRInfo &Other) {
// Conservatively merge the ReleaseMetadata information.
if (ReleaseMetadata != Other.ReleaseMetadata)
- ReleaseMetadata = 0;
+ ReleaseMetadata = nullptr;
// Conservatively merge the boolean state.
KnownSafe &= Other.KnownSafe;
@@ -456,7 +457,7 @@ namespace {
}
bool IsTrackingImpreciseReleases() const {
- return RRI.ReleaseMetadata != 0;
+ return RRI.ReleaseMetadata != nullptr;
}
const MDNode *GetReleaseMetadata() const {
@@ -818,7 +819,7 @@ ARCAnnotationTargetIdentifier("objc-arc-annotation-target-identifier",
/// arc annotation processor tool. If the function is an
static MDString *AppendMDNodeToSourcePtr(unsigned NodeId,
Value *Ptr) {
- MDString *Hash = 0;
+ MDString *Hash = nullptr;
// If pointer is a result of an instruction and it does not have a source
// MDNode, attach a new MDNode onto it. If pointer is a result of
@@ -880,7 +881,7 @@ static void AppendMDNodeToInstForPtr(unsigned NodeId,
MDString *PtrSourceMDNodeID,
Sequence OldSeq,
Sequence NewSeq) {
- MDNode *Node = 0;
+ MDNode *Node = nullptr;
Value *tmp[3] = {PtrSourceMDNodeID,
SequenceToMDString(Inst->getContext(),
OldSeq),
@@ -916,7 +917,7 @@ static void GenerateARCBBEntranceAnnotation(const char *Name, BasicBlock *BB,
Value *PtrName;
StringRef Tmp = Ptr->getName();
- if (0 == (PtrName = M->getGlobalVariable(Tmp, true))) {
+ if (nullptr == (PtrName = M->getGlobalVariable(Tmp, true))) {
Value *ActualPtrName = Builder.CreateGlobalStringPtr(Tmp,
Tmp + "_STR");
PtrName = new GlobalVariable(*M, I8X, true, GlobalVariable::InternalLinkage,
@@ -925,7 +926,7 @@ static void GenerateARCBBEntranceAnnotation(const char *Name, BasicBlock *BB,
Value *S;
std::string SeqStr = SequenceToString(Seq);
- if (0 == (S = M->getGlobalVariable(SeqStr, true))) {
+ if (nullptr == (S = M->getGlobalVariable(SeqStr, true))) {
Value *ActualPtrName = Builder.CreateGlobalStringPtr(SeqStr,
SeqStr + "_STR");
S = new GlobalVariable(*M, I8X, true, GlobalVariable::InternalLinkage,
@@ -959,7 +960,7 @@ static void GenerateARCBBTerminatorAnnotation(const char *Name, BasicBlock *BB,
Value *PtrName;
StringRef Tmp = Ptr->getName();
- if (0 == (PtrName = M->getGlobalVariable(Tmp, true))) {
+ if (nullptr == (PtrName = M->getGlobalVariable(Tmp, true))) {
Value *ActualPtrName = Builder.CreateGlobalStringPtr(Tmp,
Tmp + "_STR");
PtrName = new GlobalVariable(*M, I8X, true, GlobalVariable::InternalLinkage,
@@ -968,7 +969,7 @@ static void GenerateARCBBTerminatorAnnotation(const char *Name, BasicBlock *BB,
Value *S;
std::string SeqStr = SequenceToString(Seq);
- if (0 == (S = M->getGlobalVariable(SeqStr, true))) {
+ if (nullptr == (S = M->getGlobalVariable(SeqStr, true))) {
Value *ActualPtrName = Builder.CreateGlobalStringPtr(SeqStr,
SeqStr + "_STR");
S = new GlobalVariable(*M, I8X, true, GlobalVariable::InternalLinkage,
@@ -1718,7 +1719,7 @@ ObjCARCOpt::VisitInstructionBottomUp(Instruction *Inst,
BBState &MyStates) {
bool NestingDetected = false;
InstructionClass Class = GetInstructionClass(Inst);
- const Value *Arg = 0;
+ const Value *Arg = nullptr;
DEBUG(dbgs() << "Class: " << Class << "\n");
@@ -1974,7 +1975,7 @@ ObjCARCOpt::VisitInstructionTopDown(Instruction *Inst,
BBState &MyStates) {
bool NestingDetected = false;
InstructionClass Class = GetInstructionClass(Inst);
- const Value *Arg = 0;
+ const Value *Arg = nullptr;
switch (Class) {
case IC_RetainBlock:
@@ -2026,7 +2027,7 @@ ObjCARCOpt::VisitInstructionTopDown(Instruction *Inst,
switch (OldSeq) {
case S_Retain:
case S_CanRelease:
- if (OldSeq == S_Retain || ReleaseMetadata != 0)
+ if (OldSeq == S_Retain || ReleaseMetadata != nullptr)
S.ClearReverseInsertPts();
// FALL THROUGH
case S_Use:
@@ -2432,7 +2433,7 @@ ObjCARCOpt::ConnectTDBUTraversals(DenseMap<const BasicBlock *, BBState>
} else {
if (ReleasesToMove.ReleaseMetadata !=
NewRetainReleaseRRI.ReleaseMetadata)
- ReleasesToMove.ReleaseMetadata = 0;
+ ReleasesToMove.ReleaseMetadata = nullptr;
if (ReleasesToMove.IsTailCallRelease !=
NewRetainReleaseRRI.IsTailCallRelease)
ReleasesToMove.IsTailCallRelease = false;
@@ -2884,7 +2885,7 @@ FindPredecessorRetainWithSafePath(const Value *Arg, BasicBlock *BB,
FindDependencies(CanChangeRetainCount, Arg,
BB, Autorelease, DepInsts, Visited, PA);
if (DepInsts.size() != 1)
- return 0;
+ return nullptr;
CallInst *Retain =
dyn_cast_or_null<CallInst>(*DepInsts.begin());
@@ -2893,7 +2894,7 @@ FindPredecessorRetainWithSafePath(const Value *Arg, BasicBlock *BB,
if (!Retain ||
!IsRetain(GetBasicInstructionClass(Retain)) ||
GetObjCArg(Retain) != Arg) {
- return 0;
+ return nullptr;
}
return Retain;
@@ -2911,17 +2912,17 @@ FindPredecessorAutoreleaseWithSafePath(const Value *Arg, BasicBlock *BB,
FindDependencies(NeedsPositiveRetainCount, Arg,
BB, Ret, DepInsts, V, PA);
if (DepInsts.size() != 1)
- return 0;
+ return nullptr;
CallInst *Autorelease =
dyn_cast_or_null<CallInst>(*DepInsts.begin());
if (!Autorelease)
- return 0;
+ return nullptr;
InstructionClass AutoreleaseClass = GetBasicInstructionClass(Autorelease);
if (!IsAutorelease(AutoreleaseClass))
- return 0;
+ return nullptr;
if (GetObjCArg(Autorelease) != Arg)
- return 0;
+ return nullptr;
return Autorelease;
}
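The chain of early nullptr returns above is typical of this file, and it leans on dyn_cast_or_null, which folds the null test and the type test into one. A tiny sketch of the idiom (helper name hypothetical):

    #include "llvm/IR/Instructions.h"
    #include "llvm/Support/Casting.h"
    using namespace llvm;

    static CallInst *asCallOrNull(Value *V) {
      // Yields nullptr for both a null input and a non-CallInst input,
      // so callers need only one guard.
      return dyn_cast_or_null<CallInst>(V);
    }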
diff --git a/lib/Transforms/Scalar/ADCE.cpp b/lib/Transforms/Scalar/ADCE.cpp
index fa8b598..1a3a4aa 100644
--- a/lib/Transforms/Scalar/ADCE.cpp
+++ b/lib/Transforms/Scalar/ADCE.cpp
@@ -14,7 +14,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "adce"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SmallPtrSet.h"
@@ -28,6 +27,8 @@
#include "llvm/Pass.h"
using namespace llvm;
+#define DEBUG_TYPE "adce"
+
STATISTIC(NumRemoved, "Number of instructions removed");
namespace {
diff --git a/lib/Transforms/Scalar/Android.mk b/lib/Transforms/Scalar/Android.mk
index 3894f93..079cc86 100644
--- a/lib/Transforms/Scalar/Android.mk
+++ b/lib/Transforms/Scalar/Android.mk
@@ -32,6 +32,7 @@ transforms_scalar_SRC_FILES := \
Scalar.cpp \
Scalarizer.cpp \
ScalarReplAggregates.cpp \
+ SeparateConstOffsetFromGEP.cpp \
SimplifyCFGPass.cpp \
Sink.cpp \
StructurizeCFG.cpp \
@@ -60,11 +61,6 @@ include $(CLEAR_VARS)
LOCAL_SRC_FILES := $(transforms_scalar_SRC_FILES)
LOCAL_MODULE:= libLLVMScalarOpts
-# Override the default optimization level to work around a SIGSEGV
-# on x86 target builds for SROA.cpp.
-# Bug: 8047767
-LOCAL_CFLAGS_x86 += -O1
-
LOCAL_MODULE_TAGS := optional
include $(LLVM_DEVICE_BUILD_MK)
diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt
index 27434c1..3ad1488 100644
--- a/lib/Transforms/Scalar/CMakeLists.txt
+++ b/lib/Transforms/Scalar/CMakeLists.txt
@@ -5,19 +5,19 @@ add_llvm_library(LLVMScalarOpts
CorrelatedValuePropagation.cpp
DCE.cpp
DeadStoreElimination.cpp
- Scalarizer.cpp
EarlyCSE.cpp
- GlobalMerge.cpp
+ FlattenCFGPass.cpp
GVN.cpp
+ GlobalMerge.cpp
IndVarSimplify.cpp
JumpThreading.cpp
LICM.cpp
LoopDeletion.cpp
LoopIdiomRecognize.cpp
LoopInstSimplify.cpp
+ LoopRerollPass.cpp
LoopRotation.cpp
LoopStrengthReduce.cpp
- LoopRerollPass.cpp
LoopUnrollPass.cpp
LoopUnswitch.cpp
LowerAtomic.cpp
@@ -25,13 +25,14 @@ add_llvm_library(LLVMScalarOpts
PartiallyInlineLibCalls.cpp
Reassociate.cpp
Reg2Mem.cpp
- SampleProfile.cpp
SCCP.cpp
SROA.cpp
+ SampleProfile.cpp
Scalar.cpp
ScalarReplAggregates.cpp
+ Scalarizer.cpp
+ SeparateConstOffsetFromGEP.cpp
SimplifyCFGPass.cpp
- FlattenCFGPass.cpp
Sink.cpp
StructurizeCFG.cpp
TailRecursionElimination.cpp
diff --git a/lib/Transforms/Scalar/ConstantHoisting.cpp b/lib/Transforms/Scalar/ConstantHoisting.cpp
index 57a1521..763d02b 100644
--- a/lib/Transforms/Scalar/ConstantHoisting.cpp
+++ b/lib/Transforms/Scalar/ConstantHoisting.cpp
@@ -33,7 +33,6 @@
// %0 = load i64* inttoptr (i64 big_constant to i64*)
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "consthoist"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
@@ -44,9 +43,12 @@
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
+#include <tuple>
using namespace llvm;
+#define DEBUG_TYPE "consthoist"
+
STATISTIC(NumConstantsHoisted, "Number of constants hoisted");
STATISTIC(NumConstantsRebased, "Number of constants rebased");
@@ -117,7 +119,8 @@ class ConstantHoisting : public FunctionPass {
SmallVector<ConstantInfo, 8> ConstantVec;
public:
static char ID; // Pass identification, replacement for typeid
- ConstantHoisting() : FunctionPass(ID), TTI(0), DT(0), Entry(0) {
+ ConstantHoisting() : FunctionPass(ID), TTI(nullptr), DT(nullptr),
+ Entry(nullptr) {
initializeConstantHoistingPass(*PassRegistry::getPassRegistry());
}
@@ -206,7 +209,16 @@ bool ConstantHoisting::runOnFunction(Function &Fn) {
/// \brief Find the constant materialization insertion point.
Instruction *ConstantHoisting::findMatInsertPt(Instruction *Inst,
unsigned Idx) const {
- // The simple and common case.
+ // If the operand is a cast instruction, then we have to materialize the
+ // constant before the cast instruction.
+ if (Idx != ~0U) {
+ Value *Opnd = Inst->getOperand(Idx);
+ if (auto CastInst = dyn_cast<Instruction>(Opnd))
+ if (CastInst->isCast())
+ return CastInst;
+ }
+
+ // The simple and common case. This also includes constant expressions.
if (!isa<PHINode>(Inst) && !isa<LandingPadInst>(Inst))
return Inst;
@@ -228,7 +240,7 @@ findConstantInsertionPoint(const ConstantInfo &ConstInfo) const {
SmallPtrSet<BasicBlock *, 8> BBs;
for (auto const &RCI : ConstInfo.RebasedConstants)
for (auto const &U : RCI.Uses)
- BBs.insert(U.Inst->getParent());
+ BBs.insert(findMatInsertPt(U.Inst, U.OpndIdx)->getParent());
if (BBs.count(Entry))
return &Entry->front();
@@ -487,8 +499,8 @@ void ConstantHoisting::emitBaseConstants(Instruction *Base, Constant *Offset,
ClonedCastInst->insertAfter(CastInst);
// Use the same debug location as the original cast instruction.
ClonedCastInst->setDebugLoc(CastInst->getDebugLoc());
- DEBUG(dbgs() << "Clone instruction: " << *ClonedCastInst << '\n'
- << "To : " << *CastInst << '\n');
+ DEBUG(dbgs() << "Clone instruction: " << *CastInst << '\n'
+ << "To : " << *ClonedCastInst << '\n');
}
DEBUG(dbgs() << "Update: " << *ConstUser.Inst << '\n');
diff --git a/lib/Transforms/Scalar/ConstantProp.cpp b/lib/Transforms/Scalar/ConstantProp.cpp
index 7045b36..dd51ce1 100644
--- a/lib/Transforms/Scalar/ConstantProp.cpp
+++ b/lib/Transforms/Scalar/ConstantProp.cpp
@@ -18,7 +18,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "constprop"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ConstantFolding.h"
@@ -31,6 +30,8 @@
#include <set>
using namespace llvm;
+#define DEBUG_TYPE "constprop"
+
STATISTIC(NumInstKilled, "Number of instructions killed");
namespace {
@@ -68,7 +69,7 @@ bool ConstantPropagation::runOnFunction(Function &F) {
}
bool Changed = false;
DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- const DataLayout *DL = DLP ? &DLP->getDataLayout() : 0;
+ const DataLayout *DL = DLP ? &DLP->getDataLayout() : nullptr;
TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>();
while (!WorkList.empty()) {
diff --git a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
index 0490767..0829462 100644
--- a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
+++ b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
@@ -11,7 +11,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "correlated-value-propagation"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/InstructionSimplify.h"
@@ -26,6 +25,8 @@
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
+#define DEBUG_TYPE "correlated-value-propagation"
+
STATISTIC(NumPhis, "Number of phis propagated");
STATISTIC(NumSelects, "Number of selects propagated");
STATISTIC(NumMemAccess, "Number of memory access targets propagated");
@@ -138,7 +139,7 @@ bool CorrelatedValuePropagation::processPHI(PHINode *P) {
}
bool CorrelatedValuePropagation::processMemAccess(Instruction *I) {
- Value *Pointer = 0;
+ Value *Pointer = nullptr;
if (LoadInst *L = dyn_cast<LoadInst>(I))
Pointer = L->getPointerOperand();
else
diff --git a/lib/Transforms/Scalar/DCE.cpp b/lib/Transforms/Scalar/DCE.cpp
index 8377fd9..99fac75 100644
--- a/lib/Transforms/Scalar/DCE.cpp
+++ b/lib/Transforms/Scalar/DCE.cpp
@@ -16,7 +16,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "dce"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/IR/InstIterator.h"
@@ -26,6 +25,8 @@
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
+#define DEBUG_TYPE "dce"
+
STATISTIC(DIEEliminated, "Number of insts removed by DIE pass");
STATISTIC(DCEEliminated, "Number of insts removed");
diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp
index f54c00d..3af8ee7 100644
--- a/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -15,7 +15,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "dse"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
@@ -38,6 +37,8 @@
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
+#define DEBUG_TYPE "dse"
+
STATISTIC(NumFastStores, "Number of stores deleted");
STATISTIC(NumFastOther , "Number of other instrs removed");
@@ -49,7 +50,7 @@ namespace {
const TargetLibraryInfo *TLI;
static char ID; // Pass identification, replacement for typeid
- DSE() : FunctionPass(ID), AA(0), MD(0), DT(0) {
+ DSE() : FunctionPass(ID), AA(nullptr), MD(nullptr), DT(nullptr) {
initializeDSEPass(*PassRegistry::getPassRegistry());
}
@@ -69,7 +70,7 @@ namespace {
if (DT->isReachableFromEntry(I))
Changed |= runOnBasicBlock(*I);
- AA = 0; MD = 0; DT = 0;
+ AA = nullptr; MD = nullptr; DT = nullptr;
return Changed;
}
@@ -111,9 +112,9 @@ FunctionPass *llvm::createDeadStoreEliminationPass() { return new DSE(); }
/// If ValueSet is non-null, remove any deleted instructions from it as well.
///
static void DeleteDeadInstruction(Instruction *I,
- MemoryDependenceAnalysis &MD,
- const TargetLibraryInfo *TLI,
- SmallSetVector<Value*, 16> *ValueSet = 0) {
+ MemoryDependenceAnalysis &MD,
+ const TargetLibraryInfo *TLI,
+ SmallSetVector<Value*, 16> *ValueSet = nullptr) {
SmallVector<Instruction*, 32> NowDeadInsts;
NowDeadInsts.push_back(I);
@@ -131,7 +132,7 @@ static void DeleteDeadInstruction(Instruction *I,
for (unsigned op = 0, e = DeadInst->getNumOperands(); op != e; ++op) {
Value *Op = DeadInst->getOperand(op);
- DeadInst->setOperand(op, 0);
+ DeadInst->setOperand(op, nullptr);
// If this operand just became dead, add it to the NowDeadInsts list.
if (!Op->use_empty()) continue;
@@ -203,13 +204,13 @@ getLocForWrite(Instruction *Inst, AliasAnalysis &AA) {
// If we don't have target data around, an unknown size in Location means
// that we should use the size of the pointee type. This isn't valid for
// memset/memcpy, which writes more than an i8.
- if (Loc.Size == AliasAnalysis::UnknownSize && DL == 0)
+ if (Loc.Size == AliasAnalysis::UnknownSize && DL == nullptr)
return AliasAnalysis::Location();
return Loc;
}
IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst);
- if (II == 0) return AliasAnalysis::Location();
+ if (!II) return AliasAnalysis::Location();
switch (II->getIntrinsicID()) {
default: return AliasAnalysis::Location(); // Unhandled intrinsic.
@@ -217,7 +218,7 @@ getLocForWrite(Instruction *Inst, AliasAnalysis &AA) {
// If we don't have target data around, an unknown size in Location means
// that we should use the size of the pointee type. This isn't valid for
// init.trampoline, which writes more than an i8.
- if (DL == 0) return AliasAnalysis::Location();
+ if (!DL) return AliasAnalysis::Location();
// FIXME: We don't know the size of the trampoline, so we can't really
// handle it here.
@@ -359,7 +360,7 @@ static OverwriteResult isOverwrite(const AliasAnalysis::Location &Later,
// If we have no DataLayout information around, then the size of the store
// is inferrable from the pointee type. If they are the same type, then
// we know that the store is safe.
- if (DL == 0 && Later.Ptr->getType() == Earlier.Ptr->getType())
+ if (DL == nullptr && Later.Ptr->getType() == Earlier.Ptr->getType())
return OverwriteComplete;
return OverwriteUnknown;
@@ -373,7 +374,7 @@ static OverwriteResult isOverwrite(const AliasAnalysis::Location &Later,
// Otherwise, we have to have size information, and the later store has to be
// larger than the earlier one.
if (Later.Size == AliasAnalysis::UnknownSize ||
- Earlier.Size == AliasAnalysis::UnknownSize || DL == 0)
+ Earlier.Size == AliasAnalysis::UnknownSize || DL == nullptr)
return OverwriteUnknown;
// Check to see if the later store is to the entire object (either a global,
@@ -461,7 +462,7 @@ static bool isPossibleSelfRead(Instruction *Inst,
// Self reads can only happen for instructions that read memory. Get the
// location read.
AliasAnalysis::Location InstReadLoc = getLocForRead(Inst, AA);
- if (InstReadLoc.Ptr == 0) return false; // Not a reading instruction.
+ if (!InstReadLoc.Ptr) return false; // Not a reading instruction.
// If the read and written loc obviously don't alias, it isn't a read.
if (AA.isNoAlias(InstReadLoc, InstStoreLoc)) return false;
@@ -528,7 +529,7 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
DeleteDeadInstruction(SI, *MD, TLI);
- if (NextInst == 0) // Next instruction deleted.
+ if (!NextInst) // Next instruction deleted.
BBI = BB.begin();
else if (BBI != BB.begin()) // Revisit this instruction if possible.
--BBI;
@@ -543,7 +544,7 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
AliasAnalysis::Location Loc = getLocForWrite(Inst, *AA);
// If we didn't get a useful location, fail.
- if (Loc.Ptr == 0)
+ if (!Loc.Ptr)
continue;
while (InstDep.isDef() || InstDep.isClobber()) {
@@ -557,7 +558,7 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
Instruction *DepWrite = InstDep.getInst();
AliasAnalysis::Location DepLoc = getLocForWrite(DepWrite, *AA);
// If we didn't get a useful location, or if the size is unknown, bail out.
- if (DepLoc.Ptr == 0)
+ if (!DepLoc.Ptr)
break;
// If we find a write that is a) removable (i.e., non-volatile), b) is
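DeleteDeadInstruction's signature cleanup sits on top of a worklist cascade worth spelling out: operands are nulled first so use counts drop, and any operand that just lost its last use is queued for deletion too. A stripped-down sketch, assuming the seed instruction is already dead and omitting the MemDep and ValueSet bookkeeping the real helper does:

    #include "llvm/ADT/SmallVector.h"
    #include "llvm/IR/Instruction.h"
    #include "llvm/Transforms/Utils/Local.h"
    using namespace llvm;

    static void deleteDeadChain(Instruction *I) {
      SmallVector<Instruction *, 32> Worklist;
      Worklist.push_back(I); // must have no remaining uses
      do {
        Instruction *Dead = Worklist.pop_back_val();
        for (unsigned op = 0, e = Dead->getNumOperands(); op != e; ++op) {
          Value *Op = Dead->getOperand(op);
          Dead->setOperand(op, nullptr);   // sever the use edge
          if (!Op->use_empty())
            continue;                      // still has other users
          if (Instruction *OpI = dyn_cast<Instruction>(Op))
            if (isInstructionTriviallyDead(OpI))
              Worklist.push_back(OpI);     // operand just became dead
        }
        Dead->eraseFromParent();
      } while (!Worklist.empty());
    }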
diff --git a/lib/Transforms/Scalar/EarlyCSE.cpp b/lib/Transforms/Scalar/EarlyCSE.cpp
index af2c3d1..735f5c1 100644
--- a/lib/Transforms/Scalar/EarlyCSE.cpp
+++ b/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -12,7 +12,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "early-cse"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/ScopedHashTable.h"
@@ -29,6 +28,8 @@
#include <vector>
using namespace llvm;
+#define DEBUG_TYPE "early-cse"
+
STATISTIC(NumSimplify, "Number of instructions simplified or DCE'd");
STATISTIC(NumCSE, "Number of instructions CSE'd");
STATISTIC(NumCSELoad, "Number of load instructions CSE'd");
@@ -207,7 +208,7 @@ namespace {
return false;
CallInst *CI = dyn_cast<CallInst>(Inst);
- if (CI == 0 || !CI->onlyReadsMemory())
+ if (!CI || !CI->onlyReadsMemory())
return false;
return true;
}
@@ -405,14 +406,14 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
// have invalidated the live-out memory values of our parent value. For now,
// just be conservative and invalidate memory if this block has multiple
// predecessors.
- if (BB->getSinglePredecessor() == 0)
+ if (!BB->getSinglePredecessor())
++CurrentGeneration;
/// LastStore - Keep track of the last non-volatile store that we saw... for
/// as long as there is no instruction that reads memory. If we see a store
/// to the same location, we delete the dead store. This zaps trivial dead
/// stores which can occur in bitfield code among other things.
- StoreInst *LastStore = 0;
+ StoreInst *LastStore = nullptr;
bool Changed = false;
@@ -462,7 +463,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
// Ignore volatile loads.
if (!LI->isSimple()) {
- LastStore = 0;
+ LastStore = nullptr;
continue;
}
@@ -470,7 +471,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
// generation, replace this instruction.
std::pair<Value*, unsigned> InVal =
AvailableLoads->lookup(Inst->getOperand(0));
- if (InVal.first != 0 && InVal.second == CurrentGeneration) {
+ if (InVal.first != nullptr && InVal.second == CurrentGeneration) {
DEBUG(dbgs() << "EarlyCSE CSE LOAD: " << *Inst << " to: "
<< *InVal.first << '\n');
if (!Inst->use_empty()) Inst->replaceAllUsesWith(InVal.first);
@@ -483,20 +484,20 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
// Otherwise, remember that we have this instruction.
AvailableLoads->insert(Inst->getOperand(0),
std::pair<Value*, unsigned>(Inst, CurrentGeneration));
- LastStore = 0;
+ LastStore = nullptr;
continue;
}
// If this instruction may read from memory, forget LastStore.
if (Inst->mayReadFromMemory())
- LastStore = 0;
+ LastStore = nullptr;
// If this is a read-only call, process it.
if (CallValue::canHandle(Inst)) {
// If we have an available version of this call, and if it is the right
// generation, replace this instruction.
std::pair<Value*, unsigned> InVal = AvailableCalls->lookup(Inst);
- if (InVal.first != 0 && InVal.second == CurrentGeneration) {
+ if (InVal.first != nullptr && InVal.second == CurrentGeneration) {
DEBUG(dbgs() << "EarlyCSE CSE CALL: " << *Inst << " to: "
<< *InVal.first << '\n');
if (!Inst->use_empty()) Inst->replaceAllUsesWith(InVal.first);
@@ -528,7 +529,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
LastStore->eraseFromParent();
Changed = true;
++NumDSE;
- LastStore = 0;
+ LastStore = nullptr;
continue;
}
@@ -558,7 +559,7 @@ bool EarlyCSE::runOnFunction(Function &F) {
std::vector<StackNode *> nodesToProcess;
DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : 0;
+ DL = DLP ? &DLP->getDataLayout() : nullptr;
TLI = &getAnalysis<TargetLibraryInfo>();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
diff --git a/lib/Transforms/Scalar/FlattenCFGPass.cpp b/lib/Transforms/Scalar/FlattenCFGPass.cpp
index e7f2564..0430c18 100644
--- a/lib/Transforms/Scalar/FlattenCFGPass.cpp
+++ b/lib/Transforms/Scalar/FlattenCFGPass.cpp
@@ -11,7 +11,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "flattencfg"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/IR/CFG.h"
@@ -19,6 +18,8 @@
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
+#define DEBUG_TYPE "flattencfg"
+
namespace {
struct FlattenCFGPass : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index 33c387c..6d07ddd 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -15,11 +15,11 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "gvn"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/Hashing.h"
+#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
@@ -50,6 +50,8 @@
using namespace llvm;
using namespace PatternMatch;
+#define DEBUG_TYPE "gvn"
+
STATISTIC(NumGVNInstr, "Number of instructions deleted");
STATISTIC(NumGVNLoad, "Number of loads deleted");
STATISTIC(NumGVNPRE, "Number of instructions PRE'd");
@@ -213,13 +215,13 @@ Expression ValueTable::create_cmp_expression(unsigned Opcode,
}
Expression ValueTable::create_extractvalue_expression(ExtractValueInst *EI) {
- assert(EI != 0 && "Not an ExtractValueInst?");
+ assert(EI && "Not an ExtractValueInst?");
Expression e;
e.type = EI->getType();
e.opcode = 0;
IntrinsicInst *I = dyn_cast<IntrinsicInst>(EI->getAggregateOperand());
- if (I != 0 && EI->getNumIndices() == 1 && *EI->idx_begin() == 0 ) {
+ if (I != nullptr && EI->getNumIndices() == 1 && *EI->idx_begin() == 0 ) {
// EI might be an extract from one of our recognised intrinsics. If it
// is we'll synthesize a semantically equivalent expression instead on
// an extract value expression.
@@ -327,7 +329,7 @@ uint32_t ValueTable::lookup_or_add_call(CallInst *C) {
const MemoryDependenceAnalysis::NonLocalDepInfo &deps =
MD->getNonLocalCallDependency(CallSite(C));
// FIXME: Move the checking logic to MemDep!
- CallInst* cdep = 0;
+ CallInst* cdep = nullptr;
// Check to see if we have a single dominating call instruction that is
// identical to C.
@@ -338,8 +340,8 @@ uint32_t ValueTable::lookup_or_add_call(CallInst *C) {
// We don't handle non-definitions. If we already have a call, reject
// instruction dependencies.
- if (!I->getResult().isDef() || cdep != 0) {
- cdep = 0;
+ if (!I->getResult().isDef() || cdep != nullptr) {
+ cdep = nullptr;
break;
}
@@ -350,7 +352,7 @@ uint32_t ValueTable::lookup_or_add_call(CallInst *C) {
continue;
}
- cdep = 0;
+ cdep = nullptr;
break;
}
@@ -551,7 +553,7 @@ namespace {
static AvailableValueInBlock getUndef(BasicBlock *BB) {
AvailableValueInBlock Res;
Res.BB = BB;
- Res.Val.setPointer(0);
+ Res.Val.setPointer(nullptr);
Res.Val.setInt(UndefVal);
Res.Offset = 0;
return Res;
@@ -611,7 +613,7 @@ namespace {
public:
static char ID; // Pass identification, replacement for typeid
explicit GVN(bool noloads = false)
- : FunctionPass(ID), NoLoads(noloads), MD(0) {
+ : FunctionPass(ID), NoLoads(noloads), MD(nullptr) {
initializeGVNPass(*PassRegistry::getPassRegistry());
}
@@ -649,7 +651,7 @@ namespace {
/// removeFromLeaderTable - Scan the list of values corresponding to a given
/// value number, and remove the given instruction if encountered.
void removeFromLeaderTable(uint32_t N, Instruction *I, BasicBlock *BB) {
- LeaderTableEntry* Prev = 0;
+ LeaderTableEntry* Prev = nullptr;
LeaderTableEntry* Curr = &LeaderTable[N];
while (Curr->Val != I || Curr->BB != BB) {
@@ -661,8 +663,8 @@ namespace {
Prev->Next = Curr->Next;
} else {
if (!Curr->Next) {
- Curr->Val = 0;
- Curr->BB = 0;
+ Curr->Val = nullptr;
+ Curr->BB = nullptr;
} else {
LeaderTableEntry* Next = Curr->Next;
Curr->Val = Next->Val;
@@ -855,7 +857,7 @@ static Value *CoerceAvailableValueToLoadType(Value *StoredVal,
Instruction *InsertPt,
const DataLayout &DL) {
if (!CanCoerceMustAliasedValueToLoad(StoredVal, LoadedTy, DL))
- return 0;
+ return nullptr;
// If this is already the right type, just return it.
Type *StoredValTy = StoredVal->getType();
@@ -1060,7 +1062,7 @@ static int AnalyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr,
const DataLayout &DL) {
// If the mem operation is a non-constant size, we can't handle it.
ConstantInt *SizeCst = dyn_cast<ConstantInt>(MI->getLength());
- if (SizeCst == 0) return -1;
+ if (!SizeCst) return -1;
uint64_t MemSizeInBits = SizeCst->getZExtValue()*8;
// If this is memset, we just need to see if the offset is valid in the size
@@ -1075,10 +1077,10 @@ static int AnalyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr,
MemTransferInst *MTI = cast<MemTransferInst>(MI);
Constant *Src = dyn_cast<Constant>(MTI->getSource());
- if (Src == 0) return -1;
+ if (!Src) return -1;
GlobalVariable *GV = dyn_cast<GlobalVariable>(GetUnderlyingObject(Src, &DL));
- if (GV == 0 || !GV->isConstant()) return -1;
+ if (!GV || !GV->isConstant()) return -1;
// See if the access is within the bounds of the transfer.
int Offset = AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr,
@@ -1420,8 +1422,7 @@ void GVN::AnalyzeLoadAvailability(LoadInst *LI, LoadDepVect &Deps,
// If this is a clobber and L is the first instruction in its block, then
// we have the first instruction in the entry block.
if (DepLI != LI && Address && DL) {
- int Offset = AnalyzeLoadFromClobberingLoad(LI->getType(),
- LI->getPointerOperand(),
+ int Offset = AnalyzeLoadFromClobberingLoad(LI->getType(), Address,
DepLI, *DL);
if (Offset != -1) {
@@ -1469,8 +1470,8 @@ void GVN::AnalyzeLoadAvailability(LoadInst *LI, LoadDepVect &Deps,
if (S->getValueOperand()->getType() != LI->getType()) {
// If the stored value is larger or equal to the loaded value, we can
// reuse it.
- if (DL == 0 || !CanCoerceMustAliasedValueToLoad(S->getValueOperand(),
- LI->getType(), *DL)) {
+ if (!DL || !CanCoerceMustAliasedValueToLoad(S->getValueOperand(),
+ LI->getType(), *DL)) {
UnavailableBlocks.push_back(DepBB);
continue;
}
@@ -1486,7 +1487,7 @@ void GVN::AnalyzeLoadAvailability(LoadInst *LI, LoadDepVect &Deps,
if (LD->getType() != LI->getType()) {
// If the stored value is larger or equal to the loaded value, we can
// reuse it.
- if (DL == 0 || !CanCoerceMustAliasedValueToLoad(LD, LI->getType(),*DL)){
+ if (!DL || !CanCoerceMustAliasedValueToLoad(LD, LI->getType(),*DL)) {
UnavailableBlocks.push_back(DepBB);
continue;
}
@@ -1539,7 +1540,7 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
// Check to see how many predecessors have the loaded value fully
// available.
- DenseMap<BasicBlock*, Value*> PredLoads;
+ MapVector<BasicBlock *, Value *> PredLoads;
DenseMap<BasicBlock*, char> FullyAvailableBlocks;
for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i)
FullyAvailableBlocks[ValuesPerBlock[i].BB] = true;
@@ -1553,7 +1554,6 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
if (IsValueFullyAvailableInBlock(Pred, FullyAvailableBlocks, 0)) {
continue;
}
- PredLoads[Pred] = 0;
if (Pred->getTerminator()->getNumSuccessors() != 1) {
if (isa<IndirectBrInst>(Pred->getTerminator())) {
@@ -1570,11 +1570,14 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
}
CriticalEdgePred.push_back(Pred);
+ } else {
+ // Only add the predecessors that will not be split for now.
+ PredLoads[Pred] = nullptr;
}
}
// Decide whether PRE is profitable for this load.
- unsigned NumUnavailablePreds = PredLoads.size();
+ unsigned NumUnavailablePreds = PredLoads.size() + CriticalEdgePred.size();
assert(NumUnavailablePreds != 0 &&
"Fully available value should already be eliminated!");
@@ -1586,12 +1589,10 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
return false;
// Split critical edges, and update the unavailable predecessors accordingly.
- for (SmallVectorImpl<BasicBlock *>::iterator I = CriticalEdgePred.begin(),
- E = CriticalEdgePred.end(); I != E; I++) {
- BasicBlock *OrigPred = *I;
+ for (BasicBlock *OrigPred : CriticalEdgePred) {
BasicBlock *NewPred = splitCriticalEdges(OrigPred, LoadBB);
- PredLoads.erase(OrigPred);
- PredLoads[NewPred] = 0;
+ assert(!PredLoads.count(OrigPred) && "Split edges shouldn't be in map!");
+ PredLoads[NewPred] = nullptr;
DEBUG(dbgs() << "Split critical edge " << OrigPred->getName() << "->"
<< LoadBB->getName() << '\n');
}
@@ -1599,9 +1600,8 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
// Check if the load can safely be moved to all the unavailable predecessors.
bool CanDoPRE = true;
SmallVector<Instruction*, 8> NewInsts;
- for (DenseMap<BasicBlock*, Value*>::iterator I = PredLoads.begin(),
- E = PredLoads.end(); I != E; ++I) {
- BasicBlock *UnavailablePred = I->first;
+ for (auto &PredLoad : PredLoads) {
+ BasicBlock *UnavailablePred = PredLoad.first;
// Do PHI translation to get its value in the predecessor if necessary. The
// returned pointer (if non-null) is guaranteed to dominate UnavailablePred.
@@ -1610,20 +1610,20 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
// the load on the pred (?!?), so we can insert code to materialize the
// pointer if it is not available.
PHITransAddr Address(LI->getPointerOperand(), DL);
- Value *LoadPtr = 0;
+ Value *LoadPtr = nullptr;
LoadPtr = Address.PHITranslateWithInsertion(LoadBB, UnavailablePred,
*DT, NewInsts);
// If we couldn't find or insert a computation of this phi translated value,
// we fail PRE.
- if (LoadPtr == 0) {
+ if (!LoadPtr) {
DEBUG(dbgs() << "COULDN'T INSERT PHI TRANSLATED VALUE OF: "
<< *LI->getPointerOperand() << "\n");
CanDoPRE = false;
break;
}
- I->second = LoadPtr;
+ PredLoad.second = LoadPtr;
}
if (!CanDoPRE) {
@@ -1632,8 +1632,8 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
if (MD) MD->removeInstruction(I);
I->eraseFromParent();
}
- // HINT:Don't revert the edge-splitting as following transformation may
- // also need to split these critial edges.
+ // HINT: Don't revert the edge-splitting as following transformation may
+ // also need to split these critical edges.
return !CriticalEdgePred.empty();
}
@@ -1654,10 +1654,9 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
VN.lookup_or_add(NewInsts[i]);
}
- for (DenseMap<BasicBlock*, Value*>::iterator I = PredLoads.begin(),
- E = PredLoads.end(); I != E; ++I) {
- BasicBlock *UnavailablePred = I->first;
- Value *LoadPtr = I->second;
+ for (const auto &PredLoad : PredLoads) {
+ BasicBlock *UnavailablePred = PredLoad.first;
+ Value *LoadPtr = PredLoad.second;
Instruction *NewLoad = new LoadInst(LoadPtr, LI->getName()+".pre", false,
LI->getAlignment(),
@@ -1776,7 +1775,7 @@ static void patchReplacementInstruction(Instruction *I, Value *Repl) {
MDNode *ReplMD = Metadata[i].second;
switch(Kind) {
default:
- ReplInst->setMetadata(Kind, NULL); // Remove unknown metadata
+ ReplInst->setMetadata(Kind, nullptr); // Remove unknown metadata
break;
case LLVMContext::MD_dbg:
llvm_unreachable("getAllMetadataOtherThanDebugLoc returned a MD_dbg");
@@ -1832,7 +1831,7 @@ bool GVN::processLoad(LoadInst *L) {
// a common base + constant offset, and if the previous store (or memset)
// completely covers this load. This sort of thing can happen in bitfield
// access code.
- Value *AvailVal = 0;
+ Value *AvailVal = nullptr;
if (StoreInst *DepSI = dyn_cast<StoreInst>(Dep.getInst())) {
int Offset = AnalyzeLoadFromClobberingStore(L->getType(),
L->getPointerOperand(),
@@ -1920,7 +1919,7 @@ bool GVN::processLoad(LoadInst *L) {
if (DL) {
StoredVal = CoerceAvailableValueToLoadType(StoredVal, L->getType(),
L, *DL);
- if (StoredVal == 0)
+ if (!StoredVal)
return false;
DEBUG(dbgs() << "GVN COERCED STORE:\n" << *DepSI << '\n' << *StoredVal
@@ -1949,7 +1948,7 @@ bool GVN::processLoad(LoadInst *L) {
if (DL) {
AvailableVal = CoerceAvailableValueToLoadType(DepLI, L->getType(),
L, *DL);
- if (AvailableVal == 0)
+ if (!AvailableVal)
return false;
DEBUG(dbgs() << "GVN COERCED LOAD:\n" << *DepLI << "\n" << *AvailableVal
@@ -1999,9 +1998,9 @@ bool GVN::processLoad(LoadInst *L) {
// a few comparisons of DFS numbers.
Value *GVN::findLeader(const BasicBlock *BB, uint32_t num) {
LeaderTableEntry Vals = LeaderTable[num];
- if (!Vals.Val) return 0;
+ if (!Vals.Val) return nullptr;
- Value *Val = 0;
+ Value *Val = nullptr;
if (DT->dominates(Vals.BB, BB)) {
Val = Vals.Val;
if (isa<Constant>(Val)) return Val;
@@ -2052,7 +2051,7 @@ static bool isOnlyReachableViaThisEdge(const BasicBlockEdge &E,
const BasicBlock *Src = E.getStart();
assert((!Pred || Pred == Src) && "No edge between these basic blocks!");
(void)Src;
- return Pred != 0;
+ return Pred != nullptr;
}
/// propagateEquality - The given values are known to be equal in every block
@@ -2296,7 +2295,7 @@ bool GVN::processInstruction(Instruction *I) {
// Perform fast-path value-number based elimination of values inherited from
// dominators.
Value *repl = findLeader(I->getParent(), Num);
- if (repl == 0) {
+ if (!repl) {
// Failure, just remember this instance for future use.
addToLeaderTable(Num, I, I->getParent());
return false;
@@ -2319,7 +2318,7 @@ bool GVN::runOnFunction(Function& F) {
MD = &getAnalysis<MemoryDependenceAnalysis>();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : 0;
+ DL = DLP ? &DLP->getDataLayout() : nullptr;
TLI = &getAnalysis<TargetLibraryInfo>();
VN.setAliasAnalysis(&getAnalysis<AliasAnalysis>());
VN.setMemDep(MD);
@@ -2421,10 +2420,7 @@ bool GVN::processBlock(BasicBlock *BB) {
bool GVN::performPRE(Function &F) {
bool Changed = false;
SmallVector<std::pair<Value*, BasicBlock*>, 8> predMap;
- for (df_iterator<BasicBlock*> DI = df_begin(&F.getEntryBlock()),
- DE = df_end(&F.getEntryBlock()); DI != DE; ++DI) {
- BasicBlock *CurrentBlock = *DI;
-
+ for (BasicBlock *CurrentBlock : depth_first(&F.getEntryBlock())) {
// Nothing to PRE in the entry block.
if (CurrentBlock == &F.getEntryBlock()) continue;
@@ -2464,7 +2460,7 @@ bool GVN::performPRE(Function &F) {
// more complicated to get right.
unsigned NumWith = 0;
unsigned NumWithout = 0;
- BasicBlock *PREPred = 0;
+ BasicBlock *PREPred = nullptr;
predMap.clear();
for (pred_iterator PI = pred_begin(CurrentBlock),
@@ -2482,8 +2478,8 @@ bool GVN::performPRE(Function &F) {
}
Value* predV = findLeader(P, ValNo);
- if (predV == 0) {
- predMap.push_back(std::make_pair(static_cast<Value *>(0), P));
+ if (!predV) {
+ predMap.push_back(std::make_pair(static_cast<Value *>(nullptr), P));
PREPred = P;
++NumWithout;
} else if (predV == CurInst) {
@@ -2637,9 +2633,8 @@ bool GVN::iterateOnFunction(Function &F) {
//
std::vector<BasicBlock *> BBVect;
BBVect.reserve(256);
- for (df_iterator<DomTreeNode*> DI = df_begin(DT->getRootNode()),
- DE = df_end(DT->getRootNode()); DI != DE; ++DI)
- BBVect.push_back(DI->getBlock());
+ for (DomTreeNode *x : depth_first(DT->getRootNode()))
+ BBVect.push_back(x->getBlock());
for (std::vector<BasicBlock *>::iterator I = BBVect.begin(), E = BBVect.end();
I != E; I++)
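The PredLoads container swap in PerformLoadPRE is not cosmetic: DenseMap iterates in hash order, which varies with pointer values, while MapVector iterates in insertion order, so the loads PRE inserts land in a deterministic order across runs. A small standalone demonstration:

    #include "llvm/ADT/MapVector.h"
    #include <cassert>

    int main() {
      llvm::MapVector<int, int> MV;
      MV[30] = 0;   // insertion order: 30, 10, 20
      MV[10] = 1;
      MV[20] = 2;
      int Expect = 0;
      for (auto &KV : MV) {
        assert(KV.second == Expect && "iterates in insertion order");
        ++Expect;
      }
      return 0;
    }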
diff --git a/lib/Transforms/Scalar/GlobalMerge.cpp b/lib/Transforms/Scalar/GlobalMerge.cpp
index 8ffd64b..990d067 100644
--- a/lib/Transforms/Scalar/GlobalMerge.cpp
+++ b/lib/Transforms/Scalar/GlobalMerge.cpp
@@ -51,7 +51,6 @@
// note that we saved 2 registers here almost "for free".
// ===---------------------------------------------------------------------===//
-#define DEBUG_TYPE "global-merge"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
@@ -70,6 +69,8 @@
#include "llvm/Target/TargetLoweringObjectFile.h"
using namespace llvm;
+#define DEBUG_TYPE "global-merge"
+
cl::opt<bool>
EnableGlobalMerge("global-merge", cl::Hidden,
cl::desc("Enable global merge pass"),
@@ -107,7 +108,7 @@ namespace {
public:
static char ID; // Pass identification, replacement for typeid.
- explicit GlobalMerge(const TargetMachine *TM = 0)
+ explicit GlobalMerge(const TargetMachine *TM = nullptr)
: FunctionPass(ID), TM(TM) {
initializeGlobalMergePass(*PassRegistry::getPassRegistry());
}
@@ -173,7 +174,8 @@ bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
GlobalVariable *MergedGV = new GlobalVariable(M, MergedTy, isConst,
GlobalValue::InternalLinkage,
MergedInit, "_MergedGlobals",
- 0, GlobalVariable::NotThreadLocal,
+ nullptr,
+ GlobalVariable::NotThreadLocal,
AddrSpace);
for (size_t k = i; k < j; ++k) {
Constant *Idx[2] = {
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
index 7537632..e83a5c4 100644
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -24,7 +24,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "indvars"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
@@ -50,6 +49,8 @@
#include "llvm/Transforms/Utils/SimplifyIndVar.h"
using namespace llvm;
+#define DEBUG_TYPE "indvars"
+
STATISTIC(NumWidened , "Number of indvars widened");
STATISTIC(NumReplaced , "Number of exit values replaced");
STATISTIC(NumLFTR , "Number of loop exit tests replaced");
@@ -79,8 +80,8 @@ namespace {
public:
static char ID; // Pass identification, replacement for typeid
- IndVarSimplify() : LoopPass(ID), LI(0), SE(0), DT(0), DL(0),
- Changed(false) {
+ IndVarSimplify() : LoopPass(ID), LI(nullptr), SE(nullptr), DT(nullptr),
+ DL(nullptr), Changed(false) {
initializeIndVarSimplifyPass(*PassRegistry::getPassRegistry());
}
@@ -196,7 +197,7 @@ static Instruction *getInsertPointForUses(Instruction *User, Value *Def,
if (!PHI)
return User;
- Instruction *InsertPt = 0;
+ Instruction *InsertPt = nullptr;
for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) {
if (PHI->getIncomingValue(i) != Def)
continue;
@@ -257,13 +258,13 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) {
// an add or increment value can not be represented by an integer.
BinaryOperator *Incr =
dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
- if (Incr == 0 || Incr->getOpcode() != Instruction::FAdd) return;
+ if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return;
// If this is not an add of the PHI with a constantfp, or if the constant fp
// is not an integer, bail out.
ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
int64_t IncValue;
- if (IncValueVal == 0 || Incr->getOperand(0) != PN ||
+ if (IncValueVal == nullptr || Incr->getOperand(0) != PN ||
!ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
return;
@@ -280,7 +281,7 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) {
FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
if (!Compare)
Compare = dyn_cast<FCmpInst>(U2);
- if (Compare == 0 || !Compare->hasOneUse() ||
+ if (!Compare || !Compare->hasOneUse() ||
!isa<BranchInst>(Compare->user_back()))
return;
@@ -301,7 +302,7 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) {
// transform it.
ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
int64_t ExitValue;
- if (ExitValueVal == 0 ||
+ if (ExitValueVal == nullptr ||
!ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
return;
@@ -651,7 +652,8 @@ namespace {
Type *WidestNativeType; // Widest integer type created [sz]ext
bool IsSigned; // Was an sext user seen before a zext?
- WideIVInfo() : NarrowIV(0), WidestNativeType(0), IsSigned(false) {}
+ WideIVInfo() : NarrowIV(nullptr), WidestNativeType(nullptr),
+ IsSigned(false) {}
};
}
@@ -693,7 +695,7 @@ struct NarrowIVDefUse {
Instruction *NarrowUse;
Instruction *WideDef;
- NarrowIVDefUse(): NarrowDef(0), NarrowUse(0), WideDef(0) {}
+ NarrowIVDefUse(): NarrowDef(nullptr), NarrowUse(nullptr), WideDef(nullptr) {}
NarrowIVDefUse(Instruction *ND, Instruction *NU, Instruction *WD):
NarrowDef(ND), NarrowUse(NU), WideDef(WD) {}
@@ -736,9 +738,9 @@ public:
L(LI->getLoopFor(OrigPhi->getParent())),
SE(SEv),
DT(DTree),
- WidePhi(0),
- WideInc(0),
- WideIncExpr(0),
+ WidePhi(nullptr),
+ WideInc(nullptr),
+ WideIncExpr(nullptr),
DeadInsts(DI) {
assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV");
}
@@ -793,7 +795,7 @@ Instruction *WidenIV::CloneIVUser(NarrowIVDefUse DU) {
unsigned Opcode = DU.NarrowUse->getOpcode();
switch (Opcode) {
default:
- return 0;
+ return nullptr;
case Instruction::Add:
case Instruction::Mul:
case Instruction::UDiv:
@@ -838,14 +840,14 @@ Instruction *WidenIV::CloneIVUser(NarrowIVDefUse DU) {
const SCEVAddRecExpr* WidenIV::GetExtendedOperandRecurrence(NarrowIVDefUse DU) {
// Handle the common case of add<nsw/nuw>
if (DU.NarrowUse->getOpcode() != Instruction::Add)
- return 0;
+ return nullptr;
// One operand (NarrowDef) has already been extended to WideDef. Now determine
// if extending the other will lead to a recurrence.
unsigned ExtendOperIdx = DU.NarrowUse->getOperand(0) == DU.NarrowDef ? 1 : 0;
assert(DU.NarrowUse->getOperand(1-ExtendOperIdx) == DU.NarrowDef && "bad DU");
- const SCEV *ExtendOperExpr = 0;
+ const SCEV *ExtendOperExpr = nullptr;
const OverflowingBinaryOperator *OBO =
cast<OverflowingBinaryOperator>(DU.NarrowUse);
if (IsSigned && OBO->hasNoSignedWrap())
@@ -855,7 +857,7 @@ const SCEVAddRecExpr* WidenIV::GetExtendedOperandRecurrence(NarrowIVDefUse DU) {
ExtendOperExpr = SE->getZeroExtendExpr(
SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
else
- return 0;
+ return nullptr;
// When creating this AddExpr, don't apply the current operations NSW or NUW
// flags. This instruction may be guarded by control flow that the no-wrap
@@ -866,7 +868,7 @@ const SCEVAddRecExpr* WidenIV::GetExtendedOperandRecurrence(NarrowIVDefUse DU) {
SE->getAddExpr(SE->getSCEV(DU.WideDef), ExtendOperExpr));
if (!AddRec || AddRec->getLoop() != L)
- return 0;
+ return nullptr;
return AddRec;
}
@@ -877,14 +879,14 @@ const SCEVAddRecExpr* WidenIV::GetExtendedOperandRecurrence(NarrowIVDefUse DU) {
/// recurrence. Otherwise return NULL.
const SCEVAddRecExpr *WidenIV::GetWideRecurrence(Instruction *NarrowUse) {
if (!SE->isSCEVable(NarrowUse->getType()))
- return 0;
+ return nullptr;
const SCEV *NarrowExpr = SE->getSCEV(NarrowUse);
if (SE->getTypeSizeInBits(NarrowExpr->getType())
>= SE->getTypeSizeInBits(WideType)) {
// NarrowUse implicitly widens its operand. e.g. a gep with a narrow
// index. So don't follow this use.
- return 0;
+ return nullptr;
}
const SCEV *WideExpr = IsSigned ?
@@ -892,7 +894,7 @@ const SCEVAddRecExpr *WidenIV::GetWideRecurrence(Instruction *NarrowUse) {
SE->getZeroExtendExpr(NarrowExpr, WideType);
const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr);
if (!AddRec || AddRec->getLoop() != L)
- return 0;
+ return nullptr;
return AddRec;
}
@@ -930,7 +932,7 @@ Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
DEBUG(dbgs() << "INDVARS: Widen lcssa phi " << *UsePhi
<< " to " << *WidePhi << "\n");
}
- return 0;
+ return nullptr;
}
}
// Our raison d'etre! Eliminate sign and zero extension.
@@ -968,7 +970,7 @@ Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
// push the uses of WideDef here.
// No further widening is needed. The deceased [sz]ext had done it for us.
- return 0;
+ return nullptr;
}
// Does this user itself evaluate to a recurrence after widening?
@@ -981,7 +983,7 @@ Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
// follow it. Instead insert a Trunc to kill off the original use,
// eventually isolating the original narrow IV so it can be removed.
truncateIVUse(DU, DT);
- return 0;
+ return nullptr;
}
// Assume block terminators cannot evaluate to a recurrence. We can't
// insert a Trunc after a terminator if there happens to be a critical edge.
@@ -990,14 +992,14 @@ Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
// Reuse the IV increment that SCEVExpander created as long as it dominates
// NarrowUse.
- Instruction *WideUse = 0;
+ Instruction *WideUse = nullptr;
if (WideAddRec == WideIncExpr
&& Rewriter.hoistIVInc(WideInc, DU.NarrowUse))
WideUse = WideInc;
else {
WideUse = CloneIVUser(DU);
if (!WideUse)
- return 0;
+ return nullptr;
}
// Evaluation of WideAddRec ensured that the narrow expression could be
// extended outside the loop without overflow. This suggests that the wide use
@@ -1008,7 +1010,7 @@ Instruction *WidenIV::WidenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse
<< ": " << *SE->getSCEV(WideUse) << " != " << *WideAddRec << "\n");
DeadInsts.push_back(WideUse);
- return 0;
+ return nullptr;
}
// Returning WideUse pushes it on the worklist.
@@ -1043,7 +1045,7 @@ PHINode *WidenIV::CreateWideIV(SCEVExpander &Rewriter) {
// Is this phi an induction variable?
const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(OrigPhi));
if (!AddRec)
- return NULL;
+ return nullptr;
// Widen the induction variable expression.
const SCEV *WideIVExpr = IsSigned ?
@@ -1056,7 +1058,7 @@ PHINode *WidenIV::CreateWideIV(SCEVExpander &Rewriter) {
// Can the IV be extended outside the loop without overflow?
AddRec = dyn_cast<SCEVAddRecExpr>(WideIVExpr);
if (!AddRec || AddRec->getLoop() != L)
- return NULL;
+ return nullptr;
// An AddRec must have loop-invariant operands. Since this AddRec is
// materialized by a loop header phi, the expression cannot have any post-loop
@@ -1282,7 +1284,7 @@ static bool canExpandBackedgeTakenCount(Loop *L, ScalarEvolution *SE) {
static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L, DominatorTree *DT) {
Instruction *IncI = dyn_cast<Instruction>(IncV);
if (!IncI)
- return 0;
+ return nullptr;
switch (IncI->getOpcode()) {
case Instruction::Add:
@@ -1293,17 +1295,17 @@ static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L, DominatorTree *DT) {
if (IncI->getNumOperands() == 2)
break;
default:
- return 0;
+ return nullptr;
}
PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
if (Phi && Phi->getParent() == L->getHeader()) {
if (isLoopInvariant(IncI->getOperand(1), L, DT))
return Phi;
- return 0;
+ return nullptr;
}
if (IncI->getOpcode() == Instruction::GetElementPtr)
- return 0;
+ return nullptr;
// Allow add/sub to be commuted.
Phi = dyn_cast<PHINode>(IncI->getOperand(1));
@@ -1311,7 +1313,7 @@ static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L, DominatorTree *DT) {
if (isLoopInvariant(IncI->getOperand(0), L, DT))
return Phi;
}
- return 0;
+ return nullptr;
}
/// Return the compare guarding the loop latch, or NULL for unrecognized tests.
@@ -1321,7 +1323,7 @@ static ICmpInst *getLoopTest(Loop *L) {
BasicBlock *LatchBlock = L->getLoopLatch();
// Don't bother with LFTR if the loop is not properly simplified.
if (!LatchBlock)
- return 0;
+ return nullptr;
BranchInst *BI = dyn_cast<BranchInst>(L->getExitingBlock()->getTerminator());
assert(BI && "expected exit branch");
@@ -1446,8 +1448,8 @@ FindLoopCounter(Loop *L, const SCEV *BECount,
cast<BranchInst>(L->getExitingBlock()->getTerminator())->getCondition();
// Loop over all of the PHI nodes, looking for a simple counter.
- PHINode *BestPhi = 0;
- const SCEV *BestInit = 0;
+ PHINode *BestPhi = nullptr;
+ const SCEV *BestInit = nullptr;
BasicBlock *LatchBlock = L->getLoopLatch();
assert(LatchBlock && "needsLFTR should guarantee a loop latch");
@@ -1571,7 +1573,7 @@ static Value *genLoopLimit(PHINode *IndVar, const SCEV *IVCount, Loop *L,
// IVInit integer and IVCount pointer would only occur if a canonical IV
// were generated on top of case #2, which is not expected.
- const SCEV *IVLimit = 0;
+ const SCEV *IVLimit = nullptr;
// For unit stride, IVCount = Start + BECount with 2's complement overflow.
// For non-zero Start, compute IVCount here.
if (AR->getStart()->isZero())
@@ -1813,7 +1815,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
SE = &getAnalysis<ScalarEvolution>();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : 0;
+ DL = DLP ? &DLP->getDataLayout() : nullptr;
TLI = getAnalysisIfAvailable<TargetLibraryInfo>();
DeadInsts.clear();
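An aside on the dominant change in this rebase, the mechanical 0/NULL-to-nullptr conversion seen throughout: a minimal standalone C++ sketch (ours, not part of the patch) of why nullptr is the safer spelling. The literal 0 participates in integer overload resolution, while nullptr converts only to pointer types.

#include <iostream>

static void f(int)    { std::cout << "f(int)\n"; }
static void f(char *) { std::cout << "f(char*)\n"; }

int main() {
  f(0);       // picks f(int): 0 is an integer literal first
  f(nullptr); // unambiguously f(char*): nullptr has no integer conversion
}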
diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp
index 067deb7..230a381 100644
--- a/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/lib/Transforms/Scalar/JumpThreading.cpp
@@ -11,7 +11,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "jump-threading"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
@@ -38,6 +37,8 @@
#include "llvm/Transforms/Utils/SSAUpdater.h"
using namespace llvm;
+#define DEBUG_TYPE "jump-threading"
+
STATISTIC(NumThreads, "Number of jumps threaded");
STATISTIC(NumFolds, "Number of terminators folded");
STATISTIC(NumDupes, "Number of branch blocks duplicated to eliminate phi");
@@ -153,7 +154,7 @@ bool JumpThreading::runOnFunction(Function &F) {
DEBUG(dbgs() << "Jump threading on function '" << F.getName() << "'\n");
DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : 0;
+ DL = DLP ? &DLP->getDataLayout() : nullptr;
TLI = &getAnalysis<TargetLibraryInfo>();
LVI = &getAnalysis<LazyValueInfo>();
@@ -308,7 +309,7 @@ void JumpThreading::FindLoopHeaders(Function &F) {
/// Returns null if Val is null or not an appropriate constant.
static Constant *getKnownConstant(Value *Val, ConstantPreference Preference) {
if (!Val)
- return 0;
+ return nullptr;
// Undef is "known" enough.
if (UndefValue *U = dyn_cast<UndefValue>(Val))
@@ -352,7 +353,7 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result,
// If V is a non-instruction value, or an instruction in a different block,
// then it can't be derived from a PHI.
Instruction *I = dyn_cast<Instruction>(V);
- if (I == 0 || I->getParent() != BB) {
+ if (!I || I->getParent() != BB) {
// Okay, if this is a live-in value, see if it has a known value at the end
// of any of our predecessors.
@@ -495,7 +496,7 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result,
Value *RHS = Cmp->getOperand(1)->DoPHITranslation(BB, PredBB);
Value *Res = SimplifyCmpInst(Cmp->getPredicate(), LHS, RHS, DL);
- if (Res == 0) {
+ if (!Res) {
if (!isa<Constant>(RHS))
continue;
@@ -581,7 +582,7 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result,
// Either operand will do, so be sure to pick the one that's a known
// constant.
// FIXME: Do this more cleverly if both values are known constants?
- KnownCond = (TrueVal != 0);
+ KnownCond = (TrueVal != nullptr);
}
// See if the select has a known constant value for this predecessor.
@@ -737,7 +738,7 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
Instruction *CondInst = dyn_cast<Instruction>(Condition);
// All the rest of our checks depend on the condition being an instruction.
- if (CondInst == 0) {
+ if (!CondInst) {
// FIXME: Unify this with code below.
if (ProcessThreadableEdges(Condition, BB, Preference))
return true;
@@ -890,7 +891,7 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
SmallPtrSet<BasicBlock*, 8> PredsScanned;
typedef SmallVector<std::pair<BasicBlock*, Value*>, 8> AvailablePredsTy;
AvailablePredsTy AvailablePreds;
- BasicBlock *OneUnavailablePred = 0;
+ BasicBlock *OneUnavailablePred = nullptr;
// If we got here, the loaded value is transparent through to the start of the
// block. Check to see if it is available in any of the predecessor blocks.
@@ -904,16 +905,16 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
// Scan the predecessor to see if the value is available in the pred.
BBIt = PredBB->end();
- MDNode *ThisTBAATag = 0;
+ MDNode *ThisTBAATag = nullptr;
Value *PredAvailable = FindAvailableLoadedValue(LoadedPtr, PredBB, BBIt, 6,
- 0, &ThisTBAATag);
+ nullptr, &ThisTBAATag);
if (!PredAvailable) {
OneUnavailablePred = PredBB;
continue;
}
// If tbaa tags disagree or are not present, forget about them.
- if (TBAATag != ThisTBAATag) TBAATag = 0;
+ if (TBAATag != ThisTBAATag) TBAATag = nullptr;
// If so, this load is partially redundant. Remember this info so that we
// can create a PHI node.
@@ -929,7 +930,7 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
// predecessor, we want to insert a merge block for those common predecessors.
// This ensures that we only have to insert one reload, thus not increasing
// code size.
- BasicBlock *UnavailablePred = 0;
+ BasicBlock *UnavailablePred = nullptr;
// If there is exactly one predecessor where the value is unavailable, the
// already computed 'OneUnavailablePred' block is it. If it ends in an
@@ -996,7 +997,7 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
BasicBlock *P = *PI;
AvailablePredsTy::iterator I =
std::lower_bound(AvailablePreds.begin(), AvailablePreds.end(),
- std::make_pair(P, (Value*)0));
+ std::make_pair(P, (Value*)nullptr));
assert(I != AvailablePreds.end() && I->first == P &&
"Didn't find entry for predecessor!");
@@ -1103,7 +1104,7 @@ bool JumpThreading::ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
SmallPtrSet<BasicBlock*, 16> SeenPreds;
SmallVector<std::pair<BasicBlock*, BasicBlock*>, 16> PredToDestList;
- BasicBlock *OnlyDest = 0;
+ BasicBlock *OnlyDest = nullptr;
BasicBlock *MultipleDestSentinel = (BasicBlock*)(intptr_t)~0ULL;
for (unsigned i = 0, e = PredValues.size(); i != e; ++i) {
@@ -1120,7 +1121,7 @@ bool JumpThreading::ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
BasicBlock *DestBB;
if (isa<UndefValue>(Val))
- DestBB = 0;
+ DestBB = nullptr;
else if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator()))
DestBB = BI->getSuccessor(cast<ConstantInt>(Val)->isZero());
else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
@@ -1171,7 +1172,7 @@ bool JumpThreading::ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
// If the threadable edges are branching on an undefined value, we get to pick
// the destination that these predecessors should get to.
- if (MostPopularDest == 0)
+ if (!MostPopularDest)
MostPopularDest = BB->getTerminator()->
getSuccessor(GetBestDestForJumpOnUndef(BB));
@@ -1273,7 +1274,7 @@ bool JumpThreading::ProcessBranchOnXOR(BinaryOperator *BO) {
}
// Determine which value to split on, true, false, or undef if neither.
- ConstantInt *SplitVal = 0;
+ ConstantInt *SplitVal = nullptr;
if (NumTrue > NumFalse)
SplitVal = ConstantInt::getTrue(BB->getContext());
else if (NumTrue != 0 || NumFalse != 0)
@@ -1294,7 +1295,7 @@ bool JumpThreading::ProcessBranchOnXOR(BinaryOperator *BO) {
// help us. However, we can just replace the LHS or RHS with the constant.
if (BlocksToFoldInto.size() ==
cast<PHINode>(BB->front()).getNumIncomingValues()) {
- if (SplitVal == 0) {
+ if (!SplitVal) {
// If all preds provide undef, just nuke the xor, because it is undef too.
BO->replaceAllUsesWith(UndefValue::get(BO->getType()));
BO->eraseFromParent();
@@ -1531,7 +1532,7 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
// can just clone the bits from BB into the end of the new PredBB.
BranchInst *OldPredBranch = dyn_cast<BranchInst>(PredBB->getTerminator());
- if (OldPredBranch == 0 || !OldPredBranch->isUnconditional()) {
+ if (!OldPredBranch || !OldPredBranch->isUnconditional()) {
PredBB = SplitEdge(PredBB, BB, this);
OldPredBranch = cast<BranchInst>(PredBB->getTerminator());
}
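The DEBUG_TYPE relocations repeated throughout this commit all follow one pattern: define the macro only after all includes. A standalone sketch (ours; the header-owned macro is hypothetical) of the collision the new ordering avoids:

#include <cstdio>

// A header may define DEBUG_TYPE for its own inline code and undef it again.
#define DEBUG_TYPE "header-owned"
static const char *headerDebugType() { return DEBUG_TYPE; }
#undef DEBUG_TYPE

// The convention adopted in this patch: the .cpp file names its own
// DEBUG_TYPE only after every #include, so the definitions cannot clash.
#define DEBUG_TYPE "jump-threading"

int main() {
  std::printf("header sees: %s\n", headerDebugType()); // header-owned
  std::printf("pass sees:   %s\n", DEBUG_TYPE);        // jump-threading
}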
diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp
index b69f2dc..0a8d16f 100644
--- a/lib/Transforms/Scalar/LICM.cpp
+++ b/lib/Transforms/Scalar/LICM.cpp
@@ -30,7 +30,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "licm"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
@@ -60,6 +59,8 @@
#include <algorithm>
using namespace llvm;
+#define DEBUG_TYPE "licm"
+
STATISTIC(NumSunk , "Number of instructions sunk out of loop");
STATISTIC(NumHoisted , "Number of instructions hoisted out of loop");
STATISTIC(NumMovedLoads, "Number of load insts hoisted or sunk");
@@ -223,7 +224,7 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) {
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : 0;
+ DL = DLP ? &DLP->getDataLayout() : nullptr;
TLI = &getAnalysis<TargetLibraryInfo>();
assert(L->isLCSSAForm(*DT) && "Loop is not in LCSSA form.");
@@ -315,8 +316,8 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) {
"Parent loop not left in LCSSA form after LICM!");
// Clear out loops state information for the next iteration
- CurLoop = 0;
- Preheader = 0;
+ CurLoop = nullptr;
+ Preheader = nullptr;
// If this loop is nested inside of another one, save the alias information
// for when we process the outer loop.
@@ -334,7 +335,7 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) {
/// iteration.
///
void LICM::SinkRegion(DomTreeNode *N) {
- assert(N != 0 && "Null dominator tree node?");
+ assert(N != nullptr && "Null dominator tree node?");
BasicBlock *BB = N->getBlock();
// If this subregion is not in the top level loop at all, exit.
@@ -381,7 +382,7 @@ void LICM::SinkRegion(DomTreeNode *N) {
/// before uses, allowing us to hoist a loop body in one pass without iteration.
///
void LICM::HoistRegion(DomTreeNode *N) {
- assert(N != 0 && "Null dominator tree node?");
+ assert(N != nullptr && "Null dominator tree node?");
BasicBlock *BB = N->getBlock();
// If this subregion is not in the top level loop at all, exit.
@@ -774,7 +775,7 @@ void LICM::PromoteAliasSet(AliasSet &AS,
// We start with an alignment of one and try to find instructions that allow
// us to prove better alignment.
unsigned Alignment = 1;
- MDNode *TBAATag = 0;
+ MDNode *TBAATag = nullptr;
// Check that all of the pointers in the alias set have the same type. We
// cannot (yet) promote a memory location that is loaded and stored in
diff --git a/lib/Transforms/Scalar/LoopDeletion.cpp b/lib/Transforms/Scalar/LoopDeletion.cpp
index 9a520c8..5ab686a 100644
--- a/lib/Transforms/Scalar/LoopDeletion.cpp
+++ b/lib/Transforms/Scalar/LoopDeletion.cpp
@@ -14,7 +14,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "loop-delete"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
@@ -23,6 +22,8 @@
#include "llvm/IR/Dominators.h"
using namespace llvm;
+#define DEBUG_TYPE "loop-delete"
+
STATISTIC(NumDeleted, "Number of loops deleted");
namespace {
diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index e5e8b84..26a83df 100644
--- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -41,7 +41,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "loop-idiom"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
@@ -61,6 +60,8 @@
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
+#define DEBUG_TYPE "loop-idiom"
+
STATISTIC(NumMemSet, "Number of memset's formed from loop stores");
STATISTIC(NumMemCpy, "Number of memcpy's formed from loop load+stores");
@@ -114,7 +115,7 @@ namespace {
Value *matchCondition (BranchInst *Br, BasicBlock *NonZeroTarget) const;
/// Return true iff the idiom is detected in the loop, and 1) \p CntInst
- /// is set to the instruction counting the pupulation bit. 2) \p CntPhi
+ /// is set to the instruction counting the population bit. 2) \p CntPhi
/// is set to the corresponding phi node. 3) \p Var is set to the value
/// whose population bits are being counted.
bool detectIdiom
@@ -138,7 +139,7 @@ namespace {
static char ID;
explicit LoopIdiomRecognize() : LoopPass(ID) {
initializeLoopIdiomRecognizePass(*PassRegistry::getPassRegistry());
- DL = 0; DT = 0; SE = 0; TLI = 0; TTI = 0;
+ DL = nullptr; DT = nullptr; SE = nullptr; TLI = nullptr; TTI = nullptr;
}
bool runOnLoop(Loop *L, LPPassManager &LPM) override;
@@ -182,7 +183,7 @@ namespace {
if (DL)
return DL;
DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : 0;
+ DL = DLP ? &DLP->getDataLayout() : nullptr;
return DL;
}
@@ -247,7 +248,7 @@ static void deleteDeadInstruction(Instruction *I, ScalarEvolution &SE,
for (unsigned op = 0, e = DeadInst->getNumOperands(); op != e; ++op) {
Value *Op = DeadInst->getOperand(op);
- DeadInst->setOperand(op, 0);
+ DeadInst->setOperand(op, nullptr);
// If this operand just became dead, add it to the NowDeadInsts list.
if (!Op->use_empty()) continue;
@@ -292,9 +293,9 @@ bool LIRUtil::isAlmostEmpty(BasicBlock *BB) {
BasicBlock *LIRUtil::getPrecondBb(BasicBlock *PreHead) {
if (BasicBlock *BB = PreHead->getSinglePredecessor()) {
BranchInst *Br = getBranch(BB);
- return Br && Br->isConditional() ? BB : 0;
+ return Br && Br->isConditional() ? BB : nullptr;
}
- return 0;
+ return nullptr;
}
//===----------------------------------------------------------------------===//
@@ -304,7 +305,7 @@ BasicBlock *LIRUtil::getPrecondBb(BasicBlock *PreHead) {
//===----------------------------------------------------------------------===//
NclPopcountRecognize::NclPopcountRecognize(LoopIdiomRecognize &TheLIR):
- LIR(TheLIR), CurLoop(TheLIR.getLoop()), PreCondBB(0) {
+ LIR(TheLIR), CurLoop(TheLIR.getLoop()), PreCondBB(nullptr) {
}
bool NclPopcountRecognize::preliminaryScreen() {
@@ -341,25 +342,25 @@ bool NclPopcountRecognize::preliminaryScreen() {
return true;
}
-Value *NclPopcountRecognize::matchCondition (BranchInst *Br,
- BasicBlock *LoopEntry) const {
+Value *NclPopcountRecognize::matchCondition(BranchInst *Br,
+ BasicBlock *LoopEntry) const {
if (!Br || !Br->isConditional())
- return 0;
+ return nullptr;
ICmpInst *Cond = dyn_cast<ICmpInst>(Br->getCondition());
if (!Cond)
- return 0;
+ return nullptr;
ConstantInt *CmpZero = dyn_cast<ConstantInt>(Cond->getOperand(1));
if (!CmpZero || !CmpZero->isZero())
- return 0;
+ return nullptr;
ICmpInst::Predicate Pred = Cond->getPredicate();
if ((Pred == ICmpInst::ICMP_NE && Br->getSuccessor(0) == LoopEntry) ||
(Pred == ICmpInst::ICMP_EQ && Br->getSuccessor(1) == LoopEntry))
return Cond->getOperand(0);
- return 0;
+ return nullptr;
}
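matchCondition, as reformatted above, recognizes a guard of the form br (icmp {ne,eq} X, 0) that enters the loop exactly when X is nonzero, and returns X. A toy standalone model (ours, with plain structs standing in for the IR types):

#include <cassert>

enum class Pred { EQ, NE };
struct CondBranch { Pred P; long RHS; int Succ0, Succ1; long X; };

static const long *matchCondition(const CondBranch &Br, int LoopEntry) {
  if (Br.RHS != 0)
    return nullptr;              // must compare against zero
  if ((Br.P == Pred::NE && Br.Succ0 == LoopEntry) ||
      (Br.P == Pred::EQ && Br.Succ1 == LoopEntry))
    return &Br.X;                // the loop runs exactly while X != 0
  return nullptr;
}

int main() {
  CondBranch Br{Pred::NE, 0, /*Succ0=*/1, /*Succ1=*/2, /*X=*/42};
  assert(matchCondition(Br, /*LoopEntry=*/1) != nullptr);
  assert(matchCondition(Br, /*LoopEntry=*/2) == nullptr);
}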
bool NclPopcountRecognize::detectIdiom(Instruction *&CntInst,
@@ -390,9 +391,9 @@ bool NclPopcountRecognize::detectIdiom(Instruction *&CntInst,
Value *VarX1, *VarX0;
PHINode *PhiX, *CountPhi;
- DefX2 = CountInst = 0;
- VarX1 = VarX0 = 0;
- PhiX = CountPhi = 0;
+ DefX2 = CountInst = nullptr;
+ VarX1 = VarX0 = nullptr;
+ PhiX = CountPhi = nullptr;
LoopEntry = *(CurLoop->block_begin());
// step 1: Check if the loop-back branch is in desirable form.
@@ -439,7 +440,7 @@ bool NclPopcountRecognize::detectIdiom(Instruction *&CntInst,
// step 4: Find the instruction which counts the population: cnt2 = cnt1 + 1
{
- CountInst = NULL;
+ CountInst = nullptr;
for (BasicBlock::iterator Iter = LoopEntry->getFirstNonPHI(),
IterE = LoopEntry->end(); Iter != IterE; Iter++) {
Instruction *Inst = Iter;
@@ -744,7 +745,7 @@ bool LoopIdiomRecognize::runOnLoopBlock(BasicBlock *BB, const SCEV *BECount,
// If processing the store invalidated our iterator, start over from the
// top of the block.
- if (InstPtr == 0)
+ if (!InstPtr)
I = BB->begin();
continue;
}
@@ -757,7 +758,7 @@ bool LoopIdiomRecognize::runOnLoopBlock(BasicBlock *BB, const SCEV *BECount,
// If processing the memset invalidated our iterator, start over from the
// top of the block.
- if (InstPtr == 0)
+ if (!InstPtr)
I = BB->begin();
continue;
}
@@ -784,7 +785,7 @@ bool LoopIdiomRecognize::processLoopStore(StoreInst *SI, const SCEV *BECount) {
// random store we can't handle.
const SCEVAddRecExpr *StoreEv =
dyn_cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
- if (StoreEv == 0 || StoreEv->getLoop() != CurLoop || !StoreEv->isAffine())
+ if (!StoreEv || StoreEv->getLoop() != CurLoop || !StoreEv->isAffine())
return false;
// Check to see if the stride matches the size of the store. If so, then we
@@ -792,7 +793,7 @@ bool LoopIdiomRecognize::processLoopStore(StoreInst *SI, const SCEV *BECount) {
unsigned StoreSize = (unsigned)SizeInBits >> 3;
const SCEVConstant *Stride = dyn_cast<SCEVConstant>(StoreEv->getOperand(1));
- if (Stride == 0 || StoreSize != Stride->getValue()->getValue()) {
+ if (!Stride || StoreSize != Stride->getValue()->getValue()) {
// TODO: Could also handle negative stride here someday, that will require
// the validity check in mayLoopAccessLocation to be updated though.
// Enable this to print exact negative strides.
@@ -841,7 +842,7 @@ processLoopMemSet(MemSetInst *MSI, const SCEV *BECount) {
// loop, which indicates a strided store. If we have something else, it's a
// random store we can't handle.
const SCEVAddRecExpr *Ev = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Pointer));
- if (Ev == 0 || Ev->getLoop() != CurLoop || !Ev->isAffine())
+ if (!Ev || Ev->getLoop() != CurLoop || !Ev->isAffine())
return false;
// Reject memsets that are so large that they overflow an unsigned.
@@ -855,7 +856,7 @@ processLoopMemSet(MemSetInst *MSI, const SCEV *BECount) {
// TODO: Could also handle negative stride here someday, that will require the
// validity check in mayLoopAccessLocation to be updated though.
- if (Stride == 0 || MSI->getLength() != Stride->getValue())
+ if (!Stride || MSI->getLength() != Stride->getValue())
return false;
return processLoopStridedStore(Pointer, (unsigned)SizeInBytes,
@@ -908,23 +909,23 @@ static Constant *getMemSetPatternValue(Value *V, const DataLayout &DL) {
// array. We could theoretically do a store to an alloca or something, but
// that doesn't seem worthwhile.
Constant *C = dyn_cast<Constant>(V);
- if (C == 0) return 0;
+ if (!C) return nullptr;
// Only handle simple values that are a power of two bytes in size.
uint64_t Size = DL.getTypeSizeInBits(V->getType());
if (Size == 0 || (Size & 7) || (Size & (Size-1)))
- return 0;
+ return nullptr;
// Don't care enough about darwin/ppc to implement this.
if (DL.isBigEndian())
- return 0;
+ return nullptr;
// Convert to size in bytes.
Size /= 8;
// TODO: If CI is larger than 16-bytes, we can try slicing it in half to see
// if the top and bottom are the same (e.g. for vectors and large integers).
- if (Size > 16) return 0;
+ if (Size > 16) return nullptr;
// If the constant is exactly 16 bytes, just use it.
if (Size == 16) return C;
@@ -949,7 +950,7 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
// are stored. A store of i32 0x01020304 can never be turned into a memset,
// but it can be turned into memset_pattern if the target supports it.
Value *SplatValue = isBytewiseValue(StoredVal);
- Constant *PatternValue = 0;
+ Constant *PatternValue = nullptr;
unsigned DestAS = DestPtr->getType()->getPointerAddressSpace();
@@ -960,13 +961,13 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
// promote the memset.
CurLoop->isLoopInvariant(SplatValue)) {
// Keep and use SplatValue.
- PatternValue = 0;
+ PatternValue = nullptr;
} else if (DestAS == 0 &&
TLI->has(LibFunc::memset_pattern16) &&
(PatternValue = getMemSetPatternValue(StoredVal, *DL))) {
// Don't create memset_pattern16s with address spaces.
// It looks like we can use PatternValue!
- SplatValue = 0;
+ SplatValue = nullptr;
} else {
// Otherwise, this isn't an idiom we can transform. For example, we can't
// do anything with a 3-byte store.
@@ -1033,7 +1034,7 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
Int8PtrTy,
Int8PtrTy,
IntPtr,
- (void*)0);
+ (void*)nullptr);
// Otherwise we should form a memset_pattern16. PatternValue is known to be
// a constant array of 16 bytes. Plop the value into a mergeable global.
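processLoopStridedStore, per the hunks above, keeps exactly one of SplatValue and PatternValue: an all-equal-bytes value becomes a plain memset, a power-of-two-sized value of at most 16 bytes can become memset_pattern16 when the target provides it, and anything else (such as a 3-byte store) is given up on. A standalone sketch (ours) of that classification, ignoring the loop-invariance and address-space checks the real code also performs:

#include <cassert>
#include <cstddef>
#include <cstdint>

struct StridedStoreKind {
  bool UseMemSet;          // all bytes equal: plain memset
  bool UseMemSetPattern16; // repeatable pattern: memset_pattern16
};

static StridedStoreKind classify(const uint8_t *Bytes, size_t Size,
                                 bool TargetHasPattern16) {
  bool AllEqual = true;
  for (size_t i = 1; i < Size; ++i)
    AllEqual &= (Bytes[i] == Bytes[0]);
  if (AllEqual)
    return {true, false};  // e.g. i32 0x01010101
  if (TargetHasPattern16 && Size != 0 && Size <= 16 &&
      (Size & (Size - 1)) == 0)
    return {false, true};  // e.g. i32 0x01020304
  return {false, false};   // e.g. a 3-byte store: not an idiom we handle
}

int main() {
  const uint8_t Splat[4] = {1, 1, 1, 1};
  const uint8_t Pat[4]   = {1, 2, 3, 4};
  assert(classify(Splat, 4, true).UseMemSet);
  assert(classify(Pat, 4, true).UseMemSetPattern16);
  assert(!classify(Pat, 4, false).UseMemSetPattern16); // no libcall: give up
}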
diff --git a/lib/Transforms/Scalar/LoopInstSimplify.cpp b/lib/Transforms/Scalar/LoopInstSimplify.cpp
index 263ba93..ab1a939 100644
--- a/lib/Transforms/Scalar/LoopInstSimplify.cpp
+++ b/lib/Transforms/Scalar/LoopInstSimplify.cpp
@@ -11,7 +11,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "loop-instsimplify"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
@@ -26,6 +25,8 @@
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
+#define DEBUG_TYPE "loop-instsimplify"
+
STATISTIC(NumSimplified, "Number of redundant instructions simplified");
namespace {
@@ -70,10 +71,10 @@ bool LoopInstSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
DominatorTreeWrapperPass *DTWP =
getAnalysisIfAvailable<DominatorTreeWrapperPass>();
- DominatorTree *DT = DTWP ? &DTWP->getDomTree() : 0;
+ DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr;
LoopInfo *LI = &getAnalysis<LoopInfo>();
DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- const DataLayout *DL = DLP ? &DLP->getDataLayout() : 0;
+ const DataLayout *DL = DLP ? &DLP->getDataLayout() : nullptr;
const TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>();
SmallVector<BasicBlock*, 8> ExitBlocks;
@@ -126,7 +127,15 @@ bool LoopInstSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
++NumSimplified;
}
}
- LocalChanged |= RecursivelyDeleteTriviallyDeadInstructions(I, TLI);
+ bool res = RecursivelyDeleteTriviallyDeadInstructions(I, TLI);
+ if (res) {
+ // RecursivelyDeleteTriviallyDeadInstructions can remove
+ // more than one instruction, so simply incrementing the
+ // iterator does not work. When instructions get deleted,
+ // re-iterate instead.
+ BI = BB->begin(); BE = BB->end();
+ LocalChanged |= res;
+ }
if (IsSubloopHeader && !isa<PHINode>(I))
break;
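The LoopInstSimplify hunk above fixes an iterator-invalidation bug: RecursivelyDeleteTriviallyDeadInstructions can remove instructions other than the current one, so simply advancing the iterator is unsafe, and the scan restarts after any deletion. A self-contained illustration (ours) of the same pattern on a std::list:

#include <iostream>
#include <list>

// Erase V and 2*V from L; returns true if anything was erased.
static bool eraseValueAndDouble(std::list<int> &L, int V) {
  bool Erased = false;
  for (auto It = L.begin(); It != L.end();)
    if (*It == V || *It == 2 * V) { It = L.erase(It); Erased = true; }
    else ++It;
  return Erased;
}

int main() {
  std::list<int> Work{3, -4, 7, -8, 9};
  for (auto It = Work.begin(), End = Work.end(); It != End; ++It)
    if (*It < 0 && eraseValueAndDouble(Work, *It)) {
      // 'It' (and possibly other iterators) may now dangle: restart the
      // scan from the beginning, exactly as the patch does with BB->begin().
      It = Work.begin();
      End = Work.end();
    }
  for (int V : Work) std::cout << V << ' '; // prints: 3 7 9
  std::cout << '\n';
}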
diff --git a/lib/Transforms/Scalar/LoopRerollPass.cpp b/lib/Transforms/Scalar/LoopRerollPass.cpp
index 81c1e42..8b5e036 100644
--- a/lib/Transforms/Scalar/LoopRerollPass.cpp
+++ b/lib/Transforms/Scalar/LoopRerollPass.cpp
@@ -11,7 +11,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "loop-reroll"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
@@ -36,6 +35,8 @@
using namespace llvm;
+#define DEBUG_TYPE "loop-reroll"
+
STATISTIC(NumRerolledLoops, "Number of rerolled loops");
static cl::opt<unsigned>
@@ -945,7 +946,7 @@ bool LoopReroll::reroll(Instruction *IV, Loop *L, BasicBlock *Header,
bool InReduction = Reductions.isPairInSame(J1, J2);
if (!(InReduction && J1->isAssociative())) {
- bool Swapped = false, SomeOpMatched = false;;
+ bool Swapped = false, SomeOpMatched = false;
for (unsigned j = 0; j < J1->getNumOperands() && !MatchFailed; ++j) {
Value *Op2 = J2->getOperand(j);
@@ -1133,7 +1134,7 @@ bool LoopReroll::runOnLoop(Loop *L, LPPassManager &LPM) {
SE = &getAnalysis<ScalarEvolution>();
TLI = &getAnalysis<TargetLibraryInfo>();
DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : 0;
+ DL = DLP ? &DLP->getDataLayout() : nullptr;
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
BasicBlock *Header = L->getHeader();
diff --git a/lib/Transforms/Scalar/LoopRotation.cpp b/lib/Transforms/Scalar/LoopRotation.cpp
index fde6bac..2ce5831 100644
--- a/lib/Transforms/Scalar/LoopRotation.cpp
+++ b/lib/Transforms/Scalar/LoopRotation.cpp
@@ -11,7 +11,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "loop-rotate"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/CodeMetrics.h"
@@ -24,6 +23,7 @@
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
@@ -31,7 +31,11 @@
#include "llvm/Transforms/Utils/ValueMapper.h"
using namespace llvm;
-#define MAX_HEADER_SIZE 16
+#define DEBUG_TYPE "loop-rotate"
+
+static cl::opt<unsigned>
+DefaultRotationThreshold("rotation-max-header-size", cl::init(16), cl::Hidden,
+ cl::desc("The default maximum header size for automatic loop rotation"));
STATISTIC(NumRotated, "Number of loops rotated");
namespace {
@@ -39,8 +43,12 @@ namespace {
class LoopRotate : public LoopPass {
public:
static char ID; // Pass ID, replacement for typeid
- LoopRotate() : LoopPass(ID) {
+ LoopRotate(int SpecifiedMaxHeaderSize = -1) : LoopPass(ID) {
initializeLoopRotatePass(*PassRegistry::getPassRegistry());
+ if (SpecifiedMaxHeaderSize == -1)
+ MaxHeaderSize = DefaultRotationThreshold;
+ else
+ MaxHeaderSize = unsigned(SpecifiedMaxHeaderSize);
}
// LCSSA form makes instruction renaming easier.
@@ -61,6 +69,7 @@ namespace {
bool rotateLoop(Loop *L, bool SimplifiedLatch);
private:
+ unsigned MaxHeaderSize;
LoopInfo *LI;
const TargetTransformInfo *TTI;
};
@@ -74,7 +83,9 @@ INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_DEPENDENCY(LCSSA)
INITIALIZE_PASS_END(LoopRotate, "loop-rotate", "Rotate Loops", false, false)
-Pass *llvm::createLoopRotatePass() { return new LoopRotate(); }
+Pass *llvm::createLoopRotatePass(int MaxHeaderSize) {
+ return new LoopRotate(MaxHeaderSize);
+}
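The new signature keeps existing callers working by treating -1 as "defer to the -rotation-max-header-size flag". A standalone model (ours) of that sentinel idiom:

#include <iostream>

static unsigned DefaultRotationThreshold = 16; // stand-in for the cl::opt

struct LoopRotate {
  unsigned MaxHeaderSize;
  explicit LoopRotate(int SpecifiedMaxHeaderSize = -1)
      : MaxHeaderSize(SpecifiedMaxHeaderSize == -1
                          ? DefaultRotationThreshold
                          : unsigned(SpecifiedMaxHeaderSize)) {}
};

int main() {
  std::cout << LoopRotate().MaxHeaderSize << '\n';   // 16: from the flag
  std::cout << LoopRotate(32).MaxHeaderSize << '\n'; // 32: caller-pinned
}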
/// Rotate Loop L as many times as possible. Return true if
/// the loop is rotated at least once.
@@ -82,6 +93,9 @@ bool LoopRotate::runOnLoop(Loop *L, LPPassManager &LPM) {
if (skipOptnoneFunction(L))
return false;
+ // Save the loop metadata.
+ MDNode *LoopMD = L->getLoopID();
+
LI = &getAnalysis<LoopInfo>();
TTI = &getAnalysis<TargetTransformInfo>();
@@ -96,6 +110,12 @@ bool LoopRotate::runOnLoop(Loop *L, LPPassManager &LPM) {
MadeChange = true;
SimplifiedLatch = false;
}
+
+ // Restore the loop metadata.
+ // NB: We assume LoopRotation does not add its own metadata.
+ if ((MadeChange || SimplifiedLatch) && LoopMD)
+ L->setLoopID(LoopMD);
+
return MadeChange;
}
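Rotation rewrites the header and latch terminators, which can drop the loop's !llvm.loop metadata; the hunk above snapshots the loop ID up front and reattaches it whenever the loop changed. A toy model (ours) of the save/restore:

#include <cassert>
#include <string>

struct Loop {
  std::string LoopMD;                            // stand-in for the MDNode
  bool rotate() { LoopMD.clear(); return true; } // rotation loses it
};

int main() {
  Loop L{"llvm.loop.vectorize.enable"};
  std::string SavedMD = L.LoopMD;  // save the loop metadata
  bool MadeChange = L.rotate();
  if (MadeChange && !SavedMD.empty())
    L.LoopMD = SavedMD;            // restore it after the transform
  assert(L.LoopMD == "llvm.loop.vectorize.enable");
}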
@@ -281,7 +301,7 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
BasicBlock *OrigLatch = L->getLoopLatch();
BranchInst *BI = dyn_cast<BranchInst>(OrigHeader->getTerminator());
- if (BI == 0 || BI->isUnconditional())
+ if (!BI || BI->isUnconditional())
return false;
// If the loop header is not one of the loop exiting blocks then
@@ -292,7 +312,7 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
// If the loop latch already contains a branch that leaves the loop then the
// loop is already rotated.
- if (OrigLatch == 0)
+ if (!OrigLatch)
return false;
// Rotate if either the loop latch does *not* exit the loop, or if the loop
@@ -310,7 +330,7 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
<< " instructions: "; L->dump());
return false;
}
- if (Metrics.NumInsts > MAX_HEADER_SIZE)
+ if (Metrics.NumInsts > MaxHeaderSize)
return false;
}
@@ -319,7 +339,7 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
// If the loop could not be converted to canonical form, it must have an
// indirectbr in it, just give up.
- if (OrigPreheader == 0)
+ if (!OrigPreheader)
return false;
// Anything ScalarEvolution may know about this loop or the PHI nodes
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 272a16d..914b56a 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -53,7 +53,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "loop-reduce"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/Hashing.h"
@@ -78,6 +77,8 @@
#include <algorithm>
using namespace llvm;
+#define DEBUG_TYPE "loop-reduce"
+
/// MaxIVUsers is an arbitrary threshold that provides an early opportunity to
/// bail out. This threshold is far beyond the number of users that LSR can
/// conceivably solve, so it should not affect generated code, but catches the
@@ -237,7 +238,15 @@ struct Formula {
int64_t Scale;
/// BaseRegs - The list of "base" registers for this use. When this is
- /// non-empty,
+ /// non-empty, the canonical representation of a formula is:
+ /// 1. BaseRegs.size > 1 implies ScaledReg != NULL and
+ /// 2. ScaledReg != NULL implies Scale != 1 || !BaseRegs.empty().
+ /// #1 enforces that the scaled register is always used when at least two
+ /// registers are needed by the formula: e.g., reg1 + reg2 is reg1 + 1 * reg2.
+ /// #2 enforces that 1 * reg is reg.
+ /// This invariant can be temporarily broken while building a formula.
+ /// However, every formula inserted into the LSRInstance must be in canonical
+ /// form.
SmallVector<const SCEV *, 4> BaseRegs;
/// ScaledReg - The 'scaled' register for this use. This should be non-null
@@ -250,12 +259,18 @@ struct Formula {
int64_t UnfoldedOffset;
Formula()
- : BaseGV(0), BaseOffset(0), HasBaseReg(false), Scale(0), ScaledReg(0),
- UnfoldedOffset(0) {}
+ : BaseGV(nullptr), BaseOffset(0), HasBaseReg(false), Scale(0),
+ ScaledReg(nullptr), UnfoldedOffset(0) {}
void InitialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE);
- unsigned getNumRegs() const;
+ bool isCanonical() const;
+
+ void Canonicalize();
+
+ bool Unscale();
+
+ size_t getNumRegs() const;
Type *getType() const;
void DeleteBaseReg(const SCEV *&S);
@@ -345,12 +360,58 @@ void Formula::InitialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE) {
BaseRegs.push_back(Sum);
HasBaseReg = true;
}
+ Canonicalize();
+}
+
+/// \brief Check whether or not this formula satisfies the canonical
+/// representation.
+/// \see Formula::BaseRegs.
+bool Formula::isCanonical() const {
+ if (ScaledReg)
+ return Scale != 1 || !BaseRegs.empty();
+ return BaseRegs.size() <= 1;
+}
+
+/// \brief Helper method to morph a formula into its canonical representation.
+/// \see Formula::BaseRegs.
+/// Every formula having more than one base register must use the ScaledReg
+/// field. Otherwise, we would have to do special cases everywhere in LSR
+/// to treat reg1 + reg2 + ... the same way as reg1 + 1*reg2 + ...
+/// On the other hand, 1*reg should be canonicalized into reg.
+void Formula::Canonicalize() {
+ if (isCanonical())
+ return;
+  // So far we have not needed this case. It is easy to implement but it is
+  // useless to maintain dead code. Besides, it could hurt compile time.
+ assert(!BaseRegs.empty() && "1*reg => reg, should not be needed.");
+  // Keep the invariant sum in BaseRegs and one of the variant sums in ScaledReg.
+ ScaledReg = BaseRegs.back();
+ BaseRegs.pop_back();
+ Scale = 1;
+ size_t BaseRegsSize = BaseRegs.size();
+ size_t Try = 0;
+ // If ScaledReg is an invariant, try to find a variant expression.
+ while (Try < BaseRegsSize && !isa<SCEVAddRecExpr>(ScaledReg))
+ std::swap(ScaledReg, BaseRegs[Try++]);
+}
+
+/// \brief Get rid of the scale in the formula.
+/// In other words, this method morphs reg1 + 1*reg2 into reg1 + reg2.
+/// \return true if it was possible to get rid of the scale, false otherwise.
+/// \note After this operation the formula may not be in the canonical form.
+bool Formula::Unscale() {
+ if (Scale != 1)
+ return false;
+ Scale = 0;
+ BaseRegs.push_back(ScaledReg);
+ ScaledReg = nullptr;
+ return true;
}
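Canonicalize and Unscale, together with isCanonical, maintain the invariant documented on Formula::BaseRegs: two or more base registers force one of them into the ScaledReg slot with Scale == 1, and a bare 1*reg collapses back to reg. A standalone toy (ours, with strings standing in for SCEV registers; the real Canonicalize additionally prefers a loop-variant ScaledReg):

#include <cassert>
#include <string>
#include <vector>

struct Formula {
  std::vector<std::string> BaseRegs;
  std::string ScaledReg; // empty means null
  long Scale = 0;

  bool isCanonical() const {
    if (!ScaledReg.empty())
      return Scale != 1 || !BaseRegs.empty(); // bare 1*reg is not canonical
    return BaseRegs.size() <= 1;              // two+ base regs need ScaledReg
  }
  void canonicalize() {                       // reg1 + reg2 => reg1 + 1*reg2
    if (isCanonical()) return;
    assert(!BaseRegs.empty() && "1*reg => reg is handled by unscale()");
    ScaledReg = BaseRegs.back();
    BaseRegs.pop_back();
    Scale = 1;
  }
  bool unscale() {                            // reg1 + 1*reg2 => reg1 + reg2
    if (Scale != 1) return false;
    Scale = 0;
    BaseRegs.push_back(ScaledReg);
    ScaledReg.clear();
    return true;
  }
};

int main() {
  Formula F;
  F.BaseRegs = {"reg1", "reg2"};
  assert(!F.isCanonical());
  F.canonicalize();
  assert(F.isCanonical() && F.ScaledReg == "reg2" && F.Scale == 1);
  F.unscale();
  assert(F.BaseRegs.size() == 2 && F.Scale == 0); // back to the raw sum
}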
/// getNumRegs - Return the total number of register operands used by this
/// formula. This does not include register uses implied by non-constant
/// addrec strides.
-unsigned Formula::getNumRegs() const {
+size_t Formula::getNumRegs() const {
return !!ScaledReg + BaseRegs.size();
}
@@ -360,7 +421,7 @@ Type *Formula::getType() const {
return !BaseRegs.empty() ? BaseRegs.front()->getType() :
ScaledReg ? ScaledReg->getType() :
BaseGV ? BaseGV->getType() :
- 0;
+ nullptr;
}
/// DeleteBaseReg - Delete the given base reg from the BaseRegs list.
@@ -487,11 +548,11 @@ static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS,
// Check for a division of a constant by a constant.
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(LHS)) {
if (!RC)
- return 0;
+ return nullptr;
const APInt &LA = C->getValue()->getValue();
const APInt &RA = RC->getValue()->getValue();
if (LA.srem(RA) != 0)
- return 0;
+ return nullptr;
return SE.getConstant(LA.sdiv(RA));
}
@@ -500,16 +561,16 @@ static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS,
if (IgnoreSignificantBits || isAddRecSExtable(AR, SE)) {
const SCEV *Step = getExactSDiv(AR->getStepRecurrence(SE), RHS, SE,
IgnoreSignificantBits);
- if (!Step) return 0;
+ if (!Step) return nullptr;
const SCEV *Start = getExactSDiv(AR->getStart(), RHS, SE,
IgnoreSignificantBits);
- if (!Start) return 0;
+ if (!Start) return nullptr;
// FlagNW is independent of the start value, step direction, and is
// preserved with smaller magnitude steps.
// FIXME: AR->getNoWrapFlags(SCEV::FlagNW)
return SE.getAddRecExpr(Start, Step, AR->getLoop(), SCEV::FlagAnyWrap);
}
- return 0;
+ return nullptr;
}
// Distribute the sdiv over add operands, if the add doesn't overflow.
@@ -520,12 +581,12 @@ static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS,
I != E; ++I) {
const SCEV *Op = getExactSDiv(*I, RHS, SE,
IgnoreSignificantBits);
- if (!Op) return 0;
+ if (!Op) return nullptr;
Ops.push_back(Op);
}
return SE.getAddExpr(Ops);
}
- return 0;
+ return nullptr;
}
// Check for a multiply operand that we can pull RHS out of.
@@ -544,13 +605,13 @@ static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS,
}
Ops.push_back(S);
}
- return Found ? SE.getMulExpr(Ops) : 0;
+ return Found ? SE.getMulExpr(Ops) : nullptr;
}
- return 0;
+ return nullptr;
}
// Otherwise we don't know.
- return 0;
+ return nullptr;
}
/// ExtractImmediate - If S involves the addition of a constant integer value,
@@ -604,7 +665,7 @@ static GlobalValue *ExtractSymbol(const SCEV *&S, ScalarEvolution &SE) {
SCEV::FlagAnyWrap);
return Result;
}
- return 0;
+ return nullptr;
}
/// isAddressUse - Returns true if the specified instruction is using the
@@ -755,12 +816,12 @@ DeleteTriviallyDeadInstructions(SmallVectorImpl<WeakVH> &DeadInsts) {
Value *V = DeadInsts.pop_back_val();
Instruction *I = dyn_cast_or_null<Instruction>(V);
- if (I == 0 || !isInstructionTriviallyDead(I))
+ if (!I || !isInstructionTriviallyDead(I))
continue;
for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI)
if (Instruction *U = dyn_cast<Instruction>(*OI)) {
- *OI = 0;
+ *OI = nullptr;
if (U->use_empty())
DeadInsts.push_back(U);
}
@@ -775,9 +836,18 @@ DeleteTriviallyDeadInstructions(SmallVectorImpl<WeakVH> &DeadInsts) {
namespace {
class LSRUse;
}
-// Check if it is legal to fold 2 base registers.
-static bool isLegal2RegAMUse(const TargetTransformInfo &TTI, const LSRUse &LU,
- const Formula &F);
+
+/// \brief Check if the addressing mode defined by \p F is completely
+/// folded in \p LU at isel time.
+/// This includes address-mode folding and special icmp tricks.
+/// This function returns true if \p LU can accommodate what \p F
+/// defines and up to 1 base + 1 scaled + offset.
+/// In other words, if \p F has several base registers, this function may
+/// still return true. Therefore, users still need to account for
+/// additional base registers and/or unfolded offsets to derive an
+/// accurate cost model.
+static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
+ const LSRUse &LU, const Formula &F);
// Get the cost of the scaling factor used in F for LU.
static unsigned getScalingFactorCost(const TargetTransformInfo &TTI,
const LSRUse &LU, const Formula &F);
@@ -828,7 +898,7 @@ public:
const SmallVectorImpl<int64_t> &Offsets,
ScalarEvolution &SE, DominatorTree &DT,
const LSRUse &LU,
- SmallPtrSet<const SCEV *, 16> *LoserRegs = 0);
+ SmallPtrSet<const SCEV *, 16> *LoserRegs = nullptr);
void print(raw_ostream &OS) const;
void dump() const;
@@ -921,6 +991,7 @@ void Cost::RateFormula(const TargetTransformInfo &TTI,
ScalarEvolution &SE, DominatorTree &DT,
const LSRUse &LU,
SmallPtrSet<const SCEV *, 16> *LoserRegs) {
+ assert(F.isCanonical() && "Cost is accurate only for canonical formula");
// Tally up the registers.
if (const SCEV *ScaledReg = F.ScaledReg) {
if (VisitedRegs.count(ScaledReg)) {
@@ -944,11 +1015,13 @@ void Cost::RateFormula(const TargetTransformInfo &TTI,
}
// Determine how many (unfolded) adds we'll need inside the loop.
- size_t NumBaseParts = F.BaseRegs.size() + (F.UnfoldedOffset != 0);
+ size_t NumBaseParts = F.getNumRegs();
if (NumBaseParts > 1)
// Do not count the base and a possible second register if the target
// allows folding 2 registers.
- NumBaseAdds += NumBaseParts - (1 + isLegal2RegAMUse(TTI, LU, F));
+ NumBaseAdds +=
+ NumBaseParts - (1 + (F.Scale && isAMCompletelyFolded(TTI, LU, F)));
+ NumBaseAdds += (F.UnfoldedOffset != 0);
// Accumulate non-free scaling amounts.
ScaleCost += getScalingFactorCost(TTI, LU, F);
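The accounting change above charges one add per base part beyond the first, where getNumRegs() now counts ScaledReg as well; one add is refunded when the target folds base + scaled, and the unfolded offset is billed separately. A sketch (ours) of that arithmetic:

#include <cassert>

static unsigned numBaseAdds(unsigned NumRegs, bool ScaleNonZero, bool AMFolds,
                            bool HasUnfoldedOffset) {
  unsigned Adds = 0;
  if (NumRegs > 1)
    Adds += NumRegs - (1 + (ScaleNonZero && AMFolds)); // folded pair is free
  Adds += HasUnfoldedOffset;                           // offset adds are real
  return Adds;
}

int main() {
  assert(numBaseAdds(2, true, true, false) == 0);  // base + scaled folds
  assert(numBaseAdds(2, false, true, false) == 1); // two base regs: one add
  assert(numBaseAdds(1, false, false, true) == 1); // offset needs its own add
}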
@@ -1047,7 +1120,8 @@ struct LSRFixup {
}
LSRFixup::LSRFixup()
- : UserInst(0), OperandValToReplace(0), LUIdx(~size_t(0)), Offset(0) {}
+ : UserInst(nullptr), OperandValToReplace(nullptr), LUIdx(~size_t(0)),
+ Offset(0) {}
/// isUseFullyOutsideLoop - Test whether this fixup always uses its
/// value outside of the given loop.
@@ -1183,7 +1257,7 @@ public:
MaxOffset(INT64_MIN),
AllFixupsOutsideLoop(true),
RigidFormula(false),
- WidestFixupType(0) {}
+ WidestFixupType(nullptr) {}
bool HasFormulaWithSameRegs(const Formula &F) const;
bool InsertFormula(const Formula &F);
@@ -1208,7 +1282,10 @@ bool LSRUse::HasFormulaWithSameRegs(const Formula &F) const {
/// InsertFormula - If the given formula has not yet been inserted, add it to
/// the list, and return true. Return false otherwise.
+/// The formula must be in canonical form.
bool LSRUse::InsertFormula(const Formula &F) {
+ assert(F.isCanonical() && "Invalid canonical representation");
+
if (!Formulae.empty() && RigidFormula)
return false;
@@ -1234,6 +1311,8 @@ bool LSRUse::InsertFormula(const Formula &F) {
// Record registers now being used by this use.
Regs.insert(F.BaseRegs.begin(), F.BaseRegs.end());
+ if (F.ScaledReg)
+ Regs.insert(F.ScaledReg);
return true;
}
@@ -1300,12 +1379,10 @@ void LSRUse::dump() const {
}
#endif
-/// isLegalUse - Test whether the use described by AM is "legal", meaning it can
-/// be completely folded into the user instruction at isel time. This includes
-/// address-mode folding and special icmp tricks.
-static bool isLegalUse(const TargetTransformInfo &TTI, LSRUse::KindType Kind,
- Type *AccessTy, GlobalValue *BaseGV, int64_t BaseOffset,
- bool HasBaseReg, int64_t Scale) {
+static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
+ LSRUse::KindType Kind, Type *AccessTy,
+ GlobalValue *BaseGV, int64_t BaseOffset,
+ bool HasBaseReg, int64_t Scale) {
switch (Kind) {
case LSRUse::Address:
return TTI.isLegalAddressingMode(AccessTy, BaseGV, BaseOffset, HasBaseReg, Scale);
@@ -1356,10 +1433,11 @@ static bool isLegalUse(const TargetTransformInfo &TTI, LSRUse::KindType Kind,
llvm_unreachable("Invalid LSRUse Kind!");
}
-static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset,
- int64_t MaxOffset, LSRUse::KindType Kind, Type *AccessTy,
- GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg,
- int64_t Scale) {
+static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
+ int64_t MinOffset, int64_t MaxOffset,
+ LSRUse::KindType Kind, Type *AccessTy,
+ GlobalValue *BaseGV, int64_t BaseOffset,
+ bool HasBaseReg, int64_t Scale) {
// Check for overflow.
if (((int64_t)((uint64_t)BaseOffset + MinOffset) > BaseOffset) !=
(MinOffset > 0))
@@ -1370,9 +1448,41 @@ static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset,
return false;
MaxOffset = (uint64_t)BaseOffset + MaxOffset;
- return isLegalUse(TTI, Kind, AccessTy, BaseGV, MinOffset, HasBaseReg,
- Scale) &&
- isLegalUse(TTI, Kind, AccessTy, BaseGV, MaxOffset, HasBaseReg, Scale);
+ return isAMCompletelyFolded(TTI, Kind, AccessTy, BaseGV, MinOffset,
+ HasBaseReg, Scale) &&
+ isAMCompletelyFolded(TTI, Kind, AccessTy, BaseGV, MaxOffset,
+ HasBaseReg, Scale);
+}
+
+static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
+ int64_t MinOffset, int64_t MaxOffset,
+ LSRUse::KindType Kind, Type *AccessTy,
+ const Formula &F) {
+  // For the purpose of isAMCompletelyFolded, either having a canonical
+  // formula or a nonzero scale is correct.
+  // Problems may arise from non-canonical formulae having a scale == 0.
+  // Strictly speaking, it would be best to just rely on canonical formulae.
+  // However, when we generate the scaled formulae, we first check that the
+  // scaling factor is profitable before computing the actual ScaledReg, for
+  // compile time's sake.
+ assert((F.isCanonical() || F.Scale != 0));
+ return isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy,
+ F.BaseGV, F.BaseOffset, F.HasBaseReg, F.Scale);
+}
+
+/// isLegalUse - Test whether we know how to expand the current formula.
+static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset,
+ int64_t MaxOffset, LSRUse::KindType Kind, Type *AccessTy,
+ GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg,
+ int64_t Scale) {
+ // We know how to expand completely foldable formulae.
+ return isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy, BaseGV,
+ BaseOffset, HasBaseReg, Scale) ||
+ // Or formulae that use a base register produced by a sum of base
+ // registers.
+ (Scale == 1 &&
+ isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy,
+ BaseGV, BaseOffset, true, 0));
}
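The new isLegalUse is deliberately wider than isAMCompletelyFolded: a formula whose addressing mode does not fold completely may still be expandable when Scale == 1, because the scaled register can then be treated as just another base register. A sketch (ours) of the two-tier test:

#include <cassert>

static bool isLegalUse(bool FoldsAsIs, long Scale, bool FoldsAsTwoBaseRegs) {
  return FoldsAsIs ||                        // completely foldable, or...
         (Scale == 1 && FoldsAsTwoBaseRegs); // reg1 + 1*reg2 as reg1 + reg2
}

int main() {
  assert(isLegalUse(true, 4, false));   // folded scaled addressing mode
  assert(isLegalUse(false, 1, true));   // expandable as a plain sum
  assert(!isLegalUse(false, 4, false)); // genuinely illegal
}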
static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset,
@@ -1382,36 +1492,23 @@ static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset,
F.BaseOffset, F.HasBaseReg, F.Scale);
}
-static bool isLegal2RegAMUse(const TargetTransformInfo &TTI, const LSRUse &LU,
- const Formula &F) {
- // If F is used as an Addressing Mode, it may fold one Base plus one
- // scaled register. If the scaled register is nil, do as if another
- // element of the base regs is a 1-scaled register.
- // This is possible if BaseRegs has at least 2 registers.
-
- // If this is not an address calculation, this is not an addressing mode
- // use.
- if (LU.Kind != LSRUse::Address)
- return false;
-
- // F is already scaled.
- if (F.Scale != 0)
- return false;
-
- // We need to keep one register for the base and one to scale.
- if (F.BaseRegs.size() < 2)
- return false;
-
- return isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
- F.BaseGV, F.BaseOffset, F.HasBaseReg, 1);
- }
+static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
+ const LSRUse &LU, const Formula &F) {
+ return isAMCompletelyFolded(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind,
+ LU.AccessTy, F.BaseGV, F.BaseOffset, F.HasBaseReg,
+ F.Scale);
+}
static unsigned getScalingFactorCost(const TargetTransformInfo &TTI,
const LSRUse &LU, const Formula &F) {
if (!F.Scale)
return 0;
- assert(isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind,
- LU.AccessTy, F) && "Illegal formula in use.");
+
+ // If the use is not completely folded in that instruction, we will have to
+ // pay an extra cost only for scale != 1.
+ if (!isAMCompletelyFolded(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind,
+ LU.AccessTy, F))
+ return F.Scale != 1;
switch (LU.Kind) {
case LSRUse::Address: {
@@ -1430,12 +1527,10 @@ static unsigned getScalingFactorCost(const TargetTransformInfo &TTI,
return std::max(ScaleCostMinOffset, ScaleCostMaxOffset);
}
case LSRUse::ICmpZero:
- // ICmpZero BaseReg + -1*ScaleReg => ICmp BaseReg, ScaleReg.
- // Therefore, return 0 in case F.Scale == -1.
- return F.Scale != -1;
-
case LSRUse::Basic:
case LSRUse::Special:
+ // The use is completely folded, i.e., everything is folded into the
+ // instruction.
return 0;
}
@@ -1460,7 +1555,8 @@ static bool isAlwaysFoldable(const TargetTransformInfo &TTI,
HasBaseReg = true;
}
- return isLegalUse(TTI, Kind, AccessTy, BaseGV, BaseOffset, HasBaseReg, Scale);
+ return isAMCompletelyFolded(TTI, Kind, AccessTy, BaseGV, BaseOffset,
+ HasBaseReg, Scale);
}
static bool isAlwaysFoldable(const TargetTransformInfo &TTI,
@@ -1485,8 +1581,8 @@ static bool isAlwaysFoldable(const TargetTransformInfo &TTI,
// base and a scale.
int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
- return isLegalUse(TTI, MinOffset, MaxOffset, Kind, AccessTy, BaseGV,
- BaseOffset, HasBaseReg, Scale);
+ return isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy, BaseGV,
+ BaseOffset, HasBaseReg, Scale);
}
namespace {
@@ -1515,7 +1611,7 @@ struct IVChain {
SmallVector<IVInc,1> Incs;
const SCEV *ExprBase;
- IVChain() : ExprBase(0) {}
+ IVChain() : ExprBase(nullptr) {}
IVChain(const IVInc &Head, const SCEV *Base)
: Incs(1, Head), ExprBase(Base) {}
@@ -1642,8 +1738,19 @@ class LSRInstance {
void GenerateReassociations(LSRUse &LU, unsigned LUIdx, Formula Base,
unsigned Depth = 0);
+
+ void GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx,
+ const Formula &Base, unsigned Depth,
+ size_t Idx, bool IsScaledReg = false);
void GenerateCombinations(LSRUse &LU, unsigned LUIdx, Formula Base);
+ void GenerateSymbolicOffsetsImpl(LSRUse &LU, unsigned LUIdx,
+ const Formula &Base, size_t Idx,
+ bool IsScaledReg = false);
void GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx, Formula Base);
+ void GenerateConstantOffsetsImpl(LSRUse &LU, unsigned LUIdx,
+ const Formula &Base,
+ const SmallVectorImpl<int64_t> &Worklist,
+ size_t Idx, bool IsScaledReg = false);
void GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx, Formula Base);
void GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx, Formula Base);
void GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base);
@@ -1721,7 +1828,7 @@ void LSRInstance::OptimizeShadowIV() {
IVUsers::const_iterator CandidateUI = UI;
++UI;
Instruction *ShadowUse = CandidateUI->getUser();
- Type *DestTy = 0;
+ Type *DestTy = nullptr;
bool IsSigned = false;
/* If shadow use is an int->float cast then insert a second IV
@@ -1783,7 +1890,7 @@ void LSRInstance::OptimizeShadowIV() {
continue;
/* Initialize new IV, double d = 0.0 in above example. */
- ConstantInt *C = 0;
+ ConstantInt *C = nullptr;
if (Incr->getOperand(0) == PH)
C = dyn_cast<ConstantInt>(Incr->getOperand(1));
else if (Incr->getOperand(1) == PH)
@@ -1905,7 +2012,7 @@ ICmpInst *LSRInstance::OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse) {
// for ICMP_ULE here because the comparison would be with zero, which
// isn't interesting.
CmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE;
- const SCEVNAryExpr *Max = 0;
+ const SCEVNAryExpr *Max = nullptr;
if (const SCEVSMaxExpr *S = dyn_cast<SCEVSMaxExpr>(BackedgeTakenCount)) {
Pred = ICmpInst::ICMP_SLE;
Max = S;
@@ -1948,7 +2055,7 @@ ICmpInst *LSRInstance::OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse) {
// Check the right operand of the select, and remember it, as it will
// be used in the new comparison instruction.
- Value *NewRHS = 0;
+ Value *NewRHS = nullptr;
if (ICmpInst::isTrueWhenEqual(Pred)) {
// Look for n+1, and grab n.
if (AddOperator *BO = dyn_cast<AddOperator>(Sel->getOperand(1)))
@@ -2018,7 +2125,7 @@ LSRInstance::OptimizeLoopTermCond() {
continue;
// Search IVUsesByStride to find Cond's IVUse if there is one.
- IVStrideUse *CondUse = 0;
+ IVStrideUse *CondUse = nullptr;
ICmpInst *Cond = cast<ICmpInst>(TermBr->getCondition());
if (!FindIVUserForCond(Cond, CondUse))
continue;
@@ -2071,12 +2178,12 @@ LSRInstance::OptimizeLoopTermCond() {
// Check for possible scaled-address reuse.
Type *AccessTy = getAccessType(UI->getUser());
int64_t Scale = C->getSExtValue();
- if (TTI.isLegalAddressingMode(AccessTy, /*BaseGV=*/ 0,
+ if (TTI.isLegalAddressingMode(AccessTy, /*BaseGV=*/ nullptr,
/*BaseOffset=*/ 0,
/*HasBaseReg=*/ false, Scale))
goto decline_post_inc;
Scale = -Scale;
- if (TTI.isLegalAddressingMode(AccessTy, /*BaseGV=*/ 0,
+ if (TTI.isLegalAddressingMode(AccessTy, /*BaseGV=*/ nullptr,
/*BaseOffset=*/ 0,
/*HasBaseReg=*/ false, Scale))
goto decline_post_inc;
@@ -2146,23 +2253,25 @@ LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg,
// the uses will have all its uses outside the loop, for example.
if (LU.Kind != Kind)
return false;
+
+ // Check for a mismatched access type, and fall back conservatively as needed.
+ // TODO: Be less conservative when the type is similar and can use the same
+ // addressing modes.
+ if (Kind == LSRUse::Address && AccessTy != LU.AccessTy)
+ NewAccessTy = Type::getVoidTy(AccessTy->getContext());
+
// Conservatively assume HasBaseReg is true for now.
if (NewOffset < LU.MinOffset) {
- if (!isAlwaysFoldable(TTI, Kind, AccessTy, /*BaseGV=*/ 0,
+ if (!isAlwaysFoldable(TTI, Kind, NewAccessTy, /*BaseGV=*/nullptr,
LU.MaxOffset - NewOffset, HasBaseReg))
return false;
NewMinOffset = NewOffset;
} else if (NewOffset > LU.MaxOffset) {
- if (!isAlwaysFoldable(TTI, Kind, AccessTy, /*BaseGV=*/ 0,
+ if (!isAlwaysFoldable(TTI, Kind, NewAccessTy, /*BaseGV=*/nullptr,
NewOffset - LU.MinOffset, HasBaseReg))
return false;
NewMaxOffset = NewOffset;
}
- // Check for a mismatched access type, and fall back conservatively as needed.
- // TODO: Be less conservative when the type is similar and can use the same
- // addressing modes.
- if (Kind == LSRUse::Address && AccessTy != LU.AccessTy)
- NewAccessTy = Type::getVoidTy(AccessTy->getContext());
// Update the use.
LU.MinOffset = NewMinOffset;
@@ -2183,7 +2292,7 @@ LSRInstance::getUse(const SCEV *&Expr,
int64_t Offset = ExtractImmediate(Expr, SE);
// Basic uses can't accept any offset, for example.
- if (!isAlwaysFoldable(TTI, Kind, AccessTy, /*BaseGV=*/ 0,
+ if (!isAlwaysFoldable(TTI, Kind, AccessTy, /*BaseGV=*/ nullptr,
Offset, /*HasBaseReg=*/ true)) {
Expr = Copy;
Offset = 0;
@@ -2267,7 +2376,7 @@ LSRInstance::FindUseWithSimilarFormula(const Formula &OrigF,
}
// Nothing looked good.
- return 0;
+ return nullptr;
}
void LSRInstance::CollectInterestingTypesAndFactors() {
@@ -2385,7 +2494,7 @@ static const SCEV *getExprBase(const SCEV *S) {
default: // including scUnknown.
return S;
case scConstant:
- return 0;
+ return nullptr;
case scTruncate:
return getExprBase(cast<SCEVTruncateExpr>(S)->getOperand());
case scZeroExtend:
@@ -2476,7 +2585,7 @@ isProfitableChain(IVChain &Chain, SmallPtrSet<Instruction*, 4> &Users,
&& SE.getSCEV(Chain.tailUserInst()) == Chain.Incs[0].IncExpr) {
--cost;
}
- const SCEV *LastIncExpr = 0;
+ const SCEV *LastIncExpr = nullptr;
unsigned NumConstIncrements = 0;
unsigned NumVarIncrements = 0;
unsigned NumReusedIncrements = 0;
@@ -2535,7 +2644,7 @@ void LSRInstance::ChainInstruction(Instruction *UserInst, Instruction *IVOper,
// Visit all existing chains. Check if its IVOper can be computed as a
// profitable loop invariant increment from the last link in the Chain.
unsigned ChainIdx = 0, NChains = IVChainVec.size();
- const SCEV *LastIncExpr = 0;
+ const SCEV *LastIncExpr = nullptr;
for (; ChainIdx < NChains; ++ChainIdx) {
IVChain &Chain = IVChainVec[ChainIdx];
@@ -2755,7 +2864,7 @@ static bool canFoldIVIncExpr(const SCEV *IncExpr, Instruction *UserInst,
int64_t IncOffset = IncConst->getValue()->getSExtValue();
if (!isAlwaysFoldable(TTI, LSRUse::Address,
- getAccessType(UserInst), /*BaseGV=*/ 0,
+ getAccessType(UserInst), /*BaseGV=*/ nullptr,
IncOffset, /*HasBaseReg=*/ false))
return false;
@@ -2773,7 +2882,7 @@ void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter,
// findIVOperand returns IVOpEnd if it can no longer find a valid IV user.
User::op_iterator IVOpIter = findIVOperand(Head.UserInst->op_begin(),
IVOpEnd, L, SE);
- Value *IVSrc = 0;
+ Value *IVSrc = nullptr;
while (IVOpIter != IVOpEnd) {
IVSrc = getWideOperand(*IVOpIter);
@@ -2800,7 +2909,7 @@ void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter,
DEBUG(dbgs() << "Generate chain at: " << *IVSrc << "\n");
Type *IVTy = IVSrc->getType();
Type *IntTy = SE.getEffectiveSCEVType(IVTy);
- const SCEV *LeftOverExpr = 0;
+ const SCEV *LeftOverExpr = nullptr;
for (IVChain::const_iterator IncI = Chain.begin(),
IncE = Chain.end(); IncI != IncE; ++IncI) {
@@ -2831,7 +2940,7 @@ void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter,
TTI)) {
assert(IVTy == IVOper->getType() && "inconsistent IV increment type");
IVSrc = IVOper;
- LeftOverExpr = 0;
+ LeftOverExpr = nullptr;
}
}
Type *OperTy = IncI->IVOperand->getType();
@@ -2886,7 +2995,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
LF.PostIncLoops = UI->getPostIncLoops();
LSRUse::KindType Kind = LSRUse::Basic;
- Type *AccessTy = 0;
+ Type *AccessTy = nullptr;
if (isAddressUse(LF.UserInst, LF.OperandValToReplace)) {
Kind = LSRUse::Address;
AccessTy = getAccessType(LF.UserInst);
@@ -2917,7 +3026,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
if (SE.isLoopInvariant(N, L) && isSafeToExpand(N, SE)) {
// S is normalized, so normalize N before folding it into S
// to keep the result normalized.
- N = TransformForPostIncUse(Normalize, N, CI, 0,
+ N = TransformForPostIncUse(Normalize, N, CI, nullptr,
LF.PostIncLoops, SE, DT);
Kind = LSRUse::ICmpZero;
S = SE.getMinusSCEV(N, S);
@@ -2992,6 +3101,9 @@ void LSRInstance::CountRegisters(const Formula &F, size_t LUIdx) {
/// InsertFormula - If the given formula has not yet been inserted, add it to
/// the list, and return true. Return false otherwise.
bool LSRInstance::InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F) {
+ // Do not insert a formula that we will not be able to expand.
+ assert(isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F) &&
+ "Formula is illegal");
if (!LU.InsertFormula(F))
return false;
@@ -3068,7 +3180,7 @@ LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
LSRFixup &LF = getNewFixup();
LF.UserInst = const_cast<Instruction *>(UserInst);
LF.OperandValToReplace = U;
- std::pair<size_t, int64_t> P = getUse(S, LSRUse::Basic, 0);
+ std::pair<size_t, int64_t> P = getUse(S, LSRUse::Basic, nullptr);
LF.LUIdx = P.first;
LF.Offset = P.second;
LSRUse &LU = Uses[LF.LUIdx];
@@ -3107,7 +3219,7 @@ static const SCEV *CollectSubexprs(const SCEV *S, const SCEVConstant *C,
if (Remainder)
Ops.push_back(C ? SE.getMulExpr(C, Remainder) : Remainder);
}
- return 0;
+ return nullptr;
} else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
// Split a non-zero base out of an addrec.
if (AR->getStart()->isZero())
@@ -3119,7 +3231,7 @@ static const SCEV *CollectSubexprs(const SCEV *S, const SCEVConstant *C,
// does not pertain to this loop.
if (Remainder && (AR->getLoop() == L || !isa<SCEVAddRecExpr>(Remainder))) {
Ops.push_back(C ? SE.getMulExpr(C, Remainder) : Remainder);
- Remainder = 0;
+ Remainder = nullptr;
}
if (Remainder != AR->getStart()) {
if (!Remainder)
@@ -3141,90 +3253,110 @@ static const SCEV *CollectSubexprs(const SCEV *S, const SCEVConstant *C,
CollectSubexprs(Mul->getOperand(1), C, Ops, L, SE, Depth+1);
if (Remainder)
Ops.push_back(SE.getMulExpr(C, Remainder));
- return 0;
+ return nullptr;
}
}
return S;
}
-/// GenerateReassociations - Split out subexpressions from adds and the bases of
-/// addrecs.
-void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
- Formula Base,
- unsigned Depth) {
- // Arbitrarily cap recursion to protect compile time.
- if (Depth >= 3) return;
-
- for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) {
- const SCEV *BaseReg = Base.BaseRegs[i];
+/// \brief Helper function for LSRInstance::GenerateReassociations.
+void LSRInstance::GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx,
+ const Formula &Base,
+ unsigned Depth, size_t Idx,
+ bool IsScaledReg) {
+ const SCEV *BaseReg = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx];
+ SmallVector<const SCEV *, 8> AddOps;
+ const SCEV *Remainder = CollectSubexprs(BaseReg, nullptr, AddOps, L, SE);
+ if (Remainder)
+ AddOps.push_back(Remainder);
+
+ if (AddOps.size() == 1)
+ return;
- SmallVector<const SCEV *, 8> AddOps;
- const SCEV *Remainder = CollectSubexprs(BaseReg, 0, AddOps, L, SE);
- if (Remainder)
- AddOps.push_back(Remainder);
+ for (SmallVectorImpl<const SCEV *>::const_iterator J = AddOps.begin(),
+ JE = AddOps.end();
+ J != JE; ++J) {
- if (AddOps.size() == 1) continue;
+ // Loop-variant "unknown" values are uninteresting; we won't be able to
+ // do anything meaningful with them.
+ if (isa<SCEVUnknown>(*J) && !SE.isLoopInvariant(*J, L))
+ continue;
- for (SmallVectorImpl<const SCEV *>::const_iterator J = AddOps.begin(),
- JE = AddOps.end(); J != JE; ++J) {
+ // Don't pull a constant into a register if the constant could be folded
+ // into an immediate field.
+ if (isAlwaysFoldable(TTI, SE, LU.MinOffset, LU.MaxOffset, LU.Kind,
+ LU.AccessTy, *J, Base.getNumRegs() > 1))
+ continue;
- // Loop-variant "unknown" values are uninteresting; we won't be able to
- // do anything meaningful with them.
- if (isa<SCEVUnknown>(*J) && !SE.isLoopInvariant(*J, L))
- continue;
+ // Collect all operands except *J.
+ SmallVector<const SCEV *, 8> InnerAddOps(
+ ((const SmallVector<const SCEV *, 8> &)AddOps).begin(), J);
+ InnerAddOps.append(std::next(J),
+ ((const SmallVector<const SCEV *, 8> &)AddOps).end());
+
+ // Don't leave just a constant behind in a register if the constant could
+ // be folded into an immediate field.
+ if (InnerAddOps.size() == 1 &&
+ isAlwaysFoldable(TTI, SE, LU.MinOffset, LU.MaxOffset, LU.Kind,
+ LU.AccessTy, InnerAddOps[0], Base.getNumRegs() > 1))
+ continue;
- // Don't pull a constant into a register if the constant could be folded
- // into an immediate field.
- if (isAlwaysFoldable(TTI, SE, LU.MinOffset, LU.MaxOffset, LU.Kind,
- LU.AccessTy, *J, Base.getNumRegs() > 1))
- continue;
+ const SCEV *InnerSum = SE.getAddExpr(InnerAddOps);
+ if (InnerSum->isZero())
+ continue;
+ Formula F = Base;
- // Collect all operands except *J.
- SmallVector<const SCEV *, 8> InnerAddOps(
- ((const SmallVector<const SCEV *, 8> &)AddOps).begin(), J);
- InnerAddOps.append(std::next(J),
- ((const SmallVector<const SCEV *, 8> &)AddOps).end());
-
- // Don't leave just a constant behind in a register if the constant could
- // be folded into an immediate field.
- if (InnerAddOps.size() == 1 &&
- isAlwaysFoldable(TTI, SE, LU.MinOffset, LU.MaxOffset, LU.Kind,
- LU.AccessTy, InnerAddOps[0], Base.getNumRegs() > 1))
- continue;
+ // Add the remaining pieces of the add back into the new formula.
+ const SCEVConstant *InnerSumSC = dyn_cast<SCEVConstant>(InnerSum);
+ if (InnerSumSC && SE.getTypeSizeInBits(InnerSumSC->getType()) <= 64 &&
+ TTI.isLegalAddImmediate((uint64_t)F.UnfoldedOffset +
+ InnerSumSC->getValue()->getZExtValue())) {
+ F.UnfoldedOffset =
+ (uint64_t)F.UnfoldedOffset + InnerSumSC->getValue()->getZExtValue();
+ if (IsScaledReg)
+ F.ScaledReg = nullptr;
+ else
+ F.BaseRegs.erase(F.BaseRegs.begin() + Idx);
+ } else if (IsScaledReg)
+ F.ScaledReg = InnerSum;
+ else
+ F.BaseRegs[Idx] = InnerSum;
+
+ // Add J as its own register, or an unfolded immediate.
+ const SCEVConstant *SC = dyn_cast<SCEVConstant>(*J);
+ if (SC && SE.getTypeSizeInBits(SC->getType()) <= 64 &&
+ TTI.isLegalAddImmediate((uint64_t)F.UnfoldedOffset +
+ SC->getValue()->getZExtValue()))
+ F.UnfoldedOffset =
+ (uint64_t)F.UnfoldedOffset + SC->getValue()->getZExtValue();
+ else
+ F.BaseRegs.push_back(*J);
+ // We may have changed the number of registers in base regs; adjust the
+ // formula accordingly.
+ F.Canonicalize();
+
+ if (InsertFormula(LU, LUIdx, F))
+ // If that formula hadn't been seen before, recurse to find more like
+ // it.
+ GenerateReassociations(LU, LUIdx, LU.Formulae.back(), Depth + 1);
+ }
+}
- const SCEV *InnerSum = SE.getAddExpr(InnerAddOps);
- if (InnerSum->isZero())
- continue;
- Formula F = Base;
+/// GenerateReassociations - Split out subexpressions from adds and the bases of
+/// addrecs.
+void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
+ Formula Base, unsigned Depth) {
+ assert(Base.isCanonical() && "Input must be in the canonical form");
+ // Arbitrarily cap recursion to protect compile time.
+ if (Depth >= 3)
+ return;
- // Add the remaining pieces of the add back into the new formula.
- const SCEVConstant *InnerSumSC = dyn_cast<SCEVConstant>(InnerSum);
- if (InnerSumSC &&
- SE.getTypeSizeInBits(InnerSumSC->getType()) <= 64 &&
- TTI.isLegalAddImmediate((uint64_t)F.UnfoldedOffset +
- InnerSumSC->getValue()->getZExtValue())) {
- F.UnfoldedOffset = (uint64_t)F.UnfoldedOffset +
- InnerSumSC->getValue()->getZExtValue();
- F.BaseRegs.erase(F.BaseRegs.begin() + i);
- } else
- F.BaseRegs[i] = InnerSum;
-
- // Add J as its own register, or an unfolded immediate.
- const SCEVConstant *SC = dyn_cast<SCEVConstant>(*J);
- if (SC && SE.getTypeSizeInBits(SC->getType()) <= 64 &&
- TTI.isLegalAddImmediate((uint64_t)F.UnfoldedOffset +
- SC->getValue()->getZExtValue()))
- F.UnfoldedOffset = (uint64_t)F.UnfoldedOffset +
- SC->getValue()->getZExtValue();
- else
- F.BaseRegs.push_back(*J);
+ for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
+ GenerateReassociationsImpl(LU, LUIdx, Base, Depth, i);
- if (InsertFormula(LU, LUIdx, F))
- // If that formula hadn't been seen before, recurse to find more like
- // it.
- GenerateReassociations(LU, LUIdx, LU.Formulae.back(), Depth+1);
- }
- }
+ if (Base.Scale == 1)
+ GenerateReassociationsImpl(LU, LUIdx, Base, Depth,
+ /* Idx */ -1, /* IsScaledReg */ true);
}
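A minimal self-contained sketch of the dispatch shape this refactor introduces (the Formula fields are modeled on the hunks above; plain ints stand in for SCEV registers, so this is illustrative rather than the patch's code). Each generator now runs its per-register body over every base register and, because Scale == 1 is part of the canonical form, once more over the scaled register:

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    struct Formula {
      std::vector<int> BaseRegs; // unscaled registers
      int ScaledReg = 0;         // at most one scaled register
      std::int64_t Scale = 0;
    };

    // Visit every base register, plus the scaled register when Scale == 1,
    // mirroring the loop at the end of GenerateReassociations above.
    template <typename Fn>
    void forEachCandidateReg(const Formula &F, Fn Body) {
      for (std::size_t i = 0, e = F.BaseRegs.size(); i != e; ++i)
        Body(F.BaseRegs[i], /*IsScaledReg=*/false);
      if (F.Scale == 1)
        Body(F.ScaledReg, /*IsScaledReg=*/true);
    }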
/// GenerateCombinations - Generate a formula consisting of all of the
@@ -3232,8 +3364,12 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
void LSRInstance::GenerateCombinations(LSRUse &LU, unsigned LUIdx,
Formula Base) {
// This method is only interesting on a plurality of registers.
- if (Base.BaseRegs.size() <= 1) return;
+ if (Base.BaseRegs.size() + (Base.Scale == 1) <= 1)
+ return;
+ // Flatten the representation, i.e., reg1 + 1*reg2 => reg1 + reg2, before
+ // processing the formula.
+ Base.Unscale();
Formula F = Base;
F.BaseRegs.clear();
SmallVector<const SCEV *, 4> Ops;
@@ -3253,29 +3389,87 @@ void LSRInstance::GenerateCombinations(LSRUse &LU, unsigned LUIdx,
// rather than proceed with zero in a register.
if (!Sum->isZero()) {
F.BaseRegs.push_back(Sum);
+ F.Canonicalize();
(void)InsertFormula(LU, LUIdx, F);
}
}
}
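GenerateCombinations now calls Unscale() so that reg1 + 1*reg2 is processed as reg1 + reg2, and Canonicalize() before inserting the result. The real methods are defined elsewhere in this patch; a sketch of their assumed semantics on the same toy Formula (an assumption, not the patch's exact code):

    #include <cstdint>
    #include <vector>

    struct Formula {
      std::vector<int> BaseRegs;
      int ScaledReg = 0;
      std::int64_t Scale = 0;

      // Fold 1*reg back into the base registers:
      // reg1 + 1*reg2 => reg1 + reg2.
      bool Unscale() {
        if (Scale != 1)
          return false;
        BaseRegs.push_back(ScaledReg);
        ScaledReg = 0;
        Scale = 0;
        return true;
      }

      // Re-establish the canonical form: with several base registers and
      // no scaled one, promote a base register into the Scale == 1 slot.
      void Canonicalize() {
        if (Scale == 0 && BaseRegs.size() > 1) {
          ScaledReg = BaseRegs.back();
          BaseRegs.pop_back();
          Scale = 1;
        }
      }
    };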
+/// \brief Helper function for LSRInstance::GenerateSymbolicOffsets.
+void LSRInstance::GenerateSymbolicOffsetsImpl(LSRUse &LU, unsigned LUIdx,
+ const Formula &Base, size_t Idx,
+ bool IsScaledReg) {
+ const SCEV *G = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx];
+ GlobalValue *GV = ExtractSymbol(G, SE);
+ if (G->isZero() || !GV)
+ return;
+ Formula F = Base;
+ F.BaseGV = GV;
+ if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F))
+ return;
+ if (IsScaledReg)
+ F.ScaledReg = G;
+ else
+ F.BaseRegs[Idx] = G;
+ (void)InsertFormula(LU, LUIdx, F);
+}
+
/// GenerateSymbolicOffsets - Generate reuse formulae using symbolic offsets.
void LSRInstance::GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx,
Formula Base) {
// We can't add a symbolic offset if the address already contains one.
if (Base.BaseGV) return;
- for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) {
- const SCEV *G = Base.BaseRegs[i];
- GlobalValue *GV = ExtractSymbol(G, SE);
- if (G->isZero() || !GV)
- continue;
+ for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
+ GenerateSymbolicOffsetsImpl(LU, LUIdx, Base, i);
+ if (Base.Scale == 1)
+ GenerateSymbolicOffsetsImpl(LU, LUIdx, Base, /* Idx */ -1,
+ /* IsScaledReg */ true);
+}
+
+/// \brief Helper function for LSRInstance::GenerateConstantOffsets.
+void LSRInstance::GenerateConstantOffsetsImpl(
+ LSRUse &LU, unsigned LUIdx, const Formula &Base,
+ const SmallVectorImpl<int64_t> &Worklist, size_t Idx, bool IsScaledReg) {
+ const SCEV *G = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx];
+ for (SmallVectorImpl<int64_t>::const_iterator I = Worklist.begin(),
+ E = Worklist.end();
+ I != E; ++I) {
Formula F = Base;
- F.BaseGV = GV;
- if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F))
- continue;
- F.BaseRegs[i] = G;
- (void)InsertFormula(LU, LUIdx, F);
+ F.BaseOffset = (uint64_t)Base.BaseOffset - *I;
+ if (isLegalUse(TTI, LU.MinOffset - *I, LU.MaxOffset - *I, LU.Kind,
+ LU.AccessTy, F)) {
+ // Add the offset to the base register.
+ const SCEV *NewG = SE.getAddExpr(SE.getConstant(G->getType(), *I), G);
+ // If it cancelled out, drop the base register; otherwise, update it.
+ if (NewG->isZero()) {
+ if (IsScaledReg) {
+ F.Scale = 0;
+ F.ScaledReg = nullptr;
+ } else
+ F.DeleteBaseReg(F.BaseRegs[Idx]);
+ F.Canonicalize();
+ } else if (IsScaledReg)
+ F.ScaledReg = NewG;
+ else
+ F.BaseRegs[Idx] = NewG;
+
+ (void)InsertFormula(LU, LUIdx, F);
+ }
}
+
+ int64_t Imm = ExtractImmediate(G, SE);
+ if (G->isZero() || Imm == 0)
+ return;
+ Formula F = Base;
+ F.BaseOffset = (uint64_t)F.BaseOffset + Imm;
+ if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F))
+ return;
+ if (IsScaledReg)
+ F.ScaledReg = G;
+ else
+ F.BaseRegs[Idx] = G;
+ (void)InsertFormula(LU, LUIdx, F);
}
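The first loop above rewrites BaseOffset to Base.BaseOffset - *I while conceptually adding *I back into the register expression, so the formula's value is preserved. A tiny runnable check of that identity (all concrete values are assumed for illustration):

    #include <cassert>
    #include <cstdint>

    int main() {
      std::int64_t Reg = 100, BaseOffset = 16, I = 16;
      // Original formula value: Reg + BaseOffset.
      std::int64_t Before = Reg + BaseOffset;
      // Rewritten: (Reg + I) + (BaseOffset - I). When Reg + I folds to
      // zero, the base register can be dropped entirely, as in the
      // NewG->isZero() branch above.
      std::int64_t After = (Reg + I) + (BaseOffset - I);
      assert(Before == After);
      return 0;
    }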
/// GenerateConstantOffsets - Generate reuse formulae using constant offsets.
@@ -3288,38 +3482,11 @@ void LSRInstance::GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx,
if (LU.MaxOffset != LU.MinOffset)
Worklist.push_back(LU.MaxOffset);
- for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) {
- const SCEV *G = Base.BaseRegs[i];
-
- for (SmallVectorImpl<int64_t>::const_iterator I = Worklist.begin(),
- E = Worklist.end(); I != E; ++I) {
- Formula F = Base;
- F.BaseOffset = (uint64_t)Base.BaseOffset - *I;
- if (isLegalUse(TTI, LU.MinOffset - *I, LU.MaxOffset - *I, LU.Kind,
- LU.AccessTy, F)) {
- // Add the offset to the base register.
- const SCEV *NewG = SE.getAddExpr(SE.getConstant(G->getType(), *I), G);
- // If it cancelled out, drop the base register, otherwise update it.
- if (NewG->isZero()) {
- std::swap(F.BaseRegs[i], F.BaseRegs.back());
- F.BaseRegs.pop_back();
- } else
- F.BaseRegs[i] = NewG;
-
- (void)InsertFormula(LU, LUIdx, F);
- }
- }
-
- int64_t Imm = ExtractImmediate(G, SE);
- if (G->isZero() || Imm == 0)
- continue;
- Formula F = Base;
- F.BaseOffset = (uint64_t)F.BaseOffset + Imm;
- if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F))
- continue;
- F.BaseRegs[i] = G;
- (void)InsertFormula(LU, LUIdx, F);
- }
+ for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
+ GenerateConstantOffsetsImpl(LU, LUIdx, Base, Worklist, i);
+ if (Base.Scale == 1)
+ GenerateConstantOffsetsImpl(LU, LUIdx, Base, Worklist, /* Idx */ -1,
+ /* IsScaledReg */ true);
}
/// GenerateICmpZeroScales - For ICmpZero, check to see if we can scale up
@@ -3419,7 +3586,11 @@ void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) {
if (!IntTy) return;
// If this Formula already has a scaled register, we can't add another one.
- if (Base.Scale != 0) return;
+ // Try to unscale the formula to generate a better scale.
+ if (Base.Scale != 0 && !Base.Unscale())
+ return;
+
+ assert(Base.Scale == 0 && "Unscale did not do its job!");
// Check each interesting stride.
for (SmallSetVector<int64_t, 8>::const_iterator
@@ -3460,6 +3631,11 @@ void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) {
Formula F = Base;
F.ScaledReg = Quotient;
F.DeleteBaseReg(F.BaseRegs[i]);
+ // The canonical representation of 1*reg is reg, which is already in
+ // Base. In that case, do not try to insert the formula; it will be
+ // rejected anyway.
+ if (F.Scale == 1 && F.BaseRegs.empty())
+ continue;
(void)InsertFormula(LU, LUIdx, F);
}
}
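The early continue above, restated as a standalone predicate: 1*reg with no other base registers denotes plain reg, which the incoming Base formula already covers, so inserting it would only create a duplicate (a sketch over the toy Formula fields, not the patch's code):

    #include <cstddef>
    #include <cstdint>

    bool isRedundantScaleOne(std::int64_t Scale, std::size_t NumBaseRegs) {
      return Scale == 1 && NumBaseRegs == 0;
    }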
@@ -3624,7 +3800,12 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
// TODO: Use a more targeted data structure.
for (size_t L = 0, LE = LU.Formulae.size(); L != LE; ++L) {
- const Formula &F = LU.Formulae[L];
+ Formula F = LU.Formulae[L];
+ // FIXME: The code for the scaled and unscaled registers looks
+ // very similar but slightly different. Investigate if they
+ // could be merged. That way, we would not have to unscale the
+ // Formula.
+ F.Unscale();
// Use the immediate in the scaled register.
if (F.ScaledReg == OrigReg) {
int64_t Offset = (uint64_t)F.BaseOffset + Imm * (uint64_t)F.Scale;
@@ -3650,6 +3831,7 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
continue;
// OK, looks good.
+ NewF.Canonicalize();
(void)InsertFormula(LU, LUIdx, NewF);
} else {
// Use the immediate in a base register.
@@ -3683,6 +3865,7 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
goto skip_formula;
// Ok, looks good.
+ NewF.Canonicalize();
(void)InsertFormula(LU, LUIdx, NewF);
break;
skip_formula:;
@@ -3936,7 +4119,7 @@ void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {
for (SmallVectorImpl<Formula>::const_iterator I = LU.Formulae.begin(),
E = LU.Formulae.end(); I != E; ++I) {
const Formula &F = *I;
- if (F.BaseOffset == 0 || F.Scale != 0)
+ if (F.BaseOffset == 0 || (F.Scale != 0 && F.Scale != 1))
continue;
LSRUse *LUThatHas = FindUseWithSimilarFormula(F, LU);
@@ -4033,7 +4216,7 @@ void LSRInstance::NarrowSearchSpaceByPickingWinnerRegs() {
// Pick the register which is used by the most LSRUses, which is likely
// to be a good reuse register candidate.
- const SCEV *Best = 0;
+ const SCEV *Best = nullptr;
unsigned BestNum = 0;
for (RegUseTracker::const_iterator I = RegUses.begin(), E = RegUses.end();
I != E; ++I) {
@@ -4130,19 +4313,22 @@ void LSRInstance::SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
E = LU.Formulae.end(); I != E; ++I) {
const Formula &F = *I;
- // Ignore formulae which do not use any of the required registers.
- bool SatisfiedReqReg = true;
+ // Ignore formulae which may not be ideal in terms of reusing ReqRegs:
+ // a formula should use all required registers before introducing
+ // new ones.
+ int NumReqRegsToFind = std::min(F.getNumRegs(), ReqRegs.size());
for (SmallSetVector<const SCEV *, 4>::const_iterator J = ReqRegs.begin(),
JE = ReqRegs.end(); J != JE; ++J) {
const SCEV *Reg = *J;
- if ((!F.ScaledReg || F.ScaledReg != Reg) &&
- std::find(F.BaseRegs.begin(), F.BaseRegs.end(), Reg) ==
+ if ((F.ScaledReg && F.ScaledReg == Reg) ||
+ std::find(F.BaseRegs.begin(), F.BaseRegs.end(), Reg) !=
F.BaseRegs.end()) {
- SatisfiedReqReg = false;
- break;
+ --NumReqRegsToFind;
+ if (NumReqRegsToFind == 0)
+ break;
}
}
- if (!SatisfiedReqReg) {
+ if (NumReqRegsToFind != 0) {
// If none of the formulae satisfied the required registers, then we could
// clear ReqRegs and try again. Currently, we simply give up in this case.
continue;
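The rewritten check counts required registers instead of rejecting any formula missing one of them; NumReqRegsToFind is capped by the formula's own register count, so small formulae are not ruled out up front. A self-contained restatement of the rule (plain ints stand in for SCEV registers):

    #include <algorithm>
    #include <cstddef>
    #include <set>
    #include <vector>

    bool usesEnoughRequiredRegs(const std::vector<int> &FormulaRegs,
                                const std::set<int> &ReqRegs) {
      std::size_t NumToFind = std::min(FormulaRegs.size(), ReqRegs.size());
      if (NumToFind == 0)
        return true; // trivially satisfied, as when NumReqRegsToFind == 0
      for (int Reg : ReqRegs) {
        bool Used = std::find(FormulaRegs.begin(), FormulaRegs.end(), Reg) !=
                    FormulaRegs.end();
        if (Used && --NumToFind == 0)
          return true;
      }
      return false;
    }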
@@ -4240,7 +4426,7 @@ LSRInstance::HoistInsertPosition(BasicBlock::iterator IP,
}
bool AllDominate = true;
- Instruction *BetterPos = 0;
+ Instruction *BetterPos = nullptr;
Instruction *Tentative = IDom->getTerminator();
for (SmallVectorImpl<Instruction *>::const_iterator I = Inputs.begin(),
E = Inputs.end(); I != E; ++I) {
@@ -4379,11 +4565,11 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
LF.UserInst, LF.OperandValToReplace,
Loops, SE, DT);
- Ops.push_back(SE.getUnknown(Rewriter.expandCodeFor(Reg, 0, IP)));
+ Ops.push_back(SE.getUnknown(Rewriter.expandCodeFor(Reg, nullptr, IP)));
}
// Expand the ScaledReg portion.
- Value *ICmpScaledV = 0;
+ Value *ICmpScaledV = nullptr;
if (F.Scale != 0) {
const SCEV *ScaledS = F.ScaledReg;
@@ -4394,25 +4580,34 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
Loops, SE, DT);
if (LU.Kind == LSRUse::ICmpZero) {
- // An interesting way of "folding" with an icmp is to use a negated
- // scale, which we'll implement by inserting it into the other operand
- // of the icmp.
- assert(F.Scale == -1 &&
- "The only scale supported by ICmpZero uses is -1!");
- ICmpScaledV = Rewriter.expandCodeFor(ScaledS, 0, IP);
+ // Expand ScaleReg as if it was part of the base regs.
+ if (F.Scale == 1)
+ Ops.push_back(
+ SE.getUnknown(Rewriter.expandCodeFor(ScaledS, nullptr, IP)));
+ else {
+ // An interesting way of "folding" with an icmp is to use a negated
+ // scale, which we'll implement by inserting it into the other operand
+ // of the icmp.
+ assert(F.Scale == -1 &&
+ "The only scale supported by ICmpZero uses is -1!");
+ ICmpScaledV = Rewriter.expandCodeFor(ScaledS, nullptr, IP);
+ }
} else {
// Otherwise just expand the scaled register and an explicit scale,
// which is expected to be matched as part of the address.
// Flush the operand list to suppress SCEVExpander hoisting address modes.
- if (!Ops.empty() && LU.Kind == LSRUse::Address) {
+ // Unless the addressing mode will not be folded.
+ if (!Ops.empty() && LU.Kind == LSRUse::Address &&
+ isAMCompletelyFolded(TTI, LU, F)) {
Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP);
Ops.clear();
Ops.push_back(SE.getUnknown(FullV));
}
- ScaledS = SE.getUnknown(Rewriter.expandCodeFor(ScaledS, 0, IP));
- ScaledS = SE.getMulExpr(ScaledS,
- SE.getConstant(ScaledS->getType(), F.Scale));
+ ScaledS = SE.getUnknown(Rewriter.expandCodeFor(ScaledS, nullptr, IP));
+ if (F.Scale != 1)
+ ScaledS =
+ SE.getMulExpr(ScaledS, SE.getConstant(ScaledS->getType(), F.Scale));
Ops.push_back(ScaledS);
}
}
@@ -4490,7 +4685,9 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
}
CI->setOperand(1, ICmpScaledV);
} else {
- assert(F.Scale == 0 &&
+ // A scale of 1 means that the scale has been expanded as part of the
+ // base regs.
+ assert((F.Scale == 0 || F.Scale == 1) &&
"ICmp does not support folding a global value and "
"a scale at the same time!");
Constant *C = ConstantInt::getSigned(SE.getEffectiveSCEVType(OpTy),
@@ -4531,7 +4728,7 @@ void LSRInstance::RewriteForPHI(PHINode *PN,
Loop *PNLoop = LI.getLoopFor(Parent);
if (!PNLoop || Parent != PNLoop->getHeader()) {
// Split the critical edge.
- BasicBlock *NewBB = 0;
+ BasicBlock *NewBB = nullptr;
if (!Parent->isLandingPad()) {
NewBB = SplitCriticalEdge(BB, Parent, P,
/*MergeIdenticalEdges=*/true,
@@ -4560,7 +4757,7 @@ void LSRInstance::RewriteForPHI(PHINode *PN,
}
std::pair<DenseMap<BasicBlock *, Value *>::iterator, bool> Pair =
- Inserted.insert(std::make_pair(BB, static_cast<Value *>(0)));
+ Inserted.insert(std::make_pair(BB, static_cast<Value *>(nullptr)));
if (!Pair.second)
PN->setIncomingValue(i, Pair.first->second);
else {
@@ -4670,7 +4867,7 @@ LSRInstance::LSRInstance(Loop *L, Pass *P)
DT(P->getAnalysis<DominatorTreeWrapperPass>().getDomTree()),
LI(P->getAnalysis<LoopInfo>()),
TTI(P->getAnalysis<TargetTransformInfo>()), L(L), Changed(false),
- IVIncInsertPos(0) {
+ IVIncInsertPos(nullptr) {
// If LoopSimplify form is not available, stay out of trouble.
if (!L->isLoopSimplifyForm())
return;
diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp
index ecd350b..fc28fd2 100644
--- a/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -12,7 +12,6 @@
// counts of loops easily.
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "loop-unroll"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/LoopPass.h"
@@ -29,6 +28,8 @@
using namespace llvm;
+#define DEBUG_TYPE "loop-unroll"
+
static cl::opt<unsigned>
UnrollThreshold("unroll-threshold", cl::init(150), cl::Hidden,
cl::desc("The cut-off point for automatic loop unrolling"));
@@ -237,9 +238,12 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
return false;
}
uint64_t Size = (uint64_t)LoopSize*Count;
- if (TripCount != 1 && Size > Threshold) {
- DEBUG(dbgs() << " Too large to fully unroll with count: " << Count
- << " because size: " << Size << ">" << Threshold << "\n");
+ if (TripCount != 1 &&
+ (Size > Threshold || (Count != TripCount && Size > PartialThreshold))) {
+ if (Size > Threshold)
+ DEBUG(dbgs() << " Too large to fully unroll with count: " << Count
+ << " because size: " << Size << ">" << Threshold << "\n");
+
bool AllowPartial = UserAllowPartial ? CurrentAllowPartial : UP.Partial;
if (!AllowPartial && !(Runtime && TripCount == 0)) {
DEBUG(dbgs() << " will not try to unroll partially because "
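The unrolled size is LoopSize * Count, and the new condition also rejects a partial unroll (Count != TripCount) whose size exceeds PartialThreshold. A worked example with assumed values, since the PartialThreshold default is not visible in this hunk:

    #include <cstdint>
    #include <cstdio>

    int main() {
      std::uint64_t LoopSize = 40, Count = 4, TripCount = 4;
      std::uint64_t Threshold = 150, PartialThreshold = 150; // assumed
      std::uint64_t Size = LoopSize * Count; // 160
      bool Reject = TripCount != 1 &&
                    (Size > Threshold ||
                     (Count != TripCount && Size > PartialThreshold));
      // 160 > 150: too large to fully unroll; the pass then considers a
      // partial or runtime unroll, as in the code above.
      std::printf("Size=%llu Reject=%d\n",
                  (unsigned long long)Size, (int)Reject);
      return 0;
    }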
diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp
index 5954f4a..977c53a 100644
--- a/lib/Transforms/Scalar/LoopUnswitch.cpp
+++ b/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -26,7 +26,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "loop-unswitch"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
@@ -53,6 +52,8 @@
#include <set>
using namespace llvm;
+#define DEBUG_TYPE "loop-unswitch"
+
STATISTIC(NumBranches, "Number of branches unswitched");
STATISTIC(NumSwitches, "Number of switches unswitched");
STATISTIC(NumSelects , "Number of selects unswitched");
@@ -96,7 +97,7 @@ namespace {
public:
LUAnalysisCache() :
- CurLoopInstructions(0), CurrentLoopProperties(0),
+ CurLoopInstructions(nullptr), CurrentLoopProperties(nullptr),
MaxSize(Threshold)
{}
@@ -151,8 +152,8 @@ namespace {
static char ID; // Pass ID, replacement for typeid
explicit LoopUnswitch(bool Os = false) :
LoopPass(ID), OptimizeForSize(Os), redoLoop(false),
- currentLoop(0), DT(0), loopHeader(0),
- loopPreheader(0) {
+ currentLoop(nullptr), DT(nullptr), loopHeader(nullptr),
+ loopPreheader(nullptr) {
initializeLoopUnswitchPass(*PassRegistry::getPassRegistry());
}
@@ -180,15 +181,6 @@ namespace {
BranchesInfo.forgetLoop(currentLoop);
}
- /// RemoveLoopFromWorklist - If the specified loop is on the loop worklist,
- /// remove it.
- void RemoveLoopFromWorklist(Loop *L) {
- std::vector<Loop*>::iterator I = std::find(LoopProcessWorklist.begin(),
- LoopProcessWorklist.end(), L);
- if (I != LoopProcessWorklist.end())
- LoopProcessWorklist.erase(I);
- }
-
void initLoopData() {
loopHeader = currentLoop->getHeader();
loopPreheader = currentLoop->getLoopPreheader();
@@ -212,9 +204,8 @@ namespace {
Instruction *InsertPt);
void SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L);
- void RemoveLoopFromHierarchy(Loop *L);
- bool IsTrivialUnswitchCondition(Value *Cond, Constant **Val = 0,
- BasicBlock **LoopExit = 0);
+ bool IsTrivialUnswitchCondition(Value *Cond, Constant **Val = nullptr,
+ BasicBlock **LoopExit = nullptr);
};
}
@@ -283,8 +274,8 @@ void LUAnalysisCache::forgetLoop(const Loop *L) {
LoopsProperties.erase(LIt);
}
- CurrentLoopProperties = 0;
- CurLoopInstructions = 0;
+ CurrentLoopProperties = nullptr;
+ CurLoopInstructions = nullptr;
}
// Mark case value as unswitched.
@@ -355,10 +346,10 @@ static Value *FindLIVLoopCondition(Value *Cond, Loop *L, bool &Changed) {
// We can never unswitch on vector conditions.
if (Cond->getType()->isVectorTy())
- return 0;
+ return nullptr;
// Constants should be folded, not unswitched on!
- if (isa<Constant>(Cond)) return 0;
+ if (isa<Constant>(Cond)) return nullptr;
// TODO: Handle: br (VARIANT|INVARIANT).
@@ -378,7 +369,7 @@ static Value *FindLIVLoopCondition(Value *Cond, Loop *L, bool &Changed) {
return RHS;
}
- return 0;
+ return nullptr;
}
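Only the tail of FindLIVLoopCondition appears in this hunk; the elided body recurses into And/Or operands looking for a loop-invariant piece to unswitch on, which is why the fragment above can return RHS alone. An assumed sketch of that recursion over a toy expression tree:

    struct Expr {
      bool IsInvariant;
      Expr *LHS = nullptr, *RHS = nullptr; // non-null only for And/Or nodes
    };

    Expr *findInvariantCond(Expr *Cond) {
      if (Cond->IsInvariant)
        return Cond;
      if (Cond->LHS) { // And/Or: either side may be usable on its own
        if (Expr *L = findInvariantCond(Cond->LHS))
          return L;
        if (Expr *R = findInvariantCond(Cond->RHS))
          return R;
      }
      return nullptr;
    }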
bool LoopUnswitch::runOnLoop(Loop *L, LPPassManager &LPM_Ref) {
@@ -389,7 +380,7 @@ bool LoopUnswitch::runOnLoop(Loop *L, LPPassManager &LPM_Ref) {
LPM = &LPM_Ref;
DominatorTreeWrapperPass *DTWP =
getAnalysisIfAvailable<DominatorTreeWrapperPass>();
- DT = DTWP ? &DTWP->getDomTree() : 0;
+ DT = DTWP ? &DTWP->getDomTree() : nullptr;
currentLoop = L;
Function *F = currentLoop->getHeader()->getParent();
bool Changed = false;
@@ -461,7 +452,7 @@ bool LoopUnswitch::processCurrentLoop() {
// Find a value to unswitch on:
// FIXME: this should choose the most expensive case!
// FIXME: scan for a case with a non-critical edge?
- Constant *UnswitchVal = 0;
+ Constant *UnswitchVal = nullptr;
// Do not process same value again and again.
// At this point we have some cases already unswitched and
@@ -518,7 +509,7 @@ static bool isTrivialLoopExitBlockHelper(Loop *L, BasicBlock *BB,
if (!L->contains(BB)) {
// Otherwise, this is a loop exit, this is fine so long as this is the
// first exit.
- if (ExitBB != 0) return false;
+ if (ExitBB) return false;
ExitBB = BB;
return true;
}
@@ -545,10 +536,10 @@ static bool isTrivialLoopExitBlockHelper(Loop *L, BasicBlock *BB,
static BasicBlock *isTrivialLoopExitBlock(Loop *L, BasicBlock *BB) {
std::set<BasicBlock*> Visited;
Visited.insert(L->getHeader()); // Branches to header make infinite loops.
- BasicBlock *ExitBB = 0;
+ BasicBlock *ExitBB = nullptr;
if (isTrivialLoopExitBlockHelper(L, BB, ExitBB, Visited))
return ExitBB;
- return 0;
+ return nullptr;
}
/// IsTrivialUnswitchCondition - Check to see if this unswitch condition is
@@ -569,7 +560,7 @@ bool LoopUnswitch::IsTrivialUnswitchCondition(Value *Cond, Constant **Val,
TerminatorInst *HeaderTerm = Header->getTerminator();
LLVMContext &Context = Header->getContext();
- BasicBlock *LoopExitBB = 0;
+ BasicBlock *LoopExitBB = nullptr;
if (BranchInst *BI = dyn_cast<BranchInst>(HeaderTerm)) {
// If the header block doesn't end with a conditional branch on Cond, we
// can't handle it.
@@ -639,8 +630,8 @@ bool LoopUnswitch::IsTrivialUnswitchCondition(Value *Cond, Constant **Val,
/// unswitch the loop, reprocess the pieces, then return true.
bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val) {
Function *F = loopHeader->getParent();
- Constant *CondVal = 0;
- BasicBlock *ExitBlock = 0;
+ Constant *CondVal = nullptr;
+ BasicBlock *ExitBlock = nullptr;
if (IsTrivialUnswitchCondition(LoopCond, &CondVal, &ExitBlock)) {
// If the condition is trivial, always unswitch. There is no code growth
@@ -948,17 +939,6 @@ static void ReplaceUsesOfWith(Instruction *I, Value *V,
++NumSimplify;
}
-/// RemoveLoopFromHierarchy - We have discovered that the specified loop has
-/// become unwrapped, either because the backedge was deleted, or because the
-/// edge into the header was removed. If the edge into the header from the
-/// latch block was removed, the loop is unwrapped but subloops are still alive,
-/// so they just reparent loops. If the loops are actually dead, they will be
-/// removed later.
-void LoopUnswitch::RemoveLoopFromHierarchy(Loop *L) {
- LPM->deleteLoopFromQueue(L);
- RemoveLoopFromWorklist(L);
-}
-
// RewriteLoopBodyWithConditionConstant - We know either that the value LIC has
// the value specified by Val in the specified loop, or we know it does NOT have
// that value. Rewrite any uses of LIC or of properties correlated to it.
@@ -1020,7 +1000,7 @@ void LoopUnswitch::RewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC,
// If we know that LIC is not Val, use this info to simplify code.
SwitchInst *SI = dyn_cast<SwitchInst>(UI);
- if (SI == 0 || !isa<ConstantInt>(Val)) continue;
+ if (!SI || !isa<ConstantInt>(Val)) continue;
SwitchInst::CaseIt DeadCase = SI->findCaseValue(cast<ConstantInt>(Val));
// Default case is live for multiple values.
diff --git a/lib/Transforms/Scalar/LowerAtomic.cpp b/lib/Transforms/Scalar/LowerAtomic.cpp
index 7c0a623..4251ac4 100644
--- a/lib/Transforms/Scalar/LowerAtomic.cpp
+++ b/lib/Transforms/Scalar/LowerAtomic.cpp
@@ -12,7 +12,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "loweratomic"
#include "llvm/Transforms/Scalar.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
@@ -20,6 +19,8 @@
#include "llvm/Pass.h"
using namespace llvm;
+#define DEBUG_TYPE "loweratomic"
+
static bool LowerAtomicCmpXchgInst(AtomicCmpXchgInst *CXI) {
IRBuilder<> Builder(CXI->getParent(), CXI);
Value *Ptr = CXI->getPointerOperand();
@@ -42,7 +43,7 @@ static bool LowerAtomicRMWInst(AtomicRMWInst *RMWI) {
Value *Val = RMWI->getValOperand();
LoadInst *Orig = Builder.CreateLoad(Ptr);
- Value *Res = NULL;
+ Value *Res = nullptr;
switch (RMWI->getOperation()) {
default: llvm_unreachable("Unexpected RMW operation");
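The switch dispatches on the RMW operation; every case follows the same single-threaded lowering of atomicrmw into load, apply, store, with the old value as the instruction's result. The Add case, modeled on plain memory for illustration:

    #include <cstdio>

    int lowerRMWAdd(int *Ptr, int Val) {
      int Orig = *Ptr;   // the LoadInst above
      *Ptr = Orig + Val; // Res for the Add case, then the store
      return Orig;       // atomicrmw yields the value before the update
    }

    int main() {
      int X = 5;
      int Old = lowerRMWAdd(&X, 3);
      std::printf("old=%d new=%d\n", Old, X); // old=5 new=8
      return 0;
    }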
diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 2603c96..b6bc792 100644
--- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -12,7 +12,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "memcpyopt"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
@@ -33,6 +32,8 @@
#include <list>
using namespace llvm;
+#define DEBUG_TYPE "memcpyopt"
+
STATISTIC(NumMemCpyInstr, "Number of memcpy instructions deleted");
STATISTIC(NumMemSetInfer, "Number of memsets inferred");
STATISTIC(NumMoveToCpy, "Number of memmoves converted to memcpy");
@@ -49,7 +50,7 @@ static int64_t GetOffsetFromIndex(const GEPOperator *GEP, unsigned Idx,
int64_t Offset = 0;
for (unsigned i = Idx, e = GEP->getNumOperands(); i != e; ++i, ++GTI) {
ConstantInt *OpC = dyn_cast<ConstantInt>(GEP->getOperand(i));
- if (OpC == 0)
+ if (!OpC)
return VariableIdxFound = true;
if (OpC->isZero()) continue; // No offset.
@@ -89,12 +90,12 @@ static bool IsPointerOffset(Value *Ptr1, Value *Ptr2, int64_t &Offset,
// If one pointer is a GEP and the other isn't, then see if the GEP is a
// constant offset from the base, as in "P" and "gep P, 1".
- if (GEP1 && GEP2 == 0 && GEP1->getOperand(0)->stripPointerCasts() == Ptr2) {
+ if (GEP1 && !GEP2 && GEP1->getOperand(0)->stripPointerCasts() == Ptr2) {
Offset = -GetOffsetFromIndex(GEP1, 1, VariableIdxFound, TD);
return !VariableIdxFound;
}
- if (GEP2 && GEP1 == 0 && GEP2->getOperand(0)->stripPointerCasts() == Ptr1) {
+ if (GEP2 && !GEP1 && GEP2->getOperand(0)->stripPointerCasts() == Ptr1) {
Offset = GetOffsetFromIndex(GEP2, 1, VariableIdxFound, TD);
return !VariableIdxFound;
}
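A worked example of the one-sided GEP case above, with Ptr1 = P and Ptr2 = gep P, 1 over i32: the byte offset of Ptr2 from Ptr1 is +4, so the GEP1 branch negates it to describe Ptr1 relative to Ptr2:

    #include <cassert>
    #include <cstdint>

    int main() {
      std::int32_t Buf[2] = {0, 0};
      std::int32_t *P = Buf;
      std::int32_t *GEP = Buf + 1; // "gep P, 1"
      std::int64_t Off =
          reinterpret_cast<char *>(GEP) - reinterpret_cast<char *>(P);
      assert(Off == 4);   // offset of gep P, 1 from P
      assert(-Off == -4); // Offset = -GetOffsetFromIndex(...), as above
      return 0;
    }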
@@ -317,9 +318,9 @@ namespace {
static char ID; // Pass identification, replacement for typeid
MemCpyOpt() : FunctionPass(ID) {
initializeMemCpyOptPass(*PassRegistry::getPassRegistry());
- MD = 0;
- TLI = 0;
- DL = 0;
+ MD = nullptr;
+ TLI = nullptr;
+ DL = nullptr;
}
bool runOnFunction(Function &F) override;
@@ -373,7 +374,7 @@ INITIALIZE_PASS_END(MemCpyOpt, "memcpyopt", "MemCpy Optimization",
/// attempts to merge them together into a memcpy/memset.
Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst,
Value *StartPtr, Value *ByteVal) {
- if (DL == 0) return 0;
+ if (!DL) return nullptr;
// Okay, so we now have a single store that can be splatable. Scan to find
// all subsequent stores of the same value to offset from the same pointer.
@@ -426,7 +427,7 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst,
// If we have no ranges, then we just had a single store with nothing that
// could be merged in. This is a very common case of course.
if (Ranges.empty())
- return 0;
+ return nullptr;
// If we had at least one store that could be merged in, add the starting
// store as well. We try to avoid this unless there is at least something
@@ -440,7 +441,7 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst,
// Now that we have full information about ranges, loop over the ranges and
// emit memset's for anything big enough to be worthwhile.
- Instruction *AMemSet = 0;
+ Instruction *AMemSet = nullptr;
for (MemsetRanges::const_iterator I = Ranges.begin(), E = Ranges.end();
I != E; ++I) {
const MemsetRange &Range = *I;
@@ -491,7 +492,7 @@ Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst,
bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
if (!SI->isSimple()) return false;
- if (DL == 0) return false;
+ if (!DL) return false;
// Detect cases where we're performing call slot forwarding, but
// happen to be using a load-store pair to implement it, rather than
@@ -500,7 +501,7 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
if (LI->isSimple() && LI->hasOneUse() &&
LI->getParent() == SI->getParent()) {
MemDepResult ldep = MD->getDependency(LI);
- CallInst *C = 0;
+ CallInst *C = nullptr;
if (ldep.isClobber() && !isa<MemCpyInst>(ldep.getInst()))
C = dyn_cast<CallInst>(ldep.getInst());
@@ -512,7 +513,7 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
for (BasicBlock::iterator I = --BasicBlock::iterator(SI),
E = C; I != E; --I) {
if (AA.getModRefInfo(&*I, StoreLoc) != AliasAnalysis::NoModRef) {
- C = 0;
+ C = nullptr;
break;
}
}
@@ -603,7 +604,7 @@ bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy,
return false;
// Check that all of src is copied to dest.
- if (DL == 0) return false;
+ if (!DL) return false;
ConstantInt *srcArraySize = dyn_cast<ConstantInt>(srcAlloca->getArraySize());
if (!srcArraySize)
@@ -846,7 +847,7 @@ bool MemCpyOpt::processMemCpy(MemCpyInst *M) {
// The optimizations after this point require the memcpy size.
ConstantInt *CopySize = dyn_cast<ConstantInt>(M->getLength());
- if (CopySize == 0) return false;
+ if (!CopySize) return false;
// There are three possible optimizations we can do for memcpy:
// a) memcpy-memcpy xform which exposes redundancy for DSE.
@@ -929,7 +930,7 @@ bool MemCpyOpt::processMemMove(MemMoveInst *M) {
/// processByValArgument - This is called on every byval argument in call sites.
bool MemCpyOpt::processByValArgument(CallSite CS, unsigned ArgNo) {
- if (DL == 0) return false;
+ if (!DL) return false;
// Find out what feeds this byval argument.
Value *ByValArg = CS.getArgument(ArgNo);
@@ -946,13 +947,13 @@ bool MemCpyOpt::processByValArgument(CallSite CS, unsigned ArgNo) {
// a memcpy, see if we can byval from the source of the memcpy instead of the
// result.
MemCpyInst *MDep = dyn_cast<MemCpyInst>(DepInfo.getInst());
- if (MDep == 0 || MDep->isVolatile() ||
+ if (!MDep || MDep->isVolatile() ||
ByValArg->stripPointerCasts() != MDep->getDest())
return false;
// The length of the memcpy must be larger or equal to the size of the byval.
ConstantInt *C1 = dyn_cast<ConstantInt>(MDep->getLength());
- if (C1 == 0 || C1->getValue().getZExtValue() < ByValSize)
+ if (!C1 || C1->getValue().getZExtValue() < ByValSize)
return false;
// Get the alignment of the byval. If the call doesn't specify the alignment,
@@ -1043,7 +1044,7 @@ bool MemCpyOpt::runOnFunction(Function &F) {
bool MadeChange = false;
MD = &getAnalysis<MemoryDependenceAnalysis>();
DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : 0;
+ DL = DLP ? &DLP->getDataLayout() : nullptr;
TLI = &getAnalysis<TargetLibraryInfo>();
// If we don't have at least memset and memcpy, there is little point of doing
@@ -1058,6 +1059,6 @@ bool MemCpyOpt::runOnFunction(Function &F) {
MadeChange = true;
}
- MD = 0;
+ MD = nullptr;
return MadeChange;
}
diff --git a/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp b/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp
index 2f19935..7cce89e 100644
--- a/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp
+++ b/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp
@@ -13,7 +13,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "partially-inline-libcalls"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Intrinsics.h"
@@ -25,6 +24,8 @@
using namespace llvm;
+#define DEBUG_TYPE "partially-inline-libcalls"
+
namespace {
class PartiallyInlineLibCalls : public FunctionPass {
public:
diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp
index b6b4d97..986d6a4 100644
--- a/lib/Transforms/Scalar/Reassociate.cpp
+++ b/lib/Transforms/Scalar/Reassociate.cpp
@@ -20,7 +20,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "reassociate"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/PostOrderIterator.h"
@@ -42,6 +41,8 @@
#include <algorithm>
using namespace llvm;
+#define DEBUG_TYPE "reassociate"
+
STATISTIC(NumChanged, "Number of insts reassociated");
STATISTIC(NumAnnihil, "Number of expr tree annihilated");
STATISTIC(NumFactor , "Number of multiplies factored");
@@ -122,14 +123,14 @@ namespace {
public:
XorOpnd(Value *V);
- bool isInvalid() const { return SymbolicPart == 0; }
+ bool isInvalid() const { return SymbolicPart == nullptr; }
bool isOrExpr() const { return isOr; }
Value *getValue() const { return OrigVal; }
Value *getSymbolicPart() const { return SymbolicPart; }
unsigned getSymbolicRank() const { return SymbolicRank; }
const APInt &getConstPart() const { return ConstPart; }
- void Invalidate() { SymbolicPart = OrigVal = 0; }
+ void Invalidate() { SymbolicPart = OrigVal = nullptr; }
void setSymbolicRank(unsigned R) { SymbolicRank = R; }
// Sort the XorOpnd-Pointer in ascending order of symbolic-value-rank.
@@ -236,7 +237,7 @@ static BinaryOperator *isReassociableOp(Value *V, unsigned Opcode) {
if (V->hasOneUse() && isa<Instruction>(V) &&
cast<Instruction>(V)->getOpcode() == Opcode)
return cast<BinaryOperator>(V);
- return 0;
+ return nullptr;
}
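The hasOneUse() test above is what makes reassociation safe: a node with a single use can be rewritten in place without affecting other users of the same value. The predicate restated over a toy IR node (illustrative only):

    #include <cstddef>

    struct Node {
      unsigned Opcode = 0;
      std::size_t NumUses = 0;
    };

    // Mirrors isReassociableOp: same opcode and exactly one use.
    Node *isReassociable(Node *N, unsigned Opcode) {
      return (N && N->NumUses == 1 && N->Opcode == Opcode) ? N : nullptr;
    }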
static bool isUnmovableInstruction(Instruction *I) {
@@ -284,7 +285,7 @@ void Reassociate::BuildRankMap(Function &F) {
unsigned Reassociate::getRank(Value *V) {
Instruction *I = dyn_cast<Instruction>(V);
- if (I == 0) {
+ if (!I) {
if (isa<Argument>(V)) return ValueRankMap[V]; // Function argument.
return 0; // Otherwise it's a global or constant, rank 0.
}
@@ -705,7 +706,7 @@ void Reassociate::RewriteExprTree(BinaryOperator *I,
// ExpressionChanged - Non-null if the rewritten expression differs from the
// original in some non-trivial way, requiring the clearing of optional flags.
// Flags are cleared from the operator in ExpressionChanged up to I inclusive.
- BinaryOperator *ExpressionChanged = 0;
+ BinaryOperator *ExpressionChanged = nullptr;
for (unsigned i = 0; ; ++i) {
// The last operation (which comes earliest in the IR) is special as both
// operands will come from Ops, rather than just one with the other being
@@ -995,7 +996,7 @@ static Value *EmitAddTreeOfValues(Instruction *I,
/// remove Factor from the tree and return the new tree.
Value *Reassociate::RemoveFactorFromExpression(Value *V, Value *Factor) {
BinaryOperator *BO = isReassociableOp(V, Instruction::Mul);
- if (!BO) return 0;
+ if (!BO) return nullptr;
SmallVector<RepeatedValue, 8> Tree;
MadeChange |= LinearizeExprTree(BO, Tree);
@@ -1029,7 +1030,7 @@ Value *Reassociate::RemoveFactorFromExpression(Value *V, Value *Factor) {
if (!FoundFactor) {
// Make sure to restore the operands to the expression tree.
RewriteExprTree(BO, Factors);
- return 0;
+ return nullptr;
}
BasicBlock::iterator InsertPt = BO; ++InsertPt;
@@ -1114,7 +1115,7 @@ static Value *OptimizeAndOrXor(unsigned Opcode,
++NumAnnihil;
}
}
- return 0;
+ return nullptr;
}
/// Helper function of CombineXorOpnd(). It creates a bitwise-and
@@ -1135,7 +1136,7 @@ static Value *createAndInstr(Instruction *InsertBefore, Value *Opnd,
}
return Opnd;
}
- return 0;
+ return nullptr;
}
// Helper function of OptimizeXor(). It tries to simplify "Opnd1 ^ ConstOpnd"
@@ -1261,7 +1262,7 @@ Value *Reassociate::OptimizeXor(Instruction *I,
return V;
if (Ops.size() == 1)
- return 0;
+ return nullptr;
SmallVector<XorOpnd, 8> Opnds;
SmallVector<XorOpnd*, 8> OpndPtrs;
@@ -1294,7 +1295,7 @@ Value *Reassociate::OptimizeXor(Instruction *I,
std::stable_sort(OpndPtrs.begin(), OpndPtrs.end(), XorOpnd::PtrSortFunctor());
// Step 3: Combine adjacent operands
- XorOpnd *PrevOpnd = 0;
+ XorOpnd *PrevOpnd = nullptr;
bool Changed = false;
for (unsigned i = 0, e = Opnds.size(); i < e; i++) {
XorOpnd *CurrOpnd = OpndPtrs[i];
@@ -1328,7 +1329,7 @@ Value *Reassociate::OptimizeXor(Instruction *I,
PrevOpnd = CurrOpnd;
} else {
CurrOpnd->Invalidate();
- PrevOpnd = 0;
+ PrevOpnd = nullptr;
}
Changed = true;
}
@@ -1358,7 +1359,7 @@ Value *Reassociate::OptimizeXor(Instruction *I,
}
}
- return 0;
+ return nullptr;
}
/// OptimizeAdd - Optimize a series of operands to an 'add' instruction. This
@@ -1445,7 +1446,7 @@ Value *Reassociate::OptimizeAdd(Instruction *I,
// Keep track of each multiply we see, to avoid triggering on (X*4)+(X*4)
// where they are actually the same multiply.
unsigned MaxOcc = 0;
- Value *MaxOccVal = 0;
+ Value *MaxOccVal = nullptr;
for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
BinaryOperator *BOp = isReassociableOp(Ops[i].Op, Instruction::Mul);
if (!BOp)
@@ -1543,7 +1544,7 @@ Value *Reassociate::OptimizeAdd(Instruction *I,
Ops.insert(Ops.begin(), ValueEntry(getRank(V2), V2));
}
- return 0;
+ return nullptr;
}
/// \brief Build up a vector of value/power pairs factoring a product.
@@ -1688,14 +1689,14 @@ Value *Reassociate::OptimizeMul(BinaryOperator *I,
// We can only optimize the multiplies when there is a chain of more than
// three, such that a balanced tree might require fewer total multiplies.
if (Ops.size() < 4)
- return 0;
+ return nullptr;
// Try to turn linear trees of multiplies without other uses of the
// intermediate stages into minimal multiply DAGs with perfect sub-expression
// re-use.
SmallVector<Factor, 4> Factors;
if (!collectMultiplyFactors(Ops, Factors))
- return 0; // All distinct factors, so nothing left for us to do.
+ return nullptr; // All distinct factors, so nothing left for us to do.
IRBuilder<> Builder(I);
Value *V = buildMinimalMultiplyDAG(Builder, Factors);
@@ -1704,14 +1705,14 @@ Value *Reassociate::OptimizeMul(BinaryOperator *I,
ValueEntry NewEntry = ValueEntry(getRank(V), V);
Ops.insert(std::lower_bound(Ops.begin(), Ops.end(), NewEntry), NewEntry);
- return 0;
+ return nullptr;
}
Value *Reassociate::OptimizeExpression(BinaryOperator *I,
SmallVectorImpl<ValueEntry> &Ops) {
// Now that we have the linearized expression tree, try to optimize it.
// Start by folding any constants that we found.
- Constant *Cst = 0;
+ Constant *Cst = nullptr;
unsigned Opcode = I->getOpcode();
while (!Ops.empty() && isa<Constant>(Ops.back().Op)) {
Constant *C = cast<Constant>(Ops.pop_back_val().Op);
@@ -1761,7 +1762,7 @@ Value *Reassociate::OptimizeExpression(BinaryOperator *I,
if (Ops.size() != NumOps)
return OptimizeExpression(I, Ops);
- return 0;
+ return nullptr;
}
/// EraseInst - Zap the given instruction, adding interesting operands to the
diff --git a/lib/Transforms/Scalar/Reg2Mem.cpp b/lib/Transforms/Scalar/Reg2Mem.cpp
index d9809ce..b6023e2 100644
--- a/lib/Transforms/Scalar/Reg2Mem.cpp
+++ b/lib/Transforms/Scalar/Reg2Mem.cpp
@@ -16,7 +16,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "reg2mem"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/IR/BasicBlock.h"
@@ -30,6 +29,8 @@
#include <list>
using namespace llvm;
+#define DEBUG_TYPE "reg2mem"
+
STATISTIC(NumRegsDemoted, "Number of registers demoted");
STATISTIC(NumPhisDemoted, "Number of phi-nodes demoted");
diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp
index b8f10e9..feeb231 100644
--- a/lib/Transforms/Scalar/SCCP.cpp
+++ b/lib/Transforms/Scalar/SCCP.cpp
@@ -17,7 +17,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "sccp"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
@@ -42,6 +41,8 @@
#include <algorithm>
using namespace llvm;
+#define DEBUG_TYPE "sccp"
+
STATISTIC(NumInstRemoved, "Number of instructions removed");
STATISTIC(NumDeadBlocks , "Number of basic blocks unreachable");
@@ -81,7 +82,7 @@ class LatticeVal {
}
public:
- LatticeVal() : Val(0, undefined) {}
+ LatticeVal() : Val(nullptr, undefined) {}
bool isUndefined() const { return getLatticeValue() == undefined; }
bool isConstant() const {
@@ -133,7 +134,7 @@ public:
ConstantInt *getConstantInt() const {
if (isConstant())
return dyn_cast<ConstantInt>(getConstant());
- return 0;
+ return nullptr;
}
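LatticeVal implements the usual SCCP lattice, with undefined below constants below overdefined; the merge rule below is the standard one and is assumed rather than shown in this hunk. A runnable model:

    #include <cassert>

    enum State { Undefined, Constant, Overdefined };

    struct Lattice {
      State S;
      int C; // meaningful only when S == Constant

      void merge(const Lattice &O) {
        if (O.S == Undefined || S == Overdefined)
          return;            // nothing new, or already at top
        if (S == Undefined) {
          *this = O;         // first real fact wins
          return;
        }
        if (O.S == Overdefined || O.C != C)
          S = Overdefined;   // disagreeing constants jump to top
      }
    };

    int main() {
      Lattice A{Undefined, 0};
      A.merge(Lattice{Constant, 3});
      assert(A.S == Constant && A.C == 3);
      A.merge(Lattice{Constant, 4});
      assert(A.S == Overdefined);
      return 0;
    }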
void markForcedConstant(Constant *V) {
@@ -403,7 +404,7 @@ private:
if (Constant *C = dyn_cast<Constant>(V)) {
Constant *Elt = C->getAggregateElement(i);
- if (Elt == 0)
+ if (!Elt)
LV.markOverdefined(); // Unknown sort of constant.
else if (isa<UndefValue>(Elt))
; // Undef values remain undefined.
@@ -522,7 +523,7 @@ void SCCPSolver::getFeasibleSuccessors(TerminatorInst &TI,
LatticeVal BCValue = getValueState(BI->getCondition());
ConstantInt *CI = BCValue.getConstantInt();
- if (CI == 0) {
+ if (!CI) {
// Overdefined condition variables, and branches on unfoldable constant
// conditions, mean the branch could go either way.
if (!BCValue.isUndefined())
@@ -549,7 +550,7 @@ void SCCPSolver::getFeasibleSuccessors(TerminatorInst &TI,
LatticeVal SCValue = getValueState(SI->getCondition());
ConstantInt *CI = SCValue.getConstantInt();
- if (CI == 0) { // Overdefined or undefined condition?
+ if (!CI) { // Overdefined or undefined condition?
// All destinations are executable!
if (!SCValue.isUndefined())
Succs.assign(TI.getNumSuccessors(), true);
@@ -594,7 +595,7 @@ bool SCCPSolver::isEdgeFeasible(BasicBlock *From, BasicBlock *To) {
// Overdefined condition variables mean the branch could go either way,
// undef conditions mean that neither edge is feasible yet.
ConstantInt *CI = BCValue.getConstantInt();
- if (CI == 0)
+ if (!CI)
return !BCValue.isUndefined();
// Constant condition variables mean the branch can only go a single way.
@@ -612,7 +613,7 @@ bool SCCPSolver::isEdgeFeasible(BasicBlock *From, BasicBlock *To) {
LatticeVal SCValue = getValueState(SI->getCondition());
ConstantInt *CI = SCValue.getConstantInt();
- if (CI == 0)
+ if (!CI)
return !SCValue.isUndefined();
return SI->findCaseValue(CI).getCaseSuccessor() == To;
@@ -626,7 +627,7 @@ bool SCCPSolver::isEdgeFeasible(BasicBlock *From, BasicBlock *To) {
#ifndef NDEBUG
dbgs() << "Unknown terminator instruction: " << *TI << '\n';
#endif
- llvm_unreachable(0);
+ llvm_unreachable(nullptr);
}
// visit Implementations - Something changed in this instruction, either an
@@ -667,7 +668,7 @@ void SCCPSolver::visitPHINode(PHINode &PN) {
// constant. If they are constant and don't agree, the PHI is overdefined.
// If there are no executable operands, the PHI remains undefined.
//
- Constant *OperandVal = 0;
+ Constant *OperandVal = nullptr;
for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) {
LatticeVal IV = getValueState(PN.getIncomingValue(i));
if (IV.isUndefined()) continue; // Doesn't influence PHI node.
@@ -678,7 +679,7 @@ void SCCPSolver::visitPHINode(PHINode &PN) {
if (IV.isOverdefined()) // PHI node becomes overdefined!
return markOverdefined(&PN);
- if (OperandVal == 0) { // Grab the first value.
+ if (!OperandVal) { // Grab the first value.
OperandVal = IV.getConstant();
continue;
}
@@ -774,7 +775,7 @@ void SCCPSolver::visitExtractValueInst(ExtractValueInst &EVI) {
void SCCPSolver::visitInsertValueInst(InsertValueInst &IVI) {
StructType *STy = dyn_cast<StructType>(IVI.getType());
- if (STy == 0)
+ if (!STy)
return markOverdefined(&IVI);
// If this has more than one index, we can't handle it, drive all results to
@@ -862,7 +863,7 @@ void SCCPSolver::visitBinaryOperator(Instruction &I) {
// If this is an AND or OR with 0 or -1, it doesn't matter that the other
// operand is overdefined.
if (I.getOpcode() == Instruction::And || I.getOpcode() == Instruction::Or) {
- LatticeVal *NonOverdefVal = 0;
+ LatticeVal *NonOverdefVal = nullptr;
if (!V1State.isOverdefined())
NonOverdefVal = &V1State;
else if (!V2State.isOverdefined())
@@ -1081,7 +1082,7 @@ void SCCPSolver::visitCallSite(CallSite CS) {
// The common case is that we aren't tracking the callee, either because we
// are not doing interprocedural analysis or the callee is indirect, or is
// external. Handle these cases first.
- if (F == 0 || F->isDeclaration()) {
+ if (!F || F->isDeclaration()) {
CallOverdefined:
// Void return and not tracking callee, just bail.
if (I->getType()->isVoidTy()) return;
@@ -1555,7 +1556,7 @@ bool SCCP::runOnFunction(Function &F) {
DEBUG(dbgs() << "SCCP on function '" << F.getName() << "'\n");
const DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- const DataLayout *DL = DLP ? &DLP->getDataLayout() : 0;
+ const DataLayout *DL = DLP ? &DLP->getDataLayout() : nullptr;
const TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>();
SCCPSolver Solver(DL, TLI);
@@ -1684,7 +1685,7 @@ static bool AddressIsTaken(const GlobalValue *GV) {
bool IPSCCP::runOnModule(Module &M) {
DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- const DataLayout *DL = DLP ? &DLP->getDataLayout() : 0;
+ const DataLayout *DL = DLP ? &DLP->getDataLayout() : nullptr;
const TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>();
SCCPSolver Solver(DL, TLI);
diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp
index ed5e618..04bf4f8 100644
--- a/lib/Transforms/Scalar/SROA.cpp
+++ b/lib/Transforms/Scalar/SROA.cpp
@@ -23,7 +23,6 @@
///
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "sroa"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
@@ -64,6 +63,8 @@
using namespace llvm;
+#define DEBUG_TYPE "sroa"
+
STATISTIC(NumAllocasAnalyzed, "Number of allocas analyzed for replacement");
STATISTIC(NumAllocaPartitions, "Number of alloca partitions formed");
STATISTIC(MaxPartitionsPerAlloca, "Maximum number of partitions per alloca");
@@ -159,8 +160,8 @@ public:
Use *getUse() const { return UseAndIsSplittable.getPointer(); }
- bool isDead() const { return getUse() == 0; }
- void kill() { UseAndIsSplittable.setPointer(0); }
+ bool isDead() const { return getUse() == nullptr; }
+ void kill() { UseAndIsSplittable.setPointer(nullptr); }
/// \brief Support for ordering ranges.
///
@@ -320,7 +321,7 @@ static Value *foldSelectInst(SelectInst &SI) {
if (SI.getOperand(1) == SI.getOperand(2))
return SI.getOperand(1);
- return 0;
+ return nullptr;
}
/// \brief Builder for the alloca slices.
@@ -642,7 +643,7 @@ private:
Uses.push_back(std::make_pair(I, cast<Instruction>(U)));
} while (!Uses.empty());
- return 0;
+ return nullptr;
}
void visitPHINode(PHINode &PN) {
@@ -724,7 +725,7 @@ AllocaSlices::AllocaSlices(const DataLayout &DL, AllocaInst &AI)
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
AI(AI),
#endif
- PointerEscapingInstr(0) {
+ PointerEscapingInstr(nullptr) {
SliceBuilder PB(DL, AI, *this);
SliceBuilder::PtrInfo PtrI = PB.visitPtr(AI);
if (PtrI.isEscaped() || PtrI.isAborted()) {
@@ -873,7 +874,7 @@ public:
for (SmallVectorImpl<DbgValueInst *>::const_iterator I = DVIs.begin(),
E = DVIs.end(); I != E; ++I) {
DbgValueInst *DVI = *I;
- Value *Arg = 0;
+ Value *Arg = nullptr;
if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
// If an argument is zero extended then use argument directly. The ZExt
// may be zapped by an optimization pass in future.
@@ -969,7 +970,7 @@ class SROA : public FunctionPass {
public:
SROA(bool RequiresDomTree = true)
: FunctionPass(ID), RequiresDomTree(RequiresDomTree),
- C(0), DL(0), DT(0) {
+ C(nullptr), DL(nullptr), DT(nullptr) {
initializeSROAPass(*PassRegistry::getPassRegistry());
}
bool runOnFunction(Function &F) override;
@@ -1011,9 +1012,9 @@ INITIALIZE_PASS_END(SROA, "sroa", "Scalar Replacement Of Aggregates",
static Type *findCommonType(AllocaSlices::const_iterator B,
AllocaSlices::const_iterator E,
uint64_t EndOffset) {
- Type *Ty = 0;
+ Type *Ty = nullptr;
bool TyIsCommon = true;
- IntegerType *ITy = 0;
+ IntegerType *ITy = nullptr;
// Note that we need to look at *every* alloca slice's Use to ensure we
// always get consistent results regardless of the order of slices.
@@ -1024,7 +1025,7 @@ static Type *findCommonType(AllocaSlices::const_iterator B,
if (I->beginOffset() != B->beginOffset() || I->endOffset() != EndOffset)
continue;
- Type *UserTy = 0;
+ Type *UserTy = nullptr;
if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
UserTy = LI->getType();
} else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) {
@@ -1074,7 +1075,7 @@ static Type *findCommonType(AllocaSlices::const_iterator B,
/// FIXME: This should be hoisted into a generic utility, likely in
/// Transforms/Util/Local.h
static bool isSafePHIToSpeculate(PHINode &PN,
- const DataLayout *DL = 0) {
+ const DataLayout *DL = nullptr) {
// For now, we can only do this promotion if the load is in the same block
// as the PHI, and if there are no stores between the phi and load.
// TODO: Allow recursive phi users.
@@ -1084,7 +1085,7 @@ static bool isSafePHIToSpeculate(PHINode &PN,
bool HaveLoad = false;
for (User *U : PN.users()) {
LoadInst *LI = dyn_cast<LoadInst>(U);
- if (LI == 0 || !LI->isSimple())
+ if (!LI || !LI->isSimple())
return false;
// For now we only allow loads in the same block as the PHI. This is
@@ -1191,7 +1192,8 @@ static void speculatePHINodeLoads(PHINode &PN) {
///
/// We can do this to a select if its only uses are loads and if the operand
/// to the select can be loaded unconditionally.
-static bool isSafeSelectToSpeculate(SelectInst &SI, const DataLayout *DL = 0) {
+static bool isSafeSelectToSpeculate(SelectInst &SI,
+ const DataLayout *DL = nullptr) {
Value *TValue = SI.getTrueValue();
Value *FValue = SI.getFalseValue();
bool TDerefable = TValue->isDereferenceablePointer();
@@ -1199,7 +1201,7 @@ static bool isSafeSelectToSpeculate(SelectInst &SI, const DataLayout *DL = 0) {
for (User *U : SI.users()) {
LoadInst *LI = dyn_cast<LoadInst>(U);
- if (LI == 0 || !LI->isSimple())
+ if (!LI || !LI->isSimple())
return false;
// Both operands to the select need to be dereferencable, either
@@ -1332,19 +1334,21 @@ static Value *getNaturalGEPRecursively(IRBuilderTy &IRB, const DataLayout &DL,
// We can't recurse through pointer types.
if (Ty->isPointerTy())
- return 0;
+ return nullptr;
// We try to analyze GEPs over vectors here, but note that these GEPs are
// extremely poorly defined currently. The long-term goal is to remove GEPing
// over a vector from the IR completely.
if (VectorType *VecTy = dyn_cast<VectorType>(Ty)) {
unsigned ElementSizeInBits = DL.getTypeSizeInBits(VecTy->getScalarType());
- if (ElementSizeInBits % 8)
- return 0; // GEPs over non-multiple of 8 size vector elements are invalid.
+ if (ElementSizeInBits % 8 != 0) {
+ // GEPs over vector elements whose size is not a multiple of 8 bits are invalid.
+ return nullptr;
+ }
APInt ElementSize(Offset.getBitWidth(), ElementSizeInBits / 8);
APInt NumSkippedElements = Offset.sdiv(ElementSize);
if (NumSkippedElements.ugt(VecTy->getNumElements()))
- return 0;
+ return nullptr;
Offset -= NumSkippedElements * ElementSize;
Indices.push_back(IRB.getInt(NumSkippedElements));
return getNaturalGEPRecursively(IRB, DL, Ptr, VecTy->getElementType(),
@@ -1356,7 +1360,7 @@ static Value *getNaturalGEPRecursively(IRBuilderTy &IRB, const DataLayout &DL,
APInt ElementSize(Offset.getBitWidth(), DL.getTypeAllocSize(ElementTy));
APInt NumSkippedElements = Offset.sdiv(ElementSize);
if (NumSkippedElements.ugt(ArrTy->getNumElements()))
- return 0;
+ return nullptr;
Offset -= NumSkippedElements * ElementSize;
Indices.push_back(IRB.getInt(NumSkippedElements));
@@ -1366,17 +1370,17 @@ static Value *getNaturalGEPRecursively(IRBuilderTy &IRB, const DataLayout &DL,
StructType *STy = dyn_cast<StructType>(Ty);
if (!STy)
- return 0;
+ return nullptr;
const StructLayout *SL = DL.getStructLayout(STy);
uint64_t StructOffset = Offset.getZExtValue();
if (StructOffset >= SL->getSizeInBytes())
- return 0;
+ return nullptr;
unsigned Index = SL->getElementContainingOffset(StructOffset);
Offset -= APInt(Offset.getBitWidth(), SL->getElementOffset(Index));
Type *ElementTy = STy->getElementType(Index);
if (Offset.uge(DL.getTypeAllocSize(ElementTy)))
- return 0; // The offset points into alignment padding.
+ return nullptr; // The offset points into alignment padding.
Indices.push_back(IRB.getInt32(Index));
return getNaturalGEPRecursively(IRB, DL, Ptr, ElementTy, Offset, TargetTy,
@@ -1402,14 +1406,14 @@ static Value *getNaturalGEPWithOffset(IRBuilderTy &IRB, const DataLayout &DL,
// Don't consider any GEPs through an i8* as natural unless the TargetTy is
// an i8.
if (Ty == IRB.getInt8PtrTy(Ty->getAddressSpace()) && TargetTy->isIntegerTy(8))
- return 0;
+ return nullptr;
Type *ElementTy = Ty->getElementType();
if (!ElementTy->isSized())
- return 0; // We can't GEP through an unsized element.
+ return nullptr; // We can't GEP through an unsized element.
APInt ElementSize(Offset.getBitWidth(), DL.getTypeAllocSize(ElementTy));
if (ElementSize == 0)
- return 0; // Zero-length arrays can't help us build a natural GEP.
+ return nullptr; // Zero-length arrays can't help us build a natural GEP.
APInt NumSkippedElements = Offset.sdiv(ElementSize);
Offset -= NumSkippedElements * ElementSize;
@@ -1445,11 +1449,11 @@ static Value *getAdjustedPtr(IRBuilderTy &IRB, const DataLayout &DL, Value *Ptr,
// We may end up computing an offset pointer that has the wrong type. If we
// never are able to compute one directly that has the correct type, we'll
// fall back to it, so keep it around here.
- Value *OffsetPtr = 0;
+ Value *OffsetPtr = nullptr;
// Remember any i8 pointer we come across to re-use if we need to do a raw
// byte offset.
- Value *Int8Ptr = 0;
+ Value *Int8Ptr = nullptr;
APInt Int8PtrOffset(Offset.getBitWidth(), 0);
Type *TargetTy = PointerTy->getPointerElementType();
@@ -2043,14 +2047,14 @@ public:
NewAllocaBeginOffset(NewAllocaBeginOffset),
NewAllocaEndOffset(NewAllocaEndOffset),
NewAllocaTy(NewAI.getAllocatedType()),
- VecTy(IsVectorPromotable ? cast<VectorType>(NewAllocaTy) : 0),
- ElementTy(VecTy ? VecTy->getElementType() : 0),
+ VecTy(IsVectorPromotable ? cast<VectorType>(NewAllocaTy) : nullptr),
+ ElementTy(VecTy ? VecTy->getElementType() : nullptr),
ElementSize(VecTy ? DL.getTypeSizeInBits(ElementTy) / 8 : 0),
IntTy(IsIntegerPromotable
? Type::getIntNTy(
NewAI.getContext(),
DL.getTypeSizeInBits(NewAI.getAllocatedType()))
- : 0),
+ : nullptr),
BeginOffset(), EndOffset(), IsSplittable(), IsSplit(), OldUse(),
OldPtr(), PHIUsers(PHIUsers), SelectUsers(SelectUsers),
IRB(NewAI.getContext(), ConstantFolder()) {
@@ -2144,7 +2148,7 @@ private:
///
/// You can optionally pass a type to this routine and if that type's ABI
/// alignment is itself suitable, this will return zero.
- unsigned getSliceAlign(Type *Ty = 0) {
+ unsigned getSliceAlign(Type *Ty = nullptr) {
unsigned NewAIAlign = NewAI.getAlignment();
if (!NewAIAlign)
NewAIAlign = DL.getABITypeAlignment(NewAI.getAllocatedType());
@@ -2594,7 +2598,7 @@ private:
unsigned EndIndex = VecTy ? getIndex(NewEndOffset) : 0;
unsigned NumElements = EndIndex - BeginIndex;
IntegerType *SubIntTy
- = IntTy ? Type::getIntNTy(IntTy->getContext(), Size*8) : 0;
+ = IntTy ? Type::getIntNTy(IntTy->getContext(), Size*8) : nullptr;
// Reset the other pointer type to match the register type we're going to
// use, but using the address space of the original other pointer.
@@ -2992,22 +2996,22 @@ static Type *getTypePartition(const DataLayout &DL, Type *Ty,
return stripAggregateTypeWrapping(DL, Ty);
if (Offset > DL.getTypeAllocSize(Ty) ||
(DL.getTypeAllocSize(Ty) - Offset) < Size)
- return 0;
+ return nullptr;
if (SequentialType *SeqTy = dyn_cast<SequentialType>(Ty)) {
// We can't partition pointers...
if (SeqTy->isPointerTy())
- return 0;
+ return nullptr;
Type *ElementTy = SeqTy->getElementType();
uint64_t ElementSize = DL.getTypeAllocSize(ElementTy);
uint64_t NumSkippedElements = Offset / ElementSize;
if (ArrayType *ArrTy = dyn_cast<ArrayType>(SeqTy)) {
if (NumSkippedElements >= ArrTy->getNumElements())
- return 0;
+ return nullptr;
} else if (VectorType *VecTy = dyn_cast<VectorType>(SeqTy)) {
if (NumSkippedElements >= VecTy->getNumElements())
- return 0;
+ return nullptr;
}
Offset -= NumSkippedElements * ElementSize;
@@ -3015,7 +3019,7 @@ static Type *getTypePartition(const DataLayout &DL, Type *Ty,
if (Offset > 0 || Size < ElementSize) {
// Bail if the partition ends in a different array element.
if ((Offset + Size) > ElementSize)
- return 0;
+ return nullptr;
// Recurse through the element type trying to peel off offset bytes.
return getTypePartition(DL, ElementTy, Offset, Size);
}
@@ -3026,20 +3030,20 @@ static Type *getTypePartition(const DataLayout &DL, Type *Ty,
assert(Size > ElementSize);
uint64_t NumElements = Size / ElementSize;
if (NumElements * ElementSize != Size)
- return 0;
+ return nullptr;
return ArrayType::get(ElementTy, NumElements);
}
StructType *STy = dyn_cast<StructType>(Ty);
if (!STy)
- return 0;
+ return nullptr;
const StructLayout *SL = DL.getStructLayout(STy);
if (Offset >= SL->getSizeInBytes())
- return 0;
+ return nullptr;
uint64_t EndOffset = Offset + Size;
if (EndOffset > SL->getSizeInBytes())
- return 0;
+ return nullptr;
unsigned Index = SL->getElementContainingOffset(Offset);
Offset -= SL->getElementOffset(Index);
@@ -3047,12 +3051,12 @@ static Type *getTypePartition(const DataLayout &DL, Type *Ty,
Type *ElementTy = STy->getElementType(Index);
uint64_t ElementSize = DL.getTypeAllocSize(ElementTy);
if (Offset >= ElementSize)
- return 0; // The offset points into alignment padding.
+ return nullptr; // The offset points into alignment padding.
// See if any partition must be contained by the element.
if (Offset > 0 || Size < ElementSize) {
if ((Offset + Size) > ElementSize)
- return 0;
+ return nullptr;
return getTypePartition(DL, ElementTy, Offset, Size);
}
assert(Offset == 0);
@@ -3065,14 +3069,14 @@ static Type *getTypePartition(const DataLayout &DL, Type *Ty,
if (EndOffset < SL->getSizeInBytes()) {
unsigned EndIndex = SL->getElementContainingOffset(EndOffset);
if (Index == EndIndex)
- return 0; // Within a single element and its padding.
+ return nullptr; // Within a single element and its padding.
// Don't try to form "natural" types if the elements don't line up with the
// expected size.
// FIXME: We could potentially recurse down through the last element in the
// sub-struct to find a natural end point.
if (SL->getElementOffset(EndIndex) != EndOffset)
- return 0;
+ return nullptr;
assert(Index < EndIndex);
EE = STy->element_begin() + EndIndex;
@@ -3083,7 +3087,7 @@ static Type *getTypePartition(const DataLayout &DL, Type *Ty,
STy->isPacked());
const StructLayout *SubSL = DL.getStructLayout(SubTy);
if (Size != SubSL->getSizeInBytes())
- return 0; // The sub-struct doesn't have quite the size needed.
+ return nullptr; // The sub-struct doesn't have quite the size needed.
return SubTy;
}
@@ -3108,7 +3112,7 @@ bool SROA::rewritePartition(AllocaInst &AI, AllocaSlices &S,
// Try to compute a friendly type for this partition of the alloca. This
// won't always succeed, in which case we fall back to a legal integer type
// or an i8 array of an appropriate size.
- Type *SliceTy = 0;
+ Type *SliceTy = nullptr;
if (Type *CommonUseTy = findCommonType(B, E, EndOffset))
if (DL->getTypeAllocSize(CommonUseTy) >= SliceSize)
SliceTy = CommonUseTy;
@@ -3155,7 +3159,7 @@ bool SROA::rewritePartition(AllocaInst &AI, AllocaSlices &S,
// the alloca's alignment unconstrained.
if (Alignment <= DL->getABITypeAlignment(SliceTy))
Alignment = 0;
- NewAI = new AllocaInst(SliceTy, 0, Alignment,
+ NewAI = new AllocaInst(SliceTy, nullptr, Alignment,
AI.getName() + ".sroa." + Twine(B - S.begin()), &AI);
++NumNewAllocas;
}
@@ -3494,7 +3498,7 @@ void SROA::deleteDeadInstructions(SmallPtrSet<AllocaInst*, 4> &DeletedAllocas) {
for (Use &Operand : I->operands())
if (Instruction *U = dyn_cast<Instruction>(Operand)) {
// Zero out the operand and see if it becomes trivially dead.
- Operand = 0;
+ Operand = nullptr;
if (isInstructionTriviallyDead(U))
DeadInsts.insert(U);
}
@@ -3612,7 +3616,7 @@ bool SROA::runOnFunction(Function &F) {
DL = &DLP->getDataLayout();
DominatorTreeWrapperPass *DTWP =
getAnalysisIfAvailable<DominatorTreeWrapperPass>();
- DT = DTWP ? &DTWP->getDomTree() : 0;
+ DT = DTWP ? &DTWP->getDomTree() : nullptr;
BasicBlock &EntryBB = F.getEntryBlock();
for (BasicBlock::iterator I = EntryBB.begin(), E = std::prev(EntryBB.end());
diff --git a/lib/Transforms/Scalar/SampleProfile.cpp b/lib/Transforms/Scalar/SampleProfile.cpp
index 20d6daa..8e557aa 100644
--- a/lib/Transforms/Scalar/SampleProfile.cpp
+++ b/lib/Transforms/Scalar/SampleProfile.cpp
@@ -22,8 +22,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "sample-profile"
-
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
@@ -54,6 +52,8 @@
using namespace llvm;
+#define DEBUG_TYPE "sample-profile"
+
// Command line option to specify the file to read samples from. This is
// mainly used for debugging.
static cl::opt<std::string> SampleProfileFile(
@@ -120,8 +120,8 @@ typedef DenseMap<BasicBlock *, SmallVector<BasicBlock *, 8>> BlockEdgeMap;
class SampleFunctionProfile {
public:
SampleFunctionProfile()
- : TotalSamples(0), TotalHeadSamples(0), HeaderLineno(0), DT(0), PDT(0),
- LI(0), Ctx(0) {}
+ : TotalSamples(0), TotalHeadSamples(0), HeaderLineno(0), DT(nullptr),
+ PDT(nullptr), LI(nullptr), Ctx(nullptr) {}
unsigned getFunctionLoc(Function &F);
bool emitAnnotations(Function &F, DominatorTree *DomTree,
@@ -315,7 +315,7 @@ protected:
/// \brief Name of the profile file to load.
StringRef Filename;
- /// \brief Flag indicating whether the profile input loaded succesfully.
+ /// \brief Flag indicating whether the profile input loaded successfully.
bool ProfileIsValid;
};
}
diff --git a/lib/Transforms/Scalar/Scalar.cpp b/lib/Transforms/Scalar/Scalar.cpp
index e950eba..f8f828c 100644
--- a/lib/Transforms/Scalar/Scalar.cpp
+++ b/lib/Transforms/Scalar/Scalar.cpp
@@ -64,6 +64,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializeStructurizeCFGPass(Registry);
initializeSinkingPass(Registry);
initializeTailCallElimPass(Registry);
+ initializeSeparateConstOffsetFromGEPPass(Registry);
}
void LLVMInitializeScalarOpts(LLVMPassRegistryRef R) {
@@ -181,6 +182,7 @@ void LLVMAddDemoteMemoryToRegisterPass(LLVMPassManagerRef PM) {
void LLVMAddVerifierPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createVerifierPass());
+ // FIXME: should this also add createDebugInfoVerifierPass()?
}
void LLVMAddCorrelatedValuePropagationPass(LLVMPassManagerRef PM) {
diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
index e7b5ab2..58192fc 100644
--- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -19,7 +19,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "scalarrepl"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
@@ -52,6 +51,8 @@
#include "llvm/Transforms/Utils/SSAUpdater.h"
using namespace llvm;
+#define DEBUG_TYPE "scalarrepl"
+
STATISTIC(NumReplaced, "Number of allocas broken up");
STATISTIC(NumPromoted, "Number of allocas promoted");
STATISTIC(NumAdjusted, "Number of scalar allocas adjusted to allow promotion");
@@ -304,7 +305,7 @@ public:
explicit ConvertToScalarInfo(unsigned Size, const DataLayout &DL,
unsigned SLT)
: AllocaSize(Size), DL(DL), ScalarLoadThreshold(SLT), IsNotTrivial(false),
- ScalarKind(Unknown), VectorTy(0), HadNonMemTransferAccess(false),
+ ScalarKind(Unknown), VectorTy(nullptr), HadNonMemTransferAccess(false),
HadDynamicAccess(false) { }
AllocaInst *TryConvert(AllocaInst *AI);
@@ -332,8 +333,8 @@ private:
AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) {
// If we can't convert this scalar, or if mem2reg can trivially do it, bail
// out.
- if (!CanConvertToScalar(AI, 0, 0) || !IsNotTrivial)
- return 0;
+ if (!CanConvertToScalar(AI, 0, nullptr) || !IsNotTrivial)
+ return nullptr;
// If an alloca has only memset / memcpy uses, it may still have an Unknown
// ScalarKind. Treat it as an Integer below.
@@ -361,23 +362,24 @@ AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) {
// Do not convert to scalar integer if the alloca size exceeds the
// scalar load threshold.
if (BitWidth > ScalarLoadThreshold)
- return 0;
+ return nullptr;
if ((ScalarKind == ImplicitVector || ScalarKind == Integer) &&
!HadNonMemTransferAccess && !DL.fitsInLegalInteger(BitWidth))
- return 0;
+ return nullptr;
// Dynamic accesses on integers aren't yet supported. They need us to shift
// by a dynamic amount which could be difficult to work out as we might not
// know whether to use a left or right shift.
if (ScalarKind == Integer && HadDynamicAccess)
- return 0;
+ return nullptr;
DEBUG(dbgs() << "CONVERT TO SCALAR INTEGER: " << *AI << "\n");
// Create and insert the integer alloca.
NewTy = IntegerType::get(AI->getContext(), BitWidth);
}
- AllocaInst *NewAI = new AllocaInst(NewTy, 0, "", AI->getParent()->begin());
- ConvertUsesToScalar(AI, NewAI, 0, 0);
+ AllocaInst *NewAI = new AllocaInst(NewTy, nullptr, "",
+ AI->getParent()->begin());
+ ConvertUsesToScalar(AI, NewAI, 0, nullptr);
return NewAI;
}
@@ -508,7 +510,7 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset,
// Compute the offset that this GEP adds to the pointer.
SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end());
- Value *GEPNonConstantIdx = 0;
+ Value *GEPNonConstantIdx = nullptr;
if (!GEP->hasAllConstantIndices()) {
if (!isa<VectorType>(PtrTy->getElementType()))
return false;
@@ -564,7 +566,7 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset,
if (NonConstantIdx)
return false;
ConstantInt *Len = dyn_cast<ConstantInt>(MTI->getLength());
- if (Len == 0 || Len->getZExtValue() != AllocaSize || Offset != 0)
+ if (!Len || Len->getZExtValue() != AllocaSize || Offset != 0)
return false;
IsNotTrivial = true; // Can't be mem2reg'd.
@@ -608,7 +610,7 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(User)) {
// Compute the offset that this GEP adds to the pointer.
SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end());
- Value* GEPNonConstantIdx = 0;
+ Value* GEPNonConstantIdx = nullptr;
if (!GEP->hasAllConstantIndices()) {
assert(!NonConstantIdx &&
"Dynamic GEP reading from dynamic GEP unsupported");
@@ -671,7 +673,7 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
Instruction *Old = Builder.CreateLoad(NewAI, NewAI->getName()+".in");
Value *New = ConvertScalar_InsertValue(
ConstantInt::get(User->getContext(), APVal),
- Old, Offset, 0, Builder);
+ Old, Offset, nullptr, Builder);
Builder.CreateStore(New, NewAI);
// If the load we just inserted is now dead, then the memset overwrote
@@ -809,7 +811,7 @@ ConvertScalar_ExtractValue(Value *FromVal, Type *ToType,
for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) {
Value *Elt = ConvertScalar_ExtractValue(FromVal, ST->getElementType(i),
Offset+Layout.getElementOffsetInBits(i),
- 0, Builder);
+ nullptr, Builder);
Res = Builder.CreateInsertValue(Res, Elt, i);
}
return Res;
@@ -822,7 +824,8 @@ ConvertScalar_ExtractValue(Value *FromVal, Type *ToType,
Value *Res = UndefValue::get(AT);
for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) {
Value *Elt = ConvertScalar_ExtractValue(FromVal, AT->getElementType(),
- Offset+i*EltSize, 0, Builder);
+ Offset+i*EltSize, nullptr,
+ Builder);
Res = Builder.CreateInsertValue(Res, Elt, i);
}
return Res;
@@ -938,7 +941,7 @@ ConvertScalar_InsertValue(Value *SV, Value *Old,
Value *Elt = Builder.CreateExtractValue(SV, i);
Old = ConvertScalar_InsertValue(Elt, Old,
Offset+Layout.getElementOffsetInBits(i),
- 0, Builder);
+ nullptr, Builder);
}
return Old;
}
@@ -949,7 +952,8 @@ ConvertScalar_InsertValue(Value *SV, Value *Old,
uint64_t EltSize = DL.getTypeAllocSizeInBits(AT->getElementType());
for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) {
Value *Elt = Builder.CreateExtractValue(SV, i);
- Old = ConvertScalar_InsertValue(Elt, Old, Offset+i*EltSize, 0, Builder);
+ Old = ConvertScalar_InsertValue(Elt, Old, Offset+i*EltSize, nullptr,
+ Builder);
}
return Old;
}
@@ -1024,7 +1028,7 @@ bool SROA::runOnFunction(Function &F) {
return false;
DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : 0;
+ DL = DLP ? &DLP->getDataLayout() : nullptr;
bool Changed = performPromotion(F);
@@ -1054,7 +1058,7 @@ class AllocaPromoter : public LoadAndStorePromoter {
public:
AllocaPromoter(const SmallVectorImpl<Instruction*> &Insts, SSAUpdater &S,
DIBuilder *DB)
- : LoadAndStorePromoter(Insts, S), AI(0), DIB(DB) {}
+ : LoadAndStorePromoter(Insts, S), AI(nullptr), DIB(DB) {}
void run(AllocaInst *AI, const SmallVectorImpl<Instruction*> &Insts) {
// Remember which alloca we're promoting (for isInstInList).
@@ -1100,7 +1104,7 @@ public:
for (SmallVectorImpl<DbgValueInst *>::const_iterator I = DVIs.begin(),
E = DVIs.end(); I != E; ++I) {
DbgValueInst *DVI = *I;
- Value *Arg = NULL;
+ Value *Arg = nullptr;
if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
// If an argument is zero extended then use argument directly. The ZExt
// may be zapped by an optimization pass in future.
@@ -1143,7 +1147,7 @@ static bool isSafeSelectToSpeculate(SelectInst *SI, const DataLayout *DL) {
for (User *U : SI->users()) {
LoadInst *LI = dyn_cast<LoadInst>(U);
- if (LI == 0 || !LI->isSimple()) return false;
+ if (!LI || !LI->isSimple()) return false;
// Both operands to the select need to be dereferenceable, either absolutely
// (e.g. allocas) or at this point because we can see other accesses to it.
@@ -1183,7 +1187,7 @@ static bool isSafePHIToSpeculate(PHINode *PN, const DataLayout *DL) {
unsigned MaxAlign = 0;
for (User *U : PN->users()) {
LoadInst *LI = dyn_cast<LoadInst>(U);
- if (LI == 0 || !LI->isSimple()) return false;
+ if (!LI || !LI->isSimple()) return false;
// For now we only allow loads in the same block as the PHI. This is a
// common case that happens when instcombine merges two loads through a PHI.
@@ -1380,7 +1384,7 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const DataLayout *DL) {
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
BasicBlock *Pred = PN->getIncomingBlock(i);
LoadInst *&Load = InsertedLoads[Pred];
- if (Load == 0) {
+ if (!Load) {
Load = new LoadInst(PN->getIncomingValue(i),
PN->getName() + "." + Pred->getName(),
Pred->getTerminator());
@@ -1400,7 +1404,7 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const DataLayout *DL) {
bool SROA::performPromotion(Function &F) {
std::vector<AllocaInst*> Allocas;
- DominatorTree *DT = 0;
+ DominatorTree *DT = nullptr;
if (HasDomTree)
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
@@ -1537,7 +1541,7 @@ void SROA::DoScalarReplacement(AllocaInst *AI,
if (StructType *ST = dyn_cast<StructType>(AI->getAllocatedType())) {
ElementAllocas.reserve(ST->getNumContainedTypes());
for (unsigned i = 0, e = ST->getNumContainedTypes(); i != e; ++i) {
- AllocaInst *NA = new AllocaInst(ST->getContainedType(i), 0,
+ AllocaInst *NA = new AllocaInst(ST->getContainedType(i), nullptr,
AI->getAlignment(),
AI->getName() + "." + Twine(i), AI);
ElementAllocas.push_back(NA);
@@ -1548,7 +1552,7 @@ void SROA::DoScalarReplacement(AllocaInst *AI,
ElementAllocas.reserve(AT->getNumElements());
Type *ElTy = AT->getElementType();
for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) {
- AllocaInst *NA = new AllocaInst(ElTy, 0, AI->getAlignment(),
+ AllocaInst *NA = new AllocaInst(ElTy, nullptr, AI->getAlignment(),
AI->getName() + "." + Twine(i), AI);
ElementAllocas.push_back(NA);
WorkList.push_back(NA); // Add to worklist for recursive processing
@@ -1577,7 +1581,7 @@ void SROA::DeleteDeadInstructions() {
// Zero out the operand and see if it becomes trivially dead.
// (But, don't add allocas to the dead instruction list -- they are
// already on the worklist and will be deleted separately.)
- *OI = 0;
+ *OI = nullptr;
if (isInstructionTriviallyDead(U) && !isa<AllocaInst>(U))
DeadInsts.push_back(U);
}
@@ -1604,12 +1608,10 @@ void SROA::isSafeForScalarRepl(Instruction *I, uint64_t Offset,
isSafeForScalarRepl(GEPI, GEPOffset, Info);
} else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(User)) {
ConstantInt *Length = dyn_cast<ConstantInt>(MI->getLength());
- if (Length == 0)
- return MarkUnsafe(Info, User);
- if (Length->isNegative())
+ if (!Length || Length->isNegative())
return MarkUnsafe(Info, User);
- isSafeMemAccess(Offset, Length->getZExtValue(), 0,
+ isSafeMemAccess(Offset, Length->getZExtValue(), nullptr,
U.getOperandNo() == 0, Info, MI,
true /*AllowWholeAccess*/);
} else if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
@@ -1744,12 +1746,12 @@ static bool isHomogeneousAggregate(Type *T, unsigned &NumElts,
Type *&EltTy) {
if (ArrayType *AT = dyn_cast<ArrayType>(T)) {
NumElts = AT->getNumElements();
- EltTy = (NumElts == 0 ? 0 : AT->getElementType());
+ EltTy = (NumElts == 0 ? nullptr : AT->getElementType());
return true;
}
if (StructType *ST = dyn_cast<StructType>(T)) {
NumElts = ST->getNumContainedTypes();
- EltTy = (NumElts == 0 ? 0 : ST->getContainedType(0));
+ EltTy = (NumElts == 0 ? nullptr : ST->getContainedType(0));
for (unsigned n = 1; n < NumElts; ++n) {
if (ST->getContainedType(n) != EltTy)
return false;
@@ -2038,7 +2040,7 @@ void SROA::RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset,
// In this case, it must be the last GEP operand which is dynamic so keep that
// aside until we've found the constant GEP offset then add it back in at the
// end.
- Value* NonConstantIdx = 0;
+ Value* NonConstantIdx = nullptr;
if (!GEPI->hasAllConstantIndices())
NonConstantIdx = Indices.pop_back_val();
Offset += DL->getIndexedOffset(GEPI->getPointerOperandType(), Indices);
@@ -2108,7 +2110,8 @@ void SROA::RewriteLifetimeIntrinsic(IntrinsicInst *II, AllocaInst *AI,
if (NewOffset) {
// Splice the first element and index 'NewOffset' bytes in. SROA will
// split the alloca again later.
- Value *V = Builder.CreateBitCast(NewElts[Idx], Builder.getInt8PtrTy());
+ unsigned AS = AI->getType()->getAddressSpace();
+ Value *V = Builder.CreateBitCast(NewElts[Idx], Builder.getInt8PtrTy(AS));
V = Builder.CreateGEP(V, Builder.getInt64(NewOffset));
IdxTy = NewElts[Idx]->getAllocatedType();
@@ -2155,7 +2158,7 @@ SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
// appropriate type. The "Other" pointer is the pointer that goes to memory
// that doesn't have anything to do with the alloca that we are promoting. For
// memset, this Value* stays null.
- Value *OtherPtr = 0;
+ Value *OtherPtr = nullptr;
unsigned MemAlignment = MI->getAlignment();
if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) { // memmove/memcopy
if (Inst == MTI->getRawDest())
@@ -2207,7 +2210,7 @@ SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
// If this is a memcpy/memmove, emit a GEP of the other element address.
- Value *OtherElt = 0;
+ Value *OtherElt = nullptr;
unsigned OtherEltAlign = MemAlignment;
if (OtherPtr) {
@@ -2449,7 +2452,7 @@ SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
// There are two forms here: AI could be an array or struct. Both cases
// have different ways to compute the element offset.
- const StructLayout *Layout = 0;
+ const StructLayout *Layout = nullptr;
uint64_t ArrayEltBitOffset = 0;
if (StructType *EltSTy = dyn_cast<StructType>(AllocaEltTy)) {
Layout = DL->getStructLayout(EltSTy);
diff --git a/lib/Transforms/Scalar/Scalarizer.cpp b/lib/Transforms/Scalar/Scalarizer.cpp
index 006375c..7a73f11 100644
--- a/lib/Transforms/Scalar/Scalarizer.cpp
+++ b/lib/Transforms/Scalar/Scalarizer.cpp
@@ -14,7 +14,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "scalarizer"
#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
@@ -25,6 +24,8 @@
using namespace llvm;
+#define DEBUG_TYPE "scalarizer"
+
namespace {
// Used to store the scattered form of a vector.
typedef SmallVector<Value *, 8> ValueVector;
@@ -48,7 +49,7 @@ public:
// insert them before BBI in BB. If Cache is nonnull, use it to cache
// the results.
Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v,
- ValueVector *cachePtr = 0);
+ ValueVector *cachePtr = nullptr);
// Return component I, creating a new Value for it if necessary.
Value *operator[](unsigned I);
@@ -101,7 +102,7 @@ struct BinarySplitter {
// Information about a load or store that we're scalarizing.
struct VectorLayout {
- VectorLayout() : VecTy(0), ElemTy(0), VecAlign(0), ElemSize(0) {}
+ VectorLayout() : VecTy(nullptr), ElemTy(nullptr), VecAlign(0), ElemSize(0) {}
// Return the alignment of element I.
uint64_t getElemAlign(unsigned I) {
@@ -186,9 +187,9 @@ Scatterer::Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v,
Ty = PtrTy->getElementType();
Size = Ty->getVectorNumElements();
if (!CachePtr)
- Tmp.resize(Size, 0);
+ Tmp.resize(Size, nullptr);
else if (CachePtr->empty())
- CachePtr->resize(Size, 0);
+ CachePtr->resize(Size, nullptr);
else
assert(Size == CachePtr->size() && "Inconsistent vector sizes");
}
@@ -241,7 +242,7 @@ bool Scalarizer::doInitialization(Module &M) {
bool Scalarizer::runOnFunction(Function &F) {
DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : 0;
+ DL = DLP ? &DLP->getDataLayout() : nullptr;
for (Function::iterator BBI = F.begin(), BBE = F.end(); BBI != BBE; ++BBI) {
BasicBlock *BB = BBI;
for (BasicBlock::iterator II = BB->begin(), IE = BB->end(); II != IE;) {
diff --git a/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
new file mode 100644
index 0000000..b8529e1
--- /dev/null
+++ b/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
@@ -0,0 +1,623 @@
+//===-- SeparateConstOffsetFromGEP.cpp - ------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Loop unrolling may create many similar GEPs for array accesses.
+// e.g., a 2-level loop
+//
+// float a[32][32]; // global variable
+//
+// for (int i = 0; i < 2; ++i) {
+// for (int j = 0; j < 2; ++j) {
+// ...
+// ... = a[x + i][y + j];
+// ...
+// }
+// }
+//
+// will probably be unrolled to:
+//
+// gep %a, 0, %x, %y; load
+// gep %a, 0, %x, %y + 1; load
+// gep %a, 0, %x + 1, %y; load
+// gep %a, 0, %x + 1, %y + 1; load
+//
+// LLVM's GVN does not use partial redundancy elimination yet, and is thus
+// unable to reuse (gep %a, 0, %x, %y). As a result, this misoptimization incurs
+// a significant slowdown on targets with limited addressing modes. For instance,
+// because the PTX target does not support the reg+reg addressing mode, the
+// NVPTX backend emits PTX code that literally computes the pointer address of
+// each GEP, wasting tons of registers. It emits the following PTX for the
+// first load and similar PTX for other loads.
+//
+// mov.u32 %r1, %x;
+// mov.u32 %r2, %y;
+// mul.wide.u32 %rl2, %r1, 128;
+// mov.u64 %rl3, a;
+// add.s64 %rl4, %rl3, %rl2;
+// mul.wide.u32 %rl5, %r2, 4;
+// add.s64 %rl6, %rl4, %rl5;
+// ld.global.f32 %f1, [%rl6];
+//
+// To reduce the register pressure, the optimization implemented in this file
+// merges the common part of a group of GEPs, so we can compute each pointer
+// address by adding a simple offset to the common part, saving many registers.
+//
+// It works by splitting each GEP into a variadic base and a constant offset.
+// The variadic base can be computed once and reused by multiple GEPs, and the
+// constant offsets can be nicely folded into the reg+immediate addressing mode
+// (supported by most targets) without using any extra register.
+//
+// For instance, we transform the four GEPs and four loads in the above example
+// into:
+//
+// base = gep a, 0, x, y
+// load base
+// load base + 1 * sizeof(float)
+// load base + 32 * sizeof(float)
+// load base + 33 * sizeof(float)
+//
+// Given the transformed IR, a backend that supports the reg+immediate
+// addressing mode can easily fold the pointer arithmetic into the loads. For
+// example, the NVPTX backend can fold the pointer arithmetic into the
+// ld.global.f32 instructions, and the resulting PTX uses far fewer registers.
+//
+// mov.u32 %r1, %tid.x;
+// mov.u32 %r2, %tid.y;
+// mul.wide.u32 %rl2, %r1, 128;
+// mov.u64 %rl3, a;
+// add.s64 %rl4, %rl3, %rl2;
+// mul.wide.u32 %rl5, %r2, 4;
+// add.s64 %rl6, %rl4, %rl5;
+// ld.global.f32 %f1, [%rl6]; // so far the same as unoptimized PTX
+// ld.global.f32 %f2, [%rl6+4]; // much better
+// ld.global.f32 %f3, [%rl6+128]; // much better
+// ld.global.f32 %f4, [%rl6+132]; // much better
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
+
+using namespace llvm;
+
+static cl::opt<bool> DisableSeparateConstOffsetFromGEP(
+ "disable-separate-const-offset-from-gep", cl::init(false),
+ cl::desc("Do not separate the constant offset from a GEP instruction"),
+ cl::Hidden);
+
+namespace {
+
+/// \brief A helper class for separating a constant offset from a GEP index.
+///
+/// In real programs, a GEP index may be more complicated than a simple addition
+/// of something and a constant integer that can be trivially split. For
+/// example, to split ((a << 3) | 5) + b, we need to search deeper for the
+/// constant offset, so that we can separate the index into (a << 3) + b and 5.
+///
+/// Therefore, this class looks into the expression that computes a given GEP
+/// index, and tries to find a constant integer that can be hoisted to the
+/// outermost level of the expression as an addition. Not every constant in an
+/// expression can jump out. e.g., we cannot transform (b * (a + 5)) to (b * a +
+/// 5); nor can we transform (3 * (a + 5)) to (3 * a + 5), though in that case
+/// -instcombine has probably already optimized (3 * (a + 5)) to (3 * a + 15).
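+///
+/// As an illustrative sketch of the intended behavior (the actual traversal
+/// is implemented by find() and rebuildWithoutConstantOffset() below),
+/// splitting the index ((a << 3) | 5) + b proceeds roughly as:
+///
+///   find(((a << 3) | 5) + b)
+///     find((a << 3) | 5)   // (a << 3) and 5 share no bits, so | acts as +
+///       find(5) == 5       // the constant offset
+///   UserChain == [5, (a << 3) | 5, ((a << 3) | 5) + b]
+///   rebuildWithoutConstantOffset() == (a << 3) + b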
+class ConstantOffsetExtractor {
+ public:
+ /// Extracts a constant offset from the given GEP index. It outputs the
+ /// numeric value of the extracted constant offset (0 if it fails), and a
+ /// new index representing the remainder (equal to the original index minus
+ /// the constant offset).
+ /// \p Idx The given GEP index
+ /// \p NewIdx The new index to replace
+ /// \p DL The datalayout of the module
+ /// \p IP Calculating the new index requires new instructions. IP indicates
+ /// where to insert them (typically right before the GEP).
+ static int64_t Extract(Value *Idx, Value *&NewIdx, const DataLayout *DL,
+ Instruction *IP);
+ /// Looks for a constant offset without extracting it. The meanings of the
+ /// arguments and the return value are the same as in Extract.
+ static int64_t Find(Value *Idx, const DataLayout *DL);
+
+ private:
+ ConstantOffsetExtractor(const DataLayout *Layout, Instruction *InsertionPt)
+ : DL(Layout), IP(InsertionPt) {}
+ /// Searches the expression that computes V for a constant offset. If the
+ /// search is successful, updates UserChain with the path from V to the
+ /// constant offset.
+ int64_t find(Value *V);
+ /// A helper function to look into both operands of a binary operator U.
+ /// \p IsSub Whether U is a sub operator. If so, we need to negate the
+ /// constant offset at some point.
+ int64_t findInEitherOperand(User *U, bool IsSub);
+ /// After finding the constant offset and how it is reached from the GEP
+ /// index, we build a new index which is a clone of the old one except the
+ /// constant offset is removed. For example, given (a + (b + 5)) and knowing
+ /// the constant offset is 5, this function returns (a + b).
+ ///
+ /// We cannot simply change the constant to zero because the expression that
+ /// computes the index or its intermediate result may be used by others.
+ Value *rebuildWithoutConstantOffset();
+ /// A helper function for rebuildWithoutConstantOffset that rebuilds the
+ /// direct user (U) of the constant offset (C).
+ Value *rebuildLeafWithoutConstantOffset(User *U, Value *C);
+ /// Returns a clone of U with the first occurrence of From replaced by To.
+ Value *cloneAndReplace(User *U, Value *From, Value *To);
+
+ /// Returns true if LHS and RHS have no bits in common, i.e., LHS & RHS == 0.
+ bool NoCommonBits(Value *LHS, Value *RHS) const;
+ /// Computes which bits are known to be one or zero.
+ /// \p KnownOne Mask of all bits that are known to be one.
+ /// \p KnownZero Mask of all bits that are known to be zero.
+ void ComputeKnownBits(Value *V, APInt &KnownOne, APInt &KnownZero) const;
+ /// Finds the first use of Used in U. Returns -1 if not found.
+ static unsigned FindFirstUse(User *U, Value *Used);
+ /// Returns whether OPC (sext or zext) can be distributed to the operands of
+ /// BO. e.g., sext can be distributed to the operands of an "add nsw" because
+ /// sext (add nsw a, b) == add nsw (sext a), (sext b).
+ static bool Distributable(unsigned OPC, BinaryOperator *BO);
+
+ /// The path from the constant offset to the old GEP index. e.g., if the GEP
+ /// index is "a * b + (c + 5)". After running function find, UserChain[0] will
+ /// be the constant 5, UserChain[1] will be the subexpression "c + 5", and
+ /// UserChain[2] will be the entire expression "a * b + (c + 5)".
+ ///
+ /// This path helps rebuildWithoutConstantOffset rebuild the new GEP index.
+ SmallVector<User *, 8> UserChain;
+ /// The data layout of the module. Used in ComputeKnownBits.
+ const DataLayout *DL;
+ Instruction *IP; ///< Insertion position of cloned instructions.
+};
+
+/// \brief A pass that tries to split every GEP in the function into a variadic
+/// base and a constant offset. It is a FunctionPass because searching for the
+/// constant offset may inspect other basic blocks.
+class SeparateConstOffsetFromGEP : public FunctionPass {
+ public:
+ static char ID;
+ SeparateConstOffsetFromGEP() : FunctionPass(ID) {
+ initializeSeparateConstOffsetFromGEPPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<DataLayoutPass>();
+ AU.addRequired<TargetTransformInfo>();
+ }
+ bool runOnFunction(Function &F) override;
+
+ private:
+ /// Tries to split the given GEP into a variadic base and a constant offset,
+ /// and returns true if the splitting succeeds.
+ bool splitGEP(GetElementPtrInst *GEP);
+ /// Finds the constant offsets within the indices and accumulates them. This
+ /// function only inspects the GEP without changing it. The output
+ /// NeedsExtraction indicates whether we can extract a non-zero constant
+ /// offset from any index.
+ int64_t accumulateByteOffset(GetElementPtrInst *GEP, const DataLayout *DL,
+ bool &NeedsExtraction);
+};
+} // anonymous namespace
+
+char SeparateConstOffsetFromGEP::ID = 0;
+INITIALIZE_PASS_BEGIN(
+ SeparateConstOffsetFromGEP, "separate-const-offset-from-gep",
+ "Split GEPs to a variadic base and a constant offset for better CSE", false,
+ false)
+INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
+INITIALIZE_PASS_DEPENDENCY(DataLayoutPass)
+INITIALIZE_PASS_END(
+ SeparateConstOffsetFromGEP, "separate-const-offset-from-gep",
+ "Split GEPs to a variadic base and a constant offset for better CSE", false,
+ false)
+
+FunctionPass *llvm::createSeparateConstOffsetFromGEPPass() {
+ return new SeparateConstOffsetFromGEP();
+}
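+
+// Typical use (an illustrative sketch, not part of this patch): a backend
+// would schedule this pass after the standard optimizations, e.g.
+//   PM.add(createSeparateConstOffsetFromGEPPass());
+// since the extractor relies on -instcombine having already run.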
+
+bool ConstantOffsetExtractor::Distributable(unsigned OPC, BinaryOperator *BO) {
+ assert(OPC == Instruction::SExt || OPC == Instruction::ZExt);
+
+ // sext (add/sub nsw A, B) == add/sub nsw (sext A), (sext B)
+ // zext (add/sub nuw A, B) == add/sub nuw (zext A), (zext B)
+ if (BO->getOpcode() == Instruction::Add ||
+ BO->getOpcode() == Instruction::Sub) {
+ return (OPC == Instruction::SExt && BO->hasNoSignedWrap()) ||
+ (OPC == Instruction::ZExt && BO->hasNoUnsignedWrap());
+ }
+
+ // sext/zext (and/or/xor A, B) == and/or/xor (sext/zext A), (sext/zext B)
+ // -instcombine also leverages this invariant to do the reverse
+ // transformation to reduce integer casts.
+ return BO->getOpcode() == Instruction::And ||
+ BO->getOpcode() == Instruction::Or ||
+ BO->getOpcode() == Instruction::Xor;
+}
+
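+// A worked illustration of why the wrap flags above matter: in i8,
+// sext(add 127, 1) == sext(-128) == -128 because the add wraps, whereas
+// sext(127) + sext(1) == 127 + 1 == 128. So sext distributes only over adds
+// that cannot wrap in the signed sense (nsw); the zext/nuw case is analogous.
+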
+int64_t ConstantOffsetExtractor::findInEitherOperand(User *U, bool IsSub) {
+ assert(U->getNumOperands() == 2);
+ int64_t ConstantOffset = find(U->getOperand(0));
+ // If we found a constant offset in the left operand, stop and return that.
+ // This shortcut might cause us to miss opportunities to combine the
+ // constant offsets in both operands, e.g., (a + 4) + (b + 5) => (a + b) + 9.
+ // However, such cases are probably already handled by -instcombine,
+ // given this pass runs after the standard optimizations.
+ if (ConstantOffset != 0) return ConstantOffset;
+ ConstantOffset = find(U->getOperand(1));
+ // If U is a sub operator, negate the constant offset found in the right
+ // operand.
+ return IsSub ? -ConstantOffset : ConstantOffset;
+}
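+
+// For instance (an illustrative case): for U = a - (b + 5), the left operand
+// yields no constant, the right operand yields 5, and because U is a sub the
+// extracted offset is negated to -5; the index is later rebuilt as a - b.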
+
+int64_t ConstantOffsetExtractor::find(Value *V) {
+ // TODO(jingyue): We can even trace into integer/pointer casts, such as
+ // inttoptr, ptrtoint, bitcast, and addrspacecast. We choose to handle only
+ // integers because it gives good enough results for our benchmarks.
+ assert(V->getType()->isIntegerTy());
+
+ User *U = dyn_cast<User>(V);
+ // We cannot do much with Values that are not a User, such as BasicBlock and
+ // MDNode.
+ if (U == nullptr) return 0;
+
+ int64_t ConstantOffset = 0;
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(U)) {
+ // Hooray, we found it!
+ ConstantOffset = CI->getSExtValue();
+ } else if (Operator *O = dyn_cast<Operator>(U)) {
+ // The GEP index may be more complicated than a simple addition of a
+ // variable and a constant. Therefore, we trace into subexpressions for more
+ // hoisting opportunities.
+ switch (O->getOpcode()) {
+ case Instruction::Add: {
+ ConstantOffset = findInEitherOperand(U, false);
+ break;
+ }
+ case Instruction::Sub: {
+ ConstantOffset = findInEitherOperand(U, true);
+ break;
+ }
+ case Instruction::Or: {
+ // If LHS and RHS don't have common bits, (LHS | RHS) is equivalent to
+ // (LHS + RHS).
+ if (NoCommonBits(U->getOperand(0), U->getOperand(1)))
+ ConstantOffset = findInEitherOperand(U, false);
+ break;
+ }
+ case Instruction::SExt:
+ case Instruction::ZExt: {
+ // We trace into sext/zext if the operator can be distributed to its
+ // operand. e.g., we can transform into "sext (add nsw a, 5)" and
+ // extract constant 5, because
+ // sext (add nsw a, 5) == add nsw (sext a), 5
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(U->getOperand(0))) {
+ if (Distributable(O->getOpcode(), BO))
+ ConstantOffset = find(U->getOperand(0));
+ }
+ break;
+ }
+ }
+ }
+ // If we found a non-zero constant offset, add it to the path for future
+ // transformation (rebuildWithoutConstantOffset). Zero is a valid constant
+ // offset, but doesn't help this optimization.
+ if (ConstantOffset != 0)
+ UserChain.push_back(U);
+ return ConstantOffset;
+}
+
+unsigned ConstantOffsetExtractor::FindFirstUse(User *U, Value *Used) {
+ for (unsigned I = 0, E = U->getNumOperands(); I < E; ++I) {
+ if (U->getOperand(I) == Used)
+ return I;
+ }
+ return -1;
+}
+
+Value *ConstantOffsetExtractor::cloneAndReplace(User *U, Value *From,
+ Value *To) {
+ // Finds in U the first use of From. It is safe to ignore future occurrences
+ // of From, because findInEitherOperand similarly stops searching the right
+ // operand when the first operand has a non-zero constant offset.
+ unsigned OpNo = FindFirstUse(U, From);
+ assert(OpNo != (unsigned)-1 && "UserChain wasn't built correctly");
+
+ // ConstantOffsetExtractor::find only follows Operators (i.e., Instructions
+ // and ConstantExprs). Therefore, U is either an Instruction or a
+ // ConstantExpr.
+ if (Instruction *I = dyn_cast<Instruction>(U)) {
+ Instruction *Clone = I->clone();
+ Clone->setOperand(OpNo, To);
+ Clone->insertBefore(IP);
+ return Clone;
+ }
+ // cast<Constant>(To) is safe because a ConstantExpr only uses Constants.
+ return cast<ConstantExpr>(U)
+ ->getWithOperandReplaced(OpNo, cast<Constant>(To));
+}
+
+Value *ConstantOffsetExtractor::rebuildLeafWithoutConstantOffset(User *U,
+ Value *C) {
+ assert(U->getNumOperands() <= 2 &&
+ "We didn't trace into any operator with more than 2 operands");
+ // If U has only one operand which is the constant offset, removing the
+ // constant offset leaves U as a null value.
+ if (U->getNumOperands() == 1)
+ return Constant::getNullValue(U->getType());
+
+ // U->getNumOperands() == 2
+ unsigned OpNo = FindFirstUse(U, C); // U->getOperand(OpNo) == C
+ assert(OpNo < 2 && "UserChain wasn't built correctly");
+ Value *TheOther = U->getOperand(1 - OpNo); // The other operand of U
+ // If U = C - X, removing C makes U = -X; otherwise U will simply be X.
+ if (!isa<SubOperator>(U) || OpNo == 1)
+ return TheOther;
+ if (isa<ConstantExpr>(U))
+ return ConstantExpr::getNeg(cast<Constant>(TheOther));
+ return BinaryOperator::CreateNeg(TheOther, "", IP);
+}
+
+Value *ConstantOffsetExtractor::rebuildWithoutConstantOffset() {
+ assert(UserChain.size() > 0 && "you at least found a constant, right?");
+ // Start with the constant and go up through UserChain, each time building a
+ // clone of the subexpression but with the constant removed.
+ // e.g., to build a clone of (a + (b + (c + 5))) but with the 5 removed, we
+ // build c first, then (b + c), and finally (a + (b + c)).
+ //
+ // Fast path: if the GEP index is a constant, simply returns 0.
+ if (UserChain.size() == 1)
+ return ConstantInt::get(UserChain[0]->getType(), 0);
+
+ Value *Remainder =
+ rebuildLeafWithoutConstantOffset(UserChain[1], UserChain[0]);
+ for (size_t I = 2; I < UserChain.size(); ++I)
+ Remainder = cloneAndReplace(UserChain[I], UserChain[I - 1], Remainder);
+ return Remainder;
+}
+
+int64_t ConstantOffsetExtractor::Extract(Value *Idx, Value *&NewIdx,
+ const DataLayout *DL,
+ Instruction *IP) {
+ ConstantOffsetExtractor Extractor(DL, IP);
+ // Find a non-zero constant offset first.
+ int64_t ConstantOffset = Extractor.find(Idx);
+ if (ConstantOffset == 0)
+ return 0;
+ // Then rebuild a new index with the constant removed.
+ NewIdx = Extractor.rebuildWithoutConstantOffset();
+ return ConstantOffset;
+}
+
+int64_t ConstantOffsetExtractor::Find(Value *Idx, const DataLayout *DL) {
+ return ConstantOffsetExtractor(DL, nullptr).find(Idx);
+}
+
+void ConstantOffsetExtractor::ComputeKnownBits(Value *V, APInt &KnownOne,
+ APInt &KnownZero) const {
+ IntegerType *IT = cast<IntegerType>(V->getType());
+ KnownOne = APInt(IT->getBitWidth(), 0);
+ KnownZero = APInt(IT->getBitWidth(), 0);
+ llvm::computeKnownBits(V, KnownZero, KnownOne, DL, 0);
+}
+
+bool ConstantOffsetExtractor::NoCommonBits(Value *LHS, Value *RHS) const {
+ assert(LHS->getType() == RHS->getType() &&
+ "LHS and RHS should have the same type");
+ APInt LHSKnownOne, LHSKnownZero, RHSKnownOne, RHSKnownZero;
+ ComputeKnownBits(LHS, LHSKnownOne, LHSKnownZero);
+ ComputeKnownBits(RHS, RHSKnownOne, RHSKnownZero);
+ return (LHSKnownZero | RHSKnownZero).isAllOnesValue();
+}
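+
+// Illustrative numbers: for LHS = (a << 3) the low three bits are known zero,
+// so LHSKnownZero covers bits 0..2; for RHS = 5 (0b101) every bit above bit 2
+// is known zero. The union of the two known-zero masks is all-ones, hence
+// LHS & RHS == 0 and (a << 3) | 5 may be treated as (a << 3) + 5.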
+
+int64_t SeparateConstOffsetFromGEP::accumulateByteOffset(
+ GetElementPtrInst *GEP, const DataLayout *DL, bool &NeedsExtraction) {
+ NeedsExtraction = false;
+ int64_t AccumulativeByteOffset = 0;
+ gep_type_iterator GTI = gep_type_begin(*GEP);
+ for (unsigned I = 1, E = GEP->getNumOperands(); I != E; ++I, ++GTI) {
+ if (isa<SequentialType>(*GTI)) {
+ // Tries to extract a constant offset from this GEP index.
+ int64_t ConstantOffset =
+ ConstantOffsetExtractor::Find(GEP->getOperand(I), DL);
+ if (ConstantOffset != 0) {
+ NeedsExtraction = true;
+ // A GEP may have multiple indices. We accumulate the extracted
+ // constant offsets into a byte offset, and later offset the remainder of
+ // the original GEP with this byte offset.
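+ // Illustrative numbers, reusing the file-header example: for
+ // gep %a, 0, %x + 1, %y + 1 over float a[32][32], the extracted
+ // offsets 1 and 1 accumulate to 1 * 128 + 1 * 4 = 132 bytes -- the
+ // +132 folded into the last ld.global.f32 above.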
+ AccumulativeByteOffset +=
+ ConstantOffset * DL->getTypeAllocSize(GTI.getIndexedType());
+ }
+ }
+ }
+ return AccumulativeByteOffset;
+}
+
+bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
+ // Skip vector GEPs.
+ if (GEP->getType()->isVectorTy())
+ return false;
+
+ // The backend can already nicely handle the case where all indices are
+ // constant.
+ if (GEP->hasAllConstantIndices())
+ return false;
+
+ bool Changed = false;
+
+ // Strip explicit integer casts off the indices. Eliminating these casts can
+ // make the subsequent optimization simpler: ConstantOffsetExtractor needn't
+ // trace into these casts.
+ if (GEP->isInBounds()) {
+ // Doing this to inbounds GEPs is safe because their indices are guaranteed
+ // to be non-negative and in bounds.
+ gep_type_iterator GTI = gep_type_begin(*GEP);
+ for (unsigned I = 1, E = GEP->getNumOperands(); I != E; ++I, ++GTI) {
+ if (isa<SequentialType>(*GTI)) {
+ if (Operator *O = dyn_cast<Operator>(GEP->getOperand(I))) {
+ if (O->getOpcode() == Instruction::SExt ||
+ O->getOpcode() == Instruction::ZExt) {
+ GEP->setOperand(I, O->getOperand(0));
+ Changed = true;
+ }
+ }
+ }
+ }
+ }
+
+ const DataLayout *DL = &getAnalysis<DataLayoutPass>().getDataLayout();
+ bool NeedsExtraction;
+ int64_t AccumulativeByteOffset =
+ accumulateByteOffset(GEP, DL, NeedsExtraction);
+
+ if (!NeedsExtraction)
+ return Changed;
+ // Before really splitting the GEP, check whether the backend supports the
+ // addressing mode we are about to produce. If not, this splitting probably
+ // won't be beneficial.
+ TargetTransformInfo &TTI = getAnalysis<TargetTransformInfo>();
+ if (!TTI.isLegalAddressingMode(GEP->getType()->getElementType(),
+ /*BaseGV=*/nullptr, AccumulativeByteOffset,
+ /*HasBaseReg=*/true, /*Scale=*/0)) {
+ return Changed;
+ }
+
+ // Remove the constant offset in each GEP index. The resultant GEP computes
+ // the variadic base.
+ gep_type_iterator GTI = gep_type_begin(*GEP);
+ for (unsigned I = 1, E = GEP->getNumOperands(); I != E; ++I, ++GTI) {
+ if (isa<SequentialType>(*GTI)) {
+ Value *NewIdx = nullptr;
+ // Tries to extract a constant offset from this GEP index.
+ int64_t ConstantOffset =
+ ConstantOffsetExtractor::Extract(GEP->getOperand(I), NewIdx, DL, GEP);
+ if (ConstantOffset != 0) {
+ assert(NewIdx != nullptr &&
+ "ConstantOffset != 0 implies NewIdx is set");
+ GEP->setOperand(I, NewIdx);
+ // Clear the inbounds attribute because the new index may be out of bounds.
+ // e.g.,
+ //
+ // b = add i64 a, 5
+ // addr = gep inbounds float* p, i64 b
+ //
+ // is transformed to:
+ //
+ // addr2 = gep float* p, i64 a
+ // addr = gep float* addr2, i64 5
+ //
+ // If a is -4, although the old index b is in bounds, the new index a is
+ // out of bounds. http://llvm.org/docs/LangRef.html#id181 says "if the
+ // inbounds keyword is not present, the offsets are added to the base
+ // address with silently-wrapping two's complement arithmetic".
+ // Therefore, the final code will be semantically equivalent.
+ //
+ // TODO(jingyue): do some range analysis to keep as many inbounds as
+ // possible. GEPs with inbounds are more friendly to alias analysis.
+ GEP->setIsInBounds(false);
+ Changed = true;
+ }
+ }
+ }
+
+ // Offsets the base with the accumulative byte offset.
+ //
+ // %gep ; the base
+ // ... %gep ...
+ //
+ // => add the offset
+ //
+ // %gep2 ; clone of %gep
+ // %new.gep = gep %gep2, <offset / sizeof(*%gep)>
+ // %gep ; will be removed
+ // ... %gep ...
+ //
+ // => replace all uses of %gep with %new.gep and remove %gep
+ //
+ // %gep2 ; clone of %gep
+ // %new.gep = gep %gep2, <offset / sizeof(*%gep)>
+ // ... %new.gep ...
+ //
+ // If AccumulativeByteOffset is not a multiple of sizeof(*%gep), we emit an
+ // uglygep (http://llvm.org/docs/GetElementPtr.html#what-s-an-uglygep):
+ // bitcast %gep2 to i8*, add the offset, and bitcast the result back to the
+ // type of %gep.
+ //
+ // %gep2 ; clone of %gep
+ // %0 = bitcast %gep2 to i8*
+ // %uglygep = gep %0, <offset>
+ // %new.gep = bitcast %uglygep to <type of %gep>
+ // ... %new.gep ...
+ Instruction *NewGEP = GEP->clone();
+ NewGEP->insertBefore(GEP);
+
+ Type *IntPtrTy = DL->getIntPtrType(GEP->getType());
+ uint64_t ElementTypeSizeOfGEP =
+ DL->getTypeAllocSize(GEP->getType()->getElementType());
+ if (AccumulativeByteOffset % ElementTypeSizeOfGEP == 0) {
+ // Very likely. As long as %gep is naturally aligned, the byte offset we
+ // extracted should be a multiple of sizeof(*%gep).
+ // Per the C standard, signed / unsigned = unsigned. Therefore, we
+ // cast ElementTypeSizeOfGEP to signed.
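+ // e.g., with AccumulativeByteOffset == -8 and an element size of 4,
+ // -8 / int64_t(4) gives the intended -2, while -8 / uint64_t(4) would
+ // first convert -8 to a huge unsigned value.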
+ int64_t Index =
+ AccumulativeByteOffset / static_cast<int64_t>(ElementTypeSizeOfGEP);
+ NewGEP = GetElementPtrInst::Create(
+ NewGEP, ConstantInt::get(IntPtrTy, Index, true), GEP->getName(), GEP);
+ } else {
+ // Unlikely but possible. For example,
+ // #pragma pack(1)
+ // struct S {
+ // int a[3];
+ // int64 b[8];
+ // };
+ // #pragma pack()
+ //
+ // Suppose the gep before extraction is &s[i + 1].b[j + 3]. After
+ // extraction, it becomes &s[i].b[j] and AccumulativeByteOffset is
+ // sizeof(S) + 3 * sizeof(int64) = 100, which is not a multiple of
+ // sizeof(int64).
+ //
+ // Emit an uglygep in this case.
+ Type *I8PtrTy = Type::getInt8PtrTy(GEP->getContext(),
+ GEP->getPointerAddressSpace());
+ NewGEP = new BitCastInst(NewGEP, I8PtrTy, "", GEP);
+ NewGEP = GetElementPtrInst::Create(
+ NewGEP, ConstantInt::get(IntPtrTy, AccumulativeByteOffset, true),
+ "uglygep", GEP);
+ if (GEP->getType() != I8PtrTy)
+ NewGEP = new BitCastInst(NewGEP, GEP->getType(), GEP->getName(), GEP);
+ }
+
+ GEP->replaceAllUsesWith(NewGEP);
+ GEP->eraseFromParent();
+
+ return true;
+}
+
+bool SeparateConstOffsetFromGEP::runOnFunction(Function &F) {
+ if (DisableSeparateConstOffsetFromGEP)
+ return false;
+
+ bool Changed = false;
+ for (Function::iterator B = F.begin(), BE = F.end(); B != BE; ++B) {
+ for (BasicBlock::iterator I = B->begin(), IE = B->end(); I != IE; ) {
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I++)) {
+ Changed |= splitGEP(GEP);
+ }
+ // No need to split GEP ConstantExprs because all their indices are
+ // constant already.
+ }
+ }
+ return Changed;
+}
diff --git a/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/lib/Transforms/Scalar/SimplifyCFGPass.cpp
index ceae5a7..5d5606b 100644
--- a/lib/Transforms/Scalar/SimplifyCFGPass.cpp
+++ b/lib/Transforms/Scalar/SimplifyCFGPass.cpp
@@ -21,7 +21,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "simplifycfg"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
@@ -38,6 +37,8 @@
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
+#define DEBUG_TYPE "simplifycfg"
+
STATISTIC(NumSimpl, "Number of blocks simplified");
namespace {
@@ -71,7 +72,7 @@ FunctionPass *llvm::createCFGSimplificationPass() {
static bool mergeEmptyReturnBlocks(Function &F) {
bool Changed = false;
- BasicBlock *RetBlock = 0;
+ BasicBlock *RetBlock = nullptr;
// Scan all the blocks in the function, looking for empty return blocks.
for (Function::iterator BBI = F.begin(), E = F.end(); BBI != E; ) {
@@ -79,7 +80,7 @@ static bool mergeEmptyReturnBlocks(Function &F) {
// Only look at return blocks.
ReturnInst *Ret = dyn_cast<ReturnInst>(BB.getTerminator());
- if (Ret == 0) continue;
+ if (!Ret) continue;
// Only look at the block if it is empty or the only other thing in it is a
// single PHI node that is the operand to the return.
@@ -98,7 +99,7 @@ static bool mergeEmptyReturnBlocks(Function &F) {
}
// If this is the first returning block, remember it and keep going.
- if (RetBlock == 0) {
+ if (!RetBlock) {
RetBlock = &BB;
continue;
}
@@ -119,7 +120,7 @@ static bool mergeEmptyReturnBlocks(Function &F) {
// If the canonical return block has no PHI node, create one now.
PHINode *RetBlockPHI = dyn_cast<PHINode>(RetBlock->begin());
- if (RetBlockPHI == 0) {
+ if (!RetBlockPHI) {
Value *InVal = cast<ReturnInst>(RetBlock->getTerminator())->getOperand(0);
pred_iterator PB = pred_begin(RetBlock), PE = pred_end(RetBlock);
RetBlockPHI = PHINode::Create(Ret->getOperand(0)->getType(),
@@ -173,7 +174,7 @@ bool CFGSimplifyPass::runOnFunction(Function &F) {
const TargetTransformInfo &TTI = getAnalysis<TargetTransformInfo>();
DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- const DataLayout *DL = DLP ? &DLP->getDataLayout() : 0;
+ const DataLayout *DL = DLP ? &DLP->getDataLayout() : nullptr;
bool EverChanged = removeUnreachableBlocks(F);
EverChanged |= mergeEmptyReturnBlocks(F);
EverChanged |= iterativelySimplifyCFG(F, TTI, DL);
diff --git a/lib/Transforms/Scalar/Sink.cpp b/lib/Transforms/Scalar/Sink.cpp
index 4107374..482c33a 100644
--- a/lib/Transforms/Scalar/Sink.cpp
+++ b/lib/Transforms/Scalar/Sink.cpp
@@ -12,7 +12,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "sink"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
@@ -25,6 +24,8 @@
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
+#define DEBUG_TYPE "sink"
+
STATISTIC(NumSunk, "Number of instructions sunk");
STATISTIC(NumSinkIter, "Number of sinking iterations");
@@ -203,7 +204,7 @@ bool Sinking::IsAcceptableTarget(Instruction *Inst,
// Don't sink instructions into a loop.
Loop *succ = LI->getLoopFor(SuccToSinkTo);
Loop *cur = LI->getLoopFor(Inst->getParent());
- if (succ != 0 && succ != cur)
+ if (succ != nullptr && succ != cur)
return false;
}
@@ -237,14 +238,14 @@ bool Sinking::SinkInstruction(Instruction *Inst,
// SuccToSinkTo - This is the successor to sink this instruction to, once we
// decide.
- BasicBlock *SuccToSinkTo = 0;
+ BasicBlock *SuccToSinkTo = nullptr;
// Instructions can only be sunk if all their uses are in blocks
// dominated by one of the successors.
// Look at all the postdominators and see if we can sink it in one.
DomTreeNode *DTN = DT->getNode(Inst->getParent());
for (DomTreeNode::iterator I = DTN->begin(), E = DTN->end();
- I != E && SuccToSinkTo == 0; ++I) {
+ I != E && SuccToSinkTo == nullptr; ++I) {
BasicBlock *Candidate = (*I)->getBlock();
if ((*I)->getIDom()->getBlock() == Inst->getParent() &&
IsAcceptableTarget(Inst, Candidate))
@@ -254,13 +255,13 @@ bool Sinking::SinkInstruction(Instruction *Inst,
// If no suitable postdominator was found, look at all the successors and
// decide which one we should sink to, if any.
for (succ_iterator I = succ_begin(Inst->getParent()),
- E = succ_end(Inst->getParent()); I != E && SuccToSinkTo == 0; ++I) {
+ E = succ_end(Inst->getParent()); I != E && !SuccToSinkTo; ++I) {
if (IsAcceptableTarget(Inst, *I))
SuccToSinkTo = *I;
}
// If we couldn't find a block to sink to, ignore this instruction.
- if (SuccToSinkTo == 0)
+ if (!SuccToSinkTo)
return false;
DEBUG(dbgs() << "Sink" << *Inst << " (";
diff --git a/lib/Transforms/Scalar/StructurizeCFG.cpp b/lib/Transforms/Scalar/StructurizeCFG.cpp
index 8fd2268..7b77ae1 100644
--- a/lib/Transforms/Scalar/StructurizeCFG.cpp
+++ b/lib/Transforms/Scalar/StructurizeCFG.cpp
@@ -7,7 +7,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "structurizecfg"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SCCIterator.h"
@@ -21,6 +20,8 @@
using namespace llvm;
using namespace llvm::PatternMatch;
+#define DEBUG_TYPE "structurizecfg"
+
namespace {
// Definition of the complex types used in this pass.
@@ -64,14 +65,14 @@ public:
/// \brief Start a new query
NearestCommonDominator(DominatorTree *DomTree) {
DT = DomTree;
- Result = 0;
+ Result = nullptr;
}
/// \brief Add BB to the resulting dominator
void addBlock(BasicBlock *BB, bool Remember = true) {
DomTreeNode *Node = DT->getNode(BB);
- if (Result == 0) {
+ if (!Result) {
unsigned Numbering = 0;
for (;Node;Node = Node->getIDom())
IndexMap[Node] = ++Numbering;
@@ -279,7 +280,7 @@ bool StructurizeCFG::doInitialization(Region *R, RGPassManager &RGM) {
void StructurizeCFG::orderNodes() {
scc_iterator<Region *> I = scc_begin(ParentRegion);
for (Order.clear(); !I.isAtEnd(); ++I) {
- std::vector<RegionNode *> &Nodes = *I;
+ const std::vector<RegionNode *> &Nodes = *I;
Order.append(Nodes.begin(), Nodes.end());
}
}
@@ -453,10 +454,7 @@ void StructurizeCFG::insertConditions(bool Loops) {
Value *Default = Loops ? BoolTrue : BoolFalse;
SSAUpdater PhiInserter;
- for (BranchVector::iterator I = Conds.begin(),
- E = Conds.end(); I != E; ++I) {
-
- BranchInst *Term = *I;
+ for (BranchInst *Term : Conds) {
assert(Term->isConditional());
BasicBlock *Parent = Term->getParent();
@@ -472,7 +470,7 @@ void StructurizeCFG::insertConditions(bool Loops) {
NearestCommonDominator Dominator(DT);
Dominator.addBlock(Parent, false);
- Value *ParentValue = 0;
+ Value *ParentValue = nullptr;
for (BBPredicates::iterator PI = Preds.begin(), PE = Preds.end();
PI != PE; ++PI) {
@@ -591,7 +589,7 @@ void StructurizeCFG::changeExit(RegionNode *Node, BasicBlock *NewExit,
if (Node->isSubRegion()) {
Region *SubRegion = Node->getNodeAs<Region>();
BasicBlock *OldExit = SubRegion->getExit();
- BasicBlock *Dominator = 0;
+ BasicBlock *Dominator = nullptr;
// Find all the edges from the sub region to the exit
for (pred_iterator I = pred_begin(OldExit), E = pred_end(OldExit);
@@ -678,7 +676,8 @@ BasicBlock *StructurizeCFG::needPostfix(BasicBlock *Flow,
/// \brief Set the previous node
void StructurizeCFG::setPrevNode(BasicBlock *BB) {
- PrevNode = ParentRegion->contains(BB) ? ParentRegion->getBBNode(BB) : 0;
+ PrevNode = ParentRegion->contains(BB) ? ParentRegion->getBBNode(BB)
+ : nullptr;
}
/// \brief Does BB dominate all the predicates of Node?
@@ -699,7 +698,7 @@ bool StructurizeCFG::isPredictableTrue(RegionNode *Node) {
bool Dominated = false;
// Region entry is always true
- if (PrevNode == 0)
+ if (!PrevNode)
return true;
for (BBPredicates::iterator I = Preds.begin(), E = Preds.end();
@@ -806,11 +805,11 @@ void StructurizeCFG::createFlow() {
Conditions.clear();
LoopConds.clear();
- PrevNode = 0;
+ PrevNode = nullptr;
Visited.clear();
while (!Order.empty()) {
- handleLoops(EntryDominatesExit, 0);
+ handleLoops(EntryDominatesExit, nullptr);
}
if (PrevNode)
diff --git a/lib/Transforms/Scalar/TailRecursionElimination.cpp b/lib/Transforms/Scalar/TailRecursionElimination.cpp
index 6d02777..05b9892 100644
--- a/lib/Transforms/Scalar/TailRecursionElimination.cpp
+++ b/lib/Transforms/Scalar/TailRecursionElimination.cpp
@@ -50,12 +50,12 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "tailcallelim"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/InlineCost.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/Loads.h"
@@ -64,6 +64,7 @@
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
@@ -76,6 +77,8 @@
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
+#define DEBUG_TYPE "tailcallelim"
+
STATISTIC(NumEliminated, "Number of tail calls removed");
STATISTIC(NumRetDuped, "Number of returns duplicated");
STATISTIC(NumAccumAdded, "Number of accumulators introduced");
@@ -94,6 +97,9 @@ namespace {
bool runOnFunction(Function &F) override;
private:
+ bool runTRE(Function &F);
+ bool markTails(Function &F, bool &AllCallsAreTailCalls);
+
CallInst *FindTRECandidate(Instruction *I,
bool CannotTailCallElimCallsMarkedTail);
bool EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret,
@@ -131,55 +137,255 @@ void TailCallElim::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<TargetTransformInfo>();
}
-/// CanTRE - Scan the specified basic block for alloca instructions.
-/// If it contains any that are variable-sized or not in the entry block,
-/// returns false.
-static bool CanTRE(AllocaInst *AI) {
- // Because of PR962, we don't TRE allocas outside the entry block.
-
- // If this alloca is in the body of the function, or if it is a variable
- // sized allocation, we cannot tail call eliminate calls marked 'tail'
- // with this mechanism.
- BasicBlock *BB = AI->getParent();
- return BB == &BB->getParent()->getEntryBlock() &&
- isa<ConstantInt>(AI->getArraySize());
+/// \brief Scan the specified function for alloca instructions.
+/// If it contains any dynamic allocas, returns false.
+static bool CanTRE(Function &F) {
+ // Because of PR962, we don't TRE dynamic allocas.
+ for (auto &BB : F) {
+ for (auto &I : BB) {
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(&I)) {
+ if (!AI->isStaticAlloca())
+ return false;
+ }
+ }
+ }
+
+ return true;
}
-namespace {
-struct AllocaCaptureTracker : public CaptureTracker {
- AllocaCaptureTracker() : Captured(false) {}
+bool TailCallElim::runOnFunction(Function &F) {
+ if (skipOptnoneFunction(F))
+ return false;
- void tooManyUses() override { Captured = true; }
+ bool AllCallsAreTailCalls = false;
+ bool Modified = markTails(F, AllCallsAreTailCalls);
+ if (AllCallsAreTailCalls)
+ Modified |= runTRE(F);
+ return Modified;
+}
- bool shouldExplore(const Use *U) override {
- Value *V = U->getUser();
- if (isa<CallInst>(V) || isa<InvokeInst>(V))
- UsesAlloca.insert(V);
- return true;
+namespace {
+struct AllocaDerivedValueTracker {
+ // Start at a root value and walk its use-def chain to mark calls that use the
+ // value or a derived value in AllocaUsers, and places where it may escape in
+ // EscapePoints.
+ void walk(Value *Root) {
+ SmallVector<Use *, 32> Worklist;
+ SmallPtrSet<Use *, 32> Visited;
+
+ auto AddUsesToWorklist = [&](Value *V) {
+ for (auto &U : V->uses()) {
+ if (!Visited.insert(&U))
+ continue;
+ Worklist.push_back(&U);
+ }
+ };
+
+ AddUsesToWorklist(Root);
+
+ while (!Worklist.empty()) {
+ Use *U = Worklist.pop_back_val();
+ Instruction *I = cast<Instruction>(U->getUser());
+
+ switch (I->getOpcode()) {
+ case Instruction::Call:
+ case Instruction::Invoke: {
+ CallSite CS(I);
+ bool IsNocapture = !CS.isCallee(U) &&
+ CS.doesNotCapture(CS.getArgumentNo(U));
+ callUsesLocalStack(CS, IsNocapture);
+ if (IsNocapture) {
+ // If the alloca-derived argument is passed in as nocapture, then it
+ // can't propagate to the call's return. That would be capturing.
+ continue;
+ }
+ break;
+ }
+ case Instruction::Load: {
+ // The result of a load is not alloca-derived (unless an alloca has
+ // otherwise escaped, but this is a local analysis).
+ continue;
+ }
+ case Instruction::Store: {
+ if (U->getOperandNo() == 0)
+ EscapePoints.insert(I);
+ continue; // Stores have no users to analyze.
+ }
+ case Instruction::BitCast:
+ case Instruction::GetElementPtr:
+ case Instruction::PHI:
+ case Instruction::Select:
+ case Instruction::AddrSpaceCast:
+ break;
+ default:
+ EscapePoints.insert(I);
+ break;
+ }
+
+ AddUsesToWorklist(I);
+ }
}
- bool captured(const Use *U) override {
- if (isa<ReturnInst>(U->getUser()))
- return false;
- Captured = true;
- return true;
+ void callUsesLocalStack(CallSite CS, bool IsNocapture) {
+ // Add it to the list of alloca users. If it's already there, skip further
+ // processing.
+ if (!AllocaUsers.insert(CS.getInstruction()))
+ return;
+
+ // If it's nocapture then it can't capture the alloca.
+ if (IsNocapture)
+ return;
+
+ // If it can write to memory, it can leak the alloca value.
+ if (!CS.onlyReadsMemory())
+ EscapePoints.insert(CS.getInstruction());
}
- bool Captured;
- SmallPtrSet<const Value *, 16> UsesAlloca;
+ SmallPtrSet<Instruction *, 32> AllocaUsers;
+ SmallPtrSet<Instruction *, 32> EscapePoints;
};
-} // end anonymous namespace
+}
-bool TailCallElim::runOnFunction(Function &F) {
- if (skipOptnoneFunction(F))
+bool TailCallElim::markTails(Function &F, bool &AllCallsAreTailCalls) {
+ if (F.callsFunctionThatReturnsTwice())
return false;
+ AllCallsAreTailCalls = true;
+
+ // The local stack holds all alloca instructions and all byval arguments.
+ AllocaDerivedValueTracker Tracker;
+ for (Argument &Arg : F.args()) {
+ if (Arg.hasByValAttr())
+ Tracker.walk(&Arg);
+ }
+ for (auto &BB : F) {
+ for (auto &I : BB)
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(&I))
+ Tracker.walk(AI);
+ }
+ bool Modified = false;
+
+ // Track whether a block is reachable after an alloca has escaped. Blocks that
+ // contain the escaping instruction will be marked as being visited without an
+ // escaped alloca, since that is how the block began.
+ enum VisitType {
+ UNVISITED,
+ UNESCAPED,
+ ESCAPED
+ };
+ DenseMap<BasicBlock *, VisitType> Visited;
+
+ // We propagate the fact that an alloca has escaped from block to successor.
+ // Visit the blocks that are propagating the escapedness first. To do this, we
+ // maintain two worklists.
+ SmallVector<BasicBlock *, 32> WorklistUnescaped, WorklistEscaped;
+
+ // We may enter a block and visit it thinking that no alloca has escaped yet,
+ // then see an escape point and go back around a loop edge and come back to
+ // the same block twice. Because of this, we defer setting tail on calls when
+ // we first encounter them in a block. No entry in this list statically uses
+ // an alloca via use-def chain analysis, but one may still reach an alloca
+ // through other means if the block turns out to be reachable after an
+ // escape point.
+ SmallVector<CallInst *, 32> DeferredTails;
+
+ BasicBlock *BB = &F.getEntryBlock();
+ VisitType Escaped = UNESCAPED;
+ do {
+ for (auto &I : *BB) {
+ if (Tracker.EscapePoints.count(&I))
+ Escaped = ESCAPED;
+
+ CallInst *CI = dyn_cast<CallInst>(&I);
+ if (!CI || CI->isTailCall())
+ continue;
+
+ if (CI->doesNotAccessMemory()) {
+ // A call to a readnone function whose arguments are all things computed
+ // outside this function can be marked tail. Even if you stored the
+ // alloca address into a global, a readnone function can't load the
+ // global anyhow.
+ //
+ // Note that this runs whether we know an alloca has escaped or not. If
+ // it has, then we can't trust Tracker.AllocaUsers to be accurate.
+ bool SafeToTail = true;
+ for (auto &Arg : CI->arg_operands()) {
+ if (isa<Constant>(Arg))
+ continue;
+ if (Argument *A = dyn_cast<Argument>(Arg))
+ if (!A->hasByValAttr())
+ continue;
+ SafeToTail = false;
+ break;
+ }
+ if (SafeToTail) {
+ emitOptimizationRemark(
+ F.getContext(), "tailcallelim", F, CI->getDebugLoc(),
+ "marked this readnone call a tail call candidate");
+ CI->setTailCall();
+ Modified = true;
+ continue;
+ }
+ }
+
+ if (Escaped == UNESCAPED && !Tracker.AllocaUsers.count(CI)) {
+ DeferredTails.push_back(CI);
+ } else {
+ AllCallsAreTailCalls = false;
+ }
+ }
+
+ for (auto *SuccBB : make_range(succ_begin(BB), succ_end(BB))) {
+ auto &State = Visited[SuccBB];
+ if (State < Escaped) {
+ State = Escaped;
+ if (State == ESCAPED)
+ WorklistEscaped.push_back(SuccBB);
+ else
+ WorklistUnescaped.push_back(SuccBB);
+ }
+ }
+
+ if (!WorklistEscaped.empty()) {
+ BB = WorklistEscaped.pop_back_val();
+ Escaped = ESCAPED;
+ } else {
+ BB = nullptr;
+ while (!WorklistUnescaped.empty()) {
+ auto *NextBB = WorklistUnescaped.pop_back_val();
+ if (Visited[NextBB] == UNESCAPED) {
+ BB = NextBB;
+ Escaped = UNESCAPED;
+ break;
+ }
+ }
+ }
+ } while (BB);
+
+ for (CallInst *CI : DeferredTails) {
+ if (Visited[CI->getParent()] != ESCAPED) {
+ // If the escape point was part way through the block, calls after the
+ // escape point wouldn't have been put into DeferredTails.
+ emitOptimizationRemark(F.getContext(), "tailcallelim", F,
+ CI->getDebugLoc(),
+ "marked this call a tail call candidate");
+ CI->setTailCall();
+ Modified = true;
+ } else {
+ AllCallsAreTailCalls = false;
+ }
+ }
+
+ return Modified;
+}
+
+bool TailCallElim::runTRE(Function &F) {
// If this function is a varargs function, we won't be able to PHI the args
// right, so don't even try to convert it...
if (F.getFunctionType()->isVarArg()) return false;
TTI = &getAnalysis<TargetTransformInfo>();
- BasicBlock *OldEntry = 0;
+ BasicBlock *OldEntry = nullptr;
bool TailCallsAreMarkedTail = false;
SmallVector<PHINode*, 8> ArgumentPHIs;
bool MadeChange = false;
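
The markTails walk added above is a dataflow propagation with two worklists: blocks reached in the ESCAPED state are drained before UNESCAPED ones, so a block revisited with the stronger state wins. A standalone model of just that propagation, with an invented Block type standing in for BasicBlock and a boolean flag standing in for Tracker.EscapePoints:

    #include <unordered_map>
    #include <vector>

    enum VisitType { UNVISITED, UNESCAPED, ESCAPED };

    struct Block {
      bool HasEscapePoint = false;        // An instruction that leaks an alloca.
      std::vector<Block *> Succs;
    };

    std::unordered_map<Block *, VisitType> propagateEscapes(Block *Entry) {
      std::unordered_map<Block *, VisitType> Visited;
      std::vector<Block *> WorklistEscaped, WorklistUnescaped;
      Visited[Entry] = UNESCAPED;

      Block *BB = Entry;
      VisitType Escaped = UNESCAPED;
      do {
        if (BB->HasEscapePoint)
          Escaped = ESCAPED;              // Everything after this point escapes.

        for (Block *Succ : BB->Succs) {
          VisitType &State = Visited[Succ];
          if (State < Escaped) {          // Only ever upgrade a state.
            State = Escaped;
            if (State == ESCAPED)
              WorklistEscaped.push_back(Succ);
            else
              WorklistUnescaped.push_back(Succ);
          }
        }

        if (!WorklistEscaped.empty()) {   // Escaped blocks drain first.
          BB = WorklistEscaped.back();
          WorklistEscaped.pop_back();
          Escaped = ESCAPED;
        } else {
          BB = nullptr;
          while (!WorklistUnescaped.empty()) {
            Block *Next = WorklistUnescaped.back();
            WorklistUnescaped.pop_back();
            if (Visited[Next] == UNESCAPED) { // May have been upgraded since.
              BB = Next;
              Escaped = UNESCAPED;
              break;
            }
          }
        }
      } while (BB);
      return Visited;
    }

Draining the escaped worklist first means a block queued as UNESCAPED but later upgraded is simply skipped when popped, which is what the inner while loop checks for.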
@@ -188,39 +394,23 @@ bool TailCallElim::runOnFunction(Function &F) {
// marked with the 'tail' attribute, because doing so would cause the stack
// size to increase (real TRE would deallocate variable sized allocas, TRE
// doesn't).
- bool CanTRETailMarkedCall = true;
-
- // Find calls that can be marked tail.
- AllocaCaptureTracker ACT;
- for (Function::iterator BB = F.begin(), EE = F.end(); BB != EE; ++BB) {
- for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
- if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) {
- CanTRETailMarkedCall &= CanTRE(AI);
- PointerMayBeCaptured(AI, &ACT);
- // If any allocas are captured, exit.
- if (ACT.Captured)
- return false;
- }
- }
- }
+ bool CanTRETailMarkedCall = CanTRE(F);
- // Second pass, change any tail recursive calls to loops.
+ // Change any tail recursive calls to loops.
//
// FIXME: The code generator produces really bad code when an 'escaping
// alloca' is changed from being a static alloca to being a dynamic alloca.
// Until this is resolved, disable this transformation if that would ever
// happen. This bug is PR962.
- if (ACT.UsesAlloca.empty()) {
- for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
- if (ReturnInst *Ret = dyn_cast<ReturnInst>(BB->getTerminator())) {
- bool Change = ProcessReturningBlock(Ret, OldEntry, TailCallsAreMarkedTail,
- ArgumentPHIs, !CanTRETailMarkedCall);
- if (!Change && BB->getFirstNonPHIOrDbg() == Ret)
- Change = FoldReturnAndProcessPred(BB, Ret, OldEntry,
- TailCallsAreMarkedTail, ArgumentPHIs,
- !CanTRETailMarkedCall);
- MadeChange |= Change;
- }
+ for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+ if (ReturnInst *Ret = dyn_cast<ReturnInst>(BB->getTerminator())) {
+ bool Change = ProcessReturningBlock(Ret, OldEntry, TailCallsAreMarkedTail,
+ ArgumentPHIs, !CanTRETailMarkedCall);
+ if (!Change && BB->getFirstNonPHIOrDbg() == Ret)
+ Change = FoldReturnAndProcessPred(BB, Ret, OldEntry,
+ TailCallsAreMarkedTail, ArgumentPHIs,
+ !CanTRETailMarkedCall);
+ MadeChange |= Change;
}
}
@@ -229,34 +419,13 @@ bool TailCallElim::runOnFunction(Function &F) {
// with themselves. Check to see if we did and clean up our mess if so. This
// occurs when a function passes an argument straight through to its tail
// call.
- if (!ArgumentPHIs.empty()) {
- for (unsigned i = 0, e = ArgumentPHIs.size(); i != e; ++i) {
- PHINode *PN = ArgumentPHIs[i];
-
- // If the PHI Node is a dynamic constant, replace it with the value it is.
- if (Value *PNV = SimplifyInstruction(PN)) {
- PN->replaceAllUsesWith(PNV);
- PN->eraseFromParent();
- }
- }
- }
+ for (unsigned i = 0, e = ArgumentPHIs.size(); i != e; ++i) {
+ PHINode *PN = ArgumentPHIs[i];
- // At this point, we know that the function does not have any captured
- // allocas. If additionally the function does not call setjmp, mark all calls
- // in the function that do not access stack memory with the tail keyword. This
- // implies ensuring that there does not exist any path from a call that takes
- // in an alloca but does not capture it and the call which we wish to mark
- // with "tail".
- if (!F.callsFunctionThatReturnsTwice()) {
- for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
- for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
- if (CallInst *CI = dyn_cast<CallInst>(I)) {
- if (!ACT.UsesAlloca.count(CI)) {
- CI->setTailCall();
- MadeChange = true;
- }
- }
- }
+ // If the PHI Node is a dynamic constant, replace it with the value it is.
+ if (Value *PNV = SimplifyInstruction(PN)) {
+ PN->replaceAllUsesWith(PNV);
+ PN->eraseFromParent();
}
}
@@ -343,11 +512,11 @@ static bool isDynamicConstant(Value *V, CallInst *CI, ReturnInst *RI) {
//
static Value *getCommonReturnValue(ReturnInst *IgnoreRI, CallInst *CI) {
Function *F = CI->getParent()->getParent();
- Value *ReturnedValue = 0;
+ Value *ReturnedValue = nullptr;
for (Function::iterator BBI = F->begin(), E = F->end(); BBI != E; ++BBI) {
ReturnInst *RI = dyn_cast<ReturnInst>(BBI->getTerminator());
- if (RI == 0 || RI == IgnoreRI) continue;
+ if (RI == nullptr || RI == IgnoreRI) continue;
// We can only perform this transformation if the value returned is
// evaluatable at the start of the initial invocation of the function,
@@ -355,10 +524,10 @@ static Value *getCommonReturnValue(ReturnInst *IgnoreRI, CallInst *CI) {
//
Value *RetOp = RI->getOperand(0);
if (!isDynamicConstant(RetOp, CI, RI))
- return 0;
+ return nullptr;
if (ReturnedValue && RetOp != ReturnedValue)
- return 0; // Cannot transform if differing values are returned.
+ return nullptr; // Cannot transform if differing values are returned.
ReturnedValue = RetOp;
}
return ReturnedValue;
@@ -370,18 +539,18 @@ static Value *getCommonReturnValue(ReturnInst *IgnoreRI, CallInst *CI) {
///
Value *TailCallElim::CanTransformAccumulatorRecursion(Instruction *I,
CallInst *CI) {
- if (!I->isAssociative() || !I->isCommutative()) return 0;
+ if (!I->isAssociative() || !I->isCommutative()) return nullptr;
assert(I->getNumOperands() == 2 &&
"Associative/commutative operations should have 2 args!");
// Exactly one operand should be the result of the call instruction.
if ((I->getOperand(0) == CI && I->getOperand(1) == CI) ||
(I->getOperand(0) != CI && I->getOperand(1) != CI))
- return 0;
+ return nullptr;
// The only user of this instruction we allow is a single return instruction.
if (!I->hasOneUse() || !isa<ReturnInst>(I->user_back()))
- return 0;
+ return nullptr;
// Ok, now we have to check all of the other return instructions in this
// function. If they return non-constants or differing values, then we cannot
@@ -402,11 +571,11 @@ TailCallElim::FindTRECandidate(Instruction *TI,
Function *F = BB->getParent();
if (&BB->front() == TI) // Make sure there is something before the terminator.
- return 0;
+ return nullptr;
// Scan backwards from the return, checking to see if there is a tail call in
// this block. If so, set CI to it.
- CallInst *CI = 0;
+ CallInst *CI = nullptr;
BasicBlock::iterator BBI = TI;
while (true) {
CI = dyn_cast<CallInst>(BBI);
@@ -414,14 +583,14 @@ TailCallElim::FindTRECandidate(Instruction *TI,
break;
if (BBI == BB->begin())
- return 0; // Didn't find a potential tail call.
+ return nullptr; // Didn't find a potential tail call.
--BBI;
}
// If this call is marked as a tail call, and if there are dynamic allocas in
// the function, we cannot perform this optimization.
if (CI->isTailCall() && CannotTailCallElimCallsMarkedTail)
- return 0;
+ return nullptr;
// As a special case, detect code like this:
// double fabs(double f) { return __builtin_fabs(f); } // a 'fabs' call
@@ -441,7 +610,7 @@ TailCallElim::FindTRECandidate(Instruction *TI,
for (; I != E && FI != FE; ++I, ++FI)
if (*I != &*FI) break;
if (I == E && FI == FE)
- return 0;
+ return nullptr;
}
return CI;
@@ -462,8 +631,8 @@ bool TailCallElim::EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret,
// which is different to the constant returned by other return instructions
// (which is recorded in AccumulatorRecursionEliminationInitVal). This is a
// special case of accumulator recursion, the operation being "return C".
- Value *AccumulatorRecursionEliminationInitVal = 0;
- Instruction *AccumulatorRecursionInstr = 0;
+ Value *AccumulatorRecursionEliminationInitVal = nullptr;
+ Instruction *AccumulatorRecursionInstr = nullptr;
// Ok, we found a potential tail call. We can currently only transform the
// tail call if all of the instructions between the call and the return are
@@ -493,8 +662,8 @@ bool TailCallElim::EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret,
// accumulator recursion variable eliminated.
if (Ret->getNumOperands() == 1 && Ret->getReturnValue() != CI &&
!isa<UndefValue>(Ret->getReturnValue()) &&
- AccumulatorRecursionEliminationInitVal == 0 &&
- !getCommonReturnValue(0, CI)) {
+ AccumulatorRecursionEliminationInitVal == nullptr &&
+ !getCommonReturnValue(nullptr, CI)) {
// One case remains that we are able to handle: the current return
// instruction returns a constant, and all other return instructions
// return a different constant.
@@ -510,9 +679,12 @@ bool TailCallElim::EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret,
BasicBlock *BB = Ret->getParent();
Function *F = BB->getParent();
+ emitOptimizationRemark(F->getContext(), "tailcallelim", *F, CI->getDebugLoc(),
+ "transforming tail recursion to loop");
+
// OK! We can transform this tail call. If this is the first one found,
// create the new entry block, allowing us to branch back to the old entry.
- if (OldEntry == 0) {
+ if (!OldEntry) {
OldEntry = &F->getEntryBlock();
BasicBlock *NewEntry = BasicBlock::Create(F->getContext(), "", F, OldEntry);
NewEntry->takeName(OldEntry);
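
The accumulator-recursion machinery above (getCommonReturnValue and CanTransformAccumulatorRecursion) handles the classic case where an associative, commutative operation follows the recursive call. What the transformation amounts to, shown as before/after C++ rather than IR:

    // Before: not a tail call, because the multiply runs after the call.
    unsigned factorial(unsigned n) {
      if (n <= 1) return 1;
      return n * factorial(n - 1);
    }

    // After: an accumulator plus a loop, the shape TRE produces.
    unsigned factorialLoop(unsigned n) {
      unsigned Acc = 1;   // AccumulatorRecursionEliminationInitVal.
      while (n > 1) {
        Acc *= n;         // The associative op, now applied eagerly.
        --n;
      }
      return Acc;
    }

Associativity is what makes reordering the pending multiplies legal; commutativity lets the call's result appear as either operand.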
diff --git a/lib/Transforms/Utils/AddDiscriminators.cpp b/lib/Transforms/Utils/AddDiscriminators.cpp
index f42635e..196ac79 100644
--- a/lib/Transforms/Utils/AddDiscriminators.cpp
+++ b/lib/Transforms/Utils/AddDiscriminators.cpp
@@ -52,8 +52,6 @@
// http://wiki.dwarfstd.org/index.php?title=Path_Discriminators
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "add-discriminators"
-
#include "llvm/Transforms/Scalar.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
@@ -69,6 +67,8 @@
using namespace llvm;
+#define DEBUG_TYPE "add-discriminators"
+
namespace {
struct AddDiscriminators : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
@@ -99,7 +99,7 @@ FunctionPass *llvm::createAddDiscriminatorsPass() {
static bool hasDebugInfo(const Function &F) {
NamedMDNode *CUNodes = F.getParent()->getNamedMetadata("llvm.dbg.cu");
- return CUNodes != 0;
+ return CUNodes != nullptr;
}
/// \brief Assign DWARF discriminators.
@@ -154,10 +154,15 @@ static bool hasDebugInfo(const Function &F) {
/// file and line location as I2. This new lexical block will have a
/// different discriminator number than I1.
bool AddDiscriminators::runOnFunction(Function &F) {
- // No need to do anything if there is no debug info for this function.
// If the function has debug information, but the user has disabled
// discriminators, do nothing.
- if (!hasDebugInfo(F) || NoDiscriminators) return false;
+ // Similarly, if the function has no debug info, do nothing.
+ // Finally, if this module is built with DWARF versions earlier than 4,
+ // do nothing (discriminator support is a DWARF 4 feature).
+ if (NoDiscriminators ||
+ !hasDebugInfo(F) ||
+ F.getParent()->getDwarfVersion() < 4)
+ return false;
bool Changed = false;
Module *M = F.getParent();
diff --git a/lib/Transforms/Utils/Android.mk b/lib/Transforms/Utils/Android.mk
index ab4d8a8..cbd8dd0 100644
--- a/lib/Transforms/Utils/Android.mk
+++ b/lib/Transforms/Utils/Android.mk
@@ -11,6 +11,7 @@ transforms_utils_SRC_FILES := \
CloneModule.cpp \
CmpInstAnalysis.cpp \
CodeExtractor.cpp \
+ CtorUtils.cpp \
DemoteRegToStack.cpp \
GlobalStatus.cpp \
InlineFunction.cpp \
diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp
index b3cd5ce..80b7e22 100644
--- a/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -68,8 +68,8 @@ void llvm::DeleteDeadBlock(BasicBlock *BB) {
void llvm::FoldSingleEntryPHINodes(BasicBlock *BB, Pass *P) {
if (!isa<PHINode>(BB->begin())) return;
- AliasAnalysis *AA = 0;
- MemoryDependenceAnalysis *MemDep = 0;
+ AliasAnalysis *AA = nullptr;
+ MemoryDependenceAnalysis *MemDep = nullptr;
if (P) {
AA = P->getAnalysisIfAvailable<AliasAnalysis>();
MemDep = P->getAnalysisIfAvailable<MemoryDependenceAnalysis>();
@@ -130,7 +130,7 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, Pass *P) {
BasicBlock *OnlySucc = BB;
for (; SI != SE; ++SI)
if (*SI != OnlySucc) {
- OnlySucc = 0; // There are multiple distinct successors!
+ OnlySucc = nullptr; // There are multiple distinct successors!
break;
}
@@ -217,7 +217,7 @@ void llvm::ReplaceInstWithValue(BasicBlock::InstListType &BIL,
///
void llvm::ReplaceInstWithInst(BasicBlock::InstListType &BIL,
BasicBlock::iterator &BI, Instruction *I) {
- assert(I->getParent() == 0 &&
+ assert(I->getParent() == nullptr &&
"ReplaceInstWithInst: Instruction already inserted into basic block!");
// Insert the new instruction into the basic block...
@@ -254,7 +254,7 @@ BasicBlock *llvm::SplitEdge(BasicBlock *BB, BasicBlock *Succ, Pass *P) {
// If the successor only has a single pred, split the top of the successor
// block.
assert(SP == BB && "CFG broken");
- SP = NULL;
+ SP = nullptr;
return SplitBlock(Succ, Succ->begin(), P);
}
@@ -310,7 +310,7 @@ static void UpdateAnalysisInformation(BasicBlock *OldBB, BasicBlock *NewBB,
if (!P) return;
LoopInfo *LI = P->getAnalysisIfAvailable<LoopInfo>();
- Loop *L = LI ? LI->getLoopFor(OldBB) : 0;
+ Loop *L = LI ? LI->getLoopFor(OldBB) : nullptr;
// If we need to preserve loop analyses, collect some information about how
// this split will affect loops.
@@ -351,7 +351,7 @@ static void UpdateAnalysisInformation(BasicBlock *OldBB, BasicBlock *NewBB,
// loop). To find this, examine each of the predecessors and determine which
// loops enclose them, and select the most-nested loop which contains the
// loop containing the block being split.
- Loop *InnermostPredLoop = 0;
+ Loop *InnermostPredLoop = nullptr;
for (ArrayRef<BasicBlock*>::iterator
i = Preds.begin(), e = Preds.end(); i != e; ++i) {
BasicBlock *Pred = *i;
@@ -384,51 +384,68 @@ static void UpdatePHINodes(BasicBlock *OrigBB, BasicBlock *NewBB,
ArrayRef<BasicBlock*> Preds, BranchInst *BI,
Pass *P, bool HasLoopExit) {
// Otherwise, create a new PHI node in NewBB for each PHI node in OrigBB.
- AliasAnalysis *AA = P ? P->getAnalysisIfAvailable<AliasAnalysis>() : 0;
+ AliasAnalysis *AA = P ? P->getAnalysisIfAvailable<AliasAnalysis>() : nullptr;
+ SmallPtrSet<BasicBlock *, 16> PredSet(Preds.begin(), Preds.end());
for (BasicBlock::iterator I = OrigBB->begin(); isa<PHINode>(I); ) {
PHINode *PN = cast<PHINode>(I++);
// Check to see if all of the values coming in are the same. If so, we
// don't need to create a new PHI node, unless it's needed for LCSSA.
- Value *InVal = 0;
+ Value *InVal = nullptr;
if (!HasLoopExit) {
InVal = PN->getIncomingValueForBlock(Preds[0]);
- for (unsigned i = 1, e = Preds.size(); i != e; ++i)
- if (InVal != PN->getIncomingValueForBlock(Preds[i])) {
- InVal = 0;
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ if (!PredSet.count(PN->getIncomingBlock(i)))
+ continue;
+ if (!InVal)
+ InVal = PN->getIncomingValue(i);
+ else if (InVal != PN->getIncomingValue(i)) {
+ InVal = nullptr;
break;
}
+ }
}
if (InVal) {
// If all incoming values for the new PHI would be the same, just don't
// make a new PHI. Instead, just remove the incoming values from the old
// PHI.
- for (unsigned i = 0, e = Preds.size(); i != e; ++i) {
- // Explicitly check the BB index here to handle duplicates in Preds.
- int Idx = PN->getBasicBlockIndex(Preds[i]);
- if (Idx >= 0)
- PN->removeIncomingValue(Idx, false);
- }
- } else {
- // If the values coming into the block are not the same, we need a PHI.
- // Create the new PHI node, insert it into NewBB at the end of the block
- PHINode *NewPHI =
- PHINode::Create(PN->getType(), Preds.size(), PN->getName() + ".ph", BI);
- if (AA) AA->copyValue(PN, NewPHI);
- // Move all of the PHI values for 'Preds' to the new PHI.
- for (unsigned i = 0, e = Preds.size(); i != e; ++i) {
- Value *V = PN->removeIncomingValue(Preds[i], false);
- NewPHI->addIncoming(V, Preds[i]);
- }
+ // NOTE! This loop walks backwards for a reason! First off, this minimizes
+ // the cost of removal if we end up removing a large number of values, and
+ // second off, this ensures that the indices for the incoming values
+ // aren't invalidated when we remove one.
+ for (int64_t i = PN->getNumIncomingValues() - 1; i >= 0; --i)
+ if (PredSet.count(PN->getIncomingBlock(i)))
+ PN->removeIncomingValue(i, false);
+
+ // Add an incoming value to the PHI node in the loop for the preheader
+ // edge.
+ PN->addIncoming(InVal, NewBB);
+ continue;
+ }
- InVal = NewPHI;
+ // If the values coming into the block are not the same, we need a new
+ // PHI.
+ // Create the new PHI node, insert it into NewBB at the end of the block
+ PHINode *NewPHI =
+ PHINode::Create(PN->getType(), Preds.size(), PN->getName() + ".ph", BI);
+ if (AA)
+ AA->copyValue(PN, NewPHI);
+
+ // NOTE! This loop walks backwards for a reason! First off, this minimizes
+ // the cost of removal if we end up removing a large number of values, and
+ // second off, this ensures that the indices for the incoming values aren't
+ // invalidated when we remove one.
+ for (int64_t i = PN->getNumIncomingValues() - 1; i >= 0; --i) {
+ BasicBlock *IncomingBB = PN->getIncomingBlock(i);
+ if (PredSet.count(IncomingBB)) {
+ Value *V = PN->removeIncomingValue(i, false);
+ NewPHI->addIncoming(V, IncomingBB);
+ }
}
- // Add an incoming value to the PHI node in the loop for the preheader
- // edge.
- PN->addIncoming(InVal, NewBB);
+ PN->addIncoming(NewPHI, NewBB);
}
}
@@ -542,7 +559,7 @@ void llvm::SplitLandingPadPredecessors(BasicBlock *OrigBB,
e = pred_end(OrigBB);
}
- BasicBlock *NewBB2 = 0;
+ BasicBlock *NewBB2 = nullptr;
if (!NewBB2Preds.empty()) {
// Create another basic block for the rest of OrigBB's predecessors.
NewBB2 = BasicBlock::Create(OrigBB->getContext(),
@@ -607,7 +624,7 @@ ReturnInst *llvm::FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB,
for (User::op_iterator i = NewRet->op_begin(), e = NewRet->op_end();
i != e; ++i) {
Value *V = *i;
- Instruction *NewBC = 0;
+ Instruction *NewBC = nullptr;
if (BitCastInst *BCI = dyn_cast<BitCastInst>(V)) {
// Return value might be bitcasted. Clone and insert it before the
// return instruction.
@@ -724,32 +741,32 @@ void llvm::SplitBlockAndInsertIfThenElse(Value *Cond, Instruction *SplitBefore,
Value *llvm::GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue,
BasicBlock *&IfFalse) {
PHINode *SomePHI = dyn_cast<PHINode>(BB->begin());
- BasicBlock *Pred1 = NULL;
- BasicBlock *Pred2 = NULL;
+ BasicBlock *Pred1 = nullptr;
+ BasicBlock *Pred2 = nullptr;
if (SomePHI) {
if (SomePHI->getNumIncomingValues() != 2)
- return NULL;
+ return nullptr;
Pred1 = SomePHI->getIncomingBlock(0);
Pred2 = SomePHI->getIncomingBlock(1);
} else {
pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
if (PI == PE) // No predecessor
- return NULL;
+ return nullptr;
Pred1 = *PI++;
if (PI == PE) // Only one predecessor
- return NULL;
+ return nullptr;
Pred2 = *PI++;
if (PI != PE) // More than two predecessors
- return NULL;
+ return nullptr;
}
// We can only handle branches. Other control flow will be lowered to
// branches if possible anyway.
BranchInst *Pred1Br = dyn_cast<BranchInst>(Pred1->getTerminator());
BranchInst *Pred2Br = dyn_cast<BranchInst>(Pred2->getTerminator());
- if (Pred1Br == 0 || Pred2Br == 0)
- return 0;
+ if (!Pred1Br || !Pred2Br)
+ return nullptr;
// Eliminate code duplication by ensuring that Pred1Br is conditional if
// either is.
@@ -759,7 +776,7 @@ Value *llvm::GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue,
// required anyway, we stand no chance of eliminating it, so the xform is
// probably not profitable.
if (Pred1Br->isConditional())
- return 0;
+ return nullptr;
std::swap(Pred1, Pred2);
std::swap(Pred1Br, Pred2Br);
@@ -769,8 +786,8 @@ Value *llvm::GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue,
// The only thing we have to watch out for here is to make sure that Pred2
// doesn't have incoming edges from other blocks. If it does, the condition
// doesn't dominate BB.
- if (Pred2->getSinglePredecessor() == 0)
- return 0;
+ if (!Pred2->getSinglePredecessor())
+ return nullptr;
// If we found a conditional branch predecessor, make sure that it branches
// to BB and Pred2Br. If it doesn't, this isn't an "if statement".
@@ -785,7 +802,7 @@ Value *llvm::GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue,
} else {
// We know that one arm of the conditional goes to BB, so the other must
// go somewhere unrelated, and this must not be an "if statement".
- return 0;
+ return nullptr;
}
return Pred1Br->getCondition();
@@ -795,12 +812,12 @@ Value *llvm::GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue,
// BB. Don't panic! If both blocks only have a single (identical)
// predecessor, and THAT is a conditional branch, then we're all ok!
BasicBlock *CommonPred = Pred1->getSinglePredecessor();
- if (CommonPred == 0 || CommonPred != Pred2->getSinglePredecessor())
- return 0;
+ if (CommonPred == nullptr || CommonPred != Pred2->getSinglePredecessor())
+ return nullptr;
// Otherwise, if this is a conditional branch, then we can use it!
BranchInst *BI = dyn_cast<BranchInst>(CommonPred->getTerminator());
- if (BI == 0) return 0;
+ if (!BI) return nullptr;
assert(BI->isConditional() && "Two successors but not conditional?");
if (BI->getSuccessor(0) == Pred1) {
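
The "walks backwards for a reason" note in UpdatePHINodes generalizes: erasing by index while counting down leaves the not-yet-visited indices valid, and each erase only shifts elements already visited. A tiny self-contained illustration with a std::vector in place of the PHI's incoming list (removeMatching is an invented name):

    #include <vector>

    // Erase every element ShouldRemove approves. Indices < i stay valid
    // because erase(i) only moves elements at indices > i.
    template <typename Pred>
    void removeMatching(std::vector<int> &Values, Pred ShouldRemove) {
      for (long long i = static_cast<long long>(Values.size()) - 1; i >= 0; --i)
        if (ShouldRemove(Values[i]))
          Values.erase(Values.begin() + i);
    }

For example, removeMatching(V, [](int x) { return x % 2 == 0; }); strips the even entries without any index bookkeeping.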
diff --git a/lib/Transforms/Utils/BreakCriticalEdges.cpp b/lib/Transforms/Utils/BreakCriticalEdges.cpp
index 76ebb9f..80bd516 100644
--- a/lib/Transforms/Utils/BreakCriticalEdges.cpp
+++ b/lib/Transforms/Utils/BreakCriticalEdges.cpp
@@ -15,7 +15,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "break-crit-edges"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
@@ -30,6 +29,8 @@
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
using namespace llvm;
+#define DEBUG_TYPE "break-crit-edges"
+
STATISTIC(NumBroken, "Number of blocks inserted");
namespace {
@@ -141,7 +142,7 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
Pass *P, bool MergeIdenticalEdges,
bool DontDeleteUselessPhis,
bool SplitLandingPads) {
- if (!isCriticalEdge(TI, SuccNum, MergeIdenticalEdges)) return 0;
+ if (!isCriticalEdge(TI, SuccNum, MergeIdenticalEdges)) return nullptr;
assert(!isa<IndirectBrInst>(TI) &&
"Cannot split critical edge from IndirectBrInst");
@@ -151,7 +152,7 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
// Splitting the critical edge to a landing pad block is non-trivial. Don't do
// it in this generic function.
- if (DestBB->isLandingPad()) return 0;
+ if (DestBB->isLandingPad()) return nullptr;
// Create a new basic block, linking it into the CFG.
BasicBlock *NewBB = BasicBlock::Create(TI->getContext(),
@@ -207,15 +208,15 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
// If we don't have a pass object, we can't update anything...
- if (P == 0) return NewBB;
+ if (!P) return NewBB;
DominatorTreeWrapperPass *DTWP =
P->getAnalysisIfAvailable<DominatorTreeWrapperPass>();
- DominatorTree *DT = DTWP ? &DTWP->getDomTree() : 0;
+ DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr;
LoopInfo *LI = P->getAnalysisIfAvailable<LoopInfo>();
// If we have nothing to update, just return.
- if (DT == 0 && LI == 0)
+ if (!DT && !LI)
return NewBB;
// Now update analysis information. Since the only predecessor of NewBB is
@@ -251,7 +252,7 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
//
if (TINode) { // Don't break unreachable code!
DomTreeNode *NewBBNode = DT->addNewBlock(NewBB, TIBB);
- DomTreeNode *DestBBNode = 0;
+ DomTreeNode *DestBBNode = nullptr;
// If NewBBDominatesDestBB hasn't been computed yet, do so with DT.
if (!OtherPreds.empty()) {
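
For reference, the property SplitCriticalEdge tests for: an edge is critical when its source has multiple successors and its destination has multiple predecessors, so nothing can be placed "on" the edge without a fresh block. A toy sketch with invented Block lists; the real function also handles MergeIdenticalEdges, landing pads, and analysis updates, and ownership of the new block is glossed over here:

    #include <vector>

    struct Block {
      std::vector<Block *> Succs;
      std::vector<Block *> Preds;
    };

    bool isCriticalEdge(const Block *From, const Block *To) {
      return From->Succs.size() > 1 && To->Preds.size() > 1;
    }

    // Splitting gives the edge its own program point (e.g. for PHI copies).
    Block *splitCriticalEdge(Block *From, Block *To) {
      if (!isCriticalEdge(From, To))
        return nullptr;                  // Mirrors the early return above.
      Block *NewBB = new Block();        // Leaked here; a sketch only.
      NewBB->Succs.push_back(To);
      NewBB->Preds.push_back(From);
      for (Block *&S : From->Succs)
        if (S == To) S = NewBB;          // Redirect exactly this edge.
      for (Block *&P : To->Preds)
        if (P == From) P = NewBB;
      return NewBB;
    }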
diff --git a/lib/Transforms/Utils/BuildLibCalls.cpp b/lib/Transforms/Utils/BuildLibCalls.cpp
index 82384a1..be00b69 100644
--- a/lib/Transforms/Utils/BuildLibCalls.cpp
+++ b/lib/Transforms/Utils/BuildLibCalls.cpp
@@ -27,7 +27,8 @@ using namespace llvm;
/// CastToCStr - Return V if it is an i8*, otherwise cast it to i8*.
Value *llvm::CastToCStr(Value *V, IRBuilder<> &B) {
- return B.CreateBitCast(V, B.getInt8PtrTy(), "cstr");
+ unsigned AS = V->getType()->getPointerAddressSpace();
+ return B.CreateBitCast(V, B.getInt8PtrTy(AS), "cstr");
}
/// EmitStrLen - Emit a call to the strlen function to the builder, for the
@@ -35,7 +36,7 @@ Value *llvm::CastToCStr(Value *V, IRBuilder<> &B) {
Value *llvm::EmitStrLen(Value *Ptr, IRBuilder<> &B, const DataLayout *TD,
const TargetLibraryInfo *TLI) {
if (!TLI->has(LibFunc::strlen))
- return 0;
+ return nullptr;
Module *M = B.GetInsertBlock()->getParent()->getParent();
AttributeSet AS[2];
@@ -64,7 +65,7 @@ Value *llvm::EmitStrLen(Value *Ptr, IRBuilder<> &B, const DataLayout *TD,
Value *llvm::EmitStrNLen(Value *Ptr, Value *MaxLen, IRBuilder<> &B,
const DataLayout *TD, const TargetLibraryInfo *TLI) {
if (!TLI->has(LibFunc::strnlen))
- return 0;
+ return nullptr;
Module *M = B.GetInsertBlock()->getParent()->getParent();
AttributeSet AS[2];
@@ -94,7 +95,7 @@ Value *llvm::EmitStrNLen(Value *Ptr, Value *MaxLen, IRBuilder<> &B,
Value *llvm::EmitStrChr(Value *Ptr, char C, IRBuilder<> &B,
const DataLayout *TD, const TargetLibraryInfo *TLI) {
if (!TLI->has(LibFunc::strchr))
- return 0;
+ return nullptr;
Module *M = B.GetInsertBlock()->getParent()->getParent();
Attribute::AttrKind AVs[2] = { Attribute::ReadOnly, Attribute::NoUnwind };
@@ -120,7 +121,7 @@ Value *llvm::EmitStrNCmp(Value *Ptr1, Value *Ptr2, Value *Len,
IRBuilder<> &B, const DataLayout *TD,
const TargetLibraryInfo *TLI) {
if (!TLI->has(LibFunc::strncmp))
- return 0;
+ return nullptr;
Module *M = B.GetInsertBlock()->getParent()->getParent();
AttributeSet AS[3];
@@ -153,7 +154,7 @@ Value *llvm::EmitStrCpy(Value *Dst, Value *Src, IRBuilder<> &B,
const DataLayout *TD, const TargetLibraryInfo *TLI,
StringRef Name) {
if (!TLI->has(LibFunc::strcpy))
- return 0;
+ return nullptr;
Module *M = B.GetInsertBlock()->getParent()->getParent();
AttributeSet AS[2];
@@ -177,7 +178,7 @@ Value *llvm::EmitStrNCpy(Value *Dst, Value *Src, Value *Len,
IRBuilder<> &B, const DataLayout *TD,
const TargetLibraryInfo *TLI, StringRef Name) {
if (!TLI->has(LibFunc::strncpy))
- return 0;
+ return nullptr;
Module *M = B.GetInsertBlock()->getParent()->getParent();
AttributeSet AS[2];
@@ -204,7 +205,7 @@ Value *llvm::EmitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize,
IRBuilder<> &B, const DataLayout *TD,
const TargetLibraryInfo *TLI) {
if (!TLI->has(LibFunc::memcpy_chk))
- return 0;
+ return nullptr;
Module *M = B.GetInsertBlock()->getParent()->getParent();
AttributeSet AS;
@@ -232,7 +233,7 @@ Value *llvm::EmitMemChr(Value *Ptr, Value *Val,
Value *Len, IRBuilder<> &B, const DataLayout *TD,
const TargetLibraryInfo *TLI) {
if (!TLI->has(LibFunc::memchr))
- return 0;
+ return nullptr;
Module *M = B.GetInsertBlock()->getParent()->getParent();
AttributeSet AS;
@@ -260,7 +261,7 @@ Value *llvm::EmitMemCmp(Value *Ptr1, Value *Ptr2,
Value *Len, IRBuilder<> &B, const DataLayout *TD,
const TargetLibraryInfo *TLI) {
if (!TLI->has(LibFunc::memcmp))
- return 0;
+ return nullptr;
Module *M = B.GetInsertBlock()->getParent()->getParent();
AttributeSet AS[3];
@@ -347,7 +348,7 @@ Value *llvm::EmitBinaryFloatFnCall(Value *Op1, Value *Op2, StringRef Name,
Value *llvm::EmitPutChar(Value *Char, IRBuilder<> &B, const DataLayout *TD,
const TargetLibraryInfo *TLI) {
if (!TLI->has(LibFunc::putchar))
- return 0;
+ return nullptr;
Module *M = B.GetInsertBlock()->getParent()->getParent();
Value *PutChar = M->getOrInsertFunction("putchar", B.getInt32Ty(),
@@ -369,7 +370,7 @@ Value *llvm::EmitPutChar(Value *Char, IRBuilder<> &B, const DataLayout *TD,
Value *llvm::EmitPutS(Value *Str, IRBuilder<> &B, const DataLayout *TD,
const TargetLibraryInfo *TLI) {
if (!TLI->has(LibFunc::puts))
- return 0;
+ return nullptr;
Module *M = B.GetInsertBlock()->getParent()->getParent();
AttributeSet AS[2];
@@ -393,7 +394,7 @@ Value *llvm::EmitPutS(Value *Str, IRBuilder<> &B, const DataLayout *TD,
Value *llvm::EmitFPutC(Value *Char, Value *File, IRBuilder<> &B,
const DataLayout *TD, const TargetLibraryInfo *TLI) {
if (!TLI->has(LibFunc::fputc))
- return 0;
+ return nullptr;
Module *M = B.GetInsertBlock()->getParent()->getParent();
AttributeSet AS[2];
@@ -426,7 +427,7 @@ Value *llvm::EmitFPutC(Value *Char, Value *File, IRBuilder<> &B,
Value *llvm::EmitFPutS(Value *Str, Value *File, IRBuilder<> &B,
const DataLayout *TD, const TargetLibraryInfo *TLI) {
if (!TLI->has(LibFunc::fputs))
- return 0;
+ return nullptr;
Module *M = B.GetInsertBlock()->getParent()->getParent();
AttributeSet AS[3];
@@ -459,7 +460,7 @@ Value *llvm::EmitFWrite(Value *Ptr, Value *Size, Value *File,
IRBuilder<> &B, const DataLayout *TD,
const TargetLibraryInfo *TLI) {
if (!TLI->has(LibFunc::fwrite))
- return 0;
+ return nullptr;
Module *M = B.GetInsertBlock()->getParent()->getParent();
AttributeSet AS[3];
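
Every Emit* helper in this file follows one shape: return nullptr when TargetLibraryInfo says the routine is unavailable, otherwise get-or-insert the declaration and build the call. A standalone sketch of that guard pattern; TargetInfo, ModuleT, and Callee are invented stand-ins, not LLVM types:

    #include <string>
    #include <unordered_map>
    #include <unordered_set>

    struct Callee { std::string Name; };

    struct TargetInfo {
      std::unordered_set<std::string> Available;
      bool has(const std::string &Fn) const { return Available.count(Fn) != 0; }
    };

    struct ModuleT {
      std::unordered_map<std::string, Callee> Functions;
      Callee *getOrInsert(const std::string &Name) {
        return &Functions.try_emplace(Name, Callee{Name}).first->second;
      }
    };

    Callee *emitStrLen(ModuleT &M, const TargetInfo &TLI) {
      if (!TLI.has("strlen"))
        return nullptr;              // Mirrors the guard in EmitStrLen().
      return M.getOrInsert("strlen"); // Caller then emits the call with it.
    }

The nullptr return is the contract the simplifiers rely on: a failed emit means "leave the original code alone".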
diff --git a/lib/Transforms/Utils/BypassSlowDivision.cpp b/lib/Transforms/Utils/BypassSlowDivision.cpp
index 1f517d0..f2d5e07 100644
--- a/lib/Transforms/Utils/BypassSlowDivision.cpp
+++ b/lib/Transforms/Utils/BypassSlowDivision.cpp
@@ -15,7 +15,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "bypass-slow-division"
#include "llvm/Transforms/Utils/BypassSlowDivision.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/IR/Function.h"
@@ -24,6 +23,8 @@
using namespace llvm;
+#define DEBUG_TYPE "bypass-slow-division"
+
namespace {
struct DivOpInfo {
bool SignedOp;
@@ -53,11 +54,11 @@ namespace llvm {
}
static DivOpInfo getEmptyKey() {
- return DivOpInfo(false, 0, 0);
+ return DivOpInfo(false, nullptr, nullptr);
}
static DivOpInfo getTombstoneKey() {
- return DivOpInfo(true, 0, 0);
+ return DivOpInfo(true, nullptr, nullptr);
}
static unsigned getHashValue(const DivOpInfo &Val) {
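
The getEmptyKey/getTombstoneKey pair above exists because DenseMap is open-addressed: every bucket must be recognizable as never-used or erased from the key value alone, so two keys that can never occur in real entries are reserved. A standalone sketch of the convention; the struct below is a simplified stand-in for DivOpInfo:

    struct DivOpInfo {
      bool SignedOp;
      const void *Dividend;
      const void *Divisor;
    };

    // Real keys always carry non-null operands, which is why the all-null
    // sentinels in the pass are safe; only the bool distinguishes the two.
    inline DivOpInfo getEmptyKey()     { return {false, nullptr, nullptr}; }
    inline DivOpInfo getTombstoneKey() { return {true,  nullptr, nullptr}; }

    inline bool isEqual(const DivOpInfo &A, const DivOpInfo &B) {
      return A.SignedOp == B.SignedOp && A.Dividend == B.Dividend &&
             A.Divisor == B.Divisor;
    }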
diff --git a/lib/Transforms/Utils/CMakeLists.txt b/lib/Transforms/Utils/CMakeLists.txt
index dac2090..e10ca90 100644
--- a/lib/Transforms/Utils/CMakeLists.txt
+++ b/lib/Transforms/Utils/CMakeLists.txt
@@ -5,6 +5,7 @@ add_llvm_library(LLVMTransformUtils
BreakCriticalEdges.cpp
BuildLibCalls.cpp
BypassSlowDivision.cpp
+ CtorUtils.cpp
CloneFunction.cpp
CloneModule.cpp
CmpInstAnalysis.cpp
diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp
index a199086..5c8f20d 100644
--- a/lib/Transforms/Utils/CloneFunction.cpp
+++ b/lib/Transforms/Utils/CloneFunction.cpp
@@ -159,7 +159,7 @@ static MDNode* FindSubprogram(const Function *F, DebugInfoFinder &Finder) {
for (DISubprogram Subprogram : Finder.subprograms()) {
if (Subprogram.describes(F)) return Subprogram;
}
- return NULL;
+ return nullptr;
}
// Add an operand to an existing MDNode. The new operand will be added at the
@@ -359,7 +359,7 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
// If the condition was a known constant in the callee...
ConstantInt *Cond = dyn_cast<ConstantInt>(BI->getCondition());
// Or is a known constant in the caller...
- if (Cond == 0) {
+ if (!Cond) {
Value *V = VMap[BI->getCondition()];
Cond = dyn_cast_or_null<ConstantInt>(V);
}
@@ -375,7 +375,7 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
} else if (const SwitchInst *SI = dyn_cast<SwitchInst>(OldTI)) {
// If switching on a value known constant in the caller.
ConstantInt *Cond = dyn_cast<ConstantInt>(SI->getCondition());
- if (Cond == 0) { // Or known constant after constant prop in the callee...
+ if (!Cond) { // Or known constant after constant prop in the callee...
Value *V = VMap[SI->getCondition()];
Cond = dyn_cast_or_null<ConstantInt>(V);
}
@@ -454,7 +454,7 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
BI != BE; ++BI) {
Value *V = VMap[BI];
BasicBlock *NewBB = cast_or_null<BasicBlock>(V);
- if (NewBB == 0) continue; // Dead block.
+ if (!NewBB) continue; // Dead block.
// Add the new block to the new function.
NewFunc->getBasicBlockList().push_back(NewBB);
diff --git a/lib/Transforms/Utils/CloneModule.cpp b/lib/Transforms/Utils/CloneModule.cpp
index 64df089..eb67db1 100644
--- a/lib/Transforms/Utils/CloneModule.cpp
+++ b/lib/Transforms/Utils/CloneModule.cpp
@@ -47,8 +47,8 @@ Module *llvm::CloneModule(const Module *M, ValueToValueMapTy &VMap) {
GlobalVariable *GV = new GlobalVariable(*New,
I->getType()->getElementType(),
I->isConstant(), I->getLinkage(),
- (Constant*) 0, I->getName(),
- (GlobalVariable*) 0,
+ (Constant*) nullptr, I->getName(),
+ (GlobalVariable*) nullptr,
I->getThreadLocalMode(),
I->getType()->getAddressSpace());
GV->copyAttributesFrom(I);
@@ -67,8 +67,10 @@ Module *llvm::CloneModule(const Module *M, ValueToValueMapTy &VMap) {
// Loop over the aliases in the module
for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end();
I != E; ++I) {
- GlobalAlias *GA = new GlobalAlias(I->getType(), I->getLinkage(),
- I->getName(), NULL, New);
+ auto *PTy = cast<PointerType>(I->getType());
+ auto *GA =
+ GlobalAlias::create(PTy->getElementType(), PTy->getAddressSpace(),
+ I->getLinkage(), I->getName(), New);
GA->copyAttributesFrom(I);
VMap[I] = GA;
}
@@ -105,8 +107,8 @@ Module *llvm::CloneModule(const Module *M, ValueToValueMapTy &VMap) {
for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end();
I != E; ++I) {
GlobalAlias *GA = cast<GlobalAlias>(VMap[I]);
- if (const Constant *C = I->getAliasee())
- GA->setAliasee(MapValue(C, VMap));
+ if (const GlobalObject *C = I->getAliasee())
+ GA->setAliasee(cast<GlobalObject>(MapValue(C, VMap)));
}
// And named metadata....
diff --git a/lib/Transforms/Utils/CmpInstAnalysis.cpp b/lib/Transforms/Utils/CmpInstAnalysis.cpp
index 8fa412a..3b15a0a 100644
--- a/lib/Transforms/Utils/CmpInstAnalysis.cpp
+++ b/lib/Transforms/Utils/CmpInstAnalysis.cpp
@@ -84,7 +84,7 @@ Value *llvm::getICmpValue(bool Sign, unsigned Code, Value *LHS, Value *RHS,
case 7: // True.
return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 1);
}
- return NULL;
+ return nullptr;
}
/// PredicatesFoldable - Return true if both predicates match sign or if at
diff --git a/lib/Transforms/Utils/CodeExtractor.cpp b/lib/Transforms/Utils/CodeExtractor.cpp
index b814842..e70a7d6 100644
--- a/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/lib/Transforms/Utils/CodeExtractor.cpp
@@ -38,6 +38,8 @@
#include <set>
using namespace llvm;
+#define DEBUG_TYPE "code-extractor"
+
// Provide a command-line option to aggregate function arguments into a struct
// for functions produced by the code extractor. This is useful when converting
// extracted functions to pthread-based code, as only one argument (void*) can
@@ -118,7 +120,7 @@ buildExtractionBlockSet(const RegionNode &RN) {
}
CodeExtractor::CodeExtractor(BasicBlock *BB, bool AggregateArgs)
- : DT(0), AggregateArgs(AggregateArgs||AggregateArgsOpt),
+ : DT(nullptr), AggregateArgs(AggregateArgs||AggregateArgsOpt),
Blocks(buildExtractionBlockSet(BB)), NumExitBlocks(~0U) {}
CodeExtractor::CodeExtractor(ArrayRef<BasicBlock *> BBs, DominatorTree *DT,
@@ -410,7 +412,7 @@ static BasicBlock* FindPhiPredForUseInBlock(Value* Used, BasicBlock* BB) {
return P->getIncomingBlock(U);
}
- return 0;
+ return nullptr;
}
/// emitCallAndSwitchStatement - This method sets up the caller side by adding
@@ -438,14 +440,14 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
StructValues.push_back(*i);
} else {
AllocaInst *alloca =
- new AllocaInst((*i)->getType(), 0, (*i)->getName()+".loc",
+ new AllocaInst((*i)->getType(), nullptr, (*i)->getName()+".loc",
codeReplacer->getParent()->begin()->begin());
ReloadOutputs.push_back(alloca);
params.push_back(alloca);
}
}
- AllocaInst *Struct = 0;
+ AllocaInst *Struct = nullptr;
if (AggregateArgs && (inputs.size() + outputs.size() > 0)) {
std::vector<Type*> ArgTypes;
for (ValueSet::iterator v = StructValues.begin(),
@@ -455,7 +457,7 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
// Allocate a struct at the beginning of this function
Type *StructArgTy = StructType::get(newFunction->getContext(), ArgTypes);
Struct =
- new AllocaInst(StructArgTy, 0, "structArg",
+ new AllocaInst(StructArgTy, nullptr, "structArg",
codeReplacer->getParent()->begin()->begin());
params.push_back(Struct);
@@ -484,7 +486,7 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
// Reload the outputs passed in by reference
for (unsigned i = 0, e = outputs.size(); i != e; ++i) {
- Value *Output = 0;
+ Value *Output = nullptr;
if (AggregateArgs) {
Value *Idx[2];
Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context));
@@ -537,7 +539,7 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
newFunction);
unsigned SuccNum = switchVal++;
- Value *brVal = 0;
+ Value *brVal = nullptr;
switch (NumExitBlocks) {
case 0:
case 1: break; // No value needed.
@@ -633,7 +635,7 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
// Check if the function should return a value
if (OldFnRetTy->isVoidTy()) {
- ReturnInst::Create(Context, 0, TheSwitch); // Return void
+ ReturnInst::Create(Context, nullptr, TheSwitch); // Return void
} else if (OldFnRetTy == TheSwitch->getCondition()->getType()) {
// return what we have
ReturnInst::Create(Context, TheSwitch->getCondition(), TheSwitch);
@@ -685,7 +687,7 @@ void CodeExtractor::moveCodeToFunction(Function *newFunction) {
Function *CodeExtractor::extractCodeRegion() {
if (!isEligible())
- return 0;
+ return nullptr;
ValueSet inputs, outputs;
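
The AggregateArgs path above packs the region's inputs and outputs into a single stack struct so the extracted function takes one pointer, which is what pthread-style void* entry points need. The effect in plain C++, with illustrative field names:

    #include <cstdio>

    struct ExtractArgs { int In0; float In1; int Out0; };

    // The extracted region: reads inputs, writes outputs through one pointer.
    static void extractedRegion(ExtractArgs *A) {
      A->Out0 = A->In0 + static_cast<int>(A->In1);
    }

    int main() {
      ExtractArgs A = {40, 2.0f, 0};  // Plays the role of the "structArg" alloca.
      extractedRegion(&A);            // A single void*-compatible argument.
      std::printf("%d\n", A.Out0);    // Caller reloads outputs from the struct.
      return 0;
    }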
diff --git a/lib/Transforms/Utils/CtorUtils.cpp b/lib/Transforms/Utils/CtorUtils.cpp
new file mode 100644
index 0000000..a359424
--- /dev/null
+++ b/lib/Transforms/Utils/CtorUtils.cpp
@@ -0,0 +1,183 @@
+//===- CtorUtils.cpp - Helpers for working with global_ctors ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines functions that are used to process llvm.global_ctors.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/CtorUtils.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/Debug.h"
+
+#define DEBUG_TYPE "ctor_utils"
+
+namespace llvm {
+
+namespace {
+/// Given the llvm.global_ctors list GCL, install the specified array of
+/// constructors in its place.
+void installGlobalCtors(GlobalVariable *GCL,
+ const std::vector<Function *> &Ctors) {
+ // If we made a change, reassemble the initializer list.
+ Constant *CSVals[3];
+
+ StructType *StructTy =
+ cast<StructType>(GCL->getType()->getElementType()->getArrayElementType());
+
+ // Create the new init list.
+ std::vector<Constant *> CAList;
+ for (Function *F : Ctors) {
+ Type *Int32Ty = Type::getInt32Ty(GCL->getContext());
+ if (F) {
+ CSVals[0] = ConstantInt::get(Int32Ty, 65535);
+ CSVals[1] = F;
+ } else {
+ CSVals[0] = ConstantInt::get(Int32Ty, 0x7fffffff);
+ CSVals[1] = Constant::getNullValue(StructTy->getElementType(1));
+ }
+ // FIXME: Only allow the 3-field form in LLVM 4.0.
+ size_t NumElts = StructTy->getNumElements();
+ if (NumElts > 2)
+ CSVals[2] = Constant::getNullValue(StructTy->getElementType(2));
+ CAList.push_back(
+ ConstantStruct::get(StructTy, makeArrayRef(CSVals, NumElts)));
+ }
+
+ // Create the array initializer.
+ Constant *CA =
+ ConstantArray::get(ArrayType::get(StructTy, CAList.size()), CAList);
+
+ // If we didn't change the number of elements, don't create a new GV.
+ if (CA->getType() == GCL->getInitializer()->getType()) {
+ GCL->setInitializer(CA);
+ return;
+ }
+
+ // Create the new global and insert it next to the existing list.
+ GlobalVariable *NGV =
+ new GlobalVariable(CA->getType(), GCL->isConstant(), GCL->getLinkage(),
+ CA, "", GCL->getThreadLocalMode());
+ GCL->getParent()->getGlobalList().insert(GCL, NGV);
+ NGV->takeName(GCL);
+
+ // Nuke the old list, replacing any uses with the new one.
+ if (!GCL->use_empty()) {
+ Constant *V = NGV;
+ if (V->getType() != GCL->getType())
+ V = ConstantExpr::getBitCast(V, GCL->getType());
+ GCL->replaceAllUsesWith(V);
+ }
+ GCL->eraseFromParent();
+}
+
+/// Given an llvm.global_ctors list that we can understand, return the list
+/// of functions it holds (preserving any null entries) as a vector.
+std::vector<Function*> parseGlobalCtors(GlobalVariable *GV) {
+ if (GV->getInitializer()->isNullValue())
+ return std::vector<Function *>();
+ ConstantArray *CA = cast<ConstantArray>(GV->getInitializer());
+ std::vector<Function *> Result;
+ Result.reserve(CA->getNumOperands());
+ for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i) {
+ ConstantStruct *CS = cast<ConstantStruct>(*i);
+ Result.push_back(dyn_cast<Function>(CS->getOperand(1)));
+ }
+ return Result;
+}
+
+/// Find the llvm.global_ctors list, verifying that all initializers have an
+/// init priority of 65535.
+GlobalVariable *findGlobalCtors(Module &M) {
+ GlobalVariable *GV = M.getGlobalVariable("llvm.global_ctors");
+ if (!GV)
+ return nullptr;
+
+ // Verify that the initializer is simple enough for us to handle. We are
+ // only allowed to optimize the initializer if it is unique.
+ if (!GV->hasUniqueInitializer())
+ return nullptr;
+
+ if (isa<ConstantAggregateZero>(GV->getInitializer()))
+ return GV;
+ ConstantArray *CA = cast<ConstantArray>(GV->getInitializer());
+
+ for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i) {
+ if (isa<ConstantAggregateZero>(*i))
+ continue;
+ ConstantStruct *CS = cast<ConstantStruct>(*i);
+ if (isa<ConstantPointerNull>(CS->getOperand(1)))
+ continue;
+
+ // Must have a function or null ptr.
+ if (!isa<Function>(CS->getOperand(1)))
+ return nullptr;
+
+ // Init priority must be standard.
+ ConstantInt *CI = cast<ConstantInt>(CS->getOperand(0));
+ if (CI->getZExtValue() != 65535)
+ return nullptr;
+ }
+
+ return GV;
+}
+} // namespace
+
+/// Call "ShouldRemove" for every entry in M's global_ctor list and remove the
+/// entries for which it returns true. Return true if anything changed.
+bool optimizeGlobalCtorsList(Module &M,
+ function_ref<bool(Function *)> ShouldRemove) {
+ GlobalVariable *GlobalCtors = findGlobalCtors(M);
+ if (!GlobalCtors)
+ return false;
+
+ std::vector<Function *> Ctors = parseGlobalCtors(GlobalCtors);
+ if (Ctors.empty())
+ return false;
+
+ bool MadeChange = false;
+
+ // Loop over global ctors, optimizing them when we can.
+ for (unsigned i = 0; i != Ctors.size(); ++i) {
+ Function *F = Ctors[i];
+ // Found a null terminator in the middle of the list; prune off the rest
+ // of the list.
+ if (!F) {
+ if (i != Ctors.size() - 1) {
+ Ctors.resize(i + 1);
+ MadeChange = true;
+ }
+ break;
+ }
+ DEBUG(dbgs() << "Optimizing Global Constructor: " << *F << "\n");
+
+ // We cannot simplify external ctor functions.
+ if (F->empty())
+ continue;
+
+ // If the client can remove this ctor (e.g. by evaluating it at compile
+ // time), drop it from the list.
+ if (ShouldRemove(F)) {
+ Ctors.erase(Ctors.begin() + i);
+ MadeChange = true;
+ --i;
+ continue;
+ }
+ }
+
+ if (!MadeChange)
+ return false;
+
+ installGlobalCtors(GlobalCtors, Ctors);
+ return true;
+}
+
+} // End llvm namespace
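
The pruning loop in optimizeGlobalCtorsList is worth seeing in isolation: a null function pointer terminates the list, and approved entries are erased in place with the index stepped back. A standalone model over a plain vector, with Ctor standing in for Function* and the initializer re-install step omitted:

    #include <functional>
    #include <vector>

    struct Ctor { bool External; };

    bool pruneCtors(std::vector<Ctor *> &Ctors,
                    const std::function<bool(Ctor *)> &ShouldRemove) {
      bool MadeChange = false;
      for (size_t i = 0; i != Ctors.size(); ++i) {
        Ctor *F = Ctors[i];
        if (!F) {                          // Null terminator mid-list:
          if (i != Ctors.size() - 1) {     // drop everything after it.
            Ctors.resize(i + 1);
            MadeChange = true;
          }
          break;
        }
        if (F->External)                   // Can't reason about external ctors.
          continue;
        if (ShouldRemove(F)) {
          Ctors.erase(Ctors.begin() + i);
          MadeChange = true;
          --i;                             // Revisit the slot we just filled.
        }
      }
      return MadeChange;
    }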
diff --git a/lib/Transforms/Utils/DemoteRegToStack.cpp b/lib/Transforms/Utils/DemoteRegToStack.cpp
index ac6926f..9972b22 100644
--- a/lib/Transforms/Utils/DemoteRegToStack.cpp
+++ b/lib/Transforms/Utils/DemoteRegToStack.cpp
@@ -25,17 +25,17 @@ AllocaInst *llvm::DemoteRegToStack(Instruction &I, bool VolatileLoads,
Instruction *AllocaPoint) {
if (I.use_empty()) {
I.eraseFromParent();
- return 0;
+ return nullptr;
}
// Create a stack slot to hold the value.
AllocaInst *Slot;
if (AllocaPoint) {
- Slot = new AllocaInst(I.getType(), 0,
+ Slot = new AllocaInst(I.getType(), nullptr,
I.getName()+".reg2mem", AllocaPoint);
} else {
Function *F = I.getParent()->getParent();
- Slot = new AllocaInst(I.getType(), 0, I.getName()+".reg2mem",
+ Slot = new AllocaInst(I.getType(), nullptr, I.getName()+".reg2mem",
F->getEntryBlock().begin());
}
@@ -56,7 +56,7 @@ AllocaInst *llvm::DemoteRegToStack(Instruction &I, bool VolatileLoads,
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
if (PN->getIncomingValue(i) == &I) {
Value *&V = Loads[PN->getIncomingBlock(i)];
- if (V == 0) {
+ if (!V) {
// Insert the load into the predecessor block
V = new LoadInst(Slot, I.getName()+".reload", VolatileLoads,
PN->getIncomingBlock(i)->getTerminator());
@@ -110,17 +110,17 @@ AllocaInst *llvm::DemoteRegToStack(Instruction &I, bool VolatileLoads,
AllocaInst *llvm::DemotePHIToStack(PHINode *P, Instruction *AllocaPoint) {
if (P->use_empty()) {
P->eraseFromParent();
- return 0;
+ return nullptr;
}
// Create a stack slot to hold the value.
AllocaInst *Slot;
if (AllocaPoint) {
- Slot = new AllocaInst(P->getType(), 0,
+ Slot = new AllocaInst(P->getType(), nullptr,
P->getName()+".reg2mem", AllocaPoint);
} else {
Function *F = P->getParent()->getParent();
- Slot = new AllocaInst(P->getType(), 0, P->getName()+".reg2mem",
+ Slot = new AllocaInst(P->getType(), nullptr, P->getName()+".reg2mem",
F->getEntryBlock().begin());
}
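
DemoteRegToStack is the inverse of mem2reg: a value that lived as an SSA name gets a stack slot, one store after its definition, and a fresh load before each use. At the C level the rewrite looks roughly like this; an analogy, not the IR the utility produces:

    // Value 'v' lives in a register (an SSA name): defined once, used directly.
    int before(int a, int b) {
      int v = a + b;
      return v * v;
    }

    // After demotion: one slot, a store after the definition, a load per use.
    int after(int a, int b) {
      int slot;       // The new alloca ("<name>.reg2mem").
      slot = a + b;   // Store inserted right after the definition.
      int u1 = slot;  // Reload before the first use ("<name>.reload").
      int u2 = slot;  // Reload before the second use.
      return u1 * u2;
    }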
diff --git a/lib/Transforms/Utils/FlattenCFG.cpp b/lib/Transforms/Utils/FlattenCFG.cpp
index 39c80f8..51ead40 100644
--- a/lib/Transforms/Utils/FlattenCFG.cpp
+++ b/lib/Transforms/Utils/FlattenCFG.cpp
@@ -11,7 +11,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "flattencfg"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Analysis/AliasAnalysis.h"
@@ -22,16 +21,19 @@
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
using namespace llvm;
+#define DEBUG_TYPE "flattencfg"
+
namespace {
class FlattenCFGOpt {
AliasAnalysis *AA;
/// \brief Use parallel-and or parallel-or to generate conditions for
/// conditional branches.
- bool FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder, Pass *P = 0);
+ bool FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder,
+ Pass *P = nullptr);
/// \brief If \param BB is the merge block of an if-region, attempt to merge
/// the if-region with an adjacent if-region upstream if two if-regions
/// contain identical instructions.
- bool MergeIfRegion(BasicBlock *BB, IRBuilder<> &Builder, Pass *P = 0);
+ bool MergeIfRegion(BasicBlock *BB, IRBuilder<> &Builder, Pass *P = nullptr);
/// \brief Compare a pair of blocks: \p Block1 and \p Block2, which
/// are from two if-regions whose entry blocks are \p Head1 and \p
/// Head2. \returns true if \p Block1 and \p Block2 contain identical
@@ -126,9 +128,9 @@ bool FlattenCFGOpt::FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder,
if (PHI)
return false; // For simplicity, avoid cases containing PHI nodes.
- BasicBlock *LastCondBlock = NULL;
- BasicBlock *FirstCondBlock = NULL;
- BasicBlock *UnCondBlock = NULL;
+ BasicBlock *LastCondBlock = nullptr;
+ BasicBlock *FirstCondBlock = nullptr;
+ BasicBlock *UnCondBlock = nullptr;
int Idx = -1;
// Check predecessors of \param BB.
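
FlattenParallelAndOr's goal, stated as source-level before/after: two chained conditional branches collapse into one branch on a combined condition, removing a block from the CFG. The non-short-circuit '&' is the point, since both tests execute unconditionally; the pass does this on IR with legality checks this sketch ignores:

    int before(int a, int b) {
      if (a != 0)
        if (b != 0)   // The second test lives in its own basic block.
          return 1;
      return 0;
    }

    int after(int a, int b) {
      if ((a != 0) & (b != 0))  // Bitwise '&', not '&&': both tests always run,
        return 1;               // which is the "parallel-and" shape.
      return 0;
    }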
diff --git a/lib/Transforms/Utils/GlobalStatus.cpp b/lib/Transforms/Utils/GlobalStatus.cpp
index e9ebc45..12057e4 100644
--- a/lib/Transforms/Utils/GlobalStatus.cpp
+++ b/lib/Transforms/Utils/GlobalStatus.cpp
@@ -61,7 +61,7 @@ static bool analyzeGlobalAux(const Value *V, GlobalStatus &GS,
} else if (const Instruction *I = dyn_cast<Instruction>(UR)) {
if (!GS.HasMultipleAccessingFunctions) {
const Function *F = I->getParent()->getParent();
- if (GS.AccessingFunction == 0)
+ if (!GS.AccessingFunction)
GS.AccessingFunction = F;
else if (GS.AccessingFunction != F)
GS.HasMultipleAccessingFunctions = true;
@@ -176,6 +176,6 @@ bool GlobalStatus::analyzeGlobal(const Value *V, GlobalStatus &GS) {
GlobalStatus::GlobalStatus()
: IsCompared(false), IsLoaded(false), StoredType(NotStored),
- StoredOnceValue(0), AccessingFunction(0),
+ StoredOnceValue(nullptr), AccessingFunction(nullptr),
HasMultipleAccessingFunctions(false), HasNonInstructionUser(false),
Ordering(NotAtomic) {}
diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp
index 86def3e..e01d0c3 100644
--- a/lib/Transforms/Utils/InlineFunction.cpp
+++ b/lib/Transforms/Utils/InlineFunction.cpp
@@ -19,6 +19,7 @@
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/CallSite.h"
+#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
@@ -51,8 +52,8 @@ namespace {
public:
InvokeInliningInfo(InvokeInst *II)
- : OuterResumeDest(II->getUnwindDest()), InnerResumeDest(0),
- CallerLPad(0), InnerEHValuesPHI(0) {
+ : OuterResumeDest(II->getUnwindDest()), InnerResumeDest(nullptr),
+ CallerLPad(nullptr), InnerEHValuesPHI(nullptr) {
// If there are PHI nodes in the unwind destination block, we need to keep
// track of which values came into them from the invoke before removing
// the edge from this block.
@@ -289,13 +290,13 @@ static void UpdateCallGraphAfterInlining(CallSite CS,
ValueToValueMapTy::iterator VMI = VMap.find(OrigCall);
// Only copy the edge if the call was inlined!
- if (VMI == VMap.end() || VMI->second == 0)
+ if (VMI == VMap.end() || VMI->second == nullptr)
continue;
// If the call was inlined, but then constant folded, there is no edge to
// add. Check for this case.
Instruction *NewCall = dyn_cast<Instruction>(VMI->second);
- if (NewCall == 0) continue;
+ if (!NewCall) continue;
// Remember that this call site got inlined for the client of
// InlineFunction.
@@ -306,7 +307,7 @@ static void UpdateCallGraphAfterInlining(CallSite CS,
// happens, set the callee of the new call site to a more precise
// destination. This can also happen if the call graph node of the caller
// was just unnecessarily imprecise.
- if (I->second->getFunction() == 0)
+ if (!I->second->getFunction())
if (Function *F = CallSite(NewCall).getCalledFunction()) {
// Indirect call site resolved to direct call.
CallerNode->addCalledFunction(CallSite(NewCall), CG[F]);
@@ -322,13 +323,44 @@ static void UpdateCallGraphAfterInlining(CallSite CS,
CallerNode->removeCallEdgeFor(CS);
}
+static void HandleByValArgumentInit(Value *Dst, Value *Src, Module *M,
+ BasicBlock *InsertBlock,
+ InlineFunctionInfo &IFI) {
+ LLVMContext &Context = Src->getContext();
+ Type *VoidPtrTy = Type::getInt8PtrTy(Context);
+ Type *AggTy = cast<PointerType>(Src->getType())->getElementType();
+ Type *Tys[3] = { VoidPtrTy, VoidPtrTy, Type::getInt64Ty(Context) };
+ Function *MemCpyFn = Intrinsic::getDeclaration(M, Intrinsic::memcpy, Tys);
+ IRBuilder<> builder(InsertBlock->begin());
+ Value *DstCast = builder.CreateBitCast(Dst, VoidPtrTy, "tmp");
+ Value *SrcCast = builder.CreateBitCast(Src, VoidPtrTy, "tmp");
+
+ Value *Size;
+ if (IFI.DL == nullptr)
+ Size = ConstantExpr::getSizeOf(AggTy);
+ else
+ Size = ConstantInt::get(Type::getInt64Ty(Context),
+ IFI.DL->getTypeStoreSize(AggTy));
+
+ // Always generate a memcpy of alignment 1 here because we don't know
+ // the alignment of the src pointer. Other optimizations can infer
+ // better alignment.
+ Value *CallArgs[] = {
+ DstCast, SrcCast, Size,
+ ConstantInt::get(Type::getInt32Ty(Context), 1),
+ ConstantInt::getFalse(Context) // isVolatile
+ };
+ builder.CreateCall(MemCpyFn, CallArgs);
+}
+
/// HandleByValArgument - When inlining a call site that has a byval argument,
/// we have to make the implicit memcpy explicit by adding it.
static Value *HandleByValArgument(Value *Arg, Instruction *TheCall,
const Function *CalledFunc,
InlineFunctionInfo &IFI,
unsigned ByValAlignment) {
- Type *AggTy = cast<PointerType>(Arg->getType())->getElementType();
+ PointerType *ArgTy = cast<PointerType>(Arg->getType());
+ Type *AggTy = ArgTy->getElementType();
// If the called function is readonly, then it could not mutate the caller's
// copy of the byval'd memory. In this case, it is safe to elide the copy and
@@ -349,11 +381,7 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall,
// Otherwise, we have to make a memcpy to get a safe alignment. This is bad
// for code quality, but rarely happens and is required for correctness.
}
-
- LLVMContext &Context = Arg->getContext();
- Type *VoidPtrTy = Type::getInt8PtrTy(Context);
-
// Create the alloca. If we have DataLayout, use nice alignment.
unsigned Align = 1;
if (IFI.DL)
@@ -366,32 +394,9 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall,
Function *Caller = TheCall->getParent()->getParent();
- Value *NewAlloca = new AllocaInst(AggTy, 0, Align, Arg->getName(),
+ Value *NewAlloca = new AllocaInst(AggTy, nullptr, Align, Arg->getName(),
&*Caller->begin()->begin());
- // Emit a memcpy.
- Type *Tys[3] = {VoidPtrTy, VoidPtrTy, Type::getInt64Ty(Context)};
- Function *MemCpyFn = Intrinsic::getDeclaration(Caller->getParent(),
- Intrinsic::memcpy,
- Tys);
- Value *DestCast = new BitCastInst(NewAlloca, VoidPtrTy, "tmp", TheCall);
- Value *SrcCast = new BitCastInst(Arg, VoidPtrTy, "tmp", TheCall);
-
- Value *Size;
- if (IFI.DL == 0)
- Size = ConstantExpr::getSizeOf(AggTy);
- else
- Size = ConstantInt::get(Type::getInt64Ty(Context),
- IFI.DL->getTypeStoreSize(AggTy));
-
- // Always generate a memcpy of alignment 1 here because we don't know
- // the alignment of the src pointer. Other optimizations can infer
- // better alignment.
- Value *CallArgs[] = {
- DestCast, SrcCast, Size,
- ConstantInt::get(Type::getInt32Ty(Context), 1),
- ConstantInt::getFalse(Context) // isVolatile
- };
- IRBuilder<>(TheCall).CreateCall(MemCpyFn, CallArgs);
+ IFI.StaticAllocas.push_back(cast<AllocaInst>(NewAlloca));
// Uses of the argument in the function should use our new alloca
// instead.
@@ -417,8 +422,10 @@ static bool isUsedByLifetimeMarker(Value *V) {
// hasLifetimeMarkers - Check whether the given alloca already has
// lifetime.start or lifetime.end intrinsics.
static bool hasLifetimeMarkers(AllocaInst *AI) {
- Type *Int8PtrTy = Type::getInt8PtrTy(AI->getType()->getContext());
- if (AI->getType() == Int8PtrTy)
+ Type *Ty = AI->getType();
+ Type *Int8PtrTy = Type::getInt8PtrTy(Ty->getContext(),
+ Ty->getPointerAddressSpace());
+ if (Ty == Int8PtrTy)
return isUsedByLifetimeMarker(AI);
// Do a scan to find all the casts to i8*.
@@ -472,6 +479,33 @@ static void fixupLineNumbers(Function *Fn, Function::iterator FI,
}
}
+/// Returns a musttail call instruction if one immediately precedes the given
+/// return instruction with an optional bitcast instruction between them.
+static CallInst *getPrecedingMustTailCall(ReturnInst *RI) {
+ Instruction *Prev = RI->getPrevNode();
+ if (!Prev)
+ return nullptr;
+
+ if (Value *RV = RI->getReturnValue()) {
+ if (RV != Prev)
+ return nullptr;
+
+ // Look through the optional bitcast.
+ if (auto *BI = dyn_cast<BitCastInst>(Prev)) {
+ RV = BI->getOperand(0);
+ Prev = BI->getPrevNode();
+ if (!Prev || RV != Prev)
+ return nullptr;
+ }
+ }
+
+ if (auto *CI = dyn_cast<CallInst>(Prev)) {
+ if (CI->isMustTailCall())
+ return CI;
+ }
+ return nullptr;
+}
+
/// InlineFunction - This function inlines the called function into the basic
/// block of the caller. This returns false if it is not possible to inline
/// this call. The program is still in a well defined state if this occurs
@@ -491,15 +525,10 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
IFI.reset();
const Function *CalledFunc = CS.getCalledFunction();
- if (CalledFunc == 0 || // Can't inline external function or indirect
+ if (!CalledFunc || // Can't inline external function or indirect
CalledFunc->isDeclaration() || // call, or call to a vararg function!
CalledFunc->getFunctionType()->isVarArg()) return false;
- // If the call to the callee is not a tail call, we must clear the 'tail'
- // flags on any calls that we inline.
- bool MustClearTailCallFlags =
- !(isa<CallInst>(TheCall) && cast<CallInst>(TheCall)->isTailCall());
-
// If the call to the callee cannot throw, set the 'nounwind' flag on any
// calls that we inline.
bool MarkNoUnwind = CS.doesNotThrow();
@@ -519,7 +548,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
}
// Get the personality function from the callee if it contains a landing pad.
- Value *CalleePersonality = 0;
+ Value *CalleePersonality = nullptr;
for (Function::const_iterator I = CalledFunc->begin(), E = CalledFunc->end();
I != E; ++I)
if (const InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator())) {
@@ -562,6 +591,8 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
{ // Scope to destroy VMap after cloning.
ValueToValueMapTy VMap;
+ // Keep a list of (dst, src) pairs for which to emit byval initializations.
+ SmallVector<std::pair<Value*, Value*>, 4> ByValInit;
assert(CalledFunc->arg_size() == CS.arg_size() &&
"No varargs calls can be inlined!");
@@ -581,11 +612,8 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
if (CS.isByValArgument(ArgNo)) {
ActualArg = HandleByValArgument(ActualArg, TheCall, CalledFunc, IFI,
CalledFunc->getParamAlignment(ArgNo+1));
-
- // Calls that we inline may use the new alloca, so we need to clear
- // their 'tail' flags if HandleByValArgument introduced a new alloca and
- // the callee has calls.
- MustClearTailCallFlags |= ActualArg != *AI;
+ if (ActualArg != *AI)
+ ByValInit.push_back(std::make_pair(ActualArg, (Value*) *AI));
}
VMap[I] = ActualArg;
@@ -602,6 +630,11 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
// Remember the first block that is newly cloned over.
FirstNewBlock = LastBlock; ++FirstNewBlock;
+ // Inject the initialization of byval arguments.
+ for (std::pair<Value*, Value*> &Init : ByValInit)
+ HandleByValArgumentInit(Init.first, Init.second, Caller->getParent(),
+ FirstNewBlock, IFI);
+
// Update the callgraph if requested.
if (IFI.CG)
UpdateCallGraphAfterInlining(CS, FirstNewBlock, VMap, IFI);
@@ -619,7 +652,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
for (BasicBlock::iterator I = FirstNewBlock->begin(),
E = FirstNewBlock->end(); I != E; ) {
AllocaInst *AI = dyn_cast<AllocaInst>(I++);
- if (AI == 0) continue;
+ if (!AI) continue;
// If the alloca is now dead, remove it. This often occurs due to code
// specialization.
@@ -651,6 +684,45 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
}
}
+ bool InlinedMustTailCalls = false;
+ if (InlinedFunctionInfo.ContainsCalls) {
+ CallInst::TailCallKind CallSiteTailKind = CallInst::TCK_None;
+ if (CallInst *CI = dyn_cast<CallInst>(TheCall))
+ CallSiteTailKind = CI->getTailCallKind();
+
+ for (Function::iterator BB = FirstNewBlock, E = Caller->end(); BB != E;
+ ++BB) {
+ for (Instruction &I : *BB) {
+ CallInst *CI = dyn_cast<CallInst>(&I);
+ if (!CI)
+ continue;
+
+ // We need to reduce the strength of any inlined tail calls. For
+ // musttail, we have to avoid introducing potential unbounded stack
+ // growth. For example, if functions 'f' and 'g' are mutually recursive
+ // with musttail, we can inline 'g' into 'f' so long as we preserve
+ // musttail on the cloned call to 'f'. If either the inlined call site
+ // or the cloned call site is *not* musttail, the program already has
+ // one frame of stack growth, so it's safe to remove musttail. Here is
+ // a table of example transformations:
+ //
+ // f -> musttail g -> musttail f ==> f -> musttail f
+ // f -> musttail g -> tail f ==> f -> tail f
+ // f -> g -> musttail f ==> f -> f
+ // f -> g -> tail f ==> f -> f
+ CallInst::TailCallKind ChildTCK = CI->getTailCallKind();
+ ChildTCK = std::min(CallSiteTailKind, ChildTCK);
+ CI->setTailCallKind(ChildTCK);
+ InlinedMustTailCalls |= CI->isMustTailCall();
+
+ // Calls inlined through a 'nounwind' call site should be marked
+ // 'nounwind'.
+ if (MarkNoUnwind)
+ CI->setDoesNotThrow();
+ }
+ }
+ }
+
// Leave lifetime markers for the static alloca's, scoping them to the
// function we just inlined.
if (InsertLifetime && !IFI.StaticAllocas.empty()) {
@@ -664,7 +736,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
continue;
// Try to determine the size of the allocation.
- ConstantInt *AllocaSize = 0;
+ ConstantInt *AllocaSize = nullptr;
if (ConstantInt *AIArraySize =
dyn_cast<ConstantInt>(AI->getArraySize())) {
if (IFI.DL) {
@@ -683,9 +755,12 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
}
builder.CreateLifetimeStart(AI, AllocaSize);
- for (unsigned ri = 0, re = Returns.size(); ri != re; ++ri) {
- IRBuilder<> builder(Returns[ri]);
- builder.CreateLifetimeEnd(AI, AllocaSize);
+ for (ReturnInst *RI : Returns) {
+ // Don't insert llvm.lifetime.end calls between a musttail call and a
+ // return. The return kills all local allocas.
+ if (InlinedMustTailCalls && getPrecedingMustTailCall(RI))
+ continue;
+ IRBuilder<>(RI).CreateLifetimeEnd(AI, AllocaSize);
}
}
}
@@ -704,33 +779,56 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
// Insert a call to llvm.stackrestore before any return instructions in the
// inlined function.
- for (unsigned i = 0, e = Returns.size(); i != e; ++i) {
- IRBuilder<>(Returns[i]).CreateCall(StackRestore, SavedPtr);
+ for (ReturnInst *RI : Returns) {
+ // Don't insert llvm.stackrestore calls between a musttail call and a
+ // return. The return will restore the stack pointer.
+ if (InlinedMustTailCalls && getPrecedingMustTailCall(RI))
+ continue;
+ IRBuilder<>(RI).CreateCall(StackRestore, SavedPtr);
}
}
- // If we are inlining tail call instruction through a call site that isn't
- // marked 'tail', we must remove the tail marker for any calls in the inlined
- // code. Also, calls inlined through a 'nounwind' call site should be marked
- // 'nounwind'.
- if (InlinedFunctionInfo.ContainsCalls &&
- (MustClearTailCallFlags || MarkNoUnwind)) {
- for (Function::iterator BB = FirstNewBlock, E = Caller->end();
- BB != E; ++BB)
- for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
- if (CallInst *CI = dyn_cast<CallInst>(I)) {
- if (MustClearTailCallFlags)
- CI->setTailCall(false);
- if (MarkNoUnwind)
- CI->setDoesNotThrow();
- }
- }
-
// If we are inlining for an invoke instruction, we must make sure to rewrite
// any call instructions into invoke instructions.
if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall))
HandleInlinedInvoke(II, FirstNewBlock, InlinedFunctionInfo);
+ // Handle any inlined musttail call sites. In order for a new call site to be
+ // musttail, the source of the clone and the inlined call site must have been
+ // musttail. Therefore it's safe to return without merging control into the
+ // phi below.
+ if (InlinedMustTailCalls) {
+ // Check if we need to bitcast the result of any musttail calls.
+ Type *NewRetTy = Caller->getReturnType();
+ bool NeedBitCast = !TheCall->use_empty() && TheCall->getType() != NewRetTy;
+
+ // Handle the returns preceded by musttail calls separately.
+ SmallVector<ReturnInst *, 8> NormalReturns;
+ for (ReturnInst *RI : Returns) {
+ CallInst *ReturnedMustTail = getPrecedingMustTailCall(RI);
+ if (!ReturnedMustTail) {
+ NormalReturns.push_back(RI);
+ continue;
+ }
+ if (!NeedBitCast)
+ continue;
+
+ // Delete the old return and any preceding bitcast.
+ BasicBlock *CurBB = RI->getParent();
+ auto *OldCast = dyn_cast_or_null<BitCastInst>(RI->getReturnValue());
+ RI->eraseFromParent();
+ if (OldCast)
+ OldCast->eraseFromParent();
+
+ // Insert a new bitcast and return with the right type.
+ IRBuilder<> Builder(CurBB);
+ Builder.CreateRet(Builder.CreateBitCast(ReturnedMustTail, NewRetTy));
+ }
+
+ // Leave behind the normal returns so we can merge control flow.
+ std::swap(Returns, NormalReturns);
+ }
+
// If we cloned in _exactly one_ basic block, and if that block ends in a
// return instruction, we splice the body of the inlined callee directly into
// the calling basic block.
@@ -774,7 +872,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
// "starter" and "ender" blocks. How we accomplish this depends on whether
// this is an invoke instruction or a call instruction.
BasicBlock *AfterCallBB;
- BranchInst *CreatedBranchToNormalDest = NULL;
+ BranchInst *CreatedBranchToNormalDest = nullptr;
if (InvokeInst *II = dyn_cast<InvokeInst>(TheCall)) {
// Add an unconditional branch to make this look like the CallInst case...
@@ -813,7 +911,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
// any users of the original call/invoke instruction.
Type *RTy = CalledFunc->getReturnType();
- PHINode *PHI = 0;
+ PHINode *PHI = nullptr;
if (Returns.size() > 1) {
// The PHI node should go at the front of the new basic block to merge all
// possible incoming values.
@@ -886,6 +984,11 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
// Since we are now done with the Call/Invoke, we can delete it.
TheCall->eraseFromParent();
+ // If we inlined any musttail calls and the original return is now
+ // unreachable, delete it. It can only contain a bitcast and ret.
+ if (InlinedMustTailCalls && pred_begin(AfterCallBB) == pred_end(AfterCallBB))
+ AfterCallBB->eraseFromParent();
+
// We should always be able to fold the entry block of the function into the
// single predecessor of the block...
assert(cast<BranchInst>(Br)->isUnconditional() && "splitBasicBlock broken!");
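
The strength-reduction table in the musttail handling above works because TailCallKind forms a small lattice: assuming the 3.5 enumerator values mirrored below (TCK_None = 0, TCK_Tail = 1, TCK_MustTail = 2), std::min of the call-site kind and the cloned call's kind yields exactly the table's right-hand column. A standalone check:

    #include <algorithm>
    #include <cassert>

    enum TailCallKind { TCK_None = 0, TCK_Tail = 1, TCK_MustTail = 2 };

    int main() {
      assert(std::min(TCK_MustTail, TCK_MustTail) == TCK_MustTail); // stays musttail
      assert(std::min(TCK_MustTail, TCK_Tail) == TCK_Tail);         // decays to tail
      assert(std::min(TCK_None, TCK_MustTail) == TCK_None);         // plain call
      assert(std::min(TCK_None, TCK_Tail) == TCK_None);             // plain call
      return 0;
    }
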
diff --git a/lib/Transforms/Utils/IntegerDivision.cpp b/lib/Transforms/Utils/IntegerDivision.cpp
index e73a543..9f91eeb 100644
--- a/lib/Transforms/Utils/IntegerDivision.cpp
+++ b/lib/Transforms/Utils/IntegerDivision.cpp
@@ -14,7 +14,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "integer-division"
#include "llvm/Transforms/Utils/IntegerDivision.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
@@ -24,6 +23,8 @@
using namespace llvm;
+#define DEBUG_TYPE "integer-division"
+
/// Generate code to compute the remainder of two signed integers. Returns the
/// remainder, which will have the sign of the dividend. Builder's insert point
/// should be pointing where the caller wants code generated, e.g. at the srem
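
The sign convention described above (the remainder takes the sign of the dividend) is the truncated-division rule, the same one C++11's % operator guarantees:

    // Illustrative only: C++11 % matches LLVM srem's sign convention.
    static_assert(-7 % 3 == -1, "remainder keeps the dividend's sign");
    static_assert(7 % -3 == 1, "remainder keeps the dividend's sign");
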
diff --git a/lib/Transforms/Utils/LCSSA.cpp b/lib/Transforms/Utils/LCSSA.cpp
index d538175..51a3d9c 100644
--- a/lib/Transforms/Utils/LCSSA.cpp
+++ b/lib/Transforms/Utils/LCSSA.cpp
@@ -27,7 +27,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "lcssa"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
@@ -44,6 +43,8 @@
#include "llvm/Transforms/Utils/SSAUpdater.h"
using namespace llvm;
+#define DEBUG_TYPE "lcssa"
+
STATISTIC(NumLCSSA, "Number of live out of a loop variables");
/// Return true if the specified block is in the list.
@@ -267,8 +268,6 @@ struct LCSSA : public FunctionPass {
}
private:
- bool processLoop(Loop &L);
-
void verifyAnalysis() const override;
};
}
diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp
index 9d0be8b..aedd787 100644
--- a/lib/Transforms/Utils/Local.cpp
+++ b/lib/Transforms/Utils/Local.cpp
@@ -43,6 +43,8 @@
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
+#define DEBUG_TYPE "local"
+
STATISTIC(NumRemoved, "Number of unreachable basic blocks removed");
//===----------------------------------------------------------------------===//
@@ -159,7 +161,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
// Otherwise, check to see if the switch only branches to one destination.
// We do this by resetting "TheOnlyDest" to null when we find two non-equal
// destinations.
- if (i.getCaseSuccessor() != TheOnlyDest) TheOnlyDest = 0;
+ if (i.getCaseSuccessor() != TheOnlyDest) TheOnlyDest = nullptr;
}
if (CI && !TheOnlyDest) {
@@ -180,7 +182,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
// Found case matching a constant operand?
BasicBlock *Succ = SI->getSuccessor(i);
if (Succ == TheOnlyDest)
- TheOnlyDest = 0; // Don't modify the first branch to TheOnlyDest
+ TheOnlyDest = nullptr; // Don't modify the first branch to TheOnlyDest
else
Succ->removePredecessor(BB);
}
@@ -233,7 +235,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
for (unsigned i = 0, e = IBI->getNumDestinations(); i != e; ++i) {
if (IBI->getDestination(i) == TheOnlyDest)
- TheOnlyDest = 0;
+ TheOnlyDest = nullptr;
else
IBI->getDestination(i)->removePredecessor(IBI->getParent());
}
@@ -331,7 +333,7 @@ llvm::RecursivelyDeleteTriviallyDeadInstructions(Value *V,
// dead as we go.
for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
Value *OpV = I->getOperand(i);
- I->setOperand(i, 0);
+ I->setOperand(i, nullptr);
if (!OpV->use_empty()) continue;
@@ -894,24 +896,26 @@ static unsigned enforceKnownAlignment(Value *V, unsigned Align,
return PrefAlign;
}
- if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
+ if (auto *GO = dyn_cast<GlobalObject>(V)) {
// If there is a large requested alignment and we can, bump up the alignment
// of the global.
- if (GV->isDeclaration()) return Align;
+ if (GO->isDeclaration())
+ return Align;
// If the memory we set aside for the global may not be the memory used by
// the final program then it is impossible for us to reliably enforce the
// preferred alignment.
- if (GV->isWeakForLinker()) return Align;
+ if (GO->isWeakForLinker())
+ return Align;
- if (GV->getAlignment() >= PrefAlign)
- return GV->getAlignment();
+ if (GO->getAlignment() >= PrefAlign)
+ return GO->getAlignment();
// We can only increase the alignment of the global if it has no alignment
// specified or if it is not assigned a section. If it is assigned a
// section, the global could be densely packed with other objects in the
// section, increasing the alignment could cause padding issues.
- if (!GV->hasSection() || GV->getAlignment() == 0)
- GV->setAlignment(PrefAlign);
- return GV->getAlignment();
+ if (!GO->hasSection() || GO->getAlignment() == 0)
+ GO->setAlignment(PrefAlign);
+ return GO->getAlignment();
}
return Align;
@@ -928,7 +932,7 @@ unsigned llvm::getOrEnforceKnownAlignment(Value *V, unsigned PrefAlign,
unsigned BitWidth = DL ? DL->getPointerTypeSizeInBits(V->getType()) : 64;
APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
- ComputeMaskedBits(V, KnownZero, KnownOne, DL);
+ computeKnownBits(V, KnownZero, KnownOne, DL);
unsigned TrailZ = KnownZero.countTrailingOnes();
// Avoid trouble with ridiculously large TrailZ values, such as
@@ -981,10 +985,10 @@ bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
if (LdStHasDebugValue(DIVar, SI))
return true;
- Instruction *DbgVal = NULL;
+ Instruction *DbgVal = nullptr;
// If an argument is zero extended then use argument directly. The ZExt
// may be zapped by an optimization pass in future.
- Argument *ExtendedArg = NULL;
+ Argument *ExtendedArg = nullptr;
if (ZExtInst *ZExt = dyn_cast<ZExtInst>(SI->getOperand(0)))
ExtendedArg = dyn_cast<Argument>(ZExt->getOperand(0));
if (SExtInst *SExt = dyn_cast<SExtInst>(SI->getOperand(0)))
@@ -993,14 +997,7 @@ bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
DbgVal = Builder.insertDbgValueIntrinsic(ExtendedArg, 0, DIVar, SI);
else
DbgVal = Builder.insertDbgValueIntrinsic(SI->getOperand(0), 0, DIVar, SI);
-
- // Propagate any debug metadata from the store onto the dbg.value.
- DebugLoc SIDL = SI->getDebugLoc();
- if (!SIDL.isUnknown())
- DbgVal->setDebugLoc(SIDL);
- // Otherwise propagate debug metadata from dbg.declare.
- else
- DbgVal->setDebugLoc(DDI->getDebugLoc());
+ DbgVal->setDebugLoc(DDI->getDebugLoc());
return true;
}
@@ -1020,17 +1017,16 @@ bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
Instruction *DbgVal =
Builder.insertDbgValueIntrinsic(LI->getOperand(0), 0,
DIVar, LI);
-
- // Propagate any debug metadata from the store onto the dbg.value.
- DebugLoc LIDL = LI->getDebugLoc();
- if (!LIDL.isUnknown())
- DbgVal->setDebugLoc(LIDL);
- // Otherwise propagate debug metadata from dbg.declare.
- else
- DbgVal->setDebugLoc(DDI->getDebugLoc());
+ DbgVal->setDebugLoc(DDI->getDebugLoc());
return true;
}
+/// Determine whether this alloca is either a VLA or an array.
+static bool isArray(AllocaInst *AI) {
+ return AI->isArrayAllocation() ||
+ AI->getType()->getElementType()->isArrayTy();
+}
+
/// LowerDbgDeclare - Lowers llvm.dbg.declare intrinsics into appropriate set
/// of llvm.dbg.value intrinsics.
bool llvm::LowerDbgDeclare(Function &F) {
@@ -1049,20 +1045,26 @@ bool llvm::LowerDbgDeclare(Function &F) {
AllocaInst *AI = dyn_cast_or_null<AllocaInst>(DDI->getAddress());
// If this is an alloca for a scalar variable, insert a dbg.value
// at each load and store to the alloca and erase the dbg.declare.
- if (AI && !AI->isArrayAllocation()) {
-
- // We only remove the dbg.declare intrinsic if all uses are
- // converted to dbg.value intrinsics.
- bool RemoveDDI = true;
+ // The dbg.values allow tracking a variable even if it is not
+ // stored on the stack, while the dbg.declare can only describe
+ // the stack slot (and at a lexical-scope granularity). Later
+ // passes will attempt to elide the stack slot.
+ if (AI && !isArray(AI)) {
for (User *U : AI->users())
if (StoreInst *SI = dyn_cast<StoreInst>(U))
ConvertDebugDeclareToDebugValue(DDI, SI, DIB);
else if (LoadInst *LI = dyn_cast<LoadInst>(U))
ConvertDebugDeclareToDebugValue(DDI, LI, DIB);
- else
- RemoveDDI = false;
- if (RemoveDDI)
- DDI->eraseFromParent();
+ else if (CallInst *CI = dyn_cast<CallInst>(U)) {
+ // This is a by-value call or some other instruction that
+ // takes a pointer to the variable. Insert a *value*
+ // intrinsic that describes the alloca.
+ auto DbgVal =
+ DIB.insertDbgValueIntrinsic(AI, 0,
+ DIVariable(DDI->getVariable()), CI);
+ DbgVal->setDebugLoc(DDI->getDebugLoc());
+ }
+ DDI->eraseFromParent();
}
}
return true;
@@ -1076,7 +1078,7 @@ DbgDeclareInst *llvm::FindAllocaDbgDeclare(Value *V) {
if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(U))
return DDI;
- return 0;
+ return nullptr;
}
bool llvm::replaceDbgDeclareForAlloca(AllocaInst *AI, Value *NewAllocaAddress,
diff --git a/lib/Transforms/Utils/LoopSimplify.cpp b/lib/Transforms/Utils/LoopSimplify.cpp
index 47083ea..f7787da 100644
--- a/lib/Transforms/Utils/LoopSimplify.cpp
+++ b/lib/Transforms/Utils/LoopSimplify.cpp
@@ -37,7 +37,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "loop-simplify"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SetOperations.h"
@@ -63,6 +62,8 @@
#include "llvm/Transforms/Utils/LoopUtils.h"
using namespace llvm;
+#define DEBUG_TYPE "loop-simplify"
+
STATISTIC(NumInserted, "Number of pre-header or exit blocks inserted");
STATISTIC(NumNested , "Number of nested loops split out");
@@ -85,7 +86,7 @@ static void placeSplitBlockCarefully(BasicBlock *NewBB,
// Figure out *which* outside block to put this after. Prefer an outside
// block that neighbors a BB actually in the loop.
- BasicBlock *FoundBB = 0;
+ BasicBlock *FoundBB = nullptr;
for (unsigned i = 0, e = SplitPreds.size(); i != e; ++i) {
Function::iterator BBI = SplitPreds[i];
if (++BBI != NewBB->getParent()->end() &&
@@ -119,7 +120,7 @@ BasicBlock *llvm::InsertPreheaderForLoop(Loop *L, Pass *PP) {
// If the loop is branched to from an indirect branch, we won't
// be able to fully transform the loop, because it prohibits
// edge splitting.
- if (isa<IndirectBrInst>(P->getTerminator())) return 0;
+ if (isa<IndirectBrInst>(P->getTerminator())) return nullptr;
// Keep track of it.
OutsideBlocks.push_back(P);
@@ -160,14 +161,14 @@ static BasicBlock *rewriteLoopExitBlock(Loop *L, BasicBlock *Exit, Pass *PP) {
BasicBlock *P = *I;
if (L->contains(P)) {
// Don't do this if the loop is exited via an indirect branch.
- if (isa<IndirectBrInst>(P->getTerminator())) return 0;
+ if (isa<IndirectBrInst>(P->getTerminator())) return nullptr;
LoopBlocks.push_back(P);
}
}
assert(!LoopBlocks.empty() && "No edges coming in from outside the loop?");
- BasicBlock *NewExitBB = 0;
+ BasicBlock *NewExitBB = nullptr;
if (Exit->isLandingPad()) {
SmallVector<BasicBlock*, 2> NewBBs;
@@ -211,7 +212,7 @@ static PHINode *findPHIToPartitionLoops(Loop *L, AliasAnalysis *AA,
for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ) {
PHINode *PN = cast<PHINode>(I);
++I;
- if (Value *V = SimplifyInstruction(PN, 0, 0, DT)) {
+ if (Value *V = SimplifyInstruction(PN, nullptr, nullptr, DT)) {
// This is a degenerate PHI already, don't modify it!
PN->replaceAllUsesWith(V);
if (AA) AA->deleteValue(PN);
@@ -226,7 +227,7 @@ static PHINode *findPHIToPartitionLoops(Loop *L, AliasAnalysis *AA,
// We found something tasty to remove.
return PN;
}
- return 0;
+ return nullptr;
}
/// \brief If this loop has multiple backedges, try to pull one of them out into
@@ -253,14 +254,14 @@ static Loop *separateNestedLoop(Loop *L, BasicBlock *Preheader,
LoopInfo *LI, ScalarEvolution *SE, Pass *PP) {
// Don't try to separate loops without a preheader.
if (!Preheader)
- return 0;
+ return nullptr;
// The header is not a landing pad; preheader insertion should ensure this.
assert(!L->getHeader()->isLandingPad() &&
"Can't insert backedge to landing pad");
PHINode *PN = findPHIToPartitionLoops(L, AA, DT);
- if (PN == 0) return 0; // No known way to partition.
+ if (!PN) return nullptr; // No known way to partition.
// Pull out all predecessors that have varying values in the loop. This
// handles the case when a PHI node has multiple instances of itself as
@@ -271,7 +272,7 @@ static Loop *separateNestedLoop(Loop *L, BasicBlock *Preheader,
!L->contains(PN->getIncomingBlock(i))) {
// We can't split indirectbr edges.
if (isa<IndirectBrInst>(PN->getIncomingBlock(i)->getTerminator()))
- return 0;
+ return nullptr;
OuterLoopPreds.push_back(PN->getIncomingBlock(i));
}
}
@@ -362,7 +363,7 @@ static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader,
// Unique backedge insertion currently depends on having a preheader.
if (!Preheader)
- return 0;
+ return nullptr;
// The header is not a landing pad; preheader insertion should ensure this.
assert(!Header->isLandingPad() && "Can't insert backedge to landing pad");
@@ -374,7 +375,7 @@ static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader,
// Indirectbr edges cannot be split, so we must fail if we find one.
if (isa<IndirectBrInst>(P->getTerminator()))
- return 0;
+ return nullptr;
if (P != Preheader) BackedgeBlocks.push_back(P);
}
@@ -403,7 +404,7 @@ static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader,
// preheader over to the new PHI node.
unsigned PreheaderIdx = ~0U;
bool HasUniqueIncomingValue = true;
- Value *UniqueValue = 0;
+ Value *UniqueValue = nullptr;
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
BasicBlock *IBB = PN->getIncomingBlock(i);
Value *IV = PN->getIncomingValue(i);
@@ -412,7 +413,7 @@ static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader,
} else {
NewPN->addIncoming(IV, IBB);
if (HasUniqueIncomingValue) {
- if (UniqueValue == 0)
+ if (!UniqueValue)
UniqueValue = IV;
else if (UniqueValue != IV)
HasUniqueIncomingValue = false;
@@ -609,7 +610,7 @@ ReprocessLoop:
PHINode *PN;
for (BasicBlock::iterator I = L->getHeader()->begin();
(PN = dyn_cast<PHINode>(I++)); )
- if (Value *V = SimplifyInstruction(PN, 0, 0, DT)) {
+ if (Value *V = SimplifyInstruction(PN, nullptr, nullptr, DT)) {
if (AA) AA->deleteValue(PN);
if (SE) SE->forgetValue(PN);
PN->replaceAllUsesWith(V);
@@ -653,7 +654,8 @@ ReprocessLoop:
if (Inst == CI)
continue;
if (!L->makeLoopInvariant(Inst, AnyInvariant,
- Preheader ? Preheader->getTerminator() : 0)) {
+ Preheader ? Preheader->getTerminator()
+ : nullptr)) {
AllInvariant = false;
break;
}
@@ -761,12 +763,6 @@ namespace {
/// verifyAnalysis() - Verify LoopSimplifyForm's guarantees.
void verifyAnalysis() const override;
-
- private:
- bool ProcessLoop(Loop *L);
- BasicBlock *RewriteLoopExitBlock(Loop *L, BasicBlock *Exit);
- Loop *SeparateNestedLoop(Loop *L, BasicBlock *Preheader);
- BasicBlock *InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader);
};
}
@@ -782,7 +778,7 @@ INITIALIZE_PASS_END(LoopSimplify, "loop-simplify",
char &llvm::LoopSimplifyID = LoopSimplify::ID;
Pass *llvm::createLoopSimplifyPass() { return new LoopSimplify(); }
-/// runOnLoop - Run down all loops in the CFG (recursively, but we could do
+/// runOnFunction - Run down all loops in the CFG (recursively, but we could do
/// it in any convenient order) inserting preheaders...
///
bool LoopSimplify::runOnFunction(Function &F) {
diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp
index d2dfc20..d953e30 100644
--- a/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/lib/Transforms/Utils/LoopUnroll.cpp
@@ -16,7 +16,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "loop-unroll"
#include "llvm/Transforms/Utils/UnrollLoop.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/InstructionSimplify.h"
@@ -25,6 +24,8 @@
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Dominators.h"
+#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
@@ -34,6 +35,8 @@
#include "llvm/Transforms/Utils/SimplifyIndVar.h"
using namespace llvm;
+#define DEBUG_TYPE "loop-unroll"
+
// TODO: Should these be here or in LoopUnroll?
STATISTIC(NumCompletelyUnrolled, "Number of loops completely unrolled");
STATISTIC(NumUnrolled, "Number of loops unrolled (completely or otherwise)");
@@ -68,10 +71,10 @@ static BasicBlock *FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI,
// pred, and if there is only one distinct successor of the predecessor, and
// if there are no PHI nodes.
BasicBlock *OnlyPred = BB->getSinglePredecessor();
- if (!OnlyPred) return 0;
+ if (!OnlyPred) return nullptr;
if (OnlyPred->getTerminator()->getNumSuccessors() != 1)
- return 0;
+ return nullptr;
DEBUG(dbgs() << "Merging: " << *BB << "into: " << *OnlyPred);
@@ -227,20 +230,33 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
(unsigned)GreatestCommonDivisor64(Count, TripMultiple);
}
+ // Report the unrolling decision.
+ DebugLoc LoopLoc = L->getStartLoc();
+ Function *F = Header->getParent();
+ LLVMContext &Ctx = F->getContext();
+
if (CompletelyUnroll) {
DEBUG(dbgs() << "COMPLETELY UNROLLING loop %" << Header->getName()
<< " with trip count " << TripCount << "!\n");
+ emitOptimizationRemark(Ctx, DEBUG_TYPE, *F, LoopLoc,
+ Twine("completely unrolled loop with ") +
+ Twine(TripCount) + " iterations");
} else {
DEBUG(dbgs() << "UNROLLING loop %" << Header->getName()
<< " by " << Count);
+ std::string DiagMsg = ("unrolled loop by a factor of " + Twine(Count)).str();
if (TripMultiple == 0 || BreakoutTrip != TripMultiple) {
DEBUG(dbgs() << " with a breakout at trip " << BreakoutTrip);
+ DiagMsg.concat(" with a breakout at trip " + Twine(BreakoutTrip));
} else if (TripMultiple != 1) {
DEBUG(dbgs() << " with " << TripMultiple << " trips per branch");
+ DiagMsg.concat(" with " + Twine(TripMultiple) + " trips per branch");
} else if (RuntimeTripCount) {
DEBUG(dbgs() << " with run-time trip count");
+ DiagMsg.concat(" with run-time trip count");
}
DEBUG(dbgs() << "!\n");
+ emitOptimizationRemark(Ctx, DEBUG_TYPE, *F, LoopLoc, DiagMsg);
}
bool ContinueOnTrue = L->contains(BI->getSuccessor(0));
@@ -411,7 +427,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
}
}
- DominatorTree *DT = 0;
+ DominatorTree *DT = nullptr;
if (PP) {
// FIXME: Reconstruct dom info, because it is not preserved properly.
// Incrementally updating domtree after loop unrolling would be easy.
@@ -458,7 +474,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
Loop *OuterL = L->getParentLoop();
// Remove the loop from the LoopPassManager if it's completely removed.
- if (CompletelyUnroll && LPM != NULL)
+ if (CompletelyUnroll && LPM != nullptr)
LPM->deleteLoopFromQueue(L);
// If we have a pass and a DominatorTree we should re-simplify impacted loops
@@ -470,7 +486,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
OuterL = L;
if (OuterL) {
ScalarEvolution *SE = PP->getAnalysisIfAvailable<ScalarEvolution>();
- simplifyLoop(OuterL, DT, LI, PP, /*AliasAnalysis*/ 0, SE);
+ simplifyLoop(OuterL, DT, LI, PP, /*AliasAnalysis*/ nullptr, SE);
formLCSSARecursively(*OuterL, *DT, SE);
}
}
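
One subtlety in the remark messages above: Twine is a lazily-evaluated string that holds references to its operands, and its concat() returns a new Twine rather than appending in place, so a message accumulated across several statements must be materialized eagerly. A sketch of the safe shape, reusing the names from the code above:

    std::string DiagMsg =
        ("unrolled loop by a factor of " + Twine(Count)).str();
    if (RuntimeTripCount)
      DiagMsg += " with run-time trip count";
    // A std::string converts implicitly to the Twine parameter.
    emitOptimizationRemark(Ctx, DEBUG_TYPE, *F, LoopLoc, DiagMsg);
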
diff --git a/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/lib/Transforms/Utils/LoopUnrollRuntime.cpp
index d801d5f..5bef091 100644
--- a/lib/Transforms/Utils/LoopUnrollRuntime.cpp
+++ b/lib/Transforms/Utils/LoopUnrollRuntime.cpp
@@ -21,7 +21,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "loop-unroll"
#include "llvm/Transforms/Utils/UnrollLoop.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/LoopIterator.h"
@@ -37,6 +36,8 @@
using namespace llvm;
+#define DEBUG_TYPE "loop-unroll"
+
STATISTIC(NumRuntimeUnrolled,
"Number of loops unrolled with run-time trip counts");
@@ -58,7 +59,7 @@ static void ConnectProlog(Loop *L, Value *TripCount, unsigned Count,
BasicBlock *OrigPH, BasicBlock *NewPH,
ValueToValueMapTy &LVMap, Pass *P) {
BasicBlock *Latch = L->getLoopLatch();
- assert(Latch != 0 && "Loop must have a latch");
+ assert(Latch && "Loop must have a latch");
// Create a PHI node for each outgoing value from the original loop
// (which means it is an outgoing value from the prolog code too).
@@ -110,7 +111,7 @@ static void ConnectProlog(Loop *L, Value *TripCount, unsigned Count,
new ICmpInst(InsertPt, ICmpInst::ICMP_ULT, TripCount,
ConstantInt::get(TripCount->getType(), Count));
BasicBlock *Exit = L->getUniqueExitBlock();
- assert(Exit != 0 && "Loop must have a single exit block only");
+ assert(Exit && "Loop must have a single exit block only");
// Split the exit to maintain loop canonicalization guarantees
SmallVector<BasicBlock*, 4> Preds(pred_begin(Exit), pred_end(Exit));
if (!Exit->isLandingPad()) {
@@ -232,7 +233,7 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI,
// Make sure the loop is in canonical form, and there is a single
// exit block only.
- if (!L->isLoopSimplifyForm() || L->getUniqueExitBlock() == 0)
+ if (!L->isLoopSimplifyForm() || !L->getUniqueExitBlock())
return false;
// Use Scalar Evolution to compute the trip count. This allows more
@@ -240,7 +241,7 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI,
if (!LPM)
return false;
ScalarEvolution *SE = LPM->getAnalysisIfAvailable<ScalarEvolution>();
- if (SE == 0)
+ if (!SE)
return false;
// Only unroll loops with a computable trip count and the trip count needs
@@ -301,7 +302,7 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI,
ValueToValueMapTy LVMap;
Function *F = Header->getParent();
// These variables are used to update the CFG links in each iteration
- BasicBlock *CompareBB = 0;
+ BasicBlock *CompareBB = nullptr;
BasicBlock *LastLoopBB = PH;
// Get an ordered list of blocks in the loop to help with the ordering of the
// cloned blocks in the prolog code
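
The prolog strategy used by UnrollRuntimeLoopProlog is easiest to see in source form. A sketch of the control flow it produces for Count = 4 (not the pass's literal output; body and run are illustrative):

    #include <cstdio>

    void body(unsigned i) { std::printf("%u\n", i); }

    void run(unsigned n) {                  // original: for (i = 0; i < n; ++i)
      unsigned r = n % 4;
      for (unsigned i = 0; i < r; ++i)      // prolog: trip count % Count times
        body(i);
      for (unsigned i = r; i < n; i += 4) { // unrolled loop: multiple of Count
        body(i); body(i + 1); body(i + 2); body(i + 3);
      }
    }
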
diff --git a/lib/Transforms/Utils/LowerExpectIntrinsic.cpp b/lib/Transforms/Utils/LowerExpectIntrinsic.cpp
index 3e61289..ff89e74 100644
--- a/lib/Transforms/Utils/LowerExpectIntrinsic.cpp
+++ b/lib/Transforms/Utils/LowerExpectIntrinsic.cpp
@@ -11,7 +11,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "lower-expect-intrinsic"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/IR/BasicBlock.h"
@@ -29,6 +28,8 @@
using namespace llvm;
+#define DEBUG_TYPE "lower-expect-intrinsic"
+
STATISTIC(IfHandled, "Number of 'expect' intrinsic instructions handled");
static cl::opt<uint32_t>
diff --git a/lib/Transforms/Utils/LowerInvoke.cpp b/lib/Transforms/Utils/LowerInvoke.cpp
index b1f758e..66d57b0 100644
--- a/lib/Transforms/Utils/LowerInvoke.cpp
+++ b/lib/Transforms/Utils/LowerInvoke.cpp
@@ -14,7 +14,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "lowerinvoke"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
@@ -25,6 +24,8 @@
#include "llvm/Support/CommandLine.h"
using namespace llvm;
+#define DEBUG_TYPE "lowerinvoke"
+
STATISTIC(NumInvokes, "Number of invokes replaced");
namespace {
diff --git a/lib/Transforms/Utils/LowerSwitch.cpp b/lib/Transforms/Utils/LowerSwitch.cpp
index 6fb7410..9ef694c 100644
--- a/lib/Transforms/Utils/LowerSwitch.cpp
+++ b/lib/Transforms/Utils/LowerSwitch.cpp
@@ -27,6 +27,8 @@
#include <algorithm>
using namespace llvm;
+#define DEBUG_TYPE "lower-switch"
+
namespace {
/// LowerSwitch Pass - Replace all SwitchInst instructions with chained branch
/// instructions.
@@ -51,7 +53,8 @@ namespace {
Constant* High;
BasicBlock* BB;
- CaseRange(Constant *low = 0, Constant *high = 0, BasicBlock *bb = 0) :
+ CaseRange(Constant *low = nullptr, Constant *high = nullptr,
+ BasicBlock *bb = nullptr) :
Low(low), High(high), BB(bb) { }
};
@@ -182,7 +185,7 @@ BasicBlock* LowerSwitch::newLeafBlock(CaseRange& Leaf, Value* Val,
F->getBasicBlockList().insert(++FI, NewLeaf);
// Emit comparison
- ICmpInst* Comp = NULL;
+ ICmpInst* Comp = nullptr;
if (Leaf.Low == Leaf.High) {
// Make the seteq instruction...
Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_EQ, Val,
diff --git a/lib/Transforms/Utils/Mem2Reg.cpp b/lib/Transforms/Utils/Mem2Reg.cpp
index a188ac5..189caa7 100644
--- a/lib/Transforms/Utils/Mem2Reg.cpp
+++ b/lib/Transforms/Utils/Mem2Reg.cpp
@@ -12,7 +12,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "mem2reg"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/IR/Dominators.h"
@@ -22,6 +21,8 @@
#include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h"
using namespace llvm;
+#define DEBUG_TYPE "mem2reg"
+
STATISTIC(NumPromoted, "Number of alloca's promoted");
namespace {
diff --git a/lib/Transforms/Utils/ModuleUtils.cpp b/lib/Transforms/Utils/ModuleUtils.cpp
index ff6e6f9..d9dbbca 100644
--- a/lib/Transforms/Utils/ModuleUtils.cpp
+++ b/lib/Transforms/Utils/ModuleUtils.cpp
@@ -24,16 +24,16 @@ static void appendToGlobalArray(const char *Array,
Module &M, Function *F, int Priority) {
IRBuilder<> IRB(M.getContext());
FunctionType *FnTy = FunctionType::get(IRB.getVoidTy(), false);
- StructType *Ty = StructType::get(
- IRB.getInt32Ty(), PointerType::getUnqual(FnTy), NULL);
-
- Constant *RuntimeCtorInit = ConstantStruct::get(
- Ty, IRB.getInt32(Priority), F, NULL);
// Get the current set of static global constructors and add the new ctor
// to the list.
SmallVector<Constant *, 16> CurrentCtors;
- if (GlobalVariable * GVCtor = M.getNamedGlobal(Array)) {
+ StructType *EltTy;
+ if (GlobalVariable *GVCtor = M.getNamedGlobal(Array)) {
+ // If there is a global_ctors array, use the existing struct type, which can
+ // have 2 or 3 fields.
+ ArrayType *ATy = cast<ArrayType>(GVCtor->getType()->getElementType());
+ EltTy = cast<StructType>(ATy->getElementType());
if (Constant *Init = GVCtor->getInitializer()) {
unsigned n = Init->getNumOperands();
CurrentCtors.reserve(n + 1);
@@ -41,13 +41,26 @@ static void appendToGlobalArray(const char *Array,
CurrentCtors.push_back(cast<Constant>(Init->getOperand(i)));
}
GVCtor->eraseFromParent();
+ } else {
+ // Use a simple two-field struct if there isn't one already.
+ EltTy = StructType::get(IRB.getInt32Ty(), PointerType::getUnqual(FnTy),
+ nullptr);
}
+ // Build a 2 or 3 field global_ctor entry. We don't take a comdat key.
+ Constant *CSVals[3];
+ CSVals[0] = IRB.getInt32(Priority);
+ CSVals[1] = F;
+ // FIXME: Drop support for the two element form in LLVM 4.0.
+ if (EltTy->getNumElements() >= 3)
+ CSVals[2] = llvm::Constant::getNullValue(IRB.getInt8PtrTy());
+ Constant *RuntimeCtorInit =
+ ConstantStruct::get(EltTy, makeArrayRef(CSVals, EltTy->getNumElements()));
+
CurrentCtors.push_back(RuntimeCtorInit);
// Create a new initializer.
- ArrayType *AT = ArrayType::get(RuntimeCtorInit->getType(),
- CurrentCtors.size());
+ ArrayType *AT = ArrayType::get(EltTy, CurrentCtors.size());
Constant *NewInit = ConstantArray::get(AT, CurrentCtors);
// Create the new global variable and replace all uses of
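
The public entry point wrapping this helper is appendToGlobalCtors, declared in llvm/Transforms/Utils/ModuleUtils.h; after this change a caller does not need to know whether the module already uses the two- or three-field entry form. A usage sketch (registerCtor is illustrative):

    #include "llvm/Transforms/Utils/ModuleUtils.h"

    // Register Init as a static constructor; the helper adopts the struct
    // arity of any existing @llvm.global_ctors array.
    void registerCtor(llvm::Module &M, llvm::Function *Init) {
      llvm::appendToGlobalCtors(M, Init, /*Priority=*/65535);
    }
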
diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
index 25fab89..06d73fe 100644
--- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -25,7 +25,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "mem2reg"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
@@ -51,6 +50,8 @@
#include <queue>
using namespace llvm;
+#define DEBUG_TYPE "mem2reg"
+
STATISTIC(NumLocalPromoted, "Number of alloca's promoted within one block");
STATISTIC(NumSingleStore, "Number of alloca's promoted with a single store");
STATISTIC(NumDeadAlloca, "Number of dead alloca's removed");
@@ -59,6 +60,7 @@ STATISTIC(NumPHIInsert, "Number of PHI nodes inserted");
bool llvm::isAllocaPromotable(const AllocaInst *AI) {
// FIXME: If the memory unit is of pointer or integer type, we can permit
// assignments to subsections of the memory unit.
+ unsigned AS = AI->getType()->getAddressSpace();
// Only allow direct and non-volatile loads and stores...
for (const User *U : AI->users()) {
@@ -79,12 +81,12 @@ bool llvm::isAllocaPromotable(const AllocaInst *AI) {
II->getIntrinsicID() != Intrinsic::lifetime_end)
return false;
} else if (const BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
- if (BCI->getType() != Type::getInt8PtrTy(U->getContext()))
+ if (BCI->getType() != Type::getInt8PtrTy(U->getContext(), AS))
return false;
if (!onlyUsedByLifetimeMarkers(BCI))
return false;
} else if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) {
- if (GEPI->getType() != Type::getInt8PtrTy(U->getContext()))
+ if (GEPI->getType() != Type::getInt8PtrTy(U->getContext(), AS))
return false;
if (!GEPI->hasAllZeroIndices())
return false;
@@ -114,11 +116,11 @@ struct AllocaInfo {
void clear() {
DefiningBlocks.clear();
UsingBlocks.clear();
- OnlyStore = 0;
- OnlyBlock = 0;
+ OnlyStore = nullptr;
+ OnlyBlock = nullptr;
OnlyUsedInOneBlock = true;
- AllocaPointerVal = 0;
- DbgDeclare = 0;
+ AllocaPointerVal = nullptr;
+ DbgDeclare = nullptr;
}
/// Scan the uses of the specified alloca, filling in the AllocaInfo used
@@ -146,7 +148,7 @@ struct AllocaInfo {
}
if (OnlyUsedInOneBlock) {
- if (OnlyBlock == 0)
+ if (!OnlyBlock)
OnlyBlock = User->getParent();
else if (OnlyBlock != User->getParent())
OnlyUsedInOneBlock = false;
@@ -162,7 +164,7 @@ class RenamePassData {
public:
typedef std::vector<Value *> ValVector;
- RenamePassData() : BB(NULL), Pred(NULL), Values() {}
+ RenamePassData() : BB(nullptr), Pred(nullptr), Values() {}
RenamePassData(BasicBlock *B, BasicBlock *P, const ValVector &V)
: BB(B), Pred(P), Values(V) {}
BasicBlock *BB;
@@ -471,7 +473,8 @@ static void promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info,
// Find the nearest store that has a lower index than this load.
StoresByIndexTy::iterator I =
std::lower_bound(StoresByIndex.begin(), StoresByIndex.end(),
- std::make_pair(LoadIdx, static_cast<StoreInst *>(0)),
+ std::make_pair(LoadIdx,
+ static_cast<StoreInst *>(nullptr)),
less_first());
if (I == StoresByIndex.begin())
@@ -632,7 +635,7 @@ void PromoteMem2Reg::run() {
// and inserting the phi nodes we marked as necessary
//
std::vector<RenamePassData> RenamePassWorkList;
- RenamePassWorkList.push_back(RenamePassData(F.begin(), 0, Values));
+ RenamePassWorkList.push_back(RenamePassData(F.begin(), nullptr, Values));
do {
RenamePassData RPD;
RPD.swap(RenamePassWorkList.back());
@@ -682,7 +685,7 @@ void PromoteMem2Reg::run() {
PHINode *PN = I->second;
// If this PHI node merges one value and/or undefs, get the value.
- if (Value *V = SimplifyInstruction(PN, 0, 0, &DT)) {
+ if (Value *V = SimplifyInstruction(PN, nullptr, nullptr, &DT)) {
if (AST && PN->getType()->isPointerTy())
AST->deleteValue(PN);
PN->replaceAllUsesWith(V);
@@ -990,7 +993,7 @@ NextIteration:
// Get the next phi node.
++PNI;
APN = dyn_cast<PHINode>(PNI);
- if (APN == 0)
+ if (!APN)
break;
// Verify that it is missing entries. If not, it is not being inserted
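
The lower_bound call in promoteSingleBlockAlloca above relies on a sentinel pair: pairing the load's index with a null StoreInst* compares less than any real store carrying the same index, so the iterator lands on the first store at-or-after the load, and the reaching store is the entry just before it. A standalone analogue with hypothetical data:

    #include <algorithm>
    #include <cassert>
    #include <utility>
    #include <vector>

    int main() {
      // Pairs sorted by "instruction index" within a block.
      std::vector<std::pair<unsigned, const char *>> Stores = {
          {2, "store-a"}, {5, "store-b"}, {9, "store-c"}};
      unsigned LoadIdx = 7;
      auto I = std::lower_bound(
          Stores.begin(), Stores.end(),
          std::make_pair(LoadIdx, static_cast<const char *>(nullptr)));
      // I points at {9, ...}; the store reaching the load is (I - 1).
      assert(I != Stores.begin() && (I - 1)->second == Stores[1].second);
      return 0;
    }
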
diff --git a/lib/Transforms/Utils/SSAUpdater.cpp b/lib/Transforms/Utils/SSAUpdater.cpp
index 28f5c44..3fcb789 100644
--- a/lib/Transforms/Utils/SSAUpdater.cpp
+++ b/lib/Transforms/Utils/SSAUpdater.cpp
@@ -11,7 +11,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "ssaupdater"
#include "llvm/Transforms/Utils/SSAUpdater.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/TinyPtrVector.h"
@@ -28,20 +27,22 @@
using namespace llvm;
+#define DEBUG_TYPE "ssaupdater"
+
typedef DenseMap<BasicBlock*, Value*> AvailableValsTy;
static AvailableValsTy &getAvailableVals(void *AV) {
return *static_cast<AvailableValsTy*>(AV);
}
SSAUpdater::SSAUpdater(SmallVectorImpl<PHINode*> *NewPHI)
- : AV(0), ProtoType(0), ProtoName(), InsertedPHIs(NewPHI) {}
+ : AV(nullptr), ProtoType(nullptr), ProtoName(), InsertedPHIs(NewPHI) {}
SSAUpdater::~SSAUpdater() {
delete static_cast<AvailableValsTy*>(AV);
}
void SSAUpdater::Initialize(Type *Ty, StringRef Name) {
- if (AV == 0)
+ if (!AV)
AV = new AvailableValsTy();
else
getAvailableVals(AV).clear();
@@ -54,7 +55,7 @@ bool SSAUpdater::HasValueForBlock(BasicBlock *BB) const {
}
void SSAUpdater::AddAvailableValue(BasicBlock *BB, Value *V) {
- assert(ProtoType != 0 && "Need to initialize SSAUpdater");
+ assert(ProtoType && "Need to initialize SSAUpdater");
assert(ProtoType == V->getType() &&
"All rewritten values must have the same type");
getAvailableVals(AV)[BB] = V;
@@ -90,7 +91,7 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) {
// Otherwise, we have the hard case. Get the live-in values for each
// predecessor.
SmallVector<std::pair<BasicBlock*, Value*>, 8> PredValues;
- Value *SingularValue = 0;
+ Value *SingularValue = nullptr;
// We can get our predecessor info by walking the pred_iterator list, but it
// is relatively slow. If we already have PHI nodes in this block, walk one
@@ -105,7 +106,7 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) {
if (i == 0)
SingularValue = PredVal;
else if (PredVal != SingularValue)
- SingularValue = 0;
+ SingularValue = nullptr;
}
} else {
bool isFirstPred = true;
@@ -119,7 +120,7 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) {
SingularValue = PredVal;
isFirstPred = false;
} else if (PredVal != SingularValue)
- SingularValue = 0;
+ SingularValue = nullptr;
}
}
@@ -128,7 +129,7 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) {
return UndefValue::get(ProtoType);
// Otherwise, if all the merged values are the same, just use it.
- if (SingularValue != 0)
+ if (SingularValue)
return SingularValue;
// Otherwise, we do need a PHI: check to see if we already have one available
@@ -291,7 +292,7 @@ public:
PHINode *PHI = ValueIsPHI(Val, Updater);
if (PHI && PHI->getNumIncomingValues() == 0)
return PHI;
- return 0;
+ return nullptr;
}
/// GetPHIValue - For the specified PHI instruction, return the value
@@ -401,7 +402,7 @@ run(const SmallVectorImpl<Instruction*> &Insts) const {
// the order of these instructions in the block. If the first use in the
// block is a load, then it uses the live in value. The last store defines
// the live out value. We handle this by doing a linear scan of the block.
- Value *StoredValue = 0;
+ Value *StoredValue = nullptr;
for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ++II) {
if (LoadInst *L = dyn_cast<LoadInst>(II)) {
// If this is a load from an unrelated pointer, ignore it.
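
For readers new to this utility, the typical client sequence around GetValueInMiddleOfBlock looks like the following sketch (block and value names are hypothetical):

    // GetValueInMiddleOfBlock computes the live-in value for a block and
    // inserts a PHI only when the predecessors actually disagree.
    SSAUpdater SSA;
    SSA.Initialize(V->getType(), V->getName());
    SSA.AddAvailableValue(ThenBB, ThenVal);
    SSA.AddAvailableValue(ElseBB, ElseVal);
    Value *Live = SSA.GetValueInMiddleOfBlock(MergeBB);
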
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index 1e88587..150dbdd 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -11,7 +11,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "simplifycfg"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
@@ -50,6 +49,8 @@
using namespace llvm;
using namespace PatternMatch;
+#define DEBUG_TYPE "simplifycfg"
+
static cl::opt<unsigned>
PHINodeFoldingThreshold("phi-node-folding-threshold", cl::Hidden, cl::init(1),
cl::desc("Control the amount of phi node folding to perform (default = 1)"));
@@ -212,6 +213,7 @@ static unsigned ComputeSpeculationCost(const User *I) {
if (!cast<GEPOperator>(I)->hasAllConstantIndices())
return UINT_MAX;
return 1;
+ case Instruction::ExtractValue:
case Instruction::Load:
case Instruction::Add:
case Instruction::Sub:
@@ -272,12 +274,12 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB,
// branch to BB, then it must be in the 'conditional' part of the "if
// statement". If not, it definitely dominates the region.
BranchInst *BI = dyn_cast<BranchInst>(PBB->getTerminator());
- if (BI == 0 || BI->isConditional() || BI->getSuccessor(0) != BB)
+ if (!BI || BI->isConditional() || BI->getSuccessor(0) != BB)
return true;
// If we aren't allowing aggressive promotion anymore, then don't consider
// instructions in the 'if region'.
- if (AggressiveInsts == 0) return false;
+ if (!AggressiveInsts) return false;
// If we have seen this instruction before, don't count it again.
if (AggressiveInsts->count(I)) return true;
@@ -332,7 +334,7 @@ static ConstantInt *GetConstantInt(Value *V, const DataLayout *DL) {
return cast<ConstantInt>
(ConstantExpr::getIntegerCast(CI, PtrTy, /*isSigned=*/false));
}
- return 0;
+ return nullptr;
}
/// GatherConstantCompares - Given a potentially 'or'd or 'and'd together
@@ -343,7 +345,7 @@ static Value *
GatherConstantCompares(Value *V, std::vector<ConstantInt*> &Vals, Value *&Extra,
const DataLayout *DL, bool isEQ, unsigned &UsedICmps) {
Instruction *I = dyn_cast<Instruction>(V);
- if (I == 0) return 0;
+ if (!I) return nullptr;
// If this is an icmp against a constant, handle this as one of the cases.
if (ICmpInst *ICI = dyn_cast<ICmpInst>(I)) {
@@ -390,19 +392,19 @@ GatherConstantCompares(Value *V, std::vector<ConstantInt*> &Vals, Value *&Extra,
// If there are a ton of values, we don't want to make a ginormous switch.
if (Span.getSetSize().ugt(8) || Span.isEmptySet())
- return 0;
+ return nullptr;
for (APInt Tmp = Span.getLower(); Tmp != Span.getUpper(); ++Tmp)
Vals.push_back(ConstantInt::get(V->getContext(), Tmp));
UsedICmps++;
return hasAdd ? RHSVal : I->getOperand(0);
}
- return 0;
+ return nullptr;
}
// Otherwise, we can only handle an | or &, depending on isEQ.
if (I->getOpcode() != (isEQ ? Instruction::Or : Instruction::And))
- return 0;
+ return nullptr;
unsigned NumValsBeforeLHS = Vals.size();
unsigned UsedICmpsBeforeLHS = UsedICmps;
@@ -420,19 +422,19 @@ GatherConstantCompares(Value *V, std::vector<ConstantInt*> &Vals, Value *&Extra,
// The RHS of the or/and can't be folded in and we haven't used "Extra" yet,
// set it and return success.
- if (Extra == 0 || Extra == I->getOperand(1)) {
+ if (Extra == nullptr || Extra == I->getOperand(1)) {
Extra = I->getOperand(1);
return LHS;
}
Vals.resize(NumValsBeforeLHS);
UsedICmps = UsedICmpsBeforeLHS;
- return 0;
+ return nullptr;
}
// If the LHS can't be folded in, but Extra is available and RHS can, try to
// use LHS as Extra.
- if (Extra == 0 || Extra == I->getOperand(0)) {
+ if (Extra == nullptr || Extra == I->getOperand(0)) {
Value *OldExtra = Extra;
Extra = I->getOperand(0);
if (Value *RHS = GatherConstantCompares(I->getOperand(1), Vals, Extra, DL,
@@ -442,11 +444,11 @@ GatherConstantCompares(Value *V, std::vector<ConstantInt*> &Vals, Value *&Extra,
Extra = OldExtra;
}
- return 0;
+ return nullptr;
}
static void EraseTerminatorInstAndDCECond(TerminatorInst *TI) {
- Instruction *Cond = 0;
+ Instruction *Cond = nullptr;
if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
Cond = dyn_cast<Instruction>(SI->getCondition());
} else if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
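
GatherConstantCompares, whose body ends just above, is what lets SimplifyCFG rebuild chained equality tests as a switch. At the source level the recognized shape is roughly the following (matchesCase is illustrative):

    // The or'd equality tests are gathered as Vals = {1, 3, 5} against X and
    // can later be re-emitted as a three-case switch on X.
    bool matchesCase(int X) { return X == 1 || X == 3 || X == 5; }
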
@@ -463,7 +465,7 @@ static void EraseTerminatorInstAndDCECond(TerminatorInst *TI) {
/// isValueEqualityComparison - Return true if the specified terminator checks
/// to see if a value is equal to a constant integer value.
Value *SimplifyCFGOpt::isValueEqualityComparison(TerminatorInst *TI) {
- Value *CV = 0;
+ Value *CV = nullptr;
if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
// Do not permit merging of large switch instructions into their
// predecessors unless there is only one predecessor.
@@ -653,11 +655,11 @@ SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI,
// Otherwise, TI's block must correspond to some matched value. Find out
// which value (or set of values) this is.
- ConstantInt *TIV = 0;
+ ConstantInt *TIV = nullptr;
BasicBlock *TIBB = TI->getParent();
for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
if (PredCases[i].Dest == TIBB) {
- if (TIV != 0)
+ if (TIV)
return false; // Cannot handle multiple values coming to this block.
TIV = PredCases[i].Value;
}
@@ -665,7 +667,7 @@ SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI,
// Okay, we found the one constant that our value can be if we get into TI's
// BB. Find out which successor will unconditionally be branched to.
- BasicBlock *TheRealDest = 0;
+ BasicBlock *TheRealDest = nullptr;
for (unsigned i = 0, e = ThisCases.size(); i != e; ++i)
if (ThisCases[i].Value == TIV) {
TheRealDest = ThisCases[i].Dest;
@@ -673,7 +675,7 @@ SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI,
}
// If not handled by any explicit cases, it is handled by the default case.
- if (TheRealDest == 0) TheRealDest = ThisDef;
+ if (!TheRealDest) TheRealDest = ThisDef;
// Remove PHI node entries for dead edges.
BasicBlock *CheckEdge = TheRealDest;
@@ -681,7 +683,7 @@ SimplifyEqualityComparisonWithOnlyPredecessor(TerminatorInst *TI,
if (*SI != CheckEdge)
(*SI)->removePredecessor(TIBB);
else
- CheckEdge = 0;
+ CheckEdge = nullptr;
// Insert the new branch.
Instruction *NI = Builder.CreateBr(TheRealDest);
@@ -950,10 +952,10 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI,
// Okay, last check. If BB is still a successor of PSI, then we must
// have an infinite loop case. If so, add an infinitely looping block
// to handle the case to preserve the behavior of the code.
- BasicBlock *InfLoopBlock = 0;
+ BasicBlock *InfLoopBlock = nullptr;
for (unsigned i = 0, e = NewSI->getNumSuccessors(); i != e; ++i)
if (NewSI->getSuccessor(i) == BB) {
- if (InfLoopBlock == 0) {
+ if (!InfLoopBlock) {
// Insert it at the end of the function, because it's either code,
// or it won't matter if it's hot. :)
InfLoopBlock = BasicBlock::Create(BB->getContext(),
@@ -1099,7 +1101,7 @@ HoistTerminator:
// These values do not agree. Insert a select instruction before NT
// that determines the right value.
SelectInst *&SI = InsertedSelects[std::make_pair(BB1V, BB2V)];
- if (SI == 0)
+ if (!SI)
SI = cast<SelectInst>
(Builder.CreateSelect(BI->getCondition(), BB1V, BB2V,
BB1V->getName()+"."+BB2V->getName()));
@@ -1144,7 +1146,7 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) {
// Gather the PHI nodes in BBEnd.
std::map<Value*, std::pair<Value*, PHINode*> > MapValueFromBB1ToBB2;
- Instruction *FirstNonPhiInBBEnd = 0;
+ Instruction *FirstNonPhiInBBEnd = nullptr;
for (BasicBlock::iterator I = BBEnd->begin(), E = BBEnd->end();
I != E; ++I) {
if (PHINode *PN = dyn_cast<PHINode>(I)) {
@@ -1222,7 +1224,7 @@ static bool SinkThenElseCodeToEnd(BranchInst *BI1) {
// The operands should be either the same or they need to be generated
// with a PHI node after sinking. We only handle the case where there is
// a single pair of different operands.
- Value *DifferentOp1 = 0, *DifferentOp2 = 0;
+ Value *DifferentOp1 = nullptr, *DifferentOp2 = nullptr;
unsigned Op1Idx = 0;
for (unsigned I = 0, E = I1->getNumOperands(); I != E; ++I) {
if (I1->getOperand(I) == I2->getOperand(I))
@@ -1318,11 +1320,11 @@ static Value *isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB,
BasicBlock *StoreBB, BasicBlock *EndBB) {
StoreInst *StoreToHoist = dyn_cast<StoreInst>(I);
if (!StoreToHoist)
- return 0;
+ return nullptr;
// Volatile or atomic.
if (!StoreToHoist->isSimple())
- return 0;
+ return nullptr;
Value *StorePtr = StoreToHoist->getPointerOperand();
@@ -1334,7 +1336,7 @@ static Value *isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB,
// Could be calling an instruction that affects memory, like free().
if (CurI->mayHaveSideEffects() && !isa<StoreInst>(CurI))
- return 0;
+ return nullptr;
StoreInst *SI = dyn_cast<StoreInst>(CurI);
// Found the previous store; make sure it stores to the same location.
@@ -1342,10 +1344,10 @@ static Value *isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB,
// Found the previous store, return its value operand.
return SI->getValueOperand();
else if (SI)
- return 0; // Unknown store.
+ return nullptr; // Unknown store.
}
- return 0;
+ return nullptr;
}
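
[Editor's note] isSafeToSpeculateStore walks backwards from the branch looking for an earlier simple store to the same address; its value operand lets the caller turn the conditional store into an unconditional store of a select. A hedged source-level illustration (hypothetical code, not from this patch):

    int g;

    void before(bool c, int v) {
      g = 0;            // earlier store to the same location
      if (c)
        g = v;          // conditional store that can be speculated
    }

    void after(bool c, int v) {
      g = c ? v : 0;    // one unconditional store of a select
    }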
/// \brief Speculate a conditional basic block flattening the CFG.
@@ -1411,8 +1413,8 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB) {
SmallDenseMap<Instruction *, unsigned, 4> SinkCandidateUseCounts;
unsigned SpeculationCost = 0;
- Value *SpeculatedStoreValue = 0;
- StoreInst *SpeculatedStore = 0;
+ Value *SpeculatedStoreValue = nullptr;
+ StoreInst *SpeculatedStore = nullptr;
for (BasicBlock::iterator BBI = ThenBB->begin(),
BBE = std::prev(ThenBB->end());
BBI != BBE; ++BBI) {
@@ -1620,7 +1622,7 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout *DL) {
// constants.
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
ConstantInt *CB = dyn_cast<ConstantInt>(PN->getIncomingValue(i));
- if (CB == 0 || !CB->getType()->isIntegerTy(1)) continue;
+ if (!CB || !CB->getType()->isIntegerTy(1)) continue;
// Okay, we now know that all edges from PredBB should be revectored to
// branch to RealDest.
@@ -1745,7 +1747,7 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const DataLayout *DL) {
// If we folded the first phi, PN dangles at this point. Refresh it. If
// we ran out of PHIs then we simplified them all.
PN = dyn_cast<PHINode>(BB->begin());
- if (PN == 0) return true;
+ if (!PN) return true;
// Don't fold i1 branches on PHIs which contain binary operators. These can
// often be turned into switches and other things.
@@ -1759,11 +1761,11 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const DataLayout *DL) {
// instructions in the predecessor blocks can be promoted as well. If
// not, we won't be able to get rid of the control flow, so it's not
// worth promoting to select instructions.
- BasicBlock *DomBlock = 0;
+ BasicBlock *DomBlock = nullptr;
BasicBlock *IfBlock1 = PN->getIncomingBlock(0);
BasicBlock *IfBlock2 = PN->getIncomingBlock(1);
if (cast<BranchInst>(IfBlock1->getTerminator())->isConditional()) {
- IfBlock1 = 0;
+ IfBlock1 = nullptr;
} else {
DomBlock = *pred_begin(IfBlock1);
for (BasicBlock::iterator I = IfBlock1->begin();!isa<TerminatorInst>(I);++I)
@@ -1776,7 +1778,7 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const DataLayout *DL) {
}
if (cast<BranchInst>(IfBlock2->getTerminator())->isConditional()) {
- IfBlock2 = 0;
+ IfBlock2 = nullptr;
} else {
DomBlock = *pred_begin(IfBlock2);
for (BasicBlock::iterator I = IfBlock2->begin();!isa<TerminatorInst>(I);++I)
@@ -1959,7 +1961,7 @@ static bool checkCSEInPredecessor(Instruction *Inst, BasicBlock *PB) {
bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
BasicBlock *BB = BI->getParent();
- Instruction *Cond = 0;
+ Instruction *Cond = nullptr;
if (BI->isConditional())
Cond = dyn_cast<Instruction>(BI->getCondition());
else {
@@ -1985,12 +1987,12 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
}
}
- if (Cond == 0)
+ if (!Cond)
return false;
}
- if (Cond == 0 || (!isa<CmpInst>(Cond) && !isa<BinaryOperator>(Cond)) ||
- Cond->getParent() != BB || !Cond->hasOneUse())
+ if (!Cond || (!isa<CmpInst>(Cond) && !isa<BinaryOperator>(Cond)) ||
+ Cond->getParent() != BB || !Cond->hasOneUse())
return false;
// Only allow this if the condition is a simple instruction that can be
@@ -2005,7 +2007,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
// that feeds the branch. We later ensure that any values that _it_ uses
// were also live in the predecessor, so that we don't unnecessarily create
// register pressure or inhibit out-of-order execution.
- Instruction *BonusInst = 0;
+ Instruction *BonusInst = nullptr;
if (&*FrontIt != Cond &&
FrontIt->hasOneUse() && FrontIt->user_back() == Cond &&
isSafeToSpeculativelyExecute(FrontIt)) {
@@ -2040,7 +2042,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
// Finally, don't infinitely unroll conditional loops.
BasicBlock *TrueDest = BI->getSuccessor(0);
- BasicBlock *FalseDest = (BI->isConditional()) ? BI->getSuccessor(1) : 0;
+ BasicBlock *FalseDest = (BI->isConditional()) ? BI->getSuccessor(1) : nullptr;
if (TrueDest == BB || FalseDest == BB)
return false;
@@ -2052,7 +2054,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
// the common successor, verify that the same value flows in from both
// blocks.
SmallVector<PHINode*, 4> PHIs;
- if (PBI == 0 || PBI->isUnconditional() ||
+ if (!PBI || PBI->isUnconditional() ||
(BI->isConditional() &&
!SafeToMergeTerminators(BI, PBI)) ||
(!BI->isConditional() &&
@@ -2142,7 +2144,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
}
// If we have a bonus inst, clone it into the predecessor block.
- Instruction *NewBonus = 0;
+ Instruction *NewBonus = nullptr;
if (BonusInst) {
NewBonus = BonusInst->clone();
@@ -2218,14 +2220,14 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
MDBuilder(BI->getContext()).
createBranchWeights(MDWeights));
} else
- PBI->setMetadata(LLVMContext::MD_prof, NULL);
+ PBI->setMetadata(LLVMContext::MD_prof, nullptr);
} else {
// Update PHI nodes in the common successors.
for (unsigned i = 0, e = PHIs.size(); i != e; ++i) {
ConstantInt *PBI_C = cast<ConstantInt>(
PHIs[i]->getIncomingValueForBlock(PBI->getParent()));
assert(PBI_C->getType()->isIntegerTy(1));
- Instruction *MergedCond = 0;
+ Instruction *MergedCond = nullptr;
if (PBI->getSuccessor(0) == TrueDest) {
// Create (PBI_Cond and PBI_C) or (!PBI_Cond and BI_Value)
// PBI_C is true: PBI_Cond or (!PBI_Cond and BI_Value)
@@ -2498,16 +2500,16 @@ static bool SimplifyTerminatorOnSelect(TerminatorInst *OldTerm, Value *Cond,
// If TrueBB and FalseBB are equal, only try to preserve one copy of that
// successor.
BasicBlock *KeepEdge1 = TrueBB;
- BasicBlock *KeepEdge2 = TrueBB != FalseBB ? FalseBB : 0;
+ BasicBlock *KeepEdge2 = TrueBB != FalseBB ? FalseBB : nullptr;
// Then remove the rest.
for (unsigned I = 0, E = OldTerm->getNumSuccessors(); I != E; ++I) {
BasicBlock *Succ = OldTerm->getSuccessor(I);
// Make sure only to keep exactly one copy of each edge.
if (Succ == KeepEdge1)
- KeepEdge1 = 0;
+ KeepEdge1 = nullptr;
else if (Succ == KeepEdge2)
- KeepEdge2 = 0;
+ KeepEdge2 = nullptr;
else
Succ->removePredecessor(OldTerm->getParent());
}
@@ -2516,7 +2518,7 @@ static bool SimplifyTerminatorOnSelect(TerminatorInst *OldTerm, Value *Cond,
Builder.SetCurrentDebugLocation(OldTerm->getDebugLoc());
// Insert an appropriate new terminator.
- if ((KeepEdge1 == 0) && (KeepEdge2 == 0)) {
+ if (!KeepEdge1 && !KeepEdge2) {
if (TrueBB == FalseBB)
// We were only looking for one successor, and it was present.
// Create an unconditional branch to it.
@@ -2538,7 +2540,7 @@ static bool SimplifyTerminatorOnSelect(TerminatorInst *OldTerm, Value *Cond,
// One of the selected values was a successor, but the other wasn't.
// Insert an unconditional branch to the one that was found;
// the edge to the one that wasn't must be unreachable.
- if (KeepEdge1 == 0)
+ if (!KeepEdge1)
// Only TrueBB was found.
Builder.CreateBr(TrueBB);
else
@@ -2639,7 +2641,7 @@ static bool TryToSimplifyUncondBranchWithICmpInIt(
// 'V' and this block is the default case for the switch. In this case we can
// fold the compared value into the switch to simplify things.
BasicBlock *Pred = BB->getSinglePredecessor();
- if (Pred == 0 || !isa<SwitchInst>(Pred->getTerminator())) return false;
+ if (!Pred || !isa<SwitchInst>(Pred->getTerminator())) return false;
SwitchInst *SI = cast<SwitchInst>(Pred->getTerminator());
if (SI->getCondition() != V)
@@ -2681,7 +2683,7 @@ static bool TryToSimplifyUncondBranchWithICmpInIt(
// the block.
BasicBlock *SuccBlock = BB->getTerminator()->getSuccessor(0);
PHINode *PHIUse = dyn_cast<PHINode>(ICI->user_back());
- if (PHIUse == 0 || PHIUse != &SuccBlock->front() ||
+ if (PHIUse == nullptr || PHIUse != &SuccBlock->front() ||
isa<PHINode>(++BasicBlock::iterator(PHIUse)))
return false;
@@ -2733,16 +2735,16 @@ static bool TryToSimplifyUncondBranchWithICmpInIt(
static bool SimplifyBranchOnICmpChain(BranchInst *BI, const DataLayout *DL,
IRBuilder<> &Builder) {
Instruction *Cond = dyn_cast<Instruction>(BI->getCondition());
- if (Cond == 0) return false;
+ if (!Cond) return false;
// Change br (X == 0 | X == 1), T, F into a switch instruction.
// If this is a bunch of seteq's or'd together, or if it's a bunch of
// 'setne's and'ed together, collect them.
- Value *CompVal = 0;
+ Value *CompVal = nullptr;
std::vector<ConstantInt*> Values;
bool TrueWhenEqual = true;
- Value *ExtraCase = 0;
+ Value *ExtraCase = nullptr;
unsigned UsedICmps = 0;
if (Cond->getOpcode() == Instruction::Or) {
@@ -2755,7 +2757,7 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, const DataLayout *DL,
}
// If we didn't have a multiply compared value, fail.
- if (CompVal == 0) return false;
+ if (!CompVal) return false;
// Avoid turning single icmps into a switch.
if (UsedICmps <= 1)
@@ -3050,7 +3052,7 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) {
// Find the most popular block.
unsigned MaxPop = 0;
unsigned MaxIndex = 0;
- BasicBlock *MaxBlock = 0;
+ BasicBlock *MaxBlock = nullptr;
for (std::map<BasicBlock*, std::pair<unsigned, unsigned> >::iterator
I = Popularity.begin(), E = Popularity.end(); I != E; ++I) {
if (I->second.first > MaxPop ||
@@ -3188,7 +3190,7 @@ static bool EliminateDeadSwitchCases(SwitchInst *SI) {
Value *Cond = SI->getCondition();
unsigned Bits = Cond->getType()->getIntegerBitWidth();
APInt KnownZero(Bits, 0), KnownOne(Bits, 0);
- ComputeMaskedBits(Cond, KnownZero, KnownOne);
+ computeKnownBits(Cond, KnownZero, KnownOne);
// Gather dead cases.
SmallVector<ConstantInt*, 8> DeadCases;
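
[Editor's note] ComputeMaskedBits was renamed computeKnownBits in the 3.5 API; it fills masks of bits proven zero or one. A minimal sketch, assuming APInt-style masks, of the dead-case test this hunk feeds into (the helper name is illustrative):

    #include "llvm/ADT/APInt.h"

    // A case value C is unreachable if it sets a bit known to be zero,
    // or clears a bit known to be one.
    static bool isDeadCase(const llvm::APInt &C,
                           const llvm::APInt &KnownZero,
                           const llvm::APInt &KnownOne) {
      return (C & KnownZero) != 0 || (~C & KnownOne) != 0;
    }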
@@ -3241,13 +3243,13 @@ static PHINode *FindPHIForConditionForwarding(ConstantInt *CaseValue,
BasicBlock *BB,
int *PhiIndex) {
if (BB->getFirstNonPHIOrDbg() != BB->getTerminator())
- return NULL; // BB must be empty to be a candidate for simplification.
+ return nullptr; // BB must be empty to be a candidate for simplification.
if (!BB->getSinglePredecessor())
- return NULL; // BB must be dominated by the switch.
+ return nullptr; // BB must be dominated by the switch.
BranchInst *Branch = dyn_cast<BranchInst>(BB->getTerminator());
if (!Branch || !Branch->isUnconditional())
- return NULL; // Terminator must be unconditional branch.
+ return nullptr; // Terminator must be unconditional branch.
BasicBlock *Succ = Branch->getSuccessor(0);
@@ -3263,7 +3265,7 @@ static PHINode *FindPHIForConditionForwarding(ConstantInt *CaseValue,
return PHI;
}
- return NULL;
+ return nullptr;
}
/// ForwardSwitchConditionToPHI - Try to forward the condition of a switch
@@ -3336,12 +3338,12 @@ ConstantFold(Instruction *I,
if (SelectInst *Select = dyn_cast<SelectInst>(I)) {
Constant *A = LookupConstant(Select->getCondition(), ConstantPool);
if (!A)
- return 0;
+ return nullptr;
if (A->isAllOnesValue())
return LookupConstant(Select->getTrueValue(), ConstantPool);
if (A->isNullValue())
return LookupConstant(Select->getFalseValue(), ConstantPool);
- return 0;
+ return nullptr;
}
SmallVector<Constant *, 4> COps;
@@ -3349,7 +3351,7 @@ ConstantFold(Instruction *I,
if (Constant *A = LookupConstant(I->getOperand(N), ConstantPool))
COps.push_back(A);
else
- return 0;
+ return nullptr;
}
if (CmpInst *Cmp = dyn_cast<CmpInst>(I))
@@ -3492,7 +3494,8 @@ SwitchLookupTable::SwitchLookupTable(Module &M,
const SmallVectorImpl<std::pair<ConstantInt*, Constant*> >& Values,
Constant *DefaultValue,
const DataLayout *DL)
- : SingleValue(0), BitMap(0), BitMapElementTy(0), Array(0) {
+ : SingleValue(nullptr), BitMap(nullptr), BitMapElementTy(nullptr),
+ Array(nullptr) {
assert(Values.size() && "Can't build lookup table without values!");
assert(TableSize >= Values.size() && "Can't fit values in table!");
@@ -3513,7 +3516,7 @@ SwitchLookupTable::SwitchLookupTable(Module &M,
TableContents[Idx] = CaseRes;
if (CaseRes != SingleValue)
- SingleValue = 0;
+ SingleValue = nullptr;
}
// Fill in any holes in the table with the default result.
@@ -3526,7 +3529,7 @@ SwitchLookupTable::SwitchLookupTable(Module &M,
}
if (DefaultValue != SingleValue)
- SingleValue = 0;
+ SingleValue = nullptr;
}
// If each element in the table contains the same value, we only need to store
@@ -3696,7 +3699,7 @@ static bool SwitchToLookupTable(SwitchInst *SI,
ConstantInt *MinCaseVal = CI.getCaseValue();
ConstantInt *MaxCaseVal = CI.getCaseValue();
- BasicBlock *CommonDest = 0;
+ BasicBlock *CommonDest = nullptr;
typedef SmallVector<std::pair<ConstantInt*, Constant*>, 4> ResultListTy;
SmallDenseMap<PHINode*, ResultListTy> ResultLists;
SmallDenseMap<PHINode*, Constant*> DefaultResults;
@@ -3741,8 +3744,8 @@ static bool SwitchToLookupTable(SwitchInst *SI,
SmallVector<std::pair<PHINode*, Constant*>, 4> DefaultResultsList;
bool HasDefaultResults = false;
if (TableHasHoles) {
- HasDefaultResults = GetCaseResults(SI, 0, SI->getDefaultDest(), &CommonDest,
- DefaultResultsList, DL);
+ HasDefaultResults = GetCaseResults(SI, nullptr, SI->getDefaultDest(),
+ &CommonDest, DefaultResultsList, DL);
}
bool NeedMask = (TableHasHoles && !HasDefaultResults);
if (NeedMask) {
@@ -4038,8 +4041,8 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
// from BI. We know that the condbr dominates the two blocks, so see if
// there is any identical code in the "then" and "else" blocks. If so, we
// can hoist it up to the branching block.
- if (BI->getSuccessor(0)->getSinglePredecessor() != 0) {
- if (BI->getSuccessor(1)->getSinglePredecessor() != 0) {
+ if (BI->getSuccessor(0)->getSinglePredecessor()) {
+ if (BI->getSuccessor(1)->getSinglePredecessor()) {
if (HoistThenElseCodeToIf(BI))
return SimplifyCFG(BB, TTI, DL) | true;
} else {
@@ -4051,7 +4054,7 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
if (SpeculativelyExecuteBB(BI, BI->getSuccessor(0)))
return SimplifyCFG(BB, TTI, DL) | true;
}
- } else if (BI->getSuccessor(1)->getSinglePredecessor() != 0) {
+ } else if (BI->getSuccessor(1)->getSinglePredecessor()) {
// If Successor #0 has multiple preds, we may be able to conditionally
// execute Successor #1 if it branches to successor #0.
TerminatorInst *Succ1TI = BI->getSuccessor(1)->getTerminator();
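
[Editor's note] The last SimplifyCFG.cpp hunk gates HoistThenElseCodeToIf and SpeculativelyExecuteBB on successors with a single predecessor. A hedged source-level picture (hypothetical code) of what hoisting buys:

    int before(bool c, int a, int b) {
      int r;
      if (c) r = a * b + 1;   // both arms start with the identical a * b
      else   r = a * b + 2;
      return r;
    }

    int after(bool c, int a, int b) {
      int t = a * b;          // hoisted into the branching block
      return c ? t + 1 : t + 2;
    }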
diff --git a/lib/Transforms/Utils/SimplifyIndVar.cpp b/lib/Transforms/Utils/SimplifyIndVar.cpp
index 30f56be..b284e6f 100644
--- a/lib/Transforms/Utils/SimplifyIndVar.cpp
+++ b/lib/Transforms/Utils/SimplifyIndVar.cpp
@@ -13,8 +13,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "indvars"
-
#include "llvm/Transforms/Utils/SimplifyIndVar.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
@@ -34,6 +32,8 @@
using namespace llvm;
+#define DEBUG_TYPE "indvars"
+
STATISTIC(NumElimIdentity, "Number of IV identities eliminated");
STATISTIC(NumElimOperand, "Number of IV operands folded into a use");
STATISTIC(NumElimRem , "Number of IV remainder operations eliminated");
@@ -56,14 +56,14 @@ namespace {
public:
SimplifyIndvar(Loop *Loop, ScalarEvolution *SE, LPPassManager *LPM,
- SmallVectorImpl<WeakVH> &Dead, IVUsers *IVU = NULL) :
+ SmallVectorImpl<WeakVH> &Dead, IVUsers *IVU = nullptr) :
L(Loop),
LI(LPM->getAnalysisIfAvailable<LoopInfo>()),
SE(SE),
DeadInsts(Dead),
Changed(false) {
DataLayoutPass *DLP = LPM->getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : 0;
+ DL = DLP ? &DLP->getDataLayout() : nullptr;
assert(LI && "IV simplification requires LoopInfo");
}
@@ -72,7 +72,7 @@ namespace {
/// Iteratively perform simplification on a worklist of users of the
/// specified induction variable. This is the top-level driver that applies
/// all simplifications to users of an IV.
- void simplifyUsers(PHINode *CurrIV, IVVisitor *V = NULL);
+ void simplifyUsers(PHINode *CurrIV, IVVisitor *V = nullptr);
Value *foldIVUser(Instruction *UseInst, Instruction *IVOperand);
@@ -95,25 +95,25 @@ namespace {
/// be folded (in case more folding opportunities have been exposed).
/// Otherwise return null.
Value *SimplifyIndvar::foldIVUser(Instruction *UseInst, Instruction *IVOperand) {
- Value *IVSrc = 0;
+ Value *IVSrc = nullptr;
unsigned OperIdx = 0;
- const SCEV *FoldedExpr = 0;
+ const SCEV *FoldedExpr = nullptr;
switch (UseInst->getOpcode()) {
default:
- return 0;
+ return nullptr;
case Instruction::UDiv:
case Instruction::LShr:
// We're only interested in the case where we know something about
// the numerator and have a constant denominator.
if (IVOperand != UseInst->getOperand(OperIdx) ||
!isa<ConstantInt>(UseInst->getOperand(1)))
- return 0;
+ return nullptr;
// Attempt to fold a binary operator with constant operand.
// e.g. ((I + 1) >> 2) => I >> 2
if (!isa<BinaryOperator>(IVOperand)
|| !isa<ConstantInt>(IVOperand->getOperand(1)))
- return 0;
+ return nullptr;
IVSrc = IVOperand->getOperand(0);
// IVSrc must be the (SCEVable) IV, since the other operand is const.
@@ -124,7 +124,7 @@ Value *SimplifyIndvar::foldIVUser(Instruction *UseInst, Instruction *IVOperand)
// Get a constant for the divisor. See createSCEV.
uint32_t BitWidth = cast<IntegerType>(UseInst->getType())->getBitWidth();
if (D->getValue().uge(BitWidth))
- return 0;
+ return nullptr;
D = ConstantInt::get(UseInst->getContext(),
APInt::getOneBitSet(BitWidth, D->getZExtValue()));
@@ -133,11 +133,11 @@ Value *SimplifyIndvar::foldIVUser(Instruction *UseInst, Instruction *IVOperand)
}
// We have something that might fold its operand. Compare SCEVs.
if (!SE->isSCEVable(UseInst->getType()))
- return 0;
+ return nullptr;
// Bypass the operand if SCEV can prove it has no effect.
if (SE->getSCEV(UseInst) != FoldedExpr)
- return 0;
+ return nullptr;
DEBUG(dbgs() << "INDVARS: Eliminated IV operand: " << *IVOperand
<< " -> " << *UseInst << '\n');
@@ -283,8 +283,8 @@ Instruction *SimplifyIndvar::splitOverflowIntrinsic(Instruction *IVUser,
return IVUser;
// Find a branch guarded by the overflow check.
- BranchInst *Branch = 0;
- Instruction *AddVal = 0;
+ BranchInst *Branch = nullptr;
+ Instruction *AddVal = nullptr;
for (User *U : II->users()) {
if (ExtractValueInst *ExtractInst = dyn_cast<ExtractValueInst>(U)) {
if (ExtractInst->getNumIndices() != 1)
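
[Editor's note] The DEBUG_TYPE hunk above follows the 3.5-era convention of defining the macro only after all #includes, so headers are never compiled under this file's debug tag. A standalone sketch of the ordering rule (the rationale stated here is our reading, not text from the patch):

    #include <cstdio>            // every #include comes first...

    #define DEBUG_TYPE "indvars" // ...then the per-file debug tag

    int main() {
      std::printf("debug tag for this file: %s\n", DEBUG_TYPE);
      return 0;
    }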
diff --git a/lib/Transforms/Utils/SimplifyInstructions.cpp b/lib/Transforms/Utils/SimplifyInstructions.cpp
index bbd65f1..33b3637 100644
--- a/lib/Transforms/Utils/SimplifyInstructions.cpp
+++ b/lib/Transforms/Utils/SimplifyInstructions.cpp
@@ -14,7 +14,6 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "instsimplify"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SmallPtrSet.h"
@@ -29,6 +28,8 @@
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
+#define DEBUG_TYPE "instsimplify"
+
STATISTIC(NumSimplified, "Number of redundant instructions removed");
namespace {
@@ -47,17 +48,18 @@ namespace {
bool runOnFunction(Function &F) override {
const DominatorTreeWrapperPass *DTWP =
getAnalysisIfAvailable<DominatorTreeWrapperPass>();
- const DominatorTree *DT = DTWP ? &DTWP->getDomTree() : 0;
+ const DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr;
DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- const DataLayout *DL = DLP ? &DLP->getDataLayout() : 0;
+ const DataLayout *DL = DLP ? &DLP->getDataLayout() : nullptr;
const TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfo>();
SmallPtrSet<const Instruction*, 8> S1, S2, *ToSimplify = &S1, *Next = &S2;
bool Changed = false;
do {
- for (df_iterator<BasicBlock*> DI = df_begin(&F.getEntryBlock()),
- DE = df_end(&F.getEntryBlock()); DI != DE; ++DI)
- for (BasicBlock::iterator BI = DI->begin(), BE = DI->end(); BI != BE;) {
+ for (BasicBlock *BB : depth_first(&F.getEntryBlock()))
+ // Here be subtlety: the iterator must be incremented before the loop
+ // body, because the body may erase the instruction the iterator points
+ // at, so a range-for loop over the block won't work here.
+ for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) {
Instruction *I = BI++;
// The first time through the loop ToSimplify is empty and we try to
// simplify all instructions. On later iterations ToSimplify is not
@@ -74,7 +76,15 @@ namespace {
++NumSimplified;
Changed = true;
}
- Changed |= RecursivelyDeleteTriviallyDeadInstructions(I, TLI);
+ bool res = RecursivelyDeleteTriviallyDeadInstructions(I, TLI);
+ if (res) {
+ // RecursivelyDeleteTriviallyDeadInstructions can remove
+ // more than one instruction, so simply incrementing the
+ // iterator does not work. When instructions get deleted,
+ // re-iterate over the block instead.
+ BI = BB->begin(); BE = BB->end();
+ Changed |= res;
+ }
}
// Place the list of instructions to simplify on the next loop iteration
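
[Editor's note] The re-iterate fix above matters because RecursivelyDeleteTriviallyDeadInstructions may erase instructions other than the one the iterator was parked on. A minimal standalone sketch of the same pattern (std::list stands in for a basic block; names are illustrative):

    #include <cstdio>
    #include <list>

    // Stand-in for a simplification that may erase the element it is given.
    static bool eraseIfEven(std::list<int> &L, std::list<int>::iterator It) {
      if (*It % 2 != 0)
        return false;
      L.erase(It);
      return true;
    }

    int main() {
      std::list<int> L = {1, 2, 3, 4, 5};
      for (auto It = L.begin(); It != L.end();) {
        auto Cur = It++;        // advance first: the body may erase Cur
        if (eraseIfEven(L, Cur))
          It = L.begin();       // something was deleted: rescan from the top
      }
      for (int V : L)
        std::printf("%d ", V);  // prints: 1 3 5
      return 0;
    }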
diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp
index b5bc391..3b61bb5 100644
--- a/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -20,6 +20,7 @@
#include "llvm/ADT/Triple.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicInst.h"
@@ -75,7 +76,7 @@ public:
// We never change the calling convention.
if (!ignoreCallingConv() && CI->getCallingConv() != llvm::CallingConv::C)
- return NULL;
+ return nullptr;
return callOptimizer(CI->getCalledFunction(), CI, B);
}
@@ -186,14 +187,14 @@ struct MemCpyChkOpt : public InstFortifiedLibCallOptimization {
!FT->getParamType(1)->isPointerTy() ||
FT->getParamType(2) != DL->getIntPtrType(Context) ||
FT->getParamType(3) != DL->getIntPtrType(Context))
- return 0;
+ return nullptr;
if (isFoldable(3, 2, false)) {
B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1),
CI->getArgOperand(2), 1);
return CI->getArgOperand(0);
}
- return 0;
+ return nullptr;
}
};
@@ -210,14 +211,14 @@ struct MemMoveChkOpt : public InstFortifiedLibCallOptimization {
!FT->getParamType(1)->isPointerTy() ||
FT->getParamType(2) != DL->getIntPtrType(Context) ||
FT->getParamType(3) != DL->getIntPtrType(Context))
- return 0;
+ return nullptr;
if (isFoldable(3, 2, false)) {
B.CreateMemMove(CI->getArgOperand(0), CI->getArgOperand(1),
CI->getArgOperand(2), 1);
return CI->getArgOperand(0);
}
- return 0;
+ return nullptr;
}
};
@@ -234,7 +235,7 @@ struct MemSetChkOpt : public InstFortifiedLibCallOptimization {
!FT->getParamType(1)->isIntegerTy() ||
FT->getParamType(2) != DL->getIntPtrType(Context) ||
FT->getParamType(3) != DL->getIntPtrType(Context))
- return 0;
+ return nullptr;
if (isFoldable(3, 2, false)) {
Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(),
@@ -242,7 +243,7 @@ struct MemSetChkOpt : public InstFortifiedLibCallOptimization {
B.CreateMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), 1);
return CI->getArgOperand(0);
}
- return 0;
+ return nullptr;
}
};
@@ -260,7 +261,7 @@ struct StrCpyChkOpt : public InstFortifiedLibCallOptimization {
FT->getParamType(0) != FT->getParamType(1) ||
FT->getParamType(0) != Type::getInt8PtrTy(Context) ||
FT->getParamType(2) != DL->getIntPtrType(Context))
- return 0;
+ return nullptr;
Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1);
if (Dst == Src) // __strcpy_chk(x,x) -> x
@@ -277,10 +278,10 @@ struct StrCpyChkOpt : public InstFortifiedLibCallOptimization {
} else {
// Maybe we can still fold __strcpy_chk to __memcpy_chk.
uint64_t Len = GetStringLength(Src);
- if (Len == 0) return 0;
+ if (Len == 0) return nullptr;
// This optimization requires DataLayout.
- if (!DL) return 0;
+ if (!DL) return nullptr;
Value *Ret =
EmitMemCpyChk(Dst, Src,
@@ -288,7 +289,7 @@ struct StrCpyChkOpt : public InstFortifiedLibCallOptimization {
CI->getArgOperand(2), B, DL, TLI);
return Ret;
}
- return 0;
+ return nullptr;
}
};
@@ -306,12 +307,12 @@ struct StpCpyChkOpt : public InstFortifiedLibCallOptimization {
FT->getParamType(0) != FT->getParamType(1) ||
FT->getParamType(0) != Type::getInt8PtrTy(Context) ||
FT->getParamType(2) != DL->getIntPtrType(FT->getParamType(0)))
- return 0;
+ return nullptr;
Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1);
if (Dst == Src) { // stpcpy(x,x) -> x+strlen(x)
Value *StrLen = EmitStrLen(Src, B, DL, TLI);
- return StrLen ? B.CreateInBoundsGEP(Dst, StrLen) : 0;
+ return StrLen ? B.CreateInBoundsGEP(Dst, StrLen) : nullptr;
}
// If a) we don't have any length information, or b) we know this will
@@ -325,10 +326,10 @@ struct StpCpyChkOpt : public InstFortifiedLibCallOptimization {
} else {
// Maybe we can still fold __stpcpy_chk to __memcpy_chk.
uint64_t Len = GetStringLength(Src);
- if (Len == 0) return 0;
+ if (Len == 0) return nullptr;
// This optimization requires DataLayout.
- if (!DL) return 0;
+ if (!DL) return nullptr;
Type *PT = FT->getParamType(0);
Value *LenV = ConstantInt::get(DL->getIntPtrType(PT), Len);
@@ -336,10 +337,10 @@ struct StpCpyChkOpt : public InstFortifiedLibCallOptimization {
ConstantInt::get(DL->getIntPtrType(PT),
Len - 1));
if (!EmitMemCpyChk(Dst, Src, LenV, CI->getArgOperand(2), B, DL, TLI))
- return 0;
+ return nullptr;
return DstEnd;
}
- return 0;
+ return nullptr;
}
};
@@ -357,7 +358,7 @@ struct StrNCpyChkOpt : public InstFortifiedLibCallOptimization {
FT->getParamType(0) != Type::getInt8PtrTy(Context) ||
!FT->getParamType(2)->isIntegerTy() ||
FT->getParamType(3) != DL->getIntPtrType(Context))
- return 0;
+ return nullptr;
if (isFoldable(3, 2, false)) {
Value *Ret = EmitStrNCpy(CI->getArgOperand(0), CI->getArgOperand(1),
@@ -365,7 +366,7 @@ struct StrNCpyChkOpt : public InstFortifiedLibCallOptimization {
Name.substr(2, 7));
return Ret;
}
- return 0;
+ return nullptr;
}
};
@@ -382,7 +383,7 @@ struct StrCatOpt : public LibCallOptimization {
FT->getReturnType() != B.getInt8PtrTy() ||
FT->getParamType(0) != FT->getReturnType() ||
FT->getParamType(1) != FT->getReturnType())
- return 0;
+ return nullptr;
// Extract some information from the instruction
Value *Dst = CI->getArgOperand(0);
@@ -390,7 +391,7 @@ struct StrCatOpt : public LibCallOptimization {
// See if we can get the length of the input string.
uint64_t Len = GetStringLength(Src);
- if (Len == 0) return 0;
+ if (Len == 0) return nullptr;
--Len; // Unbias length.
// Handle the simple, do-nothing case: strcat(x, "") -> x
@@ -398,7 +399,7 @@ struct StrCatOpt : public LibCallOptimization {
return Dst;
// These optimizations require DataLayout.
- if (!DL) return 0;
+ if (!DL) return nullptr;
return emitStrLenMemCpy(Src, Dst, Len, B);
}
@@ -409,7 +410,7 @@ struct StrCatOpt : public LibCallOptimization {
// memory is to be moved to. We just generate a call to strlen.
Value *DstLen = EmitStrLen(Dst, B, DL, TLI);
if (!DstLen)
- return 0;
+ return nullptr;
// Now that we have the destination's length, we must index into the
// destination's pointer to get the actual memcpy destination (end of
@@ -434,7 +435,7 @@ struct StrNCatOpt : public StrCatOpt {
FT->getParamType(0) != FT->getReturnType() ||
FT->getParamType(1) != FT->getReturnType() ||
!FT->getParamType(2)->isIntegerTy())
- return 0;
+ return nullptr;
// Extract some information from the instruction
Value *Dst = CI->getArgOperand(0);
@@ -445,11 +446,11 @@ struct StrNCatOpt : public StrCatOpt {
if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(CI->getArgOperand(2)))
Len = LengthArg->getZExtValue();
else
- return 0;
+ return nullptr;
// See if we can get the length of the input string.
uint64_t SrcLen = GetStringLength(Src);
- if (SrcLen == 0) return 0;
+ if (SrcLen == 0) return nullptr;
--SrcLen; // Unbias length.
// Handle the simple, do-nothing cases:
@@ -458,10 +459,10 @@ struct StrNCatOpt : public StrCatOpt {
if (SrcLen == 0 || Len == 0) return Dst;
// These optimizations require DataLayout.
- if (!DL) return 0;
+ if (!DL) return nullptr;
// We don't optimize this case
- if (Len < SrcLen) return 0;
+ if (Len < SrcLen) return nullptr;
// strncat(x, s, c) -> strcat(x, s)
// s is constant so the strcat can be optimized further
@@ -478,20 +479,20 @@ struct StrChrOpt : public LibCallOptimization {
FT->getReturnType() != B.getInt8PtrTy() ||
FT->getParamType(0) != FT->getReturnType() ||
!FT->getParamType(1)->isIntegerTy(32))
- return 0;
+ return nullptr;
Value *SrcStr = CI->getArgOperand(0);
// If the second operand is non-constant, see if we can compute the length
// of the input string and turn this into memchr.
ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
- if (CharC == 0) {
+ if (!CharC) {
// These optimizations require DataLayout.
- if (!DL) return 0;
+ if (!DL) return nullptr;
uint64_t Len = GetStringLength(SrcStr);
if (Len == 0 || !FT->getParamType(1)->isIntegerTy(32)) // memchr needs i32.
- return 0;
+ return nullptr;
return EmitMemChr(SrcStr, CI->getArgOperand(1), // include nul.
ConstantInt::get(DL->getIntPtrType(*Context), Len),
@@ -504,7 +505,7 @@ struct StrChrOpt : public LibCallOptimization {
if (!getConstantStringInfo(SrcStr, Str)) {
if (DL && CharC->isZero()) // strchr(p, 0) -> p + strlen(p)
return B.CreateGEP(SrcStr, EmitStrLen(SrcStr, B, DL, TLI), "strchr");
- return 0;
+ return nullptr;
}
// Compute the offset, make sure to handle the case when we're searching for
@@ -528,21 +529,21 @@ struct StrRChrOpt : public LibCallOptimization {
FT->getReturnType() != B.getInt8PtrTy() ||
FT->getParamType(0) != FT->getReturnType() ||
!FT->getParamType(1)->isIntegerTy(32))
- return 0;
+ return nullptr;
Value *SrcStr = CI->getArgOperand(0);
ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
// Cannot fold anything if we're not looking for a constant.
if (!CharC)
- return 0;
+ return nullptr;
StringRef Str;
if (!getConstantStringInfo(SrcStr, Str)) {
// strrchr(s, 0) -> strchr(s, 0)
if (DL && CharC->isZero())
return EmitStrChr(SrcStr, '\0', B, DL, TLI);
- return 0;
+ return nullptr;
}
// Compute the offset.
@@ -565,7 +566,7 @@ struct StrCmpOpt : public LibCallOptimization {
!FT->getReturnType()->isIntegerTy(32) ||
FT->getParamType(0) != FT->getParamType(1) ||
FT->getParamType(0) != B.getInt8PtrTy())
- return 0;
+ return nullptr;
Value *Str1P = CI->getArgOperand(0), *Str2P = CI->getArgOperand(1);
if (Str1P == Str2P) // strcmp(x,x) -> 0
@@ -591,14 +592,14 @@ struct StrCmpOpt : public LibCallOptimization {
uint64_t Len2 = GetStringLength(Str2P);
if (Len1 && Len2) {
// These optimizations require DataLayout.
- if (!DL) return 0;
+ if (!DL) return nullptr;
return EmitMemCmp(Str1P, Str2P,
ConstantInt::get(DL->getIntPtrType(*Context),
std::min(Len1, Len2)), B, DL, TLI);
}
- return 0;
+ return nullptr;
}
};
@@ -612,7 +613,7 @@ struct StrNCmpOpt : public LibCallOptimization {
FT->getParamType(0) != FT->getParamType(1) ||
FT->getParamType(0) != B.getInt8PtrTy() ||
!FT->getParamType(2)->isIntegerTy())
- return 0;
+ return nullptr;
Value *Str1P = CI->getArgOperand(0), *Str2P = CI->getArgOperand(1);
if (Str1P == Str2P) // strncmp(x,x,n) -> 0
@@ -623,7 +624,7 @@ struct StrNCmpOpt : public LibCallOptimization {
if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(CI->getArgOperand(2)))
Length = LengthArg->getZExtValue();
else
- return 0;
+ return nullptr;
if (Length == 0) // strncmp(x,y,0) -> 0
return ConstantInt::get(CI->getType(), 0);
@@ -649,7 +650,7 @@ struct StrNCmpOpt : public LibCallOptimization {
if (HasStr2 && Str2.empty()) // strncmp(x, "", n) -> *x
return B.CreateZExt(B.CreateLoad(Str1P, "strcmpload"), CI->getType());
- return 0;
+ return nullptr;
}
};
@@ -662,18 +663,18 @@ struct StrCpyOpt : public LibCallOptimization {
FT->getReturnType() != FT->getParamType(0) ||
FT->getParamType(0) != FT->getParamType(1) ||
FT->getParamType(0) != B.getInt8PtrTy())
- return 0;
+ return nullptr;
Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1);
if (Dst == Src) // strcpy(x,x) -> x
return Src;
// These optimizations require DataLayout.
- if (!DL) return 0;
+ if (!DL) return nullptr;
// See if we can get the length of the input string.
uint64_t Len = GetStringLength(Src);
- if (Len == 0) return 0;
+ if (Len == 0) return nullptr;
// We have enough information to now generate the memcpy call to do the
// copy for us. Make a memcpy to copy the nul byte with align = 1.
@@ -692,20 +693,20 @@ struct StpCpyOpt: public LibCallOptimization {
FT->getReturnType() != FT->getParamType(0) ||
FT->getParamType(0) != FT->getParamType(1) ||
FT->getParamType(0) != B.getInt8PtrTy())
- return 0;
+ return nullptr;
// These optimizations require DataLayout.
- if (!DL) return 0;
+ if (!DL) return nullptr;
Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1);
if (Dst == Src) { // stpcpy(x,x) -> x+strlen(x)
Value *StrLen = EmitStrLen(Src, B, DL, TLI);
- return StrLen ? B.CreateInBoundsGEP(Dst, StrLen) : 0;
+ return StrLen ? B.CreateInBoundsGEP(Dst, StrLen) : nullptr;
}
// See if we can get the length of the input string.
uint64_t Len = GetStringLength(Src);
- if (Len == 0) return 0;
+ if (Len == 0) return nullptr;
Type *PT = FT->getParamType(0);
Value *LenV = ConstantInt::get(DL->getIntPtrType(PT), Len);
@@ -728,7 +729,7 @@ struct StrNCpyOpt : public LibCallOptimization {
FT->getParamType(0) != FT->getParamType(1) ||
FT->getParamType(0) != B.getInt8PtrTy() ||
!FT->getParamType(2)->isIntegerTy())
- return 0;
+ return nullptr;
Value *Dst = CI->getArgOperand(0);
Value *Src = CI->getArgOperand(1);
@@ -736,7 +737,7 @@ struct StrNCpyOpt : public LibCallOptimization {
// See if we can get the length of the input string.
uint64_t SrcLen = GetStringLength(Src);
- if (SrcLen == 0) return 0;
+ if (SrcLen == 0) return nullptr;
--SrcLen;
if (SrcLen == 0) {
@@ -749,15 +750,15 @@ struct StrNCpyOpt : public LibCallOptimization {
if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(LenOp))
Len = LengthArg->getZExtValue();
else
- return 0;
+ return nullptr;
if (Len == 0) return Dst; // strncpy(x, y, 0) -> x
// These optimizations require DataLayout.
- if (!DL) return 0;
+ if (!DL) return nullptr;
// Let strncpy handle the zero padding
- if (Len > SrcLen+1) return 0;
+ if (Len > SrcLen+1) return nullptr;
Type *PT = FT->getParamType(0);
// strncpy(x, s, c) -> memcpy(x, s, c, 1) [s and c are constant]
@@ -776,7 +777,7 @@ struct StrLenOpt : public LibCallOptimization {
if (FT->getNumParams() != 1 ||
FT->getParamType(0) != B.getInt8PtrTy() ||
!FT->getReturnType()->isIntegerTy())
- return 0;
+ return nullptr;
Value *Src = CI->getArgOperand(0);
@@ -784,11 +785,26 @@ struct StrLenOpt : public LibCallOptimization {
if (uint64_t Len = GetStringLength(Src))
return ConstantInt::get(CI->getType(), Len-1);
+ // strlen(x?"foo":"bars") --> x ? 3 : 4
+ if (SelectInst *SI = dyn_cast<SelectInst>(Src)) {
+ uint64_t LenTrue = GetStringLength(SI->getTrueValue());
+ uint64_t LenFalse = GetStringLength(SI->getFalseValue());
+ if (LenTrue && LenFalse) {
+ emitOptimizationRemark(*Context, "simplify-libcalls", *Caller,
+ SI->getDebugLoc(),
+ "folded strlen(select) to select of constants");
+ return B.CreateSelect(SI->getCondition(),
+ ConstantInt::get(CI->getType(), LenTrue-1),
+ ConstantInt::get(CI->getType(), LenFalse-1));
+ }
+ }
+
// strlen(x) != 0 --> *x != 0
// strlen(x) == 0 --> *x == 0
if (isOnlyUsedInZeroEqualityComparison(CI))
return B.CreateZExt(B.CreateLoad(Src, "strlenfirst"), CI->getType());
- return 0;
+
+ return nullptr;
}
};
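
[Editor's note] The new strlen fold above turns a select of two constant strings into a select of their lengths (and emits an optimization remark). A hedged source-level illustration (hypothetical code):

    #include <cstring>

    std::size_t before(bool x) {
      return std::strlen(x ? "foo" : "bars"); // both arms have known length
    }

    std::size_t after(bool x) {
      return x ? 3 : 4;                       // folded: select of constants
    }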
@@ -800,7 +816,7 @@ struct StrPBrkOpt : public LibCallOptimization {
FT->getParamType(0) != B.getInt8PtrTy() ||
FT->getParamType(1) != FT->getParamType(0) ||
FT->getReturnType() != FT->getParamType(0))
- return 0;
+ return nullptr;
StringRef S1, S2;
bool HasS1 = getConstantStringInfo(CI->getArgOperand(0), S1);
@@ -824,7 +840,7 @@ struct StrPBrkOpt : public LibCallOptimization {
if (DL && HasS2 && S2.size() == 1)
return EmitStrChr(CI->getArgOperand(0), S2[0], B, DL, TLI);
- return 0;
+ return nullptr;
}
};
@@ -835,7 +851,7 @@ struct StrToOpt : public LibCallOptimization {
if ((FT->getNumParams() != 2 && FT->getNumParams() != 3) ||
!FT->getParamType(0)->isPointerTy() ||
!FT->getParamType(1)->isPointerTy())
- return 0;
+ return nullptr;
Value *EndPtr = CI->getArgOperand(1);
if (isa<ConstantPointerNull>(EndPtr)) {
@@ -844,7 +860,7 @@ struct StrToOpt : public LibCallOptimization {
CI->addAttribute(1, Attribute::NoCapture);
}
- return 0;
+ return nullptr;
}
};
@@ -856,7 +872,7 @@ struct StrSpnOpt : public LibCallOptimization {
FT->getParamType(0) != B.getInt8PtrTy() ||
FT->getParamType(1) != FT->getParamType(0) ||
!FT->getReturnType()->isIntegerTy())
- return 0;
+ return nullptr;
StringRef S1, S2;
bool HasS1 = getConstantStringInfo(CI->getArgOperand(0), S1);
@@ -874,7 +890,7 @@ struct StrSpnOpt : public LibCallOptimization {
return ConstantInt::get(CI->getType(), Pos);
}
- return 0;
+ return nullptr;
}
};
@@ -886,7 +902,7 @@ struct StrCSpnOpt : public LibCallOptimization {
FT->getParamType(0) != B.getInt8PtrTy() ||
FT->getParamType(1) != FT->getParamType(0) ||
!FT->getReturnType()->isIntegerTy())
- return 0;
+ return nullptr;
StringRef S1, S2;
bool HasS1 = getConstantStringInfo(CI->getArgOperand(0), S1);
@@ -907,7 +923,7 @@ struct StrCSpnOpt : public LibCallOptimization {
if (DL && HasS2 && S2.empty())
return EmitStrLen(CI->getArgOperand(0), B, DL, TLI);
- return 0;
+ return nullptr;
}
};
@@ -919,7 +935,7 @@ struct StrStrOpt : public LibCallOptimization {
!FT->getParamType(0)->isPointerTy() ||
!FT->getParamType(1)->isPointerTy() ||
!FT->getReturnType()->isPointerTy())
- return 0;
+ return nullptr;
// fold strstr(x, x) -> x.
if (CI->getArgOperand(0) == CI->getArgOperand(1))
@@ -929,11 +945,11 @@ struct StrStrOpt : public LibCallOptimization {
if (DL && isOnlyUsedInEqualityComparison(CI, CI->getArgOperand(0))) {
Value *StrLen = EmitStrLen(CI->getArgOperand(1), B, DL, TLI);
if (!StrLen)
- return 0;
+ return nullptr;
Value *StrNCmp = EmitStrNCmp(CI->getArgOperand(0), CI->getArgOperand(1),
StrLen, B, DL, TLI);
if (!StrNCmp)
- return 0;
+ return nullptr;
for (auto UI = CI->user_begin(), UE = CI->user_end(); UI != UE;) {
ICmpInst *Old = cast<ICmpInst>(*UI++);
Value *Cmp = B.CreateICmp(Old->getPredicate(), StrNCmp,
@@ -969,9 +985,9 @@ struct StrStrOpt : public LibCallOptimization {
// fold strstr(x, "y") -> strchr(x, 'y').
if (HasStr2 && ToFindStr.size() == 1) {
Value *StrChr= EmitStrChr(CI->getArgOperand(0), ToFindStr[0], B, DL, TLI);
- return StrChr ? B.CreateBitCast(StrChr, CI->getType()) : 0;
+ return StrChr ? B.CreateBitCast(StrChr, CI->getType()) : nullptr;
}
- return 0;
+ return nullptr;
}
};
@@ -982,7 +998,7 @@ struct MemCmpOpt : public LibCallOptimization {
if (FT->getNumParams() != 3 || !FT->getParamType(0)->isPointerTy() ||
!FT->getParamType(1)->isPointerTy() ||
!FT->getReturnType()->isIntegerTy(32))
- return 0;
+ return nullptr;
Value *LHS = CI->getArgOperand(0), *RHS = CI->getArgOperand(1);
@@ -991,7 +1007,7 @@ struct MemCmpOpt : public LibCallOptimization {
// Make sure we have a constant length.
ConstantInt *LenC = dyn_cast<ConstantInt>(CI->getArgOperand(2));
- if (!LenC) return 0;
+ if (!LenC) return nullptr;
uint64_t Len = LenC->getZExtValue();
if (Len == 0) // memcmp(s1,s2,0) -> 0
@@ -1012,7 +1028,7 @@ struct MemCmpOpt : public LibCallOptimization {
getConstantStringInfo(RHS, RHSStr)) {
// Make sure we're not reading out-of-bounds memory.
if (Len > LHSStr.size() || Len > RHSStr.size())
- return 0;
+ return nullptr;
// Fold the memcmp and normalize the result. This way we get consistent
// results across multiple platforms.
uint64_t Ret = 0;
@@ -1024,7 +1040,7 @@ struct MemCmpOpt : public LibCallOptimization {
return ConstantInt::get(CI->getType(), Ret);
}
- return 0;
+ return nullptr;
}
};
@@ -1032,14 +1048,14 @@ struct MemCpyOpt : public LibCallOptimization {
Value *callOptimizer(Function *Callee, CallInst *CI,
IRBuilder<> &B) override {
// These optimizations require DataLayout.
- if (!DL) return 0;
+ if (!DL) return nullptr;
FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) ||
!FT->getParamType(0)->isPointerTy() ||
!FT->getParamType(1)->isPointerTy() ||
FT->getParamType(2) != DL->getIntPtrType(*Context))
- return 0;
+ return nullptr;
// memcpy(x, y, n) -> llvm.memcpy(x, y, n, 1)
B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1),
@@ -1052,14 +1068,14 @@ struct MemMoveOpt : public LibCallOptimization {
Value *callOptimizer(Function *Callee, CallInst *CI,
IRBuilder<> &B) override {
// These optimizations require DataLayout.
- if (!DL) return 0;
+ if (!DL) return nullptr;
FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) ||
!FT->getParamType(0)->isPointerTy() ||
!FT->getParamType(1)->isPointerTy() ||
FT->getParamType(2) != DL->getIntPtrType(*Context))
- return 0;
+ return nullptr;
// memmove(x, y, n) -> llvm.memmove(x, y, n, 1)
B.CreateMemMove(CI->getArgOperand(0), CI->getArgOperand(1),
@@ -1072,14 +1088,14 @@ struct MemSetOpt : public LibCallOptimization {
Value *callOptimizer(Function *Callee, CallInst *CI,
IRBuilder<> &B) override {
// These optimizations require DataLayout.
- if (!DL) return 0;
+ if (!DL) return nullptr;
FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) ||
!FT->getParamType(0)->isPointerTy() ||
!FT->getParamType(1)->isIntegerTy() ||
FT->getParamType(2) != DL->getIntPtrType(FT->getParamType(0)))
- return 0;
+ return nullptr;
// memset(p, v, n) -> llvm.memset(p, v, n, 1)
Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(), false);
@@ -1103,21 +1119,21 @@ struct UnaryDoubleFPOpt : public LibCallOptimization {
FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 1 || !FT->getReturnType()->isDoubleTy() ||
!FT->getParamType(0)->isDoubleTy())
- return 0;
+ return nullptr;
if (CheckRetType) {
// Check if all the uses of a function like 'sin' are converted to float.
for (User *U : CI->users()) {
FPTruncInst *Cast = dyn_cast<FPTruncInst>(U);
- if (Cast == 0 || !Cast->getType()->isFloatTy())
- return 0;
+ if (!Cast || !Cast->getType()->isFloatTy())
+ return nullptr;
}
}
// If this is something like 'floor((double)floatval)', convert to floorf.
FPExtInst *Cast = dyn_cast<FPExtInst>(CI->getArgOperand(0));
- if (Cast == 0 || !Cast->getOperand(0)->getType()->isFloatTy())
- return 0;
+ if (!Cast || !Cast->getOperand(0)->getType()->isFloatTy())
+ return nullptr;
// floor((double)floatval) -> (double)floorf(floatval)
Value *V = Cast->getOperand(0);
@@ -1138,15 +1154,15 @@ struct BinaryDoubleFPOpt : public LibCallOptimization {
if (FT->getNumParams() != 2 || FT->getReturnType() != FT->getParamType(0) ||
FT->getParamType(0) != FT->getParamType(1) ||
!FT->getParamType(0)->isFloatingPointTy())
- return 0;
+ return nullptr;
if (CheckRetType) {
// Check if all the uses of a function like 'fmin/fmax' are converted to
// float.
for (User *U : CI->users()) {
FPTruncInst *Cast = dyn_cast<FPTruncInst>(U);
- if (Cast == 0 || !Cast->getType()->isFloatTy())
- return 0;
+ if (!Cast || !Cast->getType()->isFloatTy())
+ return nullptr;
}
}
@@ -1154,13 +1170,13 @@ struct BinaryDoubleFPOpt : public LibCallOptimization {
// we convert it to fminf.
FPExtInst *Cast1 = dyn_cast<FPExtInst>(CI->getArgOperand(0));
FPExtInst *Cast2 = dyn_cast<FPExtInst>(CI->getArgOperand(1));
- if (Cast1 == 0 || !Cast1->getOperand(0)->getType()->isFloatTy() ||
- Cast2 == 0 || !Cast2->getOperand(0)->getType()->isFloatTy())
- return 0;
+ if (!Cast1 || !Cast1->getOperand(0)->getType()->isFloatTy() ||
+ !Cast2 || !Cast2->getOperand(0)->getType()->isFloatTy())
+ return nullptr;
// fmin((double)floatval1, (double)floatval2)
// -> (double)fmin(floatval1, floatval2)
- Value *V = NULL;
+ Value *V = nullptr;
Value *V1 = Cast1->getOperand(0);
Value *V2 = Cast2->getOperand(0);
V = EmitBinaryFloatFnCall(V1, V2, Callee->getName(), B,
@@ -1180,7 +1196,7 @@ struct CosOpt : public UnsafeFPLibCallOptimization {
CosOpt(bool UnsafeFPShrink) : UnsafeFPLibCallOptimization(UnsafeFPShrink) {}
Value *callOptimizer(Function *Callee, CallInst *CI,
IRBuilder<> &B) override {
- Value *Ret = NULL;
+ Value *Ret = nullptr;
if (UnsafeFPShrink && Callee->getName() == "cos" &&
TLI->has(LibFunc::cosf)) {
UnaryDoubleFPOpt UnsafeUnaryDoubleFP(true);
@@ -1208,7 +1224,7 @@ struct PowOpt : public UnsafeFPLibCallOptimization {
PowOpt(bool UnsafeFPShrink) : UnsafeFPLibCallOptimization(UnsafeFPShrink) {}
Value *callOptimizer(Function *Callee, CallInst *CI,
IRBuilder<> &B) override {
- Value *Ret = NULL;
+ Value *Ret = nullptr;
if (UnsafeFPShrink && Callee->getName() == "pow" &&
TLI->has(LibFunc::powf)) {
UnaryDoubleFPOpt UnsafeUnaryDoubleFP(true);
@@ -1242,7 +1258,7 @@ struct PowOpt : public UnsafeFPLibCallOptimization {
}
ConstantFP *Op2C = dyn_cast<ConstantFP>(Op2);
- if (Op2C == 0) return Ret;
+ if (!Op2C) return Ret;
if (Op2C->getValueAPF().isZero()) // pow(x, 0.0) -> 1.0
return ConstantFP::get(CI->getType(), 1.0);
@@ -1275,7 +1291,7 @@ struct PowOpt : public UnsafeFPLibCallOptimization {
if (Op2C->isExactlyValue(-1.0)) // pow(x, -1.0) -> 1.0/x
return B.CreateFDiv(ConstantFP::get(CI->getType(), 1.0),
Op1, "powrecip");
- return 0;
+ return nullptr;
}
};
@@ -1283,7 +1299,7 @@ struct Exp2Opt : public UnsafeFPLibCallOptimization {
Exp2Opt(bool UnsafeFPShrink) : UnsafeFPLibCallOptimization(UnsafeFPShrink) {}
Value *callOptimizer(Function *Callee, CallInst *CI,
IRBuilder<> &B) override {
- Value *Ret = NULL;
+ Value *Ret = nullptr;
if (UnsafeFPShrink && Callee->getName() == "exp2" &&
TLI->has(LibFunc::exp2f)) {
UnaryDoubleFPOpt UnsafeUnaryDoubleFP(true);
@@ -1307,7 +1323,7 @@ struct Exp2Opt : public UnsafeFPLibCallOptimization {
LdExp = LibFunc::ldexp;
if (TLI->has(LdExp)) {
- Value *LdExpArg = 0;
+ Value *LdExpArg = nullptr;
if (SIToFPInst *OpC = dyn_cast<SIToFPInst>(Op)) {
if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() <= 32)
LdExpArg = B.CreateSExt(OpC->getOperand(0), B.getInt32Ty());
@@ -1344,7 +1360,7 @@ struct SinCosPiOpt : public LibCallOptimization {
// Make sure the prototype is as expected, otherwise the rest of the
// function is probably invalid and likely to abort.
if (!isTrigLibCall(CI))
- return 0;
+ return nullptr;
Value *Arg = CI->getArgOperand(0);
SmallVector<CallInst *, 1> SinCalls;
@@ -1362,7 +1378,7 @@ struct SinCosPiOpt : public LibCallOptimization {
// It's only worthwhile if both sinpi and cospi are actually used.
if (SinCosCalls.empty() && (SinCalls.empty() || CosCalls.empty()))
- return 0;
+ return nullptr;
Value *Sin, *Cos, *SinCos;
insertSinCosCall(B, CI->getCalledFunction(), Arg, IsFloat, Sin, Cos,
@@ -1372,7 +1388,7 @@ struct SinCosPiOpt : public LibCallOptimization {
replaceTrigInsts(CosCalls, Cos);
replaceTrigInsts(SinCosCalls, SinCos);
- return 0;
+ return nullptr;
}
bool isTrigLibCall(CallInst *CI) {
@@ -1498,7 +1514,7 @@ struct FFSOpt : public LibCallOptimization {
if (FT->getNumParams() != 1 ||
!FT->getReturnType()->isIntegerTy(32) ||
!FT->getParamType(0)->isIntegerTy())
- return 0;
+ return nullptr;
Value *Op = CI->getArgOperand(0);
@@ -1531,7 +1547,7 @@ struct AbsOpt : public LibCallOptimization {
// We require integer(integer) where the types agree.
if (FT->getNumParams() != 1 || !FT->getReturnType()->isIntegerTy() ||
FT->getParamType(0) != FT->getReturnType())
- return 0;
+ return nullptr;
// abs(x) -> x >s -1 ? x : -x
Value *Op = CI->getArgOperand(0);
@@ -1549,7 +1565,7 @@ struct IsDigitOpt : public LibCallOptimization {
// We require integer(i32)
if (FT->getNumParams() != 1 || !FT->getReturnType()->isIntegerTy() ||
!FT->getParamType(0)->isIntegerTy(32))
- return 0;
+ return nullptr;
// isdigit(c) -> (c-'0') <u 10
Value *Op = CI->getArgOperand(0);
@@ -1566,7 +1582,7 @@ struct IsAsciiOpt : public LibCallOptimization {
// We require integer(i32)
if (FT->getNumParams() != 1 || !FT->getReturnType()->isIntegerTy() ||
!FT->getParamType(0)->isIntegerTy(32))
- return 0;
+ return nullptr;
// isascii(c) -> c <u 128
Value *Op = CI->getArgOperand(0);
@@ -1582,7 +1598,7 @@ struct ToAsciiOpt : public LibCallOptimization {
// We require i32(i32)
if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) ||
!FT->getParamType(0)->isIntegerTy(32))
- return 0;
+ return nullptr;
// toascii(c) -> c & 0x7f
return B.CreateAnd(CI->getArgOperand(0),
@@ -1612,7 +1628,7 @@ struct ErrorReportingOpt : public LibCallOptimization {
CI->addAttribute(AttributeSet::FunctionIndex, Attribute::Cold);
}
- return 0;
+ return nullptr;
}
protected:
@@ -1649,7 +1665,7 @@ struct PrintFOpt : public LibCallOptimization {
// Check for a fixed format string.
StringRef FormatStr;
if (!getConstantStringInfo(CI->getArgOperand(0), FormatStr))
- return 0;
+ return nullptr;
// Empty format string -> noop.
if (FormatStr.empty()) // Tolerate printf's declared void.
@@ -1660,7 +1676,7 @@ struct PrintFOpt : public LibCallOptimization {
// is used, in general the printf return value is not compatible with either
// putchar() or puts().
if (!CI->use_empty())
- return 0;
+ return nullptr;
// printf("x") -> putchar('x'), even for '%'.
if (FormatStr.size() == 1) {
@@ -1697,7 +1713,7 @@ struct PrintFOpt : public LibCallOptimization {
CI->getArgOperand(1)->getType()->isPointerTy()) {
return EmitPutS(CI->getArgOperand(1), B, DL, TLI);
}
- return 0;
+ return nullptr;
}
Value *callOptimizer(Function *Callee, CallInst *CI,
@@ -1707,7 +1723,7 @@ struct PrintFOpt : public LibCallOptimization {
if (FT->getNumParams() < 1 || !FT->getParamType(0)->isPointerTy() ||
!(FT->getReturnType()->isIntegerTy() ||
FT->getReturnType()->isVoidTy()))
- return 0;
+ return nullptr;
if (Value *V = optimizeFixedFormatString(Callee, CI, B)) {
return V;
@@ -1724,7 +1740,7 @@ struct PrintFOpt : public LibCallOptimization {
B.Insert(New);
return New;
}
- return 0;
+ return nullptr;
}
};
@@ -1734,7 +1750,7 @@ struct SPrintFOpt : public LibCallOptimization {
// Check for a fixed format string.
StringRef FormatStr;
if (!getConstantStringInfo(CI->getArgOperand(1), FormatStr))
- return 0;
+ return nullptr;
// If we just have a format string (nothing else crazy), transform it.
if (CI->getNumArgOperands() == 2) {
@@ -1742,10 +1758,10 @@ struct SPrintFOpt : public LibCallOptimization {
// %% -> % in the future if we cared.
for (unsigned i = 0, e = FormatStr.size(); i != e; ++i)
if (FormatStr[i] == '%')
- return 0; // we found a format specifier, bail out.
+ return nullptr; // we found a format specifier, bail out.
// These optimizations require DataLayout.
- if (!DL) return 0;
+ if (!DL) return nullptr;
// sprintf(str, fmt) -> llvm.memcpy(str, fmt, strlen(fmt)+1, 1)
B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1),
@@ -1758,12 +1774,12 @@ struct SPrintFOpt : public LibCallOptimization {
// and have an extra operand.
if (FormatStr.size() != 2 || FormatStr[0] != '%' ||
CI->getNumArgOperands() < 3)
- return 0;
+ return nullptr;
// Decode the second character of the format string.
if (FormatStr[1] == 'c') {
// sprintf(dst, "%c", chr) --> *(i8*)dst = chr; *((i8*)dst+1) = 0
- if (!CI->getArgOperand(2)->getType()->isIntegerTy()) return 0;
+ if (!CI->getArgOperand(2)->getType()->isIntegerTy()) return nullptr;
Value *V = B.CreateTrunc(CI->getArgOperand(2), B.getInt8Ty(), "char");
Value *Ptr = CastToCStr(CI->getArgOperand(0), B);
B.CreateStore(V, Ptr);
@@ -1775,14 +1791,14 @@ struct SPrintFOpt : public LibCallOptimization {
if (FormatStr[1] == 's') {
// These optimizations require DataLayout.
- if (!DL) return 0;
+ if (!DL) return nullptr;
// sprintf(dest, "%s", str) -> llvm.memcpy(dest, str, strlen(str)+1, 1)
- if (!CI->getArgOperand(2)->getType()->isPointerTy()) return 0;
+ if (!CI->getArgOperand(2)->getType()->isPointerTy()) return nullptr;
Value *Len = EmitStrLen(CI->getArgOperand(2), B, DL, TLI);
if (!Len)
- return 0;
+ return nullptr;
Value *IncLen = B.CreateAdd(Len,
ConstantInt::get(Len->getType(), 1),
"leninc");
@@ -1791,7 +1807,7 @@ struct SPrintFOpt : public LibCallOptimization {
// The sprintf result is the unincremented number of bytes in the string.
return B.CreateIntCast(Len, CI->getType(), false);
}
- return 0;
+ return nullptr;
}
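
[Editor's note] The two branches above implement the classic sprintf "%c" and "%s" folds. A hedged source-level illustration (hypothetical code) of the rewrites:

    #include <cstdio>
    #include <cstring>

    void folds(char *dst, char c, const char *s) {
      std::sprintf(dst, "%c", c); // becomes: dst[0] = c; dst[1] = '\0';
      std::sprintf(dst, "%s", s); // becomes: std::memcpy(dst, s,
                                  //              std::strlen(s) + 1);
    }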
Value *callOptimizer(Function *Callee, CallInst *CI,
@@ -1801,7 +1817,7 @@ struct SPrintFOpt : public LibCallOptimization {
if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() ||
!FT->getParamType(1)->isPointerTy() ||
!FT->getReturnType()->isIntegerTy())
- return 0;
+ return nullptr;
if (Value *V = OptimizeFixedFormatString(Callee, CI, B)) {
return V;
@@ -1818,7 +1834,7 @@ struct SPrintFOpt : public LibCallOptimization {
B.Insert(New);
return New;
}
- return 0;
+ return nullptr;
}
};
@@ -1831,22 +1847,22 @@ struct FPrintFOpt : public LibCallOptimization {
// All the optimizations depend on the format string.
StringRef FormatStr;
if (!getConstantStringInfo(CI->getArgOperand(1), FormatStr))
- return 0;
+ return nullptr;
// Do not do any of the following transformations if the fprintf return
// value is used; in general, the fprintf return value is not compatible
// with fwrite(), fputc() or fputs().
if (!CI->use_empty())
- return 0;
+ return nullptr;
// fprintf(F, "foo") --> fwrite("foo", 3, 1, F)
if (CI->getNumArgOperands() == 2) {
for (unsigned i = 0, e = FormatStr.size(); i != e; ++i)
if (FormatStr[i] == '%') // Could handle %% -> % if we cared.
- return 0; // We found a format specifier.
+ return nullptr; // We found a format specifier.
// These optimizations require DataLayout.
- if (!DL) return 0;
+ if (!DL) return nullptr;
return EmitFWrite(CI->getArgOperand(1),
ConstantInt::get(DL->getIntPtrType(*Context),
@@ -1858,22 +1874,22 @@ struct FPrintFOpt : public LibCallOptimization {
// and have an extra operand.
if (FormatStr.size() != 2 || FormatStr[0] != '%' ||
CI->getNumArgOperands() < 3)
- return 0;
+ return nullptr;
// Decode the second character of the format string.
if (FormatStr[1] == 'c') {
// fprintf(F, "%c", chr) --> fputc(chr, F)
- if (!CI->getArgOperand(2)->getType()->isIntegerTy()) return 0;
+ if (!CI->getArgOperand(2)->getType()->isIntegerTy()) return nullptr;
return EmitFPutC(CI->getArgOperand(2), CI->getArgOperand(0), B, DL, TLI);
}
if (FormatStr[1] == 's') {
// fprintf(F, "%s", str) --> fputs(str, F)
if (!CI->getArgOperand(2)->getType()->isPointerTy())
- return 0;
+ return nullptr;
return EmitFPutS(CI->getArgOperand(2), CI->getArgOperand(0), B, DL, TLI);
}
- return 0;
+ return nullptr;
}
Value *callOptimizer(Function *Callee, CallInst *CI,
@@ -1883,7 +1899,7 @@ struct FPrintFOpt : public LibCallOptimization {
if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() ||
!FT->getParamType(1)->isPointerTy() ||
!FT->getReturnType()->isIntegerTy())
- return 0;
+ return nullptr;
if (Value *V = optimizeFixedFormatString(Callee, CI, B)) {
return V;
@@ -1900,7 +1916,7 @@ struct FPrintFOpt : public LibCallOptimization {
B.Insert(New);
return New;
}
- return 0;
+ return nullptr;
}
};
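The three fprintf rewrites in one sketch; note the precondition checked above that the fprintf result is unused, since the replacement calls return different values:

    #include <cstdio>

    void before(FILE *f, int c, const char *s) {
      fprintf(f, "foo");
      fprintf(f, "%c", c);
      fprintf(f, "%s", s);
    }

    void after(FILE *f, int c, const char *s) {
      fwrite("foo", 3, 1, f);   // fprintf(F, "foo")     --> fwrite("foo", 3, 1, F)
      fputc(c, f);              // fprintf(F, "%c", chr) --> fputc(chr, F)
      fputs(s, f);              // fprintf(F, "%s", str) --> fputs(str, F)
    }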
@@ -1917,12 +1933,12 @@ struct FWriteOpt : public LibCallOptimization {
!FT->getParamType(2)->isIntegerTy() ||
!FT->getParamType(3)->isPointerTy() ||
!FT->getReturnType()->isIntegerTy())
- return 0;
+ return nullptr;
// Get the element size and count.
ConstantInt *SizeC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
ConstantInt *CountC = dyn_cast<ConstantInt>(CI->getArgOperand(2));
- if (!SizeC || !CountC) return 0;
+ if (!SizeC || !CountC) return nullptr;
uint64_t Bytes = SizeC->getZExtValue()*CountC->getZExtValue();
// If this is writing zero records, remove the call (it's a noop).
@@ -1934,10 +1950,10 @@ struct FWriteOpt : public LibCallOptimization {
if (Bytes == 1 && CI->use_empty()) { // fwrite(S,1,1,F) -> fputc(S[0],F)
Value *Char = B.CreateLoad(CastToCStr(CI->getArgOperand(0), B), "char");
Value *NewCI = EmitFPutC(Char, CI->getArgOperand(3), B, DL, TLI);
- return NewCI ? ConstantInt::get(CI->getType(), 1) : 0;
+ return NewCI ? ConstantInt::get(CI->getType(), 1) : nullptr;
}
- return 0;
+ return nullptr;
}
};
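The two fwrite special cases, sketched in C; constant size and count arguments are required, as the dyn_cast<ConstantInt> checks above enforce:

    #include <cstdio>

    void before(FILE *f, const char *s) {
      fwrite(s, 0, 8, f);   // size*count == 0: the call is a no-op
      fwrite(s, 1, 1, f);   // exactly one byte, result unused
    }

    void after(FILE *f, const char *s) {
      // The zero-record call is simply removed. For the one-byte case, any
      // use of the fwrite result would be replaced by the constant 1.
      fputc(s[0], f);       // fwrite(S,1,1,F) -> fputc(S[0],F)
    }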
@@ -1948,18 +1964,18 @@ struct FPutsOpt : public LibCallOptimization {
(void) ER.callOptimizer(Callee, CI, B);
// These optimizations require DataLayout.
- if (!DL) return 0;
+ if (!DL) return nullptr;
// Require two pointers. Also, we can't optimize if return value is used.
FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() ||
!FT->getParamType(1)->isPointerTy() ||
!CI->use_empty())
- return 0;
+ return nullptr;
// fputs(s,F) --> fwrite(s,1,strlen(s),F)
uint64_t Len = GetStringLength(CI->getArgOperand(0));
- if (!Len) return 0;
+ if (!Len) return nullptr;
// Known to have no uses (see above).
return EmitFWrite(CI->getArgOperand(0),
ConstantInt::get(DL->getIntPtrType(*Context), Len-1),
@@ -1975,12 +1991,12 @@ struct PutsOpt : public LibCallOptimization {
if (FT->getNumParams() < 1 || !FT->getParamType(0)->isPointerTy() ||
!(FT->getReturnType()->isIntegerTy() ||
FT->getReturnType()->isVoidTy()))
- return 0;
+ return nullptr;
// Check for a constant string.
StringRef Str;
if (!getConstantStringInfo(CI->getArgOperand(0), Str))
- return 0;
+ return nullptr;
if (Str.empty() && CI->use_empty()) {
// puts("") -> putchar('\n')
@@ -1989,7 +2005,7 @@ struct PutsOpt : public LibCallOptimization {
return B.CreateIntCast(Res, CI->getType(), true);
}
- return 0;
+ return nullptr;
}
};
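And the fputs/puts pair; GetStringLength counts the terminating NUL, hence the Len-1 in the fwrite above:

    #include <cstdio>

    void before(FILE *f) {
      fputs("abc", f);   // constant string of known length, result unused
      puts("");          // empty constant string
    }

    void after(FILE *f) {
      fwrite("abc", 1, 3, f);   // fputs(s,F) --> fwrite(s,1,strlen(s),F)
      putchar('\n');            // puts("") -> putchar('\n')
    }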
@@ -2100,7 +2116,7 @@ LibCallOptimization *LibCallSimplifierImpl::lookupOptimization(CallInst *CI) {
case Intrinsic::exp2:
return &Exp2;
default:
- return 0;
+ return nullptr;
}
}
@@ -2210,7 +2226,7 @@ LibCallOptimization *LibCallSimplifierImpl::lookupOptimization(CallInst *CI) {
case LibFunc::trunc:
if (hasFloatVersion(FuncName))
return &UnaryDoubleFP;
- return 0;
+ return nullptr;
case LibFunc::acos:
case LibFunc::acosh:
case LibFunc::asin:
@@ -2234,16 +2250,16 @@ LibCallOptimization *LibCallSimplifierImpl::lookupOptimization(CallInst *CI) {
case LibFunc::tanh:
if (UnsafeFPShrink && hasFloatVersion(FuncName))
return &UnsafeUnaryDoubleFP;
- return 0;
+ return nullptr;
case LibFunc::fmin:
case LibFunc::fmax:
if (hasFloatVersion(FuncName))
return &BinaryDoubleFP;
- return 0;
+ return nullptr;
case LibFunc::memcpy_chk:
return &MemCpyChk;
default:
- return 0;
+ return nullptr;
}
}
@@ -2263,7 +2279,7 @@ LibCallOptimization *LibCallSimplifierImpl::lookupOptimization(CallInst *CI) {
return &StrNCpyChk;
}
- return 0;
+ return nullptr;
}
@@ -2273,7 +2289,7 @@ Value *LibCallSimplifierImpl::optimizeCall(CallInst *CI) {
IRBuilder<> Builder(CI);
return LCO->optimizeCall(CI, DL, TLI, LCS, Builder);
}
- return 0;
+ return nullptr;
}
LibCallSimplifier::LibCallSimplifier(const DataLayout *DL,
@@ -2287,7 +2303,7 @@ LibCallSimplifier::~LibCallSimplifier() {
}
Value *LibCallSimplifier::optimizeCall(CallInst *CI) {
- if (CI->isNoBuiltin()) return 0;
+ if (CI->isNoBuiltin()) return nullptr;
return Impl->optimizeCall(CI);
}
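A minimal sketch of how a caller drives this interface (the loop is illustrative, not from this patch; InstCombine is the in-tree user). A nullptr result, including the nobuiltin case guarded above, means the call is left untouched:

    void simplifyCallsIn(BasicBlock &BB, LibCallSimplifier &Simplifier) {
      for (BasicBlock::iterator It = BB.begin(); It != BB.end();) {
        Instruction *I = It++;          // advance first: I may be erased
        CallInst *CI = dyn_cast<CallInst>(I);
        if (!CI)
          continue;
        if (Value *V = Simplifier.optimizeCall(CI)) {
          CI->replaceAllUsesWith(V);    // rewire uses to the simplified value
          CI->eraseFromParent();        // the original call is now dead
        }
      }
    }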
diff --git a/lib/Transforms/Utils/SpecialCaseList.cpp b/lib/Transforms/Utils/SpecialCaseList.cpp
index c318560..2c6fcd1 100644
--- a/lib/Transforms/Utils/SpecialCaseList.cpp
+++ b/lib/Transforms/Utils/SpecialCaseList.cpp
@@ -41,7 +41,7 @@ struct SpecialCaseList::Entry {
StringSet<> Strings;
Regex *RegEx;
- Entry() : RegEx(0) {}
+ Entry() : RegEx(nullptr) {}
bool match(StringRef Query) const {
return Strings.count(Query) || (RegEx && RegEx->match(Query));
@@ -57,7 +57,7 @@ SpecialCaseList *SpecialCaseList::create(
std::unique_ptr<MemoryBuffer> File;
if (error_code EC = MemoryBuffer::getFile(Path, File)) {
Error = (Twine("Can't open file '") + Path + "': " + EC.message()).str();
- return 0;
+ return nullptr;
}
return create(File.get(), Error);
}
@@ -66,7 +66,7 @@ SpecialCaseList *SpecialCaseList::create(
const MemoryBuffer *MB, std::string &Error) {
std::unique_ptr<SpecialCaseList> SCL(new SpecialCaseList());
if (!SCL->parse(MB, Error))
- return 0;
+ return nullptr;
return SCL.release();
}
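Usage sketch for the two factories (the file name and error handling are illustrative); both overloads now report failure as nullptr plus a message in Error:

    SpecialCaseList *loadListOrDie() {
      std::string Error;
      SpecialCaseList *SCL = SpecialCaseList::create("blacklist.txt", Error);
      if (!SCL)
        report_fatal_error(Error);   // e.g. "Can't open file 'blacklist.txt': ..."
      return SCL;                    // the caller owns the list
    }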
diff --git a/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp b/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
index 560f581..0c2fc0a 100644
--- a/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
+++ b/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp
@@ -59,7 +59,7 @@ bool UnifyFunctionExitNodes::runOnFunction(Function &F) {
// Then unreachable blocks.
if (UnreachableBlocks.empty()) {
- UnreachableBlock = 0;
+ UnreachableBlock = nullptr;
} else if (UnreachableBlocks.size() == 1) {
UnreachableBlock = UnreachableBlocks.front();
} else {
@@ -77,7 +77,7 @@ bool UnifyFunctionExitNodes::runOnFunction(Function &F) {
// Now handle return blocks.
if (ReturningBlocks.empty()) {
- ReturnBlock = 0;
+ ReturnBlock = nullptr;
return false; // No blocks return
} else if (ReturningBlocks.size() == 1) {
ReturnBlock = ReturningBlocks.front(); // Already has a single return block
@@ -91,9 +91,9 @@ bool UnifyFunctionExitNodes::runOnFunction(Function &F) {
BasicBlock *NewRetBlock = BasicBlock::Create(F.getContext(),
"UnifiedReturnBlock", &F);
- PHINode *PN = 0;
+ PHINode *PN = nullptr;
if (F.getReturnType()->isVoidTy()) {
- ReturnInst::Create(F.getContext(), NULL, NewRetBlock);
+ ReturnInst::Create(F.getContext(), nullptr, NewRetBlock);
} else {
// If the function doesn't return void... add a PHI node to the block...
PN = PHINode::Create(F.getReturnType(), ReturningBlocks.size(),
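A source-level analogy of what this pass builds (illustrative only; the real transformation rewrites the CFG, not C code):

    int before(int x) {
      if (x > 0) return 1;   // two ReturnInsts
      return -1;
    }

    int after(int x) {
      int ret;               // the PHI node in "UnifiedReturnBlock"
      if (x > 0) ret = 1;    // each original return becomes a branch
      else       ret = -1;   // feeding its value into the PHI
      return ret;            // the single remaining ReturnInst
    }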
diff --git a/lib/Transforms/Utils/ValueMapper.cpp b/lib/Transforms/Utils/ValueMapper.cpp
index 457fc80..0f20e6d 100644
--- a/lib/Transforms/Utils/ValueMapper.cpp
+++ b/lib/Transforms/Utils/ValueMapper.cpp
@@ -71,12 +71,12 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags,
// Check all operands to see if any need to be remapped.
for (unsigned i = 0, e = MD->getNumOperands(); i != e; ++i) {
Value *OP = MD->getOperand(i);
- if (OP == 0) continue;
+ if (!OP) continue;
Value *Mapped_OP = MapValue(OP, VM, Flags, TypeMapper, Materializer);
// Use identity map if Mapped_Op is null and we can ignore missing
// entries.
if (Mapped_OP == OP ||
- (Mapped_OP == 0 && (Flags & RF_IgnoreMissingEntries)))
+ (Mapped_OP == nullptr && (Flags & RF_IgnoreMissingEntries)))
continue;
// Ok, at least one operand needs remapping.
@@ -84,13 +84,13 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags,
Elts.reserve(MD->getNumOperands());
for (i = 0; i != e; ++i) {
Value *Op = MD->getOperand(i);
- if (Op == 0)
- Elts.push_back(0);
+ if (!Op)
+ Elts.push_back(nullptr);
else {
Value *Mapped_Op = MapValue(Op, VM, Flags, TypeMapper, Materializer);
// Use identity map if Mapped_Op is null and we can ignore missing
// entries.
- if (Mapped_Op == 0 && (Flags & RF_IgnoreMissingEntries))
+ if (Mapped_Op == nullptr && (Flags & RF_IgnoreMissingEntries))
Mapped_Op = Op;
Elts.push_back(Mapped_Op);
}
@@ -112,8 +112,8 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags,
// Okay, this either must be a constant (which may or may not be mappable) or
// is something that is not in the mapping table.
Constant *C = const_cast<Constant*>(dyn_cast<Constant>(V));
- if (C == 0)
- return 0;
+ if (!C)
+ return nullptr;
if (BlockAddress *BA = dyn_cast<BlockAddress>(C)) {
Function *F =
@@ -126,7 +126,7 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags,
// Otherwise, we have some other constant to remap. Start by checking to see
// if all operands have an identity remapping.
unsigned OpNo = 0, NumOperands = C->getNumOperands();
- Value *Mapped = 0;
+ Value *Mapped = nullptr;
for (; OpNo != NumOperands; ++OpNo) {
Value *Op = C->getOperand(OpNo);
Mapped = MapValue(Op, VM, Flags, TypeMapper, Materializer);
@@ -187,7 +187,7 @@ void llvm::RemapInstruction(Instruction *I, ValueToValueMapTy &VMap,
for (User::op_iterator op = I->op_begin(), E = I->op_end(); op != E; ++op) {
Value *V = MapValue(*op, VMap, Flags, TypeMapper, Materializer);
// If we aren't ignoring missing entries, assert that something happened.
- if (V != 0)
+ if (V)
*op = V;
else
assert((Flags & RF_IgnoreMissingEntries) &&
@@ -199,7 +199,7 @@ void llvm::RemapInstruction(Instruction *I, ValueToValueMapTy &VMap,
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
Value *V = MapValue(PN->getIncomingBlock(i), VMap, Flags);
// If we aren't ignoring missing entries, assert that something happened.
- if (V != 0)
+ if (V)
PN->setIncomingBlock(i, cast<BasicBlock>(V));
else
assert((Flags & RF_IgnoreMissingEntries) &&
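Caller-side sketch of the pattern these flags serve (VMap is assumed to be a partially populated original-to-clone map):

    void duplicateBefore(Instruction *I, ValueToValueMapTy &VMap) {
      Instruction *NewI = I->clone();
      // With RF_IgnoreMissingEntries, operands absent from VMap keep their
      // original values instead of tripping the assertion above.
      RemapInstruction(NewI, VMap, RF_IgnoreMissingEntries);
      NewI->insertBefore(I);
    }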
diff --git a/lib/Transforms/Vectorize/BBVectorize.cpp b/lib/Transforms/Vectorize/BBVectorize.cpp
index 71350e7..28ec83b 100644
--- a/lib/Transforms/Vectorize/BBVectorize.cpp
+++ b/lib/Transforms/Vectorize/BBVectorize.cpp
@@ -15,7 +15,6 @@
//===----------------------------------------------------------------------===//
#define BBV_NAME "bb-vectorize"
-#define DEBUG_TYPE BBV_NAME
#include "llvm/Transforms/Vectorize.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
@@ -50,6 +49,8 @@
#include <algorithm>
using namespace llvm;
+#define DEBUG_TYPE BBV_NAME
+
static cl::opt<bool>
IgnoreTargetInfo("bb-vectorize-ignore-target-info", cl::init(false),
cl::Hidden, cl::desc("Ignore target information"));
@@ -122,6 +123,10 @@ NoMath("bb-vectorize-no-math", cl::init(false), cl::Hidden,
cl::desc("Don't try to vectorize floating-point math intrinsics"));
static cl::opt<bool>
+ NoBitManipulation("bb-vectorize-no-bitmanip", cl::init(false), cl::Hidden,
+ cl::desc("Don't try to vectorize BitManipulation intrinsics"));
+
+static cl::opt<bool>
NoFMA("bb-vectorize-no-fma", cl::init(false), cl::Hidden,
cl::desc("Don't try to vectorize the fused-multiply-add intrinsic"));
@@ -202,8 +207,8 @@ namespace {
DT = &P->getAnalysis<DominatorTreeWrapperPass>().getDomTree();
SE = &P->getAnalysis<ScalarEvolution>();
DataLayoutPass *DLP = P->getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : 0;
- TTI = IgnoreTargetInfo ? 0 : &P->getAnalysis<TargetTransformInfo>();
+ DL = DLP ? &DLP->getDataLayout() : nullptr;
+ TTI = IgnoreTargetInfo ? nullptr : &P->getAnalysis<TargetTransformInfo>();
}
typedef std::pair<Value *, Value *> ValuePair;
@@ -279,7 +284,7 @@ namespace {
bool trackUsesOfI(DenseSet<Value *> &Users,
AliasSetTracker &WriteSet, Instruction *I,
Instruction *J, bool UpdateUsers = true,
- DenseSet<ValuePair> *LoadMoveSetPairs = 0);
+ DenseSet<ValuePair> *LoadMoveSetPairs = nullptr);
void computePairsConnectedTo(
DenseMap<Value *, std::vector<Value *> > &CandidatePairs,
@@ -292,8 +297,8 @@ namespace {
bool pairsConflict(ValuePair P, ValuePair Q,
DenseSet<ValuePair> &PairableInstUsers,
DenseMap<ValuePair, std::vector<ValuePair> >
- *PairableInstUserMap = 0,
- DenseSet<VPPair> *PairableInstUserPairSet = 0);
+ *PairableInstUserMap = nullptr,
+ DenseSet<VPPair> *PairableInstUserPairSet = nullptr);
bool pairWillFormCycle(ValuePair P,
DenseMap<ValuePair, std::vector<ValuePair> > &PairableInstUsers,
@@ -438,8 +443,8 @@ namespace {
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
SE = &getAnalysis<ScalarEvolution>();
DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : 0;
- TTI = IgnoreTargetInfo ? 0 : &getAnalysis<TargetTransformInfo>();
+ DL = DLP ? &DLP->getDataLayout() : nullptr;
+ TTI = IgnoreTargetInfo ? nullptr : &getAnalysis<TargetTransformInfo>();
return vectorizeBB(BB);
}
@@ -674,7 +679,20 @@ namespace {
case Intrinsic::exp:
case Intrinsic::exp2:
case Intrinsic::pow:
+ case Intrinsic::round:
+ case Intrinsic::copysign:
+ case Intrinsic::ceil:
+ case Intrinsic::nearbyint:
+ case Intrinsic::rint:
+ case Intrinsic::trunc:
+ case Intrinsic::floor:
+ case Intrinsic::fabs:
return Config.VectorizeMath;
+ case Intrinsic::bswap:
+ case Intrinsic::ctpop:
+ case Intrinsic::ctlz:
+ case Intrinsic::cttz:
+ return Config.VectorizeBitManipulations;
case Intrinsic::fma:
case Intrinsic::fmuladd:
return Config.VectorizeFMA;
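Illustrative candidates for the new bit-manipulation class, assuming clang's usual lowering of __builtin_clz to llvm.ctlz.i32 with an i1 true ("is_zero_undef") second operand. Since that flag matches across the two calls, the pair also satisfies the powi-style equal-second-argument rule added for ctlz/cttz in the hunks below:

    int clz2(int a, int b) {
      int x = __builtin_clz(a);   // llvm.ctlz.i32(%a, i1 true)
      int y = __builtin_clz(b);   // llvm.ctlz.i32(%b, i1 true)
      return x + y;               // pairable: same intrinsic, same i1 flag
    }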
@@ -878,7 +896,7 @@ namespace {
}
// We can't vectorize memory operations without target data
- if (DL == 0 && IsSimpleLoadStore)
+ if (!DL && IsSimpleLoadStore)
return false;
Type *T1, *T2;
@@ -915,7 +933,7 @@ namespace {
if (T2->isX86_FP80Ty() || T2->isPPC_FP128Ty() || T2->isX86_MMXTy())
return false;
- if ((!Config.VectorizePointers || DL == 0) &&
+ if ((!Config.VectorizePointers || !DL) &&
(T1->getScalarType()->isPointerTy() ||
T2->getScalarType()->isPointerTy()))
return false;
@@ -1049,7 +1067,7 @@ namespace {
(isa<ConstantVector>(JOp) || isa<ConstantDataVector>(JOp))) {
Op2VK = TargetTransformInfo::OK_NonUniformConstantValue;
Constant *SplatValue = cast<Constant>(IOp)->getSplatValue();
- if (SplatValue != NULL &&
+ if (SplatValue != nullptr &&
SplatValue == cast<Constant>(JOp)->getSplatValue())
Op2VK = TargetTransformInfo::OK_UniformConstantValue;
}
@@ -1079,13 +1097,14 @@ namespace {
CostSavings = ICost + JCost - VCost;
}
- // The powi intrinsic is special because only the first argument is
- // vectorized, the second arguments must be equal.
+ // The powi, ctlz and cttz intrinsics are special because only the first
+ // argument is vectorized; the second arguments must be equal.
CallInst *CI = dyn_cast<CallInst>(I);
Function *FI;
if (CI && (FI = CI->getCalledFunction())) {
Intrinsic::ID IID = (Intrinsic::ID) FI->getIntrinsicID();
- if (IID == Intrinsic::powi) {
+ if (IID == Intrinsic::powi || IID == Intrinsic::ctlz ||
+ IID == Intrinsic::cttz) {
Value *A1I = CI->getArgOperand(1),
*A1J = cast<CallInst>(J)->getArgOperand(1);
const SCEV *A1ISCEV = SE->getSCEV(A1I),
@@ -1109,7 +1128,8 @@ namespace {
assert(CI->getNumArgOperands() == CJ->getNumArgOperands() &&
"Intrinsic argument counts differ");
for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i) {
- if (IID == Intrinsic::powi && i == 1)
+ if ((IID == Intrinsic::powi || IID == Intrinsic::ctlz ||
+ IID == Intrinsic::cttz) && i == 1)
Tys.push_back(CI->getArgOperand(i)->getType());
else
Tys.push_back(getVecTypeForPair(CI->getArgOperand(i)->getType(),
@@ -1665,8 +1685,9 @@ namespace {
C2->first.second == C->first.first ||
C2->first.second == C->first.second ||
pairsConflict(C2->first, C->first, PairableInstUsers,
- UseCycleCheck ? &PairableInstUserMap : 0,
- UseCycleCheck ? &PairableInstUserPairSet : 0)) {
+ UseCycleCheck ? &PairableInstUserMap : nullptr,
+ UseCycleCheck ? &PairableInstUserPairSet
+ : nullptr)) {
if (C2->second >= C->second) {
CanAdd = false;
break;
@@ -1686,8 +1707,9 @@ namespace {
T->second == C->first.first ||
T->second == C->first.second ||
pairsConflict(*T, C->first, PairableInstUsers,
- UseCycleCheck ? &PairableInstUserMap : 0,
- UseCycleCheck ? &PairableInstUserPairSet : 0)) {
+ UseCycleCheck ? &PairableInstUserMap : nullptr,
+ UseCycleCheck ? &PairableInstUserPairSet
+ : nullptr)) {
CanAdd = false;
break;
}
@@ -1704,8 +1726,9 @@ namespace {
C2->first.second == C->first.first ||
C2->first.second == C->first.second ||
pairsConflict(C2->first, C->first, PairableInstUsers,
- UseCycleCheck ? &PairableInstUserMap : 0,
- UseCycleCheck ? &PairableInstUserPairSet : 0)) {
+ UseCycleCheck ? &PairableInstUserMap : nullptr,
+ UseCycleCheck ? &PairableInstUserPairSet
+ : nullptr)) {
CanAdd = false;
break;
}
@@ -1720,8 +1743,9 @@ namespace {
ChosenPairs.begin(), E2 = ChosenPairs.end();
C2 != E2; ++C2) {
if (pairsConflict(*C2, C->first, PairableInstUsers,
- UseCycleCheck ? &PairableInstUserMap : 0,
- UseCycleCheck ? &PairableInstUserPairSet : 0)) {
+ UseCycleCheck ? &PairableInstUserMap : nullptr,
+ UseCycleCheck ? &PairableInstUserPairSet
+ : nullptr)) {
CanAdd = false;
break;
}
@@ -1802,8 +1826,8 @@ namespace {
for (DenseMap<Value *, Value *>::iterator C = ChosenPairs.begin(),
E = ChosenPairs.end(); C != E; ++C) {
if (pairsConflict(*C, IJ, PairableInstUsers,
- UseCycleCheck ? &PairableInstUserMap : 0,
- UseCycleCheck ? &PairableInstUserPairSet : 0)) {
+ UseCycleCheck ? &PairableInstUserMap : nullptr,
+ UseCycleCheck ? &PairableInstUserPairSet : nullptr)) {
DoesConflict = true;
break;
}
@@ -2373,7 +2397,7 @@ namespace {
} while ((LIENext =
dyn_cast<InsertElementInst>(LIENext->getOperand(0))));
- LIENext = 0;
+ LIENext = nullptr;
Value *LIEPrev = UndefValue::get(ArgTypeH);
for (unsigned i = 0; i < numElemL; ++i) {
if (isa<UndefValue>(VectElemts[i])) continue;
@@ -2441,14 +2465,14 @@ namespace {
if ((LEE || LSV) && (HEE || HSV) && !IsSizeChangeShuffle) {
// We can have at most two unique vector inputs.
bool CanUseInputs = true;
- Value *I1, *I2 = 0;
+ Value *I1, *I2 = nullptr;
if (LEE) {
I1 = LEE->getOperand(0);
} else {
I1 = LSV->getOperand(0);
I2 = LSV->getOperand(1);
if (I2 == I1 || isa<UndefValue>(I2))
- I2 = 0;
+ I2 = nullptr;
}
if (HEE) {
@@ -2764,10 +2788,11 @@ namespace {
ReplacedOperands[o] = Intrinsic::getDeclaration(M, IID, VArgType);
continue;
- } else if (IID == Intrinsic::powi && o == 1) {
- // The second argument of powi is a single integer and we've already
- // checked that both arguments are equal. As a result, we just keep
- // I's second argument.
+ } else if ((IID == Intrinsic::powi || IID == Intrinsic::ctlz ||
+ IID == Intrinsic::cttz) && o == 1) {
+ // The second argument of powi/ctlz/cttz is a single integer/constant
+ // and we've already checked that both arguments are equal.
+ // As a result, we just keep I's second argument.
ReplacedOperands[o] = I->getOperand(o);
continue;
}
@@ -2952,7 +2977,7 @@ namespace {
switch (Kind) {
default:
- K->setMetadata(Kind, 0); // Remove unknown metadata
+ K->setMetadata(Kind, nullptr); // Remove unknown metadata
break;
case LLVMContext::MD_tbaa:
K->setMetadata(Kind, MDNode::getMostGenericTBAA(JMD, KMD));
@@ -3123,7 +3148,7 @@ namespace {
// Instruction insertion point:
Instruction *InsertionPt = K;
- Instruction *K1 = 0, *K2 = 0;
+ Instruction *K1 = nullptr, *K2 = nullptr;
replaceOutputsOfPair(Context, L, H, K, InsertionPt, K1, K2);
// The use dag of the first original instruction must be moved to after
@@ -3213,6 +3238,7 @@ VectorizeConfig::VectorizeConfig() {
VectorizePointers = !::NoPointers;
VectorizeCasts = !::NoCasts;
VectorizeMath = !::NoMath;
+ VectorizeBitManipulations = !::NoBitManipulation;
VectorizeFMA = !::NoFMA;
VectorizeSelect = !::NoSelect;
VectorizeCmp = !::NoCmp;
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index 9a98c44..34d8a10 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -42,9 +42,6 @@
//
//===----------------------------------------------------------------------===//
-#define LV_NAME "loop-vectorize"
-#define DEBUG_TYPE LV_NAME
-
#include "llvm/Transforms/Vectorize.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/EquivalenceClasses.h"
@@ -54,6 +51,7 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
@@ -67,7 +65,9 @@
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
@@ -85,16 +85,23 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/VectorUtils.h"
#include <algorithm>
#include <map>
+#include <tuple>
using namespace llvm;
using namespace llvm::PatternMatch;
+#define LV_NAME "loop-vectorize"
+#define DEBUG_TYPE LV_NAME
+
+STATISTIC(LoopsVectorized, "Number of loops vectorized");
+STATISTIC(LoopsAnalyzed, "Number of loops analyzed for vectorization");
+
static cl::opt<unsigned>
VectorizationFactor("force-vector-width", cl::init(0), cl::Hidden,
cl::desc("Sets the SIMD width. Zero is autoselect."));
@@ -223,8 +230,9 @@ public:
const TargetLibraryInfo *TLI, unsigned VecWidth,
unsigned UnrollFactor)
: OrigLoop(OrigLoop), SE(SE), LI(LI), DT(DT), DL(DL), TLI(TLI),
- VF(VecWidth), UF(UnrollFactor), Builder(SE->getContext()), Induction(0),
- OldInduction(0), WidenMap(UnrollFactor), Legal(0) {}
+ VF(VecWidth), UF(UnrollFactor), Builder(SE->getContext()),
+ Induction(nullptr), OldInduction(nullptr), WidenMap(UnrollFactor),
+ Legal(nullptr) {}
// Perform the actual loop widening (vectorization).
void vectorize(LoopVectorizationLegality *L) {
@@ -469,6 +477,24 @@ static void setDebugLocFromInst(IRBuilder<> &B, const Value *Ptr) {
B.SetCurrentDebugLocation(DebugLoc());
}
+#ifndef NDEBUG
+/// \return string containing a file name and a line # for the given loop.
+static std::string getDebugLocString(const Loop *L) {
+ std::string Result;
+ if (L) {
+ raw_string_ostream OS(Result);
+ const DebugLoc LoopDbgLoc = L->getStartLoc();
+ if (!LoopDbgLoc.isUnknown())
+ LoopDbgLoc.print(L->getHeader()->getContext(), OS);
+ else
+ // Just print the module name.
+ OS << L->getHeader()->getParent()->getParent()->getModuleIdentifier();
+ OS.flush();
+ }
+ return Result;
+}
+#endif
+
/// LoopVectorizationLegality checks if it is legal to vectorize a loop, and
/// to what vectorization factor.
/// This class does not look at the profitability of vectorization, only the
@@ -491,8 +517,8 @@ public:
LoopVectorizationLegality(Loop *L, ScalarEvolution *SE, const DataLayout *DL,
DominatorTree *DT, TargetLibraryInfo *TLI)
: NumLoads(0), NumStores(0), NumPredStores(0), TheLoop(L), SE(SE), DL(DL),
- DT(DT), TLI(TLI), Induction(0), WidestIndTy(0), HasFunNoNaNAttr(false),
- MaxSafeDepDistBytes(-1U) {}
+ DT(DT), TLI(TLI), Induction(nullptr), WidestIndTy(nullptr),
+ HasFunNoNaNAttr(false), MaxSafeDepDistBytes(-1U) {}
/// This enum represents the kinds of reductions that we support.
enum ReductionKind {
@@ -530,7 +556,7 @@ public:
/// This struct holds information about reduction variables.
struct ReductionDescriptor {
- ReductionDescriptor() : StartValue(0), LoopExitInstr(0),
+ ReductionDescriptor() : StartValue(nullptr), LoopExitInstr(nullptr),
Kind(RK_NoReduction), MinMaxKind(MRK_Invalid) {}
ReductionDescriptor(Value *Start, Instruction *Exit, ReductionKind K,
@@ -602,7 +628,7 @@ public:
/// A struct for saving information about induction variables.
struct InductionInfo {
InductionInfo(Value *Start, InductionKind K) : StartValue(Start), IK(K) {}
- InductionInfo() : StartValue(0), IK(IK_NoInduction) {}
+ InductionInfo() : StartValue(nullptr), IK(IK_NoInduction) {}
/// Start value.
TrackingVH<Value> StartValue;
/// Induction kind.
@@ -789,7 +815,8 @@ public:
/// then this vectorization factor will be selected if vectorization is
/// possible.
VectorizationFactor selectVectorizationFactor(bool OptForSize,
- unsigned UserVF);
+ unsigned UserVF,
+ bool ForceVectorization);
/// \return The size (in bits) of the widest type in the code that
/// needs to be vectorized. We ignore values that remain scalar such as
@@ -856,35 +883,32 @@ private:
/// Utility class for getting and setting loop vectorizer hints in the form
/// of loop metadata.
-struct LoopVectorizeHints {
- /// Vectorization width.
- unsigned Width;
- /// Vectorization unroll factor.
- unsigned Unroll;
- /// Vectorization forced (-1 not selected, 0 force disabled, 1 force enabled)
- int Force;
+class LoopVectorizeHints {
+public:
+ enum ForceKind {
+ FK_Undefined = -1, ///< Not selected.
+ FK_Disabled = 0, ///< Forcing disabled.
+ FK_Enabled = 1, ///< Forcing enabled.
+ };
LoopVectorizeHints(const Loop *L, bool DisableUnrolling)
- : Width(VectorizationFactor)
- , Unroll(DisableUnrolling ? 1 : VectorizationUnroll)
- , Force(-1)
- , LoopID(L->getLoopID()) {
+ : Width(VectorizationFactor),
+ Unroll(DisableUnrolling),
+ Force(FK_Undefined),
+ LoopID(L->getLoopID()) {
getHints(L);
- // The command line options override any loop metadata except for when
- // width == 1 which is used to indicate the loop is already vectorized.
- if (VectorizationFactor.getNumOccurrences() > 0 && Width != 1)
- Width = VectorizationFactor;
+ // force-vector-unroll overrides DisableUnrolling.
if (VectorizationUnroll.getNumOccurrences() > 0)
Unroll = VectorizationUnroll;
- DEBUG(if (DisableUnrolling && Unroll == 1)
- dbgs() << "LV: Unrolling disabled by the pass manager\n");
+ DEBUG(if (DisableUnrolling && Unroll == 1) dbgs()
+ << "LV: Unrolling disabled by the pass manager\n");
}
/// Return the loop vectorizer metadata prefix.
static StringRef Prefix() { return "llvm.vectorizer."; }
- MDNode *createHint(LLVMContext &Context, StringRef Name, unsigned V) {
+ MDNode *createHint(LLVMContext &Context, StringRef Name, unsigned V) const {
SmallVector<Value*, 2> Vals;
Vals.push_back(MDString::get(Context, Name));
Vals.push_back(ConstantInt::get(Type::getInt32Ty(Context), V));
@@ -918,9 +942,12 @@ struct LoopVectorizeHints {
LoopID = NewLoopID;
}
-private:
- MDNode *LoopID;
+ unsigned getWidth() const { return Width; }
+ unsigned getUnroll() const { return Unroll; }
+ enum ForceKind getForce() const { return Force; }
+ MDNode *getLoopID() const { return LoopID; }
+private:
/// Find hints specified in the loop metadata.
void getHints(const Loop *L) {
if (!LoopID)
@@ -931,7 +958,7 @@ private:
assert(LoopID->getOperand(0) == LoopID && "invalid loop id");
for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) {
- const MDString *S = 0;
+ const MDString *S = nullptr;
SmallVector<Value*, 4> Args;
// The expected hint is either a MDString or a MDNode with the first
@@ -980,13 +1007,23 @@ private:
DEBUG(dbgs() << "LV: ignoring invalid unroll hint metadata\n");
} else if (Hint == "enable") {
if (C->getBitWidth() == 1)
- Force = Val;
+ Force = Val == 1 ? LoopVectorizeHints::FK_Enabled
+ : LoopVectorizeHints::FK_Disabled;
else
DEBUG(dbgs() << "LV: ignoring invalid enable hint metadata\n");
} else {
DEBUG(dbgs() << "LV: ignoring unknown hint " << Hint << '\n');
}
}
+
+ /// Vectorization width.
+ unsigned Width;
+ /// Vectorization unroll factor.
+ unsigned Unroll;
+ /// Vectorization forced
+ enum ForceKind Force;
+
+ MDNode *LoopID;
};
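For context, the metadata getHints() walks normally originates from the loop pragma. A sketch, assuming a 3.5-era clang with loop-pragma support: vectorize(enable) maps to "llvm.vectorizer.enable" and hence FK_Enabled, and vectorize_width(4) to "llvm.vectorizer.width":

    void axpy(int n, float *a, const float *b, const float *c) {
    #pragma clang loop vectorize(enable) vectorize_width(4)
      for (int i = 0; i < n; ++i)
        a[i] = b[i] + c[i];
    }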
static void addInnerLoop(Loop &L, SmallVectorImpl<Loop *> &V) {
@@ -1024,7 +1061,7 @@ struct LoopVectorize : public FunctionPass {
bool runOnFunction(Function &F) override {
SE = &getAnalysis<ScalarEvolution>();
DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : 0;
+ DL = DLP ? &DLP->getDataLayout() : nullptr;
LI = &getAnalysis<LoopInfo>();
TTI = &getAnalysis<TargetTransformInfo>();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
@@ -1041,8 +1078,9 @@ struct LoopVectorize : public FunctionPass {
if (!TTI->getNumberOfRegisters(true))
return false;
- if (DL == NULL) {
- DEBUG(dbgs() << "LV: Not vectorizing: Missing data layout\n");
+ if (!DL) {
+ DEBUG(dbgs() << "\nLV: Not vectorizing " << F.getName()
+ << ": Missing data layout\n");
return false;
}
@@ -1054,6 +1092,8 @@ struct LoopVectorize : public FunctionPass {
for (Loop *L : *LI)
addInnerLoop(*L, Worklist);
+ LoopsAnalyzed += Worklist.size();
+
// Now walk the identified inner loops.
bool Changed = false;
while (!Worklist.empty())
@@ -1065,26 +1105,56 @@ struct LoopVectorize : public FunctionPass {
bool processLoop(Loop *L) {
assert(L->empty() && "Only process inner loops.");
- DEBUG(dbgs() << "LV: Checking a loop in \"" <<
- L->getHeader()->getParent()->getName() << "\"\n");
+
+#ifndef NDEBUG
+ const std::string DebugLocStr = getDebugLocString(L);
+#endif /* NDEBUG */
+
+ DEBUG(dbgs() << "\nLV: Checking a loop in \""
+ << L->getHeader()->getParent()->getName() << "\" from "
+ << DebugLocStr << "\n");
LoopVectorizeHints Hints(L, DisableUnrolling);
- if (Hints.Force == 0) {
+ DEBUG(dbgs() << "LV: Loop hints:"
+ << " force="
+ << (Hints.getForce() == LoopVectorizeHints::FK_Disabled
+ ? "disabled"
+ : (Hints.getForce() == LoopVectorizeHints::FK_Enabled
+ ? "enabled"
+ : "?")) << " width=" << Hints.getWidth()
+ << " unroll=" << Hints.getUnroll() << "\n");
+
+ if (Hints.getForce() == LoopVectorizeHints::FK_Disabled) {
DEBUG(dbgs() << "LV: Not vectorizing: #pragma vectorize disable.\n");
return false;
}
- if (!AlwaysVectorize && Hints.Force != 1) {
+ if (!AlwaysVectorize && Hints.getForce() != LoopVectorizeHints::FK_Enabled) {
DEBUG(dbgs() << "LV: Not vectorizing: No #pragma vectorize enable.\n");
return false;
}
- if (Hints.Width == 1 && Hints.Unroll == 1) {
+ if (Hints.getWidth() == 1 && Hints.getUnroll() == 1) {
DEBUG(dbgs() << "LV: Not vectorizing: Disabled/already vectorized.\n");
return false;
}
+ // Check the loop for a trip count threshold:
+ // do not vectorize loops with a tiny trip count.
+ BasicBlock *Latch = L->getLoopLatch();
+ const unsigned TC = SE->getSmallConstantTripCount(L, Latch);
+ if (TC > 0u && TC < TinyTripCountVectorThreshold) {
+ DEBUG(dbgs() << "LV: Found a loop with a very small trip count. "
+ << "This loop is not worth vectorizing.");
+ if (Hints.getForce() == LoopVectorizeHints::FK_Enabled)
+ DEBUG(dbgs() << " But vectorizing was explicitly forced.\n");
+ else {
+ DEBUG(dbgs() << "\n");
+ return false;
+ }
+ }
+
// Check if it is legal to vectorize the loop.
LoopVectorizationLegality LVL(L, SE, DL, DT, TLI);
if (!LVL.canVectorize()) {
@@ -1098,8 +1168,8 @@ struct LoopVectorize : public FunctionPass {
// Check the function attributes to find out if this function should be
// optimized for size.
Function *F = L->getHeader()->getParent();
- bool OptForSize =
- Hints.Force != 1 && F->hasFnAttribute(Attribute::OptimizeForSize);
+ bool OptForSize = Hints.getForce() != LoopVectorizeHints::FK_Enabled &&
+ F->hasFnAttribute(Attribute::OptimizeForSize);
// Compute the weighted frequency of this loop being executed and see if it
// is less than 20% of the function entry baseline frequency. Note that we
@@ -1108,7 +1178,8 @@ struct LoopVectorize : public FunctionPass {
// exactly what block frequency models.
if (LoopVectorizeWithBlockFrequency) {
BlockFrequency LoopEntryFreq = BFI->getBlockFreq(L->getLoopPreheader());
- if (Hints.Force != 1 && LoopEntryFreq < ColdEntryFreq)
+ if (Hints.getForce() != LoopVectorizeHints::FK_Enabled &&
+ LoopEntryFreq < ColdEntryFreq)
OptForSize = true;
}
@@ -1123,14 +1194,17 @@ struct LoopVectorize : public FunctionPass {
}
// Select the optimal vectorization factor.
- LoopVectorizationCostModel::VectorizationFactor VF;
- VF = CM.selectVectorizationFactor(OptForSize, Hints.Width);
+ const LoopVectorizationCostModel::VectorizationFactor VF =
+ CM.selectVectorizationFactor(OptForSize, Hints.getWidth(),
+ Hints.getForce() ==
+ LoopVectorizeHints::FK_Enabled);
+
// Select the unroll factor.
- unsigned UF = CM.selectUnrollFactor(OptForSize, Hints.Unroll, VF.Width,
- VF.Cost);
+ const unsigned UF =
+ CM.selectUnrollFactor(OptForSize, Hints.getUnroll(), VF.Width, VF.Cost);
- DEBUG(dbgs() << "LV: Found a vectorizable loop ("<< VF.Width << ") in "<<
- F->getParent()->getModuleIdentifier() << '\n');
+ DEBUG(dbgs() << "LV: Found a vectorizable loop (" << VF.Width << ") in "
+ << DebugLocStr << '\n');
DEBUG(dbgs() << "LV: Unroll Factor is " << UF << '\n');
if (VF.Width == 1) {
@@ -1138,6 +1212,13 @@ struct LoopVectorize : public FunctionPass {
if (UF == 1)
return false;
DEBUG(dbgs() << "LV: Trying to at least unroll the loops.\n");
+
+ // Report the unrolling decision.
+ emitOptimizationRemark(F->getContext(), DEBUG_TYPE, *F, L->getStartLoc(),
+ Twine("unrolled with interleaving factor " +
+ Twine(UF) +
+ " (vectorization not beneficial)"));
+
// We decided not to vectorize, but we may want to unroll.
InnerLoopUnroller Unroller(L, SE, LI, DT, DL, TLI, UF);
Unroller.vectorize(&LVL);
@@ -1145,6 +1226,13 @@ struct LoopVectorize : public FunctionPass {
// If we decided that it is *legal* to vectorize the loop then do it.
InnerLoopVectorizer LB(L, SE, LI, DT, DL, TLI, VF.Width, UF);
LB.vectorize(&LVL);
+ ++LoopsVectorized;
+
+ // Report the vectorization decision.
+ emitOptimizationRemark(
+ F->getContext(), DEBUG_TYPE, *F, L->getStartLoc(),
+ Twine("vectorized loop (vectorization factor: ") + Twine(VF.Width) +
+ ", unrolling interleave factor: " + Twine(UF) + ")");
}
// Mark the loop as already vectorized to avoid vectorizing again.
@@ -1188,7 +1276,7 @@ static Value *stripIntegerCast(Value *V) {
/// \p Ptr.
static const SCEV *replaceSymbolicStrideSCEV(ScalarEvolution *SE,
ValueToValueMap &PtrToStride,
- Value *Ptr, Value *OrigPtr = 0) {
+ Value *Ptr, Value *OrigPtr = nullptr) {
const SCEV *OrigSCEV = SE->getSCEV(Ptr);
@@ -1355,7 +1443,7 @@ int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) {
// We can emit wide load/stores only if the last non-zero index is the
// induction variable.
- const SCEV *Last = 0;
+ const SCEV *Last = nullptr;
if (!Strides.count(Gep))
Last = SE->getSCEV(Gep->getOperand(InductionOperand));
else {
@@ -1604,17 +1692,17 @@ void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr, bool IfPredic
// Does this instruction return a value ?
bool IsVoidRetTy = Instr->getType()->isVoidTy();
- Value *UndefVec = IsVoidRetTy ? 0 :
+ Value *UndefVec = IsVoidRetTy ? nullptr :
UndefValue::get(VectorType::get(Instr->getType(), VF));
// Create a new entry in the WidenMap and initialize it to Undef or Null.
VectorParts &VecResults = WidenMap.splat(Instr, UndefVec);
Instruction *InsertPt = Builder.GetInsertPoint();
BasicBlock *IfBlock = Builder.GetInsertBlock();
- BasicBlock *CondBlock = 0;
+ BasicBlock *CondBlock = nullptr;
VectorParts Cond;
- Loop *VectorLp = 0;
+ Loop *VectorLp = nullptr;
if (IfPredicateStore) {
assert(Instr->getParent()->getSinglePredecessor() &&
"Only support single predecessor blocks");
@@ -1630,7 +1718,7 @@ void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr, bool IfPredic
for (unsigned Width = 0; Width < VF; ++Width) {
// Start if-block.
- Value *Cmp = 0;
+ Value *Cmp = nullptr;
if (IfPredicateStore) {
Cmp = Builder.CreateExtractElement(Cond[Part], Builder.getInt32(Width));
Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Cmp, ConstantInt::get(Cmp->getType(), 1));
@@ -1681,21 +1769,21 @@ static Instruction *getFirstInst(Instruction *FirstInst, Value *V,
if (FirstInst)
return FirstInst;
if (Instruction *I = dyn_cast<Instruction>(V))
- return I->getParent() == Loc->getParent() ? I : 0;
- return 0;
+ return I->getParent() == Loc->getParent() ? I : nullptr;
+ return nullptr;
}
std::pair<Instruction *, Instruction *>
InnerLoopVectorizer::addStrideCheck(Instruction *Loc) {
- Instruction *tnullptr = 0;
+ Instruction *tnullptr = nullptr;
if (!Legal->mustCheckStrides())
return std::pair<Instruction *, Instruction *>(tnullptr, tnullptr);
IRBuilder<> ChkBuilder(Loc);
// Emit checks.
- Value *Check = 0;
- Instruction *FirstInst = 0;
+ Value *Check = nullptr;
+ Instruction *FirstInst = nullptr;
for (SmallPtrSet<Value *, 8>::iterator SI = Legal->strides_begin(),
SE = Legal->strides_end();
SI != SE; ++SI) {
@@ -1727,7 +1815,7 @@ InnerLoopVectorizer::addRuntimeCheck(Instruction *Loc) {
LoopVectorizationLegality::RuntimePointerCheck *PtrRtCheck =
Legal->getRuntimePointerCheck();
- Instruction *tnullptr = 0;
+ Instruction *tnullptr = nullptr;
if (!PtrRtCheck->Need)
return std::pair<Instruction *, Instruction *>(tnullptr, tnullptr);
@@ -1737,7 +1825,7 @@ InnerLoopVectorizer::addRuntimeCheck(Instruction *Loc) {
LLVMContext &Ctx = Loc->getContext();
SCEVExpander Exp(*SE, "induction");
- Instruction *FirstInst = 0;
+ Instruction *FirstInst = nullptr;
for (unsigned i = 0; i < NumPointers; ++i) {
Value *Ptr = PtrRtCheck->Pointers[i];
@@ -1764,7 +1852,7 @@ InnerLoopVectorizer::addRuntimeCheck(Instruction *Loc) {
IRBuilder<> ChkBuilder(Loc);
// Our instructions might fold to a constant.
- Value *MemoryRuntimeCheck = 0;
+ Value *MemoryRuntimeCheck = nullptr;
for (unsigned i = 0; i < NumPointers; ++i) {
for (unsigned j = i+1; j < NumPointers; ++j) {
// No need to check if two readonly pointers intersect.
@@ -2028,7 +2116,7 @@ void InnerLoopVectorizer::createEmptyLoop() {
// start value.
// This variable saves the new starting index for the scalar loop.
- PHINode *ResumeIndex = 0;
+ PHINode *ResumeIndex = nullptr;
LoopVectorizationLegality::InductionList::iterator I, E;
LoopVectorizationLegality::InductionList *List = Legal->getInductionVars();
// Set builder to point to last bypass block.
@@ -2044,9 +2132,9 @@ void InnerLoopVectorizer::createEmptyLoop() {
// truncated version for the scalar loop.
PHINode *TruncResumeVal = (OrigPhi == OldInduction) ?
PHINode::Create(OrigPhi->getType(), 2, "trunc.resume.val",
- MiddleBlock->getTerminator()) : 0;
+ MiddleBlock->getTerminator()) : nullptr;
- Value *EndValue = 0;
+ Value *EndValue = nullptr;
switch (II.IK) {
case LoopVectorizationLegality::IK_NoInduction:
llvm_unreachable("Unknown induction");
@@ -2209,148 +2297,6 @@ LoopVectorizationLegality::getReductionIdentity(ReductionKind K, Type *Tp) {
}
}
-static Intrinsic::ID checkUnaryFloatSignature(const CallInst &I,
- Intrinsic::ID ValidIntrinsicID) {
- if (I.getNumArgOperands() != 1 ||
- !I.getArgOperand(0)->getType()->isFloatingPointTy() ||
- I.getType() != I.getArgOperand(0)->getType() ||
- !I.onlyReadsMemory())
- return Intrinsic::not_intrinsic;
-
- return ValidIntrinsicID;
-}
-
-static Intrinsic::ID checkBinaryFloatSignature(const CallInst &I,
- Intrinsic::ID ValidIntrinsicID) {
- if (I.getNumArgOperands() != 2 ||
- !I.getArgOperand(0)->getType()->isFloatingPointTy() ||
- !I.getArgOperand(1)->getType()->isFloatingPointTy() ||
- I.getType() != I.getArgOperand(0)->getType() ||
- I.getType() != I.getArgOperand(1)->getType() ||
- !I.onlyReadsMemory())
- return Intrinsic::not_intrinsic;
-
- return ValidIntrinsicID;
-}
-
-
-static Intrinsic::ID
-getIntrinsicIDForCall(CallInst *CI, const TargetLibraryInfo *TLI) {
- // If we have an intrinsic call, check if it is trivially vectorizable.
- if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI)) {
- switch (II->getIntrinsicID()) {
- case Intrinsic::sqrt:
- case Intrinsic::sin:
- case Intrinsic::cos:
- case Intrinsic::exp:
- case Intrinsic::exp2:
- case Intrinsic::log:
- case Intrinsic::log10:
- case Intrinsic::log2:
- case Intrinsic::fabs:
- case Intrinsic::copysign:
- case Intrinsic::floor:
- case Intrinsic::ceil:
- case Intrinsic::trunc:
- case Intrinsic::rint:
- case Intrinsic::nearbyint:
- case Intrinsic::round:
- case Intrinsic::pow:
- case Intrinsic::fma:
- case Intrinsic::fmuladd:
- case Intrinsic::lifetime_start:
- case Intrinsic::lifetime_end:
- return II->getIntrinsicID();
- default:
- return Intrinsic::not_intrinsic;
- }
- }
-
- if (!TLI)
- return Intrinsic::not_intrinsic;
-
- LibFunc::Func Func;
- Function *F = CI->getCalledFunction();
- // We're going to make assumptions on the semantics of the functions, check
- // that the target knows that it's available in this environment and it does
- // not have local linkage.
- if (!F || F->hasLocalLinkage() || !TLI->getLibFunc(F->getName(), Func))
- return Intrinsic::not_intrinsic;
-
- // Otherwise check if we have a call to a function that can be turned into a
- // vector intrinsic.
- switch (Func) {
- default:
- break;
- case LibFunc::sin:
- case LibFunc::sinf:
- case LibFunc::sinl:
- return checkUnaryFloatSignature(*CI, Intrinsic::sin);
- case LibFunc::cos:
- case LibFunc::cosf:
- case LibFunc::cosl:
- return checkUnaryFloatSignature(*CI, Intrinsic::cos);
- case LibFunc::exp:
- case LibFunc::expf:
- case LibFunc::expl:
- return checkUnaryFloatSignature(*CI, Intrinsic::exp);
- case LibFunc::exp2:
- case LibFunc::exp2f:
- case LibFunc::exp2l:
- return checkUnaryFloatSignature(*CI, Intrinsic::exp2);
- case LibFunc::log:
- case LibFunc::logf:
- case LibFunc::logl:
- return checkUnaryFloatSignature(*CI, Intrinsic::log);
- case LibFunc::log10:
- case LibFunc::log10f:
- case LibFunc::log10l:
- return checkUnaryFloatSignature(*CI, Intrinsic::log10);
- case LibFunc::log2:
- case LibFunc::log2f:
- case LibFunc::log2l:
- return checkUnaryFloatSignature(*CI, Intrinsic::log2);
- case LibFunc::fabs:
- case LibFunc::fabsf:
- case LibFunc::fabsl:
- return checkUnaryFloatSignature(*CI, Intrinsic::fabs);
- case LibFunc::copysign:
- case LibFunc::copysignf:
- case LibFunc::copysignl:
- return checkBinaryFloatSignature(*CI, Intrinsic::copysign);
- case LibFunc::floor:
- case LibFunc::floorf:
- case LibFunc::floorl:
- return checkUnaryFloatSignature(*CI, Intrinsic::floor);
- case LibFunc::ceil:
- case LibFunc::ceilf:
- case LibFunc::ceill:
- return checkUnaryFloatSignature(*CI, Intrinsic::ceil);
- case LibFunc::trunc:
- case LibFunc::truncf:
- case LibFunc::truncl:
- return checkUnaryFloatSignature(*CI, Intrinsic::trunc);
- case LibFunc::rint:
- case LibFunc::rintf:
- case LibFunc::rintl:
- return checkUnaryFloatSignature(*CI, Intrinsic::rint);
- case LibFunc::nearbyint:
- case LibFunc::nearbyintf:
- case LibFunc::nearbyintl:
- return checkUnaryFloatSignature(*CI, Intrinsic::nearbyint);
- case LibFunc::round:
- case LibFunc::roundf:
- case LibFunc::roundl:
- return checkUnaryFloatSignature(*CI, Intrinsic::round);
- case LibFunc::pow:
- case LibFunc::powf:
- case LibFunc::powl:
- return checkBinaryFloatSignature(*CI, Intrinsic::pow);
- }
-
- return Intrinsic::not_intrinsic;
-}
-
/// This function translates the reduction kind to an LLVM binary operator.
static unsigned
getReductionBinOp(LoopVectorizationLegality::ReductionKind Kind) {
@@ -2651,7 +2597,7 @@ void InnerLoopVectorizer::vectorizeLoop() {
assert(isPowerOf2_32(VF) &&
"Reduction emission only supported for pow2 vectors!");
Value *TmpVec = ReducedPartRdx;
- SmallVector<Constant*, 32> ShuffleMask(VF, 0);
+ SmallVector<Constant*, 32> ShuffleMask(VF, nullptr);
for (unsigned i = VF; i != 1; i >>= 1) {
// Move the upper half of the vector to the lower half.
for (unsigned j = 0; j != i/2; ++j)
@@ -3049,7 +2995,7 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) {
VectorParts &A = getVectorValue(it->getOperand(0));
VectorParts &B = getVectorValue(it->getOperand(1));
for (unsigned Part = 0; Part < UF; ++Part) {
- Value *C = 0;
+ Value *C = nullptr;
if (FCmp)
C = Builder.CreateFCmp(Cmp->getPredicate(), A[Part], B[Part]);
else
@@ -3275,15 +3221,6 @@ bool LoopVectorizationLegality::canVectorize() {
return false;
}
- // Do not loop-vectorize loops with a tiny trip count.
- BasicBlock *Latch = TheLoop->getLoopLatch();
- unsigned TC = SE->getSmallConstantTripCount(TheLoop, Latch);
- if (TC > 0u && TC < TinyTripCountVectorThreshold) {
- DEBUG(dbgs() << "LV: Found a loop with a very small trip count. " <<
- "This loop is not worth vectorizing.\n");
- return false;
- }
-
// Check if we can vectorize the instructions and CFG in this loop.
if (!canVectorizeInstrs()) {
DEBUG(dbgs() << "LV: Can't vectorize the instructions or CFG\n");
@@ -3536,14 +3473,14 @@ static Value *stripGetElementPtr(Value *Ptr, ScalarEvolution *SE,
///\brief Look for a cast use of the passed value.
static Value *getUniqueCastUse(Value *Ptr, Loop *Lp, Type *Ty) {
- Value *UniqueCast = 0;
+ Value *UniqueCast = nullptr;
for (User *U : Ptr->users()) {
CastInst *CI = dyn_cast<CastInst>(U);
if (CI && CI->getType() == Ty) {
if (!UniqueCast)
UniqueCast = CI;
else
- return 0;
+ return nullptr;
}
}
return UniqueCast;
@@ -3556,7 +3493,7 @@ static Value *getStrideFromPointer(Value *Ptr, ScalarEvolution *SE,
const DataLayout *DL, Loop *Lp) {
const PointerType *PtrTy = dyn_cast<PointerType>(Ptr->getType());
if (!PtrTy || PtrTy->isAggregateType())
- return 0;
+ return nullptr;
// Try to remove a gep instruction to make the pointer (actually index at this
// point) easier to analyze. If OrigPtr is equal to Ptr we are analyzing the
@@ -3576,11 +3513,11 @@ static Value *getStrideFromPointer(Value *Ptr, ScalarEvolution *SE,
const SCEVAddRecExpr *S = dyn_cast<SCEVAddRecExpr>(V);
if (!S)
- return 0;
+ return nullptr;
V = S->getStepRecurrence(*SE);
if (!V)
- return 0;
+ return nullptr;
// Strip off the size of access multiplication if we are still analyzing the
// pointer.
@@ -3588,24 +3525,24 @@ static Value *getStrideFromPointer(Value *Ptr, ScalarEvolution *SE,
DL->getTypeAllocSize(PtrTy->getElementType());
if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(V)) {
if (M->getOperand(0)->getSCEVType() != scConstant)
- return 0;
+ return nullptr;
const APInt &APStepVal =
cast<SCEVConstant>(M->getOperand(0))->getValue()->getValue();
// Huge step value - give up.
if (APStepVal.getBitWidth() > 64)
- return 0;
+ return nullptr;
int64_t StepVal = APStepVal.getSExtValue();
if (PtrAccessSize != StepVal)
- return 0;
+ return nullptr;
V = M->getOperand(1);
}
}
// Strip off casts.
- Type *StripedOffRecurrenceCast = 0;
+ Type *StripedOffRecurrenceCast = nullptr;
if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(V)) {
StripedOffRecurrenceCast = C->getType();
V = C->getOperand();
@@ -3614,11 +3551,11 @@ static Value *getStrideFromPointer(Value *Ptr, ScalarEvolution *SE,
// Look for the loop invariant symbolic value.
const SCEVUnknown *U = dyn_cast<SCEVUnknown>(V);
if (!U)
- return 0;
+ return nullptr;
Value *Stride = U->getValue();
if (!Lp->isLoopInvariant(Stride))
- return 0;
+ return nullptr;
// If we have stripped off the recurrence cast we have to make sure that we
// return the value that is used in this loop so that we can replace it later.
@@ -3629,7 +3566,7 @@ static Value *getStrideFromPointer(Value *Ptr, ScalarEvolution *SE,
}
void LoopVectorizationLegality::collectStridedAcccess(Value *MemAccess) {
- Value *Ptr = 0;
+ Value *Ptr = nullptr;
if (LoadInst *LI = dyn_cast<LoadInst>(MemAccess))
Ptr = LI->getPointerOperand();
else if (StoreInst *SI = dyn_cast<StoreInst>(MemAccess))
@@ -4628,7 +4565,7 @@ bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi,
// We only allow for a single reduction value to be used outside the loop.
// This includes users of the reduction, variables (which form a cycle
// which ends in the phi node).
- Instruction *ExitInstruction = 0;
+ Instruction *ExitInstruction = nullptr;
// Indicates that we found a reduction operation in our scan.
bool FoundReduxOp = false;
@@ -4642,7 +4579,7 @@ bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi,
// the number of instruction we saw from the recognized min/max pattern,
// to make sure we only see exactly the two instructions.
unsigned NumCmpSelectPatternInst = 0;
- ReductionInstDesc ReduxDesc(false, 0);
+ ReductionInstDesc ReduxDesc(false, nullptr);
SmallPtrSet<Instruction *, 8> VisitedInsts;
SmallVector<Instruction *, 8> Worklist;
@@ -4725,7 +4662,7 @@ bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi,
// being used. In this case the user uses the value of the previous
// iteration, in which case we would lose "VF-1" iterations of the
// reduction operation if we vectorize.
- if (ExitInstruction != 0 || Cur == Phi)
+ if (ExitInstruction != nullptr || Cur == Phi)
return false;
// The instruction used by an outside user must be the last instruction
@@ -4741,7 +4678,7 @@ bool LoopVectorizationLegality::AddReductionVar(PHINode *Phi,
// Process instructions only once (termination). Each reduction cycle
// value must only be used once, except by phi nodes and min/max
// reductions which are represented as a cmp followed by a select.
- ReductionInstDesc IgnoredVal(false, 0);
+ ReductionInstDesc IgnoredVal(false, nullptr);
if (VisitedInsts.insert(UI)) {
if (isa<PHINode>(UI))
PHIs.push_back(UI);
@@ -4795,8 +4732,8 @@ LoopVectorizationLegality::isMinMaxSelectCmpPattern(Instruction *I,
assert((isa<ICmpInst>(I) || isa<FCmpInst>(I) || isa<SelectInst>(I)) &&
"Expect a select instruction");
- Instruction *Cmp = 0;
- SelectInst *Select = 0;
+ Instruction *Cmp = nullptr;
+ SelectInst *Select = nullptr;
// We must handle the select(cmp()) as a single instruction. Advance to the
// select.
@@ -4982,7 +4919,8 @@ bool LoopVectorizationLegality::blockCanBePredicated(BasicBlock *BB,
LoopVectorizationCostModel::VectorizationFactor
LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize,
- unsigned UserVF) {
+ unsigned UserVF,
+ bool ForceVectorization) {
// Width 1 means no vectorize
VectorizationFactor Factor = { 1U, 0U };
if (OptForSize && Legal->getRuntimePointerCheck()->Need) {
@@ -5052,8 +4990,18 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize,
}
float Cost = expectedCost(1);
+#ifndef NDEBUG
+ const float ScalarCost = Cost;
+#endif /* NDEBUG */
unsigned Width = 1;
- DEBUG(dbgs() << "LV: Scalar loop costs: " << (int)Cost << ".\n");
+ DEBUG(dbgs() << "LV: Scalar loop costs: " << (int)ScalarCost << ".\n");
+
+ // Ignore scalar width, because the user explicitly wants vectorization.
+ if (ForceVectorization && VF > 1) {
+ Width = 2;
+ Cost = expectedCost(Width) / (float)Width;
+ }
+
for (unsigned i=2; i <= VF; i*=2) {
// Notice that the vector loop needs to be executed less times, so
// we need to divide the cost of the vector loops by the width of
@@ -5067,7 +5015,10 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize,
}
}
- DEBUG(dbgs() << "LV: Selecting VF = : "<< Width << ".\n");
+ DEBUG(if (ForceVectorization && Width > 1 && Cost >= ScalarCost) dbgs()
+ << "LV: Vectorization seems to be not beneficial, "
+ << "but was forced by a user.\n");
+ DEBUG(dbgs() << "LV: Selecting VF: "<< Width << ".\n");
Factor.Width = Width;
Factor.Cost = Width * Cost;
return Factor;
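A toy model of this selection loop with made-up costs, showing the effect of the new ForceVectorization path:

    // expectedCost stands in for the real per-width cost query.
    static float expectedCost(unsigned W) {
      return W == 1 ? 8.0f : W == 2 ? 10.0f : 12.0f;   // invented numbers
    }

    static unsigned selectWidth(unsigned MaxVF, bool Force) {
      float Cost = expectedCost(1);            // scalar baseline: 8.0
      unsigned Width = 1;
      if (Force && MaxVF > 1) {                // forced: skip the baseline
        Width = 2;
        Cost = expectedCost(2) / 2;            // 5.0 per lane
      }
      for (unsigned W = 2; W <= MaxVF; W *= 2) {
        float PerLane = expectedCost(W) / W;   // W=2 -> 5.0, W=4 -> 3.0
        if (PerLane < Cost) {
          Cost = PerLane;
          Width = W;                           // ends at Width = 4 here
        }
      }
      return Width;
    }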
@@ -5516,7 +5467,7 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
Op2VK = TargetTransformInfo::OK_UniformConstantValue;
else if (isa<ConstantVector>(Op2) || isa<ConstantDataVector>(Op2)) {
Op2VK = TargetTransformInfo::OK_NonUniformConstantValue;
- if (cast<Constant>(Op2)->getSplatValue() != NULL)
+ if (cast<Constant>(Op2)->getSplatValue() != nullptr)
Op2VK = TargetTransformInfo::OK_UniformConstantValue;
}
@@ -5730,17 +5681,17 @@ void InnerLoopUnroller::scalarizeInstruction(Instruction *Instr,
// Does this instruction return a value ?
bool IsVoidRetTy = Instr->getType()->isVoidTy();
- Value *UndefVec = IsVoidRetTy ? 0 :
+ Value *UndefVec = IsVoidRetTy ? nullptr :
UndefValue::get(Instr->getType());
// Create a new entry in the WidenMap and initialize it to Undef or Null.
VectorParts &VecResults = WidenMap.splat(Instr, UndefVec);
Instruction *InsertPt = Builder.GetInsertPoint();
BasicBlock *IfBlock = Builder.GetInsertBlock();
- BasicBlock *CondBlock = 0;
+ BasicBlock *CondBlock = nullptr;
VectorParts Cond;
- Loop *VectorLp = 0;
+ Loop *VectorLp = nullptr;
if (IfPredicateStore) {
assert(Instr->getParent()->getSinglePredecessor() &&
"Only support single predecessor blocks");
@@ -5755,7 +5706,7 @@ void InnerLoopUnroller::scalarizeInstruction(Instruction *Instr,
// For each scalar that we create:
// Start an "if (pred) a[i] = ..." block.
- Value *Cmp = 0;
+ Value *Cmp = nullptr;
if (IfPredicateStore) {
if (Cond[Part]->getType()->isVectorTy())
Cond[Part] =
diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp
index ee32227..e13ba95 100644
--- a/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -15,9 +15,6 @@
// "Loop-Aware SLP in GCC" by Ira Rosen, Dorit Nuzman, Ayal Zaks.
//
//===----------------------------------------------------------------------===//
-#define SV_NAME "slp-vectorizer"
-#define DEBUG_TYPE "SLP"
-
#include "llvm/Transforms/Vectorize.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/PostOrderIterator.h"
@@ -34,6 +31,7 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/NoFolder.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/Verifier.h"
@@ -41,11 +39,15 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/VectorUtils.h"
#include <algorithm>
#include <map>
using namespace llvm;
+#define SV_NAME "slp-vectorizer"
+#define DEBUG_TYPE "SLP"
+
static cl::opt<int>
SLPCostThreshold("slp-threshold", cl::init(0), cl::Hidden,
cl::desc("Only vectorize if you gain more than this "
@@ -72,8 +74,6 @@ struct BlockNumbering {
BlockNumbering(BasicBlock *Bb) : BB(Bb), Valid(false) {}
- BlockNumbering() : BB(0), Valid(false) {}
-
void numberInstructions() {
unsigned Loc = 0;
InstrIdx.clear();
@@ -120,15 +120,15 @@ private:
static BasicBlock *getSameBlock(ArrayRef<Value *> VL) {
Instruction *I0 = dyn_cast<Instruction>(VL[0]);
if (!I0)
- return 0;
+ return nullptr;
BasicBlock *BB = I0->getParent();
for (int i = 1, e = VL.size(); i < e; i++) {
Instruction *I = dyn_cast<Instruction>(VL[i]);
if (!I)
- return 0;
+ return nullptr;
if (BB != I->getParent())
- return 0;
+ return nullptr;
}
return BB;
}
@@ -180,7 +180,7 @@ static Instruction *propagateMetadata(Instruction *I, ArrayRef<Value *> VL) {
switch (Kind) {
default:
- MD = 0; // Remove unknown metadata
+ MD = nullptr; // Remove unknown metadata
break;
case LLVMContext::MD_tbaa:
MD = MDNode::getMostGenericTBAA(MD, IMD);
@@ -201,7 +201,7 @@ static Type* getSameType(ArrayRef<Value *> VL) {
Type *Ty = VL[0]->getType();
for (int i = 1, e = VL.size(); i < e; i++)
if (VL[i]->getType() != Ty)
- return 0;
+ return nullptr;
return Ty;
}
@@ -345,17 +345,10 @@ public:
typedef SmallVector<StoreInst *, 8> StoreList;
BoUpSLP(Function *Func, ScalarEvolution *Se, const DataLayout *Dl,
- TargetTransformInfo *Tti, AliasAnalysis *Aa, LoopInfo *Li,
- DominatorTree *Dt) :
- F(Func), SE(Se), DL(Dl), TTI(Tti), AA(Aa), LI(Li), DT(Dt),
- Builder(Se->getContext()) {
- // Setup the block numbering utility for all of the blocks in the
- // function.
- for (Function::iterator it = F->begin(), e = F->end(); it != e; ++it) {
- BasicBlock *BB = it;
- BlocksNumbers[BB] = BlockNumbering(BB);
- }
- }
+ TargetTransformInfo *Tti, TargetLibraryInfo *TLi, AliasAnalysis *Aa,
+ LoopInfo *Li, DominatorTree *Dt)
+ : F(Func), SE(Se), DL(Dl), TTI(Tti), TLI(TLi), AA(Aa), LI(Li), DT(Dt),
+ Builder(Se->getContext()) {}
/// \brief Vectorize the tree that starts with the elements in \p VL.
/// Returns the vectorized root.
@@ -365,13 +358,13 @@ public:
/// A negative number means that this is profitable.
int getTreeCost();
- /// Construct a vectorizable tree that starts at \p Roots and is possibly
- /// used by a reduction of \p RdxOps.
- void buildTree(ArrayRef<Value *> Roots, ValueSet *RdxOps = 0);
+ /// Construct a vectorizable tree that starts at \p Roots, ignoring the
+ /// users in \p UserIgnoreLst for the purposes of scheduling and extraction.
+ void buildTree(ArrayRef<Value *> Roots,
+ ArrayRef<Value *> UserIgnoreLst = None);
/// Clear the internal data structures that are created by 'buildTree'.
void deleteTree() {
- RdxOps = 0;
VectorizableTree.clear();
ScalarToTreeEntry.clear();
MustGather.clear();
@@ -446,7 +439,7 @@ private:
bool isFullyVectorizableTinyTree();
struct TreeEntry {
- TreeEntry() : Scalars(), VectorizedValue(0), LastScalarIndex(0),
+ TreeEntry() : Scalars(), VectorizedValue(nullptr), LastScalarIndex(0),
NeedToGather(0) {}
/// \returns true if the scalars in VL are equal to this entry.
@@ -527,14 +520,22 @@ private:
/// Numbers instructions in different blocks.
DenseMap<BasicBlock *, BlockNumbering> BlocksNumbers;
- /// Reduction operators.
- ValueSet *RdxOps;
+ /// \brief Get the corresponding instruction numbering list for a given
+ /// BasicBlock. The list is allocated lazily.
+ BlockNumbering &getBlockNumbering(BasicBlock *BB) {
+ auto I = BlocksNumbers.insert(std::make_pair(BB, BlockNumbering(BB)));
+ return I.first->second;
+ }
+
+ /// List of users to ignore during scheduling and that don't need extracting.
+ ArrayRef<Value *> UserIgnoreList;
// Analysis and block reference.
Function *F;
ScalarEvolution *SE;
const DataLayout *DL;
TargetTransformInfo *TTI;
+ TargetLibraryInfo *TLI;
AliasAnalysis *AA;
LoopInfo *LI;
DominatorTree *DT;
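getBlockNumbering replaces the constructor's eager walk over every block in the function: an entry is now created the first time a block is queried. The load-bearing detail is that a map insert() leaves an existing entry untouched, so a single call serves as both "create if absent" and "look up". A minimal sketch with std::map and a hypothetical Numbering type:

#include <map>
#include <utility>

struct Numbering {
  int blockId;
  explicit Numbering(int id) : blockId(id) {}
};

static std::map<int, Numbering> table;

// Insert-if-absent, then return the (new or pre-existing) entry.
// insert() returns {iterator, bool}; the bool says whether it was new.
static Numbering &getNumbering(int blockId) {
  auto result = table.insert(std::make_pair(blockId, Numbering(blockId)));
  return result.first->second;
}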
@@ -542,9 +543,10 @@ private:
IRBuilder<> Builder;
};
-void BoUpSLP::buildTree(ArrayRef<Value *> Roots, ValueSet *Rdx) {
+void BoUpSLP::buildTree(ArrayRef<Value *> Roots,
+ ArrayRef<Value *> UserIgnoreLst) {
deleteTree();
- RdxOps = Rdx;
+ UserIgnoreList = UserIgnoreLst;
if (!getSameType(Roots))
return;
buildTree_rec(Roots, 0);
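Giving UserIgnoreLst a default of None keeps every existing buildTree(Roots) call site compiling while letting the reduction and build-vector paths pass a list. Since ArrayRef is a non-owning view, callers must keep the underlying storage alive for the duration of the call. The same defaulted-view idiom in portable C++20, with std::span standing in for ArrayRef:

#include <cstdio>
#include <span>
#include <vector>

// The default {} is an empty view, so old call sites need no changes.
static void buildTreeLike(std::span<const int> userIgnoreList = {}) {
  std::printf("ignoring %zu users\n", userIgnoreList.size());
}

int main() {
  std::vector<int> ignore = {7, 9};
  buildTreeLike();       // no ignore list
  buildTreeLike(ignore); // borrowed view; 'ignore' must outlive the call
}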
@@ -576,8 +578,9 @@ void BoUpSLP::buildTree(ArrayRef<Value *> Roots, ValueSet *Rdx) {
if (!UserInst)
continue;
- // Ignore uses that are part of the reduction.
- if (Rdx && std::find(Rdx->begin(), Rdx->end(), UserInst) != Rdx->end())
+ // Ignore users in the user ignore list.
+ if (std::find(UserIgnoreList.begin(), UserIgnoreList.end(), UserInst) !=
+ UserIgnoreList.end())
continue;
DEBUG(dbgs() << "SLP: Need to extract:" << *U << " from lane " <<
@@ -708,12 +711,13 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
continue;
}
- // This user is part of the reduction.
- if (RdxOps && RdxOps->count(UI))
+ // Ignore users in the user ignore list.
+ if (std::find(UserIgnoreList.begin(), UserIgnoreList.end(), UI) !=
+ UserIgnoreList.end())
continue;
// Make sure that we can schedule this unknown user.
- BlockNumbering &BN = BlocksNumbers[BB];
+ BlockNumbering &BN = getBlockNumbering(BB);
int UserIndex = BN.getIndex(UI);
if (UserIndex < MyLastIndex) {
@@ -948,32 +952,36 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
}
case Instruction::Call: {
// Check if the calls are all to the same vectorizable intrinsic.
- IntrinsicInst *II = dyn_cast<IntrinsicInst>(VL[0]);
- if (II==NULL) {
+ CallInst *CI = cast<CallInst>(VL[0]);
+ // Check if this is an intrinsic call or something that can be
+ // represented by an intrinsic call.
+ Intrinsic::ID ID = getIntrinsicIDForCall(CI, TLI);
+ if (!isTriviallyVectorizable(ID)) {
newTreeEntry(VL, false);
DEBUG(dbgs() << "SLP: Non-vectorizable call.\n");
return;
}
- Function *Int = II->getCalledFunction();
+ Function *Int = CI->getCalledFunction();
for (unsigned i = 1, e = VL.size(); i != e; ++i) {
- IntrinsicInst *II2 = dyn_cast<IntrinsicInst>(VL[i]);
- if (!II2 || II2->getCalledFunction() != Int) {
+ CallInst *CI2 = dyn_cast<CallInst>(VL[i]);
+ if (!CI2 || CI2->getCalledFunction() != Int ||
+ getIntrinsicIDForCall(CI2, TLI) != ID) {
newTreeEntry(VL, false);
- DEBUG(dbgs() << "SLP: mismatched calls:" << *II << "!=" << *VL[i]
+ DEBUG(dbgs() << "SLP: mismatched calls:" << *CI << "!=" << *VL[i]
<< "\n");
return;
}
}
newTreeEntry(VL, true);
- for (unsigned i = 0, e = II->getNumArgOperands(); i != e; ++i) {
+ for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i) {
ValueList Operands;
// Prepare the operand vector.
for (unsigned j = 0; j < VL.size(); ++j) {
- IntrinsicInst *II2 = dyn_cast<IntrinsicInst>(VL[j]);
- Operands.push_back(II2->getArgOperand(i));
+ CallInst *CI2 = dyn_cast<CallInst>(VL[j]);
+ Operands.push_back(CI2->getArgOperand(i));
}
buildTree_rec(Operands, Depth + 1);
}
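The Call case now admits any CallInst whose callee maps to a trivially vectorizable intrinsic through TargetLibraryInfo, not just literal IntrinsicInsts, and every lane must agree on both the callee and the derived intrinsic ID before the bundle is accepted. The uniformity test reduced to its shape, over a hypothetical toy Call record:

#include <cstddef>
#include <vector>

struct Call { int callee; int intrinsicID; };

// Accept the bundle only if every call matches lane 0's key, mirroring
// the per-lane checks in the hunk above.
static bool uniformCalls(const std::vector<Call> &vl) {
  if (vl.empty())
    return false;
  for (std::size_t i = 1; i < vl.size(); ++i)
    if (vl[i].callee != vl[0].callee ||
        vl[i].intrinsicID != vl[0].intrinsicID)
      return false;
  return true;
}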
@@ -1090,7 +1098,7 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
// If instead not all operands are constants, then set the operand kind
// to OK_AnyValue. If all operands are constants but not the same,
// then set the operand kind to OK_NonUniformConstantValue.
- ConstantInt *CInt = NULL;
+ ConstantInt *CInt = nullptr;
for (unsigned i = 0; i < VL.size(); ++i) {
const Instruction *I = cast<Instruction>(VL[i]);
if (!isa<ConstantInt>(I->getOperand(1))) {
@@ -1129,12 +1137,11 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
}
case Instruction::Call: {
CallInst *CI = cast<CallInst>(VL0);
- IntrinsicInst *II = cast<IntrinsicInst>(CI);
- Intrinsic::ID ID = II->getIntrinsicID();
+ Intrinsic::ID ID = getIntrinsicIDForCall(CI, TLI);
// Calculate the cost of the scalar and vector calls.
SmallVector<Type*, 4> ScalarTys, VecTys;
- for (unsigned op = 0, opc = II->getNumArgOperands(); op!= opc; ++op) {
+ for (unsigned op = 0, opc = CI->getNumArgOperands(); op != opc; ++op) {
ScalarTys.push_back(CI->getArgOperand(op)->getType());
VecTys.push_back(VectorType::get(CI->getArgOperand(op)->getType(),
VecTy->getNumElements()));
@@ -1147,7 +1154,7 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
DEBUG(dbgs() << "SLP: Call cost "<< VecCallCost - ScalarCallCost
<< " (" << VecCallCost << "-" << ScalarCallCost << ")"
- << " for " << *II << "\n");
+ << " for " << *CI << "\n");
return VecCallCost - ScalarCallCost;
}
@@ -1244,7 +1251,7 @@ Value *BoUpSLP::getPointerOperand(Value *I) {
return LI->getPointerOperand();
if (StoreInst *SI = dyn_cast<StoreInst>(I))
return SI->getPointerOperand();
- return 0;
+ return nullptr;
}
unsigned BoUpSLP::getAddressSpaceOperand(Value *I) {
@@ -1318,13 +1325,13 @@ Value *BoUpSLP::getSinkBarrier(Instruction *Src, Instruction *Dst) {
if (!A.Ptr || !B.Ptr || AA->alias(A, B))
return I;
}
- return 0;
+ return nullptr;
}
int BoUpSLP::getLastIndex(ArrayRef<Value *> VL) {
BasicBlock *BB = cast<Instruction>(VL[0])->getParent();
- assert(BB == getSameBlock(VL) && BlocksNumbers.count(BB) && "Invalid block");
- BlockNumbering &BN = BlocksNumbers[BB];
+ assert(BB == getSameBlock(VL) && "Invalid block");
+ BlockNumbering &BN = getBlockNumbering(BB);
int MaxIdx = BN.getIndex(BB->getFirstNonPHI());
for (unsigned i = 0, e = VL.size(); i < e; ++i)
@@ -1334,8 +1341,8 @@ int BoUpSLP::getLastIndex(ArrayRef<Value *> VL) {
Instruction *BoUpSLP::getLastInstruction(ArrayRef<Value *> VL) {
BasicBlock *BB = cast<Instruction>(VL[0])->getParent();
- assert(BB == getSameBlock(VL) && BlocksNumbers.count(BB) && "Invalid block");
- BlockNumbering &BN = BlocksNumbers[BB];
+ assert(BB == getSameBlock(VL) && "Invalid block");
+ BlockNumbering &BN = getBlockNumbering(BB);
int MaxIdx = BN.getIndex(cast<Instruction>(VL[0]));
for (unsigned i = 1, e = VL.size(); i < e; ++i)
@@ -1394,7 +1401,7 @@ Value *BoUpSLP::alreadyVectorized(ArrayRef<Value *> VL) const {
if (En->isSame(VL) && En->VectorizedValue)
return En->VectorizedValue;
}
- return 0;
+ return nullptr;
}
Value *BoUpSLP::vectorizeTree(ArrayRef<Value *> VL) {
@@ -1615,6 +1622,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
VecTy->getPointerTo(AS));
unsigned Alignment = LI->getAlignment();
LI = Builder.CreateLoad(VecPtr);
+ if (!Alignment)
+ Alignment = DL->getABITypeAlignment(LI->getPointerOperand()->getType());
LI->setAlignment(Alignment);
E->VectorizedValue = LI;
return propagateMetadata(LI, E->Scalars);
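In IR of this vintage, an alignment of 0 on a load or store means "unspecified", so propagating it onto the new vector access would understate what the backend may assume. The added lines here (and in the store hunk below) substitute the ABI default in that case; the convention in isolation, with both parameters hypothetical:

// 0 encodes "no alignment written in the IR"; fall back to the ABI
// default computed from the accessed type.
static unsigned effectiveAlignment(unsigned specified, unsigned abiDefault) {
  return specified ? specified : abiDefault;
}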
@@ -1634,13 +1643,14 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
Value *VecPtr = Builder.CreateBitCast(SI->getPointerOperand(),
VecTy->getPointerTo(AS));
StoreInst *S = Builder.CreateStore(VecValue, VecPtr);
+ if (!Alignment)
+ Alignment = DL->getABITypeAlignment(SI->getPointerOperand()->getType());
S->setAlignment(Alignment);
E->VectorizedValue = S;
return propagateMetadata(S, E->Scalars);
}
case Instruction::Call: {
CallInst *CI = cast<CallInst>(VL0);
-
setInsertPointAfterBundle(E->Scalars);
std::vector<Value *> OpVecs;
for (int j = 0, e = CI->getNumArgOperands(); j < e; ++j) {
@@ -1656,8 +1666,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
}
Module *M = F->getParent();
- IntrinsicInst *II = cast<IntrinsicInst>(CI);
- Intrinsic::ID ID = II->getIntrinsicID();
+ Intrinsic::ID ID = getIntrinsicIDForCall(CI, TLI);
Type *Tys[] = { VectorType::get(CI->getType(), E->Scalars.size()) };
Function *CF = Intrinsic::getDeclaration(M, ID, Tys);
Value *V = Builder.CreateCall(CF, OpVecs);
@@ -1667,7 +1676,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
default:
llvm_unreachable("unknown inst");
}
- return 0;
+ return nullptr;
}
Value *BoUpSLP::vectorizeTree() {
@@ -1746,8 +1755,9 @@ Value *BoUpSLP::vectorizeTree() {
DEBUG(dbgs() << "SLP: \tvalidating user:" << *U << ".\n");
assert((ScalarToTreeEntry.count(U) ||
- // It is legal to replace the reduction users by undef.
- (RdxOps && RdxOps->count(U))) &&
+ // It is legal to replace users in the ignore list with undef.
+ (std::find(UserIgnoreList.begin(), UserIgnoreList.end(), U) !=
+ UserIgnoreList.end())) &&
"Replacing out-of-tree value with undef");
}
#endif
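All three ignore-list sites (the two in buildTree/buildTree_rec and this assert) test membership with a linear std::find over the ArrayRef. For the handful of values a reduction or build-vector chain produces, a scan is as cheap as hashing and avoids owning a container. The membership test in isolation, sketched over std::span:

#include <algorithm>
#include <span>

// Linear membership scan; fine for short lists, O(n) per query otherwise.
static bool isIgnored(std::span<void *const> ignoreList, void *u) {
  return std::find(ignoreList.begin(), ignoreList.end(), u) !=
         ignoreList.end();
}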
@@ -1759,9 +1769,9 @@ Value *BoUpSLP::vectorizeTree() {
}
}
- for (Function::iterator it = F->begin(), e = F->end(); it != e; ++it) {
- BlocksNumbers[it].forget();
- }
+ for (auto &BN : BlocksNumbers)
+ BN.second.forget();
+
Builder.ClearInsertionPoint();
return VectorizableTree[0].VectorizedValue;
@@ -1802,11 +1812,19 @@ void BoUpSLP::optimizeGatherSequence() {
Insert->moveBefore(PreHeader->getTerminator());
}
+ // Make a list of all reachable blocks in our CSE queue.
+ SmallVector<const DomTreeNode *, 8> CSEWorkList;
+ CSEWorkList.reserve(CSEBlocks.size());
+ for (BasicBlock *BB : CSEBlocks)
+ if (DomTreeNode *N = DT->getNode(BB)) {
+ assert(DT->isReachableFromEntry(N));
+ CSEWorkList.push_back(N);
+ }
+
// Sort blocks by domination. This ensures we visit a block after all blocks
// dominating it are visited.
- SmallVector<BasicBlock *, 8> CSEWorkList(CSEBlocks.begin(), CSEBlocks.end());
std::stable_sort(CSEWorkList.begin(), CSEWorkList.end(),
- [this](const BasicBlock *A, const BasicBlock *B) {
+ [this](const DomTreeNode *A, const DomTreeNode *B) {
return DT->properlyDominates(A, B);
});
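Sorting so that dominating blocks come first guarantees that when a block is scanned, every instruction that could subsume one of its gathers has already been visited. One way to see that such an ancestors-first order exists is to number dominator-tree nodes by DFS entry time and sort on that; a self-contained sketch with a hypothetical four-node tree:

#include <algorithm>
#include <cstdio>
#include <vector>

struct Node {
  int id;
  std::vector<Node *> kids;
  int in = 0; // DFS entry time: every ancestor gets a smaller value
};

static void number(Node *n, int &clock) {
  n->in = clock++;
  for (Node *k : n->kids)
    number(k, clock);
}

int main() {
  Node a{0}, b{1}, c{2}, d{3};
  a.kids = {&b, &c};
  b.kids = {&d};
  int clock = 0;
  number(&a, clock);

  std::vector<Node *> work = {&d, &c, &a, &b};
  std::sort(work.begin(), work.end(),
            [](const Node *x, const Node *y) { return x->in < y->in; });
  for (const Node *n : work) // prints "0 1 3 2": parents precede children
    std::printf("%d ", n->id);
  std::printf("\n");
}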
@@ -1814,12 +1832,10 @@ void BoUpSLP::optimizeGatherSequence() {
// instructions. TODO: We can further optimize this scan if we split the
// instructions into different buckets based on the insert lane.
SmallVector<Instruction *, 16> Visited;
- for (SmallVectorImpl<BasicBlock *>::iterator I = CSEWorkList.begin(),
- E = CSEWorkList.end();
- I != E; ++I) {
+ for (auto I = CSEWorkList.begin(), E = CSEWorkList.end(); I != E; ++I) {
assert((I == CSEWorkList.begin() || !DT->dominates(*I, *std::prev(I))) &&
"Worklist not sorted properly!");
- BasicBlock *BB = *I;
+ BasicBlock *BB = (*I)->getBlock();
// For all instructions in blocks containing gather sequences:
for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e;) {
Instruction *In = it++;
@@ -1835,7 +1851,7 @@ void BoUpSLP::optimizeGatherSequence() {
DT->dominates((*v)->getParent(), In->getParent())) {
In->replaceAllUsesWith(*v);
In->eraseFromParent();
- In = 0;
+ In = nullptr;
break;
}
}
@@ -1864,6 +1880,7 @@ struct SLPVectorizer : public FunctionPass {
ScalarEvolution *SE;
const DataLayout *DL;
TargetTransformInfo *TTI;
+ TargetLibraryInfo *TLI;
AliasAnalysis *AA;
LoopInfo *LI;
DominatorTree *DT;
@@ -1874,8 +1891,9 @@ struct SLPVectorizer : public FunctionPass {
SE = &getAnalysis<ScalarEvolution>();
DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
- DL = DLP ? &DLP->getDataLayout() : 0;
+ DL = DLP ? &DLP->getDataLayout() : nullptr;
TTI = &getAnalysis<TargetTransformInfo>();
+ TLI = getAnalysisIfAvailable<TargetLibraryInfo>();
AA = &getAnalysis<AliasAnalysis>();
LI = &getAnalysis<LoopInfo>();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
@@ -1900,8 +1918,8 @@ struct SLPVectorizer : public FunctionPass {
DEBUG(dbgs() << "SLP: Analyzing blocks in " << F.getName() << ".\n");
// Use the bottom up slp vectorizer to construct chains that start with
- // he store instructions.
- BoUpSLP R(&F, SE, DL, TTI, AA, LI, DT);
+ // store instructions.
+ BoUpSLP R(&F, SE, DL, TTI, TLI, AA, LI, DT);
// Scan the blocks in the function in post order.
for (po_iterator<BasicBlock*> it = po_begin(&F.getEntryBlock()),
@@ -1951,8 +1969,11 @@ private:
bool tryToVectorizePair(Value *A, Value *B, BoUpSLP &R);
/// \brief Try to vectorize a list of operands.
+ /// \param BuildVector A list of users to ignore for the purposes of
+ /// scheduling and that don't need extracting.
/// \returns true if a value was vectorized.
- bool tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R);
+ bool tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
+ ArrayRef<Value *> BuildVector = None);
/// \brief Try to vectorize a chain that may start at the operands of \p V.
bool tryToVectorize(BinaryOperator *V, BoUpSLP &R);
@@ -2106,7 +2127,7 @@ unsigned SLPVectorizer::collectStores(BasicBlock *BB, BoUpSLP &R) {
// Check that the pointer points to scalars.
Type *Ty = SI->getValueOperand()->getType();
if (Ty->isAggregateType() || Ty->isVectorTy())
- return 0;
+ continue;
// Find the base pointer.
Value *Ptr = GetUnderlyingObject(SI->getPointerOperand(), DL);
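The change from "return 0" to "continue" above is a behavioral fix, not cleanup: a single store of aggregate or vector type used to abort collectStores for the entire block, throwing away every store already gathered. The bug pattern distilled into a hypothetical example:

#include <vector>

// Count even values in a list.
static int countBuggy(const std::vector<int> &vals) {
  int n = 0;
  for (int v : vals) {
    if (v % 2 != 0)
      return 0; // bug: one odd value discards everything found so far
    ++n;
  }
  return n;
}

static int countFixed(const std::vector<int> &vals) {
  int n = 0;
  for (int v : vals) {
    if (v % 2 != 0)
      continue; // fix: skip the ineligible element and keep scanning
    ++n;
  }
  return n;
}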
@@ -2125,7 +2146,8 @@ bool SLPVectorizer::tryToVectorizePair(Value *A, Value *B, BoUpSLP &R) {
return tryToVectorizeList(VL, R);
}
-bool SLPVectorizer::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R) {
+bool SLPVectorizer::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
+ ArrayRef<Value *> BuildVector) {
if (VL.size() < 2)
return false;
@@ -2153,7 +2175,7 @@ bool SLPVectorizer::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R) {
bool Changed = false;
- // Keep track of values that were delete by vectorizing in the loop below.
+ // Keep track of values that were deleted by vectorizing in the loop below.
SmallVector<WeakVH, 8> TrackValues(VL.begin(), VL.end());
for (unsigned i = 0, e = VL.size(); i < e; ++i) {
@@ -2175,13 +2197,38 @@ bool SLPVectorizer::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R) {
<< "\n");
ArrayRef<Value *> Ops = VL.slice(i, OpsWidth);
- R.buildTree(Ops);
+ ArrayRef<Value *> BuildVectorSlice;
+ if (!BuildVector.empty())
+ BuildVectorSlice = BuildVector.slice(i, OpsWidth);
+
+ R.buildTree(Ops, BuildVectorSlice);
int Cost = R.getTreeCost();
if (Cost < -SLPCostThreshold) {
DEBUG(dbgs() << "SLP: Vectorizing list at cost:" << Cost << ".\n");
- R.vectorizeTree();
-
+ Value *VectorizedRoot = R.vectorizeTree();
+
+ // Reconstruct the build vector by extracting lanes from the vectorized
+ // root. This way we handle the case where some elements of the vector are
+ // undefined:
+ // (return (insertelt <4 x i32> (insertelt undef (opd0) 0) (opd1) 2))
+ if (!BuildVectorSlice.empty()) {
+ // The insert point is the last build vector instruction, which the
+ // vectorized root precedes. Unlike the root, it is guaranteed to be an
+ // instruction: the vectorized tree could have been constant folded.
+ Instruction *InsertAfter = cast<Instruction>(BuildVectorSlice.back());
+ unsigned VecIdx = 0;
+ for (auto &V : BuildVectorSlice) {
+ IRBuilder<true, NoFolder> Builder(
+ ++BasicBlock::iterator(InsertAfter));
+ InsertElementInst *IE = cast<InsertElementInst>(V);
+ Instruction *Extract = cast<Instruction>(Builder.CreateExtractElement(
+ VectorizedRoot, Builder.getInt32(VecIdx++)));
+ IE->setOperand(1, Extract);
+ IE->removeFromParent();
+ IE->insertAfter(Extract);
+ InsertAfter = IE;
+ }
+ }
// Move to the next bundle.
i += VF - 1;
Changed = true;
@@ -2290,7 +2337,7 @@ static Value *createRdxShuffleMask(unsigned VecLen, unsigned NumEltsToRdx,
/// *p =
///
class HorizontalReduction {
- SmallPtrSet<Value *, 16> ReductionOps;
+ SmallVector<Value *, 16> ReductionOps;
SmallVector<Value *, 32> ReducedVals;
BinaryOperator *ReductionRoot;
@@ -2308,7 +2355,7 @@ class HorizontalReduction {
public:
HorizontalReduction()
- : ReductionRoot(0), ReductionPHI(0), ReductionOpcode(0),
+ : ReductionRoot(nullptr), ReductionPHI(nullptr), ReductionOpcode(0),
ReducedValueOpcode(0), ReduxWidth(0), IsPairwiseReduction(false) {}
/// \brief Try to find a reduction tree.
@@ -2323,10 +2370,10 @@ public:
// In such a case start looking for a tree rooted in the first '+'.
if (Phi) {
if (B->getOperand(0) == Phi) {
- Phi = 0;
+ Phi = nullptr;
B = dyn_cast<BinaryOperator>(B->getOperand(1));
} else if (B->getOperand(1) == Phi) {
- Phi = 0;
+ Phi = nullptr;
B = dyn_cast<BinaryOperator>(B->getOperand(0));
}
}
@@ -2384,7 +2431,7 @@ public:
// We need to be able to reassociate the adds.
if (!TreeN->isAssociative())
return false;
- ReductionOps.insert(TreeN);
+ ReductionOps.push_back(TreeN);
}
// Retract.
Stack.pop_back();
@@ -2412,7 +2459,7 @@ public:
if (NumReducedVals < ReduxWidth)
return false;
- Value *VectorizedTree = 0;
+ Value *VectorizedTree = nullptr;
IRBuilder<> Builder(ReductionRoot);
FastMathFlags Unsafe;
Unsafe.setUnsafeAlgebra();
@@ -2421,7 +2468,7 @@ public:
for (; i < NumReducedVals - ReduxWidth + 1; i += ReduxWidth) {
ArrayRef<Value *> ValsToReduce(&ReducedVals[i], ReduxWidth);
- V.buildTree(ValsToReduce, &ReductionOps);
+ V.buildTree(ValsToReduce, ReductionOps);
// Estimate cost.
int Cost = V.getTreeCost() + getReductionCost(TTI, ReducedVals[i]);
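The surrounding loop consumes ReducedVals in windows of ReduxWidth; each window is costed, vectorized, and folded into one running value, with any leftover tail handled scalar-wise further down. Stripped of the IR machinery, the control flow is a chunked reduction:

#include <cstddef>
#include <cstdio>
#include <numeric>
#include <vector>

int main() {
  std::vector<int> reducedVals = {1, 2, 3, 4, 5, 6, 7, 8, 9};
  const std::size_t reduxWidth = 4;

  int total = 0;
  std::size_t i = 0;
  // Full windows: each would become one vectorized reduction tree.
  for (; i + reduxWidth <= reducedVals.size(); i += reduxWidth)
    total += std::accumulate(reducedVals.begin() + i,
                             reducedVals.begin() + i + reduxWidth, 0);
  // Scalar tail: values that do not fill a whole window.
  for (; i < reducedVals.size(); ++i)
    total += reducedVals[i];
  std::printf("%d\n", total); // 45
}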
@@ -2455,13 +2502,13 @@ public:
}
// Update users.
if (ReductionPHI) {
- assert(ReductionRoot != NULL && "Need a reduction operation");
+ assert(ReductionRoot && "Need a reduction operation");
ReductionRoot->setOperand(0, VectorizedTree);
ReductionRoot->setOperand(1, ReductionPHI);
} else
ReductionRoot->replaceAllUsesWith(VectorizedTree);
}
- return VectorizedTree != 0;
+ return VectorizedTree != nullptr;
}
private:
@@ -2540,13 +2587,16 @@ private:
///
/// Returns true if it matches
///
-static bool findBuildVector(InsertElementInst *IE,
- SmallVectorImpl<Value *> &Ops) {
- if (!isa<UndefValue>(IE->getOperand(0)))
+static bool findBuildVector(InsertElementInst *FirstInsertElem,
+ SmallVectorImpl<Value *> &BuildVector,
+ SmallVectorImpl<Value *> &BuildVectorOpds) {
+ if (!isa<UndefValue>(FirstInsertElem->getOperand(0)))
return false;
+ InsertElementInst *IE = FirstInsertElem;
while (true) {
- Ops.push_back(IE->getOperand(1));
+ BuildVector.push_back(IE);
+ BuildVectorOpds.push_back(IE->getOperand(1));
if (IE->use_empty())
return false;
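findBuildVector now records the insertelement instructions themselves (BuildVector) alongside the scalars they insert (BuildVectorOpds), walking a chain in which each intermediate value must feed exactly one user. A simplified mirror of the walk over a hypothetical link type; the real code also checks that the next user is itself an insertelement:

#include <vector>

struct InsertLink {
  int scalar;           // the value this link inserts
  int numUsers;         // size of the real instruction's use list
  InsertLink *nextLink; // the single consuming insertelement, or nullptr
};

static bool collectChain(InsertLink *first,
                         std::vector<InsertLink *> &links,
                         std::vector<int> &scalars) {
  for (InsertLink *ie = first; ie; ie = ie->nextLink) {
    links.push_back(ie);
    scalars.push_back(ie->scalar);
    if (ie->numUsers == 0)
      return false; // dead chain: nothing consumes the built vector
    if (ie->nextLink && ie->numUsers > 1)
      return false; // an intermediate value escapes the chain
  }
  return true;
}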
@@ -2641,7 +2691,8 @@ bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
Value *Rdx =
(P->getIncomingBlock(0) == BB
? (P->getIncomingValue(0))
- : (P->getIncomingBlock(1) == BB ? P->getIncomingValue(1) : 0));
+ : (P->getIncomingBlock(1) == BB ? P->getIncomingValue(1)
+ : nullptr));
// Check if this is a Binary Operator.
BinaryOperator *BI = dyn_cast_or_null<BinaryOperator>(Rdx);
if (!BI)
@@ -2680,7 +2731,7 @@ bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
if (BinaryOperator *BinOp =
dyn_cast<BinaryOperator>(SI->getValueOperand())) {
HorizontalReduction HorRdx;
- if (((HorRdx.matchAssociativeReduction(0, BinOp, DL) &&
+ if (((HorRdx.matchAssociativeReduction(nullptr, BinOp, DL) &&
HorRdx.tryToReduce(R, TTI)) ||
tryToVectorize(BinOp, R))) {
Changed = true;
@@ -2716,12 +2767,16 @@ bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
}
// Try to vectorize trees that start at insertelement instructions.
- if (InsertElementInst *IE = dyn_cast<InsertElementInst>(it)) {
- SmallVector<Value *, 8> Ops;
- if (!findBuildVector(IE, Ops))
+ if (InsertElementInst *FirstInsertElem = dyn_cast<InsertElementInst>(it)) {
+ SmallVector<Value *, 16> BuildVector;
+ SmallVector<Value *, 16> BuildVectorOpds;
+ if (!findBuildVector(FirstInsertElem, BuildVector, BuildVectorOpds))
continue;
- if (tryToVectorizeList(Ops, R)) {
+ // Vectorize starting with the build vector operands, ignoring the
+ // BuildVector instructions for the purposes of scheduling and user
+ // extraction.
+ if (tryToVectorizeList(BuildVectorOpds, R, BuildVector)) {
Changed = true;
it = BB->begin();
e = BB->end();